diff --git a/AArch64-Fix-constraints-for-CPY-M.patch b/AArch64-Fix-constraints-for-CPY-M.patch deleted file mode 100644 index 5fcb38e91d8472edde03bce5b7b3e8efe7143844..0000000000000000000000000000000000000000 --- a/AArch64-Fix-constraints-for-CPY-M.patch +++ /dev/null @@ -1,67 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-AArch64-Fix-constraints-for-CPY-M.patch -3c2707f33af46ac145769872b65e25fd0b870903 - -diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md -index cbf29a82e28..59bf4a69507 100644 ---- a/gcc/config/aarch64/aarch64-sve.md -+++ b/gcc/config/aarch64/aarch64-sve.md -@@ -6523,7 +6523,7 @@ - (define_insn "@aarch64_sel_dup" - [(set (match_operand:SVE_FULL 0 "register_operand" "=?w, w, ??w, ?&w, ??&w, ?&w") - (unspec:SVE_FULL -- [(match_operand: 3 "register_operand" "Upa, Upa, Upl, Upl, Upl, Upl") -+ [(match_operand: 3 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") - (vec_duplicate:SVE_FULL - (match_operand: 1 "register_operand" "r, w, r, w, r, w")) - (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero" "0, 0, Dz, Dz, w, w")] -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cpy_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cpy_1.c -new file mode 100644 -index 00000000000..1d8f429caeb ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cpy_1.c -@@ -0,0 +1,42 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#include -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/* -+** dup_x0_m: -+** add (x[0-9]+), x0, #?1 -+** mov (p[0-7])\.b, p15\.b -+** mov z0\.d, \2/m, \1 -+** ret -+*/ -+svuint64_t -+dup_x0_m (svuint64_t z0, uint64_t x0) -+{ -+ register svbool_t pg asm ("p15"); -+ asm volatile ("" : "=Upa" (pg)); -+ return svdup_u64_m (z0, pg, x0 + 1); -+} -+ -+/* -+** dup_d1_z: -+** mov (p[0-7])\.b, p15\.b -+** mov z0\.d, \1/m, d1 -+** ret -+*/ -+svfloat64_t -+dup_d1_z (svfloat64_t z0, float64_t d1) -+{ -+ register svbool_t pg asm ("p15"); -+ asm volatile ("" : "=Upa" (pg)); -+ return svdup_f64_m (z0, pg, d1); -+} -+ -+#ifdef __cplusplus -+} -+#endif diff --git a/Apply-maximum-nunits-for-BB-SLP.patch b/Apply-maximum-nunits-for-BB-SLP.patch deleted file mode 100644 index 43fc0e0d558f1c5497f637b97201a9a8687822a7..0000000000000000000000000000000000000000 --- a/Apply-maximum-nunits-for-BB-SLP.patch +++ /dev/null @@ -1,694 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. 
- -0001-Apply-maximum-nunits-for-BB-SLP.patch -9b75f56d4b7951c60a656396dddd4a65787b95bc - -diff -Nurp a/gcc/testsuite/gcc.dg/vect/bb-slp-4.c b/gcc/testsuite/gcc.dg/vect/bb-slp-4.c ---- a/gcc/testsuite/gcc.dg/vect/bb-slp-4.c 2020-12-20 18:46:19.539633230 +0800 -+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-4.c 2020-12-20 18:48:12.799633230 +0800 -@@ -38,5 +38,4 @@ int main (void) - return 0; - } - --/* { dg-final { scan-tree-dump-times "basic block vectorized" 0 "slp2" } } */ -- -+/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/vect/bb-slp-bool-1.c b/gcc/testsuite/gcc.dg/vect/bb-slp-bool-1.c ---- a/gcc/testsuite/gcc.dg/vect/bb-slp-bool-1.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-bool-1.c 2020-12-20 18:48:12.799633230 +0800 -@@ -0,0 +1,44 @@ -+#include "tree-vect.h" -+ -+void __attribute__ ((noipa)) -+f1 (_Bool *x, unsigned short *y) -+{ -+ x[0] = (y[0] == 1); -+ x[1] = (y[1] == 1); -+} -+ -+void __attribute__ ((noipa)) -+f2 (_Bool *x, unsigned short *y) -+{ -+ x[0] = (y[0] == 1); -+ x[1] = (y[1] == 1); -+ x[2] = (y[2] == 1); -+ x[3] = (y[3] == 1); -+ x[4] = (y[4] == 1); -+ x[5] = (y[5] == 1); -+ x[6] = (y[6] == 1); -+ x[7] = (y[7] == 1); -+} -+ -+_Bool x[8]; -+unsigned short y[8] = { 11, 1, 9, 5, 1, 44, 1, 1 }; -+ -+int -+main (void) -+{ -+ check_vect (); -+ -+ f1 (x, y); -+ -+ if (x[0] || !x[1]) -+ __builtin_abort (); -+ -+ x[1] = 0; -+ -+ f2 (x, y); -+ -+ if (x[0] || !x[1] || x[2] | x[3] || !x[4] || x[5] || !x[6] || !x[7]) -+ __builtin_abort (); -+ -+ return 0; -+} -diff -Nurp a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_14.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_14.c ---- a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_14.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_14.c 2020-12-20 18:48:11.811633230 +0800 -@@ -0,0 +1,26 @@ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+/* -+** foo: -+** ( -+** ldr d([0-9]+), \[x1\] -+** ldr q([0-9]+), \[x0\] -+** saddw v([0-9]+)\.4s, v\2\.4s, v\1\.4h -+** str q\3, \[x0\] -+** | -+** ldr q([0-9]+), \[x0\] -+** ldr d([0-9]+), \[x1\] -+** saddw v([0-9]+)\.4s, v\4\.4s, v\5\.4h -+** str q\6, \[x0\] -+** ) -+** ret -+*/ -+void -+foo (int *x, short *y) -+{ -+ x[0] += y[0]; -+ x[1] += y[1]; -+ x[2] += y[2]; -+ x[3] += y[3]; -+} -diff -Nurp a/gcc/testsuite/gcc.target/i386/pr84101.c b/gcc/testsuite/gcc.target/i386/pr84101.c ---- a/gcc/testsuite/gcc.target/i386/pr84101.c 2020-12-20 18:46:18.383633230 +0800 -+++ b/gcc/testsuite/gcc.target/i386/pr84101.c 2020-12-20 18:48:11.611633230 +0800 -@@ -18,4 +18,5 @@ uint64_pair_t pair(int num) - return p ; - } - --/* { dg-final { scan-tree-dump-not "basic block vectorized" "slp2" } } */ -+/* See PR92266 for the XFAIL. */ -+/* { dg-final { scan-tree-dump-not "basic block vectorized" "slp2" { xfail ilp32 } } } */ -diff -Nurp a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c ---- a/gcc/tree-vect-data-refs.c 2020-12-20 18:46:19.911633230 +0800 -+++ b/gcc/tree-vect-data-refs.c 2020-12-20 18:48:11.047633230 +0800 -@@ -4312,9 +4312,8 @@ vect_analyze_data_refs (vec_info *vinfo, - - /* Set vectype for STMT. 
*/ - scalar_type = TREE_TYPE (DR_REF (dr)); -- STMT_VINFO_VECTYPE (stmt_info) -- = get_vectype_for_scalar_type (vinfo, scalar_type); -- if (!STMT_VINFO_VECTYPE (stmt_info)) -+ tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type); -+ if (!vectype) - { - if (dump_enabled_p ()) - { -@@ -4345,14 +4344,19 @@ vect_analyze_data_refs (vec_info *vinfo, - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, - "got vectype for stmt: %G%T\n", -- stmt_info->stmt, STMT_VINFO_VECTYPE (stmt_info)); -+ stmt_info->stmt, vectype); - } - - /* Adjust the minimal vectorization factor according to the - vector type. */ -- vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)); -+ vf = TYPE_VECTOR_SUBPARTS (vectype); - *min_vf = upper_bound (*min_vf, vf); - -+ /* Leave the BB vectorizer to pick the vector type later, based on -+ the final dataref group size and SLP node size. */ -+ if (is_a (vinfo)) -+ STMT_VINFO_VECTYPE (stmt_info) = vectype; -+ - if (gatherscatter != SG_NONE) - { - gather_scatter_info gs_info; -diff -Nurp a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c ---- a/gcc/tree-vect-patterns.c 2020-12-20 18:46:19.979633230 +0800 -+++ b/gcc/tree-vect-patterns.c 2020-12-20 18:48:11.227633230 +0800 -@@ -4142,9 +4142,10 @@ vect_recog_bool_pattern (stmt_vec_info s - && STMT_VINFO_DATA_REF (stmt_vinfo)) - { - stmt_vec_info pattern_stmt_info; -- vectype = STMT_VINFO_VECTYPE (stmt_vinfo); -- gcc_assert (vectype != NULL_TREE); -- if (!VECTOR_MODE_P (TYPE_MODE (vectype))) -+ tree nunits_vectype; -+ if (!vect_get_vector_types_for_stmt (stmt_vinfo, &vectype, -+ &nunits_vectype) -+ || !VECTOR_MODE_P (TYPE_MODE (vectype))) - return NULL; - - if (check_bool_pattern (var, vinfo, bool_stmts)) -diff -Nurp a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c ---- a/gcc/tree-vect-slp.c 2020-12-20 18:46:17.763633230 +0800 -+++ b/gcc/tree-vect-slp.c 2020-12-20 18:48:11.227633230 +0800 -@@ -606,6 +606,77 @@ again: - return 0; - } - -+/* Try to assign vector type VECTYPE to STMT_INFO for BB vectorization. -+ Return true if we can, meaning that this choice doesn't conflict with -+ existing SLP nodes that use STMT_INFO. */ -+ -+static bool -+vect_update_shared_vectype (stmt_vec_info stmt_info, tree vectype) -+{ -+ tree old_vectype = STMT_VINFO_VECTYPE (stmt_info); -+ if (old_vectype && useless_type_conversion_p (vectype, old_vectype)) -+ return true; -+ -+ if (STMT_VINFO_GROUPED_ACCESS (stmt_info) -+ && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))) -+ { -+ /* We maintain the invariant that if any statement in the group is -+ used, all other members of the group have the same vector type. 
*/ -+ stmt_vec_info first_info = DR_GROUP_FIRST_ELEMENT (stmt_info); -+ stmt_vec_info member_info = first_info; -+ for (; member_info; member_info = DR_GROUP_NEXT_ELEMENT (member_info)) -+ if (STMT_VINFO_NUM_SLP_USES (member_info) > 0 -+ || is_pattern_stmt_p (member_info)) -+ break; -+ -+ if (!member_info) -+ { -+ for (member_info = first_info; member_info; -+ member_info = DR_GROUP_NEXT_ELEMENT (member_info)) -+ STMT_VINFO_VECTYPE (member_info) = vectype; -+ return true; -+ } -+ } -+ else if (STMT_VINFO_NUM_SLP_USES (stmt_info) == 0 -+ && !is_pattern_stmt_p (stmt_info)) -+ { -+ STMT_VINFO_VECTYPE (stmt_info) = vectype; -+ return true; -+ } -+ -+ if (dump_enabled_p ()) -+ { -+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, -+ "Build SLP failed: incompatible vector" -+ " types for: %G", stmt_info->stmt); -+ dump_printf_loc (MSG_NOTE, vect_location, -+ " old vector type: %T\n", old_vectype); -+ dump_printf_loc (MSG_NOTE, vect_location, -+ " new vector type: %T\n", vectype); -+ } -+ return false; -+} -+ -+/* Try to infer and assign a vector type to all the statements in STMTS. -+ Used only for BB vectorization. */ -+ -+static bool -+vect_update_all_shared_vectypes (vec stmts) -+{ -+ tree vectype, nunits_vectype; -+ if (!vect_get_vector_types_for_stmt (stmts[0], &vectype, -+ &nunits_vectype, stmts.length ())) -+ return false; -+ -+ stmt_vec_info stmt_info; -+ unsigned int i; -+ FOR_EACH_VEC_ELT (stmts, i, stmt_info) -+ if (!vect_update_shared_vectype (stmt_info, vectype)) -+ return false; -+ -+ return true; -+} -+ - /* Return true if call statements CALL1 and CALL2 are similar enough - to be combined into the same SLP group. */ - -@@ -751,6 +822,7 @@ vect_build_slp_tree_1 (unsigned char *sw - stmt_vec_info stmt_info; - FOR_EACH_VEC_ELT (stmts, i, stmt_info) - { -+ vec_info *vinfo = stmt_info->vinfo; - gimple *stmt = stmt_info->stmt; - swap[i] = 0; - matches[i] = false; -@@ -784,7 +856,7 @@ vect_build_slp_tree_1 (unsigned char *sw - - tree nunits_vectype; - if (!vect_get_vector_types_for_stmt (stmt_info, &vectype, -- &nunits_vectype) -+ &nunits_vectype, group_size) - || (nunits_vectype - && !vect_record_max_nunits (stmt_info, group_size, - nunits_vectype, max_nunits))) -@@ -796,6 +868,10 @@ vect_build_slp_tree_1 (unsigned char *sw - - gcc_assert (vectype); - -+ if (is_a (vinfo) -+ && !vect_update_shared_vectype (stmt_info, vectype)) -+ continue; -+ - if (gcall *call_stmt = dyn_cast (stmt)) - { - rhs_code = CALL_EXPR; -@@ -1328,7 +1404,8 @@ vect_build_slp_tree_2 (vec_info *vinfo, - FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) - if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def) - break; -- if (!grandchild) -+ if (!grandchild -+ && vect_update_all_shared_vectypes (oprnd_info->def_stmts)) - { - /* Roll back. */ - this_tree_size = old_tree_size; -@@ -1369,7 +1446,8 @@ vect_build_slp_tree_2 (vec_info *vinfo, - do extra work to cancel the pattern so the uses see the - scalar version. */ - && !is_pattern_stmt_p (stmt_info) -- && !oprnd_info->any_pattern) -+ && !oprnd_info->any_pattern -+ && vect_update_all_shared_vectypes (oprnd_info->def_stmts)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, -@@ -1488,7 +1566,9 @@ vect_build_slp_tree_2 (vec_info *vinfo, - FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) - if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def) - break; -- if (!grandchild) -+ if (!grandchild -+ && (vect_update_all_shared_vectypes -+ (oprnd_info->def_stmts))) - { - /* Roll back. 
*/ - this_tree_size = old_tree_size; -@@ -2026,8 +2106,8 @@ vect_analyze_slp_instance (vec_info *vin - if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) - { - scalar_type = TREE_TYPE (DR_REF (dr)); -- vectype = get_vectype_for_scalar_type (vinfo, scalar_type); - group_size = DR_GROUP_SIZE (stmt_info); -+ vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size); - } - else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info)) - { -@@ -2669,22 +2749,13 @@ vect_slp_analyze_node_operations_1 (vec_ - Memory accesses already got their vector type assigned - in vect_analyze_data_refs. */ - bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); -- if (bb_vinfo -- && ! STMT_VINFO_DATA_REF (stmt_info)) -+ if (bb_vinfo && STMT_VINFO_VECTYPE (stmt_info) == boolean_type_node) - { -- tree vectype, nunits_vectype; -- if (!vect_get_vector_types_for_stmt (stmt_info, &vectype, -- &nunits_vectype)) -- /* We checked this when building the node. */ -- gcc_unreachable (); -- if (vectype == boolean_type_node) -- { -- vectype = vect_get_mask_type_for_stmt (stmt_info); -- if (!vectype) -- /* vect_get_mask_type_for_stmt has already explained the -- failure. */ -- return false; -- } -+ tree vectype = vect_get_mask_type_for_stmt (stmt_info, node); -+ if (!vectype) -+ /* vect_get_mask_type_for_stmt has already explained the -+ failure. */ -+ return false; - - stmt_vec_info sstmt_info; - unsigned int i; -@@ -3585,7 +3656,7 @@ vect_get_constant_vectors (slp_tree op_n - && vect_mask_constant_operand_p (stmt_vinfo)) - vector_type = truth_type_for (stmt_vectype); - else -- vector_type = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op)); -+ vector_type = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), op_node); - - unsigned int number_of_vectors - = vect_get_num_vectors (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) -diff -Nurp a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c ---- a/gcc/tree-vect-stmts.c 2020-12-20 18:46:17.707633230 +0800 -+++ b/gcc/tree-vect-stmts.c 2020-12-20 18:48:11.227633230 +0800 -@@ -798,7 +798,7 @@ vect_prologue_cost_for_slp_op (slp_tree - /* Without looking at the actual initializer a vector of - constants can be implemented as load from the constant pool. - When all elements are the same we can use a splat. */ -- tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op)); -+ tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), node); - unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length (); - unsigned num_vects_to_check; - unsigned HOST_WIDE_INT const_nunits; -@@ -3308,7 +3308,7 @@ vectorizable_call (stmt_vec_info stmt_in - /* If all arguments are external or constant defs, infer the vector type - from the scalar type. */ - if (!vectype_in) -- vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type); -+ vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node); - if (vec_stmt) - gcc_assert (vectype_in); - if (!vectype_in) -@@ -4106,7 +4106,8 @@ vectorizable_simd_clone_call (stmt_vec_i - && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR) - { - tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i)); -- arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type); -+ arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type, -+ slp_node); - if (arginfo[i].vectype == NULL - || (simd_clone_subparts (arginfo[i].vectype) - > bestn->simdclone->simdlen)) -@@ -4805,7 +4806,7 @@ vectorizable_conversion (stmt_vec_info s - /* If op0 is an external or constant def, infer the vector type - from the scalar type. 
*/ - if (!vectype_in) -- vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type); -+ vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node); - if (vec_stmt) - gcc_assert (vectype_in); - if (!vectype_in) -@@ -5558,7 +5559,7 @@ vectorizable_shift (stmt_vec_info stmt_i - /* If op0 is an external or constant def, infer the vector type - from the scalar type. */ - if (!vectype) -- vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0)); -+ vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node); - if (vec_stmt) - gcc_assert (vectype); - if (!vectype) -@@ -5656,7 +5657,8 @@ vectorizable_shift (stmt_vec_info stmt_i - "vector/vector shift/rotate found.\n"); - - if (!op1_vectype) -- op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1)); -+ op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1), -+ slp_node); - incompatible_op1_vectype_p - = (op1_vectype == NULL_TREE - || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype), -@@ -6000,7 +6002,8 @@ vectorizable_operation (stmt_vec_info st - vectype = vectype_out; - } - else -- vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0)); -+ vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), -+ slp_node); - } - if (vec_stmt) - gcc_assert (vectype); -@@ -8903,7 +8906,7 @@ vectorizable_load (stmt_vec_info stmt_in - condition operands are supportable using vec_is_simple_use. */ - - static bool --vect_is_simple_cond (tree cond, vec_info *vinfo, -+vect_is_simple_cond (tree cond, vec_info *vinfo, slp_tree slp_node, - tree *comp_vectype, enum vect_def_type *dts, - tree vectype) - { -@@ -8966,7 +8969,8 @@ vect_is_simple_cond (tree cond, vec_info - scalar_type = build_nonstandard_integer_type - (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))), - TYPE_UNSIGNED (scalar_type)); -- *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type); -+ *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type, -+ slp_node); - } - - return true; -@@ -9073,7 +9077,7 @@ vectorizable_condition (stmt_vec_info st - then_clause = gimple_assign_rhs2 (stmt); - else_clause = gimple_assign_rhs3 (stmt); - -- if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo, -+ if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo, slp_node, - &comp_vectype, &dts[0], slp_node ? NULL : vectype) - || !comp_vectype) - return false; -@@ -9564,7 +9568,8 @@ vectorizable_comparison (stmt_vec_info s - /* Invariant comparison. */ - if (!vectype) - { -- vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1)); -+ vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1), -+ slp_node); - if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits)) - return false; - } -@@ -10322,31 +10327,93 @@ get_related_vectype_for_scalar_type (mac - /* Function get_vectype_for_scalar_type. - - Returns the vector type corresponding to SCALAR_TYPE as supported -- by the target. */ -+ by the target. If GROUP_SIZE is nonzero and we're performing BB -+ vectorization, make sure that the number of elements in the vector -+ is no bigger than GROUP_SIZE. */ - - tree --get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type) -+get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, -+ unsigned int group_size) - { -+ /* For BB vectorization, we should always have a group size once we've -+ constructed the SLP tree; the only valid uses of zero GROUP_SIZEs -+ are tentative requests during things like early data reference -+ analysis and pattern recognition. 
*/ -+ if (is_a (vinfo)) -+ gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0); -+ else -+ group_size = 0; -+ - tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode, - scalar_type); - if (vectype && vinfo->vector_mode == VOIDmode) - vinfo->vector_mode = TYPE_MODE (vectype); - -+ /* Register the natural choice of vector type, before the group size -+ has been applied. */ - if (vectype) - vinfo->used_vector_modes.add (TYPE_MODE (vectype)); - -+ /* If the natural choice of vector type doesn't satisfy GROUP_SIZE, -+ try again with an explicit number of elements. */ -+ if (vectype -+ && group_size -+ && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size)) -+ { -+ /* Start with the biggest number of units that fits within -+ GROUP_SIZE and halve it until we find a valid vector type. -+ Usually either the first attempt will succeed or all will -+ fail (in the latter case because GROUP_SIZE is too small -+ for the target), but it's possible that a target could have -+ a hole between supported vector types. -+ -+ If GROUP_SIZE is not a power of 2, this has the effect of -+ trying the largest power of 2 that fits within the group, -+ even though the group is not a multiple of that vector size. -+ The BB vectorizer will then try to carve up the group into -+ smaller pieces. */ -+ unsigned int nunits = 1 << floor_log2 (group_size); -+ do -+ { -+ vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode, -+ scalar_type, nunits); -+ nunits /= 2; -+ } -+ while (nunits > 1 && !vectype); -+ } -+ - return vectype; - } - -+/* Return the vector type corresponding to SCALAR_TYPE as supported -+ by the target. NODE, if nonnull, is the SLP tree node that will -+ use the returned vector type. */ -+ -+tree -+get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node) -+{ -+ unsigned int group_size = 0; -+ if (node) -+ { -+ group_size = SLP_TREE_SCALAR_OPS (node).length (); -+ if (group_size == 0) -+ group_size = SLP_TREE_SCALAR_STMTS (node).length (); -+ } -+ return get_vectype_for_scalar_type (vinfo, scalar_type, group_size); -+} -+ - /* Function get_mask_type_for_scalar_type. - - Returns the mask type corresponding to a result of comparison -- of vectors of specified SCALAR_TYPE as supported by target. */ -+ of vectors of specified SCALAR_TYPE as supported by target. -+ NODE, if nonnull, is the SLP tree node that will use the returned -+ vector type. */ - - tree --get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type) -+get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type, -+ slp_tree node) - { -- tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type); -+ tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, node); - - if (!vectype) - return NULL; -@@ -11033,6 +11100,9 @@ vect_gen_while_not (gimple_seq *seq, tre - - /* Try to compute the vector types required to vectorize STMT_INFO, - returning true on success and false if vectorization isn't possible. -+ If GROUP_SIZE is nonzero and we're performing BB vectorization, -+ take sure that the number of elements in the vectors is no bigger -+ than GROUP_SIZE. 
- - On success: - -@@ -11050,11 +11120,21 @@ vect_gen_while_not (gimple_seq *seq, tre - opt_result - vect_get_vector_types_for_stmt (stmt_vec_info stmt_info, - tree *stmt_vectype_out, -- tree *nunits_vectype_out) -+ tree *nunits_vectype_out, -+ unsigned int group_size) - { - vec_info *vinfo = stmt_info->vinfo; - gimple *stmt = stmt_info->stmt; - -+ /* For BB vectorization, we should always have a group size once we've -+ constructed the SLP tree; the only valid uses of zero GROUP_SIZEs -+ are tentative requests during things like early data reference -+ analysis and pattern recognition. */ -+ if (is_a (vinfo)) -+ gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0); -+ else -+ group_size = 0; -+ - *stmt_vectype_out = NULL_TREE; - *nunits_vectype_out = NULL_TREE; - -@@ -11085,7 +11165,7 @@ vect_get_vector_types_for_stmt (stmt_vec - - tree vectype; - tree scalar_type = NULL_TREE; -- if (STMT_VINFO_VECTYPE (stmt_info)) -+ if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info)) - { - *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info); - if (dump_enabled_p ()) -@@ -11094,15 +11174,17 @@ vect_get_vector_types_for_stmt (stmt_vec - } - else - { -- gcc_assert (!STMT_VINFO_DATA_REF (stmt_info)); -- if (gimple_call_internal_p (stmt, IFN_MASK_STORE)) -+ if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info)) -+ scalar_type = TREE_TYPE (DR_REF (dr)); -+ else if (gimple_call_internal_p (stmt, IFN_MASK_STORE)) - scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3)); - else - scalar_type = TREE_TYPE (gimple_get_lhs (stmt)); - - /* Pure bool ops don't participate in number-of-units computation. - For comparisons use the types being compared. */ -- if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type) -+ if (!STMT_VINFO_DATA_REF (stmt_info) -+ && VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type) - && is_gimple_assign (stmt) - && gimple_assign_rhs_code (stmt) != COND_EXPR) - { -@@ -11122,9 +11204,16 @@ vect_get_vector_types_for_stmt (stmt_vec - } - - if (dump_enabled_p ()) -- dump_printf_loc (MSG_NOTE, vect_location, -- "get vectype for scalar type: %T\n", scalar_type); -- vectype = get_vectype_for_scalar_type (vinfo, scalar_type); -+ { -+ if (group_size) -+ dump_printf_loc (MSG_NOTE, vect_location, -+ "get vectype for scalar type (group size %d):" -+ " %T\n", group_size, scalar_type); -+ else -+ dump_printf_loc (MSG_NOTE, vect_location, -+ "get vectype for scalar type: %T\n", scalar_type); -+ } -+ vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size); - if (!vectype) - return opt_result::failure_at (stmt, - "not vectorized:" -@@ -11155,7 +11244,8 @@ vect_get_vector_types_for_stmt (stmt_vec - dump_printf_loc (MSG_NOTE, vect_location, - "get vectype for smallest scalar type: %T\n", - scalar_type); -- nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type); -+ nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type, -+ group_size); - if (!nunits_vectype) - return opt_result::failure_at - (stmt, "not vectorized: unsupported data-type %T\n", -@@ -11183,10 +11273,11 @@ vect_get_vector_types_for_stmt (stmt_vec - - /* Try to determine the correct vector type for STMT_INFO, which is a - statement that produces a scalar boolean result. Return the vector -- type on success, otherwise return NULL_TREE. */ -+ type on success, otherwise return NULL_TREE. NODE, if nonnull, -+ is the SLP tree node that will use the returned vector type. 
*/ - - opt_tree --vect_get_mask_type_for_stmt (stmt_vec_info stmt_info) -+vect_get_mask_type_for_stmt (stmt_vec_info stmt_info, slp_tree node) - { - vec_info *vinfo = stmt_info->vinfo; - gimple *stmt = stmt_info->stmt; -@@ -11198,7 +11289,7 @@ vect_get_mask_type_for_stmt (stmt_vec_in - && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt)))) - { - scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt)); -- mask_type = get_mask_type_for_scalar_type (vinfo, scalar_type); -+ mask_type = get_mask_type_for_scalar_type (vinfo, scalar_type, node); - - if (!mask_type) - return opt_tree::failure_at (stmt, -diff -Nurp a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h ---- a/gcc/tree-vectorizer.h 2020-12-20 18:46:17.851633230 +0800 -+++ b/gcc/tree-vectorizer.h 2020-12-20 18:48:11.227633230 +0800 -@@ -1618,8 +1618,9 @@ extern void vect_update_inits_of_drs (lo - /* In tree-vect-stmts.c. */ - extern tree get_related_vectype_for_scalar_type (machine_mode, tree, - poly_uint64 = 0); --extern tree get_vectype_for_scalar_type (vec_info *, tree); --extern tree get_mask_type_for_scalar_type (vec_info *, tree); -+extern tree get_vectype_for_scalar_type (vec_info *, tree, unsigned int = 0); -+extern tree get_vectype_for_scalar_type (vec_info *, tree, slp_tree); -+extern tree get_mask_type_for_scalar_type (vec_info *, tree, slp_tree = 0); - extern tree get_same_sized_vectype (tree, tree); - extern bool vect_chooses_same_modes_p (vec_info *, machine_mode); - extern bool vect_get_loop_mask_type (loop_vec_info); -@@ -1671,8 +1672,8 @@ extern void optimize_mask_stores (struct - extern gcall *vect_gen_while (tree, tree, tree); - extern tree vect_gen_while_not (gimple_seq *, tree, tree, tree); - extern opt_result vect_get_vector_types_for_stmt (stmt_vec_info, tree *, -- tree *); --extern opt_tree vect_get_mask_type_for_stmt (stmt_vec_info); -+ tree *, unsigned int = 0); -+extern opt_tree vect_get_mask_type_for_stmt (stmt_vec_info, slp_tree = 0); - - /* In tree-vect-data-refs.c. */ - extern bool vect_can_force_dr_alignment_p (const_tree, poly_uint64); diff --git a/Fix-EXTRACT_LAST_REDUCTION-segfault.patch b/Fix-EXTRACT_LAST_REDUCTION-segfault.patch deleted file mode 100644 index aa3b320dff674adb0da7098b1eec04e4deafbbb9..0000000000000000000000000000000000000000 --- a/Fix-EXTRACT_LAST_REDUCTION-segfault.patch +++ /dev/null @@ -1,82 +0,0 @@ -This backport contains 2 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-Fix-EXTRACT_LAST_REDUCTION-handling-of-pattern-stmts.patch -9ec35478ccf0f3539988a054b7996278706a7710 - -0001-Fix-EXTRACT_LAST_REDUCTION-segfault.patch -dc176c3ccd6a8cd3f809f3c1549ad00674061eb5 - -diff -Nurp a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-6.c b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-6.c ---- a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-6.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-6.c 2020-12-14 21:16:26.492000000 -0500 -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+ -+int -+f (int *y) -+{ -+ int res = 0; -+ for (int i = 0; i < 100; ++i) -+ res = (y[i] & 1) == 0 && (y[i] < 10) ? 
res : 1; -+ return res; -+} -diff -Nurp a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c ---- a/gcc/tree-vect-stmts.c 2020-12-14 21:15:27.004000000 -0500 -+++ b/gcc/tree-vect-stmts.c 2020-12-14 21:16:26.492000000 -0500 -@@ -1777,9 +1777,10 @@ vect_finish_stmt_generation_1 (stmt_vec_ - stmt_vec_info - vect_finish_replace_stmt (stmt_vec_info stmt_info, gimple *vec_stmt) - { -- gcc_assert (gimple_get_lhs (stmt_info->stmt) == gimple_get_lhs (vec_stmt)); -+ gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt; -+ gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt)); - -- gimple_stmt_iterator gsi = gsi_for_stmt (stmt_info->stmt); -+ gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt); - gsi_replace (&gsi, vec_stmt, true); - - return vect_finish_stmt_generation_1 (stmt_info, vec_stmt); -@@ -9118,10 +9119,12 @@ vectorizable_condition (stmt_vec_info st - if (new_code == ERROR_MARK) - must_invert_cmp_result = true; - else -- cond_code = new_code; -+ { -+ cond_code = new_code; -+ /* Make sure we don't accidentally use the old condition. */ -+ cond_expr = NULL_TREE; -+ } - } -- /* Make sure we don't accidentally use the old condition. */ -- cond_expr = NULL_TREE; - std::swap (then_clause, else_clause); - } - -@@ -9426,20 +9429,21 @@ vectorizable_condition (stmt_vec_info st - vect_finish_stmt_generation (stmt_info, new_stmt, gsi); - vec_compare = vec_compare_name; - } -+ gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt; -+ tree lhs = gimple_get_lhs (old_stmt); - gcall *new_stmt = gimple_build_call_internal - (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare, - vec_then_clause); -- gimple_call_set_lhs (new_stmt, scalar_dest); -- SSA_NAME_DEF_STMT (scalar_dest) = new_stmt; -- if (stmt_info->stmt == gsi_stmt (*gsi)) -+ gimple_call_set_lhs (new_stmt, lhs); -+ SSA_NAME_DEF_STMT (lhs) = new_stmt; -+ if (old_stmt == gsi_stmt (*gsi)) - new_stmt_info = vect_finish_replace_stmt (stmt_info, new_stmt); - else - { - /* In this case we're moving the definition to later in the - block. That doesn't matter because the only uses of the - lhs are in phi statements. */ -- gimple_stmt_iterator old_gsi -- = gsi_for_stmt (stmt_info->stmt); -+ gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt); - gsi_remove (&old_gsi, true); - new_stmt_info - = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); diff --git a/Fix-PR94185.patch b/Fix-PR94185.patch deleted file mode 100644 index 370ec0abbb49e8cddbe42e1af79372ceba4bc7b8..0000000000000000000000000000000000000000 --- a/Fix-PR94185.patch +++ /dev/null @@ -1,66 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-Fix-PR94185-Do-not-reuse-insn-alternative-after-chan.patch -bae7b38cf8a21e068ad5c0bab089dedb78af3346 - -diff -uprN a/gcc/lra-spills.c b/gcc/lra-spills.c ---- a/gcc/lra-spills.c -+++ b/gcc/lra-spills.c -@@ -427,7 +427,17 @@ remove_pseudos (rtx *loc, rtx_insn *insn) - and avoid LRA cycling in case of subreg memory reload. 
*/ - res = remove_pseudos (&SUBREG_REG (*loc), insn); - if (GET_CODE (SUBREG_REG (*loc)) == MEM) -- alter_subreg (loc, false); -+ { -+ alter_subreg (loc, false); -+ if (GET_CODE (*loc) == MEM) -+ { -+ lra_get_insn_recog_data (insn)->used_insn_alternative = -1; -+ if (lra_dump_file != NULL) -+ fprintf (lra_dump_file, -+ "Memory subreg was simplified in in insn #%u\n", -+ INSN_UID (insn)); -+ } -+ } - return res; - } - else if (code == REG && (i = REGNO (*loc)) >= FIRST_PSEUDO_REGISTER -diff -uprN a/gcc/testsuite/g++.target/i386/pr94185.C b/gcc/testsuite/g++.target/i386/pr94185.C -new file mode 100644 ---- /dev/null -+++ b/gcc/testsuite/g++.target/i386/pr94185.C -@@ -0,0 +1,33 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fPIE -fstack-protector-strong" } */ -+ -+struct a { -+ int b; -+ int c(); -+ a() : b(c()) {} -+ ~a(); -+ char *e(); -+}; -+struct f { -+ void g(int); -+}; -+struct ar { -+ int au[256]; -+ f h(int); -+} bb; -+a i(); -+a j(int); -+long k(int, ar); -+int d; -+void l(char *, ar m, long n) { -+ switch (m.au[d]) -+ case 0: -+ n &= 4294967295; -+ bb.h(0).g(n); -+} -+void o() { -+ ar bd; -+ a bh, bi, attrname = j(0) = i(); -+ int be = k(0, bd); -+ l(attrname.e(), bd, be); -+} diff --git a/Fix-interaction-between-aka-changes-and-DR1558.patch b/Fix-interaction-between-aka-changes-and-DR1558.patch deleted file mode 100644 index edf7dd44cfd5eb41c4a0c4c4eb4ac74e42813c7a..0000000000000000000000000000000000000000 --- a/Fix-interaction-between-aka-changes-and-DR1558.patch +++ /dev/null @@ -1,98 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-Fix-interaction-between-aka-changes-and-DR1558.patch -ae83b9deb87787371cd94b4417e160d41dd0322c - -diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h -index adc021b2a5c..42afe1bd5cb 100644 ---- a/gcc/cp/cp-tree.h -+++ b/gcc/cp/cp-tree.h -@@ -5759,8 +5759,13 @@ enum auto_deduction_context - - STF_USER_VISIBLE: use heuristics to try to avoid stripping user-facing - aliases of internal details. This is intended for diagnostics, -- where it should (for example) give more useful "aka" types. */ -+ where it should (for example) give more useful "aka" types. -+ -+ STF_STRIP_DEPENDENT: allow the stripping of aliases with dependent -+ template parameters, relying on code elsewhere to report any -+ appropriate diagnostics. */ - const unsigned int STF_USER_VISIBLE = 1U; -+const unsigned int STF_STRIP_DEPENDENT = 1U << 1; - - /* Returns the TEMPLATE_DECL associated to a TEMPLATE_TEMPLATE_PARM - node. */ -diff --git a/gcc/cp/tree.c b/gcc/cp/tree.c -index ba635d4ddbd..6c39c004b01 100644 ---- a/gcc/cp/tree.c -+++ b/gcc/cp/tree.c -@@ -1488,7 +1488,8 @@ strip_typedefs (tree t, bool *remove_attributes, unsigned int flags) - if (t == TYPE_CANONICAL (t)) - return t; - -- if (dependent_alias_template_spec_p (t)) -+ if (!(flags & STF_STRIP_DEPENDENT) -+ && dependent_alias_template_spec_p (t)) - /* DR 1558: However, if the template-id is dependent, subsequent - template argument substitution still applies to the template-id. 
*/ - return t; -@@ -1673,7 +1674,8 @@ strip_typedefs (tree t, bool *remove_attributes, unsigned int flags) - && !user_facing_original_type_p (t)) - return t; - result = strip_typedefs (DECL_ORIGINAL_TYPE (TYPE_NAME (t)), -- remove_attributes, flags); -+ remove_attributes, -+ flags | STF_STRIP_DEPENDENT); - } - else - result = TYPE_MAIN_VARIANT (t); -diff --git a/gcc/testsuite/g++.dg/cpp0x/alias-decl-pr92206-1.C b/gcc/testsuite/g++.dg/cpp0x/alias-decl-pr92206-1.C -new file mode 100644 -index 00000000000..c3f7b1977db ---- /dev/null -+++ b/gcc/testsuite/g++.dg/cpp0x/alias-decl-pr92206-1.C -@@ -0,0 +1,9 @@ -+// { dg-require-effective-target c++11 } -+ -+template struct A {}; -+template using alias1 = A; -+template class B { -+ using alias2 = alias1>; // { dg-error {no type named 'value'} } -+ A a; // { dg-bogus {no type named 'value'} } -+}; -+B b; -diff --git a/gcc/testsuite/g++.dg/cpp0x/alias-decl-pr92206-2.C b/gcc/testsuite/g++.dg/cpp0x/alias-decl-pr92206-2.C -new file mode 100644 -index 00000000000..31d73d6bad3 ---- /dev/null -+++ b/gcc/testsuite/g++.dg/cpp0x/alias-decl-pr92206-2.C -@@ -0,0 +1,14 @@ -+// { dg-require-effective-target c++11 } -+ -+template struct A; -+class Vector { -+ template struct TypeIsGCThing { -+ template ::Type> using Vector = Vector; -+ struct B; -+ template class ContainerIter { -+ using Action = B; -+ using ActionVector = Vector; -+ ContainerIter a; -+ }; -+ }; -+}; -diff --git a/gcc/testsuite/g++.dg/cpp0x/alias-decl-pr92206-3.C b/gcc/testsuite/g++.dg/cpp0x/alias-decl-pr92206-3.C -new file mode 100644 -index 00000000000..6698a366411 ---- /dev/null -+++ b/gcc/testsuite/g++.dg/cpp0x/alias-decl-pr92206-3.C -@@ -0,0 +1,8 @@ -+// { dg-require-effective-target c++11 } -+ -+template void a(); -+template struct b; -+template using c = int; -+template )> using f = e; -+template using g = f; -+template c>::i> j; diff --git a/Fix-type-mismatch-in-SLPed-constructors.patch b/Fix-type-mismatch-in-SLPed-constructors.patch deleted file mode 100644 index b0732999f7899041f75c4ff6a375142dcc8b2b1c..0000000000000000000000000000000000000000 --- a/Fix-type-mismatch-in-SLPed-constructors.patch +++ /dev/null @@ -1,118 +0,0 @@ -This backport contains 2 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-Fix-type-mismatch-in-SLPed-constructors.patch -86c3a7d891f9f175d09d61f5ce163c6dc5ce681f -0001-re-PR-fortran-91003-ICE-when-compiling-LAPACK-CGEGV-.patch -d005f61e7a0dbb2c991f13b4b61b1a27ca2d8b73 - -diff -urpN a/gcc/testsuite/gfortran.dg/pr91003.f90 b/gcc/testsuite/gfortran.dg/pr91003.f90 ---- a/gcc/testsuite/gfortran.dg/pr91003.f90 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gfortran.dg/pr91003.f90 2021-02-22 03:02:39.484000000 -0500 -@@ -0,0 +1,33 @@ -+! { dg-do compile } -+! { dg-options "-Ofast" } -+ SUBROUTINE FOO(N, A, B, C, D, E, F, G) -+ COMPLEX A(*) -+ LOGICAL H -+ INTEGER G -+ REAL I, C, J, F, F1, F2, K, E, L, M, B, D -+ DO JC = 1, N -+ K = F*REAL(A(JC)) -+ Z = F*AIMAG(A(JC)) -+ H = .FALSE. -+ L = G -+ IF(ABS(Z).LT.D .AND. I.GE. MAX(D, B*C, B*J)) THEN -+ H = .TRUE. -+ L = (D / F1) / MAX(D, F2*I) -+ END IF -+ IF(ABS(K).LT.D .AND. C.GE. MAX(D, B*I, B*J)) THEN -+ L = MAX(L, (D / F1) / MAX(D, F2*C)) -+ END IF -+ IF(ABS(E).LT.D .AND. J.GE. MAX(D, B*C, B*I)) THEN -+ H = .TRUE. 
-+ L = MAX(L, (D / BNRM1) / MAX(D, BNRM2*J)) -+ END IF -+ IF(H) THEN -+ M = (L*D)*MAX(ABS(K), ABS(Z), ABS(E)) -+ END IF -+ IF(H) THEN -+ K = (L*REAL(A(JC)))*F -+ Z = (L*AIMAG(A(JC)))*F -+ END IF -+ A(JC) = CMPLX(K, Z) -+ END DO -+ END -diff -urpN a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c ---- a/gcc/tree-vect-slp.c 2021-02-22 02:56:51.328000000 -0500 -+++ b/gcc/tree-vect-slp.c 2021-02-22 03:03:22.676000000 -0500 -@@ -3442,7 +3442,7 @@ vect_slp_bb (basic_block bb) - /* Return 1 if vector type STMT_VINFO is a boolean vector. */ - - static bool --vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo) -+vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo, unsigned op_num) - { - enum tree_code code = gimple_expr_code (stmt_vinfo->stmt); - tree op, vectype; -@@ -3467,9 +3467,17 @@ vect_mask_constant_operand_p (stmt_vec_i - tree cond = gimple_assign_rhs1 (stmt); - - if (TREE_CODE (cond) == SSA_NAME) -- op = cond; -+ { -+ if (op_num > 0) -+ return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo)); -+ op = cond; -+ } - else -- op = TREE_OPERAND (cond, 0); -+ { -+ if (op_num > 1) -+ return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo)); -+ op = TREE_OPERAND (cond, 0); -+ } - - if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype)) - gcc_unreachable (); -@@ -3600,9 +3608,10 @@ duplicate_and_interleave (vec_info *vinf - operands. */ - - static void --vect_get_constant_vectors (slp_tree op_node, slp_tree slp_node, -+vect_get_constant_vectors (slp_tree slp_node, unsigned op_num, - vec *vec_oprnds) - { -+ slp_tree op_node = SLP_TREE_CHILDREN (slp_node)[op_num]; - stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (slp_node)[0]; - vec_info *vinfo = stmt_vinfo->vinfo; - unsigned HOST_WIDE_INT nunits; -@@ -3624,7 +3633,7 @@ vect_get_constant_vectors (slp_tree op_n - /* Check if vector type is a boolean vector. */ - tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo); - if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op)) -- && vect_mask_constant_operand_p (stmt_vinfo)) -+ && vect_mask_constant_operand_p (stmt_vinfo, op_num)) - vector_type = truth_type_for (stmt_vectype); - else - vector_type = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), op_node); -@@ -3848,7 +3857,7 @@ vect_get_slp_defs (slp_tree slp_node, ve - vect_get_slp_vect_defs (child, &vec_defs); - } - else -- vect_get_constant_vectors (child, slp_node, &vec_defs); -+ vect_get_constant_vectors (slp_node, i, &vec_defs); - - vec_oprnds->quick_push (vec_defs); - } -@@ -4269,6 +4278,10 @@ vectorize_slp_instance_root_stmt (slp_tr - { - tree vect_lhs = gimple_get_lhs (child_stmt_info->stmt); - tree root_lhs = gimple_get_lhs (instance->root_stmt->stmt); -+ if (!useless_type_conversion_p (TREE_TYPE (root_lhs), -+ TREE_TYPE (vect_lhs))) -+ vect_lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (root_lhs), -+ vect_lhs); - rstmt = gimple_build_assign (root_lhs, vect_lhs); - break; - } diff --git a/Fix-up-push_partial_def-little-endian-bitfield.patch b/Fix-up-push_partial_def-little-endian-bitfield.patch deleted file mode 100644 index b707a36f8e4752583ef095e52c9601678151f367..0000000000000000000000000000000000000000 --- a/Fix-up-push_partial_def-little-endian-bitfield.patch +++ /dev/null @@ -1,51 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. 
- -c69325a5db450dbac198f76f1162734af05a1061 -0001-sccvn-Fix-up-push_partial_def-little-endian-bitfield.patch - -diff -urpN a/gcc/testsuite/gcc.c-torture/execute/pr97764.c b/gcc/testsuite/gcc.c-torture/execute/pr97764.c ---- a/gcc/testsuite/gcc.c-torture/execute/pr97764.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.c-torture/execute/pr97764.c 2020-12-07 03:42:13.404000000 -0500 -@@ -0,0 +1,14 @@ -+/* PR tree-optimization/97764 */ -+/* { dg-require-effective-target int32plus } */ -+ -+struct S { int b : 3; int c : 28; int d : 1; }; -+ -+int -+main () -+{ -+ struct S e = {}; -+ e.c = -1; -+ if (e.d) -+ __builtin_abort (); -+ return 0; -+} -diff -urpN a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c ---- a/gcc/tree-ssa-sccvn.c 2020-12-07 03:43:37.792000000 -0500 -+++ b/gcc/tree-ssa-sccvn.c 2020-12-07 03:42:13.404000000 -0500 -@@ -2013,12 +2013,12 @@ vn_walk_cb_data::push_partial_def (const - } - else - { -- size = MIN (size, (HOST_WIDE_INT) needed_len * BITS_PER_UNIT); - if (pd.offset >= 0) - { - /* LSB of this_buffer[0] byte should be at pd.offset bits - in buffer. */ - unsigned int msk; -+ size = MIN (size, (HOST_WIDE_INT) needed_len * BITS_PER_UNIT); - amnt = pd.offset % BITS_PER_UNIT; - if (amnt) - shift_bytes_in_array_left (this_buffer, len + 1, amnt); -@@ -2046,6 +2046,9 @@ vn_walk_cb_data::push_partial_def (const - { - amnt = (unsigned HOST_WIDE_INT) pd.offset % BITS_PER_UNIT; - if (amnt) -+ size -= BITS_PER_UNIT - amnt; -+ size = MIN (size, (HOST_WIDE_INT) needed_len * BITS_PER_UNIT); -+ if (amnt) - shift_bytes_in_array_left (this_buffer, len + 1, amnt); - } - memcpy (p, this_buffer + (amnt != 0), size / BITS_PER_UNIT); diff --git a/Fix-zero-masking-for-vcvtps2ph.patch b/Fix-zero-masking-for-vcvtps2ph.patch deleted file mode 100644 index df8c5a857ad482bcc498ea243ee7db37823f0bd9..0000000000000000000000000000000000000000 --- a/Fix-zero-masking-for-vcvtps2ph.patch +++ /dev/null @@ -1,139 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. 
- -0001-Fix-zero-masking-for-vcvtps2ph-when-dest-operand-is-.patch -43088bb4dadd3d14b6b594c5f9363fe879f3d7f7 - -diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md -index 87354451c58..7815d77bcbf 100644 ---- a/gcc/config/i386/sse.md -+++ b/gcc/config/i386/sse.md -@@ -21775,19 +21775,19 @@ - (set_attr "prefix" "maybe_evex") - (set_attr "mode" "V4SF")]) - --(define_insn "*vcvtps2ph_store" -+(define_insn "*vcvtps2ph_store" - [(set (match_operand:V4HI 0 "memory_operand" "=m") - (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v") - (match_operand:SI 2 "const_0_to_255_operand" "N")] - UNSPEC_VCVTPS2PH))] - "TARGET_F16C || TARGET_AVX512VL" -- "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}" -+ "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssecvt") - (set_attr "prefix" "maybe_evex") - (set_attr "mode" "V4SF")]) - - (define_insn "vcvtps2ph256" -- [(set (match_operand:V8HI 0 "nonimmediate_operand" "=vm") -+ [(set (match_operand:V8HI 0 "register_operand" "=v") - (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v") - (match_operand:SI 2 "const_0_to_255_operand" "N")] - UNSPEC_VCVTPS2PH))] -@@ -21798,8 +21798,20 @@ - (set_attr "btver2_decode" "vector") - (set_attr "mode" "V8SF")]) - -+(define_insn "*vcvtps2ph256" -+ [(set (match_operand:V8HI 0 "memory_operand" "=m") -+ (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v") -+ (match_operand:SI 2 "const_0_to_255_operand" "N")] -+ UNSPEC_VCVTPS2PH))] -+ "TARGET_F16C || TARGET_AVX512VL" -+ "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}" -+ [(set_attr "type" "ssecvt") -+ (set_attr "prefix" "maybe_evex") -+ (set_attr "btver2_decode" "vector") -+ (set_attr "mode" "V8SF")]) -+ - (define_insn "avx512f_vcvtps2ph512" -- [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm") -+ [(set (match_operand:V16HI 0 "register_operand" "=v") - (unspec:V16HI - [(match_operand:V16SF 1 "register_operand" "v") - (match_operand:SI 2 "const_0_to_255_operand" "N")] -@@ -21810,6 +21822,18 @@ - (set_attr "prefix" "evex") - (set_attr "mode" "V16SF")]) - -+(define_insn "*avx512f_vcvtps2ph512" -+ [(set (match_operand:V16HI 0 "memory_operand" "=m") -+ (unspec:V16HI -+ [(match_operand:V16SF 1 "register_operand" "v") -+ (match_operand:SI 2 "const_0_to_255_operand" "N")] -+ UNSPEC_VCVTPS2PH))] -+ "TARGET_AVX512F" -+ "vcvtps2ph\t{%2, %1, %0|%0, %1, %2}" -+ [(set_attr "type" "ssecvt") -+ (set_attr "prefix" "evex") -+ (set_attr "mode" "V16SF")]) -+ - ;; For gather* insn patterns - (define_mode_iterator VEC_GATHER_MODE - [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF]) -diff --git a/gcc/config/i386/subst.md b/gcc/config/i386/subst.md -index a5ca144c7f7..58ea9dc83e2 100644 ---- a/gcc/config/i386/subst.md -+++ b/gcc/config/i386/subst.md -@@ -73,6 +73,18 @@ - (match_operand:SUBST_V 2 "nonimm_or_0_operand" "0C") - (match_operand: 3 "register_operand" "Yk")))]) - -+(define_subst_attr "merge_mask_name" "merge_mask" "" "_merge_mask") -+(define_subst_attr "merge_mask_operand3" "merge_mask" "" "%{%3%}") -+(define_subst "merge_mask" -+ [(set (match_operand:SUBST_V 0) -+ (match_operand:SUBST_V 1))] -+ "TARGET_AVX512F" -+ [(set (match_dup 0) -+ (vec_merge:SUBST_V -+ (match_dup 1) -+ (match_dup 0) -+ (match_operand: 2 "register_operand" "Yk")))]) -+ - (define_subst_attr "mask_scalar_merge_name" "mask_scalar_merge" "" "_mask") - (define_subst_attr "mask_scalar_merge_operand3" "mask_scalar_merge" "" "%{%3%}") - (define_subst_attr "mask_scalar_merge_operand4" "mask_scalar_merge" "" "%{%4%}") -diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2ph-pr95254.c 
b/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2ph-pr95254.c -new file mode 100644 -index 00000000000..9e0da947368 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2ph-pr95254.c -@@ -0,0 +1,12 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -mavx512f" } */ -+ -+#include -+extern __m256i res; -+void -+foo (__m512 a, __mmask16 m) -+{ -+ res = _mm512_maskz_cvtps_ph (m, a, 10); -+} -+ -+/* { dg-final { scan-assembler-not "vcvtps2ph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]\[^\n\]*res\[^\n\]*\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)"} } */ -diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr95254.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr95254.c -new file mode 100644 -index 00000000000..0c685ea66fd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr95254.c -@@ -0,0 +1,18 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -mavx512vl -mavx512f" } */ -+ -+#include -+extern __m128i res; -+void -+foo (__m256 a, __mmask8 m) -+{ -+ res = _mm256_maskz_cvtps_ph (m, a, 10); -+} -+ -+void -+foo1 (__m128 a, __mmask8 m) -+{ -+ res = _mm_maskz_cvtps_ph (m, a, 10); -+} -+ -+/* { dg-final { scan-assembler-not "vcvtps2ph\[ \\t\]+\[^\{\n\]*%\[xy\]mm\[0-9\]\[^\n\]*res\[^\n\]*\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)"} } */ diff --git a/Handle-POLY_INT_CST-in-copy_reference_ops_from_ref.patch b/Handle-POLY_INT_CST-in-copy_reference_ops_from_ref.patch deleted file mode 100644 index 0c165bb3ff9958cc5f81257d7ce90e7502f2ddd7..0000000000000000000000000000000000000000 --- a/Handle-POLY_INT_CST-in-copy_reference_ops_from_ref.patch +++ /dev/null @@ -1,122 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-Handle-POLY_INT_CST-in-copy_reference_ops_from_ref.patch -74266b00112a85660b1e9f6e546f0a2c007dd062 - -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/deref_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/deref_2.c -new file mode 100644 -index 00000000000..08902983199 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/deref_2.c -@@ -0,0 +1,20 @@ -+/* { dg-options "-O2" } */ -+ -+#include -+#include -+ -+inline void -+copy (void *dst, svbool_t src) -+{ -+ memcpy (dst, &src, svcntd ()); -+} -+ -+uint64_t -+f (int32_t *x, int32_t *y) -+{ -+ union { uint64_t x; char c[8]; } u; -+ svbool_t pg = svptrue_b32 (); -+ copy (u.c, svcmpeq (pg, svld1 (pg, x), 0)); -+ copy (u.c + 4, svcmpeq (pg, svld1 (pg, y), 1)); -+ return u.x; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_8.c -new file mode 100644 -index 00000000000..c7b6663eff2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_8.c -@@ -0,0 +1,33 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+/* { dg-final { scan-assembler-not {\tptrue\t} } } */ -+/* { dg-final { scan-assembler-not {\tpfalse\t} } } */ -+ -+void -+test1 (svbool_t *ptr) -+{ -+ *ptr = svwhilele_b32_s32 (-4, 0); -+} -+ -+void -+test2 (svbool_t *ptr) -+{ -+ *ptr = svwhilele_b16_s64 (svcntb (), svcntb () + 8); -+} -+ -+void -+test3 (svbool_t *ptr) -+{ -+ *ptr = svwhilele_b64_s32 (0, 2); -+} -+ -+void -+test4 (svbool_t *ptr) -+{ -+ *ptr = svwhilele_b8_s64 (16, svcntb ()); -+} -+ -+/* { dg-final { scan-assembler-times {\twhilel[et]\t} 4 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_4.c -new file 
mode 100644 -index 00000000000..849cd45c67e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_4.c -@@ -0,0 +1,33 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+/* { dg-final { scan-assembler-not {\tptrue\t} } } */ -+/* { dg-final { scan-assembler-not {\tpfalse\t} } } */ -+ -+void -+test1 (svbool_t *ptr) -+{ -+ *ptr = svwhilelt_b32_s32 (-4, 1); -+} -+ -+void -+test2 (svbool_t *ptr) -+{ -+ *ptr = svwhilelt_b16_s64 (svcntb (), svcntb () + 9); -+} -+ -+void -+test3 (svbool_t *ptr) -+{ -+ *ptr = svwhilelt_b64_s32 (0, 3); -+} -+ -+void -+test4 (svbool_t *ptr) -+{ -+ *ptr = svwhilelt_b8_s64 (16, svcntb ()); -+} -+ -+/* { dg-final { scan-assembler-times {\twhilel[et]\t} 4 } } */ -diff --git a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c -index 7465bedb349..f58dbe15047 100644 ---- a/gcc/tree-ssa-sccvn.c -+++ b/gcc/tree-ssa-sccvn.c -@@ -928,6 +928,7 @@ copy_reference_ops_from_ref (tree ref, vec *result) - break; - case STRING_CST: - case INTEGER_CST: -+ case POLY_INT_CST: - case COMPLEX_CST: - case VECTOR_CST: - case REAL_CST: diff --git a/Handle-POLY_INT_CSTs-in-declare_return_value.patch b/Handle-POLY_INT_CSTs-in-declare_return_value.patch deleted file mode 100644 index 36331263c84a3cbd46f803716cfd38626fbc4bb5..0000000000000000000000000000000000000000 --- a/Handle-POLY_INT_CSTs-in-declare_return_value.patch +++ /dev/null @@ -1,40 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-Handle-POLY_INT_CSTs-in-declare_return_value.patch -be5f7eccf766a4c3a59b821f77ce320001df838f - -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/inline_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/inline_1.c -new file mode 100644 -index 00000000000..f736ac3f082 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/inline_1.c -@@ -0,0 +1,6 @@ -+/* { dg-options "-O2" } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+static inline svint32_t foo () { return svdup_s32 (32); } -+svint32_t bar () { return svadd_x (svptrue_b8 (), foo (), 1); } -diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c -index 2b8b9ee58c1..9d0acd97f77 100644 ---- a/gcc/tree-inline.c -+++ b/gcc/tree-inline.c -@@ -3654,7 +3654,7 @@ declare_return_variable (copy_body_data *id, tree return_slot, tree modify_dest, - /* ??? If we're assigning to a variable sized type, then we must - reuse the destination variable, because we've no good way to - create variable sized temporaries at this point. */ -- else if (TREE_CODE (TYPE_SIZE_UNIT (caller_type)) != INTEGER_CST) -+ else if (!poly_int_tree_p (TYPE_SIZE_UNIT (caller_type))) - use_it = true; - - /* If the callee cannot possibly modify MODIFY_DEST, then we can -@@ -3689,7 +3689,7 @@ declare_return_variable (copy_body_data *id, tree return_slot, tree modify_dest, - } - } - -- gcc_assert (TREE_CODE (TYPE_SIZE_UNIT (callee_type)) == INTEGER_CST); -+ gcc_assert (poly_int_tree_p (TYPE_SIZE_UNIT (callee_type))); - - var = copy_result_decl_to_var (result, id); - DECL_SEEN_IN_BIND_EXPR_P (var) = 1; diff --git a/IRA-Handle-fully-tied-destinations.patch b/IRA-Handle-fully-tied-destinations.patch deleted file mode 100644 index ad181cd5de9d2f035299854de70db71a86f6525d..0000000000000000000000000000000000000000 --- a/IRA-Handle-fully-tied-destinations.patch +++ /dev/null @@ -1,155 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. 
- -0001-IRA-Handle-fully-tied-destinations-in-a-similar-way-.patch -9b0365879b3c4917f5a2485a1fca8bb678484bfe - -diff --git a/gcc/ira-lives.c b/gcc/ira-lives.c -index cce73a1c3d4..098b0e73953 100644 ---- a/gcc/ira-lives.c -+++ b/gcc/ira-lives.c -@@ -633,9 +633,28 @@ check_and_make_def_use_conflict (rtx dreg, rtx orig_dreg, - - /* Check and make if necessary conflicts for definition DEF of class - DEF_CL of the current insn with input operands. Process only -- constraints of alternative ALT. */ -+ constraints of alternative ALT. -+ -+ One of three things is true when this function is called: -+ -+ (1) DEF is an earlyclobber for alternative ALT. Input operands then -+ conflict with DEF in ALT unless they explicitly match DEF via 0-9 -+ constraints. -+ -+ (2) DEF matches (via 0-9 constraints) an operand that is an -+ earlyclobber for alternative ALT. Other input operands then -+ conflict with DEF in ALT. -+ -+ (3) [FOR_TIE_P] Some input operand X matches DEF for alternative ALT. -+ Input operands with a different value from X then conflict with -+ DEF in ALT. -+ -+ However, there's still a judgement call to make when deciding -+ whether a conflict in ALT is important enough to be reflected -+ in the pan-alternative allocno conflict set. */ - static void --check_and_make_def_conflict (int alt, int def, enum reg_class def_cl) -+check_and_make_def_conflict (int alt, int def, enum reg_class def_cl, -+ bool for_tie_p) - { - int use, use_match; - ira_allocno_t a; -@@ -669,14 +688,40 @@ check_and_make_def_conflict (int alt, int def, enum reg_class def_cl) - if (use == def || recog_data.operand_type[use] == OP_OUT) - continue; - -+ /* An earlyclobber on DEF doesn't apply to an input operand X if X -+ explicitly matches DEF, but it applies to other input operands -+ even if they happen to be the same value as X. -+ -+ In contrast, if an input operand X is tied to a non-earlyclobber -+ DEF, there's no conflict with other input operands that have the -+ same value as X. */ -+ if (op_alt[use].matches == def -+ || (for_tie_p -+ && rtx_equal_p (recog_data.operand[use], -+ recog_data.operand[op_alt[def].matched]))) -+ continue; -+ - if (op_alt[use].anything_ok) - use_cl = ALL_REGS; - else - use_cl = op_alt[use].cl; -+ if (use_cl == NO_REGS) -+ continue; -+ -+ /* If DEF is simply a tied operand, ignore cases in which this -+ alternative requires USE to have a likely-spilled class. -+ Adding a conflict would just constrain USE further if DEF -+ happens to be allocated first. */ -+ if (for_tie_p && targetm.class_likely_spilled_p (use_cl)) -+ continue; - - /* If there's any alternative that allows USE to match DEF, do not - record a conflict. If that causes us to create an invalid -- instruction due to the earlyclobber, reload must fix it up. */ -+ instruction due to the earlyclobber, reload must fix it up. -+ -+ Likewise, if we're treating a tied DEF like a partial earlyclobber, -+ do not record a conflict if there's another alternative in which -+ DEF is neither tied nor earlyclobber. 
*/ - for (alt1 = 0; alt1 < recog_data.n_alternatives; alt1++) - { - if (!TEST_BIT (preferred_alternatives, alt1)) -@@ -691,6 +736,12 @@ check_and_make_def_conflict (int alt, int def, enum reg_class def_cl) - && recog_data.constraints[use - 1][0] == '%' - && op_alt1[use - 1].matches == def)) - break; -+ if (for_tie_p -+ && !op_alt1[def].earlyclobber -+ && op_alt1[def].matched < 0 -+ && alternative_class (op_alt1, def) != NO_REGS -+ && alternative_class (op_alt1, use) != NO_REGS) -+ break; - } - - if (alt1 < recog_data.n_alternatives) -@@ -701,8 +752,7 @@ check_and_make_def_conflict (int alt, int def, enum reg_class def_cl) - - if ((use_match = op_alt[use].matches) >= 0) - { -- if (use_match == def) -- continue; -+ gcc_checking_assert (use_match != def); - - if (op_alt[use_match].anything_ok) - use_cl = ALL_REGS; -@@ -717,7 +767,11 @@ check_and_make_def_conflict (int alt, int def, enum reg_class def_cl) - /* Make conflicts of early clobber pseudo registers of the current - insn with its inputs. Avoid introducing unnecessary conflicts by - checking classes of the constraints and pseudos because otherwise -- significant code degradation is possible for some targets. */ -+ significant code degradation is possible for some targets. -+ -+ For these purposes, tying an input to an output makes that output act -+ like an earlyclobber for inputs with a different value, since the output -+ register then has a predetermined purpose on input to the instruction. */ - static void - make_early_clobber_and_input_conflicts (void) - { -@@ -732,15 +786,19 @@ make_early_clobber_and_input_conflicts (void) - if (TEST_BIT (preferred_alternatives, alt)) - for (def = 0; def < n_operands; def++) - { -- def_cl = NO_REGS; -- if (op_alt[def].earlyclobber) -+ if (op_alt[def].anything_ok) -+ def_cl = ALL_REGS; -+ else -+ def_cl = op_alt[def].cl; -+ if (def_cl != NO_REGS) - { -- if (op_alt[def].anything_ok) -- def_cl = ALL_REGS; -- else -- def_cl = op_alt[def].cl; -- check_and_make_def_conflict (alt, def, def_cl); -+ if (op_alt[def].earlyclobber) -+ check_and_make_def_conflict (alt, def, def_cl, false); -+ else if (op_alt[def].matched >= 0 -+ && !targetm.class_likely_spilled_p (def_cl)) -+ check_and_make_def_conflict (alt, def, def_cl, true); - } -+ - if ((def_match = op_alt[def].matches) >= 0 - && (op_alt[def_match].earlyclobber - || op_alt[def].earlyclobber)) -@@ -749,7 +807,7 @@ make_early_clobber_and_input_conflicts (void) - def_cl = ALL_REGS; - else - def_cl = op_alt[def_match].cl; -- check_and_make_def_conflict (alt, def, def_cl); -+ check_and_make_def_conflict (alt, def, def_cl, false); - } - } - } diff --git a/PR92303-Try-to-simplify-memory-subreg.patch b/PR92303-Try-to-simplify-memory-subreg.patch deleted file mode 100644 index 31c1acd594a9600cf5098695d9f2bee02823486e..0000000000000000000000000000000000000000 --- a/PR92303-Try-to-simplify-memory-subreg.patch +++ /dev/null @@ -1,27 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. 
- -0001-PR92303-Try-to-simplify-memory-subreg.patch -a4504f32c056db781a2bdc104dffa1b29684c930 - -diff -uprN a/gcc/lra-spills.c b/gcc/lra-spills.c ---- a/gcc/lra-spills.c -+++ b/gcc/lra-spills.c -@@ -421,7 +421,16 @@ remove_pseudos (rtx *loc, rtx_insn *insn) - if (*loc == NULL_RTX) - return res; - code = GET_CODE (*loc); -- if (code == REG && (i = REGNO (*loc)) >= FIRST_PSEUDO_REGISTER -+ if (code == SUBREG && REG_P (SUBREG_REG (*loc))) -+ { -+ /* Try to remove memory subregs to simplify LRA job -+ and avoid LRA cycling in case of subreg memory reload. */ -+ res = remove_pseudos (&SUBREG_REG (*loc), insn); -+ if (GET_CODE (SUBREG_REG (*loc)) == MEM) -+ alter_subreg (loc, false); -+ return res; -+ } -+ else if (code == REG && (i = REGNO (*loc)) >= FIRST_PSEUDO_REGISTER - && lra_get_regno_hard_regno (i) < 0 - /* We do not want to assign memory for former scratches because - it might result in an address reload for some targets. In diff --git a/PR92429-do-not-fold-when-updating.patch b/PR92429-do-not-fold-when-updating.patch deleted file mode 100644 index 65749eef7dd4ea182ffbabc8de97ce88f3bdf492..0000000000000000000000000000000000000000 --- a/PR92429-do-not-fold-when-updating.patch +++ /dev/null @@ -1,70 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-PR-tree-optimization-92429-do-not-fold-when-updating.patch -f7dff7699fd70d3b8c3e637818e18c86f93ccfec - -diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c -index 4d5e0494511..6e6df0bfdb8 100644 ---- a/gcc/tree-ssa-loop-niter.c -+++ b/gcc/tree-ssa-loop-niter.c -@@ -1934,7 +1934,8 @@ number_of_iterations_cond (class loop *loop, - - tree - simplify_replace_tree (tree expr, tree old, tree new_tree, -- tree (*valueize) (tree, void*), void *context) -+ tree (*valueize) (tree, void*), void *context, -+ bool do_fold) - { - unsigned i, n; - tree ret = NULL_TREE, e, se; -@@ -1966,7 +1967,7 @@ simplify_replace_tree (tree expr, tree old, tree new_tree, - for (i = 0; i < n; i++) - { - e = TREE_OPERAND (expr, i); -- se = simplify_replace_tree (e, old, new_tree, valueize, context); -+ se = simplify_replace_tree (e, old, new_tree, valueize, context, do_fold); - if (e == se) - continue; - -@@ -1976,7 +1977,7 @@ simplify_replace_tree (tree expr, tree old, tree new_tree, - TREE_OPERAND (ret, i) = se; - } - -- return (ret ? fold (ret) : expr); -+ return (ret ? (do_fold ? fold (ret) : ret) : expr); - } - - /* Expand definitions of ssa names in EXPR as long as they are simple -diff --git a/gcc/tree-ssa-loop-niter.h b/gcc/tree-ssa-loop-niter.h -index 621e2c2e28d..eb8d1579479 100644 ---- a/gcc/tree-ssa-loop-niter.h -+++ b/gcc/tree-ssa-loop-niter.h -@@ -58,7 +58,7 @@ extern void free_numbers_of_iterations_estimates (class loop *); - extern void free_numbers_of_iterations_estimates (function *); - extern tree simplify_replace_tree (tree, tree, - tree, tree (*)(tree, void *) = NULL, -- void * = NULL); -+ void * = NULL, bool do_fold = true); - extern void substitute_in_loop_info (struct loop *, tree, tree); - - #endif /* GCC_TREE_SSA_LOOP_NITER_H */ -diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c -index 8e318a037a7..e5fb434bd4e 100644 ---- a/gcc/tree-vect-loop.c -+++ b/gcc/tree-vect-loop.c -@@ -8434,8 +8434,13 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree advance) - gimple_set_op (stmt, j, *new_op); - else - { -+ /* PR92429: The last argument of simplify_replace_tree disables -+ folding when replacing arguments. 
This is required as -+ otherwise you might end up with different statements than the -+ ones analyzed in vect_loop_analyze, leading to different -+ vectorization. */ - op = simplify_replace_tree (op, NULL_TREE, NULL_TREE, -- &find_in_mapping, &mapping); -+ &find_in_mapping, &mapping, false); - gimple_set_op (stmt, j, op); - } - } diff --git a/SLP-VECT-Add-check-to-fix-96837.patch b/SLP-VECT-Add-check-to-fix-96837.patch deleted file mode 100644 index bfc60bcddcf61fe1d9290452bfa50ad51188c1cc..0000000000000000000000000000000000000000 --- a/SLP-VECT-Add-check-to-fix-96837.patch +++ /dev/null @@ -1,99 +0,0 @@ -This backport contains 2 patchs from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -97b798d80baf945ea28236eef3fa69f36626b579 -0001-SLP-VECT-Add-check-to-fix-96837.patch - -373b99dc40949efa697326f378e5022a02e0328b -0002-Add-a-testcase-for-PR-target-96827.patch - -diff -uprN a/gcc/testsuite/gcc.dg/vect/bb-slp-49.c b/gcc/testsuite/gcc.dg/vect/bb-slp-49.c ---- a/gcc/testsuite/gcc.dg/vect/bb-slp-49.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-49.c 2020-11-17 15:58:12.118126065 +0800 -@@ -0,0 +1,28 @@ -+/* This checks that vectorized constructors have the correct ordering. */ -+/* { dg-require-effective-target vect_int } */ -+ -+typedef int V __attribute__((__vector_size__(16))); -+ -+__attribute__((__noipa__)) void -+foo (unsigned int x, V *y) -+{ -+ unsigned int a[4] = { x + 0, x + 2, x + 4, x + 6 }; -+ for (unsigned int i = 0; i < 3; ++i) -+ if (a[i] == 1234) -+ a[i]--; -+ *y = (V) { a[3], a[2], a[1], a[0] }; -+} -+ -+int -+main () -+{ -+ V b; -+ foo (0, &b); -+ if (b[0] != 6 || b[1] != 4 || b[2] != 2 || b[3] != 0) -+ __builtin_abort (); -+ return 0; -+} -+ -+/* See that we vectorize an SLP instance. 
*/ -+/* { dg-final { scan-tree-dump "Analyzing vectorizable constructor" "slp1" } } */ -+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "slp1" } } */ -diff -uprN a/gcc/testsuite/gcc.target/i386/pr96827.c b/gcc/testsuite/gcc.target/i386/pr96827.c ---- a/gcc/testsuite/gcc.target/i386/pr96827.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.target/i386/pr96827.c 2020-11-17 15:58:15.182126065 +0800 -@@ -0,0 +1,41 @@ -+/* { dg-do run { target sse2_runtime } } */ -+/* { dg-options "-O3 -msse2 -mfpmath=sse" } */ -+ -+typedef unsigned short int __uint16_t; -+typedef unsigned int __uint32_t; -+typedef __uint16_t uint16_t; -+typedef __uint32_t uint32_t; -+typedef int __v4si __attribute__ ((__vector_size__ (16))); -+typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__)); -+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -+_mm_store_si128 (__m128i *__P, __m128i __B) -+{ -+ *__P = __B; -+} -+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -+_mm_set_epi32 (int __q3, int __q2, int __q1, int __q0) -+{ -+ return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 }; -+} -+typedef uint16_t u16; -+typedef uint32_t u32; -+extern int printf (const char *__restrict __format, ...); -+void do_the_thing(u32 idx, __m128i *dude) -+{ -+ u32 dude_[4] = { idx+0, idx+2, idx+4, idx+6 }; -+ for (u32 i = 0; i < 3; ++i) -+ if (dude_[i] == 1234) -+ dude_[i]--; -+ *dude = _mm_set_epi32(dude_[0], dude_[1], dude_[2], dude_[3]); -+} -+int main() -+{ -+ __m128i dude; -+ u32 idx = 0; -+ do_the_thing(idx, &dude); -+ __attribute__((aligned(16))) u32 dude_[4]; -+ _mm_store_si128((__m128i*)dude_, dude); -+ if (!(6 == dude_[0] && 4 == dude_[1] && 2 == dude_[2] && 0 == dude_[3])) -+ __builtin_abort (); -+ return 0; -+} -diff -uprN a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c ---- a/gcc/tree-vect-slp.c 2020-11-17 15:55:57.098126065 +0800 -+++ b/gcc/tree-vect-slp.c 2020-11-17 15:59:25.862126065 +0800 -@@ -1842,7 +1842,8 @@ vect_supported_load_permutation_p (slp_i - /* Reduction (there are no data-refs in the root). - In reduction chain the order of the loads is not important. */ - if (!STMT_VINFO_DATA_REF (stmt_info) -- && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)) -+ && !REDUC_GROUP_FIRST_ELEMENT (stmt_info) -+ && !SLP_INSTANCE_ROOT_STMT (slp_instn)) - vect_attempt_slp_rearrange_stmts (slp_instn); - - /* In basic block vectorization we allow any subchain of an interleaving diff --git a/Simplify-X-C1-C2.patch b/Simplify-X-C1-C2.patch deleted file mode 100644 index 0997a006b047a7feaa342673e6276cfcad7dd0da..0000000000000000000000000000000000000000 --- a/Simplify-X-C1-C2.patch +++ /dev/null @@ -1,197 +0,0 @@ -This backport contains 2 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-Simplify-X-C1-C2-with-undefined-overflow.patch -ca2b8c082c4f16919071c9f8de8db0b33b54c405 - -0002-Simplify-X-C1-C2-with-wrapping-overflow.patch -287522613d661b4c5ba8403b051eb470c1674cba - -diff -Nurp a/gcc/expr.c b/gcc/expr.c ---- a/gcc/expr.c 2021-03-17 16:34:24.700000000 +0800 -+++ b/gcc/expr.c 2021-03-17 10:30:11.500000000 +0800 -@@ -11706,38 +11706,6 @@ string_constant (tree arg, tree *ptr_off - return init; - } - --/* Compute the modular multiplicative inverse of A modulo M -- using extended Euclid's algorithm. Assumes A and M are coprime. */ --static wide_int --mod_inv (const wide_int &a, const wide_int &b) --{ -- /* Verify the assumption. 
*/ -- gcc_checking_assert (wi::eq_p (wi::gcd (a, b), 1)); -- -- unsigned int p = a.get_precision () + 1; -- gcc_checking_assert (b.get_precision () + 1 == p); -- wide_int c = wide_int::from (a, p, UNSIGNED); -- wide_int d = wide_int::from (b, p, UNSIGNED); -- wide_int x0 = wide_int::from (0, p, UNSIGNED); -- wide_int x1 = wide_int::from (1, p, UNSIGNED); -- -- if (wi::eq_p (b, 1)) -- return wide_int::from (1, p, UNSIGNED); -- -- while (wi::gt_p (c, 1, UNSIGNED)) -- { -- wide_int t = d; -- wide_int q = wi::divmod_trunc (c, d, UNSIGNED, &d); -- c = t; -- wide_int s = x0; -- x0 = wi::sub (x1, wi::mul (q, x0)); -- x1 = s; -- } -- if (wi::lt_p (x1, 0, SIGNED)) -- x1 += d; -- return x1; --} -- - /* Optimize x % C1 == C2 for signed modulo if C1 is a power of two and C2 - is non-zero and C3 ((1<<(prec-1)) | (C1 - 1)): - for C2 > 0 to x & C3 == C2 -@@ -11948,7 +11916,7 @@ maybe_optimize_mod_cmp (enum tree_code c - w = wi::lrshift (w, shift); - wide_int a = wide_int::from (w, prec + 1, UNSIGNED); - wide_int b = wi::shifted_mask (prec, 1, false, prec + 1); -- wide_int m = wide_int::from (mod_inv (a, b), prec, UNSIGNED); -+ wide_int m = wide_int::from (wi::mod_inv (a, b), prec, UNSIGNED); - tree c3 = wide_int_to_tree (type, m); - tree c5 = NULL_TREE; - wide_int d, e; -diff -Nurp a/gcc/match.pd b/gcc/match.pd ---- a/gcc/match.pd 2021-03-17 16:34:19.320000000 +0800 -+++ b/gcc/match.pd 2021-03-17 10:30:11.500000000 +0800 -@@ -3290,6 +3290,35 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) - (scmp @0 @2) - (cmp @0 @2)))))) - -+/* For integral types with undefined overflow fold -+ x * C1 == C2 into x == C2 / C1 or false. -+ If overflow wraps and C1 is odd, simplify to x == C2 / C1 in the ring -+ Z / 2^n Z. */ -+(for cmp (eq ne) -+ (simplify -+ (cmp (mult @0 INTEGER_CST@1) INTEGER_CST@2) -+ (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) -+ && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0)) -+ && wi::to_wide (@1) != 0) -+ (with { widest_int quot; } -+ (if (wi::multiple_of_p (wi::to_widest (@2), wi::to_widest (@1), -+ TYPE_SIGN (TREE_TYPE (@0)), ")) -+ (cmp @0 { wide_int_to_tree (TREE_TYPE (@0), quot); }) -+ { constant_boolean_node (cmp == NE_EXPR, type); })) -+ (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) -+ && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)) -+ && (wi::bit_and (wi::to_wide (@1), 1) == 1)) -+ (cmp @0 -+ { -+ tree itype = TREE_TYPE (@0); -+ int p = TYPE_PRECISION (itype); -+ wide_int m = wi::one (p + 1) << p; -+ wide_int a = wide_int::from (wi::to_wide (@1), p + 1, UNSIGNED); -+ wide_int i = wide_int::from (wi::mod_inv (a, m), -+ p, TYPE_SIGN (itype)); -+ wide_int_to_tree (itype, wi::mul (i, wi::to_wide (@2))); -+ }))))) -+ - /* Simplify comparison of something with itself. For IEEE - floating-point, we can only do some of these simplifications. */ - (for cmp (eq ge le) -diff -Nurp a/gcc/testsuite/gcc.c-torture/execute/pr23135.c b/gcc/testsuite/gcc.c-torture/execute/pr23135.c ---- a/gcc/testsuite/gcc.c-torture/execute/pr23135.c 2021-03-17 16:34:24.016000000 +0800 -+++ b/gcc/testsuite/gcc.c-torture/execute/pr23135.c 2021-03-17 10:30:13.572000000 +0800 -@@ -1,7 +1,7 @@ - /* Based on execute/simd-1.c, modified by joern.rennecke@st.com to - trigger a reload bug. Verified for gcc mainline from 20050722 13:00 UTC - for sh-elf -m4 -O2. 
*/ --/* { dg-options "-Wno-psabi" } */ -+/* { dg-options "-Wno-psabi -fwrapv" } */ - /* { dg-add-options stack_size } */ - - #ifndef STACK_SIZE -diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr95433-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr95433-2.c ---- a/gcc/testsuite/gcc.dg/tree-ssa/pr95433-2.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr95433-2.c 2021-03-17 10:30:13.276000000 +0800 -@@ -0,0 +1,15 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -fwrapv -fdump-tree-gimple" } */ -+ -+typedef __INT32_TYPE__ int32_t; -+typedef unsigned __INT32_TYPE__ uint32_t; -+ -+int e(int32_t x){return 3*x==5;} -+int f(int32_t x){return 3*x==-5;} -+int g(int32_t x){return -3*x==5;} -+int h(int32_t x){return 7*x==3;} -+int i(uint32_t x){return 7*x==3;} -+ -+/* { dg-final { scan-tree-dump-times "== 1431655767" 1 "gimple" } } */ -+/* { dg-final { scan-tree-dump-times "== -1431655767" 2 "gimple" } } */ -+/* { dg-final { scan-tree-dump-times "== 613566757" 2 "gimple" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr95433.c b/gcc/testsuite/gcc.dg/tree-ssa/pr95433.c ---- a/gcc/testsuite/gcc.dg/tree-ssa/pr95433.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr95433.c 2021-03-17 10:30:13.276000000 +0800 -@@ -0,0 +1,8 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -fdump-tree-optimized" } */ -+ -+int f(int x){return x*7==17;} -+int g(int x){return x*3==15;} -+ -+/* { dg-final { scan-tree-dump "return 0;" "optimized" } } */ -+/* { dg-final { scan-tree-dump "== 5;" "optimized" } } */ -diff -Nurp a/gcc/wide-int.cc b/gcc/wide-int.cc ---- a/gcc/wide-int.cc 2021-03-17 16:34:24.488000000 +0800 -+++ b/gcc/wide-int.cc 2021-03-17 10:30:11.500000000 +0800 -@@ -2223,6 +2223,39 @@ wi::round_up_for_mask (const wide_int &v - return (val | tmp) & -tmp; - } - -+/* Compute the modular multiplicative inverse of A modulo B -+ using extended Euclid's algorithm. Assumes A and B are coprime, -+ and that A and B have the same precision. */ -+wide_int -+wi::mod_inv (const wide_int &a, const wide_int &b) -+{ -+ /* Verify the assumption. */ -+ gcc_checking_assert (wi::eq_p (wi::gcd (a, b), 1)); -+ -+ unsigned int p = a.get_precision () + 1; -+ gcc_checking_assert (b.get_precision () + 1 == p); -+ wide_int c = wide_int::from (a, p, UNSIGNED); -+ wide_int d = wide_int::from (b, p, UNSIGNED); -+ wide_int x0 = wide_int::from (0, p, UNSIGNED); -+ wide_int x1 = wide_int::from (1, p, UNSIGNED); -+ -+ if (wi::eq_p (b, 1)) -+ return wide_int::from (1, p, UNSIGNED); -+ -+ while (wi::gt_p (c, 1, UNSIGNED)) -+ { -+ wide_int t = d; -+ wide_int q = wi::divmod_trunc (c, d, UNSIGNED, &d); -+ c = t; -+ wide_int s = x0; -+ x0 = wi::sub (x1, wi::mul (q, x0)); -+ x1 = s; -+ } -+ if (wi::lt_p (x1, 0, SIGNED)) -+ x1 += d; -+ return x1; -+} -+ - /* - * Private utilities. 
- */ -diff -Nurp a/gcc/wide-int.h b/gcc/wide-int.h ---- a/gcc/wide-int.h 2021-03-17 16:34:14.792000000 +0800 -+++ b/gcc/wide-int.h 2021-03-17 10:30:11.500000000 +0800 -@@ -3368,6 +3368,8 @@ namespace wi - wide_int round_down_for_mask (const wide_int &, const wide_int &); - wide_int round_up_for_mask (const wide_int &, const wide_int &); - -+ wide_int mod_inv (const wide_int &a, const wide_int &b); -+ - template - T mask (unsigned int, bool); - diff --git a/aarch64-Fix-ash-lr-lshr-mode-3-expanders.patch b/aarch64-Fix-ash-lr-lshr-mode-3-expanders.patch deleted file mode 100644 index e28c8a6005b12975ec356dca0b40163b4bbe20b5..0000000000000000000000000000000000000000 --- a/aarch64-Fix-ash-lr-lshr-mode-3-expanders.patch +++ /dev/null @@ -1,165 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -7a6588fe65432c0f1a8b5fdefba81700ebf88711 -0001-aarch64-Fix-ash-lr-lshr-mode-3-expanders-PR94488.patch - -diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md -index 24a11fb5040..9f0e2bd1e6f 100644 ---- a/gcc/config/aarch64/aarch64-simd.md -+++ b/gcc/config/aarch64/aarch64-simd.md -@@ -1105,31 +1105,17 @@ - tmp)); - DONE; - } -- else -- { -- operands[2] = force_reg (SImode, operands[2]); -- } -- } -- else if (MEM_P (operands[2])) -- { -- operands[2] = force_reg (SImode, operands[2]); - } - -- if (REG_P (operands[2])) -- { -- rtx tmp = gen_reg_rtx (mode); -- emit_insn (gen_aarch64_simd_dup (tmp, -- convert_to_mode (mode, -- operands[2], -- 0))); -- emit_insn (gen_aarch64_simd_reg_sshl (operands[0], operands[1], -- tmp)); -- DONE; -- } -- else -- FAIL; --} --) -+ operands[2] = force_reg (SImode, operands[2]); -+ -+ rtx tmp = gen_reg_rtx (mode); -+ emit_insn (gen_aarch64_simd_dup (tmp, convert_to_mode (mode, -+ operands[2], -+ 0))); -+ emit_insn (gen_aarch64_simd_reg_sshl (operands[0], operands[1], tmp)); -+ DONE; -+}) - - (define_expand "lshr3" - [(match_operand:VDQ_I 0 "register_operand") -@@ -1152,31 +1138,19 @@ - tmp)); - DONE; - } -- else -- operands[2] = force_reg (SImode, operands[2]); -- } -- else if (MEM_P (operands[2])) -- { -- operands[2] = force_reg (SImode, operands[2]); - } - -- if (REG_P (operands[2])) -- { -- rtx tmp = gen_reg_rtx (SImode); -- rtx tmp1 = gen_reg_rtx (mode); -- emit_insn (gen_negsi2 (tmp, operands[2])); -- emit_insn (gen_aarch64_simd_dup (tmp1, -- convert_to_mode (mode, -- tmp, 0))); -- emit_insn (gen_aarch64_simd_reg_shl_unsigned (operands[0], -- operands[1], -- tmp1)); -- DONE; -- } -- else -- FAIL; --} --) -+ operands[2] = force_reg (SImode, operands[2]); -+ -+ rtx tmp = gen_reg_rtx (SImode); -+ rtx tmp1 = gen_reg_rtx (mode); -+ emit_insn (gen_negsi2 (tmp, operands[2])); -+ emit_insn (gen_aarch64_simd_dup (tmp1, -+ convert_to_mode (mode, tmp, 0))); -+ emit_insn (gen_aarch64_simd_reg_shl_unsigned (operands[0], operands[1], -+ tmp1)); -+ DONE; -+}) - - (define_expand "ashr3" - [(match_operand:VDQ_I 0 "register_operand") -@@ -1199,31 +1173,19 @@ - tmp)); - DONE; - } -- else -- operands[2] = force_reg (SImode, operands[2]); -- } -- else if (MEM_P (operands[2])) -- { -- operands[2] = force_reg (SImode, operands[2]); - } - -- if (REG_P (operands[2])) -- { -- rtx tmp = gen_reg_rtx (SImode); -- rtx tmp1 = gen_reg_rtx (mode); -- emit_insn (gen_negsi2 (tmp, operands[2])); -- emit_insn (gen_aarch64_simd_dup (tmp1, -- convert_to_mode (mode, -- tmp, 0))); -- emit_insn (gen_aarch64_simd_reg_shl_signed (operands[0], -- operands[1], -- tmp1)); -- DONE; -- } -- else -- 
FAIL; --} --) -+ operands[2] = force_reg (SImode, operands[2]); -+ -+ rtx tmp = gen_reg_rtx (SImode); -+ rtx tmp1 = gen_reg_rtx (mode); -+ emit_insn (gen_negsi2 (tmp, operands[2])); -+ emit_insn (gen_aarch64_simd_dup (tmp1, convert_to_mode (mode, -+ tmp, 0))); -+ emit_insn (gen_aarch64_simd_reg_shl_signed (operands[0], operands[1], -+ tmp1)); -+ DONE; -+}) - - (define_expand "vashl3" - [(match_operand:VDQ_I 0 "register_operand") -diff --git a/gcc/testsuite/gcc.c-torture/compile/pr94488.c b/gcc/testsuite/gcc.c-torture/compile/pr94488.c -new file mode 100644 -index 00000000000..6e20a4168de ---- /dev/null -+++ b/gcc/testsuite/gcc.c-torture/compile/pr94488.c -@@ -0,0 +1,22 @@ -+/* PR target/94488 */ -+ -+typedef unsigned long V __attribute__((__vector_size__(16))); -+typedef long W __attribute__((__vector_size__(16))); -+ -+void -+foo (V *x, unsigned long y) -+{ -+ *x = *x >> (unsigned int) y; -+} -+ -+void -+bar (V *x, unsigned long y) -+{ -+ *x = *x << (unsigned int) y; -+} -+ -+void -+baz (W *x, unsigned long y) -+{ -+ *x = *x >> (unsigned int) y; -+} diff --git a/aarch64-Fix-bf16-and-matrix-g++-gfortran.patch b/aarch64-Fix-bf16-and-matrix-g++-gfortran.patch deleted file mode 100644 index 6bc36da43e820c2e6d3b85421a680ce93d5f3dfc..0000000000000000000000000000000000000000 --- a/aarch64-Fix-bf16-and-matrix-g++-gfortran.patch +++ /dev/null @@ -1,1613 +0,0 @@ -This backport contains 5 patchs from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-re-PR-target-88838-SVE-Use-32-bit-WHILELO-in-LP64-mo.patch -9b884225bfc609606f9b169b021c4da93feba48e - -0002-C-Avoid-aka-types-that-just-add-tags.patch -558798156b41fcbe5ba68b75171708cad135b041 - -0003-g-.dg-tree-ssa-pr61034.C-Add-param-max-inline-insns-.patch -cfcf3551c432da3a8154ef11a26a4d75655deb3d - -0004-C-Avoid-exposing-internal-details-in-aka-types.patch -56898e437a538c7edc0724a3650f5cb81c9d5721 - -0005-C-Avoid-exposing-internal-details-in-aka-types.patch -10bce48f104de56503b17954ed79f019df3252e3 - -diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c -index bf3db074a..2c10743b9 100644 ---- a/gcc/c-family/c-common.c -+++ b/gcc/c-family/c-common.c -@@ -48,6 +48,7 @@ along with GCC; see the file COPYING3. If not see - #include "gimplify.h" - #include "substring-locations.h" - #include "spellcheck.h" -+#include "c-spellcheck.h" - #include "selftest.h" - - cpp_reader *parse_in; /* Declared in c-pragma.h. */ -@@ -7685,6 +7686,52 @@ set_underlying_type (tree x) - } - } - -+/* Return true if it is worth exposing the DECL_ORIGINAL_TYPE of TYPE to -+ the user in diagnostics, false if it would be better to use TYPE itself. -+ TYPE is known to satisfy typedef_variant_p. */ -+ -+bool -+user_facing_original_type_p (const_tree type) -+{ -+ gcc_assert (typedef_variant_p (type)); -+ tree decl = TYPE_NAME (type); -+ -+ /* Look through any typedef in "user" code. */ -+ if (!DECL_IN_SYSTEM_HEADER (decl) && !DECL_IS_BUILTIN (decl)) -+ return true; -+ -+ /* If the original type is also named and is in the user namespace, -+ assume it too is a user-facing type. */ -+ tree orig_type = DECL_ORIGINAL_TYPE (decl); -+ if (tree orig_id = TYPE_IDENTIFIER (orig_type)) -+ if (!name_reserved_for_implementation_p (IDENTIFIER_POINTER (orig_id))) -+ return true; -+ -+ switch (TREE_CODE (orig_type)) -+ { -+ /* Don't look through to an anonymous vector type, since the syntax -+ we use for them in diagnostics isn't real C or C++ syntax. 
-+ And if ORIG_TYPE is named but in the implementation namespace, -+ TYPE is likely to be more meaningful to the user. */ -+ case VECTOR_TYPE: -+ return false; -+ -+ /* Don't expose anonymous tag types that are presumably meant to be -+ known by their typedef name. Also don't expose tags that are in -+ the implementation namespace, such as: -+ -+ typedef struct __foo foo; */ -+ case RECORD_TYPE: -+ case UNION_TYPE: -+ case ENUMERAL_TYPE: -+ return false; -+ -+ /* Look through to anything else. */ -+ default: -+ return true; -+ } -+} -+ - /* Record the types used by the current global variable declaration - being parsed, so that we can decide later to emit their debug info. - Those types are in types_used_by_cur_var_decl, and we are going to -diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h -index 46b8d265a..73ce7c5df 100644 ---- a/gcc/c-family/c-common.h -+++ b/gcc/c-family/c-common.h -@@ -1063,6 +1063,7 @@ extern tree builtin_type_for_size (int, bool); - extern void c_common_mark_addressable_vec (tree); - - extern void set_underlying_type (tree); -+extern bool user_facing_original_type_p (const_tree); - extern void record_types_used_by_current_var_decl (tree); - extern vec *make_tree_vector (void); - extern void release_tree_vector (vec *); -diff --git a/gcc/c/c-objc-common.c b/gcc/c/c-objc-common.c -index 2b76737a7..10d72c57d 100644 ---- a/gcc/c/c-objc-common.c -+++ b/gcc/c/c-objc-common.c -@@ -28,6 +28,8 @@ along with GCC; see the file COPYING3. If not see - #include "langhooks.h" - #include "c-objc-common.h" - #include "gcc-rich-location.h" -+#include "stringpool.h" -+#include "attribs.h" - - static bool c_tree_printer (pretty_printer *, text_info *, const char *, - int, bool, bool, bool, bool *, const char **); -@@ -62,6 +64,122 @@ c_objc_common_init (void) - return c_common_init (); - } - -+/* Decide whether it's worth saying that TYPE is also known as some other -+ type. Return the other type if so, otherwise return TYPE. */ -+ -+static tree -+get_aka_type (tree type) -+{ -+ if (type == error_mark_node) -+ return type; -+ -+ tree result; -+ if (typedef_variant_p (type)) -+ { -+ /* Saying that "foo" is also known as "struct foo" or -+ "struct " is unlikely to be useful, since users of -+ structure-like types would already know that they're structures. -+ The same applies to unions and enums; in general, printing the -+ tag is only useful if it has a different name. */ -+ tree orig_type = DECL_ORIGINAL_TYPE (TYPE_NAME (type)); -+ tree_code code = TREE_CODE (orig_type); -+ tree orig_id = TYPE_IDENTIFIER (orig_type); -+ if ((code == RECORD_TYPE || code == UNION_TYPE || code == ENUMERAL_TYPE) -+ && (!orig_id || TYPE_IDENTIFIER (type) == orig_id)) -+ return type; -+ -+ if (!user_facing_original_type_p (type)) -+ return type; -+ -+ result = get_aka_type (orig_type); -+ } -+ else -+ { -+ tree canonical = TYPE_CANONICAL (type); -+ if (canonical && TREE_CODE (type) != TREE_CODE (canonical)) -+ return canonical; -+ -+ /* Recursive calls might choose a middle ground between TYPE -+ (which has no typedefs stripped) and CANONICAL (which has -+ all typedefs stripped). So try to reuse TYPE or CANONICAL if -+ convenient, but be prepared to create a new type if necessary. 
*/ -+ switch (TREE_CODE (type)) -+ { -+ case POINTER_TYPE: -+ case REFERENCE_TYPE: -+ { -+ tree target_type = get_aka_type (TREE_TYPE (type)); -+ -+ if (target_type == TREE_TYPE (type)) -+ return type; -+ -+ if (canonical && target_type == TREE_TYPE (canonical)) -+ return canonical; -+ -+ result = (TREE_CODE (type) == POINTER_TYPE -+ ? build_pointer_type (target_type) -+ : build_reference_type (target_type)); -+ break; -+ } -+ -+ case ARRAY_TYPE: -+ { -+ tree element_type = get_aka_type (TREE_TYPE (type)); -+ tree index_type = (TYPE_DOMAIN (type) -+ ? get_aka_type (TYPE_DOMAIN (type)) -+ : NULL_TREE); -+ -+ if (element_type == TREE_TYPE (type) -+ && index_type == TYPE_DOMAIN (type)) -+ return type; -+ -+ if (canonical -+ && element_type == TREE_TYPE (canonical) -+ && index_type == TYPE_DOMAIN (canonical)) -+ return canonical; -+ -+ result = build_array_type (element_type, index_type, -+ TYPE_TYPELESS_STORAGE (type)); -+ break; -+ } -+ -+ case FUNCTION_TYPE: -+ { -+ tree return_type = get_aka_type (TREE_TYPE (type)); -+ -+ tree args = TYPE_ARG_TYPES (type); -+ if (args == error_mark_node) -+ return type; -+ -+ auto_vec arg_types; -+ bool type_ok_p = true; -+ while (args && args != void_list_node) -+ { -+ tree arg_type = get_aka_type (TREE_VALUE (args)); -+ arg_types.safe_push (arg_type); -+ type_ok_p &= (arg_type == TREE_VALUE (args)); -+ args = TREE_CHAIN (args); -+ } -+ -+ if (type_ok_p && return_type == TREE_TYPE (type)) -+ return type; -+ -+ unsigned int i; -+ tree arg_type; -+ FOR_EACH_VEC_ELT_REVERSE (arg_types, i, arg_type) -+ args = tree_cons (NULL_TREE, arg_type, args); -+ result = build_function_type (return_type, args); -+ break; -+ } -+ -+ default: -+ return canonical ? canonical : type; -+ } -+ } -+ return build_type_attribute_qual_variant (result, TYPE_ATTRIBUTES (type), -+ TYPE_QUALS (type)); -+} -+ - /* Print T to CPP. */ - - static void -@@ -83,11 +201,12 @@ print_type (c_pretty_printer *cpp, tree t, bool *quoted) - stripped version. But sometimes the stripped version looks - exactly the same, so we don't want it after all. To avoid - printing it in that case, we play ugly obstack games. */ -- if (TYPE_CANONICAL (t) && t != TYPE_CANONICAL (t)) -+ tree aka_type = get_aka_type (t); -+ if (aka_type != t) - { - c_pretty_printer cpp2; - /* Print the stripped version into a temporary printer. */ -- cpp2.type_id (TYPE_CANONICAL (t)); -+ cpp2.type_id (aka_type); - struct obstack *ob2 = cpp2.buffer->obstack; - /* Get the stripped version from the temporary printer. */ - const char *aka = (char *) obstack_base (ob2); -@@ -107,7 +226,7 @@ print_type (c_pretty_printer *cpp, tree t, bool *quoted) - pp_c_whitespace (cpp); - if (*quoted) - pp_begin_quote (cpp, pp_show_color (cpp)); -- cpp->type_id (TYPE_CANONICAL (t)); -+ cpp->type_id (aka_type); - if (*quoted) - pp_end_quote (cpp, pp_show_color (cpp)); - pp_right_brace (cpp); -diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h -index 4bba1887f..e802dcbeb 100644 ---- a/gcc/cp/cp-tree.h -+++ b/gcc/cp/cp-tree.h -@@ -5662,6 +5662,13 @@ enum auto_deduction_context - #define TFF_NO_TEMPLATE_BINDINGS (1 << 13) - #define TFF_POINTER (1 << 14) - -+/* These constants can be used as bit flags to control strip_typedefs. -+ -+ STF_USER_VISIBLE: use heuristics to try to avoid stripping user-facing -+ aliases of internal details. This is intended for diagnostics, -+ where it should (for example) give more useful "aka" types. */ -+const unsigned int STF_USER_VISIBLE = 1U; -+ - /* Returns the TEMPLATE_DECL associated to a TEMPLATE_TEMPLATE_PARM - node. 
*/ - #define TEMPLATE_TEMPLATE_PARM_TEMPLATE_DECL(NODE) \ -@@ -7221,8 +7228,10 @@ extern int zero_init_p (const_tree); - extern bool check_abi_tag_redeclaration (const_tree, const_tree, - const_tree); - extern bool check_abi_tag_args (tree, tree); --extern tree strip_typedefs (tree, bool * = NULL); --extern tree strip_typedefs_expr (tree, bool * = NULL); -+extern tree strip_typedefs (tree, bool * = NULL, -+ unsigned int = 0); -+extern tree strip_typedefs_expr (tree, bool * = NULL, -+ unsigned int = 0); - extern tree copy_binfo (tree, tree, tree, - tree *, int); - extern int member_p (const_tree); -diff --git a/gcc/cp/error.c b/gcc/cp/error.c -index 4a0aed2b7..5beaf2dc1 100644 ---- a/gcc/cp/error.c -+++ b/gcc/cp/error.c -@@ -408,7 +408,7 @@ dump_template_bindings (cxx_pretty_printer *pp, tree parms, tree args, - pop_deferring_access_checks (); - /* Strip typedefs. We can't just use TFF_CHASE_TYPEDEF because - pp_simple_type_specifier doesn't know about it. */ -- t = strip_typedefs (t); -+ t = strip_typedefs (t, NULL, STF_USER_VISIBLE); - dump_type (pp, t, TFF_PLAIN_IDENTIFIER); - } - } -@@ -447,7 +447,11 @@ dump_type (cxx_pretty_printer *pp, tree t, int flags) - || DECL_SELF_REFERENCE_P (decl) - || (!flag_pretty_templates - && DECL_LANG_SPECIFIC (decl) && DECL_TEMPLATE_INFO (decl))) -- t = strip_typedefs (t); -+ { -+ unsigned int stf_flags = (!(pp->flags & pp_c_flag_gnu_v3) -+ ? STF_USER_VISIBLE : 0); -+ t = strip_typedefs (t, NULL, stf_flags); -+ } - else if (alias_template_specialization_p (t)) - { - dump_alias_template_specialization (pp, t, flags); -@@ -3193,7 +3197,7 @@ type_to_string (tree typ, int verbose, bool postprocessed, bool *quote, - && !uses_template_parms (typ)) - { - int aka_start, aka_len; char *p; -- tree aka = strip_typedefs (typ); -+ tree aka = strip_typedefs (typ, NULL, STF_USER_VISIBLE); - if (quote && *quote) - pp_end_quote (cxx_pp, show_color); - pp_string (cxx_pp, " {aka"); -diff --git a/gcc/cp/tree.c b/gcc/cp/tree.c -index 3f3583c82..6a1f760ba 100644 ---- a/gcc/cp/tree.c -+++ b/gcc/cp/tree.c -@@ -1421,7 +1421,10 @@ apply_identity_attributes (tree result, tree attribs, bool *remove_attributes) - return cp_build_type_attribute_variant (result, new_attribs); - } - --/* Builds a qualified variant of T that is not a typedef variant. -+/* Builds a qualified variant of T that is either not a typedef variant -+ (the default behavior) or not a typedef variant of a user-facing type -+ (if FLAGS contains STF_USER_FACING). -+ - E.g. consider the following declarations: - typedef const int ConstInt; - typedef ConstInt* PtrConstInt; -@@ -1446,7 +1449,7 @@ apply_identity_attributes (tree result, tree attribs, bool *remove_attributes) - stripped. 
*/ - - tree --strip_typedefs (tree t, bool *remove_attributes) -+strip_typedefs (tree t, bool *remove_attributes, unsigned int flags) - { - tree result = NULL, type = NULL, t0 = NULL; - -@@ -1461,7 +1464,7 @@ strip_typedefs (tree t, bool *remove_attributes) - for (; t; t = TREE_CHAIN (t)) - { - gcc_assert (!TREE_PURPOSE (t)); -- tree elt = strip_typedefs (TREE_VALUE (t), remove_attributes); -+ tree elt = strip_typedefs (TREE_VALUE (t), remove_attributes, flags); - if (elt != TREE_VALUE (t)) - changed = true; - vec_safe_push (vec, elt); -@@ -1485,28 +1488,29 @@ strip_typedefs (tree t, bool *remove_attributes) - switch (TREE_CODE (t)) - { - case POINTER_TYPE: -- type = strip_typedefs (TREE_TYPE (t), remove_attributes); -+ type = strip_typedefs (TREE_TYPE (t), remove_attributes, flags); - result = build_pointer_type (type); - break; - case REFERENCE_TYPE: -- type = strip_typedefs (TREE_TYPE (t), remove_attributes); -+ type = strip_typedefs (TREE_TYPE (t), remove_attributes, flags); - result = cp_build_reference_type (type, TYPE_REF_IS_RVALUE (t)); - break; - case OFFSET_TYPE: -- t0 = strip_typedefs (TYPE_OFFSET_BASETYPE (t), remove_attributes); -- type = strip_typedefs (TREE_TYPE (t), remove_attributes); -+ t0 = strip_typedefs (TYPE_OFFSET_BASETYPE (t), remove_attributes, flags); -+ type = strip_typedefs (TREE_TYPE (t), remove_attributes, flags); - result = build_offset_type (t0, type); - break; - case RECORD_TYPE: - if (TYPE_PTRMEMFUNC_P (t)) - { -- t0 = strip_typedefs (TYPE_PTRMEMFUNC_FN_TYPE (t), remove_attributes); -+ t0 = strip_typedefs (TYPE_PTRMEMFUNC_FN_TYPE (t), -+ remove_attributes, flags); - result = build_ptrmemfunc_type (t0); - } - break; - case ARRAY_TYPE: -- type = strip_typedefs (TREE_TYPE (t), remove_attributes); -- t0 = strip_typedefs (TYPE_DOMAIN (t), remove_attributes); -+ type = strip_typedefs (TREE_TYPE (t), remove_attributes, flags); -+ t0 = strip_typedefs (TYPE_DOMAIN (t), remove_attributes, flags); - result = build_cplus_array_type (type, t0); - break; - case FUNCTION_TYPE: -@@ -1525,7 +1529,7 @@ strip_typedefs (tree t, bool *remove_attributes) - && (TYPE_ATTRIBUTES (t) || TYPE_USER_ALIGN (t))) - is_variant = true; - -- type = strip_typedefs (TREE_TYPE (t), remove_attributes); -+ type = strip_typedefs (TREE_TYPE (t), remove_attributes, flags); - tree canon_spec = (flag_noexcept_type - ? 
canonical_eh_spec (TYPE_RAISES_EXCEPTIONS (t)) - : NULL_TREE); -@@ -1539,7 +1543,7 @@ strip_typedefs (tree t, bool *remove_attributes) - if (arg_node == void_list_node) - break; - arg_type = strip_typedefs (TREE_VALUE (arg_node), -- remove_attributes); -+ remove_attributes, flags); - gcc_assert (arg_type); - if (arg_type == TREE_VALUE (arg_node) && !changed) - continue; -@@ -1603,9 +1607,10 @@ strip_typedefs (tree t, bool *remove_attributes) - tree arg = TREE_VEC_ELT (args, i); - tree strip_arg; - if (TYPE_P (arg)) -- strip_arg = strip_typedefs (arg, remove_attributes); -+ strip_arg = strip_typedefs (arg, remove_attributes, flags); - else -- strip_arg = strip_typedefs_expr (arg, remove_attributes); -+ strip_arg = strip_typedefs_expr (arg, remove_attributes, -+ flags); - TREE_VEC_ELT (new_args, i) = strip_arg; - if (strip_arg != arg) - changed = true; -@@ -1621,7 +1626,7 @@ strip_typedefs (tree t, bool *remove_attributes) - else - ggc_free (new_args); - } -- tree ctx = strip_typedefs (TYPE_CONTEXT (t), remove_attributes); -+ tree ctx = strip_typedefs (TYPE_CONTEXT (t), remove_attributes, flags); - if (!changed && ctx == TYPE_CONTEXT (t) && !typedef_variant_p (t)) - return t; - tree name = fullname; -@@ -1634,7 +1639,7 @@ strip_typedefs (tree t, bool *remove_attributes) - break; - case DECLTYPE_TYPE: - result = strip_typedefs_expr (DECLTYPE_TYPE_EXPR (t), -- remove_attributes); -+ remove_attributes, flags); - if (result == DECLTYPE_TYPE_EXPR (t)) - result = NULL_TREE; - else -@@ -1644,7 +1649,8 @@ strip_typedefs (tree t, bool *remove_attributes) - tf_none)); - break; - case UNDERLYING_TYPE: -- type = strip_typedefs (UNDERLYING_TYPE_TYPE (t), remove_attributes); -+ type = strip_typedefs (UNDERLYING_TYPE_TYPE (t), -+ remove_attributes, flags); - result = finish_underlying_type (type); - break; - default: -@@ -1655,15 +1661,18 @@ strip_typedefs (tree t, bool *remove_attributes) - { - if (typedef_variant_p (t)) - { -- /* Explicitly get the underlying type, as TYPE_MAIN_VARIANT doesn't -- strip typedefs with attributes. */ -- result = TYPE_MAIN_VARIANT (DECL_ORIGINAL_TYPE (TYPE_NAME (t))); -- result = strip_typedefs (result); -+ if ((flags & STF_USER_VISIBLE) -+ && !user_facing_original_type_p (t)) -+ return t; -+ result = strip_typedefs (DECL_ORIGINAL_TYPE (TYPE_NAME (t)), -+ remove_attributes, flags); - } - else - result = TYPE_MAIN_VARIANT (t); - } -- gcc_assert (!typedef_variant_p (result)); -+ gcc_assert (!typedef_variant_p (result) -+ || ((flags & STF_USER_VISIBLE) -+ && !user_facing_original_type_p (result))); - - if (COMPLETE_TYPE_P (result) && !COMPLETE_TYPE_P (t)) - /* If RESULT is complete and T isn't, it's likely the case that T -@@ -1712,7 +1721,7 @@ strip_typedefs (tree t, bool *remove_attributes) - sizeof(TT) is replaced by sizeof(T). */ - - tree --strip_typedefs_expr (tree t, bool *remove_attributes) -+strip_typedefs_expr (tree t, bool *remove_attributes, unsigned int flags) - { - unsigned i,n; - tree r, type, *ops; -@@ -1729,7 +1738,7 @@ strip_typedefs_expr (tree t, bool *remove_attributes) - /* Some expressions have type operands, so let's handle types here rather - than check TYPE_P in multiple places below. 
*/ - if (TYPE_P (t)) -- return strip_typedefs (t, remove_attributes); -+ return strip_typedefs (t, remove_attributes, flags); - - code = TREE_CODE (t); - switch (code) -@@ -1743,8 +1752,10 @@ strip_typedefs_expr (tree t, bool *remove_attributes) - - case TRAIT_EXPR: - { -- tree type1 = strip_typedefs (TRAIT_EXPR_TYPE1 (t), remove_attributes); -- tree type2 = strip_typedefs (TRAIT_EXPR_TYPE2 (t), remove_attributes); -+ tree type1 = strip_typedefs (TRAIT_EXPR_TYPE1 (t), -+ remove_attributes, flags); -+ tree type2 = strip_typedefs (TRAIT_EXPR_TYPE2 (t), -+ remove_attributes, flags); - if (type1 == TRAIT_EXPR_TYPE1 (t) - && type2 == TRAIT_EXPR_TYPE2 (t)) - return t; -@@ -1761,7 +1772,8 @@ strip_typedefs_expr (tree t, bool *remove_attributes) - tree it; - for (it = t; it; it = TREE_CHAIN (it)) - { -- tree val = strip_typedefs_expr (TREE_VALUE (it), remove_attributes); -+ tree val = strip_typedefs_expr (TREE_VALUE (it), -+ remove_attributes, flags); - vec_safe_push (vec, val); - if (val != TREE_VALUE (it)) - changed = true; -@@ -1788,7 +1800,7 @@ strip_typedefs_expr (tree t, bool *remove_attributes) - for (i = 0; i < n; ++i) - { - tree op = strip_typedefs_expr (TREE_VEC_ELT (t, i), -- remove_attributes); -+ remove_attributes, flags); - vec->quick_push (op); - if (op != TREE_VEC_ELT (t, i)) - changed = true; -@@ -1813,18 +1825,19 @@ strip_typedefs_expr (tree t, bool *remove_attributes) - vec *vec - = vec_safe_copy (CONSTRUCTOR_ELTS (t)); - n = CONSTRUCTOR_NELTS (t); -- type = strip_typedefs (TREE_TYPE (t), remove_attributes); -+ type = strip_typedefs (TREE_TYPE (t), remove_attributes, flags); - for (i = 0; i < n; ++i) - { - constructor_elt *e = &(*vec)[i]; -- tree op = strip_typedefs_expr (e->value, remove_attributes); -+ tree op = strip_typedefs_expr (e->value, remove_attributes, flags); - if (op != e->value) - { - changed = true; - e->value = op; - } - gcc_checking_assert -- (e->index == strip_typedefs_expr (e->index, remove_attributes)); -+ (e->index == strip_typedefs_expr (e->index, remove_attributes, -+ flags)); - } - - if (!changed && type == TREE_TYPE (t)) -@@ -1868,12 +1881,13 @@ strip_typedefs_expr (tree t, bool *remove_attributes) - case REINTERPRET_CAST_EXPR: - case CAST_EXPR: - case NEW_EXPR: -- type = strip_typedefs (type, remove_attributes); -+ type = strip_typedefs (type, remove_attributes, flags); - /* fallthrough */ - - default: - for (i = 0; i < n; ++i) -- ops[i] = strip_typedefs_expr (TREE_OPERAND (t, i), remove_attributes); -+ ops[i] = strip_typedefs_expr (TREE_OPERAND (t, i), -+ remove_attributes, flags); - break; - } - -diff --git a/gcc/testsuite/g++.dg/diagnostic/aka5.h b/gcc/testsuite/g++.dg/diagnostic/aka5.h -new file mode 100644 -index 000000000..0c7404d76 ---- /dev/null -+++ b/gcc/testsuite/g++.dg/diagnostic/aka5.h -@@ -0,0 +1,22 @@ -+#ifdef IS_SYSTEM_HEADER -+#pragma GCC system_header -+#endif -+ -+typedef enum __internal_enum { A, B } user_enum; -+typedef user_enum *user_enum_ptr; -+ -+typedef struct __internal_struct { int i; } user_struct; -+typedef user_struct user_struct_copy; -+typedef user_struct *user_struct_ptr; -+ -+typedef union __internal_union { int i; } user_union; -+typedef user_union user_union_copy; -+typedef user_union *user_union_ptr; -+ -+typedef unsigned int user_vector __attribute__((__vector_size__(16))); -+typedef user_vector user_vector_copy; -+typedef user_vector *user_vector_ptr; -+ -+typedef int user_int; -+typedef user_int user_int_copy; -+typedef user_int *user_int_ptr; -diff --git a/gcc/testsuite/g++.dg/diagnostic/aka5a.C 
b/gcc/testsuite/g++.dg/diagnostic/aka5a.C -new file mode 100644 -index 000000000..e9d4c02f6 ---- /dev/null -+++ b/gcc/testsuite/g++.dg/diagnostic/aka5a.C -@@ -0,0 +1,127 @@ -+#define IS_SYSTEM_HEADER -+#include "aka5.h" -+ -+typedef user_enum user_enum_copy; -+ -+struct s { int i; }; -+ -+user_enum ue1; -+user_enum_copy ue2; -+user_enum_ptr ue_ptr1; -+user_enum *ue_ptr2; -+const user_enum *const_ue_ptr1; -+const user_enum_copy *const_ue_ptr2; -+volatile user_enum *volatile_ue_ptr1; -+volatile user_enum_copy *volatile_ue_ptr2; -+user_enum (*ue_array_ptr1)[10]; -+user_enum_copy (*ue_array_ptr2)[10]; -+user_enum (*ue_fn_ptr1) (void); -+void (*ue_fn_ptr2) (user_enum); -+void (*ue_fn_ptr3) (user_enum, ...); -+user_enum_copy (*ue_fn_ptr4) (void); -+void (*ue_fn_ptr5) (user_enum_copy); -+void (*ue_fn_ptr6) (user_enum_copy, ...); -+user_enum (*__attribute__((__transaction_unsafe__)) unsafe_ue_fn_ptr1) (void); -+user_enum_copy (*__attribute__((__transaction_unsafe__)) unsafe_ue_fn_ptr2) (void); -+ -+user_struct us1; -+user_struct_copy us2; -+user_struct_ptr us_ptr1; -+user_struct *us_ptr2; -+const user_struct *const_us_ptr1; -+const user_struct_copy *const_us_ptr2; -+ -+user_union uu1; -+user_union_copy uu2; -+user_union_ptr uu_ptr1; -+user_union *uu_ptr2; -+const user_union *const_uu_ptr1; -+const user_union_copy *const_uu_ptr2; -+ -+user_vector uv1; -+user_vector_copy uv2; -+user_vector_ptr uv_ptr1; -+user_vector *uv_ptr2; -+const user_vector *const_uv_ptr1; -+const user_vector_copy *const_uv_ptr2; -+ -+user_int ui1; -+user_int_copy ui2; -+user_int_ptr ui_ptr1; -+user_int *ui_ptr2; -+const user_int *const_ui_ptr1; -+const user_int_copy *const_ui_ptr2; -+volatile user_int *volatile_ui_ptr1; -+volatile user_int_copy *volatile_ui_ptr2; -+user_int (*ui_array_ptr1)[10]; -+user_int_copy (*ui_array_ptr2)[10]; -+user_int (*ui_fn_ptr1) (void); -+void (*ui_fn_ptr2) (user_int); -+void (*ui_fn_ptr3) (user_int, ...); -+user_int_copy (*ui_fn_ptr4) (void); -+void (*ui_fn_ptr5) (user_int_copy); -+void (*ui_fn_ptr6) (user_int_copy, ...); -+user_int (*__attribute__((__transaction_unsafe__)) unsafe_ui_fn_ptr1) (void); -+user_int_copy (*__attribute__((__transaction_unsafe__)) unsafe_ui_fn_ptr2) (void); -+ -+void f (s s1) -+{ -+ ue1 = s1; // { dg-error {cannot convert 's' to 'user_enum' in assignment} } -+ ue2 = s1; // { dg-error {cannot convert 's' to 'user_enum_copy' {aka 'user_enum'} in assignment} } -+ ue_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_enum_ptr' {aka 'user_enum\*'} in assignment} } -+ ue_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'user_enum\*' in assignment} } -+ const_ue_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'const user_enum\*' in assignment} } -+ const_ue_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'const user_enum_copy\*' {aka 'const user_enum\*'} in assignment} } -+ volatile_ue_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'volatile user_enum\*' in assignment} } -+ volatile_ue_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'volatile user_enum_copy\*' {aka 'volatile user_enum\*'} in assignment} } -+ ue_array_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_enum \(\*\)\[10\]' in assignment} } -+ ue_array_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'user_enum_copy \(\*\)\[10\]' {aka 'user_enum \(\*\)\[10\]'} in assignment} } -+ ue_fn_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_enum \(\*\)\(\)' in assignment} } -+ ue_fn_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'void \(\*\)\(user_enum\)' in assignment} } -+ 
ue_fn_ptr3 = &s1; // { dg-error {cannot convert 's\*' to 'void \(\*\)\(user_enum, \.\.\.\)' in assignment} } -+ ue_fn_ptr4 = &s1; // { dg-error {cannot convert 's\*' to 'user_enum_copy \(\*\)\(\)' {aka 'user_enum \(\*\)\(\)'} in assignment} } -+ ue_fn_ptr5 = &s1; // { dg-error {cannot convert 's\*' to 'void \(\*\)\(user_enum_copy\)' {aka 'void \(\*\)\(user_enum\)'} in assignment} } -+ ue_fn_ptr6 = &s1; // { dg-error {cannot convert 's\*' to 'void \(\*\)\(user_enum_copy, \.\.\.\)' {aka 'void \(\*\)\(user_enum, \.\.\.\)'} in assignment} } -+ unsafe_ue_fn_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_enum \(__attribute__\(\(transaction_unsafe\)\) \*\)\(\)' in assignment} } -+ unsafe_ue_fn_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'user_enum_copy \(__attribute__\(\(transaction_unsafe\)\) \*\)\(\)' {aka 'user_enum \(__attribute__\(\(transaction_unsafe\)\) \*\)\(\)'} in assignment} } -+ -+ us1 = s1; // { dg-error {no match for 'operator=' in 'us1 = s1' \(operand types are 'user_struct' and 's'\)} } -+ us2 = s1; // { dg-error {no match for 'operator=' in 'us2 = s1' \(operand types are 'user_struct_copy' {aka 'user_struct'} and 's'\)} } -+ us_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_struct_ptr' {aka 'user_struct\*'} in assignment} } -+ us_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'user_struct\*' in assignment} } -+ const_us_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'const user_struct\*' in assignment} } -+ const_us_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'const user_struct_copy\*' {aka 'const user_struct\*'} in assignment} } -+ -+ uu1 = s1; // { dg-error {no match for 'operator=' in 'uu1 = s1' \(operand types are 'user_union' and 's'\)} } -+ uu2 = s1; // { dg-error {no match for 'operator=' in 'uu2 = s1' \(operand types are 'user_union_copy' {aka 'user_union'} and 's'\)} } -+ uu_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_union_ptr' {aka 'user_union\*'} in assignment} } -+ uu_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'user_union\*' in assignment} } -+ const_uu_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'const user_union\*' in assignment} } -+ const_uu_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'const user_union_copy\*' {aka 'const user_union\*'} in assignment} } -+ -+ uv1 = s1; // { dg-error {cannot convert 's' to 'user_vector' in assignment} } -+ uv2 = s1; // { dg-error {cannot convert 's' to 'user_vector_copy' {aka 'user_vector'} in assignment} } -+ uv_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_vector_ptr' {aka 'user_vector\*'} in assignment} } -+ uv_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'user_vector\*' in assignment} } -+ const_uv_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'const user_vector\*' in assignment} } -+ const_uv_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'const user_vector_copy\*' {aka 'const user_vector\*'} in assignment} } -+ -+ ui1 = s1; // { dg-error {cannot convert 's' to 'user_int' {aka 'int'} in assignment} } -+ ui2 = s1; // { dg-error {cannot convert 's' to 'user_int_copy' {aka 'int'} in assignment} } -+ ui_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_int_ptr' {aka 'int\*'} in assignment} } -+ ui_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'user_int\*' {aka 'int\*'} in assignment} } -+ const_ui_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'const user_int\*' {aka 'const int\*'} in assignment} } -+ const_ui_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'const user_int_copy\*' {aka 'const int\*'} in 
assignment} } -+ volatile_ui_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'volatile user_int\*' {aka 'volatile int\*'} in assignment} } -+ volatile_ui_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'volatile user_int_copy\*' {aka 'volatile int\*'} in assignment} } -+ ui_array_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_int \(\*\)\[10\]' {aka 'int \(\*\)\[10\]'} in assignment} } -+ ui_array_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'user_int_copy \(\*\)\[10\]' {aka 'int \(\*\)\[10\]'} in assignment} } -+ ui_fn_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_int \(\*\)\(\)' {aka 'int \(\*\)\(\)'} in assignment} } -+ ui_fn_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'void \(\*\)\(user_int\)' {aka 'void \(\*\)\(int\)'} in assignment} } -+ ui_fn_ptr3 = &s1; // { dg-error {cannot convert 's\*' to 'void \(\*\)\(user_int, \.\.\.\)' {aka 'void \(\*\)\(int, \.\.\.\)'} in assignment} } -+ ui_fn_ptr4 = &s1; // { dg-error {cannot convert 's\*' to 'user_int_copy \(\*\)\(\)' {aka 'int \(\*\)\(\)'} in assignment} } -+ ui_fn_ptr5 = &s1; // { dg-error {cannot convert 's\*' to 'void \(\*\)\(user_int_copy\)' {aka 'void \(\*\)\(int\)'} in assignment} } -+ ui_fn_ptr6 = &s1; // { dg-error {cannot convert 's\*' to 'void \(\*\)\(user_int_copy, \.\.\.\)' {aka 'void \(\*\)\(int, \.\.\.\)'} in assignment} } -+ unsafe_ui_fn_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_int \(__attribute__\(\(transaction_unsafe\)\) \*\)\(\)' {aka 'int \(__attribute__\(\(transaction_unsafe\)\) \*\)\(\)'} in assignment} } -+ unsafe_ui_fn_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'user_int_copy \(__attribute__\(\(transaction_unsafe\)\) \*\)\(\)' {aka 'int \(__attribute__\(\(transaction_unsafe\)\) \*\)\(\)'} in assignment} } -+} -diff --git a/gcc/testsuite/g++.dg/diagnostic/aka5b.C b/gcc/testsuite/g++.dg/diagnostic/aka5b.C -new file mode 100644 -index 000000000..6942be3ee ---- /dev/null -+++ b/gcc/testsuite/g++.dg/diagnostic/aka5b.C -@@ -0,0 +1,127 @@ -+#include "aka5.h" -+ -+typedef user_enum user_enum_copy; -+ -+struct s { int i; }; -+ -+user_enum ue1; -+user_enum_copy ue2; -+user_enum_ptr ue_ptr1; -+user_enum *ue_ptr2; -+const user_enum *const_ue_ptr1; -+const user_enum_copy *const_ue_ptr2; -+volatile user_enum *volatile_ue_ptr1; -+volatile user_enum_copy *volatile_ue_ptr2; -+user_enum (*ue_array_ptr1)[10]; -+user_enum_copy (*ue_array_ptr2)[10]; -+user_enum (*ue_fn_ptr1) (void); -+void (*ue_fn_ptr2) (user_enum); -+void (*ue_fn_ptr3) (user_enum, ...); -+user_enum_copy (*ue_fn_ptr4) (void); -+void (*ue_fn_ptr5) (user_enum_copy); -+void (*ue_fn_ptr6) (user_enum_copy, ...); -+user_enum (*__attribute__((__transaction_unsafe__)) unsafe_ue_fn_ptr1) (void); -+user_enum_copy (*__attribute__((__transaction_unsafe__)) unsafe_ue_fn_ptr2) (void); -+ -+user_struct us1; -+user_struct_copy us2; -+user_struct_ptr us_ptr1; -+user_struct *us_ptr2; -+const user_struct *const_us_ptr1; -+const user_struct_copy *const_us_ptr2; -+ -+user_union uu1; -+user_union_copy uu2; -+user_union_ptr uu_ptr1; -+user_union *uu_ptr2; -+const user_union *const_uu_ptr1; -+const user_union_copy *const_uu_ptr2; -+ -+user_vector uv1; -+user_vector_copy uv2; -+user_vector_ptr uv_ptr1; -+user_vector *uv_ptr2; -+const user_vector *const_uv_ptr1; -+const user_vector_copy *const_uv_ptr2; -+ -+user_int ui1; -+user_int_copy ui2; -+user_int_ptr ui_ptr1; -+user_int *ui_ptr2; -+const user_int *const_ui_ptr1; -+const user_int_copy *const_ui_ptr2; -+volatile user_int *volatile_ui_ptr1; -+volatile user_int_copy 
*volatile_ui_ptr2; -+user_int (*ui_array_ptr1)[10]; -+user_int_copy (*ui_array_ptr2)[10]; -+user_int (*ui_fn_ptr1) (void); -+void (*ui_fn_ptr2) (user_int); -+void (*ui_fn_ptr3) (user_int, ...); -+user_int_copy (*ui_fn_ptr4) (void); -+void (*ui_fn_ptr5) (user_int_copy); -+void (*ui_fn_ptr6) (user_int_copy, ...); -+user_int (*__attribute__((__transaction_unsafe__)) unsafe_ui_fn_ptr1) (void); -+user_int_copy (*__attribute__((__transaction_unsafe__)) unsafe_ui_fn_ptr2) (void); -+ -+void f (s s1) -+{ -+ ue1 = s1; // { dg-error {cannot convert 's' to 'user_enum' {aka '__internal_enum'} in assignment} } -+ ue2 = s1; // { dg-error {cannot convert 's' to 'user_enum_copy' {aka '__internal_enum'} in assignment} } -+ ue_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_enum_ptr' {aka '__internal_enum\*'} in assignment} } -+ ue_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'user_enum\*' {aka '__internal_enum\*'} in assignment} } -+ const_ue_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'const user_enum\*' {aka 'const __internal_enum\*'} in assignment} } -+ const_ue_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'const user_enum_copy\*' {aka 'const __internal_enum\*'} in assignment} } -+ volatile_ue_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'volatile user_enum\*' {aka 'volatile __internal_enum\*'} in assignment} } -+ volatile_ue_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'volatile user_enum_copy\*' {aka 'volatile __internal_enum\*'} in assignment} } -+ ue_array_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_enum \(\*\)\[10\]' {aka '__internal_enum \(\*\)\[10\]'} in assignment} } -+ ue_array_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'user_enum_copy \(\*\)\[10\]' {aka '__internal_enum \(\*\)\[10\]'} in assignment} } -+ ue_fn_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_enum \(\*\)\(\)' {aka '__internal_enum \(\*\)\(\)'} in assignment} } -+ ue_fn_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'void \(\*\)\(user_enum\)' {aka 'void \(\*\)\(__internal_enum\)'} in assignment} } -+ ue_fn_ptr3 = &s1; // { dg-error {cannot convert 's\*' to 'void \(\*\)\(user_enum, \.\.\.\)' {aka 'void \(\*\)\(__internal_enum, \.\.\.\)'} in assignment} } -+ ue_fn_ptr4 = &s1; // { dg-error {cannot convert 's\*' to 'user_enum_copy \(\*\)\(\)' {aka '__internal_enum \(\*\)\(\)'} in assignment} } -+ ue_fn_ptr5 = &s1; // { dg-error {cannot convert 's\*' to 'void \(\*\)\(user_enum_copy\)' {aka 'void \(\*\)\(__internal_enum\)'} in assignment} } -+ ue_fn_ptr6 = &s1; // { dg-error {cannot convert 's\*' to 'void \(\*\)\(user_enum_copy, \.\.\.\)' {aka 'void \(\*\)\(__internal_enum, \.\.\.\)'} in assignment} } -+ unsafe_ue_fn_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_enum \(__attribute__\(\(transaction_unsafe\)\) \*\)\(\)' {aka '__internal_enum \(__attribute__\(\(transaction_unsafe\)\) \*\)\(\)'} in assignment} } -+ unsafe_ue_fn_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'user_enum_copy \(__attribute__\(\(transaction_unsafe\)\) \*\)\(\)' {aka '__internal_enum \(__attribute__\(\(transaction_unsafe\)\) \*\)\(\)'} in assignment} } -+ -+ us1 = s1; // { dg-error {no match for 'operator=' in 'us1 = s1' \(operand types are 'user_struct' {aka '__internal_struct'} and 's'\)} } -+ us2 = s1; // { dg-error {no match for 'operator=' in 'us2 = s1' \(operand types are 'user_struct_copy' {aka '__internal_struct'} and 's'\)} } -+ us_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_struct_ptr' {aka '__internal_struct\*'} in assignment} } -+ us_ptr2 = &s1; // 
{ dg-error {cannot convert 's\*' to 'user_struct\*' {aka '__internal_struct\*'} in assignment} } -+ const_us_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'const user_struct\*' {aka 'const __internal_struct\*'} in assignment} } -+ const_us_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'const user_struct_copy\*' {aka 'const __internal_struct\*'} in assignment} } -+ -+ uu1 = s1; // { dg-error {no match for 'operator=' in 'uu1 = s1' \(operand types are 'user_union' {aka '__internal_union'} and 's'\)} } -+ uu2 = s1; // { dg-error {no match for 'operator=' in 'uu2 = s1' \(operand types are 'user_union_copy' {aka '__internal_union'} and 's'\)} } -+ uu_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_union_ptr' {aka '__internal_union\*'} in assignment} } -+ uu_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'user_union\*' {aka '__internal_union\*'} in assignment} } -+ const_uu_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'const user_union\*' {aka 'const __internal_union\*'} in assignment} } -+ const_uu_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'const user_union_copy\*' {aka 'const __internal_union\*'} in assignment} } -+ -+ uv1 = s1; // { dg-error {cannot convert 's' to 'user_vector' {aka '__vector\([48]\) unsigned int'} in assignment} } -+ uv2 = s1; // { dg-error {cannot convert 's' to 'user_vector_copy' {aka '__vector\([48]\) unsigned int'} in assignment} } -+ uv_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_vector_ptr' {aka '__vector\([48]\) unsigned int\*'} in assignment} } -+ uv_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'user_vector\*' {aka '__vector\([48]\) unsigned int\*'} in assignment} } -+ const_uv_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'const user_vector\*' {aka 'const __vector\([48]\) unsigned int\*'} in assignment} } -+ const_uv_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'const user_vector_copy\*' {aka 'const __vector\([48]\) unsigned int\*'} in assignment} } -+ -+ ui1 = s1; // { dg-error {cannot convert 's' to 'user_int' {aka 'int'} in assignment} } -+ ui2 = s1; // { dg-error {cannot convert 's' to 'user_int_copy' {aka 'int'} in assignment} } -+ ui_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_int_ptr' {aka 'int\*'} in assignment} } -+ ui_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'user_int\*' {aka 'int\*'} in assignment} } -+ const_ui_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'const user_int\*' {aka 'const int\*'} in assignment} } -+ const_ui_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'const user_int_copy\*' {aka 'const int\*'} in assignment} } -+ volatile_ui_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'volatile user_int\*' {aka 'volatile int\*'} in assignment} } -+ volatile_ui_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'volatile user_int_copy\*' {aka 'volatile int\*'} in assignment} } -+ ui_array_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_int \(\*\)\[10\]' {aka 'int \(\*\)\[10\]'} in assignment} } -+ ui_array_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'user_int_copy \(\*\)\[10\]' {aka 'int \(\*\)\[10\]'} in assignment} } -+ ui_fn_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_int \(\*\)\(\)' {aka 'int \(\*\)\(\)'} in assignment} } -+ ui_fn_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'void \(\*\)\(user_int\)' {aka 'void \(\*\)\(int\)'} in assignment} } -+ ui_fn_ptr3 = &s1; // { dg-error {cannot convert 's\*' to 'void \(\*\)\(user_int, \.\.\.\)' {aka 'void \(\*\)\(int, \.\.\.\)'} in assignment} } -+ ui_fn_ptr4 = &s1; // 
{ dg-error {cannot convert 's\*' to 'user_int_copy \(\*\)\(\)' {aka 'int \(\*\)\(\)'} in assignment} } -+ ui_fn_ptr5 = &s1; // { dg-error {cannot convert 's\*' to 'void \(\*\)\(user_int_copy\)' {aka 'void \(\*\)\(int\)'} in assignment} } -+ ui_fn_ptr6 = &s1; // { dg-error {cannot convert 's\*' to 'void \(\*\)\(user_int_copy, \.\.\.\)' {aka 'void \(\*\)\(int, \.\.\.\)'} in assignment} } -+ unsafe_ui_fn_ptr1 = &s1; // { dg-error {cannot convert 's\*' to 'user_int \(__attribute__\(\(transaction_unsafe\)\) \*\)\(\)' {aka 'int \(__attribute__\(\(transaction_unsafe\)\) \*\)\(\)'} in assignment} } -+ unsafe_ui_fn_ptr2 = &s1; // { dg-error {cannot convert 's\*' to 'user_int_copy \(__attribute__\(\(transaction_unsafe\)\) \*\)\(\)' {aka 'int \(__attribute__\(\(transaction_unsafe\)\) \*\)\(\)'} in assignment} } -+} -+ -diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr61034.C b/gcc/testsuite/g++.dg/tree-ssa/pr61034.C -index 2e3dfecac..6a76adb5b 100644 ---- a/gcc/testsuite/g++.dg/tree-ssa/pr61034.C -+++ b/gcc/testsuite/g++.dg/tree-ssa/pr61034.C -@@ -1,5 +1,5 @@ - // { dg-do compile } --// { dg-options "-O2 -fdump-tree-fre3 -fdump-tree-optimized -fdelete-null-pointer-checks --param early-inlining-insns-O2=14" } -+// { dg-options "-O2 -fdump-tree-fre3 -fdump-tree-optimized -fdelete-null-pointer-checks --param early-inlining-insns-O2=14 --param max-inline-insns-single-O2=200" } - - #define assume(x) if(!(x))__builtin_unreachable() - -diff --git a/gcc/testsuite/g++.target/aarch64/diag_aka_1.C b/gcc/testsuite/g++.target/aarch64/diag_aka_1.C -new file mode 100644 -index 000000000..6b489981f ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/diag_aka_1.C -@@ -0,0 +1,13 @@ -+#include -+ -+typedef int16x4_t myvec; -+ -+void f (float x) -+{ -+ __Int8x8_t y1 = x; // { dg-error {cannot convert 'float' to '__Int8x8_t' in initialization} } -+ __Int8x8_t *ptr1 = &x; // { dg-error {cannot convert 'float\*' to '__Int8x8_t\*' in initialization} } -+ int8x8_t y2 = x; // { dg-error {cannot convert 'float' to 'int8x8_t' in initialization} } -+ int8x8_t *ptr2 = &x; // { dg-error {cannot convert 'float\*' to 'int8x8_t\*' in initialization} } -+ myvec y3 = x; // { dg-error {cannot convert 'float' to 'myvec' {aka 'int16x4_t'} in initialization} } -+ myvec *ptr3 = &x; // { dg-error {cannot convert 'float\*' to 'myvec\*' {aka 'int16x4_t\*'} in initialization} } -+} -diff --git a/gcc/testsuite/gcc.dg/diag-aka-1.c b/gcc/testsuite/gcc.dg/diag-aka-1.c -index fde4ca7c7..3383c1c26 100644 ---- a/gcc/testsuite/gcc.dg/diag-aka-1.c -+++ b/gcc/testsuite/gcc.dg/diag-aka-1.c -@@ -2,7 +2,7 @@ - /* { dg-options "-Wc++-compat" } */ - - typedef struct A { int i; } B; --typedef struct T { int i; } T; -+typedef struct T { int i; } *T; /* { dg-warning "using 'T' as both a typedef and a tag is invalid" } */ - typedef const float TFA; - typedef TFA TFB; - typedef TFB TFC; -@@ -24,6 +24,6 @@ bar (B *b, int *i) - int - foo (void *a) - { -- T *t = a; /* { dg-warning "request for implicit conversion from 'void \\*' to 'T \\*' {aka 'struct T \\*'} not" } */ -+ T t = a; /* { dg-warning "request for implicit conversion from 'void \\*' to 'T' {aka 'struct T \\*'} not" } */ - return t->i; - } -diff --git a/gcc/testsuite/gcc.dg/diag-aka-4.c b/gcc/testsuite/gcc.dg/diag-aka-4.c -new file mode 100644 -index 000000000..cf98dd96a ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/diag-aka-4.c -@@ -0,0 +1,72 @@ -+typedef struct struct_wrapper { int i; } struct_wrapper; -+typedef struct { int i; } anon_struct_wrapper; -+ -+typedef union union_wrapper { int i; } 
union_wrapper; -+typedef union { int i; } anon_union_wrapper; -+ -+typedef enum enum_wrapper { A, B } enum_wrapper; -+typedef enum { C, D } anon_enum_wrapper; -+ -+void test_struct_wrapper (struct_wrapper y, int x) -+{ -+ struct_wrapper *ptr = &x; /* { dg-error {initialization of 'struct_wrapper \*' from incompatible pointer type 'int \*'} } */ -+ const struct_wrapper *const_ptr = &x; /* { dg-error {initialization of 'const struct_wrapper \*' from incompatible pointer type 'int \*'} } */ -+ volatile struct_wrapper *volatile_ptr = &x; /* { dg-error {initialization of 'volatile struct_wrapper \*' from incompatible pointer type 'int \*'} } */ -+ struct_wrapper (*aptr)[10] = &x; /* { dg-error {initialization of 'struct_wrapper \(\*\)\[10\]' from incompatible pointer type 'int \*'} } */ -+ struct_wrapper (*f1)(int) = &x; /* { dg-error {initialization of 'struct_wrapper \(\*\)\(int\)' from incompatible pointer type 'int \*'} } */ -+ int (*f2)(struct_wrapper) = &x; /* { dg-error {initialization of 'int \(\*\)\(struct_wrapper\)' from incompatible pointer type 'int \*'} } */ -+ y = x; /* { dg-error {incompatible types when assigning to type 'struct_wrapper' from type 'int'} } */ -+} -+ -+void test_anon_struct_wrapper (anon_struct_wrapper y, int x) -+{ -+ anon_struct_wrapper *ptr = &x; /* { dg-error {initialization of 'anon_struct_wrapper \*' from incompatible pointer type 'int \*'} } */ -+ const anon_struct_wrapper *const_ptr = &x; /* { dg-error {initialization of 'const anon_struct_wrapper \*' from incompatible pointer type 'int \*'} } */ -+ volatile anon_struct_wrapper *volatile_ptr = &x; /* { dg-error {initialization of 'volatile anon_struct_wrapper \*' from incompatible pointer type 'int \*'} } */ -+ anon_struct_wrapper (*aptr)[10] = &x; /* { dg-error {initialization of 'anon_struct_wrapper \(\*\)\[10\]' from incompatible pointer type 'int \*'} } */ -+ anon_struct_wrapper (*f1)(int) = &x; /* { dg-error {initialization of 'anon_struct_wrapper \(\*\)\(int\)' from incompatible pointer type 'int \*'} } */ -+ int (*f2)(anon_struct_wrapper) = &x; /* { dg-error {initialization of 'int \(\*\)\(anon_struct_wrapper\)' from incompatible pointer type 'int \*'} } */ -+ y = x; /* { dg-error {incompatible types when assigning to type 'anon_struct_wrapper' from type 'int'} } */ -+} -+ -+void test_union_wrapper (union_wrapper y, int x) -+{ -+ union_wrapper *ptr = &x; /* { dg-error {initialization of 'union_wrapper \*' from incompatible pointer type 'int \*'} } */ -+ const union_wrapper *const_ptr = &x; /* { dg-error {initialization of 'const union_wrapper \*' from incompatible pointer type 'int \*'} } */ -+ volatile union_wrapper *volatile_ptr = &x; /* { dg-error {initialization of 'volatile union_wrapper \*' from incompatible pointer type 'int \*'} } */ -+ union_wrapper (*aptr)[10] = &x; /* { dg-error {initialization of 'union_wrapper \(\*\)\[10\]' from incompatible pointer type 'int \*'} } */ -+ union_wrapper (*f1)(int) = &x; /* { dg-error {initialization of 'union_wrapper \(\*\)\(int\)' from incompatible pointer type 'int \*'} } */ -+ int (*f2)(union_wrapper) = &x; /* { dg-error {initialization of 'int \(\*\)\(union_wrapper\)' from incompatible pointer type 'int \*'} } */ -+ y = x; /* { dg-error {incompatible types when assigning to type 'union_wrapper' from type 'int'} } */ -+} -+ -+void test_anon_union_wrapper (anon_union_wrapper y, int x) -+{ -+ anon_union_wrapper *ptr = &x; /* { dg-error {initialization of 'anon_union_wrapper \*' from incompatible pointer type 'int \*'} } */ -+ const anon_union_wrapper 
*const_ptr = &x; /* { dg-error {initialization of 'const anon_union_wrapper \*' from incompatible pointer type 'int \*'} } */ -+ volatile anon_union_wrapper *volatile_ptr = &x; /* { dg-error {initialization of 'volatile anon_union_wrapper \*' from incompatible pointer type 'int \*'} } */ -+ anon_union_wrapper (*aptr)[10] = &x; /* { dg-error {initialization of 'anon_union_wrapper \(\*\)\[10\]' from incompatible pointer type 'int \*'} } */ -+ anon_union_wrapper (*f1)(int) = &x; /* { dg-error {initialization of 'anon_union_wrapper \(\*\)\(int\)' from incompatible pointer type 'int \*'} } */ -+ int (*f2)(anon_union_wrapper) = &x; /* { dg-error {initialization of 'int \(\*\)\(anon_union_wrapper\)' from incompatible pointer type 'int \*'} } */ -+ y = x; /* { dg-error {incompatible types when assigning to type 'anon_union_wrapper' from type 'int'} } */ -+} -+ -+void test_enum_wrapper (enum_wrapper y, int x) -+{ -+ enum_wrapper *ptr = &x; /* { dg-error {initialization of 'enum_wrapper \*' from incompatible pointer type 'int \*'} } */ -+ const enum_wrapper *const_ptr = &x; /* { dg-error {initialization of 'const enum_wrapper \*' from incompatible pointer type 'int \*'} } */ -+ volatile enum_wrapper *volatile_ptr = &x; /* { dg-error {initialization of 'volatile enum_wrapper \*' from incompatible pointer type 'int \*'} } */ -+ enum_wrapper (*aptr)[10] = &x; /* { dg-error {initialization of 'enum_wrapper \(\*\)\[10\]' from incompatible pointer type 'int \*'} } */ -+ enum_wrapper (*f1)(int) = &x; /* { dg-error {initialization of 'enum_wrapper \(\*\)\(int\)' from incompatible pointer type 'int \*'} } */ -+ int (*f2)(enum_wrapper) = &x; /* { dg-error {initialization of 'int \(\*\)\(enum_wrapper\)' from incompatible pointer type 'int \*'} } */ -+} -+ -+void test_anon_enum_wrapper (anon_enum_wrapper y, int x) -+{ -+ anon_enum_wrapper *ptr = &x; /* { dg-error {initialization of 'anon_enum_wrapper \*' from incompatible pointer type 'int \*'} } */ -+ const anon_enum_wrapper *const_ptr = &x; /* { dg-error {initialization of 'const anon_enum_wrapper \*' from incompatible pointer type 'int \*'} } */ -+ volatile anon_enum_wrapper *volatile_ptr = &x; /* { dg-error {initialization of 'volatile anon_enum_wrapper \*' from incompatible pointer type 'int \*'} } */ -+ anon_enum_wrapper (*aptr)[10] = &x; /* { dg-error {initialization of 'anon_enum_wrapper \(\*\)\[10\]' from incompatible pointer type 'int \*'} } */ -+ anon_enum_wrapper (*f1)(int) = &x; /* { dg-error {initialization of 'anon_enum_wrapper \(\*\)\(int\)' from incompatible pointer type 'int \*'} } */ -+ int (*f2)(anon_enum_wrapper) = &x; /* { dg-error {initialization of 'int \(\*\)\(anon_enum_wrapper\)' from incompatible pointer type 'int \*'} } */ -+} -diff --git a/gcc/testsuite/gcc.dg/diag-aka-5.h b/gcc/testsuite/gcc.dg/diag-aka-5.h -new file mode 100644 -index 000000000..0c7404d76 ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/diag-aka-5.h -@@ -0,0 +1,22 @@ -+#ifdef IS_SYSTEM_HEADER -+#pragma GCC system_header -+#endif -+ -+typedef enum __internal_enum { A, B } user_enum; -+typedef user_enum *user_enum_ptr; -+ -+typedef struct __internal_struct { int i; } user_struct; -+typedef user_struct user_struct_copy; -+typedef user_struct *user_struct_ptr; -+ -+typedef union __internal_union { int i; } user_union; -+typedef user_union user_union_copy; -+typedef user_union *user_union_ptr; -+ -+typedef unsigned int user_vector __attribute__((__vector_size__(16))); -+typedef user_vector user_vector_copy; -+typedef user_vector *user_vector_ptr; -+ -+typedef int user_int; 
-+typedef user_int user_int_copy; -+typedef user_int *user_int_ptr; -diff --git a/gcc/testsuite/gcc.dg/diag-aka-5a.c b/gcc/testsuite/gcc.dg/diag-aka-5a.c -new file mode 100644 -index 000000000..573020659 ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/diag-aka-5a.c -@@ -0,0 +1,135 @@ -+#define IS_SYSTEM_HEADER -+#include "diag-aka-5.h" -+ -+typedef user_enum user_enum_copy; -+ -+struct s { int i; }; -+ -+user_enum ue1; -+user_enum_copy ue2; -+user_enum_ptr ue_ptr1; -+user_enum *ue_ptr2; -+const user_enum *const_ue_ptr1; -+const user_enum_copy *const_ue_ptr2; -+volatile user_enum *volatile_ue_ptr1; -+volatile user_enum_copy *volatile_ue_ptr2; -+__extension__ _Atomic user_enum *atomic_ue_ptr1; -+__extension__ _Atomic user_enum_copy *atomic_ue_ptr2; -+user_enum (*ue_array_ptr1)[10]; -+user_enum_copy (*ue_array_ptr2)[10]; -+user_enum (*ue_fn_ptr1) (void); -+void (*ue_fn_ptr2) (user_enum); -+void (*ue_fn_ptr3) (user_enum, ...); -+user_enum_copy (*ue_fn_ptr4) (void); -+void (*ue_fn_ptr5) (user_enum_copy); -+void (*ue_fn_ptr6) (user_enum_copy, ...); -+user_enum (*__attribute__((__transaction_unsafe__)) unsafe_ue_fn_ptr1) (void); -+user_enum_copy (*__attribute__((__transaction_unsafe__)) unsafe_ue_fn_ptr2) (void); -+ -+user_struct us1; -+user_struct_copy us2; -+user_struct_ptr us_ptr1; -+user_struct *us_ptr2; -+const user_struct *const_us_ptr1; -+const user_struct_copy *const_us_ptr2; -+ -+user_union uu1; -+user_union_copy uu2; -+user_union_ptr uu_ptr1; -+user_union *uu_ptr2; -+const user_union *const_uu_ptr1; -+const user_union_copy *const_uu_ptr2; -+ -+user_vector uv1; -+user_vector_copy uv2; -+user_vector_ptr uv_ptr1; -+user_vector *uv_ptr2; -+const user_vector *const_uv_ptr1; -+const user_vector_copy *const_uv_ptr2; -+ -+user_int ui1; -+user_int_copy ui2; -+user_int_ptr ui_ptr1; -+user_int *ui_ptr2; -+const user_int *const_ui_ptr1; -+const user_int_copy *const_ui_ptr2; -+volatile user_int *volatile_ui_ptr1; -+volatile user_int_copy *volatile_ui_ptr2; -+__extension__ _Atomic user_int *atomic_ui_ptr1; -+__extension__ _Atomic user_int_copy *atomic_ui_ptr2; -+user_int (*ui_array_ptr1)[10]; -+user_int_copy (*ui_array_ptr2)[10]; -+user_int (*ui_fn_ptr1) (void); -+void (*ui_fn_ptr2) (user_int); -+void (*ui_fn_ptr3) (user_int, ...); -+user_int_copy (*ui_fn_ptr4) (void); -+void (*ui_fn_ptr5) (user_int_copy); -+void (*ui_fn_ptr6) (user_int_copy, ...); -+user_int (*__attribute__((__transaction_unsafe__)) unsafe_ui_fn_ptr1) (void); -+user_int_copy (*__attribute__((__transaction_unsafe__)) unsafe_ui_fn_ptr2) (void); -+ -+void f (struct s s) -+{ -+ ue1 = s; /* { dg-error {assigning to type 'user_enum' from type 'struct s'} } */ -+ ue2 = s; /* { dg-error {assigning to type 'user_enum_copy' {aka 'user_enum'} from type 'struct s'} } */ -+ ue_ptr1 = &s; /* { dg-error {assignment to 'user_enum_ptr' {aka 'user_enum \*'} from incompatible pointer type 'struct s \*'} } */ -+ ue_ptr2 = &s; /* { dg-error {assignment to 'user_enum \*' from incompatible pointer type 'struct s \*'} } */ -+ const_ue_ptr1 = &s; /* { dg-error {assignment to 'const user_enum \*' from incompatible pointer type 'struct s \*'} } */ -+ const_ue_ptr2 = &s; /* { dg-error {assignment to 'const user_enum_copy \*' {aka 'const user_enum \*'} from incompatible pointer type 'struct s \*'} } */ -+ volatile_ue_ptr1 = &s; /* { dg-error {assignment to 'volatile user_enum \*' from incompatible pointer type 'struct s \*'} } */ -+ volatile_ue_ptr2 = &s; /* { dg-error {assignment to 'volatile user_enum_copy \*' {aka 'volatile user_enum \*'} from incompatible 
pointer type 'struct s \*'} } */ -+ atomic_ue_ptr1 = &s; /* { dg-error {assignment to '_Atomic user_enum \*' from incompatible pointer type 'struct s \*'} } */ -+ atomic_ue_ptr2 = &s; /* { dg-error {assignment to '_Atomic user_enum_copy \*' {aka '_Atomic user_enum \*'} from incompatible pointer type 'struct s \*'} } */ -+ ue_array_ptr1 = &s; /* { dg-error {assignment to 'user_enum \(\*\)\[10\]' from incompatible pointer type 'struct s \*'} } */ -+ ue_array_ptr2 = &s; /* { dg-error {assignment to 'user_enum_copy \(\*\)\[10\]' {aka 'user_enum \(\*\)\[10\]'} from incompatible pointer type 'struct s \*'} } */ -+ ue_fn_ptr1 = &s; /* { dg-error {assignment to 'user_enum \(\*\)\(void\)' from incompatible pointer type 'struct s \*'} } */ -+ ue_fn_ptr2 = &s; /* { dg-error {assignment to 'void \(\*\)\(user_enum\)' from incompatible pointer type 'struct s \*'} } */ -+ ue_fn_ptr3 = &s; /* { dg-error {assignment to 'void \(\*\)\(user_enum, \.\.\.\)' from incompatible pointer type 'struct s \*'} } */ -+ ue_fn_ptr4 = &s; /* { dg-error {assignment to 'user_enum_copy \(\*\)\(void\)' {aka 'user_enum \(\*\)\(void\)'} from incompatible pointer type 'struct s \*'} } */ -+ ue_fn_ptr5 = &s; /* { dg-error {assignment to 'void \(\*\)\(user_enum_copy\)' {aka 'void \(\*\)\(user_enum\)'} from incompatible pointer type 'struct s \*'} } */ -+ ue_fn_ptr6 = &s; /* { dg-error {assignment to 'void \(\*\)\(user_enum_copy, \.\.\.\)' {aka 'void \(\*\)\(user_enum, \.\.\.\)'} from incompatible pointer type 'struct s \*'} } */ -+ unsafe_ue_fn_ptr1 = &s; /* { dg-error {assignment to 'user_enum \(__attribute__\(\(transaction_unsafe\)\) \*\)\(void\)' from incompatible pointer type 'struct s \*'} } */ -+ unsafe_ue_fn_ptr2 = &s; /* { dg-error {assignment to 'user_enum_copy \(__attribute__\(\(transaction_unsafe\)\) \*\)\(void\)' {aka 'user_enum \(__attribute__\(\(transaction_unsafe\)\) \*\)\(void\)'} from incompatible pointer type 'struct s \*'} } */ -+ -+ us1 = s; /* { dg-error {assigning to type 'user_struct' from type 'struct s'} } */ -+ us2 = s; /* { dg-error {assigning to type 'user_struct_copy' {aka 'user_struct'} from type 'struct s'} } */ -+ us_ptr1 = &s; /* { dg-error {assignment to 'user_struct_ptr' {aka 'user_struct \*'} from incompatible pointer type 'struct s \*'} } */ -+ us_ptr2 = &s; /* { dg-error {assignment to 'user_struct \*' from incompatible pointer type 'struct s \*'} } */ -+ const_us_ptr1 = &s; /* { dg-error {assignment to 'const user_struct \*' from incompatible pointer type 'struct s \*'} } */ -+ const_us_ptr2 = &s; /* { dg-error {assignment to 'const user_struct_copy \*' {aka 'const user_struct \*'} from incompatible pointer type 'struct s \*'} } */ -+ -+ uu1 = s; /* { dg-error {assigning to type 'user_union' from type 'struct s'} } */ -+ uu2 = s; /* { dg-error {assigning to type 'user_union_copy' {aka 'user_union'} from type 'struct s'} } */ -+ uu_ptr1 = &s; /* { dg-error {assignment to 'user_union_ptr' {aka 'user_union \*'} from incompatible pointer type 'struct s \*'} } */ -+ uu_ptr2 = &s; /* { dg-error {assignment to 'user_union \*' from incompatible pointer type 'struct s \*'} } */ -+ const_uu_ptr1 = &s; /* { dg-error {assignment to 'const user_union \*' from incompatible pointer type 'struct s \*'} } */ -+ const_uu_ptr2 = &s; /* { dg-error {assignment to 'const user_union_copy \*' {aka 'const user_union \*'} from incompatible pointer type 'struct s \*'} } */ -+ -+ uv1 = s; /* { dg-error {assigning to type 'user_vector' from type 'struct s'} } */ -+ uv2 = s; /* { dg-error {assigning to type 
'user_vector_copy' {aka 'user_vector'} from type 'struct s'} } */ -+ uv_ptr1 = &s; /* { dg-error {assignment to 'user_vector_ptr' {aka 'user_vector \*'} from incompatible pointer type 'struct s \*'} } */ -+ uv_ptr2 = &s; /* { dg-error {assignment to 'user_vector \*' from incompatible pointer type 'struct s \*'} } */ -+ const_uv_ptr1 = &s; /* { dg-error {assignment to 'const user_vector \*' from incompatible pointer type 'struct s \*'} } */ -+ const_uv_ptr2 = &s; /* { dg-error {assignment to 'const user_vector_copy \*' {aka 'const user_vector \*'} from incompatible pointer type 'struct s \*'} } */ -+ -+ ui1 = s; /* { dg-error {assigning to type 'user_int' {aka 'int'} from type 'struct s'} } */ -+ ui2 = s; /* { dg-error {assigning to type 'user_int_copy' {aka 'int'} from type 'struct s'} } */ -+ ui_ptr1 = &s; /* { dg-error {assignment to 'user_int_ptr' {aka 'int \*'} from incompatible pointer type 'struct s \*'} } */ -+ ui_ptr2 = &s; /* { dg-error {assignment to 'user_int \*' {aka 'int \*'} from incompatible pointer type 'struct s \*'} } */ -+ const_ui_ptr1 = &s; /* { dg-error {assignment to 'const user_int \*' {aka 'const int \*'} from incompatible pointer type 'struct s \*'} } */ -+ const_ui_ptr2 = &s; /* { dg-error {assignment to 'const user_int_copy \*' {aka 'const int \*'} from incompatible pointer type 'struct s \*'} } */ -+ volatile_ui_ptr1 = &s; /* { dg-error {assignment to 'volatile user_int \*' {aka 'volatile int \*'} from incompatible pointer type 'struct s \*'} } */ -+ volatile_ui_ptr2 = &s; /* { dg-error {assignment to 'volatile user_int_copy \*' {aka 'volatile int \*'} from incompatible pointer type 'struct s \*'} } */ -+ atomic_ui_ptr1 = &s; /* { dg-error {assignment to '_Atomic user_int \*' {aka '_Atomic int \*'} from incompatible pointer type 'struct s \*'} } */ -+ atomic_ui_ptr2 = &s; /* { dg-error {assignment to '_Atomic user_int_copy \*' {aka '_Atomic int \*'} from incompatible pointer type 'struct s \*'} } */ -+ ui_array_ptr1 = &s; /* { dg-error {assignment to 'user_int \(\*\)\[10\]' {aka 'int \(\*\)\[10\]'} from incompatible pointer type 'struct s \*'} } */ -+ ui_array_ptr2 = &s; /* { dg-error {assignment to 'user_int_copy \(\*\)\[10\]' {aka 'int \(\*\)\[10\]'} from incompatible pointer type 'struct s \*'} } */ -+ ui_fn_ptr1 = &s; /* { dg-error {assignment to 'user_int \(\*\)\(void\)' {aka 'int \(\*\)\(void\)'} from incompatible pointer type 'struct s \*'} } */ -+ ui_fn_ptr2 = &s; /* { dg-error {assignment to 'void \(\*\)\(user_int\)' {aka 'void \(\*\)\(int\)'} from incompatible pointer type 'struct s \*'} } */ -+ ui_fn_ptr3 = &s; /* { dg-error {assignment to 'void \(\*\)\(user_int, \.\.\.\)' {aka 'void \(\*\)\(int, \.\.\.\)'} from incompatible pointer type 'struct s \*'} } */ -+ ui_fn_ptr4 = &s; /* { dg-error {assignment to 'user_int_copy \(\*\)\(void\)' {aka 'int \(\*\)\(void\)'} from incompatible pointer type 'struct s \*'} } */ -+ ui_fn_ptr5 = &s; /* { dg-error {assignment to 'void \(\*\)\(user_int_copy\)' {aka 'void \(\*\)\(int\)'} from incompatible pointer type 'struct s \*'} } */ -+ ui_fn_ptr6 = &s; /* { dg-error {assignment to 'void \(\*\)\(user_int_copy, \.\.\.\)' {aka 'void \(\*\)\(int, \.\.\.\)'} from incompatible pointer type 'struct s \*'} } */ -+ unsafe_ui_fn_ptr1 = &s; /* { dg-error {assignment to 'user_int \(__attribute__\(\(transaction_unsafe\)\) \*\)\(void\)' {aka 'int \(__attribute__\(\(transaction_unsafe\)\) \*\)\(void\)'} from incompatible pointer type 'struct s \*'} } */ -+ unsafe_ui_fn_ptr2 = &s; /* { dg-error {assignment to 'user_int_copy 
\(__attribute__\(\(transaction_unsafe\)\) \*\)\(void\)' {aka 'int \(__attribute__\(\(transaction_unsafe\)\) \*\)\(void\)'} from incompatible pointer type 'struct s \*'} } */ -+} -diff --git a/gcc/testsuite/gcc.dg/diag-aka-5b.c b/gcc/testsuite/gcc.dg/diag-aka-5b.c -new file mode 100644 -index 000000000..f510d0d40 ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/diag-aka-5b.c -@@ -0,0 +1,134 @@ -+#include "diag-aka-5.h" -+ -+typedef user_enum user_enum_copy; -+ -+struct s { int i; }; -+ -+user_enum ue1; -+user_enum_copy ue2; -+user_enum_ptr ue_ptr1; -+user_enum *ue_ptr2; -+const user_enum *const_ue_ptr1; -+const user_enum_copy *const_ue_ptr2; -+volatile user_enum *volatile_ue_ptr1; -+volatile user_enum_copy *volatile_ue_ptr2; -+__extension__ _Atomic user_enum *atomic_ue_ptr1; -+__extension__ _Atomic user_enum_copy *atomic_ue_ptr2; -+user_enum (*ue_array_ptr1)[10]; -+user_enum_copy (*ue_array_ptr2)[10]; -+user_enum (*ue_fn_ptr1) (void); -+void (*ue_fn_ptr2) (user_enum); -+void (*ue_fn_ptr3) (user_enum, ...); -+user_enum_copy (*ue_fn_ptr4) (void); -+void (*ue_fn_ptr5) (user_enum_copy); -+void (*ue_fn_ptr6) (user_enum_copy, ...); -+user_enum (*__attribute__((__transaction_unsafe__)) unsafe_ue_fn_ptr1) (void); -+user_enum_copy (*__attribute__((__transaction_unsafe__)) unsafe_ue_fn_ptr2) (void); -+ -+user_struct us1; -+user_struct_copy us2; -+user_struct_ptr us_ptr1; -+user_struct *us_ptr2; -+const user_struct *const_us_ptr1; -+const user_struct_copy *const_us_ptr2; -+ -+user_union uu1; -+user_union_copy uu2; -+user_union_ptr uu_ptr1; -+user_union *uu_ptr2; -+const user_union *const_uu_ptr1; -+const user_union_copy *const_uu_ptr2; -+ -+user_vector uv1; -+user_vector_copy uv2; -+user_vector_ptr uv_ptr1; -+user_vector *uv_ptr2; -+const user_vector *const_uv_ptr1; -+const user_vector_copy *const_uv_ptr2; -+ -+user_int ui1; -+user_int_copy ui2; -+user_int_ptr ui_ptr1; -+user_int *ui_ptr2; -+const user_int *const_ui_ptr1; -+const user_int_copy *const_ui_ptr2; -+volatile user_int *volatile_ui_ptr1; -+volatile user_int_copy *volatile_ui_ptr2; -+__extension__ _Atomic user_int *atomic_ui_ptr1; -+__extension__ _Atomic user_int_copy *atomic_ui_ptr2; -+user_int (*ui_array_ptr1)[10]; -+user_int_copy (*ui_array_ptr2)[10]; -+user_int (*ui_fn_ptr1) (void); -+void (*ui_fn_ptr2) (user_int); -+void (*ui_fn_ptr3) (user_int, ...); -+user_int_copy (*ui_fn_ptr4) (void); -+void (*ui_fn_ptr5) (user_int_copy); -+void (*ui_fn_ptr6) (user_int_copy, ...); -+user_int (*__attribute__((__transaction_unsafe__)) unsafe_ui_fn_ptr1) (void); -+user_int_copy (*__attribute__((__transaction_unsafe__)) unsafe_ui_fn_ptr2) (void); -+ -+void f (struct s s) -+{ -+ ue1 = s; /* { dg-error {assigning to type 'user_enum' {aka 'enum __internal_enum'} from type 'struct s'} } */ -+ ue2 = s; /* { dg-error {assigning to type 'user_enum_copy' {aka 'enum __internal_enum'} from type 'struct s'} } */ -+ ue_ptr1 = &s; /* { dg-error {assignment to 'user_enum_ptr' {aka 'enum __internal_enum \*'} from incompatible pointer type 'struct s \*'} } */ -+ ue_ptr2 = &s; /* { dg-error {assignment to 'user_enum \*' {aka 'enum __internal_enum \*'} from incompatible pointer type 'struct s \*'} } */ -+ const_ue_ptr1 = &s; /* { dg-error {assignment to 'const user_enum \*' {aka 'const enum __internal_enum \*'} from incompatible pointer type 'struct s \*'} } */ -+ const_ue_ptr2 = &s; /* { dg-error {assignment to 'const user_enum_copy \*' {aka 'const enum __internal_enum \*'} from incompatible pointer type 'struct s \*'} } */ -+ volatile_ue_ptr1 = &s; /* { dg-error {assignment 
to 'volatile user_enum \*' {aka 'volatile enum __internal_enum \*'} from incompatible pointer type 'struct s \*'} } */ -+ volatile_ue_ptr2 = &s; /* { dg-error {assignment to 'volatile user_enum_copy \*' {aka 'volatile enum __internal_enum \*'} from incompatible pointer type 'struct s \*'} } */ -+ atomic_ue_ptr1 = &s; /* { dg-error {assignment to '_Atomic user_enum \*' {aka '_Atomic enum __internal_enum \*'} from incompatible pointer type 'struct s \*'} } */ -+ atomic_ue_ptr2 = &s; /* { dg-error {assignment to '_Atomic user_enum_copy \*' {aka '_Atomic enum __internal_enum \*'} from incompatible pointer type 'struct s \*'} } */ -+ ue_array_ptr1 = &s; /* { dg-error {assignment to 'user_enum \(\*\)\[10\]' {aka 'enum __internal_enum \(\*\)\[10\]'} from incompatible pointer type 'struct s \*'} } */ -+ ue_array_ptr2 = &s; /* { dg-error {assignment to 'user_enum_copy \(\*\)\[10\]' {aka 'enum __internal_enum \(\*\)\[10\]'} from incompatible pointer type 'struct s \*'} } */ -+ ue_fn_ptr1 = &s; /* { dg-error {assignment to 'user_enum \(\*\)\(void\)' {aka 'enum __internal_enum \(\*\)\(void\)'} from incompatible pointer type 'struct s \*'} } */ -+ ue_fn_ptr2 = &s; /* { dg-error {assignment to 'void \(\*\)\(user_enum\)' {aka 'void \(\*\)\(enum __internal_enum\)'} from incompatible pointer type 'struct s \*'} } */ -+ ue_fn_ptr3 = &s; /* { dg-error {assignment to 'void \(\*\)\(user_enum, \.\.\.\)' {aka 'void \(\*\)\(enum __internal_enum, \.\.\.\)'} from incompatible pointer type 'struct s \*'} } */ -+ ue_fn_ptr4 = &s; /* { dg-error {assignment to 'user_enum_copy \(\*\)\(void\)' {aka 'enum __internal_enum \(\*\)\(void\)'} from incompatible pointer type 'struct s \*'} } */ -+ ue_fn_ptr5 = &s; /* { dg-error {assignment to 'void \(\*\)\(user_enum_copy\)' {aka 'void \(\*\)\(enum __internal_enum\)'} from incompatible pointer type 'struct s \*'} } */ -+ ue_fn_ptr6 = &s; /* { dg-error {assignment to 'void \(\*\)\(user_enum_copy, \.\.\.\)' {aka 'void \(\*\)\(enum __internal_enum, \.\.\.\)'} from incompatible pointer type 'struct s \*'} } */ -+ unsafe_ue_fn_ptr1 = &s; /* { dg-error {assignment to 'user_enum \(__attribute__\(\(transaction_unsafe\)\) \*\)\(void\)' {aka 'enum __internal_enum \(__attribute__\(\(transaction_unsafe\)\) \*\)\(void\)'} from incompatible pointer type 'struct s \*'} } */ -+ unsafe_ue_fn_ptr2 = &s; /* { dg-error {assignment to 'user_enum_copy \(__attribute__\(\(transaction_unsafe\)\) \*\)\(void\)' {aka 'enum __internal_enum \(__attribute__\(\(transaction_unsafe\)\) \*\)\(void\)'} from incompatible pointer type 'struct s \*'} } */ -+ -+ us1 = s; /* { dg-error {assigning to type 'user_struct' {aka 'struct __internal_struct'} from type 'struct s'} } */ -+ us2 = s; /* { dg-error {assigning to type 'user_struct_copy' {aka 'struct __internal_struct'} from type 'struct s'} } */ -+ us_ptr1 = &s; /* { dg-error {assignment to 'user_struct_ptr' {aka 'struct __internal_struct \*'} from incompatible pointer type 'struct s \*'} } */ -+ us_ptr2 = &s; /* { dg-error {assignment to 'user_struct \*' {aka 'struct __internal_struct \*'} from incompatible pointer type 'struct s \*'} } */ -+ const_us_ptr1 = &s; /* { dg-error {assignment to 'const user_struct \*' {aka 'const struct __internal_struct \*'} from incompatible pointer type 'struct s \*'} } */ -+ const_us_ptr2 = &s; /* { dg-error {assignment to 'const user_struct_copy \*' {aka 'const struct __internal_struct \*'} from incompatible pointer type 'struct s \*'} } */ -+ -+ uu1 = s; /* { dg-error {assigning to type 'user_union' {aka 'union __internal_union'} 
from type 'struct s'} } */ -+ uu2 = s; /* { dg-error {assigning to type 'user_union_copy' {aka 'union __internal_union'} from type 'struct s'} } */ -+ uu_ptr1 = &s; /* { dg-error {assignment to 'user_union_ptr' {aka 'union __internal_union \*'} from incompatible pointer type 'struct s \*'} } */ -+ uu_ptr2 = &s; /* { dg-error {assignment to 'user_union \*' {aka 'union __internal_union \*'} from incompatible pointer type 'struct s \*'} } */ -+ const_uu_ptr1 = &s; /* { dg-error {assignment to 'const user_union \*' {aka 'const union __internal_union \*'} from incompatible pointer type 'struct s \*'} } */ -+ const_uu_ptr2 = &s; /* { dg-error {assignment to 'const user_union_copy \*' {aka 'const union __internal_union \*'} from incompatible pointer type 'struct s \*'} } */ -+ -+ uv1 = s; /* { dg-error {assigning to type 'user_vector' {aka '__vector\([48]\) unsigned int'} from type 'struct s'} } */ -+ uv2 = s; /* { dg-error {assigning to type 'user_vector_copy' {aka '__vector\([48]\) unsigned int'} from type 'struct s'} } */ -+ uv_ptr1 = &s; /* { dg-error {assignment to 'user_vector_ptr' {aka '__vector\([48]\) unsigned int \*'} from incompatible pointer type 'struct s \*'} } */ -+ uv_ptr2 = &s; /* { dg-error {assignment to 'user_vector \*' {aka '__vector\([48]\) unsigned int \*'} from incompatible pointer type 'struct s \*'} } */ -+ const_uv_ptr1 = &s; /* { dg-error {assignment to 'const user_vector \*' {aka 'const __vector\([48]\) unsigned int \*'} from incompatible pointer type 'struct s \*'} } */ -+ const_uv_ptr2 = &s; /* { dg-error {assignment to 'const user_vector_copy \*' {aka 'const __vector\([48]\) unsigned int \*'} from incompatible pointer type 'struct s \*'} } */ -+ -+ ui1 = s; /* { dg-error {assigning to type 'user_int' {aka 'int'} from type 'struct s'} } */ -+ ui2 = s; /* { dg-error {assigning to type 'user_int_copy' {aka 'int'} from type 'struct s'} } */ -+ ui_ptr1 = &s; /* { dg-error {assignment to 'user_int_ptr' {aka 'int \*'} from incompatible pointer type 'struct s \*'} } */ -+ ui_ptr2 = &s; /* { dg-error {assignment to 'user_int \*' {aka 'int \*'} from incompatible pointer type 'struct s \*'} } */ -+ const_ui_ptr1 = &s; /* { dg-error {assignment to 'const user_int \*' {aka 'const int \*'} from incompatible pointer type 'struct s \*'} } */ -+ const_ui_ptr2 = &s; /* { dg-error {assignment to 'const user_int_copy \*' {aka 'const int \*'} from incompatible pointer type 'struct s \*'} } */ -+ volatile_ui_ptr1 = &s; /* { dg-error {assignment to 'volatile user_int \*' {aka 'volatile int \*'} from incompatible pointer type 'struct s \*'} } */ -+ volatile_ui_ptr2 = &s; /* { dg-error {assignment to 'volatile user_int_copy \*' {aka 'volatile int \*'} from incompatible pointer type 'struct s \*'} } */ -+ atomic_ui_ptr1 = &s; /* { dg-error {assignment to '_Atomic user_int \*' {aka '_Atomic int \*'} from incompatible pointer type 'struct s \*'} } */ -+ atomic_ui_ptr2 = &s; /* { dg-error {assignment to '_Atomic user_int_copy \*' {aka '_Atomic int \*'} from incompatible pointer type 'struct s \*'} } */ -+ ui_array_ptr1 = &s; /* { dg-error {assignment to 'user_int \(\*\)\[10\]' {aka 'int \(\*\)\[10\]'} from incompatible pointer type 'struct s \*'} } */ -+ ui_array_ptr2 = &s; /* { dg-error {assignment to 'user_int_copy \(\*\)\[10\]' {aka 'int \(\*\)\[10\]'} from incompatible pointer type 'struct s \*'} } */ -+ ui_fn_ptr1 = &s; /* { dg-error {assignment to 'user_int \(\*\)\(void\)' {aka 'int \(\*\)\(void\)'} from incompatible pointer type 'struct s \*'} } */ -+ ui_fn_ptr2 = &s; /* { dg-error 
{assignment to 'void \(\*\)\(user_int\)' {aka 'void \(\*\)\(int\)'} from incompatible pointer type 'struct s \*'} } */ -+ ui_fn_ptr3 = &s; /* { dg-error {assignment to 'void \(\*\)\(user_int, \.\.\.\)' {aka 'void \(\*\)\(int, \.\.\.\)'} from incompatible pointer type 'struct s \*'} } */ -+ ui_fn_ptr4 = &s; /* { dg-error {assignment to 'user_int_copy \(\*\)\(void\)' {aka 'int \(\*\)\(void\)'} from incompatible pointer type 'struct s \*'} } */ -+ ui_fn_ptr5 = &s; /* { dg-error {assignment to 'void \(\*\)\(user_int_copy\)' {aka 'void \(\*\)\(int\)'} from incompatible pointer type 'struct s \*'} } */ -+ ui_fn_ptr6 = &s; /* { dg-error {assignment to 'void \(\*\)\(user_int_copy, \.\.\.\)' {aka 'void \(\*\)\(int, \.\.\.\)'} from incompatible pointer type 'struct s \*'} } */ -+ unsafe_ui_fn_ptr1 = &s; /* { dg-error {assignment to 'user_int \(__attribute__\(\(transaction_unsafe\)\) \*\)\(void\)' {aka 'int \(__attribute__\(\(transaction_unsafe\)\) \*\)\(void\)'} from incompatible pointer type 'struct s \*'} } */ -+ unsafe_ui_fn_ptr2 = &s; /* { dg-error {assignment to 'user_int_copy \(__attribute__\(\(transaction_unsafe\)\) \*\)\(void\)' {aka 'int \(__attribute__\(\(transaction_unsafe\)\) \*\)\(void\)'} from incompatible pointer type 'struct s \*'} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/diag_aka_1.c b/gcc/testsuite/gcc.target/aarch64/diag_aka_1.c -index 59e24f48b..98dffead6 100644 ---- a/gcc/testsuite/gcc.target/aarch64/diag_aka_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/diag_aka_1.c -@@ -8,7 +8,6 @@ void f (float x) - __Int8x8_t *ptr1 = &x; /* { dg-error {initialization of '__Int8x8_t \*' from incompatible pointer type 'float \*'} } */ - int8x8_t y2 = x; /* { dg-error {incompatible types when initializing type 'int8x8_t' using type 'float'} } */ - int8x8_t *ptr2 = &x; /* { dg-error {initialization of 'int8x8_t \*' from incompatible pointer type 'float \*'} } */ -- /* ??? For these it would be better to print an aka for 'int16x4_t'. 
*/ -- myvec y3 = x; /* { dg-error {incompatible types when initializing type 'myvec' using type 'float'} } */ -- myvec *ptr3 = &x; /* { dg-error {initialization of 'myvec \*' from incompatible pointer type 'float \*'} } */ -+ myvec y3 = x; /* { dg-error {incompatible types when initializing type 'myvec' {aka 'int16x4_t'} using type 'float'} } */ -+ myvec *ptr3 = &x; /* { dg-error {initialization of 'myvec \*' {aka 'int16x4_t \*'} from incompatible pointer type 'float \*'} } */ - } -diff --git a/gcc/testsuite/gcc.target/aarch64/pr88838.c b/gcc/testsuite/gcc.target/aarch64/pr88838.c -new file mode 100644 -index 000000000..d7db84758 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/pr88838.c -@@ -0,0 +1,11 @@ -+/* { dg-do compile } */ -+/* { dg-options "-S -O3 -march=armv8.2-a+sve" } */ -+ -+void -+f (int *restrict x, int *restrict y, int *restrict z, int n) -+{ -+ for (int i = 0; i < n; i += 1) -+ x[i] = y[i] + z[i]; -+} -+ -+/* { dg-final { scan-assembler-not "sxtw" } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/while_1.c b/gcc/testsuite/gcc.target/aarch64/sve/while_1.c -index 2655c4242..2cfb3f697 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/while_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/while_1.c -@@ -26,14 +26,14 @@ - TEST_ALL (ADD_LOOP) - - /* { dg-final { scan-assembler-not {\tuqdec} } } */ --/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b, xzr,} 2 } } */ --/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b, x[0-9]+,} 2 } } */ --/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h, xzr,} 2 } } */ --/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h, x[0-9]+,} 2 } } */ --/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s, xzr,} 3 } } */ --/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s, x[0-9]+,} 3 } } */ --/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d, xzr,} 3 } } */ --/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d, x[0-9]+,} 3 } } */ -+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b, wzr,} 2 } } */ -+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b, w[0-9]+,} 2 } } */ -+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h, wzr,} 2 } } */ -+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h, w[0-9]+,} 2 } } */ -+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s, wzr,} 3 } } */ -+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s, w[0-9]+,} 3 } } */ -+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d, wzr,} 3 } } */ -+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d, w[0-9]+,} 3 } } */ - /* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.b, p[0-7]/z, \[x0, x[0-9]+\]\n} 2 } } */ - /* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b, p[0-7], \[x0, x[0-9]+\]\n} 2 } } */ - /* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h, p[0-7]/z, \[x0, x[0-9]+, lsl 1\]\n} 2 } } */ -diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c -index d38b298aa..e51b95593 100644 ---- a/gcc/tree-vect-loop-manip.c -+++ b/gcc/tree-vect-loop-manip.c -@@ -423,6 +423,7 @@ vect_set_loop_masks_directly (struct loop *loop, loop_vec_info loop_vinfo, - bool might_wrap_p) - { - tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo); -+ tree iv_type = LOOP_VINFO_MASK_IV_TYPE (loop_vinfo); - tree mask_type = rgm->mask_type; - unsigned int nscalars_per_iter = rgm->max_nscalars_per_iter; - poly_uint64 nscalars_per_mask = TYPE_VECTOR_SUBPARTS (mask_type); -@@ -453,11 +454,16 @@ vect_set_loop_masks_directly (struct loop 
*loop, loop_vec_info loop_vinfo, - tree index_before_incr, index_after_incr; - gimple_stmt_iterator incr_gsi; - bool insert_after; -- tree zero_index = build_int_cst (compare_type, 0); - standard_iv_increment_position (loop, &incr_gsi, &insert_after); -- create_iv (zero_index, nscalars_step, NULL_TREE, loop, &incr_gsi, -+ -+ tree zero_index = build_int_cst (iv_type, 0); -+ tree step = build_int_cst (iv_type, -+ LOOP_VINFO_VECT_FACTOR (loop_vinfo)); -+ /* Create IV of iv_type. */ -+ create_iv (zero_index, step, NULL_TREE, loop, &incr_gsi, - insert_after, &index_before_incr, &index_after_incr); - -+ zero_index = build_int_cst (compare_type, 0); - tree test_index, test_limit, first_limit; - gimple_stmt_iterator *test_gsi; - if (might_wrap_p) -@@ -537,6 +543,10 @@ vect_set_loop_masks_directly (struct loop *loop, loop_vec_info loop_vinfo, - tree next_mask = NULL_TREE; - tree mask; - unsigned int i; -+ gimple_seq test_seq = NULL; -+ test_index = gimple_convert (&test_seq, compare_type, test_index); -+ gsi_insert_seq_before (test_gsi, test_seq, GSI_SAME_STMT); -+ - FOR_EACH_VEC_ELT_REVERSE (rgm->masks, i, mask) - { - /* Previous masks will cover BIAS scalars. This mask covers the -@@ -645,12 +655,12 @@ vect_set_loop_condition_masked (struct loop *loop, loop_vec_info loop_vinfo, - - tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo); - unsigned int compare_precision = TYPE_PRECISION (compare_type); -- unsigned HOST_WIDE_INT max_vf = vect_max_vf (loop_vinfo); - tree orig_niters = niters; - - /* Type of the initial value of NITERS. */ - tree ni_actual_type = TREE_TYPE (niters); - unsigned int ni_actual_precision = TYPE_PRECISION (ni_actual_type); -+ tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo); - - /* Convert NITERS to the same size as the compare. */ - if (compare_precision > ni_actual_precision -@@ -669,33 +679,7 @@ vect_set_loop_condition_masked (struct loop *loop, loop_vec_info loop_vinfo, - else - niters = gimple_convert (&preheader_seq, compare_type, niters); - -- /* Convert skip_niters to the right type. */ -- tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo); -- -- /* Now calculate the value that the induction variable must be able -- to hit in order to ensure that we end the loop with an all-false mask. -- This involves adding the maximum number of inactive trailing scalar -- iterations. */ -- widest_int iv_limit; -- bool known_max_iters = max_loop_iterations (loop, &iv_limit); -- if (known_max_iters) -- { -- if (niters_skip) -- { -- /* Add the maximum number of skipped iterations to the -- maximum iteration count. */ -- if (TREE_CODE (niters_skip) == INTEGER_CST) -- iv_limit += wi::to_widest (niters_skip); -- else -- iv_limit += max_vf - 1; -- } -- /* IV_LIMIT is the maximum number of latch iterations, which is also -- the maximum in-range IV value. Round this value down to the previous -- vector alignment boundary and then add an extra full iteration. */ -- poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); -- iv_limit = (iv_limit & -(int) known_alignment (vf)) + max_vf; -- } -- -+ widest_int iv_limit = vect_iv_limit_for_full_masking (loop_vinfo); - /* Get the vectorization factor in tree form. */ - tree vf = build_int_cst (compare_type, - LOOP_VINFO_VECT_FACTOR (loop_vinfo)); -@@ -725,7 +709,7 @@ vect_set_loop_condition_masked (struct loop *loop, loop_vec_info loop_vinfo, - /* See whether zero-based IV would ever generate all-false masks - before wrapping around. 
*/ - bool might_wrap_p -- = (!known_max_iters -+ = (iv_limit == -1 - || (wi::min_precision (iv_limit * rgm->max_nscalars_per_iter, - UNSIGNED) - > compare_precision)); -diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c -index 16d7d7788..e98bf2c15 100644 ---- a/gcc/tree-vect-loop.c -+++ b/gcc/tree-vect-loop.c -@@ -1038,6 +1038,8 @@ vect_verify_full_masking (loop_vec_info loop_vinfo) - { - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - unsigned int min_ni_width; -+ unsigned int max_nscalars_per_iter -+ = vect_get_max_nscalars_per_iter (loop_vinfo); - - /* Use a normal loop if there are no statements that need masking. - This only happens in rare degenerate cases: it means that the loop -@@ -1056,7 +1058,7 @@ vect_verify_full_masking (loop_vec_info loop_vinfo) - max_ni = wi::smin (max_ni, max_back_edges + 1); - - /* Account for rgroup masks, in which each bit is replicated N times. */ -- max_ni *= vect_get_max_nscalars_per_iter (loop_vinfo); -+ max_ni *= max_nscalars_per_iter; - - /* Work out how many bits we need to represent the limit. */ - min_ni_width = wi::min_precision (max_ni, UNSIGNED); -@@ -1064,6 +1066,14 @@ vect_verify_full_masking (loop_vec_info loop_vinfo) - /* Find a scalar mode for which WHILE_ULT is supported. */ - opt_scalar_int_mode cmp_mode_iter; - tree cmp_type = NULL_TREE; -+ tree iv_type = NULL_TREE; -+ widest_int iv_limit = vect_iv_limit_for_full_masking (loop_vinfo); -+ widest_int iv_precision = UINT_MAX; -+ -+ if (iv_limit != -1) -+ iv_precision = wi::min_precision (iv_limit * max_nscalars_per_iter, -+ UNSIGNED); -+ - FOR_EACH_MODE_IN_CLASS (cmp_mode_iter, MODE_INT) - { - unsigned int cmp_bits = GET_MODE_BITSIZE (cmp_mode_iter.require ()); -@@ -1075,10 +1085,32 @@ vect_verify_full_masking (loop_vec_info loop_vinfo) - && can_produce_all_loop_masks_p (loop_vinfo, this_type)) - { - /* Although we could stop as soon as we find a valid mode, -- it's often better to continue until we hit Pmode, since the -- operands to the WHILE are more likely to be reusable in -- address calculations. */ -- cmp_type = this_type; -+ there are at least two reasons why that's not always the -+ best choice: -+ -+ - An IV that's Pmode or wider is more likely to be reusable -+ in address calculations than an IV that's narrower than -+ Pmode. -+ -+ - Doing the comparison in IV_PRECISION or wider allows -+ a natural 0-based IV, whereas using a narrower comparison -+ type requires mitigations against wrap-around. -+ -+ Conversely, if the IV limit is variable, doing the comparison -+ in a wider type than the original type can introduce -+ unnecessary extensions, so picking the widest valid mode -+ is not always a good choice either. -+ -+ Here we prefer the first IV type that's Pmode or wider, -+ and the first comparison type that's IV_PRECISION or wider. -+ (The comparison type must be no wider than the IV type, -+ to avoid extensions in the vector loop.) -+ -+ ??? We might want to try continuing beyond Pmode for ILP32 -+ targets if CMP_BITS < IV_PRECISION. 
*/ -+ iv_type = this_type; -+ if (!cmp_type || iv_precision > TYPE_PRECISION (cmp_type)) -+ cmp_type = this_type; - if (cmp_bits >= GET_MODE_BITSIZE (Pmode)) - break; - } -@@ -1089,6 +1121,7 @@ vect_verify_full_masking (loop_vec_info loop_vinfo) - return false; - - LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo) = cmp_type; -+ LOOP_VINFO_MASK_IV_TYPE (loop_vinfo) = iv_type; - return true; - } - -@@ -9080,3 +9113,45 @@ optimize_mask_stores (struct loop *loop) - add_phi_arg (phi, gimple_vuse (last_store), e, UNKNOWN_LOCATION); - } - } -+ -+/* Decide whether it is possible to use a zero-based induction variable -+ when vectorizing LOOP_VINFO with a fully-masked loop. If it is, -+ return the value that the induction variable must be able to hold -+ in order to ensure that the loop ends with an all-false mask. -+ Return -1 otherwise. */ -+widest_int -+vect_iv_limit_for_full_masking (loop_vec_info loop_vinfo) -+{ -+ tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo); -+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); -+ unsigned HOST_WIDE_INT max_vf = vect_max_vf (loop_vinfo); -+ -+ /* Calculate the value that the induction variable must be able -+ to hit in order to ensure that we end the loop with an all-false mask. -+ This involves adding the maximum number of inactive trailing scalar -+ iterations. */ -+ widest_int iv_limit = -1; -+ if (max_loop_iterations (loop, &iv_limit)) -+ { -+ if (niters_skip) -+ { -+ /* Add the maximum number of skipped iterations to the -+ maximum iteration count. */ -+ if (TREE_CODE (niters_skip) == INTEGER_CST) -+ iv_limit += wi::to_widest (niters_skip); -+ else -+ iv_limit += max_vf - 1; -+ } -+ else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)) -+ /* Make a conservatively-correct assumption. */ -+ iv_limit += max_vf - 1; -+ -+ /* IV_LIMIT is the maximum number of latch iterations, which is also -+ the maximum in-range IV value. Round this value down to the previous -+ vector alignment boundary and then add an extra full iteration. */ -+ poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); -+ iv_limit = (iv_limit & -(int) known_alignment (vf)) + max_vf; -+ } -+ return iv_limit; -+} -+ -diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h -index 34ba49f4d..fae4df52d 100644 ---- a/gcc/tree-vectorizer.h -+++ b/gcc/tree-vectorizer.h -@@ -529,6 +529,10 @@ typedef struct _loop_vec_info : public vec_info { - is false and vectorized loop otherwise. */ - tree simd_if_cond; - -+ /* Type of the IV to use in the WHILE_ULT call for fully-masked -+ loops. */ -+ tree iv_type; -+ - /* Unknown DRs according to which loop was peeled. */ - struct dr_vec_info *unaligned_dr; - -@@ -675,6 +679,7 @@ typedef struct _loop_vec_info : public vec_info { - #define LOOP_VINFO_MASKS(L) (L)->masks - #define LOOP_VINFO_MASK_SKIP_NITERS(L) (L)->mask_skip_niters - #define LOOP_VINFO_MASK_COMPARE_TYPE(L) (L)->mask_compare_type -+#define LOOP_VINFO_MASK_IV_TYPE(L) (L)->iv_type - #define LOOP_VINFO_PTR_MASK(L) (L)->ptr_mask - #define LOOP_VINFO_LOOP_NEST(L) (L)->shared->loop_nest - #define LOOP_VINFO_DATAREFS(L) (L)->shared->datarefs -@@ -1720,6 +1725,7 @@ extern tree vect_create_addr_base_for_vector_ref (stmt_vec_info, gimple_seq *, - /* In tree-vect-loop.c. */ - /* Used in tree-vect-loop-manip.c */ - extern void determine_peel_for_niter (loop_vec_info); -+extern widest_int vect_iv_limit_for_full_masking (loop_vec_info loop_vinfo); - /* Used in gimple-loop-interchange.c and tree-parloops.c. 
*/ - extern bool check_reduction_path (dump_user_location_t, loop_p, gphi *, tree, - enum tree_code); diff --git a/aarch64-Fix-mismatched-SVE-predicate-modes.patch b/aarch64-Fix-mismatched-SVE-predicate-modes.patch deleted file mode 100644 index 8bb66d9e555ac5d08951e9112f205ac0443f55cd..0000000000000000000000000000000000000000 --- a/aarch64-Fix-mismatched-SVE-predicate-modes.patch +++ /dev/null @@ -1,34 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-aarch64-Fix-mismatched-SVE-predicate-modes.patch -26bebf576ddcdcfb596f07e8c2896f17c48516e7 - -diff -urpN a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c ---- a/gcc/config/aarch64/aarch64.c 2020-12-14 00:57:20.128000000 -0500 -+++ b/gcc/config/aarch64/aarch64.c 2020-12-14 01:00:15.080000000 -0500 -@@ -4328,6 +4328,7 @@ aarch64_expand_sve_const_pred_eor (rtx t - /* EOR the result with an ELT_SIZE PTRUE. */ - rtx mask = aarch64_ptrue_all (elt_size); - mask = force_reg (VNx16BImode, mask); -+ inv = gen_lowpart (VNx16BImode, inv); - target = aarch64_target_reg (target, VNx16BImode); - emit_insn (gen_aarch64_pred_z (XOR, VNx16BImode, target, mask, inv, mask)); - return target; -diff -urpN a/gcc/testsuite/gcc.dg/vect/pr94606.c b/gcc/testsuite/gcc.dg/vect/pr94606.c ---- a/gcc/testsuite/gcc.dg/vect/pr94606.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/vect/pr94606.c 2020-12-14 01:00:15.080000000 -0500 -@@ -0,0 +1,13 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-march=armv8.2-a+sve -msve-vector-bits=256" { target aarch64*-*-* } } */ -+ -+const short mask[] = { 0, 0, 0, 0, 0, 0, 0, 0, -+ 0, 0, 0, 1, 1, 1, 1, 1 }; -+ -+int -+foo (short *restrict x, short *restrict y) -+{ -+ for (int i = 0; i < 16; ++i) -+ if (mask[i]) -+ x[i] += y[i]; -+} diff --git a/aarch64-fix-sve-acle-error.patch b/aarch64-fix-sve-acle-error.patch deleted file mode 100644 index 237093a5ac821f338fa5c3267b0912d8e771b460..0000000000000000000000000000000000000000 --- a/aarch64-fix-sve-acle-error.patch +++ /dev/null @@ -1,2128 +0,0 @@ -This backport contains 4 patchs from gcc main stream tree. -The commit id of these patchs list as following in the order of time. 
- -0001-AArch64-Fix-build-for-non-default-languages.patch -6ff0cdebb1bc281ba2374f3ecdbe358c4fa74093 - -0002-C-Opt-out-of-GNU-vector-extensions-for-built-in-SVE-.patch -f486280c53be53136f0bb9b578f43dc6c9c5acea - -0003-C-Add-a-target-hook-that-allows-targets-to-verify-ty.patch -65ef05d0b7fb429c5760189e638c441dc3da33f4 - -0004-AArch64-Run-general-SVE-ACLE-tests-for-C.patch -6da4c454acee4dac53c4c549fa1caeb73fe1f82b - -diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c -index 2c10743b9..50423ec0f 100644 ---- a/gcc/c-family/c-common.c -+++ b/gcc/c-family/c-common.c -@@ -1012,7 +1012,8 @@ c_build_vec_perm_expr (location_t loc, tree v0, tree v1, tree mask, - || mask == error_mark_node) - return error_mark_node; - -- if (!VECTOR_INTEGER_TYPE_P (TREE_TYPE (mask))) -+ if (!gnu_vector_type_p (TREE_TYPE (mask)) -+ || !VECTOR_INTEGER_TYPE_P (TREE_TYPE (mask))) - { - if (complain) - error_at (loc, "%<__builtin_shuffle%> last argument must " -@@ -1020,8 +1021,8 @@ c_build_vec_perm_expr (location_t loc, tree v0, tree v1, tree mask, - return error_mark_node; - } - -- if (!VECTOR_TYPE_P (TREE_TYPE (v0)) -- || !VECTOR_TYPE_P (TREE_TYPE (v1))) -+ if (!gnu_vector_type_p (TREE_TYPE (v0)) -+ || !gnu_vector_type_p (TREE_TYPE (v1))) - { - if (complain) - error_at (loc, "%<__builtin_shuffle%> arguments must be vectors"); -@@ -1096,8 +1097,9 @@ c_build_vec_convert (location_t loc1, tree expr, location_t loc2, tree type, - if (error_operand_p (expr)) - return error_mark_node; - -- if (!VECTOR_INTEGER_TYPE_P (TREE_TYPE (expr)) -- && !VECTOR_FLOAT_TYPE_P (TREE_TYPE (expr))) -+ if (!gnu_vector_type_p (TREE_TYPE (expr)) -+ || (!VECTOR_INTEGER_TYPE_P (TREE_TYPE (expr)) -+ && !VECTOR_FLOAT_TYPE_P (TREE_TYPE (expr)))) - { - if (complain) - error_at (loc1, "%<__builtin_convertvector%> first argument must " -@@ -1105,7 +1107,8 @@ c_build_vec_convert (location_t loc1, tree expr, location_t loc2, tree type, - return error_mark_node; - } - -- if (!VECTOR_INTEGER_TYPE_P (type) && !VECTOR_FLOAT_TYPE_P (type)) -+ if (!gnu_vector_type_p (type) -+ || (!VECTOR_INTEGER_TYPE_P (type) && !VECTOR_FLOAT_TYPE_P (type))) - { - if (complain) - error_at (loc2, "%<__builtin_convertvector%> second argument must " -@@ -3128,6 +3131,9 @@ pointer_int_sum (location_t loc, enum tree_code resultcode, - return error_mark_node; - size_exp = integer_one_node; - } -+ else if (!verify_type_context (loc, TCTX_POINTER_ARITH, -+ TREE_TYPE (result_type))) -+ size_exp = integer_one_node; - else - size_exp = size_in_bytes_loc (loc, TREE_TYPE (result_type)); - -@@ -3673,6 +3679,13 @@ c_sizeof_or_alignof_type (location_t loc, - "incomplete element type", op_name, type); - return error_mark_node; - } -+ else if (!verify_type_context (loc, is_sizeof ? 
TCTX_SIZEOF : TCTX_ALIGNOF, -+ type, !complain)) -+ { -+ if (!complain) -+ return error_mark_node; -+ value = size_one_node; -+ } - else - { - if (is_sizeof) -@@ -3705,7 +3718,10 @@ c_alignof_expr (location_t loc, tree expr) - { - tree t; - -- if (VAR_OR_FUNCTION_DECL_P (expr)) -+ if (!verify_type_context (loc, TCTX_ALIGNOF, TREE_TYPE (expr))) -+ t = size_one_node; -+ -+ else if (VAR_OR_FUNCTION_DECL_P (expr)) - t = size_int (DECL_ALIGN_UNIT (expr)); - - else if (TREE_CODE (expr) == COMPONENT_REF -@@ -7994,7 +8010,7 @@ convert_vector_to_array_for_subscript (location_t loc, - tree *vecp, tree index) - { - bool ret = false; -- if (VECTOR_TYPE_P (TREE_TYPE (*vecp))) -+ if (gnu_vector_type_p (TREE_TYPE (*vecp))) - { - tree type = TREE_TYPE (*vecp); - -@@ -8030,7 +8046,7 @@ scalar_to_vector (location_t loc, enum tree_code code, tree op0, tree op1, - bool integer_only_op = false; - enum stv_conv ret = stv_firstarg; - -- gcc_assert (VECTOR_TYPE_P (type0) || VECTOR_TYPE_P (type1)); -+ gcc_assert (gnu_vector_type_p (type0) || gnu_vector_type_p (type1)); - switch (code) - { - /* Most GENERIC binary expressions require homogeneous arguments. -@@ -8081,7 +8097,7 @@ scalar_to_vector (location_t loc, enum tree_code code, tree op0, tree op1, - case LT_EXPR: - case GT_EXPR: - /* What about UNLT_EXPR? */ -- if (VECTOR_TYPE_P (type0)) -+ if (gnu_vector_type_p (type0)) - { - ret = stv_secondarg; - std::swap (type0, type1); -diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h -index 73ce7c5df..2a9008af4 100644 ---- a/gcc/c-family/c-common.h -+++ b/gcc/c-family/c-common.h -@@ -756,6 +756,16 @@ extern bool done_lexing; - #define C_TYPE_OBJECT_OR_INCOMPLETE_P(type) \ - (!C_TYPE_FUNCTION_P (type)) - -+/* Return true if TYPE is a vector type that should be subject to the GNU -+ vector extensions (as opposed to a vector type that is used only for -+ the purposes of defining target-specific built-in functions). */ -+ -+inline bool -+gnu_vector_type_p (const_tree type) -+{ -+ return TREE_CODE (type) == VECTOR_TYPE && !TYPE_INDIVISIBLE_P (type); -+} -+ - struct visibility_flags - { - unsigned inpragma : 1; /* True when in #pragma GCC visibility. */ -diff --git a/gcc/c/c-convert.c b/gcc/c/c-convert.c -index f0f846013..21b127d0d 100644 ---- a/gcc/c/c-convert.c -+++ b/gcc/c/c-convert.c -@@ -147,8 +147,20 @@ convert (tree type, tree expr) - goto maybe_fold; - - case VECTOR_TYPE: -- ret = convert_to_vector (type, e); -- goto maybe_fold; -+ if (gnu_vector_type_p (type) -+ || gnu_vector_type_p (TREE_TYPE (e)) -+ /* Allow conversions between compatible non-GNU vector types -+ when -flax-vector-conversions is passed. The whole purpose -+ of the option is to bend the normal type rules and accept -+ nonconforming code. */ -+ || (flag_lax_vector_conversions -+ && VECTOR_TYPE_P (TREE_TYPE (e)) -+ && vector_types_convertible_p (type, TREE_TYPE (e), false))) -+ { -+ ret = convert_to_vector (type, e); -+ goto maybe_fold; -+ } -+ break; - - case RECORD_TYPE: - case UNION_TYPE: -diff --git a/gcc/c/c-decl.c b/gcc/c/c-decl.c -index 288dbe9d9..bf88d3c7d 100644 ---- a/gcc/c/c-decl.c -+++ b/gcc/c/c-decl.c -@@ -4927,7 +4927,7 @@ start_decl (struct c_declarator *declarator, struct c_declspecs *declspecs, - { - /* A complete type is ok if size is fixed. 
*/ - -- if (TREE_CODE (TYPE_SIZE (TREE_TYPE (decl))) != INTEGER_CST -+ if (!poly_int_tree_p (TYPE_SIZE (TREE_TYPE (decl))) - || C_DECL_VARIABLE_SIZE (decl)) - { - error ("variable-sized object may not be initialized"); -@@ -5210,6 +5210,15 @@ finish_decl (tree decl, location_t init_loc, tree init, - - complete_flexible_array_elts (DECL_INITIAL (decl)); - -+ if (is_global_var (decl)) -+ { -+ type_context_kind context = (DECL_THREAD_LOCAL_P (decl) -+ ? TCTX_THREAD_STORAGE -+ : TCTX_STATIC_STORAGE); -+ if (!verify_type_context (input_location, context, TREE_TYPE (decl))) -+ TREE_TYPE (decl) = error_mark_node; -+ } -+ - if (DECL_SIZE (decl) == NULL_TREE && TREE_TYPE (decl) != error_mark_node - && COMPLETE_TYPE_P (TREE_TYPE (decl))) - layout_decl (decl, 0); -@@ -5239,7 +5248,9 @@ finish_decl (tree decl, location_t init_loc, tree init, - && TREE_STATIC (decl)) - incomplete_record_decls.safe_push (decl); - -- if (is_global_var (decl) && DECL_SIZE (decl) != NULL_TREE) -+ if (is_global_var (decl) -+ && DECL_SIZE (decl) != NULL_TREE -+ && TREE_TYPE (decl) != error_mark_node) - { - if (TREE_CODE (DECL_SIZE (decl)) == INTEGER_CST) - constant_expression_warning (DECL_SIZE (decl)); -@@ -5559,6 +5570,10 @@ build_compound_literal (location_t loc, tree type, tree init, bool non_const, - return error_mark_node; - } - -+ if (TREE_STATIC (decl) -+ && !verify_type_context (loc, TCTX_STATIC_STORAGE, type)) -+ return error_mark_node; -+ - stmt = build_stmt (DECL_SOURCE_LOCATION (decl), DECL_EXPR, decl); - complit = build1 (COMPOUND_LITERAL_EXPR, type, stmt); - TREE_SIDE_EFFECTS (complit) = 1; -@@ -6227,6 +6242,12 @@ grokdeclarator (const struct c_declarator *declarator, - if (type == error_mark_node) - continue; - -+ if (!verify_type_context (loc, TCTX_ARRAY_ELEMENT, type)) -+ { -+ type = error_mark_node; -+ continue; -+ } -+ - /* If size was specified, set ITYPE to a range-type for - that size. Otherwise, ITYPE remains null. finish_decl - may figure it out from an initial value. */ -@@ -7076,6 +7097,10 @@ grokdeclarator (const struct c_declarator *declarator, - if (orig_qual_indirect == 0) - orig_qual_type = NULL_TREE; - } -+ if (type != error_mark_node -+ && !verify_type_context (loc, TCTX_FIELD, type)) -+ type = error_mark_node; -+ - type = c_build_qualified_type (type, type_quals, orig_qual_type, - orig_qual_indirect); - decl = build_decl (declarator->id_loc, -diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c -index 87f4178ec..f456a66fb 100644 ---- a/gcc/c/c-typeck.c -+++ b/gcc/c/c-typeck.c -@@ -2609,7 +2609,7 @@ build_array_ref (location_t loc, tree array, tree index) - if (TREE_CODE (TREE_TYPE (array)) != ARRAY_TYPE - && TREE_CODE (TREE_TYPE (array)) != POINTER_TYPE - /* Allow vector[index] but not index[vector]. */ -- && !VECTOR_TYPE_P (TREE_TYPE (array))) -+ && !gnu_vector_type_p (TREE_TYPE (array))) - { - if (TREE_CODE (TREE_TYPE (index)) != ARRAY_TYPE - && TREE_CODE (TREE_TYPE (index)) != POINTER_TYPE) -@@ -3891,6 +3891,7 @@ pointer_diff (location_t loc, tree op0, tree op1, tree *instrument_expr) - addr_space_t as0 = TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (op0))); - addr_space_t as1 = TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (op1))); - tree target_type = TREE_TYPE (TREE_TYPE (op0)); -+ tree orig_op0 = op0; - tree orig_op1 = op1; - - /* If the operands point into different address spaces, we need to -@@ -3961,6 +3962,10 @@ pointer_diff (location_t loc, tree op0, tree op1, tree *instrument_expr) - /* This generates an error if op1 is pointer to incomplete type. 
*/ - if (!COMPLETE_OR_VOID_TYPE_P (TREE_TYPE (TREE_TYPE (orig_op1)))) - error_at (loc, "arithmetic on pointer to an incomplete type"); -+ else if (verify_type_context (loc, TCTX_POINTER_ARITH, -+ TREE_TYPE (TREE_TYPE (orig_op0)))) -+ verify_type_context (loc, TCTX_POINTER_ARITH, -+ TREE_TYPE (TREE_TYPE (orig_op1))); - - op1 = c_size_in_bytes (target_type); - -@@ -4359,7 +4364,7 @@ build_unary_op (location_t location, enum tree_code code, tree xarg, - associativity, but won't generate any code. */ - if (!(typecode == INTEGER_TYPE || typecode == REAL_TYPE - || typecode == FIXED_POINT_TYPE || typecode == COMPLEX_TYPE -- || typecode == VECTOR_TYPE)) -+ || gnu_vector_type_p (TREE_TYPE (arg)))) - { - error_at (location, "wrong type argument to unary plus"); - return error_mark_node; -@@ -4372,7 +4377,7 @@ build_unary_op (location_t location, enum tree_code code, tree xarg, - case NEGATE_EXPR: - if (!(typecode == INTEGER_TYPE || typecode == REAL_TYPE - || typecode == FIXED_POINT_TYPE || typecode == COMPLEX_TYPE -- || typecode == VECTOR_TYPE)) -+ || gnu_vector_type_p (TREE_TYPE (arg)))) - { - error_at (location, "wrong type argument to unary minus"); - return error_mark_node; -@@ -4384,7 +4389,7 @@ build_unary_op (location_t location, enum tree_code code, tree xarg, - case BIT_NOT_EXPR: - /* ~ works on integer types and non float vectors. */ - if (typecode == INTEGER_TYPE -- || (typecode == VECTOR_TYPE -+ || (gnu_vector_type_p (TREE_TYPE (arg)) - && !VECTOR_FLOAT_TYPE_P (TREE_TYPE (arg)))) - { - tree e = arg; -@@ -4570,7 +4575,8 @@ build_unary_op (location_t location, enum tree_code code, tree xarg, - - if (typecode != POINTER_TYPE && typecode != FIXED_POINT_TYPE - && typecode != INTEGER_TYPE && typecode != REAL_TYPE -- && typecode != COMPLEX_TYPE && typecode != VECTOR_TYPE) -+ && typecode != COMPLEX_TYPE -+ && !gnu_vector_type_p (TREE_TYPE (arg))) - { - if (code == PREINCREMENT_EXPR || code == POSTINCREMENT_EXPR) - error_at (location, "wrong type argument to increment"); -@@ -4612,6 +4618,9 @@ build_unary_op (location_t location, enum tree_code code, tree xarg, - pedwarn (location, OPT_Wpointer_arith, - "wrong type argument to decrement"); - } -+ else -+ verify_type_context (location, TCTX_POINTER_ARITH, -+ TREE_TYPE (argtype)); - - inc = c_size_in_bytes (TREE_TYPE (argtype)); - inc = convert_to_ptrofftype_loc (location, inc); -@@ -7854,7 +7863,7 @@ digest_init (location_t init_loc, tree type, tree init, tree origtype, - TYPE_MAIN_VARIANT (type)) - || (code == ARRAY_TYPE - && comptypes (TREE_TYPE (inside_init), type)) -- || (code == VECTOR_TYPE -+ || (gnu_vector_type_p (type) - && comptypes (TREE_TYPE (inside_init), type)) - || (code == POINTER_TYPE - && TREE_CODE (TREE_TYPE (inside_init)) == ARRAY_TYPE -@@ -8352,7 +8361,7 @@ really_start_incremental_init (tree type) - - constructor_unfilled_index = constructor_index; - } -- else if (VECTOR_TYPE_P (constructor_type)) -+ else if (gnu_vector_type_p (constructor_type)) - { - /* Vectors are like simple fixed-size arrays. */ - constructor_max_index = -@@ -8526,7 +8535,7 @@ push_init_level (location_t loc, int implicit, - constructor_unfilled_fields = constructor_fields; - constructor_bit_index = bitsize_zero_node; - } -- else if (VECTOR_TYPE_P (constructor_type)) -+ else if (gnu_vector_type_p (constructor_type)) - { - /* Vectors are like simple fixed-size arrays. 
*/ - constructor_max_index = -@@ -8715,7 +8724,7 @@ pop_init_level (location_t loc, int implicit, - ; - else if (!RECORD_OR_UNION_TYPE_P (constructor_type) - && TREE_CODE (constructor_type) != ARRAY_TYPE -- && !VECTOR_TYPE_P (constructor_type)) -+ && !gnu_vector_type_p (constructor_type)) - { - /* A nonincremental scalar initializer--just return - the element, after verifying there is just one. */ -@@ -9941,7 +9950,7 @@ process_init_element (location_t loc, struct c_expr value, bool implicit, - last_init_list_comma), - true, braced_init_obstack); - else if ((TREE_CODE (constructor_type) == ARRAY_TYPE -- || VECTOR_TYPE_P (constructor_type)) -+ || gnu_vector_type_p (constructor_type)) - && constructor_max_index - && tree_int_cst_lt (constructor_max_index, - constructor_index)) -@@ -10042,7 +10051,8 @@ process_init_element (location_t loc, struct c_expr value, bool implicit, - && value.value != error_mark_node - && TYPE_MAIN_VARIANT (TREE_TYPE (value.value)) != fieldtype - && (fieldcode == RECORD_TYPE || fieldcode == ARRAY_TYPE -- || fieldcode == UNION_TYPE || fieldcode == VECTOR_TYPE)) -+ || fieldcode == UNION_TYPE -+ || gnu_vector_type_p (fieldtype))) - { - push_init_level (loc, 1, braced_init_obstack); - continue; -@@ -10133,7 +10143,8 @@ process_init_element (location_t loc, struct c_expr value, bool implicit, - && value.value != error_mark_node - && TYPE_MAIN_VARIANT (TREE_TYPE (value.value)) != fieldtype - && (fieldcode == RECORD_TYPE || fieldcode == ARRAY_TYPE -- || fieldcode == UNION_TYPE || fieldcode == VECTOR_TYPE)) -+ || fieldcode == UNION_TYPE -+ || gnu_vector_type_p (fieldtype))) - { - push_init_level (loc, 1, braced_init_obstack); - continue; -@@ -10175,7 +10186,8 @@ process_init_element (location_t loc, struct c_expr value, bool implicit, - && value.value != error_mark_node - && TYPE_MAIN_VARIANT (TREE_TYPE (value.value)) != elttype - && (eltcode == RECORD_TYPE || eltcode == ARRAY_TYPE -- || eltcode == UNION_TYPE || eltcode == VECTOR_TYPE)) -+ || eltcode == UNION_TYPE -+ || gnu_vector_type_p (elttype))) - { - push_init_level (loc, 1, braced_init_obstack); - continue; -@@ -10211,7 +10223,7 @@ process_init_element (location_t loc, struct c_expr value, bool implicit, - constructor_unfilled_index. */ - constructor_unfilled_index = constructor_index; - } -- else if (VECTOR_TYPE_P (constructor_type)) -+ else if (gnu_vector_type_p (constructor_type)) - { - tree elttype = TYPE_MAIN_VARIANT (TREE_TYPE (constructor_type)); - -@@ -11555,7 +11567,8 @@ build_binary_op (location_t location, enum tree_code code, - - /* In case when one of the operands of the binary operation is - a vector and another is a scalar -- convert scalar to vector. 
*/ -- if ((code0 == VECTOR_TYPE) != (code1 == VECTOR_TYPE)) -+ if ((gnu_vector_type_p (type0) && code1 != VECTOR_TYPE) -+ || (gnu_vector_type_p (type1) && code0 != VECTOR_TYPE)) - { - enum stv_conv convert_flag = scalar_to_vector (location, code, op0, op1, - true); -@@ -11650,10 +11663,12 @@ build_binary_op (location_t location, enum tree_code code, - - if ((code0 == INTEGER_TYPE || code0 == REAL_TYPE - || code0 == FIXED_POINT_TYPE -- || code0 == COMPLEX_TYPE || code0 == VECTOR_TYPE) -+ || code0 == COMPLEX_TYPE -+ || gnu_vector_type_p (type0)) - && (code1 == INTEGER_TYPE || code1 == REAL_TYPE - || code1 == FIXED_POINT_TYPE -- || code1 == COMPLEX_TYPE || code1 == VECTOR_TYPE)) -+ || code1 == COMPLEX_TYPE -+ || gnu_vector_type_p (type1))) - { - enum tree_code tcode0 = code0, tcode1 = code1; - -@@ -11684,8 +11699,8 @@ build_binary_op (location_t location, enum tree_code code, - if (code0 == INTEGER_TYPE && code1 == INTEGER_TYPE) - shorten = -1; - /* Allow vector types which are not floating point types. */ -- else if (code0 == VECTOR_TYPE -- && code1 == VECTOR_TYPE -+ else if (gnu_vector_type_p (type0) -+ && gnu_vector_type_p (type1) - && !VECTOR_FLOAT_TYPE_P (type0) - && !VECTOR_FLOAT_TYPE_P (type1)) - common = 1; -@@ -11696,7 +11711,8 @@ build_binary_op (location_t location, enum tree_code code, - doing_div_or_mod = true; - warn_for_div_by_zero (location, op1); - -- if (code0 == VECTOR_TYPE && code1 == VECTOR_TYPE -+ if (gnu_vector_type_p (type0) -+ && gnu_vector_type_p (type1) - && TREE_CODE (TREE_TYPE (type0)) == INTEGER_TYPE - && TREE_CODE (TREE_TYPE (type1)) == INTEGER_TYPE) - common = 1; -@@ -11775,7 +11791,8 @@ build_binary_op (location_t location, enum tree_code code, - Also set SHORT_SHIFT if shifting rightward. */ - - case RSHIFT_EXPR: -- if (code0 == VECTOR_TYPE && code1 == VECTOR_TYPE -+ if (gnu_vector_type_p (type0) -+ && gnu_vector_type_p (type1) - && TREE_CODE (TREE_TYPE (type0)) == INTEGER_TYPE - && TREE_CODE (TREE_TYPE (type1)) == INTEGER_TYPE - && known_eq (TYPE_VECTOR_SUBPARTS (type0), -@@ -11785,7 +11802,7 @@ build_binary_op (location_t location, enum tree_code code, - converted = 1; - } - else if ((code0 == INTEGER_TYPE || code0 == FIXED_POINT_TYPE -- || (code0 == VECTOR_TYPE -+ || (gnu_vector_type_p (type0) - && TREE_CODE (TREE_TYPE (type0)) == INTEGER_TYPE)) - && code1 == INTEGER_TYPE) - { -@@ -11834,7 +11851,8 @@ build_binary_op (location_t location, enum tree_code code, - break; - - case LSHIFT_EXPR: -- if (code0 == VECTOR_TYPE && code1 == VECTOR_TYPE -+ if (gnu_vector_type_p (type0) -+ && gnu_vector_type_p (type1) - && TREE_CODE (TREE_TYPE (type0)) == INTEGER_TYPE - && TREE_CODE (TREE_TYPE (type1)) == INTEGER_TYPE - && known_eq (TYPE_VECTOR_SUBPARTS (type0), -@@ -11844,7 +11862,7 @@ build_binary_op (location_t location, enum tree_code code, - converted = 1; - } - else if ((code0 == INTEGER_TYPE || code0 == FIXED_POINT_TYPE -- || (code0 == VECTOR_TYPE -+ || (gnu_vector_type_p (type0) - && TREE_CODE (TREE_TYPE (type0)) == INTEGER_TYPE)) - && code1 == INTEGER_TYPE) - { -@@ -11903,7 +11921,7 @@ build_binary_op (location_t location, enum tree_code code, - - case EQ_EXPR: - case NE_EXPR: -- if (code0 == VECTOR_TYPE && code1 == VECTOR_TYPE) -+ if (gnu_vector_type_p (type0) && gnu_vector_type_p (type1)) - { - tree intt; - if (!vector_types_compatible_elements_p (type0, type1)) -@@ -12071,7 +12089,7 @@ build_binary_op (location_t location, enum tree_code code, - case GE_EXPR: - case LT_EXPR: - case GT_EXPR: -- if (code0 == VECTOR_TYPE && code1 == VECTOR_TYPE) -+ if 
(gnu_vector_type_p (type0) && gnu_vector_type_p (type1)) - { - tree intt; - if (!vector_types_compatible_elements_p (type0, type1)) -@@ -12218,7 +12236,8 @@ build_binary_op (location_t location, enum tree_code code, - if (code0 == ERROR_MARK || code1 == ERROR_MARK) - return error_mark_node; - -- if (code0 == VECTOR_TYPE && code1 == VECTOR_TYPE -+ if (gnu_vector_type_p (type0) -+ && gnu_vector_type_p (type1) - && (!tree_int_cst_equal (TYPE_SIZE (type0), TYPE_SIZE (type1)) - || !vector_types_compatible_elements_p (type0, type1))) - { -@@ -12233,10 +12252,12 @@ build_binary_op (location_t location, enum tree_code code, - } - - if ((code0 == INTEGER_TYPE || code0 == REAL_TYPE || code0 == COMPLEX_TYPE -- || code0 == FIXED_POINT_TYPE || code0 == VECTOR_TYPE) -+ || code0 == FIXED_POINT_TYPE -+ || gnu_vector_type_p (type0)) - && - (code1 == INTEGER_TYPE || code1 == REAL_TYPE || code1 == COMPLEX_TYPE -- || code1 == FIXED_POINT_TYPE || code1 == VECTOR_TYPE)) -+ || code1 == FIXED_POINT_TYPE -+ || gnu_vector_type_p (type1))) - { - bool first_complex = (code0 == COMPLEX_TYPE); - bool second_complex = (code1 == COMPLEX_TYPE); -diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h -index a9123c858..51356da37 100644 ---- a/gcc/config/aarch64/aarch64-protos.h -+++ b/gcc/config/aarch64/aarch64-protos.h -@@ -717,6 +717,9 @@ namespace aarch64_sve { - tree, unsigned int, tree *); - gimple *gimple_fold_builtin (unsigned int, gimple_stmt_iterator *, gcall *); - rtx expand_builtin (unsigned int, tree, rtx); -+#ifdef GCC_TARGET_H -+ bool verify_type_context (location_t, type_context_kind, const_tree, bool); -+#endif - } - - extern void aarch64_split_combinev16qi (rtx operands[3]); -diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc -index f830d9294..10595a5ab 100644 ---- a/gcc/config/aarch64/aarch64-sve-builtins.cc -+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc -@@ -3248,8 +3248,10 @@ register_builtin_types () - BITS_PER_SVE_VECTOR)); - } - vectype = build_distinct_type_copy (vectype); -+ gcc_assert (vectype == TYPE_MAIN_VARIANT (vectype)); - SET_TYPE_STRUCTURAL_EQUALITY (vectype); - TYPE_ARTIFICIAL (vectype) = 1; -+ TYPE_INDIVISIBLE_P (vectype) = 1; - abi_vector_types[i] = vectype; - lang_hooks.types.register_builtin_type (vectype, - vector_types[i].abi_name); -@@ -3490,8 +3492,7 @@ bool - svbool_type_p (const_tree type) - { - tree abi_type = abi_vector_types[VECTOR_TYPE_svbool_t]; -- return (type != error_mark_node -- && TYPE_MAIN_VARIANT (type) == TYPE_MAIN_VARIANT (abi_type)); -+ return type != error_mark_node && TYPE_MAIN_VARIANT (type) == abi_type; - } - - /* If TYPE is a built-in type defined by the SVE ABI, return the mangled name, -@@ -3546,6 +3547,55 @@ builtin_type_p (const_tree type) - return svbool_type_p (type) || nvectors_if_data_type (type) > 0; - } - -+/* Implement TARGET_VERIFY_TYPE_CONTEXT for SVE types. 
*/ -+bool -+verify_type_context (location_t loc, type_context_kind context, -+ const_tree type, bool silent_p) -+{ -+ if (!builtin_type_p (type)) -+ return true; -+ -+ switch (context) -+ { -+ case TCTX_SIZEOF: -+ case TCTX_STATIC_STORAGE: -+ if (!silent_p) -+ error_at (loc, "SVE type %qT does not have a fixed size", type); -+ return false; -+ -+ case TCTX_ALIGNOF: -+ if (!silent_p) -+ error_at (loc, "SVE type %qT does not have a defined alignment", type); -+ return false; -+ -+ case TCTX_THREAD_STORAGE: -+ if (!silent_p) -+ error_at (loc, "variables of type %qT cannot have thread-local" -+ " storage duration", type); -+ return false; -+ -+ case TCTX_POINTER_ARITH: -+ if (!silent_p) -+ error_at (loc, "arithmetic on pointer to SVE type %qT", type); -+ return false; -+ -+ case TCTX_FIELD: -+ if (silent_p) -+ ; -+ else if (lang_GNU_CXX ()) -+ error_at (loc, "member variables cannot have SVE type %qT", type); -+ else -+ error_at (loc, "fields cannot have SVE type %qT", type); -+ return false; -+ -+ case TCTX_ARRAY_ELEMENT: -+ if (!silent_p) -+ error_at (loc, "array elements cannot have SVE type %qT", type); -+ return false; -+ } -+ gcc_unreachable (); -+} -+ - } - - using namespace aarch64_sve; -diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c -index 3486cca89..c2ab7af56 100644 ---- a/gcc/config/aarch64/aarch64.c -+++ b/gcc/config/aarch64/aarch64.c -@@ -16201,6 +16201,15 @@ aarch64_mangle_type (const_tree type) - return NULL; - } - -+/* Implement TARGET_VERIFY_TYPE_CONTEXT. */ -+ -+static bool -+aarch64_verify_type_context (location_t loc, type_context_kind context, -+ const_tree type, bool silent_p) -+{ -+ return aarch64_sve::verify_type_context (loc, context, type, silent_p); -+} -+ - /* Find the first rtx_insn before insn that will generate an assembly - instruction. */ - -@@ -21967,6 +21976,9 @@ aarch64_libgcc_floating_mode_supported_p - #undef TARGET_MANGLE_TYPE - #define TARGET_MANGLE_TYPE aarch64_mangle_type - -+#undef TARGET_VERIFY_TYPE_CONTEXT -+#define TARGET_VERIFY_TYPE_CONTEXT aarch64_verify_type_context -+ - #undef TARGET_INVALID_CONVERSION - #define TARGET_INVALID_CONVERSION aarch64_invalid_conversion - -diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi -index 3f22bb1f6..220bbe7dd 100644 ---- a/gcc/doc/tm.texi -+++ b/gcc/doc/tm.texi -@@ -11963,6 +11963,19 @@ conversion rules. - This is currently used only by the C and C++ front ends. - @end deftypefn - -+@deftypefn {Target Hook} bool TARGET_VERIFY_TYPE_CONTEXT (location_t @var{loc}, type_context_kind @var{context}, const_tree @var{type}, bool @var{silent_p}) -+If defined, this hook returns false if there is a target-specific reason -+why type @var{type} cannot be used in the source language context described -+by @var{context}. When @var{silent_p} is false, the hook also reports an -+error against @var{loc} for invalid uses of @var{type}. -+ -+Calls to this hook should be made through the global function -+@code{verify_type_context}, which makes the @var{silent_p} parameter -+default to false and also handles @code{error_mark_node}. -+ -+The default implementation always returns true. -+@end deftypefn -+ - @defmac OBJC_JBLEN - This macro determines the size of the objective C jump buffer for the - NeXT runtime. By default, OBJC_JBLEN is defined to an innocuous value. -diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in -index 89cfb5253..a8cb42a6b 100644 ---- a/gcc/doc/tm.texi.in -+++ b/gcc/doc/tm.texi.in -@@ -8095,6 +8095,8 @@ and scanf formatter settings. 
- - @hook TARGET_CONVERT_TO_TYPE - -+@hook TARGET_VERIFY_TYPE_CONTEXT -+ - @defmac OBJC_JBLEN - This macro determines the size of the objective C jump buffer for the - NeXT runtime. By default, OBJC_JBLEN is defined to an innocuous value. -diff --git a/gcc/target.def b/gcc/target.def -index 05389cdd1..4e3dc341c 100644 ---- a/gcc/target.def -+++ b/gcc/target.def -@@ -5234,6 +5234,22 @@ This is currently used only by the C and C++ front ends.", - tree, (tree type, tree expr), - hook_tree_tree_tree_null) - -+DEFHOOK -+(verify_type_context, -+ "If defined, this hook returns false if there is a target-specific reason\n\ -+why type @var{type} cannot be used in the source language context described\n\ -+by @var{context}. When @var{silent_p} is false, the hook also reports an\n\ -+error against @var{loc} for invalid uses of @var{type}.\n\ -+\n\ -+Calls to this hook should be made through the global function\n\ -+@code{verify_type_context}, which makes the @var{silent_p} parameter\n\ -+default to false and also handles @code{error_mark_node}.\n\ -+\n\ -+The default implementation always returns true.", -+ bool, (location_t loc, type_context_kind context, const_tree type, -+ bool silent_p), -+ NULL) -+ - DEFHOOK - (can_change_mode_class, - "This hook returns true if it is possible to bitcast values held in\n\ -diff --git a/gcc/target.h b/gcc/target.h -index 964629669..3e6d34d34 100644 ---- a/gcc/target.h -+++ b/gcc/target.h -@@ -219,6 +219,35 @@ typedef auto_vec auto_vector_modes; - will choose the first mode that works. */ - const unsigned int VECT_COMPARE_COSTS = 1U << 0; - -+/* The contexts in which the use of a type T can be checked by -+ TARGET_VERIFY_TYPE_CONTEXT. */ -+enum type_context_kind { -+ /* Directly measuring the size of T. */ -+ TCTX_SIZEOF, -+ -+ /* Directly measuring the alignment of T. */ -+ TCTX_ALIGNOF, -+ -+ /* Creating objects of type T with static storage duration. */ -+ TCTX_STATIC_STORAGE, -+ -+ /* Creating objects of type T with thread-local storage duration. */ -+ TCTX_THREAD_STORAGE, -+ -+ /* Creating a field of type T. */ -+ TCTX_FIELD, -+ -+ /* Creating an array with elements of type T. */ -+ TCTX_ARRAY_ELEMENT, -+ -+ /* Adding to or subtracting from a pointer to T, or computing the -+ difference between two pointers when one of them is a pointer to T. */ -+ TCTX_POINTER_ARITH -+}; -+ -+extern bool verify_type_context (location_t, type_context_kind, const_tree, -+ bool = false); -+ - /* The target structure. This holds all the backend hooks. */ - #define DEFHOOKPOD(NAME, DOC, TYPE, INIT) TYPE NAME; - #define DEFHOOK(NAME, DOC, TYPE, PARAMS, INIT) TYPE (* NAME) PARAMS; -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/aarch64-sve-acle.exp b/gcc/testsuite/gcc.target/aarch64/sve/acle/aarch64-sve-acle.exp -index 34d9dfd43..1672ddfef 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/acle/aarch64-sve-acle.exp -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/aarch64-sve-acle.exp -@@ -45,9 +45,9 @@ if { [check_effective_target_aarch64_sve] } { - } - - # Main loop. --# FIXME: This should include general/*.c too, but leave that until the --# C frontend allows initialization of SVE vectors. --set files [glob -nocomplain $srcdir/$subdir/general-c/*.c] -+set files [glob -nocomplain \ -+ "$srcdir/$subdir/general/*.c" \ -+ "$srcdir/$subdir/general-c/*.c"] - dg-runtest [lsort $files] "$sve_flags" $DEFAULT_CFLAGS - - # All done. 
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_1.c -new file mode 100644 -index 000000000..c4596f7e9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_1.c -@@ -0,0 +1,415 @@ -+/* { dg-options "-msve-vector-bits=256" } */ -+ -+#include -+ -+typedef uint8_t gnu_uint8_t __attribute__ ((vector_size (32))); -+typedef int8_t gnu_int8_t __attribute__ ((vector_size (32))); -+ -+void -+f (svuint8_t sve_u1, svint8_t sve_s1, -+ gnu_uint8_t gnu_u1, gnu_int8_t gnu_s1, int n, unsigned char uc) -+{ -+ /* Initialization. */ -+ -+ svuint8_t init_sve_u1 = 0; /* { dg-error {incompatible types when initializing type 'svuint8_t' using type 'int'} } */ -+ svuint8_t init_sve_u2 = {}; /* { dg-error {empty scalar initializer} } */ -+ svuint8_t init_sve_u3 = { sve_u1 }; -+ svuint8_t init_sve_u4 = { gnu_u1 }; -+ svuint8_t init_sve_u5 = { sve_s1 }; /* { dg-error {incompatible types when initializing type 'svuint8_t' using type 'svint8_t'} } */ -+ svuint8_t init_sve_u6 = { gnu_s1 }; /* { dg-error {incompatible types when initializing type 'svuint8_t'} } */ -+ svuint8_t init_sve_u7 = { 0 }; /* { dg-error {incompatible types when initializing type 'svuint8_t' using type 'int'} } */ -+ svuint8_t init_sve_u8 = { sve_u1, sve_u1 }; /* { dg-warning {excess elements in scalar initializer} } */ -+ svuint8_t init_sve_u9 = { gnu_u1, gnu_u1 }; /* { dg-warning {excess elements in scalar initializer} } */ -+ -+ gnu_uint8_t init_gnu_u1 = 0; /* { dg-error {incompatible types when initializing type 'gnu_uint8_t'[^\n]* using type 'int'} } */ -+ gnu_uint8_t init_gnu_u2 = {}; -+ gnu_uint8_t init_gnu_u3 = { sve_u1 }; /* { dg-error {incompatible types when initializing type 'unsigned char'} } */ -+ gnu_uint8_t init_gnu_u4 = { gnu_u1 }; /* { dg-error {incompatible types when initializing type 'unsigned char'} } */ -+ gnu_uint8_t init_gnu_u5 = { sve_s1 }; /* { dg-error {incompatible types when initializing type 'unsigned char'} } */ -+ gnu_uint8_t init_gnu_u6 = { gnu_s1 }; /* { dg-error {incompatible types when initializing type 'unsigned char'} } */ -+ gnu_uint8_t init_gnu_u7 = { 0 }; -+ -+ /* Compound literals. */ -+ -+ (svuint8_t) {}; /* { dg-error {empty scalar initializer} } */ -+ (svuint8_t) { 0 }; /* { dg-error {incompatible types when initializing type 'svuint8_t' using type 'int'} } */ -+ (svuint8_t) { sve_u1 }; -+ (svuint8_t) { gnu_u1 }; -+ (svuint8_t) { sve_s1 }; /* { dg-error {incompatible types when initializing type 'svuint8_t' using type 'svint8_t'} } */ -+ (svuint8_t) { gnu_s1 }; /* { dg-error {incompatible types when initializing type 'svuint8_t'} } */ -+ -+ (gnu_uint8_t) {}; -+ (gnu_uint8_t) { 0 }; -+ (gnu_uint8_t) { sve_u1 }; /* { dg-error {incompatible types when initializing type 'unsigned char'} } */ -+ (gnu_uint8_t) { gnu_u1 }; /* { dg-error {incompatible types when initializing type 'unsigned char'} } */ -+ -+ /* Assignment. 
*/ -+ -+ sve_u1 = 0; /* { dg-error {incompatible types when assigning to type 'svuint8_t' from type 'int'} } */ -+ sve_u1 = sve_u1; -+ sve_u1 = gnu_u1; -+ sve_u1 = sve_s1; /* { dg-error {incompatible types when assigning to type 'svuint8_t' from type 'svint8_t'} } */ -+ sve_u1 = gnu_s1; /* { dg-error {incompatible types when assigning to type 'svuint8_t' from type 'gnu_int8_t'} } */ -+ -+ gnu_u1 = 0; /* { dg-error {incompatible types when assigning to type 'gnu_uint8_t'[^\n]* from type 'int'} } */ -+ gnu_u1 = sve_u1; -+ gnu_u1 = gnu_u1; -+ gnu_u1 = sve_s1; /* { dg-error {incompatible types when assigning to type 'gnu_uint8_t'[^\n]* from type 'svint8_t'} } */ -+ gnu_u1 = gnu_s1; /* { dg-error {incompatible types when assigning to type 'gnu_uint8_t'[^\n]* from type 'gnu_int8_t'} } */ -+ -+ /* Casts. */ -+ -+ (void) sve_u1; -+ (svuint8_t) sve_u1; -+ (svuint8_t) gnu_u1; -+ (svuint8_t) 0; /* { dg-error {conversion to non-scalar type requested} } */ -+ (svuint8_t) n; /* { dg-error {conversion to non-scalar type requested} } */ -+ (svint8_t) sve_u1; /* { dg-error {conversion to non-scalar type requested} } */ -+ (svint8_t) gnu_u1; -+ -+ (void) gnu_u1; -+ (gnu_uint8_t) sve_u1; -+ (gnu_uint8_t) gnu_u1; -+ (gnu_uint8_t) 0; /* { dg-error {can't convert a value of type 'int' to vector type '[^']*' which has different size} } */ -+ (gnu_uint8_t) n; /* { dg-error {can't convert a value of type 'int' to vector type '[^']*' which has different size} } */ -+ (gnu_int8_t) sve_u1; -+ (gnu_int8_t) gnu_u1; -+ -+ /* Vector indexing. */ -+ -+ sve_u1[0]; /* { dg-error {subscripted value is neither array nor pointer} } */ -+ &sve_u1[0]; /* { dg-error {subscripted value is neither array nor pointer} } */ -+ -+ gnu_u1[0]; -+ &gnu_u1[0]; -+ -+ /* Unary operators. */ -+ -+ +sve_u1; /* { dg-error {wrong type argument to unary plus} } */ -+ -sve_u1; /* { dg-error {wrong type argument to unary minus} } */ -+ ~sve_u1; /* { dg-error {wrong type argument to bit-complement} } */ -+ !sve_u1; /* { dg-error {wrong type argument to unary exclamation mark} } */ -+ *sve_u1; /* { dg-error {invalid type argument of unary '\*'} } */ -+ __real sve_u1; /* { dg-error {wrong type argument to __real} } */ -+ __imag sve_u1; /* { dg-error {wrong type argument to __imag} } */ -+ ++sve_u1; /* { dg-error {wrong type argument to increment} } */ -+ --sve_u1; /* { dg-error {wrong type argument to decrement} } */ -+ sve_u1++; /* { dg-error {wrong type argument to increment} } */ -+ sve_u1--; /* { dg-error {wrong type argument to decrement} } */ -+ -+ +gnu_u1; -+ -gnu_u1; -+ ~gnu_u1; -+ !gnu_u1; /* { dg-error {wrong type argument to unary exclamation mark} } */ -+ *gnu_u1; /* { dg-error {invalid type argument of unary '\*'} } */ -+ __real gnu_u1; /* { dg-error {wrong type argument to __real} } */ -+ __imag gnu_u1; /* { dg-error {wrong type argument to __imag} } */ -+ ++gnu_u1; -+ --gnu_u1; -+ gnu_u1++; -+ gnu_u1--; -+ -+ /* Vector-vector binary arithmetic. 
*/ -+ -+ sve_u1 + sve_u1; /* { dg-error {invalid operands to binary \+} } */ -+ sve_u1 - sve_u1; /* { dg-error {invalid operands to binary -} } */ -+ sve_u1 * sve_u1; /* { dg-error {invalid operands to binary \*} } */ -+ sve_u1 / sve_u1; /* { dg-error {invalid operands to binary /} } */ -+ sve_u1 % sve_u1; /* { dg-error {invalid operands to binary %} } */ -+ sve_u1 & sve_u1; /* { dg-error {invalid operands to binary \&} } */ -+ sve_u1 | sve_u1; /* { dg-error {invalid operands to binary \|} } */ -+ sve_u1 ^ sve_u1; /* { dg-error {invalid operands to binary \^} } */ -+ sve_u1 == sve_u1; /* { dg-error {invalid operands to binary ==} } */ -+ sve_u1 != sve_u1; /* { dg-error {invalid operands to binary !=} } */ -+ sve_u1 <= sve_u1; /* { dg-error {invalid operands to binary <=} } */ -+ sve_u1 < sve_u1; /* { dg-error {invalid operands to binary <} } */ -+ sve_u1 > sve_u1; /* { dg-error {invalid operands to binary >} } */ -+ sve_u1 >= sve_u1; /* { dg-error {invalid operands to binary >=} } */ -+ sve_u1 << sve_u1; /* { dg-error {invalid operands to binary <<} } */ -+ sve_u1 >> sve_u1; /* { dg-error {invalid operands to binary >>} } */ -+ sve_u1 && sve_u1; /* { dg-error {used vector type where scalar is required} } */ -+ sve_u1 || sve_u1; /* { dg-error {used vector type where scalar is required} } */ -+ -+ sve_u1 + gnu_u1; /* { dg-error {invalid operands to binary \+} } */ -+ sve_u1 - gnu_u1; /* { dg-error {invalid operands to binary -} } */ -+ sve_u1 * gnu_u1; /* { dg-error {invalid operands to binary \*} } */ -+ sve_u1 / gnu_u1; /* { dg-error {invalid operands to binary /} } */ -+ sve_u1 % gnu_u1; /* { dg-error {invalid operands to binary %} } */ -+ sve_u1 & gnu_u1; /* { dg-error {invalid operands to binary \&} } */ -+ sve_u1 | gnu_u1; /* { dg-error {invalid operands to binary \|} } */ -+ sve_u1 ^ gnu_u1; /* { dg-error {invalid operands to binary \^} } */ -+ sve_u1 == gnu_u1; /* { dg-error {invalid operands to binary ==} } */ -+ sve_u1 != gnu_u1; /* { dg-error {invalid operands to binary !=} } */ -+ sve_u1 <= gnu_u1; /* { dg-error {invalid operands to binary <=} } */ -+ sve_u1 < gnu_u1; /* { dg-error {invalid operands to binary <} } */ -+ sve_u1 > gnu_u1; /* { dg-error {invalid operands to binary >} } */ -+ sve_u1 >= gnu_u1; /* { dg-error {invalid operands to binary >=} } */ -+ sve_u1 << gnu_u1; /* { dg-error {invalid operands to binary <<} } */ -+ sve_u1 >> gnu_u1; /* { dg-error {invalid operands to binary >>} } */ -+ sve_u1 && gnu_u1; /* { dg-error {used vector type where scalar is required} } */ -+ sve_u1 || gnu_u1; /* { dg-error {used vector type where scalar is required} } */ -+ -+ gnu_u1 + sve_u1; /* { dg-error {invalid operands to binary \+} } */ -+ gnu_u1 - sve_u1; /* { dg-error {invalid operands to binary -} } */ -+ gnu_u1 * sve_u1; /* { dg-error {invalid operands to binary \*} } */ -+ gnu_u1 / sve_u1; /* { dg-error {invalid operands to binary /} } */ -+ gnu_u1 % sve_u1; /* { dg-error {invalid operands to binary %} } */ -+ gnu_u1 & sve_u1; /* { dg-error {invalid operands to binary \&} } */ -+ gnu_u1 | sve_u1; /* { dg-error {invalid operands to binary \|} } */ -+ gnu_u1 ^ sve_u1; /* { dg-error {invalid operands to binary \^} } */ -+ gnu_u1 == sve_u1; /* { dg-error {invalid operands to binary ==} } */ -+ gnu_u1 != sve_u1; /* { dg-error {invalid operands to binary !=} } */ -+ gnu_u1 <= sve_u1; /* { dg-error {invalid operands to binary <=} } */ -+ gnu_u1 < sve_u1; /* { dg-error {invalid operands to binary <} } */ -+ gnu_u1 > sve_u1; /* { dg-error {invalid operands to binary >} } */ -+ gnu_u1 
>= sve_u1; /* { dg-error {invalid operands to binary >=} } */ -+ gnu_u1 << sve_u1; /* { dg-error {invalid operands to binary <<} } */ -+ gnu_u1 >> sve_u1; /* { dg-error {invalid operands to binary >>} } */ -+ gnu_u1 && sve_u1; /* { dg-error {used vector type where scalar is required} } */ -+ gnu_u1 || sve_u1; /* { dg-error {used vector type where scalar is required} } */ -+ -+ gnu_u1 + gnu_u1; -+ gnu_u1 - gnu_u1; -+ gnu_u1 * gnu_u1; -+ gnu_u1 / gnu_u1; -+ gnu_u1 % gnu_u1; -+ gnu_u1 & gnu_u1; -+ gnu_u1 | gnu_u1; -+ gnu_u1 ^ gnu_u1; -+ gnu_u1 == gnu_u1; -+ gnu_u1 != gnu_u1; -+ gnu_u1 <= gnu_u1; -+ gnu_u1 < gnu_u1; -+ gnu_u1 > gnu_u1; -+ gnu_u1 >= gnu_u1; -+ gnu_u1 << gnu_u1; -+ gnu_u1 >> gnu_u1; -+ gnu_u1 && gnu_u1; /* { dg-error {used vector type where scalar is required} } */ -+ gnu_u1 || gnu_u1; /* { dg-error {used vector type where scalar is required} } */ -+ -+ /* Vector-scalar binary arithmetic. */ -+ -+ sve_u1 + 2; /* { dg-error {invalid operands to binary \+} } */ -+ sve_u1 - 2; /* { dg-error {invalid operands to binary -} } */ -+ sve_u1 * 2; /* { dg-error {invalid operands to binary \*} } */ -+ sve_u1 / 2; /* { dg-error {invalid operands to binary /} } */ -+ sve_u1 % 2; /* { dg-error {invalid operands to binary %} } */ -+ sve_u1 & 2; /* { dg-error {invalid operands to binary \&} } */ -+ sve_u1 | 2; /* { dg-error {invalid operands to binary \|} } */ -+ sve_u1 ^ 2; /* { dg-error {invalid operands to binary \^} } */ -+ sve_u1 == 2; /* { dg-error {invalid operands to binary ==} } */ -+ sve_u1 != 2; /* { dg-error {invalid operands to binary !=} } */ -+ sve_u1 <= 2; /* { dg-error {invalid operands to binary <=} } */ -+ sve_u1 < 2; /* { dg-error {invalid operands to binary <} } */ -+ sve_u1 > 2; /* { dg-error {invalid operands to binary >} } */ -+ sve_u1 >= 2; /* { dg-error {invalid operands to binary >=} } */ -+ sve_u1 << 2; /* { dg-error {invalid operands to binary <<} } */ -+ sve_u1 >> 2; /* { dg-error {invalid operands to binary >>} } */ -+ sve_u1 && 2; /* { dg-error {used vector type where scalar is required} } */ -+ sve_u1 || 2; /* { dg-error {used vector type where scalar is required} } */ -+ -+ sve_u1 + uc; /* { dg-error {invalid operands to binary \+} } */ -+ sve_u1 - uc; /* { dg-error {invalid operands to binary -} } */ -+ sve_u1 * uc; /* { dg-error {invalid operands to binary \*} } */ -+ sve_u1 / uc; /* { dg-error {invalid operands to binary /} } */ -+ sve_u1 % uc; /* { dg-error {invalid operands to binary %} } */ -+ sve_u1 & uc; /* { dg-error {invalid operands to binary \&} } */ -+ sve_u1 | uc; /* { dg-error {invalid operands to binary \|} } */ -+ sve_u1 ^ uc; /* { dg-error {invalid operands to binary \^} } */ -+ sve_u1 == uc; /* { dg-error {invalid operands to binary ==} } */ -+ sve_u1 != uc; /* { dg-error {invalid operands to binary !=} } */ -+ sve_u1 <= uc; /* { dg-error {invalid operands to binary <=} } */ -+ sve_u1 < uc; /* { dg-error {invalid operands to binary <} } */ -+ sve_u1 > uc; /* { dg-error {invalid operands to binary >} } */ -+ sve_u1 >= uc; /* { dg-error {invalid operands to binary >=} } */ -+ sve_u1 << uc; /* { dg-error {invalid operands to binary <<} } */ -+ sve_u1 >> uc; /* { dg-error {invalid operands to binary >>} } */ -+ sve_u1 && uc; /* { dg-error {used vector type where scalar is required} } */ -+ sve_u1 || uc; /* { dg-error {used vector type where scalar is required} } */ -+ -+ gnu_u1 + 2; -+ gnu_u1 - 2; -+ gnu_u1 * 2; -+ gnu_u1 / 2; -+ gnu_u1 % 2; -+ gnu_u1 & 2; -+ gnu_u1 | 2; -+ gnu_u1 ^ 2; -+ gnu_u1 == 2; -+ gnu_u1 != 2; -+ gnu_u1 <= 2; -+ gnu_u1 < 
2; -+ gnu_u1 > 2; -+ gnu_u1 >= 2; -+ gnu_u1 << 2; -+ gnu_u1 >> 2; -+ gnu_u1 && 2; /* { dg-error {used vector type where scalar is required} } */ -+ gnu_u1 || 2; /* { dg-error {used vector type where scalar is required} } */ -+ -+ gnu_u1 + uc; -+ gnu_u1 - uc; -+ gnu_u1 * uc; -+ gnu_u1 / uc; -+ gnu_u1 % uc; -+ gnu_u1 & uc; -+ gnu_u1 | uc; -+ gnu_u1 ^ uc; -+ gnu_u1 == uc; -+ gnu_u1 != uc; -+ gnu_u1 <= uc; -+ gnu_u1 < uc; -+ gnu_u1 > uc; -+ gnu_u1 >= uc; -+ gnu_u1 << uc; -+ gnu_u1 >> uc; -+ gnu_u1 && uc; /* { dg-error {used vector type where scalar is required} } */ -+ gnu_u1 || uc; /* { dg-error {used vector type where scalar is required} } */ -+ -+ /* Scalar-vector binary arithmetic. */ -+ -+ 3 + sve_u1; /* { dg-error {invalid operands to binary \+} } */ -+ 3 - sve_u1; /* { dg-error {invalid operands to binary -} } */ -+ 3 * sve_u1; /* { dg-error {invalid operands to binary \*} } */ -+ 3 / sve_u1; /* { dg-error {invalid operands to binary /} } */ -+ 3 % sve_u1; /* { dg-error {invalid operands to binary %} } */ -+ 3 & sve_u1; /* { dg-error {invalid operands to binary \&} } */ -+ 3 | sve_u1; /* { dg-error {invalid operands to binary \|} } */ -+ 3 ^ sve_u1; /* { dg-error {invalid operands to binary \^} } */ -+ 3 == sve_u1; /* { dg-error {invalid operands to binary ==} } */ -+ 3 != sve_u1; /* { dg-error {invalid operands to binary !=} } */ -+ 3 <= sve_u1; /* { dg-error {invalid operands to binary <=} } */ -+ 3 < sve_u1; /* { dg-error {invalid operands to binary <} } */ -+ 3 > sve_u1; /* { dg-error {invalid operands to binary >} } */ -+ 3 >= sve_u1; /* { dg-error {invalid operands to binary >=} } */ -+ 3 << sve_u1; /* { dg-error {invalid operands to binary <<} } */ -+ 3 >> sve_u1; /* { dg-error {invalid operands to binary >>} } */ -+ 3 && sve_u1; /* { dg-error {invalid operands to binary \&\&} } */ -+ 3 || sve_u1; /* { dg-error {invalid operands to binary \|\|} } */ -+ -+ 3 + gnu_u1; -+ 3 - gnu_u1; -+ 3 * gnu_u1; -+ 3 / gnu_u1; -+ 3 % gnu_u1; -+ 3 & gnu_u1; -+ 3 | gnu_u1; -+ 3 ^ gnu_u1; -+ 3 == gnu_u1; -+ 3 != gnu_u1; -+ 3 <= gnu_u1; -+ 3 < gnu_u1; -+ 3 > gnu_u1; -+ 3 >= gnu_u1; -+ 3 << gnu_u1; -+ 3 >> gnu_u1; -+ 3 && gnu_u1; /* { dg-error {invalid operands to binary \&\&} } */ -+ 3 || gnu_u1; /* { dg-error {invalid operands to binary \|\|} } */ -+ -+ /* Mismatched types. 
*/ -+ -+ sve_u1 + sve_s1; /* { dg-error {invalid operands to binary \+} } */ -+ sve_u1 - sve_s1; /* { dg-error {invalid operands to binary -} } */ -+ sve_u1 * sve_s1; /* { dg-error {invalid operands to binary \*} } */ -+ sve_u1 / sve_s1; /* { dg-error {invalid operands to binary /} } */ -+ sve_u1 % sve_s1; /* { dg-error {invalid operands to binary %} } */ -+ sve_u1 & sve_s1; /* { dg-error {invalid operands to binary \&} } */ -+ sve_u1 | sve_s1; /* { dg-error {invalid operands to binary \|} } */ -+ sve_u1 ^ sve_s1; /* { dg-error {invalid operands to binary \^} } */ -+ sve_u1 == sve_s1; /* { dg-error {invalid operands to binary ==} } */ -+ sve_u1 != sve_s1; /* { dg-error {invalid operands to binary !=} } */ -+ sve_u1 <= sve_s1; /* { dg-error {invalid operands to binary <=} } */ -+ sve_u1 < sve_s1; /* { dg-error {invalid operands to binary <} } */ -+ sve_u1 > sve_s1; /* { dg-error {invalid operands to binary >} } */ -+ sve_u1 >= sve_s1; /* { dg-error {invalid operands to binary >=} } */ -+ sve_u1 << sve_s1; /* { dg-error {invalid operands to binary <<} } */ -+ sve_u1 >> sve_s1; /* { dg-error {invalid operands to binary >>} } */ -+ sve_u1 && sve_s1; /* { dg-error {used vector type where scalar is required} } */ -+ sve_u1 || sve_s1; /* { dg-error {used vector type where scalar is required} } */ -+ -+ sve_u1 + gnu_s1; /* { dg-error {invalid operands to binary \+} } */ -+ sve_u1 - gnu_s1; /* { dg-error {invalid operands to binary -} } */ -+ sve_u1 * gnu_s1; /* { dg-error {invalid operands to binary \*} } */ -+ sve_u1 / gnu_s1; /* { dg-error {invalid operands to binary /} } */ -+ sve_u1 % gnu_s1; /* { dg-error {invalid operands to binary %} } */ -+ sve_u1 & gnu_s1; /* { dg-error {invalid operands to binary \&} } */ -+ sve_u1 | gnu_s1; /* { dg-error {invalid operands to binary \|} } */ -+ sve_u1 ^ gnu_s1; /* { dg-error {invalid operands to binary \^} } */ -+ sve_u1 == gnu_s1; /* { dg-error {invalid operands to binary ==} } */ -+ sve_u1 != gnu_s1; /* { dg-error {invalid operands to binary !=} } */ -+ sve_u1 <= gnu_s1; /* { dg-error {invalid operands to binary <=} } */ -+ sve_u1 < gnu_s1; /* { dg-error {invalid operands to binary <} } */ -+ sve_u1 > gnu_s1; /* { dg-error {invalid operands to binary >} } */ -+ sve_u1 >= gnu_s1; /* { dg-error {invalid operands to binary >=} } */ -+ sve_u1 << gnu_s1; /* { dg-error {invalid operands to binary <<} } */ -+ sve_u1 >> gnu_s1; /* { dg-error {invalid operands to binary >>} } */ -+ sve_u1 && gnu_s1; /* { dg-error {used vector type where scalar is required} } */ -+ sve_u1 || gnu_s1; /* { dg-error {used vector type where scalar is required} } */ -+ -+ gnu_u1 + sve_s1; /* { dg-error {invalid operands to binary \+} } */ -+ gnu_u1 - sve_s1; /* { dg-error {invalid operands to binary -} } */ -+ gnu_u1 * sve_s1; /* { dg-error {invalid operands to binary \*} } */ -+ gnu_u1 / sve_s1; /* { dg-error {invalid operands to binary /} } */ -+ gnu_u1 % sve_s1; /* { dg-error {invalid operands to binary %} } */ -+ gnu_u1 & sve_s1; /* { dg-error {invalid operands to binary \&} } */ -+ gnu_u1 | sve_s1; /* { dg-error {invalid operands to binary \|} } */ -+ gnu_u1 ^ sve_s1; /* { dg-error {invalid operands to binary \^} } */ -+ gnu_u1 == sve_s1; /* { dg-error {invalid operands to binary ==} } */ -+ gnu_u1 != sve_s1; /* { dg-error {invalid operands to binary !=} } */ -+ gnu_u1 <= sve_s1; /* { dg-error {invalid operands to binary <=} } */ -+ gnu_u1 < sve_s1; /* { dg-error {invalid operands to binary <} } */ -+ gnu_u1 > sve_s1; /* { dg-error {invalid operands to binary >} } */ -+ gnu_u1 
>= sve_s1; /* { dg-error {invalid operands to binary >=} } */ -+ gnu_u1 << sve_s1; /* { dg-error {invalid operands to binary <<} } */ -+ gnu_u1 >> sve_s1; /* { dg-error {invalid operands to binary >>} } */ -+ gnu_u1 && sve_s1; /* { dg-error {used vector type where scalar is required} } */ -+ gnu_u1 || sve_s1; /* { dg-error {used vector type where scalar is required} } */ -+ -+ gnu_u1 + gnu_s1; -+ gnu_u1 - gnu_s1; -+ gnu_u1 * gnu_s1; -+ gnu_u1 / gnu_s1; -+ gnu_u1 % gnu_s1; -+ gnu_u1 & gnu_s1; -+ gnu_u1 | gnu_s1; -+ gnu_u1 ^ gnu_s1; -+ gnu_u1 == gnu_s1; -+ gnu_u1 != gnu_s1; -+ gnu_u1 <= gnu_s1; -+ gnu_u1 < gnu_s1; -+ gnu_u1 > gnu_s1; -+ gnu_u1 >= gnu_s1; -+ gnu_u1 << gnu_s1; -+ gnu_u1 >> gnu_s1; -+ gnu_u1 && gnu_s1; /* { dg-error {used vector type where scalar is required} } */ -+ gnu_u1 || gnu_s1; /* { dg-error {used vector type where scalar is required} } */ -+ -+ /* Conditional expressions. */ -+ -+ uc ? sve_u1 : sve_u1; -+ uc ? gnu_u1 : sve_u1; /* { dg-error {type mismatch in conditional expression} } */ -+ uc ? sve_u1 : gnu_u1; /* { dg-error {type mismatch in conditional expression} } */ -+ uc ? gnu_u1 : gnu_u1; -+ -+ sve_u1 ? sve_u1 : sve_u1; /* { dg-error {used vector type where scalar is required} } */ -+ sve_u1 ? gnu_u1 : sve_u1; /* { dg-error {used vector type where scalar is required} } */ -+ sve_u1 ? sve_u1 : gnu_u1; /* { dg-error {used vector type where scalar is required} } */ -+ sve_u1 ? gnu_u1 : gnu_u1; /* { dg-error {used vector type where scalar is required} } */ -+ -+ gnu_u1 ? sve_u1 : sve_u1; /* { dg-error {used vector type where scalar is required} } */ -+ gnu_u1 ? gnu_u1 : sve_u1; /* { dg-error {used vector type where scalar is required} } */ -+ gnu_u1 ? sve_u1 : gnu_u1; /* { dg-error {used vector type where scalar is required} } */ -+ gnu_u1 ? gnu_u1 : gnu_u1; /* { dg-error {used vector type where scalar is required} } */ -+ -+ /* Vector built-ins. 
*/ -+ -+ __builtin_shuffle (sve_u1, sve_u1, sve_u1); /* { dg-error {'__builtin_shuffle' last argument must be an integer vector} } */ -+ __builtin_shuffle (sve_u1, gnu_u1, gnu_u1); /* { dg-error {'__builtin_shuffle' arguments must be vectors} } */ -+ __builtin_shuffle (gnu_u1, sve_u1, gnu_u1); /* { dg-error {'__builtin_shuffle' arguments must be vectors} } */ -+ __builtin_shuffle (gnu_u1, gnu_u1, sve_u1); /* { dg-error {'__builtin_shuffle' last argument must be an integer vector} } */ -+ __builtin_shuffle (gnu_u1, gnu_u1, gnu_u1); -+ -+ __builtin_convertvector (sve_u1, svuint8_t); /* { dg-error {'__builtin_convertvector' first argument must be an integer or floating vector} } */ -+ __builtin_convertvector (gnu_u1, svuint8_t); /* { dg-error {'__builtin_convertvector' second argument must be an integer or floating vector type} } */ -+ __builtin_convertvector (sve_u1, gnu_uint8_t); /* { dg-error {'__builtin_convertvector' first argument must be an integer or floating vector} } */ -+ __builtin_convertvector (gnu_u1, gnu_uint8_t); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_2.c -new file mode 100644 -index 000000000..61e6d2163 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/gnu_vectors_2.c -@@ -0,0 +1,415 @@ -+/* { dg-options "-msve-vector-bits=256 -flax-vector-conversions" } */ -+ -+#include -+ -+typedef uint8_t gnu_uint8_t __attribute__ ((vector_size (32))); -+typedef int8_t gnu_int8_t __attribute__ ((vector_size (32))); -+ -+void -+f (svuint8_t sve_u1, svint8_t sve_s1, -+ gnu_uint8_t gnu_u1, gnu_int8_t gnu_s1, int n, unsigned char uc) -+{ -+ /* Initialization. */ -+ -+ svuint8_t init_sve_u1 = 0; /* { dg-error {incompatible types when initializing type 'svuint8_t' using type 'int'} } */ -+ svuint8_t init_sve_u2 = {}; /* { dg-error {empty scalar initializer} } */ -+ svuint8_t init_sve_u3 = { sve_u1 }; -+ svuint8_t init_sve_u4 = { gnu_u1 }; -+ svuint8_t init_sve_u5 = { sve_s1 }; -+ svuint8_t init_sve_u6 = { gnu_s1 }; -+ svuint8_t init_sve_u7 = { 0 }; /* { dg-error {incompatible types when initializing type 'svuint8_t' using type 'int'} } */ -+ svuint8_t init_sve_u8 = { sve_u1, sve_u1 }; /* { dg-warning {excess elements in scalar initializer} } */ -+ svuint8_t init_sve_u9 = { gnu_u1, gnu_u1 }; /* { dg-warning {excess elements in scalar initializer} } */ -+ -+ gnu_uint8_t init_gnu_u1 = 0; /* { dg-error {incompatible types when initializing type 'gnu_uint8_t'[^\n]* using type 'int'} } */ -+ gnu_uint8_t init_gnu_u2 = {}; -+ gnu_uint8_t init_gnu_u3 = { sve_u1 }; /* { dg-error {incompatible types when initializing type 'unsigned char'} } */ -+ gnu_uint8_t init_gnu_u4 = { gnu_u1 }; /* { dg-error {incompatible types when initializing type 'unsigned char'} } */ -+ gnu_uint8_t init_gnu_u5 = { sve_s1 }; /* { dg-error {incompatible types when initializing type 'unsigned char'} } */ -+ gnu_uint8_t init_gnu_u6 = { gnu_s1 }; /* { dg-error {incompatible types when initializing type 'unsigned char'} } */ -+ gnu_uint8_t init_gnu_u7 = { 0 }; -+ -+ /* Compound literals. 
*/ -+ -+ (svuint8_t) {}; /* { dg-error {empty scalar initializer} } */ -+ (svuint8_t) { 0 }; /* { dg-error {incompatible types when initializing type 'svuint8_t' using type 'int'} } */ -+ (svuint8_t) { sve_u1 }; -+ (svuint8_t) { gnu_u1 }; -+ (svuint8_t) { sve_s1 }; -+ (svuint8_t) { gnu_s1 }; -+ -+ (gnu_uint8_t) {}; -+ (gnu_uint8_t) { 0 }; -+ (gnu_uint8_t) { sve_u1 }; /* { dg-error {incompatible types when initializing type 'unsigned char'} } */ -+ (gnu_uint8_t) { gnu_u1 }; /* { dg-error {incompatible types when initializing type 'unsigned char'} } */ -+ -+ /* Assignment. */ -+ -+ sve_u1 = 0; /* { dg-error {incompatible types when assigning to type 'svuint8_t' from type 'int'} } */ -+ sve_u1 = sve_u1; -+ sve_u1 = gnu_u1; -+ sve_u1 = sve_s1; -+ sve_u1 = gnu_s1; -+ -+ gnu_u1 = 0; /* { dg-error {incompatible types when assigning to type 'gnu_uint8_t'[^\n]* from type 'int'} } */ -+ gnu_u1 = sve_u1; -+ gnu_u1 = gnu_u1; -+ gnu_u1 = sve_s1; -+ gnu_u1 = gnu_s1; -+ -+ /* Casts. */ -+ -+ (void) sve_u1; -+ (svuint8_t) sve_u1; -+ (svuint8_t) gnu_u1; -+ (svuint8_t) 0; /* { dg-error {conversion to non-scalar type requested} } */ -+ (svuint8_t) n; /* { dg-error {conversion to non-scalar type requested} } */ -+ (svint8_t) sve_u1; -+ (svint8_t) gnu_u1; -+ -+ (void) gnu_u1; -+ (gnu_uint8_t) sve_u1; -+ (gnu_uint8_t) gnu_u1; -+ (gnu_uint8_t) 0; /* { dg-error {can't convert a value of type 'int' to vector type '[^']*' which has different size} } */ -+ (gnu_uint8_t) n; /* { dg-error {can't convert a value of type 'int' to vector type '[^']*' which has different size} } */ -+ (gnu_int8_t) sve_u1; -+ (gnu_int8_t) gnu_u1; -+ -+ /* Vector indexing. */ -+ -+ sve_u1[0]; /* { dg-error {subscripted value is neither array nor pointer} } */ -+ &sve_u1[0]; /* { dg-error {subscripted value is neither array nor pointer} } */ -+ -+ gnu_u1[0]; -+ &gnu_u1[0]; -+ -+ /* Unary operators. */ -+ -+ +sve_u1; /* { dg-error {wrong type argument to unary plus} } */ -+ -sve_u1; /* { dg-error {wrong type argument to unary minus} } */ -+ ~sve_u1; /* { dg-error {wrong type argument to bit-complement} } */ -+ !sve_u1; /* { dg-error {wrong type argument to unary exclamation mark} } */ -+ *sve_u1; /* { dg-error {invalid type argument of unary '\*'} } */ -+ __real sve_u1; /* { dg-error {wrong type argument to __real} } */ -+ __imag sve_u1; /* { dg-error {wrong type argument to __imag} } */ -+ ++sve_u1; /* { dg-error {wrong type argument to increment} } */ -+ --sve_u1; /* { dg-error {wrong type argument to decrement} } */ -+ sve_u1++; /* { dg-error {wrong type argument to increment} } */ -+ sve_u1--; /* { dg-error {wrong type argument to decrement} } */ -+ -+ +gnu_u1; -+ -gnu_u1; -+ ~gnu_u1; -+ !gnu_u1; /* { dg-error {wrong type argument to unary exclamation mark} } */ -+ *gnu_u1; /* { dg-error {invalid type argument of unary '\*'} } */ -+ __real gnu_u1; /* { dg-error {wrong type argument to __real} } */ -+ __imag gnu_u1; /* { dg-error {wrong type argument to __imag} } */ -+ ++gnu_u1; -+ --gnu_u1; -+ gnu_u1++; -+ gnu_u1--; -+ -+ /* Vector-vector binary arithmetic. 
*/ -+ -+ sve_u1 + sve_u1; /* { dg-error {invalid operands to binary \+} } */ -+ sve_u1 - sve_u1; /* { dg-error {invalid operands to binary -} } */ -+ sve_u1 * sve_u1; /* { dg-error {invalid operands to binary \*} } */ -+ sve_u1 / sve_u1; /* { dg-error {invalid operands to binary /} } */ -+ sve_u1 % sve_u1; /* { dg-error {invalid operands to binary %} } */ -+ sve_u1 & sve_u1; /* { dg-error {invalid operands to binary \&} } */ -+ sve_u1 | sve_u1; /* { dg-error {invalid operands to binary \|} } */ -+ sve_u1 ^ sve_u1; /* { dg-error {invalid operands to binary \^} } */ -+ sve_u1 == sve_u1; /* { dg-error {invalid operands to binary ==} } */ -+ sve_u1 != sve_u1; /* { dg-error {invalid operands to binary !=} } */ -+ sve_u1 <= sve_u1; /* { dg-error {invalid operands to binary <=} } */ -+ sve_u1 < sve_u1; /* { dg-error {invalid operands to binary <} } */ -+ sve_u1 > sve_u1; /* { dg-error {invalid operands to binary >} } */ -+ sve_u1 >= sve_u1; /* { dg-error {invalid operands to binary >=} } */ -+ sve_u1 << sve_u1; /* { dg-error {invalid operands to binary <<} } */ -+ sve_u1 >> sve_u1; /* { dg-error {invalid operands to binary >>} } */ -+ sve_u1 && sve_u1; /* { dg-error {used vector type where scalar is required} } */ -+ sve_u1 || sve_u1; /* { dg-error {used vector type where scalar is required} } */ -+ -+ sve_u1 + gnu_u1; /* { dg-error {invalid operands to binary \+} } */ -+ sve_u1 - gnu_u1; /* { dg-error {invalid operands to binary -} } */ -+ sve_u1 * gnu_u1; /* { dg-error {invalid operands to binary \*} } */ -+ sve_u1 / gnu_u1; /* { dg-error {invalid operands to binary /} } */ -+ sve_u1 % gnu_u1; /* { dg-error {invalid operands to binary %} } */ -+ sve_u1 & gnu_u1; /* { dg-error {invalid operands to binary \&} } */ -+ sve_u1 | gnu_u1; /* { dg-error {invalid operands to binary \|} } */ -+ sve_u1 ^ gnu_u1; /* { dg-error {invalid operands to binary \^} } */ -+ sve_u1 == gnu_u1; /* { dg-error {invalid operands to binary ==} } */ -+ sve_u1 != gnu_u1; /* { dg-error {invalid operands to binary !=} } */ -+ sve_u1 <= gnu_u1; /* { dg-error {invalid operands to binary <=} } */ -+ sve_u1 < gnu_u1; /* { dg-error {invalid operands to binary <} } */ -+ sve_u1 > gnu_u1; /* { dg-error {invalid operands to binary >} } */ -+ sve_u1 >= gnu_u1; /* { dg-error {invalid operands to binary >=} } */ -+ sve_u1 << gnu_u1; /* { dg-error {invalid operands to binary <<} } */ -+ sve_u1 >> gnu_u1; /* { dg-error {invalid operands to binary >>} } */ -+ sve_u1 && gnu_u1; /* { dg-error {used vector type where scalar is required} } */ -+ sve_u1 || gnu_u1; /* { dg-error {used vector type where scalar is required} } */ -+ -+ gnu_u1 + sve_u1; /* { dg-error {invalid operands to binary \+} } */ -+ gnu_u1 - sve_u1; /* { dg-error {invalid operands to binary -} } */ -+ gnu_u1 * sve_u1; /* { dg-error {invalid operands to binary \*} } */ -+ gnu_u1 / sve_u1; /* { dg-error {invalid operands to binary /} } */ -+ gnu_u1 % sve_u1; /* { dg-error {invalid operands to binary %} } */ -+ gnu_u1 & sve_u1; /* { dg-error {invalid operands to binary \&} } */ -+ gnu_u1 | sve_u1; /* { dg-error {invalid operands to binary \|} } */ -+ gnu_u1 ^ sve_u1; /* { dg-error {invalid operands to binary \^} } */ -+ gnu_u1 == sve_u1; /* { dg-error {invalid operands to binary ==} } */ -+ gnu_u1 != sve_u1; /* { dg-error {invalid operands to binary !=} } */ -+ gnu_u1 <= sve_u1; /* { dg-error {invalid operands to binary <=} } */ -+ gnu_u1 < sve_u1; /* { dg-error {invalid operands to binary <} } */ -+ gnu_u1 > sve_u1; /* { dg-error {invalid operands to binary >} } */ -+ gnu_u1 
>= sve_u1; /* { dg-error {invalid operands to binary >=} } */ -+ gnu_u1 << sve_u1; /* { dg-error {invalid operands to binary <<} } */ -+ gnu_u1 >> sve_u1; /* { dg-error {invalid operands to binary >>} } */ -+ gnu_u1 && sve_u1; /* { dg-error {used vector type where scalar is required} } */ -+ gnu_u1 || sve_u1; /* { dg-error {used vector type where scalar is required} } */ -+ -+ gnu_u1 + gnu_u1; -+ gnu_u1 - gnu_u1; -+ gnu_u1 * gnu_u1; -+ gnu_u1 / gnu_u1; -+ gnu_u1 % gnu_u1; -+ gnu_u1 & gnu_u1; -+ gnu_u1 | gnu_u1; -+ gnu_u1 ^ gnu_u1; -+ gnu_u1 == gnu_u1; -+ gnu_u1 != gnu_u1; -+ gnu_u1 <= gnu_u1; -+ gnu_u1 < gnu_u1; -+ gnu_u1 > gnu_u1; -+ gnu_u1 >= gnu_u1; -+ gnu_u1 << gnu_u1; -+ gnu_u1 >> gnu_u1; -+ gnu_u1 && gnu_u1; /* { dg-error {used vector type where scalar is required} } */ -+ gnu_u1 || gnu_u1; /* { dg-error {used vector type where scalar is required} } */ -+ -+ /* Vector-scalar binary arithmetic. */ -+ -+ sve_u1 + 2; /* { dg-error {invalid operands to binary \+} } */ -+ sve_u1 - 2; /* { dg-error {invalid operands to binary -} } */ -+ sve_u1 * 2; /* { dg-error {invalid operands to binary \*} } */ -+ sve_u1 / 2; /* { dg-error {invalid operands to binary /} } */ -+ sve_u1 % 2; /* { dg-error {invalid operands to binary %} } */ -+ sve_u1 & 2; /* { dg-error {invalid operands to binary \&} } */ -+ sve_u1 | 2; /* { dg-error {invalid operands to binary \|} } */ -+ sve_u1 ^ 2; /* { dg-error {invalid operands to binary \^} } */ -+ sve_u1 == 2; /* { dg-error {invalid operands to binary ==} } */ -+ sve_u1 != 2; /* { dg-error {invalid operands to binary !=} } */ -+ sve_u1 <= 2; /* { dg-error {invalid operands to binary <=} } */ -+ sve_u1 < 2; /* { dg-error {invalid operands to binary <} } */ -+ sve_u1 > 2; /* { dg-error {invalid operands to binary >} } */ -+ sve_u1 >= 2; /* { dg-error {invalid operands to binary >=} } */ -+ sve_u1 << 2; /* { dg-error {invalid operands to binary <<} } */ -+ sve_u1 >> 2; /* { dg-error {invalid operands to binary >>} } */ -+ sve_u1 && 2; /* { dg-error {used vector type where scalar is required} } */ -+ sve_u1 || 2; /* { dg-error {used vector type where scalar is required} } */ -+ -+ sve_u1 + uc; /* { dg-error {invalid operands to binary \+} } */ -+ sve_u1 - uc; /* { dg-error {invalid operands to binary -} } */ -+ sve_u1 * uc; /* { dg-error {invalid operands to binary \*} } */ -+ sve_u1 / uc; /* { dg-error {invalid operands to binary /} } */ -+ sve_u1 % uc; /* { dg-error {invalid operands to binary %} } */ -+ sve_u1 & uc; /* { dg-error {invalid operands to binary \&} } */ -+ sve_u1 | uc; /* { dg-error {invalid operands to binary \|} } */ -+ sve_u1 ^ uc; /* { dg-error {invalid operands to binary \^} } */ -+ sve_u1 == uc; /* { dg-error {invalid operands to binary ==} } */ -+ sve_u1 != uc; /* { dg-error {invalid operands to binary !=} } */ -+ sve_u1 <= uc; /* { dg-error {invalid operands to binary <=} } */ -+ sve_u1 < uc; /* { dg-error {invalid operands to binary <} } */ -+ sve_u1 > uc; /* { dg-error {invalid operands to binary >} } */ -+ sve_u1 >= uc; /* { dg-error {invalid operands to binary >=} } */ -+ sve_u1 << uc; /* { dg-error {invalid operands to binary <<} } */ -+ sve_u1 >> uc; /* { dg-error {invalid operands to binary >>} } */ -+ sve_u1 && uc; /* { dg-error {used vector type where scalar is required} } */ -+ sve_u1 || uc; /* { dg-error {used vector type where scalar is required} } */ -+ -+ gnu_u1 + 2; -+ gnu_u1 - 2; -+ gnu_u1 * 2; -+ gnu_u1 / 2; -+ gnu_u1 % 2; -+ gnu_u1 & 2; -+ gnu_u1 | 2; -+ gnu_u1 ^ 2; -+ gnu_u1 == 2; -+ gnu_u1 != 2; -+ gnu_u1 <= 2; -+ gnu_u1 < 
2; -+ gnu_u1 > 2; -+ gnu_u1 >= 2; -+ gnu_u1 << 2; -+ gnu_u1 >> 2; -+ gnu_u1 && 2; /* { dg-error {used vector type where scalar is required} } */ -+ gnu_u1 || 2; /* { dg-error {used vector type where scalar is required} } */ -+ -+ gnu_u1 + uc; -+ gnu_u1 - uc; -+ gnu_u1 * uc; -+ gnu_u1 / uc; -+ gnu_u1 % uc; -+ gnu_u1 & uc; -+ gnu_u1 | uc; -+ gnu_u1 ^ uc; -+ gnu_u1 == uc; -+ gnu_u1 != uc; -+ gnu_u1 <= uc; -+ gnu_u1 < uc; -+ gnu_u1 > uc; -+ gnu_u1 >= uc; -+ gnu_u1 << uc; -+ gnu_u1 >> uc; -+ gnu_u1 && uc; /* { dg-error {used vector type where scalar is required} } */ -+ gnu_u1 || uc; /* { dg-error {used vector type where scalar is required} } */ -+ -+ /* Scalar-vector binary arithmetic. */ -+ -+ 3 + sve_u1; /* { dg-error {invalid operands to binary \+} } */ -+ 3 - sve_u1; /* { dg-error {invalid operands to binary -} } */ -+ 3 * sve_u1; /* { dg-error {invalid operands to binary \*} } */ -+ 3 / sve_u1; /* { dg-error {invalid operands to binary /} } */ -+ 3 % sve_u1; /* { dg-error {invalid operands to binary %} } */ -+ 3 & sve_u1; /* { dg-error {invalid operands to binary \&} } */ -+ 3 | sve_u1; /* { dg-error {invalid operands to binary \|} } */ -+ 3 ^ sve_u1; /* { dg-error {invalid operands to binary \^} } */ -+ 3 == sve_u1; /* { dg-error {invalid operands to binary ==} } */ -+ 3 != sve_u1; /* { dg-error {invalid operands to binary !=} } */ -+ 3 <= sve_u1; /* { dg-error {invalid operands to binary <=} } */ -+ 3 < sve_u1; /* { dg-error {invalid operands to binary <} } */ -+ 3 > sve_u1; /* { dg-error {invalid operands to binary >} } */ -+ 3 >= sve_u1; /* { dg-error {invalid operands to binary >=} } */ -+ 3 << sve_u1; /* { dg-error {invalid operands to binary <<} } */ -+ 3 >> sve_u1; /* { dg-error {invalid operands to binary >>} } */ -+ 3 && sve_u1; /* { dg-error {invalid operands to binary \&\&} } */ -+ 3 || sve_u1; /* { dg-error {invalid operands to binary \|\|} } */ -+ -+ 3 + gnu_u1; -+ 3 - gnu_u1; -+ 3 * gnu_u1; -+ 3 / gnu_u1; -+ 3 % gnu_u1; -+ 3 & gnu_u1; -+ 3 | gnu_u1; -+ 3 ^ gnu_u1; -+ 3 == gnu_u1; -+ 3 != gnu_u1; -+ 3 <= gnu_u1; -+ 3 < gnu_u1; -+ 3 > gnu_u1; -+ 3 >= gnu_u1; -+ 3 << gnu_u1; -+ 3 >> gnu_u1; -+ 3 && gnu_u1; /* { dg-error {invalid operands to binary \&\&} } */ -+ 3 || gnu_u1; /* { dg-error {invalid operands to binary \|\|} } */ -+ -+ /* Mismatched types. 
*/ -+ -+ sve_u1 + sve_s1; /* { dg-error {invalid operands to binary \+} } */ -+ sve_u1 - sve_s1; /* { dg-error {invalid operands to binary -} } */ -+ sve_u1 * sve_s1; /* { dg-error {invalid operands to binary \*} } */ -+ sve_u1 / sve_s1; /* { dg-error {invalid operands to binary /} } */ -+ sve_u1 % sve_s1; /* { dg-error {invalid operands to binary %} } */ -+ sve_u1 & sve_s1; /* { dg-error {invalid operands to binary \&} } */ -+ sve_u1 | sve_s1; /* { dg-error {invalid operands to binary \|} } */ -+ sve_u1 ^ sve_s1; /* { dg-error {invalid operands to binary \^} } */ -+ sve_u1 == sve_s1; /* { dg-error {invalid operands to binary ==} } */ -+ sve_u1 != sve_s1; /* { dg-error {invalid operands to binary !=} } */ -+ sve_u1 <= sve_s1; /* { dg-error {invalid operands to binary <=} } */ -+ sve_u1 < sve_s1; /* { dg-error {invalid operands to binary <} } */ -+ sve_u1 > sve_s1; /* { dg-error {invalid operands to binary >} } */ -+ sve_u1 >= sve_s1; /* { dg-error {invalid operands to binary >=} } */ -+ sve_u1 << sve_s1; /* { dg-error {invalid operands to binary <<} } */ -+ sve_u1 >> sve_s1; /* { dg-error {invalid operands to binary >>} } */ -+ sve_u1 && sve_s1; /* { dg-error {used vector type where scalar is required} } */ -+ sve_u1 || sve_s1; /* { dg-error {used vector type where scalar is required} } */ -+ -+ sve_u1 + gnu_s1; /* { dg-error {invalid operands to binary \+} } */ -+ sve_u1 - gnu_s1; /* { dg-error {invalid operands to binary -} } */ -+ sve_u1 * gnu_s1; /* { dg-error {invalid operands to binary \*} } */ -+ sve_u1 / gnu_s1; /* { dg-error {invalid operands to binary /} } */ -+ sve_u1 % gnu_s1; /* { dg-error {invalid operands to binary %} } */ -+ sve_u1 & gnu_s1; /* { dg-error {invalid operands to binary \&} } */ -+ sve_u1 | gnu_s1; /* { dg-error {invalid operands to binary \|} } */ -+ sve_u1 ^ gnu_s1; /* { dg-error {invalid operands to binary \^} } */ -+ sve_u1 == gnu_s1; /* { dg-error {invalid operands to binary ==} } */ -+ sve_u1 != gnu_s1; /* { dg-error {invalid operands to binary !=} } */ -+ sve_u1 <= gnu_s1; /* { dg-error {invalid operands to binary <=} } */ -+ sve_u1 < gnu_s1; /* { dg-error {invalid operands to binary <} } */ -+ sve_u1 > gnu_s1; /* { dg-error {invalid operands to binary >} } */ -+ sve_u1 >= gnu_s1; /* { dg-error {invalid operands to binary >=} } */ -+ sve_u1 << gnu_s1; /* { dg-error {invalid operands to binary <<} } */ -+ sve_u1 >> gnu_s1; /* { dg-error {invalid operands to binary >>} } */ -+ sve_u1 && gnu_s1; /* { dg-error {used vector type where scalar is required} } */ -+ sve_u1 || gnu_s1; /* { dg-error {used vector type where scalar is required} } */ -+ -+ gnu_u1 + sve_s1; /* { dg-error {invalid operands to binary \+} } */ -+ gnu_u1 - sve_s1; /* { dg-error {invalid operands to binary -} } */ -+ gnu_u1 * sve_s1; /* { dg-error {invalid operands to binary \*} } */ -+ gnu_u1 / sve_s1; /* { dg-error {invalid operands to binary /} } */ -+ gnu_u1 % sve_s1; /* { dg-error {invalid operands to binary %} } */ -+ gnu_u1 & sve_s1; /* { dg-error {invalid operands to binary \&} } */ -+ gnu_u1 | sve_s1; /* { dg-error {invalid operands to binary \|} } */ -+ gnu_u1 ^ sve_s1; /* { dg-error {invalid operands to binary \^} } */ -+ gnu_u1 == sve_s1; /* { dg-error {invalid operands to binary ==} } */ -+ gnu_u1 != sve_s1; /* { dg-error {invalid operands to binary !=} } */ -+ gnu_u1 <= sve_s1; /* { dg-error {invalid operands to binary <=} } */ -+ gnu_u1 < sve_s1; /* { dg-error {invalid operands to binary <} } */ -+ gnu_u1 > sve_s1; /* { dg-error {invalid operands to binary >} } */ -+ gnu_u1 
>= sve_s1; /* { dg-error {invalid operands to binary >=} } */ -+ gnu_u1 << sve_s1; /* { dg-error {invalid operands to binary <<} } */ -+ gnu_u1 >> sve_s1; /* { dg-error {invalid operands to binary >>} } */ -+ gnu_u1 && sve_s1; /* { dg-error {used vector type where scalar is required} } */ -+ gnu_u1 || sve_s1; /* { dg-error {used vector type where scalar is required} } */ -+ -+ gnu_u1 + gnu_s1; -+ gnu_u1 - gnu_s1; -+ gnu_u1 * gnu_s1; -+ gnu_u1 / gnu_s1; -+ gnu_u1 % gnu_s1; -+ gnu_u1 & gnu_s1; -+ gnu_u1 | gnu_s1; -+ gnu_u1 ^ gnu_s1; -+ gnu_u1 == gnu_s1; -+ gnu_u1 != gnu_s1; -+ gnu_u1 <= gnu_s1; -+ gnu_u1 < gnu_s1; -+ gnu_u1 > gnu_s1; -+ gnu_u1 >= gnu_s1; -+ gnu_u1 << gnu_s1; -+ gnu_u1 >> gnu_s1; -+ gnu_u1 && gnu_s1; /* { dg-error {used vector type where scalar is required} } */ -+ gnu_u1 || gnu_s1; /* { dg-error {used vector type where scalar is required} } */ -+ -+ /* Conditional expressions. */ -+ -+ uc ? sve_u1 : sve_u1; -+ uc ? gnu_u1 : sve_u1; /* { dg-error {type mismatch in conditional expression} } */ -+ uc ? sve_u1 : gnu_u1; /* { dg-error {type mismatch in conditional expression} } */ -+ uc ? gnu_u1 : gnu_u1; -+ -+ sve_u1 ? sve_u1 : sve_u1; /* { dg-error {used vector type where scalar is required} } */ -+ sve_u1 ? gnu_u1 : sve_u1; /* { dg-error {used vector type where scalar is required} } */ -+ sve_u1 ? sve_u1 : gnu_u1; /* { dg-error {used vector type where scalar is required} } */ -+ sve_u1 ? gnu_u1 : gnu_u1; /* { dg-error {used vector type where scalar is required} } */ -+ -+ gnu_u1 ? sve_u1 : sve_u1; /* { dg-error {used vector type where scalar is required} } */ -+ gnu_u1 ? gnu_u1 : sve_u1; /* { dg-error {used vector type where scalar is required} } */ -+ gnu_u1 ? sve_u1 : gnu_u1; /* { dg-error {used vector type where scalar is required} } */ -+ gnu_u1 ? gnu_u1 : gnu_u1; /* { dg-error {used vector type where scalar is required} } */ -+ -+ /* Vector built-ins. */ -+ -+ __builtin_shuffle (sve_u1, sve_u1, sve_u1); /* { dg-error {'__builtin_shuffle' last argument must be an integer vector} } */ -+ __builtin_shuffle (sve_u1, gnu_u1, gnu_u1); /* { dg-error {'__builtin_shuffle' arguments must be vectors} } */ -+ __builtin_shuffle (gnu_u1, sve_u1, gnu_u1); /* { dg-error {'__builtin_shuffle' arguments must be vectors} } */ -+ __builtin_shuffle (gnu_u1, gnu_u1, sve_u1); /* { dg-error {'__builtin_shuffle' last argument must be an integer vector} } */ -+ __builtin_shuffle (gnu_u1, gnu_u1, gnu_u1); -+ -+ __builtin_convertvector (sve_u1, svuint8_t); /* { dg-error {'__builtin_convertvector' first argument must be an integer or floating vector} } */ -+ __builtin_convertvector (gnu_u1, svuint8_t); /* { dg-error {'__builtin_convertvector' second argument must be an integer or floating vector type} } */ -+ __builtin_convertvector (sve_u1, gnu_uint8_t); /* { dg-error {'__builtin_convertvector' first argument must be an integer or floating vector} } */ -+ __builtin_convertvector (gnu_u1, gnu_uint8_t); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-1.c -new file mode 100644 -index 000000000..ec892a3fc ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-1.c -@@ -0,0 +1,217 @@ -+/* { dg-options "-std=gnu99" } */ -+ -+#include -+ -+typedef signed char int8x32_t __attribute__((__vector_size__ (32))); -+ -+/* Sizeless objects with global scope. 
*/ -+ -+svint8_t global_sve_sc; /* { dg-error {SVE type 'svint8_t' does not have a fixed size} } */ -+static svint8_t local_sve_sc; /* { dg-error {SVE type 'svint8_t' does not have a fixed size} } */ -+extern svint8_t extern_sve_sc; /* { dg-error {SVE type 'svint8_t' does not have a fixed size} } */ -+__thread svint8_t tls_sve_sc; /* { dg-error {variables of type 'svint8_t' cannot have thread-local storage duration} } */ -+_Atomic svint8_t atomic_sve_sc; /* { dg-error {SVE type 'svint8_t' does not have a fixed size} } */ -+ -+/* Sizeless arrays. */ -+ -+typedef svint8_t array_type[2]; /* { dg-error {array elements cannot have SVE type 'svint8_t'} } */ -+extern svint8_t extern_array[]; /* { dg-error {array elements cannot have SVE type 'svint8_t'} } */ -+ -+/* Sizeless fields. */ -+ -+struct struct1 { -+ svint8_t a; /* { dg-error {fields cannot have SVE type 'svint8_t'} } */ -+}; -+ -+union union1 { -+ svint8_t a; /* { dg-error {fields cannot have SVE type 'svint8_t'} } */ -+}; -+ -+/* Pointers to sizeless types. */ -+ -+svint8_t *global_sve_sc_ptr; -+svint8_t *invalid_sve_sc_ptr = &(svint8_t) { *global_sve_sc_ptr }; /* { dg-error {initializer element is not constant} } */ -+ /* { dg-error {SVE type 'svint8_t' does not have a fixed size} "" { target *-*-* } .-1 } */ -+ -+/* Sizeless arguments and return values. */ -+ -+void ext_consume_sve_sc (svint8_t); -+void ext_consume_varargs (int, ...); -+svint8_t ext_produce_sve_sc (); -+ -+/* Main tests for statements and expressions. */ -+ -+void -+statements (int n) -+{ -+ /* Local declarations. */ -+ -+ unsigned char va __attribute__((__vector_size__(2))); -+ svint8_t sve_sc1, sve_sc2; -+ _Atomic svint8_t atomic_sve_sc; -+ int8x32_t gnu_sc1; -+ svint16_t sve_sh1; -+ static svint8_t local_static_sve_sc; /* { dg-error {SVE type 'svint8_t' does not have a fixed size} } */ -+ -+ /* Layout queries. */ -+ -+ sizeof (svint8_t); /* { dg-error {SVE type 'svint8_t' does not have a fixed size} } */ -+ sizeof (sve_sc1); /* { dg-error {SVE type 'svint8_t' does not have a fixed size} } */ -+ sizeof (ext_produce_sve_sc ()); /* { dg-error {SVE type 'svint8_t' does not have a fixed size} } */ -+ _Alignof (svint8_t); /* { dg-error {SVE type 'svint8_t' does not have a defined alignment} } */ -+ _Alignof (sve_sc1); /* { dg-error {SVE type 'svint8_t' does not have a defined alignment} } */ -+ _Alignof (ext_produce_sve_sc ()); /* { dg-error {SVE type 'svint8_t' does not have a defined alignment} } */ -+ -+ /* Initialization. */ -+ -+ svint8_t init_sve_sc1 = sve_sc1; -+ svint8_t init_sve_sc2 = sve_sh1; /* { dg-error {incompatible types when initializing type 'svint8_t' using type 'svint16_t'} } */ -+ svint8_t init_sve_sc3 = {}; /* { dg-error {empty scalar initializer} } */ -+ -+ int initi_a = sve_sc1; /* { dg-error {incompatible types when initializing type 'int' using type 'svint8_t'} } */ -+ int initi_b = { sve_sc1 }; /* { dg-error {incompatible types when initializing type 'int' using type 'svint8_t'} } */ -+ -+ /* Compound literals. */ -+ -+ (svint8_t) {}; /* { dg-error {empty scalar initializer} } */ -+ (svint8_t) { sve_sc1 }; -+ -+ (int) { sve_sc1 }; /* { dg-error {incompatible types when initializing type 'int' using type 'svint8_t'} } */ -+ -+ /* Arrays. 
*/ -+ -+ svint8_t array[2]; /* { dg-error {array elements cannot have SVE type 'svint8_t'} } */ -+ svint8_t zero_length_array[0]; /* { dg-error {array elements cannot have SVE type 'svint8_t'} } */ -+ svint8_t empty_init_array[] = {}; /* { dg-error {array elements cannot have SVE type 'svint8_t'} } */ -+ /* { dg-error {empty scalar initializer} "" { target *-*-* } .-1 } */ -+ typedef svint8_t vla_type[n]; /* { dg-error {array elements cannot have SVE type 'svint8_t'} } */ -+ -+ /* Assignment. */ -+ -+ n = sve_sc1; /* { dg-error {incompatible types when assigning to type 'int' from type 'svint8_t'} } */ -+ -+ sve_sc1 = 0; /* { dg-error {incompatible types when assigning to type 'svint8_t' from type 'int'} } */ -+ sve_sc1 = sve_sc2; -+ sve_sc1 = sve_sh1; /* { dg-error {incompatible types when assigning to type 'svint8_t' from type 'svint16_t'} } */ -+ -+ /* Casting. */ -+ -+ (void) sve_sc1; -+ (svint8_t) sve_sc1; -+ -+ /* Addressing and dereferencing. */ -+ -+ svint8_t *sve_sc_ptr = &sve_sc1; -+ int8x32_t *gnu_sc_ptr = &gnu_sc1; -+ sve_sc1 = *sve_sc_ptr; -+ -+ /* Pointer assignment. */ -+ -+ gnu_sc_ptr = sve_sc_ptr; /* { dg-warning {assignment to [^\n]* from incompatible pointer type} } */ -+ sve_sc_ptr = gnu_sc_ptr; /* { dg-warning {assignment to [^\n]* from incompatible pointer type} } */ -+ -+ /* Pointer arithmetic. */ -+ -+ ++sve_sc_ptr; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ -+ --sve_sc_ptr; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ -+ sve_sc_ptr++; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ -+ sve_sc_ptr--; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ -+ sve_sc_ptr += 0; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ -+ sve_sc_ptr += 1; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ -+ sve_sc_ptr -= 0; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ -+ sve_sc_ptr -= 1; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ -+ sve_sc_ptr - sve_sc_ptr; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ -+ gnu_sc_ptr - sve_sc_ptr; /* { dg-error {invalid operands to binary -} } */ -+ sve_sc_ptr - gnu_sc_ptr; /* { dg-error {invalid operands to binary -} } */ -+ sve_sc1 = sve_sc_ptr[0]; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ -+ sve_sc1 = sve_sc_ptr[1]; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ -+ -+ /* Pointer comparison. 
*/ -+ -+ sve_sc_ptr == &sve_sc1; -+ sve_sc_ptr != &sve_sc1; -+ sve_sc_ptr < &sve_sc1; -+ sve_sc_ptr <= &sve_sc1; -+ sve_sc_ptr > &sve_sc1; -+ sve_sc_ptr >= &sve_sc1; -+ gnu_sc_ptr == sve_sc_ptr; /* { dg-warning {comparison of distinct pointer types lacks a cast} } */ -+ gnu_sc_ptr != sve_sc_ptr; /* { dg-warning {comparison of distinct pointer types lacks a cast} } */ -+ gnu_sc_ptr < sve_sc_ptr; /* { dg-warning {comparison of distinct pointer types lacks a cast} } */ -+ gnu_sc_ptr <= sve_sc_ptr; /* { dg-warning {comparison of distinct pointer types lacks a cast} } */ -+ gnu_sc_ptr > sve_sc_ptr; /* { dg-warning {comparison of distinct pointer types lacks a cast} } */ -+ gnu_sc_ptr >= sve_sc_ptr; /* { dg-warning {comparison of distinct pointer types lacks a cast} } */ -+ sve_sc_ptr == gnu_sc_ptr; /* { dg-warning {comparison of distinct pointer types lacks a cast} } */ -+ sve_sc_ptr != gnu_sc_ptr; /* { dg-warning {comparison of distinct pointer types lacks a cast} } */ -+ sve_sc_ptr < gnu_sc_ptr; /* { dg-warning {comparison of distinct pointer types lacks a cast} } */ -+ sve_sc_ptr <= gnu_sc_ptr; /* { dg-warning {comparison of distinct pointer types lacks a cast} } */ -+ sve_sc_ptr > gnu_sc_ptr; /* { dg-warning {comparison of distinct pointer types lacks a cast} } */ -+ sve_sc_ptr >= gnu_sc_ptr; /* { dg-warning {comparison of distinct pointer types lacks a cast} } */ -+ -+ /* Conditional expressions. */ -+ -+ 0 ? sve_sc1 : sve_sc1; -+ 0 ? sve_sc1 : sve_sh1; /* { dg-error {type mismatch in conditional expression} } */ -+ 0 ? sve_sc1 : 0; /* { dg-error {type mismatch in conditional expression} } */ -+ 0 ? 0 : sve_sc1; /* { dg-error {type mismatch in conditional expression} } */ -+ 0 ?: sve_sc1; /* { dg-error {type mismatch in conditional expression} } */ -+ 0 ? sve_sc_ptr : sve_sc_ptr; -+ 0 ? sve_sc_ptr : gnu_sc_ptr; /* { dg-warning {pointer type mismatch in conditional expression} } */ -+ 0 ? gnu_sc_ptr : sve_sc_ptr; /* { dg-warning {pointer type mismatch in conditional expression} } */ -+ -+ /* Generic associations. */ -+ -+ _Generic (sve_sc1, default: 100); -+ _Generic (1, svint8_t: 10, default: 20); -+ -+ /* Function arguments. */ -+ -+ ext_consume_sve_sc (sve_sc1); -+ ext_consume_sve_sc (sve_sh1); /* { dg-error {incompatible type for argument 1 of 'ext_consume_sve_sc'} } */ -+ ext_consume_varargs (sve_sc1); /* { dg-error {incompatible type for argument 1 of 'ext_consume_varargs'} } */ -+ ext_consume_varargs (1, sve_sc1); -+ -+ /* Function returns. */ -+ -+ ext_produce_sve_sc (); -+ sve_sc1 = ext_produce_sve_sc (); -+ sve_sh1 = ext_produce_sve_sc (); /* { dg-error {incompatible types when assigning to type 'svint16_t' from type 'svint8_t'} } */ -+ -+ /* Varargs processing. */ -+ -+ __builtin_va_list valist; -+ __builtin_va_arg (valist, svint8_t); -+ -+ /* Statement expressions. */ -+ -+ ({ sve_sc1; }); -+ ({ svint8_t another_sve_sc = *sve_sc_ptr; another_sve_sc; }); -+} -+ -+/* Function parameters in definitions. */ -+ -+void -+old_style (input_sve_sc) /* { dg-error {SVE type 'svint8_t' cannot be passed to an unprototyped function} } */ -+ svint8_t input_sve_sc; -+{ -+ svint8_t sve_sc1 = input_sve_sc; -+} -+ -+void -+new_style_param (svint8_t input_sve_sc) -+{ -+ svint8_t sve_sc1 = input_sve_sc; -+} -+ -+/* Function return values in definitions. 
*/ -+ -+svint8_t -+good_return_sve_sc (svint8_t param) -+{ -+ return param; -+} -+ -+svint8_t -+bad_return_sve_sc (svint16_t param) -+{ -+ return param; /* { dg-error {incompatible types when returning type 'svint16_t' but 'svint8_t' was expected} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-2.c -new file mode 100644 -index 000000000..717439300 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/sizeless-2.c -@@ -0,0 +1,217 @@ -+/* { dg-options "-std=gnu99 -msve-vector-bits=256" } */ -+ -+#include -+ -+typedef signed char int8x32_t __attribute__((__vector_size__ (32))); -+ -+/* Sizeless objects with global scope. */ -+ -+svint8_t global_sve_sc; /* { dg-error {SVE type 'svint8_t' does not have a fixed size} } */ -+static svint8_t local_sve_sc; /* { dg-error {SVE type 'svint8_t' does not have a fixed size} } */ -+extern svint8_t extern_sve_sc; /* { dg-error {SVE type 'svint8_t' does not have a fixed size} } */ -+__thread svint8_t tls_sve_sc; /* { dg-error {variables of type 'svint8_t' cannot have thread-local storage duration} } */ -+_Atomic svint8_t atomic_sve_sc; /* { dg-error {SVE type 'svint8_t' does not have a fixed size} } */ -+ -+/* Sizeless arrays. */ -+ -+typedef svint8_t array_type[2]; /* { dg-error {array elements cannot have SVE type 'svint8_t'} } */ -+extern svint8_t extern_array[]; /* { dg-error {array elements cannot have SVE type 'svint8_t'} } */ -+ -+/* Sizeless fields. */ -+ -+struct struct1 { -+ svint8_t a; /* { dg-error {fields cannot have SVE type 'svint8_t'} } */ -+}; -+ -+union union1 { -+ svint8_t a; /* { dg-error {fields cannot have SVE type 'svint8_t'} } */ -+}; -+ -+/* Pointers to sizeless types. */ -+ -+svint8_t *global_sve_sc_ptr; -+svint8_t *invalid_sve_sc_ptr = &(svint8_t) { *global_sve_sc_ptr }; /* { dg-error {initializer element is not constant} } */ -+ /* { dg-error {SVE type 'svint8_t' does not have a fixed size} "" { target *-*-* } .-1 } */ -+ -+/* Sizeless arguments and return values. */ -+ -+void ext_consume_sve_sc (svint8_t); -+void ext_consume_varargs (int, ...); -+svint8_t ext_produce_sve_sc (); -+ -+/* Main tests for statements and expressions. */ -+ -+void -+statements (int n) -+{ -+ /* Local declarations. */ -+ -+ unsigned char va __attribute__((__vector_size__(2))); -+ svint8_t sve_sc1, sve_sc2; -+ _Atomic svint8_t atomic_sve_sc; -+ int8x32_t gnu_sc1; -+ svint16_t sve_sh1; -+ static svint8_t local_static_sve_sc; /* { dg-error {SVE type 'svint8_t' does not have a fixed size} } */ -+ -+ /* Layout queries. */ -+ -+ sizeof (svint8_t); /* { dg-error {SVE type 'svint8_t' does not have a fixed size} } */ -+ sizeof (sve_sc1); /* { dg-error {SVE type 'svint8_t' does not have a fixed size} } */ -+ sizeof (ext_produce_sve_sc ()); /* { dg-error {SVE type 'svint8_t' does not have a fixed size} } */ -+ _Alignof (svint8_t); /* { dg-error {SVE type 'svint8_t' does not have a defined alignment} } */ -+ _Alignof (sve_sc1); /* { dg-error {SVE type 'svint8_t' does not have a defined alignment} } */ -+ _Alignof (ext_produce_sve_sc ()); /* { dg-error {SVE type 'svint8_t' does not have a defined alignment} } */ -+ -+ /* Initialization. 
*/ -+ -+ svint8_t init_sve_sc1 = sve_sc1; -+ svint8_t init_sve_sc2 = sve_sh1; /* { dg-error {incompatible types when initializing type 'svint8_t' using type 'svint16_t'} } */ -+ svint8_t init_sve_sc3 = {}; /* { dg-error {empty scalar initializer} } */ -+ -+ int initi_a = sve_sc1; /* { dg-error {incompatible types when initializing type 'int' using type 'svint8_t'} } */ -+ int initi_b = { sve_sc1 }; /* { dg-error {incompatible types when initializing type 'int' using type 'svint8_t'} } */ -+ -+ /* Compound literals. */ -+ -+ (svint8_t) {}; /* { dg-error {empty scalar initializer} } */ -+ (svint8_t) { sve_sc1 }; -+ -+ (int) { sve_sc1 }; /* { dg-error {incompatible types when initializing type 'int' using type 'svint8_t'} } */ -+ -+ /* Arrays. */ -+ -+ svint8_t array[2]; /* { dg-error {array elements cannot have SVE type 'svint8_t'} } */ -+ svint8_t zero_length_array[0]; /* { dg-error {array elements cannot have SVE type 'svint8_t'} } */ -+ svint8_t empty_init_array[] = {}; /* { dg-error {array elements cannot have SVE type 'svint8_t'} } */ -+ /* { dg-error {empty scalar initializer} "" { target *-*-* } .-1 } */ -+ typedef svint8_t vla_type[n]; /* { dg-error {array elements cannot have SVE type 'svint8_t'} } */ -+ -+ /* Assignment. */ -+ -+ n = sve_sc1; /* { dg-error {incompatible types when assigning to type 'int' from type 'svint8_t'} } */ -+ -+ sve_sc1 = 0; /* { dg-error {incompatible types when assigning to type 'svint8_t' from type 'int'} } */ -+ sve_sc1 = sve_sc2; -+ sve_sc1 = sve_sh1; /* { dg-error {incompatible types when assigning to type 'svint8_t' from type 'svint16_t'} } */ -+ -+ /* Casting. */ -+ -+ (void) sve_sc1; -+ (svint8_t) sve_sc1; -+ -+ /* Addressing and dereferencing. */ -+ -+ svint8_t *sve_sc_ptr = &sve_sc1; -+ int8x32_t *gnu_sc_ptr = &gnu_sc1; -+ sve_sc1 = *sve_sc_ptr; -+ -+ /* Pointer assignment. */ -+ -+ gnu_sc_ptr = sve_sc_ptr; -+ sve_sc_ptr = gnu_sc_ptr; -+ -+ /* Pointer arithmetic. */ -+ -+ ++sve_sc_ptr; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ -+ --sve_sc_ptr; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ -+ sve_sc_ptr++; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ -+ sve_sc_ptr--; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ -+ sve_sc_ptr += 0; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ -+ sve_sc_ptr += 1; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ -+ sve_sc_ptr -= 0; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ -+ sve_sc_ptr -= 1; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ -+ sve_sc_ptr - sve_sc_ptr; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ -+ gnu_sc_ptr - sve_sc_ptr; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ -+ sve_sc_ptr - gnu_sc_ptr; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ -+ sve_sc1 = sve_sc_ptr[0]; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ -+ sve_sc1 = sve_sc_ptr[1]; /* { dg-error {arithmetic on pointer to SVE type 'svint8_t'} } */ -+ -+ /* Pointer comparison. 
*/ -+ -+ sve_sc_ptr == &sve_sc1; -+ sve_sc_ptr != &sve_sc1; -+ sve_sc_ptr < &sve_sc1; -+ sve_sc_ptr <= &sve_sc1; -+ sve_sc_ptr > &sve_sc1; -+ sve_sc_ptr >= &sve_sc1; -+ gnu_sc_ptr == sve_sc_ptr; -+ gnu_sc_ptr != sve_sc_ptr; -+ gnu_sc_ptr < sve_sc_ptr; -+ gnu_sc_ptr <= sve_sc_ptr; -+ gnu_sc_ptr > sve_sc_ptr; -+ gnu_sc_ptr >= sve_sc_ptr; -+ sve_sc_ptr == gnu_sc_ptr; -+ sve_sc_ptr != gnu_sc_ptr; -+ sve_sc_ptr < gnu_sc_ptr; -+ sve_sc_ptr <= gnu_sc_ptr; -+ sve_sc_ptr > gnu_sc_ptr; -+ sve_sc_ptr >= gnu_sc_ptr; -+ -+ /* Conditional expressions. */ -+ -+ 0 ? sve_sc1 : sve_sc1; -+ 0 ? sve_sc1 : sve_sh1; /* { dg-error {type mismatch in conditional expression} } */ -+ 0 ? sve_sc1 : 0; /* { dg-error {type mismatch in conditional expression} } */ -+ 0 ? 0 : sve_sc1; /* { dg-error {type mismatch in conditional expression} } */ -+ 0 ?: sve_sc1; /* { dg-error {type mismatch in conditional expression} } */ -+ 0 ? sve_sc_ptr : sve_sc_ptr; -+ 0 ? sve_sc_ptr : gnu_sc_ptr; -+ 0 ? gnu_sc_ptr : sve_sc_ptr; -+ -+ /* Generic associations. */ -+ -+ _Generic (sve_sc1, default: 100); -+ _Generic (1, svint8_t: 10, default: 20); -+ -+ /* Function arguments. */ -+ -+ ext_consume_sve_sc (sve_sc1); -+ ext_consume_sve_sc (sve_sh1); /* { dg-error {incompatible type for argument 1 of 'ext_consume_sve_sc'} } */ -+ ext_consume_varargs (sve_sc1); /* { dg-error {incompatible type for argument 1 of 'ext_consume_varargs'} } */ -+ ext_consume_varargs (1, sve_sc1); -+ -+ /* Function returns. */ -+ -+ ext_produce_sve_sc (); -+ sve_sc1 = ext_produce_sve_sc (); -+ sve_sh1 = ext_produce_sve_sc (); /* { dg-error {incompatible types when assigning to type 'svint16_t' from type 'svint8_t'} } */ -+ -+ /* Varargs processing. */ -+ -+ __builtin_va_list valist; -+ __builtin_va_arg (valist, svint8_t); -+ -+ /* Statement expressions. */ -+ -+ ({ sve_sc1; }); -+ ({ svint8_t another_sve_sc = *sve_sc_ptr; another_sve_sc; }); -+} -+ -+/* Function parameters in definitions. */ -+ -+void -+old_style (input_sve_sc) /* { dg-error {SVE type 'svint8_t' cannot be passed to an unprototyped function} } */ -+ svint8_t input_sve_sc; -+{ -+ svint8_t sve_sc1 = input_sve_sc; -+} -+ -+void -+new_style_param (svint8_t input_sve_sc) -+{ -+ svint8_t sve_sc1 = input_sve_sc; -+} -+ -+/* Function return values in definitions. */ -+ -+svint8_t -+good_return_sve_sc (svint8_t param) -+{ -+ return param; -+} -+ -+svint8_t -+bad_return_sve_sc (svint16_t param) -+{ -+ return param; /* { dg-error {incompatible types when returning type 'svint16_t' but 'svint8_t' was expected} } */ -+} -diff --git a/gcc/tree-core.h b/gcc/tree-core.h -index 26b6f46ad..fca4abf2a 100644 ---- a/gcc/tree-core.h -+++ b/gcc/tree-core.h -@@ -1585,7 +1585,8 @@ struct GTY(()) tree_type_common { - unsigned warn_if_not_align : 6; - unsigned typeless_storage : 1; - unsigned empty_flag : 1; -- unsigned spare : 17; -+ unsigned indivisible_p : 1; -+ unsigned spare : 16; - - alias_set_type alias_set; - tree pointer_to; -diff --git a/gcc/tree.c b/gcc/tree.c -index 62607c63a..33e8dca2a 100644 ---- a/gcc/tree.c -+++ b/gcc/tree.c -@@ -15146,6 +15146,21 @@ max_object_size (void) - return TYPE_MAX_VALUE (ptrdiff_type_node); - } - -+/* A wrapper around TARGET_VERIFY_TYPE_CONTEXT that makes the silent_p -+ parameter default to false and that weeds out error_mark_node. 
*/ -+ -+bool -+verify_type_context (location_t loc, type_context_kind context, -+ const_tree type, bool silent_p) -+{ -+ if (type == error_mark_node) -+ return true; -+ -+ gcc_assert (TYPE_P (type)); -+ return (!targetm.verify_type_context -+ || targetm.verify_type_context (loc, context, type, silent_p)); -+} -+ - #if CHECKING_P - - namespace selftest { -diff --git a/gcc/tree.h b/gcc/tree.h -index 356a9f544..97d18fc2b 100644 ---- a/gcc/tree.h -+++ b/gcc/tree.h -@@ -704,6 +704,11 @@ extern void omp_clause_range_check_failed (const_tree, const char *, int, - /* Used to indicate that this TYPE represents a compiler-generated entity. */ - #define TYPE_ARTIFICIAL(NODE) (TYPE_CHECK (NODE)->base.nowarning_flag) - -+/* True if the type is indivisible at the source level, i.e. if its -+ component parts cannot be accessed directly. This is used to suppress -+ normal GNU extensions for target-specific vector types. */ -+#define TYPE_INDIVISIBLE_P(NODE) (TYPE_CHECK (NODE)->type_common.indivisible_p) -+ - /* In an IDENTIFIER_NODE, this means that assemble_name was called with - this string as an argument. */ - #define TREE_SYMBOL_REFERENCED(NODE) \ diff --git a/add-check-for-pressure-in-sche1.patch b/add-check-for-pressure-in-sche1.patch deleted file mode 100644 index b57a6853379c1f7adc6177ebbaa29ab1d23150b3..0000000000000000000000000000000000000000 --- a/add-check-for-pressure-in-sche1.patch +++ /dev/null @@ -1,52 +0,0 @@ ---- a/gcc/haifa-sched.c 2021-03-08 14:46:59.204000000 +0800 -+++ b/gcc/haifa-sched.c 2021-03-09 13:32:40.656000000 +0800 -@@ -2036,8 +2036,10 @@ model_start_update_pressure (struct mode - /* The instruction wasn't part of the model schedule; it was moved - from a different block. Update the pressure for the end of - the model schedule. */ -- MODEL_REF_PRESSURE (group, point, pci) += delta; -- MODEL_MAX_PRESSURE (group, point, pci) += delta; -+ if (MODEL_REF_PRESSURE (group, point, pci) != -1 || delta > 0) -+ MODEL_REF_PRESSURE (group, point, pci) += delta; -+ if (MODEL_MAX_PRESSURE (group, point, pci) != -1 || delta > 0) -+ MODEL_MAX_PRESSURE (group, point, pci) += delta; - } - else - { -diff -uprN a/gcc/testsuite/gcc.dg/sche1-pressure-check.c b/gcc/testsuite/gcc.dg/sche1-pressure-check.c ---- a/gcc/testsuite/gcc.dg/sche1-pressure-check.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/sche1-pressure-check.c 2021-03-09 13:40:34.036000000 +0800 -@@ -0,0 +1,33 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O3" } */ -+ -+int a, g, h; -+char b, c; -+short d; -+static int e; -+int *volatile f; -+void i() { -+ int j = 0; -+ int *k = &a; -+ for (; c; c--) { -+ g && (d = 0); -+ j ^= 10; -+ { -+ int l[2]; -+ l; -+ h = l[1]; -+ } -+ e = 1; -+ for (; e <= 7; e++) { -+ *k = 6; -+ *f = b = 0; -+ for (; b <= 7; b++) { -+ int m = 5; -+ if (g) -+ *k &= m ^= j; -+ } -+ } -+ } -+} -+int main() {} -+ diff --git a/add-checks-to-avoid-spoiling-if-conversion.patch b/add-checks-to-avoid-spoiling-if-conversion.patch deleted file mode 100644 index 34d7505995598c5e9f9b09ed6bf0333ccd170c82..0000000000000000000000000000000000000000 --- a/add-checks-to-avoid-spoiling-if-conversion.patch +++ /dev/null @@ -1,86 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. 
- -0001-tree-optimization-95855-Add-checks-to-avoid-spoiling.patch -33d114f570b4a3583421c700396fd5945acebc28 - -diff -uprN a/gcc/gimple-ssa-split-paths.c b/gcc/gimple-ssa-split-paths.c ---- a/gcc/gimple-ssa-split-paths.c -+++ b/gcc/gimple-ssa-split-paths.c -@@ -34,6 +34,7 @@ along with GCC; see the file COPYING3. If not see - #include "gimple-ssa.h" - #include "tree-phinodes.h" - #include "ssa-iterators.h" -+#include "fold-const.h" - - /* Given LATCH, the latch block in a loop, see if the shape of the - path reaching LATCH is suitable for being split by duplication. -@@ -254,6 +255,44 @@ is_feasible_trace (basic_block bb) - } - } - -+ /* Canonicalize the form. */ -+ if (single_pred_p (pred1) && single_pred (pred1) == pred2 -+ && num_stmts_in_pred1 == 0) -+ std::swap (pred1, pred2); -+ -+ /* This is meant to catch another kind of cases that are likely opportunities -+ for if-conversion. After canonicalizing, PRED2 must be an empty block and -+ PRED1 must be the only predecessor of PRED2. Moreover, PRED1 is supposed -+ to end with a cond_stmt which has the same args with the PHI in BB. */ -+ if (single_pred_p (pred2) && single_pred (pred2) == pred1 -+ && num_stmts_in_pred2 == 0) -+ { -+ gimple *cond_stmt = last_stmt (pred1); -+ if (cond_stmt && gimple_code (cond_stmt) == GIMPLE_COND) -+ { -+ tree lhs = gimple_cond_lhs (cond_stmt); -+ tree rhs = gimple_cond_rhs (cond_stmt); -+ -+ gimple_stmt_iterator gsi; -+ for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) -+ { -+ gimple *phi = gsi_stmt (gsi); -+ if ((operand_equal_p (gimple_phi_arg_def (phi, 0), lhs) -+ && operand_equal_p (gimple_phi_arg_def (phi, 1), rhs)) -+ || (operand_equal_p (gimple_phi_arg_def (phi, 0), rhs) -+ && (operand_equal_p (gimple_phi_arg_def (phi, 1), lhs)))) -+ { -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ fprintf (dump_file, -+ "Block %d appears to be optimized to a join " -+ "point for if-convertable half-diamond.\n", -+ bb->index); -+ return false; -+ } -+ } -+ } -+ } -+ - /* If the joiner has no PHIs with useful uses there is zero chance - of CSE/DCE/jump-threading possibilities exposed by duplicating it. */ - bool found_useful_phi = false; -diff -uprN a/gcc/testsuite/gcc.dg/tree-ssa/split-path-12.c b/gcc/testsuite/gcc.dg/tree-ssa/split-path-12.c -new file mode 100644 ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/tree-ssa/split-path-12.c -@@ -0,0 +1,19 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fsplit-paths -fdump-tree-split-paths-details " } */ -+ -+double -+foo(double *d1, double *d2, double *d3, int num, double *ip) -+{ -+ double dmax[3]; -+ -+ for (int i = 0; i < num; i++) { -+ dmax[0] = d1[i] < dmax[0] ? dmax[0] : d1[i]; -+ dmax[1] = d2[i] < dmax[1] ? dmax[1] : d2[i]; -+ dmax[2] = d3[i] < dmax[2] ? 
dmax[2] : d3[i]; -+ ip[i] = dmax[2]; -+ } -+ -+ return dmax[0] + dmax[1] + dmax[2]; -+} -+ -+/* { dg-final { scan-tree-dump "appears to be optimized to a join point for if-convertable half-diamond" "split-paths" } } */ diff --git a/add-fp-model-options.patch b/add-fp-model-options.patch deleted file mode 100644 index 8d23b990fd41cbfa999955b4a7928faeca9e4172..0000000000000000000000000000000000000000 --- a/add-fp-model-options.patch +++ /dev/null @@ -1,376 +0,0 @@ -diff -Nurp a/gcc/common.opt b/gcc/common.opt ---- a/gcc/common.opt 2021-02-18 21:22:07.216000000 +0800 -+++ b/gcc/common.opt 2021-02-19 16:04:17.876000000 +0800 -@@ -1506,6 +1506,32 @@ ffp-int-builtin-inexact - Common Report Var(flag_fp_int_builtin_inexact) Init(1) Optimization - Allow built-in functions ceil, floor, round, trunc to raise \"inexact\" exceptions. - -+fftz -+Common Report Var(flag_ftz) Optimization -+Control fpcr register for flush to zero. -+ -+fp-model= -+Common Joined RejectNegative Enum(fp_model) Var(flag_fp_model) Init(FP_MODEL_NORMAL) Optimization -+-fp-model=[normal|fast|precise|except|strict] Perform floating-point precision control. -+ -+Enum -+Name(fp_model) Type(enum fp_model) UnknownError(unknown floating point precision model %qs) -+ -+EnumValue -+Enum(fp_model) String(normal) Value(FP_MODEL_NORMAL) -+ -+EnumValue -+Enum(fp_model) String(fast) Value(FP_MODEL_FAST) -+ -+EnumValue -+Enum(fp_model) String(precise) Value(FP_MODEL_PRECISE) -+ -+EnumValue -+Enum(fp_model) String(except) Value(FP_MODEL_EXCEPT) -+ -+EnumValue -+Enum(fp_model) String(strict) Value(FP_MODEL_STRICT) -+ - ; Nonzero means don't put addresses of constant functions in registers. - ; Used for compiling the Unix kernel, where strange substitutions are - ; done on the assembly output. -diff -Nurp a/gcc/config/aarch64/aarch64-linux.h b/gcc/config/aarch64/aarch64-linux.h ---- a/gcc/config/aarch64/aarch64-linux.h 2021-02-18 21:22:07.220000000 +0800 -+++ b/gcc/config/aarch64/aarch64-linux.h 2021-02-18 21:23:55.932000000 +0800 -@@ -50,7 +50,8 @@ - #define LINK_SPEC LINUX_TARGET_LINK_SPEC AARCH64_ERRATA_LINK_SPEC - - #define GNU_USER_TARGET_MATHFILE_SPEC \ -- "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}" -+ "%{Ofast|ffast-math|funsafe-math-optimizations|fp-model=fast|fftz:\ -+ %{!fno-ftz:crtfastmath.o%s}}" - - #undef ENDFILE_SPEC - #define ENDFILE_SPEC \ -diff -Nurp a/gcc/flag-types.h b/gcc/flag-types.h ---- a/gcc/flag-types.h 2020-03-12 19:07:21.000000000 +0800 -+++ b/gcc/flag-types.h 2021-02-18 21:23:55.932000000 +0800 -@@ -207,6 +207,15 @@ enum fp_contract_mode { - FP_CONTRACT_FAST = 2 - }; - -+/* Floating-point precision mode. */ -+enum fp_model { -+ FP_MODEL_NORMAL = 0, -+ FP_MODEL_FAST = 1, -+ FP_MODEL_PRECISE = 2, -+ FP_MODEL_EXCEPT = 3, -+ FP_MODEL_STRICT = 4 -+}; -+ - /* Scalar storage order kind. */ - enum scalar_storage_order_kind { - SSO_NATIVE = 0, -diff -Nurp a/gcc/fortran/options.c b/gcc/fortran/options.c ---- a/gcc/fortran/options.c 2020-03-12 19:07:21.000000000 +0800 -+++ b/gcc/fortran/options.c 2021-02-18 21:23:55.932000000 +0800 -@@ -247,6 +247,7 @@ form_from_filename (const char *filename - return f_form; - } - -+static void gfc_handle_fpe_option (const char *arg, bool trap); - - /* Finalize commandline options. */ - -@@ -274,6 +275,13 @@ gfc_post_options (const char **pfilename - if (flag_protect_parens == -1) - flag_protect_parens = !optimize_fast; - -+ /* If fp-model=precise/strict, turn on all ffpe-trap and ffpe-summary. 
*/ -+ if (flag_fp_model == FP_MODEL_EXCEPT || flag_fp_model == FP_MODEL_STRICT) -+ { -+ gfc_handle_fpe_option ("all", false); -+ gfc_handle_fpe_option ("invalid,zero,overflow,underflow", true); -+ } -+ - /* -Ofast sets implies -fstack-arrays unless an explicit size is set for - stack arrays. */ - if (flag_stack_arrays == -1 && flag_max_stack_var_size == -2) -diff -Nurp a/gcc/opts.c b/gcc/opts.c ---- a/gcc/opts.c 2021-02-18 21:22:07.424000000 +0800 -+++ b/gcc/opts.c 2021-02-19 16:00:08.628000000 +0800 -@@ -196,6 +196,7 @@ static void set_debug_level (enum debug_ - struct gcc_options *opts_set, - location_t loc); - static void set_fast_math_flags (struct gcc_options *opts, int set); -+static void set_fp_model_flags (struct gcc_options *opts, int set); - static void decode_d_option (const char *arg, struct gcc_options *opts, - location_t loc, diagnostic_context *dc); - static void set_unsafe_math_optimizations_flags (struct gcc_options *opts, -@@ -2433,6 +2434,10 @@ common_handle_option (struct gcc_options - set_fast_math_flags (opts, value); - break; - -+ case OPT_fp_model_: -+ set_fp_model_flags (opts, value); -+ break; -+ - case OPT_funsafe_math_optimizations: - set_unsafe_math_optimizations_flags (opts, value); - break; -@@ -2905,6 +2910,69 @@ set_fast_math_flags (struct gcc_options - } - } - -+/* Handle fp-model options. */ -+static void -+set_fp_model_flags (struct gcc_options *opts, int set) -+{ -+ enum fp_model model = (enum fp_model) set; -+ switch (model) -+ { -+ case FP_MODEL_FAST: -+ /* Equivalent to open ffast-math. */ -+ set_fast_math_flags (opts, 1); -+ break; -+ -+ case FP_MODEL_PRECISE: -+ /* Equivalent to close ffast-math. */ -+ set_fast_math_flags (opts, 0); -+ /* Turn on -frounding-math -fsignaling-nans. */ -+ if (!opts->frontend_set_flag_signaling_nans) -+ opts->x_flag_signaling_nans = 1; -+ if (!opts->frontend_set_flag_rounding_math) -+ opts->x_flag_rounding_math = 1; -+ opts->x_flag_expensive_optimizations = 0; -+ opts->x_flag_code_hoisting = 0; -+ opts->x_flag_predictive_commoning = 0; -+ opts->x_flag_fp_contract_mode = FP_CONTRACT_OFF; -+ break; -+ -+ case FP_MODEL_EXCEPT: -+ if (!opts->frontend_set_flag_signaling_nans) -+ opts->x_flag_signaling_nans = 1; -+ if (!opts->frontend_set_flag_errno_math) -+ opts->x_flag_errno_math = 1; -+ if (!opts->frontend_set_flag_trapping_math) -+ opts->x_flag_trapping_math = 1; -+ opts->x_flag_fp_int_builtin_inexact = 1; -+ /* Also turn on ffpe-trap in fortran. */ -+ break; -+ -+ case FP_MODEL_STRICT: -+ /* Turn on both precise and except. */ -+ if (!opts->frontend_set_flag_signaling_nans) -+ opts->x_flag_signaling_nans = 1; -+ if (!opts->frontend_set_flag_rounding_math) -+ opts->x_flag_rounding_math = 1; -+ opts->x_flag_expensive_optimizations = 0; -+ opts->x_flag_code_hoisting = 0; -+ opts->x_flag_predictive_commoning = 0; -+ if (!opts->frontend_set_flag_errno_math) -+ opts->x_flag_errno_math = 1; -+ if (!opts->frontend_set_flag_trapping_math) -+ opts->x_flag_trapping_math = 1; -+ opts->x_flag_fp_int_builtin_inexact = 1; -+ opts->x_flag_fp_contract_mode = FP_CONTRACT_OFF; -+ break; -+ -+ case FP_MODEL_NORMAL: -+ /* Do nothing. */ -+ break; -+ -+ default: -+ gcc_unreachable (); -+ } -+} -+ - /* When -funsafe-math-optimizations is set the following - flags are set as well. */ - static void -diff -Nurp a/gcc/opts-common.c b/gcc/opts-common.c ---- a/gcc/opts-common.c 2020-03-12 19:07:21.000000000 +0800 -+++ b/gcc/opts-common.c 2021-02-19 09:49:18.880000000 +0800 -@@ -26,7 +26,8 @@ along with GCC; see the file COPYING3. 
- #include "diagnostic.h" - #include "spellcheck.h" - --static void prune_options (struct cl_decoded_option **, unsigned int *); -+static void prune_options (struct cl_decoded_option **, unsigned int *, -+ unsigned int); - - /* An option that is undocumented, that takes a joined argument, and - that doesn't fit any of the classes of uses (language/common, -@@ -968,7 +969,7 @@ decode_cmdline_options_to_array (unsigne - - *decoded_options = opt_array; - *decoded_options_count = num_decoded_options; -- prune_options (decoded_options, decoded_options_count); -+ prune_options (decoded_options, decoded_options_count, lang_mask); - } - - /* Return true if NEXT_OPT_IDX cancels OPT_IDX. Return false if the -@@ -989,11 +990,108 @@ cancel_option (int opt_idx, int next_opt - return false; - } - -+/* Check whether opt_idx exists in decoded_options array bewteen index -+ start and end. If found, return its index in decoded_options, -+ else return end. */ -+static unsigned int -+find_opt_idx (struct cl_decoded_option *decoded_options, -+ unsigned int decoded_options_count, -+ unsigned int start, unsigned int end, unsigned int opt_idx) -+{ -+ gcc_assert (end <= decoded_options_count); -+ gcc_assert (opt_idx < cl_options_count); -+ unsigned int k; -+ for (k = start; k < end; k++) -+ { -+ if (decoded_options[k].opt_index == opt_idx) -+ { -+ return k; -+ } -+ } -+ return k; -+} -+ -+/* remove the opt_index element from decoded_options array. */ -+static unsigned int -+remove_option (struct cl_decoded_option *decoded_options, -+ unsigned int decoded_options_count, -+ unsigned int opt_index) -+{ -+ gcc_assert (opt_index < decoded_options_count); -+ unsigned int i; -+ for (i = opt_index; i < decoded_options_count - 1; i++) -+ { -+ decoded_options[i] = decoded_options[i + 1]; -+ } -+ return decoded_options_count - 1; -+} -+ -+/* Handle the priority between fp-model, Ofast, and -+ ffast-math. */ -+static unsigned int -+handle_fp_model_driver (struct cl_decoded_option *decoded_options, -+ unsigned int decoded_options_count, -+ unsigned int fp_model_index, -+ unsigned int lang_mask) -+{ -+ struct cl_decoded_option fp_model_opt = decoded_options[fp_model_index]; -+ enum fp_model model = (enum fp_model) fp_model_opt.value; -+ if (model == FP_MODEL_PRECISE || model == FP_MODEL_STRICT) -+ { -+ /* If found Ofast, override Ofast with O3. */ -+ unsigned int Ofast_index; -+ Ofast_index = find_opt_idx (decoded_options, decoded_options_count, -+ 0, decoded_options_count, OPT_Ofast); -+ while (Ofast_index != decoded_options_count) -+ { -+ const char *tmp_argv = "-O3"; -+ decode_cmdline_option (&tmp_argv, lang_mask, -+ &decoded_options[Ofast_index]); -+ warning (0, "'-Ofast' is degraded to '-O3' due to %qs", -+ fp_model_opt.orig_option_with_args_text); -+ Ofast_index = find_opt_idx (decoded_options, decoded_options_count, -+ 0, decoded_options_count, OPT_Ofast); -+ } -+ /* If found ffast-math before fp-model=precise/strict -+ it, cancel it. */ -+ unsigned int ffast_math_index; -+ ffast_math_index -+ = find_opt_idx (decoded_options, decoded_options_count, 0, -+ fp_model_index, OPT_ffast_math); -+ if (ffast_math_index != fp_model_index) -+ { -+ decoded_options_count -+ = remove_option (decoded_options, decoded_options_count, -+ ffast_math_index); -+ warning (0, "'-ffast-math' before %qs is canceled", -+ fp_model_opt.orig_option_with_args_text); -+ } -+ } -+ if (model == FP_MODEL_FAST) -+ { -+ /* If found -fno-fast-math after fp-model=fast, cancel this one. 
*/ -+ unsigned int fno_fast_math_index; -+ fno_fast_math_index -+ = find_opt_idx (decoded_options, decoded_options_count, fp_model_index, -+ decoded_options_count, OPT_ffast_math); -+ if (fno_fast_math_index != decoded_options_count -+ && decoded_options[fno_fast_math_index].value == 0) -+ { -+ decoded_options_count -+ = remove_option (decoded_options, decoded_options_count, -+ fp_model_index); -+ warning (0, "'-fp-model=fast' before '-fno-fast-math' is canceled"); -+ } -+ } -+ return decoded_options_count; -+} -+ - /* Filter out options canceled by the ones after them. */ - - static void - prune_options (struct cl_decoded_option **decoded_options, -- unsigned int *decoded_options_count) -+ unsigned int *decoded_options_count, -+ unsigned int lang_mask) - { - unsigned int old_decoded_options_count = *decoded_options_count; - struct cl_decoded_option *old_decoded_options = *decoded_options; -@@ -1005,6 +1103,8 @@ prune_options (struct cl_decoded_option - unsigned int fdiagnostics_color_idx = 0; - - /* Remove arguments which are negated by others after them. */ -+ -+ unsigned int fp_model_index = old_decoded_options_count; - new_decoded_options_count = 0; - for (i = 0; i < old_decoded_options_count; i++) - { -@@ -1028,6 +1128,34 @@ prune_options (struct cl_decoded_option - fdiagnostics_color_idx = i; - continue; - -+ case OPT_fp_model_: -+ /* Only the last fp-model option will take effect. */ -+ unsigned int next_fp_model_idx; -+ next_fp_model_idx = find_opt_idx (old_decoded_options, -+ old_decoded_options_count, -+ i + 1, -+ old_decoded_options_count, -+ OPT_fp_model_); -+ if (next_fp_model_idx != old_decoded_options_count) -+ { -+ /* Found more than one fp-model, cancel this one. */ -+ if (old_decoded_options[i].value -+ != old_decoded_options[next_fp_model_idx].value) -+ { -+ warning (0, "%qs is overrided by %qs", -+ old_decoded_options[i]. -+ orig_option_with_args_text, -+ old_decoded_options[next_fp_model_idx]. -+ orig_option_with_args_text); -+ } -+ break; -+ } -+ else -+ { -+ /* Found the last fp-model option. */ -+ fp_model_index = new_decoded_options_count; -+ } -+ /* FALLTHRU. */ - default: - gcc_assert (opt_idx < cl_options_count); - option = &cl_options[opt_idx]; -@@ -1067,6 +1195,14 @@ keep: - break; - } - } -+ if (fp_model_index < new_decoded_options_count) -+ { -+ new_decoded_options_count -+ = handle_fp_model_driver (new_decoded_options, -+ new_decoded_options_count, -+ fp_model_index, -+ lang_mask); -+ } - - if (fdiagnostics_color_idx >= 1) - { diff --git a/add-option-fallow-store-data-races.patch b/add-option-fallow-store-data-races.patch deleted file mode 100644 index 8ecb581036e616c0685ff8aac64d1e226da579e1..0000000000000000000000000000000000000000 --- a/add-option-fallow-store-data-races.patch +++ /dev/null @@ -1,298 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-re-PR-middle-end-92046-Command-line-options-that-are.patch -e622a32db78300821fc1327637ec6413febc2c66 - -diff -uprN a/gcc/common.opt b/gcc/common.opt ---- a/gcc/common.opt 2020-05-28 16:12:58.815511599 +0800 -+++ b/gcc/common.opt 2020-05-28 15:54:33.797511589 +0800 -@@ -993,6 +993,10 @@ Align the start of loops. - falign-loops= - Common RejectNegative Joined Var(str_align_loops) Optimization - -+fallow-store-data-races -+Common Report Var(flag_store_data_races) Optimization -+Allow the compiler to introduce new data races on stores. -+ - fargument-alias - Common Ignore - Does nothing. 
Preserved for backward compatibility. -diff -uprN a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi ---- a/gcc/doc/invoke.texi 2020-05-28 16:12:56.875511599 +0800 -+++ b/gcc/doc/invoke.texi 2020-05-28 15:54:33.757511589 +0800 -@@ -400,6 +400,7 @@ Objective-C and Objective-C++ Dialects}. - -falign-jumps[=@var{n}[:@var{m}:[@var{n2}[:@var{m2}]]]] @gol - -falign-labels[=@var{n}[:@var{m}:[@var{n2}[:@var{m2}]]]] @gol - -falign-loops[=@var{n}[:@var{m}:[@var{n2}[:@var{m2}]]]] @gol -+-fallow-store-data-races @gol - -fassociative-math -fauto-profile -fauto-profile[=@var{path}] @gol - -fauto-inc-dec -fbranch-probabilities @gol - -fbranch-target-load-optimize -fbranch-target-load-optimize2 @gol -@@ -8365,9 +8366,9 @@ designed to reduce code size. - Disregard strict standards compliance. @option{-Ofast} enables all - @option{-O3} optimizations. It also enables optimizations that are not - valid for all standard-compliant programs. --It turns on @option{-ffast-math} and the Fortran-specific --@option{-fstack-arrays}, unless @option{-fmax-stack-var-size} is --specified, and @option{-fno-protect-parens}. -+It turns on @option{-ffast-math}, @option{-fallow-store-data-races} -+and the Fortran-specific @option{-fstack-arrays}, unless -+@option{-fmax-stack-var-size} is specified, and @option{-fno-protect-parens}. - - @item -Og - @opindex Og -@@ -10120,6 +10121,12 @@ The maximum allowed @var{n} option value - - Enabled at levels @option{-O2}, @option{-O3}. - -+@item -fallow-store-data-races -+@opindex fallow-store-data-races -+Allow the compiler to introduce new data races on stores. -+ -+Enabled at level @option{-Ofast}. -+ - @item -funit-at-a-time - @opindex funit-at-a-time - This option is left for compatibility reasons. @option{-funit-at-a-time} -@@ -11902,10 +11909,6 @@ The maximum number of conditional store - if either vectorization (@option{-ftree-vectorize}) or if-conversion - (@option{-ftree-loop-if-convert}) is disabled. - --@item allow-store-data-races --Allow optimizers to introduce new data races on stores. --Set to 1 to allow, otherwise to 0. -- - @item case-values-threshold - The smallest number of different values for which it is best to use a - jump-table instead of a tree of conditional branches. If the value is -diff -uprN a/gcc/opts.c b/gcc/opts.c ---- a/gcc/opts.c 2020-05-28 16:12:58.847511599 +0800 -+++ b/gcc/opts.c 2020-05-28 15:54:35.713511589 +0800 -@@ -560,6 +560,7 @@ static const struct default_options defa - - /* -Ofast adds optimizations to -O3. */ - { OPT_LEVELS_FAST, OPT_ffast_math, NULL, 1 }, -+ { OPT_LEVELS_FAST, OPT_fallow_store_data_races, NULL, 1 }, - - { OPT_LEVELS_NONE, 0, NULL, 0 } - }; -@@ -682,13 +683,6 @@ default_options_optimization (struct gcc - : default_param_value (PARAM_MAX_DSE_ACTIVE_LOCAL_STORES) / 10, - opts->x_param_values, opts_set->x_param_values); - -- /* At -Ofast, allow store motion to introduce potential race conditions. */ -- maybe_set_param_value -- (PARAM_ALLOW_STORE_DATA_RACES, -- opts->x_optimize_fast ? 1 -- : default_param_value (PARAM_ALLOW_STORE_DATA_RACES), -- opts->x_param_values, opts_set->x_param_values); -- - if (opts->x_optimize_size) - /* We want to crossjump as much as possible. 
*/ - maybe_set_param_value (PARAM_MIN_CROSSJUMP_INSNS, 1, -diff -uprN a/gcc/params.def b/gcc/params.def ---- a/gcc/params.def 2020-05-28 16:12:58.831511599 +0800 -+++ b/gcc/params.def 2020-05-28 15:54:35.725511589 +0800 -@@ -1199,12 +1199,6 @@ DEFPARAM (PARAM_CASE_VALUES_THRESHOLD, - "if 0, use the default for the machine.", - 0, 0, 0) - --/* Data race flags for C++0x memory model compliance. */ --DEFPARAM (PARAM_ALLOW_STORE_DATA_RACES, -- "allow-store-data-races", -- "Allow new data races on stores to be introduced.", -- 0, 0, 1) -- - /* Reassociation width to be used by tree reassoc optimization. */ - DEFPARAM (PARAM_TREE_REASSOC_WIDTH, - "tree-reassoc-width", -diff -uprN a/gcc/params.h b/gcc/params.h ---- a/gcc/params.h 2020-05-28 16:12:58.843511599 +0800 -+++ b/gcc/params.h 2020-05-28 15:54:35.725511589 +0800 -@@ -228,8 +228,6 @@ extern void init_param_values (int *para - PARAM_VALUE (PARAM_MAX_STORES_TO_SINK) - #define ALLOW_LOAD_DATA_RACES \ - PARAM_VALUE (PARAM_ALLOW_LOAD_DATA_RACES) --#define ALLOW_STORE_DATA_RACES \ -- PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES) - #define ALLOW_PACKED_LOAD_DATA_RACES \ - PARAM_VALUE (PARAM_ALLOW_PACKED_LOAD_DATA_RACES) - #define ALLOW_PACKED_STORE_DATA_RACES \ -diff -uprN a/gcc/testsuite/c-c++-common/cxxbitfields-3.c b/gcc/testsuite/c-c++-common/cxxbitfields-3.c ---- a/gcc/testsuite/c-c++-common/cxxbitfields-3.c 2020-05-28 16:12:56.959511599 +0800 -+++ b/gcc/testsuite/c-c++-common/cxxbitfields-3.c 2020-05-28 15:54:33.853511589 +0800 -@@ -1,5 +1,5 @@ - /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ --/* { dg-options "-O2 --param allow-store-data-races=0" } */ -+/* { dg-options "-O2 -fno-allow-store-data-races" } */ - - /* Make sure we don't narrow down to a QI or HI to store into VAR.J, - but instead use an SI. */ -diff -uprN a/gcc/testsuite/c-c++-common/cxxbitfields-6.c b/gcc/testsuite/c-c++-common/cxxbitfields-6.c ---- a/gcc/testsuite/c-c++-common/cxxbitfields-6.c 2020-05-28 16:12:56.935511599 +0800 -+++ b/gcc/testsuite/c-c++-common/cxxbitfields-6.c 2020-05-28 15:54:33.845511589 +0800 -@@ -1,6 +1,6 @@ - /* PR middle-end/50141 */ - /* { dg-do compile } */ --/* { dg-options "-O2 --param allow-store-data-races=0" } */ -+/* { dg-options "-O2 -fno-allow-store-data-races" } */ - - struct S - { -diff -uprN a/gcc/testsuite/c-c++-common/simulate-thread/bitfields-1.c b/gcc/testsuite/c-c++-common/simulate-thread/bitfields-1.c ---- a/gcc/testsuite/c-c++-common/simulate-thread/bitfields-1.c 2020-05-28 16:12:56.939511599 +0800 -+++ b/gcc/testsuite/c-c++-common/simulate-thread/bitfields-1.c 2020-05-28 15:54:33.821511589 +0800 -@@ -1,5 +1,5 @@ - /* { dg-do link } */ --/* { dg-options "--param allow-store-data-races=0" } */ -+/* { dg-options "-fno-allow-store-data-races" } */ - /* { dg-final { simulate-thread } } */ - - #include -diff -uprN a/gcc/testsuite/c-c++-common/simulate-thread/bitfields-2.c b/gcc/testsuite/c-c++-common/simulate-thread/bitfields-2.c ---- a/gcc/testsuite/c-c++-common/simulate-thread/bitfields-2.c 2020-05-28 16:12:56.939511599 +0800 -+++ b/gcc/testsuite/c-c++-common/simulate-thread/bitfields-2.c 2020-05-28 15:54:33.821511589 +0800 -@@ -1,5 +1,5 @@ - /* { dg-do link { target { ! 
int16 } } } */ --/* { dg-options "--param allow-store-data-races=0" } */ -+/* { dg-options "-fno-allow-store-data-races" } */ - /* { dg-final { simulate-thread } } */ - - #include -diff -uprN a/gcc/testsuite/c-c++-common/simulate-thread/bitfields-3.c b/gcc/testsuite/c-c++-common/simulate-thread/bitfields-3.c ---- a/gcc/testsuite/c-c++-common/simulate-thread/bitfields-3.c 2020-05-28 16:12:56.939511599 +0800 -+++ b/gcc/testsuite/c-c++-common/simulate-thread/bitfields-3.c 2020-05-28 15:54:33.821511589 +0800 -@@ -1,5 +1,5 @@ - /* { dg-do link } */ --/* { dg-options "--param allow-store-data-races=0" } */ -+/* { dg-options "-fno-allow-store-data-races" } */ - /* { dg-final { simulate-thread } } */ - - #include -diff -uprN a/gcc/testsuite/c-c++-common/simulate-thread/bitfields-4.c b/gcc/testsuite/c-c++-common/simulate-thread/bitfields-4.c ---- a/gcc/testsuite/c-c++-common/simulate-thread/bitfields-4.c 2020-05-28 16:12:56.939511599 +0800 -+++ b/gcc/testsuite/c-c++-common/simulate-thread/bitfields-4.c 2020-05-28 15:54:33.821511589 +0800 -@@ -1,5 +1,5 @@ - /* { dg-do link } */ --/* { dg-options "--param allow-store-data-races=0" } */ -+/* { dg-options "-fno-allow-store-data-races" } */ - /* { dg-final { simulate-thread } } */ - - #include -diff -uprN a/gcc/testsuite/gcc.dg/lto/pr52097_0.c b/gcc/testsuite/gcc.dg/lto/pr52097_0.c ---- a/gcc/testsuite/gcc.dg/lto/pr52097_0.c 2020-05-28 16:12:57.803511599 +0800 -+++ b/gcc/testsuite/gcc.dg/lto/pr52097_0.c 2020-05-28 15:54:34.777511589 +0800 -@@ -1,5 +1,5 @@ - /* { dg-lto-do link } */ --/* { dg-lto-options { { -O -flto -fexceptions -fnon-call-exceptions --param allow-store-data-races=0 } } } */ -+/* { dg-lto-options { { -O -flto -fexceptions -fnon-call-exceptions -fno-allow-store-data-races } } } */ - /* { dg-require-effective-target exceptions } */ - - typedef struct { unsigned int e0 : 16; } s1; -diff -uprN a/gcc/testsuite/gcc.dg/simulate-thread/speculative-store-2.c b/gcc/testsuite/gcc.dg/simulate-thread/speculative-store-2.c ---- a/gcc/testsuite/gcc.dg/simulate-thread/speculative-store-2.c 2020-05-28 16:12:57.815511599 +0800 -+++ b/gcc/testsuite/gcc.dg/simulate-thread/speculative-store-2.c 2020-05-28 15:54:34.781511589 +0800 -@@ -1,5 +1,5 @@ - /* { dg-do link } */ --/* { dg-options "--param allow-store-data-races=0 -O2" } */ -+/* { dg-options "-fno-allow-store-data-races -O2" } */ - /* { dg-final { simulate-thread } } */ - - #include -diff -uprN a/gcc/testsuite/gcc.dg/simulate-thread/speculative-store-3.c b/gcc/testsuite/gcc.dg/simulate-thread/speculative-store-3.c ---- a/gcc/testsuite/gcc.dg/simulate-thread/speculative-store-3.c 2020-05-28 16:12:57.815511599 +0800 -+++ b/gcc/testsuite/gcc.dg/simulate-thread/speculative-store-3.c 2020-05-28 15:54:34.781511589 +0800 -@@ -1,5 +1,5 @@ - /* { dg-do link } */ --/* { dg-options "--param allow-store-data-races=0 -O2" } */ -+/* { dg-options "-fno-allow-store-data-races -O2" } */ - /* { dg-final { simulate-thread } } */ - - #include -diff -uprN a/gcc/testsuite/gcc.dg/simulate-thread/speculative-store-4.c b/gcc/testsuite/gcc.dg/simulate-thread/speculative-store-4.c ---- a/gcc/testsuite/gcc.dg/simulate-thread/speculative-store-4.c 2020-05-28 16:12:57.815511599 +0800 -+++ b/gcc/testsuite/gcc.dg/simulate-thread/speculative-store-4.c 2020-05-28 15:54:34.781511589 +0800 -@@ -1,5 +1,5 @@ - /* { dg-do link } */ --/* { dg-options "--param allow-store-data-races=0" } */ -+/* { dg-options "-fno-allow-store-data-races" } */ - /* { dg-final { simulate-thread } } */ - - #include -diff -uprN 
a/gcc/testsuite/gcc.dg/simulate-thread/speculative-store.c b/gcc/testsuite/gcc.dg/simulate-thread/speculative-store.c ---- a/gcc/testsuite/gcc.dg/simulate-thread/speculative-store.c 2020-05-28 16:12:57.815511599 +0800 -+++ b/gcc/testsuite/gcc.dg/simulate-thread/speculative-store.c 2020-05-28 15:54:34.781511589 +0800 -@@ -1,12 +1,12 @@ - /* { dg-do link } */ --/* { dg-options "--param allow-store-data-races=0" } */ -+/* { dg-options "-fno-allow-store-data-races" } */ - /* { dg-final { simulate-thread } } */ - - #include - #include "simulate-thread.h" - - /* This file tests that speculative store movement out of a loop doesn't -- happen. This is disallowed when --param allow-store-data-races is 0. */ -+ happen. This is disallowed when -fno-allow-store-data-races. */ - - int global = 100; - -diff -uprN a/gcc/testsuite/gcc.dg/tree-ssa/20050314-1.c b/gcc/testsuite/gcc.dg/tree-ssa/20050314-1.c ---- a/gcc/testsuite/gcc.dg/tree-ssa/20050314-1.c 2020-05-28 16:12:58.027511599 +0800 -+++ b/gcc/testsuite/gcc.dg/tree-ssa/20050314-1.c 2020-05-28 15:54:34.997511589 +0800 -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O1 -fdump-tree-lim2-details --param allow-store-data-races=1" } */ -+/* { dg-options "-O1 -fdump-tree-lim2-details -fallow-store-data-races" } */ - - float a[100]; - -diff -uprN a/gcc/testsuite/g++.dg/simulate-thread/bitfields-2.C b/gcc/testsuite/g++.dg/simulate-thread/bitfields-2.C ---- a/gcc/testsuite/g++.dg/simulate-thread/bitfields-2.C 2020-05-28 16:12:57.015511599 +0800 -+++ b/gcc/testsuite/g++.dg/simulate-thread/bitfields-2.C 2020-05-28 15:54:33.885511589 +0800 -@@ -1,5 +1,5 @@ - /* { dg-do link } */ --/* { dg-options "--param allow-store-data-races=0" } */ -+/* { dg-options "-fno-allow-store-data-races" } */ - /* { dg-final { simulate-thread } } */ - - /* Test that setting does not touch either or . -diff -uprN a/gcc/testsuite/g++.dg/simulate-thread/bitfields.C b/gcc/testsuite/g++.dg/simulate-thread/bitfields.C ---- a/gcc/testsuite/g++.dg/simulate-thread/bitfields.C 2020-05-28 16:12:57.015511599 +0800 -+++ b/gcc/testsuite/g++.dg/simulate-thread/bitfields.C 2020-05-28 15:54:33.885511589 +0800 -@@ -1,5 +1,5 @@ - /* { dg-do link } */ --/* { dg-options "--param allow-store-data-races=0" } */ -+/* { dg-options "-fno-allow-store-data-races" } */ - /* { dg-final { simulate-thread } } */ - - /* Test that setting does not touch either or . -diff -uprN a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c ---- a/gcc/tree-if-conv.c 2020-05-28 16:12:58.831511599 +0800 -+++ b/gcc/tree-if-conv.c 2020-05-28 15:54:35.641511589 +0800 -@@ -913,10 +913,10 @@ ifcvt_memrefs_wont_trap (gimple *stmt, v - to unconditionally. */ - if (base_master_dr - && DR_BASE_W_UNCONDITIONALLY (*base_master_dr)) -- return PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES); -+ return flag_store_data_races; - /* or the base is known to be not readonly. */ - else if (base_object_writable (DR_REF (a))) -- return PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES); -+ return flag_store_data_races; - } - - return false; -diff -uprN a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c ---- a/gcc/tree-ssa-loop-im.c 2020-05-28 16:12:58.779511599 +0800 -+++ b/gcc/tree-ssa-loop-im.c 2020-05-28 15:54:35.729511589 +0800 -@@ -2088,7 +2088,7 @@ execute_sm (struct loop *loop, vec - for_each_index (&ref->mem.ref, force_move_till, &fmt_data); - - if (bb_in_transaction (loop_preheader_edge (loop)->src) -- || (! PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES) -+ || (! flag_store_data_races - && ! 
ref_always_accessed_p (loop, ref, true))) - multi_threaded_model_p = true; - diff --git a/address-calculation-optimization-within-loop.patch b/address-calculation-optimization-within-loop.patch deleted file mode 100644 index 9bfa0b6f3e786216df8ee28fcb30936b99b4823f..0000000000000000000000000000000000000000 --- a/address-calculation-optimization-within-loop.patch +++ /dev/null @@ -1,74 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-widening_mul-restrict-ops-to-be-defined-in-the-same-.patch: -d21dff5b4fee51ae432143065bededfc763dc344 - -diff -Nurp a/gcc/testsuite/gcc.dg/pr94269.c b/gcc/testsuite/gcc.dg/pr94269.c ---- a/gcc/testsuite/gcc.dg/pr94269.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/pr94269.c 2020-04-17 17:04:50.608000000 +0800 -@@ -0,0 +1,26 @@ -+/* { dg-do compile { target aarch64*-*-* } } */ -+/* { dg-options "-O2 -ftree-loop-vectorize -funsafe-math-optimizations -march=armv8.2-a+sve -msve-vector-bits=256" } */ -+ -+float -+foo(long n, float *x, int inc_x, -+ float *y, int inc_y) -+{ -+ float dot = 0.0; -+ int ix = 0, iy = 0; -+ -+ if (n < 0) { -+ return dot; -+ } -+ -+ int i = 0; -+ while (i < n) { -+ dot += y[iy] * x[ix]; -+ ix += inc_x; -+ iy += inc_y; -+ i++; -+ } -+ -+ return dot; -+} -+ -+/* { dg-final { scan-assembler-not "smaddl" { target aarch64*-*-* } } } */ -diff -Nurp a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c ---- a/gcc/tree-ssa-math-opts.c 2020-04-17 16:43:59.540000000 +0800 -+++ b/gcc/tree-ssa-math-opts.c 2020-04-17 16:48:34.072036000 +0800 -@@ -2721,11 +2721,14 @@ convert_plusminus_to_widen (gimple_stmt_ - multiply-and-accumulate instructions. - - If the widened-multiplication result has more than one uses, it is -- probably wiser not to do the conversion. */ -+ probably wiser not to do the conversion. Also restrict this operation -+ to single basic block to avoid moving the multiply to a different block -+ with a higher execution frequency. */ - if (code == PLUS_EXPR - && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR)) - { - if (!has_single_use (rhs1) -+ || gimple_bb (rhs1_stmt) != gimple_bb (stmt) - || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1, - &type2, &mult_rhs2)) - return false; -@@ -2735,6 +2738,7 @@ convert_plusminus_to_widen (gimple_stmt_ - else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR) - { - if (!has_single_use (rhs2) -+ || gimple_bb (rhs2_stmt) != gimple_bb (stmt) - || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1, - &type2, &mult_rhs2)) - return false; -diff -Nurp a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.c ---- a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.c 2020-03-31 09:51:36.000000000 +0800 -+++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.c 2020-04-29 10:55:44.937471475 +0800 -@@ -17,7 +17,6 @@ f (TYPE *x, TYPE *y, unsigned short n, l - /* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */ - /* Should multiply by (VF-1)*4 rather than (257-1)*4. 
*/ - /* { dg-final { scan-assembler-not {, 1024} } } */ --/* { dg-final { scan-assembler-not {\t.bfiz\t} } } */ - /* { dg-final { scan-assembler-not {lsl[^\n]*[, ]10} } } */ - /* { dg-final { scan-assembler-not {\tcmp\tx[0-9]+, 0} } } */ - /* { dg-final { scan-assembler-not {\tcmp\tw[0-9]+, 0} } } */ diff --git a/adjust-vector-cost-and-move-EXTRACT_LAST_REDUCTION-costing.patch b/adjust-vector-cost-and-move-EXTRACT_LAST_REDUCTION-costing.patch deleted file mode 100644 index 6ee3d33e9422a6af1efbe01343303d4025dfdb88..0000000000000000000000000000000000000000 --- a/adjust-vector-cost-and-move-EXTRACT_LAST_REDUCTION-costing.patch +++ /dev/null @@ -1,88 +0,0 @@ -This backport contains 2 patchs from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -4bf29d15f2e01348a45a1f4e1a135962f123fdd6 -0001-AArch64-PR79262-Adjust-vector-cost.patch - -27071013521b015d17a2666448f27a6ff0c55aca -0001-Move-EXTRACT_LAST_REDUCTION-costing-to-vectorizable_.patch - -diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c ---- a/gcc/config/aarch64/aarch64.c 2020-11-20 04:36:33.988000000 +0800 -+++ b/gcc/config/aarch64/aarch64.c 2020-11-20 04:32:20.984000000 +0800 -@@ -448,7 +448,7 @@ static const struct cpu_vector_cost gene - 1, /* vec_int_stmt_cost */ - 1, /* vec_fp_stmt_cost */ - 2, /* vec_permute_cost */ -- 1, /* vec_to_scalar_cost */ -+ 2, /* vec_to_scalar_cost */ - 1, /* scalar_to_vec_cost */ - 1, /* vec_align_load_cost */ - 1, /* vec_unalign_load_cost */ -diff -Nurp a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c ---- a/gcc/tree-vect-loop.c 2020-11-20 04:36:34.016000000 +0800 -+++ b/gcc/tree-vect-loop.c 2020-11-20 04:32:20.984000000 +0800 -@@ -3926,8 +3926,11 @@ vect_model_reduction_cost (stmt_vec_info - - code = gimple_assign_rhs_code (orig_stmt_info->stmt); - -- if (reduction_type == EXTRACT_LAST_REDUCTION -- || reduction_type == FOLD_LEFT_REDUCTION) -+ if (reduction_type == EXTRACT_LAST_REDUCTION) -+ /* No extra instructions are needed in the prologue. The loop body -+ operations are costed in vectorizable_condition. */ -+ inside_cost = 0; -+ else if (reduction_type == FOLD_LEFT_REDUCTION) - { - /* No extra instructions needed in the prologue. */ - prologue_cost = 0; -diff -Nurp a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c ---- a/gcc/tree-vect-stmts.c 2020-11-20 04:36:33.996000000 +0800 -+++ b/gcc/tree-vect-stmts.c 2020-11-20 04:32:20.984000000 +0800 -@@ -859,7 +859,8 @@ vect_model_simple_cost (stmt_vec_info st - enum vect_def_type *dt, - int ndts, - slp_tree node, -- stmt_vector_for_cost *cost_vec) -+ stmt_vector_for_cost *cost_vec, -+ vect_cost_for_stmt kind = vector_stmt) - { - int inside_cost = 0, prologue_cost = 0; - -@@ -906,7 +907,7 @@ vect_model_simple_cost (stmt_vec_info st - } - - /* Pass the inside-of-loop statements to the target-specific cost model. 
*/ -- inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt, -+ inside_cost += record_stmt_cost (cost_vec, ncopies, kind, - stmt_info, 0, vect_body); - - if (dump_enabled_p ()) -@@ -9194,15 +9195,18 @@ vectorizable_condition (stmt_vec_info st - " EXTRACT_LAST_REDUCTION.\n"); - LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false; - } -- if (expand_vec_cond_expr_p (vectype, comp_vectype, -- cond_code)) -- { -- STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type; -- vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node, -- cost_vec); -- return true; -- } -- return false; -+ -+ vect_cost_for_stmt kind = vector_stmt; -+ if (reduction_type == EXTRACT_LAST_REDUCTION) -+ /* Count one reduction-like operation per vector. */ -+ kind = vec_to_scalar; -+ else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code)) -+ return false; -+ -+ STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type; -+ vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node, -+ cost_vec, kind); -+ return true; - } - - /* Transform. */ diff --git a/avoid-cycling-on-vertain-subreg-reloads.patch b/avoid-cycling-on-vertain-subreg-reloads.patch deleted file mode 100644 index f4139acfe94fdbe6757fa9d1586c830d6496e7cd..0000000000000000000000000000000000000000 --- a/avoid-cycling-on-vertain-subreg-reloads.patch +++ /dev/null @@ -1,154 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-lra-Avoid-cycling-on-certain-subreg-reloads-PR96796.patch -6001db79c477b03eacc7e7049560921fb54b7845 - -diff -uprN a/gcc/lra-constraints.c b/gcc/lra-constraints.c ---- a/gcc/lra-constraints.c 2020-03-12 19:07:21.000000000 +0800 -+++ b/gcc/lra-constraints.c 2020-09-08 10:02:52.308147305 +0800 -@@ -235,12 +235,17 @@ get_reg_class (int regno) - CL. Use elimination first if REG is a hard register. If REG is a - reload pseudo created by this constraints pass, assume that it will - be allocated a hard register from its allocno class, but allow that -- class to be narrowed to CL if it is currently a superset of CL. -+ class to be narrowed to CL if it is currently a superset of CL and -+ if either: -+ -+ - ALLOW_ALL_RELOAD_CLASS_CHANGES_P is true or -+ - the instruction we're processing is not a reload move. - - If NEW_CLASS is nonnull, set *NEW_CLASS to the new allocno class of - REGNO (reg), or NO_REGS if no change in its class was needed. */ - static bool --in_class_p (rtx reg, enum reg_class cl, enum reg_class *new_class) -+in_class_p (rtx reg, enum reg_class cl, enum reg_class *new_class, -+ bool allow_all_reload_class_changes_p = false) - { - enum reg_class rclass, common_class; - machine_mode reg_mode; -@@ -265,7 +270,8 @@ in_class_p (rtx reg, enum reg_class cl, - typically moves that have many alternatives, and restricting - reload pseudos for one alternative may lead to situations - where other reload pseudos are no longer allocatable. */ -- || (INSN_UID (curr_insn) >= new_insn_uid_start -+ || (!allow_all_reload_class_changes_p -+ && INSN_UID (curr_insn) >= new_insn_uid_start - && curr_insn_set != NULL - && ((OBJECT_P (SET_SRC (curr_insn_set)) - && ! CONSTANT_P (SET_SRC (curr_insn_set))) -@@ -557,13 +563,12 @@ init_curr_insn_input_reloads (void) - curr_insn_input_reloads_num = 0; - } - --/* Create a new pseudo using MODE, RCLASS, ORIGINAL or reuse already -- created input reload pseudo (only if TYPE is not OP_OUT). Don't -- reuse pseudo if IN_SUBREG_P is true and the reused pseudo should be -- wrapped up in SUBREG. 
The result pseudo is returned through -- RESULT_REG. Return TRUE if we created a new pseudo, FALSE if we -- reused the already created input reload pseudo. Use TITLE to -- describe new registers for debug purposes. */ -+/* Create a new pseudo using MODE, RCLASS, ORIGINAL or reuse an existing -+ reload pseudo. Don't reuse an existing reload pseudo if IN_SUBREG_P -+ is true and the reused pseudo should be wrapped up in a SUBREG. -+ The result pseudo is returned through RESULT_REG. Return TRUE if we -+ created a new pseudo, FALSE if we reused an existing reload pseudo. -+ Use TITLE to describe new registers for debug purposes. */ - static bool - get_reload_reg (enum op_type type, machine_mode mode, rtx original, - enum reg_class rclass, bool in_subreg_p, -@@ -575,6 +580,35 @@ get_reload_reg (enum op_type type, machi - - if (type == OP_OUT) - { -+ /* Output reload registers tend to start out with a conservative -+ choice of register class. Usually this is ALL_REGS, although -+ a target might narrow it (for performance reasons) through -+ targetm.preferred_reload_class. It's therefore quite common -+ for a reload instruction to require a more restrictive class -+ than the class that was originally assigned to the reload register. -+ -+ In these situations, it's more efficient to refine the choice -+ of register class rather than create a second reload register. -+ This also helps to avoid cycling for registers that are only -+ used by reload instructions. */ -+ if (REG_P (original) -+ && (int) REGNO (original) >= new_regno_start -+ && INSN_UID (curr_insn) >= new_insn_uid_start -+ && in_class_p (original, rclass, &new_class, true)) -+ { -+ unsigned int regno = REGNO (original); -+ if (lra_dump_file != NULL) -+ { -+ fprintf (lra_dump_file, " Reuse r%d for output ", regno); -+ dump_value_slim (lra_dump_file, original, 1); -+ } -+ if (new_class != lra_get_allocno_class (regno)) -+ lra_change_class (regno, new_class, ", change to", false); -+ if (lra_dump_file != NULL) -+ fprintf (lra_dump_file, "\n"); -+ *result_reg = original; -+ return false; -+ } - *result_reg - = lra_create_new_reg_with_unique_value (mode, original, rclass, title); - return true; -diff -uprN a/gcc/testsuite/gcc.c-torture/compile/pr96796.c b/gcc/testsuite/gcc.c-torture/compile/pr96796.c ---- a/gcc/testsuite/gcc.c-torture/compile/pr96796.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.c-torture/compile/pr96796.c 2020-09-08 09:59:40.077774393 +0800 -@@ -0,0 +1,55 @@ -+/* { dg-additional-options "-fcommon" } */ -+ -+struct S0 { -+ signed f0 : 8; -+ unsigned f1; -+ unsigned f4; -+}; -+struct S1 { -+ long f3; -+ char f4; -+} g_3_4; -+ -+int g_5, func_1_l_32, func_50___trans_tmp_31; -+static struct S0 g_144, g_834, g_1255, g_1261; -+ -+int g_273[120] = {}; -+int *g_555; -+char **g_979; -+static int g_1092_0; -+static int g_1193; -+int safe_mul_func_int16_t_s_s(int si1, int si2) { return si1 * si2; } -+static struct S0 *func_50(); -+int func_1() { func_50(g_3_4, g_5, func_1_l_32, 8, 3); } -+void safe_div_func_int64_t_s_s(int *); -+void safe_mod_func_uint32_t_u_u(struct S0); -+struct S0 *func_50(int p_51, struct S0 p_52, struct S1 p_53, int p_54, -+ int p_55) { -+ int __trans_tmp_30; -+ char __trans_tmp_22; -+ short __trans_tmp_19; -+ long l_985_1; -+ long l_1191[8]; -+ safe_div_func_int64_t_s_s(g_273); -+ __builtin_printf((char*)g_1261.f4); -+ safe_mod_func_uint32_t_u_u(g_834); -+ g_144.f0 += 1; -+ for (;;) { -+ struct S1 l_1350 = {&l_1350}; -+ for (; p_53.f3; p_53.f3 -= 1) -+ for (; g_1193 <= 2; g_1193 += 1) { -+ 
__trans_tmp_19 = safe_mul_func_int16_t_s_s(l_1191[l_985_1 + p_53.f3], -+ p_55 % (**g_979 = 10)); -+ __trans_tmp_22 = g_1255.f1 * p_53.f4; -+ __trans_tmp_30 = __trans_tmp_19 + __trans_tmp_22; -+ if (__trans_tmp_30) -+ g_1261.f0 = p_51; -+ else { -+ g_1255.f0 = p_53.f3; -+ int *l_1422 = g_834.f0 = g_144.f4 != (*l_1422)++ > 0 < 0 ^ 51; -+ g_555 = ~0; -+ g_1092_0 |= func_50___trans_tmp_31; -+ } -+ } -+ } -+} diff --git a/bf16-and-matrix-characteristic.patch b/bf16-and-matrix-characteristic.patch deleted file mode 100644 index 8f9e252ff05121b55d1075a6e1a17f45abb2d9f3..0000000000000000000000000000000000000000 --- a/bf16-and-matrix-characteristic.patch +++ /dev/null @@ -1,466067 +0,0 @@ -This backport contains 309 patchs from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-re-PR-target-89261-ix86_data_alignment-has-wrong-arg.patch -f8b906a2de3044f1dea753b182c244a1a560d40e - -0002-Fix-Wenum-compare-switch-warning-in-i386.c.patch -791536baadc9f469ec8eef2d7213c6f6091c5fa9 - -0003-Prefer-to-use-strlen-call-instead-of-inline-expansio.patch -786e0e5239529de9a4254fe8411a0e8f843e721a - -0004-Enhance-target-and-target_clone-error-messages.patch -cc2a672a60ff7476b3e4751ba41cb77c7fc85b09 - -0005-re-PR-middle-end-88963-gcc-generates-terrible-code-f.patch -a7eb97ad269b6509bd7b31ca373daea98e4d7e85 - -0006-Split-i386.c.patch -2bf6d93547e516b6b2b2051c0fb1b47ea4acc8a4 - -0007-Split-part-of-functionality-from-lto.c-to-lto-common.patch -a79420f995764129dc40d1abcbf8ce75a0b0f906 - -0008-Error-only-when-a-non-default-mabi-is-used-with-sani.patch -080629d32eca5ea202479022f0bd429a813be7c4 - -0009-This-patch-adds-support-to-vectorize-sum-of-abslolut.patch -a9fad8fe6c84de272f2a56d462e67d53c9f4a73d - -0010-cfgexpand.c-asm_clobber_reg_is_valid-Reject-clobbers.patch -0a59215131c02dee4c8829f93d1ee678647614da - -0011-re-PR-tree-optimization-90395-ICE-verify_flow_info-f.patch -362e280d10c61bec13c1d02c11a1c4ac0846db7e - -0012-re-PR-c-59813-tail-call-elimination-didn-t-fire-for-.patch -b5b9147d35ee509714c34d813c7723bf18bb7b7a - -0013-Accept-code-attributes-as-rtx-codes-in-.md-files.patch -75df257b38bd4cdcb750fc893c5023363230cfe8 - -0014-x86-fix-pr82920.patch -0f8768f73440b040707deafd254d189c2887d00d - -0015-2019-05-14-Przemyslaw-Wirkus-przemyslaw.wirkus-arm.c.patch -a52cf5cf278e4a9e58bfa2bb67a93244766a122f - -0016-re-PR-tree-optimization-88828-Inefficient-update-of-.patch -962372f9f853c582c879f11c0db14973cc8687e0 - -0017-re-PR-tree-optimization-88828-Inefficient-update-of-.patch -595ffc073bf5b1753e3a18dfa704391ad5fad626 - -0018-gcc-move-assemble_start_function-assemble_end_functi.patch -f7430263c07b4a1bcf3deb708c8c691f233fcb40 - -0019-trans.c-check_inlining_for_nested_subprog-Quote-rese.patch -a9c697b88395a0f2b175ac30c59bd8c0c22d0db1 - -0020-gcc-aarch64-move-assemble_start_function-assemble_en.patch -6b5777c6c7059b6b8e372e567a74bdccb59a02c3 - -0021-gimple-match-head.c-Include-vec-perm-indices.h.patch -ebd733a78ccf5792067e94852c6c81a5f9aa0020 - -0022-i386-Fold-__builtin_ia32_shufpd-to-VEC_PERM_EXPR.patch -4d508751f421491052bc1d83150344e6cba30b3b - -0023-aarch64-Introduce-flags-for-SVE2.patch -28108a5341653568e9ebc49ea755ff93cc1e1711 - -0024-aarch64-Change-two-function-declaration-types.patch -1ec77eedd529f81b1dc99cda9818f1ef9e952b96 - -0025-PATCH-3-3-GCC-AARCH64-Add-support-for-pointer-authen.patch -8fc16d725206f2c40bae423d7d0d93bd1baf6da2 - -0026-This-patch-implements-the-u-avgM3_floor-and-u-avgM3_.patch -0617e23c9531373d3b232152c0d81a2c707858d9 - 
-0027-tree-ssa-alias-access-spath-1.c-new-testcase.patch -987c9fc581ffb04d5ab7a782bb7aee6205c45663 - -0028-PATCH-GCC-AARCH64-Fix-libstdc-build-failure-after-r2.patch -0e2e15abd0765c1866f36f0312f77c9595e7fdec - -0029-aarch64-add-support-for-fabd-in-sve.patch -3db85990dbde7f9c8212fe0fb8a241c5d2993198 - -0030-New-.md-construct-define_insn_and_rewrite.patch -f4fde1b378ad68fb2dec6719ed26c1b901488e03 - -0031-re-PR-target-88837-SVE-Poor-vector-construction-code.patch -3a0afad0d212b3ff213b393728e018caf2daa526 - -0032-AArch64-Emit-TARGET_DOTPROD-specific-sequence-for-us.patch -72215009a9f9827397a4eb74e9341b2b7dc658df - -0033-AARCH64-ILP32-Fix-aarch64_asan_shadow_offset.patch -10078f3e1d0cbebc5e6f7f4821d3ad41421ef1e0 - -0034-Make-SRA-re-construct-orginal-memory-accesses-when-e.patch -3b47da42de621c6c3bf7d2f9245df989aa7eb5a1 - -0035-Fix-fwprop-call-to-call-to-paradoxical_subreg_p.patch -6c202d9dc65833e04e35f566c645fde8278c1a24 - -0036-init_1.c-Remove-options-O2-fno-schedule-insns-and-in.patch -3a9debbd7660bafbd7658c9e843eddbac8980188 - -0037-iterators.md-ADDSUB-Fix-typo-in-comment.patch -dd550c996578ea7e94f3a59e57f24636186fbb95 - -0038-re-PR-target-88834-SVE-Poor-addressing-mode-choices-.patch -fa9863e7d34ecd011ae75083be2ae124e5831b64 - -0039-Darwin-The-need-for-picsym-stubs-is-dependent-on-lin.patch -ce3a201593d0ed5b606360c064778de34b5b04ef - -0040-netbsd-aarch64-add-netbsd-aarch64-target.patch -f32f75858a14e7b304df7a71dae15d75081b0deb - -0041-Vectorizer-Support-masking-fold-left-reductions.patch -bce29d65ebe1316d15ec7582a1d257ef1be163f7 - -0042-Darwin-The-need-for-FDE-symbols-is-dependent-on-link.patch -dbe89f49da468fbd42a27bdb7b8f06de76a871b4 - -0043-AArch64-Simplify-SVE-IFN_COND-patterns.patch -32cf949cec180799d3fb14d405772ea35b5aafd3 - -0044-AArch64-Factor-out-ptrue-predicate-creation.patch -16de3637c4df37e0203b3ad52b238887e6ca38fc - -0045-AArch64-Factor-out-pfalse-predicate-creation.patch -e7053b0c7cf3f1cd8a23cc71e7e36ec29c46b217 - -0046-AArch64-Tabify-aarch64-sve.md.patch -ea403d8bb5129632aac4d2f270566d2d0073a8ae - -0047-AArch64-Add-a-new-CC-mode-for-SVE-conditions.patch -57d6f4d04d438522dc03488ca31f71b4b7b904c8 - -0048-aarch64-Refactor-common-errata-work-around-specs.patch -91bed1a15a6dfb891b9658532b49f9488b5537f4 - -0049-objective-c-c-testsuite-Fix-stubify-tests-for-fnext-.patch -b7a0332ccd21c04a37535c97f04abc4bc28fb321 - -0050-builtins.c-get_memory_rtx-Fix-comment.patch -76715c3216cf6ccd071fc852920af55d6b0054ae - -0051-Use-alternative_mask-for-add_insn_allocno_copies.patch -73bb8fe9e915cf3219f16afdc61c308c08aa7659 - -0052-Simplify-ira_setup_alts.patch -06a65e803ed06f3ad1fd8e5f90db03aa0a7e5414 - -0053-Make-ira_get_dup_out_num-handle-more-cases.patch -ed680e2cc18c73f90e6bfbd3f346a8820476371b - -0054-Allow-earlyclobbers-in-ira_get_dup_out_num.patch -ae5569fa33c9f3286e0b747f8b6607d21a4b9827 - -0055-Use-ira_setup_alts-for-conflict-detection.patch -6de20b9d7a1af863fb51b4a783c153ea0092810a - -0056-aarch64-force-frame-pointer-setup-before-tlsdesc-cal.patch -0e510d1824241953c67b38f7a894de7238c23c61 - -0057-AArch64-Remove-constraint-strings-from-define_expand.patch -1bbffb87a9ecc3e27a4074145e55e3315df57b7d - -0058-re-PR-target-88833-SVE-Redundant-moves-for-WHILELO-b.patch -75da268e1a563a1a52389cd2ecee12d07c45a655 - -0059-PATCH-GCC-AARCH64-PR-target-90712-Fix-gcc.dg-rtl-aar.patch -2bdc7dcbbd2eee4f114c09443933cc37a546dbff - -0060-aarch64-redefine-aes-patterns.patch -5169fa77322e36dd4783bc5126185159c35a3584 - -0061-simplify-rtx.c-simplify_unary_operation_1-Use-GET_MO.patch 
-4faba5c3bc37c0bfceec6b254d76c5d0b3e2fe8b - -0062-Support-multiple-operand-counts-for-.md-patterns.patch -d281492de84960b5885f88fffeeb226650f5141d - -0063-arch64-Fix-ambiguous-.md-attribute-uses.patch -e7ba492a04d0bfef9752cbb16fcce3ffc31bf99f - -0064-Relax-vector_builder-elt-sanity-check.patch -72ab1c51b607dd5446ee24ff9fce9178d6b811cb - -0065-re-PR-target-90723-pr88598-2.c-segfaults-with-msve-v.patch -f2b29269c407f10718bc935b3dd5c7e8641b6847 - -0066-AArch64-Rename-bitperm-to-sve2-bitperm.patch -c10abf530e52972ef708f6e72cf20dd920cd22a2 - -0067-aarch64-add-usra-and-ssra-combine-patterns.patch -462e6f9a932a44ca73715dc5c2960e5b332f63f7 - -0068-config-i386-x86-tune.def-X86_TUNE_AVOID_256FMA_CHAIN.patch -ef893a2a769b18c61953d80670b1db8c27bc44e0 - -0069-i386-options.c-ix86_option_override_internal-Default.patch -105c2795b0d63b2cc5cb224ba066fa8b9a0ad0ff - -0070-Come-up-with-function_decl_type-and-use-it-in-tree_f.patch -cb50701ec2c7abdc48db278802022f7e94675d07 - -0071-cif-code.def-NEVER_CALL-New-code.patch -5ab2422adf894bdf84deed8c7c0557c16d6dca2b - -0072-AArch64-Make-processing-less-fragile-in-config.gcc.patch -3644cadf6a9d5a5cd8e83b0123316cf184fa4e3e - -0073-Implement-more-rtx-vector-folds-on-variable-length-v.patch -4ce6ab6889446984fd7017e2150962eb4550a7ee - -0074-Generalise-VEC_DUPLICATE-folding-for-variable-length.patch -708cc6132bb374e2c5bd1c4f43f9fe7306d20970 - -0075-Add-dg-test-for-matching-function-bodies.patch -4d706ff86ea86868615558e92407674a4f4b4af9 - -0076-Prevent-Og-from-deleting-stores-to-write-only-variab.patch -ec8ac265ff21fb379ac072848561a91e4990c47f - -0077-Don-t-run-DSE-at-Og.patch -c0fe6bce2a8c35e997f45b0a674ab2058ba50ae0 - -0078-Prevent-tree-ssa-dce.c-from-deleting-stores-at-Og.patch -f33b9c40b97f6f8a72ee370068ad81e33d71434e - -0079-re-PR-target-91150-wrong-code-with-O-mavx512vbmi-due.patch -fa2987ed8db073b9d59688363e2dfb6c60f47d70 - -0080-Handle-IFN_COND_MUL-in-tree-ssa-math-opts.c.patch -c1b3d827832f883e0634b18c88eb2bbde335aa42 - -0081-Make-lra-use-per-alternative-earlyclobber-info.patch -a25f3e8efbbc7182fa58c445574848a73856e9b4 - -0082-GCC-AArch64-Enable-Transactional-Memory-Extension.patch -89626179b6fe42cbd58c715808f7c6401879757f - -0083-Add-a-gimple_move_vops-helper-function.patch -779724a5913b4e6a7ccccc0b8b415a772144a067 - -0084-Make-function_code-a-32-bit-field.patch -55f863c4d694deafb968dbf44d08ba49bb7c0766 - -0085-AArch64-Remove-unused-commutative-attribute.patch -871b49afafe043d57f717e70532d66c5a56ca173 - -0086-AArch64-Reorganise-aarch64-sve.md.patch -915d28fe74dbb30352702ab07ea5bf30747043bb - -0087-AArch64-Make-SVE-UNSPEC_COND_-s-match-the-insn-mnemo.patch -cb18e86dd005fe009c536a8bb0aec7aa88ca66df - -0088-AArch64-Remove-redundant-SVE-FADDA-pattern.patch -8ad84de26e1032d80225905c611a47b64a385e8a - -0089-AArch64-Merge-SVE-FP-unary-patterns.patch -d45b20a5539b6f306a559470c3a7e9f84a058bfb - -0090-AArch64-Merge-SVE-FMAXNM-FMINNM-patterns.patch -214c42faa06a9eb1aa7f0296399f28df4fb068ec - -0091-AArch64-Merge-SVE-ternary-FP-operations.patch -0d80d083a2e1d368fcb11eb7ea5490c274f0ea15 - -0092-AArch64-Merge-SVE-reduction-patterns.patch -b0760a40bef3ca690691bf5d214da95b5dc25266 - -0093-AArch64-Prefer-FPRs-over-GPRs-for-CLASTB.patch -801790b37ca817089ecbae214340162e6d94ea6a - -0094-AArch64-Prefer-FPRs-over-GPRs-for-INSR.patch -61ee25b9e7d84fbb18218887d1fecfb10f72993a - -0095-AArch64-Fix-INSR-for-zero-floats.patch -9b6fb97c99abe64147f82a3ea6e6ed598e387482 - -0096-C-Fix-bogus-nested-enum-error-message.patch -99769e7fb6ed153a53174b7f08415eee347655f0 - 
-0097-AArch64-Make-perm_insn-the-complete-mnemonic.patch -3e2751ce5591dc8f3b5f4ffd3dacf0fb8f789395 - -0098-AArch64-Add-a-y-constraint-for-V0-V7.patch -163b1f6ab2950553e1cc1b39a6b49293b3390e46 - -0099-AArch64-Make-aarch64_classify_vector_mode-use-a-swit.patch -806f69cd68c18399e8e54b1a0913ae57beabbe69 - -0100-AArch64-Make-simd_immediate_info-INDEX-explicit.patch -1da83ccee8e7b61e7777abb63eb0e5a0ff1f1e93 - -0101-AArch64-Use-simd_immediate_info-for-SVE-predicate-co.patch -1044fa32e2b456b59b3cdc31b4f261145f1589cc - -0102-AArch64-Increase-default-function-alignment.patch -4e55aefa3ee19167a41892e4920a3e8c520aee42 - -0103-AArch64-Improve-SVE-constant-moves.patch -4aeb1ba7f62c1d680c819ae3e137c3bad6f520ca - -0104-Darwin-There-is-no-need-to-distinguish-PIC-non-PIC-s.patch -d308419c64c52c2d48bdf53a65e1790a2c897e83 - -0105-Optimise-constant-IFN_WHILE_ULTs.patch -0b1fe8cf6f1dde656c505dde6d27279dff388962 - -0106-Protect-some-checks-of-DECL_FUNCTION_CODE.patch -cb1180d547e3b28547134a06ee020163afa59cc3 - -0107-Use-checking-forms-of-DECL_FUNCTION_CODE-PR-91421.patch -4d732405bd91b54c196fdc38191f838bb01f23a6 - -0108-AArch64-Rework-SVE-PTEST-patterns.patch -34467289631e29545e14148515ab5f5d0d9e4fa7 - -0109-AArch64-Canonicalise-SVE-predicate-constants.patch -678faefcab01f9e9eeb222852675b5a042aaf900 - -0110-AArch64-Don-t-rely-on-REG_EQUAL-notes-to-combine-SVE.patch -35d6c5913d2209eb50f48b589b29f0dce13cb9b7 - -0111-AArch64-Use-unspecs-for-remaining-SVE-FP-binary-ops.patch -6fe679cc6be7a55832f9b88a8cf0751e8d5eff6e - -0112-AArch64-Add-a-GP-strictness-operand-to-SVE-FP-unspec.patch -c9c5a8090c58b84c1eb45e39e77eee223f992009 - -0113-AArch64-Commonise-some-SVE-FP-patterns.patch -0254ed7970e64abd82f21aedf9373720a73671c7 - -0114-AArch64-Add-support-for-SVE-HF-vconds.patch -a70965b114281553fa46cac9b8abab543f36793f - -0115-AArch64-Rework-SVE-FP-comparisons.patch -4a942af61c16f38f7fe51ed72a7ac23f73f62f2a - -0116-AArch64-Use-unspecs-for-SVE-conversions-involving-fl.patch -99361551624427aebe7a856a4327e083aa33733a - -0117-AArch64-Rearrange-SVE-conversion-patterns.patch -95eb5537d8bb23b952105b46250ed4fba8766b84 - -0118-AArch64-Use-x-predication-for-SVE-integer-arithmetic.patch -063082768aab23d26e42954eb115b76318f0176d - -0119-AArch64-Rework-SVE-integer-comparisons.patch -00fa90d975bfacfd91a615fbee24e3e6a100100f - -0120-AArch64-Handle-more-SVE-predicate-constants.patch -2803bc3bbca332f53801770715a5b592b2467492 - -0121-AArch64-Use-SVE-ADR-to-optimise-shift-add-sequences.patch -a229966c9c76afe0cf18c566a3c13ddde3878288 - -0122-AArch64-Add-support-for-SVE-CLS-and-CLZ.patch -bca5a9971f47cf5fe79e6595beb762539f200f46 - -0123-AArch64-Add-support-for-SVE-CNOT.patch -e0a0be93d7c2b760779c3085c5abfd0496e3458b - -0124-AArch64-Add-support-for-SVE-SU-MAX-MIN-immediate.patch -f8c22a8bbaf3ef4260f7d8beea22ed151ca4b726 - -0125-AArch64-Add-support-for-SVE-F-MAX-MIN-NM-immediate.patch -75079ddf9cb867576bbef66f3e8370d9fdeea3b8 - -0126-AArch64-Make-more-use-of-SVE-conditional-constant-mo.patch -d29f7dd50de9e8e46f7e247c53f3b0405a3dadd9 - -0127-AArch64-Use-SVE-MOV-M-of-scalars.patch -88a37c4d72899c5a3f5a7b2bca0ae0096f3270a3 - -0128-AArch64-Add-support-for-SVE-absolute-comparisons.patch -42b4e87d317377d6dcbb25ee2523da4a0c42478a - -0129-AArch64-Add-SVE-conditional-integer-unary-patterns.patch -3c9f496337f754f7c22afb46b017871db5844a97 - -0130-AArch64-Add-SVE-conditional-floating-point-unary-pat.patch -b21f7d53095b253753c5622f99809e9c82fd3009 - -0131-AArch64-Add-SVE-conditional-conversion-patterns.patch -c5e16983cd1bd6dd6eca1b939c3c8859f0c6c866 - 
-0132-AArch64-Use-SVE-UXT-BHW-as-a-form-of-predicated-AND.patch -d113ece60450b2efb07e9057b6d2732b08fee2c4 - -0133-AArch64-Use-SVE-BIC-for-conditional-arithmetic.patch -1b187f36ec16d43d0227805955d8fae51af26970 - -0134-Add-support-for-conditional-shifts.patch -20103c0ea9336d2b5286eb7f2605ace3fd49a431 - -0135-AArch64-Use-SVE-SU-ABD-in-conditional-arithmetic.patch -9730c5ccd522cd955bcb6e65295023621cade8b6 - -0136-AArch64-Use-SVE-FABD-in-conditional-arithmetic.patch -bf30864e4c241e50585745af504b09db55f7f08b - -0137-AArch64-Use-SVE-binary-immediate-instructions-for-co.patch -a19ba9e1b15d248e5a13ee773f4acd4ae29fdeaa - -0138-AArch64-Use-SVE-MLA-MLS-MAD-and-MSB-for-conditional-.patch -b6c3aea1892c148c21f8b87668f344b2397f4aa5 - -0139-AArch64-Add-a-commutativity-marker-to-the-SVE-SU-ABD.patch -9a8d9b3f2422d4885e5c846dee66acf6336e6ccf - -0140-aarch64-Use-neoversen1-tuning-struct-for-mcpu-cortex.patch -42418c1f7f5cb3b2f466f88053acc818ddc5cd4d - -0141-AArch64-Use-SVE-reversed-shifts-in-preference-to-MOV.patch -7d1f24018b04c13134bc47619fb8aaa390b01754 - -0142-AArch64-Add-more-unpredicated-MOVPRFX-alternatives.patch -5e176a613ef2eda92aa65736763a562dc42a50fe - -0143-AArch64-Remove-unneeded-FSUB-alternatives-and-add-a-.patch -2ae21bd133c357fcd7b6e06dc7d7d9e0660abe2c - -0144-AArch64-Add-MOVPRFX-alternatives-for-SVE-EXT-pattern.patch -06b3ba23eb6ff965a92cd99d2835d4c29316a447 - -0145-AArch64-Add-more-SVE-FMLA-and-FMAD-z-alternatives.patch -432b29c189a6d26ed701c7518402708b2fcb794f - -0146-AArch64-Rework-SVE-REV-BHW-patterns.patch -d7a09c445a475a95559e8b9f29eb06ad92effa91 - -0147-AArch64-Rework-SVE-INC-DEC-handling.patch -0fdc30bcf56d7b46122d7e67d61b56c0a198f3b3 - -0148-AArch64-Optimise-aarch64_add_offset-for-SVE-VL-const.patch -7d8bdfa7e409821c50f6d8a7b557bd7dc760c4ce - -0149-AArch64-Pass-a-pattern-to-aarch64_output_sve_cnt_imm.patch -139df05a29eb71075e42f502978dea4d00a99708 - -0150-AArch64-Tweak-operand-choice-for-SVE-predicate-AND.patch -2d2388f82f2e7f2fd1da063192ba98be45f099d2 - -0151-AArch64-Fix-predicate-alignment-for-fixed-length-SVE.patch -07108a9ebe4776610bb23f684b3a346d28511bed - -0152-AArch64-Add-a-aarch64_sve_mode_p-query.patch -5c38705dbde776f68bf1f99a71657d0e21b772a5 - -0153-Remove-TARGET_SETUP_INCOMING_VARARG_BOUNDS.patch -06b5889c434b941804d5592cd4fc8946b25c1c4b - -0154-As-discussed-below.patch -1f2a3ac34620ab4669f9f32417a7a4496c8f603a - -0155-AArch64-Use-scvtf-fbits-option-where-appropriate.patch -188d00796f5bd338b9b8ab1cc8ba4b43af8ab8fd - -0156-Add-pass_va_arg_by_reference.patch -fde65a89fad742c2dca8ad50452e482d22f3c1b2 - -0157-Add-must_pass_va_arg_in_stack.patch -4f53599cb5b822cd7f95997861c2e064977ecb6a - -0158-Use-function_arg_info-for-TARGET_ARG_PARTIAL_BYTES.patch -a7c81bc1fb43366ca1b4332d8a6042b648a84cdc - -0159-Use-function_arg_info-for-TARGET_PASS_BY_REFERENCE.patch -52090e4dbd064f486af606e3f8a283dbddc7c18a - -0160-Use-function_arg_info-for-TARGET_SETUP_INCOMING_ARGS.patch -e7056ca417326a70eca05defb6a8b20b737d3417 - -0161-Use-function_arg_info-for-TARGET_FUNCTION_-INCOMING_.patch -6783fdb7057d559aa1da8afa2c15a702c532a03e - -0162-Use-function_arg_info-for-TARGET_FUNCTION_ARG_ADVANC.patch -6930c98c69ad695469ee7daa74b3b6d578afdd0d - -0163-Use-function_arg_info-for-TARGET_CALLEE_COPIES.patch -7256c7194e186fce6ff866a124a77b08196c2a5f - -0164-Use-function_arg_info-for-TARGET_MUST_PASS_IN_STACK.patch -0ffef2005fd7536efbc9c3a572701998c8a8080c - -0165-Add-a-apply_pass_by_reference_rules-helper.patch -b12cdd6e8e8dd1f39a941b731ba1056d656a094f - 
-0166-re-PR-target-88839-SVE-Poor-implementation-of-blend-.patch -9556ef20164e69d094f5a3e1af262dbb45ed8e3a - -0167-aarch64-sve.md-vcond_mask-Add.patch -b1c9ec725da365165ce4c2fdf63daa33b7d86649 - -0168-aarch64-add-intrinsics-for-vld1-q-_x4-and-vst1-q-_x4.patch -391625888d4d97f9016ab9ac04acc55d81f0c26f - -0169-arm-aarch64-Add-comments-warning-that-stack-protecto.patch -a7e73b4158f528600ef97aca29201ddc92b3439f - -0170-AArch64-Add-Linux-hwcap-strings-for-some-extensions.patch -75f935365dba3eb5e9cbd11bc0d75009cad3d019 - -0171-AArch64-Add-support-for-missing-CPUs.patch -e0664b7a63ed8305e9f8539309df7fb3eb13babe - -0172-AArch64-Implement-ACLE-intrinsics-for-FRINT-32-64-Z-.patch -10bd1d964ef12daa9f92ff0b8d1e5f600aa63f7b - -0173-AArch64-Add-support-for-__jcvt-intrinsic.patch -e1d5d19ec4f84b67ac693fef5b2add7dc9cf056d - -0174-Remove-bt-load.c.patch -f78f73cbd284abe4f1718fd7803f5f98800de225 - -0175-Simplify-the-implementation-of-HARD_REG_SET.patch -504279ae0a0ce28ad37f820dcdb7f6557aabef7c - -0176-Make-note_stores-take-an-rtx_insn.patch -e8448ba5300e32917fb12f877ae40711c2b452a3 - -0177-Remove-COPY_HARD_REG_SET.patch -6576d245386e2ce52df274ef8f2ffed81cfaa1c3 - -0178-Remove-COMPL_HARD_REG_SET.patch -50b3f54d551787e0a066451ef60ef3b055a893e6 - -0179-Remove-AND_HARD_REG_SET.patch -dc333d8ff60909dbed89126443e3024f1592f8a4 - -0180-Remove-IOR_HARD_REG_SET.patch -44942965f4eae141bd1f8300e7f77d0c9a3936e4 - -0181-Remove-AND_COMPL_HARD_REG_SET.patch -d15e5131845e2a68513230a624839ef5abcda690 - -0182-Remove-IOR_COMPL_HARD_REG_SET.patch -4897c5aaa7a5db4c1ece28ef66acb3d5e41787b3 - -0183-Remove-hard_reg_set_equal_p.patch -a85796511b2b7985f79331c996761f7a87cb8116 - -0184-Tweak-interface-to-ira-build.c-ior_hard_reg_conflict.patch -75f4e3a1b322e16a1aca28bd0ced9af57cb0a683 - -0185-Add-fast-conversions-from-arrays-to-bitmaps.patch -148909bc700e4f52aa582346a29abc5bc51a9bda - -0186-Remove-global-REG_SETs.patch -0b0310e9a0e0d553bbe9f961c52e0851328aa8b0 - -0187-Remove-call_fixed_reg_set.patch -df1f0eef67939274e9ddd3df426e8dfc5184086b - -0188-Remove-no_caller_save_reg_set.patch -026116ce2a4dedad81518b0ca89dd8243b545778 - -0189-Replace-call_used_reg_set-with-call_used_or_fixed_re.patch -a5647ae846f6765f12a359acba6a71fc12254fa8 - -0190-Add-call_used_or_fixed_reg_p.patch -a365fa0636886aeda83e57b84d837cfba13597fe - -0191-Hide-call_used_regs-in-target-independent-code.patch -53bee79caba4fb88acbcd9bad7891ea45b5511e3 - -0192-Remove-call_really_used_regs.patch -d7fb4c3162307590c0babddcea4fb60c07a7c033 - -0193-Vectorise-multiply-high-with-scaling-operations-PR-8.patch -58cc98767aa1d8136d36467b892dc4adaf427acc - -0194-arm-aarch64-Make-no_insn-issue-to-nothing.patch -f62281dc1b3d751977266d8c30b4488833fcb9dd - -0195-Two-more-POLY_INT-cases-for-dwarf2out.c.patch -ef20d2215067b1bfa8b3f9549ca0baed636a94a0 - -0196-Handle-variable-length-vectors-in-compute_record_mod.patch -defc6f266c1dd625cc64ad1ecfbd1eacbcd66e4f - -0197-Don-t-treat-variable-length-vectors-as-VLAs-during-g.patch -22b6299199da4efd3944cdaabca1d095d19ff901 - -0198-Make-get_value_for_expr-check-for-INTEGER_CSTs.patch -01b57ebf58b8cc0d16db827d1d9aa5f10da23cce - -0199-aarch64-Extend-R-for-integer-registers.patch -e3f15286d1129de2cceee6acd5d5584cb5422db6 - -0200-aarch64-Implement-TImode-compare-and-swap.patch -4a2095ebace8534038ce2adf4ae94bfc854066c4 - -0201-aarch64-Tidy-aarch64_split_compare_and_swap.patch -b7e560deb37e38fb224a0cf108e15df4a717167a - -0202-aarch64-Implement-moutline-atomics.patch -3950b229a5ed6710f30241c2ddc3c74909bf4740 - 
-0203-Rework-constant-subreg-folds-and-handle-more-variabl.patch -f24f4c15884bf1ee65a10e2f959842eec4198876 - -0204-Extend-neg_const_int-simplifications-to-other-const-.patch -681fc0fa40cc4f018cb691d796aa819a24257774 - -0205-Avoid-adding-impossible-copies-in-ira-conflicts.c-pr.patch -9f635bd13fe9e85872e441b6f3618947f989909a - -0206-AArch64-Fix-memmodel-index-in-aarch64_store_exclusiv.patch -3a30d2558b3a199fe346479e6140cddae7fba5ed - -0207-AArch64-Use-implementation-namespace-consistently-in.patch -9a3afc3564b36fb34826899a345a9c35b1c53e39 - -0208-C-C-Allow-targets-to-check-calls-to-BUILT_IN_MD-func.patch -c6447c2014b76b5c077a07712a7f0b0aaa2e14d4 - -0209-AArch64-Split-built-in-function-codes-into-major-and.patch -6d4d616a782d5be693ea9575f69d5ebf450be090 - -0210-AArch64-Strengthen-aarch64_hard_regno_call_part_clob.patch -51051f474a768d285714d713f1b7535d6a139350 - -0211-Add-function_abi.-h-cc.patch -bd785b44932274f7067105de417938597289962c - -0212-Add-a-target-hook-for-getting-an-ABI-from-a-function.patch -002ffd3caa684c3eb30f8f53206439b7aa34b370 - -0213-Add-a-function-for-getting-the-ABI-of-a-call-insn-ta.patch -5a5a3bc5fa14664be26748c11325021b6b6f8e74 - -0214-Pass-an-ABI-identifier-to-hard_regno_call_part_clobb.patch -6ee2cc70024253d2670a4a317158b2a65251a1d1 - -0215-Remove-global-call-sets-DF-entry-exit-defs.patch -559c1ae100489da76a0283750361ace146fdeb77 - -0216-Remove-global-call-sets-IRA.patch -6c47622219d6386807b26890dcdc84f192499d33 - -0217-Remove-global-call-sets-LRA.patch -a1e6ee38e708ef2bdef4dfbb99473344bd56fa2f - -0218-Remove-global-call-sets-regrename.c.patch -0ce77f463d1d150e70a91807502d628492ca7ae5 - -0219-Make-ira-call-df_set_regs_ever_live-for-extra-call-c.patch -6d1e98dfd2bfce30640d71df355bedf114229744 - -0220-AArch64-Allow-shrink-wrapping-of-non-leaf-vector-PCS.patch -ce9d2a37f2db20328286f5d3d5a13a4e765c59f7 - -0221-AArch64-Make-more-use-of-function_abi.patch -dcdd0f055731a8c960a15e5de8715d041d9a7876 - -0222-AArch64-SVE-Utilize-ASRD-instruction-for-division-an.patch -c0c2f013906a695b8a02226f119649a370d9e083 - -0223-AArch64-Make-call-insns-record-the-callee-s-arm_pcs.patch -08cc4d925f640c3cd0336bae4dc6004244a5c80a - -0224-AArch64-Use-calls-for-SVE-TLSDESC.patch -bb6ce448fc194cca8e51aea274a1b2408c7746c3 - -0225-Remove-clobber_high.patch -17d184e5c4896264c27c27d125a6c1f8462d9d37 - -0226-C-Improve-diagnostics-for-vector-types.patch -8209db250f305cc79fd751c3ed056fb9ff551a83 - -0227-invoke.texi-early-inlining-insns-O2-Document.patch -0b92cf305dcf34387a8e2564e55ca8948df3b47a - -0228-cif-code.def-MAX_INLINE_INSNS_SINGLE_O2_LIMIT-.-New.patch -562d1e9556777988ae46c5d1357af2636bc272ea - -0229-Fix-EXECUTE_IF_SET_IN_HARD_REG_SET-use.patch -1c8264003ab1d6932d874bd1a9af4ac498d4b4a4 - -0230-Use-CONSTEXPR-in-machmode.h.patch -ad00d6c1746fdcbfd86b2d50f2500d7ccb0d1691 - -0231-pretty-print-support-URL-escape-sequences-PR-87488.patch -d26082357676a3c3843595dfe88a6c682b56e334 - -0232-Relax-store_bit_field-call-in-store_expr.patch -8b27c9052b8d191c98686e77d2fa610390c78f32 - -0233-Darwin-machopic-8-n-Back-out-part-of-PR71767-fix.patch -f922d945244558904be6868dc036c31fd05750dd - -0234-Add-expr_callee_abi.patch -63d25773e166e2e3babe626a5800e70939844754 - -0235-AArch64-Use-frame-reference-in-aarch64_layout_frame.patch -ab43763e519ed8efbbfdac801d008c338fbcb187 - -0236-AArch64-Add-an-assert-to-aarch64_layout_frame.patch -8e66b377a93e3fc371d0836768740d68ef8fffc5 - -0237-AArch64-Improve-poly_int-handling-in-aarch64_layout_.patch -9b17a646d90ad0cc30daf8432aa60ad0d751d914 - 
-0238-AArch64-Add-partial-SVE-vector-modes.patch -550a338052c374cb1f6c07ffd883c4046565fdd4 - -0239-AArch64-Fix-symbol-offset-limit.patch -7d3b27ff12610fde9d6c4b56abc70c6ee9b6b3db - -0240-AArch64-SVE2-Support-for-EOR3-and-variants-of-BSL.patch -2d57b12e2acd52b843adbcd6d5909cb0b9f7196b - -0241-re-PR-target-86753-gcc.target-aarch64-sve-vcond_-45-.patch -cc1facefe3b4e3b067d95291a7dba834b830ff18 - -0242-Pass-a-vec_info-to-get_vectype_for_scalar_type.patch -7ed54790da87bbb4a134020a9fb8bd1b72fd0acb - -0243-AArch64-Implement-__rndr-__rndrrs-intrinsics.patch -c5dc215df17071281c21450fa2d584e1161e4bc2 - -0244-re-PR-debug-90231-ivopts-causes-optimized-away-itera.patch -d9eabacb0483ac1f730112d551551c258365f02e - -0245-Add-a-simulate_builin_function_decl-langhook.patch -740785381ec9944c861dcc29b420c96aa933f040 - -0246-Add-a-simulate_enum_decl-langhook.patch -ac2cfa6cc35175311f92c25acbdd244f0f3bbb87 - -0247-AArch64-Handle-scalars-in-cmp-and-shift-immediate-qu.patch -6bc67182b6500b942674d6031c1bf0f02c779cbd - -0248-AArch64-Add-FFR-and-FFRT-registers.patch -183bfdafc6f1f98711c5400498a7268cc1441096 - -0249-AArch64-Extend-SVE-reverse-permutes-to-predicates.patch -28350fd1bee1e238e9c57b04c0796e1e17b659e4 - -0250-AArch64-Add-support-for-arm_sve.h.patch -624d0f07d51b7fa8bc99142bd0e8380fb9e7badc - -0251-AArch64-Add-support-for-the-SVE-PCS.patch -c600df9a4060da3c6121ff4d0b93f179eafd69d1 - -0252-AArch64-Add-main-SVE-ACLE-tests.patch -bc73c4c24daec96ad3e7ff904645c3095a4febe9 - -0253-Remove-cgraph_global_info.patch -a62bfab5d2a332925fcf10c45b4c5d8ca499439d - -0254-AArch64-Remove-unused-mode-iterators.patch -ffc111637291037e5546428275e39d8ca16d1fac - -0255-AArch64-Use-aarch64_sve_int_mode-in-SVE-ACLE-code.patch -86194087ce338c8d0073d905eb60dca654d6bba3 - -0256-Add-build_truth_vector_type_for_mode.patch -0a0ef2387cc1561d537d8d949aef9479ef17ba35 - -0257-AArch64-Add-FULL-to-SVE-mode-iterator-names.patch -f75cdd2c4e5282985a6fbdb2e72e17cb77782044 - -0258-LRA-handle-memory-constraints-that-accept-more-than-.patch -1aeffdce2dfe718e1337d75eb4f22c3c300df9bb - -0259-Handle-VIEW_CONVERT_EXPR-for-variable-length-vectors.patch -13c247d6f2a75b7e7a11546e897489716bc31506 - -0260-re-PR-target-90867-Multiplication-or-typecast-of-int.patch -94cdd3b7ceff688d039a9f134013ac9069df2e8c - -0261-re-PR-inline-asm-92615-ICE-in-extract_insn.patch -8d0d7a63019a7d67943d1867348673e3ca3dc824 - -0262-re-PR-tree-optimization-92645-Hand-written-vector-co.patch -1fa715db5490fb44668e0a37f9a5927d9030a50e - -0263-re-PR-tree-optimization-92690-vector-CTOR-optimizati.patch -88feafba3cb5b186d53080c4958474065c4bd5d2 - -0264-target.def-TARGET_VECTORIZE_BUILTIN_CONVERSION-Remov.patch -477daf831aea18923733772d686eb1ed448d96e7 - -0265-re-PR-tree-optimization-92645-Hand-written-vector-co.patch -78307657cf9675bc4aa2e77561c823834714b4c8 - -0266-re-PR-tree-optimization-92715-error-position-plus-si.patch -438d9c4afa635c7a1475feebbc220fe8d335c664 - -0267-re-PR-target-92758-r278833-breaks-gcc.target-powerpc.patch -577f4a0e5e7f7ef9b5729a3eed79e523cba9dfa9 - -0268-re-PR-tree-optimization-92803-error-type-mismatch-in.patch -a3408fa3fbf20455eb3b17b5c78397f9d66065c7 - -0269-Add-ARM-specific-Bfloat-format-support-to-middle-end.patch -d5ffd47e9a739770aa7ef5ad06c07fe9f16a3260 - -0270-re-PR-target-92904-varargs-for-__int128-is-placed-at.patch -46f3e52e834ab0c06902e7424e57513ee6a8aacd - -0271-AArch64-Enable-CLI-for-Armv8.6-a-armv8.6-a-i8mm-and-.patch -a93e1d5c70abe9fba3522318131a352fad0a4f48 - -0272-gcc-testsuite-ChangeLog.patch -9260fb066b7ed0b237a3300e05fca9bffe018c6b - 
-0273-Add-a-compatible_vector_types_p-target-hook.patch -482b2b43e5101921ad94e51e052a18b353f8a3f5 - -0274-AArch64-Specify-some-SVE-ACLE-functions-in-a-more-ge.patch -99a3b91535cb41807d62478cd769bc1bed0db5df - -0275-AArch64-Rename-SVE-shape-unary_count-to-unary_to_uin.patch -5b052959dcd2e9c390c7de34f806c4b22a66d8f7 - -0276-AArch64-Rename-UNSPEC_WHILE-to-match-instruction-mne.patch -6ad9571b172cd98099b477cba4efdd92c85bd222 - -0277-AArch64-Add-support-for-the-SVE2-ACLE.patch -0a09a9483825233f16e5b26bb0ffee76752339fc - -0278-config.gcc-Add-arm_bf16.h.patch -abbe1ed27355178223cd099fb73227f392416ea6 - -0279-aarch64.c-aarch64_invalid_conversion-New-function-fo.patch -9869896730f3055850034c05c596828d517fa9a2 - -0280-GCC-PATCH-AArch64-Add-ACLE-intrinsics-for-dot-produc.patch -8c197c851e7528baba7cb837f34c05ba2242f705 - -0281-GCC-PATCH-AArch64-Add-ACLE-intrinsics-for-bfdot-for-.patch -f275d73a57f1e5a07fbd4978f4b4457a5eaa1e39 - -0282-AArch64-Fix-shrinkwrapping-interactions-with-atomics.patch -e5e07b68187b9aa334519746c45b8cffc5eb7e5c - -0283-AArch64-Enable-CLI-for-Armv8.6-A-f64mm.patch -336e1b950db8b91027cdf0ab33bd905930d7f363 - -0284-AArch64-SVE-Implement-svld1ro-intrinsic.patch -9ceec73fc0e5033049704becef5d79001e31a245 - -0285-AArch64-Obvious-Correct-pattern-target-requirement.patch -568f0f355f259f58688dd73f749f4d80adc10e40 - -0286-AArch64-effective_target-for-aarch64-f64mm-asm.patch -3c9e580511e713068c0ea0d7b34f6e50ebf85447 - -0287-testsuite-Add-target-xfail-argument-to-check-functio.patch -4c33b2daeb5a87aedef77993971db1a1a1c291e6 - -0288-aarch64-Skip-some-SVE-ACLE-function-body-tests-for-I.patch -b02fbed15a36a86dda6a09a8dc237a8d288f6c09 - -0289-i386-Fix-ix86_fold_builtin-shift-folding-PR93418.patch -bff948aa337807260344c83ac9079d6386410094 - -0290-forwprop-Tweak-choice-of-VEC_PERM_EXPR-filler-PR9282.patch -1ee3b380dfb479b335f3b50039ce26abcbffe59a - -0291-SRA-Add-verification-of-accesses.patch -5b9e89c922dc2e7e8b8da644bd3a8917c16b22ac - -0292-SRA-Total-scalarization-after-access-propagation-PR9.patch -636e80eea24b780f1d5f4c14c58fc00001df8508 - -0293-aarch64-Fix-SVE-PCS-failures-for-BE-ILP32.patch -2171a9207f51bc486ed9c502cb4da706f594615e - -0294-aarch64-Add-Armv8.6-SVE-matrix-multiply-support.patch -3669677425f249c163201c4760d05abb3cf4e6bc - -0295-aarch64-Add-svbfloat16_t-support-to-arm_sve.h.patch -02fcd8ac408be56d2a6e67e2e09b26532862f233 - -0296-aarch64-Add-Armv8.6-SVE-bfloat16-support.patch -896dff99e18d67afdbe4d1effec20a3da474b22b - -0297-aarch64-ACLE-intrinsics-bfmmla-and-bfmlal-b-t.patch -f78335df69993a900512f92324cab6a20b1bde0c - -0298-aarch64-Add-an-extra-sbfiz-pattern-PR87763.patch -b65a1eb3fae53f2e1ea1ef8c1164f490d55855a1 - -0299-x86-64-Pass-aggregates-with-only-float-double-in-GPR.patch -ea5ca698dca15dc86b823661ac357a30b49dd0f6 - -0300-aarch64-ACLE-I8MM-multiply-accumulate-intrinsics.patch -40f648378061c170cf6a9ab680af01b3a3a83569 - -0301-i386-Skip-ENDBR32-at-the-target-function-entry.patch -1d69147af203d4dcd2270429f90c93f1a37ddfff - -0302-testsuite-Fix-recently-added-ipa-testcases-PR93763.patch -103bc4db7665a03bf2390ccc8ceca0dc5a7a81b7 - -0303-aarch64-Add-bfloat16-vdup-and-vreinterpret-ACLE-intr.patch -8ea6c1b89a20ef7c675535ba1994355361dac977 - -0304-aarch64-Add-bfloat16-vldn-vstn-intrinsics.patch -e603cd43b145c426468c95cf85b3c12c94daedaa - -0305-aarch64-ACLE-intrinsics-for-BFCVTN-BFCVTN2-and-BFCVT.patch -1f520d3412962e22b0338461d82f41abba8a4f12 - -0306-testsuite-Fix-misquoted-string-in-bfcvt-nosimd.c.patch -db3fa3476e9e922ca3e283df03ebd14be7220b6e - 
-0307-aarch64-Fix-bf16_v-ld-st-n.c-failures-for-big-endian.patch -cf9c3bff39cf973c5c8621ff44199dcb831193a7 - -0308-testsuite-Fix-gcc.target-aarch64-advsimd-intrinsics-.patch -58a703f0726b3bb6c5ac8b600369106985906590 - -0309-cleanup-graphite-results.patch -1acde74cf611f560172c74324610c29ca81edf94 - -diff --git a/gcc/Makefile.in b/gcc/Makefile.in -index bc188bbed..46ba89598 100644 ---- a/gcc/Makefile.in -+++ b/gcc/Makefile.in -@@ -1239,7 +1239,6 @@ OBJS = \ - auto-profile.o \ - bb-reorder.o \ - bitmap.o \ -- bt-load.o \ - builtins.o \ - caller-save.o \ - calls.o \ -@@ -1305,6 +1304,7 @@ OBJS = \ - fold-const.o \ - fold-const-call.o \ - function.o \ -+ function-abi.o \ - function-tests.o \ - fwprop.o \ - gcc-rich-location.o \ -@@ -2522,6 +2522,7 @@ GTFILES = $(CPPLIB_H) $(srcdir)/input.h $(srcdir)/coretypes.h \ - $(srcdir)/libfuncs.h $(SYMTAB_H) \ - $(srcdir)/real.h $(srcdir)/function.h $(srcdir)/insn-addr.h $(srcdir)/hwint.h \ - $(srcdir)/fixed-value.h \ -+ $(srcdir)/function-abi.h \ - $(srcdir)/output.h $(srcdir)/cfgloop.h $(srcdir)/cfg.h $(srcdir)/profile-count.h \ - $(srcdir)/cselib.h $(srcdir)/basic-block.h $(srcdir)/ipa-ref.h $(srcdir)/cgraph.h \ - $(srcdir)/reload.h $(srcdir)/caller-save.c $(srcdir)/symtab.c \ -diff --git a/gcc/alias.c b/gcc/alias.c -index 053c3494e..1a60f905a 100644 ---- a/gcc/alias.c -+++ b/gcc/alias.c -@@ -1572,16 +1572,6 @@ record_set (rtx dest, const_rtx set, void *data ATTRIBUTE_UNUSED) - new_reg_base_value[regno] = 0; - return; - } -- /* A CLOBBER_HIGH only wipes out the old value if the mode of the old -- value is greater than that of the clobber. */ -- else if (GET_CODE (set) == CLOBBER_HIGH) -- { -- if (new_reg_base_value[regno] != 0 -- && reg_is_clobbered_by_clobber_high ( -- regno, GET_MODE (new_reg_base_value[regno]), XEXP (set, 0))) -- new_reg_base_value[regno] = 0; -- return; -- } - - src = SET_SRC (set); - } -@@ -3284,7 +3274,8 @@ memory_modified_in_insn_p (const_rtx mem, const_rtx insn) - if (CALL_P (insn)) - return true; - memory_modified = false; -- note_stores (PATTERN (insn), memory_modified_1, CONST_CAST_RTX(mem)); -+ note_stores (as_a (insn), memory_modified_1, -+ CONST_CAST_RTX(mem)); - return memory_modified; - } - -@@ -3412,7 +3403,7 @@ init_alias_analysis (void) - && find_reg_note (insn, REG_NOALIAS, NULL_RTX)) - record_set (SET_DEST (PATTERN (insn)), NULL_RTX, NULL); - else -- note_stores (PATTERN (insn), record_set, NULL); -+ note_stores (insn, record_set, NULL); - - set = single_set (insn); - -diff --git a/gcc/array-traits.h b/gcc/array-traits.h -new file mode 100644 -index 000000000..eb65ede94 ---- /dev/null -+++ b/gcc/array-traits.h -@@ -0,0 +1,48 @@ -+/* Descriptions of array-like objects. -+ Copyright (C) 2019 Free Software Foundation, Inc. -+ -+This file is part of GCC. -+ -+GCC is free software; you can redistribute it and/or modify it under -+the terms of the GNU General Public License as published by the Free -+Software Foundation; either version 3, or (at your option) any later -+version. -+ -+GCC is distributed in the hope that it will be useful, but WITHOUT ANY -+WARRANTY; without even the implied warranty of MERCHANTABILITY or -+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+for more details. -+ -+You should have received a copy of the GNU General Public License -+along with GCC; see the file COPYING3. If not see -+. */ -+ -+#ifndef GCC_ARRAY_TRAITS_H -+#define GCC_ARRAY_TRAITS_H -+ -+/* Implementation for single integers (and similar types). 
*/ -+template -+struct scalar_array_traits -+{ -+ typedef T element_type; -+ static const bool has_constant_size = true; -+ static const size_t constant_size = 1; -+ static const T *base (const T &x) { return &x; } -+ static size_t size (const T &) { return 1; } -+}; -+ -+template -+struct array_traits : scalar_array_traits {}; -+ -+/* Implementation for arrays with a static size. */ -+template -+struct array_traits -+{ -+ typedef T element_type; -+ static const bool has_constant_size = true; -+ static const size_t constant_size = N; -+ static const T *base (const T (&x)[N]) { return x; } -+ static size_t size (const T (&x)[N]) { return N; } -+}; -+ -+#endif -diff --git a/gcc/attribs.c b/gcc/attribs.c -index 444192254..d447ea0e4 100644 ---- a/gcc/attribs.c -+++ b/gcc/attribs.c -@@ -691,6 +691,7 @@ decl_attributes (tree *node, tree attributes, int flags, - - if (!built_in - || !DECL_P (*anode) -+ || DECL_BUILT_IN_CLASS (*anode) != BUILT_IN_NORMAL - || (DECL_FUNCTION_CODE (*anode) != BUILT_IN_UNREACHABLE - && (DECL_FUNCTION_CODE (*anode) - != BUILT_IN_UBSAN_HANDLE_BUILTIN_UNREACHABLE))) -diff --git a/gcc/bitmap.c b/gcc/bitmap.c -index 5a8236de7..911d506f3 100644 ---- a/gcc/bitmap.c -+++ b/gcc/bitmap.c -@@ -958,17 +958,17 @@ bitmap_set_bit (bitmap head, int bit) - /* Return whether a bit is set within a bitmap. */ - - int --bitmap_bit_p (bitmap head, int bit) -+bitmap_bit_p (const_bitmap head, int bit) - { - unsigned int indx = bit / BITMAP_ELEMENT_ALL_BITS; -- bitmap_element *ptr; -+ const bitmap_element *ptr; - unsigned bit_num; - unsigned word_num; - - if (!head->tree_form) -- ptr = bitmap_list_find_element (head, indx); -+ ptr = bitmap_list_find_element (const_cast (head), indx); - else -- ptr = bitmap_tree_find_element (head, indx); -+ ptr = bitmap_tree_find_element (const_cast (head), indx); - if (ptr == 0) - return 0; - -diff --git a/gcc/bitmap.h b/gcc/bitmap.h -index ed25c1ee5..7217f9e0a 100644 ---- a/gcc/bitmap.h -+++ b/gcc/bitmap.h -@@ -210,6 +210,7 @@ along with GCC; see the file COPYING3. If not see - on which many random-access membership tests will happen. */ - - #include "obstack.h" -+#include "array-traits.h" - - /* Bitmap memory usage. */ - struct bitmap_usage: public mem_usage -@@ -418,7 +419,7 @@ extern bool bitmap_clear_bit (bitmap, int); - extern bool bitmap_set_bit (bitmap, int); - - /* Return true if a bit is set in a bitmap. */ --extern int bitmap_bit_p (bitmap, int); -+extern int bitmap_bit_p (const_bitmap, int); - - /* Debug functions to print a bitmap. */ - extern void debug_bitmap (const_bitmap); -@@ -937,4 +938,123 @@ class auto_bitmap - bitmap_head m_bits; - }; - -+/* Base class for bitmap_view; see there for details. */ -+template > -+class base_bitmap_view -+{ -+public: -+ typedef typename Traits::element_type array_element_type; -+ -+ base_bitmap_view (const T &, bitmap_element *); -+ operator const_bitmap () const { return &m_head; } -+ -+private: -+ base_bitmap_view (const base_bitmap_view &); -+ -+ bitmap_head m_head; -+}; -+ -+/* Provides a read-only bitmap view of a single integer bitmask or a -+ constant-sized array of integer bitmasks, or of a wrapper around such -+ bitmasks. */ -+template -+class bitmap_view : public base_bitmap_view -+{ -+public: -+ bitmap_view (const T &array) -+ : base_bitmap_view (array, m_bitmap_elements) {} -+ -+private: -+ /* How many bitmap_elements we need to hold a full T. 
*/ -+ static const size_t num_bitmap_elements -+ = CEIL (CHAR_BIT -+ * sizeof (typename Traits::element_type) -+ * Traits::constant_size, -+ BITMAP_ELEMENT_ALL_BITS); -+ bitmap_element m_bitmap_elements[num_bitmap_elements]; -+}; -+ -+/* Initialize the view for array ARRAY, using the array of bitmap -+ elements in BITMAP_ELEMENTS (which is known to contain enough -+ entries). */ -+template -+base_bitmap_view::base_bitmap_view (const T &array, -+ bitmap_element *bitmap_elements) -+{ -+ m_head.obstack = NULL; -+ -+ /* The code currently assumes that each element of ARRAY corresponds -+ to exactly one bitmap_element. */ -+ const size_t array_element_bits = CHAR_BIT * sizeof (array_element_type); -+ STATIC_ASSERT (BITMAP_ELEMENT_ALL_BITS % array_element_bits == 0); -+ size_t array_step = BITMAP_ELEMENT_ALL_BITS / array_element_bits; -+ size_t array_size = Traits::size (array); -+ -+ /* Process each potential bitmap_element in turn. The loop is written -+ this way rather than per array element because usually there are -+ only a small number of array elements per bitmap element (typically -+ two or four). The inner loops should therefore unroll completely. */ -+ const array_element_type *array_elements = Traits::base (array); -+ unsigned int indx = 0; -+ for (size_t array_base = 0; -+ array_base < array_size; -+ array_base += array_step, indx += 1) -+ { -+ /* How many array elements are in this particular bitmap_element. */ -+ unsigned int array_count -+ = (STATIC_CONSTANT_P (array_size % array_step == 0) -+ ? array_step : MIN (array_step, array_size - array_base)); -+ -+ /* See whether we need this bitmap element. */ -+ array_element_type ior = array_elements[array_base]; -+ for (size_t i = 1; i < array_count; ++i) -+ ior |= array_elements[array_base + i]; -+ if (ior == 0) -+ continue; -+ -+ /* Grab the next bitmap element and chain it. */ -+ bitmap_element *bitmap_element = bitmap_elements++; -+ if (m_head.current) -+ m_head.current->next = bitmap_element; -+ else -+ m_head.first = bitmap_element; -+ bitmap_element->prev = m_head.current; -+ bitmap_element->next = NULL; -+ bitmap_element->indx = indx; -+ m_head.current = bitmap_element; -+ m_head.indx = indx; -+ -+ /* Fill in the bits of the bitmap element. */ -+ if (array_element_bits < BITMAP_WORD_BITS) -+ { -+ /* Multiple array elements fit in one element of -+ bitmap_element->bits. */ -+ size_t array_i = array_base; -+ for (unsigned int word_i = 0; word_i < BITMAP_ELEMENT_WORDS; -+ ++word_i) -+ { -+ BITMAP_WORD word = 0; -+ for (unsigned int shift = 0; -+ shift < BITMAP_WORD_BITS && array_i < array_size; -+ shift += array_element_bits) -+ word |= array_elements[array_i++] << shift; -+ bitmap_element->bits[word_i] = word; -+ } -+ } -+ else -+ { -+ /* Array elements are the same size as elements of -+ bitmap_element->bits, or are an exact multiple of that size. */ -+ unsigned int word_i = 0; -+ for (unsigned int i = 0; i < array_count; ++i) -+ for (unsigned int shift = 0; shift < array_element_bits; -+ shift += BITMAP_WORD_BITS) -+ bitmap_element->bits[word_i++] -+ = array_elements[array_base + i] >> shift; -+ while (word_i < BITMAP_ELEMENT_WORDS) -+ bitmap_element->bits[word_i++] = 0; -+ } -+ } -+} -+ - #endif /* GCC_BITMAP_H */ -diff --git a/gcc/bt-load.c b/gcc/bt-load.c -deleted file mode 100644 -index f68879ca4..000000000 ---- a/gcc/bt-load.c -+++ /dev/null -@@ -1,1577 +0,0 @@ --/* Perform branch target register load optimizations. -- Copyright (C) 2001-2019 Free Software Foundation, Inc. -- --This file is part of GCC. 
-- --GCC is free software; you can redistribute it and/or modify it under --the terms of the GNU General Public License as published by the Free --Software Foundation; either version 3, or (at your option) any later --version. -- --GCC is distributed in the hope that it will be useful, but WITHOUT ANY --WARRANTY; without even the implied warranty of MERCHANTABILITY or --FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --for more details. -- --You should have received a copy of the GNU General Public License --along with GCC; see the file COPYING3. If not see --. */ -- --#include "config.h" --#include "system.h" --#include "coretypes.h" --#include "backend.h" --#include "target.h" --#include "rtl.h" --#include "tree.h" --#include "df.h" --#include "insn-config.h" --#include "regs.h" --#include "memmodel.h" --#include "emit-rtl.h" --#include "recog.h" --#include "diagnostic-core.h" --#include "expr.h" --#include "insn-attr.h" --#include "tree-pass.h" --#include "cfgrtl.h" --#include "cfganal.h" --#include "cfgcleanup.h" --#include "cfgloop.h" --#include "rtl-iter.h" --#include "fibonacci_heap.h" -- --struct btr_def; -- --/* Target register optimizations - these are performed after reload. */ -- --struct btr_def_group --{ -- btr_def_group *next; -- rtx src; -- btr_def *members; --}; -- --struct btr_user --{ -- btr_user *next; -- basic_block bb; -- int luid; -- rtx_insn *insn; -- /* If INSN has a single use of a single branch register, then -- USE points to it within INSN. If there is more than -- one branch register use, or the use is in some way ambiguous, -- then USE is NULL. */ -- rtx use; -- int n_reaching_defs; -- int first_reaching_def; -- char other_use_this_block; --}; -- --/* btr_def structs appear on three lists: -- 1. A list of all btr_def structures (head is -- ALL_BTR_DEFS, linked by the NEXT field). -- 2. A list of branch reg definitions per basic block (head is -- BB_BTR_DEFS[i], linked by the NEXT_THIS_BB field). -- 3. A list of all branch reg definitions belonging to the same -- group (head is in a BTR_DEF_GROUP struct, linked by -- NEXT_THIS_GROUP field). */ -- --struct btr_def --{ -- btr_def *next_this_bb; -- btr_def *next_this_group; -- basic_block bb; -- int luid; -- rtx_insn *insn; -- int btr; -- int cost; -- /* For a branch register setting insn that has a constant -- source (i.e. a label), group links together all the -- insns with the same source. For other branch register -- setting insns, group is NULL. */ -- btr_def_group *group; -- btr_user *uses; -- /* If this def has a reaching use which is not a simple use -- in a branch instruction, then has_ambiguous_use will be true, -- and we will not attempt to migrate this definition. */ -- char has_ambiguous_use; -- /* live_range is an approximation to the true live range for this -- def/use web, because it records the set of blocks that contain -- the live range. There could be other live ranges for the same -- branch register in that set of blocks, either in the block -- containing the def (before the def), or in a block containing -- a use (after the use). If there are such other live ranges, then -- other_btr_uses_before_def or other_btr_uses_after_use must be set true -- as appropriate. */ -- char other_btr_uses_before_def; -- char other_btr_uses_after_use; -- /* We set own_end when we have moved a definition into a dominator. -- Thus, when a later combination removes this definition again, we know -- to clear out trs_live_at_end again. 
*/ -- char own_end; -- bitmap live_range; --}; -- --typedef fibonacci_heap btr_heap_t; --typedef fibonacci_node btr_heap_node_t; -- --static int issue_rate; -- --static int basic_block_freq (const_basic_block); --static int insn_sets_btr_p (const rtx_insn *, int, int *); --static void find_btr_def_group (btr_def_group **, btr_def *); --static btr_def *add_btr_def (btr_heap_t *, basic_block, int, rtx_insn *, -- unsigned int, int, btr_def_group **); --static btr_user *new_btr_user (basic_block, int, rtx_insn *); --static void dump_hard_reg_set (HARD_REG_SET); --static void dump_btrs_live (int); --static void note_other_use_this_block (unsigned int, btr_user *); --static void compute_defs_uses_and_gen (btr_heap_t *, btr_def **, btr_user **, -- sbitmap *, sbitmap *, HARD_REG_SET *); --static void compute_kill (sbitmap *, sbitmap *, HARD_REG_SET *); --static void compute_out (sbitmap *bb_out, sbitmap *, sbitmap *, int); --static void link_btr_uses (btr_def **, btr_user **, sbitmap *, sbitmap *, int); --static void build_btr_def_use_webs (btr_heap_t *); --static int block_at_edge_of_live_range_p (int, btr_def *); --static void clear_btr_from_live_range (btr_def *def); --static void add_btr_to_live_range (btr_def *, int); --static void augment_live_range (bitmap, HARD_REG_SET *, basic_block, -- basic_block, int); --static int choose_btr (HARD_REG_SET); --static void combine_btr_defs (btr_def *, HARD_REG_SET *); --static void btr_def_live_range (btr_def *, HARD_REG_SET *); --static void move_btr_def (basic_block, int, btr_def *, bitmap, HARD_REG_SET *); --static int migrate_btr_def (btr_def *, int); --static void migrate_btr_defs (enum reg_class, int); --static int can_move_up (const_basic_block, const rtx_insn *, int); --static void note_btr_set (rtx, const_rtx, void *); -- --/* The following code performs code motion of target load instructions -- (instructions that set branch target registers), to move them -- forward away from the branch instructions and out of loops (or, -- more generally, from a more frequently executed place to a less -- frequently executed place). -- Moving target load instructions further in front of the branch -- instruction that uses the target register value means that the hardware -- has a better chance of preloading the instructions at the branch -- target by the time the branch is reached. This avoids bubbles -- when a taken branch needs to flush out the pipeline. -- Moving target load instructions out of loops means they are executed -- less frequently. */ -- --/* An obstack to hold the def-use web data structures built up for -- migrating branch target load instructions. */ --static struct obstack migrate_btrl_obstack; -- --/* Array indexed by basic block number, giving the set of registers -- live in that block. */ --static HARD_REG_SET *btrs_live; -- --/* Array indexed by basic block number, giving the set of registers live at -- the end of that block, including any uses by a final jump insn, if any. */ --static HARD_REG_SET *btrs_live_at_end; -- --/* Set of all target registers that we are willing to allocate. */ --static HARD_REG_SET all_btrs; -- --/* Provide lower and upper bounds for target register numbers, so that -- we don't need to search through all the hard registers all the time. */ --static int first_btr, last_btr; -- -- -- --/* Return an estimate of the frequency of execution of block bb. 
*/ --static int --basic_block_freq (const_basic_block bb) --{ -- return bb->count.to_frequency (cfun); --} -- --/* If the rtx at *XP references (sets or reads) any branch target -- register, return one such register. If EXCLUDEP is set, disregard -- any references within that location. */ --static rtx * --find_btr_use (rtx *xp, rtx *excludep = 0) --{ -- subrtx_ptr_iterator::array_type array; -- FOR_EACH_SUBRTX_PTR (iter, array, xp, NONCONST) -- { -- rtx *loc = *iter; -- if (loc == excludep) -- iter.skip_subrtxes (); -- else -- { -- const_rtx x = *loc; -- if (REG_P (x) -- && overlaps_hard_reg_set_p (all_btrs, GET_MODE (x), REGNO (x))) -- return loc; -- } -- } -- return 0; --} -- --/* Return true if insn is an instruction that sets a target register. -- if CHECK_CONST is true, only return true if the source is constant. -- If such a set is found and REGNO is nonzero, assign the register number -- of the destination register to *REGNO. */ --static int --insn_sets_btr_p (const rtx_insn *insn, int check_const, int *regno) --{ -- rtx set; -- -- if (NONJUMP_INSN_P (insn) -- && (set = single_set (insn))) -- { -- rtx dest = SET_DEST (set); -- rtx src = SET_SRC (set); -- -- if (GET_CODE (dest) == SUBREG) -- dest = XEXP (dest, 0); -- -- if (REG_P (dest) -- && TEST_HARD_REG_BIT (all_btrs, REGNO (dest))) -- { -- gcc_assert (!find_btr_use (&src)); -- -- if (!check_const || CONSTANT_P (src)) -- { -- if (regno) -- *regno = REGNO (dest); -- return 1; -- } -- } -- } -- return 0; --} -- --/* Find the group that the target register definition DEF belongs -- to in the list starting with *ALL_BTR_DEF_GROUPS. If no such -- group exists, create one. Add def to the group. */ --static void --find_btr_def_group (btr_def_group **all_btr_def_groups, btr_def *def) --{ -- if (insn_sets_btr_p (def->insn, 1, NULL)) -- { -- btr_def_group *this_group; -- rtx def_src = SET_SRC (single_set (def->insn)); -- -- /* ?? This linear search is an efficiency concern, particularly -- as the search will almost always fail to find a match. */ -- for (this_group = *all_btr_def_groups; -- this_group != NULL; -- this_group = this_group->next) -- if (rtx_equal_p (def_src, this_group->src)) -- break; -- -- if (!this_group) -- { -- this_group = XOBNEW (&migrate_btrl_obstack, btr_def_group); -- this_group->src = def_src; -- this_group->members = NULL; -- this_group->next = *all_btr_def_groups; -- *all_btr_def_groups = this_group; -- } -- def->group = this_group; -- def->next_this_group = this_group->members; -- this_group->members = def; -- } -- else -- def->group = NULL; --} -- --/* Create a new target register definition structure, for a definition in -- block BB, instruction INSN, and insert it into ALL_BTR_DEFS. Return -- the new definition. 
*/ --static btr_def * --add_btr_def (btr_heap_t *all_btr_defs, basic_block bb, int insn_luid, -- rtx_insn *insn, -- unsigned int dest_reg, int other_btr_uses_before_def, -- btr_def_group **all_btr_def_groups) --{ -- btr_def *this_def = XOBNEW (&migrate_btrl_obstack, btr_def); -- this_def->bb = bb; -- this_def->luid = insn_luid; -- this_def->insn = insn; -- this_def->btr = dest_reg; -- this_def->cost = basic_block_freq (bb); -- this_def->has_ambiguous_use = 0; -- this_def->other_btr_uses_before_def = other_btr_uses_before_def; -- this_def->other_btr_uses_after_use = 0; -- this_def->next_this_bb = NULL; -- this_def->next_this_group = NULL; -- this_def->uses = NULL; -- this_def->live_range = NULL; -- find_btr_def_group (all_btr_def_groups, this_def); -- -- all_btr_defs->insert (-this_def->cost, this_def); -- -- if (dump_file) -- fprintf (dump_file, -- "Found target reg definition: sets %u { bb %d, insn %d }%s priority %d\n", -- dest_reg, bb->index, INSN_UID (insn), -- (this_def->group ? "" : ":not const"), this_def->cost); -- -- return this_def; --} -- --/* Create a new target register user structure, for a use in block BB, -- instruction INSN. Return the new user. */ --static btr_user * --new_btr_user (basic_block bb, int insn_luid, rtx_insn *insn) --{ -- /* This instruction reads target registers. We need -- to decide whether we can replace all target register -- uses easily. -- */ -- rtx *usep = find_btr_use (&PATTERN (insn)); -- rtx use; -- btr_user *user = NULL; -- -- if (usep) -- { -- int unambiguous_single_use; -- -- /* We want to ensure that USE is the only use of a target -- register in INSN, so that we know that to rewrite INSN to use -- a different target register, all we have to do is replace USE. */ -- unambiguous_single_use = !find_btr_use (&PATTERN (insn), usep); -- if (!unambiguous_single_use) -- usep = NULL; -- } -- use = usep ? *usep : NULL_RTX; -- user = XOBNEW (&migrate_btrl_obstack, btr_user); -- user->bb = bb; -- user->luid = insn_luid; -- user->insn = insn; -- user->use = use; -- user->other_use_this_block = 0; -- user->next = NULL; -- user->n_reaching_defs = 0; -- user->first_reaching_def = -1; -- -- if (dump_file) -- { -- fprintf (dump_file, "Uses target reg: { bb %d, insn %d }", -- bb->index, INSN_UID (insn)); -- -- if (user->use) -- fprintf (dump_file, ": unambiguous use of reg %d\n", -- REGNO (user->use)); -- } -- -- return user; --} -- --/* Write the contents of S to the dump file. */ --static void --dump_hard_reg_set (HARD_REG_SET s) --{ -- int reg; -- for (reg = 0; reg < FIRST_PSEUDO_REGISTER; reg++) -- if (TEST_HARD_REG_BIT (s, reg)) -- fprintf (dump_file, " %d", reg); --} -- --/* Write the set of target regs live in block BB to the dump file. */ --static void --dump_btrs_live (int bb) --{ -- fprintf (dump_file, "BB%d live:", bb); -- dump_hard_reg_set (btrs_live[bb]); -- fprintf (dump_file, "\n"); --} -- --/* REGNO is the number of a branch target register that is being used or -- set. USERS_THIS_BB is a list of preceding branch target register users; -- If any of them use the same register, set their other_use_this_block -- flag. 
*/ --static void --note_other_use_this_block (unsigned int regno, btr_user *users_this_bb) --{ -- btr_user *user; -- -- for (user = users_this_bb; user != NULL; user = user->next) -- if (user->use && REGNO (user->use) == regno) -- user->other_use_this_block = 1; --} -- --struct defs_uses_info { -- btr_user *users_this_bb; -- HARD_REG_SET btrs_written_in_block; -- HARD_REG_SET btrs_live_in_block; -- sbitmap bb_gen; -- sbitmap *btr_defset; --}; -- --/* Called via note_stores or directly to register stores into / -- clobbers of a branch target register DEST that are not recognized as -- straightforward definitions. DATA points to information about the -- current basic block that needs updating. */ --static void --note_btr_set (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data) --{ -- defs_uses_info *info = (defs_uses_info *) data; -- int regno, end_regno; -- -- if (!REG_P (dest)) -- return; -- regno = REGNO (dest); -- end_regno = END_REGNO (dest); -- for (; regno < end_regno; regno++) -- if (TEST_HARD_REG_BIT (all_btrs, regno)) -- { -- note_other_use_this_block (regno, info->users_this_bb); -- SET_HARD_REG_BIT (info->btrs_written_in_block, regno); -- SET_HARD_REG_BIT (info->btrs_live_in_block, regno); -- bitmap_and_compl (info->bb_gen, info->bb_gen, -- info->btr_defset[regno - first_btr]); -- } --} -- --static void --compute_defs_uses_and_gen (btr_heap_t *all_btr_defs, btr_def **def_array, -- btr_user **use_array, sbitmap *btr_defset, -- sbitmap *bb_gen, HARD_REG_SET *btrs_written) --{ -- /* Scan the code building up the set of all defs and all uses. -- For each target register, build the set of defs of that register. -- For each block, calculate the set of target registers -- written in that block. -- Also calculate the set of btrs ever live in that block. -- */ -- int i; -- int insn_luid = 0; -- btr_def_group *all_btr_def_groups = NULL; -- defs_uses_info info; -- -- bitmap_vector_clear (bb_gen, last_basic_block_for_fn (cfun)); -- for (i = NUM_FIXED_BLOCKS; i < last_basic_block_for_fn (cfun); i++) -- { -- basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i); -- int reg; -- btr_def *defs_this_bb = NULL; -- rtx_insn *insn; -- rtx_insn *last; -- int can_throw = 0; -- -- info.users_this_bb = NULL; -- info.bb_gen = bb_gen[i]; -- info.btr_defset = btr_defset; -- -- CLEAR_HARD_REG_SET (info.btrs_live_in_block); -- CLEAR_HARD_REG_SET (info.btrs_written_in_block); -- for (reg = first_btr; reg <= last_btr; reg++) -- if (TEST_HARD_REG_BIT (all_btrs, reg) -- && REGNO_REG_SET_P (df_get_live_in (bb), reg)) -- SET_HARD_REG_BIT (info.btrs_live_in_block, reg); -- -- for (insn = BB_HEAD (bb), last = NEXT_INSN (BB_END (bb)); -- insn != last; -- insn = NEXT_INSN (insn), insn_luid++) -- { -- if (INSN_P (insn)) -- { -- int regno; -- int insn_uid = INSN_UID (insn); -- -- if (insn_sets_btr_p (insn, 0, ®no)) -- { -- btr_def *def = add_btr_def ( -- all_btr_defs, bb, insn_luid, insn, regno, -- TEST_HARD_REG_BIT (info.btrs_live_in_block, regno), -- &all_btr_def_groups); -- -- def_array[insn_uid] = def; -- SET_HARD_REG_BIT (info.btrs_written_in_block, regno); -- SET_HARD_REG_BIT (info.btrs_live_in_block, regno); -- bitmap_and_compl (bb_gen[i], bb_gen[i], -- btr_defset[regno - first_btr]); -- bitmap_set_bit (bb_gen[i], insn_uid); -- def->next_this_bb = defs_this_bb; -- defs_this_bb = def; -- bitmap_set_bit (btr_defset[regno - first_btr], insn_uid); -- note_other_use_this_block (regno, info.users_this_bb); -- } -- /* Check for the blockage emitted by expand_nl_goto_receiver. 
*/ -- else if (cfun->has_nonlocal_label -- && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE) -- { -- btr_user *user; -- -- /* Do the equivalent of calling note_other_use_this_block -- for every target register. */ -- for (user = info.users_this_bb; user != NULL; -- user = user->next) -- if (user->use) -- user->other_use_this_block = 1; -- IOR_HARD_REG_SET (info.btrs_written_in_block, all_btrs); -- IOR_HARD_REG_SET (info.btrs_live_in_block, all_btrs); -- bitmap_clear (info.bb_gen); -- } -- else -- { -- if (find_btr_use (&PATTERN (insn))) -- { -- btr_user *user = new_btr_user (bb, insn_luid, insn); -- -- use_array[insn_uid] = user; -- if (user->use) -- SET_HARD_REG_BIT (info.btrs_live_in_block, -- REGNO (user->use)); -- else -- { -- int reg; -- for (reg = first_btr; reg <= last_btr; reg++) -- if (TEST_HARD_REG_BIT (all_btrs, reg) -- && refers_to_regno_p (reg, user->insn)) -- { -- note_other_use_this_block (reg, -- info.users_this_bb); -- SET_HARD_REG_BIT (info.btrs_live_in_block, reg); -- } -- note_stores (PATTERN (insn), note_btr_set, &info); -- } -- user->next = info.users_this_bb; -- info.users_this_bb = user; -- } -- if (CALL_P (insn)) -- { -- HARD_REG_SET *clobbered = &call_used_reg_set; -- HARD_REG_SET call_saved; -- rtx pat = PATTERN (insn); -- int i; -- -- /* Check for sibcall. */ -- if (GET_CODE (pat) == PARALLEL) -- for (i = XVECLEN (pat, 0) - 1; i >= 0; i--) -- if (ANY_RETURN_P (XVECEXP (pat, 0, i))) -- { -- COMPL_HARD_REG_SET (call_saved, -- call_used_reg_set); -- clobbered = &call_saved; -- } -- -- for (regno = first_btr; regno <= last_btr; regno++) -- if (TEST_HARD_REG_BIT (*clobbered, regno)) -- note_btr_set (regno_reg_rtx[regno], NULL_RTX, &info); -- } -- } -- } -- } -- -- COPY_HARD_REG_SET (btrs_live[i], info.btrs_live_in_block); -- COPY_HARD_REG_SET (btrs_written[i], info.btrs_written_in_block); -- -- REG_SET_TO_HARD_REG_SET (btrs_live_at_end[i], df_get_live_out (bb)); -- /* If this block ends in a jump insn, add any uses or even clobbers -- of branch target registers that it might have. */ -- for (insn = BB_END (bb); insn != BB_HEAD (bb) && ! INSN_P (insn); ) -- insn = PREV_INSN (insn); -- /* ??? for the fall-through edge, it would make sense to insert the -- btr set on the edge, but that would require to split the block -- early on so that we can distinguish between dominance from the fall -- through edge - which can use the call-clobbered registers - from -- dominance by the throw edge. */ -- if (can_throw_internal (insn)) -- { -- HARD_REG_SET tmp; -- -- COPY_HARD_REG_SET (tmp, call_used_reg_set); -- AND_HARD_REG_SET (tmp, all_btrs); -- IOR_HARD_REG_SET (btrs_live_at_end[i], tmp); -- can_throw = 1; -- } -- if (can_throw || JUMP_P (insn)) -- { -- int regno; -- -- for (regno = first_btr; regno <= last_btr; regno++) -- if (refers_to_regno_p (regno, insn)) -- SET_HARD_REG_BIT (btrs_live_at_end[i], regno); -- } -- -- if (dump_file) -- dump_btrs_live (i); -- } --} -- --static void --compute_kill (sbitmap *bb_kill, sbitmap *btr_defset, -- HARD_REG_SET *btrs_written) --{ -- int i; -- int regno; -- -- /* For each basic block, form the set BB_KILL - the set -- of definitions that the block kills. 
*/ -- bitmap_vector_clear (bb_kill, last_basic_block_for_fn (cfun)); -- for (i = NUM_FIXED_BLOCKS; i < last_basic_block_for_fn (cfun); i++) -- { -- for (regno = first_btr; regno <= last_btr; regno++) -- if (TEST_HARD_REG_BIT (all_btrs, regno) -- && TEST_HARD_REG_BIT (btrs_written[i], regno)) -- bitmap_ior (bb_kill[i], bb_kill[i], -- btr_defset[regno - first_btr]); -- } --} -- --static void --compute_out (sbitmap *bb_out, sbitmap *bb_gen, sbitmap *bb_kill, int max_uid) --{ -- /* Perform iterative dataflow: -- Initially, for all blocks, BB_OUT = BB_GEN. -- For each block, -- BB_IN = union over predecessors of BB_OUT(pred) -- BB_OUT = (BB_IN - BB_KILL) + BB_GEN -- Iterate until the bb_out sets stop growing. */ -- int i; -- int changed; -- auto_sbitmap bb_in (max_uid); -- -- for (i = NUM_FIXED_BLOCKS; i < last_basic_block_for_fn (cfun); i++) -- bitmap_copy (bb_out[i], bb_gen[i]); -- -- changed = 1; -- while (changed) -- { -- changed = 0; -- for (i = NUM_FIXED_BLOCKS; i < last_basic_block_for_fn (cfun); i++) -- { -- bitmap_union_of_preds (bb_in, bb_out, BASIC_BLOCK_FOR_FN (cfun, i)); -- changed |= bitmap_ior_and_compl (bb_out[i], bb_gen[i], -- bb_in, bb_kill[i]); -- } -- } --} -- --static void --link_btr_uses (btr_def **def_array, btr_user **use_array, sbitmap *bb_out, -- sbitmap *btr_defset, int max_uid) --{ -- int i; -- auto_sbitmap reaching_defs (max_uid); -- -- /* Link uses to the uses lists of all of their reaching defs. -- Count up the number of reaching defs of each use. */ -- for (i = NUM_FIXED_BLOCKS; i < last_basic_block_for_fn (cfun); i++) -- { -- basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i); -- rtx_insn *insn; -- rtx_insn *last; -- -- bitmap_union_of_preds (reaching_defs, bb_out, BASIC_BLOCK_FOR_FN (cfun, i)); -- for (insn = BB_HEAD (bb), last = NEXT_INSN (BB_END (bb)); -- insn != last; -- insn = NEXT_INSN (insn)) -- { -- if (INSN_P (insn)) -- { -- int insn_uid = INSN_UID (insn); -- -- btr_def *def = def_array[insn_uid]; -- btr_user *user = use_array[insn_uid]; -- if (def != NULL) -- { -- /* Remove all reaching defs of regno except -- for this one. */ -- bitmap_and_compl (reaching_defs, reaching_defs, -- btr_defset[def->btr - first_btr]); -- bitmap_set_bit (reaching_defs, insn_uid); -- } -- -- if (user != NULL) -- { -- /* Find all the reaching defs for this use. */ -- auto_sbitmap reaching_defs_of_reg (max_uid); -- unsigned int uid = 0; -- sbitmap_iterator sbi; -- -- if (user->use) -- bitmap_and ( -- reaching_defs_of_reg, -- reaching_defs, -- btr_defset[REGNO (user->use) - first_btr]); -- else -- { -- int reg; -- -- bitmap_clear (reaching_defs_of_reg); -- for (reg = first_btr; reg <= last_btr; reg++) -- if (TEST_HARD_REG_BIT (all_btrs, reg) -- && refers_to_regno_p (reg, user->insn)) -- bitmap_or_and (reaching_defs_of_reg, -- reaching_defs_of_reg, -- reaching_defs, -- btr_defset[reg - first_btr]); -- } -- EXECUTE_IF_SET_IN_BITMAP (reaching_defs_of_reg, 0, uid, sbi) -- { -- btr_def *def = def_array[uid]; -- -- /* We now know that def reaches user. */ -- -- if (dump_file) -- fprintf (dump_file, -- "Def in insn %d reaches use in insn %d\n", -- uid, insn_uid); -- -- user->n_reaching_defs++; -- if (!user->use) -- def->has_ambiguous_use = 1; -- if (user->first_reaching_def != -1) -- { /* There is more than one reaching def. This is -- a rare case, so just give up on this def/use -- web when it occurs. 
*/ -- def->has_ambiguous_use = 1; -- def_array[user->first_reaching_def] -- ->has_ambiguous_use = 1; -- if (dump_file) -- fprintf (dump_file, -- "(use %d has multiple reaching defs)\n", -- insn_uid); -- } -- else -- user->first_reaching_def = uid; -- if (user->other_use_this_block) -- def->other_btr_uses_after_use = 1; -- user->next = def->uses; -- def->uses = user; -- } -- } -- -- if (CALL_P (insn)) -- { -- int regno; -- -- for (regno = first_btr; regno <= last_btr; regno++) -- if (TEST_HARD_REG_BIT (all_btrs, regno) -- && TEST_HARD_REG_BIT (call_used_reg_set, regno)) -- bitmap_and_compl (reaching_defs, reaching_defs, -- btr_defset[regno - first_btr]); -- } -- } -- } -- } --} -- --static void --build_btr_def_use_webs (btr_heap_t *all_btr_defs) --{ -- const int max_uid = get_max_uid (); -- btr_def **def_array = XCNEWVEC (btr_def *, max_uid); -- btr_user **use_array = XCNEWVEC (btr_user *, max_uid); -- sbitmap *btr_defset = sbitmap_vector_alloc ( -- (last_btr - first_btr) + 1, max_uid); -- sbitmap *bb_gen = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), -- max_uid); -- HARD_REG_SET *btrs_written = XCNEWVEC (HARD_REG_SET, -- last_basic_block_for_fn (cfun)); -- sbitmap *bb_kill; -- sbitmap *bb_out; -- -- bitmap_vector_clear (btr_defset, (last_btr - first_btr) + 1); -- -- compute_defs_uses_and_gen (all_btr_defs, def_array, use_array, btr_defset, -- bb_gen, btrs_written); -- -- bb_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), max_uid); -- compute_kill (bb_kill, btr_defset, btrs_written); -- free (btrs_written); -- -- bb_out = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), max_uid); -- compute_out (bb_out, bb_gen, bb_kill, max_uid); -- -- sbitmap_vector_free (bb_gen); -- sbitmap_vector_free (bb_kill); -- -- link_btr_uses (def_array, use_array, bb_out, btr_defset, max_uid); -- -- sbitmap_vector_free (bb_out); -- sbitmap_vector_free (btr_defset); -- free (use_array); -- free (def_array); --} -- --/* Return true if basic block BB contains the start or end of the -- live range of the definition DEF, AND there are other live -- ranges of the same target register that include BB. */ --static int --block_at_edge_of_live_range_p (int bb, btr_def *def) --{ -- if (def->other_btr_uses_before_def -- && BASIC_BLOCK_FOR_FN (cfun, bb) == def->bb) -- return 1; -- else if (def->other_btr_uses_after_use) -- { -- btr_user *user; -- for (user = def->uses; user != NULL; user = user->next) -- if (BASIC_BLOCK_FOR_FN (cfun, bb) == user->bb) -- return 1; -- } -- return 0; --} -- --/* We are removing the def/use web DEF. The target register -- used in this web is therefore no longer live in the live range -- of this web, so remove it from the live set of all basic blocks -- in the live range of the web. -- Blocks at the boundary of the live range may contain other live -- ranges for the same target register, so we have to be careful -- to remove the target register from the live set of these blocks -- only if they do not contain other live ranges for the same register. 
*/ --static void --clear_btr_from_live_range (btr_def *def) --{ -- unsigned bb; -- bitmap_iterator bi; -- -- EXECUTE_IF_SET_IN_BITMAP (def->live_range, 0, bb, bi) -- { -- if ((!def->other_btr_uses_before_def -- && !def->other_btr_uses_after_use) -- || !block_at_edge_of_live_range_p (bb, def)) -- { -- CLEAR_HARD_REG_BIT (btrs_live[bb], def->btr); -- CLEAR_HARD_REG_BIT (btrs_live_at_end[bb], def->btr); -- if (dump_file) -- dump_btrs_live (bb); -- } -- } -- if (def->own_end) -- CLEAR_HARD_REG_BIT (btrs_live_at_end[def->bb->index], def->btr); --} -- -- --/* We are adding the def/use web DEF. Add the target register used -- in this web to the live set of all of the basic blocks that contain -- the live range of the web. -- If OWN_END is set, also show that the register is live from our -- definitions at the end of the basic block where it is defined. */ --static void --add_btr_to_live_range (btr_def *def, int own_end) --{ -- unsigned bb; -- bitmap_iterator bi; -- -- EXECUTE_IF_SET_IN_BITMAP (def->live_range, 0, bb, bi) -- { -- SET_HARD_REG_BIT (btrs_live[bb], def->btr); -- SET_HARD_REG_BIT (btrs_live_at_end[bb], def->btr); -- if (dump_file) -- dump_btrs_live (bb); -- } -- if (own_end) -- { -- SET_HARD_REG_BIT (btrs_live_at_end[def->bb->index], def->btr); -- def->own_end = 1; -- } --} -- --/* Update a live range to contain the basic block NEW_BLOCK, and all -- blocks on paths between the existing live range and NEW_BLOCK. -- HEAD is a block contained in the existing live range that dominates -- all other blocks in the existing live range. -- Also add to the set BTRS_LIVE_IN_RANGE all target registers that -- are live in the blocks that we add to the live range. -- If FULL_RANGE is set, include the full live range of NEW_BB; -- otherwise, if NEW_BB dominates HEAD_BB, only add registers that -- are life at the end of NEW_BB for NEW_BB itself. -- It is a precondition that either NEW_BLOCK dominates HEAD,or -- HEAD dom NEW_BLOCK. This is used to speed up the -- implementation of this function. */ --static void --augment_live_range (bitmap live_range, HARD_REG_SET *btrs_live_in_range, -- basic_block head_bb, basic_block new_bb, int full_range) --{ -- basic_block *worklist, *tos; -- -- tos = worklist = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun) + 1); -- -- if (dominated_by_p (CDI_DOMINATORS, new_bb, head_bb)) -- { -- if (new_bb == head_bb) -- { -- if (full_range) -- IOR_HARD_REG_SET (*btrs_live_in_range, btrs_live[new_bb->index]); -- free (tos); -- return; -- } -- *tos++ = new_bb; -- } -- else -- { -- edge e; -- edge_iterator ei; -- int new_block = new_bb->index; -- -- gcc_assert (dominated_by_p (CDI_DOMINATORS, head_bb, new_bb)); -- -- IOR_HARD_REG_SET (*btrs_live_in_range, btrs_live[head_bb->index]); -- bitmap_set_bit (live_range, new_block); -- /* A previous btr migration could have caused a register to be -- live just at the end of new_block which we need in full, so -- use trs_live_at_end even if full_range is set. 
*/ -- IOR_HARD_REG_SET (*btrs_live_in_range, btrs_live_at_end[new_block]); -- if (full_range) -- IOR_HARD_REG_SET (*btrs_live_in_range, btrs_live[new_block]); -- if (dump_file) -- { -- fprintf (dump_file, -- "Adding end of block %d and rest of %d to live range\n", -- new_block, head_bb->index); -- fprintf (dump_file,"Now live btrs are "); -- dump_hard_reg_set (*btrs_live_in_range); -- fprintf (dump_file, "\n"); -- } -- FOR_EACH_EDGE (e, ei, head_bb->preds) -- *tos++ = e->src; -- } -- -- while (tos != worklist) -- { -- basic_block bb = *--tos; -- if (!bitmap_bit_p (live_range, bb->index)) -- { -- edge e; -- edge_iterator ei; -- -- bitmap_set_bit (live_range, bb->index); -- IOR_HARD_REG_SET (*btrs_live_in_range, -- btrs_live[bb->index]); -- /* A previous btr migration could have caused a register to be -- live just at the end of a block which we need in full. */ -- IOR_HARD_REG_SET (*btrs_live_in_range, -- btrs_live_at_end[bb->index]); -- if (dump_file) -- { -- fprintf (dump_file, -- "Adding block %d to live range\n", bb->index); -- fprintf (dump_file,"Now live btrs are "); -- dump_hard_reg_set (*btrs_live_in_range); -- fprintf (dump_file, "\n"); -- } -- -- FOR_EACH_EDGE (e, ei, bb->preds) -- { -- basic_block pred = e->src; -- if (!bitmap_bit_p (live_range, pred->index)) -- *tos++ = pred; -- } -- } -- } -- -- free (worklist); --} -- --/* Return the most desirable target register that is not in -- the set USED_BTRS. */ --static int --choose_btr (HARD_REG_SET used_btrs) --{ -- int i; -- -- if (!hard_reg_set_subset_p (all_btrs, used_btrs)) -- for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) -- { --#ifdef REG_ALLOC_ORDER -- int regno = reg_alloc_order[i]; --#else -- int regno = i; --#endif -- if (TEST_HARD_REG_BIT (all_btrs, regno) -- && !TEST_HARD_REG_BIT (used_btrs, regno)) -- return regno; -- } -- return -1; --} -- --/* Calculate the set of basic blocks that contain the live range of -- the def/use web DEF. -- Also calculate the set of target registers that are live at time -- in this live range, but ignore the live range represented by DEF -- when calculating this set. */ --static void --btr_def_live_range (btr_def *def, HARD_REG_SET *btrs_live_in_range) --{ -- if (!def->live_range) -- { -- btr_user *user; -- -- def->live_range = BITMAP_ALLOC (NULL); -- -- bitmap_set_bit (def->live_range, def->bb->index); -- COPY_HARD_REG_SET (*btrs_live_in_range, -- (flag_btr_bb_exclusive -- ? btrs_live : btrs_live_at_end)[def->bb->index]); -- -- for (user = def->uses; user != NULL; user = user->next) -- augment_live_range (def->live_range, btrs_live_in_range, -- def->bb, user->bb, -- (flag_btr_bb_exclusive -- || user->insn != BB_END (def->bb) -- || !JUMP_P (user->insn))); -- } -- else -- { -- /* def->live_range is accurate, but we need to recompute -- the set of target registers live over it, because migration -- of other PT instructions may have affected it. -- */ -- unsigned bb; -- unsigned def_bb = flag_btr_bb_exclusive ? -1 : def->bb->index; -- bitmap_iterator bi; -- -- CLEAR_HARD_REG_SET (*btrs_live_in_range); -- EXECUTE_IF_SET_IN_BITMAP (def->live_range, 0, bb, bi) -- { -- IOR_HARD_REG_SET (*btrs_live_in_range, -- (def_bb == bb -- ? btrs_live_at_end : btrs_live) [bb]); -- } -- } -- if (!def->other_btr_uses_before_def && -- !def->other_btr_uses_after_use) -- CLEAR_HARD_REG_BIT (*btrs_live_in_range, def->btr); --} -- --/* Merge into the def/use web DEF any other def/use webs in the same -- group that are dominated by DEF, provided that there is a target -- register available to allocate to the merged web. 
*/ --static void --combine_btr_defs (btr_def *def, HARD_REG_SET *btrs_live_in_range) --{ -- btr_def *other_def; -- -- for (other_def = def->group->members; -- other_def != NULL; -- other_def = other_def->next_this_group) -- { -- if (other_def != def -- && other_def->uses != NULL -- && ! other_def->has_ambiguous_use -- && dominated_by_p (CDI_DOMINATORS, other_def->bb, def->bb)) -- { -- /* def->bb dominates the other def, so def and other_def could -- be combined. */ -- /* Merge their live ranges, and get the set of -- target registers live over the merged range. */ -- int btr; -- HARD_REG_SET combined_btrs_live; -- auto_bitmap combined_live_range; -- btr_user *user; -- -- if (other_def->live_range == NULL) -- { -- HARD_REG_SET dummy_btrs_live_in_range; -- btr_def_live_range (other_def, &dummy_btrs_live_in_range); -- } -- COPY_HARD_REG_SET (combined_btrs_live, *btrs_live_in_range); -- bitmap_copy (combined_live_range, def->live_range); -- -- for (user = other_def->uses; user != NULL; user = user->next) -- augment_live_range (combined_live_range, &combined_btrs_live, -- def->bb, user->bb, -- (flag_btr_bb_exclusive -- || user->insn != BB_END (def->bb) -- || !JUMP_P (user->insn))); -- -- btr = choose_btr (combined_btrs_live); -- if (btr != -1) -- { -- /* We can combine them. */ -- if (dump_file) -- fprintf (dump_file, -- "Combining def in insn %d with def in insn %d\n", -- INSN_UID (other_def->insn), INSN_UID (def->insn)); -- -- def->btr = btr; -- user = other_def->uses; -- while (user != NULL) -- { -- btr_user *next = user->next; -- -- user->next = def->uses; -- def->uses = user; -- user = next; -- } -- /* Combining def/use webs can make target registers live -- after uses where they previously were not. This means -- some REG_DEAD notes may no longer be correct. We could -- be more precise about this if we looked at the combined -- live range, but here I just delete any REG_DEAD notes -- in case they are no longer correct. */ -- for (user = def->uses; user != NULL; user = user->next) -- remove_note (user->insn, -- find_regno_note (user->insn, REG_DEAD, -- REGNO (user->use))); -- clear_btr_from_live_range (other_def); -- other_def->uses = NULL; -- bitmap_copy (def->live_range, combined_live_range); -- if (other_def->btr == btr && other_def->other_btr_uses_after_use) -- def->other_btr_uses_after_use = 1; -- COPY_HARD_REG_SET (*btrs_live_in_range, combined_btrs_live); -- -- /* Delete the old target register initialization. */ -- delete_insn (other_def->insn); -- -- } -- } -- } --} -- --/* Move the definition DEF from its current position to basic -- block NEW_DEF_BB, and modify it to use branch target register BTR. -- Delete the old defining insn, and insert a new one in NEW_DEF_BB. -- Update all reaching uses of DEF in the RTL to use BTR. -- If this new position means that other defs in the -- same group can be combined with DEF then combine them. */ --static void --move_btr_def (basic_block new_def_bb, int btr, btr_def *def, bitmap live_range, -- HARD_REG_SET *btrs_live_in_range) --{ -- /* We can move the instruction. -- Set a target register in block NEW_DEF_BB to the value -- needed for this target register definition. -- Replace all uses of the old target register definition by -- uses of the new definition. Delete the old definition. 
*/ -- basic_block b = new_def_bb; -- rtx_insn *insp = BB_HEAD (b); -- rtx_insn *old_insn = def->insn; -- rtx src; -- rtx btr_rtx; -- rtx_insn *new_insn; -- machine_mode btr_mode; -- btr_user *user; -- rtx set; -- -- if (dump_file) -- fprintf(dump_file, "migrating to basic block %d, using reg %d\n", -- new_def_bb->index, btr); -- -- clear_btr_from_live_range (def); -- def->btr = btr; -- def->bb = new_def_bb; -- def->luid = 0; -- def->cost = basic_block_freq (new_def_bb); -- bitmap_copy (def->live_range, live_range); -- combine_btr_defs (def, btrs_live_in_range); -- btr = def->btr; -- def->other_btr_uses_before_def -- = TEST_HARD_REG_BIT (btrs_live[b->index], btr) ? 1 : 0; -- add_btr_to_live_range (def, 1); -- if (LABEL_P (insp)) -- insp = NEXT_INSN (insp); -- /* N.B.: insp is expected to be NOTE_INSN_BASIC_BLOCK now. Some -- optimizations can result in insp being both first and last insn of -- its basic block. */ -- /* ?? some assertions to check that insp is sensible? */ -- -- if (def->other_btr_uses_before_def) -- { -- for (insp = BB_END (b); ! INSN_P (insp); insp = PREV_INSN (insp)) -- gcc_assert (insp != BB_HEAD (b)); -- -- if (JUMP_P (insp) || can_throw_internal (insp)) -- insp = PREV_INSN (insp); -- } -- -- set = single_set (old_insn); -- src = SET_SRC (set); -- btr_mode = GET_MODE (SET_DEST (set)); -- btr_rtx = gen_rtx_REG (btr_mode, btr); -- -- new_insn = gen_move_insn (btr_rtx, src); -- -- /* Insert target register initialization at head of basic block. */ -- def->insn = emit_insn_after (new_insn, insp); -- -- df_set_regs_ever_live (btr, true); -- -- if (dump_file) -- fprintf (dump_file, "New pt is insn %d, inserted after insn %d\n", -- INSN_UID (def->insn), INSN_UID (insp)); -- -- /* Delete the old target register initialization. */ -- delete_insn (old_insn); -- -- /* Replace each use of the old target register by a use of the new target -- register. */ -- for (user = def->uses; user != NULL; user = user->next) -- { -- /* Some extra work here to ensure consistent modes, because -- it seems that a target register REG rtx can be given a different -- mode depending on the context (surely that should not be -- the case?). */ -- rtx replacement_rtx; -- if (GET_MODE (user->use) == GET_MODE (btr_rtx) -- || GET_MODE (user->use) == VOIDmode) -- replacement_rtx = btr_rtx; -- else -- replacement_rtx = gen_rtx_REG (GET_MODE (user->use), btr); -- validate_replace_rtx (user->use, replacement_rtx, user->insn); -- user->use = replacement_rtx; -- } --} -- --/* We anticipate intra-block scheduling to be done. See if INSN could move -- up within BB by N_INSNS. */ --static int --can_move_up (const_basic_block bb, const rtx_insn *insn, int n_insns) --{ -- while (insn != BB_HEAD (bb) && n_insns > 0) -- { -- insn = PREV_INSN (insn); -- /* ??? What if we have an anti-dependency that actually prevents the -- scheduler from doing the move? We'd like to re-allocate the register, -- but not necessarily put the load into another basic block. */ -- if (INSN_P (insn)) -- n_insns--; -- } -- return n_insns <= 0; --} -- --/* Attempt to migrate the target register definition DEF to an -- earlier point in the flowgraph. -- -- It is a precondition of this function that DEF is migratable: -- i.e. it has a constant source, and all uses are unambiguous. -- -- Only migrations that reduce the cost of DEF will be made. -- MIN_COST is the lower bound on the cost of the DEF after migration. -- If we migrate DEF so that its cost falls below MIN_COST, -- then we do not attempt to migrate further. 
The idea is that -- we migrate definitions in a priority order based on their cost, -- when the cost of this definition falls below MIN_COST, then -- there is another definition with cost == MIN_COST which now -- has a higher priority than this definition. -- -- Return nonzero if there may be benefit from attempting to -- migrate this DEF further (i.e. we have reduced the cost below -- MIN_COST, but we may be able to reduce it further). -- Return zero if no further migration is possible. */ --static int --migrate_btr_def (btr_def *def, int min_cost) --{ -- HARD_REG_SET btrs_live_in_range; -- int btr_used_near_def = 0; -- int def_basic_block_freq; -- basic_block attempt; -- int give_up = 0; -- int def_moved = 0; -- btr_user *user; -- int def_latency; -- -- if (dump_file) -- fprintf (dump_file, -- "Attempting to migrate pt from insn %d (cost = %d, min_cost = %d) ... ", -- INSN_UID (def->insn), def->cost, min_cost); -- -- if (!def->group || def->has_ambiguous_use) -- /* These defs are not migratable. */ -- { -- if (dump_file) -- fprintf (dump_file, "it's not migratable\n"); -- return 0; -- } -- -- if (!def->uses) -- /* We have combined this def with another in the same group, so -- no need to consider it further. -- */ -- { -- if (dump_file) -- fprintf (dump_file, "it's already combined with another pt\n"); -- return 0; -- } -- -- btr_def_live_range (def, &btrs_live_in_range); -- auto_bitmap live_range; -- bitmap_copy (live_range, def->live_range); -- --#ifdef INSN_SCHEDULING -- def_latency = insn_default_latency (def->insn) * issue_rate; --#else -- def_latency = issue_rate; --#endif -- -- for (user = def->uses; user != NULL; user = user->next) -- { -- if (user->bb == def->bb -- && user->luid > def->luid -- && (def->luid + def_latency) > user->luid -- && ! can_move_up (def->bb, def->insn, -- (def->luid + def_latency) - user->luid)) -- { -- btr_used_near_def = 1; -- break; -- } -- } -- -- def_basic_block_freq = basic_block_freq (def->bb); -- -- for (attempt = get_immediate_dominator (CDI_DOMINATORS, def->bb); -- !give_up && attempt && attempt != ENTRY_BLOCK_PTR_FOR_FN (cfun) -- && def->cost >= min_cost; -- attempt = get_immediate_dominator (CDI_DOMINATORS, attempt)) -- { -- /* Try to move the instruction that sets the target register into -- basic block ATTEMPT. */ -- int try_freq = basic_block_freq (attempt); -- edge_iterator ei; -- edge e; -- -- /* If ATTEMPT has abnormal edges, skip it. 
*/ -- FOR_EACH_EDGE (e, ei, attempt->succs) -- if (e->flags & EDGE_COMPLEX) -- break; -- if (e) -- continue; -- -- if (dump_file) -- fprintf (dump_file, "trying block %d ...", attempt->index); -- -- if (try_freq < def_basic_block_freq -- || (try_freq == def_basic_block_freq && btr_used_near_def)) -- { -- int btr; -- augment_live_range (live_range, &btrs_live_in_range, def->bb, attempt, -- flag_btr_bb_exclusive); -- if (dump_file) -- { -- fprintf (dump_file, "Now btrs live in range are: "); -- dump_hard_reg_set (btrs_live_in_range); -- fprintf (dump_file, "\n"); -- } -- btr = choose_btr (btrs_live_in_range); -- if (btr != -1) -- { -- move_btr_def (attempt, btr, def, live_range, &btrs_live_in_range); -- bitmap_copy (live_range, def->live_range); -- btr_used_near_def = 0; -- def_moved = 1; -- def_basic_block_freq = basic_block_freq (def->bb); -- } -- else -- { -- /* There are no free target registers available to move -- this far forward, so give up */ -- give_up = 1; -- if (dump_file) -- fprintf (dump_file, -- "giving up because there are no free target registers\n"); -- } -- -- } -- } -- if (!def_moved) -- { -- give_up = 1; -- if (dump_file) -- fprintf (dump_file, "failed to move\n"); -- } -- -- return !give_up; --} -- --/* Attempt to move instructions that set target registers earlier -- in the flowgraph, away from their corresponding uses. */ --static void --migrate_btr_defs (enum reg_class btr_class, int allow_callee_save) --{ -- btr_heap_t all_btr_defs (LONG_MIN); -- int reg; -- -- gcc_obstack_init (&migrate_btrl_obstack); -- if (dump_file) -- { -- int i; -- -- for (i = NUM_FIXED_BLOCKS; i < last_basic_block_for_fn (cfun); i++) -- { -- basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i); -- fprintf (dump_file, "Basic block %d: count = ", i); -- bb->count.dump (dump_file); -- fprintf (dump_file, " loop-depth = %d idom = %d\n", -- bb_loop_depth (bb), -- get_immediate_dominator (CDI_DOMINATORS, bb)->index); -- } -- } -- -- CLEAR_HARD_REG_SET (all_btrs); -- for (first_btr = -1, reg = 0; reg < FIRST_PSEUDO_REGISTER; reg++) -- if (TEST_HARD_REG_BIT (reg_class_contents[(int) btr_class], reg) -- && (allow_callee_save || call_used_regs[reg] -- || df_regs_ever_live_p (reg))) -- { -- SET_HARD_REG_BIT (all_btrs, reg); -- last_btr = reg; -- if (first_btr < 0) -- first_btr = reg; -- } -- -- btrs_live = XCNEWVEC (HARD_REG_SET, last_basic_block_for_fn (cfun)); -- btrs_live_at_end = XCNEWVEC (HARD_REG_SET, last_basic_block_for_fn (cfun)); -- -- build_btr_def_use_webs (&all_btr_defs); -- -- while (!all_btr_defs.empty ()) -- { -- int min_cost = -all_btr_defs.min_key (); -- btr_def *def = all_btr_defs.extract_min (); -- if (migrate_btr_def (def, min_cost)) -- { -- all_btr_defs.insert (-def->cost, def); -- if (dump_file) -- { -- fprintf (dump_file, -- "Putting insn %d back on queue with priority %d\n", -- INSN_UID (def->insn), def->cost); -- } -- } -- else -- BITMAP_FREE (def->live_range); -- } -- -- free (btrs_live); -- free (btrs_live_at_end); -- obstack_free (&migrate_btrl_obstack, NULL); --} -- --static void --branch_target_load_optimize (bool after_prologue_epilogue_gen) --{ -- enum reg_class klass -- = (enum reg_class) targetm.branch_target_register_class (); -- if (klass != NO_REGS) -- { -- /* Initialize issue_rate. */ -- if (targetm.sched.issue_rate) -- issue_rate = targetm.sched.issue_rate (); -- else -- issue_rate = 1; -- -- if (!after_prologue_epilogue_gen) -- { -- /* Build the CFG for migrate_btr_defs. */ --#if 1 -- /* This may or may not be needed, depending on where we -- run this phase. 
*/ -- cleanup_cfg (optimize ? CLEANUP_EXPENSIVE : 0); --#endif -- } -- df_analyze (); -- -- -- /* Dominator info is also needed for migrate_btr_def. */ -- calculate_dominance_info (CDI_DOMINATORS); -- migrate_btr_defs (klass, -- (targetm.branch_target_register_callee_saved -- (after_prologue_epilogue_gen))); -- -- free_dominance_info (CDI_DOMINATORS); -- } --} -- --namespace { -- --const pass_data pass_data_branch_target_load_optimize1 = --{ -- RTL_PASS, /* type */ -- "btl1", /* name */ -- OPTGROUP_NONE, /* optinfo_flags */ -- TV_NONE, /* tv_id */ -- 0, /* properties_required */ -- 0, /* properties_provided */ -- 0, /* properties_destroyed */ -- 0, /* todo_flags_start */ -- 0, /* todo_flags_finish */ --}; -- --class pass_branch_target_load_optimize1 : public rtl_opt_pass --{ --public: -- pass_branch_target_load_optimize1 (gcc::context *ctxt) -- : rtl_opt_pass (pass_data_branch_target_load_optimize1, ctxt) -- {} -- -- /* opt_pass methods: */ -- virtual bool gate (function *) { return flag_branch_target_load_optimize; } -- virtual unsigned int execute (function *) -- { -- branch_target_load_optimize (epilogue_completed); -- return 0; -- } -- --}; // class pass_branch_target_load_optimize1 -- --} // anon namespace -- --rtl_opt_pass * --make_pass_branch_target_load_optimize1 (gcc::context *ctxt) --{ -- return new pass_branch_target_load_optimize1 (ctxt); --} -- -- --namespace { -- --const pass_data pass_data_branch_target_load_optimize2 = --{ -- RTL_PASS, /* type */ -- "btl2", /* name */ -- OPTGROUP_NONE, /* optinfo_flags */ -- TV_NONE, /* tv_id */ -- 0, /* properties_required */ -- 0, /* properties_provided */ -- 0, /* properties_destroyed */ -- 0, /* todo_flags_start */ -- 0, /* todo_flags_finish */ --}; -- --class pass_branch_target_load_optimize2 : public rtl_opt_pass --{ --public: -- pass_branch_target_load_optimize2 (gcc::context *ctxt) -- : rtl_opt_pass (pass_data_branch_target_load_optimize2, ctxt) -- {} -- -- /* opt_pass methods: */ -- virtual bool gate (function *) -- { -- return (optimize > 0 && flag_branch_target_load_optimize2); -- } -- -- virtual unsigned int execute (function *); -- --}; // class pass_branch_target_load_optimize2 -- --unsigned int --pass_branch_target_load_optimize2::execute (function *) --{ -- static int warned = 0; -- -- /* Leave this a warning for now so that it is possible to experiment -- with running this pass twice. In 3.6, we should either make this -- an error, or use separate dump files. */ -- if (flag_branch_target_load_optimize -- && flag_branch_target_load_optimize2 -- && !warned) -- { -- warning (0, "branch target register load optimization is not intended " -- "to be run twice"); -- -- warned = 1; -- } -- -- branch_target_load_optimize (epilogue_completed); -- return 0; --} -- --} // anon namespace -- --rtl_opt_pass * --make_pass_branch_target_load_optimize2 (gcc::context *ctxt) --{ -- return new pass_branch_target_load_optimize2 (ctxt); --} -diff --git a/gcc/builtins.c b/gcc/builtins.c -index 910e614a4..945205c1d 100644 ---- a/gcc/builtins.c -+++ b/gcc/builtins.c -@@ -1431,7 +1431,7 @@ expand_builtin_prefetch (tree exp) - } - - /* Get a MEM rtx for expression EXP which is the address of an operand -- to be used in a string instruction (cmpstrsi, movmemsi, ..). LEN is -+ to be used in a string instruction (cmpstrsi, cpymemsi, ..). LEN is - the maximum length of the block of memory that might be accessed or - NULL if unknown. 
*/ - -@@ -7224,7 +7224,6 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode, - int ignore) - { - tree fndecl = get_callee_fndecl (exp); -- enum built_in_function fcode = DECL_FUNCTION_CODE (fndecl); - machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp)); - int flags; - -@@ -7236,6 +7235,7 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode, - redundant checks and be sure, that possible overflow will be detected - by ASan. */ - -+ enum built_in_function fcode = DECL_FUNCTION_CODE (fndecl); - if ((flag_sanitize & SANITIZE_ADDRESS) && asan_intercepted_p (fcode)) - return expand_call (exp, target, ignore); - -diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c -index d220e8135..bf3db074a 100644 ---- a/gcc/c-family/c-common.c -+++ b/gcc/c-family/c-common.c -@@ -5835,15 +5835,27 @@ builtin_function_validate_nargs (location_t loc, tree fndecl, int nargs, - /* Verifies the NARGS arguments ARGS to the builtin function FNDECL. - Returns false if there was an error, otherwise true. LOC is the - location of the function; ARG_LOC is a vector of locations of the -- arguments. */ -+ arguments. If FNDECL is the result of resolving an overloaded -+ target built-in, ORIG_FNDECL is the original function decl, -+ otherwise it is null. */ - - bool - check_builtin_function_arguments (location_t loc, vec arg_loc, -- tree fndecl, int nargs, tree *args) -+ tree fndecl, tree orig_fndecl, -+ int nargs, tree *args) - { -- if (!fndecl_built_in_p (fndecl, BUILT_IN_NORMAL)) -+ if (!fndecl_built_in_p (fndecl)) - return true; - -+ if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD) -+ return (!targetm.check_builtin_call -+ || targetm.check_builtin_call (loc, arg_loc, fndecl, -+ orig_fndecl, nargs, args)); -+ -+ if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_FRONTEND) -+ return true; -+ -+ gcc_assert (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL); - switch (DECL_FUNCTION_CODE (fndecl)) - { - case BUILT_IN_ALLOCA_WITH_ALIGN_AND_MAX: -@@ -7317,8 +7329,6 @@ tree - resolve_overloaded_builtin (location_t loc, tree function, - vec *params) - { -- enum built_in_function orig_code = DECL_FUNCTION_CODE (function); -- - /* Is function one of the _FETCH_OP_ or _OP_FETCH_ built-ins? - Those are not valid to call with a pointer to _Bool (or C++ bool) - and so must be rejected. */ -@@ -7340,6 +7350,7 @@ resolve_overloaded_builtin (location_t loc, tree function, - } - - /* Handle BUILT_IN_NORMAL here. 
*/ -+ enum built_in_function orig_code = DECL_FUNCTION_CODE (function); - switch (orig_code) - { - case BUILT_IN_SPECULATION_SAFE_VALUE_N: -diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h -index 683764267..46b8d265a 100644 ---- a/gcc/c-family/c-common.h -+++ b/gcc/c-family/c-common.h -@@ -818,7 +818,7 @@ extern void check_function_arguments_recurse (void (*) - void *, tree, - unsigned HOST_WIDE_INT); - extern bool check_builtin_function_arguments (location_t, vec, -- tree, int, tree *); -+ tree, tree, int, tree *); - extern void check_function_format (const_tree, tree, int, tree *, - vec *); - extern bool attribute_fallthrough_p (tree); -@@ -995,7 +995,8 @@ extern bool c_switch_covers_all_cases_p (splay_tree, tree); - extern tree build_function_call (location_t, tree, tree); - - extern tree build_function_call_vec (location_t, vec, tree, -- vec *, vec *); -+ vec *, vec *, -+ tree = NULL_TREE); - - extern tree resolve_overloaded_builtin (location_t, tree, vec *); - -diff --git a/gcc/c-family/c-pretty-print.c b/gcc/c-family/c-pretty-print.c -index 3e25624d3..1e14658c0 100644 ---- a/gcc/c-family/c-pretty-print.c -+++ b/gcc/c-family/c-pretty-print.c -@@ -470,6 +470,16 @@ pp_c_specifier_qualifier_list (c_pretty_printer *pp, tree t) - ? "_Complex" : "__complex__")); - else if (code == VECTOR_TYPE) - { -+ /* The syntax we print for vector types isn't real C or C++ syntax, -+ so it's better to print the type name if we have one. */ -+ tree name = TYPE_NAME (t); -+ if (!(pp->flags & pp_c_flag_gnu_v3) -+ && name -+ && TREE_CODE (name) == TYPE_DECL) -+ { -+ pp->id_expression (name); -+ break; -+ } - pp_c_ws_string (pp, "__vector"); - pp_c_left_paren (pp); - pp_wide_integer (pp, TYPE_VECTOR_SUBPARTS (t)); -diff --git a/gcc/c/c-decl.c b/gcc/c/c-decl.c -index 859a62412..288dbe9d9 100644 ---- a/gcc/c/c-decl.c -+++ b/gcc/c/c-decl.c -@@ -604,7 +604,7 @@ static tree grokparms (struct c_arg_info *, bool); - static void layout_array_type (tree); - static void warn_defaults_to (location_t, int, const char *, ...) - ATTRIBUTE_GCC_DIAG(3,4); --static const char *header_for_builtin_fn (enum built_in_function); -+static const char *header_for_builtin_fn (tree); - - /* T is a statement. Add it to the statement-tree. This is the - C/ObjC version--C++ has a slightly different version of this -@@ -1951,7 +1951,8 @@ diagnose_mismatched_decls (tree newdecl, tree olddecl, - if (!comptypes (oldtype, newtype)) - { - if (TREE_CODE (olddecl) == FUNCTION_DECL -- && fndecl_built_in_p (olddecl) && !C_DECL_DECLARED_BUILTIN (olddecl)) -+ && fndecl_built_in_p (olddecl, BUILT_IN_NORMAL) -+ && !C_DECL_DECLARED_BUILTIN (olddecl)) - { - /* Accept "harmless" mismatches in function types such - as missing qualifiers or pointer vs same size integer -@@ -1973,8 +1974,7 @@ diagnose_mismatched_decls (tree newdecl, tree olddecl, - /* If types don't match for a built-in, throw away the - built-in. No point in calling locate_old_decl here, it - won't print anything. 
*/ -- const char *header -- = header_for_builtin_fn (DECL_FUNCTION_CODE (olddecl)); -+ const char *header = header_for_builtin_fn (olddecl); - location_t loc = DECL_SOURCE_LOCATION (newdecl); - if (warning_at (loc, OPT_Wbuiltin_declaration_mismatch, - "conflicting types for built-in function %q+D; " -@@ -2637,7 +2637,8 @@ merge_decls (tree newdecl, tree olddecl, tree newtype, tree oldtype) - |= DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (olddecl); - TREE_THIS_VOLATILE (newdecl) |= TREE_THIS_VOLATILE (olddecl); - DECL_IS_MALLOC (newdecl) |= DECL_IS_MALLOC (olddecl); -- DECL_IS_OPERATOR_NEW (newdecl) |= DECL_IS_OPERATOR_NEW (olddecl); -+ if (DECL_IS_OPERATOR_NEW_P (olddecl)) -+ DECL_SET_IS_OPERATOR_NEW (newdecl, true); - TREE_READONLY (newdecl) |= TREE_READONLY (olddecl); - DECL_PURE_P (newdecl) |= DECL_PURE_P (olddecl); - DECL_IS_NOVOPS (newdecl) |= DECL_IS_NOVOPS (olddecl); -@@ -2731,8 +2732,7 @@ merge_decls (tree newdecl, tree olddecl, tree newtype, tree oldtype) - { - /* If redeclaring a builtin function, it stays built in. - But it gets tagged as having been declared. */ -- DECL_BUILT_IN_CLASS (newdecl) = DECL_BUILT_IN_CLASS (olddecl); -- DECL_FUNCTION_CODE (newdecl) = DECL_FUNCTION_CODE (olddecl); -+ copy_decl_built_in_function (newdecl, olddecl); - C_DECL_DECLARED_BUILTIN (newdecl) = 1; - if (new_is_prototype) - { -@@ -3334,13 +3334,17 @@ implicit_decl_warning (location_t loc, tree id, tree olddecl) - hint.suppress (); - } - --/* This function represents mapping of a function code FCODE -- to its respective header. */ -+/* Return the name of the header file that declares built-in function -+ FNDECL, or null if either we don't know or don't expect to see an -+ explicit declaration. */ - - static const char * --header_for_builtin_fn (enum built_in_function fcode) -+header_for_builtin_fn (tree fndecl) - { -- switch (fcode) -+ if (DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL) -+ return NULL; -+ -+ switch (DECL_FUNCTION_CODE (fndecl)) - { - CASE_FLT_FN (BUILT_IN_ACOS): - CASE_FLT_FN (BUILT_IN_ACOSH): -@@ -3590,8 +3594,7 @@ implicitly_declare (location_t loc, tree functionid) - "declaration of built-in " - "function %qD", decl); - /* See if we can hint which header to include. */ -- const char *header -- = header_for_builtin_fn (DECL_FUNCTION_CODE (decl)); -+ const char *header = header_for_builtin_fn (decl); - if (header != NULL && warned) - { - rich_location richloc (line_table, loc); -@@ -4471,6 +4474,16 @@ c_builtin_function_ext_scope (tree decl) - - return decl; - } -+ -+/* Implement LANG_HOOKS_SIMULATE_BUILTIN_FUNCTION_DECL. */ -+ -+tree -+c_simulate_builtin_function_decl (tree decl) -+{ -+ tree type = TREE_TYPE (decl); -+ C_DECL_BUILTIN_PROTOTYPE (decl) = prototype_p (type); -+ return pushdecl (decl); -+} - - /* Called when a declaration is seen that contains no names to declare. - If its type is a reference to a structure, union or enum inherited -@@ -8746,6 +8759,8 @@ finish_enum (tree enumtype, tree values, tree attributes) - && !in_sizeof && !in_typeof && !in_alignof) - struct_parse_info->struct_types.safe_push (enumtype); - -+ C_TYPE_BEING_DEFINED (enumtype) = 0; -+ - return enumtype; - } - -@@ -8851,6 +8866,36 @@ build_enumerator (location_t decl_loc, location_t loc, - return tree_cons (decl, value, NULL_TREE); - } - -+/* Implement LANG_HOOKS_SIMULATE_ENUM_DECL. 
*/ -+ -+tree -+c_simulate_enum_decl (location_t loc, const char *name, -+ vec values) -+{ -+ location_t saved_loc = input_location; -+ input_location = loc; -+ -+ struct c_enum_contents the_enum; -+ tree enumtype = start_enum (loc, &the_enum, get_identifier (name)); -+ -+ tree value_chain = NULL_TREE; -+ string_int_pair *value; -+ unsigned int i; -+ FOR_EACH_VEC_ELT (values, i, value) -+ { -+ tree decl = build_enumerator (loc, loc, &the_enum, -+ get_identifier (value->first), -+ build_int_cst (integer_type_node, -+ value->second)); -+ TREE_CHAIN (decl) = value_chain; -+ value_chain = decl; -+ } -+ -+ finish_enum (enumtype, nreverse (value_chain), NULL_TREE); -+ -+ input_location = saved_loc; -+ return enumtype; -+} - - /* Create the FUNCTION_DECL for a function definition. - DECLSPECS, DECLARATOR and ATTRIBUTES are the parts of -diff --git a/gcc/c/c-objc-common.h b/gcc/c/c-objc-common.h -index f5e820420..c8739e0b8 100644 ---- a/gcc/c/c-objc-common.h -+++ b/gcc/c/c-objc-common.h -@@ -60,6 +60,9 @@ along with GCC; see the file COPYING3. If not see - #define LANG_HOOKS_BUILTIN_FUNCTION c_builtin_function - #undef LANG_HOOKS_BUILTIN_FUNCTION_EXT_SCOPE - #define LANG_HOOKS_BUILTIN_FUNCTION_EXT_SCOPE c_builtin_function_ext_scope -+#undef LANG_HOOKS_SIMULATE_BUILTIN_FUNCTION_DECL -+#define LANG_HOOKS_SIMULATE_BUILTIN_FUNCTION_DECL \ -+ c_simulate_builtin_function_decl - #undef LANG_HOOKS_EMITS_BEGIN_STMT - #define LANG_HOOKS_EMITS_BEGIN_STMT true - -@@ -72,6 +75,8 @@ along with GCC; see the file COPYING3. If not see - #undef LANG_HOOKS_TREE_DUMP_DUMP_TREE_FN - #define LANG_HOOKS_TREE_DUMP_DUMP_TREE_FN c_dump_tree - -+#undef LANG_HOOKS_SIMULATE_ENUM_DECL -+#define LANG_HOOKS_SIMULATE_ENUM_DECL c_simulate_enum_decl - #undef LANG_HOOKS_TYPE_FOR_MODE - #define LANG_HOOKS_TYPE_FOR_MODE c_common_type_for_mode - #undef LANG_HOOKS_TYPE_FOR_SIZE -diff --git a/gcc/c/c-tree.h b/gcc/c/c-tree.h -index 7e35ab1f0..19925e793 100644 ---- a/gcc/c/c-tree.h -+++ b/gcc/c/c-tree.h -@@ -561,6 +561,8 @@ extern tree finish_enum (tree, tree, tree); - extern void finish_function (void); - extern tree finish_struct (location_t, tree, tree, tree, - struct c_struct_parse_info *); -+extern tree c_simulate_enum_decl (location_t, const char *, -+ vec); - extern struct c_arg_info *build_arg_info (void); - extern struct c_arg_info *get_parm_info (bool, tree); - extern tree grokfield (location_t, struct c_declarator *, -@@ -577,6 +579,7 @@ extern struct c_declarator *set_array_declarator_inner (struct c_declarator *, - struct c_declarator *); - extern tree c_builtin_function (tree); - extern tree c_builtin_function_ext_scope (tree); -+extern tree c_simulate_builtin_function_decl (tree); - extern void shadow_tag (const struct c_declspecs *); - extern void shadow_tag_warned (const struct c_declspecs *, int); - extern tree start_enum (location_t, struct c_enum_contents *, tree); -diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c -index cb999cbf8..87f4178ec 100644 ---- a/gcc/c/c-typeck.c -+++ b/gcc/c/c-typeck.c -@@ -3002,6 +3002,8 @@ inform_declaration (tree decl) - } - - /* Build a function call to function FUNCTION with parameters PARAMS. -+ If FUNCTION is the result of resolving an overloaded target built-in, -+ ORIG_FUNDECL is the original function decl, otherwise it is null. - ORIGTYPES, if not NULL, is a vector of types; each element is - either NULL or the original type of the corresponding element in - PARAMS. 
The original type may differ from TREE_TYPE of the -@@ -3012,7 +3014,7 @@ inform_declaration (tree decl) - tree - build_function_call_vec (location_t loc, vec arg_loc, - tree function, vec *params, -- vec *origtypes) -+ vec *origtypes, tree orig_fundecl) - { - tree fntype, fundecl = NULL_TREE; - tree name = NULL_TREE, result; -@@ -3032,6 +3034,8 @@ build_function_call_vec (location_t loc, vec arg_loc, - if (flag_tm) - tm_malloc_replacement (function); - fundecl = function; -+ if (!orig_fundecl) -+ orig_fundecl = fundecl; - /* Atomic functions have type checking/casting already done. They are - often rewritten and don't match the original parameter list. */ - if (name && !strncmp (IDENTIFIER_POINTER (name), "__atomic_", 9)) -@@ -3109,9 +3113,10 @@ build_function_call_vec (location_t loc, vec arg_loc, - argarray = vec_safe_address (params); - - /* Check that arguments to builtin functions match the expectations. */ -- if (fundecl && fndecl_built_in_p (fundecl, BUILT_IN_NORMAL) -- && !check_builtin_function_arguments (loc, arg_loc, fundecl, nargs, -- argarray)) -+ if (fundecl -+ && fndecl_built_in_p (fundecl) -+ && !check_builtin_function_arguments (loc, arg_loc, fundecl, -+ orig_fundecl, nargs, argarray)) - return error_mark_node; - - /* Check that the arguments to the function are valid. */ -diff --git a/gcc/caller-save.c b/gcc/caller-save.c -index 9ff470c33..0d66e0ce5 100644 ---- a/gcc/caller-save.c -+++ b/gcc/caller-save.c -@@ -37,6 +37,7 @@ along with GCC; see the file COPYING3. If not see - #include "dumpfile.h" - #include "rtl-iter.h" - #include "target.h" -+#include "function-abi.h" - - #define MOVE_MAX_WORDS (MOVE_MAX / UNITS_PER_WORD) - -@@ -192,29 +193,17 @@ init_caller_save (void) - - caller_save_initialized_p = true; - -- CLEAR_HARD_REG_SET (no_caller_save_reg_set); - /* First find all the registers that we need to deal with and all - the modes that they can have. If we can't find a mode to use, - we can't have the register live over calls. */ - - for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) -- { -- if (call_used_regs[i] -- && !TEST_HARD_REG_BIT (call_fixed_reg_set, i)) -- { -- for (j = 1; j <= MOVE_MAX_WORDS; j++) -- { -- regno_save_mode[i][j] = HARD_REGNO_CALLER_SAVE_MODE (i, j, -- VOIDmode); -- if (regno_save_mode[i][j] == VOIDmode && j == 1) -- { -- SET_HARD_REG_BIT (call_fixed_reg_set, i); -- } -- } -- } -- else -- regno_save_mode[i][1] = VOIDmode; -- } -+ for (j = 1; j <= MOVE_MAX_WORDS; j++) -+ { -+ regno_save_mode[i][j] = HARD_REGNO_CALLER_SAVE_MODE (i, j, VOIDmode); -+ if (regno_save_mode[i][j] == VOIDmode && j == 1) -+ CLEAR_HARD_REG_BIT (savable_regs, i); -+ } - - /* The following code tries to approximate the conditions under which - we can easily save and restore a register without scratch registers or -@@ -275,11 +264,7 @@ init_caller_save (void) - { - regno_save_mode[i][j] = VOIDmode; - if (j == 1) -- { -- SET_HARD_REG_BIT (call_fixed_reg_set, i); -- if (call_used_regs[i]) -- SET_HARD_REG_BIT (no_caller_save_reg_set, i); -- } -+ CLEAR_HARD_REG_BIT (savable_regs, i); - } - } - -@@ -442,7 +427,9 @@ setup_save_areas (void) - freq = REG_FREQ_FROM_BB (BLOCK_FOR_INSN (insn)); - REG_SET_TO_HARD_REG_SET (hard_regs_to_save, - &chain->live_throughout); -- get_call_reg_set_usage (insn, &used_regs, call_used_reg_set); -+ used_regs = insn_callee_abi (insn).full_reg_clobbers (); -+ /* ??? This preserves traditional behavior; it might not be needed. */ -+ used_regs |= fixed_reg_set; - - /* Record all registers set in this call insn. These don't - need to be saved. N.B. 
the call insn might set a subreg -@@ -450,14 +437,13 @@ setup_save_areas (void) - live during the call, but the subreg that is set - isn't. */ - CLEAR_HARD_REG_SET (this_insn_sets); -- note_stores (PATTERN (insn), mark_set_regs, &this_insn_sets); -+ note_stores (insn, mark_set_regs, &this_insn_sets); - /* Sibcalls are considered to set the return value. */ - if (SIBLING_CALL_P (insn) && crtl->return_rtx) - mark_set_regs (crtl->return_rtx, NULL_RTX, &this_insn_sets); - -- AND_COMPL_HARD_REG_SET (used_regs, call_fixed_reg_set); -- AND_COMPL_HARD_REG_SET (used_regs, this_insn_sets); -- AND_HARD_REG_SET (hard_regs_to_save, used_regs); -+ used_regs &= ~(fixed_reg_set | this_insn_sets); -+ hard_regs_to_save &= used_regs & savable_regs; - for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) - if (TEST_HARD_REG_BIT (hard_regs_to_save, regno)) - { -@@ -526,7 +512,10 @@ setup_save_areas (void) - - REG_SET_TO_HARD_REG_SET (hard_regs_to_save, - &chain->live_throughout); -- get_call_reg_set_usage (insn, &used_regs, call_used_reg_set); -+ used_regs = insn_callee_abi (insn).full_reg_clobbers (); -+ /* ??? This preserves traditional behavior; it might not -+ be needed. */ -+ used_regs |= fixed_reg_set; - - /* Record all registers set in this call insn. These don't - need to be saved. N.B. the call insn might set a subreg -@@ -534,15 +523,14 @@ setup_save_areas (void) - live during the call, but the subreg that is set - isn't. */ - CLEAR_HARD_REG_SET (this_insn_sets); -- note_stores (PATTERN (insn), mark_set_regs, &this_insn_sets); -+ note_stores (insn, mark_set_regs, &this_insn_sets); - /* Sibcalls are considered to set the return value, - compare df-scan.c:df_get_call_refs. */ - if (SIBLING_CALL_P (insn) && crtl->return_rtx) - mark_set_regs (crtl->return_rtx, NULL_RTX, &this_insn_sets); - -- AND_COMPL_HARD_REG_SET (used_regs, call_fixed_reg_set); -- AND_COMPL_HARD_REG_SET (used_regs, this_insn_sets); -- AND_HARD_REG_SET (hard_regs_to_save, used_regs); -+ used_regs &= ~(fixed_reg_set | this_insn_sets); -+ hard_regs_to_save &= used_regs & savable_regs; - for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) - if (TEST_HARD_REG_BIT (hard_regs_to_save, regno)) - { -@@ -775,13 +763,13 @@ save_call_clobbered_regs (void) - - if (code == JUMP_INSN) - /* Restore all registers if this is a JUMP_INSN. */ -- COPY_HARD_REG_SET (referenced_regs, hard_regs_saved); -+ referenced_regs = hard_regs_saved; - else - { - CLEAR_HARD_REG_SET (referenced_regs); - mark_referenced_regs (&PATTERN (insn), - mark_reg_as_referenced, NULL); -- AND_HARD_REG_SET (referenced_regs, hard_regs_saved); -+ referenced_regs &= hard_regs_saved; - } - - for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) -@@ -795,8 +783,8 @@ save_call_clobbered_regs (void) - be live across the call, while the other is set - afterwards. */ - CLEAR_HARD_REG_SET (this_insn_sets); -- note_stores (PATTERN (insn), mark_set_regs, &this_insn_sets); -- AND_COMPL_HARD_REG_SET (hard_regs_saved, this_insn_sets); -+ note_stores (insn, mark_set_regs, &this_insn_sets); -+ hard_regs_saved &= ~this_insn_sets; - } - - if (code == CALL_INSN -@@ -849,15 +837,18 @@ save_call_clobbered_regs (void) - multi-hard-reg pseudo; then the pseudo is considered live - during the call, but the subreg that is set isn't. */ - CLEAR_HARD_REG_SET (this_insn_sets); -- note_stores (PATTERN (insn), mark_set_regs, &this_insn_sets); -+ note_stores (insn, mark_set_regs, &this_insn_sets); - - /* Compute which hard regs must be saved before this call. 
*/ -- AND_COMPL_HARD_REG_SET (hard_regs_to_save, call_fixed_reg_set); -- AND_COMPL_HARD_REG_SET (hard_regs_to_save, this_insn_sets); -- AND_COMPL_HARD_REG_SET (hard_regs_to_save, hard_regs_saved); -- get_call_reg_set_usage (insn, &call_def_reg_set, -- call_used_reg_set); -- AND_HARD_REG_SET (hard_regs_to_save, call_def_reg_set); -+ hard_regs_to_save &= ~(fixed_reg_set -+ | this_insn_sets -+ | hard_regs_saved); -+ hard_regs_to_save &= savable_regs; -+ call_def_reg_set = insn_callee_abi (insn).full_reg_clobbers (); -+ /* ??? This preserves traditional behavior; it might not -+ be needed. */ -+ call_def_reg_set |= fixed_reg_set; -+ hard_regs_to_save &= call_def_reg_set; - - for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) - if (TEST_HARD_REG_BIT (hard_regs_to_save, regno)) -@@ -872,7 +863,8 @@ save_call_clobbered_regs (void) - - if (cheap - && HARD_REGISTER_P (cheap) -- && TEST_HARD_REG_BIT (call_used_reg_set, REGNO (cheap))) -+ && TEST_HARD_REG_BIT (call_used_or_fixed_regs, -+ REGNO (cheap))) - { - rtx dest, newpat; - rtx pat = PATTERN (insn); -@@ -1414,8 +1406,7 @@ insert_one_insn (struct insn_chain *chain, int before_p, int code, rtx pat) - /* Registers that are set in CHAIN->INSN live in the new insn. - (Unless there is a REG_UNUSED note for them, but we don't - look for them here.) */ -- note_stores (PATTERN (chain->insn), add_stored_regs, -- &new_chain->live_throughout); -+ note_stores (chain->insn, add_stored_regs, &new_chain->live_throughout); - CLEAR_REG_SET (&new_chain->dead_or_set); - if (chain->insn == BB_END (BASIC_BLOCK_FOR_FN (cfun, chain->block))) - BB_END (BASIC_BLOCK_FOR_FN (cfun, chain->block)) = new_chain->insn; -diff --git a/gcc/calls.c b/gcc/calls.c -index 567959956..2638752ad 100644 ---- a/gcc/calls.c -+++ b/gcc/calls.c -@@ -346,7 +346,8 @@ prepare_call_address (tree fndecl_or_type, rtx funexp, rtx static_chain_value, - It is zero if this call doesn't want a structure value. - - NEXT_ARG_REG is the rtx that results from executing -- targetm.calls.function_arg (&args_so_far, VOIDmode, void_type_node, true) -+ targetm.calls.function_arg (&args_so_far, -+ function_arg_info::end_marker ()); - just after all the args have had their registers assigned. - This could be whatever you like, but normally it is the first - arg-register beyond those used for args in this call, -@@ -897,13 +898,12 @@ call_expr_flags (const_tree t) - return flags; - } - --/* Return true if TYPE should be passed by invisible reference. */ -+/* Return true if ARG should be passed by invisible reference. */ - - bool --pass_by_reference (CUMULATIVE_ARGS *ca, machine_mode mode, -- tree type, bool named_arg) -+pass_by_reference (CUMULATIVE_ARGS *ca, function_arg_info arg) - { -- if (type) -+ if (tree type = arg.type) - { - /* If this type contains non-trivial constructors, then it is - forbidden for the middle-end to create any new copies. */ -@@ -911,33 +911,55 @@ pass_by_reference (CUMULATIVE_ARGS *ca, machine_mode mode, - return true; - - /* GCC post 3.4 passes *all* variable sized types by reference. */ -- if (!TYPE_SIZE (type) || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST) -+ if (!TYPE_SIZE (type) || !poly_int_tree_p (TYPE_SIZE (type))) - return true; - - /* If a record type should be passed the same as its first (and only) - member, use the type and mode of that member. 
*/ - if (TREE_CODE (type) == RECORD_TYPE && TYPE_TRANSPARENT_AGGR (type)) - { -- type = TREE_TYPE (first_field (type)); -- mode = TYPE_MODE (type); -+ arg.type = TREE_TYPE (first_field (type)); -+ arg.mode = TYPE_MODE (arg.type); - } - } - -- return targetm.calls.pass_by_reference (pack_cumulative_args (ca), mode, -- type, named_arg); -+ return targetm.calls.pass_by_reference (pack_cumulative_args (ca), arg); - } - --/* Return true if TYPE, which is passed by reference, should be callee -+/* Return true if TYPE should be passed by reference when passed to -+ the "..." arguments of a function. */ -+ -+bool -+pass_va_arg_by_reference (tree type) -+{ -+ return pass_by_reference (NULL, function_arg_info (type, /*named=*/false)); -+} -+ -+/* Decide whether ARG, which occurs in the state described by CA, -+ should be passed by reference. Return true if so and update -+ ARG accordingly. */ -+ -+bool -+apply_pass_by_reference_rules (CUMULATIVE_ARGS *ca, function_arg_info &arg) -+{ -+ if (pass_by_reference (ca, arg)) -+ { -+ arg.type = build_pointer_type (arg.type); -+ arg.mode = TYPE_MODE (arg.type); -+ return true; -+ } -+ return false; -+} -+ -+/* Return true if ARG, which is passed by reference, should be callee - copied instead of caller copied. */ - - bool --reference_callee_copied (CUMULATIVE_ARGS *ca, machine_mode mode, -- tree type, bool named_arg) -+reference_callee_copied (CUMULATIVE_ARGS *ca, const function_arg_info &arg) - { -- if (type && TREE_ADDRESSABLE (type)) -+ if (arg.type && TREE_ADDRESSABLE (arg.type)) - return false; -- return targetm.calls.callee_copies (pack_cumulative_args (ca), mode, type, -- named_arg); -+ return targetm.calls.callee_copies (pack_cumulative_args (ca), arg); - } - - -@@ -1350,7 +1372,6 @@ maybe_warn_alloc_args_overflow (tree fn, tree exp, tree args[2], int idx[2]) - location_t loc = EXPR_LOCATION (exp); - - tree fntype = fn ? TREE_TYPE (fn) : TREE_TYPE (TREE_TYPE (exp)); -- built_in_function fncode = fn ? DECL_FUNCTION_CODE (fn) : BUILT_IN_NONE; - bool warned = false; - - /* Validate each argument individually. */ -@@ -1376,11 +1397,10 @@ maybe_warn_alloc_args_overflow (tree fn, tree exp, tree args[2], int idx[2]) - friends. - Also avoid issuing the warning for calls to function named - "alloca". */ -- if ((fncode == BUILT_IN_ALLOCA -- && IDENTIFIER_LENGTH (DECL_NAME (fn)) != 6) -- || (fncode != BUILT_IN_ALLOCA -- && !lookup_attribute ("returns_nonnull", -- TYPE_ATTRIBUTES (fntype)))) -+ if (fn && fndecl_built_in_p (fn, BUILT_IN_ALLOCA) -+ ? IDENTIFIER_LENGTH (DECL_NAME (fn)) != 6 -+ : !lookup_attribute ("returns_nonnull", -+ TYPE_ATTRIBUTES (fntype))) - warned = warning_at (loc, OPT_Walloc_zero, - "%Kargument %i value is zero", - exp, idx[i] + 1); -@@ -1395,7 +1415,7 @@ maybe_warn_alloc_args_overflow (tree fn, tree exp, tree args[2], int idx[2]) - && fn - && !args[1] - && lang_GNU_CXX () -- && DECL_IS_OPERATOR_NEW (fn) -+ && DECL_IS_OPERATOR_NEW_P (fn) - && integer_all_onesp (args[i])) - continue; - -@@ -1989,15 +2009,13 @@ initialize_argument_information (int num_actuals ATTRIBUTE_UNUSED, - with those made by function.c. */ - - /* See if this argument should be passed by invisible reference. 
*/ -- if (pass_by_reference (args_so_far_pnt, TYPE_MODE (type), -- type, argpos < n_named_args)) -+ function_arg_info orig_arg (type, argpos < n_named_args); -+ if (pass_by_reference (args_so_far_pnt, orig_arg)) - { - bool callee_copies; - tree base = NULL_TREE; - -- callee_copies -- = reference_callee_copied (args_so_far_pnt, TYPE_MODE (type), -- type, argpos < n_named_args); -+ callee_copies = reference_callee_copied (args_so_far_pnt, orig_arg); - - /* If we're compiling a thunk, pass through invisible references - instead of making a copy. */ -@@ -2118,8 +2136,8 @@ initialize_argument_information (int num_actuals ATTRIBUTE_UNUSED, - - targetm.calls.warn_parameter_passing_abi (args_so_far, type); - -- args[i].reg = targetm.calls.function_arg (args_so_far, mode, type, -- argpos < n_named_args); -+ function_arg_info arg (type, mode, argpos < n_named_args); -+ args[i].reg = targetm.calls.function_arg (args_so_far, arg); - - if (args[i].reg && CONST_INT_P (args[i].reg)) - args[i].reg = NULL; -@@ -2129,17 +2147,14 @@ initialize_argument_information (int num_actuals ATTRIBUTE_UNUSED, - arguments have to go into the incoming registers. */ - if (targetm.calls.function_incoming_arg != targetm.calls.function_arg) - args[i].tail_call_reg -- = targetm.calls.function_incoming_arg (args_so_far, mode, type, -- argpos < n_named_args); -+ = targetm.calls.function_incoming_arg (args_so_far, arg); - else - args[i].tail_call_reg = args[i].reg; - - if (args[i].reg) -- args[i].partial -- = targetm.calls.arg_partial_bytes (args_so_far, mode, type, -- argpos < n_named_args); -+ args[i].partial = targetm.calls.arg_partial_bytes (args_so_far, arg); - -- args[i].pass_on_stack = targetm.calls.must_pass_in_stack (mode, type); -+ args[i].pass_on_stack = targetm.calls.must_pass_in_stack (arg); - - /* If FUNCTION_ARG returned a (parallel [(expr_list (nil) ...) ...]), - it means that we are to pass this arg in the register(s) designated -@@ -2188,8 +2203,13 @@ initialize_argument_information (int num_actuals ATTRIBUTE_UNUSED, - /* Increment ARGS_SO_FAR, which has info about which arg-registers - have been used, etc. */ - -- targetm.calls.function_arg_advance (args_so_far, TYPE_MODE (type), -- type, argpos < n_named_args); -+ /* ??? Traditionally we've passed TYPE_MODE here, instead of the -+ promoted_mode used for function_arg above. However, the -+ corresponding handling of incoming arguments in function.c -+ does pass the promoted mode. */ -+ function_arg_info arg_to_skip (type, TYPE_MODE (type), -+ argpos < n_named_args); -+ targetm.calls.function_arg_advance (args_so_far, arg_to_skip); - - /* Store argument values for functions decorated with attribute - alloc_size. */ -@@ -4222,14 +4242,11 @@ expand_call (tree exp, rtx target, int ignore) - /* Set up next argument register. For sibling calls on machines - with register windows this should be the incoming register. 
*/ - if (pass == 0) -- next_arg_reg = targetm.calls.function_incoming_arg (args_so_far, -- VOIDmode, -- void_type_node, -- true); -+ next_arg_reg = targetm.calls.function_incoming_arg -+ (args_so_far, function_arg_info::end_marker ()); - else -- next_arg_reg = targetm.calls.function_arg (args_so_far, -- VOIDmode, void_type_node, -- true); -+ next_arg_reg = targetm.calls.function_arg -+ (args_so_far, function_arg_info::end_marker ()); - - if (pass == 1 && (return_flags & ERF_RETURNS_ARG)) - { -@@ -4846,10 +4863,9 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value, - argvec[count].mode = Pmode; - argvec[count].partial = 0; - -- argvec[count].reg = targetm.calls.function_arg (args_so_far, -- Pmode, NULL_TREE, true); -- gcc_assert (targetm.calls.arg_partial_bytes (args_so_far, Pmode, -- NULL_TREE, 1) == 0); -+ function_arg_info ptr_arg (Pmode, /*named=*/true); -+ argvec[count].reg = targetm.calls.function_arg (args_so_far, ptr_arg); -+ gcc_assert (targetm.calls.arg_partial_bytes (args_so_far, ptr_arg) == 0); - - locate_and_pad_parm (Pmode, NULL_TREE, - #ifdef STACK_PARMS_IN_REG_PARM_AREA -@@ -4864,7 +4880,7 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value, - || reg_parm_stack_space > 0) - args_size.constant += argvec[count].locate.size.constant; - -- targetm.calls.function_arg_advance (args_so_far, Pmode, (tree) 0, true); -+ targetm.calls.function_arg_advance (args_so_far, ptr_arg); - - count++; - } -@@ -4885,11 +4901,11 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value, - && !(CONSTANT_P (val) && targetm.legitimate_constant_p (mode, val))) - val = force_operand (val, NULL_RTX); - -- if (pass_by_reference (&args_so_far_v, mode, NULL_TREE, 1)) -+ function_arg_info orig_arg (mode, /*named=*/true); -+ if (pass_by_reference (&args_so_far_v, orig_arg)) - { - rtx slot; -- int must_copy -- = !reference_callee_copied (&args_so_far_v, mode, NULL_TREE, 1); -+ int must_copy = !reference_callee_copied (&args_so_far_v, orig_arg); - - /* If this was a CONST function, it is now PURE since it now - reads memory. 
*/ -@@ -4927,13 +4943,13 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value, - } - - mode = promote_function_mode (NULL_TREE, mode, &unsigned_p, NULL_TREE, 0); -+ function_arg_info arg (mode, /*named=*/true); - argvec[count].mode = mode; - argvec[count].value = convert_modes (mode, GET_MODE (val), val, unsigned_p); -- argvec[count].reg = targetm.calls.function_arg (args_so_far, mode, -- NULL_TREE, true); -+ argvec[count].reg = targetm.calls.function_arg (args_so_far, arg); - - argvec[count].partial -- = targetm.calls.arg_partial_bytes (args_so_far, mode, NULL_TREE, 1); -+ = targetm.calls.arg_partial_bytes (args_so_far, arg); - - if (argvec[count].reg == 0 - || argvec[count].partial != 0 -@@ -4959,7 +4975,7 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value, - known_le (GET_MODE_SIZE (mode), UNITS_PER_WORD)); - #endif - -- targetm.calls.function_arg_advance (args_so_far, mode, (tree) 0, true); -+ targetm.calls.function_arg_advance (args_so_far, arg); - } - - /* If this machine requires an external definition for library -@@ -5302,7 +5318,7 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value, - original_args_size.constant, args_size.constant, - struct_value_size, - targetm.calls.function_arg (args_so_far, -- VOIDmode, void_type_node, true), -+ function_arg_info::end_marker ()), - valreg, - old_inhibit_defer_pop + 1, call_fusage, flags, args_so_far); - -@@ -5815,22 +5831,21 @@ store_one_arg (struct arg_data *arg, rtx argblock, int flags, - return sibcall_failure; - } - --/* Nonzero if we do not know how to pass TYPE solely in registers. */ -+/* Nonzero if we do not know how to pass ARG solely in registers. */ - - bool --must_pass_in_stack_var_size (machine_mode mode ATTRIBUTE_UNUSED, -- const_tree type) -+must_pass_in_stack_var_size (const function_arg_info &arg) - { -- if (!type) -+ if (!arg.type) - return false; - - /* If the type has variable size... */ -- if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST) -+ if (!poly_int_tree_p (TYPE_SIZE (arg.type))) - return true; - - /* If the type is marked as addressable (it is required - to be constructed into the stack)... */ -- if (TREE_ADDRESSABLE (type)) -+ if (TREE_ADDRESSABLE (arg.type)) - return true; - - return false; -@@ -5841,33 +5856,43 @@ must_pass_in_stack_var_size (machine_mode mode ATTRIBUTE_UNUSED, - /* ??? Should be able to merge these two by examining BLOCK_REG_PADDING. */ - - bool --must_pass_in_stack_var_size_or_pad (machine_mode mode, const_tree type) -+must_pass_in_stack_var_size_or_pad (const function_arg_info &arg) - { -- if (!type) -+ if (!arg.type) - return false; - - /* If the type has variable size... */ -- if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST) -+ if (TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST) - return true; - - /* If the type is marked as addressable (it is required - to be constructed into the stack)... */ -- if (TREE_ADDRESSABLE (type)) -+ if (TREE_ADDRESSABLE (arg.type)) - return true; - -- if (TYPE_EMPTY_P (type)) -+ if (TYPE_EMPTY_P (arg.type)) - return false; - - /* If the padding and mode of the type is such that a copy into - a register would put it into the wrong part of the register. */ -- if (mode == BLKmode -- && int_size_in_bytes (type) % (PARM_BOUNDARY / BITS_PER_UNIT) -- && (targetm.calls.function_arg_padding (mode, type) -+ if (arg.mode == BLKmode -+ && int_size_in_bytes (arg.type) % (PARM_BOUNDARY / BITS_PER_UNIT) -+ && (targetm.calls.function_arg_padding (arg.mode, arg.type) - == (BYTES_BIG_ENDIAN ? 
PAD_UPWARD : PAD_DOWNWARD))) - return true; - - return false; - } - -+/* Return true if TYPE must be passed on the stack when passed to -+ the "..." arguments of a function. */ -+ -+bool -+must_pass_va_arg_in_stack (tree type) -+{ -+ function_arg_info arg (type, /*named=*/false); -+ return targetm.calls.must_pass_in_stack (arg); -+} -+ - /* Tell the garbage collector about GTY markers in this source file. */ - #include "gt-calls.h" -diff --git a/gcc/calls.h b/gcc/calls.h -index 128bb5130..01ab3905a 100644 ---- a/gcc/calls.h -+++ b/gcc/calls.h -@@ -20,23 +20,108 @@ along with GCC; see the file COPYING3. If not see - #ifndef GCC_CALLS_H - #define GCC_CALLS_H - -+/* Describes a function argument. -+ -+ Each argument conceptually has a gimple-level type. Usually this type -+ is available directly as a tree via the TYPE field, but when calling -+ libgcc support functions it might instead be inferred from a mode, -+ in which case the type isn't available directly. -+ -+ This gimple-level type might go through promotion before being passed to -+ the target function. Depending on the context, the MODE field is either -+ the mode of the gimple-level type (whether explicitly given or not) -+ or the mode after promotion has been performed. */ -+class function_arg_info -+{ -+public: -+ function_arg_info () : type (NULL_TREE), mode (VOIDmode), named (false) {} -+ -+ /* Initialize an argument of mode MODE, either before or after promotion. */ -+ function_arg_info (machine_mode mode, bool named) -+ : type (NULL_TREE), mode (mode), named (named) -+ {} -+ -+ /* Initialize an unpromoted argument of type TYPE. */ -+ function_arg_info (tree type, bool named) -+ : type (type), mode (TYPE_MODE (type)), named (named) -+ {} -+ -+ /* Initialize an argument with explicit properties. */ -+ function_arg_info (tree type, machine_mode mode, bool named) -+ : type (type), mode (mode), named (named) -+ {} -+ -+ /* Return true if the gimple-level type is an aggregate. */ -+ bool aggregate_type_p () const { return type && AGGREGATE_TYPE_P (type); } -+ -+ /* Return the size of the gimple-level type, or -1 if the size is -+ variable or otherwise not representable as a poly_int64. -+ -+ Use this function when MODE is the mode of the type before promotion, -+ or in any context if the target never promotes function arguments. */ -+ poly_int64 type_size_in_bytes () const -+ { -+ if (type) -+ return int_size_in_bytes (type); -+ return GET_MODE_SIZE (mode); -+ } -+ -+ /* Return the size of the argument after promotion, or -1 if the size -+ is variable or otherwise not representable as a poly_int64. -+ -+ Use this function when MODE is the mode of the type after promotion. */ -+ poly_int64 promoted_size_in_bytes () const -+ { -+ if (mode == BLKmode) -+ return int_size_in_bytes (type); -+ return GET_MODE_SIZE (mode); -+ } -+ -+ /* True if the argument represents the end of the argument list, -+ as returned by end_marker (). */ -+ bool end_marker_p () const { return mode == VOIDmode; } -+ -+ /* Return a function_arg_info that represents the end of the -+ argument list. */ -+ static function_arg_info end_marker () -+ { -+ return function_arg_info (void_type_node, /*named=*/true); -+ } -+ -+ /* The type of the argument, or null if not known (which is true for -+ libgcc support functions). */ -+ tree type; -+ -+ /* The mode of the argument. Depending on context, this might be -+ the mode of the argument type or the mode after promotion. 
*/ -+ machine_mode mode; -+ -+ /* True if the argument is treated as a named argument, false if it is -+ treated as an unnamed variadic argument (i.e. one passed through -+ "..."). See also TARGET_STRICT_ARGUMENT_NAMING. */ -+ unsigned int named : 1; -+}; -+ - extern int flags_from_decl_or_type (const_tree); - extern int call_expr_flags (const_tree); - extern int setjmp_call_p (const_tree); - extern bool gimple_maybe_alloca_call_p (const gimple *); - extern bool gimple_alloca_call_p (const gimple *); - extern bool alloca_call_p (const_tree); --extern bool must_pass_in_stack_var_size (machine_mode, const_tree); --extern bool must_pass_in_stack_var_size_or_pad (machine_mode, const_tree); -+extern bool must_pass_in_stack_var_size (const function_arg_info &); -+extern bool must_pass_in_stack_var_size_or_pad (const function_arg_info &); -+extern bool must_pass_va_arg_in_stack (tree); - extern rtx prepare_call_address (tree, rtx, rtx, rtx *, int, int); - extern bool shift_return_value (machine_mode, bool, rtx); - extern rtx expand_call (tree, rtx, int); - extern void fixup_tail_calls (void); - --extern bool pass_by_reference (CUMULATIVE_ARGS *, machine_mode, -- tree, bool); --extern bool reference_callee_copied (CUMULATIVE_ARGS *, machine_mode, -- tree, bool); -+extern bool pass_by_reference (CUMULATIVE_ARGS *, function_arg_info); -+extern bool pass_va_arg_by_reference (tree); -+extern bool apply_pass_by_reference_rules (CUMULATIVE_ARGS *, -+ function_arg_info &); -+extern bool reference_callee_copied (CUMULATIVE_ARGS *, -+ const function_arg_info &); - extern void maybe_warn_alloc_args_overflow (tree, tree, tree[2], int[2]); - extern tree get_attr_nonstring_decl (tree, tree * = NULL); - extern void maybe_warn_nonstring_arg (tree, tree); -diff --git a/gcc/cfgcleanup.c b/gcc/cfgcleanup.c -index 8c464ec79..ff7f014da 100644 ---- a/gcc/cfgcleanup.c -+++ b/gcc/cfgcleanup.c -@@ -54,6 +54,7 @@ along with GCC; see the file COPYING3. If not see - #include "dbgcnt.h" - #include "rtl-iter.h" - #include "regs.h" -+#include "function-abi.h" - - #define FORWARDER_BLOCK_P(BB) ((BB)->flags & BB_FORWARDER_BLOCK) - -@@ -1230,12 +1231,13 @@ old_insns_match_p (int mode ATTRIBUTE_UNUSED, rtx_insn *i1, rtx_insn *i2) - } - } - -- HARD_REG_SET i1_used, i2_used; -+ HARD_REG_SET i1_used = insn_callee_abi (i1).full_reg_clobbers (); -+ HARD_REG_SET i2_used = insn_callee_abi (i2).full_reg_clobbers (); -+ /* ??? This preserves traditional behavior; it might not be needed. */ -+ i1_used |= fixed_reg_set; -+ i2_used |= fixed_reg_set; - -- get_call_reg_set_usage (i1, &i1_used, call_used_reg_set); -- get_call_reg_set_usage (i2, &i2_used, call_used_reg_set); -- -- if (!hard_reg_set_equal_p (i1_used, i2_used)) -+ if (i1_used != i2_used) - return dir_none; - } - -@@ -1269,7 +1271,7 @@ old_insns_match_p (int mode ATTRIBUTE_UNUSED, rtx_insn *i1, rtx_insn *i2) - if (REG_NOTE_KIND (note) == REG_DEAD && STACK_REG_P (XEXP (note, 0))) - SET_HARD_REG_BIT (i2_regset, REGNO (XEXP (note, 0))); - -- if (!hard_reg_set_equal_p (i1_regset, i2_regset)) -+ if (i1_regset != i2_regset) - return dir_none; - } - #endif -diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c -index 4ae8e3b32..218414b39 100644 ---- a/gcc/cfgexpand.c -+++ b/gcc/cfgexpand.c -@@ -2874,6 +2874,15 @@ asm_clobber_reg_is_valid (int regno, int nregs, const char *regname) - error ("PIC register clobbered by %qs in %", regname); - is_valid = false; - } -+ else if (!in_hard_reg_set_p -+ (accessible_reg_set, reg_raw_mode[regno], regno)) -+ { -+ /* ??? Diagnose during gimplification? 
*/ -+ error ("the register %qs cannot be clobbered in %" -+ " for the current target", regname); -+ is_valid = false; -+ } -+ - /* Clobbering the stack pointer register is deprecated. GCC expects - the value of the stack pointer after an asm statement to be the same - as it was before, so no asm can validly clobber the stack pointer in -@@ -3865,7 +3874,6 @@ expand_gimple_stmt (gimple *stmt) - /* If we want exceptions for non-call insns, any - may_trap_p instruction may throw. */ - && GET_CODE (PATTERN (insn)) != CLOBBER -- && GET_CODE (PATTERN (insn)) != CLOBBER_HIGH - && GET_CODE (PATTERN (insn)) != USE - && insn_could_throw_p (insn)) - make_reg_eh_region_note (insn, 0, lp_nr); -diff --git a/gcc/cfgloopanal.c b/gcc/cfgloopanal.c -index 6dbe96f9d..3388da7dd 100644 ---- a/gcc/cfgloopanal.c -+++ b/gcc/cfgloopanal.c -@@ -353,7 +353,7 @@ init_set_costs (void) - && !fixed_regs[i]) - { - target_avail_regs++; -- if (call_used_regs[i]) -+ if (call_used_or_fixed_reg_p (i)) - target_clobbered_regs++; - } - -diff --git a/gcc/cgraph.c b/gcc/cgraph.c -index 62f1afa2a..9dca43031 100644 ---- a/gcc/cgraph.c -+++ b/gcc/cgraph.c -@@ -1883,7 +1883,7 @@ cgraph_node::local_info (tree decl) - /* Return local info for the compiled function. */ - - cgraph_rtl_info * --cgraph_node::rtl_info (tree decl) -+cgraph_node::rtl_info (const_tree decl) - { - gcc_assert (TREE_CODE (decl) == FUNCTION_DECL); - cgraph_node *node = get (decl); -@@ -1898,7 +1898,10 @@ cgraph_node::rtl_info (tree decl) - return NULL; - /* Allocate if it doesn't exist. */ - if (node->rtl == NULL) -- node->rtl = ggc_cleared_alloc (); -+ { -+ node->rtl = ggc_cleared_alloc (); -+ node->rtl->function_used_regs = reg_class_contents[ALL_REGS]; -+ } - return node->rtl; - } - -diff --git a/gcc/cgraph.h b/gcc/cgraph.h -index 10d1a2c6f..ad6720a4b 100644 ---- a/gcc/cgraph.h -+++ b/gcc/cgraph.h -@@ -1347,7 +1347,7 @@ public: - static cgraph_local_info *local_info (tree decl); - - /* Return local info for the compiled function. */ -- static struct cgraph_rtl_info *rtl_info (tree); -+ static struct cgraph_rtl_info *rtl_info (const_tree); - - /* Return the cgraph node that has ASMNAME for its DECL_ASSEMBLER_NAME. - Return NULL if there's no such node. */ -diff --git a/gcc/cgraphclones.c b/gcc/cgraphclones.c -index cd3f585bd..43423234b 100644 ---- a/gcc/cgraphclones.c -+++ b/gcc/cgraphclones.c -@@ -225,10 +225,7 @@ build_function_decl_skip_args (tree orig_decl, bitmap args_to_skip, - if (fndecl_built_in_p (new_decl) - && args_to_skip - && !bitmap_empty_p (args_to_skip)) -- { -- DECL_BUILT_IN_CLASS (new_decl) = NOT_BUILT_IN; -- DECL_FUNCTION_CODE (new_decl) = (enum built_in_function) 0; -- } -+ set_decl_built_in_function (new_decl, NOT_BUILT_IN, 0); - /* The FE might have information and assumptions about the other - arguments. */ - DECL_LANG_SPECIFIC (new_decl) = NULL; -@@ -415,7 +412,7 @@ dump_callgraph_transformation (const cgraph_node *original, - - If the new node is being inlined into another one, NEW_INLINED_TO should be - the outline function the new one is (even indirectly) inlined to. All hooks -- will see this in node's global.inlined_to, when invoked. Can be NULL if the -+ will see this in node's inlined_to, when invoked. Can be NULL if the - node is not inlined. 
*/ - - cgraph_node * -@@ -1056,7 +1053,7 @@ cgraph_node::create_version_clone_with_body - location_t saved_loc = input_location; - tree v = TREE_VALUE (target_attributes); - input_location = DECL_SOURCE_LOCATION (new_decl); -- bool r = targetm.target_option.valid_attribute_p (new_decl, NULL, v, 0); -+ bool r = targetm.target_option.valid_attribute_p (new_decl, NULL, v, 1); - input_location = saved_loc; - if (!r) - return NULL; -diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c -index dee6becc7..ddf298583 100644 ---- a/gcc/cgraphunit.c -+++ b/gcc/cgraphunit.c -@@ -1793,7 +1793,6 @@ cgraph_node::expand_thunk (bool output_asm_thunks, bool force_gimple_thunk) - && targetm.asm_out.can_output_mi_thunk (thunk_fndecl, fixed_offset, - virtual_value, alias)) - { -- const char *fnname; - tree fn_block; - tree restype = TREE_TYPE (TREE_TYPE (thunk_fndecl)); - -@@ -1817,7 +1816,6 @@ cgraph_node::expand_thunk (bool output_asm_thunks, bool force_gimple_thunk) - = build_decl (DECL_SOURCE_LOCATION (thunk_fndecl), - RESULT_DECL, 0, restype); - DECL_CONTEXT (DECL_RESULT (thunk_fndecl)) = thunk_fndecl; -- fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl)); - - /* The back end expects DECL_INITIAL to contain a BLOCK, so we - create one. */ -@@ -1831,12 +1829,10 @@ cgraph_node::expand_thunk (bool output_asm_thunks, bool force_gimple_thunk) - insn_locations_init (); - set_curr_insn_location (DECL_SOURCE_LOCATION (thunk_fndecl)); - prologue_location = curr_insn_location (); -- assemble_start_function (thunk_fndecl, fnname); - - targetm.asm_out.output_mi_thunk (asm_out_file, thunk_fndecl, - fixed_offset, virtual_value, alias); - -- assemble_end_function (thunk_fndecl, fnname); - insn_locations_finalize (); - init_insn_lengths (); - free_after_compilation (cfun); -diff --git a/gcc/cif-code.def b/gcc/cif-code.def -index 3356377a1..a154f24f1 100644 ---- a/gcc/cif-code.def -+++ b/gcc/cif-code.def -@@ -70,8 +70,12 @@ DEFCIFCODE(LARGE_STACK_FRAME_GROWTH_LIMIT, CIF_FINAL_NORMAL, - N_("--param large-stack-frame-growth limit reached")) - DEFCIFCODE(MAX_INLINE_INSNS_SINGLE_LIMIT, CIF_FINAL_NORMAL, - N_("--param max-inline-insns-single limit reached")) -+DEFCIFCODE(MAX_INLINE_INSNS_SINGLE_O2_LIMIT, CIF_FINAL_NORMAL, -+ N_("--param max-inline-insns-single-O2 limit reached")) - DEFCIFCODE(MAX_INLINE_INSNS_AUTO_LIMIT, CIF_FINAL_NORMAL, - N_("--param max-inline-insns-auto limit reached")) -+DEFCIFCODE(MAX_INLINE_INSNS_AUTO_O2_LIMIT, CIF_FINAL_NORMAL, -+ N_("--param max-inline-insns-auto-O2 limit reached")) - DEFCIFCODE(INLINE_UNIT_GROWTH_LIMIT, CIF_FINAL_NORMAL, - N_("--param inline-unit-growth limit reached")) - -@@ -83,6 +87,10 @@ DEFCIFCODE(RECURSIVE_INLINING, CIF_FINAL_NORMAL, - DEFCIFCODE(UNLIKELY_CALL, CIF_FINAL_NORMAL, - N_("call is unlikely and code size would grow")) - -+/* Call is considered never executed. */ -+DEFCIFCODE(NEVER_CALL, CIF_FINAL_NORMAL, -+ N_("call is considered never executed and code size would grow")) -+ - /* Function is not declared as inline. 
*/ - DEFCIFCODE(NOT_DECLARED_INLINED, CIF_FINAL_NORMAL, - N_("function not declared inline and code size would grow")) -diff --git a/gcc/combine-stack-adj.c b/gcc/combine-stack-adj.c -index 3638a1b10..d14d59abc 100644 ---- a/gcc/combine-stack-adj.c -+++ b/gcc/combine-stack-adj.c -@@ -133,7 +133,6 @@ single_set_for_csa (rtx_insn *insn) - && SET_SRC (this_rtx) == SET_DEST (this_rtx)) - ; - else if (GET_CODE (this_rtx) != CLOBBER -- && GET_CODE (this_rtx) != CLOBBER_HIGH - && GET_CODE (this_rtx) != USE) - return NULL_RTX; - } -diff --git a/gcc/combine.c b/gcc/combine.c -index b9d674c96..a425f0ca6 100644 ---- a/gcc/combine.c -+++ b/gcc/combine.c -@@ -571,7 +571,6 @@ find_single_use_1 (rtx dest, rtx *loc) - case SYMBOL_REF: - CASE_CONST_ANY: - case CLOBBER: -- case CLOBBER_HIGH: - return 0; - - case SET: -@@ -1224,8 +1223,7 @@ combine_instructions (rtx_insn *f, unsigned int nregs) - subst_low_luid = DF_INSN_LUID (insn); - subst_insn = insn; - -- note_stores (PATTERN (insn), set_nonzero_bits_and_sign_copies, -- insn); -+ note_stores (insn, set_nonzero_bits_and_sign_copies, insn); - record_dead_and_set_regs (insn); - - if (AUTO_INC_DEC) -@@ -1763,9 +1761,6 @@ set_nonzero_bits_and_sign_copies (rtx x, const_rtx set, void *data) - return; - } - -- /* Should not happen as we only using pseduo registers. */ -- gcc_assert (GET_CODE (set) != CLOBBER_HIGH); -- - /* If this register is being initialized using itself, and the - register is uninitialized in this basic block, and there are - no LOG_LINKS which set the register, then part of the -@@ -1924,7 +1919,6 @@ can_combine_p (rtx_insn *insn, rtx_insn *i3, rtx_insn *pred ATTRIBUTE_UNUSED, - - /* We can ignore CLOBBERs. */ - case CLOBBER: -- case CLOBBER_HIGH: - break; - - case SET: -@@ -2439,7 +2433,7 @@ likely_spilled_retval_p (rtx_insn *insn) - info.mask = mask; - for (p = PREV_INSN (use); info.mask && p != insn; p = PREV_INSN (p)) - if (INSN_P (p)) -- note_stores (PATTERN (p), likely_spilled_retval_1, &info); -+ note_stores (p, likely_spilled_retval_1, &info); - mask = info.mask; - - /* Check if any of the (probably) live return value registers is -@@ -2595,8 +2589,6 @@ is_parallel_of_n_reg_sets (rtx pat, int n) - if (XEXP (XVECEXP (pat, 0, i), 0) == const0_rtx) - return false; - break; -- case CLOBBER_HIGH: -- break; - default: - return false; - } -@@ -2897,8 +2889,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0, - for (i = 0; ok && i < XVECLEN (p2, 0); i++) - { - if ((GET_CODE (XVECEXP (p2, 0, i)) == SET -- || GET_CODE (XVECEXP (p2, 0, i)) == CLOBBER -- || GET_CODE (XVECEXP (p2, 0, i)) == CLOBBER_HIGH) -+ || GET_CODE (XVECEXP (p2, 0, i)) == CLOBBER) - && reg_overlap_mentioned_p (SET_DEST (PATTERN (i3)), - SET_DEST (XVECEXP (p2, 0, i)))) - ok = false; -@@ -4741,8 +4732,8 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0, - been made to this insn. The order is important, because newi2pat - can affect nonzero_bits of newpat. */ - if (newi2pat) -- note_stores (newi2pat, set_nonzero_bits_and_sign_copies, NULL); -- note_stores (newpat, set_nonzero_bits_and_sign_copies, NULL); -+ note_pattern_stores (newi2pat, set_nonzero_bits_and_sign_copies, NULL); -+ note_pattern_stores (newpat, set_nonzero_bits_and_sign_copies, NULL); - } - - if (undobuf.other_insn != NULL_RTX) -@@ -13409,15 +13400,6 @@ record_dead_and_set_regs_1 (rtx dest, const_rtx setter, void *data) - ? 
SET_SRC (setter) - : gen_lowpart (GET_MODE (dest), - SET_SRC (setter))); -- else if (GET_CODE (setter) == CLOBBER_HIGH) -- { -- reg_stat_type *rsp = ®_stat[REGNO (dest)]; -- if (rsp->last_set_value -- && reg_is_clobbered_by_clobber_high -- (REGNO (dest), GET_MODE (rsp->last_set_value), -- XEXP (setter, 0))) -- record_value_for_reg (dest, NULL, NULL_RTX); -- } - else - record_value_for_reg (dest, record_dead_insn, NULL_RTX); - } -@@ -13487,10 +13469,10 @@ record_dead_and_set_regs (rtx_insn *insn) - the return value register is set at this LUID. We could - still replace a register with the return value from the - wrong subroutine call! */ -- note_stores (PATTERN (insn), record_dead_and_set_regs_1, NULL_RTX); -+ note_stores (insn, record_dead_and_set_regs_1, NULL_RTX); - } - else -- note_stores (PATTERN (insn), record_dead_and_set_regs_1, insn); -+ note_stores (insn, record_dead_and_set_regs_1, insn); - } - - /* If a SUBREG has the promoted bit set, it is in fact a property of the -@@ -13853,10 +13835,6 @@ reg_dead_at_p_1 (rtx dest, const_rtx x, void *data ATTRIBUTE_UNUSED) - if (!REG_P (dest)) - return; - -- if (GET_CODE (x) == CLOBBER_HIGH -- && !reg_is_clobbered_by_clobber_high (reg_dead_reg, XEXP (x, 0))) -- return; -- - regno = REGNO (dest); - endregno = END_REGNO (dest); - if (reg_dead_endregno > regno && reg_dead_regno < endregno) -@@ -13904,7 +13882,7 @@ reg_dead_at_p (rtx reg, rtx_insn *insn) - if (find_regno_note (insn, REG_UNUSED, reg_dead_regno)) - return 1; - -- note_stores (PATTERN (insn), reg_dead_at_p_1, NULL); -+ note_stores (insn, reg_dead_at_p_1, NULL); - if (reg_dead_flag) - return reg_dead_flag == 1 ? 1 : 0; - -diff --git a/gcc/common.opt b/gcc/common.opt -index 0bdf51dd8..7dee534b8 100644 ---- a/gcc/common.opt -+++ b/gcc/common.opt -@@ -1080,16 +1080,16 @@ Common Report Var(flag_branch_probabilities) Optimization - Use profiling information for branch probabilities. - - fbranch-target-load-optimize --Common Report Var(flag_branch_target_load_optimize) Optimization --Perform branch target load optimization before prologue / epilogue threading. -+Common Ignore -+Does nothing. Preserved for backward compatibility. - - fbranch-target-load-optimize2 --Common Report Var(flag_branch_target_load_optimize2) Optimization --Perform branch target load optimization after prologue / epilogue threading. -+Common Ignore -+Does nothing. Preserved for backward compatibility. - - fbtr-bb-exclusive --Common Report Var(flag_btr_bb_exclusive) Optimization --Restrict target load migration not to re-use registers in any basic block. -+Common Ignore -+Does nothing. Preserved for backward compatibility. - - fcall-saved- - Common Joined RejectNegative Var(common_deferred_options) Defer -@@ -1289,6 +1289,26 @@ Enum(diagnostic_color_rule) String(always) Value(DIAGNOSTICS_COLOR_YES) - EnumValue - Enum(diagnostic_color_rule) String(auto) Value(DIAGNOSTICS_COLOR_AUTO) - -+fdiagnostics-urls= -+Driver Common Joined RejectNegative Var(flag_diagnostics_show_urls) Enum(diagnostic_url_rule) Init(DIAGNOSTICS_URL_AUTO) -+-fdiagnostics-urls=[never|always|auto] Embed URLs in diagnostics. -+ -+; Required for these enum values. 
-+SourceInclude -+diagnostic-url.h -+ -+Enum -+Name(diagnostic_url_rule) Type(int) -+ -+EnumValue -+Enum(diagnostic_url_rule) String(never) Value(DIAGNOSTICS_URL_NO) -+ -+EnumValue -+Enum(diagnostic_url_rule) String(always) Value(DIAGNOSTICS_URL_YES) -+ -+EnumValue -+Enum(diagnostic_url_rule) String(auto) Value(DIAGNOSTICS_URL_AUTO) -+ - fdiagnostics-format= - Common Joined RejectNegative Enum(diagnostics_output_format) - -fdiagnostics-format=[text|json] Select output format. -@@ -1963,7 +1983,7 @@ Common Var(flag_dce) Init(1) Optimization - Use the RTL dead code elimination pass. - - fdse --Common Var(flag_dse) Init(1) Optimization -+Common Var(flag_dse) Init(0) Optimization - Use the RTL dead store elimination pass. - - freschedule-modulo-scheduled-loops -diff --git a/gcc/common/config/aarch64/aarch64-common.c b/gcc/common/config/aarch64/aarch64-common.c -index bab3ab3fa..07c032539 100644 ---- a/gcc/common/config/aarch64/aarch64-common.c -+++ b/gcc/common/config/aarch64/aarch64-common.c -@@ -170,9 +170,9 @@ aarch64_handle_option (struct gcc_options *opts, - struct aarch64_option_extension - { - const char *const name; -- const unsigned long flag_canonical; -- const unsigned long flags_on; -- const unsigned long flags_off; -+ const uint64_t flag_canonical; -+ const uint64_t flags_on; -+ const uint64_t flags_off; - const bool is_synthetic; - }; - -@@ -201,14 +201,14 @@ struct processor_name_to_arch - { - const std::string processor_name; - const enum aarch64_arch arch; -- const unsigned long flags; -+ const uint64_t flags; - }; - - struct arch_to_arch_name - { - const enum aarch64_arch arch; - const std::string arch_name; -- const unsigned long flags; -+ const uint64_t flags; - }; - - /* Map processor names to the architecture revision they implement and -@@ -238,7 +238,7 @@ static const struct arch_to_arch_name all_architectures[] = - a copy of the string is created and stored to INVALID_EXTENSION. */ - - enum aarch64_parse_opt_result --aarch64_parse_extension (const char *str, unsigned long *isa_flags, -+aarch64_parse_extension (const char *str, uint64_t *isa_flags, - std::string *invalid_extension) - { - /* The extension string is parsed left to right. */ -@@ -326,18 +326,21 @@ int opt_ext_cmp (const void* a, const void* b) - turns on as a dependency. As an example +dotprod turns on FL_DOTPROD and - FL_SIMD. As such the set of bits represented by this option is - {FL_DOTPROD, FL_SIMD}. */ -- unsigned long total_flags_a = opt_a->flag_canonical & opt_a->flags_on; -- unsigned long total_flags_b = opt_b->flag_canonical & opt_b->flags_on; -+ uint64_t total_flags_a = opt_a->flag_canonical & opt_a->flags_on; -+ uint64_t total_flags_b = opt_b->flag_canonical & opt_b->flags_on; - int popcnt_a = popcount_hwi ((HOST_WIDE_INT)total_flags_a); - int popcnt_b = popcount_hwi ((HOST_WIDE_INT)total_flags_b); - int order = popcnt_b - popcnt_a; - - /* If they have the same amount of bits set, give it a more - deterministic ordering by using the value of the bits themselves. */ -- if (order == 0) -- return total_flags_b - total_flags_a; -+ if (order != 0) -+ return order; - -- return order; -+ if (total_flags_a != total_flags_b) -+ return total_flags_a < total_flags_b ? 1 : -1; -+ -+ return 0; - } - - /* Implement TARGET_OPTION_INIT_STRUCT. 
*/ -@@ -373,9 +376,9 @@ aarch64_option_init_struct (struct gcc_options *opts ATTRIBUTE_UNUSED) - */ - - static bool --aarch64_contains_opt (unsigned long isa_flag_bits, opt_ext *opt) -+aarch64_contains_opt (uint64_t isa_flag_bits, opt_ext *opt) - { -- unsigned long flags_check -+ uint64_t flags_check - = opt->is_synthetic ? opt->flags_on : opt->flag_canonical; - - return (isa_flag_bits & flags_check) == flags_check; -@@ -388,13 +391,13 @@ aarch64_contains_opt (unsigned long isa_flag_bits, opt_ext *opt) - that all the "+" flags come before the "+no" flags. */ - - std::string --aarch64_get_extension_string_for_isa_flags (unsigned long isa_flags, -- unsigned long default_arch_flags) -+aarch64_get_extension_string_for_isa_flags (uint64_t isa_flags, -+ uint64_t default_arch_flags) - { - const struct aarch64_option_extension *opt = NULL; - std::string outstr = ""; - -- unsigned long isa_flag_bits = isa_flags; -+ uint64_t isa_flag_bits = isa_flags; - - /* Pass one: Minimize the search space by reducing the set of options - to the smallest set that still turns on the same features as before in -@@ -538,7 +541,7 @@ aarch64_rewrite_selected_cpu (const char *name) - || a_to_an->arch == aarch64_no_arch) - fatal_error (input_location, "unknown value %qs for %<-mcpu%>", name); - -- unsigned long extensions = p_to_a->flags; -+ uint64_t extensions = p_to_a->flags; - aarch64_parse_extension (extension_str.c_str (), &extensions, NULL); - - std::string outstr = a_to_an->arch_name -diff --git a/gcc/config.gcc b/gcc/config.gcc -index b2282ecdf..506a918ed 100644 ---- a/gcc/config.gcc -+++ b/gcc/config.gcc -@@ -315,12 +315,12 @@ m32c*-*-*) - ;; - aarch64*-*-*) - cpu_type=aarch64 -- extra_headers="arm_fp16.h arm_neon.h arm_acle.h" -+ extra_headers="arm_fp16.h arm_neon.h arm_bf16.h arm_acle.h arm_sve.h" - c_target_objs="aarch64-c.o" - cxx_target_objs="aarch64-c.o" - d_target_objs="aarch64-d.o" -- extra_objs="aarch64-builtins.o aarch-common.o cortex-a57-fma-steering.o aarch64-speculation.o falkor-tag-collision-avoidance.o aarch64-bti-insert.o" -- target_gtfiles="\$(srcdir)/config/aarch64/aarch64-builtins.c" -+ extra_objs="aarch64-builtins.o aarch-common.o aarch64-sve-builtins.o aarch64-sve-builtins-shapes.o aarch64-sve-builtins-base.o cortex-a57-fma-steering.o aarch64-speculation.o falkor-tag-collision-avoidance.o aarch64-bti-insert.o" -+ target_gtfiles="\$(srcdir)/config/aarch64/aarch64-builtins.c \$(srcdir)/config/aarch64/aarch64-sve-builtins.h \$(srcdir)/config/aarch64/aarch64-sve-builtins.cc" - target_has_targetm_common=yes - ;; - alpha*-*-*) -@@ -382,7 +382,8 @@ i[34567]86-*-*) - c_target_objs="i386-c.o" - cxx_target_objs="i386-c.o" - d_target_objs="i386-d.o" -- extra_objs="x86-tune-sched.o x86-tune-sched-bd.o x86-tune-sched-atom.o x86-tune-sched-core.o" -+ extra_objs="x86-tune-sched.o x86-tune-sched-bd.o x86-tune-sched-atom.o x86-tune-sched-core.o i386-options.o i386-builtins.o i386-expand.o i386-features.o" -+ target_gtfiles="\$(srcdir)/config/i386/i386-builtins.c \$(srcdir)/config/i386/i386-expand.c \$(srcdir)/config/i386/i386-options.c" - extra_options="${extra_options} fused-madd.opt" - extra_headers="cpuid.h mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h - pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h -@@ -414,7 +415,8 @@ x86_64-*-*) - cxx_target_objs="i386-c.o" - d_target_objs="i386-d.o" - extra_options="${extra_options} fused-madd.opt" -- extra_objs="x86-tune-sched.o x86-tune-sched-bd.o x86-tune-sched-atom.o x86-tune-sched-core.o" -+ extra_objs="x86-tune-sched.o x86-tune-sched-bd.o 
x86-tune-sched-atom.o x86-tune-sched-core.o i386-options.o i386-builtins.o i386-expand.o i386-features.o" -+ target_gtfiles="\$(srcdir)/config/i386/i386-builtins.c \$(srcdir)/config/i386/i386-expand.c \$(srcdir)/config/i386/i386-options.c" - extra_headers="cpuid.h mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h - pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h - nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h -@@ -980,7 +982,7 @@ esac - case ${target} in - aarch64*-*-elf | aarch64*-*-fuchsia* | aarch64*-*-rtems*) - tm_file="${tm_file} dbxelf.h elfos.h newlib-stdint.h" -- tm_file="${tm_file} aarch64/aarch64-elf.h aarch64/aarch64-elf-raw.h" -+ tm_file="${tm_file} aarch64/aarch64-elf.h aarch64/aarch64-errata.h aarch64/aarch64-elf-raw.h" - tmake_file="${tmake_file} aarch64/t-aarch64" - case $target in - aarch64-*-elf*) -@@ -1017,13 +1019,19 @@ aarch64*-*-elf | aarch64*-*-fuchsia* | aarch64*-*-rtems*) - ;; - aarch64*-*-freebsd*) - tm_file="${tm_file} dbxelf.h elfos.h ${fbsd_tm_file}" -- tm_file="${tm_file} aarch64/aarch64-elf.h aarch64/aarch64-freebsd.h" -+ tm_file="${tm_file} aarch64/aarch64-elf.h aarch64/aarch64-errata.h aarch64/aarch64-freebsd.h" - tmake_file="${tmake_file} aarch64/t-aarch64 aarch64/t-aarch64-freebsd" - tm_defines="${tm_defines} TARGET_DEFAULT_ASYNC_UNWIND_TABLES=1" - ;; -+aarch64*-*-netbsd*) -+ tm_file="${tm_file} dbxelf.h elfos.h ${nbsd_tm_file}" -+ tm_file="${tm_file} aarch64/aarch64-elf.h aarch64/aarch64-errata.h aarch64/aarch64-netbsd.h" -+ tmake_file="${tmake_file} aarch64/t-aarch64 aarch64/t-aarch64-netbsd" -+ extra_options="${extra_options} netbsd.opt netbsd-elf.opt" -+ ;; - aarch64*-*-linux*) - tm_file="${tm_file} dbxelf.h elfos.h gnu-user.h linux.h glibc-stdint.h" -- tm_file="${tm_file} aarch64/aarch64-elf.h aarch64/aarch64-linux.h" -+ tm_file="${tm_file} aarch64/aarch64-elf.h aarch64/aarch64-errata.h aarch64/aarch64-linux.h" - tmake_file="${tmake_file} aarch64/t-aarch64 aarch64/t-aarch64-linux" - tm_defines="${tm_defines} TARGET_DEFAULT_ASYNC_UNWIND_TABLES=1" - case $target in -@@ -3847,32 +3855,40 @@ case "${target}" in - sed -e 's/,.*$//'` - fi - -+ # Use the pre-processor to strip flatten the options. -+ # This makes the format less rigid than if we use -+ # grep and sed directly here. -+ opt_macro="AARCH64_OPT_EXTENSION(A, B, C, D, E, F)=A, B, C, D, E, F" -+ options_parsed="`$ac_cv_prog_CPP -D"$opt_macro" -x c \ -+ ${srcdir}/config/aarch64/aarch64-option-extensions.def`" -+ -+ # Match one element inside AARCH64_OPT_EXTENSION, we -+ # consume anything that's not a ,. -+ elem="[ ]*\([^,]\+\)[ ]*" -+ -+ # Repeat the pattern for the number of entries in the -+ # AARCH64_OPT_EXTENSION, currently 6 times. 
-+ sed_patt="^$elem,$elem,$elem,$elem,$elem,$elem" -+ - while [ x"$ext_val" != x ] - do - ext_val=`echo $ext_val | sed -e 's/\+//'` - ext=`echo $ext_val | sed -e 's/\+.*//'` - base_ext=`echo $ext | sed -e 's/^no//'` -+ opt_line=`echo -e "$options_parsed" | \ -+ grep "^\"$base_ext\""` - - if [ x"$base_ext" = x ] \ -- || grep "^AARCH64_OPT_EXTENSION(\"$base_ext\"," \ -- ${srcdir}/config/aarch64/aarch64-option-extensions.def \ -- > /dev/null; then -- -- ext_canon=`grep "^AARCH64_OPT_EXTENSION(\"$base_ext\"," \ -- ${srcdir}/config/aarch64/aarch64-option-extensions.def | \ -- sed -e 's/^[^,]*,[ ]*//' | \ -- sed -e 's/,.*$//'` -- ext_on=`grep "^AARCH64_OPT_EXTENSION(\"$base_ext\"," \ -- ${srcdir}/config/aarch64/aarch64-option-extensions.def | \ -- sed -e 's/^[^,]*,[ ]*[^,]*,[ ]*//' | \ -- sed -e 's/,.*$//' | \ -- sed -e 's/).*$//'` -- ext_off=`grep "^AARCH64_OPT_EXTENSION(\"$base_ext\"," \ -- ${srcdir}/config/aarch64/aarch64-option-extensions.def | \ -- sed -e 's/^[^,]*,[ ]*[^,]*,[ ]*[^,]*,[ ]*//' | \ -- sed -e 's/,.*$//' | \ -- sed -e 's/).*$//'` -- -+ || [[ -n $opt_line ]]; then -+ -+ # These regexp extract the elements based on -+ # their group match index in the regexp. -+ ext_canon=`echo -e "$opt_line" | \ -+ sed -e "s/$sed_patt/\2/"` -+ ext_on=`echo -e "$opt_line" | \ -+ sed -e "s/$sed_patt/\3/"` -+ ext_off=`echo -e "$opt_line" | \ -+ sed -e "s/$sed_patt/\4/"` - - if [ $ext = $base_ext ]; then - # Adding extension -diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def -index d258bd492..e464d329c 100644 ---- a/gcc/config/aarch64/aarch64-arches.def -+++ b/gcc/config/aarch64/aarch64-arches.def -@@ -36,5 +36,6 @@ AARCH64_ARCH("armv8.2-a", generic, 8_2A, 8, AARCH64_FL_FOR_ARCH8_2) - AARCH64_ARCH("armv8.3-a", generic, 8_3A, 8, AARCH64_FL_FOR_ARCH8_3) - AARCH64_ARCH("armv8.4-a", generic, 8_4A, 8, AARCH64_FL_FOR_ARCH8_4) - AARCH64_ARCH("armv8.5-a", generic, 8_5A, 8, AARCH64_FL_FOR_ARCH8_5) -+AARCH64_ARCH("armv8.6-a", generic, 8_6A, 8, AARCH64_FL_FOR_ARCH8_6) - - #undef AARCH64_ARCH -diff --git a/gcc/config/aarch64/aarch64-bti-insert.c b/gcc/config/aarch64/aarch64-bti-insert.c -index e519a0f0a..db8ebb1ba 100644 ---- a/gcc/config/aarch64/aarch64-bti-insert.c -+++ b/gcc/config/aarch64/aarch64-bti-insert.c -@@ -106,7 +106,9 @@ aarch64_pac_insn_p (rtx x) - int unspec_val = XINT (sub, 1); - switch (unspec_val) - { -- case UNSPEC_PACISP: -+ case UNSPEC_PACIASP: -+ /* fall-through. */ -+ case UNSPEC_PACIBSP: - return true; - - default: -diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c -index d7b1b7bd6..c890fcc37 100644 ---- a/gcc/config/aarch64/aarch64-builtins.c -+++ b/gcc/config/aarch64/aarch64-builtins.c -@@ -68,6 +68,9 @@ - #define hi_UP E_HImode - #define hf_UP E_HFmode - #define qi_UP E_QImode -+#define bf_UP E_BFmode -+#define v4bf_UP E_V4BFmode -+#define v8bf_UP E_V8BFmode - #define UP(X) X##_UP - - #define SIMD_MAX_BUILTIN_ARGS 5 -@@ -107,6 +110,9 @@ enum aarch64_type_qualifiers - /* Lane indices selected in pairs. - must be in range, and flipped for - bigendian. */ - qualifier_lane_pair_index = 0x800, -+ /* Lane indices selected in quadtuplets. - must be in range, and flipped for -+ bigendian. 
*/ -+ qualifier_lane_quadtup_index = 0x1000, - }; - - typedef struct -@@ -173,6 +179,10 @@ aarch64_types_ternopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_unsigned, qualifier_unsigned, - qualifier_unsigned, qualifier_immediate }; - #define TYPES_TERNOPUI (aarch64_types_ternopu_imm_qualifiers) -+static enum aarch64_type_qualifiers -+aarch64_types_ternop_ssus_qualifiers[SIMD_MAX_BUILTIN_ARGS] -+ = { qualifier_none, qualifier_none, qualifier_unsigned, qualifier_none }; -+#define TYPES_TERNOP_SSUS (aarch64_types_ternop_ssus_qualifiers) - - - static enum aarch64_type_qualifiers -@@ -191,6 +201,19 @@ aarch64_types_quadopu_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] - qualifier_unsigned, qualifier_lane_index }; - #define TYPES_QUADOPU_LANE (aarch64_types_quadopu_lane_qualifiers) - -+static enum aarch64_type_qualifiers -+aarch64_types_quadopssus_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS] -+ = { qualifier_none, qualifier_none, qualifier_unsigned, -+ qualifier_none, qualifier_lane_quadtup_index }; -+#define TYPES_QUADOPSSUS_LANE_QUADTUP \ -+ (aarch64_types_quadopssus_lane_quadtup_qualifiers) -+static enum aarch64_type_qualifiers -+aarch64_types_quadopsssu_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS] -+ = { qualifier_none, qualifier_none, qualifier_none, -+ qualifier_unsigned, qualifier_lane_quadtup_index }; -+#define TYPES_QUADOPSSSU_LANE_QUADTUP \ -+ (aarch64_types_quadopsssu_lane_quadtup_qualifiers) -+ - static enum aarch64_type_qualifiers - aarch64_types_quadopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned, -@@ -347,6 +370,12 @@ aarch64_types_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] - #define VAR14(T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M, N) \ - VAR13 (T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M) \ - VAR1 (T, X, MAP, N) -+#define VAR15(T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O) \ -+ VAR14 (T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M, N) \ -+ VAR1 (T, X, MAP, O) -+#define VAR16(T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ -+ VAR15 (T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O) \ -+ VAR1 (T, X, MAP, P) - - #include "aarch64-builtin-iterators.h" - -@@ -432,10 +461,22 @@ enum aarch64_builtins - /* ARMv8.3-A Pointer Authentication Builtins. */ - AARCH64_PAUTH_BUILTIN_AUTIA1716, - AARCH64_PAUTH_BUILTIN_PACIA1716, -+ AARCH64_PAUTH_BUILTIN_AUTIB1716, -+ AARCH64_PAUTH_BUILTIN_PACIB1716, - AARCH64_PAUTH_BUILTIN_XPACLRI, - /* Special cased Armv8.3-A Complex FMA by Lane quad Builtins. */ - AARCH64_SIMD_FCMLA_LANEQ_BUILTIN_BASE, - AARCH64_SIMD_FCMLA_LANEQ_BUILTINS -+ /* Builtin for Arm8.3-a Javascript conversion instruction. */ -+ AARCH64_JSCVT, -+ /* TME builtins. */ -+ AARCH64_TME_BUILTIN_TSTART, -+ AARCH64_TME_BUILTIN_TCOMMIT, -+ AARCH64_TME_BUILTIN_TTEST, -+ AARCH64_TME_BUILTIN_TCANCEL, -+ /* Armv8.5-a RNG instruction builtins. */ -+ AARCH64_BUILTIN_RNG_RNDR, -+ AARCH64_BUILTIN_RNG_RNDRRS, - AARCH64_BUILTIN_MAX - }; - -@@ -490,6 +531,7 @@ const char *aarch64_scalar_builtin_types[] = { - "__builtin_aarch64_simd_oi", - "__builtin_aarch64_simd_ci", - "__builtin_aarch64_simd_xi", -+ "__builtin_aarch64_simd_bf", - NULL - }; - -@@ -547,6 +589,21 @@ static tree aarch64_simd_intXI_type_node = NULL_TREE; - tree aarch64_fp16_type_node = NULL_TREE; - tree aarch64_fp16_ptr_type_node = NULL_TREE; - -+/* Back-end node type for brain float (bfloat) types. 
*/ -+tree aarch64_bf16_type_node = NULL_TREE; -+tree aarch64_bf16_ptr_type_node = NULL_TREE; -+ -+/* Wrapper around add_builtin_function. NAME is the name of the built-in -+ function, TYPE is the function type, and CODE is the function subcode -+ (relative to AARCH64_BUILTIN_GENERAL). */ -+static tree -+aarch64_general_add_builtin (const char *name, tree type, unsigned int code) -+{ -+ code = (code << AARCH64_BUILTIN_SHIFT) | AARCH64_BUILTIN_GENERAL; -+ return add_builtin_function (name, type, code, BUILT_IN_MD, -+ NULL, NULL_TREE); -+} -+ - static const char * - aarch64_mangle_builtin_scalar_type (const_tree type) - { -@@ -585,7 +642,7 @@ aarch64_mangle_builtin_vector_type (const_tree type) - } - - const char * --aarch64_mangle_builtin_type (const_tree type) -+aarch64_general_mangle_builtin_type (const_tree type) - { - const char *mangle; - /* Walk through all the AArch64 builtins types tables to filter out the -@@ -627,6 +684,8 @@ aarch64_simd_builtin_std_type (machine_mode mode, - return float_type_node; - case E_DFmode: - return double_type_node; -+ case E_BFmode: -+ return aarch64_bf16_type_node; - default: - gcc_unreachable (); - } -@@ -718,6 +777,10 @@ aarch64_init_simd_builtin_types (void) - aarch64_simd_types[Float64x1_t].eltype = double_type_node; - aarch64_simd_types[Float64x2_t].eltype = double_type_node; - -+ /* Init Bfloat vector types with underlying __bf16 type. */ -+ aarch64_simd_types[Bfloat16x4_t].eltype = aarch64_bf16_type_node; -+ aarch64_simd_types[Bfloat16x8_t].eltype = aarch64_bf16_type_node; -+ - for (i = 0; i < nelts; i++) - { - tree eltype = aarch64_simd_types[i].eltype; -@@ -782,6 +845,8 @@ aarch64_init_simd_builtin_scalar_types (void) - "__builtin_aarch64_simd_poly128"); - (*lang_hooks.types.register_builtin_type) (intTI_type_node, - "__builtin_aarch64_simd_ti"); -+ (*lang_hooks.types.register_builtin_type) (aarch64_bf16_type_node, -+ "__builtin_aarch64_simd_bf"); - /* Unsigned integer types for various mode sizes. 
*/ - (*lang_hooks.types.register_builtin_type) (unsigned_intQI_type_node, - "__builtin_aarch64_simd_uqi"); -@@ -816,8 +881,7 @@ aarch64_init_fcmla_laneq_builtins (void) - = aarch64_simd_builtin_std_type (SImode, qualifier_lane_pair_index); - tree ftype = build_function_type_list (argtype, argtype, argtype, - quadtype, lanetype, NULL_TREE); -- tree fndecl = add_builtin_function (d->name, ftype, d->fcode, -- BUILT_IN_MD, NULL, NULL_TREE); -+ tree fndecl = aarch64_general_add_builtin (d->name, ftype, d->fcode); - - aarch64_builtin_decls[d->fcode] = fndecl; - } -@@ -846,10 +910,10 @@ aarch64_init_simd_builtins (void) - size_type_node, - intSI_type_node, - NULL); -- aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_LANE_CHECK] = -- add_builtin_function ("__builtin_aarch64_im_lane_boundsi", lane_check_fpr, -- AARCH64_SIMD_BUILTIN_LANE_CHECK, BUILT_IN_MD, -- NULL, NULL_TREE); -+ aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_LANE_CHECK] -+ = aarch64_general_add_builtin ("__builtin_aarch64_im_lane_boundsi", -+ lane_check_fpr, -+ AARCH64_SIMD_BUILTIN_LANE_CHECK); - - for (i = 0; i < ARRAY_SIZE (aarch64_simd_builtin_data); i++, fcode++) - { -@@ -947,8 +1011,7 @@ aarch64_init_simd_builtins (void) - snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s", - d->name); - -- fndecl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, -- NULL, NULL_TREE); -+ fndecl = aarch64_general_add_builtin (namebuf, ftype, fcode); - aarch64_builtin_decls[fcode] = fndecl; - } - -@@ -968,8 +1031,7 @@ aarch64_init_crc32_builtins () - tree argtype = aarch64_simd_builtin_std_type (d->mode, - qualifier_unsigned); - tree ftype = build_function_type_list (usi_type, usi_type, argtype, NULL_TREE); -- tree fndecl = add_builtin_function (d->name, ftype, d->fcode, -- BUILT_IN_MD, NULL, NULL_TREE); -+ tree fndecl = aarch64_general_add_builtin (d->name, ftype, d->fcode); - - aarch64_builtin_decls[d->fcode] = fndecl; - } -@@ -1009,8 +1071,8 @@ aarch64_init_builtin_rsqrt (void) - for (; bdd < bdd_end; bdd++) - { - ftype = build_function_type_list (bdd->type_node, bdd->type_node, NULL_TREE); -- fndecl = add_builtin_function (bdd->builtin_name, -- ftype, bdd->function_code, BUILT_IN_MD, NULL, NULL_TREE); -+ fndecl = aarch64_general_add_builtin (bdd->builtin_name, -+ ftype, bdd->function_code); - aarch64_builtin_decls[bdd->function_code] = fndecl; - } - } -@@ -1030,6 +1092,19 @@ aarch64_init_fp16_types (void) - aarch64_fp16_ptr_type_node = build_pointer_type (aarch64_fp16_type_node); - } - -+/* Initialize the backend REAL_TYPE type supporting bfloat types. */ -+static void -+aarch64_init_bf16_types (void) -+{ -+ aarch64_bf16_type_node = make_node (REAL_TYPE); -+ TYPE_PRECISION (aarch64_bf16_type_node) = 16; -+ SET_TYPE_MODE (aarch64_bf16_type_node, BFmode); -+ layout_type (aarch64_bf16_type_node); -+ -+ lang_hooks.types.register_builtin_type (aarch64_bf16_type_node, "__bf16"); -+ aarch64_bf16_ptr_type_node = build_pointer_type (aarch64_bf16_type_node); -+} -+ - /* Pointer authentication builtins that will become NOP on legacy platform. - Currently, these builtins are for internal use only (libgcc EH unwinder). 
*/ - -@@ -1044,21 +1119,77 @@ aarch64_init_pauth_hint_builtins (void) - = build_function_type_list (ptr_type_node, ptr_type_node, NULL_TREE); - - aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_AUTIA1716] -- = add_builtin_function ("__builtin_aarch64_autia1716", ftype_pointer_auth, -- AARCH64_PAUTH_BUILTIN_AUTIA1716, BUILT_IN_MD, NULL, -- NULL_TREE); -+ = aarch64_general_add_builtin ("__builtin_aarch64_autia1716", -+ ftype_pointer_auth, -+ AARCH64_PAUTH_BUILTIN_AUTIA1716); - aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_PACIA1716] -- = add_builtin_function ("__builtin_aarch64_pacia1716", ftype_pointer_auth, -- AARCH64_PAUTH_BUILTIN_PACIA1716, BUILT_IN_MD, NULL, -- NULL_TREE); -+ = aarch64_general_add_builtin ("__builtin_aarch64_pacia1716", -+ ftype_pointer_auth, -+ AARCH64_PAUTH_BUILTIN_PACIA1716); -+ aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_AUTIB1716] -+ = aarch64_general_add_builtin ("__builtin_aarch64_autib1716", -+ ftype_pointer_auth, -+ AARCH64_PAUTH_BUILTIN_AUTIB1716); -+ aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_PACIB1716] -+ = aarch64_general_add_builtin ("__builtin_aarch64_pacib1716", -+ ftype_pointer_auth, -+ AARCH64_PAUTH_BUILTIN_PACIB1716); - aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_XPACLRI] -- = add_builtin_function ("__builtin_aarch64_xpaclri", ftype_pointer_strip, -- AARCH64_PAUTH_BUILTIN_XPACLRI, BUILT_IN_MD, NULL, -- NULL_TREE); -+ = aarch64_general_add_builtin ("__builtin_aarch64_xpaclri", -+ ftype_pointer_strip, -+ AARCH64_PAUTH_BUILTIN_XPACLRI); -+} -+ -+/* Initialize the transactional memory extension (TME) builtins. */ -+static void -+aarch64_init_tme_builtins (void) -+{ -+ tree ftype_uint64_void -+ = build_function_type_list (uint64_type_node, NULL); -+ tree ftype_void_void -+ = build_function_type_list (void_type_node, NULL); -+ tree ftype_void_uint64 -+ = build_function_type_list (void_type_node, uint64_type_node, NULL); -+ -+ aarch64_builtin_decls[AARCH64_TME_BUILTIN_TSTART] -+ = aarch64_general_add_builtin ("__builtin_aarch64_tstart", -+ ftype_uint64_void, -+ AARCH64_TME_BUILTIN_TSTART); -+ aarch64_builtin_decls[AARCH64_TME_BUILTIN_TTEST] -+ = aarch64_general_add_builtin ("__builtin_aarch64_ttest", -+ ftype_uint64_void, -+ AARCH64_TME_BUILTIN_TTEST); -+ aarch64_builtin_decls[AARCH64_TME_BUILTIN_TCOMMIT] -+ = aarch64_general_add_builtin ("__builtin_aarch64_tcommit", -+ ftype_void_void, -+ AARCH64_TME_BUILTIN_TCOMMIT); -+ aarch64_builtin_decls[AARCH64_TME_BUILTIN_TCANCEL] -+ = aarch64_general_add_builtin ("__builtin_aarch64_tcancel", -+ ftype_void_uint64, -+ AARCH64_TME_BUILTIN_TCANCEL); -+} -+ -+/* Add builtins for Random Number instructions. */ -+ -+static void -+aarch64_init_rng_builtins (void) -+{ -+ tree unsigned_ptr_type = build_pointer_type (unsigned_intDI_type_node); -+ tree ftype -+ = build_function_type_list (integer_type_node, unsigned_ptr_type, NULL); -+ aarch64_builtin_decls[AARCH64_BUILTIN_RNG_RNDR] -+ = aarch64_general_add_builtin ("__builtin_aarch64_rndr", ftype, -+ AARCH64_BUILTIN_RNG_RNDR); -+ aarch64_builtin_decls[AARCH64_BUILTIN_RNG_RNDRRS] -+ = aarch64_general_add_builtin ("__builtin_aarch64_rndrrs", ftype, -+ AARCH64_BUILTIN_RNG_RNDRRS); - } - -+ -+/* Initialize all builtins in the AARCH64_BUILTIN_GENERAL group. 
*/ -+ - void --aarch64_init_builtins (void) -+aarch64_general_init_builtins (void) - { - tree ftype_set_fpr - = build_function_type_list (void_type_node, unsigned_type_node, NULL); -@@ -1066,25 +1197,38 @@ aarch64_init_builtins (void) - = build_function_type_list (unsigned_type_node, NULL); - - aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR] -- = add_builtin_function ("__builtin_aarch64_get_fpcr", ftype_get_fpr, -- AARCH64_BUILTIN_GET_FPCR, BUILT_IN_MD, NULL, NULL_TREE); -+ = aarch64_general_add_builtin ("__builtin_aarch64_get_fpcr", -+ ftype_get_fpr, -+ AARCH64_BUILTIN_GET_FPCR); - aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR] -- = add_builtin_function ("__builtin_aarch64_set_fpcr", ftype_set_fpr, -- AARCH64_BUILTIN_SET_FPCR, BUILT_IN_MD, NULL, NULL_TREE); -+ = aarch64_general_add_builtin ("__builtin_aarch64_set_fpcr", -+ ftype_set_fpr, -+ AARCH64_BUILTIN_SET_FPCR); - aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR] -- = add_builtin_function ("__builtin_aarch64_get_fpsr", ftype_get_fpr, -- AARCH64_BUILTIN_GET_FPSR, BUILT_IN_MD, NULL, NULL_TREE); -+ = aarch64_general_add_builtin ("__builtin_aarch64_get_fpsr", -+ ftype_get_fpr, -+ AARCH64_BUILTIN_GET_FPSR); - aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR] -- = add_builtin_function ("__builtin_aarch64_set_fpsr", ftype_set_fpr, -- AARCH64_BUILTIN_SET_FPSR, BUILT_IN_MD, NULL, NULL_TREE); -+ = aarch64_general_add_builtin ("__builtin_aarch64_set_fpsr", -+ ftype_set_fpr, -+ AARCH64_BUILTIN_SET_FPSR); - - aarch64_init_fp16_types (); - -+ aarch64_init_bf16_types (); -+ - if (TARGET_SIMD) - aarch64_init_simd_builtins (); - - aarch64_init_crc32_builtins (); - aarch64_init_builtin_rsqrt (); -+ aarch64_init_rng_builtins (); -+ -+ tree ftype_jcvt -+ = build_function_type_list (intSI_type_node, double_type_node, NULL); -+ aarch64_builtin_decls[AARCH64_JSCVT] -+ = aarch64_general_add_builtin ("__builtin_aarch64_jcvtzs", ftype_jcvt, -+ AARCH64_JSCVT); - - /* Initialize pointer authentication builtins which are backed by instructions - in NOP encoding space. -@@ -1094,10 +1238,14 @@ aarch64_init_builtins (void) - register them. */ - if (!TARGET_ILP32) - aarch64_init_pauth_hint_builtins (); -+ -+ if (TARGET_TME) -+ aarch64_init_tme_builtins (); - } - -+/* Implement TARGET_BUILTIN_DECL for the AARCH64_BUILTIN_GENERAL group. */ - tree --aarch64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) -+aarch64_general_builtin_decl (unsigned code, bool) - { - if (code >= AARCH64_BUILTIN_MAX) - return error_mark_node; -@@ -1112,6 +1260,7 @@ typedef enum - SIMD_ARG_LANE_INDEX, - SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX, - SIMD_ARG_LANE_PAIR_INDEX, -+ SIMD_ARG_LANE_QUADTUP_INDEX, - SIMD_ARG_STOP - } builtin_simd_arg; - -@@ -1201,9 +1350,25 @@ aarch64_simd_expand_args (rtx target, int icode, int have_retval, - op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane), - SImode); - } -- /* Fall through - if the lane index isn't a constant then -- the next case will error. */ -- /* FALLTHRU */ -+ /* If the lane index isn't a constant then error out. */ -+ goto constant_arg; -+ case SIMD_ARG_LANE_QUADTUP_INDEX: -+ /* Must be a previous operand into which this is an index and -+ index is restricted to nunits / 4. */ -+ gcc_assert (opc > 0); -+ if (CONST_INT_P (op[opc])) -+ { -+ machine_mode vmode = insn_data[icode].operand[opc - 1].mode; -+ unsigned int nunits -+ = GET_MODE_NUNITS (vmode).to_constant (); -+ aarch64_simd_lane_bounds (op[opc], 0, nunits / 4, exp); -+ /* Keep to GCC-vector-extension lane indices in the RTL. 
*/ -+ int lane = INTVAL (op[opc]); -+ op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane), -+ SImode); -+ } -+ /* If the lane index isn't a constant then error out. */ -+ goto constant_arg; - case SIMD_ARG_CONSTANT: - constant_arg: - if (!(*insn_data[icode].operand[opc].predicate) -@@ -1316,6 +1481,8 @@ aarch64_simd_expand_builtin (int fcode, tree exp, rtx target) - args[k] = SIMD_ARG_LANE_INDEX; - else if (d->qualifiers[qualifiers_k] & qualifier_lane_pair_index) - args[k] = SIMD_ARG_LANE_PAIR_INDEX; -+ else if (d->qualifiers[qualifiers_k] & qualifier_lane_quadtup_index) -+ args[k] = SIMD_ARG_LANE_QUADTUP_INDEX; - else if (d->qualifiers[qualifiers_k] & qualifier_struct_load_store_lane_index) - args[k] = SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX; - else if (d->qualifiers[qualifiers_k] & qualifier_immediate) -@@ -1497,17 +1664,90 @@ aarch64_expand_fcmla_builtin (tree exp, rtx target, int fcode) - return target; - } - --/* Expand an expression EXP that calls a built-in function, -- with result going to TARGET if that's convenient. */ -+/* Function to expand an expression EXP which calls one of the Transactional -+ Memory Extension (TME) builtins FCODE with the result going to TARGET. */ -+static rtx -+aarch64_expand_builtin_tme (int fcode, tree exp, rtx target) -+{ -+ switch (fcode) -+ { -+ case AARCH64_TME_BUILTIN_TSTART: -+ target = gen_reg_rtx (DImode); -+ emit_insn (GEN_FCN (CODE_FOR_tstart) (target)); -+ break; -+ -+ case AARCH64_TME_BUILTIN_TTEST: -+ target = gen_reg_rtx (DImode); -+ emit_insn (GEN_FCN (CODE_FOR_ttest) (target)); -+ break; -+ -+ case AARCH64_TME_BUILTIN_TCOMMIT: -+ emit_insn (GEN_FCN (CODE_FOR_tcommit) ()); -+ break; -+ -+ case AARCH64_TME_BUILTIN_TCANCEL: -+ { -+ tree arg0 = CALL_EXPR_ARG (exp, 0); -+ rtx op0 = expand_normal (arg0); -+ if (CONST_INT_P (op0) && UINTVAL (op0) <= 65536) -+ emit_insn (GEN_FCN (CODE_FOR_tcancel) (op0)); -+ else -+ { -+ error ("%Kargument must be a 16-bit constant immediate", exp); -+ return const0_rtx; -+ } -+ } -+ break; -+ -+ default : -+ gcc_unreachable (); -+ } -+ return target; -+} -+ -+/* Expand a random number builtin EXP with code FCODE, putting the result -+ int TARGET. If IGNORE is true the return value is ignored. */ -+ - rtx --aarch64_expand_builtin (tree exp, -- rtx target, -- rtx subtarget ATTRIBUTE_UNUSED, -- machine_mode mode ATTRIBUTE_UNUSED, -- int ignore ATTRIBUTE_UNUSED) -+aarch64_expand_rng_builtin (tree exp, rtx target, int fcode, int ignore) -+{ -+ rtx pat; -+ enum insn_code icode; -+ if (fcode == AARCH64_BUILTIN_RNG_RNDR) -+ icode = CODE_FOR_aarch64_rndr; -+ else if (fcode == AARCH64_BUILTIN_RNG_RNDRRS) -+ icode = CODE_FOR_aarch64_rndrrs; -+ else -+ gcc_unreachable (); -+ -+ rtx rand = gen_reg_rtx (DImode); -+ pat = GEN_FCN (icode) (rand); -+ if (!pat) -+ return NULL_RTX; -+ -+ tree arg0 = CALL_EXPR_ARG (exp, 0); -+ rtx res_addr = expand_normal (arg0); -+ res_addr = convert_memory_address (Pmode, res_addr); -+ rtx res_mem = gen_rtx_MEM (DImode, res_addr); -+ emit_insn (pat); -+ emit_move_insn (res_mem, rand); -+ /* If the status result is unused don't generate the CSET code. */ -+ if (ignore) -+ return target; -+ -+ rtx cc_reg = gen_rtx_REG (CC_Zmode, CC_REGNUM); -+ rtx cmp_rtx = gen_rtx_fmt_ee (NE, SImode, cc_reg, const0_rtx); -+ emit_insn (gen_aarch64_cstoresi (target, cmp_rtx, cc_reg)); -+ return target; -+} -+ -+/* Expand an expression EXP that calls built-in function FCODE, -+ with result going to TARGET if that's convenient. IGNORE is true -+ if the result of the builtin is ignored. 
*/ -+rtx -+aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target, -+ int ignore) - { -- tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); -- int fcode = DECL_FUNCTION_CODE (fndecl); - int icode; - rtx pat, op0; - tree arg0; -@@ -1540,6 +1780,8 @@ aarch64_expand_builtin (tree exp, - - case AARCH64_PAUTH_BUILTIN_AUTIA1716: - case AARCH64_PAUTH_BUILTIN_PACIA1716: -+ case AARCH64_PAUTH_BUILTIN_AUTIB1716: -+ case AARCH64_PAUTH_BUILTIN_PACIB1716: - case AARCH64_PAUTH_BUILTIN_XPACLRI: - arg0 = CALL_EXPR_ARG (exp, 0); - op0 = force_reg (Pmode, expand_normal (arg0)); -@@ -1563,8 +1805,24 @@ aarch64_expand_builtin (tree exp, - { - tree arg1 = CALL_EXPR_ARG (exp, 1); - rtx op1 = force_reg (Pmode, expand_normal (arg1)); -- icode = (fcode == AARCH64_PAUTH_BUILTIN_PACIA1716 -- ? CODE_FOR_paci1716 : CODE_FOR_auti1716); -+ switch (fcode) -+ { -+ case AARCH64_PAUTH_BUILTIN_AUTIA1716: -+ icode = CODE_FOR_autia1716; -+ break; -+ case AARCH64_PAUTH_BUILTIN_AUTIB1716: -+ icode = CODE_FOR_autib1716; -+ break; -+ case AARCH64_PAUTH_BUILTIN_PACIA1716: -+ icode = CODE_FOR_pacia1716; -+ break; -+ case AARCH64_PAUTH_BUILTIN_PACIB1716: -+ icode = CODE_FOR_pacib1716; -+ break; -+ default: -+ icode = 0; -+ gcc_unreachable (); -+ } - - rtx x16_reg = gen_rtx_REG (Pmode, R16_REGNUM); - rtx x17_reg = gen_rtx_REG (Pmode, R17_REGNUM); -@@ -1576,6 +1834,16 @@ aarch64_expand_builtin (tree exp, - - return target; - -+ case AARCH64_JSCVT: -+ arg0 = CALL_EXPR_ARG (exp, 0); -+ op0 = force_reg (DFmode, expand_normal (arg0)); -+ if (!target) -+ target = gen_reg_rtx (SImode); -+ else -+ target = force_reg (SImode, target); -+ emit_insn (GEN_FCN (CODE_FOR_aarch64_fjcvtzs) (target, op0)); -+ return target; -+ - case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V2SF: - case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ90_V2SF: - case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ180_V2SF: -@@ -1585,6 +1853,9 @@ aarch64_expand_builtin (tree exp, - case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ180_V4HF: - case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ270_V4HF: - return aarch64_expand_fcmla_builtin (exp, target, fcode); -+ case AARCH64_BUILTIN_RNG_RNDR: -+ case AARCH64_BUILTIN_RNG_RNDRRS: -+ return aarch64_expand_rng_builtin (exp, target, fcode, ignore); - } - - if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX) -@@ -1599,6 +1870,12 @@ aarch64_expand_builtin (tree exp, - || fcode == AARCH64_BUILTIN_RSQRT_V4SF) - return aarch64_expand_builtin_rsqrt (fcode, exp, target); - -+ if (fcode == AARCH64_TME_BUILTIN_TSTART -+ || fcode == AARCH64_TME_BUILTIN_TCOMMIT -+ || fcode == AARCH64_TME_BUILTIN_TTEST -+ || fcode == AARCH64_TME_BUILTIN_TCANCEL) -+ return aarch64_expand_builtin_tme (fcode, exp, target); -+ - gcc_unreachable (); - } - -@@ -1750,7 +2027,7 @@ aarch64_builtin_vectorized_function (unsigned int fn, tree type_out, - /* Return builtin for reciprocal square root. */ - - tree --aarch64_builtin_rsqrt (unsigned int fn) -+aarch64_general_builtin_rsqrt (unsigned int fn) - { - if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2df) - return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2DF]; -@@ -1765,13 +2042,14 @@ aarch64_builtin_rsqrt (unsigned int fn) - #define VAR1(T, N, MAP, A) \ - case AARCH64_SIMD_BUILTIN_##T##_##N##A: - -+/* Try to fold a call to the built-in function with subcode FCODE. The -+ function is passed the N_ARGS arguments in ARGS and it returns a value -+ of type TYPE. Return the new expression on success and NULL_TREE on -+ failure. 
*/ - tree --aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args, -- bool ignore ATTRIBUTE_UNUSED) -+aarch64_general_fold_builtin (unsigned int fcode, tree type, -+ unsigned int n_args ATTRIBUTE_UNUSED, tree *args) - { -- int fcode = DECL_FUNCTION_CODE (fndecl); -- tree type = TREE_TYPE (TREE_TYPE (fndecl)); -- - switch (fcode) - { - BUILTIN_VDQF (UNOP, abs, 2) -@@ -1787,109 +2065,90 @@ aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args, - return NULL_TREE; - } - --bool --aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi) -+/* Try to fold STMT, given that it's a call to the built-in function with -+ subcode FCODE. Return the new statement on success and null on -+ failure. */ -+gimple * -+aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt) - { -- bool changed = false; -- gimple *stmt = gsi_stmt (*gsi); -- tree call = gimple_call_fn (stmt); -- tree fndecl; - gimple *new_stmt = NULL; -- -- if (call) -+ unsigned nargs = gimple_call_num_args (stmt); -+ tree *args = (nargs > 0 -+ ? gimple_call_arg_ptr (stmt, 0) -+ : &error_mark_node); -+ -+ /* We use gimple's IFN_REDUC_(PLUS|MIN|MAX)s for float, signed int -+ and unsigned int; it will distinguish according to the types of -+ the arguments to the __builtin. */ -+ switch (fcode) - { -- fndecl = gimple_call_fndecl (stmt); -- if (fndecl) -+ BUILTIN_VALL (UNOP, reduc_plus_scal_, 10) -+ new_stmt = gimple_build_call_internal (IFN_REDUC_PLUS, -+ 1, args[0]); -+ gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); -+ break; -+ BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10) -+ BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10) -+ new_stmt = gimple_build_call_internal (IFN_REDUC_MAX, -+ 1, args[0]); -+ gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); -+ break; -+ BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10) -+ BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10) -+ new_stmt = gimple_build_call_internal (IFN_REDUC_MIN, -+ 1, args[0]); -+ gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); -+ break; -+ BUILTIN_GPF (BINOP, fmulx, 0) - { -- int fcode = DECL_FUNCTION_CODE (fndecl); -- unsigned nargs = gimple_call_num_args (stmt); -- tree *args = (nargs > 0 -- ? gimple_call_arg_ptr (stmt, 0) -- : &error_mark_node); -- -- /* We use gimple's IFN_REDUC_(PLUS|MIN|MAX)s for float, signed int -- and unsigned int; it will distinguish according to the types of -- the arguments to the __builtin. 
*/ -- switch (fcode) -+ gcc_assert (nargs == 2); -+ bool a0_cst_p = TREE_CODE (args[0]) == REAL_CST; -+ bool a1_cst_p = TREE_CODE (args[1]) == REAL_CST; -+ if (a0_cst_p || a1_cst_p) - { -- BUILTIN_VALL (UNOP, reduc_plus_scal_, 10) -- new_stmt = gimple_build_call_internal (IFN_REDUC_PLUS, -- 1, args[0]); -- gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); -- break; -- BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10) -- BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10) -- new_stmt = gimple_build_call_internal (IFN_REDUC_MAX, -- 1, args[0]); -- gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); -- break; -- BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10) -- BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10) -- new_stmt = gimple_build_call_internal (IFN_REDUC_MIN, -- 1, args[0]); -- gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); -- break; -- BUILTIN_GPF (BINOP, fmulx, 0) -+ if (a0_cst_p && a1_cst_p) - { -- gcc_assert (nargs == 2); -- bool a0_cst_p = TREE_CODE (args[0]) == REAL_CST; -- bool a1_cst_p = TREE_CODE (args[1]) == REAL_CST; -- if (a0_cst_p || a1_cst_p) -+ tree t0 = TREE_TYPE (args[0]); -+ real_value a0 = (TREE_REAL_CST (args[0])); -+ real_value a1 = (TREE_REAL_CST (args[1])); -+ if (real_equal (&a1, &dconst0)) -+ std::swap (a0, a1); -+ /* According to real_equal (), +0 equals -0. */ -+ if (real_equal (&a0, &dconst0) && real_isinf (&a1)) - { -- if (a0_cst_p && a1_cst_p) -- { -- tree t0 = TREE_TYPE (args[0]); -- real_value a0 = (TREE_REAL_CST (args[0])); -- real_value a1 = (TREE_REAL_CST (args[1])); -- if (real_equal (&a1, &dconst0)) -- std::swap (a0, a1); -- /* According to real_equal (), +0 equals -0. */ -- if (real_equal (&a0, &dconst0) && real_isinf (&a1)) -- { -- real_value res = dconst2; -- res.sign = a0.sign ^ a1.sign; -- new_stmt = -- gimple_build_assign (gimple_call_lhs (stmt), -- REAL_CST, -- build_real (t0, res)); -- } -- else -- new_stmt = -- gimple_build_assign (gimple_call_lhs (stmt), -- MULT_EXPR, -- args[0], args[1]); -- } -- else /* a0_cst_p ^ a1_cst_p. */ -- { -- real_value const_part = a0_cst_p -- ? TREE_REAL_CST (args[0]) : TREE_REAL_CST (args[1]); -- if (!real_equal (&const_part, &dconst0) -- && !real_isinf (&const_part)) -- new_stmt = -- gimple_build_assign (gimple_call_lhs (stmt), -- MULT_EXPR, args[0], args[1]); -- } -+ real_value res = dconst2; -+ res.sign = a0.sign ^ a1.sign; -+ new_stmt = gimple_build_assign (gimple_call_lhs (stmt), -+ REAL_CST, -+ build_real (t0, res)); - } -- if (new_stmt) -- { -- gimple_set_vuse (new_stmt, gimple_vuse (stmt)); -- gimple_set_vdef (new_stmt, gimple_vdef (stmt)); -- } -- break; -+ else -+ new_stmt = gimple_build_assign (gimple_call_lhs (stmt), -+ MULT_EXPR, -+ args[0], args[1]); - } -- default: -- break; -+ else /* a0_cst_p ^ a1_cst_p. */ -+ { -+ real_value const_part = a0_cst_p -+ ? 
TREE_REAL_CST (args[0]) : TREE_REAL_CST (args[1]); -+ if (!real_equal (&const_part, &dconst0) -+ && !real_isinf (&const_part)) -+ new_stmt = gimple_build_assign (gimple_call_lhs (stmt), -+ MULT_EXPR, args[0], -+ args[1]); -+ } -+ } -+ if (new_stmt) -+ { -+ gimple_set_vuse (new_stmt, gimple_vuse (stmt)); -+ gimple_set_vdef (new_stmt, gimple_vdef (stmt)); - } -+ break; - } -+ default: -+ break; - } -- -- if (new_stmt) -- { -- gsi_replace (gsi, new_stmt, true); -- changed = true; -- } -- -- return changed; -+ return new_stmt; - } - - void -diff --git a/gcc/config/aarch64/aarch64-c.c b/gcc/config/aarch64/aarch64-c.c -index 6d5acb02f..da78f6fe3 100644 ---- a/gcc/config/aarch64/aarch64-c.c -+++ b/gcc/config/aarch64/aarch64-c.c -@@ -110,6 +110,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) - aarch64_def_or_undef (TARGET_CRC32, "__ARM_FEATURE_CRC32", pfile); - aarch64_def_or_undef (TARGET_DOTPROD, "__ARM_FEATURE_DOTPROD", pfile); - aarch64_def_or_undef (TARGET_COMPLEX, "__ARM_FEATURE_COMPLEX", pfile); -+ aarch64_def_or_undef (TARGET_JSCVT, "__ARM_FEATURE_JCVT", pfile); - - cpp_undef (pfile, "__AARCH64_CMODEL_TINY__"); - cpp_undef (pfile, "__AARCH64_CMODEL_SMALL__"); -@@ -146,6 +147,13 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) - bits = 0; - builtin_define_with_int_value ("__ARM_FEATURE_SVE_BITS", bits); - } -+ aarch64_def_or_undef (TARGET_SVE_I8MM, -+ "__ARM_FEATURE_SVE_MATMUL_INT8", pfile); -+ aarch64_def_or_undef (TARGET_SVE_F32MM, -+ "__ARM_FEATURE_SVE_MATMUL_FP32", pfile); -+ aarch64_def_or_undef (TARGET_SVE_F64MM, -+ "__ARM_FEATURE_SVE_MATMUL_FP64", pfile); -+ aarch64_def_or_undef (TARGET_SVE2, "__ARM_FEATURE_SVE2", pfile); - - aarch64_def_or_undef (TARGET_LSE, "__ARM_FEATURE_ATOMICS", pfile); - aarch64_def_or_undef (TARGET_AES, "__ARM_FEATURE_AES", pfile); -@@ -156,6 +164,16 @@ aarch64_update_cpp_builtins (cpp_reader *pfile) - aarch64_def_or_undef (TARGET_SM4, "__ARM_FEATURE_SM4", pfile); - aarch64_def_or_undef (TARGET_F16FML, "__ARM_FEATURE_FP16_FML", pfile); - -+ aarch64_def_or_undef (TARGET_FRINT, "__ARM_FEATURE_FRINT", pfile); -+ aarch64_def_or_undef (TARGET_TME, "__ARM_FEATURE_TME", pfile); -+ aarch64_def_or_undef (TARGET_RNG, "__ARM_FEATURE_RNG", pfile); -+ -+ aarch64_def_or_undef (TARGET_I8MM, "__ARM_FEATURE_MATMUL_INT8", pfile); -+ aarch64_def_or_undef (TARGET_BF16_SIMD, -+ "__ARM_FEATURE_BF16_VECTOR_ARITHMETIC", pfile); -+ aarch64_def_or_undef (TARGET_BF16_FP, -+ "__ARM_FEATURE_BF16_SCALAR_ARITHMETIC", pfile); -+ - /* Not for ACLE, but required to keep "float.h" correct if we switch - target between implementations that do or do not support ARMv8.2-A - 16-bit floating-point extensions. */ -@@ -237,6 +255,73 @@ aarch64_pragma_target_parse (tree args, tree pop_target) - return true; - } - -+/* Implement "#pragma GCC aarch64". */ -+static void -+aarch64_pragma_aarch64 (cpp_reader *) -+{ -+ tree x; -+ if (pragma_lex (&x) != CPP_STRING) -+ { -+ error ("%<#pragma GCC aarch64%> requires a string parameter"); -+ return; -+ } -+ -+ const char *name = TREE_STRING_POINTER (x); -+ if (strcmp (name, "arm_sve.h") == 0) -+ aarch64_sve::handle_arm_sve_h (); -+ else -+ error ("unknown %<#pragma GCC aarch64%> option %qs", name); -+} -+ -+/* Implement TARGET_RESOLVE_OVERLOADED_BUILTIN. */ -+static tree -+aarch64_resolve_overloaded_builtin (unsigned int uncast_location, -+ tree fndecl, void *uncast_arglist) -+{ -+ vec empty = {}; -+ location_t location = (location_t) uncast_location; -+ vec *arglist = (uncast_arglist -+ ? 
(vec *) uncast_arglist -+ : &empty); -+ unsigned int code = DECL_MD_FUNCTION_CODE (fndecl); -+ unsigned int subcode = code >> AARCH64_BUILTIN_SHIFT; -+ tree new_fndecl; -+ switch (code & AARCH64_BUILTIN_CLASS) -+ { -+ case AARCH64_BUILTIN_GENERAL: -+ return NULL_TREE; -+ -+ case AARCH64_BUILTIN_SVE: -+ new_fndecl = aarch64_sve::resolve_overloaded_builtin (location, subcode, -+ arglist); -+ break; -+ } -+ if (new_fndecl == NULL_TREE || new_fndecl == error_mark_node) -+ return new_fndecl; -+ return build_function_call_vec (location, vNULL, new_fndecl, arglist, -+ NULL, fndecl); -+} -+ -+/* Implement TARGET_CHECK_BUILTIN_CALL. */ -+static bool -+aarch64_check_builtin_call (location_t loc, vec arg_loc, -+ tree fndecl, tree orig_fndecl, -+ unsigned int nargs, tree *args) -+{ -+ unsigned int code = DECL_MD_FUNCTION_CODE (fndecl); -+ unsigned int subcode = code >> AARCH64_BUILTIN_SHIFT; -+ switch (code & AARCH64_BUILTIN_CLASS) -+ { -+ case AARCH64_BUILTIN_GENERAL: -+ return true; -+ -+ case AARCH64_BUILTIN_SVE: -+ return aarch64_sve::check_builtin_call (loc, arg_loc, subcode, -+ orig_fndecl, nargs, args); -+ } -+ gcc_unreachable (); -+} -+ - /* Implement REGISTER_TARGET_PRAGMAS. */ - - void -@@ -244,4 +329,9 @@ aarch64_register_pragmas (void) - { - /* Update pragma hook to allow parsing #pragma GCC target. */ - targetm.target_option.pragma_parse = aarch64_pragma_target_parse; -+ -+ targetm.resolve_overloaded_builtin = aarch64_resolve_overloaded_builtin; -+ targetm.check_builtin_call = aarch64_check_builtin_call; -+ -+ c_register_pragma ("GCC", "aarch64", aarch64_pragma_aarch64); - } -diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def -index 82d91d625..053c6390e 100644 ---- a/gcc/config/aarch64/aarch64-cores.def -+++ b/gcc/config/aarch64/aarch64-cores.def -@@ -46,6 +46,7 @@ - /* ARMv8-A Architecture Processors. */ - - /* ARM ('A') cores. */ -+AARCH64_CORE("cortex-a34", cortexa34, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1) - AARCH64_CORE("cortex-a35", cortexa35, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1) - AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1) - AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1) -@@ -99,7 +100,11 @@ AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, 8_1A, AARCH64_FL_FOR - /* ARM ('A') cores. 
*/ - AARCH64_CORE("cortex-a55", cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1) - AARCH64_CORE("cortex-a75", cortexa75, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1) --AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa72, 0x41, 0xd0b, -1) -+AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1) -+AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa72, 0x41, 0xd0e, -1) -+AARCH64_CORE("cortex-a77", cortexa77, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa72, 0x41, 0xd0d, -1) -+AARCH64_CORE("cortex-a65", cortexa65, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1) -+AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1) - AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) - AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1) - AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa53, 0x41, 0xd4a, -1) -diff --git a/gcc/config/aarch64/aarch64-elf-raw.h b/gcc/config/aarch64/aarch64-elf-raw.h -index bbebd0ef0..8fe7b3783 100644 ---- a/gcc/config/aarch64/aarch64-elf-raw.h -+++ b/gcc/config/aarch64/aarch64-elf-raw.h -@@ -27,22 +27,6 @@ - " crtend%O%s crtn%O%s " \ - "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}" - --#if TARGET_FIX_ERR_A53_835769_DEFAULT --#define CA53_ERR_835769_SPEC \ -- " %{!mno-fix-cortex-a53-835769:--fix-cortex-a53-835769}" --#else --#define CA53_ERR_835769_SPEC \ -- " %{mfix-cortex-a53-835769:--fix-cortex-a53-835769}" --#endif -- --#if TARGET_FIX_ERR_A53_843419_DEFAULT --#define CA53_ERR_843419_SPEC \ -- " %{!mno-fix-cortex-a53-843419:--fix-cortex-a53-843419}" --#else --#define CA53_ERR_843419_SPEC \ -- " %{mfix-cortex-a53-843419:--fix-cortex-a53-843419}" --#endif -- - #ifndef LINK_SPEC - #define LINK_SPEC "%{h*} \ - %{static:-Bstatic} \ -@@ -51,8 +35,7 @@ - %{!static:%{rdynamic:-export-dynamic}} \ - %{mbig-endian:-EB} %{mlittle-endian:-EL} -X \ - -maarch64elf%{mabi=ilp32*:32}%{mbig-endian:b}" \ -- CA53_ERR_835769_SPEC \ -- CA53_ERR_843419_SPEC -+ AARCH64_ERRATA_LINK_SPEC - #endif - - #endif /* GCC_AARCH64_ELF_RAW_H */ -diff --git a/gcc/config/aarch64/aarch64-errata.h b/gcc/config/aarch64/aarch64-errata.h -new file mode 100644 -index 000000000..8f062536e ---- /dev/null -+++ b/gcc/config/aarch64/aarch64-errata.h -@@ -0,0 +1,44 @@ -+/* Machine description for AArch64 architecture. -+ Copyright (C) 2009-2019 Free Software Foundation, Inc. -+ Contributed by ARM Ltd. -+ -+ This file is part of GCC. 
-+ -+ GCC is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published by -+ the Free Software Foundation; either version 3, or (at your option) -+ any later version. -+ -+ GCC is distributed in the hope that it will be useful, but -+ WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ General Public License for more details. -+ -+ You should have received a copy of the GNU General Public License -+ along with GCC; see the file COPYING3. If not see -+ . */ -+ -+#ifndef GCC_AARCH64_ERRATA_H -+#define GCC_AARCH64_ERRATA_H -+ -+#if TARGET_FIX_ERR_A53_835769_DEFAULT -+#define CA53_ERR_835769_SPEC \ -+ " %{!mno-fix-cortex-a53-835769:--fix-cortex-a53-835769}" -+#else -+#define CA53_ERR_835769_SPEC \ -+ " %{mfix-cortex-a53-835769:--fix-cortex-a53-835769}" -+#endif -+ -+#if TARGET_FIX_ERR_A53_843419_DEFAULT -+#define CA53_ERR_843419_SPEC \ -+ " %{!mno-fix-cortex-a53-843419:--fix-cortex-a53-843419}" -+#else -+#define CA53_ERR_843419_SPEC \ -+ " %{mfix-cortex-a53-843419:--fix-cortex-a53-843419}" -+#endif -+ -+#define AARCH64_ERRATA_LINK_SPEC \ -+ CA53_ERR_835769_SPEC \ -+ CA53_ERR_843419_SPEC -+ -+#endif /* GCC_AARCH64_ERRATA_H */ -diff --git a/gcc/config/aarch64/aarch64-freebsd.h b/gcc/config/aarch64/aarch64-freebsd.h -index 899e6f95e..7a3e89b1b 100644 ---- a/gcc/config/aarch64/aarch64-freebsd.h -+++ b/gcc/config/aarch64/aarch64-freebsd.h -@@ -46,26 +46,8 @@ - -X" SUBTARGET_EXTRA_LINK_SPEC " \ - %{mbig-endian:-EB} %{mlittle-endian:-EL}" - --#if TARGET_FIX_ERR_A53_835769_DEFAULT --#define CA53_ERR_835769_SPEC \ -- " %{!mno-fix-cortex-a53-835769:--fix-cortex-a53-835769}" --#else --#define CA53_ERR_835769_SPEC \ -- " %{mfix-cortex-a53-835769:--fix-cortex-a53-835769}" --#endif -- --#ifdef TARGET_FIX_ERR_A53_843419_DEFAULT --#define CA53_ERR_843419_SPEC \ -- " %{!mno-fix-cortex-a53-843419:--fix-cortex-a53-843419}" --#else --#define CA53_ERR_843419_SPEC \ -- " %{mfix-cortex-a53-843419:--fix-cortex-a53-843419}" --#endif -- - #undef LINK_SPEC --#define LINK_SPEC FBSD_TARGET_LINK_SPEC \ -- CA53_ERR_835769_SPEC \ -- CA53_ERR_843419_SPEC -+#define LINK_SPEC FBSD_TARGET_LINK_SPEC AARCH64_ERRATA_LINK_SPEC - - #define GNU_USER_TARGET_MATHFILE_SPEC \ - "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}" -diff --git a/gcc/config/aarch64/aarch64-linux.h b/gcc/config/aarch64/aarch64-linux.h -index 5e8b34ded..6ff2163b6 100644 ---- a/gcc/config/aarch64/aarch64-linux.h -+++ b/gcc/config/aarch64/aarch64-linux.h -@@ -46,25 +46,8 @@ - %{mbig-endian:-EB} %{mlittle-endian:-EL} \ - -maarch64linux%{mabi=ilp32:32}%{mbig-endian:b}" - --#if TARGET_FIX_ERR_A53_835769_DEFAULT --#define CA53_ERR_835769_SPEC \ -- " %{!mno-fix-cortex-a53-835769:--fix-cortex-a53-835769}" --#else --#define CA53_ERR_835769_SPEC \ -- " %{mfix-cortex-a53-835769:--fix-cortex-a53-835769}" --#endif -- --#if TARGET_FIX_ERR_A53_843419_DEFAULT --#define CA53_ERR_843419_SPEC \ -- " %{!mno-fix-cortex-a53-843419:--fix-cortex-a53-843419}" --#else --#define CA53_ERR_843419_SPEC \ -- " %{mfix-cortex-a53-843419:--fix-cortex-a53-843419}" --#endif -- --#define LINK_SPEC LINUX_TARGET_LINK_SPEC \ -- CA53_ERR_835769_SPEC \ -- CA53_ERR_843419_SPEC -+ -+#define LINK_SPEC LINUX_TARGET_LINK_SPEC AARCH64_ERRATA_LINK_SPEC - - #define GNU_USER_TARGET_MATHFILE_SPEC \ - "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}" -diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def -index 
14c1a43fe..3640540b3 100644 ---- a/gcc/config/aarch64/aarch64-modes.def -+++ b/gcc/config/aarch64/aarch64-modes.def -@@ -33,6 +33,8 @@ - CC_MODE (CCFP); - CC_MODE (CCFPE); - CC_MODE (CC_SWP); -+CC_MODE (CC_NZC); /* Only N, Z and C bits of condition flags are valid. -+ (Used with SVE predicate tests.) */ - CC_MODE (CC_NZ); /* Only N and Z bits of condition flags are valid. */ - CC_MODE (CC_Z); /* Only Z bit of condition flags is valid. */ - CC_MODE (CC_C); /* C represents unsigned overflow of a simple addition. */ -@@ -60,6 +62,10 @@ ADJUST_ALIGNMENT (VNx8BI, 2); - ADJUST_ALIGNMENT (VNx4BI, 2); - ADJUST_ALIGNMENT (VNx2BI, 2); - -+/* Bfloat16 modes. */ -+FLOAT_MODE (BF, 2, 0); -+ADJUST_FLOAT_FORMAT (BF, &arm_bfloat_half_format); -+ - VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI. */ - VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI. */ - VECTOR_MODES (FLOAT, 8); /* V2SF. */ -@@ -80,13 +86,14 @@ INT_MODE (XI, 64); - strictly necessary to set the alignment here, since the default would - be clamped to BIGGEST_ALIGNMENT anyhow, but it seems clearer. */ - #define SVE_MODES(NVECS, VB, VH, VS, VD) \ -- VECTOR_MODES_WITH_PREFIX (VNx, INT, 16 * NVECS); \ -- VECTOR_MODES_WITH_PREFIX (VNx, FLOAT, 16 * NVECS); \ -+ VECTOR_MODES_WITH_PREFIX (VNx, INT, 16 * NVECS, 0); \ -+ VECTOR_MODES_WITH_PREFIX (VNx, FLOAT, 16 * NVECS, 0); \ - \ - ADJUST_NUNITS (VB##QI, aarch64_sve_vg * NVECS * 8); \ - ADJUST_NUNITS (VH##HI, aarch64_sve_vg * NVECS * 4); \ - ADJUST_NUNITS (VS##SI, aarch64_sve_vg * NVECS * 2); \ - ADJUST_NUNITS (VD##DI, aarch64_sve_vg * NVECS); \ -+ ADJUST_NUNITS (VH##BF, aarch64_sve_vg * NVECS * 4); \ - ADJUST_NUNITS (VH##HF, aarch64_sve_vg * NVECS * 4); \ - ADJUST_NUNITS (VS##SF, aarch64_sve_vg * NVECS * 2); \ - ADJUST_NUNITS (VD##DF, aarch64_sve_vg * NVECS); \ -@@ -95,6 +102,7 @@ INT_MODE (XI, 64); - ADJUST_ALIGNMENT (VH##HI, 16); \ - ADJUST_ALIGNMENT (VS##SI, 16); \ - ADJUST_ALIGNMENT (VD##DI, 16); \ -+ ADJUST_ALIGNMENT (VH##BF, 16); \ - ADJUST_ALIGNMENT (VH##HF, 16); \ - ADJUST_ALIGNMENT (VS##SF, 16); \ - ADJUST_ALIGNMENT (VD##DF, 16); -@@ -106,6 +114,40 @@ SVE_MODES (2, VNx32, VNx16, VNx8, VNx4) - SVE_MODES (3, VNx48, VNx24, VNx12, VNx6) - SVE_MODES (4, VNx64, VNx32, VNx16, VNx8) - -+/* Partial SVE vectors: -+ -+ VNx2QI VNx4QI VNx8QI -+ VNx2HI VNx4HI -+ VNx2SI -+ -+ In memory they occupy contiguous locations, in the same way as fixed-length -+ vectors. E.g. VNx8QImode is half the size of VNx16QImode. -+ -+ Passing 1 as the final argument ensures that the modes come after all -+ other modes in the GET_MODE_WIDER chain, so that we never pick them -+ in preference to a full vector mode. */ -+VECTOR_MODES_WITH_PREFIX (VNx, INT, 2, 1); -+VECTOR_MODES_WITH_PREFIX (VNx, INT, 4, 1); -+VECTOR_MODES_WITH_PREFIX (VNx, INT, 8, 1); -+ -+ADJUST_NUNITS (VNx2QI, aarch64_sve_vg); -+ADJUST_NUNITS (VNx2HI, aarch64_sve_vg); -+ADJUST_NUNITS (VNx2SI, aarch64_sve_vg); -+ -+ADJUST_NUNITS (VNx4QI, aarch64_sve_vg * 2); -+ADJUST_NUNITS (VNx4HI, aarch64_sve_vg * 2); -+ -+ADJUST_NUNITS (VNx8QI, aarch64_sve_vg * 4); -+ -+ADJUST_ALIGNMENT (VNx2QI, 1); -+ADJUST_ALIGNMENT (VNx4QI, 1); -+ADJUST_ALIGNMENT (VNx8QI, 1); -+ -+ADJUST_ALIGNMENT (VNx2HI, 2); -+ADJUST_ALIGNMENT (VNx4HI, 2); -+ -+ADJUST_ALIGNMENT (VNx2SI, 4); -+ - /* Quad float: 128-bit floating mode for long doubles. 
*/ - FLOAT_MODE (TF, 16, ieee_quad_format); - -diff --git a/gcc/config/aarch64/aarch64-netbsd.h b/gcc/config/aarch64/aarch64-netbsd.h -new file mode 100644 -index 000000000..e6c9264bd ---- /dev/null -+++ b/gcc/config/aarch64/aarch64-netbsd.h -@@ -0,0 +1,63 @@ -+/* Definitions for AArch64 running NetBSD -+ Copyright (C) 2016-2019 Free Software Foundation, Inc. -+ -+ This file is part of GCC. -+ -+ GCC is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published by -+ the Free Software Foundation; either version 3, or (at your option) -+ any later version. -+ -+ GCC is distributed in the hope that it will be useful, but -+ WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ General Public License for more details. -+ -+ You should have received a copy of the GNU General Public License -+ along with GCC; see the file COPYING3. If not see -+ . */ -+ -+#ifndef GCC_AARCH64_NETBSD_H -+#define GCC_AARCH64_NETBSD_H -+ -+#define TARGET_LINKER_BIG_EMULATION "aarch64nbsdb" -+#define TARGET_LINKER_LITTLE_EMULATION "aarch64nbsd" -+ -+#if TARGET_BIG_ENDIAN_DEFAULT -+#define TARGET_LINKER_EMULATION TARGET_LINKER_BIG_EMULATION -+#else -+#define TARGET_LINKER_EMULATION TARGET_LINKER_LITTLE_EMULATION -+#endif -+ -+#undef SUBTARGET_EXTRA_LINK_SPEC -+#define SUBTARGET_EXTRA_LINK_SPEC " -m" TARGET_LINKER_EMULATION -+ -+#define NETBSD_ENTRY_POINT "__start" -+ -+#define NETBSD_TARGET_LINK_SPEC "%{h*} " \ -+ "-X %{mbig-endian:-EB -m " TARGET_LINKER_BIG_EMULATION "} " \ -+ "%{mlittle-endian:-EL -m " TARGET_LINKER_LITTLE_EMULATION "} " \ -+ "%(netbsd_link_spec)" -+ -+#undef LINK_SPEC -+#define LINK_SPEC NETBSD_LINK_SPEC_ELF \ -+ NETBSD_TARGET_LINK_SPEC \ -+ AARCH64_ERRATA_LINK_SPEC -+ -+#undef TARGET_OS_CPP_BUILTINS -+#define TARGET_OS_CPP_BUILTINS() \ -+ do \ -+ { \ -+ NETBSD_OS_CPP_BUILTINS_ELF(); \ -+ } \ -+ while (0) -+ -+#undef SUBTARGET_CPP_SPEC -+#define SUBTARGET_CPP_SPEC NETBSD_CPP_SPEC -+ -+#undef EXTRA_SPECS -+#define EXTRA_SPECS \ -+ { "asm_cpu_spec", ASM_CPU_SPEC }, \ -+ NETBSD_SUBTARGET_EXTRA_SPECS -+ -+#endif /* GCC_AARCH64_NETBSD_H */ -diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def -index 010fd3ccf..345cdc4da 100644 ---- a/gcc/config/aarch64/aarch64-option-extensions.def -+++ b/gcc/config/aarch64/aarch64-option-extensions.def -@@ -45,29 +45,46 @@ - entries: aes, pmull, sha1, sha2 being present). In that case this field - should contain a space (" ") separated list of the strings in 'Features' - that are required. Their order is not important. An empty string means -- do not detect this feature during auto detection. */ -+ do not detect this feature during auto detection. - --/* NOTE: This file is being parsed by config.gcc and so the -- AARCH64_OPT_EXTENSION must adhere to a strict format: -- 1) No space between the AARCH64_OPT_EXTENSION and the opening (. -- 2) No space between the opening ( and the extension name. -- 3) No space after the extension name before the ,. -- 4) Spaces are only allowed after a , and around |. -- 5) Everything must be on one line. */ -+ NOTE: Any changes to the AARCH64_OPT_EXTENSION macro need to be mirrored in -+ config.gcc. */ - - /* Enabling "fp" just enables "fp". - Disabling "fp" also disables "simd", "crypto", "fp16", "aes", "sha2", -- "sha3", sm3/sm4 and "sve". 
*/ --AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, 0, AARCH64_FL_SIMD | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | AARCH64_FL_SM4 | AARCH64_FL_SVE, false, "fp") -+ "sha3", sm3/sm4, "sve", "sve2", "sve2-aes", "sve2-sha3", "sve2-sm4", -+ "sve2-bitperm", "i8mm", "f32mm", "f64mm", and "bf16". */ -+AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, 0, AARCH64_FL_SIMD | \ -+ AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | \ -+ AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | AARCH64_FL_SM4 | \ -+ AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_SVE2_AES | \ -+ AARCH64_FL_SVE2_SHA3 | AARCH64_FL_SVE2_SM4 | \ -+ AARCH64_FL_SVE2_BITPERM | AARCH64_FL_I8MM | \ -+ AARCH64_FL_F32MM | AARCH64_FL_F64MM | AARCH64_FL_BF16, -+ false, "fp") - - /* Enabling "simd" also enables "fp". - Disabling "simd" also disables "crypto", "dotprod", "aes", "sha2", "sha3", -- "sm3/sm4" and "sve". */ --AARCH64_OPT_EXTENSION("simd", AARCH64_FL_SIMD, AARCH64_FL_FP, AARCH64_FL_CRYPTO | AARCH64_FL_DOTPROD | AARCH64_FL_AES | AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | AARCH64_FL_SM4 | AARCH64_FL_SVE, false, "asimd") -+ "sm3/sm4", "sve", "sve2", "sve2-aes", "sve2-sha3", "sve2-sm4", -+ "sve2-bitperm", "i8mm", "f32mm" and "f64mm". */ -+AARCH64_OPT_EXTENSION("simd", AARCH64_FL_SIMD, AARCH64_FL_FP, \ -+ AARCH64_FL_CRYPTO | AARCH64_FL_DOTPROD | \ -+ AARCH64_FL_AES | AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | \ -+ AARCH64_FL_SM4 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | \ -+ AARCH64_FL_SVE2_AES | AARCH64_FL_SVE2_SHA3 | \ -+ AARCH64_FL_SVE2_SM4 | AARCH64_FL_SVE2_BITPERM | \ -+ AARCH64_FL_I8MM | AARCH64_FL_F32MM | AARCH64_FL_F64MM, \ -+ false, "asimd") - - /* Enabling "crypto" also enables "fp", "simd", "aes" and "sha2". -- Disabling "crypto" disables "crypto", "aes", "sha2", "sha3" and "sm3/sm4". */ --AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO, AARCH64_FL_FP | AARCH64_FL_SIMD | AARCH64_FL_AES | AARCH64_FL_SHA2, AARCH64_FL_AES | AARCH64_FL_SHA2 |AARCH64_FL_SHA3 | AARCH64_FL_SM4, true, "aes pmull sha1 sha2") -+ Disabling "crypto" disables "crypto", "aes", "sha2", "sha3" and "sm3/sm4", -+ "sve2-aes", "sve2-sha3", "sve2-sm4". */ -+AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO, AARCH64_FL_FP | \ -+ AARCH64_FL_SIMD | AARCH64_FL_AES | AARCH64_FL_SHA2, \ -+ AARCH64_FL_AES | AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | \ -+ AARCH64_FL_SM4 | AARCH64_FL_SVE2_AES | \ -+ AARCH64_FL_SVE2_SHA3 | AARCH64_FL_SVE2_SM4, true, \ -+ "aes pmull sha1 sha2") - - /* Enabling or disabling "crc" only changes "crc". */ - AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, 0, 0, false, "crc32") -@@ -76,43 +93,63 @@ AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, 0, 0, false, "crc32") - AARCH64_OPT_EXTENSION("lse", AARCH64_FL_LSE, 0, 0, false, "atomics") - - /* Enabling "fp16" also enables "fp". -- Disabling "fp16" disables "fp16", "fp16fml" and "sve". */ --AARCH64_OPT_EXTENSION("fp16", AARCH64_FL_F16, AARCH64_FL_FP, AARCH64_FL_F16FML | AARCH64_FL_SVE, false, "fphp asimdhp") -+ Disabling "fp16" disables "fp16", "fp16fml", "sve", "sve2", -+ "sve2-aes", "sve2-sha3", "sve2-sm4", "sve2-bitperm", "f32mm" and -+ "f64mm". */ -+AARCH64_OPT_EXTENSION("fp16", AARCH64_FL_F16, AARCH64_FL_FP, \ -+ AARCH64_FL_F16FML | AARCH64_FL_SVE | AARCH64_FL_F32MM | \ -+ AARCH64_FL_F64MM | AARCH64_FL_SVE2 | \ -+ AARCH64_FL_SVE2_AES | AARCH64_FL_SVE2_SHA3 | \ -+ AARCH64_FL_SVE2_SM4 | AARCH64_FL_SVE2_BITPERM, false, \ -+ "fphp asimdhp") - - /* Enabling or disabling "rcpc" only changes "rcpc". 
*/ - AARCH64_OPT_EXTENSION("rcpc", AARCH64_FL_RCPC, 0, 0, false, "lrcpc") - - /* Enabling "rdma" also enables "fp", "simd". - Disabling "rdma" just disables "rdma". */ --AARCH64_OPT_EXTENSION("rdma", AARCH64_FL_RDMA, AARCH64_FL_FP | AARCH64_FL_SIMD, 0, false, "asimdrdm") -+AARCH64_OPT_EXTENSION("rdma", AARCH64_FL_RDMA, \ -+ AARCH64_FL_FP | AARCH64_FL_SIMD, 0, false, "asimdrdm") - - /* Enabling "dotprod" also enables "simd". - Disabling "dotprod" only disables "dotprod". */ --AARCH64_OPT_EXTENSION("dotprod", AARCH64_FL_DOTPROD, AARCH64_FL_SIMD, 0, false, "asimddp") -+AARCH64_OPT_EXTENSION("dotprod", AARCH64_FL_DOTPROD, AARCH64_FL_SIMD, 0, \ -+ false, "asimddp") - - /* Enabling "aes" also enables "simd". -- Disabling "aes" just disables "aes". */ --AARCH64_OPT_EXTENSION("aes", AARCH64_FL_AES, AARCH64_FL_SIMD, 0, false, "aes") -+ Disabling "aes" disables "aes" and "sve2-aes'. */ -+AARCH64_OPT_EXTENSION("aes", AARCH64_FL_AES, AARCH64_FL_SIMD, \ -+ AARCH64_FL_SVE2_AES, false, "aes") - - /* Enabling "sha2" also enables "simd". - Disabling "sha2" just disables "sha2". */ --AARCH64_OPT_EXTENSION("sha2", AARCH64_FL_SHA2, AARCH64_FL_SIMD, 0, false, "sha1 sha2") -+AARCH64_OPT_EXTENSION("sha2", AARCH64_FL_SHA2, AARCH64_FL_SIMD, 0, false, \ -+ "sha1 sha2") - - /* Enabling "sha3" enables "simd" and "sha2". -- Disabling "sha3" just disables "sha3". */ --AARCH64_OPT_EXTENSION("sha3", AARCH64_FL_SHA3, AARCH64_FL_SIMD | AARCH64_FL_SHA2, 0, false, "sha3 sha512") -+ Disabling "sha3" disables "sha3" and "sve2-sha3". */ -+AARCH64_OPT_EXTENSION("sha3", AARCH64_FL_SHA3, AARCH64_FL_SIMD | \ -+ AARCH64_FL_SHA2, AARCH64_FL_SVE2_SHA3, false, \ -+ "sha3 sha512") - - /* Enabling "sm4" also enables "simd". -- Disabling "sm4" just disables "sm4". */ --AARCH64_OPT_EXTENSION("sm4", AARCH64_FL_SM4, AARCH64_FL_SIMD, 0, false, "sm3 sm4") -+ Disabling "sm4" disables "sm4" and "sve2-sm4". */ -+AARCH64_OPT_EXTENSION("sm4", AARCH64_FL_SM4, AARCH64_FL_SIMD, \ -+ AARCH64_FL_SVE2_SM4, false, "sm3 sm4") - - /* Enabling "fp16fml" also enables "fp" and "fp16". - Disabling "fp16fml" just disables "fp16fml". */ --AARCH64_OPT_EXTENSION("fp16fml", AARCH64_FL_F16FML, AARCH64_FL_FP | AARCH64_FL_F16, 0, false, "asimdfhm") -+AARCH64_OPT_EXTENSION("fp16fml", AARCH64_FL_F16FML, \ -+ AARCH64_FL_FP | AARCH64_FL_F16, 0, false, "asimdfhm") - - /* Enabling "sve" also enables "fp16", "fp" and "simd". -- Disabling "sve" just disables "sve". */ --AARCH64_OPT_EXTENSION("sve", AARCH64_FL_SVE, AARCH64_FL_FP | AARCH64_FL_SIMD | AARCH64_FL_F16, 0, false, "sve") -+ Disabling "sve" disables "sve", "f32mm", "f64mm", "sve2", "sve2-aes", -+ "sve2-sha3", "sve2-sm4" and "sve2-bitperm". */ -+AARCH64_OPT_EXTENSION("sve", AARCH64_FL_SVE, AARCH64_FL_FP | AARCH64_FL_SIMD | \ -+ AARCH64_FL_F16, AARCH64_FL_F32MM | AARCH64_FL_F64MM | \ -+ AARCH64_FL_SVE2 | AARCH64_FL_SVE2_AES | \ -+ AARCH64_FL_SVE2_SHA3 | AARCH64_FL_SVE2_SM4 | \ -+ AARCH64_FL_SVE2_BITPERM, false, "sve") - - /* Enabling/Disabling "profile" does not enable/disable any other feature. */ - AARCH64_OPT_EXTENSION("profile", AARCH64_FL_PROFILE, 0, 0, false, "") -@@ -124,12 +161,69 @@ AARCH64_OPT_EXTENSION("rng", AARCH64_FL_RNG, 0, 0, false, "") - AARCH64_OPT_EXTENSION("memtag", AARCH64_FL_MEMTAG, 0, 0, false, "") - - /* Enabling/Disabling "sb" only changes "sb". */ --AARCH64_OPT_EXTENSION("sb", AARCH64_FL_SB, 0, 0, false, "") -+AARCH64_OPT_EXTENSION("sb", AARCH64_FL_SB, 0, 0, false, "sb") - - /* Enabling/Disabling "ssbs" only changes "ssbs". 
*/ --AARCH64_OPT_EXTENSION("ssbs", AARCH64_FL_SSBS, 0, 0, false, "") -+AARCH64_OPT_EXTENSION("ssbs", AARCH64_FL_SSBS, 0, 0, false, "ssbs") - - /* Enabling/Disabling "predres" only changes "predres". */ - AARCH64_OPT_EXTENSION("predres", AARCH64_FL_PREDRES, 0, 0, false, "") - -+/* Enabling "sve2" also enables "sve", "fp16", "fp", and "simd". -+ Disabling "sve2" disables "sve2", "sve2-aes", "sve2-sha3", "sve2-sm4", and -+ "sve2-bitperm". */ -+AARCH64_OPT_EXTENSION("sve2", AARCH64_FL_SVE2, AARCH64_FL_SVE | \ -+ AARCH64_FL_FP | AARCH64_FL_SIMD | AARCH64_FL_F16, \ -+ AARCH64_FL_SVE2_AES | AARCH64_FL_SVE2_SHA3 | \ -+ AARCH64_FL_SVE2_SM4 | AARCH64_FL_SVE2_BITPERM, false, "sve2") -+ -+/* Enabling "sve2-sm4" also enables "sm4", "simd", "fp16", "fp", "sve", and -+ "sve2". Disabling "sve2-sm4" just disables "sve2-sm4". */ -+AARCH64_OPT_EXTENSION("sve2-sm4", AARCH64_FL_SVE2_SM4, AARCH64_FL_SM4 | \ -+ AARCH64_FL_SIMD | AARCH64_FL_F16 | AARCH64_FL_FP | \ -+ AARCH64_FL_SVE | AARCH64_FL_SVE2, 0, false, "svesm4") -+ -+/* Enabling "sve2-aes" also enables "aes", "simd", "fp16", "fp", "sve", and -+ "sve2". Disabling "sve2-aes" just disables "sve2-aes". */ -+AARCH64_OPT_EXTENSION("sve2-aes", AARCH64_FL_SVE2_AES, AARCH64_FL_AES | \ -+ AARCH64_FL_SIMD | AARCH64_FL_F16 | AARCH64_FL_FP | \ -+ AARCH64_FL_SVE | AARCH64_FL_SVE2, 0, false, "sveaes") -+ -+/* Enabling "sve2-sha3" also enables "sha3", "simd", "fp16", "fp", "sve", and -+ "sve2". Disabling "sve2-sha3" just disables "sve2-sha3". */ -+AARCH64_OPT_EXTENSION("sve2-sha3", AARCH64_FL_SVE2_SHA3, AARCH64_FL_SHA3 | \ -+ AARCH64_FL_SIMD | AARCH64_FL_F16 | AARCH64_FL_FP | \ -+ AARCH64_FL_SVE | AARCH64_FL_SVE2, 0, false, "svesha3") -+ -+/* Enabling "sve2-bitperm" also enables "simd", "fp16", "fp", "sve", and -+ "sve2". Disabling "sve2-bitperm" just disables "sve2-bitperm". */ -+AARCH64_OPT_EXTENSION("sve2-bitperm", AARCH64_FL_SVE2_BITPERM, AARCH64_FL_SIMD | \ -+ AARCH64_FL_F16 | AARCH64_FL_FP | AARCH64_FL_SVE | \ -+ AARCH64_FL_SVE2, 0, false, "svebitperm") -+ -+/* Enabling or disabling "tme" only changes "tme". */ -+AARCH64_OPT_EXTENSION("tme", AARCH64_FL_TME, 0, 0, false, "") -+ -+/* Enabling "i8mm" also enables "simd" and "fp". -+ Disabling "i8mm" only disables "i8mm". */ -+AARCH64_OPT_EXTENSION("i8mm", AARCH64_FL_I8MM, \ -+ AARCH64_FL_SIMD | AARCH64_FL_FP, 0, false, "i8mm") -+ -+/* Enabling "f32mm" also enables "sve", "fp16", "fp", and "simd". -+ Disabling "f32mm" only disables "f32mm". */ -+AARCH64_OPT_EXTENSION("f32mm", AARCH64_FL_F32MM, \ -+ AARCH64_FL_SVE | AARCH64_FL_F16 | AARCH64_FL_FP | \ -+ AARCH64_FL_SIMD, 0, false, "f32mm") -+ -+/* Enabling "f64mm" also enables "sve", "fp16", "fp", and "simd". -+ Disabling "f64mm" only disables "f64mm". */ -+AARCH64_OPT_EXTENSION("f64mm", AARCH64_FL_F64MM, \ -+ AARCH64_FL_SVE | AARCH64_FL_F16 | AARCH64_FL_FP | \ -+ AARCH64_FL_SIMD, 0, false, "f64mm") -+ -+/* Enabling "bf16" also enables "simd" and "fp". -+ Disabling "bf16" only disables "bf16". */ -+AARCH64_OPT_EXTENSION("bf16", AARCH64_FL_BF16, \ -+ AARCH64_FL_SIMD | AARCH64_FL_FP, 0, false, "bf16") -+ - #undef AARCH64_OPT_EXTENSION -diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h -index 994bcfc7e..5e0a499e8 100644 ---- a/gcc/config/aarch64/aarch64-protos.h -+++ b/gcc/config/aarch64/aarch64-protos.h -@@ -396,8 +396,81 @@ enum simd_immediate_check { - AARCH64_CHECK_MOV = AARCH64_CHECK_ORR | AARCH64_CHECK_BIC - }; - -+/* The key type that -msign-return-address should use. 
*/ -+enum aarch64_key_type { -+ AARCH64_KEY_A, -+ AARCH64_KEY_B -+}; -+ -+extern enum aarch64_key_type aarch64_ra_sign_key; -+ - extern struct tune_params aarch64_tune_params; - -+/* The available SVE predicate patterns, known in the ACLE as "svpattern". */ -+#define AARCH64_FOR_SVPATTERN(T) \ -+ T (POW2, pow2, 0) \ -+ T (VL1, vl1, 1) \ -+ T (VL2, vl2, 2) \ -+ T (VL3, vl3, 3) \ -+ T (VL4, vl4, 4) \ -+ T (VL5, vl5, 5) \ -+ T (VL6, vl6, 6) \ -+ T (VL7, vl7, 7) \ -+ T (VL8, vl8, 8) \ -+ T (VL16, vl16, 9) \ -+ T (VL32, vl32, 10) \ -+ T (VL64, vl64, 11) \ -+ T (VL128, vl128, 12) \ -+ T (VL256, vl256, 13) \ -+ T (MUL4, mul4, 29) \ -+ T (MUL3, mul3, 30) \ -+ T (ALL, all, 31) -+ -+/* The available SVE prefetch operations, known in the ACLE as "svprfop". */ -+#define AARCH64_FOR_SVPRFOP(T) \ -+ T (PLDL1KEEP, pldl1keep, 0) \ -+ T (PLDL1STRM, pldl1strm, 1) \ -+ T (PLDL2KEEP, pldl2keep, 2) \ -+ T (PLDL2STRM, pldl2strm, 3) \ -+ T (PLDL3KEEP, pldl3keep, 4) \ -+ T (PLDL3STRM, pldl3strm, 5) \ -+ T (PSTL1KEEP, pstl1keep, 8) \ -+ T (PSTL1STRM, pstl1strm, 9) \ -+ T (PSTL2KEEP, pstl2keep, 10) \ -+ T (PSTL2STRM, pstl2strm, 11) \ -+ T (PSTL3KEEP, pstl3keep, 12) \ -+ T (PSTL3STRM, pstl3strm, 13) -+ -+#define AARCH64_SVENUM(UPPER, LOWER, VALUE) AARCH64_SV_##UPPER = VALUE, -+enum aarch64_svpattern { -+ AARCH64_FOR_SVPATTERN (AARCH64_SVENUM) -+ AARCH64_NUM_SVPATTERNS -+}; -+ -+enum aarch64_svprfop { -+ AARCH64_FOR_SVPRFOP (AARCH64_SVENUM) -+ AARCH64_NUM_SVPRFOPS -+}; -+#undef AARCH64_SVENUM -+ -+/* It's convenient to divide the built-in function codes into groups, -+ rather than having everything in a single enum. This type enumerates -+ those groups. */ -+enum aarch64_builtin_class -+{ -+ AARCH64_BUILTIN_GENERAL, -+ AARCH64_BUILTIN_SVE -+}; -+ -+/* Built-in function codes are structured so that the low -+ AARCH64_BUILTIN_SHIFT bits contain the aarch64_builtin_class -+ and the upper bits contain a group-specific subcode. */ -+const unsigned int AARCH64_BUILTIN_SHIFT = 1; -+ -+/* Mask that selects the aarch64_builtin_class part of a function code. 
*/ -+const unsigned int AARCH64_BUILTIN_CLASS = (1 << AARCH64_BUILTIN_SHIFT) - 1; -+ -+void aarch64_post_cfi_startproc (void); - poly_int64 aarch64_initial_elimination_offset (unsigned, unsigned); - int aarch64_get_condition_code (rtx); - bool aarch64_address_valid_for_prefetch_p (rtx, bool); -@@ -407,6 +480,8 @@ unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in); - bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode); - int aarch64_branch_cost (bool, bool); - enum aarch64_symbol_type aarch64_classify_symbolic_expression (rtx); -+opt_machine_mode aarch64_vq_mode (scalar_mode); -+opt_machine_mode aarch64_full_sve_mode (scalar_mode); - bool aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode); - bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT); - bool aarch64_const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT, -@@ -414,14 +489,13 @@ bool aarch64_const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT, - bool aarch64_constant_address_p (rtx); - bool aarch64_emit_approx_div (rtx, rtx, rtx); - bool aarch64_emit_approx_sqrt (rtx, rtx, bool); --void aarch64_expand_call (rtx, rtx, bool); --bool aarch64_expand_movmem (rtx *); -+void aarch64_expand_call (rtx, rtx, rtx, bool); -+bool aarch64_expand_cpymem (rtx *); - bool aarch64_float_const_zero_rtx_p (rtx); - bool aarch64_float_const_rtx_p (rtx); - bool aarch64_function_arg_regno_p (unsigned); - bool aarch64_fusion_enabled_p (enum aarch64_fusion_pairs); --bool aarch64_gen_movmemqi (rtx *); --bool aarch64_gimple_fold_builtin (gimple_stmt_iterator *); -+bool aarch64_gen_cpymemqi (rtx *); - bool aarch64_is_extend_from_extract (scalar_int_mode, rtx, rtx); - bool aarch64_is_long_call_p (rtx); - bool aarch64_is_noplt_call_p (rtx); -@@ -436,24 +510,32 @@ bool aarch64_masks_and_shift_for_bfi_p (scalar_int_mode, unsigned HOST_WIDE_INT, - unsigned HOST_WIDE_INT); - bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx); - bool aarch64_move_imm (HOST_WIDE_INT, machine_mode); -+machine_mode aarch64_sve_int_mode (machine_mode); - opt_machine_mode aarch64_sve_pred_mode (unsigned int); -+opt_machine_mode aarch64_sve_data_mode (scalar_mode, poly_uint64); -+bool aarch64_sve_mode_p (machine_mode); -+HOST_WIDE_INT aarch64_fold_sve_cnt_pat (aarch64_svpattern, unsigned int); - bool aarch64_sve_cnt_immediate_p (rtx); -+bool aarch64_sve_scalar_inc_dec_immediate_p (rtx); - bool aarch64_sve_addvl_addpl_immediate_p (rtx); --bool aarch64_sve_inc_dec_immediate_p (rtx); -+bool aarch64_sve_vector_inc_dec_immediate_p (rtx); - int aarch64_add_offset_temporaries (rtx); - void aarch64_split_add_offset (scalar_int_mode, rtx, rtx, rtx, rtx, rtx); - bool aarch64_mov_operand_p (rtx, machine_mode); - rtx aarch64_reverse_mask (machine_mode, unsigned int); - bool aarch64_offset_7bit_signed_scaled_p (machine_mode, poly_int64); - bool aarch64_offset_9bit_signed_unscaled_p (machine_mode, poly_int64); -+char *aarch64_output_sve_prefetch (const char *, rtx, const char *); - char *aarch64_output_sve_cnt_immediate (const char *, const char *, rtx); --char *aarch64_output_sve_addvl_addpl (rtx, rtx, rtx); --char *aarch64_output_sve_inc_dec_immediate (const char *, rtx); -+char *aarch64_output_sve_cnt_pat_immediate (const char *, const char *, rtx *); -+char *aarch64_output_sve_scalar_inc_dec (rtx); -+char *aarch64_output_sve_addvl_addpl (rtx); -+char *aarch64_output_sve_vector_inc_dec (const char *, rtx); - char *aarch64_output_scalar_simd_mov_immediate (rtx, scalar_int_mode); - char *aarch64_output_simd_mov_immediate (rtx, 
unsigned, - enum simd_immediate_check w = AARCH64_CHECK_MOV); - char *aarch64_output_sve_mov_immediate (rtx); --char *aarch64_output_ptrue (machine_mode, char); -+char *aarch64_output_sve_ptrues (rtx); - bool aarch64_pad_reg_upward (machine_mode, const_tree, bool); - bool aarch64_regno_ok_for_base_p (int, bool); - bool aarch64_regno_ok_for_index_p (int, bool); -@@ -462,11 +544,13 @@ bool aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode, - bool high); - bool aarch64_simd_scalar_immediate_valid_for_move (rtx, scalar_int_mode); - bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool); -+bool aarch64_sve_ptrue_svpattern_p (rtx, struct simd_immediate_info *); - bool aarch64_simd_valid_immediate (rtx, struct simd_immediate_info *, - enum simd_immediate_check w = AARCH64_CHECK_MOV); - rtx aarch64_check_zero_based_sve_index_immediate (rtx); - bool aarch64_sve_index_immediate_p (rtx); - bool aarch64_sve_arith_immediate_p (rtx, bool); -+bool aarch64_sve_sqadd_sqsub_immediate_p (rtx, bool); - bool aarch64_sve_bitmask_immediate_p (rtx); - bool aarch64_sve_dup_immediate_p (rtx); - bool aarch64_sve_cmp_immediate_p (rtx, bool); -@@ -476,15 +560,15 @@ bool aarch64_split_dimode_const_store (rtx, rtx); - bool aarch64_symbolic_address_p (rtx); - bool aarch64_uimm12_shift (HOST_WIDE_INT); - bool aarch64_use_return_insn_p (void); --bool aarch64_use_simple_return_insn_p (void); --const char *aarch64_mangle_builtin_type (const_tree); - const char *aarch64_output_casesi (rtx *); - -+unsigned int aarch64_tlsdesc_abi_id (); - enum aarch64_symbol_type aarch64_classify_symbol (rtx, HOST_WIDE_INT); - enum aarch64_symbol_type aarch64_classify_tls_symbol (rtx); - enum reg_class aarch64_regno_regclass (unsigned); - int aarch64_asm_preferred_eh_data_format (int, int); - int aarch64_fpconst_pow_of_2 (rtx); -+int aarch64_fpconst_pow2_recip (rtx); - machine_mode aarch64_hard_regno_caller_save_mode (unsigned, unsigned, - machine_mode); - int aarch64_uxt_size (int, HOST_WIDE_INT); -@@ -496,13 +580,17 @@ rtx aarch64_return_addr (int, rtx); - rtx aarch64_simd_gen_const_vector_dup (machine_mode, HOST_WIDE_INT); - bool aarch64_simd_mem_operand_p (rtx); - bool aarch64_sve_ld1r_operand_p (rtx); -+bool aarch64_sve_ld1rq_operand_p (rtx); -+bool aarch64_sve_ld1ro_operand_p (rtx, scalar_mode); -+bool aarch64_sve_ldff1_operand_p (rtx); -+bool aarch64_sve_ldnf1_operand_p (rtx); - bool aarch64_sve_ldr_operand_p (rtx); -+bool aarch64_sve_prefetch_operand_p (rtx, machine_mode); - bool aarch64_sve_struct_memory_operand_p (rtx); - rtx aarch64_simd_vect_par_cnst_half (machine_mode, int, bool); - rtx aarch64_gen_stepped_int_parallel (unsigned int, int, int); - bool aarch64_stepped_int_parallel_p (rtx, int); - rtx aarch64_tls_get_addr (void); --tree aarch64_fold_builtin (tree, int, tree *, bool); - unsigned aarch64_dbx_register_number (unsigned); - unsigned aarch64_trampoline_size (void); - void aarch64_asm_output_labelref (FILE *, const char *); -@@ -512,7 +600,15 @@ const char * aarch64_output_probe_stack_range (rtx, rtx); - const char * aarch64_output_probe_sve_stack_clash (rtx, rtx, rtx, rtx); - void aarch64_err_no_fpadvsimd (machine_mode); - void aarch64_expand_epilogue (bool); --void aarch64_expand_mov_immediate (rtx, rtx, rtx (*) (rtx, rtx) = 0); -+rtx aarch64_ptrue_all (unsigned int); -+opt_machine_mode aarch64_ptrue_all_mode (rtx); -+rtx aarch64_convert_sve_data_to_pred (rtx, machine_mode, rtx); -+rtx aarch64_expand_sve_dupq (rtx, machine_mode, rtx); -+void aarch64_expand_mov_immediate (rtx, rtx); -+rtx aarch64_ptrue_reg 
(machine_mode); -+rtx aarch64_pfalse_reg (machine_mode); -+bool aarch64_sve_pred_dominates_p (rtx *, rtx); -+bool aarch64_sve_same_pred_for_ptest_p (rtx *, rtx *); - void aarch64_emit_sve_pred_move (rtx, rtx, rtx); - void aarch64_expand_sve_mem_move (rtx, rtx, machine_mode); - bool aarch64_maybe_expand_sve_subreg_move (rtx, rtx); -@@ -520,8 +616,9 @@ rtx aarch64_replace_reg_mode (rtx, machine_mode); - void aarch64_split_sve_subreg_move (rtx, rtx, rtx); - void aarch64_expand_prologue (void); - void aarch64_expand_vector_init (rtx, rtx); -+void aarch64_sve_expand_vector_init (rtx, rtx); - void aarch64_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx, -- const_tree, unsigned); -+ const_tree, unsigned, bool = false); - void aarch64_init_expanders (void); - void aarch64_init_simd_builtins (void); - void aarch64_emit_call_insn (rtx); -@@ -587,22 +684,39 @@ bool aarch64_gen_adjusted_ldpstp (rtx *, bool, scalar_mode, RTX_CODE); - void aarch64_expand_sve_vec_cmp_int (rtx, rtx_code, rtx, rtx); - bool aarch64_expand_sve_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool); - void aarch64_expand_sve_vcond (machine_mode, machine_mode, rtx *); --#endif /* RTX_CODE */ - --void aarch64_init_builtins (void); -+bool aarch64_prepare_sve_int_fma (rtx *, rtx_code); -+bool aarch64_prepare_sve_cond_int_fma (rtx *, rtx_code); -+#endif /* RTX_CODE */ - - bool aarch64_process_target_attr (tree); - void aarch64_override_options_internal (struct gcc_options *); - --rtx aarch64_expand_builtin (tree exp, -- rtx target, -- rtx subtarget ATTRIBUTE_UNUSED, -- machine_mode mode ATTRIBUTE_UNUSED, -- int ignore ATTRIBUTE_UNUSED); --tree aarch64_builtin_decl (unsigned, bool ATTRIBUTE_UNUSED); --tree aarch64_builtin_rsqrt (unsigned int); -+const char *aarch64_general_mangle_builtin_type (const_tree); -+void aarch64_general_init_builtins (void); -+tree aarch64_general_fold_builtin (unsigned int, tree, unsigned int, tree *); -+gimple *aarch64_general_gimple_fold_builtin (unsigned int, gcall *); -+rtx aarch64_general_expand_builtin (unsigned int, tree, rtx, int); -+tree aarch64_general_builtin_decl (unsigned, bool); -+tree aarch64_general_builtin_rsqrt (unsigned int); - tree aarch64_builtin_vectorized_function (unsigned int, tree, tree); - -+namespace aarch64_sve { -+ void init_builtins (); -+ void handle_arm_sve_h (); -+ tree builtin_decl (unsigned, bool); -+ bool builtin_type_p (const_tree); -+ bool svbool_type_p (const_tree); -+ unsigned int nvectors_if_data_type (const_tree); -+ const char *mangle_builtin_type (const_tree); -+ tree resolve_overloaded_builtin (location_t, unsigned int, -+ vec *); -+ bool check_builtin_call (location_t, vec, unsigned int, -+ tree, unsigned int, tree *); -+ gimple *gimple_fold_builtin (unsigned int, gimple_stmt_iterator *, gcall *); -+ rtx expand_builtin (unsigned int, tree, rtx); -+} -+ - extern void aarch64_split_combinev16qi (rtx operands[3]); - extern void aarch64_expand_vec_perm (rtx, rtx, rtx, rtx, unsigned int); - extern void aarch64_expand_sve_vec_perm (rtx, rtx, rtx, rtx); -@@ -629,11 +743,10 @@ bool aarch64_handle_option (struct gcc_options *, struct gcc_options *, - const struct cl_decoded_option *, location_t); - const char *aarch64_rewrite_selected_cpu (const char *name); - enum aarch64_parse_opt_result aarch64_parse_extension (const char *, -- unsigned long *, -+ uint64_t *, - std::string *); - void aarch64_get_all_extension_candidates (auto_vec *candidates); --std::string aarch64_get_extension_string_for_isa_flags (unsigned long, -- unsigned long); -+std::string 
aarch64_get_extension_string_for_isa_flags (uint64_t, uint64_t); - - /* Defined in aarch64-d.c */ - extern void aarch64_d_target_versions (void); -@@ -647,4 +760,17 @@ poly_uint64 aarch64_regmode_natural_size (machine_mode); - - bool aarch64_high_bits_all_ones_p (HOST_WIDE_INT); - -+struct atomic_ool_names -+{ -+ const char *str[5][4]; -+}; -+ -+rtx aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx, -+ const atomic_ool_names *names); -+extern const atomic_ool_names aarch64_ool_swp_names; -+extern const atomic_ool_names aarch64_ool_ldadd_names; -+extern const atomic_ool_names aarch64_ool_ldset_names; -+extern const atomic_ool_names aarch64_ool_ldclr_names; -+extern const atomic_ool_names aarch64_ool_ldeor_names; -+ - #endif /* GCC_AARCH64_PROTOS_H */ -diff --git a/gcc/config/aarch64/aarch64-simd-builtin-types.def b/gcc/config/aarch64/aarch64-simd-builtin-types.def -index b01569429..2be0ce824 100644 ---- a/gcc/config/aarch64/aarch64-simd-builtin-types.def -+++ b/gcc/config/aarch64/aarch64-simd-builtin-types.def -@@ -50,3 +50,5 @@ - ENTRY (Float32x4_t, V4SF, none, 13) - ENTRY (Float64x1_t, V1DF, none, 13) - ENTRY (Float64x2_t, V2DF, none, 13) -+ ENTRY (Bfloat16x4_t, V4BF, none, 14) -+ ENTRY (Bfloat16x8_t, V8BF, none, 14) -diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def -index 17bb0c486..d0fe4e7c8 100644 ---- a/gcc/config/aarch64/aarch64-simd-builtins.def -+++ b/gcc/config/aarch64/aarch64-simd-builtins.def -@@ -212,10 +212,15 @@ - /* Implemented by aarch64_{_lane}{q}. */ - BUILTIN_VB (TERNOP, sdot, 0) - BUILTIN_VB (TERNOPU, udot, 0) -+ BUILTIN_VB (TERNOP_SSUS, usdot, 0) - BUILTIN_VB (QUADOP_LANE, sdot_lane, 0) - BUILTIN_VB (QUADOPU_LANE, udot_lane, 0) - BUILTIN_VB (QUADOP_LANE, sdot_laneq, 0) - BUILTIN_VB (QUADOPU_LANE, udot_laneq, 0) -+ BUILTIN_VB (QUADOPSSUS_LANE_QUADTUP, usdot_lane, 0) -+ BUILTIN_VB (QUADOPSSUS_LANE_QUADTUP, usdot_laneq, 0) -+ BUILTIN_VB (QUADOPSSSU_LANE_QUADTUP, sudot_lane, 0) -+ BUILTIN_VB (QUADOPSSSU_LANE_QUADTUP, sudot_laneq, 0) - - /* Implemented by aarch64_fcadd. */ - BUILTIN_VHSDF (BINOP, fcadd90, 0) -@@ -424,7 +429,7 @@ - BUILTIN_VB (UNOP, rbit, 0) - - /* Implemented by -- aarch64_. */ -+ aarch64_. */ - BUILTIN_VALL (BINOP, zip1, 0) - BUILTIN_VALL (BINOP, zip2, 0) - BUILTIN_VALL (BINOP, uzp1, 0) -@@ -465,12 +470,18 @@ - /* Implemented by aarch64_ld1x3. */ - BUILTIN_VALLDIF (LOADSTRUCT, ld1x3, 0) - -+ /* Implemented by aarch64_ld1x4. */ -+ BUILTIN_VALLDIF (LOADSTRUCT, ld1x4, 0) -+ - /* Implemented by aarch64_st1x2. */ - BUILTIN_VALLDIF (STORESTRUCT, st1x2, 0) - - /* Implemented by aarch64_st1x3. */ - BUILTIN_VALLDIF (STORESTRUCT, st1x3, 0) - -+ /* Implemented by aarch64_st1x4. */ -+ BUILTIN_VALLDIF (STORESTRUCT, st1x4, 0) -+ - /* Implemented by fma4. */ - BUILTIN_VHSDF (TERNOP, fma, 4) - VAR1 (TERNOP, fma, 4, hf) -@@ -670,3 +681,36 @@ - /* Implemented by aarch64_fmllq_laneq_highv4sf. */ - VAR1 (QUADOP_LANE, fmlalq_laneq_high, 0, v4sf) - VAR1 (QUADOP_LANE, fmlslq_laneq_high, 0, v4sf) -+ -+ /* Implemented by aarch64_. */ -+ BUILTIN_VSFDF (UNOP, frint32z, 0) -+ BUILTIN_VSFDF (UNOP, frint32x, 0) -+ BUILTIN_VSFDF (UNOP, frint64z, 0) -+ BUILTIN_VSFDF (UNOP, frint64x, 0) -+ -+ /* Implemented by aarch64_bfdot{_lane}{q}. 
*/ -+ VAR2 (TERNOP, bfdot, 0, v2sf, v4sf) -+ VAR2 (QUADOP_LANE_PAIR, bfdot_lane, 0, v2sf, v4sf) -+ VAR2 (QUADOP_LANE_PAIR, bfdot_laneq, 0, v2sf, v4sf) -+ -+ /* Implemented by aarch64_bfmmlaqv4sf */ -+ VAR1 (TERNOP, bfmmlaq, 0, v4sf) -+ -+ /* Implemented by aarch64_bfmlal{_lane{q}}v4sf */ -+ VAR1 (TERNOP, bfmlalb, 0, v4sf) -+ VAR1 (TERNOP, bfmlalt, 0, v4sf) -+ VAR1 (QUADOP_LANE, bfmlalb_lane, 0, v4sf) -+ VAR1 (QUADOP_LANE, bfmlalt_lane, 0, v4sf) -+ VAR1 (QUADOP_LANE, bfmlalb_lane_q, 0, v4sf) -+ VAR1 (QUADOP_LANE, bfmlalt_lane_q, 0, v4sf) -+ -+ /* Implemented by aarch64_simd_mmlav16qi. */ -+ VAR1 (TERNOP, simd_smmla, 0, v16qi) -+ VAR1 (TERNOPU, simd_ummla, 0, v16qi) -+ VAR1 (TERNOP_SSUS, simd_usmmla, 0, v16qi) -+ -+ /* Implemented by aarch64_bfcvtn{q}{2} */ -+ VAR1 (UNOP, bfcvtn, 0, v4bf) -+ VAR1 (UNOP, bfcvtn_q, 0, v8bf) -+ VAR1 (BINOP, bfcvtn2, 0, v8bf) -+ VAR1 (UNOP, bfcvt, 0, bf) -diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md -index 29ca37c65..137c88da1 100644 ---- a/gcc/config/aarch64/aarch64-simd.md -+++ b/gcc/config/aarch64/aarch64-simd.md -@@ -19,8 +19,8 @@ - ;; . - - (define_expand "mov" -- [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "") -- (match_operand:VALL_F16 1 "general_operand" ""))] -+ [(set (match_operand:VALL_F16MOV 0 "nonimmediate_operand") -+ (match_operand:VALL_F16MOV 1 "general_operand"))] - "TARGET_SIMD" - " - /* Force the operand into a register if it is not an -@@ -39,8 +39,8 @@ - ) - - (define_expand "movmisalign" -- [(set (match_operand:VALL 0 "nonimmediate_operand" "") -- (match_operand:VALL 1 "general_operand" ""))] -+ [(set (match_operand:VALL 0 "nonimmediate_operand") -+ (match_operand:VALL 1 "general_operand"))] - "TARGET_SIMD" - { - /* This pattern is not permitted to fail during expansion: if both arguments -@@ -101,10 +101,10 @@ - [(set_attr "type" "neon_dup")] - ) - --(define_insn "*aarch64_simd_mov" -- [(set (match_operand:VD 0 "nonimmediate_operand" -+(define_insn "*aarch64_simd_mov" -+ [(set (match_operand:VDMOV 0 "nonimmediate_operand" - "=w, m, m, w, ?r, ?w, ?r, w") -- (match_operand:VD 1 "general_operand" -+ (match_operand:VDMOV 1 "general_operand" - "m, Dz, w, w, w, r, r, Dn"))] - "TARGET_SIMD - && (register_operand (operands[0], mode) -@@ -129,10 +129,10 @@ - mov_reg, neon_move")] - ) - --(define_insn "*aarch64_simd_mov" -- [(set (match_operand:VQ 0 "nonimmediate_operand" -+(define_insn "*aarch64_simd_mov" -+ [(set (match_operand:VQMOV 0 "nonimmediate_operand" - "=w, Umn, m, w, ?r, ?w, ?r, w") -- (match_operand:VQ 1 "general_operand" -+ (match_operand:VQMOV 1 "general_operand" - "m, Dz, w, w, w, r, r, Dn"))] - "TARGET_SIMD - && (register_operand (operands[0], mode) -@@ -234,8 +234,8 @@ - - - (define_split -- [(set (match_operand:VQ 0 "register_operand" "") -- (match_operand:VQ 1 "register_operand" ""))] -+ [(set (match_operand:VQMOV 0 "register_operand" "") -+ (match_operand:VQMOV 1 "register_operand" ""))] - "TARGET_SIMD && reload_completed - && GP_REGNUM_P (REGNO (operands[0])) - && GP_REGNUM_P (REGNO (operands[1]))" -@@ -246,8 +246,8 @@ - }) - - (define_split -- [(set (match_operand:VQ 0 "register_operand" "") -- (match_operand:VQ 1 "register_operand" ""))] -+ [(set (match_operand:VQMOV 0 "register_operand" "") -+ (match_operand:VQMOV 1 "register_operand" ""))] - "TARGET_SIMD && reload_completed - && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1]))) - || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))" -@@ -258,8 +258,8 @@ - }) - - (define_expand 
"@aarch64_split_simd_mov" -- [(set (match_operand:VQ 0) -- (match_operand:VQ 1))] -+ [(set (match_operand:VQMOV 0) -+ (match_operand:VQMOV 1))] - "TARGET_SIMD" - { - rtx dst = operands[0]; -@@ -520,6 +520,20 @@ - [(set_attr "type" "neon_dot")] - ) - -+;; These instructions map to the __builtins for the armv8.6a I8MM usdot -+;; (vector) Dot Product operation. -+(define_insn "aarch64_usdot" -+ [(set (match_operand:VS 0 "register_operand" "=w") -+ (plus:VS -+ (unspec:VS [(match_operand: 2 "register_operand" "w") -+ (match_operand: 3 "register_operand" "w")] -+ UNSPEC_USDOT) -+ (match_operand:VS 1 "register_operand" "0")))] -+ "TARGET_I8MM" -+ "usdot\\t%0., %2., %3." -+ [(set_attr "type" "neon_dot")] -+) -+ - ;; These expands map to the Dot Product optab the vectorizer checks for. - ;; The auto-vectorizer expects a dot product builtin that also does an - ;; accumulation into the provided register. -@@ -587,6 +601,26 @@ - [(set_attr "type" "neon_dot")] - ) - -+;; These instructions map to the __builtins for the armv8.6a I8MM usdot, sudot -+;; (by element) Dot Product operations. -+(define_insn "aarch64_dot_lane" -+ [(set (match_operand:VS 0 "register_operand" "=w") -+ (plus:VS -+ (unspec:VS [(match_operand: 2 "register_operand" "w") -+ (match_operand:VB 3 "register_operand" "w") -+ (match_operand:SI 4 "immediate_operand" "i")] -+ DOTPROD_I8MM) -+ (match_operand:VS 1 "register_operand" "0")))] -+ "TARGET_I8MM" -+ { -+ int nunits = GET_MODE_NUNITS (mode).to_constant (); -+ int lane = INTVAL (operands[4]); -+ operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane), SImode); -+ return "dot\\t%0., %2., %3.4b[%4]"; -+ } -+ [(set_attr "type" "neon_dot")] -+) -+ - (define_expand "copysign3" - [(match_operand:VHSDF 0 "register_operand") - (match_operand:VHSDF 1 "register_operand") -@@ -666,8 +700,8 @@ - [(set_attr "type" "neon_fp_rsqrts_")]) - - (define_expand "rsqrt2" -- [(set (match_operand:VALLF 0 "register_operand" "=w") -- (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")] -+ [(set (match_operand:VALLF 0 "register_operand") -+ (unspec:VALLF [(match_operand:VALLF 1 "register_operand")] - UNSPEC_RSQRT))] - "TARGET_SIMD" - { -@@ -724,15 +758,15 @@ - ;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64. - ;; Whereas SABD would return 192 (-64 signed) on the above example. - ;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead. --(define_insn "*aarch64_abd_3" -+(define_insn "aarch64_abd_3" - [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") - (minus:VDQ_BHSI - (USMAX:VDQ_BHSI - (match_operand:VDQ_BHSI 1 "register_operand" "w") - (match_operand:VDQ_BHSI 2 "register_operand" "w")) -- (match_operator 3 "aarch64_" -- [(match_dup 1) -- (match_dup 2)])))] -+ (:VDQ_BHSI -+ (match_dup 1) -+ (match_dup 2))))] - "TARGET_SIMD" - "abd\t%0., %1., %2." - [(set_attr "type" "neon_abd")] -@@ -778,7 +812,16 @@ - ;; UABAL tmp.8h, op1.16b, op2.16b - ;; UADALP op3.4s, tmp.8h - ;; MOV op0, op3 // should be eliminated in later passes. --;; The signed version just uses the signed variants of the above instructions. -+;; -+;; For TARGET_DOTPROD we do: -+;; MOV tmp1.16b, #1 // Can be CSE'd and hoisted out of loops. -+;; UABD tmp2.16b, op1.16b, op2.16b -+;; UDOT op3.4s, tmp2.16b, tmp1.16b -+;; MOV op0, op3 // RA will tie the operands of UDOT appropriately. -+;; -+;; The signed version just uses the signed variants of the above instructions -+;; but for TARGET_DOTPROD still emits a UDOT as the absolute difference is -+;; unsigned. 
- - (define_expand "sadv16qi" - [(use (match_operand:V4SI 0 "register_operand")) -@@ -787,6 +830,15 @@ - (use (match_operand:V4SI 3 "register_operand"))] - "TARGET_SIMD" - { -+ if (TARGET_DOTPROD) -+ { -+ rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode)); -+ rtx abd = gen_reg_rtx (V16QImode); -+ emit_insn (gen_aarch64_abdv16qi_3 (abd, operands[1], operands[2])); -+ emit_insn (gen_aarch64_udotv16qi (operands[0], operands[3], -+ abd, ones)); -+ DONE; -+ } - rtx reduc = gen_reg_rtx (V8HImode); - emit_insn (gen_aarch64_abdl2v16qi_3 (reduc, operands[1], - operands[2])); -@@ -949,6 +1001,21 @@ - [(set_attr "type" "neon_ins")] - ) - -+(define_expand "signbit2" -+ [(use (match_operand: 0 "register_operand")) -+ (use (match_operand:VDQSF 1 "register_operand"))] -+ "TARGET_SIMD" -+{ -+ int shift_amount = GET_MODE_UNIT_BITSIZE (mode) - 1; -+ rtx shift_vector = aarch64_simd_gen_const_vector_dup (mode, -+ shift_amount); -+ operands[1] = lowpart_subreg (mode, operands[1], mode); -+ -+ emit_insn (gen_aarch64_simd_lshr (operands[0], operands[1], -+ shift_vector)); -+ DONE; -+}) -+ - (define_insn "aarch64_simd_lshr" - [(set (match_operand:VDQ_I 0 "register_operand" "=w") - (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") -@@ -967,6 +1034,18 @@ - [(set_attr "type" "neon_shift_imm")] - ) - -+(define_insn "*aarch64_simd_sra" -+ [(set (match_operand:VDQ_I 0 "register_operand" "=w") -+ (plus:VDQ_I -+ (SHIFTRT:VDQ_I -+ (match_operand:VDQ_I 1 "register_operand" "w") -+ (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")) -+ (match_operand:VDQ_I 3 "register_operand" "0")))] -+ "TARGET_SIMD" -+ "sra\t%0., %1., %2" -+ [(set_attr "type" "neon_shift_acc")] -+) -+ - (define_insn "aarch64_simd_imm_shl" - [(set (match_operand:VDQ_I 0 "register_operand" "=w") - (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") -@@ -1006,9 +1085,9 @@ - ) - - (define_expand "ashl3" -- [(match_operand:VDQ_I 0 "register_operand" "") -- (match_operand:VDQ_I 1 "register_operand" "") -- (match_operand:SI 2 "general_operand" "")] -+ [(match_operand:VDQ_I 0 "register_operand") -+ (match_operand:VDQ_I 1 "register_operand") -+ (match_operand:SI 2 "general_operand")] - "TARGET_SIMD" - { - int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; -@@ -1053,9 +1132,9 @@ - ) - - (define_expand "lshr3" -- [(match_operand:VDQ_I 0 "register_operand" "") -- (match_operand:VDQ_I 1 "register_operand" "") -- (match_operand:SI 2 "general_operand" "")] -+ [(match_operand:VDQ_I 0 "register_operand") -+ (match_operand:VDQ_I 1 "register_operand") -+ (match_operand:SI 2 "general_operand")] - "TARGET_SIMD" - { - int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; -@@ -1100,9 +1179,9 @@ - ) - - (define_expand "ashr3" -- [(match_operand:VDQ_I 0 "register_operand" "") -- (match_operand:VDQ_I 1 "register_operand" "") -- (match_operand:SI 2 "general_operand" "")] -+ [(match_operand:VDQ_I 0 "register_operand") -+ (match_operand:VDQ_I 1 "register_operand") -+ (match_operand:SI 2 "general_operand")] - "TARGET_SIMD" - { - int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; -@@ -1147,9 +1226,9 @@ - ) - - (define_expand "vashl3" -- [(match_operand:VDQ_I 0 "register_operand" "") -- (match_operand:VDQ_I 1 "register_operand" "") -- (match_operand:VDQ_I 2 "register_operand" "")] -+ [(match_operand:VDQ_I 0 "register_operand") -+ (match_operand:VDQ_I 1 "register_operand") -+ (match_operand:VDQ_I 2 "register_operand")] - "TARGET_SIMD" - { - emit_insn (gen_aarch64_simd_reg_sshl (operands[0], operands[1], -@@ -1161,9 +1240,9 @@ - ;; 
Negating individual lanes most certainly offsets the - ;; gain from vectorization. - (define_expand "vashr3" -- [(match_operand:VDQ_BHSI 0 "register_operand" "") -- (match_operand:VDQ_BHSI 1 "register_operand" "") -- (match_operand:VDQ_BHSI 2 "register_operand" "")] -+ [(match_operand:VDQ_BHSI 0 "register_operand") -+ (match_operand:VDQ_BHSI 1 "register_operand") -+ (match_operand:VDQ_BHSI 2 "register_operand")] - "TARGET_SIMD" - { - rtx neg = gen_reg_rtx (mode); -@@ -1175,9 +1254,9 @@ - - ;; DI vector shift - (define_expand "aarch64_ashr_simddi" -- [(match_operand:DI 0 "register_operand" "=w") -- (match_operand:DI 1 "register_operand" "w") -- (match_operand:SI 2 "aarch64_shift_imm64_di" "")] -+ [(match_operand:DI 0 "register_operand") -+ (match_operand:DI 1 "register_operand") -+ (match_operand:SI 2 "aarch64_shift_imm64_di")] - "TARGET_SIMD" - { - /* An arithmetic shift right by 64 fills the result with copies of the sign -@@ -1191,9 +1270,9 @@ - ) - - (define_expand "vlshr3" -- [(match_operand:VDQ_BHSI 0 "register_operand" "") -- (match_operand:VDQ_BHSI 1 "register_operand" "") -- (match_operand:VDQ_BHSI 2 "register_operand" "")] -+ [(match_operand:VDQ_BHSI 0 "register_operand") -+ (match_operand:VDQ_BHSI 1 "register_operand") -+ (match_operand:VDQ_BHSI 2 "register_operand")] - "TARGET_SIMD" - { - rtx neg = gen_reg_rtx (mode); -@@ -1204,9 +1283,9 @@ - }) - - (define_expand "aarch64_lshr_simddi" -- [(match_operand:DI 0 "register_operand" "=w") -- (match_operand:DI 1 "register_operand" "w") -- (match_operand:SI 2 "aarch64_shift_imm64_di" "")] -+ [(match_operand:DI 0 "register_operand") -+ (match_operand:DI 1 "register_operand") -+ (match_operand:SI 2 "aarch64_shift_imm64_di")] - "TARGET_SIMD" - { - if (INTVAL (operands[2]) == 64) -@@ -1234,9 +1313,9 @@ - ) - - (define_expand "vec_set" -- [(match_operand:VALL_F16 0 "register_operand" "+w") -- (match_operand: 1 "register_operand" "w") -- (match_operand:SI 2 "immediate_operand" "")] -+ [(match_operand:VALL_F16 0 "register_operand") -+ (match_operand: 1 "register_operand") -+ (match_operand:SI 2 "immediate_operand")] - "TARGET_SIMD" - { - HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]); -@@ -1375,9 +1454,9 @@ - ) - - (define_expand "v2di3" -- [(set (match_operand:V2DI 0 "register_operand" "") -- (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "") -- (match_operand:V2DI 2 "register_operand" "")))] -+ [(set (match_operand:V2DI 0 "register_operand") -+ (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand") -+ (match_operand:V2DI 2 "register_operand")))] - "TARGET_SIMD" - { - enum rtx_code cmp_operator; -@@ -1440,8 +1519,8 @@ - ;; On big-endian this is { zeroes, operand } - - (define_insn "move_lo_quad_internal_" -- [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w") -- (vec_concat:VQ_NO2E -+ [(set (match_operand:VQMOV_NO2E 0 "register_operand" "=w,w,w") -+ (vec_concat:VQMOV_NO2E - (match_operand: 1 "register_operand" "w,r,r") - (vec_duplicate: (const_int 0))))] - "TARGET_SIMD && !BYTES_BIG_ENDIAN" -@@ -1470,8 +1549,8 @@ - ) - - (define_insn "move_lo_quad_internal_be_" -- [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w") -- (vec_concat:VQ_NO2E -+ [(set (match_operand:VQMOV_NO2E 0 "register_operand" "=w,w,w") -+ (vec_concat:VQMOV_NO2E - (vec_duplicate: (const_int 0)) - (match_operand: 1 "register_operand" "w,r,r")))] - "TARGET_SIMD && BYTES_BIG_ENDIAN" -@@ -1500,8 +1579,8 @@ - ) - - (define_expand "move_lo_quad_" -- [(match_operand:VQ 0 "register_operand") -- (match_operand:VQ 1 "register_operand")] -+ 
[(match_operand:VQMOV 0 "register_operand") -+ (match_operand:VQMOV 1 "register_operand")] - "TARGET_SIMD" - { - if (BYTES_BIG_ENDIAN) -@@ -1518,11 +1597,11 @@ - ;; For big-endian this is { operand1, operand2 } - - (define_insn "aarch64_simd_move_hi_quad_" -- [(set (match_operand:VQ 0 "register_operand" "+w,w") -- (vec_concat:VQ -+ [(set (match_operand:VQMOV 0 "register_operand" "+w,w") -+ (vec_concat:VQMOV - (vec_select: - (match_dup 0) -- (match_operand:VQ 2 "vect_par_cnst_lo_half" "")) -+ (match_operand:VQMOV 2 "vect_par_cnst_lo_half" "")) - (match_operand: 1 "register_operand" "w,r")))] - "TARGET_SIMD && !BYTES_BIG_ENDIAN" - "@ -@@ -1532,12 +1611,12 @@ - ) - - (define_insn "aarch64_simd_move_hi_quad_be_" -- [(set (match_operand:VQ 0 "register_operand" "+w,w") -- (vec_concat:VQ -+ [(set (match_operand:VQMOV 0 "register_operand" "+w,w") -+ (vec_concat:VQMOV - (match_operand: 1 "register_operand" "w,r") - (vec_select: - (match_dup 0) -- (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))] -+ (match_operand:VQMOV 2 "vect_par_cnst_lo_half" ""))))] - "TARGET_SIMD && BYTES_BIG_ENDIAN" - "@ - ins\\t%0.d[1], %1.d[0] -@@ -1546,8 +1625,8 @@ - ) - - (define_expand "move_hi_quad_" -- [(match_operand:VQ 0 "register_operand" "") -- (match_operand: 1 "register_operand" "")] -+ [(match_operand:VQMOV 0 "register_operand") -+ (match_operand: 1 "register_operand")] - "TARGET_SIMD" - { - rtx p = aarch64_simd_vect_par_cnst_half (mode, , false); -@@ -1571,10 +1650,122 @@ - [(set_attr "type" "neon_shift_imm_narrow_q")] - ) - -+(define_insn "aarch64_bfdot" -+ [(set (match_operand:VDQSF 0 "register_operand" "=w") -+ (plus:VDQSF -+ (unspec:VDQSF -+ [(match_operand: 2 "register_operand" "w") -+ (match_operand: 3 "register_operand" "w")] -+ UNSPEC_BFDOT) -+ (match_operand:VDQSF 1 "register_operand" "0")))] -+ "TARGET_BF16_SIMD" -+ "bfdot\t%0., %2., %3." 
-+ [(set_attr "type" "neon_dot")] -+) -+ -+(define_insn "aarch64_bfdot_lane" -+ [(set (match_operand:VDQSF 0 "register_operand" "=w") -+ (plus:VDQSF -+ (unspec:VDQSF -+ [(match_operand: 2 "register_operand" "w") -+ (match_operand:VBF 3 "register_operand" "w") -+ (match_operand:SI 4 "const_int_operand" "n")] -+ UNSPEC_BFDOT) -+ (match_operand:VDQSF 1 "register_operand" "0")))] -+ "TARGET_BF16_SIMD" -+{ -+ int nunits = GET_MODE_NUNITS (mode).to_constant (); -+ int lane = INTVAL (operands[4]); -+ operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane), SImode); -+ return "bfdot\t%0., %2., %3.2h[%4]"; -+} -+ [(set_attr "type" "neon_dot")] -+) -+ -+;; bfmmla -+(define_insn "aarch64_bfmmlaqv4sf" -+ [(set (match_operand:V4SF 0 "register_operand" "=w") -+ (plus:V4SF (match_operand:V4SF 1 "register_operand" "0") -+ (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w") -+ (match_operand:V8BF 3 "register_operand" "w")] -+ UNSPEC_BFMMLA)))] -+ "TARGET_BF16_SIMD" -+ "bfmmla\\t%0.4s, %2.8h, %3.8h" -+ [(set_attr "type" "neon_fp_mla_s_q")] -+) -+ -+;; bfmlal -+(define_insn "aarch64_bfmlalv4sf" -+ [(set (match_operand:V4SF 0 "register_operand" "=w") -+ (plus: V4SF (match_operand:V4SF 1 "register_operand" "0") -+ (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w") -+ (match_operand:V8BF 3 "register_operand" "w")] -+ BF_MLA)))] -+ "TARGET_BF16_SIMD" -+ "bfmlal\\t%0.4s, %2.8h, %3.8h" -+ [(set_attr "type" "neon_fp_mla_s_q")] -+) -+ -+(define_insn "aarch64_bfmlal_lanev4sf" -+ [(set (match_operand:V4SF 0 "register_operand" "=w") -+ (plus: V4SF (match_operand:V4SF 1 "register_operand" "0") -+ (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w") -+ (match_operand:VBF 3 "register_operand" "w") -+ (match_operand:SI 4 "const_int_operand" "n")] -+ BF_MLA)))] -+ "TARGET_BF16_SIMD" -+{ -+ operands[4] = aarch64_endian_lane_rtx (mode, INTVAL (operands[4])); -+ return "bfmlal\\t%0.4s, %2.8h, %3.h[%4]"; -+} -+ [(set_attr "type" "neon_fp_mla_s_scalar_q")] -+) -+ -+;; 8-bit integer matrix multiply-accumulate -+(define_insn "aarch64_simd_mmlav16qi" -+ [(set (match_operand:V4SI 0 "register_operand" "=w") -+ (plus:V4SI -+ (unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w") -+ (match_operand:V16QI 3 "register_operand" "w")] MATMUL) -+ (match_operand:V4SI 1 "register_operand" "0")))] -+ "TARGET_I8MM" -+ "mmla\\t%0.4s, %2.16b, %3.16b" -+ [(set_attr "type" "neon_mla_s_q")] -+) -+ -+;; bfcvtn -+(define_insn "aarch64_bfcvtn" -+ [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w") -+ (unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")] -+ UNSPEC_BFCVTN))] -+ "TARGET_BF16_SIMD" -+ "bfcvtn\\t%0.4h, %1.4s" -+ [(set_attr "type" "neon_fp_cvt_narrow_s_q")] -+) -+ -+(define_insn "aarch64_bfcvtn2v8bf" -+ [(set (match_operand:V8BF 0 "register_operand" "=w") -+ (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0") -+ (match_operand:V4SF 2 "register_operand" "w")] -+ UNSPEC_BFCVTN2))] -+ "TARGET_BF16_SIMD" -+ "bfcvtn2\\t%0.8h, %2.4s" -+ [(set_attr "type" "neon_fp_cvt_narrow_s_q")] -+) -+ -+(define_insn "aarch64_bfcvtbf" -+ [(set (match_operand:BF 0 "register_operand" "=w") -+ (unspec:BF [(match_operand:SF 1 "register_operand" "w")] -+ UNSPEC_BFCVT))] -+ "TARGET_BF16_FP" -+ "bfcvt\\t%h0, %s1" -+ [(set_attr "type" "f_cvt")] -+) -+ - (define_expand "vec_pack_trunc_" -- [(match_operand: 0 "register_operand" "") -- (match_operand:VDN 1 "register_operand" "") -- (match_operand:VDN 2 "register_operand" "")] -+ [(match_operand: 0 "register_operand") -+ (match_operand:VDN 1 "register_operand") 
-+ (match_operand:VDN 2 "register_operand")] - "TARGET_SIMD" - { - rtx tempreg = gen_reg_rtx (mode); -@@ -1630,7 +1821,7 @@ - ) - - (define_expand "vec_unpack_hi_" -- [(match_operand: 0 "register_operand" "") -+ [(match_operand: 0 "register_operand") - (ANY_EXTEND: (match_operand:VQW 1 "register_operand"))] - "TARGET_SIMD" - { -@@ -1642,8 +1833,8 @@ - ) - - (define_expand "vec_unpack_lo_" -- [(match_operand: 0 "register_operand" "") -- (ANY_EXTEND: (match_operand:VQW 1 "register_operand" ""))] -+ [(match_operand: 0 "register_operand") -+ (ANY_EXTEND: (match_operand:VQW 1 "register_operand"))] - "TARGET_SIMD" - { - rtx p = aarch64_simd_vect_par_cnst_half (mode, , false); -@@ -1761,9 +1952,9 @@ - ) - - (define_expand "vec_widen_mult_lo_" -- [(match_operand: 0 "register_operand" "") -- (ANY_EXTEND: (match_operand:VQW 1 "register_operand" "")) -- (ANY_EXTEND: (match_operand:VQW 2 "register_operand" ""))] -+ [(match_operand: 0 "register_operand") -+ (ANY_EXTEND: (match_operand:VQW 1 "register_operand")) -+ (ANY_EXTEND: (match_operand:VQW 2 "register_operand"))] - "TARGET_SIMD" - { - rtx p = aarch64_simd_vect_par_cnst_half (mode, , false); -@@ -1788,9 +1979,9 @@ - ) - - (define_expand "vec_widen_mult_hi_" -- [(match_operand: 0 "register_operand" "") -- (ANY_EXTEND: (match_operand:VQW 1 "register_operand" "")) -- (ANY_EXTEND: (match_operand:VQW 2 "register_operand" ""))] -+ [(match_operand: 0 "register_operand") -+ (ANY_EXTEND: (match_operand:VQW 1 "register_operand")) -+ (ANY_EXTEND: (match_operand:VQW 2 "register_operand"))] - "TARGET_SIMD" - { - rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); -@@ -1855,9 +2046,9 @@ - ) - - (define_expand "div3" -- [(set (match_operand:VHSDF 0 "register_operand" "=w") -- (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w") -- (match_operand:VHSDF 2 "register_operand" "w")))] -+ [(set (match_operand:VHSDF 0 "register_operand") -+ (div:VHSDF (match_operand:VHSDF 1 "register_operand") -+ (match_operand:VHSDF 2 "register_operand")))] - "TARGET_SIMD" - { - if (aarch64_emit_approx_div (operands[0], operands[1], operands[2])) -@@ -2192,8 +2383,8 @@ - ;; other big-endian patterns their behavior is as required. - - (define_expand "vec_unpacks_lo_" -- [(match_operand: 0 "register_operand" "") -- (match_operand:VQ_HSF 1 "register_operand" "")] -+ [(match_operand: 0 "register_operand") -+ (match_operand:VQ_HSF 1 "register_operand")] - "TARGET_SIMD" - { - rtx p = aarch64_simd_vect_par_cnst_half (mode, , false); -@@ -2215,8 +2406,8 @@ - ) - - (define_expand "vec_unpacks_hi_" -- [(match_operand: 0 "register_operand" "") -- (match_operand:VQ_HSF 1 "register_operand" "")] -+ [(match_operand: 0 "register_operand") -+ (match_operand:VQ_HSF 1 "register_operand")] - "TARGET_SIMD" - { - rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); -@@ -2268,9 +2459,9 @@ - ) - - (define_expand "aarch64_float_truncate_hi_" -- [(match_operand: 0 "register_operand" "=w") -- (match_operand:VDF 1 "register_operand" "0") -- (match_operand: 2 "register_operand" "w")] -+ [(match_operand: 0 "register_operand") -+ (match_operand:VDF 1 "register_operand") -+ (match_operand: 2 "register_operand")] - "TARGET_SIMD" - { - rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN -@@ -2363,8 +2554,8 @@ - ;; 'across lanes' add. 
- - (define_expand "reduc_plus_scal_" -- [(match_operand: 0 "register_operand" "=w") -- (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")] -+ [(match_operand: 0 "register_operand") -+ (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand")] - UNSPEC_ADDV)] - "TARGET_SIMD" - { -@@ -3116,30 +3307,31 @@ - (define_insn "*aarch64_get_lane_extend" - [(set (match_operand:GPI 0 "register_operand" "=r") - (sign_extend:GPI -- (vec_select: -+ (vec_select: - (match_operand:VDQQH 1 "register_operand" "w") - (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] - "TARGET_SIMD" - { -- operands[2] = aarch64_endian_lane_rtx (mode, INTVAL (operands[2])); -+ operands[2] = aarch64_endian_lane_rtx (mode, -+ INTVAL (operands[2])); - return "smov\\t%0, %1.[%2]"; - } -- [(set_attr "type" "neon_to_gp")] --) -- --(define_insn "*aarch64_get_lane_zero_extend" -- [(set (match_operand:GPI 0 "register_operand" "=r") -- (zero_extend:GPI -- (vec_select: -- (match_operand:VDQQH 1 "register_operand" "w") -- (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] -- "TARGET_SIMD" -- { -- operands[2] = aarch64_endian_lane_rtx (mode, -- INTVAL (operands[2])); -- return "umov\\t%w0, %1.[%2]"; -- } -- [(set_attr "type" "neon_to_gp")] -+ [(set_attr "type" "neon_to_gp")] -+) -+ -+(define_insn "*aarch64_get_lane_zero_extend" -+ [(set (match_operand:GPI 0 "register_operand" "=r") -+ (zero_extend:GPI -+ (vec_select: -+ (match_operand:VDQQH 1 "register_operand" "w") -+ (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] -+ "TARGET_SIMD" -+ { -+ operands[2] = aarch64_endian_lane_rtx (mode, -+ INTVAL (operands[2])); -+ return "umov\\t%w0, %1.[%2]"; -+ } -+ [(set_attr "type" "neon_to_gp")] - ) - - ;; Lane extraction of a value, neither sign nor zero extension -@@ -3280,9 +3472,9 @@ - - - (define_expand "aarch64_saddl2" -- [(match_operand: 0 "register_operand" "=w") -- (match_operand:VQW 1 "register_operand" "w") -- (match_operand:VQW 2 "register_operand" "w")] -+ [(match_operand: 0 "register_operand") -+ (match_operand:VQW 1 "register_operand") -+ (match_operand:VQW 2 "register_operand")] - "TARGET_SIMD" - { - rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); -@@ -3292,9 +3484,9 @@ - }) - - (define_expand "aarch64_uaddl2" -- [(match_operand: 0 "register_operand" "=w") -- (match_operand:VQW 1 "register_operand" "w") -- (match_operand:VQW 2 "register_operand" "w")] -+ [(match_operand: 0 "register_operand") -+ (match_operand:VQW 1 "register_operand") -+ (match_operand:VQW 2 "register_operand")] - "TARGET_SIMD" - { - rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); -@@ -3304,9 +3496,9 @@ - }) - - (define_expand "aarch64_ssubl2" -- [(match_operand: 0 "register_operand" "=w") -- (match_operand:VQW 1 "register_operand" "w") -- (match_operand:VQW 2 "register_operand" "w")] -+ [(match_operand: 0 "register_operand") -+ (match_operand:VQW 1 "register_operand") -+ (match_operand:VQW 2 "register_operand")] - "TARGET_SIMD" - { - rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); -@@ -3316,9 +3508,9 @@ - }) - - (define_expand "aarch64_usubl2" -- [(match_operand: 0 "register_operand" "=w") -- (match_operand:VQW 1 "register_operand" "w") -- (match_operand:VQW 2 "register_operand" "w")] -+ [(match_operand: 0 "register_operand") -+ (match_operand:VQW 1 "register_operand") -+ (match_operand:VQW 2 "register_operand")] - "TARGET_SIMD" - { - rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); -@@ -3341,10 +3533,10 @@ - ;; w. 
- - (define_expand "widen_ssum3" -- [(set (match_operand: 0 "register_operand" "") -+ [(set (match_operand: 0 "register_operand") - (plus: (sign_extend: -- (match_operand:VQW 1 "register_operand" "")) -- (match_operand: 2 "register_operand" "")))] -+ (match_operand:VQW 1 "register_operand")) -+ (match_operand: 2 "register_operand")))] - "TARGET_SIMD" - { - rtx p = aarch64_simd_vect_par_cnst_half (mode, , false); -@@ -3358,10 +3550,10 @@ - ) - - (define_expand "widen_ssum3" -- [(set (match_operand: 0 "register_operand" "") -+ [(set (match_operand: 0 "register_operand") - (plus: (sign_extend: -- (match_operand:VD_BHSI 1 "register_operand" "")) -- (match_operand: 2 "register_operand" "")))] -+ (match_operand:VD_BHSI 1 "register_operand")) -+ (match_operand: 2 "register_operand")))] - "TARGET_SIMD" - { - emit_insn (gen_aarch64_saddw (operands[0], operands[2], operands[1])); -@@ -3369,10 +3561,10 @@ - }) - - (define_expand "widen_usum3" -- [(set (match_operand: 0 "register_operand" "") -+ [(set (match_operand: 0 "register_operand") - (plus: (zero_extend: -- (match_operand:VQW 1 "register_operand" "")) -- (match_operand: 2 "register_operand" "")))] -+ (match_operand:VQW 1 "register_operand")) -+ (match_operand: 2 "register_operand")))] - "TARGET_SIMD" - { - rtx p = aarch64_simd_vect_par_cnst_half (mode, , false); -@@ -3386,10 +3578,10 @@ - ) - - (define_expand "widen_usum3" -- [(set (match_operand: 0 "register_operand" "") -+ [(set (match_operand: 0 "register_operand") - (plus: (zero_extend: -- (match_operand:VD_BHSI 1 "register_operand" "")) -- (match_operand: 2 "register_operand" "")))] -+ (match_operand:VD_BHSI 1 "register_operand")) -+ (match_operand: 2 "register_operand")))] - "TARGET_SIMD" - { - emit_insn (gen_aarch64_uaddw (operands[0], operands[2], operands[1])); -@@ -3467,9 +3659,9 @@ - ) - - (define_expand "aarch64_saddw2" -- [(match_operand: 0 "register_operand" "=w") -- (match_operand: 1 "register_operand" "w") -- (match_operand:VQW 2 "register_operand" "w")] -+ [(match_operand: 0 "register_operand") -+ (match_operand: 1 "register_operand") -+ (match_operand:VQW 2 "register_operand")] - "TARGET_SIMD" - { - rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); -@@ -3479,9 +3671,9 @@ - }) - - (define_expand "aarch64_uaddw2" -- [(match_operand: 0 "register_operand" "=w") -- (match_operand: 1 "register_operand" "w") -- (match_operand:VQW 2 "register_operand" "w")] -+ [(match_operand: 0 "register_operand") -+ (match_operand: 1 "register_operand") -+ (match_operand:VQW 2 "register_operand")] - "TARGET_SIMD" - { - rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); -@@ -3492,9 +3684,9 @@ - - - (define_expand "aarch64_ssubw2" -- [(match_operand: 0 "register_operand" "=w") -- (match_operand: 1 "register_operand" "w") -- (match_operand:VQW 2 "register_operand" "w")] -+ [(match_operand: 0 "register_operand") -+ (match_operand: 1 "register_operand") -+ (match_operand:VQW 2 "register_operand")] - "TARGET_SIMD" - { - rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); -@@ -3504,9 +3696,9 @@ - }) - - (define_expand "aarch64_usubw2" -- [(match_operand: 0 "register_operand" "=w") -- (match_operand: 1 "register_operand" "w") -- (match_operand:VQW 2 "register_operand" "w")] -+ [(match_operand: 0 "register_operand") -+ (match_operand: 1 "register_operand") -+ (match_operand:VQW 2 "register_operand")] - "TARGET_SIMD" - { - rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); -@@ -4039,10 +4231,10 @@ - ) - - (define_expand "aarch64_sqdmlal2" -- [(match_operand: 0 "register_operand" "=w") -- 
(match_operand: 1 "register_operand" "w") -- (match_operand:VQ_HSI 2 "register_operand" "w") -- (match_operand:VQ_HSI 3 "register_operand" "w")] -+ [(match_operand: 0 "register_operand") -+ (match_operand: 1 "register_operand") -+ (match_operand:VQ_HSI 2 "register_operand") -+ (match_operand:VQ_HSI 3 "register_operand")] - "TARGET_SIMD" - { - rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); -@@ -4052,10 +4244,10 @@ - }) - - (define_expand "aarch64_sqdmlsl2" -- [(match_operand: 0 "register_operand" "=w") -- (match_operand: 1 "register_operand" "w") -- (match_operand:VQ_HSI 2 "register_operand" "w") -- (match_operand:VQ_HSI 3 "register_operand" "w")] -+ [(match_operand: 0 "register_operand") -+ (match_operand: 1 "register_operand") -+ (match_operand:VQ_HSI 2 "register_operand") -+ (match_operand:VQ_HSI 3 "register_operand")] - "TARGET_SIMD" - { - rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); -@@ -4119,11 +4311,11 @@ - ) - - (define_expand "aarch64_sqdmlal2_lane" -- [(match_operand: 0 "register_operand" "=w") -- (match_operand: 1 "register_operand" "w") -- (match_operand:VQ_HSI 2 "register_operand" "w") -- (match_operand: 3 "register_operand" "") -- (match_operand:SI 4 "immediate_operand" "i")] -+ [(match_operand: 0 "register_operand") -+ (match_operand: 1 "register_operand") -+ (match_operand:VQ_HSI 2 "register_operand") -+ (match_operand: 3 "register_operand") -+ (match_operand:SI 4 "immediate_operand")] - "TARGET_SIMD" - { - rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); -@@ -4134,11 +4326,11 @@ - }) - - (define_expand "aarch64_sqdmlal2_laneq" -- [(match_operand: 0 "register_operand" "=w") -- (match_operand: 1 "register_operand" "w") -- (match_operand:VQ_HSI 2 "register_operand" "w") -- (match_operand: 3 "register_operand" "") -- (match_operand:SI 4 "immediate_operand" "i")] -+ [(match_operand: 0 "register_operand") -+ (match_operand: 1 "register_operand") -+ (match_operand:VQ_HSI 2 "register_operand") -+ (match_operand: 3 "register_operand") -+ (match_operand:SI 4 "immediate_operand")] - "TARGET_SIMD" - { - rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); -@@ -4149,11 +4341,11 @@ - }) - - (define_expand "aarch64_sqdmlsl2_lane" -- [(match_operand: 0 "register_operand" "=w") -- (match_operand: 1 "register_operand" "w") -- (match_operand:VQ_HSI 2 "register_operand" "w") -- (match_operand: 3 "register_operand" "") -- (match_operand:SI 4 "immediate_operand" "i")] -+ [(match_operand: 0 "register_operand") -+ (match_operand: 1 "register_operand") -+ (match_operand:VQ_HSI 2 "register_operand") -+ (match_operand: 3 "register_operand") -+ (match_operand:SI 4 "immediate_operand")] - "TARGET_SIMD" - { - rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); -@@ -4164,11 +4356,11 @@ - }) - - (define_expand "aarch64_sqdmlsl2_laneq" -- [(match_operand: 0 "register_operand" "=w") -- (match_operand: 1 "register_operand" "w") -- (match_operand:VQ_HSI 2 "register_operand" "w") -- (match_operand: 3 "register_operand" "") -- (match_operand:SI 4 "immediate_operand" "i")] -+ [(match_operand: 0 "register_operand") -+ (match_operand: 1 "register_operand") -+ (match_operand:VQ_HSI 2 "register_operand") -+ (match_operand: 3 "register_operand") -+ (match_operand:SI 4 "immediate_operand")] - "TARGET_SIMD" - { - rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); -@@ -4198,10 +4390,10 @@ - ) - - (define_expand "aarch64_sqdmlal2_n" -- [(match_operand: 0 "register_operand" "=w") -- (match_operand: 1 "register_operand" "w") -- (match_operand:VQ_HSI 2 "register_operand" "w") -- 
(match_operand: 3 "register_operand" "w")] -+ [(match_operand: 0 "register_operand") -+ (match_operand: 1 "register_operand") -+ (match_operand:VQ_HSI 2 "register_operand") -+ (match_operand: 3 "register_operand")] - "TARGET_SIMD" - { - rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); -@@ -4212,10 +4404,10 @@ - }) - - (define_expand "aarch64_sqdmlsl2_n" -- [(match_operand: 0 "register_operand" "=w") -- (match_operand: 1 "register_operand" "w") -- (match_operand:VQ_HSI 2 "register_operand" "w") -- (match_operand: 3 "register_operand" "w")] -+ [(match_operand: 0 "register_operand") -+ (match_operand: 1 "register_operand") -+ (match_operand:VQ_HSI 2 "register_operand") -+ (match_operand: 3 "register_operand")] - "TARGET_SIMD" - { - rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); -@@ -4367,9 +4559,9 @@ - ) - - (define_expand "aarch64_sqdmull2" -- [(match_operand: 0 "register_operand" "=w") -- (match_operand:VQ_HSI 1 "register_operand" "w") -- (match_operand:VQ_HSI 2 "register_operand" "w")] -+ [(match_operand: 0 "register_operand") -+ (match_operand:VQ_HSI 1 "register_operand") -+ (match_operand:VQ_HSI 2 "register_operand")] - "TARGET_SIMD" - { - rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); -@@ -4427,10 +4619,10 @@ - ) - - (define_expand "aarch64_sqdmull2_lane" -- [(match_operand: 0 "register_operand" "=w") -- (match_operand:VQ_HSI 1 "register_operand" "w") -- (match_operand: 2 "register_operand" "") -- (match_operand:SI 3 "immediate_operand" "i")] -+ [(match_operand: 0 "register_operand") -+ (match_operand:VQ_HSI 1 "register_operand") -+ (match_operand: 2 "register_operand") -+ (match_operand:SI 3 "immediate_operand")] - "TARGET_SIMD" - { - rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); -@@ -4441,10 +4633,10 @@ - }) - - (define_expand "aarch64_sqdmull2_laneq" -- [(match_operand: 0 "register_operand" "=w") -- (match_operand:VQ_HSI 1 "register_operand" "w") -- (match_operand: 2 "register_operand" "") -- (match_operand:SI 3 "immediate_operand" "i")] -+ [(match_operand: 0 "register_operand") -+ (match_operand:VQ_HSI 1 "register_operand") -+ (match_operand: 2 "register_operand") -+ (match_operand:SI 3 "immediate_operand")] - "TARGET_SIMD" - { - rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); -@@ -4475,9 +4667,9 @@ - ) - - (define_expand "aarch64_sqdmull2_n" -- [(match_operand: 0 "register_operand" "=w") -- (match_operand:VQ_HSI 1 "register_operand" "w") -- (match_operand: 2 "register_operand" "w")] -+ [(match_operand: 0 "register_operand") -+ (match_operand:VQ_HSI 1 "register_operand") -+ (match_operand: 2 "register_operand")] - "TARGET_SIMD" - { - rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); -@@ -4879,8 +5071,8 @@ - ;; sqrt - - (define_expand "sqrt2" -- [(set (match_operand:VHSDF 0 "register_operand" "=w") -- (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))] -+ [(set (match_operand:VHSDF 0 "register_operand") -+ (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand")))] - "TARGET_SIMD" - { - if (aarch64_emit_approx_sqrt (operands[0], operands[1], false)) -@@ -4933,8 +5125,8 @@ - ) - - (define_expand "vec_load_lanesoi" -- [(set (match_operand:OI 0 "register_operand" "=w") -- (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv") -+ [(set (match_operand:OI 0 "register_operand") -+ (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - UNSPEC_LD2))] - "TARGET_SIMD" -@@ -4977,8 +5169,8 @@ - ) - - (define_expand "vec_store_lanesoi" -- [(set (match_operand:OI 0 
"aarch64_simd_struct_operand" "=Utv") -- (unspec:OI [(match_operand:OI 1 "register_operand" "w") -+ [(set (match_operand:OI 0 "aarch64_simd_struct_operand") -+ (unspec:OI [(match_operand:OI 1 "register_operand") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - UNSPEC_ST2))] - "TARGET_SIMD" -@@ -5031,8 +5223,8 @@ - ) - - (define_expand "vec_load_lanesci" -- [(set (match_operand:CI 0 "register_operand" "=w") -- (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv") -+ [(set (match_operand:CI 0 "register_operand") -+ (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - UNSPEC_LD3))] - "TARGET_SIMD" -@@ -5075,8 +5267,8 @@ - ) - - (define_expand "vec_store_lanesci" -- [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv") -- (unspec:CI [(match_operand:CI 1 "register_operand" "w") -+ [(set (match_operand:CI 0 "aarch64_simd_struct_operand") -+ (unspec:CI [(match_operand:CI 1 "register_operand") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - UNSPEC_ST3))] - "TARGET_SIMD" -@@ -5129,8 +5321,8 @@ - ) - - (define_expand "vec_load_lanesxi" -- [(set (match_operand:XI 0 "register_operand" "=w") -- (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv") -+ [(set (match_operand:XI 0 "register_operand") -+ (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - UNSPEC_LD4))] - "TARGET_SIMD" -@@ -5173,8 +5365,8 @@ - ) - - (define_expand "vec_store_lanesxi" -- [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv") -- (unspec:XI [(match_operand:XI 1 "register_operand" "w") -+ [(set (match_operand:XI 0 "aarch64_simd_struct_operand") -+ (unspec:XI [(match_operand:XI 1 "register_operand") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - UNSPEC_ST4))] - "TARGET_SIMD" -@@ -5219,8 +5411,8 @@ - ;; Reload patterns for AdvSIMD register list operands. - - (define_expand "mov" -- [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "") -- (match_operand:VSTRUCT 1 "general_operand" ""))] -+ [(set (match_operand:VSTRUCT 0 "nonimmediate_operand") -+ (match_operand:VSTRUCT 1 "general_operand"))] - "TARGET_SIMD" - { - if (can_create_pseudo_p ()) -@@ -5232,8 +5424,8 @@ - - - (define_expand "aarch64_ld1x3" -- [(match_operand:CI 0 "register_operand" "=w") -- (match_operand:DI 1 "register_operand" "r") -+ [(match_operand:CI 0 "register_operand") -+ (match_operand:DI 1 "register_operand") - (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - "TARGET_SIMD" - { -@@ -5252,9 +5444,31 @@ - [(set_attr "type" "neon_load1_3reg")] - ) - -+(define_expand "aarch64_ld1x4" -+ [(match_operand:XI 0 "register_operand" "=w") -+ (match_operand:DI 1 "register_operand" "r") -+ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] -+ "TARGET_SIMD" -+{ -+ rtx mem = gen_rtx_MEM (XImode, operands[1]); -+ emit_insn (gen_aarch64_ld1_x4_ (operands[0], mem)); -+ DONE; -+}) -+ -+(define_insn "aarch64_ld1_x4_" -+ [(set (match_operand:XI 0 "register_operand" "=w") -+ (unspec:XI -+ [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv") -+ (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)] -+ UNSPEC_LD1))] -+ "TARGET_SIMD" -+ "ld1\\t{%S0. 
- %V0.}, %1" -+ [(set_attr "type" "neon_load1_4reg")] -+) -+ - (define_expand "aarch64_st1x2" -- [(match_operand:DI 0 "register_operand" "") -- (match_operand:OI 1 "register_operand" "") -+ [(match_operand:DI 0 "register_operand") -+ (match_operand:OI 1 "register_operand") - (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - "TARGET_SIMD" - { -@@ -5274,8 +5488,8 @@ - ) - - (define_expand "aarch64_st1x3" -- [(match_operand:DI 0 "register_operand" "") -- (match_operand:CI 1 "register_operand" "") -+ [(match_operand:DI 0 "register_operand") -+ (match_operand:CI 1 "register_operand") - (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - "TARGET_SIMD" - { -@@ -5294,6 +5508,28 @@ - [(set_attr "type" "neon_store1_3reg")] - ) - -+(define_expand "aarch64_st1x4" -+ [(match_operand:DI 0 "register_operand" "") -+ (match_operand:XI 1 "register_operand" "") -+ (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] -+ "TARGET_SIMD" -+{ -+ rtx mem = gen_rtx_MEM (XImode, operands[0]); -+ emit_insn (gen_aarch64_st1_x4_ (mem, operands[1])); -+ DONE; -+}) -+ -+(define_insn "aarch64_st1_x4_" -+ [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv") -+ (unspec:XI -+ [(match_operand:XI 1 "register_operand" "w") -+ (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)] -+ UNSPEC_ST1))] -+ "TARGET_SIMD" -+ "st1\\t{%S1. - %V1.}, %0" -+ [(set_attr "type" "neon_store1_4reg")] -+) -+ - (define_insn "*aarch64_mov" - [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w") - (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))] -@@ -5427,8 +5663,8 @@ - }) - - (define_expand "aarch64_ldr" -- [(match_operand:VSTRUCT 0 "register_operand" "=w") -- (match_operand:DI 1 "register_operand" "w") -+ [(match_operand:VSTRUCT 0 "register_operand") -+ (match_operand:DI 1 "register_operand") - (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - "TARGET_SIMD" - { -@@ -5502,8 +5738,8 @@ - ) - - (define_expand "aarch64_ld" -- [(match_operand:VSTRUCT 0 "register_operand" "=w") -- (match_operand:DI 1 "register_operand" "r") -+ [(match_operand:VSTRUCT 0 "register_operand") -+ (match_operand:DI 1 "register_operand") - (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - "TARGET_SIMD" - { -@@ -5530,8 +5766,8 @@ - }) - - (define_expand "aarch64_ld" -- [(match_operand:VSTRUCT 0 "register_operand" "=w") -- (match_operand:DI 1 "register_operand" "r") -+ [(match_operand:VSTRUCT 0 "register_operand") -+ (match_operand:DI 1 "register_operand") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - "TARGET_SIMD" - { -@@ -5543,8 +5779,8 @@ - }) - - (define_expand "aarch64_ld1x2" -- [(match_operand:OI 0 "register_operand" "=w") -- (match_operand:DI 1 "register_operand" "r") -+ [(match_operand:OI 0 "register_operand") -+ (match_operand:DI 1 "register_operand") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - "TARGET_SIMD" - { -@@ -5556,8 +5792,8 @@ - }) - - (define_expand "aarch64_ld1x2" -- [(match_operand:OI 0 "register_operand" "=w") -- (match_operand:DI 1 "register_operand" "r") -+ [(match_operand:OI 0 "register_operand") -+ (match_operand:DI 1 "register_operand") - (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - "TARGET_SIMD" - { -@@ -5570,10 +5806,10 @@ - - - (define_expand "aarch64_ld_lane" -- [(match_operand:VSTRUCT 0 "register_operand" "=w") -- (match_operand:DI 1 "register_operand" "w") -- (match_operand:VSTRUCT 2 "register_operand" "0") -- (match_operand:SI 3 "immediate_operand" "i") -+ [(match_operand:VSTRUCT 0 "register_operand") -+ (match_operand:DI 1 "register_operand") -+ 
(match_operand:VSTRUCT 2 "register_operand") -+ (match_operand:SI 3 "immediate_operand") - (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - "TARGET_SIMD" - { -@@ -5593,9 +5829,9 @@ - ;; D-register list. - - (define_expand "aarch64_get_dreg" -- [(match_operand:VDC 0 "register_operand" "=w") -- (match_operand:VSTRUCT 1 "register_operand" "w") -- (match_operand:SI 2 "immediate_operand" "i")] -+ [(match_operand:VDC 0 "register_operand") -+ (match_operand:VSTRUCT 1 "register_operand") -+ (match_operand:SI 2 "immediate_operand")] - "TARGET_SIMD" - { - int part = INTVAL (operands[2]); -@@ -5610,9 +5846,9 @@ - ;; Q-register list. - - (define_expand "aarch64_get_qreg" -- [(match_operand:VQ 0 "register_operand" "=w") -- (match_operand:VSTRUCT 1 "register_operand" "w") -- (match_operand:SI 2 "immediate_operand" "i")] -+ [(match_operand:VQ 0 "register_operand") -+ (match_operand:VSTRUCT 1 "register_operand") -+ (match_operand:SI 2 "immediate_operand")] - "TARGET_SIMD" - { - int part = INTVAL (operands[2]); -@@ -5749,13 +5985,13 @@ - ;; This instruction's pattern is generated directly by - ;; aarch64_expand_vec_perm_const, so any changes to the pattern would - ;; need corresponding changes there. --(define_insn "aarch64_" -+(define_insn "aarch64_" - [(set (match_operand:VALL_F16 0 "register_operand" "=w") - (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w") - (match_operand:VALL_F16 2 "register_operand" "w")] - PERMUTE))] - "TARGET_SIMD" -- "\\t%0., %1., %2." -+ "\\t%0., %1., %2." - [(set_attr "type" "neon_permute")] - ) - -@@ -5851,8 +6087,8 @@ - ) - - (define_expand "aarch64_st" -- [(match_operand:DI 0 "register_operand" "r") -- (match_operand:VSTRUCT 1 "register_operand" "w") -+ [(match_operand:DI 0 "register_operand") -+ (match_operand:VSTRUCT 1 "register_operand") - (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - "TARGET_SIMD" - { -@@ -5864,8 +6100,8 @@ - }) - - (define_expand "aarch64_st" -- [(match_operand:DI 0 "register_operand" "r") -- (match_operand:VSTRUCT 1 "register_operand" "w") -+ [(match_operand:DI 0 "register_operand") -+ (match_operand:VSTRUCT 1 "register_operand") - (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] - "TARGET_SIMD" - { -@@ -5877,8 +6113,8 @@ - }) - - (define_expand "aarch64_st_lane" -- [(match_operand:DI 0 "register_operand" "r") -- (match_operand:VSTRUCT 1 "register_operand" "w") -+ [(match_operand:DI 0 "register_operand") -+ (match_operand:VSTRUCT 1 "register_operand") - (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) - (match_operand:SI 2 "immediate_operand")] - "TARGET_SIMD" -@@ -5914,10 +6150,10 @@ - ;; extend them in arm_neon.h and insert the resulting Q-regs. - - (define_expand "aarch64_set_qreg" -- [(match_operand:VSTRUCT 0 "register_operand" "+w") -- (match_operand:VSTRUCT 1 "register_operand" "0") -- (match_operand:VQ 2 "register_operand" "w") -- (match_operand:SI 3 "immediate_operand" "i")] -+ [(match_operand:VSTRUCT 0 "register_operand") -+ (match_operand:VSTRUCT 1 "register_operand") -+ (match_operand:VQ 2 "register_operand") -+ (match_operand:SI 3 "immediate_operand")] - "TARGET_SIMD" - { - int part = INTVAL (operands[3]); -@@ -5932,7 +6168,7 @@ - ;; Standard pattern name vec_init. 
- - (define_expand "vec_init" -- [(match_operand:VALL_F16 0 "register_operand" "") -+ [(match_operand:VALL_F16 0 "register_operand") - (match_operand 1 "" "")] - "TARGET_SIMD" - { -@@ -5941,7 +6177,7 @@ - }) - - (define_expand "vec_init" -- [(match_operand:VQ_NO2E 0 "register_operand" "") -+ [(match_operand:VQ_NO2E 0 "register_operand") - (match_operand 1 "" "")] - "TARGET_SIMD" - { -@@ -6020,9 +6256,9 @@ - ;; Standard pattern name vec_extract. - - (define_expand "vec_extract" -- [(match_operand: 0 "aarch64_simd_nonimmediate_operand" "") -- (match_operand:VALL_F16 1 "register_operand" "") -- (match_operand:SI 2 "immediate_operand" "")] -+ [(match_operand: 0 "aarch64_simd_nonimmediate_operand") -+ (match_operand:VALL_F16 1 "register_operand") -+ (match_operand:SI 2 "immediate_operand")] - "TARGET_SIMD" - { - emit_insn -@@ -6063,56 +6299,23 @@ - - (define_insn "aarch64_crypto_aesv16qi" - [(set (match_operand:V16QI 0 "register_operand" "=w") -- (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "%0") -- (match_operand:V16QI 2 "register_operand" "w")] -+ (unspec:V16QI -+ [(xor:V16QI -+ (match_operand:V16QI 1 "register_operand" "%0") -+ (match_operand:V16QI 2 "register_operand" "w"))] - CRYPTO_AES))] - "TARGET_SIMD && TARGET_AES" - "aes\\t%0.16b, %2.16b" - [(set_attr "type" "crypto_aese")] - ) - --(define_insn "*aarch64_crypto_aesv16qi_xor_combine" -- [(set (match_operand:V16QI 0 "register_operand" "=w") -- (unspec:V16QI [(xor:V16QI -- (match_operand:V16QI 1 "register_operand" "%0") -- (match_operand:V16QI 2 "register_operand" "w")) -- (match_operand:V16QI 3 "aarch64_simd_imm_zero" "")] -- CRYPTO_AES))] -- "TARGET_SIMD && TARGET_AES" -- "aes\\t%0.16b, %2.16b" -- [(set_attr "type" "crypto_aese")] --) -- --(define_insn "*aarch64_crypto_aesv16qi_xor_combine" -- [(set (match_operand:V16QI 0 "register_operand" "=w") -- (unspec:V16QI [(match_operand:V16QI 3 "aarch64_simd_imm_zero" "") -- (xor:V16QI (match_operand:V16QI 1 "register_operand" "%0") -- (match_operand:V16QI 2 "register_operand" "w"))] -- CRYPTO_AES))] -- "TARGET_SIMD && TARGET_AES" -- "aes\\t%0.16b, %2.16b" -- [(set_attr "type" "crypto_aese")] --) -- --;; When AES/AESMC fusion is enabled we want the register allocation to --;; look like: --;; AESE Vn, _ --;; AESMC Vn, Vn --;; So prefer to tie operand 1 to operand 0 when fusing. -- - (define_insn "aarch64_crypto_aesv16qi" -- [(set (match_operand:V16QI 0 "register_operand" "=w,w") -- (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")] -+ [(set (match_operand:V16QI 0 "register_operand" "=w") -+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")] - CRYPTO_AESMC))] - "TARGET_SIMD && TARGET_AES" - "aes\\t%0.16b, %1.16b" -- [(set_attr "type" "crypto_aesmc") -- (set_attr_alternative "enabled" -- [(if_then_else (match_test -- "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)") -- (const_string "yes" ) -- (const_string "no")) -- (const_string "yes")])] -+ [(set_attr "type" "crypto_aesmc")] - ) - - ;; When AESE/AESMC fusion is enabled we really want to keep the two together -@@ -6121,12 +6324,14 @@ - ;; Mash the two together during combine. 
- - (define_insn "*aarch64_crypto_aese_fused" -- [(set (match_operand:V16QI 0 "register_operand" "=&w") -+ [(set (match_operand:V16QI 0 "register_operand" "=w") - (unspec:V16QI - [(unspec:V16QI -- [(match_operand:V16QI 1 "register_operand" "0") -- (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESE) -- ] UNSPEC_AESMC))] -+ [(xor:V16QI -+ (match_operand:V16QI 1 "register_operand" "%0") -+ (match_operand:V16QI 2 "register_operand" "w"))] -+ UNSPEC_AESE)] -+ UNSPEC_AESMC))] - "TARGET_SIMD && TARGET_AES - && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)" - "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b" -@@ -6140,12 +6345,14 @@ - ;; Mash the two together during combine. - - (define_insn "*aarch64_crypto_aesd_fused" -- [(set (match_operand:V16QI 0 "register_operand" "=&w") -+ [(set (match_operand:V16QI 0 "register_operand" "=w") - (unspec:V16QI - [(unspec:V16QI -- [(match_operand:V16QI 1 "register_operand" "0") -- (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESD) -- ] UNSPEC_AESIMC))] -+ [(xor:V16QI -+ (match_operand:V16QI 1 "register_operand" "%0") -+ (match_operand:V16QI 2 "register_operand" "w"))] -+ UNSPEC_AESD)] -+ UNSPEC_AESIMC))] - "TARGET_SIMD && TARGET_AES - && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)" - "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b" -@@ -6397,11 +6604,11 @@ - ;; fp16fml - - (define_expand "aarch64_fmll_low" -- [(set (match_operand:VDQSF 0 "register_operand" "=w") -+ [(set (match_operand:VDQSF 0 "register_operand") - (unspec:VDQSF -- [(match_operand:VDQSF 1 "register_operand" "0") -- (match_operand: 2 "register_operand" "w") -- (match_operand: 3 "register_operand" "w")] -+ [(match_operand:VDQSF 1 "register_operand") -+ (match_operand: 2 "register_operand") -+ (match_operand: 3 "register_operand")] - VFMLA16_LOW))] - "TARGET_F16FML" - { -@@ -6420,11 +6627,11 @@ - }) - - (define_expand "aarch64_fmll_high" -- [(set (match_operand:VDQSF 0 "register_operand" "=w") -+ [(set (match_operand:VDQSF 0 "register_operand") - (unspec:VDQSF -- [(match_operand:VDQSF 1 "register_operand" "0") -- (match_operand: 2 "register_operand" "w") -- (match_operand: 3 "register_operand" "w")] -+ [(match_operand:VDQSF 1 "register_operand") -+ (match_operand: 2 "register_operand") -+ (match_operand: 3 "register_operand")] - VFMLA16_HIGH))] - "TARGET_F16FML" - { -@@ -6510,11 +6717,11 @@ - ) - - (define_expand "aarch64_fmll_lane_lowv2sf" -- [(set (match_operand:V2SF 0 "register_operand" "") -- (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "") -- (match_operand:V4HF 2 "register_operand" "") -- (match_operand:V4HF 3 "register_operand" "") -- (match_operand:SI 4 "aarch64_imm2" "")] -+ [(set (match_operand:V2SF 0 "register_operand") -+ (unspec:V2SF [(match_operand:V2SF 1 "register_operand") -+ (match_operand:V4HF 2 "register_operand") -+ (match_operand:V4HF 3 "register_operand") -+ (match_operand:SI 4 "aarch64_imm2")] - VFMLA16_LOW))] - "TARGET_F16FML" - { -@@ -6531,11 +6738,11 @@ - ) - - (define_expand "aarch64_fmll_lane_highv2sf" -- [(set (match_operand:V2SF 0 "register_operand" "") -- (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "") -- (match_operand:V4HF 2 "register_operand" "") -- (match_operand:V4HF 3 "register_operand" "") -- (match_operand:SI 4 "aarch64_imm2" "")] -+ [(set (match_operand:V2SF 0 "register_operand") -+ (unspec:V2SF [(match_operand:V2SF 1 "register_operand") -+ (match_operand:V4HF 2 "register_operand") -+ (match_operand:V4HF 3 "register_operand") -+ (match_operand:SI 4 "aarch64_imm2")] - VFMLA16_HIGH))] - "TARGET_F16FML" - { -@@ 
-6625,11 +6832,11 @@ - ) - - (define_expand "aarch64_fmllq_laneq_lowv4sf" -- [(set (match_operand:V4SF 0 "register_operand" "") -- (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "") -- (match_operand:V8HF 2 "register_operand" "") -- (match_operand:V8HF 3 "register_operand" "") -- (match_operand:SI 4 "aarch64_lane_imm3" "")] -+ [(set (match_operand:V4SF 0 "register_operand") -+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand") -+ (match_operand:V8HF 2 "register_operand") -+ (match_operand:V8HF 3 "register_operand") -+ (match_operand:SI 4 "aarch64_lane_imm3")] - VFMLA16_LOW))] - "TARGET_F16FML" - { -@@ -6645,11 +6852,11 @@ - }) - - (define_expand "aarch64_fmllq_laneq_highv4sf" -- [(set (match_operand:V4SF 0 "register_operand" "") -- (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "") -- (match_operand:V8HF 2 "register_operand" "") -- (match_operand:V8HF 3 "register_operand" "") -- (match_operand:SI 4 "aarch64_lane_imm3" "")] -+ [(set (match_operand:V4SF 0 "register_operand") -+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand") -+ (match_operand:V8HF 2 "register_operand") -+ (match_operand:V8HF 3 "register_operand") -+ (match_operand:SI 4 "aarch64_lane_imm3")] - VFMLA16_HIGH))] - "TARGET_F16FML" - { -@@ -6739,11 +6946,11 @@ - ) - - (define_expand "aarch64_fmll_laneq_lowv2sf" -- [(set (match_operand:V2SF 0 "register_operand" "") -- (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "") -- (match_operand:V4HF 2 "register_operand" "") -- (match_operand:V8HF 3 "register_operand" "") -- (match_operand:SI 4 "aarch64_lane_imm3" "")] -+ [(set (match_operand:V2SF 0 "register_operand") -+ (unspec:V2SF [(match_operand:V2SF 1 "register_operand") -+ (match_operand:V4HF 2 "register_operand") -+ (match_operand:V8HF 3 "register_operand") -+ (match_operand:SI 4 "aarch64_lane_imm3")] - VFMLA16_LOW))] - "TARGET_F16FML" - { -@@ -6760,11 +6967,11 @@ - }) - - (define_expand "aarch64_fmll_laneq_highv2sf" -- [(set (match_operand:V2SF 0 "register_operand" "") -- (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "") -- (match_operand:V4HF 2 "register_operand" "") -- (match_operand:V8HF 3 "register_operand" "") -- (match_operand:SI 4 "aarch64_lane_imm3" "")] -+ [(set (match_operand:V2SF 0 "register_operand") -+ (unspec:V2SF [(match_operand:V2SF 1 "register_operand") -+ (match_operand:V4HF 2 "register_operand") -+ (match_operand:V8HF 3 "register_operand") -+ (match_operand:SI 4 "aarch64_lane_imm3")] - VFMLA16_HIGH))] - "TARGET_F16FML" - { -@@ -6855,11 +7062,11 @@ - ) - - (define_expand "aarch64_fmllq_lane_lowv4sf" -- [(set (match_operand:V4SF 0 "register_operand" "") -- (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "") -- (match_operand:V8HF 2 "register_operand" "") -- (match_operand:V4HF 3 "register_operand" "") -- (match_operand:SI 4 "aarch64_imm2" "")] -+ [(set (match_operand:V4SF 0 "register_operand") -+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand") -+ (match_operand:V8HF 2 "register_operand") -+ (match_operand:V4HF 3 "register_operand") -+ (match_operand:SI 4 "aarch64_imm2")] - VFMLA16_LOW))] - "TARGET_F16FML" - { -@@ -6875,11 +7082,11 @@ - }) - - (define_expand "aarch64_fmllq_lane_highv4sf" -- [(set (match_operand:V4SF 0 "register_operand" "") -- (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "") -- (match_operand:V8HF 2 "register_operand" "") -- (match_operand:V4HF 3 "register_operand" "") -- (match_operand:SI 4 "aarch64_imm2" "")] -+ [(set (match_operand:V4SF 0 "register_operand") -+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand") -+ 
(match_operand:V8HF 2 "register_operand") -+ (match_operand:V4HF 3 "register_operand") -+ (match_operand:SI 4 "aarch64_imm2")] - VFMLA16_HIGH))] - "TARGET_F16FML" - { -diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc -new file mode 100644 -index 000000000..b28ded0f5 ---- /dev/null -+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc -@@ -0,0 +1,2760 @@ -+/* ACLE support for AArch64 SVE (__ARM_FEATURE_SVE intrinsics) -+ Copyright (C) 2018-2019 Free Software Foundation, Inc. -+ -+ This file is part of GCC. -+ -+ GCC is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published by -+ the Free Software Foundation; either version 3, or (at your option) -+ any later version. -+ -+ GCC is distributed in the hope that it will be useful, but -+ WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ General Public License for more details. -+ -+ You should have received a copy of the GNU General Public License -+ along with GCC; see the file COPYING3. If not see -+ . */ -+ -+#include "config.h" -+#include "system.h" -+#include "coretypes.h" -+#include "tm.h" -+#include "tree.h" -+#include "rtl.h" -+#include "tm_p.h" -+#include "memmodel.h" -+#include "insn-codes.h" -+#include "optabs.h" -+#include "recog.h" -+#include "expr.h" -+#include "basic-block.h" -+#include "function.h" -+#include "fold-const.h" -+#include "gimple.h" -+#include "gimple-iterator.h" -+#include "gimplify.h" -+#include "explow.h" -+#include "emit-rtl.h" -+#include "tree-vector-builder.h" -+#include "rtx-vector-builder.h" -+#include "vec-perm-indices.h" -+#include "aarch64-sve-builtins.h" -+#include "aarch64-sve-builtins-shapes.h" -+#include "aarch64-sve-builtins-base.h" -+#include "aarch64-sve-builtins-functions.h" -+ -+using namespace aarch64_sve; -+ -+namespace { -+ -+/* Expand a call to svmad, or svmla after reordering its operands. -+ Make _m forms merge with argument MERGE_ARGNO. */ -+static rtx -+expand_mad (function_expander &e, -+ unsigned int merge_argno = DEFAULT_MERGE_ARGNO) -+{ -+ if (e.pred == PRED_x) -+ { -+ insn_code icode; -+ if (e.type_suffix (0).integer_p) -+ icode = code_for_aarch64_pred_fma (e.vector_mode (0)); -+ else -+ icode = code_for_aarch64_pred (UNSPEC_COND_FMLA, e.vector_mode (0)); -+ return e.use_pred_x_insn (icode); -+ } -+ -+ insn_code icode = e.direct_optab_handler (cond_fma_optab); -+ return e.use_cond_insn (icode, merge_argno); -+} -+ -+/* Expand a call to svmsb, or svmls after reordering its operands. -+ Make _m forms merge with argument MERGE_ARGNO. */ -+static rtx -+expand_msb (function_expander &e, -+ unsigned int merge_argno = DEFAULT_MERGE_ARGNO) -+{ -+ if (e.pred == PRED_x) -+ { -+ insn_code icode; -+ if (e.type_suffix (0).integer_p) -+ icode = code_for_aarch64_pred_fnma (e.vector_mode (0)); -+ else -+ icode = code_for_aarch64_pred (UNSPEC_COND_FMLS, e.vector_mode (0)); -+ return e.use_pred_x_insn (icode); -+ } -+ -+ insn_code icode = e.direct_optab_handler (cond_fnma_optab); -+ return e.use_cond_insn (icode, merge_argno); -+} -+ -+class svabd_impl : public function_base -+{ -+public: -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ /* The integer operations are represented as the subtraction of the -+ minimum from the maximum, with the signedness of the instruction -+ keyed off the signedness of the maximum operation. */ -+ rtx_code max_code = e.type_suffix (0).unsigned_p ? 
UMAX : SMAX; -+ insn_code icode; -+ if (e.pred == PRED_x) -+ { -+ if (e.type_suffix (0).integer_p) -+ icode = code_for_aarch64_pred_abd (max_code, e.vector_mode (0)); -+ else -+ icode = code_for_aarch64_pred_abd (e.vector_mode (0)); -+ return e.use_pred_x_insn (icode); -+ } -+ -+ if (e.type_suffix (0).integer_p) -+ icode = code_for_aarch64_cond_abd (max_code, e.vector_mode (0)); -+ else -+ icode = code_for_aarch64_cond_abd (e.vector_mode (0)); -+ return e.use_cond_insn (icode); -+ } -+}; -+ -+/* Implements svacge, svacgt, svacle and svaclt. */ -+class svac_impl : public function_base -+{ -+public: -+ CONSTEXPR svac_impl (int unspec) : m_unspec (unspec) {} -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ e.add_ptrue_hint (0, e.gp_mode (0)); -+ insn_code icode = code_for_aarch64_pred_fac (m_unspec, e.vector_mode (0)); -+ return e.use_exact_insn (icode); -+ } -+ -+ /* The unspec code for the underlying comparison. */ -+ int m_unspec; -+}; -+ -+class svadda_impl : public function_base -+{ -+public: -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ /* Put the predicate last, as required by mask_fold_left_plus_optab. */ -+ e.rotate_inputs_left (0, 3); -+ machine_mode mode = e.vector_mode (0); -+ insn_code icode = direct_optab_handler (mask_fold_left_plus_optab, mode); -+ return e.use_exact_insn (icode); -+ } -+}; -+ -+/* Implements svadr[bhwd]. */ -+class svadr_bhwd_impl : public function_base -+{ -+public: -+ CONSTEXPR svadr_bhwd_impl (unsigned int shift) : m_shift (shift) {} -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ machine_mode mode = GET_MODE (e.args[0]); -+ if (m_shift == 0) -+ return e.use_exact_insn (code_for_aarch64_adr (mode)); -+ -+ /* Turn the access size into an extra shift argument. */ -+ rtx shift = gen_int_mode (m_shift, GET_MODE_INNER (mode)); -+ e.args.quick_push (expand_vector_broadcast (mode, shift)); -+ return e.use_exact_insn (code_for_aarch64_adr_shift (mode)); -+ } -+ -+ /* How many bits left to shift the vector displacement. */ -+ unsigned int m_shift; -+}; -+ -+class svasrd_impl : public function_base -+{ -+public: -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ return e.use_cond_insn (code_for_cond_asrd (e.vector_mode (0))); -+ } -+}; -+ -+class svbic_impl : public function_base -+{ -+public: -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ /* Convert svbic of a constant into svand of its inverse. */ -+ if (CONST_INT_P (e.args[2])) -+ { -+ machine_mode mode = GET_MODE_INNER (e.vector_mode (0)); -+ e.args[2] = simplify_unary_operation (NOT, mode, e.args[2], mode); -+ return e.map_to_rtx_codes (AND, AND, -1); -+ } -+ -+ if (e.type_suffix_ids[0] == TYPE_SUFFIX_b) -+ { -+ gcc_assert (e.pred == PRED_z); -+ return e.use_exact_insn (CODE_FOR_aarch64_pred_bicvnx16bi_z); -+ } -+ -+ if (e.pred == PRED_x) -+ return e.use_unpred_insn (code_for_aarch64_bic (e.vector_mode (0))); -+ -+ return e.use_cond_insn (code_for_cond_bic (e.vector_mode (0))); -+ } -+}; -+ -+/* Implements svbrkn, svbrkpa and svbrkpb. */ -+class svbrk_binary_impl : public function_base -+{ -+public: -+ CONSTEXPR svbrk_binary_impl (int unspec) : m_unspec (unspec) {} -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ return e.use_exact_insn (code_for_aarch64_brk (m_unspec)); -+ } -+ -+ /* The unspec code associated with the operation. */ -+ int m_unspec; -+}; -+ -+/* Implements svbrka and svbrkb. 
*/ -+class svbrk_unary_impl : public function_base -+{ -+public: -+ CONSTEXPR svbrk_unary_impl (int unspec) : m_unspec (unspec) {} -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ return e.use_cond_insn (code_for_aarch64_brk (m_unspec)); -+ } -+ -+ /* The unspec code associated with the operation. */ -+ int m_unspec; -+}; -+ -+class svcadd_impl : public function_base -+{ -+public: -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ /* Convert the rotation amount into a specific unspec. */ -+ int rot = INTVAL (e.args[3]); -+ e.args.ordered_remove (3); -+ int unspec = (rot == 90 ? UNSPEC_COND_FCADD90 -+ : rot == 270 ? UNSPEC_COND_FCADD270 -+ : (gcc_unreachable (), 0)); -+ return e.map_to_unspecs (-1, -1, unspec); -+ } -+}; -+ -+/* Implements svclasta and svclastb. */ -+class svclast_impl : public quiet -+{ -+public: -+ CONSTEXPR svclast_impl (int unspec) : m_unspec (unspec) {} -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ /* Match the fold_extract_optab order. */ -+ std::swap (e.args[0], e.args[1]); -+ machine_mode mode = e.vector_mode (0); -+ insn_code icode; -+ if (e.mode_suffix_id == MODE_n) -+ icode = code_for_fold_extract (m_unspec, mode); -+ else -+ icode = code_for_aarch64_fold_extract_vector (m_unspec, mode); -+ return e.use_exact_insn (icode); -+ } -+ -+ /* The unspec code associated with the operation. */ -+ int m_unspec; -+}; -+ -+class svcmla_impl : public function_base -+{ -+public: -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ /* Convert the rotation amount into a specific unspec. */ -+ int rot = INTVAL (e.args[4]); -+ e.args.ordered_remove (4); -+ int unspec = (rot == 0 ? UNSPEC_COND_FCMLA -+ : rot == 90 ? UNSPEC_COND_FCMLA90 -+ : rot == 180 ? UNSPEC_COND_FCMLA180 -+ : rot == 270 ? UNSPEC_COND_FCMLA270 -+ : (gcc_unreachable (), 0)); -+ -+ /* Make the operand order the same as the one used by the fma optabs, -+ with the accumulator last. */ -+ e.rotate_inputs_left (1, 4); -+ return e.map_to_unspecs (-1, -1, unspec, 3); -+ } -+}; -+ -+class svcmla_lane_impl : public function_base -+{ -+public: -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ /* Convert the rotation amount into a specific unspec. */ -+ int rot = INTVAL (e.args[4]); -+ e.args.ordered_remove (4); -+ int unspec = (rot == 0 ? UNSPEC_FCMLA -+ : rot == 90 ? UNSPEC_FCMLA90 -+ : rot == 180 ? UNSPEC_FCMLA180 -+ : rot == 270 ? UNSPEC_FCMLA270 -+ : (gcc_unreachable (), 0)); -+ -+ /* Make the operand order the same as the one used by the fma optabs, -+ with the accumulator last. */ -+ e.rotate_inputs_left (0, 4); -+ insn_code icode = code_for_aarch64_lane (unspec, e.vector_mode (0)); -+ return e.use_exact_insn (icode); -+ } -+}; -+ -+/* Implements svcmp (except svcmpuo, which is handled separately). */ -+class svcmp_impl : public function_base -+{ -+public: -+ CONSTEXPR svcmp_impl (tree_code code, int unspec_for_fp) -+ : m_code (code), m_unspec_for_fp (unspec_for_fp) {} -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ machine_mode mode = e.vector_mode (0); -+ -+ /* Comparisons are UNSPEC_PRED_Z operations and so need a hint -+ operand. 
*/ -+ e.add_ptrue_hint (0, e.gp_mode (0)); -+ -+ if (e.type_suffix (0).integer_p) -+ { -+ bool unsigned_p = e.type_suffix (0).unsigned_p; -+ rtx_code code = get_rtx_code (m_code, unsigned_p); -+ return e.use_exact_insn (code_for_aarch64_pred_cmp (code, mode)); -+ } -+ -+ insn_code icode = code_for_aarch64_pred_fcm (m_unspec_for_fp, mode); -+ return e.use_exact_insn (icode); -+ } -+ -+ /* The tree code associated with the comparison. */ -+ tree_code m_code; -+ -+ /* The unspec code to use for floating-point comparisons. */ -+ int m_unspec_for_fp; -+}; -+ -+/* Implements svcmp_wide. */ -+class svcmp_wide_impl : public function_base -+{ -+public: -+ CONSTEXPR svcmp_wide_impl (tree_code code, int unspec_for_sint, -+ int unspec_for_uint) -+ : m_code (code), m_unspec_for_sint (unspec_for_sint), -+ m_unspec_for_uint (unspec_for_uint) {} -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ machine_mode mode = e.vector_mode (0); -+ bool unsigned_p = e.type_suffix (0).unsigned_p; -+ rtx_code code = get_rtx_code (m_code, unsigned_p); -+ -+ /* Comparisons are UNSPEC_PRED_Z operations and so need a hint -+ operand. */ -+ e.add_ptrue_hint (0, e.gp_mode (0)); -+ -+ /* If the argument is a constant that the unwidened comparisons -+ can handle directly, use them instead. */ -+ insn_code icode = code_for_aarch64_pred_cmp (code, mode); -+ rtx op2 = unwrap_const_vec_duplicate (e.args[3]); -+ if (CONSTANT_P (op2) -+ && insn_data[icode].operand[4].predicate (op2, DImode)) -+ { -+ e.args[3] = op2; -+ return e.use_exact_insn (icode); -+ } -+ -+ int unspec = (unsigned_p ? m_unspec_for_uint : m_unspec_for_sint); -+ return e.use_exact_insn (code_for_aarch64_pred_cmp_wide (unspec, mode)); -+ } -+ -+ /* The tree code associated with the comparison. */ -+ tree_code m_code; -+ -+ /* The unspec codes for signed and unsigned wide comparisons -+ respectively. */ -+ int m_unspec_for_sint; -+ int m_unspec_for_uint; -+}; -+ -+class svcmpuo_impl : public quiet -+{ -+public: -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ e.add_ptrue_hint (0, e.gp_mode (0)); -+ return e.use_exact_insn (code_for_aarch64_pred_fcmuo (e.vector_mode (0))); -+ } -+}; -+ -+class svcnot_impl : public function_base -+{ -+public: -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ machine_mode mode = e.vector_mode (0); -+ if (e.pred == PRED_x) -+ { -+ /* The pattern for CNOT includes an UNSPEC_PRED_Z, so needs -+ a ptrue hint. */ -+ e.add_ptrue_hint (0, e.gp_mode (0)); -+ return e.use_pred_x_insn (code_for_aarch64_pred_cnot (mode)); -+ } -+ -+ return e.use_cond_insn (code_for_cond_cnot (mode), 0); -+ } -+}; -+ -+/* Implements svcnt[bhwd], which count the number of elements -+ in a particular vector mode. */ -+class svcnt_bhwd_impl : public function_base -+{ -+public: -+ CONSTEXPR svcnt_bhwd_impl (machine_mode ref_mode) : m_ref_mode (ref_mode) {} -+ -+ gimple * -+ fold (gimple_folder &f) const OVERRIDE -+ { -+ tree count = build_int_cstu (TREE_TYPE (f.lhs), -+ GET_MODE_NUNITS (m_ref_mode)); -+ return gimple_build_assign (f.lhs, count); -+ } -+ -+ rtx -+ expand (function_expander &) const OVERRIDE -+ { -+ return gen_int_mode (GET_MODE_NUNITS (m_ref_mode), DImode); -+ } -+ -+ /* The mode of the vector associated with the [bhwd] suffix. */ -+ machine_mode m_ref_mode; -+}; -+ -+/* Implements svcnt[bhwd]_pat. 
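As the comments above describe, svcnt[bhwd] simply returns the number of elements of the given size in one SVE vector, and the _pat form with SV_ALL folds to the same value. A minimal illustrative example at the ACLE level (assuming arm_sve.h and an SVE-enabled target; not taken from the patch):

#include <arm_sve.h>

/* Number of byte elements in one SVE vector register
   (vector-length dependent, e.g. 32 for a 256-bit implementation).  */
uint64_t
bytes_per_vector (void)
{
  return svcntb ();
}

/* svcntb_pat (SV_ALL) folds to the same value as svcntb ().  */
uint64_t
bytes_per_vector_pat (void)
{
  return svcntb_pat (SV_ALL);
}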
*/ -+class svcnt_bhwd_pat_impl : public svcnt_bhwd_impl -+{ -+public: -+ CONSTEXPR svcnt_bhwd_pat_impl (machine_mode ref_mode) -+ : svcnt_bhwd_impl (ref_mode) {} -+ -+ gimple * -+ fold (gimple_folder &f) const OVERRIDE -+ { -+ tree pattern_arg = gimple_call_arg (f.call, 0); -+ aarch64_svpattern pattern = (aarch64_svpattern) tree_to_shwi (pattern_arg); -+ -+ if (pattern == AARCH64_SV_ALL) -+ /* svcvnt[bwhd]_pat (SV_ALL) == svcnt[bwhd] (). */ -+ return svcnt_bhwd_impl::fold (f); -+ -+ /* See whether we can count the number of elements in the pattern -+ at compile time. */ -+ unsigned int elements_per_vq = 128 / GET_MODE_UNIT_BITSIZE (m_ref_mode); -+ HOST_WIDE_INT value = aarch64_fold_sve_cnt_pat (pattern, elements_per_vq); -+ if (value >= 0) -+ { -+ tree count = build_int_cstu (TREE_TYPE (f.lhs), value); -+ return gimple_build_assign (f.lhs, count); -+ } -+ -+ return NULL; -+ } -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ unsigned int elements_per_vq = 128 / GET_MODE_UNIT_BITSIZE (m_ref_mode); -+ e.args.quick_push (gen_int_mode (elements_per_vq, DImode)); -+ e.args.quick_push (const1_rtx); -+ return e.use_exact_insn (CODE_FOR_aarch64_sve_cnt_pat); -+ } -+}; -+ -+class svcntp_impl : public function_base -+{ -+public: -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ machine_mode mode = e.vector_mode (0); -+ e.add_ptrue_hint (0, mode); -+ return e.use_exact_insn (code_for_aarch64_pred_cntp (mode)); -+ } -+}; -+ -+/* Implements svcreate2, svcreate3 and svcreate4. */ -+class svcreate_impl : public quiet -+{ -+public: -+ CONSTEXPR svcreate_impl (unsigned int vectors_per_tuple) -+ : quiet (vectors_per_tuple) {} -+ -+ gimple * -+ fold (gimple_folder &f) const OVERRIDE -+ { -+ unsigned int nargs = gimple_call_num_args (f.call); -+ tree lhs_type = TREE_TYPE (f.lhs); -+ -+ /* Replace the call with a clobber of the result (to prevent it from -+ becoming upwards exposed) followed by stores into each individual -+ vector of tuple. -+ -+ The fold routines expect the replacement statement to have the -+ same lhs as the original call, so return the clobber statement -+ rather than the final vector store. */ -+ gassign *clobber = gimple_build_assign (f.lhs, build_clobber (lhs_type)); -+ -+ for (unsigned int i = nargs; i-- > 0; ) -+ { -+ tree rhs_vector = gimple_call_arg (f.call, i); -+ tree field = tuple_type_field (TREE_TYPE (f.lhs)); -+ tree lhs_array = build3 (COMPONENT_REF, TREE_TYPE (field), -+ unshare_expr (f.lhs), field, NULL_TREE); -+ tree lhs_vector = build4 (ARRAY_REF, TREE_TYPE (rhs_vector), -+ lhs_array, size_int (i), -+ NULL_TREE, NULL_TREE); -+ gassign *assign = gimple_build_assign (lhs_vector, rhs_vector); -+ gsi_insert_after (f.gsi, assign, GSI_SAME_STMT); -+ } -+ return clobber; -+ } -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ rtx lhs_tuple = e.get_nonoverlapping_reg_target (); -+ -+ /* Record that LHS_TUPLE is dead before the first store. */ -+ emit_clobber (lhs_tuple); -+ for (unsigned int i = 0; i < e.args.length (); ++i) -+ { -+ /* Use an lvalue subreg to refer to vector I in LHS_TUPLE. 
*/ -+ rtx lhs_vector = simplify_gen_subreg (GET_MODE (e.args[i]), -+ lhs_tuple, GET_MODE (lhs_tuple), -+ i * BYTES_PER_SVE_VECTOR); -+ emit_move_insn (lhs_vector, e.args[i]); -+ } -+ return lhs_tuple; -+ } -+}; -+ -+class svcvt_impl : public function_base -+{ -+public: -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ machine_mode mode0 = e.vector_mode (0); -+ machine_mode mode1 = e.vector_mode (1); -+ insn_code icode; -+ /* All this complication comes from the need to select four things -+ simultaneously: -+ -+ (1) the kind of conversion (int<-float, float<-int, float<-float) -+ (2) signed vs. unsigned integers, where relevant -+ (3) the predication mode, which must be the wider of the predication -+ modes for MODE0 and MODE1 -+ (4) the predication type (m, x or z) -+ -+ The only supported int<->float conversions for which the integer is -+ narrower than the float are SI<->DF. It's therefore more convenient -+ to handle (3) by defining two patterns for int<->float conversions: -+ one in which the integer is at least as wide as the float and so -+ determines the predication mode, and another single SI<->DF pattern -+ in which the float's mode determines the predication mode (which is -+ always VNx2BI in that case). -+ -+ The names of the patterns follow the optab convention of giving -+ the source mode before the destination mode. */ -+ if (e.type_suffix (1).integer_p) -+ { -+ int unspec = (e.type_suffix (1).unsigned_p -+ ? UNSPEC_COND_UCVTF -+ : UNSPEC_COND_SCVTF); -+ if (e.type_suffix (0).element_bytes <= e.type_suffix (1).element_bytes) -+ icode = (e.pred == PRED_x -+ ? code_for_aarch64_sve_nonextend (unspec, mode1, mode0) -+ : code_for_cond_nonextend (unspec, mode1, mode0)); -+ else -+ icode = (e.pred == PRED_x -+ ? code_for_aarch64_sve_extend (unspec, mode1, mode0) -+ : code_for_cond_extend (unspec, mode1, mode0)); -+ } -+ else -+ { -+ int unspec = (!e.type_suffix (0).integer_p ? UNSPEC_COND_FCVT -+ : e.type_suffix (0).unsigned_p ? UNSPEC_COND_FCVTZU -+ : UNSPEC_COND_FCVTZS); -+ if (e.type_suffix (0).element_bytes >= e.type_suffix (1).element_bytes) -+ icode = (e.pred == PRED_x -+ ? code_for_aarch64_sve_nontrunc (unspec, mode1, mode0) -+ : code_for_cond_nontrunc (unspec, mode1, mode0)); -+ else -+ icode = (e.pred == PRED_x -+ ? code_for_aarch64_sve_trunc (unspec, mode1, mode0) -+ : code_for_cond_trunc (unspec, mode1, mode0)); -+ } -+ -+ if (e.pred == PRED_x) -+ return e.use_pred_x_insn (icode); -+ return e.use_cond_insn (icode); -+ } -+}; -+ -+class svdot_impl : public function_base -+{ -+public: -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ /* In the optab, the multiplication operands come before the accumulator -+ operand. The optab is keyed off the multiplication mode. */ -+ e.rotate_inputs_left (0, 3); -+ insn_code icode -+ = e.direct_optab_handler_for_sign (sdot_prod_optab, udot_prod_optab, -+ 0, GET_MODE (e.args[0])); -+ return e.use_unpred_insn (icode); -+ } -+}; -+ -+class svdotprod_lane_impl : public unspec_based_function_base -+{ -+public: -+ CONSTEXPR svdotprod_lane_impl (int unspec_for_sint, -+ int unspec_for_uint, -+ int unspec_for_float) -+ : unspec_based_function_base (unspec_for_sint, -+ unspec_for_uint, -+ unspec_for_float) {} -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ /* Use the same ordering as the dot_prod_optab, with the -+ accumulator last. 
*/ -+ e.rotate_inputs_left (0, 4); -+ int unspec = unspec_for (e); -+ machine_mode mode = e.vector_mode (0); -+ return e.use_exact_insn (code_for_aarch64_dot_prod_lane (unspec, mode)); -+ } -+}; -+ -+class svdup_impl : public quiet -+{ -+public: -+ gimple * -+ fold (gimple_folder &f) const OVERRIDE -+ { -+ tree vec_type = TREE_TYPE (f.lhs); -+ tree rhs = gimple_call_arg (f.call, f.pred == PRED_none ? 0 : 1); -+ -+ if (f.pred == PRED_none || f.pred == PRED_x) -+ { -+ if (CONSTANT_CLASS_P (rhs)) -+ { -+ if (f.type_suffix (0).bool_p) -+ return (tree_to_shwi (rhs) -+ ? f.fold_to_ptrue () -+ : f.fold_to_pfalse ()); -+ -+ tree rhs_vector = build_vector_from_val (vec_type, rhs); -+ return gimple_build_assign (f.lhs, rhs_vector); -+ } -+ -+ /* Avoid folding _b to a VEC_DUPLICATE_EXPR, since to do that we -+ would need to introduce an extra and unwanted conversion to -+ the truth vector element type. */ -+ if (!f.type_suffix (0).bool_p) -+ return gimple_build_assign (f.lhs, VEC_DUPLICATE_EXPR, rhs); -+ } -+ -+ return NULL; -+ } -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ if (e.pred == PRED_none || e.pred == PRED_x) -+ /* There's no benefit to using predicated instructions for _x here. */ -+ return e.use_unpred_insn (e.direct_optab_handler (vec_duplicate_optab)); -+ -+ /* Model predicated svdups as a SEL in which the "true" value is -+ the duplicate of the function argument and the "false" value -+ is the value of inactive lanes. */ -+ insn_code icode; -+ machine_mode mode = e.vector_mode (0); -+ if (valid_for_const_vector_p (GET_MODE_INNER (mode), e.args.last ())) -+ /* Duplicate the constant to fill a vector. The pattern optimizes -+ various cases involving constant operands, falling back to SEL -+ if necessary. */ -+ icode = code_for_vcond_mask (mode, mode); -+ else -+ /* Use the pattern for selecting between a duplicated scalar -+ variable and a vector fallback. */ -+ icode = code_for_aarch64_sel_dup (mode); -+ return e.use_vcond_mask_insn (icode); -+ } -+}; -+ -+class svdup_lane_impl : public quiet -+{ -+public: -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ /* The native DUP lane has an index range of 64 bytes. */ -+ machine_mode mode = e.vector_mode (0); -+ if (CONST_INT_P (e.args[1]) -+ && IN_RANGE (INTVAL (e.args[1]) * GET_MODE_UNIT_SIZE (mode), 0, 63)) -+ return e.use_exact_insn (code_for_aarch64_sve_dup_lane (mode)); -+ -+ /* Treat svdup_lane as if it were svtbl_n. */ -+ return e.use_exact_insn (code_for_aarch64_sve_tbl (e.vector_mode (0))); -+ } -+}; -+ -+class svdupq_impl : public quiet -+{ -+public: -+ gimple * -+ fold (gimple_folder &f) const OVERRIDE -+ { -+ tree vec_type = TREE_TYPE (f.lhs); -+ unsigned int nargs = gimple_call_num_args (f.call); -+ /* For predicates, pad out each argument so that we have one element -+ per bit. */ -+ unsigned int factor = (f.type_suffix (0).bool_p -+ ? 
f.type_suffix (0).element_bytes : 1); -+ tree_vector_builder builder (vec_type, nargs * factor, 1); -+ for (unsigned int i = 0; i < nargs; ++i) -+ { -+ tree elt = gimple_call_arg (f.call, i); -+ if (!CONSTANT_CLASS_P (elt)) -+ return NULL; -+ builder.quick_push (elt); -+ for (unsigned int j = 1; j < factor; ++j) -+ builder.quick_push (build_zero_cst (TREE_TYPE (vec_type))); -+ } -+ return gimple_build_assign (f.lhs, builder.build ()); -+ } -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ machine_mode mode = e.vector_mode (0); -+ unsigned int elements_per_vq = e.args.length (); -+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) -+ { -+ /* Construct a vector of integers so that we can compare them against -+ zero below. Zero vs. nonzero is the only distinction that -+ matters. */ -+ mode = aarch64_sve_int_mode (mode); -+ for (unsigned int i = 0; i < elements_per_vq; ++i) -+ e.args[i] = simplify_gen_unary (ZERO_EXTEND, GET_MODE_INNER (mode), -+ e.args[i], QImode); -+ } -+ -+ /* Get the 128-bit Advanced SIMD vector for this data size. */ -+ scalar_mode element_mode = GET_MODE_INNER (mode); -+ machine_mode vq_mode = aarch64_vq_mode (element_mode).require (); -+ gcc_assert (known_eq (elements_per_vq, GET_MODE_NUNITS (vq_mode))); -+ -+ /* Put the arguments into a 128-bit Advanced SIMD vector. We want -+ argument N to go into architectural lane N, whereas Advanced SIMD -+ vectors are loaded memory lsb to register lsb. We therefore need -+ to reverse the elements for big-endian targets. */ -+ rtx vq_reg = gen_reg_rtx (vq_mode); -+ rtvec vec = rtvec_alloc (elements_per_vq); -+ for (unsigned int i = 0; i < elements_per_vq; ++i) -+ { -+ unsigned int argno = BYTES_BIG_ENDIAN ? elements_per_vq - i - 1 : i; -+ RTVEC_ELT (vec, i) = e.args[argno]; -+ } -+ aarch64_expand_vector_init (vq_reg, gen_rtx_PARALLEL (vq_mode, vec)); -+ -+ /* If the result is a boolean, compare the data vector against zero. */ -+ if (mode != e.vector_mode (0)) -+ { -+ rtx data_dupq = aarch64_expand_sve_dupq (NULL, mode, vq_reg); -+ return aarch64_convert_sve_data_to_pred (e.possible_target, -+ e.vector_mode (0), data_dupq); -+ } -+ -+ return aarch64_expand_sve_dupq (e.possible_target, mode, vq_reg); -+ } -+}; -+ -+class svdupq_lane_impl : public quiet -+{ -+public: -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ machine_mode mode = e.vector_mode (0); -+ rtx index = e.args[1]; -+ if (CONST_INT_P (index) && IN_RANGE (INTVAL (index), 0, 3)) -+ { -+ /* Use the .Q form of DUP, which is the native instruction for -+ this function. */ -+ insn_code icode = code_for_aarch64_sve_dupq_lane (mode); -+ unsigned int num_indices = e.elements_per_vq (0); -+ rtx indices = aarch64_gen_stepped_int_parallel -+ (num_indices, INTVAL (index) * num_indices, 1); -+ -+ e.add_output_operand (icode); -+ e.add_input_operand (icode, e.args[0]); -+ e.add_fixed_operand (indices); -+ return e.generate_insn (icode); -+ } -+ -+ /* Build a .D TBL index for the pairs of doublewords that we want to -+ duplicate. */ -+ if (CONST_INT_P (index)) -+ { -+ /* The index vector is a constant. */ -+ rtx_vector_builder builder (VNx2DImode, 2, 1); -+ builder.quick_push (gen_int_mode (INTVAL (index) * 2, DImode)); -+ builder.quick_push (gen_int_mode (INTVAL (index) * 2 + 1, DImode)); -+ index = builder.build (); -+ } -+ else -+ { -+ /* Duplicate INDEX * 2 to fill a DImode vector. The ACLE spec -+ explicitly allows the top of the index to be dropped. 
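svdupq builds one 128-bit block from its scalar arguments and replicates it across the whole vector, which is why the expander above first constructs a 128-bit Advanced SIMD vector (reversing lanes for big-endian targets) before duplicating it; svdupq_lane instead selects which existing 128-bit block to replicate. A hedged usage sketch with the ACLE intrinsics (assuming arm_sve.h; illustrative only, not from the patch):

#include <arm_sve.h>

/* Replicate the 128-bit pattern {1, 2, 3, 4} across the full vector:
   a 256-bit SVE implementation would hold 1,2,3,4,1,2,3,4.  */
svint32_t
repeating_pattern (void)
{
  return svdupq_n_s32 (1, 2, 3, 4);
}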
*/ -+ index = force_reg (DImode, simplify_gen_binary (ASHIFT, DImode, -+ index, const1_rtx)); -+ index = expand_vector_broadcast (VNx2DImode, index); -+ -+ /* Get an alternating 0, 1 predicate. */ -+ rtx_vector_builder builder (VNx2BImode, 2, 1); -+ builder.quick_push (const0_rtx); -+ builder.quick_push (constm1_rtx); -+ rtx pg = force_reg (VNx2BImode, builder.build ()); -+ -+ /* Add one to the odd elements of the index. */ -+ rtx one = force_reg (VNx2DImode, CONST1_RTX (VNx2DImode)); -+ rtx target = gen_reg_rtx (VNx2DImode); -+ emit_insn (gen_cond_addvnx2di (target, pg, index, one, index)); -+ index = target; -+ } -+ -+ e.args[0] = gen_lowpart (VNx2DImode, e.args[0]); -+ e.args[1] = index; -+ return e.use_exact_insn (CODE_FOR_aarch64_sve_tblvnx2di); -+ } -+}; -+ -+/* Implements svextb, svexth and svextw. */ -+class svext_bhw_impl : public function_base -+{ -+public: -+ CONSTEXPR svext_bhw_impl (scalar_int_mode from_mode) -+ : m_from_mode (from_mode) {} -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ if (e.type_suffix (0).unsigned_p) -+ { -+ /* Convert to an AND. The widest we go is 0xffffffff, which fits -+ in a CONST_INT. */ -+ e.args.quick_push (GEN_INT (GET_MODE_MASK (m_from_mode))); -+ if (e.pred == PRED_m) -+ /* We now have arguments "(inactive, pg, op, mask)". Convert this -+ to "(pg, op, mask, inactive)" so that the order matches svand_m -+ with an extra argument on the end. Take the inactive elements -+ from this extra argument. */ -+ e.rotate_inputs_left (0, 4); -+ return e.map_to_rtx_codes (AND, AND, -1, 3); -+ } -+ -+ machine_mode wide_mode = e.vector_mode (0); -+ poly_uint64 nunits = GET_MODE_NUNITS (wide_mode); -+ machine_mode narrow_mode -+ = aarch64_sve_data_mode (m_from_mode, nunits).require (); -+ if (e.pred == PRED_x) -+ { -+ insn_code icode = code_for_aarch64_pred_sxt (wide_mode, narrow_mode); -+ return e.use_pred_x_insn (icode); -+ } -+ -+ insn_code icode = code_for_aarch64_cond_sxt (wide_mode, narrow_mode); -+ return e.use_cond_insn (icode); -+ } -+ -+ /* The element mode that we're extending from. */ -+ scalar_int_mode m_from_mode; -+}; -+ -+/* Implements svget2, svget3 and svget4. */ -+class svget_impl : public quiet -+{ -+public: -+ CONSTEXPR svget_impl (unsigned int vectors_per_tuple) -+ : quiet (vectors_per_tuple) {} -+ -+ gimple * -+ fold (gimple_folder &f) const OVERRIDE -+ { -+ /* Fold into a normal gimple component access. */ -+ tree rhs_tuple = gimple_call_arg (f.call, 0); -+ tree index = gimple_call_arg (f.call, 1); -+ tree field = tuple_type_field (TREE_TYPE (rhs_tuple)); -+ tree rhs_array = build3 (COMPONENT_REF, TREE_TYPE (field), -+ rhs_tuple, field, NULL_TREE); -+ tree rhs_vector = build4 (ARRAY_REF, TREE_TYPE (f.lhs), -+ rhs_array, index, NULL_TREE, NULL_TREE); -+ return gimple_build_assign (f.lhs, rhs_vector); -+ } -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ /* Fold the access into a subreg rvalue. 
*/ -+ return simplify_gen_subreg (e.vector_mode (0), e.args[0], -+ GET_MODE (e.args[0]), -+ INTVAL (e.args[1]) * BYTES_PER_SVE_VECTOR); -+ } -+}; -+ -+class svindex_impl : public function_base -+{ -+public: -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ return e.use_exact_insn (e.direct_optab_handler (vec_series_optab)); -+ } -+}; -+ -+class svinsr_impl : public quiet -+{ -+public: -+ gimple * -+ fold (gimple_folder &f) const OVERRIDE -+ { -+ gcall *new_call = gimple_build_call_internal (IFN_VEC_SHL_INSERT, 2, -+ gimple_call_arg (f.call, 0), -+ gimple_call_arg (f.call, 1)); -+ gimple_call_set_lhs (new_call, f.lhs); -+ return new_call; -+ } -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ insn_code icode = direct_optab_handler (vec_shl_insert_optab, -+ e.vector_mode (0)); -+ return e.use_exact_insn (icode); -+ } -+}; -+ -+/* Implements svlasta and svlastb. */ -+class svlast_impl : public quiet -+{ -+public: -+ CONSTEXPR svlast_impl (int unspec) : m_unspec (unspec) {} -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ return e.use_exact_insn (code_for_extract (m_unspec, e.vector_mode (0))); -+ } -+ -+ /* The unspec code associated with the operation. */ -+ int m_unspec; -+}; -+ -+class svld1_impl : public full_width_access -+{ -+public: -+ unsigned int -+ call_properties (const function_instance &) const OVERRIDE -+ { -+ return CP_READ_MEMORY; -+ } -+ -+ gimple * -+ fold (gimple_folder &f) const OVERRIDE -+ { -+ tree vectype = f.vector_type (0); -+ -+ /* Get the predicate and base pointer. */ -+ gimple_seq stmts = NULL; -+ tree pred = f.convert_pred (stmts, vectype, 0); -+ tree base = f.fold_contiguous_base (stmts, vectype); -+ gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); -+ -+ tree cookie = f.load_store_cookie (TREE_TYPE (vectype)); -+ gcall *new_call = gimple_build_call_internal (IFN_MASK_LOAD, 3, -+ base, cookie, pred); -+ gimple_call_set_lhs (new_call, f.lhs); -+ return new_call; -+ } -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ insn_code icode = convert_optab_handler (maskload_optab, -+ e.vector_mode (0), e.gp_mode (0)); -+ return e.use_contiguous_load_insn (icode); -+ } -+}; -+ -+/* Implements extending contiguous forms of svld1. */ -+class svld1_extend_impl : public extending_load -+{ -+public: -+ CONSTEXPR svld1_extend_impl (type_suffix_index memory_type) -+ : extending_load (memory_type) {} -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ insn_code icode = code_for_aarch64_load (extend_rtx_code (), -+ e.vector_mode (0), -+ e.memory_vector_mode ()); -+ return e.use_contiguous_load_insn (icode); -+ } -+}; -+ -+class svld1_gather_impl : public full_width_access -+{ -+public: -+ unsigned int -+ call_properties (const function_instance &) const OVERRIDE -+ { -+ return CP_READ_MEMORY; -+ } -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ e.prepare_gather_address_operands (1); -+ /* Put the predicate last, as required by mask_gather_load_optab. */ -+ e.rotate_inputs_left (0, 5); -+ machine_mode mem_mode = e.memory_vector_mode (); -+ insn_code icode = direct_optab_handler (mask_gather_load_optab, mem_mode); -+ return e.use_exact_insn (icode); -+ } -+}; -+ -+/* Implements extending forms of svld1_gather. 
*/ -+class svld1_gather_extend_impl : public extending_load -+{ -+public: -+ CONSTEXPR svld1_gather_extend_impl (type_suffix_index memory_type) -+ : extending_load (memory_type) {} -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ e.prepare_gather_address_operands (1); -+ /* Put the predicate last, since the extending gathers use the same -+ operand order as mask_gather_load_optab. */ -+ e.rotate_inputs_left (0, 5); -+ insn_code icode = code_for_aarch64_gather_load (extend_rtx_code (), -+ e.vector_mode (0), -+ e.memory_vector_mode ()); -+ return e.use_exact_insn (icode); -+ } -+}; -+ -+class load_replicate : public function_base -+{ -+public: -+ unsigned int -+ call_properties (const function_instance &) const OVERRIDE -+ { -+ return CP_READ_MEMORY; -+ } -+ -+ tree -+ memory_scalar_type (const function_instance &fi) const OVERRIDE -+ { -+ return fi.scalar_type (0); -+ } -+}; -+ -+class svld1rq_impl : public load_replicate -+{ -+public: -+ machine_mode -+ memory_vector_mode (const function_instance &fi) const OVERRIDE -+ { -+ return aarch64_vq_mode (GET_MODE_INNER (fi.vector_mode (0))).require (); -+ } -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ insn_code icode = code_for_aarch64_sve_ld1rq (e.vector_mode (0)); -+ return e.use_contiguous_load_insn (icode); -+ } -+}; -+ -+class svld1ro_impl : public load_replicate -+{ -+public: -+ machine_mode -+ memory_vector_mode (const function_instance &fi) const OVERRIDE -+ { -+ return OImode; -+ } -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ insn_code icode = code_for_aarch64_sve_ld1ro (e.vector_mode (0)); -+ return e.use_contiguous_load_insn (icode); -+ } -+}; -+ -+/* Implements svld2, svld3 and svld4. */ -+class svld234_impl : public full_width_access -+{ -+public: -+ CONSTEXPR svld234_impl (unsigned int vectors_per_tuple) -+ : full_width_access (vectors_per_tuple) {} -+ -+ unsigned int -+ call_properties (const function_instance &) const OVERRIDE -+ { -+ return CP_READ_MEMORY; -+ } -+ -+ gimple * -+ fold (gimple_folder &f) const OVERRIDE -+ { -+ tree tuple_type = TREE_TYPE (f.lhs); -+ tree vectype = f.vector_type (0); -+ -+ /* Get the predicate and base pointer. */ -+ gimple_seq stmts = NULL; -+ tree pred = f.convert_pred (stmts, vectype, 0); -+ tree base = f.fold_contiguous_base (stmts, vectype); -+ gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); -+ -+ /* Emit two statements: a clobber of the lhs, so that it isn't -+ upwards exposed, and then the load itself. -+ -+ The fold routines expect the replacement statement to have the -+ same lhs as the original call, so return the clobber statement -+ rather than the load. */ -+ gimple *clobber = gimple_build_assign (f.lhs, build_clobber (tuple_type)); -+ -+ /* View the loaded data as an array of vectors. */ -+ tree field = tuple_type_field (tuple_type); -+ tree lhs_array = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (field), -+ unshare_expr (f.lhs)); -+ -+ /* Emit the load itself. 
*/ -+ tree cookie = f.load_store_cookie (TREE_TYPE (vectype)); -+ gcall *new_call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3, -+ base, cookie, pred); -+ gimple_call_set_lhs (new_call, lhs_array); -+ gsi_insert_after (f.gsi, new_call, GSI_SAME_STMT); -+ -+ return clobber; -+ } -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ machine_mode tuple_mode = TYPE_MODE (TREE_TYPE (e.call_expr)); -+ insn_code icode = convert_optab_handler (vec_mask_load_lanes_optab, -+ tuple_mode, e.vector_mode (0)); -+ return e.use_contiguous_load_insn (icode); -+ } -+}; -+ -+class svldff1_gather_impl : public full_width_access -+{ -+public: -+ unsigned int -+ call_properties (const function_instance &) const OVERRIDE -+ { -+ return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR; -+ } -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ /* See the block comment in aarch64-sve.md for details about the -+ FFR handling. */ -+ emit_insn (gen_aarch64_update_ffr_for_load ()); -+ -+ e.prepare_gather_address_operands (1); -+ /* Put the predicate last, since ldff1_gather uses the same operand -+ order as mask_gather_load_optab. */ -+ e.rotate_inputs_left (0, 5); -+ machine_mode mem_mode = e.memory_vector_mode (); -+ return e.use_exact_insn (code_for_aarch64_ldff1_gather (mem_mode)); -+ } -+}; -+ -+/* Implements extending forms of svldff1_gather. */ -+class svldff1_gather_extend : public extending_load -+{ -+public: -+ CONSTEXPR svldff1_gather_extend (type_suffix_index memory_type) -+ : extending_load (memory_type) {} -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ /* See the block comment in aarch64-sve.md for details about the -+ FFR handling. */ -+ emit_insn (gen_aarch64_update_ffr_for_load ()); -+ -+ e.prepare_gather_address_operands (1); -+ /* Put the predicate last, since ldff1_gather uses the same operand -+ order as mask_gather_load_optab. */ -+ e.rotate_inputs_left (0, 5); -+ insn_code icode = code_for_aarch64_ldff1_gather (extend_rtx_code (), -+ e.vector_mode (0), -+ e.memory_vector_mode ()); -+ return e.use_exact_insn (icode); -+ } -+}; -+ -+class svldnt1_impl : public full_width_access -+{ -+public: -+ unsigned int -+ call_properties (const function_instance &) const OVERRIDE -+ { -+ return CP_READ_MEMORY; -+ } -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ insn_code icode = code_for_aarch64_ldnt1 (e.vector_mode (0)); -+ return e.use_contiguous_load_insn (icode); -+ } -+}; -+ -+/* Implements svldff1 and svldnf1. */ -+class svldxf1_impl : public full_width_access -+{ -+public: -+ CONSTEXPR svldxf1_impl (int unspec) : m_unspec (unspec) {} -+ -+ unsigned int -+ call_properties (const function_instance &) const OVERRIDE -+ { -+ return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR; -+ } -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ /* See the block comment in aarch64-sve.md for details about the -+ FFR handling. */ -+ emit_insn (gen_aarch64_update_ffr_for_load ()); -+ -+ machine_mode mode = e.vector_mode (0); -+ return e.use_contiguous_load_insn (code_for_aarch64_ldf1 (m_unspec, mode)); -+ } -+ -+ /* The unspec associated with the load. */ -+ int m_unspec; -+}; -+ -+/* Implements extending contiguous forms of svldff1 and svldnf1. 
*/ -+class svldxf1_extend_impl : public extending_load -+{ -+public: -+ CONSTEXPR svldxf1_extend_impl (type_suffix_index memory_type, int unspec) -+ : extending_load (memory_type), m_unspec (unspec) {} -+ -+ unsigned int -+ call_properties (const function_instance &) const OVERRIDE -+ { -+ return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR; -+ } -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ /* See the block comment in aarch64-sve.md for details about the -+ FFR handling. */ -+ emit_insn (gen_aarch64_update_ffr_for_load ()); -+ -+ insn_code icode = code_for_aarch64_ldf1 (m_unspec, extend_rtx_code (), -+ e.vector_mode (0), -+ e.memory_vector_mode ()); -+ return e.use_contiguous_load_insn (icode); -+ } -+ -+ /* The unspec associated with the load. */ -+ int m_unspec; -+}; -+ -+class svlen_impl : public quiet -+{ -+public: -+ gimple * -+ fold (gimple_folder &f) const OVERRIDE -+ { -+ /* The argument only exists for its type. */ -+ tree rhs_type = TREE_TYPE (gimple_call_arg (f.call, 0)); -+ tree count = build_int_cstu (TREE_TYPE (f.lhs), -+ TYPE_VECTOR_SUBPARTS (rhs_type)); -+ return gimple_build_assign (f.lhs, count); -+ } -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ /* The argument only exists for its type. */ -+ return gen_int_mode (GET_MODE_NUNITS (e.vector_mode (0)), DImode); -+ } -+}; -+ -+class svmad_impl : public function_base -+{ -+public: -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ return expand_mad (e); -+ } -+}; -+ -+class svmla_impl : public function_base -+{ -+public: -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ /* Put the accumulator at the end (argument 3), but keep it as the -+ merge input for _m functions. */ -+ e.rotate_inputs_left (1, 4); -+ return expand_mad (e, 3); -+ } -+}; -+ -+/* Base class for svmla_lane and svmls_lane. */ -+class svmla_svmls_lane_impl : public function_base -+{ -+public: -+ CONSTEXPR svmla_svmls_lane_impl (int unspec) -+ : m_unspec (unspec) {} -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ /* Put the operands in the normal (fma ...) order, with the accumulator -+ last. This fits naturally since that's also the unprinted operand -+ in the asm output. */ -+ e.rotate_inputs_left (0, 4); -+ insn_code icode = code_for_aarch64_lane (m_unspec, e.vector_mode (0)); -+ return e.use_exact_insn (icode); -+ } -+ -+ /* The unspec code associated with the operation. */ -+ int m_unspec; -+}; -+ -+class svmls_impl : public function_base -+{ -+public: -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ /* Put the accumulator at the end (argument 3), but keep it as the -+ merge input for _m functions. */ -+ e.rotate_inputs_left (1, 4); -+ return expand_msb (e, 3); -+ } -+}; -+ -+class svmov_impl : public function_base -+{ -+public: -+ gimple * -+ fold (gimple_folder &f) const OVERRIDE -+ { -+ return gimple_build_assign (f.lhs, BIT_AND_EXPR, -+ gimple_call_arg (f.call, 0), -+ gimple_call_arg (f.call, 1)); -+ } -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ /* The canonical form for the assembler alias "MOV Pa.B, Pb/Z, Pc.B" -+ is "AND Pa.B, Pb/Z, Pc.B, Pc.B". 
*/ -+ gcc_assert (e.pred == PRED_z); -+ e.args.quick_push (e.args[1]); -+ return e.use_exact_insn (CODE_FOR_aarch64_pred_andvnx16bi_z); -+ } -+}; -+ -+class svmmla_impl : public function_base -+{ -+public: -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ insn_code icode; -+ if (e.type_suffix (0).integer_p) -+ { -+ if (e.type_suffix (0).unsigned_p) -+ icode = code_for_aarch64_sve_add (UNSPEC_UMATMUL, e.vector_mode (0)); -+ else -+ icode = code_for_aarch64_sve_add (UNSPEC_SMATMUL, e.vector_mode (0)); -+ } -+ else -+ icode = code_for_aarch64_sve (UNSPEC_FMMLA, e.vector_mode (0)); -+ return e.use_exact_insn (icode); -+ } -+}; -+ -+class svmsb_impl : public function_base -+{ -+public: -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ return expand_msb (e); -+ } -+}; -+ -+class svnand_impl : public function_base -+{ -+public: -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ gcc_assert (e.pred == PRED_z); -+ return e.use_exact_insn (CODE_FOR_aarch64_pred_nandvnx16bi_z); -+ } -+}; -+ -+class svnor_impl : public function_base -+{ -+public: -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ gcc_assert (e.pred == PRED_z); -+ return e.use_exact_insn (CODE_FOR_aarch64_pred_norvnx16bi_z); -+ } -+}; -+ -+class svnot_impl : public rtx_code_function -+{ -+public: -+ CONSTEXPR svnot_impl () : rtx_code_function (NOT, NOT, -1) {} -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ if (e.type_suffix_ids[0] == TYPE_SUFFIX_b) -+ { -+ /* The canonical form for the assembler alias "NOT Pa.B, Pb/Z, Pc.B" -+ is "EOR Pa.B, Pb/Z, Pb.B, Pc.B". */ -+ gcc_assert (e.pred == PRED_z); -+ e.args.quick_insert (1, e.args[0]); -+ return e.use_exact_insn (CODE_FOR_aarch64_pred_xorvnx16bi_z); -+ } -+ return rtx_code_function::expand (e); -+ } -+}; -+ -+class svorn_impl : public function_base -+{ -+public: -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ gcc_assert (e.pred == PRED_z); -+ return e.use_exact_insn (CODE_FOR_aarch64_pred_ornvnx16bi_z); -+ } -+}; -+ -+class svpfalse_impl : public function_base -+{ -+public: -+ gimple * -+ fold (gimple_folder &f) const OVERRIDE -+ { -+ return f.fold_to_pfalse (); -+ } -+ -+ rtx -+ expand (function_expander &) const OVERRIDE -+ { -+ return CONST0_RTX (VNx16BImode); -+ } -+}; -+ -+/* Implements svpfirst and svpnext, which share the same .md patterns. */ -+class svpfirst_svpnext_impl : public function_base -+{ -+public: -+ CONSTEXPR svpfirst_svpnext_impl (int unspec) : m_unspec (unspec) {} -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ machine_mode mode = e.vector_mode (0); -+ e.add_ptrue_hint (0, mode); -+ return e.use_exact_insn (code_for_aarch64_sve (m_unspec, mode)); -+ } -+ -+ /* The unspec associated with the operation. */ -+ int m_unspec; -+}; -+ -+/* Implements contiguous forms of svprf[bhwd]. */ -+class svprf_bhwd_impl : public function_base -+{ -+public: -+ CONSTEXPR svprf_bhwd_impl (machine_mode mode) : m_mode (mode) {} -+ -+ unsigned int -+ call_properties (const function_instance &) const OVERRIDE -+ { -+ return CP_PREFETCH_MEMORY; -+ } -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ e.prepare_prefetch_operands (); -+ insn_code icode = code_for_aarch64_sve_prefetch (m_mode); -+ return e.use_contiguous_prefetch_insn (icode); -+ } -+ -+ /* The mode that we'd use to hold one vector of prefetched data. */ -+ machine_mode m_mode; -+}; -+ -+/* Implements svprf[bhwd]_gather. 
*/ -+class svprf_bhwd_gather_impl : public function_base -+{ -+public: -+ CONSTEXPR svprf_bhwd_gather_impl (machine_mode mode) : m_mode (mode) {} -+ -+ unsigned int -+ call_properties (const function_instance &) const OVERRIDE -+ { -+ return CP_PREFETCH_MEMORY; -+ } -+ -+ machine_mode -+ memory_vector_mode (const function_instance &) const OVERRIDE -+ { -+ return m_mode; -+ } -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ e.prepare_prefetch_operands (); -+ e.prepare_gather_address_operands (1); -+ -+ /* Insert a zero operand to identify the mode of the memory being -+ accessed. This goes between the gather operands and prefetch -+ operands created above. */ -+ e.args.quick_insert (5, CONST0_RTX (m_mode)); -+ -+ machine_mode reg_mode = GET_MODE (e.args[2]); -+ insn_code icode = code_for_aarch64_sve_gather_prefetch (m_mode, reg_mode); -+ return e.use_exact_insn (icode); -+ } -+ -+ /* The mode that we'd use to hold one vector of prefetched data. */ -+ machine_mode m_mode; -+}; -+ -+/* Implements svptest_any, svptest_first and svptest_last. */ -+class svptest_impl : public function_base -+{ -+public: -+ CONSTEXPR svptest_impl (rtx_code compare) : m_compare (compare) {} -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ /* See whether GP is an exact ptrue for some predicate mode; -+ i.e. whether converting the GP to that mode will not drop -+ set bits and will leave all significant bits set. */ -+ machine_mode wide_mode; -+ int hint; -+ if (aarch64_ptrue_all_mode (e.args[0]).exists (&wide_mode)) -+ hint = SVE_KNOWN_PTRUE; -+ else -+ { -+ hint = SVE_MAYBE_NOT_PTRUE; -+ wide_mode = VNx16BImode; -+ } -+ -+ /* Generate the PTEST itself. */ -+ rtx pg = force_reg (VNx16BImode, e.args[0]); -+ rtx wide_pg = gen_lowpart (wide_mode, pg); -+ rtx hint_rtx = gen_int_mode (hint, DImode); -+ rtx op = force_reg (wide_mode, gen_lowpart (wide_mode, e.args[1])); -+ emit_insn (gen_aarch64_ptestvnx16bi (pg, wide_pg, hint_rtx, op)); -+ -+ /* Get the location of the boolean result. We can provide SImode and -+ DImode values directly; rely on generic code to convert others. */ -+ rtx target = e.possible_target; -+ if (!target -+ || !REG_P (target) -+ || (GET_MODE (target) != SImode && GET_MODE (target) != DImode)) -+ target = gen_reg_rtx (DImode); -+ -+ /* Generate a CSET to convert the CC result of the PTEST to a boolean. */ -+ rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM); -+ rtx compare = gen_rtx_fmt_ee (m_compare, GET_MODE (target), -+ cc_reg, const0_rtx); -+ emit_insn (gen_rtx_SET (target, compare)); -+ return target; -+ } -+ -+ /* The comparison code associated with ptest condition. */ -+ rtx_code m_compare; -+}; -+ -+class svptrue_impl : public function_base -+{ -+public: -+ gimple * -+ fold (gimple_folder &f) const OVERRIDE -+ { -+ return f.fold_to_ptrue (); -+ } -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ return aarch64_ptrue_all (e.type_suffix (0).element_bytes); -+ } -+}; -+ -+class svptrue_pat_impl : public function_base -+{ -+public: -+ gimple * -+ fold (gimple_folder &f) const OVERRIDE -+ { -+ tree pattern_arg = gimple_call_arg (f.call, 0); -+ aarch64_svpattern pattern = (aarch64_svpattern) tree_to_shwi (pattern_arg); -+ -+ if (pattern == AARCH64_SV_ALL) -+ /* svptrue_pat_bN (SV_ALL) == svptrue_bN (). */ -+ return f.fold_to_ptrue (); -+ -+ /* See whether we can count the number of elements in the pattern -+ at compile time. If so, construct a predicate with that number -+ of 1s followed by all 0s. 
*/ -+ int nelts_per_vq = f.elements_per_vq (0); -+ HOST_WIDE_INT value = aarch64_fold_sve_cnt_pat (pattern, nelts_per_vq); -+ if (value >= 0) -+ return f.fold_to_vl_pred (value); -+ -+ return NULL; -+ } -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ /* In rtl, the predicate is represented as the constant: -+ -+ (const:V16BI (unspec:V16BI [(const_int PATTERN) -+ (const_vector:VnnBI [zeros])] -+ UNSPEC_PTRUE)) -+ -+ where nn determines the element size. */ -+ rtvec vec = gen_rtvec (2, e.args[0], CONST0_RTX (e.vector_mode (0))); -+ return gen_rtx_CONST (VNx16BImode, -+ gen_rtx_UNSPEC (VNx16BImode, vec, UNSPEC_PTRUE)); -+ } -+}; -+ -+class svqadd_impl : public function_base -+{ -+public: -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ return e.expand_signed_unpred_op (SS_PLUS, US_PLUS); -+ } -+}; -+ -+/* Implements svqdec[bhwd]{,_pat} and svqinc[bhwd]{,_pat}. */ -+class svqdec_svqinc_bhwd_impl : public function_base -+{ -+public: -+ CONSTEXPR svqdec_svqinc_bhwd_impl (rtx_code code_for_sint, -+ rtx_code code_for_uint, -+ scalar_int_mode elem_mode) -+ : m_code_for_sint (code_for_sint), -+ m_code_for_uint (code_for_uint), -+ m_elem_mode (elem_mode) -+ {} -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ /* Treat non-_pat functions in the same way as _pat functions with -+ an SV_ALL argument. */ -+ if (e.args.length () == 2) -+ e.args.quick_insert (1, gen_int_mode (AARCH64_SV_ALL, DImode)); -+ -+ /* Insert the number of elements per 128-bit block as a fake argument, -+ between the pattern and the multiplier. Arguments 1, 2 and 3 then -+ correspond exactly with the 3 UNSPEC_SVE_CNT_PAT operands; see -+ aarch64_sve_cnt_pat for details. */ -+ unsigned int elements_per_vq = 128 / GET_MODE_BITSIZE (m_elem_mode); -+ e.args.quick_insert (2, gen_int_mode (elements_per_vq, DImode)); -+ -+ rtx_code code = (e.type_suffix (0).unsigned_p -+ ? m_code_for_uint -+ : m_code_for_sint); -+ -+ /* Choose between operating on integer scalars or integer vectors. */ -+ machine_mode mode = e.vector_mode (0); -+ if (e.mode_suffix_id == MODE_n) -+ mode = GET_MODE_INNER (mode); -+ return e.use_exact_insn (code_for_aarch64_sve_pat (code, mode)); -+ } -+ -+ /* The saturating addition or subtraction codes to use for signed and -+ unsigned values respectively. */ -+ rtx_code m_code_for_sint; -+ rtx_code m_code_for_uint; -+ -+ /* The integer mode associated with the [bhwd] suffix. */ -+ scalar_int_mode m_elem_mode; -+}; -+ -+/* Implements svqdec[bhwd]{,_pat}. */ -+class svqdec_bhwd_impl : public svqdec_svqinc_bhwd_impl -+{ -+public: -+ CONSTEXPR svqdec_bhwd_impl (scalar_int_mode elem_mode) -+ : svqdec_svqinc_bhwd_impl (SS_MINUS, US_MINUS, elem_mode) {} -+}; -+ -+/* Implements svqinc[bhwd]{,_pat}. */ -+class svqinc_bhwd_impl : public svqdec_svqinc_bhwd_impl -+{ -+public: -+ CONSTEXPR svqinc_bhwd_impl (scalar_int_mode elem_mode) -+ : svqdec_svqinc_bhwd_impl (SS_PLUS, US_PLUS, elem_mode) {} -+}; -+ -+/* Implements svqdecp and svqincp. */ -+class svqdecp_svqincp_impl : public function_base -+{ -+public: -+ CONSTEXPR svqdecp_svqincp_impl (rtx_code code_for_sint, -+ rtx_code code_for_uint) -+ : m_code_for_sint (code_for_sint), -+ m_code_for_uint (code_for_uint) -+ {} -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ rtx_code code = (e.type_suffix (0).unsigned_p -+ ? 
m_code_for_uint -+ : m_code_for_sint); -+ insn_code icode; -+ if (e.mode_suffix_id == MODE_n) -+ { -+ /* Increment or decrement a scalar (whose mode is given by the first -+ type suffix) by the number of active elements in a predicate -+ (whose mode is given by the second type suffix). */ -+ machine_mode mode = GET_MODE_INNER (e.vector_mode (0)); -+ icode = code_for_aarch64_sve_cntp (code, mode, e.vector_mode (1)); -+ } -+ else -+ /* Increment a vector by the number of active elements in a predicate, -+ with the vector mode determining the predicate mode. */ -+ icode = code_for_aarch64_sve_cntp (code, e.vector_mode (0)); -+ return e.use_exact_insn (icode); -+ } -+ -+ /* The saturating addition or subtraction codes to use for signed and -+ unsigned values respectively. */ -+ rtx_code m_code_for_sint; -+ rtx_code m_code_for_uint; -+}; -+ -+class svqsub_impl : public function_base -+{ -+public: -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ return e.expand_signed_unpred_op (SS_MINUS, US_MINUS); -+ } -+}; -+ -+class svrdffr_impl : public function_base -+{ -+public: -+ unsigned int -+ call_properties (const function_instance &) const OVERRIDE -+ { -+ return CP_READ_FFR; -+ } -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ /* See the block comment in aarch64-sve.md for details about the -+ FFR handling. */ -+ emit_insn (gen_aarch64_copy_ffr_to_ffrt ()); -+ rtx result = e.use_exact_insn (e.pred == PRED_z -+ ? CODE_FOR_aarch64_rdffr_z -+ : CODE_FOR_aarch64_rdffr); -+ emit_insn (gen_aarch64_update_ffrt ()); -+ return result; -+ } -+}; -+ -+class svreinterpret_impl : public quiet -+{ -+public: -+ gimple * -+ fold (gimple_folder &f) const OVERRIDE -+ { -+ /* Punt to rtl if the effect of the reinterpret on registers does not -+ conform to GCC's endianness model. */ -+ if (!targetm.can_change_mode_class (f.vector_mode (0), -+ f.vector_mode (1), FP_REGS)) -+ return NULL; -+ -+ /* Otherwise svreinterpret corresponds directly to a VIEW_CONVERT_EXPR -+ reinterpretation. */ -+ tree rhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (f.lhs), -+ gimple_call_arg (f.call, 0)); -+ return gimple_build_assign (f.lhs, VIEW_CONVERT_EXPR, rhs); -+ } -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ machine_mode mode = e.vector_mode (0); -+ return e.use_exact_insn (code_for_aarch64_sve_reinterpret (mode)); -+ } -+}; -+ -+class svrev_impl : public permute -+{ -+public: -+ gimple * -+ fold (gimple_folder &f) const OVERRIDE -+ { -+ /* Punt for now on _b16 and wider; we'd need more complex evpc logic -+ to rerecognize the result. */ -+ if (f.type_suffix (0).bool_p && f.type_suffix (0).element_bits > 8) -+ return NULL; -+ -+ /* Permute as { nelts - 1, nelts - 2, nelts - 3, ... }. */ -+ poly_int64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs)); -+ vec_perm_builder builder (nelts, 1, 3); -+ for (int i = 0; i < 3; ++i) -+ builder.quick_push (nelts - i - 1); -+ return fold_permute (f, builder); -+ } -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ return e.use_exact_insn (code_for_aarch64_sve_rev (e.vector_mode (0))); -+ } -+}; -+ -+class svsel_impl : public quiet -+{ -+public: -+ gimple * -+ fold (gimple_folder &f) const OVERRIDE -+ { -+ /* svsel corresponds exactly to VEC_COND_EXPR. 
*/ -+ gimple_seq stmts = NULL; -+ tree pred = f.convert_pred (stmts, f.vector_type (0), 0); -+ gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); -+ return gimple_build_assign (f.lhs, VEC_COND_EXPR, pred, -+ gimple_call_arg (f.call, 1), -+ gimple_call_arg (f.call, 2)); -+ } -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ /* svsel (cond, truev, falsev) is vcond_mask (truev, falsev, cond). */ -+ e.rotate_inputs_left (0, 3); -+ insn_code icode = convert_optab_handler (vcond_mask_optab, -+ e.vector_mode (0), -+ e.gp_mode (0)); -+ return e.use_exact_insn (icode); -+ } -+}; -+ -+/* Implements svset2, svset3 and svset4. */ -+class svset_impl : public quiet -+{ -+public: -+ CONSTEXPR svset_impl (unsigned int vectors_per_tuple) -+ : quiet (vectors_per_tuple) {} -+ -+ gimple * -+ fold (gimple_folder &f) const OVERRIDE -+ { -+ tree rhs_tuple = gimple_call_arg (f.call, 0); -+ tree index = gimple_call_arg (f.call, 1); -+ tree rhs_vector = gimple_call_arg (f.call, 2); -+ -+ /* Replace the call with two statements: a copy of the full tuple -+ to the call result, followed by an update of the individual vector. -+ -+ The fold routines expect the replacement statement to have the -+ same lhs as the original call, so return the copy statement -+ rather than the field update. */ -+ gassign *copy = gimple_build_assign (unshare_expr (f.lhs), rhs_tuple); -+ -+ /* Get a reference to the individual vector. */ -+ tree field = tuple_type_field (TREE_TYPE (f.lhs)); -+ tree lhs_array = build3 (COMPONENT_REF, TREE_TYPE (field), -+ f.lhs, field, NULL_TREE); -+ tree lhs_vector = build4 (ARRAY_REF, TREE_TYPE (rhs_vector), -+ lhs_array, index, NULL_TREE, NULL_TREE); -+ gassign *update = gimple_build_assign (lhs_vector, rhs_vector); -+ gsi_insert_after (f.gsi, update, GSI_SAME_STMT); -+ -+ return copy; -+ } -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ rtx rhs_tuple = e.args[0]; -+ unsigned int index = INTVAL (e.args[1]); -+ rtx rhs_vector = e.args[2]; -+ -+ /* First copy the full tuple to the target register. */ -+ rtx lhs_tuple = e.get_nonoverlapping_reg_target (); -+ emit_move_insn (lhs_tuple, rhs_tuple); -+ -+ /* ...then update the individual vector. */ -+ rtx lhs_vector = simplify_gen_subreg (GET_MODE (rhs_vector), -+ lhs_tuple, GET_MODE (lhs_tuple), -+ index * BYTES_PER_SVE_VECTOR); -+ emit_move_insn (lhs_vector, rhs_vector); -+ return lhs_vector; -+ } -+}; -+ -+class svsetffr_impl : public function_base -+{ -+public: -+ unsigned int -+ call_properties (const function_instance &) const OVERRIDE -+ { -+ return CP_WRITE_FFR; -+ } -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ e.args.quick_push (CONSTM1_RTX (VNx16BImode)); -+ return e.use_exact_insn (CODE_FOR_aarch64_wrffr); -+ } -+}; -+ -+class svst1_impl : public full_width_access -+{ -+public: -+ unsigned int -+ call_properties (const function_instance &) const OVERRIDE -+ { -+ return CP_WRITE_MEMORY; -+ } -+ -+ gimple * -+ fold (gimple_folder &f) const OVERRIDE -+ { -+ tree vectype = f.vector_type (0); -+ -+ /* Get the predicate and base pointer. 
*/ -+ gimple_seq stmts = NULL; -+ tree pred = f.convert_pred (stmts, vectype, 0); -+ tree base = f.fold_contiguous_base (stmts, vectype); -+ gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); -+ -+ tree cookie = f.load_store_cookie (TREE_TYPE (vectype)); -+ tree rhs = gimple_call_arg (f.call, gimple_call_num_args (f.call) - 1); -+ return gimple_build_call_internal (IFN_MASK_STORE, 4, -+ base, cookie, pred, rhs); -+ } -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ insn_code icode = convert_optab_handler (maskstore_optab, -+ e.vector_mode (0), e.gp_mode (0)); -+ return e.use_contiguous_store_insn (icode); -+ } -+}; -+ -+class svst1_scatter_impl : public full_width_access -+{ -+public: -+ unsigned int -+ call_properties (const function_instance &) const OVERRIDE -+ { -+ return CP_WRITE_MEMORY; -+ } -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ e.prepare_gather_address_operands (1); -+ /* Put the predicate last, as required by mask_scatter_store_optab. */ -+ e.rotate_inputs_left (0, 6); -+ insn_code icode = direct_optab_handler (mask_scatter_store_optab, -+ e.memory_vector_mode ()); -+ return e.use_exact_insn (icode); -+ } -+}; -+ -+/* Implements truncating forms of svst1_scatter. */ -+class svst1_scatter_truncate_impl : public truncating_store -+{ -+public: -+ CONSTEXPR svst1_scatter_truncate_impl (scalar_int_mode to_mode) -+ : truncating_store (to_mode) {} -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ e.prepare_gather_address_operands (1); -+ /* Put the predicate last, since the truncating scatters use the same -+ operand order as mask_scatter_store_optab. */ -+ e.rotate_inputs_left (0, 6); -+ insn_code icode = code_for_aarch64_scatter_store_trunc -+ (e.memory_vector_mode (), e.vector_mode (0)); -+ return e.use_exact_insn (icode); -+ } -+}; -+ -+/* Implements truncating contiguous forms of svst1. */ -+class svst1_truncate_impl : public truncating_store -+{ -+public: -+ CONSTEXPR svst1_truncate_impl (scalar_int_mode to_mode) -+ : truncating_store (to_mode) {} -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ insn_code icode = code_for_aarch64_store_trunc (e.memory_vector_mode (), -+ e.vector_mode (0)); -+ return e.use_contiguous_store_insn (icode); -+ } -+}; -+ -+/* Implements svst2, svst3 and svst4. */ -+class svst234_impl : public full_width_access -+{ -+public: -+ CONSTEXPR svst234_impl (unsigned int vectors_per_tuple) -+ : full_width_access (vectors_per_tuple) {} -+ -+ unsigned int -+ call_properties (const function_instance &) const OVERRIDE -+ { -+ return CP_WRITE_MEMORY; -+ } -+ -+ gimple * -+ fold (gimple_folder &f) const OVERRIDE -+ { -+ tree vectype = f.vector_type (0); -+ -+ /* Get the predicate and base pointer. */ -+ gimple_seq stmts = NULL; -+ tree pred = f.convert_pred (stmts, vectype, 0); -+ tree base = f.fold_contiguous_base (stmts, vectype); -+ gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); -+ -+ /* View the stored data as an array of vectors. 
*/ -+ unsigned int num_args = gimple_call_num_args (f.call); -+ tree rhs_tuple = gimple_call_arg (f.call, num_args - 1); -+ tree field = tuple_type_field (TREE_TYPE (rhs_tuple)); -+ tree rhs_array = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (field), rhs_tuple); -+ -+ tree cookie = f.load_store_cookie (TREE_TYPE (vectype)); -+ return gimple_build_call_internal (IFN_MASK_STORE_LANES, 4, -+ base, cookie, pred, rhs_array); -+ } -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ machine_mode tuple_mode = GET_MODE (e.args.last ()); -+ insn_code icode = convert_optab_handler (vec_mask_store_lanes_optab, -+ tuple_mode, e.vector_mode (0)); -+ return e.use_contiguous_store_insn (icode); -+ } -+}; -+ -+class svstnt1_impl : public full_width_access -+{ -+public: -+ unsigned int -+ call_properties (const function_instance &) const OVERRIDE -+ { -+ return CP_WRITE_MEMORY; -+ } -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ insn_code icode = code_for_aarch64_stnt1 (e.vector_mode (0)); -+ return e.use_contiguous_store_insn (icode); -+ } -+}; -+ -+class svsub_impl : public rtx_code_function -+{ -+public: -+ CONSTEXPR svsub_impl () -+ : rtx_code_function (MINUS, MINUS, UNSPEC_COND_FSUB) {} -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ /* Canonicalize subtractions of constants to additions. */ -+ machine_mode mode = e.vector_mode (0); -+ if (e.try_negating_argument (2, mode)) -+ return e.map_to_rtx_codes (PLUS, PLUS, UNSPEC_COND_FADD); -+ -+ return rtx_code_function::expand (e); -+ } -+}; -+ -+class svtbl_impl : public permute -+{ -+public: -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ return e.use_exact_insn (code_for_aarch64_sve_tbl (e.vector_mode (0))); -+ } -+}; -+ -+/* Implements svtrn1 and svtrn2. */ -+class svtrn_impl : public binary_permute -+{ -+public: -+ CONSTEXPR svtrn_impl (int base) -+ : binary_permute (base ? UNSPEC_TRN2 : UNSPEC_TRN1), m_base (base) {} -+ -+ gimple * -+ fold (gimple_folder &f) const OVERRIDE -+ { -+ /* svtrn1: { 0, nelts, 2, nelts + 2, 4, nelts + 4, ... } -+ svtrn2: as for svtrn1, but with 1 added to each index. */ -+ poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs)); -+ vec_perm_builder builder (nelts, 2, 3); -+ for (unsigned int i = 0; i < 3; ++i) -+ { -+ builder.quick_push (m_base + i * 2); -+ builder.quick_push (m_base + i * 2 + nelts); -+ } -+ return fold_permute (f, builder); -+ } -+ -+ /* 0 for svtrn1, 1 for svtrn2. */ -+ unsigned int m_base; -+}; -+ -+/* Base class for svundef{,2,3,4}. */ -+class svundef_impl : public quiet -+{ -+public: -+ CONSTEXPR svundef_impl (unsigned int vectors_per_tuple) -+ : quiet (vectors_per_tuple) {} -+ -+ gimple * -+ fold (gimple_folder &f) const OVERRIDE -+ { -+ /* Don't fold svundef at the gimple level. There's no exact -+ correspondence for SSA_NAMEs, and we explicitly don't want -+ to generate a specific value (like an all-zeros vector). */ -+ if (vectors_per_tuple () == 1) -+ return NULL; -+ return gimple_build_assign (f.lhs, build_clobber (TREE_TYPE (f.lhs))); -+ } -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ rtx target = e.get_reg_target (); -+ emit_clobber (copy_rtx (target)); -+ return target; -+ } -+}; -+ -+/* Implements svunpklo and svunpkhi. */ -+class svunpk_impl : public quiet -+{ -+public: -+ CONSTEXPR svunpk_impl (bool high_p) : m_high_p (high_p) {} -+ -+ gimple * -+ fold (gimple_folder &f) const OVERRIDE -+ { -+ /* Don't fold the predicate ops, since every bit of the svbool_t -+ result is significant. 
*/ -+ if (f.type_suffix_ids[0] == TYPE_SUFFIX_b) -+ return NULL; -+ -+ /* The first half in memory is VEC_UNPACK_LO_EXPR for little-endian -+ and VEC_UNPACK_HI_EXPR for big-endian. */ -+ bool high_p = BYTES_BIG_ENDIAN ? !m_high_p : m_high_p; -+ tree_code code = high_p ? VEC_UNPACK_HI_EXPR : VEC_UNPACK_LO_EXPR; -+ return gimple_build_assign (f.lhs, code, gimple_call_arg (f.call, 0)); -+ } -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ machine_mode mode = GET_MODE (e.args[0]); -+ unsigned int unpacku = m_high_p ? UNSPEC_UNPACKUHI : UNSPEC_UNPACKULO; -+ unsigned int unpacks = m_high_p ? UNSPEC_UNPACKSHI : UNSPEC_UNPACKSLO; -+ insn_code icode; -+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) -+ icode = code_for_aarch64_sve_punpk (unpacku, mode); -+ else -+ { -+ int unspec = e.type_suffix (0).unsigned_p ? unpacku : unpacks; -+ icode = code_for_aarch64_sve_unpk (unspec, unspec, mode); -+ } -+ return e.use_exact_insn (icode); -+ } -+ -+ /* True for svunpkhi, false for svunpklo. */ -+ bool m_high_p; -+}; -+ -+/* Also implements svsudot. */ -+class svusdot_impl : public function_base -+{ -+public: -+ CONSTEXPR svusdot_impl (bool su) : m_su (su) {} -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ /* The implementation of the ACLE function svsudot (for the non-lane -+ version) is through the USDOT instruction but with the second and third -+ inputs swapped. */ -+ if (m_su) -+ e.rotate_inputs_left (1, 2); -+ /* The ACLE function has the same order requirements as for svdot. -+ While there's no requirement for the RTL pattern to have the same sort -+ of order as that for dot_prod, it's easier to read. -+ Hence we do the same rotation on arguments as svdot_impl does. */ -+ e.rotate_inputs_left (0, 3); -+ machine_mode mode = e.vector_mode (0); -+ insn_code icode = code_for_aarch64_dot_prod (UNSPEC_USDOT, mode); -+ return e.use_exact_insn (icode); -+ } -+ -+private: -+ bool m_su; -+}; -+ -+/* Implements svuzp1 and svuzp2. */ -+class svuzp_impl : public binary_permute -+{ -+public: -+ CONSTEXPR svuzp_impl (unsigned int base) -+ : binary_permute (base ? UNSPEC_UZP2 : UNSPEC_UZP1), m_base (base) {} -+ -+ gimple * -+ fold (gimple_folder &f) const OVERRIDE -+ { -+ /* svuzp1: { 0, 2, 4, 6, ... } -+ svuzp2: { 1, 3, 5, 7, ... }. */ -+ poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs)); -+ vec_perm_builder builder (nelts, 1, 3); -+ for (unsigned int i = 0; i < 3; ++i) -+ builder.quick_push (m_base + i * 2); -+ return fold_permute (f, builder); -+ } -+ -+ /* 0 for svuzp1, 1 for svuzp2. */ -+ unsigned int m_base; -+}; -+ -+/* A function_base for svwhilele and svwhilelt functions. */ -+class svwhile_impl : public function_base -+{ -+public: -+ CONSTEXPR svwhile_impl (int unspec_for_sint, int unspec_for_uint, bool eq_p) -+ : m_unspec_for_sint (unspec_for_sint), -+ m_unspec_for_uint (unspec_for_uint), m_eq_p (eq_p) -+ {} -+ -+ /* Try to fold a call by treating its arguments as constants of type T. */ -+ template -+ gimple * -+ fold_type (gimple_folder &f) const -+ { -+ /* Only handle cases in which both operands are constant. */ -+ T arg0, arg1; -+ if (!poly_int_tree_p (gimple_call_arg (f.call, 0), &arg0) -+ || !poly_int_tree_p (gimple_call_arg (f.call, 1), &arg1)) -+ return NULL; -+ -+ /* Check whether the result is known to be all-false. */ -+ if (m_eq_p ? known_gt (arg0, arg1) : known_ge (arg0, arg1)) -+ return f.fold_to_pfalse (); -+ -+ /* Punt if we can't tell at compile time whether the result -+ is all-false. */ -+ if (m_eq_p ? 
maybe_gt (arg0, arg1) : maybe_ge (arg0, arg1)) -+ return NULL; -+ -+ /* At this point we know the result has at least one set element. */ -+ poly_uint64 diff = arg1 - arg0; -+ poly_uint64 nelts = GET_MODE_NUNITS (f.vector_mode (0)); -+ -+ /* Canonicalize the svwhilele form to the svwhilelt form. Subtract -+ from NELTS rather than adding to DIFF, to prevent overflow. */ -+ if (m_eq_p) -+ nelts -= 1; -+ -+ /* Check whether the result is known to be all-true. */ -+ if (known_ge (diff, nelts)) -+ return f.fold_to_ptrue (); -+ -+ /* Punt if DIFF might not be the actual number of set elements -+ in the result. Conditional equality is fine. */ -+ if (maybe_gt (diff, nelts)) -+ return NULL; -+ -+ /* At this point we know that the predicate will have DIFF set elements -+ for svwhilelt and DIFF + 1 set elements for svwhilele (which stops -+ after rather than before ARG1 is reached). See if we can create -+ the predicate at compile time. */ -+ unsigned HOST_WIDE_INT vl; -+ if (diff.is_constant (&vl)) -+ /* Overflow is no longer possible after the checks above. */ -+ return f.fold_to_vl_pred (m_eq_p ? vl + 1 : vl); -+ -+ return NULL; -+ } -+ -+ gimple * -+ fold (gimple_folder &f) const OVERRIDE -+ { -+ if (f.type_suffix (1).unsigned_p) -+ return fold_type (f); -+ else -+ return fold_type (f); -+ } -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ /* Suffix 0 determines the predicate mode, suffix 1 determines the -+ scalar mode and signedness. */ -+ int unspec = (e.type_suffix (1).unsigned_p -+ ? m_unspec_for_uint -+ : m_unspec_for_sint); -+ machine_mode pred_mode = e.vector_mode (0); -+ scalar_mode reg_mode = GET_MODE_INNER (e.vector_mode (1)); -+ return e.use_exact_insn (code_for_while (unspec, reg_mode, pred_mode)); -+ } -+ -+ /* The unspec codes associated with signed and unsigned operations -+ respectively. */ -+ int m_unspec_for_sint; -+ int m_unspec_for_uint; -+ -+ /* True svwhilele, false for svwhilelt. */ -+ bool m_eq_p; -+}; -+ -+class svwrffr_impl : public function_base -+{ -+public: -+ unsigned int -+ call_properties (const function_instance &) const OVERRIDE -+ { -+ return CP_WRITE_FFR; -+ } -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ return e.use_exact_insn (CODE_FOR_aarch64_wrffr); -+ } -+}; -+ -+/* Implements svzip1 and svzip2. */ -+class svzip_impl : public binary_permute -+{ -+public: -+ CONSTEXPR svzip_impl (unsigned int base) -+ : binary_permute (base ? UNSPEC_ZIP2 : UNSPEC_ZIP1), m_base (base) {} -+ -+ gimple * -+ fold (gimple_folder &f) const OVERRIDE -+ { -+ /* svzip1: { 0, nelts, 1, nelts + 1, 2, nelts + 2, ... } -+ svzip2: as for svzip1, but with nelts / 2 added to each index. */ -+ poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs)); -+ poly_uint64 base = m_base * exact_div (nelts, 2); -+ vec_perm_builder builder (nelts, 2, 3); -+ for (unsigned int i = 0; i < 3; ++i) -+ { -+ builder.quick_push (base + i); -+ builder.quick_push (base + i + nelts); -+ } -+ return fold_permute (f, builder); -+ } -+ -+ /* 0 for svzip1, 1 for svzip2. 
*/ -+ unsigned int m_base; -+}; -+ -+} /* end anonymous namespace */ -+ -+namespace aarch64_sve { -+ -+FUNCTION (svabd, svabd_impl,) -+FUNCTION (svabs, quiet, (ABS, ABS, UNSPEC_COND_FABS)) -+FUNCTION (svacge, svac_impl, (UNSPEC_COND_FCMGE)) -+FUNCTION (svacgt, svac_impl, (UNSPEC_COND_FCMGT)) -+FUNCTION (svacle, svac_impl, (UNSPEC_COND_FCMLE)) -+FUNCTION (svaclt, svac_impl, (UNSPEC_COND_FCMLT)) -+FUNCTION (svadd, rtx_code_function, (PLUS, PLUS, UNSPEC_COND_FADD)) -+FUNCTION (svadda, svadda_impl,) -+FUNCTION (svaddv, reduction, (UNSPEC_SADDV, UNSPEC_UADDV, UNSPEC_FADDV)) -+FUNCTION (svadrb, svadr_bhwd_impl, (0)) -+FUNCTION (svadrd, svadr_bhwd_impl, (3)) -+FUNCTION (svadrh, svadr_bhwd_impl, (1)) -+FUNCTION (svadrw, svadr_bhwd_impl, (2)) -+FUNCTION (svand, rtx_code_function, (AND, AND)) -+FUNCTION (svandv, reduction, (UNSPEC_ANDV)) -+FUNCTION (svasr, rtx_code_function, (ASHIFTRT, ASHIFTRT)) -+FUNCTION (svasr_wide, shift_wide, (ASHIFTRT, UNSPEC_ASHIFTRT_WIDE)) -+FUNCTION (svasrd, svasrd_impl,) -+FUNCTION (svbfdot, fixed_insn_function, (CODE_FOR_aarch64_sve_bfdotvnx4sf)) -+FUNCTION (svbfdot_lane, fixed_insn_function, -+ (CODE_FOR_aarch64_sve_bfdot_lanevnx4sf)) -+FUNCTION (svbfmlalb, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlalbvnx4sf)) -+FUNCTION (svbfmlalb_lane, fixed_insn_function, -+ (CODE_FOR_aarch64_sve_bfmlalb_lanevnx4sf)) -+FUNCTION (svbfmlalt, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlaltvnx4sf)) -+FUNCTION (svbfmlalt_lane, fixed_insn_function, -+ (CODE_FOR_aarch64_sve_bfmlalt_lanevnx4sf)) -+FUNCTION (svbfmmla, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmmlavnx4sf)) -+FUNCTION (svbic, svbic_impl,) -+FUNCTION (svbrka, svbrk_unary_impl, (UNSPEC_BRKA)) -+FUNCTION (svbrkb, svbrk_unary_impl, (UNSPEC_BRKB)) -+FUNCTION (svbrkn, svbrk_binary_impl, (UNSPEC_BRKN)) -+FUNCTION (svbrkpa, svbrk_binary_impl, (UNSPEC_BRKPA)) -+FUNCTION (svbrkpb, svbrk_binary_impl, (UNSPEC_BRKPB)) -+FUNCTION (svcadd, svcadd_impl,) -+FUNCTION (svclasta, svclast_impl, (UNSPEC_CLASTA)) -+FUNCTION (svclastb, svclast_impl, (UNSPEC_CLASTB)) -+FUNCTION (svcls, unary_count, (CLRSB)) -+FUNCTION (svclz, unary_count, (CLZ)) -+FUNCTION (svcmla, svcmla_impl,) -+FUNCTION (svcmla_lane, svcmla_lane_impl,) -+FUNCTION (svcmpeq, svcmp_impl, (EQ_EXPR, UNSPEC_COND_FCMEQ)) -+FUNCTION (svcmpeq_wide, svcmp_wide_impl, (EQ_EXPR, UNSPEC_COND_CMPEQ_WIDE, -+ UNSPEC_COND_CMPEQ_WIDE)) -+FUNCTION (svcmpge, svcmp_impl, (GE_EXPR, UNSPEC_COND_FCMGE)) -+FUNCTION (svcmpge_wide, svcmp_wide_impl, (GE_EXPR, UNSPEC_COND_CMPGE_WIDE, -+ UNSPEC_COND_CMPHS_WIDE)) -+FUNCTION (svcmpgt, svcmp_impl, (GT_EXPR, UNSPEC_COND_FCMGT)) -+FUNCTION (svcmpgt_wide, svcmp_wide_impl, (GT_EXPR, UNSPEC_COND_CMPGT_WIDE, -+ UNSPEC_COND_CMPHI_WIDE)) -+FUNCTION (svcmple, svcmp_impl, (LE_EXPR, UNSPEC_COND_FCMLE)) -+FUNCTION (svcmple_wide, svcmp_wide_impl, (LE_EXPR, UNSPEC_COND_CMPLE_WIDE, -+ UNSPEC_COND_CMPLS_WIDE)) -+FUNCTION (svcmplt, svcmp_impl, (LT_EXPR, UNSPEC_COND_FCMLT)) -+FUNCTION (svcmplt_wide, svcmp_wide_impl, (LT_EXPR, UNSPEC_COND_CMPLT_WIDE, -+ UNSPEC_COND_CMPLO_WIDE)) -+FUNCTION (svcmpne, svcmp_impl, (NE_EXPR, UNSPEC_COND_FCMNE)) -+FUNCTION (svcmpne_wide, svcmp_wide_impl, (NE_EXPR, UNSPEC_COND_CMPNE_WIDE, -+ UNSPEC_COND_CMPNE_WIDE)) -+FUNCTION (svcmpuo, svcmpuo_impl,) -+FUNCTION (svcnot, svcnot_impl,) -+FUNCTION (svcnt, unary_count, (POPCOUNT)) -+FUNCTION (svcntb, svcnt_bhwd_impl, (VNx16QImode)) -+FUNCTION (svcntb_pat, svcnt_bhwd_pat_impl, (VNx16QImode)) -+FUNCTION (svcntd, svcnt_bhwd_impl, (VNx2DImode)) -+FUNCTION (svcntd_pat, svcnt_bhwd_pat_impl, (VNx2DImode)) 
-+FUNCTION (svcnth, svcnt_bhwd_impl, (VNx8HImode)) -+FUNCTION (svcnth_pat, svcnt_bhwd_pat_impl, (VNx8HImode)) -+FUNCTION (svcntp, svcntp_impl,) -+FUNCTION (svcntw, svcnt_bhwd_impl, (VNx4SImode)) -+FUNCTION (svcntw_pat, svcnt_bhwd_pat_impl, (VNx4SImode)) -+FUNCTION (svcompact, QUIET_CODE_FOR_MODE0 (aarch64_sve_compact),) -+FUNCTION (svcreate2, svcreate_impl, (2)) -+FUNCTION (svcreate3, svcreate_impl, (3)) -+FUNCTION (svcreate4, svcreate_impl, (4)) -+FUNCTION (svcvt, svcvt_impl,) -+FUNCTION (svcvtnt, CODE_FOR_MODE0 (aarch64_sve_cvtnt),) -+FUNCTION (svdiv, rtx_code_function, (DIV, UDIV, UNSPEC_COND_FDIV)) -+FUNCTION (svdivr, rtx_code_function_rotated, (DIV, UDIV, UNSPEC_COND_FDIV)) -+FUNCTION (svdot, svdot_impl,) -+FUNCTION (svdot_lane, svdotprod_lane_impl, (UNSPEC_SDOT, UNSPEC_UDOT, -1)) -+FUNCTION (svdup, svdup_impl,) -+FUNCTION (svdup_lane, svdup_lane_impl,) -+FUNCTION (svdupq, svdupq_impl,) -+FUNCTION (svdupq_lane, svdupq_lane_impl,) -+FUNCTION (sveor, rtx_code_function, (XOR, XOR, -1)) -+FUNCTION (sveorv, reduction, (UNSPEC_XORV)) -+FUNCTION (svexpa, unspec_based_function, (-1, -1, UNSPEC_FEXPA)) -+FUNCTION (svext, QUIET_CODE_FOR_MODE0 (aarch64_sve_ext),) -+FUNCTION (svextb, svext_bhw_impl, (QImode)) -+FUNCTION (svexth, svext_bhw_impl, (HImode)) -+FUNCTION (svextw, svext_bhw_impl, (SImode)) -+FUNCTION (svget2, svget_impl, (2)) -+FUNCTION (svget3, svget_impl, (3)) -+FUNCTION (svget4, svget_impl, (4)) -+FUNCTION (svindex, svindex_impl,) -+FUNCTION (svinsr, svinsr_impl,) -+FUNCTION (svlasta, svlast_impl, (UNSPEC_LASTA)) -+FUNCTION (svlastb, svlast_impl, (UNSPEC_LASTB)) -+FUNCTION (svld1, svld1_impl,) -+FUNCTION (svld1_gather, svld1_gather_impl,) -+FUNCTION (svld1ro, svld1ro_impl,) -+FUNCTION (svld1rq, svld1rq_impl,) -+FUNCTION (svld1sb, svld1_extend_impl, (TYPE_SUFFIX_s8)) -+FUNCTION (svld1sb_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s8)) -+FUNCTION (svld1sh, svld1_extend_impl, (TYPE_SUFFIX_s16)) -+FUNCTION (svld1sh_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s16)) -+FUNCTION (svld1sw, svld1_extend_impl, (TYPE_SUFFIX_s32)) -+FUNCTION (svld1sw_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s32)) -+FUNCTION (svld1ub, svld1_extend_impl, (TYPE_SUFFIX_u8)) -+FUNCTION (svld1ub_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u8)) -+FUNCTION (svld1uh, svld1_extend_impl, (TYPE_SUFFIX_u16)) -+FUNCTION (svld1uh_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u16)) -+FUNCTION (svld1uw, svld1_extend_impl, (TYPE_SUFFIX_u32)) -+FUNCTION (svld1uw_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u32)) -+FUNCTION (svld2, svld234_impl, (2)) -+FUNCTION (svld3, svld234_impl, (3)) -+FUNCTION (svld4, svld234_impl, (4)) -+FUNCTION (svldff1, svldxf1_impl, (UNSPEC_LDFF1)) -+FUNCTION (svldff1_gather, svldff1_gather_impl,) -+FUNCTION (svldff1sb, svldxf1_extend_impl, (TYPE_SUFFIX_s8, UNSPEC_LDFF1)) -+FUNCTION (svldff1sb_gather, svldff1_gather_extend, (TYPE_SUFFIX_s8)) -+FUNCTION (svldff1sh, svldxf1_extend_impl, (TYPE_SUFFIX_s16, UNSPEC_LDFF1)) -+FUNCTION (svldff1sh_gather, svldff1_gather_extend, (TYPE_SUFFIX_s16)) -+FUNCTION (svldff1sw, svldxf1_extend_impl, (TYPE_SUFFIX_s32, UNSPEC_LDFF1)) -+FUNCTION (svldff1sw_gather, svldff1_gather_extend, (TYPE_SUFFIX_s32)) -+FUNCTION (svldff1ub, svldxf1_extend_impl, (TYPE_SUFFIX_u8, UNSPEC_LDFF1)) -+FUNCTION (svldff1ub_gather, svldff1_gather_extend, (TYPE_SUFFIX_u8)) -+FUNCTION (svldff1uh, svldxf1_extend_impl, (TYPE_SUFFIX_u16, UNSPEC_LDFF1)) -+FUNCTION (svldff1uh_gather, svldff1_gather_extend, (TYPE_SUFFIX_u16)) -+FUNCTION (svldff1uw, svldxf1_extend_impl, (TYPE_SUFFIX_u32, 
UNSPEC_LDFF1)) -+FUNCTION (svldff1uw_gather, svldff1_gather_extend, (TYPE_SUFFIX_u32)) -+FUNCTION (svldnf1, svldxf1_impl, (UNSPEC_LDNF1)) -+FUNCTION (svldnf1sb, svldxf1_extend_impl, (TYPE_SUFFIX_s8, UNSPEC_LDNF1)) -+FUNCTION (svldnf1sh, svldxf1_extend_impl, (TYPE_SUFFIX_s16, UNSPEC_LDNF1)) -+FUNCTION (svldnf1sw, svldxf1_extend_impl, (TYPE_SUFFIX_s32, UNSPEC_LDNF1)) -+FUNCTION (svldnf1ub, svldxf1_extend_impl, (TYPE_SUFFIX_u8, UNSPEC_LDNF1)) -+FUNCTION (svldnf1uh, svldxf1_extend_impl, (TYPE_SUFFIX_u16, UNSPEC_LDNF1)) -+FUNCTION (svldnf1uw, svldxf1_extend_impl, (TYPE_SUFFIX_u32, UNSPEC_LDNF1)) -+FUNCTION (svldnt1, svldnt1_impl,) -+FUNCTION (svlen, svlen_impl,) -+FUNCTION (svlsl, rtx_code_function, (ASHIFT, ASHIFT)) -+FUNCTION (svlsl_wide, shift_wide, (ASHIFT, UNSPEC_ASHIFT_WIDE)) -+FUNCTION (svlsr, rtx_code_function, (LSHIFTRT, LSHIFTRT)) -+FUNCTION (svlsr_wide, shift_wide, (LSHIFTRT, UNSPEC_LSHIFTRT_WIDE)) -+FUNCTION (svmad, svmad_impl,) -+FUNCTION (svmax, rtx_code_function, (SMAX, UMAX, UNSPEC_COND_FMAX)) -+FUNCTION (svmaxnm, unspec_based_function, (-1, -1, UNSPEC_COND_FMAXNM)) -+FUNCTION (svmaxnmv, reduction, (UNSPEC_FMAXNMV)) -+FUNCTION (svmaxv, reduction, (UNSPEC_SMAXV, UNSPEC_UMAXV, UNSPEC_FMAXV)) -+FUNCTION (svmin, rtx_code_function, (SMIN, UMIN, UNSPEC_COND_FMIN)) -+FUNCTION (svminnm, unspec_based_function, (-1, -1, UNSPEC_COND_FMINNM)) -+FUNCTION (svminnmv, reduction, (UNSPEC_FMINNMV)) -+FUNCTION (svminv, reduction, (UNSPEC_SMINV, UNSPEC_UMINV, UNSPEC_FMINV)) -+FUNCTION (svmla, svmla_impl,) -+FUNCTION (svmla_lane, svmla_svmls_lane_impl, (UNSPEC_FMLA)) -+FUNCTION (svmls, svmls_impl,) -+FUNCTION (svmls_lane, svmla_svmls_lane_impl, (UNSPEC_FMLS)) -+FUNCTION (svmmla, svmmla_impl,) -+FUNCTION (svmov, svmov_impl,) -+FUNCTION (svmsb, svmsb_impl,) -+FUNCTION (svmul, rtx_code_function, (MULT, MULT, UNSPEC_COND_FMUL)) -+FUNCTION (svmul_lane, CODE_FOR_MODE0 (aarch64_mul_lane),) -+FUNCTION (svmulh, unspec_based_function, (UNSPEC_SMUL_HIGHPART, -+ UNSPEC_UMUL_HIGHPART, -1)) -+FUNCTION (svmulx, unspec_based_function, (-1, -1, UNSPEC_COND_FMULX)) -+FUNCTION (svnand, svnand_impl,) -+FUNCTION (svneg, quiet, (NEG, NEG, UNSPEC_COND_FNEG)) -+FUNCTION (svnmad, unspec_based_function, (-1, -1, UNSPEC_COND_FNMLA)) -+FUNCTION (svnmla, unspec_based_function_rotated, (-1, -1, UNSPEC_COND_FNMLA)) -+FUNCTION (svnmls, unspec_based_function_rotated, (-1, -1, UNSPEC_COND_FNMLS)) -+FUNCTION (svnmsb, unspec_based_function, (-1, -1, UNSPEC_COND_FNMLS)) -+FUNCTION (svnor, svnor_impl,) -+FUNCTION (svnot, svnot_impl,) -+FUNCTION (svorn, svorn_impl,) -+FUNCTION (svorr, rtx_code_function, (IOR, IOR)) -+FUNCTION (svorv, reduction, (UNSPEC_IORV)) -+FUNCTION (svpfalse, svpfalse_impl,) -+FUNCTION (svpfirst, svpfirst_svpnext_impl, (UNSPEC_PFIRST)) -+FUNCTION (svpnext, svpfirst_svpnext_impl, (UNSPEC_PNEXT)) -+FUNCTION (svprfb, svprf_bhwd_impl, (VNx16QImode)) -+FUNCTION (svprfb_gather, svprf_bhwd_gather_impl, (VNx16QImode)) -+FUNCTION (svprfd, svprf_bhwd_impl, (VNx2DImode)) -+FUNCTION (svprfd_gather, svprf_bhwd_gather_impl, (VNx2DImode)) -+FUNCTION (svprfh, svprf_bhwd_impl, (VNx8HImode)) -+FUNCTION (svprfh_gather, svprf_bhwd_gather_impl, (VNx8HImode)) -+FUNCTION (svprfw, svprf_bhwd_impl, (VNx4SImode)) -+FUNCTION (svprfw_gather, svprf_bhwd_gather_impl, (VNx4SImode)) -+FUNCTION (svptest_any, svptest_impl, (NE)) -+FUNCTION (svptest_first, svptest_impl, (LT)) -+FUNCTION (svptest_last, svptest_impl, (LTU)) -+FUNCTION (svptrue, svptrue_impl,) -+FUNCTION (svptrue_pat, svptrue_pat_impl,) -+FUNCTION (svqadd, svqadd_impl,) -+FUNCTION 
(svqdecb, svqdec_bhwd_impl, (QImode)) -+FUNCTION (svqdecb_pat, svqdec_bhwd_impl, (QImode)) -+FUNCTION (svqdecd, svqdec_bhwd_impl, (DImode)) -+FUNCTION (svqdecd_pat, svqdec_bhwd_impl, (DImode)) -+FUNCTION (svqdech, svqdec_bhwd_impl, (HImode)) -+FUNCTION (svqdech_pat, svqdec_bhwd_impl, (HImode)) -+FUNCTION (svqdecp, svqdecp_svqincp_impl, (SS_MINUS, US_MINUS)) -+FUNCTION (svqdecw, svqdec_bhwd_impl, (SImode)) -+FUNCTION (svqdecw_pat, svqdec_bhwd_impl, (SImode)) -+FUNCTION (svqincb, svqinc_bhwd_impl, (QImode)) -+FUNCTION (svqincb_pat, svqinc_bhwd_impl, (QImode)) -+FUNCTION (svqincd, svqinc_bhwd_impl, (DImode)) -+FUNCTION (svqincd_pat, svqinc_bhwd_impl, (DImode)) -+FUNCTION (svqinch, svqinc_bhwd_impl, (HImode)) -+FUNCTION (svqinch_pat, svqinc_bhwd_impl, (HImode)) -+FUNCTION (svqincp, svqdecp_svqincp_impl, (SS_PLUS, US_PLUS)) -+FUNCTION (svqincw, svqinc_bhwd_impl, (SImode)) -+FUNCTION (svqincw_pat, svqinc_bhwd_impl, (SImode)) -+FUNCTION (svqsub, svqsub_impl,) -+FUNCTION (svrbit, unspec_based_function, (UNSPEC_RBIT, UNSPEC_RBIT, -1)) -+FUNCTION (svrdffr, svrdffr_impl,) -+FUNCTION (svrecpe, unspec_based_function, (-1, -1, UNSPEC_FRECPE)) -+FUNCTION (svrecps, unspec_based_function, (-1, -1, UNSPEC_FRECPS)) -+FUNCTION (svrecpx, unspec_based_function, (-1, -1, UNSPEC_COND_FRECPX)) -+FUNCTION (svreinterpret, svreinterpret_impl,) -+FUNCTION (svrev, svrev_impl,) -+FUNCTION (svrevb, unspec_based_function, (UNSPEC_REVB, UNSPEC_REVB, -1)) -+FUNCTION (svrevh, unspec_based_function, (UNSPEC_REVH, UNSPEC_REVH, -1)) -+FUNCTION (svrevw, unspec_based_function, (UNSPEC_REVW, UNSPEC_REVW, -1)) -+FUNCTION (svrinta, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTA)) -+FUNCTION (svrinti, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTI)) -+FUNCTION (svrintm, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTM)) -+FUNCTION (svrintn, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTN)) -+FUNCTION (svrintp, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTP)) -+FUNCTION (svrintx, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTX)) -+FUNCTION (svrintz, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTZ)) -+FUNCTION (svrsqrte, unspec_based_function, (-1, -1, UNSPEC_RSQRTE)) -+FUNCTION (svrsqrts, unspec_based_function, (-1, -1, UNSPEC_RSQRTS)) -+FUNCTION (svscale, unspec_based_function, (-1, -1, UNSPEC_COND_FSCALE)) -+FUNCTION (svsel, svsel_impl,) -+FUNCTION (svset2, svset_impl, (2)) -+FUNCTION (svset3, svset_impl, (3)) -+FUNCTION (svset4, svset_impl, (4)) -+FUNCTION (svsetffr, svsetffr_impl,) -+FUNCTION (svsplice, QUIET_CODE_FOR_MODE0 (aarch64_sve_splice),) -+FUNCTION (svsqrt, rtx_code_function, (SQRT, SQRT, UNSPEC_COND_FSQRT)) -+FUNCTION (svst1, svst1_impl,) -+FUNCTION (svst1_scatter, svst1_scatter_impl,) -+FUNCTION (svst1b, svst1_truncate_impl, (QImode)) -+FUNCTION (svst1b_scatter, svst1_scatter_truncate_impl, (QImode)) -+FUNCTION (svst1h, svst1_truncate_impl, (HImode)) -+FUNCTION (svst1h_scatter, svst1_scatter_truncate_impl, (HImode)) -+FUNCTION (svst1w, svst1_truncate_impl, (SImode)) -+FUNCTION (svst1w_scatter, svst1_scatter_truncate_impl, (SImode)) -+FUNCTION (svst2, svst234_impl, (2)) -+FUNCTION (svst3, svst234_impl, (3)) -+FUNCTION (svst4, svst234_impl, (4)) -+FUNCTION (svstnt1, svstnt1_impl,) -+FUNCTION (svsub, svsub_impl,) -+FUNCTION (svsubr, rtx_code_function_rotated, (MINUS, MINUS, UNSPEC_COND_FSUB)) -+FUNCTION (svsudot, svusdot_impl, (true)) -+FUNCTION (svsudot_lane, svdotprod_lane_impl, (UNSPEC_SUDOT, -1, -1)) -+FUNCTION (svtbl, svtbl_impl,) -+FUNCTION (svtmad, CODE_FOR_MODE0 (aarch64_sve_tmad),) 
-+FUNCTION (svtrn1, svtrn_impl, (0)) -+FUNCTION (svtrn1q, unspec_based_function, (UNSPEC_TRN1Q, UNSPEC_TRN1Q, -+ UNSPEC_TRN1Q)) -+FUNCTION (svtrn2, svtrn_impl, (1)) -+FUNCTION (svtrn2q, unspec_based_function, (UNSPEC_TRN2Q, UNSPEC_TRN2Q, -+ UNSPEC_TRN2Q)) -+FUNCTION (svtsmul, unspec_based_function, (-1, -1, UNSPEC_FTSMUL)) -+FUNCTION (svtssel, unspec_based_function, (-1, -1, UNSPEC_FTSSEL)) -+FUNCTION (svundef, svundef_impl, (1)) -+FUNCTION (svundef2, svundef_impl, (2)) -+FUNCTION (svundef3, svundef_impl, (3)) -+FUNCTION (svundef4, svundef_impl, (4)) -+FUNCTION (svunpkhi, svunpk_impl, (true)) -+FUNCTION (svunpklo, svunpk_impl, (false)) -+FUNCTION (svusdot, svusdot_impl, (false)) -+FUNCTION (svusdot_lane, svdotprod_lane_impl, (UNSPEC_USDOT, -1, -1)) -+FUNCTION (svusmmla, unspec_based_add_function, (UNSPEC_USMATMUL, -1, -1)) -+FUNCTION (svuzp1, svuzp_impl, (0)) -+FUNCTION (svuzp1q, unspec_based_function, (UNSPEC_UZP1Q, UNSPEC_UZP1Q, -+ UNSPEC_UZP1Q)) -+FUNCTION (svuzp2, svuzp_impl, (1)) -+FUNCTION (svuzp2q, unspec_based_function, (UNSPEC_UZP2Q, UNSPEC_UZP2Q, -+ UNSPEC_UZP2Q)) -+FUNCTION (svwhilele, svwhile_impl, (UNSPEC_WHILELE, UNSPEC_WHILELS, true)) -+FUNCTION (svwhilelt, svwhile_impl, (UNSPEC_WHILELT, UNSPEC_WHILELO, false)) -+FUNCTION (svwrffr, svwrffr_impl,) -+FUNCTION (svzip1, svzip_impl, (0)) -+FUNCTION (svzip1q, unspec_based_function, (UNSPEC_ZIP1Q, UNSPEC_ZIP1Q, -+ UNSPEC_ZIP1Q)) -+FUNCTION (svzip2, svzip_impl, (1)) -+FUNCTION (svzip2q, unspec_based_function, (UNSPEC_ZIP2Q, UNSPEC_ZIP2Q, -+ UNSPEC_ZIP2Q)) -+ -+} /* end namespace aarch64_sve */ -diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.def b/gcc/config/aarch64/aarch64-sve-builtins-base.def -new file mode 100644 -index 000000000..795a5fd90 ---- /dev/null -+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.def -@@ -0,0 +1,355 @@ -+/* ACLE support for AArch64 SVE (__ARM_FEATURE_SVE intrinsics) -+ Copyright (C) 2018-2019 Free Software Foundation, Inc. -+ -+ This file is part of GCC. -+ -+ GCC is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published by -+ the Free Software Foundation; either version 3, or (at your option) -+ any later version. -+ -+ GCC is distributed in the hope that it will be useful, but -+ WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ General Public License for more details. -+ -+ You should have received a copy of the GNU General Public License -+ along with GCC; see the file COPYING3. If not see -+ . 
*/ -+ -+#define REQUIRED_EXTENSIONS 0 -+DEF_SVE_FUNCTION (svabd, binary_opt_n, all_arith, mxz) -+DEF_SVE_FUNCTION (svabs, unary, all_float_and_signed, mxz) -+DEF_SVE_FUNCTION (svacge, compare_opt_n, all_float, implicit) -+DEF_SVE_FUNCTION (svacgt, compare_opt_n, all_float, implicit) -+DEF_SVE_FUNCTION (svacle, compare_opt_n, all_float, implicit) -+DEF_SVE_FUNCTION (svaclt, compare_opt_n, all_float, implicit) -+DEF_SVE_FUNCTION (svadd, binary_opt_n, all_arith, mxz) -+DEF_SVE_FUNCTION (svadda, fold_left, all_float, implicit) -+DEF_SVE_FUNCTION (svaddv, reduction_wide, all_arith, implicit) -+DEF_SVE_FUNCTION (svadrb, adr_offset, none, none) -+DEF_SVE_FUNCTION (svadrd, adr_index, none, none) -+DEF_SVE_FUNCTION (svadrh, adr_index, none, none) -+DEF_SVE_FUNCTION (svadrw, adr_index, none, none) -+DEF_SVE_FUNCTION (svand, binary_opt_n, all_integer, mxz) -+DEF_SVE_FUNCTION (svand, binary_opt_n, b, z) -+DEF_SVE_FUNCTION (svandv, reduction, all_integer, implicit) -+DEF_SVE_FUNCTION (svasr, binary_uint_opt_n, all_signed, mxz) -+DEF_SVE_FUNCTION (svasr_wide, binary_uint64_opt_n, bhs_signed, mxz) -+DEF_SVE_FUNCTION (svasrd, shift_right_imm, all_signed, mxz) -+DEF_SVE_FUNCTION (svbic, binary_opt_n, all_integer, mxz) -+DEF_SVE_FUNCTION (svbic, binary_opt_n, b, z) -+DEF_SVE_FUNCTION (svbrka, unary, b, mz) -+DEF_SVE_FUNCTION (svbrkb, unary, b, mz) -+DEF_SVE_FUNCTION (svbrkn, binary, b, z) -+DEF_SVE_FUNCTION (svbrkpa, binary, b, z) -+DEF_SVE_FUNCTION (svbrkpb, binary, b, z) -+DEF_SVE_FUNCTION (svcadd, binary_rotate, all_float, mxz) -+DEF_SVE_FUNCTION (svclasta, clast, all_data, implicit) -+DEF_SVE_FUNCTION (svclastb, clast, all_data, implicit) -+DEF_SVE_FUNCTION (svcls, unary_to_uint, all_signed, mxz) -+DEF_SVE_FUNCTION (svclz, unary_to_uint, all_integer, mxz) -+DEF_SVE_FUNCTION (svcmla, ternary_rotate, all_float, mxz) -+DEF_SVE_FUNCTION (svcmla_lane, ternary_lane_rotate, hs_float, none) -+DEF_SVE_FUNCTION (svcmpeq, compare_opt_n, all_arith, implicit) -+DEF_SVE_FUNCTION (svcmpeq_wide, compare_wide_opt_n, bhs_signed, implicit) -+DEF_SVE_FUNCTION (svcmpge, compare_opt_n, all_arith, implicit) -+DEF_SVE_FUNCTION (svcmpge_wide, compare_wide_opt_n, bhs_integer, implicit) -+DEF_SVE_FUNCTION (svcmpgt, compare_opt_n, all_arith, implicit) -+DEF_SVE_FUNCTION (svcmpgt_wide, compare_wide_opt_n, bhs_integer, implicit) -+DEF_SVE_FUNCTION (svcmple, compare_opt_n, all_arith, implicit) -+DEF_SVE_FUNCTION (svcmple_wide, compare_wide_opt_n, bhs_integer, implicit) -+DEF_SVE_FUNCTION (svcmplt, compare_opt_n, all_arith, implicit) -+DEF_SVE_FUNCTION (svcmplt_wide, compare_wide_opt_n, bhs_integer, implicit) -+DEF_SVE_FUNCTION (svcmpne, compare_opt_n, all_arith, implicit) -+DEF_SVE_FUNCTION (svcmpne_wide, compare_wide_opt_n, bhs_signed, implicit) -+DEF_SVE_FUNCTION (svcmpuo, compare_opt_n, all_float, implicit) -+DEF_SVE_FUNCTION (svcnot, unary, all_integer, mxz) -+DEF_SVE_FUNCTION (svcnt, unary_to_uint, all_data, mxz) -+DEF_SVE_FUNCTION (svcntb, count_inherent, none, none) -+DEF_SVE_FUNCTION (svcntb_pat, count_pat, none, none) -+DEF_SVE_FUNCTION (svcntd, count_inherent, none, none) -+DEF_SVE_FUNCTION (svcntd_pat, count_pat, none, none) -+DEF_SVE_FUNCTION (svcnth, count_inherent, none, none) -+DEF_SVE_FUNCTION (svcnth_pat, count_pat, none, none) -+DEF_SVE_FUNCTION (svcntp, count_pred, all_pred, implicit) -+DEF_SVE_FUNCTION (svcntw, count_inherent, none, none) -+DEF_SVE_FUNCTION (svcntw_pat, count_pat, none, none) -+DEF_SVE_FUNCTION (svcompact, unary, sd_data, implicit) -+DEF_SVE_FUNCTION (svcreate2, create, all_data, none) 
-+DEF_SVE_FUNCTION (svcreate3, create, all_data, none) -+DEF_SVE_FUNCTION (svcreate4, create, all_data, none) -+DEF_SVE_FUNCTION (svcvt, unary_convert, cvt, mxz) -+DEF_SVE_FUNCTION (svdiv, binary_opt_n, all_float_and_sd_integer, mxz) -+DEF_SVE_FUNCTION (svdivr, binary_opt_n, all_float_and_sd_integer, mxz) -+DEF_SVE_FUNCTION (svdot, ternary_qq_opt_n, sd_integer, none) -+DEF_SVE_FUNCTION (svdot_lane, ternary_qq_lane, sd_integer, none) -+DEF_SVE_FUNCTION (svdup, unary_n, all_data, mxz_or_none) -+DEF_SVE_FUNCTION (svdup, unary_n, all_pred, none) -+DEF_SVE_FUNCTION (svdup_lane, binary_uint_n, all_data, none) -+DEF_SVE_FUNCTION (svdupq, dupq, all_data, none) -+DEF_SVE_FUNCTION (svdupq, dupq, all_pred, none) -+DEF_SVE_FUNCTION (svdupq_lane, binary_uint64_n, all_data, none) -+DEF_SVE_FUNCTION (sveor, binary_opt_n, all_integer, mxz) -+DEF_SVE_FUNCTION (sveor, binary_opt_n, b, z) -+DEF_SVE_FUNCTION (sveorv, reduction, all_integer, implicit) -+DEF_SVE_FUNCTION (svexpa, unary_uint, all_float, none) -+DEF_SVE_FUNCTION (svext, ext, all_data, none) -+DEF_SVE_FUNCTION (svextb, unary, hsd_integer, mxz) -+DEF_SVE_FUNCTION (svexth, unary, sd_integer, mxz) -+DEF_SVE_FUNCTION (svextw, unary, d_integer, mxz) -+DEF_SVE_FUNCTION (svget2, get, all_data, none) -+DEF_SVE_FUNCTION (svget3, get, all_data, none) -+DEF_SVE_FUNCTION (svget4, get, all_data, none) -+DEF_SVE_FUNCTION (svindex, binary_scalar, all_integer, none) -+DEF_SVE_FUNCTION (svinsr, binary_n, all_data, none) -+DEF_SVE_FUNCTION (svlasta, reduction, all_data, implicit) -+DEF_SVE_FUNCTION (svlastb, reduction, all_data, implicit) -+DEF_SVE_FUNCTION (svld1, load, all_data, implicit) -+DEF_SVE_FUNCTION (svld1_gather, load_gather_sv, sd_data, implicit) -+DEF_SVE_FUNCTION (svld1_gather, load_gather_vs, sd_data, implicit) -+DEF_SVE_FUNCTION (svld1rq, load_replicate, all_data, implicit) -+DEF_SVE_FUNCTION (svld1sb, load_ext, hsd_integer, implicit) -+DEF_SVE_FUNCTION (svld1sb_gather, load_ext_gather_offset, sd_integer, implicit) -+DEF_SVE_FUNCTION (svld1sh, load_ext, sd_integer, implicit) -+DEF_SVE_FUNCTION (svld1sh_gather, load_ext_gather_offset, sd_integer, implicit) -+DEF_SVE_FUNCTION (svld1sh_gather, load_ext_gather_index, sd_integer, implicit) -+DEF_SVE_FUNCTION (svld1sw, load_ext, d_integer, implicit) -+DEF_SVE_FUNCTION (svld1sw_gather, load_ext_gather_offset, d_integer, implicit) -+DEF_SVE_FUNCTION (svld1sw_gather, load_ext_gather_index, d_integer, implicit) -+DEF_SVE_FUNCTION (svld1ub, load_ext, hsd_integer, implicit) -+DEF_SVE_FUNCTION (svld1ub_gather, load_ext_gather_offset, sd_integer, implicit) -+DEF_SVE_FUNCTION (svld1uh, load_ext, sd_integer, implicit) -+DEF_SVE_FUNCTION (svld1uh_gather, load_ext_gather_offset, sd_integer, implicit) -+DEF_SVE_FUNCTION (svld1uh_gather, load_ext_gather_index, sd_integer, implicit) -+DEF_SVE_FUNCTION (svld1uw, load_ext, d_integer, implicit) -+DEF_SVE_FUNCTION (svld1uw_gather, load_ext_gather_offset, d_integer, implicit) -+DEF_SVE_FUNCTION (svld1uw_gather, load_ext_gather_index, d_integer, implicit) -+DEF_SVE_FUNCTION (svldff1, load, all_data, implicit) -+DEF_SVE_FUNCTION (svldff1_gather, load_gather_sv, sd_data, implicit) -+DEF_SVE_FUNCTION (svldff1_gather, load_gather_vs, sd_data, implicit) -+DEF_SVE_FUNCTION (svldff1sb, load_ext, hsd_integer, implicit) -+DEF_SVE_FUNCTION (svldff1sb_gather, load_ext_gather_offset, sd_integer, implicit) -+DEF_SVE_FUNCTION (svldff1sh, load_ext, sd_integer, implicit) -+DEF_SVE_FUNCTION (svldff1sh_gather, load_ext_gather_offset, sd_integer, implicit) -+DEF_SVE_FUNCTION (svldff1sh_gather, 
load_ext_gather_index, sd_integer, implicit) -+DEF_SVE_FUNCTION (svldff1sw, load_ext, d_integer, implicit) -+DEF_SVE_FUNCTION (svldff1sw_gather, load_ext_gather_offset, d_integer, implicit) -+DEF_SVE_FUNCTION (svldff1sw_gather, load_ext_gather_index, d_integer, implicit) -+DEF_SVE_FUNCTION (svldff1ub, load_ext, hsd_integer, implicit) -+DEF_SVE_FUNCTION (svldff1ub_gather, load_ext_gather_offset, sd_integer, implicit) -+DEF_SVE_FUNCTION (svldff1uh, load_ext, sd_integer, implicit) -+DEF_SVE_FUNCTION (svldff1uh_gather, load_ext_gather_offset, sd_integer, implicit) -+DEF_SVE_FUNCTION (svldff1uh_gather, load_ext_gather_index, sd_integer, implicit) -+DEF_SVE_FUNCTION (svldff1uw, load_ext, d_integer, implicit) -+DEF_SVE_FUNCTION (svldff1uw_gather, load_ext_gather_offset, d_integer, implicit) -+DEF_SVE_FUNCTION (svldff1uw_gather, load_ext_gather_index, d_integer, implicit) -+DEF_SVE_FUNCTION (svldnf1, load, all_data, implicit) -+DEF_SVE_FUNCTION (svldnf1sb, load_ext, hsd_integer, implicit) -+DEF_SVE_FUNCTION (svldnf1sh, load_ext, sd_integer, implicit) -+DEF_SVE_FUNCTION (svldnf1sw, load_ext, d_integer, implicit) -+DEF_SVE_FUNCTION (svldnf1ub, load_ext, hsd_integer, implicit) -+DEF_SVE_FUNCTION (svldnf1uh, load_ext, sd_integer, implicit) -+DEF_SVE_FUNCTION (svldnf1uw, load_ext, d_integer, implicit) -+DEF_SVE_FUNCTION (svldnt1, load, all_data, implicit) -+DEF_SVE_FUNCTION (svld2, load, all_data, implicit) -+DEF_SVE_FUNCTION (svld3, load, all_data, implicit) -+DEF_SVE_FUNCTION (svld4, load, all_data, implicit) -+DEF_SVE_FUNCTION (svlen, count_vector, all_data, none) -+DEF_SVE_FUNCTION (svlsl, binary_uint_opt_n, all_integer, mxz) -+DEF_SVE_FUNCTION (svlsl_wide, binary_uint64_opt_n, bhs_integer, mxz) -+DEF_SVE_FUNCTION (svlsr, binary_uint_opt_n, all_unsigned, mxz) -+DEF_SVE_FUNCTION (svlsr_wide, binary_uint64_opt_n, bhs_unsigned, mxz) -+DEF_SVE_FUNCTION (svmad, ternary_opt_n, all_arith, mxz) -+DEF_SVE_FUNCTION (svmax, binary_opt_n, all_arith, mxz) -+DEF_SVE_FUNCTION (svmaxnm, binary_opt_n, all_float, mxz) -+DEF_SVE_FUNCTION (svmaxnmv, reduction, all_float, implicit) -+DEF_SVE_FUNCTION (svmaxv, reduction, all_arith, implicit) -+DEF_SVE_FUNCTION (svmin, binary_opt_n, all_arith, mxz) -+DEF_SVE_FUNCTION (svminnm, binary_opt_n, all_float, mxz) -+DEF_SVE_FUNCTION (svminnmv, reduction, all_float, implicit) -+DEF_SVE_FUNCTION (svminv, reduction, all_arith, implicit) -+DEF_SVE_FUNCTION (svmla, ternary_opt_n, all_arith, mxz) -+DEF_SVE_FUNCTION (svmla_lane, ternary_lane, all_float, none) -+DEF_SVE_FUNCTION (svmls, ternary_opt_n, all_arith, mxz) -+DEF_SVE_FUNCTION (svmls_lane, ternary_lane, all_float, none) -+DEF_SVE_FUNCTION (svmmla, mmla, none, none) -+DEF_SVE_FUNCTION (svmov, unary, b, z) -+DEF_SVE_FUNCTION (svmsb, ternary_opt_n, all_arith, mxz) -+DEF_SVE_FUNCTION (svmul, binary_opt_n, all_arith, mxz) -+DEF_SVE_FUNCTION (svmul_lane, binary_lane, all_float, none) -+DEF_SVE_FUNCTION (svmulh, binary_opt_n, all_integer, mxz) -+DEF_SVE_FUNCTION (svmulx, binary_opt_n, all_float, mxz) -+DEF_SVE_FUNCTION (svnand, binary_opt_n, b, z) -+DEF_SVE_FUNCTION (svneg, unary, all_float_and_signed, mxz) -+DEF_SVE_FUNCTION (svnmad, ternary_opt_n, all_float, mxz) -+DEF_SVE_FUNCTION (svnmla, ternary_opt_n, all_float, mxz) -+DEF_SVE_FUNCTION (svnmls, ternary_opt_n, all_float, mxz) -+DEF_SVE_FUNCTION (svnmsb, ternary_opt_n, all_float, mxz) -+DEF_SVE_FUNCTION (svnor, binary_opt_n, b, z) -+DEF_SVE_FUNCTION (svnot, unary, all_integer, mxz) -+DEF_SVE_FUNCTION (svnot, unary, b, z) -+DEF_SVE_FUNCTION (svorn, binary_opt_n, b, z) 
-+DEF_SVE_FUNCTION (svorr, binary_opt_n, all_integer, mxz) -+DEF_SVE_FUNCTION (svorr, binary_opt_n, b, z) -+DEF_SVE_FUNCTION (svorv, reduction, all_integer, implicit) -+DEF_SVE_FUNCTION (svpfalse, inherent_b, b, none) -+DEF_SVE_FUNCTION (svpfirst, unary, b, implicit) -+DEF_SVE_FUNCTION (svpnext, unary_pred, all_pred, implicit) -+DEF_SVE_FUNCTION (svprfb, prefetch, none, implicit) -+DEF_SVE_FUNCTION (svprfb_gather, prefetch_gather_offset, none, implicit) -+DEF_SVE_FUNCTION (svprfd, prefetch, none, implicit) -+DEF_SVE_FUNCTION (svprfd_gather, prefetch_gather_index, none, implicit) -+DEF_SVE_FUNCTION (svprfh, prefetch, none, implicit) -+DEF_SVE_FUNCTION (svprfh_gather, prefetch_gather_index, none, implicit) -+DEF_SVE_FUNCTION (svprfw, prefetch, none, implicit) -+DEF_SVE_FUNCTION (svprfw_gather, prefetch_gather_index, none, implicit) -+DEF_SVE_FUNCTION (svptest_any, ptest, none, implicit) -+DEF_SVE_FUNCTION (svptest_first, ptest, none, implicit) -+DEF_SVE_FUNCTION (svptest_last, ptest, none, implicit) -+DEF_SVE_FUNCTION (svptrue, inherent, all_pred, none) -+DEF_SVE_FUNCTION (svptrue_pat, pattern_pred, all_pred, none) -+DEF_SVE_FUNCTION (svqadd, binary_opt_n, all_integer, none) -+DEF_SVE_FUNCTION (svqdecb, inc_dec, sd_integer, none) -+DEF_SVE_FUNCTION (svqdecb_pat, inc_dec_pat, sd_integer, none) -+DEF_SVE_FUNCTION (svqdecd, inc_dec, d_integer, none) -+DEF_SVE_FUNCTION (svqdecd, inc_dec, sd_integer, none) -+DEF_SVE_FUNCTION (svqdecd_pat, inc_dec_pat, d_integer, none) -+DEF_SVE_FUNCTION (svqdecd_pat, inc_dec_pat, sd_integer, none) -+DEF_SVE_FUNCTION (svqdech, inc_dec, h_integer, none) -+DEF_SVE_FUNCTION (svqdech, inc_dec, sd_integer, none) -+DEF_SVE_FUNCTION (svqdech_pat, inc_dec_pat, h_integer, none) -+DEF_SVE_FUNCTION (svqdech_pat, inc_dec_pat, sd_integer, none) -+DEF_SVE_FUNCTION (svqdecp, inc_dec_pred, hsd_integer, none) -+DEF_SVE_FUNCTION (svqdecp, inc_dec_pred_scalar, inc_dec_n, none) -+DEF_SVE_FUNCTION (svqdecw, inc_dec, s_integer, none) -+DEF_SVE_FUNCTION (svqdecw, inc_dec, sd_integer, none) -+DEF_SVE_FUNCTION (svqdecw_pat, inc_dec_pat, s_integer, none) -+DEF_SVE_FUNCTION (svqdecw_pat, inc_dec_pat, sd_integer, none) -+DEF_SVE_FUNCTION (svqincb, inc_dec, sd_integer, none) -+DEF_SVE_FUNCTION (svqincb_pat, inc_dec_pat, sd_integer, none) -+DEF_SVE_FUNCTION (svqincd, inc_dec, d_integer, none) -+DEF_SVE_FUNCTION (svqincd, inc_dec, sd_integer, none) -+DEF_SVE_FUNCTION (svqincd_pat, inc_dec_pat, d_integer, none) -+DEF_SVE_FUNCTION (svqincd_pat, inc_dec_pat, sd_integer, none) -+DEF_SVE_FUNCTION (svqinch, inc_dec, h_integer, none) -+DEF_SVE_FUNCTION (svqinch, inc_dec, sd_integer, none) -+DEF_SVE_FUNCTION (svqinch_pat, inc_dec_pat, h_integer, none) -+DEF_SVE_FUNCTION (svqinch_pat, inc_dec_pat, sd_integer, none) -+DEF_SVE_FUNCTION (svqincp, inc_dec_pred, hsd_integer, none) -+DEF_SVE_FUNCTION (svqincp, inc_dec_pred_scalar, inc_dec_n, none) -+DEF_SVE_FUNCTION (svqincw, inc_dec, s_integer, none) -+DEF_SVE_FUNCTION (svqincw, inc_dec, sd_integer, none) -+DEF_SVE_FUNCTION (svqincw_pat, inc_dec_pat, s_integer, none) -+DEF_SVE_FUNCTION (svqincw_pat, inc_dec_pat, sd_integer, none) -+DEF_SVE_FUNCTION (svqsub, binary_opt_n, all_integer, none) -+DEF_SVE_FUNCTION (svrbit, unary, all_integer, mxz) -+DEF_SVE_FUNCTION (svrdffr, rdffr, none, z_or_none) -+DEF_SVE_FUNCTION (svrecpe, unary, all_float, none) -+DEF_SVE_FUNCTION (svrecps, binary, all_float, none) -+DEF_SVE_FUNCTION (svrecpx, unary, all_float, mxz) -+DEF_SVE_FUNCTION (svreinterpret, unary_convert, reinterpret, none) -+DEF_SVE_FUNCTION (svrev, unary, 
all_data, none) -+DEF_SVE_FUNCTION (svrev, unary_pred, all_pred, none) -+DEF_SVE_FUNCTION (svrevb, unary, hsd_integer, mxz) -+DEF_SVE_FUNCTION (svrevh, unary, sd_integer, mxz) -+DEF_SVE_FUNCTION (svrevw, unary, d_integer, mxz) -+DEF_SVE_FUNCTION (svrinta, unary, all_float, mxz) -+DEF_SVE_FUNCTION (svrinti, unary, all_float, mxz) -+DEF_SVE_FUNCTION (svrintm, unary, all_float, mxz) -+DEF_SVE_FUNCTION (svrintn, unary, all_float, mxz) -+DEF_SVE_FUNCTION (svrintp, unary, all_float, mxz) -+DEF_SVE_FUNCTION (svrintx, unary, all_float, mxz) -+DEF_SVE_FUNCTION (svrintz, unary, all_float, mxz) -+DEF_SVE_FUNCTION (svrsqrte, unary, all_float, none) -+DEF_SVE_FUNCTION (svrsqrts, binary, all_float, none) -+DEF_SVE_FUNCTION (svscale, binary_int_opt_n, all_float, mxz) -+DEF_SVE_FUNCTION (svsel, binary, all_data, implicit) -+DEF_SVE_FUNCTION (svsel, binary, b, implicit) -+DEF_SVE_FUNCTION (svset2, set, all_data, none) -+DEF_SVE_FUNCTION (svset3, set, all_data, none) -+DEF_SVE_FUNCTION (svset4, set, all_data, none) -+DEF_SVE_FUNCTION (svsetffr, setffr, none, none) -+DEF_SVE_FUNCTION (svsplice, binary, all_data, implicit) -+DEF_SVE_FUNCTION (svsqrt, unary, all_float, mxz) -+DEF_SVE_FUNCTION (svst1, store, all_data, implicit) -+DEF_SVE_FUNCTION (svst1_scatter, store_scatter_index, sd_data, implicit) -+DEF_SVE_FUNCTION (svst1_scatter, store_scatter_offset, sd_data, implicit) -+DEF_SVE_FUNCTION (svst1b, store, hsd_integer, implicit) -+DEF_SVE_FUNCTION (svst1b_scatter, store_scatter_offset, sd_integer, implicit) -+DEF_SVE_FUNCTION (svst1h, store, sd_integer, implicit) -+DEF_SVE_FUNCTION (svst1h_scatter, store_scatter_index, sd_integer, implicit) -+DEF_SVE_FUNCTION (svst1h_scatter, store_scatter_offset, sd_integer, implicit) -+DEF_SVE_FUNCTION (svst1w, store, d_integer, implicit) -+DEF_SVE_FUNCTION (svst1w_scatter, store_scatter_index, d_integer, implicit) -+DEF_SVE_FUNCTION (svst1w_scatter, store_scatter_offset, d_integer, implicit) -+DEF_SVE_FUNCTION (svst2, store, all_data, implicit) -+DEF_SVE_FUNCTION (svst3, store, all_data, implicit) -+DEF_SVE_FUNCTION (svst4, store, all_data, implicit) -+DEF_SVE_FUNCTION (svstnt1, store, all_data, implicit) -+DEF_SVE_FUNCTION (svsub, binary_opt_n, all_arith, mxz) -+DEF_SVE_FUNCTION (svsubr, binary_opt_n, all_arith, mxz) -+DEF_SVE_FUNCTION (svtbl, binary_uint, all_data, none) -+DEF_SVE_FUNCTION (svtmad, tmad, all_float, none) -+DEF_SVE_FUNCTION (svtrn1, binary, all_data, none) -+DEF_SVE_FUNCTION (svtrn1, binary_pred, all_pred, none) -+DEF_SVE_FUNCTION (svtrn2, binary, all_data, none) -+DEF_SVE_FUNCTION (svtrn2, binary_pred, all_pred, none) -+DEF_SVE_FUNCTION (svtsmul, binary_uint, all_float, none) -+DEF_SVE_FUNCTION (svtssel, binary_uint, all_float, none) -+DEF_SVE_FUNCTION (svundef, inherent, all_data, none) -+DEF_SVE_FUNCTION (svundef2, inherent, all_data, none) -+DEF_SVE_FUNCTION (svundef3, inherent, all_data, none) -+DEF_SVE_FUNCTION (svundef4, inherent, all_data, none) -+DEF_SVE_FUNCTION (svunpkhi, unary_widen, hsd_integer, none) -+DEF_SVE_FUNCTION (svunpkhi, unary_widen, b, none) -+DEF_SVE_FUNCTION (svunpklo, unary_widen, hsd_integer, none) -+DEF_SVE_FUNCTION (svunpklo, unary_widen, b, none) -+DEF_SVE_FUNCTION (svuzp1, binary, all_data, none) -+DEF_SVE_FUNCTION (svuzp1, binary_pred, all_pred, none) -+DEF_SVE_FUNCTION (svuzp2, binary, all_data, none) -+DEF_SVE_FUNCTION (svuzp2, binary_pred, all_pred, none) -+DEF_SVE_FUNCTION (svwhilele, compare_scalar, while, none) -+DEF_SVE_FUNCTION (svwhilelt, compare_scalar, while, none) -+DEF_SVE_FUNCTION (svwrffr, setffr, none, 
implicit) -+DEF_SVE_FUNCTION (svzip1, binary, all_data, none) -+DEF_SVE_FUNCTION (svzip1, binary_pred, all_pred, none) -+DEF_SVE_FUNCTION (svzip2, binary, all_data, none) -+DEF_SVE_FUNCTION (svzip2, binary_pred, all_pred, none) -+#undef REQUIRED_EXTENSIONS -+ -+#define REQUIRED_EXTENSIONS AARCH64_FL_BF16 -+DEF_SVE_FUNCTION (svbfdot, ternary_bfloat_opt_n, s_float, none) -+DEF_SVE_FUNCTION (svbfdot_lane, ternary_bfloat_lanex2, s_float, none) -+DEF_SVE_FUNCTION (svbfmlalb, ternary_bfloat_opt_n, s_float, none) -+DEF_SVE_FUNCTION (svbfmlalb_lane, ternary_bfloat_lane, s_float, none) -+DEF_SVE_FUNCTION (svbfmlalt, ternary_bfloat_opt_n, s_float, none) -+DEF_SVE_FUNCTION (svbfmlalt_lane, ternary_bfloat_lane, s_float, none) -+DEF_SVE_FUNCTION (svbfmmla, ternary_bfloat, s_float, none) -+DEF_SVE_FUNCTION (svcvt, unary_convert, cvt_bfloat, mxz) -+DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_bfloat, mx) -+#undef REQUIRED_EXTENSIONS -+ -+#define REQUIRED_EXTENSIONS AARCH64_FL_I8MM -+DEF_SVE_FUNCTION (svmmla, mmla, s_integer, none) -+DEF_SVE_FUNCTION (svusmmla, ternary_uintq_intq, s_signed, none) -+DEF_SVE_FUNCTION (svsudot, ternary_intq_uintq_opt_n, s_signed, none) -+DEF_SVE_FUNCTION (svsudot_lane, ternary_intq_uintq_lane, s_signed, none) -+DEF_SVE_FUNCTION (svusdot, ternary_uintq_intq_opt_n, s_signed, none) -+DEF_SVE_FUNCTION (svusdot_lane, ternary_uintq_intq_lane, s_signed, none) -+#undef REQUIRED_EXTENSIONS -+ -+#define REQUIRED_EXTENSIONS AARCH64_FL_F32MM -+DEF_SVE_FUNCTION (svmmla, mmla, s_float, none) -+#undef REQUIRED_EXTENSIONS -+ -+#define REQUIRED_EXTENSIONS AARCH64_FL_F64MM -+DEF_SVE_FUNCTION (svld1ro, load_replicate, all_data, implicit) -+DEF_SVE_FUNCTION (svmmla, mmla, d_float, none) -+DEF_SVE_FUNCTION (svtrn1q, binary, all_data, none) -+DEF_SVE_FUNCTION (svtrn2q, binary, all_data, none) -+DEF_SVE_FUNCTION (svuzp1q, binary, all_data, none) -+DEF_SVE_FUNCTION (svuzp2q, binary, all_data, none) -+DEF_SVE_FUNCTION (svzip1q, binary, all_data, none) -+DEF_SVE_FUNCTION (svzip2q, binary, all_data, none) -+#undef REQUIRED_EXTENSIONS -diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.h b/gcc/config/aarch64/aarch64-sve-builtins-base.h -new file mode 100644 -index 000000000..2467e729e ---- /dev/null -+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.h -@@ -0,0 +1,304 @@ -+/* ACLE support for AArch64 SVE (__ARM_FEATURE_SVE intrinsics) -+ Copyright (C) 2018-2019 Free Software Foundation, Inc. -+ -+ This file is part of GCC. -+ -+ GCC is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published by -+ the Free Software Foundation; either version 3, or (at your option) -+ any later version. -+ -+ GCC is distributed in the hope that it will be useful, but -+ WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ General Public License for more details. -+ -+ You should have received a copy of the GNU General Public License -+ along with GCC; see the file COPYING3. If not see -+ . 
*/ -+ -+#ifndef GCC_AARCH64_SVE_BUILTINS_BASE_H -+#define GCC_AARCH64_SVE_BUILTINS_BASE_H -+ -+namespace aarch64_sve -+{ -+ namespace functions -+ { -+ extern const function_base *const svabd; -+ extern const function_base *const svabs; -+ extern const function_base *const svacge; -+ extern const function_base *const svacgt; -+ extern const function_base *const svacle; -+ extern const function_base *const svaclt; -+ extern const function_base *const svadd; -+ extern const function_base *const svadda; -+ extern const function_base *const svaddv; -+ extern const function_base *const svadrb; -+ extern const function_base *const svadrd; -+ extern const function_base *const svadrh; -+ extern const function_base *const svadrw; -+ extern const function_base *const svand; -+ extern const function_base *const svandv; -+ extern const function_base *const svasr; -+ extern const function_base *const svasr_wide; -+ extern const function_base *const svasrd; -+ extern const function_base *const svbfdot; -+ extern const function_base *const svbfdot_lane; -+ extern const function_base *const svbfmlalb; -+ extern const function_base *const svbfmlalb_lane; -+ extern const function_base *const svbfmlalt; -+ extern const function_base *const svbfmlalt_lane; -+ extern const function_base *const svbfmmla; -+ extern const function_base *const svbic; -+ extern const function_base *const svbrka; -+ extern const function_base *const svbrkb; -+ extern const function_base *const svbrkn; -+ extern const function_base *const svbrkpa; -+ extern const function_base *const svbrkpb; -+ extern const function_base *const svcadd; -+ extern const function_base *const svclasta; -+ extern const function_base *const svclastb; -+ extern const function_base *const svcls; -+ extern const function_base *const svclz; -+ extern const function_base *const svcmla; -+ extern const function_base *const svcmla_lane; -+ extern const function_base *const svcmpeq; -+ extern const function_base *const svcmpeq_wide; -+ extern const function_base *const svcmpge; -+ extern const function_base *const svcmpge_wide; -+ extern const function_base *const svcmpgt; -+ extern const function_base *const svcmpgt_wide; -+ extern const function_base *const svcmple; -+ extern const function_base *const svcmple_wide; -+ extern const function_base *const svcmplt; -+ extern const function_base *const svcmplt_wide; -+ extern const function_base *const svcmpne; -+ extern const function_base *const svcmpne_wide; -+ extern const function_base *const svcmpuo; -+ extern const function_base *const svcnot; -+ extern const function_base *const svcnt; -+ extern const function_base *const svcntb; -+ extern const function_base *const svcntb_pat; -+ extern const function_base *const svcntd; -+ extern const function_base *const svcntd_pat; -+ extern const function_base *const svcnth; -+ extern const function_base *const svcnth_pat; -+ extern const function_base *const svcntp; -+ extern const function_base *const svcntw; -+ extern const function_base *const svcntw_pat; -+ extern const function_base *const svcompact; -+ extern const function_base *const svcreate2; -+ extern const function_base *const svcreate3; -+ extern const function_base *const svcreate4; -+ extern const function_base *const svcvt; -+ extern const function_base *const svcvtnt; -+ extern const function_base *const svdiv; -+ extern const function_base *const svdivr; -+ extern const function_base *const svdot; -+ extern const function_base *const svdot_lane; -+ extern const function_base *const svdup; -+ extern 
const function_base *const svdup_lane; -+ extern const function_base *const svdupq; -+ extern const function_base *const svdupq_lane; -+ extern const function_base *const sveor; -+ extern const function_base *const sveorv; -+ extern const function_base *const svexpa; -+ extern const function_base *const svext; -+ extern const function_base *const svextb; -+ extern const function_base *const svexth; -+ extern const function_base *const svextw; -+ extern const function_base *const svget2; -+ extern const function_base *const svget3; -+ extern const function_base *const svget4; -+ extern const function_base *const svindex; -+ extern const function_base *const svinsr; -+ extern const function_base *const svlasta; -+ extern const function_base *const svlastb; -+ extern const function_base *const svld1; -+ extern const function_base *const svld1_gather; -+ extern const function_base *const svld1ro; -+ extern const function_base *const svld1rq; -+ extern const function_base *const svld1sb; -+ extern const function_base *const svld1sb_gather; -+ extern const function_base *const svld1sh; -+ extern const function_base *const svld1sh_gather; -+ extern const function_base *const svld1sw; -+ extern const function_base *const svld1sw_gather; -+ extern const function_base *const svld1ub; -+ extern const function_base *const svld1ub_gather; -+ extern const function_base *const svld1uh; -+ extern const function_base *const svld1uh_gather; -+ extern const function_base *const svld1uw; -+ extern const function_base *const svld1uw_gather; -+ extern const function_base *const svld2; -+ extern const function_base *const svld3; -+ extern const function_base *const svld4; -+ extern const function_base *const svldff1; -+ extern const function_base *const svldff1_gather; -+ extern const function_base *const svldff1sb; -+ extern const function_base *const svldff1sb_gather; -+ extern const function_base *const svldff1sh; -+ extern const function_base *const svldff1sh_gather; -+ extern const function_base *const svldff1sw; -+ extern const function_base *const svldff1sw_gather; -+ extern const function_base *const svldff1ub; -+ extern const function_base *const svldff1ub_gather; -+ extern const function_base *const svldff1uh; -+ extern const function_base *const svldff1uh_gather; -+ extern const function_base *const svldff1uw; -+ extern const function_base *const svldff1uw_gather; -+ extern const function_base *const svldnf1; -+ extern const function_base *const svldnf1sb; -+ extern const function_base *const svldnf1sh; -+ extern const function_base *const svldnf1sw; -+ extern const function_base *const svldnf1ub; -+ extern const function_base *const svldnf1uh; -+ extern const function_base *const svldnf1uw; -+ extern const function_base *const svldnt1; -+ extern const function_base *const svlen; -+ extern const function_base *const svlsl; -+ extern const function_base *const svlsl_wide; -+ extern const function_base *const svlsr; -+ extern const function_base *const svlsr_wide; -+ extern const function_base *const svmad; -+ extern const function_base *const svmax; -+ extern const function_base *const svmaxnm; -+ extern const function_base *const svmaxnmv; -+ extern const function_base *const svmaxv; -+ extern const function_base *const svmin; -+ extern const function_base *const svminnm; -+ extern const function_base *const svminnmv; -+ extern const function_base *const svminv; -+ extern const function_base *const svmla; -+ extern const function_base *const svmla_lane; -+ extern const function_base *const svmls; -+ 
extern const function_base *const svmls_lane; -+ extern const function_base *const svmmla; -+ extern const function_base *const svmov; -+ extern const function_base *const svmsb; -+ extern const function_base *const svmul; -+ extern const function_base *const svmul_lane; -+ extern const function_base *const svmulh; -+ extern const function_base *const svmulx; -+ extern const function_base *const svnand; -+ extern const function_base *const svneg; -+ extern const function_base *const svnmad; -+ extern const function_base *const svnmla; -+ extern const function_base *const svnmls; -+ extern const function_base *const svnmsb; -+ extern const function_base *const svnor; -+ extern const function_base *const svnot; -+ extern const function_base *const svorn; -+ extern const function_base *const svorr; -+ extern const function_base *const svorv; -+ extern const function_base *const svpfalse; -+ extern const function_base *const svpfirst; -+ extern const function_base *const svpnext; -+ extern const function_base *const svprfb; -+ extern const function_base *const svprfb_gather; -+ extern const function_base *const svprfd; -+ extern const function_base *const svprfd_gather; -+ extern const function_base *const svprfh; -+ extern const function_base *const svprfh_gather; -+ extern const function_base *const svprfw; -+ extern const function_base *const svprfw_gather; -+ extern const function_base *const svptest_any; -+ extern const function_base *const svptest_first; -+ extern const function_base *const svptest_last; -+ extern const function_base *const svptrue; -+ extern const function_base *const svptrue_pat; -+ extern const function_base *const svqadd; -+ extern const function_base *const svqdecb; -+ extern const function_base *const svqdecb_pat; -+ extern const function_base *const svqdecd; -+ extern const function_base *const svqdecd_pat; -+ extern const function_base *const svqdech; -+ extern const function_base *const svqdech_pat; -+ extern const function_base *const svqdecp; -+ extern const function_base *const svqdecw; -+ extern const function_base *const svqdecw_pat; -+ extern const function_base *const svqincb; -+ extern const function_base *const svqincb_pat; -+ extern const function_base *const svqincd; -+ extern const function_base *const svqincd_pat; -+ extern const function_base *const svqinch; -+ extern const function_base *const svqinch_pat; -+ extern const function_base *const svqincp; -+ extern const function_base *const svqincw; -+ extern const function_base *const svqincw_pat; -+ extern const function_base *const svqsub; -+ extern const function_base *const svrbit; -+ extern const function_base *const svrdffr; -+ extern const function_base *const svrecpe; -+ extern const function_base *const svrecps; -+ extern const function_base *const svrecpx; -+ extern const function_base *const svreinterpret; -+ extern const function_base *const svrev; -+ extern const function_base *const svrevb; -+ extern const function_base *const svrevh; -+ extern const function_base *const svrevw; -+ extern const function_base *const svrinta; -+ extern const function_base *const svrinti; -+ extern const function_base *const svrintm; -+ extern const function_base *const svrintn; -+ extern const function_base *const svrintp; -+ extern const function_base *const svrintx; -+ extern const function_base *const svrintz; -+ extern const function_base *const svrsqrte; -+ extern const function_base *const svrsqrts; -+ extern const function_base *const svscale; -+ extern const function_base *const svsel; -+ extern 
const function_base *const svset2; -+ extern const function_base *const svset3; -+ extern const function_base *const svset4; -+ extern const function_base *const svsetffr; -+ extern const function_base *const svsplice; -+ extern const function_base *const svsqrt; -+ extern const function_base *const svst1; -+ extern const function_base *const svst1_scatter; -+ extern const function_base *const svst1b; -+ extern const function_base *const svst1b_scatter; -+ extern const function_base *const svst1h; -+ extern const function_base *const svst1h_scatter; -+ extern const function_base *const svst1w; -+ extern const function_base *const svst1w_scatter; -+ extern const function_base *const svst2; -+ extern const function_base *const svst3; -+ extern const function_base *const svst4; -+ extern const function_base *const svstnt1; -+ extern const function_base *const svsub; -+ extern const function_base *const svsubr; -+ extern const function_base *const svsudot; -+ extern const function_base *const svsudot_lane; -+ extern const function_base *const svtbl; -+ extern const function_base *const svtmad; -+ extern const function_base *const svtrn1; -+ extern const function_base *const svtrn1q; -+ extern const function_base *const svtrn2; -+ extern const function_base *const svtrn2q; -+ extern const function_base *const svtsmul; -+ extern const function_base *const svtssel; -+ extern const function_base *const svundef; -+ extern const function_base *const svundef2; -+ extern const function_base *const svundef3; -+ extern const function_base *const svundef4; -+ extern const function_base *const svunpkhi; -+ extern const function_base *const svunpklo; -+ extern const function_base *const svusdot; -+ extern const function_base *const svusdot_lane; -+ extern const function_base *const svusmmla; -+ extern const function_base *const svuzp1; -+ extern const function_base *const svuzp1q; -+ extern const function_base *const svuzp2; -+ extern const function_base *const svuzp2q; -+ extern const function_base *const svwhilele; -+ extern const function_base *const svwhilelt; -+ extern const function_base *const svwrffr; -+ extern const function_base *const svzip1; -+ extern const function_base *const svzip1q; -+ extern const function_base *const svzip2; -+ extern const function_base *const svzip2q; -+ } -+} -+ -+#endif -diff --git a/gcc/config/aarch64/aarch64-sve-builtins-functions.h b/gcc/config/aarch64/aarch64-sve-builtins-functions.h -new file mode 100644 -index 000000000..ee1760668 ---- /dev/null -+++ b/gcc/config/aarch64/aarch64-sve-builtins-functions.h -@@ -0,0 +1,630 @@ -+/* ACLE support for AArch64 SVE (function_base classes) -+ Copyright (C) 2018-2019 Free Software Foundation, Inc. -+ -+ This file is part of GCC. -+ -+ GCC is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published by -+ the Free Software Foundation; either version 3, or (at your option) -+ any later version. -+ -+ GCC is distributed in the hope that it will be useful, but -+ WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ General Public License for more details. -+ -+ You should have received a copy of the GNU General Public License -+ along with GCC; see the file COPYING3. If not see -+ . 
*/ -+ -+#ifndef GCC_AARCH64_SVE_BUILTINS_FUNCTIONS_H -+#define GCC_AARCH64_SVE_BUILTINS_FUNCTIONS_H -+ -+namespace aarch64_sve { -+ -+/* Wrap T, which is derived from function_base, and indicate that the -+ function never has side effects. It is only necessary to use this -+ wrapper on functions that might have floating-point suffixes, since -+ otherwise we assume by default that the function has no side effects. */ -+template -+class quiet : public T -+{ -+public: -+ CONSTEXPR quiet () : T () {} -+ -+ /* Unfortunately we can't use parameter packs yet. */ -+ template -+ CONSTEXPR quiet (const T1 &t1) : T (t1) {} -+ -+ template -+ CONSTEXPR quiet (const T1 &t1, const T2 &t2) : T (t1, t2) {} -+ -+ template -+ CONSTEXPR quiet (const T1 &t1, const T2 &t2, const T3 &t3) -+ : T (t1, t2, t3) {} -+ -+ unsigned int -+ call_properties (const function_instance &) const OVERRIDE -+ { -+ return 0; -+ } -+}; -+ -+/* A function_base that sometimes or always operates on tuples of -+ vectors. */ -+class multi_vector_function : public function_base -+{ -+public: -+ CONSTEXPR multi_vector_function (unsigned int vectors_per_tuple) -+ : m_vectors_per_tuple (vectors_per_tuple) {} -+ -+ unsigned int -+ vectors_per_tuple () const OVERRIDE -+ { -+ return m_vectors_per_tuple; -+ } -+ -+ /* The number of vectors in a tuple, or 1 if the function only operates -+ on single vectors. */ -+ unsigned int m_vectors_per_tuple; -+}; -+ -+/* A function_base that loads or stores contiguous memory elements -+ without extending or truncating them. */ -+class full_width_access : public multi_vector_function -+{ -+public: -+ CONSTEXPR full_width_access (unsigned int vectors_per_tuple = 1) -+ : multi_vector_function (vectors_per_tuple) {} -+ -+ tree -+ memory_scalar_type (const function_instance &fi) const OVERRIDE -+ { -+ return fi.scalar_type (0); -+ } -+ -+ machine_mode -+ memory_vector_mode (const function_instance &fi) const OVERRIDE -+ { -+ machine_mode mode = fi.vector_mode (0); -+ if (m_vectors_per_tuple != 1) -+ mode = targetm.array_mode (mode, m_vectors_per_tuple).require (); -+ return mode; -+ } -+}; -+ -+/* A function_base that loads elements from memory and extends them -+ to a wider element. The memory element type is a fixed part of -+ the function base name. */ -+class extending_load : public function_base -+{ -+public: -+ CONSTEXPR extending_load (type_suffix_index memory_type) -+ : m_memory_type (memory_type) {} -+ -+ unsigned int -+ call_properties (const function_instance &) const OVERRIDE -+ { -+ return CP_READ_MEMORY; -+ } -+ -+ tree -+ memory_scalar_type (const function_instance &) const OVERRIDE -+ { -+ return scalar_types[type_suffixes[m_memory_type].vector_type]; -+ } -+ -+ machine_mode -+ memory_vector_mode (const function_instance &fi) const OVERRIDE -+ { -+ machine_mode mem_mode = type_suffixes[m_memory_type].vector_mode; -+ machine_mode reg_mode = fi.vector_mode (0); -+ return aarch64_sve_data_mode (GET_MODE_INNER (mem_mode), -+ GET_MODE_NUNITS (reg_mode)).require (); -+ } -+ -+ /* Return the rtx code associated with the kind of extension that -+ the load performs. */ -+ rtx_code -+ extend_rtx_code () const -+ { -+ return (type_suffixes[m_memory_type].unsigned_p -+ ? ZERO_EXTEND : SIGN_EXTEND); -+ } -+ -+ /* The type of the memory elements. This is part of the function base -+ name rather than a true type suffix. */ -+ type_suffix_index m_memory_type; -+}; -+ -+/* A function_base that truncates vector elements and stores them to memory. 
-+ The memory element width is a fixed part of the function base name. */ -+class truncating_store : public function_base -+{ -+public: -+ CONSTEXPR truncating_store (scalar_int_mode to_mode) : m_to_mode (to_mode) {} -+ -+ unsigned int -+ call_properties (const function_instance &) const OVERRIDE -+ { -+ return CP_WRITE_MEMORY; -+ } -+ -+ tree -+ memory_scalar_type (const function_instance &fi) const OVERRIDE -+ { -+ /* In truncating stores, the signedness of the memory element is defined -+ to be the same as the signedness of the vector element. The signedness -+ doesn't make any difference to the behavior of the function. */ -+ type_class_index tclass = fi.type_suffix (0).tclass; -+ unsigned int element_bits = GET_MODE_BITSIZE (m_to_mode); -+ type_suffix_index suffix = find_type_suffix (tclass, element_bits); -+ return scalar_types[type_suffixes[suffix].vector_type]; -+ } -+ -+ machine_mode -+ memory_vector_mode (const function_instance &fi) const OVERRIDE -+ { -+ poly_uint64 nunits = GET_MODE_NUNITS (fi.vector_mode (0)); -+ return aarch64_sve_data_mode (m_to_mode, nunits).require (); -+ } -+ -+ /* The mode of a single memory element. */ -+ scalar_int_mode m_to_mode; -+}; -+ -+/* An incomplete function_base for functions that have an associated rtx code. -+ It simply records information about the mapping for derived classes -+ to use. */ -+class rtx_code_function_base : public function_base -+{ -+public: -+ CONSTEXPR rtx_code_function_base (rtx_code code_for_sint, -+ rtx_code code_for_uint, -+ int unspec_for_fp = -1) -+ : m_code_for_sint (code_for_sint), m_code_for_uint (code_for_uint), -+ m_unspec_for_fp (unspec_for_fp) {} -+ -+ /* The rtx code to use for signed and unsigned integers respectively. -+ Can be UNKNOWN for functions that don't have integer forms. */ -+ rtx_code m_code_for_sint; -+ rtx_code m_code_for_uint; -+ -+ /* The UNSPEC_COND_* to use for floating-point operations. Can be -1 -+ for functions that only operate on integers. */ -+ int m_unspec_for_fp; -+}; -+ -+/* A function_base for functions that have an associated rtx code. -+ It supports all forms of predication except PRED_implicit. */ -+class rtx_code_function : public rtx_code_function_base -+{ -+public: -+ CONSTEXPR rtx_code_function (rtx_code code_for_sint, rtx_code code_for_uint, -+ int unspec_for_fp = -1) -+ : rtx_code_function_base (code_for_sint, code_for_uint, unspec_for_fp) {} -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ return e.map_to_rtx_codes (m_code_for_sint, m_code_for_uint, -+ m_unspec_for_fp); -+ } -+}; -+ -+/* Like rtx_code_function, but for functions that take what is normally -+ the final argument first. One use of this class is to handle binary -+ reversed operations; another is to handle MLA-style operations that -+ are normally expressed in GCC as MAD-style operations. */ -+class rtx_code_function_rotated : public rtx_code_function_base -+{ -+public: -+ CONSTEXPR rtx_code_function_rotated (rtx_code code_for_sint, -+ rtx_code code_for_uint, -+ int unspec_for_fp = -1) -+ : rtx_code_function_base (code_for_sint, code_for_uint, unspec_for_fp) {} -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ /* Rotate the inputs into their normal order, but continue to make _m -+ functions merge with what was originally the first vector argument. */ -+ unsigned int nargs = e.args.length (); -+ e.rotate_inputs_left (e.pred != PRED_none ? 
1 : 0, nargs); -+ return e.map_to_rtx_codes (m_code_for_sint, m_code_for_uint, -+ m_unspec_for_fp, nargs - 1); -+ } -+}; -+ -+/* An incomplete function_base for functions that have an associated -+ unspec code, with separate codes for signed integers, unsigned -+ integers and floating-point values. The class simply records -+ information about the mapping for derived classes to use. */ -+class unspec_based_function_base : public function_base -+{ -+public: -+ CONSTEXPR unspec_based_function_base (int unspec_for_sint, -+ int unspec_for_uint, -+ int unspec_for_fp) -+ : m_unspec_for_sint (unspec_for_sint), -+ m_unspec_for_uint (unspec_for_uint), -+ m_unspec_for_fp (unspec_for_fp) -+ {} -+ -+ /* Return the unspec code to use for INSTANCE, based on type suffix 0. */ -+ int -+ unspec_for (const function_instance &instance) const -+ { -+ return (!instance.type_suffix (0).integer_p ? m_unspec_for_fp -+ : instance.type_suffix (0).unsigned_p ? m_unspec_for_uint -+ : m_unspec_for_sint); -+ } -+ -+ /* The unspec code associated with signed-integer, unsigned-integer -+ and floating-point operations respectively. */ -+ int m_unspec_for_sint; -+ int m_unspec_for_uint; -+ int m_unspec_for_fp; -+}; -+ -+/* A function_base for functions that have an associated unspec code. -+ It supports all forms of predication except PRED_implicit. */ -+class unspec_based_function : public unspec_based_function_base -+{ -+public: -+ CONSTEXPR unspec_based_function (int unspec_for_sint, int unspec_for_uint, -+ int unspec_for_fp) -+ : unspec_based_function_base (unspec_for_sint, unspec_for_uint, -+ unspec_for_fp) -+ {} -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ return e.map_to_unspecs (m_unspec_for_sint, m_unspec_for_uint, -+ m_unspec_for_fp); -+ } -+}; -+ -+/* Like unspec_based_function, but for functions that take what is normally -+ the final argument first. One use of this class is to handle binary -+ reversed operations; another is to handle MLA-style operations that -+ are normally expressed in GCC as MAD-style operations. */ -+class unspec_based_function_rotated : public unspec_based_function_base -+{ -+public: -+ CONSTEXPR unspec_based_function_rotated (int unspec_for_sint, -+ int unspec_for_uint, -+ int unspec_for_fp) -+ : unspec_based_function_base (unspec_for_sint, unspec_for_uint, -+ unspec_for_fp) -+ {} -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ /* Rotate the inputs into their normal order, but continue to make _m -+ functions merge with what was originally the first vector argument. */ -+ unsigned int nargs = e.args.length (); -+ e.rotate_inputs_left (e.pred != PRED_none ? 1 : 0, nargs); -+ return e.map_to_unspecs (m_unspec_for_sint, m_unspec_for_uint, -+ m_unspec_for_fp, nargs - 1); -+ } -+}; -+ -+/* Like unspec_based_function, but map the function directly to -+ CODE (UNSPEC, M) instead of using the generic predication-based -+ expansion. where M is the vector mode associated with type suffix 0. -+ This is useful if the unspec doesn't describe the full operation or -+ if the usual predication rules don't apply for some reason. 
*/ -+template -+class unspec_based_function_exact_insn : public unspec_based_function_base -+{ -+public: -+ CONSTEXPR unspec_based_function_exact_insn (int unspec_for_sint, -+ int unspec_for_uint, -+ int unspec_for_fp) -+ : unspec_based_function_base (unspec_for_sint, unspec_for_uint, -+ unspec_for_fp) -+ {} -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ return e.use_exact_insn (CODE (unspec_for (e), e.vector_mode (0))); -+ } -+}; -+ -+/* A function that performs an unspec and then adds it to another value. */ -+typedef unspec_based_function_exact_insn -+ unspec_based_add_function; -+ -+/* A functon that uses aarch64_pred* patterns regardless of the -+ predication type. */ -+typedef unspec_based_function_exact_insn -+ unspec_based_pred_function; -+ -+/* A function that acts like unspec_based_function_exact_insn -+ when operating on integers, but that expands to an (fma ...)-style -+ aarch64_sve* operation when applied to floats. */ -+template -+class unspec_based_fused_function : public unspec_based_function_base -+{ -+public: -+ CONSTEXPR unspec_based_fused_function (int unspec_for_sint, -+ int unspec_for_uint, -+ int unspec_for_fp) -+ : unspec_based_function_base (unspec_for_sint, unspec_for_uint, -+ unspec_for_fp) -+ {} -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ int unspec = unspec_for (e); -+ insn_code icode; -+ if (e.type_suffix (0).float_p) -+ { -+ /* Put the operands in the normal (fma ...) order, with the accumulator -+ last. This fits naturally since that's also the unprinted operand -+ in the asm output. */ -+ e.rotate_inputs_left (0, e.pred != PRED_none ? 4 : 3); -+ icode = code_for_aarch64_sve (unspec, e.vector_mode (0)); -+ } -+ else -+ icode = INT_CODE (unspec, e.vector_mode (0)); -+ return e.use_exact_insn (icode); -+ } -+}; -+ -+/* Like unspec_based_fused_function, but for _lane functions. */ -+template -+class unspec_based_fused_lane_function : public unspec_based_function_base -+{ -+public: -+ CONSTEXPR unspec_based_fused_lane_function (int unspec_for_sint, -+ int unspec_for_uint, -+ int unspec_for_fp) -+ : unspec_based_function_base (unspec_for_sint, unspec_for_uint, -+ unspec_for_fp) -+ {} -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ int unspec = unspec_for (e); -+ insn_code icode; -+ if (e.type_suffix (0).float_p) -+ { -+ /* Put the operands in the normal (fma ...) order, with the accumulator -+ last. This fits naturally since that's also the unprinted operand -+ in the asm output. */ -+ e.rotate_inputs_left (0, e.pred != PRED_none ? 5 : 4); -+ icode = code_for_aarch64_lane (unspec, e.vector_mode (0)); -+ } -+ else -+ icode = INT_CODE (unspec, e.vector_mode (0)); -+ return e.use_exact_insn (icode); -+ } -+}; -+ -+/* A function_base that uses CODE_FOR_MODE (M) to get the associated -+ instruction code, where M is the vector mode associated with type -+ suffix N. */ -+template -+class code_for_mode_function : public function_base -+{ -+public: -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ return e.use_exact_insn (CODE_FOR_MODE (e.vector_mode (N))); -+ } -+}; -+ -+/* A function that uses code_for_ (M), where M is the vector -+ mode associated with the first type suffix. */ -+#define CODE_FOR_MODE0(PATTERN) code_for_mode_function -+ -+/* Likewise for the second type suffix. */ -+#define CODE_FOR_MODE1(PATTERN) code_for_mode_function -+ -+/* Like CODE_FOR_MODE0, but the function doesn't raise exceptions when -+ operating on floating-point data. 
*/ -+#define QUIET_CODE_FOR_MODE0(PATTERN) \ -+ quiet< code_for_mode_function > -+ -+/* A function_base for functions that always expand to a fixed insn pattern, -+ regardless of what the suffixes are. */ -+class fixed_insn_function : public function_base -+{ -+public: -+ CONSTEXPR fixed_insn_function (insn_code code) : m_code (code) {} -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ return e.use_exact_insn (m_code); -+ } -+ -+ /* The instruction to use. */ -+ insn_code m_code; -+}; -+ -+/* A function_base for functions that permute their arguments. */ -+class permute : public quiet -+{ -+public: -+ /* Fold a unary or binary permute with the permute vector given by -+ BUILDER. */ -+ gimple * -+ fold_permute (const gimple_folder &f, const vec_perm_builder &builder) const -+ { -+ /* Punt for now on _b16 and wider; we'd need more complex evpc logic -+ to rerecognize the result. */ -+ if (f.type_suffix (0).bool_p && f.type_suffix (0).element_bits > 8) -+ return NULL; -+ -+ unsigned int nargs = gimple_call_num_args (f.call); -+ poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs)); -+ vec_perm_indices indices (builder, nargs, nelts); -+ tree perm_type = build_vector_type (ssizetype, nelts); -+ return gimple_build_assign (f.lhs, VEC_PERM_EXPR, -+ gimple_call_arg (f.call, 0), -+ gimple_call_arg (f.call, nargs - 1), -+ vec_perm_indices_to_tree (perm_type, indices)); -+ } -+}; -+ -+/* A function_base for functions that permute two vectors using a fixed -+ choice of indices. */ -+class binary_permute : public permute -+{ -+public: -+ CONSTEXPR binary_permute (int unspec) : m_unspec (unspec) {} -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ insn_code icode = code_for_aarch64_sve (m_unspec, e.vector_mode (0)); -+ return e.use_exact_insn (icode); -+ } -+ -+ /* The unspec code associated with the operation. */ -+ int m_unspec; -+}; -+ -+/* A function_base for functions that reduce a vector to a scalar. */ -+class reduction : public function_base -+{ -+public: -+ CONSTEXPR reduction (int unspec) -+ : m_unspec_for_sint (unspec), -+ m_unspec_for_uint (unspec), -+ m_unspec_for_fp (unspec) -+ {} -+ -+ CONSTEXPR reduction (int unspec_for_sint, int unspec_for_uint, -+ int unspec_for_fp) -+ : m_unspec_for_sint (unspec_for_sint), -+ m_unspec_for_uint (unspec_for_uint), -+ m_unspec_for_fp (unspec_for_fp) -+ {} -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ machine_mode mode = e.vector_mode (0); -+ int unspec = (!e.type_suffix (0).integer_p ? m_unspec_for_fp -+ : e.type_suffix (0).unsigned_p ? m_unspec_for_uint -+ : m_unspec_for_sint); -+ /* There's no distinction between SADDV and UADDV for 64-bit elements; -+ the signed versions only exist for narrower elements. */ -+ if (GET_MODE_UNIT_BITSIZE (mode) == 64 && unspec == UNSPEC_SADDV) -+ unspec = UNSPEC_UADDV; -+ return e.use_exact_insn (code_for_aarch64_pred_reduc (unspec, mode)); -+ } -+ -+ /* The unspec code associated with signed-integer, unsigned-integer -+ and floating-point operations respectively. */ -+ int m_unspec_for_sint; -+ int m_unspec_for_uint; -+ int m_unspec_for_fp; -+}; -+ -+/* A function_base for functions that shift narrower-than-64-bit values -+ by 64-bit amounts. 
*/ -+class shift_wide : public function_base -+{ -+public: -+ CONSTEXPR shift_wide (rtx_code code, int wide_unspec) -+ : m_code (code), m_wide_unspec (wide_unspec) {} -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ machine_mode mode = e.vector_mode (0); -+ machine_mode elem_mode = GET_MODE_INNER (mode); -+ -+ /* If the argument is a constant that the normal shifts can handle -+ directly, use them instead. */ -+ rtx shift = unwrap_const_vec_duplicate (e.args.last ()); -+ if (aarch64_simd_shift_imm_p (shift, elem_mode, m_code == ASHIFT)) -+ { -+ e.args.last () = shift; -+ return e.map_to_rtx_codes (m_code, m_code, -1); -+ } -+ -+ if (e.pred == PRED_x) -+ return e.use_unpred_insn (code_for_aarch64_sve (m_wide_unspec, mode)); -+ -+ return e.use_cond_insn (code_for_cond (m_wide_unspec, mode)); -+ } -+ -+ /* The rtx code associated with a "normal" shift. */ -+ rtx_code m_code; -+ -+ /* The unspec code associated with the wide shift. */ -+ int m_wide_unspec; -+}; -+ -+/* A function_base for unary functions that count bits. */ -+class unary_count : public quiet -+{ -+public: -+ CONSTEXPR unary_count (rtx_code code) : m_code (code) {} -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ /* The md patterns treat the operand as an integer. */ -+ machine_mode mode = aarch64_sve_int_mode (e.vector_mode (0)); -+ e.args.last () = gen_lowpart (mode, e.args.last ()); -+ -+ if (e.pred == PRED_x) -+ return e.use_pred_x_insn (code_for_aarch64_pred (m_code, mode)); -+ -+ return e.use_cond_insn (code_for_cond (m_code, mode)); -+ } -+ -+ /* The rtx code associated with the operation. */ -+ rtx_code m_code; -+}; -+ -+/* A function_base for svwhile* functions. */ -+class while_comparison : public function_base -+{ -+public: -+ CONSTEXPR while_comparison (int unspec_for_sint, int unspec_for_uint) -+ : m_unspec_for_sint (unspec_for_sint), -+ m_unspec_for_uint (unspec_for_uint) -+ {} -+ -+ rtx -+ expand (function_expander &e) const OVERRIDE -+ { -+ /* Suffix 0 determines the predicate mode, suffix 1 determines the -+ scalar mode and signedness. */ -+ int unspec = (e.type_suffix (1).unsigned_p -+ ? m_unspec_for_uint -+ : m_unspec_for_sint); -+ machine_mode pred_mode = e.vector_mode (0); -+ scalar_mode reg_mode = GET_MODE_INNER (e.vector_mode (1)); -+ return e.use_exact_insn (code_for_while (unspec, reg_mode, pred_mode)); -+ } -+ -+ /* The unspec codes associated with signed and unsigned operations -+ respectively. */ -+ int m_unspec_for_sint; -+ int m_unspec_for_uint; -+}; -+ -+} -+ -+/* Declare the global function base NAME, creating it from an instance -+ of class CLASS with constructor arguments ARGS. */ -+#define FUNCTION(NAME, CLASS, ARGS) \ -+ namespace { static CONSTEXPR const CLASS NAME##_obj ARGS; } \ -+ namespace functions { const function_base *const NAME = &NAME##_obj; } -+ -+#endif -diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc -new file mode 100644 -index 000000000..c6f6ce170 ---- /dev/null -+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc -@@ -0,0 +1,3451 @@ -+/* ACLE support for AArch64 SVE (function shapes) -+ Copyright (C) 2018-2019 Free Software Foundation, Inc. -+ -+ This file is part of GCC. -+ -+ GCC is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published by -+ the Free Software Foundation; either version 3, or (at your option) -+ any later version. 
-+ -+ GCC is distributed in the hope that it will be useful, but -+ WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ General Public License for more details. -+ -+ You should have received a copy of the GNU General Public License -+ along with GCC; see the file COPYING3. If not see -+ . */ -+ -+#include "config.h" -+#include "system.h" -+#include "coretypes.h" -+#include "tm.h" -+#include "tree.h" -+#include "rtl.h" -+#include "tm_p.h" -+#include "memmodel.h" -+#include "insn-codes.h" -+#include "optabs.h" -+#include "aarch64-sve-builtins.h" -+#include "aarch64-sve-builtins-shapes.h" -+ -+/* In the comments below, _t0 represents the first type suffix and _t1 -+ represents the second. Square brackets enclose characters that are -+ present in only the full name, not the overloaded name. Governing -+ predicate arguments and predicate suffixes are not shown, since they -+ depend on the predication type, which is a separate piece of -+ information from the shape. -+ -+ Non-overloaded functions may have additional suffixes beyond the -+ ones shown, if those suffixes don't affect the types in the type -+ signature. E.g. the predicate form of svtrn1 has a _b suffix, -+ but this does not affect the prototype, which is always -+ "svbool_t(svbool_t, svbool_t)". */ -+ -+namespace aarch64_sve { -+ -+/* Return a representation of "const T *". */ -+static tree -+build_const_pointer (tree t) -+{ -+ return build_pointer_type (build_qualified_type (t, TYPE_QUAL_CONST)); -+} -+ -+/* If INSTANCE has a governing predicate, add it to the list of argument -+ types in ARGUMENT_TYPES. RETURN_TYPE is the type returned by the -+ function. */ -+static void -+apply_predication (const function_instance &instance, tree return_type, -+ vec &argument_types) -+{ -+ if (instance.pred != PRED_none) -+ { -+ argument_types.quick_insert (0, get_svbool_t ()); -+ /* For unary merge operations, the first argument is a vector with -+ the same type as the result. For unary_convert_narrowt it also -+ provides the "bottom" half of active elements, and is present -+ for all types of predication. */ -+ if ((argument_types.length () == 2 && instance.pred == PRED_m) -+ || instance.shape == shapes::unary_convert_narrowt) -+ argument_types.quick_insert (0, return_type); -+ } -+} -+ -+/* Parse and move past an element type in FORMAT and return it as a type -+ suffix. The format is: -+ -+ [01] - the element type in type suffix 0 or 1 of INSTANCE -+ f - a floating-point type with the given number of bits -+ f[01] - a floating-point type with the same width as type suffix 0 or 1 -+ B - bfloat16_t -+ h - a half-sized version of -+ p - a predicate (represented as TYPE_SUFFIX_b) -+ q - a quarter-sized version of -+ s - a signed type with the given number of bits -+ s[01] - a signed type with the same width as type suffix 0 or 1 -+ u - an unsigned type with the given number of bits -+ u[01] - an unsigned type with the same width as type suffix 0 or 1 -+ w - a 64-bit version of if is integral, otherwise -+ -+ where is another element type. */ -+static type_suffix_index -+parse_element_type (const function_instance &instance, const char *&format) -+{ -+ int ch = *format++; -+ -+ if (ch == 'f' || ch == 's' || ch == 'u') -+ { -+ type_class_index tclass = (ch == 'f' ? TYPE_float -+ : ch == 's' ? 
TYPE_signed -+ : TYPE_unsigned); -+ char *end; -+ unsigned int bits = strtol (format, &end, 10); -+ format = end; -+ if (bits == 0 || bits == 1) -+ bits = instance.type_suffix (bits).element_bits; -+ return find_type_suffix (tclass, bits); -+ } -+ -+ if (ch == 'w') -+ { -+ type_suffix_index suffix = parse_element_type (instance, format); -+ if (type_suffixes[suffix].integer_p) -+ return find_type_suffix (type_suffixes[suffix].tclass, 64); -+ return suffix; -+ } -+ -+ if (ch == 'p') -+ return TYPE_SUFFIX_b; -+ -+ if (ch == 'B') -+ return TYPE_SUFFIX_bf16; -+ -+ if (ch == 'q') -+ { -+ type_suffix_index suffix = parse_element_type (instance, format); -+ return find_type_suffix (type_suffixes[suffix].tclass, -+ type_suffixes[suffix].element_bits / 4); -+ } -+ -+ if (ch == 'h') -+ { -+ type_suffix_index suffix = parse_element_type (instance, format); -+ /* Widening and narrowing doesn't change the type for predicates; -+ everything's still an svbool_t. */ -+ if (suffix == TYPE_SUFFIX_b) -+ return suffix; -+ return find_type_suffix (type_suffixes[suffix].tclass, -+ type_suffixes[suffix].element_bits / 2); -+ } -+ -+ if (ch == '0' || ch == '1') -+ return instance.type_suffix_ids[ch - '0']; -+ -+ gcc_unreachable (); -+} -+ -+/* Read and return a type from FORMAT for function INSTANCE. Advance -+ FORMAT beyond the type string. The format is: -+ -+ _ - void -+ al - array pointer for loads -+ ap - array pointer for prefetches -+ as - array pointer for stores -+ b - base vector type (from a _base suffix) -+ d - displacement vector type (from a _index or _offset suffix) -+ e - an enum with the given name -+ s - a scalar type with the given element suffix -+ t - a vector or tuple type with given element suffix [*1] -+ v - a vector with the given element suffix -+ -+ where has the format described above parse_element_type -+ -+ [*1] the vectors_per_tuple function indicates whether the type should -+ be a tuple, and if so, how many vectors it should contain. */ -+static tree -+parse_type (const function_instance &instance, const char *&format) -+{ -+ int ch = *format++; -+ -+ if (ch == '_') -+ return void_type_node; -+ -+ if (ch == 'a') -+ { -+ ch = *format++; -+ if (ch == 'l') -+ return build_const_pointer (instance.memory_scalar_type ()); -+ if (ch == 'p') -+ return const_ptr_type_node; -+ if (ch == 's') -+ return build_pointer_type (instance.memory_scalar_type ()); -+ gcc_unreachable (); -+ } -+ -+ if (ch == 'b') -+ return instance.base_vector_type (); -+ -+ if (ch == 'd') -+ return instance.displacement_vector_type (); -+ -+ if (ch == 'e') -+ { -+ if (strncmp (format, "pattern", 7) == 0) -+ { -+ format += 7; -+ return acle_svpattern; -+ } -+ if (strncmp (format, "prfop", 5) == 0) -+ { -+ format += 5; -+ return acle_svprfop; -+ } -+ gcc_unreachable (); -+ } -+ -+ if (ch == 's') -+ { -+ type_suffix_index suffix = parse_element_type (instance, format); -+ return scalar_types[type_suffixes[suffix].vector_type]; -+ } -+ -+ if (ch == 't') -+ { -+ type_suffix_index suffix = parse_element_type (instance, format); -+ vector_type_index vector_type = type_suffixes[suffix].vector_type; -+ unsigned int num_vectors = instance.vectors_per_tuple (); -+ return acle_vector_types[num_vectors - 1][vector_type]; -+ } -+ -+ if (ch == 'v') -+ { -+ type_suffix_index suffix = parse_element_type (instance, format); -+ return acle_vector_types[0][type_suffixes[suffix].vector_type]; -+ } -+ -+ gcc_unreachable (); -+} -+ -+/* Read and move past any argument count at FORMAT for the function -+ signature of INSTANCE. 
The counts are: -+ -+ *q: one argument per element in a 128-bit quadword (as for svdupq) -+ *t: one argument per vector in a tuple (as for svcreate) -+ -+ Otherwise the count is 1. */ -+static unsigned int -+parse_count (const function_instance &instance, const char *&format) -+{ -+ if (format[0] == '*' && format[1] == 'q') -+ { -+ format += 2; -+ return instance.elements_per_vq (0); -+ } -+ if (format[0] == '*' && format[1] == 't') -+ { -+ format += 2; -+ return instance.vectors_per_tuple (); -+ } -+ return 1; -+} -+ -+/* Read a type signature for INSTANCE from FORMAT. Add the argument types -+ to ARGUMENT_TYPES and return the return type. -+ -+ The format is a comma-separated list of types (as for parse_type), -+ with the first type being the return type and the rest being the -+ argument types. Each argument type can be followed by an optional -+ count (as for parse_count). */ -+static tree -+parse_signature (const function_instance &instance, const char *format, -+ vec &argument_types) -+{ -+ tree return_type = parse_type (instance, format); -+ while (format[0] == ',') -+ { -+ format += 1; -+ tree argument_type = parse_type (instance, format); -+ unsigned int count = parse_count (instance, format); -+ for (unsigned int i = 0; i < count; ++i) -+ argument_types.quick_push (argument_type); -+ } -+ gcc_assert (format[0] == 0); -+ return return_type; -+} -+ -+/* Add one function instance for GROUP, using mode suffix MODE_SUFFIX_ID, -+ the type suffixes at index TI and the predication suffix at index PI. -+ The other arguments are as for build_all. */ -+static void -+build_one (function_builder &b, const char *signature, -+ const function_group_info &group, mode_suffix_index mode_suffix_id, -+ unsigned int ti, unsigned int pi, bool force_direct_overloads) -+{ -+ /* Byte forms of svdupq take 16 arguments. */ -+ auto_vec argument_types; -+ function_instance instance (group.base_name, *group.base, *group.shape, -+ mode_suffix_id, group.types[ti], -+ group.preds[pi]); -+ tree return_type = parse_signature (instance, signature, argument_types); -+ apply_predication (instance, return_type, argument_types); -+ b.add_unique_function (instance, return_type, argument_types, -+ group.required_extensions, force_direct_overloads); -+} -+ -+/* GROUP describes some sort of gather or scatter operation. There are -+ two cases: -+ -+ - If the function has any type suffixes (as for loads and stores), the -+ first function type suffix specifies either a 32-bit or a 64-bit type, -+ which in turn selects either MODE32 or MODE64 as the addressing mode. -+ Add a function instance for every type and predicate combination -+ in GROUP for which the associated addressing mode is not MODE_none. -+ -+ - If the function has no type suffixes (as for prefetches), add one -+ MODE32 form and one MODE64 form for each predication type. -+ -+ The other arguments are as for build_all. 
*/ -+static void -+build_32_64 (function_builder &b, const char *signature, -+ const function_group_info &group, mode_suffix_index mode32, -+ mode_suffix_index mode64, bool force_direct_overloads = false) -+{ -+ for (unsigned int pi = 0; group.preds[pi] != NUM_PREDS; ++pi) -+ if (group.types[0][0] == NUM_TYPE_SUFFIXES) -+ { -+ gcc_assert (mode32 != MODE_none && mode64 != MODE_none); -+ build_one (b, signature, group, mode32, 0, pi, -+ force_direct_overloads); -+ build_one (b, signature, group, mode64, 0, pi, -+ force_direct_overloads); -+ } -+ else -+ for (unsigned int ti = 0; group.types[ti][0] != NUM_TYPE_SUFFIXES; ++ti) -+ { -+ unsigned int bits = type_suffixes[group.types[ti][0]].element_bits; -+ gcc_assert (bits == 32 || bits == 64); -+ mode_suffix_index mode = bits == 32 ? mode32 : mode64; -+ if (mode != MODE_none) -+ build_one (b, signature, group, mode, ti, pi, -+ force_direct_overloads); -+ } -+} -+ -+/* For every type and predicate combination in GROUP, add one function -+ that takes a scalar (pointer) base and a signed vector array index, -+ and another that instead takes an unsigned vector array index. -+ The vector array index has the same element size as the first -+ function type suffix. SIGNATURE is as for build_all. */ -+static void -+build_sv_index (function_builder &b, const char *signature, -+ const function_group_info &group) -+{ -+ build_32_64 (b, signature, group, MODE_s32index, MODE_s64index); -+ build_32_64 (b, signature, group, MODE_u32index, MODE_u64index); -+} -+ -+/* Like build_sv_index, but only handle 64-bit types. */ -+static void -+build_sv_index64 (function_builder &b, const char *signature, -+ const function_group_info &group) -+{ -+ build_32_64 (b, signature, group, MODE_none, MODE_s64index); -+ build_32_64 (b, signature, group, MODE_none, MODE_u64index); -+} -+ -+/* Like build_sv_index, but taking vector byte offsets instead of vector -+ array indices. */ -+static void -+build_sv_offset (function_builder &b, const char *signature, -+ const function_group_info &group) -+{ -+ build_32_64 (b, signature, group, MODE_s32offset, MODE_s64offset); -+ build_32_64 (b, signature, group, MODE_u32offset, MODE_u64offset); -+} -+ -+/* Like build_sv_offset, but exclude offsets that must be interpreted -+ as signed (i.e. s32offset). */ -+static void -+build_sv_uint_offset (function_builder &b, const char *signature, -+ const function_group_info &group) -+{ -+ build_32_64 (b, signature, group, MODE_none, MODE_s64offset); -+ build_32_64 (b, signature, group, MODE_u32offset, MODE_u64offset); -+} -+ -+/* For every type and predicate combination in GROUP, add a function -+ that takes a vector base address and no displacement. The vector -+ base has the same element size as the first type suffix. -+ -+ The other arguments are as for build_all. */ -+static void -+build_v_base (function_builder &b, const char *signature, -+ const function_group_info &group, -+ bool force_direct_overloads = false) -+{ -+ build_32_64 (b, signature, group, MODE_u32base, MODE_u64base, -+ force_direct_overloads); -+} -+ -+/* Like build_v_base, but for functions that also take a scalar array -+ index. */ -+static void -+build_vs_index (function_builder &b, const char *signature, -+ const function_group_info &group, -+ bool force_direct_overloads = false) -+{ -+ build_32_64 (b, signature, group, MODE_u32base_index, MODE_u64base_index, -+ force_direct_overloads); -+} -+ -+/* Like build_v_base, but for functions that also take a scalar byte -+ offset. 
*/ -+static void -+build_vs_offset (function_builder &b, const char *signature, -+ const function_group_info &group, -+ bool force_direct_overloads = false) -+{ -+ build_32_64 (b, signature, group, MODE_u32base_offset, MODE_u64base_offset, -+ force_direct_overloads); -+} -+ -+/* Add a function instance for every type and predicate combination -+ in GROUP. Take the function base name from GROUP and the mode suffix -+ from MODE_SUFFIX_ID. Use SIGNATURE to construct the function signature -+ without a governing predicate, then use apply_predication to add in the -+ predicate. FORCE_DIRECT_OVERLOADS is true if there is a one-to-one -+ mapping between "short" and "full" names, and if standard overload -+ resolution therefore isn't necessary. */ -+static void -+build_all (function_builder &b, const char *signature, -+ const function_group_info &group, mode_suffix_index mode_suffix_id, -+ bool force_direct_overloads = false) -+{ -+ for (unsigned int pi = 0; group.preds[pi] != NUM_PREDS; ++pi) -+ for (unsigned int ti = 0; -+ ti == 0 || group.types[ti][0] != NUM_TYPE_SUFFIXES; ++ti) -+ build_one (b, signature, group, mode_suffix_id, ti, pi, -+ force_direct_overloads); -+} -+ -+/* TYPE is the largest type suffix associated with the arguments of R, -+ but the result is twice as wide. Return the associated type suffix -+ if it exists, otherwise report an appropriate error and return -+ NUM_TYPE_SUFFIXES. */ -+static type_suffix_index -+long_type_suffix (function_resolver &r, type_suffix_index type) -+{ -+ unsigned int element_bits = type_suffixes[type].element_bits; -+ if (type_suffixes[type].integer_p && element_bits < 64) -+ return find_type_suffix (type_suffixes[type].tclass, element_bits * 2); -+ -+ r.report_no_such_form (type); -+ return NUM_TYPE_SUFFIXES; -+} -+ -+/* Declare the function shape NAME, pointing it to an instance -+ of class _def. */ -+#define SHAPE(NAME) \ -+ static CONSTEXPR const NAME##_def NAME##_obj; \ -+ namespace shapes { const function_shape *const NAME = &NAME##_obj; } -+ -+/* Base class for functions that are not overloaded. */ -+struct nonoverloaded_base : public function_shape -+{ -+ bool -+ explicit_type_suffix_p (unsigned int) const OVERRIDE -+ { -+ return true; -+ } -+ -+ tree -+ resolve (function_resolver &) const OVERRIDE -+ { -+ gcc_unreachable (); -+ } -+}; -+ -+/* Base class for overloaded functions. Bit N of EXPLICIT_MASK is true -+ if type suffix N appears in the overloaded name. */ -+template -+struct overloaded_base : public function_shape -+{ -+ bool -+ explicit_type_suffix_p (unsigned int i) const OVERRIDE -+ { -+ return (EXPLICIT_MASK >> i) & 1; -+ } -+}; -+ -+/* Base class for adr_index and adr_offset. */ -+struct adr_base : public overloaded_base<0> -+{ -+ /* The function takes two arguments: a vector base and a vector displacement -+ (either an index or an offset). Resolve based on them both. */ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ unsigned int i, nargs; -+ mode_suffix_index mode; -+ if (!r.check_gp_argument (2, i, nargs) -+ || (mode = r.resolve_adr_address (0)) == MODE_none) -+ return error_mark_node; -+ -+ return r.resolve_to (mode); -+ }; -+}; -+ -+/* Base class for narrowing bottom binary functions that take an -+ immediate second operand. The result is half the size of input -+ and has class CLASS. 
*/ -+template -+struct binary_imm_narrowb_base : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_n); -+ STATIC_ASSERT (CLASS == function_resolver::SAME_TYPE_CLASS -+ || CLASS == TYPE_unsigned); -+ if (CLASS == TYPE_unsigned) -+ build_all (b, "vhu0,v0,su64", group, MODE_n); -+ else -+ build_all (b, "vh0,v0,su64", group, MODE_n); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ return r.resolve_uniform (1, 1); -+ } -+}; -+ -+/* The top equivalent of binary_imm_narrowb_base. It takes three arguments, -+ with the first being the values of the even elements, which are typically -+ the result of the narrowb operation. */ -+template -+struct binary_imm_narrowt_base : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_n); -+ STATIC_ASSERT (CLASS == function_resolver::SAME_TYPE_CLASS -+ || CLASS == TYPE_unsigned); -+ if (CLASS == TYPE_unsigned) -+ build_all (b, "vhu0,vhu0,v0,su64", group, MODE_n); -+ else -+ build_all (b, "vh0,vh0,v0,su64", group, MODE_n); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ unsigned int i, nargs; -+ type_suffix_index type; -+ if (!r.check_gp_argument (3, i, nargs) -+ || (type = r.infer_vector_type (i + 1)) == NUM_TYPE_SUFFIXES -+ || !r.require_derived_vector_type (i, i + 1, type, CLASS, r.HALF_SIZE) -+ || !r.require_integer_immediate (i + 2)) -+ return error_mark_node; -+ -+ return r.resolve_to (r.mode_suffix_id, type); -+ } -+}; -+ -+/* Base class for long (i.e. narrow op narrow -> wide) binary functions -+ that take an immediate second operand. The type suffix specifies -+ the wider type. */ -+struct binary_imm_long_base : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_n); -+ build_all (b, "v0,vh0,su64", group, MODE_n); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ unsigned int i, nargs; -+ type_suffix_index type, result_type; -+ if (!r.check_gp_argument (2, i, nargs) -+ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES -+ || !r.require_integer_immediate (i + 1) -+ || (result_type = long_type_suffix (r, type)) == NUM_TYPE_SUFFIXES) -+ return error_mark_node; -+ -+ if (tree res = r.lookup_form (r.mode_suffix_id, result_type)) -+ return res; -+ -+ return r.report_no_such_form (type); -+ } -+}; -+ -+/* Base class for inc_dec and inc_dec_pat. */ -+struct inc_dec_base : public overloaded_base<0> -+{ -+ CONSTEXPR inc_dec_base (bool pat_p) : m_pat_p (pat_p) {} -+ -+ /* Resolve based on the first argument only, which must be either a -+ scalar or a vector. If it's a scalar, it must be a 32-bit or -+ 64-bit integer. */ -+ tree -+ resolve (function_resolver &r) const -+ { -+ unsigned int i, nargs; -+ if (!r.check_gp_argument (m_pat_p ? 
3 : 2, i, nargs) -+ || !r.require_vector_or_scalar_type (i)) -+ return error_mark_node; -+ -+ mode_suffix_index mode; -+ type_suffix_index type; -+ if (r.scalar_argument_p (i)) -+ { -+ mode = MODE_n; -+ type = r.infer_integer_scalar_type (i); -+ } -+ else -+ { -+ mode = MODE_none; -+ type = r.infer_vector_type (i); -+ } -+ if (type == NUM_TYPE_SUFFIXES) -+ return error_mark_node; -+ -+ for (++i; i < nargs; ++i) -+ if (!r.require_integer_immediate (i)) -+ return error_mark_node; -+ -+ return r.resolve_to (mode, type); -+ } -+ -+ bool -+ check (function_checker &c) const OVERRIDE -+ { -+ return c.require_immediate_range (m_pat_p ? 2 : 1, 1, 16); -+ } -+ -+ bool m_pat_p; -+}; -+ -+/* Base class for load and load_replicate. */ -+struct load_contiguous_base : public overloaded_base<0> -+{ -+ /* Resolve a call based purely on a pointer argument. The other arguments -+ are a governing predicate and (for MODE_vnum) a vnum offset. */ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ bool vnum_p = r.mode_suffix_id == MODE_vnum; -+ gcc_assert (r.mode_suffix_id == MODE_none || vnum_p); -+ -+ unsigned int i, nargs; -+ type_suffix_index type; -+ if (!r.check_gp_argument (vnum_p ? 2 : 1, i, nargs) -+ || (type = r.infer_pointer_type (i)) == NUM_TYPE_SUFFIXES -+ || (vnum_p && !r.require_scalar_type (i + 1, "int64_t"))) -+ return error_mark_node; -+ -+ return r.resolve_to (r.mode_suffix_id, type); -+ } -+}; -+ -+/* Base class for gather loads that take a scalar base and a vector -+ displacement (either an offset or an index). */ -+struct load_gather_sv_base : public overloaded_base<0> -+{ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ unsigned int i, nargs; -+ mode_suffix_index mode; -+ type_suffix_index type; -+ if (!r.check_gp_argument (2, i, nargs) -+ || (type = r.infer_pointer_type (i, true)) == NUM_TYPE_SUFFIXES -+ || (mode = r.resolve_sv_displacement (i + 1, type, true), -+ mode == MODE_none)) -+ return error_mark_node; -+ -+ return r.resolve_to (mode, type); -+ } -+}; -+ -+/* Base class for load_ext_gather_index and load_ext_gather_offset, -+ which differ only in the units of the displacement. */ -+struct load_ext_gather_base : public overloaded_base<1> -+{ -+ /* Resolve a gather load that takes one of: -+ -+ - a scalar pointer base and a vector displacement -+ - a vector base with no displacement or -+ - a vector base and a scalar displacement -+ -+ The function has an explicit type suffix that determines the type -+ of the loaded data. */ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ /* No resolution is needed for a vector base with no displacement; -+ there's a one-to-one mapping between short and long names. */ -+ gcc_assert (r.displacement_units () != UNITS_none); -+ -+ type_suffix_index type = r.type_suffix_ids[0]; -+ -+ unsigned int i, nargs; -+ mode_suffix_index mode; -+ if (!r.check_gp_argument (2, i, nargs) -+ || (mode = r.resolve_gather_address (i, type, true)) == MODE_none) -+ return error_mark_node; -+ -+ return r.resolve_to (mode, type); -+ } -+}; -+ -+/* sv_t svfoo[_t0](sv_t, sv_t, -+ sv_t) (for integer t0) -+ sv_t svmmla[_t0](sv_t, sv_t, sv_t) (for floating-point t0) -+ -+ The functions act like the equivalent of "ternary_qq" for integer elements -+ and normal vector-only ternary functions for floating-point elements. 
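   As a sketch of the two behaviours, assuming the usual <arm_sve.h> names
   and the I8MM/F32MM extensions (these calls are illustrative only):

     #include <arm_sve.h>

     // Integer svmmla takes quarter-width operands, matching the
     // "v0,v0,vq0,vq0" signature (requires +i8mm).
     svint32_t
     mmla_int (svint32_t acc, svint8_t a, svint8_t b)
     {
       return svmmla_s32 (acc, a, b);
     }

     // Floating-point svmmla is a uniform ternary operation (+f32mm).
     svfloat32_t
     mmla_float (svfloat32_t acc, svfloat32_t a, svfloat32_t b)
     {
       return svmmla_f32 (acc, a, b);
     }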
*/ -+struct mmla_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ /* svmmla is distributed over several extensions. Allow the common -+ denominator to define the overloaded svmmla function without -+ defining any specific versions. */ -+ if (group.types[0][0] != NUM_TYPE_SUFFIXES) -+ { -+ if (type_suffixes[group.types[0][0]].float_p) -+ build_all (b, "v0,v0,v0,v0", group, MODE_none); -+ else -+ build_all (b, "v0,v0,vq0,vq0", group, MODE_none); -+ } -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ unsigned int i, nargs; -+ type_suffix_index type; -+ if (!r.check_gp_argument (3, i, nargs) -+ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES) -+ return error_mark_node; -+ -+ /* Make sure that the function exists now, since not all forms -+ follow a set pattern after this point. */ -+ tree res = r.resolve_to (r.mode_suffix_id, type); -+ if (res == error_mark_node) -+ return res; -+ -+ bool float_p = type_suffixes[type].float_p; -+ unsigned int modifier = float_p ? r.SAME_SIZE : r.QUARTER_SIZE; -+ if (!r.require_derived_vector_type (i + 1, i, type, r.SAME_TYPE_CLASS, -+ modifier) -+ || !r.require_derived_vector_type (i + 2, i, type, r.SAME_TYPE_CLASS, -+ modifier)) -+ return error_mark_node; -+ -+ return res; -+ } -+}; -+SHAPE (mmla) -+ -+/* Base class for prefetch_gather_index and prefetch_gather_offset, -+ which differ only in the units of the displacement. */ -+struct prefetch_gather_base : public overloaded_base<0> -+{ -+ /* Resolve a gather prefetch that takes one of: -+ -+ - a scalar pointer base (const void *) and a vector displacement -+ - a vector base with no displacement or -+ - a vector base and a scalar displacement -+ -+ The prefetch operation is the final argument. This is purely a -+ mode-based resolution; there are no type suffixes. */ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ bool has_displacement_p = r.displacement_units () != UNITS_none; -+ -+ unsigned int i, nargs; -+ mode_suffix_index mode; -+ if (!r.check_gp_argument (has_displacement_p ? 3 : 2, i, nargs) -+ || (mode = r.resolve_gather_address (i, NUM_TYPE_SUFFIXES, -+ false)) == MODE_none -+ || !r.require_integer_immediate (nargs - 1)) -+ return error_mark_node; -+ -+ return r.resolve_to (mode); -+ } -+}; -+ -+/* Wraps BASE to provide a narrowing shift right function. Argument N -+ is an immediate shift amount in the range [1, sizeof(_t) * 4]. */ -+template -+struct shift_right_imm_narrow_wrapper : public BASE -+{ -+ bool -+ check (function_checker &c) const OVERRIDE -+ { -+ unsigned int bits = c.type_suffix (0).element_bits / 2; -+ return c.require_immediate_range (N, 1, bits); -+ } -+}; -+ -+/* Base class for store_scatter_index and store_scatter_offset, -+ which differ only in the units of the displacement. */ -+struct store_scatter_base : public overloaded_base<0> -+{ -+ /* Resolve a scatter store that takes one of: -+ -+ - a scalar pointer base and a vector displacement -+ - a vector base with no displacement or -+ - a vector base and a scalar displacement -+ -+ The stored data is the final argument, and it determines the -+ type suffix. */ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ bool has_displacement_p = r.displacement_units () != UNITS_none; -+ -+ unsigned int i, nargs; -+ mode_suffix_index mode; -+ type_suffix_index type; -+ if (!r.check_gp_argument (has_displacement_p ? 
3 : 2, i, nargs) -+ || (type = r.infer_sd_vector_type (nargs - 1)) == NUM_TYPE_SUFFIXES -+ || (mode = r.resolve_gather_address (i, type, false)) == MODE_none) -+ return error_mark_node; -+ -+ return r.resolve_to (mode, type); -+ } -+}; -+ -+/* Base class for ternary operations in which the final argument is an -+ immediate shift amount. The derived class should check the range. */ -+struct ternary_shift_imm_base : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_n); -+ build_all (b, "v0,v0,v0,su64", group, MODE_n); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ return r.resolve_uniform (2, 1); -+ } -+}; -+ -+/* Base class for ternary operations in which the first argument has the -+ same element type as the result, and in which the second and third -+ arguments have an element type that is derived the first. -+ -+ MODIFIER is the number of element bits in the second and third -+ arguments, or a function_resolver modifier that says how this -+ precision is derived from the first argument's elements. -+ -+ TYPE_CLASS2 and TYPE_CLASS3 are the type classes of the second and -+ third arguments, or function_resolver::SAME_TYPE_CLASS if the type -+ class is the same as the first argument. */ -+template -+struct ternary_resize2_opt_n_base : public overloaded_base<0> -+{ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ unsigned int i, nargs; -+ type_suffix_index type; -+ if (!r.check_gp_argument (3, i, nargs) -+ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES -+ || !r.require_derived_vector_type (i + 1, i, type, TYPE_CLASS2, -+ MODIFIER)) -+ return error_mark_node; -+ -+ return r.finish_opt_n_resolution (i + 2, i, type, TYPE_CLASS3, MODIFIER); -+ } -+}; -+ -+/* Like ternary_resize2_opt_n_base, but for functions that don't take -+ a final scalar argument. */ -+template -+struct ternary_resize2_base : public overloaded_base<0> -+{ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ unsigned int i, nargs; -+ type_suffix_index type; -+ if (!r.check_gp_argument (3, i, nargs) -+ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES -+ || !r.require_derived_vector_type (i + 1, i, type, TYPE_CLASS2, -+ MODIFIER) -+ || !r.require_derived_vector_type (i + 2, i, type, TYPE_CLASS3, -+ MODIFIER)) -+ return error_mark_node; -+ -+ return r.resolve_to (r.mode_suffix_id, type); -+ } -+}; -+ -+/* Like ternary_resize2_opt_n_base, but for functions that take a final -+ lane argument. */ -+template -+struct ternary_resize2_lane_base : public overloaded_base<0> -+{ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ unsigned int i, nargs; -+ type_suffix_index type; -+ if (!r.check_gp_argument (4, i, nargs) -+ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES -+ || !r.require_derived_vector_type (i + 1, i, type, TYPE_CLASS2, -+ MODIFIER) -+ || !r.require_derived_vector_type (i + 2, i, type, TYPE_CLASS3, -+ MODIFIER) -+ || !r.require_integer_immediate (i + 3)) -+ return error_mark_node; -+ -+ return r.resolve_to (r.mode_suffix_id, type); -+ } -+}; -+ -+/* A specialization of ternary_resize2_lane_base for bfloat16 elements, -+ indexed in groups of N elements. 
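   For instance, with the BF16 extension enabled (assuming the standard
   ACLE spelling; the call is an illustration, not part of the patch):

     #include <arm_sve.h>

     // The final argument selects a lane (or group of lanes) from the
     // third operand and must be an integer constant expression.
     svfloat32_t
     bf_lane (svfloat32_t acc, svbfloat16_t a, svbfloat16_t b)
     {
       return svbfmlalb_lane_f32 (acc, a, b, 3);
     }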
*/ -+template -+struct ternary_bfloat_lane_base -+ : public ternary_resize2_lane_base<16, TYPE_bfloat, TYPE_bfloat> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v0,vB,vB,su64", group, MODE_none); -+ } -+ -+ bool -+ check (function_checker &c) const OVERRIDE -+ { -+ return c.require_immediate_lane_index (3, N); -+ } -+}; -+ -+/* A specialization of ternary_resize2_lane_base for quarter-sized -+ elements. */ -+template -+struct ternary_qq_lane_base -+ : public ternary_resize2_lane_base -+{ -+ bool -+ check (function_checker &c) const OVERRIDE -+ { -+ return c.require_immediate_lane_index (3, 4); -+ } -+}; -+ -+/* Base class for narrowing bottom unary functions. The result is half -+ the size of input and has class CLASS. */ -+template -+struct unary_narrowb_base : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ STATIC_ASSERT (CLASS == function_resolver::SAME_TYPE_CLASS -+ || CLASS == TYPE_unsigned); -+ if (CLASS == TYPE_unsigned) -+ build_all (b, "vhu0,v0", group, MODE_none); -+ else -+ build_all (b, "vh0,v0", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ return r.resolve_unary (CLASS, r.HALF_SIZE); -+ } -+}; -+ -+/* The top equivalent of unary_imm_narrowb_base. All forms take the values -+ of the even elements as an extra argument, before any governing predicate. -+ These even elements are typically the result of the narrowb operation. */ -+template -+struct unary_narrowt_base : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ STATIC_ASSERT (CLASS == function_resolver::SAME_TYPE_CLASS -+ || CLASS == TYPE_unsigned); -+ if (CLASS == TYPE_unsigned) -+ build_all (b, "vhu0,vhu0,v0", group, MODE_none); -+ else -+ build_all (b, "vh0,vh0,v0", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ unsigned int i, nargs; -+ type_suffix_index type; -+ if (!r.check_gp_argument (2, i, nargs) -+ || (type = r.infer_vector_type (i + 1)) == NUM_TYPE_SUFFIXES -+ || !r.require_derived_vector_type (i, i + 1, type, CLASS, r.HALF_SIZE)) -+ return error_mark_node; -+ -+ return r.resolve_to (r.mode_suffix_id, type); -+ } -+}; -+ -+/* sv_t svfoo[_m0base]_[m1]index(sv_t, sv_t) -+ -+ for all valid combinations of vector base type and vector -+ displacement type . */ -+struct adr_index_def : public adr_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_index); -+ build_all (b, "b,b,d", group, MODE_u32base_s32index); -+ build_all (b, "b,b,d", group, MODE_u32base_u32index); -+ build_all (b, "b,b,d", group, MODE_u64base_s64index); -+ build_all (b, "b,b,d", group, MODE_u64base_u64index); -+ } -+}; -+SHAPE (adr_index) -+ -+/* sv_t svfoo[_m0base]_[m1]offset(sv_t, sv_t). -+ -+ for all valid combinations of vector base type and vector -+ displacement type . 
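   A minimal usage sketch of the address-calculation shapes, assuming the
   usual <arm_sve.h> spellings (illustrative names, not from this patch):

     #include <arm_sve.h>

     // Vector bases plus a vector displacement in bytes (offset form).
     svuint32_t
     adr_bytes (svuint32_t bases, svuint32_t offsets)
     {
       return svadrb_u32base_u32offset (bases, offsets);
     }

     // Vector bases plus a vector displacement in elements (index form).
     svuint64_t
     adr_halfwords (svuint64_t bases, svint64_t indices)
     {
       return svadrh_u64base_s64index (bases, indices);
     }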
*/ -+struct adr_offset_def : public adr_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_offset); -+ build_all (b, "b,b,d", group, MODE_u32base_s32offset); -+ build_all (b, "b,b,d", group, MODE_u32base_u32offset); -+ build_all (b, "b,b,d", group, MODE_u64base_s64offset); -+ build_all (b, "b,b,d", group, MODE_u64base_u64offset); -+ } -+}; -+SHAPE (adr_offset) -+ -+/* sv_t svfoo[_t0](sv_t, sv_t) -+ -+ i.e. a binary operation with uniform types, but with no scalar form. */ -+struct binary_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v0,v0", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ return r.resolve_uniform (2); -+ } -+}; -+SHAPE (binary) -+ -+/* sv_t svfoo[_t0](sv_t, sv_t) -+ sv_t svfoo[_n_t0](sv_t, _t). -+ -+ i.e. a version of the standard binary shape binary_opt_n in which -+ the final argument is always a signed integer. */ -+struct binary_int_opt_n_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v0,vs0", group, MODE_none); -+ build_all (b, "v0,v0,ss0", group, MODE_n); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ unsigned int i, nargs; -+ type_suffix_index type; -+ if (!r.check_gp_argument (2, i, nargs) -+ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES) -+ return error_mark_node; -+ -+ return r.finish_opt_n_resolution (i + 1, i, type, TYPE_signed); -+ } -+}; -+SHAPE (binary_int_opt_n) -+ -+/* sv_t svfoo_(sv_t, sv_t, uint64_t) -+ -+ where the final argument is an integer constant expression in the -+ range [0, 16 / sizeof (_t) - 1]. */ -+struct binary_lane_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v0,v0,su64", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ return r.resolve_uniform (2, 1); -+ } -+ -+ bool -+ check (function_checker &c) const OVERRIDE -+ { -+ return c.require_immediate_lane_index (2); -+ } -+}; -+SHAPE (binary_lane) -+ -+/* sv_t svfoo[_t0](sv_t, sv_t, uint64_t). -+ -+ where the final argument is an integer constant expression in the -+ range [0, 32 / sizeof (_t) - 1]. 
*/ -+struct binary_long_lane_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,vh0,vh0,su64", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ unsigned int i, nargs; -+ type_suffix_index type, result_type; -+ if (!r.check_gp_argument (3, i, nargs) -+ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES -+ || !r.require_matching_vector_type (i + 1, type) -+ || !r.require_integer_immediate (i + 2) -+ || (result_type = long_type_suffix (r, type)) == NUM_TYPE_SUFFIXES) -+ return error_mark_node; -+ -+ if (tree res = r.lookup_form (r.mode_suffix_id, result_type)) -+ return res; -+ -+ return r.report_no_such_form (type); -+ } -+ -+ bool -+ check (function_checker &c) const OVERRIDE -+ { -+ return c.require_immediate_lane_index (2); -+ } -+}; -+SHAPE (binary_long_lane) -+ -+/* sv_t svfoo[_t0](sv_t, sv_t) -+ sv_t svfoo[_n_t0](sv_t, _t). */ -+struct binary_long_opt_n_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,vh0,vh0", group, MODE_none); -+ build_all (b, "v0,vh0,sh0", group, MODE_n); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ unsigned int i, nargs; -+ type_suffix_index type, result_type; -+ if (!r.check_gp_argument (2, i, nargs) -+ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES -+ || (result_type = long_type_suffix (r, type)) == NUM_TYPE_SUFFIXES) -+ return error_mark_node; -+ -+ return r.finish_opt_n_resolution (i + 1, i, type, r.SAME_TYPE_CLASS, -+ r.SAME_SIZE, result_type); -+ } -+}; -+SHAPE (binary_long_opt_n) -+ -+/* sv_t svfoo[_n_t0](sv_t, _t). -+ -+ i.e. a binary operation in which the final argument is always a scalar -+ rather than a vector. */ -+struct binary_n_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_n); -+ build_all (b, "v0,v0,s0", group, MODE_n); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ unsigned int i, nargs; -+ type_suffix_index type; -+ if (!r.check_gp_argument (2, i, nargs) -+ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES -+ || !r.require_derived_scalar_type (i + 1, r.SAME_TYPE_CLASS)) -+ return error_mark_node; -+ -+ return r.resolve_to (r.mode_suffix_id, type); -+ } -+}; -+SHAPE (binary_n) -+ -+/* sv_t svfoo[_t0](sv_t, sv_t) -+ sv_t svfoo[_n_t0](sv_t, _t) -+ -+ i.e. a version of binary_opt_n in which the output elements are half the -+ width of the input elements. */ -+struct binary_narrowb_opt_n_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "vh0,v0,v0", group, MODE_none); -+ build_all (b, "vh0,v0,s0", group, MODE_n); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ return r.resolve_uniform_opt_n (2); -+ } -+}; -+SHAPE (binary_narrowb_opt_n) -+ -+/* sv_t svfoo[_t0](sv_t, sv_t, sv_t) -+ sv_t svfoo[_n_t0](sv_t, sv_t, _t) -+ -+ This is the "top" counterpart to binary_narrowb_opt_n. 
*/ -+struct binary_narrowt_opt_n_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "vh0,vh0,v0,v0", group, MODE_none); -+ build_all (b, "vh0,vh0,v0,s0", group, MODE_n); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ unsigned int i, nargs; -+ type_suffix_index type; -+ if (!r.check_gp_argument (3, i, nargs) -+ || (type = r.infer_vector_type (i + 1)) == NUM_TYPE_SUFFIXES -+ || !r.require_derived_vector_type (i, i + 1, type, r.SAME_TYPE_CLASS, -+ r.HALF_SIZE)) -+ return error_mark_node; -+ -+ return r.finish_opt_n_resolution (i + 2, i + 1, type); -+ } -+}; -+SHAPE (binary_narrowt_opt_n) -+ -+/* sv_t svfoo[_t0](sv_t, sv_t) -+ sv_t svfoo[_n_t0](sv_t, _t) -+ -+ i.e. the standard shape for binary operations that operate on -+ uniform types. */ -+struct binary_opt_n_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v0,v0", group, MODE_none); -+ /* _b functions do not have an _n form, but are classified as -+ binary_opt_n so that they can be overloaded with vector -+ functions. */ -+ if (group.types[0][0] == TYPE_SUFFIX_b) -+ gcc_assert (group.types[0][1] == NUM_TYPE_SUFFIXES); -+ else -+ build_all (b, "v0,v0,s0", group, MODE_n); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ return r.resolve_uniform_opt_n (2); -+ } -+}; -+SHAPE (binary_opt_n) -+ -+/* svbool_t svfoo(svbool_t, svbool_t). */ -+struct binary_pred_def : public nonoverloaded_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ build_all (b, "v0,v0,v0", group, MODE_none); -+ } -+}; -+SHAPE (binary_pred) -+ -+/* sv_t svfoo[_](sv_t, sv_t, uint64_t) -+ -+ where the final argument must be 90 or 270. */ -+struct binary_rotate_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v0,v0,su64", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ return r.resolve_uniform (2, 1); -+ } -+ -+ bool -+ check (function_checker &c) const OVERRIDE -+ { -+ return c.require_immediate_either_or (2, 90, 270); -+ } -+}; -+SHAPE (binary_rotate) -+ -+/* sv_t svfoo_t0(_t, _t) -+ -+ i.e. a binary function that takes two scalars and returns a vector. -+ An explicit type suffix is required. */ -+struct binary_scalar_def : public nonoverloaded_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ build_all (b, "v0,s0,s0", group, MODE_none); -+ } -+}; -+SHAPE (binary_scalar) -+ -+/* sv_t svfoo[_t0](sv_t, sv_t). -+ -+ i.e. a version of "binary" that returns unsigned integers. */ -+struct binary_to_uint_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "vu0,v0,v0", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ return r.resolve_uniform (2); -+ } -+}; -+SHAPE (binary_to_uint) -+ -+/* sv_t svfoo[_t0](sv_t, sv_t) -+ -+ i.e. a version of "binary" in which the final argument is always an -+ unsigned integer. 
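   Table lookups are a typical user of this shape; a minimal sketch,
   assuming the standard <arm_sve.h> names (illustration only):

     #include <arm_sve.h>

     // Binary operation whose second operand is always an unsigned
     // integer vector of lane indices.
     svfloat32_t
     permute (svfloat32_t data, svuint32_t indices)
     {
       return svtbl_f32 (data, indices);
     }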
*/ -+struct binary_uint_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v0,vu0", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ unsigned int i, nargs; -+ type_suffix_index type; -+ if (!r.check_gp_argument (2, i, nargs) -+ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES -+ || !r.require_derived_vector_type (i + 1, i, type, TYPE_unsigned)) -+ return error_mark_node; -+ -+ return r.resolve_to (r.mode_suffix_id, type); -+ } -+}; -+SHAPE (binary_uint) -+ -+/* sv_t svfoo[_t0](sv_t, _t) -+ -+ i.e. a version of binary_n in which the final argument is always an -+ unsigned integer. */ -+struct binary_uint_n_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v0,su0", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ unsigned int i, nargs; -+ type_suffix_index type; -+ if (!r.check_gp_argument (2, i, nargs) -+ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES -+ || !r.require_derived_scalar_type (i + 1, TYPE_unsigned)) -+ return error_mark_node; -+ -+ return r.resolve_to (r.mode_suffix_id, type); -+ } -+}; -+SHAPE (binary_uint_n) -+ -+/* sv_t svfoo[_t0](sv_t, sv_t) -+ sv_t svfoo[_n_t0](sv_t, _t) -+ -+ i.e. a version of the standard binary shape binary_opt_n in which -+ the final argument is always an unsigned integer. */ -+struct binary_uint_opt_n_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v0,vu0", group, MODE_none); -+ build_all (b, "v0,v0,su0", group, MODE_n); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ unsigned int i, nargs; -+ type_suffix_index type; -+ if (!r.check_gp_argument (2, i, nargs) -+ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES) -+ return error_mark_node; -+ -+ return r.finish_opt_n_resolution (i + 1, i, type, TYPE_unsigned); -+ } -+}; -+SHAPE (binary_uint_opt_n) -+ -+/* sv_t svfoo[_t0](sv_t, uint64_t). -+ -+ i.e. a version of binary_n in which the final argument is always -+ a 64-bit unsigned integer. */ -+struct binary_uint64_n_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v0,su64", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ unsigned int i, nargs; -+ type_suffix_index type; -+ if (!r.check_gp_argument (2, i, nargs) -+ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES -+ || !r.require_scalar_type (i + 1, "uint64_t")) -+ return error_mark_node; -+ -+ return r.resolve_to (r.mode_suffix_id, type); -+ } -+}; -+SHAPE (binary_uint64_n) -+ -+/* sv_t svfoo[_t0](sv_t, svuint64_t) -+ sv_t svfoo[_n_t0](sv_t, uint64_t) -+ -+ i.e. a version of the standard binary shape binary_opt_n in which -+ the final argument is always a uint64_t. 
*/ -+struct binary_uint64_opt_n_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v0,vu64", group, MODE_none); -+ build_all (b, "v0,v0,su64", group, MODE_n); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ unsigned int i, nargs; -+ type_suffix_index type; -+ if (!r.check_gp_argument (2, i, nargs) -+ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES) -+ return error_mark_node; -+ -+ return r.finish_opt_n_resolution (i + 1, i, type, TYPE_unsigned, 64); -+ } -+}; -+SHAPE (binary_uint64_opt_n) -+ -+/* sv_t svfoo[_t0](sv_t, sv_t). */ -+struct binary_wide_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v0,vh0", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ unsigned int i, nargs; -+ type_suffix_index type; -+ if (!r.check_gp_argument (2, i, nargs) -+ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES -+ || !r.require_derived_vector_type (i + 1, i, type, r.SAME_TYPE_CLASS, -+ r.HALF_SIZE)) -+ return error_mark_node; -+ -+ return r.resolve_to (r.mode_suffix_id, type); -+ } -+}; -+SHAPE (binary_wide) -+ -+/* sv_t svfoo[_t0](sv_t, sv_t) -+ sv_t svfoo[_n_t0](sv_t, _t). */ -+struct binary_wide_opt_n_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v0,vh0", group, MODE_none); -+ build_all (b, "v0,v0,sh0", group, MODE_n); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ unsigned int i, nargs; -+ type_suffix_index type; -+ if (!r.check_gp_argument (2, i, nargs) -+ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES) -+ return error_mark_node; -+ -+ return r.finish_opt_n_resolution (i + 1, i, type, r.SAME_TYPE_CLASS, -+ r.HALF_SIZE); -+ } -+}; -+SHAPE (binary_wide_opt_n) -+ -+/* sv_t svfoo[_t0](sv_t, sv_t) -+ _t svfoo[_n_t0](_t, sv_t). */ -+struct clast_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v0,v0", group, MODE_none); -+ build_all (b, "s0,s0,v0", group, MODE_n); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ unsigned int i, nargs; -+ if (!r.check_gp_argument (2, i, nargs) -+ || !r.require_vector_or_scalar_type (i)) -+ return error_mark_node; -+ -+ if (r.scalar_argument_p (i)) -+ { -+ type_suffix_index type; -+ if (!r.require_derived_scalar_type (i, r.SAME_TYPE_CLASS) -+ || (type = r.infer_vector_type (i + 1)) == NUM_TYPE_SUFFIXES) -+ return error_mark_node; -+ return r.resolve_to (MODE_n, type); -+ } -+ else -+ { -+ type_suffix_index type; -+ if ((type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES -+ || !r.require_matching_vector_type (i + 1, type)) -+ return error_mark_node; -+ return r.resolve_to (MODE_none, type); -+ } -+ } -+}; -+SHAPE (clast) -+ -+/* svbool_t svfoo[_t0](sv_t, sv_t). 
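   For example (assuming the usual <arm_sve.h> spellings; not part of
   this patch):

     #include <arm_sve.h>

     // Comparisons produce an svbool_t predicate; the _opt_n variants
     // below also accept a scalar second operand.
     svbool_t
     less_than (svbool_t pg, svfloat32_t x, svfloat32_t y)
     {
       return svcmplt_f32 (pg, x, y);
     }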
*/ -+struct compare_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "vp,v0,v0", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ return r.resolve_uniform (2); -+ } -+}; -+SHAPE (compare) -+ -+/* svbool_t svfoo[_t0](sv_t, sv_t) -+ svbool_t svfoo[_n_t0](sv_t, _t) -+ -+ i.e. a comparison between two vectors, or between a vector and a scalar. */ -+struct compare_opt_n_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "vp,v0,v0", group, MODE_none); -+ build_all (b, "vp,v0,s0", group, MODE_n); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ return r.resolve_uniform_opt_n (2); -+ } -+}; -+SHAPE (compare_opt_n) -+ -+/* svbool_t svfoo[_t0](const _t *, const _t *). */ -+struct compare_ptr_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "vp,al,al", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ unsigned int i, nargs; -+ type_suffix_index type; -+ if (!r.check_gp_argument (2, i, nargs) -+ || (type = r.infer_pointer_type (i)) == NUM_TYPE_SUFFIXES -+ || !r.require_matching_pointer_type (i + 1, i, type)) -+ return error_mark_node; -+ -+ return r.resolve_to (r.mode_suffix_id, type); -+ } -+}; -+SHAPE (compare_ptr) -+ -+/* svbool_t svfoo_t0[_t1](_t, _t) -+ -+ where _t0 is a _b suffix that describes the predicate result. -+ There is no direct relationship between the element sizes of _t0 -+ and _t1. */ -+struct compare_scalar_def : public overloaded_base<1> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "vp,s1,s1", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ unsigned int i, nargs; -+ type_suffix_index type; -+ if (!r.check_gp_argument (2, i, nargs) -+ || (type = r.infer_integer_scalar_type (i)) == NUM_TYPE_SUFFIXES -+ || !r.require_matching_integer_scalar_type (i + 1, i, type)) -+ return error_mark_node; -+ -+ return r.resolve_to (r.mode_suffix_id, r.type_suffix_ids[0], type); -+ } -+}; -+SHAPE (compare_scalar) -+ -+/* svbool_t svfoo[_t0](sv_t, svint64_t) (for signed t0) -+ svbool_t svfoo[_n_t0](sv_t, int64_t) (for signed t0) -+ svbool_t svfoo[_t0](sv_t, svuint64_t) (for unsigned t0) -+ svbool_t svfoo[_n_t0](sv_t, uint64_t) (for unsigned t0) -+ -+ i.e. a comparison in which the second argument is 64 bits. */ -+struct compare_wide_opt_n_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "vp,v0,vw0", group, MODE_none); -+ build_all (b, "vp,v0,sw0", group, MODE_n); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ unsigned int i, nargs; -+ type_suffix_index type; -+ if (!r.check_gp_argument (2, i, nargs) -+ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES) -+ return error_mark_node; -+ -+ return r.finish_opt_n_resolution (i + 1, i, type, r.SAME_TYPE_CLASS, 64); -+ } -+}; -+SHAPE (compare_wide_opt_n) -+ -+/* uint64_t svfoo(). 
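   A small sketch of the count shapes, assuming the standard ACLE names
   (illustrative only):

     #include <arm_sve.h>

     // svcntb () takes no arguments (count_inherent); svcntp_b32 ()
     // counts the active lanes of a predicate (count_pred).
     uint64_t
     counts (svbool_t pg, svbool_t p)
     {
       return svcntb () + svcntp_b32 (pg, p);
     }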
*/ -+struct count_inherent_def : public nonoverloaded_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ build_all (b, "su64", group, MODE_none); -+ } -+}; -+SHAPE (count_inherent) -+ -+/* uint64_t svfoo(enum svpattern). */ -+struct count_pat_def : public nonoverloaded_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ build_all (b, "su64,epattern", group, MODE_none); -+ } -+}; -+SHAPE (count_pat) -+ -+/* uint64_t svfoo(svbool_t). */ -+struct count_pred_def : public nonoverloaded_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ build_all (b, "su64,vp", group, MODE_none); -+ } -+}; -+SHAPE (count_pred) -+ -+/* uint64_t svfoo[_t0](sv_t). */ -+struct count_vector_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "su64,v0", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ return r.resolve_uniform (1); -+ } -+}; -+SHAPE (count_vector) -+ -+/* svxN_t svfoo[_t0](sv_t, ..., sv_t) -+ -+ where there are N arguments in total. */ -+struct create_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "t0,v0*t", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ return r.resolve_uniform (r.vectors_per_tuple ()); -+ } -+}; -+SHAPE (create) -+ -+/* sv_t svfoo[_n]_t0(_t, ..., _t) -+ -+ where there are enough arguments to fill 128 bits of data (or to -+ control 128 bits of data in the case of predicates). */ -+struct dupq_def : public overloaded_base<1> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ /* The "_n" suffix is optional; the full name has it, but the short -+ name doesn't. */ -+ build_all (b, "v0,s0*q", group, MODE_n, true); -+ } -+ -+ tree -+ resolve (function_resolver &) const OVERRIDE -+ { -+ /* The short forms just make "_n" implicit, so no resolution is needed. */ -+ gcc_unreachable (); -+ } -+}; -+SHAPE (dupq) -+ -+/* sv_t svfoo[_t0](sv_t, sv_t, uint64_t) -+ -+ where the final argument is an integer constant expression that when -+ multiplied by the number of bytes in t0 is in the range [0, 255]. */ -+struct ext_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v0,v0,su64", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ return r.resolve_uniform (2, 1); -+ } -+ -+ bool -+ check (function_checker &c) const OVERRIDE -+ { -+ unsigned int bytes = c.type_suffix (0).element_bytes; -+ return c.require_immediate_range (2, 0, 256 / bytes - 1); -+ } -+}; -+SHAPE (ext) -+ -+/* _t svfoo[_t0](_t, sv_t). 
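   For example, the strictly-ordered floating-point reduction (assuming
   the usual <arm_sve.h> spelling; an illustration, not from the patch):

     #include <arm_sve.h>

     // A scalar accumulator folded across a vector, matching the
     // "s0,s0,v0" signature.
     float32_t
     ordered_sum (svbool_t pg, float32_t init, svfloat32_t x)
     {
       return svadda_f32 (pg, init, x);
     }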
*/ -+struct fold_left_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "s0,s0,v0", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ unsigned int i, nargs; -+ type_suffix_index type; -+ if (!r.check_gp_argument (2, i, nargs) -+ || !r.require_derived_scalar_type (i, r.SAME_TYPE_CLASS) -+ || (type = r.infer_vector_type (i + 1)) == NUM_TYPE_SUFFIXES) -+ return error_mark_node; -+ -+ return r.resolve_to (r.mode_suffix_id, type); -+ } -+}; -+SHAPE (fold_left) -+ -+/* sv_t svfoo[_t0](svxN_t, uint64_t) -+ -+ where the final argument is an integer constant expression in -+ the range [0, N - 1]. */ -+struct get_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,t0,su64", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ unsigned int i, nargs; -+ type_suffix_index type; -+ if (!r.check_gp_argument (2, i, nargs) -+ || (type = r.infer_tuple_type (i)) == NUM_TYPE_SUFFIXES -+ || !r.require_integer_immediate (i + 1)) -+ return error_mark_node; -+ -+ return r.resolve_to (r.mode_suffix_id, type); -+ } -+ -+ bool -+ check (function_checker &c) const OVERRIDE -+ { -+ unsigned int nvectors = c.vectors_per_tuple (); -+ return c.require_immediate_range (1, 0, nvectors - 1); -+ } -+}; -+SHAPE (get) -+ -+/* sv_t svfoo[_t0](sv_t, uint64_t) -+ _t svfoo[_n_t0](_t, uint64_t) -+ -+ where the t0 in the vector form is a signed or unsigned integer -+ whose size is tied to the [bhwd] suffix of "svfoo". */ -+struct inc_dec_def : public inc_dec_base -+{ -+ CONSTEXPR inc_dec_def () : inc_dec_base (false) {} -+ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ /* These functions are unusual in that the type suffixes for -+ the scalar and vector forms are not related. The vector -+ form always has exactly two potential suffixes while the -+ scalar form always has four. */ -+ if (group.types[2][0] == NUM_TYPE_SUFFIXES) -+ build_all (b, "v0,v0,su64", group, MODE_none); -+ else -+ build_all (b, "s0,s0,su64", group, MODE_n); -+ } -+}; -+SHAPE (inc_dec) -+ -+/* sv_t svfoo[_t0](sv_t, enum svpattern, uint64_t) -+ _t svfoo[_n_t0](_t, enum svpattern, uint64_t) -+ -+ where the t0 in the vector form is a signed or unsigned integer -+ whose size is tied to the [bhwd] suffix of "svfoo". */ -+struct inc_dec_pat_def : public inc_dec_base -+{ -+ CONSTEXPR inc_dec_pat_def () : inc_dec_base (true) {} -+ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ /* These functions are unusual in that the type suffixes for -+ the scalar and vector forms are not related. The vector -+ form always has exactly two potential suffixes while the -+ scalar form always has four. */ -+ if (group.types[2][0] == NUM_TYPE_SUFFIXES) -+ build_all (b, "v0,v0,epattern,su64", group, MODE_none); -+ else -+ build_all (b, "s0,s0,epattern,su64", group, MODE_n); -+ } -+}; -+SHAPE (inc_dec_pat) -+ -+/* sv_t svfoo[_t0](sv_t, svbool_t). 
*/ -+struct inc_dec_pred_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v0,vp", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ unsigned int i, nargs; -+ type_suffix_index type; -+ if (!r.check_gp_argument (2, i, nargs) -+ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES -+ || !r.require_vector_type (i + 1, VECTOR_TYPE_svbool_t)) -+ return error_mark_node; -+ -+ return r.resolve_to (r.mode_suffix_id, type); -+ } -+}; -+SHAPE (inc_dec_pred) -+ -+/* _t svfoo[_n_t0]_t1(_t, svbool_t) -+ -+ where _t1 is a _b suffix that describes the svbool_t argument. */ -+struct inc_dec_pred_scalar_def : public overloaded_base<2> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_n); -+ build_all (b, "s0,s0,vp", group, MODE_n); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ unsigned int i, nargs; -+ type_suffix_index type; -+ if (!r.check_gp_argument (2, i, nargs) -+ || (type = r.infer_integer_scalar_type (i)) == NUM_TYPE_SUFFIXES -+ || !r.require_vector_type (i + 1, VECTOR_TYPE_svbool_t)) -+ return error_mark_node; -+ -+ return r.resolve_to (r.mode_suffix_id, type, r.type_suffix_ids[1]); -+ } -+}; -+SHAPE (inc_dec_pred_scalar) -+ -+/* sv[xN]_t svfoo_t0(). */ -+struct inherent_def : public nonoverloaded_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ build_all (b, "t0", group, MODE_none); -+ } -+}; -+SHAPE (inherent) -+ -+/* svbool_t svfoo[_b](). */ -+struct inherent_b_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ /* The "_b" suffix is optional; the full name has it, but the short -+ name doesn't. */ -+ build_all (b, "v0", group, MODE_none, true); -+ } -+ -+ tree -+ resolve (function_resolver &) const OVERRIDE -+ { -+ /* The short forms just make "_b" implicit, so no resolution is needed. */ -+ gcc_unreachable (); -+ } -+}; -+SHAPE (inherent_b) -+ -+/* sv[xN]_t svfoo[_t0](const _t *) -+ sv[xN]_t svfoo_vnum[_t0](const _t *, int64_t). */ -+struct load_def : public load_contiguous_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ b.add_overloaded_functions (group, MODE_vnum); -+ build_all (b, "t0,al", group, MODE_none); -+ build_all (b, "t0,al,ss64", group, MODE_vnum); -+ } -+}; -+SHAPE (load) -+ -+/* sv_t svfoo_t0(const _t *) -+ sv_t svfoo_vnum_t0(const _t *, int64_t) -+ -+ where is determined by the function base name. */ -+struct load_ext_def : public nonoverloaded_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ build_all (b, "t0,al", group, MODE_none); -+ build_all (b, "t0,al,ss64", group, MODE_vnum); -+ } -+}; -+SHAPE (load_ext) -+ -+/* sv_t svfoo_[s32]index_t0(const _t *, svint32_t) -+ sv_t svfoo_[s64]index_t0(const _t *, svint64_t) -+ sv_t svfoo_[u32]index_t0(const _t *, svuint32_t) -+ sv_t svfoo_[u64]index_t0(const _t *, svuint64_t) -+ -+ sv_t svfoo[_u32base]_index_t0(svuint32_t, int64_t) -+ sv_t svfoo[_u64base]_index_t0(svuint64_t, int64_t) -+ -+ where is determined by the function base name. 
*/ -+struct load_ext_gather_index_def : public load_ext_gather_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_index); -+ build_sv_index (b, "t0,al,d", group); -+ build_vs_index (b, "t0,b,ss64", group); -+ } -+}; -+SHAPE (load_ext_gather_index) -+ -+/* sv_t svfoo_[s64]index_t0(const _t *, svint64_t) -+ sv_t svfoo_[u64]index_t0(const _t *, svuint64_t) -+ -+ sv_t svfoo[_u32base]_index_t0(svuint32_t, int64_t) -+ sv_t svfoo[_u64base]_index_t0(svuint64_t, int64_t) -+ -+ where is determined by the function base name. This is -+ load_ext_gather_index that doesn't support 32-bit vector indices. */ -+struct load_ext_gather_index_restricted_def : public load_ext_gather_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_index); -+ build_sv_index64 (b, "t0,al,d", group); -+ build_vs_index (b, "t0,b,ss64", group); -+ } -+}; -+SHAPE (load_ext_gather_index_restricted) -+ -+/* sv_t svfoo_[s32]offset_t0(const _t *, svint32_t) -+ sv_t svfoo_[s64]offset_t0(const _t *, svint64_t) -+ sv_t svfoo_[u32]offset_t0(const _t *, svuint32_t) -+ sv_t svfoo_[u64]offset_t0(const _t *, svuint64_t) -+ -+ sv_t svfoo[_u32base]_t0(svuint32_t) -+ sv_t svfoo[_u64base]_t0(svuint64_t) -+ -+ sv_t svfoo[_u32base]_offset_t0(svuint32_t, int64_t) -+ sv_t svfoo[_u64base]_offset_t0(svuint64_t, int64_t) -+ -+ where is determined by the function base name. */ -+struct load_ext_gather_offset_def : public load_ext_gather_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_offset); -+ build_sv_offset (b, "t0,al,d", group); -+ build_v_base (b, "t0,b", group, true); -+ build_vs_offset (b, "t0,b,ss64", group); -+ } -+}; -+SHAPE (load_ext_gather_offset) -+ -+/* sv_t svfoo_[s64]offset_t0(const _t *, svint64_t) -+ sv_t svfoo_[u32]offset_t0(const _t *, svuint32_t) -+ sv_t svfoo_[u64]offset_t0(const _t *, svuint64_t) -+ -+ sv_t svfoo[_u32base]_t0(svuint32_t) -+ sv_t svfoo[_u64base]_t0(svuint64_t) -+ -+ sv_t svfoo[_u32base]_offset_t0(svuint32_t, int64_t) -+ sv_t svfoo[_u64base]_offset_t0(svuint64_t, int64_t) -+ -+ where is determined by the function base name. This is -+ load_ext_gather_offset without the s32 vector offset form. */ -+struct load_ext_gather_offset_restricted_def : public load_ext_gather_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_offset); -+ build_sv_uint_offset (b, "t0,al,d", group); -+ build_v_base (b, "t0,b", group, true); -+ build_vs_offset (b, "t0,b,ss64", group); -+ } -+}; -+SHAPE (load_ext_gather_offset_restricted) -+ -+/* sv_t svfoo_[s32]index[_t0](const _t *, svint32_t) -+ sv_t svfoo_[s64]index[_t0](const _t *, svint64_t) -+ sv_t svfoo_[u32]index[_t0](const _t *, svuint32_t) -+ sv_t svfoo_[u64]index[_t0](const _t *, svuint64_t) -+ -+ sv_t svfoo_[s32]offset[_t0](const _t *, svint32_t) -+ sv_t svfoo_[s64]offset[_t0](const _t *, svint64_t) -+ sv_t svfoo_[u32]offset[_t0](const _t *, svuint32_t) -+ sv_t svfoo_[u64]offset[_t0](const _t *, svuint64_t). 
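   A minimal sketch of one of these forms, assuming the standard ACLE
   spelling (illustrative only):

     #include <arm_sve.h>

     // Gather load from a scalar base plus a vector of byte offsets;
     // the index forms instead scale the displacement by element size.
     svfloat32_t
     gather (svbool_t pg, const float32_t *base, svuint32_t offsets)
     {
       return svld1_gather_u32offset_f32 (pg, base, offsets);
     }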
*/ -+struct load_gather_sv_def : public load_gather_sv_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_index); -+ b.add_overloaded_functions (group, MODE_offset); -+ build_sv_index (b, "t0,al,d", group); -+ build_sv_offset (b, "t0,al,d", group); -+ } -+}; -+SHAPE (load_gather_sv) -+ -+/* sv_t svfoo_[u32]index[_t0](const _t *, svuint32_t) -+ sv_t svfoo_[u64]index[_t0](const _t *, svuint64_t) -+ -+ sv_t svfoo_[s64]offset[_t0](const _t *, svint64_t) -+ sv_t svfoo_[u32]offset[_t0](const _t *, svuint32_t) -+ sv_t svfoo_[u64]offset[_t0](const _t *, svuint64_t) -+ -+ This is load_gather_sv without the 32-bit vector index forms and -+ without the s32 vector offset form. */ -+struct load_gather_sv_restricted_def : public load_gather_sv_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_index); -+ b.add_overloaded_functions (group, MODE_offset); -+ build_sv_index64 (b, "t0,al,d", group); -+ build_sv_uint_offset (b, "t0,al,d", group); -+ } -+}; -+SHAPE (load_gather_sv_restricted) -+ -+/* sv_t svfoo[_u32base]_t0(svuint32_t) -+ sv_t svfoo[_u64base]_t0(svuint64_t) -+ -+ sv_t svfoo[_u32base]_index_t0(svuint32_t, int64_t) -+ sv_t svfoo[_u64base]_index_t0(svuint64_t, int64_t) -+ -+ sv_t svfoo[_u32base]_offset_t0(svuint32_t, int64_t) -+ sv_t svfoo[_u64base]_offset_t0(svuint64_t, int64_t). */ -+struct load_gather_vs_def : public overloaded_base<1> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ /* The base vector mode is optional; the full name has it but the -+ short name doesn't. There is no ambiguity with SHAPE_load_gather_sv -+ because the latter uses an implicit type suffix. */ -+ build_v_base (b, "t0,b", group, true); -+ build_vs_index (b, "t0,b,ss64", group, true); -+ build_vs_offset (b, "t0,b,ss64", group, true); -+ } -+ -+ tree -+ resolve (function_resolver &) const OVERRIDE -+ { -+ /* The short name just makes the base vector mode implicit; -+ no resolution is needed. */ -+ gcc_unreachable (); -+ } -+}; -+SHAPE (load_gather_vs) -+ -+/* sv_t svfoo[_t0](const _t *) -+ -+ The only difference from "load" is that this shape has no vnum form. */ -+struct load_replicate_def : public load_contiguous_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "t0,al", group, MODE_none); -+ } -+}; -+SHAPE (load_replicate) -+ -+/* svbool_t svfoo(enum svpattern). */ -+struct pattern_pred_def : public nonoverloaded_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ build_all (b, "vp,epattern", group, MODE_none); -+ } -+}; -+SHAPE (pattern_pred) -+ -+/* void svfoo(const void *, svprfop) -+ void svfoo_vnum(const void *, int64_t, svprfop). 
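   For instance (assuming the usual <arm_sve.h> names; not taken from
   this patch):

     #include <arm_sve.h>

     // Contiguous prefetch: the final argument is an svprfop enumerator.
     void
     warm_cache (svbool_t pg, const void *ptr)
     {
       svprfb (pg, ptr, SV_PLDL1KEEP);
     }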
*/ -+struct prefetch_def : public nonoverloaded_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ build_all (b, "_,ap,eprfop", group, MODE_none); -+ build_all (b, "_,ap,ss64,eprfop", group, MODE_vnum); -+ } -+}; -+SHAPE (prefetch) -+ -+/* void svfoo_[s32]index(const void *, svint32_t, svprfop) -+ void svfoo_[s64]index(const void *, svint64_t, svprfop) -+ void svfoo_[u32]index(const void *, svuint32_t, svprfop) -+ void svfoo_[u64]index(const void *, svuint64_t, svprfop) -+ -+ void svfoo[_u32base](svuint32_t, svprfop) -+ void svfoo[_u64base](svuint64_t, svprfop) -+ -+ void svfoo[_u32base]_index(svuint32_t, int64_t, svprfop) -+ void svfoo[_u64base]_index(svuint64_t, int64_t, svprfop). */ -+struct prefetch_gather_index_def : public prefetch_gather_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ b.add_overloaded_functions (group, MODE_index); -+ build_sv_index (b, "_,ap,d,eprfop", group); -+ build_v_base (b, "_,b,eprfop", group); -+ build_vs_index (b, "_,b,ss64,eprfop", group); -+ } -+}; -+SHAPE (prefetch_gather_index) -+ -+/* void svfoo_[s32]offset(const void *, svint32_t, svprfop) -+ void svfoo_[s64]offset(const void *, svint64_t, svprfop) -+ void svfoo_[u32]offset(const void *, svuint32_t, svprfop) -+ void svfoo_[u64]offset(const void *, svuint64_t, svprfop) -+ -+ void svfoo[_u32base](svuint32_t, svprfop) -+ void svfoo[_u64base](svuint64_t, svprfop) -+ -+ void svfoo[_u32base]_offset(svuint32_t, int64_t, svprfop) -+ void svfoo[_u64base]_offset(svuint64_t, int64_t, svprfop). */ -+struct prefetch_gather_offset_def : public prefetch_gather_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ b.add_overloaded_functions (group, MODE_offset); -+ build_sv_offset (b, "_,ap,d,eprfop", group); -+ build_v_base (b, "_,b,eprfop", group); -+ build_vs_offset (b, "_,b,ss64,eprfop", group); -+ } -+}; -+SHAPE (prefetch_gather_offset) -+ -+/* bool svfoo(svbool_t). */ -+struct ptest_def : public nonoverloaded_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ build_all (b, "sp,vp", group, MODE_none); -+ } -+}; -+SHAPE (ptest) -+ -+/* svbool_t svfoo(). */ -+struct rdffr_def : public nonoverloaded_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ build_all (b, "vp", group, MODE_none); -+ } -+}; -+SHAPE (rdffr) -+ -+/* _t svfoo[_t0](sv_t). */ -+struct reduction_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "s0,v0", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ return r.resolve_uniform (1); -+ } -+}; -+SHAPE (reduction) -+ -+/* int64_t svfoo[_t0](sv_t) (for signed t0) -+ uint64_t svfoo[_t0](sv_t) (for unsigned t0) -+ _t svfoo[_t0](sv_t) (for floating-point t0) -+ -+ i.e. a version of "reduction" in which the return type for integers -+ always has 64 bits. 
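   For example (a sketch using the standard ACLE spelling; illustrative
   only):

     #include <arm_sve.h>

     // Integer reductions widen the result to 64 bits, so summing
     // bytes yields an int64_t rather than an int8_t.
     int64_t
     sum_bytes (svbool_t pg, svint8_t x)
     {
       return svaddv_s8 (pg, x);
     }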
*/ -+struct reduction_wide_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "sw0,v0", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ return r.resolve_uniform (1); -+ } -+}; -+SHAPE (reduction_wide) -+ -+/* svxN_t svfoo[_t0](svxN_t, uint64_t, sv_t) -+ -+ where the second argument is an integer constant expression in the -+ range [0, N - 1]. */ -+struct set_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "t0,t0,su64,v0", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ unsigned int i, nargs; -+ type_suffix_index type; -+ if (!r.check_gp_argument (3, i, nargs) -+ || (type = r.infer_tuple_type (i)) == NUM_TYPE_SUFFIXES -+ || !r.require_integer_immediate (i + 1) -+ || !r.require_derived_vector_type (i + 2, i, type)) -+ return error_mark_node; -+ -+ return r.resolve_to (r.mode_suffix_id, type); -+ } -+ -+ bool -+ check (function_checker &c) const OVERRIDE -+ { -+ unsigned int nvectors = c.vectors_per_tuple (); -+ return c.require_immediate_range (1, 0, nvectors - 1); -+ } -+}; -+SHAPE (set) -+ -+/* void svfoo(). */ -+struct setffr_def : public nonoverloaded_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ build_all (b, "_", group, MODE_none); -+ } -+}; -+SHAPE (setffr) -+ -+/* sv_t svfoo[_n_t0])(sv_t, uint64_t) -+ -+ where the final argument must be an integer constant expression in the -+ range [0, sizeof (_t) * 8 - 1]. */ -+struct shift_left_imm_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_n); -+ build_all (b, "v0,v0,su64", group, MODE_n); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ return r.resolve_uniform (1, 1); -+ } -+ -+ bool -+ check (function_checker &c) const OVERRIDE -+ { -+ unsigned int bits = c.type_suffix (0).element_bits; -+ return c.require_immediate_range (1, 0, bits - 1); -+ } -+}; -+SHAPE (shift_left_imm) -+ -+/* sv_t svfoo[_n_t0])(sv_t, uint64_t) -+ -+ where the final argument must be an integer constant expression in the -+ range [0, sizeof (_t) * 4 - 1]. */ -+struct shift_left_imm_long_def : public binary_imm_long_base -+{ -+ bool -+ check (function_checker &c) const OVERRIDE -+ { -+ unsigned int bits = c.type_suffix (0).element_bits / 2; -+ return c.require_immediate_range (1, 0, bits - 1); -+ } -+}; -+SHAPE (shift_left_imm_long) -+ -+/* sv_t svfoo[_n_t0])(sv_t, uint64_t) -+ -+ where the final argument must be an integer constant expression in the -+ range [0, sizeof (_t) * 8 - 1]. */ -+struct shift_left_imm_to_uint_def : public shift_left_imm_def -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_n); -+ build_all (b, "vu0,v0,su64", group, MODE_n); -+ } -+}; -+SHAPE (shift_left_imm_to_uint) -+ -+/* sv_t svfoo[_n_t0])(sv_t, uint64_t) -+ -+ where the final argument must be an integer constant expression in the -+ range [1, sizeof (_t) * 8]. 
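   A small sketch of such a shift, assuming the usual <arm_sve.h> spelling
   (illustrative, not part of this patch):

     #include <arm_sve.h>

     // The immediate must lie in [1, element bits]; shifting by 2 here
     // divides each signed element by 4, rounding towards zero.
     svint32_t
     div_by_4 (svbool_t pg, svint32_t x)
     {
       return svasrd_n_s32_x (pg, x, 2);
     }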
*/ -+struct shift_right_imm_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_n); -+ build_all (b, "v0,v0,su64", group, MODE_n); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ return r.resolve_uniform (1, 1); -+ } -+ -+ bool -+ check (function_checker &c) const OVERRIDE -+ { -+ unsigned int bits = c.type_suffix (0).element_bits; -+ return c.require_immediate_range (1, 1, bits); -+ } -+}; -+SHAPE (shift_right_imm) -+ -+/* sv_t svfoo[_n_t0])(sv_t, uint64_t) -+ -+ where the final argument must be an integer constant expression in the -+ range [1, sizeof (_t) * 4]. */ -+typedef shift_right_imm_narrow_wrapper, 1> -+ shift_right_imm_narrowb_def; -+SHAPE (shift_right_imm_narrowb) -+ -+/* sv_t svfoo[_n_t0])(sv_t, sv_t, uint64_t) -+ -+ where the final argument must be an integer constant expression in the -+ range [1, sizeof (_t) * 4]. */ -+typedef shift_right_imm_narrow_wrapper, 2> -+ shift_right_imm_narrowt_def; -+SHAPE (shift_right_imm_narrowt) -+ -+/* sv_t svfoo[_n_t0])(sv_t, uint64_t) -+ -+ where the final argument must be an integer constant expression in the -+ range [1, sizeof (_t) * 4]. */ -+typedef binary_imm_narrowb_base -+ binary_imm_narrowb_base_unsigned; -+typedef shift_right_imm_narrow_wrapper -+ shift_right_imm_narrowb_to_uint_def; -+SHAPE (shift_right_imm_narrowb_to_uint) -+ -+/* sv_t svfoo[_n_t0])(sv_t, sv_t, uint64_t) -+ -+ where the final argument must be an integer constant expression in the -+ range [1, sizeof (_t) * 4]. */ -+typedef binary_imm_narrowt_base -+ binary_imm_narrowt_base_unsigned; -+typedef shift_right_imm_narrow_wrapper -+ shift_right_imm_narrowt_to_uint_def; -+SHAPE (shift_right_imm_narrowt_to_uint) -+ -+/* void svfoo[_t0](_t *, sv[xN]_t) -+ void svfoo_vnum[_t0](_t *, int64_t, sv[xN]_t) -+ -+ where might be tied to (for non-truncating stores) or might -+ depend on the function base name (for truncating stores). */ -+struct store_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ b.add_overloaded_functions (group, MODE_vnum); -+ build_all (b, "_,as,t0", group, MODE_none); -+ build_all (b, "_,as,ss64,t0", group, MODE_vnum); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ bool vnum_p = r.mode_suffix_id == MODE_vnum; -+ gcc_assert (r.mode_suffix_id == MODE_none || vnum_p); -+ -+ unsigned int i, nargs; -+ type_suffix_index type; -+ if (!r.check_gp_argument (vnum_p ? 3 : 2, i, nargs) -+ || !r.require_pointer_type (i) -+ || (vnum_p && !r.require_scalar_type (i + 1, "int64_t")) -+ || ((type = r.infer_tuple_type (nargs - 1)) == NUM_TYPE_SUFFIXES)) -+ return error_mark_node; -+ -+ return r.resolve_to (r.mode_suffix_id, type); -+ } -+}; -+SHAPE (store) -+ -+/* void svfoo_[s32]index[_t0](_t *, svint32_t, sv_t) -+ void svfoo_[s64]index[_t0](_t *, svint64_t, sv_t) -+ void svfoo_[u32]index[_t0](_t *, svuint32_t, sv_t) -+ void svfoo_[u64]index[_t0](_t *, svuint64_t, sv_t) -+ -+ void svfoo[_u32base]_index[_t0](svuint32_t, int64_t, sv_t) -+ void svfoo[_u64base]_index[_t0](svuint64_t, int64_t, sv_t) -+ -+ where might be tied to (for non-truncating stores) or might -+ depend on the function base name (for truncating stores). 
*/ -+struct store_scatter_index_def : public store_scatter_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_index); -+ build_sv_index (b, "_,as,d,t0", group); -+ build_vs_index (b, "_,b,ss64,t0", group); -+ } -+}; -+SHAPE (store_scatter_index) -+ -+/* void svfoo_[s64]index[_t0](_t *, svint64_t, sv_t) -+ void svfoo_[u64]index[_t0](_t *, svuint64_t, sv_t) -+ -+ void svfoo[_u32base]_index[_t0](svuint32_t, int64_t, sv_t) -+ void svfoo[_u64base]_index[_t0](svuint64_t, int64_t, sv_t) -+ -+ i.e. a version of store_scatter_index that doesn't support 32-bit -+ vector indices. */ -+struct store_scatter_index_restricted_def : public store_scatter_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_index); -+ build_sv_index64 (b, "_,as,d,t0", group); -+ build_vs_index (b, "_,b,ss64,t0", group); -+ } -+}; -+SHAPE (store_scatter_index_restricted) -+ -+/* void svfoo_[s32]offset[_t0](_t *, svint32_t, sv_t) -+ void svfoo_[s64]offset[_t0](_t *, svint64_t, sv_t) -+ void svfoo_[u32]offset[_t0](_t *, svuint32_t, sv_t) -+ void svfoo_[u64]offset[_t0](_t *, svuint64_t, sv_t) -+ -+ void svfoo[_u32base_t0](svuint32_t, sv_t) -+ void svfoo[_u64base_t0](svuint64_t, sv_t) -+ -+ void svfoo[_u32base]_offset[_t0](svuint32_t, int64_t, sv_t) -+ void svfoo[_u64base]_offset[_t0](svuint64_t, int64_t, sv_t) -+ -+ where might be tied to (for non-truncating stores) or might -+ depend on the function base name (for truncating stores). */ -+struct store_scatter_offset_def : public store_scatter_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ b.add_overloaded_functions (group, MODE_offset); -+ build_sv_offset (b, "_,as,d,t0", group); -+ build_v_base (b, "_,b,t0", group); -+ build_vs_offset (b, "_,b,ss64,t0", group); -+ } -+}; -+SHAPE (store_scatter_offset) -+ -+/* void svfoo_[s64]offset[_t0](_t *, svint64_t, sv_t) -+ void svfoo_[u32]offset[_t0](_t *, svuint32_t, sv_t) -+ void svfoo_[u64]offset[_t0](_t *, svuint64_t, sv_t) -+ -+ void svfoo[_u32base_t0](svuint32_t, sv_t) -+ void svfoo[_u64base_t0](svuint64_t, sv_t) -+ -+ void svfoo[_u32base]_offset[_t0](svuint32_t, int64_t, sv_t) -+ void svfoo[_u64base]_offset[_t0](svuint64_t, int64_t, sv_t) -+ -+ i.e. a version of store_scatter_offset that doesn't support svint32_t -+ offsets. */ -+struct store_scatter_offset_restricted_def : public store_scatter_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ b.add_overloaded_functions (group, MODE_offset); -+ build_sv_uint_offset (b, "_,as,d,t0", group); -+ build_v_base (b, "_,b,t0", group); -+ build_vs_offset (b, "_,b,ss64,t0", group); -+ } -+}; -+SHAPE (store_scatter_offset_restricted) -+ -+/* sv_t svfoo[_t0](svxN_t, sv_t). 
*/ -+struct tbl_tuple_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,t0,vu0", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ unsigned int i, nargs; -+ type_suffix_index type; -+ if (!r.check_gp_argument (2, i, nargs) -+ || (type = r.infer_tuple_type (i)) == NUM_TYPE_SUFFIXES -+ || !r.require_derived_vector_type (i + 1, i, type, TYPE_unsigned)) -+ return error_mark_node; -+ -+ return r.resolve_to (r.mode_suffix_id, type); -+ } -+}; -+SHAPE (tbl_tuple) -+ -+/* sv_t svfoo[_t0](sv_t, svbfloatt16_t, svbfloat16_t). */ -+struct ternary_bfloat_def -+ : public ternary_resize2_base<16, TYPE_bfloat, TYPE_bfloat> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v0,vB,vB", group, MODE_none); -+ } -+}; -+SHAPE (ternary_bfloat) -+ -+/* sv_t svfoo[_t0](sv_t, svbfloat16_t, svbfloat16_t, uint64_t) -+ -+ where the final argument is an integer constant expression in the range -+ [0, 7]. */ -+typedef ternary_bfloat_lane_base<1> ternary_bfloat_lane_def; -+SHAPE (ternary_bfloat_lane) -+ -+/* sv_t svfoo[_t0](sv_t, svbfloat16_t, svbfloat16_t, uint64_t) -+ -+ where the final argument is an integer constant expression in the range -+ [0, 3]. */ -+typedef ternary_bfloat_lane_base<2> ternary_bfloat_lanex2_def; -+SHAPE (ternary_bfloat_lanex2) -+ -+/* sv_t svfoo[_t0](sv_t, svbfloatt16_t, svbfloat16_t) -+ sv_t svfoo[_n_t0](sv_t, svbfloat16_t, bfloat16_t). */ -+struct ternary_bfloat_opt_n_def -+ : public ternary_resize2_opt_n_base<16, TYPE_bfloat, TYPE_bfloat> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v0,vB,vB", group, MODE_none); -+ build_all (b, "v0,v0,vB,sB", group, MODE_n); -+ } -+}; -+SHAPE (ternary_bfloat_opt_n) -+ -+/* sv_t svfoo[_t0](sv_t, sv_t, sv_t, -+ uint64_t) -+ -+ where the final argument is an integer constant expression in the range -+ [0, 16 / sizeof (_t) - 1]. */ -+struct ternary_intq_uintq_lane_def -+ : public ternary_qq_lane_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v0,vqs0,vqu0,su64", group, MODE_none); -+ } -+}; -+SHAPE (ternary_intq_uintq_lane) -+ -+/* sv_t svfoo[_t0](sv_t, sv_t, sv_t) -+ sv_t svfoo[_n_t0](sv_t, sv_t, -+ _t). */ -+struct ternary_intq_uintq_opt_n_def -+ : public ternary_resize2_opt_n_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v0,vqs0,vqu0", group, MODE_none); -+ build_all (b, "v0,v0,vqs0,squ0", group, MODE_n); -+ } -+}; -+SHAPE (ternary_intq_uintq_opt_n) -+ -+/* svbool_t svfoo[_](sv_t, sv_t, sv_t, uint64_t) -+ -+ where the final argument is an integer constant expression in the -+ range [0, 16 / sizeof (_t) - 1]. 
*/ -+struct ternary_lane_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v0,v0,v0,su64", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ return r.resolve_uniform (3, 1); -+ } -+ -+ bool -+ check (function_checker &c) const OVERRIDE -+ { -+ return c.require_immediate_lane_index (3); -+ } -+}; -+SHAPE (ternary_lane) -+ -+/* svbool_t svfoo[_](sv_t, sv_t, sv_t, uint64_t, uint64_t) -+ -+ where the penultimate argument is an integer constant expression in -+ the range [0, 8 / sizeof (_t) - 1] and where the final argument -+ is an integer constant expression in {0, 90, 180, 270}. */ -+struct ternary_lane_rotate_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v0,v0,v0,su64,su64", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ return r.resolve_uniform (3, 2); -+ } -+ -+ bool -+ check (function_checker &c) const OVERRIDE -+ { -+ return (c.require_immediate_lane_index (3, 2) -+ && c.require_immediate_one_of (4, 0, 90, 180, 270)); -+ } -+}; -+SHAPE (ternary_lane_rotate) -+ -+/* sv_t svfoo[_t0](sv_t, sv_t, sv_t, uint64_t) -+ -+ where the final argument is an integer constant expression in the range -+ [0, 32 / sizeof (_t) - 1]. */ -+struct ternary_long_lane_def -+ : public ternary_resize2_lane_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v0,vh0,vh0,su64", group, MODE_none); -+ } -+ -+ bool -+ check (function_checker &c) const OVERRIDE -+ { -+ return c.require_immediate_lane_index (3); -+ } -+}; -+SHAPE (ternary_long_lane) -+ -+/* sv_t svfoo[_t0](sv_t, sv_t, sv_t) -+ sv_t svfoo[_n_t0](sv_t, sv_t, _t) -+ -+ i.e. a version of the standard ternary shape ternary_opt_n in which -+ the element type of the last two arguments is the half-sized -+ equivalent of . */ -+struct ternary_long_opt_n_def -+ : public ternary_resize2_opt_n_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v0,vh0,vh0", group, MODE_none); -+ build_all (b, "v0,v0,vh0,sh0", group, MODE_n); -+ } -+}; -+SHAPE (ternary_long_opt_n) -+ -+/* sv_t svfoo[_t0](sv_t, sv_t, sv_t) -+ sv_t svfoo[_n_t0](sv_t, sv_t, _t) -+ -+ i.e. the standard shape for ternary operations that operate on -+ uniform types. */ -+struct ternary_opt_n_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v0,v0,v0", group, MODE_none); -+ build_all (b, "v0,v0,v0,s0", group, MODE_n); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ return r.resolve_uniform_opt_n (3); -+ } -+}; -+SHAPE (ternary_opt_n) -+ -+/* sv_t svfoo[_t0](sv_t, sv_t, sv_t, uint64_t) -+ -+ where the final argument is an integer constant expression in the range -+ [0, 16 / sizeof (_t) - 1]. 
*/ -+struct ternary_qq_lane_def : public ternary_qq_lane_base<> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v0,vq0,vq0,su64", group, MODE_none); -+ } -+}; -+SHAPE (ternary_qq_lane) -+ -+/* svbool_t svfoo[_](sv_t, sv_t, sv_t, -+ uint64_t) -+ -+ where the final argument is an integer constant expression in -+ {0, 90, 180, 270}. */ -+struct ternary_qq_lane_rotate_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v0,vq0,vq0,su64,su64", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ unsigned int i, nargs; -+ type_suffix_index type; -+ if (!r.check_gp_argument (5, i, nargs) -+ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES -+ || !r.require_derived_vector_type (i + 1, i, type, r.SAME_TYPE_CLASS, -+ r.QUARTER_SIZE) -+ || !r.require_derived_vector_type (i + 2, i, type, r.SAME_TYPE_CLASS, -+ r.QUARTER_SIZE) -+ || !r.require_integer_immediate (i + 3) -+ || !r.require_integer_immediate (i + 4)) -+ return error_mark_node; -+ -+ return r.resolve_to (r.mode_suffix_id, type); -+ } -+ -+ bool -+ check (function_checker &c) const OVERRIDE -+ { -+ return (c.require_immediate_lane_index (3, 4) -+ && c.require_immediate_one_of (4, 0, 90, 180, 270)); -+ } -+}; -+SHAPE (ternary_qq_lane_rotate) -+ -+/* sv_t svfoo[_t0](sv_t, sv_t, sv_t) -+ sv_t svfoo[_n_t0](sv_t, sv_t, _t) -+ -+ i.e. a version of the standard ternary shape ternary_opt_n in which -+ the element type of the last two arguments is the quarter-sized -+ equivalent of . */ -+struct ternary_qq_opt_n_def -+ : public ternary_resize2_opt_n_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v0,vq0,vq0", group, MODE_none); -+ build_all (b, "v0,v0,vq0,sq0", group, MODE_n); -+ } -+}; -+SHAPE (ternary_qq_opt_n) -+ -+/* svbool_t svfoo[_](sv_t, sv_t, sv_t, -+ uint64_t) -+ -+ where the final argument is an integer constant expression in -+ {0, 90, 180, 270}. */ -+struct ternary_qq_rotate_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v0,vq0,vq0,su64", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ unsigned int i, nargs; -+ type_suffix_index type; -+ if (!r.check_gp_argument (4, i, nargs) -+ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES -+ || !r.require_derived_vector_type (i + 1, i, type, r.SAME_TYPE_CLASS, -+ r.QUARTER_SIZE) -+ || !r.require_derived_vector_type (i + 2, i, type, r.SAME_TYPE_CLASS, -+ r.QUARTER_SIZE) -+ || !r.require_integer_immediate (i + 3)) -+ return error_mark_node; -+ -+ return r.resolve_to (r.mode_suffix_id, type); -+ } -+ -+ bool -+ check (function_checker &c) const OVERRIDE -+ { -+ return c.require_immediate_one_of (3, 0, 90, 180, 270); -+ } -+}; -+SHAPE (ternary_qq_rotate) -+ -+/* svbool_t svfoo[_](sv_t, sv_t, sv_t, uint64_t) -+ -+ where the final argument is an integer constant expression in -+ {0, 90, 180, 270}. 
*/ -+struct ternary_rotate_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v0,v0,v0,su64", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ return r.resolve_uniform (3, 1); -+ } -+ -+ bool -+ check (function_checker &c) const OVERRIDE -+ { -+ return c.require_immediate_one_of (3, 0, 90, 180, 270); -+ } -+}; -+SHAPE (ternary_rotate) -+ -+/* sv_t svfoo[_n_t0])(sv_t, sv_t, uint64_t) -+ -+ where the final argument must be an integer constant expression in the -+ range [0, sizeof (_t) * 8 - 1]. */ -+struct ternary_shift_left_imm_def : public ternary_shift_imm_base -+{ -+ bool -+ check (function_checker &c) const OVERRIDE -+ { -+ unsigned int bits = c.type_suffix (0).element_bits; -+ return c.require_immediate_range (2, 0, bits - 1); -+ } -+}; -+SHAPE (ternary_shift_left_imm) -+ -+/* sv_t svfoo[_n_t0])(sv_t, sv_t, uint64_t) -+ -+ where the final argument must be an integer constant expression in the -+ range [1, sizeof (_t) * 8]. */ -+struct ternary_shift_right_imm_def : public ternary_shift_imm_base -+{ -+ bool -+ check (function_checker &c) const OVERRIDE -+ { -+ unsigned int bits = c.type_suffix (0).element_bits; -+ return c.require_immediate_range (2, 1, bits); -+ } -+}; -+SHAPE (ternary_shift_right_imm) -+ -+/* sv_t svfoo[_t0](sv_t, sv_t, sv_t). */ -+struct ternary_uint_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v0,v0,vu0", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ unsigned int i, nargs; -+ type_suffix_index type; -+ if (!r.check_gp_argument (3, i, nargs) -+ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES -+ || !r.require_matching_vector_type (i + 1, type) -+ || !r.require_derived_vector_type (i + 2, i, type, TYPE_unsigned)) -+ return error_mark_node; -+ -+ return r.resolve_to (r.mode_suffix_id, type); -+ } -+}; -+SHAPE (ternary_uint) -+ -+/* sv_t svfoo[_t0](sv_t, svu_t, -+ sv_t). */ -+struct ternary_uintq_intq_def -+ : public ternary_resize2_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v0,vqu0,vqs0", group, MODE_none); -+ } -+}; -+SHAPE (ternary_uintq_intq) -+ -+/* sv_t svfoo[_t0](sv_t, sv_t, sv_t, -+ uint64_t) -+ -+ where the final argument is an integer constant expression in the range -+ [0, 16 / sizeof (_t) - 1]. */ -+struct ternary_uintq_intq_lane_def -+ : public ternary_qq_lane_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v0,vqu0,vqs0,su64", group, MODE_none); -+ } -+}; -+SHAPE (ternary_uintq_intq_lane) -+ -+/* sv_t svfoo[_t0](sv_t, sv_t, sv_t) -+ sv_t svfoo[_n_t0](sv_t, sv_t, -+ _t). 
*/ -+struct ternary_uintq_intq_opt_n_def -+ : public ternary_resize2_opt_n_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v0,vqu0,vqs0", group, MODE_none); -+ build_all (b, "v0,v0,vqu0,sqs0", group, MODE_n); -+ } -+}; -+SHAPE (ternary_uintq_intq_opt_n) -+ -+/* svbool_t svfoo[_](sv_t, sv_t, uint64_t) -+ -+ where the final argument is an integer constant expression in the -+ range [0, 7]. */ -+struct tmad_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v0,v0,su64", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ return r.resolve_uniform (2, 1); -+ } -+ -+ bool -+ check (function_checker &c) const OVERRIDE -+ { -+ return c.require_immediate_range (2, 0, 7); -+ } -+}; -+SHAPE (tmad) -+ -+/* sv_t svfoo[_t0](sv_t) -+ -+ i.e. the standard shape for unary operations that operate on -+ uniform types. */ -+struct unary_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v0", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ return r.resolve_unary (); -+ } -+}; -+SHAPE (unary) -+ -+/* sv_t svfoo_t0[_t1](sv_t) -+ -+ where the target type must be specified explicitly but the source -+ type can be inferred. */ -+struct unary_convert_def : public overloaded_base<1> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v1", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ return r.resolve_unary (r.type_suffix (0).tclass, -+ r.type_suffix (0).element_bits); -+ } -+}; -+SHAPE (unary_convert) -+ -+/* sv_t svfoo_t0[_t1](sv_t, sv_t) -+ -+ This is a version of unary_convert in which the even-indexed -+ elements are passed in as a first parameter, before any governing -+ predicate. */ -+struct unary_convert_narrowt_def : public overloaded_base<1> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,v1", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ return r.resolve_unary (r.type_suffix (0).tclass, -+ r.type_suffix (0).element_bits, true); -+ } -+}; -+SHAPE (unary_convert_narrowt) -+ -+/* sv_t svfoo[_t0](sv_t). */ -+struct unary_long_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,vh0", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ unsigned int i, nargs; -+ type_suffix_index type, result_type; -+ if (!r.check_gp_argument (1, i, nargs) -+ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES -+ || (result_type = long_type_suffix (r, type)) == NUM_TYPE_SUFFIXES) -+ return error_mark_node; -+ -+ if (tree res = r.lookup_form (r.mode_suffix_id, result_type)) -+ return res; -+ -+ return r.report_no_such_form (type); -+ } -+}; -+SHAPE (unary_long) -+ -+/* sv_t svfoo[_n]_t0(_t). 
*/ -+struct unary_n_def : public overloaded_base<1> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ /* The "_n" suffix is optional; the full name has it, but the short -+ name doesn't. */ -+ build_all (b, "v0,s0", group, MODE_n, true); -+ } -+ -+ tree -+ resolve (function_resolver &) const OVERRIDE -+ { -+ /* The short forms just make "_n" implicit, so no resolution is needed. */ -+ gcc_unreachable (); -+ } -+}; -+SHAPE (unary_n) -+ -+/* sv_t svfoo[_t0](sv_t). */ -+typedef unary_narrowb_base<> unary_narrowb_def; -+SHAPE (unary_narrowb) -+ -+/* sv_t svfoo[_t0](sv_t, sv_t). */ -+typedef unary_narrowt_base<> unary_narrowt_def; -+SHAPE (unary_narrowt) -+ -+/* sv_t svfoo[_t0](sv_t). */ -+typedef unary_narrowb_base unary_narrowb_to_uint_def; -+SHAPE (unary_narrowb_to_uint) -+ -+/* sv_t svfoo[_t0](sv_t, sv_t). */ -+typedef unary_narrowt_base unary_narrowt_to_uint_def; -+SHAPE (unary_narrowt_to_uint) -+ -+/* svbool_t svfoo(svbool_t). */ -+struct unary_pred_def : public nonoverloaded_base -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ build_all (b, "v0,v0", group, MODE_none); -+ } -+}; -+SHAPE (unary_pred) -+ -+/* sv_t svfoo[_t0](sv_t) -+ -+ i.e. a version of "unary" in which the returned vector contains -+ signed integers. */ -+struct unary_to_int_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "vs0,v0", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ return r.resolve_unary (TYPE_signed); -+ } -+}; -+SHAPE (unary_to_int) -+ -+/* sv_t svfoo[_t0](sv_t) -+ -+ i.e. a version of "unary" in which the returned vector contains -+ unsigned integers. */ -+struct unary_to_uint_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "vu0,v0", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ return r.resolve_unary (TYPE_unsigned); -+ } -+}; -+SHAPE (unary_to_uint) -+ -+/* sv_t svfoo[_t0](sv_t) -+ -+ where always belongs a certain type class, and where -+ therefore uniquely determines . */ -+struct unary_uint_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,vu0", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ unsigned int i, nargs; -+ type_suffix_index type; -+ if (!r.check_gp_argument (1, i, nargs) -+ || (type = r.infer_unsigned_vector_type (i)) == NUM_TYPE_SUFFIXES) -+ return error_mark_node; -+ -+ /* Search for a valid suffix with the same number of bits as TYPE. */ -+ unsigned int element_bits = type_suffixes[type].element_bits; -+ if (type_suffixes[type].unsigned_p) -+ for (unsigned int j = 0; j < NUM_TYPE_SUFFIXES; ++j) -+ if (type_suffixes[j].element_bits == element_bits) -+ if (tree res = r.lookup_form (r.mode_suffix_id, -+ type_suffix_index (j))) -+ return res; -+ -+ return r.report_no_such_form (type); -+ } -+}; -+SHAPE (unary_uint) -+ -+/* sv_t svfoo[_](sv_t) -+ -+ i.e. a version of "unary" in which the source elements are half the -+ size of the destination elements, but have the same type class. 
*/ -+struct unary_widen_def : public overloaded_base<0> -+{ -+ void -+ build (function_builder &b, const function_group_info &group) const OVERRIDE -+ { -+ b.add_overloaded_functions (group, MODE_none); -+ build_all (b, "v0,vh0", group, MODE_none); -+ } -+ -+ tree -+ resolve (function_resolver &r) const OVERRIDE -+ { -+ unsigned int i, nargs; -+ type_suffix_index type; -+ if (!r.check_gp_argument (1, i, nargs) -+ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES) -+ return error_mark_node; -+ -+ /* There is only a single form for predicates. */ -+ if (type == TYPE_SUFFIX_b) -+ return r.resolve_to (r.mode_suffix_id, type); -+ -+ if (type_suffixes[type].integer_p -+ && type_suffixes[type].element_bits < 64) -+ { -+ type_suffix_index wide_suffix -+ = find_type_suffix (type_suffixes[type].tclass, -+ type_suffixes[type].element_bits * 2); -+ if (tree res = r.lookup_form (r.mode_suffix_id, wide_suffix)) -+ return res; -+ } -+ -+ return r.report_no_such_form (type); -+ } -+}; -+SHAPE (unary_widen) -+ -+} -diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.h b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h -new file mode 100644 -index 000000000..b36f50acd ---- /dev/null -+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.h -@@ -0,0 +1,191 @@ -+/* ACLE support for AArch64 SVE (function shapes) -+ Copyright (C) 2018-2019 Free Software Foundation, Inc. -+ -+ This file is part of GCC. -+ -+ GCC is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published by -+ the Free Software Foundation; either version 3, or (at your option) -+ any later version. -+ -+ GCC is distributed in the hope that it will be useful, but -+ WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ General Public License for more details. -+ -+ You should have received a copy of the GNU General Public License -+ along with GCC; see the file COPYING3. If not see -+ . */ -+ -+#ifndef GCC_AARCH64_SVE_BUILTINS_SHAPES_H -+#define GCC_AARCH64_SVE_BUILTINS_SHAPES_H -+ -+namespace aarch64_sve -+{ -+ /* The naming convention is: -+ -+ - to use the name of the function if the rules are very specific to -+ a particular function (e.g. svext, for which the range of the -+ final immediate value is in no way generic). -+ -+ - to use names like "unary" etc. if the rules are somewhat generic, -+ especially if there are no ranges involved. -+ -+ When using generic names, the handling of the final vector argument -+ can be modified as follows: -+ -+ - an "_n" suffix changes the argument from a vector to a scalar. -+ -+ - an "_opt_n" suffix says that there are two forms of each function: -+ one in which the argument is the usual vector, and one in which it -+ is replaced by a scalar. -+ -+ - "_int" and "_uint" replace the argument's element type with a -+ signed or unsigned integer of the same width. The suffixes above -+ then indicate whether this final argument is or might be a scalar. -+ -+ - "_int64" and "_uint64" similarly replace the argument's element type -+ with int64_t or uint64_t. -+ -+ - "_wide" replaces the argument's element type with a 64-bit integer -+ of the same signedness. This only makes sense for integer elements. -+ -+ - "_lane" indicates that the argument is indexed by a constant lane -+ number, provided as an immediately-following argument of type uint64_t. -+ -+ Also: -+ -+ - "inherent" means that the function takes no arguments. 
-+ -+ - "_rotate" means that the final argument is a rotation amount -+ (0, 90, 180 or 270). -+ -+ - "_scalar" indicates that all data arguments are scalars rather -+ than vectors. -+ -+ - in gather/scatter addresses, "sv" stands for "scalar base, -+ vector displacement" while "vs" stands for "vector base, -+ scalar displacement". -+ -+ - "_pred" indicates that the function takes an svbool_t argument -+ that does not act as a governing predicate.. */ -+ namespace shapes -+ { -+ extern const function_shape *const adr_index; -+ extern const function_shape *const adr_offset; -+ extern const function_shape *const binary; -+ extern const function_shape *const binary_int_opt_n; -+ extern const function_shape *const binary_lane; -+ extern const function_shape *const binary_long_lane; -+ extern const function_shape *const binary_long_opt_n; -+ extern const function_shape *const binary_n; -+ extern const function_shape *const binary_narrowb_opt_n; -+ extern const function_shape *const binary_narrowt_opt_n; -+ extern const function_shape *const binary_opt_n; -+ extern const function_shape *const binary_pred; -+ extern const function_shape *const binary_rotate; -+ extern const function_shape *const binary_scalar; -+ extern const function_shape *const binary_to_uint; -+ extern const function_shape *const binary_uint; -+ extern const function_shape *const binary_uint_n; -+ extern const function_shape *const binary_uint_opt_n; -+ extern const function_shape *const binary_uint64_n; -+ extern const function_shape *const binary_uint64_opt_n; -+ extern const function_shape *const binary_wide; -+ extern const function_shape *const binary_wide_opt_n; -+ extern const function_shape *const clast; -+ extern const function_shape *const compare; -+ extern const function_shape *const compare_opt_n; -+ extern const function_shape *const compare_ptr; -+ extern const function_shape *const compare_scalar; -+ extern const function_shape *const compare_wide_opt_n; -+ extern const function_shape *const count_inherent; -+ extern const function_shape *const count_pat; -+ extern const function_shape *const count_pred; -+ extern const function_shape *const count_vector; -+ extern const function_shape *const create; -+ extern const function_shape *const dupq; -+ extern const function_shape *const ext; -+ extern const function_shape *const fold_left; -+ extern const function_shape *const get; -+ extern const function_shape *const inc_dec; -+ extern const function_shape *const inc_dec_pat; -+ extern const function_shape *const inc_dec_pred; -+ extern const function_shape *const inc_dec_pred_scalar; -+ extern const function_shape *const inherent; -+ extern const function_shape *const inherent_b; -+ extern const function_shape *const load; -+ extern const function_shape *const load_ext; -+ extern const function_shape *const load_ext_gather_index; -+ extern const function_shape *const load_ext_gather_index_restricted; -+ extern const function_shape *const load_ext_gather_offset; -+ extern const function_shape *const load_ext_gather_offset_restricted; -+ extern const function_shape *const load_gather_sv; -+ extern const function_shape *const load_gather_sv_restricted; -+ extern const function_shape *const load_gather_vs; -+ extern const function_shape *const load_replicate; -+ extern const function_shape *const mmla; -+ extern const function_shape *const pattern_pred; -+ extern const function_shape *const prefetch; -+ extern const function_shape *const prefetch_gather_index; -+ extern const function_shape *const 
prefetch_gather_offset; -+ extern const function_shape *const ptest; -+ extern const function_shape *const rdffr; -+ extern const function_shape *const reduction; -+ extern const function_shape *const reduction_wide; -+ extern const function_shape *const set; -+ extern const function_shape *const setffr; -+ extern const function_shape *const shift_left_imm_long; -+ extern const function_shape *const shift_left_imm_to_uint; -+ extern const function_shape *const shift_right_imm; -+ extern const function_shape *const shift_right_imm_narrowb; -+ extern const function_shape *const shift_right_imm_narrowt; -+ extern const function_shape *const shift_right_imm_narrowb_to_uint; -+ extern const function_shape *const shift_right_imm_narrowt_to_uint; -+ extern const function_shape *const store; -+ extern const function_shape *const store_scatter_index; -+ extern const function_shape *const store_scatter_index_restricted; -+ extern const function_shape *const store_scatter_offset; -+ extern const function_shape *const store_scatter_offset_restricted; -+ extern const function_shape *const tbl_tuple; -+ extern const function_shape *const ternary_bfloat; -+ extern const function_shape *const ternary_bfloat_lane; -+ extern const function_shape *const ternary_bfloat_lanex2; -+ extern const function_shape *const ternary_bfloat_opt_n; -+ extern const function_shape *const ternary_intq_uintq_lane; -+ extern const function_shape *const ternary_intq_uintq_opt_n; -+ extern const function_shape *const ternary_lane; -+ extern const function_shape *const ternary_lane_rotate; -+ extern const function_shape *const ternary_long_lane; -+ extern const function_shape *const ternary_long_opt_n; -+ extern const function_shape *const ternary_opt_n; -+ extern const function_shape *const ternary_qq_lane; -+ extern const function_shape *const ternary_qq_lane_rotate; -+ extern const function_shape *const ternary_qq_opt_n; -+ extern const function_shape *const ternary_qq_rotate; -+ extern const function_shape *const ternary_rotate; -+ extern const function_shape *const ternary_shift_left_imm; -+ extern const function_shape *const ternary_shift_right_imm; -+ extern const function_shape *const ternary_uint; -+ extern const function_shape *const ternary_uintq_intq; -+ extern const function_shape *const ternary_uintq_intq_lane; -+ extern const function_shape *const ternary_uintq_intq_opt_n; -+ extern const function_shape *const tmad; -+ extern const function_shape *const unary; -+ extern const function_shape *const unary_convert; -+ extern const function_shape *const unary_convert_narrowt; -+ extern const function_shape *const unary_long; -+ extern const function_shape *const unary_n; -+ extern const function_shape *const unary_narrowb; -+ extern const function_shape *const unary_narrowt; -+ extern const function_shape *const unary_narrowb_to_uint; -+ extern const function_shape *const unary_narrowt_to_uint; -+ extern const function_shape *const unary_pred; -+ extern const function_shape *const unary_to_int; -+ extern const function_shape *const unary_to_uint; -+ extern const function_shape *const unary_uint; -+ extern const function_shape *const unary_widen; -+ } -+} -+ -+#endif -diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc -new file mode 100644 -index 000000000..f830d9294 ---- /dev/null -+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc -@@ -0,0 +1,3568 @@ -+/* ACLE support for AArch64 SVE -+ Copyright (C) 2018-2019 Free Software Foundation, Inc. 
-+ -+ This file is part of GCC. -+ -+ GCC is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published by -+ the Free Software Foundation; either version 3, or (at your option) -+ any later version. -+ -+ GCC is distributed in the hope that it will be useful, but -+ WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ General Public License for more details. -+ -+ You should have received a copy of the GNU General Public License -+ along with GCC; see the file COPYING3. If not see -+ . */ -+ -+#define IN_TARGET_CODE 1 -+ -+#include "config.h" -+#include "system.h" -+#include "coretypes.h" -+#include "tm.h" -+#include "tree.h" -+#include "rtl.h" -+#include "tm_p.h" -+#include "memmodel.h" -+#include "insn-codes.h" -+#include "optabs.h" -+#include "recog.h" -+#include "diagnostic.h" -+#include "expr.h" -+#include "basic-block.h" -+#include "function.h" -+#include "fold-const.h" -+#include "gimple.h" -+#include "gimple-iterator.h" -+#include "gimplify.h" -+#include "explow.h" -+#include "emit-rtl.h" -+#include "tree-vector-builder.h" -+#include "stor-layout.h" -+#include "regs.h" -+#include "alias.h" -+#include "gimple-fold.h" -+#include "langhooks.h" -+#include "stringpool.h" -+#include "aarch64-sve-builtins.h" -+#include "aarch64-sve-builtins-base.h" -+#include "aarch64-sve-builtins-shapes.h" -+ -+namespace aarch64_sve { -+ -+/* Static information about each single-predicate or single-vector -+ ABI and ACLE type. */ -+struct vector_type_info -+{ -+ /* The name of the type as declared by arm_sve.h. */ -+ const char *acle_name; -+ -+ /* The name of the type specified in AAPCS64. The type is always -+ available under this name, even when arm_sve.h isn't included. */ -+ const char *abi_name; -+ -+ /* The C++ mangling of ABI_NAME. */ -+ const char *mangled_name; -+}; -+ -+/* Describes a function decl. */ -+class GTY(()) registered_function -+{ -+public: -+ /* The ACLE function that the decl represents. */ -+ function_instance instance GTY ((skip)); -+ -+ /* The decl itself. */ -+ tree decl; -+ -+ /* The architecture extensions that the function requires, as a set of -+ AARCH64_FL_* flags. */ -+ uint64_t required_extensions; -+ -+ /* True if the decl represents an overloaded function that needs to be -+ resolved by function_resolver. */ -+ bool overloaded_p; -+}; -+ -+/* Hash traits for registered_function. */ -+struct registered_function_hasher : nofree_ptr_hash -+{ -+ typedef function_instance compare_type; -+ -+ static hashval_t hash (value_type); -+ static bool equal (value_type, const compare_type &); -+}; -+ -+/* Information about each single-predicate or single-vector type. */ -+static CONSTEXPR const vector_type_info vector_types[] = { -+#define DEF_SVE_TYPE(ACLE_NAME, NCHARS, ABI_NAME, SCALAR_TYPE) \ -+ { #ACLE_NAME, #ABI_NAME, #NCHARS #ABI_NAME }, -+#include "aarch64-sve-builtins.def" -+}; -+ -+/* The function name suffix associated with each predication type. */ -+static const char *const pred_suffixes[NUM_PREDS + 1] = { -+ "", -+ "", -+ "_m", -+ "_x", -+ "_z", -+ "" -+}; -+ -+/* Static information about each mode_suffix_index. 
*/ -+CONSTEXPR const mode_suffix_info mode_suffixes[] = { -+#define VECTOR_TYPE_none NUM_VECTOR_TYPES -+#define DEF_SVE_MODE(NAME, BASE, DISPLACEMENT, UNITS) \ -+ { "_" #NAME, VECTOR_TYPE_##BASE, VECTOR_TYPE_##DISPLACEMENT, UNITS_##UNITS }, -+#include "aarch64-sve-builtins.def" -+#undef VECTOR_TYPE_none -+ { "", NUM_VECTOR_TYPES, NUM_VECTOR_TYPES, UNITS_none } -+}; -+ -+/* Static information about each type_suffix_index. */ -+CONSTEXPR const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1] = { -+#define DEF_SVE_TYPE_SUFFIX(NAME, ACLE_TYPE, CLASS, BITS, MODE) \ -+ { "_" #NAME, \ -+ VECTOR_TYPE_##ACLE_TYPE, \ -+ TYPE_##CLASS, \ -+ BITS, \ -+ BITS / BITS_PER_UNIT, \ -+ TYPE_##CLASS == TYPE_signed || TYPE_##CLASS == TYPE_unsigned, \ -+ TYPE_##CLASS == TYPE_unsigned, \ -+ TYPE_##CLASS == TYPE_float, \ -+ TYPE_##CLASS == TYPE_bool, \ -+ 0, \ -+ MODE }, -+#include "aarch64-sve-builtins.def" -+ { "", NUM_VECTOR_TYPES, TYPE_bool, 0, 0, false, false, false, false, -+ 0, VOIDmode } -+}; -+ -+/* Define a TYPES_ macro for each combination of type -+ suffixes that an ACLE function can have, where is the -+ name used in DEF_SVE_FUNCTION entries. -+ -+ Use S (T) for single type suffix T and D (T1, T2) for a pair of type -+ suffixes T1 and T2. Use commas to separate the suffixes. -+ -+ Although the order shouldn't matter, the convention is to sort the -+ suffixes lexicographically after dividing suffixes into a type -+ class ("b", "f", etc.) and a numerical bit count. */ -+ -+/* _b8 _b16 _b32 _b64. */ -+#define TYPES_all_pred(S, D) \ -+ S (b8), S (b16), S (b32), S (b64) -+ -+/* _f16 _f32 _f64. */ -+#define TYPES_all_float(S, D) \ -+ S (f16), S (f32), S (f64) -+ -+/* _s8 _s16 _s32 _s64. */ -+#define TYPES_all_signed(S, D) \ -+ S (s8), S (s16), S (s32), S (s64) -+ -+/* _f16 _f32 _f64 -+ _s8 _s16 _s32 _s64. */ -+#define TYPES_all_float_and_signed(S, D) \ -+ TYPES_all_float (S, D), TYPES_all_signed (S, D) -+ -+/* _u8 _u16 _u32 _u64. */ -+#define TYPES_all_unsigned(S, D) \ -+ S (u8), S (u16), S (u32), S (u64) -+ -+/* _s8 _s16 _s32 _s64 -+ _u8 _u16 _u32 _u64. */ -+#define TYPES_all_integer(S, D) \ -+ TYPES_all_signed (S, D), TYPES_all_unsigned (S, D) -+ -+/* _f16 _f32 _f64 -+ _s8 _s16 _s32 _s64 -+ _u8 _u16 _u32 _u64. */ -+#define TYPES_all_arith(S, D) \ -+ TYPES_all_float (S, D), TYPES_all_integer (S, D) -+ -+/* _bf16 -+ _f16 _f32 _f64 -+ _s8 _s16 _s32 _s64 -+ _u8 _u16 _u32 _u64. */ -+#define TYPES_all_data(S, D) \ -+ S (bf16), TYPES_all_arith (S, D) -+ -+/* _b only. */ -+#define TYPES_b(S, D) \ -+ S (b) -+ -+/* _u8. */ -+#define TYPES_b_unsigned(S, D) \ -+ S (u8) -+ -+/* _s8 -+ _u8. */ -+#define TYPES_b_integer(S, D) \ -+ S (s8), TYPES_b_unsigned (S, D) -+ -+/* _s8 _s16 -+ _u8 _u16. */ -+#define TYPES_bh_integer(S, D) \ -+ S (s8), S (s16), S (u8), S (u16) -+ -+/* _u8 _u32. */ -+#define TYPES_bs_unsigned(S, D) \ -+ S (u8), S (u32) -+ -+/* _s8 _s16 _s32. */ -+#define TYPES_bhs_signed(S, D) \ -+ S (s8), S (s16), S (s32) -+ -+/* _u8 _u16 _u32. */ -+#define TYPES_bhs_unsigned(S, D) \ -+ S (u8), S (u16), S (u32) -+ -+/* _s8 _s16 _s32 -+ _u8 _u16 _u32. */ -+#define TYPES_bhs_integer(S, D) \ -+ TYPES_bhs_signed (S, D), TYPES_bhs_unsigned (S, D) -+ -+/* _s16 -+ _u16. */ -+#define TYPES_h_integer(S, D) \ -+ S (s16), S (u16) -+ -+/* _s16 _s32. */ -+#define TYPES_hs_signed(S, D) \ -+ S (s16), S (s32) -+ -+/* _s16 _s32 -+ _u16 _u32. */ -+#define TYPES_hs_integer(S, D) \ -+ TYPES_hs_signed (S, D), S (u16), S (u32) -+ -+/* _f16 _f32. */ -+#define TYPES_hs_float(S, D) \ -+ S (f16), S (f32) -+ -+/* _u16 _u64. 
*/ -+#define TYPES_hd_unsigned(S, D) \ -+ S (u16), S (u64) -+ -+/* _s16 _s32 _s64. */ -+#define TYPES_hsd_signed(S, D) \ -+ S (s16), S (s32), S (s64) -+ -+/* _s16 _s32 _s64 -+ _u16 _u32 _u64. */ -+#define TYPES_hsd_integer(S, D) \ -+ TYPES_hsd_signed (S, D), S (u16), S (u32), S (u64) -+ -+/* _f32. */ -+#define TYPES_s_float(S, D) \ -+ S (f32) -+ -+/* _f32 -+ _s16 _s32 _s64 -+ _u16 _u32 _u64. */ -+#define TYPES_s_float_hsd_integer(S, D) \ -+ TYPES_s_float (S, D), TYPES_hsd_integer (S, D) -+ -+/* _f32 -+ _s32 _s64 -+ _u32 _u64. */ -+#define TYPES_s_float_sd_integer(S, D) \ -+ TYPES_s_float (S, D), TYPES_sd_integer (S, D) -+ -+/* _s32. */ -+#define TYPES_s_signed(S, D) \ -+ S (s32) -+ -+/* _u32. */ -+#define TYPES_s_unsigned(S, D) \ -+ S (u32) -+ -+/* _s32 _u32. */ -+#define TYPES_s_integer(S, D) \ -+ TYPES_s_signed (S, D), TYPES_s_unsigned (S, D) -+ -+/* _s32 _s64. */ -+#define TYPES_sd_signed(S, D) \ -+ S (s32), S (s64) -+ -+/* _u32 _u64. */ -+#define TYPES_sd_unsigned(S, D) \ -+ S (u32), S (u64) -+ -+/* _s32 _s64 -+ _u32 _u64. */ -+#define TYPES_sd_integer(S, D) \ -+ TYPES_sd_signed (S, D), TYPES_sd_unsigned (S, D) -+ -+/* _f32 _f64 -+ _s32 _s64 -+ _u32 _u64. */ -+#define TYPES_sd_data(S, D) \ -+ S (f32), S (f64), TYPES_sd_integer (S, D) -+ -+/* _f16 _f32 _f64 -+ _s32 _s64 -+ _u32 _u64. */ -+#define TYPES_all_float_and_sd_integer(S, D) \ -+ TYPES_all_float (S, D), TYPES_sd_integer (S, D) -+ -+/* _f64. */ -+#define TYPES_d_float(S, D) \ -+ S (f64) -+ -+/* _u64. */ -+#define TYPES_d_unsigned(S, D) \ -+ S (u64) -+ -+/* _s64 -+ _u64. */ -+#define TYPES_d_integer(S, D) \ -+ S (s64), TYPES_d_unsigned (S, D) -+ -+/* _f64 -+ _s64 -+ _u64. */ -+#define TYPES_d_data(S, D) \ -+ TYPES_d_float (S, D), TYPES_d_integer (S, D) -+ -+/* All the type combinations allowed by svcvt. */ -+#define TYPES_cvt(S, D) \ -+ D (f16, f32), D (f16, f64), \ -+ D (f16, s16), D (f16, s32), D (f16, s64), \ -+ D (f16, u16), D (f16, u32), D (f16, u64), \ -+ \ -+ D (f32, f16), D (f32, f64), \ -+ D (f32, s32), D (f32, s64), \ -+ D (f32, u32), D (f32, u64), \ -+ \ -+ D (f64, f16), D (f64, f32), \ -+ D (f64, s32), D (f64, s64), \ -+ D (f64, u32), D (f64, u64), \ -+ \ -+ D (s16, f16), \ -+ D (s32, f16), D (s32, f32), D (s32, f64), \ -+ D (s64, f16), D (s64, f32), D (s64, f64), \ -+ \ -+ D (u16, f16), \ -+ D (u32, f16), D (u32, f32), D (u32, f64), \ -+ D (u64, f16), D (u64, f32), D (u64, f64) -+ -+/* _bf16_f32. */ -+#define TYPES_cvt_bfloat(S, D) \ -+ D (bf16, f32) -+ -+/* _f32_f16 -+ _f64_f32. */ -+#define TYPES_cvt_long(S, D) \ -+ D (f32, f16), D (f64, f32) -+ -+/* _f16_f32. */ -+#define TYPES_cvt_narrow_s(S, D) \ -+ D (f32, f64) -+ -+/* _f16_f32 -+ _f32_f64. */ -+#define TYPES_cvt_narrow(S, D) \ -+ D (f16, f32), TYPES_cvt_narrow_s (S, D) -+ -+/* { _s32 _s64 } x { _b8 _b16 _b32 _b64 } -+ { _u32 _u64 }. */ -+#define TYPES_inc_dec_n1(D, A) \ -+ D (A, b8), D (A, b16), D (A, b32), D (A, b64) -+#define TYPES_inc_dec_n(S, D) \ -+ TYPES_inc_dec_n1 (D, s32), \ -+ TYPES_inc_dec_n1 (D, s64), \ -+ TYPES_inc_dec_n1 (D, u32), \ -+ TYPES_inc_dec_n1 (D, u64) -+ -+/* { _bf16 } { _bf16 } -+ { _f16 _f32 _f64 } { _f16 _f32 _f64 } -+ { _s8 _s16 _s32 _s64 } x { _s8 _s16 _s32 _s64 } -+ { _u8 _u16 _u32 _u64 } { _u8 _u16 _u32 _u64 }. 
*/ -+#define TYPES_reinterpret1(D, A) \ -+ D (A, bf16), \ -+ D (A, f16), D (A, f32), D (A, f64), \ -+ D (A, s8), D (A, s16), D (A, s32), D (A, s64), \ -+ D (A, u8), D (A, u16), D (A, u32), D (A, u64) -+#define TYPES_reinterpret(S, D) \ -+ TYPES_reinterpret1 (D, bf16), \ -+ TYPES_reinterpret1 (D, f16), \ -+ TYPES_reinterpret1 (D, f32), \ -+ TYPES_reinterpret1 (D, f64), \ -+ TYPES_reinterpret1 (D, s8), \ -+ TYPES_reinterpret1 (D, s16), \ -+ TYPES_reinterpret1 (D, s32), \ -+ TYPES_reinterpret1 (D, s64), \ -+ TYPES_reinterpret1 (D, u8), \ -+ TYPES_reinterpret1 (D, u16), \ -+ TYPES_reinterpret1 (D, u32), \ -+ TYPES_reinterpret1 (D, u64) -+ -+/* { _b8 _b16 _b32 _b64 } x { _s32 _s64 } -+ { _u32 _u64 } */ -+#define TYPES_while1(D, bn) \ -+ D (bn, s32), D (bn, s64), D (bn, u32), D (bn, u64) -+#define TYPES_while(S, D) \ -+ TYPES_while1 (D, b8), \ -+ TYPES_while1 (D, b16), \ -+ TYPES_while1 (D, b32), \ -+ TYPES_while1 (D, b64) -+ -+/* Describe a pair of type suffixes in which only the first is used. */ -+#define DEF_VECTOR_TYPE(X) { TYPE_SUFFIX_ ## X, NUM_TYPE_SUFFIXES } -+ -+/* Describe a pair of type suffixes in which both are used. */ -+#define DEF_DOUBLE_TYPE(X, Y) { TYPE_SUFFIX_ ## X, TYPE_SUFFIX_ ## Y } -+ -+/* Create an array that can be used in aarch64-sve-builtins.def to -+ select the type suffixes in TYPES_. */ -+#define DEF_SVE_TYPES_ARRAY(NAME) \ -+ static const type_suffix_pair types_##NAME[] = { \ -+ TYPES_##NAME (DEF_VECTOR_TYPE, DEF_DOUBLE_TYPE), \ -+ { NUM_TYPE_SUFFIXES, NUM_TYPE_SUFFIXES } \ -+ } -+ -+/* For functions that don't take any type suffixes. */ -+static const type_suffix_pair types_none[] = { -+ { NUM_TYPE_SUFFIXES, NUM_TYPE_SUFFIXES }, -+ { NUM_TYPE_SUFFIXES, NUM_TYPE_SUFFIXES } -+}; -+ -+/* Create an array for each TYPES_ macro above. */ -+DEF_SVE_TYPES_ARRAY (all_pred); -+DEF_SVE_TYPES_ARRAY (all_float); -+DEF_SVE_TYPES_ARRAY (all_signed); -+DEF_SVE_TYPES_ARRAY (all_float_and_signed); -+DEF_SVE_TYPES_ARRAY (all_unsigned); -+DEF_SVE_TYPES_ARRAY (all_integer); -+DEF_SVE_TYPES_ARRAY (all_arith); -+DEF_SVE_TYPES_ARRAY (all_data); -+DEF_SVE_TYPES_ARRAY (b); -+DEF_SVE_TYPES_ARRAY (b_unsigned); -+DEF_SVE_TYPES_ARRAY (b_integer); -+DEF_SVE_TYPES_ARRAY (bh_integer); -+DEF_SVE_TYPES_ARRAY (bs_unsigned); -+DEF_SVE_TYPES_ARRAY (bhs_signed); -+DEF_SVE_TYPES_ARRAY (bhs_unsigned); -+DEF_SVE_TYPES_ARRAY (bhs_integer); -+DEF_SVE_TYPES_ARRAY (h_integer); -+DEF_SVE_TYPES_ARRAY (hs_signed); -+DEF_SVE_TYPES_ARRAY (hs_integer); -+DEF_SVE_TYPES_ARRAY (hs_float); -+DEF_SVE_TYPES_ARRAY (hd_unsigned); -+DEF_SVE_TYPES_ARRAY (hsd_signed); -+DEF_SVE_TYPES_ARRAY (hsd_integer); -+DEF_SVE_TYPES_ARRAY (s_float); -+DEF_SVE_TYPES_ARRAY (s_float_hsd_integer); -+DEF_SVE_TYPES_ARRAY (s_float_sd_integer); -+DEF_SVE_TYPES_ARRAY (s_signed); -+DEF_SVE_TYPES_ARRAY (s_unsigned); -+DEF_SVE_TYPES_ARRAY (s_integer); -+DEF_SVE_TYPES_ARRAY (sd_signed); -+DEF_SVE_TYPES_ARRAY (sd_unsigned); -+DEF_SVE_TYPES_ARRAY (sd_integer); -+DEF_SVE_TYPES_ARRAY (sd_data); -+DEF_SVE_TYPES_ARRAY (all_float_and_sd_integer); -+DEF_SVE_TYPES_ARRAY (d_float); -+DEF_SVE_TYPES_ARRAY (d_unsigned); -+DEF_SVE_TYPES_ARRAY (d_integer); -+DEF_SVE_TYPES_ARRAY (d_data); -+DEF_SVE_TYPES_ARRAY (cvt); -+DEF_SVE_TYPES_ARRAY (cvt_bfloat); -+DEF_SVE_TYPES_ARRAY (cvt_long); -+DEF_SVE_TYPES_ARRAY (cvt_narrow_s); -+DEF_SVE_TYPES_ARRAY (cvt_narrow); -+DEF_SVE_TYPES_ARRAY (inc_dec_n); -+DEF_SVE_TYPES_ARRAY (reinterpret); -+DEF_SVE_TYPES_ARRAY (while); -+ -+/* Used by functions that have no governing predicate. 
*/ -+static const predication_index preds_none[] = { PRED_none, NUM_PREDS }; -+ -+/* Used by functions that have a governing predicate but do not have an -+ explicit suffix. */ -+static const predication_index preds_implicit[] = { PRED_implicit, NUM_PREDS }; -+ -+/* Used by functions that allow merging and "don't care" predication, -+ but are not suitable for predicated MOVPRFX. */ -+static const predication_index preds_mx[] = { -+ PRED_m, PRED_x, NUM_PREDS -+}; -+ -+/* Used by functions that allow merging, zeroing and "don't care" -+ predication. */ -+static const predication_index preds_mxz[] = { -+ PRED_m, PRED_x, PRED_z, NUM_PREDS -+}; -+ -+/* Used by functions that have the mxz predicated forms above, and in addition -+ have an unpredicated form. */ -+static const predication_index preds_mxz_or_none[] = { -+ PRED_m, PRED_x, PRED_z, PRED_none, NUM_PREDS -+}; -+ -+/* Used by functions that allow merging and zeroing predication but have -+ no "_x" form. */ -+static const predication_index preds_mz[] = { PRED_m, PRED_z, NUM_PREDS }; -+ -+/* Used by functions that have an unpredicated form and a _z predicated -+ form. */ -+static const predication_index preds_z_or_none[] = { -+ PRED_z, PRED_none, NUM_PREDS -+}; -+ -+/* Used by (mostly predicate) functions that only support "_z" predication. */ -+static const predication_index preds_z[] = { PRED_z, NUM_PREDS }; -+ -+/* A list of all SVE ACLE functions. */ -+static CONSTEXPR const function_group_info function_groups[] = { -+#define DEF_SVE_FUNCTION(NAME, SHAPE, TYPES, PREDS) \ -+ { #NAME, &functions::NAME, &shapes::SHAPE, types_##TYPES, preds_##PREDS, \ -+ REQUIRED_EXTENSIONS | AARCH64_FL_SVE }, -+#include "aarch64-sve-builtins.def" -+}; -+ -+/* The scalar type associated with each vector type. */ -+GTY(()) tree scalar_types[NUM_VECTOR_TYPES]; -+ -+/* The single-predicate and single-vector types, with their built-in -+ "__SV..._t" name. Allow an index of NUM_VECTOR_TYPES, which always -+ yields a null tree. */ -+static GTY(()) tree abi_vector_types[NUM_VECTOR_TYPES + 1]; -+ -+/* Same, but with the arm_sve.h "sv..._t" name. */ -+GTY(()) tree acle_vector_types[MAX_TUPLE_SIZE][NUM_VECTOR_TYPES + 1]; -+ -+/* The svpattern enum type. */ -+GTY(()) tree acle_svpattern; -+ -+/* The svprfop enum type. */ -+GTY(()) tree acle_svprfop; -+ -+/* The list of all registered function decls, indexed by code. */ -+static GTY(()) vec *registered_functions; -+ -+/* All registered function decls, hashed on the function_instance -+ that they implement. This is used for looking up implementations of -+ overloaded functions. */ -+static hash_table *function_table; -+ -+/* True if we've already complained about attempts to use functions -+ when the required extension is disabled. */ -+static bool reported_missing_extension_p; -+ -+/* If TYPE is an ACLE vector type, return the associated vector_type, -+ otherwise return NUM_VECTOR_TYPES. */ -+static vector_type_index -+find_vector_type (const_tree type) -+{ -+ /* A linear search should be OK here, since the code isn't hot and -+ the number of types is only small. */ -+ type = TYPE_MAIN_VARIANT (type); -+ for (unsigned int i = 0; i < NUM_VECTOR_TYPES; ++i) -+ if (type == abi_vector_types[i]) -+ return vector_type_index (i); -+ return NUM_VECTOR_TYPES; -+} -+ -+/* If TYPE is a valid SVE element type, return the corresponding type -+ suffix, otherwise return NUM_TYPE_SUFFIXES. 
*/ -+static type_suffix_index -+find_type_suffix_for_scalar_type (const_tree type) -+{ -+ /* A linear search should be OK here, since the code isn't hot and -+ the number of types is only small. */ -+ type = TYPE_MAIN_VARIANT (type); -+ for (unsigned int suffix_i = 0; suffix_i < NUM_TYPE_SUFFIXES; ++suffix_i) -+ if (!type_suffixes[suffix_i].bool_p) -+ { -+ vector_type_index vector_i = type_suffixes[suffix_i].vector_type; -+ if (type == TYPE_MAIN_VARIANT (scalar_types[vector_i])) -+ return type_suffix_index (suffix_i); -+ } -+ return NUM_TYPE_SUFFIXES; -+} -+ -+/* Report an error against LOCATION that the user has tried to use -+ function FNDECL when extension EXTENSION is disabled. */ -+static void -+report_missing_extension (location_t location, tree fndecl, -+ const char *extension) -+{ -+ /* Avoid reporting a slew of messages for a single oversight. */ -+ if (reported_missing_extension_p) -+ return; -+ -+ error_at (location, "ACLE function %qD requires ISA extension %qs", -+ fndecl, extension); -+ inform (location, "you can enable %qs using the command-line" -+ " option %<-march%>, or by using the %" -+ " attribute or pragma", extension); -+ reported_missing_extension_p = true; -+} -+ -+/* Check whether all the AARCH64_FL_* values in REQUIRED_EXTENSIONS are -+ enabled, given that those extensions are required for function FNDECL. -+ Report an error against LOCATION if not. */ -+static bool -+check_required_extensions (location_t location, tree fndecl, -+ uint64_t required_extensions) -+{ -+ uint64_t missing_extensions = required_extensions & ~aarch64_isa_flags; -+ if (missing_extensions == 0) -+ return true; -+ -+ static const struct { uint64_t flag; const char *name; } extensions[] = { -+#define AARCH64_OPT_EXTENSION(EXT_NAME, FLAG_CANONICAL, FLAGS_ON, FLAGS_OFF, \ -+ SYNTHETIC, FEATURE_STRING) \ -+ { FLAG_CANONICAL, EXT_NAME }, -+#include "aarch64-option-extensions.def" -+ }; -+ -+ for (unsigned int i = 0; i < ARRAY_SIZE (extensions); ++i) -+ if (missing_extensions & extensions[i].flag) -+ { -+ report_missing_extension (location, fndecl, extensions[i].name); -+ return false; -+ } -+ gcc_unreachable (); -+} -+ -+/* Report that LOCATION has a call to FNDECL in which argument ARGNO -+ was not an integer constant expression. ARGNO counts from zero. */ -+static void -+report_non_ice (location_t location, tree fndecl, unsigned int argno) -+{ -+ error_at (location, "argument %d of %qE must be an integer constant" -+ " expression", argno + 1, fndecl); -+} -+ -+/* Report that LOCATION has a call to FNDECL in which argument ARGNO has -+ the value ACTUAL, whereas the function requires a value in the range -+ [MIN, MAX]. ARGNO counts from zero. */ -+static void -+report_out_of_range (location_t location, tree fndecl, unsigned int argno, -+ HOST_WIDE_INT actual, HOST_WIDE_INT min, -+ HOST_WIDE_INT max) -+{ -+ error_at (location, "passing %wd to argument %d of %qE, which expects" -+ " a value in the range [%wd, %wd]", actual, argno + 1, fndecl, -+ min, max); -+} -+ -+/* Report that LOCATION has a call to FNDECL in which argument ARGNO has -+ the value ACTUAL, whereas the function requires either VALUE0 or -+ VALUE1. ARGNO counts from zero. 
*/ -+static void -+report_neither_nor (location_t location, tree fndecl, unsigned int argno, -+ HOST_WIDE_INT actual, HOST_WIDE_INT value0, -+ HOST_WIDE_INT value1) -+{ -+ error_at (location, "passing %wd to argument %d of %qE, which expects" -+ " either %wd or %wd", actual, argno + 1, fndecl, value0, value1); -+} -+ -+/* Report that LOCATION has a call to FNDECL in which argument ARGNO has -+ the value ACTUAL, whereas the function requires one of VALUE0..3. -+ ARGNO counts from zero. */ -+static void -+report_not_one_of (location_t location, tree fndecl, unsigned int argno, -+ HOST_WIDE_INT actual, HOST_WIDE_INT value0, -+ HOST_WIDE_INT value1, HOST_WIDE_INT value2, -+ HOST_WIDE_INT value3) -+{ -+ error_at (location, "passing %wd to argument %d of %qE, which expects" -+ " %wd, %wd, %wd or %wd", actual, argno + 1, fndecl, value0, value1, -+ value2, value3); -+} -+ -+/* Report that LOCATION has a call to FNDECL in which argument ARGNO has -+ the value ACTUAL, whereas the function requires a valid value of -+ enum type ENUMTYPE. ARGNO counts from zero. */ -+static void -+report_not_enum (location_t location, tree fndecl, unsigned int argno, -+ HOST_WIDE_INT actual, tree enumtype) -+{ -+ error_at (location, "passing %wd to argument %d of %qE, which expects" -+ " a valid %qT value", actual, argno + 1, fndecl, enumtype); -+} -+ -+/* Return a hash code for a function_instance. */ -+hashval_t -+function_instance::hash () const -+{ -+ inchash::hash h; -+ /* BASE uniquely determines BASE_NAME, so we don't need to hash both. */ -+ h.add_ptr (base); -+ h.add_ptr (shape); -+ h.add_int (mode_suffix_id); -+ h.add_int (type_suffix_ids[0]); -+ h.add_int (type_suffix_ids[1]); -+ h.add_int (pred); -+ return h.end (); -+} -+ -+/* Return a set of CP_* flags that describe what the function could do, -+ taking the command-line flags into account. */ -+unsigned int -+function_instance::call_properties () const -+{ -+ unsigned int flags = base->call_properties (*this); -+ -+ /* -fno-trapping-math means that we can assume any FP exceptions -+ are not user-visible. */ -+ if (!flag_trapping_math) -+ flags &= ~CP_RAISE_FP_EXCEPTIONS; -+ -+ return flags; -+} -+ -+/* Return true if calls to the function could read some form of -+ global state. */ -+bool -+function_instance::reads_global_state_p () const -+{ -+ unsigned int flags = call_properties (); -+ -+ /* Preserve any dependence on rounding mode, flush to zero mode, etc. -+ There is currently no way of turning this off; in particular, -+ -fno-rounding-math (which is the default) means that we should make -+ the usual assumptions about rounding mode, which for intrinsics means -+ acting as the instructions do. */ -+ if (flags & CP_READ_FPCR) -+ return true; -+ -+ /* Handle direct reads of global state. */ -+ return flags & (CP_READ_MEMORY | CP_READ_FFR); -+} -+ -+/* Return true if calls to the function could modify some form of -+ global state. */ -+bool -+function_instance::modifies_global_state_p () const -+{ -+ unsigned int flags = call_properties (); -+ -+ /* Preserve any exception state written back to the FPCR, -+ unless -fno-trapping-math says this is unnecessary. */ -+ if (flags & CP_RAISE_FP_EXCEPTIONS) -+ return true; -+ -+ /* Treat prefetches as modifying global state, since that's the -+ only means we have of keeping them in their correct position. */ -+ if (flags & CP_PREFETCH_MEMORY) -+ return true; -+ -+ /* Handle direct modifications of global state. 
*/ -+ return flags & (CP_WRITE_MEMORY | CP_WRITE_FFR); -+} -+ -+/* Return true if calls to the function could raise a signal. */ -+bool -+function_instance::could_trap_p () const -+{ -+ unsigned int flags = call_properties (); -+ -+ /* Handle functions that could raise SIGFPE. */ -+ if (flags & CP_RAISE_FP_EXCEPTIONS) -+ return true; -+ -+ /* Handle functions that could raise SIGBUS or SIGSEGV. */ -+ if (flags & (CP_READ_MEMORY | CP_WRITE_MEMORY)) -+ return true; -+ -+ return false; -+} -+ -+inline hashval_t -+registered_function_hasher::hash (value_type value) -+{ -+ return value->instance.hash (); -+} -+ -+inline bool -+registered_function_hasher::equal (value_type value, const compare_type &key) -+{ -+ return value->instance == key; -+} -+ -+sve_switcher::sve_switcher () -+ : m_old_isa_flags (aarch64_isa_flags) -+{ -+ /* Changing the ISA flags and have_regs_of_mode should be enough here. -+ We shouldn't need to pay the compile-time cost of a full target -+ switch. */ -+ aarch64_isa_flags = (AARCH64_FL_FP | AARCH64_FL_SIMD | AARCH64_FL_F16 -+ | AARCH64_FL_SVE); -+ -+ memcpy (m_old_have_regs_of_mode, have_regs_of_mode, -+ sizeof (have_regs_of_mode)); -+ for (int i = 0; i < NUM_MACHINE_MODES; ++i) -+ if (aarch64_sve_mode_p ((machine_mode) i)) -+ have_regs_of_mode[i] = true; -+} -+ -+sve_switcher::~sve_switcher () -+{ -+ memcpy (have_regs_of_mode, m_old_have_regs_of_mode, -+ sizeof (have_regs_of_mode)); -+ aarch64_isa_flags = m_old_isa_flags; -+} -+ -+function_builder::function_builder () -+{ -+ m_overload_type = build_function_type (void_type_node, void_list_node); -+ m_direct_overloads = lang_GNU_CXX (); -+ gcc_obstack_init (&m_string_obstack); -+} -+ -+function_builder::~function_builder () -+{ -+ obstack_free (&m_string_obstack, NULL); -+} -+ -+/* Add NAME to the end of the function name being built. */ -+void -+function_builder::append_name (const char *name) -+{ -+ obstack_grow (&m_string_obstack, name, strlen (name)); -+} -+ -+/* Zero-terminate and complete the function name being built. */ -+char * -+function_builder::finish_name () -+{ -+ obstack_1grow (&m_string_obstack, 0); -+ return (char *) obstack_finish (&m_string_obstack); -+} -+ -+/* Return the overloaded or full function name for INSTANCE; OVERLOADED_P -+ selects which. Allocate the string on m_string_obstack; the caller -+ must use obstack_free to free it after use. */ -+char * -+function_builder::get_name (const function_instance &instance, -+ bool overloaded_p) -+{ -+ append_name (instance.base_name); -+ if (overloaded_p) -+ switch (instance.displacement_units ()) -+ { -+ case UNITS_none: -+ break; -+ -+ case UNITS_bytes: -+ append_name ("_offset"); -+ break; -+ -+ case UNITS_elements: -+ append_name ("_index"); -+ break; -+ -+ case UNITS_vectors: -+ append_name ("_vnum"); -+ break; -+ } -+ else -+ append_name (instance.mode_suffix ().string); -+ for (unsigned int i = 0; i < 2; ++i) -+ if (!overloaded_p || instance.shape->explicit_type_suffix_p (i)) -+ append_name (instance.type_suffix (i).string); -+ append_name (pred_suffixes[instance.pred]); -+ return finish_name (); -+} -+ -+/* Add attribute NAME to ATTRS. */ -+static tree -+add_attribute (const char *name, tree attrs) -+{ -+ return tree_cons (get_identifier (name), NULL_TREE, attrs); -+} -+ -+/* Return the appropriate function attributes for INSTANCE. 
 */
-+tree
-+function_builder::get_attributes (const function_instance &instance)
-+{
-+  tree attrs = NULL_TREE;
-+
-+  if (!instance.modifies_global_state_p ())
-+    {
-+      if (instance.reads_global_state_p ())
-+        attrs = add_attribute ("pure", attrs);
-+      else
-+        attrs = add_attribute ("const", attrs);
-+    }
-+
-+  if (!flag_non_call_exceptions || !instance.could_trap_p ())
-+    attrs = add_attribute ("nothrow", attrs);
-+
-+  return add_attribute ("leaf", attrs);
-+}
-+
-+/* Add a function called NAME with type FNTYPE and attributes ATTRS.
-+   INSTANCE describes what the function does and OVERLOADED_P indicates
-+   whether it is overloaded.  REQUIRED_EXTENSIONS are the set of
-+   architecture extensions that the function requires.  */
-+registered_function &
-+function_builder::add_function (const function_instance &instance,
-+                                const char *name, tree fntype, tree attrs,
-+                                uint64_t required_extensions,
-+                                bool overloaded_p)
-+{
-+  unsigned int code = vec_safe_length (registered_functions);
-+  code = (code << AARCH64_BUILTIN_SHIFT) | AARCH64_BUILTIN_SVE;
-+  tree decl = simulate_builtin_function_decl (input_location, name, fntype,
-+                                              code, NULL, attrs);
-+
-+  registered_function &rfn = *ggc_alloc <registered_function> ();
-+  rfn.instance = instance;
-+  rfn.decl = decl;
-+  rfn.required_extensions = required_extensions;
-+  rfn.overloaded_p = overloaded_p;
-+  vec_safe_push (registered_functions, &rfn);
-+
-+  return rfn;
-+}
-+
-+/* Add a built-in function for INSTANCE, with the argument types given
-+   by ARGUMENT_TYPES and the return type given by RETURN_TYPE.
-+   REQUIRED_EXTENSIONS are the set of architecture extensions that the
-+   function requires.  FORCE_DIRECT_OVERLOADS is true if there is a
-+   one-to-one mapping between "short" and "full" names, and if standard
-+   overload resolution therefore isn't necessary.  */
-+void
-+function_builder::add_unique_function (const function_instance &instance,
-+                                       tree return_type,
-+                                       vec<tree> &argument_types,
-+                                       uint64_t required_extensions,
-+                                       bool force_direct_overloads)
-+{
-+  /* Add the function under its full (unique) name.  */
-+  char *name = get_name (instance, false);
-+  tree fntype = build_function_type_array (return_type,
-+                                           argument_types.length (),
-+                                           argument_types.address ());
-+  tree attrs = get_attributes (instance);
-+  registered_function &rfn = add_function (instance, name, fntype, attrs,
-+                                           required_extensions, false);
-+
-+  /* Enter the function into the hash table.  */
-+  hashval_t hash = instance.hash ();
-+  registered_function **rfn_slot
-+    = function_table->find_slot_with_hash (instance, hash, INSERT);
-+  gcc_assert (!*rfn_slot);
-+  *rfn_slot = &rfn;
-+
-+  /* Also add the function under its overloaded alias, if we want
-+     a separate decl for each instance of an overloaded function.  */
-+  if (m_direct_overloads || force_direct_overloads)
-+    {
-+      char *overload_name = get_name (instance, true);
-+      if (strcmp (name, overload_name) != 0)
-+        {
-+          /* Attribute lists shouldn't be shared.  */
-+          tree attrs = get_attributes (instance);
-+          add_function (instance, overload_name, fntype, attrs,
-+                        required_extensions, false);
-+        }
-+    }
-+
-+  obstack_free (&m_string_obstack, name);
-+}
-+
-+/* Add one function decl for INSTANCE, to be used with manual overload
-+   resolution.  REQUIRED_EXTENSIONS are the set of architecture extensions
-+   that the function requires.
-+
-+   For simplicity, deal with duplicate attempts to add the same function,
-+   including cases in which the new function requires more features than
-+   the original one did.  In that case we'll check whether the required
-+   features are available as part of resolving the function to the
-+   relevant unique function.  */
-+void
-+function_builder::add_overloaded_function (const function_instance &instance,
-+                                           uint64_t required_extensions)
-+{
-+  char *name = get_name (instance, true);
-+  if (registered_function **map_value = m_overload_names.get (name))
-+    gcc_assert ((*map_value)->instance == instance
-+                && ((*map_value)->required_extensions
-+                    & ~required_extensions) == 0);
-+  else
-+    {
-+      registered_function &rfn
-+        = add_function (instance, name, m_overload_type, NULL_TREE,
-+                        required_extensions, true);
-+      const char *permanent_name = IDENTIFIER_POINTER (DECL_NAME (rfn.decl));
-+      m_overload_names.put (permanent_name, &rfn);
-+    }
-+  obstack_free (&m_string_obstack, name);
-+}
-+
-+/* If we are using manual overload resolution, add one function decl
-+   for each overloaded function in GROUP.  Take the function base name
-+   from GROUP and the mode from MODE.  */
-+void
-+function_builder::add_overloaded_functions (const function_group_info &group,
-+                                            mode_suffix_index mode)
-+{
-+  if (m_direct_overloads)
-+    return;
-+
-+  unsigned int explicit_type0 = (*group.shape)->explicit_type_suffix_p (0);
-+  unsigned int explicit_type1 = (*group.shape)->explicit_type_suffix_p (1);
-+  for (unsigned int pi = 0; group.preds[pi] != NUM_PREDS; ++pi)
-+    {
-+      if (!explicit_type0 && !explicit_type1)
-+        {
-+          /* Deal with the common case in which there is one overloaded
-+             function for all type combinations.  */
-+          function_instance instance (group.base_name, *group.base,
-+                                      *group.shape, mode, types_none[0],
-+                                      group.preds[pi]);
-+          add_overloaded_function (instance, group.required_extensions);
-+        }
-+      else
-+        for (unsigned int ti = 0; group.types[ti][0] != NUM_TYPE_SUFFIXES;
-+             ++ti)
-+          {
-+            /* Stub out the types that are determined by overload
-+               resolution.  */
-+            type_suffix_pair types = {
-+              explicit_type0 ? group.types[ti][0] : NUM_TYPE_SUFFIXES,
-+              explicit_type1 ? group.types[ti][1] : NUM_TYPE_SUFFIXES
-+            };
-+            function_instance instance (group.base_name, *group.base,
-+                                        *group.shape, mode, types,
-+                                        group.preds[pi]);
-+            add_overloaded_function (instance, group.required_extensions);
-+          }
-+    }
-+}
-+
-+/* Register all the functions in GROUP.  */
-+void
-+function_builder::register_function_group (const function_group_info &group)
-+{
-+  (*group.shape)->build (*this, group);
-+}
-+
-+function_call_info::function_call_info (location_t location_in,
-+                                        const function_instance &instance_in,
-+                                        tree fndecl_in)
-+  : function_instance (instance_in), location (location_in), fndecl (fndecl_in)
-+{
-+}
-+
-+function_resolver::function_resolver (location_t location,
-+                                      const function_instance &instance,
-+                                      tree fndecl, vec<tree, va_gc> &arglist)
-+  : function_call_info (location, instance, fndecl), m_arglist (arglist)
-+{
-+}
-+
-+/* Return the vector type associated with type suffix TYPE.  */
-+tree
-+function_resolver::get_vector_type (type_suffix_index type)
-+{
-+  return acle_vector_types[0][type_suffixes[type].vector_type];
-+}
-+
-+/* Return the <stdint.h> name associated with TYPE.  Using the <stdint.h>
-+   name should be more user-friendly than the underlying canonical type,
-+   since it makes the signedness and bitwidth explicit.  */
-+const char *
-+function_resolver::get_scalar_type_name (type_suffix_index type)
-+{
-+  return vector_types[type_suffixes[type].vector_type].acle_name + 2;
-+}
-+
-+/* Return the type of argument I, or error_mark_node if it isn't
-+   well-formed.
*/ -+tree -+function_resolver::get_argument_type (unsigned int i) -+{ -+ tree arg = m_arglist[i]; -+ return arg == error_mark_node ? arg : TREE_TYPE (arg); -+} -+ -+/* Return true if argument I is some form of scalar value. */ -+bool -+function_resolver::scalar_argument_p (unsigned int i) -+{ -+ tree type = get_argument_type (i); -+ return (INTEGRAL_TYPE_P (type) -+ /* Allow pointer types, leaving the frontend to warn where -+ necessary. */ -+ || POINTER_TYPE_P (type) -+ || SCALAR_FLOAT_TYPE_P (type)); -+} -+ -+/* Report that the function has no form that takes type suffix TYPE. -+ Return error_mark_node. */ -+tree -+function_resolver::report_no_such_form (type_suffix_index type) -+{ -+ error_at (location, "%qE has no form that takes %qT arguments", -+ fndecl, get_vector_type (type)); -+ return error_mark_node; -+} -+ -+/* Silently check whether there is an instance of the function with the -+ mode suffix given by MODE and the type suffixes given by TYPE0 and TYPE1. -+ Return its function decl if so, otherwise return null. */ -+tree -+function_resolver::lookup_form (mode_suffix_index mode, -+ type_suffix_index type0, -+ type_suffix_index type1) -+{ -+ type_suffix_pair types = { type0, type1 }; -+ function_instance instance (base_name, base, shape, mode, types, pred); -+ registered_function *rfn -+ = function_table->find_with_hash (instance, instance.hash ()); -+ return rfn ? rfn->decl : NULL_TREE; -+} -+ -+/* Resolve the function to one with the mode suffix given by MODE and the -+ type suffixes given by TYPE0 and TYPE1. Return its function decl on -+ success, otherwise report an error and return error_mark_node. */ -+tree -+function_resolver::resolve_to (mode_suffix_index mode, -+ type_suffix_index type0, -+ type_suffix_index type1) -+{ -+ tree res = lookup_form (mode, type0, type1); -+ if (!res) -+ { -+ if (type1 == NUM_TYPE_SUFFIXES) -+ return report_no_such_form (type0); -+ if (type0 == type_suffix_ids[0]) -+ return report_no_such_form (type1); -+ /* To be filled in when we have other cases. */ -+ gcc_unreachable (); -+ } -+ return res; -+} -+ -+/* Require argument ARGNO to be a 32-bit or 64-bit scalar integer type. -+ Return the associated type suffix on success, otherwise report an -+ error and return NUM_TYPE_SUFFIXES. */ -+type_suffix_index -+function_resolver::infer_integer_scalar_type (unsigned int argno) -+{ -+ tree actual = get_argument_type (argno); -+ if (actual == error_mark_node) -+ return NUM_TYPE_SUFFIXES; -+ -+ /* Allow enums and booleans to decay to integers, for compatibility -+ with C++ overloading rules. */ -+ if (INTEGRAL_TYPE_P (actual)) -+ { -+ bool uns_p = TYPE_UNSIGNED (actual); -+ /* Honor the usual integer promotions, so that resolution works -+ in the same way as for C++. */ -+ if (TYPE_PRECISION (actual) < 32) -+ return TYPE_SUFFIX_s32; -+ if (TYPE_PRECISION (actual) == 32) -+ return uns_p ? TYPE_SUFFIX_u32 : TYPE_SUFFIX_s32; -+ if (TYPE_PRECISION (actual) == 64) -+ return uns_p ? TYPE_SUFFIX_u64 : TYPE_SUFFIX_s64; -+ } -+ -+ error_at (location, "passing %qT to argument %d of %qE, which expects" -+ " a 32-bit or 64-bit integer type", actual, argno + 1, fndecl); -+ return NUM_TYPE_SUFFIXES; -+} -+ -+/* Require argument ARGNO to be a pointer to a scalar type that has a -+ corresponding type suffix. Return that type suffix on success, -+ otherwise report an error and return NUM_TYPE_SUFFIXES. -+ GATHER_SCATTER_P is true if the function is a gather/scatter -+ operation, and so requires a pointer to 32-bit or 64-bit data. 
*/ -+type_suffix_index -+function_resolver::infer_pointer_type (unsigned int argno, -+ bool gather_scatter_p) -+{ -+ tree actual = get_argument_type (argno); -+ if (actual == error_mark_node) -+ return NUM_TYPE_SUFFIXES; -+ -+ if (TREE_CODE (actual) != POINTER_TYPE) -+ { -+ error_at (location, "passing %qT to argument %d of %qE, which" -+ " expects a pointer type", actual, argno + 1, fndecl); -+ if (VECTOR_TYPE_P (actual) && gather_scatter_p) -+ inform (location, "an explicit type suffix is needed" -+ " when using a vector of base addresses"); -+ return NUM_TYPE_SUFFIXES; -+ } -+ -+ tree target = TREE_TYPE (actual); -+ type_suffix_index type = find_type_suffix_for_scalar_type (target); -+ if (type == NUM_TYPE_SUFFIXES) -+ { -+ error_at (location, "passing %qT to argument %d of %qE, but %qT is not" -+ " a valid SVE element type", actual, argno + 1, fndecl, -+ build_qualified_type (target, 0)); -+ return NUM_TYPE_SUFFIXES; -+ } -+ unsigned int bits = type_suffixes[type].element_bits; -+ if (gather_scatter_p && bits != 32 && bits != 64) -+ { -+ error_at (location, "passing %qT to argument %d of %qE, which" -+ " expects a pointer to 32-bit or 64-bit elements", -+ actual, argno + 1, fndecl); -+ return NUM_TYPE_SUFFIXES; -+ } -+ -+ return type; -+} -+ -+/* Require argument ARGNO to be a single vector or a tuple of NUM_VECTORS -+ vectors; NUM_VECTORS is 1 for the former. Return the associated type -+ suffix on success, using TYPE_SUFFIX_b for predicates. Report an error -+ and return NUM_TYPE_SUFFIXES on failure. */ -+type_suffix_index -+function_resolver::infer_vector_or_tuple_type (unsigned int argno, -+ unsigned int num_vectors) -+{ -+ tree actual = get_argument_type (argno); -+ if (actual == error_mark_node) -+ return NUM_TYPE_SUFFIXES; -+ -+ /* A linear search should be OK here, since the code isn't hot and -+ the number of types is only small. */ -+ for (unsigned int size_i = 0; size_i < MAX_TUPLE_SIZE; ++size_i) -+ for (unsigned int suffix_i = 0; suffix_i < NUM_TYPE_SUFFIXES; ++suffix_i) -+ { -+ vector_type_index type_i = type_suffixes[suffix_i].vector_type; -+ tree type = acle_vector_types[size_i][type_i]; -+ if (type && TYPE_MAIN_VARIANT (actual) == TYPE_MAIN_VARIANT (type)) -+ { -+ if (size_i + 1 == num_vectors) -+ return type_suffix_index (suffix_i); -+ -+ if (num_vectors == 1) -+ error_at (location, "passing %qT to argument %d of %qE, which" -+ " expects a single SVE vector rather than a tuple", -+ actual, argno + 1, fndecl); -+ else if (size_i == 0 && type_i != VECTOR_TYPE_svbool_t) -+ error_at (location, "passing single vector %qT to argument %d" -+ " of %qE, which expects a tuple of %d vectors", -+ actual, argno + 1, fndecl, num_vectors); -+ else -+ error_at (location, "passing %qT to argument %d of %qE, which" -+ " expects a tuple of %d vectors", actual, argno + 1, -+ fndecl, num_vectors); -+ return NUM_TYPE_SUFFIXES; -+ } -+ } -+ -+ if (num_vectors == 1) -+ error_at (location, "passing %qT to argument %d of %qE, which" -+ " expects an SVE vector type", actual, argno + 1, fndecl); -+ else -+ error_at (location, "passing %qT to argument %d of %qE, which" -+ " expects an SVE tuple type", actual, argno + 1, fndecl); -+ return NUM_TYPE_SUFFIXES; -+} -+ -+/* Require argument ARGNO to have some form of vector type. Return the -+ associated type suffix on success, using TYPE_SUFFIX_b for predicates. -+ Report an error and return NUM_TYPE_SUFFIXES on failure. 
*/ -+type_suffix_index -+function_resolver::infer_vector_type (unsigned int argno) -+{ -+ return infer_vector_or_tuple_type (argno, 1); -+} -+ -+/* Like infer_vector_type, but also require the type to be integral. */ -+type_suffix_index -+function_resolver::infer_integer_vector_type (unsigned int argno) -+{ -+ type_suffix_index type = infer_vector_type (argno); -+ if (type == NUM_TYPE_SUFFIXES) -+ return type; -+ -+ if (!type_suffixes[type].integer_p) -+ { -+ error_at (location, "passing %qT to argument %d of %qE, which" -+ " expects a vector of integers", get_argument_type (argno), -+ argno + 1, fndecl); -+ return NUM_TYPE_SUFFIXES; -+ } -+ -+ return type; -+} -+ -+/* Like infer_vector_type, but also require the type to be an unsigned -+ integer. */ -+type_suffix_index -+function_resolver::infer_unsigned_vector_type (unsigned int argno) -+{ -+ type_suffix_index type = infer_vector_type (argno); -+ if (type == NUM_TYPE_SUFFIXES) -+ return type; -+ -+ if (!type_suffixes[type].unsigned_p) -+ { -+ error_at (location, "passing %qT to argument %d of %qE, which" -+ " expects a vector of unsigned integers", -+ get_argument_type (argno), argno + 1, fndecl); -+ return NUM_TYPE_SUFFIXES; -+ } -+ -+ return type; -+} -+ -+/* Like infer_vector_type, but also require the element size to be -+ 32 or 64 bits. */ -+type_suffix_index -+function_resolver::infer_sd_vector_type (unsigned int argno) -+{ -+ type_suffix_index type = infer_vector_type (argno); -+ if (type == NUM_TYPE_SUFFIXES) -+ return type; -+ -+ unsigned int bits = type_suffixes[type].element_bits; -+ if (bits != 32 && bits != 64) -+ { -+ error_at (location, "passing %qT to argument %d of %qE, which" -+ " expects a vector of 32-bit or 64-bit elements", -+ get_argument_type (argno), argno + 1, fndecl); -+ return NUM_TYPE_SUFFIXES; -+ } -+ -+ return type; -+} -+ -+/* If the function operates on tuples of vectors, require argument ARGNO to be -+ a tuple with the appropriate number of vectors, otherwise require it to be -+ a single vector. Return the associated type suffix on success, using -+ TYPE_SUFFIX_b for predicates. Report an error and return NUM_TYPE_SUFFIXES -+ on failure. */ -+type_suffix_index -+function_resolver::infer_tuple_type (unsigned int argno) -+{ -+ return infer_vector_or_tuple_type (argno, vectors_per_tuple ()); -+} -+ -+/* Require argument ARGNO to be a vector or scalar argument. Return true -+ if it is, otherwise report an appropriate error. */ -+bool -+function_resolver::require_vector_or_scalar_type (unsigned int argno) -+{ -+ tree actual = get_argument_type (argno); -+ if (actual == error_mark_node) -+ return false; -+ -+ if (!scalar_argument_p (argno) && !VECTOR_TYPE_P (actual)) -+ { -+ error_at (location, "passing %qT to argument %d of %qE, which" -+ " expects a vector or scalar type", actual, argno + 1, fndecl); -+ return false; -+ } -+ -+ return true; -+} -+ -+/* Require argument ARGNO to have vector type TYPE, in cases where this -+ requirement holds for all uses of the function. Return true if the -+ argument has the right form, otherwise report an appropriate error. 
*/ -+bool -+function_resolver::require_vector_type (unsigned int argno, -+ vector_type_index type) -+{ -+ tree expected = acle_vector_types[0][type]; -+ tree actual = get_argument_type (argno); -+ if (actual != error_mark_node -+ && TYPE_MAIN_VARIANT (expected) != TYPE_MAIN_VARIANT (actual)) -+ { -+ error_at (location, "passing %qT to argument %d of %qE, which" -+ " expects %qT", actual, argno + 1, fndecl, expected); -+ return false; -+ } -+ return true; -+} -+ -+/* Like require_vector_type, but TYPE is inferred from previous arguments -+ rather than being a fixed part of the function signature. This changes -+ the nature of the error messages. */ -+bool -+function_resolver::require_matching_vector_type (unsigned int argno, -+ type_suffix_index type) -+{ -+ type_suffix_index new_type = infer_vector_type (argno); -+ if (new_type == NUM_TYPE_SUFFIXES) -+ return false; -+ -+ if (type != new_type) -+ { -+ error_at (location, "passing %qT to argument %d of %qE, but" -+ " previous arguments had type %qT", -+ get_vector_type (new_type), argno + 1, fndecl, -+ get_vector_type (type)); -+ return false; -+ } -+ return true; -+} -+ -+/* Require argument ARGNO to be a vector type with the following properties: -+ -+ - the type class must be the same as FIRST_TYPE's if EXPECTED_TCLASS -+ is SAME_TYPE_CLASS, otherwise it must be EXPECTED_TCLASS itself. -+ -+ - the element size must be: -+ -+ - the same as FIRST_TYPE's if EXPECTED_BITS == SAME_SIZE -+ - half of FIRST_TYPE's if EXPECTED_BITS == HALF_SIZE -+ - a quarter of FIRST_TYPE's if EXPECTED_BITS == QUARTER_SIZE -+ - EXPECTED_BITS itself otherwise -+ -+ Return true if the argument has the required type, otherwise report -+ an appropriate error. -+ -+ FIRST_ARGNO is the first argument that is known to have type FIRST_TYPE. -+ Usually it comes before ARGNO, but sometimes it is more natural to resolve -+ arguments out of order. -+ -+ If the required properties depend on FIRST_TYPE then both FIRST_ARGNO and -+ ARGNO contribute to the resolution process. If the required properties -+ are fixed, only FIRST_ARGNO contributes to the resolution process. -+ -+ This function is a bit of a Swiss army knife. The complication comes -+ from trying to give good error messages when FIRST_ARGNO and ARGNO are -+ inconsistent, since either of them might be wrong. */ -+bool function_resolver:: -+require_derived_vector_type (unsigned int argno, -+ unsigned int first_argno, -+ type_suffix_index first_type, -+ type_class_index expected_tclass, -+ unsigned int expected_bits) -+{ -+ /* If the type needs to match FIRST_ARGNO exactly, use the preferred -+ error message for that case. The VECTOR_TYPE_P test excludes tuple -+ types, which we handle below instead. */ -+ bool both_vectors_p = VECTOR_TYPE_P (get_argument_type (first_argno)); -+ if (both_vectors_p -+ && expected_tclass == SAME_TYPE_CLASS -+ && expected_bits == SAME_SIZE) -+ { -+ /* There's no need to resolve this case out of order. */ -+ gcc_assert (argno > first_argno); -+ return require_matching_vector_type (argno, first_type); -+ } -+ -+ /* Use FIRST_TYPE to get the expected type class and element size. 
*/ -+ type_class_index orig_expected_tclass = expected_tclass; -+ if (expected_tclass == NUM_TYPE_CLASSES) -+ expected_tclass = type_suffixes[first_type].tclass; -+ -+ unsigned int orig_expected_bits = expected_bits; -+ if (expected_bits == SAME_SIZE) -+ expected_bits = type_suffixes[first_type].element_bits; -+ else if (expected_bits == HALF_SIZE) -+ expected_bits = type_suffixes[first_type].element_bits / 2; -+ else if (expected_bits == QUARTER_SIZE) -+ expected_bits = type_suffixes[first_type].element_bits / 4; -+ -+ /* If the expected type doesn't depend on FIRST_TYPE at all, -+ just check for the fixed choice of vector type. */ -+ if (expected_tclass == orig_expected_tclass -+ && expected_bits == orig_expected_bits) -+ { -+ const type_suffix_info &expected_suffix -+ = type_suffixes[find_type_suffix (expected_tclass, expected_bits)]; -+ return require_vector_type (argno, expected_suffix.vector_type); -+ } -+ -+ /* Require the argument to be some form of SVE vector type, -+ without being specific about the type of vector we want. */ -+ type_suffix_index actual_type = infer_vector_type (argno); -+ if (actual_type == NUM_TYPE_SUFFIXES) -+ return false; -+ -+ /* Exit now if we got the right type. */ -+ bool tclass_ok_p = (type_suffixes[actual_type].tclass == expected_tclass); -+ bool size_ok_p = (type_suffixes[actual_type].element_bits == expected_bits); -+ if (tclass_ok_p && size_ok_p) -+ return true; -+ -+ /* First look for cases in which the actual type contravenes a fixed -+ size requirement, without having to refer to FIRST_TYPE. */ -+ if (!size_ok_p && expected_bits == orig_expected_bits) -+ { -+ error_at (location, "passing %qT to argument %d of %qE, which" -+ " expects a vector of %d-bit elements", -+ get_vector_type (actual_type), argno + 1, fndecl, -+ expected_bits); -+ return false; -+ } -+ -+ /* Likewise for a fixed type class requirement. This is only ever -+ needed for signed and unsigned types, so don't create unnecessary -+ translation work for other type classes. */ -+ if (!tclass_ok_p && orig_expected_tclass == TYPE_signed) -+ { -+ error_at (location, "passing %qT to argument %d of %qE, which" -+ " expects a vector of signed integers", -+ get_vector_type (actual_type), argno + 1, fndecl); -+ return false; -+ } -+ if (!tclass_ok_p && orig_expected_tclass == TYPE_unsigned) -+ { -+ error_at (location, "passing %qT to argument %d of %qE, which" -+ " expects a vector of unsigned integers", -+ get_vector_type (actual_type), argno + 1, fndecl); -+ return false; -+ } -+ -+ /* Make sure that FIRST_TYPE itself is sensible before using it -+ as a basis for an error message. */ -+ if (resolve_to (mode_suffix_id, first_type) == error_mark_node) -+ return false; -+ -+ /* If the arguments have consistent type classes, but a link between -+ the sizes has been broken, try to describe the error in those terms. */ -+ if (both_vectors_p && tclass_ok_p && orig_expected_bits == SAME_SIZE) -+ { -+ if (argno < first_argno) -+ { -+ std::swap (argno, first_argno); -+ std::swap (actual_type, first_type); -+ } -+ error_at (location, "arguments %d and %d of %qE must have the" -+ " same element size, but the values passed here have type" -+ " %qT and %qT respectively", first_argno + 1, argno + 1, -+ fndecl, get_vector_type (first_type), -+ get_vector_type (actual_type)); -+ return false; -+ } -+ -+ /* Likewise in reverse: look for cases in which the sizes are consistent -+ but a link between the type classes has been broken. 
*/ -+ if (both_vectors_p -+ && size_ok_p -+ && orig_expected_tclass == SAME_TYPE_CLASS -+ && type_suffixes[first_type].integer_p -+ && type_suffixes[actual_type].integer_p) -+ { -+ if (argno < first_argno) -+ { -+ std::swap (argno, first_argno); -+ std::swap (actual_type, first_type); -+ } -+ error_at (location, "arguments %d and %d of %qE must have the" -+ " same signedness, but the values passed here have type" -+ " %qT and %qT respectively", first_argno + 1, argno + 1, -+ fndecl, get_vector_type (first_type), -+ get_vector_type (actual_type)); -+ return false; -+ } -+ -+ /* The two arguments are wildly inconsistent. */ -+ type_suffix_index expected_type -+ = find_type_suffix (expected_tclass, expected_bits); -+ error_at (location, "passing %qT instead of the expected %qT to argument" -+ " %d of %qE, after passing %qT to argument %d", -+ get_vector_type (actual_type), get_vector_type (expected_type), -+ argno + 1, fndecl, get_argument_type (first_argno), -+ first_argno + 1); -+ return false; -+} -+ -+/* Require argument ARGNO to match argument FIRST_ARGNO, which was inferred -+ to be a pointer to a scalar element of type TYPE. */ -+bool -+function_resolver::require_matching_pointer_type (unsigned int argno, -+ unsigned int first_argno, -+ type_suffix_index type) -+{ -+ type_suffix_index new_type = infer_pointer_type (argno); -+ if (new_type == NUM_TYPE_SUFFIXES) -+ return false; -+ -+ if (type != new_type) -+ { -+ error_at (location, "passing %qT to argument %d of %qE, but" -+ " argument %d had type %qT", get_argument_type (argno), -+ argno + 1, fndecl, first_argno + 1, -+ get_argument_type (first_argno)); -+ return false; -+ } -+ return true; -+} -+ -+/* Require argument ARGNO to be a (possibly variable) scalar, using EXPECTED -+ as the name of its expected type. Return true if the argument has the -+ right form, otherwise report an appropriate error. */ -+bool -+function_resolver::require_scalar_type (unsigned int argno, -+ const char *expected) -+{ -+ if (!scalar_argument_p (argno)) -+ { -+ error_at (location, "passing %qT to argument %d of %qE, which" -+ " expects %qs", get_argument_type (argno), argno + 1, -+ fndecl, expected); -+ return false; -+ } -+ return true; -+} -+ -+/* Require argument ARGNO to be some form of pointer, without being specific -+ about its target type. Return true if the argument has the right form, -+ otherwise report an appropriate error. */ -+bool -+function_resolver::require_pointer_type (unsigned int argno) -+{ -+ if (!scalar_argument_p (argno)) -+ { -+ error_at (location, "passing %qT to argument %d of %qE, which" -+ " expects a scalar pointer", get_argument_type (argno), -+ argno + 1, fndecl); -+ return false; -+ } -+ return true; -+} -+ -+/* Argument FIRST_ARGNO is a scalar with type EXPECTED_TYPE, and argument -+ ARGNO should be consistent with it. Return true if it is, otherwise -+ report an appropriate error. 
*/ -+bool function_resolver:: -+require_matching_integer_scalar_type (unsigned int argno, -+ unsigned int first_argno, -+ type_suffix_index expected_type) -+{ -+ type_suffix_index actual_type = infer_integer_scalar_type (argno); -+ if (actual_type == NUM_TYPE_SUFFIXES) -+ return false; -+ -+ if (actual_type == expected_type) -+ return true; -+ -+ error_at (location, "call to %qE is ambiguous; argument %d has type" -+ " %qs but argument %d has type %qs", fndecl, -+ first_argno + 1, get_scalar_type_name (expected_type), -+ argno + 1, get_scalar_type_name (actual_type)); -+ return false; -+} -+ -+/* Require argument ARGNO to be a (possibly variable) scalar, expecting it -+ to have the following properties: -+ -+ - the type class must be the same as for type suffix 0 if EXPECTED_TCLASS -+ is SAME_TYPE_CLASS, otherwise it must be EXPECTED_TCLASS itself. -+ -+ - the element size must be the same as for type suffix 0 if EXPECTED_BITS -+ is SAME_TYPE_SIZE, otherwise it must be EXPECTED_BITS itself. -+ -+ Return true if the argument is valid, otherwise report an appropriate error. -+ -+ Note that we don't check whether the scalar type actually has the required -+ properties, since that's subject to implicit promotions and conversions. -+ Instead we just use the expected properties to tune the error message. */ -+bool function_resolver:: -+require_derived_scalar_type (unsigned int argno, -+ type_class_index expected_tclass, -+ unsigned int expected_bits) -+{ -+ gcc_assert (expected_tclass == SAME_TYPE_CLASS -+ || expected_tclass == TYPE_signed -+ || expected_tclass == TYPE_unsigned); -+ -+ /* If the expected type doesn't depend on the type suffix at all, -+ just check for the fixed choice of scalar type. */ -+ if (expected_tclass != SAME_TYPE_CLASS && expected_bits != SAME_SIZE) -+ { -+ type_suffix_index expected_type -+ = find_type_suffix (expected_tclass, expected_bits); -+ return require_scalar_type (argno, get_scalar_type_name (expected_type)); -+ } -+ -+ if (scalar_argument_p (argno)) -+ return true; -+ -+ if (expected_tclass == SAME_TYPE_CLASS) -+ /* It doesn't really matter whether the element is expected to be -+ the same size as type suffix 0. */ -+ error_at (location, "passing %qT to argument %d of %qE, which" -+ " expects a scalar element", get_argument_type (argno), -+ argno + 1, fndecl); -+ else -+ /* It doesn't seem useful to distinguish between signed and unsigned -+ scalars here. */ -+ error_at (location, "passing %qT to argument %d of %qE, which" -+ " expects a scalar integer", get_argument_type (argno), -+ argno + 1, fndecl); -+ return false; -+} -+ -+/* Require argument ARGNO to be suitable for an integer constant expression. -+ Return true if it is, otherwise report an appropriate error. -+ -+ function_checker checks whether the argument is actually constant and -+ has a suitable range. The reason for distinguishing immediate arguments -+ here is because it provides more consistent error messages than -+ require_scalar_type would. */ -+bool -+function_resolver::require_integer_immediate (unsigned int argno) -+{ -+ if (!scalar_argument_p (argno)) -+ { -+ report_non_ice (location, fndecl, argno); -+ return false; -+ } -+ return true; -+} -+ -+/* Require argument ARGNO to be a vector base in a gather-style address. -+ Return its type on success, otherwise return NUM_VECTOR_TYPES. 
*/ -+vector_type_index -+function_resolver::infer_vector_base_type (unsigned int argno) -+{ -+ type_suffix_index type = infer_vector_type (argno); -+ if (type == NUM_TYPE_SUFFIXES) -+ return NUM_VECTOR_TYPES; -+ -+ if (type == TYPE_SUFFIX_u32 || type == TYPE_SUFFIX_u64) -+ return type_suffixes[type].vector_type; -+ -+ error_at (location, "passing %qT to argument %d of %qE, which" -+ " expects %qs or %qs", get_argument_type (argno), -+ argno + 1, fndecl, "svuint32_t", "svuint64_t"); -+ return NUM_VECTOR_TYPES; -+} -+ -+/* Require argument ARGNO to be a vector displacement in a gather-style -+ address. Return its type on success, otherwise return NUM_VECTOR_TYPES. */ -+vector_type_index -+function_resolver::infer_vector_displacement_type (unsigned int argno) -+{ -+ type_suffix_index type = infer_integer_vector_type (argno); -+ if (type == NUM_TYPE_SUFFIXES) -+ return NUM_VECTOR_TYPES; -+ -+ if (type_suffixes[type].integer_p -+ && (type_suffixes[type].element_bits == 32 -+ || type_suffixes[type].element_bits == 64)) -+ return type_suffixes[type].vector_type; -+ -+ error_at (location, "passing %qT to argument %d of %qE, which" -+ " expects a vector of 32-bit or 64-bit integers", -+ get_argument_type (argno), argno + 1, fndecl); -+ return NUM_VECTOR_TYPES; -+} -+ -+/* Require argument ARGNO to be a vector displacement in a gather-style -+ address. There are three possible uses: -+ -+ - for loading into elements of type TYPE (when LOAD_P is true) -+ - for storing from elements of type TYPE (when LOAD_P is false) -+ - for prefetching data (when TYPE is NUM_TYPE_SUFFIXES) -+ -+ The overloaded function's mode suffix determines the units of the -+ displacement (bytes for "_offset", elements for "_index"). -+ -+ Return the associated mode on success, otherwise report an error -+ and return MODE_none. */ -+mode_suffix_index -+function_resolver::resolve_sv_displacement (unsigned int argno, -+ type_suffix_index type, -+ bool load_p) -+{ -+ if (type == NUM_TYPE_SUFFIXES) -+ { -+ /* For prefetches, the base is a void pointer and the displacement -+ can be any valid offset or index type. */ -+ vector_type_index displacement_vector_type -+ = infer_vector_displacement_type (argno); -+ if (displacement_vector_type == NUM_VECTOR_TYPES) -+ return MODE_none; -+ -+ mode_suffix_index mode = find_mode_suffix (NUM_VECTOR_TYPES, -+ displacement_vector_type, -+ displacement_units ()); -+ gcc_assert (mode != MODE_none); -+ return mode; -+ } -+ -+ unsigned int required_bits = type_suffixes[type].element_bits; -+ if (required_bits == 32 -+ && displacement_units () == UNITS_elements -+ && !lookup_form (MODE_s32index, type) -+ && !lookup_form (MODE_u32index, type)) -+ { -+ if (lookup_form (MODE_u32base_index, type)) -+ { -+ if (type_suffix_ids[0] == NUM_TYPE_SUFFIXES) -+ { -+ gcc_assert (!load_p); -+ error_at (location, "when storing %qT, %qE requires a vector" -+ " base and a scalar index", get_vector_type (type), -+ fndecl); -+ } -+ else -+ error_at (location, "%qE requires a vector base and a scalar" -+ " index", fndecl); -+ } -+ else -+ error_at (location, "%qE does not support 32-bit vector type %qT", -+ fndecl, get_vector_type (type)); -+ return MODE_none; -+ } -+ -+ /* Check for some form of vector type, without naming any in particular -+ as being expected. */ -+ type_suffix_index displacement_type = infer_vector_type (argno); -+ if (displacement_type == NUM_TYPE_SUFFIXES) -+ return MODE_none; -+ -+ /* If the displacement type is consistent with the data vector type, -+ try to find the associated mode suffix. 
This will fall through -+ for non-integral displacement types. */ -+ if (type_suffixes[displacement_type].element_bits == required_bits) -+ { -+ vector_type_index displacement_vector_type -+ = type_suffixes[displacement_type].vector_type; -+ mode_suffix_index mode = find_mode_suffix (NUM_VECTOR_TYPES, -+ displacement_vector_type, -+ displacement_units ()); -+ if (mode != MODE_none) -+ { -+ if (mode == MODE_s32offset -+ && !lookup_form (mode, type) -+ && lookup_form (MODE_u32offset, type)) -+ { -+ if (type_suffix_ids[0] == NUM_TYPE_SUFFIXES) -+ error_at (location, "%qE does not support 32-bit sign-extended" -+ " offsets", fndecl); -+ else -+ error_at (location, "%qE does not support sign-extended" -+ " offsets", fndecl); -+ return MODE_none; -+ } -+ return mode; -+ } -+ } -+ -+ if (type_suffix_ids[0] == NUM_TYPE_SUFFIXES) -+ { -+ /* TYPE has been inferred rather than specified by the user, -+ so mention it in the error messages. */ -+ if (load_p) -+ error_at (location, "passing %qT to argument %d of %qE, which when" -+ " loading %qT expects a vector of %d-bit integers", -+ get_argument_type (argno), argno + 1, fndecl, -+ get_vector_type (type), required_bits); -+ else -+ error_at (location, "passing %qT to argument %d of %qE, which when" -+ " storing %qT expects a vector of %d-bit integers", -+ get_argument_type (argno), argno + 1, fndecl, -+ get_vector_type (type), required_bits); -+ } -+ else -+ /* TYPE is part of the function name. */ -+ error_at (location, "passing %qT to argument %d of %qE, which" -+ " expects a vector of %d-bit integers", -+ get_argument_type (argno), argno + 1, fndecl, required_bits); -+ return MODE_none; -+} -+ -+/* Require the arguments starting at ARGNO to form a gather-style address. -+ There are three possible uses: -+ -+ - for loading into elements of type TYPE (when LOAD_P is true) -+ - for storing from elements of type TYPE (when LOAD_P is false) -+ - for prefetching data (when TYPE is NUM_TYPE_SUFFIXES) -+ -+ The three possible addresses are: -+ -+ - a vector base with no displacement -+ - a vector base and a scalar displacement -+ - a scalar (pointer) base and a vector displacement -+ -+ The overloaded function's mode suffix determines whether there is -+ a displacement, and if so, what units it uses: -+ -+ - MODE_none: no displacement -+ - MODE_offset: the displacement is measured in bytes -+ - MODE_index: the displacement is measured in elements -+ -+ Return the mode of the non-overloaded function on success, otherwise -+ report an error and return MODE_none. */ -+mode_suffix_index -+function_resolver::resolve_gather_address (unsigned int argno, -+ type_suffix_index type, -+ bool load_p) -+{ -+ tree actual = get_argument_type (argno); -+ if (actual == error_mark_node) -+ return MODE_none; -+ -+ if (displacement_units () != UNITS_none) -+ { -+ /* Some form of displacement is needed. First handle a scalar -+ pointer base and a vector displacement. */ -+ if (scalar_argument_p (argno)) -+ /* Don't check the pointer type here, since there's only one valid -+ choice. Leave that to the frontend. */ -+ return resolve_sv_displacement (argno + 1, type, load_p); -+ -+ if (!VECTOR_TYPE_P (actual)) -+ { -+ error_at (location, "passing %qT to argument %d of %qE," -+ " which expects a vector or pointer base address", -+ actual, argno + 1, fndecl); -+ return MODE_none; -+ } -+ } -+ -+ /* Check for the correct choice of vector base type. 
*/ -+ vector_type_index base_vector_type; -+ if (type == NUM_TYPE_SUFFIXES) -+ { -+ /* Since prefetches have no type suffix, there is a free choice -+ between 32-bit and 64-bit base addresses. */ -+ base_vector_type = infer_vector_base_type (argno); -+ if (base_vector_type == NUM_VECTOR_TYPES) -+ return MODE_none; -+ } -+ else -+ { -+ /* Check for some form of vector type, without saying which type -+ we expect. */ -+ type_suffix_index base_type = infer_vector_type (argno); -+ if (base_type == NUM_TYPE_SUFFIXES) -+ return MODE_none; -+ -+ /* Check whether the type is the right one. */ -+ unsigned int required_bits = type_suffixes[type].element_bits; -+ gcc_assert (required_bits == 32 || required_bits == 64); -+ type_suffix_index required_type = (required_bits == 32 -+ ? TYPE_SUFFIX_u32 -+ : TYPE_SUFFIX_u64); -+ if (required_type != base_type) -+ { -+ error_at (location, "passing %qT to argument %d of %qE," -+ " which expects %qT", actual, argno + 1, fndecl, -+ get_vector_type (required_type)); -+ return MODE_none; -+ } -+ base_vector_type = type_suffixes[base_type].vector_type; -+ } -+ -+ /* Check the scalar displacement, if any. */ -+ if (displacement_units () != UNITS_none -+ && !require_scalar_type (argno + 1, "int64_t")) -+ return MODE_none; -+ -+ /* Find the appropriate mode suffix. The checks above should have -+ weeded out all erroneous cases. */ -+ for (unsigned int mode_i = 0; mode_i < ARRAY_SIZE (mode_suffixes); ++mode_i) -+ { -+ const mode_suffix_info &mode = mode_suffixes[mode_i]; -+ if (mode.base_vector_type == base_vector_type -+ && mode.displacement_vector_type == NUM_VECTOR_TYPES -+ && mode.displacement_units == displacement_units ()) -+ return mode_suffix_index (mode_i); -+ } -+ -+ gcc_unreachable (); -+} -+ -+/* Require arguments ARGNO and ARGNO + 1 to form an ADR-style address, -+ i.e. one with a vector of base addresses and a vector of displacements. -+ The overloaded function's mode suffix determines the units of the -+ displacement (bytes for "_offset", elements for "_index"). -+ -+ Return the associated mode suffix on success, otherwise report -+ an error and return MODE_none. */ -+mode_suffix_index -+function_resolver::resolve_adr_address (unsigned int argno) -+{ -+ vector_type_index base_type = infer_vector_base_type (argno); -+ if (base_type == NUM_VECTOR_TYPES) -+ return MODE_none; -+ -+ vector_type_index displacement_type -+ = infer_vector_displacement_type (argno + 1); -+ if (displacement_type == NUM_VECTOR_TYPES) -+ return MODE_none; -+ -+ mode_suffix_index mode = find_mode_suffix (base_type, displacement_type, -+ displacement_units ()); -+ if (mode == MODE_none) -+ { -+ if (mode_suffix_id == MODE_offset) -+ error_at (location, "cannot combine a base of type %qT with" -+ " an offset of type %qT", -+ get_argument_type (argno), get_argument_type (argno + 1)); -+ else -+ error_at (location, "cannot combine a base of type %qT with" -+ " an index of type %qT", -+ get_argument_type (argno), get_argument_type (argno + 1)); -+ } -+ return mode; -+} -+ -+/* Require the function to have exactly EXPECTED arguments. Return true -+ if it does, otherwise report an appropriate error. 
*/ -+bool -+function_resolver::check_num_arguments (unsigned int expected) -+{ -+ if (m_arglist.length () < expected) -+ error_at (location, "too few arguments to function %qE", fndecl); -+ else if (m_arglist.length () > expected) -+ error_at (location, "too many arguments to function %qE", fndecl); -+ return m_arglist.length () == expected; -+} -+ -+/* If the function is predicated, check that the first argument is a -+ suitable governing predicate. Also check that there are NOPS further -+ arguments after any governing predicate, but don't check what they are. -+ -+ Return true on success, otherwise report a suitable error. -+ When returning true: -+ -+ - set I to the number of the first unchecked argument. -+ - set NARGS to the total number of arguments. */ -+bool -+function_resolver::check_gp_argument (unsigned int nops, -+ unsigned int &i, unsigned int &nargs) -+{ -+ i = 0; -+ if (pred != PRED_none) -+ { -+ /* Unary merge operations should use resolve_unary instead. */ -+ gcc_assert (nops != 1 || pred != PRED_m); -+ nargs = nops + 1; -+ if (!check_num_arguments (nargs) -+ || !require_vector_type (i, VECTOR_TYPE_svbool_t)) -+ return false; -+ i += 1; -+ } -+ else -+ { -+ nargs = nops; -+ if (!check_num_arguments (nargs)) -+ return false; -+ } -+ -+ return true; -+} -+ -+/* Finish resolving a function whose final argument can be a vector -+ or a scalar, with the function having an implicit "_n" suffix -+ in the latter case. This "_n" form might only exist for certain -+ type suffixes. -+ -+ ARGNO is the index of the final argument. The inferred type suffix -+ was obtained from argument FIRST_ARGNO, which has type FIRST_TYPE. -+ EXPECTED_TCLASS and EXPECTED_BITS describe the expected properties -+ of the final vector or scalar argument, in the same way as for -+ require_derived_vector_type. INFERRED_TYPE is the inferred type -+ suffix itself, or NUM_TYPE_SUFFIXES if it's the same as FIRST_TYPE. -+ -+ Return the function decl of the resolved function on success, -+ otherwise report a suitable error and return error_mark_node. */ -+tree function_resolver:: -+finish_opt_n_resolution (unsigned int argno, unsigned int first_argno, -+ type_suffix_index first_type, -+ type_class_index expected_tclass, -+ unsigned int expected_bits, -+ type_suffix_index inferred_type) -+{ -+ if (inferred_type == NUM_TYPE_SUFFIXES) -+ inferred_type = first_type; -+ tree scalar_form = lookup_form (MODE_n, inferred_type); -+ -+ /* Allow the final argument to be scalar, if an _n form exists. */ -+ if (scalar_argument_p (argno)) -+ { -+ if (scalar_form) -+ return scalar_form; -+ -+ /* Check the vector form normally. If that succeeds, raise an -+ error about having no corresponding _n form. */ -+ tree res = resolve_to (mode_suffix_id, inferred_type); -+ if (res != error_mark_node) -+ error_at (location, "passing %qT to argument %d of %qE, but its" -+ " %qT form does not accept scalars", -+ get_argument_type (argno), argno + 1, fndecl, -+ get_vector_type (first_type)); -+ return error_mark_node; -+ } -+ -+ /* If an _n form does exist, provide a more accurate message than -+ require_derived_vector_type would for arguments that are neither -+ vectors nor scalars. */ -+ if (scalar_form && !require_vector_or_scalar_type (argno)) -+ return error_mark_node; -+ -+ /* Check for the correct vector type. 
*/ -+ if (!require_derived_vector_type (argno, first_argno, first_type, -+ expected_tclass, expected_bits)) -+ return error_mark_node; -+ -+ return resolve_to (mode_suffix_id, inferred_type); -+} -+ -+/* Resolve a (possibly predicated) unary function. If the function uses -+ merge predication or if TREAT_AS_MERGE_P is true, there is an extra -+ vector argument before the governing predicate that specifies the -+ values of inactive elements. This argument has the following -+ properties: -+ -+ - the type class must be the same as for active elements if MERGE_TCLASS -+ is SAME_TYPE_CLASS, otherwise it must be MERGE_TCLASS itself. -+ -+ - the element size must be the same as for active elements if MERGE_BITS -+ is SAME_TYPE_SIZE, otherwise it must be MERGE_BITS itself. -+ -+ Return the function decl of the resolved function on success, -+ otherwise report a suitable error and return error_mark_node. */ -+tree -+function_resolver::resolve_unary (type_class_index merge_tclass, -+ unsigned int merge_bits, -+ bool treat_as_merge_p) -+{ -+ type_suffix_index type; -+ if (pred == PRED_m || treat_as_merge_p) -+ { -+ if (!check_num_arguments (3)) -+ return error_mark_node; -+ if (merge_tclass == SAME_TYPE_CLASS && merge_bits == SAME_SIZE) -+ { -+ /* The inactive elements are the same as the active elements, -+ so we can use normal left-to-right resolution. */ -+ if ((type = infer_vector_type (0)) == NUM_TYPE_SUFFIXES -+ || !require_vector_type (1, VECTOR_TYPE_svbool_t) -+ || !require_matching_vector_type (2, type)) -+ return error_mark_node; -+ } -+ else -+ { -+ /* The inactive element type is a function of the active one, -+ so resolve the active one first. */ -+ if (!require_vector_type (1, VECTOR_TYPE_svbool_t) -+ || (type = infer_vector_type (2)) == NUM_TYPE_SUFFIXES -+ || !require_derived_vector_type (0, 2, type, merge_tclass, -+ merge_bits)) -+ return error_mark_node; -+ } -+ } -+ else -+ { -+ /* We just need to check the predicate (if any) and the single -+ vector argument. */ -+ unsigned int i, nargs; -+ if (!check_gp_argument (1, i, nargs) -+ || (type = infer_vector_type (i)) == NUM_TYPE_SUFFIXES) -+ return error_mark_node; -+ } -+ -+ /* Handle convert-like functions in which the first type suffix is -+ explicit. */ -+ if (type_suffix_ids[0] != NUM_TYPE_SUFFIXES) -+ return resolve_to (mode_suffix_id, type_suffix_ids[0], type); -+ -+ return resolve_to (mode_suffix_id, type); -+} -+ -+/* Resolve a (possibly predicated) function that takes NOPS like-typed -+ vector arguments followed by NIMM integer immediates. Return the -+ function decl of the resolved function on success, otherwise report -+ a suitable error and return error_mark_node. 
*/ -+tree -+function_resolver::resolve_uniform (unsigned int nops, unsigned int nimm) -+{ -+ unsigned int i, nargs; -+ type_suffix_index type; -+ if (!check_gp_argument (nops + nimm, i, nargs) -+ || (type = infer_vector_type (i)) == NUM_TYPE_SUFFIXES) -+ return error_mark_node; -+ -+ i += 1; -+ for (; i < nargs - nimm; ++i) -+ if (!require_matching_vector_type (i, type)) -+ return error_mark_node; -+ -+ for (; i < nargs; ++i) -+ if (!require_integer_immediate (i)) -+ return error_mark_node; -+ -+ return resolve_to (mode_suffix_id, type); -+} -+ -+/* Resolve a (possibly predicated) function that offers a choice between -+ taking: -+ -+ - NOPS like-typed vector arguments or -+ - NOPS - 1 like-typed vector arguments followed by a scalar argument -+ -+ Return the function decl of the resolved function on success, -+ otherwise report a suitable error and return error_mark_node. */ -+tree -+function_resolver::resolve_uniform_opt_n (unsigned int nops) -+{ -+ unsigned int i, nargs; -+ type_suffix_index type; -+ if (!check_gp_argument (nops, i, nargs) -+ || (type = infer_vector_type (i)) == NUM_TYPE_SUFFIXES) -+ return error_mark_node; -+ -+ unsigned int first_arg = i++; -+ for (; i < nargs - 1; ++i) -+ if (!require_matching_vector_type (i, type)) -+ return error_mark_node; -+ -+ return finish_opt_n_resolution (i, first_arg, type); -+} -+ -+/* If the call is erroneous, report an appropriate error and return -+ error_mark_node. Otherwise, if the function is overloaded, return -+ the decl of the non-overloaded function. Return NULL_TREE otherwise, -+ indicating that the call should be processed in the normal way. */ -+tree -+function_resolver::resolve () -+{ -+ return shape->resolve (*this); -+} -+ -+function_checker::function_checker (location_t location, -+ const function_instance &instance, -+ tree fndecl, tree fntype, -+ unsigned int nargs, tree *args) -+ : function_call_info (location, instance, fndecl), -+ m_fntype (fntype), m_nargs (nargs), m_args (args), -+ /* We don't have to worry about unary _m operations here, since they -+ never have arguments that need checking. */ -+ m_base_arg (pred != PRED_none ? 1 : 0) -+{ -+} -+ -+/* Return true if argument ARGNO exists. which it might not for -+ erroneous calls. It is safe to wave through checks if this -+ function returns false. */ -+bool -+function_checker::argument_exists_p (unsigned int argno) -+{ -+ gcc_assert (argno < (unsigned int) type_num_arguments (m_fntype)); -+ return argno < m_nargs; -+} -+ -+/* Check that argument ARGNO is an integer constant expression and -+ store its value in VALUE_OUT if so. The caller should first -+ check that argument ARGNO exists. */ -+bool -+function_checker::require_immediate (unsigned int argno, -+ HOST_WIDE_INT &value_out) -+{ -+ gcc_assert (argno < m_nargs); -+ tree arg = m_args[argno]; -+ -+ /* The type and range are unsigned, so read the argument as an -+ unsigned rather than signed HWI. */ -+ if (!tree_fits_uhwi_p (arg)) -+ { -+ report_non_ice (location, fndecl, argno); -+ return false; -+ } -+ -+ /* ...but treat VALUE_OUT as signed for error reporting, since printing -+ -1 is more user-friendly than the maximum uint64_t value. */ -+ value_out = tree_to_uhwi (arg); -+ return true; -+} -+ -+/* Check that argument REL_ARGNO is an integer constant expression that -+ has the value VALUE0 or VALUE1. REL_ARGNO counts from the end of the -+ predication arguments. 
*/ -+bool -+function_checker::require_immediate_either_or (unsigned int rel_argno, -+ HOST_WIDE_INT value0, -+ HOST_WIDE_INT value1) -+{ -+ unsigned int argno = m_base_arg + rel_argno; -+ if (!argument_exists_p (argno)) -+ return true; -+ -+ HOST_WIDE_INT actual; -+ if (!require_immediate (argno, actual)) -+ return false; -+ -+ if (actual != value0 && actual != value1) -+ { -+ report_neither_nor (location, fndecl, argno, actual, 90, 270); -+ return false; -+ } -+ -+ return true; -+} -+ -+/* Check that argument REL_ARGNO is an integer constant expression that has -+ a valid value for enumeration type TYPE. REL_ARGNO counts from the end -+ of the predication arguments. */ -+bool -+function_checker::require_immediate_enum (unsigned int rel_argno, tree type) -+{ -+ unsigned int argno = m_base_arg + rel_argno; -+ if (!argument_exists_p (argno)) -+ return true; -+ -+ HOST_WIDE_INT actual; -+ if (!require_immediate (argno, actual)) -+ return false; -+ -+ for (tree entry = TYPE_VALUES (type); entry; entry = TREE_CHAIN (entry)) -+ { -+ /* The value is an INTEGER_CST for C and a CONST_DECL wrapper -+ around an INTEGER_CST for C++. */ -+ tree value = TREE_VALUE (entry); -+ if (TREE_CODE (value) == CONST_DECL) -+ value = DECL_INITIAL (value); -+ if (wi::to_widest (value) == actual) -+ return true; -+ } -+ -+ report_not_enum (location, fndecl, argno, actual, type); -+ return false; -+} -+ -+/* Check that argument REL_ARGNO is suitable for indexing argument -+ REL_ARGNO - 1, in groups of GROUP_SIZE elements. REL_ARGNO counts -+ from the end of the predication arguments. */ -+bool -+function_checker::require_immediate_lane_index (unsigned int rel_argno, -+ unsigned int group_size) -+{ -+ unsigned int argno = m_base_arg + rel_argno; -+ if (!argument_exists_p (argno)) -+ return true; -+ -+ /* Get the type of the previous argument. tree_argument_type wants a -+ 1-based number, whereas ARGNO is 0-based. */ -+ machine_mode mode = TYPE_MODE (type_argument_type (m_fntype, argno)); -+ gcc_assert (VECTOR_MODE_P (mode)); -+ unsigned int nlanes = 128 / (group_size * GET_MODE_UNIT_BITSIZE (mode)); -+ return require_immediate_range (rel_argno, 0, nlanes - 1); -+} -+ -+/* Check that argument REL_ARGNO is an integer constant expression that -+ has one of the given values. */ -+bool -+function_checker::require_immediate_one_of (unsigned int rel_argno, -+ HOST_WIDE_INT value0, -+ HOST_WIDE_INT value1, -+ HOST_WIDE_INT value2, -+ HOST_WIDE_INT value3) -+{ -+ unsigned int argno = m_base_arg + rel_argno; -+ if (!argument_exists_p (argno)) -+ return true; -+ -+ HOST_WIDE_INT actual; -+ if (!require_immediate (argno, actual)) -+ return false; -+ -+ if (actual != value0 -+ && actual != value1 -+ && actual != value2 -+ && actual != value3) -+ { -+ report_not_one_of (location, fndecl, argno, actual, -+ value0, value1, value2, value3); -+ return false; -+ } -+ -+ return true; -+} -+ -+/* Check that argument REL_ARGNO is an integer constant expression in the -+ range [MIN, MAX]. REL_ARGNO counts from the end of the predication -+ arguments. */ -+bool -+function_checker::require_immediate_range (unsigned int rel_argno, -+ HOST_WIDE_INT min, -+ HOST_WIDE_INT max) -+{ -+ unsigned int argno = m_base_arg + rel_argno; -+ if (!argument_exists_p (argno)) -+ return true; -+ -+ /* Required because of the tree_to_uhwi -> HOST_WIDE_INT conversion -+ in require_immediate. 
*/ -+ gcc_assert (min >= 0 && min <= max); -+ HOST_WIDE_INT actual; -+ if (!require_immediate (argno, actual)) -+ return false; -+ -+ if (!IN_RANGE (actual, min, max)) -+ { -+ report_out_of_range (location, fndecl, argno, actual, min, max); -+ return false; -+ } -+ -+ return true; -+} -+ -+/* Perform semantic checks on the call. Return true if the call is valid, -+ otherwise report a suitable error. */ -+bool -+function_checker::check () -+{ -+ function_args_iterator iter; -+ tree type; -+ unsigned int i = 0; -+ FOREACH_FUNCTION_ARGS (m_fntype, type, iter) -+ { -+ if (type == void_type_node || i >= m_nargs) -+ break; -+ -+ if (i >= m_base_arg -+ && TREE_CODE (type) == ENUMERAL_TYPE -+ && !require_immediate_enum (i - m_base_arg, type)) -+ return false; -+ -+ i += 1; -+ } -+ -+ return shape->check (*this); -+} -+ -+gimple_folder::gimple_folder (const function_instance &instance, tree fndecl, -+ gimple_stmt_iterator *gsi_in, gcall *call_in) -+ : function_call_info (gimple_location (call_in), instance, fndecl), -+ gsi (gsi_in), call (call_in), lhs (gimple_call_lhs (call_in)) -+{ -+} -+ -+/* Convert predicate argument ARGNO so that it has the type appropriate for -+ an operation on VECTYPE. Add any new statements to STMTS. */ -+tree -+gimple_folder::convert_pred (gimple_seq &stmts, tree vectype, -+ unsigned int argno) -+{ -+ tree pred = gimple_call_arg (call, argno); -+ if (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (pred)), -+ TYPE_VECTOR_SUBPARTS (vectype))) -+ return pred; -+ -+ return gimple_build (&stmts, VIEW_CONVERT_EXPR, -+ truth_type_for (vectype), pred); -+} -+ -+/* Return a pointer to the address in a contiguous load or store, -+ given that each memory vector has type VECTYPE. Add any new -+ statements to STMTS. */ -+tree -+gimple_folder::fold_contiguous_base (gimple_seq &stmts, tree vectype) -+{ -+ tree base = gimple_call_arg (call, 1); -+ if (mode_suffix_id == MODE_vnum) -+ { -+ tree offset = gimple_call_arg (call, 2); -+ offset = gimple_convert (&stmts, sizetype, offset); -+ offset = gimple_build (&stmts, MULT_EXPR, sizetype, offset, -+ TYPE_SIZE_UNIT (vectype)); -+ base = gimple_build (&stmts, POINTER_PLUS_EXPR, TREE_TYPE (base), -+ base, offset); -+ } -+ return base; -+} -+ -+/* Return the alignment and TBAA argument to an internal load or store -+ function like IFN_MASK_LOAD or IFN_MASK_STORE, given that it accesses -+ memory elements of type TYPE. */ -+tree -+gimple_folder::load_store_cookie (tree type) -+{ -+ return build_int_cst (build_pointer_type (type), TYPE_ALIGN_UNIT (type)); -+} -+ -+/* Fold the call to a call to INSTANCE, with the same arguments. */ -+gimple * -+gimple_folder::redirect_call (const function_instance &instance) -+{ -+ registered_function *rfn -+ = function_table->find_with_hash (instance, instance.hash ()); -+ if (!rfn) -+ return NULL; -+ -+ gimple_call_set_fndecl (call, rfn->decl); -+ return call; -+} -+ -+/* Fold the call to a PTRUE, taking the element size from type suffix 0. */ -+gimple * -+gimple_folder::fold_to_ptrue () -+{ -+ tree svbool_type = TREE_TYPE (lhs); -+ tree bool_type = TREE_TYPE (svbool_type); -+ unsigned int element_bytes = type_suffix (0).element_bytes; -+ -+ /* The return type is svbool_t for all type suffixes, thus for b8 we -+ want { 1, 1, 1, 1, ... }, for b16 we want { 1, 0, 1, 0, ... }, etc. 
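As a sketch of the predicate layout being built here (standard ACLE intrinsics, not spelled out in this hunk): svbool_t has one lane per byte of data, so an all-true predicate for 16-bit elements is exactly the { 1, 0, 1, 0, ... } pattern described above.

#include <arm_sve.h>

uint64_t
count_active (void)
{
  /* For .h (16-bit) elements only every other svbool_t lane is set.  */
  svbool_t pg = svptrue_b16 ();
  /* Counts one active lane per 16-bit element.  */
  return svcntp_b16 (pg, pg);
}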
*/ -+ tree_vector_builder builder (svbool_type, element_bytes, 1); -+ builder.quick_push (build_all_ones_cst (bool_type)); -+ for (unsigned int i = 1; i < element_bytes; ++i) -+ builder.quick_push (build_zero_cst (bool_type)); -+ return gimple_build_assign (lhs, builder.build ()); -+} -+ -+/* Fold the call to a PFALSE. */ -+gimple * -+gimple_folder::fold_to_pfalse () -+{ -+ return gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs))); -+} -+ -+/* Fold an operation to a constant predicate in which the first VL -+ elements are set and the rest are clear. Take the element size -+ from type suffix 0. */ -+gimple * -+gimple_folder::fold_to_vl_pred (unsigned int vl) -+{ -+ tree vectype = TREE_TYPE (lhs); -+ tree element_type = TREE_TYPE (vectype); -+ tree minus_one = build_all_ones_cst (element_type); -+ tree zero = build_zero_cst (element_type); -+ unsigned int element_bytes = type_suffix (0).element_bytes; -+ -+ /* Construct COUNT elements that contain the ptrue followed by -+ a repeating sequence of COUNT elements. */ -+ unsigned int count = constant_lower_bound (TYPE_VECTOR_SUBPARTS (vectype)); -+ gcc_assert (vl * element_bytes <= count); -+ tree_vector_builder builder (vectype, count, 2); -+ for (unsigned int i = 0; i < count * 2; ++i) -+ { -+ bool bit = (i & (element_bytes - 1)) == 0 && i < vl * element_bytes; -+ builder.quick_push (bit ? minus_one : zero); -+ } -+ return gimple_build_assign (lhs, builder.build ()); -+} -+ -+/* Try to fold the call. Return the new statement on success and null -+ on failure. */ -+gimple * -+gimple_folder::fold () -+{ -+ /* Don't fold anything when SVE is disabled; emit an error during -+ expansion instead. */ -+ if (!TARGET_SVE) -+ return NULL; -+ -+ /* Punt if the function has a return type and no result location is -+ provided. The attributes should allow target-independent code to -+ remove the calls if appropriate. */ -+ if (!lhs && TREE_TYPE (gimple_call_fntype (call)) != void_type_node) -+ return NULL; -+ -+ return base->fold (*this); -+} -+ -+function_expander::function_expander (const function_instance &instance, -+ tree fndecl, tree call_expr_in, -+ rtx possible_target_in) -+ : function_call_info (EXPR_LOCATION (call_expr_in), instance, fndecl), -+ call_expr (call_expr_in), possible_target (possible_target_in) -+{ -+} -+ -+/* Return the handler of direct optab OP for type suffix SUFFIX_I. */ -+insn_code -+function_expander::direct_optab_handler (optab op, unsigned int suffix_i) -+{ -+ return ::direct_optab_handler (op, vector_mode (suffix_i)); -+} -+ -+/* Choose between signed and unsigned direct optabs SIGNED_OP and -+ UNSIGNED_OP based on the signedness of type suffix SUFFIX_I, then -+ pick the appropriate optab handler for the mode. Use MODE as the -+ mode if given, otherwise use the mode of type suffix SUFFIX_I. */ -+insn_code -+function_expander::direct_optab_handler_for_sign (optab signed_op, -+ optab unsigned_op, -+ unsigned int suffix_i, -+ machine_mode mode) -+{ -+ if (mode == VOIDmode) -+ mode = vector_mode (suffix_i); -+ optab op = type_suffix (suffix_i).unsigned_p ? unsigned_op : signed_op; -+ return ::direct_optab_handler (op, mode); -+} -+ -+/* Return true if X overlaps any input. */ -+bool -+function_expander::overlaps_input_p (rtx x) -+{ -+ for (unsigned int i = 0; i < args.length (); ++i) -+ if (reg_overlap_mentioned_p (x, args[i])) -+ return true; -+ return false; -+} -+ -+/* Return the base address for a contiguous load or store function. -+ MEM_MODE is the mode of the addressed memory. 
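For illustration, and assuming (as a sketch) that fixed-VL ptrue patterns are among the callers of this fold: a pattern such as SV_VL2 on 32-bit elements yields a constant predicate whose first two .s lanes are active and whose remaining lanes are clear, which is the shape fold_to_vl_pred constructs.

#include <arm_sve.h>

svfloat32_t
first_two (svfloat32_t x)
{
  /* First two 32-bit lanes active, the rest inactive.  */
  svbool_t pg = svptrue_pat_b32 (SV_VL2);
  return svadd_f32_z (pg, x, x);
}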
*/ -+rtx -+function_expander::get_contiguous_base (machine_mode mem_mode) -+{ -+ rtx base = args[1]; -+ if (mode_suffix_id == MODE_vnum) -+ { -+ /* Use the size of the memory mode for extending loads and truncating -+ stores. Use the size of a full vector for non-extending loads -+ and non-truncating stores (including svld[234] and svst[234]). */ -+ poly_int64 size = ordered_min (GET_MODE_SIZE (mem_mode), -+ BYTES_PER_SVE_VECTOR); -+ rtx offset = gen_int_mode (size, Pmode); -+ offset = simplify_gen_binary (MULT, Pmode, args[2], offset); -+ base = simplify_gen_binary (PLUS, Pmode, base, offset); -+ } -+ return base; -+} -+ -+/* For a function that does the equivalent of: -+ -+ OUTPUT = COND ? FN (INPUTS) : FALLBACK; -+ -+ return the value of FALLBACK. -+ -+ MODE is the mode of OUTPUT. NOPS is the number of operands in INPUTS. -+ MERGE_ARGNO is the argument that provides FALLBACK for _m functions, -+ or DEFAULT_MERGE_ARGNO if we should apply the usual rules. -+ -+ ARGNO is the caller's index into args. If the returned value is -+ argument 0 (as for unary _m operations), increment ARGNO past the -+ returned argument. */ -+rtx -+function_expander::get_fallback_value (machine_mode mode, unsigned int nops, -+ unsigned int merge_argno, -+ unsigned int &argno) -+{ -+ if (pred == PRED_z) -+ return CONST0_RTX (mode); -+ -+ gcc_assert (pred == PRED_m || pred == PRED_x); -+ if (merge_argno == DEFAULT_MERGE_ARGNO) -+ merge_argno = nops == 1 && pred == PRED_m ? 0 : 1; -+ -+ if (merge_argno == 0) -+ return args[argno++]; -+ -+ return args[merge_argno]; -+} -+ -+/* Return a REG rtx that can be used for the result of the function, -+ using the preferred target if suitable. */ -+rtx -+function_expander::get_reg_target () -+{ -+ machine_mode target_mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (fndecl))); -+ if (!possible_target || GET_MODE (possible_target) != target_mode) -+ possible_target = gen_reg_rtx (target_mode); -+ return possible_target; -+} -+ -+/* As for get_reg_target, but make sure that the returned REG does not -+ overlap any inputs. */ -+rtx -+function_expander::get_nonoverlapping_reg_target () -+{ -+ if (possible_target && overlaps_input_p (possible_target)) -+ possible_target = NULL_RTX; -+ return get_reg_target (); -+} -+ -+/* Add an output operand to the instruction we're building, which has -+ code ICODE. Bind the output to the preferred target rtx if possible. */ -+void -+function_expander::add_output_operand (insn_code icode) -+{ -+ unsigned int opno = m_ops.length (); -+ machine_mode mode = insn_data[icode].operand[opno].mode; -+ m_ops.safe_grow (opno + 1); -+ create_output_operand (&m_ops.last (), possible_target, mode); -+} -+ -+/* Add an input operand to the instruction we're building, which has -+ code ICODE. Calculate the value of the operand as follows: -+ -+ - If the operand is a vector and X is not, broadcast X to fill a -+ vector of the appropriate mode. -+ -+ - Otherwise, if the operand is a predicate, coerce X to have the -+ mode that the instruction expects. In this case X is known to be -+ VNx16BImode (the mode of svbool_t). -+ -+ - Otherwise use X directly. The expand machinery checks that X has -+ the right mode for the instruction. 
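A minimal sketch of the _vnum addressing handled above (standard ACLE names; the wrapper is illustrative): the vnum argument counts whole vectors, so the two loads below access the same memory.

#include <arm_sve.h>

svfloat32_t
load_third_vector (svbool_t pg, const float32_t *base)
{
  /* vnum counts whole vectors: base + 2 * (bytes per SVE vector).  */
  svfloat32_t a = svld1_vnum_f32 (pg, base, 2);
  /* Equivalent hand-written form using the 32-bit element count.  */
  svfloat32_t b = svld1_f32 (pg, base + 2 * svcntw ());
  return svadd_f32_x (pg, a, b);
}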
*/ -+void -+function_expander::add_input_operand (insn_code icode, rtx x) -+{ -+ unsigned int opno = m_ops.length (); -+ const insn_operand_data &operand = insn_data[icode].operand[opno]; -+ machine_mode mode = operand.mode; -+ if (mode == VOIDmode) -+ { -+ /* The only allowable use of VOIDmode is the wildcard -+ aarch64_any_register_operand, which is used to avoid -+ combinatorial explosion in the reinterpret patterns. */ -+ gcc_assert (operand.predicate == aarch64_any_register_operand); -+ mode = GET_MODE (x); -+ } -+ else if (!VECTOR_MODE_P (GET_MODE (x)) && VECTOR_MODE_P (mode)) -+ x = expand_vector_broadcast (mode, x); -+ else if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) -+ { -+ gcc_assert (GET_MODE (x) == VNx16BImode); -+ x = gen_lowpart (mode, x); -+ } -+ m_ops.safe_grow (m_ops.length () + 1); -+ create_input_operand (&m_ops.last (), x, mode); -+} -+ -+/* Add an integer operand with value X to the instruction. */ -+void -+function_expander::add_integer_operand (HOST_WIDE_INT x) -+{ -+ m_ops.safe_grow (m_ops.length () + 1); -+ create_integer_operand (&m_ops.last (), x); -+} -+ -+/* Add a memory operand with mode MODE and address ADDR. */ -+void -+function_expander::add_mem_operand (machine_mode mode, rtx addr) -+{ -+ /* Exception for OImode for the ld1ro intrinsics. -+ They act on 256 bit octaword data, and it's just easier to use a scalar -+ mode to represent that than add a new vector mode solely for the purpose -+ of this intrinsic. */ -+ gcc_assert (VECTOR_MODE_P (mode) || mode == OImode); -+ rtx mem = gen_rtx_MEM (mode, memory_address (mode, addr)); -+ /* The memory is only guaranteed to be element-aligned. */ -+ set_mem_align (mem, GET_MODE_ALIGNMENT (GET_MODE_INNER (mode))); -+ add_fixed_operand (mem); -+} -+ -+/* Add an address operand with value X. The static operand data says -+ what mode and form the address must have. */ -+void -+function_expander::add_address_operand (rtx x) -+{ -+ m_ops.safe_grow (m_ops.length () + 1); -+ create_address_operand (&m_ops.last (), x); -+} -+ -+/* Add an operand that must be X. The only way of legitimizing an -+ invalid X is to reload the address of a MEM. */ -+void -+function_expander::add_fixed_operand (rtx x) -+{ -+ m_ops.safe_grow (m_ops.length () + 1); -+ create_fixed_operand (&m_ops.last (), x); -+} -+ -+/* Generate instruction ICODE, given that its operands have already -+ been added to M_OPS. Return the value of the first operand. */ -+rtx -+function_expander::generate_insn (insn_code icode) -+{ -+ expand_insn (icode, m_ops.length (), m_ops.address ()); -+ return function_returns_void_p () ? const0_rtx : m_ops[0].value; -+} -+ -+/* Convert the arguments to a gather/scatter function into the -+ associated md operands. Argument ARGNO is the scalar or vector base and -+ argument ARGNO + 1 is the scalar or vector displacement (if applicable). -+ The md pattern expects: -+ -+ - a scalar base -+ - a vector displacement -+ -+ If SCALED_P is true, it also expects: -+ -+ - a const_int that is 1 if the displacement is zero-extended from 32 bits -+ - a scaling multiplier (1 for bytes, 2 for .h indices, etc.). -+ -+ If SCALED_P is false, the displacement is implicitly zero-extended -+ and the scaling multiplier is implicitly 1. 
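For illustration, two source-level gather forms that feed the operand conversion described above (standard ACLE intrinsics; the wrapper is illustrative only):

#include <arm_sve.h>

svfloat64_t
gathers (svbool_t pg, const float64_t *base, svint64_t idx, svuint64_t addrs)
{
  /* Scalar base + vector index, scaled by the element size.  */
  svfloat64_t a = svld1_gather_s64index_f64 (pg, base, idx);
  /* Vector base with no displacement; converted above to a zero scalar
     base plus a vector byte offset.  */
  svfloat64_t b = svld1_gather_u64base_f64 (pg, addrs);
  return svadd_f64_x (pg, a, b);
}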
*/ -+void -+function_expander::prepare_gather_address_operands (unsigned int argno, -+ bool scaled_p) -+{ -+ machine_mode mem_mode = memory_vector_mode (); -+ tree vector_type = base_vector_type (); -+ units_index units = displacement_units (); -+ int shift_idx = -1; -+ if (units == UNITS_none) -+ { -+ /* Vector base, no displacement. Convert to an integer zero base -+ and a vector byte offset. */ -+ args.quick_insert (argno, const0_rtx); -+ units = UNITS_bytes; -+ } -+ else if (vector_type) -+ { -+ /* Vector base, scalar displacement. Convert to a scalar base and -+ a vector byte offset. */ -+ std::swap (args[argno], args[argno + 1]); -+ if (units == UNITS_elements) -+ shift_idx = argno; -+ } -+ else -+ { -+ /* Scalar base, vector displacement. This is the order that the md -+ pattern wants. */ -+ if (Pmode == SImode) -+ args[argno] = simplify_gen_unary (ZERO_EXTEND, DImode, -+ args[argno], SImode); -+ vector_type = displacement_vector_type (); -+ if (units == UNITS_elements && !scaled_p) -+ shift_idx = argno + 1; -+ } -+ tree scalar_displacement_type = TREE_TYPE (vector_type); -+ -+ if (shift_idx >= 0) -+ { -+ machine_mode arg_mode = GET_MODE (args[shift_idx]); -+ if (arg_mode == VOIDmode) -+ arg_mode = DImode; -+ unsigned int elt_bytes = GET_MODE_UNIT_SIZE (mem_mode); -+ rtx shift = gen_int_mode (exact_log2 (elt_bytes), DImode); -+ args[shift_idx] = simplify_gen_binary (ASHIFT, arg_mode, -+ args[shift_idx], shift); -+ units = UNITS_bytes; -+ } -+ -+ bool uxtw_p = (TYPE_PRECISION (scalar_displacement_type) == 64 -+ || TYPE_UNSIGNED (scalar_displacement_type)); -+ unsigned int scale = (units == UNITS_bytes -+ ? 1 : GET_MODE_UNIT_SIZE (mem_mode)); -+ -+ if (scaled_p) -+ { -+ args.quick_insert (argno + 2, GEN_INT (uxtw_p)); -+ args.quick_insert (argno + 3, GEN_INT (scale)); -+ } -+ else -+ gcc_assert (uxtw_p && scale == 1); -+} -+ -+/* The final argument is an immediate svprfop value. Add two fake arguments -+ to represent the rw and locality operands of a PREFETCH rtx. */ -+void -+function_expander::prepare_prefetch_operands () -+{ -+ unsigned int prfop = INTVAL (args.last ()); -+ /* Bit 3 of the prfop selects stores over loads. */ -+ args.quick_push (GEN_INT ((prfop & 8) != 0)); -+ /* Bits 1 and 2 specify the locality; 0-based for svprfop but -+ 1-based for PREFETCH. */ -+ args.quick_push (GEN_INT (((prfop >> 1) & 3) + 1)); -+} -+ -+/* Add a dummy argument to indicate whether predicate argument ARGNO -+ is all-true when interpreted in mode PRED_MODE. The hint goes -+ immediately after ARGNO. */ -+void -+function_expander::add_ptrue_hint (unsigned int argno, machine_mode pred_mode) -+{ -+ rtx pred = gen_lowpart (pred_mode, args[argno]); -+ int hint = (pred == CONSTM1_RTX (pred_mode) -+ ? SVE_KNOWN_PTRUE : SVE_MAYBE_NOT_PTRUE); -+ args.quick_insert (argno + 1, gen_int_mode (hint, SImode)); -+} -+ -+/* Rotate inputs args[START:END] one position to the left, so that -+ args[START] becomes args[END - 1]. */ -+void -+function_expander::rotate_inputs_left (unsigned int start, unsigned int end) -+{ -+ rtx new_last = args[start]; -+ for (unsigned int i = start; i < end - 1; ++i) -+ args[i] = args[i + 1]; -+ args[end - 1] = new_last; -+} -+ -+/* Return true if the negation of argument ARGNO can be folded away, -+ replacing it with the negated value if so. MODE is the associated -+ vector mode, but the argument could be a single element. The main -+ case this handles is constant arguments. 
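A worked example of the prfop bit layout used above, taking two enumerators whose values come from the ACLE definition of svprfop (the wrapper is illustrative only):

#include <arm_sve.h>

void
prefetch_both (svbool_t pg, const void *p)
{
  /* SV_PLDL1KEEP: bit 3 clear (load), bits 2:1 == 0 (L1).  */
  svprfb (pg, p, SV_PLDL1KEEP);
  /* SV_PSTL2STRM: bit 3 set (store), bits 2:1 == 1 (L2), which the
     "+ 1" above maps to PREFETCH locality 2.  */
  svprfb (pg, p, SV_PSTL2STRM);
}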
*/ -+bool -+function_expander::try_negating_argument (unsigned int argno, -+ machine_mode mode) -+{ -+ rtx x = args[argno]; -+ if (!VECTOR_MODE_P (GET_MODE (x))) -+ mode = GET_MODE_INNER (mode); -+ -+ x = simplify_unary_operation (NEG, mode, x, mode); -+ if (!x) -+ return false; -+ -+ args[argno] = x; -+ return true; -+} -+ -+/* Implement the call using instruction ICODE, with a 1:1 mapping between -+ arguments and input operands. */ -+rtx -+function_expander::use_exact_insn (insn_code icode) -+{ -+ unsigned int nops = insn_data[icode].n_operands; -+ if (!function_returns_void_p ()) -+ { -+ add_output_operand (icode); -+ nops -= 1; -+ } -+ for (unsigned int i = 0; i < nops; ++i) -+ add_input_operand (icode, args[i]); -+ return generate_insn (icode); -+} -+ -+/* Implement the call using instruction ICODE, which does not use a -+ governing predicate. We must therefore drop the GP from an _x call. */ -+rtx -+function_expander::use_unpred_insn (insn_code icode) -+{ -+ /* We can't drop the predicate for _z and _m. */ -+ gcc_assert (pred == PRED_x || pred == PRED_none); -+ /* Discount the output operand. */ -+ unsigned int nops = insn_data[icode].n_operands - 1; -+ /* Drop the predicate argument in the case of _x predication. */ -+ unsigned int bias = (pred == PRED_x ? 1 : 0); -+ unsigned int i = 0; -+ -+ add_output_operand (icode); -+ for (; i < nops; ++i) -+ add_input_operand (icode, args[i + bias]); -+ -+ return generate_insn (icode); -+} -+ -+/* Implement the call using instruction ICODE, which is a predicated -+ operation that returns arbitrary values for inactive lanes. */ -+rtx -+function_expander::use_pred_x_insn (insn_code icode) -+{ -+ /* At present we never need to handle PRED_none, which would involve -+ creating a new predicate rather than using one supplied by the user. */ -+ gcc_assert (pred == PRED_x); -+ /* Discount the output operand. */ -+ unsigned int nops = args.length () - 1; -+ -+ bool has_float_operand_p = FLOAT_MODE_P (insn_data[icode].operand[0].mode); -+ -+ /* Add the normal operands. */ -+ add_output_operand (icode); -+ add_input_operand (icode, args[0]); -+ for (unsigned int i = 0; i < nops; ++i) -+ { -+ add_input_operand (icode, args[i + 1]); -+ if (FLOAT_MODE_P (GET_MODE (args[i + 1]))) -+ has_float_operand_p = true; -+ } -+ -+ if (has_float_operand_p) -+ { -+ /* Add a flag that indicates whether unpredicated instructions -+ are allowed. */ -+ rtx pred = m_ops[1].value; -+ if (flag_trapping_math && pred != CONST1_RTX (GET_MODE (pred))) -+ add_integer_operand (SVE_STRICT_GP); -+ else -+ add_integer_operand (SVE_RELAXED_GP); -+ } -+ -+ return generate_insn (icode); -+} -+ -+/* Implement the call using instruction ICODE, which does the equivalent of: -+ -+ OUTPUT = COND ? FN (INPUTS) : FALLBACK; -+ -+ The instruction operands are in the order above: OUTPUT, COND, INPUTS -+ and FALLBACK. MERGE_ARGNO is the argument that provides FALLBACK for _m -+ functions, or DEFAULT_MERGE_ARGNO if we should apply the usual rules. */ -+rtx -+function_expander::use_cond_insn (insn_code icode, unsigned int merge_argno) -+{ -+ /* At present we never need to handle PRED_none, which would involve -+ creating a new predicate rather than using one supplied by the user. */ -+ gcc_assert (pred != PRED_none); -+ /* Discount the output, predicate and fallback value. 
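To illustrate the distinction that the relaxed/strict governing-predicate hint captures (a sketch, not part of this patch): with a known all-true predicate the _x form can safely become an unpredicated instruction, while an arbitrary predicate under -ftrapping-math must stay predicated so that inactive lanes cannot raise exceptions.

#include <arm_sve.h>

svfloat32_t
mul_all (svfloat32_t a, svfloat32_t b)
{
  /* Known ptrue governing predicate: eligible for an unpredicated FMUL.  */
  return svmul_f32_x (svptrue_b32 (), a, b);
}

svfloat32_t
mul_some (svbool_t pg, svfloat32_t a, svfloat32_t b)
{
  /* General predicate: under -ftrapping-math this must remain predicated
     (the strict case described above).  */
  return svmul_f32_x (pg, a, b);
}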
*/ -+ unsigned int nops = insn_data[icode].n_operands - 3; -+ machine_mode mode = insn_data[icode].operand[0].mode; -+ -+ unsigned int opno = 0; -+ rtx fallback_arg = get_fallback_value (mode, nops, merge_argno, opno); -+ rtx pred = args[opno++]; -+ -+ add_output_operand (icode); -+ add_input_operand (icode, pred); -+ for (unsigned int i = 0; i < nops; ++i) -+ add_input_operand (icode, args[opno + i]); -+ add_input_operand (icode, fallback_arg); -+ return generate_insn (icode); -+} -+ -+/* Implement the call using instruction ICODE, which is a select-like -+ operation with the following operands: -+ -+ 0: output -+ 1: true value -+ 2: false value -+ 3: predicate -+ -+ MERGE_ARGNO is the argument that provides the "false" value for _m -+ functions, or DEFAULT_MERGE_ARGNO if we should apply the usual rules. */ -+rtx -+function_expander::use_vcond_mask_insn (insn_code icode, -+ unsigned int merge_argno) -+{ -+ machine_mode mode = vector_mode (0); -+ -+ unsigned int opno = 0; -+ rtx false_arg = get_fallback_value (mode, 1, merge_argno, opno); -+ rtx pred_arg = args[opno++]; -+ rtx true_arg = args[opno++]; -+ -+ add_output_operand (icode); -+ add_input_operand (icode, true_arg); -+ add_input_operand (icode, false_arg); -+ add_input_operand (icode, pred_arg); -+ return generate_insn (icode); -+} -+ -+/* Implement the call using instruction ICODE, which loads memory operand 1 -+ into register operand 0 under the control of predicate operand 2. */ -+rtx -+function_expander::use_contiguous_load_insn (insn_code icode) -+{ -+ machine_mode mem_mode = memory_vector_mode (); -+ -+ add_output_operand (icode); -+ add_mem_operand (mem_mode, get_contiguous_base (mem_mode)); -+ add_input_operand (icode, args[0]); -+ return generate_insn (icode); -+} -+ -+/* Implement the call using instruction ICODE, which prefetches from -+ address operand 1 under the control of predicate operand 0. -+ Operands 2, 3 and 4 respectively specify the svprfop value, -+ the PREFETCH rw flag and the PREFETCH locality. */ -+rtx -+function_expander::use_contiguous_prefetch_insn (insn_code icode) -+{ -+ add_input_operand (icode, args[0]); -+ add_address_operand (get_contiguous_base (VNx16QImode)); -+ for (unsigned int i = args.length () - 3; i < args.length (); ++i) -+ add_input_operand (icode, args[i]); -+ return generate_insn (icode); -+} -+ -+/* Implement the call using instruction ICODE, which stores register operand 1 -+ into memory operand 0 under the control of predicate operand 2. */ -+rtx -+function_expander::use_contiguous_store_insn (insn_code icode) -+{ -+ machine_mode mem_mode = memory_vector_mode (); -+ -+ add_mem_operand (mem_mode, get_contiguous_base (mem_mode)); -+ add_input_operand (icode, args.last ()); -+ add_input_operand (icode, args[0]); -+ return generate_insn (icode); -+} -+ -+/* Implement the call using one of the following strategies, chosen in order: -+ -+ (1) "aarch64_pred__z" for PRED_z predicate functions -+ -+ (2) "aarch64_pred_" for PRED_x functions -+ -+ (3) a normal unpredicated optab for PRED_none and PRED_x functions, -+ dropping the predicate in the latter case -+ -+ (4) "cond_" otherwise -+ -+ where corresponds to: -+ -+ - CODE_FOR_SINT for signed integers -+ - CODE_FOR_UINT for unsigned integers -+ - UNSPEC_FOR_FP for floating-point values -+ -+ MERGE_ARGNO is the argument that provides the values of inactive lanes for -+ _m functions, or DEFAULT_MERGE_ARGNO if we should apply the usual rules. 
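A minimal illustration of the fallback values that the predication forms supply, as handled by the code above (standard ACLE intrinsics; the wrapper is illustrative only):

#include <arm_sve.h>

void
forms (svbool_t pg, svint32_t a, svint32_t b,
       svint32_t *m, svint32_t *z, svint32_t *x)
{
  *m = svadd_s32_m (pg, a, b);  /* Inactive lanes copied from A.  */
  *z = svadd_s32_z (pg, a, b);  /* Inactive lanes set to zero.  */
  *x = svadd_s32_x (pg, a, b);  /* Inactive lanes are "don't care".  */
}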
*/ -+rtx -+function_expander::map_to_rtx_codes (rtx_code code_for_sint, -+ rtx_code code_for_uint, -+ int unspec_for_fp, -+ unsigned int merge_argno) -+{ -+ machine_mode mode = vector_mode (0); -+ rtx_code code = (type_suffix (0).unsigned_p ? code_for_uint : code_for_sint); -+ insn_code icode; -+ -+ /* Handle predicate logic operations, which always use _z predication. */ -+ if (type_suffix (0).tclass == TYPE_bool) -+ { -+ gcc_assert (pred == PRED_z && code_for_uint == code_for_sint); -+ return use_exact_insn (code_for_aarch64_pred_z (code, mode)); -+ } -+ -+ /* First try using UNSPEC_PRED_X patterns for _x predication, -+ if available. */ -+ if (pred == PRED_x) -+ { -+ if (type_suffix (0).integer_p) -+ icode = maybe_code_for_aarch64_pred (code, mode); -+ else -+ icode = maybe_code_for_aarch64_pred (unspec_for_fp, mode); -+ if (icode != CODE_FOR_nothing) -+ return use_pred_x_insn (icode); -+ } -+ -+ /* Otherwise expand PRED_none and PRED_x operations without a predicate. -+ Floating-point operations conventionally use the signed rtx code. */ -+ if (pred == PRED_none || pred == PRED_x) -+ return use_unpred_insn (direct_optab_handler (code_to_optab (code), 0)); -+ -+ /* Don't use cond_*_optabs here, since not all codes have one yet. */ -+ if (type_suffix (0).integer_p) -+ icode = code_for_cond (code, mode); -+ else -+ icode = code_for_cond (unspec_for_fp, mode); -+ return use_cond_insn (icode, merge_argno); -+} -+ -+/* Implement the call using one of the following strategies, chosen in order: -+ -+ (1) "aarch64_pred_" for PRED_x functions; this is a -+ predicated pattern -+ -+ (2) "aarch64_sve_" for PRED_none and PRED_x functions; -+ this is an unpredicated pattern -+ -+ (3) "cond_" otherwise -+ -+ where corresponds to: -+ -+ - UNSPEC_FOR_SINT for signed integers -+ - UNSPEC_FOR_UINT for unsigned integers -+ - UNSPEC_FOR_FP for floating-point values -+ -+ MERGE_ARGNO is the argument that provides the values of inactive lanes for -+ _m functions, or DEFAULT_MERGE_ARGNO if we should apply the usual rules. */ -+rtx -+function_expander::map_to_unspecs (int unspec_for_sint, int unspec_for_uint, -+ int unspec_for_fp, unsigned int merge_argno) -+{ -+ machine_mode mode = vector_mode (0); -+ int unspec = (!type_suffix (0).integer_p ? unspec_for_fp -+ : type_suffix (0).unsigned_p ? unspec_for_uint -+ : unspec_for_sint); -+ -+ if (pred == PRED_x) -+ { -+ insn_code icode = maybe_code_for_aarch64_pred (unspec, mode); -+ if (icode != CODE_FOR_nothing) -+ return use_pred_x_insn (icode); -+ } -+ -+ if (pred == PRED_none || pred == PRED_x) -+ { -+ insn_code icode = maybe_code_for_aarch64_sve (unspec, mode); -+ if (icode != CODE_FOR_nothing) -+ return use_unpred_insn (icode); -+ } -+ -+ insn_code icode = code_for_cond (unspec, vector_mode (0)); -+ return use_cond_insn (icode, merge_argno); -+} -+ -+/* Implement the call using an @aarch64 instruction and the -+ instructions are parameterized by an rtx_code. CODE_FOR_SINT -+ is the rtx_code for signed integer operations, CODE_FOR_UINT -+ is the rtx_code for unsigned integer operations. */ -+rtx -+function_expander::expand_signed_unpred_op (rtx_code code_for_sint, -+ rtx_code code_for_uint) -+{ -+ insn_code icode; -+ if (type_suffix (0).unsigned_p) -+ icode = code_for_aarch64 (code_for_uint, code_for_uint, vector_mode (0)); -+ else -+ icode = code_for_aarch64 (code_for_sint, code_for_sint, vector_mode (0)); -+ return use_unpred_insn (icode); -+} -+ -+/* Expand the call and return its lhs. 
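For illustration of the sign-based selection above: the intrinsics below are standard ACLE, and the expectation (not something this hunk spells out) is that the signed and unsigned type suffixes select different rtx codes and hence different instructions, SDIV versus UDIV.

#include <arm_sve.h>

svint32_t
div_s (svbool_t pg, svint32_t a, svint32_t b)
{
  /* Signed type suffix: expands via the signed rtx code.  */
  return svdiv_s32_x (pg, a, b);
}

svuint32_t
div_u (svbool_t pg, svuint32_t a, svuint32_t b)
{
  /* Unsigned type suffix: expands via the unsigned rtx code.  */
  return svdiv_u32_x (pg, a, b);
}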
*/ -+rtx -+function_expander::expand () -+{ -+ unsigned int nargs = call_expr_nargs (call_expr); -+ args.reserve (nargs); -+ for (unsigned int i = 0; i < nargs; ++i) -+ args.quick_push (expand_normal (CALL_EXPR_ARG (call_expr, i))); -+ -+ return base->expand (*this); -+} -+ -+/* Register the built-in SVE ABI types, such as __SVBool_t. */ -+static void -+register_builtin_types () -+{ -+#define DEF_SVE_TYPE(ACLE_NAME, NCHARS, ABI_NAME, SCALAR_TYPE) \ -+ scalar_types[VECTOR_TYPE_ ## ACLE_NAME] = SCALAR_TYPE; -+#include "aarch64-sve-builtins.def" -+ -+ for (unsigned int i = 0; i < NUM_VECTOR_TYPES; ++i) -+ { -+ tree eltype = scalar_types[i]; -+ tree vectype; -+ if (eltype == boolean_type_node) -+ { -+ vectype = build_truth_vector_type_for_mode (BYTES_PER_SVE_VECTOR, -+ VNx16BImode); -+ gcc_assert (TYPE_MODE (vectype) == VNx16BImode -+ && TYPE_MODE (vectype) == TYPE_MODE_RAW (vectype) -+ && TYPE_ALIGN (vectype) == 16 -+ && known_eq (wi::to_poly_offset (TYPE_SIZE (vectype)), -+ BYTES_PER_SVE_VECTOR)); -+ } -+ else -+ { -+ unsigned int elbytes = tree_to_uhwi (TYPE_SIZE_UNIT (eltype)); -+ poly_uint64 nunits = exact_div (BYTES_PER_SVE_VECTOR, elbytes); -+ vectype = build_vector_type (eltype, nunits); -+ gcc_assert (VECTOR_MODE_P (TYPE_MODE (vectype)) -+ && TYPE_MODE (vectype) == TYPE_MODE_RAW (vectype) -+ && TYPE_ALIGN (vectype) == 128 -+ && known_eq (wi::to_poly_offset (TYPE_SIZE (vectype)), -+ BITS_PER_SVE_VECTOR)); -+ } -+ vectype = build_distinct_type_copy (vectype); -+ SET_TYPE_STRUCTURAL_EQUALITY (vectype); -+ TYPE_ARTIFICIAL (vectype) = 1; -+ abi_vector_types[i] = vectype; -+ lang_hooks.types.register_builtin_type (vectype, -+ vector_types[i].abi_name); -+ } -+} -+ -+/* Initialize all compiler built-ins related to SVE that should be -+ defined at start-up. */ -+void -+init_builtins () -+{ -+ sve_switcher sve; -+ register_builtin_types (); -+} -+ -+/* Register vector type TYPE under its arm_sve.h name. */ -+static void -+register_vector_type (vector_type_index type) -+{ -+ tree vectype = abi_vector_types[type]; -+ tree id = get_identifier (vector_types[type].acle_name); -+ tree decl = build_decl (input_location, TYPE_DECL, id, vectype); -+ decl = lang_hooks.decls.pushdecl (decl); -+ -+ /* Record the new ACLE type if pushdecl succeeded without error. Use -+ the ABI type otherwise, so that the type we record at least has the -+ right form, even if it doesn't have the right name. This should give -+ better error recovery behavior than installing error_mark_node or -+ installing an incorrect type. */ -+ if (TREE_CODE (decl) == TYPE_DECL -+ && TYPE_MAIN_VARIANT (TREE_TYPE (decl)) == vectype) -+ vectype = TREE_TYPE (decl); -+ acle_vector_types[0][type] = vectype; -+} -+ -+/* Register the tuple type that contains NUM_VECTORS vectors of type TYPE. */ -+static void -+register_tuple_type (unsigned int num_vectors, vector_type_index type) -+{ -+ tree tuple_type = lang_hooks.types.make_type (RECORD_TYPE); -+ -+ /* The contents of the type are opaque, so we can define them in any -+ way that maps to the correct ABI type. -+ -+ Here we choose to use the same layout as for arm_neon.h, but with -+ "__val" instead of "val": -+ -+ struct svfooxN_t { svfoo_t __val[N]; }; -+ -+ (It wouldn't be possible to write that directly in C or C++ for -+ sizeless types, but that's not a problem for this function.) -+ -+ Using arrays simplifies the handling of svget and svset for variable -+ arguments. 
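As a usage sketch of the tuple types laid out above (standard ACLE intrinsics; the wrapper is illustrative only):

#include <arm_sve.h>

svfloat32_t
sum_pairs (svbool_t pg, const float32_t *p)
{
  /* svfloat32x2_t is registered above as struct { svfloat32_t __val[2]; }.  */
  svfloat32x2_t pair = svld2_f32 (pg, p);
  return svadd_f32_x (pg, svget2_f32 (pair, 0), svget2_f32 (pair, 1));
}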
*/ -+ tree vector_type = acle_vector_types[0][type]; -+ tree array_type = build_array_type_nelts (vector_type, num_vectors); -+ gcc_assert (VECTOR_MODE_P (TYPE_MODE (array_type)) -+ && TYPE_MODE_RAW (array_type) == TYPE_MODE (array_type) -+ && TYPE_ALIGN (array_type) == 128); -+ -+ tree field = build_decl (input_location, FIELD_DECL, -+ get_identifier ("__val"), array_type); -+ DECL_FIELD_CONTEXT (field) = tuple_type; -+ TYPE_FIELDS (tuple_type) = field; -+ layout_type (tuple_type); -+ gcc_assert (VECTOR_MODE_P (TYPE_MODE (tuple_type)) -+ && TYPE_MODE_RAW (tuple_type) == TYPE_MODE (tuple_type) -+ && TYPE_ALIGN (tuple_type) == 128); -+ -+ /* Work out the structure name. */ -+ char buffer[sizeof ("svbfloat16x4_t")]; -+ const char *vector_type_name = vector_types[type].acle_name; -+ snprintf (buffer, sizeof (buffer), "%.*sx%d_t", -+ (int) strlen (vector_type_name) - 2, vector_type_name, -+ num_vectors); -+ -+ tree decl = build_decl (input_location, TYPE_DECL, -+ get_identifier (buffer), tuple_type); -+ TYPE_NAME (tuple_type) = decl; -+ TYPE_STUB_DECL (tuple_type) = decl; -+ lang_hooks.decls.pushdecl (decl); -+ /* ??? Undo the effect of set_underlying_type for C. The C frontend -+ doesn't recognize DECL as a built-in because (as intended) the decl has -+ a real location instead of BUILTINS_LOCATION. The frontend therefore -+ treats the decl like a normal C "typedef struct foo foo;", expecting -+ the type for tag "struct foo" to have a dummy unnamed TYPE_DECL instead -+ of the named one we attached above. It then sets DECL_ORIGINAL_TYPE -+ on the supposedly unnamed decl, creating a circularity that upsets -+ dwarf2out. -+ -+ We don't want to follow the normal C model and create "struct foo" -+ tags for tuple types since (a) the types are supposed to be opaque -+ and (b) they couldn't be defined as a real struct anyway. Treating -+ the TYPE_DECLs as "typedef struct foo foo;" without creating -+ "struct foo" would lead to confusing error messages. */ -+ DECL_ORIGINAL_TYPE (decl) = NULL_TREE; -+ -+ acle_vector_types[num_vectors - 1][type] = tuple_type; -+} -+ -+/* Register the svpattern enum. */ -+static void -+register_svpattern () -+{ -+ auto_vec values; -+#define PUSH(UPPER, LOWER, VALUE) \ -+ values.quick_push (string_int_pair ("SV_" #UPPER, VALUE)); -+ AARCH64_FOR_SVPATTERN (PUSH) -+#undef PUSH -+ -+ acle_svpattern = lang_hooks.types.simulate_enum_decl (input_location, -+ "svpattern", values); -+} -+ -+/* Register the svprfop enum. */ -+static void -+register_svprfop () -+{ -+ auto_vec values; -+#define PUSH(UPPER, LOWER, VALUE) \ -+ values.quick_push (string_int_pair ("SV_" #UPPER, VALUE)); -+ AARCH64_FOR_SVPRFOP (PUSH) -+#undef PUSH -+ -+ acle_svprfop = lang_hooks.types.simulate_enum_decl (input_location, -+ "svprfop", values); -+} -+ -+/* Implement #pragma GCC aarch64 "arm_sve.h". */ -+void -+handle_arm_sve_h () -+{ -+ if (function_table) -+ { -+ error ("duplicate definition of %qs", "arm_sve.h"); -+ return; -+ } -+ -+ sve_switcher sve; -+ -+ /* Define the vector and tuple types. */ -+ for (unsigned int type_i = 0; type_i < NUM_VECTOR_TYPES; ++type_i) -+ { -+ vector_type_index type = vector_type_index (type_i); -+ register_vector_type (type); -+ if (type != VECTOR_TYPE_svbool_t) -+ for (unsigned int count = 2; count <= MAX_TUPLE_SIZE; ++count) -+ register_tuple_type (count, type); -+ } -+ -+ /* Define the enums. */ -+ register_svpattern (); -+ register_svprfop (); -+ -+ /* Define the functions. 
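For illustration, what becomes visible to user code once the pragma above has run (standard ACLE names; the wrapper is illustrative only): the vector and tuple types, the svpattern and svprfop enumerators, and the intrinsic functions themselves.

#include <arm_sve.h>   /* Contains #pragma GCC aarch64 "arm_sve.h".  */

uint64_t
vector_bytes (void)
{
  /* SV_ALL comes from the svpattern enum registered above.  */
  return svcntb_pat (SV_ALL);
}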
*/ -+ function_table = new hash_table (1023); -+ function_builder builder; -+ for (unsigned int i = 0; i < ARRAY_SIZE (function_groups); ++i) -+ builder.register_function_group (function_groups[i]); -+} -+ -+/* Return the function decl with SVE function subcode CODE, or error_mark_node -+ if no such function exists. */ -+tree -+builtin_decl (unsigned int code, bool) -+{ -+ if (code >= vec_safe_length (registered_functions)) -+ return error_mark_node; -+ return (*registered_functions)[code]->decl; -+} -+ -+/* If we're implementing manual overloading, check whether the SVE -+ function with subcode CODE is overloaded, and if so attempt to -+ determine the corresponding non-overloaded function. The call -+ occurs at location LOCATION and has the arguments given by ARGLIST. -+ -+ If the call is erroneous, report an appropriate error and return -+ error_mark_node. Otherwise, if the function is overloaded, return -+ the decl of the non-overloaded function. Return NULL_TREE otherwise, -+ indicating that the call should be processed in the normal way. */ -+tree -+resolve_overloaded_builtin (location_t location, unsigned int code, -+ vec *arglist) -+{ -+ if (code >= vec_safe_length (registered_functions)) -+ return NULL_TREE; -+ -+ registered_function &rfn = *(*registered_functions)[code]; -+ if (rfn.overloaded_p) -+ return function_resolver (location, rfn.instance, rfn.decl, -+ *arglist).resolve (); -+ return NULL_TREE; -+} -+ -+/* Perform any semantic checks needed for a call to the SVE function -+ with subcode CODE, such as testing for integer constant expressions. -+ The call occurs at location LOCATION and has NARGS arguments, -+ given by ARGS. FNDECL is the original function decl, before -+ overload resolution. -+ -+ Return true if the call is valid, otherwise report a suitable error. */ -+bool -+check_builtin_call (location_t location, vec, unsigned int code, -+ tree fndecl, unsigned int nargs, tree *args) -+{ -+ const registered_function &rfn = *(*registered_functions)[code]; -+ if (!check_required_extensions (location, rfn.decl, rfn.required_extensions)) -+ return false; -+ return function_checker (location, rfn.instance, fndecl, -+ TREE_TYPE (rfn.decl), nargs, args).check (); -+} -+ -+/* Attempt to fold STMT, given that it's a call to the SVE function -+ with subcode CODE. Return the new statement on success and null -+ on failure. Insert any other new statements at GSI. */ -+gimple * -+gimple_fold_builtin (unsigned int code, gimple_stmt_iterator *gsi, gcall *stmt) -+{ -+ registered_function &rfn = *(*registered_functions)[code]; -+ return gimple_folder (rfn.instance, rfn.decl, gsi, stmt).fold (); -+} -+ -+/* Expand a call to the SVE function with subcode CODE. EXP is the call -+ expression and TARGET is the preferred location for the result. -+ Return the value of the lhs. */ -+rtx -+expand_builtin (unsigned int code, tree exp, rtx target) -+{ -+ registered_function &rfn = *(*registered_functions)[code]; -+ if (!check_required_extensions (EXPR_LOCATION (exp), rfn.decl, -+ rfn.required_extensions)) -+ return target; -+ return function_expander (rfn.instance, rfn.decl, exp, target).expand (); -+} -+ -+/* Return true if TYPE is the ABI-defined __SVBool_t type. */ -+bool -+svbool_type_p (const_tree type) -+{ -+ tree abi_type = abi_vector_types[VECTOR_TYPE_svbool_t]; -+ return (type != error_mark_node -+ && TYPE_MAIN_VARIANT (type) == TYPE_MAIN_VARIANT (abi_type)); -+} -+ -+/* If TYPE is a built-in type defined by the SVE ABI, return the mangled name, -+ otherwise return NULL. 
*/ -+const char * -+mangle_builtin_type (const_tree type) -+{ -+ if (type == error_mark_node) -+ return NULL; -+ -+ vector_type_index vtype = find_vector_type (type); -+ if (vtype != NUM_VECTOR_TYPES) -+ return vector_types[vtype].mangled_name; -+ -+ return NULL; -+} -+ -+/* If TYPE is one of the ABI-defined SVE vector types, or an ACLE-defined -+ tuple of them, return the number of vectors it contains. Return 0 -+ otherwise. */ -+unsigned int -+nvectors_if_data_type (const_tree type) -+{ -+ if (type == error_mark_node) -+ return 0; -+ -+ type = TYPE_MAIN_VARIANT (type); -+ if (VECTOR_TYPE_P (type)) -+ { -+ vector_type_index type_id = find_vector_type (type); -+ if (type_id != VECTOR_TYPE_svbool_t && type_id != NUM_VECTOR_TYPES) -+ return 1; -+ } -+ else if (TREE_CODE (type) == RECORD_TYPE) -+ { -+ for (unsigned int size_i = 1; size_i < MAX_TUPLE_SIZE; ++size_i) -+ for (unsigned int type_i = 0; type_i < NUM_VECTOR_TYPES; ++type_i) -+ { -+ tree tuple_type = acle_vector_types[size_i][type_i]; -+ if (tuple_type && type == TYPE_MAIN_VARIANT (tuple_type)) -+ return size_i + 1; -+ } -+ } -+ -+ return 0; -+} -+ -+/* Return true if TYPE is a built-in type defined by the SVE ABI. */ -+bool -+builtin_type_p (const_tree type) -+{ -+ return svbool_type_p (type) || nvectors_if_data_type (type) > 0; -+} -+ -+} -+ -+using namespace aarch64_sve; -+ -+inline void -+gt_ggc_mx (function_instance *) -+{ -+} -+ -+inline void -+gt_pch_nx (function_instance *) -+{ -+} -+ -+inline void -+gt_pch_nx (function_instance *, void (*) (void *, void *), void *) -+{ -+} -+ -+#include "gt-aarch64-sve-builtins.h" -diff --git a/gcc/config/aarch64/aarch64-sve-builtins.def b/gcc/config/aarch64/aarch64-sve-builtins.def -new file mode 100644 -index 000000000..83fba0d41 ---- /dev/null -+++ b/gcc/config/aarch64/aarch64-sve-builtins.def -@@ -0,0 +1,100 @@ -+/* Builtin lists for AArch64 SVE -+ Copyright (C) 2018-2019 Free Software Foundation, Inc. -+ -+ This file is part of GCC. -+ -+ GCC is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published by -+ the Free Software Foundation; either version 3, or (at your option) -+ any later version. -+ -+ GCC is distributed in the hope that it will be useful, but -+ WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ General Public License for more details. -+ -+ You should have received a copy of the GNU General Public License -+ along with GCC; see the file COPYING3. If not see -+ . 
*/ -+ -+#ifndef DEF_SVE_MODE -+#define DEF_SVE_MODE(A, B, C, D) -+#endif -+ -+#ifndef DEF_SVE_TYPE -+#define DEF_SVE_TYPE(A, B, C, D) -+#endif -+ -+#ifndef DEF_SVE_TYPE_SUFFIX -+#define DEF_SVE_TYPE_SUFFIX(A, B, C, D, E) -+#endif -+ -+#ifndef DEF_SVE_FUNCTION -+#define DEF_SVE_FUNCTION(A, B, C, D) -+#endif -+ -+DEF_SVE_MODE (n, none, none, none) -+DEF_SVE_MODE (index, none, none, elements) -+DEF_SVE_MODE (offset, none, none, bytes) -+DEF_SVE_MODE (s32index, none, svint32_t, elements) -+DEF_SVE_MODE (s32offset, none, svint32_t, bytes) -+DEF_SVE_MODE (s64index, none, svint64_t, elements) -+DEF_SVE_MODE (s64offset, none, svint64_t, bytes) -+DEF_SVE_MODE (u32base, svuint32_t, none, none) -+DEF_SVE_MODE (u32base_index, svuint32_t, none, elements) -+DEF_SVE_MODE (u32base_offset, svuint32_t, none, bytes) -+DEF_SVE_MODE (u32base_s32index, svuint32_t, svint32_t, elements) -+DEF_SVE_MODE (u32base_s32offset, svuint32_t, svint32_t, bytes) -+DEF_SVE_MODE (u32base_u32index, svuint32_t, svuint32_t, elements) -+DEF_SVE_MODE (u32base_u32offset, svuint32_t, svuint32_t, bytes) -+DEF_SVE_MODE (u32index, none, svuint32_t, elements) -+DEF_SVE_MODE (u32offset, none, svuint32_t, bytes) -+DEF_SVE_MODE (u64base, svuint64_t, none, none) -+DEF_SVE_MODE (u64base_index, svuint64_t, none, elements) -+DEF_SVE_MODE (u64base_offset, svuint64_t, none, bytes) -+DEF_SVE_MODE (u64base_s64index, svuint64_t, svint64_t, elements) -+DEF_SVE_MODE (u64base_s64offset, svuint64_t, svint64_t, bytes) -+DEF_SVE_MODE (u64base_u64index, svuint64_t, svuint64_t, elements) -+DEF_SVE_MODE (u64base_u64offset, svuint64_t, svuint64_t, bytes) -+DEF_SVE_MODE (u64index, none, svuint64_t, elements) -+DEF_SVE_MODE (u64offset, none, svuint64_t, bytes) -+DEF_SVE_MODE (vnum, none, none, vectors) -+ -+DEF_SVE_TYPE (svbool_t, 10, __SVBool_t, boolean_type_node) -+DEF_SVE_TYPE (svbfloat16_t, 14, __SVBfloat16_t, aarch64_bf16_type_node) -+DEF_SVE_TYPE (svfloat16_t, 13, __SVFloat16_t, aarch64_fp16_type_node) -+DEF_SVE_TYPE (svfloat32_t, 13, __SVFloat32_t, float_type_node) -+DEF_SVE_TYPE (svfloat64_t, 13, __SVFloat64_t, double_type_node) -+DEF_SVE_TYPE (svint8_t, 10, __SVInt8_t, intQI_type_node) -+DEF_SVE_TYPE (svint16_t, 11, __SVInt16_t, intHI_type_node) -+DEF_SVE_TYPE (svint32_t, 11, __SVInt32_t, intSI_type_node) -+DEF_SVE_TYPE (svint64_t, 11, __SVInt64_t, intDI_type_node) -+DEF_SVE_TYPE (svuint8_t, 11, __SVUint8_t, unsigned_intQI_type_node) -+DEF_SVE_TYPE (svuint16_t, 12, __SVUint16_t, unsigned_intHI_type_node) -+DEF_SVE_TYPE (svuint32_t, 12, __SVUint32_t, unsigned_intSI_type_node) -+DEF_SVE_TYPE (svuint64_t, 12, __SVUint64_t, unsigned_intDI_type_node) -+ -+DEF_SVE_TYPE_SUFFIX (b, svbool_t, bool, 8, VNx16BImode) -+DEF_SVE_TYPE_SUFFIX (b8, svbool_t, bool, 8, VNx16BImode) -+DEF_SVE_TYPE_SUFFIX (b16, svbool_t, bool, 16, VNx8BImode) -+DEF_SVE_TYPE_SUFFIX (b32, svbool_t, bool, 32, VNx4BImode) -+DEF_SVE_TYPE_SUFFIX (b64, svbool_t, bool, 64, VNx2BImode) -+DEF_SVE_TYPE_SUFFIX (bf16, svbfloat16_t, bfloat, 16, VNx8BFmode) -+DEF_SVE_TYPE_SUFFIX (f16, svfloat16_t, float, 16, VNx8HFmode) -+DEF_SVE_TYPE_SUFFIX (f32, svfloat32_t, float, 32, VNx4SFmode) -+DEF_SVE_TYPE_SUFFIX (f64, svfloat64_t, float, 64, VNx2DFmode) -+DEF_SVE_TYPE_SUFFIX (s8, svint8_t, signed, 8, VNx16QImode) -+DEF_SVE_TYPE_SUFFIX (s16, svint16_t, signed, 16, VNx8HImode) -+DEF_SVE_TYPE_SUFFIX (s32, svint32_t, signed, 32, VNx4SImode) -+DEF_SVE_TYPE_SUFFIX (s64, svint64_t, signed, 64, VNx2DImode) -+DEF_SVE_TYPE_SUFFIX (u8, svuint8_t, unsigned, 8, VNx16QImode) -+DEF_SVE_TYPE_SUFFIX (u16, svuint16_t, unsigned, 
16, VNx8HImode) -+DEF_SVE_TYPE_SUFFIX (u32, svuint32_t, unsigned, 32, VNx4SImode) -+DEF_SVE_TYPE_SUFFIX (u64, svuint64_t, unsigned, 64, VNx2DImode) -+ -+#include "aarch64-sve-builtins-base.def" -+ -+#undef DEF_SVE_FUNCTION -+#undef DEF_SVE_TYPE_SUFFIX -+#undef DEF_SVE_TYPE -+#undef DEF_SVE_MODE -diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h -new file mode 100644 -index 000000000..d1aa612b9 ---- /dev/null -+++ b/gcc/config/aarch64/aarch64-sve-builtins.h -@@ -0,0 +1,878 @@ -+/* ACLE support for AArch64 SVE -+ Copyright (C) 2018-2019 Free Software Foundation, Inc. -+ -+ This file is part of GCC. -+ -+ GCC is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published by -+ the Free Software Foundation; either version 3, or (at your option) -+ any later version. -+ -+ GCC is distributed in the hope that it will be useful, but -+ WITHOUT ANY WARRANTY; without even the implied warranty of -+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ General Public License for more details. -+ -+ You should have received a copy of the GNU General Public License -+ along with GCC; see the file COPYING3. If not see -+ . */ -+ -+#ifndef GCC_AARCH64_SVE_BUILTINS_H -+#define GCC_AARCH64_SVE_BUILTINS_H -+ -+/* The full name of an SVE ACLE function is the concatenation of: -+ -+ - the base name ("svadd", etc.) -+ - the "mode" suffix ("_n", "_index", etc.) -+ - the type suffixes ("_s32", "_b8", etc.) -+ - the predication suffix ("_x", "_z", etc.) -+ -+ Each piece of information is individually useful, so we retain this -+ classification throughout: -+ -+ - function_base represents the base name -+ -+ - mode_suffix_index represents the mode suffix -+ -+ - type_suffix_index represents individual type suffixes, while -+ type_suffix_pair represents a pair of them -+ -+ - prediction_index extends the predication suffix with an additional -+ alternative: PRED_implicit for implicitly-predicated operations -+ -+ In addition to its unique full name, a function may have a shorter -+ overloaded alias. This alias removes pieces of the suffixes that -+ can be inferred from the arguments, such as by shortening the mode -+ suffix or dropping some of the type suffixes. The base name and the -+ predication suffix stay the same. -+ -+ The function_shape class describes what arguments a given function -+ takes and what its overloaded alias is called. In broad terms, -+ function_base describes how the underlying instruction behaves while -+ function_shape describes how that instruction has been presented at -+ the language level. -+ -+ The static list of functions uses function_group to describe a group -+ of related functions. The function_builder class is responsible for -+ expanding this static description into a list of individual functions -+ and registering the associated built-in functions. function_instance -+ describes one of these individual functions in terms of the properties -+ described above. 
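A worked example of the naming scheme described above (the intrinsic names are standard ACLE; the wrapper is illustrative only): "svadd_n_s32_x" decomposes into the base name "svadd", the mode suffix "_n" (vector-scalar), the type suffix "_s32" and the predication suffix "_x", while its overloaded alias drops the pieces that can be inferred from the arguments.

#include <arm_sve.h>

svint32_t
full_and_overloaded (svbool_t pg, svint32_t a)
{
  svint32_t t = svadd_n_s32_x (pg, a, 1);  /* Unique full name.  */
  return svadd_x (pg, t, 1);               /* Overloaded alias.  */
}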
-+ -+ The classes involved in compiling a function call are: -+ -+ - function_resolver, which resolves an overloaded function call to a -+ specific function_instance and its associated function decl -+ -+ - function_checker, which checks whether the values of the arguments -+ conform to the ACLE specification -+ -+ - gimple_folder, which tries to fold a function call at the gimple level -+ -+ - function_expander, which expands a function call into rtl instructions -+ -+ function_resolver and function_checker operate at the language level -+ and so are associated with the function_shape. gimple_folder and -+ function_expander are concerned with the behavior of the function -+ and so are associated with the function_base. -+ -+ Note that we've specifically chosen not to fold calls in the frontend, -+ since SVE intrinsics will hardly ever fold a useful language-level -+ constant. */ -+namespace aarch64_sve -+{ -+/* The maximum number of vectors in an ACLE tuple type. */ -+const unsigned int MAX_TUPLE_SIZE = 4; -+ -+/* Used to represent the default merge argument index for _m functions. -+ The actual index depends on how many arguments the function takes. */ -+const unsigned int DEFAULT_MERGE_ARGNO = ~0U; -+ -+/* Flags that describe what a function might do, in addition to reading -+ its arguments and returning a result. */ -+const unsigned int CP_READ_FPCR = 1U << 0; -+const unsigned int CP_RAISE_FP_EXCEPTIONS = 1U << 1; -+const unsigned int CP_READ_MEMORY = 1U << 2; -+const unsigned int CP_PREFETCH_MEMORY = 1U << 3; -+const unsigned int CP_WRITE_MEMORY = 1U << 4; -+const unsigned int CP_READ_FFR = 1U << 5; -+const unsigned int CP_WRITE_FFR = 1U << 6; -+ -+/* Enumerates the SVE predicate and (data) vector types, together called -+ "vector types" for brevity. */ -+enum vector_type_index -+{ -+#define DEF_SVE_TYPE(ACLE_NAME, NCHARS, ABI_NAME, SCALAR_TYPE) \ -+ VECTOR_TYPE_ ## ACLE_NAME, -+#include "aarch64-sve-builtins.def" -+ NUM_VECTOR_TYPES -+}; -+ -+/* Classifies the available measurement units for an address displacement. */ -+enum units_index -+{ -+ UNITS_none, -+ UNITS_bytes, -+ UNITS_elements, -+ UNITS_vectors -+}; -+ -+/* Describes the various uses of a governing predicate. */ -+enum predication_index -+{ -+ /* No governing predicate is present. */ -+ PRED_none, -+ -+ /* A governing predicate is present but there is no predication suffix -+ associated with it. This is used when the result is neither a vector -+ nor a predicate, since the distinction between "zeroing" and "merging" -+ doesn't apply in that case. It is also used when a suffix would be -+ redundant (such as for loads and comparisons, which are inherently -+ zeroing operations). */ -+ PRED_implicit, -+ -+ /* Merging predication: copy inactive lanes from the first data argument -+ to the vector result. */ -+ PRED_m, -+ -+ /* "Don't care" predication: set inactive lanes of the vector result -+ to arbitrary values. */ -+ PRED_x, -+ -+ /* Zero predication: set inactive lanes of the vector result to zero. */ -+ PRED_z, -+ -+ NUM_PREDS -+}; -+ -+/* Classifies element types, based on type suffixes with the bit count -+ removed. */ -+enum type_class_index -+{ -+ TYPE_bool, -+ TYPE_bfloat, -+ TYPE_float, -+ TYPE_signed, -+ TYPE_unsigned, -+ NUM_TYPE_CLASSES -+}; -+ -+/* Classifies an operation into "modes"; for example, to distinguish -+ vector-scalar operations from vector-vector operations, or to -+ distinguish between different addressing modes. 
This classification -+ accounts for the function suffixes that occur between the base name -+ and the first type suffix. */ -+enum mode_suffix_index -+{ -+#define DEF_SVE_MODE(NAME, BASE, DISPLACEMENT, UNITS) MODE_##NAME, -+#include "aarch64-sve-builtins.def" -+ MODE_none -+}; -+ -+/* Enumerates the possible type suffixes. Each suffix is associated with -+ a vector type, but for predicates provides extra information about the -+ element size. */ -+enum type_suffix_index -+{ -+#define DEF_SVE_TYPE_SUFFIX(NAME, ACLE_TYPE, CLASS, BITS, MODE) \ -+ TYPE_SUFFIX_ ## NAME, -+#include "aarch64-sve-builtins.def" -+ NUM_TYPE_SUFFIXES -+}; -+ -+/* Combines two type suffixes. */ -+typedef enum type_suffix_index type_suffix_pair[2]; -+ -+class function_base; -+class function_shape; -+ -+/* Static information about a mode suffix. */ -+struct mode_suffix_info -+{ -+ /* The suffix string itself. */ -+ const char *string; -+ -+ /* The type of the vector base address, or NUM_VECTOR_TYPES if the -+ mode does not include a vector base address. */ -+ vector_type_index base_vector_type; -+ -+ /* The type of the vector displacement, or NUM_VECTOR_TYPES if the -+ mode does not include a vector displacement. (Note that scalar -+ displacements are always int64_t.) */ -+ vector_type_index displacement_vector_type; -+ -+ /* The units in which the vector or scalar displacement is measured, -+ or UNITS_none if the mode doesn't take a displacement. */ -+ units_index displacement_units; -+}; -+ -+/* Static information about a type suffix. */ -+struct type_suffix_info -+{ -+ /* The suffix string itself. */ -+ const char *string; -+ -+ /* The associated ACLE vector or predicate type. */ -+ vector_type_index vector_type : 8; -+ -+ /* What kind of type the suffix represents. */ -+ type_class_index tclass : 8; -+ -+ /* The number of bits and bytes in an element. For predicates this -+ measures the associated data elements. */ -+ unsigned int element_bits : 8; -+ unsigned int element_bytes : 8; -+ -+ /* True if the suffix is for an integer type. */ -+ unsigned int integer_p : 1; -+ /* True if the suffix is for an unsigned type. */ -+ unsigned int unsigned_p : 1; -+ /* True if the suffix is for a floating-point type. */ -+ unsigned int float_p : 1; -+ /* True if the suffix is for a boolean type. */ -+ unsigned int bool_p : 1; -+ unsigned int spare : 12; -+ -+ /* The associated vector or predicate mode. */ -+ machine_mode vector_mode : 16; -+}; -+ -+/* Static information about a set of functions. */ -+struct function_group_info -+{ -+ /* The base name, as a string. */ -+ const char *base_name; -+ -+ /* Describes the behavior associated with the function base name. */ -+ const function_base *const *base; -+ -+ /* The shape of the functions, as described above the class definition. -+ It's possible to have entries with the same base name but different -+ shapes. */ -+ const function_shape *const *shape; -+ -+ /* A list of the available type suffixes, and of the available predication -+ types. The function supports every combination of the two. -+ -+ The list of type suffixes is terminated by two NUM_TYPE_SUFFIXES -+ while the list of predication types is terminated by NUM_PREDS. -+ The list of type suffixes is lexicographically ordered based -+ on the index value. */ -+ const type_suffix_pair *types; -+ const predication_index *preds; -+ -+ /* The architecture extensions that the functions require, as a set of -+ AARCH64_FL_* flags. */ -+ uint64_t required_extensions; -+}; -+ -+/* Describes a single fully-resolved function (i.e. 
one that has a -+ unique full name). */ -+class GTY((user)) function_instance -+{ -+public: -+ function_instance (const char *, const function_base *, -+ const function_shape *, mode_suffix_index, -+ const type_suffix_pair &, predication_index); -+ -+ bool operator== (const function_instance &) const; -+ bool operator!= (const function_instance &) const; -+ hashval_t hash () const; -+ -+ unsigned int call_properties () const; -+ bool reads_global_state_p () const; -+ bool modifies_global_state_p () const; -+ bool could_trap_p () const; -+ -+ unsigned int vectors_per_tuple () const; -+ tree memory_scalar_type () const; -+ machine_mode memory_vector_mode () const; -+ -+ const mode_suffix_info &mode_suffix () const; -+ tree base_vector_type () const; -+ tree displacement_vector_type () const; -+ units_index displacement_units () const; -+ -+ const type_suffix_info &type_suffix (unsigned int) const; -+ tree scalar_type (unsigned int) const; -+ tree vector_type (unsigned int) const; -+ tree tuple_type (unsigned int) const; -+ unsigned int elements_per_vq (unsigned int i) const; -+ machine_mode vector_mode (unsigned int) const; -+ machine_mode gp_mode (unsigned int) const; -+ -+ /* The properties of the function. (The explicit "enum"s are required -+ for gengtype.) */ -+ const char *base_name; -+ const function_base *base; -+ const function_shape *shape; -+ enum mode_suffix_index mode_suffix_id; -+ type_suffix_pair type_suffix_ids; -+ enum predication_index pred; -+}; -+ -+class registered_function; -+ -+/* A class for building and registering function decls. */ -+class function_builder -+{ -+public: -+ function_builder (); -+ ~function_builder (); -+ -+ void add_unique_function (const function_instance &, tree, -+ vec &, uint64_t, bool); -+ void add_overloaded_function (const function_instance &, uint64_t); -+ void add_overloaded_functions (const function_group_info &, -+ mode_suffix_index); -+ -+ void register_function_group (const function_group_info &); -+ -+private: -+ void append_name (const char *); -+ char *finish_name (); -+ -+ char *get_name (const function_instance &, bool); -+ -+ tree get_attributes (const function_instance &); -+ -+ registered_function &add_function (const function_instance &, -+ const char *, tree, tree, uint64_t, bool); -+ -+ /* The function type to use for functions that are resolved by -+ function_resolver. */ -+ tree m_overload_type; -+ -+ /* True if we should create a separate decl for each instance of an -+ overloaded function, instead of using function_resolver. */ -+ bool m_direct_overloads; -+ -+ /* Used for building up function names. */ -+ obstack m_string_obstack; -+ -+ /* Maps all overloaded function names that we've registered so far -+ to their associated function_instances. */ -+ hash_map m_overload_names; -+}; -+ -+/* A base class for handling calls to built-in functions. */ -+class function_call_info : public function_instance -+{ -+public: -+ function_call_info (location_t, const function_instance &, tree); -+ -+ bool function_returns_void_p (); -+ -+ /* The location of the call. */ -+ location_t location; -+ -+ /* The FUNCTION_DECL that is being called. */ -+ tree fndecl; -+}; -+ -+/* A class for resolving an overloaded function call. 
*/ -+class function_resolver : public function_call_info -+{ -+public: -+ enum { SAME_SIZE = 256, HALF_SIZE, QUARTER_SIZE }; -+ static const type_class_index SAME_TYPE_CLASS = NUM_TYPE_CLASSES; -+ -+ function_resolver (location_t, const function_instance &, tree, -+ vec &); -+ -+ tree get_vector_type (type_suffix_index); -+ const char *get_scalar_type_name (type_suffix_index); -+ tree get_argument_type (unsigned int); -+ bool scalar_argument_p (unsigned int); -+ -+ tree report_no_such_form (type_suffix_index); -+ tree lookup_form (mode_suffix_index, -+ type_suffix_index = NUM_TYPE_SUFFIXES, -+ type_suffix_index = NUM_TYPE_SUFFIXES); -+ tree resolve_to (mode_suffix_index, -+ type_suffix_index = NUM_TYPE_SUFFIXES, -+ type_suffix_index = NUM_TYPE_SUFFIXES); -+ -+ type_suffix_index infer_integer_scalar_type (unsigned int); -+ type_suffix_index infer_pointer_type (unsigned int, bool = false); -+ type_suffix_index infer_vector_or_tuple_type (unsigned int, unsigned int); -+ type_suffix_index infer_vector_type (unsigned int); -+ type_suffix_index infer_integer_vector_type (unsigned int); -+ type_suffix_index infer_unsigned_vector_type (unsigned int); -+ type_suffix_index infer_sd_vector_type (unsigned int); -+ type_suffix_index infer_tuple_type (unsigned int); -+ -+ bool require_vector_or_scalar_type (unsigned int); -+ -+ bool require_vector_type (unsigned int, vector_type_index); -+ bool require_matching_vector_type (unsigned int, type_suffix_index); -+ bool require_derived_vector_type (unsigned int, unsigned int, -+ type_suffix_index, -+ type_class_index = SAME_TYPE_CLASS, -+ unsigned int = SAME_SIZE); -+ -+ bool require_scalar_type (unsigned int, const char *); -+ bool require_pointer_type (unsigned int); -+ bool require_matching_integer_scalar_type (unsigned int, unsigned int, -+ type_suffix_index); -+ bool require_derived_scalar_type (unsigned int, type_class_index, -+ unsigned int = SAME_SIZE); -+ bool require_matching_pointer_type (unsigned int, unsigned int, -+ type_suffix_index); -+ bool require_integer_immediate (unsigned int); -+ -+ vector_type_index infer_vector_base_type (unsigned int); -+ vector_type_index infer_vector_displacement_type (unsigned int); -+ -+ mode_suffix_index resolve_sv_displacement (unsigned int, -+ type_suffix_index, bool); -+ mode_suffix_index resolve_gather_address (unsigned int, -+ type_suffix_index, bool); -+ mode_suffix_index resolve_adr_address (unsigned int); -+ -+ bool check_num_arguments (unsigned int); -+ bool check_gp_argument (unsigned int, unsigned int &, unsigned int &); -+ tree resolve_unary (type_class_index = SAME_TYPE_CLASS, -+ unsigned int = SAME_SIZE, bool = false); -+ tree resolve_uniform (unsigned int, unsigned int = 0); -+ tree resolve_uniform_opt_n (unsigned int); -+ tree finish_opt_n_resolution (unsigned int, unsigned int, type_suffix_index, -+ type_class_index = SAME_TYPE_CLASS, -+ unsigned int = SAME_SIZE, -+ type_suffix_index = NUM_TYPE_SUFFIXES); -+ -+ tree resolve (); -+ -+private: -+ /* The arguments to the overloaded function. */ -+ vec &m_arglist; -+}; -+ -+/* A class for checking that the semantic constraints on a function call are -+ satisfied, such as arguments being integer constant expressions with -+ a particular range. The parent class's FNDECL is the decl that was -+ called in the original source, before overload resolution. 
*/ -+class function_checker : public function_call_info -+{ -+public: -+ function_checker (location_t, const function_instance &, tree, -+ tree, unsigned int, tree *); -+ -+ bool require_immediate_either_or (unsigned int, HOST_WIDE_INT, -+ HOST_WIDE_INT); -+ bool require_immediate_enum (unsigned int, tree); -+ bool require_immediate_lane_index (unsigned int, unsigned int = 1); -+ bool require_immediate_one_of (unsigned int, HOST_WIDE_INT, HOST_WIDE_INT, -+ HOST_WIDE_INT, HOST_WIDE_INT); -+ bool require_immediate_range (unsigned int, HOST_WIDE_INT, HOST_WIDE_INT); -+ -+ bool check (); -+ -+private: -+ bool argument_exists_p (unsigned int); -+ -+ bool require_immediate (unsigned int, HOST_WIDE_INT &); -+ -+ /* The type of the resolved function. */ -+ tree m_fntype; -+ -+ /* The arguments to the function. */ -+ unsigned int m_nargs; -+ tree *m_args; -+ -+ /* The first argument not associated with the function's predication -+ type. */ -+ unsigned int m_base_arg; -+}; -+ -+/* A class for folding a gimple function call. */ -+class gimple_folder : public function_call_info -+{ -+public: -+ gimple_folder (const function_instance &, tree, -+ gimple_stmt_iterator *, gcall *); -+ -+ tree convert_pred (gimple_seq &, tree, unsigned int); -+ tree fold_contiguous_base (gimple_seq &, tree); -+ tree load_store_cookie (tree); -+ -+ gimple *redirect_call (const function_instance &); -+ gimple *fold_to_pfalse (); -+ gimple *fold_to_ptrue (); -+ gimple *fold_to_vl_pred (unsigned int); -+ -+ gimple *fold (); -+ -+ /* Where to insert extra statements that feed the final replacement. */ -+ gimple_stmt_iterator *gsi; -+ -+ /* The call we're folding. */ -+ gcall *call; -+ -+ /* The result of the call, or null if none. */ -+ tree lhs; -+}; -+ -+/* A class for expanding a function call into RTL. 
*/ -+class function_expander : public function_call_info -+{ -+public: -+ function_expander (const function_instance &, tree, tree, rtx); -+ rtx expand (); -+ -+ insn_code direct_optab_handler (optab, unsigned int = 0); -+ insn_code direct_optab_handler_for_sign (optab, optab, unsigned int = 0, -+ machine_mode = E_VOIDmode); -+ -+ bool overlaps_input_p (rtx); -+ -+ rtx get_contiguous_base (machine_mode); -+ rtx get_fallback_value (machine_mode, unsigned int, -+ unsigned int, unsigned int &); -+ rtx get_reg_target (); -+ rtx get_nonoverlapping_reg_target (); -+ -+ void add_output_operand (insn_code); -+ void add_input_operand (insn_code, rtx); -+ void add_integer_operand (HOST_WIDE_INT); -+ void add_mem_operand (machine_mode, rtx); -+ void add_address_operand (rtx); -+ void add_fixed_operand (rtx); -+ rtx generate_insn (insn_code); -+ -+ void prepare_gather_address_operands (unsigned int, bool = true); -+ void prepare_prefetch_operands (); -+ void add_ptrue_hint (unsigned int, machine_mode); -+ void rotate_inputs_left (unsigned int, unsigned int); -+ bool try_negating_argument (unsigned int, machine_mode); -+ -+ rtx use_exact_insn (insn_code); -+ rtx use_unpred_insn (insn_code); -+ rtx use_pred_x_insn (insn_code); -+ rtx use_cond_insn (insn_code, unsigned int = DEFAULT_MERGE_ARGNO); -+ rtx use_vcond_mask_insn (insn_code, unsigned int = DEFAULT_MERGE_ARGNO); -+ rtx use_contiguous_load_insn (insn_code); -+ rtx use_contiguous_prefetch_insn (insn_code); -+ rtx use_contiguous_store_insn (insn_code); -+ -+ rtx map_to_rtx_codes (rtx_code, rtx_code, int, -+ unsigned int = DEFAULT_MERGE_ARGNO); -+ rtx map_to_unspecs (int, int, int, unsigned int = DEFAULT_MERGE_ARGNO); -+ rtx expand_signed_unpred_op (rtx_code, rtx_code); -+ -+ /* The function call expression. */ -+ tree call_expr; -+ -+ /* For functions that return a value, this is the preferred location -+ of that value. It could be null or could have a different mode -+ from the function return type. */ -+ rtx possible_target; -+ -+ /* The expanded arguments. */ -+ auto_vec args; -+ -+private: -+ /* Used to build up the operands to an instruction. */ -+ auto_vec m_ops; -+}; -+ -+/* Provides information about a particular function base name, and handles -+ tasks related to the base name. */ -+class function_base -+{ -+public: -+ /* Return a set of CP_* flags that describe what the function might do, -+ in addition to reading its arguments and returning a result. */ -+ virtual unsigned int call_properties (const function_instance &) const; -+ -+ /* If the function operates on tuples of vectors, return the number -+ of vectors in the tuples, otherwise return 1. */ -+ virtual unsigned int vectors_per_tuple () const { return 1; } -+ -+ /* If the function addresses memory, return the type of a single -+ scalar memory element. */ -+ virtual tree -+ memory_scalar_type (const function_instance &) const -+ { -+ gcc_unreachable (); -+ } -+ -+ /* If the function addresses memory, return a vector mode whose -+ GET_MODE_NUNITS is the number of elements addressed and whose -+ GET_MODE_INNER is the mode of a single scalar memory element. */ -+ virtual machine_mode -+ memory_vector_mode (const function_instance &) const -+ { -+ gcc_unreachable (); -+ } -+ -+ /* Try to fold the given gimple call. Return the new gimple statement -+ on success, otherwise return null. */ -+ virtual gimple *fold (gimple_folder &) const { return NULL; } -+ -+ /* Expand the given call into rtl. 
Return the result of the function, -+ or an arbitrary value if the function doesn't return a result. */ -+ virtual rtx expand (function_expander &) const = 0; -+}; -+ -+/* Classifies functions into "shapes". The idea is to take all the -+ type signatures for a set of functions, remove the governing predicate -+ (if any), and classify what's left based on: -+ -+ - the number of arguments -+ -+ - the process of determining the types in the signature from the mode -+ and type suffixes in the function name (including types that are not -+ affected by the suffixes) -+ -+ - which arguments must be integer constant expressions, and what range -+ those arguments have -+ -+ - the process for mapping overloaded names to "full" names. */ -+class function_shape -+{ -+public: -+ virtual bool explicit_type_suffix_p (unsigned int) const = 0; -+ -+ /* Define all functions associated with the given group. */ -+ virtual void build (function_builder &, -+ const function_group_info &) const = 0; -+ -+ /* Try to resolve the overloaded call. Return the non-overloaded -+ function decl on success and error_mark_node on failure. */ -+ virtual tree resolve (function_resolver &) const = 0; -+ -+ /* Check whether the given call is semantically valid. Return true -+ if it is, otherwise report an error and return false. */ -+ virtual bool check (function_checker &) const { return true; } -+}; -+ -+/* RAII class for enabling enough SVE features to define the built-in -+ types and implement the arm_sve.h pragma. */ -+class sve_switcher -+{ -+public: -+ sve_switcher (); -+ ~sve_switcher (); -+ -+private: -+ unsigned long m_old_isa_flags; -+ bool m_old_have_regs_of_mode[MAX_MACHINE_MODE]; -+}; -+ -+extern const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1]; -+extern const mode_suffix_info mode_suffixes[MODE_none + 1]; -+ -+extern tree scalar_types[NUM_VECTOR_TYPES]; -+extern tree acle_vector_types[MAX_TUPLE_SIZE][NUM_VECTOR_TYPES + 1]; -+extern tree acle_svpattern; -+extern tree acle_svprfop; -+ -+/* Return the ACLE type svbool_t. */ -+inline tree -+get_svbool_t (void) -+{ -+ return acle_vector_types[0][VECTOR_TYPE_svbool_t]; -+} -+ -+/* Try to find a mode with the given mode_suffix_info fields. Return the -+ mode on success or MODE_none on failure. */ -+inline mode_suffix_index -+find_mode_suffix (vector_type_index base_vector_type, -+ vector_type_index displacement_vector_type, -+ units_index displacement_units) -+{ -+ for (unsigned int mode_i = 0; mode_i < ARRAY_SIZE (mode_suffixes); ++mode_i) -+ { -+ const mode_suffix_info &mode = mode_suffixes[mode_i]; -+ if (mode.base_vector_type == base_vector_type -+ && mode.displacement_vector_type == displacement_vector_type -+ && mode.displacement_units == displacement_units) -+ return mode_suffix_index (mode_i); -+ } -+ return MODE_none; -+} -+ -+/* Return the type suffix associated with ELEMENT_BITS-bit elements of type -+ class TCLASS. */ -+inline type_suffix_index -+find_type_suffix (type_class_index tclass, unsigned int element_bits) -+{ -+ for (unsigned int i = 0; i < NUM_TYPE_SUFFIXES; ++i) -+ if (type_suffixes[i].tclass == tclass -+ && type_suffixes[i].element_bits == element_bits) -+ return type_suffix_index (i); -+ gcc_unreachable (); -+} -+ -+/* Return the single field in tuple type TYPE. 
*/ -+inline tree -+tuple_type_field (tree type) -+{ -+ for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) -+ if (TREE_CODE (field) == FIELD_DECL) -+ return field; -+ gcc_unreachable (); -+} -+ -+inline function_instance:: -+function_instance (const char *base_name_in, -+ const function_base *base_in, -+ const function_shape *shape_in, -+ mode_suffix_index mode_suffix_id_in, -+ const type_suffix_pair &type_suffix_ids_in, -+ predication_index pred_in) -+ : base_name (base_name_in), base (base_in), shape (shape_in), -+ mode_suffix_id (mode_suffix_id_in), pred (pred_in) -+{ -+ memcpy (type_suffix_ids, type_suffix_ids_in, sizeof (type_suffix_ids)); -+} -+ -+inline bool -+function_instance::operator== (const function_instance &other) const -+{ -+ return (base == other.base -+ && shape == other.shape -+ && mode_suffix_id == other.mode_suffix_id -+ && pred == other.pred -+ && type_suffix_ids[0] == other.type_suffix_ids[0] -+ && type_suffix_ids[1] == other.type_suffix_ids[1]); -+} -+ -+inline bool -+function_instance::operator!= (const function_instance &other) const -+{ -+ return !operator== (other); -+} -+ -+/* If the function operates on tuples of vectors, return the number -+ of vectors in the tuples, otherwise return 1. */ -+inline unsigned int -+function_instance::vectors_per_tuple () const -+{ -+ return base->vectors_per_tuple (); -+} -+ -+/* If the function addresses memory, return the type of a single -+ scalar memory element. */ -+inline tree -+function_instance::memory_scalar_type () const -+{ -+ return base->memory_scalar_type (*this); -+} -+ -+/* If the function addresses memory, return a vector mode whose -+ GET_MODE_NUNITS is the number of elements addressed and whose -+ GET_MODE_INNER is the mode of a single scalar memory element. */ -+inline machine_mode -+function_instance::memory_vector_mode () const -+{ -+ return base->memory_vector_mode (*this); -+} -+ -+/* Return information about the function's mode suffix. */ -+inline const mode_suffix_info & -+function_instance::mode_suffix () const -+{ -+ return mode_suffixes[mode_suffix_id]; -+} -+ -+/* Return the type of the function's vector base address argument, -+ or null it doesn't have a vector base address. */ -+inline tree -+function_instance::base_vector_type () const -+{ -+ return acle_vector_types[0][mode_suffix ().base_vector_type]; -+} -+ -+/* Return the type of the function's vector index or offset argument, -+ or null if doesn't have a vector index or offset argument. */ -+inline tree -+function_instance::displacement_vector_type () const -+{ -+ return acle_vector_types[0][mode_suffix ().displacement_vector_type]; -+} -+ -+/* If the function takes a vector or scalar displacement, return the units -+ in which the displacement is measured, otherwise return UNITS_none. */ -+inline units_index -+function_instance::displacement_units () const -+{ -+ return mode_suffix ().displacement_units; -+} -+ -+/* Return information about type suffix I. */ -+inline const type_suffix_info & -+function_instance::type_suffix (unsigned int i) const -+{ -+ return type_suffixes[type_suffix_ids[i]]; -+} -+ -+/* Return the scalar type associated with type suffix I. */ -+inline tree -+function_instance::scalar_type (unsigned int i) const -+{ -+ return scalar_types[type_suffix (i).vector_type]; -+} -+ -+/* Return the vector type associated with type suffix I. 
*/ -+inline tree -+function_instance::vector_type (unsigned int i) const -+{ -+ return acle_vector_types[0][type_suffix (i).vector_type]; -+} -+ -+/* If the function operates on tuples of vectors, return the tuple type -+ associated with type suffix I, otherwise return the vector type associated -+ with type suffix I. */ -+inline tree -+function_instance::tuple_type (unsigned int i) const -+{ -+ unsigned int num_vectors = vectors_per_tuple (); -+ return acle_vector_types[num_vectors - 1][type_suffix (i).vector_type]; -+} -+ -+/* Return the number of elements of type suffix I that fit within a -+ 128-bit block. */ -+inline unsigned int -+function_instance::elements_per_vq (unsigned int i) const -+{ -+ return 128 / type_suffix (i).element_bits; -+} -+ -+/* Return the vector or predicate mode associated with type suffix I. */ -+inline machine_mode -+function_instance::vector_mode (unsigned int i) const -+{ -+ return type_suffix (i).vector_mode; -+} -+ -+/* Return the mode of the governing predicate to use when operating on -+ type suffix I. */ -+inline machine_mode -+function_instance::gp_mode (unsigned int i) const -+{ -+ return aarch64_sve_pred_mode (type_suffix (i).element_bytes).require (); -+} -+ -+/* Return true if the function has no return value. */ -+inline bool -+function_call_info::function_returns_void_p () -+{ -+ return TREE_TYPE (TREE_TYPE (fndecl)) == void_type_node; -+} -+ -+/* Default implementation of function::call_properties, with conservatively -+ correct behavior for floating-point instructions. */ -+inline unsigned int -+function_base::call_properties (const function_instance &instance) const -+{ -+ unsigned int flags = 0; -+ if (instance.type_suffix (0).float_p || instance.type_suffix (1).float_p) -+ flags |= CP_READ_FPCR | CP_RAISE_FP_EXCEPTIONS; -+ return flags; -+} -+ -+} -+ -+#endif -diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md -index 02d33b727..11198e8a9 100644 ---- a/gcc/config/aarch64/aarch64-sve.md -+++ b/gcc/config/aarch64/aarch64-sve.md -@@ -18,8 +18,168 @@ - ;; along with GCC; see the file COPYING3. If not see - ;; . 
- --;; Note on the handling of big-endian SVE --;; -------------------------------------- -+;; The file is organised into the following sections (search for the full -+;; line): -+;; -+;; == General notes -+;; ---- Note on the handling of big-endian SVE -+;; ---- Description of UNSPEC_PTEST -+;; ---- Description of UNSPEC_PRED_Z -+;; ---- Note on predicated integer arithemtic and UNSPEC_PRED_X -+;; ---- Note on predicated FP arithmetic patterns and GP "strictness" -+;; ---- Note on FFR handling -+;; -+;; == Moves -+;; ---- Moves of single vectors -+;; ---- Moves of multiple vectors -+;; ---- Moves of predicates -+;; ---- Moves relating to the FFR -+;; -+;; == Loads -+;; ---- Normal contiguous loads -+;; ---- Extending contiguous loads -+;; ---- First-faulting contiguous loads -+;; ---- First-faulting extending contiguous loads -+;; ---- Non-temporal contiguous loads -+;; ---- Normal gather loads -+;; ---- Extending gather loads -+;; ---- First-faulting gather loads -+;; ---- First-faulting extending gather loads -+;; -+;; == Prefetches -+;; ---- Contiguous prefetches -+;; ---- Gather prefetches -+;; -+;; == Stores -+;; ---- Normal contiguous stores -+;; ---- Truncating contiguous stores -+;; ---- Non-temporal contiguous stores -+;; ---- Normal scatter stores -+;; ---- Truncating scatter stores -+;; -+;; == Vector creation -+;; ---- [INT,FP] Duplicate element -+;; ---- [INT,FP] Initialize from individual elements -+;; ---- [INT] Linear series -+;; ---- [PRED] Duplicate element -+;; -+;; == Vector decomposition -+;; ---- [INT,FP] Extract index -+;; ---- [INT,FP] Extract active element -+;; ---- [PRED] Extract index -+;; -+;; == Unary arithmetic -+;; ---- [INT] General unary arithmetic corresponding to rtx codes -+;; ---- [INT] General unary arithmetic corresponding to unspecs -+;; ---- [INT] Sign extension -+;; ---- [INT] Zero extension -+;; ---- [INT] Logical inverse -+;; ---- [FP<-INT] General unary arithmetic that maps to unspecs -+;; ---- [FP] General unary arithmetic corresponding to unspecs -+;; ---- [PRED] Inverse -+ -+;; == Binary arithmetic -+;; ---- [INT] General binary arithmetic corresponding to rtx codes -+;; ---- [INT] Addition -+;; ---- [INT] Subtraction -+;; ---- [INT] Take address -+;; ---- [INT] Absolute difference -+;; ---- [INT] Saturating addition and subtraction -+;; ---- [INT] Highpart multiplication -+;; ---- [INT] Division -+;; ---- [INT] Binary logical operations -+;; ---- [INT] Binary logical operations (inverted second input) -+;; ---- [INT] Shifts (rounding towards -Inf) -+;; ---- [INT] Shifts (rounding towards 0) -+;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs -+;; ---- [FP] General binary arithmetic corresponding to rtx codes -+;; ---- [FP] General binary arithmetic corresponding to unspecs -+;; ---- [FP] Addition -+;; ---- [FP] Complex addition -+;; ---- [FP] Subtraction -+;; ---- [FP] Absolute difference -+;; ---- [FP] Multiplication -+;; ---- [FP] Binary logical operations -+;; ---- [FP] Sign copying -+;; ---- [FP] Maximum and minimum -+;; ---- [PRED] Binary logical operations -+;; ---- [PRED] Binary logical operations (inverted second input) -+;; ---- [PRED] Binary logical operations (inverted result) -+;; -+;; == Ternary arithmetic -+;; ---- [INT] MLA and MAD -+;; ---- [INT] MLS and MSB -+;; ---- [INT] Dot product -+;; ---- [INT] Sum of absolute differences -+;; ---- [INT] Matrix multiply-accumulate -+;; ---- [FP] General ternary arithmetic corresponding to unspecs -+;; ---- [FP] Complex multiply-add -+;; ---- [FP] Trigonometric 
multiply-add -+;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF) -+;; ---- [FP] Matrix multiply-accumulate -+;; -+;; == Comparisons and selects -+;; ---- [INT,FP] Select based on predicates -+;; ---- [INT,FP] Compare and select -+;; ---- [INT] Comparisons -+;; ---- [INT] While tests -+;; ---- [FP] Direct comparisons -+;; ---- [FP] Absolute comparisons -+;; ---- [PRED] Select -+;; ---- [PRED] Test bits -+;; -+;; == Reductions -+;; ---- [INT,FP] Conditional reductions -+;; ---- [INT] Tree reductions -+;; ---- [FP] Tree reductions -+;; ---- [FP] Left-to-right reductions -+;; -+;; == Permutes -+;; ---- [INT,FP] General permutes -+;; ---- [INT,FP] Special-purpose unary permutes -+;; ---- [INT,FP] Special-purpose binary permutes -+;; ---- [PRED] Special-purpose unary permutes -+;; ---- [PRED] Special-purpose binary permutes -+;; -+;; == Conversions -+;; ---- [INT<-INT] Packs -+;; ---- [INT<-INT] Unpacks -+;; ---- [INT<-FP] Conversions -+;; ---- [INT<-FP] Packs -+;; ---- [INT<-FP] Unpacks -+;; ---- [FP<-INT] Conversions -+;; ---- [FP<-INT] Packs -+;; ---- [FP<-INT] Unpacks -+;; ---- [FP<-FP] Packs -+;; ---- [FP<-FP] Packs (bfloat16) -+;; ---- [FP<-FP] Unpacks -+;; ---- [PRED<-PRED] Packs -+;; ---- [PRED<-PRED] Unpacks -+;; -+;; == Vector partitioning -+;; ---- [PRED] Unary partitioning -+;; ---- [PRED] Binary partitioning -+;; ---- [PRED] Scalarization -+;; -+;; == Counting elements -+;; ---- [INT] Count elements in a pattern (scalar) -+;; ---- [INT] Increment by the number of elements in a pattern (scalar) -+;; ---- [INT] Increment by the number of elements in a pattern (vector) -+;; ---- [INT] Decrement by the number of elements in a pattern (scalar) -+;; ---- [INT] Decrement by the number of elements in a pattern (vector) -+;; ---- [INT] Count elements in a predicate (scalar) -+;; ---- [INT] Increment by the number of elements in a predicate (scalar) -+;; ---- [INT] Increment by the number of elements in a predicate (vector) -+;; ---- [INT] Decrement by the number of elements in a predicate (scalar) -+;; ---- [INT] Decrement by the number of elements in a predicate (vector) -+ -+;; ========================================================================= -+;; == General notes -+;; ========================================================================= -+;; -+;; ------------------------------------------------------------------------- -+;; ---- Note on the handling of big-endian SVE -+;; ------------------------------------------------------------------------- - ;; - ;; On big-endian systems, Advanced SIMD mov patterns act in the - ;; same way as movdi or movti would: the first byte of memory goes -@@ -59,12 +219,339 @@ - ;; the order of the bytes within the elements is different. We instead - ;; access spill slots via LD1 and ST1, using secondary reloads to - ;; reserve a predicate register. -+;; -+;; ------------------------------------------------------------------------- -+;; ---- Description of UNSPEC_PTEST -+;; ------------------------------------------------------------------------- -+;; -+;; SVE provides a PTEST instruction for testing the active lanes of a -+;; predicate and setting the flags based on the result. 
The associated -+;; condition code tests are: -+;; -+;; - any (= ne): at least one active bit is set -+;; - none (= eq): all active bits are clear (*) -+;; - first (= mi): the first active bit is set -+;; - nfrst (= pl): the first active bit is clear (*) -+;; - last (= cc): the last active bit is set -+;; - nlast (= cs): the last active bit is clear (*) -+;; -+;; where the conditions marked (*) are also true when there are no active -+;; lanes (i.e. when the governing predicate is a PFALSE). The flags results -+;; of a PTEST use the condition code mode CC_NZC. -+;; -+;; PTEST is always a .B operation (i.e. it always operates on VNx16BI). -+;; This means that for other predicate modes, we need a governing predicate -+;; in which all bits are defined. -+;; -+;; For example, most predicated .H operations ignore the odd bits of the -+;; governing predicate, so that an active lane is represented by the -+;; bits "1x" and an inactive lane by the bits "0x", where "x" can be -+;; any value. To test a .H predicate, we instead need "10" and "00" -+;; respectively, so that the condition only tests the even bits of the -+;; predicate. -+;; -+;; Several instructions set the flags as a side-effect, in the same way -+;; that a separate PTEST would. It's important for code quality that we -+;; use these flags results as often as possible, particularly in the case -+;; of WHILE* and RDFFR. -+;; -+;; Also, some of the instructions that set the flags are unpredicated -+;; and instead implicitly test all .B, .H, .S or .D elements, as though -+;; they were predicated on a PTRUE of that size. For example, a .S -+;; WHILELO sets the flags in the same way as a PTEST with a .S PTRUE -+;; would. -+;; -+;; We therefore need to represent PTEST operations in a way that -+;; makes it easy to combine them with both predicated and unpredicated -+;; operations, while using a VNx16BI governing predicate for all -+;; predicate modes. We do this using: -+;; -+;; (unspec:CC_NZC [gp cast_gp ptrue_flag op] UNSPEC_PTEST) -+;; -+;; where: -+;; -+;; - GP is the real VNx16BI governing predicate -+;; -+;; - CAST_GP is GP cast to the mode of OP. All bits dropped by casting -+;; GP to CAST_GP are guaranteed to be clear in GP. -+;; -+;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value -+;; SVE_KNOWN_PTRUE if we know that CAST_GP (rather than GP) is all-true and -+;; SVE_MAYBE_NOT_PTRUE otherwise. -+;; -+;; - OP is the predicate we want to test, of the same mode as CAST_GP. -+;; -+;; ------------------------------------------------------------------------- -+;; ---- Description of UNSPEC_PRED_Z -+;; ------------------------------------------------------------------------- -+;; -+;; SVE integer comparisons are predicated and return zero for inactive -+;; lanes. Sometimes we use them with predicates that are all-true and -+;; sometimes we use them with general predicates. -+;; -+;; The integer comparisons also set the flags and so build-in the effect -+;; of a PTEST. We therefore want to be able to combine integer comparison -+;; patterns with PTESTs of the result. One difficulty with doing this is -+;; that (as noted above) the PTEST is always a .B operation and so can place -+;; stronger requirements on the governing predicate than the comparison does. -+;; -+;; For example, when applying a separate PTEST to the result of a full-vector -+;; .H comparison, the PTEST must be predicated on a .H PTRUE instead of a -+;; .B PTRUE. 
In constrast, the comparison might be predicated on either -+;; a .H PTRUE or a .B PTRUE, since the values of odd-indexed predicate -+;; bits don't matter for .H operations. -+;; -+;; We therefore can't rely on a full-vector comparison using the same -+;; predicate register as a following PTEST. We instead need to remember -+;; whether a comparison is known to be a full-vector comparison and use -+;; this information in addition to a check for equal predicate registers. -+;; At the same time, it's useful to have a common representation for all -+;; integer comparisons, so that they can be handled by a single set of -+;; patterns. -+;; -+;; We therefore take a similar approach to UNSPEC_PTEST above and use: -+;; -+;; (unspec: [gp ptrue_flag (code:M op0 op1)] UNSPEC_PRED_Z) -+;; -+;; where: -+;; -+;; - GP is the governing predicate, of mode -+;; -+;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value -+;; SVE_KNOWN_PTRUE if we know that GP is all-true and SVE_MAYBE_NOT_PTRUE -+;; otherwise -+;; -+;; - CODE is the comparison code -+;; -+;; - OP0 and OP1 are the values being compared, of mode M -+;; -+;; The "Z" in UNSPEC_PRED_Z indicates that inactive lanes are zero. -+;; -+;; ------------------------------------------------------------------------- -+;; ---- Note on predicated integer arithemtic and UNSPEC_PRED_X -+;; ------------------------------------------------------------------------- -+;; -+;; Many SVE integer operations are predicated. We can generate them -+;; from four sources: -+;; -+;; (1) Using normal unpredicated optabs. In this case we need to create -+;; an all-true predicate register to act as the governing predicate -+;; for the SVE instruction. There are no inactive lanes, and thus -+;; the values of inactive lanes don't matter. -+;; -+;; (2) Using _x ACLE functions. In this case the function provides a -+;; specific predicate and some lanes might be inactive. However, -+;; as for (1), the values of the inactive lanes don't matter. -+;; We can make extra lanes active without changing the behavior -+;; (although for code-quality reasons we should avoid doing so -+;; needlessly). -+;; -+;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions. -+;; These optabs have a predicate operand that specifies which lanes are -+;; active and another operand that provides the values of inactive lanes. -+;; -+;; (4) Using _m and _z ACLE functions. These functions map to the same -+;; patterns as (3), with the _z functions setting inactive lanes to zero -+;; and the _m functions setting the inactive lanes to one of the function -+;; arguments. -+;; -+;; For (1) and (2) we need a way of attaching the predicate to a normal -+;; unpredicated integer operation. We do this using: -+;; -+;; (unspec:M [pred (code:M (op0 op1 ...))] UNSPEC_PRED_X) -+;; -+;; where (code:M (op0 op1 ...)) is the normal integer operation and PRED -+;; is a predicate of mode . PRED might or might not be a PTRUE; -+;; it always is for (1), but might not be for (2). -+;; -+;; The unspec as a whole has the same value as (code:M ...) when PRED is -+;; all-true. It is always semantically valid to replace PRED with a PTRUE, -+;; but as noted above, we should only do so if there's a specific benefit. -+;; -+;; (The "_X" in the unspec is named after the ACLE functions in (2).) 
-+;; -+;; For (3) and (4) we can simply use the SVE port's normal representation -+;; of a predicate-based select: -+;; -+;; (unspec:M [pred (code:M (op0 op1 ...)) inactive] UNSPEC_SEL) -+;; -+;; where INACTIVE specifies the values of inactive lanes. -+;; -+;; We can also use the UNSPEC_PRED_X wrapper in the UNSPEC_SEL rather -+;; than inserting the integer operation directly. This is mostly useful -+;; if we want the combine pass to merge an integer operation with an explicit -+;; vcond_mask (in other words, with a following SEL instruction). However, -+;; it's generally better to merge such operations at the gimple level -+;; using (3). -+;; -+;; ------------------------------------------------------------------------- -+;; ---- Note on predicated FP arithmetic patterns and GP "strictness" -+;; ------------------------------------------------------------------------- -+;; -+;; Most SVE floating-point operations are predicated. We can generate -+;; them from four sources: -+;; -+;; (1) Using normal unpredicated optabs. In this case we need to create -+;; an all-true predicate register to act as the governing predicate -+;; for the SVE instruction. There are no inactive lanes, and thus -+;; the values of inactive lanes don't matter. -+;; -+;; (2) Using _x ACLE functions. In this case the function provides a -+;; specific predicate and some lanes might be inactive. However, -+;; as for (1), the values of the inactive lanes don't matter. -+;; -+;; The instruction must have the same exception behavior as the -+;; function call unless things like command-line flags specifically -+;; allow otherwise. For example, with -ffast-math, it is OK to -+;; raise exceptions for inactive lanes, but normally it isn't. -+;; -+;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions. -+;; These optabs have a predicate operand that specifies which lanes are -+;; active and another operand that provides the values of inactive lanes. -+;; -+;; (4) Using _m and _z ACLE functions. These functions map to the same -+;; patterns as (3), with the _z functions setting inactive lanes to zero -+;; and the _m functions setting the inactive lanes to one of the function -+;; arguments. -+;; -+;; So: -+;; -+;; - In (1), the predicate is known to be all true and the pattern can use -+;; unpredicated operations where available. -+;; -+;; - In (2), the predicate might or might not be all true. The pattern can -+;; use unpredicated instructions if the predicate is all-true or if things -+;; like command-line flags allow exceptions for inactive lanes. -+;; -+;; - (3) and (4) represent a native SVE predicated operation. Some lanes -+;; might be inactive and inactive lanes of the result must have specific -+;; values. There is no scope for using unpredicated instructions (and no -+;; reason to want to), so the question about command-line flags doesn't -+;; arise. -+;; -+;; It would be inaccurate to model (2) as an rtx code like (sqrt ...) -+;; in combination with a separate predicate operand, e.g. -+;; -+;; (unspec [(match_operand: 1 "register_operand" "Upl") -+;; (sqrt:SVE_FULL_F 2 "register_operand" "w")] -+;; ....) -+;; -+;; because (sqrt ...) can raise an exception for any lane, including -+;; inactive ones. We therefore need to use an unspec instead. -+;; -+;; Also, (2) requires some way of distinguishing the case in which the -+;; predicate might have inactive lanes and cannot be changed from the -+;; case in which the predicate has no inactive lanes or can be changed. 
-+;; This information is also useful when matching combined FP patterns -+;; in which the predicates might not be equal. -+;; -+;; We therefore model FP operations as an unspec of the form: -+;; -+;; (unspec [pred strictness op0 op1 ...] UNSPEC_COND_) -+;; -+;; where: -+;; -+;; - PRED is the governing predicate. -+;; -+;; - STRICTNESS is a CONST_INT that conceptually has mode SI. It has the -+;; value SVE_STRICT_GP if PRED might have inactive lanes and if those -+;; lanes must remain inactive. It has the value SVE_RELAXED_GP otherwise. -+;; -+;; - OP0 OP1 ... are the normal input operands to the operation. -+;; -+;; - MNEMONIC is the mnemonic of the associated SVE instruction. -+;; -+;; ------------------------------------------------------------------------- -+;; ---- Note on FFR handling -+;; ------------------------------------------------------------------------- -+;; -+;; Logically we want to divide FFR-related instructions into regions -+;; that contain exactly one of: -+;; -+;; - a single write to the FFR -+;; - any number of reads from the FFR (but only one read is likely) -+;; - any number of LDFF1 and LDNF1 instructions -+;; -+;; However, LDFF1 and LDNF1 instructions should otherwise behave like -+;; normal loads as far as possible. This means that they should be -+;; schedulable within a region in the same way that LD1 would be, -+;; and they should be deleted as dead if the result is unused. The loads -+;; should therefore not write to the FFR, since that would both serialize -+;; the loads with respect to each other and keep the loads live for any -+;; later RDFFR. -+;; -+;; We get around this by using a fake "FFR token" (FFRT) to help describe -+;; the dependencies. Writing to the FFRT starts a new "FFRT region", -+;; while using the FFRT keeps the instruction within its region. -+;; Specifically: -+;; -+;; - Writes start a new FFRT region as well as setting the FFR: -+;; -+;; W1: parallel (FFRT = , FFR = ) -+;; -+;; - Loads use an LD1-like instruction that also uses the FFRT, so that the -+;; loads stay within the same FFRT region: -+;; -+;; L1: load data while using the FFRT -+;; -+;; In addition, any FFRT region that includes a load also has at least one -+;; instance of: -+;; -+;; L2: FFR = update(FFR, FFRT) [type == no_insn] -+;; -+;; to make it clear that the region both reads from and writes to the FFR. -+;; -+;; - Reads do the following: -+;; -+;; R1: FFRT = FFR [type == no_insn] -+;; R2: read from the FFRT -+;; R3: FFRT = update(FFRT) [type == no_insn] -+;; -+;; R1 and R3 both create new FFRT regions, so that previous LDFF1s and -+;; LDNF1s cannot move forwards across R1 and later LDFF1s and LDNF1s -+;; cannot move backwards across R3. -+;; -+;; This way, writes are only kept alive by later loads or reads, -+;; and write/read pairs fold normally. For two consecutive reads, -+;; the first R3 is made dead by the second R1, which in turn becomes -+;; redundant with the first R1. We then have: -+;; -+;; first R1: FFRT = FFR -+;; first read from the FFRT -+;; second read from the FFRT -+;; second R3: FFRT = update(FFRT) -+;; -+;; i.e. the two FFRT regions collapse into a single one with two -+;; independent reads. -+;; -+;; The model still prevents some valid optimizations though. For example, -+;; if all loads in an FFRT region are deleted as dead, nothing would remove -+;; the L2 instructions. 
-+ -+;; ========================================================================= -+;; == Moves -+;; ========================================================================= -+ -+;; ------------------------------------------------------------------------- -+;; ---- Moves of single vectors -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - MOV (including aliases) -+;; - LD1B (contiguous form) -+;; - LD1D ( " " ) -+;; - LD1H ( " " ) -+;; - LD1W ( " " ) -+;; - LDR -+;; - ST1B (contiguous form) -+;; - ST1D ( " " ) -+;; - ST1H ( " " ) -+;; - ST1W ( " " ) -+;; - STR -+;; ------------------------------------------------------------------------- - -- --;; SVE data moves. - (define_expand "mov" -- [(set (match_operand:SVE_ALL 0 "nonimmediate_operand") -- (match_operand:SVE_ALL 1 "general_operand"))] -+ [(set (match_operand:SVE_FULL 0 "nonimmediate_operand") -+ (match_operand:SVE_FULL 1 "general_operand"))] - "TARGET_SVE" - { - /* Use the predicated load and store patterns where possible. -@@ -72,7 +559,7 @@ - head of the file) and increases the addressing choices for - little-endian. */ - if ((MEM_P (operands[0]) || MEM_P (operands[1])) -- && can_create_pseudo_p ()) -+ && can_create_pseudo_p ()) - { - aarch64_expand_sve_mem_move (operands[0], operands[1], mode); - DONE; -@@ -80,47 +567,37 @@ - - if (CONSTANT_P (operands[1])) - { -- aarch64_expand_mov_immediate (operands[0], operands[1], -- gen_vec_duplicate); -+ aarch64_expand_mov_immediate (operands[0], operands[1]); - DONE; - } - - /* Optimize subregs on big-endian targets: we can use REV[BHW] - instead of going through memory. */ - if (BYTES_BIG_ENDIAN -- && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1])) -+ && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1])) - DONE; - } - ) - --;; A pattern for optimizing SUBREGs that have a reinterpreting effect --;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move --;; for details. We use a special predicate for operand 2 to reduce --;; the number of patterns. --(define_insn_and_split "*aarch64_sve_mov_subreg_be" -- [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w") -- (unspec:SVE_ALL -- [(match_operand:VNx16BI 1 "register_operand" "Upl") -- (match_operand 2 "aarch64_any_register_operand" "w")] -- UNSPEC_REV_SUBREG))] -- "TARGET_SVE && BYTES_BIG_ENDIAN" -- "#" -- "&& reload_completed" -- [(const_int 0)] -+(define_expand "movmisalign" -+ [(set (match_operand:SVE_FULL 0 "nonimmediate_operand") -+ (match_operand:SVE_FULL 1 "general_operand"))] -+ "TARGET_SVE" - { -- aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]); -+ /* Equivalent to a normal move for our purpooses. */ -+ emit_move_insn (operands[0], operands[1]); - DONE; - } - ) - --;; Unpredicated moves (little-endian). Only allow memory operations --;; during and after RA; before RA we want the predicated load and --;; store patterns to be used instead. -+;; Unpredicated moves (bytes or little-endian). Only allow memory operations -+;; during and after RA; before RA we want the predicated load and store -+;; patterns to be used instead. 
- (define_insn "*aarch64_sve_mov_le" -- [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w") -- (match_operand:SVE_ALL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))] -+ [(set (match_operand:SVE_FULL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w") -+ (match_operand:SVE_FULL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))] - "TARGET_SVE -- && !BYTES_BIG_ENDIAN -+ && (mode == VNx16QImode || !BYTES_BIG_ENDIAN) - && ((lra_in_progress || reload_completed) - || (register_operand (operands[0], mode) - && nonmemory_operand (operands[1], mode)))" -@@ -131,12 +608,12 @@ - * return aarch64_output_sve_mov_immediate (operands[1]);" - ) - --;; Unpredicated moves (big-endian). Memory accesses require secondary -+;; Unpredicated moves (non-byte big-endian). Memory accesses require secondary - ;; reloads. - (define_insn "*aarch64_sve_mov_be" -- [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w") -- (match_operand:SVE_ALL 1 "aarch64_nonmemory_operand" "w, Dn"))] -- "TARGET_SVE && BYTES_BIG_ENDIAN" -+ [(set (match_operand:SVE_FULL 0 "register_operand" "=w, w") -+ (match_operand:SVE_FULL 1 "aarch64_nonmemory_operand" "w, Dn"))] -+ "TARGET_SVE && BYTES_BIG_ENDIAN && mode != VNx16QImode" - "@ - mov\t%0.d, %1.d - * return aarch64_output_sve_mov_immediate (operands[1]);" -@@ -144,10 +621,11 @@ - - ;; Handle big-endian memory reloads. We use byte PTRUE for all modes - ;; to try to encourage reuse. -+;; This pattern needs constraints due to TARGET_SECONDARY_RELOAD hook. - (define_expand "aarch64_sve_reload_be" - [(parallel - [(set (match_operand 0) -- (match_operand 1)) -+ (match_operand 1)) - (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])] - "TARGET_SVE && BYTES_BIG_ENDIAN" - { -@@ -166,16 +644,15 @@ - } - ) - --;; A predicated load or store for which the predicate is known to be --;; all-true. Note that this pattern is generated directly by --;; aarch64_emit_sve_pred_move, so changes to this pattern will --;; need changes there as well. -+;; A predicated move in which the predicate is known to be all-true. -+;; Note that this pattern is generated directly by aarch64_emit_sve_pred_move, -+;; so changes to this pattern will need changes there as well. - (define_insn_and_split "@aarch64_pred_mov" -- [(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, w, m") -- (unspec:SVE_ALL -+ [(set (match_operand:SVE_FULL 0 "nonimmediate_operand" "=w, w, m") -+ (unspec:SVE_FULL - [(match_operand: 1 "register_operand" "Upl, Upl, Upl") -- (match_operand:SVE_ALL 2 "nonimmediate_operand" "w, m, w")] -- UNSPEC_MERGE_PTRUE))] -+ (match_operand:SVE_FULL 2 "nonimmediate_operand" "w, m, w")] -+ UNSPEC_PRED_X))] - "TARGET_SVE - && (register_operand (operands[0], mode) - || register_operand (operands[2], mode))" -@@ -188,152 +665,67 @@ - [(set (match_dup 0) (match_dup 2))] - ) - --(define_expand "movmisalign" -- [(set (match_operand:SVE_ALL 0 "nonimmediate_operand") -- (match_operand:SVE_ALL 1 "general_operand"))] -- "TARGET_SVE" -+;; A pattern for optimizing SUBREGs that have a reinterpreting effect -+;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move -+;; for details. We use a special predicate for operand 2 to reduce -+;; the number of patterns. 
-+(define_insn_and_split "*aarch64_sve_mov_subreg_be" -+ [(set (match_operand:SVE_FULL 0 "aarch64_sve_nonimmediate_operand" "=w") -+ (unspec:SVE_FULL -+ [(match_operand:VNx16BI 1 "register_operand" "Upl") -+ (match_operand 2 "aarch64_any_register_operand" "w")] -+ UNSPEC_REV_SUBREG))] -+ "TARGET_SVE && BYTES_BIG_ENDIAN" -+ "#" -+ "&& reload_completed" -+ [(const_int 0)] - { -- /* Equivalent to a normal move for our purpooses. */ -- emit_move_insn (operands[0], operands[1]); -+ aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]); - DONE; - } - ) - --(define_insn "maskload" -- [(set (match_operand:SVE_ALL 0 "register_operand" "=w") -- (unspec:SVE_ALL -- [(match_operand: 2 "register_operand" "Upl") -- (match_operand:SVE_ALL 1 "memory_operand" "m")] -- UNSPEC_LD1_SVE))] -- "TARGET_SVE" -- "ld1\t%0., %2/z, %1" --) -- --(define_insn "maskstore" -- [(set (match_operand:SVE_ALL 0 "memory_operand" "+m") -- (unspec:SVE_ALL [(match_operand: 2 "register_operand" "Upl") -- (match_operand:SVE_ALL 1 "register_operand" "w") -- (match_dup 0)] -- UNSPEC_ST1_SVE))] -- "TARGET_SVE" -- "st1\t%1., %2, %0" --) -- --;; Unpredicated gather loads. --(define_expand "gather_load" -- [(set (match_operand:SVE_SD 0 "register_operand") -- (unspec:SVE_SD -- [(match_dup 5) -- (match_operand:DI 1 "aarch64_reg_or_zero") -- (match_operand: 2 "register_operand") -- (match_operand:DI 3 "const_int_operand") -- (match_operand:DI 4 "aarch64_gather_scale_operand_") -- (mem:BLK (scratch))] -- UNSPEC_LD1_GATHER))] -+;; Reinterpret operand 1 in operand 0's mode, without changing its contents. -+;; This is equivalent to a subreg on little-endian targets but not for -+;; big-endian; see the comment at the head of the file for details. -+(define_expand "@aarch64_sve_reinterpret" -+ [(set (match_operand:SVE_FULL 0 "register_operand") -+ (unspec:SVE_FULL -+ [(match_operand 1 "aarch64_any_register_operand")] -+ UNSPEC_REINTERPRET))] - "TARGET_SVE" - { -- operands[5] = force_reg (mode, CONSTM1_RTX (mode)); -+ if (!BYTES_BIG_ENDIAN) -+ { -+ emit_move_insn (operands[0], gen_lowpart (mode, operands[1])); -+ DONE; -+ } - } - ) - --;; Predicated gather loads for 32-bit elements. Operand 3 is true for --;; unsigned extension and false for signed extension. --(define_insn "mask_gather_load" -- [(set (match_operand:SVE_S 0 "register_operand" "=w, w, w, w, w") -- (unspec:SVE_S -- [(match_operand: 5 "register_operand" "Upl, Upl, Upl, Upl, Upl") -- (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk") -- (match_operand: 2 "register_operand" "w, w, w, w, w") -- (match_operand:DI 3 "const_int_operand" "i, Z, Ui1, Z, Ui1") -- (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i") -- (mem:BLK (scratch))] -- UNSPEC_LD1_GATHER))] -- "TARGET_SVE" -- "@ -- ld1w\t%0.s, %5/z, [%2.s] -- ld1w\t%0.s, %5/z, [%1, %2.s, sxtw] -- ld1w\t%0.s, %5/z, [%1, %2.s, uxtw] -- ld1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4] -- ld1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]" --) -- --;; Predicated gather loads for 64-bit elements. The value of operand 3 --;; doesn't matter in this case. 
--(define_insn "mask_gather_load" -- [(set (match_operand:SVE_D 0 "register_operand" "=w, w, w") -- (unspec:SVE_D -- [(match_operand: 5 "register_operand" "Upl, Upl, Upl") -- (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk") -- (match_operand: 2 "register_operand" "w, w, w") -- (match_operand:DI 3 "const_int_operand") -- (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i") -- (mem:BLK (scratch))] -- UNSPEC_LD1_GATHER))] -- "TARGET_SVE" -- "@ -- ld1d\t%0.d, %5/z, [%2.d] -- ld1d\t%0.d, %5/z, [%1, %2.d] -- ld1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]" --) -- --;; Unpredicated scatter store. --(define_expand "scatter_store" -- [(set (mem:BLK (scratch)) -- (unspec:BLK -- [(match_dup 5) -- (match_operand:DI 0 "aarch64_reg_or_zero") -- (match_operand: 1 "register_operand") -- (match_operand:DI 2 "const_int_operand") -- (match_operand:DI 3 "aarch64_gather_scale_operand_") -- (match_operand:SVE_SD 4 "register_operand")] -- UNSPEC_ST1_SCATTER))] -+;; A pattern for handling type punning on big-endian targets. We use a -+;; special predicate for operand 1 to reduce the number of patterns. -+(define_insn_and_split "*aarch64_sve_reinterpret" -+ [(set (match_operand:SVE_FULL 0 "register_operand" "=w") -+ (unspec:SVE_FULL -+ [(match_operand 1 "aarch64_any_register_operand" "w")] -+ UNSPEC_REINTERPRET))] - "TARGET_SVE" -+ "#" -+ "&& reload_completed" -+ [(set (match_dup 0) (match_dup 1))] - { -- operands[5] = force_reg (mode, CONSTM1_RTX (mode)); -+ operands[1] = aarch64_replace_reg_mode (operands[1], mode); - } - ) - --;; Predicated scatter stores for 32-bit elements. Operand 2 is true for --;; unsigned extension and false for signed extension. --(define_insn "mask_scatter_store" -- [(set (mem:BLK (scratch)) -- (unspec:BLK -- [(match_operand: 5 "register_operand" "Upl, Upl, Upl, Upl, Upl") -- (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk") -- (match_operand: 1 "register_operand" "w, w, w, w, w") -- (match_operand:DI 2 "const_int_operand" "i, Z, Ui1, Z, Ui1") -- (match_operand:DI 3 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i") -- (match_operand:SVE_S 4 "register_operand" "w, w, w, w, w")] -- UNSPEC_ST1_SCATTER))] -- "TARGET_SVE" -- "@ -- st1w\t%4.s, %5, [%1.s] -- st1w\t%4.s, %5, [%0, %1.s, sxtw] -- st1w\t%4.s, %5, [%0, %1.s, uxtw] -- st1w\t%4.s, %5, [%0, %1.s, sxtw %p3] -- st1w\t%4.s, %5, [%0, %1.s, uxtw %p3]" --) -- --;; Predicated scatter stores for 64-bit elements. The value of operand 2 --;; doesn't matter in this case. --(define_insn "mask_scatter_store" -- [(set (mem:BLK (scratch)) -- (unspec:BLK -- [(match_operand: 5 "register_operand" "Upl, Upl, Upl") -- (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk") -- (match_operand: 1 "register_operand" "w, w, w") -- (match_operand:DI 2 "const_int_operand") -- (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i") -- (match_operand:SVE_D 4 "register_operand" "w, w, w")] -- UNSPEC_ST1_SCATTER))] -- "TARGET_SVE" -- "@ -- st1d\t%4.d, %5, [%1.d] -- st1d\t%4.d, %5, [%0, %1.d] -- st1d\t%4.d, %5, [%0, %1.d, lsl %p3]" --) -+;; ------------------------------------------------------------------------- -+;; ---- Moves of multiple vectors -+;; ------------------------------------------------------------------------- -+;; All patterns in this section are synthetic and split to real -+;; instructions after reload. -+;; ------------------------------------------------------------------------- - --;; SVE structure moves. 
- (define_expand "mov" - [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand") - (match_operand:SVE_STRUCT 1 "general_operand"))] -@@ -368,7 +760,7 @@ - - ;; Unpredicated structure moves (big-endian). Memory accesses require - ;; secondary reloads. --(define_insn "*aarch64_sve_mov_le" -+(define_insn "*aarch64_sve_mov_be" - [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w") - (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))] - "TARGET_SVE && BYTES_BIG_ENDIAN" -@@ -409,7 +801,7 @@ - (unspec:SVE_STRUCT - [(match_operand: 1 "register_operand" "Upl, Upl, Upl") - (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "w, Utx, w")] -- UNSPEC_MERGE_PTRUE))] -+ UNSPEC_PRED_X))] - "TARGET_SVE - && (register_operand (operands[0], mode) - || register_operand (operands[2], mode))" -@@ -432,6 +824,18 @@ - [(set_attr "length" "")] - ) - -+;; ------------------------------------------------------------------------- -+;; ---- Moves of predicates -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - MOV -+;; - LDR -+;; - PFALSE -+;; - PTRUE -+;; - PTRUES -+;; - STR -+;; ------------------------------------------------------------------------- -+ - (define_expand "mov" - [(set (match_operand:PRED_ALL 0 "nonimmediate_operand") - (match_operand:PRED_ALL 1 "general_operand"))] -@@ -439,12 +843,18 @@ - { - if (GET_CODE (operands[0]) == MEM) - operands[1] = force_reg (mode, operands[1]); -+ -+ if (CONSTANT_P (operands[1])) -+ { -+ aarch64_expand_mov_immediate (operands[0], operands[1]); -+ DONE; -+ } - } - ) - - (define_insn "*aarch64_sve_mov" -- [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa, Upa") -- (match_operand:PRED_ALL 1 "general_operand" "Upa, Upa, m, Dz, Dm"))] -+ [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa") -+ (match_operand:PRED_ALL 1 "aarch64_mov_operand" "Upa, Upa, m, Dn"))] - "TARGET_SVE - && (register_operand (operands[0], mode) - || register_operand (operands[1], mode))" -@@ -452,287 +862,296 @@ - mov\t%0.b, %1.b - str\t%1, %0 - ldr\t%0, %1 -- pfalse\t%0.b -- * return aarch64_output_ptrue (mode, '');" -+ * return aarch64_output_sve_mov_immediate (operands[1]);" - ) - --;; Handle extractions from a predicate by converting to an integer vector --;; and extracting from there. --(define_expand "vec_extract" -- [(match_operand: 0 "register_operand") -- (match_operand: 1 "register_operand") -- (match_operand:SI 2 "nonmemory_operand") -- ;; Dummy operand to which we can attach the iterator. -- (reg:SVE_I V0_REGNUM)] -+;; Match PTRUES Pn.B when both the predicate and flags are useful. 
-+(define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_cc" -+ [(set (reg:CC_NZC CC_REGNUM) -+ (unspec:CC_NZC -+ [(match_operand 2) -+ (match_operand 3) -+ (const_int SVE_KNOWN_PTRUE) -+ (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate" -+ [(unspec:VNx16BI -+ [(match_operand:SI 4 "const_int_operand") -+ (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")] -+ UNSPEC_PTRUE)])] -+ UNSPEC_PTEST)) -+ (set (match_operand:VNx16BI 0 "register_operand" "=Upa") -+ (match_dup 1))] - "TARGET_SVE" - { -- rtx tmp = gen_reg_rtx (mode); -- emit_insn (gen_aarch64_sve_dup_const (tmp, operands[1], -- CONST1_RTX (mode), -- CONST0_RTX (mode))); -- emit_insn (gen_vec_extract (operands[0], tmp, operands[2])); -- DONE; -+ return aarch64_output_sve_ptrues (operands[1]); -+ } -+ "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))" -+ { -+ operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode); - } - ) - --(define_expand "vec_extract" -- [(set (match_operand: 0 "register_operand") -- (vec_select: -- (match_operand:SVE_ALL 1 "register_operand") -- (parallel [(match_operand:SI 2 "nonmemory_operand")])))] -+;; Match PTRUES Pn.[HSD] when both the predicate and flags are useful. -+(define_insn_and_rewrite "*aarch64_sve_ptrue_cc" -+ [(set (reg:CC_NZC CC_REGNUM) -+ (unspec:CC_NZC -+ [(match_operand 2) -+ (match_operand 3) -+ (const_int SVE_KNOWN_PTRUE) -+ (subreg:PRED_HSD -+ (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate" -+ [(unspec:VNx16BI -+ [(match_operand:SI 4 "const_int_operand") -+ (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")] -+ UNSPEC_PTRUE)]) 0)] -+ UNSPEC_PTEST)) -+ (set (match_operand:VNx16BI 0 "register_operand" "=Upa") -+ (match_dup 1))] - "TARGET_SVE" - { -- poly_int64 val; -- if (poly_int_rtx_p (operands[2], &val) -- && known_eq (val, GET_MODE_NUNITS (mode) - 1)) -- { -- /* The last element can be extracted with a LASTB and a false -- predicate. */ -- rtx sel = force_reg (mode, CONST0_RTX (mode)); -- emit_insn (gen_extract_last_ (operands[0], sel, operands[1])); -- DONE; -- } -- if (!CONST_INT_P (operands[2])) -- { -- /* Create an index with operand[2] as the base and -1 as the step. -- It will then be zero for the element we care about. */ -- rtx index = gen_lowpart (mode, operands[2]); -- index = force_reg (mode, index); -- rtx series = gen_reg_rtx (mode); -- emit_insn (gen_vec_series (series, index, constm1_rtx)); -- -- /* Get a predicate that is true for only that element. */ -- rtx zero = CONST0_RTX (mode); -- rtx cmp = gen_rtx_EQ (mode, series, zero); -- rtx sel = gen_reg_rtx (mode); -- emit_insn (gen_vec_cmp (sel, cmp, series, zero)); -- -- /* Select the element using LASTB. */ -- emit_insn (gen_extract_last_ (operands[0], sel, operands[1])); -- DONE; -- } -- } --) -- --;; Extract element zero. This is a special case because we want to force --;; the registers to be the same for the second alternative, and then --;; split the instruction into nothing after RA. 
--(define_insn_and_split "*vec_extract_0" -- [(set (match_operand: 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv") -- (vec_select: -- (match_operand:SVE_ALL 1 "register_operand" "w, 0, w") -- (parallel [(const_int 0)])))] -- "TARGET_SVE" -- { -- operands[1] = gen_rtx_REG (mode, REGNO (operands[1])); -- switch (which_alternative) -- { -- case 0: -- return "umov\\t%0, %1.[0]"; -- case 1: -- return "#"; -- case 2: -- return "st1\\t{%1.}[0], %0"; -- default: -- gcc_unreachable (); -- } -+ return aarch64_output_sve_ptrues (operands[1]); - } -- "&& reload_completed -- && REG_P (operands[0]) -- && REGNO (operands[0]) == REGNO (operands[1])" -- [(const_int 0)] -+ "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))" - { -- emit_note (NOTE_INSN_DELETED); -- DONE; -+ operands[2] = CONSTM1_RTX (VNx16BImode); -+ operands[3] = CONSTM1_RTX (mode); - } -- [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")] - ) - --;; Extract an element from the Advanced SIMD portion of the register. --;; We don't just reuse the aarch64-simd.md pattern because we don't --;; want any change in lane number on big-endian targets. --(define_insn "*vec_extract_v128" -- [(set (match_operand: 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv") -- (vec_select: -- (match_operand:SVE_ALL 1 "register_operand" "w, w, w") -- (parallel [(match_operand:SI 2 "const_int_operand")])))] -- "TARGET_SVE -- && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (mode), 1, 15)" -+;; Match PTRUES Pn.B when only the flags result is useful (which is -+;; a way of testing VL). -+(define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_ptest" -+ [(set (reg:CC_NZC CC_REGNUM) -+ (unspec:CC_NZC -+ [(match_operand 2) -+ (match_operand 3) -+ (const_int SVE_KNOWN_PTRUE) -+ (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate" -+ [(unspec:VNx16BI -+ [(match_operand:SI 4 "const_int_operand") -+ (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")] -+ UNSPEC_PTRUE)])] -+ UNSPEC_PTEST)) -+ (clobber (match_scratch:VNx16BI 0 "=Upa"))] -+ "TARGET_SVE" - { -- operands[1] = gen_rtx_REG (mode, REGNO (operands[1])); -- switch (which_alternative) -- { -- case 0: -- return "umov\\t%0, %1.[%2]"; -- case 1: -- return "dup\\t%0, %1.[%2]"; -- case 2: -- return "st1\\t{%1.}[%2], %0"; -- default: -- gcc_unreachable (); -- } -+ return aarch64_output_sve_ptrues (operands[1]); - } -- [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")] --) -- --;; Extract an element in the range of DUP. This pattern allows the --;; source and destination to be different. --(define_insn "*vec_extract_dup" -- [(set (match_operand: 0 "register_operand" "=w") -- (vec_select: -- (match_operand:SVE_ALL 1 "register_operand" "w") -- (parallel [(match_operand:SI 2 "const_int_operand")])))] -- "TARGET_SVE -- && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (mode), 16, 63)" -+ "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))" - { -- operands[0] = gen_rtx_REG (mode, REGNO (operands[0])); -- return "dup\t%0., %1.[%2]"; -+ operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode); - } - ) - --;; Extract an element outside the range of DUP. This pattern requires the --;; source and destination to be the same. 
--(define_insn "*vec_extract_ext" -- [(set (match_operand: 0 "register_operand" "=w") -- (vec_select: -- (match_operand:SVE_ALL 1 "register_operand" "0") -- (parallel [(match_operand:SI 2 "const_int_operand")])))] -- "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (mode) >= 64" -+;; Match PTRUES Pn.[HWD] when only the flags result is useful (which is -+;; a way of testing VL). -+(define_insn_and_rewrite "*aarch64_sve_ptrue_ptest" -+ [(set (reg:CC_NZC CC_REGNUM) -+ (unspec:CC_NZC -+ [(match_operand 2) -+ (match_operand 3) -+ (const_int SVE_KNOWN_PTRUE) -+ (subreg:PRED_HSD -+ (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate" -+ [(unspec:VNx16BI -+ [(match_operand:SI 4 "const_int_operand") -+ (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")] -+ UNSPEC_PTRUE)]) 0)] -+ UNSPEC_PTEST)) -+ (clobber (match_scratch:VNx16BI 0 "=Upa"))] -+ "TARGET_SVE" - { -- operands[0] = gen_rtx_REG (mode, REGNO (operands[0])); -- operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (mode)); -- return "ext\t%0.b, %0.b, %0.b, #%2"; -+ return aarch64_output_sve_ptrues (operands[1]); - } --) -- --;; Extract the last active element of operand 1 into operand 0. --;; If no elements are active, extract the last inactive element instead. --(define_insn "extract_last_" -- [(set (match_operand: 0 "register_operand" "=r, w") -- (unspec: -- [(match_operand: 1 "register_operand" "Upl, Upl") -- (match_operand:SVE_ALL 2 "register_operand" "w, w")] -- UNSPEC_LASTB))] -- "TARGET_SVE" -- "@ -- lastb\t%0, %1, %2. -- lastb\t%0, %1, %2." --) -- --(define_expand "vec_duplicate" -- [(parallel -- [(set (match_operand:SVE_ALL 0 "register_operand") -- (vec_duplicate:SVE_ALL -- (match_operand: 1 "aarch64_sve_dup_operand"))) -- (clobber (scratch:))])] -- "TARGET_SVE" -+ "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))" - { -- if (MEM_P (operands[1])) -- { -- rtx ptrue = force_reg (mode, CONSTM1_RTX (mode)); -- emit_insn (gen_sve_ld1r (operands[0], ptrue, operands[1], -- CONST0_RTX (mode))); -- DONE; -- } -+ operands[2] = CONSTM1_RTX (VNx16BImode); -+ operands[3] = CONSTM1_RTX (mode); - } - ) - --;; Accept memory operands for the benefit of combine, and also in case --;; the scalar input gets spilled to memory during RA. We want to split --;; the load at the first opportunity in order to allow the PTRUE to be --;; optimized with surrounding code. --(define_insn_and_split "*vec_duplicate_reg" -- [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w") -- (vec_duplicate:SVE_ALL -- (match_operand: 1 "aarch64_sve_dup_operand" "r, w, Uty"))) -- (clobber (match_scratch: 2 "=X, X, Upl"))] -+;; ------------------------------------------------------------------------- -+;; ---- Moves relating to the FFR -+;; ------------------------------------------------------------------------- -+;; RDFFR -+;; RDFFRS -+;; SETFFR -+;; WRFFR -+;; ------------------------------------------------------------------------- -+ -+;; [W1 in the block comment above about FFR handling] -+;; -+;; Write to the FFR and start a new FFRT scheduling region. 
-+(define_insn "aarch64_wrffr" -+ [(set (reg:VNx16BI FFR_REGNUM) -+ (match_operand:VNx16BI 0 "aarch64_simd_reg_or_minus_one" "Dm, Upa")) -+ (set (reg:VNx16BI FFRT_REGNUM) -+ (match_dup 0))] - "TARGET_SVE" - "@ -- mov\t%0., %1 -- mov\t%0., %1 -- #" -- "&& MEM_P (operands[1])" -- [(const_int 0)] -- { -- if (GET_CODE (operands[2]) == SCRATCH) -- operands[2] = gen_reg_rtx (mode); -- emit_move_insn (operands[2], CONSTM1_RTX (mode)); -- emit_insn (gen_sve_ld1r (operands[0], operands[2], operands[1], -- CONST0_RTX (mode))); -- DONE; -- } -- [(set_attr "length" "4,4,8")] -+ setffr -+ wrffr\t%0.b" - ) - --;; This is used for vec_duplicates from memory, but can also --;; be used by combine to optimize selects of a a vec_duplicate --;; with zero. --(define_insn "sve_ld1r" -- [(set (match_operand:SVE_ALL 0 "register_operand" "=w") -- (unspec:SVE_ALL -- [(match_operand: 1 "register_operand" "Upl") -- (vec_duplicate:SVE_ALL -- (match_operand: 2 "aarch64_sve_ld1r_operand" "Uty")) -- (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")] -- UNSPEC_SEL))] -+;; [L2 in the block comment above about FFR handling] -+;; -+;; Introduce a read from and write to the FFR in the current FFRT region, -+;; so that the FFR value is live on entry to the region and so that the FFR -+;; value visibly changes within the region. This is used (possibly multiple -+;; times) in an FFRT region that includes LDFF1 or LDNF1 instructions. -+(define_insn "aarch64_update_ffr_for_load" -+ [(set (reg:VNx16BI FFR_REGNUM) -+ (unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM) -+ (reg:VNx16BI FFR_REGNUM)] UNSPEC_UPDATE_FFR))] - "TARGET_SVE" -- "ld1r\t%0., %1/z, %2" -+ "" -+ [(set_attr "type" "no_insn")] - ) - --;; Load 128 bits from memory and duplicate to fill a vector. Since there --;; are so few operations on 128-bit "elements", we don't define a VNx1TI --;; and simply use vectors of bytes instead. --(define_insn "*sve_ld1rq" -- [(set (match_operand:SVE_ALL 0 "register_operand" "=w") -- (unspec:SVE_ALL -- [(match_operand: 1 "register_operand" "Upl") -- (match_operand:TI 2 "aarch64_sve_ld1r_operand" "Uty")] -- UNSPEC_LD1RQ))] -+;; [R1 in the block comment above about FFR handling] -+;; -+;; Notionally copy the FFR to the FFRT, so that the current FFR value -+;; can be read from there by the RDFFR instructions below. This acts -+;; as a scheduling barrier for earlier LDFF1 and LDNF1 instructions and -+;; creates a natural dependency with earlier writes. -+(define_insn "aarch64_copy_ffr_to_ffrt" -+ [(set (reg:VNx16BI FFRT_REGNUM) -+ (reg:VNx16BI FFR_REGNUM))] - "TARGET_SVE" -- "ld1rq\t%0., %1/z, %2" -+ "" -+ [(set_attr "type" "no_insn")] - ) - --;; Implement a predicate broadcast by shifting the low bit of the scalar --;; input into the top bit and using a WHILELO. An alternative would be to --;; duplicate the input and do a compare with zero. --(define_expand "vec_duplicate" -- [(set (match_operand:PRED_ALL 0 "register_operand") -- (vec_duplicate:PRED_ALL (match_operand 1 "register_operand")))] -+;; [R2 in the block comment above about FFR handling] -+;; -+;; Read the FFR via the FFRT. -+(define_insn "aarch64_rdffr" -+ [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") -+ (reg:VNx16BI FFRT_REGNUM))] -+ "TARGET_SVE" -+ "rdffr\t%0.b" -+) -+ -+;; Likewise with zero predication. 
-+(define_insn "aarch64_rdffr_z" -+ [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") -+ (and:VNx16BI -+ (reg:VNx16BI FFRT_REGNUM) -+ (match_operand:VNx16BI 1 "register_operand" "Upa")))] -+ "TARGET_SVE" -+ "rdffr\t%0.b, %1/z" -+) -+ -+;; Read the FFR to test for a fault, without using the predicate result. -+(define_insn "*aarch64_rdffr_z_ptest" -+ [(set (reg:CC_NZC CC_REGNUM) -+ (unspec:CC_NZC -+ [(match_operand:VNx16BI 1 "register_operand" "Upa") -+ (match_dup 1) -+ (match_operand:SI 2 "aarch64_sve_ptrue_flag") -+ (and:VNx16BI -+ (reg:VNx16BI FFRT_REGNUM) -+ (match_dup 1))] -+ UNSPEC_PTEST)) -+ (clobber (match_scratch:VNx16BI 0 "=Upa"))] -+ "TARGET_SVE" -+ "rdffrs\t%0.b, %1/z" -+) -+ -+;; Same for unpredicated RDFFR when tested with a known PTRUE. -+(define_insn "*aarch64_rdffr_ptest" -+ [(set (reg:CC_NZC CC_REGNUM) -+ (unspec:CC_NZC -+ [(match_operand:VNx16BI 1 "register_operand" "Upa") -+ (match_dup 1) -+ (const_int SVE_KNOWN_PTRUE) -+ (reg:VNx16BI FFRT_REGNUM)] -+ UNSPEC_PTEST)) -+ (clobber (match_scratch:VNx16BI 0 "=Upa"))] -+ "TARGET_SVE" -+ "rdffrs\t%0.b, %1/z" -+) -+ -+;; Read the FFR with zero predication and test the result. -+(define_insn "*aarch64_rdffr_z_cc" -+ [(set (reg:CC_NZC CC_REGNUM) -+ (unspec:CC_NZC -+ [(match_operand:VNx16BI 1 "register_operand" "Upa") -+ (match_dup 1) -+ (match_operand:SI 2 "aarch64_sve_ptrue_flag") -+ (and:VNx16BI -+ (reg:VNx16BI FFRT_REGNUM) -+ (match_dup 1))] -+ UNSPEC_PTEST)) -+ (set (match_operand:VNx16BI 0 "register_operand" "=Upa") -+ (and:VNx16BI -+ (reg:VNx16BI FFRT_REGNUM) -+ (match_dup 1)))] -+ "TARGET_SVE" -+ "rdffrs\t%0.b, %1/z" -+) -+ -+;; Same for unpredicated RDFFR when tested with a known PTRUE. -+(define_insn "*aarch64_rdffr_cc" -+ [(set (reg:CC_NZC CC_REGNUM) -+ (unspec:CC_NZC -+ [(match_operand:VNx16BI 1 "register_operand" "Upa") -+ (match_dup 1) -+ (const_int SVE_KNOWN_PTRUE) -+ (reg:VNx16BI FFRT_REGNUM)] -+ UNSPEC_PTEST)) -+ (set (match_operand:VNx16BI 0 "register_operand" "=Upa") -+ (reg:VNx16BI FFRT_REGNUM))] -+ "TARGET_SVE" -+ "rdffrs\t%0.b, %1/z" -+) -+ -+;; [R3 in the block comment above about FFR handling] -+;; -+;; Arbitrarily update the FFRT after a read from the FFR. This acts as -+;; a scheduling barrier for later LDFF1 and LDNF1 instructions. 
-+(define_insn "aarch64_update_ffrt" -+ [(set (reg:VNx16BI FFRT_REGNUM) -+ (unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM)] UNSPEC_UPDATE_FFRT))] - "TARGET_SVE" -- { -- rtx tmp = gen_reg_rtx (DImode); -- rtx op1 = gen_lowpart (DImode, operands[1]); -- emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode))); -- emit_insn (gen_while_ultdi (operands[0], const0_rtx, tmp)); -- DONE; -- } --) -- --(define_insn "vec_series" -- [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w") -- (vec_series:SVE_I -- (match_operand: 1 "aarch64_sve_index_operand" "Usi, r, r") -- (match_operand: 2 "aarch64_sve_index_operand" "r, Usi, r")))] -+ "" -+ [(set_attr "type" "no_insn")] -+) -+ -+;; ========================================================================= -+;; == Loads -+;; ========================================================================= -+ -+;; ------------------------------------------------------------------------- -+;; ---- Normal contiguous loads -+;; ------------------------------------------------------------------------- -+;; Includes contiguous forms of: -+;; - LD1B -+;; - LD1D -+;; - LD1H -+;; - LD1W -+;; - LD2B -+;; - LD2D -+;; - LD2H -+;; - LD2W -+;; - LD3B -+;; - LD3D -+;; - LD3H -+;; - LD3W -+;; - LD4B -+;; - LD4D -+;; - LD4H -+;; - LD4W -+;; ------------------------------------------------------------------------- -+ -+;; Predicated LD1. -+(define_insn "maskload" -+ [(set (match_operand:SVE_FULL 0 "register_operand" "=w") -+ (unspec:SVE_FULL -+ [(match_operand: 2 "register_operand" "Upl") -+ (match_operand:SVE_FULL 1 "memory_operand" "m")] -+ UNSPEC_LD1_SVE))] - "TARGET_SVE" -- "@ -- index\t%0., #%1, %2 -- index\t%0., %1, #%2 -- index\t%0., %1, %2" --) -- --;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range --;; of an INDEX instruction. --(define_insn "*vec_series_plus" -- [(set (match_operand:SVE_I 0 "register_operand" "=w") -- (plus:SVE_I -- (vec_duplicate:SVE_I -- (match_operand: 1 "register_operand" "r")) -- (match_operand:SVE_I 2 "immediate_operand")))] -- "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])" -- { -- operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]); -- return "index\t%0., %1, #%2"; -- } -+ "ld1\t%0., %2/z, %1" - ) - - ;; Unpredicated LD[234]. -@@ -744,7 +1163,7 @@ - UNSPEC_LDN))] - "TARGET_SVE" - { -- operands[2] = force_reg (mode, CONSTM1_RTX (mode)); -+ operands[2] = aarch64_ptrue_reg (mode); - } - ) - -@@ -759,884 +1178,5373 @@ - "ld\t%0, %2/z, %1" - ) - --;; Unpredicated ST[234]. This is always a full update, so the dependence --;; on the old value of the memory location (via (match_dup 0)) is redundant. --;; There doesn't seem to be any obvious benefit to treating the all-true --;; case differently though. In particular, it's very unlikely that we'll --;; only find out during RTL that a store_lanes is dead. --(define_expand "vec_store_lanes" -- [(set (match_operand:SVE_STRUCT 0 "memory_operand") -- (unspec:SVE_STRUCT -- [(match_dup 2) -- (match_operand:SVE_STRUCT 1 "register_operand") -- (match_dup 0)] -- UNSPEC_STN))] -+;; ------------------------------------------------------------------------- -+;; ---- Extending contiguous loads -+;; ------------------------------------------------------------------------- -+;; Includes contiguous forms of: -+;; LD1B -+;; LD1H -+;; LD1SB -+;; LD1SH -+;; LD1SW -+;; LD1W -+;; ------------------------------------------------------------------------- -+ -+;; Predicated load and extend, with 8 elements per 128-bit block. 
-+(define_insn "@aarch64_load_" -+ [(set (match_operand:VNx8_WIDE 0 "register_operand" "=w") -+ (ANY_EXTEND:VNx8_WIDE -+ (unspec:VNx8_NARROW -+ [(match_operand:VNx8BI 2 "register_operand" "Upl") -+ (match_operand:VNx8_NARROW 1 "memory_operand" "m")] -+ UNSPEC_LD1_SVE)))] -+ "TARGET_SVE" -+ "ld1\t%0., %2/z, %1" -+) -+ -+;; Predicated load and extend, with 4 elements per 128-bit block. -+(define_insn "@aarch64_load_" -+ [(set (match_operand:VNx4_WIDE 0 "register_operand" "=w") -+ (ANY_EXTEND:VNx4_WIDE -+ (unspec:VNx4_NARROW -+ [(match_operand:VNx4BI 2 "register_operand" "Upl") -+ (match_operand:VNx4_NARROW 1 "memory_operand" "m")] -+ UNSPEC_LD1_SVE)))] -+ "TARGET_SVE" -+ "ld1\t%0., %2/z, %1" -+) -+ -+;; Predicated load and extend, with 2 elements per 128-bit block. -+(define_insn "@aarch64_load_" -+ [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w") -+ (ANY_EXTEND:VNx2_WIDE -+ (unspec:VNx2_NARROW -+ [(match_operand:VNx2BI 2 "register_operand" "Upl") -+ (match_operand:VNx2_NARROW 1 "memory_operand" "m")] -+ UNSPEC_LD1_SVE)))] -+ "TARGET_SVE" -+ "ld1\t%0., %2/z, %1" -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- First-faulting contiguous loads -+;; ------------------------------------------------------------------------- -+;; Includes contiguous forms of: -+;; - LDFF1B -+;; - LDFF1D -+;; - LDFF1H -+;; - LDFF1W -+;; - LDNF1B -+;; - LDNF1D -+;; - LDNF1H -+;; - LDNF1W -+;; ------------------------------------------------------------------------- -+ -+;; Contiguous non-extending first-faulting or non-faulting loads. -+(define_insn "@aarch64_ldf1" -+ [(set (match_operand:SVE_FULL 0 "register_operand" "=w") -+ (unspec:SVE_FULL -+ [(match_operand: 2 "register_operand" "Upl") -+ (match_operand:SVE_FULL 1 "aarch64_sve_ldf1_operand" "Ut") -+ (reg:VNx16BI FFRT_REGNUM)] -+ SVE_LDFF1_LDNF1))] -+ "TARGET_SVE" -+ "ldf1\t%0., %2/z, %1" -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- First-faulting extending contiguous loads -+;; ------------------------------------------------------------------------- -+;; Includes contiguous forms of: -+;; - LDFF1B -+;; - LDFF1H -+;; - LDFF1SB -+;; - LDFF1SH -+;; - LDFF1SW -+;; - LDFF1W -+;; - LDNF1B -+;; - LDNF1H -+;; - LDNF1SB -+;; - LDNF1SH -+;; - LDNF1SW -+;; - LDNF1W -+;; ------------------------------------------------------------------------- -+ -+;; Predicated first-faulting or non-faulting load and extend, with 8 elements -+;; per 128-bit block. -+(define_insn "@aarch64_ldf1_" -+ [(set (match_operand:VNx8_WIDE 0 "register_operand" "=w") -+ (ANY_EXTEND:VNx8_WIDE -+ (unspec:VNx8_NARROW -+ [(match_operand:VNx8BI 2 "register_operand" "Upl") -+ (match_operand:VNx8_NARROW 1 "aarch64_sve_ldf1_operand" "Ut") -+ (reg:VNx16BI FFRT_REGNUM)] -+ SVE_LDFF1_LDNF1)))] -+ "TARGET_SVE" -+ "ldf1\t%0., %2/z, %1" -+) -+ -+;; Predicated first-faulting or non-faulting load and extend, with 4 elements -+;; per 128-bit block. -+(define_insn "@aarch64_ldf1_" -+ [(set (match_operand:VNx4_WIDE 0 "register_operand" "=w") -+ (ANY_EXTEND:VNx4_WIDE -+ (unspec:VNx4_NARROW -+ [(match_operand:VNx4BI 2 "register_operand" "Upl") -+ (match_operand:VNx4_NARROW 1 "aarch64_sve_ldf1_operand" "Ut") -+ (reg:VNx16BI FFRT_REGNUM)] -+ SVE_LDFF1_LDNF1)))] -+ "TARGET_SVE" -+ "ldf1\t%0., %2/z, %1" -+) -+ -+;; Predicated first-faulting or non-faulting load and extend, with 2 elements -+;; per 128-bit block. 
-+(define_insn "@aarch64_ldf1_" -+ [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w") -+ (ANY_EXTEND:VNx2_WIDE -+ (unspec:VNx2_NARROW -+ [(match_operand:VNx2BI 2 "register_operand" "Upl") -+ (match_operand:VNx2_NARROW 1 "aarch64_sve_ldf1_operand" "Ut") -+ (reg:VNx16BI FFRT_REGNUM)] -+ SVE_LDFF1_LDNF1)))] -+ "TARGET_SVE" -+ "ldf1\t%0., %2/z, %1" -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- Non-temporal contiguous loads -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - LDNT1B -+;; - LDNT1D -+;; - LDNT1H -+;; - LDNT1W -+;; ------------------------------------------------------------------------- -+ -+;; Predicated contiguous non-temporal load. -+(define_insn "@aarch64_ldnt1" -+ [(set (match_operand:SVE_FULL 0 "register_operand" "=w") -+ (unspec:SVE_FULL -+ [(match_operand: 2 "register_operand" "Upl") -+ (match_operand:SVE_FULL 1 "memory_operand" "m")] -+ UNSPEC_LDNT1_SVE))] -+ "TARGET_SVE" -+ "ldnt1\t%0., %2/z, %1" -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- Normal gather loads -+;; ------------------------------------------------------------------------- -+;; Includes gather forms of: -+;; - LD1D -+;; - LD1W -+;; ------------------------------------------------------------------------- -+ -+;; Unpredicated gather loads. -+(define_expand "gather_load" -+ [(set (match_operand:SVE_FULL_SD 0 "register_operand") -+ (unspec:SVE_FULL_SD -+ [(match_dup 5) -+ (match_operand:DI 1 "aarch64_sve_gather_offset_") -+ (match_operand: 2 "register_operand") -+ (match_operand:DI 3 "const_int_operand") -+ (match_operand:DI 4 "aarch64_gather_scale_operand_") -+ (mem:BLK (scratch))] -+ UNSPEC_LD1_GATHER))] -+ "TARGET_SVE" -+ { -+ operands[5] = aarch64_ptrue_reg (mode); -+ } -+) -+ -+;; Predicated gather loads for 32-bit elements. Operand 3 is true for -+;; unsigned extension and false for signed extension. -+(define_insn "mask_gather_load" -+ [(set (match_operand:SVE_FULL_S 0 "register_operand" "=w, w, w, w, w, w") -+ (unspec:SVE_FULL_S -+ [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") -+ (match_operand:DI 1 "aarch64_sve_gather_offset_w" "Z, vgw, rk, rk, rk, rk") -+ (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w") -+ (match_operand:DI 3 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1") -+ (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, Ui1, i, i") -+ (mem:BLK (scratch))] -+ UNSPEC_LD1_GATHER))] -+ "TARGET_SVE" -+ "@ -+ ld1w\t%0.s, %5/z, [%2.s] -+ ld1w\t%0.s, %5/z, [%2.s, #%1] -+ ld1w\t%0.s, %5/z, [%1, %2.s, sxtw] -+ ld1w\t%0.s, %5/z, [%1, %2.s, uxtw] -+ ld1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4] -+ ld1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]" -+) -+ -+;; Predicated gather loads for 64-bit elements. The value of operand 3 -+;; doesn't matter in this case. 
-+(define_insn "mask_gather_load" -+ [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w, w, w") -+ (unspec:SVE_FULL_D -+ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl") -+ (match_operand:DI 1 "aarch64_sve_gather_offset_d" "Z, vgd, rk, rk") -+ (match_operand:VNx2DI 2 "register_operand" "w, w, w, w") -+ (match_operand:DI 3 "const_int_operand") -+ (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, Ui1, i") -+ (mem:BLK (scratch))] -+ UNSPEC_LD1_GATHER))] - "TARGET_SVE" -+ "@ -+ ld1d\t%0.d, %5/z, [%2.d] -+ ld1d\t%0.d, %5/z, [%2.d, #%1] -+ ld1d\t%0.d, %5/z, [%1, %2.d] -+ ld1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]" -+) -+ -+;; Likewise, but with the offset being sign-extended from 32 bits. -+(define_insn "*mask_gather_load_sxtw" -+ [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w") -+ (unspec:SVE_FULL_D -+ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") -+ (match_operand:DI 1 "register_operand" "rk, rk") -+ (unspec:VNx2DI -+ [(match_dup 5) -+ (sign_extend:VNx2DI -+ (truncate:VNx2SI -+ (match_operand:VNx2DI 2 "register_operand" "w, w")))] -+ UNSPEC_PRED_X) -+ (match_operand:DI 3 "const_int_operand") -+ (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i") -+ (mem:BLK (scratch))] -+ UNSPEC_LD1_GATHER))] -+ "TARGET_SVE" -+ "@ -+ ld1d\t%0.d, %5/z, [%1, %2.d, sxtw] -+ ld1d\t%0.d, %5/z, [%1, %2.d, sxtw %p4]" -+) -+ -+;; Likewise, but with the offset being zero-extended from 32 bits. -+(define_insn "*mask_gather_load_uxtw" -+ [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w") -+ (unspec:SVE_FULL_D -+ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") -+ (match_operand:DI 1 "register_operand" "rk, rk") -+ (and:VNx2DI -+ (match_operand:VNx2DI 2 "register_operand" "w, w") -+ (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate")) -+ (match_operand:DI 3 "const_int_operand") -+ (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i") -+ (mem:BLK (scratch))] -+ UNSPEC_LD1_GATHER))] -+ "TARGET_SVE" -+ "@ -+ ld1d\t%0.d, %5/z, [%1, %2.d, uxtw] -+ ld1d\t%0.d, %5/z, [%1, %2.d, uxtw %p4]" -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- Extending gather loads -+;; ------------------------------------------------------------------------- -+;; Includes gather forms of: -+;; - LD1B -+;; - LD1H -+;; - LD1SB -+;; - LD1SH -+;; - LD1SW -+;; - LD1W -+;; ------------------------------------------------------------------------- -+ -+;; Predicated extending gather loads for 32-bit elements. Operand 3 is -+;; true for unsigned extension and false for signed extension. -+(define_insn "@aarch64_gather_load_" -+ [(set (match_operand:VNx4_WIDE 0 "register_operand" "=w, w, w, w, w, w") -+ (ANY_EXTEND:VNx4_WIDE -+ (unspec:VNx4_NARROW -+ [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") -+ (match_operand:DI 1 "aarch64_sve_gather_offset_" "Z, vg, rk, rk, rk, rk") -+ (match_operand:VNx4_WIDE 2 "register_operand" "w, w, w, w, w, w") -+ (match_operand:DI 3 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1") -+ (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, Ui1, Ui1, Ui1, i, i") -+ (mem:BLK (scratch))] -+ UNSPEC_LD1_GATHER)))] -+ "TARGET_SVE" -+ "@ -+ ld1\t%0.s, %5/z, [%2.s] -+ ld1\t%0.s, %5/z, [%2.s, #%1] -+ ld1\t%0.s, %5/z, [%1, %2.s, sxtw] -+ ld1\t%0.s, %5/z, [%1, %2.s, uxtw] -+ ld1\t%0.s, %5/z, [%1, %2.s, sxtw %p4] -+ ld1\t%0.s, %5/z, [%1, %2.s, uxtw %p4]" -+) -+ -+;; Predicated extending gather loads for 64-bit elements. 
The value of -+;; operand 3 doesn't matter in this case. -+(define_insn "@aarch64_gather_load_" -+ [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w, w, w") -+ (ANY_EXTEND:VNx2_WIDE -+ (unspec:VNx2_NARROW -+ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl") -+ (match_operand:DI 1 "aarch64_sve_gather_offset_" "Z, vg, rk, rk") -+ (match_operand:VNx2_WIDE 2 "register_operand" "w, w, w, w") -+ (match_operand:DI 3 "const_int_operand") -+ (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, Ui1, Ui1, i") -+ (mem:BLK (scratch))] -+ UNSPEC_LD1_GATHER)))] -+ "TARGET_SVE" -+ "@ -+ ld1\t%0.d, %5/z, [%2.d] -+ ld1\t%0.d, %5/z, [%2.d, #%1] -+ ld1\t%0.d, %5/z, [%1, %2.d] -+ ld1\t%0.d, %5/z, [%1, %2.d, lsl %p4]" -+) -+ -+;; Likewise, but with the offset being sign-extended from 32 bits. -+(define_insn_and_rewrite "*aarch64_gather_load__sxtw" -+ [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w") -+ (ANY_EXTEND:VNx2_WIDE -+ (unspec:VNx2_NARROW -+ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") -+ (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk") -+ (unspec:VNx2DI -+ [(match_operand 6) -+ (sign_extend:VNx2DI -+ (truncate:VNx2SI -+ (match_operand:VNx2DI 2 "register_operand" "w, w")))] -+ UNSPEC_PRED_X) -+ (match_operand:DI 3 "const_int_operand") -+ (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, i") -+ (mem:BLK (scratch))] -+ UNSPEC_LD1_GATHER)))] -+ "TARGET_SVE" -+ "@ -+ ld1\t%0.d, %5/z, [%1, %2.d, sxtw] -+ ld1\t%0.d, %5/z, [%1, %2.d, sxtw %p4]" -+ "&& !rtx_equal_p (operands[5], operands[6])" -+ { -+ operands[6] = copy_rtx (operands[5]); -+ } -+) -+ -+;; Likewise, but with the offset being zero-extended from 32 bits. -+(define_insn "*aarch64_gather_load__uxtw" -+ [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w") -+ (ANY_EXTEND:VNx2_WIDE -+ (unspec:VNx2_NARROW -+ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") -+ (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk") -+ (and:VNx2DI -+ (match_operand:VNx2DI 2 "register_operand" "w, w") -+ (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate")) -+ (match_operand:DI 3 "const_int_operand") -+ (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, i") -+ (mem:BLK (scratch))] -+ UNSPEC_LD1_GATHER)))] -+ "TARGET_SVE" -+ "@ -+ ld1\t%0.d, %5/z, [%1, %2.d, uxtw] -+ ld1\t%0.d, %5/z, [%1, %2.d, uxtw %p4]" -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- First-faulting gather loads -+;; ------------------------------------------------------------------------- -+;; Includes gather forms of: -+;; - LDFF1D -+;; - LDFF1W -+;; ------------------------------------------------------------------------- -+ -+;; Predicated first-faulting gather loads for 32-bit elements. Operand -+;; 3 is true for unsigned extension and false for signed extension. 
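A hedged sketch of a gather that maps onto the LD1D-gather patterns above (64-bit elements, unsigned indices scaled by the element size). It assumes <arm_sve.h>; the names are illustrative only.

#include <stdint.h>
#include <arm_sve.h>

void
gather_copy (double *restrict dst, const double *restrict table,
             const uint64_t *restrict idx, int64_t n)
{
  for (int64_t i = 0; i < n; i += svcntd ())
    {
      svbool_t pg = svwhilelt_b64_s64 (i, n);
      svuint64_t indices = svld1_u64 (pg, idx + i);
      svfloat64_t vals = svld1_gather_u64index_f64 (pg, table, indices);
      /* ld1d z.d, pg/z, [x, z.d, lsl #3] */
      svst1_f64 (pg, dst + i, vals);
    }
}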
-+(define_insn "@aarch64_ldff1_gather" -+ [(set (match_operand:SVE_FULL_S 0 "register_operand" "=w, w, w, w, w, w") -+ (unspec:SVE_FULL_S -+ [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") -+ (match_operand:DI 1 "aarch64_sve_gather_offset_w" "Z, vgw, rk, rk, rk, rk") -+ (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w") -+ (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1") -+ (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, Ui1, i, i") -+ (mem:BLK (scratch)) -+ (reg:VNx16BI FFRT_REGNUM)] -+ UNSPEC_LDFF1_GATHER))] -+ "TARGET_SVE" -+ "@ -+ ldff1w\t%0.s, %5/z, [%2.s] -+ ldff1w\t%0.s, %5/z, [%2.s, #%1] -+ ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw] -+ ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw] -+ ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4] -+ ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]" -+) -+ -+;; Predicated first-faulting gather loads for 64-bit elements. The value -+;; of operand 3 doesn't matter in this case. -+(define_insn "@aarch64_ldff1_gather" -+ [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w, w, w") -+ (unspec:SVE_FULL_D -+ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl") -+ (match_operand:DI 1 "aarch64_sve_gather_offset_d" "Z, vgd, rk, rk") -+ (match_operand:VNx2DI 2 "register_operand" "w, w, w, w") -+ (match_operand:DI 3 "const_int_operand") -+ (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, Ui1, i") -+ (mem:BLK (scratch)) -+ (reg:VNx16BI FFRT_REGNUM)] -+ UNSPEC_LDFF1_GATHER))] -+ "TARGET_SVE" -+ "@ -+ ldff1d\t%0.d, %5/z, [%2.d] -+ ldff1d\t%0.d, %5/z, [%2.d, #%1] -+ ldff1d\t%0.d, %5/z, [%1, %2.d] -+ ldff1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]" -+) -+ -+;; Likewise, but with the offset being sign-extended from 32 bits. -+(define_insn_and_rewrite "*aarch64_ldff1_gather_sxtw" -+ [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w") -+ (unspec:SVE_FULL_D -+ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") -+ (match_operand:DI 1 "register_operand" "rk, rk") -+ (unspec:VNx2DI -+ [(match_operand 6) -+ (sign_extend:VNx2DI -+ (truncate:VNx2SI -+ (match_operand:VNx2DI 2 "register_operand" "w, w")))] -+ UNSPEC_PRED_X) -+ (match_operand:DI 3 "const_int_operand") -+ (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i") -+ (mem:BLK (scratch)) -+ (reg:VNx16BI FFRT_REGNUM)] -+ UNSPEC_LDFF1_GATHER))] -+ "TARGET_SVE" -+ "@ -+ ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw] -+ ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw %p4]" -+ "&& !rtx_equal_p (operands[5], operands[6])" -+ { -+ operands[6] = copy_rtx (operands[5]); -+ } -+) -+ -+;; Likewise, but with the offset being zero-extended from 32 bits. 
-+(define_insn "*aarch64_ldff1_gather_uxtw" -+ [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w") -+ (unspec:SVE_FULL_D -+ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") -+ (match_operand:DI 1 "register_operand" "rk, rk") -+ (and:VNx2DI -+ (match_operand:VNx2DI 2 "register_operand" "w, w") -+ (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate")) -+ (match_operand:DI 3 "const_int_operand") -+ (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i") -+ (mem:BLK (scratch)) -+ (reg:VNx16BI FFRT_REGNUM)] -+ UNSPEC_LDFF1_GATHER))] -+ "TARGET_SVE" -+ "@ -+ ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw] -+ ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw %p4]" -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- First-faulting extending gather loads -+;; ------------------------------------------------------------------------- -+;; Includes gather forms of: -+;; - LDFF1B -+;; - LDFF1H -+;; - LDFF1SB -+;; - LDFF1SH -+;; - LDFF1SW -+;; - LDFF1W -+;; ------------------------------------------------------------------------- -+ -+;; Predicated extending first-faulting gather loads for 32-bit elements. -+;; Operand 3 is true for unsigned extension and false for signed extension. -+(define_insn "@aarch64_ldff1_gather_" -+ [(set (match_operand:VNx4_WIDE 0 "register_operand" "=w, w, w, w, w, w") -+ (ANY_EXTEND:VNx4_WIDE -+ (unspec:VNx4_NARROW -+ [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") -+ (match_operand:DI 1 "aarch64_sve_gather_offset_" "Z, vg, rk, rk, rk, rk") -+ (match_operand:VNx4_WIDE 2 "register_operand" "w, w, w, w, w, w") -+ (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1") -+ (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, Ui1, Ui1, Ui1, i, i") -+ (mem:BLK (scratch)) -+ (reg:VNx16BI FFRT_REGNUM)] -+ UNSPEC_LDFF1_GATHER)))] -+ "TARGET_SVE" -+ "@ -+ ldff1\t%0.s, %5/z, [%2.s] -+ ldff1\t%0.s, %5/z, [%2.s, #%1] -+ ldff1\t%0.s, %5/z, [%1, %2.s, sxtw] -+ ldff1\t%0.s, %5/z, [%1, %2.s, uxtw] -+ ldff1\t%0.s, %5/z, [%1, %2.s, sxtw %p4] -+ ldff1\t%0.s, %5/z, [%1, %2.s, uxtw %p4]" -+) -+ -+;; Predicated extending first-faulting gather loads for 64-bit elements. -+;; The value of operand 3 doesn't matter in this case. -+(define_insn "@aarch64_ldff1_gather_" -+ [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w, w, w") -+ (ANY_EXTEND:VNx2_WIDE -+ (unspec:VNx2_NARROW -+ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl") -+ (match_operand:DI 1 "aarch64_sve_gather_offset_" "Z, vg, rk, rk") -+ (match_operand:VNx2_WIDE 2 "register_operand" "w, w, w, w") -+ (match_operand:DI 3 "const_int_operand") -+ (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, Ui1, Ui1, i") -+ (mem:BLK (scratch)) -+ (reg:VNx16BI FFRT_REGNUM)] -+ UNSPEC_LDFF1_GATHER)))] -+ "TARGET_SVE" -+ "@ -+ ldff1\t%0.d, %5/z, [%2.d] -+ ldff1\t%0.d, %5/z, [%2.d, #%1] -+ ldff1\t%0.d, %5/z, [%1, %2.d] -+ ldff1\t%0.d, %5/z, [%1, %2.d, lsl %p4]" -+) -+ -+;; Likewise, but with the offset being sign-extended from 32 bits. 
-+(define_insn_and_rewrite "*aarch64_ldff1_gather__sxtw" -+ [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w") -+ (ANY_EXTEND:VNx2_WIDE -+ (unspec:VNx2_NARROW -+ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") -+ (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk") -+ (unspec:VNx2DI -+ [(match_operand 6) -+ (sign_extend:VNx2DI -+ (truncate:VNx2SI -+ (match_operand:VNx2DI 2 "register_operand" "w, w")))] -+ UNSPEC_PRED_X) -+ (match_operand:DI 3 "const_int_operand") -+ (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, i") -+ (mem:BLK (scratch)) -+ (reg:VNx16BI FFRT_REGNUM)] -+ UNSPEC_LDFF1_GATHER)))] -+ "TARGET_SVE" -+ "@ -+ ldff1\t%0.d, %5/z, [%1, %2.d, sxtw] -+ ldff1\t%0.d, %5/z, [%1, %2.d, sxtw %p4]" -+ "&& !rtx_equal_p (operands[5], operands[6])" -+ { -+ operands[6] = copy_rtx (operands[5]); -+ } -+) -+ -+;; Likewise, but with the offset being zero-extended from 32 bits. -+(define_insn "*aarch64_ldff1_gather__uxtw" -+ [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w") -+ (ANY_EXTEND:VNx2_WIDE -+ (unspec:VNx2_NARROW -+ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") -+ (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk") -+ (and:VNx2DI -+ (match_operand:VNx2DI 2 "register_operand" "w, w") -+ (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate")) -+ (match_operand:DI 3 "const_int_operand") -+ (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, i") -+ (mem:BLK (scratch)) -+ (reg:VNx16BI FFRT_REGNUM)] -+ UNSPEC_LDFF1_GATHER)))] -+ "TARGET_SVE" -+ "@ -+ ldff1\t%0.d, %5/z, [%1, %2.d, uxtw] -+ ldff1\t%0.d, %5/z, [%1, %2.d, uxtw %p4]" -+) -+ -+;; ========================================================================= -+;; == Prefetches -+;; ========================================================================= -+ -+;; ------------------------------------------------------------------------- -+;; ---- Contiguous prefetches -+;; ------------------------------------------------------------------------- -+;; Includes contiguous forms of: -+;; - PRFB -+;; - PRFD -+;; - PRFH -+;; - PRFW -+;; ------------------------------------------------------------------------- -+ -+;; Contiguous predicated prefetches. Operand 2 gives the real prefetch -+;; operation (as an svprfop), with operands 3 and 4 providing distilled -+;; information. -+(define_insn "@aarch64_sve_prefetch" -+ [(prefetch (unspec:DI -+ [(match_operand: 0 "register_operand" "Upl") -+ (match_operand:SVE_FULL_I 1 "aarch64_sve_prefetch_operand" "UP") -+ (match_operand:DI 2 "const_int_operand")] -+ UNSPEC_SVE_PREFETCH) -+ (match_operand:DI 3 "const_int_operand") -+ (match_operand:DI 4 "const_int_operand"))] -+ "TARGET_SVE" -+ { -+ operands[1] = gen_rtx_MEM (mode, operands[1]); -+ return aarch64_output_sve_prefetch ("prf", operands[2], "%0, %1"); -+ } -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- Gather prefetches -+;; ------------------------------------------------------------------------- -+;; Includes gather forms of: -+;; - PRFB -+;; - PRFD -+;; - PRFH -+;; - PRFW -+;; ------------------------------------------------------------------------- -+ -+;; Predicated gather prefetches for 32-bit bases and offsets. 
The operands -+;; are: -+;; 0: the governing predicate -+;; 1: the scalar component of the address -+;; 2: the vector component of the address -+;; 3: 1 for zero extension, 0 for sign extension -+;; 4: the scale multiplier -+;; 5: a vector zero that identifies the mode of data being accessed -+;; 6: the prefetch operator (an svprfop) -+;; 7: the normal RTL prefetch rw flag -+;; 8: the normal RTL prefetch locality value -+(define_insn "@aarch64_sve_gather_prefetch" -+ [(prefetch (unspec:DI -+ [(match_operand:VNx4BI 0 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") -+ (match_operand:DI 1 "aarch64_sve_gather_offset_" "Z, vg, rk, rk, rk, rk") -+ (match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w, w, w, w") -+ (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1") -+ (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, Ui1, Ui1, Ui1, i, i") -+ (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero") -+ (match_operand:DI 6 "const_int_operand")] -+ UNSPEC_SVE_PREFETCH_GATHER) -+ (match_operand:DI 7 "const_int_operand") -+ (match_operand:DI 8 "const_int_operand"))] -+ "TARGET_SVE" -+ { -+ static const char *const insns[][2] = { -+ "prf", "%0, [%2.s]", -+ "prf", "%0, [%2.s, #%1]", -+ "prfb", "%0, [%1, %2.s, sxtw]", -+ "prfb", "%0, [%1, %2.s, uxtw]", -+ "prf", "%0, [%1, %2.s, sxtw %p4]", -+ "prf", "%0, [%1, %2.s, uxtw %p4]" -+ }; -+ const char *const *parts = insns[which_alternative]; -+ return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]); -+ } -+) -+ -+;; Predicated gather prefetches for 64-bit elements. The value of operand 3 -+;; doesn't matter in this case. -+(define_insn "@aarch64_sve_gather_prefetch" -+ [(prefetch (unspec:DI -+ [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl, Upl, Upl") -+ (match_operand:DI 1 "aarch64_sve_gather_offset_" "Z, vg, rk, rk") -+ (match_operand:VNx2DI_ONLY 2 "register_operand" "w, w, w, w") -+ (match_operand:DI 3 "const_int_operand") -+ (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, Ui1, Ui1, i") -+ (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero") -+ (match_operand:DI 6 "const_int_operand")] -+ UNSPEC_SVE_PREFETCH_GATHER) -+ (match_operand:DI 7 "const_int_operand") -+ (match_operand:DI 8 "const_int_operand"))] -+ "TARGET_SVE" -+ { -+ static const char *const insns[][2] = { -+ "prf", "%0, [%2.d]", -+ "prf", "%0, [%2.d, #%1]", -+ "prfb", "%0, [%1, %2.d]", -+ "prf", "%0, [%1, %2.d, lsl %p4]" -+ }; -+ const char *const *parts = insns[which_alternative]; -+ return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]); -+ } -+) -+ -+;; Likewise, but with the offset being sign-extended from 32 bits. 
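A small sketch of the contiguous prefetch form, assuming <arm_sve.h> and an arbitrary look-ahead distance; the gather prefetch patterns follow the same scheme with a vector of offsets instead of a scalar base.

#include <stdint.h>
#include <arm_sve.h>

void
copy_with_prefetch (double *restrict dst, const double *restrict src, int64_t n)
{
  for (int64_t i = 0; i < n; i += svcntd ())
    {
      svbool_t pg = svwhilelt_b64_s64 (i, n);
      svprfd (pg, src + i + 256, SV_PLDL1STRM);          /* PRFD, streaming hint */
      svst1_f64 (pg, dst + i, svld1_f64 (pg, src + i));
    }
}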
-+(define_insn_and_rewrite "*aarch64_sve_gather_prefetch_sxtw" -+ [(prefetch (unspec:DI -+ [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl") -+ (match_operand:DI 1 "register_operand" "rk, rk") -+ (unspec:VNx2DI_ONLY -+ [(match_operand 9) -+ (sign_extend:VNx2DI -+ (truncate:VNx2SI -+ (match_operand:VNx2DI 2 "register_operand" "w, w")))] -+ UNSPEC_PRED_X) -+ (match_operand:DI 3 "const_int_operand") -+ (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, i") -+ (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero") -+ (match_operand:DI 6 "const_int_operand")] -+ UNSPEC_SVE_PREFETCH_GATHER) -+ (match_operand:DI 7 "const_int_operand") -+ (match_operand:DI 8 "const_int_operand"))] -+ "TARGET_SVE" -+ { -+ static const char *const insns[][2] = { -+ "prfb", "%0, [%1, %2.d, sxtw]", -+ "prf", "%0, [%1, %2.d, sxtw %p4]" -+ }; -+ const char *const *parts = insns[which_alternative]; -+ return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]); -+ } -+ "&& !rtx_equal_p (operands[0], operands[9])" -+ { -+ operands[9] = copy_rtx (operands[0]); -+ } -+) -+ -+;; Likewise, but with the offset being zero-extended from 32 bits. -+(define_insn "*aarch64_sve_gather_prefetch_uxtw" -+ [(prefetch (unspec:DI -+ [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl") -+ (match_operand:DI 1 "register_operand" "rk, rk") -+ (and:VNx2DI_ONLY -+ (match_operand:VNx2DI 2 "register_operand" "w, w") -+ (match_operand:VNx2DI 9 "aarch64_sve_uxtw_immediate")) -+ (match_operand:DI 3 "const_int_operand") -+ (match_operand:DI 4 "aarch64_gather_scale_operand_" "Ui1, i") -+ (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero") -+ (match_operand:DI 6 "const_int_operand")] -+ UNSPEC_SVE_PREFETCH_GATHER) -+ (match_operand:DI 7 "const_int_operand") -+ (match_operand:DI 8 "const_int_operand"))] -+ "TARGET_SVE" -+ { -+ static const char *const insns[][2] = { -+ "prfb", "%0, [%1, %2.d, uxtw]", -+ "prf", "%0, [%1, %2.d, uxtw %p4]" -+ }; -+ const char *const *parts = insns[which_alternative]; -+ return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]); -+ } -+) -+ -+;; ========================================================================= -+;; == Stores -+;; ========================================================================= -+ -+;; ------------------------------------------------------------------------- -+;; ---- Normal contiguous stores -+;; ------------------------------------------------------------------------- -+;; Includes contiguous forms of: -+;; - ST1B -+;; - ST1D -+;; - ST1H -+;; - ST1W -+;; - ST2B -+;; - ST2D -+;; - ST2H -+;; - ST2W -+;; - ST3B -+;; - ST3D -+;; - ST3H -+;; - ST3W -+;; - ST4B -+;; - ST4D -+;; - ST4H -+;; - ST4W -+;; ------------------------------------------------------------------------- -+ -+;; Predicated ST1. -+(define_insn "maskstore" -+ [(set (match_operand:SVE_FULL 0 "memory_operand" "+m") -+ (unspec:SVE_FULL -+ [(match_operand: 2 "register_operand" "Upl") -+ (match_operand:SVE_FULL 1 "register_operand" "w") -+ (match_dup 0)] -+ UNSPEC_ST1_SVE))] -+ "TARGET_SVE" -+ "st1\t%1., %2, %0" -+) -+ -+;; Unpredicated ST[234]. This is always a full update, so the dependence -+;; on the old value of the memory location (via (match_dup 0)) is redundant. -+;; There doesn't seem to be any obvious benefit to treating the all-true -+;; case differently though. In particular, it's very unlikely that we'll -+;; only find out during RTL that a store_lanes is dead. 
-+(define_expand "vec_store_lanes" -+ [(set (match_operand:SVE_STRUCT 0 "memory_operand") -+ (unspec:SVE_STRUCT -+ [(match_dup 2) -+ (match_operand:SVE_STRUCT 1 "register_operand") -+ (match_dup 0)] -+ UNSPEC_STN))] -+ "TARGET_SVE" -+ { -+ operands[2] = aarch64_ptrue_reg (mode); -+ } -+) -+ -+;; Predicated ST[234]. -+(define_insn "vec_mask_store_lanes" -+ [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m") -+ (unspec:SVE_STRUCT -+ [(match_operand: 2 "register_operand" "Upl") -+ (match_operand:SVE_STRUCT 1 "register_operand" "w") -+ (match_dup 0)] -+ UNSPEC_STN))] -+ "TARGET_SVE" -+ "st\t%1, %2, %0" -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- Truncating contiguous stores -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - ST1B -+;; - ST1H -+;; - ST1W -+;; ------------------------------------------------------------------------- -+ -+;; Predicated truncate and store, with 8 elements per 128-bit block. -+(define_insn "@aarch64_store_trunc" -+ [(set (match_operand:VNx8_NARROW 0 "memory_operand" "+m") -+ (unspec:VNx8_NARROW -+ [(match_operand:VNx8BI 2 "register_operand" "Upl") -+ (truncate:VNx8_NARROW -+ (match_operand:VNx8_WIDE 1 "register_operand" "w")) -+ (match_dup 0)] -+ UNSPEC_ST1_SVE))] -+ "TARGET_SVE" -+ "st1\t%1., %2, %0" -+) -+ -+;; Predicated truncate and store, with 4 elements per 128-bit block. -+(define_insn "@aarch64_store_trunc" -+ [(set (match_operand:VNx4_NARROW 0 "memory_operand" "+m") -+ (unspec:VNx4_NARROW -+ [(match_operand:VNx4BI 2 "register_operand" "Upl") -+ (truncate:VNx4_NARROW -+ (match_operand:VNx4_WIDE 1 "register_operand" "w")) -+ (match_dup 0)] -+ UNSPEC_ST1_SVE))] -+ "TARGET_SVE" -+ "st1\t%1., %2, %0" -+) -+ -+;; Predicated truncate and store, with 2 elements per 128-bit block. -+(define_insn "@aarch64_store_trunc" -+ [(set (match_operand:VNx2_NARROW 0 "memory_operand" "+m") -+ (unspec:VNx2_NARROW -+ [(match_operand:VNx2BI 2 "register_operand" "Upl") -+ (truncate:VNx2_NARROW -+ (match_operand:VNx2_WIDE 1 "register_operand" "w")) -+ (match_dup 0)] -+ UNSPEC_ST1_SVE))] -+ "TARGET_SVE" -+ "st1\t%1., %2, %0" -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- Non-temporal contiguous stores -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - STNT1B -+;; - STNT1D -+;; - STNT1H -+;; - STNT1W -+;; ------------------------------------------------------------------------- -+ -+(define_insn "@aarch64_stnt1" -+ [(set (match_operand:SVE_FULL 0 "memory_operand" "+m") -+ (unspec:SVE_FULL -+ [(match_operand: 2 "register_operand" "Upl") -+ (match_operand:SVE_FULL 1 "register_operand" "w") -+ (match_dup 0)] -+ UNSPEC_STNT1_SVE))] -+ "TARGET_SVE" -+ "stnt1\t%1., %2, %0" -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- Normal scatter stores -+;; ------------------------------------------------------------------------- -+;; Includes scatter forms of: -+;; - ST1D -+;; - ST1W -+;; ------------------------------------------------------------------------- -+ -+;; Unpredicated scatter stores. 
-+(define_expand "scatter_store" -+ [(set (mem:BLK (scratch)) -+ (unspec:BLK -+ [(match_dup 5) -+ (match_operand:DI 0 "aarch64_sve_gather_offset_") -+ (match_operand: 1 "register_operand") -+ (match_operand:DI 2 "const_int_operand") -+ (match_operand:DI 3 "aarch64_gather_scale_operand_") -+ (match_operand:SVE_FULL_SD 4 "register_operand")] -+ UNSPEC_ST1_SCATTER))] -+ "TARGET_SVE" -+ { -+ operands[5] = aarch64_ptrue_reg (mode); -+ } -+) -+ -+;; Predicated scatter stores for 32-bit elements. Operand 2 is true for -+;; unsigned extension and false for signed extension. -+(define_insn "mask_scatter_store" -+ [(set (mem:BLK (scratch)) -+ (unspec:BLK -+ [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") -+ (match_operand:DI 0 "aarch64_sve_gather_offset_w" "Z, vgw, rk, rk, rk, rk") -+ (match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w") -+ (match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1") -+ (match_operand:DI 3 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, Ui1, i, i") -+ (match_operand:SVE_FULL_S 4 "register_operand" "w, w, w, w, w, w")] -+ UNSPEC_ST1_SCATTER))] -+ "TARGET_SVE" -+ "@ -+ st1w\t%4.s, %5, [%1.s] -+ st1w\t%4.s, %5, [%1.s, #%0] -+ st1w\t%4.s, %5, [%0, %1.s, sxtw] -+ st1w\t%4.s, %5, [%0, %1.s, uxtw] -+ st1w\t%4.s, %5, [%0, %1.s, sxtw %p3] -+ st1w\t%4.s, %5, [%0, %1.s, uxtw %p3]" -+) -+ -+;; Predicated scatter stores for 64-bit elements. The value of operand 2 -+;; doesn't matter in this case. -+(define_insn "mask_scatter_store" -+ [(set (mem:BLK (scratch)) -+ (unspec:BLK -+ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl") -+ (match_operand:DI 0 "aarch64_sve_gather_offset_d" "Z, vgd, rk, rk") -+ (match_operand:VNx2DI 1 "register_operand" "w, w, w, w") -+ (match_operand:DI 2 "const_int_operand") -+ (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, Ui1, Ui1, i") -+ (match_operand:SVE_FULL_D 4 "register_operand" "w, w, w, w")] -+ UNSPEC_ST1_SCATTER))] -+ "TARGET_SVE" -+ "@ -+ st1d\t%4.d, %5, [%1.d] -+ st1d\t%4.d, %5, [%1.d, #%0] -+ st1d\t%4.d, %5, [%0, %1.d] -+ st1d\t%4.d, %5, [%0, %1.d, lsl %p3]" -+) -+ -+;; Likewise, but with the offset being sign-extended from 32 bits. -+(define_insn_and_rewrite "*mask_scatter_store_sxtw" -+ [(set (mem:BLK (scratch)) -+ (unspec:BLK -+ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") -+ (match_operand:DI 0 "register_operand" "rk, rk") -+ (unspec:VNx2DI -+ [(match_operand 6) -+ (sign_extend:VNx2DI -+ (truncate:VNx2SI -+ (match_operand:VNx2DI 1 "register_operand" "w, w")))] -+ UNSPEC_PRED_X) -+ (match_operand:DI 2 "const_int_operand") -+ (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, i") -+ (match_operand:SVE_FULL_D 4 "register_operand" "w, w")] -+ UNSPEC_ST1_SCATTER))] -+ "TARGET_SVE" -+ "@ -+ st1d\t%4.d, %5, [%0, %1.d, sxtw] -+ st1d\t%4.d, %5, [%0, %1.d, sxtw %p3]" -+ "&& !rtx_equal_p (operands[5], operands[6])" -+ { -+ operands[6] = copy_rtx (operands[5]); -+ } -+) -+ -+;; Likewise, but with the offset being zero-extended from 32 bits. 
-+(define_insn "*mask_scatter_store_uxtw" -+ [(set (mem:BLK (scratch)) -+ (unspec:BLK -+ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") -+ (match_operand:DI 0 "aarch64_reg_or_zero" "rk, rk") -+ (and:VNx2DI -+ (match_operand:VNx2DI 1 "register_operand" "w, w") -+ (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate")) -+ (match_operand:DI 2 "const_int_operand") -+ (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, i") -+ (match_operand:SVE_FULL_D 4 "register_operand" "w, w")] -+ UNSPEC_ST1_SCATTER))] -+ "TARGET_SVE" -+ "@ -+ st1d\t%4.d, %5, [%0, %1.d, uxtw] -+ st1d\t%4.d, %5, [%0, %1.d, uxtw %p3]" -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- Truncating scatter stores -+;; ------------------------------------------------------------------------- -+;; Includes scatter forms of: -+;; - ST1B -+;; - ST1H -+;; - ST1W -+;; ------------------------------------------------------------------------- -+ -+;; Predicated truncating scatter stores for 32-bit elements. Operand 2 is -+;; true for unsigned extension and false for signed extension. -+(define_insn "@aarch64_scatter_store_trunc" -+ [(set (mem:BLK (scratch)) -+ (unspec:BLK -+ [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") -+ (match_operand:DI 0 "aarch64_sve_gather_offset_" "Z, vg, rk, rk, rk, rk") -+ (match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w") -+ (match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1") -+ (match_operand:DI 3 "aarch64_gather_scale_operand_" "Ui1, Ui1, Ui1, Ui1, i, i") -+ (truncate:VNx4_NARROW -+ (match_operand:VNx4_WIDE 4 "register_operand" "w, w, w, w, w, w"))] -+ UNSPEC_ST1_SCATTER))] -+ "TARGET_SVE" -+ "@ -+ st1\t%4.s, %5, [%1.s] -+ st1\t%4.s, %5, [%1.s, #%0] -+ st1\t%4.s, %5, [%0, %1.s, sxtw] -+ st1\t%4.s, %5, [%0, %1.s, uxtw] -+ st1\t%4.s, %5, [%0, %1.s, sxtw %p3] -+ st1\t%4.s, %5, [%0, %1.s, uxtw %p3]" -+) -+ -+;; Predicated truncating scatter stores for 64-bit elements. The value of -+;; operand 2 doesn't matter in this case. -+(define_insn "@aarch64_scatter_store_trunc" -+ [(set (mem:BLK (scratch)) -+ (unspec:BLK -+ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl") -+ (match_operand:DI 0 "aarch64_sve_gather_offset_" "Z, vg, rk, rk") -+ (match_operand:VNx2DI 1 "register_operand" "w, w, w, w") -+ (match_operand:DI 2 "const_int_operand") -+ (match_operand:DI 3 "aarch64_gather_scale_operand_" "Ui1, Ui1, Ui1, i") -+ (truncate:VNx2_NARROW -+ (match_operand:VNx2_WIDE 4 "register_operand" "w, w, w, w"))] -+ UNSPEC_ST1_SCATTER))] -+ "TARGET_SVE" -+ "@ -+ st1\t%4.d, %5, [%1.d] -+ st1\t%4.d, %5, [%1.d, #%0] -+ st1\t%4.d, %5, [%0, %1.d] -+ st1\t%4.d, %5, [%0, %1.d, lsl %p3]" -+) -+ -+;; Likewise, but with the offset being sign-extended from 32 bits. 
-+(define_insn_and_rewrite "*aarch64_scatter_store_trunc_sxtw" -+ [(set (mem:BLK (scratch)) -+ (unspec:BLK -+ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") -+ (match_operand:DI 0 "register_operand" "rk, rk") -+ (unspec:VNx2DI -+ [(match_operand 6) -+ (sign_extend:VNx2DI -+ (truncate:VNx2SI -+ (match_operand:VNx2DI 1 "register_operand" "w, w")))] -+ UNSPEC_PRED_X) -+ (match_operand:DI 2 "const_int_operand") -+ (match_operand:DI 3 "aarch64_gather_scale_operand_" "Ui1, i") -+ (truncate:VNx2_NARROW -+ (match_operand:VNx2_WIDE 4 "register_operand" "w, w"))] -+ UNSPEC_ST1_SCATTER))] -+ "TARGET_SVE" -+ "@ -+ st1\t%4.d, %5, [%0, %1.d, sxtw] -+ st1\t%4.d, %5, [%0, %1.d, sxtw %p3]" -+ "&& !rtx_equal_p (operands[5], operands[6])" -+ { -+ operands[6] = copy_rtx (operands[5]); -+ } -+) -+ -+;; Likewise, but with the offset being zero-extended from 32 bits. -+(define_insn "*aarch64_scatter_store_trunc_uxtw" -+ [(set (mem:BLK (scratch)) -+ (unspec:BLK -+ [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") -+ (match_operand:DI 0 "aarch64_reg_or_zero" "rk, rk") -+ (and:VNx2DI -+ (match_operand:VNx2DI 1 "register_operand" "w, w") -+ (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate")) -+ (match_operand:DI 2 "const_int_operand") -+ (match_operand:DI 3 "aarch64_gather_scale_operand_" "Ui1, i") -+ (truncate:VNx2_NARROW -+ (match_operand:VNx2_WIDE 4 "register_operand" "w, w"))] -+ UNSPEC_ST1_SCATTER))] -+ "TARGET_SVE" -+ "@ -+ st1\t%4.d, %5, [%0, %1.d, uxtw] -+ st1\t%4.d, %5, [%0, %1.d, uxtw %p3]" -+) -+ -+;; ========================================================================= -+;; == Vector creation -+;; ========================================================================= -+ -+;; ------------------------------------------------------------------------- -+;; ---- [INT,FP] Duplicate element -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - DUP -+;; - MOV -+;; - LD1RB -+;; - LD1RD -+;; - LD1RH -+;; - LD1RW -+;; - LD1ROB (F64MM) -+;; - LD1ROD (F64MM) -+;; - LD1ROH (F64MM) -+;; - LD1ROW (F64MM) -+;; - LD1RQB -+;; - LD1RQD -+;; - LD1RQH -+;; - LD1RQW -+;; ------------------------------------------------------------------------- -+ -+(define_expand "vec_duplicate" -+ [(parallel -+ [(set (match_operand:SVE_FULL 0 "register_operand") -+ (vec_duplicate:SVE_FULL -+ (match_operand: 1 "aarch64_sve_dup_operand"))) -+ (clobber (scratch:VNx16BI))])] -+ "TARGET_SVE" -+ { -+ if (MEM_P (operands[1])) -+ { -+ rtx ptrue = aarch64_ptrue_reg (mode); -+ emit_insn (gen_sve_ld1r (operands[0], ptrue, operands[1], -+ CONST0_RTX (mode))); -+ DONE; -+ } -+ } -+) -+ -+;; Accept memory operands for the benefit of combine, and also in case -+;; the scalar input gets spilled to memory during RA. We want to split -+;; the load at the first opportunity in order to allow the PTRUE to be -+;; optimized with surrounding code. 
-+(define_insn_and_split "*vec_duplicate_reg" -+ [(set (match_operand:SVE_FULL 0 "register_operand" "=w, w, w") -+ (vec_duplicate:SVE_FULL -+ (match_operand: 1 "aarch64_sve_dup_operand" "r, w, Uty"))) -+ (clobber (match_scratch:VNx16BI 2 "=X, X, Upl"))] -+ "TARGET_SVE" -+ "@ -+ mov\t%0., %1 -+ mov\t%0., %1 -+ #" -+ "&& MEM_P (operands[1])" -+ [(const_int 0)] -+ { -+ if (GET_CODE (operands[2]) == SCRATCH) -+ operands[2] = gen_reg_rtx (VNx16BImode); -+ emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode)); -+ rtx gp = gen_lowpart (mode, operands[2]); -+ emit_insn (gen_sve_ld1r (operands[0], gp, operands[1], -+ CONST0_RTX (mode))); -+ DONE; -+ } -+ [(set_attr "length" "4,4,8")] -+) -+ -+;; Duplicate an Advanced SIMD vector to fill an SVE vector (LE version). -+(define_insn "@aarch64_vec_duplicate_vq_le" -+ [(set (match_operand:SVE_FULL 0 "register_operand" "=w") -+ (vec_duplicate:SVE_FULL -+ (match_operand: 1 "register_operand" "w")))] -+ "TARGET_SVE && !BYTES_BIG_ENDIAN" -+ { -+ operands[1] = gen_rtx_REG (mode, REGNO (operands[1])); -+ return "dup\t%0.q, %1.q[0]"; -+ } -+) -+ -+;; Duplicate an Advanced SIMD vector to fill an SVE vector (BE version). -+;; The SVE register layout puts memory lane N into (architectural) -+;; register lane N, whereas the Advanced SIMD layout puts the memory -+;; lsb into the register lsb. We therefore have to describe this in rtl -+;; terms as a reverse of the V128 vector followed by a duplicate. -+(define_insn "@aarch64_vec_duplicate_vq_be" -+ [(set (match_operand:SVE_FULL 0 "register_operand" "=w") -+ (vec_duplicate:SVE_FULL -+ (vec_select: -+ (match_operand: 1 "register_operand" "w") -+ (match_operand 2 "descending_int_parallel"))))] -+ "TARGET_SVE -+ && BYTES_BIG_ENDIAN -+ && known_eq (INTVAL (XVECEXP (operands[2], 0, 0)), -+ GET_MODE_NUNITS (mode) - 1)" -+ { -+ operands[1] = gen_rtx_REG (mode, REGNO (operands[1])); -+ return "dup\t%0.q, %1.q[0]"; -+ } -+) -+ -+;; This is used for vec_duplicates from memory, but can also -+;; be used by combine to optimize selects of a a vec_duplicate -+;; with zero. -+(define_insn "sve_ld1r" -+ [(set (match_operand:SVE_FULL 0 "register_operand" "=w") -+ (unspec:SVE_FULL -+ [(match_operand: 1 "register_operand" "Upl") -+ (vec_duplicate:SVE_FULL -+ (match_operand: 2 "aarch64_sve_ld1r_operand" "Uty")) -+ (match_operand:SVE_FULL 3 "aarch64_simd_imm_zero")] -+ UNSPEC_SEL))] -+ "TARGET_SVE" -+ "ld1r\t%0., %1/z, %2" -+) -+ -+;; Load 128 bits from memory under predicate control and duplicate to -+;; fill a vector. 
-+(define_insn "@aarch64_sve_ld1rq" -+ [(set (match_operand:SVE_FULL 0 "register_operand" "=w") -+ (unspec:SVE_FULL -+ [(match_operand: 2 "register_operand" "Upl") -+ (match_operand: 1 "aarch64_sve_ld1rq_operand" "UtQ")] -+ UNSPEC_LD1RQ))] -+ "TARGET_SVE" -+ { -+ operands[1] = gen_rtx_MEM (mode, XEXP (operands[1], 0)); -+ return "ld1rq\t%0., %2/z, %1"; -+ } -+) -+ -+(define_insn "@aarch64_sve_ld1ro" -+ [(set (match_operand:SVE_FULL 0 "register_operand" "=w") -+ (unspec:SVE_FULL -+ [(match_operand: 2 "register_operand" "Upl") -+ (match_operand:OI 1 "aarch64_sve_ld1ro_operand_" -+ "UO")] -+ UNSPEC_LD1RO))] -+ "TARGET_SVE_F64MM" -+ { -+ operands[1] = gen_rtx_MEM (mode, XEXP (operands[1], 0)); -+ return "ld1ro\t%0., %2/z, %1"; -+ } -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- [INT,FP] Initialize from individual elements -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - INSR -+;; ------------------------------------------------------------------------- -+ -+(define_expand "vec_init" -+ [(match_operand:SVE_FULL 0 "register_operand") -+ (match_operand 1 "")] -+ "TARGET_SVE" -+ { -+ aarch64_sve_expand_vector_init (operands[0], operands[1]); -+ DONE; -+ } -+) -+ -+;; Shift an SVE vector left and insert a scalar into element 0. -+(define_insn "vec_shl_insert_" -+ [(set (match_operand:SVE_FULL 0 "register_operand" "=?w, w, ??&w, ?&w") -+ (unspec:SVE_FULL -+ [(match_operand:SVE_FULL 1 "register_operand" "0, 0, w, w") -+ (match_operand: 2 "aarch64_reg_or_zero" "rZ, w, rZ, w")] -+ UNSPEC_INSR))] -+ "TARGET_SVE" -+ "@ -+ insr\t%0., %2 -+ insr\t%0., %2 -+ movprfx\t%0, %1\;insr\t%0., %2 -+ movprfx\t%0, %1\;insr\t%0., %2" -+ [(set_attr "movprfx" "*,*,yes,yes")] -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- [INT] Linear series -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - INDEX -+;; ------------------------------------------------------------------------- -+ -+(define_insn "vec_series" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w") -+ (vec_series:SVE_FULL_I -+ (match_operand: 1 "aarch64_sve_index_operand" "Usi, r, r") -+ (match_operand: 2 "aarch64_sve_index_operand" "r, Usi, r")))] -+ "TARGET_SVE" -+ "@ -+ index\t%0., #%1, %2 -+ index\t%0., %1, #%2 -+ index\t%0., %1, %2" -+) -+ -+;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range -+;; of an INDEX instruction. -+(define_insn "*vec_series_plus" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w") -+ (plus:SVE_FULL_I -+ (vec_duplicate:SVE_FULL_I -+ (match_operand: 1 "register_operand" "r")) -+ (match_operand:SVE_FULL_I 2 "immediate_operand")))] -+ "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])" -+ { -+ operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]); -+ return "index\t%0., %1, #%2"; -+ } -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- [PRED] Duplicate element -+;; ------------------------------------------------------------------------- -+;; The patterns in this section are synthetic. -+;; ------------------------------------------------------------------------- -+ -+;; Implement a predicate broadcast by shifting the low bit of the scalar -+;; input into the top bit and using a WHILELO. An alternative would be to -+;; duplicate the input and do a compare with zero. 
-+(define_expand "vec_duplicate" -+ [(set (match_operand:PRED_ALL 0 "register_operand") -+ (vec_duplicate:PRED_ALL (match_operand:QI 1 "register_operand")))] -+ "TARGET_SVE" -+ { -+ rtx tmp = gen_reg_rtx (DImode); -+ rtx op1 = gen_lowpart (DImode, operands[1]); -+ emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode))); -+ emit_insn (gen_while_ultdi (operands[0], const0_rtx, tmp)); -+ DONE; -+ } -+) -+ -+;; ========================================================================= -+;; == Vector decomposition -+;; ========================================================================= -+ -+;; ------------------------------------------------------------------------- -+;; ---- [INT,FP] Extract index -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - DUP (Advanced SIMD) -+;; - DUP (SVE) -+;; - EXT (SVE) -+;; - ST1 (Advanced SIMD) -+;; - UMOV (Advanced SIMD) -+;; ------------------------------------------------------------------------- -+ -+(define_expand "vec_extract" -+ [(set (match_operand: 0 "register_operand") -+ (vec_select: -+ (match_operand:SVE_FULL 1 "register_operand") -+ (parallel [(match_operand:SI 2 "nonmemory_operand")])))] -+ "TARGET_SVE" -+ { -+ poly_int64 val; -+ if (poly_int_rtx_p (operands[2], &val) -+ && known_eq (val, GET_MODE_NUNITS (mode) - 1)) -+ { -+ /* The last element can be extracted with a LASTB and a false -+ predicate. */ -+ rtx sel = aarch64_pfalse_reg (mode); -+ emit_insn (gen_extract_last_ (operands[0], sel, operands[1])); -+ DONE; -+ } -+ if (!CONST_INT_P (operands[2])) -+ { -+ /* Create an index with operand[2] as the base and -1 as the step. -+ It will then be zero for the element we care about. */ -+ rtx index = gen_lowpart (mode, operands[2]); -+ index = force_reg (mode, index); -+ rtx series = gen_reg_rtx (mode); -+ emit_insn (gen_vec_series (series, index, constm1_rtx)); -+ -+ /* Get a predicate that is true for only that element. */ -+ rtx zero = CONST0_RTX (mode); -+ rtx cmp = gen_rtx_EQ (mode, series, zero); -+ rtx sel = gen_reg_rtx (mode); -+ emit_insn (gen_vec_cmp (sel, cmp, series, zero)); -+ -+ /* Select the element using LASTB. */ -+ emit_insn (gen_extract_last_ (operands[0], sel, operands[1])); -+ DONE; -+ } -+ } -+) -+ -+;; Extract element zero. This is a special case because we want to force -+;; the registers to be the same for the second alternative, and then -+;; split the instruction into nothing after RA. -+(define_insn_and_split "*vec_extract_0" -+ [(set (match_operand: 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv") -+ (vec_select: -+ (match_operand:SVE_FULL 1 "register_operand" "w, 0, w") -+ (parallel [(const_int 0)])))] -+ "TARGET_SVE" -+ { -+ operands[1] = gen_rtx_REG (mode, REGNO (operands[1])); -+ switch (which_alternative) -+ { -+ case 0: -+ return "umov\\t%0, %1.[0]"; -+ case 1: -+ return "#"; -+ case 2: -+ return "st1\\t{%1.}[0], %0"; -+ default: -+ gcc_unreachable (); -+ } -+ } -+ "&& reload_completed -+ && REG_P (operands[0]) -+ && REGNO (operands[0]) == REGNO (operands[1])" -+ [(const_int 0)] -+ { -+ emit_note (NOTE_INSN_DELETED); -+ DONE; -+ } -+ [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")] -+) -+ -+;; Extract an element from the Advanced SIMD portion of the register. -+;; We don't just reuse the aarch64-simd.md pattern because we don't -+;; want any change in lane number on big-endian targets. 
-+(define_insn "*vec_extract_v128" -+ [(set (match_operand: 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv") -+ (vec_select: -+ (match_operand:SVE_FULL 1 "register_operand" "w, w, w") -+ (parallel [(match_operand:SI 2 "const_int_operand")])))] -+ "TARGET_SVE -+ && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (mode), 1, 15)" -+ { -+ operands[1] = gen_rtx_REG (mode, REGNO (operands[1])); -+ switch (which_alternative) -+ { -+ case 0: -+ return "umov\\t%0, %1.[%2]"; -+ case 1: -+ return "dup\\t%0, %1.[%2]"; -+ case 2: -+ return "st1\\t{%1.}[%2], %0"; -+ default: -+ gcc_unreachable (); -+ } -+ } -+ [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")] -+) -+ -+;; Extract an element in the range of DUP. This pattern allows the -+;; source and destination to be different. -+(define_insn "*vec_extract_dup" -+ [(set (match_operand: 0 "register_operand" "=w") -+ (vec_select: -+ (match_operand:SVE_FULL 1 "register_operand" "w") -+ (parallel [(match_operand:SI 2 "const_int_operand")])))] -+ "TARGET_SVE -+ && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (mode), 16, 63)" -+ { -+ operands[0] = gen_rtx_REG (mode, REGNO (operands[0])); -+ return "dup\t%0., %1.[%2]"; -+ } -+) -+ -+;; Extract an element outside the range of DUP. This pattern requires the -+;; source and destination to be the same. -+(define_insn "*vec_extract_ext" -+ [(set (match_operand: 0 "register_operand" "=w, ?&w") -+ (vec_select: -+ (match_operand:SVE_FULL 1 "register_operand" "0, w") -+ (parallel [(match_operand:SI 2 "const_int_operand")])))] -+ "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (mode) >= 64" -+ { -+ operands[0] = gen_rtx_REG (mode, REGNO (operands[0])); -+ operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (mode)); -+ return (which_alternative == 0 -+ ? "ext\t%0.b, %0.b, %0.b, #%2" -+ : "movprfx\t%0, %1\;ext\t%0.b, %0.b, %1.b, #%2"); -+ } -+ [(set_attr "movprfx" "*,yes")] -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- [INT,FP] Extract active element -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - LASTA -+;; - LASTB -+;; ------------------------------------------------------------------------- -+ -+;; Extract the last active element of operand 1 into operand 0. -+;; If no elements are active, extract the last inactive element instead. -+(define_insn "@extract__" -+ [(set (match_operand: 0 "register_operand" "=?r, w") -+ (unspec: -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (match_operand:SVE_FULL 2 "register_operand" "w, w")] -+ LAST))] -+ "TARGET_SVE" -+ "@ -+ last\t%0, %1, %2. -+ last\t%0, %1, %2." -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- [PRED] Extract index -+;; ------------------------------------------------------------------------- -+;; The patterns in this section are synthetic. -+;; ------------------------------------------------------------------------- -+ -+;; Handle extractions from a predicate by converting to an integer vector -+;; and extracting from there. -+(define_expand "vec_extract" -+ [(match_operand: 0 "register_operand") -+ (match_operand: 1 "register_operand") -+ (match_operand:SI 2 "nonmemory_operand") -+ ;; Dummy operand to which we can attach the iterator. 
-+ (reg:SVE_FULL_I V0_REGNUM)] -+ "TARGET_SVE" -+ { -+ rtx tmp = gen_reg_rtx (mode); -+ emit_insn (gen_vcond_mask_ (tmp, operands[1], -+ CONST1_RTX (mode), -+ CONST0_RTX (mode))); -+ emit_insn (gen_vec_extract (operands[0], tmp, operands[2])); -+ DONE; -+ } -+) -+ -+;; ========================================================================= -+;; == Unary arithmetic -+;; ========================================================================= -+ -+;; ------------------------------------------------------------------------- -+;; ---- [INT] General unary arithmetic corresponding to rtx codes -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - ABS -+;; - CLS (= clrsb) -+;; - CLZ -+;; - CNT (= popcount) -+;; - NEG -+;; - NOT -+;; ------------------------------------------------------------------------- -+ -+;; Unpredicated integer unary arithmetic. -+(define_expand "2" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand") -+ (unspec:SVE_FULL_I -+ [(match_dup 2) -+ (SVE_INT_UNARY:SVE_FULL_I -+ (match_operand:SVE_FULL_I 1 "register_operand"))] -+ UNSPEC_PRED_X))] -+ "TARGET_SVE" -+ { -+ operands[2] = aarch64_ptrue_reg (mode); -+ } -+) -+ -+;; Integer unary arithmetic predicated with a PTRUE. -+(define_insn "@aarch64_pred_" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand" "Upl") -+ (SVE_INT_UNARY:SVE_FULL_I -+ (match_operand:SVE_FULL_I 2 "register_operand" "w"))] -+ UNSPEC_PRED_X))] -+ "TARGET_SVE" -+ "\t%0., %1/m, %2." -+) -+ -+;; Predicated integer unary arithmetic with merging. -+(define_expand "@cond_" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand") -+ (SVE_INT_UNARY:SVE_FULL_I -+ (match_operand:SVE_FULL_I 2 "register_operand")) -+ (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero")] -+ UNSPEC_SEL))] -+ "TARGET_SVE" -+) -+ -+;; Predicated integer unary arithmetic, merging with the first input. -+(define_insn "*cond__2" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (SVE_INT_UNARY:SVE_FULL_I -+ (match_operand:SVE_FULL_I 2 "register_operand" "0, w")) -+ (match_dup 2)] -+ UNSPEC_SEL))] -+ "TARGET_SVE" -+ "@ -+ \t%0., %1/m, %0. -+ movprfx\t%0, %2\;\t%0., %1/m, %2." -+ [(set_attr "movprfx" "*,yes")] -+) -+ -+;; Predicated integer unary arithmetic, merging with an independent value. -+;; -+;; The earlyclobber isn't needed for the first alternative, but omitting -+;; it would only help the case in which operands 2 and 3 are the same, -+;; which is handled above rather than here. Marking all the alternatives -+;; as earlyclobber helps to make the instruction more regular to the -+;; register allocator. -+(define_insn "*cond__any" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, ?&w, ?&w") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl") -+ (SVE_INT_UNARY:SVE_FULL_I -+ (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")) -+ (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] -+ UNSPEC_SEL))] -+ "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])" -+ "@ -+ \t%0., %1/m, %2. -+ movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %2. -+ movprfx\t%0, %3\;\t%0., %1/m, %2." 
-+ [(set_attr "movprfx" "*,yes,yes")] -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- [INT] General unary arithmetic corresponding to unspecs -+;; ------------------------------------------------------------------------- -+;; Includes -+;; - RBIT -+;; - REVB -+;; - REVH -+;; - REVW -+;; ------------------------------------------------------------------------- -+ -+;; Predicated integer unary operations. -+(define_insn "@aarch64_pred_" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand" "Upl") -+ (unspec:SVE_FULL_I -+ [(match_operand:SVE_FULL_I 2 "register_operand" "w")] -+ SVE_INT_UNARY)] -+ UNSPEC_PRED_X))] -+ "TARGET_SVE && >= " -+ "\t%0., %1/m, %2." -+) -+ -+;; Predicated integer unary operations with merging. -+(define_insn "@cond_" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w, ?&w") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl") -+ (unspec:SVE_FULL_I -+ [(match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")] -+ SVE_INT_UNARY) -+ (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] -+ UNSPEC_SEL))] -+ "TARGET_SVE && >= " -+ "@ -+ \t%0., %1/m, %2. -+ movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %2. -+ movprfx\t%0, %3\;\t%0., %1/m, %2." -+ [(set_attr "movprfx" "*,yes,yes")] -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- [INT] Sign extension -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - SXTB -+;; - SXTH -+;; - SXTW -+;; ------------------------------------------------------------------------- -+ -+;; Predicated SXT[BHW]. -+(define_insn "@aarch64_pred_sxt" -+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w") -+ (unspec:SVE_FULL_HSDI -+ [(match_operand: 1 "register_operand" "Upl") -+ (sign_extend:SVE_FULL_HSDI -+ (truncate:SVE_PARTIAL -+ (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")))] -+ UNSPEC_PRED_X))] -+ "TARGET_SVE && (~ & ) == 0" -+ "sxt\t%0., %1/m, %2." -+) -+ -+;; Predicated SXT[BHW] with merging. -+(define_insn "@aarch64_cond_sxt" -+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w, ?&w") -+ (unspec:SVE_FULL_HSDI -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl") -+ (sign_extend:SVE_FULL_HSDI -+ (truncate:SVE_PARTIAL -+ (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w"))) -+ (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] -+ UNSPEC_SEL))] -+ "TARGET_SVE && (~ & ) == 0" -+ "@ -+ sxt\t%0., %1/m, %2. -+ movprfx\t%0., %1/z, %2.\;sxt\t%0., %1/m, %2. -+ movprfx\t%0, %3\;sxt\t%0., %1/m, %2." -+ [(set_attr "movprfx" "*,yes,yes")] -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- [INT] Zero extension -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - UXTB -+;; - UXTH -+;; - UXTW -+;; ------------------------------------------------------------------------- -+ -+;; Match UXT[BHW] as a conditional AND of a constant, merging with the -+;; first input. -+(define_insn "*cond_uxt_2" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (and:SVE_FULL_I -+ (match_operand:SVE_FULL_I 2 "register_operand" "0, w") -+ (match_operand:SVE_FULL_I 3 "aarch64_sve_uxt_immediate")) -+ (match_dup 2)] -+ UNSPEC_SEL))] -+ "TARGET_SVE" -+ "@ -+ uxt%e3\t%0., %1/m, %0. 
-+ movprfx\t%0, %2\;uxt%e3\t%0., %1/m, %2." -+ [(set_attr "movprfx" "*,yes")] -+) -+ -+;; Match UXT[BHW] as a conditional AND of a constant, merging with an -+;; independent value. -+;; -+;; The earlyclobber isn't needed for the first alternative, but omitting -+;; it would only help the case in which operands 2 and 4 are the same, -+;; which is handled above rather than here. Marking all the alternatives -+;; as early-clobber helps to make the instruction more regular to the -+;; register allocator. -+(define_insn "*cond_uxt_any" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, ?&w, ?&w") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl") -+ (and:SVE_FULL_I -+ (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w") -+ (match_operand:SVE_FULL_I 3 "aarch64_sve_uxt_immediate")) -+ (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "0, Dz, w")] -+ UNSPEC_SEL))] -+ "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" -+ "@ -+ uxt%e3\t%0., %1/m, %2. -+ movprfx\t%0., %1/z, %2.\;uxt%e3\t%0., %1/m, %2. -+ movprfx\t%0, %4\;uxt%e3\t%0., %1/m, %2." -+ [(set_attr "movprfx" "*,yes,yes")] -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- [INT] Logical inverse -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - CNOT -+;; ------------------------------------------------------------------------- -+ -+;; Predicated logical inverse. -+(define_expand "@aarch64_pred_cnot" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand") -+ (unspec:SVE_FULL_I -+ [(unspec: -+ [(match_operand: 1 "register_operand") -+ (match_operand:SI 2 "aarch64_sve_ptrue_flag") -+ (eq: -+ (match_operand:SVE_FULL_I 3 "register_operand") -+ (match_dup 4))] -+ UNSPEC_PRED_Z) -+ (match_dup 5) -+ (match_dup 4)] -+ UNSPEC_SEL))] -+ "TARGET_SVE" -+ { -+ operands[4] = CONST0_RTX (mode); -+ operands[5] = CONST1_RTX (mode); -+ } -+) -+ -+(define_insn "*cnot" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w") -+ (unspec:SVE_FULL_I -+ [(unspec: -+ [(match_operand: 1 "register_operand" "Upl") -+ (match_operand:SI 5 "aarch64_sve_ptrue_flag") -+ (eq: -+ (match_operand:SVE_FULL_I 2 "register_operand" "w") -+ (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))] -+ UNSPEC_PRED_Z) -+ (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one") -+ (match_dup 3)] -+ UNSPEC_SEL))] -+ "TARGET_SVE" -+ "cnot\t%0., %1/m, %2." -+) -+ -+;; Predicated logical inverse with merging. -+(define_expand "@cond_cnot" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand") -+ (unspec:SVE_FULL_I -+ [(unspec: -+ [(match_dup 4) -+ (const_int SVE_KNOWN_PTRUE) -+ (eq: -+ (match_operand:SVE_FULL_I 2 "register_operand") -+ (match_dup 5))] -+ UNSPEC_PRED_Z) -+ (match_dup 6) -+ (match_dup 5)] -+ UNSPEC_SEL) -+ (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero")] -+ UNSPEC_SEL))] -+ "TARGET_SVE" -+ { -+ operands[4] = CONSTM1_RTX (mode); -+ operands[5] = CONST0_RTX (mode); -+ operands[6] = CONST1_RTX (mode); -+ } -+) -+ -+;; Predicated logical inverse, merging with the first input. -+(define_insn_and_rewrite "*cond_cnot_2" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ ;; Logical inverse of operand 2 (as above). 
-+ (unspec:SVE_FULL_I -+ [(unspec: -+ [(match_operand 5) -+ (const_int SVE_KNOWN_PTRUE) -+ (eq: -+ (match_operand:SVE_FULL_I 2 "register_operand" "0, w") -+ (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))] -+ UNSPEC_PRED_Z) -+ (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one") -+ (match_dup 3)] -+ UNSPEC_SEL) -+ (match_dup 2)] -+ UNSPEC_SEL))] -+ "TARGET_SVE" -+ "@ -+ cnot\t%0., %1/m, %0. -+ movprfx\t%0, %2\;cnot\t%0., %1/m, %2." -+ "&& !CONSTANT_P (operands[5])" -+ { -+ operands[5] = CONSTM1_RTX (mode); -+ } -+ [(set_attr "movprfx" "*,yes")] -+) -+ -+;; Predicated logical inverse, merging with an independent value. -+;; -+;; The earlyclobber isn't needed for the first alternative, but omitting -+;; it would only help the case in which operands 2 and 6 are the same, -+;; which is handled above rather than here. Marking all the alternatives -+;; as earlyclobber helps to make the instruction more regular to the -+;; register allocator. -+(define_insn_and_rewrite "*cond_cnot_any" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, ?&w, ?&w") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl") -+ ;; Logical inverse of operand 2 (as above). -+ (unspec:SVE_FULL_I -+ [(unspec: -+ [(match_operand 5) -+ (const_int SVE_KNOWN_PTRUE) -+ (eq: -+ (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w") -+ (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))] -+ UNSPEC_PRED_Z) -+ (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one") -+ (match_dup 3)] -+ UNSPEC_SEL) -+ (match_operand:SVE_FULL_I 6 "aarch64_simd_reg_or_zero" "0, Dz, w")] -+ UNSPEC_SEL))] -+ "TARGET_SVE && !rtx_equal_p (operands[2], operands[6])" -+ "@ -+ cnot\t%0., %1/m, %2. -+ movprfx\t%0., %1/z, %2.\;cnot\t%0., %1/m, %2. -+ movprfx\t%0, %6\;cnot\t%0., %1/m, %2." -+ "&& !CONSTANT_P (operands[5])" -+ { -+ operands[5] = CONSTM1_RTX (mode); -+ } -+ [(set_attr "movprfx" "*,yes,yes")] -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- [FP<-INT] General unary arithmetic that maps to unspecs -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - FEXPA -+;; ------------------------------------------------------------------------- -+ -+;; Unpredicated unary operations that take an integer and return a float. -+(define_insn "@aarch64_sve_" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand" "w")] -+ SVE_FP_UNARY_INT))] -+ "TARGET_SVE" -+ "\t%0., %1." -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- [FP] General unary arithmetic corresponding to unspecs -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - FABS -+;; - FNEG -+;; - FRECPE -+;; - FRECPX -+;; - FRINTA -+;; - FRINTI -+;; - FRINTM -+;; - FRINTN -+;; - FRINTP -+;; - FRINTX -+;; - FRINTZ -+;; - FRSQRT -+;; - FSQRT -+;; ------------------------------------------------------------------------- -+ -+;; Unpredicated floating-point unary operations. -+(define_insn "@aarch64_sve_" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") -+ (unspec:SVE_FULL_F -+ [(match_operand:SVE_FULL_F 1 "register_operand" "w")] -+ SVE_FP_UNARY))] -+ "TARGET_SVE" -+ "\t%0., %1." -+) -+ -+;; Unpredicated floating-point unary operations. 
-+(define_expand "2" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand") -+ (unspec:SVE_FULL_F -+ [(match_dup 2) -+ (const_int SVE_RELAXED_GP) -+ (match_operand:SVE_FULL_F 1 "register_operand")] -+ SVE_COND_FP_UNARY))] -+ "TARGET_SVE" -+ { -+ operands[2] = aarch64_ptrue_reg (mode); -+ } -+) -+ -+;; Predicated floating-point unary operations. -+(define_insn "@aarch64_pred_" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand" "Upl") -+ (match_operand:SI 3 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_F 2 "register_operand" "w")] -+ SVE_COND_FP_UNARY))] -+ "TARGET_SVE" -+ "\t%0., %1/m, %2." -+) -+ -+;; Predicated floating-point unary arithmetic with merging. -+(define_expand "@cond_" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand") -+ (unspec:SVE_FULL_F -+ [(match_dup 1) -+ (const_int SVE_STRICT_GP) -+ (match_operand:SVE_FULL_F 2 "register_operand")] -+ SVE_COND_FP_UNARY) -+ (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")] -+ UNSPEC_SEL))] -+ "TARGET_SVE" -+) -+ -+;; Predicated floating-point unary arithmetic, merging with the first input. -+(define_insn_and_rewrite "*cond__2" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (unspec:SVE_FULL_F -+ [(match_operand 3) -+ (match_operand:SI 4 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_F 2 "register_operand" "0, w")] -+ SVE_COND_FP_UNARY) -+ (match_dup 2)] -+ UNSPEC_SEL))] -+ "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[3], operands[1])" -+ "@ -+ \t%0., %1/m, %0. -+ movprfx\t%0, %2\;\t%0., %1/m, %2." -+ "&& !rtx_equal_p (operands[1], operands[3])" -+ { -+ operands[3] = copy_rtx (operands[1]); -+ } -+ [(set_attr "movprfx" "*,yes")] -+) -+ -+;; Predicated floating-point unary arithmetic, merging with an independent -+;; value. -+;; -+;; The earlyclobber isn't needed for the first alternative, but omitting -+;; it would only help the case in which operands 2 and 3 are the same, -+;; which is handled above rather than here. Marking all the alternatives -+;; as earlyclobber helps to make the instruction more regular to the -+;; register allocator. -+(define_insn_and_rewrite "*cond__any" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, ?&w, ?&w") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl") -+ (unspec:SVE_FULL_F -+ [(match_operand 4) -+ (match_operand:SI 5 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] -+ SVE_COND_FP_UNARY) -+ (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] -+ UNSPEC_SEL))] -+ "TARGET_SVE -+ && !rtx_equal_p (operands[2], operands[3]) -+ && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" -+ "@ -+ \t%0., %1/m, %2. -+ movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %2. -+ movprfx\t%0, %3\;\t%0., %1/m, %2." -+ "&& !rtx_equal_p (operands[1], operands[4])" -+ { -+ operands[4] = copy_rtx (operands[1]); -+ } -+ [(set_attr "movprfx" "*,yes,yes")] -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- [PRED] Inverse -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - NOT -+;; ------------------------------------------------------------------------- -+ -+;; Unpredicated predicate inverse. 
-+(define_expand "one_cmpl2" -+ [(set (match_operand:PRED_ALL 0 "register_operand") -+ (and:PRED_ALL -+ (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand")) -+ (match_dup 2)))] -+ "TARGET_SVE" -+ { -+ operands[2] = aarch64_ptrue_reg (mode); -+ } -+) -+ -+;; Predicated predicate inverse. -+(define_insn "*one_cmpl3" -+ [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") -+ (and:PRED_ALL -+ (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa")) -+ (match_operand:PRED_ALL 1 "register_operand" "Upa")))] -+ "TARGET_SVE" -+ "not\t%0.b, %1/z, %2.b" -+) -+ -+;; ========================================================================= -+;; == Binary arithmetic -+;; ========================================================================= -+ -+;; ------------------------------------------------------------------------- -+;; ---- [INT] General binary arithmetic corresponding to rtx codes -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - ADD (merging form only) -+;; - AND (merging form only) -+;; - ASR (merging form only) -+;; - EOR (merging form only) -+;; - LSL (merging form only) -+;; - LSR (merging form only) -+;; - MUL -+;; - ORR (merging form only) -+;; - SMAX -+;; - SMIN -+;; - SUB (merging form only) -+;; - UMAX -+;; - UMIN -+;; ------------------------------------------------------------------------- -+ -+;; Unpredicated integer binary operations that have an immediate form. -+(define_expand "3" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand") -+ (unspec:SVE_FULL_I -+ [(match_dup 3) -+ (SVE_INT_BINARY_IMM:SVE_FULL_I -+ (match_operand:SVE_FULL_I 1 "register_operand") -+ (match_operand:SVE_FULL_I 2 "aarch64_sve__operand"))] -+ UNSPEC_PRED_X))] -+ "TARGET_SVE" -+ { -+ operands[3] = aarch64_ptrue_reg (mode); -+ } -+) -+ -+;; Integer binary operations that have an immediate form, predicated -+;; with a PTRUE. We don't actually need the predicate for the first -+;; and third alternatives, but using Upa or X isn't likely to gain much -+;; and would make the instruction seem less uniform to the register -+;; allocator. -+(define_insn_and_split "@aarch64_pred_" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") -+ (SVE_INT_BINARY_IMM:SVE_FULL_I -+ (match_operand:SVE_FULL_I 2 "register_operand" "%0, 0, w, w") -+ (match_operand:SVE_FULL_I 3 "aarch64_sve__operand" ", w, , w"))] -+ UNSPEC_PRED_X))] -+ "TARGET_SVE" -+ "@ -+ # -+ \t%0., %1/m, %0., %3. -+ # -+ movprfx\t%0, %2\;\t%0., %1/m, %0., %3." -+ ; Split the unpredicated form after reload, so that we don't have -+ ; the unnecessary PTRUE. -+ "&& reload_completed -+ && !register_operand (operands[3], mode)" -+ [(set (match_dup 0) -+ (SVE_INT_BINARY_IMM:SVE_FULL_I (match_dup 2) (match_dup 3)))] -+ "" -+ [(set_attr "movprfx" "*,*,yes,yes")] -+) -+ -+;; Unpredicated binary operations with a constant (post-RA only). -+;; These are generated by splitting a predicated instruction whose -+;; predicate is unused. -+(define_insn "*post_ra_3" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") -+ (SVE_INT_BINARY_IMM:SVE_FULL_I -+ (match_operand:SVE_FULL_I 1 "register_operand" "0, w") -+ (match_operand:SVE_FULL_I 2 "aarch64_sve__immediate")))] -+ "TARGET_SVE && reload_completed" -+ "@ -+ \t%0., %0., #%2 -+ movprfx\t%0, %1\;\t%0., %0., #%2" -+ [(set_attr "movprfx" "*,yes")] -+) -+ -+;; Predicated integer operations with merging. 
-+(define_expand "@cond_" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand") -+ (SVE_INT_BINARY:SVE_FULL_I -+ (match_operand:SVE_FULL_I 2 "register_operand") -+ (match_operand:SVE_FULL_I 3 "")) -+ (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")] -+ UNSPEC_SEL))] -+ "TARGET_SVE" -+) -+ -+;; Predicated integer operations, merging with the first input. -+(define_insn "*cond__2" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (SVE_INT_BINARY:SVE_FULL_I -+ (match_operand:SVE_FULL_I 2 "register_operand" "0, w") -+ (match_operand:SVE_FULL_I 3 "register_operand" "w, w")) -+ (match_dup 2)] -+ UNSPEC_SEL))] -+ "TARGET_SVE" -+ "@ -+ \t%0., %1/m, %0., %3. -+ movprfx\t%0, %2\;\t%0., %1/m, %0., %3." -+ [(set_attr "movprfx" "*,yes")] -+) -+ -+;; Predicated integer operations, merging with the second input. -+(define_insn "*cond__3" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (SVE_INT_BINARY:SVE_FULL_I -+ (match_operand:SVE_FULL_I 2 "register_operand" "w, w") -+ (match_operand:SVE_FULL_I 3 "register_operand" "0, w")) -+ (match_dup 3)] -+ UNSPEC_SEL))] -+ "TARGET_SVE" -+ "@ -+ \t%0., %1/m, %0., %2. -+ movprfx\t%0, %3\;\t%0., %1/m, %0., %2." -+ [(set_attr "movprfx" "*,yes")] -+) -+ -+;; Predicated integer operations, merging with an independent value. -+(define_insn_and_rewrite "*cond__any" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, ?&w") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") -+ (SVE_INT_BINARY:SVE_FULL_I -+ (match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w, w") -+ (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w, w, w")) -+ (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] -+ UNSPEC_SEL))] -+ "TARGET_SVE -+ && !rtx_equal_p (operands[2], operands[4]) -+ && !rtx_equal_p (operands[3], operands[4])" -+ "@ -+ movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %3. -+ movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %2. -+ movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3. -+ movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., %3. -+ #" -+ "&& reload_completed -+ && register_operand (operands[4], mode) -+ && !rtx_equal_p (operands[0], operands[4])" -+ { -+ emit_insn (gen_vcond_mask_ (operands[0], operands[2], -+ operands[4], operands[1])); -+ operands[4] = operands[2] = operands[0]; -+ } -+ [(set_attr "movprfx" "yes")] -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- [INT] Addition -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - ADD -+;; - DECB -+;; - DECD -+;; - DECH -+;; - DECW -+;; - INCB -+;; - INCD -+;; - INCH -+;; - INCW -+;; - SUB -+;; ------------------------------------------------------------------------- -+ -+(define_insn "add3" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?w, ?w, w") -+ (plus:SVE_FULL_I -+ (match_operand:SVE_FULL_I 1 "register_operand" "%0, 0, 0, w, w, w") -+ (match_operand:SVE_FULL_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, vsa, vsn, w")))] -+ "TARGET_SVE" -+ "@ -+ add\t%0., %0., #%D2 -+ sub\t%0., %0., #%N2 -+ * return aarch64_output_sve_vector_inc_dec (\"%0.\", operands[2]); -+ movprfx\t%0, %1\;add\t%0., %0., #%D2 -+ movprfx\t%0, %1\;sub\t%0., %0., #%N2 -+ add\t%0., %1., %2." 
-+ [(set_attr "movprfx" "*,*,*,yes,yes,*")] -+) -+ -+;; Merging forms are handled through SVE_INT_BINARY. -+ -+;; ------------------------------------------------------------------------- -+;; ---- [INT] Subtraction -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - SUB -+;; - SUBR -+;; ------------------------------------------------------------------------- -+ -+(define_insn "sub3" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w") -+ (minus:SVE_FULL_I -+ (match_operand:SVE_FULL_I 1 "aarch64_sve_arith_operand" "w, vsa, vsa") -+ (match_operand:SVE_FULL_I 2 "register_operand" "w, 0, w")))] -+ "TARGET_SVE" -+ "@ -+ sub\t%0., %1., %2. -+ subr\t%0., %0., #%D1 -+ movprfx\t%0, %2\;subr\t%0., %0., #%D1" -+ [(set_attr "movprfx" "*,*,yes")] -+) -+ -+;; Merging forms are handled through SVE_INT_BINARY. -+ -+;; ------------------------------------------------------------------------- -+;; ---- [INT] Take address -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - ADR -+;; ------------------------------------------------------------------------- -+ -+;; An unshifted and unscaled ADR. This is functionally equivalent to an ADD, -+;; but the svadrb intrinsics should preserve the user's choice. -+(define_insn "@aarch64_adr" -+ [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w") -+ (unspec:SVE_FULL_SDI -+ [(match_operand:SVE_FULL_SDI 1 "register_operand" "w") -+ (match_operand:SVE_FULL_SDI 2 "register_operand" "w")] -+ UNSPEC_ADR))] -+ "TARGET_SVE" -+ "adr\t%0., [%1., %2.]" -+) -+ -+;; Same, but with the offset being sign-extended from the low 32 bits. -+(define_insn_and_rewrite "*aarch64_adr_sxtw" -+ [(set (match_operand:VNx2DI 0 "register_operand" "=w") -+ (unspec:VNx2DI -+ [(match_operand:VNx2DI 1 "register_operand" "w") -+ (unspec:VNx2DI -+ [(match_operand 3) -+ (sign_extend:VNx2DI -+ (truncate:VNx2SI -+ (match_operand:VNx2DI 2 "register_operand" "w")))] -+ UNSPEC_PRED_X)] -+ UNSPEC_ADR))] -+ "TARGET_SVE" -+ "adr\t%0.d, [%1.d, %2.d, sxtw]" -+ "&& !CONSTANT_P (operands[3])" -+ { -+ operands[3] = CONSTM1_RTX (VNx2BImode); -+ } -+) -+ -+;; Same, but with the offset being zero-extended from the low 32 bits. -+(define_insn "*aarch64_adr_uxtw_unspec" -+ [(set (match_operand:VNx2DI 0 "register_operand" "=w") -+ (unspec:VNx2DI -+ [(match_operand:VNx2DI 1 "register_operand" "w") -+ (and:VNx2DI -+ (match_operand:VNx2DI 2 "register_operand" "w") -+ (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))] -+ UNSPEC_ADR))] -+ "TARGET_SVE" -+ "adr\t%0.d, [%1.d, %2.d, uxtw]" -+) -+ -+;; Same, matching as a PLUS rather than unspec. -+(define_insn "*aarch64_adr_uxtw_and" -+ [(set (match_operand:VNx2DI 0 "register_operand" "=w") -+ (plus:VNx2DI -+ (and:VNx2DI -+ (match_operand:VNx2DI 2 "register_operand" "w") -+ (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate")) -+ (match_operand:VNx2DI 1 "register_operand" "w")))] -+ "TARGET_SVE" -+ "adr\t%0.d, [%1.d, %2.d, uxtw]" -+) -+ -+;; ADR with a nonzero shift. 
-+(define_expand "@aarch64_adr_shift" -+ [(set (match_operand:SVE_FULL_SDI 0 "register_operand") -+ (plus:SVE_FULL_SDI -+ (unspec:SVE_FULL_SDI -+ [(match_dup 4) -+ (ashift:SVE_FULL_SDI -+ (match_operand:SVE_FULL_SDI 2 "register_operand") -+ (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))] -+ UNSPEC_PRED_X) -+ (match_operand:SVE_FULL_SDI 1 "register_operand")))] -+ "TARGET_SVE" -+ { -+ operands[4] = CONSTM1_RTX (mode); -+ } -+) -+ -+(define_insn_and_rewrite "*aarch64_adr_shift" -+ [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w") -+ (plus:SVE_FULL_SDI -+ (unspec:SVE_FULL_SDI -+ [(match_operand 4) -+ (ashift:SVE_FULL_SDI -+ (match_operand:SVE_FULL_SDI 2 "register_operand" "w") -+ (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))] -+ UNSPEC_PRED_X) -+ (match_operand:SVE_FULL_SDI 1 "register_operand" "w")))] -+ "TARGET_SVE" -+ "adr\t%0., [%1., %2., lsl %3]" -+ "&& !CONSTANT_P (operands[4])" -+ { -+ operands[4] = CONSTM1_RTX (mode); -+ } -+) -+ -+;; Same, but with the index being sign-extended from the low 32 bits. -+(define_insn_and_rewrite "*aarch64_adr_shift_sxtw" -+ [(set (match_operand:VNx2DI 0 "register_operand" "=w") -+ (plus:VNx2DI -+ (unspec:VNx2DI -+ [(match_operand 4) -+ (ashift:VNx2DI -+ (unspec:VNx2DI -+ [(match_operand 5) -+ (sign_extend:VNx2DI -+ (truncate:VNx2SI -+ (match_operand:VNx2DI 2 "register_operand" "w")))] -+ UNSPEC_PRED_X) -+ (match_operand:VNx2DI 3 "const_1_to_3_operand"))] -+ UNSPEC_PRED_X) -+ (match_operand:VNx2DI 1 "register_operand" "w")))] -+ "TARGET_SVE" -+ "adr\t%0.d, [%1.d, %2.d, sxtw %3]" -+ "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))" -+ { -+ operands[5] = operands[4] = CONSTM1_RTX (VNx2BImode); -+ } -+) -+ -+;; Same, but with the index being zero-extended from the low 32 bits. -+(define_insn_and_rewrite "*aarch64_adr_shift_uxtw" -+ [(set (match_operand:VNx2DI 0 "register_operand" "=w") -+ (plus:VNx2DI -+ (unspec:VNx2DI -+ [(match_operand 5) -+ (ashift:VNx2DI -+ (and:VNx2DI -+ (match_operand:VNx2DI 2 "register_operand" "w") -+ (match_operand:VNx2DI 4 "aarch64_sve_uxtw_immediate")) -+ (match_operand:VNx2DI 3 "const_1_to_3_operand"))] -+ UNSPEC_PRED_X) -+ (match_operand:VNx2DI 1 "register_operand" "w")))] -+ "TARGET_SVE" -+ "adr\t%0.d, [%1.d, %2.d, uxtw %3]" -+ "&& !CONSTANT_P (operands[5])" -+ { -+ operands[5] = CONSTM1_RTX (VNx2BImode); -+ } -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- [INT] Absolute difference -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - SABD -+;; - UABD -+;; ------------------------------------------------------------------------- -+ -+;; Unpredicated integer absolute difference. -+(define_expand "abd_3" -+ [(use (match_operand:SVE_FULL_I 0 "register_operand")) -+ (USMAX:SVE_FULL_I -+ (match_operand:SVE_FULL_I 1 "register_operand") -+ (match_operand:SVE_FULL_I 2 "register_operand"))] -+ "TARGET_SVE" -+ { -+ rtx pred = aarch64_ptrue_reg (mode); -+ emit_insn (gen_aarch64_pred_abd (operands[0], pred, operands[1], -+ operands[2])); -+ DONE; -+ } -+) -+ -+;; Predicated integer absolute difference. 
-+(define_insn "@aarch64_pred_abd" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (minus:SVE_FULL_I -+ (USMAX:SVE_FULL_I -+ (match_operand:SVE_FULL_I 2 "register_operand" "%0, w") -+ (match_operand:SVE_FULL_I 3 "register_operand" "w, w")) -+ (:SVE_FULL_I -+ (match_dup 2) -+ (match_dup 3)))] -+ UNSPEC_PRED_X))] -+ "TARGET_SVE" -+ "@ -+ abd\t%0., %1/m, %0., %3. -+ movprfx\t%0, %2\;abd\t%0., %1/m, %0., %3." -+ [(set_attr "movprfx" "*,yes")] -+) -+ -+(define_expand "@aarch64_cond_abd" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand") -+ (minus:SVE_FULL_I -+ (unspec:SVE_FULL_I -+ [(match_dup 1) -+ (USMAX:SVE_FULL_I -+ (match_operand:SVE_FULL_I 2 "register_operand") -+ (match_operand:SVE_FULL_I 3 "register_operand"))] -+ UNSPEC_PRED_X) -+ (unspec:SVE_FULL_I -+ [(match_dup 1) -+ (:SVE_FULL_I -+ (match_dup 2) -+ (match_dup 3))] -+ UNSPEC_PRED_X)) -+ (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")] -+ UNSPEC_SEL))] -+ "TARGET_SVE" -+{ -+ if (rtx_equal_p (operands[3], operands[4])) -+ std::swap (operands[2], operands[3]); -+}) -+ -+;; Predicated integer absolute difference, merging with the first input. -+(define_insn_and_rewrite "*aarch64_cond_abd_2" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (minus:SVE_FULL_I -+ (unspec:SVE_FULL_I -+ [(match_operand 4) -+ (USMAX:SVE_FULL_I -+ (match_operand:SVE_FULL_I 2 "register_operand" "0, w") -+ (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))] -+ UNSPEC_PRED_X) -+ (unspec:SVE_FULL_I -+ [(match_operand 5) -+ (:SVE_FULL_I -+ (match_dup 2) -+ (match_dup 3))] -+ UNSPEC_PRED_X)) -+ (match_dup 2)] -+ UNSPEC_SEL))] -+ "TARGET_SVE" -+ "@ -+ abd\t%0., %1/m, %0., %3. -+ movprfx\t%0, %2\;abd\t%0., %1/m, %0., %3." -+ "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))" -+ { -+ operands[4] = operands[5] = CONSTM1_RTX (mode); -+ } -+ [(set_attr "movprfx" "*,yes")] -+) -+ -+;; Predicated integer absolute difference, merging with an independent value. -+(define_insn_and_rewrite "*aarch64_cond_abd_any" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, ?&w") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") -+ (minus:SVE_FULL_I -+ (unspec:SVE_FULL_I -+ [(match_operand 5) -+ (USMAX:SVE_FULL_I -+ (match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w, w") -+ (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w, w, w"))] -+ UNSPEC_PRED_X) -+ (unspec:SVE_FULL_I -+ [(match_operand 6) -+ (:SVE_FULL_I -+ (match_dup 2) -+ (match_dup 3))] -+ UNSPEC_PRED_X)) -+ (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] -+ UNSPEC_SEL))] -+ "TARGET_SVE -+ && !rtx_equal_p (operands[2], operands[4]) -+ && !rtx_equal_p (operands[3], operands[4])" -+ "@ -+ movprfx\t%0., %1/z, %0.\;abd\t%0., %1/m, %0., %3. -+ movprfx\t%0., %1/z, %0.\;abd\t%0., %1/m, %0., %2. -+ movprfx\t%0., %1/z, %2.\;abd\t%0., %1/m, %0., %3. -+ movprfx\t%0., %1/m, %2.\;abd\t%0., %1/m, %0., %3. 
-+ #" -+ "&& 1" -+ { -+ if (!CONSTANT_P (operands[5]) || !CONSTANT_P (operands[6])) -+ operands[5] = operands[6] = CONSTM1_RTX (mode); -+ else if (reload_completed -+ && register_operand (operands[4], mode) -+ && !rtx_equal_p (operands[0], operands[4])) -+ { -+ emit_insn (gen_vcond_mask_ (operands[0], operands[2], -+ operands[4], operands[1])); -+ operands[4] = operands[2] = operands[0]; -+ } -+ else -+ FAIL; -+ } -+ [(set_attr "movprfx" "yes")] -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- [INT] Saturating addition and subtraction -+;; ------------------------------------------------------------------------- -+;; - SQADD -+;; - SQSUB -+;; - UQADD -+;; - UQSUB -+;; ------------------------------------------------------------------------- -+ -+;; Unpredicated saturating signed addition and subtraction. -+(define_insn "@aarch64_" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w, w") -+ (SBINQOPS:SVE_FULL_I -+ (match_operand:SVE_FULL_I 1 "register_operand" "0, 0, w, w, w") -+ (match_operand:SVE_FULL_I 2 "aarch64_sve_sqadd_operand" "vsQ, vsS, vsQ, vsS, w")))] -+ "TARGET_SVE" -+ "@ -+ \t%0., %0., #%D2 -+ \t%0., %0., #%N2 -+ movprfx\t%0, %1\;\t%0., %0., #%D2 -+ movprfx\t%0, %1\;\t%0., %0., #%N2 -+ \t%0., %1., %2." -+ [(set_attr "movprfx" "*,*,yes,yes,*")] -+) -+ -+;; Unpredicated saturating unsigned addition and subtraction. -+(define_insn "@aarch64_" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w, w") -+ (UBINQOPS:SVE_FULL_I -+ (match_operand:SVE_FULL_I 1 "register_operand" "0, w, w") -+ (match_operand:SVE_FULL_I 2 "aarch64_sve_arith_operand" "vsa, vsa, w")))] -+ "TARGET_SVE" -+ "@ -+ \t%0., %0., #%D2 -+ movprfx\t%0, %1\;\t%0., %0., #%D2 -+ \t%0., %1., %2." -+ [(set_attr "movprfx" "*,yes,*")] -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- [INT] Highpart multiplication -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - SMULH -+;; - UMULH -+;; ------------------------------------------------------------------------- -+ -+;; Unpredicated highpart multiplication. -+(define_expand "mul3_highpart" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand") -+ (unspec:SVE_FULL_I -+ [(match_dup 3) -+ (unspec:SVE_FULL_I -+ [(match_operand:SVE_FULL_I 1 "register_operand") -+ (match_operand:SVE_FULL_I 2 "register_operand")] -+ MUL_HIGHPART)] -+ UNSPEC_PRED_X))] -+ "TARGET_SVE" -+ { -+ operands[3] = aarch64_ptrue_reg (mode); -+ } -+) -+ -+;; Predicated highpart multiplication. -+(define_insn "@aarch64_pred_" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (unspec:SVE_FULL_I -+ [(match_operand:SVE_FULL_I 2 "register_operand" "%0, w") -+ (match_operand:SVE_FULL_I 3 "register_operand" "w, w")] -+ MUL_HIGHPART)] -+ UNSPEC_PRED_X))] -+ "TARGET_SVE" -+ "@ -+ mulh\t%0., %1/m, %0., %3. -+ movprfx\t%0, %2\;mulh\t%0., %1/m, %0., %3." -+ [(set_attr "movprfx" "*,yes")] -+) -+ -+;; Predicated highpart multiplications with merging. 
-+(define_expand "@cond_" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand") -+ (unspec:SVE_FULL_I -+ [(match_operand:SVE_FULL_I 2 "register_operand") -+ (match_operand:SVE_FULL_I 3 "register_operand")] -+ MUL_HIGHPART) -+ (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")] -+ UNSPEC_SEL))] -+ "TARGET_SVE" -+{ -+ /* Only target code is aware of these operations, so we don't need -+ to handle the fully-general case. */ -+ gcc_assert (rtx_equal_p (operands[2], operands[4]) -+ || CONSTANT_P (operands[4])); -+}) -+ -+;; Predicated highpart multiplications, merging with the first input. -+(define_insn "*cond__2" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (unspec:SVE_FULL_I -+ [(match_operand:SVE_FULL_I 2 "register_operand" "0, w") -+ (match_operand:SVE_FULL_I 3 "register_operand" "w, w")] -+ MUL_HIGHPART) -+ (match_dup 2)] -+ UNSPEC_SEL))] -+ "TARGET_SVE" -+ "@ -+ \t%0., %1/m, %0., %3. -+ movprfx\t%0, %2\;\t%0., %1/m, %0., %3." -+ [(set_attr "movprfx" "*,yes")]) -+ -+;; Predicated highpart multiplications, merging with zero. -+(define_insn "*cond__z" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (unspec:SVE_FULL_I -+ [(match_operand:SVE_FULL_I 2 "register_operand" "%0, w") -+ (match_operand:SVE_FULL_I 3 "register_operand" "w, w")] -+ MUL_HIGHPART) -+ (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")] -+ UNSPEC_SEL))] -+ "TARGET_SVE" -+ "@ -+ movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %3. -+ movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3." -+ [(set_attr "movprfx" "yes")]) -+ -+;; ------------------------------------------------------------------------- -+;; ---- [INT] Division -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - SDIV -+;; - SDIVR -+;; - UDIV -+;; - UDIVR -+;; ------------------------------------------------------------------------- -+ -+;; Unpredicated integer division. -+(define_expand "3" -+ [(set (match_operand:SVE_FULL_SDI 0 "register_operand") -+ (unspec:SVE_FULL_SDI -+ [(match_dup 3) -+ (SVE_INT_BINARY_SD:SVE_FULL_SDI -+ (match_operand:SVE_FULL_SDI 1 "register_operand") -+ (match_operand:SVE_FULL_SDI 2 "register_operand"))] -+ UNSPEC_PRED_X))] -+ "TARGET_SVE" -+ { -+ operands[3] = aarch64_ptrue_reg (mode); -+ } -+) -+ -+;; Integer division predicated with a PTRUE. -+(define_insn "@aarch64_pred_" -+ [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, w, ?&w") -+ (unspec:SVE_FULL_SDI -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl") -+ (SVE_INT_BINARY_SD:SVE_FULL_SDI -+ (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w, w") -+ (match_operand:SVE_FULL_SDI 3 "register_operand" "w, 0, w"))] -+ UNSPEC_PRED_X))] -+ "TARGET_SVE" -+ "@ -+ \t%0., %1/m, %0., %3. -+ r\t%0., %1/m, %0., %2. -+ movprfx\t%0, %2\;\t%0., %1/m, %0., %3." -+ [(set_attr "movprfx" "*,*,yes")] -+) -+ -+;; Predicated integer division with merging. 
-+(define_expand "@cond_" -+ [(set (match_operand:SVE_FULL_SDI 0 "register_operand") -+ (unspec:SVE_FULL_SDI -+ [(match_operand: 1 "register_operand") -+ (SVE_INT_BINARY_SD:SVE_FULL_SDI -+ (match_operand:SVE_FULL_SDI 2 "register_operand") -+ (match_operand:SVE_FULL_SDI 3 "register_operand")) -+ (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero")] -+ UNSPEC_SEL))] -+ "TARGET_SVE" -+) -+ -+;; Predicated integer division, merging with the first input. -+(define_insn "*cond__2" -+ [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL_SDI -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (SVE_INT_BINARY_SD:SVE_FULL_SDI -+ (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w") -+ (match_operand:SVE_FULL_SDI 3 "register_operand" "w, w")) -+ (match_dup 2)] -+ UNSPEC_SEL))] -+ "TARGET_SVE" -+ "@ -+ \t%0., %1/m, %0., %3. -+ movprfx\t%0, %2\;\t%0., %1/m, %0., %3." -+ [(set_attr "movprfx" "*,yes")] -+) -+ -+;; Predicated integer division, merging with the second input. -+(define_insn "*cond__3" -+ [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL_SDI -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (SVE_INT_BINARY_SD:SVE_FULL_SDI -+ (match_operand:SVE_FULL_SDI 2 "register_operand" "w, w") -+ (match_operand:SVE_FULL_SDI 3 "register_operand" "0, w")) -+ (match_dup 3)] -+ UNSPEC_SEL))] -+ "TARGET_SVE" -+ "@ -+ \t%0., %1/m, %0., %2. -+ movprfx\t%0, %3\;\t%0., %1/m, %0., %2." -+ [(set_attr "movprfx" "*,yes")] -+) -+ -+;; Predicated integer division, merging with an independent value. -+(define_insn_and_rewrite "*cond__any" -+ [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=&w, &w, &w, &w, ?&w") -+ (unspec:SVE_FULL_SDI -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") -+ (SVE_INT_BINARY_SD:SVE_FULL_SDI -+ (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w, w, w, w") -+ (match_operand:SVE_FULL_SDI 3 "register_operand" "w, 0, w, w, w")) -+ (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] -+ UNSPEC_SEL))] -+ "TARGET_SVE -+ && !rtx_equal_p (operands[2], operands[4]) -+ && !rtx_equal_p (operands[3], operands[4])" -+ "@ -+ movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %3. -+ movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %2. -+ movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3. -+ movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., %3. -+ #" -+ "&& reload_completed -+ && register_operand (operands[4], mode) -+ && !rtx_equal_p (operands[0], operands[4])" -+ { -+ emit_insn (gen_vcond_mask_ (operands[0], operands[2], -+ operands[4], operands[1])); -+ operands[4] = operands[2] = operands[0]; -+ } -+ [(set_attr "movprfx" "yes")] -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- [INT] Binary logical operations -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - AND -+;; - EOR -+;; - ORR -+;; ------------------------------------------------------------------------- -+ -+;; Unpredicated integer binary logical operations. -+(define_insn "3" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?w, w") -+ (LOGICAL:SVE_FULL_I -+ (match_operand:SVE_FULL_I 1 "register_operand" "%0, w, w") -+ (match_operand:SVE_FULL_I 2 "aarch64_sve_logical_operand" "vsl, vsl, w")))] -+ "TARGET_SVE" -+ "@ -+ \t%0., %0., #%C2 -+ movprfx\t%0, %1\;\t%0., %0., #%C2 -+ \t%0.d, %1.d, %2.d" -+ [(set_attr "movprfx" "*,yes,*")] -+) -+ -+;; Merging forms are handled through SVE_INT_BINARY. 
-+ -+;; ------------------------------------------------------------------------- -+;; ---- [INT] Binary logical operations (inverted second input) -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - BIC -+;; ------------------------------------------------------------------------- -+ -+;; Unpredicated BIC. -+(define_expand "@aarch64_bic" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand") -+ (and:SVE_FULL_I -+ (unspec:SVE_FULL_I -+ [(match_dup 3) -+ (not:SVE_FULL_I (match_operand:SVE_FULL_I 2 "register_operand"))] -+ UNSPEC_PRED_X) -+ (match_operand:SVE_FULL_I 1 "register_operand")))] -+ "TARGET_SVE" -+ { -+ operands[3] = CONSTM1_RTX (mode); -+ } -+) -+ -+;; Predicated BIC. -+(define_insn_and_rewrite "*bic3" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w") -+ (and:SVE_FULL_I -+ (unspec:SVE_FULL_I -+ [(match_operand 3) -+ (not:SVE_FULL_I -+ (match_operand:SVE_FULL_I 2 "register_operand" "w"))] -+ UNSPEC_PRED_X) -+ (match_operand:SVE_FULL_I 1 "register_operand" "w")))] -+ "TARGET_SVE" -+ "bic\t%0.d, %1.d, %2.d" -+ "&& !CONSTANT_P (operands[3])" -+ { -+ operands[3] = CONSTM1_RTX (mode); -+ } -+) -+ -+;; Predicated BIC with merging. -+(define_expand "@cond_bic" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand") -+ (and:SVE_FULL_I -+ (not:SVE_FULL_I (match_operand:SVE_FULL_I 3 "register_operand")) -+ (match_operand:SVE_FULL_I 2 "register_operand")) -+ (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")] -+ UNSPEC_SEL))] -+ "TARGET_SVE" -+) -+ -+;; Predicated integer BIC, merging with the first input. -+(define_insn "*cond_bic_2" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (and:SVE_FULL_I -+ (not:SVE_FULL_I -+ (match_operand:SVE_FULL_I 3 "register_operand" "w, w")) -+ (match_operand:SVE_FULL_I 2 "register_operand" "0, w")) -+ (match_dup 2)] -+ UNSPEC_SEL))] -+ "TARGET_SVE" -+ "@ -+ bic\t%0., %1/m, %0., %3. -+ movprfx\t%0, %2\;bic\t%0., %1/m, %0., %3." -+ [(set_attr "movprfx" "*,yes")] -+) -+ -+;; Predicated integer BIC, merging with an independent value. -+(define_insn_and_rewrite "*cond_bic_any" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, ?&w") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") -+ (and:SVE_FULL_I -+ (not:SVE_FULL_I -+ (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w, w")) -+ (match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w")) -+ (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] -+ UNSPEC_SEL))] -+ "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" -+ "@ -+ movprfx\t%0., %1/z, %0.\;bic\t%0., %1/m, %0., %3. -+ movprfx\t%0., %1/z, %2.\;bic\t%0., %1/m, %0., %3. -+ movprfx\t%0., %1/m, %2.\;bic\t%0., %1/m, %0., %3. 
-+ #" -+ "&& reload_completed -+ && register_operand (operands[4], mode) -+ && !rtx_equal_p (operands[0], operands[4])" -+ { -+ emit_insn (gen_vcond_mask_ (operands[0], operands[2], -+ operands[4], operands[1])); -+ operands[4] = operands[2] = operands[0]; -+ } -+ [(set_attr "movprfx" "yes")] -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- [INT] Shifts (rounding towards -Inf) -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - ASR -+;; - ASRR -+;; - LSL -+;; - LSLR -+;; - LSR -+;; - LSRR -+;; ------------------------------------------------------------------------- -+ -+;; Unpredicated shift by a scalar, which expands into one of the vector -+;; shifts below. -+(define_expand "3" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand") -+ (ASHIFT:SVE_FULL_I -+ (match_operand:SVE_FULL_I 1 "register_operand") -+ (match_operand: 2 "general_operand")))] -+ "TARGET_SVE" -+ { -+ rtx amount; -+ if (CONST_INT_P (operands[2])) -+ { -+ amount = gen_const_vec_duplicate (mode, operands[2]); -+ if (!aarch64_sve_shift_operand (operands[2], mode)) -+ amount = force_reg (mode, amount); -+ } -+ else -+ { -+ amount = gen_reg_rtx (mode); -+ emit_insn (gen_vec_duplicate (amount, -+ convert_to_mode (mode, -+ operands[2], 0))); -+ } -+ emit_insn (gen_v3 (operands[0], operands[1], amount)); -+ DONE; -+ } -+) -+ -+;; Unpredicated shift by a vector. -+(define_expand "v3" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand") -+ (unspec:SVE_FULL_I -+ [(match_dup 3) -+ (ASHIFT:SVE_FULL_I -+ (match_operand:SVE_FULL_I 1 "register_operand") -+ (match_operand:SVE_FULL_I 2 "aarch64_sve_shift_operand"))] -+ UNSPEC_PRED_X))] -+ "TARGET_SVE" -+ { -+ operands[3] = aarch64_ptrue_reg (mode); -+ } -+) -+ -+;; Shift by a vector, predicated with a PTRUE. We don't actually need -+;; the predicate for the first alternative, but using Upa or X isn't -+;; likely to gain much and would make the instruction seem less uniform -+;; to the register allocator. -+(define_insn_and_split "@aarch64_pred_" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?&w") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") -+ (ASHIFT:SVE_FULL_I -+ (match_operand:SVE_FULL_I 2 "register_operand" "w, 0, w, w") -+ (match_operand:SVE_FULL_I 3 "aarch64_sve_shift_operand" "D, w, 0, w"))] -+ UNSPEC_PRED_X))] -+ "TARGET_SVE" -+ "@ -+ # -+ \t%0., %1/m, %0., %3. -+ r\t%0., %1/m, %3., %2. -+ movprfx\t%0, %2\;\t%0., %1/m, %0., %3." -+ "&& reload_completed -+ && !register_operand (operands[3], mode)" -+ [(set (match_dup 0) (ASHIFT:SVE_FULL_I (match_dup 2) (match_dup 3)))] -+ "" -+ [(set_attr "movprfx" "*,*,*,yes")] -+) -+ -+;; Unpredicated shift operations by a constant (post-RA only). -+;; These are generated by splitting a predicated instruction whose -+;; predicate is unused. -+(define_insn "*post_ra_v3" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w") -+ (ASHIFT:SVE_FULL_I -+ (match_operand:SVE_FULL_I 1 "register_operand" "w") -+ (match_operand:SVE_FULL_I 2 "aarch64_simd_shift_imm")))] -+ "TARGET_SVE && reload_completed" -+ "\t%0., %1., #%2" -+) -+ -+;; Predicated integer shift, merging with the first input. 
-+(define_insn "*cond__2_const" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (ASHIFT:SVE_FULL_I -+ (match_operand:SVE_FULL_I 2 "register_operand" "0, w") -+ (match_operand:SVE_FULL_I 3 "aarch64_simd_shift_imm")) -+ (match_dup 2)] -+ UNSPEC_SEL))] -+ "TARGET_SVE" -+ "@ -+ \t%0., %1/m, %0., #%3 -+ movprfx\t%0, %2\;\t%0., %1/m, %0., #%3" -+ [(set_attr "movprfx" "*,yes")] -+) -+ -+;; Predicated integer shift, merging with an independent value. -+(define_insn_and_rewrite "*cond__any_const" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, &w, ?&w") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl") -+ (ASHIFT:SVE_FULL_I -+ (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w") -+ (match_operand:SVE_FULL_I 3 "aarch64_simd_shift_imm")) -+ (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")] -+ UNSPEC_SEL))] -+ "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" -+ "@ -+ movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., #%3 -+ movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., #%3 -+ #" -+ "&& reload_completed -+ && register_operand (operands[4], mode) -+ && !rtx_equal_p (operands[0], operands[4])" -+ { -+ emit_insn (gen_vcond_mask_ (operands[0], operands[2], -+ operands[4], operands[1])); -+ operands[4] = operands[2] = operands[0]; -+ } -+ [(set_attr "movprfx" "yes")] -+) -+ -+;; Unpredicated shifts of narrow elements by 64-bit amounts. -+(define_insn "@aarch64_sve_" -+ [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w") -+ (unspec:SVE_FULL_BHSI -+ [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w") -+ (match_operand:VNx2DI 2 "register_operand" "w")] -+ SVE_SHIFT_WIDE))] -+ "TARGET_SVE" -+ "\t%0., %1., %2.d" -+) -+ -+;; Merging predicated shifts of narrow elements by 64-bit amounts. -+(define_expand "@cond_" -+ [(set (match_operand:SVE_FULL_BHSI 0 "register_operand") -+ (unspec:SVE_FULL_BHSI -+ [(match_operand: 1 "register_operand") -+ (unspec:SVE_FULL_BHSI -+ [(match_operand:SVE_FULL_BHSI 2 "register_operand") -+ (match_operand:VNx2DI 3 "register_operand")] -+ SVE_SHIFT_WIDE) -+ (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_reg_or_zero")] -+ UNSPEC_SEL))] -+ "TARGET_SVE" -+) -+ -+;; Predicated shifts of narrow elements by 64-bit amounts, merging with -+;; the first input. -+(define_insn "*cond__m" -+ [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL_BHSI -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (unspec:SVE_FULL_BHSI -+ [(match_operand:SVE_FULL_BHSI 2 "register_operand" "0, w") -+ (match_operand:VNx2DI 3 "register_operand" "w, w")] -+ SVE_SHIFT_WIDE) -+ (match_dup 2)] -+ UNSPEC_SEL))] -+ "TARGET_SVE" -+ "@ -+ \t%0., %1/m, %0., %3.d -+ movprfx\t%0, %2\;\t%0., %1/m, %0., %3.d" -+ [(set_attr "movprfx" "*, yes")]) -+ -+;; Predicated shifts of narrow elements by 64-bit amounts, merging with zero. 
-+(define_insn "*cond__z" -+ [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=&w, &w") -+ (unspec:SVE_FULL_BHSI -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (unspec:SVE_FULL_BHSI -+ [(match_operand:SVE_FULL_BHSI 2 "register_operand" "0, w") -+ (match_operand:VNx2DI 3 "register_operand" "w, w")] -+ SVE_SHIFT_WIDE) -+ (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_imm_zero")] -+ UNSPEC_SEL))] -+ "TARGET_SVE" -+ "@ -+ movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %3.d -+ movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3.d" -+ [(set_attr "movprfx" "yes")]) -+ -+;; ------------------------------------------------------------------------- -+;; ---- [INT] Shifts (rounding towards 0) -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - ASRD -+;; ------------------------------------------------------------------------- -+ -+;; Unpredicated ASRD. -+(define_expand "sdiv_pow23" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand") -+ (unspec:SVE_FULL_I -+ [(match_dup 3) -+ (unspec:SVE_FULL_I -+ [(match_operand:SVE_FULL_I 1 "register_operand") -+ (match_operand 2 "aarch64_simd_rshift_imm")] -+ UNSPEC_ASRD) -+ (match_dup 1)] -+ UNSPEC_SEL))] -+ "TARGET_SVE" -+ { -+ operands[3] = aarch64_ptrue_reg (mode); -+ } -+) -+ -+;; Predicated ASRD with merging. -+(define_expand "@cond_asrd" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand") -+ (unspec:SVE_FULL_I -+ [(match_operand:SVE_FULL_I 2 "register_operand") -+ (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")] -+ UNSPEC_ASRD) -+ (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")] -+ UNSPEC_SEL))] -+ "TARGET_SVE" -+) -+ -+;; Predicated ASRD, merging with the first input. -+(define_insn "*cond_asrd_2" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (unspec:SVE_FULL_I -+ [(match_operand:SVE_FULL_I 2 "register_operand" "0, w") -+ (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")] -+ UNSPEC_ASRD) -+ (match_dup 2)] -+ UNSPEC_SEL))] -+ "TARGET_SVE" -+ "@ -+ asrd\t%0., %1/m, %0., #%3 -+ movprfx\t%0, %2\;asrd\t%0., %1/m, %0., #%3" -+ [(set_attr "movprfx" "*,yes")]) -+ -+;; Predicated ASRD, merging with zero. -+(define_insn "*cond_asrd_z" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand" "Upl") -+ (unspec:SVE_FULL_I -+ [(match_operand:SVE_FULL_I 2 "register_operand" "w") -+ (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")] -+ UNSPEC_ASRD) -+ (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")] -+ UNSPEC_SEL))] -+ "TARGET_SVE" -+ "movprfx\t%0., %1/z, %2.\;asrd\t%0., %1/m, %0., #%3" -+ [(set_attr "movprfx" "yes")]) -+ -+;; ------------------------------------------------------------------------- -+;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - FSCALE -+;; - FTSMUL -+;; - FTSSEL -+;; ------------------------------------------------------------------------- -+ -+;; Unpredicated floating-point binary operations that take an integer as -+;; their second operand. 
-+(define_insn "@aarch64_sve_" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") -+ (unspec:SVE_FULL_F -+ [(match_operand:SVE_FULL_F 1 "register_operand" "w") -+ (match_operand: 2 "register_operand" "w")] -+ SVE_FP_BINARY_INT))] -+ "TARGET_SVE" -+ "\t%0., %1., %2." -+) -+ -+;; Predicated floating-point binary operations that take an integer -+;; as their second operand. -+(define_insn "@aarch64_pred_" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (match_operand:SI 4 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_F 2 "register_operand" "0, w") -+ (match_operand: 3 "register_operand" "w, w")] -+ SVE_COND_FP_BINARY_INT))] -+ "TARGET_SVE" -+ "@ -+ \t%0., %1/m, %0., %3. -+ movprfx\t%0, %2\;\t%0., %1/m, %0., %3." -+ [(set_attr "movprfx" "*,yes")] -+) -+ -+;; Predicated floating-point binary operations with merging, taking an -+;; integer as their second operand. -+(define_expand "@cond_" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand") -+ (unspec:SVE_FULL_F -+ [(match_dup 1) -+ (const_int SVE_STRICT_GP) -+ (match_operand:SVE_FULL_F 2 "register_operand") -+ (match_operand: 3 "register_operand")] -+ SVE_COND_FP_BINARY_INT) -+ (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] -+ UNSPEC_SEL))] -+ "TARGET_SVE" -+) -+ -+;; Predicated floating-point binary operations that take an integer as their -+;; second operand, with inactive lanes coming from the first operand. -+(define_insn_and_rewrite "*cond__2" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (unspec:SVE_FULL_F -+ [(match_operand 4) -+ (match_operand:SI 5 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_F 2 "register_operand" "0, w") -+ (match_operand: 3 "register_operand" "w, w")] -+ SVE_COND_FP_BINARY_INT) -+ (match_dup 2)] -+ UNSPEC_SEL))] -+ "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" -+ "@ -+ \t%0., %1/m, %0., %3. -+ movprfx\t%0, %2\;\t%0., %1/m, %0., %3." -+ "&& !rtx_equal_p (operands[1], operands[4])" -+ { -+ operands[4] = copy_rtx (operands[1]); -+ } -+ [(set_attr "movprfx" "*,yes")] -+) -+ -+;; Predicated floating-point binary operations that take an integer as -+;; their second operand, with the values of inactive lanes being distinct -+;; from the other inputs. -+(define_insn_and_rewrite "*cond__any" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") -+ (unspec:SVE_FULL_F -+ [(match_operand 5) -+ (match_operand:SI 6 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w") -+ (match_operand: 3 "register_operand" "w, w, w, w")] -+ SVE_COND_FP_BINARY_INT) -+ (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] -+ UNSPEC_SEL))] -+ "TARGET_SVE -+ && !rtx_equal_p (operands[2], operands[4]) -+ && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" -+ "@ -+ movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3. -+ movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3. -+ movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., %3. 
-+ #" -+ "&& 1" -+ { -+ if (reload_completed -+ && register_operand (operands[4], mode) -+ && !rtx_equal_p (operands[0], operands[4])) -+ { -+ emit_insn (gen_vcond_mask_ (operands[0], operands[2], -+ operands[4], operands[1])); -+ operands[4] = operands[2] = operands[0]; -+ } -+ else if (!rtx_equal_p (operands[1], operands[5])) -+ operands[5] = copy_rtx (operands[1]); -+ else -+ FAIL; -+ } -+ [(set_attr "movprfx" "yes")] -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- [FP] General binary arithmetic corresponding to rtx codes -+;; ------------------------------------------------------------------------- -+;; Includes post-RA forms of: -+;; - FADD -+;; - FMUL -+;; - FSUB -+;; ------------------------------------------------------------------------- -+ -+;; Unpredicated floating-point binary operations (post-RA only). -+;; These are generated by splitting a predicated instruction whose -+;; predicate is unused. -+(define_insn "*post_ra_3" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") -+ (SVE_UNPRED_FP_BINARY:SVE_FULL_F -+ (match_operand:SVE_FULL_F 1 "register_operand" "w") -+ (match_operand:SVE_FULL_F 2 "register_operand" "w")))] -+ "TARGET_SVE && reload_completed" -+ "\t%0., %1., %2.") -+ -+;; ------------------------------------------------------------------------- -+;; ---- [FP] General binary arithmetic corresponding to unspecs -+;; ------------------------------------------------------------------------- -+;; Includes merging forms of: -+;; - FADD (constant forms handled in the "Addition" section) -+;; - FDIV -+;; - FDIVR -+;; - FMAX -+;; - FMAXNM (including #0.0 and #1.0) -+;; - FMIN -+;; - FMINNM (including #0.0 and #1.0) -+;; - FMUL (including #0.5 and #2.0) -+;; - FMULX -+;; - FRECPS -+;; - FRSQRTS -+;; - FSUB (constant forms handled in the "Addition" section) -+;; - FSUBR (constant forms handled in the "Subtraction" section) -+;; ------------------------------------------------------------------------- -+ -+;; Unpredicated floating-point binary operations. -+(define_insn "@aarch64_sve_" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") -+ (unspec:SVE_FULL_F -+ [(match_operand:SVE_FULL_F 1 "register_operand" "w") -+ (match_operand:SVE_FULL_F 2 "register_operand" "w")] -+ SVE_FP_BINARY))] -+ "TARGET_SVE" -+ "\t%0., %1., %2." -+) -+ -+;; Unpredicated floating-point binary operations that need to be predicated -+;; for SVE. -+(define_expand "3" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand") -+ (unspec:SVE_FULL_F -+ [(match_dup 3) -+ (const_int SVE_RELAXED_GP) -+ (match_operand:SVE_FULL_F 1 "") -+ (match_operand:SVE_FULL_F 2 "")] -+ SVE_COND_FP_BINARY))] -+ "TARGET_SVE" -+ { -+ operands[3] = aarch64_ptrue_reg (mode); -+ } -+) -+ -+;; Predicated floating-point binary operations that have no immediate forms. -+(define_insn "@aarch64_pred_" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl") -+ (match_operand:SI 4 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w") -+ (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w")] -+ SVE_COND_FP_BINARY_REG))] -+ "TARGET_SVE" -+ "@ -+ \t%0., %1/m, %0., %3. -+ \t%0., %1/m, %0., %2. -+ movprfx\t%0, %2\;\t%0., %1/m, %0., %3." -+ [(set_attr "movprfx" "*,*,yes")] -+) -+ -+;; Predicated floating-point operations with merging. 
-+(define_expand "@cond_" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand") -+ (unspec:SVE_FULL_F -+ [(match_dup 1) -+ (const_int SVE_STRICT_GP) -+ (match_operand:SVE_FULL_F 2 "") -+ (match_operand:SVE_FULL_F 3 "")] -+ SVE_COND_FP_BINARY) -+ (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] -+ UNSPEC_SEL))] -+ "TARGET_SVE" -+) -+ -+;; Predicated floating-point operations, merging with the first input. -+(define_insn_and_rewrite "*cond__2" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (unspec:SVE_FULL_F -+ [(match_operand 4) -+ (match_operand:SI 5 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_F 2 "register_operand" "0, w") -+ (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] -+ SVE_COND_FP_BINARY) -+ (match_dup 2)] -+ UNSPEC_SEL))] -+ "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" -+ "@ -+ \t%0., %1/m, %0., %3. -+ movprfx\t%0, %2\;\t%0., %1/m, %0., %3." -+ "&& !rtx_equal_p (operands[1], operands[4])" -+ { -+ operands[4] = copy_rtx (operands[1]); -+ } -+ [(set_attr "movprfx" "*,yes")] -+) -+ -+;; Same for operations that take a 1-bit constant. -+(define_insn_and_rewrite "*cond__2_const" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (unspec:SVE_FULL_F -+ [(match_operand 4) -+ (match_operand:SI 5 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_F 2 "register_operand" "0, w") -+ (match_operand:SVE_FULL_F 3 "")] -+ SVE_COND_FP_BINARY_I1) -+ (match_dup 2)] -+ UNSPEC_SEL))] -+ "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" -+ "@ -+ \t%0., %1/m, %0., #%3 -+ movprfx\t%0, %2\;\t%0., %1/m, %0., #%3" -+ "&& !rtx_equal_p (operands[1], operands[4])" -+ { -+ operands[4] = copy_rtx (operands[1]); -+ } -+ [(set_attr "movprfx" "*,yes")] -+) -+ -+;; Predicated floating-point operations, merging with the second input. -+(define_insn_and_rewrite "*cond__3" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (unspec:SVE_FULL_F -+ [(match_operand 4) -+ (match_operand:SI 5 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_F 2 "register_operand" "w, w") -+ (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] -+ SVE_COND_FP_BINARY) -+ (match_dup 3)] -+ UNSPEC_SEL))] -+ "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" -+ "@ -+ \t%0., %1/m, %0., %2. -+ movprfx\t%0, %3\;\t%0., %1/m, %0., %2." -+ "&& !rtx_equal_p (operands[1], operands[4])" -+ { -+ operands[4] = copy_rtx (operands[1]); -+ } -+ [(set_attr "movprfx" "*,yes")] -+) -+ -+;; Predicated floating-point operations, merging with an independent value. 
-+(define_insn_and_rewrite "*cond__any" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") -+ (unspec:SVE_FULL_F -+ [(match_operand 5) -+ (match_operand:SI 6 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w") -+ (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")] -+ SVE_COND_FP_BINARY) -+ (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] -+ UNSPEC_SEL))] -+ "TARGET_SVE -+ && !rtx_equal_p (operands[2], operands[4]) -+ && !rtx_equal_p (operands[3], operands[4]) -+ && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" -+ "@ -+ movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %3. -+ movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %0., %2. -+ movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3. -+ movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., %3. -+ #" -+ "&& 1" -+ { -+ if (reload_completed -+ && register_operand (operands[4], mode) -+ && !rtx_equal_p (operands[0], operands[4])) -+ { -+ emit_insn (gen_vcond_mask_ (operands[0], operands[2], -+ operands[4], operands[1])); -+ operands[4] = operands[2] = operands[0]; -+ } -+ else if (!rtx_equal_p (operands[1], operands[5])) -+ operands[5] = copy_rtx (operands[1]); -+ else -+ FAIL; -+ } -+ [(set_attr "movprfx" "yes")] -+) -+ -+;; Same for operations that take a 1-bit constant. -+(define_insn_and_rewrite "*cond__any_const" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl") -+ (unspec:SVE_FULL_F -+ [(match_operand 5) -+ (match_operand:SI 6 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w") -+ (match_operand:SVE_FULL_F 3 "")] -+ SVE_COND_FP_BINARY_I1) -+ (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")] -+ UNSPEC_SEL))] -+ "TARGET_SVE -+ && !rtx_equal_p (operands[2], operands[4]) -+ && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" -+ "@ -+ movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., #%3 -+ movprfx\t%0., %1/m, %2.\;\t%0., %1/m, %0., #%3 -+ #" -+ "&& 1" -+ { -+ if (reload_completed -+ && register_operand (operands[4], mode) -+ && !rtx_equal_p (operands[0], operands[4])) -+ { -+ emit_insn (gen_vcond_mask_ (operands[0], operands[2], -+ operands[4], operands[1])); -+ operands[4] = operands[2] = operands[0]; -+ } -+ else if (!rtx_equal_p (operands[1], operands[5])) -+ operands[5] = copy_rtx (operands[1]); -+ else -+ FAIL; -+ } -+ [(set_attr "movprfx" "yes")] -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- [FP] Addition -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - FADD -+;; - FSUB -+;; ------------------------------------------------------------------------- -+ -+;; Predicated floating-point addition. -+(define_insn_and_split "@aarch64_pred_" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?&w, ?&w, ?&w") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl, Upl") -+ (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, i, Z, Ui1, i, i, Ui1") -+ (match_operand:SVE_FULL_F 2 "register_operand" "%0, 0, w, 0, w, w, w") -+ (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w, w, vsA, vsN, w")] -+ SVE_COND_FP_ADD))] -+ "TARGET_SVE" -+ "@ -+ fadd\t%0., %1/m, %0., #%3 -+ fsub\t%0., %1/m, %0., #%N3 -+ # -+ fadd\t%0., %1/m, %0., %3. 
-+ movprfx\t%0, %2\;fadd\t%0., %1/m, %0., #%3 -+ movprfx\t%0, %2\;fsub\t%0., %1/m, %0., #%N3 -+ movprfx\t%0, %2\;fadd\t%0., %1/m, %0., %3." -+ ; Split the unpredicated form after reload, so that we don't have -+ ; the unnecessary PTRUE. -+ "&& reload_completed -+ && register_operand (operands[3], mode) -+ && INTVAL (operands[4]) == SVE_RELAXED_GP" -+ [(set (match_dup 0) (plus:SVE_FULL_F (match_dup 2) (match_dup 3)))] -+ "" -+ [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")] -+) -+ -+;; Predicated floating-point addition of a constant, merging with the -+;; first input. -+(define_insn_and_rewrite "*cond_add_2_const" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w, ?w") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") -+ (unspec:SVE_FULL_F -+ [(match_operand 4) -+ (match_operand:SI 5 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_F 2 "register_operand" "0, 0, w, w") -+ (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN")] -+ UNSPEC_COND_FADD) -+ (match_dup 2)] -+ UNSPEC_SEL))] -+ "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" -+ "@ -+ fadd\t%0., %1/m, %0., #%3 -+ fsub\t%0., %1/m, %0., #%N3 -+ movprfx\t%0, %2\;fadd\t%0., %1/m, %0., #%3 -+ movprfx\t%0, %2\;fsub\t%0., %1/m, %0., #%N3" -+ "&& !rtx_equal_p (operands[1], operands[4])" -+ { -+ operands[4] = copy_rtx (operands[1]); -+ } -+ [(set_attr "movprfx" "*,*,yes,yes")] -+) -+ -+;; Predicated floating-point addition of a constant, merging with an -+;; independent value. -+(define_insn_and_rewrite "*cond_add_any_const" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?w, ?w") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") -+ (unspec:SVE_FULL_F -+ [(match_operand 5) -+ (match_operand:SI 6 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w, w, w") -+ (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN, vsA, vsN")] -+ UNSPEC_COND_FADD) -+ (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, 0, w, w")] -+ UNSPEC_SEL))] -+ "TARGET_SVE -+ && !rtx_equal_p (operands[2], operands[4]) -+ && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" -+ "@ -+ movprfx\t%0., %1/z, %2.\;fadd\t%0., %1/m, %0., #%3 -+ movprfx\t%0., %1/z, %2.\;fsub\t%0., %1/m, %0., #%N3 -+ movprfx\t%0., %1/m, %2.\;fadd\t%0., %1/m, %0., #%3 -+ movprfx\t%0., %1/m, %2.\;fsub\t%0., %1/m, %0., #%N3 -+ # -+ #" -+ "&& 1" -+ { -+ if (reload_completed -+ && register_operand (operands[4], mode) -+ && !rtx_equal_p (operands[0], operands[4])) -+ { -+ emit_insn (gen_vcond_mask_ (operands[0], operands[2], -+ operands[4], operands[1])); -+ operands[4] = operands[2] = operands[0]; -+ } -+ else if (!rtx_equal_p (operands[1], operands[5])) -+ operands[5] = copy_rtx (operands[1]); -+ else -+ FAIL; -+ } -+ [(set_attr "movprfx" "yes")] -+) -+ -+;; Register merging forms are handled through SVE_COND_FP_BINARY. -+ -+;; ------------------------------------------------------------------------- -+;; ---- [FP] Complex addition -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - FCADD -+;; ------------------------------------------------------------------------- -+ -+;; Predicated FCADD. 
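-+;; FCADD treats consecutive even/odd elements as the real and imaginary
-+;; parts of a complex number and adds the second source rotated by 90 or
-+;; 270 degrees (i.e. multiplied by +i or -i) to the first.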
-+(define_insn "@aarch64_pred_" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (match_operand:SI 4 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_F 2 "register_operand" "0, w") -+ (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] -+ SVE_COND_FCADD))] -+ "TARGET_SVE" -+ "@ -+ fcadd\t%0., %1/m, %0., %3., # -+ movprfx\t%0, %2\;fcadd\t%0., %1/m, %0., %3., #" -+ [(set_attr "movprfx" "*,yes")] -+) -+ -+;; Predicated FCADD with merging. -+(define_expand "@cond_" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand") -+ (unspec:SVE_FULL_F -+ [(match_dup 1) -+ (const_int SVE_STRICT_GP) -+ (match_operand:SVE_FULL_F 2 "register_operand") -+ (match_operand:SVE_FULL_F 3 "register_operand")] -+ SVE_COND_FCADD) -+ (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] -+ UNSPEC_SEL))] -+ "TARGET_SVE" -+) -+ -+;; Predicated FCADD, merging with the first input. -+(define_insn_and_rewrite "*cond__2" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (unspec:SVE_FULL_F -+ [(match_operand 4) -+ (match_operand:SI 5 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_F 2 "register_operand" "0, w") -+ (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] -+ SVE_COND_FCADD) -+ (match_dup 2)] -+ UNSPEC_SEL))] -+ "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" -+ "@ -+ fcadd\t%0., %1/m, %0., %3., # -+ movprfx\t%0, %2\;fcadd\t%0., %1/m, %0., %3., #" -+ "&& !rtx_equal_p (operands[1], operands[4])" -+ { -+ operands[4] = copy_rtx (operands[1]); -+ } -+ [(set_attr "movprfx" "*,yes")] -+) -+ -+;; Predicated FCADD, merging with an independent value. -+(define_insn_and_rewrite "*cond__any" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") -+ (unspec:SVE_FULL_F -+ [(match_operand 5) -+ (match_operand:SI 6 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_F 2 "register_operand" "w, 0, w, w") -+ (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")] -+ SVE_COND_FCADD) -+ (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] -+ UNSPEC_SEL))] -+ "TARGET_SVE -+ && !rtx_equal_p (operands[2], operands[4]) -+ && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" -+ "@ -+ movprfx\t%0., %1/z, %2.\;fcadd\t%0., %1/m, %0., %3., # -+ movprfx\t%0., %1/z, %0.\;fcadd\t%0., %1/m, %0., %3., # -+ movprfx\t%0., %1/m, %2.\;fcadd\t%0., %1/m, %0., %3., # -+ #" -+ "&& 1" -+ { -+ if (reload_completed -+ && register_operand (operands[4], mode) -+ && !rtx_equal_p (operands[0], operands[4])) -+ { -+ emit_insn (gen_vcond_mask_ (operands[0], operands[2], -+ operands[4], operands[1])); -+ operands[4] = operands[2] = operands[0]; -+ } -+ else if (!rtx_equal_p (operands[1], operands[5])) -+ operands[5] = copy_rtx (operands[1]); -+ else -+ FAIL; -+ } -+ [(set_attr "movprfx" "yes")] -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- [FP] Subtraction -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - FSUB -+;; - FSUBR -+;; ------------------------------------------------------------------------- -+ -+;; Predicated floating-point subtraction. 
-+(define_insn_and_split "@aarch64_pred_" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?&w, ?&w") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") -+ (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z, Ui1, Ui1, i, Ui1") -+ (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_operand" "vsA, w, 0, w, vsA, w") -+ (match_operand:SVE_FULL_F 3 "register_operand" "0, w, w, 0, w, w")] -+ SVE_COND_FP_SUB))] -+ "TARGET_SVE" -+ "@ -+ fsubr\t%0., %1/m, %0., #%2 -+ # -+ fsub\t%0., %1/m, %0., %3. -+ fsubr\t%0., %1/m, %0., %2. -+ movprfx\t%0, %3\;fsubr\t%0., %1/m, %0., #%2 -+ movprfx\t%0, %2\;fsub\t%0., %1/m, %0., %3." -+ ; Split the unpredicated form after reload, so that we don't have -+ ; the unnecessary PTRUE. -+ "&& reload_completed -+ && register_operand (operands[2], mode) -+ && INTVAL (operands[4]) == SVE_RELAXED_GP" -+ [(set (match_dup 0) (minus:SVE_FULL_F (match_dup 2) (match_dup 3)))] -+ "" -+ [(set_attr "movprfx" "*,*,*,*,yes,yes")] -+) -+ -+;; Predicated floating-point subtraction from a constant, merging with the -+;; second input. -+(define_insn_and_rewrite "*cond_sub_3_const" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (unspec:SVE_FULL_F -+ [(match_operand 4) -+ (match_operand:SI 5 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate") -+ (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] -+ UNSPEC_COND_FSUB) -+ (match_dup 3)] -+ UNSPEC_SEL))] -+ "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" -+ "@ -+ fsubr\t%0., %1/m, %0., #%2 -+ movprfx\t%0, %3\;fsubr\t%0., %1/m, %0., #%2" -+ "&& !rtx_equal_p (operands[1], operands[4])" -+ { -+ operands[4] = copy_rtx (operands[1]); -+ } -+ [(set_attr "movprfx" "*,yes")] -+) -+ -+;; Predicated floating-point subtraction from a constant, merging with an -+;; independent value. -+(define_insn_and_rewrite "*cond_sub_any_const" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl") -+ (unspec:SVE_FULL_F -+ [(match_operand 5) -+ (match_operand:SI 6 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate") -+ (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w")] -+ UNSPEC_COND_FSUB) -+ (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")] -+ UNSPEC_SEL))] -+ "TARGET_SVE -+ && !rtx_equal_p (operands[3], operands[4]) -+ && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" -+ "@ -+ movprfx\t%0., %1/z, %3.\;fsubr\t%0., %1/m, %0., #%2 -+ movprfx\t%0., %1/m, %3.\;fsubr\t%0., %1/m, %0., #%2 -+ #" -+ "&& 1" - { -- operands[2] = force_reg (mode, CONSTM1_RTX (mode)); -+ if (reload_completed -+ && register_operand (operands[4], mode) -+ && !rtx_equal_p (operands[0], operands[4])) -+ { -+ emit_insn (gen_vcond_mask_ (operands[0], operands[3], -+ operands[4], operands[1])); -+ operands[4] = operands[3] = operands[0]; -+ } -+ else if (!rtx_equal_p (operands[1], operands[5])) -+ operands[5] = copy_rtx (operands[1]); -+ else -+ FAIL; - } -+ [(set_attr "movprfx" "yes")] - ) - --;; Predicated ST[234]. 
--(define_insn "vec_mask_store_lanes" -- [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m") -- (unspec:SVE_STRUCT -- [(match_operand: 2 "register_operand" "Upl") -- (match_operand:SVE_STRUCT 1 "register_operand" "w") -- (match_dup 0)] -- UNSPEC_STN))] -+;; Register merging forms are handled through SVE_COND_FP_BINARY. -+ -+;; ------------------------------------------------------------------------- -+;; ---- [FP] Absolute difference -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - FABD -+;; ------------------------------------------------------------------------- -+ -+;; Predicated floating-point absolute difference. -+(define_expand "@aarch64_pred_abd" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand") -+ (match_operand:SI 4 "aarch64_sve_gp_strictness") -+ (unspec:SVE_FULL_F -+ [(match_dup 1) -+ (match_dup 4) -+ (match_operand:SVE_FULL_F 2 "register_operand") -+ (match_operand:SVE_FULL_F 3 "register_operand")] -+ UNSPEC_COND_FSUB)] -+ UNSPEC_COND_FABS))] - "TARGET_SVE" -- "st\t%1, %2, %0" - ) - --(define_expand "vec_perm" -- [(match_operand:SVE_ALL 0 "register_operand") -- (match_operand:SVE_ALL 1 "register_operand") -- (match_operand:SVE_ALL 2 "register_operand") -- (match_operand: 3 "aarch64_sve_vec_perm_operand")] -- "TARGET_SVE && GET_MODE_NUNITS (mode).is_constant ()" -+;; Predicated floating-point absolute difference. -+(define_insn_and_rewrite "*aarch64_pred_abd" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (match_operand:SI 4 "aarch64_sve_gp_strictness") -+ (unspec:SVE_FULL_F -+ [(match_operand 5) -+ (match_operand:SI 6 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_F 2 "register_operand" "%0, w") -+ (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] -+ UNSPEC_COND_FSUB)] -+ UNSPEC_COND_FABS))] -+ "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" -+ "@ -+ fabd\t%0., %1/m, %0., %3. -+ movprfx\t%0, %2\;fabd\t%0., %1/m, %0., %3." -+ "&& !rtx_equal_p (operands[1], operands[5])" - { -- aarch64_expand_sve_vec_perm (operands[0], operands[1], -- operands[2], operands[3]); -- DONE; -+ operands[5] = copy_rtx (operands[1]); - } -+ [(set_attr "movprfx" "*,yes")] - ) - --(define_insn "*aarch64_sve_tbl" -- [(set (match_operand:SVE_ALL 0 "register_operand" "=w") -- (unspec:SVE_ALL -- [(match_operand:SVE_ALL 1 "register_operand" "w") -- (match_operand: 2 "register_operand" "w")] -- UNSPEC_TBL))] -+(define_expand "@aarch64_cond_abd" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand") -+ (unspec:SVE_FULL_F -+ [(match_dup 1) -+ (const_int SVE_STRICT_GP) -+ (unspec:SVE_FULL_F -+ [(match_dup 1) -+ (const_int SVE_STRICT_GP) -+ (match_operand:SVE_FULL_F 2 "register_operand") -+ (match_operand:SVE_FULL_F 3 "register_operand")] -+ UNSPEC_COND_FSUB)] -+ UNSPEC_COND_FABS) -+ (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] -+ UNSPEC_SEL))] - "TARGET_SVE" -- "tbl\t%0., %1., %2." -+{ -+ if (rtx_equal_p (operands[3], operands[4])) -+ std::swap (operands[2], operands[3]); -+}) -+ -+;; Predicated floating-point absolute difference, merging with the first -+;; input. 
-+(define_insn_and_rewrite "*aarch64_cond_abd_2" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (unspec:SVE_FULL_F -+ [(match_operand 4) -+ (match_operand:SI 5 "aarch64_sve_gp_strictness") -+ (unspec:SVE_FULL_F -+ [(match_operand 6) -+ (match_operand:SI 7 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_F 2 "register_operand" "0, w") -+ (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] -+ UNSPEC_COND_FSUB)] -+ UNSPEC_COND_FABS) -+ (match_dup 2)] -+ UNSPEC_SEL))] -+ "TARGET_SVE -+ && aarch64_sve_pred_dominates_p (&operands[4], operands[1]) -+ && aarch64_sve_pred_dominates_p (&operands[6], operands[1])" -+ "@ -+ fabd\t%0., %1/m, %0., %3. -+ movprfx\t%0, %2\;fabd\t%0., %1/m, %0., %3." -+ "&& (!rtx_equal_p (operands[1], operands[4]) -+ || !rtx_equal_p (operands[1], operands[6]))" -+ { -+ operands[4] = copy_rtx (operands[1]); -+ operands[6] = copy_rtx (operands[1]); -+ } -+ [(set_attr "movprfx" "*,yes")] - ) - --(define_insn "*aarch64_sve_" -- [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") -- (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa") -- (match_operand:PRED_ALL 2 "register_operand" "Upa")] -- PERMUTE))] -- "TARGET_SVE" -- "\t%0., %1., %2." -+;; Predicated floating-point absolute difference, merging with the second -+;; input. -+(define_insn_and_rewrite "*aarch64_cond_abd_3" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (unspec:SVE_FULL_F -+ [(match_operand 4) -+ (match_operand:SI 5 "aarch64_sve_gp_strictness") -+ (unspec:SVE_FULL_F -+ [(match_operand 6) -+ (match_operand:SI 7 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_F 2 "register_operand" "w, w") -+ (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] -+ UNSPEC_COND_FSUB)] -+ UNSPEC_COND_FABS) -+ (match_dup 3)] -+ UNSPEC_SEL))] -+ "TARGET_SVE -+ && aarch64_sve_pred_dominates_p (&operands[4], operands[1]) -+ && aarch64_sve_pred_dominates_p (&operands[6], operands[1])" -+ "@ -+ fabd\t%0., %1/m, %0., %2. -+ movprfx\t%0, %3\;fabd\t%0., %1/m, %0., %2." -+ "&& (!rtx_equal_p (operands[1], operands[4]) -+ || !rtx_equal_p (operands[1], operands[6]))" -+ { -+ operands[4] = copy_rtx (operands[1]); -+ operands[6] = copy_rtx (operands[1]); -+ } -+ [(set_attr "movprfx" "*,yes")] -+) -+ -+;; Predicated floating-point absolute difference, merging with an -+;; independent value. -+(define_insn_and_rewrite "*aarch64_cond_abd_any" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") -+ (unspec:SVE_FULL_F -+ [(match_operand 5) -+ (match_operand:SI 6 "aarch64_sve_gp_strictness") -+ (unspec:SVE_FULL_F -+ [(match_operand 7) -+ (match_operand:SI 8 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w") -+ (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")] -+ UNSPEC_COND_FSUB)] -+ UNSPEC_COND_FABS) -+ (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] -+ UNSPEC_SEL))] -+ "TARGET_SVE -+ && !rtx_equal_p (operands[2], operands[4]) -+ && !rtx_equal_p (operands[3], operands[4]) -+ && aarch64_sve_pred_dominates_p (&operands[5], operands[1]) -+ && aarch64_sve_pred_dominates_p (&operands[7], operands[1])" -+ "@ -+ movprfx\t%0., %1/z, %0.\;fabd\t%0., %1/m, %0., %3. -+ movprfx\t%0., %1/z, %0.\;fabd\t%0., %1/m, %0., %2. 
-+ movprfx\t%0., %1/z, %2.\;fabd\t%0., %1/m, %0., %3. -+ movprfx\t%0., %1/m, %2.\;fabd\t%0., %1/m, %0., %3. -+ #" -+ "&& 1" -+ { -+ if (reload_completed -+ && register_operand (operands[4], mode) -+ && !rtx_equal_p (operands[0], operands[4])) -+ { -+ emit_insn (gen_vcond_mask_ (operands[0], operands[3], -+ operands[4], operands[1])); -+ operands[4] = operands[3] = operands[0]; -+ } -+ else if (!rtx_equal_p (operands[1], operands[5]) -+ || !rtx_equal_p (operands[1], operands[7])) -+ { -+ operands[5] = copy_rtx (operands[1]); -+ operands[7] = copy_rtx (operands[1]); -+ } -+ else -+ FAIL; -+ } -+ [(set_attr "movprfx" "yes")] - ) - --(define_insn "aarch64_sve_" -- [(set (match_operand:SVE_ALL 0 "register_operand" "=w") -- (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w") -- (match_operand:SVE_ALL 2 "register_operand" "w")] -- PERMUTE))] -+;; ------------------------------------------------------------------------- -+;; ---- [FP] Multiplication -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - FMUL -+;; ------------------------------------------------------------------------- -+ -+;; Predicated floating-point multiplication. -+(define_insn_and_split "@aarch64_pred_" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, ?&w, ?&w") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") -+ (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z, Ui1, i, Ui1") -+ (match_operand:SVE_FULL_F 2 "register_operand" "%0, w, 0, w, w") -+ (match_operand:SVE_FULL_F 3 "aarch64_sve_float_mul_operand" "vsM, w, w, vsM, w")] -+ SVE_COND_FP_MUL))] - "TARGET_SVE" -- "\t%0., %1., %2." -+ "@ -+ fmul\t%0., %1/m, %0., #%3 -+ # -+ fmul\t%0., %1/m, %0., %3. -+ movprfx\t%0, %2\;fmul\t%0., %1/m, %0., #%3 -+ movprfx\t%0, %2\;fmul\t%0., %1/m, %0., %3." -+ ; Split the unpredicated form after reload, so that we don't have -+ ; the unnecessary PTRUE. -+ "&& reload_completed -+ && register_operand (operands[3], mode) -+ && INTVAL (operands[4]) == SVE_RELAXED_GP" -+ [(set (match_dup 0) (mult:SVE_FULL_F (match_dup 2) (match_dup 3)))] -+ "" -+ [(set_attr "movprfx" "*,*,*,yes,yes")] - ) - --(define_insn "*aarch64_sve_rev64" -- [(set (match_operand:SVE_BHS 0 "register_operand" "=w") -- (unspec:SVE_BHS -- [(match_operand:VNx2BI 1 "register_operand" "Upl") -- (unspec:SVE_BHS [(match_operand:SVE_BHS 2 "register_operand" "w")] -- UNSPEC_REV64)] -- UNSPEC_MERGE_PTRUE))] -+;; Merging forms are handled through SVE_COND_FP_BINARY and -+;; SVE_COND_FP_BINARY_I1. -+ -+;; Unpredicated multiplication by selected lanes. 
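-+;; Operand 3 is the constant lane index used by the FMUL (indexed) form.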
-+(define_insn "@aarch64_mul_lane_" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") -+ (mult:SVE_FULL_F -+ (unspec:SVE_FULL_F -+ [(match_operand:SVE_FULL_F 2 "register_operand" "") -+ (match_operand:SI 3 "const_int_operand")] -+ UNSPEC_SVE_LANE_SELECT) -+ (match_operand:SVE_FULL_F 1 "register_operand" "w")))] - "TARGET_SVE" -- "rev\t%0.d, %1/m, %2.d" -+ "fmul\t%0., %1., %2.[%3]" - ) - --(define_insn "*aarch64_sve_rev32" -- [(set (match_operand:SVE_BH 0 "register_operand" "=w") -- (unspec:SVE_BH -- [(match_operand:VNx4BI 1 "register_operand" "Upl") -- (unspec:SVE_BH [(match_operand:SVE_BH 2 "register_operand" "w")] -- UNSPEC_REV32)] -- UNSPEC_MERGE_PTRUE))] -+;; ------------------------------------------------------------------------- -+;; ---- [FP] Binary logical operations -+;; ------------------------------------------------------------------------- -+;; Includes -+;; - AND -+;; - EOR -+;; - ORR -+;; ------------------------------------------------------------------------- -+ -+;; Binary logical operations on floating-point modes. We avoid subregs -+;; by providing this, but we need to use UNSPECs since rtx logical ops -+;; aren't defined for floating-point modes. -+(define_insn "*3" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") -+ (unspec:SVE_FULL_F -+ [(match_operand:SVE_FULL_F 1 "register_operand" "w") -+ (match_operand:SVE_FULL_F 2 "register_operand" "w")] -+ LOGICALF))] - "TARGET_SVE" -- "rev\t%0.s, %1/m, %2.s" -+ "\t%0.d, %1.d, %2.d" - ) - --(define_insn "*aarch64_sve_rev16vnx16qi" -- [(set (match_operand:VNx16QI 0 "register_operand" "=w") -- (unspec:VNx16QI -- [(match_operand:VNx8BI 1 "register_operand" "Upl") -- (unspec:VNx16QI [(match_operand:VNx16QI 2 "register_operand" "w")] -- UNSPEC_REV16)] -- UNSPEC_MERGE_PTRUE))] -+;; ------------------------------------------------------------------------- -+;; ---- [FP] Sign copying -+;; ------------------------------------------------------------------------- -+;; The patterns in this section are synthetic. 
-+;; ------------------------------------------------------------------------- -+ -+(define_expand "copysign3" -+ [(match_operand:SVE_FULL_F 0 "register_operand") -+ (match_operand:SVE_FULL_F 1 "register_operand") -+ (match_operand:SVE_FULL_F 2 "register_operand")] - "TARGET_SVE" -- "revb\t%0.h, %1/m, %2.h" -+ { -+ rtx sign = gen_reg_rtx (mode); -+ rtx mant = gen_reg_rtx (mode); -+ rtx int_res = gen_reg_rtx (mode); -+ int bits = GET_MODE_UNIT_BITSIZE (mode) - 1; -+ -+ rtx arg1 = lowpart_subreg (mode, operands[1], mode); -+ rtx arg2 = lowpart_subreg (mode, operands[2], mode); -+ -+ emit_insn (gen_and3 -+ (sign, arg2, -+ aarch64_simd_gen_const_vector_dup (mode, -+ HOST_WIDE_INT_M1U -+ << bits))); -+ emit_insn (gen_and3 -+ (mant, arg1, -+ aarch64_simd_gen_const_vector_dup (mode, -+ ~(HOST_WIDE_INT_M1U -+ << bits)))); -+ emit_insn (gen_ior3 (int_res, sign, mant)); -+ emit_move_insn (operands[0], gen_lowpart (mode, int_res)); -+ DONE; -+ } - ) - --(define_insn "*aarch64_sve_rev" -- [(set (match_operand:SVE_ALL 0 "register_operand" "=w") -- (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")] -- UNSPEC_REV))] -+(define_expand "xorsign3" -+ [(match_operand:SVE_FULL_F 0 "register_operand") -+ (match_operand:SVE_FULL_F 1 "register_operand") -+ (match_operand:SVE_FULL_F 2 "register_operand")] - "TARGET_SVE" -- "rev\t%0., %1.") -+ { -+ rtx sign = gen_reg_rtx (mode); -+ rtx int_res = gen_reg_rtx (mode); -+ int bits = GET_MODE_UNIT_BITSIZE (mode) - 1; - --(define_insn "*aarch64_sve_dup_lane" -- [(set (match_operand:SVE_ALL 0 "register_operand" "=w") -- (vec_duplicate:SVE_ALL -- (vec_select: -- (match_operand:SVE_ALL 1 "register_operand" "w") -- (parallel [(match_operand:SI 2 "const_int_operand")]))))] -- "TARGET_SVE -- && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (mode), 0, 63)" -- "dup\t%0., %1.[%2]" -+ rtx arg1 = lowpart_subreg (mode, operands[1], mode); -+ rtx arg2 = lowpart_subreg (mode, operands[2], mode); -+ -+ emit_insn (gen_and3 -+ (sign, arg2, -+ aarch64_simd_gen_const_vector_dup (mode, -+ HOST_WIDE_INT_M1U -+ << bits))); -+ emit_insn (gen_xor3 (int_res, arg1, sign)); -+ emit_move_insn (operands[0], gen_lowpart (mode, int_res)); -+ DONE; -+ } - ) - --;; Note that the immediate (third) operand is the lane index not --;; the byte index. --(define_insn "*aarch64_sve_ext" -- [(set (match_operand:SVE_ALL 0 "register_operand" "=w") -- (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "0") -- (match_operand:SVE_ALL 2 "register_operand" "w") -- (match_operand:SI 3 "const_int_operand")] -- UNSPEC_EXT))] -- "TARGET_SVE -- && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (mode), 0, 255)" -+;; ------------------------------------------------------------------------- -+;; ---- [FP] Maximum and minimum -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - FMAX -+;; - FMAXNM -+;; - FMIN -+;; - FMINNM -+;; ------------------------------------------------------------------------- -+ -+;; Unpredicated fmax/fmin (the libm functions). The optabs for the -+;; smin/smax rtx codes are handled in the generic section above. 
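-+;; fmax and fmin return the non-NaN operand when exactly one input is a NaN,
-+;; matching the FMAXNM/FMINNM forms rather than the NaN-propagating
-+;; FMAX/FMIN.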
-+(define_expand "3" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand") -+ (unspec:SVE_FULL_F -+ [(match_dup 3) -+ (const_int SVE_RELAXED_GP) -+ (match_operand:SVE_FULL_F 1 "register_operand") -+ (match_operand:SVE_FULL_F 2 "aarch64_sve_float_maxmin_operand")] -+ SVE_COND_FP_MAXMIN_PUBLIC))] -+ "TARGET_SVE" - { -- operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (mode)); -- return "ext\\t%0.b, %0.b, %2.b, #%3"; -+ operands[3] = aarch64_ptrue_reg (mode); - } - ) - --(define_insn "add3" -- [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, w") -- (plus:SVE_I -- (match_operand:SVE_I 1 "register_operand" "%0, 0, 0, w") -- (match_operand:SVE_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, w")))] -+;; Predicated floating-point maximum/minimum. -+(define_insn "@aarch64_pred_" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w, ?&w") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") -+ (match_operand:SI 4 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_F 2 "register_operand" "%0, 0, w, w") -+ (match_operand:SVE_FULL_F 3 "aarch64_sve_float_maxmin_operand" "vsB, w, vsB, w")] -+ SVE_COND_FP_MAXMIN))] - "TARGET_SVE" - "@ -- add\t%0., %0., #%D2 -- sub\t%0., %0., #%N2 -- * return aarch64_output_sve_inc_dec_immediate (\"%0.\", operands[2]); -- add\t%0., %1., %2." -+ \t%0., %1/m, %0., #%3 -+ \t%0., %1/m, %0., %3. -+ movprfx\t%0, %2\;\t%0., %1/m, %0., #%3 -+ movprfx\t%0, %2\;\t%0., %1/m, %0., %3." -+ [(set_attr "movprfx" "*,*,yes,yes")] - ) - --(define_insn "sub3" -- [(set (match_operand:SVE_I 0 "register_operand" "=w, w") -- (minus:SVE_I -- (match_operand:SVE_I 1 "aarch64_sve_arith_operand" "w, vsa") -- (match_operand:SVE_I 2 "register_operand" "w, 0")))] -+;; Merging forms are handled through SVE_COND_FP_BINARY and -+;; SVE_COND_FP_BINARY_I1. -+ -+;; ------------------------------------------------------------------------- -+;; ---- [PRED] Binary logical operations -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - AND -+;; - ANDS -+;; - EOR -+;; - EORS -+;; - ORR -+;; - ORRS -+;; ------------------------------------------------------------------------- -+ -+;; Predicate AND. We can reuse one of the inputs as the GP. -+;; Doubling the second operand is the preferred implementation -+;; of the MOV alias, so we use that instead of %1/z, %1, %2. -+(define_insn "and3" -+ [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") -+ (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa") -+ (match_operand:PRED_ALL 2 "register_operand" "Upa")))] - "TARGET_SVE" -- "@ -- sub\t%0., %1., %2. -- subr\t%0., %0., #%D1" -+ "and\t%0.b, %1/z, %2.b, %2.b" - ) - --;; Unpredicated multiplication. --(define_expand "mul3" -- [(set (match_operand:SVE_I 0 "register_operand") -- (unspec:SVE_I -- [(match_dup 3) -- (mult:SVE_I -- (match_operand:SVE_I 1 "register_operand") -- (match_operand:SVE_I 2 "aarch64_sve_mul_operand"))] -- UNSPEC_MERGE_PTRUE))] -+;; Unpredicated predicate EOR and ORR. -+(define_expand "3" -+ [(set (match_operand:PRED_ALL 0 "register_operand") -+ (and:PRED_ALL -+ (LOGICAL_OR:PRED_ALL -+ (match_operand:PRED_ALL 1 "register_operand") -+ (match_operand:PRED_ALL 2 "register_operand")) -+ (match_dup 3)))] - "TARGET_SVE" - { -- operands[3] = force_reg (mode, CONSTM1_RTX (mode)); -+ operands[3] = aarch64_ptrue_reg (mode); - } - ) - --;; Multiplication predicated with a PTRUE. 
We don't actually need the --;; predicate for the first alternative, but using Upa or X isn't likely --;; to gain much and would make the instruction seem less uniform to the --;; register allocator. --(define_insn_and_split "*mul3" -- [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w") -- (unspec:SVE_I -- [(match_operand: 1 "register_operand" "Upl, Upl, Upl") -- (mult:SVE_I -- (match_operand:SVE_I 2 "register_operand" "%0, 0, w") -- (match_operand:SVE_I 3 "aarch64_sve_mul_operand" "vsm, w, w"))] -- UNSPEC_MERGE_PTRUE))] -+;; Predicated predicate AND, EOR and ORR. -+(define_insn "@aarch64_pred__z" -+ [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") -+ (and:PRED_ALL -+ (LOGICAL:PRED_ALL -+ (match_operand:PRED_ALL 2 "register_operand" "Upa") -+ (match_operand:PRED_ALL 3 "register_operand" "Upa")) -+ (match_operand:PRED_ALL 1 "register_operand" "Upa")))] - "TARGET_SVE" -- "@ -- # -- mul\t%0., %1/m, %0., %3. -- movprfx\t%0, %2\;mul\t%0., %1/m, %0., %3." -- ; Split the unpredicated form after reload, so that we don't have -- ; the unnecessary PTRUE. -- "&& reload_completed -- && !register_operand (operands[3], mode)" -- [(set (match_dup 0) (mult:SVE_I (match_dup 2) (match_dup 3)))] -- "" -- [(set_attr "movprfx" "*,*,yes")] --) -- --;; Unpredicated multiplications by a constant (post-RA only). --;; These are generated by splitting a predicated instruction whose --;; predicate is unused. --(define_insn "*post_ra_mul3" -- [(set (match_operand:SVE_I 0 "register_operand" "=w") -- (mult:SVE_I -- (match_operand:SVE_I 1 "register_operand" "0") -- (match_operand:SVE_I 2 "aarch64_sve_mul_immediate")))] -- "TARGET_SVE && reload_completed" -- "mul\t%0., %0., #%2" -+ "\t%0.b, %1/z, %2.b, %3.b" - ) - --(define_insn "*madd" -- [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w") -- (plus:SVE_I -- (unspec:SVE_I -- [(match_operand: 1 "register_operand" "Upl, Upl, Upl") -- (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w") -- (match_operand:SVE_I 3 "register_operand" "w, w, w"))] -- UNSPEC_MERGE_PTRUE) -- (match_operand:SVE_I 4 "register_operand" "w, 0, w")))] -+;; Perform a logical operation on operands 2 and 3, using operand 1 as -+;; the GP. Store the result in operand 0 and set the flags in the same -+;; way as for PTEST. -+(define_insn "*3_cc" -+ [(set (reg:CC_NZC CC_REGNUM) -+ (unspec:CC_NZC -+ [(match_operand:VNx16BI 1 "register_operand" "Upa") -+ (match_operand 4) -+ (match_operand:SI 5 "aarch64_sve_ptrue_flag") -+ (and:PRED_ALL -+ (LOGICAL:PRED_ALL -+ (match_operand:PRED_ALL 2 "register_operand" "Upa") -+ (match_operand:PRED_ALL 3 "register_operand" "Upa")) -+ (match_dup 4))] -+ UNSPEC_PTEST)) -+ (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") -+ (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3)) -+ (match_dup 4)))] - "TARGET_SVE" -- "@ -- mad\t%0., %1/m, %3., %4. -- mla\t%0., %1/m, %2., %3. -- movprfx\t%0, %4\;mla\t%0., %1/m, %2., %3." -- [(set_attr "movprfx" "*,*,yes")] -+ "s\t%0.b, %1/z, %2.b, %3.b" - ) - --(define_insn "*msub3" -- [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w") -- (minus:SVE_I -- (match_operand:SVE_I 4 "register_operand" "w, 0, w") -- (unspec:SVE_I -- [(match_operand: 1 "register_operand" "Upl, Upl, Upl") -- (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w") -- (match_operand:SVE_I 3 "register_operand" "w, w, w"))] -- UNSPEC_MERGE_PTRUE)))] -+;; Same with just the flags result. 
-+(define_insn "*3_ptest" -+ [(set (reg:CC_NZC CC_REGNUM) -+ (unspec:CC_NZC -+ [(match_operand:VNx16BI 1 "register_operand" "Upa") -+ (match_operand 4) -+ (match_operand:SI 5 "aarch64_sve_ptrue_flag") -+ (and:PRED_ALL -+ (LOGICAL:PRED_ALL -+ (match_operand:PRED_ALL 2 "register_operand" "Upa") -+ (match_operand:PRED_ALL 3 "register_operand" "Upa")) -+ (match_dup 4))] -+ UNSPEC_PTEST)) -+ (clobber (match_scratch:VNx16BI 0 "=Upa"))] - "TARGET_SVE" -- "@ -- msb\t%0., %1/m, %3., %4. -- mls\t%0., %1/m, %2., %3. -- movprfx\t%0, %4\;mls\t%0., %1/m, %2., %3." -- [(set_attr "movprfx" "*,*,yes")] -+ "s\t%0.b, %1/z, %2.b, %3.b" - ) - --;; Unpredicated highpart multiplication. --(define_expand "mul3_highpart" -- [(set (match_operand:SVE_I 0 "register_operand") -- (unspec:SVE_I -- [(match_dup 3) -- (unspec:SVE_I [(match_operand:SVE_I 1 "register_operand") -- (match_operand:SVE_I 2 "register_operand")] -- MUL_HIGHPART)] -- UNSPEC_MERGE_PTRUE))] -+;; ------------------------------------------------------------------------- -+;; ---- [PRED] Binary logical operations (inverted second input) -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - BIC -+;; - ORN -+;; ------------------------------------------------------------------------- -+ -+;; Predicated predicate BIC and ORN. -+(define_insn "aarch64_pred__z" -+ [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") -+ (and:PRED_ALL -+ (NLOGICAL:PRED_ALL -+ (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa")) -+ (match_operand:PRED_ALL 2 "register_operand" "Upa")) -+ (match_operand:PRED_ALL 1 "register_operand" "Upa")))] - "TARGET_SVE" -- { -- operands[3] = force_reg (mode, CONSTM1_RTX (mode)); -- } --) -+ "\t%0.b, %1/z, %2.b, %3.b" -+) -+ -+;; Same, but set the flags as a side-effect. -+(define_insn "*3_cc" -+ [(set (reg:CC_NZC CC_REGNUM) -+ (unspec:CC_NZC -+ [(match_operand:VNx16BI 1 "register_operand" "Upa") -+ (match_operand 4) -+ (match_operand:SI 5 "aarch64_sve_ptrue_flag") -+ (and:PRED_ALL -+ (NLOGICAL:PRED_ALL -+ (not:PRED_ALL -+ (match_operand:PRED_ALL 3 "register_operand" "Upa")) -+ (match_operand:PRED_ALL 2 "register_operand" "Upa")) -+ (match_dup 4))] -+ UNSPEC_PTEST)) -+ (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") -+ (and:PRED_ALL (NLOGICAL:PRED_ALL -+ (not:PRED_ALL (match_dup 3)) -+ (match_dup 2)) -+ (match_dup 4)))] -+ "TARGET_SVE" -+ "s\t%0.b, %1/z, %2.b, %3.b" -+) -+ -+;; Same with just the flags result. -+(define_insn "*3_ptest" -+ [(set (reg:CC_NZC CC_REGNUM) -+ (unspec:CC_NZC -+ [(match_operand:VNx16BI 1 "register_operand" "Upa") -+ (match_operand 4) -+ (match_operand:SI 5 "aarch64_sve_ptrue_flag") -+ (and:PRED_ALL -+ (NLOGICAL:PRED_ALL -+ (not:PRED_ALL -+ (match_operand:PRED_ALL 3 "register_operand" "Upa")) -+ (match_operand:PRED_ALL 2 "register_operand" "Upa")) -+ (match_dup 4))] -+ UNSPEC_PTEST)) -+ (clobber (match_scratch:VNx16BI 0 "=Upa"))] -+ "TARGET_SVE" -+ "s\t%0.b, %1/z, %2.b, %3.b" -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- [PRED] Binary logical operations (inverted result) -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - NAND -+;; - NOR -+;; ------------------------------------------------------------------------- - --;; Predicated highpart multiplication. 
--(define_insn "*mul3_highpart" -- [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") -- (unspec:SVE_I -- [(match_operand: 1 "register_operand" "Upl, Upl") -- (unspec:SVE_I [(match_operand:SVE_I 2 "register_operand" "%0, w") -- (match_operand:SVE_I 3 "register_operand" "w, w")] -- MUL_HIGHPART)] -- UNSPEC_MERGE_PTRUE))] -+;; Predicated predicate NAND and NOR. -+(define_insn "aarch64_pred__z" -+ [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") -+ (and:PRED_ALL -+ (NLOGICAL:PRED_ALL -+ (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa")) -+ (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa"))) -+ (match_operand:PRED_ALL 1 "register_operand" "Upa")))] - "TARGET_SVE" -- "@ -- mulh\t%0., %1/m, %0., %3. -- movprfx\t%0, %2\;mulh\t%0., %1/m, %0., %3." -- [(set_attr "movprfx" "*,yes")] -+ "\t%0.b, %1/z, %2.b, %3.b" - ) - --;; Unpredicated division. --(define_expand "3" -- [(set (match_operand:SVE_SDI 0 "register_operand") -- (unspec:SVE_SDI -- [(match_dup 3) -- (SVE_INT_BINARY_SD:SVE_SDI -- (match_operand:SVE_SDI 1 "register_operand") -- (match_operand:SVE_SDI 2 "register_operand"))] -- UNSPEC_MERGE_PTRUE))] -+;; Same, but set the flags as a side-effect. -+(define_insn "*3_cc" -+ [(set (reg:CC_NZC CC_REGNUM) -+ (unspec:CC_NZC -+ [(match_operand:VNx16BI 1 "register_operand" "Upa") -+ (match_operand 4) -+ (match_operand:SI 5 "aarch64_sve_ptrue_flag") -+ (and:PRED_ALL -+ (NLOGICAL:PRED_ALL -+ (not:PRED_ALL -+ (match_operand:PRED_ALL 2 "register_operand" "Upa")) -+ (not:PRED_ALL -+ (match_operand:PRED_ALL 3 "register_operand" "Upa"))) -+ (match_dup 4))] -+ UNSPEC_PTEST)) -+ (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") -+ (and:PRED_ALL (NLOGICAL:PRED_ALL -+ (not:PRED_ALL (match_dup 2)) -+ (not:PRED_ALL (match_dup 3))) -+ (match_dup 4)))] -+ "TARGET_SVE" -+ "s\t%0.b, %1/z, %2.b, %3.b" -+) -+ -+;; Same with just the flags result. -+(define_insn "*3_ptest" -+ [(set (reg:CC_NZC CC_REGNUM) -+ (unspec:CC_NZC -+ [(match_operand:VNx16BI 1 "register_operand" "Upa") -+ (match_operand 4) -+ (match_operand:SI 5 "aarch64_sve_ptrue_flag") -+ (and:PRED_ALL -+ (NLOGICAL:PRED_ALL -+ (not:PRED_ALL -+ (match_operand:PRED_ALL 2 "register_operand" "Upa")) -+ (not:PRED_ALL -+ (match_operand:PRED_ALL 3 "register_operand" "Upa"))) -+ (match_dup 4))] -+ UNSPEC_PTEST)) -+ (clobber (match_scratch:VNx16BI 0 "=Upa"))] -+ "TARGET_SVE" -+ "s\t%0.b, %1/z, %2.b, %3.b" -+) -+ -+;; ========================================================================= -+;; == Ternary arithmetic -+;; ========================================================================= -+ -+;; ------------------------------------------------------------------------- -+;; ---- [INT] MLA and MAD -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - MAD -+;; - MLA -+;; ------------------------------------------------------------------------- -+ -+;; Unpredicated integer addition of product. -+(define_expand "fma4" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand") -+ (plus:SVE_FULL_I -+ (unspec:SVE_FULL_I -+ [(match_dup 4) -+ (mult:SVE_FULL_I -+ (match_operand:SVE_FULL_I 1 "register_operand") -+ (match_operand:SVE_FULL_I 2 "nonmemory_operand"))] -+ UNSPEC_PRED_X) -+ (match_operand:SVE_FULL_I 3 "register_operand")))] - "TARGET_SVE" - { -- operands[3] = force_reg (mode, CONSTM1_RTX (mode)); -+ if (aarch64_prepare_sve_int_fma (operands, PLUS)) -+ DONE; -+ operands[4] = aarch64_ptrue_reg (mode); - } - ) - --;; Division predicated with a PTRUE. 
--(define_insn "*3" -- [(set (match_operand:SVE_SDI 0 "register_operand" "=w, w, ?&w") -- (unspec:SVE_SDI -- [(match_operand: 1 "register_operand" "Upl, Upl, Upl") -- (SVE_INT_BINARY_SD:SVE_SDI -- (match_operand:SVE_SDI 2 "register_operand" "0, w, w") -- (match_operand:SVE_SDI 3 "aarch64_sve_mul_operand" "w, 0, w"))] -- UNSPEC_MERGE_PTRUE))] -+;; Predicated integer addition of product. -+(define_insn "@aarch64_pred_fma" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w") -+ (plus:SVE_FULL_I -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl") -+ (mult:SVE_FULL_I -+ (match_operand:SVE_FULL_I 2 "register_operand" "%0, w, w") -+ (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w"))] -+ UNSPEC_PRED_X) -+ (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w")))] - "TARGET_SVE" - "@ -- \t%0., %1/m, %0., %3. -- r\t%0., %1/m, %0., %2. -- movprfx\t%0, %2\;\t%0., %1/m, %0., %3." -+ mad\t%0., %1/m, %3., %4. -+ mla\t%0., %1/m, %2., %3. -+ movprfx\t%0, %4\;mla\t%0., %1/m, %2., %3." - [(set_attr "movprfx" "*,*,yes")] - ) - --;; Unpredicated NEG, NOT and POPCOUNT. --(define_expand "2" -- [(set (match_operand:SVE_I 0 "register_operand") -- (unspec:SVE_I -- [(match_dup 2) -- (SVE_INT_UNARY:SVE_I (match_operand:SVE_I 1 "register_operand"))] -- UNSPEC_MERGE_PTRUE))] -+;; Predicated integer addition of product with merging. -+(define_expand "cond_fma" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand") -+ (plus:SVE_FULL_I -+ (mult:SVE_FULL_I -+ (match_operand:SVE_FULL_I 2 "register_operand") -+ (match_operand:SVE_FULL_I 3 "general_operand")) -+ (match_operand:SVE_FULL_I 4 "register_operand")) -+ (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero")] -+ UNSPEC_SEL))] - "TARGET_SVE" - { -- operands[2] = force_reg (mode, CONSTM1_RTX (mode)); -+ if (aarch64_prepare_sve_cond_int_fma (operands, PLUS)) -+ DONE; -+ /* Swap the multiplication operands if the fallback value is the -+ second of the two. */ -+ if (rtx_equal_p (operands[3], operands[5])) -+ std::swap (operands[2], operands[3]); - } - ) - --;; NEG, NOT and POPCOUNT predicated with a PTRUE. --(define_insn "*2" -- [(set (match_operand:SVE_I 0 "register_operand" "=w") -- (unspec:SVE_I -- [(match_operand: 1 "register_operand" "Upl") -- (SVE_INT_UNARY:SVE_I -- (match_operand:SVE_I 2 "register_operand" "w"))] -- UNSPEC_MERGE_PTRUE))] -+;; Predicated integer addition of product, merging with the first input. -+(define_insn "*cond_fma_2" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (plus:SVE_FULL_I -+ (mult:SVE_FULL_I -+ (match_operand:SVE_FULL_I 2 "register_operand" "0, w") -+ (match_operand:SVE_FULL_I 3 "register_operand" "w, w")) -+ (match_operand:SVE_FULL_I 4 "register_operand" "w, w")) -+ (match_dup 2)] -+ UNSPEC_SEL))] - "TARGET_SVE" -- "\t%0., %1/m, %2." -+ "@ -+ mad\t%0., %1/m, %3., %4. -+ movprfx\t%0, %2\;mad\t%0., %1/m, %3., %4." -+ [(set_attr "movprfx" "*,yes")] - ) - --;; Vector AND, ORR and XOR. --(define_insn "3" -- [(set (match_operand:SVE_I 0 "register_operand" "=w, w") -- (LOGICAL:SVE_I -- (match_operand:SVE_I 1 "register_operand" "%0, w") -- (match_operand:SVE_I 2 "aarch64_sve_logical_operand" "vsl, w")))] -+;; Predicated integer addition of product, merging with the third input. 
-+(define_insn "*cond_fma_4" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (plus:SVE_FULL_I -+ (mult:SVE_FULL_I -+ (match_operand:SVE_FULL_I 2 "register_operand" "w, w") -+ (match_operand:SVE_FULL_I 3 "register_operand" "w, w")) -+ (match_operand:SVE_FULL_I 4 "register_operand" "0, w")) -+ (match_dup 4)] -+ UNSPEC_SEL))] - "TARGET_SVE" - "@ -- \t%0., %0., #%C2 -- \t%0.d, %1.d, %2.d" -+ mla\t%0., %1/m, %2., %3. -+ movprfx\t%0, %4\;mla\t%0., %1/m, %2., %3." -+ [(set_attr "movprfx" "*,yes")] - ) - --;; Vector AND, ORR and XOR on floating-point modes. We avoid subregs --;; by providing this, but we need to use UNSPECs since rtx logical ops --;; aren't defined for floating-point modes. --(define_insn "*3" -- [(set (match_operand:SVE_F 0 "register_operand" "=w") -- (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand" "w") -- (match_operand:SVE_F 2 "register_operand" "w")] -- LOGICALF))] -- "TARGET_SVE" -- "\t%0.d, %1.d, %2.d" -+;; Predicated integer addition of product, merging with an independent value. -+(define_insn_and_rewrite "*cond_fma_any" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") -+ (plus:SVE_FULL_I -+ (mult:SVE_FULL_I -+ (match_operand:SVE_FULL_I 2 "register_operand" "w, w, 0, w, w, w") -+ (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w, 0, w, w")) -+ (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w, w, w, w")) -+ (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")] -+ UNSPEC_SEL))] -+ "TARGET_SVE -+ && !rtx_equal_p (operands[2], operands[5]) -+ && !rtx_equal_p (operands[3], operands[5]) -+ && !rtx_equal_p (operands[4], operands[5])" -+ "@ -+ movprfx\t%0., %1/z, %4.\;mla\t%0., %1/m, %2., %3. -+ movprfx\t%0., %1/z, %0.\;mla\t%0., %1/m, %2., %3. -+ movprfx\t%0., %1/z, %0.\;mad\t%0., %1/m, %3., %4. -+ movprfx\t%0., %1/z, %0.\;mad\t%0., %1/m, %2., %4. -+ movprfx\t%0., %1/m, %4.\;mla\t%0., %1/m, %2., %3. -+ #" -+ "&& reload_completed -+ && register_operand (operands[5], mode) -+ && !rtx_equal_p (operands[0], operands[5])" -+ { -+ emit_insn (gen_vcond_mask_ (operands[0], operands[4], -+ operands[5], operands[1])); -+ operands[5] = operands[4] = operands[0]; -+ } -+ [(set_attr "movprfx" "yes")] - ) - --;; REG_EQUAL notes on "not3" should ensure that we can generate --;; this pattern even though the NOT instruction itself is predicated. --(define_insn "bic3" -- [(set (match_operand:SVE_I 0 "register_operand" "=w") -- (and:SVE_I -- (not:SVE_I (match_operand:SVE_I 1 "register_operand" "w")) -- (match_operand:SVE_I 2 "register_operand" "w")))] -+;; ------------------------------------------------------------------------- -+;; ---- [INT] MLS and MSB -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - MLS -+;; - MSB -+;; ------------------------------------------------------------------------- -+ -+;; Unpredicated integer subtraction of product. 
-+(define_expand "fnma4" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand") -+ (minus:SVE_FULL_I -+ (match_operand:SVE_FULL_I 3 "register_operand") -+ (unspec:SVE_FULL_I -+ [(match_dup 4) -+ (mult:SVE_FULL_I -+ (match_operand:SVE_FULL_I 1 "register_operand") -+ (match_operand:SVE_FULL_I 2 "general_operand"))] -+ UNSPEC_PRED_X)))] - "TARGET_SVE" -- "bic\t%0.d, %2.d, %1.d" -+ { -+ if (aarch64_prepare_sve_int_fma (operands, MINUS)) -+ DONE; -+ operands[4] = aarch64_ptrue_reg (mode); -+ } - ) - --;; Predicate AND. We can reuse one of the inputs as the GP. --(define_insn "and3" -- [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") -- (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa") -- (match_operand:PRED_ALL 2 "register_operand" "Upa")))] -+;; Predicated integer subtraction of product. -+(define_insn "@aarch64_pred_fnma" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w") -+ (minus:SVE_FULL_I -+ (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl") -+ (mult:SVE_FULL_I -+ (match_operand:SVE_FULL_I 2 "register_operand" "%0, w, w") -+ (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w"))] -+ UNSPEC_PRED_X)))] - "TARGET_SVE" -- "and\t%0.b, %1/z, %1.b, %2.b" -+ "@ -+ msb\t%0., %1/m, %3., %4. -+ mls\t%0., %1/m, %2., %3. -+ movprfx\t%0, %4\;mls\t%0., %1/m, %2., %3." -+ [(set_attr "movprfx" "*,*,yes")] - ) - --;; Unpredicated predicate ORR and XOR. --(define_expand "3" -- [(set (match_operand:PRED_ALL 0 "register_operand") -- (and:PRED_ALL -- (LOGICAL_OR:PRED_ALL -- (match_operand:PRED_ALL 1 "register_operand") -- (match_operand:PRED_ALL 2 "register_operand")) -- (match_dup 3)))] -+;; Predicated integer subtraction of product with merging. -+(define_expand "cond_fnma" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand") -+ (minus:SVE_FULL_I -+ (match_operand:SVE_FULL_I 4 "register_operand") -+ (mult:SVE_FULL_I -+ (match_operand:SVE_FULL_I 2 "register_operand") -+ (match_operand:SVE_FULL_I 3 "general_operand"))) -+ (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero")] -+ UNSPEC_SEL))] - "TARGET_SVE" - { -- operands[3] = force_reg (mode, CONSTM1_RTX (mode)); -+ if (aarch64_prepare_sve_cond_int_fma (operands, MINUS)) -+ DONE; -+ /* Swap the multiplication operands if the fallback value is the -+ second of the two. */ -+ if (rtx_equal_p (operands[3], operands[5])) -+ std::swap (operands[2], operands[3]); - } - ) - --;; Predicated predicate ORR and XOR. --(define_insn "pred_3" -- [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") -- (and:PRED_ALL -- (LOGICAL:PRED_ALL -- (match_operand:PRED_ALL 2 "register_operand" "Upa") -- (match_operand:PRED_ALL 3 "register_operand" "Upa")) -- (match_operand:PRED_ALL 1 "register_operand" "Upa")))] -+;; Predicated integer subtraction of product, merging with the first input. -+(define_insn "*cond_fnma_2" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (minus:SVE_FULL_I -+ (match_operand:SVE_FULL_I 4 "register_operand" "w, w") -+ (mult:SVE_FULL_I -+ (match_operand:SVE_FULL_I 2 "register_operand" "0, w") -+ (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))) -+ (match_dup 2)] -+ UNSPEC_SEL))] - "TARGET_SVE" -- "\t%0.b, %1/z, %2.b, %3.b" -+ "@ -+ msb\t%0., %1/m, %3., %4. -+ movprfx\t%0, %2\;msb\t%0., %1/m, %3., %4." 
-+ [(set_attr "movprfx" "*,yes")] - ) - --;; Perform a logical operation on operands 2 and 3, using operand 1 as --;; the GP (which is known to be a PTRUE). Store the result in operand 0 --;; and set the flags in the same way as for PTEST. The (and ...) in the --;; UNSPEC_PTEST_PTRUE is logically redundant, but means that the tested --;; value is structurally equivalent to rhs of the second set. --(define_insn "*3_cc" -- [(set (reg:CC CC_REGNUM) -- (compare:CC -- (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upa") -- (and:PRED_ALL -- (LOGICAL:PRED_ALL -- (match_operand:PRED_ALL 2 "register_operand" "Upa") -- (match_operand:PRED_ALL 3 "register_operand" "Upa")) -- (match_dup 1))] -- UNSPEC_PTEST_PTRUE) -- (const_int 0))) -- (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") -- (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3)) -- (match_dup 1)))] -+;; Predicated integer subtraction of product, merging with the third input. -+(define_insn "*cond_fnma_4" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (minus:SVE_FULL_I -+ (match_operand:SVE_FULL_I 4 "register_operand" "0, w") -+ (mult:SVE_FULL_I -+ (match_operand:SVE_FULL_I 2 "register_operand" "w, w") -+ (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))) -+ (match_dup 4)] -+ UNSPEC_SEL))] - "TARGET_SVE" -- "s\t%0.b, %1/z, %2.b, %3.b" -+ "@ -+ mls\t%0., %1/m, %2., %3. -+ movprfx\t%0, %4\;mls\t%0., %1/m, %2., %3." -+ [(set_attr "movprfx" "*,yes")] - ) - --;; Unpredicated predicate inverse. --(define_expand "one_cmpl2" -- [(set (match_operand:PRED_ALL 0 "register_operand") -- (and:PRED_ALL -- (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand")) -- (match_dup 2)))] -- "TARGET_SVE" -+;; Predicated integer subtraction of product, merging with an -+;; independent value. -+(define_insn_and_rewrite "*cond_fnma_any" -+ [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w") -+ (unspec:SVE_FULL_I -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") -+ (minus:SVE_FULL_I -+ (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w, w, w, w") -+ (mult:SVE_FULL_I -+ (match_operand:SVE_FULL_I 2 "register_operand" "w, w, 0, w, w, w") -+ (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w, 0, w, w"))) -+ (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")] -+ UNSPEC_SEL))] -+ "TARGET_SVE -+ && !rtx_equal_p (operands[2], operands[5]) -+ && !rtx_equal_p (operands[3], operands[5]) -+ && !rtx_equal_p (operands[4], operands[5])" -+ "@ -+ movprfx\t%0., %1/z, %4.\;mls\t%0., %1/m, %2., %3. -+ movprfx\t%0., %1/z, %0.\;mls\t%0., %1/m, %2., %3. -+ movprfx\t%0., %1/z, %0.\;msb\t%0., %1/m, %3., %4. -+ movprfx\t%0., %1/z, %0.\;msb\t%0., %1/m, %2., %4. -+ movprfx\t%0., %1/m, %4.\;mls\t%0., %1/m, %2., %3. -+ #" -+ "&& reload_completed -+ && register_operand (operands[5], mode) -+ && !rtx_equal_p (operands[0], operands[5])" - { -- operands[2] = force_reg (mode, CONSTM1_RTX (mode)); -+ emit_insn (gen_vcond_mask_ (operands[0], operands[4], -+ operands[5], operands[1])); -+ operands[5] = operands[4] = operands[0]; - } -+ [(set_attr "movprfx" "yes")] - ) - --;; Predicated predicate inverse. 
--(define_insn "*one_cmpl3" -- [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") -- (and:PRED_ALL -- (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa")) -- (match_operand:PRED_ALL 1 "register_operand" "Upa")))] -- "TARGET_SVE" -- "not\t%0.b, %1/z, %2.b" --) -- --;; Predicated predicate BIC and ORN. --(define_insn "*3" -- [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") -- (and:PRED_ALL -- (NLOGICAL:PRED_ALL -- (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa")) -- (match_operand:PRED_ALL 3 "register_operand" "Upa")) -- (match_operand:PRED_ALL 1 "register_operand" "Upa")))] -- "TARGET_SVE" -- "\t%0.b, %1/z, %3.b, %2.b" --) -+;; ------------------------------------------------------------------------- -+;; ---- [INT] Dot product -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - SDOT -+;; - SUDOT (I8MM) -+;; - UDOT -+;; - USDOT (I8MM) -+;; ------------------------------------------------------------------------- - --;; Predicated predicate NAND and NOR. --(define_insn "*3" -- [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") -- (and:PRED_ALL -- (NLOGICAL:PRED_ALL -- (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa")) -- (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa"))) -- (match_operand:PRED_ALL 1 "register_operand" "Upa")))] -+;; Four-element integer dot-product with accumulation. -+(define_insn "dot_prod" -+ [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w") -+ (plus:SVE_FULL_SDI -+ (unspec:SVE_FULL_SDI -+ [(match_operand: 1 "register_operand" "w, w") -+ (match_operand: 2 "register_operand" "w, w")] -+ DOTPROD) -+ (match_operand:SVE_FULL_SDI 3 "register_operand" "0, w")))] - "TARGET_SVE" -- "\t%0.b, %1/z, %2.b, %3.b" -+ "@ -+ dot\\t%0., %1., %2. -+ movprfx\t%0, %3\;dot\\t%0., %1., %2." -+ [(set_attr "movprfx" "*,yes")] - ) - --;; Unpredicated LSL, LSR and ASR by a vector. --(define_expand "v3" -- [(set (match_operand:SVE_I 0 "register_operand") -- (unspec:SVE_I -- [(match_dup 3) -- (ASHIFT:SVE_I -- (match_operand:SVE_I 1 "register_operand") -- (match_operand:SVE_I 2 "aarch64_sve_shift_operand"))] -- UNSPEC_MERGE_PTRUE))] -+;; Four-element integer dot-product by selected lanes with accumulation. -+(define_insn "@aarch64_dot_prod_lane" -+ [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w") -+ (plus:SVE_FULL_SDI -+ (unspec:SVE_FULL_SDI -+ [(match_operand: 1 "register_operand" "w, w") -+ (unspec: -+ [(match_operand: 2 "register_operand" ", ") -+ (match_operand:SI 3 "const_int_operand")] -+ UNSPEC_SVE_LANE_SELECT)] -+ DOTPROD) -+ (match_operand:SVE_FULL_SDI 4 "register_operand" "0, w")))] - "TARGET_SVE" -- { -- operands[3] = force_reg (mode, CONSTM1_RTX (mode)); -- } -+ "@ -+ dot\\t%0., %1., %2.[%3] -+ movprfx\t%0, %4\;dot\\t%0., %1., %2.[%3]" -+ [(set_attr "movprfx" "*,yes")] - ) - --;; LSL, LSR and ASR by a vector, predicated with a PTRUE. We don't --;; actually need the predicate for the first alternative, but using Upa --;; or X isn't likely to gain much and would make the instruction seem --;; less uniform to the register allocator. 
--(define_insn_and_split "*v3" -- [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w") -- (unspec:SVE_I -- [(match_operand: 1 "register_operand" "Upl, Upl, Upl") -- (ASHIFT:SVE_I -- (match_operand:SVE_I 2 "register_operand" "w, 0, w") -- (match_operand:SVE_I 3 "aarch64_sve_shift_operand" "D, w, w"))] -- UNSPEC_MERGE_PTRUE))] -- "TARGET_SVE" -+(define_insn "@aarch64_dot_prod" -+ [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w") -+ (plus:VNx4SI_ONLY -+ (unspec:VNx4SI_ONLY -+ [(match_operand: 1 "register_operand" "w, w") -+ (match_operand: 2 "register_operand" "w, w")] -+ DOTPROD_US_ONLY) -+ (match_operand:VNx4SI_ONLY 3 "register_operand" "0, w")))] -+ "TARGET_SVE_I8MM" - "@ -- # -- \t%0., %1/m, %0., %3. -- movprfx\t%0, %2\;\t%0., %1/m, %0., %3." -- "&& reload_completed -- && !register_operand (operands[3], mode)" -- [(set (match_dup 0) (ASHIFT:SVE_I (match_dup 2) (match_dup 3)))] -- "" -- [(set_attr "movprfx" "*,*,yes")] -+ dot\\t%0.s, %1.b, %2.b -+ movprfx\t%0, %3\;dot\\t%0.s, %1.b, %2.b" -+ [(set_attr "movprfx" "*,yes")] - ) - --;; Unpredicated shift operations by a constant (post-RA only). --;; These are generated by splitting a predicated instruction whose --;; predicate is unused. --(define_insn "*post_ra_v3" -- [(set (match_operand:SVE_I 0 "register_operand" "=w") -- (ASHIFT:SVE_I -- (match_operand:SVE_I 1 "register_operand" "w") -- (match_operand:SVE_I 2 "aarch64_simd_shift_imm")))] -- "TARGET_SVE && reload_completed" -- "\t%0., %1., #%2" -+(define_insn "@aarch64_dot_prod_lane" -+ [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w") -+ (plus:VNx4SI_ONLY -+ (unspec:VNx4SI_ONLY -+ [(match_operand: 1 "register_operand" "w, w") -+ (unspec: -+ [(match_operand: 2 "register_operand" "y, y") -+ (match_operand:SI 3 "const_int_operand")] -+ UNSPEC_SVE_LANE_SELECT)] -+ DOTPROD_I8MM) -+ (match_operand:VNx4SI_ONLY 4 "register_operand" "0, w")))] -+ "TARGET_SVE_I8MM" -+ "@ -+ dot\\t%0.s, %1.b, %2.b[%3] -+ movprfx\t%0, %4\;dot\\t%0.s, %1.b, %2.b[%3]" -+ [(set_attr "movprfx" "*,yes")] - ) - --;; LSL, LSR and ASR by a scalar, which expands into one of the vector --;; shifts above. --(define_expand "3" -- [(set (match_operand:SVE_I 0 "register_operand") -- (ASHIFT:SVE_I (match_operand:SVE_I 1 "register_operand") -- (match_operand: 2 "general_operand")))] -+;; ------------------------------------------------------------------------- -+;; ---- [INT] Sum of absolute differences -+;; ------------------------------------------------------------------------- -+;; The patterns in this section are synthetic. -+;; ------------------------------------------------------------------------- -+ -+;; Emit a sequence to produce a sum-of-absolute-differences of the inputs in -+;; operands 1 and 2. The sequence also has to perform a widening reduction of -+;; the difference into a vector and accumulate that into operand 3 before -+;; copying that into the result operand 0. 
-+;; Perform that with a sequence of: -+;; MOV ones.b, #1 -+;; [SU]ABD diff.b, p0/m, op1.b, op2.b -+;; MOVPRFX op0, op3 // If necessary -+;; UDOT op0.s, diff.b, ones.b -+(define_expand "sad" -+ [(use (match_operand:SVE_FULL_SDI 0 "register_operand")) -+ (unspec: [(use (match_operand: 1 "register_operand")) -+ (use (match_operand: 2 "register_operand"))] ABAL) -+ (use (match_operand:SVE_FULL_SDI 3 "register_operand"))] - "TARGET_SVE" - { -- rtx amount; -- if (CONST_INT_P (operands[2])) -- { -- amount = gen_const_vec_duplicate (mode, operands[2]); -- if (!aarch64_sve_shift_operand (operands[2], mode)) -- amount = force_reg (mode, amount); -- } -- else -- { -- amount = gen_reg_rtx (mode); -- emit_insn (gen_vec_duplicate (amount, -- convert_to_mode (mode, -- operands[2], 0))); -- } -- emit_insn (gen_v3 (operands[0], operands[1], amount)); -+ rtx ones = force_reg (mode, CONST1_RTX (mode)); -+ rtx diff = gen_reg_rtx (mode); -+ emit_insn (gen_abd_3 (diff, operands[1], operands[2])); -+ emit_insn (gen_udot_prod (operands[0], diff, ones, operands[3])); - DONE; - } - ) - --;; Test all bits of operand 1. Operand 0 is a GP that is known to hold PTRUE. --;; --;; Using UNSPEC_PTEST_PTRUE allows combine patterns to assume that the GP --;; is a PTRUE even if the optimizers haven't yet been able to propagate --;; the constant. We would use a separate unspec code for PTESTs involving --;; GPs that might not be PTRUEs. --(define_insn "ptest_ptrue" -- [(set (reg:CC CC_REGNUM) -- (compare:CC -- (unspec:SI [(match_operand:PRED_ALL 0 "register_operand" "Upa") -- (match_operand:PRED_ALL 1 "register_operand" "Upa")] -- UNSPEC_PTEST_PTRUE) -- (const_int 0)))] -- "TARGET_SVE" -- "ptest\t%0, %1.b" -+;; ------------------------------------------------------------------------- -+;; ---- [INT] Matrix multiply-accumulate -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - SMMLA (I8MM) -+;; - UMMLA (I8MM) -+;; - USMMLA (I8MM) -+;; ------------------------------------------------------------------------- -+ -+(define_insn "@aarch64_sve_add_" -+ [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w") -+ (plus:VNx4SI_ONLY -+ (unspec:VNx4SI_ONLY -+ [(match_operand: 2 "register_operand" "w, w") -+ (match_operand: 3 "register_operand" "w, w")] -+ MATMUL) -+ (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))] -+ "TARGET_SVE_I8MM" -+ "@ -+ mmla\\t%0.s, %2.b, %3.b -+ movprfx\t%0, %1\;mmla\\t%0.s, %2.b, %3.b" -+ [(set_attr "movprfx" "*,yes")] - ) - --;; Set element I of the result if operand1 + J < operand2 for all J in [0, I]. --;; with the comparison being unsigned. --(define_insn "while_ult" -- [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") -- (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ") -- (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")] -- UNSPEC_WHILE_LO)) -- (clobber (reg:CC CC_REGNUM))] -- "TARGET_SVE" -- "whilelo\t%0., %1, %2" --) -- --;; WHILELO sets the flags in the same way as a PTEST with a PTRUE GP. --;; Handle the case in which both results are useful. The GP operand --;; to the PTEST isn't needed, so we allow it to be anything. 
--(define_insn_and_split "while_ult_cc" -- [(set (reg:CC CC_REGNUM) -- (compare:CC -- (unspec:SI [(match_operand:PRED_ALL 1) -- (unspec:PRED_ALL -- [(match_operand:GPI 2 "aarch64_reg_or_zero" "rZ") -- (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")] -- UNSPEC_WHILE_LO)] -- UNSPEC_PTEST_PTRUE) -- (const_int 0))) -- (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") -- (unspec:PRED_ALL [(match_dup 2) -- (match_dup 3)] -- UNSPEC_WHILE_LO))] -+;; ------------------------------------------------------------------------- -+;; ---- [FP] General ternary arithmetic corresponding to unspecs -+;; ------------------------------------------------------------------------- -+;; Includes merging patterns for: -+;; - FMAD -+;; - FMLA -+;; - FMLS -+;; - FMSB -+;; - FNMAD -+;; - FNMLA -+;; - FNMLS -+;; - FNMSB -+;; ------------------------------------------------------------------------- -+ -+;; Unpredicated floating-point ternary operations. -+(define_expand "4" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand") -+ (unspec:SVE_FULL_F -+ [(match_dup 4) -+ (const_int SVE_RELAXED_GP) -+ (match_operand:SVE_FULL_F 1 "register_operand") -+ (match_operand:SVE_FULL_F 2 "register_operand") -+ (match_operand:SVE_FULL_F 3 "register_operand")] -+ SVE_COND_FP_TERNARY))] - "TARGET_SVE" -- "whilelo\t%0., %2, %3" -- ;; Force the compiler to drop the unused predicate operand, so that we -- ;; don't have an unnecessary PTRUE. -- "&& !CONSTANT_P (operands[1])" -- [(const_int 0)] - { -- emit_insn (gen_while_ult_cc -- (operands[0], CONSTM1_RTX (mode), -- operands[2], operands[3])); -- DONE; -+ operands[4] = aarch64_ptrue_reg (mode); - } - ) - --;; Integer comparisons predicated with a PTRUE. --(define_insn "*cmp" -- [(set (match_operand: 0 "register_operand" "=Upa, Upa") -- (unspec: -- [(match_operand: 1 "register_operand" "Upl, Upl") -- (SVE_INT_CMP: -- (match_operand:SVE_I 2 "register_operand" "w, w") -- (match_operand:SVE_I 3 "aarch64_sve_cmp__operand" ", w"))] -- UNSPEC_MERGE_PTRUE)) -- (clobber (reg:CC CC_REGNUM))] -+;; Predicated floating-point ternary operations. -+(define_insn "@aarch64_pred_" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl") -+ (match_operand:SI 5 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_F 2 "register_operand" "%w, 0, w") -+ (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w") -+ (match_operand:SVE_FULL_F 4 "register_operand" "0, w, w")] -+ SVE_COND_FP_TERNARY))] - "TARGET_SVE" - "@ -- cmp\t%0., %1/z, %2., #%3 -- cmp\t%0., %1/z, %2., %3." -+ \t%0., %1/m, %2., %3. -+ \t%0., %1/m, %3., %4. -+ movprfx\t%0, %4\;\t%0., %1/m, %2., %3." -+ [(set_attr "movprfx" "*,*,yes")] - ) - --;; Integer comparisons predicated with a PTRUE in which only the flags result --;; is interesting. --(define_insn "*cmp_ptest" -- [(set (reg:CC CC_REGNUM) -- (compare:CC -- (unspec:SI -- [(match_operand: 1 "register_operand" "Upl, Upl") -- (unspec: -- [(match_dup 1) -- (SVE_INT_CMP: -- (match_operand:SVE_I 2 "register_operand" "w, w") -- (match_operand:SVE_I 3 "aarch64_sve_cmp__operand" ", w"))] -- UNSPEC_MERGE_PTRUE)] -- UNSPEC_PTEST_PTRUE) -- (const_int 0))) -- (clobber (match_scratch: 0 "=Upa, Upa"))] -+;; Predicated floating-point ternary operations with merging. 
-+(define_expand "@cond_" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand") -+ (unspec:SVE_FULL_F -+ [(match_dup 1) -+ (const_int SVE_STRICT_GP) -+ (match_operand:SVE_FULL_F 2 "register_operand") -+ (match_operand:SVE_FULL_F 3 "register_operand") -+ (match_operand:SVE_FULL_F 4 "register_operand")] -+ SVE_COND_FP_TERNARY) -+ (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")] -+ UNSPEC_SEL))] - "TARGET_SVE" -+{ -+ /* Swap the multiplication operands if the fallback value is the -+ second of the two. */ -+ if (rtx_equal_p (operands[3], operands[5])) -+ std::swap (operands[2], operands[3]); -+}) -+ -+;; Predicated floating-point ternary operations, merging with the -+;; first input. -+(define_insn_and_rewrite "*cond__2" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (unspec:SVE_FULL_F -+ [(match_operand 5) -+ (match_operand:SI 6 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_F 2 "register_operand" "0, w") -+ (match_operand:SVE_FULL_F 3 "register_operand" "w, w") -+ (match_operand:SVE_FULL_F 4 "register_operand" "w, w")] -+ SVE_COND_FP_TERNARY) -+ (match_dup 2)] -+ UNSPEC_SEL))] -+ "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" - "@ -- cmp\t%0., %1/z, %2., #%3 -- cmp\t%0., %1/z, %2., %3." -+ \t%0., %1/m, %3., %4. -+ movprfx\t%0, %2\;\t%0., %1/m, %3., %4." -+ "&& !rtx_equal_p (operands[1], operands[5])" -+ { -+ operands[5] = copy_rtx (operands[1]); -+ } -+ [(set_attr "movprfx" "*,yes")] - ) - --;; Integer comparisons predicated with a PTRUE in which both the flag and --;; predicate results are interesting. --(define_insn "*cmp_cc" -- [(set (reg:CC CC_REGNUM) -- (compare:CC -- (unspec:SI -- [(match_operand: 1 "register_operand" "Upl, Upl") -- (unspec: -- [(match_dup 1) -- (SVE_INT_CMP: -- (match_operand:SVE_I 2 "register_operand" "w, w") -- (match_operand:SVE_I 3 "aarch64_sve_cmp__operand" ", w"))] -- UNSPEC_MERGE_PTRUE)] -- UNSPEC_PTEST_PTRUE) -- (const_int 0))) -- (set (match_operand: 0 "register_operand" "=Upa, Upa") -- (unspec: -- [(match_dup 1) -- (SVE_INT_CMP: -- (match_dup 2) -- (match_dup 3))] -- UNSPEC_MERGE_PTRUE))] -- "TARGET_SVE" -+;; Predicated floating-point ternary operations, merging with the -+;; third input. -+(define_insn_and_rewrite "*cond__4" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (unspec:SVE_FULL_F -+ [(match_operand 5) -+ (match_operand:SI 6 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_F 2 "register_operand" "w, w") -+ (match_operand:SVE_FULL_F 3 "register_operand" "w, w") -+ (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] -+ SVE_COND_FP_TERNARY) -+ (match_dup 4)] -+ UNSPEC_SEL))] -+ "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" - "@ -- cmp\t%0., %1/z, %2., #%3 -- cmp\t%0., %1/z, %2., %3." -+ \t%0., %1/m, %2., %3. -+ movprfx\t%0, %4\;\t%0., %1/m, %2., %3." -+ "&& !rtx_equal_p (operands[1], operands[5])" -+ { -+ operands[5] = copy_rtx (operands[1]); -+ } -+ [(set_attr "movprfx" "*,yes")] - ) - --;; Predicated integer comparisons, formed by combining a PTRUE-predicated --;; comparison with an AND. Split the instruction into its preferred form --;; (below) at the earliest opportunity, in order to get rid of the --;; redundant operand 1. 
--(define_insn_and_split "*pred_cmp_combine" -- [(set (match_operand: 0 "register_operand" "=Upa, Upa") -- (and: -- (unspec: -- [(match_operand: 1) -- (SVE_INT_CMP: -- (match_operand:SVE_I 2 "register_operand" "w, w") -- (match_operand:SVE_I 3 "aarch64_sve_cmp__operand" ", w"))] -- UNSPEC_MERGE_PTRUE) -- (match_operand: 4 "register_operand" "Upl, Upl"))) -- (clobber (reg:CC CC_REGNUM))] -- "TARGET_SVE" -- "#" -+;; Predicated floating-point ternary operations, merging with an -+;; independent value. -+(define_insn_and_rewrite "*cond__any" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") -+ (unspec:SVE_FULL_F -+ [(match_operand 6) -+ (match_operand:SI 7 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_F 2 "register_operand" "w, w, 0, w, w, w") -+ (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, 0, w, w") -+ (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w, w, w")] -+ SVE_COND_FP_TERNARY) -+ (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")] -+ UNSPEC_SEL))] -+ "TARGET_SVE -+ && !rtx_equal_p (operands[2], operands[5]) -+ && !rtx_equal_p (operands[3], operands[5]) -+ && !rtx_equal_p (operands[4], operands[5]) -+ && aarch64_sve_pred_dominates_p (&operands[6], operands[1])" -+ "@ -+ movprfx\t%0., %1/z, %4.\;\t%0., %1/m, %2., %3. -+ movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %2., %3. -+ movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %3., %4. -+ movprfx\t%0., %1/z, %0.\;\t%0., %1/m, %2., %4. -+ movprfx\t%0., %1/m, %4.\;\t%0., %1/m, %2., %3. -+ #" - "&& 1" -- [(parallel -- [(set (match_dup 0) -- (and: -- (SVE_INT_CMP: -- (match_dup 2) -- (match_dup 3)) -- (match_dup 4))) -- (clobber (reg:CC CC_REGNUM))])] -+ { -+ if (reload_completed -+ && register_operand (operands[5], mode) -+ && !rtx_equal_p (operands[0], operands[5])) -+ { -+ emit_insn (gen_vcond_mask_ (operands[0], operands[4], -+ operands[5], operands[1])); -+ operands[5] = operands[4] = operands[0]; -+ } -+ else if (!rtx_equal_p (operands[1], operands[6])) -+ operands[6] = copy_rtx (operands[1]); -+ else -+ FAIL; -+ } -+ [(set_attr "movprfx" "yes")] - ) - --;; Predicated integer comparisons. --(define_insn "*pred_cmp" -- [(set (match_operand: 0 "register_operand" "=Upa, Upa") -- (and: -- (SVE_INT_CMP: -- (match_operand:SVE_I 2 "register_operand" "w, w") -- (match_operand:SVE_I 3 "aarch64_sve_cmp__operand" ", w")) -- (match_operand: 1 "register_operand" "Upl, Upl"))) -- (clobber (reg:CC CC_REGNUM))] -+;; Unpredicated FMLA and FMLS by selected lanes. It doesn't seem worth using -+;; (fma ...) since target-independent code won't understand the indexing. -+(define_insn "@aarch64__lane_" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL_F -+ [(match_operand:SVE_FULL_F 1 "register_operand" "w, w") -+ (unspec:SVE_FULL_F -+ [(match_operand:SVE_FULL_F 2 "register_operand" ", ") -+ (match_operand:SI 3 "const_int_operand")] -+ UNSPEC_SVE_LANE_SELECT) -+ (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] -+ SVE_FP_TERNARY_LANE))] - "TARGET_SVE" - "@ -- cmp\t%0., %1/z, %2., #%3 -- cmp\t%0., %1/z, %2., %3." -+ \t%0., %1., %2.[%3] -+ movprfx\t%0, %4\;\t%0., %1., %2.[%3]" -+ [(set_attr "movprfx" "*,yes")] - ) - --;; Floating-point comparisons predicated with a PTRUE. 
--(define_insn "*fcm" -- [(set (match_operand: 0 "register_operand" "=Upa, Upa") -- (unspec: -+;; ------------------------------------------------------------------------- -+;; ---- [FP] Complex multiply-add -+;; ------------------------------------------------------------------------- -+;; Includes merging patterns for: -+;; - FCMLA -+;; ------------------------------------------------------------------------- -+ -+;; Predicated FCMLA. -+(define_insn "@aarch64_pred_" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL_F - [(match_operand: 1 "register_operand" "Upl, Upl") -- (SVE_FP_CMP: -- (match_operand:SVE_F 2 "register_operand" "w, w") -- (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))] -- UNSPEC_MERGE_PTRUE))] -+ (match_operand:SI 5 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_F 2 "register_operand" "w, w") -+ (match_operand:SVE_FULL_F 3 "register_operand" "w, w") -+ (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] -+ SVE_COND_FCMLA))] - "TARGET_SVE" - "@ -- fcm\t%0., %1/z, %2., #0.0 -- fcm\t%0., %1/z, %2., %3." -+ fcmla\t%0., %1/m, %2., %3., # -+ movprfx\t%0, %4\;fcmla\t%0., %1/m, %2., %3., #" -+ [(set_attr "movprfx" "*,yes")] - ) - --(define_insn "*fcmuo" -- [(set (match_operand: 0 "register_operand" "=Upa") -- (unspec: -- [(match_operand: 1 "register_operand" "Upl") -- (unordered: -- (match_operand:SVE_F 2 "register_operand" "w") -- (match_operand:SVE_F 3 "register_operand" "w"))] -- UNSPEC_MERGE_PTRUE))] -+;; Predicated FCMLA with merging. -+(define_expand "@cond_" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand") -+ (unspec:SVE_FULL_F -+ [(match_dup 1) -+ (const_int SVE_STRICT_GP) -+ (match_operand:SVE_FULL_F 2 "register_operand") -+ (match_operand:SVE_FULL_F 3 "register_operand") -+ (match_operand:SVE_FULL_F 4 "register_operand")] -+ SVE_COND_FCMLA) -+ (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")] -+ UNSPEC_SEL))] - "TARGET_SVE" -- "fcmuo\t%0., %1/z, %2., %3." - ) - --;; Floating-point comparisons predicated on a PTRUE, with the results ANDed --;; with another predicate P. This does not have the same trapping behavior --;; as predicating the comparison itself on P, but it's a legitimate fold, --;; since we can drop any potentially-trapping operations whose results --;; are not needed. --;; --;; Split the instruction into its preferred form (below) at the earliest --;; opportunity, in order to get rid of the redundant operand 1. --(define_insn_and_split "*fcm_and_combine" -- [(set (match_operand: 0 "register_operand" "=Upa, Upa") -- (and: -- (unspec: -- [(match_operand: 1) -- (SVE_FP_CMP -- (match_operand:SVE_F 2 "register_operand" "w, w") -- (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))] -- UNSPEC_MERGE_PTRUE) -- (match_operand: 4 "register_operand" "Upl, Upl")))] -- "TARGET_SVE" -- "#" -- "&& 1" -- [(set (match_dup 0) -- (and: -- (SVE_FP_CMP: -- (match_dup 2) -- (match_dup 3)) -- (match_dup 4)))] -+;; Predicated FCMLA, merging with the third input. 
-+(define_insn_and_rewrite "*cond__4" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (unspec:SVE_FULL_F -+ [(match_operand 5) -+ (match_operand:SI 6 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_F 2 "register_operand" "w, w") -+ (match_operand:SVE_FULL_F 3 "register_operand" "w, w") -+ (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] -+ SVE_COND_FCMLA) -+ (match_dup 4)] -+ UNSPEC_SEL))] -+ "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])" -+ "@ -+ fcmla\t%0., %1/m, %2., %3., # -+ movprfx\t%0, %4\;fcmla\t%0., %1/m, %2., %3., #" -+ "&& !rtx_equal_p (operands[1], operands[5])" -+ { -+ operands[5] = copy_rtx (operands[1]); -+ } -+ [(set_attr "movprfx" "*,yes")] - ) - --(define_insn_and_split "*fcmuo_and_combine" -- [(set (match_operand: 0 "register_operand" "=Upa") -- (and: -- (unspec: -- [(match_operand: 1) -- (unordered -- (match_operand:SVE_F 2 "register_operand" "w") -- (match_operand:SVE_F 3 "register_operand" "w"))] -- UNSPEC_MERGE_PTRUE) -- (match_operand: 4 "register_operand" "Upl")))] -- "TARGET_SVE" -- "#" -+;; Predicated FCMLA, merging with an independent value. -+(define_insn_and_rewrite "*cond__any" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") -+ (unspec:SVE_FULL_F -+ [(match_operand 6) -+ (match_operand:SI 7 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w") -+ (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w") -+ (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w")] -+ SVE_COND_FCMLA) -+ (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] -+ UNSPEC_SEL))] -+ "TARGET_SVE -+ && !rtx_equal_p (operands[4], operands[5]) -+ && aarch64_sve_pred_dominates_p (&operands[6], operands[1])" -+ "@ -+ movprfx\t%0., %1/z, %4.\;fcmla\t%0., %1/m, %2., %3., # -+ movprfx\t%0., %1/z, %0.\;fcmla\t%0., %1/m, %2., %3., # -+ movprfx\t%0., %1/m, %4.\;fcmla\t%0., %1/m, %2., %3., # -+ #" - "&& 1" -- [(set (match_dup 0) -- (and: -- (unordered: -- (match_dup 2) -- (match_dup 3)) -- (match_dup 4)))] -+ { -+ if (reload_completed -+ && register_operand (operands[5], mode) -+ && !rtx_equal_p (operands[0], operands[5])) -+ { -+ emit_insn (gen_vcond_mask_ (operands[0], operands[4], -+ operands[5], operands[1])); -+ operands[5] = operands[4] = operands[0]; -+ } -+ else if (!rtx_equal_p (operands[1], operands[6])) -+ operands[6] = copy_rtx (operands[1]); -+ else -+ FAIL; -+ } -+ [(set_attr "movprfx" "yes")] - ) - --;; Unpredicated floating-point comparisons, with the results ANDed --;; with another predicate. This is a valid fold for the same reasons --;; as above. --(define_insn "*fcm_and" -- [(set (match_operand: 0 "register_operand" "=Upa, Upa") -- (and: -- (SVE_FP_CMP: -- (match_operand:SVE_F 2 "register_operand" "w, w") -- (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")) -- (match_operand: 1 "register_operand" "Upl, Upl")))] -+;; Unpredicated FCMLA with indexing. 
-+(define_insn "@aarch64__lane_" -+ [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL_HSF -+ [(match_operand:SVE_FULL_HSF 1 "register_operand" "w, w") -+ (unspec:SVE_FULL_HSF -+ [(match_operand:SVE_FULL_HSF 2 "register_operand" ", ") -+ (match_operand:SI 3 "const_int_operand")] -+ UNSPEC_SVE_LANE_SELECT) -+ (match_operand:SVE_FULL_HSF 4 "register_operand" "0, w")] -+ FCMLA))] - "TARGET_SVE" - "@ -- fcm\t%0., %1/z, %2., #0.0 -- fcm\t%0., %1/z, %2., %3." -+ fcmla\t%0., %1., %2.[%3], # -+ movprfx\t%0, %4\;fcmla\t%0., %1., %2.[%3], #" -+ [(set_attr "movprfx" "*,yes")] - ) - --(define_insn "*fcmuo_and" -- [(set (match_operand: 0 "register_operand" "=Upa") -- (and: -- (unordered: -- (match_operand:SVE_F 2 "register_operand" "w") -- (match_operand:SVE_F 3 "register_operand" "w")) -- (match_operand: 1 "register_operand" "Upl")))] -+;; ------------------------------------------------------------------------- -+;; ---- [FP] Trigonometric multiply-add -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - FTMAD -+;; ------------------------------------------------------------------------- -+ -+(define_insn "@aarch64_sve_tmad" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL_F -+ [(match_operand:SVE_FULL_F 1 "register_operand" "0, w") -+ (match_operand:SVE_FULL_F 2 "register_operand" "w, w") -+ (match_operand:DI 3 "const_int_operand")] -+ UNSPEC_FTMAD))] - "TARGET_SVE" -- "fcmuo\t%0., %1/z, %2., %3." -+ "@ -+ ftmad\t%0., %0., %2., #%3 -+ movprfx\t%0, %1\;ftmad\t%0., %0., %2., #%3" -+ [(set_attr "movprfx" "*,yes")] - ) - --;; Predicated floating-point comparisons. We don't need a version --;; of this for unordered comparisons. --(define_insn "*pred_fcm" -- [(set (match_operand: 0 "register_operand" "=Upa, Upa") -- (unspec: -- [(match_operand: 1 "register_operand" "Upl, Upl") -- (match_operand:SVE_F 2 "register_operand" "w, w") -- (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")] -- SVE_COND_FP_CMP))] -+;; ------------------------------------------------------------------------- -+;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF) -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - BFDOT (BF16) -+;; - BFMLALB (BF16) -+;; - BFMLALT (BF16) -+;; - BFMMLA (BF16) -+;; ------------------------------------------------------------------------- -+ -+(define_insn "@aarch64_sve_vnx4sf" -+ [(set (match_operand:VNx4SF 0 "register_operand" "=w, ?&w") -+ (unspec:VNx4SF -+ [(match_operand:VNx4SF 1 "register_operand" "0, w") -+ (match_operand:VNx8BF 2 "register_operand" "w, w") -+ (match_operand:VNx8BF 3 "register_operand" "w, w")] -+ SVE_BFLOAT_TERNARY_LONG))] -+ "TARGET_SVE_BF16" -+ "@ -+ \t%0.s, %2.h, %3.h -+ movprfx\t%0, %1\;\t%0.s, %2.h, %3.h" -+ [(set_attr "movprfx" "*,yes")] -+) -+ -+;; The immediate range is enforced before generating the instruction. 
-+(define_insn "@aarch64_sve__lanevnx4sf" -+ [(set (match_operand:VNx4SF 0 "register_operand" "=w, ?&w") -+ (unspec:VNx4SF -+ [(match_operand:VNx4SF 1 "register_operand" "0, w") -+ (match_operand:VNx8BF 2 "register_operand" "w, w") -+ (match_operand:VNx8BF 3 "register_operand" "y, y") -+ (match_operand:SI 4 "const_int_operand")] -+ SVE_BFLOAT_TERNARY_LONG_LANE))] -+ "TARGET_SVE_BF16" -+ "@ -+ \t%0.s, %2.h, %3.h[%4] -+ movprfx\t%0, %1\;\t%0.s, %2.h, %3.h[%4]" -+ [(set_attr "movprfx" "*,yes")] -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- [FP] Matrix multiply-accumulate -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - FMMLA (F32MM,F64MM) -+;; ------------------------------------------------------------------------- -+ -+;; The mode iterator enforces the target requirements. -+(define_insn "@aarch64_sve_" -+ [(set (match_operand:SVE_MATMULF 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_MATMULF -+ [(match_operand:SVE_MATMULF 2 "register_operand" "w, w") -+ (match_operand:SVE_MATMULF 3 "register_operand" "w, w") -+ (match_operand:SVE_MATMULF 1 "register_operand" "0, w")] -+ FMMLA))] - "TARGET_SVE" - "@ -- fcm\t%0., %1/z, %2., #0.0 -- fcm\t%0., %1/z, %2., %3." -+ \\t%0., %2., %3. -+ movprfx\t%0, %1\;\\t%0., %2., %3." -+ [(set_attr "movprfx" "*,yes")] - ) - -+;; ========================================================================= -+;; == Comparisons and selects -+;; ========================================================================= -+ -+;; ------------------------------------------------------------------------- -+;; ---- [INT,FP] Select based on predicates -+;; ------------------------------------------------------------------------- -+;; Includes merging patterns for: -+;; - FMOV -+;; - MOV -+;; - SEL -+;; ------------------------------------------------------------------------- -+ - ;; vcond_mask operand order: true, false, mask - ;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR) - ;; SEL operand order: mask, true, false --(define_insn "vcond_mask_" -- [(set (match_operand:SVE_ALL 0 "register_operand" "=w") -- (unspec:SVE_ALL -- [(match_operand: 3 "register_operand" "Upa") -- (match_operand:SVE_ALL 1 "register_operand" "w") -- (match_operand:SVE_ALL 2 "register_operand" "w")] -+(define_expand "@vcond_mask_" -+ [(set (match_operand:SVE_FULL 0 "register_operand") -+ (unspec:SVE_FULL -+ [(match_operand: 3 "register_operand") -+ (match_operand:SVE_FULL 1 "aarch64_sve_reg_or_dup_imm") -+ (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero")] - UNSPEC_SEL))] - "TARGET_SVE" -- "sel\t%0., %3, %1., %2." -+ { -+ if (register_operand (operands[1], mode)) -+ operands[2] = force_reg (mode, operands[2]); -+ } - ) - --;; Selects between a duplicated immediate and zero. 
--(define_insn "aarch64_sve_dup_const" -- [(set (match_operand:SVE_I 0 "register_operand" "=w") -- (unspec:SVE_I -- [(match_operand: 1 "register_operand" "Upl") -- (match_operand:SVE_I 2 "aarch64_sve_dup_immediate") -- (match_operand:SVE_I 3 "aarch64_simd_imm_zero")] -+;; Selects between: -+;; - two registers -+;; - a duplicated immediate and a register -+;; - a duplicated immediate and zero -+(define_insn "*vcond_mask_" -+ [(set (match_operand:SVE_FULL 0 "register_operand" "=w, w, w, w, ?w, ?&w, ?&w") -+ (unspec:SVE_FULL -+ [(match_operand: 3 "register_operand" "Upa, Upa, Upa, Upa, Upl, Upl, Upl") -+ (match_operand:SVE_FULL 1 "aarch64_sve_reg_or_dup_imm" "w, vss, vss, Ufc, Ufc, vss, Ufc") -+ (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero" "w, 0, Dz, 0, Dz, w, w")] -+ UNSPEC_SEL))] -+ "TARGET_SVE -+ && (!register_operand (operands[1], mode) -+ || register_operand (operands[2], mode))" -+ "@ -+ sel\t%0., %3, %1., %2. -+ mov\t%0., %3/m, #%I1 -+ mov\t%0., %3/z, #%I1 -+ fmov\t%0., %3/m, #%1 -+ movprfx\t%0., %3/z, %0.\;fmov\t%0., %3/m, #%1 -+ movprfx\t%0, %2\;mov\t%0., %3/m, #%I1 -+ movprfx\t%0, %2\;fmov\t%0., %3/m, #%1" -+ [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")] -+) -+ -+;; Optimize selects between a duplicated scalar variable and another vector, -+;; the latter of which can be a zero constant or a variable. Treat duplicates -+;; of GPRs as being more expensive than duplicates of FPRs, since they -+;; involve a cross-file move. -+(define_insn "@aarch64_sel_dup" -+ [(set (match_operand:SVE_FULL 0 "register_operand" "=?w, w, ??w, ?&w, ??&w, ?&w") -+ (unspec:SVE_FULL -+ [(match_operand: 3 "register_operand" "Upa, Upa, Upl, Upl, Upl, Upl") -+ (vec_duplicate:SVE_FULL -+ (match_operand: 1 "register_operand" "r, w, r, w, r, w")) -+ (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero" "0, 0, Dz, Dz, w, w")] - UNSPEC_SEL))] - "TARGET_SVE" -- "mov\t%0., %1/z, #%2" -+ "@ -+ mov\t%0., %3/m, %1 -+ mov\t%0., %3/m, %1 -+ movprfx\t%0., %3/z, %0.\;mov\t%0., %3/m, %1 -+ movprfx\t%0., %3/z, %0.\;mov\t%0., %3/m, %1 -+ movprfx\t%0, %2\;mov\t%0., %3/m, %1 -+ movprfx\t%0, %2\;mov\t%0., %3/m, %1" -+ [(set_attr "movprfx" "*,*,yes,yes,yes,yes")] - ) - -+;; ------------------------------------------------------------------------- -+;; ---- [INT,FP] Compare and select -+;; ------------------------------------------------------------------------- -+;; The patterns in this section are synthetic. -+;; ------------------------------------------------------------------------- -+ - ;; Integer (signed) vcond. Don't enforce an immediate range here, since it - ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead. - (define_expand "vcond" -- [(set (match_operand:SVE_ALL 0 "register_operand") -- (if_then_else:SVE_ALL -+ [(set (match_operand:SVE_FULL 0 "register_operand") -+ (if_then_else:SVE_FULL - (match_operator 3 "comparison_operator" - [(match_operand: 4 "register_operand") - (match_operand: 5 "nonmemory_operand")]) -- (match_operand:SVE_ALL 1 "register_operand") -- (match_operand:SVE_ALL 2 "register_operand")))] -+ (match_operand:SVE_FULL 1 "nonmemory_operand") -+ (match_operand:SVE_FULL 2 "nonmemory_operand")))] - "TARGET_SVE" - { - aarch64_expand_sve_vcond (mode, mode, operands); -@@ -1647,13 +6555,13 @@ - ;; Integer vcondu. Don't enforce an immediate range here, since it - ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead. 
- (define_expand "vcondu" -- [(set (match_operand:SVE_ALL 0 "register_operand") -- (if_then_else:SVE_ALL -+ [(set (match_operand:SVE_FULL 0 "register_operand") -+ (if_then_else:SVE_FULL - (match_operator 3 "comparison_operator" - [(match_operand: 4 "register_operand") - (match_operand: 5 "nonmemory_operand")]) -- (match_operand:SVE_ALL 1 "register_operand") -- (match_operand:SVE_ALL 2 "register_operand")))] -+ (match_operand:SVE_FULL 1 "nonmemory_operand") -+ (match_operand:SVE_FULL 2 "nonmemory_operand")))] - "TARGET_SVE" - { - aarch64_expand_sve_vcond (mode, mode, operands); -@@ -1661,17 +6569,16 @@ - } - ) - --;; Floating-point vcond. All comparisons except FCMUO allow a zero --;; operand; aarch64_expand_sve_vcond handles the case of an FCMUO --;; with zero. -+;; Floating-point vcond. All comparisons except FCMUO allow a zero operand; -+;; aarch64_expand_sve_vcond handles the case of an FCMUO with zero. - (define_expand "vcond" -- [(set (match_operand:SVE_SD 0 "register_operand") -- (if_then_else:SVE_SD -+ [(set (match_operand:SVE_FULL_HSD 0 "register_operand") -+ (if_then_else:SVE_FULL_HSD - (match_operator 3 "comparison_operator" - [(match_operand: 4 "register_operand") - (match_operand: 5 "aarch64_simd_reg_or_zero")]) -- (match_operand:SVE_SD 1 "register_operand") -- (match_operand:SVE_SD 2 "register_operand")))] -+ (match_operand:SVE_FULL_HSD 1 "nonmemory_operand") -+ (match_operand:SVE_FULL_HSD 2 "nonmemory_operand")))] - "TARGET_SVE" - { - aarch64_expand_sve_vcond (mode, mode, operands); -@@ -1679,6 +6586,22 @@ - } - ) - -+;; ------------------------------------------------------------------------- -+;; ---- [INT] Comparisons -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - CMPEQ -+;; - CMPGE -+;; - CMPGT -+;; - CMPHI -+;; - CMPHS -+;; - CMPLE -+;; - CMPLO -+;; - CMPLS -+;; - CMPLT -+;; - CMPNE -+;; ------------------------------------------------------------------------- -+ - ;; Signed integer comparisons. Don't enforce an immediate range here, since - ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int - ;; instead. -@@ -1686,9 +6609,9 @@ - [(parallel - [(set (match_operand: 0 "register_operand") - (match_operator: 1 "comparison_operator" -- [(match_operand:SVE_I 2 "register_operand") -- (match_operand:SVE_I 3 "nonmemory_operand")])) -- (clobber (reg:CC CC_REGNUM))])] -+ [(match_operand:SVE_FULL_I 2 "register_operand") -+ (match_operand:SVE_FULL_I 3 "nonmemory_operand")])) -+ (clobber (reg:CC_NZC CC_REGNUM))])] - "TARGET_SVE" - { - aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]), -@@ -1704,9 +6627,9 @@ - [(parallel - [(set (match_operand: 0 "register_operand") - (match_operator: 1 "comparison_operator" -- [(match_operand:SVE_I 2 "register_operand") -- (match_operand:SVE_I 3 "nonmemory_operand")])) -- (clobber (reg:CC CC_REGNUM))])] -+ [(match_operand:SVE_FULL_I 2 "register_operand") -+ (match_operand:SVE_FULL_I 3 "nonmemory_operand")])) -+ (clobber (reg:CC_NZC CC_REGNUM))])] - "TARGET_SVE" - { - aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]), -@@ -1715,14 +6638,285 @@ - } - ) - -+;; Predicated integer comparisons. 
-+(define_insn "@aarch64_pred_cmp" -+ [(set (match_operand: 0 "register_operand" "=Upa, Upa") -+ (unspec: -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (match_operand:SI 2 "aarch64_sve_ptrue_flag") -+ (SVE_INT_CMP: -+ (match_operand:SVE_FULL_I 3 "register_operand" "w, w") -+ (match_operand:SVE_FULL_I 4 "aarch64_sve_cmp__operand" ", w"))] -+ UNSPEC_PRED_Z)) -+ (clobber (reg:CC_NZC CC_REGNUM))] -+ "TARGET_SVE" -+ "@ -+ cmp\t%0., %1/z, %3., #%4 -+ cmp\t%0., %1/z, %3., %4." -+) -+ -+;; Predicated integer comparisons in which both the flag and predicate -+;; results are interesting. -+(define_insn_and_rewrite "*cmp_cc" -+ [(set (reg:CC_NZC CC_REGNUM) -+ (unspec:CC_NZC -+ [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl") -+ (match_operand 4) -+ (match_operand:SI 5 "aarch64_sve_ptrue_flag") -+ (unspec: -+ [(match_operand 6) -+ (match_operand:SI 7 "aarch64_sve_ptrue_flag") -+ (SVE_INT_CMP: -+ (match_operand:SVE_FULL_I 2 "register_operand" "w, w") -+ (match_operand:SVE_FULL_I 3 "aarch64_sve_cmp__operand" ", w"))] -+ UNSPEC_PRED_Z)] -+ UNSPEC_PTEST)) -+ (set (match_operand: 0 "register_operand" "=Upa, Upa") -+ (unspec: -+ [(match_dup 6) -+ (match_dup 7) -+ (SVE_INT_CMP: -+ (match_dup 2) -+ (match_dup 3))] -+ UNSPEC_PRED_Z))] -+ "TARGET_SVE -+ && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" -+ "@ -+ cmp\t%0., %1/z, %2., #%3 -+ cmp\t%0., %1/z, %2., %3." -+ "&& !rtx_equal_p (operands[4], operands[6])" -+ { -+ operands[6] = copy_rtx (operands[4]); -+ operands[7] = operands[5]; -+ } -+) -+ -+;; Predicated integer comparisons in which only the flags result is -+;; interesting. -+(define_insn_and_rewrite "*cmp_ptest" -+ [(set (reg:CC_NZC CC_REGNUM) -+ (unspec:CC_NZC -+ [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl") -+ (match_operand 4) -+ (match_operand:SI 5 "aarch64_sve_ptrue_flag") -+ (unspec: -+ [(match_operand 6) -+ (match_operand:SI 7 "aarch64_sve_ptrue_flag") -+ (SVE_INT_CMP: -+ (match_operand:SVE_FULL_I 2 "register_operand" "w, w") -+ (match_operand:SVE_FULL_I 3 "aarch64_sve_cmp__operand" ", w"))] -+ UNSPEC_PRED_Z)] -+ UNSPEC_PTEST)) -+ (clobber (match_scratch: 0 "=Upa, Upa"))] -+ "TARGET_SVE -+ && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" -+ "@ -+ cmp\t%0., %1/z, %2., #%3 -+ cmp\t%0., %1/z, %2., %3." -+ "&& !rtx_equal_p (operands[4], operands[6])" -+ { -+ operands[6] = copy_rtx (operands[4]); -+ operands[7] = operands[5]; -+ } -+) -+ -+;; Predicated integer comparisons, formed by combining a PTRUE-predicated -+;; comparison with an AND. Split the instruction into its preferred form -+;; at the earliest opportunity, in order to get rid of the redundant -+;; operand 4. -+(define_insn_and_split "*cmp_and" -+ [(set (match_operand: 0 "register_operand" "=Upa, Upa") -+ (and: -+ (unspec: -+ [(match_operand 4) -+ (const_int SVE_KNOWN_PTRUE) -+ (SVE_INT_CMP: -+ (match_operand:SVE_FULL_I 2 "register_operand" "w, w") -+ (match_operand:SVE_FULL_I 3 "aarch64_sve_cmp__operand" ", w"))] -+ UNSPEC_PRED_Z) -+ (match_operand: 1 "register_operand" "Upl, Upl"))) -+ (clobber (reg:CC_NZC CC_REGNUM))] -+ "TARGET_SVE" -+ "#" -+ "&& 1" -+ [(parallel -+ [(set (match_dup 0) -+ (unspec: -+ [(match_dup 1) -+ (const_int SVE_MAYBE_NOT_PTRUE) -+ (SVE_INT_CMP: -+ (match_dup 2) -+ (match_dup 3))] -+ UNSPEC_PRED_Z)) -+ (clobber (reg:CC_NZC CC_REGNUM))])] -+) -+ -+;; Predicated integer wide comparisons. 
-+(define_insn "@aarch64_pred_cmp_wide" -+ [(set (match_operand: 0 "register_operand" "=Upa") -+ (unspec: -+ [(match_operand:VNx16BI 1 "register_operand" "Upl") -+ (match_operand:SI 2 "aarch64_sve_ptrue_flag") -+ (unspec: -+ [(match_operand:SVE_FULL_BHSI 3 "register_operand" "w") -+ (match_operand:VNx2DI 4 "register_operand" "w")] -+ SVE_COND_INT_CMP_WIDE)] -+ UNSPEC_PRED_Z)) -+ (clobber (reg:CC_NZC CC_REGNUM))] -+ "TARGET_SVE" -+ "cmp\t%0., %1/z, %3., %4.d" -+) -+ -+;; Predicated integer wide comparisons in which both the flag and -+;; predicate results are interesting. -+(define_insn "*aarch64_pred_cmp_wide_cc" -+ [(set (reg:CC_NZC CC_REGNUM) -+ (unspec:CC_NZC -+ [(match_operand:VNx16BI 1 "register_operand" "Upl") -+ (match_operand 4) -+ (match_operand:SI 5 "aarch64_sve_ptrue_flag") -+ (unspec: -+ [(match_operand:VNx16BI 6 "register_operand" "Upl") -+ (match_operand:SI 7 "aarch64_sve_ptrue_flag") -+ (unspec: -+ [(match_operand:SVE_FULL_BHSI 2 "register_operand" "w") -+ (match_operand:VNx2DI 3 "register_operand" "w")] -+ SVE_COND_INT_CMP_WIDE)] -+ UNSPEC_PRED_Z)] -+ UNSPEC_PTEST)) -+ (set (match_operand: 0 "register_operand" "=Upa") -+ (unspec: -+ [(match_dup 6) -+ (match_dup 7) -+ (unspec: -+ [(match_dup 2) -+ (match_dup 3)] -+ SVE_COND_INT_CMP_WIDE)] -+ UNSPEC_PRED_Z))] -+ "TARGET_SVE -+ && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" -+ "cmp\t%0., %1/z, %2., %3.d" -+) -+ -+;; Predicated integer wide comparisons in which only the flags result -+;; is interesting. -+(define_insn "*aarch64_pred_cmp_wide_ptest" -+ [(set (reg:CC_NZC CC_REGNUM) -+ (unspec:CC_NZC -+ [(match_operand:VNx16BI 1 "register_operand" "Upl") -+ (match_operand 4) -+ (match_operand:SI 5 "aarch64_sve_ptrue_flag") -+ (unspec: -+ [(match_operand:VNx16BI 6 "register_operand" "Upl") -+ (match_operand:SI 7 "aarch64_sve_ptrue_flag") -+ (unspec: -+ [(match_operand:SVE_FULL_BHSI 2 "register_operand" "w") -+ (match_operand:VNx2DI 3 "register_operand" "w")] -+ SVE_COND_INT_CMP_WIDE)] -+ UNSPEC_PRED_Z)] -+ UNSPEC_PTEST)) -+ (clobber (match_scratch: 0 "=Upa"))] -+ "TARGET_SVE -+ && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" -+ "cmp\t%0., %1/z, %2., %3.d" -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- [INT] While tests -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - WHILELE -+;; - WHILELO -+;; - WHILELS -+;; - WHILELT -+;; ------------------------------------------------------------------------- -+ -+;; Set element I of the result if (cmp (plus operand1 J) operand2) is -+;; true for all J in [0, I]. -+(define_insn "@while_" -+ [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") -+ (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ") -+ (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")] -+ SVE_WHILE)) -+ (clobber (reg:CC_NZC CC_REGNUM))] -+ "TARGET_SVE" -+ "while\t%0., %1, %2" -+) -+ -+;; The WHILE instructions set the flags in the same way as a PTEST with -+;; a PTRUE GP. Handle the case in which both results are useful. The GP -+;; operands to the PTEST aren't needed, so we allow them to be anything. 
-+(define_insn_and_rewrite "*while__cc" -+ [(set (reg:CC_NZC CC_REGNUM) -+ (unspec:CC_NZC -+ [(match_operand 3) -+ (match_operand 4) -+ (const_int SVE_KNOWN_PTRUE) -+ (unspec:PRED_ALL -+ [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ") -+ (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")] -+ SVE_WHILE)] -+ UNSPEC_PTEST)) -+ (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") -+ (unspec:PRED_ALL [(match_dup 1) -+ (match_dup 2)] -+ SVE_WHILE))] -+ "TARGET_SVE" -+ "while\t%0., %1, %2" -+ ;; Force the compiler to drop the unused predicate operand, so that we -+ ;; don't have an unnecessary PTRUE. -+ "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))" -+ { -+ operands[3] = CONSTM1_RTX (VNx16BImode); -+ operands[4] = CONSTM1_RTX (mode); -+ } -+) -+ -+;; Same, but handle the case in which only the flags result is useful. -+(define_insn_and_rewrite "*while__ptest" -+ [(set (reg:CC_NZC CC_REGNUM) -+ (unspec:CC_NZC -+ [(match_operand 3) -+ (match_operand 4) -+ (const_int SVE_KNOWN_PTRUE) -+ (unspec:PRED_ALL -+ [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ") -+ (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")] -+ SVE_WHILE)] -+ UNSPEC_PTEST)) -+ (clobber (match_scratch:PRED_ALL 0 "=Upa"))] -+ "TARGET_SVE" -+ "while\t%0., %1, %2" -+ ;; Force the compiler to drop the unused predicate operand, so that we -+ ;; don't have an unnecessary PTRUE. -+ "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))" -+ { -+ operands[3] = CONSTM1_RTX (VNx16BImode); -+ operands[4] = CONSTM1_RTX (mode); -+ } -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- [FP] Direct comparisons -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - FCMEQ -+;; - FCMGE -+;; - FCMGT -+;; - FCMLE -+;; - FCMLT -+;; - FCMNE -+;; - FCMUO -+;; ------------------------------------------------------------------------- -+ - ;; Floating-point comparisons. All comparisons except FCMUO allow a zero - ;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO - ;; with zero. - (define_expand "vec_cmp" - [(set (match_operand: 0 "register_operand") - (match_operator: 1 "comparison_operator" -- [(match_operand:SVE_F 2 "register_operand") -- (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")]))] -+ [(match_operand:SVE_FULL_F 2 "register_operand") -+ (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]))] - "TARGET_SVE" - { - aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]), -@@ -1731,6 +6925,172 @@ - } - ) - -+;; Predicated floating-point comparisons. -+(define_insn "@aarch64_pred_fcm" -+ [(set (match_operand: 0 "register_operand" "=Upa, Upa") -+ (unspec: -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (match_operand:SI 2 "aarch64_sve_ptrue_flag") -+ (match_operand:SVE_FULL_F 3 "register_operand" "w, w") -+ (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, w")] -+ SVE_COND_FP_CMP_I0))] -+ "TARGET_SVE" -+ "@ -+ fcm\t%0., %1/z, %3., #0.0 -+ fcm\t%0., %1/z, %3., %4." -+) -+ -+;; Same for unordered comparisons. -+(define_insn "@aarch64_pred_fcmuo" -+ [(set (match_operand: 0 "register_operand" "=Upa") -+ (unspec: -+ [(match_operand: 1 "register_operand" "Upl") -+ (match_operand:SI 2 "aarch64_sve_ptrue_flag") -+ (match_operand:SVE_FULL_F 3 "register_operand" "w") -+ (match_operand:SVE_FULL_F 4 "register_operand" "w")] -+ UNSPEC_COND_FCMUO))] -+ "TARGET_SVE" -+ "fcmuo\t%0., %1/z, %3., %4." 
-+) -+ -+;; Floating-point comparisons predicated on a PTRUE, with the results ANDed -+;; with another predicate P. This does not have the same trapping behavior -+;; as predicating the comparison itself on P, but it's a legitimate fold, -+;; since we can drop any potentially-trapping operations whose results -+;; are not needed. -+;; -+;; Split the instruction into its preferred form (below) at the earliest -+;; opportunity, in order to get rid of the redundant operand 1. -+(define_insn_and_split "*fcm_and_combine" -+ [(set (match_operand: 0 "register_operand" "=Upa, Upa") -+ (and: -+ (unspec: -+ [(match_operand: 1) -+ (const_int SVE_KNOWN_PTRUE) -+ (match_operand:SVE_FULL_F 2 "register_operand" "w, w") -+ (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "Dz, w")] -+ SVE_COND_FP_CMP_I0) -+ (match_operand: 4 "register_operand" "Upl, Upl")))] -+ "TARGET_SVE" -+ "#" -+ "&& 1" -+ [(set (match_dup 0) -+ (unspec: -+ [(match_dup 4) -+ (const_int SVE_MAYBE_NOT_PTRUE) -+ (match_dup 2) -+ (match_dup 3)] -+ SVE_COND_FP_CMP_I0))] -+) -+ -+;; Same for unordered comparisons. -+(define_insn_and_split "*fcmuo_and_combine" -+ [(set (match_operand: 0 "register_operand" "=Upa") -+ (and: -+ (unspec: -+ [(match_operand: 1) -+ (const_int SVE_KNOWN_PTRUE) -+ (match_operand:SVE_FULL_F 2 "register_operand" "w") -+ (match_operand:SVE_FULL_F 3 "register_operand" "w")] -+ UNSPEC_COND_FCMUO) -+ (match_operand: 4 "register_operand" "Upl")))] -+ "TARGET_SVE" -+ "#" -+ "&& 1" -+ [(set (match_dup 0) -+ (unspec: -+ [(match_dup 4) -+ (const_int SVE_MAYBE_NOT_PTRUE) -+ (match_dup 2) -+ (match_dup 3)] -+ UNSPEC_COND_FCMUO))] -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- [FP] Absolute comparisons -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - FACGE -+;; - FACGT -+;; - FACLE -+;; - FACLT -+;; ------------------------------------------------------------------------- -+ -+;; Predicated floating-point absolute comparisons. -+(define_expand "@aarch64_pred_fac" -+ [(set (match_operand: 0 "register_operand") -+ (unspec: -+ [(match_operand: 1 "register_operand") -+ (match_operand:SI 2 "aarch64_sve_ptrue_flag") -+ (unspec:SVE_FULL_F -+ [(match_dup 1) -+ (match_dup 2) -+ (match_operand:SVE_FULL_F 3 "register_operand")] -+ UNSPEC_COND_FABS) -+ (unspec:SVE_FULL_F -+ [(match_dup 1) -+ (match_dup 2) -+ (match_operand:SVE_FULL_F 4 "register_operand")] -+ UNSPEC_COND_FABS)] -+ SVE_COND_FP_ABS_CMP))] -+ "TARGET_SVE" -+) -+ -+(define_insn_and_rewrite "*aarch64_pred_fac" -+ [(set (match_operand: 0 "register_operand" "=Upa") -+ (unspec: -+ [(match_operand: 1 "register_operand" "Upl") -+ (match_operand:SI 4 "aarch64_sve_ptrue_flag") -+ (unspec:SVE_FULL_F -+ [(match_operand 5) -+ (match_operand:SI 6 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_F 2 "register_operand" "w")] -+ UNSPEC_COND_FABS) -+ (unspec:SVE_FULL_F -+ [(match_operand 7) -+ (match_operand:SI 8 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_F 3 "register_operand" "w")] -+ UNSPEC_COND_FABS)] -+ SVE_COND_FP_ABS_CMP))] -+ "TARGET_SVE -+ && aarch64_sve_pred_dominates_p (&operands[5], operands[1]) -+ && aarch64_sve_pred_dominates_p (&operands[7], operands[1])" -+ "fac\t%0., %1/z, %2., %3." 
-+ "&& (!rtx_equal_p (operands[1], operands[5]) -+ || !rtx_equal_p (operands[1], operands[7]))" -+ { -+ operands[5] = copy_rtx (operands[1]); -+ operands[7] = copy_rtx (operands[1]); -+ } -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- [PRED] Select -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - SEL -+;; ------------------------------------------------------------------------- -+ -+(define_insn "@vcond_mask_" -+ [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") -+ (ior:PRED_ALL -+ (and:PRED_ALL -+ (match_operand:PRED_ALL 3 "register_operand" "Upa") -+ (match_operand:PRED_ALL 1 "register_operand" "Upa")) -+ (and:PRED_ALL -+ (not (match_dup 3)) -+ (match_operand:PRED_ALL 2 "register_operand" "Upa"))))] -+ "TARGET_SVE" -+ "sel\t%0.b, %3, %1.b, %2.b" -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- [PRED] Test bits -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - PTEST -+;; ------------------------------------------------------------------------- -+ - ;; Branch based on predicate equality or inequality. - (define_expand "cbranch4" - [(set (pc) -@@ -1742,1409 +7102,2120 @@ - (pc)))] - "" - { -- rtx ptrue = force_reg (mode, CONSTM1_RTX (mode)); -+ rtx ptrue = force_reg (VNx16BImode, aarch64_ptrue_all ()); -+ rtx cast_ptrue = gen_lowpart (mode, ptrue); -+ rtx ptrue_flag = gen_int_mode (SVE_KNOWN_PTRUE, SImode); - rtx pred; - if (operands[2] == CONST0_RTX (mode)) - pred = operands[1]; - else - { - pred = gen_reg_rtx (mode); -- emit_insn (gen_pred_xor3 (pred, ptrue, operands[1], -- operands[2])); -+ emit_insn (gen_aarch64_pred_xor_z (pred, cast_ptrue, operands[1], -+ operands[2])); - } -- emit_insn (gen_ptest_ptrue (ptrue, pred)); -- operands[1] = gen_rtx_REG (CCmode, CC_REGNUM); -+ emit_insn (gen_aarch64_ptest (ptrue, cast_ptrue, ptrue_flag, pred)); -+ operands[1] = gen_rtx_REG (CC_NZCmode, CC_REGNUM); - operands[2] = const0_rtx; - } - ) - --;; Unpredicated integer MIN/MAX. --(define_expand "3" -- [(set (match_operand:SVE_I 0 "register_operand") -- (unspec:SVE_I -- [(match_dup 3) -- (MAXMIN:SVE_I (match_operand:SVE_I 1 "register_operand") -- (match_operand:SVE_I 2 "register_operand"))] -- UNSPEC_MERGE_PTRUE))] -+;; See "Description of UNSPEC_PTEST" above for details. -+(define_insn "aarch64_ptest" -+ [(set (reg:CC_NZC CC_REGNUM) -+ (unspec:CC_NZC [(match_operand:VNx16BI 0 "register_operand" "Upa") -+ (match_operand 1) -+ (match_operand:SI 2 "aarch64_sve_ptrue_flag") -+ (match_operand:PRED_ALL 3 "register_operand" "Upa")] -+ UNSPEC_PTEST))] -+ "TARGET_SVE" -+ "ptest\t%0, %3.b" -+) -+ -+;; ========================================================================= -+;; == Reductions -+;; ========================================================================= -+ -+;; ------------------------------------------------------------------------- -+;; ---- [INT,FP] Conditional reductions -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - CLASTA -+;; - CLASTB -+;; ------------------------------------------------------------------------- -+ -+;; Set operand 0 to the last active element in operand 3, or to tied -+;; operand 1 if no elements are active. 
-+(define_insn "@fold_extract__" -+ [(set (match_operand: 0 "register_operand" "=?r, w") -+ (unspec: -+ [(match_operand: 1 "register_operand" "0, 0") -+ (match_operand: 2 "register_operand" "Upl, Upl") -+ (match_operand:SVE_FULL 3 "register_operand" "w, w")] -+ CLAST))] - "TARGET_SVE" -- { -- operands[3] = force_reg (mode, CONSTM1_RTX (mode)); -- } -+ "@ -+ clast\t%0, %2, %0, %3. -+ clast\t%0, %2, %0, %3." - ) - --;; Integer MIN/MAX predicated with a PTRUE. --(define_insn "*3" -- [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") -- (unspec:SVE_I -- [(match_operand: 1 "register_operand" "Upl, Upl") -- (MAXMIN:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w") -- (match_operand:SVE_I 3 "register_operand" "w, w"))] -- UNSPEC_MERGE_PTRUE))] -+(define_insn "@aarch64_fold_extract_vector__" -+ [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL -+ [(match_operand:SVE_FULL 1 "register_operand" "0, w") -+ (match_operand: 2 "register_operand" "Upl, Upl") -+ (match_operand:SVE_FULL 3 "register_operand" "w, w")] -+ CLAST))] - "TARGET_SVE" - "@ -- \t%0., %1/m, %0., %3. -- movprfx\t%0, %2\;\t%0., %1/m, %0., %3." -- [(set_attr "movprfx" "*,yes")] --) -+ clast\t%0., %2, %0., %3. -+ movprfx\t%0, %1\;clast\t%0., %2, %0., %3." -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- [INT] Tree reductions -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - ANDV -+;; - EORV -+;; - ORV -+;; - SADDV -+;; - SMAXV -+;; - SMINV -+;; - UADDV -+;; - UMAXV -+;; - UMINV -+;; ------------------------------------------------------------------------- - --;; Unpredicated floating-point MIN/MAX. --(define_expand "3" -- [(set (match_operand:SVE_F 0 "register_operand") -- (unspec:SVE_F -- [(match_dup 3) -- (FMAXMIN:SVE_F (match_operand:SVE_F 1 "register_operand") -- (match_operand:SVE_F 2 "register_operand"))] -- UNSPEC_MERGE_PTRUE))] -+;; Unpredicated integer add reduction. -+(define_expand "reduc_plus_scal_" -+ [(match_operand: 0 "register_operand") -+ (match_operand:SVE_FULL_I 1 "register_operand")] - "TARGET_SVE" - { -- operands[3] = force_reg (mode, CONSTM1_RTX (mode)); -+ rtx pred = aarch64_ptrue_reg (mode); -+ rtx tmp = mode == DImode ? operands[0] : gen_reg_rtx (DImode); -+ emit_insn (gen_aarch64_pred_reduc_uadd_ (tmp, pred, operands[1])); -+ if (tmp != operands[0]) -+ emit_move_insn (operands[0], gen_lowpart (mode, tmp)); -+ DONE; - } - ) - --;; Floating-point MIN/MAX predicated with a PTRUE. --(define_insn "*3" -- [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") -- (unspec:SVE_F -- [(match_operand: 1 "register_operand" "Upl, Upl") -- (FMAXMIN:SVE_F (match_operand:SVE_F 2 "register_operand" "%0, w") -- (match_operand:SVE_F 3 "register_operand" "w, w"))] -- UNSPEC_MERGE_PTRUE))] -- "TARGET_SVE" -- "@ -- fnm\t%0., %1/m, %0., %3. -- movprfx\t%0, %2\;fnm\t%0., %1/m, %0., %3." -- [(set_attr "movprfx" "*,yes")] -+;; Predicated integer add reduction. The result is always 64-bits. -+(define_insn "@aarch64_pred_reduc__" -+ [(set (match_operand:DI 0 "register_operand" "=w") -+ (unspec:DI [(match_operand: 1 "register_operand" "Upl") -+ (match_operand:SVE_FULL_I 2 "register_operand" "w")] -+ SVE_INT_ADDV))] -+ "TARGET_SVE && >= " -+ "addv\t%d0, %1, %2." - ) - --;; Unpredicated fmin/fmax. 
--(define_expand "3" -- [(set (match_operand:SVE_F 0 "register_operand") -- (unspec:SVE_F -- [(match_dup 3) -- (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand") -- (match_operand:SVE_F 2 "register_operand")] -- FMAXMIN_UNS)] -- UNSPEC_MERGE_PTRUE))] -+;; Unpredicated integer reductions. -+(define_expand "reduc__scal_" -+ [(set (match_operand: 0 "register_operand") -+ (unspec: [(match_dup 2) -+ (match_operand:SVE_FULL_I 1 "register_operand")] -+ SVE_INT_REDUCTION))] - "TARGET_SVE" - { -- operands[3] = force_reg (mode, CONSTM1_RTX (mode)); -+ operands[2] = aarch64_ptrue_reg (mode); - } - ) - --;; fmin/fmax predicated with a PTRUE. --(define_insn "*3" -- [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") -- (unspec:SVE_F -- [(match_operand: 1 "register_operand" "Upl, Upl") -- (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "%0, w") -- (match_operand:SVE_F 3 "register_operand" "w, w")] -- FMAXMIN_UNS)] -- UNSPEC_MERGE_PTRUE))] -+;; Predicated integer reductions. -+(define_insn "@aarch64_pred_reduc__" -+ [(set (match_operand: 0 "register_operand" "=w") -+ (unspec: [(match_operand: 1 "register_operand" "Upl") -+ (match_operand:SVE_FULL_I 2 "register_operand" "w")] -+ SVE_INT_REDUCTION))] - "TARGET_SVE" -- "@ -- \t%0., %1/m, %0., %3. -- movprfx\t%0, %2\;\t%0., %1/m, %0., %3." -- [(set_attr "movprfx" "*,yes")] -+ "\t%0, %1, %2." - ) - --;; Predicated integer operations with select. --(define_expand "cond_" -- [(set (match_operand:SVE_I 0 "register_operand") -- (unspec:SVE_I -- [(match_operand: 1 "register_operand") -- (SVE_INT_BINARY:SVE_I -- (match_operand:SVE_I 2 "register_operand") -- (match_operand:SVE_I 3 "register_operand")) -- (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")] -- UNSPEC_SEL))] -- "TARGET_SVE" --) -+;; ------------------------------------------------------------------------- -+;; ---- [FP] Tree reductions -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - FADDV -+;; - FMAXNMV -+;; - FMAXV -+;; - FMINNMV -+;; - FMINV -+;; ------------------------------------------------------------------------- - --(define_expand "cond_" -- [(set (match_operand:SVE_SDI 0 "register_operand") -- (unspec:SVE_SDI -- [(match_operand: 1 "register_operand") -- (SVE_INT_BINARY_SD:SVE_SDI -- (match_operand:SVE_SDI 2 "register_operand") -- (match_operand:SVE_SDI 3 "register_operand")) -- (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero")] -- UNSPEC_SEL))] -+;; Unpredicated floating-point tree reductions. -+(define_expand "reduc__scal_" -+ [(set (match_operand: 0 "register_operand") -+ (unspec: [(match_dup 2) -+ (match_operand:SVE_FULL_F 1 "register_operand")] -+ SVE_FP_REDUCTION))] - "TARGET_SVE" -+ { -+ operands[2] = aarch64_ptrue_reg (mode); -+ } - ) - --;; Predicated integer operations with select matching the output operand. --(define_insn "*cond__0" -- [(set (match_operand:SVE_I 0 "register_operand" "+w, w, ?&w") -- (unspec:SVE_I -- [(match_operand: 1 "register_operand" "Upl, Upl, Upl") -- (SVE_INT_BINARY:SVE_I -- (match_operand:SVE_I 2 "register_operand" "0, w, w") -- (match_operand:SVE_I 3 "register_operand" "w, 0, w")) -- (match_dup 0)] -- UNSPEC_SEL))] -+;; Predicated floating-point tree reductions. -+(define_insn "@aarch64_pred_reduc__" -+ [(set (match_operand: 0 "register_operand" "=w") -+ (unspec: [(match_operand: 1 "register_operand" "Upl") -+ (match_operand:SVE_FULL_F 2 "register_operand" "w")] -+ SVE_FP_REDUCTION))] - "TARGET_SVE" -- "@ -- \t%0., %1/m, %0., %3. -- \t%0., %1/m, %0., %2. 
-- movprfx\t%0, %1/m, %2\;\t%0., %1/m, %0., %3." -- [(set_attr "movprfx" "*,*,yes")] -+ "\t%0, %1, %2." - ) - --(define_insn "*cond__0" -- [(set (match_operand:SVE_SDI 0 "register_operand" "+w, w, ?&w") -- (unspec:SVE_SDI -- [(match_operand: 1 "register_operand" "Upl, Upl, Upl") -- (SVE_INT_BINARY_SD:SVE_SDI -- (match_operand:SVE_SDI 2 "register_operand" "0, w, w") -- (match_operand:SVE_SDI 3 "register_operand" "w, 0, w")) -- (match_dup 0)] -- UNSPEC_SEL))] -- "TARGET_SVE" -- "@ -- \t%0., %1/m, %0., %3. -- \t%0., %1/m, %0., %2. -- movprfx\t%0, %1/m, %2\;\t%0., %1/m, %0., %3." -- [(set_attr "movprfx" "*,*,yes")] --) -+;; ------------------------------------------------------------------------- -+;; ---- [FP] Left-to-right reductions -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - FADDA -+;; ------------------------------------------------------------------------- - --;; Predicated integer operations with select matching the first operand. --(define_insn "*cond__2" -- [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") -- (unspec:SVE_I -- [(match_operand: 1 "register_operand" "Upl, Upl") -- (SVE_INT_BINARY:SVE_I -- (match_operand:SVE_I 2 "register_operand" "0, w") -- (match_operand:SVE_I 3 "register_operand" "w, w")) -- (match_dup 2)] -- UNSPEC_SEL))] -+;; Unpredicated in-order FP reductions. -+(define_expand "fold_left_plus_" -+ [(set (match_operand: 0 "register_operand") -+ (unspec: [(match_dup 3) -+ (match_operand: 1 "register_operand") -+ (match_operand:SVE_FULL_F 2 "register_operand")] -+ UNSPEC_FADDA))] - "TARGET_SVE" -- "@ -- \t%0., %1/m, %0., %3. -- movprfx\t%0, %2\;\t%0., %1/m, %0., %3." -- [(set_attr "movprfx" "*,yes")] -+ { -+ operands[3] = aarch64_ptrue_reg (mode); -+ } - ) - --(define_insn "*cond__2" -- [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w") -- (unspec:SVE_SDI -- [(match_operand: 1 "register_operand" "Upl, Upl") -- (SVE_INT_BINARY_SD:SVE_SDI -- (match_operand:SVE_SDI 2 "register_operand" "0, w") -- (match_operand:SVE_SDI 3 "register_operand" "w, w")) -- (match_dup 2)] -- UNSPEC_SEL))] -+;; Predicated in-order FP reductions. -+(define_insn "mask_fold_left_plus_" -+ [(set (match_operand: 0 "register_operand" "=w") -+ (unspec: [(match_operand: 3 "register_operand" "Upl") -+ (match_operand: 1 "register_operand" "0") -+ (match_operand:SVE_FULL_F 2 "register_operand" "w")] -+ UNSPEC_FADDA))] - "TARGET_SVE" -- "@ -- \t%0., %1/m, %0., %3. -- movprfx\t%0, %2\;\t%0., %1/m, %0., %3." -- [(set_attr "movprfx" "*,yes")] -+ "fadda\t%0, %3, %0, %2." - ) - --;; Predicated integer operations with select matching the second operand. --(define_insn "*cond__3" -- [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") -- (unspec:SVE_I -- [(match_operand: 1 "register_operand" "Upl, Upl") -- (SVE_INT_BINARY:SVE_I -- (match_operand:SVE_I 2 "register_operand" "w, w") -- (match_operand:SVE_I 3 "register_operand" "0, w")) -- (match_dup 3)] -- UNSPEC_SEL))] -- "TARGET_SVE" -- "@ -- \t%0., %1/m, %0., %2. -- movprfx\t%0, %3\;\t%0., %1/m, %0., %2." 
-- [(set_attr "movprfx" "*,yes")] --) -+;; ========================================================================= -+;; == Permutes -+;; ========================================================================= - --(define_insn "*cond__3" -- [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w") -- (unspec:SVE_SDI -- [(match_operand: 1 "register_operand" "Upl, Upl") -- (SVE_INT_BINARY_SD:SVE_SDI -- (match_operand:SVE_SDI 2 "register_operand" "w, w") -- (match_operand:SVE_SDI 3 "register_operand" "0, w")) -- (match_dup 3)] -- UNSPEC_SEL))] -- "TARGET_SVE" -- "@ -- \t%0., %1/m, %0., %2. -- movprfx\t%0, %3\;\t%0., %1/m, %0., %2." -- [(set_attr "movprfx" "*,yes")] --) -+;; ------------------------------------------------------------------------- -+;; ---- [INT,FP] General permutes -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - TBL -+;; ------------------------------------------------------------------------- - --;; Predicated integer operations with select matching zero. --(define_insn "*cond__z" -- [(set (match_operand:SVE_I 0 "register_operand" "=&w") -- (unspec:SVE_I -- [(match_operand: 1 "register_operand" "Upl") -- (SVE_INT_BINARY:SVE_I -- (match_operand:SVE_I 2 "register_operand" "w") -- (match_operand:SVE_I 3 "register_operand" "w")) -- (match_operand:SVE_I 4 "aarch64_simd_imm_zero")] -- UNSPEC_SEL))] -- "TARGET_SVE" -- "movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3." -- [(set_attr "movprfx" "yes")] -+(define_expand "vec_perm" -+ [(match_operand:SVE_FULL 0 "register_operand") -+ (match_operand:SVE_FULL 1 "register_operand") -+ (match_operand:SVE_FULL 2 "register_operand") -+ (match_operand: 3 "aarch64_sve_vec_perm_operand")] -+ "TARGET_SVE && GET_MODE_NUNITS (mode).is_constant ()" -+ { -+ aarch64_expand_sve_vec_perm (operands[0], operands[1], -+ operands[2], operands[3]); -+ DONE; -+ } - ) - --(define_insn "*cond__z" -- [(set (match_operand:SVE_SDI 0 "register_operand" "=&w") -- (unspec:SVE_SDI -- [(match_operand: 1 "register_operand" "Upl") -- (SVE_INT_BINARY_SD:SVE_SDI -- (match_operand:SVE_SDI 2 "register_operand" "w") -- (match_operand:SVE_SDI 3 "register_operand" "w")) -- (match_operand:SVE_SDI 4 "aarch64_simd_imm_zero")] -- UNSPEC_SEL))] -+(define_insn "@aarch64_sve_tbl" -+ [(set (match_operand:SVE_FULL 0 "register_operand" "=w") -+ (unspec:SVE_FULL -+ [(match_operand:SVE_FULL 1 "register_operand" "w") -+ (match_operand: 2 "register_operand" "w")] -+ UNSPEC_TBL))] - "TARGET_SVE" -- "movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3." -- [(set_attr "movprfx" "yes")] -+ "tbl\t%0., %1., %2." - ) - --;; Synthetic predications with select unmatched. --(define_insn "*cond__any" -- [(set (match_operand:SVE_I 0 "register_operand" "=&w") -- (unspec:SVE_I -+;; ------------------------------------------------------------------------- -+;; ---- [INT,FP] Special-purpose unary permutes -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - COMPACT -+;; - DUP -+;; - REV -+;; ------------------------------------------------------------------------- -+ -+;; Compact active elements and pad with zeros. 
-+(define_insn "@aarch64_sve_compact" -+ [(set (match_operand:SVE_FULL_SD 0 "register_operand" "=w") -+ (unspec:SVE_FULL_SD - [(match_operand: 1 "register_operand" "Upl") -- (SVE_INT_BINARY:SVE_I -- (match_operand:SVE_I 2 "register_operand" "w") -- (match_operand:SVE_I 3 "register_operand" "w")) -- (match_operand:SVE_I 4 "register_operand" "w")] -- UNSPEC_SEL))] -+ (match_operand:SVE_FULL_SD 2 "register_operand" "w")] -+ UNSPEC_SVE_COMPACT))] - "TARGET_SVE" -- "#" -+ "compact\t%0., %1, %2." - ) - --(define_insn "*cond__any" -- [(set (match_operand:SVE_SDI 0 "register_operand" "=&w") -- (unspec:SVE_SDI -- [(match_operand: 1 "register_operand" "Upl") -- (SVE_INT_BINARY_SD:SVE_I -- (match_operand:SVE_SDI 2 "register_operand" "w") -- (match_operand:SVE_SDI 3 "register_operand" "w")) -- (match_operand:SVE_SDI 4 "register_operand" "w")] -- UNSPEC_SEL))] -- "TARGET_SVE" -- "#" -+;; Duplicate one element of a vector. -+(define_insn "@aarch64_sve_dup_lane" -+ [(set (match_operand:SVE_FULL 0 "register_operand" "=w") -+ (vec_duplicate:SVE_FULL -+ (vec_select: -+ (match_operand:SVE_FULL 1 "register_operand" "w") -+ (parallel [(match_operand:SI 2 "const_int_operand")]))))] -+ "TARGET_SVE -+ && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (mode), 0, 63)" -+ "dup\t%0., %1.[%2]" - ) - --(define_split -- [(set (match_operand:SVE_I 0 "register_operand") -- (unspec:SVE_I -- [(match_operand: 1 "register_operand") -- (match_operator:SVE_I 5 "aarch64_sve_any_binary_operator" -- [(match_operand:SVE_I 2 "register_operand") -- (match_operand:SVE_I 3 "register_operand")]) -- (match_operand:SVE_I 4 "register_operand")] -- UNSPEC_SEL))] -- "TARGET_SVE && reload_completed -- && !(rtx_equal_p (operands[0], operands[4]) -- || rtx_equal_p (operands[2], operands[4]) -- || rtx_equal_p (operands[3], operands[4]))" -- ; Not matchable by any one insn or movprfx insn. We need a separate select. -- [(set (match_dup 0) -- (unspec:SVE_I [(match_dup 1) (match_dup 2) (match_dup 4)] -- UNSPEC_SEL)) -- (set (match_dup 0) -- (unspec:SVE_I -- [(match_dup 1) -- (match_op_dup 5 [(match_dup 0) (match_dup 3)]) -- (match_dup 0)] -- UNSPEC_SEL))] -+;; Use DUP.Q to duplicate a 128-bit segment of a register. -+;; -+;; The vec_select: sets memory lane number N of the V128 to lane -+;; number op2 + N of op1. (We don't need to distinguish between memory -+;; and architectural register lane numbering for op1 or op0, since the -+;; two numbering schemes are the same for SVE.) -+;; -+;; The vec_duplicate:SVE_FULL then copies memory lane number N of the -+;; V128 (and thus lane number op2 + N of op1) to lane numbers N + I * STEP -+;; of op0. We therefore get the correct result for both endiannesses. -+;; -+;; The wrinkle is that for big-endian V128 registers, memory lane numbering -+;; is in the opposite order to architectural register lane numbering. -+;; Thus if we were to do this operation via a V128 temporary register, -+;; the vec_select and vec_duplicate would both involve a reverse operation -+;; for big-endian targets. In this fused pattern the two reverses cancel -+;; each other out. 
-+(define_insn "@aarch64_sve_dupq_lane" -+ [(set (match_operand:SVE_FULL 0 "register_operand" "=w") -+ (vec_duplicate:SVE_FULL -+ (vec_select: -+ (match_operand:SVE_FULL 1 "register_operand" "w") -+ (match_operand 2 "ascending_int_parallel"))))] -+ "TARGET_SVE -+ && (INTVAL (XVECEXP (operands[2], 0, 0)) -+ * GET_MODE_SIZE (mode)) % 16 == 0 -+ && IN_RANGE (INTVAL (XVECEXP (operands[2], 0, 0)) -+ * GET_MODE_SIZE (mode), 0, 63)" -+ { -+ unsigned int byte = (INTVAL (XVECEXP (operands[2], 0, 0)) -+ * GET_MODE_SIZE (mode)); -+ operands[2] = gen_int_mode (byte / 16, DImode); -+ return "dup\t%0.q, %1.q[%2]"; -+ } - ) - --;; Set operand 0 to the last active element in operand 3, or to tied --;; operand 1 if no elements are active. --(define_insn "fold_extract_last_" -- [(set (match_operand: 0 "register_operand" "=r, w") -- (unspec: -- [(match_operand: 1 "register_operand" "0, 0") -- (match_operand: 2 "register_operand" "Upl, Upl") -- (match_operand:SVE_ALL 3 "register_operand" "w, w")] -- UNSPEC_CLASTB))] -+;; Reverse the order of elements within a full vector. -+(define_insn "@aarch64_sve_rev" -+ [(set (match_operand:SVE_FULL 0 "register_operand" "=w") -+ (unspec:SVE_FULL -+ [(match_operand:SVE_FULL 1 "register_operand" "w")] -+ UNSPEC_REV))] - "TARGET_SVE" -- "@ -- clastb\t%0, %2, %0, %3. -- clastb\t%0, %2, %0, %3." --) -+ "rev\t%0., %1.") - --;; Unpredicated integer add reduction. --(define_expand "reduc_plus_scal_" -- [(set (match_operand: 0 "register_operand") -- (unspec: [(match_dup 2) -- (match_operand:SVE_I 1 "register_operand")] -- UNSPEC_ADDV))] -+;; ------------------------------------------------------------------------- -+;; ---- [INT,FP] Special-purpose binary permutes -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - SPLICE -+;; - TRN1 -+;; - TRN2 -+;; - UZP1 -+;; - UZP2 -+;; - ZIP1 -+;; - ZIP2 -+;; ------------------------------------------------------------------------- -+ -+;; Like EXT, but start at the first active element. -+(define_insn "@aarch64_sve_splice" -+ [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL -+ [(match_operand: 1 "register_operand" "Upl, Upl") -+ (match_operand:SVE_FULL 2 "register_operand" "0, w") -+ (match_operand:SVE_FULL 3 "register_operand" "w, w")] -+ UNSPEC_SVE_SPLICE))] - "TARGET_SVE" -+ "@ -+ splice\t%0., %1, %0., %3. -+ movprfx\t%0, %2\;splice\t%0., %1, %0., %3." -+ [(set_attr "movprfx" "*, yes")] -+) -+ -+;; Permutes that take half the elements from one vector and half the -+;; elements from the other. -+(define_insn "@aarch64_sve_" -+ [(set (match_operand:SVE_FULL 0 "register_operand" "=w") -+ (unspec:SVE_FULL -+ [(match_operand:SVE_FULL 1 "register_operand" "w") -+ (match_operand:SVE_FULL 2 "register_operand" "w")] -+ PERMUTE))] -+ "TARGET_SVE" -+ "\t%0., %1., %2." -+) -+ -+;; Apply PERMUTE to 128-bit sequences. The behavior of these patterns -+;; doesn't depend on the mode. -+(define_insn "@aarch64_sve_" -+ [(set (match_operand:SVE_FULL 0 "register_operand" "=w") -+ (unspec:SVE_FULL -+ [(match_operand:SVE_FULL 1 "register_operand" "w") -+ (match_operand:SVE_FULL 2 "register_operand" "w")] -+ PERMUTEQ))] -+ "TARGET_SVE_F64MM" -+ "\t%0.q, %1.q, %2.q" -+) -+ -+;; Concatenate two vectors and extract a subvector. Note that the -+;; immediate (third) operand is the lane index not the byte index. 
-+(define_insn "@aarch64_sve_ext" -+ [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w") -+ (unspec:SVE_FULL -+ [(match_operand:SVE_FULL 1 "register_operand" "0, w") -+ (match_operand:SVE_FULL 2 "register_operand" "w, w") -+ (match_operand:SI 3 "const_int_operand")] -+ UNSPEC_EXT))] -+ "TARGET_SVE -+ && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (mode), 0, 255)" - { -- operands[2] = force_reg (mode, CONSTM1_RTX (mode)); -+ operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (mode)); -+ return (which_alternative == 0 -+ ? "ext\\t%0.b, %0.b, %2.b, #%3" -+ : "movprfx\t%0, %1\;ext\\t%0.b, %0.b, %2.b, #%3"); - } -+ [(set_attr "movprfx" "*,yes")] - ) - --;; Predicated integer add reduction. The result is always 64-bits. --(define_insn "*reduc_plus_scal_" -- [(set (match_operand: 0 "register_operand" "=w") -- (unspec: [(match_operand: 1 "register_operand" "Upl") -- (match_operand:SVE_I 2 "register_operand" "w")] -- UNSPEC_ADDV))] -+;; ------------------------------------------------------------------------- -+;; ---- [PRED] Special-purpose unary permutes -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - REV -+;; ------------------------------------------------------------------------- -+ -+(define_insn "@aarch64_sve_rev" -+ [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") -+ (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")] -+ UNSPEC_REV))] - "TARGET_SVE" -- "uaddv\t%d0, %1, %2." --) -+ "rev\t%0., %1.") - --;; Unpredicated floating-point add reduction. --(define_expand "reduc_plus_scal_" -- [(set (match_operand: 0 "register_operand") -- (unspec: [(match_dup 2) -- (match_operand:SVE_F 1 "register_operand")] -- UNSPEC_FADDV))] -+;; ------------------------------------------------------------------------- -+;; ---- [PRED] Special-purpose binary permutes -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - TRN1 -+;; - TRN2 -+;; - UZP1 -+;; - UZP2 -+;; - ZIP1 -+;; - ZIP2 -+;; ------------------------------------------------------------------------- -+ -+;; Permutes that take half the elements from one vector and half the -+;; elements from the other. -+(define_insn "@aarch64_sve_" -+ [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") -+ (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa") -+ (match_operand:PRED_ALL 2 "register_operand" "Upa")] -+ PERMUTE))] - "TARGET_SVE" -- { -- operands[2] = force_reg (mode, CONSTM1_RTX (mode)); -- } -+ "\t%0., %1., %2." - ) - --;; Predicated floating-point add reduction. --(define_insn "*reduc_plus_scal_" -- [(set (match_operand: 0 "register_operand" "=w") -- (unspec: [(match_operand: 1 "register_operand" "Upl") -- (match_operand:SVE_F 2 "register_operand" "w")] -- UNSPEC_FADDV))] -+;; ========================================================================= -+;; == Conversions -+;; ========================================================================= -+ -+;; ------------------------------------------------------------------------- -+;; ---- [INT<-INT] Packs -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - UZP1 -+;; ------------------------------------------------------------------------- -+ -+;; Integer pack. Use UZP1 on the narrower type, which discards -+;; the high part of each wide element. 
-+(define_insn "vec_pack_trunc_" -+ [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w") -+ (unspec:SVE_FULL_BHSI -+ [(match_operand: 1 "register_operand" "w") -+ (match_operand: 2 "register_operand" "w")] -+ UNSPEC_PACK))] - "TARGET_SVE" -- "faddv\t%0, %1, %2." -+ "uzp1\t%0., %1., %2." - ) - --;; Unpredicated integer MIN/MAX reduction. --(define_expand "reduc__scal_" -- [(set (match_operand: 0 "register_operand") -- (unspec: [(match_dup 2) -- (match_operand:SVE_I 1 "register_operand")] -- MAXMINV))] -+;; ------------------------------------------------------------------------- -+;; ---- [INT<-INT] Unpacks -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - SUNPKHI -+;; - SUNPKLO -+;; - UUNPKHI -+;; - UUNPKLO -+;; ------------------------------------------------------------------------- -+ -+;; Unpack the low or high half of a vector, where "high" refers to -+;; the low-numbered lanes for big-endian and the high-numbered lanes -+;; for little-endian. -+(define_expand "vec_unpack__" -+ [(match_operand: 0 "register_operand") -+ (unspec: -+ [(match_operand:SVE_FULL_BHSI 1 "register_operand")] UNPACK)] - "TARGET_SVE" - { -- operands[2] = force_reg (mode, CONSTM1_RTX (mode)); -+ emit_insn (( -+ ? gen_aarch64_sve_unpkhi_ -+ : gen_aarch64_sve_unpklo_) -+ (operands[0], operands[1])); -+ DONE; - } - ) - --;; Predicated integer MIN/MAX reduction. --(define_insn "*reduc__scal_" -- [(set (match_operand: 0 "register_operand" "=w") -- (unspec: [(match_operand: 1 "register_operand" "Upl") -- (match_operand:SVE_I 2 "register_operand" "w")] -- MAXMINV))] -+(define_insn "@aarch64_sve_unpk_" -+ [(set (match_operand: 0 "register_operand" "=w") -+ (unspec: -+ [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")] -+ UNPACK))] - "TARGET_SVE" -- "v\t%0, %1, %2." -+ "unpk\t%0., %1." - ) - --;; Unpredicated floating-point MIN/MAX reduction. --(define_expand "reduc__scal_" -- [(set (match_operand: 0 "register_operand") -- (unspec: [(match_dup 2) -- (match_operand:SVE_F 1 "register_operand")] -- FMAXMINV))] -+;; ------------------------------------------------------------------------- -+;; ---- [INT<-FP] Conversions -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - FCVTZS -+;; - FCVTZU -+;; ------------------------------------------------------------------------- -+ -+;; Unpredicated conversion of floats to integers of the same size (HF to HI, -+;; SF to SI or DF to DI). -+(define_expand "2" -+ [(set (match_operand: 0 "register_operand") -+ (unspec: -+ [(match_dup 2) -+ (const_int SVE_RELAXED_GP) -+ (match_operand:SVE_FULL_F 1 "register_operand")] -+ SVE_COND_FCVTI))] - "TARGET_SVE" - { -- operands[2] = force_reg (mode, CONSTM1_RTX (mode)); -+ operands[2] = aarch64_ptrue_reg (mode); - } - ) - --;; Predicated floating-point MIN/MAX reduction. --(define_insn "*reduc__scal_" -- [(set (match_operand: 0 "register_operand" "=w") -- (unspec: [(match_operand: 1 "register_operand" "Upl") -- (match_operand:SVE_F 2 "register_operand" "w")] -- FMAXMINV))] -- "TARGET_SVE" -- "v\t%0, %1, %2." -+;; Predicated float-to-integer conversion, either to the same width or wider. -+(define_insn "@aarch64_sve__nontrunc" -+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w") -+ (unspec:SVE_FULL_HSDI -+ [(match_operand: 1 "register_operand" "Upl") -+ (match_operand:SI 3 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_F 2 "register_operand" "w")] -+ SVE_COND_FCVTI))] -+ "TARGET_SVE && >= " -+ "fcvtz\t%0., %1/m, %2." 
- ) - --(define_expand "reduc__scal_" -- [(set (match_operand: 0 "register_operand") -- (unspec: [(match_dup 2) -- (match_operand:SVE_I 1 "register_operand")] -- BITWISEV))] -- "TARGET_SVE" -+;; Predicated narrowing float-to-integer conversion. -+(define_insn "@aarch64_sve__trunc" -+ [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w") -+ (unspec:VNx4SI_ONLY -+ [(match_operand:VNx2BI 1 "register_operand" "Upl") -+ (match_operand:SI 3 "aarch64_sve_gp_strictness") -+ (match_operand:VNx2DF_ONLY 2 "register_operand" "w")] -+ SVE_COND_FCVTI))] -+ "TARGET_SVE" -+ "fcvtz\t%0., %1/m, %2." -+) -+ -+;; Predicated float-to-integer conversion with merging, either to the same -+;; width or wider. -+(define_expand "@cond__nontrunc" -+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand") -+ (unspec:SVE_FULL_HSDI -+ [(match_operand: 1 "register_operand") -+ (unspec:SVE_FULL_HSDI -+ [(match_dup 1) -+ (const_int SVE_STRICT_GP) -+ (match_operand:SVE_FULL_F 2 "register_operand")] -+ SVE_COND_FCVTI) -+ (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero")] -+ UNSPEC_SEL))] -+ "TARGET_SVE && >= " -+) -+ -+;; The first alternative doesn't need the earlyclobber, but the only case -+;; it would help is the uninteresting one in which operands 2 and 3 are -+;; the same register (despite having different modes). Making all the -+;; alternatives earlyclobber makes things more consistent for the -+;; register allocator. -+(define_insn_and_rewrite "*cond__nontrunc" -+ [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w, ?&w") -+ (unspec:SVE_FULL_HSDI -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl") -+ (unspec:SVE_FULL_HSDI -+ [(match_operand 4) -+ (match_operand:SI 5 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] -+ SVE_COND_FCVTI) -+ (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] -+ UNSPEC_SEL))] -+ "TARGET_SVE -+ && >= -+ && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" -+ "@ -+ fcvtz\t%0., %1/m, %2. -+ movprfx\t%0., %1/z, %2.\;fcvtz\t%0., %1/m, %2. -+ movprfx\t%0, %3\;fcvtz\t%0., %1/m, %2." -+ "&& !rtx_equal_p (operands[1], operands[4])" - { -- operands[2] = force_reg (mode, CONSTM1_RTX (mode)); -+ operands[4] = copy_rtx (operands[1]); - } -+ [(set_attr "movprfx" "*,yes,yes")] -+) -+ -+;; Predicated narrowing float-to-integer conversion with merging. -+(define_expand "@cond__trunc" -+ [(set (match_operand:VNx4SI_ONLY 0 "register_operand") -+ (unspec:VNx4SI_ONLY -+ [(match_operand:VNx2BI 1 "register_operand") -+ (unspec:VNx4SI_ONLY -+ [(match_dup 1) -+ (const_int SVE_STRICT_GP) -+ (match_operand:VNx2DF_ONLY 2 "register_operand")] -+ SVE_COND_FCVTI) -+ (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")] -+ UNSPEC_SEL))] -+ "TARGET_SVE" - ) - --(define_insn "*reduc__scal_" -- [(set (match_operand: 0 "register_operand" "=w") -- (unspec: [(match_operand: 1 "register_operand" "Upl") -- (match_operand:SVE_I 2 "register_operand" "w")] -- BITWISEV))] -+(define_insn "*cond__trunc" -+ [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=&w, &w, ?&w") -+ (unspec:VNx4SI_ONLY -+ [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl, Upl") -+ (unspec:VNx4SI_ONLY -+ [(match_dup 1) -+ (match_operand:SI 4 "aarch64_sve_gp_strictness") -+ (match_operand:VNx2DF_ONLY 2 "register_operand" "w, w, w")] -+ SVE_COND_FCVTI) -+ (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] -+ UNSPEC_SEL))] - "TARGET_SVE" -- "\t%0, %1, %2." -+ "@ -+ fcvtz\t%0., %1/m, %2. 
-+ movprfx\t%0., %1/z, %2.\;fcvtz\t%0., %1/m, %2. -+ movprfx\t%0, %3\;fcvtz\t%0., %1/m, %2." -+ [(set_attr "movprfx" "*,yes,yes")] - ) - --;; Unpredicated in-order FP reductions. --(define_expand "fold_left_plus_" -- [(set (match_operand: 0 "register_operand") -- (unspec: [(match_dup 3) -- (match_operand: 1 "register_operand") -- (match_operand:SVE_F 2 "register_operand")] -- UNSPEC_FADDA))] -+;; ------------------------------------------------------------------------- -+;; ---- [INT<-FP] Packs -+;; ------------------------------------------------------------------------- -+;; The patterns in this section are synthetic. -+;; ------------------------------------------------------------------------- -+ -+;; Convert two vectors of DF to SI and pack the results into a single vector. -+(define_expand "vec_pack_fix_trunc_vnx2df" -+ [(set (match_dup 4) -+ (unspec:VNx4SI -+ [(match_dup 3) -+ (const_int SVE_RELAXED_GP) -+ (match_operand:VNx2DF 1 "register_operand")] -+ SVE_COND_FCVTI)) -+ (set (match_dup 5) -+ (unspec:VNx4SI -+ [(match_dup 3) -+ (const_int SVE_RELAXED_GP) -+ (match_operand:VNx2DF 2 "register_operand")] -+ SVE_COND_FCVTI)) -+ (set (match_operand:VNx4SI 0 "register_operand") -+ (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))] - "TARGET_SVE" - { -- operands[3] = force_reg (mode, CONSTM1_RTX (mode)); -+ operands[3] = aarch64_ptrue_reg (VNx2BImode); -+ operands[4] = gen_reg_rtx (VNx4SImode); -+ operands[5] = gen_reg_rtx (VNx4SImode); - } - ) - --;; In-order FP reductions predicated with PTRUE. --(define_insn "*fold_left_plus_" -- [(set (match_operand: 0 "register_operand" "=w") -- (unspec: [(match_operand: 1 "register_operand" "Upl") -- (match_operand: 2 "register_operand" "0") -- (match_operand:SVE_F 3 "register_operand" "w")] -- UNSPEC_FADDA))] -- "TARGET_SVE" -- "fadda\t%0, %1, %0, %3." --) -+;; ------------------------------------------------------------------------- -+;; ---- [INT<-FP] Unpacks -+;; ------------------------------------------------------------------------- -+;; No patterns here yet! -+;; ------------------------------------------------------------------------- - --;; Predicated form of the above in-order reduction. --(define_insn "*pred_fold_left_plus_" -- [(set (match_operand: 0 "register_operand" "=w") -- (unspec: -- [(match_operand: 1 "register_operand" "0") -- (unspec:SVE_F -- [(match_operand: 2 "register_operand" "Upl") -- (match_operand:SVE_F 3 "register_operand" "w") -- (match_operand:SVE_F 4 "aarch64_simd_imm_zero")] -- UNSPEC_SEL)] -- UNSPEC_FADDA))] -- "TARGET_SVE" -- "fadda\t%0, %2, %0, %3." --) -+;; ------------------------------------------------------------------------- -+;; ---- [FP<-INT] Conversions -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - SCVTF -+;; - UCVTF -+;; ------------------------------------------------------------------------- - --;; Unpredicated floating-point addition. --(define_expand "add3" -- [(set (match_operand:SVE_F 0 "register_operand") -- (unspec:SVE_F -- [(match_dup 3) -- (plus:SVE_F -- (match_operand:SVE_F 1 "register_operand") -- (match_operand:SVE_F 2 "aarch64_sve_float_arith_with_sub_operand"))] -- UNSPEC_MERGE_PTRUE))] -+;; Unpredicated conversion of integers to floats of the same size -+;; (HI to HF, SI to SF or DI to DF). 
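A sketch of the kind of loop served by the conversion expander that follows (illustrative only; vectorization and the exact instruction selection are assumptions):

/* Same-width integer-to-float conversion; with SVE vectorization this
   is expected to be emitted as a predicated SCVTF.  */
void
to_float (float *restrict d, const int *restrict s, int n)
{
  for (int i = 0; i < n; ++i)
    d[i] = (float) s[i];
}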
-+(define_expand "2" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand") -+ (unspec:SVE_FULL_F -+ [(match_dup 2) -+ (const_int SVE_RELAXED_GP) -+ (match_operand: 1 "register_operand")] -+ SVE_COND_ICVTF))] - "TARGET_SVE" - { -- operands[3] = force_reg (mode, CONSTM1_RTX (mode)); -+ operands[2] = aarch64_ptrue_reg (mode); - } - ) - --;; Floating-point addition predicated with a PTRUE. --(define_insn_and_split "*add3" -- [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w") -- (unspec:SVE_F -- [(match_operand: 1 "register_operand" "Upl, Upl, Upl") -- (plus:SVE_F -- (match_operand:SVE_F 2 "register_operand" "%0, 0, w") -- (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w"))] -- UNSPEC_MERGE_PTRUE))] -- "TARGET_SVE" -- "@ -- fadd\t%0., %1/m, %0., #%3 -- fsub\t%0., %1/m, %0., #%N3 -- #" -- ; Split the unpredicated form after reload, so that we don't have -- ; the unnecessary PTRUE. -- "&& reload_completed -- && register_operand (operands[3], mode)" -- [(set (match_dup 0) (plus:SVE_F (match_dup 2) (match_dup 3)))] -+;; Predicated integer-to-float conversion, either to the same width or -+;; narrower. -+(define_insn "@aarch64_sve__nonextend" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand" "Upl") -+ (match_operand:SI 3 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")] -+ SVE_COND_ICVTF))] -+ "TARGET_SVE && >= " -+ "cvtf\t%0., %1/m, %2." - ) - --;; Unpredicated floating-point subtraction. --(define_expand "sub3" -- [(set (match_operand:SVE_F 0 "register_operand") -- (unspec:SVE_F -- [(match_dup 3) -- (minus:SVE_F -- (match_operand:SVE_F 1 "aarch64_sve_float_arith_operand") -- (match_operand:SVE_F 2 "register_operand"))] -- UNSPEC_MERGE_PTRUE))] -- "TARGET_SVE" -- { -- operands[3] = force_reg (mode, CONSTM1_RTX (mode)); -- } -+;; Predicated widening integer-to-float conversion. -+(define_insn "@aarch64_sve__extend" -+ [(set (match_operand:VNx2DF_ONLY 0 "register_operand" "=w") -+ (unspec:VNx2DF_ONLY -+ [(match_operand:VNx2BI 1 "register_operand" "Upl") -+ (match_operand:SI 3 "aarch64_sve_gp_strictness") -+ (match_operand:VNx4SI_ONLY 2 "register_operand" "w")] -+ SVE_COND_ICVTF))] -+ "TARGET_SVE" -+ "cvtf\t%0., %1/m, %2." -+) -+ -+;; Predicated integer-to-float conversion with merging, either to the same -+;; width or narrower. -+(define_expand "@cond__nonextend" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand") -+ (unspec:SVE_FULL_F -+ [(match_dup 1) -+ (const_int SVE_STRICT_GP) -+ (match_operand:SVE_FULL_HSDI 2 "register_operand")] -+ SVE_COND_ICVTF) -+ (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")] -+ UNSPEC_SEL))] -+ "TARGET_SVE && >= " - ) - --;; Floating-point subtraction predicated with a PTRUE. --(define_insn_and_split "*sub3" -- [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w, w") -- (unspec:SVE_F -- [(match_operand: 1 "register_operand" "Upl, Upl, Upl, Upl") -- (minus:SVE_F -- (match_operand:SVE_F 2 "aarch64_sve_float_arith_operand" "0, 0, vsA, w") -- (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, 0, w"))] -- UNSPEC_MERGE_PTRUE))] -+;; The first alternative doesn't need the earlyclobber, but the only case -+;; it would help is the uninteresting one in which operands 2 and 3 are -+;; the same register (despite having different modes). 
Making all the -+;; alternatives earlyclobber makes things more consistent for the -+;; register allocator. -+(define_insn_and_rewrite "*cond__nonextend" -+ [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, ?&w") -+ (unspec:SVE_FULL_F -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl") -+ (unspec:SVE_FULL_F -+ [(match_operand 4) -+ (match_operand:SI 5 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")] -+ SVE_COND_ICVTF) -+ (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] -+ UNSPEC_SEL))] - "TARGET_SVE -- && (register_operand (operands[2], mode) -- || register_operand (operands[3], mode))" -+ && >= -+ && aarch64_sve_pred_dominates_p (&operands[4], operands[1])" - "@ -- fsub\t%0., %1/m, %0., #%3 -- fadd\t%0., %1/m, %0., #%N3 -- fsubr\t%0., %1/m, %0., #%2 -- #" -- ; Split the unpredicated form after reload, so that we don't have -- ; the unnecessary PTRUE. -- "&& reload_completed -- && register_operand (operands[2], mode) -- && register_operand (operands[3], mode)" -- [(set (match_dup 0) (minus:SVE_F (match_dup 2) (match_dup 3)))] --) -- --;; Unpredicated floating-point multiplication. --(define_expand "mul3" -- [(set (match_operand:SVE_F 0 "register_operand") -- (unspec:SVE_F -- [(match_dup 3) -- (mult:SVE_F -- (match_operand:SVE_F 1 "register_operand") -- (match_operand:SVE_F 2 "aarch64_sve_float_mul_operand"))] -- UNSPEC_MERGE_PTRUE))] -- "TARGET_SVE" -+ cvtf\t%0., %1/m, %2. -+ movprfx\t%0., %1/z, %2.\;cvtf\t%0., %1/m, %2. -+ movprfx\t%0, %3\;cvtf\t%0., %1/m, %2." -+ "&& !rtx_equal_p (operands[1], operands[4])" - { -- operands[3] = force_reg (mode, CONSTM1_RTX (mode)); -+ operands[4] = copy_rtx (operands[1]); - } -+ [(set_attr "movprfx" "*,yes,yes")] -+) -+ -+;; Predicated widening integer-to-float conversion with merging. -+(define_expand "@cond__extend" -+ [(set (match_operand:VNx2DF_ONLY 0 "register_operand") -+ (unspec:VNx2DF_ONLY -+ [(match_operand:VNx2BI 1 "register_operand") -+ (unspec:VNx2DF_ONLY -+ [(match_dup 1) -+ (const_int SVE_STRICT_GP) -+ (match_operand:VNx4SI_ONLY 2 "register_operand")] -+ SVE_COND_ICVTF) -+ (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero")] -+ UNSPEC_SEL))] -+ "TARGET_SVE" - ) - --;; Floating-point multiplication predicated with a PTRUE. --(define_insn_and_split "*mul3" -- [(set (match_operand:SVE_F 0 "register_operand" "=w, w") -- (unspec:SVE_F -- [(match_operand: 1 "register_operand" "Upl, Upl") -- (mult:SVE_F -- (match_operand:SVE_F 2 "register_operand" "%0, w") -- (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand" "vsM, w"))] -- UNSPEC_MERGE_PTRUE))] -+(define_insn "*cond__extend" -+ [(set (match_operand:VNx2DF_ONLY 0 "register_operand" "=w, ?&w, ?&w") -+ (unspec:VNx2DF_ONLY -+ [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl, Upl") -+ (unspec:VNx2DF_ONLY -+ [(match_dup 1) -+ (match_operand:SI 4 "aarch64_sve_gp_strictness") -+ (match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w")] -+ SVE_COND_ICVTF) -+ (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] -+ UNSPEC_SEL))] - "TARGET_SVE" - "@ -- fmul\t%0., %1/m, %0., #%3 -- #" -- ; Split the unpredicated form after reload, so that we don't have -- ; the unnecessary PTRUE. -- "&& reload_completed -- && register_operand (operands[3], mode)" -- [(set (match_dup 0) (mult:SVE_F (match_dup 2) (match_dup 3)))] -+ cvtf\t%0., %1/m, %2. -+ movprfx\t%0., %1/z, %2.\;cvtf\t%0., %1/m, %2. -+ movprfx\t%0, %3\;cvtf\t%0., %1/m, %2." 
-+ [(set_attr "movprfx" "*,yes,yes")] - ) - --;; Unpredicated floating-point binary operations (post-RA only). --;; These are generated by splitting a predicated instruction whose --;; predicate is unused. --(define_insn "*post_ra_3" -- [(set (match_operand:SVE_F 0 "register_operand" "=w") -- (SVE_UNPRED_FP_BINARY:SVE_F -- (match_operand:SVE_F 1 "register_operand" "w") -- (match_operand:SVE_F 2 "register_operand" "w")))] -- "TARGET_SVE && reload_completed" -- "\t%0., %1., %2.") -+;; ------------------------------------------------------------------------- -+;; ---- [FP<-INT] Packs -+;; ------------------------------------------------------------------------- -+;; No patterns here yet! -+;; ------------------------------------------------------------------------- - --;; Unpredicated fma (%0 = (%1 * %2) + %3). --(define_expand "fma4" -- [(set (match_operand:SVE_F 0 "register_operand") -- (unspec:SVE_F -- [(match_dup 4) -- (fma:SVE_F (match_operand:SVE_F 1 "register_operand") -- (match_operand:SVE_F 2 "register_operand") -- (match_operand:SVE_F 3 "register_operand"))] -- UNSPEC_MERGE_PTRUE))] -+;; ------------------------------------------------------------------------- -+;; ---- [FP<-INT] Unpacks -+;; ------------------------------------------------------------------------- -+;; The patterns in this section are synthetic. -+;; ------------------------------------------------------------------------- -+ -+;; Unpack one half of a VNx4SI to VNx2DF. First unpack from VNx4SI -+;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the -+;; unpacked VNx4SI to VNx2DF. -+(define_expand "vec_unpack_float__vnx4si" -+ [(match_operand:VNx2DF 0 "register_operand") -+ (FLOATUORS:VNx2DF -+ (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")] -+ UNPACK_UNSIGNED))] - "TARGET_SVE" - { -- operands[4] = force_reg (mode, CONSTM1_RTX (mode)); -+ /* Use ZIP to do the unpack, since we don't care about the upper halves -+ and since it has the nice property of not needing any subregs. -+ If using UUNPK* turns out to be preferable, we could model it as -+ a ZIP whose first operand is zero. */ -+ rtx temp = gen_reg_rtx (VNx4SImode); -+ emit_insn (( -+ ? gen_aarch64_sve_zip2vnx4si -+ : gen_aarch64_sve_zip1vnx4si) -+ (temp, operands[1], operands[1])); -+ rtx ptrue = aarch64_ptrue_reg (VNx2BImode); -+ rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode); -+ emit_insn (gen_aarch64_sve__extendvnx4sivnx2df -+ (operands[0], ptrue, temp, strictness)); -+ DONE; - } - ) - --;; fma predicated with a PTRUE. --(define_insn "*fma4" -- [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w") -- (unspec:SVE_F -- [(match_operand: 1 "register_operand" "Upl, Upl, Upl") -- (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w, w") -- (match_operand:SVE_F 4 "register_operand" "w, w, w") -- (match_operand:SVE_F 2 "register_operand" "w, 0, w"))] -- UNSPEC_MERGE_PTRUE))] -- "TARGET_SVE" -- "@ -- fmad\t%0., %1/m, %4., %2. -- fmla\t%0., %1/m, %3., %4. -- movprfx\t%0, %2\;fmla\t%0., %1/m, %3., %4." -- [(set_attr "movprfx" "*,*,yes")] --) -+;; ------------------------------------------------------------------------- -+;; ---- [FP<-FP] Packs -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - FCVT -+;; ------------------------------------------------------------------------- - --;; Unpredicated fnma (%0 = (-%1 * %2) + %3). 
--(define_expand "fnma4" -- [(set (match_operand:SVE_F 0 "register_operand") -- (unspec:SVE_F -- [(match_dup 4) -- (fma:SVE_F (neg:SVE_F -- (match_operand:SVE_F 1 "register_operand")) -- (match_operand:SVE_F 2 "register_operand") -- (match_operand:SVE_F 3 "register_operand"))] -- UNSPEC_MERGE_PTRUE))] -+;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack -+;; the results into a single vector. -+(define_expand "vec_pack_trunc_" -+ [(set (match_dup 4) -+ (unspec:SVE_FULL_HSF -+ [(match_dup 3) -+ (const_int SVE_RELAXED_GP) -+ (match_operand: 1 "register_operand")] -+ UNSPEC_COND_FCVT)) -+ (set (match_dup 5) -+ (unspec:SVE_FULL_HSF -+ [(match_dup 3) -+ (const_int SVE_RELAXED_GP) -+ (match_operand: 2 "register_operand")] -+ UNSPEC_COND_FCVT)) -+ (set (match_operand:SVE_FULL_HSF 0 "register_operand") -+ (unspec:SVE_FULL_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))] - "TARGET_SVE" - { -- operands[4] = force_reg (mode, CONSTM1_RTX (mode)); -+ operands[3] = aarch64_ptrue_reg (mode); -+ operands[4] = gen_reg_rtx (mode); -+ operands[5] = gen_reg_rtx (mode); - } - ) - --;; fnma predicated with a PTRUE. --(define_insn "*fnma4" -- [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w") -- (unspec:SVE_F -- [(match_operand: 1 "register_operand" "Upl, Upl, Upl") -- (fma:SVE_F (neg:SVE_F -- (match_operand:SVE_F 3 "register_operand" "%0, w, w")) -- (match_operand:SVE_F 4 "register_operand" "w, w, w") -- (match_operand:SVE_F 2 "register_operand" "w, 0, w"))] -- UNSPEC_MERGE_PTRUE))] -- "TARGET_SVE" -+;; Predicated float-to-float truncation. -+(define_insn "@aarch64_sve__trunc" -+ [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w") -+ (unspec:SVE_FULL_HSF -+ [(match_operand: 1 "register_operand" "Upl") -+ (match_operand:SI 3 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_SDF 2 "register_operand" "w")] -+ SVE_COND_FCVT))] -+ "TARGET_SVE && > " -+ "fcvt\t%0., %1/m, %2." -+) -+ -+;; Predicated float-to-float truncation with merging. -+(define_expand "@cond__trunc" -+ [(set (match_operand:SVE_FULL_HSF 0 "register_operand") -+ (unspec:SVE_FULL_HSF -+ [(match_operand: 1 "register_operand") -+ (unspec:SVE_FULL_HSF -+ [(match_dup 1) -+ (const_int SVE_STRICT_GP) -+ (match_operand:SVE_FULL_SDF 2 "register_operand")] -+ SVE_COND_FCVT) -+ (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero")] -+ UNSPEC_SEL))] -+ "TARGET_SVE && > " -+) -+ -+(define_insn "*cond__trunc" -+ [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w, ?&w") -+ (unspec:SVE_FULL_HSF -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl") -+ (unspec:SVE_FULL_HSF -+ [(match_dup 1) -+ (match_operand:SI 4 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_SDF 2 "register_operand" "w, w, w")] -+ SVE_COND_FCVT) -+ (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] -+ UNSPEC_SEL))] -+ "TARGET_SVE && > " - "@ -- fmsb\t%0., %1/m, %4., %2. -- fmls\t%0., %1/m, %3., %4. -- movprfx\t%0, %2\;fmls\t%0., %1/m, %3., %4." -- [(set_attr "movprfx" "*,*,yes")] -+ fcvt\t%0., %1/m, %2. -+ movprfx\t%0., %1/z, %2.\;fcvt\t%0., %1/m, %2. -+ movprfx\t%0, %3\;fcvt\t%0., %1/m, %2." -+ [(set_attr "movprfx" "*,yes,yes")] -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- [FP<-FP] Packs (bfloat16) -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - BFCVT (BF16) -+;; - BFCVTNT (BF16) -+;; ------------------------------------------------------------------------- -+ -+;; Predicated BFCVT. 
-+(define_insn "@aarch64_sve__trunc" -+ [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w") -+ (unspec:VNx8BF_ONLY -+ [(match_operand:VNx4BI 1 "register_operand" "Upl") -+ (match_operand:SI 3 "aarch64_sve_gp_strictness") -+ (match_operand:VNx4SF_ONLY 2 "register_operand" "w")] -+ SVE_COND_FCVT))] -+ "TARGET_SVE_BF16" -+ "bfcvt\t%0.h, %1/m, %2.s" -+) -+ -+;; Predicated BFCVT with merging. -+(define_expand "@cond__trunc" -+ [(set (match_operand:VNx8BF_ONLY 0 "register_operand") -+ (unspec:VNx8BF_ONLY -+ [(match_operand:VNx4BI 1 "register_operand") -+ (unspec:VNx8BF_ONLY -+ [(match_dup 1) -+ (const_int SVE_STRICT_GP) -+ (match_operand:VNx4SF_ONLY 2 "register_operand")] -+ SVE_COND_FCVT) -+ (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero")] -+ UNSPEC_SEL))] -+ "TARGET_SVE_BF16" -+) -+ -+(define_insn "*cond__trunc" -+ [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w, ?&w, ?&w") -+ (unspec:VNx8BF_ONLY -+ [(match_operand:VNx4BI 1 "register_operand" "Upl, Upl, Upl") -+ (unspec:VNx8BF_ONLY -+ [(match_dup 1) -+ (match_operand:SI 4 "aarch64_sve_gp_strictness") -+ (match_operand:VNx4SF_ONLY 2 "register_operand" "w, w, w")] -+ SVE_COND_FCVT) -+ (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] -+ UNSPEC_SEL))] -+ "TARGET_SVE_BF16" -+ "@ -+ bfcvt\t%0.h, %1/m, %2.s -+ movprfx\t%0.s, %1/z, %2.s\;bfcvt\t%0.h, %1/m, %2.s -+ movprfx\t%0, %3\;bfcvt\t%0.h, %1/m, %2.s" -+ [(set_attr "movprfx" "*,yes,yes")] - ) - --;; Unpredicated fms (%0 = (%1 * %2) - %3). --(define_expand "fms4" -- [(set (match_operand:SVE_F 0 "register_operand") -- (unspec:SVE_F -- [(match_dup 4) -- (fma:SVE_F (match_operand:SVE_F 1 "register_operand") -- (match_operand:SVE_F 2 "register_operand") -- (neg:SVE_F -- (match_operand:SVE_F 3 "register_operand")))] -- UNSPEC_MERGE_PTRUE))] -+;; Predicated BFCVTNT. This doesn't give a natural aarch64_pred_*/cond_* -+;; pair because the even elements always have to be supplied for active -+;; elements, even if the inactive elements don't matter. -+;; -+;; This instructions does not take MOVPRFX. -+(define_insn "@aarch64_sve_cvtnt" -+ [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w") -+ (unspec:VNx8BF_ONLY -+ [(match_operand:VNx4BI 2 "register_operand" "Upl") -+ (const_int SVE_STRICT_GP) -+ (match_operand:VNx8BF_ONLY 1 "register_operand" "0") -+ (match_operand:VNx4SF 3 "register_operand" "w")] -+ UNSPEC_COND_FCVTNT))] -+ "TARGET_SVE_BF16" -+ "bfcvtnt\t%0.h, %2/m, %3.s" -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- [FP<-FP] Unpacks -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - FCVT -+;; ------------------------------------------------------------------------- -+ -+;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF. -+;; First unpack the source without conversion, then float-convert the -+;; unpacked source. -+(define_expand "vec_unpacks__" -+ [(match_operand: 0 "register_operand") -+ (unspec:SVE_FULL_HSF -+ [(match_operand:SVE_FULL_HSF 1 "register_operand")] -+ UNPACK_UNSIGNED)] - "TARGET_SVE" - { -- operands[4] = force_reg (mode, CONSTM1_RTX (mode)); -+ /* Use ZIP to do the unpack, since we don't care about the upper halves -+ and since it has the nice property of not needing any subregs. -+ If using UUNPK* turns out to be preferable, we could model it as -+ a ZIP whose first operand is zero. */ -+ rtx temp = gen_reg_rtx (mode); -+ emit_insn (( -+ ? 
gen_aarch64_sve_zip2 -+ : gen_aarch64_sve_zip1) -+ (temp, operands[1], operands[1])); -+ rtx ptrue = aarch64_ptrue_reg (mode); -+ rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode); -+ emit_insn (gen_aarch64_sve_fcvt_nontrunc -+ (operands[0], ptrue, temp, strictness)); -+ DONE; - } - ) - --;; fms predicated with a PTRUE. --(define_insn "*fms4" -- [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w") -- (unspec:SVE_F -- [(match_operand: 1 "register_operand" "Upl, Upl, Upl") -- (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w, w") -- (match_operand:SVE_F 4 "register_operand" "w, w, w") -- (neg:SVE_F -- (match_operand:SVE_F 2 "register_operand" "w, 0, w")))] -- UNSPEC_MERGE_PTRUE))] -- "TARGET_SVE" -+;; Predicated float-to-float extension. -+(define_insn "@aarch64_sve__nontrunc" -+ [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w") -+ (unspec:SVE_FULL_SDF -+ [(match_operand: 1 "register_operand" "Upl") -+ (match_operand:SI 3 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_HSF 2 "register_operand" "w")] -+ SVE_COND_FCVT))] -+ "TARGET_SVE && > " -+ "fcvt\t%0., %1/m, %2." -+) -+ -+;; Predicated float-to-float extension with merging. -+(define_expand "@cond__nontrunc" -+ [(set (match_operand:SVE_FULL_SDF 0 "register_operand") -+ (unspec:SVE_FULL_SDF -+ [(match_operand: 1 "register_operand") -+ (unspec:SVE_FULL_SDF -+ [(match_dup 1) -+ (const_int SVE_STRICT_GP) -+ (match_operand:SVE_FULL_HSF 2 "register_operand")] -+ SVE_COND_FCVT) -+ (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero")] -+ UNSPEC_SEL))] -+ "TARGET_SVE && > " -+) -+ -+(define_insn "*cond__nontrunc" -+ [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w, ?&w, ?&w") -+ (unspec:SVE_FULL_SDF -+ [(match_operand: 1 "register_operand" "Upl, Upl, Upl") -+ (unspec:SVE_FULL_SDF -+ [(match_dup 1) -+ (match_operand:SI 4 "aarch64_sve_gp_strictness") -+ (match_operand:SVE_FULL_HSF 2 "register_operand" "w, w, w")] -+ SVE_COND_FCVT) -+ (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] -+ UNSPEC_SEL))] -+ "TARGET_SVE && > " - "@ -- fnmsb\t%0., %1/m, %4., %2. -- fnmls\t%0., %1/m, %3., %4. -- movprfx\t%0, %2\;fnmls\t%0., %1/m, %3., %4." -- [(set_attr "movprfx" "*,*,yes")] -+ fcvt\t%0., %1/m, %2. -+ movprfx\t%0., %1/z, %2.\;fcvt\t%0., %1/m, %2. -+ movprfx\t%0, %3\;fcvt\t%0., %1/m, %2." -+ [(set_attr "movprfx" "*,yes,yes")] - ) - --;; Unpredicated fnms (%0 = (-%1 * %2) - %3). --(define_expand "fnms4" -- [(set (match_operand:SVE_F 0 "register_operand") -- (unspec:SVE_F -- [(match_dup 4) -- (fma:SVE_F (neg:SVE_F -- (match_operand:SVE_F 1 "register_operand")) -- (match_operand:SVE_F 2 "register_operand") -- (neg:SVE_F -- (match_operand:SVE_F 3 "register_operand")))] -- UNSPEC_MERGE_PTRUE))] -- "TARGET_SVE" -- { -- operands[4] = force_reg (mode, CONSTM1_RTX (mode)); -- } --) -+;; ------------------------------------------------------------------------- -+;; ---- [PRED<-PRED] Packs -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - UZP1 -+;; ------------------------------------------------------------------------- - --;; fnms predicated with a PTRUE. 
--(define_insn "*fnms4" -- [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w") -- (unspec:SVE_F -- [(match_operand: 1 "register_operand" "Upl, Upl, Upl") -- (fma:SVE_F (neg:SVE_F -- (match_operand:SVE_F 3 "register_operand" "%0, w, w")) -- (match_operand:SVE_F 4 "register_operand" "w, w, w") -- (neg:SVE_F -- (match_operand:SVE_F 2 "register_operand" "w, 0, w")))] -- UNSPEC_MERGE_PTRUE))] -+;; Predicate pack. Use UZP1 on the narrower type, which discards -+;; the high part of each wide element. -+(define_insn "vec_pack_trunc_" -+ [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa") -+ (unspec:PRED_BHS -+ [(match_operand: 1 "register_operand" "Upa") -+ (match_operand: 2 "register_operand" "Upa")] -+ UNSPEC_PACK))] - "TARGET_SVE" -- "@ -- fnmad\t%0., %1/m, %4., %2. -- fnmla\t%0., %1/m, %3., %4. -- movprfx\t%0, %2\;fnmla\t%0., %1/m, %3., %4." -- [(set_attr "movprfx" "*,*,yes")] -+ "uzp1\t%0., %1., %2." - ) - --;; Unpredicated floating-point division. --(define_expand "div3" -- [(set (match_operand:SVE_F 0 "register_operand") -- (unspec:SVE_F -- [(match_dup 3) -- (div:SVE_F (match_operand:SVE_F 1 "register_operand") -- (match_operand:SVE_F 2 "register_operand"))] -- UNSPEC_MERGE_PTRUE))] -+;; ------------------------------------------------------------------------- -+;; ---- [PRED<-PRED] Unpacks -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - PUNPKHI -+;; - PUNPKLO -+;; ------------------------------------------------------------------------- -+ -+;; Unpack the low or high half of a predicate, where "high" refers to -+;; the low-numbered lanes for big-endian and the high-numbered lanes -+;; for little-endian. -+(define_expand "vec_unpack__" -+ [(match_operand: 0 "register_operand") -+ (unspec: [(match_operand:PRED_BHS 1 "register_operand")] -+ UNPACK)] - "TARGET_SVE" - { -- operands[3] = force_reg (mode, CONSTM1_RTX (mode)); -+ emit_insn (( -+ ? gen_aarch64_sve_punpkhi_ -+ : gen_aarch64_sve_punpklo_) -+ (operands[0], operands[1])); -+ DONE; - } - ) - --;; Floating-point division predicated with a PTRUE. --(define_insn "*div3" -- [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w") -- (unspec:SVE_F -- [(match_operand: 1 "register_operand" "Upl, Upl, Upl") -- (div:SVE_F (match_operand:SVE_F 2 "register_operand" "0, w, w") -- (match_operand:SVE_F 3 "register_operand" "w, 0, w"))] -- UNSPEC_MERGE_PTRUE))] -+(define_insn "@aarch64_sve_punpk_" -+ [(set (match_operand: 0 "register_operand" "=Upa") -+ (unspec: [(match_operand:PRED_BHS 1 "register_operand" "Upa")] -+ UNPACK_UNSIGNED))] - "TARGET_SVE" -- "@ -- fdiv\t%0., %1/m, %0., %3. -- fdivr\t%0., %1/m, %0., %2. -- movprfx\t%0, %2\;fdiv\t%0., %1/m, %0., %3." -- [(set_attr "movprfx" "*,*,yes")] -+ "punpk\t%0.h, %1.b" - ) - --;; Unpredicated FNEG, FABS and FSQRT. 
--(define_expand "2" -- [(set (match_operand:SVE_F 0 "register_operand") -- (unspec:SVE_F -- [(match_dup 2) -- (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 1 "register_operand"))] -- UNSPEC_MERGE_PTRUE))] -+;; ========================================================================= -+;; == Vector partitioning -+;; ========================================================================= -+ -+;; ------------------------------------------------------------------------- -+;; ---- [PRED] Unary partitioning -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - BRKA -+;; - BRKAS -+;; - BRKB -+;; - BRKBS -+;; ------------------------------------------------------------------------- -+ -+;; Note that unlike most other instructions that have both merging and -+;; zeroing forms, these instructions don't operate elementwise and so -+;; don't fit the IFN_COND model. -+(define_insn "@aarch64_brk" -+ [(set (match_operand:VNx16BI 0 "register_operand" "=Upa, Upa") -+ (unspec:VNx16BI -+ [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa") -+ (match_operand:VNx16BI 2 "register_operand" "Upa, Upa") -+ (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")] -+ SVE_BRK_UNARY))] -+ "TARGET_SVE" -+ "@ -+ brk\t%0.b, %1/z, %2.b -+ brk\t%0.b, %1/m, %2.b" -+) -+ -+;; Same, but also producing a flags result. -+(define_insn "*aarch64_brk_cc" -+ [(set (reg:CC_NZC CC_REGNUM) -+ (unspec:CC_NZC -+ [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa") -+ (match_dup 1) -+ (match_operand:SI 4 "aarch64_sve_ptrue_flag") -+ (unspec:VNx16BI -+ [(match_dup 1) -+ (match_operand:VNx16BI 2 "register_operand" "Upa, Upa") -+ (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")] -+ SVE_BRK_UNARY)] -+ UNSPEC_PTEST)) -+ (set (match_operand:VNx16BI 0 "register_operand" "=Upa, Upa") -+ (unspec:VNx16BI -+ [(match_dup 1) -+ (match_dup 2) -+ (match_dup 3)] -+ SVE_BRK_UNARY))] -+ "TARGET_SVE" -+ "@ -+ brks\t%0.b, %1/z, %2.b -+ brks\t%0.b, %1/m, %2.b" -+) -+ -+;; Same, but with only the flags result being interesting. -+(define_insn "*aarch64_brk_ptest" -+ [(set (reg:CC_NZC CC_REGNUM) -+ (unspec:CC_NZC -+ [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa") -+ (match_dup 1) -+ (match_operand:SI 4 "aarch64_sve_ptrue_flag") -+ (unspec:VNx16BI -+ [(match_dup 1) -+ (match_operand:VNx16BI 2 "register_operand" "Upa, Upa") -+ (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")] -+ SVE_BRK_UNARY)] -+ UNSPEC_PTEST)) -+ (clobber (match_scratch:VNx16BI 0 "=Upa, Upa"))] - "TARGET_SVE" -+ "@ -+ brks\t%0.b, %1/z, %2.b -+ brks\t%0.b, %1/m, %2.b" -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- [PRED] Binary partitioning -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - BRKN -+;; - BRKNS -+;; - BRKPA -+;; - BRKPAS -+;; - BRKPB -+;; - BRKPBS -+;; ------------------------------------------------------------------------- -+ -+;; Binary BRKs (BRKN, BRKPA, BRKPB). -+(define_insn "@aarch64_brk" -+ [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") -+ (unspec:VNx16BI -+ [(match_operand:VNx16BI 1 "register_operand" "Upa") -+ (match_operand:VNx16BI 2 "register_operand" "Upa") -+ (match_operand:VNx16BI 3 "register_operand" "")] -+ SVE_BRK_BINARY))] -+ "TARGET_SVE" -+ "brk\t%0.b, %1/z, %2.b, %.b" -+) -+ -+;; Same, but also producing a flags result. 
-+(define_insn "*aarch64_brk_cc" -+ [(set (reg:CC_NZC CC_REGNUM) -+ (unspec:CC_NZC -+ [(match_operand:VNx16BI 1 "register_operand" "Upa") -+ (match_dup 1) -+ (match_operand:SI 4 "aarch64_sve_ptrue_flag") -+ (unspec:VNx16BI -+ [(match_dup 1) -+ (match_operand:VNx16BI 2 "register_operand" "Upa") -+ (match_operand:VNx16BI 3 "register_operand" "")] -+ SVE_BRK_BINARY)] -+ UNSPEC_PTEST)) -+ (set (match_operand:VNx16BI 0 "register_operand" "=Upa") -+ (unspec:VNx16BI -+ [(match_dup 1) -+ (match_dup 2) -+ (match_dup 3)] -+ SVE_BRK_BINARY))] -+ "TARGET_SVE" -+ "brks\t%0.b, %1/z, %2.b, %.b" -+) -+ -+;; Same, but with only the flags result being interesting. -+(define_insn "*aarch64_brk_ptest" -+ [(set (reg:CC_NZC CC_REGNUM) -+ (unspec:CC_NZC -+ [(match_operand:VNx16BI 1 "register_operand" "Upa") -+ (match_dup 1) -+ (match_operand:SI 4 "aarch64_sve_ptrue_flag") -+ (unspec:VNx16BI -+ [(match_dup 1) -+ (match_operand:VNx16BI 2 "register_operand" "Upa") -+ (match_operand:VNx16BI 3 "register_operand" "")] -+ SVE_BRK_BINARY)] -+ UNSPEC_PTEST)) -+ (clobber (match_scratch:VNx16BI 0 "=Upa"))] -+ "TARGET_SVE" -+ "brks\t%0.b, %1/z, %2.b, %.b" -+) -+ -+;; ------------------------------------------------------------------------- -+;; ---- [PRED] Scalarization -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - PFIRST -+;; - PNEXT -+;; ------------------------------------------------------------------------- -+ -+(define_insn "@aarch64_sve_" -+ [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") -+ (unspec:PRED_ALL -+ [(match_operand:PRED_ALL 1 "register_operand" "Upa") -+ (match_operand:SI 2 "aarch64_sve_ptrue_flag") -+ (match_operand:PRED_ALL 3 "register_operand" "0")] -+ SVE_PITER)) -+ (clobber (reg:CC_NZC CC_REGNUM))] -+ "TARGET_SVE && >= " -+ "\t%0., %1, %0." -+) -+ -+;; Same, but also producing a flags result. -+(define_insn_and_rewrite "*aarch64_sve__cc" -+ [(set (reg:CC_NZC CC_REGNUM) -+ (unspec:CC_NZC -+ [(match_operand:VNx16BI 1 "register_operand" "Upa") -+ (match_operand 2) -+ (match_operand:SI 3 "aarch64_sve_ptrue_flag") -+ (unspec:PRED_ALL -+ [(match_operand 4) -+ (match_operand:SI 5 "aarch64_sve_ptrue_flag") -+ (match_operand:PRED_ALL 6 "register_operand" "0")] -+ SVE_PITER)] -+ UNSPEC_PTEST)) -+ (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") -+ (unspec:PRED_ALL -+ [(match_dup 4) -+ (match_dup 5) -+ (match_dup 6)] -+ SVE_PITER))] -+ "TARGET_SVE -+ && >= -+ && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])" -+ "\t%0., %1, %0." -+ "&& !rtx_equal_p (operands[2], operands[4])" - { -- operands[2] = force_reg (mode, CONSTM1_RTX (mode)); -+ operands[4] = operands[2]; -+ operands[5] = operands[3]; - } - ) - --;; FNEG, FABS and FSQRT predicated with a PTRUE. --(define_insn "*2" -- [(set (match_operand:SVE_F 0 "register_operand" "=w") -- (unspec:SVE_F -- [(match_operand: 1 "register_operand" "Upl") -- (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 2 "register_operand" "w"))] -- UNSPEC_MERGE_PTRUE))] -- "TARGET_SVE" -- "\t%0., %1/m, %2." --) -- --;; Unpredicated FRINTy. --(define_expand "2" -- [(set (match_operand:SVE_F 0 "register_operand") -- (unspec:SVE_F -- [(match_dup 2) -- (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand")] -- FRINT)] -- UNSPEC_MERGE_PTRUE))] -- "TARGET_SVE" -+;; Same, but with only the flags result being interesting. 
-+(define_insn_and_rewrite "*aarch64_sve__ptest" -+ [(set (reg:CC_NZC CC_REGNUM) -+ (unspec:CC_NZC -+ [(match_operand:VNx16BI 1 "register_operand" "Upa") -+ (match_operand 2) -+ (match_operand:SI 3 "aarch64_sve_ptrue_flag") -+ (unspec:PRED_ALL -+ [(match_operand 4) -+ (match_operand:SI 5 "aarch64_sve_ptrue_flag") -+ (match_operand:PRED_ALL 6 "register_operand" "0")] -+ SVE_PITER)] -+ UNSPEC_PTEST)) -+ (clobber (match_scratch:PRED_ALL 0 "=Upa"))] -+ "TARGET_SVE -+ && >= -+ && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])" -+ "\t%0., %1, %0." -+ "&& !rtx_equal_p (operands[2], operands[4])" - { -- operands[2] = force_reg (mode, CONSTM1_RTX (mode)); -+ operands[4] = operands[2]; -+ operands[5] = operands[3]; - } - ) - --;; FRINTy predicated with a PTRUE. --(define_insn "*2" -- [(set (match_operand:SVE_F 0 "register_operand" "=w") -- (unspec:SVE_F -- [(match_operand: 1 "register_operand" "Upl") -- (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "w")] -- FRINT)] -- UNSPEC_MERGE_PTRUE))] -- "TARGET_SVE" -- "frint\t%0., %1/m, %2." --) -+;; ========================================================================= -+;; == Counting elements -+;; ========================================================================= - --;; Unpredicated conversion of floats to integers of the same size (HF to HI, --;; SF to SI or DF to DI). --(define_expand "2" -- [(set (match_operand: 0 "register_operand") -- (unspec: -- [(match_dup 2) -- (FIXUORS: -- (match_operand:SVE_F 1 "register_operand"))] -- UNSPEC_MERGE_PTRUE))] -+;; ------------------------------------------------------------------------- -+;; ---- [INT] Count elements in a pattern (scalar) -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - CNTB -+;; - CNTD -+;; - CNTH -+;; - CNTW -+;; ------------------------------------------------------------------------- -+ -+;; Count the number of elements in an svpattern. Operand 1 is the pattern, -+;; operand 2 is the number of elements that fit in a 128-bit block, and -+;; operand 3 is a multiplier in the range [1, 16]. -+;; -+;; Note that this pattern isn't used for SV_ALL (but would work for that too). -+(define_insn "aarch64_sve_cnt_pat" -+ [(set (match_operand:DI 0 "register_operand" "=r") -+ (zero_extend:DI -+ (unspec:SI [(match_operand:DI 1 "const_int_operand") -+ (match_operand:DI 2 "const_int_operand") -+ (match_operand:DI 3 "const_int_operand")] -+ UNSPEC_SVE_CNT_PAT)))] - "TARGET_SVE" - { -- operands[2] = force_reg (mode, CONSTM1_RTX (mode)); -+ return aarch64_output_sve_cnt_pat_immediate ("cnt", "%x0", operands + 1); - } - ) - --;; Conversion of SF to DI, SI or HI, predicated with a PTRUE. --(define_insn "*v16hsf2" -- [(set (match_operand:SVE_HSDI 0 "register_operand" "=w") -- (unspec:SVE_HSDI -- [(match_operand: 1 "register_operand" "Upl") -- (FIXUORS:SVE_HSDI -- (match_operand:VNx8HF 2 "register_operand" "w"))] -- UNSPEC_MERGE_PTRUE))] -+;; ------------------------------------------------------------------------- -+;; ---- [INT] Increment by the number of elements in a pattern (scalar) -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - INC -+;; - SQINC -+;; - UQINC -+;; ------------------------------------------------------------------------- -+ -+;; Increment a DImode register by the number of elements in an svpattern. -+;; See aarch64_sve_cnt_pat for the counting behavior. 
-+(define_insn "@aarch64_sve__pat" -+ [(set (match_operand:DI 0 "register_operand" "=r") -+ (ANY_PLUS:DI (zero_extend:DI -+ (unspec:SI [(match_operand:DI 2 "const_int_operand") -+ (match_operand:DI 3 "const_int_operand") -+ (match_operand:DI 4 "const_int_operand")] -+ UNSPEC_SVE_CNT_PAT)) -+ (match_operand:DI_ONLY 1 "register_operand" "0")))] - "TARGET_SVE" -- "fcvtz\t%0., %1/m, %2.h" -+ { -+ return aarch64_output_sve_cnt_pat_immediate ("", "%x0", -+ operands + 2); -+ } - ) - --;; Conversion of SF to DI or SI, predicated with a PTRUE. --(define_insn "*vnx4sf2" -- [(set (match_operand:SVE_SDI 0 "register_operand" "=w") -- (unspec:SVE_SDI -- [(match_operand: 1 "register_operand" "Upl") -- (FIXUORS:SVE_SDI -- (match_operand:VNx4SF 2 "register_operand" "w"))] -- UNSPEC_MERGE_PTRUE))] -+;; Increment an SImode register by the number of elements in an svpattern -+;; using modular arithmetic. See aarch64_sve_cnt_pat for the counting -+;; behavior. -+(define_insn "*aarch64_sve_incsi_pat" -+ [(set (match_operand:SI 0 "register_operand" "=r") -+ (plus:SI (unspec:SI [(match_operand:DI 2 "const_int_operand") -+ (match_operand:DI 3 "const_int_operand") -+ (match_operand:DI 4 "const_int_operand")] -+ UNSPEC_SVE_CNT_PAT) -+ (match_operand:SI 1 "register_operand" "0")))] - "TARGET_SVE" -- "fcvtz\t%0., %1/m, %2.s" -+ { -+ return aarch64_output_sve_cnt_pat_immediate ("inc", "%x0", operands + 2); -+ } - ) - --;; Conversion of DF to DI or SI, predicated with a PTRUE. --(define_insn "*vnx2df2" -- [(set (match_operand:SVE_SDI 0 "register_operand" "=w") -- (unspec:SVE_SDI -- [(match_operand:VNx2BI 1 "register_operand" "Upl") -- (FIXUORS:SVE_SDI -- (match_operand:VNx2DF 2 "register_operand" "w"))] -- UNSPEC_MERGE_PTRUE))] -+;; Increment an SImode register by the number of elements in an svpattern -+;; using saturating arithmetic, extending the result to 64 bits. -+;; -+;; See aarch64_sve_cnt_pat for the counting behavior. -+(define_insn "@aarch64_sve__pat" -+ [(set (match_operand:DI 0 "register_operand" "=r") -+ (:DI -+ (SAT_PLUS:SI -+ (unspec:SI [(match_operand:DI 2 "const_int_operand") -+ (match_operand:DI 3 "const_int_operand") -+ (match_operand:DI 4 "const_int_operand")] -+ UNSPEC_SVE_CNT_PAT) -+ (match_operand:SI_ONLY 1 "register_operand" "0"))))] - "TARGET_SVE" -- "fcvtz\t%0., %1/m, %2.d" -+ { -+ const char *registers = ( == SS_PLUS ? "%x0, %w0" : "%w0"); -+ return aarch64_output_sve_cnt_pat_immediate ("", registers, -+ operands + 2); -+ } - ) - --;; Unpredicated conversion of integers to floats of the same size --;; (HI to HF, SI to SF or DI to DF). --(define_expand "2" -- [(set (match_operand:SVE_F 0 "register_operand") -- (unspec:SVE_F -- [(match_dup 2) -- (FLOATUORS:SVE_F -- (match_operand: 1 "register_operand"))] -- UNSPEC_MERGE_PTRUE))] -+;; ------------------------------------------------------------------------- -+;; ---- [INT] Increment by the number of elements in a pattern (vector) -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - INC -+;; - SQINC -+;; - UQINC -+;; ------------------------------------------------------------------------- -+ -+;; Increment a vector of DIs by the number of elements in an svpattern. -+;; See aarch64_sve_cnt_pat for the counting behavior. 
-+(define_insn "@aarch64_sve__pat" -+ [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w") -+ (ANY_PLUS:VNx2DI -+ (vec_duplicate:VNx2DI -+ (zero_extend:DI -+ (unspec:SI [(match_operand:DI 2 "const_int_operand") -+ (match_operand:DI 3 "const_int_operand") -+ (match_operand:DI 4 "const_int_operand")] -+ UNSPEC_SVE_CNT_PAT))) -+ (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")))] - "TARGET_SVE" - { -- operands[2] = force_reg (mode, CONSTM1_RTX (mode)); -+ if (which_alternative == 1) -+ output_asm_insn ("movprfx\t%0, %1", operands); -+ return aarch64_output_sve_cnt_pat_immediate ("", "%0.", -+ operands + 2); - } -+ [(set_attr "movprfx" "*,yes")] - ) - --;; Conversion of DI, SI or HI to the same number of HFs, predicated --;; with a PTRUE. --(define_insn "*vnx8hf2" -- [(set (match_operand:VNx8HF 0 "register_operand" "=w") -- (unspec:VNx8HF -- [(match_operand: 1 "register_operand" "Upl") -- (FLOATUORS:VNx8HF -- (match_operand:SVE_HSDI 2 "register_operand" "w"))] -- UNSPEC_MERGE_PTRUE))] -+;; Increment a vector of SIs by the number of elements in an svpattern. -+;; See aarch64_sve_cnt_pat for the counting behavior. -+(define_insn "@aarch64_sve__pat" -+ [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w") -+ (ANY_PLUS:VNx4SI -+ (vec_duplicate:VNx4SI -+ (unspec:SI [(match_operand:DI 2 "const_int_operand") -+ (match_operand:DI 3 "const_int_operand") -+ (match_operand:DI 4 "const_int_operand")] -+ UNSPEC_SVE_CNT_PAT)) -+ (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))] - "TARGET_SVE" -- "cvtf\t%0.h, %1/m, %2." -+ { -+ if (which_alternative == 1) -+ output_asm_insn ("movprfx\t%0, %1", operands); -+ return aarch64_output_sve_cnt_pat_immediate ("", "%0.", -+ operands + 2); -+ } -+ [(set_attr "movprfx" "*,yes")] - ) - --;; Conversion of DI or SI to the same number of SFs, predicated with a PTRUE. --(define_insn "*vnx4sf2" -- [(set (match_operand:VNx4SF 0 "register_operand" "=w") -- (unspec:VNx4SF -- [(match_operand: 1 "register_operand" "Upl") -- (FLOATUORS:VNx4SF -- (match_operand:SVE_SDI 2 "register_operand" "w"))] -- UNSPEC_MERGE_PTRUE))] -+;; Increment a vector of HIs by the number of elements in an svpattern. -+;; See aarch64_sve_cnt_pat for the counting behavior. -+(define_expand "@aarch64_sve__pat" -+ [(set (match_operand:VNx8HI 0 "register_operand") -+ (ANY_PLUS:VNx8HI -+ (vec_duplicate:VNx8HI -+ (truncate:HI -+ (unspec:SI [(match_operand:DI 2 "const_int_operand") -+ (match_operand:DI 3 "const_int_operand") -+ (match_operand:DI 4 "const_int_operand")] -+ UNSPEC_SVE_CNT_PAT))) -+ (match_operand:VNx8HI_ONLY 1 "register_operand")))] -+ "TARGET_SVE" -+) -+ -+(define_insn "*aarch64_sve__pat" -+ [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w") -+ (ANY_PLUS:VNx8HI -+ (vec_duplicate:VNx8HI -+ (match_operator:HI 5 "subreg_lowpart_operator" -+ [(unspec:SI [(match_operand:DI 2 "const_int_operand") -+ (match_operand:DI 3 "const_int_operand") -+ (match_operand:DI 4 "const_int_operand")] -+ UNSPEC_SVE_CNT_PAT)])) -+ (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")))] - "TARGET_SVE" -- "cvtf\t%0.s, %1/m, %2." -+ { -+ if (which_alternative == 1) -+ output_asm_insn ("movprfx\t%0, %1", operands); -+ return aarch64_output_sve_cnt_pat_immediate ("", "%0.", -+ operands + 2); -+ } -+ [(set_attr "movprfx" "*,yes")] - ) - --;; Conversion of DI or SI to DF, predicated with a PTRUE. 
--(define_insn "aarch64_sve_vnx2df2" -- [(set (match_operand:VNx2DF 0 "register_operand" "=w") -- (unspec:VNx2DF -- [(match_operand:VNx2BI 1 "register_operand" "Upl") -- (FLOATUORS:VNx2DF -- (match_operand:SVE_SDI 2 "register_operand" "w"))] -- UNSPEC_MERGE_PTRUE))] -+;; ------------------------------------------------------------------------- -+;; ---- [INT] Decrement by the number of elements in a pattern (scalar) -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - DEC -+;; - SQDEC -+;; - UQDEC -+;; ------------------------------------------------------------------------- -+ -+;; Decrement a DImode register by the number of elements in an svpattern. -+;; See aarch64_sve_cnt_pat for the counting behavior. -+(define_insn "@aarch64_sve__pat" -+ [(set (match_operand:DI 0 "register_operand" "=r") -+ (ANY_MINUS:DI (match_operand:DI_ONLY 1 "register_operand" "0") -+ (zero_extend:DI -+ (unspec:SI [(match_operand:DI 2 "const_int_operand") -+ (match_operand:DI 3 "const_int_operand") -+ (match_operand:DI 4 "const_int_operand")] -+ UNSPEC_SVE_CNT_PAT))))] - "TARGET_SVE" -- "cvtf\t%0.d, %1/m, %2." -+ { -+ return aarch64_output_sve_cnt_pat_immediate ("", "%x0", -+ operands + 2); -+ } - ) - --;; Conversion of DFs to the same number of SFs, or SFs to the same number --;; of HFs. --(define_insn "*trunc2" -- [(set (match_operand:SVE_HSF 0 "register_operand" "=w") -- (unspec:SVE_HSF -- [(match_operand: 1 "register_operand" "Upl") -- (unspec:SVE_HSF -- [(match_operand: 2 "register_operand" "w")] -- UNSPEC_FLOAT_CONVERT)] -- UNSPEC_MERGE_PTRUE))] -+;; Decrement an SImode register by the number of elements in an svpattern -+;; using modular arithmetic. See aarch64_sve_cnt_pat for the counting -+;; behavior. -+(define_insn "*aarch64_sve_decsi_pat" -+ [(set (match_operand:SI 0 "register_operand" "=r") -+ (minus:SI (match_operand:SI 1 "register_operand" "0") -+ (unspec:SI [(match_operand:DI 2 "const_int_operand") -+ (match_operand:DI 3 "const_int_operand") -+ (match_operand:DI 4 "const_int_operand")] -+ UNSPEC_SVE_CNT_PAT)))] - "TARGET_SVE" -- "fcvt\t%0., %1/m, %2." -+ { -+ return aarch64_output_sve_cnt_pat_immediate ("dec", "%x0", operands + 2); -+ } - ) - --;; Conversion of SFs to the same number of DFs, or HFs to the same number --;; of SFs. --(define_insn "aarch64_sve_extend2" -- [(set (match_operand: 0 "register_operand" "=w") -- (unspec: -- [(match_operand: 1 "register_operand" "Upl") -- (unspec: -- [(match_operand:SVE_HSF 2 "register_operand" "w")] -- UNSPEC_FLOAT_CONVERT)] -- UNSPEC_MERGE_PTRUE))] -+;; Decrement an SImode register by the number of elements in an svpattern -+;; using saturating arithmetic, extending the result to 64 bits. -+;; -+;; See aarch64_sve_cnt_pat for the counting behavior. -+(define_insn "@aarch64_sve__pat" -+ [(set (match_operand:DI 0 "register_operand" "=r") -+ (:DI -+ (SAT_MINUS:SI -+ (match_operand:SI_ONLY 1 "register_operand" "0") -+ (unspec:SI [(match_operand:DI 2 "const_int_operand") -+ (match_operand:DI 3 "const_int_operand") -+ (match_operand:DI 4 "const_int_operand")] -+ UNSPEC_SVE_CNT_PAT))))] - "TARGET_SVE" -- "fcvt\t%0., %1/m, %2." -+ { -+ const char *registers = ( == SS_MINUS ? "%x0, %w0" : "%w0"); -+ return aarch64_output_sve_cnt_pat_immediate ("", registers, -+ operands + 2); -+ } - ) - --;; Unpack the low or high half of a predicate, where "high" refers to --;; the low-numbered lanes for big-endian and the high-numbered lanes --;; for little-endian. 
--(define_expand "vec_unpack__" -- [(match_operand: 0 "register_operand") -- (unspec: [(match_operand:PRED_BHS 1 "register_operand")] -- UNPACK)] -+;; ------------------------------------------------------------------------- -+;; ---- [INT] Decrement by the number of elements in a pattern (vector) -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - DEC -+;; - SQDEC -+;; - UQDEC -+;; ------------------------------------------------------------------------- -+ -+;; Decrement a vector of DIs by the number of elements in an svpattern. -+;; See aarch64_sve_cnt_pat for the counting behavior. -+(define_insn "@aarch64_sve__pat" -+ [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w") -+ (ANY_MINUS:VNx2DI -+ (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w") -+ (vec_duplicate:VNx2DI -+ (zero_extend:DI -+ (unspec:SI [(match_operand:DI 2 "const_int_operand") -+ (match_operand:DI 3 "const_int_operand") -+ (match_operand:DI 4 "const_int_operand")] -+ UNSPEC_SVE_CNT_PAT)))))] - "TARGET_SVE" - { -- emit_insn (( -- ? gen_aarch64_sve_punpkhi_ -- : gen_aarch64_sve_punpklo_) -- (operands[0], operands[1])); -- DONE; -+ if (which_alternative == 1) -+ output_asm_insn ("movprfx\t%0, %1", operands); -+ return aarch64_output_sve_cnt_pat_immediate ("", "%0.", -+ operands + 2); - } -+ [(set_attr "movprfx" "*,yes")] - ) - --;; PUNPKHI and PUNPKLO. --(define_insn "aarch64_sve_punpk_" -- [(set (match_operand: 0 "register_operand" "=Upa") -- (unspec: [(match_operand:PRED_BHS 1 "register_operand" "Upa")] -- UNPACK_UNSIGNED))] -+;; Decrement a vector of SIs by the number of elements in an svpattern. -+;; See aarch64_sve_cnt_pat for the counting behavior. -+(define_insn "@aarch64_sve__pat" -+ [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w") -+ (ANY_MINUS:VNx4SI -+ (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w") -+ (vec_duplicate:VNx4SI -+ (unspec:SI [(match_operand:DI 2 "const_int_operand") -+ (match_operand:DI 3 "const_int_operand") -+ (match_operand:DI 4 "const_int_operand")] -+ UNSPEC_SVE_CNT_PAT))))] - "TARGET_SVE" -- "punpk\t%0.h, %1.b" -+ { -+ if (which_alternative == 1) -+ output_asm_insn ("movprfx\t%0, %1", operands); -+ return aarch64_output_sve_cnt_pat_immediate ("", "%0.", -+ operands + 2); -+ } -+ [(set_attr "movprfx" "*,yes")] - ) - --;; Unpack the low or high half of a vector, where "high" refers to --;; the low-numbered lanes for big-endian and the high-numbered lanes --;; for little-endian. --(define_expand "vec_unpack__" -- [(match_operand: 0 "register_operand") -- (unspec: [(match_operand:SVE_BHSI 1 "register_operand")] UNPACK)] -+;; Decrement a vector of HIs by the number of elements in an svpattern. -+;; See aarch64_sve_cnt_pat for the counting behavior. 
-+(define_expand "@aarch64_sve__pat" -+ [(set (match_operand:VNx8HI 0 "register_operand") -+ (ANY_MINUS:VNx8HI -+ (match_operand:VNx8HI_ONLY 1 "register_operand") -+ (vec_duplicate:VNx8HI -+ (truncate:HI -+ (unspec:SI [(match_operand:DI 2 "const_int_operand") -+ (match_operand:DI 3 "const_int_operand") -+ (match_operand:DI 4 "const_int_operand")] -+ UNSPEC_SVE_CNT_PAT)))))] -+ "TARGET_SVE" -+) -+ -+(define_insn "*aarch64_sve__pat" -+ [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w") -+ (ANY_MINUS:VNx8HI -+ (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w") -+ (vec_duplicate:VNx8HI -+ (match_operator:HI 5 "subreg_lowpart_operator" -+ [(unspec:SI [(match_operand:DI 2 "const_int_operand") -+ (match_operand:DI 3 "const_int_operand") -+ (match_operand:DI 4 "const_int_operand")] -+ UNSPEC_SVE_CNT_PAT)]))))] - "TARGET_SVE" - { -- emit_insn (( -- ? gen_aarch64_sve_unpkhi_ -- : gen_aarch64_sve_unpklo_) -- (operands[0], operands[1])); -- DONE; -+ if (which_alternative == 1) -+ output_asm_insn ("movprfx\t%0, %1", operands); -+ return aarch64_output_sve_cnt_pat_immediate ("", "%0.", -+ operands + 2); - } -+ [(set_attr "movprfx" "*,yes")] - ) - --;; SUNPKHI, UUNPKHI, SUNPKLO and UUNPKLO. --(define_insn "aarch64_sve_unpk_" -- [(set (match_operand: 0 "register_operand" "=w") -- (unspec: [(match_operand:SVE_BHSI 1 "register_operand" "w")] -- UNPACK))] -+;; ------------------------------------------------------------------------- -+;; ---- [INT] Count elements in a predicate (scalar) -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - CNTP -+;; ------------------------------------------------------------------------- -+ -+;; Count the number of set bits in a predicate. Operand 3 is true if -+;; operand 1 is known to be all-true. -+(define_insn "@aarch64_pred_cntp" -+ [(set (match_operand:DI 0 "register_operand" "=r") -+ (zero_extend:DI -+ (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upl") -+ (match_operand:SI 2 "aarch64_sve_ptrue_flag") -+ (match_operand:PRED_ALL 3 "register_operand" "Upa")] -+ UNSPEC_CNTP)))] -+ "TARGET_SVE" -+ "cntp\t%x0, %1, %3.") -+ -+;; ------------------------------------------------------------------------- -+;; ---- [INT] Increment by the number of elements in a predicate (scalar) -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - INCP -+;; - SQINCP -+;; - UQINCP -+;; ------------------------------------------------------------------------- -+ -+;; Increment a DImode register by the number of set bits in a predicate. -+;; See aarch64_sve_cntp for a description of the operands. -+(define_expand "@aarch64_sve__cntp" -+ [(set (match_operand:DI 0 "register_operand") -+ (ANY_PLUS:DI -+ (zero_extend:DI -+ (unspec:SI [(match_dup 3) -+ (const_int SVE_KNOWN_PTRUE) -+ (match_operand:PRED_ALL 2 "register_operand")] -+ UNSPEC_CNTP)) -+ (match_operand:DI_ONLY 1 "register_operand")))] - "TARGET_SVE" -- "unpk\t%0., %1." -+ { -+ operands[3] = CONSTM1_RTX (mode); -+ } - ) - --;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF. --;; First unpack the source without conversion, then float-convert the --;; unpacked source. 
--(define_expand "vec_unpacks__" -- [(match_operand: 0 "register_operand") -- (unspec:SVE_HSF [(match_operand:SVE_HSF 1 "register_operand")] -- UNPACK_UNSIGNED)] -+(define_insn_and_rewrite "*aarch64_sve__cntp" -+ [(set (match_operand:DI 0 "register_operand" "=r") -+ (ANY_PLUS:DI -+ (zero_extend:DI -+ (unspec:SI [(match_operand 3) -+ (const_int SVE_KNOWN_PTRUE) -+ (match_operand:PRED_ALL 2 "register_operand" "Upa")] -+ UNSPEC_CNTP)) -+ (match_operand:DI_ONLY 1 "register_operand" "0")))] - "TARGET_SVE" -+ "p\t%x0, %2." -+ "&& !CONSTANT_P (operands[3])" - { -- /* Use ZIP to do the unpack, since we don't care about the upper halves -- and since it has the nice property of not needing any subregs. -- If using UUNPK* turns out to be preferable, we could model it as -- a ZIP whose first operand is zero. */ -- rtx temp = gen_reg_rtx (mode); -- emit_insn (( -- ? gen_aarch64_sve_zip2 -- : gen_aarch64_sve_zip1) -- (temp, operands[1], operands[1])); -- rtx ptrue = force_reg (mode, CONSTM1_RTX (mode)); -- emit_insn (gen_aarch64_sve_extend2 (operands[0], -- ptrue, temp)); -- DONE; -+ operands[3] = CONSTM1_RTX (mode); - } - ) - --;; Unpack one half of a VNx4SI to VNx2DF. First unpack from VNx4SI --;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the --;; unpacked VNx4SI to VNx2DF. --(define_expand "vec_unpack_float__vnx4si" -- [(match_operand:VNx2DF 0 "register_operand") -- (FLOATUORS:VNx2DF -- (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")] -- UNPACK_UNSIGNED))] -- "TARGET_SVE" -+;; Increment an SImode register by the number of set bits in a predicate -+;; using modular arithmetic. See aarch64_sve_cntp for a description of -+;; the operands. -+(define_insn_and_rewrite "*aarch64_incsi_cntp" -+ [(set (match_operand:SI 0 "register_operand" "=r") -+ (plus:SI -+ (unspec:SI [(match_operand 3) -+ (const_int SVE_KNOWN_PTRUE) -+ (match_operand:PRED_ALL 2 "register_operand" "Upa")] -+ UNSPEC_CNTP) -+ (match_operand:SI 1 "register_operand" "0")))] -+ "TARGET_SVE" -+ "incp\t%x0, %2." -+ "&& !CONSTANT_P (operands[3])" - { -- /* Use ZIP to do the unpack, since we don't care about the upper halves -- and since it has the nice property of not needing any subregs. -- If using UUNPK* turns out to be preferable, we could model it as -- a ZIP whose first operand is zero. */ -- rtx temp = gen_reg_rtx (VNx4SImode); -- emit_insn (( -- ? gen_aarch64_sve_zip2vnx4si -- : gen_aarch64_sve_zip1vnx4si) -- (temp, operands[1], operands[1])); -- rtx ptrue = force_reg (VNx2BImode, CONSTM1_RTX (VNx2BImode)); -- emit_insn (gen_aarch64_sve_vnx4sivnx2df2 (operands[0], -- ptrue, temp)); -- DONE; -+ operands[3] = CONSTM1_RTX (mode); - } - ) - --;; Predicate pack. Use UZP1 on the narrower type, which discards --;; the high part of each wide element. --(define_insn "vec_pack_trunc_" -- [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa") -- (unspec:PRED_BHS -- [(match_operand: 1 "register_operand" "Upa") -- (match_operand: 2 "register_operand" "Upa")] -- UNSPEC_PACK))] -+;; Increment an SImode register by the number of set bits in a predicate -+;; using saturating arithmetic, extending the result to 64 bits. -+;; -+;; See aarch64_sve_cntp for a description of the operands. -+(define_expand "@aarch64_sve__cntp" -+ [(set (match_operand:DI 0 "register_operand") -+ (:DI -+ (SAT_PLUS:SI -+ (unspec:SI [(match_dup 3) -+ (const_int SVE_KNOWN_PTRUE) -+ (match_operand:PRED_ALL 2 "register_operand")] -+ UNSPEC_CNTP) -+ (match_operand:SI_ONLY 1 "register_operand"))))] - "TARGET_SVE" -- "uzp1\t%0., %1., %2." 
-+ { -+ operands[3] = CONSTM1_RTX (mode); -+ } - ) - --;; Integer pack. Use UZP1 on the narrower type, which discards --;; the high part of each wide element. --(define_insn "vec_pack_trunc_" -- [(set (match_operand:SVE_BHSI 0 "register_operand" "=w") -- (unspec:SVE_BHSI -- [(match_operand: 1 "register_operand" "w") -- (match_operand: 2 "register_operand" "w")] -- UNSPEC_PACK))] -+(define_insn_and_rewrite "*aarch64_sve__cntp" -+ [(set (match_operand:DI 0 "register_operand" "=r") -+ (:DI -+ (SAT_PLUS:SI -+ (unspec:SI [(match_operand 3) -+ (const_int SVE_KNOWN_PTRUE) -+ (match_operand:PRED_ALL 2 "register_operand" "Upa")] -+ UNSPEC_CNTP) -+ (match_operand:SI_ONLY 1 "register_operand" "0"))))] - "TARGET_SVE" -- "uzp1\t%0., %1., %2." -+ { -+ if ( == SS_PLUS) -+ return "p\t%x0, %2., %w0"; -+ else -+ return "p\t%w0, %2."; -+ } -+ "&& !CONSTANT_P (operands[3])" -+ { -+ operands[3] = CONSTM1_RTX (mode); -+ } - ) - --;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack --;; the results into a single vector. --(define_expand "vec_pack_trunc_" -- [(set (match_dup 4) -- (unspec:SVE_HSF -- [(match_dup 3) -- (unspec:SVE_HSF [(match_operand: 1 "register_operand")] -- UNSPEC_FLOAT_CONVERT)] -- UNSPEC_MERGE_PTRUE)) -- (set (match_dup 5) -- (unspec:SVE_HSF -- [(match_dup 3) -- (unspec:SVE_HSF [(match_operand: 2 "register_operand")] -- UNSPEC_FLOAT_CONVERT)] -- UNSPEC_MERGE_PTRUE)) -- (set (match_operand:SVE_HSF 0 "register_operand") -- (unspec:SVE_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))] -+;; ------------------------------------------------------------------------- -+;; ---- [INT] Increment by the number of elements in a predicate (vector) -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - INCP -+;; - SQINCP -+;; - UQINCP -+;; ------------------------------------------------------------------------- -+ -+;; Increment a vector of DIs by the number of set bits in a predicate. -+;; See aarch64_sve_cntp for a description of the operands. -+(define_expand "@aarch64_sve__cntp" -+ [(set (match_operand:VNx2DI 0 "register_operand") -+ (ANY_PLUS:VNx2DI -+ (vec_duplicate:VNx2DI -+ (zero_extend:DI -+ (unspec:SI -+ [(match_dup 3) -+ (const_int SVE_KNOWN_PTRUE) -+ (match_operand: 2 "register_operand")] -+ UNSPEC_CNTP))) -+ (match_operand:VNx2DI_ONLY 1 "register_operand")))] - "TARGET_SVE" - { -- operands[3] = force_reg (mode, CONSTM1_RTX (mode)); -- operands[4] = gen_reg_rtx (mode); -- operands[5] = gen_reg_rtx (mode); -+ operands[3] = CONSTM1_RTX (mode); - } - ) - --;; Convert two vectors of DF to SI and pack the results into a single vector. 
--(define_expand "vec_pack_fix_trunc_vnx2df" -- [(set (match_dup 4) -- (unspec:VNx4SI -- [(match_dup 3) -- (FIXUORS:VNx4SI (match_operand:VNx2DF 1 "register_operand"))] -- UNSPEC_MERGE_PTRUE)) -- (set (match_dup 5) -- (unspec:VNx4SI -- [(match_dup 3) -- (FIXUORS:VNx4SI (match_operand:VNx2DF 2 "register_operand"))] -- UNSPEC_MERGE_PTRUE)) -- (set (match_operand:VNx4SI 0 "register_operand") -- (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))] -+(define_insn_and_rewrite "*aarch64_sve__cntp" -+ [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w") -+ (ANY_PLUS:VNx2DI -+ (vec_duplicate:VNx2DI -+ (zero_extend:DI -+ (unspec:SI -+ [(match_operand 3) -+ (const_int SVE_KNOWN_PTRUE) -+ (match_operand: 2 "register_operand" "Upa, Upa")] -+ UNSPEC_CNTP))) -+ (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")))] - "TARGET_SVE" -+ "@ -+ p\t%0.d, %2 -+ movprfx\t%0, %1\;p\t%0.d, %2" -+ "&& !CONSTANT_P (operands[3])" - { -- operands[3] = force_reg (VNx2BImode, CONSTM1_RTX (VNx2BImode)); -- operands[4] = gen_reg_rtx (VNx4SImode); -- operands[5] = gen_reg_rtx (VNx4SImode); -+ operands[3] = CONSTM1_RTX (mode); - } -+ [(set_attr "movprfx" "*,yes")] - ) - --;; Predicated floating-point operations with select. --(define_expand "cond_" -- [(set (match_operand:SVE_F 0 "register_operand") -- (unspec:SVE_F -- [(match_operand: 1 "register_operand") -- (unspec:SVE_F -- [(match_operand:SVE_F 2 "register_operand") -- (match_operand:SVE_F 3 "register_operand")] -- SVE_COND_FP_BINARY) -- (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")] -- UNSPEC_SEL))] -+;; Increment a vector of SIs by the number of set bits in a predicate. -+;; See aarch64_sve_cntp for a description of the operands. -+(define_expand "@aarch64_sve__cntp" -+ [(set (match_operand:VNx4SI 0 "register_operand") -+ (ANY_PLUS:VNx4SI -+ (vec_duplicate:VNx4SI -+ (unspec:SI -+ [(match_dup 3) -+ (const_int SVE_KNOWN_PTRUE) -+ (match_operand: 2 "register_operand")] -+ UNSPEC_CNTP)) -+ (match_operand:VNx4SI_ONLY 1 "register_operand")))] - "TARGET_SVE" -+ { -+ operands[3] = CONSTM1_RTX (mode); -+ } - ) - --;; Predicated floating-point operations with select matching output. --(define_insn "*cond__0" -- [(set (match_operand:SVE_F 0 "register_operand" "+w, w, ?&w") -- (unspec:SVE_F -- [(match_operand: 1 "register_operand" "Upl, Upl, Upl") -- (unspec:SVE_F -- [(match_operand:SVE_F 2 "register_operand" "0, w, w") -- (match_operand:SVE_F 3 "register_operand" "w, 0, w")] -- SVE_COND_FP_BINARY) -- (match_dup 0)] -- UNSPEC_SEL))] -+(define_insn_and_rewrite "*aarch64_sve__cntp" -+ [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w") -+ (ANY_PLUS:VNx4SI -+ (vec_duplicate:VNx4SI -+ (unspec:SI -+ [(match_operand 3) -+ (const_int SVE_KNOWN_PTRUE) -+ (match_operand: 2 "register_operand" "Upa, Upa")] -+ UNSPEC_CNTP)) -+ (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))] - "TARGET_SVE" - "@ -- \t%0., %1/m, %0., %3. -- \t%0., %1/m, %0., %2. -- movprfx\t%0, %1/m, %2\;\t%0., %1/m, %0., %3." -- [(set_attr "movprfx" "*,*,yes")] -+ p\t%0.s, %2 -+ movprfx\t%0, %1\;p\t%0.s, %2" -+ "&& !CONSTANT_P (operands[3])" -+ { -+ operands[3] = CONSTM1_RTX (mode); -+ } -+ [(set_attr "movprfx" "*,yes")] - ) - --;; Predicated floating-point operations with select matching first operand. 
--(define_insn "*cond__2" -- [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") -- (unspec:SVE_F -- [(match_operand: 1 "register_operand" "Upl, Upl") -- (unspec:SVE_F -- [(match_operand:SVE_F 2 "register_operand" "0, w") -- (match_operand:SVE_F 3 "register_operand" "w, w")] -- SVE_COND_FP_BINARY) -- (match_dup 2)] -- UNSPEC_SEL))] -+;; Increment a vector of HIs by the number of set bits in a predicate. -+;; See aarch64_sve_cntp for a description of the operands. -+(define_expand "@aarch64_sve__cntp" -+ [(set (match_operand:VNx8HI 0 "register_operand") -+ (ANY_PLUS:VNx8HI -+ (vec_duplicate:VNx8HI -+ (truncate:HI -+ (unspec:SI -+ [(match_dup 3) -+ (const_int SVE_KNOWN_PTRUE) -+ (match_operand: 2 "register_operand")] -+ UNSPEC_CNTP))) -+ (match_operand:VNx8HI_ONLY 1 "register_operand")))] - "TARGET_SVE" -- "@ -- \t%0., %1/m, %0., %3. -- movprfx\t%0, %2\;\t%0., %1/m, %0., %3." -- [(set_attr "movprfx" "*,yes")] -+ { -+ operands[3] = CONSTM1_RTX (mode); -+ } - ) - --;; Predicated floating-point operations with select matching second operand. --(define_insn "*cond__3" -- [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") -- (unspec:SVE_F -- [(match_operand: 1 "register_operand" "Upl, Upl") -- (unspec:SVE_F -- [(match_operand:SVE_F 2 "register_operand" "w, w") -- (match_operand:SVE_F 3 "register_operand" "0, w")] -- SVE_COND_FP_BINARY) -- (match_dup 3)] -- UNSPEC_SEL))] -+(define_insn_and_rewrite "*aarch64_sve__cntp" -+ [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w") -+ (ANY_PLUS:VNx8HI -+ (vec_duplicate:VNx8HI -+ (match_operator:HI 3 "subreg_lowpart_operator" -+ [(unspec:SI -+ [(match_operand 4) -+ (const_int SVE_KNOWN_PTRUE) -+ (match_operand: 2 "register_operand" "Upa, Upa")] -+ UNSPEC_CNTP)])) -+ (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")))] - "TARGET_SVE" - "@ -- \t%0., %1/m, %0., %2. -- movprfx\t%0, %3\;\t%0., %1/m, %0., %2." -+ p\t%0.h, %2 -+ movprfx\t%0, %1\;p\t%0.h, %2" -+ "&& !CONSTANT_P (operands[4])" -+ { -+ operands[4] = CONSTM1_RTX (mode); -+ } - [(set_attr "movprfx" "*,yes")] - ) - --;; Predicated floating-point operations with select matching zero. --(define_insn "*cond__z" -- [(set (match_operand:SVE_F 0 "register_operand" "=&w") -- (unspec:SVE_F -- [(match_operand: 1 "register_operand" "Upl") -- (unspec:SVE_F -- [(match_operand:SVE_F 2 "register_operand" "w") -- (match_operand:SVE_F 3 "register_operand" "w")] -- SVE_COND_FP_BINARY) -- (match_operand:SVE_F 4 "aarch64_simd_imm_zero")] -- UNSPEC_SEL))] -+;; ------------------------------------------------------------------------- -+;; ---- [INT] Decrement by the number of elements in a predicate (scalar) -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - DECP -+;; - SQDECP -+;; - UQDECP -+;; ------------------------------------------------------------------------- -+ -+;; Decrement a DImode register by the number of set bits in a predicate. -+;; See aarch64_sve_cntp for a description of the operands. -+(define_expand "@aarch64_sve__cntp" -+ [(set (match_operand:DI 0 "register_operand") -+ (ANY_MINUS:DI -+ (match_operand:DI_ONLY 1 "register_operand") -+ (zero_extend:DI -+ (unspec:SI [(match_dup 3) -+ (const_int SVE_KNOWN_PTRUE) -+ (match_operand:PRED_ALL 2 "register_operand")] -+ UNSPEC_CNTP))))] - "TARGET_SVE" -- "movprfx\t%0., %1/z, %2.\;\t%0., %1/m, %0., %3." -- [(set_attr "movprfx" "yes")] -+ { -+ operands[3] = CONSTM1_RTX (mode); -+ } - ) - --;; Synthetic predication of floating-point operations with select unmatched. 
--(define_insn_and_split "*cond__any" -- [(set (match_operand:SVE_F 0 "register_operand" "=&w") -- (unspec:SVE_F -- [(match_operand: 1 "register_operand" "Upl") -- (unspec:SVE_F -- [(match_operand:SVE_F 2 "register_operand" "w") -- (match_operand:SVE_F 3 "register_operand" "w")] -- SVE_COND_FP_BINARY) -- (match_operand:SVE_F 4 "register_operand" "w")] -- UNSPEC_SEL))] -+(define_insn_and_rewrite "*aarch64_sve__cntp" -+ [(set (match_operand:DI 0 "register_operand" "=r") -+ (ANY_MINUS:DI -+ (match_operand:DI_ONLY 1 "register_operand" "0") -+ (zero_extend:DI -+ (unspec:SI [(match_operand 3) -+ (const_int SVE_KNOWN_PTRUE) -+ (match_operand:PRED_ALL 2 "register_operand" "Upa")] -+ UNSPEC_CNTP))))] - "TARGET_SVE" -- "#" -- "&& reload_completed -- && !(rtx_equal_p (operands[0], operands[4]) -- || rtx_equal_p (operands[2], operands[4]) -- || rtx_equal_p (operands[3], operands[4]))" -- ; Not matchable by any one insn or movprfx insn. We need a separate select. -- [(set (match_dup 0) -- (unspec:SVE_F [(match_dup 1) (match_dup 2) (match_dup 4)] UNSPEC_SEL)) -- (set (match_dup 0) -- (unspec:SVE_F -- [(match_dup 1) -- (unspec:SVE_F [(match_dup 0) (match_dup 3)] SVE_COND_FP_BINARY) -- (match_dup 0)] -- UNSPEC_SEL))] -+ "p\t%x0, %2." -+ "&& !CONSTANT_P (operands[3])" -+ { -+ operands[3] = CONSTM1_RTX (mode); -+ } - ) - --;; Predicated floating-point ternary operations with select. --(define_expand "cond_" -- [(set (match_operand:SVE_F 0 "register_operand") -- (unspec:SVE_F -- [(match_operand: 1 "register_operand") -- (unspec:SVE_F -- [(match_operand:SVE_F 2 "register_operand") -- (match_operand:SVE_F 3 "register_operand") -- (match_operand:SVE_F 4 "register_operand")] -- SVE_COND_FP_TERNARY) -- (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero")] -- UNSPEC_SEL))] -- "TARGET_SVE" --{ -- /* Swap the multiplication operands if the fallback value is the -- second of the two. */ -- if (rtx_equal_p (operands[3], operands[5])) -- std::swap (operands[2], operands[3]); --}) -+;; Decrement an SImode register by the number of set bits in a predicate -+;; using modular arithmetic. See aarch64_sve_cntp for a description of the -+;; operands. -+(define_insn_and_rewrite "*aarch64_decsi_cntp" -+ [(set (match_operand:SI 0 "register_operand" "=r") -+ (minus:SI -+ (match_operand:SI 1 "register_operand" "0") -+ (unspec:SI [(match_operand 3) -+ (const_int SVE_KNOWN_PTRUE) -+ (match_operand:PRED_ALL 2 "register_operand" "Upa")] -+ UNSPEC_CNTP)))] -+ "TARGET_SVE" -+ "decp\t%x0, %2." -+ "&& !CONSTANT_P (operands[3])" -+ { -+ operands[3] = CONSTM1_RTX (mode); -+ } -+) - --;; Predicated floating-point ternary operations using the FMAD-like form. --(define_insn "*cond__2" -- [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") -- (unspec:SVE_F -- [(match_operand: 1 "register_operand" "Upl, Upl") -- (unspec:SVE_F -- [(match_operand:SVE_F 2 "register_operand" "0, w") -- (match_operand:SVE_F 3 "register_operand" "w, w") -- (match_operand:SVE_F 4 "register_operand" "w, w")] -- SVE_COND_FP_TERNARY) -- (match_dup 2)] -- UNSPEC_SEL))] -+;; Decrement an SImode register by the number of set bits in a predicate -+;; using saturating arithmetic, extending the result to 64 bits. -+;; -+;; See aarch64_sve_cntp for a description of the operands. 
-+(define_expand "@aarch64_sve__cntp" -+ [(set (match_operand:DI 0 "register_operand") -+ (:DI -+ (SAT_MINUS:SI -+ (match_operand:SI_ONLY 1 "register_operand") -+ (unspec:SI [(match_dup 3) -+ (const_int SVE_KNOWN_PTRUE) -+ (match_operand:PRED_ALL 2 "register_operand")] -+ UNSPEC_CNTP))))] - "TARGET_SVE" -- "@ -- \t%0., %1/m, %3., %4. -- movprfx\t%0, %2\;\t%0., %1/m, %3., %4." -- [(set_attr "movprfx" "*,yes")] -+ { -+ operands[3] = CONSTM1_RTX (mode); -+ } - ) - --;; Predicated floating-point ternary operations using the FMLA-like form. --(define_insn "*cond__4" -- [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w") -- (unspec:SVE_F -- [(match_operand: 1 "register_operand" "Upl, Upl") -- (unspec:SVE_F -- [(match_operand:SVE_F 2 "register_operand" "w, w") -- (match_operand:SVE_F 3 "register_operand" "w, w") -- (match_operand:SVE_F 4 "register_operand" "0, w")] -- SVE_COND_FP_TERNARY) -- (match_dup 4)] -- UNSPEC_SEL))] -+(define_insn_and_rewrite "*aarch64_sve__cntp" -+ [(set (match_operand:DI 0 "register_operand" "=r") -+ (:DI -+ (SAT_MINUS:SI -+ (match_operand:SI_ONLY 1 "register_operand" "0") -+ (unspec:SI [(match_operand 3) -+ (const_int SVE_KNOWN_PTRUE) -+ (match_operand:PRED_ALL 2 "register_operand" "Upa")] -+ UNSPEC_CNTP))))] - "TARGET_SVE" -- "@ -- \t%0., %1/m, %2., %3. -- movprfx\t%0, %4\;\t%0., %1/m, %2., %3." -- [(set_attr "movprfx" "*,yes")] -+ { -+ if ( == SS_MINUS) -+ return "p\t%x0, %2., %w0"; -+ else -+ return "p\t%w0, %2."; -+ } -+ "&& !CONSTANT_P (operands[3])" -+ { -+ operands[3] = CONSTM1_RTX (mode); -+ } - ) - --;; Predicated floating-point ternary operations in which the value for --;; inactive lanes is distinct from the other inputs. --(define_insn_and_split "*cond__any" -- [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, ?&w") -- (unspec:SVE_F -- [(match_operand: 1 "register_operand" "Upl, Upl, Upl") -- (unspec:SVE_F -- [(match_operand:SVE_F 2 "register_operand" "w, w, w") -- (match_operand:SVE_F 3 "register_operand" "w, w, w") -- (match_operand:SVE_F 4 "register_operand" "w, w, w")] -- SVE_COND_FP_TERNARY) -- (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero" "Dz, 0, w")] -- UNSPEC_SEL))] -- "TARGET_SVE -- && !rtx_equal_p (operands[2], operands[5]) -- && !rtx_equal_p (operands[3], operands[5]) -- && !rtx_equal_p (operands[4], operands[5])" -- "@ -- movprfx\t%0., %1/z, %4.\;\t%0., %1/m, %2., %3. -- movprfx\t%0., %1/m, %4.\;\t%0., %1/m, %2., %3. -- #" -- "&& reload_completed -- && !CONSTANT_P (operands[5]) -- && !rtx_equal_p (operands[0], operands[5])" -- [(set (match_dup 0) -- (unspec:SVE_F [(match_dup 1) (match_dup 4) (match_dup 5)] UNSPEC_SEL)) -- (set (match_dup 0) -- (unspec:SVE_F -- [(match_dup 1) -- (unspec:SVE_F [(match_dup 2) (match_dup 3) (match_dup 0)] -- SVE_COND_FP_TERNARY) -- (match_dup 0)] -- UNSPEC_SEL))] -- "" -- [(set_attr "movprfx" "yes")] -+;; ------------------------------------------------------------------------- -+;; ---- [INT] Decrement by the number of elements in a predicate (vector) -+;; ------------------------------------------------------------------------- -+;; Includes: -+;; - DECP -+;; - SQDECP -+;; - UQDECP -+;; ------------------------------------------------------------------------- -+ -+;; Decrement a vector of DIs by the number of set bits in a predicate. -+;; See aarch64_sve_cntp for a description of the operands. 
-+(define_expand "@aarch64_sve__cntp" -+ [(set (match_operand:VNx2DI 0 "register_operand") -+ (ANY_MINUS:VNx2DI -+ (match_operand:VNx2DI_ONLY 1 "register_operand") -+ (vec_duplicate:VNx2DI -+ (zero_extend:DI -+ (unspec:SI -+ [(match_dup 3) -+ (const_int SVE_KNOWN_PTRUE) -+ (match_operand: 2 "register_operand")] -+ UNSPEC_CNTP)))))] -+ "TARGET_SVE" -+ { -+ operands[3] = CONSTM1_RTX (mode); -+ } - ) - --;; Shift an SVE vector left and insert a scalar into element 0. --(define_insn "vec_shl_insert_" -- [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w") -- (unspec:SVE_ALL -- [(match_operand:SVE_ALL 1 "register_operand" "0, 0") -- (match_operand: 2 "register_operand" "rZ, w")] -- UNSPEC_INSR))] -+(define_insn_and_rewrite "*aarch64_sve__cntp" -+ [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w") -+ (ANY_MINUS:VNx2DI -+ (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w") -+ (vec_duplicate:VNx2DI -+ (zero_extend:DI -+ (unspec:SI -+ [(match_operand 3) -+ (const_int SVE_KNOWN_PTRUE) -+ (match_operand: 2 "register_operand" "Upa, Upa")] -+ UNSPEC_CNTP)))))] - "TARGET_SVE" - "@ -- insr\t%0., %2 -- insr\t%0., %2" -+ p\t%0.d, %2 -+ movprfx\t%0, %1\;p\t%0.d, %2" -+ "&& !CONSTANT_P (operands[3])" -+ { -+ operands[3] = CONSTM1_RTX (mode); -+ } -+ [(set_attr "movprfx" "*,yes")] - ) - --(define_expand "copysign3" -- [(match_operand:SVE_F 0 "register_operand") -- (match_operand:SVE_F 1 "register_operand") -- (match_operand:SVE_F 2 "register_operand")] -+;; Decrement a vector of SIs by the number of set bits in a predicate. -+;; See aarch64_sve_cntp for a description of the operands. -+(define_expand "@aarch64_sve__cntp" -+ [(set (match_operand:VNx4SI 0 "register_operand") -+ (ANY_MINUS:VNx4SI -+ (match_operand:VNx4SI_ONLY 1 "register_operand") -+ (vec_duplicate:VNx4SI -+ (unspec:SI -+ [(match_dup 3) -+ (const_int SVE_KNOWN_PTRUE) -+ (match_operand: 2 "register_operand")] -+ UNSPEC_CNTP))))] - "TARGET_SVE" - { -- rtx sign = gen_reg_rtx (mode); -- rtx mant = gen_reg_rtx (mode); -- rtx int_res = gen_reg_rtx (mode); -- int bits = GET_MODE_UNIT_BITSIZE (mode) - 1; -- -- rtx arg1 = lowpart_subreg (mode, operands[1], mode); -- rtx arg2 = lowpart_subreg (mode, operands[2], mode); -- -- emit_insn (gen_and3 -- (sign, arg2, -- aarch64_simd_gen_const_vector_dup (mode, -- HOST_WIDE_INT_M1U -- << bits))); -- emit_insn (gen_and3 -- (mant, arg1, -- aarch64_simd_gen_const_vector_dup (mode, -- ~(HOST_WIDE_INT_M1U -- << bits)))); -- emit_insn (gen_ior3 (int_res, sign, mant)); -- emit_move_insn (operands[0], gen_lowpart (mode, int_res)); -- DONE; -+ operands[3] = CONSTM1_RTX (mode); - } - ) - --(define_expand "xorsign3" -- [(match_operand:SVE_F 0 "register_operand") -- (match_operand:SVE_F 1 "register_operand") -- (match_operand:SVE_F 2 "register_operand")] -+(define_insn_and_rewrite "*aarch64_sve__cntp" -+ [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w") -+ (ANY_MINUS:VNx4SI -+ (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w") -+ (vec_duplicate:VNx4SI -+ (unspec:SI -+ [(match_operand 3) -+ (const_int SVE_KNOWN_PTRUE) -+ (match_operand: 2 "register_operand" "Upa, Upa")] -+ UNSPEC_CNTP))))] - "TARGET_SVE" -+ "@ -+ p\t%0.s, %2 -+ movprfx\t%0, %1\;p\t%0.s, %2" -+ "&& !CONSTANT_P (operands[3])" - { -- rtx sign = gen_reg_rtx (mode); -- rtx int_res = gen_reg_rtx (mode); -- int bits = GET_MODE_UNIT_BITSIZE (mode) - 1; -- -- rtx arg1 = lowpart_subreg (mode, operands[1], mode); -- rtx arg2 = lowpart_subreg (mode, operands[2], mode); -+ operands[3] = CONSTM1_RTX (mode); -+ } -+ [(set_attr "movprfx" 
"*,yes")] -+) - -- emit_insn (gen_and3 -- (sign, arg2, -- aarch64_simd_gen_const_vector_dup (mode, -- HOST_WIDE_INT_M1U -- << bits))); -- emit_insn (gen_xor3 (int_res, arg1, sign)); -- emit_move_insn (operands[0], gen_lowpart (mode, int_res)); -- DONE; -+;; Decrement a vector of HIs by the number of set bits in a predicate. -+;; See aarch64_sve_cntp for a description of the operands. -+(define_expand "@aarch64_sve__cntp" -+ [(set (match_operand:VNx8HI 0 "register_operand") -+ (ANY_MINUS:VNx8HI -+ (match_operand:VNx8HI_ONLY 1 "register_operand") -+ (vec_duplicate:VNx8HI -+ (truncate:HI -+ (unspec:SI -+ [(match_dup 3) -+ (const_int SVE_KNOWN_PTRUE) -+ (match_operand: 2 "register_operand")] -+ UNSPEC_CNTP)))))] -+ "TARGET_SVE" -+ { -+ operands[3] = CONSTM1_RTX (mode); - } - ) - --;; Unpredicated DOT product. --(define_insn "dot_prod" -- [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w") -- (plus:SVE_SDI -- (unspec:SVE_SDI -- [(match_operand: 1 "register_operand" "w, w") -- (match_operand: 2 "register_operand" "w, w")] -- DOTPROD) -- (match_operand:SVE_SDI 3 "register_operand" "0, w")))] -+(define_insn_and_rewrite "*aarch64_sve__cntp" -+ [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w") -+ (ANY_MINUS:VNx8HI -+ (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w") -+ (vec_duplicate:VNx8HI -+ (match_operator:HI 3 "subreg_lowpart_operator" -+ [(unspec:SI -+ [(match_operand 4) -+ (const_int SVE_KNOWN_PTRUE) -+ (match_operand: 2 "register_operand" "Upa, Upa")] -+ UNSPEC_CNTP)]))))] - "TARGET_SVE" - "@ -- dot\\t%0., %1., %2. -- movprfx\t%0, %3\;dot\\t%0., %1., %2." -+ p\t%0.h, %2 -+ movprfx\t%0, %1\;p\t%0.h, %2" -+ "&& !CONSTANT_P (operands[4])" -+ { -+ operands[4] = CONSTM1_RTX (mode); -+ } - [(set_attr "movprfx" "*,yes")] - ) -diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md -index 2b1ec85ae..a6a14b7fc 100644 ---- a/gcc/config/aarch64/aarch64-tune.md -+++ b/gcc/config/aarch64/aarch64-tune.md -@@ -1,5 +1,5 @@ - ;; -*- buffer-read-only: t -*- - ;; Generated automatically by gentune.sh from aarch64-cores.def - (define_attr "tune" -- "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,ares,neoversen1,neoversee1,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55" -+ "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa65,cortexa65ae,ares,neoversen1,neoversee1,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55" - (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) -diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c -index 5dfcaf57e..ee85bb4e2 100644 ---- a/gcc/config/aarch64/aarch64.c -+++ b/gcc/config/aarch64/aarch64.c -@@ -73,6 +73,8 @@ - #include "selftest-rtl.h" - #include "rtx-vector-builder.h" - #include "intl.h" -+#include "expmed.h" -+#include "function-abi.h" - - /* This file should be included last. */ - #include "target-def.h" -@@ -83,7 +85,7 @@ - /* Information about a legitimate vector immediate operand. 
*/ - struct simd_immediate_info - { -- enum insn_type { MOV, MVN }; -+ enum insn_type { MOV, MVN, INDEX, PTRUE }; - enum modifier_type { LSL, MSL }; - - simd_immediate_info () {} -@@ -92,33 +94,51 @@ struct simd_immediate_info - insn_type = MOV, modifier_type = LSL, - unsigned int = 0); - simd_immediate_info (scalar_mode, rtx, rtx); -+ simd_immediate_info (scalar_int_mode, aarch64_svpattern); - - /* The mode of the elements. */ - scalar_mode elt_mode; - -- /* The value of each element if all elements are the same, or the -- first value if the constant is a series. */ -- rtx value; -- -- /* The value of the step if the constant is a series, null otherwise. */ -- rtx step; -- - /* The instruction to use to move the immediate into a vector. */ - insn_type insn; - -- /* The kind of shift modifier to use, and the number of bits to shift. -- This is (LSL, 0) if no shift is needed. */ -- modifier_type modifier; -- unsigned int shift; -+ union -+ { -+ /* For MOV and MVN. */ -+ struct -+ { -+ /* The value of each element. */ -+ rtx value; -+ -+ /* The kind of shift modifier to use, and the number of bits to shift. -+ This is (LSL, 0) if no shift is needed. */ -+ modifier_type modifier; -+ unsigned int shift; -+ } mov; -+ -+ /* For INDEX. */ -+ struct -+ { -+ /* The value of the first element and the step to be added for each -+ subsequent element. */ -+ rtx base, step; -+ } index; -+ -+ /* For PTRUE. */ -+ aarch64_svpattern pattern; -+ } u; - }; - - /* Construct a floating-point immediate in which each element has mode - ELT_MODE_IN and value VALUE_IN. */ - inline simd_immediate_info - ::simd_immediate_info (scalar_float_mode elt_mode_in, rtx value_in) -- : elt_mode (elt_mode_in), value (value_in), step (NULL_RTX), insn (MOV), -- modifier (LSL), shift (0) --{} -+ : elt_mode (elt_mode_in), insn (MOV) -+{ -+ u.mov.value = value_in; -+ u.mov.modifier = LSL; -+ u.mov.shift = 0; -+} - - /* Construct an integer immediate in which each element has mode ELT_MODE_IN - and value VALUE_IN. The other parameters are as for the structure -@@ -128,17 +148,32 @@ inline simd_immediate_info - unsigned HOST_WIDE_INT value_in, - insn_type insn_in, modifier_type modifier_in, - unsigned int shift_in) -- : elt_mode (elt_mode_in), value (gen_int_mode (value_in, elt_mode_in)), -- step (NULL_RTX), insn (insn_in), modifier (modifier_in), shift (shift_in) --{} -+ : elt_mode (elt_mode_in), insn (insn_in) -+{ -+ u.mov.value = gen_int_mode (value_in, elt_mode_in); -+ u.mov.modifier = modifier_in; -+ u.mov.shift = shift_in; -+} - - /* Construct an integer immediate in which each element has mode ELT_MODE_IN -- and where element I is equal to VALUE_IN + I * STEP_IN. */ -+ and where element I is equal to BASE_IN + I * STEP_IN. */ -+inline simd_immediate_info -+::simd_immediate_info (scalar_mode elt_mode_in, rtx base_in, rtx step_in) -+ : elt_mode (elt_mode_in), insn (INDEX) -+{ -+ u.index.base = base_in; -+ u.index.step = step_in; -+} -+ -+/* Construct a predicate that controls elements of mode ELT_MODE_IN -+ and has PTRUE pattern PATTERN_IN. */ - inline simd_immediate_info --::simd_immediate_info (scalar_mode elt_mode_in, rtx value_in, rtx step_in) -- : elt_mode (elt_mode_in), value (value_in), step (step_in), insn (MOV), -- modifier (LSL), shift (0) --{} -+::simd_immediate_info (scalar_int_mode elt_mode_in, -+ aarch64_svpattern pattern_in) -+ : elt_mode (elt_mode_in), insn (PTRUE) -+{ -+ u.pattern = pattern_in; -+} - - /* The current code model. 
*/ - enum aarch64_code_model aarch64_cmodel; -@@ -177,7 +212,7 @@ unsigned aarch64_architecture_version; - enum aarch64_processor aarch64_tune = cortexa53; - - /* Mask to specify which instruction scheduling options should be used. */ --unsigned long aarch64_tune_flags = 0; -+uint64_t aarch64_tune_flags = 0; - - /* Global flag for PC relative loads. */ - bool aarch64_pcrelative_literal_loads; -@@ -693,7 +728,7 @@ static const struct tune_params generic_tunings = - 4, /* memmov_cost */ - 2, /* issue_rate */ - (AARCH64_FUSE_AES_AESMC), /* fusible_ops */ -- "8", /* function_align. */ -+ "16:12", /* function_align. */ - "4", /* jump_align. */ - "8", /* loop_align. */ - 2, /* int_reassoc_width. */ -@@ -1139,7 +1174,7 @@ struct processor - enum aarch64_processor sched_core; - enum aarch64_arch arch; - unsigned architecture_version; -- const unsigned long flags; -+ const uint64_t flags; - const struct tune_params *const tune; - }; - -@@ -1172,15 +1207,46 @@ static const struct processor *selected_arch; - static const struct processor *selected_cpu; - static const struct processor *selected_tune; - -+enum aarch64_key_type aarch64_ra_sign_key = AARCH64_KEY_A; -+ - /* The current tuning set. */ - struct tune_params aarch64_tune_params = generic_tunings; - -+/* Check whether an 'aarch64_vector_pcs' attribute is valid. */ -+ -+static tree -+handle_aarch64_vector_pcs_attribute (tree *node, tree name, tree, -+ int, bool *no_add_attrs) -+{ -+ /* Since we set fn_type_req to true, the caller should have checked -+ this for us. */ -+ gcc_assert (FUNC_OR_METHOD_TYPE_P (*node)); -+ switch ((arm_pcs) fntype_abi (*node).id ()) -+ { -+ case ARM_PCS_AAPCS64: -+ case ARM_PCS_SIMD: -+ return NULL_TREE; -+ -+ case ARM_PCS_SVE: -+ error ("the %qE attribute cannot be applied to an SVE function type", -+ name); -+ *no_add_attrs = true; -+ return NULL_TREE; -+ -+ case ARM_PCS_TLSDESC: -+ case ARM_PCS_UNKNOWN: -+ break; -+ } -+ gcc_unreachable (); -+} -+ - /* Table of machine attributes. 
*/ - static const struct attribute_spec aarch64_attribute_table[] = - { - /* { name, min_len, max_len, decl_req, type_req, fn_type_req, - affects_type_identity, handler, exclude } */ -- { "aarch64_vector_pcs", 0, 0, false, true, true, true, NULL, NULL }, -+ { "aarch64_vector_pcs", 0, 0, false, true, true, true, -+ handle_aarch64_vector_pcs_attribute, NULL }, - { NULL, 0, 0, false, false, false, false, NULL, NULL } - }; - -@@ -1241,6 +1307,7 @@ static enum aarch64_parse_opt_result - aarch64_handle_standard_branch_protection (char* str, char* rest) - { - aarch64_ra_sign_scope = AARCH64_FUNCTION_NON_LEAF; -+ aarch64_ra_sign_key = AARCH64_KEY_A; - aarch64_enable_bti = 1; - if (rest) - { -@@ -1255,6 +1322,7 @@ aarch64_handle_pac_ret_protection (char* str ATTRIBUTE_UNUSED, - char* rest ATTRIBUTE_UNUSED) - { - aarch64_ra_sign_scope = AARCH64_FUNCTION_NON_LEAF; -+ aarch64_ra_sign_key = AARCH64_KEY_A; - return AARCH64_PARSE_OK; - } - -@@ -1266,6 +1334,14 @@ aarch64_handle_pac_ret_leaf (char* str ATTRIBUTE_UNUSED, - return AARCH64_PARSE_OK; - } - -+static enum aarch64_parse_opt_result -+aarch64_handle_pac_ret_b_key (char* str ATTRIBUTE_UNUSED, -+ char* rest ATTRIBUTE_UNUSED) -+{ -+ aarch64_ra_sign_key = AARCH64_KEY_B; -+ return AARCH64_PARSE_OK; -+} -+ - static enum aarch64_parse_opt_result - aarch64_handle_bti_protection (char* str ATTRIBUTE_UNUSED, - char* rest ATTRIBUTE_UNUSED) -@@ -1276,6 +1352,7 @@ aarch64_handle_bti_protection (char* str ATTRIBUTE_UNUSED, - - static const struct aarch64_branch_protect_type aarch64_pac_ret_subtypes[] = { - { "leaf", aarch64_handle_pac_ret_leaf, NULL, 0 }, -+ { "b-key", aarch64_handle_pac_ret_b_key, NULL, 0 }, - { NULL, NULL, NULL, 0 } - }; - -@@ -1295,6 +1372,66 @@ static const char * const aarch64_condition_codes[] = - "hi", "ls", "ge", "lt", "gt", "le", "al", "nv" - }; - -+/* The preferred condition codes for SVE conditions. */ -+static const char *const aarch64_sve_condition_codes[] = -+{ -+ "none", "any", "nlast", "last", "first", "nfrst", "vs", "vc", -+ "pmore", "plast", "tcont", "tstop", "gt", "le", "al", "nv" -+}; -+ -+/* Return the assembly token for svpattern value VALUE. */ -+ -+static const char * -+svpattern_token (enum aarch64_svpattern pattern) -+{ -+ switch (pattern) -+ { -+#define CASE(UPPER, LOWER, VALUE) case AARCH64_SV_##UPPER: return #LOWER; -+ AARCH64_FOR_SVPATTERN (CASE) -+#undef CASE -+ case AARCH64_NUM_SVPATTERNS: -+ break; -+ } -+ gcc_unreachable (); -+} -+ -+/* Return the descriptor of the SIMD ABI. */ -+ -+static const predefined_function_abi & -+aarch64_simd_abi (void) -+{ -+ predefined_function_abi &simd_abi = function_abis[ARM_PCS_SIMD]; -+ if (!simd_abi.initialized_p ()) -+ { -+ HARD_REG_SET full_reg_clobbers -+ = default_function_abi.full_reg_clobbers (); -+ for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) -+ if (FP_SIMD_SAVED_REGNUM_P (regno)) -+ CLEAR_HARD_REG_BIT (full_reg_clobbers, regno); -+ simd_abi.initialize (ARM_PCS_SIMD, full_reg_clobbers); -+ } -+ return simd_abi; -+} -+ -+/* Return the descriptor of the SVE PCS. 
*/ -+ -+static const predefined_function_abi & -+aarch64_sve_abi (void) -+{ -+ predefined_function_abi &sve_abi = function_abis[ARM_PCS_SVE]; -+ if (!sve_abi.initialized_p ()) -+ { -+ HARD_REG_SET full_reg_clobbers -+ = default_function_abi.full_reg_clobbers (); -+ for (int regno = V8_REGNUM; regno <= V23_REGNUM; ++regno) -+ CLEAR_HARD_REG_BIT (full_reg_clobbers, regno); -+ for (int regno = P4_REGNUM; regno <= P11_REGNUM; ++regno) -+ CLEAR_HARD_REG_BIT (full_reg_clobbers, regno); -+ sve_abi.initialize (ARM_PCS_SVE, full_reg_clobbers); -+ } -+ return sve_abi; -+} -+ - /* Generate code to enable conditional branches in functions over 1 MiB. */ - const char * - aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest, -@@ -1337,6 +1474,14 @@ aarch64_err_no_fpadvsimd (machine_mode mode) - " vector types", "+nofp"); - } - -+/* Return true if REGNO is P0-P15 or one of the special FFR-related -+ registers. */ -+inline bool -+pr_or_ffr_regnum_p (unsigned int regno) -+{ -+ return PR_REGNUM_P (regno) || regno == FFR_REGNUM || regno == FFRT_REGNUM; -+} -+ - /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS. - The register allocator chooses POINTER_AND_FP_REGS if FP_REGS and - GENERAL_REGS have the same cost - even if POINTER_AND_FP_REGS has a much -@@ -1413,6 +1558,16 @@ aarch64_dbx_register_number (unsigned regno) - return DWARF_FRAME_REGISTERS; - } - -+/* If X is a CONST_DOUBLE, return its bit representation as a constant -+ integer, otherwise return X unmodified. */ -+static rtx -+aarch64_bit_representation (rtx x) -+{ -+ if (CONST_DOUBLE_P (x)) -+ x = gen_lowpart (int_mode_for_mode (GET_MODE (x)).require (), x); -+ return x; -+} -+ - /* Return true if MODE is any of the Advanced SIMD structure modes. */ - static bool - aarch64_advsimd_struct_mode_p (machine_mode mode) -@@ -1439,6 +1594,9 @@ const unsigned int VEC_SVE_PRED = 4; - /* Can be used in combination with VEC_ADVSIMD or VEC_SVE_DATA to indicate - a structure of 2, 3 or 4 vectors. */ - const unsigned int VEC_STRUCT = 8; -+/* Can be used in combination with VEC_SVE_DATA to indicate that the -+ vector has fewer significant bytes than a full SVE vector. */ -+const unsigned int VEC_PARTIAL = 16; - /* Useful combinations of the above. */ - const unsigned int VEC_ANY_SVE = VEC_SVE_DATA | VEC_SVE_PRED; - const unsigned int VEC_ANY_DATA = VEC_ADVSIMD | VEC_SVE_DATA; -@@ -1454,34 +1612,84 @@ aarch64_classify_vector_mode (machine_mode mode) - if (aarch64_sve_pred_mode_p (mode)) - return VEC_SVE_PRED; - -- scalar_mode inner = GET_MODE_INNER (mode); -- if (VECTOR_MODE_P (mode) -- && (inner == QImode -- || inner == HImode -- || inner == HFmode -- || inner == SImode -- || inner == SFmode -- || inner == DImode -- || inner == DFmode)) -- { -- if (TARGET_SVE) -- { -- if (known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR)) -- return VEC_SVE_DATA; -- if (known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 2) -- || known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 3) -- || known_eq (GET_MODE_BITSIZE (mode), BITS_PER_SVE_VECTOR * 4)) -- return VEC_SVE_DATA | VEC_STRUCT; -- } -+ /* Make the decision based on the mode's enum value rather than its -+ properties, so that we keep the correct classification regardless -+ of -msve-vector-bits. */ -+ switch (mode) -+ { -+ /* Partial SVE QI vectors. */ -+ case E_VNx2QImode: -+ case E_VNx4QImode: -+ case E_VNx8QImode: -+ /* Partial SVE HI vectors. */ -+ case E_VNx2HImode: -+ case E_VNx4HImode: -+ /* Partial SVE SI vector. */ -+ case E_VNx2SImode: -+ return TARGET_SVE ? 
VEC_SVE_DATA | VEC_PARTIAL : 0; -+ -+ case E_VNx16QImode: -+ case E_VNx8HImode: -+ case E_VNx4SImode: -+ case E_VNx2DImode: -+ case E_VNx8BFmode: -+ case E_VNx8HFmode: -+ case E_VNx4SFmode: -+ case E_VNx2DFmode: -+ return TARGET_SVE ? VEC_SVE_DATA : 0; -+ -+ /* x2 SVE vectors. */ -+ case E_VNx32QImode: -+ case E_VNx16HImode: -+ case E_VNx8SImode: -+ case E_VNx4DImode: -+ case E_VNx16BFmode: -+ case E_VNx16HFmode: -+ case E_VNx8SFmode: -+ case E_VNx4DFmode: -+ /* x3 SVE vectors. */ -+ case E_VNx48QImode: -+ case E_VNx24HImode: -+ case E_VNx12SImode: -+ case E_VNx6DImode: -+ case E_VNx24BFmode: -+ case E_VNx24HFmode: -+ case E_VNx12SFmode: -+ case E_VNx6DFmode: -+ /* x4 SVE vectors. */ -+ case E_VNx64QImode: -+ case E_VNx32HImode: -+ case E_VNx16SImode: -+ case E_VNx8DImode: -+ case E_VNx32BFmode: -+ case E_VNx32HFmode: -+ case E_VNx16SFmode: -+ case E_VNx8DFmode: -+ return TARGET_SVE ? VEC_SVE_DATA | VEC_STRUCT : 0; -+ -+ /* 64-bit Advanced SIMD vectors. */ -+ case E_V8QImode: -+ case E_V4HImode: -+ case E_V2SImode: -+ /* ...E_V1DImode doesn't exist. */ -+ case E_V4HFmode: -+ case E_V4BFmode: -+ case E_V2SFmode: -+ case E_V1DFmode: -+ /* 128-bit Advanced SIMD vectors. */ -+ case E_V16QImode: -+ case E_V8HImode: -+ case E_V4SImode: -+ case E_V2DImode: -+ case E_V8HFmode: -+ case E_V8BFmode: -+ case E_V4SFmode: -+ case E_V2DFmode: -+ return TARGET_SIMD ? VEC_ADVSIMD : 0; - -- /* This includes V1DF but not V1DI (which doesn't exist). */ -- if (TARGET_SIMD -- && (known_eq (GET_MODE_BITSIZE (mode), 64) -- || known_eq (GET_MODE_BITSIZE (mode), 128))) -- return VEC_ADVSIMD; -+ default: -+ return 0; - } -- -- return 0; - } - - /* Return true if MODE is any of the data vector modes, including -@@ -1492,6 +1700,14 @@ aarch64_vector_data_mode_p (machine_mode mode) - return aarch64_classify_vector_mode (mode) & VEC_ANY_DATA; - } - -+/* Return true if MODE is any form of SVE mode, including predicates, -+ vectors and structures. */ -+bool -+aarch64_sve_mode_p (machine_mode mode) -+{ -+ return aarch64_classify_vector_mode (mode) & VEC_ANY_SVE; -+} -+ - /* Return true if MODE is an SVE data vector mode; either a single vector - or a structure of vectors. */ - static bool -@@ -1500,6 +1716,24 @@ aarch64_sve_data_mode_p (machine_mode mode) - return aarch64_classify_vector_mode (mode) & VEC_SVE_DATA; - } - -+/* Return the number of defined bytes in one constituent vector of -+ SVE mode MODE, which has vector flags VEC_FLAGS. */ -+static poly_int64 -+aarch64_vl_bytes (machine_mode mode, unsigned int vec_flags) -+{ -+ if (vec_flags & VEC_PARTIAL) -+ /* A single partial vector. */ -+ return GET_MODE_SIZE (mode); -+ -+ if (vec_flags & VEC_SVE_DATA) -+ /* A single vector or a tuple. */ -+ return BYTES_PER_SVE_VECTOR; -+ -+ /* A single predicate. */ -+ gcc_assert (vec_flags & VEC_SVE_PRED); -+ return BYTES_PER_SVE_PRED; -+} -+ - /* Implement target hook TARGET_ARRAY_MODE. */ - static opt_machine_mode - aarch64_array_mode (machine_mode mode, unsigned HOST_WIDE_INT nelems) -@@ -1582,6 +1816,43 @@ aarch64_vectorize_related_mode (machine_mode vector_mode, - return default_vectorize_related_mode (vector_mode, element_mode, nunits); - } - -+/* Return the SVE vector mode that has NUNITS elements of mode INNER_MODE. */ -+ -+opt_machine_mode -+aarch64_sve_data_mode (scalar_mode inner_mode, poly_uint64 nunits) -+{ -+ enum mode_class mclass = (is_a (inner_mode) -+ ? 
MODE_VECTOR_FLOAT : MODE_VECTOR_INT); -+ machine_mode mode; -+ FOR_EACH_MODE_IN_CLASS (mode, mclass) -+ if (inner_mode == GET_MODE_INNER (mode) -+ && known_eq (nunits, GET_MODE_NUNITS (mode)) -+ && aarch64_sve_data_mode_p (mode)) -+ return mode; -+ return opt_machine_mode (); -+} -+ -+/* Return the integer element mode associated with SVE mode MODE. */ -+ -+static scalar_int_mode -+aarch64_sve_element_int_mode (machine_mode mode) -+{ -+ unsigned int elt_bits = vector_element_size (BITS_PER_SVE_VECTOR, -+ GET_MODE_NUNITS (mode)); -+ return int_mode_for_size (elt_bits, 0).require (); -+} -+ -+/* Return the integer vector mode associated with SVE mode MODE. -+ Unlike mode_for_int_vector, this can handle the case in which -+ MODE is a predicate (and thus has a different total size). */ -+ -+machine_mode -+aarch64_sve_int_mode (machine_mode mode) -+{ -+ scalar_int_mode int_mode = aarch64_sve_element_int_mode (mode); -+ return aarch64_sve_data_mode (int_mode, GET_MODE_NUNITS (mode)).require (); -+} -+ - /* Implement TARGET_PREFERRED_ELSE_VALUE. For binary operations, - prefer to use the first arithmetic operand as the else value if - the else value doesn't matter, since that exactly matches the SVE -@@ -1610,13 +1881,19 @@ aarch64_hard_regno_nregs (unsigned regno, machine_mode mode) - { - case FP_REGS: - case FP_LO_REGS: -- if (aarch64_sve_data_mode_p (mode)) -- return exact_div (GET_MODE_SIZE (mode), -- BYTES_PER_SVE_VECTOR).to_constant (); -- return CEIL (lowest_size, UNITS_PER_VREG); -+ case FP_LO8_REGS: -+ { -+ unsigned int vec_flags = aarch64_classify_vector_mode (mode); -+ if (vec_flags & VEC_SVE_DATA) -+ return exact_div (GET_MODE_SIZE (mode), -+ aarch64_vl_bytes (mode, vec_flags)).to_constant (); -+ return CEIL (lowest_size, UNITS_PER_VREG); -+ } - case PR_REGS: - case PR_LO_REGS: - case PR_HI_REGS: -+ case FFR_REGS: -+ case PR_AND_FFR_REGS: - return 1; - default: - return CEIL (lowest_size, UNITS_PER_WORD); -@@ -1637,11 +1914,16 @@ aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode) - return mode == DImode; - - unsigned int vec_flags = aarch64_classify_vector_mode (mode); -+ /* At the moment, partial vector modes are only useful for memory -+ references, but that could change in future. */ -+ if (vec_flags & VEC_PARTIAL) -+ return false; -+ - if (vec_flags & VEC_SVE_PRED) -- return PR_REGNUM_P (regno); -+ return pr_or_ffr_regnum_p (regno); - -- if (PR_REGNUM_P (regno)) -- return 0; -+ if (pr_or_ffr_regnum_p (regno)) -+ return false; - - if (regno == SP_REGNUM) - /* The purpose of comparing with ptr_mode is to support the -@@ -1670,102 +1952,184 @@ aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode) - return false; - } - --/* Return true if this is a definition of a vectorized simd function. */ -+/* Return true if TYPE is a type that should be passed or returned in -+ SVE registers, assuming enough registers are available. When returning -+ true, set *NUM_ZR and *NUM_PR to the number of required Z and P registers -+ respectively. */ - - static bool --aarch64_simd_decl_p (tree fndecl) -+aarch64_sve_argument_p (const_tree type, unsigned int *num_zr, -+ unsigned int *num_pr) - { -- tree fntype; -- -- if (fndecl == NULL) -- return false; -- fntype = TREE_TYPE (fndecl); -- if (fntype == NULL) -- return false; -+ if (aarch64_sve::svbool_type_p (type)) -+ { -+ *num_pr = 1; -+ *num_zr = 0; -+ return true; -+ } - -- /* Functions with the aarch64_vector_pcs attribute use the simd ABI. 
*/ -- if (lookup_attribute ("aarch64_vector_pcs", TYPE_ATTRIBUTES (fntype)) != NULL) -- return true; -+ if (unsigned int nvectors = aarch64_sve::nvectors_if_data_type (type)) -+ { -+ *num_pr = 0; -+ *num_zr = nvectors; -+ return true; -+ } - - return false; - } - --/* Return the mode a register save/restore should use. DImode for integer -- registers, DFmode for FP registers in non-SIMD functions (they only save -- the bottom half of a 128 bit register), or TFmode for FP registers in -- SIMD functions. */ -+/* Return true if a function with type FNTYPE returns its value in -+ SVE vector or predicate registers. */ - --static machine_mode --aarch64_reg_save_mode (tree fndecl, unsigned regno) -+static bool -+aarch64_returns_value_in_sve_regs_p (const_tree fntype) - { -- return GP_REGNUM_P (regno) -- ? E_DImode -- : (aarch64_simd_decl_p (fndecl) ? E_TFmode : E_DFmode); -+ unsigned int num_zr, num_pr; -+ tree return_type = TREE_TYPE (fntype); -+ return (return_type != error_mark_node -+ && aarch64_sve_argument_p (return_type, &num_zr, &num_pr)); - } - --/* Return true if the instruction is a call to a SIMD function, false -- if it is not a SIMD function or if we do not know anything about -- the function. */ -+/* Return true if a function with type FNTYPE takes arguments in -+ SVE vector or predicate registers. */ - - static bool --aarch64_simd_call_p (rtx_insn *insn) -+aarch64_takes_arguments_in_sve_regs_p (const_tree fntype) - { -- rtx symbol; -- rtx call; -- tree fndecl; -+ CUMULATIVE_ARGS args_so_far_v; -+ aarch64_init_cumulative_args (&args_so_far_v, NULL_TREE, NULL_RTX, -+ NULL_TREE, 0, true); -+ cumulative_args_t args_so_far = pack_cumulative_args (&args_so_far_v); - -- gcc_assert (CALL_P (insn)); -- call = get_call_rtx_from (insn); -- symbol = XEXP (XEXP (call, 0), 0); -- if (GET_CODE (symbol) != SYMBOL_REF) -- return false; -- fndecl = SYMBOL_REF_DECL (symbol); -- if (!fndecl) -- return false; -+ for (tree chain = TYPE_ARG_TYPES (fntype); -+ chain && chain != void_list_node; -+ chain = TREE_CHAIN (chain)) -+ { -+ tree arg_type = TREE_VALUE (chain); -+ if (arg_type == error_mark_node) -+ return false; -+ -+ function_arg_info arg (arg_type, /*named=*/true); -+ apply_pass_by_reference_rules (&args_so_far_v, arg); -+ unsigned int num_zr, num_pr; -+ if (aarch64_sve_argument_p (arg.type, &num_zr, &num_pr)) -+ return true; - -- return aarch64_simd_decl_p (fndecl); -+ targetm.calls.function_arg_advance (args_so_far, arg); -+ } -+ return false; - } - --/* Implement TARGET_REMOVE_EXTRA_CALL_PRESERVED_REGS. If INSN calls -- a function that uses the SIMD ABI, take advantage of the extra -- call-preserved registers that the ABI provides. */ -+/* Implement TARGET_FNTYPE_ABI. */ - --void --aarch64_remove_extra_call_preserved_regs (rtx_insn *insn, -- HARD_REG_SET *return_set) -+static const predefined_function_abi & -+aarch64_fntype_abi (const_tree fntype) - { -- if (aarch64_simd_call_p (insn)) -- { -- for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) -- if (FP_SIMD_SAVED_REGNUM_P (regno)) -- CLEAR_HARD_REG_BIT (*return_set, regno); -- } -+ if (lookup_attribute ("aarch64_vector_pcs", TYPE_ATTRIBUTES (fntype))) -+ return aarch64_simd_abi (); -+ -+ if (aarch64_returns_value_in_sve_regs_p (fntype) -+ || aarch64_takes_arguments_in_sve_regs_p (fntype)) -+ return aarch64_sve_abi (); -+ -+ return default_function_abi; - } - --/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The callee only saves -- the lower 64 bits of a 128-bit register. 
Tell the compiler the callee -- clobbers the top 64 bits when restoring the bottom 64 bits. */ -+/* Implement TARGET_COMPATIBLE_VECTOR_TYPES_P. */ - - static bool --aarch64_hard_regno_call_part_clobbered (rtx_insn *insn, unsigned int regno, -- machine_mode mode) -+aarch64_compatible_vector_types_p (const_tree type1, const_tree type2) -+{ -+ return (aarch64_sve::builtin_type_p (type1) -+ == aarch64_sve::builtin_type_p (type2)); -+} -+ -+/* Return true if we should emit CFI for register REGNO. */ -+ -+static bool -+aarch64_emit_cfi_for_reg_p (unsigned int regno) - { -- bool simd_p = insn && CALL_P (insn) && aarch64_simd_call_p (insn); -- return FP_REGNUM_P (regno) -- && maybe_gt (GET_MODE_SIZE (mode), simd_p ? 16 : 8); -+ return (GP_REGNUM_P (regno) -+ || !default_function_abi.clobbers_full_reg_p (regno)); - } - --/* Implement TARGET_RETURN_CALL_WITH_MAX_CLOBBERS. */ -+/* Return the mode we should use to save and restore register REGNO. */ - --rtx_insn * --aarch64_return_call_with_max_clobbers (rtx_insn *call_1, rtx_insn *call_2) -+static machine_mode -+aarch64_reg_save_mode (unsigned int regno) - { -- gcc_assert (CALL_P (call_1) && CALL_P (call_2)); -+ if (GP_REGNUM_P (regno)) -+ return DImode; - -- if (!aarch64_simd_call_p (call_1) || aarch64_simd_call_p (call_2)) -- return call_1; -- else -- return call_2; -+ if (FP_REGNUM_P (regno)) -+ switch (crtl->abi->id ()) -+ { -+ case ARM_PCS_AAPCS64: -+ /* Only the low 64 bits are saved by the base PCS. */ -+ return DFmode; -+ -+ case ARM_PCS_SIMD: -+ /* The vector PCS saves the low 128 bits (which is the full -+ register on non-SVE targets). */ -+ return TFmode; -+ -+ case ARM_PCS_SVE: -+ /* Use vectors of DImode for registers that need frame -+ information, so that the first 64 bytes of the save slot -+ are always the equivalent of what storing D would give. */ -+ if (aarch64_emit_cfi_for_reg_p (regno)) -+ return VNx2DImode; -+ -+ /* Use vectors of bytes otherwise, so that the layout is -+ endian-agnostic, and so that we can use LDR and STR for -+ big-endian targets. */ -+ return VNx16QImode; -+ -+ case ARM_PCS_TLSDESC: -+ case ARM_PCS_UNKNOWN: -+ break; -+ } -+ -+ if (PR_REGNUM_P (regno)) -+ /* Save the full predicate register. */ -+ return VNx16BImode; -+ -+ gcc_unreachable (); -+} -+ -+/* Implement TARGET_INSN_CALLEE_ABI. */ -+ -+const predefined_function_abi & -+aarch64_insn_callee_abi (const rtx_insn *insn) -+{ -+ rtx pat = PATTERN (insn); -+ gcc_assert (GET_CODE (pat) == PARALLEL); -+ rtx unspec = XVECEXP (pat, 0, 1); -+ gcc_assert (GET_CODE (unspec) == UNSPEC -+ && XINT (unspec, 1) == UNSPEC_CALLEE_ABI); -+ return function_abis[INTVAL (XVECEXP (unspec, 0, 0))]; -+} -+ -+/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The callee only saves -+ the lower 64 bits of a 128-bit register. Tell the compiler the callee -+ clobbers the top 64 bits when restoring the bottom 64 bits. */ -+ -+static bool -+aarch64_hard_regno_call_part_clobbered (unsigned int abi_id, -+ unsigned int regno, -+ machine_mode mode) -+{ -+ if (FP_REGNUM_P (regno) && abi_id != ARM_PCS_SVE) -+ { -+ poly_int64 per_register_size = GET_MODE_SIZE (mode); -+ unsigned int nregs = hard_regno_nregs (regno, mode); -+ if (nregs > 1) -+ per_register_size = exact_div (per_register_size, nregs); -+ if (abi_id == ARM_PCS_SIMD || abi_id == ARM_PCS_TLSDESC) -+ return maybe_gt (per_register_size, 16); -+ return maybe_gt (per_register_size, 8); -+ } -+ return false; - } - - /* Implement REGMODE_NATURAL_SIZE. 
*/ -@@ -1899,10 +2263,33 @@ emit_set_insn (rtx x, rtx y) - rtx - aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y) - { -- machine_mode mode = SELECT_CC_MODE (code, x, y); -- rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM); -+ machine_mode cmp_mode = GET_MODE (x); -+ machine_mode cc_mode; -+ rtx cc_reg; -+ -+ if (cmp_mode == TImode) -+ { -+ gcc_assert (code == NE); -+ -+ cc_mode = CCmode; -+ cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM); - -- emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y)); -+ rtx x_lo = operand_subword (x, 0, 0, TImode); -+ rtx y_lo = operand_subword (y, 0, 0, TImode); -+ emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x_lo, y_lo)); -+ -+ rtx x_hi = operand_subword (x, 1, 0, TImode); -+ rtx y_hi = operand_subword (y, 1, 0, TImode); -+ emit_insn (gen_ccmpdi (cc_reg, cc_reg, x_hi, y_hi, -+ gen_rtx_EQ (cc_mode, cc_reg, const0_rtx), -+ GEN_INT (AARCH64_EQ))); -+ } -+ else -+ { -+ cc_mode = SELECT_CC_MODE (code, x, y); -+ cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM); -+ emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x, y)); -+ } - return cc_reg; - } - -@@ -2466,7 +2853,36 @@ aarch64_zero_extend_const_eq (machine_mode xmode, rtx x, - gcc_assert (r != NULL); - return rtx_equal_p (x, r); - } -- -+ -+/* Return TARGET if it is nonnull and a register of mode MODE. -+ Otherwise, return a fresh register of mode MODE if we can, -+ or TARGET reinterpreted as MODE if we can't. */ -+ -+static rtx -+aarch64_target_reg (rtx target, machine_mode mode) -+{ -+ if (target && REG_P (target) && GET_MODE (target) == mode) -+ return target; -+ if (!can_create_pseudo_p ()) -+ { -+ gcc_assert (target); -+ return gen_lowpart (mode, target); -+ } -+ return gen_reg_rtx (mode); -+} -+ -+/* Return a register that contains the constant in BUILDER, given that -+ the constant is a legitimate move operand. Use TARGET as the register -+ if it is nonnull and convenient. */ -+ -+static rtx -+aarch64_emit_set_immediate (rtx target, rtx_vector_builder &builder) -+{ -+ rtx src = builder.build (); -+ target = aarch64_target_reg (target, GET_MODE (src)); -+ emit_insn (gen_rtx_SET (target, src)); -+ return target; -+} - - static rtx - aarch64_force_temporary (machine_mode mode, rtx x, rtx value) -@@ -2481,82 +2897,474 @@ aarch64_force_temporary (machine_mode mode, rtx x, rtx value) - } - } - --/* Return true if we can move VALUE into a register using a single -- CNT[BHWD] instruction. */ -+/* Return true if predicate value X is a constant in which every element -+ is a CONST_INT. When returning true, describe X in BUILDER as a VNx16BI -+ value, i.e. as a predicate in which all bits are significant. */ - - static bool --aarch64_sve_cnt_immediate_p (poly_int64 value) -+aarch64_get_sve_pred_bits (rtx_vector_builder &builder, rtx x) - { -- HOST_WIDE_INT factor = value.coeffs[0]; -- /* The coefficient must be [1, 16] * {2, 4, 8, 16}. 
*/ -- return (value.coeffs[1] == factor -- && IN_RANGE (factor, 2, 16 * 16) -- && (factor & 1) == 0 -- && factor <= 16 * (factor & -factor)); -+ if (GET_CODE (x) != CONST_VECTOR) -+ return false; -+ -+ unsigned int factor = vector_element_size (GET_MODE_NUNITS (VNx16BImode), -+ GET_MODE_NUNITS (GET_MODE (x))); -+ unsigned int npatterns = CONST_VECTOR_NPATTERNS (x) * factor; -+ unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x); -+ builder.new_vector (VNx16BImode, npatterns, nelts_per_pattern); -+ -+ unsigned int nelts = const_vector_encoded_nelts (x); -+ for (unsigned int i = 0; i < nelts; ++i) -+ { -+ rtx elt = CONST_VECTOR_ENCODED_ELT (x, i); -+ if (!CONST_INT_P (elt)) -+ return false; -+ -+ builder.quick_push (elt); -+ for (unsigned int j = 1; j < factor; ++j) -+ builder.quick_push (const0_rtx); -+ } -+ builder.finalize (); -+ return true; - } - --/* Likewise for rtx X. */ -+/* BUILDER contains a predicate constant of mode VNx16BI. Return the -+ widest predicate element size it can have (that is, the largest size -+ for which each element would still be 0 or 1). */ - --bool --aarch64_sve_cnt_immediate_p (rtx x) -+unsigned int -+aarch64_widest_sve_pred_elt_size (rtx_vector_builder &builder) - { -- poly_int64 value; -- return poly_int_rtx_p (x, &value) && aarch64_sve_cnt_immediate_p (value); -+ /* Start with the most optimistic assumption: that we only need -+ one bit per pattern. This is what we will use if only the first -+ bit in each pattern is ever set. */ -+ unsigned int mask = GET_MODE_SIZE (DImode); -+ mask |= builder.npatterns (); -+ -+ /* Look for set bits. */ -+ unsigned int nelts = builder.encoded_nelts (); -+ for (unsigned int i = 1; i < nelts; ++i) -+ if (INTVAL (builder.elt (i)) != 0) -+ { -+ if (i & 1) -+ return 1; -+ mask |= i; -+ } -+ return mask & -mask; - } - --/* Return the asm string for an instruction with a CNT-like vector size -- operand (a vector pattern followed by a multiplier in the range [1, 16]). -- PREFIX is the mnemonic without the size suffix and OPERANDS is the -- first part of the operands template (the part that comes before the -- vector size itself). FACTOR is the number of quadwords. -- NELTS_PER_VQ, if nonzero, is the number of elements in each quadword. -- If it is zero, we can use any element size. */ -+/* If VNx16BImode rtx X is a canonical PTRUE for a predicate mode, -+ return that predicate mode, otherwise return opt_machine_mode (). */ - --static char * --aarch64_output_sve_cnt_immediate (const char *prefix, const char *operands, -- unsigned int factor, -- unsigned int nelts_per_vq) -+opt_machine_mode -+aarch64_ptrue_all_mode (rtx x) - { -- static char buffer[sizeof ("sqincd\t%x0, %w0, all, mul #16")]; -+ gcc_assert (GET_MODE (x) == VNx16BImode); -+ if (GET_CODE (x) != CONST_VECTOR -+ || !CONST_VECTOR_DUPLICATE_P (x) -+ || !CONST_INT_P (CONST_VECTOR_ENCODED_ELT (x, 0)) -+ || INTVAL (CONST_VECTOR_ENCODED_ELT (x, 0)) == 0) -+ return opt_machine_mode (); - -- if (nelts_per_vq == 0) -- /* There is some overlap in the ranges of the four CNT instructions. -- Here we always use the smallest possible element size, so that the -- multiplier is 1 whereever possible. 
*/ -- nelts_per_vq = factor & -factor; -- int shift = std::min (exact_log2 (nelts_per_vq), 4); -- gcc_assert (IN_RANGE (shift, 1, 4)); -- char suffix = "dwhb"[shift - 1]; -+ unsigned int nelts = const_vector_encoded_nelts (x); -+ for (unsigned int i = 1; i < nelts; ++i) -+ if (CONST_VECTOR_ENCODED_ELT (x, i) != const0_rtx) -+ return opt_machine_mode (); - -- factor >>= shift; -- unsigned int written; -- if (factor == 1) -- written = snprintf (buffer, sizeof (buffer), "%s%c\t%s", -- prefix, suffix, operands); -- else -- written = snprintf (buffer, sizeof (buffer), "%s%c\t%s, all, mul #%d", -- prefix, suffix, operands, factor); -- gcc_assert (written < sizeof (buffer)); -- return buffer; -+ return aarch64_sve_pred_mode (nelts); - } - --/* Return the asm string for an instruction with a CNT-like vector size -- operand (a vector pattern followed by a multiplier in the range [1, 16]). -- PREFIX is the mnemonic without the size suffix and OPERANDS is the -- first part of the operands template (the part that comes before the -- vector size itself). X is the value of the vector size operand, -- as a polynomial integer rtx. */ -+/* BUILDER is a predicate constant of mode VNx16BI. Consider the value -+ that the constant would have with predicate element size ELT_SIZE -+ (ignoring the upper bits in each element) and return: - --char * -+ * -1 if all bits are set -+ * N if the predicate has N leading set bits followed by all clear bits -+ * 0 if the predicate does not have any of these forms. */ -+ -+int -+aarch64_partial_ptrue_length (rtx_vector_builder &builder, -+ unsigned int elt_size) -+{ -+ /* If nelts_per_pattern is 3, we have set bits followed by clear bits -+ followed by set bits. */ -+ if (builder.nelts_per_pattern () == 3) -+ return 0; -+ -+ /* Skip over leading set bits. */ -+ unsigned int nelts = builder.encoded_nelts (); -+ unsigned int i = 0; -+ for (; i < nelts; i += elt_size) -+ if (INTVAL (builder.elt (i)) == 0) -+ break; -+ unsigned int vl = i / elt_size; -+ -+ /* Check for the all-true case. */ -+ if (i == nelts) -+ return -1; -+ -+ /* If nelts_per_pattern is 1, then either VL is zero, or we have a -+ repeating pattern of set bits followed by clear bits. */ -+ if (builder.nelts_per_pattern () != 2) -+ return 0; -+ -+ /* We have a "foreground" value and a duplicated "background" value. -+ If the background might repeat and the last set bit belongs to it, -+ we might have set bits followed by clear bits followed by set bits. */ -+ if (i > builder.npatterns () && maybe_ne (nelts, builder.full_nelts ())) -+ return 0; -+ -+ /* Make sure that the rest are all clear. */ -+ for (; i < nelts; i += elt_size) -+ if (INTVAL (builder.elt (i)) != 0) -+ return 0; -+ -+ return vl; -+} -+ -+/* See if there is an svpattern that encodes an SVE predicate of mode -+ PRED_MODE in which the first VL bits are set and the rest are clear. -+ Return the pattern if so, otherwise return AARCH64_NUM_SVPATTERNS. -+ A VL of -1 indicates an all-true vector. 
*/ -+ -+aarch64_svpattern -+aarch64_svpattern_for_vl (machine_mode pred_mode, int vl) -+{ -+ if (vl < 0) -+ return AARCH64_SV_ALL; -+ -+ if (maybe_gt (vl, GET_MODE_NUNITS (pred_mode))) -+ return AARCH64_NUM_SVPATTERNS; -+ -+ if (vl >= 1 && vl <= 8) -+ return aarch64_svpattern (AARCH64_SV_VL1 + (vl - 1)); -+ -+ if (vl >= 16 && vl <= 256 && pow2p_hwi (vl)) -+ return aarch64_svpattern (AARCH64_SV_VL16 + (exact_log2 (vl) - 4)); -+ -+ int max_vl; -+ if (GET_MODE_NUNITS (pred_mode).is_constant (&max_vl)) -+ { -+ if (vl == (max_vl / 3) * 3) -+ return AARCH64_SV_MUL3; -+ /* These would only trigger for non-power-of-2 lengths. */ -+ if (vl == (max_vl & -4)) -+ return AARCH64_SV_MUL4; -+ if (vl == (1 << floor_log2 (max_vl))) -+ return AARCH64_SV_POW2; -+ if (vl == max_vl) -+ return AARCH64_SV_ALL; -+ } -+ return AARCH64_NUM_SVPATTERNS; -+} -+ -+/* Return a VNx16BImode constant in which every sequence of ELT_SIZE -+ bits has the lowest bit set and the upper bits clear. This is the -+ VNx16BImode equivalent of a PTRUE for controlling elements of -+ ELT_SIZE bytes. However, because the constant is VNx16BImode, -+ all bits are significant, even the upper zeros. */ -+ -+rtx -+aarch64_ptrue_all (unsigned int elt_size) -+{ -+ rtx_vector_builder builder (VNx16BImode, elt_size, 1); -+ builder.quick_push (const1_rtx); -+ for (unsigned int i = 1; i < elt_size; ++i) -+ builder.quick_push (const0_rtx); -+ return builder.build (); -+} -+ -+/* Return an all-true predicate register of mode MODE. */ -+ -+rtx -+aarch64_ptrue_reg (machine_mode mode) -+{ -+ gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL); -+ rtx reg = force_reg (VNx16BImode, CONSTM1_RTX (VNx16BImode)); -+ return gen_lowpart (mode, reg); -+} -+ -+/* Return an all-false predicate register of mode MODE. */ -+ -+rtx -+aarch64_pfalse_reg (machine_mode mode) -+{ -+ gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL); -+ rtx reg = force_reg (VNx16BImode, CONST0_RTX (VNx16BImode)); -+ return gen_lowpart (mode, reg); -+} -+ -+/* Return true if predicate PRED1[0] is true whenever predicate PRED2 is -+ true, or alternatively if we know that the operation predicated by -+ PRED1[0] is safe to perform whenever PRED2 is true. PRED1[1] is a -+ aarch64_sve_gp_strictness operand that describes the operation -+ predicated by PRED1[0]. */ -+ -+bool -+aarch64_sve_pred_dominates_p (rtx *pred1, rtx pred2) -+{ -+ machine_mode mode = GET_MODE (pred2); -+ gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL -+ && mode == GET_MODE (pred1[0]) -+ && aarch64_sve_gp_strictness (pred1[1], SImode)); -+ return (pred1[0] == CONSTM1_RTX (mode) -+ || INTVAL (pred1[1]) == SVE_RELAXED_GP -+ || rtx_equal_p (pred1[0], pred2)); -+} -+ -+/* PRED1[0] is a PTEST predicate and PRED1[1] is an aarch64_sve_ptrue_flag -+ for it. PRED2[0] is the predicate for the instruction whose result -+ is tested by the PTEST and PRED2[1] is again an aarch64_sve_ptrue_flag -+ for it. Return true if we can prove that the two predicates are -+ equivalent for PTEST purposes; that is, if we can replace PRED2[0] -+ with PRED1[0] without changing behavior. 
*/ -+ -+bool -+aarch64_sve_same_pred_for_ptest_p (rtx *pred1, rtx *pred2) -+{ -+ machine_mode mode = GET_MODE (pred1[0]); -+ gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL -+ && mode == GET_MODE (pred2[0]) -+ && aarch64_sve_ptrue_flag (pred1[1], SImode) -+ && aarch64_sve_ptrue_flag (pred2[1], SImode)); -+ -+ bool ptrue1_p = (pred1[0] == CONSTM1_RTX (mode) -+ || INTVAL (pred1[1]) == SVE_KNOWN_PTRUE); -+ bool ptrue2_p = (pred2[0] == CONSTM1_RTX (mode) -+ || INTVAL (pred2[1]) == SVE_KNOWN_PTRUE); -+ return (ptrue1_p && ptrue2_p) || rtx_equal_p (pred1[0], pred2[0]); -+} -+ -+/* Emit a comparison CMP between OP0 and OP1, both of which have mode -+ DATA_MODE, and return the result in a predicate of mode PRED_MODE. -+ Use TARGET as the target register if nonnull and convenient. */ -+ -+static rtx -+aarch64_sve_emit_int_cmp (rtx target, machine_mode pred_mode, rtx_code cmp, -+ machine_mode data_mode, rtx op1, rtx op2) -+{ -+ insn_code icode = code_for_aarch64_pred_cmp (cmp, data_mode); -+ expand_operand ops[5]; -+ create_output_operand (&ops[0], target, pred_mode); -+ create_input_operand (&ops[1], CONSTM1_RTX (pred_mode), pred_mode); -+ create_integer_operand (&ops[2], SVE_KNOWN_PTRUE); -+ create_input_operand (&ops[3], op1, data_mode); -+ create_input_operand (&ops[4], op2, data_mode); -+ expand_insn (icode, 5, ops); -+ return ops[0].value; -+} -+ -+/* Use a comparison to convert integer vector SRC into MODE, which is -+ the corresponding SVE predicate mode. Use TARGET for the result -+ if it's nonnull and convenient. */ -+ -+rtx -+aarch64_convert_sve_data_to_pred (rtx target, machine_mode mode, rtx src) -+{ -+ machine_mode src_mode = GET_MODE (src); -+ return aarch64_sve_emit_int_cmp (target, mode, NE, src_mode, -+ src, CONST0_RTX (src_mode)); -+} -+ -+/* Return the assembly token for svprfop value PRFOP. */ -+ -+static const char * -+svprfop_token (enum aarch64_svprfop prfop) -+{ -+ switch (prfop) -+ { -+#define CASE(UPPER, LOWER, VALUE) case AARCH64_SV_##UPPER: return #LOWER; -+ AARCH64_FOR_SVPRFOP (CASE) -+#undef CASE -+ case AARCH64_NUM_SVPRFOPS: -+ break; -+ } -+ gcc_unreachable (); -+} -+ -+/* Return the assembly string for an SVE prefetch operation with -+ mnemonic MNEMONIC, given that PRFOP_RTX is the prefetch operation -+ and that SUFFIX is the format for the remaining operands. */ -+ -+char * -+aarch64_output_sve_prefetch (const char *mnemonic, rtx prfop_rtx, -+ const char *suffix) -+{ -+ static char buffer[128]; -+ aarch64_svprfop prfop = (aarch64_svprfop) INTVAL (prfop_rtx); -+ unsigned int written = snprintf (buffer, sizeof (buffer), "%s\t%s, %s", -+ mnemonic, svprfop_token (prfop), suffix); -+ gcc_assert (written < sizeof (buffer)); -+ return buffer; -+} -+ -+/* Check whether we can calculate the number of elements in PATTERN -+ at compile time, given that there are NELTS_PER_VQ elements per -+ 128-bit block. Return the value if so, otherwise return -1. */ -+ -+HOST_WIDE_INT -+aarch64_fold_sve_cnt_pat (aarch64_svpattern pattern, unsigned int nelts_per_vq) -+{ -+ unsigned int vl, const_vg; -+ if (pattern >= AARCH64_SV_VL1 && pattern <= AARCH64_SV_VL8) -+ vl = 1 + (pattern - AARCH64_SV_VL1); -+ else if (pattern >= AARCH64_SV_VL16 && pattern <= AARCH64_SV_VL256) -+ vl = 16 << (pattern - AARCH64_SV_VL16); -+ else if (aarch64_sve_vg.is_constant (&const_vg)) -+ { -+ /* There are two vector granules per quadword. 
*/ -+ unsigned int nelts = (const_vg / 2) * nelts_per_vq; -+ switch (pattern) -+ { -+ case AARCH64_SV_POW2: return 1 << floor_log2 (nelts); -+ case AARCH64_SV_MUL4: return nelts & -4; -+ case AARCH64_SV_MUL3: return (nelts / 3) * 3; -+ case AARCH64_SV_ALL: return nelts; -+ default: gcc_unreachable (); -+ } -+ } -+ else -+ return -1; -+ -+ /* There are two vector granules per quadword. */ -+ poly_uint64 nelts_all = exact_div (aarch64_sve_vg, 2) * nelts_per_vq; -+ if (known_le (vl, nelts_all)) -+ return vl; -+ -+ /* Requesting more elements than are available results in a PFALSE. */ -+ if (known_gt (vl, nelts_all)) -+ return 0; -+ -+ return -1; -+} -+ -+/* Return true if we can move VALUE into a register using a single -+ CNT[BHWD] instruction. */ -+ -+static bool -+aarch64_sve_cnt_immediate_p (poly_int64 value) -+{ -+ HOST_WIDE_INT factor = value.coeffs[0]; -+ /* The coefficient must be [1, 16] * {2, 4, 8, 16}. */ -+ return (value.coeffs[1] == factor -+ && IN_RANGE (factor, 2, 16 * 16) -+ && (factor & 1) == 0 -+ && factor <= 16 * (factor & -factor)); -+} -+ -+/* Likewise for rtx X. */ -+ -+bool -+aarch64_sve_cnt_immediate_p (rtx x) -+{ -+ poly_int64 value; -+ return poly_int_rtx_p (x, &value) && aarch64_sve_cnt_immediate_p (value); -+} -+ -+/* Return the asm string for an instruction with a CNT-like vector size -+ operand (a vector pattern followed by a multiplier in the range [1, 16]). -+ PREFIX is the mnemonic without the size suffix and OPERANDS is the -+ first part of the operands template (the part that comes before the -+ vector size itself). PATTERN is the pattern to use. FACTOR is the -+ number of quadwords. NELTS_PER_VQ, if nonzero, is the number of elements -+ in each quadword. If it is zero, we can use any element size. */ -+ -+static char * -+aarch64_output_sve_cnt_immediate (const char *prefix, const char *operands, -+ aarch64_svpattern pattern, -+ unsigned int factor, -+ unsigned int nelts_per_vq) -+{ -+ static char buffer[sizeof ("sqincd\t%x0, %w0, vl256, mul #16")]; -+ -+ if (nelts_per_vq == 0) -+ /* There is some overlap in the ranges of the four CNT instructions. -+ Here we always use the smallest possible element size, so that the -+ multiplier is 1 whereever possible. */ -+ nelts_per_vq = factor & -factor; -+ int shift = std::min (exact_log2 (nelts_per_vq), 4); -+ gcc_assert (IN_RANGE (shift, 1, 4)); -+ char suffix = "dwhb"[shift - 1]; -+ -+ factor >>= shift; -+ unsigned int written; -+ if (pattern == AARCH64_SV_ALL && factor == 1) -+ written = snprintf (buffer, sizeof (buffer), "%s%c\t%s", -+ prefix, suffix, operands); -+ else if (factor == 1) -+ written = snprintf (buffer, sizeof (buffer), "%s%c\t%s, %s", -+ prefix, suffix, operands, svpattern_token (pattern)); -+ else -+ written = snprintf (buffer, sizeof (buffer), "%s%c\t%s, %s, mul #%d", -+ prefix, suffix, operands, svpattern_token (pattern), -+ factor); -+ gcc_assert (written < sizeof (buffer)); -+ return buffer; -+} -+ -+/* Return the asm string for an instruction with a CNT-like vector size -+ operand (a vector pattern followed by a multiplier in the range [1, 16]). -+ PREFIX is the mnemonic without the size suffix and OPERANDS is the -+ first part of the operands template (the part that comes before the -+ vector size itself). X is the value of the vector size operand, -+ as a polynomial integer rtx; we need to convert this into an "all" -+ pattern with a multiplier. 
*/ -+ -+char * - aarch64_output_sve_cnt_immediate (const char *prefix, const char *operands, - rtx x) - { - poly_int64 value = rtx_to_poly_int64 (x); - gcc_assert (aarch64_sve_cnt_immediate_p (value)); -- return aarch64_output_sve_cnt_immediate (prefix, operands, -+ return aarch64_output_sve_cnt_immediate (prefix, operands, AARCH64_SV_ALL, - value.coeffs[1], 0); - } - -+/* Return the asm string for an instruction with a CNT-like vector size -+ operand (a vector pattern followed by a multiplier in the range [1, 16]). -+ PREFIX is the mnemonic without the size suffix and OPERANDS is the -+ first part of the operands template (the part that comes before the -+ vector size itself). CNT_PAT[0..2] are the operands of the -+ UNSPEC_SVE_CNT_PAT; see aarch64_sve_cnt_pat for details. */ -+ -+char * -+aarch64_output_sve_cnt_pat_immediate (const char *prefix, -+ const char *operands, rtx *cnt_pat) -+{ -+ aarch64_svpattern pattern = (aarch64_svpattern) INTVAL (cnt_pat[0]); -+ unsigned int nelts_per_vq = INTVAL (cnt_pat[1]); -+ unsigned int factor = INTVAL (cnt_pat[2]) * nelts_per_vq; -+ return aarch64_output_sve_cnt_immediate (prefix, operands, pattern, -+ factor, nelts_per_vq); -+} -+ -+/* Return true if we can add X using a single SVE INC or DEC instruction. */ -+ -+bool -+aarch64_sve_scalar_inc_dec_immediate_p (rtx x) -+{ -+ poly_int64 value; -+ return (poly_int_rtx_p (x, &value) -+ && (aarch64_sve_cnt_immediate_p (value) -+ || aarch64_sve_cnt_immediate_p (-value))); -+} -+ -+/* Return the asm string for adding SVE INC/DEC immediate OFFSET to -+ operand 0. */ -+ -+char * -+aarch64_output_sve_scalar_inc_dec (rtx offset) -+{ -+ poly_int64 offset_value = rtx_to_poly_int64 (offset); -+ gcc_assert (offset_value.coeffs[0] == offset_value.coeffs[1]); -+ if (offset_value.coeffs[1] > 0) -+ return aarch64_output_sve_cnt_immediate ("inc", "%x0", AARCH64_SV_ALL, -+ offset_value.coeffs[1], 0); -+ else -+ return aarch64_output_sve_cnt_immediate ("dec", "%x0", AARCH64_SV_ALL, -+ -offset_value.coeffs[1], 0); -+} -+ - /* Return true if we can add VALUE to a register using a single ADDVL - or ADDPL instruction. */ - -@@ -2582,27 +3390,16 @@ aarch64_sve_addvl_addpl_immediate_p (rtx x) - && aarch64_sve_addvl_addpl_immediate_p (value)); - } - --/* Return the asm string for adding ADDVL or ADDPL immediate X to operand 1 -- and storing the result in operand 0. */ -+/* Return the asm string for adding ADDVL or ADDPL immediate OFFSET -+ to operand 1 and storing the result in operand 0. */ - - char * --aarch64_output_sve_addvl_addpl (rtx dest, rtx base, rtx offset) -+aarch64_output_sve_addvl_addpl (rtx offset) - { - static char buffer[sizeof ("addpl\t%x0, %x1, #-") + 3 * sizeof (int)]; - poly_int64 offset_value = rtx_to_poly_int64 (offset); - gcc_assert (aarch64_sve_addvl_addpl_immediate_p (offset_value)); - -- /* Use INC or DEC if possible. */ -- if (rtx_equal_p (dest, base) && GP_REGNUM_P (REGNO (dest))) -- { -- if (aarch64_sve_cnt_immediate_p (offset_value)) -- return aarch64_output_sve_cnt_immediate ("inc", "%x0", -- offset_value.coeffs[1], 0); -- if (aarch64_sve_cnt_immediate_p (-offset_value)) -- return aarch64_output_sve_cnt_immediate ("dec", "%x0", -- -offset_value.coeffs[1], 0); -- } -- - int factor = offset_value.coeffs[1]; - if ((factor & 15) == 0) - snprintf (buffer, sizeof (buffer), "addvl\t%%x0, %%x1, #%d", factor / 16); -@@ -2617,8 +3414,8 @@ aarch64_output_sve_addvl_addpl (rtx dest, rtx base, rtx offset) - factor in *FACTOR_OUT (if nonnull). 
*/ - - bool --aarch64_sve_inc_dec_immediate_p (rtx x, int *factor_out, -- unsigned int *nelts_per_vq_out) -+aarch64_sve_vector_inc_dec_immediate_p (rtx x, int *factor_out, -+ unsigned int *nelts_per_vq_out) - { - rtx elt; - poly_int64 value; -@@ -2652,9 +3449,9 @@ aarch64_sve_inc_dec_immediate_p (rtx x, int *factor_out, - instruction. */ - - bool --aarch64_sve_inc_dec_immediate_p (rtx x) -+aarch64_sve_vector_inc_dec_immediate_p (rtx x) - { -- return aarch64_sve_inc_dec_immediate_p (x, NULL, NULL); -+ return aarch64_sve_vector_inc_dec_immediate_p (x, NULL, NULL); - } - - /* Return the asm template for an SVE vector INC or DEC instruction. -@@ -2662,18 +3459,18 @@ aarch64_sve_inc_dec_immediate_p (rtx x) - value of the vector count operand itself. */ - - char * --aarch64_output_sve_inc_dec_immediate (const char *operands, rtx x) -+aarch64_output_sve_vector_inc_dec (const char *operands, rtx x) - { - int factor; - unsigned int nelts_per_vq; -- if (!aarch64_sve_inc_dec_immediate_p (x, &factor, &nelts_per_vq)) -+ if (!aarch64_sve_vector_inc_dec_immediate_p (x, &factor, &nelts_per_vq)) - gcc_unreachable (); - if (factor < 0) -- return aarch64_output_sve_cnt_immediate ("dec", operands, -factor, -- nelts_per_vq); -+ return aarch64_output_sve_cnt_immediate ("dec", operands, AARCH64_SV_ALL, -+ -factor, nelts_per_vq); - else -- return aarch64_output_sve_cnt_immediate ("inc", operands, factor, -- nelts_per_vq); -+ return aarch64_output_sve_cnt_immediate ("inc", operands, AARCH64_SV_ALL, -+ factor, nelts_per_vq); - } - - static int -@@ -3056,20 +3853,36 @@ aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src, - } - else - { -- /* Use CNTD, then multiply it by FACTOR. */ -- val = gen_int_mode (poly_int64 (2, 2), mode); -+ /* Base the factor on LOW_BIT if we can calculate LOW_BIT -+ directly, since that should increase the chances of being -+ able to use a shift and add sequence. If LOW_BIT itself -+ is out of range, just use CNTD. */ -+ if (low_bit <= 16 * 8) -+ factor /= low_bit; -+ else -+ low_bit = 1; -+ -+ val = gen_int_mode (poly_int64 (low_bit * 2, low_bit * 2), mode); - val = aarch64_force_temporary (mode, temp1, val); - -- /* Go back to using a negative multiplication factor if we have -- no register from which to subtract. */ -- if (code == MINUS && src == const0_rtx) -+ if (can_create_pseudo_p ()) -+ { -+ rtx coeff1 = gen_int_mode (factor, mode); -+ val = expand_mult (mode, val, coeff1, NULL_RTX, false, true); -+ } -+ else - { -- factor = -factor; -- code = PLUS; -+ /* Go back to using a negative multiplication factor if we have -+ no register from which to subtract. */ -+ if (code == MINUS && src == const0_rtx) -+ { -+ factor = -factor; -+ code = PLUS; -+ } -+ rtx coeff1 = gen_int_mode (factor, mode); -+ coeff1 = aarch64_force_temporary (mode, temp2, coeff1); -+ val = gen_rtx_MULT (mode, val, coeff1); - } -- rtx coeff1 = gen_int_mode (factor, mode); -- coeff1 = aarch64_force_temporary (mode, temp2, coeff1); -- val = gen_rtx_MULT (mode, val, coeff1); - } - - if (shift > 0) -@@ -3176,32 +3989,55 @@ aarch64_expand_vec_series (rtx dest, rtx base, rtx step) - emit_set_insn (dest, gen_rtx_VEC_SERIES (mode, base, step)); - } - --/* Try to duplicate SRC into SVE register DEST, given that SRC is an -- integer of mode INT_MODE. Return true on success. */ -+/* Duplicate 128-bit Advanced SIMD vector SRC so that it fills an SVE -+ register of mode MODE. Use TARGET for the result if it's nonnull -+ and convenient. 
- --static bool --aarch64_expand_sve_widened_duplicate (rtx dest, scalar_int_mode src_mode, -- rtx src) --{ -- /* If the constant is smaller than 128 bits, we can do the move -- using a vector of SRC_MODEs. */ -- if (src_mode != TImode) -- { -- poly_uint64 count = exact_div (GET_MODE_SIZE (GET_MODE (dest)), -- GET_MODE_SIZE (src_mode)); -- machine_mode dup_mode = mode_for_vector (src_mode, count).require (); -- emit_move_insn (gen_lowpart (dup_mode, dest), -- gen_const_vec_duplicate (dup_mode, src)); -- return true; -+ The two vector modes must have the same element mode. The behavior -+ is to duplicate architectural lane N of SRC into architectural lanes -+ N + I * STEP of the result. On big-endian targets, architectural -+ lane 0 of an Advanced SIMD vector is the last element of the vector -+ in memory layout, so for big-endian targets this operation has the -+ effect of reversing SRC before duplicating it. Callers need to -+ account for this. */ -+ -+rtx -+aarch64_expand_sve_dupq (rtx target, machine_mode mode, rtx src) -+{ -+ machine_mode src_mode = GET_MODE (src); -+ gcc_assert (GET_MODE_INNER (mode) == GET_MODE_INNER (src_mode)); -+ insn_code icode = (BYTES_BIG_ENDIAN -+ ? code_for_aarch64_vec_duplicate_vq_be (mode) -+ : code_for_aarch64_vec_duplicate_vq_le (mode)); -+ -+ unsigned int i = 0; -+ expand_operand ops[3]; -+ create_output_operand (&ops[i++], target, mode); -+ create_output_operand (&ops[i++], src, src_mode); -+ if (BYTES_BIG_ENDIAN) -+ { -+ /* Create a PARALLEL describing the reversal of SRC. */ -+ unsigned int nelts_per_vq = 128 / GET_MODE_UNIT_BITSIZE (mode); -+ rtx sel = aarch64_gen_stepped_int_parallel (nelts_per_vq, -+ nelts_per_vq - 1, -1); -+ create_fixed_operand (&ops[i++], sel); - } -+ expand_insn (icode, i, ops); -+ return ops[0].value; -+} -+ -+/* Try to force 128-bit vector value SRC into memory and use LD1RQ to fetch -+ the memory image into DEST. Return true on success. */ - -- /* Use LD1RQ[BHWD] to load the 128 bits from memory. */ -- src = force_const_mem (src_mode, src); -+static bool -+aarch64_expand_sve_ld1rq (rtx dest, rtx src) -+{ -+ src = force_const_mem (GET_MODE (src), src); - if (!src) - return false; - - /* Make sure that the address is legitimate. */ -- if (!aarch64_sve_ld1r_operand_p (src)) -+ if (!aarch64_sve_ld1rq_operand_p (src)) - { - rtx addr = force_reg (Pmode, XEXP (src, 0)); - src = replace_equiv_address (src, addr); -@@ -3210,47 +4046,128 @@ aarch64_expand_sve_widened_duplicate (rtx dest, scalar_int_mode src_mode, - machine_mode mode = GET_MODE (dest); - unsigned int elem_bytes = GET_MODE_UNIT_SIZE (mode); - machine_mode pred_mode = aarch64_sve_pred_mode (elem_bytes).require (); -- rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode)); -- src = gen_rtx_UNSPEC (mode, gen_rtvec (2, ptrue, src), UNSPEC_LD1RQ); -- emit_insn (gen_rtx_SET (dest, src)); -+ rtx ptrue = aarch64_ptrue_reg (pred_mode); -+ emit_insn (gen_aarch64_sve_ld1rq (mode, dest, src, ptrue)); - return true; - } - --/* Expand a move of general CONST_VECTOR SRC into DEST, given that it -- isn't a simple duplicate or series. */ -+/* Return a register containing CONST_VECTOR SRC, given that SRC has an -+ SVE data mode and isn't a legitimate constant. Use TARGET for the -+ result if convenient. - --static void --aarch64_expand_sve_const_vector (rtx dest, rtx src) -+ The returned register can have whatever mode seems most natural -+ given the contents of SRC. 
*/ -+ -+static rtx -+aarch64_expand_sve_const_vector (rtx target, rtx src) - { - machine_mode mode = GET_MODE (src); - unsigned int npatterns = CONST_VECTOR_NPATTERNS (src); - unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (src); -- gcc_assert (npatterns > 1); -+ scalar_mode elt_mode = GET_MODE_INNER (mode); -+ unsigned int elt_bits = GET_MODE_BITSIZE (elt_mode); -+ unsigned int encoded_bits = npatterns * nelts_per_pattern * elt_bits; -+ -+ if (nelts_per_pattern == 1 && encoded_bits == 128) -+ { -+ /* The constant is a duplicated quadword but can't be narrowed -+ beyond a quadword. Get the memory image of the first quadword -+ as a 128-bit vector and try using LD1RQ to load it from memory. -+ -+ The effect for both endiannesses is to load memory lane N into -+ architectural lanes N + I * STEP of the result. On big-endian -+ targets, the layout of the 128-bit vector in an Advanced SIMD -+ register would be different from its layout in an SVE register, -+ but this 128-bit vector is a memory value only. */ -+ machine_mode vq_mode = aarch64_vq_mode (elt_mode).require (); -+ rtx vq_value = simplify_gen_subreg (vq_mode, src, mode, 0); -+ if (vq_value && aarch64_expand_sve_ld1rq (target, vq_value)) -+ return target; -+ } -+ -+ if (nelts_per_pattern == 1 && encoded_bits < 128) -+ { -+ /* The vector is a repeating sequence of 64 bits or fewer. -+ See if we can load them using an Advanced SIMD move and then -+ duplicate it to fill a vector. This is better than using a GPR -+ move because it keeps everything in the same register file. */ -+ machine_mode vq_mode = aarch64_vq_mode (elt_mode).require (); -+ rtx_vector_builder builder (vq_mode, npatterns, 1); -+ for (unsigned int i = 0; i < npatterns; ++i) -+ { -+ /* We want memory lane N to go into architectural lane N, -+ so reverse for big-endian targets. The DUP .Q pattern -+ has a compensating reverse built-in. */ -+ unsigned int srci = BYTES_BIG_ENDIAN ? npatterns - i - 1 : i; -+ builder.quick_push (CONST_VECTOR_ENCODED_ELT (src, srci)); -+ } -+ rtx vq_src = builder.build (); -+ if (aarch64_simd_valid_immediate (vq_src, NULL)) -+ { -+ vq_src = force_reg (vq_mode, vq_src); -+ return aarch64_expand_sve_dupq (target, mode, vq_src); -+ } - -- if (nelts_per_pattern == 1) -- { -- /* The constant is a repeating seqeuence of at least two elements, -- where the repeating elements occupy no more than 128 bits. -- Get an integer representation of the replicated value. */ -- scalar_int_mode int_mode; -- if (BYTES_BIG_ENDIAN) -- /* For now, always use LD1RQ to load the value on big-endian -- targets, since the handling of smaller integers includes a -- subreg that is semantically an element reverse. */ -- int_mode = TImode; -- else -+ /* Get an integer representation of the repeating part of Advanced -+ SIMD vector VQ_SRC. This preserves the endianness of VQ_SRC, -+ which for big-endian targets is lane-swapped wrt a normal -+ Advanced SIMD vector. This means that for both endiannesses, -+ memory lane N of SVE vector SRC corresponds to architectural -+ lane N of a register holding VQ_SRC. This in turn means that -+ memory lane 0 of SVE vector SRC is in the lsb of VQ_SRC (viewed -+ as a single 128-bit value) and thus that memory lane 0 of SRC is -+ in the lsb of the integer. Duplicating the integer therefore -+ ensures that memory lane N of SRC goes into architectural lane -+ N + I * INDEX of the SVE register. 
*/ -+ scalar_mode int_mode = int_mode_for_size (encoded_bits, 0).require (); -+ rtx elt_value = simplify_gen_subreg (int_mode, vq_src, vq_mode, 0); -+ if (elt_value) - { -- unsigned int int_bits = GET_MODE_UNIT_BITSIZE (mode) * npatterns; -- gcc_assert (int_bits <= 128); -- int_mode = int_mode_for_size (int_bits, 0).require (); -+ /* Pretend that we had a vector of INT_MODE to start with. */ -+ elt_mode = int_mode; -+ mode = aarch64_full_sve_mode (int_mode).require (); -+ -+ /* If the integer can be moved into a general register by a -+ single instruction, do that and duplicate the result. */ -+ if (CONST_INT_P (elt_value) -+ && aarch64_move_imm (INTVAL (elt_value), elt_mode)) -+ { -+ elt_value = force_reg (elt_mode, elt_value); -+ return expand_vector_broadcast (mode, elt_value); -+ } -+ } -+ else if (npatterns == 1) -+ /* We're duplicating a single value, but can't do better than -+ force it to memory and load from there. This handles things -+ like symbolic constants. */ -+ elt_value = CONST_VECTOR_ENCODED_ELT (src, 0); -+ -+ if (elt_value) -+ { -+ /* Load the element from memory if we can, otherwise move it into -+ a register and use a DUP. */ -+ rtx op = force_const_mem (elt_mode, elt_value); -+ if (!op) -+ op = force_reg (elt_mode, elt_value); -+ return expand_vector_broadcast (mode, op); - } -- rtx int_value = simplify_gen_subreg (int_mode, src, mode, 0); -- if (int_value -- && aarch64_expand_sve_widened_duplicate (dest, int_mode, int_value)) -- return; - } - -+ /* Try using INDEX. */ -+ rtx base, step; -+ if (const_vec_series_p (src, &base, &step)) -+ { -+ aarch64_expand_vec_series (target, base, step); -+ return target; -+ } -+ -+ /* From here on, it's better to force the whole constant to memory -+ if we can. */ -+ if (GET_MODE_NUNITS (mode).is_constant ()) -+ return NULL_RTX; -+ - /* Expand each pattern individually. */ -+ gcc_assert (npatterns > 1); - rtx_vector_builder builder; - auto_vec vectors (npatterns); - for (unsigned int i = 0; i < npatterns; ++i) -@@ -3267,22 +4184,263 @@ aarch64_expand_sve_const_vector (rtx dest, rtx src) - npatterns /= 2; - for (unsigned int i = 0; i < npatterns; ++i) - { -- rtx tmp = (npatterns == 1 ? dest : gen_reg_rtx (mode)); -+ rtx tmp = (npatterns == 1 ? target : gen_reg_rtx (mode)); - rtvec v = gen_rtvec (2, vectors[i], vectors[i + npatterns]); - emit_set_insn (tmp, gen_rtx_UNSPEC (mode, v, UNSPEC_ZIP1)); - vectors[i] = tmp; - } - } -- gcc_assert (vectors[0] == dest); -+ gcc_assert (vectors[0] == target); -+ return target; -+} -+ -+/* Use WHILE to set a predicate register of mode MODE in which the first -+ VL bits are set and the rest are clear. Use TARGET for the register -+ if it's nonnull and convenient. */ -+ -+static rtx -+aarch64_sve_move_pred_via_while (rtx target, machine_mode mode, -+ unsigned int vl) -+{ -+ rtx limit = force_reg (DImode, gen_int_mode (vl, DImode)); -+ target = aarch64_target_reg (target, mode); -+ emit_insn (gen_while (UNSPEC_WHILELO, DImode, mode, -+ target, const0_rtx, limit)); -+ return target; -+} -+ -+static rtx -+aarch64_expand_sve_const_pred_1 (rtx, rtx_vector_builder &, bool); -+ -+/* BUILDER is a constant predicate in which the index of every set bit -+ is a multiple of ELT_SIZE (which is <= 8). Try to load the constant -+ by inverting every element at a multiple of ELT_SIZE and EORing the -+ result with an ELT_SIZE PTRUE. -+ -+ Return a register that contains the constant on success, otherwise -+ return null. Use TARGET as the register if it is nonnull and -+ convenient. 
*/ -+ -+static rtx -+aarch64_expand_sve_const_pred_eor (rtx target, rtx_vector_builder &builder, -+ unsigned int elt_size) -+{ -+ /* Invert every element at a multiple of ELT_SIZE, keeping the -+ other bits zero. */ -+ rtx_vector_builder inv_builder (VNx16BImode, builder.npatterns (), -+ builder.nelts_per_pattern ()); -+ for (unsigned int i = 0; i < builder.encoded_nelts (); ++i) -+ if ((i & (elt_size - 1)) == 0 && INTVAL (builder.elt (i)) == 0) -+ inv_builder.quick_push (const1_rtx); -+ else -+ inv_builder.quick_push (const0_rtx); -+ inv_builder.finalize (); -+ -+ /* See if we can load the constant cheaply. */ -+ rtx inv = aarch64_expand_sve_const_pred_1 (NULL_RTX, inv_builder, false); -+ if (!inv) -+ return NULL_RTX; -+ -+ /* EOR the result with an ELT_SIZE PTRUE. */ -+ rtx mask = aarch64_ptrue_all (elt_size); -+ mask = force_reg (VNx16BImode, mask); -+ target = aarch64_target_reg (target, VNx16BImode); -+ emit_insn (gen_aarch64_pred_z (XOR, VNx16BImode, target, mask, inv, mask)); -+ return target; -+} -+ -+/* BUILDER is a constant predicate in which the index of every set bit -+ is a multiple of ELT_SIZE (which is <= 8). Try to load the constant -+ using a TRN1 of size PERMUTE_SIZE, which is >= ELT_SIZE. Return the -+ register on success, otherwise return null. Use TARGET as the register -+ if nonnull and convenient. */ -+ -+static rtx -+aarch64_expand_sve_const_pred_trn (rtx target, rtx_vector_builder &builder, -+ unsigned int elt_size, -+ unsigned int permute_size) -+{ -+ /* We're going to split the constant into two new constants A and B, -+ with element I of BUILDER going into A if (I & PERMUTE_SIZE) == 0 -+ and into B otherwise. E.g. for PERMUTE_SIZE == 4 && ELT_SIZE == 1: -+ -+ A: { 0, 1, 2, 3, _, _, _, _, 8, 9, 10, 11, _, _, _, _ } -+ B: { 4, 5, 6, 7, _, _, _, _, 12, 13, 14, 15, _, _, _, _ } -+ -+ where _ indicates elements that will be discarded by the permute. -+ -+ First calculate the ELT_SIZEs for A and B. */ -+ unsigned int a_elt_size = GET_MODE_SIZE (DImode); -+ unsigned int b_elt_size = GET_MODE_SIZE (DImode); -+ for (unsigned int i = 0; i < builder.encoded_nelts (); i += elt_size) -+ if (INTVAL (builder.elt (i)) != 0) -+ { -+ if (i & permute_size) -+ b_elt_size |= i - permute_size; -+ else -+ a_elt_size |= i; -+ } -+ a_elt_size &= -a_elt_size; -+ b_elt_size &= -b_elt_size; -+ -+ /* Now construct the vectors themselves. */ -+ rtx_vector_builder a_builder (VNx16BImode, builder.npatterns (), -+ builder.nelts_per_pattern ()); -+ rtx_vector_builder b_builder (VNx16BImode, builder.npatterns (), -+ builder.nelts_per_pattern ()); -+ unsigned int nelts = builder.encoded_nelts (); -+ for (unsigned int i = 0; i < nelts; ++i) -+ if (i & (elt_size - 1)) -+ { -+ a_builder.quick_push (const0_rtx); -+ b_builder.quick_push (const0_rtx); -+ } -+ else if ((i & permute_size) == 0) -+ { -+ /* The A and B elements are significant. */ -+ a_builder.quick_push (builder.elt (i)); -+ b_builder.quick_push (builder.elt (i + permute_size)); -+ } -+ else -+ { -+ /* The A and B elements are going to be discarded, so pick whatever -+ is likely to give a nice constant. We are targeting element -+ sizes A_ELT_SIZE and B_ELT_SIZE for A and B respectively, -+ with the aim of each being a sequence of ones followed by -+ a sequence of zeros. So: -+ -+ * if X_ELT_SIZE <= PERMUTE_SIZE, the best approach is to -+ duplicate the last X_ELT_SIZE element, to extend the -+ current sequence of ones or zeros. 
-+ -+ * if X_ELT_SIZE > PERMUTE_SIZE, the best approach is to add a -+ zero, so that the constant really does have X_ELT_SIZE and -+ not a smaller size. */ -+ if (a_elt_size > permute_size) -+ a_builder.quick_push (const0_rtx); -+ else -+ a_builder.quick_push (a_builder.elt (i - a_elt_size)); -+ if (b_elt_size > permute_size) -+ b_builder.quick_push (const0_rtx); -+ else -+ b_builder.quick_push (b_builder.elt (i - b_elt_size)); -+ } -+ a_builder.finalize (); -+ b_builder.finalize (); -+ -+ /* Try loading A into a register. */ -+ rtx_insn *last = get_last_insn (); -+ rtx a = aarch64_expand_sve_const_pred_1 (NULL_RTX, a_builder, false); -+ if (!a) -+ return NULL_RTX; -+ -+ /* Try loading B into a register. */ -+ rtx b = a; -+ if (a_builder != b_builder) -+ { -+ b = aarch64_expand_sve_const_pred_1 (NULL_RTX, b_builder, false); -+ if (!b) -+ { -+ delete_insns_since (last); -+ return NULL_RTX; -+ } -+ } -+ -+ /* Emit the TRN1 itself. */ -+ machine_mode mode = aarch64_sve_pred_mode (permute_size).require (); -+ target = aarch64_target_reg (target, mode); -+ emit_insn (gen_aarch64_sve (UNSPEC_TRN1, mode, target, -+ gen_lowpart (mode, a), -+ gen_lowpart (mode, b))); -+ return target; -+} -+ -+/* Subroutine of aarch64_expand_sve_const_pred. Try to load the VNx16BI -+ constant in BUILDER into an SVE predicate register. Return the register -+ on success, otherwise return null. Use TARGET for the register if -+ nonnull and convenient. -+ -+ ALLOW_RECURSE_P is true if we can use methods that would call this -+ function recursively. */ -+ -+static rtx -+aarch64_expand_sve_const_pred_1 (rtx target, rtx_vector_builder &builder, -+ bool allow_recurse_p) -+{ -+ if (builder.encoded_nelts () == 1) -+ /* A PFALSE or a PTRUE .B ALL. */ -+ return aarch64_emit_set_immediate (target, builder); -+ -+ unsigned int elt_size = aarch64_widest_sve_pred_elt_size (builder); -+ if (int vl = aarch64_partial_ptrue_length (builder, elt_size)) -+ { -+ /* If we can load the constant using PTRUE, use it as-is. */ -+ machine_mode mode = aarch64_sve_pred_mode (elt_size).require (); -+ if (aarch64_svpattern_for_vl (mode, vl) != AARCH64_NUM_SVPATTERNS) -+ return aarch64_emit_set_immediate (target, builder); -+ -+ /* Otherwise use WHILE to set the first VL bits. */ -+ return aarch64_sve_move_pred_via_while (target, mode, vl); -+ } -+ -+ if (!allow_recurse_p) -+ return NULL_RTX; -+ -+ /* Try inverting the vector in element size ELT_SIZE and then EORing -+ the result with an ELT_SIZE PTRUE. */ -+ if (INTVAL (builder.elt (0)) == 0) -+ if (rtx res = aarch64_expand_sve_const_pred_eor (target, builder, -+ elt_size)) -+ return res; -+ -+ /* Try using TRN1 to permute two simpler constants. */ -+ for (unsigned int i = elt_size; i <= 8; i *= 2) -+ if (rtx res = aarch64_expand_sve_const_pred_trn (target, builder, -+ elt_size, i)) -+ return res; -+ -+ return NULL_RTX; - } - --/* Set DEST to immediate IMM. For SVE vector modes, GEN_VEC_DUPLICATE -- is a pattern that can be used to set DEST to a replicated scalar -- element. */ -+/* Return an SVE predicate register that contains the VNx16BImode -+ constant in BUILDER, without going through the move expanders. -+ -+ The returned register can have whatever mode seems most natural -+ given the contents of BUILDER. Use TARGET for the result if -+ convenient. */ -+ -+static rtx -+aarch64_expand_sve_const_pred (rtx target, rtx_vector_builder &builder) -+{ -+ /* Try loading the constant using pure predicate operations. 
*/ -+ if (rtx res = aarch64_expand_sve_const_pred_1 (target, builder, true)) -+ return res; -+ -+ /* Try forcing the constant to memory. */ -+ if (builder.full_nelts ().is_constant ()) -+ if (rtx mem = force_const_mem (VNx16BImode, builder.build ())) -+ { -+ target = aarch64_target_reg (target, VNx16BImode); -+ emit_move_insn (target, mem); -+ return target; -+ } -+ -+ /* The last resort is to load the constant as an integer and then -+ compare it against zero. Use -1 for set bits in order to increase -+ the changes of using SVE DUPM or an Advanced SIMD byte mask. */ -+ rtx_vector_builder int_builder (VNx16QImode, builder.npatterns (), -+ builder.nelts_per_pattern ()); -+ for (unsigned int i = 0; i < builder.encoded_nelts (); ++i) -+ int_builder.quick_push (INTVAL (builder.elt (i)) -+ ? constm1_rtx : const0_rtx); -+ return aarch64_convert_sve_data_to_pred (target, VNx16BImode, -+ int_builder.build ()); -+} -+ -+/* Set DEST to immediate IMM. */ - - void --aarch64_expand_mov_immediate (rtx dest, rtx imm, -- rtx (*gen_vec_duplicate) (rtx, rtx)) -+aarch64_expand_mov_immediate (rtx dest, rtx imm) - { - machine_mode mode = GET_MODE (dest); - -@@ -3405,38 +4563,50 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm, - - if (!CONST_INT_P (imm)) - { -- rtx base, step, value; -- if (GET_CODE (imm) == HIGH -- || aarch64_simd_valid_immediate (imm, NULL)) -- emit_insn (gen_rtx_SET (dest, imm)); -- else if (const_vec_series_p (imm, &base, &step)) -- aarch64_expand_vec_series (dest, base, step); -- else if (const_vec_duplicate_p (imm, &value)) -+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) - { -- /* If the constant is out of range of an SVE vector move, -- load it from memory if we can, otherwise move it into -- a register and use a DUP. */ -- scalar_mode inner_mode = GET_MODE_INNER (mode); -- rtx op = force_const_mem (inner_mode, value); -- if (!op) -- op = force_reg (inner_mode, value); -- else if (!aarch64_sve_ld1r_operand_p (op)) -+ /* Only the low bit of each .H, .S and .D element is defined, -+ so we can set the upper bits to whatever we like. If the -+ predicate is all-true in MODE, prefer to set all the undefined -+ bits as well, so that we can share a single .B predicate for -+ all modes. */ -+ if (imm == CONSTM1_RTX (mode)) -+ imm = CONSTM1_RTX (VNx16BImode); -+ -+ /* All methods for constructing predicate modes wider than VNx16BI -+ will set the upper bits of each element to zero. Expose this -+ by moving such constants as a VNx16BI, so that all bits are -+ significant and so that constants for different modes can be -+ shared. The wider constant will still be available as a -+ REG_EQUAL note. 
*/ -+ rtx_vector_builder builder; -+ if (aarch64_get_sve_pred_bits (builder, imm)) - { -- rtx addr = force_reg (Pmode, XEXP (op, 0)); -- op = replace_equiv_address (op, addr); -+ rtx res = aarch64_expand_sve_const_pred (dest, builder); -+ if (dest != res) -+ emit_move_insn (dest, gen_lowpart (mode, res)); -+ return; - } -- emit_insn (gen_vec_duplicate (dest, op)); - } -- else if (GET_CODE (imm) == CONST_VECTOR -- && !GET_MODE_NUNITS (GET_MODE (imm)).is_constant ()) -- aarch64_expand_sve_const_vector (dest, imm); -- else -+ -+ if (GET_CODE (imm) == HIGH -+ || aarch64_simd_valid_immediate (imm, NULL)) - { -- rtx mem = force_const_mem (mode, imm); -- gcc_assert (mem); -- emit_move_insn (dest, mem); -+ emit_insn (gen_rtx_SET (dest, imm)); -+ return; - } - -+ if (GET_CODE (imm) == CONST_VECTOR && aarch64_sve_data_mode_p (mode)) -+ if (rtx res = aarch64_expand_sve_const_vector (dest, imm)) -+ { -+ if (dest != res) -+ emit_insn (gen_aarch64_sve_reinterpret (mode, dest, res)); -+ return; -+ } -+ -+ rtx mem = force_const_mem (mode, imm); -+ gcc_assert (mem); -+ emit_move_insn (dest, mem); - return; - } - -@@ -3455,6 +4625,7 @@ aarch64_emit_sve_pred_move (rtx dest, rtx pred, rtx src) - create_output_operand (&ops[0], dest, mode); - create_input_operand (&ops[1], pred, GET_MODE(pred)); - create_input_operand (&ops[2], src, mode); -+ temporary_volatile_ok v (true); - expand_insn (code_for_aarch64_pred_mov (mode), 3, ops); - } - -@@ -3471,7 +4642,7 @@ void - aarch64_expand_sve_mem_move (rtx dest, rtx src, machine_mode pred_mode) - { - machine_mode mode = GET_MODE (dest); -- rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode)); -+ rtx ptrue = aarch64_ptrue_reg (pred_mode); - if (!register_operand (src, mode) - && !register_operand (dest, mode)) - { -@@ -3535,7 +4706,7 @@ aarch64_maybe_expand_sve_subreg_move (rtx dest, rtx src) - return false; - - /* Generate *aarch64_sve_mov_subreg_be. */ -- rtx ptrue = force_reg (VNx16BImode, CONSTM1_RTX (VNx16BImode)); -+ rtx ptrue = aarch64_ptrue_reg (VNx16BImode); - rtx unspec = gen_rtx_UNSPEC (GET_MODE (dest), gen_rtvec (2, ptrue, src), - UNSPEC_REV_SUBREG); - emit_insn (gen_rtx_SET (dest, unspec)); -@@ -3557,14 +4728,29 @@ aarch64_replace_reg_mode (rtx x, machine_mode mode) - return x; - } - -+/* Return the SVE REV[BHW] unspec for reversing quantites of mode MODE -+ stored in wider integer containers. */ -+ -+static unsigned int -+aarch64_sve_rev_unspec (machine_mode mode) -+{ -+ switch (GET_MODE_UNIT_SIZE (mode)) -+ { -+ case 1: return UNSPEC_REVB; -+ case 2: return UNSPEC_REVH; -+ case 4: return UNSPEC_REVW; -+ } -+ gcc_unreachable (); -+} -+ - /* Split a *aarch64_sve_mov_subreg_be pattern with the given - operands. */ - - void - aarch64_split_sve_subreg_move (rtx dest, rtx ptrue, rtx src) - { -- /* Decide which REV operation we need. The mode with narrower elements -- determines the mode of the operands and the mode with the wider -+ /* Decide which REV operation we need. The mode with wider elements -+ determines the mode of the operands and the mode with the narrower - elements determines the reverse width. 
*/ - machine_mode mode_with_wider_elts = GET_MODE (dest); - machine_mode mode_with_narrower_elts = GET_MODE (src); -@@ -3572,38 +4758,22 @@ aarch64_split_sve_subreg_move (rtx dest, rtx ptrue, rtx src) - < GET_MODE_UNIT_SIZE (mode_with_narrower_elts)) - std::swap (mode_with_wider_elts, mode_with_narrower_elts); - -+ unsigned int unspec = aarch64_sve_rev_unspec (mode_with_narrower_elts); - unsigned int wider_bytes = GET_MODE_UNIT_SIZE (mode_with_wider_elts); -- unsigned int unspec; -- if (wider_bytes == 8) -- unspec = UNSPEC_REV64; -- else if (wider_bytes == 4) -- unspec = UNSPEC_REV32; -- else if (wider_bytes == 2) -- unspec = UNSPEC_REV16; -- else -- gcc_unreachable (); - machine_mode pred_mode = aarch64_sve_pred_mode (wider_bytes).require (); - -- /* Emit: -- -- (set DEST (unspec [PTRUE (unspec [SRC] UNSPEC_REV)] -- UNSPEC_MERGE_PTRUE)) -- -- with the appropriate modes. */ -+ /* Get the operands in the appropriate modes and emit the instruction. */ - ptrue = gen_lowpart (pred_mode, ptrue); -- dest = aarch64_replace_reg_mode (dest, mode_with_narrower_elts); -- src = aarch64_replace_reg_mode (src, mode_with_narrower_elts); -- src = gen_rtx_UNSPEC (mode_with_narrower_elts, gen_rtvec (1, src), unspec); -- src = gen_rtx_UNSPEC (mode_with_narrower_elts, gen_rtvec (2, ptrue, src), -- UNSPEC_MERGE_PTRUE); -- emit_insn (gen_rtx_SET (dest, src)); -+ dest = aarch64_replace_reg_mode (dest, mode_with_wider_elts); -+ src = aarch64_replace_reg_mode (src, mode_with_wider_elts); -+ emit_insn (gen_aarch64_pred (unspec, mode_with_wider_elts, -+ dest, ptrue, src)); - } - - static bool --aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, -- tree exp ATTRIBUTE_UNUSED) -+aarch64_function_ok_for_sibcall (tree, tree exp) - { -- if (aarch64_simd_decl_p (cfun->decl) != aarch64_simd_decl_p (decl)) -+ if (crtl->abi->id () != expr_callee_abi (exp).id ()) - return false; - - return true; -@@ -3612,35 +4782,48 @@ aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, - /* Implement TARGET_PASS_BY_REFERENCE. */ - - static bool --aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED, -- machine_mode mode, -- const_tree type, -- bool named ATTRIBUTE_UNUSED) -+aarch64_pass_by_reference (cumulative_args_t pcum_v, -+ const function_arg_info &arg) - { -+ CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); - HOST_WIDE_INT size; - machine_mode dummymode; - int nregs; - -+ unsigned int num_zr, num_pr; -+ if (arg.type && aarch64_sve_argument_p (arg.type, &num_zr, &num_pr)) -+ { -+ if (pcum && !pcum->silent_p && !TARGET_SVE) -+ /* We can't gracefully recover at this point, so make this a -+ fatal error. */ -+ fatal_error (input_location, "arguments of type %qT require" -+ " the SVE ISA extension", arg.type); -+ -+ /* Variadic SVE types are passed by reference. Normal non-variadic -+ arguments are too if we've run out of registers. */ -+ return (!arg.named -+ || pcum->aapcs_nvrn + num_zr > NUM_FP_ARG_REGS -+ || pcum->aapcs_nprn + num_pr > NUM_PR_ARG_REGS); -+ } -+ - /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */ -- if (mode == BLKmode && type) -- size = int_size_in_bytes (type); -+ if (arg.mode == BLKmode && arg.type) -+ size = int_size_in_bytes (arg.type); - else - /* No frontends can create types with variable-sized modes, so we - shouldn't be asked to pass or return them. */ -- size = GET_MODE_SIZE (mode).to_constant (); -+ size = GET_MODE_SIZE (arg.mode).to_constant (); - - /* Aggregates are passed by reference based on their size. 
*/ -- if (type && AGGREGATE_TYPE_P (type)) -- { -- size = int_size_in_bytes (type); -- } -+ if (arg.aggregate_type_p ()) -+ size = int_size_in_bytes (arg.type); - - /* Variable sized arguments are always returned by reference. */ - if (size < 0) - return true; - - /* Can this be a candidate to be passed in fp/simd register(s)? */ -- if (aarch64_vfp_is_call_or_return_candidate (mode, type, -+ if (aarch64_vfp_is_call_or_return_candidate (arg.mode, arg.type, - &dummymode, &nregs, - NULL)) - return false; -@@ -3696,6 +4879,29 @@ aarch64_function_value (const_tree type, const_tree func, - if (INTEGRAL_TYPE_P (type)) - mode = promote_function_mode (type, mode, &unsignedp, func, 1); - -+ unsigned int num_zr, num_pr; -+ if (type && aarch64_sve_argument_p (type, &num_zr, &num_pr)) -+ { -+ /* Don't raise an error here if we're called when SVE is disabled, -+ since this is really just a query function. Other code must -+ do that where appropriate. */ -+ mode = TYPE_MODE_RAW (type); -+ gcc_assert (VECTOR_MODE_P (mode) -+ && (!TARGET_SVE || aarch64_sve_mode_p (mode))); -+ -+ if (num_zr > 0 && num_pr == 0) -+ return gen_rtx_REG (mode, V0_REGNUM); -+ -+ if (num_zr == 0 && num_pr == 1) -+ return gen_rtx_REG (mode, P0_REGNUM); -+ -+ gcc_unreachable (); -+ } -+ -+ /* Generic vectors that map to SVE modes with -msve-vector-bits=N are -+ returned in memory, not by value. */ -+ gcc_assert (!aarch64_sve_mode_p (mode)); -+ - if (aarch64_return_in_msb (type)) - { - HOST_WIDE_INT size = int_size_in_bytes (type); -@@ -3778,6 +4984,16 @@ aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED) - /* Simple scalar types always returned in registers. */ - return false; - -+ unsigned int num_zr, num_pr; -+ if (type && aarch64_sve_argument_p (type, &num_zr, &num_pr)) -+ { -+ /* All SVE types we support fit in registers. For example, it isn't -+ yet possible to define an aggregate of 9+ SVE vectors or 5+ SVE -+ predicates. */ -+ gcc_assert (num_zr <= NUM_FP_ARG_REGS && num_pr <= NUM_PR_ARG_REGS); -+ return false; -+ } -+ - if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), - type, - &ag_mode, -@@ -3853,11 +5069,11 @@ aarch64_function_arg_alignment (machine_mode mode, const_tree type, - numbers refer to the rule numbers in the AAPCS64. */ - - static void --aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode, -- const_tree type, -- bool named ATTRIBUTE_UNUSED) -+aarch64_layout_arg (cumulative_args_t pcum_v, const function_arg_info &arg) - { - CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); -+ tree type = arg.type; -+ machine_mode mode = arg.mode; - int ncrn, nvrn, nregs; - bool allocate_ncrn, allocate_nvrn; - HOST_WIDE_INT size; -@@ -3869,6 +5085,46 @@ aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode, - - pcum->aapcs_arg_processed = true; - -+ unsigned int num_zr, num_pr; -+ if (type && aarch64_sve_argument_p (type, &num_zr, &num_pr)) -+ { -+ /* The PCS says that it is invalid to pass an SVE value to an -+ unprototyped function. There is no ABI-defined location we -+ can return in this case, so we have no real choice but to raise -+ an error immediately, even though this is only a query function. */ -+ if (arg.named && pcum->pcs_variant != ARM_PCS_SVE) -+ { -+ gcc_assert (!pcum->silent_p); -+ error ("SVE type %qT cannot be passed to an unprototyped function", -+ arg.type); -+ /* Avoid repeating the message, and avoid tripping the assert -+ below. 
*/ -+ pcum->pcs_variant = ARM_PCS_SVE; -+ } -+ -+ /* We would have converted the argument into pass-by-reference -+ form if it didn't fit in registers. */ -+ pcum->aapcs_nextnvrn = pcum->aapcs_nvrn + num_zr; -+ pcum->aapcs_nextnprn = pcum->aapcs_nprn + num_pr; -+ gcc_assert (arg.named -+ && pcum->pcs_variant == ARM_PCS_SVE -+ && aarch64_sve_mode_p (mode) -+ && pcum->aapcs_nextnvrn <= NUM_FP_ARG_REGS -+ && pcum->aapcs_nextnprn <= NUM_PR_ARG_REGS); -+ -+ if (num_zr > 0 && num_pr == 0) -+ pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + pcum->aapcs_nvrn); -+ else if (num_zr == 0 && num_pr == 1) -+ pcum->aapcs_reg = gen_rtx_REG (mode, P0_REGNUM + pcum->aapcs_nprn); -+ else -+ gcc_unreachable (); -+ return; -+ } -+ -+ /* Generic vectors that map to SVE modes with -msve-vector-bits=N are -+ passed by reference, not by value. */ -+ gcc_assert (!aarch64_sve_mode_p (mode)); -+ - /* Size in bytes, rounded to the nearest multiple of 8 bytes. */ - if (type) - size = int_size_in_bytes (type); -@@ -3893,7 +5149,7 @@ aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode, - and homogenous short-vector aggregates (HVA). */ - if (allocate_nvrn) - { -- if (!TARGET_FLOAT) -+ if (!pcum->silent_p && !TARGET_FLOAT) - aarch64_err_no_fpadvsimd (mode); - - if (nvrn + nregs <= NUM_FP_ARG_REGS) -@@ -4009,37 +5265,46 @@ on_stack: - /* Implement TARGET_FUNCTION_ARG. */ - - static rtx --aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode, -- const_tree type, bool named) -+aarch64_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg) - { - CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); -- gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64); -+ gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64 -+ || pcum->pcs_variant == ARM_PCS_SIMD -+ || pcum->pcs_variant == ARM_PCS_SVE); - -- if (mode == VOIDmode) -- return NULL_RTX; -+ if (arg.end_marker_p ()) -+ return gen_int_mode (pcum->pcs_variant, DImode); - -- aarch64_layout_arg (pcum_v, mode, type, named); -+ aarch64_layout_arg (pcum_v, arg); - return pcum->aapcs_reg; - } - - void - aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum, -- const_tree fntype ATTRIBUTE_UNUSED, -- rtx libname ATTRIBUTE_UNUSED, -- const_tree fndecl ATTRIBUTE_UNUSED, -- unsigned n_named ATTRIBUTE_UNUSED) -+ const_tree fntype, -+ rtx libname ATTRIBUTE_UNUSED, -+ const_tree fndecl ATTRIBUTE_UNUSED, -+ unsigned n_named ATTRIBUTE_UNUSED, -+ bool silent_p) - { - pcum->aapcs_ncrn = 0; - pcum->aapcs_nvrn = 0; -+ pcum->aapcs_nprn = 0; - pcum->aapcs_nextncrn = 0; - pcum->aapcs_nextnvrn = 0; -- pcum->pcs_variant = ARM_PCS_AAPCS64; -+ pcum->aapcs_nextnprn = 0; -+ if (fntype) -+ pcum->pcs_variant = (arm_pcs) fntype_abi (fntype).id (); -+ else -+ pcum->pcs_variant = ARM_PCS_AAPCS64; - pcum->aapcs_reg = NULL_RTX; - pcum->aapcs_arg_processed = false; - pcum->aapcs_stack_words = 0; - pcum->aapcs_stack_size = 0; -+ pcum->silent_p = silent_p; - -- if (!TARGET_FLOAT -+ if (!silent_p -+ && !TARGET_FLOAT - && fndecl && TREE_PUBLIC (fndecl) - && fntype && fntype != error_mark_node) - { -@@ -4050,24 +5315,38 @@ aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum, - &mode, &nregs, NULL)) - aarch64_err_no_fpadvsimd (TYPE_MODE (type)); - } -- return; -+ -+ if (!silent_p -+ && !TARGET_SVE -+ && pcum->pcs_variant == ARM_PCS_SVE) -+ { -+ /* We can't gracefully recover at this point, so make this a -+ fatal error. 
*/ -+ if (fndecl) -+ fatal_error (input_location, "%qE requires the SVE ISA extension", -+ fndecl); -+ else -+ fatal_error (input_location, "calls to functions of type %qT require" -+ " the SVE ISA extension", fntype); -+ } - } - - static void - aarch64_function_arg_advance (cumulative_args_t pcum_v, -- machine_mode mode, -- const_tree type, -- bool named) -+ const function_arg_info &arg) - { - CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); -- if (pcum->pcs_variant == ARM_PCS_AAPCS64) -+ if (pcum->pcs_variant == ARM_PCS_AAPCS64 -+ || pcum->pcs_variant == ARM_PCS_SIMD -+ || pcum->pcs_variant == ARM_PCS_SVE) - { -- aarch64_layout_arg (pcum_v, mode, type, named); -+ aarch64_layout_arg (pcum_v, arg); - gcc_assert ((pcum->aapcs_reg != NULL_RTX) - != (pcum->aapcs_stack_words != 0)); - pcum->aapcs_arg_processed = false; - pcum->aapcs_ncrn = pcum->aapcs_nextncrn; - pcum->aapcs_nvrn = pcum->aapcs_nextnvrn; -+ pcum->aapcs_nprn = pcum->aapcs_nextnprn; - pcum->aapcs_stack_size += pcum->aapcs_stack_words; - pcum->aapcs_stack_words = 0; - pcum->aapcs_reg = NULL_RTX; -@@ -4500,11 +5779,14 @@ aarch64_needs_frame_chain (void) - static void - aarch64_layout_frame (void) - { -- HOST_WIDE_INT offset = 0; -+ poly_int64 offset = 0; - int regno, last_fp_reg = INVALID_REGNUM; -- bool simd_function = aarch64_simd_decl_p (cfun->decl); -+ machine_mode vector_save_mode = aarch64_reg_save_mode (V8_REGNUM); -+ poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode); -+ bool frame_related_fp_reg_p = false; -+ aarch64_frame &frame = cfun->machine->frame; - -- cfun->machine->frame.emit_frame_chain = aarch64_needs_frame_chain (); -+ frame.emit_frame_chain = aarch64_needs_frame_chain (); - - /* Adjust the outgoing arguments size if required. Keep it in sync with what - the mid-end is doing. */ -@@ -4513,184 +5795,264 @@ aarch64_layout_frame (void) - #define SLOT_NOT_REQUIRED (-2) - #define SLOT_REQUIRED (-1) - -- cfun->machine->frame.wb_candidate1 = INVALID_REGNUM; -- cfun->machine->frame.wb_candidate2 = INVALID_REGNUM; -- -- /* If this is a non-leaf simd function with calls we assume that -- at least one of those calls is to a non-simd function and thus -- we must save V8 to V23 in the prologue. */ -- -- if (simd_function && !crtl->is_leaf) -- { -- for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) -- if (FP_SIMD_SAVED_REGNUM_P (regno)) -- df_set_regs_ever_live (regno, true); -- } -+ frame.wb_candidate1 = INVALID_REGNUM; -+ frame.wb_candidate2 = INVALID_REGNUM; -+ frame.spare_pred_reg = INVALID_REGNUM; - - /* First mark all the registers that really need to be saved... */ -- for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) -- cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED; -- -- for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) -- cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED; -+ for (regno = 0; regno <= LAST_SAVED_REGNUM; regno++) -+ frame.reg_offset[regno] = SLOT_NOT_REQUIRED; - - /* ... that includes the eh data registers (if needed)... */ - if (crtl->calls_eh_return) - for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++) -- cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] -- = SLOT_REQUIRED; -+ frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = SLOT_REQUIRED; - - /* ... and any callee saved register that dataflow says is live. 
*/ - for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) - if (df_regs_ever_live_p (regno) -+ && !fixed_regs[regno] - && (regno == R30_REGNUM -- || !call_used_regs[regno])) -- cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED; -+ || !crtl->abi->clobbers_full_reg_p (regno))) -+ frame.reg_offset[regno] = SLOT_REQUIRED; - - for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) - if (df_regs_ever_live_p (regno) -- && (!call_used_regs[regno] -- || (simd_function && FP_SIMD_SAVED_REGNUM_P (regno)))) -+ && !fixed_regs[regno] -+ && !crtl->abi->clobbers_full_reg_p (regno)) - { -- cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED; -+ frame.reg_offset[regno] = SLOT_REQUIRED; - last_fp_reg = regno; -+ if (aarch64_emit_cfi_for_reg_p (regno)) -+ frame_related_fp_reg_p = true; - } - -- if (cfun->machine->frame.emit_frame_chain) -- { -- /* FP and LR are placed in the linkage record. */ -- cfun->machine->frame.reg_offset[R29_REGNUM] = 0; -- cfun->machine->frame.wb_candidate1 = R29_REGNUM; -- cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD; -- cfun->machine->frame.wb_candidate2 = R30_REGNUM; -- offset = 2 * UNITS_PER_WORD; -+ /* Big-endian SVE frames need a spare predicate register in order -+ to save Z8-Z15. Decide which register they should use. Prefer -+ an unused argument register if possible, so that we don't force P4 -+ to be saved unnecessarily. */ -+ if (frame_related_fp_reg_p -+ && crtl->abi->id () == ARM_PCS_SVE -+ && BYTES_BIG_ENDIAN) -+ { -+ bitmap live1 = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)); -+ bitmap live2 = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun)); -+ for (regno = P0_REGNUM; regno <= P7_REGNUM; regno++) -+ if (!bitmap_bit_p (live1, regno) && !bitmap_bit_p (live2, regno)) -+ break; -+ gcc_assert (regno <= P7_REGNUM); -+ frame.spare_pred_reg = regno; -+ df_set_regs_ever_live (regno, true); - } - -+ for (regno = P0_REGNUM; regno <= P15_REGNUM; regno++) -+ if (df_regs_ever_live_p (regno) -+ && !fixed_regs[regno] -+ && !crtl->abi->clobbers_full_reg_p (regno)) -+ frame.reg_offset[regno] = SLOT_REQUIRED; -+ - /* With stack-clash, LR must be saved in non-leaf functions. */ - gcc_assert (crtl->is_leaf -- || (cfun->machine->frame.reg_offset[R30_REGNUM] -- != SLOT_NOT_REQUIRED)); -+ || maybe_ne (frame.reg_offset[R30_REGNUM], SLOT_NOT_REQUIRED)); -+ -+ /* Now assign stack slots for the registers. Start with the predicate -+ registers, since predicate LDR and STR have a relatively small -+ offset range. These saves happen below the hard frame pointer. */ -+ for (regno = P0_REGNUM; regno <= P15_REGNUM; regno++) -+ if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED)) -+ { -+ frame.reg_offset[regno] = offset; -+ offset += BYTES_PER_SVE_PRED; -+ } -+ -+ /* We save a maximum of 8 predicate registers, and since vector -+ registers are 8 times the size of a predicate register, all the -+ saved predicates fit within a single vector. Doing this also -+ rounds the offset to a 128-bit boundary. */ -+ if (maybe_ne (offset, 0)) -+ { -+ gcc_assert (known_le (offset, vector_save_size)); -+ offset = vector_save_size; -+ } -+ -+ /* If we need to save any SVE vector registers, add them next. */ -+ if (last_fp_reg != (int) INVALID_REGNUM && crtl->abi->id () == ARM_PCS_SVE) -+ for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) -+ if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED)) -+ { -+ frame.reg_offset[regno] = offset; -+ offset += vector_save_size; -+ } -+ -+ /* OFFSET is now the offset of the hard frame pointer from the bottom -+ of the callee save area. 
*/ -+ bool saves_below_hard_fp_p = maybe_ne (offset, 0); -+ frame.below_hard_fp_saved_regs_size = offset; -+ if (frame.emit_frame_chain) -+ { -+ /* FP and LR are placed in the linkage record. */ -+ frame.reg_offset[R29_REGNUM] = offset; -+ frame.wb_candidate1 = R29_REGNUM; -+ frame.reg_offset[R30_REGNUM] = offset + UNITS_PER_WORD; -+ frame.wb_candidate2 = R30_REGNUM; -+ offset += 2 * UNITS_PER_WORD; -+ } - -- /* Now assign stack slots for them. */ - for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) -- if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED) -+ if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED)) - { -- cfun->machine->frame.reg_offset[regno] = offset; -- if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM) -- cfun->machine->frame.wb_candidate1 = regno; -- else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM) -- cfun->machine->frame.wb_candidate2 = regno; -+ frame.reg_offset[regno] = offset; -+ if (frame.wb_candidate1 == INVALID_REGNUM) -+ frame.wb_candidate1 = regno; -+ else if (frame.wb_candidate2 == INVALID_REGNUM) -+ frame.wb_candidate2 = regno; - offset += UNITS_PER_WORD; - } - -- HOST_WIDE_INT max_int_offset = offset; -- offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT); -- bool has_align_gap = offset != max_int_offset; -+ poly_int64 max_int_offset = offset; -+ offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); -+ bool has_align_gap = maybe_ne (offset, max_int_offset); - - for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) -- if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED) -+ if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED)) - { - /* If there is an alignment gap between integer and fp callee-saves, - allocate the last fp register to it if possible. */ - if (regno == last_fp_reg - && has_align_gap -- && !simd_function -- && (offset & 8) == 0) -+ && known_eq (vector_save_size, 8) -+ && multiple_p (offset, 16)) - { -- cfun->machine->frame.reg_offset[regno] = max_int_offset; -+ frame.reg_offset[regno] = max_int_offset; - break; - } - -- cfun->machine->frame.reg_offset[regno] = offset; -- if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM) -- cfun->machine->frame.wb_candidate1 = regno; -- else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM -- && cfun->machine->frame.wb_candidate1 >= V0_REGNUM) -- cfun->machine->frame.wb_candidate2 = regno; -- offset += simd_function ? UNITS_PER_VREG : UNITS_PER_WORD; -+ frame.reg_offset[regno] = offset; -+ if (frame.wb_candidate1 == INVALID_REGNUM) -+ frame.wb_candidate1 = regno; -+ else if (frame.wb_candidate2 == INVALID_REGNUM -+ && frame.wb_candidate1 >= V0_REGNUM) -+ frame.wb_candidate2 = regno; -+ offset += vector_save_size; - } - -- offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT); -+ offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT); - -- cfun->machine->frame.saved_regs_size = offset; -+ frame.saved_regs_size = offset; - -- HOST_WIDE_INT varargs_and_saved_regs_size -- = offset + cfun->machine->frame.saved_varargs_size; -+ poly_int64 varargs_and_saved_regs_size = offset + frame.saved_varargs_size; - -- cfun->machine->frame.hard_fp_offset -+ poly_int64 above_outgoing_args - = aligned_upper_bound (varargs_and_saved_regs_size - + get_frame_size (), - STACK_BOUNDARY / BITS_PER_UNIT); - -+ frame.hard_fp_offset -+ = above_outgoing_args - frame.below_hard_fp_saved_regs_size; -+ - /* Both these values are already aligned. 
*/ - gcc_assert (multiple_p (crtl->outgoing_args_size, - STACK_BOUNDARY / BITS_PER_UNIT)); -- cfun->machine->frame.frame_size -- = (cfun->machine->frame.hard_fp_offset -- + crtl->outgoing_args_size); -+ frame.frame_size = above_outgoing_args + crtl->outgoing_args_size; - -- cfun->machine->frame.locals_offset = cfun->machine->frame.saved_varargs_size; -+ frame.locals_offset = frame.saved_varargs_size; - -- cfun->machine->frame.initial_adjust = 0; -- cfun->machine->frame.final_adjust = 0; -- cfun->machine->frame.callee_adjust = 0; -- cfun->machine->frame.callee_offset = 0; -+ frame.initial_adjust = 0; -+ frame.final_adjust = 0; -+ frame.callee_adjust = 0; -+ frame.sve_callee_adjust = 0; -+ frame.callee_offset = 0; - - HOST_WIDE_INT max_push_offset = 0; -- if (cfun->machine->frame.wb_candidate2 != INVALID_REGNUM) -+ if (frame.wb_candidate2 != INVALID_REGNUM) - max_push_offset = 512; -- else if (cfun->machine->frame.wb_candidate1 != INVALID_REGNUM) -+ else if (frame.wb_candidate1 != INVALID_REGNUM) - max_push_offset = 256; - -- HOST_WIDE_INT const_size, const_fp_offset; -- if (cfun->machine->frame.frame_size.is_constant (&const_size) -+ HOST_WIDE_INT const_size, const_outgoing_args_size, const_fp_offset; -+ HOST_WIDE_INT const_saved_regs_size; -+ if (frame.frame_size.is_constant (&const_size) - && const_size < max_push_offset -- && known_eq (crtl->outgoing_args_size, 0)) -+ && known_eq (frame.hard_fp_offset, const_size)) - { - /* Simple, small frame with no outgoing arguments: -+ - stp reg1, reg2, [sp, -frame_size]! - stp reg3, reg4, [sp, 16] */ -- cfun->machine->frame.callee_adjust = const_size; -- } -- else if (known_lt (crtl->outgoing_args_size -- + cfun->machine->frame.saved_regs_size, 512) -+ frame.callee_adjust = const_size; -+ } -+ else if (crtl->outgoing_args_size.is_constant (&const_outgoing_args_size) -+ && frame.saved_regs_size.is_constant (&const_saved_regs_size) -+ && const_outgoing_args_size + const_saved_regs_size < 512 -+ /* We could handle this case even with outgoing args, provided -+ that the number of args left us with valid offsets for all -+ predicate and vector save slots. It's such a rare case that -+ it hardly seems worth the effort though. 
*/ -+ && (!saves_below_hard_fp_p || const_outgoing_args_size == 0) - && !(cfun->calls_alloca -- && known_lt (cfun->machine->frame.hard_fp_offset, -- max_push_offset))) -+ && frame.hard_fp_offset.is_constant (&const_fp_offset) -+ && const_fp_offset < max_push_offset)) - { - /* Frame with small outgoing arguments: -+ - sub sp, sp, frame_size - stp reg1, reg2, [sp, outgoing_args_size] - stp reg3, reg4, [sp, outgoing_args_size + 16] */ -- cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size; -- cfun->machine->frame.callee_offset -- = cfun->machine->frame.frame_size - cfun->machine->frame.hard_fp_offset; -+ frame.initial_adjust = frame.frame_size; -+ frame.callee_offset = const_outgoing_args_size; - } -- else if (cfun->machine->frame.hard_fp_offset.is_constant (&const_fp_offset) -+ else if (saves_below_hard_fp_p -+ && known_eq (frame.saved_regs_size, -+ frame.below_hard_fp_saved_regs_size)) -+ { -+ /* Frame in which all saves are SVE saves: -+ -+ sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size -+ save SVE registers relative to SP -+ sub sp, sp, outgoing_args_size */ -+ frame.initial_adjust = (frame.hard_fp_offset -+ + frame.below_hard_fp_saved_regs_size); -+ frame.final_adjust = crtl->outgoing_args_size; -+ } -+ else if (frame.hard_fp_offset.is_constant (&const_fp_offset) - && const_fp_offset < max_push_offset) - { -- /* Frame with large outgoing arguments but a small local area: -+ /* Frame with large outgoing arguments or SVE saves, but with -+ a small local area: -+ - stp reg1, reg2, [sp, -hard_fp_offset]! - stp reg3, reg4, [sp, 16] -+ [sub sp, sp, below_hard_fp_saved_regs_size] -+ [save SVE registers relative to SP] - sub sp, sp, outgoing_args_size */ -- cfun->machine->frame.callee_adjust = const_fp_offset; -- cfun->machine->frame.final_adjust -- = cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust; -+ frame.callee_adjust = const_fp_offset; -+ frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; -+ frame.final_adjust = crtl->outgoing_args_size; - } - else - { -- /* Frame with large local area and outgoing arguments using frame pointer: -+ /* Frame with large local area and outgoing arguments or SVE saves, -+ using frame pointer: -+ - sub sp, sp, hard_fp_offset - stp x29, x30, [sp, 0] - add x29, sp, 0 - stp reg3, reg4, [sp, 16] -+ [sub sp, sp, below_hard_fp_saved_regs_size] -+ [save SVE registers relative to SP] - sub sp, sp, outgoing_args_size */ -- cfun->machine->frame.initial_adjust = cfun->machine->frame.hard_fp_offset; -- cfun->machine->frame.final_adjust -- = cfun->machine->frame.frame_size - cfun->machine->frame.initial_adjust; -+ frame.initial_adjust = frame.hard_fp_offset; -+ frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size; -+ frame.final_adjust = crtl->outgoing_args_size; - } - -- cfun->machine->frame.laid_out = true; -+ /* Make sure the individual adjustments add up to the full frame size. 
*/ -+ gcc_assert (known_eq (frame.initial_adjust -+ + frame.callee_adjust -+ + frame.sve_callee_adjust -+ + frame.final_adjust, frame.frame_size)); -+ -+ frame.laid_out = true; - } - - /* Return true if the register REGNO is saved on entry to -@@ -4699,7 +6061,7 @@ aarch64_layout_frame (void) - static bool - aarch64_register_saved_on_entry (int regno) - { -- return cfun->machine->frame.reg_offset[regno] >= 0; -+ return known_ge (cfun->machine->frame.reg_offset[regno], 0); - } - - /* Return the next register up from REGNO up to LIMIT for the callee -@@ -4766,7 +6128,7 @@ static void - aarch64_push_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment) - { - rtx_insn *insn; -- machine_mode mode = aarch64_reg_save_mode (cfun->decl, regno1); -+ machine_mode mode = aarch64_reg_save_mode (regno1); - - if (regno2 == INVALID_REGNUM) - return aarch64_pushwb_single_reg (mode, regno1, adjustment); -@@ -4812,7 +6174,7 @@ static void - aarch64_pop_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment, - rtx *cfi_ops) - { -- machine_mode mode = aarch64_reg_save_mode (cfun->decl, regno1); -+ machine_mode mode = aarch64_reg_save_mode (regno1); - rtx reg1 = gen_rtx_REG (mode, regno1); - - *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg1, *cfi_ops); -@@ -4888,10 +6250,10 @@ aarch64_return_address_signing_enabled (void) - gcc_assert (cfun->machine->frame.laid_out); - - /* If signing scope is AARCH64_FUNCTION_NON_LEAF, we only sign a leaf function -- if it's LR is pushed onto stack. */ -+ if its LR is pushed onto stack. */ - return (aarch64_ra_sign_scope == AARCH64_FUNCTION_ALL - || (aarch64_ra_sign_scope == AARCH64_FUNCTION_NON_LEAF -- && cfun->machine->frame.reg_offset[LR_REGNUM] >= 0)); -+ && known_ge (cfun->machine->frame.reg_offset[LR_REGNUM], 0))); - } - - /* Return TRUE if Branch Target Identification Mechanism is enabled. */ -@@ -4901,17 +6263,75 @@ aarch64_bti_enabled (void) - return (aarch64_enable_bti == 1); - } - -+/* The caller is going to use ST1D or LD1D to save or restore an SVE -+ register in mode MODE at BASE_RTX + OFFSET, where OFFSET is in -+ the range [1, 16] * GET_MODE_SIZE (MODE). Prepare for this by: -+ -+ (1) updating BASE_RTX + OFFSET so that it is a legitimate ST1D -+ or LD1D address -+ -+ (2) setting PRED to a valid predicate register for the ST1D or LD1D, -+ if the variable isn't already nonnull -+ -+ (1) is needed when OFFSET is in the range [8, 16] * GET_MODE_SIZE (MODE). -+ Handle this case using a temporary base register that is suitable for -+ all offsets in that range. Use ANCHOR_REG as this base register if it -+ is nonnull, otherwise create a new register and store it in ANCHOR_REG. */ -+ -+static inline void -+aarch64_adjust_sve_callee_save_base (machine_mode mode, rtx &base_rtx, -+ rtx &anchor_reg, poly_int64 &offset, -+ rtx &ptrue) -+{ -+ if (maybe_ge (offset, 8 * GET_MODE_SIZE (mode))) -+ { -+ /* This is the maximum valid offset of the anchor from the base. -+ Lower values would be valid too. 
*/ -+ poly_int64 anchor_offset = 16 * GET_MODE_SIZE (mode); -+ if (!anchor_reg) -+ { -+ anchor_reg = gen_rtx_REG (Pmode, STACK_CLASH_SVE_CFA_REGNUM); -+ emit_insn (gen_add3_insn (anchor_reg, base_rtx, -+ gen_int_mode (anchor_offset, Pmode))); -+ } -+ base_rtx = anchor_reg; -+ offset -= anchor_offset; -+ } -+ if (!ptrue) -+ { -+ int pred_reg = cfun->machine->frame.spare_pred_reg; -+ emit_move_insn (gen_rtx_REG (VNx16BImode, pred_reg), -+ CONSTM1_RTX (VNx16BImode)); -+ ptrue = gen_rtx_REG (VNx2BImode, pred_reg); -+ } -+} -+ -+/* Add a REG_CFA_EXPRESSION note to INSN to say that register REG -+ is saved at BASE + OFFSET. */ -+ -+static void -+aarch64_add_cfa_expression (rtx_insn *insn, rtx reg, -+ rtx base, poly_int64 offset) -+{ -+ rtx mem = gen_frame_mem (GET_MODE (reg), -+ plus_constant (Pmode, base, offset)); -+ add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg)); -+} -+ - /* Emit code to save the callee-saved registers from register number START - to LIMIT to the stack at the location starting at offset START_OFFSET, -- skipping any write-back candidates if SKIP_WB is true. */ -+ skipping any write-back candidates if SKIP_WB is true. HARD_FP_VALID_P -+ is true if the hard frame pointer has been set up. */ - - static void --aarch64_save_callee_saves (machine_mode mode, poly_int64 start_offset, -- unsigned start, unsigned limit, bool skip_wb) -+aarch64_save_callee_saves (poly_int64 start_offset, -+ unsigned start, unsigned limit, bool skip_wb, -+ bool hard_fp_valid_p) - { - rtx_insn *insn; - unsigned regno; - unsigned regno2; -+ rtx anchor_reg = NULL_RTX, ptrue = NULL_RTX; - - for (regno = aarch64_next_callee_save (start, limit); - regno <= limit; -@@ -4919,7 +6339,7 @@ aarch64_save_callee_saves (machine_mode mode, poly_int64 start_offset, - { - rtx reg, mem; - poly_int64 offset; -- int offset_diff; -+ bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno); - - if (skip_wb - && (regno == cfun->machine->frame.wb_candidate1 -@@ -4927,27 +6347,53 @@ aarch64_save_callee_saves (machine_mode mode, poly_int64 start_offset, - continue; - - if (cfun->machine->reg_is_wrapped_separately[regno]) -- continue; -+ continue; - -+ machine_mode mode = aarch64_reg_save_mode (regno); - reg = gen_rtx_REG (mode, regno); - offset = start_offset + cfun->machine->frame.reg_offset[regno]; -- mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx, -- offset)); -+ rtx base_rtx = stack_pointer_rtx; -+ poly_int64 sp_offset = offset; - -- regno2 = aarch64_next_callee_save (regno + 1, limit); -- offset_diff = cfun->machine->frame.reg_offset[regno2] -- - cfun->machine->frame.reg_offset[regno]; -+ HOST_WIDE_INT const_offset; -+ if (mode == VNx2DImode && BYTES_BIG_ENDIAN) -+ aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg, -+ offset, ptrue); -+ else if (GP_REGNUM_P (regno) -+ && (!offset.is_constant (&const_offset) || const_offset >= 512)) -+ { -+ gcc_assert (known_eq (start_offset, 0)); -+ poly_int64 fp_offset -+ = cfun->machine->frame.below_hard_fp_saved_regs_size; -+ if (hard_fp_valid_p) -+ base_rtx = hard_frame_pointer_rtx; -+ else -+ { -+ if (!anchor_reg) -+ { -+ anchor_reg = gen_rtx_REG (Pmode, STACK_CLASH_SVE_CFA_REGNUM); -+ emit_insn (gen_add3_insn (anchor_reg, base_rtx, -+ gen_int_mode (fp_offset, Pmode))); -+ } -+ base_rtx = anchor_reg; -+ } -+ offset -= fp_offset; -+ } -+ mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset)); -+ bool need_cfa_note_p = (base_rtx != stack_pointer_rtx); - -- if (regno2 <= limit -+ if (!aarch64_sve_mode_p (mode) -+ && (regno2 = 
aarch64_next_callee_save (regno + 1, limit)) <= limit - && !cfun->machine->reg_is_wrapped_separately[regno2] -- && known_eq (GET_MODE_SIZE (mode), offset_diff)) -+ && known_eq (GET_MODE_SIZE (mode), -+ cfun->machine->frame.reg_offset[regno2] -+ - cfun->machine->frame.reg_offset[regno])) - { - rtx reg2 = gen_rtx_REG (mode, regno2); - rtx mem2; - -- offset = start_offset + cfun->machine->frame.reg_offset[regno2]; -- mem2 = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx, -- offset)); -+ offset += GET_MODE_SIZE (mode); -+ mem2 = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset)); - insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2, - reg2)); - -@@ -4955,71 +6401,96 @@ aarch64_save_callee_saves (machine_mode mode, poly_int64 start_offset, - always assumed to be relevant to the frame - calculations; subsequent parts, are only - frame-related if explicitly marked. */ -- RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; -+ if (aarch64_emit_cfi_for_reg_p (regno2)) -+ { -+ if (need_cfa_note_p) -+ aarch64_add_cfa_expression (insn, reg2, stack_pointer_rtx, -+ sp_offset + GET_MODE_SIZE (mode)); -+ else -+ RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; -+ } -+ - regno = regno2; - } -+ else if (mode == VNx2DImode && BYTES_BIG_ENDIAN) -+ { -+ insn = emit_insn (gen_aarch64_pred_mov (mode, mem, ptrue, reg)); -+ need_cfa_note_p = true; -+ } -+ else if (aarch64_sve_mode_p (mode)) -+ insn = emit_insn (gen_rtx_SET (mem, reg)); - else - insn = emit_move_insn (mem, reg); - -- RTX_FRAME_RELATED_P (insn) = 1; -+ RTX_FRAME_RELATED_P (insn) = frame_related_p; -+ if (frame_related_p && need_cfa_note_p) -+ aarch64_add_cfa_expression (insn, reg, stack_pointer_rtx, sp_offset); - } - } - --/* Emit code to restore the callee registers of mode MODE from register -- number START up to and including LIMIT. Restore from the stack offset -- START_OFFSET, skipping any write-back candidates if SKIP_WB is true. -- Write the appropriate REG_CFA_RESTORE notes into CFI_OPS. */ -+/* Emit code to restore the callee registers from register number START -+ up to and including LIMIT. Restore from the stack offset START_OFFSET, -+ skipping any write-back candidates if SKIP_WB is true. Write the -+ appropriate REG_CFA_RESTORE notes into CFI_OPS. 
*/ - - static void --aarch64_restore_callee_saves (machine_mode mode, -- poly_int64 start_offset, unsigned start, -+aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, - unsigned limit, bool skip_wb, rtx *cfi_ops) - { -- rtx base_rtx = stack_pointer_rtx; - unsigned regno; - unsigned regno2; - poly_int64 offset; -+ rtx anchor_reg = NULL_RTX, ptrue = NULL_RTX; - - for (regno = aarch64_next_callee_save (start, limit); - regno <= limit; - regno = aarch64_next_callee_save (regno + 1, limit)) - { -+ bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno); - if (cfun->machine->reg_is_wrapped_separately[regno]) -- continue; -+ continue; - - rtx reg, mem; -- int offset_diff; - - if (skip_wb - && (regno == cfun->machine->frame.wb_candidate1 - || regno == cfun->machine->frame.wb_candidate2)) - continue; - -+ machine_mode mode = aarch64_reg_save_mode (regno); - reg = gen_rtx_REG (mode, regno); - offset = start_offset + cfun->machine->frame.reg_offset[regno]; -+ rtx base_rtx = stack_pointer_rtx; -+ if (mode == VNx2DImode && BYTES_BIG_ENDIAN) -+ aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg, -+ offset, ptrue); - mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset)); - -- regno2 = aarch64_next_callee_save (regno + 1, limit); -- offset_diff = cfun->machine->frame.reg_offset[regno2] -- - cfun->machine->frame.reg_offset[regno]; -- -- if (regno2 <= limit -+ if (!aarch64_sve_mode_p (mode) -+ && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit - && !cfun->machine->reg_is_wrapped_separately[regno2] -- && known_eq (GET_MODE_SIZE (mode), offset_diff)) -+ && known_eq (GET_MODE_SIZE (mode), -+ cfun->machine->frame.reg_offset[regno2] -+ - cfun->machine->frame.reg_offset[regno])) - { - rtx reg2 = gen_rtx_REG (mode, regno2); - rtx mem2; - -- offset = start_offset + cfun->machine->frame.reg_offset[regno2]; -+ offset += GET_MODE_SIZE (mode); - mem2 = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset)); - emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2)); - - *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops); - regno = regno2; - } -+ else if (mode == VNx2DImode && BYTES_BIG_ENDIAN) -+ emit_insn (gen_aarch64_pred_mov (mode, reg, ptrue, mem)); -+ else if (aarch64_sve_mode_p (mode)) -+ emit_insn (gen_rtx_SET (reg, mem)); - else - emit_move_insn (reg, mem); -- *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops); -+ if (frame_related_p) -+ *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops); - } - } - -@@ -5101,13 +6572,35 @@ aarch64_get_separate_components (void) - for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++) - if (aarch64_register_saved_on_entry (regno)) - { -+ /* Punt on saves and restores that use ST1D and LD1D. We could -+ try to be smarter, but it would involve making sure that the -+ spare predicate register itself is safe to use at the save -+ and restore points. Also, when a frame pointer is being used, -+ the slots are often out of reach of ST1D and LD1D anyway. */ -+ machine_mode mode = aarch64_reg_save_mode (regno); -+ if (mode == VNx2DImode && BYTES_BIG_ENDIAN) -+ continue; -+ - poly_int64 offset = cfun->machine->frame.reg_offset[regno]; -- if (!frame_pointer_needed) -- offset += cfun->machine->frame.frame_size -- - cfun->machine->frame.hard_fp_offset; -+ -+ /* If the register is saved in the first SVE save slot, we use -+ it as a stack probe for -fstack-clash-protection. 
*/ -+ if (flag_stack_clash_protection -+ && maybe_ne (cfun->machine->frame.below_hard_fp_saved_regs_size, 0) -+ && known_eq (offset, 0)) -+ continue; -+ -+ /* Get the offset relative to the register we'll use. */ -+ if (frame_pointer_needed) -+ offset -= cfun->machine->frame.below_hard_fp_saved_regs_size; -+ else -+ offset += crtl->outgoing_args_size; -+ - /* Check that we can access the stack slot of the register with one - direct load with no adjustments needed. */ -- if (offset_12bit_unsigned_scaled_p (DImode, offset)) -+ if (aarch64_sve_mode_p (mode) -+ ? offset_9bit_signed_scaled_p (mode, offset) -+ : offset_12bit_unsigned_scaled_p (mode, offset)) - bitmap_set_bit (components, regno); - } - -@@ -5115,6 +6608,12 @@ aarch64_get_separate_components (void) - if (frame_pointer_needed) - bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM); - -+ /* If the spare predicate register used by big-endian SVE code -+ is call-preserved, it must be saved in the main prologue -+ before any saves that use it. */ -+ if (cfun->machine->frame.spare_pred_reg != INVALID_REGNUM) -+ bitmap_clear_bit (components, cfun->machine->frame.spare_pred_reg); -+ - unsigned reg1 = cfun->machine->frame.wb_candidate1; - unsigned reg2 = cfun->machine->frame.wb_candidate2; - /* If registers have been chosen to be stored/restored with -@@ -5139,31 +6638,48 @@ aarch64_components_for_bb (basic_block bb) - bitmap in = DF_LIVE_IN (bb); - bitmap gen = &DF_LIVE_BB_INFO (bb)->gen; - bitmap kill = &DF_LIVE_BB_INFO (bb)->kill; -- bool simd_function = aarch64_simd_decl_p (cfun->decl); - - sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1); - bitmap_clear (components); - -+ /* Clobbered registers don't generate values in any meaningful sense, -+ since nothing after the clobber can rely on their value. And we can't -+ say that partially-clobbered registers are unconditionally killed, -+ because whether they're killed or not depends on the mode of the -+ value they're holding. Thus partially call-clobbered registers -+ appear in neither the kill set nor the gen set. -+ -+ Check manually for any calls that clobber more of a register than the -+ current function can. */ -+ function_abi_aggregator callee_abis; -+ rtx_insn *insn; -+ FOR_BB_INSNS (bb, insn) -+ if (CALL_P (insn)) -+ callee_abis.note_callee_abi (insn_callee_abi (insn)); -+ HARD_REG_SET extra_caller_saves = callee_abis.caller_save_regs (*crtl->abi); -+ - /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets. */ - for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++) -- if ((!call_used_regs[regno] -- || (simd_function && FP_SIMD_SAVED_REGNUM_P (regno))) -- && (bitmap_bit_p (in, regno) -- || bitmap_bit_p (gen, regno) -- || bitmap_bit_p (kill, regno))) -+ if (!fixed_regs[regno] -+ && !crtl->abi->clobbers_full_reg_p (regno) -+ && (TEST_HARD_REG_BIT (extra_caller_saves, regno) -+ || bitmap_bit_p (in, regno) -+ || bitmap_bit_p (gen, regno) -+ || bitmap_bit_p (kill, regno))) - { -- unsigned regno2, offset, offset2; - bitmap_set_bit (components, regno); - - /* If there is a callee-save at an adjacent offset, add it too - to increase the use of LDP/STP. */ -- offset = cfun->machine->frame.reg_offset[regno]; -- regno2 = ((offset & 8) == 0) ? regno + 1 : regno - 1; -+ poly_int64 offset = cfun->machine->frame.reg_offset[regno]; -+ unsigned regno2 = multiple_p (offset, 16) ? 
regno + 1 : regno - 1; - - if (regno2 <= LAST_SAVED_REGNUM) - { -- offset2 = cfun->machine->frame.reg_offset[regno2]; -- if ((offset & ~8) == (offset2 & ~8)) -+ poly_int64 offset2 = cfun->machine->frame.reg_offset[regno2]; -+ if (regno < regno2 -+ ? known_eq (offset + 8, offset2) -+ : multiple_p (offset2, 16) && known_eq (offset2 + 8, offset)) - bitmap_set_bit (components, regno2); - } - } -@@ -5218,16 +6734,16 @@ aarch64_process_components (sbitmap components, bool prologue_p) - - while (regno != last_regno) - { -- /* AAPCS64 section 5.1.2 requires only the low 64 bits to be saved -- so DFmode for the vector registers is enough. For simd functions -- we want to save the low 128 bits. */ -- machine_mode mode = aarch64_reg_save_mode (cfun->decl, regno); -+ bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno); -+ machine_mode mode = aarch64_reg_save_mode (regno); - - rtx reg = gen_rtx_REG (mode, regno); - poly_int64 offset = cfun->machine->frame.reg_offset[regno]; -- if (!frame_pointer_needed) -- offset += cfun->machine->frame.frame_size -- - cfun->machine->frame.hard_fp_offset; -+ if (frame_pointer_needed) -+ offset -= cfun->machine->frame.below_hard_fp_saved_regs_size; -+ else -+ offset += crtl->outgoing_args_size; -+ - rtx addr = plus_constant (Pmode, ptr_reg, offset); - rtx mem = gen_frame_mem (mode, addr); - -@@ -5238,39 +6754,49 @@ aarch64_process_components (sbitmap components, bool prologue_p) - if (regno2 == last_regno) - { - insn = emit_insn (set); -- RTX_FRAME_RELATED_P (insn) = 1; -- if (prologue_p) -- add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set)); -- else -- add_reg_note (insn, REG_CFA_RESTORE, reg); -+ if (frame_related_p) -+ { -+ RTX_FRAME_RELATED_P (insn) = 1; -+ if (prologue_p) -+ add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set)); -+ else -+ add_reg_note (insn, REG_CFA_RESTORE, reg); -+ } - break; - } - - poly_int64 offset2 = cfun->machine->frame.reg_offset[regno2]; - /* The next register is not of the same class or its offset is not - mergeable with the current one into a pair. */ -- if (!satisfies_constraint_Ump (mem) -+ if (aarch64_sve_mode_p (mode) -+ || !satisfies_constraint_Ump (mem) - || GP_REGNUM_P (regno) != GP_REGNUM_P (regno2) -- || (aarch64_simd_decl_p (cfun->decl) && FP_REGNUM_P (regno)) -+ || (crtl->abi->id () == ARM_PCS_SIMD && FP_REGNUM_P (regno)) - || maybe_ne ((offset2 - cfun->machine->frame.reg_offset[regno]), - GET_MODE_SIZE (mode))) - { - insn = emit_insn (set); -- RTX_FRAME_RELATED_P (insn) = 1; -- if (prologue_p) -- add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set)); -- else -- add_reg_note (insn, REG_CFA_RESTORE, reg); -+ if (frame_related_p) -+ { -+ RTX_FRAME_RELATED_P (insn) = 1; -+ if (prologue_p) -+ add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set)); -+ else -+ add_reg_note (insn, REG_CFA_RESTORE, reg); -+ } - - regno = regno2; - continue; - } - -+ bool frame_related2_p = aarch64_emit_cfi_for_reg_p (regno2); -+ - /* REGNO2 can be saved/restored in a pair with REGNO. */ - rtx reg2 = gen_rtx_REG (mode, regno2); -- if (!frame_pointer_needed) -- offset2 += cfun->machine->frame.frame_size -- - cfun->machine->frame.hard_fp_offset; -+ if (frame_pointer_needed) -+ offset2 -= cfun->machine->frame.below_hard_fp_saved_regs_size; -+ else -+ offset2 += crtl->outgoing_args_size; - rtx addr2 = plus_constant (Pmode, ptr_reg, offset2); - rtx mem2 = gen_frame_mem (mode, addr2); - rtx set2 = prologue_p ? 
gen_rtx_SET (mem2, reg2) -@@ -5281,16 +6807,23 @@ aarch64_process_components (sbitmap components, bool prologue_p) - else - insn = emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2)); - -- RTX_FRAME_RELATED_P (insn) = 1; -- if (prologue_p) -- { -- add_reg_note (insn, REG_CFA_OFFSET, set); -- add_reg_note (insn, REG_CFA_OFFSET, set2); -- } -- else -+ if (frame_related_p || frame_related2_p) - { -- add_reg_note (insn, REG_CFA_RESTORE, reg); -- add_reg_note (insn, REG_CFA_RESTORE, reg2); -+ RTX_FRAME_RELATED_P (insn) = 1; -+ if (prologue_p) -+ { -+ if (frame_related_p) -+ add_reg_note (insn, REG_CFA_OFFSET, set); -+ if (frame_related2_p) -+ add_reg_note (insn, REG_CFA_OFFSET, set2); -+ } -+ else -+ { -+ if (frame_related_p) -+ add_reg_note (insn, REG_CFA_RESTORE, reg); -+ if (frame_related2_p) -+ add_reg_note (insn, REG_CFA_RESTORE, reg2); -+ } - } - - regno = aarch64_get_next_set_bit (components, regno2 + 1); -@@ -5359,15 +6892,31 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, - HOST_WIDE_INT guard_size - = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE); - HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD; -- /* When doing the final adjustment for the outgoing argument size we can't -- assume that LR was saved at position 0. So subtract it's offset from the -- ABI safe buffer so that we don't accidentally allow an adjustment that -- would result in an allocation larger than the ABI buffer without -- probing. */ - HOST_WIDE_INT min_probe_threshold -- = final_adjustment_p -- ? guard_used_by_caller - cfun->machine->frame.reg_offset[LR_REGNUM] -- : guard_size - guard_used_by_caller; -+ = (final_adjustment_p -+ ? guard_used_by_caller -+ : guard_size - guard_used_by_caller); -+ /* When doing the final adjustment for the outgoing arguments, take into -+ account any unprobed space there is above the current SP. There are -+ two cases: -+ -+ - When saving SVE registers below the hard frame pointer, we force -+ the lowest save to take place in the prologue before doing the final -+ adjustment (i.e. we don't allow the save to be shrink-wrapped). -+ This acts as a probe at SP, so there is no unprobed space. -+ -+ - When there are no SVE register saves, we use the store of the link -+ register as a probe. We can't assume that LR was saved at position 0 -+ though, so treat any space below it as unprobed. 
*/ -+ if (final_adjustment_p -+ && known_eq (cfun->machine->frame.below_hard_fp_saved_regs_size, 0)) -+ { -+ poly_int64 lr_offset = cfun->machine->frame.reg_offset[LR_REGNUM]; -+ if (known_ge (lr_offset, 0)) -+ min_probe_threshold -= lr_offset.to_constant (); -+ else -+ gcc_assert (!flag_stack_clash_protection || known_eq (poly_size, 0)); -+ } - - poly_int64 frame_size = cfun->machine->frame.frame_size; - -@@ -5377,13 +6926,15 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2, - if (flag_stack_clash_protection && !final_adjustment_p) - { - poly_int64 initial_adjust = cfun->machine->frame.initial_adjust; -+ poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust; - poly_int64 final_adjust = cfun->machine->frame.final_adjust; - - if (known_eq (frame_size, 0)) - { - dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false); - } -- else if (known_lt (initial_adjust, guard_size - guard_used_by_caller) -+ else if (known_lt (initial_adjust + sve_callee_adjust, -+ guard_size - guard_used_by_caller) - && known_lt (final_adjust, guard_used_by_caller)) - { - dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true); -@@ -5583,24 +7134,10 @@ aarch64_epilogue_uses (int regno) - { - if (regno == LR_REGNUM) - return 1; -- if (aarch64_simd_decl_p (cfun->decl) && FP_SIMD_SAVED_REGNUM_P (regno)) -- return 1; - } - return 0; - } - --/* Add a REG_CFA_EXPRESSION note to INSN to say that register REG -- is saved at BASE + OFFSET. */ -- --static void --aarch64_add_cfa_expression (rtx_insn *insn, unsigned int reg, -- rtx base, poly_int64 offset) --{ -- rtx mem = gen_frame_mem (DImode, plus_constant (Pmode, base, offset)); -- add_reg_note (insn, REG_CFA_EXPRESSION, -- gen_rtx_SET (mem, regno_reg_rtx[reg])); --} -- - /* AArch64 stack frames generated by this compiler look like: - - +-------------------------------+ -@@ -5622,8 +7159,12 @@ aarch64_add_cfa_expression (rtx_insn *insn, unsigned int reg, - +-------------------------------+ | - | LR' | | - +-------------------------------+ | -- | FP' | / <- hard_frame_pointer_rtx (aligned) -- +-------------------------------+ -+ | FP' | | -+ +-------------------------------+ |<- hard_frame_pointer_rtx (aligned) -+ | SVE vector registers | | \ -+ +-------------------------------+ | | below_hard_fp_saved_regs_size -+ | SVE predicate registers | / / -+ +-------------------------------+ - | dynamic allocation | - +-------------------------------+ - | padding | -@@ -5656,7 +7197,8 @@ aarch64_add_cfa_expression (rtx_insn *insn, unsigned int reg, - The following registers are reserved during frame layout and should not be - used for any other purpose: - -- - r11: Used by stack clash protection when SVE is enabled. -+ - r11: Used by stack clash protection when SVE is enabled, and also -+ as an anchor register when saving and restoring registers - - r12(EP0) and r13(EP1): Used as temporaries for stack adjustment. - - r14 and r15: Used for speculation tracking. - - r16(IP0), r17(IP1): Used by indirect tailcalls. 
-@@ -5679,15 +7221,37 @@ aarch64_expand_prologue (void) - HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust; - poly_int64 final_adjust = cfun->machine->frame.final_adjust; - poly_int64 callee_offset = cfun->machine->frame.callee_offset; -+ poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust; -+ poly_int64 below_hard_fp_saved_regs_size -+ = cfun->machine->frame.below_hard_fp_saved_regs_size; - unsigned reg1 = cfun->machine->frame.wb_candidate1; - unsigned reg2 = cfun->machine->frame.wb_candidate2; - bool emit_frame_chain = cfun->machine->frame.emit_frame_chain; - rtx_insn *insn; - -+ if (flag_stack_clash_protection && known_eq (callee_adjust, 0)) -+ { -+ /* Fold the SVE allocation into the initial allocation. -+ We don't do this in aarch64_layout_arg to avoid pessimizing -+ the epilogue code. */ -+ initial_adjust += sve_callee_adjust; -+ sve_callee_adjust = 0; -+ } -+ - /* Sign return address for functions. */ - if (aarch64_return_address_signing_enabled ()) - { -- insn = emit_insn (gen_pacisp ()); -+ switch (aarch64_ra_sign_key) -+ { -+ case AARCH64_KEY_A: -+ insn = emit_insn (gen_paciasp ()); -+ break; -+ case AARCH64_KEY_B: -+ insn = emit_insn (gen_pacibsp ()); -+ break; -+ default: -+ gcc_unreachable (); -+ } - add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx); - RTX_FRAME_RELATED_P (insn) = 1; - } -@@ -5726,18 +7290,27 @@ aarch64_expand_prologue (void) - if (callee_adjust != 0) - aarch64_push_regs (reg1, reg2, callee_adjust); - -+ /* The offset of the frame chain record (if any) from the current SP. */ -+ poly_int64 chain_offset = (initial_adjust + callee_adjust -+ - cfun->machine->frame.hard_fp_offset); -+ gcc_assert (known_ge (chain_offset, 0)); -+ -+ /* The offset of the bottom of the save area from the current SP. */ -+ poly_int64 saved_regs_offset = chain_offset - below_hard_fp_saved_regs_size; -+ - if (emit_frame_chain) - { -- poly_int64 reg_offset = callee_adjust; - if (callee_adjust == 0) - { - reg1 = R29_REGNUM; - reg2 = R30_REGNUM; -- reg_offset = callee_offset; -- aarch64_save_callee_saves (DImode, reg_offset, reg1, reg2, false); -+ aarch64_save_callee_saves (saved_regs_offset, reg1, reg2, -+ false, false); - } -+ else -+ gcc_assert (known_eq (chain_offset, 0)); - aarch64_add_offset (Pmode, hard_frame_pointer_rtx, -- stack_pointer_rtx, callee_offset, -+ stack_pointer_rtx, chain_offset, - tmp1_rtx, tmp0_rtx, frame_pointer_needed); - if (frame_pointer_needed && !frame_size.is_constant ()) - { -@@ -5764,23 +7337,31 @@ aarch64_expand_prologue (void) - - /* Change the save slot expressions for the registers that - we've already saved. 
*/ -- reg_offset -= callee_offset; -- aarch64_add_cfa_expression (insn, reg2, hard_frame_pointer_rtx, -- reg_offset + UNITS_PER_WORD); -- aarch64_add_cfa_expression (insn, reg1, hard_frame_pointer_rtx, -- reg_offset); -+ aarch64_add_cfa_expression (insn, regno_reg_rtx[reg2], -+ hard_frame_pointer_rtx, UNITS_PER_WORD); -+ aarch64_add_cfa_expression (insn, regno_reg_rtx[reg1], -+ hard_frame_pointer_rtx, 0); - } - emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx)); - } - -- aarch64_save_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM, -- callee_adjust != 0 || emit_frame_chain); -- if (aarch64_simd_decl_p (cfun->decl)) -- aarch64_save_callee_saves (TFmode, callee_offset, V0_REGNUM, V31_REGNUM, -- callee_adjust != 0 || emit_frame_chain); -- else -- aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM, -- callee_adjust != 0 || emit_frame_chain); -+ aarch64_save_callee_saves (saved_regs_offset, R0_REGNUM, R30_REGNUM, -+ callee_adjust != 0 || emit_frame_chain, -+ emit_frame_chain); -+ if (maybe_ne (sve_callee_adjust, 0)) -+ { -+ gcc_assert (!flag_stack_clash_protection -+ || known_eq (initial_adjust, 0)); -+ aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, -+ sve_callee_adjust, -+ !frame_pointer_needed, false); -+ saved_regs_offset += sve_callee_adjust; -+ } -+ aarch64_save_callee_saves (saved_regs_offset, P0_REGNUM, P15_REGNUM, -+ false, emit_frame_chain); -+ aarch64_save_callee_saves (saved_regs_offset, V0_REGNUM, V31_REGNUM, -+ callee_adjust != 0 || emit_frame_chain, -+ emit_frame_chain); - - /* We may need to probe the final adjustment if it is larger than the guard - that is assumed by the called. */ -@@ -5806,19 +7387,6 @@ aarch64_use_return_insn_p (void) - return known_eq (cfun->machine->frame.frame_size, 0); - } - --/* Return false for non-leaf SIMD functions in order to avoid -- shrink-wrapping them. Doing this will lose the necessary -- save/restore of FP registers. */ -- --bool --aarch64_use_simple_return_insn_p (void) --{ -- if (aarch64_simd_decl_p (cfun->decl) && !crtl->is_leaf) -- return false; -- -- return true; --} -- - /* Generate the epilogue instructions for returning from a function. - This is almost exactly the reverse of the prolog sequence, except - that we need to insert barriers to avoid scheduling loads that read -@@ -5831,6 +7399,9 @@ aarch64_expand_epilogue (bool for_sibcall) - HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust; - poly_int64 final_adjust = cfun->machine->frame.final_adjust; - poly_int64 callee_offset = cfun->machine->frame.callee_offset; -+ poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust; -+ poly_int64 below_hard_fp_saved_regs_size -+ = cfun->machine->frame.below_hard_fp_saved_regs_size; - unsigned reg1 = cfun->machine->frame.wb_candidate1; - unsigned reg2 = cfun->machine->frame.wb_candidate2; - rtx cfi_ops = NULL; -@@ -5844,15 +7415,23 @@ aarch64_expand_epilogue (bool for_sibcall) - = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE); - HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD; - -- /* We can re-use the registers when the allocation amount is smaller than -- guard_size - guard_used_by_caller because we won't be doing any probes -- then. 
In such situations the register should remain live with the correct -+ /* We can re-use the registers when: -+ -+ (a) the deallocation amount is the same as the corresponding -+ allocation amount (which is false if we combine the initial -+ and SVE callee save allocations in the prologue); and -+ -+ (b) the allocation amount doesn't need a probe (which is false -+ if the amount is guard_size - guard_used_by_caller or greater). -+ -+ In such situations the register should remain live with the correct - value. */ - bool can_inherit_p = (initial_adjust.is_constant () -- && final_adjust.is_constant ()) -+ && final_adjust.is_constant () - && (!flag_stack_clash_protection -- || known_lt (initial_adjust, -- guard_size - guard_used_by_caller)); -+ || (known_lt (initial_adjust, -+ guard_size - guard_used_by_caller) -+ && known_eq (sve_callee_adjust, 0)))); - - /* We need to add memory barrier to prevent read from deallocated stack. */ - bool need_barrier_p -@@ -5877,7 +7456,8 @@ aarch64_expand_epilogue (bool for_sibcall) - /* If writeback is used when restoring callee-saves, the CFA - is restored on the instruction doing the writeback. */ - aarch64_add_offset (Pmode, stack_pointer_rtx, -- hard_frame_pointer_rtx, -callee_offset, -+ hard_frame_pointer_rtx, -+ -callee_offset - below_hard_fp_saved_regs_size, - tmp1_rtx, tmp0_rtx, callee_adjust == 0); - else - /* The case where we need to re-use the register here is very rare, so -@@ -5885,14 +7465,17 @@ aarch64_expand_epilogue (bool for_sibcall) - immediate doesn't fit. */ - aarch64_add_sp (tmp1_rtx, tmp0_rtx, final_adjust, true); - -- aarch64_restore_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM, -+ /* Restore the vector registers before the predicate registers, -+ so that we can use P4 as a temporary for big-endian SVE frames. */ -+ aarch64_restore_callee_saves (callee_offset, V0_REGNUM, V31_REGNUM, -+ callee_adjust != 0, &cfi_ops); -+ aarch64_restore_callee_saves (callee_offset, P0_REGNUM, P15_REGNUM, -+ false, &cfi_ops); -+ if (maybe_ne (sve_callee_adjust, 0)) -+ aarch64_add_sp (NULL_RTX, NULL_RTX, sve_callee_adjust, true); -+ aarch64_restore_callee_saves (callee_offset - sve_callee_adjust, -+ R0_REGNUM, R30_REGNUM, - callee_adjust != 0, &cfi_ops); -- if (aarch64_simd_decl_p (cfun->decl)) -- aarch64_restore_callee_saves (TFmode, callee_offset, V0_REGNUM, V31_REGNUM, -- callee_adjust != 0, &cfi_ops); -- else -- aarch64_restore_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM, -- callee_adjust != 0, &cfi_ops); - - if (need_barrier_p) - emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx)); -@@ -5943,13 +7526,23 @@ aarch64_expand_epilogue (bool for_sibcall) - if (aarch64_return_address_signing_enabled () - && (for_sibcall || !TARGET_ARMV8_3 || crtl->calls_eh_return)) - { -- insn = emit_insn (gen_autisp ()); -+ switch (aarch64_ra_sign_key) -+ { -+ case AARCH64_KEY_A: -+ insn = emit_insn (gen_autiasp ()); -+ break; -+ case AARCH64_KEY_B: -+ insn = emit_insn (gen_autibsp ()); -+ break; -+ default: -+ gcc_unreachable (); -+ } - add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx); - RTX_FRAME_RELATED_P (insn) = 1; - } - - /* Stack adjustment for exception handler. */ -- if (crtl->calls_eh_return) -+ if (crtl->calls_eh_return && !for_sibcall) - { - /* We need to unwind the stack by the offset computed by - EH_RETURN_STACKADJ_RTX. 
We have already reset the CFA -@@ -6015,6 +7608,7 @@ aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, - int this_regno = R0_REGNUM; - rtx this_rtx, temp0, temp1, addr, funexp; - rtx_insn *insn; -+ const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk)); - - if (aarch64_bti_enabled ()) - emit_insn (gen_bti_c()); -@@ -6077,14 +7671,18 @@ aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, - } - funexp = XEXP (DECL_RTL (function), 0); - funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); -- insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX)); -+ rtx callee_abi = gen_int_mode (fndecl_abi (function).id (), DImode); -+ insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, callee_abi)); - SIBLING_CALL_P (insn) = 1; - - insn = get_insns (); - shorten_branches (insn); -+ -+ assemble_start_function (thunk, fnname); - final_start_function (insn, file, 1); - final (insn, file, 1); - final_end_function (); -+ assemble_end_function (thunk, fnname); - - /* Stop pretending to be a post-reload pass. */ - reload_completed = 0; -@@ -6608,9 +8206,15 @@ aarch64_classify_address (struct aarch64_address_info *info, - - HOST_WIDE_INT const_size; - -+ /* Whether a vector mode is partial doesn't affect address legitimacy. -+ Partial vectors like VNx8QImode allow the same indexed addressing -+ mode and MUL VL addressing mode as full vectors like VNx16QImode; -+ in both cases, MUL VL counts multiples of GET_MODE_SIZE. */ -+ unsigned int vec_flags = aarch64_classify_vector_mode (mode); -+ vec_flags &= ~VEC_PARTIAL; -+ - /* On BE, we use load/store pair for all large int mode load/stores. - TI/TFmode may also use a load/store pair. */ -- unsigned int vec_flags = aarch64_classify_vector_mode (mode); - bool advsimd_struct_p = (vec_flags == (VEC_ADVSIMD | VEC_STRUCT)); - bool load_store_pair_p = (type == ADDR_QUERY_LDP_STP - || type == ADDR_QUERY_LDP_STP_N -@@ -6628,7 +8232,7 @@ aarch64_classify_address (struct aarch64_address_info *info, - bool allow_reg_index_p = (!load_store_pair_p - && (known_lt (GET_MODE_SIZE (mode), 16) - || vec_flags == VEC_ADVSIMD -- || vec_flags == VEC_SVE_DATA)); -+ || vec_flags & VEC_SVE_DATA)); - - /* For SVE, only accept [Rn], [Rn, Rm, LSL #shift] and - [Rn, #offset, MUL VL]. */ -@@ -7152,11 +8756,12 @@ aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2) - RESULT is the register in which the result is returned. It's NULL for - "call" and "sibcall". - MEM is the location of the function call. -+ CALLEE_ABI is a const_int that gives the arm_pcs of the callee. - SIBCALL indicates whether this function call is normal call or sibling call. - It will generate different pattern accordingly. 
*/ - - void --aarch64_expand_call (rtx result, rtx mem, bool sibcall) -+aarch64_expand_call (rtx result, rtx mem, rtx callee_abi, bool sibcall) - { - rtx call, callee, tmp; - rtvec vec; -@@ -7186,7 +8791,11 @@ aarch64_expand_call (rtx result, rtx mem, bool sibcall) - else - tmp = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNUM)); - -- vec = gen_rtvec (2, call, tmp); -+ gcc_assert (CONST_INT_P (callee_abi)); -+ callee_abi = gen_rtx_UNSPEC (DImode, gen_rtvec (1, callee_abi), -+ UNSPEC_CALLEE_ABI); -+ -+ vec = gen_rtvec (3, call, callee_abi, tmp); - call = gen_rtx_PARALLEL (VOIDmode, vec); - - aarch64_emit_call_insn (call); -@@ -7382,6 +8991,21 @@ aarch64_get_condition_code_1 (machine_mode mode, enum rtx_code comp_code) - } - break; - -+ case E_CC_NZCmode: -+ switch (comp_code) -+ { -+ case NE: return AARCH64_NE; /* = any */ -+ case EQ: return AARCH64_EQ; /* = none */ -+ case GE: return AARCH64_PL; /* = nfrst */ -+ case LT: return AARCH64_MI; /* = first */ -+ case GEU: return AARCH64_CS; /* = nlast */ -+ case GTU: return AARCH64_HI; /* = pmore */ -+ case LEU: return AARCH64_LS; /* = plast */ -+ case LTU: return AARCH64_CC; /* = last */ -+ default: return -1; -+ } -+ break; -+ - case E_CC_NZmode: - switch (comp_code) - { -@@ -7524,15 +9148,24 @@ aarch64_print_vector_float_operand (FILE *f, rtx x, bool negate) - if (negate) - r = real_value_negate (&r); - -- /* We only handle the SVE single-bit immediates here. */ -+ /* Handle the SVE single-bit immediates specially, since they have a -+ fixed form in the assembly syntax. */ - if (real_equal (&r, &dconst0)) - asm_fprintf (f, "0.0"); -+ else if (real_equal (&r, &dconst2)) -+ asm_fprintf (f, "2.0"); - else if (real_equal (&r, &dconst1)) - asm_fprintf (f, "1.0"); - else if (real_equal (&r, &dconsthalf)) - asm_fprintf (f, "0.5"); - else -- return false; -+ { -+ const int buf_size = 20; -+ char float_buf[buf_size] = {'\0'}; -+ real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, -+ 1, GET_MODE (elt)); -+ asm_fprintf (f, "%s", float_buf); -+ } - - return true; - } -@@ -7560,7 +9193,13 @@ sizetochar (int size) - 'D': Take the duplicated element in a vector constant - and print it as an unsigned integer, in decimal. - 'e': Print the sign/zero-extend size as a character 8->b, -- 16->h, 32->w. -+ 16->h, 32->w. Can also be used for masks: -+ 0xff->b, 0xffff->h, 0xffffffff->w. -+ 'I': If the operand is a duplicated vector constant, -+ replace it with the duplicated scalar. If the -+ operand is then a floating-point constant, replace -+ it with the integer bit representation. Print the -+ transformed constant as a signed decimal number. - 'p': Prints N such that 2^N == X (X must be power of 2 and - const int). - 'P': Print the number of non-zero bits in X (a const_int). -@@ -7574,7 +9213,7 @@ sizetochar (int size) - 'S/T/U/V': Print a FP/SIMD register name for a register list. - The register printed is the FP/SIMD register name - of X + 0/1/2/3 for S/T/U/V. -- 'R': Print a scalar FP/SIMD register name + 1. -+ 'R': Print a scalar Integer/FP/SIMD register name + 1. - 'X': Print bottom 16 bits of integer constant in hex. - 'w/x': Print a general register name or the zero register - (32-bit or 64-bit). 
-@@ -7626,27 +9265,22 @@ aarch64_print_operand (FILE *f, rtx x, int code) - - case 'e': - { -- int n; -- -- if (!CONST_INT_P (x) -- || (n = exact_log2 (INTVAL (x) & ~7)) <= 0) -+ x = unwrap_const_vec_duplicate (x); -+ if (!CONST_INT_P (x)) - { - output_operand_lossage ("invalid operand for '%%%c'", code); - return; - } - -- switch (n) -+ HOST_WIDE_INT val = INTVAL (x); -+ if ((val & ~7) == 8 || val == 0xff) -+ fputc ('b', f); -+ else if ((val & ~7) == 16 || val == 0xffff) -+ fputc ('h', f); -+ else if ((val & ~7) == 32 || val == 0xffffffff) -+ fputc ('w', f); -+ else - { -- case 3: -- fputc ('b', f); -- break; -- case 4: -- fputc ('h', f); -- break; -- case 5: -- fputc ('w', f); -- break; -- default: - output_operand_lossage ("invalid operand for '%%%c'", code); - return; - } -@@ -7693,6 +9327,19 @@ aarch64_print_operand (FILE *f, rtx x, int code) - asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]); - break; - -+ case 'I': -+ { -+ x = aarch64_bit_representation (unwrap_const_vec_duplicate (x)); -+ if (CONST_INT_P (x)) -+ asm_fprintf (f, "%wd", INTVAL (x)); -+ else -+ { -+ output_operand_lossage ("invalid operand for '%%%c'", code); -+ return; -+ } -+ break; -+ } -+ - case 'M': - case 'm': - { -@@ -7715,7 +9362,10 @@ aarch64_print_operand (FILE *f, rtx x, int code) - gcc_assert (cond_code >= 0); - if (code == 'M') - cond_code = AARCH64_INVERSE_CONDITION_CODE (cond_code); -- fputs (aarch64_condition_codes[cond_code], f); -+ if (GET_MODE (XEXP (x, 0)) == CC_NZCmode) -+ fputs (aarch64_sve_condition_codes[cond_code], f); -+ else -+ fputs (aarch64_condition_codes[cond_code], f); - } - break; - -@@ -7766,12 +9416,13 @@ aarch64_print_operand (FILE *f, rtx x, int code) - break; - - case 'R': -- if (!REG_P (x) || !FP_REGNUM_P (REGNO (x))) -- { -- output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code); -- return; -- } -- asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1); -+ if (REG_P (x) && FP_REGNUM_P (REGNO (x))) -+ asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1); -+ else if (REG_P (x) && GP_REGNUM_P (REGNO (x))) -+ asm_fprintf (f, "x%d", REGNO (x) - R0_REGNUM + 1); -+ else -+ output_operand_lossage ("incompatible register operand for '%%%c'", -+ code); - break; - - case 'X': -@@ -8068,7 +9719,7 @@ aarch64_print_address_internal (FILE *f, machine_mode mode, rtx x, - aarch64_addr_query_type type) - { - struct aarch64_address_info addr; -- unsigned int size; -+ unsigned int size, vec_flags; - - /* Check all addresses are Pmode - including ILP32. 
*/ - if (GET_MODE (x) != Pmode -@@ -8084,26 +9735,24 @@ aarch64_print_address_internal (FILE *f, machine_mode mode, rtx x, - { - case ADDRESS_REG_IMM: - if (known_eq (addr.const_offset, 0)) -- asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]); -- else if (aarch64_sve_data_mode_p (mode)) - { -- HOST_WIDE_INT vnum -- = exact_div (addr.const_offset, -- BYTES_PER_SVE_VECTOR).to_constant (); -- asm_fprintf (f, "[%s, #%wd, mul vl]", -- reg_names[REGNO (addr.base)], vnum); -+ asm_fprintf (f, "[%s]", reg_names[REGNO (addr.base)]); -+ return true; - } -- else if (aarch64_sve_pred_mode_p (mode)) -+ -+ vec_flags = aarch64_classify_vector_mode (mode); -+ if (vec_flags & VEC_ANY_SVE) - { - HOST_WIDE_INT vnum - = exact_div (addr.const_offset, -- BYTES_PER_SVE_PRED).to_constant (); -+ aarch64_vl_bytes (mode, vec_flags)).to_constant (); - asm_fprintf (f, "[%s, #%wd, mul vl]", - reg_names[REGNO (addr.base)], vnum); -+ return true; - } -- else -- asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)], -- INTVAL (addr.offset)); -+ -+ asm_fprintf (f, "[%s, %wd]", reg_names[REGNO (addr.base)], -+ INTVAL (addr.offset)); - return true; - - case ADDRESS_REG_REG: -@@ -8234,11 +9883,15 @@ aarch64_regno_regclass (unsigned regno) - return POINTER_REGS; - - if (FP_REGNUM_P (regno)) -- return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS; -+ return (FP_LO8_REGNUM_P (regno) ? FP_LO8_REGS -+ : FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS); - - if (PR_REGNUM_P (regno)) - return PR_LO_REGNUM_P (regno) ? PR_LO_REGS : PR_HI_REGS; - -+ if (regno == FFR_REGNUM || regno == FFRT_REGNUM) -+ return FFR_REGS; -+ - return NO_REGS; - } - -@@ -8348,13 +10001,14 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x, - secondary_reload_info *sri) - { - /* Use aarch64_sve_reload_be for SVE reloads that cannot be handled -- directly by the *aarch64_sve_mov_be move pattern. See the -+ directly by the *aarch64_sve_mov_[lb]e move patterns. See the - comment at the head of aarch64-sve.md for more details about the - big-endian handling. */ - if (BYTES_BIG_ENDIAN - && reg_class_subset_p (rclass, FP_REGS) - && !((REG_P (x) && HARD_REGISTER_P (x)) - || aarch64_simd_valid_immediate (x, NULL)) -+ && mode != VNx16QImode - && aarch64_sve_data_mode_p (mode)) - { - sri->icode = CODE_FOR_aarch64_sve_reload_be; -@@ -8514,7 +10168,7 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode) - can hold MODE, but at the moment we need to handle all modes. - Just ignore any runtime parts for registers that can't store them. */ - HOST_WIDE_INT lowest_size = constant_lower_bound (GET_MODE_SIZE (mode)); -- unsigned int nregs; -+ unsigned int nregs, vec_flags; - switch (regclass) - { - case TAILCALL_ADDR_REGS: -@@ -8524,17 +10178,21 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode) - case POINTER_AND_FP_REGS: - case FP_REGS: - case FP_LO_REGS: -- if (aarch64_sve_data_mode_p (mode) -+ case FP_LO8_REGS: -+ vec_flags = aarch64_classify_vector_mode (mode); -+ if ((vec_flags & VEC_SVE_DATA) - && constant_multiple_p (GET_MODE_SIZE (mode), -- BYTES_PER_SVE_VECTOR, &nregs)) -+ aarch64_vl_bytes (mode, vec_flags), &nregs)) - return nregs; -- return (aarch64_vector_data_mode_p (mode) -+ return (vec_flags & VEC_ADVSIMD - ? 
CEIL (lowest_size, UNITS_PER_VREG) - : CEIL (lowest_size, UNITS_PER_WORD)); - case STACK_REG: - case PR_REGS: - case PR_LO_REGS: - case PR_HI_REGS: -+ case FFR_REGS: -+ case PR_AND_FFR_REGS: - return 1; - - case NO_REGS: -@@ -10715,6 +12373,14 @@ aarch64_register_move_cost (machine_mode mode, - if (from == TAILCALL_ADDR_REGS || from == POINTER_REGS) - from = GENERAL_REGS; - -+ /* Make RDFFR very expensive. In particular, if we know that the FFR -+ contains a PTRUE (e.g. after a SETFFR), we must never use RDFFR -+ as a way of obtaining a PTRUE. */ -+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL -+ && hard_reg_set_subset_p (reg_class_contents[from_i], -+ reg_class_contents[FFR_REGS])) -+ return 80; -+ - /* Moving between GPR and stack cost is the same as GP2GP. */ - if ((from == GENERAL_REGS && to == STACK_REG) - || (to == GENERAL_REGS && from == STACK_REG)) -@@ -10764,6 +12430,93 @@ aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED, - return aarch64_tune_params.memmov_cost; - } - -+/* Implement TARGET_INIT_BUILTINS. */ -+static void -+aarch64_init_builtins () -+{ -+ aarch64_general_init_builtins (); -+ aarch64_sve::init_builtins (); -+} -+ -+/* Implement TARGET_FOLD_BUILTIN. */ -+static tree -+aarch64_fold_builtin (tree fndecl, int nargs, tree *args, bool) -+{ -+ unsigned int code = DECL_MD_FUNCTION_CODE (fndecl); -+ unsigned int subcode = code >> AARCH64_BUILTIN_SHIFT; -+ tree type = TREE_TYPE (TREE_TYPE (fndecl)); -+ switch (code & AARCH64_BUILTIN_CLASS) -+ { -+ case AARCH64_BUILTIN_GENERAL: -+ return aarch64_general_fold_builtin (subcode, type, nargs, args); -+ -+ case AARCH64_BUILTIN_SVE: -+ return NULL_TREE; -+ } -+ gcc_unreachable (); -+} -+ -+/* Implement TARGET_GIMPLE_FOLD_BUILTIN. */ -+static bool -+aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi) -+{ -+ gcall *stmt = as_a (gsi_stmt (*gsi)); -+ tree fndecl = gimple_call_fndecl (stmt); -+ unsigned int code = DECL_MD_FUNCTION_CODE (fndecl); -+ unsigned int subcode = code >> AARCH64_BUILTIN_SHIFT; -+ gimple *new_stmt = NULL; -+ switch (code & AARCH64_BUILTIN_CLASS) -+ { -+ case AARCH64_BUILTIN_GENERAL: -+ new_stmt = aarch64_general_gimple_fold_builtin (subcode, stmt); -+ break; -+ -+ case AARCH64_BUILTIN_SVE: -+ new_stmt = aarch64_sve::gimple_fold_builtin (subcode, gsi, stmt); -+ break; -+ } -+ -+ if (!new_stmt) -+ return false; -+ -+ gsi_replace (gsi, new_stmt, true); -+ return true; -+} -+ -+/* Implement TARGET_EXPAND_BUILTIN. */ -+static rtx -+aarch64_expand_builtin (tree exp, rtx target, rtx, machine_mode, int ignore) -+{ -+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); -+ unsigned int code = DECL_MD_FUNCTION_CODE (fndecl); -+ unsigned int subcode = code >> AARCH64_BUILTIN_SHIFT; -+ switch (code & AARCH64_BUILTIN_CLASS) -+ { -+ case AARCH64_BUILTIN_GENERAL: -+ return aarch64_general_expand_builtin (subcode, exp, target, ignore); -+ -+ case AARCH64_BUILTIN_SVE: -+ return aarch64_sve::expand_builtin (subcode, exp, target); -+ } -+ gcc_unreachable (); -+} -+ -+/* Implement TARGET_BUILTIN_DECL. */ -+static tree -+aarch64_builtin_decl (unsigned int code, bool initialize_p) -+{ -+ unsigned int subcode = code >> AARCH64_BUILTIN_SHIFT; -+ switch (code & AARCH64_BUILTIN_CLASS) -+ { -+ case AARCH64_BUILTIN_GENERAL: -+ return aarch64_general_builtin_decl (subcode, initialize_p); -+ -+ case AARCH64_BUILTIN_SVE: -+ return aarch64_sve::builtin_decl (subcode, initialize_p); -+ } -+ gcc_unreachable (); -+} -+ - /* Return true if it is safe and beneficial to use the approximate rsqrt optabs - to optimize 1.0/sqrt. 
*/ - -@@ -10787,7 +12540,17 @@ aarch64_builtin_reciprocal (tree fndecl) - - if (!use_rsqrt_p (mode)) - return NULL_TREE; -- return aarch64_builtin_rsqrt (DECL_FUNCTION_CODE (fndecl)); -+ unsigned int code = DECL_MD_FUNCTION_CODE (fndecl); -+ unsigned int subcode = code >> AARCH64_BUILTIN_SHIFT; -+ switch (code & AARCH64_BUILTIN_CLASS) -+ { -+ case AARCH64_BUILTIN_GENERAL: -+ return aarch64_general_builtin_rsqrt (subcode); -+ -+ case AARCH64_BUILTIN_SVE: -+ return NULL_TREE; -+ } -+ gcc_unreachable (); - } - - /* Emit instruction sequence to compute either the approximate square root -@@ -11096,7 +12859,7 @@ static void initialize_aarch64_code_model (struct gcc_options *); - - static enum aarch64_parse_opt_result - aarch64_parse_arch (const char *to_parse, const struct processor **res, -- unsigned long *isa_flags, std::string *invalid_extension) -+ uint64_t *isa_flags, std::string *invalid_extension) - { - const char *ext; - const struct processor *arch; -@@ -11119,7 +12882,7 @@ aarch64_parse_arch (const char *to_parse, const struct processor **res, - if (strlen (arch->name) == len - && strncmp (arch->name, to_parse, len) == 0) - { -- unsigned long isa_temp = arch->flags; -+ uint64_t isa_temp = arch->flags; - - if (ext != NULL) - { -@@ -11151,7 +12914,7 @@ aarch64_parse_arch (const char *to_parse, const struct processor **res, - - static enum aarch64_parse_opt_result - aarch64_parse_cpu (const char *to_parse, const struct processor **res, -- unsigned long *isa_flags, std::string *invalid_extension) -+ uint64_t *isa_flags, std::string *invalid_extension) - { - const char *ext; - const struct processor *cpu; -@@ -11173,7 +12936,7 @@ aarch64_parse_cpu (const char *to_parse, const struct processor **res, - { - if (strlen (cpu->name) == len && strncmp (cpu->name, to_parse, len) == 0) - { -- unsigned long isa_temp = cpu->flags; -+ uint64_t isa_temp = cpu->flags; - - - if (ext != NULL) -@@ -11758,7 +13521,7 @@ aarch64_print_hint_for_extensions (const std::string &str) - - static bool - aarch64_validate_mcpu (const char *str, const struct processor **res, -- unsigned long *isa_flags) -+ uint64_t *isa_flags) - { - std::string invalid_extension; - enum aarch64_parse_opt_result parse_res -@@ -11885,9 +13648,9 @@ aarch64_validate_mbranch_protection (const char *const_str) - enum aarch64_parse_opt_result res = - aarch64_parse_branch_protection (const_str, &str); - if (res == AARCH64_PARSE_INVALID_ARG) -- error ("invalid arg %<%s%> for %<-mbranch-protection=%>", str); -+ error ("invalid argument %<%s%> for %<-mbranch-protection=%>", str); - else if (res == AARCH64_PARSE_MISSING_ARG) -- error ("missing arg for %<-mbranch-protection=%>"); -+ error ("missing argument for %<-mbranch-protection=%>"); - free (str); - return res == AARCH64_PARSE_OK; - } -@@ -11899,7 +13662,7 @@ aarch64_validate_mbranch_protection (const char *const_str) - - static bool - aarch64_validate_march (const char *str, const struct processor **res, -- unsigned long *isa_flags) -+ uint64_t *isa_flags) - { - std::string invalid_extension; - enum aarch64_parse_opt_result parse_res -@@ -12014,8 +13777,8 @@ aarch64_convert_sve_vector_bits (aarch64_sve_vector_bits_enum value) - static void - aarch64_override_options (void) - { -- unsigned long cpu_isa = 0; -- unsigned long arch_isa = 0; -+ uint64_t cpu_isa = 0; -+ uint64_t arch_isa = 0; - aarch64_isa_flags = 0; - - bool valid_cpu = true; -@@ -12255,7 +14018,7 @@ aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr) - { - const struct processor *cpu - = 
aarch64_get_tune_cpu (ptr->x_explicit_tune_core); -- unsigned long isa_flags = ptr->x_aarch64_isa_flags; -+ uint64_t isa_flags = ptr->x_aarch64_isa_flags; - const struct processor *arch = aarch64_get_arch (ptr->x_explicit_arch); - std::string extension - = aarch64_get_extension_string_for_isa_flags (isa_flags, arch->flags); -@@ -12508,7 +14271,7 @@ static bool - aarch64_handle_attr_isa_flags (char *str) - { - enum aarch64_parse_opt_result parse_res; -- unsigned long isa_flags = aarch64_isa_flags; -+ uint64_t isa_flags = aarch64_isa_flags; - - /* We allow "+nothing" in the beginning to clear out all architectural - features if the user wants to handpick specific features. */ -@@ -12999,6 +14762,26 @@ aarch64_can_inline_p (tree caller, tree callee) - return true; - } - -+/* Return the ID of the TLDESC ABI, initializing the descriptor if hasn't -+ been already. */ -+ -+unsigned int -+aarch64_tlsdesc_abi_id () -+{ -+ predefined_function_abi &tlsdesc_abi = function_abis[ARM_PCS_TLSDESC]; -+ if (!tlsdesc_abi.initialized_p ()) -+ { -+ HARD_REG_SET full_reg_clobbers; -+ CLEAR_HARD_REG_SET (full_reg_clobbers); -+ SET_HARD_REG_BIT (full_reg_clobbers, R0_REGNUM); -+ SET_HARD_REG_BIT (full_reg_clobbers, CC_REGNUM); -+ for (int regno = P0_REGNUM; regno <= P15_REGNUM; ++regno) -+ SET_HARD_REG_BIT (full_reg_clobbers, regno); -+ tlsdesc_abi.initialize (ARM_PCS_TLSDESC, full_reg_clobbers); -+ } -+ return tlsdesc_abi.id (); -+} -+ - /* Return true if SYMBOL_REF X binds locally. */ - - static bool -@@ -13104,26 +14887,31 @@ aarch64_classify_symbol (rtx x, HOST_WIDE_INT offset) - the offset does not cause overflow of the final address. But - we have no way of knowing the address of symbol at compile time - so we can't accurately say if the distance between the PC and -- symbol + offset is outside the addressible range of +/-1M in the -- TINY code model. So we rely on images not being greater than -- 1M and cap the offset at 1M and anything beyond 1M will have to -- be loaded using an alternative mechanism. Furthermore if the -- symbol is a weak reference to something that isn't known to -- resolve to a symbol in this module, then force to memory. */ -- if ((SYMBOL_REF_WEAK (x) -- && !aarch64_symbol_binds_local_p (x)) -- || !IN_RANGE (offset, -1048575, 1048575)) -+ symbol + offset is outside the addressible range of +/-1MB in the -+ TINY code model. So we limit the maximum offset to +/-64KB and -+ assume the offset to the symbol is not larger than +/-(1MB - 64KB). -+ If offset_within_block_p is true we allow larger offsets. -+ Furthermore force to memory if the symbol is a weak reference to -+ something that doesn't resolve to a symbol in this module. */ -+ -+ if (SYMBOL_REF_WEAK (x) && !aarch64_symbol_binds_local_p (x)) -+ return SYMBOL_FORCE_TO_MEM; -+ if (!(IN_RANGE (offset, -0x10000, 0x10000) -+ || offset_within_block_p (x, offset))) - return SYMBOL_FORCE_TO_MEM; -+ - return SYMBOL_TINY_ABSOLUTE; - - case AARCH64_CMODEL_SMALL: - /* Same reasoning as the tiny code model, but the offset cap here is -- 4G. */ -- if ((SYMBOL_REF_WEAK (x) -- && !aarch64_symbol_binds_local_p (x)) -- || !IN_RANGE (offset, HOST_WIDE_INT_C (-4294967263), -- HOST_WIDE_INT_C (4294967264))) -+ 1MB, allowing +/-3.9GB for the offset to the symbol. 
*/ -+ -+ if (SYMBOL_REF_WEAK (x) && !aarch64_symbol_binds_local_p (x)) - return SYMBOL_FORCE_TO_MEM; -+ if (!(IN_RANGE (offset, -0x100000, 0x100000) -+ || offset_within_block_p (x, offset))) -+ return SYMBOL_FORCE_TO_MEM; -+ - return SYMBOL_SMALL_ABSOLUTE; - - case AARCH64_CMODEL_TINY_PIC: -@@ -13432,7 +15220,7 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, - HOST_WIDE_INT size, rsize, adjust, align; - tree t, u, cond1, cond2; - -- indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false); -+ indirect_p = pass_va_arg_by_reference (type); - if (indirect_p) - type = build_pointer_type (type); - -@@ -13626,6 +15414,10 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, - field_t = aarch64_fp16_type_node; - field_ptr_t = aarch64_fp16_ptr_type_node; - break; -+ case E_BFmode: -+ field_t = aarch64_bf16_type_node; -+ field_ptr_t = aarch64_bf16_ptr_type_node; -+ break; - case E_V2SImode: - case E_V4SImode: - { -@@ -13677,9 +15469,9 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, - /* Implement TARGET_SETUP_INCOMING_VARARGS. */ - - static void --aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode, -- tree type, int *pretend_size ATTRIBUTE_UNUSED, -- int no_rtl) -+aarch64_setup_incoming_varargs (cumulative_args_t cum_v, -+ const function_arg_info &arg, -+ int *pretend_size ATTRIBUTE_UNUSED, int no_rtl) - { - CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); - CUMULATIVE_ARGS local_cum; -@@ -13690,7 +15482,7 @@ aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode, - argument. Advance a local copy of CUM past the last "real" named - argument, to find out how many registers are left over. */ - local_cum = *cum; -- aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true); -+ aarch64_function_arg_advance (pack_cumulative_args(&local_cum), arg); - - /* Found out how many registers we need to save. - Honor tree-stdvar analysis results. */ -@@ -13777,6 +15569,10 @@ aarch64_conditional_register_usage (void) - call_used_regs[i] = 1; - } - -+ /* Only allow the FFR and FFRT to be accessed via special patterns. */ -+ CLEAR_HARD_REG_BIT (operand_reg_set, FFR_REGNUM); -+ CLEAR_HARD_REG_BIT (operand_reg_set, FFRT_REGNUM); -+ - /* When tracking speculation, we need a couple of call-clobbered registers - to track the speculation state. It would be nice to just use - IP0 and IP1, but currently there are numerous places that just -@@ -13802,6 +15598,10 @@ aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep) - machine_mode mode; - HOST_WIDE_INT size; - -+ /* SVE types (and types containing SVE types) must be handled -+ before calling this function. 
*/ -+ gcc_assert (!aarch64_sve::builtin_type_p (type)); -+ - switch (TREE_CODE (type)) - { - case REAL_TYPE: -@@ -13973,6 +15773,9 @@ aarch64_short_vector_p (const_tree type, - { - poly_int64 size = -1; - -+ if (type && aarch64_sve::builtin_type_p (type)) -+ return false; -+ - if (type && TREE_CODE (type) == VECTOR_TYPE) - size = int_size_in_bytes (type); - else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT -@@ -14033,11 +15836,14 @@ aarch64_vfp_is_call_or_return_candidate (machine_mode mode, - int *count, - bool *is_ha) - { -+ if (is_ha != NULL) *is_ha = false; -+ -+ if (type && aarch64_sve::builtin_type_p (type)) -+ return false; -+ - machine_mode new_mode = VOIDmode; - bool composite_p = aarch64_composite_type_p (type, mode); - -- if (is_ha != NULL) *is_ha = false; -- - if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT) - || aarch64_short_vector_p (type, mode)) - { -@@ -14083,7 +15889,63 @@ static bool - aarch64_vector_mode_supported_p (machine_mode mode) - { - unsigned int vec_flags = aarch64_classify_vector_mode (mode); -- return vec_flags != 0 && (vec_flags & VEC_STRUCT) == 0; -+ return vec_flags != 0 && (vec_flags & (VEC_STRUCT | VEC_PARTIAL)) == 0; -+} -+ -+/* Return the full-width SVE vector mode for element mode MODE, if one -+ exists. */ -+opt_machine_mode -+aarch64_full_sve_mode (scalar_mode mode) -+{ -+ switch (mode) -+ { -+ case E_DFmode: -+ return VNx2DFmode; -+ case E_SFmode: -+ return VNx4SFmode; -+ case E_HFmode: -+ return VNx8HFmode; -+ case E_BFmode: -+ return VNx8BFmode; -+ case E_DImode: -+ return VNx2DImode; -+ case E_SImode: -+ return VNx4SImode; -+ case E_HImode: -+ return VNx8HImode; -+ case E_QImode: -+ return VNx16QImode; -+ default: -+ return opt_machine_mode (); -+ } -+} -+ -+/* Return the 128-bit Advanced SIMD vector mode for element mode MODE, -+ if it exists. 
*/ -+opt_machine_mode -+aarch64_vq_mode (scalar_mode mode) -+{ -+ switch (mode) -+ { -+ case E_DFmode: -+ return V2DFmode; -+ case E_SFmode: -+ return V4SFmode; -+ case E_HFmode: -+ return V8HFmode; -+ case E_BFmode: -+ return V8BFmode; -+ case E_SImode: -+ return V4SImode; -+ case E_HImode: -+ return V8HImode; -+ case E_QImode: -+ return V16QImode; -+ case E_DImode: -+ return V2DImode; -+ default: -+ return opt_machine_mode (); -+ } - } - - /* Return appropriate SIMD container -@@ -14092,49 +15954,13 @@ static machine_mode - aarch64_simd_container_mode (scalar_mode mode, poly_int64 width) - { - if (TARGET_SVE && known_eq (width, BITS_PER_SVE_VECTOR)) -- switch (mode) -- { -- case E_DFmode: -- return VNx2DFmode; -- case E_SFmode: -- return VNx4SFmode; -- case E_HFmode: -- return VNx8HFmode; -- case E_DImode: -- return VNx2DImode; -- case E_SImode: -- return VNx4SImode; -- case E_HImode: -- return VNx8HImode; -- case E_QImode: -- return VNx16QImode; -- default: -- return word_mode; -- } -+ return aarch64_full_sve_mode (mode).else_mode (word_mode); - - gcc_assert (known_eq (width, 64) || known_eq (width, 128)); - if (TARGET_SIMD) - { - if (known_eq (width, 128)) -- switch (mode) -- { -- case E_DFmode: -- return V2DFmode; -- case E_SFmode: -- return V4SFmode; -- case E_HFmode: -- return V8HFmode; -- case E_SImode: -- return V4SImode; -- case E_HImode: -- return V8HImode; -- case E_QImode: -- return V16QImode; -- case E_DImode: -- return V2DImode; -- default: -- break; -- } -+ return aarch64_vq_mode (mode).else_mode (word_mode); - else - switch (mode) - { -@@ -14142,6 +15968,8 @@ aarch64_simd_container_mode (scalar_mode mode, poly_int64 width) - return V2SFmode; - case E_HFmode: - return V4HFmode; -+ case E_BFmode: -+ return V4BFmode; - case E_SImode: - return V2SImode; - case E_HImode: -@@ -14205,14 +16033,24 @@ aarch64_mangle_type (const_tree type) - if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type)) - return "St9__va_list"; - -- /* Half-precision float. */ -+ /* Half-precision floating point types. */ - if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16) -- return "Dh"; -+ { -+ if (TYPE_MODE (type) == BFmode) -+ return "u6__bf16"; -+ else -+ return "Dh"; -+ } - - /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for - builtin types. */ - if (TYPE_NAME (type) != NULL) -- return aarch64_mangle_builtin_type (type); -+ { -+ const char *res; -+ if ((res = aarch64_general_mangle_builtin_type (type)) -+ || (res = aarch64_sve::mangle_builtin_type (type))) -+ return res; -+ } - - /* Use the default mangling. */ - return NULL; -@@ -14370,6 +16208,27 @@ aarch64_sve_arith_immediate_p (rtx x, bool negate_p) - return IN_RANGE (val, 0, 0xff00); - } - -+/* Return true if X is a valid immediate for the SVE SQADD and SQSUB -+ instructions. Negate X first if NEGATE_P is true. */ -+ -+bool -+aarch64_sve_sqadd_sqsub_immediate_p (rtx x, bool negate_p) -+{ -+ rtx elt; -+ -+ if (!const_vec_duplicate_p (x, &elt) -+ || !CONST_INT_P (elt)) -+ return false; -+ -+ if (!aarch64_sve_arith_immediate_p (x, negate_p)) -+ return false; -+ -+ /* After the optional negation, the immediate must be nonnegative. -+ E.g. a saturating add of -127 must be done via SQSUB Zn.B, Zn.B, #127 -+ instead of SQADD Zn.B, Zn.B, #129. */ -+ return negate_p == (INTVAL (elt) < 0); -+} -+ - /* Return true if X is a valid immediate operand for an SVE logical - instruction such as AND. 
*/ - -@@ -14390,13 +16249,11 @@ aarch64_sve_bitmask_immediate_p (rtx x) - bool - aarch64_sve_dup_immediate_p (rtx x) - { -- rtx elt; -- -- if (!const_vec_duplicate_p (x, &elt) -- || !CONST_INT_P (elt)) -+ x = aarch64_bit_representation (unwrap_const_vec_duplicate (x)); -+ if (!CONST_INT_P (x)) - return false; - -- HOST_WIDE_INT val = INTVAL (elt); -+ HOST_WIDE_INT val = INTVAL (x); - if (val & 0xff) - return IN_RANGE (val, -0x80, 0x7f); - return IN_RANGE (val, -0x8000, 0x7f00); -@@ -14408,13 +16265,11 @@ aarch64_sve_dup_immediate_p (rtx x) - bool - aarch64_sve_cmp_immediate_p (rtx x, bool signed_p) - { -- rtx elt; -- -- return (const_vec_duplicate_p (x, &elt) -- && CONST_INT_P (elt) -+ x = unwrap_const_vec_duplicate (x); -+ return (CONST_INT_P (x) - && (signed_p -- ? IN_RANGE (INTVAL (elt), -16, 15) -- : IN_RANGE (INTVAL (elt), 0, 127))); -+ ? IN_RANGE (INTVAL (x), -16, 15) -+ : IN_RANGE (INTVAL (x), 0, 127))); - } - - /* Return true if X is a valid immediate operand for an SVE FADD or FSUB -@@ -14450,11 +16305,10 @@ aarch64_sve_float_mul_immediate_p (rtx x) - { - rtx elt; - -- /* GCC will never generate a multiply with an immediate of 2, so there is no -- point testing for it (even though it is a valid constant). */ - return (const_vec_duplicate_p (x, &elt) - && GET_CODE (elt) == CONST_DOUBLE -- && real_equal (CONST_DOUBLE_REAL_VALUE (elt), &dconsthalf)); -+ && (real_equal (CONST_DOUBLE_REAL_VALUE (elt), &dconsthalf) -+ || real_equal (CONST_DOUBLE_REAL_VALUE (elt), &dconst2))); - } - - /* Return true if replicating VAL32 is a valid 2-byte or 4-byte immediate -@@ -14607,6 +16461,77 @@ aarch64_sve_valid_immediate (unsigned HOST_WIDE_INT val64, - return false; - } - -+/* Return true if X is an UNSPEC_PTRUE constant of the form: -+ -+ (const (unspec [PATTERN ZERO] UNSPEC_PTRUE)) -+ -+ where PATTERN is the svpattern as a CONST_INT and where ZERO -+ is a zero constant of the required PTRUE mode (which can have -+ fewer elements than X's mode, if zero bits are significant). -+ -+ If so, and if INFO is nonnull, describe the immediate in INFO. */ -+bool -+aarch64_sve_ptrue_svpattern_p (rtx x, struct simd_immediate_info *info) -+{ -+ if (GET_CODE (x) != CONST) -+ return false; -+ -+ x = XEXP (x, 0); -+ if (GET_CODE (x) != UNSPEC || XINT (x, 1) != UNSPEC_PTRUE) -+ return false; -+ -+ if (info) -+ { -+ aarch64_svpattern pattern -+ = (aarch64_svpattern) INTVAL (XVECEXP (x, 0, 0)); -+ machine_mode pred_mode = GET_MODE (XVECEXP (x, 0, 1)); -+ scalar_int_mode int_mode = aarch64_sve_element_int_mode (pred_mode); -+ *info = simd_immediate_info (int_mode, pattern); -+ } -+ return true; -+} -+ -+/* Return true if X is a valid SVE predicate. If INFO is nonnull, use -+ it to describe valid immediates. */ -+ -+static bool -+aarch64_sve_pred_valid_immediate (rtx x, simd_immediate_info *info) -+{ -+ if (aarch64_sve_ptrue_svpattern_p (x, info)) -+ return true; -+ -+ if (x == CONST0_RTX (GET_MODE (x))) -+ { -+ if (info) -+ *info = simd_immediate_info (DImode, 0); -+ return true; -+ } -+ -+ /* Analyze the value as a VNx16BImode. This should be relatively -+ efficient, since rtx_vector_builder has enough built-in capacity -+ to store all VLA predicate constants without needing the heap. 
*/ -+ rtx_vector_builder builder; -+ if (!aarch64_get_sve_pred_bits (builder, x)) -+ return false; -+ -+ unsigned int elt_size = aarch64_widest_sve_pred_elt_size (builder); -+ if (int vl = aarch64_partial_ptrue_length (builder, elt_size)) -+ { -+ machine_mode mode = aarch64_sve_pred_mode (elt_size).require (); -+ aarch64_svpattern pattern = aarch64_svpattern_for_vl (mode, vl); -+ if (pattern != AARCH64_NUM_SVPATTERNS) -+ { -+ if (info) -+ { -+ scalar_int_mode int_mode = aarch64_sve_element_int_mode (mode); -+ *info = simd_immediate_info (int_mode, pattern); -+ } -+ return true; -+ } -+ } -+ return false; -+} -+ - /* Return true if OP is a valid SIMD immediate for the operation - described by WHICH. If INFO is nonnull, use it to describe valid - immediates. */ -@@ -14619,6 +16544,9 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info, - if (vec_flags == 0 || vec_flags == (VEC_ADVSIMD | VEC_STRUCT)) - return false; - -+ if (vec_flags & VEC_SVE_PRED) -+ return aarch64_sve_pred_valid_immediate (op, info); -+ - scalar_mode elt_mode = GET_MODE_INNER (mode); - rtx base, step; - unsigned int n_elts; -@@ -14643,11 +16571,6 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info, - else - return false; - -- /* Handle PFALSE and PTRUE. */ -- if (vec_flags & VEC_SVE_PRED) -- return (op == CONST0_RTX (mode) -- || op == CONSTM1_RTX (mode)); -- - scalar_float_mode elt_float_mode; - if (n_elts == 1 - && is_a (elt_mode, &elt_float_mode)) -@@ -14731,11 +16654,14 @@ aarch64_check_zero_based_sve_index_immediate (rtx x) - bool - aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left) - { -+ x = unwrap_const_vec_duplicate (x); -+ if (!CONST_INT_P (x)) -+ return false; - int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; - if (left) -- return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1); -+ return IN_RANGE (INTVAL (x), 0, bit_width - 1); - else -- return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width); -+ return IN_RANGE (INTVAL (x), 1, bit_width); - } - - /* Return the bitmask CONST_INT to select the bits required by a zero extract -@@ -14763,7 +16689,17 @@ aarch64_mov_operand_p (rtx x, machine_mode mode) - return true; - - if (VECTOR_MODE_P (GET_MODE (x))) -- return aarch64_simd_valid_immediate (x, NULL); -+ { -+ /* Require predicate constants to be VNx16BI before RA, so that we -+ force everything to have a canonical form. */ -+ if (!lra_in_progress -+ && !reload_completed -+ && GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_BOOL -+ && GET_MODE (x) != VNx16BImode) -+ return false; -+ -+ return aarch64_simd_valid_immediate (x, NULL); -+ } - - if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x)) - return true; -@@ -14953,6 +16889,72 @@ aarch64_sve_ld1r_operand_p (rtx op) - && offset_6bit_unsigned_scaled_p (mode, addr.const_offset)); - } - -+/* Return true if OP is a valid MEM operand for an SVE LD1R{Q,O} instruction -+ where the size of the read data is specified by `mode` and the size of the -+ vector elements are specified by `elem_mode`. 
*/ -+bool -+aarch64_sve_ld1rq_ld1ro_operand_p (rtx op, machine_mode mode, -+ scalar_mode elem_mode) -+{ -+ struct aarch64_address_info addr; -+ if (!MEM_P (op) -+ || !aarch64_classify_address (&addr, XEXP (op, 0), elem_mode, false)) -+ return false; -+ -+ if (addr.type == ADDRESS_REG_IMM) -+ return offset_4bit_signed_scaled_p (mode, addr.const_offset); -+ -+ if (addr.type == ADDRESS_REG_REG) -+ return (1U << addr.shift) == GET_MODE_SIZE (elem_mode); -+ -+ return false; -+} -+ -+/* Return true if OP is a valid MEM operand for an SVE LD1RQ instruction. */ -+bool -+aarch64_sve_ld1rq_operand_p (rtx op) -+{ -+ return aarch64_sve_ld1rq_ld1ro_operand_p (op, TImode, -+ GET_MODE_INNER (GET_MODE (op))); -+} -+ -+/* Return true if OP is a valid MEM operand for an SVE LD1RO instruction for -+ accessing a vector where the element size is specified by `elem_mode`. */ -+bool -+aarch64_sve_ld1ro_operand_p (rtx op, scalar_mode elem_mode) -+{ -+ return aarch64_sve_ld1rq_ld1ro_operand_p (op, OImode, elem_mode); -+} -+ -+/* Return true if OP is a valid MEM operand for an SVE LDFF1 instruction. */ -+bool -+aarch64_sve_ldff1_operand_p (rtx op) -+{ -+ if (!MEM_P (op)) -+ return false; -+ -+ struct aarch64_address_info addr; -+ if (!aarch64_classify_address (&addr, XEXP (op, 0), GET_MODE (op), false)) -+ return false; -+ -+ if (addr.type == ADDRESS_REG_IMM) -+ return known_eq (addr.const_offset, 0); -+ -+ return addr.type == ADDRESS_REG_REG; -+} -+ -+/* Return true if OP is a valid MEM operand for an SVE LDNF1 instruction. */ -+bool -+aarch64_sve_ldnf1_operand_p (rtx op) -+{ -+ struct aarch64_address_info addr; -+ -+ return (MEM_P (op) -+ && aarch64_classify_address (&addr, XEXP (op, 0), -+ GET_MODE (op), false) -+ && addr.type == ADDRESS_REG_IMM); -+} -+ - /* Return true if OP is a valid MEM operand for an SVE LDR instruction. - The conditions for STR are the same. */ - bool -@@ -14966,6 +16968,21 @@ aarch64_sve_ldr_operand_p (rtx op) - && addr.type == ADDRESS_REG_IMM); - } - -+/* Return true if OP is a valid address for an SVE PRF[BHWD] instruction, -+ addressing memory of mode MODE. */ -+bool -+aarch64_sve_prefetch_operand_p (rtx op, machine_mode mode) -+{ -+ struct aarch64_address_info addr; -+ if (!aarch64_classify_address (&addr, op, mode, false)) -+ return false; -+ -+ if (addr.type == ADDRESS_REG_IMM) -+ return known_eq (addr.const_offset, 0); -+ -+ return addr.type == ADDRESS_REG_REG; -+} -+ - /* Return true if OP is a valid MEM operand for an SVE_STRUCT mode. - We need to be able to access the individual pieces, so the range - is different from LD[234] and ST[234]. */ -@@ -15027,11 +17044,13 @@ aarch64_simd_attr_length_rglist (machine_mode mode) - static HOST_WIDE_INT - aarch64_simd_vector_alignment (const_tree type) - { -+ /* ??? Checking the mode isn't ideal, but VECTOR_BOOLEAN_TYPE_P can -+ be set for non-predicate vectors of booleans. Modes are the most -+ direct way we have of identifying real SVE predicate types. */ -+ if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_VECTOR_BOOL) -+ return 16; - if (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST) -- /* ??? Checking the mode isn't ideal, but VECTOR_BOOLEAN_TYPE_P can -- be set for non-predicate vectors of booleans. Modes are the most -- direct way we have of identifying real SVE predicate types. */ -- return GET_MODE_CLASS (TYPE_MODE (type)) == MODE_VECTOR_BOOL ? 
16 : 128; -+ return 128; - return wi::umin (wi::to_wide (TYPE_SIZE (type)), 128).to_uhwi (); - } - -@@ -15361,34 +17380,383 @@ aarch64_expand_vector_init (rtx target, rtx vals) - (see aarch64_simd_valid_immediate). */ - for (int i = 0; i < n_elts; i++) - { -- rtx x = XVECEXP (vals, 0, i); -- if (CONST_INT_P (x) || CONST_DOUBLE_P (x)) -- continue; -- rtx subst = any_const; -- for (int bit = n_elts / 2; bit > 0; bit /= 2) -- { -- /* Look in the copied vector, as more elements are const. */ -- rtx test = XVECEXP (copy, 0, i ^ bit); -- if (CONST_INT_P (test) || CONST_DOUBLE_P (test)) -- { -- subst = test; -- break; -- } -- } -- XVECEXP (copy, 0, i) = subst; -+ rtx x = XVECEXP (vals, 0, i); -+ if (CONST_INT_P (x) || CONST_DOUBLE_P (x)) -+ continue; -+ rtx subst = any_const; -+ for (int bit = n_elts / 2; bit > 0; bit /= 2) -+ { -+ /* Look in the copied vector, as more elements are const. */ -+ rtx test = XVECEXP (copy, 0, i ^ bit); -+ if (CONST_INT_P (test) || CONST_DOUBLE_P (test)) -+ { -+ subst = test; -+ break; -+ } -+ } -+ XVECEXP (copy, 0, i) = subst; -+ } -+ aarch64_expand_vector_init (target, copy); -+ } -+ -+ /* Insert the variable lanes directly. */ -+ for (int i = 0; i < n_elts; i++) -+ { -+ rtx x = XVECEXP (vals, 0, i); -+ if (CONST_INT_P (x) || CONST_DOUBLE_P (x)) -+ continue; -+ x = copy_to_mode_reg (inner_mode, x); -+ emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i))); -+ } -+} -+ -+/* Emit RTL corresponding to: -+ insr TARGET, ELEM. */ -+ -+static void -+emit_insr (rtx target, rtx elem) -+{ -+ machine_mode mode = GET_MODE (target); -+ scalar_mode elem_mode = GET_MODE_INNER (mode); -+ elem = force_reg (elem_mode, elem); -+ -+ insn_code icode = optab_handler (vec_shl_insert_optab, mode); -+ gcc_assert (icode != CODE_FOR_nothing); -+ emit_insn (GEN_FCN (icode) (target, target, elem)); -+} -+ -+/* Subroutine of aarch64_sve_expand_vector_init for handling -+ trailing constants. -+ This function works as follows: -+ (a) Create a new vector consisting of trailing constants. -+ (b) Initialize TARGET with the constant vector using emit_move_insn. -+ (c) Insert remaining elements in TARGET using insr. -+ NELTS is the total number of elements in original vector while -+ while NELTS_REQD is the number of elements that are actually -+ significant. -+ -+ ??? The heuristic used is to do above only if number of constants -+ is at least half the total number of elements. May need fine tuning. */ -+ -+static bool -+aarch64_sve_expand_vector_init_handle_trailing_constants -+ (rtx target, const rtx_vector_builder &builder, int nelts, int nelts_reqd) -+{ -+ machine_mode mode = GET_MODE (target); -+ scalar_mode elem_mode = GET_MODE_INNER (mode); -+ int n_trailing_constants = 0; -+ -+ for (int i = nelts_reqd - 1; -+ i >= 0 && aarch64_legitimate_constant_p (elem_mode, builder.elt (i)); -+ i--) -+ n_trailing_constants++; -+ -+ if (n_trailing_constants >= nelts_reqd / 2) -+ { -+ rtx_vector_builder v (mode, 1, nelts); -+ for (int i = 0; i < nelts; i++) -+ v.quick_push (builder.elt (i + nelts_reqd - n_trailing_constants)); -+ rtx const_vec = v.build (); -+ emit_move_insn (target, const_vec); -+ -+ for (int i = nelts_reqd - n_trailing_constants - 1; i >= 0; i--) -+ emit_insr (target, builder.elt (i)); -+ -+ return true; -+ } -+ -+ return false; -+} -+ -+/* Subroutine of aarch64_sve_expand_vector_init. -+ Works as follows: -+ (a) Initialize TARGET by broadcasting element NELTS_REQD - 1 of BUILDER. -+ (b) Skip trailing elements from BUILDER, which are the same as -+ element NELTS_REQD - 1. 
-+ (c) Insert earlier elements in reverse order in TARGET using insr. */ -+ -+static void -+aarch64_sve_expand_vector_init_insert_elems (rtx target, -+ const rtx_vector_builder &builder, -+ int nelts_reqd) -+{ -+ machine_mode mode = GET_MODE (target); -+ scalar_mode elem_mode = GET_MODE_INNER (mode); -+ -+ struct expand_operand ops[2]; -+ enum insn_code icode = optab_handler (vec_duplicate_optab, mode); -+ gcc_assert (icode != CODE_FOR_nothing); -+ -+ create_output_operand (&ops[0], target, mode); -+ create_input_operand (&ops[1], builder.elt (nelts_reqd - 1), elem_mode); -+ expand_insn (icode, 2, ops); -+ -+ int ndups = builder.count_dups (nelts_reqd - 1, -1, -1); -+ for (int i = nelts_reqd - ndups - 1; i >= 0; i--) -+ emit_insr (target, builder.elt (i)); -+} -+ -+/* Subroutine of aarch64_sve_expand_vector_init to handle case -+ when all trailing elements of builder are same. -+ This works as follows: -+ (a) Use expand_insn interface to broadcast last vector element in TARGET. -+ (b) Insert remaining elements in TARGET using insr. -+ -+ ??? The heuristic used is to do above if number of same trailing elements -+ is at least 3/4 of total number of elements, loosely based on -+ heuristic from mostly_zeros_p. May need fine-tuning. */ -+ -+static bool -+aarch64_sve_expand_vector_init_handle_trailing_same_elem -+ (rtx target, const rtx_vector_builder &builder, int nelts_reqd) -+{ -+ int ndups = builder.count_dups (nelts_reqd - 1, -1, -1); -+ if (ndups >= (3 * nelts_reqd) / 4) -+ { -+ aarch64_sve_expand_vector_init_insert_elems (target, builder, -+ nelts_reqd - ndups + 1); -+ return true; -+ } -+ -+ return false; -+} -+ -+/* Initialize register TARGET from BUILDER. NELTS is the constant number -+ of elements in BUILDER. -+ -+ The function tries to initialize TARGET from BUILDER if it fits one -+ of the special cases outlined below. -+ -+ Failing that, the function divides BUILDER into two sub-vectors: -+ v_even = even elements of BUILDER; -+ v_odd = odd elements of BUILDER; -+ -+ and recursively calls itself with v_even and v_odd. -+ -+ if (recursive call succeeded for v_even or v_odd) -+ TARGET = zip (v_even, v_odd) -+ -+ The function returns true if it managed to build TARGET from BUILDER -+ with one of the special cases, false otherwise. -+ -+ Example: {a, 1, b, 2, c, 3, d, 4} -+ -+ The vector gets divided into: -+ v_even = {a, b, c, d} -+ v_odd = {1, 2, 3, 4} -+ -+ aarch64_sve_expand_vector_init(v_odd) hits case 1 and -+ initialize tmp2 from constant vector v_odd using emit_move_insn. -+ -+ aarch64_sve_expand_vector_init(v_even) fails since v_even contains -+ 4 elements, so we construct tmp1 from v_even using insr: -+ tmp1 = dup(d) -+ insr tmp1, c -+ insr tmp1, b -+ insr tmp1, a -+ -+ And finally: -+ TARGET = zip (tmp1, tmp2) -+ which sets TARGET to {a, 1, b, 2, c, 3, d, 4}. */ -+ -+static bool -+aarch64_sve_expand_vector_init (rtx target, const rtx_vector_builder &builder, -+ int nelts, int nelts_reqd) -+{ -+ machine_mode mode = GET_MODE (target); -+ -+ /* Case 1: Vector contains trailing constants. */ -+ -+ if (aarch64_sve_expand_vector_init_handle_trailing_constants -+ (target, builder, nelts, nelts_reqd)) -+ return true; -+ -+ /* Case 2: Vector contains leading constants. 
*/ -+ -+ rtx_vector_builder rev_builder (mode, 1, nelts_reqd); -+ for (int i = 0; i < nelts_reqd; i++) -+ rev_builder.quick_push (builder.elt (nelts_reqd - i - 1)); -+ rev_builder.finalize (); -+ -+ if (aarch64_sve_expand_vector_init_handle_trailing_constants -+ (target, rev_builder, nelts, nelts_reqd)) -+ { -+ emit_insn (gen_aarch64_sve_rev (mode, target, target)); -+ return true; -+ } -+ -+ /* Case 3: Vector contains trailing same element. */ -+ -+ if (aarch64_sve_expand_vector_init_handle_trailing_same_elem -+ (target, builder, nelts_reqd)) -+ return true; -+ -+ /* Case 4: Vector contains leading same element. */ -+ -+ if (aarch64_sve_expand_vector_init_handle_trailing_same_elem -+ (target, rev_builder, nelts_reqd) && nelts_reqd == nelts) -+ { -+ emit_insn (gen_aarch64_sve_rev (mode, target, target)); -+ return true; -+ } -+ -+ /* Avoid recursing below 4-elements. -+ ??? The threshold 4 may need fine-tuning. */ -+ -+ if (nelts_reqd <= 4) -+ return false; -+ -+ rtx_vector_builder v_even (mode, 1, nelts); -+ rtx_vector_builder v_odd (mode, 1, nelts); -+ -+ for (int i = 0; i < nelts * 2; i += 2) -+ { -+ v_even.quick_push (builder.elt (i)); -+ v_odd.quick_push (builder.elt (i + 1)); -+ } -+ -+ v_even.finalize (); -+ v_odd.finalize (); -+ -+ rtx tmp1 = gen_reg_rtx (mode); -+ bool did_even_p = aarch64_sve_expand_vector_init (tmp1, v_even, -+ nelts, nelts_reqd / 2); -+ -+ rtx tmp2 = gen_reg_rtx (mode); -+ bool did_odd_p = aarch64_sve_expand_vector_init (tmp2, v_odd, -+ nelts, nelts_reqd / 2); -+ -+ if (!did_even_p && !did_odd_p) -+ return false; -+ -+ /* Initialize v_even and v_odd using INSR if it didn't match any of the -+ special cases and zip v_even, v_odd. */ -+ -+ if (!did_even_p) -+ aarch64_sve_expand_vector_init_insert_elems (tmp1, v_even, nelts_reqd / 2); -+ -+ if (!did_odd_p) -+ aarch64_sve_expand_vector_init_insert_elems (tmp2, v_odd, nelts_reqd / 2); -+ -+ rtvec v = gen_rtvec (2, tmp1, tmp2); -+ emit_set_insn (target, gen_rtx_UNSPEC (mode, v, UNSPEC_ZIP1)); -+ return true; -+} -+ -+/* Initialize register TARGET from the elements in PARALLEL rtx VALS. */ -+ -+void -+aarch64_sve_expand_vector_init (rtx target, rtx vals) -+{ -+ machine_mode mode = GET_MODE (target); -+ int nelts = XVECLEN (vals, 0); -+ -+ rtx_vector_builder v (mode, 1, nelts); -+ for (int i = 0; i < nelts; i++) -+ v.quick_push (XVECEXP (vals, 0, i)); -+ v.finalize (); -+ -+ /* If neither sub-vectors of v could be initialized specially, -+ then use INSR to insert all elements from v into TARGET. -+ ??? This might not be optimal for vectors with large -+ initializers like 16-element or above. -+ For nelts < 4, it probably isn't useful to handle specially. */ -+ -+ if (nelts < 4 -+ || !aarch64_sve_expand_vector_init (target, v, nelts, nelts)) -+ aarch64_sve_expand_vector_init_insert_elems (target, v, nelts); -+} -+ -+/* Check whether VALUE is a vector constant in which every element -+ is either a power of 2 or a negated power of 2. If so, return -+ a constant vector of log2s, and flip CODE between PLUS and MINUS -+ if VALUE contains negated powers of 2. Return NULL_RTX otherwise. */ -+ -+static rtx -+aarch64_convert_mult_to_shift (rtx value, rtx_code &code) -+{ -+ if (GET_CODE (value) != CONST_VECTOR) -+ return NULL_RTX; -+ -+ rtx_vector_builder builder; -+ if (!builder.new_unary_operation (GET_MODE (value), value, false)) -+ return NULL_RTX; -+ -+ scalar_mode int_mode = GET_MODE_INNER (GET_MODE (value)); -+ /* 1 if the result of the multiplication must be negated, -+ 0 if it mustn't, or -1 if we don't yet care. 
*/ -+ int negate = -1; -+ unsigned int encoded_nelts = const_vector_encoded_nelts (value); -+ for (unsigned int i = 0; i < encoded_nelts; ++i) -+ { -+ rtx elt = CONST_VECTOR_ENCODED_ELT (value, i); -+ if (!CONST_SCALAR_INT_P (elt)) -+ return NULL_RTX; -+ rtx_mode_t val (elt, int_mode); -+ wide_int pow2 = wi::neg (val); -+ if (val != pow2) -+ { -+ /* It matters whether we negate or not. Make that choice, -+ and make sure that it's consistent with previous elements. */ -+ if (negate == !wi::neg_p (val)) -+ return NULL_RTX; -+ negate = wi::neg_p (val); -+ if (!negate) -+ pow2 = val; - } -- aarch64_expand_vector_init (target, copy); -+ /* POW2 is now the value that we want to be a power of 2. */ -+ int shift = wi::exact_log2 (pow2); -+ if (shift < 0) -+ return NULL_RTX; -+ builder.quick_push (gen_int_mode (shift, int_mode)); -+ } -+ if (negate == -1) -+ /* PLUS and MINUS are equivalent; canonicalize on PLUS. */ -+ code = PLUS; -+ else if (negate == 1) -+ code = code == PLUS ? MINUS : PLUS; -+ return builder.build (); -+} -+ -+/* Prepare for an integer SVE multiply-add or multiply-subtract pattern; -+ CODE is PLUS for the former and MINUS for the latter. OPERANDS is the -+ operands array, in the same order as for fma_optab. Return true if -+ the function emitted all the necessary instructions, false if the caller -+ should generate the pattern normally with the new OPERANDS array. */ -+ -+bool -+aarch64_prepare_sve_int_fma (rtx *operands, rtx_code code) -+{ -+ machine_mode mode = GET_MODE (operands[0]); -+ if (rtx shifts = aarch64_convert_mult_to_shift (operands[2], code)) -+ { -+ rtx product = expand_binop (mode, vashl_optab, operands[1], shifts, -+ NULL_RTX, true, OPTAB_DIRECT); -+ force_expand_binop (mode, code == PLUS ? add_optab : sub_optab, -+ operands[3], product, operands[0], true, -+ OPTAB_DIRECT); -+ return true; - } -+ operands[2] = force_reg (mode, operands[2]); -+ return false; -+} - -- /* Insert the variable lanes directly. */ -- for (int i = 0; i < n_elts; i++) -+/* Likewise, but for a conditional pattern. 
*/ -+ -+bool -+aarch64_prepare_sve_cond_int_fma (rtx *operands, rtx_code code) -+{ -+ machine_mode mode = GET_MODE (operands[0]); -+ if (rtx shifts = aarch64_convert_mult_to_shift (operands[3], code)) - { -- rtx x = XVECEXP (vals, 0, i); -- if (CONST_INT_P (x) || CONST_DOUBLE_P (x)) -- continue; -- x = copy_to_mode_reg (inner_mode, x); -- emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i))); -+ rtx product = expand_binop (mode, vashl_optab, operands[2], shifts, -+ NULL_RTX, true, OPTAB_DIRECT); -+ emit_insn (gen_cond (code, mode, operands[0], operands[1], -+ operands[4], product, operands[5])); -+ return true; - } -+ operands[3] = force_reg (mode, operands[3]); -+ return false; - } - - static unsigned HOST_WIDE_INT -@@ -15428,11 +17796,15 @@ aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global) - static void - aarch64_asm_output_variant_pcs (FILE *stream, const tree decl, const char* name) - { -- if (aarch64_simd_decl_p (decl)) -+ if (TREE_CODE (decl) == FUNCTION_DECL) - { -- fprintf (stream, "\t.variant_pcs\t"); -- assemble_name (stream, name); -- fprintf (stream, "\n"); -+ arm_pcs pcs = (arm_pcs) fndecl_abi (decl).id (); -+ if (pcs == ARM_PCS_SIMD || pcs == ARM_PCS_SVE) -+ { -+ fprintf (stream, "\t.variant_pcs\t"); -+ assemble_name (stream, name); -+ fprintf (stream, "\n"); -+ } - } - } - -@@ -15459,7 +17831,7 @@ aarch64_declare_function_name (FILE *stream, const char* name, - const struct processor *this_arch - = aarch64_get_arch (targ_options->x_explicit_arch); - -- unsigned long isa_flags = targ_options->x_aarch64_isa_flags; -+ uint64_t isa_flags = targ_options->x_aarch64_isa_flags; - std::string extension - = aarch64_get_extension_string_for_isa_flags (isa_flags, - this_arch->flags); -@@ -15541,6 +17913,18 @@ aarch64_asm_output_external (FILE *stream, tree decl, const char* name) - aarch64_asm_output_variant_pcs (stream, decl, name); - } - -+/* Triggered after a .cfi_startproc directive is emitted into the assembly file. -+ Used to output the .cfi_b_key_frame directive when signing the current -+ function with the B key. */ -+ -+void -+aarch64_post_cfi_startproc (FILE *f, tree ignored ATTRIBUTE_UNUSED) -+{ -+ if (cfun->machine->frame.laid_out && aarch64_return_address_signing_enabled () -+ && aarch64_ra_sign_key == AARCH64_KEY_B) -+ asm_fprintf (f, "\t.cfi_b_key_frame\n"); -+} -+ - /* Implements TARGET_ASM_FILE_START. Output the assembly header. */ - - static void -@@ -15551,7 +17935,7 @@ aarch64_start_file (void) - - const struct processor *default_arch - = aarch64_get_arch (default_options->x_explicit_arch); -- unsigned long default_isa_flags = default_options->x_aarch64_isa_flags; -+ uint64_t default_isa_flags = default_options->x_aarch64_isa_flags; - std::string extension - = aarch64_get_extension_string_for_isa_flags (default_isa_flags, - default_arch->flags); -@@ -15570,16 +17954,26 @@ static void - aarch64_emit_load_exclusive (machine_mode mode, rtx rval, - rtx mem, rtx model_rtx) - { -- emit_insn (gen_aarch64_load_exclusive (mode, rval, mem, model_rtx)); -+ if (mode == TImode) -+ emit_insn (gen_aarch64_load_exclusive_pair (gen_lowpart (DImode, rval), -+ gen_highpart (DImode, rval), -+ mem, model_rtx)); -+ else -+ emit_insn (gen_aarch64_load_exclusive (mode, rval, mem, model_rtx)); - } - - /* Emit store exclusive. 
*/ - - static void - aarch64_emit_store_exclusive (machine_mode mode, rtx bval, -- rtx rval, rtx mem, rtx model_rtx) -+ rtx mem, rtx rval, rtx model_rtx) - { -- emit_insn (gen_aarch64_store_exclusive (mode, bval, rval, mem, model_rtx)); -+ if (mode == TImode) -+ emit_insn (gen_aarch64_store_exclusive_pair -+ (bval, mem, operand_subword (rval, 0, 0, TImode), -+ operand_subword (rval, 1, 0, TImode), model_rtx)); -+ else -+ emit_insn (gen_aarch64_store_exclusive (mode, bval, mem, rval, model_rtx)); - } - - /* Mark the previous jump instruction as unlikely. */ -@@ -15591,6 +17985,82 @@ aarch64_emit_unlikely_jump (rtx insn) - add_reg_br_prob_note (jump, profile_probability::very_unlikely ()); - } - -+/* We store the names of the various atomic helpers in a 5x4 array. -+ Return the libcall function given MODE, MODEL and NAMES. */ -+ -+rtx -+aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx, -+ const atomic_ool_names *names) -+{ -+ memmodel model = memmodel_base (INTVAL (model_rtx)); -+ int mode_idx, model_idx; -+ -+ switch (mode) -+ { -+ case E_QImode: -+ mode_idx = 0; -+ break; -+ case E_HImode: -+ mode_idx = 1; -+ break; -+ case E_SImode: -+ mode_idx = 2; -+ break; -+ case E_DImode: -+ mode_idx = 3; -+ break; -+ case E_TImode: -+ mode_idx = 4; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ -+ switch (model) -+ { -+ case MEMMODEL_RELAXED: -+ model_idx = 0; -+ break; -+ case MEMMODEL_CONSUME: -+ case MEMMODEL_ACQUIRE: -+ model_idx = 1; -+ break; -+ case MEMMODEL_RELEASE: -+ model_idx = 2; -+ break; -+ case MEMMODEL_ACQ_REL: -+ case MEMMODEL_SEQ_CST: -+ model_idx = 3; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ -+ return init_one_libfunc_visibility (names->str[mode_idx][model_idx], -+ VISIBILITY_HIDDEN); -+} -+ -+#define DEF0(B, N) \ -+ { "__aarch64_" #B #N "_relax", \ -+ "__aarch64_" #B #N "_acq", \ -+ "__aarch64_" #B #N "_rel", \ -+ "__aarch64_" #B #N "_acq_rel" } -+ -+#define DEF4(B) DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), \ -+ { NULL, NULL, NULL, NULL } -+#define DEF5(B) DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), DEF0(B, 16) -+ -+static const atomic_ool_names aarch64_ool_cas_names = { { DEF5(cas) } }; -+const atomic_ool_names aarch64_ool_swp_names = { { DEF4(swp) } }; -+const atomic_ool_names aarch64_ool_ldadd_names = { { DEF4(ldadd) } }; -+const atomic_ool_names aarch64_ool_ldset_names = { { DEF4(ldset) } }; -+const atomic_ool_names aarch64_ool_ldclr_names = { { DEF4(ldclr) } }; -+const atomic_ool_names aarch64_ool_ldeor_names = { { DEF4(ldeor) } }; -+ -+#undef DEF0 -+#undef DEF4 -+#undef DEF5 -+ - /* Expand a compare and swap pattern. */ - - void -@@ -15637,6 +18107,17 @@ aarch64_expand_compare_and_swap (rtx operands[]) - newval, mod_s)); - cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode); - } -+ else if (TARGET_OUTLINE_ATOMICS) -+ { -+ /* Oldval must satisfy compare afterward. */ -+ if (!aarch64_plus_operand (oldval, mode)) -+ oldval = force_reg (mode, oldval); -+ rtx func = aarch64_atomic_ool_func (mode, mod_s, &aarch64_ool_cas_names); -+ rval = emit_library_call_value (func, NULL_RTX, LCT_NORMAL, r_mode, -+ oldval, mode, newval, mode, -+ XEXP (mem, 0), Pmode); -+ cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode); -+ } - else - { - /* The oldval predicate varies by mode. Test it and force to reg. */ -@@ -15682,13 +18163,11 @@ aarch64_split_compare_and_swap (rtx operands[]) - /* Split after prolog/epilog to avoid interactions with shrinkwrapping. 
*/ - gcc_assert (epilogue_completed); - -- rtx rval, mem, oldval, newval, scratch; -+ rtx rval, mem, oldval, newval, scratch, x, model_rtx; - machine_mode mode; - bool is_weak; - rtx_code_label *label1, *label2; -- rtx x, cond; - enum memmodel model; -- rtx model_rtx; - - rval = operands[0]; - mem = operands[1]; -@@ -15709,7 +18188,8 @@ aarch64_split_compare_and_swap (rtx operands[]) - CBNZ scratch, .label1 - .label2: - CMP rval, 0. */ -- bool strong_zero_p = !is_weak && oldval == const0_rtx; -+ bool strong_zero_p = (!is_weak && !aarch64_track_speculation && -+ oldval == const0_rtx && mode != TImode); - - label1 = NULL; - if (!is_weak) -@@ -15722,35 +18202,20 @@ aarch64_split_compare_and_swap (rtx operands[]) - /* The initial load can be relaxed for a __sync operation since a final - barrier will be emitted to stop code hoisting. */ - if (is_mm_sync (model)) -- aarch64_emit_load_exclusive (mode, rval, mem, -- GEN_INT (MEMMODEL_RELAXED)); -+ aarch64_emit_load_exclusive (mode, rval, mem, GEN_INT (MEMMODEL_RELAXED)); - else - aarch64_emit_load_exclusive (mode, rval, mem, model_rtx); - - if (strong_zero_p) -- { -- if (aarch64_track_speculation) -- { -- /* Emit an explicit compare instruction, so that we can correctly -- track the condition codes. */ -- rtx cc_reg = aarch64_gen_compare_reg (NE, rval, const0_rtx); -- x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx); -- } -- else -- x = gen_rtx_NE (VOIDmode, rval, const0_rtx); -- -- x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, -- gen_rtx_LABEL_REF (Pmode, label2), pc_rtx); -- aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); -- } -+ x = gen_rtx_NE (VOIDmode, rval, const0_rtx); - else - { -- cond = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode); -- x = gen_rtx_NE (VOIDmode, cond, const0_rtx); -- x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, -- gen_rtx_LABEL_REF (Pmode, label2), pc_rtx); -- aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); -+ rtx cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode); -+ x = gen_rtx_NE (VOIDmode, cc_reg, const0_rtx); - } -+ x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, -+ gen_rtx_LABEL_REF (Pmode, label2), pc_rtx); -+ aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); - - aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx); - -@@ -15771,22 +18236,16 @@ aarch64_split_compare_and_swap (rtx operands[]) - aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); - } - else -- { -- cond = gen_rtx_REG (CCmode, CC_REGNUM); -- x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx); -- emit_insn (gen_rtx_SET (cond, x)); -- } -+ aarch64_gen_compare_reg (NE, scratch, const0_rtx); - - emit_label (label2); -+ - /* If we used a CBNZ in the exchange loop emit an explicit compare with RVAL - to set the condition flags. If this is not used it will be removed by - later passes. */ - if (strong_zero_p) -- { -- cond = gen_rtx_REG (CCmode, CC_REGNUM); -- x = gen_rtx_COMPARE (CCmode, rval, const0_rtx); -- emit_insn (gen_rtx_SET (cond, x)); -- } -+ aarch64_gen_compare_reg (NE, rval, const0_rtx); -+ - /* Emit any final barrier needed for a __sync operation. 
*/ - if (is_mm_sync (model)) - aarch64_emit_post_barrier (model); -@@ -15939,6 +18398,7 @@ aarch64_float_const_representable_p (rtx x) - REAL_VALUE_TYPE r, m; - bool fail; - -+ x = unwrap_const_vec_duplicate (x); - if (!CONST_DOUBLE_P (x)) - return false; - -@@ -16034,17 +18494,18 @@ aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width, - - if (GET_MODE_CLASS (info.elt_mode) == MODE_FLOAT) - { -- gcc_assert (info.shift == 0 && info.insn == simd_immediate_info::MOV); -+ gcc_assert (info.insn == simd_immediate_info::MOV -+ && info.u.mov.shift == 0); - /* For FP zero change it to a CONST_INT 0 and use the integer SIMD - move immediate path. */ -- if (aarch64_float_const_zero_rtx_p (info.value)) -- info.value = GEN_INT (0); -+ if (aarch64_float_const_zero_rtx_p (info.u.mov.value)) -+ info.u.mov.value = GEN_INT (0); - else - { - const unsigned int buf_size = 20; - char float_buf[buf_size] = {'\0'}; - real_to_decimal_for_mode (float_buf, -- CONST_DOUBLE_REAL_VALUE (info.value), -+ CONST_DOUBLE_REAL_VALUE (info.u.mov.value), - buf_size, buf_size, 1, info.elt_mode); - - if (lane_count == 1) -@@ -16056,36 +18517,39 @@ aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width, - } - } - -- gcc_assert (CONST_INT_P (info.value)); -+ gcc_assert (CONST_INT_P (info.u.mov.value)); - - if (which == AARCH64_CHECK_MOV) - { - mnemonic = info.insn == simd_immediate_info::MVN ? "mvni" : "movi"; -- shift_op = info.modifier == simd_immediate_info::MSL ? "msl" : "lsl"; -+ shift_op = (info.u.mov.modifier == simd_immediate_info::MSL -+ ? "msl" : "lsl"); - if (lane_count == 1) - snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX, -- mnemonic, UINTVAL (info.value)); -- else if (info.shift) -+ mnemonic, UINTVAL (info.u.mov.value)); -+ else if (info.u.mov.shift) - snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " - HOST_WIDE_INT_PRINT_HEX ", %s %d", mnemonic, lane_count, -- element_char, UINTVAL (info.value), shift_op, info.shift); -+ element_char, UINTVAL (info.u.mov.value), shift_op, -+ info.u.mov.shift); - else - snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " - HOST_WIDE_INT_PRINT_HEX, mnemonic, lane_count, -- element_char, UINTVAL (info.value)); -+ element_char, UINTVAL (info.u.mov.value)); - } - else - { - /* For AARCH64_CHECK_BIC and AARCH64_CHECK_ORR. */ - mnemonic = info.insn == simd_immediate_info::MVN ? 
"bic" : "orr"; -- if (info.shift) -+ if (info.u.mov.shift) - snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #" - HOST_WIDE_INT_PRINT_DEC ", %s #%d", mnemonic, lane_count, -- element_char, UINTVAL (info.value), "lsl", info.shift); -+ element_char, UINTVAL (info.u.mov.value), "lsl", -+ info.u.mov.shift); - else - snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #" - HOST_WIDE_INT_PRINT_DEC, mnemonic, lane_count, -- element_char, UINTVAL (info.value)); -+ element_char, UINTVAL (info.u.mov.value)); - } - return templ; - } -@@ -16129,24 +18593,49 @@ aarch64_output_sve_mov_immediate (rtx const_vector) - - element_char = sizetochar (GET_MODE_BITSIZE (info.elt_mode)); - -- if (info.step) -+ machine_mode vec_mode = GET_MODE (const_vector); -+ if (aarch64_sve_pred_mode_p (vec_mode)) -+ { -+ static char buf[sizeof ("ptrue\t%0.N, vlNNNNN")]; -+ if (info.insn == simd_immediate_info::MOV) -+ { -+ gcc_assert (info.u.mov.value == const0_rtx); -+ snprintf (buf, sizeof (buf), "pfalse\t%%0.b"); -+ } -+ else -+ { -+ gcc_assert (info.insn == simd_immediate_info::PTRUE); -+ unsigned int total_bytes; -+ if (info.u.pattern == AARCH64_SV_ALL -+ && BYTES_PER_SVE_VECTOR.is_constant (&total_bytes)) -+ snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, vl%d", element_char, -+ total_bytes / GET_MODE_SIZE (info.elt_mode)); -+ else -+ snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, %s", element_char, -+ svpattern_token (info.u.pattern)); -+ } -+ return buf; -+ } -+ -+ if (info.insn == simd_immediate_info::INDEX) - { - snprintf (templ, sizeof (templ), "index\t%%0.%c, #" - HOST_WIDE_INT_PRINT_DEC ", #" HOST_WIDE_INT_PRINT_DEC, -- element_char, INTVAL (info.value), INTVAL (info.step)); -+ element_char, INTVAL (info.u.index.base), -+ INTVAL (info.u.index.step)); - return templ; - } - - if (GET_MODE_CLASS (info.elt_mode) == MODE_FLOAT) - { -- if (aarch64_float_const_zero_rtx_p (info.value)) -- info.value = GEN_INT (0); -+ if (aarch64_float_const_zero_rtx_p (info.u.mov.value)) -+ info.u.mov.value = GEN_INT (0); - else - { - const int buf_size = 20; - char float_buf[buf_size] = {}; - real_to_decimal_for_mode (float_buf, -- CONST_DOUBLE_REAL_VALUE (info.value), -+ CONST_DOUBLE_REAL_VALUE (info.u.mov.value), - buf_size, buf_size, 1, info.elt_mode); - - snprintf (templ, sizeof (templ), "fmov\t%%0.%c, #%s", -@@ -16156,23 +18645,27 @@ aarch64_output_sve_mov_immediate (rtx const_vector) - } - - snprintf (templ, sizeof (templ), "mov\t%%0.%c, #" HOST_WIDE_INT_PRINT_DEC, -- element_char, INTVAL (info.value)); -+ element_char, INTVAL (info.u.mov.value)); - return templ; - } - --/* Return the asm format for a PTRUE instruction whose destination has -- mode MODE. SUFFIX is the element size suffix. */ -+/* Return the asm template for a PTRUES. CONST_UNSPEC is the -+ aarch64_sve_ptrue_svpattern_immediate that describes the predicate -+ pattern. 
*/ - - char * --aarch64_output_ptrue (machine_mode mode, char suffix) -+aarch64_output_sve_ptrues (rtx const_unspec) - { -- unsigned int nunits; -- static char buf[sizeof ("ptrue\t%0.N, vlNNNNN")]; -- if (GET_MODE_NUNITS (mode).is_constant (&nunits)) -- snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, vl%d", suffix, nunits); -- else -- snprintf (buf, sizeof (buf), "ptrue\t%%0.%c, all", suffix); -- return buf; -+ static char templ[40]; -+ -+ struct simd_immediate_info info; -+ bool is_valid = aarch64_simd_valid_immediate (const_unspec, &info); -+ gcc_assert (is_valid && info.insn == simd_immediate_info::PTRUE); -+ -+ char element_char = sizetochar (GET_MODE_BITSIZE (info.elt_mode)); -+ snprintf (templ, sizeof (templ), "ptrues\t%%0.%c, %s", element_char, -+ svpattern_token (info.u.pattern)); -+ return templ; - } - - /* Split operands into moves from op[1] + op[2] into op[0]. */ -@@ -16590,13 +19083,31 @@ aarch64_evpc_rev_local (struct expand_vec_perm_d *d) - if (d->testing_p) - return true; - -- rtx src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (1, d->op0), unspec); - if (d->vec_flags == VEC_SVE_DATA) - { -- rtx pred = force_reg (pred_mode, CONSTM1_RTX (pred_mode)); -- src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (2, pred, src), -- UNSPEC_MERGE_PTRUE); -+ machine_mode int_mode = aarch64_sve_int_mode (pred_mode); -+ rtx target = gen_reg_rtx (int_mode); -+ if (BYTES_BIG_ENDIAN) -+ /* The act of taking a subreg between INT_MODE and d->vmode -+ is itself a reversing operation on big-endian targets; -+ see the comment at the head of aarch64-sve.md for details. -+ First reinterpret OP0 as INT_MODE without using a subreg -+ and without changing the contents. */ -+ emit_insn (gen_aarch64_sve_reinterpret (int_mode, target, d->op0)); -+ else -+ { -+ /* For SVE we use REV[BHW] unspecs derived from the element size -+ of v->mode and vector modes whose elements have SIZE bytes. -+ This ensures that the vector modes match the predicate modes. */ -+ int unspec = aarch64_sve_rev_unspec (d->vmode); -+ rtx pred = aarch64_ptrue_reg (pred_mode); -+ emit_insn (gen_aarch64_pred (unspec, int_mode, target, pred, -+ gen_lowpart (int_mode, d->op0))); -+ } -+ emit_move_insn (d->target, gen_lowpart (d->vmode, target)); -+ return true; - } -+ rtx src = gen_rtx_UNSPEC (d->vmode, gen_rtvec (1, d->op0), unspec); - emit_set_insn (d->target, src); - return true; - } -@@ -16609,7 +19120,7 @@ aarch64_evpc_rev_global (struct expand_vec_perm_d *d) - { - poly_uint64 nelt = d->perm.length (); - -- if (!d->one_vector_p || d->vec_flags != VEC_SVE_DATA) -+ if (!d->one_vector_p || d->vec_flags == VEC_ADVSIMD) - return false; - - if (!d->perm.series_p (0, 1, nelt - 1, -1)) -@@ -16722,6 +19233,50 @@ aarch64_evpc_sve_tbl (struct expand_vec_perm_d *d) - return true; - } - -+/* Try to implement D using SVE SEL instruction. 
*/ -+ -+static bool -+aarch64_evpc_sel (struct expand_vec_perm_d *d) -+{ -+ machine_mode vmode = d->vmode; -+ int unit_size = GET_MODE_UNIT_SIZE (vmode); -+ -+ if (d->vec_flags != VEC_SVE_DATA -+ || unit_size > 8) -+ return false; -+ -+ int n_patterns = d->perm.encoding ().npatterns (); -+ poly_int64 vec_len = d->perm.length (); -+ -+ for (int i = 0; i < n_patterns; ++i) -+ if (!known_eq (d->perm[i], i) -+ && !known_eq (d->perm[i], vec_len + i)) -+ return false; -+ -+ for (int i = n_patterns; i < n_patterns * 2; i++) -+ if (!d->perm.series_p (i, n_patterns, i, n_patterns) -+ && !d->perm.series_p (i, n_patterns, vec_len + i, n_patterns)) -+ return false; -+ -+ if (d->testing_p) -+ return true; -+ -+ machine_mode pred_mode = aarch64_sve_pred_mode (unit_size).require (); -+ -+ rtx_vector_builder builder (pred_mode, n_patterns, 2); -+ for (int i = 0; i < n_patterns * 2; i++) -+ { -+ rtx elem = known_eq (d->perm[i], i) ? CONST1_RTX (BImode) -+ : CONST0_RTX (BImode); -+ builder.quick_push (elem); -+ } -+ -+ rtx const_vec = builder.build (); -+ rtx pred = force_reg (pred_mode, const_vec); -+ emit_insn (gen_vcond_mask (vmode, vmode, d->target, d->op1, d->op0, pred)); -+ return true; -+} -+ - static bool - aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) - { -@@ -16754,6 +19309,8 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) - return true; - else if (aarch64_evpc_trn (d)) - return true; -+ else if (aarch64_evpc_sel (d)) -+ return true; - if (d->vec_flags == VEC_SVE_DATA) - return aarch64_evpc_sve_tbl (d); - else if (d->vec_flags == VEC_ADVSIMD) -@@ -16829,60 +19386,19 @@ aarch64_reverse_mask (machine_mode mode, unsigned int nunits) - return force_reg (V16QImode, mask); - } - --/* Return true if X is a valid second operand for the SVE instruction -- that implements integer comparison OP_CODE. */ -- --static bool --aarch64_sve_cmp_operand_p (rtx_code op_code, rtx x) --{ -- if (register_operand (x, VOIDmode)) -- return true; -- -- switch (op_code) -- { -- case LTU: -- case LEU: -- case GEU: -- case GTU: -- return aarch64_sve_cmp_immediate_p (x, false); -- case LT: -- case LE: -- case GE: -- case GT: -- case NE: -- case EQ: -- return aarch64_sve_cmp_immediate_p (x, true); -- default: -- gcc_unreachable (); -- } --} -- --/* Use predicated SVE instructions to implement the equivalent of: -- -- (set TARGET OP) -- -- given that PTRUE is an all-true predicate of the appropriate mode. */ -- --static void --aarch64_emit_sve_ptrue_op (rtx target, rtx ptrue, rtx op) --{ -- rtx unspec = gen_rtx_UNSPEC (GET_MODE (target), -- gen_rtvec (2, ptrue, op), -- UNSPEC_MERGE_PTRUE); -- rtx_insn *insn = emit_set_insn (target, unspec); -- set_unique_reg_note (insn, REG_EQUAL, copy_rtx (op)); --} -+/* Expand an SVE integer comparison using the SVE equivalent of: - --/* Likewise, but also clobber the condition codes. */ -+ (set TARGET (CODE OP0 OP1)). 
*/ - --static void --aarch64_emit_sve_ptrue_op_cc (rtx target, rtx ptrue, rtx op) -+void -+aarch64_expand_sve_vec_cmp_int (rtx target, rtx_code code, rtx op0, rtx op1) - { -- rtx unspec = gen_rtx_UNSPEC (GET_MODE (target), -- gen_rtvec (2, ptrue, op), -- UNSPEC_MERGE_PTRUE); -- rtx_insn *insn = emit_insn (gen_set_clobber_cc (target, unspec)); -- set_unique_reg_note (insn, REG_EQUAL, copy_rtx (op)); -+ machine_mode pred_mode = GET_MODE (target); -+ machine_mode data_mode = GET_MODE (op0); -+ rtx res = aarch64_sve_emit_int_cmp (target, pred_mode, code, data_mode, -+ op0, op1); -+ if (!rtx_equal_p (target, res)) -+ emit_move_insn (target, res); - } - - /* Return the UNSPEC_COND_* code for comparison CODE. */ -@@ -16893,17 +19409,19 @@ aarch64_unspec_cond_code (rtx_code code) - switch (code) - { - case NE: -- return UNSPEC_COND_NE; -+ return UNSPEC_COND_FCMNE; - case EQ: -- return UNSPEC_COND_EQ; -+ return UNSPEC_COND_FCMEQ; - case LT: -- return UNSPEC_COND_LT; -+ return UNSPEC_COND_FCMLT; - case GT: -- return UNSPEC_COND_GT; -+ return UNSPEC_COND_FCMGT; - case LE: -- return UNSPEC_COND_LE; -+ return UNSPEC_COND_FCMLE; - case GE: -- return UNSPEC_COND_GE; -+ return UNSPEC_COND_FCMGE; -+ case UNORDERED: -+ return UNSPEC_COND_FCMUO; - default: - gcc_unreachable (); - } -@@ -16911,78 +19429,58 @@ aarch64_unspec_cond_code (rtx_code code) - - /* Emit: - -- (set TARGET (unspec [PRED OP0 OP1] UNSPEC_COND_)) -+ (set TARGET (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_)) - -- where is the operation associated with comparison CODE. This form -- of instruction is used when (and (CODE OP0 OP1) PRED) would have different -- semantics, such as when PRED might not be all-true and when comparing -- inactive lanes could have side effects. */ -+ where is the operation associated with comparison CODE. -+ KNOWN_PTRUE_P is true if PRED is known to be a PTRUE. */ - - static void --aarch64_emit_sve_predicated_cond (rtx target, rtx_code code, -- rtx pred, rtx op0, rtx op1) -+aarch64_emit_sve_fp_cond (rtx target, rtx_code code, rtx pred, -+ bool known_ptrue_p, rtx op0, rtx op1) - { -+ rtx flag = gen_int_mode (known_ptrue_p, SImode); - rtx unspec = gen_rtx_UNSPEC (GET_MODE (pred), -- gen_rtvec (3, pred, op0, op1), -+ gen_rtvec (4, pred, flag, op0, op1), - aarch64_unspec_cond_code (code)); - emit_set_insn (target, unspec); - } - --/* Expand an SVE integer comparison using the SVE equivalent of: -- -- (set TARGET (CODE OP0 OP1)). */ -- --void --aarch64_expand_sve_vec_cmp_int (rtx target, rtx_code code, rtx op0, rtx op1) --{ -- machine_mode pred_mode = GET_MODE (target); -- machine_mode data_mode = GET_MODE (op0); -- -- if (!aarch64_sve_cmp_operand_p (code, op1)) -- op1 = force_reg (data_mode, op1); -- -- rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode)); -- rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1); -- aarch64_emit_sve_ptrue_op_cc (target, ptrue, cond); --} -- - /* Emit the SVE equivalent of: - -- (set TMP1 (CODE1 OP0 OP1)) -- (set TMP2 (CODE2 OP0 OP1)) -+ (set TMP1 (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_)) -+ (set TMP2 (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_)) - (set TARGET (ior:PRED_MODE TMP1 TMP2)) - -- PTRUE is an all-true predicate with the same mode as TARGET. */ -+ where is the operation associated with comparison CODEi. -+ KNOWN_PTRUE_P is true if PRED is known to be a PTRUE. 
*/ - - static void --aarch64_emit_sve_or_conds (rtx target, rtx_code code1, rtx_code code2, -- rtx ptrue, rtx op0, rtx op1) -+aarch64_emit_sve_or_fp_conds (rtx target, rtx_code code1, rtx_code code2, -+ rtx pred, bool known_ptrue_p, rtx op0, rtx op1) - { -- machine_mode pred_mode = GET_MODE (ptrue); -+ machine_mode pred_mode = GET_MODE (pred); - rtx tmp1 = gen_reg_rtx (pred_mode); -- aarch64_emit_sve_ptrue_op (tmp1, ptrue, -- gen_rtx_fmt_ee (code1, pred_mode, op0, op1)); -+ aarch64_emit_sve_fp_cond (tmp1, code1, pred, known_ptrue_p, op0, op1); - rtx tmp2 = gen_reg_rtx (pred_mode); -- aarch64_emit_sve_ptrue_op (tmp2, ptrue, -- gen_rtx_fmt_ee (code2, pred_mode, op0, op1)); -+ aarch64_emit_sve_fp_cond (tmp2, code2, pred, known_ptrue_p, op0, op1); - aarch64_emit_binop (target, ior_optab, tmp1, tmp2); - } - - /* Emit the SVE equivalent of: - -- (set TMP (CODE OP0 OP1)) -+ (set TMP (unspec [PRED KNOWN_PTRUE_P OP0 OP1] UNSPEC_COND_)) - (set TARGET (not TMP)) - -- PTRUE is an all-true predicate with the same mode as TARGET. */ -+ where is the operation associated with comparison CODE. -+ KNOWN_PTRUE_P is true if PRED is known to be a PTRUE. */ - - static void --aarch64_emit_sve_inverted_cond (rtx target, rtx ptrue, rtx_code code, -- rtx op0, rtx op1) -+aarch64_emit_sve_invert_fp_cond (rtx target, rtx_code code, rtx pred, -+ bool known_ptrue_p, rtx op0, rtx op1) - { -- machine_mode pred_mode = GET_MODE (ptrue); -+ machine_mode pred_mode = GET_MODE (pred); - rtx tmp = gen_reg_rtx (pred_mode); -- aarch64_emit_sve_ptrue_op (tmp, ptrue, -- gen_rtx_fmt_ee (code, pred_mode, op0, op1)); -+ aarch64_emit_sve_fp_cond (tmp, code, pred, known_ptrue_p, op0, op1); - aarch64_emit_unop (target, one_cmpl_optab, tmp); - } - -@@ -17000,7 +19498,7 @@ aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code, - machine_mode pred_mode = GET_MODE (target); - machine_mode data_mode = GET_MODE (op0); - -- rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode)); -+ rtx ptrue = aarch64_ptrue_reg (pred_mode); - switch (code) - { - case UNORDERED: -@@ -17015,14 +19513,13 @@ aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code, - case NE: - { - /* There is native support for the comparison. */ -- rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1); -- aarch64_emit_sve_ptrue_op (target, ptrue, cond); -+ aarch64_emit_sve_fp_cond (target, code, ptrue, true, op0, op1); - return false; - } - - case LTGT: - /* This is a trapping operation (LT or GT). */ -- aarch64_emit_sve_or_conds (target, LT, GT, ptrue, op0, op1); -+ aarch64_emit_sve_or_fp_conds (target, LT, GT, ptrue, true, op0, op1); - return false; - - case UNEQ: -@@ -17030,7 +19527,8 @@ aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code, - { - /* This would trap for signaling NaNs. */ - op1 = force_reg (data_mode, op1); -- aarch64_emit_sve_or_conds (target, UNORDERED, EQ, ptrue, op0, op1); -+ aarch64_emit_sve_or_fp_conds (target, UNORDERED, EQ, -+ ptrue, true, op0, op1); - return false; - } - /* fall through */ -@@ -17043,7 +19541,8 @@ aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code, - /* Work out which elements are ordered. */ - rtx ordered = gen_reg_rtx (pred_mode); - op1 = force_reg (data_mode, op1); -- aarch64_emit_sve_inverted_cond (ordered, ptrue, UNORDERED, op0, op1); -+ aarch64_emit_sve_invert_fp_cond (ordered, UNORDERED, -+ ptrue, true, op0, op1); - - /* Test the opposite condition for the ordered elements, - then invert the result. 
*/ -@@ -17053,13 +19552,12 @@ aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code, - code = reverse_condition_maybe_unordered (code); - if (can_invert_p) - { -- aarch64_emit_sve_predicated_cond (target, code, -- ordered, op0, op1); -+ aarch64_emit_sve_fp_cond (target, code, -+ ordered, false, op0, op1); - return true; - } -- rtx tmp = gen_reg_rtx (pred_mode); -- aarch64_emit_sve_predicated_cond (tmp, code, ordered, op0, op1); -- aarch64_emit_unop (target, one_cmpl_optab, tmp); -+ aarch64_emit_sve_invert_fp_cond (target, code, -+ ordered, false, op0, op1); - return false; - } - break; -@@ -17077,11 +19575,10 @@ aarch64_expand_sve_vec_cmp_float (rtx target, rtx_code code, - code = reverse_condition_maybe_unordered (code); - if (can_invert_p) - { -- rtx cond = gen_rtx_fmt_ee (code, pred_mode, op0, op1); -- aarch64_emit_sve_ptrue_op (target, ptrue, cond); -+ aarch64_emit_sve_fp_cond (target, code, ptrue, true, op0, op1); - return true; - } -- aarch64_emit_sve_inverted_cond (target, ptrue, code, op0, op1); -+ aarch64_emit_sve_invert_fp_cond (target, code, ptrue, true, op0, op1); - return false; - } - -@@ -17104,6 +19601,13 @@ aarch64_expand_sve_vcond (machine_mode data_mode, machine_mode cmp_mode, - else - aarch64_expand_sve_vec_cmp_int (pred, GET_CODE (ops[3]), ops[4], ops[5]); - -+ if (!aarch64_sve_reg_or_dup_imm (ops[1], data_mode)) -+ ops[1] = force_reg (data_mode, ops[1]); -+ /* The "false" value can only be zero if the "true" value is a constant. */ -+ if (register_operand (ops[1], data_mode) -+ || !aarch64_simd_reg_or_zero (ops[2], data_mode)) -+ ops[2] = force_reg (data_mode, ops[2]); -+ - rtvec vec = gen_rtvec (3, pred, ops[1], ops[2]); - emit_set_insn (ops[0], gen_rtx_UNSPEC (data_mode, vec, UNSPEC_SEL)); - } -@@ -17181,11 +19685,11 @@ aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst, - *dst = aarch64_progress_pointer (*dst); - } - --/* Expand movmem, as if from a __builtin_memcpy. Return true if -+/* Expand cpymem, as if from a __builtin_memcpy. Return true if - we succeed, otherwise return false. */ - - bool --aarch64_expand_movmem (rtx *operands) -+aarch64_expand_cpymem (rtx *operands) - { - int n, mode_bits; - rtx dst = operands[0]; -@@ -17452,7 +19956,10 @@ aarch64_expand_subvti (rtx op0, rtx low_dest, rtx low_in1, - static unsigned HOST_WIDE_INT - aarch64_asan_shadow_offset (void) - { -- return (HOST_WIDE_INT_1 << 36); -+ if (TARGET_ILP32) -+ return (HOST_WIDE_INT_1 << 29); -+ else -+ return (HOST_WIDE_INT_1 << 36); - } - - static rtx -@@ -17758,10 +20265,6 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) - } - } - -- if (aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC) -- && aarch_crypto_can_dual_issue (prev, curr)) -- return true; -- - if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH) - && any_condjump_p (curr)) - { -@@ -18545,6 +21048,29 @@ aarch64_fpconst_pow_of_2 (rtx x) - return exact_log2 (real_to_integer (r)); - } - -+/* If X is a positive CONST_DOUBLE with a value that is the reciprocal of a -+ power of 2 (i.e 1/2^n) return the number of float bits. e.g. for x==(1/2^n) -+ return n. Otherwise return -1. 
*/ -+ -+int -+aarch64_fpconst_pow2_recip (rtx x) -+{ -+ REAL_VALUE_TYPE r0; -+ -+ if (!CONST_DOUBLE_P (x)) -+ return -1; -+ -+ r0 = *CONST_DOUBLE_REAL_VALUE (x); -+ if (exact_real_inverse (DFmode, &r0) -+ && !REAL_VALUE_NEGATIVE (r0)) -+ { -+ int ret = exact_log2 (real_to_integer (&r0)); -+ if (ret >= 1 && ret <= 32) -+ return ret; -+ } -+ return -1; -+} -+ - /* If X is a vector of equal CONST_DOUBLE values and that value is - Y, return the aarch64_fpconst_pow_of_2 of Y. Otherwise return -1. */ - -@@ -18765,12 +21291,8 @@ aarch64_select_early_remat_modes (sbitmap modes) - /* SVE values are not normally live across a call, so it should be - worth doing early rematerialization even in VL-specific mode. */ - for (int i = 0; i < NUM_MACHINE_MODES; ++i) -- { -- machine_mode mode = (machine_mode) i; -- unsigned int vec_flags = aarch64_classify_vector_mode (mode); -- if (vec_flags & VEC_ANY_SVE) -- bitmap_set_bit (modes, i); -- } -+ if (aarch64_sve_mode_p ((machine_mode) i)) -+ bitmap_set_bit (modes, i); - } - - /* Override the default target speculation_safe_value. */ -@@ -18994,6 +21516,55 @@ aarch64_stack_protect_guard (void) - return NULL_TREE; - } - -+/* Return the diagnostic message string if conversion from FROMTYPE to -+ TOTYPE is not allowed, NULL otherwise. */ -+ -+static const char * -+aarch64_invalid_conversion (const_tree fromtype, const_tree totype) -+{ -+ if (element_mode (fromtype) != element_mode (totype)) -+ { -+ /* Do no allow conversions to/from BFmode scalar types. */ -+ if (TYPE_MODE (fromtype) == BFmode) -+ return N_("invalid conversion from type %"); -+ if (TYPE_MODE (totype) == BFmode) -+ return N_("invalid conversion to type %"); -+ } -+ -+ /* Conversion allowed. */ -+ return NULL; -+} -+ -+/* Return the diagnostic message string if the unary operation OP is -+ not permitted on TYPE, NULL otherwise. */ -+ -+static const char * -+aarch64_invalid_unary_op (int op, const_tree type) -+{ -+ /* Reject all single-operand operations on BFmode except for &. */ -+ if (element_mode (type) == BFmode && op != ADDR_EXPR) -+ return N_("operation not permitted on type %"); -+ -+ /* Operation allowed. */ -+ return NULL; -+} -+ -+/* Return the diagnostic message string if the binary operation OP is -+ not permitted on TYPE1 and TYPE2, NULL otherwise. */ -+ -+static const char * -+aarch64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, -+ const_tree type2) -+{ -+ /* Reject all 2-operand operations on BFmode. */ -+ if (element_mode (type1) == BFmode -+ || element_mode (type2) == BFmode) -+ return N_("operation not permitted on type %"); -+ -+ /* Operation allowed. */ -+ return NULL; -+} -+ - /* Implement TARGET_ASM_FILE_END for AArch64. This adds the AArch64 GNU NOTE - section at the end if needed. 
*/ - #define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000 -@@ -19137,7 +21708,7 @@ aarch64_run_selftests (void) - #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list - - #undef TARGET_CALLEE_COPIES --#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false -+#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_arg_info_false - - #undef TARGET_CAN_ELIMINATE - #define TARGET_CAN_ELIMINATE aarch64_can_eliminate -@@ -19247,6 +21818,15 @@ aarch64_libgcc_floating_mode_supported_p - #undef TARGET_MANGLE_TYPE - #define TARGET_MANGLE_TYPE aarch64_mangle_type - -+#undef TARGET_INVALID_CONVERSION -+#define TARGET_INVALID_CONVERSION aarch64_invalid_conversion -+ -+#undef TARGET_INVALID_UNARY_OP -+#define TARGET_INVALID_UNARY_OP aarch64_invalid_unary_op -+ -+#undef TARGET_INVALID_BINARY_OP -+#define TARGET_INVALID_BINARY_OP aarch64_invalid_binary_op -+ - #undef TARGET_MEMORY_MOVE_COST - #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost - -@@ -19370,6 +21950,9 @@ aarch64_libgcc_floating_mode_supported_p - #undef TARGET_VECTOR_MODE_SUPPORTED_P - #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p - -+#undef TARGET_COMPATIBLE_VECTOR_TYPES_P -+#define TARGET_COMPATIBLE_VECTOR_TYPES_P aarch64_compatible_vector_types_p -+ - #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT - #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \ - aarch64_builtin_support_vector_misalignment -@@ -19517,13 +22100,8 @@ aarch64_libgcc_floating_mode_supported_p - #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \ - aarch64_hard_regno_call_part_clobbered - --#undef TARGET_REMOVE_EXTRA_CALL_PRESERVED_REGS --#define TARGET_REMOVE_EXTRA_CALL_PRESERVED_REGS \ -- aarch64_remove_extra_call_preserved_regs -- --#undef TARGET_RETURN_CALL_WITH_MAX_CLOBBERS --#define TARGET_RETURN_CALL_WITH_MAX_CLOBBERS \ -- aarch64_return_call_with_max_clobbers -+#undef TARGET_INSN_CALLEE_ABI -+#define TARGET_INSN_CALLEE_ABI aarch64_insn_callee_abi - - #undef TARGET_CONSTANT_ALIGNMENT - #define TARGET_CONSTANT_ALIGNMENT aarch64_constant_alignment -@@ -19566,11 +22144,20 @@ aarch64_libgcc_floating_mode_supported_p - #undef TARGET_GET_MULTILIB_ABI_NAME - #define TARGET_GET_MULTILIB_ABI_NAME aarch64_get_multilib_abi_name - -+#undef TARGET_FNTYPE_ABI -+#define TARGET_FNTYPE_ABI aarch64_fntype_abi -+ - #if CHECKING_P - #undef TARGET_RUN_TARGET_SELFTESTS - #define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests - #endif /* #if CHECKING_P */ - -+#undef TARGET_ASM_POST_CFI_STARTPROC -+#define TARGET_ASM_POST_CFI_STARTPROC aarch64_post_cfi_startproc -+ -+#undef TARGET_STRICT_ARGUMENT_NAMING -+#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true -+ - struct gcc_target targetm = TARGET_INITIALIZER; - - #include "gt-aarch64.h" -diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h -index 772a97296..d5341656f 100644 ---- a/gcc/config/aarch64/aarch64.h -+++ b/gcc/config/aarch64/aarch64.h -@@ -192,6 +192,31 @@ extern unsigned aarch64_architecture_version; - /* Execution and Data Prediction Restriction instructions supported. */ - #define AARCH64_FL_PREDRES (1 << 27) - -+/* SVE2 instruction supported. */ -+#define AARCH64_FL_SVE2 (1 << 28) -+#define AARCH64_FL_SVE2_AES (1 << 29) -+#define AARCH64_FL_SVE2_SM4 (1 << 30) -+#define AARCH64_FL_SVE2_SHA3 (1ULL << 31) -+#define AARCH64_FL_SVE2_BITPERM (1ULL << 32) -+ -+/* Transactional Memory Extension. */ -+#define AARCH64_FL_TME (1ULL << 33) /* Has TME instructions. */ -+ -+/* Armv8.6-A architecture extensions. 
*/ -+#define AARCH64_FL_V8_6 (1ULL << 34) -+ -+/* 8-bit Integer Matrix Multiply (I8MM) extensions. */ -+#define AARCH64_FL_I8MM (1ULL << 35) -+ -+/* Brain half-precision floating-point (BFloat16) Extension. */ -+#define AARCH64_FL_BF16 (1ULL << 36) -+ -+/* 32-bit Floating-point Matrix Multiply (F32MM) extensions. */ -+#define AARCH64_FL_F32MM (1ULL << 37) -+ -+/* 64-bit Floating-point Matrix Multiply (F64MM) extensions. */ -+#define AARCH64_FL_F64MM (1ULL << 38) -+ - /* Has FP and SIMD. */ - #define AARCH64_FL_FPSIMD (AARCH64_FL_FP | AARCH64_FL_SIMD) - -@@ -213,6 +238,9 @@ extern unsigned aarch64_architecture_version; - #define AARCH64_FL_FOR_ARCH8_5 \ - (AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_V8_5 \ - | AARCH64_FL_SB | AARCH64_FL_SSBS | AARCH64_FL_PREDRES) -+#define AARCH64_FL_FOR_ARCH8_6 \ -+ (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_V8_6 | AARCH64_FL_FPSIMD \ -+ | AARCH64_FL_I8MM | AARCH64_FL_BF16) - - /* Macros to test ISA flags. */ - -@@ -225,6 +253,7 @@ extern unsigned aarch64_architecture_version; - #define AARCH64_ISA_V8_2 (aarch64_isa_flags & AARCH64_FL_V8_2) - #define AARCH64_ISA_F16 (aarch64_isa_flags & AARCH64_FL_F16) - #define AARCH64_ISA_SVE (aarch64_isa_flags & AARCH64_FL_SVE) -+#define AARCH64_ISA_SVE2 (aarch64_isa_flags & AARCH64_FL_SVE2) - #define AARCH64_ISA_V8_3 (aarch64_isa_flags & AARCH64_FL_V8_3) - #define AARCH64_ISA_DOTPROD (aarch64_isa_flags & AARCH64_FL_DOTPROD) - #define AARCH64_ISA_AES (aarch64_isa_flags & AARCH64_FL_AES) -@@ -234,7 +263,14 @@ extern unsigned aarch64_architecture_version; - #define AARCH64_ISA_SHA3 (aarch64_isa_flags & AARCH64_FL_SHA3) - #define AARCH64_ISA_F16FML (aarch64_isa_flags & AARCH64_FL_F16FML) - #define AARCH64_ISA_RCPC8_4 (aarch64_isa_flags & AARCH64_FL_RCPC8_4) -+#define AARCH64_ISA_RNG (aarch64_isa_flags & AARCH64_FL_RNG) - #define AARCH64_ISA_V8_5 (aarch64_isa_flags & AARCH64_FL_V8_5) -+#define AARCH64_ISA_TME (aarch64_isa_flags & AARCH64_FL_TME) -+#define AARCH64_ISA_V8_6 (aarch64_isa_flags & AARCH64_FL_V8_6) -+#define AARCH64_ISA_I8MM (aarch64_isa_flags & AARCH64_FL_I8MM) -+#define AARCH64_ISA_F32MM (aarch64_isa_flags & AARCH64_FL_F32MM) -+#define AARCH64_ISA_F64MM (aarch64_isa_flags & AARCH64_FL_F64MM) -+#define AARCH64_ISA_BF16 (aarch64_isa_flags & AARCH64_FL_BF16) - - /* Crypto is an optional extension to AdvSIMD. */ - #define TARGET_CRYPTO (TARGET_SIMD && AARCH64_ISA_CRYPTO) -@@ -270,12 +306,44 @@ extern unsigned aarch64_architecture_version; - /* SVE instructions, enabled through +sve. */ - #define TARGET_SVE (AARCH64_ISA_SVE) - -+/* SVE2 instructions, enabled through +sve2. */ -+#define TARGET_SVE2 (AARCH64_ISA_SVE2) -+ - /* ARMv8.3-A features. */ - #define TARGET_ARMV8_3 (AARCH64_ISA_V8_3) - -+/* Javascript conversion instruction from Armv8.3-a. */ -+#define TARGET_JSCVT (TARGET_FLOAT && AARCH64_ISA_V8_3) -+ - /* Armv8.3-a Complex number extension to AdvSIMD extensions. */ - #define TARGET_COMPLEX (TARGET_SIMD && TARGET_ARMV8_3) - -+/* Floating-point rounding instructions from Armv8.5-a. */ -+#define TARGET_FRINT (AARCH64_ISA_V8_5 && TARGET_FLOAT) -+ -+/* TME instructions are enabled. */ -+#define TARGET_TME (AARCH64_ISA_TME) -+ -+/* Random number instructions from Armv8.5-a. */ -+#define TARGET_RNG (AARCH64_ISA_RNG) -+ -+/* I8MM instructions are enabled through +i8mm. */ -+#define TARGET_I8MM (AARCH64_ISA_I8MM) -+#define TARGET_SVE_I8MM (TARGET_SVE && AARCH64_ISA_I8MM) -+ -+/* F32MM instructions are enabled through +f32mm. 
*/ -+#define TARGET_F32MM (AARCH64_ISA_F32MM) -+#define TARGET_SVE_F32MM (TARGET_SVE && AARCH64_ISA_F32MM) -+ -+/* F64MM instructions are enabled through +f64mm. */ -+#define TARGET_F64MM (AARCH64_ISA_F64MM) -+#define TARGET_SVE_F64MM (TARGET_SVE && AARCH64_ISA_F64MM) -+ -+/* BF16 instructions are enabled through +bf16. */ -+#define TARGET_BF16_FP (AARCH64_ISA_BF16) -+#define TARGET_BF16_SIMD (AARCH64_ISA_BF16 && TARGET_SIMD) -+#define TARGET_SVE_BF16 (TARGET_SVE && AARCH64_ISA_BF16) -+ - /* Make sure this is always defined so we don't have to check for ifdefs - but rather use normal ifs. */ - #ifndef TARGET_FIX_ERR_A53_835769_DEFAULT -@@ -338,6 +406,9 @@ extern unsigned aarch64_architecture_version; - P0-P7 Predicate low registers: valid in all predicate contexts - P8-P15 Predicate high registers: used as scratch space - -+ FFR First Fault Register, a fixed-use SVE predicate register -+ FFRT FFR token: a fake register used for modelling dependencies -+ - VG Pseudo "vector granules" register - - VG is the number of 64-bit elements in an SVE vector. We define -@@ -358,6 +429,7 @@ extern unsigned aarch64_architecture_version; - 1, 1, 1, 1, /* SFP, AP, CC, VG */ \ - 0, 0, 0, 0, 0, 0, 0, 0, /* P0 - P7 */ \ - 0, 0, 0, 0, 0, 0, 0, 0, /* P8 - P15 */ \ -+ 1, 1 /* FFR and FFRT */ \ - } - - /* X30 is marked as caller-saved which is in line with regular function call -@@ -380,6 +452,7 @@ extern unsigned aarch64_architecture_version; - 1, 1, 1, 1, /* SFP, AP, CC, VG */ \ - 1, 1, 1, 1, 1, 1, 1, 1, /* P0 - P7 */ \ - 1, 1, 1, 1, 1, 1, 1, 1, /* P8 - P15 */ \ -+ 1, 1 /* FFR and FFRT */ \ - } - - #define REGISTER_NAMES \ -@@ -395,6 +468,7 @@ extern unsigned aarch64_architecture_version; - "sfp", "ap", "cc", "vg", \ - "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", \ - "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", \ -+ "ffr", "ffrt" \ - } - - /* Generate the register aliases for core register N */ -@@ -443,11 +517,12 @@ extern unsigned aarch64_architecture_version; - #define FRAME_POINTER_REGNUM SFP_REGNUM - #define STACK_POINTER_REGNUM SP_REGNUM - #define ARG_POINTER_REGNUM AP_REGNUM --#define FIRST_PSEUDO_REGISTER (P15_REGNUM + 1) -+#define FIRST_PSEUDO_REGISTER (FFRT_REGNUM + 1) - --/* The number of (integer) argument register available. */ -+/* The number of argument registers available for each class. */ - #define NUM_ARG_REGS 8 - #define NUM_FP_ARG_REGS 8 -+#define NUM_PR_ARG_REGS 4 - - /* A Homogeneous Floating-Point or Short-Vector Aggregate may have at most - four members. */ -@@ -514,6 +589,9 @@ extern unsigned aarch64_architecture_version; - #define ASM_OUTPUT_EXTERNAL(STR, DECL, NAME) \ - aarch64_asm_output_external (STR, DECL, NAME) - -+/* Output assembly strings after .cfi_startproc is emitted. */ -+#define ASM_POST_CFI_STARTPROC aarch64_post_cfi_startproc -+ - /* For EH returns X4 contains the stack adjustment. 
*/ - #define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, R4_REGNUM) - #define EH_RETURN_HANDLER_RTX aarch64_eh_return_handler_rtx () -@@ -542,6 +620,9 @@ extern unsigned aarch64_architecture_version; - #define FP_LO_REGNUM_P(REGNO) \ - (((unsigned) (REGNO - V0_REGNUM)) <= (V15_REGNUM - V0_REGNUM)) - -+#define FP_LO8_REGNUM_P(REGNO) \ -+ (((unsigned) (REGNO - V0_REGNUM)) <= (V7_REGNUM - V0_REGNUM)) -+ - #define PR_REGNUM_P(REGNO)\ - (((unsigned) (REGNO - P0_REGNUM)) <= (P15_REGNUM - P0_REGNUM)) - -@@ -560,12 +641,15 @@ enum reg_class - GENERAL_REGS, - STACK_REG, - POINTER_REGS, -+ FP_LO8_REGS, - FP_LO_REGS, - FP_REGS, - POINTER_AND_FP_REGS, - PR_LO_REGS, - PR_HI_REGS, - PR_REGS, -+ FFR_REGS, -+ PR_AND_FFR_REGS, - ALL_REGS, - LIM_REG_CLASSES /* Last */ - }; -@@ -579,12 +663,15 @@ enum reg_class - "GENERAL_REGS", \ - "STACK_REG", \ - "POINTER_REGS", \ -+ "FP_LO8_REGS", \ - "FP_LO_REGS", \ - "FP_REGS", \ - "POINTER_AND_FP_REGS", \ - "PR_LO_REGS", \ - "PR_HI_REGS", \ - "PR_REGS", \ -+ "FFR_REGS", \ -+ "PR_AND_FFR_REGS", \ - "ALL_REGS" \ - } - -@@ -595,12 +682,15 @@ enum reg_class - { 0x7fffffff, 0x00000000, 0x00000003 }, /* GENERAL_REGS */ \ - { 0x80000000, 0x00000000, 0x00000000 }, /* STACK_REG */ \ - { 0xffffffff, 0x00000000, 0x00000003 }, /* POINTER_REGS */ \ -+ { 0x00000000, 0x000000ff, 0x00000000 }, /* FP_LO8_REGS */ \ - { 0x00000000, 0x0000ffff, 0x00000000 }, /* FP_LO_REGS */ \ - { 0x00000000, 0xffffffff, 0x00000000 }, /* FP_REGS */ \ - { 0xffffffff, 0xffffffff, 0x00000003 }, /* POINTER_AND_FP_REGS */\ - { 0x00000000, 0x00000000, 0x00000ff0 }, /* PR_LO_REGS */ \ - { 0x00000000, 0x00000000, 0x000ff000 }, /* PR_HI_REGS */ \ - { 0x00000000, 0x00000000, 0x000ffff0 }, /* PR_REGS */ \ -+ { 0x00000000, 0x00000000, 0x00300000 }, /* FFR_REGS */ \ -+ { 0x00000000, 0x00000000, 0x003ffff0 }, /* PR_AND_FFR_REGS */ \ - { 0xffffffff, 0xffffffff, 0x000fffff } /* ALL_REGS */ \ - } - -@@ -676,7 +766,7 @@ extern enum aarch64_processor aarch64_tune; - #ifdef HAVE_POLY_INT_H - struct GTY (()) aarch64_frame - { -- HOST_WIDE_INT reg_offset[FIRST_PSEUDO_REGISTER]; -+ poly_int64 reg_offset[LAST_SAVED_REGNUM + 1]; - - /* The number of extra stack bytes taken up by register varargs. - This area is allocated by the callee at the very top of the -@@ -684,9 +774,12 @@ struct GTY (()) aarch64_frame - STACK_BOUNDARY. */ - HOST_WIDE_INT saved_varargs_size; - -- /* The size of the saved callee-save int/FP registers. */ -+ /* The size of the callee-save registers with a slot in REG_OFFSET. */ -+ poly_int64 saved_regs_size; - -- HOST_WIDE_INT saved_regs_size; -+ /* The size of the callee-save registers with a slot in REG_OFFSET that -+ are saved below the hard frame pointer. */ -+ poly_int64 below_hard_fp_saved_regs_size; - - /* Offset from the base of the frame (incomming SP) to the - top of the locals area. This value is always a multiple of -@@ -714,6 +807,10 @@ struct GTY (()) aarch64_frame - It may be non-zero if no push is used (ie. callee_adjust == 0). */ - poly_int64 callee_offset; - -+ /* The size of the stack adjustment before saving or after restoring -+ SVE registers. */ -+ poly_int64 sve_callee_adjust; -+ - /* The size of the stack adjustment after saving callee-saves. */ - poly_int64 final_adjust; - -@@ -723,6 +820,11 @@ struct GTY (()) aarch64_frame - unsigned wb_candidate1; - unsigned wb_candidate2; - -+ /* Big-endian SVE frames need a spare predicate register in order -+ to save vector registers in the correct layout for unwinding. -+ This is the register they should use. 
*/ -+ unsigned spare_pred_reg; -+ - bool laid_out; - }; - -@@ -751,6 +853,10 @@ enum aarch64_abi_type - enum arm_pcs - { - ARM_PCS_AAPCS64, /* Base standard AAPCS for 64 bit. */ -+ ARM_PCS_SIMD, /* For aarch64_vector_pcs functions. */ -+ ARM_PCS_SVE, /* For functions that pass or return -+ values in SVE registers. */ -+ ARM_PCS_TLSDESC, /* For targets of tlsdesc calls. */ - ARM_PCS_UNKNOWN - }; - -@@ -777,6 +883,8 @@ typedef struct - int aapcs_nextncrn; /* Next next core register number. */ - int aapcs_nvrn; /* Next Vector register number. */ - int aapcs_nextnvrn; /* Next Next Vector register number. */ -+ int aapcs_nprn; /* Next Predicate register number. */ -+ int aapcs_nextnprn; /* Next Next Predicate register number. */ - rtx aapcs_reg; /* Register assigned to this argument. This - is NULL_RTX if this parameter goes on - the stack. */ -@@ -787,6 +895,8 @@ typedef struct - aapcs_reg == NULL_RTX. */ - int aapcs_stack_size; /* The total size (in words, per 8 byte) of the - stack arg area so far. */ -+ bool silent_p; /* True if we should act silently, rather than -+ raise an error for invalid calls. */ - } CUMULATIVE_ARGS; - #endif - -@@ -842,7 +952,7 @@ typedef struct - /* MOVE_RATIO dictates when we will use the move_by_pieces infrastructure. - move_by_pieces will continually copy the largest safe chunks. So a - 7-byte copy is a 4-byte + 2-byte + byte copy. This proves inefficient -- for both size and speed of copy, so we will instead use the "movmem" -+ for both size and speed of copy, so we will instead use the "cpymem" - standard name to implement the copy. This logic does not apply when - targeting -mstrict-align, so keep a sensible default in that case. */ - #define MOVE_RATIO(speed) \ -@@ -1025,13 +1135,13 @@ extern enum aarch64_code_model aarch64_cmodel; - #define AARCH64_VALID_SIMD_DREG_MODE(MODE) \ - ((MODE) == V2SImode || (MODE) == V4HImode || (MODE) == V8QImode \ - || (MODE) == V2SFmode || (MODE) == V4HFmode || (MODE) == DImode \ -- || (MODE) == DFmode) -+ || (MODE) == DFmode || (MODE) == V4BFmode) - - /* Modes valid for AdvSIMD Q registers. */ - #define AARCH64_VALID_SIMD_QREG_MODE(MODE) \ - ((MODE) == V4SImode || (MODE) == V8HImode || (MODE) == V16QImode \ - || (MODE) == V4SFmode || (MODE) == V8HFmode || (MODE) == V2DImode \ -- || (MODE) == V2DFmode) -+ || (MODE) == V2DFmode || (MODE) == V8BFmode) - - #define ENDIAN_LANE_N(NUNITS, N) \ - (BYTES_BIG_ENDIAN ? NUNITS - 1 - N : N) -@@ -1079,6 +1189,11 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); - extern tree aarch64_fp16_type_node; - extern tree aarch64_fp16_ptr_type_node; - -+/* This type is the user-visible __bf16, and a pointer to that type. Defined -+ in aarch64-builtins.c. */ -+extern tree aarch64_bf16_type_node; -+extern tree aarch64_bf16_ptr_type_node; -+ - /* The generic unwind code in libgcc does not initialize the frame pointer. - So in order to unwind a function using a frame pointer, the very first - function that is unwound must save the frame pointer. That way the frame -@@ -1094,7 +1209,8 @@ extern poly_uint16 aarch64_sve_vg; - #define BITS_PER_SVE_VECTOR (poly_uint16 (aarch64_sve_vg * 64)) - #define BYTES_PER_SVE_VECTOR (poly_uint16 (aarch64_sve_vg * 8)) - --/* The number of bytes in an SVE predicate. */ -+/* The number of bits and bytes in an SVE predicate. */ -+#define BITS_PER_SVE_PRED BYTES_PER_SVE_VECTOR - #define BYTES_PER_SVE_PRED aarch64_sve_vg - - /* The SVE mode for a vector of bytes. 
*/ -diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md -index 73c34a227..34cccc7cd 100644 ---- a/gcc/config/aarch64/aarch64.md -+++ b/gcc/config/aarch64/aarch64.md -@@ -85,7 +85,6 @@ - (V29_REGNUM 61) - (V30_REGNUM 62) - (V31_REGNUM 63) -- (LAST_SAVED_REGNUM 63) - (SFP_REGNUM 64) - (AP_REGNUM 65) - (CC_REGNUM 66) -@@ -107,6 +106,11 @@ - (P13_REGNUM 81) - (P14_REGNUM 82) - (P15_REGNUM 83) -+ (LAST_SAVED_REGNUM 83) -+ (FFR_REGNUM 84) -+ ;; "FFR token": a fake register used for representing the scheduling -+ ;; restrictions on FFR-related operations. -+ (FFRT_REGNUM 85) - ;; Scratch register used by stack clash protection to calculate - ;; SVE CFA offsets during probing. - (STACK_CLASH_SVE_CFA_REGNUM 11) -@@ -120,13 +124,17 @@ - ;; Scratch registers used in frame layout. - (IP0_REGNUM 16) - (IP1_REGNUM 17) -+ (FP_REGNUM 29) - (LR_REGNUM 30) - ] - ) - - (define_c_enum "unspec" [ -- UNSPEC_AUTI1716 -- UNSPEC_AUTISP -+ UNSPEC_AUTIA1716 -+ UNSPEC_AUTIB1716 -+ UNSPEC_AUTIASP -+ UNSPEC_AUTIBSP -+ UNSPEC_CALLEE_ABI - UNSPEC_CASESI - UNSPEC_CRC32B - UNSPEC_CRC32CB -@@ -138,6 +146,11 @@ - UNSPEC_CRC32X - UNSPEC_FCVTZS - UNSPEC_FCVTZU -+ UNSPEC_FJCVTZS -+ UNSPEC_FRINT32Z -+ UNSPEC_FRINT32X -+ UNSPEC_FRINT64Z -+ UNSPEC_FRINT64X - UNSPEC_URECPE - UNSPEC_FRECPE - UNSPEC_FRECPS -@@ -169,8 +182,10 @@ - UNSPEC_LD4_LANE - UNSPEC_MB - UNSPEC_NOP -- UNSPEC_PACI1716 -- UNSPEC_PACISP -+ UNSPEC_PACIA1716 -+ UNSPEC_PACIB1716 -+ UNSPEC_PACIASP -+ UNSPEC_PACIBSP - UNSPEC_PRLG_STK - UNSPEC_REV - UNSPEC_RBIT -@@ -211,26 +226,49 @@ - UNSPEC_XPACLRI - UNSPEC_LD1_SVE - UNSPEC_ST1_SVE -+ UNSPEC_LDNT1_SVE -+ UNSPEC_STNT1_SVE - UNSPEC_LD1RQ - UNSPEC_LD1_GATHER -+ UNSPEC_LDFF1_GATHER - UNSPEC_ST1_SCATTER -- UNSPEC_MERGE_PTRUE -- UNSPEC_PTEST_PTRUE -+ UNSPEC_PRED_X -+ UNSPEC_PRED_Z -+ UNSPEC_PTEST -+ UNSPEC_PTRUE - UNSPEC_UNPACKSHI - UNSPEC_UNPACKUHI - UNSPEC_UNPACKSLO - UNSPEC_UNPACKULO - UNSPEC_PACK -- UNSPEC_FLOAT_CONVERT -- UNSPEC_WHILE_LO -+ UNSPEC_WHILELE -+ UNSPEC_WHILELO -+ UNSPEC_WHILELS -+ UNSPEC_WHILELT - UNSPEC_LDN - UNSPEC_STN - UNSPEC_INSR -+ UNSPEC_CLASTA - UNSPEC_CLASTB - UNSPEC_FADDA - UNSPEC_REV_SUBREG -+ UNSPEC_REINTERPRET - UNSPEC_SPECULATION_TRACKER - UNSPEC_COPYSIGN -+ UNSPEC_TTEST ; Represent transaction test. -+ UNSPEC_UPDATE_FFR -+ UNSPEC_UPDATE_FFRT -+ UNSPEC_RDFFR -+ UNSPEC_WRFFR -+ ;; Represents an SVE-style lane index, in which the indexing applies -+ ;; within the containing 128-bit block. -+ UNSPEC_SVE_LANE_SELECT -+ UNSPEC_SVE_CNT_PAT -+ UNSPEC_SVE_PREFETCH -+ UNSPEC_SVE_PREFETCH_GATHER -+ UNSPEC_SVE_COMPACT -+ UNSPEC_SVE_SPLICE -+ UNSPEC_LD1RO - ]) - - (define_c_enum "unspecv" [ -@@ -246,9 +284,35 @@ - UNSPECV_BTI_C ; Represent BTI c. - UNSPECV_BTI_J ; Represent BTI j. - UNSPECV_BTI_JC ; Represent BTI jc. -+ UNSPECV_TSTART ; Represent transaction start. -+ UNSPECV_TCOMMIT ; Represent transaction commit. -+ UNSPECV_TCANCEL ; Represent transaction cancel. -+ UNSPEC_RNDR ; Represent RNDR -+ UNSPEC_RNDRRS ; Represent RNDRRS - ] - ) - -+;; These constants are used as a const_int in various SVE unspecs -+;; to indicate whether the governing predicate is known to be a PTRUE. -+(define_constants -+ [; Indicates that the predicate might not be a PTRUE. -+ (SVE_MAYBE_NOT_PTRUE 0) -+ -+ ; Indicates that the predicate is known to be a PTRUE. 
-+ (SVE_KNOWN_PTRUE 1)]) -+ -+;; These constants are used as a const_int in predicated SVE FP arithmetic -+;; to indicate whether the operation is allowed to make additional lanes -+;; active without worrying about the effect on faulting behavior. -+(define_constants -+ [; Indicates either that all lanes are active or that the instruction may -+ ; operate on inactive inputs even if doing so could induce a fault. -+ (SVE_RELAXED_GP 0) -+ -+ ; Indicates that some lanes might be inactive and that the instruction -+ ; must not operate on inactive inputs if doing so could induce a fault. -+ (SVE_STRICT_GP 1)]) -+ - ;; If further include files are added the defintion of MD_INCLUDES - ;; must be updated. - -@@ -383,8 +447,8 @@ - - (define_expand "cbranch4" - [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" -- [(match_operand:GPI 1 "register_operand" "") -- (match_operand:GPI 2 "aarch64_plus_operand" "")]) -+ [(match_operand:GPI 1 "register_operand") -+ (match_operand:GPI 2 "aarch64_plus_operand")]) - (label_ref (match_operand 3 "" "")) - (pc)))] - "" -@@ -397,8 +461,8 @@ - - (define_expand "cbranch4" - [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator" -- [(match_operand:GPF 1 "register_operand" "") -- (match_operand:GPF 2 "aarch64_fp_compare_operand" "")]) -+ [(match_operand:GPF 1 "register_operand") -+ (match_operand:GPF 2 "aarch64_fp_compare_operand")]) - (label_ref (match_operand 3 "" "")) - (pc)))] - "" -@@ -412,7 +476,7 @@ - (define_expand "cbranchcc4" - [(set (pc) (if_then_else - (match_operator 0 "aarch64_comparison_operator" -- [(match_operand 1 "cc_register" "") -+ [(match_operand 1 "cc_register") - (match_operand 2 "const0_operand")]) - (label_ref (match_operand 3 "" "")) - (pc)))] -@@ -475,9 +539,9 @@ - ;; csneg x0, x0, x1, mi - - (define_expand "mod3" -- [(match_operand:GPI 0 "register_operand" "") -- (match_operand:GPI 1 "register_operand" "") -- (match_operand:GPI 2 "const_int_operand" "")] -+ [(match_operand:GPI 0 "register_operand") -+ (match_operand:GPI 1 "register_operand") -+ (match_operand:GPI 2 "const_int_operand")] - "" - { - HOST_WIDE_INT val = INTVAL (operands[2]); -@@ -530,10 +594,14 @@ - (pc)))] - "" - { -+ /* GCC's traditional style has been to use "beq" instead of "b.eq", etc., -+ but the "." is required for SVE conditions. */ -+ bool use_dot_p = GET_MODE (operands[1]) == CC_NZCmode; - if (get_attr_length (insn) == 8) -- return aarch64_gen_far_branch (operands, 2, "Lbcond", "b%M0\\t"); -+ return aarch64_gen_far_branch (operands, 2, "Lbcond", -+ use_dot_p ? "b.%M0\\t" : "b%M0\\t"); - else -- return "b%m0\\t%l2"; -+ return use_dot_p ? 
"b.%m0\\t%l2" : "b%m0\\t%l2"; - } - [(set_attr "type" "branch") - (set (attr "length") -@@ -558,14 +626,14 @@ - ;; sub x0, x1, #(CST & 0xfff000) - ;; subs x0, x0, #(CST & 0x000fff) - ;; b .Label --(define_insn_and_split "*compare_condjump" -+(define_insn_and_split "*compare_condjump" - [(set (pc) (if_then_else (EQL - (match_operand:GPI 0 "register_operand" "r") - (match_operand:GPI 1 "aarch64_imm24" "n")) - (label_ref:P (match_operand 2 "" "")) - (pc)))] -- "!aarch64_move_imm (INTVAL (operands[1]), mode) -- && !aarch64_plus_operand (operands[1], mode) -+ "!aarch64_move_imm (INTVAL (operands[1]), mode) -+ && !aarch64_plus_operand (operands[1], mode) - && !reload_completed" - "#" - "&& true" -@@ -573,20 +641,21 @@ - { - HOST_WIDE_INT lo_imm = UINTVAL (operands[1]) & 0xfff; - HOST_WIDE_INT hi_imm = UINTVAL (operands[1]) & 0xfff000; -- rtx tmp = gen_reg_rtx (mode); -- emit_insn (gen_add3 (tmp, operands[0], GEN_INT (-hi_imm))); -- emit_insn (gen_add3_compare0 (tmp, tmp, GEN_INT (-lo_imm))); -+ rtx tmp = gen_reg_rtx (mode); -+ emit_insn (gen_add3 (tmp, operands[0], GEN_INT (-hi_imm))); -+ emit_insn (gen_add3_compare0 (tmp, tmp, GEN_INT (-lo_imm))); - rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM); -- rtx cmp_rtx = gen_rtx_fmt_ee (, mode, cc_reg, const0_rtx); -+ rtx cmp_rtx = gen_rtx_fmt_ee (, mode, -+ cc_reg, const0_rtx); - emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, operands[2])); - DONE; - } - ) - - (define_expand "casesi" -- [(match_operand:SI 0 "register_operand" "") ; Index -- (match_operand:SI 1 "const_int_operand" "") ; Lower bound -- (match_operand:SI 2 "const_int_operand" "") ; Total range -+ [(match_operand:SI 0 "register_operand") ; Index -+ (match_operand:SI 1 "const_int_operand") ; Lower bound -+ (match_operand:SI 2 "const_int_operand") ; Total range - (match_operand:DI 3 "" "") ; Table label - (match_operand:DI 4 "" "")] ; Out of range label - "" -@@ -739,8 +808,12 @@ - if (aarch64_return_address_signing_enabled () - && TARGET_ARMV8_3 - && !crtl->calls_eh_return) -- return "retaa"; -- -+ { -+ if (aarch64_ra_sign_key == AARCH64_KEY_B) -+ return "retab"; -+ else -+ return "retaa"; -+ } - return "ret"; - } - [(set_attr "type" "branch")] -@@ -754,7 +827,7 @@ - - (define_insn "simple_return" - [(simple_return)] -- "aarch64_use_simple_return_insn_p ()" -+ "" - "ret" - [(set_attr "type" "branch")] - ) -@@ -868,14 +941,15 @@ - ;; ------------------------------------------------------------------- - - (define_expand "call" -- [(parallel [(call (match_operand 0 "memory_operand" "") -- (match_operand 1 "general_operand" "")) -- (use (match_operand 2 "" "")) -- (clobber (reg:DI LR_REGNUM))])] -+ [(parallel -+ [(call (match_operand 0 "memory_operand") -+ (match_operand 1 "general_operand")) -+ (unspec:DI [(match_operand 2 "const_int_operand")] UNSPEC_CALLEE_ABI) -+ (clobber (reg:DI LR_REGNUM))])] - "" - " - { -- aarch64_expand_call (NULL_RTX, operands[0], false); -+ aarch64_expand_call (NULL_RTX, operands[0], operands[2], false); - DONE; - }" - ) -@@ -883,6 +957,7 @@ - (define_insn "*call_insn" - [(call (mem:DI (match_operand:DI 0 "aarch64_call_insn_operand" "r, Usf")) - (match_operand 1 "" "")) -+ (unspec:DI [(match_operand:DI 2 "const_int_operand")] UNSPEC_CALLEE_ABI) - (clobber (reg:DI LR_REGNUM))] - "" - "@ -@@ -892,15 +967,16 @@ - ) - - (define_expand "call_value" -- [(parallel [(set (match_operand 0 "" "") -- (call (match_operand 1 "memory_operand" "") -- (match_operand 2 "general_operand" ""))) -- (use (match_operand 3 "" "")) -- (clobber (reg:DI LR_REGNUM))])] -+ [(parallel -+ [(set 
(match_operand 0 "") -+ (call (match_operand 1 "memory_operand") -+ (match_operand 2 "general_operand"))) -+ (unspec:DI [(match_operand 3 "const_int_operand")] UNSPEC_CALLEE_ABI) -+ (clobber (reg:DI LR_REGNUM))])] - "" - " - { -- aarch64_expand_call (operands[0], operands[1], false); -+ aarch64_expand_call (operands[0], operands[1], operands[3], false); - DONE; - }" - ) -@@ -909,6 +985,7 @@ - [(set (match_operand 0 "" "") - (call (mem:DI (match_operand:DI 1 "aarch64_call_insn_operand" "r, Usf")) - (match_operand 2 "" ""))) -+ (unspec:DI [(match_operand:DI 3 "const_int_operand")] UNSPEC_CALLEE_ABI) - (clobber (reg:DI LR_REGNUM))] - "" - "@ -@@ -918,33 +995,36 @@ - ) - - (define_expand "sibcall" -- [(parallel [(call (match_operand 0 "memory_operand" "") -- (match_operand 1 "general_operand" "")) -- (return) -- (use (match_operand 2 "" ""))])] -+ [(parallel -+ [(call (match_operand 0 "memory_operand") -+ (match_operand 1 "general_operand")) -+ (unspec:DI [(match_operand 2 "const_int_operand")] UNSPEC_CALLEE_ABI) -+ (return)])] - "" - { -- aarch64_expand_call (NULL_RTX, operands[0], true); -+ aarch64_expand_call (NULL_RTX, operands[0], operands[2], true); - DONE; - } - ) - - (define_expand "sibcall_value" -- [(parallel [(set (match_operand 0 "" "") -- (call (match_operand 1 "memory_operand" "") -- (match_operand 2 "general_operand" ""))) -- (return) -- (use (match_operand 3 "" ""))])] -+ [(parallel -+ [(set (match_operand 0 "") -+ (call (match_operand 1 "memory_operand") -+ (match_operand 2 "general_operand"))) -+ (unspec:DI [(match_operand 3 "const_int_operand")] UNSPEC_CALLEE_ABI) -+ (return)])] - "" - { -- aarch64_expand_call (operands[0], operands[1], true); -+ aarch64_expand_call (operands[0], operands[1], operands[3], true); - DONE; - } - ) - - (define_insn "*sibcall_insn" - [(call (mem:DI (match_operand:DI 0 "aarch64_call_insn_operand" "Ucs, Usf")) -- (match_operand 1 "" "")) -+ (match_operand 1 "")) -+ (unspec:DI [(match_operand:DI 2 "const_int_operand")] UNSPEC_CALLEE_ABI) - (return)] - "SIBLING_CALL_P (insn)" - "@ -@@ -954,10 +1034,11 @@ - ) - - (define_insn "*sibcall_value_insn" -- [(set (match_operand 0 "" "") -+ [(set (match_operand 0 "") - (call (mem:DI - (match_operand:DI 1 "aarch64_call_insn_operand" "Ucs, Usf")) -- (match_operand 2 "" ""))) -+ (match_operand 2 ""))) -+ (unspec:DI [(match_operand:DI 3 "const_int_operand")] UNSPEC_CALLEE_ABI) - (return)] - "SIBLING_CALL_P (insn)" - "@ -@@ -977,7 +1058,9 @@ - { - int i; - -- emit_call_insn (gen_call (operands[0], const0_rtx, NULL)); -+ /* Untyped calls always use the default ABI. It's only possible to use -+ ABI variants if we know the type of the target function. 
*/ -+ emit_call_insn (gen_call (operands[0], const0_rtx, const0_rtx)); - - for (i = 0; i < XVECLEN (operands[2], 0); i++) - { -@@ -998,8 +1081,8 @@ - ;; ------------------------------------------------------------------- - - (define_expand "mov" -- [(set (match_operand:SHORT 0 "nonimmediate_operand" "") -- (match_operand:SHORT 1 "general_operand" ""))] -+ [(set (match_operand:SHORT 0 "nonimmediate_operand") -+ (match_operand:SHORT 1 "general_operand"))] - "" - " - if (GET_CODE (operands[0]) == MEM && operands[1] != const0_rtx) -@@ -1055,8 +1138,8 @@ - ) - - (define_expand "mov" -- [(set (match_operand:GPI 0 "nonimmediate_operand" "") -- (match_operand:GPI 1 "general_operand" ""))] -+ [(set (match_operand:GPI 0 "nonimmediate_operand") -+ (match_operand:GPI 1 "general_operand"))] - "" - " - if (MEM_P (operands[0]) && !MEM_VOLATILE_P (operands[0]) -@@ -1162,8 +1245,8 @@ - ) - - (define_expand "movti" -- [(set (match_operand:TI 0 "nonimmediate_operand" "") -- (match_operand:TI 1 "general_operand" ""))] -+ [(set (match_operand:TI 0 "nonimmediate_operand") -+ (match_operand:TI 1 "general_operand"))] - "" - " - if (GET_CODE (operands[0]) == MEM && operands[1] != const0_rtx) -@@ -1217,8 +1300,8 @@ - }) - - (define_expand "mov" -- [(set (match_operand:GPF_TF_F16 0 "nonimmediate_operand" "") -- (match_operand:GPF_TF_F16 1 "general_operand" ""))] -+ [(set (match_operand:GPF_TF_F16_MOV 0 "nonimmediate_operand") -+ (match_operand:GPF_TF_F16_MOV 1 "general_operand"))] - "" - { - if (!TARGET_FLOAT) -@@ -1234,11 +1317,11 @@ - } - ) - --(define_insn "*movhf_aarch64" -- [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w , w,?r,w,w ,w ,w,m,r,m ,r") -- (match_operand:HF 1 "general_operand" "Y ,?rY,?r, w,w,Ufc,Uvi,m,w,m,rY,r"))] -- "TARGET_FLOAT && (register_operand (operands[0], HFmode) -- || aarch64_reg_or_fp_zero (operands[1], HFmode))" -+(define_insn "*mov_aarch64" -+ [(set (match_operand:HFBF 0 "nonimmediate_operand" "=w,w , w,?r,w,w ,w ,w,m,r,m ,r") -+ (match_operand:HFBF 1 "general_operand" "Y ,?rY,?r, w,w,Ufc,Uvi,m,w,m,rY,r"))] -+ "TARGET_FLOAT && (register_operand (operands[0], mode) -+ || aarch64_reg_or_fp_zero (operands[1], mode))" - "@ - movi\\t%0.4h, #0 - fmov\\t%h0, %w1 -@@ -1363,17 +1446,17 @@ - - ;; 0 is dst - ;; 1 is src --;; 2 is size of move in bytes -+;; 2 is size of copy in bytes - ;; 3 is alignment - --(define_expand "movmemdi" -+(define_expand "cpymemdi" - [(match_operand:BLK 0 "memory_operand") - (match_operand:BLK 1 "memory_operand") - (match_operand:DI 2 "immediate_operand") - (match_operand:DI 3 "immediate_operand")] - "!STRICT_ALIGNMENT" - { -- if (aarch64_expand_movmem (operands)) -+ if (aarch64_expand_cpymem (operands)) - DONE; - FAIL; - } -@@ -1492,8 +1575,8 @@ - (mem:GPI (plus:P (match_dup 1) - (match_operand:P 5 "const_int_operand" "n"))))])] - "INTVAL (operands[5]) == GET_MODE_SIZE (mode)" -- "ldp\\t%2, %3, [%1], %4" -- [(set_attr "type" "load_")] -+ "ldp\\t%2, %3, [%1], %4" -+ [(set_attr "type" "load_")] - ) - - (define_insn "loadwb_pair_" -@@ -1507,7 +1590,7 @@ - (mem:GPF (plus:P (match_dup 1) - (match_operand:P 5 "const_int_operand" "n"))))])] - "INTVAL (operands[5]) == GET_MODE_SIZE (mode)" -- "ldp\\t%2, %3, [%1], %4" -+ "ldp\\t%2, %3, [%1], %4" - [(set_attr "type" "neon_load1_2reg")] - ) - -@@ -1540,8 +1623,8 @@ - (match_operand:P 5 "const_int_operand" "n"))) - (match_operand:GPI 3 "register_operand" "r"))])] - "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (mode)" -- "stp\\t%2, %3, [%0, %4]!" -- [(set_attr "type" "store_")] -+ "stp\\t%2, %3, [%0, %4]!" 
-+ [(set_attr "type" "store_")] - ) - - (define_insn "storewb_pair_" -@@ -1556,7 +1639,7 @@ - (match_operand:P 5 "const_int_operand" "n"))) - (match_operand:GPF 3 "register_operand" "w"))])] - "INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (mode)" -- "stp\\t%2, %3, [%0, %4]!" -+ "stp\\t%2, %3, [%0, %4]!" - [(set_attr "type" "neon_store1_2reg")] - ) - -@@ -1702,9 +1785,9 @@ - - (define_expand "add3" - [(set -- (match_operand:GPI 0 "register_operand" "") -- (plus:GPI (match_operand:GPI 1 "register_operand" "") -- (match_operand:GPI 2 "aarch64_pluslong_or_poly_operand" "")))] -+ (match_operand:GPI 0 "register_operand") -+ (plus:GPI (match_operand:GPI 1 "register_operand") -+ (match_operand:GPI 2 "aarch64_pluslong_or_poly_operand")))] - "" - { - /* If operands[1] is a subreg extract the inner RTX. */ -@@ -1713,6 +1796,7 @@ - /* If the constant is too large for a single instruction and isn't frame - based, split off the immediate so it is available for CSE. */ - if (!aarch64_plus_immediate (operands[2], mode) -+ && !(TARGET_SVE && aarch64_sve_plus_immediate (operands[2], mode)) - && can_create_pseudo_p () - && (!REG_P (op1) - || !REGNO_PTR_FRAME_P (REGNO (op1)))) -@@ -1730,10 +1814,10 @@ - - (define_insn "*add3_aarch64" - [(set -- (match_operand:GPI 0 "register_operand" "=rk,rk,w,rk,r,rk") -+ (match_operand:GPI 0 "register_operand" "=rk,rk,w,rk,r,r,rk") - (plus:GPI -- (match_operand:GPI 1 "register_operand" "%rk,rk,w,rk,rk,rk") -- (match_operand:GPI 2 "aarch64_pluslong_operand" "I,r,w,J,Uaa,Uav")))] -+ (match_operand:GPI 1 "register_operand" "%rk,rk,w,rk,rk,0,rk") -+ (match_operand:GPI 2 "aarch64_pluslong_operand" "I,r,w,J,Uaa,Uai,Uav")))] - "" - "@ - add\\t%0, %1, %2 -@@ -1741,10 +1825,11 @@ - add\\t%0, %1, %2 - sub\\t%0, %1, #%n2 - # -- * return aarch64_output_sve_addvl_addpl (operands[0], operands[1], operands[2]);" -- ;; The "alu_imm" type for ADDVL/ADDPL is just a placeholder. -- [(set_attr "type" "alu_imm,alu_sreg,neon_add,alu_imm,multiple,alu_imm") -- (set_attr "arch" "*,*,simd,*,*,*")] -+ * return aarch64_output_sve_scalar_inc_dec (operands[2]); -+ * return aarch64_output_sve_addvl_addpl (operands[2]);" -+ ;; The "alu_imm" types for INC/DEC and ADDVL/ADDPL are just placeholders. -+ [(set_attr "type" "alu_imm,alu_sreg,neon_add,alu_imm,multiple,alu_imm,alu_imm") -+ (set_attr "arch" "*,*,simd,*,*,sve,sve")] - ) - - ;; zero_extend version of above -@@ -1823,17 +1908,18 @@ - ;; this pattern. - (define_insn_and_split "*add3_poly_1" - [(set -- (match_operand:GPI 0 "register_operand" "=r,r,r,r,r,&r") -+ (match_operand:GPI 0 "register_operand" "=r,r,r,r,r,r,&r") - (plus:GPI -- (match_operand:GPI 1 "register_operand" "%rk,rk,rk,rk,rk,rk") -- (match_operand:GPI 2 "aarch64_pluslong_or_poly_operand" "I,r,J,Uaa,Uav,Uat")))] -+ (match_operand:GPI 1 "register_operand" "%rk,rk,rk,rk,rk,0,rk") -+ (match_operand:GPI 2 "aarch64_pluslong_or_poly_operand" "I,r,J,Uaa,Uav,Uai,Uat")))] - "TARGET_SVE && operands[0] != stack_pointer_rtx" - "@ - add\\t%0, %1, %2 - add\\t%0, %1, %2 - sub\\t%0, %1, #%n2 - # -- * return aarch64_output_sve_addvl_addpl (operands[0], operands[1], operands[2]); -+ * return aarch64_output_sve_scalar_inc_dec (operands[2]); -+ * return aarch64_output_sve_addvl_addpl (operands[2]); - #" - "&& epilogue_completed - && !reg_overlap_mentioned_p (operands[0], operands[1]) -@@ -1844,8 +1930,8 @@ - operands[2], operands[0], NULL_RTX); - DONE; - } -- ;; The "alu_imm" type for ADDVL/ADDPL is just a placeholder. 
-- [(set_attr "type" "alu_imm,alu_sreg,alu_imm,multiple,alu_imm,multiple")] -+ ;; The "alu_imm" types for INC/DEC and ADDVL/ADDPL are just placeholders. -+ [(set_attr "type" "alu_imm,alu_sreg,alu_imm,multiple,alu_imm,alu_imm,multiple")] - ) - - (define_split -@@ -1897,9 +1983,9 @@ - }) - - (define_expand "addti3" -- [(set (match_operand:TI 0 "register_operand" "") -- (plus:TI (match_operand:TI 1 "register_operand" "") -- (match_operand:TI 2 "aarch64_reg_or_imm" "")))] -+ [(set (match_operand:TI 0 "register_operand") -+ (plus:TI (match_operand:TI 1 "register_operand") -+ (match_operand:TI 2 "aarch64_reg_or_imm")))] - "" - { - rtx low_dest, op1_low, op2_low, high_dest, op1_high, op2_high; -@@ -1930,9 +2016,9 @@ - }) - - (define_expand "addvti4" -- [(match_operand:TI 0 "register_operand" "") -- (match_operand:TI 1 "register_operand" "") -- (match_operand:TI 2 "aarch64_reg_or_imm" "") -+ [(match_operand:TI 0 "register_operand") -+ (match_operand:TI 1 "register_operand") -+ (match_operand:TI 2 "aarch64_reg_or_imm") - (label_ref (match_operand 3 "" ""))] - "" - { -@@ -1964,9 +2050,9 @@ - }) - - (define_expand "uaddvti4" -- [(match_operand:TI 0 "register_operand" "") -- (match_operand:TI 1 "register_operand" "") -- (match_operand:TI 2 "aarch64_reg_or_imm" "") -+ [(match_operand:TI 0 "register_operand") -+ (match_operand:TI 1 "register_operand") -+ (match_operand:TI 2 "aarch64_reg_or_imm") - (label_ref (match_operand 3 "" ""))] - "" - { -@@ -2501,9 +2587,9 @@ - (plus: - (match_dup 4) - (zero_extend: -- (match_operand:GPI 1 "register_operand" ""))) -+ (match_operand:GPI 1 "register_operand"))) - (zero_extend: -- (match_operand:GPI 2 "register_operand" ""))) -+ (match_operand:GPI 2 "register_operand"))) - (match_dup 6))) - (set (match_operand:GPI 0 "register_operand") - (plus:GPI -@@ -2564,9 +2650,9 @@ - (plus: - (match_dup 3) - (sign_extend: -- (match_operand:GPI 1 "register_operand" ""))) -+ (match_operand:GPI 1 "register_operand"))) - (sign_extend: -- (match_operand:GPI 2 "register_operand" ""))) -+ (match_operand:GPI 2 "register_operand"))) - (sign_extend: - (plus:GPI - (plus:GPI (match_dup 4) (match_dup 1)) -@@ -2835,9 +2921,9 @@ - }) - - (define_expand "subti3" -- [(set (match_operand:TI 0 "register_operand" "") -- (minus:TI (match_operand:TI 1 "aarch64_reg_or_zero" "") -- (match_operand:TI 2 "register_operand" "")))] -+ [(set (match_operand:TI 0 "register_operand") -+ (minus:TI (match_operand:TI 1 "aarch64_reg_or_zero") -+ (match_operand:TI 2 "register_operand")))] - "" - { - rtx low_dest, op1_low, op2_low, high_dest, op1_high, op2_high; -@@ -3285,12 +3371,12 @@ - [(set (reg:CC CC_REGNUM) - (compare:CC - (zero_extend: -- (match_operand:GPI 1 "aarch64_reg_or_zero" "")) -+ (match_operand:GPI 1 "aarch64_reg_or_zero")) - (plus: - (zero_extend: -- (match_operand:GPI 2 "register_operand" "")) -+ (match_operand:GPI 2 "register_operand")) - (ltu: (reg:CC CC_REGNUM) (const_int 0))))) -- (set (match_operand:GPI 0 "register_operand" "") -+ (set (match_operand:GPI 0 "register_operand") - (minus:GPI - (minus:GPI (match_dup 1) (match_dup 2)) - (ltu:GPI (reg:CC CC_REGNUM) (const_int 0))))])] -@@ -3353,16 +3439,16 @@ - (compare:CC_V - (minus: - (sign_extend: -- (match_operand:GPI 1 "aarch64_reg_or_zero" "")) -+ (match_operand:GPI 1 "aarch64_reg_or_zero")) - (plus: - (sign_extend: -- (match_operand:GPI 2 "register_operand" "")) -+ (match_operand:GPI 2 "register_operand")) - (ltu: (reg:CC CC_REGNUM) (const_int 0)))) - (sign_extend: - (minus:GPI (match_dup 1) - (plus:GPI (ltu:GPI (reg:CC CC_REGNUM) (const_int 
0)) - (match_dup 2)))))) -- (set (match_operand:GPI 0 "register_operand" "") -+ (set (match_operand:GPI 0 "register_operand") - (minus:GPI - (minus:GPI (match_dup 1) (match_dup 2)) - (ltu:GPI (reg:CC CC_REGNUM) (const_int 0))))])] -@@ -3475,8 +3561,8 @@ - ) - - (define_expand "abs2" -- [(match_operand:GPI 0 "register_operand" "") -- (match_operand:GPI 1 "register_operand" "")] -+ [(match_operand:GPI 0 "register_operand") -+ (match_operand:GPI 1 "register_operand")] - "" - { - rtx ccreg = aarch64_gen_compare_reg (LT, operands[1], const0_rtx); -@@ -3889,10 +3975,10 @@ - ;; ------------------------------------------------------------------- - - (define_expand "cstore4" -- [(set (match_operand:SI 0 "register_operand" "") -+ [(set (match_operand:SI 0 "register_operand") - (match_operator:SI 1 "aarch64_comparison_operator" -- [(match_operand:GPI 2 "register_operand" "") -- (match_operand:GPI 3 "aarch64_plus_operand" "")]))] -+ [(match_operand:GPI 2 "register_operand") -+ (match_operand:GPI 3 "aarch64_plus_operand")]))] - "" - " - operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2], -@@ -3914,10 +4000,10 @@ - - - (define_expand "cstore4" -- [(set (match_operand:SI 0 "register_operand" "") -+ [(set (match_operand:SI 0 "register_operand") - (match_operator:SI 1 "aarch64_comparison_operator_mode" -- [(match_operand:GPF 2 "register_operand" "") -- (match_operand:GPF 3 "aarch64_fp_compare_operand" "")]))] -+ [(match_operand:GPF 2 "register_operand") -+ (match_operand:GPF 3 "aarch64_fp_compare_operand")]))] - "" - " - operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2], -@@ -4002,13 +4088,13 @@ - ) - - (define_expand "cmov6" -- [(set (match_operand:GPI 0 "register_operand" "") -+ [(set (match_operand:GPI 0 "register_operand") - (if_then_else:GPI - (match_operator 1 "aarch64_comparison_operator" -- [(match_operand:GPI 2 "register_operand" "") -- (match_operand:GPI 3 "aarch64_plus_operand" "")]) -- (match_operand:GPI 4 "register_operand" "") -- (match_operand:GPI 5 "register_operand" "")))] -+ [(match_operand:GPI 2 "register_operand") -+ (match_operand:GPI 3 "aarch64_plus_operand")]) -+ (match_operand:GPI 4 "register_operand") -+ (match_operand:GPI 5 "register_operand")))] - "" - " - operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2], -@@ -4018,13 +4104,13 @@ - ) - - (define_expand "cmov6" -- [(set (match_operand:GPF 0 "register_operand" "") -+ [(set (match_operand:GPF 0 "register_operand") - (if_then_else:GPF - (match_operator 1 "aarch64_comparison_operator" -- [(match_operand:GPF 2 "register_operand" "") -- (match_operand:GPF 3 "aarch64_fp_compare_operand" "")]) -- (match_operand:GPF 4 "register_operand" "") -- (match_operand:GPF 5 "register_operand" "")))] -+ [(match_operand:GPF 2 "register_operand") -+ (match_operand:GPF 3 "aarch64_fp_compare_operand")]) -+ (match_operand:GPF 4 "register_operand") -+ (match_operand:GPF 5 "register_operand")))] - "" - " - operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2], -@@ -4102,10 +4188,10 @@ - ) - - (define_expand "movcc" -- [(set (match_operand:ALLI 0 "register_operand" "") -- (if_then_else:ALLI (match_operand 1 "aarch64_comparison_operator" "") -- (match_operand:ALLI 2 "register_operand" "") -- (match_operand:ALLI 3 "register_operand" "")))] -+ [(set (match_operand:ALLI 0 "register_operand") -+ (if_then_else:ALLI (match_operand 1 "aarch64_comparison_operator") -+ (match_operand:ALLI 2 "register_operand") -+ (match_operand:ALLI 3 "register_operand")))] - "" - { - rtx 
ccreg; -@@ -4121,10 +4207,10 @@ - ) - - (define_expand "movcc" -- [(set (match_operand:GPI 0 "register_operand" "") -- (if_then_else:GPI (match_operand 1 "aarch64_comparison_operator" "") -- (match_operand:GPF 2 "register_operand" "") -- (match_operand:GPF 3 "register_operand" "")))] -+ [(set (match_operand:GPI 0 "register_operand") -+ (if_then_else:GPI (match_operand 1 "aarch64_comparison_operator") -+ (match_operand:GPF 2 "register_operand") -+ (match_operand:GPF 3 "register_operand")))] - "" - { - rtx ccreg; -@@ -4140,10 +4226,10 @@ - ) - - (define_expand "movcc" -- [(set (match_operand:GPF 0 "register_operand" "") -- (if_then_else:GPF (match_operand 1 "aarch64_comparison_operator" "") -- (match_operand:GPF 2 "register_operand" "") -- (match_operand:GPF 3 "register_operand" "")))] -+ [(set (match_operand:GPF 0 "register_operand") -+ (if_then_else:GPF (match_operand 1 "aarch64_comparison_operator") -+ (match_operand:GPF 2 "register_operand") -+ (match_operand:GPF 3 "register_operand")))] - "" - { - rtx ccreg; -@@ -4159,10 +4245,10 @@ - ) - - (define_expand "cc" -- [(set (match_operand:GPI 0 "register_operand" "") -- (if_then_else:GPI (match_operand 1 "aarch64_comparison_operator" "") -- (NEG_NOT:GPI (match_operand:GPI 2 "register_operand" "")) -- (match_operand:GPI 3 "register_operand" "")))] -+ [(set (match_operand:GPI 0 "register_operand") -+ (if_then_else:GPI (match_operand 1 "aarch64_comparison_operator") -+ (NEG_NOT:GPI (match_operand:GPI 2 "register_operand")) -+ (match_operand:GPI 3 "register_operand")))] - "" - { - rtx ccreg; -@@ -4769,7 +4855,7 @@ - [(set_attr "type" "alus_imm")] - ) - --(define_insn "*ands_compare0" -+(define_insn "*ands_compare0" - [(set (reg:CC_NZ CC_REGNUM) - (compare:CC_NZ - (zero_extend:GPI (match_operand:SHORT 1 "register_operand" "r")) -@@ -5391,7 +5477,7 @@ - ;; ------------------------------------------------------------------- - - (define_expand "" -- [(set (match_operand:DI 0 "register_operand" "=r") -+ [(set (match_operand:DI 0 "register_operand") - (ANY_EXTRACT:DI (match_operand:DI 1 "register_operand") - (match_operand 2 - "aarch64_simd_shift_imm_offset_di") -@@ -5647,6 +5733,21 @@ - [(set_attr "type" "bfx")] - ) - -+(define_insn "*ashiftsi_extvdi_bfiz" -+ [(set (match_operand:SI 0 "register_operand" "=r") -+ (ashift:SI -+ (match_operator:SI 4 "subreg_lowpart_operator" -+ [(sign_extract:DI -+ (match_operand:DI 1 "register_operand" "r") -+ (match_operand 2 "aarch64_simd_shift_imm_offset_si") -+ (const_int 0))]) -+ (match_operand 3 "aarch64_simd_shift_imm_si")))] -+ "IN_RANGE (INTVAL (operands[2]) + INTVAL (operands[3]), -+ 1, GET_MODE_BITSIZE (SImode) - 1)" -+ "sbfiz\\t%w0, %w1, %3, %2" -+ [(set_attr "type" "bfx")] -+) -+ - ;; When the bit position and width of the equivalent extraction add up to 32 - ;; we can use a W-reg LSL instruction taking advantage of the implicit - ;; zero-extension of the X-reg. -@@ -6008,6 +6109,44 @@ - [(set_attr "type" "f_cvtf2i")] - ) - -+;; Equal width integer to fp and multiply combine. 
-+(define_insn "*aarch64_cvtf2_mult" -+ [(set (match_operand:GPF 0 "register_operand" "=w,w") -+ (mult:GPF (FLOATUORS:GPF -+ (match_operand: 1 "register_operand" "w,?r")) -+ (match_operand:GPF 2 "aarch64_fp_pow2_recip" "Dt,Dt")))] -+ "TARGET_FLOAT" -+ { -+ operands[2] = GEN_INT (aarch64_fpconst_pow2_recip (operands[2])); -+ switch (which_alternative) -+ { -+ case 0: -+ return "cvtf\t%0, %1, #%2"; -+ case 1: -+ return "cvtf\t%0, %1, #%2"; -+ default: -+ gcc_unreachable (); -+ } -+ } -+ [(set_attr "type" "neon_int_to_fp_,f_cvti2f") -+ (set_attr "arch" "simd,fp")] -+) -+ -+;; Unequal width integer to fp and multiply combine. -+(define_insn "*aarch64_cvtf2_mult" -+ [(set (match_operand:GPF 0 "register_operand" "=w") -+ (mult:GPF (FLOATUORS:GPF -+ (match_operand: 1 "register_operand" "r")) -+ (match_operand:GPF 2 "aarch64_fp_pow2_recip" "Dt")))] -+ "TARGET_FLOAT" -+ { -+ operands[2] = GEN_INT (aarch64_fpconst_pow2_recip (operands[2])); -+ return "cvtf\t%0, %1, #%2"; -+ } -+ [(set_attr "type" "f_cvti2f")] -+) -+ -+;; Equal width integer to fp conversion. - (define_insn "2" - [(set (match_operand:GPF 0 "register_operand" "=w,w") - (FLOATUORS:GPF (match_operand: 1 "register_operand" "w,?r")))] -@@ -6019,6 +6158,7 @@ - (set_attr "arch" "simd,fp")] - ) - -+;; Unequal width integer to fp conversions. - (define_insn "2" - [(set (match_operand:GPF 0 "register_operand" "=w") - (FLOATUORS:GPF (match_operand: 1 "register_operand" "r")))] -@@ -6241,8 +6381,8 @@ - ) - - (define_expand "sqrt2" -- [(set (match_operand:GPF_F16 0 "register_operand" "=w") -- (sqrt:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w")))] -+ [(set (match_operand:GPF_F16 0 "register_operand") -+ (sqrt:GPF_F16 (match_operand:GPF_F16 1 "register_operand")))] - "TARGET_FLOAT" - { - if (aarch64_emit_approx_sqrt (operands[0], operands[1], false)) -@@ -6401,6 +6541,7 @@ - ;; ------------------------------------------------------------------- - ;; Reload Scalar Floating point modes from constant pool. - ;; The AArch64 port doesn't have __int128 constant move support. -+;; The patterns need constraints due to TARGET_SECONDARY_RELOAD hook. - (define_expand "@aarch64_reload_movcp" - [(set (match_operand:GPF_TF 0 "register_operand" "=w") - (mem:GPF_TF (match_operand 1 "aarch64_constant_pool_symref" "S"))) -@@ -6501,9 +6642,9 @@ - ;; rodata section. - - (define_expand "add_losym" -- [(set (match_operand 0 "register_operand" "=r") -- (lo_sum (match_operand 1 "register_operand" "r") -- (match_operand 2 "aarch64_valid_symref" "S")))] -+ [(set (match_operand 0 "register_operand") -+ (lo_sum (match_operand 1 "register_operand") -+ (match_operand 2 "aarch64_valid_symref")))] - "" - { - machine_mode mode = GET_MODE (operands[0]); -@@ -6602,9 +6743,10 @@ - ;; instructions in the TLS stubs, in order to enable linker relaxation. - ;; Therefore we treat the stubs as an atomic sequence. 
- (define_expand "tlsgd_small_" -- [(parallel [(set (match_operand 0 "register_operand" "") -+ [(parallel [(set (match_operand 0 "register_operand") - (call (mem:DI (match_dup 2)) (const_int 1))) -- (unspec:DI [(match_operand:PTR 1 "aarch64_valid_symref" "")] UNSPEC_GOTSMALLTLS) -+ (unspec:DI [(const_int 0)] UNSPEC_CALLEE_ABI) -+ (unspec:DI [(match_operand:PTR 1 "aarch64_valid_symref")] UNSPEC_GOTSMALLTLS) - (clobber (reg:DI LR_REGNUM))])] - "" - { -@@ -6614,6 +6756,7 @@ - (define_insn "*tlsgd_small_" - [(set (match_operand 0 "register_operand" "") - (call (mem:DI (match_operand:DI 2 "" "")) (const_int 1))) -+ (unspec:DI [(const_int 0)] UNSPEC_CALLEE_ABI) - (unspec:DI [(match_operand:PTR 1 "aarch64_valid_symref" "S")] UNSPEC_GOTSMALLTLS) - (clobber (reg:DI LR_REGNUM)) - ] -@@ -6714,7 +6857,12 @@ - "TARGET_TLS_DESC" - { - if (TARGET_SVE) -- emit_insn (gen_tlsdesc_small_sve_ (operands[0])); -+ { -+ rtx abi = gen_int_mode (aarch64_tlsdesc_abi_id (), DImode); -+ rtx_insn *call -+ = emit_call_insn (gen_tlsdesc_small_sve_ (operands[0], abi)); -+ RTL_CONST_CALL_P (call) = 1; -+ } - else - emit_insn (gen_tlsdesc_small_advsimd_ (operands[0])); - DONE; -@@ -6729,72 +6877,27 @@ - UNSPEC_TLSDESC)) - (clobber (reg:DI LR_REGNUM)) - (clobber (reg:CC CC_REGNUM)) -- (clobber (match_scratch:DI 1 "=r"))] -+ (clobber (match_scratch:DI 1 "=r")) -+ (use (reg:DI FP_REGNUM))] - "TARGET_TLS_DESC && !TARGET_SVE" - "adrp\\tx0, %A0\;ldr\\t%1, [x0, #%L0]\;add\\t0, 0, %L0\;.tlsdesccall\\t%0\;blr\\t%1" - [(set_attr "type" "call") - (set_attr "length" "16")]) - --;; For SVE, model tlsdesc calls as clobbering the lower 128 bits of --;; all vector registers, and clobber all predicate registers, on --;; top of the usual R0 and LR. -+;; For SVE, model tlsdesc calls as normal calls, with the callee ABI -+;; describing the extra call-preserved guarantees. This would work -+;; for non-SVE too, but avoiding a call is probably better if we can. 
- (define_insn "tlsdesc_small_sve_" - [(set (reg:PTR R0_REGNUM) -- (unspec:PTR [(match_operand 0 "aarch64_valid_symref" "S")] -- UNSPEC_TLSDESC)) -+ (call (mem:DI (unspec:PTR -+ [(match_operand 0 "aarch64_valid_symref")] -+ UNSPEC_TLSDESC)) -+ (const_int 0))) -+ (unspec:DI [(match_operand:DI 1 "const_int_operand")] UNSPEC_CALLEE_ABI) - (clobber (reg:DI LR_REGNUM)) -- (clobber (reg:CC CC_REGNUM)) -- (clobber_high (reg:TI V0_REGNUM)) -- (clobber_high (reg:TI V1_REGNUM)) -- (clobber_high (reg:TI V2_REGNUM)) -- (clobber_high (reg:TI V3_REGNUM)) -- (clobber_high (reg:TI V4_REGNUM)) -- (clobber_high (reg:TI V5_REGNUM)) -- (clobber_high (reg:TI V6_REGNUM)) -- (clobber_high (reg:TI V7_REGNUM)) -- (clobber_high (reg:TI V8_REGNUM)) -- (clobber_high (reg:TI V9_REGNUM)) -- (clobber_high (reg:TI V10_REGNUM)) -- (clobber_high (reg:TI V11_REGNUM)) -- (clobber_high (reg:TI V12_REGNUM)) -- (clobber_high (reg:TI V13_REGNUM)) -- (clobber_high (reg:TI V14_REGNUM)) -- (clobber_high (reg:TI V15_REGNUM)) -- (clobber_high (reg:TI V16_REGNUM)) -- (clobber_high (reg:TI V17_REGNUM)) -- (clobber_high (reg:TI V18_REGNUM)) -- (clobber_high (reg:TI V19_REGNUM)) -- (clobber_high (reg:TI V20_REGNUM)) -- (clobber_high (reg:TI V21_REGNUM)) -- (clobber_high (reg:TI V22_REGNUM)) -- (clobber_high (reg:TI V23_REGNUM)) -- (clobber_high (reg:TI V24_REGNUM)) -- (clobber_high (reg:TI V25_REGNUM)) -- (clobber_high (reg:TI V26_REGNUM)) -- (clobber_high (reg:TI V27_REGNUM)) -- (clobber_high (reg:TI V28_REGNUM)) -- (clobber_high (reg:TI V29_REGNUM)) -- (clobber_high (reg:TI V30_REGNUM)) -- (clobber_high (reg:TI V31_REGNUM)) -- (clobber (reg:VNx2BI P0_REGNUM)) -- (clobber (reg:VNx2BI P1_REGNUM)) -- (clobber (reg:VNx2BI P2_REGNUM)) -- (clobber (reg:VNx2BI P3_REGNUM)) -- (clobber (reg:VNx2BI P4_REGNUM)) -- (clobber (reg:VNx2BI P5_REGNUM)) -- (clobber (reg:VNx2BI P6_REGNUM)) -- (clobber (reg:VNx2BI P7_REGNUM)) -- (clobber (reg:VNx2BI P8_REGNUM)) -- (clobber (reg:VNx2BI P9_REGNUM)) -- (clobber (reg:VNx2BI P10_REGNUM)) -- (clobber (reg:VNx2BI P11_REGNUM)) -- (clobber (reg:VNx2BI P12_REGNUM)) -- (clobber (reg:VNx2BI P13_REGNUM)) -- (clobber (reg:VNx2BI P14_REGNUM)) -- (clobber (reg:VNx2BI P15_REGNUM)) -- (clobber (match_scratch:DI 1 "=r"))] -+ (clobber (match_scratch:DI 2 "=r"))] - "TARGET_TLS_DESC && TARGET_SVE" -- "adrp\\tx0, %A0\;ldr\\t%1, [x0, #%L0]\;add\\t0, 0, %L0\;.tlsdesccall\\t%0\;blr\\t%1" -+ "adrp\\tx0, %A0\;ldr\\t%2, [x0, #%L0]\;add\\t0, 0, %L0\;.tlsdesccall\\t%0\;blr\\t%2" - [(set_attr "type" "call") - (set_attr "length" "16")]) - -@@ -6808,6 +6911,15 @@ - [(set_attr "length" "0")] - ) - -+(define_insn "aarch64_fjcvtzs" -+ [(set (match_operand:SI 0 "register_operand" "=r") -+ (unspec:SI [(match_operand:DF 1 "register_operand" "w")] -+ UNSPEC_FJCVTZS))] -+ "TARGET_JSCVT" -+ "fjcvtzs\\t%w0, %d1" -+ [(set_attr "type" "f_cvtf2i")] -+) -+ - ;; Pointer authentication patterns are always provided. In architecture - ;; revisions prior to ARMv8.3-A these HINT instructions operate as NOPs. - ;; This lets the user write portable software which authenticates pointers -@@ -6821,7 +6933,7 @@ - [(set (reg:DI R30_REGNUM) - (unspec:DI [(reg:DI R30_REGNUM) (reg:DI SP_REGNUM)] PAUTH_LR_SP))] - "" -- "hint\t // asp"; -+ "hint\t // sp"; - ) - - ;; Signing/Authenticating X17 using X16 as the salt. -@@ -6830,7 +6942,7 @@ - [(set (reg:DI R17_REGNUM) - (unspec:DI [(reg:DI R17_REGNUM) (reg:DI R16_REGNUM)] PAUTH_17_16))] - "" -- "hint\t // a1716"; -+ "hint\t // 1716"; - ) - - ;; Stripping the signature in R30. 
-@@ -6885,7 +6997,7 @@ - - ;; Named pattern for expanding thread pointer reference. - (define_expand "get_thread_pointerdi" -- [(match_operand:DI 0 "register_operand" "=r")] -+ [(match_operand:DI 0 "register_operand")] - "" - { - rtx tmp = aarch64_load_tp (operands[0]); -@@ -6941,13 +7053,15 @@ - } - [(set_attr "type" "mrs")]) - -+;; DO NOT SPLIT THIS PATTERN. It is important for security reasons that the -+;; canary value does not live beyond the life of this sequence. - (define_insn "stack_protect_set_" - [(set (match_operand:PTR 0 "memory_operand" "=m") - (unspec:PTR [(match_operand:PTR 1 "memory_operand" "m")] - UNSPEC_SP_SET)) - (set (match_scratch:PTR 2 "=&r") (const_int 0))] - "" -- "ldr\\t%2, %1\;str\\t%2, %0\;mov\t%2,0" -+ "ldr\\t%2, %1\;str\\t%2, %0\;mov\t%2, 0" - [(set_attr "length" "12") - (set_attr "type" "multiple")]) - -@@ -7122,12 +7236,6 @@ - [(set_attr "type" "no_insn")] - ) - --;; Helper for aarch64.c code. --(define_expand "set_clobber_cc" -- [(parallel [(set (match_operand 0) -- (match_operand 1)) -- (clobber (reg:CC CC_REGNUM))])]) -- - ;; Hard speculation barrier. - (define_insn "speculation_barrier" - [(unspec_volatile [(const_int 0)] UNSPECV_SPECULATION_BARRIER)] -@@ -7142,10 +7250,10 @@ - ;; tracking enabled. Use the speculation tracker to decide whether to - ;; copy operand 1 to the target, or to copy the fail value (operand 2). - (define_expand "@despeculate_copy" -- [(set (match_operand:ALLI_TI 0 "register_operand" "=r") -+ [(set (match_operand:ALLI_TI 0 "register_operand") - (unspec_volatile:ALLI_TI -- [(match_operand:ALLI_TI 1 "register_operand" "r") -- (match_operand:ALLI_TI 2 "aarch64_reg_or_zero" "rZ") -+ [(match_operand:ALLI_TI 1 "register_operand") -+ (match_operand:ALLI_TI 2 "aarch64_reg_or_zero") - (use (reg:DI SPECULATION_TRACKER_REGNUM)) - (clobber (reg:CC CC_REGNUM))] UNSPECV_SPECULATION_BARRIER))] - "" -@@ -7235,6 +7343,73 @@ - (set_attr "speculation_barrier" "true")] - ) - -+(define_insn "aarch64_" -+ [(set (match_operand:VSFDF 0 "register_operand" "=w") -+ (unspec:VSFDF [(match_operand:VSFDF 1 "register_operand" "w")] -+ FRINTNZX))] -+ "TARGET_FRINT && TARGET_FLOAT -+ && !(VECTOR_MODE_P (mode) && !TARGET_SIMD)" -+ "\\t%0, %1" -+ [(set_attr "type" "f_rint")] -+) -+ -+;; Transactional Memory Extension (TME) instructions. 
-+ -+(define_insn "tstart" -+ [(set (match_operand:DI 0 "register_operand" "=r") -+ (unspec_volatile:DI [(const_int 0)] UNSPECV_TSTART)) -+ (clobber (mem:BLK (scratch)))] -+ "TARGET_TME" -+ "tstart\\t%0" -+ [(set_attr "type" "tme")] -+) -+ -+(define_insn "ttest" -+ [(set (match_operand:DI 0 "register_operand" "=r") -+ (unspec_volatile:DI [(const_int 0)] UNSPEC_TTEST)) -+ (clobber (mem:BLK (scratch)))] -+ "TARGET_TME" -+ "ttest\\t%0" -+ [(set_attr "type" "tme")] -+) -+ -+(define_insn "tcommit" -+ [(unspec_volatile:BLK [(const_int 0)] UNSPECV_TCOMMIT) -+ (clobber (mem:BLK (scratch)))] -+ "TARGET_TME" -+ "tcommit" -+ [(set_attr "type" "tme")] -+) -+ -+(define_insn "tcancel" -+ [(unspec_volatile:BLK -+ [(match_operand 0 "const_int_operand" "n")] UNSPECV_TCANCEL) -+ (clobber (mem:BLK (scratch)))] -+ "TARGET_TME && (UINTVAL (operands[0]) <= 65535)" -+ "tcancel\\t#%0" -+ [(set_attr "type" "tme")] -+) -+ -+(define_insn "aarch64_rndr" -+ [(set (match_operand:DI 0 "register_operand" "=r") -+ (unspec_volatile:DI [(const_int 0)] UNSPEC_RNDR)) -+ (set (reg:CC_Z CC_REGNUM) -+ (unspec_volatile:CC_Z [(const_int 0)] UNSPEC_RNDR))] -+ "TARGET_RNG" -+ "mrs\t%0, RNDR" -+ [(set_attr "type" "mrs")] -+) -+ -+(define_insn "aarch64_rndrrs" -+ [(set (match_operand:DI 0 "register_operand" "=r") -+ (unspec_volatile:DI [(const_int 0)] UNSPEC_RNDRRS)) -+ (set (reg:CC_Z CC_REGNUM) -+ (unspec_volatile:CC_Z [(const_int 0)] UNSPEC_RNDRRS))] -+ "TARGET_RNG" -+ "mrs\t%0, RNDRRS" -+ [(set_attr "type" "mrs")] -+) -+ - ;; AdvSIMD Stuff - (include "aarch64-simd.md") - -diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt -index d2cb41be6..e2be8ff6f 100644 ---- a/gcc/config/aarch64/aarch64.opt -+++ b/gcc/config/aarch64/aarch64.opt -@@ -31,7 +31,7 @@ TargetSave - const char *x_aarch64_override_tune_string - - TargetVariable --unsigned long aarch64_isa_flags = 0 -+uint64_t aarch64_isa_flags = 0 - - TargetVariable - unsigned aarch64_enable_bti = 2 -@@ -261,3 +261,6 @@ user-land code. - TargetVariable - long aarch64_stack_protector_guard_offset = 0 - -+moutline-atomics -+Target Report Mask(OUTLINE_ATOMICS) Save -+Generate local calls to out-of-line atomic operations. 
-diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h -index 534a989c3..2284e7164 100644 ---- a/gcc/config/aarch64/arm_acle.h -+++ b/gcc/config/aarch64/arm_acle.h -@@ -29,14 +29,77 @@ - - #include - --#pragma GCC push_options -- --#pragma GCC target ("+nothing+crc") -- - #ifdef __cplusplus - extern "C" { - #endif - -+#pragma GCC push_options -+#pragma GCC target ("arch=armv8.3-a") -+__extension__ static __inline int32_t __attribute__ ((__always_inline__)) -+__jcvt (double __a) -+{ -+ return __builtin_aarch64_jcvtzs (__a); -+} -+ -+#pragma GCC pop_options -+ -+#pragma GCC push_options -+#pragma GCC target ("arch=armv8.5-a") -+__extension__ static __inline float __attribute__ ((__always_inline__)) -+__rint32zf (float __a) -+{ -+ return __builtin_aarch64_frint32zsf (__a); -+} -+ -+__extension__ static __inline double __attribute__ ((__always_inline__)) -+__rint32z (double __a) -+{ -+ return __builtin_aarch64_frint32zdf (__a); -+} -+ -+__extension__ static __inline float __attribute__ ((__always_inline__)) -+__rint64zf (float __a) -+{ -+ return __builtin_aarch64_frint64zsf (__a); -+} -+ -+__extension__ static __inline double __attribute__ ((__always_inline__)) -+__rint64z (double __a) -+{ -+ return __builtin_aarch64_frint64zdf (__a); -+} -+ -+__extension__ static __inline float __attribute__ ((__always_inline__)) -+__rint32xf (float __a) -+{ -+ return __builtin_aarch64_frint32xsf (__a); -+} -+ -+__extension__ static __inline double __attribute__ ((__always_inline__)) -+__rint32x (double __a) -+{ -+ return __builtin_aarch64_frint32xdf (__a); -+} -+ -+__extension__ static __inline float __attribute__ ((__always_inline__)) -+__rint64xf (float __a) -+{ -+ return __builtin_aarch64_frint64xsf (__a); -+} -+ -+__extension__ static __inline double __attribute__ ((__always_inline__)) -+__rint64x (double __a) -+{ -+ return __builtin_aarch64_frint64xdf (__a); -+} -+ -+ -+#pragma GCC pop_options -+ -+#pragma GCC push_options -+ -+#pragma GCC target ("+nothing+crc") -+ - __extension__ static __inline uint32_t __attribute__ ((__always_inline__)) - __crc32b (uint32_t __a, uint8_t __b) - { -@@ -85,10 +148,69 @@ __crc32d (uint32_t __a, uint64_t __b) - return __builtin_aarch64_crc32x (__a, __b); - } - --#ifdef __cplusplus -+#pragma GCC pop_options -+ -+#ifdef __ARM_FEATURE_TME -+#pragma GCC push_options -+#pragma GCC target ("+nothing+tme") -+ -+#define _TMFAILURE_REASON 0x00007fffu -+#define _TMFAILURE_RTRY 0x00008000u -+#define _TMFAILURE_CNCL 0x00010000u -+#define _TMFAILURE_MEM 0x00020000u -+#define _TMFAILURE_IMP 0x00040000u -+#define _TMFAILURE_ERR 0x00080000u -+#define _TMFAILURE_SIZE 0x00100000u -+#define _TMFAILURE_NEST 0x00200000u -+#define _TMFAILURE_DBG 0x00400000u -+#define _TMFAILURE_INT 0x00800000u -+#define _TMFAILURE_TRIVIAL 0x01000000u -+ -+__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) -+__tstart (void) -+{ -+ return __builtin_aarch64_tstart (); -+} -+ -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+__tcommit (void) -+{ -+ __builtin_aarch64_tcommit (); -+} -+ -+__extension__ static __inline void __attribute__ ((__always_inline__)) -+__tcancel (const uint64_t __reason) -+{ -+ __builtin_aarch64_tcancel (__reason); - } -+ -+__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) -+__ttest (void) -+{ -+ return __builtin_aarch64_ttest (); -+} -+ -+#pragma GCC pop_options - #endif - -+#pragma GCC push_options -+#pragma GCC target ("+nothing+rng") -+__extension__ static __inline int __attribute__ 
((__always_inline__)) -+__rndr (uint64_t *__res) -+{ -+ return __builtin_aarch64_rndr (__res); -+} -+ -+__extension__ static __inline int __attribute__ ((__always_inline__)) -+__rndrrs (uint64_t *__res) -+{ -+ return __builtin_aarch64_rndrrs (__res); -+} -+ - #pragma GCC pop_options - -+#ifdef __cplusplus -+} -+#endif -+ - #endif -diff --git a/gcc/config/aarch64/arm_bf16.h b/gcc/config/aarch64/arm_bf16.h -new file mode 100644 -index 000000000..984875dcc ---- /dev/null -+++ b/gcc/config/aarch64/arm_bf16.h -@@ -0,0 +1,45 @@ -+/* Arm BF16 instrinsics include file. -+ -+ Copyright (C) 2019-2020 Free Software Foundation, Inc. -+ Contributed by Arm. -+ -+ This file is part of GCC. -+ -+ GCC is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published -+ by the Free Software Foundation; either version 3, or (at your -+ option) any later version. -+ -+ GCC is distributed in the hope that it will be useful, but WITHOUT -+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public -+ License for more details. -+ -+ Under Section 7 of GPL version 3, you are granted additional -+ permissions described in the GCC Runtime Library Exception, version -+ 3.1, as published by the Free Software Foundation. -+ -+ You should have received a copy of the GNU General Public License and -+ a copy of the GCC Runtime Library Exception along with this program; -+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -+ . */ -+ -+#ifndef _AARCH64_BF16_H_ -+#define _AARCH64_BF16_H_ -+ -+typedef __bf16 bfloat16_t; -+typedef float float32_t; -+ -+#pragma GCC push_options -+#pragma GCC target ("+nothing+bf16+nosimd") -+ -+__extension__ extern __inline bfloat16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvth_bf16_f32 (float32_t __a) -+{ -+ return __builtin_aarch64_bfcvtbf (__a); -+} -+ -+#pragma GCC pop_options -+ -+#endif -diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h -index 314ef3018..7435905ff 100644 ---- a/gcc/config/aarch64/arm_neon.h -+++ b/gcc/config/aarch64/arm_neon.h -@@ -73,6 +73,39 @@ typedef __fp16 float16_t; - typedef float float32_t; - typedef double float64_t; - -+typedef __Bfloat16x4_t bfloat16x4_t; -+typedef __Bfloat16x8_t bfloat16x8_t; -+ -+typedef struct bfloat16x4x2_t -+{ -+ bfloat16x4_t val[2]; -+} bfloat16x4x2_t; -+ -+typedef struct bfloat16x8x2_t -+{ -+ bfloat16x8_t val[2]; -+} bfloat16x8x2_t; -+ -+typedef struct bfloat16x4x3_t -+{ -+ bfloat16x4_t val[3]; -+} bfloat16x4x3_t; -+ -+typedef struct bfloat16x8x3_t -+{ -+ bfloat16x8_t val[3]; -+} bfloat16x8x3_t; -+ -+typedef struct bfloat16x4x4_t -+{ -+ bfloat16x4_t val[4]; -+} bfloat16x4x4_t; -+ -+typedef struct bfloat16x8x4_t -+{ -+ bfloat16x8_t val[4]; -+} bfloat16x8x4_t; -+ - typedef struct int8x8x2_t - { - int8x8_t val[2]; -@@ -6572,867 +6605,867 @@ vcombine_p64 (poly64x1_t __a, poly64x1_t __b) - - __extension__ extern __inline int8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vaba_s8 (int8x8_t a, int8x8_t b, int8x8_t c) -+vaba_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c) - { -- int8x8_t result; -+ int8x8_t __result; - __asm__ ("saba %0.8b,%2.8b,%3.8b" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16x4_t - __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) --vaba_s16 (int16x4_t a, int16x4_t b, int16x4_t c) -+vaba_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) - { -- int16x4_t result; -+ int16x4_t __result; - __asm__ ("saba %0.4h,%2.4h,%3.4h" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vaba_s32 (int32x2_t a, int32x2_t b, int32x2_t c) -+vaba_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) - { -- int32x2_t result; -+ int32x2_t __result; - __asm__ ("saba %0.2s,%2.2s,%3.2s" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vaba_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c) -+vaba_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) - { -- uint8x8_t result; -+ uint8x8_t __result; - __asm__ ("uaba %0.8b,%2.8b,%3.8b" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vaba_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c) -+vaba_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) - { -- uint16x4_t result; -+ uint16x4_t __result; - __asm__ ("uaba %0.4h,%2.4h,%3.4h" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vaba_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) -+vaba_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) - { -- uint32x2_t result; -+ uint32x2_t __result; - __asm__ ("uaba %0.2s,%2.2s,%3.2s" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c) -+vabal_high_s8 (int16x8_t __a, int8x16_t __b, int8x16_t __c) - { -- int16x8_t result; -+ int16x8_t __result; - __asm__ ("sabal2 %0.8h,%2.16b,%3.16b" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c) -+vabal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c) - { -- int32x4_t result; -+ int32x4_t __result; - __asm__ ("sabal2 %0.4s,%2.8h,%3.8h" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c) -+vabal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c) - { -- int64x2_t result; -+ int64x2_t __result; - 
__asm__ ("sabal2 %0.2d,%2.4s,%3.4s" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c) -+vabal_high_u8 (uint16x8_t __a, uint8x16_t __b, uint8x16_t __c) - { -- uint16x8_t result; -+ uint16x8_t __result; - __asm__ ("uabal2 %0.8h,%2.16b,%3.16b" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c) -+vabal_high_u16 (uint32x4_t __a, uint16x8_t __b, uint16x8_t __c) - { -- uint32x4_t result; -+ uint32x4_t __result; - __asm__ ("uabal2 %0.4s,%2.8h,%3.8h" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) -+vabal_high_u32 (uint64x2_t __a, uint32x4_t __b, uint32x4_t __c) - { -- uint64x2_t result; -+ uint64x2_t __result; - __asm__ ("uabal2 %0.2d,%2.4s,%3.4s" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabal_s8 (int16x8_t a, int8x8_t b, int8x8_t c) -+vabal_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c) - { -- int16x8_t result; -+ int16x8_t __result; - __asm__ ("sabal %0.8h,%2.8b,%3.8b" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabal_s16 (int32x4_t a, int16x4_t b, int16x4_t c) -+vabal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) - { -- int32x4_t result; -+ int32x4_t __result; - __asm__ ("sabal %0.4s,%2.4h,%3.4h" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabal_s32 (int64x2_t a, int32x2_t b, int32x2_t c) -+vabal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) - { -- int64x2_t result; -+ int64x2_t __result; - __asm__ ("sabal %0.2d,%2.2s,%3.2s" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c) -+vabal_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) - { -- uint16x8_t result; -+ uint16x8_t __result; - __asm__ ("uabal %0.8h,%2.8b,%3.8b" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return 
result; -+ return __result; - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c) -+vabal_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) - { -- uint32x4_t result; -+ uint32x4_t __result; - __asm__ ("uabal %0.4s,%2.4h,%3.4h" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) -+vabal_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) - { -- uint64x2_t result; -+ uint64x2_t __result; - __asm__ ("uabal %0.2d,%2.2s,%3.2s" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c) -+vabaq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c) - { -- int8x16_t result; -+ int8x16_t __result; - __asm__ ("saba %0.16b,%2.16b,%3.16b" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c) -+vabaq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) - { -- int16x8_t result; -+ int16x8_t __result; - __asm__ ("saba %0.8h,%2.8h,%3.8h" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c) -+vabaq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) - { -- int32x4_t result; -+ int32x4_t __result; - __asm__ ("saba %0.4s,%2.4s,%3.4s" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c) -+vabaq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) - { -- uint8x16_t result; -+ uint8x16_t __result; - __asm__ ("uaba %0.16b,%2.16b,%3.16b" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c) -+vabaq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) - { -- uint16x8_t result; -+ uint16x8_t __result; - __asm__ ("uaba %0.8h,%2.8h,%3.8h" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabaq_u32 (uint32x4_t a, uint32x4_t b, 
uint32x4_t c) -+vabaq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) - { -- uint32x4_t result; -+ uint32x4_t __result; - __asm__ ("uaba %0.4s,%2.4s,%3.4s" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabd_s8 (int8x8_t a, int8x8_t b) -+vabd_s8 (int8x8_t __a, int8x8_t __b) - { -- int8x8_t result; -+ int8x8_t __result; - __asm__ ("sabd %0.8b, %1.8b, %2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabd_s16 (int16x4_t a, int16x4_t b) -+vabd_s16 (int16x4_t __a, int16x4_t __b) - { -- int16x4_t result; -+ int16x4_t __result; - __asm__ ("sabd %0.4h, %1.4h, %2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabd_s32 (int32x2_t a, int32x2_t b) -+vabd_s32 (int32x2_t __a, int32x2_t __b) - { -- int32x2_t result; -+ int32x2_t __result; - __asm__ ("sabd %0.2s, %1.2s, %2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabd_u8 (uint8x8_t a, uint8x8_t b) -+vabd_u8 (uint8x8_t __a, uint8x8_t __b) - { -- uint8x8_t result; -+ uint8x8_t __result; - __asm__ ("uabd %0.8b, %1.8b, %2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabd_u16 (uint16x4_t a, uint16x4_t b) -+vabd_u16 (uint16x4_t __a, uint16x4_t __b) - { -- uint16x4_t result; -+ uint16x4_t __result; - __asm__ ("uabd %0.4h, %1.4h, %2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabd_u32 (uint32x2_t a, uint32x2_t b) -+vabd_u32 (uint32x2_t __a, uint32x2_t __b) - { -- uint32x2_t result; -+ uint32x2_t __result; - __asm__ ("uabd %0.2s, %1.2s, %2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabdl_high_s8 (int8x16_t a, int8x16_t b) -+vabdl_high_s8 (int8x16_t __a, int8x16_t __b) - { -- int16x8_t result; -+ int16x8_t __result; - __asm__ ("sabdl2 %0.8h,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabdl_high_s16 (int16x8_t a, int16x8_t b) -+vabdl_high_s16 (int16x8_t __a, int16x8_t __b) - { -- 
int32x4_t result; -+ int32x4_t __result; - __asm__ ("sabdl2 %0.4s,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabdl_high_s32 (int32x4_t a, int32x4_t b) -+vabdl_high_s32 (int32x4_t __a, int32x4_t __b) - { -- int64x2_t result; -+ int64x2_t __result; - __asm__ ("sabdl2 %0.2d,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabdl_high_u8 (uint8x16_t a, uint8x16_t b) -+vabdl_high_u8 (uint8x16_t __a, uint8x16_t __b) - { -- uint16x8_t result; -+ uint16x8_t __result; - __asm__ ("uabdl2 %0.8h,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabdl_high_u16 (uint16x8_t a, uint16x8_t b) -+vabdl_high_u16 (uint16x8_t __a, uint16x8_t __b) - { -- uint32x4_t result; -+ uint32x4_t __result; - __asm__ ("uabdl2 %0.4s,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabdl_high_u32 (uint32x4_t a, uint32x4_t b) -+vabdl_high_u32 (uint32x4_t __a, uint32x4_t __b) - { -- uint64x2_t result; -+ uint64x2_t __result; - __asm__ ("uabdl2 %0.2d,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabdl_s8 (int8x8_t a, int8x8_t b) -+vabdl_s8 (int8x8_t __a, int8x8_t __b) - { -- int16x8_t result; -+ int16x8_t __result; - __asm__ ("sabdl %0.8h, %1.8b, %2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabdl_s16 (int16x4_t a, int16x4_t b) -+vabdl_s16 (int16x4_t __a, int16x4_t __b) - { -- int32x4_t result; -+ int32x4_t __result; - __asm__ ("sabdl %0.4s, %1.4h, %2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabdl_s32 (int32x2_t a, int32x2_t b) -+vabdl_s32 (int32x2_t __a, int32x2_t __b) - { -- int64x2_t result; -+ int64x2_t __result; - __asm__ ("sabdl %0.2d, %1.2s, %2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabdl_u8 (uint8x8_t a, uint8x8_t b) -+vabdl_u8 (uint8x8_t __a, uint8x8_t __b) - { -- uint16x8_t result; -+ uint16x8_t __result; - __asm__ 
("uabdl %0.8h, %1.8b, %2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabdl_u16 (uint16x4_t a, uint16x4_t b) -+vabdl_u16 (uint16x4_t __a, uint16x4_t __b) - { -- uint32x4_t result; -+ uint32x4_t __result; - __asm__ ("uabdl %0.4s, %1.4h, %2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabdl_u32 (uint32x2_t a, uint32x2_t b) -+vabdl_u32 (uint32x2_t __a, uint32x2_t __b) - { -- uint64x2_t result; -+ uint64x2_t __result; - __asm__ ("uabdl %0.2d, %1.2s, %2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabdq_s8 (int8x16_t a, int8x16_t b) -+vabdq_s8 (int8x16_t __a, int8x16_t __b) - { -- int8x16_t result; -+ int8x16_t __result; - __asm__ ("sabd %0.16b, %1.16b, %2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabdq_s16 (int16x8_t a, int16x8_t b) -+vabdq_s16 (int16x8_t __a, int16x8_t __b) - { -- int16x8_t result; -+ int16x8_t __result; - __asm__ ("sabd %0.8h, %1.8h, %2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabdq_s32 (int32x4_t a, int32x4_t b) -+vabdq_s32 (int32x4_t __a, int32x4_t __b) - { -- int32x4_t result; -+ int32x4_t __result; - __asm__ ("sabd %0.4s, %1.4s, %2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabdq_u8 (uint8x16_t a, uint8x16_t b) -+vabdq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- uint8x16_t result; -+ uint8x16_t __result; - __asm__ ("uabd %0.16b, %1.16b, %2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabdq_u16 (uint16x8_t a, uint16x8_t b) -+vabdq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- uint16x8_t result; -+ uint16x8_t __result; - __asm__ ("uabd %0.8h, %1.8h, %2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabdq_u32 (uint32x4_t a, uint32x4_t b) -+vabdq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- uint32x4_t result; -+ uint32x4_t __result; - __asm__ ("uabd %0.4s, %1.4s, %2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : 
"=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vaddlv_s8 (int8x8_t a) -+vaddlv_s8 (int8x8_t __a) - { -- int16_t result; -+ int16_t __result; - __asm__ ("saddlv %h0,%1.8b" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vaddlv_s16 (int16x4_t a) -+vaddlv_s16 (int16x4_t __a) - { -- int32_t result; -+ int32_t __result; - __asm__ ("saddlv %s0,%1.4h" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vaddlv_u8 (uint8x8_t a) -+vaddlv_u8 (uint8x8_t __a) - { -- uint16_t result; -+ uint16_t __result; - __asm__ ("uaddlv %h0,%1.8b" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vaddlv_u16 (uint16x4_t a) -+vaddlv_u16 (uint16x4_t __a) - { -- uint32_t result; -+ uint32_t __result; - __asm__ ("uaddlv %s0,%1.4h" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vaddlvq_s8 (int8x16_t a) -+vaddlvq_s8 (int8x16_t __a) - { -- int16_t result; -+ int16_t __result; - __asm__ ("saddlv %h0,%1.16b" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vaddlvq_s16 (int16x8_t a) -+vaddlvq_s16 (int16x8_t __a) - { -- int32_t result; -+ int32_t __result; - __asm__ ("saddlv %s0,%1.8h" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int64_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vaddlvq_s32 (int32x4_t a) -+vaddlvq_s32 (int32x4_t __a) - { -- int64_t result; -+ int64_t __result; - __asm__ ("saddlv %d0,%1.4s" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vaddlvq_u8 (uint8x16_t a) -+vaddlvq_u8 (uint8x16_t __a) - { -- uint16_t result; -+ uint16_t __result; - __asm__ ("uaddlv %h0,%1.16b" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vaddlvq_u16 (uint16x8_t a) -+vaddlvq_u16 (uint16x8_t __a) - { -- uint32_t result; -+ uint32_t __result; - __asm__ ("uaddlv %s0,%1.8h" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint64_t - __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) --vaddlvq_u32 (uint32x4_t a) -+vaddlvq_u32 (uint32x4_t __a) - { -- uint64_t result; -+ uint64_t __result; - __asm__ ("uaddlv %d0,%1.4s" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline float32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vcvtx_f32_f64 (float64x2_t a) -+vcvtx_f32_f64 (float64x2_t __a) - { -- float32x2_t result; -+ float32x2_t __result; - __asm__ ("fcvtxn %0.2s,%1.2d" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline float32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vcvtx_high_f32_f64 (float32x2_t a, float64x2_t b) -+vcvtx_high_f32_f64 (float32x2_t __a, float64x2_t __b) - { -- float32x4_t result; -+ float32x4_t __result; - __asm__ ("fcvtxn2 %0.4s,%1.2d" -- : "=w"(result) -- : "w" (b), "0"(a) -+ : "=w"(__result) -+ : "w" (__b), "0"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline float32_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vcvtxd_f32_f64 (float64_t a) -+vcvtxd_f32_f64 (float64_t __a) - { -- float32_t result; -+ float32_t __result; - __asm__ ("fcvtxn %s0,%d1" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline float32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmla_n_f32 (float32x2_t a, float32x2_t b, float32_t c) -+vmla_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c) - { -- float32x2_t result; -- float32x2_t t1; -+ float32x2_t __result; -+ float32x2_t __t1; - __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fadd %0.2s, %0.2s, %1.2s" -- : "=w"(result), "=w"(t1) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result), "=w"(__t1) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmla_n_s16 (int16x4_t a, int16x4_t b, int16_t c) -+vmla_n_s16 (int16x4_t __a, int16x4_t __b, int16_t __c) - { -- int16x4_t result; -+ int16x4_t __result; - __asm__ ("mla %0.4h,%2.4h,%3.h[0]" -- : "=w"(result) -- : "0"(a), "w"(b), "x"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "x"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmla_n_s32 (int32x2_t a, int32x2_t b, int32_t c) -+vmla_n_s32 (int32x2_t __a, int32x2_t __b, int32_t __c) - { -- int32x2_t result; -+ int32x2_t __result; - __asm__ ("mla %0.2s,%2.2s,%3.s[0]" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmla_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c) -+vmla_n_u16 (uint16x4_t __a, uint16x4_t __b, uint16_t __c) - { -- uint16x4_t result; -+ uint16x4_t __result; - __asm__ ("mla %0.4h,%2.4h,%3.h[0]" -- : "=w"(result) -- : "0"(a), "w"(b), "x"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "x"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - 
__extension__ extern __inline uint32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmla_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c) -+vmla_n_u32 (uint32x2_t __a, uint32x2_t __b, uint32_t __c) - { -- uint32x2_t result; -+ uint32x2_t __result; - __asm__ ("mla %0.2s,%2.2s,%3.s[0]" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmla_s8 (int8x8_t a, int8x8_t b, int8x8_t c) -+vmla_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c) - { -- int8x8_t result; -+ int8x8_t __result; - __asm__ ("mla %0.8b, %2.8b, %3.8b" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmla_s16 (int16x4_t a, int16x4_t b, int16x4_t c) -+vmla_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) - { -- int16x4_t result; -+ int16x4_t __result; - __asm__ ("mla %0.4h, %2.4h, %3.4h" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmla_s32 (int32x2_t a, int32x2_t b, int32x2_t c) -+vmla_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) - { -- int32x2_t result; -+ int32x2_t __result; - __asm__ ("mla %0.2s, %2.2s, %3.2s" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmla_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c) -+vmla_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) - { -- uint8x8_t result; -+ uint8x8_t __result; - __asm__ ("mla %0.8b, %2.8b, %3.8b" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmla_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c) -+vmla_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) - { -- uint16x4_t result; -+ uint16x4_t __result; - __asm__ ("mla %0.4h, %2.4h, %3.4h" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) -+vmla_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) - { -- uint32x2_t result; -+ uint32x2_t __result; - __asm__ ("mla %0.2s, %2.2s, %3.2s" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - #define vmlal_high_lane_s16(a, b, c, d) \ -@@ -7549,122 +7582,122 @@ vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) - - __extension__ extern __inline int32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) --vmlal_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c) -+vmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c) - { -- int32x4_t result; -+ int32x4_t __result; - __asm__ ("smlal2 %0.4s,%2.8h,%3.h[0]" -- : "=w"(result) -- : "0"(a), "w"(b), "x"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "x"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlal_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c) -+vmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c) - { -- int64x2_t result; -+ int64x2_t __result; - __asm__ ("smlal2 %0.2d,%2.4s,%3.s[0]" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlal_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c) -+vmlal_high_n_u16 (uint32x4_t __a, uint16x8_t __b, uint16_t __c) - { -- uint32x4_t result; -+ uint32x4_t __result; - __asm__ ("umlal2 %0.4s,%2.8h,%3.h[0]" -- : "=w"(result) -- : "0"(a), "w"(b), "x"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "x"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlal_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c) -+vmlal_high_n_u32 (uint64x2_t __a, uint32x4_t __b, uint32_t __c) - { -- uint64x2_t result; -+ uint64x2_t __result; - __asm__ ("umlal2 %0.2d,%2.4s,%3.s[0]" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c) -+vmlal_high_s8 (int16x8_t __a, int8x16_t __b, int8x16_t __c) - { -- int16x8_t result; -+ int16x8_t __result; - __asm__ ("smlal2 %0.8h,%2.16b,%3.16b" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c) -+vmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c) - { -- int32x4_t result; -+ int32x4_t __result; - __asm__ ("smlal2 %0.4s,%2.8h,%3.8h" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c) -+vmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c) - { -- int64x2_t result; -+ int64x2_t __result; - __asm__ ("smlal2 %0.2d,%2.4s,%3.4s" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c) -+vmlal_high_u8 (uint16x8_t __a, uint8x16_t __b, 
uint8x16_t __c) - { -- uint16x8_t result; -+ uint16x8_t __result; - __asm__ ("umlal2 %0.8h,%2.16b,%3.16b" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c) -+vmlal_high_u16 (uint32x4_t __a, uint16x8_t __b, uint16x8_t __c) - { -- uint32x4_t result; -+ uint32x4_t __result; - __asm__ ("umlal2 %0.4s,%2.8h,%3.8h" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) -+vmlal_high_u32 (uint64x2_t __a, uint32x4_t __b, uint32x4_t __c) - { -- uint64x2_t result; -+ uint64x2_t __result; - __asm__ ("umlal2 %0.2d,%2.4s,%3.4s" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - #define vmlal_lane_s16(a, b, c, d) \ -@@ -7781,388 +7814,388 @@ vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) - - __extension__ extern __inline int32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlal_n_s16 (int32x4_t a, int16x4_t b, int16_t c) -+vmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c) - { -- int32x4_t result; -+ int32x4_t __result; - __asm__ ("smlal %0.4s,%2.4h,%3.h[0]" -- : "=w"(result) -- : "0"(a), "w"(b), "x"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "x"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlal_n_s32 (int64x2_t a, int32x2_t b, int32_t c) -+vmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) - { -- int64x2_t result; -+ int64x2_t __result; - __asm__ ("smlal %0.2d,%2.2s,%3.s[0]" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlal_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c) -+vmlal_n_u16 (uint32x4_t __a, uint16x4_t __b, uint16_t __c) - { -- uint32x4_t result; -+ uint32x4_t __result; - __asm__ ("umlal %0.4s,%2.4h,%3.h[0]" -- : "=w"(result) -- : "0"(a), "w"(b), "x"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "x"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlal_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c) -+vmlal_n_u32 (uint64x2_t __a, uint32x2_t __b, uint32_t __c) - { -- uint64x2_t result; -+ uint64x2_t __result; - __asm__ ("umlal %0.2d,%2.2s,%3.s[0]" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlal_s8 (int16x8_t a, int8x8_t b, int8x8_t c) -+vmlal_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c) - { -- int16x8_t 
result; -+ int16x8_t __result; - __asm__ ("smlal %0.8h,%2.8b,%3.8b" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlal_s16 (int32x4_t a, int16x4_t b, int16x4_t c) -+vmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) - { -- int32x4_t result; -+ int32x4_t __result; - __asm__ ("smlal %0.4s,%2.4h,%3.4h" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlal_s32 (int64x2_t a, int32x2_t b, int32x2_t c) -+vmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) - { -- int64x2_t result; -+ int64x2_t __result; - __asm__ ("smlal %0.2d,%2.2s,%3.2s" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c) -+vmlal_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) - { -- uint16x8_t result; -+ uint16x8_t __result; - __asm__ ("umlal %0.8h,%2.8b,%3.8b" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c) -+vmlal_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) - { -- uint32x4_t result; -+ uint32x4_t __result; - __asm__ ("umlal %0.4s,%2.4h,%3.4h" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) -+vmlal_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) - { -- uint64x2_t result; -+ uint64x2_t __result; - __asm__ ("umlal %0.2d,%2.2s,%3.2s" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline float32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c) -+vmlaq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c) - { -- float32x4_t result; -- float32x4_t t1; -+ float32x4_t __result; -+ float32x4_t __t1; - __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fadd %0.4s, %0.4s, %1.4s" -- : "=w"(result), "=w"(t1) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result), "=w"(__t1) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c) -+vmlaq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c) - { -- int16x8_t result; -+ int16x8_t __result; - __asm__ ("mla %0.8h,%2.8h,%3.h[0]" -- : "=w"(result) -- : "0"(a), 
"w"(b), "x"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "x"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlaq_n_s32 (int32x4_t a, int32x4_t b, int32_t c) -+vmlaq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c) - { -- int32x4_t result; -+ int32x4_t __result; - __asm__ ("mla %0.4s,%2.4s,%3.s[0]" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlaq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c) -+vmlaq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c) - { -- uint16x8_t result; -+ uint16x8_t __result; - __asm__ ("mla %0.8h,%2.8h,%3.h[0]" -- : "=w"(result) -- : "0"(a), "w"(b), "x"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "x"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlaq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c) -+vmlaq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c) - { -- uint32x4_t result; -+ uint32x4_t __result; - __asm__ ("mla %0.4s,%2.4s,%3.s[0]" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c) -+vmlaq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c) - { -- int8x16_t result; -+ int8x16_t __result; - __asm__ ("mla %0.16b, %2.16b, %3.16b" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c) -+vmlaq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) - { -- int16x8_t result; -+ int16x8_t __result; - __asm__ ("mla %0.8h, %2.8h, %3.8h" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c) -+vmlaq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) - { -- int32x4_t result; -+ int32x4_t __result; - __asm__ ("mla %0.4s, %2.4s, %3.4s" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c) -+vmlaq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) - { -- uint8x16_t result; -+ uint8x16_t __result; - __asm__ ("mla %0.16b, %2.16b, %3.16b" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint16x8_t - __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) --vmlaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c) -+vmlaq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) - { -- uint16x8_t result; -+ uint16x8_t __result; - __asm__ ("mla %0.8h, %2.8h, %3.8h" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c) -+vmlaq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) - { -- uint32x4_t result; -+ uint32x4_t __result; - __asm__ ("mla %0.4s, %2.4s, %3.4s" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline float32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmls_n_f32 (float32x2_t a, float32x2_t b, float32_t c) -+vmls_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c) - { -- float32x2_t result; -- float32x2_t t1; -+ float32x2_t __result; -+ float32x2_t __t1; - __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fsub %0.2s, %0.2s, %1.2s" -- : "=w"(result), "=w"(t1) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result), "=w"(__t1) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmls_n_s16 (int16x4_t a, int16x4_t b, int16_t c) -+vmls_n_s16 (int16x4_t __a, int16x4_t __b, int16_t __c) - { -- int16x4_t result; -+ int16x4_t __result; - __asm__ ("mls %0.4h, %2.4h, %3.h[0]" -- : "=w"(result) -- : "0"(a), "w"(b), "x"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "x"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmls_n_s32 (int32x2_t a, int32x2_t b, int32_t c) -+vmls_n_s32 (int32x2_t __a, int32x2_t __b, int32_t __c) - { -- int32x2_t result; -+ int32x2_t __result; - __asm__ ("mls %0.2s, %2.2s, %3.s[0]" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmls_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c) -+vmls_n_u16 (uint16x4_t __a, uint16x4_t __b, uint16_t __c) - { -- uint16x4_t result; -+ uint16x4_t __result; - __asm__ ("mls %0.4h, %2.4h, %3.h[0]" -- : "=w"(result) -- : "0"(a), "w"(b), "x"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "x"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmls_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c) -+vmls_n_u32 (uint32x2_t __a, uint32x2_t __b, uint32_t __c) - { -- uint32x2_t result; -+ uint32x2_t __result; - __asm__ ("mls %0.2s, %2.2s, %3.s[0]" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmls_s8 (int8x8_t a, int8x8_t b, 
int8x8_t c) -+vmls_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c) - { -- int8x8_t result; -+ int8x8_t __result; - __asm__ ("mls %0.8b,%2.8b,%3.8b" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmls_s16 (int16x4_t a, int16x4_t b, int16x4_t c) -+vmls_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) - { -- int16x4_t result; -+ int16x4_t __result; - __asm__ ("mls %0.4h,%2.4h,%3.4h" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmls_s32 (int32x2_t a, int32x2_t b, int32x2_t c) -+vmls_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) - { -- int32x2_t result; -+ int32x2_t __result; - __asm__ ("mls %0.2s,%2.2s,%3.2s" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmls_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c) -+vmls_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) - { -- uint8x8_t result; -+ uint8x8_t __result; - __asm__ ("mls %0.8b,%2.8b,%3.8b" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmls_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c) -+vmls_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) - { -- uint16x4_t result; -+ uint16x4_t __result; - __asm__ ("mls %0.4h,%2.4h,%3.4h" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) -+vmls_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) - { -- uint32x2_t result; -+ uint32x2_t __result; - __asm__ ("mls %0.2s,%2.2s,%3.2s" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - #define vmlsl_high_lane_s16(a, b, c, d) \ -@@ -8279,122 +8312,122 @@ vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) - - __extension__ extern __inline int32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlsl_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c) -+vmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c) - { -- int32x4_t result; -+ int32x4_t __result; - __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[0]" -- : "=w"(result) -- : "0"(a), "w"(b), "x"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "x"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlsl_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c) -+vmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c) - { -- int64x2_t 
result; -+ int64x2_t __result; - __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[0]" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlsl_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c) -+vmlsl_high_n_u16 (uint32x4_t __a, uint16x8_t __b, uint16_t __c) - { -- uint32x4_t result; -+ uint32x4_t __result; - __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[0]" -- : "=w"(result) -- : "0"(a), "w"(b), "x"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "x"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlsl_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c) -+vmlsl_high_n_u32 (uint64x2_t __a, uint32x4_t __b, uint32_t __c) - { -- uint64x2_t result; -+ uint64x2_t __result; - __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[0]" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlsl_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c) -+vmlsl_high_s8 (int16x8_t __a, int8x16_t __b, int8x16_t __c) - { -- int16x8_t result; -+ int16x8_t __result; - __asm__ ("smlsl2 %0.8h,%2.16b,%3.16b" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlsl_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c) -+vmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c) - { -- int32x4_t result; -+ int32x4_t __result; - __asm__ ("smlsl2 %0.4s,%2.8h,%3.8h" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlsl_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c) -+vmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c) - { -- int64x2_t result; -+ int64x2_t __result; - __asm__ ("smlsl2 %0.2d,%2.4s,%3.4s" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlsl_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c) -+vmlsl_high_u8 (uint16x8_t __a, uint8x16_t __b, uint8x16_t __c) - { -- uint16x8_t result; -+ uint16x8_t __result; - __asm__ ("umlsl2 %0.8h,%2.16b,%3.16b" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlsl_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c) -+vmlsl_high_u16 (uint32x4_t __a, uint16x8_t __b, uint16x8_t __c) - { -- uint32x4_t result; -+ uint32x4_t __result; - __asm__ ("umlsl2 %0.4s,%2.8h,%3.8h" -- : "=w"(result) -- : "0"(a), 
"w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) -+vmlsl_high_u32 (uint64x2_t __a, uint32x4_t __b, uint32x4_t __c) - { -- uint64x2_t result; -+ uint64x2_t __result; - __asm__ ("umlsl2 %0.2d,%2.4s,%3.4s" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - #define vmlsl_lane_s16(a, b, c, d) \ -@@ -8511,543 +8544,543 @@ vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) - - __extension__ extern __inline int32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlsl_n_s16 (int32x4_t a, int16x4_t b, int16_t c) -+vmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c) - { -- int32x4_t result; -+ int32x4_t __result; - __asm__ ("smlsl %0.4s, %2.4h, %3.h[0]" -- : "=w"(result) -- : "0"(a), "w"(b), "x"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "x"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlsl_n_s32 (int64x2_t a, int32x2_t b, int32_t c) -+vmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) - { -- int64x2_t result; -+ int64x2_t __result; - __asm__ ("smlsl %0.2d, %2.2s, %3.s[0]" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlsl_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c) -+vmlsl_n_u16 (uint32x4_t __a, uint16x4_t __b, uint16_t __c) - { -- uint32x4_t result; -+ uint32x4_t __result; - __asm__ ("umlsl %0.4s, %2.4h, %3.h[0]" -- : "=w"(result) -- : "0"(a), "w"(b), "x"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "x"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlsl_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c) -+vmlsl_n_u32 (uint64x2_t __a, uint32x2_t __b, uint32_t __c) - { -- uint64x2_t result; -+ uint64x2_t __result; - __asm__ ("umlsl %0.2d, %2.2s, %3.s[0]" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlsl_s8 (int16x8_t a, int8x8_t b, int8x8_t c) -+vmlsl_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c) - { -- int16x8_t result; -+ int16x8_t __result; - __asm__ ("smlsl %0.8h, %2.8b, %3.8b" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlsl_s16 (int32x4_t a, int16x4_t b, int16x4_t c) -+vmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) - { -- int32x4_t result; -+ int32x4_t __result; - __asm__ ("smlsl %0.4s, %2.4h, %3.4h" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), 
"w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlsl_s32 (int64x2_t a, int32x2_t b, int32x2_t c) -+vmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) - { -- int64x2_t result; -+ int64x2_t __result; - __asm__ ("smlsl %0.2d, %2.2s, %3.2s" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlsl_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c) -+vmlsl_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) - { -- uint16x8_t result; -+ uint16x8_t __result; - __asm__ ("umlsl %0.8h, %2.8b, %3.8b" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlsl_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c) -+vmlsl_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) - { -- uint32x4_t result; -+ uint32x4_t __result; - __asm__ ("umlsl %0.4s, %2.4h, %3.4h" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) -+vmlsl_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) - { -- uint64x2_t result; -+ uint64x2_t __result; - __asm__ ("umlsl %0.2d, %2.2s, %3.2s" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline float32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlsq_n_f32 (float32x4_t a, float32x4_t b, float32_t c) -+vmlsq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c) - { -- float32x4_t result; -- float32x4_t t1; -+ float32x4_t __result; -+ float32x4_t __t1; - __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fsub %0.4s, %0.4s, %1.4s" -- : "=w"(result), "=w"(t1) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result), "=w"(__t1) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c) -+vmlsq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c) - { -- int16x8_t result; -+ int16x8_t __result; - __asm__ ("mls %0.8h, %2.8h, %3.h[0]" -- : "=w"(result) -- : "0"(a), "w"(b), "x"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "x"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlsq_n_s32 (int32x4_t a, int32x4_t b, int32_t c) -+vmlsq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c) - { -- int32x4_t result; -+ int32x4_t __result; - __asm__ ("mls %0.4s, %2.4s, %3.s[0]" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - 
- __extension__ extern __inline uint16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlsq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c) -+vmlsq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c) - { -- uint16x8_t result; -+ uint16x8_t __result; - __asm__ ("mls %0.8h, %2.8h, %3.h[0]" -- : "=w"(result) -- : "0"(a), "w"(b), "x"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "x"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlsq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c) -+vmlsq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c) - { -- uint32x4_t result; -+ uint32x4_t __result; - __asm__ ("mls %0.4s, %2.4s, %3.s[0]" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlsq_s8 (int8x16_t a, int8x16_t b, int8x16_t c) -+vmlsq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c) - { -- int8x16_t result; -+ int8x16_t __result; - __asm__ ("mls %0.16b,%2.16b,%3.16b" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlsq_s16 (int16x8_t a, int16x8_t b, int16x8_t c) -+vmlsq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) - { -- int16x8_t result; -+ int16x8_t __result; - __asm__ ("mls %0.8h,%2.8h,%3.8h" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlsq_s32 (int32x4_t a, int32x4_t b, int32x4_t c) -+vmlsq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) - { -- int32x4_t result; -+ int32x4_t __result; - __asm__ ("mls %0.4s,%2.4s,%3.4s" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlsq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c) -+vmlsq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) - { -- uint8x16_t result; -+ uint8x16_t __result; - __asm__ ("mls %0.16b,%2.16b,%3.16b" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlsq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c) -+vmlsq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) - { -- uint16x8_t result; -+ uint16x8_t __result; - __asm__ ("mls %0.8h,%2.8h,%3.8h" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlsq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c) -+vmlsq_u32 (uint32x4_t __a, 
uint32x4_t __b, uint32x4_t __c) - { -- uint32x4_t result; -+ uint32x4_t __result; - __asm__ ("mls %0.4s,%2.4s,%3.4s" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b), "w"(__c) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmovl_high_s8 (int8x16_t a) -+vmovl_high_s8 (int8x16_t __a) - { -- int16x8_t result; -+ int16x8_t __result; - __asm__ ("sshll2 %0.8h,%1.16b,#0" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmovl_high_s16 (int16x8_t a) -+vmovl_high_s16 (int16x8_t __a) - { -- int32x4_t result; -+ int32x4_t __result; - __asm__ ("sshll2 %0.4s,%1.8h,#0" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmovl_high_s32 (int32x4_t a) -+vmovl_high_s32 (int32x4_t __a) - { -- int64x2_t result; -+ int64x2_t __result; - __asm__ ("sshll2 %0.2d,%1.4s,#0" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmovl_high_u8 (uint8x16_t a) -+vmovl_high_u8 (uint8x16_t __a) - { -- uint16x8_t result; -+ uint16x8_t __result; - __asm__ ("ushll2 %0.8h,%1.16b,#0" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmovl_high_u16 (uint16x8_t a) -+vmovl_high_u16 (uint16x8_t __a) - { -- uint32x4_t result; -+ uint32x4_t __result; - __asm__ ("ushll2 %0.4s,%1.8h,#0" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmovl_high_u32 (uint32x4_t a) -+vmovl_high_u32 (uint32x4_t __a) - { -- uint64x2_t result; -+ uint64x2_t __result; - __asm__ ("ushll2 %0.2d,%1.4s,#0" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmovl_s8 (int8x8_t a) -+vmovl_s8 (int8x8_t __a) - { -- int16x8_t result; -+ int16x8_t __result; - __asm__ ("sshll %0.8h,%1.8b,#0" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmovl_s16 (int16x4_t a) -+vmovl_s16 (int16x4_t __a) - { -- int32x4_t result; -+ int32x4_t __result; - __asm__ ("sshll %0.4s,%1.4h,#0" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmovl_s32 (int32x2_t a) -+vmovl_s32 
(int32x2_t __a) - { -- int64x2_t result; -+ int64x2_t __result; - __asm__ ("sshll %0.2d,%1.2s,#0" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmovl_u8 (uint8x8_t a) -+vmovl_u8 (uint8x8_t __a) - { -- uint16x8_t result; -+ uint16x8_t __result; - __asm__ ("ushll %0.8h,%1.8b,#0" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmovl_u16 (uint16x4_t a) -+vmovl_u16 (uint16x4_t __a) - { -- uint32x4_t result; -+ uint32x4_t __result; - __asm__ ("ushll %0.4s,%1.4h,#0" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmovl_u32 (uint32x2_t a) -+vmovl_u32 (uint32x2_t __a) - { -- uint64x2_t result; -+ uint64x2_t __result; - __asm__ ("ushll %0.2d,%1.2s,#0" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmovn_high_s16 (int8x8_t a, int16x8_t b) -+vmovn_high_s16 (int8x8_t __a, int16x8_t __b) - { -- int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0))); -+ int8x16_t __result = vcombine_s8 (__a, vcreate_s8 (__AARCH64_UINT64_C (0x0))); - __asm__ ("xtn2 %0.16b,%1.8h" -- : "+w"(result) -- : "w"(b) -+ : "+w"(__result) -+ : "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmovn_high_s32 (int16x4_t a, int32x4_t b) -+vmovn_high_s32 (int16x4_t __a, int32x4_t __b) - { -- int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0))); -+ int16x8_t __result = vcombine_s16 (__a, vcreate_s16 (__AARCH64_UINT64_C (0x0))); - __asm__ ("xtn2 %0.8h,%1.4s" -- : "+w"(result) -- : "w"(b) -+ : "+w"(__result) -+ : "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmovn_high_s64 (int32x2_t a, int64x2_t b) -+vmovn_high_s64 (int32x2_t __a, int64x2_t __b) - { -- int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); -+ int32x4_t __result = vcombine_s32 (__a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); - __asm__ ("xtn2 %0.4s,%1.2d" -- : "+w"(result) -- : "w"(b) -+ : "+w"(__result) -+ : "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmovn_high_u16 (uint8x8_t a, uint16x8_t b) -+vmovn_high_u16 (uint8x8_t __a, uint16x8_t __b) - { -- uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); -+ uint8x16_t __result = vcombine_u8 (__a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); - __asm__ ("xtn2 %0.16b,%1.8h" -- : "+w"(result) -- : "w"(b) -+ : "+w"(__result) -+ : "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline 
uint16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmovn_high_u32 (uint16x4_t a, uint32x4_t b) -+vmovn_high_u32 (uint16x4_t __a, uint32x4_t __b) - { -- uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); -+ uint16x8_t __result = vcombine_u16 (__a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); - __asm__ ("xtn2 %0.8h,%1.4s" -- : "+w"(result) -- : "w"(b) -+ : "+w"(__result) -+ : "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmovn_high_u64 (uint32x2_t a, uint64x2_t b) -+vmovn_high_u64 (uint32x2_t __a, uint64x2_t __b) - { -- uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); -+ uint32x4_t __result = vcombine_u32 (__a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); - __asm__ ("xtn2 %0.4s,%1.2d" -- : "+w"(result) -- : "w"(b) -+ : "+w"(__result) -+ : "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmovn_s16 (int16x8_t a) -+vmovn_s16 (int16x8_t __a) - { -- int8x8_t result; -+ int8x8_t __result; - __asm__ ("xtn %0.8b,%1.8h" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmovn_s32 (int32x4_t a) -+vmovn_s32 (int32x4_t __a) - { -- int16x4_t result; -+ int16x4_t __result; - __asm__ ("xtn %0.4h,%1.4s" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmovn_s64 (int64x2_t a) -+vmovn_s64 (int64x2_t __a) - { -- int32x2_t result; -+ int32x2_t __result; - __asm__ ("xtn %0.2s,%1.2d" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmovn_u16 (uint16x8_t a) -+vmovn_u16 (uint16x8_t __a) - { -- uint8x8_t result; -+ uint8x8_t __result; - __asm__ ("xtn %0.8b,%1.8h" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmovn_u32 (uint32x4_t a) -+vmovn_u32 (uint32x4_t __a) - { -- uint16x4_t result; -+ uint16x4_t __result; - __asm__ ("xtn %0.4h,%1.4s" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmovn_u64 (uint64x2_t a) -+vmovn_u64 (uint64x2_t __a) - { -- uint32x2_t result; -+ uint32x2_t __result; - __asm__ ("xtn %0.2s,%1.2d" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - #define vmull_high_lane_s16(a, b, c) \ -@@ -9156,134 +9189,134 @@ vmovn_u64 (uint64x2_t a) - - __extension__ extern __inline int32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmull_high_n_s16 (int16x8_t a, 
int16_t b) -+vmull_high_n_s16 (int16x8_t __a, int16_t __b) - { -- int32x4_t result; -+ int32x4_t __result; - __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]" -- : "=w"(result) -- : "w"(a), "x"(b) -+ : "=w"(__result) -+ : "w"(__a), "x"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmull_high_n_s32 (int32x4_t a, int32_t b) -+vmull_high_n_s32 (int32x4_t __a, int32_t __b) - { -- int64x2_t result; -+ int64x2_t __result; - __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmull_high_n_u16 (uint16x8_t a, uint16_t b) -+vmull_high_n_u16 (uint16x8_t __a, uint16_t __b) - { -- uint32x4_t result; -+ uint32x4_t __result; - __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]" -- : "=w"(result) -- : "w"(a), "x"(b) -+ : "=w"(__result) -+ : "w"(__a), "x"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmull_high_n_u32 (uint32x4_t a, uint32_t b) -+vmull_high_n_u32 (uint32x4_t __a, uint32_t __b) - { -- uint64x2_t result; -+ uint64x2_t __result; - __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline poly16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmull_high_p8 (poly8x16_t a, poly8x16_t b) -+vmull_high_p8 (poly8x16_t __a, poly8x16_t __b) - { -- poly16x8_t result; -+ poly16x8_t __result; - __asm__ ("pmull2 %0.8h,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmull_high_s8 (int8x16_t a, int8x16_t b) -+vmull_high_s8 (int8x16_t __a, int8x16_t __b) - { -- int16x8_t result; -+ int16x8_t __result; - __asm__ ("smull2 %0.8h,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmull_high_s16 (int16x8_t a, int16x8_t b) -+vmull_high_s16 (int16x8_t __a, int16x8_t __b) - { -- int32x4_t result; -+ int32x4_t __result; - __asm__ ("smull2 %0.4s,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmull_high_s32 (int32x4_t a, int32x4_t b) -+vmull_high_s32 (int32x4_t __a, int32x4_t __b) - { -- int64x2_t result; -+ int64x2_t __result; - __asm__ ("smull2 %0.2d,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmull_high_u8 (uint8x16_t a, 
uint8x16_t b) -+vmull_high_u8 (uint8x16_t __a, uint8x16_t __b) - { -- uint16x8_t result; -+ uint16x8_t __result; - __asm__ ("umull2 %0.8h,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmull_high_u16 (uint16x8_t a, uint16x8_t b) -+vmull_high_u16 (uint16x8_t __a, uint16x8_t __b) - { -- uint32x4_t result; -+ uint32x4_t __result; - __asm__ ("umull2 %0.4s,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmull_high_u32 (uint32x4_t a, uint32x4_t b) -+vmull_high_u32 (uint32x4_t __a, uint32x4_t __b) - { -- uint64x2_t result; -+ uint64x2_t __result; - __asm__ ("umull2 %0.2d,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - #define vmull_lane_s16(a, b, c) \ -@@ -9392,722 +9425,722 @@ vmull_high_u32 (uint32x4_t a, uint32x4_t b) - - __extension__ extern __inline int32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmull_n_s16 (int16x4_t a, int16_t b) -+vmull_n_s16 (int16x4_t __a, int16_t __b) - { -- int32x4_t result; -+ int32x4_t __result; - __asm__ ("smull %0.4s,%1.4h,%2.h[0]" -- : "=w"(result) -- : "w"(a), "x"(b) -+ : "=w"(__result) -+ : "w"(__a), "x"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmull_n_s32 (int32x2_t a, int32_t b) -+vmull_n_s32 (int32x2_t __a, int32_t __b) - { -- int64x2_t result; -+ int64x2_t __result; - __asm__ ("smull %0.2d,%1.2s,%2.s[0]" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmull_n_u16 (uint16x4_t a, uint16_t b) -+vmull_n_u16 (uint16x4_t __a, uint16_t __b) - { -- uint32x4_t result; -+ uint32x4_t __result; - __asm__ ("umull %0.4s,%1.4h,%2.h[0]" -- : "=w"(result) -- : "w"(a), "x"(b) -+ : "=w"(__result) -+ : "w"(__a), "x"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmull_n_u32 (uint32x2_t a, uint32_t b) -+vmull_n_u32 (uint32x2_t __a, uint32_t __b) - { -- uint64x2_t result; -+ uint64x2_t __result; - __asm__ ("umull %0.2d,%1.2s,%2.s[0]" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline poly16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmull_p8 (poly8x8_t a, poly8x8_t b) -+vmull_p8 (poly8x8_t __a, poly8x8_t __b) - { -- poly16x8_t result; -+ poly16x8_t __result; - __asm__ ("pmull %0.8h, %1.8b, %2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16x8_t - __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) --vmull_s8 (int8x8_t a, int8x8_t b) -+vmull_s8 (int8x8_t __a, int8x8_t __b) - { -- int16x8_t result; -+ int16x8_t __result; - __asm__ ("smull %0.8h, %1.8b, %2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmull_s16 (int16x4_t a, int16x4_t b) -+vmull_s16 (int16x4_t __a, int16x4_t __b) - { -- int32x4_t result; -+ int32x4_t __result; - __asm__ ("smull %0.4s, %1.4h, %2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmull_s32 (int32x2_t a, int32x2_t b) -+vmull_s32 (int32x2_t __a, int32x2_t __b) - { -- int64x2_t result; -+ int64x2_t __result; - __asm__ ("smull %0.2d, %1.2s, %2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmull_u8 (uint8x8_t a, uint8x8_t b) -+vmull_u8 (uint8x8_t __a, uint8x8_t __b) - { -- uint16x8_t result; -+ uint16x8_t __result; - __asm__ ("umull %0.8h, %1.8b, %2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmull_u16 (uint16x4_t a, uint16x4_t b) -+vmull_u16 (uint16x4_t __a, uint16x4_t __b) - { -- uint32x4_t result; -+ uint32x4_t __result; - __asm__ ("umull %0.4s, %1.4h, %2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmull_u32 (uint32x2_t a, uint32x2_t b) -+vmull_u32 (uint32x2_t __a, uint32x2_t __b) - { -- uint64x2_t result; -+ uint64x2_t __result; - __asm__ ("umull %0.2d, %1.2s, %2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpadal_s8 (int16x4_t a, int8x8_t b) -+vpadal_s8 (int16x4_t __a, int8x8_t __b) - { -- int16x4_t result; -+ int16x4_t __result; - __asm__ ("sadalp %0.4h,%2.8b" -- : "=w"(result) -- : "0"(a), "w"(b) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpadal_s16 (int32x2_t a, int16x4_t b) -+vpadal_s16 (int32x2_t __a, int16x4_t __b) - { -- int32x2_t result; -+ int32x2_t __result; - __asm__ ("sadalp %0.2s,%2.4h" -- : "=w"(result) -- : "0"(a), "w"(b) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int64x1_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpadal_s32 (int64x1_t a, int32x2_t b) -+vpadal_s32 (int64x1_t __a, int32x2_t 
__b) - { -- int64x1_t result; -+ int64x1_t __result; - __asm__ ("sadalp %0.1d,%2.2s" -- : "=w"(result) -- : "0"(a), "w"(b) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpadal_u8 (uint16x4_t a, uint8x8_t b) -+vpadal_u8 (uint16x4_t __a, uint8x8_t __b) - { -- uint16x4_t result; -+ uint16x4_t __result; - __asm__ ("uadalp %0.4h,%2.8b" -- : "=w"(result) -- : "0"(a), "w"(b) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpadal_u16 (uint32x2_t a, uint16x4_t b) -+vpadal_u16 (uint32x2_t __a, uint16x4_t __b) - { -- uint32x2_t result; -+ uint32x2_t __result; - __asm__ ("uadalp %0.2s,%2.4h" -- : "=w"(result) -- : "0"(a), "w"(b) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint64x1_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpadal_u32 (uint64x1_t a, uint32x2_t b) -+vpadal_u32 (uint64x1_t __a, uint32x2_t __b) - { -- uint64x1_t result; -+ uint64x1_t __result; - __asm__ ("uadalp %0.1d,%2.2s" -- : "=w"(result) -- : "0"(a), "w"(b) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpadalq_s8 (int16x8_t a, int8x16_t b) -+vpadalq_s8 (int16x8_t __a, int8x16_t __b) - { -- int16x8_t result; -+ int16x8_t __result; - __asm__ ("sadalp %0.8h,%2.16b" -- : "=w"(result) -- : "0"(a), "w"(b) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpadalq_s16 (int32x4_t a, int16x8_t b) -+vpadalq_s16 (int32x4_t __a, int16x8_t __b) - { -- int32x4_t result; -+ int32x4_t __result; - __asm__ ("sadalp %0.4s,%2.8h" -- : "=w"(result) -- : "0"(a), "w"(b) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpadalq_s32 (int64x2_t a, int32x4_t b) -+vpadalq_s32 (int64x2_t __a, int32x4_t __b) - { -- int64x2_t result; -+ int64x2_t __result; - __asm__ ("sadalp %0.2d,%2.4s" -- : "=w"(result) -- : "0"(a), "w"(b) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpadalq_u8 (uint16x8_t a, uint8x16_t b) -+vpadalq_u8 (uint16x8_t __a, uint8x16_t __b) - { -- uint16x8_t result; -+ uint16x8_t __result; - __asm__ ("uadalp %0.8h,%2.16b" -- : "=w"(result) -- : "0"(a), "w"(b) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpadalq_u16 (uint32x4_t a, uint16x8_t b) -+vpadalq_u16 (uint32x4_t __a, uint16x8_t __b) - { -- uint32x4_t result; -+ uint32x4_t __result; - __asm__ ("uadalp %0.4s,%2.8h" -- : "=w"(result) -- : 
"0"(a), "w"(b) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpadalq_u32 (uint64x2_t a, uint32x4_t b) -+vpadalq_u32 (uint64x2_t __a, uint32x4_t __b) - { -- uint64x2_t result; -+ uint64x2_t __result; - __asm__ ("uadalp %0.2d,%2.4s" -- : "=w"(result) -- : "0"(a), "w"(b) -+ : "=w"(__result) -+ : "0"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpaddl_s8 (int8x8_t a) -+vpaddl_s8 (int8x8_t __a) - { -- int16x4_t result; -+ int16x4_t __result; - __asm__ ("saddlp %0.4h,%1.8b" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpaddl_s16 (int16x4_t a) -+vpaddl_s16 (int16x4_t __a) - { -- int32x2_t result; -+ int32x2_t __result; - __asm__ ("saddlp %0.2s,%1.4h" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int64x1_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpaddl_s32 (int32x2_t a) -+vpaddl_s32 (int32x2_t __a) - { -- int64x1_t result; -+ int64x1_t __result; - __asm__ ("saddlp %0.1d,%1.2s" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpaddl_u8 (uint8x8_t a) -+vpaddl_u8 (uint8x8_t __a) - { -- uint16x4_t result; -+ uint16x4_t __result; - __asm__ ("uaddlp %0.4h,%1.8b" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpaddl_u16 (uint16x4_t a) -+vpaddl_u16 (uint16x4_t __a) - { -- uint32x2_t result; -+ uint32x2_t __result; - __asm__ ("uaddlp %0.2s,%1.4h" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint64x1_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpaddl_u32 (uint32x2_t a) -+vpaddl_u32 (uint32x2_t __a) - { -- uint64x1_t result; -+ uint64x1_t __result; - __asm__ ("uaddlp %0.1d,%1.2s" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpaddlq_s8 (int8x16_t a) -+vpaddlq_s8 (int8x16_t __a) - { -- int16x8_t result; -+ int16x8_t __result; - __asm__ ("saddlp %0.8h,%1.16b" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpaddlq_s16 (int16x8_t a) -+vpaddlq_s16 (int16x8_t __a) - { -- int32x4_t result; -+ int32x4_t __result; - __asm__ ("saddlp %0.4s,%1.8h" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No 
clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpaddlq_s32 (int32x4_t a) -+vpaddlq_s32 (int32x4_t __a) - { -- int64x2_t result; -+ int64x2_t __result; - __asm__ ("saddlp %0.2d,%1.4s" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpaddlq_u8 (uint8x16_t a) -+vpaddlq_u8 (uint8x16_t __a) - { -- uint16x8_t result; -+ uint16x8_t __result; - __asm__ ("uaddlp %0.8h,%1.16b" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpaddlq_u16 (uint16x8_t a) -+vpaddlq_u16 (uint16x8_t __a) - { -- uint32x4_t result; -+ uint32x4_t __result; - __asm__ ("uaddlp %0.4s,%1.8h" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpaddlq_u32 (uint32x4_t a) -+vpaddlq_u32 (uint32x4_t __a) - { -- uint64x2_t result; -+ uint64x2_t __result; - __asm__ ("uaddlp %0.2d,%1.4s" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpaddq_s8 (int8x16_t a, int8x16_t b) -+vpaddq_s8 (int8x16_t __a, int8x16_t __b) - { -- int8x16_t result; -+ int8x16_t __result; - __asm__ ("addp %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpaddq_s16 (int16x8_t a, int16x8_t b) -+vpaddq_s16 (int16x8_t __a, int16x8_t __b) - { -- int16x8_t result; -+ int16x8_t __result; - __asm__ ("addp %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpaddq_s32 (int32x4_t a, int32x4_t b) -+vpaddq_s32 (int32x4_t __a, int32x4_t __b) - { -- int32x4_t result; -+ int32x4_t __result; - __asm__ ("addp %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpaddq_s64 (int64x2_t a, int64x2_t b) -+vpaddq_s64 (int64x2_t __a, int64x2_t __b) - { -- int64x2_t result; -+ int64x2_t __result; - __asm__ ("addp %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpaddq_u8 (uint8x16_t a, uint8x16_t b) -+vpaddq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- uint8x16_t result; -+ 
uint8x16_t __result; - __asm__ ("addp %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpaddq_u16 (uint16x8_t a, uint16x8_t b) -+vpaddq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- uint16x8_t result; -+ uint16x8_t __result; - __asm__ ("addp %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpaddq_u32 (uint32x4_t a, uint32x4_t b) -+vpaddq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- uint32x4_t result; -+ uint32x4_t __result; - __asm__ ("addp %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpaddq_u64 (uint64x2_t a, uint64x2_t b) -+vpaddq_u64 (uint64x2_t __a, uint64x2_t __b) - { -- uint64x2_t result; -+ uint64x2_t __result; - __asm__ ("addp %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqdmulh_n_s16 (int16x4_t a, int16_t b) -+vqdmulh_n_s16 (int16x4_t __a, int16_t __b) - { -- int16x4_t result; -+ int16x4_t __result; - __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]" -- : "=w"(result) -- : "w"(a), "x"(b) -+ : "=w"(__result) -+ : "w"(__a), "x"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqdmulh_n_s32 (int32x2_t a, int32_t b) -+vqdmulh_n_s32 (int32x2_t __a, int32_t __b) - { -- int32x2_t result; -+ int32x2_t __result; - __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqdmulhq_n_s16 (int16x8_t a, int16_t b) -+vqdmulhq_n_s16 (int16x8_t __a, int16_t __b) - { -- int16x8_t result; -+ int16x8_t __result; - __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]" -- : "=w"(result) -- : "w"(a), "x"(b) -+ : "=w"(__result) -+ : "w"(__a), "x"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqdmulhq_n_s32 (int32x4_t a, int32_t b) -+vqdmulhq_n_s32 (int32x4_t __a, int32_t __b) - { -- int32x4_t result; -+ int32x4_t __result; - __asm__ ("sqdmulh %0.4s,%1.4s,%2.s[0]" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqmovn_high_s16 (int8x8_t a, int16x8_t b) -+vqmovn_high_s16 (int8x8_t __a, int16x8_t __b) - { -- int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C 
(0x0))); -+ int8x16_t __result = vcombine_s8 (__a, vcreate_s8 (__AARCH64_UINT64_C (0x0))); - __asm__ ("sqxtn2 %0.16b, %1.8h" -- : "+w"(result) -- : "w"(b) -+ : "+w"(__result) -+ : "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqmovn_high_s32 (int16x4_t a, int32x4_t b) -+vqmovn_high_s32 (int16x4_t __a, int32x4_t __b) - { -- int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0))); -+ int16x8_t __result = vcombine_s16 (__a, vcreate_s16 (__AARCH64_UINT64_C (0x0))); - __asm__ ("sqxtn2 %0.8h, %1.4s" -- : "+w"(result) -- : "w"(b) -+ : "+w"(__result) -+ : "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqmovn_high_s64 (int32x2_t a, int64x2_t b) -+vqmovn_high_s64 (int32x2_t __a, int64x2_t __b) - { -- int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); -+ int32x4_t __result = vcombine_s32 (__a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); - __asm__ ("sqxtn2 %0.4s, %1.2d" -- : "+w"(result) -- : "w"(b) -+ : "+w"(__result) -+ : "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqmovn_high_u16 (uint8x8_t a, uint16x8_t b) -+vqmovn_high_u16 (uint8x8_t __a, uint16x8_t __b) - { -- uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); -+ uint8x16_t __result = vcombine_u8 (__a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); - __asm__ ("uqxtn2 %0.16b, %1.8h" -- : "+w"(result) -- : "w"(b) -+ : "+w"(__result) -+ : "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqmovn_high_u32 (uint16x4_t a, uint32x4_t b) -+vqmovn_high_u32 (uint16x4_t __a, uint32x4_t __b) - { -- uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); -+ uint16x8_t __result = vcombine_u16 (__a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); - __asm__ ("uqxtn2 %0.8h, %1.4s" -- : "+w"(result) -- : "w"(b) -+ : "+w"(__result) -+ : "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqmovn_high_u64 (uint32x2_t a, uint64x2_t b) -+vqmovn_high_u64 (uint32x2_t __a, uint64x2_t __b) - { -- uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); -+ uint32x4_t __result = vcombine_u32 (__a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); - __asm__ ("uqxtn2 %0.4s, %1.2d" -- : "+w"(result) -- : "w"(b) -+ : "+w"(__result) -+ : "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqmovun_high_s16 (uint8x8_t a, int16x8_t b) -+vqmovun_high_s16 (uint8x8_t __a, int16x8_t __b) - { -- uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); -+ uint8x16_t __result = vcombine_u8 (__a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); - __asm__ ("sqxtun2 %0.16b, %1.8h" -- : "+w"(result) -- : "w"(b) -+ : "+w"(__result) -+ : "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint16x8_t - 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqmovun_high_s32 (uint16x4_t a, int32x4_t b) -+vqmovun_high_s32 (uint16x4_t __a, int32x4_t __b) - { -- uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); -+ uint16x8_t __result = vcombine_u16 (__a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); - __asm__ ("sqxtun2 %0.8h, %1.4s" -- : "+w"(result) -- : "w"(b) -+ : "+w"(__result) -+ : "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqmovun_high_s64 (uint32x2_t a, int64x2_t b) -+vqmovun_high_s64 (uint32x2_t __a, int64x2_t __b) - { -- uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); -+ uint32x4_t __result = vcombine_u32 (__a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); - __asm__ ("sqxtun2 %0.4s, %1.2d" -- : "+w"(result) -- : "w"(b) -+ : "+w"(__result) -+ : "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqrdmulh_n_s16 (int16x4_t a, int16_t b) -+vqrdmulh_n_s16 (int16x4_t __a, int16_t __b) - { -- int16x4_t result; -+ int16x4_t __result; - __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]" -- : "=w"(result) -- : "w"(a), "x"(b) -+ : "=w"(__result) -+ : "w"(__a), "x"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqrdmulh_n_s32 (int32x2_t a, int32_t b) -+vqrdmulh_n_s32 (int32x2_t __a, int32_t __b) - { -- int32x2_t result; -+ int32x2_t __result; - __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqrdmulhq_n_s16 (int16x8_t a, int16_t b) -+vqrdmulhq_n_s16 (int16x8_t __a, int16_t __b) - { -- int16x8_t result; -+ int16x8_t __result; - __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]" -- : "=w"(result) -- : "w"(a), "x"(b) -+ : "=w"(__result) -+ : "w"(__a), "x"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqrdmulhq_n_s32 (int32x4_t a, int32_t b) -+vqrdmulhq_n_s32 (int32x4_t __a, int32_t __b) - { -- int32x4_t result; -+ int32x4_t __result; - __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - #define vqrshrn_high_n_s16(a, b, c) \ -@@ -10544,26 +10577,26 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) - - __extension__ extern __inline uint32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrsqrte_u32 (uint32x2_t a) -+vrsqrte_u32 (uint32x2_t __a) - { -- uint32x2_t result; -+ uint32x2_t __result; - __asm__ ("ursqrte %0.2s,%1.2s" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrsqrteq_u32 (uint32x4_t a) -+vrsqrteq_u32 (uint32x4_t __a) - { -- uint32x4_t result; -+ uint32x4_t __result; - __asm__ 
("ursqrte %0.4s,%1.4s" -- : "=w"(result) -- : "w"(a) -+ : "=w"(__result) -+ : "w"(__a) - : /* No clobbers */); -- return result; -+ return __result; - } - - #define vshrn_high_n_s16(a, b, c) \ -@@ -10860,48 +10893,48 @@ vrsqrteq_u32 (uint32x4_t a) - - __extension__ extern __inline uint8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtst_p8 (poly8x8_t a, poly8x8_t b) -+vtst_p8 (poly8x8_t __a, poly8x8_t __b) - { -- return (uint8x8_t) ((((uint8x8_t) a) & ((uint8x8_t) b)) -+ return (uint8x8_t) ((((uint8x8_t) __a) & ((uint8x8_t) __b)) - != 0); - } - - __extension__ extern __inline uint16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtst_p16 (poly16x4_t a, poly16x4_t b) -+vtst_p16 (poly16x4_t __a, poly16x4_t __b) - { -- return (uint16x4_t) ((((uint16x4_t) a) & ((uint16x4_t) b)) -+ return (uint16x4_t) ((((uint16x4_t) __a) & ((uint16x4_t) __b)) - != 0); - } - - __extension__ extern __inline uint64x1_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtst_p64 (poly64x1_t a, poly64x1_t b) -+vtst_p64 (poly64x1_t __a, poly64x1_t __b) - { -- return (uint64x1_t) ((a & b) != __AARCH64_INT64_C (0)); -+ return (uint64x1_t) ((__a & __b) != __AARCH64_INT64_C (0)); - } - - __extension__ extern __inline uint8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtstq_p8 (poly8x16_t a, poly8x16_t b) -+vtstq_p8 (poly8x16_t __a, poly8x16_t __b) - { -- return (uint8x16_t) ((((uint8x16_t) a) & ((uint8x16_t) b)) -+ return (uint8x16_t) ((((uint8x16_t) __a) & ((uint8x16_t) __b)) - != 0); - } - - __extension__ extern __inline uint16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtstq_p16 (poly16x8_t a, poly16x8_t b) -+vtstq_p16 (poly16x8_t __a, poly16x8_t __b) - { -- return (uint16x8_t) ((((uint16x8_t) a) & ((uint16x8_t) b)) -+ return (uint16x8_t) ((((uint16x8_t) __a) & ((uint16x8_t) __b)) - != 0); - } - - __extension__ extern __inline uint64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtstq_p64 (poly64x2_t a, poly64x2_t b) -+vtstq_p64 (poly64x2_t __a, poly64x2_t __b) - { -- return (uint64x2_t) ((((uint64x2_t) a) & ((uint64x2_t) b)) -+ return (uint64x2_t) ((((uint64x2_t) __a) & ((uint64x2_t) __b)) - != __AARCH64_INT64_C (0)); - } - -@@ -11248,20 +11281,20 @@ __ST4_LANE_FUNC (uint64x2x4_t, uint64_t, v2di, di, u64) - - __extension__ extern __inline int64_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vaddlv_s32 (int32x2_t a) -+vaddlv_s32 (int32x2_t __a) - { -- int64_t result; -- __asm__ ("saddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : ); -- return result; -+ int64_t __result; -+ __asm__ ("saddlp %0.1d, %1.2s" : "=w"(__result) : "w"(__a) : ); -+ return __result; - } - - __extension__ extern __inline uint64_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vaddlv_u32 (uint32x2_t a) -+vaddlv_u32 (uint32x2_t __a) - { -- uint64_t result; -- __asm__ ("uaddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : ); -- return result; -+ uint64_t __result; -+ __asm__ ("uaddlp %0.1d, %1.2s" : "=w"(__result) : "w"(__a) : ); -+ return __result; - } - - __extension__ extern __inline int16x4_t -@@ -11324,367 +11357,367 @@ vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c) - - __extension__ extern __inline poly8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbl1_p8 (poly8x16_t a, uint8x8_t b) -+vqtbl1_p8 (poly8x16_t __a, uint8x8_t __b) - { -- poly8x8_t result; -+ poly8x8_t __result; - 
__asm__ ("tbl %0.8b, {%1.16b}, %2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbl1_s8 (int8x16_t a, uint8x8_t b) -+vqtbl1_s8 (int8x16_t __a, uint8x8_t __b) - { -- int8x8_t result; -+ int8x8_t __result; - __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbl1_u8 (uint8x16_t a, uint8x8_t b) -+vqtbl1_u8 (uint8x16_t __a, uint8x8_t __b) - { -- uint8x8_t result; -+ uint8x8_t __result; - __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline poly8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbl1q_p8 (poly8x16_t a, uint8x16_t b) -+vqtbl1q_p8 (poly8x16_t __a, uint8x16_t __b) - { -- poly8x16_t result; -+ poly8x16_t __result; - __asm__ ("tbl %0.16b, {%1.16b}, %2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbl1q_s8 (int8x16_t a, uint8x16_t b) -+vqtbl1q_s8 (int8x16_t __a, uint8x16_t __b) - { -- int8x16_t result; -+ int8x16_t __result; - __asm__ ("tbl %0.16b, {%1.16b}, %2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbl1q_u8 (uint8x16_t a, uint8x16_t b) -+vqtbl1q_u8 (uint8x16_t __a, uint8x16_t __b) - { -- uint8x16_t result; -+ uint8x16_t __result; - __asm__ ("tbl %0.16b, {%1.16b}, %2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -+ : "=w"(__result) -+ : "w"(__a), "w"(__b) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbx1_s8 (int8x8_t r, int8x16_t tab, uint8x8_t idx) -+vqtbx1_s8 (int8x8_t __r, int8x16_t __tab, uint8x8_t __idx) - { -- int8x8_t result = r; -+ int8x8_t __result = __r; - __asm__ ("tbx %0.8b,{%1.16b},%2.8b" -- : "+w"(result) -- : "w"(tab), "w"(idx) -+ : "+w"(__result) -+ : "w"(__tab), "w"(__idx) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbx1_u8 (uint8x8_t r, uint8x16_t tab, uint8x8_t idx) -+vqtbx1_u8 (uint8x8_t __r, uint8x16_t __tab, uint8x8_t __idx) - { -- uint8x8_t result = r; -+ uint8x8_t __result = __r; - __asm__ ("tbx %0.8b,{%1.16b},%2.8b" -- : "+w"(result) -- : "w"(tab), "w"(idx) -+ : "+w"(__result) -+ : "w"(__tab), "w"(__idx) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline poly8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbx1_p8 (poly8x8_t r, poly8x16_t tab, uint8x8_t idx) -+vqtbx1_p8 (poly8x8_t __r, poly8x16_t __tab, 
uint8x8_t __idx) - { -- poly8x8_t result = r; -+ poly8x8_t __result = __r; - __asm__ ("tbx %0.8b,{%1.16b},%2.8b" -- : "+w"(result) -- : "w"(tab), "w"(idx) -+ : "+w"(__result) -+ : "w"(__tab), "w"(__idx) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbx1q_s8 (int8x16_t r, int8x16_t tab, uint8x16_t idx) -+vqtbx1q_s8 (int8x16_t __r, int8x16_t __tab, uint8x16_t __idx) - { -- int8x16_t result = r; -+ int8x16_t __result = __r; - __asm__ ("tbx %0.16b,{%1.16b},%2.16b" -- : "+w"(result) -- : "w"(tab), "w"(idx) -+ : "+w"(__result) -+ : "w"(__tab), "w"(__idx) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbx1q_u8 (uint8x16_t r, uint8x16_t tab, uint8x16_t idx) -+vqtbx1q_u8 (uint8x16_t __r, uint8x16_t __tab, uint8x16_t __idx) - { -- uint8x16_t result = r; -+ uint8x16_t __result = __r; - __asm__ ("tbx %0.16b,{%1.16b},%2.16b" -- : "+w"(result) -- : "w"(tab), "w"(idx) -+ : "+w"(__result) -+ : "w"(__tab), "w"(__idx) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline poly8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbx1q_p8 (poly8x16_t r, poly8x16_t tab, uint8x16_t idx) -+vqtbx1q_p8 (poly8x16_t __r, poly8x16_t __tab, uint8x16_t __idx) - { -- poly8x16_t result = r; -+ poly8x16_t __result = __r; - __asm__ ("tbx %0.16b,{%1.16b},%2.16b" -- : "+w"(result) -- : "w"(tab), "w"(idx) -+ : "+w"(__result) -+ : "w"(__tab), "w"(__idx) - : /* No clobbers */); -- return result; -+ return __result; - } - - /* V7 legacy table intrinsics. 
*/ - - __extension__ extern __inline int8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtbl1_s8 (int8x8_t tab, int8x8_t idx) -+vtbl1_s8 (int8x8_t __tab, int8x8_t __idx) - { -- int8x8_t result; -- int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (__AARCH64_UINT64_C (0x0))); -+ int8x8_t __result; -+ int8x16_t __temp = vcombine_s8 (__tab, vcreate_s8 (__AARCH64_UINT64_C (0x0))); - __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" -- : "=w"(result) -- : "w"(temp), "w"(idx) -+ : "=w"(__result) -+ : "w"(__temp), "w"(__idx) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtbl1_u8 (uint8x8_t tab, uint8x8_t idx) -+vtbl1_u8 (uint8x8_t __tab, uint8x8_t __idx) - { -- uint8x8_t result; -- uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (__AARCH64_UINT64_C (0x0))); -+ uint8x8_t __result; -+ uint8x16_t __temp = vcombine_u8 (__tab, vcreate_u8 (__AARCH64_UINT64_C (0x0))); - __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" -- : "=w"(result) -- : "w"(temp), "w"(idx) -+ : "=w"(__result) -+ : "w"(__temp), "w"(__idx) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline poly8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtbl1_p8 (poly8x8_t tab, uint8x8_t idx) -+vtbl1_p8 (poly8x8_t __tab, uint8x8_t __idx) - { -- poly8x8_t result; -- poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (__AARCH64_UINT64_C (0x0))); -+ poly8x8_t __result; -+ poly8x16_t __temp = vcombine_p8 (__tab, vcreate_p8 (__AARCH64_UINT64_C (0x0))); - __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" -- : "=w"(result) -- : "w"(temp), "w"(idx) -+ : "=w"(__result) -+ : "w"(__temp), "w"(__idx) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline int8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtbl2_s8 (int8x8x2_t tab, int8x8_t idx) -+vtbl2_s8 (int8x8x2_t __tab, int8x8_t __idx) - { -- int8x8_t result; -- int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]); -+ int8x8_t __result; -+ int8x16_t __temp = vcombine_s8 (__tab.val[0], __tab.val[1]); - __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" -- : "=w"(result) -- : "w"(temp), "w"(idx) -+ : "=w"(__result) -+ : "w"(__temp), "w"(__idx) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtbl2_u8 (uint8x8x2_t tab, uint8x8_t idx) -+vtbl2_u8 (uint8x8x2_t __tab, uint8x8_t __idx) - { -- uint8x8_t result; -- uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]); -+ uint8x8_t __result; -+ uint8x16_t __temp = vcombine_u8 (__tab.val[0], __tab.val[1]); - __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" -- : "=w"(result) -- : "w"(temp), "w"(idx) -+ : "=w"(__result) -+ : "w"(__temp), "w"(__idx) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline poly8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtbl2_p8 (poly8x8x2_t tab, uint8x8_t idx) -+vtbl2_p8 (poly8x8x2_t __tab, uint8x8_t __idx) - { -- poly8x8_t result; -- poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]); -+ poly8x8_t __result; -+ poly8x16_t __temp = vcombine_p8 (__tab.val[0], __tab.val[1]); - __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" -- : "=w"(result) -- : "w"(temp), "w"(idx) -+ : "=w"(__result) -+ : "w"(__temp), "w"(__idx) - : /* No clobbers */); -- return result; -+ return __result; - } - - 
__extension__ extern __inline int8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtbl3_s8 (int8x8x3_t tab, int8x8_t idx) -+vtbl3_s8 (int8x8x3_t __tab, int8x8_t __idx) - { -- int8x8_t result; -- int8x16x2_t temp; -+ int8x8_t __result; -+ int8x16x2_t __temp; - __builtin_aarch64_simd_oi __o; -- temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]); -- temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0))); -+ __temp.val[0] = vcombine_s8 (__tab.val[0], __tab.val[1]); -+ __temp.val[1] = vcombine_s8 (__tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0))); - __o = __builtin_aarch64_set_qregoiv16qi (__o, -- (int8x16_t) temp.val[0], 0); -+ (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, -- (int8x16_t) temp.val[1], 1); -- result = __builtin_aarch64_tbl3v8qi (__o, idx); -- return result; -+ (int8x16_t) __temp.val[1], 1); -+ __result = __builtin_aarch64_tbl3v8qi (__o, __idx); -+ return __result; - } - - __extension__ extern __inline uint8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtbl3_u8 (uint8x8x3_t tab, uint8x8_t idx) -+vtbl3_u8 (uint8x8x3_t __tab, uint8x8_t __idx) - { -- uint8x8_t result; -- uint8x16x2_t temp; -+ uint8x8_t __result; -+ uint8x16x2_t __temp; - __builtin_aarch64_simd_oi __o; -- temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]); -- temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0))); -+ __temp.val[0] = vcombine_u8 (__tab.val[0], __tab.val[1]); -+ __temp.val[1] = vcombine_u8 (__tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0))); - __o = __builtin_aarch64_set_qregoiv16qi (__o, -- (int8x16_t) temp.val[0], 0); -+ (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, -- (int8x16_t) temp.val[1], 1); -- result = (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); -- return result; -+ (int8x16_t) __temp.val[1], 1); -+ __result = (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx); -+ return __result; - } - - __extension__ extern __inline poly8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtbl3_p8 (poly8x8x3_t tab, uint8x8_t idx) -+vtbl3_p8 (poly8x8x3_t __tab, uint8x8_t __idx) - { -- poly8x8_t result; -- poly8x16x2_t temp; -+ poly8x8_t __result; -+ poly8x16x2_t __temp; - __builtin_aarch64_simd_oi __o; -- temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]); -- temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0))); -+ __temp.val[0] = vcombine_p8 (__tab.val[0], __tab.val[1]); -+ __temp.val[1] = vcombine_p8 (__tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0))); - __o = __builtin_aarch64_set_qregoiv16qi (__o, -- (int8x16_t) temp.val[0], 0); -+ (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, -- (int8x16_t) temp.val[1], 1); -- result = (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); -- return result; -+ (int8x16_t) __temp.val[1], 1); -+ __result = (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx); -+ return __result; - } - - __extension__ extern __inline int8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtbl4_s8 (int8x8x4_t tab, int8x8_t idx) -+vtbl4_s8 (int8x8x4_t __tab, int8x8_t __idx) - { -- int8x8_t result; -- int8x16x2_t temp; -+ int8x8_t __result; -+ int8x16x2_t __temp; - __builtin_aarch64_simd_oi __o; -- temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]); -- temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]); -+ __temp.val[0] = vcombine_s8 (__tab.val[0], __tab.val[1]); -+ 
__temp.val[1] = vcombine_s8 (__tab.val[2], __tab.val[3]); - __o = __builtin_aarch64_set_qregoiv16qi (__o, -- (int8x16_t) temp.val[0], 0); -+ (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, -- (int8x16_t) temp.val[1], 1); -- result = __builtin_aarch64_tbl3v8qi (__o, idx); -- return result; -+ (int8x16_t) __temp.val[1], 1); -+ __result = __builtin_aarch64_tbl3v8qi (__o, __idx); -+ return __result; - } - - __extension__ extern __inline uint8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtbl4_u8 (uint8x8x4_t tab, uint8x8_t idx) -+vtbl4_u8 (uint8x8x4_t __tab, uint8x8_t __idx) - { -- uint8x8_t result; -- uint8x16x2_t temp; -+ uint8x8_t __result; -+ uint8x16x2_t __temp; - __builtin_aarch64_simd_oi __o; -- temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]); -- temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]); -+ __temp.val[0] = vcombine_u8 (__tab.val[0], __tab.val[1]); -+ __temp.val[1] = vcombine_u8 (__tab.val[2], __tab.val[3]); - __o = __builtin_aarch64_set_qregoiv16qi (__o, -- (int8x16_t) temp.val[0], 0); -+ (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, -- (int8x16_t) temp.val[1], 1); -- result = (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); -- return result; -+ (int8x16_t) __temp.val[1], 1); -+ __result = (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx); -+ return __result; - } - - __extension__ extern __inline poly8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtbl4_p8 (poly8x8x4_t tab, uint8x8_t idx) -+vtbl4_p8 (poly8x8x4_t __tab, uint8x8_t __idx) - { -- poly8x8_t result; -- poly8x16x2_t temp; -+ poly8x8_t __result; -+ poly8x16x2_t __temp; - __builtin_aarch64_simd_oi __o; -- temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]); -- temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]); -+ __temp.val[0] = vcombine_p8 (__tab.val[0], __tab.val[1]); -+ __temp.val[1] = vcombine_p8 (__tab.val[2], __tab.val[3]); - __o = __builtin_aarch64_set_qregoiv16qi (__o, -- (int8x16_t) temp.val[0], 0); -+ (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, -- (int8x16_t) temp.val[1], 1); -- result = (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); -- return result; -+ (int8x16_t) __temp.val[1], 1); -+ __result = (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx); -+ return __result; - } - - __extension__ extern __inline int8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtbx2_s8 (int8x8_t r, int8x8x2_t tab, int8x8_t idx) -+vtbx2_s8 (int8x8_t __r, int8x8x2_t __tab, int8x8_t __idx) - { -- int8x8_t result = r; -- int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]); -+ int8x8_t __result = __r; -+ int8x16_t __temp = vcombine_s8 (__tab.val[0], __tab.val[1]); - __asm__ ("tbx %0.8b, {%1.16b}, %2.8b" -- : "+w"(result) -- : "w"(temp), "w"(idx) -+ : "+w"(__result) -+ : "w"(__temp), "w"(__idx) - : /* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline uint8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtbx2_u8 (uint8x8_t r, uint8x8x2_t tab, uint8x8_t idx) -+vtbx2_u8 (uint8x8_t __r, uint8x8x2_t __tab, uint8x8_t __idx) - { -- uint8x8_t result = r; -- uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]); -+ uint8x8_t __result = __r; -+ uint8x16_t __temp = vcombine_u8 (__tab.val[0], __tab.val[1]); - __asm__ ("tbx %0.8b, {%1.16b}, %2.8b" -- : "+w"(result) -- : "w"(temp), "w"(idx) -+ : "+w"(__result) -+ : "w"(__temp), "w"(__idx) - : 
/* No clobbers */); -- return result; -+ return __result; - } - - __extension__ extern __inline poly8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtbx2_p8 (poly8x8_t r, poly8x8x2_t tab, uint8x8_t idx) -+vtbx2_p8 (poly8x8_t __r, poly8x8x2_t __tab, uint8x8_t __idx) - { -- poly8x8_t result = r; -- poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]); -+ poly8x8_t __result = __r; -+ poly8x16_t __temp = vcombine_p8 (__tab.val[0], __tab.val[1]); - __asm__ ("tbx %0.8b, {%1.16b}, %2.8b" -- : "+w"(result) -- : "w"(temp), "w"(idx) -+ : "+w"(__result) -+ : "w"(__temp), "w"(__idx) - : /* No clobbers */); -- return result; -+ return __result; - } - - /* End of temporary inline asm. */ -@@ -17063,98 +17096,98 @@ vld1_f16 (const float16_t *__a) - - __extension__ extern __inline float32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vld1_f32 (const float32_t *a) -+vld1_f32 (const float32_t *__a) - { -- return __builtin_aarch64_ld1v2sf ((const __builtin_aarch64_simd_sf *) a); -+ return __builtin_aarch64_ld1v2sf ((const __builtin_aarch64_simd_sf *) __a); - } - - __extension__ extern __inline float64x1_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vld1_f64 (const float64_t *a) -+vld1_f64 (const float64_t *__a) - { -- return (float64x1_t) {*a}; -+ return (float64x1_t) {*__a}; - } - - __extension__ extern __inline poly8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vld1_p8 (const poly8_t *a) -+vld1_p8 (const poly8_t *__a) - { - return (poly8x8_t) -- __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a); -+ __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) __a); - } - - __extension__ extern __inline poly16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vld1_p16 (const poly16_t *a) -+vld1_p16 (const poly16_t *__a) - { - return (poly16x4_t) -- __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a); -+ __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) __a); - } - - __extension__ extern __inline poly64x1_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vld1_p64 (const poly64_t *a) -+vld1_p64 (const poly64_t *__a) - { -- return (poly64x1_t) {*a}; -+ return (poly64x1_t) {*__a}; - } - - __extension__ extern __inline int8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vld1_s8 (const int8_t *a) -+vld1_s8 (const int8_t *__a) - { -- return __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a); -+ return __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) __a); - } - - __extension__ extern __inline int16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vld1_s16 (const int16_t *a) -+vld1_s16 (const int16_t *__a) - { -- return __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a); -+ return __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) __a); - } - - __extension__ extern __inline int32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vld1_s32 (const int32_t *a) -+vld1_s32 (const int32_t *__a) - { -- return __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a); -+ return __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) __a); - } - - __extension__ extern __inline int64x1_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vld1_s64 (const int64_t *a) -+vld1_s64 (const int64_t *__a) - { -- return (int64x1_t) {*a}; -+ return 
(int64x1_t) {*__a}; - } - - __extension__ extern __inline uint8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vld1_u8 (const uint8_t *a) -+vld1_u8 (const uint8_t *__a) - { - return (uint8x8_t) -- __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a); -+ __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) __a); - } - - __extension__ extern __inline uint16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vld1_u16 (const uint16_t *a) -+vld1_u16 (const uint16_t *__a) - { - return (uint16x4_t) -- __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a); -+ __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) __a); - } - - __extension__ extern __inline uint32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vld1_u32 (const uint32_t *a) -+vld1_u32 (const uint32_t *__a) - { - return (uint32x2_t) -- __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a); -+ __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) __a); - } - - __extension__ extern __inline uint64x1_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vld1_u64 (const uint64_t *a) -+vld1_u64 (const uint64_t *__a) - { -- return (uint64x1_t) {*a}; -+ return (uint64x1_t) {*__a}; - } - - /* vld1x3 */ -@@ -17536,76 +17569,76 @@ vld1q_f16 (const float16_t *__a) - - __extension__ extern __inline float32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vld1q_f32 (const float32_t *a) -+vld1q_f32 (const float32_t *__a) - { -- return __builtin_aarch64_ld1v4sf ((const __builtin_aarch64_simd_sf *) a); -+ return __builtin_aarch64_ld1v4sf ((const __builtin_aarch64_simd_sf *) __a); - } - - __extension__ extern __inline float64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vld1q_f64 (const float64_t *a) -+vld1q_f64 (const float64_t *__a) - { -- return __builtin_aarch64_ld1v2df ((const __builtin_aarch64_simd_df *) a); -+ return __builtin_aarch64_ld1v2df ((const __builtin_aarch64_simd_df *) __a); - } - - __extension__ extern __inline poly8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vld1q_p8 (const poly8_t *a) -+vld1q_p8 (const poly8_t *__a) - { - return (poly8x16_t) -- __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a); -+ __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) __a); - } - - __extension__ extern __inline poly16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vld1q_p16 (const poly16_t *a) -+vld1q_p16 (const poly16_t *__a) - { - return (poly16x8_t) -- __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a); -+ __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) __a); - } - - __extension__ extern __inline poly64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vld1q_p64 (const poly64_t *a) -+vld1q_p64 (const poly64_t *__a) - { - return (poly64x2_t) -- __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a); -+ __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) __a); - } - - __extension__ extern __inline int8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vld1q_s8 (const int8_t *a) -+vld1q_s8 (const int8_t *__a) - { -- return __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a); -+ return __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) __a); - } - - __extension__ extern __inline int16x8_t - __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) --vld1q_s16 (const int16_t *a) -+vld1q_s16 (const int16_t *__a) - { -- return __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a); -+ return __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) __a); - } - - __extension__ extern __inline int32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vld1q_s32 (const int32_t *a) -+vld1q_s32 (const int32_t *__a) - { -- return __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a); -+ return __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) __a); - } - - __extension__ extern __inline int64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vld1q_s64 (const int64_t *a) -+vld1q_s64 (const int64_t *__a) - { -- return __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a); -+ return __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) __a); - } - - __extension__ extern __inline uint8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vld1q_u8 (const uint8_t *a) -+vld1q_u8 (const uint8_t *__a) - { - return (uint8x16_t) -- __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a); -+ __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) __a); - } - - __extension__ extern __inline uint8x8x2_t -@@ -17946,26 +17979,308 @@ vld1q_p64_x2 (const poly64_t *__a) - - __extension__ extern __inline uint16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vld1q_u16 (const uint16_t *a) -+vld1q_u16 (const uint16_t *__a) - { - return (uint16x8_t) -- __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a); -+ __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) __a); - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vld1q_u32 (const uint32_t *a) -+vld1q_u32 (const uint32_t *__a) - { - return (uint32x4_t) -- __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a); -+ __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) __a); - } - - __extension__ extern __inline uint64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vld1q_u64 (const uint64_t *a) -+vld1q_u64 (const uint64_t *__a) - { - return (uint64x2_t) -- __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a); -+ __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) __a); -+} -+ -+/* vld1(q)_x4. 
*/ -+ -+__extension__ extern __inline int8x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_s8_x4 (const int8_t *__a) -+{ -+ union { int8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au; -+ __au.__o -+ = __builtin_aarch64_ld1x4v8qi ((const __builtin_aarch64_simd_qi *) __a); -+ return __au.__i; -+} -+ -+__extension__ extern __inline int8x16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_s8_x4 (const int8_t *__a) -+{ -+ union { int8x16x4_t __i; __builtin_aarch64_simd_xi __o; } __au; -+ __au.__o -+ = __builtin_aarch64_ld1x4v16qi ((const __builtin_aarch64_simd_qi *) __a); -+ return __au.__i; -+} -+ -+__extension__ extern __inline int16x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_s16_x4 (const int16_t *__a) -+{ -+ union { int16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au; -+ __au.__o -+ = __builtin_aarch64_ld1x4v4hi ((const __builtin_aarch64_simd_hi *) __a); -+ return __au.__i; -+} -+ -+__extension__ extern __inline int16x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_s16_x4 (const int16_t *__a) -+{ -+ union { int16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au; -+ __au.__o -+ = __builtin_aarch64_ld1x4v8hi ((const __builtin_aarch64_simd_hi *) __a); -+ return __au.__i; -+} -+ -+__extension__ extern __inline int32x2x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_s32_x4 (const int32_t *__a) -+{ -+ union { int32x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au; -+ __au.__o -+ = __builtin_aarch64_ld1x4v2si ((const __builtin_aarch64_simd_si *) __a); -+ return __au.__i; -+} -+ -+__extension__ extern __inline int32x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_s32_x4 (const int32_t *__a) -+{ -+ union { int32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au; -+ __au.__o -+ = __builtin_aarch64_ld1x4v4si ((const __builtin_aarch64_simd_si *) __a); -+ return __au.__i; -+} -+ -+__extension__ extern __inline uint8x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_u8_x4 (const uint8_t *__a) -+{ -+ union { uint8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au; -+ __au.__o -+ = __builtin_aarch64_ld1x4v8qi ((const __builtin_aarch64_simd_qi *) __a); -+ return __au.__i; -+} -+ -+__extension__ extern __inline uint8x16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_u8_x4 (const uint8_t *__a) -+{ -+ union { uint8x16x4_t __i; __builtin_aarch64_simd_xi __o; } __au; -+ __au.__o -+ = __builtin_aarch64_ld1x4v16qi ((const __builtin_aarch64_simd_qi *) __a); -+ return __au.__i; -+} -+ -+__extension__ extern __inline uint16x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_u16_x4 (const uint16_t *__a) -+{ -+ union { uint16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au; -+ __au.__o -+ = __builtin_aarch64_ld1x4v4hi ((const __builtin_aarch64_simd_hi *) __a); -+ return __au.__i; -+} -+ -+__extension__ extern __inline uint16x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_u16_x4 (const uint16_t *__a) -+{ -+ union { uint16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au; -+ __au.__o -+ = __builtin_aarch64_ld1x4v8hi ((const __builtin_aarch64_simd_hi *) __a); -+ return __au.__i; -+} -+ -+__extension__ extern __inline uint32x2x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_u32_x4 (const uint32_t *__a) -+{ -+ union { uint32x2x4_t __i; 
__builtin_aarch64_simd_xi __o; } __au; -+ __au.__o -+ = __builtin_aarch64_ld1x4v2si ((const __builtin_aarch64_simd_si *) __a); -+ return __au.__i; -+} -+ -+__extension__ extern __inline uint32x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_u32_x4 (const uint32_t *__a) -+{ -+ union { uint32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au; -+ __au.__o -+ = __builtin_aarch64_ld1x4v4si ((const __builtin_aarch64_simd_si *) __a); -+ return __au.__i; -+} -+ -+__extension__ extern __inline float16x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_f16_x4 (const float16_t *__a) -+{ -+ union { float16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au; -+ __au.__o -+ = __builtin_aarch64_ld1x4v4hf ((const __builtin_aarch64_simd_hf *) __a); -+ return __au.__i; -+} -+ -+__extension__ extern __inline float16x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_f16_x4 (const float16_t *__a) -+{ -+ union { float16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au; -+ __au.__o -+ = __builtin_aarch64_ld1x4v8hf ((const __builtin_aarch64_simd_hf *) __a); -+ return __au.__i; -+} -+ -+__extension__ extern __inline float32x2x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_f32_x4 (const float32_t *__a) -+{ -+ union { float32x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au; -+ __au.__o -+ = __builtin_aarch64_ld1x4v2sf ((const __builtin_aarch64_simd_sf *) __a); -+ return __au.__i; -+} -+ -+__extension__ extern __inline float32x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_f32_x4 (const float32_t *__a) -+{ -+ union { float32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au; -+ __au.__o -+ = __builtin_aarch64_ld1x4v4sf ((const __builtin_aarch64_simd_sf *) __a); -+ return __au.__i; -+} -+ -+__extension__ extern __inline poly8x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_p8_x4 (const poly8_t *__a) -+{ -+ union { poly8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au; -+ __au.__o -+ = __builtin_aarch64_ld1x4v8qi ((const __builtin_aarch64_simd_qi *) __a); -+ return __au.__i; -+} -+ -+__extension__ extern __inline poly8x16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_p8_x4 (const poly8_t *__a) -+{ -+ union { poly8x16x4_t __i; __builtin_aarch64_simd_xi __o; } __au; -+ __au.__o -+ = __builtin_aarch64_ld1x4v16qi ((const __builtin_aarch64_simd_qi *) __a); -+ return __au.__i; -+} -+ -+__extension__ extern __inline poly16x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_p16_x4 (const poly16_t *__a) -+{ -+ union { poly16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au; -+ __au.__o -+ = __builtin_aarch64_ld1x4v4hi ((const __builtin_aarch64_simd_hi *) __a); -+ return __au.__i; -+} -+ -+__extension__ extern __inline poly16x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_p16_x4 (const poly16_t *__a) -+{ -+ union { poly16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au; -+ __au.__o -+ = __builtin_aarch64_ld1x4v8hi ((const __builtin_aarch64_simd_hi *) __a); -+ return __au.__i; -+} -+ -+__extension__ extern __inline int64x1x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_s64_x4 (const int64_t *__a) -+{ -+ union { int64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __au; -+ __au.__o -+ = __builtin_aarch64_ld1x4di ((const __builtin_aarch64_simd_di *) __a); -+ return __au.__i; -+} -+ 
-+__extension__ extern __inline uint64x1x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_u64_x4 (const uint64_t *__a) -+{ -+ union { uint64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __au; -+ __au.__o -+ = __builtin_aarch64_ld1x4di ((const __builtin_aarch64_simd_di *) __a); -+ return __au.__i; -+} -+ -+__extension__ extern __inline poly64x1x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_p64_x4 (const poly64_t *__a) -+{ -+ union { poly64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __au; -+ __au.__o -+ = __builtin_aarch64_ld1x4di ((const __builtin_aarch64_simd_di *) __a); -+ return __au.__i; -+} -+ -+__extension__ extern __inline int64x2x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_s64_x4 (const int64_t *__a) -+{ -+ union { int64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au; -+ __au.__o -+ = __builtin_aarch64_ld1x4v2di ((const __builtin_aarch64_simd_di *) __a); -+ return __au.__i; -+} -+ -+__extension__ extern __inline uint64x2x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_u64_x4 (const uint64_t *__a) -+{ -+ union { uint64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au; -+ __au.__o -+ = __builtin_aarch64_ld1x4v2di ((const __builtin_aarch64_simd_di *) __a); -+ return __au.__i; -+} -+ -+__extension__ extern __inline poly64x2x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_p64_x4 (const poly64_t *__a) -+{ -+ union { poly64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au; -+ __au.__o -+ = __builtin_aarch64_ld1x4v2di ((const __builtin_aarch64_simd_di *) __a); -+ return __au.__i; -+} -+ -+__extension__ extern __inline float64x1x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_f64_x4 (const float64_t *__a) -+{ -+ union { float64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __au; -+ __au.__o -+ = __builtin_aarch64_ld1x4df ((const __builtin_aarch64_simd_df *) __a); -+ return __au.__i; -+} -+ -+__extension__ extern __inline float64x2x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_f64_x4 (const float64_t *__a) -+{ -+ union { float64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __au; -+ __au.__o -+ = __builtin_aarch64_ld1x4v2df ((const __builtin_aarch64_simd_df *) __a); -+ return __au.__i; - } - - /* vld1_dup */ -@@ -21115,328 +21430,328 @@ vmulxd_laneq_f64 (float64_t __a, float64x2_t __v, const int __lane) - - __extension__ extern __inline int8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpmax_s8 (int8x8_t a, int8x8_t b) -+vpmax_s8 (int8x8_t __a, int8x8_t __b) - { -- return __builtin_aarch64_smaxpv8qi (a, b); -+ return __builtin_aarch64_smaxpv8qi (__a, __b); - } - - __extension__ extern __inline int16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpmax_s16 (int16x4_t a, int16x4_t b) -+vpmax_s16 (int16x4_t __a, int16x4_t __b) - { -- return __builtin_aarch64_smaxpv4hi (a, b); -+ return __builtin_aarch64_smaxpv4hi (__a, __b); - } - - __extension__ extern __inline int32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpmax_s32 (int32x2_t a, int32x2_t b) -+vpmax_s32 (int32x2_t __a, int32x2_t __b) - { -- return __builtin_aarch64_smaxpv2si (a, b); -+ return __builtin_aarch64_smaxpv2si (__a, __b); - } - - __extension__ extern __inline uint8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpmax_u8 (uint8x8_t a, uint8x8_t b) -+vpmax_u8 (uint8x8_t __a, 
uint8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_umaxpv8qi ((int8x8_t) a, -- (int8x8_t) b); -+ return (uint8x8_t) __builtin_aarch64_umaxpv8qi ((int8x8_t) __a, -+ (int8x8_t) __b); - } - - __extension__ extern __inline uint16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpmax_u16 (uint16x4_t a, uint16x4_t b) -+vpmax_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_umaxpv4hi ((int16x4_t) a, -- (int16x4_t) b); -+ return (uint16x4_t) __builtin_aarch64_umaxpv4hi ((int16x4_t) __a, -+ (int16x4_t) __b); - } - - __extension__ extern __inline uint32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpmax_u32 (uint32x2_t a, uint32x2_t b) -+vpmax_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_umaxpv2si ((int32x2_t) a, -- (int32x2_t) b); -+ return (uint32x2_t) __builtin_aarch64_umaxpv2si ((int32x2_t) __a, -+ (int32x2_t) __b); - } - - __extension__ extern __inline int8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpmaxq_s8 (int8x16_t a, int8x16_t b) -+vpmaxq_s8 (int8x16_t __a, int8x16_t __b) - { -- return __builtin_aarch64_smaxpv16qi (a, b); -+ return __builtin_aarch64_smaxpv16qi (__a, __b); - } - - __extension__ extern __inline int16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpmaxq_s16 (int16x8_t a, int16x8_t b) -+vpmaxq_s16 (int16x8_t __a, int16x8_t __b) - { -- return __builtin_aarch64_smaxpv8hi (a, b); -+ return __builtin_aarch64_smaxpv8hi (__a, __b); - } - - __extension__ extern __inline int32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpmaxq_s32 (int32x4_t a, int32x4_t b) -+vpmaxq_s32 (int32x4_t __a, int32x4_t __b) - { -- return __builtin_aarch64_smaxpv4si (a, b); -+ return __builtin_aarch64_smaxpv4si (__a, __b); - } - - __extension__ extern __inline uint8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpmaxq_u8 (uint8x16_t a, uint8x16_t b) -+vpmaxq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_umaxpv16qi ((int8x16_t) a, -- (int8x16_t) b); -+ return (uint8x16_t) __builtin_aarch64_umaxpv16qi ((int8x16_t) __a, -+ (int8x16_t) __b); - } - - __extension__ extern __inline uint16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpmaxq_u16 (uint16x8_t a, uint16x8_t b) -+vpmaxq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_umaxpv8hi ((int16x8_t) a, -- (int16x8_t) b); -+ return (uint16x8_t) __builtin_aarch64_umaxpv8hi ((int16x8_t) __a, -+ (int16x8_t) __b); - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpmaxq_u32 (uint32x4_t a, uint32x4_t b) -+vpmaxq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_umaxpv4si ((int32x4_t) a, -- (int32x4_t) b); -+ return (uint32x4_t) __builtin_aarch64_umaxpv4si ((int32x4_t) __a, -+ (int32x4_t) __b); - } - - __extension__ extern __inline float32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpmax_f32 (float32x2_t a, float32x2_t b) -+vpmax_f32 (float32x2_t __a, float32x2_t __b) - { -- return __builtin_aarch64_smax_nanpv2sf (a, b); -+ return __builtin_aarch64_smax_nanpv2sf (__a, __b); - } - - __extension__ extern __inline float32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpmaxq_f32 (float32x4_t a, float32x4_t b) -+vpmaxq_f32 (float32x4_t __a, float32x4_t 
__b) - { -- return __builtin_aarch64_smax_nanpv4sf (a, b); -+ return __builtin_aarch64_smax_nanpv4sf (__a, __b); - } - - __extension__ extern __inline float64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpmaxq_f64 (float64x2_t a, float64x2_t b) -+vpmaxq_f64 (float64x2_t __a, float64x2_t __b) - { -- return __builtin_aarch64_smax_nanpv2df (a, b); -+ return __builtin_aarch64_smax_nanpv2df (__a, __b); - } - - __extension__ extern __inline float64_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpmaxqd_f64 (float64x2_t a) -+vpmaxqd_f64 (float64x2_t __a) - { -- return __builtin_aarch64_reduc_smax_nan_scal_v2df (a); -+ return __builtin_aarch64_reduc_smax_nan_scal_v2df (__a); - } - - __extension__ extern __inline float32_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpmaxs_f32 (float32x2_t a) -+vpmaxs_f32 (float32x2_t __a) - { -- return __builtin_aarch64_reduc_smax_nan_scal_v2sf (a); -+ return __builtin_aarch64_reduc_smax_nan_scal_v2sf (__a); - } - - /* vpmaxnm */ - - __extension__ extern __inline float32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpmaxnm_f32 (float32x2_t a, float32x2_t b) -+vpmaxnm_f32 (float32x2_t __a, float32x2_t __b) - { -- return __builtin_aarch64_smaxpv2sf (a, b); -+ return __builtin_aarch64_smaxpv2sf (__a, __b); - } - - __extension__ extern __inline float32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpmaxnmq_f32 (float32x4_t a, float32x4_t b) -+vpmaxnmq_f32 (float32x4_t __a, float32x4_t __b) - { -- return __builtin_aarch64_smaxpv4sf (a, b); -+ return __builtin_aarch64_smaxpv4sf (__a, __b); - } - - __extension__ extern __inline float64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpmaxnmq_f64 (float64x2_t a, float64x2_t b) -+vpmaxnmq_f64 (float64x2_t __a, float64x2_t __b) - { -- return __builtin_aarch64_smaxpv2df (a, b); -+ return __builtin_aarch64_smaxpv2df (__a, __b); - } - - __extension__ extern __inline float64_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpmaxnmqd_f64 (float64x2_t a) -+vpmaxnmqd_f64 (float64x2_t __a) - { -- return __builtin_aarch64_reduc_smax_scal_v2df (a); -+ return __builtin_aarch64_reduc_smax_scal_v2df (__a); - } - - __extension__ extern __inline float32_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpmaxnms_f32 (float32x2_t a) -+vpmaxnms_f32 (float32x2_t __a) - { -- return __builtin_aarch64_reduc_smax_scal_v2sf (a); -+ return __builtin_aarch64_reduc_smax_scal_v2sf (__a); - } - - /* vpmin */ - - __extension__ extern __inline int8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpmin_s8 (int8x8_t a, int8x8_t b) -+vpmin_s8 (int8x8_t __a, int8x8_t __b) - { -- return __builtin_aarch64_sminpv8qi (a, b); -+ return __builtin_aarch64_sminpv8qi (__a, __b); - } - - __extension__ extern __inline int16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpmin_s16 (int16x4_t a, int16x4_t b) -+vpmin_s16 (int16x4_t __a, int16x4_t __b) - { -- return __builtin_aarch64_sminpv4hi (a, b); -+ return __builtin_aarch64_sminpv4hi (__a, __b); - } - - __extension__ extern __inline int32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpmin_s32 (int32x2_t a, int32x2_t b) -+vpmin_s32 (int32x2_t __a, int32x2_t __b) - { -- return __builtin_aarch64_sminpv2si (a, b); -+ return __builtin_aarch64_sminpv2si (__a, __b); - } - - __extension__ extern __inline uint8x8_t - 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpmin_u8 (uint8x8_t a, uint8x8_t b) -+vpmin_u8 (uint8x8_t __a, uint8x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_uminpv8qi ((int8x8_t) a, -- (int8x8_t) b); -+ return (uint8x8_t) __builtin_aarch64_uminpv8qi ((int8x8_t) __a, -+ (int8x8_t) __b); - } - - __extension__ extern __inline uint16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpmin_u16 (uint16x4_t a, uint16x4_t b) -+vpmin_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_uminpv4hi ((int16x4_t) a, -- (int16x4_t) b); -+ return (uint16x4_t) __builtin_aarch64_uminpv4hi ((int16x4_t) __a, -+ (int16x4_t) __b); - } - - __extension__ extern __inline uint32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpmin_u32 (uint32x2_t a, uint32x2_t b) -+vpmin_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_uminpv2si ((int32x2_t) a, -- (int32x2_t) b); -+ return (uint32x2_t) __builtin_aarch64_uminpv2si ((int32x2_t) __a, -+ (int32x2_t) __b); - } - - __extension__ extern __inline int8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpminq_s8 (int8x16_t a, int8x16_t b) -+vpminq_s8 (int8x16_t __a, int8x16_t __b) - { -- return __builtin_aarch64_sminpv16qi (a, b); -+ return __builtin_aarch64_sminpv16qi (__a, __b); - } - - __extension__ extern __inline int16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpminq_s16 (int16x8_t a, int16x8_t b) -+vpminq_s16 (int16x8_t __a, int16x8_t __b) - { -- return __builtin_aarch64_sminpv8hi (a, b); -+ return __builtin_aarch64_sminpv8hi (__a, __b); - } - - __extension__ extern __inline int32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpminq_s32 (int32x4_t a, int32x4_t b) -+vpminq_s32 (int32x4_t __a, int32x4_t __b) - { -- return __builtin_aarch64_sminpv4si (a, b); -+ return __builtin_aarch64_sminpv4si (__a, __b); - } - - __extension__ extern __inline uint8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpminq_u8 (uint8x16_t a, uint8x16_t b) -+vpminq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- return (uint8x16_t) __builtin_aarch64_uminpv16qi ((int8x16_t) a, -- (int8x16_t) b); -+ return (uint8x16_t) __builtin_aarch64_uminpv16qi ((int8x16_t) __a, -+ (int8x16_t) __b); - } - - __extension__ extern __inline uint16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpminq_u16 (uint16x8_t a, uint16x8_t b) -+vpminq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- return (uint16x8_t) __builtin_aarch64_uminpv8hi ((int16x8_t) a, -- (int16x8_t) b); -+ return (uint16x8_t) __builtin_aarch64_uminpv8hi ((int16x8_t) __a, -+ (int16x8_t) __b); - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpminq_u32 (uint32x4_t a, uint32x4_t b) -+vpminq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- return (uint32x4_t) __builtin_aarch64_uminpv4si ((int32x4_t) a, -- (int32x4_t) b); -+ return (uint32x4_t) __builtin_aarch64_uminpv4si ((int32x4_t) __a, -+ (int32x4_t) __b); - } - - __extension__ extern __inline float32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpmin_f32 (float32x2_t a, float32x2_t b) -+vpmin_f32 (float32x2_t __a, float32x2_t __b) - { -- return __builtin_aarch64_smin_nanpv2sf (a, b); -+ return __builtin_aarch64_smin_nanpv2sf (__a, __b); - } - - __extension__ extern __inline float32x4_t - __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) --vpminq_f32 (float32x4_t a, float32x4_t b) -+vpminq_f32 (float32x4_t __a, float32x4_t __b) - { -- return __builtin_aarch64_smin_nanpv4sf (a, b); -+ return __builtin_aarch64_smin_nanpv4sf (__a, __b); - } - - __extension__ extern __inline float64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpminq_f64 (float64x2_t a, float64x2_t b) -+vpminq_f64 (float64x2_t __a, float64x2_t __b) - { -- return __builtin_aarch64_smin_nanpv2df (a, b); -+ return __builtin_aarch64_smin_nanpv2df (__a, __b); - } - - __extension__ extern __inline float64_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpminqd_f64 (float64x2_t a) -+vpminqd_f64 (float64x2_t __a) - { -- return __builtin_aarch64_reduc_smin_nan_scal_v2df (a); -+ return __builtin_aarch64_reduc_smin_nan_scal_v2df (__a); - } - - __extension__ extern __inline float32_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpmins_f32 (float32x2_t a) -+vpmins_f32 (float32x2_t __a) - { -- return __builtin_aarch64_reduc_smin_nan_scal_v2sf (a); -+ return __builtin_aarch64_reduc_smin_nan_scal_v2sf (__a); - } - - /* vpminnm */ - - __extension__ extern __inline float32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpminnm_f32 (float32x2_t a, float32x2_t b) -+vpminnm_f32 (float32x2_t __a, float32x2_t __b) - { -- return __builtin_aarch64_sminpv2sf (a, b); -+ return __builtin_aarch64_sminpv2sf (__a, __b); - } - - __extension__ extern __inline float32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpminnmq_f32 (float32x4_t a, float32x4_t b) -+vpminnmq_f32 (float32x4_t __a, float32x4_t __b) - { -- return __builtin_aarch64_sminpv4sf (a, b); -+ return __builtin_aarch64_sminpv4sf (__a, __b); - } - - __extension__ extern __inline float64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpminnmq_f64 (float64x2_t a, float64x2_t b) -+vpminnmq_f64 (float64x2_t __a, float64x2_t __b) - { -- return __builtin_aarch64_sminpv2df (a, b); -+ return __builtin_aarch64_sminpv2df (__a, __b); - } - - __extension__ extern __inline float64_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpminnmqd_f64 (float64x2_t a) -+vpminnmqd_f64 (float64x2_t __a) - { -- return __builtin_aarch64_reduc_smin_scal_v2df (a); -+ return __builtin_aarch64_reduc_smin_scal_v2df (__a); - } - - __extension__ extern __inline float32_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpminnms_f32 (float32x2_t a) -+vpminnms_f32 (float32x2_t __a) - { -- return __builtin_aarch64_reduc_smin_scal_v2sf (a); -+ return __builtin_aarch64_reduc_smin_scal_v2sf (__a); - } - - /* vmaxnm */ -@@ -21889,9 +22204,9 @@ vminnmvq_f64 (float64x2_t __a) - - __extension__ extern __inline float32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmla_f32 (float32x2_t a, float32x2_t b, float32x2_t c) -+vmla_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c) - { -- return a + b * c; -+ return __a + __b * __c; - } - - __extension__ extern __inline float64x1_t -@@ -21903,16 +22218,16 @@ vmla_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c) - - __extension__ extern __inline float32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c) -+vmlaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) - { -- return a + b * c; -+ return __a + __b * __c; - } - - __extension__ 
extern __inline float64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c) -+vmlaq_f64 (float64x2_t __a, float64x2_t __b, float64x2_t __c) - { -- return a + b * c; -+ return __a + __b * __c; - } - - /* vmla_lane */ -@@ -22087,9 +22402,9 @@ vmlaq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, - - __extension__ extern __inline float32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmls_f32 (float32x2_t a, float32x2_t b, float32x2_t c) -+vmls_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c) - { -- return a - b * c; -+ return __a - __b * __c; - } - - __extension__ extern __inline float64x1_t -@@ -22101,16 +22416,16 @@ vmls_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c) - - __extension__ extern __inline float32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c) -+vmlsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) - { -- return a - b * c; -+ return __a - __b * __c; - } - - __extension__ extern __inline float64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmlsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c) -+vmlsq_f64 (float64x2_t __a, float64x2_t __b, float64x2_t __c) - { -- return a - b * c; -+ return __a - __b * __c; - } - - /* vmls_lane */ -@@ -24874,419 +25189,419 @@ vqsubd_u64 (uint64_t __a, uint64_t __b) - - __extension__ extern __inline int8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbl2_s8 (int8x16x2_t tab, uint8x8_t idx) -+vqtbl2_s8 (int8x16x2_t __tab, uint8x8_t __idx) - { - __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[1], 1); -- return __builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[1], 1); -+ return __builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx); - } - - __extension__ extern __inline uint8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbl2_u8 (uint8x16x2_t tab, uint8x8_t idx) -+vqtbl2_u8 (uint8x16x2_t __tab, uint8x8_t __idx) - { - __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); -- return (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); -+ return (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx); - } - - __extension__ extern __inline poly8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbl2_p8 (poly8x16x2_t tab, uint8x8_t idx) -+vqtbl2_p8 (poly8x16x2_t __tab, uint8x8_t __idx) - { - __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); -- return (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); -+ return (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx); - } - - __extension__ extern __inline int8x16_t - 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbl2q_s8 (int8x16x2_t tab, uint8x16_t idx) -+vqtbl2q_s8 (int8x16x2_t __tab, uint8x16_t __idx) - { - __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); -- return __builtin_aarch64_tbl3v16qi (__o, (int8x16_t)idx); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); -+ return __builtin_aarch64_tbl3v16qi (__o, (int8x16_t)__idx); - } - - __extension__ extern __inline uint8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbl2q_u8 (uint8x16x2_t tab, uint8x16_t idx) -+vqtbl2q_u8 (uint8x16x2_t __tab, uint8x16_t __idx) - { - __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); -- return (uint8x16_t)__builtin_aarch64_tbl3v16qi (__o, (int8x16_t)idx); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); -+ return (uint8x16_t)__builtin_aarch64_tbl3v16qi (__o, (int8x16_t)__idx); - } - - __extension__ extern __inline poly8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbl2q_p8 (poly8x16x2_t tab, uint8x16_t idx) -+vqtbl2q_p8 (poly8x16x2_t __tab, uint8x16_t __idx) - { - __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); -- return (poly8x16_t)__builtin_aarch64_tbl3v16qi (__o, (int8x16_t)idx); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); -+ return (poly8x16_t)__builtin_aarch64_tbl3v16qi (__o, (int8x16_t)__idx); - } - - /* vqtbl3 */ - - __extension__ extern __inline int8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbl3_s8 (int8x16x3_t tab, uint8x8_t idx) -+vqtbl3_s8 (int8x16x3_t __tab, uint8x8_t __idx) - { - __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); -- return __builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)idx); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); -+ return __builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)__idx); - } - - __extension__ extern __inline uint8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbl3_u8 (uint8x16x3_t tab, uint8x8_t idx) -+vqtbl3_u8 (uint8x16x3_t __tab, uint8x8_t __idx) - { - __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); -- return (uint8x8_t)__builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)idx); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); -+ __o = 
__builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); -+ return (uint8x8_t)__builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)__idx); - } - - __extension__ extern __inline poly8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbl3_p8 (poly8x16x3_t tab, uint8x8_t idx) -+vqtbl3_p8 (poly8x16x3_t __tab, uint8x8_t __idx) - { - __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); -- return (poly8x8_t)__builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)idx); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); -+ return (poly8x8_t)__builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)__idx); - } - - __extension__ extern __inline int8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbl3q_s8 (int8x16x3_t tab, uint8x16_t idx) -+vqtbl3q_s8 (int8x16x3_t __tab, uint8x16_t __idx) - { - __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); -- return __builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)idx); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); -+ return __builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)__idx); - } - - __extension__ extern __inline uint8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbl3q_u8 (uint8x16x3_t tab, uint8x16_t idx) -+vqtbl3q_u8 (uint8x16x3_t __tab, uint8x16_t __idx) - { - __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); -- return (uint8x16_t)__builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)idx); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); -+ return (uint8x16_t)__builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)__idx); - } - - __extension__ extern __inline poly8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbl3q_p8 (poly8x16x3_t tab, uint8x16_t idx) -+vqtbl3q_p8 (poly8x16x3_t __tab, uint8x16_t __idx) - { - __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); -- return (poly8x16_t)__builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)idx); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, 
(int8x16_t)__tab.val[2], 2); -+ return (poly8x16_t)__builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)__idx); - } - - /* vqtbl4 */ - - __extension__ extern __inline int8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbl4_s8 (int8x16x4_t tab, uint8x8_t idx) -+vqtbl4_s8 (int8x16x4_t __tab, uint8x8_t __idx) - { - __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); -- return __builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)idx); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); -+ return __builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)__idx); - } - - __extension__ extern __inline uint8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbl4_u8 (uint8x16x4_t tab, uint8x8_t idx) -+vqtbl4_u8 (uint8x16x4_t __tab, uint8x8_t __idx) - { - __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); -- return (uint8x8_t)__builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)idx); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); -+ return (uint8x8_t)__builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)__idx); - } - - __extension__ extern __inline poly8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbl4_p8 (poly8x16x4_t tab, uint8x8_t idx) -+vqtbl4_p8 (poly8x16x4_t __tab, uint8x8_t __idx) - { - __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); -- return (poly8x8_t)__builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)idx); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); -+ return (poly8x8_t)__builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)__idx); - } - - __extension__ extern __inline int8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbl4q_s8 (int8x16x4_t tab, uint8x16_t idx) -+vqtbl4q_s8 (int8x16x4_t __tab, uint8x16_t __idx) - { - __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = 
__builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); -- return __builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)idx); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); -+ return __builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)__idx); - } - - __extension__ extern __inline uint8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbl4q_u8 (uint8x16x4_t tab, uint8x16_t idx) -+vqtbl4q_u8 (uint8x16x4_t __tab, uint8x16_t __idx) - { - __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); -- return (uint8x16_t)__builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)idx); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); -+ return (uint8x16_t)__builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)__idx); - } - - __extension__ extern __inline poly8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbl4q_p8 (poly8x16x4_t tab, uint8x16_t idx) -+vqtbl4q_p8 (poly8x16x4_t __tab, uint8x16_t __idx) - { - __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); -- return (poly8x16_t)__builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)idx); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); -+ return (poly8x16_t)__builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)__idx); - } - - - /* vqtbx2 */ - __extension__ extern __inline int8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbx2_s8 (int8x8_t r, int8x16x2_t tab, uint8x8_t idx) -+vqtbx2_s8 (int8x8_t __r, int8x16x2_t __tab, uint8x8_t __idx) - { - __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[1], 1); -- return __builtin_aarch64_tbx4v8qi (r, __o, (int8x8_t)idx); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[1], 1); -+ return __builtin_aarch64_tbx4v8qi (__r, __o, (int8x8_t)__idx); - } - - __extension__ extern __inline uint8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbx2_u8 (uint8x8_t r, uint8x16x2_t tab, uint8x8_t idx) -+vqtbx2_u8 (uint8x8_t __r, uint8x16x2_t __tab, uint8x8_t __idx) - { 
- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); -- return (uint8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)r, __o, -- (int8x8_t)idx); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); -+ return (uint8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o, -+ (int8x8_t)__idx); - } - - __extension__ extern __inline poly8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbx2_p8 (poly8x8_t r, poly8x16x2_t tab, uint8x8_t idx) -+vqtbx2_p8 (poly8x8_t __r, poly8x16x2_t __tab, uint8x8_t __idx) - { - __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); -- return (poly8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)r, __o, -- (int8x8_t)idx); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); -+ return (poly8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o, -+ (int8x8_t)__idx); - } - - __extension__ extern __inline int8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbx2q_s8 (int8x16_t r, int8x16x2_t tab, uint8x16_t idx) -+vqtbx2q_s8 (int8x16_t __r, int8x16x2_t __tab, uint8x16_t __idx) - { - __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[1], 1); -- return __builtin_aarch64_tbx4v16qi (r, __o, (int8x16_t)idx); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[1], 1); -+ return __builtin_aarch64_tbx4v16qi (__r, __o, (int8x16_t)__idx); - } - - __extension__ extern __inline uint8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbx2q_u8 (uint8x16_t r, uint8x16x2_t tab, uint8x16_t idx) -+vqtbx2q_u8 (uint8x16_t __r, uint8x16x2_t __tab, uint8x16_t __idx) - { - __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); -- return (uint8x16_t)__builtin_aarch64_tbx4v16qi ((int8x16_t)r, __o, -- (int8x16_t)idx); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); -+ return (uint8x16_t)__builtin_aarch64_tbx4v16qi ((int8x16_t)__r, __o, -+ (int8x16_t)__idx); - } - - __extension__ extern __inline poly8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbx2q_p8 (poly8x16_t r, poly8x16x2_t tab, uint8x16_t idx) -+vqtbx2q_p8 (poly8x16_t __r, poly8x16x2_t __tab, uint8x16_t __idx) - { - __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); -- return (poly8x16_t)__builtin_aarch64_tbx4v16qi ((int8x16_t)r, __o, -- (int8x16_t)idx); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1); -+ return (poly8x16_t)__builtin_aarch64_tbx4v16qi ((int8x16_t)__r, __o, -+ (int8x16_t)__idx); - } - - /* vqtbx3 */ - __extension__ 
extern __inline int8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbx3_s8 (int8x8_t r, int8x16x3_t tab, uint8x8_t idx) -+vqtbx3_s8 (int8x8_t __r, int8x16x3_t __tab, uint8x8_t __idx) - { - __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[2], 2); -- return __builtin_aarch64_qtbx3v8qi (r, __o, (int8x8_t)idx); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, __tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, __tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, __tab.val[2], 2); -+ return __builtin_aarch64_qtbx3v8qi (__r, __o, (int8x8_t)__idx); - } - - __extension__ extern __inline uint8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbx3_u8 (uint8x8_t r, uint8x16x3_t tab, uint8x8_t idx) -+vqtbx3_u8 (uint8x8_t __r, uint8x16x3_t __tab, uint8x8_t __idx) - { - __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); -- return (uint8x8_t)__builtin_aarch64_qtbx3v8qi ((int8x8_t)r, __o, -- (int8x8_t)idx); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); -+ return (uint8x8_t)__builtin_aarch64_qtbx3v8qi ((int8x8_t)__r, __o, -+ (int8x8_t)__idx); - } - - __extension__ extern __inline poly8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbx3_p8 (poly8x8_t r, poly8x16x3_t tab, uint8x8_t idx) -+vqtbx3_p8 (poly8x8_t __r, poly8x16x3_t __tab, uint8x8_t __idx) - { - __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); -- return (poly8x8_t)__builtin_aarch64_qtbx3v8qi ((int8x8_t)r, __o, -- (int8x8_t)idx); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); -+ return (poly8x8_t)__builtin_aarch64_qtbx3v8qi ((int8x8_t)__r, __o, -+ (int8x8_t)__idx); - } - - __extension__ extern __inline int8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbx3q_s8 (int8x16_t r, int8x16x3_t tab, uint8x16_t idx) -+vqtbx3q_s8 (int8x16_t __r, int8x16x3_t __tab, uint8x16_t __idx) - { - __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[2], 2); -- return __builtin_aarch64_qtbx3v16qi (r, __o, (int8x16_t)idx); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, __tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, __tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, __tab.val[2], 2); -+ return __builtin_aarch64_qtbx3v16qi (__r, __o, (int8x16_t)__idx); - } - - __extension__ extern __inline uint8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
--vqtbx3q_u8 (uint8x16_t r, uint8x16x3_t tab, uint8x16_t idx) -+vqtbx3q_u8 (uint8x16_t __r, uint8x16x3_t __tab, uint8x16_t __idx) - { - __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); -- return (uint8x16_t)__builtin_aarch64_qtbx3v16qi ((int8x16_t)r, __o, -- (int8x16_t)idx); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); -+ return (uint8x16_t)__builtin_aarch64_qtbx3v16qi ((int8x16_t)__r, __o, -+ (int8x16_t)__idx); - } - - __extension__ extern __inline poly8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbx3q_p8 (poly8x16_t r, poly8x16x3_t tab, uint8x16_t idx) -+vqtbx3q_p8 (poly8x16_t __r, poly8x16x3_t __tab, uint8x16_t __idx) - { - __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); -- return (poly8x16_t)__builtin_aarch64_qtbx3v16qi ((int8x16_t)r, __o, -- (int8x16_t)idx); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)__tab.val[2], 2); -+ return (poly8x16_t)__builtin_aarch64_qtbx3v16qi ((int8x16_t)__r, __o, -+ (int8x16_t)__idx); - } - - /* vqtbx4 */ - - __extension__ extern __inline int8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbx4_s8 (int8x8_t r, int8x16x4_t tab, uint8x8_t idx) -+vqtbx4_s8 (int8x8_t __r, int8x16x4_t __tab, uint8x8_t __idx) - { - __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[3], 3); -- return __builtin_aarch64_qtbx4v8qi (r, __o, (int8x8_t)idx); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[3], 3); -+ return __builtin_aarch64_qtbx4v8qi (__r, __o, (int8x8_t)__idx); - } - - __extension__ extern __inline uint8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbx4_u8 (uint8x8_t r, uint8x16x4_t tab, uint8x8_t idx) -+vqtbx4_u8 (uint8x8_t __r, uint8x16x4_t __tab, uint8x8_t __idx) - { - __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); -- return (uint8x8_t)__builtin_aarch64_qtbx4v8qi ((int8x8_t)r, __o, -- (int8x8_t)idx); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); -+ __o = 
__builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); -+ return (uint8x8_t)__builtin_aarch64_qtbx4v8qi ((int8x8_t)__r, __o, -+ (int8x8_t)__idx); - } - - __extension__ extern __inline poly8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbx4_p8 (poly8x8_t r, poly8x16x4_t tab, uint8x8_t idx) -+vqtbx4_p8 (poly8x8_t __r, poly8x16x4_t __tab, uint8x8_t __idx) - { - __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); -- return (poly8x8_t)__builtin_aarch64_qtbx4v8qi ((int8x8_t)r, __o, -- (int8x8_t)idx); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); -+ return (poly8x8_t)__builtin_aarch64_qtbx4v8qi ((int8x8_t)__r, __o, -+ (int8x8_t)__idx); - } - - __extension__ extern __inline int8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbx4q_s8 (int8x16_t r, int8x16x4_t tab, uint8x16_t idx) -+vqtbx4q_s8 (int8x16_t __r, int8x16x4_t __tab, uint8x16_t __idx) - { - __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[3], 3); -- return __builtin_aarch64_qtbx4v16qi (r, __o, (int8x16_t)idx); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, __tab.val[3], 3); -+ return __builtin_aarch64_qtbx4v16qi (__r, __o, (int8x16_t)__idx); - } - - __extension__ extern __inline uint8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbx4q_u8 (uint8x16_t r, uint8x16x4_t tab, uint8x16_t idx) -+vqtbx4q_u8 (uint8x16_t __r, uint8x16x4_t __tab, uint8x16_t __idx) - { - __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); -- return (uint8x16_t)__builtin_aarch64_qtbx4v16qi ((int8x16_t)r, __o, -- (int8x16_t)idx); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); -+ return (uint8x16_t)__builtin_aarch64_qtbx4v16qi ((int8x16_t)__r, __o, -+ (int8x16_t)__idx); - } - - __extension__ extern __inline poly8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vqtbx4q_p8 (poly8x16_t r, poly8x16x4_t tab, uint8x16_t idx) -+vqtbx4q_p8 (poly8x16_t __r, 
poly8x16x4_t __tab, uint8x16_t __idx) - { - __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); -- return (poly8x16_t)__builtin_aarch64_qtbx4v16qi ((int8x16_t)r, __o, -- (int8x16_t)idx); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)__tab.val[3], 3); -+ return (poly8x16_t)__builtin_aarch64_qtbx4v16qi ((int8x16_t)__r, __o, -+ (int8x16_t)__idx); - } - - /* vrbit */ -@@ -25457,134 +25772,134 @@ vrecpxd_f64 (float64_t __a) - - __extension__ extern __inline poly8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrev16_p8 (poly8x8_t a) -+vrev16_p8 (poly8x8_t __a) - { -- return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); -+ return __builtin_shuffle (__a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); - } - - __extension__ extern __inline int8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrev16_s8 (int8x8_t a) -+vrev16_s8 (int8x8_t __a) - { -- return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); -+ return __builtin_shuffle (__a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); - } - - __extension__ extern __inline uint8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrev16_u8 (uint8x8_t a) -+vrev16_u8 (uint8x8_t __a) - { -- return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); -+ return __builtin_shuffle (__a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); - } - - __extension__ extern __inline poly8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrev16q_p8 (poly8x16_t a) -+vrev16q_p8 (poly8x16_t __a) - { -- return __builtin_shuffle (a, -+ return __builtin_shuffle (__a, - (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); - } - - __extension__ extern __inline int8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrev16q_s8 (int8x16_t a) -+vrev16q_s8 (int8x16_t __a) - { -- return __builtin_shuffle (a, -+ return __builtin_shuffle (__a, - (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); - } - - __extension__ extern __inline uint8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrev16q_u8 (uint8x16_t a) -+vrev16q_u8 (uint8x16_t __a) - { -- return __builtin_shuffle (a, -+ return __builtin_shuffle (__a, - (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); - } - - __extension__ extern __inline poly8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrev32_p8 (poly8x8_t a) -+vrev32_p8 (poly8x8_t __a) - { -- return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); -+ return __builtin_shuffle (__a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); - } - - __extension__ extern __inline poly16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrev32_p16 (poly16x4_t a) -+vrev32_p16 (poly16x4_t __a) - { -- return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 }); -+ return __builtin_shuffle (__a, (uint16x4_t) { 1, 0, 3, 2 }); - } - - __extension__ extern __inline 
int8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrev32_s8 (int8x8_t a) -+vrev32_s8 (int8x8_t __a) - { -- return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); -+ return __builtin_shuffle (__a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); - } - - __extension__ extern __inline int16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrev32_s16 (int16x4_t a) -+vrev32_s16 (int16x4_t __a) - { -- return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 }); -+ return __builtin_shuffle (__a, (uint16x4_t) { 1, 0, 3, 2 }); - } - - __extension__ extern __inline uint8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrev32_u8 (uint8x8_t a) -+vrev32_u8 (uint8x8_t __a) - { -- return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); -+ return __builtin_shuffle (__a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); - } - - __extension__ extern __inline uint16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrev32_u16 (uint16x4_t a) -+vrev32_u16 (uint16x4_t __a) - { -- return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 }); -+ return __builtin_shuffle (__a, (uint16x4_t) { 1, 0, 3, 2 }); - } - - __extension__ extern __inline poly8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrev32q_p8 (poly8x16_t a) -+vrev32q_p8 (poly8x16_t __a) - { -- return __builtin_shuffle (a, -+ return __builtin_shuffle (__a, - (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); - } - - __extension__ extern __inline poly16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrev32q_p16 (poly16x8_t a) -+vrev32q_p16 (poly16x8_t __a) - { -- return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); -+ return __builtin_shuffle (__a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); - } - - __extension__ extern __inline int8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrev32q_s8 (int8x16_t a) -+vrev32q_s8 (int8x16_t __a) - { -- return __builtin_shuffle (a, -+ return __builtin_shuffle (__a, - (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); - } - - __extension__ extern __inline int16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrev32q_s16 (int16x8_t a) -+vrev32q_s16 (int16x8_t __a) - { -- return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); -+ return __builtin_shuffle (__a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); - } - - __extension__ extern __inline uint8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrev32q_u8 (uint8x16_t a) -+vrev32q_u8 (uint8x16_t __a) - { -- return __builtin_shuffle (a, -+ return __builtin_shuffle (__a, - (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); - } - - __extension__ extern __inline uint16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrev32q_u16 (uint16x8_t a) -+vrev32q_u16 (uint16x8_t __a) - { -- return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); -+ return __builtin_shuffle (__a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); - } - - __extension__ extern __inline float16x4_t -@@ -25596,65 +25911,65 @@ vrev64_f16 (float16x4_t __a) - - __extension__ extern __inline float32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrev64_f32 (float32x2_t a) -+vrev64_f32 (float32x2_t __a) - { -- return __builtin_shuffle (a, (uint32x2_t) { 1, 0 }); -+ return __builtin_shuffle (__a, 
(uint32x2_t) { 1, 0 }); - } - - __extension__ extern __inline poly8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrev64_p8 (poly8x8_t a) -+vrev64_p8 (poly8x8_t __a) - { -- return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); -+ return __builtin_shuffle (__a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); - } - - __extension__ extern __inline poly16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrev64_p16 (poly16x4_t a) -+vrev64_p16 (poly16x4_t __a) - { -- return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 }); -+ return __builtin_shuffle (__a, (uint16x4_t) { 3, 2, 1, 0 }); - } - - __extension__ extern __inline int8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrev64_s8 (int8x8_t a) -+vrev64_s8 (int8x8_t __a) - { -- return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); -+ return __builtin_shuffle (__a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); - } - - __extension__ extern __inline int16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrev64_s16 (int16x4_t a) -+vrev64_s16 (int16x4_t __a) - { -- return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 }); -+ return __builtin_shuffle (__a, (uint16x4_t) { 3, 2, 1, 0 }); - } - - __extension__ extern __inline int32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrev64_s32 (int32x2_t a) -+vrev64_s32 (int32x2_t __a) - { -- return __builtin_shuffle (a, (uint32x2_t) { 1, 0 }); -+ return __builtin_shuffle (__a, (uint32x2_t) { 1, 0 }); - } - - __extension__ extern __inline uint8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrev64_u8 (uint8x8_t a) -+vrev64_u8 (uint8x8_t __a) - { -- return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); -+ return __builtin_shuffle (__a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); - } - - __extension__ extern __inline uint16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrev64_u16 (uint16x4_t a) -+vrev64_u16 (uint16x4_t __a) - { -- return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 }); -+ return __builtin_shuffle (__a, (uint16x4_t) { 3, 2, 1, 0 }); - } - - __extension__ extern __inline uint32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrev64_u32 (uint32x2_t a) -+vrev64_u32 (uint32x2_t __a) - { -- return __builtin_shuffle (a, (uint32x2_t) { 1, 0 }); -+ return __builtin_shuffle (__a, (uint32x2_t) { 1, 0 }); - } - - __extension__ extern __inline float16x8_t -@@ -25666,68 +25981,68 @@ vrev64q_f16 (float16x8_t __a) - - __extension__ extern __inline float32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrev64q_f32 (float32x4_t a) -+vrev64q_f32 (float32x4_t __a) - { -- return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 }); -+ return __builtin_shuffle (__a, (uint32x4_t) { 1, 0, 3, 2 }); - } - - __extension__ extern __inline poly8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrev64q_p8 (poly8x16_t a) -+vrev64q_p8 (poly8x16_t __a) - { -- return __builtin_shuffle (a, -+ return __builtin_shuffle (__a, - (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); - } - - __extension__ extern __inline poly16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrev64q_p16 (poly16x8_t a) -+vrev64q_p16 (poly16x8_t __a) - { -- return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); -+ return __builtin_shuffle (__a, (uint16x8_t) { 3, 2, 1, 0, 7, 
6, 5, 4 }); - } - - __extension__ extern __inline int8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrev64q_s8 (int8x16_t a) -+vrev64q_s8 (int8x16_t __a) - { -- return __builtin_shuffle (a, -+ return __builtin_shuffle (__a, - (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); - } - - __extension__ extern __inline int16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrev64q_s16 (int16x8_t a) -+vrev64q_s16 (int16x8_t __a) - { -- return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); -+ return __builtin_shuffle (__a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); - } - - __extension__ extern __inline int32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrev64q_s32 (int32x4_t a) -+vrev64q_s32 (int32x4_t __a) - { -- return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 }); -+ return __builtin_shuffle (__a, (uint32x4_t) { 1, 0, 3, 2 }); - } - - __extension__ extern __inline uint8x16_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrev64q_u8 (uint8x16_t a) -+vrev64q_u8 (uint8x16_t __a) - { -- return __builtin_shuffle (a, -+ return __builtin_shuffle (__a, - (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); - } - - __extension__ extern __inline uint16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrev64q_u16 (uint16x8_t a) -+vrev64q_u16 (uint16x8_t __a) - { -- return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); -+ return __builtin_shuffle (__a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrev64q_u32 (uint32x4_t a) -+vrev64q_u32 (uint32x4_t __a) - { -- return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 }); -+ return __builtin_shuffle (__a, (uint32x4_t) { 1, 0, 3, 2 }); - } - - /* vrnd */ -@@ -26420,87 +26735,90 @@ vrsrad_n_u64 (uint64_t __a, uint64_t __b, const int __c) - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vsha1cq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) -+vsha1cq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) - { -- return __builtin_aarch64_crypto_sha1cv4si_uuuu (hash_abcd, hash_e, wk); -+ return __builtin_aarch64_crypto_sha1cv4si_uuuu (__hash_abcd, __hash_e, __wk); - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vsha1mq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) -+vsha1mq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) - { -- return __builtin_aarch64_crypto_sha1mv4si_uuuu (hash_abcd, hash_e, wk); -+ return __builtin_aarch64_crypto_sha1mv4si_uuuu (__hash_abcd, __hash_e, __wk); - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vsha1pq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) -+vsha1pq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) - { -- return __builtin_aarch64_crypto_sha1pv4si_uuuu (hash_abcd, hash_e, wk); -+ return __builtin_aarch64_crypto_sha1pv4si_uuuu (__hash_abcd, __hash_e, __wk); - } - - __extension__ extern __inline uint32_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vsha1h_u32 (uint32_t hash_e) -+vsha1h_u32 (uint32_t __hash_e) - { -- return __builtin_aarch64_crypto_sha1hsi_uu (hash_e); -+ return 
__builtin_aarch64_crypto_sha1hsi_uu (__hash_e); - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vsha1su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11) -+vsha1su0q_u32 (uint32x4_t __w0_3, uint32x4_t __w4_7, uint32x4_t __w8_11) - { -- return __builtin_aarch64_crypto_sha1su0v4si_uuuu (w0_3, w4_7, w8_11); -+ return __builtin_aarch64_crypto_sha1su0v4si_uuuu (__w0_3, __w4_7, __w8_11); - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vsha1su1q_u32 (uint32x4_t tw0_3, uint32x4_t w12_15) -+vsha1su1q_u32 (uint32x4_t __tw0_3, uint32x4_t __w12_15) - { -- return __builtin_aarch64_crypto_sha1su1v4si_uuu (tw0_3, w12_15); -+ return __builtin_aarch64_crypto_sha1su1v4si_uuu (__tw0_3, __w12_15); - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vsha256hq_u32 (uint32x4_t hash_abcd, uint32x4_t hash_efgh, uint32x4_t wk) -+vsha256hq_u32 (uint32x4_t __hash_abcd, uint32x4_t __hash_efgh, uint32x4_t __wk) - { -- return __builtin_aarch64_crypto_sha256hv4si_uuuu (hash_abcd, hash_efgh, wk); -+ return __builtin_aarch64_crypto_sha256hv4si_uuuu (__hash_abcd, __hash_efgh, -+ __wk); - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vsha256h2q_u32 (uint32x4_t hash_efgh, uint32x4_t hash_abcd, uint32x4_t wk) -+vsha256h2q_u32 (uint32x4_t __hash_efgh, uint32x4_t __hash_abcd, uint32x4_t __wk) - { -- return __builtin_aarch64_crypto_sha256h2v4si_uuuu (hash_efgh, hash_abcd, wk); -+ return __builtin_aarch64_crypto_sha256h2v4si_uuuu (__hash_efgh, __hash_abcd, -+ __wk); - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vsha256su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7) -+vsha256su0q_u32 (uint32x4_t __w0_3, uint32x4_t __w4_7) - { -- return __builtin_aarch64_crypto_sha256su0v4si_uuu (w0_3, w4_7); -+ return __builtin_aarch64_crypto_sha256su0v4si_uuu (__w0_3, __w4_7); - } - - __extension__ extern __inline uint32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vsha256su1q_u32 (uint32x4_t tw0_3, uint32x4_t w8_11, uint32x4_t w12_15) -+vsha256su1q_u32 (uint32x4_t __tw0_3, uint32x4_t __w8_11, uint32x4_t __w12_15) - { -- return __builtin_aarch64_crypto_sha256su1v4si_uuuu (tw0_3, w8_11, w12_15); -+ return __builtin_aarch64_crypto_sha256su1v4si_uuuu (__tw0_3, __w8_11, -+ __w12_15); - } - - __extension__ extern __inline poly128_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmull_p64 (poly64_t a, poly64_t b) -+vmull_p64 (poly64_t __a, poly64_t __b) - { - return -- __builtin_aarch64_crypto_pmulldi_ppp (a, b); -+ __builtin_aarch64_crypto_pmulldi_ppp (__a, __b); - } - - __extension__ extern __inline poly128_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vmull_high_p64 (poly64x2_t a, poly64x2_t b) -+vmull_high_p64 (poly64x2_t __a, poly64x2_t __b) - { -- return __builtin_aarch64_crypto_pmullv2di_ppp (a, b); -+ return __builtin_aarch64_crypto_pmullv2di_ppp (__a, __b); - } - - #pragma GCC pop_options -@@ -27202,30 +27520,30 @@ vsqaddd_u64 (uint64_t __a, int64_t __b) - /* vsqrt */ - __extension__ extern __inline float32x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vsqrt_f32 (float32x2_t a) -+vsqrt_f32 (float32x2_t __a) - { -- return __builtin_aarch64_sqrtv2sf (a); -+ return 
__builtin_aarch64_sqrtv2sf (__a); - } - - __extension__ extern __inline float32x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vsqrtq_f32 (float32x4_t a) -+vsqrtq_f32 (float32x4_t __a) - { -- return __builtin_aarch64_sqrtv4sf (a); -+ return __builtin_aarch64_sqrtv4sf (__a); - } - - __extension__ extern __inline float64x1_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vsqrt_f64 (float64x1_t a) -+vsqrt_f64 (float64x1_t __a) - { -- return (float64x1_t) { __builtin_aarch64_sqrtdf (a[0]) }; -+ return (float64x1_t) { __builtin_aarch64_sqrtdf (__a[0]) }; - } - - __extension__ extern __inline float64x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vsqrtq_f64 (float64x2_t a) -+vsqrtq_f64 (float64x2_t __a) - { -- return __builtin_aarch64_sqrtv2df (a); -+ return __builtin_aarch64_sqrtv2df (__a); - } - - /* vsra */ -@@ -27495,98 +27813,98 @@ vst1_f16 (float16_t *__a, float16x4_t __b) - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_f32 (float32_t *a, float32x2_t b) -+vst1_f32 (float32_t *__a, float32x2_t __b) - { -- __builtin_aarch64_st1v2sf ((__builtin_aarch64_simd_sf *) a, b); -+ __builtin_aarch64_st1v2sf ((__builtin_aarch64_simd_sf *) __a, __b); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_f64 (float64_t *a, float64x1_t b) -+vst1_f64 (float64_t *__a, float64x1_t __b) - { -- *a = b[0]; -+ *__a = __b[0]; - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_p8 (poly8_t *a, poly8x8_t b) -+vst1_p8 (poly8_t *__a, poly8x8_t __b) - { -- __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, -- (int8x8_t) b); -+ __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) __a, -+ (int8x8_t) __b); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_p16 (poly16_t *a, poly16x4_t b) -+vst1_p16 (poly16_t *__a, poly16x4_t __b) - { -- __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, -- (int16x4_t) b); -+ __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) __a, -+ (int16x4_t) __b); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_p64 (poly64_t *a, poly64x1_t b) -+vst1_p64 (poly64_t *__a, poly64x1_t __b) - { -- *a = b[0]; -+ *__a = __b[0]; - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_s8 (int8_t *a, int8x8_t b) -+vst1_s8 (int8_t *__a, int8x8_t __b) - { -- __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, b); -+ __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) __a, __b); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_s16 (int16_t *a, int16x4_t b) -+vst1_s16 (int16_t *__a, int16x4_t __b) - { -- __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, b); -+ __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) __a, __b); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_s32 (int32_t *a, int32x2_t b) -+vst1_s32 (int32_t *__a, int32x2_t __b) - { -- __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, b); -+ __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) __a, __b); - } - - __extension__ extern __inline void - __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) --vst1_s64 (int64_t *a, int64x1_t b) -+vst1_s64 (int64_t *__a, int64x1_t __b) - { -- *a = b[0]; -+ *__a = __b[0]; - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_u8 (uint8_t *a, uint8x8_t b) -+vst1_u8 (uint8_t *__a, uint8x8_t __b) - { -- __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, -- (int8x8_t) b); -+ __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) __a, -+ (int8x8_t) __b); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_u16 (uint16_t *a, uint16x4_t b) -+vst1_u16 (uint16_t *__a, uint16x4_t __b) - { -- __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, -- (int16x4_t) b); -+ __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) __a, -+ (int16x4_t) __b); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_u32 (uint32_t *a, uint32x2_t b) -+vst1_u32 (uint32_t *__a, uint32x2_t __b) - { -- __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, -- (int32x2_t) b); -+ __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) __a, -+ (int32x2_t) __b); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_u64 (uint64_t *a, uint64x1_t b) -+vst1_u64 (uint64_t *__a, uint64x1_t __b) - { -- *a = b[0]; -+ *__a = __b[0]; - } - - /* vst1q */ -@@ -27600,100 +27918,100 @@ vst1q_f16 (float16_t *__a, float16x8_t __b) - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_f32 (float32_t *a, float32x4_t b) -+vst1q_f32 (float32_t *__a, float32x4_t __b) - { -- __builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf *) a, b); -+ __builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf *) __a, __b); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_f64 (float64_t *a, float64x2_t b) -+vst1q_f64 (float64_t *__a, float64x2_t __b) - { -- __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b); -+ __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) __a, __b); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_p8 (poly8_t *a, poly8x16_t b) -+vst1q_p8 (poly8_t *__a, poly8x16_t __b) - { -- __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, -- (int8x16_t) b); -+ __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) __a, -+ (int8x16_t) __b); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_p16 (poly16_t *a, poly16x8_t b) -+vst1q_p16 (poly16_t *__a, poly16x8_t __b) - { -- __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, -- (int16x8_t) b); -+ __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) __a, -+ (int16x8_t) __b); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_p64 (poly64_t *a, poly64x2_t b) -+vst1q_p64 (poly64_t *__a, poly64x2_t __b) - { -- __builtin_aarch64_st1v2di_sp ((__builtin_aarch64_simd_di *) a, -- (poly64x2_t) b); -+ __builtin_aarch64_st1v2di_sp ((__builtin_aarch64_simd_di *) __a, -+ (poly64x2_t) __b); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_s8 (int8_t *a, int8x16_t b) -+vst1q_s8 (int8_t *__a, 
int8x16_t __b) - { -- __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, b); -+ __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) __a, __b); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_s16 (int16_t *a, int16x8_t b) -+vst1q_s16 (int16_t *__a, int16x8_t __b) - { -- __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, b); -+ __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) __a, __b); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_s32 (int32_t *a, int32x4_t b) -+vst1q_s32 (int32_t *__a, int32x4_t __b) - { -- __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, b); -+ __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) __a, __b); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_s64 (int64_t *a, int64x2_t b) -+vst1q_s64 (int64_t *__a, int64x2_t __b) - { -- __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, b); -+ __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) __a, __b); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_u8 (uint8_t *a, uint8x16_t b) -+vst1q_u8 (uint8_t *__a, uint8x16_t __b) - { -- __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, -- (int8x16_t) b); -+ __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) __a, -+ (int8x16_t) __b); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_u16 (uint16_t *a, uint16x8_t b) -+vst1q_u16 (uint16_t *__a, uint16x8_t __b) - { -- __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, -- (int16x8_t) b); -+ __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) __a, -+ (int16x8_t) __b); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_u32 (uint32_t *a, uint32x4_t b) -+vst1q_u32 (uint32_t *__a, uint32x4_t __b) - { -- __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, -- (int32x4_t) b); -+ __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) __a, -+ (int32x4_t) __b); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_u64 (uint64_t *a, uint64x2_t b) -+vst1q_u64 (uint64_t *__a, uint64x2_t __b) - { -- __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, -- (int64x2_t) b); -+ __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) __a, -+ (int64x2_t) __b); - } - - /* vst1_lane */ -@@ -27900,327 +28218,343 @@ vst1q_lane_u64 (uint64_t *__a, uint64x2_t __b, const int __lane) - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_s64_x2 (int64_t * __a, int64x1x2_t val) -+vst1_s64_x2 (int64_t * __a, int64x1x2_t __val) - { - __builtin_aarch64_simd_oi __o; -- int64x2x2_t temp; -- temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); -- temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1); -+ int64x2x2_t __temp; -+ __temp.val[0] -+ = vcombine_s64 (__val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); -+ __temp.val[1] -+ = vcombine_s64 (__val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); -+ __o = 
__builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[1], 1); - __builtin_aarch64_st1x2di ((__builtin_aarch64_simd_di *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_u64_x2 (uint64_t * __a, uint64x1x2_t val) -+vst1_u64_x2 (uint64_t * __a, uint64x1x2_t __val) - { - __builtin_aarch64_simd_oi __o; -- uint64x2x2_t temp; -- temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1); -+ uint64x2x2_t __temp; -+ __temp.val[0] -+ = vcombine_u64 (__val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] -+ = vcombine_u64 (__val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[1], 1); - __builtin_aarch64_st1x2di ((__builtin_aarch64_simd_di *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_f64_x2 (float64_t * __a, float64x1x2_t val) -+vst1_f64_x2 (float64_t * __a, float64x1x2_t __val) - { - __builtin_aarch64_simd_oi __o; -- float64x2x2_t temp; -- temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[1], 1); -+ float64x2x2_t __temp; -+ __temp.val[0] -+ = vcombine_f64 (__val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] -+ = vcombine_f64 (__val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __temp.val[1], 1); - __builtin_aarch64_st1x2df ((__builtin_aarch64_simd_df *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_s8_x2 (int8_t * __a, int8x8x2_t val) -+vst1_s8_x2 (int8_t * __a, int8x8x2_t __val) - { - __builtin_aarch64_simd_oi __o; -- int8x16x2_t temp; -- temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); -- temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); -+ int8x16x2_t __temp; -+ __temp.val[0] -+ = vcombine_s8 (__val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); -+ __temp.val[1] -+ = vcombine_s8 (__val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); - __builtin_aarch64_st1x2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_p8_x2 (poly8_t * __a, poly8x8x2_t val) -+vst1_p8_x2 (poly8_t * __a, poly8x8x2_t __val) - { - __builtin_aarch64_simd_oi __o; -- poly8x16x2_t temp; -- 
temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); -+ poly8x16x2_t __temp; -+ __temp.val[0] -+ = vcombine_p8 (__val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] -+ = vcombine_p8 (__val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); - __builtin_aarch64_st1x2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_s16_x2 (int16_t * __a, int16x4x2_t val) -+vst1_s16_x2 (int16_t * __a, int16x4x2_t __val) - { - __builtin_aarch64_simd_oi __o; -- int16x8x2_t temp; -- temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); -- temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); -+ int16x8x2_t __temp; -+ __temp.val[0] -+ = vcombine_s16 (__val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); -+ __temp.val[1] -+ = vcombine_s16 (__val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); -+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1); - __builtin_aarch64_st1x2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_p16_x2 (poly16_t * __a, poly16x4x2_t val) -+vst1_p16_x2 (poly16_t * __a, poly16x4x2_t __val) - { - __builtin_aarch64_simd_oi __o; -- poly16x8x2_t temp; -- temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); -+ poly16x8x2_t __temp; -+ __temp.val[0] -+ = vcombine_p16 (__val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] -+ = vcombine_p16 (__val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1); - __builtin_aarch64_st1x2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_s32_x2 (int32_t * __a, int32x2x2_t val) -+vst1_s32_x2 (int32_t * __a, int32x2x2_t __val) - { - __builtin_aarch64_simd_oi __o; -- int32x4x2_t temp; -- temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); -- temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1); -+ int32x4x2_t __temp; -+ __temp.val[0] -+ = vcombine_s32 (__val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); -+ __temp.val[1] -+ = vcombine_s32 (__val.val[1], vcreate_s32 (__AARCH64_INT64_C 
(0))); -+ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[1], 1); - __builtin_aarch64_st1x2v2si ((__builtin_aarch64_simd_si *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_u8_x2 (uint8_t * __a, uint8x8x2_t val) -+vst1_u8_x2 (uint8_t * __a, uint8x8x2_t __val) - { - __builtin_aarch64_simd_oi __o; -- uint8x16x2_t temp; -- temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); -+ uint8x16x2_t __temp; -+ __temp.val[0] = vcombine_u8 (__val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_u8 (__val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); - __builtin_aarch64_st1x2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_u16_x2 (uint16_t * __a, uint16x4x2_t val) -+vst1_u16_x2 (uint16_t * __a, uint16x4x2_t __val) - { - __builtin_aarch64_simd_oi __o; -- uint16x8x2_t temp; -- temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); -+ uint16x8x2_t __temp; -+ __temp.val[0] = vcombine_u16 (__val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_u16 (__val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1); - __builtin_aarch64_st1x2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_u32_x2 (uint32_t * __a, uint32x2x2_t val) -+vst1_u32_x2 (uint32_t * __a, uint32x2x2_t __val) - { - __builtin_aarch64_simd_oi __o; -- uint32x4x2_t temp; -- temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1); -+ uint32x4x2_t __temp; -+ __temp.val[0] = vcombine_u32 (__val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_u32 (__val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[1], 1); - __builtin_aarch64_st1x2v2si ((__builtin_aarch64_simd_si *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_f16_x2 (float16_t * __a, float16x4x2_t val) -+vst1_f16_x2 (float16_t * __a, float16x4x2_t __val) - { - __builtin_aarch64_simd_oi __o; -- float16x8x2_t temp; -- 
temp.val[0] = vcombine_f16 (val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_f16 (val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv8hf (__o, temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv8hf (__o, temp.val[1], 1); -+ float16x8x2_t __temp; -+ __temp.val[0] = vcombine_f16 (__val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_f16 (__val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregoiv8hf (__o, __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv8hf (__o, __temp.val[1], 1); - __builtin_aarch64_st1x2v4hf (__a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_f32_x2 (float32_t * __a, float32x2x2_t val) -+vst1_f32_x2 (float32_t * __a, float32x2x2_t __val) - { - __builtin_aarch64_simd_oi __o; -- float32x4x2_t temp; -- temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[1], 1); -+ float32x4x2_t __temp; -+ __temp.val[0] = vcombine_f32 (__val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_f32 (__val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __temp.val[1], 1); - __builtin_aarch64_st1x2v2sf ((__builtin_aarch64_simd_sf *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_p64_x2 (poly64_t * __a, poly64x1x2_t val) -+vst1_p64_x2 (poly64_t * __a, poly64x1x2_t __val) - { - __builtin_aarch64_simd_oi __o; -- poly64x2x2_t temp; -- temp.val[0] = vcombine_p64 (val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_p64 (val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); -+ poly64x2x2_t __temp; -+ __temp.val[0] = vcombine_p64 (__val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_p64 (__val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, -- (poly64x2_t) temp.val[0], 0); -+ (poly64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, -- (poly64x2_t) temp.val[1], 1); -+ (poly64x2_t) __temp.val[1], 1); - __builtin_aarch64_st1x2di ((__builtin_aarch64_simd_di *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_s8_x2 (int8_t * __a, int8x16x2_t val) -+vst1q_s8_x2 (int8_t * __a, int8x16x2_t __val) - { - __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1); - __builtin_aarch64_st1x2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_p8_x2 (poly8_t * __a, poly8x16x2_t val) -+vst1q_p8_x2 (poly8_t * __a, poly8x16x2_t __val) - { - __builtin_aarch64_simd_oi __o; -- __o = 
__builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1); - __builtin_aarch64_st1x2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_s16_x2 (int16_t * __a, int16x8x2_t val) -+vst1q_s16_x2 (int16_t * __a, int16x8x2_t __val) - { - __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1); - __builtin_aarch64_st1x2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_p16_x2 (poly16_t * __a, poly16x8x2_t val) -+vst1q_p16_x2 (poly16_t * __a, poly16x8x2_t __val) - { - __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1); - __builtin_aarch64_st1x2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_s32_x2 (int32_t * __a, int32x4x2_t val) -+vst1q_s32_x2 (int32_t * __a, int32x4x2_t __val) - { - __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[1], 1); - __builtin_aarch64_st1x2v4si ((__builtin_aarch64_simd_si *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_s64_x2 (int64_t * __a, int64x2x2_t val) -+vst1q_s64_x2 (int64_t * __a, int64x2x2_t __val) - { - __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[1], 1); - __builtin_aarch64_st1x2v2di ((__builtin_aarch64_simd_di *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_u8_x2 (uint8_t * __a, uint8x16x2_t val) -+vst1q_u8_x2 (uint8_t * __a, uint8x16x2_t __val) - { - __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1); - __builtin_aarch64_st1x2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); - } - - __extension__ extern 
__inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_u16_x2 (uint16_t * __a, uint16x8x2_t val) -+vst1q_u16_x2 (uint16_t * __a, uint16x8x2_t __val) - { - __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1); - __builtin_aarch64_st1x2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_u32_x2 (uint32_t * __a, uint32x4x2_t val) -+vst1q_u32_x2 (uint32_t * __a, uint32x4x2_t __val) - { - __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[1], 1); - __builtin_aarch64_st1x2v4si ((__builtin_aarch64_simd_si *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_u64_x2 (uint64_t * __a, uint64x2x2_t val) -+vst1q_u64_x2 (uint64_t * __a, uint64x2x2_t __val) - { - __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[1], 1); - __builtin_aarch64_st1x2v2di ((__builtin_aarch64_simd_di *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_f16_x2 (float16_t * __a, float16x8x2_t val) -+vst1q_f16_x2 (float16_t * __a, float16x8x2_t __val) - { - __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv8hf (__o, val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv8hf (__o, val.val[1], 1); -+ __o = __builtin_aarch64_set_qregoiv8hf (__o, __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv8hf (__o, __val.val[1], 1); - __builtin_aarch64_st1x2v8hf (__a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_f32_x2 (float32_t * __a, float32x4x2_t val) -+vst1q_f32_x2 (float32_t * __a, float32x4x2_t __val) - { - __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __val.val[1], 1); - __builtin_aarch64_st1x2v4sf ((__builtin_aarch64_simd_sf *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_f64_x2 (float64_t * __a, float64x2x2_t val) -+vst1q_f64_x2 (float64_t * __a, float64x2x2_t __val) - { - __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) 
__val.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __val.val[1], 1); - __builtin_aarch64_st1x2v2df ((__builtin_aarch64_simd_df *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_p64_x2 (poly64_t * __a, poly64x2x2_t val) -+vst1q_p64_x2 (poly64_t * __a, poly64x2x2_t __val) - { - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, -- (poly64x2_t) val.val[0], 0); -+ (poly64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, -- (poly64x2_t) val.val[1], 1); -+ (poly64x2_t) __val.val[1], 1); - __builtin_aarch64_st1x2v2di ((__builtin_aarch64_simd_di *) __a, __o); - } - -@@ -28228,1483 +28562,1709 @@ vst1q_p64_x2 (poly64_t * __a, poly64x2x2_t val) - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_s64_x3 (int64_t * __a, int64x1x3_t val) -+vst1_s64_x3 (int64_t * __a, int64x1x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- int64x2x3_t temp; -- temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); -- temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); -- temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); -- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2); -+ int64x2x3_t __temp; -+ __temp.val[0] = vcombine_s64 (__val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); -+ __temp.val[1] = vcombine_s64 (__val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); -+ __temp.val[2] = vcombine_s64 (__val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); -+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[2], 2); - __builtin_aarch64_st1x3di ((__builtin_aarch64_simd_di *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_u64_x3 (uint64_t * __a, uint64x1x3_t val) -+vst1_u64_x3 (uint64_t * __a, uint64x1x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- uint64x2x3_t temp; -- temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2); -+ uint64x2x3_t __temp; -+ __temp.val[0] = vcombine_u64 (__val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_u64 (__val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); -+ __temp.val[2] = vcombine_u64 (__val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[2], 2); - __builtin_aarch64_st1x3di ((__builtin_aarch64_simd_di *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) --vst1_f64_x3 (float64_t * __a, float64x1x3_t val) -+vst1_f64_x3 (float64_t * __a, float64x1x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- float64x2x3_t temp; -- temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[2], 2); -+ float64x2x3_t __temp; -+ __temp.val[0] = vcombine_f64 (__val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_f64 (__val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); -+ __temp.val[2] = vcombine_f64 (__val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[2], 2); - __builtin_aarch64_st1x3df ((__builtin_aarch64_simd_df *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_s8_x3 (int8_t * __a, int8x8x3_t val) -+vst1_s8_x3 (int8_t * __a, int8x8x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- int8x16x3_t temp; -- temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); -- temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); -- temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); -+ int8x16x3_t __temp; -+ __temp.val[0] = vcombine_s8 (__val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); -+ __temp.val[1] = vcombine_s8 (__val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); -+ __temp.val[2] = vcombine_s8 (__val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2); - __builtin_aarch64_st1x3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_p8_x3 (poly8_t * __a, poly8x8x3_t val) -+vst1_p8_x3 (poly8_t * __a, poly8x8x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- poly8x16x3_t temp; -- temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); -+ poly8x16x3_t __temp; -+ __temp.val[0] = vcombine_p8 (__val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_p8 (__val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); -+ __temp.val[2] = vcombine_p8 
(__val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2); - __builtin_aarch64_st1x3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_s16_x3 (int16_t * __a, int16x4x3_t val) -+vst1_s16_x3 (int16_t * __a, int16x4x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- int16x8x3_t temp; -- temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); -- temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); -- temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); -+ int16x8x3_t __temp; -+ __temp.val[0] = vcombine_s16 (__val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); -+ __temp.val[1] = vcombine_s16 (__val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); -+ __temp.val[2] = vcombine_s16 (__val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2); - __builtin_aarch64_st1x3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_p16_x3 (poly16_t * __a, poly16x4x3_t val) -+vst1_p16_x3 (poly16_t * __a, poly16x4x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- poly16x8x3_t temp; -- temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); -+ poly16x8x3_t __temp; -+ __temp.val[0] = vcombine_p16 (__val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_p16 (__val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); -+ __temp.val[2] = vcombine_p16 (__val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2); - __builtin_aarch64_st1x3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_s32_x3 (int32_t * __a, int32x2x3_t val) -+vst1_s32_x3 (int32_t * __a, int32x2x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- int32x4x3_t temp; -- temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); -- temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); -- temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); -- __o = 
__builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2); -+ int32x4x3_t __temp; -+ __temp.val[0] = vcombine_s32 (__val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); -+ __temp.val[1] = vcombine_s32 (__val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); -+ __temp.val[2] = vcombine_s32 (__val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); -+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[2], 2); - __builtin_aarch64_st1x3v2si ((__builtin_aarch64_simd_si *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_u8_x3 (uint8_t * __a, uint8x8x3_t val) -+vst1_u8_x3 (uint8_t * __a, uint8x8x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- uint8x16x3_t temp; -- temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); -+ uint8x16x3_t __temp; -+ __temp.val[0] = vcombine_u8 (__val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_u8 (__val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); -+ __temp.val[2] = vcombine_u8 (__val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2); - __builtin_aarch64_st1x3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_u16_x3 (uint16_t * __a, uint16x4x3_t val) -+vst1_u16_x3 (uint16_t * __a, uint16x4x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- uint16x8x3_t temp; -- temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); -+ uint16x8x3_t __temp; -+ __temp.val[0] = vcombine_u16 (__val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_u16 (__val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); -+ __temp.val[2] = vcombine_u16 (__val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2); - __builtin_aarch64_st1x3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) --vst1_u32_x3 (uint32_t * __a, uint32x2x3_t val) -+vst1_u32_x3 (uint32_t * __a, uint32x2x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- uint32x4x3_t temp; -- temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2); -+ uint32x4x3_t __temp; -+ __temp.val[0] = vcombine_u32 (__val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_u32 (__val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); -+ __temp.val[2] = vcombine_u32 (__val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[2], 2); - __builtin_aarch64_st1x3v2si ((__builtin_aarch64_simd_si *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_f16_x3 (float16_t * __a, float16x4x3_t val) -+vst1_f16_x3 (float16_t * __a, float16x4x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- float16x8x3_t temp; -- temp.val[0] = vcombine_f16 (val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_f16 (val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_f16 (val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[2], 2); -+ float16x8x3_t __temp; -+ __temp.val[0] = vcombine_f16 (__val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_f16 (__val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); -+ __temp.val[2] = vcombine_f16 (__val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[2], 2); - __builtin_aarch64_st1x3v4hf ((__builtin_aarch64_simd_hf *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_f32_x3 (float32_t * __a, float32x2x3_t val) -+vst1_f32_x3 (float32_t * __a, float32x2x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- float32x4x3_t temp; -- temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[2], 2); -+ float32x4x3_t __temp; -+ __temp.val[0] = vcombine_f32 (__val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_f32 
(__val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); -+ __temp.val[2] = vcombine_f32 (__val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[2], 2); - __builtin_aarch64_st1x3v2sf ((__builtin_aarch64_simd_sf *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1_p64_x3 (poly64_t * __a, poly64x1x3_t val) -+vst1_p64_x3 (poly64_t * __a, poly64x1x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- poly64x2x3_t temp; -- temp.val[0] = vcombine_p64 (val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_p64 (val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_p64 (val.val[2], vcreate_p64 (__AARCH64_UINT64_C (0))); -+ poly64x2x3_t __temp; -+ __temp.val[0] = vcombine_p64 (__val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_p64 (__val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); -+ __temp.val[2] = vcombine_p64 (__val.val[2], vcreate_p64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, -- (poly64x2_t) temp.val[0], 0); -+ (poly64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, -- (poly64x2_t) temp.val[1], 1); -+ (poly64x2_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, -- (poly64x2_t) temp.val[2], 2); -+ (poly64x2_t) __temp.val[2], 2); - __builtin_aarch64_st1x3di ((__builtin_aarch64_simd_di *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_s8_x3 (int8_t * __a, int8x16x3_t val) -+vst1q_s8_x3 (int8_t * __a, int8x16x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2); - __builtin_aarch64_st1x3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_p8_x3 (poly8_t * __a, poly8x16x3_t val) -+vst1q_p8_x3 (poly8_t * __a, poly8x16x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2); - __builtin_aarch64_st1x3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_s16_x3 (int16_t * __a, int16x8x3_t val) -+vst1q_s16_x3 (int16_t * __a, int16x8x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv8hi (__o, 
(int16x8_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2); - __builtin_aarch64_st1x3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_p16_x3 (poly16_t * __a, poly16x8x3_t val) -+vst1q_p16_x3 (poly16_t * __a, poly16x8x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2); - __builtin_aarch64_st1x3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_s32_x3 (int32_t * __a, int32x4x3_t val) -+vst1q_s32_x3 (int32_t * __a, int32x4x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[2], 2); - __builtin_aarch64_st1x3v4si ((__builtin_aarch64_simd_si *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_s64_x3 (int64_t * __a, int64x2x3_t val) -+vst1q_s64_x3 (int64_t * __a, int64x2x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[2], 2); - __builtin_aarch64_st1x3v2di ((__builtin_aarch64_simd_di *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_u8_x3 (uint8_t * __a, uint8x16x3_t val) -+vst1q_u8_x3 (uint8_t * __a, uint8x16x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2); - 
__builtin_aarch64_st1x3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_u16_x3 (uint16_t * __a, uint16x8x3_t val) -+vst1q_u16_x3 (uint16_t * __a, uint16x8x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2); - __builtin_aarch64_st1x3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_u32_x3 (uint32_t * __a, uint32x4x3_t val) -+vst1q_u32_x3 (uint32_t * __a, uint32x4x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[2], 2); - __builtin_aarch64_st1x3v4si ((__builtin_aarch64_simd_si *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_u64_x3 (uint64_t * __a, uint64x2x3_t val) -+vst1q_u64_x3 (uint64_t * __a, uint64x2x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[2], 2); - __builtin_aarch64_st1x3v2di ((__builtin_aarch64_simd_di *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_f16_x3 (float16_t * __a, float16x8x3_t val) -+vst1q_f16_x3 (float16_t * __a, float16x8x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[2], 2); - __builtin_aarch64_st1x3v8hf ((__builtin_aarch64_simd_hf *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_f32_x3 (float32_t * __a, float32x4x3_t val) -+vst1q_f32_x3 (float32_t * __a, float32x4x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) 
val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[2], 2); - __builtin_aarch64_st1x3v4sf ((__builtin_aarch64_simd_sf *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_f64_x3 (float64_t * __a, float64x2x3_t val) -+vst1q_f64_x3 (float64_t * __a, float64x2x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[2], 2); - __builtin_aarch64_st1x3v2df ((__builtin_aarch64_simd_df *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst1q_p64_x3 (poly64_t * __a, poly64x2x3_t val) -+vst1q_p64_x3 (poly64_t * __a, poly64x2x3_t __val) - { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, -- (poly64x2_t) val.val[0], 0); -+ (poly64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, -- (poly64x2_t) val.val[1], 1); -+ (poly64x2_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, -- (poly64x2_t) val.val[2], 2); -+ (poly64x2_t) __val.val[2], 2); - __builtin_aarch64_st1x3v2di ((__builtin_aarch64_simd_di *) __a, __o); - } - --/* vstn */ -+/* vst1(q)_x4. 
*/ - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst2_s64 (int64_t * __a, int64x1x2_t val) -+vst1_s8_x4 (int8_t * __a, int8x8x4_t val) - { -- __builtin_aarch64_simd_oi __o; -- int64x2x2_t temp; -- temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); -- temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1); -- __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); -+ union { int8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; -+ __builtin_aarch64_st1x4v8qi ((__builtin_aarch64_simd_qi *) __a, __u.__o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst2_u64 (uint64_t * __a, uint64x1x2_t val) -+vst1q_s8_x4 (int8_t * __a, int8x16x4_t val) - { -- __builtin_aarch64_simd_oi __o; -- uint64x2x2_t temp; -- temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1); -- __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); -+ union { int8x16x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; -+ __builtin_aarch64_st1x4v16qi ((__builtin_aarch64_simd_qi *) __a, __u.__o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst2_f64 (float64_t * __a, float64x1x2_t val) -+vst1_s16_x4 (int16_t * __a, int16x4x4_t val) - { -- __builtin_aarch64_simd_oi __o; -- float64x2x2_t temp; -- temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[1], 1); -- __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o); -+ union { int16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; -+ __builtin_aarch64_st1x4v4hi ((__builtin_aarch64_simd_hi *) __a, __u.__o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst2_s8 (int8_t * __a, int8x8x2_t val) -+vst1q_s16_x4 (int16_t * __a, int16x8x4_t val) - { -- __builtin_aarch64_simd_oi __o; -- int8x16x2_t temp; -- temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); -- temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); -- __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); -+ union { int16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; -+ __builtin_aarch64_st1x4v8hi ((__builtin_aarch64_simd_hi *) __a, __u.__o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst2_p8 (poly8_t * __a, poly8x8x2_t val) -+vst1_s32_x4 (int32_t * __a, int32x2x4_t val) - { -- __builtin_aarch64_simd_oi __o; -- poly8x16x2_t temp; -- temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C 
(0))); -- temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); -- __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); -+ union { int32x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; -+ __builtin_aarch64_st1x4v2si ((__builtin_aarch64_simd_si *) __a, __u.__o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst2_s16 (int16_t * __a, int16x4x2_t val) -+vst1q_s32_x4 (int32_t * __a, int32x4x4_t val) - { -- __builtin_aarch64_simd_oi __o; -- int16x8x2_t temp; -- temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); -- temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); -- __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); -+ union { int32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; -+ __builtin_aarch64_st1x4v4si ((__builtin_aarch64_simd_si *) __a, __u.__o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst2_p16 (poly16_t * __a, poly16x4x2_t val) -+vst1_u8_x4 (uint8_t * __a, uint8x8x4_t val) - { -- __builtin_aarch64_simd_oi __o; -- poly16x8x2_t temp; -- temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); -- __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); -+ union { uint8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; -+ __builtin_aarch64_st1x4v8qi ((__builtin_aarch64_simd_qi *) __a, __u.__o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst2_s32 (int32_t * __a, int32x2x2_t val) -+vst1q_u8_x4 (uint8_t * __a, uint8x16x4_t val) - { -- __builtin_aarch64_simd_oi __o; -- int32x4x2_t temp; -- temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); -- temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1); -- __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o); -+ union { uint8x16x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; -+ __builtin_aarch64_st1x4v16qi ((__builtin_aarch64_simd_qi *) __a, __u.__o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst2_u8 (uint8_t * __a, uint8x8x2_t val) -+vst1_u16_x4 (uint16_t * __a, uint16x4x4_t val) - { -- __builtin_aarch64_simd_oi __o; -- uint8x16x2_t temp; -- temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); -- __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); -+ union { 
uint16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; -+ __builtin_aarch64_st1x4v4hi ((__builtin_aarch64_simd_hi *) __a, __u.__o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst2_u16 (uint16_t * __a, uint16x4x2_t val) -+vst1q_u16_x4 (uint16_t * __a, uint16x8x4_t val) - { -- __builtin_aarch64_simd_oi __o; -- uint16x8x2_t temp; -- temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); -- __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); -+ union { uint16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; -+ __builtin_aarch64_st1x4v8hi ((__builtin_aarch64_simd_hi *) __a, __u.__o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst2_u32 (uint32_t * __a, uint32x2x2_t val) -+vst1_u32_x4 (uint32_t * __a, uint32x2x4_t val) - { -- __builtin_aarch64_simd_oi __o; -- uint32x4x2_t temp; -- temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1); -- __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o); -+ union { uint32x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; -+ __builtin_aarch64_st1x4v2si ((__builtin_aarch64_simd_si *) __a, __u.__o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst2_f16 (float16_t * __a, float16x4x2_t val) -+vst1q_u32_x4 (uint32_t * __a, uint32x4x4_t val) - { -- __builtin_aarch64_simd_oi __o; -- float16x8x2_t temp; -- temp.val[0] = vcombine_f16 (val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_f16 (val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv8hf (__o, temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv8hf (__o, temp.val[1], 1); -- __builtin_aarch64_st2v4hf (__a, __o); -+ union { uint32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; -+ __builtin_aarch64_st1x4v4si ((__builtin_aarch64_simd_si *) __a, __u.__o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst2_f32 (float32_t * __a, float32x2x2_t val) -+vst1_f16_x4 (float16_t * __a, float16x4x4_t val) - { -- __builtin_aarch64_simd_oi __o; -- float32x4x2_t temp; -- temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[1], 1); -- __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o); -+ union { float16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; -+ __builtin_aarch64_st1x4v4hf ((__builtin_aarch64_simd_hf *) __a, __u.__o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst2_p64 (poly64_t * __a, poly64x1x2_t val) -+vst1q_f16_x4 (float16_t * __a, float16x8x4_t 
val) - { -- __builtin_aarch64_simd_oi __o; -- poly64x2x2_t temp; -- temp.val[0] = vcombine_p64 (val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_p64 (val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, -- (poly64x2_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, -- (poly64x2_t) temp.val[1], 1); -- __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); -+ union { float16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; -+ __builtin_aarch64_st1x4v8hf ((__builtin_aarch64_simd_hf *) __a, __u.__o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst2q_s8 (int8_t * __a, int8x16x2_t val) -+vst1_f32_x4 (float32_t * __a, float32x2x4_t val) - { -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1); -- __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); -+ union { float32x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; -+ __builtin_aarch64_st1x4v2sf ((__builtin_aarch64_simd_sf *) __a, __u.__o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst2q_p8 (poly8_t * __a, poly8x16x2_t val) -+vst1q_f32_x4 (float32_t * __a, float32x4x4_t val) - { -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1); -- __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); -+ union { float32x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; -+ __builtin_aarch64_st1x4v4sf ((__builtin_aarch64_simd_sf *) __a, __u.__o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst2q_s16 (int16_t * __a, int16x8x2_t val) -+vst1_p8_x4 (poly8_t * __a, poly8x8x4_t val) - { -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1); -- __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); -+ union { poly8x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; -+ __builtin_aarch64_st1x4v8qi ((__builtin_aarch64_simd_qi *) __a, __u.__o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst2q_p16 (poly16_t * __a, poly16x8x2_t val) -+vst1q_p8_x4 (poly8_t * __a, poly8x16x4_t val) - { -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1); -- __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); -+ union { poly8x16x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; -+ __builtin_aarch64_st1x4v16qi ((__builtin_aarch64_simd_qi *) __a, __u.__o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst2q_s32 (int32_t * __a, int32x4x2_t val) -+vst1_p16_x4 (poly16_t * __a, poly16x4x4_t val) - { -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1); -- __builtin_aarch64_st2v4si 
((__builtin_aarch64_simd_si *) __a, __o); -+ union { poly16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; -+ __builtin_aarch64_st1x4v4hi ((__builtin_aarch64_simd_hi *) __a, __u.__o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst2q_s64 (int64_t * __a, int64x2x2_t val) -+vst1q_p16_x4 (poly16_t * __a, poly16x8x4_t val) - { -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1); -- __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o); -+ union { poly16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; -+ __builtin_aarch64_st1x4v8hi ((__builtin_aarch64_simd_hi *) __a, __u.__o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst2q_u8 (uint8_t * __a, uint8x16x2_t val) -+vst1_s64_x4 (int64_t * __a, int64x1x4_t val) - { -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1); -- __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); -+ union { int64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; -+ __builtin_aarch64_st1x4di ((__builtin_aarch64_simd_di *) __a, __u.__o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst2q_u16 (uint16_t * __a, uint16x8x2_t val) -+vst1_u64_x4 (uint64_t * __a, uint64x1x4_t val) - { -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1); -- __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); -+ union { uint64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; -+ __builtin_aarch64_st1x4di ((__builtin_aarch64_simd_di *) __a, __u.__o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst2q_u32 (uint32_t * __a, uint32x4x2_t val) -+vst1_p64_x4 (poly64_t * __a, poly64x1x4_t val) - { -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1); -- __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o); -+ union { poly64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; -+ __builtin_aarch64_st1x4di ((__builtin_aarch64_simd_di *) __a, __u.__o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst2q_u64 (uint64_t * __a, uint64x2x2_t val) -+vst1q_s64_x4 (int64_t * __a, int64x2x4_t val) - { -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1); -- __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o); -+ union { int64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; -+ __builtin_aarch64_st1x4v2di ((__builtin_aarch64_simd_di *) __a, __u.__o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst2q_f16 (float16_t * __a, float16x8x2_t val) -+vst1q_u64_x4 (uint64_t * __a, uint64x2x4_t val) - { -- __builtin_aarch64_simd_oi __o; 
-- __o = __builtin_aarch64_set_qregoiv8hf (__o, val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv8hf (__o, val.val[1], 1); -- __builtin_aarch64_st2v8hf (__a, __o); -+ union { uint64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; -+ __builtin_aarch64_st1x4v2di ((__builtin_aarch64_simd_di *) __a, __u.__o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst2q_f32 (float32_t * __a, float32x4x2_t val) -+vst1q_p64_x4 (poly64_t * __a, poly64x2x4_t val) - { -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[1], 1); -- __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __o); -+ union { poly64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; -+ __builtin_aarch64_st1x4v2di ((__builtin_aarch64_simd_di *) __a, __u.__o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst2q_f64 (float64_t * __a, float64x2x2_t val) -+vst1_f64_x4 (float64_t * __a, float64x1x4_t val) - { -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[1], 1); -- __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __o); -+ union { float64x1x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; -+ __builtin_aarch64_st1x4df ((__builtin_aarch64_simd_df *) __a, __u.__o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst2q_p64 (poly64_t * __a, poly64x2x2_t val) -+vst1q_f64_x4 (float64_t * __a, float64x2x4_t val) - { -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, -- (poly64x2_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, -- (poly64x2_t) val.val[1], 1); -- __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o); -+ union { float64x2x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; -+ __builtin_aarch64_st1x4v2df ((__builtin_aarch64_simd_df *) __a, __u.__o); - } - -+/* vstn */ -+ - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst3_s64 (int64_t * __a, int64x1x3_t val) -+vst2_s64 (int64_t * __a, int64x1x2_t __val) - { -- __builtin_aarch64_simd_ci __o; -- int64x2x3_t temp; -- temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); -- temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); -- temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); -- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2); -- __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o); -+ __builtin_aarch64_simd_oi __o; -+ int64x2x2_t __temp; -+ __temp.val[0] = vcombine_s64 (__val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); -+ __temp.val[1] = vcombine_s64 (__val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); -+ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[1], 1); -+ __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); - } - - __extension__ extern __inline void - 
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst3_u64 (uint64_t * __a, uint64x1x3_t val) -+vst2_u64 (uint64_t * __a, uint64x1x2_t __val) - { -- __builtin_aarch64_simd_ci __o; -- uint64x2x3_t temp; -- temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2); -- __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o); -+ __builtin_aarch64_simd_oi __o; -+ uint64x2x2_t __temp; -+ __temp.val[0] = vcombine_u64 (__val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_u64 (__val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __temp.val[1], 1); -+ __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst2_f64 (float64_t * __a, float64x1x2_t __val) -+{ -+ __builtin_aarch64_simd_oi __o; -+ float64x2x2_t __temp; -+ __temp.val[0] = vcombine_f64 (__val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_f64 (__val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __temp.val[1], 1); -+ __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst2_s8 (int8_t * __a, int8x8x2_t __val) -+{ -+ __builtin_aarch64_simd_oi __o; -+ int8x16x2_t __temp; -+ __temp.val[0] = vcombine_s8 (__val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); -+ __temp.val[1] = vcombine_s8 (__val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); -+ __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst2_p8 (poly8_t * __a, poly8x8x2_t __val) -+{ -+ __builtin_aarch64_simd_oi __o; -+ poly8x16x2_t __temp; -+ __temp.val[0] = vcombine_p8 (__val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_p8 (__val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); -+ __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst2_s16 (int16_t * __a, int16x4x2_t __val) -+{ -+ __builtin_aarch64_simd_oi __o; -+ int16x8x2_t __temp; -+ __temp.val[0] = vcombine_s16 (__val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); -+ __temp.val[1] = vcombine_s16 (__val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); -+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) 
__temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1); -+ __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst2_p16 (poly16_t * __a, poly16x4x2_t __val) -+{ -+ __builtin_aarch64_simd_oi __o; -+ poly16x8x2_t __temp; -+ __temp.val[0] = vcombine_p16 (__val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_p16 (__val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1); -+ __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst2_s32 (int32_t * __a, int32x2x2_t __val) -+{ -+ __builtin_aarch64_simd_oi __o; -+ int32x4x2_t __temp; -+ __temp.val[0] = vcombine_s32 (__val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); -+ __temp.val[1] = vcombine_s32 (__val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); -+ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[1], 1); -+ __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst2_u8 (uint8_t * __a, uint8x8x2_t __val) -+{ -+ __builtin_aarch64_simd_oi __o; -+ uint8x16x2_t __temp; -+ __temp.val[0] = vcombine_u8 (__val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_u8 (__val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __temp.val[1], 1); -+ __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst2_u16 (uint16_t * __a, uint16x4x2_t __val) -+{ -+ __builtin_aarch64_simd_oi __o; -+ uint16x8x2_t __temp; -+ __temp.val[0] = vcombine_u16 (__val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_u16 (__val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __temp.val[1], 1); -+ __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst2_u32 (uint32_t * __a, uint32x2x2_t __val) -+{ -+ __builtin_aarch64_simd_oi __o; -+ uint32x4x2_t __temp; -+ __temp.val[0] = vcombine_u32 (__val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_u32 (__val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __temp.val[1], 1); -+ __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst2_f16 (float16_t * __a, float16x4x2_t __val) -+{ -+ __builtin_aarch64_simd_oi __o; -+ float16x8x2_t __temp; -+ __temp.val[0] = vcombine_f16 
(__val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_f16 (__val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregoiv8hf (__o, __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv8hf (__o, __temp.val[1], 1); -+ __builtin_aarch64_st2v4hf (__a, __o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst2_f32 (float32_t * __a, float32x2x2_t __val) -+{ -+ __builtin_aarch64_simd_oi __o; -+ float32x4x2_t __temp; -+ __temp.val[0] = vcombine_f32 (__val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_f32 (__val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __temp.val[1], 1); -+ __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst2_p64 (poly64_t * __a, poly64x1x2_t __val) -+{ -+ __builtin_aarch64_simd_oi __o; -+ poly64x2x2_t __temp; -+ __temp.val[0] = vcombine_p64 (__val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_p64 (__val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, -+ (poly64x2_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, -+ (poly64x2_t) __temp.val[1], 1); -+ __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst2q_s8 (int8_t * __a, int8x16x2_t __val) -+{ -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1); -+ __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst2q_p8 (poly8_t * __a, poly8x16x2_t __val) -+{ -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1); -+ __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst2q_s16 (int16_t * __a, int16x8x2_t __val) -+{ -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1); -+ __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst2q_p16 (poly16_t * __a, poly16x8x2_t __val) -+{ -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1); -+ __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst2q_s32 (int32_t * __a, int32x4x2_t __val) -+{ -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_set_qregoiv4si (__o, 
(int32x4_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[1], 1); -+ __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst2q_s64 (int64_t * __a, int64x2x2_t __val) -+{ -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[1], 1); -+ __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst2q_u8 (uint8_t * __a, uint8x16x2_t __val) -+{ -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) __val.val[1], 1); -+ __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst2q_u16 (uint16_t * __a, uint16x8x2_t __val) -+{ -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) __val.val[1], 1); -+ __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst2q_u32 (uint32_t * __a, uint32x4x2_t __val) -+{ -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __val.val[1], 1); -+ __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst2q_u64 (uint64_t * __a, uint64x2x2_t __val) -+{ -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) __val.val[1], 1); -+ __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst2q_f16 (float16_t * __a, float16x8x2_t __val) -+{ -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_set_qregoiv8hf (__o, __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv8hf (__o, __val.val[1], 1); -+ __builtin_aarch64_st2v8hf (__a, __o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst2q_f32 (float32_t * __a, float32x4x2_t __val) -+{ -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) __val.val[1], 1); -+ __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst2q_f64 (float64_t * __a, float64x2x2_t __val) -+{ -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) __val.val[1], 1); -+ __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) 
__a, __o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst2q_p64 (poly64_t * __a, poly64x2x2_t __val) -+{ -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, -+ (poly64x2_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv2di_ssps (__o, -+ (poly64x2_t) __val.val[1], 1); -+ __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst3_s64 (int64_t * __a, int64x1x3_t __val) -+{ -+ __builtin_aarch64_simd_ci __o; -+ int64x2x3_t __temp; -+ __temp.val[0] = vcombine_s64 (__val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); -+ __temp.val[1] = vcombine_s64 (__val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); -+ __temp.val[2] = vcombine_s64 (__val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); -+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[2], 2); -+ __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst3_u64 (uint64_t * __a, uint64x1x3_t __val) -+{ -+ __builtin_aarch64_simd_ci __o; -+ uint64x2x3_t __temp; -+ __temp.val[0] = vcombine_u64 (__val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_u64 (__val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); -+ __temp.val[2] = vcombine_u64 (__val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __temp.val[2], 2); -+ __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst3_f64 (float64_t * __a, float64x1x3_t val) -+vst3_f64 (float64_t * __a, float64x1x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- float64x2x3_t temp; -- temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[2], 2); -+ float64x2x3_t __temp; -+ __temp.val[0] = vcombine_f64 (__val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_f64 (__val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); -+ __temp.val[2] = vcombine_f64 (__val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __temp.val[2], 2); - __builtin_aarch64_st3df ((__builtin_aarch64_simd_df *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst3_s8 (int8_t * __a, int8x8x3_t val) -+vst3_s8 (int8_t * __a, 
int8x8x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- int8x16x3_t temp; -- temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); -- temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); -- temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); -+ int8x16x3_t __temp; -+ __temp.val[0] = vcombine_s8 (__val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); -+ __temp.val[1] = vcombine_s8 (__val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); -+ __temp.val[2] = vcombine_s8 (__val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2); - __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst3_p8 (poly8_t * __a, poly8x8x3_t val) -+vst3_p8 (poly8_t * __a, poly8x8x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- poly8x16x3_t temp; -- temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); -+ poly8x16x3_t __temp; -+ __temp.val[0] = vcombine_p8 (__val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_p8 (__val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); -+ __temp.val[2] = vcombine_p8 (__val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2); - __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst3_s16 (int16_t * __a, int16x4x3_t val) -+vst3_s16 (int16_t * __a, int16x4x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- int16x8x3_t temp; -- temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); -- temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); -- temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); -+ int16x8x3_t __temp; -+ __temp.val[0] = vcombine_s16 (__val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); -+ __temp.val[1] = vcombine_s16 (__val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); -+ __temp.val[2] = vcombine_s16 (__val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0); -+ __o = 
__builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2); - __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst3_p16 (poly16_t * __a, poly16x4x3_t val) -+vst3_p16 (poly16_t * __a, poly16x4x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- poly16x8x3_t temp; -- temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); -+ poly16x8x3_t __temp; -+ __temp.val[0] = vcombine_p16 (__val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_p16 (__val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); -+ __temp.val[2] = vcombine_p16 (__val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2); - __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst3_s32 (int32_t * __a, int32x2x3_t val) -+vst3_s32 (int32_t * __a, int32x2x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- int32x4x3_t temp; -- temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); -- temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); -- temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); -- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2); -+ int32x4x3_t __temp; -+ __temp.val[0] = vcombine_s32 (__val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); -+ __temp.val[1] = vcombine_s32 (__val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); -+ __temp.val[2] = vcombine_s32 (__val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); -+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[2], 2); - __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst3_u8 (uint8_t * __a, uint8x8x3_t val) -+vst3_u8 (uint8_t * __a, uint8x8x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- uint8x16x3_t temp; -- temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); -- __o = 
__builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); -+ uint8x16x3_t __temp; -+ __temp.val[0] = vcombine_u8 (__val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_u8 (__val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); -+ __temp.val[2] = vcombine_u8 (__val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __temp.val[2], 2); - __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst3_u16 (uint16_t * __a, uint16x4x3_t val) -+vst3_u16 (uint16_t * __a, uint16x4x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- uint16x8x3_t temp; -- temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); -+ uint16x8x3_t __temp; -+ __temp.val[0] = vcombine_u16 (__val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_u16 (__val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); -+ __temp.val[2] = vcombine_u16 (__val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __temp.val[2], 2); - __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst3_u32 (uint32_t * __a, uint32x2x3_t val) -+vst3_u32 (uint32_t * __a, uint32x2x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- uint32x4x3_t temp; -- temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2); -+ uint32x4x3_t __temp; -+ __temp.val[0] = vcombine_u32 (__val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_u32 (__val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); -+ __temp.val[2] = vcombine_u32 (__val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __temp.val[2], 2); - __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst3_f16 (float16_t * __a, float16x4x3_t val) -+vst3_f16 (float16_t * __a, float16x4x3_t __val) - { - 
__builtin_aarch64_simd_ci __o; -- float16x8x3_t temp; -- temp.val[0] = vcombine_f16 (val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_f16 (val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_f16 (val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[2], 2); -+ float16x8x3_t __temp; -+ __temp.val[0] = vcombine_f16 (__val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_f16 (__val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); -+ __temp.val[2] = vcombine_f16 (__val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __temp.val[2], 2); - __builtin_aarch64_st3v4hf ((__builtin_aarch64_simd_hf *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst3_f32 (float32_t * __a, float32x2x3_t val) -+vst3_f32 (float32_t * __a, float32x2x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- float32x4x3_t temp; -- temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[2], 2); -+ float32x4x3_t __temp; -+ __temp.val[0] = vcombine_f32 (__val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_f32 (__val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); -+ __temp.val[2] = vcombine_f32 (__val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __temp.val[2], 2); - __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst3_p64 (poly64_t * __a, poly64x1x3_t val) -+vst3_p64 (poly64_t * __a, poly64x1x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- poly64x2x3_t temp; -- temp.val[0] = vcombine_p64 (val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_p64 (val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_p64 (val.val[2], vcreate_p64 (__AARCH64_UINT64_C (0))); -+ poly64x2x3_t __temp; -+ __temp.val[0] = vcombine_p64 (__val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_p64 (__val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); -+ __temp.val[2] = vcombine_p64 (__val.val[2], vcreate_p64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, -- (poly64x2_t) temp.val[0], 0); -+ (poly64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, -- (poly64x2_t) temp.val[1], 1); -+ (poly64x2_t) __temp.val[1], 1); - __o = 
__builtin_aarch64_set_qregciv2di_ssps (__o, -- (poly64x2_t) temp.val[2], 2); -+ (poly64x2_t) __temp.val[2], 2); - __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst3q_s8 (int8_t * __a, int8x16x3_t val) -+vst3q_s8 (int8_t * __a, int8x16x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2); - __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst3q_p8 (poly8_t * __a, poly8x16x3_t val) -+vst3q_p8 (poly8_t * __a, poly8x16x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2); - __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst3q_s16 (int16_t * __a, int16x8x3_t val) -+vst3q_s16 (int16_t * __a, int16x8x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2); - __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst3q_p16 (poly16_t * __a, poly16x8x3_t val) -+vst3q_p16 (poly16_t * __a, poly16x8x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2); - __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst3q_s32 (int32_t * __a, int32x4x3_t val) -+vst3q_s32 (int32_t * __a, int32x4x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- __o = 
__builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[2], 2); - __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst3q_s64 (int64_t * __a, int64x2x3_t val) -+vst3q_s64 (int64_t * __a, int64x2x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[2], 2); - __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst3q_u8 (uint8_t * __a, uint8x16x3_t val) -+vst3q_u8 (uint8_t * __a, uint8x16x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) __val.val[2], 2); - __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst3q_u16 (uint16_t * __a, uint16x8x3_t val) -+vst3q_u16 (uint16_t * __a, uint16x8x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) __val.val[2], 2); - __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst3q_u32 (uint32_t * __a, uint32x4x3_t val) -+vst3q_u32 (uint32_t * __a, uint32x4x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) 
__val.val[2], 2); - __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst3q_u64 (uint64_t * __a, uint64x2x3_t val) -+vst3q_u64 (uint64_t * __a, uint64x2x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) __val.val[2], 2); - __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst3q_f16 (float16_t * __a, float16x8x3_t val) -+vst3q_f16 (float16_t * __a, float16x8x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) __val.val[2], 2); - __builtin_aarch64_st3v8hf ((__builtin_aarch64_simd_hf *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst3q_f32 (float32_t * __a, float32x4x3_t val) -+vst3q_f32 (float32_t * __a, float32x4x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) __val.val[2], 2); - __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst3q_f64 (float64_t * __a, float64x2x3_t val) -+vst3q_f64 (float64_t * __a, float64x2x3_t __val) - { - __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) __val.val[2], 2); - __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst3q_p64 (poly64_t * __a, poly64x2x3_t val) -+vst3q_p64 (poly64_t * __a, poly64x2x3_t __val) - { - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, -- 
(poly64x2_t) val.val[0], 0); -+ (poly64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, -- (poly64x2_t) val.val[1], 1); -+ (poly64x2_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregciv2di_ssps (__o, -- (poly64x2_t) val.val[2], 2); -+ (poly64x2_t) __val.val[2], 2); - __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst4_s64 (int64_t * __a, int64x1x4_t val) -+vst4_s64 (int64_t * __a, int64x1x4_t __val) - { - __builtin_aarch64_simd_xi __o; -- int64x2x4_t temp; -- temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); -- temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); -- temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); -- temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (__AARCH64_INT64_C (0))); -- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3); -+ int64x2x4_t __temp; -+ __temp.val[0] = vcombine_s64 (__val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); -+ __temp.val[1] = vcombine_s64 (__val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); -+ __temp.val[2] = vcombine_s64 (__val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); -+ __temp.val[3] = vcombine_s64 (__val.val[3], vcreate_s64 (__AARCH64_INT64_C (0))); -+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[3], 3); - __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst4_u64 (uint64_t * __a, uint64x1x4_t val) -+vst4_u64 (uint64_t * __a, uint64x1x4_t __val) - { - __builtin_aarch64_simd_xi __o; -- uint64x2x4_t temp; -- temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0))); -- temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3); -+ uint64x2x4_t __temp; -+ __temp.val[0] = vcombine_u64 (__val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_u64 (__val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); -+ __temp.val[2] = vcombine_u64 (__val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0))); -+ __temp.val[3] = vcombine_u64 (__val.val[3], vcreate_u64 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[2], 2); -+ __o = 
__builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __temp.val[3], 3); - __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst4_f64 (float64_t * __a, float64x1x4_t val) -+vst4_f64 (float64_t * __a, float64x1x4_t __val) - { - __builtin_aarch64_simd_xi __o; -- float64x2x4_t temp; -- temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); -- temp.val[3] = vcombine_f64 (val.val[3], vcreate_f64 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[3], 3); -+ float64x2x4_t __temp; -+ __temp.val[0] = vcombine_f64 (__val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_f64 (__val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); -+ __temp.val[2] = vcombine_f64 (__val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); -+ __temp.val[3] = vcombine_f64 (__val.val[3], vcreate_f64 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __temp.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __temp.val[3], 3); - __builtin_aarch64_st4df ((__builtin_aarch64_simd_df *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst4_s8 (int8_t * __a, int8x8x4_t val) -+vst4_s8 (int8_t * __a, int8x8x4_t __val) - { - __builtin_aarch64_simd_xi __o; -- int8x16x4_t temp; -- temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); -- temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); -- temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); -- temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (__AARCH64_INT64_C (0))); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3); -+ int8x16x4_t __temp; -+ __temp.val[0] = vcombine_s8 (__val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); -+ __temp.val[1] = vcombine_s8 (__val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); -+ __temp.val[2] = vcombine_s8 (__val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); -+ __temp.val[3] = vcombine_s8 (__val.val[3], vcreate_s8 (__AARCH64_INT64_C (0))); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[3], 3); - __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
--vst4_p8 (poly8_t * __a, poly8x8x4_t val) -+vst4_p8 (poly8_t * __a, poly8x8x4_t __val) - { - __builtin_aarch64_simd_xi __o; -- poly8x16x4_t temp; -- temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); -- temp.val[3] = vcombine_p8 (val.val[3], vcreate_p8 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3); -+ poly8x16x4_t __temp; -+ __temp.val[0] = vcombine_p8 (__val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_p8 (__val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); -+ __temp.val[2] = vcombine_p8 (__val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); -+ __temp.val[3] = vcombine_p8 (__val.val[3], vcreate_p8 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[3], 3); - __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst4_s16 (int16_t * __a, int16x4x4_t val) -+vst4_s16 (int16_t * __a, int16x4x4_t __val) - { - __builtin_aarch64_simd_xi __o; -- int16x8x4_t temp; -- temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); -- temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); -- temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); -- temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (__AARCH64_INT64_C (0))); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3); -+ int16x8x4_t __temp; -+ __temp.val[0] = vcombine_s16 (__val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); -+ __temp.val[1] = vcombine_s16 (__val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); -+ __temp.val[2] = vcombine_s16 (__val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); -+ __temp.val[3] = vcombine_s16 (__val.val[3], vcreate_s16 (__AARCH64_INT64_C (0))); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[3], 3); - __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst4_p16 (poly16_t * __a, poly16x4x4_t val) -+vst4_p16 (poly16_t * __a, poly16x4x4_t __val) - { - __builtin_aarch64_simd_xi __o; -- poly16x8x4_t temp; -- temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_p16 
(val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); -- temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3); -+ poly16x8x4_t __temp; -+ __temp.val[0] = vcombine_p16 (__val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_p16 (__val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); -+ __temp.val[2] = vcombine_p16 (__val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); -+ __temp.val[3] = vcombine_p16 (__val.val[3], vcreate_p16 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[3], 3); - __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst4_s32 (int32_t * __a, int32x2x4_t val) -+vst4_s32 (int32_t * __a, int32x2x4_t __val) - { - __builtin_aarch64_simd_xi __o; -- int32x4x4_t temp; -- temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); -- temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); -- temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); -- temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (__AARCH64_INT64_C (0))); -- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3); -+ int32x4x4_t __temp; -+ __temp.val[0] = vcombine_s32 (__val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); -+ __temp.val[1] = vcombine_s32 (__val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); -+ __temp.val[2] = vcombine_s32 (__val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); -+ __temp.val[3] = vcombine_s32 (__val.val[3], vcreate_s32 (__AARCH64_INT64_C (0))); -+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[3], 3); - __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst4_u8 (uint8_t * __a, uint8x8x4_t val) -+vst4_u8 (uint8_t * __a, uint8x8x4_t __val) - { - __builtin_aarch64_simd_xi __o; -- uint8x16x4_t temp; -- temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); -- temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, 
(int8x16_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3); -+ uint8x16x4_t __temp; -+ __temp.val[0] = vcombine_u8 (__val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_u8 (__val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); -+ __temp.val[2] = vcombine_u8 (__val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); -+ __temp.val[3] = vcombine_u8 (__val.val[3], vcreate_u8 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __temp.val[3], 3); - __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst4_u16 (uint16_t * __a, uint16x4x4_t val) -+vst4_u16 (uint16_t * __a, uint16x4x4_t __val) - { - __builtin_aarch64_simd_xi __o; -- uint16x8x4_t temp; -- temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); -- temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3); -+ uint16x8x4_t __temp; -+ __temp.val[0] = vcombine_u16 (__val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_u16 (__val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); -+ __temp.val[2] = vcombine_u16 (__val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); -+ __temp.val[3] = vcombine_u16 (__val.val[3], vcreate_u16 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __temp.val[3], 3); - __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst4_u32 (uint32_t * __a, uint32x2x4_t val) -+vst4_u32 (uint32_t * __a, uint32x2x4_t __val) - { - __builtin_aarch64_simd_xi __o; -- uint32x4x4_t temp; -- temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0))); -- temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) 
temp.val[3], 3); -+ uint32x4x4_t __temp; -+ __temp.val[0] = vcombine_u32 (__val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_u32 (__val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); -+ __temp.val[2] = vcombine_u32 (__val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0))); -+ __temp.val[3] = vcombine_u32 (__val.val[3], vcreate_u32 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __temp.val[3], 3); - __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst4_f16 (float16_t * __a, float16x4x4_t val) -+vst4_f16 (float16_t * __a, float16x4x4_t __val) - { - __builtin_aarch64_simd_xi __o; -- float16x8x4_t temp; -- temp.val[0] = vcombine_f16 (val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_f16 (val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_f16 (val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0))); -- temp.val[3] = vcombine_f16 (val.val[3], vcreate_f16 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[3], 3); -+ float16x8x4_t __temp; -+ __temp.val[0] = vcombine_f16 (__val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_f16 (__val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); -+ __temp.val[2] = vcombine_f16 (__val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0))); -+ __temp.val[3] = vcombine_f16 (__val.val[3], vcreate_f16 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __temp.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __temp.val[3], 3); - __builtin_aarch64_st4v4hf ((__builtin_aarch64_simd_hf *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst4_f32 (float32_t * __a, float32x2x4_t val) -+vst4_f32 (float32_t * __a, float32x2x4_t __val) - { - __builtin_aarch64_simd_xi __o; -- float32x4x4_t temp; -- temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); -- temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[3], 3); -+ float32x4x4_t __temp; -+ __temp.val[0] = vcombine_f32 (__val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_f32 (__val.val[1], vcreate_f32 (__AARCH64_UINT64_C 
(0))); -+ __temp.val[2] = vcombine_f32 (__val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); -+ __temp.val[3] = vcombine_f32 (__val.val[3], vcreate_f32 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __temp.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __temp.val[3], 3); - __builtin_aarch64_st4v2sf ((__builtin_aarch64_simd_sf *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst4_p64 (poly64_t * __a, poly64x1x4_t val) -+vst4_p64 (poly64_t * __a, poly64x1x4_t __val) - { - __builtin_aarch64_simd_xi __o; -- poly64x2x4_t temp; -- temp.val[0] = vcombine_p64 (val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_p64 (val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_p64 (val.val[2], vcreate_p64 (__AARCH64_UINT64_C (0))); -- temp.val[3] = vcombine_p64 (val.val[3], vcreate_p64 (__AARCH64_UINT64_C (0))); -+ poly64x2x4_t __temp; -+ __temp.val[0] = vcombine_p64 (__val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_p64 (__val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); -+ __temp.val[2] = vcombine_p64 (__val.val[2], vcreate_p64 (__AARCH64_UINT64_C (0))); -+ __temp.val[3] = vcombine_p64 (__val.val[3], vcreate_p64 (__AARCH64_UINT64_C (0))); - __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, -- (poly64x2_t) temp.val[0], 0); -+ (poly64x2_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, -- (poly64x2_t) temp.val[1], 1); -+ (poly64x2_t) __temp.val[1], 1); - __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, -- (poly64x2_t) temp.val[2], 2); -+ (poly64x2_t) __temp.val[2], 2); - __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, -- (poly64x2_t) temp.val[3], 3); -+ (poly64x2_t) __temp.val[3], 3); - __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst4q_s8 (int8_t * __a, int8x16x4_t val) -+vst4q_s8 (int8_t * __a, int8x16x4_t __val) - { - __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[3], 3); - __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst4q_p8 (poly8_t * __a, poly8x16x4_t val) -+vst4q_p8 (poly8_t * __a, poly8x16x4_t __val) - { - __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) 
val.val[3], 3); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[3], 3); - __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst4q_s16 (int16_t * __a, int16x8x4_t val) -+vst4q_s16 (int16_t * __a, int16x8x4_t __val) - { - __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[3], 3); - __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst4q_p16 (poly16_t * __a, poly16x8x4_t val) -+vst4q_p16 (poly16_t * __a, poly16x8x4_t __val) - { - __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[3], 3); - __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst4q_s32 (int32_t * __a, int32x4x4_t val) -+vst4q_s32 (int32_t * __a, int32x4x4_t __val) - { - __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3); -+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[3], 3); - __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst4q_s64 (int64_t * __a, int64x2x4_t val) -+vst4q_s64 (int64_t * __a, int64x2x4_t __val) - { - __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv2di 
(__o, (int64x2_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3); -+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[3], 3); - __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst4q_u8 (uint8_t * __a, uint8x16x4_t val) -+vst4q_u8 (uint8_t * __a, uint8x16x4_t __val) - { - __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) __val.val[3], 3); - __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst4q_u16 (uint16_t * __a, uint16x8x4_t val) -+vst4q_u16 (uint16_t * __a, uint16x8x4_t __val) - { - __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) __val.val[3], 3); - __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst4q_u32 (uint32_t * __a, uint32x4x4_t val) -+vst4q_u32 (uint32_t * __a, uint32x4x4_t __val) - { - __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3); -+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __val.val[3], 3); - __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst4q_u64 (uint64_t * __a, uint64x2x4_t val) -+vst4q_u64 (uint64_t * 
__a, uint64x2x4_t __val) - { - __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3); -+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) __val.val[3], 3); - __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst4q_f16 (float16_t * __a, float16x8x4_t val) -+vst4q_f16 (float16_t * __a, float16x8x4_t __val) - { - __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[3], 3); -+ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __val.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) __val.val[3], 3); - __builtin_aarch64_st4v8hf ((__builtin_aarch64_simd_hf *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst4q_f32 (float32_t * __a, float32x4x4_t val) -+vst4q_f32 (float32_t * __a, float32x4x4_t __val) - { - __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[3], 3); -+ __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __val.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) __val.val[3], 3); - __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst4q_f64 (float64_t * __a, float64x2x4_t val) -+vst4q_f64 (float64_t * __a, float64x2x4_t __val) - { - __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[3], 3); -+ __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __val.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) __val.val[3], 3); - 
__builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __o); - } - - __extension__ extern __inline void - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vst4q_p64 (poly64_t * __a, poly64x2x4_t val) -+vst4q_p64 (poly64_t * __a, poly64x2x4_t __val) - { - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, -- (poly64x2_t) val.val[0], 0); -+ (poly64x2_t) __val.val[0], 0); - __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, -- (poly64x2_t) val.val[1], 1); -+ (poly64x2_t) __val.val[1], 1); - __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, -- (poly64x2_t) val.val[2], 2); -+ (poly64x2_t) __val.val[2], 2); - __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, -- (poly64x2_t) val.val[3], 3); -+ (poly64x2_t) __val.val[3], 3); - __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o); - } - -@@ -29796,53 +30356,53 @@ __extension__ extern __inline int8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vtbx4_s8 (int8x8_t __r, int8x8x4_t __tab, int8x8_t __idx) - { -- int8x8_t result; -- int8x16x2_t temp; -+ int8x8_t __result; -+ int8x16x2_t __temp; - __builtin_aarch64_simd_oi __o; -- temp.val[0] = vcombine_s8 (__tab.val[0], __tab.val[1]); -- temp.val[1] = vcombine_s8 (__tab.val[2], __tab.val[3]); -+ __temp.val[0] = vcombine_s8 (__tab.val[0], __tab.val[1]); -+ __temp.val[1] = vcombine_s8 (__tab.val[2], __tab.val[3]); - __o = __builtin_aarch64_set_qregoiv16qi (__o, -- (int8x16_t) temp.val[0], 0); -+ (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, -- (int8x16_t) temp.val[1], 1); -- result = __builtin_aarch64_tbx4v8qi (__r, __o, __idx); -- return result; -+ (int8x16_t) __temp.val[1], 1); -+ __result = __builtin_aarch64_tbx4v8qi (__r, __o, __idx); -+ return __result; - } - - __extension__ extern __inline uint8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vtbx4_u8 (uint8x8_t __r, uint8x8x4_t __tab, uint8x8_t __idx) - { -- uint8x8_t result; -- uint8x16x2_t temp; -+ uint8x8_t __result; -+ uint8x16x2_t __temp; - __builtin_aarch64_simd_oi __o; -- temp.val[0] = vcombine_u8 (__tab.val[0], __tab.val[1]); -- temp.val[1] = vcombine_u8 (__tab.val[2], __tab.val[3]); -+ __temp.val[0] = vcombine_u8 (__tab.val[0], __tab.val[1]); -+ __temp.val[1] = vcombine_u8 (__tab.val[2], __tab.val[3]); - __o = __builtin_aarch64_set_qregoiv16qi (__o, -- (int8x16_t) temp.val[0], 0); -+ (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, -- (int8x16_t) temp.val[1], 1); -- result = (uint8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o, -+ (int8x16_t) __temp.val[1], 1); -+ __result = (uint8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o, - (int8x8_t)__idx); -- return result; -+ return __result; - } - - __extension__ extern __inline poly8x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vtbx4_p8 (poly8x8_t __r, poly8x8x4_t __tab, uint8x8_t __idx) - { -- poly8x8_t result; -- poly8x16x2_t temp; -+ poly8x8_t __result; -+ poly8x16x2_t __temp; - __builtin_aarch64_simd_oi __o; -- temp.val[0] = vcombine_p8 (__tab.val[0], __tab.val[1]); -- temp.val[1] = vcombine_p8 (__tab.val[2], __tab.val[3]); -+ __temp.val[0] = vcombine_p8 (__tab.val[0], __tab.val[1]); -+ __temp.val[1] = vcombine_p8 (__tab.val[2], __tab.val[3]); - __o = __builtin_aarch64_set_qregoiv16qi (__o, -- (int8x16_t) temp.val[0], 0); -+ (int8x16_t) __temp.val[0], 0); - __o = __builtin_aarch64_set_qregoiv16qi (__o, -- (int8x16_t) temp.val[1], 1); -- result = 
(poly8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o, -+ (int8x16_t) __temp.val[1], 1); -+ __result = (poly8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o, - (int8x8_t)__idx); -- return result; -+ return __result; - } - - /* vtrn */ -@@ -30374,65 +30934,65 @@ vtrn_f16 (float16x4_t __a, float16x4_t __b) - - __extension__ extern __inline float32x2x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtrn_f32 (float32x2_t a, float32x2_t b) -+vtrn_f32 (float32x2_t __a, float32x2_t __b) - { -- return (float32x2x2_t) {vtrn1_f32 (a, b), vtrn2_f32 (a, b)}; -+ return (float32x2x2_t) {vtrn1_f32 (__a, __b), vtrn2_f32 (__a, __b)}; - } - - __extension__ extern __inline poly8x8x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtrn_p8 (poly8x8_t a, poly8x8_t b) -+vtrn_p8 (poly8x8_t __a, poly8x8_t __b) - { -- return (poly8x8x2_t) {vtrn1_p8 (a, b), vtrn2_p8 (a, b)}; -+ return (poly8x8x2_t) {vtrn1_p8 (__a, __b), vtrn2_p8 (__a, __b)}; - } - - __extension__ extern __inline poly16x4x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtrn_p16 (poly16x4_t a, poly16x4_t b) -+vtrn_p16 (poly16x4_t __a, poly16x4_t __b) - { -- return (poly16x4x2_t) {vtrn1_p16 (a, b), vtrn2_p16 (a, b)}; -+ return (poly16x4x2_t) {vtrn1_p16 (__a, __b), vtrn2_p16 (__a, __b)}; - } - - __extension__ extern __inline int8x8x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtrn_s8 (int8x8_t a, int8x8_t b) -+vtrn_s8 (int8x8_t __a, int8x8_t __b) - { -- return (int8x8x2_t) {vtrn1_s8 (a, b), vtrn2_s8 (a, b)}; -+ return (int8x8x2_t) {vtrn1_s8 (__a, __b), vtrn2_s8 (__a, __b)}; - } - - __extension__ extern __inline int16x4x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtrn_s16 (int16x4_t a, int16x4_t b) -+vtrn_s16 (int16x4_t __a, int16x4_t __b) - { -- return (int16x4x2_t) {vtrn1_s16 (a, b), vtrn2_s16 (a, b)}; -+ return (int16x4x2_t) {vtrn1_s16 (__a, __b), vtrn2_s16 (__a, __b)}; - } - - __extension__ extern __inline int32x2x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtrn_s32 (int32x2_t a, int32x2_t b) -+vtrn_s32 (int32x2_t __a, int32x2_t __b) - { -- return (int32x2x2_t) {vtrn1_s32 (a, b), vtrn2_s32 (a, b)}; -+ return (int32x2x2_t) {vtrn1_s32 (__a, __b), vtrn2_s32 (__a, __b)}; - } - - __extension__ extern __inline uint8x8x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtrn_u8 (uint8x8_t a, uint8x8_t b) -+vtrn_u8 (uint8x8_t __a, uint8x8_t __b) - { -- return (uint8x8x2_t) {vtrn1_u8 (a, b), vtrn2_u8 (a, b)}; -+ return (uint8x8x2_t) {vtrn1_u8 (__a, __b), vtrn2_u8 (__a, __b)}; - } - - __extension__ extern __inline uint16x4x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtrn_u16 (uint16x4_t a, uint16x4_t b) -+vtrn_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return (uint16x4x2_t) {vtrn1_u16 (a, b), vtrn2_u16 (a, b)}; -+ return (uint16x4x2_t) {vtrn1_u16 (__a, __b), vtrn2_u16 (__a, __b)}; - } - - __extension__ extern __inline uint32x2x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtrn_u32 (uint32x2_t a, uint32x2_t b) -+vtrn_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return (uint32x2x2_t) {vtrn1_u32 (a, b), vtrn2_u32 (a, b)}; -+ return (uint32x2x2_t) {vtrn1_u32 (__a, __b), vtrn2_u32 (__a, __b)}; - } - - __extension__ extern __inline float16x8x2_t -@@ -30444,65 +31004,65 @@ vtrnq_f16 (float16x8_t __a, float16x8_t __b) - - __extension__ extern __inline float32x4x2_t - __attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) --vtrnq_f32 (float32x4_t a, float32x4_t b) -+vtrnq_f32 (float32x4_t __a, float32x4_t __b) - { -- return (float32x4x2_t) {vtrn1q_f32 (a, b), vtrn2q_f32 (a, b)}; -+ return (float32x4x2_t) {vtrn1q_f32 (__a, __b), vtrn2q_f32 (__a, __b)}; - } - - __extension__ extern __inline poly8x16x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtrnq_p8 (poly8x16_t a, poly8x16_t b) -+vtrnq_p8 (poly8x16_t __a, poly8x16_t __b) - { -- return (poly8x16x2_t) {vtrn1q_p8 (a, b), vtrn2q_p8 (a, b)}; -+ return (poly8x16x2_t) {vtrn1q_p8 (__a, __b), vtrn2q_p8 (__a, __b)}; - } - - __extension__ extern __inline poly16x8x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtrnq_p16 (poly16x8_t a, poly16x8_t b) -+vtrnq_p16 (poly16x8_t __a, poly16x8_t __b) - { -- return (poly16x8x2_t) {vtrn1q_p16 (a, b), vtrn2q_p16 (a, b)}; -+ return (poly16x8x2_t) {vtrn1q_p16 (__a, __b), vtrn2q_p16 (__a, __b)}; - } - - __extension__ extern __inline int8x16x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtrnq_s8 (int8x16_t a, int8x16_t b) -+vtrnq_s8 (int8x16_t __a, int8x16_t __b) - { -- return (int8x16x2_t) {vtrn1q_s8 (a, b), vtrn2q_s8 (a, b)}; -+ return (int8x16x2_t) {vtrn1q_s8 (__a, __b), vtrn2q_s8 (__a, __b)}; - } - - __extension__ extern __inline int16x8x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtrnq_s16 (int16x8_t a, int16x8_t b) -+vtrnq_s16 (int16x8_t __a, int16x8_t __b) - { -- return (int16x8x2_t) {vtrn1q_s16 (a, b), vtrn2q_s16 (a, b)}; -+ return (int16x8x2_t) {vtrn1q_s16 (__a, __b), vtrn2q_s16 (__a, __b)}; - } - - __extension__ extern __inline int32x4x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtrnq_s32 (int32x4_t a, int32x4_t b) -+vtrnq_s32 (int32x4_t __a, int32x4_t __b) - { -- return (int32x4x2_t) {vtrn1q_s32 (a, b), vtrn2q_s32 (a, b)}; -+ return (int32x4x2_t) {vtrn1q_s32 (__a, __b), vtrn2q_s32 (__a, __b)}; - } - - __extension__ extern __inline uint8x16x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtrnq_u8 (uint8x16_t a, uint8x16_t b) -+vtrnq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- return (uint8x16x2_t) {vtrn1q_u8 (a, b), vtrn2q_u8 (a, b)}; -+ return (uint8x16x2_t) {vtrn1q_u8 (__a, __b), vtrn2q_u8 (__a, __b)}; - } - - __extension__ extern __inline uint16x8x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtrnq_u16 (uint16x8_t a, uint16x8_t b) -+vtrnq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- return (uint16x8x2_t) {vtrn1q_u16 (a, b), vtrn2q_u16 (a, b)}; -+ return (uint16x8x2_t) {vtrn1q_u16 (__a, __b), vtrn2q_u16 (__a, __b)}; - } - - __extension__ extern __inline uint32x4x2_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vtrnq_u32 (uint32x4_t a, uint32x4_t b) -+vtrnq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- return (uint32x4x2_t) {vtrn1q_u32 (a, b), vtrn2q_u32 (a, b)}; -+ return (uint32x4x2_t) {vtrn1q_u32 (__a, __b), vtrn2q_u32 (__a, __b)}; - } - - /* vtst */ -@@ -32200,30 +32760,30 @@ vrndxq_f16 (float16x8_t __a) - - __extension__ extern __inline float16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrsqrte_f16 (float16x4_t a) -+vrsqrte_f16 (float16x4_t __a) - { -- return __builtin_aarch64_rsqrtev4hf (a); -+ return __builtin_aarch64_rsqrtev4hf (__a); - } - - __extension__ extern __inline float16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrsqrteq_f16 (float16x8_t a) -+vrsqrteq_f16 (float16x8_t __a) - { 
-- return __builtin_aarch64_rsqrtev8hf (a); -+ return __builtin_aarch64_rsqrtev8hf (__a); - } - - __extension__ extern __inline float16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vsqrt_f16 (float16x4_t a) -+vsqrt_f16 (float16x4_t __a) - { -- return __builtin_aarch64_sqrtv4hf (a); -+ return __builtin_aarch64_sqrtv4hf (__a); - } - - __extension__ extern __inline float16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vsqrtq_f16 (float16x8_t a) -+vsqrtq_f16 (float16x8_t __a) - { -- return __builtin_aarch64_sqrtv8hf (a); -+ return __builtin_aarch64_sqrtv8hf (__a); - } - - /* ARMv8.2-A FP16 two operands vector intrinsics. */ -@@ -32244,16 +32804,16 @@ vaddq_f16 (float16x8_t __a, float16x8_t __b) - - __extension__ extern __inline float16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabd_f16 (float16x4_t a, float16x4_t b) -+vabd_f16 (float16x4_t __a, float16x4_t __b) - { -- return __builtin_aarch64_fabdv4hf (a, b); -+ return __builtin_aarch64_fabdv4hf (__a, __b); - } - - __extension__ extern __inline float16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vabdq_f16 (float16x8_t a, float16x8_t b) -+vabdq_f16 (float16x8_t __a, float16x8_t __b) - { -- return __builtin_aarch64_fabdv8hf (a, b); -+ return __builtin_aarch64_fabdv8hf (__a, __b); - } - - __extension__ extern __inline uint16x4_t -@@ -32538,72 +33098,72 @@ vmulxq_f16 (float16x8_t __a, float16x8_t __b) - - __extension__ extern __inline float16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpadd_f16 (float16x4_t a, float16x4_t b) -+vpadd_f16 (float16x4_t __a, float16x4_t __b) - { -- return __builtin_aarch64_faddpv4hf (a, b); -+ return __builtin_aarch64_faddpv4hf (__a, __b); - } - - __extension__ extern __inline float16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpaddq_f16 (float16x8_t a, float16x8_t b) -+vpaddq_f16 (float16x8_t __a, float16x8_t __b) - { -- return __builtin_aarch64_faddpv8hf (a, b); -+ return __builtin_aarch64_faddpv8hf (__a, __b); - } - - __extension__ extern __inline float16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpmax_f16 (float16x4_t a, float16x4_t b) -+vpmax_f16 (float16x4_t __a, float16x4_t __b) - { -- return __builtin_aarch64_smax_nanpv4hf (a, b); -+ return __builtin_aarch64_smax_nanpv4hf (__a, __b); - } - - __extension__ extern __inline float16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpmaxq_f16 (float16x8_t a, float16x8_t b) -+vpmaxq_f16 (float16x8_t __a, float16x8_t __b) - { -- return __builtin_aarch64_smax_nanpv8hf (a, b); -+ return __builtin_aarch64_smax_nanpv8hf (__a, __b); - } - - __extension__ extern __inline float16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpmaxnm_f16 (float16x4_t a, float16x4_t b) -+vpmaxnm_f16 (float16x4_t __a, float16x4_t __b) - { -- return __builtin_aarch64_smaxpv4hf (a, b); -+ return __builtin_aarch64_smaxpv4hf (__a, __b); - } - - __extension__ extern __inline float16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpmaxnmq_f16 (float16x8_t a, float16x8_t b) -+vpmaxnmq_f16 (float16x8_t __a, float16x8_t __b) - { -- return __builtin_aarch64_smaxpv8hf (a, b); -+ return __builtin_aarch64_smaxpv8hf (__a, __b); - } - - __extension__ extern __inline float16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpmin_f16 (float16x4_t a, float16x4_t b) -+vpmin_f16 
(float16x4_t __a, float16x4_t __b) - { -- return __builtin_aarch64_smin_nanpv4hf (a, b); -+ return __builtin_aarch64_smin_nanpv4hf (__a, __b); - } - - __extension__ extern __inline float16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpminq_f16 (float16x8_t a, float16x8_t b) -+vpminq_f16 (float16x8_t __a, float16x8_t __b) - { -- return __builtin_aarch64_smin_nanpv8hf (a, b); -+ return __builtin_aarch64_smin_nanpv8hf (__a, __b); - } - - __extension__ extern __inline float16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpminnm_f16 (float16x4_t a, float16x4_t b) -+vpminnm_f16 (float16x4_t __a, float16x4_t __b) - { -- return __builtin_aarch64_sminpv4hf (a, b); -+ return __builtin_aarch64_sminpv4hf (__a, __b); - } - - __extension__ extern __inline float16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vpminnmq_f16 (float16x8_t a, float16x8_t b) -+vpminnmq_f16 (float16x8_t __a, float16x8_t __b) - { -- return __builtin_aarch64_sminpv8hf (a, b); -+ return __builtin_aarch64_sminpv8hf (__a, __b); - } - - __extension__ extern __inline float16x4_t -@@ -32622,16 +33182,16 @@ vrecpsq_f16 (float16x8_t __a, float16x8_t __b) - - __extension__ extern __inline float16x4_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrsqrts_f16 (float16x4_t a, float16x4_t b) -+vrsqrts_f16 (float16x4_t __a, float16x4_t __b) - { -- return __builtin_aarch64_rsqrtsv4hf (a, b); -+ return __builtin_aarch64_rsqrtsv4hf (__a, __b); - } - - __extension__ extern __inline float16x8_t - __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) --vrsqrtsq_f16 (float16x8_t a, float16x8_t b) -+vrsqrtsq_f16 (float16x8_t __a, float16x8_t __b) - { -- return __builtin_aarch64_rsqrtsv8hf (a, b); -+ return __builtin_aarch64_rsqrtsv8hf (__a, __b); - } - - __extension__ extern __inline float16x4_t -@@ -33961,6 +34521,1308 @@ vfmlslq_laneq_high_f16 (float32x4_t __r, float16x8_t __a, float16x8_t __b, - - #pragma GCC pop_options - -+#pragma GCC push_options -+#pragma GCC target ("arch=armv8.5-a") -+ -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrnd32z_f32 (float32x2_t __a) -+{ -+ return __builtin_aarch64_frint32zv2sf (__a); -+} -+ -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrnd32zq_f32 (float32x4_t __a) -+{ -+ return __builtin_aarch64_frint32zv4sf (__a); -+} -+ -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrnd32z_f64 (float64x1_t __a) -+{ -+ return (float64x1_t) -+ {__builtin_aarch64_frint32zdf (vget_lane_f64 (__a, 0))}; -+} -+ -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrnd32zq_f64 (float64x2_t __a) -+{ -+ return __builtin_aarch64_frint32zv2df (__a); -+} -+ -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrnd32x_f32 (float32x2_t __a) -+{ -+ return __builtin_aarch64_frint32xv2sf (__a); -+} -+ -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrnd32xq_f32 (float32x4_t __a) -+{ -+ return __builtin_aarch64_frint32xv4sf (__a); -+} -+ -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrnd32x_f64 (float64x1_t __a) -+{ -+ return (float64x1_t) 
{__builtin_aarch64_frint32xdf (vget_lane_f64 (__a, 0))}; -+} -+ -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrnd32xq_f64 (float64x2_t __a) -+{ -+ return __builtin_aarch64_frint32xv2df (__a); -+} -+ -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrnd64z_f32 (float32x2_t __a) -+{ -+ return __builtin_aarch64_frint64zv2sf (__a); -+} -+ -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrnd64zq_f32 (float32x4_t __a) -+{ -+ return __builtin_aarch64_frint64zv4sf (__a); -+} -+ -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrnd64z_f64 (float64x1_t __a) -+{ -+ return (float64x1_t) {__builtin_aarch64_frint64zdf (vget_lane_f64 (__a, 0))}; -+} -+ -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrnd64zq_f64 (float64x2_t __a) -+{ -+ return __builtin_aarch64_frint64zv2df (__a); -+} -+ -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrnd64x_f32 (float32x2_t __a) -+{ -+ return __builtin_aarch64_frint64xv2sf (__a); -+} -+ -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrnd64xq_f32 (float32x4_t __a) -+{ -+ return __builtin_aarch64_frint64xv4sf (__a); -+} -+ -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrnd64x_f64 (float64x1_t __a) -+{ -+ return (float64x1_t) {__builtin_aarch64_frint64xdf (vget_lane_f64 (__a, 0))}; -+} -+ -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrnd64xq_f64 (float64x2_t __a) -+{ -+ return __builtin_aarch64_frint64xv2df (__a); -+} -+ -+#pragma GCC pop_options -+ -+#include "arm_bf16.h" -+ -+#pragma GCC push_options -+#pragma GCC target ("arch=armv8.2-a+bf16") -+ -+__extension__ extern __inline bfloat16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vset_lane_bf16 (bfloat16_t __elem, bfloat16x4_t __vec, const int __index) -+{ -+ return __aarch64_vset_lane_any (__elem, __vec, __index); -+} -+ -+__extension__ extern __inline bfloat16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsetq_lane_bf16 (bfloat16_t __elem, bfloat16x8_t __vec, const int __index) -+{ -+ return __aarch64_vset_lane_any (__elem, __vec, __index); -+} -+ -+__extension__ extern __inline bfloat16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vget_lane_bf16 (bfloat16x4_t __a, const int __b) -+{ -+ return __aarch64_vget_lane_any (__a, __b); -+} -+ -+__extension__ extern __inline bfloat16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vgetq_lane_bf16 (bfloat16x8_t __a, const int __b) -+{ -+ return __aarch64_vget_lane_any (__a, __b); -+} -+ -+__extension__ extern __inline bfloat16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcreate_bf16 (uint64_t __a) -+{ -+ return (bfloat16x4_t) __a; -+} -+ -+__extension__ extern __inline bfloat16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcombine_bf16 (bfloat16x4_t __a, bfloat16x4_t __b) -+{ -+ return (bfloat16x8_t)__builtin_aarch64_combinev4bf (__a, __b); -+} -+ -+/* vdup */ -+ -+__extension__ extern 
__inline bfloat16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdup_n_bf16 (bfloat16_t __a) -+{ -+ return (bfloat16x4_t) {__a, __a, __a, __a}; -+} -+ -+__extension__ extern __inline bfloat16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdupq_n_bf16 (bfloat16_t __a) -+{ -+ return (bfloat16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; -+} -+ -+__extension__ extern __inline bfloat16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdup_lane_bf16 (bfloat16x4_t __a, const int __b) -+{ -+ return vdup_n_bf16 (__aarch64_vget_lane_any (__a, __b)); -+} -+ -+__extension__ extern __inline bfloat16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdup_laneq_bf16 (bfloat16x8_t __a, const int __b) -+{ -+ return vdup_n_bf16 (__aarch64_vget_lane_any (__a, __b)); -+} -+ -+__extension__ extern __inline bfloat16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdupq_lane_bf16 (bfloat16x4_t __a, const int __b) -+{ -+ return vdupq_n_bf16 (__aarch64_vget_lane_any (__a, __b)); -+} -+ -+__extension__ extern __inline bfloat16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdupq_laneq_bf16 (bfloat16x8_t __a, const int __b) -+{ -+ return vdupq_n_bf16 (__aarch64_vget_lane_any (__a, __b)); -+} -+ -+__extension__ extern __inline bfloat16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vduph_lane_bf16 (bfloat16x4_t __a, const int __b) -+{ -+ return __aarch64_vget_lane_any (__a, __b); -+} -+ -+__extension__ extern __inline bfloat16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vduph_laneq_bf16 (bfloat16x8_t __a, const int __b) -+{ -+ return __aarch64_vget_lane_any (__a, __b); -+} -+ -+/* vld */ -+ -+__extension__ extern __inline bfloat16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_bf16 (const bfloat16_t *__a) -+{ -+ return (bfloat16x4_t) __builtin_aarch64_ld1v4bf (__a); -+} -+ -+__extension__ extern __inline bfloat16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_bf16 (const bfloat16_t *__a) -+{ -+ return __builtin_aarch64_ld1v8bf (__a); -+} -+ -+__extension__ extern __inline bfloat16x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_bf16_x2 (const bfloat16_t *__a) -+{ -+ bfloat16x4x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld1x2v4bf ((const __builtin_aarch64_simd_bf *) __a); -+ ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4bf (__o, 0); -+ ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4bf (__o, 1); -+ return ret; -+} -+ -+__extension__ extern __inline bfloat16x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_bf16_x2 (const bfloat16_t *__a) -+{ -+ bfloat16x8x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld1x2v8bf ((const __builtin_aarch64_simd_bf *) __a); -+ ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregoiv8bf (__o, 0); -+ ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregoiv8bf (__o, 1); -+ return ret; -+} -+ -+__extension__ extern __inline bfloat16x4x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_bf16_x3 (const bfloat16_t *__a) -+{ -+ bfloat16x4x3_t __i; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld1x3v4bf ((const __builtin_aarch64_simd_bf *) __a); -+ __i.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 0); -+ 
__i.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 1); -+ __i.val[2] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 2); -+ return __i; -+} -+ -+__extension__ extern __inline bfloat16x8x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_bf16_x3 (const bfloat16_t *__a) -+{ -+ bfloat16x8x3_t __i; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld1x3v8bf ((const __builtin_aarch64_simd_bf *) __a); -+ __i.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 0); -+ __i.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 1); -+ __i.val[2] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 2); -+ return __i; -+} -+__extension__ extern __inline bfloat16x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_bf16_x4 (const bfloat16_t *__a) -+{ -+ union { bfloat16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __au; -+ __au.__o -+ = __builtin_aarch64_ld1x4v4bf ((const __builtin_aarch64_simd_bf *) __a); -+ return __au.__i; -+} -+ -+__extension__ extern __inline bfloat16x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_bf16_x4 (const bfloat16_t *__a) -+{ -+ union { bfloat16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __au; -+ __au.__o -+ = __builtin_aarch64_ld1x4v8bf ((const __builtin_aarch64_simd_bf *) __a); -+ return __au.__i; -+} -+ -+__extension__ extern __inline bfloat16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_lane_bf16 (const bfloat16_t *__src, bfloat16x4_t __vec, const int __lane) -+{ -+ return __aarch64_vset_lane_any (*__src, __vec, __lane); -+} -+ -+__extension__ extern __inline bfloat16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_lane_bf16 (const bfloat16_t *__src, bfloat16x8_t __vec, const int __lane) -+{ -+ return __aarch64_vset_lane_any (*__src, __vec, __lane); -+} -+ -+__extension__ extern __inline bfloat16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_dup_bf16 (const bfloat16_t* __a) -+{ -+ return vdup_n_bf16 (*__a); -+} -+ -+__extension__ extern __inline bfloat16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_dup_bf16 (const bfloat16_t* __a) -+{ -+ return vdupq_n_bf16 (*__a); -+} -+ -+__extension__ extern __inline bfloat16x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2_bf16 (const bfloat16_t * __a) -+{ -+ bfloat16x4x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2v4bf (__a); -+ ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4bf (__o, 0); -+ ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4bf (__o, 1); -+ return ret; -+} -+ -+__extension__ extern __inline bfloat16x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2q_bf16 (const bfloat16_t * __a) -+{ -+ bfloat16x8x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2v8bf ((const __builtin_aarch64_simd_bf *) __a); -+ ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregoiv8bf (__o, 0); -+ ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregoiv8bf (__o, 1); -+ return ret; -+} -+ -+__extension__ extern __inline bfloat16x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2_dup_bf16 (const bfloat16_t * __a) -+{ -+ bfloat16x4x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv4bf ((const __builtin_aarch64_simd_bf *) __a); -+ ret.val[0] = (bfloat16x4_t) 
__builtin_aarch64_get_dregoiv4bf (__o, 0); -+ ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregoiv4bf (__o, 1); -+ return ret; -+} -+ -+__extension__ extern __inline bfloat16x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2q_dup_bf16 (const bfloat16_t * __a) -+{ -+ bfloat16x8x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv8bf ((const __builtin_aarch64_simd_bf *) __a); -+ ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregoiv8bf (__o, 0); -+ ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregoiv8bf (__o, 1); -+ return ret; -+} -+ -+__extension__ extern __inline bfloat16x4x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3_bf16 (const bfloat16_t * __a) -+{ -+ bfloat16x4x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3v4bf ((const __builtin_aarch64_simd_bf *) __a); -+ ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 0); -+ ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 1); -+ ret.val[2] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 2); -+ return ret; -+} -+ -+__extension__ extern __inline bfloat16x8x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3q_bf16 (const bfloat16_t * __a) -+{ -+ bfloat16x8x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3v8bf ((const __builtin_aarch64_simd_bf *) __a); -+ ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 0); -+ ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 1); -+ ret.val[2] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 2); -+ return ret; -+} -+ -+__extension__ extern __inline bfloat16x4x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3_dup_bf16 (const bfloat16_t * __a) -+{ -+ bfloat16x4x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv4bf ((const __builtin_aarch64_simd_bf *) __a); -+ ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 0); -+ ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 1); -+ ret.val[2] = (bfloat16x4_t) __builtin_aarch64_get_dregciv4bf (__o, 2); -+ return ret; -+} -+ -+__extension__ extern __inline bfloat16x8x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3q_dup_bf16 (const bfloat16_t * __a) -+{ -+ bfloat16x8x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv8bf ((const __builtin_aarch64_simd_bf *) __a); -+ ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 0); -+ ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 1); -+ ret.val[2] = (bfloat16x8_t) __builtin_aarch64_get_qregciv8bf (__o, 2); -+ return ret; -+} -+ -+__extension__ extern __inline bfloat16x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4_bf16 (const bfloat16_t * __a) -+{ -+ bfloat16x4x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4v4bf ((const __builtin_aarch64_simd_bf *) __a); -+ ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 0); -+ ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 1); -+ ret.val[2] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 2); -+ ret.val[3] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 3); -+ return ret; -+} -+ -+__extension__ extern __inline bfloat16x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4q_bf16 (const bfloat16_t * __a) -+{ -+ 
bfloat16x8x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4v8bf ((const __builtin_aarch64_simd_bf *) __a); -+ ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 0); -+ ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 1); -+ ret.val[2] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 2); -+ ret.val[3] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 3); -+ return ret; -+} -+ -+__extension__ extern __inline bfloat16x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4_dup_bf16 (const bfloat16_t * __a) -+{ -+ bfloat16x4x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv4bf ((const __builtin_aarch64_simd_bf *) __a); -+ ret.val[0] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 0); -+ ret.val[1] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 1); -+ ret.val[2] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 2); -+ ret.val[3] = (bfloat16x4_t) __builtin_aarch64_get_dregxiv4bf (__o, 3); -+ return ret; -+} -+ -+__extension__ extern __inline bfloat16x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4q_dup_bf16 (const bfloat16_t * __a) -+{ -+ bfloat16x8x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv8bf ((const __builtin_aarch64_simd_bf *) __a); -+ ret.val[0] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 0); -+ ret.val[1] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 1); -+ ret.val[2] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 2); -+ ret.val[3] = (bfloat16x8_t) __builtin_aarch64_get_qregxiv8bf (__o, 3); -+ return ret; -+} -+ -+/* vst */ -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst1_bf16 (bfloat16_t *__a, bfloat16x4_t __b) -+{ -+ __builtin_aarch64_st1v4bf (__a, __b); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst1_bf16_x2 (bfloat16_t * __a, bfloat16x4x2_t __val) -+{ -+ __builtin_aarch64_simd_oi __o; -+ bfloat16x8x2_t __temp; -+ __temp.val[0] = vcombine_bf16 (__val.val[0], vcreate_bf16 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_bf16 (__val.val[1], vcreate_bf16 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregoiv8bf (__o, __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv8bf (__o, __temp.val[1], 1); -+ __builtin_aarch64_st1x2v4bf (__a, __o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst1q_bf16_x2 (bfloat16_t * __a, bfloat16x8x2_t __val) -+{ -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_set_qregoiv8bf (__o, __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv8bf (__o, __val.val[1], 1); -+ __builtin_aarch64_st1x2v8bf (__a, __o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst1_bf16_x3 (bfloat16_t * __a, bfloat16x4x3_t __val) -+{ -+ __builtin_aarch64_simd_ci __o; -+ bfloat16x8x3_t __temp; -+ __temp.val[0] = vcombine_bf16 (__val.val[0], vcreate_bf16 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_bf16 (__val.val[1], vcreate_bf16 (__AARCH64_UINT64_C (0))); -+ __temp.val[2] = vcombine_bf16 (__val.val[2], vcreate_bf16 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[1], 1); -+ __o = 
__builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[2], 2); -+ __builtin_aarch64_st1x3v4bf ((__builtin_aarch64_simd_bf *) __a, __o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst1q_bf16_x3 (bfloat16_t * __a, bfloat16x8x3_t __val) -+{ -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __val.val[2], 2); -+ __builtin_aarch64_st1x3v8bf ((__builtin_aarch64_simd_bf *) __a, __o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst1_bf16_x4 (bfloat16_t * __a, bfloat16x4x4_t val) -+{ -+ union { bfloat16x4x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; -+ __builtin_aarch64_st1x4v4bf ((__builtin_aarch64_simd_bf *) __a, __u.__o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst1q_bf16_x4 (bfloat16_t * __a, bfloat16x8x4_t val) -+{ -+ union { bfloat16x8x4_t __i; __builtin_aarch64_simd_xi __o; } __u = { val }; -+ __builtin_aarch64_st1x4v8bf ((__builtin_aarch64_simd_bf *) __a, __u.__o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst1q_bf16 (bfloat16_t *__a, bfloat16x8_t __b) -+{ -+ __builtin_aarch64_st1v8bf (__a, __b); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst1_lane_bf16 (bfloat16_t *__a, bfloat16x4_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst1q_lane_bf16 (bfloat16_t *__a, bfloat16x8_t __b, const int __lane) -+{ -+ *__a = __aarch64_vget_lane_any (__b, __lane); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst2_bf16 (bfloat16_t * __a, bfloat16x4x2_t __val) -+{ -+ __builtin_aarch64_simd_oi __o; -+ bfloat16x8x2_t __temp; -+ __temp.val[0] = vcombine_bf16 (__val.val[0], vcreate_bf16 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_bf16 (__val.val[1], vcreate_bf16 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregoiv8bf (__o, __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv8bf (__o, __temp.val[1], 1); -+ __builtin_aarch64_st2v4bf (__a, __o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst2q_bf16 (bfloat16_t * __a, bfloat16x8x2_t __val) -+{ -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_set_qregoiv8bf (__o, __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv8bf (__o, __val.val[1], 1); -+ __builtin_aarch64_st2v8bf (__a, __o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst3_bf16 (bfloat16_t * __a, bfloat16x4x3_t __val) -+{ -+ __builtin_aarch64_simd_ci __o; -+ bfloat16x8x3_t __temp; -+ __temp.val[0] = vcombine_bf16 (__val.val[0], vcreate_bf16 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_bf16 (__val.val[1], vcreate_bf16 (__AARCH64_UINT64_C (0))); -+ __temp.val[2] = vcombine_bf16 (__val.val[2], vcreate_bf16 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[0], 0); -+ 
__o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __temp.val[2], 2); -+ __builtin_aarch64_st3v4bf ((__builtin_aarch64_simd_bf *) __a, __o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst3q_bf16 (bfloat16_t * __a, bfloat16x8x3_t __val) -+{ -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv8bf (__o, (bfloat16x8_t) __val.val[2], 2); -+ __builtin_aarch64_st3v8bf ((__builtin_aarch64_simd_bf *) __a, __o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst4_bf16 (bfloat16_t * __a, bfloat16x4x4_t __val) -+{ -+ __builtin_aarch64_simd_xi __o; -+ bfloat16x8x4_t __temp; -+ __temp.val[0] = vcombine_bf16 (__val.val[0], vcreate_bf16 (__AARCH64_UINT64_C (0))); -+ __temp.val[1] = vcombine_bf16 (__val.val[1], vcreate_bf16 (__AARCH64_UINT64_C (0))); -+ __temp.val[2] = vcombine_bf16 (__val.val[2], vcreate_bf16 (__AARCH64_UINT64_C (0))); -+ __temp.val[3] = vcombine_bf16 (__val.val[3], vcreate_bf16 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __temp.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __temp.val[3], 3); -+ __builtin_aarch64_st4v4bf ((__builtin_aarch64_simd_bf *) __a, __o); -+} -+ -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst4q_bf16 (bfloat16_t * __a, bfloat16x8x4_t __val) -+{ -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __val.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __val.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __val.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv8bf (__o, (bfloat16x8_t) __val.val[3], 3); -+ __builtin_aarch64_st4v8bf ((__builtin_aarch64_simd_bf *) __a, __o); -+} -+ -+/* vreinterpret */ -+ -+__extension__ extern __inline bfloat16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_bf16_u8 (uint8x8_t __a) -+{ -+ return (bfloat16x4_t)__a; -+} -+ -+__extension__ extern __inline bfloat16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_bf16_u16 (uint16x4_t __a) -+{ -+ return (bfloat16x4_t)__a; -+} -+ -+__extension__ extern __inline bfloat16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_bf16_u32 (uint32x2_t __a) -+{ -+ return (bfloat16x4_t)__a; -+} -+ -+__extension__ extern __inline bfloat16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_bf16_u64 (uint64x1_t __a) -+{ -+ return (bfloat16x4_t)__a; -+} -+ -+__extension__ extern __inline bfloat16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_bf16_s8 (int8x8_t __a) -+{ -+ return (bfloat16x4_t)__a; -+} -+ -+__extension__ extern __inline bfloat16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_bf16_s16 (int16x4_t __a) -+{ -+ return (bfloat16x4_t)__a; -+} -+ -+__extension__ 
extern __inline bfloat16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_bf16_s32 (int32x2_t __a) -+{ -+ return (bfloat16x4_t)__a; -+} -+ -+__extension__ extern __inline bfloat16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_bf16_s64 (int64x1_t __a) -+{ -+ return (bfloat16x4_t)__a; -+} -+ -+__extension__ extern __inline bfloat16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_bf16_p8 (poly8x8_t __a) -+{ -+ return (bfloat16x4_t)__a; -+} -+ -+__extension__ extern __inline bfloat16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_bf16_p16 (poly16x4_t __a) -+{ -+ return (bfloat16x4_t)__a; -+} -+ -+__extension__ extern __inline bfloat16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_bf16_p64 (poly64x1_t __a) -+{ -+ return (bfloat16x4_t)__a; -+} -+ -+__extension__ extern __inline bfloat16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_bf16_f16 (float16x4_t __a) -+{ -+ return (bfloat16x4_t)__a; -+} -+ -+__extension__ extern __inline bfloat16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_bf16_f32 (float32x2_t __a) -+{ -+ return (bfloat16x4_t)__a; -+} -+ -+__extension__ extern __inline bfloat16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_bf16_f64 (float64x1_t __a) -+{ -+ return (bfloat16x4_t)__a; -+} -+ -+__extension__ extern __inline bfloat16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_bf16_u8 (uint8x16_t __a) -+{ -+ return (bfloat16x8_t)__a; -+} -+ -+__extension__ extern __inline bfloat16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_bf16_u16 (uint16x8_t __a) -+{ -+ return (bfloat16x8_t)__a; -+} -+ -+__extension__ extern __inline bfloat16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_bf16_u32 (uint32x4_t __a) -+{ -+ return (bfloat16x8_t)__a; -+} -+ -+__extension__ extern __inline bfloat16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_bf16_u64 (uint64x2_t __a) -+{ -+ return (bfloat16x8_t)__a; -+} -+ -+__extension__ extern __inline bfloat16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_bf16_s8 (int8x16_t __a) -+{ -+ return (bfloat16x8_t)__a; -+} -+ -+__extension__ extern __inline bfloat16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_bf16_s16 (int16x8_t __a) -+{ -+ return (bfloat16x8_t)__a; -+} -+ -+__extension__ extern __inline bfloat16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_bf16_s32 (int32x4_t __a) -+{ -+ return (bfloat16x8_t)__a; -+} -+ -+__extension__ extern __inline bfloat16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_bf16_s64 (int64x2_t __a) -+{ -+ return (bfloat16x8_t)__a; -+} -+ -+__extension__ extern __inline bfloat16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_bf16_p8 (poly8x16_t __a) -+{ -+ return (bfloat16x8_t)__a; -+} -+ -+__extension__ extern __inline bfloat16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_bf16_p16 (poly16x8_t __a) -+{ -+ return (bfloat16x8_t)__a; -+} -+ -+__extension__ extern __inline 
bfloat16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_bf16_p64 (poly64x2_t __a) -+{ -+ return (bfloat16x8_t)__a; -+} -+ -+__extension__ extern __inline bfloat16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_bf16_p128 (poly128_t __a) -+{ -+ return (bfloat16x8_t)__a; -+} -+ -+__extension__ extern __inline bfloat16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_bf16_f16 (float16x8_t __a) -+{ -+ return (bfloat16x8_t)__a; -+} -+ -+__extension__ extern __inline bfloat16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_bf16_f32 (float32x4_t __a) -+{ -+ return (bfloat16x8_t)__a; -+} -+ -+__extension__ extern __inline bfloat16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_bf16_f64 (float64x2_t __a) -+{ -+ return (bfloat16x8_t)__a; -+} -+ -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_s8_bf16 (bfloat16x4_t __a) -+{ -+ return (int8x8_t)__a; -+} -+ -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_s16_bf16 (bfloat16x4_t __a) -+{ -+ return (int16x4_t)__a; -+} -+ -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_s32_bf16 (bfloat16x4_t __a) -+{ -+ return (int32x2_t)__a; -+} -+ -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_s64_bf16 (bfloat16x4_t __a) -+{ -+ return (int64x1_t)__a; -+} -+ -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_u8_bf16 (bfloat16x4_t __a) -+{ -+ return (uint8x8_t)__a; -+} -+ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_u16_bf16 (bfloat16x4_t __a) -+{ -+ return (uint16x4_t)__a; -+} -+ -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_u32_bf16 (bfloat16x4_t __a) -+{ -+ return (uint32x2_t)__a; -+} -+ -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_u64_bf16 (bfloat16x4_t __a) -+{ -+ return (uint64x1_t)__a; -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_f16_bf16 (bfloat16x4_t __a) -+{ -+ return (float16x4_t)__a; -+} -+ -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_f32_bf16 (bfloat16x4_t __a) -+{ -+ return (float32x2_t)__a; -+} -+ -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_f64_bf16 (bfloat16x4_t __a) -+{ -+ return (float64x1_t)__a; -+} -+ -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_p8_bf16 (bfloat16x4_t __a) -+{ -+ return (poly8x8_t)__a; -+} -+ -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_p16_bf16 (bfloat16x4_t __a) -+{ -+ return (poly16x4_t)__a; -+} -+ -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) -+vreinterpret_p64_bf16 (bfloat16x4_t __a) -+{ -+ return (poly64x1_t)__a; -+} -+ -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_s8_bf16 (bfloat16x8_t __a) -+{ -+ return (int8x16_t)__a; -+} -+ -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_s16_bf16 (bfloat16x8_t __a) -+{ -+ return (int16x8_t)__a; -+} -+ -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_s32_bf16 (bfloat16x8_t __a) -+{ -+ return (int32x4_t)__a; -+} -+ -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_s64_bf16 (bfloat16x8_t __a) -+{ -+ return (int64x2_t)__a; -+} -+ -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_u8_bf16 (bfloat16x8_t __a) -+{ -+ return (uint8x16_t)__a; -+} -+ -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_u16_bf16 (bfloat16x8_t __a) -+{ -+ return (uint16x8_t)__a; -+} -+ -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_u32_bf16 (bfloat16x8_t __a) -+{ -+ return (uint32x4_t)__a; -+} -+ -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_u64_bf16 (bfloat16x8_t __a) -+{ -+ return (uint64x2_t)__a; -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_f16_bf16 (bfloat16x8_t __a) -+{ -+ return (float16x8_t)__a; -+} -+ -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_f32_bf16 (bfloat16x8_t __a) -+{ -+ return (float32x4_t)__a; -+} -+ -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_f64_bf16 (bfloat16x8_t __a) -+{ -+ return (float64x2_t)__a; -+} -+ -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_p8_bf16 (bfloat16x8_t __a) -+{ -+ return (poly8x16_t)__a; -+} -+ -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_p16_bf16 (bfloat16x8_t __a) -+{ -+ return (poly16x8_t)__a; -+} -+ -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_p64_bf16 (bfloat16x8_t __a) -+{ -+ return (poly64x2_t)__a; -+} -+ -+__extension__ extern __inline poly128_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_p128_bf16 (bfloat16x8_t __a) -+{ -+ return (poly128_t)__a; -+} -+ -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbfdot_f32 (float32x2_t __r, bfloat16x4_t __a, bfloat16x4_t __b) -+{ -+ return __builtin_aarch64_bfdotv2sf (__r, __a, __b); -+} -+ -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbfdotq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b) -+{ -+ return __builtin_aarch64_bfdotv4sf (__r, __a, __b); -+} -+ -+__extension__ extern 
__inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbfdot_lane_f32 (float32x2_t __r, bfloat16x4_t __a, bfloat16x4_t __b, -+ const int __index) -+{ -+ return __builtin_aarch64_bfdot_lanev2sf (__r, __a, __b, __index); -+} -+ -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbfdotq_lane_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x4_t __b, -+ const int __index) -+{ -+ return __builtin_aarch64_bfdot_lanev4sf (__r, __a, __b, __index); -+} -+ -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbfdot_laneq_f32 (float32x2_t __r, bfloat16x4_t __a, bfloat16x8_t __b, -+ const int __index) -+{ -+ return __builtin_aarch64_bfdot_laneqv2sf (__r, __a, __b, __index); -+} -+ -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbfdotq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b, -+ const int __index) -+{ -+ return __builtin_aarch64_bfdot_laneqv4sf (__r, __a, __b, __index); -+} -+ -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbfmmlaq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b) -+ -+{ -+ return __builtin_aarch64_bfmmlaqv4sf (__r, __a, __b); -+} -+ -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbfmlalbq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b) -+{ -+ return __builtin_aarch64_bfmlalbv4sf (__r, __a, __b); -+} -+ -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbfmlaltq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b) -+{ -+ return __builtin_aarch64_bfmlaltv4sf (__r, __a, __b); -+} -+ -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbfmlalbq_lane_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x4_t __b, -+ const int __index) -+{ -+ return __builtin_aarch64_bfmlalb_lanev4sf (__r, __a, __b, __index); -+} -+ -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbfmlaltq_lane_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x4_t __b, -+ const int __index) -+{ -+ return __builtin_aarch64_bfmlalt_lanev4sf (__r, __a, __b, __index); -+} -+ -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbfmlalbq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b, -+ const int __index) -+{ -+ return __builtin_aarch64_bfmlalb_lane_qv4sf (__r, __a, __b, __index); -+} -+ -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbfmlaltq_laneq_f32 (float32x4_t __r, bfloat16x8_t __a, bfloat16x8_t __b, -+ const int __index) -+{ -+ return __builtin_aarch64_bfmlalt_lane_qv4sf (__r, __a, __b, __index); -+} -+ -+__extension__ extern __inline bfloat16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvt_bf16_f32 (float32x4_t __a) -+{ -+ return __builtin_aarch64_bfcvtnv4bf (__a); -+} -+ -+__extension__ extern __inline bfloat16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtq_low_bf16_f32 (float32x4_t __a) -+{ -+ return __builtin_aarch64_bfcvtn_qv8bf (__a); -+} -+ -+__extension__ extern __inline bfloat16x8_t -+__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtq_high_bf16_f32 (bfloat16x8_t __inactive, float32x4_t __a) -+{ -+ return __builtin_aarch64_bfcvtn2v8bf (__inactive, __a); -+} -+ -+#pragma GCC pop_options -+ -+/* AdvSIMD 8-bit Integer Matrix Multiply (I8MM) intrinsics. */ -+ -+#pragma GCC push_options -+#pragma GCC target ("arch=armv8.2-a+i8mm") -+ -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vusdot_s32 (int32x2_t __r, uint8x8_t __a, int8x8_t __b) -+{ -+ return __builtin_aarch64_usdotv8qi_ssus (__r, __a, __b); -+} -+ -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vusdotq_s32 (int32x4_t __r, uint8x16_t __a, int8x16_t __b) -+{ -+ return __builtin_aarch64_usdotv16qi_ssus (__r, __a, __b); -+} -+ -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vusdot_lane_s32 (int32x2_t __r, uint8x8_t __a, int8x8_t __b, const int __index) -+{ -+ return __builtin_aarch64_usdot_lanev8qi_ssuss (__r, __a, __b, __index); -+} -+ -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vusdot_laneq_s32 (int32x2_t __r, uint8x8_t __a, int8x16_t __b, -+ const int __index) -+{ -+ return __builtin_aarch64_usdot_laneqv8qi_ssuss (__r, __a, __b, __index); -+} -+ -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vusdotq_lane_s32 (int32x4_t __r, uint8x16_t __a, int8x8_t __b, -+ const int __index) -+{ -+ return __builtin_aarch64_usdot_lanev16qi_ssuss (__r, __a, __b, __index); -+} -+ -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vusdotq_laneq_s32 (int32x4_t __r, uint8x16_t __a, int8x16_t __b, -+ const int __index) -+{ -+ return __builtin_aarch64_usdot_laneqv16qi_ssuss (__r, __a, __b, __index); -+} -+ -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsudot_lane_s32 (int32x2_t __r, int8x8_t __a, uint8x8_t __b, const int __index) -+{ -+ return __builtin_aarch64_sudot_lanev8qi_sssus (__r, __a, __b, __index); -+} -+ -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsudot_laneq_s32 (int32x2_t __r, int8x8_t __a, uint8x16_t __b, -+ const int __index) -+{ -+ return __builtin_aarch64_sudot_laneqv8qi_sssus (__r, __a, __b, __index); -+} -+ -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsudotq_lane_s32 (int32x4_t __r, int8x16_t __a, uint8x8_t __b, -+ const int __index) -+{ -+ return __builtin_aarch64_sudot_lanev16qi_sssus (__r, __a, __b, __index); -+} -+ -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsudotq_laneq_s32 (int32x4_t __r, int8x16_t __a, uint8x16_t __b, -+ const int __index) -+{ -+ return __builtin_aarch64_sudot_laneqv16qi_sssus (__r, __a, __b, __index); -+} -+ -+/* Matrix Multiply-Accumulate. 
*/ -+ -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmmlaq_s32 (int32x4_t __r, int8x16_t __a, int8x16_t __b) -+{ -+ return __builtin_aarch64_simd_smmlav16qi (__r, __a, __b); -+} -+ -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmmlaq_u32 (uint32x4_t __r, uint8x16_t __a, uint8x16_t __b) -+{ -+ return __builtin_aarch64_simd_ummlav16qi_uuuu (__r, __a, __b); -+} -+ -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vusmmlaq_s32 (int32x4_t __r, uint8x16_t __a, int8x16_t __b) -+{ -+ return __builtin_aarch64_simd_usmmlav16qi_ssus (__r, __a, __b); -+} -+ -+#pragma GCC pop_options -+ - #undef __aarch64_vget_lane_any - - #undef __aarch64_vdup_lane_any -diff --git a/gcc/config/aarch64/arm_sve.h b/gcc/config/aarch64/arm_sve.h -new file mode 100644 -index 000000000..0a316c0a0 ---- /dev/null -+++ b/gcc/config/aarch64/arm_sve.h -@@ -0,0 +1,37 @@ -+/* AArch64 SVE intrinsics include file. -+ Copyright (C) 2018-2019 Free Software Foundation, Inc. -+ -+ This file is part of GCC. -+ -+ GCC is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published -+ by the Free Software Foundation; either version 3, or (at your -+ option) any later version. -+ -+ GCC is distributed in the hope that it will be useful, but WITHOUT -+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public -+ License for more details. -+ -+ Under Section 7 of GPL version 3, you are granted additional -+ permissions described in the GCC Runtime Library Exception, version -+ 3.1, as published by the Free Software Foundation. -+ -+ You should have received a copy of the GNU General Public License and -+ a copy of the GCC Runtime Library Exception along with this program; -+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -+ . 
*/ -+ -+#ifndef _ARM_SVE_H_ -+#define _ARM_SVE_H_ -+ -+#include -+#include -+ -+typedef __fp16 float16_t; -+typedef float float32_t; -+typedef double float64_t; -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+#endif -diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md -index 0f357662a..002e91d2b 100644 ---- a/gcc/config/aarch64/atomics.md -+++ b/gcc/config/aarch64/atomics.md -@@ -22,10 +22,10 @@ - - (define_expand "@atomic_compare_and_swap" - [(match_operand:SI 0 "register_operand" "") ;; bool out -- (match_operand:ALLI 1 "register_operand" "") ;; val out -- (match_operand:ALLI 2 "aarch64_sync_memory_operand" "") ;; memory -- (match_operand:ALLI 3 "nonmemory_operand" "") ;; expected -- (match_operand:ALLI 4 "aarch64_reg_or_zero" "") ;; desired -+ (match_operand:ALLI_TI 1 "register_operand" "") ;; val out -+ (match_operand:ALLI_TI 2 "aarch64_sync_memory_operand" "") ;; memory -+ (match_operand:ALLI_TI 3 "nonmemory_operand" "") ;; expected -+ (match_operand:ALLI_TI 4 "aarch64_reg_or_zero" "") ;; desired - (match_operand:SI 5 "const_int_operand") ;; is_weak - (match_operand:SI 6 "const_int_operand") ;; mod_s - (match_operand:SI 7 "const_int_operand")] ;; mod_f -@@ -88,6 +88,30 @@ - } - ) - -+(define_insn_and_split "@aarch64_compare_and_swap" -+ [(set (reg:CC CC_REGNUM) ;; bool out -+ (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW)) -+ (set (match_operand:JUST_TI 0 "register_operand" "=&r") ;; val out -+ (match_operand:JUST_TI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory -+ (set (match_dup 1) -+ (unspec_volatile:JUST_TI -+ [(match_operand:JUST_TI 2 "aarch64_reg_or_zero" "rZ") ;; expect -+ (match_operand:JUST_TI 3 "aarch64_reg_or_zero" "rZ") ;; desired -+ (match_operand:SI 4 "const_int_operand") ;; is_weak -+ (match_operand:SI 5 "const_int_operand") ;; mod_s -+ (match_operand:SI 6 "const_int_operand")] ;; mod_f -+ UNSPECV_ATOMIC_CMPSW)) -+ (clobber (match_scratch:SI 7 "=&r"))] -+ "" -+ "#" -+ "&& epilogue_completed" -+ [(const_int 0)] -+ { -+ aarch64_split_compare_and_swap (operands); -+ DONE; -+ } -+) -+ - (define_insn "@aarch64_compare_and_swap_lse" - [(set (match_operand:SI 0 "register_operand" "+r") ;; val out - (zero_extend:SI -@@ -133,23 +157,56 @@ - return "casal\t%0, %2, %1"; - }) - -+(define_insn "@aarch64_compare_and_swap_lse" -+ [(set (match_operand:JUST_TI 0 "register_operand" "+r") ;; val out -+ (match_operand:JUST_TI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory -+ (set (match_dup 1) -+ (unspec_volatile:JUST_TI -+ [(match_dup 0) ;; expect -+ (match_operand:JUST_TI 2 "register_operand" "r") ;; desired -+ (match_operand:SI 3 "const_int_operand")] ;; mod_s -+ UNSPECV_ATOMIC_CMPSW))] -+ "TARGET_LSE" -+{ -+ enum memmodel model = memmodel_from_int (INTVAL (operands[3])); -+ if (is_mm_relaxed (model)) -+ return "casp\t%0, %R0, %2, %R2, %1"; -+ else if (is_mm_acquire (model) || is_mm_consume (model)) -+ return "caspa\t%0, %R0, %2, %R2, %1"; -+ else if (is_mm_release (model)) -+ return "caspl\t%0, %R0, %2, %R2, %1"; -+ else -+ return "caspal\t%0, %R0, %2, %R2, %1"; -+}) -+ - (define_expand "atomic_exchange" -- [(match_operand:ALLI 0 "register_operand" "") -- (match_operand:ALLI 1 "aarch64_sync_memory_operand" "") -- (match_operand:ALLI 2 "aarch64_reg_or_zero" "") -- (match_operand:SI 3 "const_int_operand" "")] -+ [(match_operand:ALLI 0 "register_operand") -+ (match_operand:ALLI 1 "aarch64_sync_memory_operand") -+ (match_operand:ALLI 2 "aarch64_reg_or_zero") -+ (match_operand:SI 3 "const_int_operand")] - "" - { -- rtx (*gen) (rtx, rtx, rtx, rtx); -- - /* 
Use an atomic SWP when available. */ - if (TARGET_LSE) -- gen = gen_aarch64_atomic_exchange_lse; -+ { -+ emit_insn (gen_aarch64_atomic_exchange_lse -+ (operands[0], operands[1], operands[2], operands[3])); -+ } -+ else if (TARGET_OUTLINE_ATOMICS) -+ { -+ machine_mode mode = mode; -+ rtx func = aarch64_atomic_ool_func (mode, operands[3], -+ &aarch64_ool_swp_names); -+ rtx rval = emit_library_call_value (func, operands[0], LCT_NORMAL, -+ mode, operands[2], mode, -+ XEXP (operands[1], 0), Pmode); -+ emit_move_insn (operands[0], rval); -+ } - else -- gen = gen_aarch64_atomic_exchange; -- -- emit_insn (gen (operands[0], operands[1], operands[2], operands[3])); -- -+ { -+ emit_insn (gen_aarch64_atomic_exchange -+ (operands[0], operands[1], operands[2], operands[3])); -+ } - DONE; - } - ) -@@ -198,9 +255,9 @@ - ) - - (define_expand "atomic_" -- [(match_operand:ALLI 0 "aarch64_sync_memory_operand" "") -+ [(match_operand:ALLI 0 "aarch64_sync_memory_operand") - (atomic_op:ALLI -- (match_operand:ALLI 1 "" "") -+ (match_operand:ALLI 1 "") - (match_operand:SI 2 "const_int_operand"))] - "" - { -@@ -234,6 +291,39 @@ - } - operands[1] = force_reg (mode, operands[1]); - } -+ else if (TARGET_OUTLINE_ATOMICS) -+ { -+ const atomic_ool_names *names; -+ switch () -+ { -+ case MINUS: -+ operands[1] = expand_simple_unop (mode, NEG, operands[1], -+ NULL, 1); -+ /* fallthru */ -+ case PLUS: -+ names = &aarch64_ool_ldadd_names; -+ break; -+ case IOR: -+ names = &aarch64_ool_ldset_names; -+ break; -+ case XOR: -+ names = &aarch64_ool_ldeor_names; -+ break; -+ case AND: -+ operands[1] = expand_simple_unop (mode, NOT, operands[1], -+ NULL, 1); -+ names = &aarch64_ool_ldclr_names; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ machine_mode mode = mode; -+ rtx func = aarch64_atomic_ool_func (mode, operands[2], names); -+ emit_library_call_value (func, NULL_RTX, LCT_NORMAL, mode, -+ operands[1], mode, -+ XEXP (operands[0], 0), Pmode); -+ DONE; -+ } - else - gen = gen_aarch64_atomic_; - -@@ -322,10 +412,10 @@ - ;; Load-operate-store, returning the original memory data. - - (define_expand "atomic_fetch_" -- [(match_operand:ALLI 0 "register_operand" "") -- (match_operand:ALLI 1 "aarch64_sync_memory_operand" "") -+ [(match_operand:ALLI 0 "register_operand") -+ (match_operand:ALLI 1 "aarch64_sync_memory_operand") - (atomic_op:ALLI -- (match_operand:ALLI 2 "" "") -+ (match_operand:ALLI 2 "") - (match_operand:SI 3 "const_int_operand"))] - "" - { -@@ -359,6 +449,40 @@ - } - operands[2] = force_reg (mode, operands[2]); - } -+ else if (TARGET_OUTLINE_ATOMICS) -+ { -+ const atomic_ool_names *names; -+ switch () -+ { -+ case MINUS: -+ operands[2] = expand_simple_unop (mode, NEG, operands[2], -+ NULL, 1); -+ /* fallthru */ -+ case PLUS: -+ names = &aarch64_ool_ldadd_names; -+ break; -+ case IOR: -+ names = &aarch64_ool_ldset_names; -+ break; -+ case XOR: -+ names = &aarch64_ool_ldeor_names; -+ break; -+ case AND: -+ operands[2] = expand_simple_unop (mode, NOT, operands[2], -+ NULL, 1); -+ names = &aarch64_ool_ldclr_names; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ machine_mode mode = mode; -+ rtx func = aarch64_atomic_ool_func (mode, operands[3], names); -+ rtx rval = emit_library_call_value (func, operands[0], LCT_NORMAL, mode, -+ operands[2], mode, -+ XEXP (operands[1], 0), Pmode); -+ emit_move_insn (operands[0], rval); -+ DONE; -+ } - else - gen = gen_aarch64_atomic_fetch_; - -@@ -439,16 +563,16 @@ - ;; Load-operate-store, returning the updated memory data. 
- - (define_expand "atomic__fetch" -- [(match_operand:ALLI 0 "register_operand" "") -+ [(match_operand:ALLI 0 "register_operand") - (atomic_op:ALLI -- (match_operand:ALLI 1 "aarch64_sync_memory_operand" "") -- (match_operand:ALLI 2 "" "")) -+ (match_operand:ALLI 1 "aarch64_sync_memory_operand") -+ (match_operand:ALLI 2 "")) - (match_operand:SI 3 "const_int_operand")] - "" - { - /* Use an atomic load-operate instruction when possible. In this case - we will re-compute the result from the original mem value. */ -- if (TARGET_LSE) -+ if (TARGET_LSE || TARGET_OUTLINE_ATOMICS) - { - rtx tmp = gen_reg_rtx (mode); - operands[2] = force_reg (mode, operands[2]); -@@ -581,6 +705,24 @@ - } - ) - -+(define_insn "aarch64_load_exclusive_pair" -+ [(set (match_operand:DI 0 "register_operand" "=r") -+ (unspec_volatile:DI -+ [(match_operand:TI 2 "aarch64_sync_memory_operand" "Q") -+ (match_operand:SI 3 "const_int_operand")] -+ UNSPECV_LX)) -+ (set (match_operand:DI 1 "register_operand" "=r") -+ (unspec_volatile:DI [(match_dup 2) (match_dup 3)] UNSPECV_LX))] -+ "" -+ { -+ enum memmodel model = memmodel_from_int (INTVAL (operands[3])); -+ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_release (model)) -+ return "ldxp\t%0, %1, %2"; -+ else -+ return "ldaxp\t%0, %1, %2"; -+ } -+) -+ - (define_insn "@aarch64_store_exclusive" - [(set (match_operand:SI 0 "register_operand" "=&r") - (unspec_volatile:SI [(const_int 0)] UNSPECV_SX)) -@@ -599,8 +741,27 @@ - } - ) - -+(define_insn "aarch64_store_exclusive_pair" -+ [(set (match_operand:SI 0 "register_operand" "=&r") -+ (unspec_volatile:SI [(const_int 0)] UNSPECV_SX)) -+ (set (match_operand:TI 1 "aarch64_sync_memory_operand" "=Q") -+ (unspec_volatile:TI -+ [(match_operand:DI 2 "aarch64_reg_or_zero" "rZ") -+ (match_operand:DI 3 "aarch64_reg_or_zero" "rZ") -+ (match_operand:SI 4 "const_int_operand")] -+ UNSPECV_SX))] -+ "" -+ { -+ enum memmodel model = memmodel_from_int (INTVAL (operands[4])); -+ if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire (model)) -+ return "stxp\t%w0, %x2, %x3, %1"; -+ else -+ return "stlxp\t%w0, %x2, %x3, %1"; -+ } -+) -+ - (define_expand "mem_thread_fence" -- [(match_operand:SI 0 "const_int_operand" "")] -+ [(match_operand:SI 0 "const_int_operand")] - "" - { - enum memmodel model = memmodel_from_int (INTVAL (operands[0])); -diff --git a/gcc/config/aarch64/check-sve-md.awk b/gcc/config/aarch64/check-sve-md.awk -new file mode 100644 -index 000000000..3da78f3dd ---- /dev/null -+++ b/gcc/config/aarch64/check-sve-md.awk -@@ -0,0 +1,66 @@ -+#!/usr/bin/awk -f -+# Copyright (C) 2019 Free Software Foundation, Inc. -+# -+# This program is free software; you can redistribute it and/or modify it -+# under the terms of the GNU General Public License as published by the -+# Free Software Foundation; either version 3, or (at your option) any -+# later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program; see the file COPYING3. If not see -+# . -+ -+# This awk script checks that aarch64-sve.md (passed either on the -+# command line or via stdin) has an up-to-date contents section. -+ -+BEGIN { -+ seen1 = 0 -+ seen2 = 0 -+ errors = 0 -+} -+ -+# The headings in the comments use a two-level hierarchy: ";; == ..." 
-+# for major sections and ";; ---- ..." for minor sections. Each section -+# heading must be unique. -+# -+# The contents section should list all the section headings, using the -+# same text and in the same order. We should therefore see exactly two -+# copies of the section list. -+/^;; == / || /^;; ---- / { -+ if ($0 in seen || seen2 > 0) -+ { -+ if (seen2 >= seen1) -+ { -+ printf "error: line not in contents: %s\n", $0 > "/dev/stderr" -+ errors += 1 -+ exit(1) -+ } -+ if ($0 != order[seen2]) -+ { -+ printf "error: mismatched contents\n saw: %s\nexpected: %s\n", \ -+ $0, order[seen2] > "/dev/stderr" -+ errors += 1 -+ exit(1) -+ } -+ seen2 += 1 -+ } -+ else -+ { -+ seen[$0] = 1 -+ order[seen1] = $0 -+ seen1 += 1 -+ } -+} -+ -+END { -+ if (seen2 < seen1 && errors == 0) -+ { -+ printf "error: line only in contents: %s\n", order[seen2] > "/dev/stderr" -+ exit(1) -+ } -+} -diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md -index 21f9549e6..191c996c1 100644 ---- a/gcc/config/aarch64/constraints.md -+++ b/gcc/config/aarch64/constraints.md -@@ -36,6 +36,9 @@ - (define_register_constraint "x" "FP_LO_REGS" - "Floating point and SIMD vector registers V0 - V15.") - -+(define_register_constraint "y" "FP_LO8_REGS" -+ "Floating point and SIMD vector registers V0 - V7.") -+ - (define_constraint "I" - "A constant that can be used with an ADD operation." - (and (match_code "const_int") -@@ -46,6 +49,12 @@ - (and (match_code "const_int") - (match_test "aarch64_pluslong_strict_immedate (op, VOIDmode)"))) - -+(define_constraint "Uai" -+ "@internal -+ A constraint that matches a VG-based constant that can be added by -+ a single INC or DEC." -+ (match_operand 0 "aarch64_sve_scalar_inc_dec_immediate")) -+ - (define_constraint "Uav" - "@internal - A constraint that matches a VG-based constant that can be added by -@@ -114,8 +123,8 @@ - (match_test "aarch64_float_const_zero_rtx_p (op)"))) - - (define_constraint "Z" -- "Integer constant zero." -- (match_test "op == const0_rtx")) -+ "Integer or floating-point constant zero." -+ (match_test "op == CONST0_RTX (GET_MODE (op))")) - - (define_constraint "Ush" - "A constraint that matches an absolute symbolic address high part." -@@ -248,6 +257,38 @@ - true, - ADDR_QUERY_LDP_STP_N)"))) - -+(define_address_constraint "UPb" -+ "@internal -+ An address valid for SVE PRFB instructions." -+ (match_test "aarch64_sve_prefetch_operand_p (op, VNx16QImode)")) -+ -+(define_address_constraint "UPd" -+ "@internal -+ An address valid for SVE PRFD instructions." -+ (match_test "aarch64_sve_prefetch_operand_p (op, VNx2DImode)")) -+ -+(define_address_constraint "UPh" -+ "@internal -+ An address valid for SVE PRFH instructions." -+ (match_test "aarch64_sve_prefetch_operand_p (op, VNx8HImode)")) -+ -+(define_address_constraint "UPw" -+ "@internal -+ An address valid for SVE PRFW instructions." -+ (match_test "aarch64_sve_prefetch_operand_p (op, VNx4SImode)")) -+ -+(define_memory_constraint "Utf" -+ "@internal -+ An address valid for SVE LDFF1 instructions." -+ (and (match_code "mem") -+ (match_test "aarch64_sve_ldff1_operand_p (op)"))) -+ -+(define_memory_constraint "Utn" -+ "@internal -+ An address valid for SVE LDNF1 instructions." 
-+ (and (match_code "mem") -+ (match_test "aarch64_sve_ldnf1_operand_p (op)"))) -+ - (define_memory_constraint "Utr" - "@internal - An address valid for SVE LDR and STR instructions (as distinct from -@@ -269,6 +310,37 @@ - (match_test "aarch64_legitimate_address_p (V2DImode, - XEXP (op, 0), 1)"))) - -+(define_memory_constraint "UtQ" -+ "@internal -+ An address valid for SVE LD1RQs." -+ (and (match_code "mem") -+ (match_test "aarch64_sve_ld1rq_operand_p (op)"))) -+ -+(define_memory_constraint "UOb" -+ "@internal -+ An address valid for SVE LD1ROH." -+ (and (match_code "mem") -+ (match_test "aarch64_sve_ld1ro_operand_p (op, QImode)"))) -+ -+(define_memory_constraint "UOh" -+ "@internal -+ An address valid for SVE LD1ROH." -+ (and (match_code "mem") -+ (match_test "aarch64_sve_ld1ro_operand_p (op, HImode)"))) -+ -+ -+(define_memory_constraint "UOw" -+ "@internal -+ An address valid for SVE LD1ROW." -+ (and (match_code "mem") -+ (match_test "aarch64_sve_ld1ro_operand_p (op, SImode)"))) -+ -+(define_memory_constraint "UOd" -+ "@internal -+ An address valid for SVE LD1ROD." -+ (and (match_code "mem") -+ (match_test "aarch64_sve_ld1ro_operand_p (op, DImode)"))) -+ - (define_memory_constraint "Uty" - "@internal - An address valid for SVE LD1Rs." -@@ -284,7 +356,7 @@ - (define_constraint "Ufc" - "A floating point constant which can be used with an\ - FMOV immediate operation." -- (and (match_code "const_double") -+ (and (match_code "const_double,const_vector") - (match_test "aarch64_float_const_representable_p (op)"))) - - (define_constraint "Uvi" -@@ -329,6 +401,13 @@ - (match_test "aarch64_simd_scalar_immediate_valid_for_move (op, - QImode)"))) - -+(define_constraint "Dt" -+ "@internal -+ A const_double which is the reciprocal of an exact power of two, can be -+ used in an scvtf with fract bits operation" -+ (and (match_code "const_double") -+ (match_test "aarch64_fpconst_pow2_recip (op) > 0"))) -+ - (define_constraint "Dl" - "@internal - A constraint that matches vector of immediates for left shifts." -@@ -373,18 +452,54 @@ - An address valid for a prefetch instruction." - (match_test "aarch64_address_valid_for_prefetch_p (op, true)")) - -+(define_constraint "vgb" -+ "@internal -+ A constraint that matches an immediate offset valid for SVE LD1B -+ gather instructions." -+ (match_operand 0 "aarch64_sve_gather_immediate_b")) -+ -+(define_constraint "vgd" -+ "@internal -+ A constraint that matches an immediate offset valid for SVE LD1D -+ gather instructions." -+ (match_operand 0 "aarch64_sve_gather_immediate_d")) -+ -+(define_constraint "vgh" -+ "@internal -+ A constraint that matches an immediate offset valid for SVE LD1H -+ gather instructions." -+ (match_operand 0 "aarch64_sve_gather_immediate_h")) -+ -+(define_constraint "vgw" -+ "@internal -+ A constraint that matches an immediate offset valid for SVE LD1W -+ gather instructions." -+ (match_operand 0 "aarch64_sve_gather_immediate_w")) -+ - (define_constraint "vsa" - "@internal - A constraint that matches an immediate operand valid for SVE - arithmetic instructions." - (match_operand 0 "aarch64_sve_arith_immediate")) - -+(define_constraint "vsb" -+ "@internal -+ A constraint that matches an immediate operand valid for SVE UMAX -+ and UMIN operations." -+ (match_operand 0 "aarch64_sve_vsb_immediate")) -+ - (define_constraint "vsc" - "@internal - A constraint that matches a signed immediate operand valid for SVE - CMP instructions." 
- (match_operand 0 "aarch64_sve_cmp_vsc_immediate")) - -+(define_constraint "vss" -+ "@internal -+ A constraint that matches a signed immediate operand valid for SVE -+ DUP instructions." -+ (match_test "aarch64_sve_dup_immediate_p (op)")) -+ - (define_constraint "vsd" - "@internal - A constraint that matches an unsigned immediate operand valid for SVE -@@ -395,7 +510,7 @@ - "@internal - A constraint that matches a vector count operand valid for SVE INC and - DEC instructions." -- (match_operand 0 "aarch64_sve_inc_dec_immediate")) -+ (match_operand 0 "aarch64_sve_vector_inc_dec_immediate")) - - (define_constraint "vsn" - "@internal -@@ -403,6 +518,18 @@ - is valid for SVE SUB instructions." - (match_operand 0 "aarch64_sve_sub_arith_immediate")) - -+(define_constraint "vsQ" -+ "@internal -+ Like vsa, but additionally check that the immediate is nonnegative -+ when interpreted as a signed value." -+ (match_operand 0 "aarch64_sve_qadd_immediate")) -+ -+(define_constraint "vsS" -+ "@internal -+ Like vsn, but additionally check that the immediate is negative -+ when interpreted as a signed value." -+ (match_operand 0 "aarch64_sve_qsub_immediate")) -+ - (define_constraint "vsl" - "@internal - A constraint that matches an immediate operand valid for SVE logical -@@ -411,9 +538,9 @@ - - (define_constraint "vsm" - "@internal -- A constraint that matches an immediate operand valid for SVE MUL -- operations." -- (match_operand 0 "aarch64_sve_mul_immediate")) -+ A constraint that matches an immediate operand valid for SVE MUL, -+ SMAX and SMIN operations." -+ (match_operand 0 "aarch64_sve_vsm_immediate")) - - (define_constraint "vsA" - "@internal -@@ -421,13 +548,20 @@ - and FSUB operations." - (match_operand 0 "aarch64_sve_float_arith_immediate")) - -+;; "B" for "bound". -+(define_constraint "vsB" -+ "@internal -+ A constraint that matches an immediate operand valid for SVE FMAX -+ and FMIN operations." -+ (match_operand 0 "aarch64_sve_float_maxmin_immediate")) -+ - (define_constraint "vsM" - "@internal -- A constraint that matches an imediate operand valid for SVE FMUL -+ A constraint that matches an immediate operand valid for SVE FMUL - operations." 
- (match_operand 0 "aarch64_sve_float_mul_immediate")) - - (define_constraint "vsN" - "@internal - A constraint that matches the negative of vsA" -- (match_operand 0 "aarch64_sve_float_arith_with_sub_immediate")) -+ (match_operand 0 "aarch64_sve_float_negated_arith_immediate")) -diff --git a/gcc/config/aarch64/cortex-a57-fma-steering.c b/gcc/config/aarch64/cortex-a57-fma-steering.c -index eb91662b6..d8e6038d1 100644 ---- a/gcc/config/aarch64/cortex-a57-fma-steering.c -+++ b/gcc/config/aarch64/cortex-a57-fma-steering.c -@@ -37,6 +37,7 @@ - #include "insn-attr.h" - #include "context.h" - #include "tree-pass.h" -+#include "function-abi.h" - #include "regrename.h" - #include "aarch64-protos.h" - -@@ -267,7 +268,7 @@ rename_single_chain (du_head_p head, HARD_REG_SET *unavailable) - if (DEBUG_INSN_P (tmp->insn)) - continue; - n_uses++; -- IOR_COMPL_HARD_REG_SET (*unavailable, reg_class_contents[tmp->cl]); -+ *unavailable |= ~reg_class_contents[tmp->cl]; - super_class = reg_class_superunion[(int) super_class][(int) tmp->cl]; - } - -@@ -281,7 +282,7 @@ rename_single_chain (du_head_p head, HARD_REG_SET *unavailable) - { - fprintf (dump_file, "Register %s in insn %d", reg_names[reg], - INSN_UID (head->first->insn)); -- if (head->need_caller_save_reg) -+ if (head->call_abis) - fprintf (dump_file, " crosses a call"); - } - -diff --git a/gcc/config/aarch64/driver-aarch64.c b/gcc/config/aarch64/driver-aarch64.c -index 6f16775f4..ef4f18352 100644 ---- a/gcc/config/aarch64/driver-aarch64.c -+++ b/gcc/config/aarch64/driver-aarch64.c -@@ -32,7 +32,7 @@ std::string aarch64_get_extension_string_for_isa_flags (unsigned long, - struct aarch64_arch_extension - { - const char *ext; -- unsigned int flag; -+ uint64_t flag; - const char *feat_string; - }; - -@@ -52,7 +52,7 @@ struct aarch64_core_data - unsigned char implementer_id; /* Exactly 8 bits */ - unsigned int part_no; /* 12 bits + 12 bits */ - unsigned variant; -- const unsigned long flags; -+ const uint64_t flags; - }; - - #define AARCH64_BIG_LITTLE(BIG, LITTLE) \ -@@ -75,7 +75,7 @@ struct aarch64_arch_driver_info - { - const char* id; - const char* name; -- const unsigned long flags; -+ const uint64_t flags; - }; - - #define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \ -@@ -179,8 +179,8 @@ host_detect_local_cpu (int argc, const char **argv) - unsigned int variants[2] = { ALL_VARIANTS, ALL_VARIANTS }; - unsigned int n_variants = 0; - bool processed_exts = false; -- unsigned long extension_flags = 0; -- unsigned long default_flags = 0; -+ uint64_t extension_flags = 0; -+ uint64_t default_flags = 0; - - gcc_assert (argc); - -diff --git a/gcc/config/aarch64/falkor-tag-collision-avoidance.c b/gcc/config/aarch64/falkor-tag-collision-avoidance.c -index 779dee81f..35ca79232 100644 ---- a/gcc/config/aarch64/falkor-tag-collision-avoidance.c -+++ b/gcc/config/aarch64/falkor-tag-collision-avoidance.c -@@ -38,6 +38,7 @@ - #include "optabs.h" - #include "regs.h" - #include "recog.h" -+#include "function-abi.h" - #include "regrename.h" - #include "print-rtl.h" - -@@ -229,7 +230,7 @@ init_unavailable (tag_insn_info *insn_info, tag_map_t &tag_map, du_head_p head, - if (DEBUG_INSN_P (tmp->insn)) - continue; - -- IOR_COMPL_HARD_REG_SET (*unavailable, reg_class_contents[tmp->cl]); -+ *unavailable |= ~reg_class_contents[tmp->cl]; - super_class = reg_class_superunion[(int) super_class][(int) tmp->cl]; - } - -diff --git a/gcc/config/aarch64/falkor.md b/gcc/config/aarch64/falkor.md -index 41955af81..2bcc661e5 100644 ---- a/gcc/config/aarch64/falkor.md -+++ 
b/gcc/config/aarch64/falkor.md -@@ -648,7 +648,7 @@ - - (define_insn_reservation "falkor_other_0_nothing" 0 - (and (eq_attr "tune" "falkor") -- (eq_attr "type" "no_insn,trap,block")) -+ (eq_attr "type" "trap,block")) - "nothing") - - (define_insn_reservation "falkor_other_2_z" 2 -diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md -index c7ccd5bf6..7b6456961 100644 ---- a/gcc/config/aarch64/iterators.md -+++ b/gcc/config/aarch64/iterators.md -@@ -29,9 +29,16 @@ - ;; Iterator for HI, SI, DI, some instructions can only work on these modes. - (define_mode_iterator GPI_I16 [(HI "AARCH64_ISA_F16") SI DI]) - -+;; "Iterator" for just TI -- features like @pattern only work with iterators. -+(define_mode_iterator JUST_TI [TI]) -+ - ;; Iterator for QI and HI modes - (define_mode_iterator SHORT [QI HI]) - -+;; Iterators for single modes, for "@" patterns. -+(define_mode_iterator SI_ONLY [SI]) -+(define_mode_iterator DI_ONLY [DI]) -+ - ;; Iterator for all integer modes (up to 64-bit) - (define_mode_iterator ALLI [QI HI SI DI]) - -@@ -50,9 +57,16 @@ - ;; Iterator for all scalar floating point modes (HF, SF, DF) - (define_mode_iterator GPF_HF [HF SF DF]) - -+;; Iterator for all 16-bit scalar floating point modes (HF, BF) -+(define_mode_iterator HFBF [HF BF]) -+ - ;; Iterator for all scalar floating point modes (HF, SF, DF and TF) - (define_mode_iterator GPF_TF_F16 [HF SF DF TF]) - -+;; Iterator for all scalar floating point modes suitable for moving, including -+;; special BF type (HF, SF, DF, TF and BF) -+(define_mode_iterator GPF_TF_F16_MOV [HF BF SF DF TF]) -+ - ;; Double vector modes. - (define_mode_iterator VDF [V2SF V4HF]) - -@@ -70,7 +84,10 @@ - (define_mode_iterator VSDQ_I_DI [V8QI V16QI V4HI V8HI V2SI V4SI V2DI DI]) - - ;; Double vector modes. --(define_mode_iterator VD [V8QI V4HI V4HF V2SI V2SF]) -+(define_mode_iterator VD [V8QI V4HI V4HF V2SI V2SF V4BF]) -+ -+;; Double vector modes suitable for moving. Includes BFmode. -+(define_mode_iterator VDMOV [V8QI V4HI V4HF V4BF V2SI V2SF]) - - ;; All modes stored in registers d0-d31. - (define_mode_iterator DREG [V8QI V4HI V4HF V2SI V2SF DF]) -@@ -85,20 +102,29 @@ - (define_mode_iterator VDQ_BHSI [V8QI V16QI V4HI V8HI V2SI V4SI]) - - ;; Quad vector modes. --(define_mode_iterator VQ [V16QI V8HI V4SI V2DI V8HF V4SF V2DF]) -+(define_mode_iterator VQ [V16QI V8HI V4SI V2DI V8HF V4SF V2DF V8BF]) - - ;; Copy of the above. --(define_mode_iterator VQ2 [V16QI V8HI V4SI V2DI V8HF V4SF V2DF]) -+(define_mode_iterator VQ2 [V16QI V8HI V4SI V2DI V8HF V8BF V4SF V2DF]) -+ -+;; Quad vector modes suitable for moving. Includes BFmode. -+(define_mode_iterator VQMOV [V16QI V8HI V4SI V2DI V8HF V8BF V4SF V2DF]) -+ -+;; VQMOV without 2-element modes. -+(define_mode_iterator VQMOV_NO2E [V16QI V8HI V4SI V8HF V8BF V4SF]) - - ;; Quad integer vector modes. - (define_mode_iterator VQ_I [V16QI V8HI V4SI V2DI]) - - ;; VQ without 2 element modes. --(define_mode_iterator VQ_NO2E [V16QI V8HI V4SI V8HF V4SF]) -+(define_mode_iterator VQ_NO2E [V16QI V8HI V4SI V8HF V4SF V8BF]) - - ;; Quad vector with only 2 element modes. - (define_mode_iterator VQ_2E [V2DI V2DF]) - -+;; BFmode vector modes. -+(define_mode_iterator VBF [V4BF V8BF]) -+ - ;; This mode iterator allows :P to be used for patterns that operate on - ;; addresses in different modes. In LP64, only DI will match, while in - ;; ILP32, either can match. 
-@@ -110,7 +136,8 @@ - (define_mode_iterator PTR [(SI "ptr_mode == SImode") (DI "ptr_mode == DImode")]) - - ;; Advanced SIMD Float modes suitable for moving, loading and storing. --(define_mode_iterator VDQF_F16 [V4HF V8HF V2SF V4SF V2DF]) -+(define_mode_iterator VDQF_F16 [V4HF V8HF V2SF V4SF V2DF -+ V4BF V8BF]) - - ;; Advanced SIMD Float modes. - (define_mode_iterator VDQF [V2SF V4SF V2DF]) -@@ -128,6 +155,9 @@ - (HF "TARGET_SIMD_F16INST") - SF DF]) - -+;; Scalar and vetor modes for SF, DF. -+(define_mode_iterator VSFDF [V2SF V4SF V2DF DF SF]) -+ - ;; Advanced SIMD single Float modes. - (define_mode_iterator VDQSF [V2SF V4SF]) - -@@ -148,7 +178,12 @@ - - ;; All Advanced SIMD modes suitable for moving, loading, and storing. - (define_mode_iterator VALL_F16 [V8QI V16QI V4HI V8HI V2SI V4SI V2DI -- V4HF V8HF V2SF V4SF V2DF]) -+ V4HF V8HF V4BF V8BF V2SF V4SF V2DF]) -+ -+;; All Advanced SIMD modes suitable for moving, loading, and storing, -+;; including special Bfloat vector types. -+(define_mode_iterator VALL_F16MOV [V8QI V16QI V4HI V8HI V2SI V4SI V2DI -+ V4HF V8HF V4BF V8BF V2SF V4SF V2DF]) - - ;; The VALL_F16 modes except the 128-bit 2-element ones. - (define_mode_iterator VALL_F16_NO_V2Q [V8QI V16QI V4HI V8HI V2SI V4SI -@@ -159,10 +194,10 @@ - - ;; All Advanced SIMD modes and DI. - (define_mode_iterator VALLDI_F16 [V8QI V16QI V4HI V8HI V2SI V4SI V2DI -- V4HF V8HF V2SF V4SF V2DF DI]) -+ V4HF V8HF V4BF V8BF V2SF V4SF V2DF DI]) - - ;; All Advanced SIMD modes, plus DI and DF. --(define_mode_iterator VALLDIF [V8QI V16QI V4HI V8HI V2SI V4SI -+(define_mode_iterator VALLDIF [V8QI V16QI V4HI V8HI V2SI V4SI V4BF V8BF - V2DI V4HF V8HF V2SF V4SF V2DF DI DF]) - - ;; Advanced SIMD modes for Integer reduction across lanes. -@@ -185,7 +220,7 @@ - (define_mode_iterator VQW [V16QI V8HI V4SI]) - - ;; Double vector modes for combines. --(define_mode_iterator VDC [V8QI V4HI V4HF V2SI V2SF DI DF]) -+(define_mode_iterator VDC [V8QI V4HI V4BF V4HF V2SI V2SF DI DF]) - - ;; Advanced SIMD modes except double int. - (define_mode_iterator VDQIF [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF V2DF]) -@@ -274,50 +309,85 @@ - ;; count. - (define_mode_iterator VMUL_CHANGE_NLANES [V4HI V8HI V2SI V4SI V2SF V4SF]) - --;; All SVE vector modes. --(define_mode_iterator SVE_ALL [VNx16QI VNx8HI VNx4SI VNx2DI -- VNx8HF VNx4SF VNx2DF]) -+;; Iterators for single modes, for "@" patterns. -+(define_mode_iterator VNx8HI_ONLY [VNx8HI]) -+(define_mode_iterator VNx8BF_ONLY [VNx8BF]) -+(define_mode_iterator VNx4SI_ONLY [VNx4SI]) -+(define_mode_iterator VNx4SF_ONLY [VNx4SF]) -+(define_mode_iterator VNx2DI_ONLY [VNx2DI]) -+(define_mode_iterator VNx2DF_ONLY [VNx2DF]) - - ;; All SVE vector structure modes. - (define_mode_iterator SVE_STRUCT [VNx32QI VNx16HI VNx8SI VNx4DI -- VNx16HF VNx8SF VNx4DF -+ VNx16BF VNx16HF VNx8SF VNx4DF - VNx48QI VNx24HI VNx12SI VNx6DI -- VNx24HF VNx12SF VNx6DF -+ VNx24BF VNx24HF VNx12SF VNx6DF - VNx64QI VNx32HI VNx16SI VNx8DI -- VNx32HF VNx16SF VNx8DF]) -+ VNx32BF VNx32HF VNx16SF VNx8DF]) - --;; All SVE vector modes that have 8-bit or 16-bit elements. --(define_mode_iterator SVE_BH [VNx16QI VNx8HI VNx8HF]) -+;; All fully-packed SVE vector modes. -+(define_mode_iterator SVE_FULL [VNx16QI VNx8HI VNx4SI VNx2DI -+ VNx8BF VNx8HF VNx4SF VNx2DF]) - --;; All SVE vector modes that have 8-bit, 16-bit or 32-bit elements. --(define_mode_iterator SVE_BHS [VNx16QI VNx8HI VNx4SI VNx8HF VNx4SF]) -+;; All fully-packed SVE integer vector modes. 
-+(define_mode_iterator SVE_FULL_I [VNx16QI VNx8HI VNx4SI VNx2DI]) - --;; All SVE integer vector modes that have 8-bit, 16-bit or 32-bit elements. --(define_mode_iterator SVE_BHSI [VNx16QI VNx8HI VNx4SI]) -+;; All fully-packed SVE floating-point vector modes. -+(define_mode_iterator SVE_FULL_F [VNx8HF VNx4SF VNx2DF]) - --;; All SVE integer vector modes that have 16-bit, 32-bit or 64-bit elements. --(define_mode_iterator SVE_HSDI [VNx16QI VNx8HI VNx4SI]) -+;; Fully-packed SVE integer vector modes that have 8-bit, 16-bit or 32-bit -+;; elements. -+(define_mode_iterator SVE_FULL_BHSI [VNx16QI VNx8HI VNx4SI]) - --;; All SVE floating-point vector modes that have 16-bit or 32-bit elements. --(define_mode_iterator SVE_HSF [VNx8HF VNx4SF]) -+;; Fully-packed SVE vector modes that have 16-bit, 32-bit or 64-bit elements. -+(define_mode_iterator SVE_FULL_HSD [VNx8HI VNx4SI VNx2DI -+ VNx8BF VNx8HF VNx4SF VNx2DF]) - --;; All SVE vector modes that have 32-bit or 64-bit elements. --(define_mode_iterator SVE_SD [VNx4SI VNx2DI VNx4SF VNx2DF]) -+;; Fully-packed SVE integer vector modes that have 16-bit, 32-bit or 64-bit -+;; elements. -+(define_mode_iterator SVE_FULL_HSDI [VNx8HI VNx4SI VNx2DI]) - --;; All SVE vector modes that have 32-bit elements. --(define_mode_iterator SVE_S [VNx4SI VNx4SF]) -+;; Fully-packed SVE floating-point vector modes that have 16-bit or 32-bit -+;; elements. -+(define_mode_iterator SVE_FULL_HSF [VNx8HF VNx4SF]) - --;; All SVE vector modes that have 64-bit elements. --(define_mode_iterator SVE_D [VNx2DI VNx2DF]) -+;; Fully-packed SVE vector modes that have 32-bit or 64-bit elements. -+(define_mode_iterator SVE_FULL_SD [VNx4SI VNx2DI VNx4SF VNx2DF]) - --;; All SVE integer vector modes that have 32-bit or 64-bit elements. --(define_mode_iterator SVE_SDI [VNx4SI VNx2DI]) -+;; Fully-packed SVE integer vector modes that have 32-bit or 64-bit elements. -+(define_mode_iterator SVE_FULL_SDI [VNx4SI VNx2DI]) - --;; All SVE integer vector modes. --(define_mode_iterator SVE_I [VNx16QI VNx8HI VNx4SI VNx2DI]) -+;; Fully-packed SVE floating-point vector modes that have 32-bit or 64-bit -+;; elements. -+(define_mode_iterator SVE_FULL_SDF [VNx4SF VNx2DF]) - --;; All SVE floating-point vector modes. --(define_mode_iterator SVE_F [VNx8HF VNx4SF VNx2DF]) -+;; Same, but with the appropriate conditions for FMMLA support. -+(define_mode_iterator SVE_MATMULF [(VNx4SF "TARGET_SVE_F32MM") -+ (VNx2DF "TARGET_SVE_F64MM")]) -+ -+;; Fully-packed SVE vector modes that have 32-bit elements. -+(define_mode_iterator SVE_FULL_S [VNx4SI VNx4SF]) -+ -+;; Fully-packed SVE vector modes that have 64-bit elements. -+(define_mode_iterator SVE_FULL_D [VNx2DI VNx2DF]) -+ -+;; All partial SVE modes. -+(define_mode_iterator SVE_PARTIAL [VNx2QI -+ VNx4QI VNx2HI -+ VNx8QI VNx4HI VNx2SI]) -+ -+;; Modes involved in extending or truncating SVE data, for 8 elements per -+;; 128-bit block. -+(define_mode_iterator VNx8_NARROW [VNx8QI]) -+(define_mode_iterator VNx8_WIDE [VNx8HI]) -+ -+;; ...same for 4 elements per 128-bit block. -+(define_mode_iterator VNx4_NARROW [VNx4QI VNx4HI]) -+(define_mode_iterator VNx4_WIDE [VNx4SI]) -+ -+;; ...same for 2 elements per 128-bit block. -+(define_mode_iterator VNx2_NARROW [VNx2QI VNx2HI VNx2SI]) -+(define_mode_iterator VNx2_WIDE [VNx2DI]) - - ;; All SVE predicate modes. - (define_mode_iterator PRED_ALL [VNx16BI VNx8BI VNx4BI VNx2BI]) -@@ -325,6 +395,12 @@ - ;; SVE predicate modes that control 8-bit, 16-bit or 32-bit elements. 
- (define_mode_iterator PRED_BHS [VNx16BI VNx8BI VNx4BI]) - -+;; SVE predicate modes that control 16-bit, 32-bit or 64-bit elements. -+(define_mode_iterator PRED_HSD [VNx8BI VNx4BI VNx2BI]) -+ -+;; Bfloat16 modes to which V4SF can be converted -+(define_mode_iterator V4SF_TO_BF [V4BF V8BF]) -+ - ;; ------------------------------------------------------------------ - ;; Unspec enumerations for Advance SIMD. These could well go into - ;; aarch64.md but for their use in int_iterators here. -@@ -365,6 +441,10 @@ - UNSPEC_RSUBHN2 ; Used in aarch64-simd.md. - UNSPEC_SQDMULH ; Used in aarch64-simd.md. - UNSPEC_SQRDMULH ; Used in aarch64-simd.md. -+ UNSPEC_SMULLB ; Used in aarch64-sve2.md. -+ UNSPEC_SMULLT ; Used in aarch64-sve2.md. -+ UNSPEC_UMULLB ; Used in aarch64-sve2.md. -+ UNSPEC_UMULLT ; Used in aarch64-sve2.md. - UNSPEC_PMUL ; Used in aarch64-simd.md. - UNSPEC_FMULX ; Used in aarch64-simd.md. - UNSPEC_USQADD ; Used in aarch64-simd.md. -@@ -387,6 +467,10 @@ - UNSPEC_UQSHRN ; Used in aarch64-simd.md. - UNSPEC_SQRSHRN ; Used in aarch64-simd.md. - UNSPEC_UQRSHRN ; Used in aarch64-simd.md. -+ UNSPEC_SHRNB ; Used in aarch64-sve2.md. -+ UNSPEC_SHRNT ; Used in aarch64-sve2.md. -+ UNSPEC_RSHRNB ; Used in aarch64-sve2.md. -+ UNSPEC_RSHRNT ; Used in aarch64-sve2.md. - UNSPEC_SSHL ; Used in aarch64-simd.md. - UNSPEC_USHL ; Used in aarch64-simd.md. - UNSPEC_SRSHL ; Used in aarch64-simd.md. -@@ -459,38 +543,126 @@ - UNSPEC_FMLSL ; Used in aarch64-simd.md. - UNSPEC_FMLAL2 ; Used in aarch64-simd.md. - UNSPEC_FMLSL2 ; Used in aarch64-simd.md. -+ UNSPEC_ADR ; Used in aarch64-sve.md. - UNSPEC_SEL ; Used in aarch64-sve.md. -+ UNSPEC_BRKA ; Used in aarch64-sve.md. -+ UNSPEC_BRKB ; Used in aarch64-sve.md. -+ UNSPEC_BRKN ; Used in aarch64-sve.md. -+ UNSPEC_BRKPA ; Used in aarch64-sve.md. -+ UNSPEC_BRKPB ; Used in aarch64-sve.md. -+ UNSPEC_PFIRST ; Used in aarch64-sve.md. -+ UNSPEC_PNEXT ; Used in aarch64-sve.md. -+ UNSPEC_CNTP ; Used in aarch64-sve.md. -+ UNSPEC_SADDV ; Used in aarch64-sve.md. -+ UNSPEC_UADDV ; Used in aarch64-sve.md. - UNSPEC_ANDV ; Used in aarch64-sve.md. - UNSPEC_IORV ; Used in aarch64-sve.md. - UNSPEC_XORV ; Used in aarch64-sve.md. - UNSPEC_ANDF ; Used in aarch64-sve.md. - UNSPEC_IORF ; Used in aarch64-sve.md. - UNSPEC_XORF ; Used in aarch64-sve.md. -+ UNSPEC_REVB ; Used in aarch64-sve.md. -+ UNSPEC_REVH ; Used in aarch64-sve.md. -+ UNSPEC_REVW ; Used in aarch64-sve.md. - UNSPEC_SMUL_HIGHPART ; Used in aarch64-sve.md. - UNSPEC_UMUL_HIGHPART ; Used in aarch64-sve.md. -- UNSPEC_COND_ADD ; Used in aarch64-sve.md. -- UNSPEC_COND_SUB ; Used in aarch64-sve.md. -- UNSPEC_COND_MUL ; Used in aarch64-sve.md. -- UNSPEC_COND_DIV ; Used in aarch64-sve.md. -- UNSPEC_COND_MAX ; Used in aarch64-sve.md. -- UNSPEC_COND_MIN ; Used in aarch64-sve.md. -+ UNSPEC_FMLA ; Used in aarch64-sve.md. -+ UNSPEC_FMLS ; Used in aarch64-sve.md. -+ UNSPEC_FEXPA ; Used in aarch64-sve.md. -+ UNSPEC_FMMLA ; Used in aarch64-sve.md. -+ UNSPEC_FTMAD ; Used in aarch64-sve.md. -+ UNSPEC_FTSMUL ; Used in aarch64-sve.md. -+ UNSPEC_FTSSEL ; Used in aarch64-sve.md. -+ UNSPEC_SMATMUL ; Used in aarch64-sve.md. -+ UNSPEC_UMATMUL ; Used in aarch64-sve.md. -+ UNSPEC_USMATMUL ; Used in aarch64-sve.md. -+ UNSPEC_TRN1Q ; Used in aarch64-sve.md. -+ UNSPEC_TRN2Q ; Used in aarch64-sve.md. -+ UNSPEC_UZP1Q ; Used in aarch64-sve.md. -+ UNSPEC_UZP2Q ; Used in aarch64-sve.md. -+ UNSPEC_ZIP1Q ; Used in aarch64-sve.md. -+ UNSPEC_ZIP2Q ; Used in aarch64-sve.md. -+ UNSPEC_COND_CMPEQ_WIDE ; Used in aarch64-sve.md. 
-+ UNSPEC_COND_CMPGE_WIDE ; Used in aarch64-sve.md. -+ UNSPEC_COND_CMPGT_WIDE ; Used in aarch64-sve.md. -+ UNSPEC_COND_CMPHI_WIDE ; Used in aarch64-sve.md. -+ UNSPEC_COND_CMPHS_WIDE ; Used in aarch64-sve.md. -+ UNSPEC_COND_CMPLE_WIDE ; Used in aarch64-sve.md. -+ UNSPEC_COND_CMPLO_WIDE ; Used in aarch64-sve.md. -+ UNSPEC_COND_CMPLS_WIDE ; Used in aarch64-sve.md. -+ UNSPEC_COND_CMPLT_WIDE ; Used in aarch64-sve.md. -+ UNSPEC_COND_CMPNE_WIDE ; Used in aarch64-sve.md. -+ UNSPEC_COND_FABS ; Used in aarch64-sve.md. -+ UNSPEC_COND_FADD ; Used in aarch64-sve.md. -+ UNSPEC_COND_FCADD90 ; Used in aarch64-sve.md. -+ UNSPEC_COND_FCADD270 ; Used in aarch64-sve.md. -+ UNSPEC_COND_FCMEQ ; Used in aarch64-sve.md. -+ UNSPEC_COND_FCMGE ; Used in aarch64-sve.md. -+ UNSPEC_COND_FCMGT ; Used in aarch64-sve.md. -+ UNSPEC_COND_FCMLA ; Used in aarch64-sve.md. -+ UNSPEC_COND_FCMLA90 ; Used in aarch64-sve.md. -+ UNSPEC_COND_FCMLA180 ; Used in aarch64-sve.md. -+ UNSPEC_COND_FCMLA270 ; Used in aarch64-sve.md. -+ UNSPEC_COND_FCMLE ; Used in aarch64-sve.md. -+ UNSPEC_COND_FCMLT ; Used in aarch64-sve.md. -+ UNSPEC_COND_FCMNE ; Used in aarch64-sve.md. -+ UNSPEC_COND_FCMUO ; Used in aarch64-sve.md. -+ UNSPEC_COND_FCVT ; Used in aarch64-sve.md. -+ UNSPEC_COND_FCVTZS ; Used in aarch64-sve.md. -+ UNSPEC_COND_FCVTZU ; Used in aarch64-sve.md. -+ UNSPEC_COND_FDIV ; Used in aarch64-sve.md. -+ UNSPEC_COND_FMAX ; Used in aarch64-sve.md. -+ UNSPEC_COND_FMAXNM ; Used in aarch64-sve.md. -+ UNSPEC_COND_FMIN ; Used in aarch64-sve.md. -+ UNSPEC_COND_FMINNM ; Used in aarch64-sve.md. - UNSPEC_COND_FMLA ; Used in aarch64-sve.md. - UNSPEC_COND_FMLS ; Used in aarch64-sve.md. -+ UNSPEC_COND_FMUL ; Used in aarch64-sve.md. -+ UNSPEC_COND_FMULX ; Used in aarch64-sve.md. -+ UNSPEC_COND_FNEG ; Used in aarch64-sve.md. - UNSPEC_COND_FNMLA ; Used in aarch64-sve.md. - UNSPEC_COND_FNMLS ; Used in aarch64-sve.md. -- UNSPEC_COND_LT ; Used in aarch64-sve.md. -- UNSPEC_COND_LE ; Used in aarch64-sve.md. -- UNSPEC_COND_EQ ; Used in aarch64-sve.md. -- UNSPEC_COND_NE ; Used in aarch64-sve.md. -- UNSPEC_COND_GE ; Used in aarch64-sve.md. -- UNSPEC_COND_GT ; Used in aarch64-sve.md. -+ UNSPEC_COND_FRECPX ; Used in aarch64-sve.md. -+ UNSPEC_COND_FRINTA ; Used in aarch64-sve.md. -+ UNSPEC_COND_FRINTI ; Used in aarch64-sve.md. -+ UNSPEC_COND_FRINTM ; Used in aarch64-sve.md. -+ UNSPEC_COND_FRINTN ; Used in aarch64-sve.md. -+ UNSPEC_COND_FRINTP ; Used in aarch64-sve.md. -+ UNSPEC_COND_FRINTX ; Used in aarch64-sve.md. -+ UNSPEC_COND_FRINTZ ; Used in aarch64-sve.md. -+ UNSPEC_COND_FSCALE ; Used in aarch64-sve.md. -+ UNSPEC_COND_FSQRT ; Used in aarch64-sve.md. -+ UNSPEC_COND_FSUB ; Used in aarch64-sve.md. -+ UNSPEC_COND_SCVTF ; Used in aarch64-sve.md. -+ UNSPEC_COND_UCVTF ; Used in aarch64-sve.md. -+ UNSPEC_LASTA ; Used in aarch64-sve.md. - UNSPEC_LASTB ; Used in aarch64-sve.md. -+ UNSPEC_ASHIFT_WIDE ; Used in aarch64-sve.md. -+ UNSPEC_ASHIFTRT_WIDE ; Used in aarch64-sve.md. -+ UNSPEC_LSHIFTRT_WIDE ; Used in aarch64-sve.md. -+ UNSPEC_LDFF1 ; Used in aarch64-sve.md. -+ UNSPEC_LDNF1 ; Used in aarch64-sve.md. - UNSPEC_FCADD90 ; Used in aarch64-simd.md. - UNSPEC_FCADD270 ; Used in aarch64-simd.md. - UNSPEC_FCMLA ; Used in aarch64-simd.md. - UNSPEC_FCMLA90 ; Used in aarch64-simd.md. - UNSPEC_FCMLA180 ; Used in aarch64-simd.md. - UNSPEC_FCMLA270 ; Used in aarch64-simd.md. -+ UNSPEC_COND_FCVTNT ; Used in aarch64-sve2.md. -+ UNSPEC_SMULHS ; Used in aarch64-sve2.md. -+ UNSPEC_SMULHRS ; Used in aarch64-sve2.md. -+ UNSPEC_UMULHS ; Used in aarch64-sve2.md. 
-+ UNSPEC_UMULHRS ; Used in aarch64-sve2.md. -+ UNSPEC_ASRD ; Used in aarch64-sve.md. -+ UNSPEC_USDOT ; Used in aarch64-simd.md. -+ UNSPEC_SUDOT ; Used in aarch64-simd.md. -+ UNSPEC_BFDOT ; Used in aarch64-simd.md. -+ UNSPEC_BFMLALB ; Used in aarch64-sve.md. -+ UNSPEC_BFMLALT ; Used in aarch64-sve.md. -+ UNSPEC_BFMMLA ; Used in aarch64-sve.md. -+ UNSPEC_BFCVTN ; Used in aarch64-simd.md. -+ UNSPEC_BFCVTN2 ; Used in aarch64-simd.md. -+ UNSPEC_BFCVT ; Used in aarch64-simd.md. - ]) - - ;; ------------------------------------------------------------------ -@@ -586,6 +758,7 @@ - (V2SI "2") (V4SI "4") - (V2DI "2") - (V4HF "4") (V8HF "8") -+ (V4BF "4") (V8BF "8") - (V2SF "2") (V4SF "4") - (V1DF "1") (V2DF "2") - (DI "1") (DF "1")]) -@@ -610,6 +783,14 @@ - (define_mode_attr sizem1 [(QI "#7") (HI "#15") (SI "#31") (DI "#63") - (HF "#15") (SF "#31") (DF "#63")]) - -+;; The number of bits in a vector element, or controlled by a predicate -+;; element. -+(define_mode_attr elem_bits [(VNx16BI "8") (VNx8BI "16") -+ (VNx4BI "32") (VNx2BI "64") -+ (VNx16QI "8") (VNx8HI "16") -+ (VNx4SI "32") (VNx2DI "64") -+ (VNx8HF "16") (VNx4SF "32") (VNx2DF "64")]) -+ - ;; Attribute to describe constants acceptable in logical operations - (define_mode_attr lconst [(SI "K") (DI "L")]) - -@@ -624,6 +805,7 @@ - - (define_mode_attr Vtype [(V8QI "8b") (V16QI "16b") - (V4HI "4h") (V8HI "8h") -+ (V4BF "4h") (V8BF "8h") - (V2SI "2s") (V4SI "4s") - (DI "1d") (DF "1d") - (V2DI "2d") (V2SF "2s") -@@ -637,7 +819,8 @@ - (V4HI ".4h") (V8HI ".8h") - (V2SI ".2s") (V4SI ".4s") - (V2DI ".2d") (V4HF ".4h") -- (V8HF ".8h") (V2SF ".2s") -+ (V8HF ".8h") (V4BF ".4h") -+ (V8BF ".8h") (V2SF ".2s") - (V4SF ".4s") (V2DF ".2d") - (DI "") (SI "") - (HI "") (QI "") -@@ -655,9 +838,10 @@ - (V4HI "h") (V8HI "h") (VNx8HI "h") (VNx8BI "h") - (V2SI "s") (V4SI "s") (VNx4SI "s") (VNx4BI "s") - (V2DI "d") (VNx2DI "d") (VNx2BI "d") -- (V4HF "h") (V8HF "h") (VNx8HF "h") -+ (V4HF "h") (V8HF "h") (VNx8HF "h") (VNx8BF "h") - (V2SF "s") (V4SF "s") (VNx4SF "s") - (V2DF "d") (VNx2DF "d") -+ (BF "h") (V4BF "h") (V8BF "h") - (HF "h") - (SF "s") (DF "d") - (QI "b") (HI "h") -@@ -667,13 +851,17 @@ - (define_mode_attr Vetype_fourth [(VNx4SI "b") (VNx2DI "h")]) - - ;; Equivalent of "size" for a vector element. --(define_mode_attr Vesize [(VNx16QI "b") -- (VNx8HI "h") (VNx8HF "h") -- (VNx4SI "w") (VNx4SF "w") -+(define_mode_attr Vesize [(VNx16QI "b") (VNx8QI "b") -+ (VNx4QI "b") (VNx2QI "b") -+ (VNx8HI "h") (VNx4HI "h") -+ (VNx2HI "h") (VNx8HF "h") -+ (VNx4SI "w") (VNx2SI "w") (VNx4SF "w") - (VNx2DI "d") (VNx2DF "d") - (VNx32QI "b") (VNx48QI "b") (VNx64QI "b") - (VNx16HI "h") (VNx24HI "h") (VNx32HI "h") - (VNx16HF "h") (VNx24HF "h") (VNx32HF "h") -+ (VNx16BF "h") (VNx24BF "h") (VNx32BF "h") -+ (VNx8BF "h") - (VNx8SI "w") (VNx12SI "w") (VNx16SI "w") - (VNx8SF "w") (VNx12SF "w") (VNx16SF "w") - (VNx4DI "d") (VNx6DI "d") (VNx8DI "d") -@@ -697,13 +885,16 @@ - (V8HF "16b") (V2SF "8b") - (V4SF "16b") (V2DF "16b") - (DI "8b") (DF "8b") -- (SI "8b") (SF "8b")]) -+ (SI "8b") (SF "8b") -+ (V4BF "8b") (V8BF "16b")]) - - ;; Define element mode for each vector mode. - (define_mode_attr VEL [(V8QI "QI") (V16QI "QI") (VNx16QI "QI") - (V4HI "HI") (V8HI "HI") (VNx8HI "HI") - (V2SI "SI") (V4SI "SI") (VNx4SI "SI") -+ (VNx8BF "BF") - (DI "DI") (V2DI "DI") (VNx2DI "DI") -+ (V4BF "BF") (V8BF "BF") - (V4HF "HF") (V8HF "HF") (VNx8HF "HF") - (V2SF "SF") (V4SF "SF") (VNx4SF "SF") - (DF "DF") (V2DF "DF") (VNx2DF "DF") -@@ -713,8 +904,10 @@ - ;; Define element mode for each vector mode (lower case). 
- (define_mode_attr Vel [(V8QI "qi") (V16QI "qi") (VNx16QI "qi") - (V4HI "hi") (V8HI "hi") (VNx8HI "hi") -+ (VNx8BF "bf") - (V2SI "si") (V4SI "si") (VNx4SI "si") - (DI "di") (V2DI "di") (VNx2DI "di") -+ (V4BF "bf") (V8BF "bf") - (V4HF "hf") (V8HF "hf") (VNx8HF "hf") - (V2SF "sf") (V4SF "sf") (VNx4SF "sf") - (V2DF "df") (DF "df") (VNx2DF "df") -@@ -723,19 +916,19 @@ - - ;; Element mode with floating-point values replaced by like-sized integers. - (define_mode_attr VEL_INT [(VNx16QI "QI") -- (VNx8HI "HI") (VNx8HF "HI") -+ (VNx8HI "HI") (VNx8HF "HI") (VNx8BF "HI") - (VNx4SI "SI") (VNx4SF "SI") - (VNx2DI "DI") (VNx2DF "DI")]) - - ;; Gives the mode of the 128-bit lowpart of an SVE vector. - (define_mode_attr V128 [(VNx16QI "V16QI") -- (VNx8HI "V8HI") (VNx8HF "V8HF") -+ (VNx8HI "V8HI") (VNx8HF "V8HF") (VNx8BF "V8BF") - (VNx4SI "V4SI") (VNx4SF "V4SF") - (VNx2DI "V2DI") (VNx2DF "V2DF")]) - - ;; ...and again in lower case. - (define_mode_attr v128 [(VNx16QI "v16qi") -- (VNx8HI "v8hi") (VNx8HF "v8hf") -+ (VNx8HI "v8hi") (VNx8HF "v8hf") (VNx8BF "v8bf") - (VNx4SI "v4si") (VNx4SF "v4sf") - (VNx2DI "v2di") (VNx2DF "v2df")]) - -@@ -763,19 +956,20 @@ - (V2SI "SI") (V4SI "V2SI") - (V2DI "DI") (V2SF "SF") - (V4SF "V2SF") (V4HF "V2HF") -- (V8HF "V4HF") (V2DF "DF")]) -+ (V8HF "V4HF") (V2DF "DF") -+ (V8BF "V4BF")]) - - ;; Half modes of all vector modes, in lower-case. - (define_mode_attr Vhalf [(V8QI "v4qi") (V16QI "v8qi") - (V4HI "v2hi") (V8HI "v4hi") -- (V8HF "v4hf") -+ (V8HF "v4hf") (V8BF "v4bf") - (V2SI "si") (V4SI "v2si") - (V2DI "di") (V2SF "sf") - (V4SF "v2sf") (V2DF "df")]) - - ;; Double modes of vector modes. - (define_mode_attr VDBL [(V8QI "V16QI") (V4HI "V8HI") -- (V4HF "V8HF") -+ (V4HF "V8HF") (V4BF "V8BF") - (V2SI "V4SI") (V2SF "V4SF") - (SI "V2SI") (DI "V2DI") - (DF "V2DF")]) -@@ -785,7 +979,7 @@ - - ;; Double modes of vector modes (lower case). - (define_mode_attr Vdbl [(V8QI "v16qi") (V4HI "v8hi") -- (V4HF "v8hf") -+ (V4HF "v8hf") (V4BF "v8bf") - (V2SI "v4si") (V2SF "v4sf") - (SI "v2si") (DI "v2di") - (DF "v2df")]) -@@ -879,6 +1073,7 @@ - ;; variation on mapping FP modes to GP regs. - (define_mode_attr vwcore [(V8QI "w") (V16QI "w") (VNx16QI "w") - (V4HI "w") (V8HI "w") (VNx8HI "w") -+ (VNx8BF "w") - (V2SI "w") (V4SI "w") (VNx4SI "w") - (DI "x") (V2DI "x") (VNx2DI "x") - (V4HF "w") (V8HF "w") (VNx8HF "w") -@@ -894,12 +1089,14 @@ - (V2SI "V2SI") (V4SI "V4SI") - (DI "DI") (V2DI "V2DI") - (V4HF "V4HI") (V8HF "V8HI") -+ (V4BF "V4HI") (V8BF "V8HI") - (V2SF "V2SI") (V4SF "V4SI") - (DF "DI") (V2DF "V2DI") - (SF "SI") (SI "SI") - (HF "HI") - (VNx16QI "VNx16QI") - (VNx8HI "VNx8HI") (VNx8HF "VNx8HI") -+ (VNx8BF "VNx8HI") - (VNx4SI "VNx4SI") (VNx4SF "VNx4SI") - (VNx2DI "VNx2DI") (VNx2DF "VNx2DI") - ]) -@@ -910,19 +1107,25 @@ - (V2SI "v2si") (V4SI "v4si") - (DI "di") (V2DI "v2di") - (V4HF "v4hi") (V8HF "v8hi") -+ (V4BF "v4hi") (V8BF "v8hi") - (V2SF "v2si") (V4SF "v4si") - (DF "di") (V2DF "v2di") - (SF "si") - (VNx16QI "vnx16qi") - (VNx8HI "vnx8hi") (VNx8HF "vnx8hi") -+ (VNx8BF "vnx8hi") - (VNx4SI "vnx4si") (VNx4SF "vnx4si") - (VNx2DI "vnx2di") (VNx2DF "vnx2di") - ]) - - ;; Floating-point equivalent of selected modes. 
--(define_mode_attr V_FP_EQUIV [(VNx4SI "VNx4SF") (VNx4SF "VNx4SF") -+(define_mode_attr V_FP_EQUIV [(VNx8HI "VNx8HF") (VNx8HF "VNx8HF") -+ (VNx8BF "VNx8HF") -+ (VNx4SI "VNx4SF") (VNx4SF "VNx4SF") - (VNx2DI "VNx2DF") (VNx2DF "VNx2DF")]) --(define_mode_attr v_fp_equiv [(VNx4SI "vnx4sf") (VNx4SF "vnx4sf") -+(define_mode_attr v_fp_equiv [(VNx8HI "vnx8hf") (VNx8HF "vnx8hf") -+ (VNx8BF "vnx8hf") -+ (VNx4SI "vnx4sf") (VNx4SF "vnx4sf") - (VNx2DI "vnx2df") (VNx2DF "vnx2df")]) - - ;; Mode for vector conditional operations where the comparison has -@@ -976,6 +1179,7 @@ - - (define_mode_attr VSWAP_WIDTH [(V8QI "V16QI") (V16QI "V8QI") - (V4HI "V8HI") (V8HI "V4HI") -+ (V8BF "V4BF") (V4BF "V8BF") - (V2SI "V4SI") (V4SI "V2SI") - (DI "V2DI") (V2DI "DI") - (V2SF "V4SF") (V4SF "V2SF") -@@ -988,6 +1192,7 @@ - (DI "to_128") (V2DI "to_64") - (V4HF "to_128") (V8HF "to_64") - (V2SF "to_128") (V4SF "to_64") -+ (V4BF "to_128") (V8BF "to_64") - (DF "to_128") (V2DF "to_64")]) - - ;; For certain vector-by-element multiplication instructions we must -@@ -1021,9 +1226,11 @@ - ;; Defined to '_q' for 128-bit types. - (define_mode_attr q [(V8QI "") (V16QI "_q") - (V4HI "") (V8HI "_q") -+ (V4BF "") (V8BF "_q") - (V2SI "") (V4SI "_q") - (DI "") (V2DI "_q") - (V4HF "") (V8HF "_q") -+ (V4BF "") (V8BF "_q") - (V2SF "") (V4SF "_q") - (V2DF "_q") - (QI "") (HI "") (SI "") (DI "") (HF "") (SF "") (DF "")]) -@@ -1044,6 +1251,9 @@ - ;; Register suffix for DOTPROD input types from the return type. - (define_mode_attr Vdottype [(V2SI "8b") (V4SI "16b")]) - -+;; Register suffix for BFDOT input types from the return type. -+(define_mode_attr Vbfdottype [(V2SF "4h") (V4SF "8h")]) -+ - ;; Sum of lengths of instructions needed to move vector registers of a mode. - (define_mode_attr insn_count [(OI "8") (CI "12") (XI "16")]) - -@@ -1054,63 +1264,83 @@ - ;; Width of 2nd and 3rd arguments to fp16 vector multiply add/sub - (define_mode_attr VFMLA_W [(V2SF "V4HF") (V4SF "V8HF")]) - -+;; Width of 2nd and 3rd arguments to bf16 vector multiply add/sub -+(define_mode_attr VBFMLA_W [(V2SF "V4BF") (V4SF "V8BF")]) -+ - (define_mode_attr VFMLA_SEL_W [(V2SF "V2HF") (V4SF "V4HF")]) - - (define_mode_attr f16quad [(V2SF "") (V4SF "q")]) - -+(define_mode_attr isquadop [(V8QI "") (V16QI "q") (V4BF "") (V8BF "q")]) -+ - (define_code_attr f16mac [(plus "a") (minus "s")]) - - ;; Map smax to smin and umax to umin. - (define_code_attr max_opp [(smax "smin") (umax "umin")]) - -+;; Same as above, but louder. -+(define_code_attr MAX_OPP [(smax "SMIN") (umax "UMIN")]) -+ - ;; The number of subvectors in an SVE_STRUCT. - (define_mode_attr vector_count [(VNx32QI "2") (VNx16HI "2") - (VNx8SI "2") (VNx4DI "2") -+ (VNx16BF "2") - (VNx16HF "2") (VNx8SF "2") (VNx4DF "2") - (VNx48QI "3") (VNx24HI "3") - (VNx12SI "3") (VNx6DI "3") -+ (VNx24BF "3") - (VNx24HF "3") (VNx12SF "3") (VNx6DF "3") - (VNx64QI "4") (VNx32HI "4") - (VNx16SI "4") (VNx8DI "4") -+ (VNx32BF "4") - (VNx32HF "4") (VNx16SF "4") (VNx8DF "4")]) - - ;; The number of instruction bytes needed for an SVE_STRUCT move. This is - ;; equal to vector_count * 4. - (define_mode_attr insn_length [(VNx32QI "8") (VNx16HI "8") - (VNx8SI "8") (VNx4DI "8") -+ (VNx16BF "8") - (VNx16HF "8") (VNx8SF "8") (VNx4DF "8") - (VNx48QI "12") (VNx24HI "12") - (VNx12SI "12") (VNx6DI "12") -+ (VNx24BF "12") - (VNx24HF "12") (VNx12SF "12") (VNx6DF "12") - (VNx64QI "16") (VNx32HI "16") - (VNx16SI "16") (VNx8DI "16") -+ (VNx32BF "16") - (VNx32HF "16") (VNx16SF "16") (VNx8DF "16")]) - - ;; The type of a subvector in an SVE_STRUCT. 
- (define_mode_attr VSINGLE [(VNx32QI "VNx16QI") - (VNx16HI "VNx8HI") (VNx16HF "VNx8HF") -+ (VNx16BF "VNx8BF") - (VNx8SI "VNx4SI") (VNx8SF "VNx4SF") - (VNx4DI "VNx2DI") (VNx4DF "VNx2DF") - (VNx48QI "VNx16QI") - (VNx24HI "VNx8HI") (VNx24HF "VNx8HF") -+ (VNx24BF "VNx8BF") - (VNx12SI "VNx4SI") (VNx12SF "VNx4SF") - (VNx6DI "VNx2DI") (VNx6DF "VNx2DF") - (VNx64QI "VNx16QI") - (VNx32HI "VNx8HI") (VNx32HF "VNx8HF") -+ (VNx32BF "VNx8BF") - (VNx16SI "VNx4SI") (VNx16SF "VNx4SF") - (VNx8DI "VNx2DI") (VNx8DF "VNx2DF")]) - - ;; ...and again in lower case. - (define_mode_attr vsingle [(VNx32QI "vnx16qi") - (VNx16HI "vnx8hi") (VNx16HF "vnx8hf") -+ (VNx16BF "vnx8bf") - (VNx8SI "vnx4si") (VNx8SF "vnx4sf") - (VNx4DI "vnx2di") (VNx4DF "vnx2df") - (VNx48QI "vnx16qi") - (VNx24HI "vnx8hi") (VNx24HF "vnx8hf") -+ (VNx24BF "vnx8bf") - (VNx12SI "vnx4si") (VNx12SF "vnx4sf") - (VNx6DI "vnx2di") (VNx6DF "vnx2df") - (VNx64QI "vnx16qi") - (VNx32HI "vnx8hi") (VNx32HF "vnx8hf") -+ (VNx32BF "vnx8bf") - (VNx16SI "vnx4si") (VNx16SF "vnx4sf") - (VNx8DI "vnx2di") (VNx8DF "vnx2df")]) - -@@ -1118,36 +1348,44 @@ - ;; this is equivalent to the of the subvector mode. - (define_mode_attr VPRED [(VNx16QI "VNx16BI") - (VNx8HI "VNx8BI") (VNx8HF "VNx8BI") -+ (VNx8BF "VNx8BI") - (VNx4SI "VNx4BI") (VNx4SF "VNx4BI") - (VNx2DI "VNx2BI") (VNx2DF "VNx2BI") - (VNx32QI "VNx16BI") - (VNx16HI "VNx8BI") (VNx16HF "VNx8BI") -+ (VNx16BF "VNx8BI") - (VNx8SI "VNx4BI") (VNx8SF "VNx4BI") - (VNx4DI "VNx2BI") (VNx4DF "VNx2BI") - (VNx48QI "VNx16BI") - (VNx24HI "VNx8BI") (VNx24HF "VNx8BI") -+ (VNx24BF "VNx8BI") - (VNx12SI "VNx4BI") (VNx12SF "VNx4BI") - (VNx6DI "VNx2BI") (VNx6DF "VNx2BI") - (VNx64QI "VNx16BI") - (VNx32HI "VNx8BI") (VNx32HF "VNx8BI") -+ (VNx32BF "VNx8BI") - (VNx16SI "VNx4BI") (VNx16SF "VNx4BI") - (VNx8DI "VNx2BI") (VNx8DF "VNx2BI")]) - - ;; ...and again in lower case. - (define_mode_attr vpred [(VNx16QI "vnx16bi") - (VNx8HI "vnx8bi") (VNx8HF "vnx8bi") -+ (VNx8BF "vnx8bi") - (VNx4SI "vnx4bi") (VNx4SF "vnx4bi") - (VNx2DI "vnx2bi") (VNx2DF "vnx2bi") - (VNx32QI "vnx16bi") - (VNx16HI "vnx8bi") (VNx16HF "vnx8bi") -+ (VNx16BF "vnx8bi") - (VNx8SI "vnx4bi") (VNx8SF "vnx4bi") - (VNx4DI "vnx2bi") (VNx4DF "vnx2bi") - (VNx48QI "vnx16bi") - (VNx24HI "vnx8bi") (VNx24HF "vnx8bi") -+ (VNx24BF "vnx8bi") - (VNx12SI "vnx4bi") (VNx12SF "vnx4bi") - (VNx6DI "vnx2bi") (VNx6DF "vnx2bi") - (VNx64QI "vnx16bi") - (VNx32HI "vnx8bi") (VNx32HF "vnx4bi") -+ (VNx32BF "vnx8bi") - (VNx16SI "vnx4bi") (VNx16SF "vnx4bi") - (VNx8DI "vnx2bi") (VNx8DF "vnx2bi")]) - -@@ -1158,6 +1396,30 @@ - (V4HF "[%4]") (V8HF "[%4]") - ]) - -+;; The number of bytes controlled by a predicate -+(define_mode_attr data_bytes [(VNx16BI "1") (VNx8BI "2") -+ (VNx4BI "4") (VNx2BI "8")]) -+ -+;; Two-nybble mask for partial vector modes: nunits, byte size. -+(define_mode_attr self_mask [(VNx8QI "0x81") -+ (VNx4QI "0x41") -+ (VNx2QI "0x21") -+ (VNx4HI "0x42") -+ (VNx2HI "0x22") -+ (VNx2SI "0x24")]) -+ -+;; For full vector modes, the mask of narrower modes, encoded as above. -+(define_mode_attr narrower_mask [(VNx8HI "0x81") -+ (VNx4SI "0x43") -+ (VNx2DI "0x27")]) -+ -+;; The constraint to use for an SVE [SU]DOT, FMUL, FMLA or FMLS lane index. -+(define_mode_attr sve_lane_con [(VNx4SI "y") (VNx2DI "x") -+ (VNx8HF "y") (VNx4SF "y") (VNx2DF "x")]) -+ -+;; The constraint to use for an SVE FCMLA lane index. 
-+(define_mode_attr sve_lane_pair_con [(VNx8HF "y") (VNx4SF "x")]) -+ - ;; ------------------------------------------------------------------- - ;; Code Iterators - ;; ------------------------------------------------------------------- -@@ -1168,6 +1430,8 @@ - ;; This code iterator allows the shifts supported in arithmetic instructions - (define_code_iterator ASHIFT [ashift ashiftrt lshiftrt]) - -+(define_code_iterator SHIFTRT [ashiftrt lshiftrt]) -+ - ;; Code iterator for logical operations - (define_code_iterator LOGICAL [and ior xor]) - -@@ -1214,7 +1478,7 @@ - ;; Signed and unsigned max operations. - (define_code_iterator USMAX [smax umax]) - --;; Code iterator for variants of vector max and min. -+;; Code iterator for plus and minus. - (define_code_iterator ADDSUB [plus minus]) - - ;; Code iterator for variants of vector saturating binary ops. -@@ -1226,6 +1490,21 @@ - ;; Code iterator for signed variants of vector saturating binary ops. - (define_code_iterator SBINQOPS [ss_plus ss_minus]) - -+;; Code iterator for unsigned variants of vector saturating binary ops. -+(define_code_iterator UBINQOPS [us_plus us_minus]) -+ -+;; Modular and saturating addition. -+(define_code_iterator ANY_PLUS [plus ss_plus us_plus]) -+ -+;; Saturating addition. -+(define_code_iterator SAT_PLUS [ss_plus us_plus]) -+ -+;; Modular and saturating subtraction. -+(define_code_iterator ANY_MINUS [minus ss_minus us_minus]) -+ -+;; Saturating subtraction. -+(define_code_iterator SAT_MINUS [ss_minus us_minus]) -+ - ;; Comparison operators for CM. - (define_code_iterator COMPARISONS [lt le eq ge gt]) - -@@ -1236,27 +1515,25 @@ - (define_code_iterator FAC_COMPARISONS [lt le ge gt]) - - ;; SVE integer unary operations. --(define_code_iterator SVE_INT_UNARY [abs neg not popcount]) -- --;; SVE floating-point unary operations. --(define_code_iterator SVE_FP_UNARY [abs neg sqrt]) -+(define_code_iterator SVE_INT_UNARY [abs neg not clrsb clz popcount]) - - ;; SVE integer binary operations. - (define_code_iterator SVE_INT_BINARY [plus minus mult smax umax smin umin -+ ashift ashiftrt lshiftrt - and ior xor]) - - ;; SVE integer binary division operations. - (define_code_iterator SVE_INT_BINARY_SD [div udiv]) - -+;; SVE integer binary operations that have an immediate form. -+(define_code_iterator SVE_INT_BINARY_IMM [mult smax smin umax umin]) -+ - ;; SVE floating-point operations with an unpredicated all-register form. - (define_code_iterator SVE_UNPRED_FP_BINARY [plus minus mult]) - - ;; SVE integer comparisons. - (define_code_iterator SVE_INT_CMP [lt le eq ne ge gt ltu leu geu gtu]) - --;; SVE floating-point comparisons. --(define_code_iterator SVE_FP_CMP [lt le eq ne ge gt]) -- - ;; ------------------------------------------------------------------- - ;; Code Attributes - ;; ------------------------------------------------------------------- -@@ -1273,6 +1550,8 @@ - (unsigned_fix "fixuns") - (float "float") - (unsigned_float "floatuns") -+ (clrsb "clrsb") -+ (clz "clz") - (popcount "popcount") - (and "and") - (ior "ior") -@@ -1304,8 +1583,7 @@ - (leu "leu") - (geu "geu") - (gtu "gtu") -- (abs "abs") -- (sqrt "sqrt")]) -+ (abs "abs")]) - - ;; For comparison operators we use the FCM* and CM* instructions. - ;; As there are no CMLE or CMLT instructions which act on 3 vector -@@ -1350,6 +1628,9 @@ - (define_code_attr shift [(ashift "lsl") (ashiftrt "asr") - (lshiftrt "lsr") (rotatert "ror")]) - -+;; Op prefix for shift right and accumulate. 
-+(define_code_attr sra_op [(ashiftrt "s") (lshiftrt "u")]) -+ - ;; Map shift operators onto underlying bit-field instructions - (define_code_attr bfshift [(ashift "ubfiz") (ashiftrt "sbfx") - (lshiftrt "ubfx") (rotatert "extr")]) -@@ -1374,6 +1655,15 @@ - (smax "s") (umax "u") - (smin "s") (umin "u")]) - -+;; "s" for signed ops, empty for unsigned ones. -+(define_code_attr s [(sign_extend "s") (zero_extend "")]) -+ -+;; Map signed/unsigned ops to the corresponding extension. -+(define_code_attr paired_extend [(ss_plus "sign_extend") -+ (us_plus "zero_extend") -+ (ss_minus "sign_extend") -+ (us_minus "zero_extend")]) -+ - ;; Whether a shift is left or right. - (define_code_attr lr [(ashift "l") (ashiftrt "r") (lshiftrt "r")]) - -@@ -1434,35 +1724,45 @@ - (smax "smax") - (umin "umin") - (umax "umax") -+ (ashift "lsl") -+ (ashiftrt "asr") -+ (lshiftrt "lsr") - (and "and") - (ior "orr") - (xor "eor") - (not "not") -+ (clrsb "cls") -+ (clz "clz") - (popcount "cnt")]) - - (define_code_attr sve_int_op_rev [(plus "add") -- (minus "subr") -- (mult "mul") -- (div "sdivr") -- (udiv "udivr") -- (smin "smin") -- (smax "smax") -- (umin "umin") -- (umax "umax") -- (and "and") -- (ior "orr") -- (xor "eor")]) -+ (minus "subr") -+ (mult "mul") -+ (div "sdivr") -+ (udiv "udivr") -+ (smin "smin") -+ (smax "smax") -+ (umin "umin") -+ (umax "umax") -+ (ashift "lslr") -+ (ashiftrt "asrr") -+ (lshiftrt "lsrr") -+ (and "and") -+ (ior "orr") -+ (xor "eor")]) - - ;; The floating-point SVE instruction that implements an rtx code. - (define_code_attr sve_fp_op [(plus "fadd") - (minus "fsub") -- (mult "fmul") -- (neg "fneg") -- (abs "fabs") -- (sqrt "fsqrt")]) -+ (mult "fmul")]) - - ;; The SVE immediate constraint to use for an rtl code. --(define_code_attr sve_imm_con [(eq "vsc") -+(define_code_attr sve_imm_con [(mult "vsm") -+ (smax "vsm") -+ (smin "vsm") -+ (umax "vsb") -+ (umin "vsb") -+ (eq "vsc") - (ne "vsc") - (lt "vsc") - (ge "vsc") -@@ -1473,6 +1773,33 @@ - (geu "vsd") - (gtu "vsd")]) - -+;; The prefix letter to use when printing an immediate operand. -+(define_code_attr sve_imm_prefix [(mult "") -+ (smax "") -+ (smin "") -+ (umax "D") -+ (umin "D")]) -+ -+;; The predicate to use for the second input operand in a cond_ -+;; pattern. -+(define_code_attr sve_pred_int_rhs2_operand -+ [(plus "register_operand") -+ (minus "register_operand") -+ (mult "register_operand") -+ (smax "register_operand") -+ (umax "register_operand") -+ (smin "register_operand") -+ (umin "register_operand") -+ (ashift "aarch64_sve_lshift_operand") -+ (ashiftrt "aarch64_sve_rshift_operand") -+ (lshiftrt "aarch64_sve_rshift_operand") -+ (and "aarch64_sve_pred_and_operand") -+ (ior "register_operand") -+ (xor "register_operand")]) -+ -+(define_code_attr inc_dec [(minus "dec") (ss_minus "sqdec") (us_minus "uqdec") -+ (plus "inc") (ss_plus "sqinc") (us_plus "uqinc")]) -+ - ;; ------------------------------------------------------------------- - ;; Int Iterators. 
- ;; ------------------------------------------------------------------- -@@ -1492,7 +1819,7 @@ - (define_int_iterator FMAXMINV [UNSPEC_FMAXV UNSPEC_FMINV - UNSPEC_FMAXNMV UNSPEC_FMINNMV]) - --(define_int_iterator BITWISEV [UNSPEC_ANDV UNSPEC_IORV UNSPEC_XORV]) -+(define_int_iterator SVE_INT_ADDV [UNSPEC_SADDV UNSPEC_UADDV]) - - (define_int_iterator LOGICALF [UNSPEC_ANDF UNSPEC_IORF UNSPEC_XORF]) - -@@ -1505,8 +1832,20 @@ - - (define_int_iterator RHADD [UNSPEC_SRHADD UNSPEC_URHADD]) - -+(define_int_iterator MULLBT [UNSPEC_SMULLB UNSPEC_UMULLB -+ UNSPEC_SMULLT UNSPEC_UMULLT]) -+ -+(define_int_iterator SHRNB [UNSPEC_SHRNB UNSPEC_RSHRNB]) -+ -+(define_int_iterator SHRNT [UNSPEC_SHRNT UNSPEC_RSHRNT]) -+ -+(define_int_iterator BSL_DUP [1 2]) -+ - (define_int_iterator DOTPROD [UNSPEC_SDOT UNSPEC_UDOT]) - -+(define_int_iterator DOTPROD_I8MM [UNSPEC_USDOT UNSPEC_SUDOT]) -+(define_int_iterator DOTPROD_US_ONLY [UNSPEC_USDOT]) -+ - (define_int_iterator ADDSUBHN [UNSPEC_ADDHN UNSPEC_RADDHN - UNSPEC_SUBHN UNSPEC_RSUBHN]) - -@@ -1516,12 +1855,17 @@ - (define_int_iterator FMAXMIN_UNS [UNSPEC_FMAX UNSPEC_FMIN - UNSPEC_FMAXNM UNSPEC_FMINNM]) - --(define_int_iterator PAUTH_LR_SP [UNSPEC_PACISP UNSPEC_AUTISP]) -+(define_int_iterator PAUTH_LR_SP [UNSPEC_PACIASP UNSPEC_AUTIASP -+ UNSPEC_PACIBSP UNSPEC_AUTIBSP]) - --(define_int_iterator PAUTH_17_16 [UNSPEC_PACI1716 UNSPEC_AUTI1716]) -+(define_int_iterator PAUTH_17_16 [UNSPEC_PACIA1716 UNSPEC_AUTIA1716 -+ UNSPEC_PACIB1716 UNSPEC_AUTIB1716]) - - (define_int_iterator VQDMULH [UNSPEC_SQDMULH UNSPEC_SQRDMULH]) - -+(define_int_iterator MULHRS [UNSPEC_SMULHS UNSPEC_UMULHS -+ UNSPEC_SMULHRS UNSPEC_UMULHRS]) -+ - (define_int_iterator USSUQADD [UNSPEC_SUQADD UNSPEC_USQADD]) - - (define_int_iterator SUQMOVN [UNSPEC_SQXTN UNSPEC_UQXTN]) -@@ -1555,6 +1899,10 @@ - UNSPEC_TRN1 UNSPEC_TRN2 - UNSPEC_UZP1 UNSPEC_UZP2]) - -+(define_int_iterator PERMUTEQ [UNSPEC_ZIP1Q UNSPEC_ZIP2Q -+ UNSPEC_TRN1Q UNSPEC_TRN2Q -+ UNSPEC_UZP1Q UNSPEC_UZP2Q]) -+ - (define_int_iterator OPTAB_PERMUTE [UNSPEC_ZIP1 UNSPEC_ZIP2 - UNSPEC_UZP1 UNSPEC_UZP2]) - -@@ -1601,18 +1949,144 @@ - - (define_int_iterator MUL_HIGHPART [UNSPEC_SMUL_HIGHPART UNSPEC_UMUL_HIGHPART]) - --(define_int_iterator SVE_COND_FP_BINARY [UNSPEC_COND_ADD UNSPEC_COND_SUB -- UNSPEC_COND_MUL UNSPEC_COND_DIV -- UNSPEC_COND_MAX UNSPEC_COND_MIN]) -+(define_int_iterator CLAST [UNSPEC_CLASTA UNSPEC_CLASTB]) -+ -+(define_int_iterator LAST [UNSPEC_LASTA UNSPEC_LASTB]) -+ -+(define_int_iterator SVE_INT_UNARY [UNSPEC_RBIT UNSPEC_REVB -+ UNSPEC_REVH UNSPEC_REVW]) -+ -+(define_int_iterator SVE_FP_UNARY [UNSPEC_FRECPE UNSPEC_RSQRTE]) -+ -+(define_int_iterator SVE_FP_UNARY_INT [UNSPEC_FEXPA]) -+ -+(define_int_iterator SVE_FP_BINARY [UNSPEC_FRECPS UNSPEC_RSQRTS]) -+ -+(define_int_iterator SVE_FP_BINARY_INT [UNSPEC_FTSMUL UNSPEC_FTSSEL]) -+ -+(define_int_iterator SVE_BFLOAT_TERNARY_LONG [UNSPEC_BFDOT -+ UNSPEC_BFMLALB -+ UNSPEC_BFMLALT -+ UNSPEC_BFMMLA]) -+ -+(define_int_iterator SVE_BFLOAT_TERNARY_LONG_LANE [UNSPEC_BFDOT -+ UNSPEC_BFMLALB -+ UNSPEC_BFMLALT]) -+ -+(define_int_iterator SVE_INT_REDUCTION [UNSPEC_ANDV -+ UNSPEC_IORV -+ UNSPEC_SMAXV -+ UNSPEC_SMINV -+ UNSPEC_UMAXV -+ UNSPEC_UMINV -+ UNSPEC_XORV]) -+ -+(define_int_iterator SVE_FP_REDUCTION [UNSPEC_FADDV -+ UNSPEC_FMAXV -+ UNSPEC_FMAXNMV -+ UNSPEC_FMINV -+ UNSPEC_FMINNMV]) -+ -+(define_int_iterator SVE_COND_FP_UNARY [UNSPEC_COND_FABS -+ UNSPEC_COND_FNEG -+ UNSPEC_COND_FRECPX -+ UNSPEC_COND_FRINTA -+ UNSPEC_COND_FRINTI -+ UNSPEC_COND_FRINTM -+ UNSPEC_COND_FRINTN -+ UNSPEC_COND_FRINTP -+ 
UNSPEC_COND_FRINTX -+ UNSPEC_COND_FRINTZ -+ UNSPEC_COND_FSQRT]) -+ -+(define_int_iterator SVE_COND_FCVT [UNSPEC_COND_FCVT]) -+(define_int_iterator SVE_COND_FCVTI [UNSPEC_COND_FCVTZS UNSPEC_COND_FCVTZU]) -+(define_int_iterator SVE_COND_ICVTF [UNSPEC_COND_SCVTF UNSPEC_COND_UCVTF]) -+ -+(define_int_iterator SVE_COND_FP_BINARY [UNSPEC_COND_FADD -+ UNSPEC_COND_FDIV -+ UNSPEC_COND_FMAX -+ UNSPEC_COND_FMAXNM -+ UNSPEC_COND_FMIN -+ UNSPEC_COND_FMINNM -+ UNSPEC_COND_FMUL -+ UNSPEC_COND_FMULX -+ UNSPEC_COND_FSUB]) -+ -+(define_int_iterator SVE_COND_FP_BINARY_INT [UNSPEC_COND_FSCALE]) -+ -+(define_int_iterator SVE_COND_FP_ADD [UNSPEC_COND_FADD]) -+(define_int_iterator SVE_COND_FP_SUB [UNSPEC_COND_FSUB]) -+(define_int_iterator SVE_COND_FP_MUL [UNSPEC_COND_FMUL]) -+ -+(define_int_iterator SVE_COND_FP_BINARY_I1 [UNSPEC_COND_FMAX -+ UNSPEC_COND_FMAXNM -+ UNSPEC_COND_FMIN -+ UNSPEC_COND_FMINNM -+ UNSPEC_COND_FMUL]) -+ -+(define_int_iterator SVE_COND_FP_BINARY_REG [UNSPEC_COND_FDIV -+ UNSPEC_COND_FMULX]) -+ -+(define_int_iterator SVE_COND_FCADD [UNSPEC_COND_FCADD90 -+ UNSPEC_COND_FCADD270]) -+ -+(define_int_iterator SVE_COND_FP_MAXMIN [UNSPEC_COND_FMAX -+ UNSPEC_COND_FMAXNM -+ UNSPEC_COND_FMIN -+ UNSPEC_COND_FMINNM]) -+ -+;; Floating-point max/min operations that correspond to optabs, -+;; as opposed to those that are internal to the port. -+(define_int_iterator SVE_COND_FP_MAXMIN_PUBLIC [UNSPEC_COND_FMAXNM -+ UNSPEC_COND_FMINNM]) - - (define_int_iterator SVE_COND_FP_TERNARY [UNSPEC_COND_FMLA - UNSPEC_COND_FMLS - UNSPEC_COND_FNMLA - UNSPEC_COND_FNMLS]) - --(define_int_iterator SVE_COND_FP_CMP [UNSPEC_COND_LT UNSPEC_COND_LE -- UNSPEC_COND_EQ UNSPEC_COND_NE -- UNSPEC_COND_GE UNSPEC_COND_GT]) -+(define_int_iterator SVE_COND_FCMLA [UNSPEC_COND_FCMLA -+ UNSPEC_COND_FCMLA90 -+ UNSPEC_COND_FCMLA180 -+ UNSPEC_COND_FCMLA270]) -+ -+(define_int_iterator SVE_COND_INT_CMP_WIDE [UNSPEC_COND_CMPEQ_WIDE -+ UNSPEC_COND_CMPGE_WIDE -+ UNSPEC_COND_CMPGT_WIDE -+ UNSPEC_COND_CMPHI_WIDE -+ UNSPEC_COND_CMPHS_WIDE -+ UNSPEC_COND_CMPLE_WIDE -+ UNSPEC_COND_CMPLO_WIDE -+ UNSPEC_COND_CMPLS_WIDE -+ UNSPEC_COND_CMPLT_WIDE -+ UNSPEC_COND_CMPNE_WIDE]) -+ -+;; SVE FP comparisons that accept #0.0. 
-+(define_int_iterator SVE_COND_FP_CMP_I0 [UNSPEC_COND_FCMEQ -+ UNSPEC_COND_FCMGE -+ UNSPEC_COND_FCMGT -+ UNSPEC_COND_FCMLE -+ UNSPEC_COND_FCMLT -+ UNSPEC_COND_FCMNE]) -+ -+(define_int_iterator SVE_COND_FP_ABS_CMP [UNSPEC_COND_FCMGE -+ UNSPEC_COND_FCMGT -+ UNSPEC_COND_FCMLE -+ UNSPEC_COND_FCMLT]) -+ -+(define_int_iterator SVE_FP_TERNARY_LANE [UNSPEC_FMLA UNSPEC_FMLS]) -+ -+(define_int_iterator SVE_CFP_TERNARY_LANE [UNSPEC_FCMLA UNSPEC_FCMLA90 -+ UNSPEC_FCMLA180 UNSPEC_FCMLA270]) -+ -+(define_int_iterator SVE_WHILE [UNSPEC_WHILELE UNSPEC_WHILELO UNSPEC_WHILELS UNSPEC_WHILELT]) -+ -+(define_int_iterator SVE_SHIFT_WIDE [UNSPEC_ASHIFT_WIDE -+ UNSPEC_ASHIFTRT_WIDE -+ UNSPEC_LSHIFTRT_WIDE]) -+ -+(define_int_iterator SVE_LDFF1_LDNF1 [UNSPEC_LDFF1 UNSPEC_LDNF1]) - - (define_int_iterator FCADD [UNSPEC_FCADD90 - UNSPEC_FCADD270]) -@@ -1622,6 +2096,23 @@ - UNSPEC_FCMLA180 - UNSPEC_FCMLA270]) - -+(define_int_iterator FRINTNZX [UNSPEC_FRINT32Z UNSPEC_FRINT32X -+ UNSPEC_FRINT64Z UNSPEC_FRINT64X]) -+ -+(define_int_iterator SVE_BRK_UNARY [UNSPEC_BRKA UNSPEC_BRKB]) -+ -+(define_int_iterator SVE_BRK_BINARY [UNSPEC_BRKN UNSPEC_BRKPA UNSPEC_BRKPB]) -+ -+(define_int_iterator SVE_PITER [UNSPEC_PFIRST UNSPEC_PNEXT]) -+ -+(define_int_iterator MATMUL [UNSPEC_SMATMUL UNSPEC_UMATMUL -+ UNSPEC_USMATMUL]) -+ -+(define_int_iterator FMMLA [UNSPEC_FMMLA]) -+ -+(define_int_iterator BF_MLA [UNSPEC_BFMLALB -+ UNSPEC_BFMLALT]) -+ - ;; Iterators for atomic operations. - - (define_int_iterator ATOMIC_LDOP -@@ -1646,19 +2137,84 @@ - (define_int_attr optab [(UNSPEC_ANDF "and") - (UNSPEC_IORF "ior") - (UNSPEC_XORF "xor") -+ (UNSPEC_SADDV "sadd") -+ (UNSPEC_UADDV "uadd") - (UNSPEC_ANDV "and") - (UNSPEC_IORV "ior") - (UNSPEC_XORV "xor") -- (UNSPEC_COND_ADD "add") -- (UNSPEC_COND_SUB "sub") -- (UNSPEC_COND_MUL "mul") -- (UNSPEC_COND_DIV "div") -- (UNSPEC_COND_MAX "smax") -- (UNSPEC_COND_MIN "smin") -+ (UNSPEC_FRECPE "frecpe") -+ (UNSPEC_FRECPS "frecps") -+ (UNSPEC_RSQRTE "frsqrte") -+ (UNSPEC_RSQRTS "frsqrts") -+ (UNSPEC_RBIT "rbit") -+ (UNSPEC_REVB "revb") -+ (UNSPEC_REVH "revh") -+ (UNSPEC_REVW "revw") -+ (UNSPEC_UMAXV "umax") -+ (UNSPEC_UMINV "umin") -+ (UNSPEC_SMAXV "smax") -+ (UNSPEC_SMINV "smin") -+ (UNSPEC_FADDV "plus") -+ (UNSPEC_FMAXNMV "smax") -+ (UNSPEC_FMAXV "smax_nan") -+ (UNSPEC_FMINNMV "smin") -+ (UNSPEC_FMINV "smin_nan") -+ (UNSPEC_SMUL_HIGHPART "smulh") -+ (UNSPEC_UMUL_HIGHPART "umulh") -+ (UNSPEC_FMLA "fma") -+ (UNSPEC_FMLS "fnma") -+ (UNSPEC_FCMLA "fcmla") -+ (UNSPEC_FCMLA90 "fcmla90") -+ (UNSPEC_FCMLA180 "fcmla180") -+ (UNSPEC_FCMLA270 "fcmla270") -+ (UNSPEC_FEXPA "fexpa") -+ (UNSPEC_FTSMUL "ftsmul") -+ (UNSPEC_FTSSEL "ftssel") -+ (UNSPEC_SMATMUL "smatmul") -+ (UNSPEC_TRN1Q "trn1q") -+ (UNSPEC_TRN2Q "trn2q") -+ (UNSPEC_UMATMUL "umatmul") -+ (UNSPEC_USMATMUL "usmatmul") -+ (UNSPEC_UZP1Q "uzp1q") -+ (UNSPEC_UZP2Q "uzp2q") -+ (UNSPEC_ZIP1Q "zip1q") -+ (UNSPEC_ZIP2Q "zip2q") -+ (UNSPEC_COND_FABS "abs") -+ (UNSPEC_COND_FADD "add") -+ (UNSPEC_COND_FCADD90 "cadd90") -+ (UNSPEC_COND_FCADD270 "cadd270") -+ (UNSPEC_COND_FCMLA "fcmla") -+ (UNSPEC_COND_FCMLA90 "fcmla90") -+ (UNSPEC_COND_FCMLA180 "fcmla180") -+ (UNSPEC_COND_FCMLA270 "fcmla270") -+ (UNSPEC_COND_FCVT "fcvt") -+ (UNSPEC_COND_FCVTZS "fix_trunc") -+ (UNSPEC_COND_FCVTZU "fixuns_trunc") -+ (UNSPEC_COND_FDIV "div") -+ (UNSPEC_COND_FMAX "smax_nan") -+ (UNSPEC_COND_FMAXNM "smax") -+ (UNSPEC_COND_FMIN "smin_nan") -+ (UNSPEC_COND_FMINNM "smin") - (UNSPEC_COND_FMLA "fma") - (UNSPEC_COND_FMLS "fnma") -+ (UNSPEC_COND_FMUL "mul") -+ (UNSPEC_COND_FMULX "mulx") -+ 
(UNSPEC_COND_FNEG "neg") - (UNSPEC_COND_FNMLA "fnms") -- (UNSPEC_COND_FNMLS "fms")]) -+ (UNSPEC_COND_FNMLS "fms") -+ (UNSPEC_COND_FRECPX "frecpx") -+ (UNSPEC_COND_FRINTA "round") -+ (UNSPEC_COND_FRINTI "nearbyint") -+ (UNSPEC_COND_FRINTM "floor") -+ (UNSPEC_COND_FRINTN "frintn") -+ (UNSPEC_COND_FRINTP "ceil") -+ (UNSPEC_COND_FRINTX "rint") -+ (UNSPEC_COND_FRINTZ "btrunc") -+ (UNSPEC_COND_FSCALE "fscale") -+ (UNSPEC_COND_FSQRT "sqrt") -+ (UNSPEC_COND_FSUB "sub") -+ (UNSPEC_COND_SCVTF "float") -+ (UNSPEC_COND_UCVTF "floatuns")]) - - (define_int_attr maxmin_uns [(UNSPEC_UMAXV "umax") - (UNSPEC_UMINV "umin") -@@ -1671,7 +2227,11 @@ - (UNSPEC_FMINNMV "smin") - (UNSPEC_FMINV "smin_nan") - (UNSPEC_FMAXNM "fmax") -- (UNSPEC_FMINNM "fmin")]) -+ (UNSPEC_FMINNM "fmin") -+ (UNSPEC_COND_FMAX "fmax_nan") -+ (UNSPEC_COND_FMAXNM "fmax") -+ (UNSPEC_COND_FMIN "fmin_nan") -+ (UNSPEC_COND_FMINNM "fmin")]) - - (define_int_attr maxmin_uns_op [(UNSPEC_UMAXV "umax") - (UNSPEC_UMINV "umin") -@@ -1686,22 +2246,41 @@ - (UNSPEC_FMAXNM "fmaxnm") - (UNSPEC_FMINNM "fminnm")]) - --(define_int_attr bit_reduc_op [(UNSPEC_ANDV "andv") -- (UNSPEC_IORV "orv") -- (UNSPEC_XORV "eorv")]) -+(define_code_attr binqops_op [(ss_plus "sqadd") -+ (us_plus "uqadd") -+ (ss_minus "sqsub") -+ (us_minus "uqsub")]) -+ -+(define_code_attr binqops_op_rev [(ss_plus "sqsub") -+ (ss_minus "sqadd")]) - - ;; The SVE logical instruction that implements an unspec. - (define_int_attr logicalf_op [(UNSPEC_ANDF "and") - (UNSPEC_IORF "orr") - (UNSPEC_XORF "eor")]) - -+(define_int_attr last_op [(UNSPEC_CLASTA "after_last") -+ (UNSPEC_CLASTB "last") -+ (UNSPEC_LASTA "after_last") -+ (UNSPEC_LASTB "last")]) -+ - ;; "s" for signed operations and "u" for unsigned ones. --(define_int_attr su [(UNSPEC_UNPACKSHI "s") -+(define_int_attr su [(UNSPEC_SADDV "s") -+ (UNSPEC_UADDV "u") -+ (UNSPEC_UNPACKSHI "s") - (UNSPEC_UNPACKUHI "u") - (UNSPEC_UNPACKSLO "s") - (UNSPEC_UNPACKULO "u") - (UNSPEC_SMUL_HIGHPART "s") -- (UNSPEC_UMUL_HIGHPART "u")]) -+ (UNSPEC_UMUL_HIGHPART "u") -+ (UNSPEC_COND_FCVTZS "s") -+ (UNSPEC_COND_FCVTZU "u") -+ (UNSPEC_COND_SCVTF "s") -+ (UNSPEC_COND_UCVTF "u") -+ (UNSPEC_SMULLB "s") (UNSPEC_UMULLB "u") -+ (UNSPEC_SMULLT "s") (UNSPEC_UMULLT "u") -+ (UNSPEC_SMULHS "s") (UNSPEC_UMULHS "u") -+ (UNSPEC_SMULHRS "s") (UNSPEC_UMULHRS "u")]) - - (define_int_attr sur [(UNSPEC_SHADD "s") (UNSPEC_UHADD "u") - (UNSPEC_SRHADD "sr") (UNSPEC_URHADD "ur") -@@ -1731,6 +2310,9 @@ - (UNSPEC_URSHL "ur") (UNSPEC_SRSHL "sr") - (UNSPEC_UQRSHL "u") (UNSPEC_SQRSHL "s") - (UNSPEC_SDOT "s") (UNSPEC_UDOT "u") -+ (UNSPEC_USDOT "us") (UNSPEC_SUDOT "su") -+ (UNSPEC_SMATMUL "s") (UNSPEC_UMATMUL "u") -+ (UNSPEC_USMATMUL "us") - ]) - - (define_int_attr r [(UNSPEC_SQDMULH "") (UNSPEC_SQRDMULH "r") -@@ -1739,6 +2321,10 @@ - (UNSPEC_SQRSHRN "r") (UNSPEC_UQRSHRN "r") - (UNSPEC_SQSHL "") (UNSPEC_UQSHL "") - (UNSPEC_SQRSHL "r")(UNSPEC_UQRSHL "r") -+ (UNSPEC_SHRNB "") (UNSPEC_SHRNT "") -+ (UNSPEC_RSHRNB "r") (UNSPEC_RSHRNT "r") -+ (UNSPEC_SMULHS "") (UNSPEC_UMULHS "") -+ (UNSPEC_SMULHRS "r") (UNSPEC_UMULHRS "r") - ]) - - (define_int_attr lr [(UNSPEC_SSLI "l") (UNSPEC_USLI "l") -@@ -1751,6 +2337,13 @@ - (UNSPEC_SHADD "") (UNSPEC_UHADD "u") - (UNSPEC_SRHADD "") (UNSPEC_URHADD "u")]) - -+(define_int_attr fn [(UNSPEC_LDFF1 "f") (UNSPEC_LDNF1 "n")]) -+ -+(define_int_attr ab [(UNSPEC_CLASTA "a") (UNSPEC_CLASTB "b") -+ (UNSPEC_LASTA "a") (UNSPEC_LASTB "b")]) -+ -+(define_int_attr bt [(UNSPEC_BFMLALB "b") (UNSPEC_BFMLALT "t")]) -+ - (define_int_attr addsub [(UNSPEC_SHADD "add") - (UNSPEC_UHADD 
"add") - (UNSPEC_SRHADD "add") -@@ -1768,6 +2361,18 @@ - (UNSPEC_RADDHN2 "add") - (UNSPEC_RSUBHN2 "sub")]) - -+;; BSL variants: first commutative operand. -+(define_int_attr bsl_1st [(1 "w") (2 "0")]) -+ -+;; BSL variants: second commutative operand. -+(define_int_attr bsl_2nd [(1 "0") (2 "w")]) -+ -+;; BSL variants: duplicated input operand. -+(define_int_attr bsl_dup [(1 "1") (2 "2")]) -+ -+;; BSL variants: operand which requires preserving via movprfx. -+(define_int_attr bsl_mov [(1 "2") (2 "1")]) -+ - (define_int_attr offsetlr [(UNSPEC_SSLI "") (UNSPEC_USLI "") - (UNSPEC_SSRI "offset_") - (UNSPEC_USRI "offset_")]) -@@ -1797,29 +2402,47 @@ - (UNSPEC_FCVTZU "fcvtzu")]) - - ;; Pointer authentication mnemonic prefix. --(define_int_attr pauth_mnem_prefix [(UNSPEC_PACISP "paci") -- (UNSPEC_AUTISP "auti") -- (UNSPEC_PACI1716 "paci") -- (UNSPEC_AUTI1716 "auti")]) -- --;; Pointer authentication HINT number for NOP space instructions using A Key. --(define_int_attr pauth_hint_num_a [(UNSPEC_PACISP "25") -- (UNSPEC_AUTISP "29") -- (UNSPEC_PACI1716 "8") -- (UNSPEC_AUTI1716 "12")]) -- --(define_int_attr perm_insn [(UNSPEC_ZIP1 "zip") (UNSPEC_ZIP2 "zip") -- (UNSPEC_TRN1 "trn") (UNSPEC_TRN2 "trn") -- (UNSPEC_UZP1 "uzp") (UNSPEC_UZP2 "uzp")]) -+(define_int_attr pauth_mnem_prefix [(UNSPEC_PACIASP "pacia") -+ (UNSPEC_PACIBSP "pacib") -+ (UNSPEC_PACIA1716 "pacia") -+ (UNSPEC_PACIB1716 "pacib") -+ (UNSPEC_AUTIASP "autia") -+ (UNSPEC_AUTIBSP "autib") -+ (UNSPEC_AUTIA1716 "autia") -+ (UNSPEC_AUTIB1716 "autib")]) -+ -+(define_int_attr pauth_key [(UNSPEC_PACIASP "AARCH64_KEY_A") -+ (UNSPEC_PACIBSP "AARCH64_KEY_B") -+ (UNSPEC_PACIA1716 "AARCH64_KEY_A") -+ (UNSPEC_PACIB1716 "AARCH64_KEY_B") -+ (UNSPEC_AUTIASP "AARCH64_KEY_A") -+ (UNSPEC_AUTIBSP "AARCH64_KEY_B") -+ (UNSPEC_AUTIA1716 "AARCH64_KEY_A") -+ (UNSPEC_AUTIB1716 "AARCH64_KEY_B")]) -+ -+;; Pointer authentication HINT number for NOP space instructions using A and -+;; B key. -+(define_int_attr pauth_hint_num [(UNSPEC_PACIASP "25") -+ (UNSPEC_PACIBSP "27") -+ (UNSPEC_AUTIASP "29") -+ (UNSPEC_AUTIBSP "31") -+ (UNSPEC_PACIA1716 "8") -+ (UNSPEC_PACIB1716 "10") -+ (UNSPEC_AUTIA1716 "12") -+ (UNSPEC_AUTIB1716 "14")]) -+ -+(define_int_attr perm_insn [(UNSPEC_ZIP1 "zip1") (UNSPEC_ZIP2 "zip2") -+ (UNSPEC_ZIP1Q "zip1") (UNSPEC_ZIP2Q "zip2") -+ (UNSPEC_TRN1 "trn1") (UNSPEC_TRN2 "trn2") -+ (UNSPEC_TRN1Q "trn1") (UNSPEC_TRN2Q "trn2") -+ (UNSPEC_UZP1 "uzp1") (UNSPEC_UZP2 "uzp2") -+ (UNSPEC_UZP1Q "uzp1") (UNSPEC_UZP2Q "uzp2")]) - - ; op code for REV instructions (size within which elements are reversed). - (define_int_attr rev_op [(UNSPEC_REV64 "64") (UNSPEC_REV32 "32") - (UNSPEC_REV16 "16")]) - --(define_int_attr perm_hilo [(UNSPEC_ZIP1 "1") (UNSPEC_ZIP2 "2") -- (UNSPEC_TRN1 "1") (UNSPEC_TRN2 "2") -- (UNSPEC_UZP1 "1") (UNSPEC_UZP2 "2") -- (UNSPEC_UNPACKSHI "hi") (UNSPEC_UNPACKUHI "hi") -+(define_int_attr perm_hilo [(UNSPEC_UNPACKSHI "hi") (UNSPEC_UNPACKUHI "hi") - (UNSPEC_UNPACKSLO "lo") (UNSPEC_UNPACKULO "lo")]) - - ;; Return true if the associated optab refers to the high-numbered lanes, -@@ -1861,34 +2484,122 @@ - (define_int_attr f16mac1 [(UNSPEC_FMLAL "a") (UNSPEC_FMLSL "s") - (UNSPEC_FMLAL2 "a") (UNSPEC_FMLSL2 "s")]) - -+(define_int_attr frintnzs_op [(UNSPEC_FRINT32Z "frint32z") (UNSPEC_FRINT32X "frint32x") -+ (UNSPEC_FRINT64Z "frint64z") (UNSPEC_FRINT64X "frint64x")]) -+ - ;; The condition associated with an UNSPEC_COND_. 
--(define_int_attr cmp_op [(UNSPEC_COND_LT "lt") -- (UNSPEC_COND_LE "le") -- (UNSPEC_COND_EQ "eq") -- (UNSPEC_COND_NE "ne") -- (UNSPEC_COND_GE "ge") -- (UNSPEC_COND_GT "gt")]) -- --(define_int_attr sve_fp_op [(UNSPEC_COND_ADD "fadd") -- (UNSPEC_COND_SUB "fsub") -- (UNSPEC_COND_MUL "fmul") -- (UNSPEC_COND_DIV "fdiv") -- (UNSPEC_COND_MAX "fmaxnm") -- (UNSPEC_COND_MIN "fminnm")]) -- --(define_int_attr sve_fp_op_rev [(UNSPEC_COND_ADD "fadd") -- (UNSPEC_COND_SUB "fsubr") -- (UNSPEC_COND_MUL "fmul") -- (UNSPEC_COND_DIV "fdivr") -- (UNSPEC_COND_MAX "fmaxnm") -- (UNSPEC_COND_MIN "fminnm")]) -+(define_int_attr cmp_op [(UNSPEC_COND_CMPEQ_WIDE "eq") -+ (UNSPEC_COND_CMPGE_WIDE "ge") -+ (UNSPEC_COND_CMPGT_WIDE "gt") -+ (UNSPEC_COND_CMPHI_WIDE "hi") -+ (UNSPEC_COND_CMPHS_WIDE "hs") -+ (UNSPEC_COND_CMPLE_WIDE "le") -+ (UNSPEC_COND_CMPLO_WIDE "lo") -+ (UNSPEC_COND_CMPLS_WIDE "ls") -+ (UNSPEC_COND_CMPLT_WIDE "lt") -+ (UNSPEC_COND_CMPNE_WIDE "ne") -+ (UNSPEC_COND_FCMEQ "eq") -+ (UNSPEC_COND_FCMGE "ge") -+ (UNSPEC_COND_FCMGT "gt") -+ (UNSPEC_COND_FCMLE "le") -+ (UNSPEC_COND_FCMLT "lt") -+ (UNSPEC_COND_FCMNE "ne") -+ (UNSPEC_WHILELE "le") -+ (UNSPEC_WHILELO "lo") -+ (UNSPEC_WHILELS "ls") -+ (UNSPEC_WHILELT "lt")]) -+ -+(define_int_attr while_optab_cmp [(UNSPEC_WHILELE "le") -+ (UNSPEC_WHILELO "ult") -+ (UNSPEC_WHILELS "ule") -+ (UNSPEC_WHILELT "lt")]) -+ -+(define_int_attr brk_op [(UNSPEC_BRKA "a") (UNSPEC_BRKB "b") -+ (UNSPEC_BRKN "n") -+ (UNSPEC_BRKPA "pa") (UNSPEC_BRKPB "pb")]) -+ -+(define_int_attr sve_pred_op [(UNSPEC_PFIRST "pfirst") (UNSPEC_PNEXT "pnext")]) -+ -+(define_int_attr sve_int_op [(UNSPEC_ANDV "andv") -+ (UNSPEC_IORV "orv") -+ (UNSPEC_XORV "eorv") -+ (UNSPEC_UMAXV "umaxv") -+ (UNSPEC_UMINV "uminv") -+ (UNSPEC_SMAXV "smaxv") -+ (UNSPEC_SMINV "sminv") -+ (UNSPEC_SMUL_HIGHPART "smulh") -+ (UNSPEC_UMUL_HIGHPART "umulh") -+ (UNSPEC_ASHIFT_WIDE "lsl") -+ (UNSPEC_ASHIFTRT_WIDE "asr") -+ (UNSPEC_LSHIFTRT_WIDE "lsr") -+ (UNSPEC_RBIT "rbit") -+ (UNSPEC_REVB "revb") -+ (UNSPEC_REVH "revh") -+ (UNSPEC_REVW "revw")]) -+ -+(define_int_attr sve_fp_op [(UNSPEC_BFDOT "bfdot") -+ (UNSPEC_BFMLALB "bfmlalb") -+ (UNSPEC_BFMLALT "bfmlalt") -+ (UNSPEC_BFMMLA "bfmmla") -+ (UNSPEC_FRECPE "frecpe") -+ (UNSPEC_FRECPS "frecps") -+ (UNSPEC_RSQRTE "frsqrte") -+ (UNSPEC_RSQRTS "frsqrts") -+ (UNSPEC_FADDV "faddv") -+ (UNSPEC_FEXPA "fexpa") -+ (UNSPEC_FMAXNMV "fmaxnmv") -+ (UNSPEC_FMAXV "fmaxv") -+ (UNSPEC_FMINNMV "fminnmv") -+ (UNSPEC_FMINV "fminv") -+ (UNSPEC_FMLA "fmla") -+ (UNSPEC_FMLS "fmls") -+ (UNSPEC_FMMLA "fmmla") -+ (UNSPEC_FTSMUL "ftsmul") -+ (UNSPEC_FTSSEL "ftssel") -+ (UNSPEC_COND_FABS "fabs") -+ (UNSPEC_COND_FADD "fadd") -+ (UNSPEC_COND_FDIV "fdiv") -+ (UNSPEC_COND_FMAX "fmax") -+ (UNSPEC_COND_FMAXNM "fmaxnm") -+ (UNSPEC_COND_FMIN "fmin") -+ (UNSPEC_COND_FMINNM "fminnm") -+ (UNSPEC_COND_FMUL "fmul") -+ (UNSPEC_COND_FMULX "fmulx") -+ (UNSPEC_COND_FNEG "fneg") -+ (UNSPEC_COND_FRECPX "frecpx") -+ (UNSPEC_COND_FRINTA "frinta") -+ (UNSPEC_COND_FRINTI "frinti") -+ (UNSPEC_COND_FRINTM "frintm") -+ (UNSPEC_COND_FRINTN "frintn") -+ (UNSPEC_COND_FRINTP "frintp") -+ (UNSPEC_COND_FRINTX "frintx") -+ (UNSPEC_COND_FRINTZ "frintz") -+ (UNSPEC_COND_FSCALE "fscale") -+ (UNSPEC_COND_FSQRT "fsqrt") -+ (UNSPEC_COND_FSUB "fsub")]) -+ -+(define_int_attr sve_fp_op_rev [(UNSPEC_COND_FADD "fadd") -+ (UNSPEC_COND_FDIV "fdivr") -+ (UNSPEC_COND_FMAX "fmax") -+ (UNSPEC_COND_FMAXNM "fmaxnm") -+ (UNSPEC_COND_FMIN "fmin") -+ (UNSPEC_COND_FMINNM "fminnm") -+ (UNSPEC_COND_FMUL "fmul") -+ (UNSPEC_COND_FMULX "fmulx") -+ (UNSPEC_COND_FSUB 
"fsubr")]) - - (define_int_attr rot [(UNSPEC_FCADD90 "90") - (UNSPEC_FCADD270 "270") - (UNSPEC_FCMLA "0") - (UNSPEC_FCMLA90 "90") - (UNSPEC_FCMLA180 "180") -- (UNSPEC_FCMLA270 "270")]) -+ (UNSPEC_FCMLA270 "270") -+ (UNSPEC_COND_FCADD90 "90") -+ (UNSPEC_COND_FCADD270 "270") -+ (UNSPEC_COND_FCMLA "0") -+ (UNSPEC_COND_FCMLA90 "90") -+ (UNSPEC_COND_FCMLA180 "180") -+ (UNSPEC_COND_FCMLA270 "270")]) - - (define_int_attr sve_fmla_op [(UNSPEC_COND_FMLA "fmla") - (UNSPEC_COND_FMLS "fmls") -@@ -1900,9 +2611,54 @@ - (UNSPEC_COND_FNMLA "fnmad") - (UNSPEC_COND_FNMLS "fnmsb")]) - --(define_int_attr commutative [(UNSPEC_COND_ADD "true") -- (UNSPEC_COND_SUB "false") -- (UNSPEC_COND_MUL "true") -- (UNSPEC_COND_DIV "false") -- (UNSPEC_COND_MIN "true") -- (UNSPEC_COND_MAX "true")]) -+;; The register constraint to use for the final operand in a binary BRK. -+(define_int_attr brk_reg_con [(UNSPEC_BRKN "0") -+ (UNSPEC_BRKPA "Upa") (UNSPEC_BRKPB "Upa")]) -+ -+;; The register number to print for the above. -+(define_int_attr brk_reg_opno [(UNSPEC_BRKN "0") -+ (UNSPEC_BRKPA "3") (UNSPEC_BRKPB "3")]) -+ -+;; The predicate to use for the first input operand in a floating-point -+;; 3 pattern. -+(define_int_attr sve_pred_fp_rhs1_operand -+ [(UNSPEC_COND_FADD "register_operand") -+ (UNSPEC_COND_FDIV "register_operand") -+ (UNSPEC_COND_FMAX "register_operand") -+ (UNSPEC_COND_FMAXNM "register_operand") -+ (UNSPEC_COND_FMIN "register_operand") -+ (UNSPEC_COND_FMINNM "register_operand") -+ (UNSPEC_COND_FMUL "register_operand") -+ (UNSPEC_COND_FMULX "register_operand") -+ (UNSPEC_COND_FSUB "aarch64_sve_float_arith_operand")]) -+ -+;; The predicate to use for the second input operand in a floating-point -+;; 3 pattern. -+(define_int_attr sve_pred_fp_rhs2_operand -+ [(UNSPEC_COND_FADD "aarch64_sve_float_arith_with_sub_operand") -+ (UNSPEC_COND_FDIV "register_operand") -+ (UNSPEC_COND_FMAX "aarch64_sve_float_maxmin_operand") -+ (UNSPEC_COND_FMAXNM "aarch64_sve_float_maxmin_operand") -+ (UNSPEC_COND_FMIN "aarch64_sve_float_maxmin_operand") -+ (UNSPEC_COND_FMINNM "aarch64_sve_float_maxmin_operand") -+ (UNSPEC_COND_FMUL "aarch64_sve_float_mul_operand") -+ (UNSPEC_COND_FMULX "register_operand") -+ (UNSPEC_COND_FSUB "register_operand")]) -+ -+;; Likewise for immediates only. -+(define_int_attr sve_pred_fp_rhs2_immediate -+ [(UNSPEC_COND_FMAX "aarch64_sve_float_maxmin_immediate") -+ (UNSPEC_COND_FMAXNM "aarch64_sve_float_maxmin_immediate") -+ (UNSPEC_COND_FMIN "aarch64_sve_float_maxmin_immediate") -+ (UNSPEC_COND_FMINNM "aarch64_sve_float_maxmin_immediate") -+ (UNSPEC_COND_FMUL "aarch64_sve_float_mul_immediate")]) -+ -+;; The maximum number of element bits that an instruction can handle. -+(define_int_attr max_elem_bits [(UNSPEC_UADDV "64") (UNSPEC_SADDV "32") -+ (UNSPEC_PFIRST "8") (UNSPEC_PNEXT "64")]) -+ -+;; The minimum number of element bits that an instruction can handle. 
-+(define_int_attr min_elem_bits [(UNSPEC_RBIT "8") -+ (UNSPEC_REVB "16") -+ (UNSPEC_REVH "32") -+ (UNSPEC_REVW "64")]) -diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md -index 5f7f281e2..0b6bf6172 100644 ---- a/gcc/config/aarch64/predicates.md -+++ b/gcc/config/aarch64/predicates.md -@@ -39,9 +39,17 @@ - (and (match_code "const_int") - (match_test "op == CONST0_RTX (mode)"))) - --(define_special_predicate "subreg_lowpart_operator" -- (and (match_code "subreg") -- (match_test "subreg_lowpart_p (op)"))) -+(define_predicate "const_1_to_3_operand" -+ (match_code "const_int,const_vector") -+{ -+ op = unwrap_const_vec_duplicate (op); -+ return CONST_INT_P (op) && IN_RANGE (INTVAL (op), 1, 3); -+}) -+ -+(define_predicate "subreg_lowpart_operator" -+ (ior (match_code "truncate") -+ (and (match_code "subreg") -+ (match_test "subreg_lowpart_p (op)")))) - - (define_predicate "aarch64_ccmp_immediate" - (and (match_code "const_int") -@@ -53,13 +61,12 @@ - - (define_predicate "aarch64_simd_register" - (and (match_code "reg") -- (ior (match_test "REGNO_REG_CLASS (REGNO (op)) == FP_LO_REGS") -- (match_test "REGNO_REG_CLASS (REGNO (op)) == FP_REGS")))) -+ (match_test "FP_REGNUM_P (REGNO (op))"))) - - (define_predicate "aarch64_reg_or_zero" -- (and (match_code "reg,subreg,const_int") -+ (and (match_code "reg,subreg,const_int,const_double") - (ior (match_operand 0 "register_operand") -- (match_test "op == const0_rtx")))) -+ (match_test "op == CONST0_RTX (GET_MODE (op))")))) - - (define_predicate "aarch64_reg_or_fp_zero" - (ior (match_operand 0 "register_operand") -@@ -98,6 +105,10 @@ - (and (match_code "const_double") - (match_test "aarch64_fpconst_pow_of_2 (op) > 0"))) - -+(define_predicate "aarch64_fp_pow2_recip" -+ (and (match_code "const_double") -+ (match_test "aarch64_fpconst_pow2_recip (op) > 0"))) -+ - (define_predicate "aarch64_fp_vec_pow2" - (match_test "aarch64_vec_fpconst_pow_of_2 (op) > 0")) - -@@ -138,10 +149,18 @@ - (and (match_operand 0 "aarch64_pluslong_immediate") - (not (match_operand 0 "aarch64_plus_immediate")))) - -+(define_predicate "aarch64_sve_scalar_inc_dec_immediate" -+ (and (match_code "const_poly_int") -+ (match_test "aarch64_sve_scalar_inc_dec_immediate_p (op)"))) -+ - (define_predicate "aarch64_sve_addvl_addpl_immediate" - (and (match_code "const_poly_int") - (match_test "aarch64_sve_addvl_addpl_immediate_p (op)"))) - -+(define_predicate "aarch64_sve_plus_immediate" -+ (ior (match_operand 0 "aarch64_sve_scalar_inc_dec_immediate") -+ (match_operand 0 "aarch64_sve_addvl_addpl_immediate"))) -+ - (define_predicate "aarch64_split_add_offset_immediate" - (and (match_code "const_poly_int") - (match_test "aarch64_add_offset_temporaries (op) == 1"))) -@@ -149,7 +168,8 @@ - (define_predicate "aarch64_pluslong_operand" - (ior (match_operand 0 "register_operand") - (match_operand 0 "aarch64_pluslong_immediate") -- (match_operand 0 "aarch64_sve_addvl_addpl_immediate"))) -+ (and (match_test "TARGET_SVE") -+ (match_operand 0 "aarch64_sve_plus_immediate")))) - - (define_predicate "aarch64_pluslong_or_poly_operand" - (ior (match_operand 0 "aarch64_pluslong_operand") -@@ -323,12 +343,6 @@ - (ior (match_operand 0 "register_operand") - (match_operand 0 "const_scalar_int_operand"))) - --(define_predicate "aarch64_smin" -- (match_code "smin")) -- --(define_predicate "aarch64_umin" -- (match_code "umin")) -- - ;; True for integer comparisons and for FP comparisons other than LTGT or UNEQ. 
- (define_special_predicate "aarch64_comparison_operator" - (match_code "eq,ne,le,lt,ge,gt,geu,gtu,leu,ltu,unordered, -@@ -444,6 +458,12 @@ - return aarch64_stepped_int_parallel_p (op, 1); - }) - -+(define_predicate "descending_int_parallel" -+ (match_code "parallel") -+{ -+ return aarch64_stepped_int_parallel_p (op, -1); -+}) -+ - (define_special_predicate "aarch64_simd_lshift_imm" - (match_code "const,const_vector") - { -@@ -460,6 +480,10 @@ - (and (match_code "const,const_vector") - (match_test "op == CONST0_RTX (GET_MODE (op))"))) - -+(define_predicate "aarch64_simd_imm_one" -+ (and (match_code "const_vector") -+ (match_test "op == CONST1_RTX (GET_MODE (op))"))) -+ - (define_predicate "aarch64_simd_or_scalar_imm_zero" - (and (match_code "const_int,const_double,const,const_vector") - (match_test "op == CONST0_RTX (GET_MODE (op))"))) -@@ -474,6 +498,10 @@ - (match_test "op == const0_rtx") - (match_operand 0 "aarch64_simd_or_scalar_imm_zero")))) - -+(define_predicate "aarch64_simd_reg_or_minus_one" -+ (ior (match_operand 0 "register_operand") -+ (match_operand 0 "aarch64_simd_imm_minus_one"))) -+ - (define_predicate "aarch64_simd_struct_operand" - (and (match_code "mem") - (match_test "TARGET_SIMD && aarch64_simd_mem_operand_p (op)"))) -@@ -556,12 +584,44 @@ - (and (match_operand 0 "memory_operand") - (match_test "aarch64_sve_ld1r_operand_p (op)"))) - -+(define_predicate "aarch64_sve_ld1rq_operand" -+ (and (match_code "mem") -+ (match_test "aarch64_sve_ld1rq_operand_p (op)"))) -+ -+(define_predicate "aarch64_sve_ld1ro_operand_b" -+ (and (match_code "mem") -+ (match_test "aarch64_sve_ld1ro_operand_p (op, QImode)"))) -+ -+(define_predicate "aarch64_sve_ld1ro_operand_h" -+ (and (match_code "mem") -+ (match_test "aarch64_sve_ld1ro_operand_p (op, HImode)"))) -+ -+(define_predicate "aarch64_sve_ld1ro_operand_w" -+ (and (match_code "mem") -+ (match_test "aarch64_sve_ld1ro_operand_p (op, SImode)"))) -+ -+(define_predicate "aarch64_sve_ld1ro_operand_d" -+ (and (match_code "mem") -+ (match_test "aarch64_sve_ld1ro_operand_p (op, DImode)"))) -+ -+(define_predicate "aarch64_sve_ldff1_operand" -+ (and (match_code "mem") -+ (match_test "aarch64_sve_ldff1_operand_p (op)"))) -+ -+(define_predicate "aarch64_sve_ldnf1_operand" -+ (and (match_code "mem") -+ (match_test "aarch64_sve_ldnf1_operand_p (op)"))) -+ - ;; Like memory_operand, but restricted to addresses that are valid for - ;; SVE LDR and STR instructions. 
- (define_predicate "aarch64_sve_ldr_operand" - (and (match_code "mem") - (match_test "aarch64_sve_ldr_operand_p (op)"))) - -+(define_special_predicate "aarch64_sve_prefetch_operand" -+ (and (match_code "reg, plus") -+ (match_test "aarch64_sve_prefetch_operand_p (op, mode)"))) -+ - (define_predicate "aarch64_sve_nonimmediate_operand" - (ior (match_operand 0 "register_operand") - (match_operand 0 "aarch64_sve_ldr_operand"))) -@@ -586,6 +646,10 @@ - (ior (match_operand 0 "register_operand") - (match_operand 0 "aarch64_sve_ld1r_operand"))) - -+(define_predicate "aarch64_sve_ptrue_svpattern_immediate" -+ (and (match_code "const") -+ (match_test "aarch64_sve_ptrue_svpattern_p (op, NULL)"))) -+ - (define_predicate "aarch64_sve_arith_immediate" - (and (match_code "const,const_vector") - (match_test "aarch64_sve_arith_immediate_p (op, false)"))) -@@ -594,28 +658,84 @@ - (and (match_code "const,const_vector") - (match_test "aarch64_sve_arith_immediate_p (op, true)"))) - --(define_predicate "aarch64_sve_inc_dec_immediate" -+(define_predicate "aarch64_sve_qadd_immediate" - (and (match_code "const,const_vector") -- (match_test "aarch64_sve_inc_dec_immediate_p (op)"))) -+ (match_test "aarch64_sve_sqadd_sqsub_immediate_p (op, false)"))) -+ -+(define_predicate "aarch64_sve_qsub_immediate" -+ (and (match_code "const,const_vector") -+ (match_test "aarch64_sve_sqadd_sqsub_immediate_p (op, true)"))) -+ -+(define_predicate "aarch64_sve_vector_inc_dec_immediate" -+ (and (match_code "const,const_vector") -+ (match_test "aarch64_sve_vector_inc_dec_immediate_p (op)"))) -+ -+(define_predicate "aarch64_sve_gather_immediate_b" -+ (and (match_code "const_int") -+ (match_test "IN_RANGE (INTVAL (op), 0, 31)"))) -+ -+(define_predicate "aarch64_sve_gather_immediate_h" -+ (and (match_code "const_int") -+ (match_test "IN_RANGE (INTVAL (op), 0, 62)") -+ (match_test "(INTVAL (op) & 1) == 0"))) -+ -+(define_predicate "aarch64_sve_gather_immediate_w" -+ (and (match_code "const_int") -+ (match_test "IN_RANGE (INTVAL (op), 0, 124)") -+ (match_test "(INTVAL (op) & 3) == 0"))) -+ -+(define_predicate "aarch64_sve_gather_immediate_d" -+ (and (match_code "const_int") -+ (match_test "IN_RANGE (INTVAL (op), 0, 248)") -+ (match_test "(INTVAL (op) & 7) == 0"))) -+ -+(define_predicate "aarch64_sve_uxtb_immediate" -+ (and (match_code "const_vector") -+ (match_test "GET_MODE_UNIT_BITSIZE (GET_MODE (op)) > 8") -+ (match_test "aarch64_const_vec_all_same_int_p (op, 0xff)"))) -+ -+(define_predicate "aarch64_sve_uxth_immediate" -+ (and (match_code "const_vector") -+ (match_test "GET_MODE_UNIT_BITSIZE (GET_MODE (op)) > 16") -+ (match_test "aarch64_const_vec_all_same_int_p (op, 0xffff)"))) -+ -+(define_predicate "aarch64_sve_uxtw_immediate" -+ (and (match_code "const_vector") -+ (match_test "GET_MODE_UNIT_BITSIZE (GET_MODE (op)) > 32") -+ (match_test "aarch64_const_vec_all_same_int_p (op, 0xffffffff)"))) -+ -+(define_predicate "aarch64_sve_uxt_immediate" -+ (ior (match_operand 0 "aarch64_sve_uxtb_immediate") -+ (match_operand 0 "aarch64_sve_uxth_immediate") -+ (match_operand 0 "aarch64_sve_uxtw_immediate"))) - - (define_predicate "aarch64_sve_logical_immediate" - (and (match_code "const,const_vector") - (match_test "aarch64_sve_bitmask_immediate_p (op)"))) - --(define_predicate "aarch64_sve_mul_immediate" -+;; Used for SVE UMAX and UMIN. -+(define_predicate "aarch64_sve_vsb_immediate" -+ (and (match_code "const_vector") -+ (match_test "GET_MODE_INNER (GET_MODE (op)) == QImode -+ ? 
aarch64_const_vec_all_same_in_range_p (op, -128, 127) -+ : aarch64_const_vec_all_same_in_range_p (op, 0, 255)"))) -+ -+;; Used for SVE MUL, SMAX and SMIN. -+(define_predicate "aarch64_sve_vsm_immediate" - (and (match_code "const,const_vector") - (match_test "aarch64_const_vec_all_same_in_range_p (op, -128, 127)"))) - - (define_predicate "aarch64_sve_dup_immediate" - (and (match_code "const,const_vector") -- (match_test "aarch64_sve_dup_immediate_p (op)"))) -+ (ior (match_test "aarch64_sve_dup_immediate_p (op)") -+ (match_test "aarch64_float_const_representable_p (op)")))) - - (define_predicate "aarch64_sve_cmp_vsc_immediate" -- (and (match_code "const,const_vector") -+ (and (match_code "const_int,const_vector") - (match_test "aarch64_sve_cmp_immediate_p (op, true)"))) - - (define_predicate "aarch64_sve_cmp_vsd_immediate" -- (and (match_code "const,const_vector") -+ (and (match_code "const_int,const_vector") - (match_test "aarch64_sve_cmp_immediate_p (op, false)"))) - - (define_predicate "aarch64_sve_index_immediate" -@@ -626,14 +746,23 @@ - (and (match_code "const,const_vector") - (match_test "aarch64_sve_float_arith_immediate_p (op, false)"))) - --(define_predicate "aarch64_sve_float_arith_with_sub_immediate" -+(define_predicate "aarch64_sve_float_negated_arith_immediate" - (and (match_code "const,const_vector") - (match_test "aarch64_sve_float_arith_immediate_p (op, true)"))) - -+(define_predicate "aarch64_sve_float_arith_with_sub_immediate" -+ (ior (match_operand 0 "aarch64_sve_float_arith_immediate") -+ (match_operand 0 "aarch64_sve_float_negated_arith_immediate"))) -+ - (define_predicate "aarch64_sve_float_mul_immediate" - (and (match_code "const,const_vector") - (match_test "aarch64_sve_float_mul_immediate_p (op)"))) - -+(define_predicate "aarch64_sve_float_maxmin_immediate" -+ (and (match_code "const_vector") -+ (ior (match_test "op == CONST0_RTX (GET_MODE (op))") -+ (match_test "op == CONST1_RTX (GET_MODE (op))")))) -+ - (define_predicate "aarch64_sve_arith_operand" - (ior (match_operand 0 "register_operand") - (match_operand 0 "aarch64_sve_arith_immediate"))) -@@ -641,12 +770,37 @@ - (define_predicate "aarch64_sve_add_operand" - (ior (match_operand 0 "aarch64_sve_arith_operand") - (match_operand 0 "aarch64_sve_sub_arith_immediate") -- (match_operand 0 "aarch64_sve_inc_dec_immediate"))) -+ (match_operand 0 "aarch64_sve_vector_inc_dec_immediate"))) -+ -+(define_predicate "aarch64_sve_sqadd_operand" -+ (ior (match_operand 0 "register_operand") -+ (match_operand 0 "aarch64_sve_qadd_immediate") -+ (match_operand 0 "aarch64_sve_qsub_immediate"))) -+ -+(define_predicate "aarch64_sve_pred_and_operand" -+ (ior (match_operand 0 "register_operand") -+ (match_operand 0 "aarch64_sve_uxt_immediate"))) - - (define_predicate "aarch64_sve_logical_operand" - (ior (match_operand 0 "register_operand") - (match_operand 0 "aarch64_sve_logical_immediate"))) - -+(define_predicate "aarch64_sve_gather_offset_b" -+ (ior (match_operand 0 "register_operand") -+ (match_operand 0 "aarch64_sve_gather_immediate_b"))) -+ -+(define_predicate "aarch64_sve_gather_offset_h" -+ (ior (match_operand 0 "register_operand") -+ (match_operand 0 "aarch64_sve_gather_immediate_h"))) -+ -+(define_predicate "aarch64_sve_gather_offset_w" -+ (ior (match_operand 0 "register_operand") -+ (match_operand 0 "aarch64_sve_gather_immediate_w"))) -+ -+(define_predicate "aarch64_sve_gather_offset_d" -+ (ior (match_operand 0 "register_operand") -+ (match_operand 0 "aarch64_sve_gather_immediate_d"))) -+ - (define_predicate 
"aarch64_sve_lshift_operand" - (ior (match_operand 0 "register_operand") - (match_operand 0 "aarch64_simd_lshift_imm"))) -@@ -655,9 +809,17 @@ - (ior (match_operand 0 "register_operand") - (match_operand 0 "aarch64_simd_rshift_imm"))) - --(define_predicate "aarch64_sve_mul_operand" -+(define_predicate "aarch64_sve_vsb_operand" - (ior (match_operand 0 "register_operand") -- (match_operand 0 "aarch64_sve_mul_immediate"))) -+ (match_operand 0 "aarch64_sve_vsb_immediate"))) -+ -+(define_predicate "aarch64_sve_vsm_operand" -+ (ior (match_operand 0 "register_operand") -+ (match_operand 0 "aarch64_sve_vsm_immediate"))) -+ -+(define_predicate "aarch64_sve_reg_or_dup_imm" -+ (ior (match_operand 0 "register_operand") -+ (match_operand 0 "aarch64_sve_dup_immediate"))) - - (define_predicate "aarch64_sve_cmp_vsc_operand" - (ior (match_operand 0 "register_operand") -@@ -676,17 +838,39 @@ - (match_operand 0 "aarch64_sve_float_arith_immediate"))) - - (define_predicate "aarch64_sve_float_arith_with_sub_operand" -- (ior (match_operand 0 "aarch64_sve_float_arith_operand") -+ (ior (match_operand 0 "register_operand") - (match_operand 0 "aarch64_sve_float_arith_with_sub_immediate"))) - - (define_predicate "aarch64_sve_float_mul_operand" - (ior (match_operand 0 "register_operand") - (match_operand 0 "aarch64_sve_float_mul_immediate"))) - -+(define_predicate "aarch64_sve_float_maxmin_operand" -+ (ior (match_operand 0 "register_operand") -+ (match_operand 0 "aarch64_sve_float_maxmin_immediate"))) -+ - (define_predicate "aarch64_sve_vec_perm_operand" - (ior (match_operand 0 "register_operand") - (match_operand 0 "aarch64_constant_vector_operand"))) - -+(define_predicate "aarch64_sve_ptrue_flag" -+ (and (match_code "const_int") -+ (ior (match_test "INTVAL (op) == SVE_MAYBE_NOT_PTRUE") -+ (match_test "INTVAL (op) == SVE_KNOWN_PTRUE")))) -+ -+(define_predicate "aarch64_sve_gp_strictness" -+ (and (match_code "const_int") -+ (ior (match_test "INTVAL (op) == SVE_RELAXED_GP") -+ (match_test "INTVAL (op) == SVE_STRICT_GP")))) -+ -+(define_predicate "aarch64_gather_scale_operand_b" -+ (and (match_code "const_int") -+ (match_test "INTVAL (op) == 1"))) -+ -+(define_predicate "aarch64_gather_scale_operand_h" -+ (and (match_code "const_int") -+ (match_test "INTVAL (op) == 1 || INTVAL (op) == 2"))) -+ - (define_predicate "aarch64_gather_scale_operand_w" - (and (match_code "const_int") - (match_test "INTVAL (op) == 1 || INTVAL (op) == 4"))) -diff --git a/gcc/config/aarch64/saphira.md b/gcc/config/aarch64/saphira.md -index 853deeef0..3cc7bc410 100644 ---- a/gcc/config/aarch64/saphira.md -+++ b/gcc/config/aarch64/saphira.md -@@ -520,7 +520,7 @@ - - (define_insn_reservation "saphira_other_0_nothing" 0 - (and (eq_attr "tune" "saphira") -- (eq_attr "type" "no_insn,trap,block")) -+ (eq_attr "type" "trap,block")) - "nothing") - - (define_insn_reservation "saphira_other_2_ld" 2 -diff --git a/gcc/config/aarch64/t-aarch64 b/gcc/config/aarch64/t-aarch64 -index ee471f898..28e1c7aec 100644 ---- a/gcc/config/aarch64/t-aarch64 -+++ b/gcc/config/aarch64/t-aarch64 -@@ -40,6 +40,43 @@ aarch64-builtins.o: $(srcdir)/config/aarch64/aarch64-builtins.c $(CONFIG_H) \ - $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ - $(srcdir)/config/aarch64/aarch64-builtins.c - -+aarch64-sve-builtins.o: $(srcdir)/config/aarch64/aarch64-sve-builtins.cc \ -+ $(srcdir)/config/aarch64/aarch64-sve-builtins.def \ -+ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(RTL_H) \ -+ $(TM_P_H) memmodel.h insn-codes.h $(OPTABS_H) $(RECOG_H) $(DIAGNOSTIC_H) 
\ -+ $(EXPR_H) $(BASIC_BLOCK_H) $(FUNCTION_H) fold-const.h $(GIMPLE_H) \ -+ gimple-iterator.h gimplify.h explow.h $(EMIT_RTL_H) tree-vector-builder.h \ -+ stor-layout.h $(REG_H) alias.h gimple-fold.h langhooks.h \ -+ stringpool.h \ -+ $(srcdir)/config/aarch64/aarch64-sve-builtins.h \ -+ $(srcdir)/config/aarch64/aarch64-sve-builtins-shapes.h \ -+ $(srcdir)/config/aarch64/aarch64-sve-builtins-base.h -+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ -+ $(srcdir)/config/aarch64/aarch64-sve-builtins.cc -+ -+aarch64-sve-builtins-shapes.o: \ -+ $(srcdir)/config/aarch64/aarch64-sve-builtins-shapes.cc \ -+ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(RTL_H) \ -+ $(TM_P_H) memmodel.h insn-codes.h $(OPTABS_H) \ -+ $(srcdir)/config/aarch64/aarch64-sve-builtins.h \ -+ $(srcdir)/config/aarch64/aarch64-sve-builtins-shapes.h -+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ -+ $(srcdir)/config/aarch64/aarch64-sve-builtins-shapes.cc -+ -+aarch64-sve-builtins-base.o: \ -+ $(srcdir)/config/aarch64/aarch64-sve-builtins-base.cc \ -+ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(RTL_H) \ -+ $(TM_P_H) memmodel.h insn-codes.h $(OPTABS_H) $(RECOG_H) \ -+ $(EXPR_H) $(BASIC_BLOCK_H) $(FUNCTION_H) fold-const.h $(GIMPLE_H) \ -+ gimple-iterator.h gimplify.h explow.h $(EMIT_RTL_H) tree-vector-builder.h \ -+ rtx-vector-builder.h vec-perm-indices.h \ -+ $(srcdir)/config/aarch64/aarch64-sve-builtins.h \ -+ $(srcdir)/config/aarch64/aarch64-sve-builtins-shapes.h \ -+ $(srcdir)/config/aarch64/aarch64-sve-builtins-base.h \ -+ $(srcdir)/config/aarch64/aarch64-sve-builtins-functions.h -+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ -+ $(srcdir)/config/aarch64/aarch64-sve-builtins-base.cc -+ - aarch64-builtin-iterators.h: $(srcdir)/config/aarch64/geniterators.sh \ - $(srcdir)/config/aarch64/iterators.md - $(SHELL) $(srcdir)/config/aarch64/geniterators.sh \ -@@ -103,3 +140,10 @@ aarch64-bti-insert.o: $(srcdir)/config/aarch64/aarch64-bti-insert.c \ - comma=, - MULTILIB_OPTIONS = $(subst $(comma),/, $(patsubst %, mabi=%, $(subst $(comma),$(comma)mabi=,$(TM_MULTILIB_CONFIG)))) - MULTILIB_DIRNAMES = $(subst $(comma), ,$(TM_MULTILIB_CONFIG)) -+ -+insn-conditions.md: s-check-sve-md -+s-check-sve-md: $(srcdir)/config/aarch64/check-sve-md.awk \ -+ $(srcdir)/config/aarch64/aarch64-sve.md -+ $(AWK) -f $(srcdir)/config/aarch64/check-sve-md.awk \ -+ $(srcdir)/config/aarch64/aarch64-sve.md -+ $(STAMP) s-check-sve-md -diff --git a/gcc/config/aarch64/t-aarch64-netbsd b/gcc/config/aarch64/t-aarch64-netbsd -new file mode 100644 -index 000000000..aa447d0f6 ---- /dev/null -+++ b/gcc/config/aarch64/t-aarch64-netbsd -@@ -0,0 +1,21 @@ -+# Machine description for AArch64 architecture. -+# Copyright (C) 2016-2019 Free Software Foundation, Inc. -+# -+# This file is part of GCC. -+# -+# GCC is free software; you can redistribute it and/or modify it -+# under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 3, or (at your option) -+# any later version. -+# -+# GCC is distributed in the hope that it will be useful, but -+# WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+# General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with GCC; see the file COPYING3. If not see -+# . 
-+ -+LIB1ASMSRC = aarch64/lib1funcs.asm -+LIB1ASMFUNCS = _aarch64_sync_cache_range -diff --git a/gcc/config/aarch64/thunderx2t99.md b/gcc/config/aarch64/thunderx2t99.md -index c43c39ecd..bb6e0abb0 100644 ---- a/gcc/config/aarch64/thunderx2t99.md -+++ b/gcc/config/aarch64/thunderx2t99.md -@@ -74,7 +74,7 @@ - - (define_insn_reservation "thunderx2t99_nothing" 0 - (and (eq_attr "tune" "thunderx2t99") -- (eq_attr "type" "no_insn,block")) -+ (eq_attr "type" "block")) - "nothing") - - (define_insn_reservation "thunderx2t99_mrs" 0 -diff --git a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md -index 680c48a68..f20055dae 100644 ---- a/gcc/config/aarch64/tsv110.md -+++ b/gcc/config/aarch64/tsv110.md -@@ -281,7 +281,7 @@ - shift_imm,shift_reg,\ - mov_imm,mov_reg,\ - mvn_imm,mvn_reg,\ -- mrs,multiple,no_insn")) -+ mrs,multiple")) - "tsv110_alu1|tsv110_alu2|tsv110_alu3") - - (define_insn_reservation "tsv110_alus" 1 -diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c -index 524379d37..cd6aa117c 100644 ---- a/gcc/config/alpha/alpha.c -+++ b/gcc/config/alpha/alpha.c -@@ -6380,7 +6380,7 @@ alpha_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, - t = fold_convert (build_nonstandard_integer_type (64, 0), offset_field); - offset = get_initialized_tmp_var (t, pre_p, NULL); - -- indirect = pass_by_reference (NULL, TYPE_MODE (type), type, false); -+ indirect = pass_va_arg_by_reference (type); - - if (indirect) - { -diff --git a/gcc/config/alpha/alpha.h b/gcc/config/alpha/alpha.h -index e2008202a..68eafe194 100644 ---- a/gcc/config/alpha/alpha.h -+++ b/gcc/config/alpha/alpha.h -@@ -759,7 +759,7 @@ do { \ - #define MOVE_MAX 8 - - /* If a memory-to-memory move would take MOVE_RATIO or more simple -- move-instruction pairs, we will do a movmem or libcall instead. -+ move-instruction pairs, we will do a cpymem or libcall instead. - - Without byte/word accesses, we want no more than four instructions; - with, several single byte accesses are better. 
*/ -diff --git a/gcc/config/alpha/alpha.md b/gcc/config/alpha/alpha.md -index dd340a08e..228dee44c 100644 ---- a/gcc/config/alpha/alpha.md -+++ b/gcc/config/alpha/alpha.md -@@ -4673,7 +4673,7 @@ - ;; Argument 2 is the length - ;; Argument 3 is the alignment - --(define_expand "movmemqi" -+(define_expand "cpymemqi" - [(parallel [(set (match_operand:BLK 0 "memory_operand") - (match_operand:BLK 1 "memory_operand")) - (use (match_operand:DI 2 "immediate_operand")) -@@ -4686,7 +4686,7 @@ - FAIL; - }) - --(define_expand "movmemdi" -+(define_expand "cpymemdi" - [(parallel [(set (match_operand:BLK 0 "memory_operand") - (match_operand:BLK 1 "memory_operand")) - (use (match_operand:DI 2 "immediate_operand")) -@@ -4703,7 +4703,7 @@ - "TARGET_ABI_OPEN_VMS" - "operands[4] = gen_rtx_SYMBOL_REF (Pmode, \"OTS$MOVE\");") - --(define_insn "*movmemdi_1" -+(define_insn "*cpymemdi_1" - [(set (match_operand:BLK 0 "memory_operand" "=m,m") - (match_operand:BLK 1 "memory_operand" "m,m")) - (use (match_operand:DI 2 "nonmemory_operand" "r,i")) -diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h -index ac0de6b28..00d2dd2c6 100644 ---- a/gcc/config/arc/arc-protos.h -+++ b/gcc/config/arc/arc-protos.h -@@ -35,7 +35,7 @@ extern void arc_final_prescan_insn (rtx_insn *, rtx *, int); - extern const char *arc_output_libcall (const char *); - extern int arc_output_addsi (rtx *operands, bool, bool); - extern int arc_output_commutative_cond_exec (rtx *operands, bool); --extern bool arc_expand_movmem (rtx *operands); -+extern bool arc_expand_cpymem (rtx *operands); - extern bool prepare_move_operands (rtx *operands, machine_mode mode); - extern void emit_shift (enum rtx_code, rtx, rtx, rtx); - extern void arc_expand_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx); -diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c -index 325dd3cea..c0f13ebe7 100644 ---- a/gcc/config/arc/arc.c -+++ b/gcc/config/arc/arc.c -@@ -8791,7 +8791,7 @@ arc_output_commutative_cond_exec (rtx *operands, bool output_p) - return 8; - } - --/* Helper function of arc_expand_movmem. ADDR points to a chunk of memory. -+/* Helper function of arc_expand_cpymem. ADDR points to a chunk of memory. - Emit code and return an potentially modified address such that offsets - up to SIZE are can be added to yield a legitimate address. - if REUSE is set, ADDR is a register that may be modified. */ -@@ -8825,7 +8825,7 @@ force_offsettable (rtx addr, HOST_WIDE_INT size, bool reuse) - offset ranges. Return true on success. */ - - bool --arc_expand_movmem (rtx *operands) -+arc_expand_cpymem (rtx *operands) - { - rtx dst = operands[0]; - rtx src = operands[1]; -@@ -10335,7 +10335,7 @@ arc_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size, - enum by_pieces_operation op, - bool speed_p) - { -- /* Let the movmem expander handle small block moves. */ -+ /* Let the cpymem expander handle small block moves. */ - if (op == MOVE_BY_PIECES) - return false; - -diff --git a/gcc/config/arc/arc.h b/gcc/config/arc/arc.h -index 00fc3e471..7ae10a666 100644 ---- a/gcc/config/arc/arc.h -+++ b/gcc/config/arc/arc.h -@@ -1423,7 +1423,7 @@ do { \ - in one reasonably fast instruction. */ - #define MOVE_MAX 4 - --/* Undo the effects of the movmem pattern presence on STORE_BY_PIECES_P . */ -+/* Undo the effects of the cpymem pattern presence on STORE_BY_PIECES_P . */ - #define MOVE_RATIO(SPEED) ((SPEED) ? 
15 : 3) - - /* Define this to be nonzero if shift instructions ignore all but the -diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md -index 34e8248bc..2cfcf8bdd 100644 ---- a/gcc/config/arc/arc.md -+++ b/gcc/config/arc/arc.md -@@ -5114,13 +5114,13 @@ core_3, archs4x, archs4xd, archs4xd_slow" - (set_attr "type" "loop_end") - (set_attr "length" "4,20")]) - --(define_expand "movmemsi" -+(define_expand "cpymemsi" - [(match_operand:BLK 0 "" "") - (match_operand:BLK 1 "" "") - (match_operand:SI 2 "nonmemory_operand" "") - (match_operand 3 "immediate_operand" "")] - "" -- "if (arc_expand_movmem (operands)) DONE; else FAIL;") -+ "if (arc_expand_cpymem (operands)) DONE; else FAIL;") - - ;; Close http://gcc.gnu.org/bugzilla/show_bug.cgi?id=35803 if this works - ;; to the point that we can generate cmove instructions. -diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h -index 98beb6109..dd1f32798 100644 ---- a/gcc/config/arm/arm-protos.h -+++ b/gcc/config/arm/arm-protos.h -@@ -127,8 +127,8 @@ extern bool offset_ok_for_ldrd_strd (HOST_WIDE_INT); - extern bool operands_ok_ldrd_strd (rtx, rtx, rtx, HOST_WIDE_INT, bool, bool); - extern bool gen_operands_ldrd_strd (rtx *, bool, bool, bool); - extern bool valid_operands_ldrd_strd (rtx *, bool); --extern int arm_gen_movmemqi (rtx *); --extern bool gen_movmem_ldrd_strd (rtx *); -+extern int arm_gen_cpymemqi (rtx *); -+extern bool gen_cpymem_ldrd_strd (rtx *); - extern machine_mode arm_select_cc_mode (RTX_CODE, rtx, rtx); - extern machine_mode arm_select_dominance_cc_mode (rtx, rtx, - HOST_WIDE_INT); -@@ -204,7 +204,7 @@ extern void thumb2_final_prescan_insn (rtx_insn *); - extern const char *thumb_load_double_from_address (rtx *); - extern const char *thumb_output_move_mem_multiple (int, rtx *); - extern const char *thumb_call_via_reg (rtx); --extern void thumb_expand_movmemqi (rtx *); -+extern void thumb_expand_cpymemqi (rtx *); - extern rtx arm_return_addr (int, rtx); - extern void thumb_reload_out_hi (rtx *); - extern void thumb_set_return_address (rtx, rtx); -diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c -index eba26011e..c8a09329a 100644 ---- a/gcc/config/arm/arm.c -+++ b/gcc/config/arm/arm.c -@@ -14426,7 +14426,7 @@ arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length, - core type, optimize_size setting, etc. */ - - static int --arm_movmemqi_unaligned (rtx *operands) -+arm_cpymemqi_unaligned (rtx *operands) - { - HOST_WIDE_INT length = INTVAL (operands[2]); - -@@ -14463,7 +14463,7 @@ arm_movmemqi_unaligned (rtx *operands) - } - - int --arm_gen_movmemqi (rtx *operands) -+arm_gen_cpymemqi (rtx *operands) - { - HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes; - HOST_WIDE_INT srcoffset, dstoffset; -@@ -14477,7 +14477,7 @@ arm_gen_movmemqi (rtx *operands) - return 0; - - if (unaligned_access && (INTVAL (operands[3]) & 3) != 0) -- return arm_movmemqi_unaligned (operands); -+ return arm_cpymemqi_unaligned (operands); - - if (INTVAL (operands[3]) & 3) - return 0; -@@ -14611,7 +14611,7 @@ arm_gen_movmemqi (rtx *operands) - return 1; - } - --/* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx -+/* Helper for gen_cpymem_ldrd_strd. Increase the address of memory rtx - by mode size. */ - inline static rtx - next_consecutive_mem (rtx mem) -@@ -14626,7 +14626,7 @@ next_consecutive_mem (rtx mem) - /* Copy using LDRD/STRD instructions whenever possible. - Returns true upon success. 
*/ - bool --gen_movmem_ldrd_strd (rtx *operands) -+gen_cpymem_ldrd_strd (rtx *operands) - { - unsigned HOST_WIDE_INT len; - HOST_WIDE_INT align; -@@ -14670,7 +14670,7 @@ gen_movmem_ldrd_strd (rtx *operands) - - /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */ - if (!(dst_aligned || src_aligned)) -- return arm_gen_movmemqi (operands); -+ return arm_gen_cpymemqi (operands); - - /* If the either src or dst is unaligned we'll be accessing it as pairs - of unaligned SImode accesses. Otherwise we can generate DImode -@@ -26472,7 +26472,7 @@ thumb_call_via_reg (rtx reg) - - /* Routines for generating rtl. */ - void --thumb_expand_movmemqi (rtx *operands) -+thumb_expand_cpymemqi (rtx *operands) - { - rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0)); - rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0)); -@@ -26481,13 +26481,13 @@ thumb_expand_movmemqi (rtx *operands) - - while (len >= 12) - { -- emit_insn (gen_movmem12b (out, in, out, in)); -+ emit_insn (gen_cpymem12b (out, in, out, in)); - len -= 12; - } - - if (len >= 8) - { -- emit_insn (gen_movmem8b (out, in, out, in)); -+ emit_insn (gen_cpymem8b (out, in, out, in)); - len -= 8; - } - -diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md -index 53e54874c..a1b9d9fac 100644 ---- a/gcc/config/arm/arm.md -+++ b/gcc/config/arm/arm.md -@@ -7260,7 +7260,7 @@ - ;; We could let this apply for blocks of less than this, but it clobbers so - ;; many registers that there is then probably a better way. - --(define_expand "movmemqi" -+(define_expand "cpymemqi" - [(match_operand:BLK 0 "general_operand" "") - (match_operand:BLK 1 "general_operand" "") - (match_operand:SI 2 "const_int_operand" "") -@@ -7272,12 +7272,12 @@ - if (TARGET_LDRD && current_tune->prefer_ldrd_strd - && !optimize_function_for_size_p (cfun)) - { -- if (gen_movmem_ldrd_strd (operands)) -+ if (gen_cpymem_ldrd_strd (operands)) - DONE; - FAIL; - } - -- if (arm_gen_movmemqi (operands)) -+ if (arm_gen_cpymemqi (operands)) - DONE; - FAIL; - } -@@ -7287,7 +7287,7 @@ - || INTVAL (operands[2]) > 48) - FAIL; - -- thumb_expand_movmemqi (operands); -+ thumb_expand_cpymemqi (operands); - DONE; - } - " -@@ -8807,6 +8807,8 @@ - [(set_attr "arch" "t1,32")] - ) - -+;; DO NOT SPLIT THIS INSN. It's important for security reasons that the -+;; canary value does not live beyond the life of this sequence. 
- (define_insn "*stack_protect_set_insn" - [(set (match_operand:SI 0 "memory_operand" "=m,m") - (unspec:SI [(mem:SI (match_operand:SI 1 "register_operand" "+&l,&r"))] -@@ -8814,8 +8816,8 @@ - (clobber (match_dup 1))] - "" - "@ -- ldr\\t%1, [%1]\;str\\t%1, %0\;movs\t%1,#0 -- ldr\\t%1, [%1]\;str\\t%1, %0\;mov\t%1,#0" -+ ldr\\t%1, [%1]\;str\\t%1, %0\;movs\t%1, #0 -+ ldr\\t%1, [%1]\;str\\t%1, %0\;mov\t%1, #0" - [(set_attr "length" "8,12") - (set_attr "conds" "clob,nocond") - (set_attr "type" "multiple") -diff --git a/gcc/config/arm/arm1020e.md b/gcc/config/arm/arm1020e.md -index b835cbaaa..c4c038b04 100644 ---- a/gcc/config/arm/arm1020e.md -+++ b/gcc/config/arm/arm1020e.md -@@ -72,7 +72,7 @@ - adr,bfm,rev,\ - shift_imm,shift_reg,\ - mov_imm,mov_reg,mvn_imm,mvn_reg,\ -- multiple,no_insn")) -+ multiple")) - "1020a_e,1020a_m,1020a_w") - - ;; ALU operations with a shift-by-constant operand -diff --git a/gcc/config/arm/arm1026ejs.md b/gcc/config/arm/arm1026ejs.md -index 05f4d724f..88546872a 100644 ---- a/gcc/config/arm/arm1026ejs.md -+++ b/gcc/config/arm/arm1026ejs.md -@@ -72,7 +72,7 @@ - adr,bfm,rev,\ - shift_imm,shift_reg,\ - mov_imm,mov_reg,mvn_imm,mvn_reg,\ -- multiple,no_insn")) -+ multiple")) - "a_e,a_m,a_w") - - ;; ALU operations with a shift-by-constant operand -diff --git a/gcc/config/arm/arm1136jfs.md b/gcc/config/arm/arm1136jfs.md -index ae0b54f5e..e7fd53afe 100644 ---- a/gcc/config/arm/arm1136jfs.md -+++ b/gcc/config/arm/arm1136jfs.md -@@ -81,7 +81,7 @@ - adr,bfm,rev,\ - shift_imm,shift_reg,\ - mov_imm,mov_reg,mvn_imm,mvn_reg,\ -- multiple,no_insn")) -+ multiple")) - "e_1,e_2,e_3,e_wb") - - ;; ALU operations with a shift-by-constant operand -diff --git a/gcc/config/arm/arm926ejs.md b/gcc/config/arm/arm926ejs.md -index db4c7db8c..b4f503159 100644 ---- a/gcc/config/arm/arm926ejs.md -+++ b/gcc/config/arm/arm926ejs.md -@@ -67,7 +67,7 @@ - shift_imm,shift_reg,extend,\ - mov_imm,mov_reg,mov_shift,\ - mvn_imm,mvn_reg,mvn_shift,\ -- multiple,no_insn")) -+ multiple")) - "e,m,w") - - ;; ALU operations with a shift-by-register operand -diff --git a/gcc/config/arm/cortex-a15.md b/gcc/config/arm/cortex-a15.md -index f57f98675..26765c3db 100644 ---- a/gcc/config/arm/cortex-a15.md -+++ b/gcc/config/arm/cortex-a15.md -@@ -68,7 +68,7 @@ - shift_imm,shift_reg,\ - mov_imm,mov_reg,\ - mvn_imm,mvn_reg,\ -- mrs,multiple,no_insn")) -+ mrs,multiple")) - "ca15_issue1,(ca15_sx1,ca15_sx1_alu)|(ca15_sx2,ca15_sx2_alu)") - - ;; ALU ops with immediate shift -diff --git a/gcc/config/arm/cortex-a17.md b/gcc/config/arm/cortex-a17.md -index a0c6e5141..97b716414 100644 ---- a/gcc/config/arm/cortex-a17.md -+++ b/gcc/config/arm/cortex-a17.md -@@ -42,7 +42,7 @@ - adc_imm,adcs_imm,adc_reg,adcs_reg,\ - adr, mov_imm,mov_reg,\ - mvn_imm,mvn_reg,extend,\ -- mrs,multiple,no_insn")) -+ mrs,multiple")) - "ca17_alu") - - (define_insn_reservation "cortex_a17_alu_shiftimm" 2 -diff --git a/gcc/config/arm/cortex-a5.md b/gcc/config/arm/cortex-a5.md -index efced646a..08aa90856 100644 ---- a/gcc/config/arm/cortex-a5.md -+++ b/gcc/config/arm/cortex-a5.md -@@ -64,7 +64,7 @@ - adr,bfm,clz,rbit,rev,alu_dsp_reg,\ - shift_imm,shift_reg,\ - mov_imm,mov_reg,mvn_imm,mvn_reg,\ -- mrs,multiple,no_insn")) -+ mrs,multiple")) - "cortex_a5_ex1") - - (define_insn_reservation "cortex_a5_alu_shift" 2 -diff --git a/gcc/config/arm/cortex-a53.md b/gcc/config/arm/cortex-a53.md -index b55d34e91..9b29f3874 100644 ---- a/gcc/config/arm/cortex-a53.md -+++ b/gcc/config/arm/cortex-a53.md -@@ -86,7 +86,7 @@ - alu_sreg,alus_sreg,logic_reg,logics_reg, - 
adc_imm,adcs_imm,adc_reg,adcs_reg, - csel,clz,rbit,rev,alu_dsp_reg, -- mov_reg,mvn_reg,mrs,multiple,no_insn")) -+ mov_reg,mvn_reg,mrs,multiple")) - "cortex_a53_slot_any") - - (define_insn_reservation "cortex_a53_alu_shift" 3 -diff --git a/gcc/config/arm/cortex-a57.md b/gcc/config/arm/cortex-a57.md -index 577dc8d7f..49654bf18 100644 ---- a/gcc/config/arm/cortex-a57.md -+++ b/gcc/config/arm/cortex-a57.md -@@ -301,7 +301,7 @@ - rotate_imm,shift_imm,shift_reg,\ - mov_imm,mov_reg,\ - mvn_imm,mvn_reg,\ -- mrs,multiple,no_insn")) -+ mrs,multiple")) - "ca57_sx1|ca57_sx2") - - ;; ALU ops with immediate shift -diff --git a/gcc/config/arm/cortex-a7.md b/gcc/config/arm/cortex-a7.md -index 1f9d6414e..f1b60aa27 100644 ---- a/gcc/config/arm/cortex-a7.md -+++ b/gcc/config/arm/cortex-a7.md -@@ -149,7 +149,7 @@ - logic_shift_reg,logics_shift_reg,\ - mov_shift,mov_shift_reg,\ - mvn_shift,mvn_shift_reg,\ -- mrs,multiple,no_insn")) -+ mrs,multiple")) - "cortex_a7_ex1") - - ;; Forwarding path for unshifted operands. -diff --git a/gcc/config/arm/cortex-a8.md b/gcc/config/arm/cortex-a8.md -index 980aed86e..e3372453d 100644 ---- a/gcc/config/arm/cortex-a8.md -+++ b/gcc/config/arm/cortex-a8.md -@@ -90,7 +90,7 @@ - adc_imm,adcs_imm,adc_reg,adcs_reg,\ - adr,bfm,clz,rbit,rev,alu_dsp_reg,\ - shift_imm,shift_reg,\ -- multiple,no_insn")) -+ multiple")) - "cortex_a8_default") - - (define_insn_reservation "cortex_a8_alu_shift" 2 -diff --git a/gcc/config/arm/cortex-a9.md b/gcc/config/arm/cortex-a9.md -index 6402a4438..c8474152c 100644 ---- a/gcc/config/arm/cortex-a9.md -+++ b/gcc/config/arm/cortex-a9.md -@@ -87,7 +87,7 @@ cortex_a9_p1_e2 + cortex_a9_p0_e1 + cortex_a9_p1_e1") - shift_imm,shift_reg,\ - mov_imm,mov_reg,mvn_imm,mvn_reg,\ - mov_shift_reg,mov_shift,\ -- mrs,multiple,no_insn")) -+ mrs,multiple")) - "cortex_a9_p0_default|cortex_a9_p1_default") - - ;; An instruction using the shifter will go down E1. -diff --git a/gcc/config/arm/cortex-m4.md b/gcc/config/arm/cortex-m4.md -index 60038c1e7..f8efcfcfc 100644 ---- a/gcc/config/arm/cortex-m4.md -+++ b/gcc/config/arm/cortex-m4.md -@@ -42,7 +42,7 @@ - logic_shift_reg,logics_shift_reg,\ - mov_imm,mov_reg,mov_shift,mov_shift_reg,\ - mvn_imm,mvn_reg,mvn_shift,mvn_shift_reg,\ -- mrs,multiple,no_insn") -+ mrs,multiple") - (ior (eq_attr "mul32" "yes") - (eq_attr "widen_mul64" "yes")))) - "cortex_m4_ex") -diff --git a/gcc/config/arm/cortex-m7.md b/gcc/config/arm/cortex-m7.md -index e4695ad66..dfe9a742c 100644 ---- a/gcc/config/arm/cortex-m7.md -+++ b/gcc/config/arm/cortex-m7.md -@@ -48,7 +48,7 @@ - logic_shift_imm,logics_shift_imm,\ - alu_shift_reg,alus_shift_reg,\ - logic_shift_reg,logics_shift_reg,\ -- mrs,clz,f_mcr,f_mrc,multiple,no_insn")) -+ mrs,clz,f_mcr,f_mrc,multiple")) - "cm7_i0|cm7_i1,cm7_a0|cm7_a1") - - ;; Simple alu with inline shift operation. -diff --git a/gcc/config/arm/cortex-r4.md b/gcc/config/arm/cortex-r4.md -index d7c0135fc..af5db23a6 100644 ---- a/gcc/config/arm/cortex-r4.md -+++ b/gcc/config/arm/cortex-r4.md -@@ -102,7 +102,7 @@ - (eq_attr "type" "alu_shift_reg,alus_shift_reg,\ - logic_shift_reg,logics_shift_reg,\ - mov_shift_reg,mvn_shift_reg,\ -- mrs,multiple,no_insn")) -+ mrs,multiple")) - "cortex_r4_alu_shift_reg") - - ;; An ALU instruction followed by an ALU instruction with no early dep. 
-diff --git a/gcc/config/arm/fa526.md b/gcc/config/arm/fa526.md -index e6625b011..294b79692 100644 ---- a/gcc/config/arm/fa526.md -+++ b/gcc/config/arm/fa526.md -@@ -68,7 +68,7 @@ - adr,bfm,rev,\ - shift_imm,shift_reg,\ - mov_imm,mov_reg,mvn_imm,mvn_reg,\ -- mrs,multiple,no_insn")) -+ mrs,multiple")) - "fa526_core") - - (define_insn_reservation "526_alu_shift_op" 2 -diff --git a/gcc/config/arm/fa606te.md b/gcc/config/arm/fa606te.md -index f2c104fb1..9007050ed 100644 ---- a/gcc/config/arm/fa606te.md -+++ b/gcc/config/arm/fa606te.md -@@ -73,7 +73,7 @@ - logic_shift_reg,logics_shift_reg,\ - mov_imm,mov_reg,mov_shift,mov_shift_reg,\ - mvn_imm,mvn_reg,mvn_shift,mvn_shift_reg,\ -- mrs,multiple,no_insn")) -+ mrs,multiple")) - "fa606te_core") - - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -diff --git a/gcc/config/arm/fa626te.md b/gcc/config/arm/fa626te.md -index 880090fd7..6bdc2e8b5 100644 ---- a/gcc/config/arm/fa626te.md -+++ b/gcc/config/arm/fa626te.md -@@ -74,7 +74,7 @@ - adr,bfm,rev,\ - shift_imm,shift_reg,\ - mov_imm,mov_reg,mvn_imm,mvn_reg,\ -- mrs,multiple,no_insn")) -+ mrs,multiple")) - "fa626te_core") - - (define_insn_reservation "626te_alu_shift_op" 2 -diff --git a/gcc/config/arm/fa726te.md b/gcc/config/arm/fa726te.md -index cb5fbaf99..f6f2531c8 100644 ---- a/gcc/config/arm/fa726te.md -+++ b/gcc/config/arm/fa726te.md -@@ -91,7 +91,7 @@ - adc_imm,adcs_imm,adc_reg,adcs_reg,\ - adr,bfm,rev,\ - shift_imm,shift_reg,\ -- mrs,multiple,no_insn")) -+ mrs,multiple")) - "fa726te_issue+(fa726te_alu0_pipe|fa726te_alu1_pipe)") - - ;; ALU operations with a shift-by-register operand. -diff --git a/gcc/config/arm/thumb1.md b/gcc/config/arm/thumb1.md -index 041e2db34..f8eb732ac 100644 ---- a/gcc/config/arm/thumb1.md -+++ b/gcc/config/arm/thumb1.md -@@ -985,7 +985,7 @@ - - ;; Thumb block-move insns - --(define_insn "movmem12b" -+(define_insn "cpymem12b" - [(set (mem:SI (match_operand:SI 2 "register_operand" "0")) - (mem:SI (match_operand:SI 3 "register_operand" "1"))) - (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) -@@ -1007,7 +1007,7 @@ - (set_attr "type" "store_12")] - ) - --(define_insn "movmem8b" -+(define_insn "cpymem8b" - [(set (mem:SI (match_operand:SI 2 "register_operand" "0")) - (mem:SI (match_operand:SI 3 "register_operand" "1"))) - (set (mem:SI (plus:SI (match_dup 2) (const_int 4))) -diff --git a/gcc/config/arm/types.md b/gcc/config/arm/types.md -index f8f8dd090..60faad659 100644 ---- a/gcc/config/arm/types.md -+++ b/gcc/config/arm/types.md -@@ -546,6 +546,10 @@ - ; The classification below is for coprocessor instructions - ; - ; coproc -+; -+; The classification below is for TME instructions -+; -+; tme - - (define_attr "type" - "adc_imm,\ -@@ -1091,7 +1095,8 @@ - crypto_sha3,\ - crypto_sm3,\ - crypto_sm4,\ -- coproc" -+ coproc,\ -+ tme" - (const_string "untyped")) - - ; Is this an (integer side) multiply with a 32-bit (or smaller) result? 
-@@ -1215,3 +1220,7 @@ - crypto_sha256_fast, crypto_sha256_slow") - (const_string "yes") - (const_string "no"))) -+ -+(define_insn_reservation "no_reservation" 0 -+ (eq_attr "type" "no_insn") -+ "nothing") -diff --git a/gcc/config/arm/xgene1.md b/gcc/config/arm/xgene1.md -index 14156421d..81498daa0 100644 ---- a/gcc/config/arm/xgene1.md -+++ b/gcc/config/arm/xgene1.md -@@ -64,11 +64,6 @@ - (eq_attr "type" "branch")) - "xgene1_decode1op") - --(define_insn_reservation "xgene1_nop" 1 -- (and (eq_attr "tune" "xgene1") -- (eq_attr "type" "no_insn")) -- "xgene1_decode1op") -- - (define_insn_reservation "xgene1_call" 1 - (and (eq_attr "tune" "xgene1") - (eq_attr "type" "call")) -diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h -index dd0babbd7..31fe3a66d 100644 ---- a/gcc/config/avr/avr-protos.h -+++ b/gcc/config/avr/avr-protos.h -@@ -82,7 +82,7 @@ extern rtx avr_to_int_mode (rtx); - - extern void avr_expand_prologue (void); - extern void avr_expand_epilogue (bool); --extern bool avr_emit_movmemhi (rtx*); -+extern bool avr_emit_cpymemhi (rtx*); - extern int avr_epilogue_uses (int regno); - - extern void avr_output_addr_vec (rtx_insn*, rtx); -@@ -92,7 +92,7 @@ extern const char* avr_out_plus (rtx, rtx*, int* =NULL, int* =NULL, bool =true); - extern const char* avr_out_round (rtx_insn *, rtx*, int* =NULL); - extern const char* avr_out_addto_sp (rtx*, int*); - extern const char* avr_out_xload (rtx_insn *, rtx*, int*); --extern const char* avr_out_movmem (rtx_insn *, rtx*, int*); -+extern const char* avr_out_cpymem (rtx_insn *, rtx*, int*); - extern const char* avr_out_insert_bits (rtx*, int*); - extern bool avr_popcount_each_byte (rtx, int, int); - extern bool avr_has_nibble_0xf (rtx); -diff --git a/gcc/config/avr/avr.c b/gcc/config/avr/avr.c -index cb4b14ae3..3e6e5d2ee 100644 ---- a/gcc/config/avr/avr.c -+++ b/gcc/config/avr/avr.c -@@ -9421,7 +9421,7 @@ avr_adjust_insn_length (rtx_insn *insn, int len) - case ADJUST_LEN_MOV16: output_movhi (insn, op, &len); break; - case ADJUST_LEN_MOV24: avr_out_movpsi (insn, op, &len); break; - case ADJUST_LEN_MOV32: output_movsisf (insn, op, &len); break; -- case ADJUST_LEN_MOVMEM: avr_out_movmem (insn, op, &len); break; -+ case ADJUST_LEN_CPYMEM: avr_out_cpymem (insn, op, &len); break; - case ADJUST_LEN_XLOAD: avr_out_xload (insn, op, &len); break; - case ADJUST_LEN_SEXT: avr_out_sign_extend (insn, op, &len); break; - -@@ -13338,7 +13338,7 @@ avr_emit3_fix_outputs (rtx (*gen)(rtx,rtx,rtx), rtx *op, - } - - --/* Worker function for movmemhi expander. -+/* Worker function for cpymemhi expander. - XOP[0] Destination as MEM:BLK - XOP[1] Source " " - XOP[2] # Bytes to copy -@@ -13347,7 +13347,7 @@ avr_emit3_fix_outputs (rtx (*gen)(rtx,rtx,rtx), rtx *op, - Return FALSE if the operand compination is not supported. */ - - bool --avr_emit_movmemhi (rtx *xop) -+avr_emit_cpymemhi (rtx *xop) - { - HOST_WIDE_INT count; - machine_mode loop_mode; -@@ -13424,14 +13424,14 @@ avr_emit_movmemhi (rtx *xop) - Do the copy-loop inline. */ - - rtx (*fun) (rtx, rtx, rtx) -- = QImode == loop_mode ? gen_movmem_qi : gen_movmem_hi; -+ = QImode == loop_mode ? gen_cpymem_qi : gen_cpymem_hi; - - insn = fun (xas, loop_reg, loop_reg); - } - else - { - rtx (*fun) (rtx, rtx) -- = QImode == loop_mode ? gen_movmemx_qi : gen_movmemx_hi; -+ = QImode == loop_mode ? gen_cpymemx_qi : gen_cpymemx_hi; - - emit_move_insn (gen_rtx_REG (QImode, 23), a_hi8); - -@@ -13445,7 +13445,7 @@ avr_emit_movmemhi (rtx *xop) - } - - --/* Print assembler for movmem_qi, movmem_hi insns... 
-+/* Print assembler for cpymem_qi, cpymem_hi insns... - $0 : Address Space - $1, $2 : Loop register - Z : Source address -@@ -13453,7 +13453,7 @@ avr_emit_movmemhi (rtx *xop) - */ - - const char* --avr_out_movmem (rtx_insn *insn ATTRIBUTE_UNUSED, rtx *op, int *plen) -+avr_out_cpymem (rtx_insn *insn ATTRIBUTE_UNUSED, rtx *op, int *plen) - { - addr_space_t as = (addr_space_t) INTVAL (op[0]); - machine_mode loop_mode = GET_MODE (op[1]); -diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md -index f263b693c..e85bf4963 100644 ---- a/gcc/config/avr/avr.md -+++ b/gcc/config/avr/avr.md -@@ -70,7 +70,7 @@ - - (define_c_enum "unspec" - [UNSPEC_STRLEN -- UNSPEC_MOVMEM -+ UNSPEC_CPYMEM - UNSPEC_INDEX_JMP - UNSPEC_FMUL - UNSPEC_FMULS -@@ -158,7 +158,7 @@ - tsthi, tstpsi, tstsi, compare, compare64, call, - mov8, mov16, mov24, mov32, reload_in16, reload_in24, reload_in32, - ufract, sfract, round, -- xload, movmem, -+ xload, cpymem, - ashlqi, ashrqi, lshrqi, - ashlhi, ashrhi, lshrhi, - ashlsi, ashrsi, lshrsi, -@@ -992,20 +992,20 @@ - ;;========================================================================= - ;; move string (like memcpy) - --(define_expand "movmemhi" -+(define_expand "cpymemhi" - [(parallel [(set (match_operand:BLK 0 "memory_operand" "") - (match_operand:BLK 1 "memory_operand" "")) - (use (match_operand:HI 2 "const_int_operand" "")) - (use (match_operand:HI 3 "const_int_operand" ""))])] - "" - { -- if (avr_emit_movmemhi (operands)) -+ if (avr_emit_cpymemhi (operands)) - DONE; - - FAIL; - }) - --(define_mode_attr MOVMEM_r_d [(QI "r") -+(define_mode_attr CPYMEM_r_d [(QI "r") - (HI "wd")]) - - ;; $0 : Address Space -@@ -1013,23 +1013,23 @@ - ;; R30 : source address - ;; R26 : destination address - --;; "movmem_qi" --;; "movmem_hi" --(define_insn "movmem_" -+;; "cpymem_qi" -+;; "cpymem_hi" -+(define_insn "cpymem_" - [(set (mem:BLK (reg:HI REG_X)) - (mem:BLK (reg:HI REG_Z))) - (unspec [(match_operand:QI 0 "const_int_operand" "n")] -- UNSPEC_MOVMEM) -- (use (match_operand:QIHI 1 "register_operand" "")) -+ UNSPEC_CPYMEM) -+ (use (match_operand:QIHI 1 "register_operand" "")) - (clobber (reg:HI REG_X)) - (clobber (reg:HI REG_Z)) - (clobber (reg:QI LPM_REGNO)) - (clobber (match_operand:QIHI 2 "register_operand" "=1"))] - "" - { -- return avr_out_movmem (insn, operands, NULL); -+ return avr_out_cpymem (insn, operands, NULL); - } -- [(set_attr "adjust_len" "movmem") -+ [(set_attr "adjust_len" "cpymem") - (set_attr "cc" "clobber")]) - - -@@ -1039,14 +1039,14 @@ - ;; R23:Z : 24-bit source address - ;; R26 : 16-bit destination address - --;; "movmemx_qi" --;; "movmemx_hi" --(define_insn "movmemx_" -+;; "cpymemx_qi" -+;; "cpymemx_hi" -+(define_insn "cpymemx_" - [(set (mem:BLK (reg:HI REG_X)) - (mem:BLK (lo_sum:PSI (reg:QI 23) - (reg:HI REG_Z)))) - (unspec [(match_operand:QI 0 "const_int_operand" "n")] -- UNSPEC_MOVMEM) -+ UNSPEC_CPYMEM) - (use (reg:QIHI 24)) - (clobber (reg:HI REG_X)) - (clobber (reg:HI REG_Z)) -diff --git a/gcc/config/bfin/bfin-protos.h b/gcc/config/bfin/bfin-protos.h -index 64a184275..7d0f705e0 100644 ---- a/gcc/config/bfin/bfin-protos.h -+++ b/gcc/config/bfin/bfin-protos.h -@@ -81,7 +81,7 @@ extern bool expand_move (rtx *, machine_mode); - extern void bfin_expand_call (rtx, rtx, rtx, rtx, int); - extern bool bfin_longcall_p (rtx, int); - extern bool bfin_dsp_memref_p (rtx); --extern bool bfin_expand_movmem (rtx, rtx, rtx, rtx); -+extern bool bfin_expand_cpymem (rtx, rtx, rtx, rtx); - - extern enum reg_class secondary_input_reload_class (enum reg_class, - machine_mode, -diff 
--git a/gcc/config/bfin/bfin.c b/gcc/config/bfin/bfin.c -index 97c2c12d5..288a2ff59 100644 ---- a/gcc/config/bfin/bfin.c -+++ b/gcc/config/bfin/bfin.c -@@ -3208,7 +3208,7 @@ output_pop_multiple (rtx insn, rtx *operands) - /* Adjust DST and SRC by OFFSET bytes, and generate one move in mode MODE. */ - - static void --single_move_for_movmem (rtx dst, rtx src, machine_mode mode, HOST_WIDE_INT offset) -+single_move_for_cpymem (rtx dst, rtx src, machine_mode mode, HOST_WIDE_INT offset) - { - rtx scratch = gen_reg_rtx (mode); - rtx srcmem, dstmem; -@@ -3224,7 +3224,7 @@ single_move_for_movmem (rtx dst, rtx src, machine_mode mode, HOST_WIDE_INT offse - back on a different method. */ - - bool --bfin_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp) -+bfin_expand_cpymem (rtx dst, rtx src, rtx count_exp, rtx align_exp) - { - rtx srcreg, destreg, countreg; - HOST_WIDE_INT align = 0; -@@ -3269,7 +3269,7 @@ bfin_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp) - { - if ((count & ~3) == 4) - { -- single_move_for_movmem (dst, src, SImode, offset); -+ single_move_for_cpymem (dst, src, SImode, offset); - offset = 4; - } - else if (count & ~3) -@@ -3282,7 +3282,7 @@ bfin_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp) - } - if (count & 2) - { -- single_move_for_movmem (dst, src, HImode, offset); -+ single_move_for_cpymem (dst, src, HImode, offset); - offset += 2; - } - } -@@ -3290,7 +3290,7 @@ bfin_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp) - { - if ((count & ~1) == 2) - { -- single_move_for_movmem (dst, src, HImode, offset); -+ single_move_for_cpymem (dst, src, HImode, offset); - offset = 2; - } - else if (count & ~1) -@@ -3304,7 +3304,7 @@ bfin_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp) - } - if (count & 1) - { -- single_move_for_movmem (dst, src, QImode, offset); -+ single_move_for_cpymem (dst, src, QImode, offset); - } - return true; - } -diff --git a/gcc/config/bfin/bfin.h b/gcc/config/bfin/bfin.h -index 19b7f819d..4aba596f6 100644 ---- a/gcc/config/bfin/bfin.h -+++ b/gcc/config/bfin/bfin.h -@@ -793,7 +793,7 @@ typedef struct { - #define MOVE_MAX UNITS_PER_WORD - - /* If a memory-to-memory move would take MOVE_RATIO or more simple -- move-instruction pairs, we will do a movmem or libcall instead. */ -+ move-instruction pairs, we will do a cpymem or libcall instead. 
*/ - - #define MOVE_RATIO(speed) 5 - -diff --git a/gcc/config/bfin/bfin.md b/gcc/config/bfin/bfin.md -index ac5892424..6ac208d04 100644 ---- a/gcc/config/bfin/bfin.md -+++ b/gcc/config/bfin/bfin.md -@@ -2316,14 +2316,14 @@ - (set_attr "length" "16") - (set_attr "seq_insns" "multi")]) - --(define_expand "movmemsi" -+(define_expand "cpymemsi" - [(match_operand:BLK 0 "general_operand" "") - (match_operand:BLK 1 "general_operand" "") - (match_operand:SI 2 "const_int_operand" "") - (match_operand:SI 3 "const_int_operand" "")] - "" - { -- if (bfin_expand_movmem (operands[0], operands[1], operands[2], operands[3])) -+ if (bfin_expand_cpymem (operands[0], operands[1], operands[2], operands[3])) - DONE; - FAIL; - }) -diff --git a/gcc/config/c6x/c6x-protos.h b/gcc/config/c6x/c6x-protos.h -index a657969a2..8c04c315a 100644 ---- a/gcc/config/c6x/c6x-protos.h -+++ b/gcc/config/c6x/c6x-protos.h -@@ -35,7 +35,7 @@ extern bool c6x_long_call_p (rtx); - extern void c6x_expand_call (rtx, rtx, bool); - extern rtx c6x_expand_compare (rtx, machine_mode); - extern bool c6x_force_op_for_comparison_p (enum rtx_code, rtx); --extern bool c6x_expand_movmem (rtx, rtx, rtx, rtx, rtx, rtx); -+extern bool c6x_expand_cpymem (rtx, rtx, rtx, rtx, rtx, rtx); - - extern rtx c6x_subword (rtx, bool); - extern void split_di (rtx *, int, rtx *, rtx *); -diff --git a/gcc/config/c6x/c6x.c b/gcc/config/c6x/c6x.c -index 9a07c4013..e4176774b 100644 ---- a/gcc/config/c6x/c6x.c -+++ b/gcc/config/c6x/c6x.c -@@ -1683,10 +1683,10 @@ c6x_valid_mask_p (HOST_WIDE_INT val) - return true; - } - --/* Expand a block move for a movmemM pattern. */ -+/* Expand a block move for a cpymemM pattern. */ - - bool --c6x_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp, -+c6x_expand_cpymem (rtx dst, rtx src, rtx count_exp, rtx align_exp, - rtx expected_align_exp ATTRIBUTE_UNUSED, - rtx expected_size_exp ATTRIBUTE_UNUSED) - { -diff --git a/gcc/config/c6x/c6x.md b/gcc/config/c6x/c6x.md -index 8218e1dad..f9bf9ba99 100644 ---- a/gcc/config/c6x/c6x.md -+++ b/gcc/config/c6x/c6x.md -@@ -2844,7 +2844,7 @@ - ;; Block moves - ;; ------------------------------------------------------------------------- - --(define_expand "movmemsi" -+(define_expand "cpymemsi" - [(use (match_operand:BLK 0 "memory_operand" "")) - (use (match_operand:BLK 1 "memory_operand" "")) - (use (match_operand:SI 2 "nonmemory_operand" "")) -@@ -2853,7 +2853,7 @@ - (use (match_operand:SI 5 "const_int_operand" ""))] - "" - { -- if (c6x_expand_movmem (operands[0], operands[1], operands[2], operands[3], -+ if (c6x_expand_cpymem (operands[0], operands[1], operands[2], operands[3], - operands[4], operands[5])) - DONE; - else -diff --git a/gcc/config/darwin.c b/gcc/config/darwin.c -index a7610829f..dcd69698f 100644 ---- a/gcc/config/darwin.c -+++ b/gcc/config/darwin.c -@@ -2150,7 +2150,7 @@ darwin_emit_unwind_label (FILE *file, tree decl, int for_eh, int empty) - if (! for_eh || ! ld_needs_eh_markers) - return; - -- /* FIXME: This only works when the eh for all sections of a function are -+ /* FIXME: This only works when the eh for all sections of a function are - emitted at the same time. If that changes, we would need to use a lookup - table of some form to determine what to do. Also, we should emit the - unadorned label for the partition containing the public label for a -@@ -3325,7 +3325,7 @@ darwin_override_options (void) - - /* Linkers >= ld64-62.1 (at least) are capable of making the necessary PIC - indirections and we no longer need to emit pic symbol stubs. 
-- However, if we are generating code for earlier ones (or for use in the -+ However, if we are generating code for earlier ones (or for use in the - kernel) the stubs might still be required, and this will be set true. - If the user sets it on or off - then that takes precedence. - -@@ -3334,18 +3334,18 @@ darwin_override_options (void) - - if (!global_options_set.x_darwin_symbol_stubs) - { -- if (darwin_target_linker) -+ if (darwin_target_linker) - { - if (strverscmp (darwin_target_linker, MIN_LD64_OMIT_STUBS) < 0) - { - darwin_symbol_stubs = true; - ld_needs_eh_markers = true; - } -- } -+ } - else if (generating_for_darwin_version < 9) - { - /* If we don't know the linker version and we're targeting an old -- system, we know no better than to assume the use of an earlier -+ system, we know no better than to assume the use of an earlier - linker. */ - darwin_symbol_stubs = true; - ld_needs_eh_markers = true; -@@ -3354,7 +3354,7 @@ darwin_override_options (void) - else if (DARWIN_X86 && darwin_symbol_stubs && TARGET_64BIT) - { - inform (input_location, -- "%<-msymbol-stubs%> is not required for 64b code (ignored)"); -+ "%<-mpic-symbol-stubs%> is not required for 64b code (ignored)"); - darwin_symbol_stubs = false; - } - -diff --git a/gcc/config/frv/frv.md b/gcc/config/frv/frv.md -index 064bf53ea..6e8db59fd 100644 ---- a/gcc/config/frv/frv.md -+++ b/gcc/config/frv/frv.md -@@ -1887,7 +1887,7 @@ - ;; Argument 2 is the length - ;; Argument 3 is the alignment - --(define_expand "movmemsi" -+(define_expand "cpymemsi" - [(parallel [(set (match_operand:BLK 0 "" "") - (match_operand:BLK 1 "" "")) - (use (match_operand:SI 2 "" "")) -diff --git a/gcc/config/ft32/ft32.md b/gcc/config/ft32/ft32.md -index de2394644..9e31f2ca7 100644 ---- a/gcc/config/ft32/ft32.md -+++ b/gcc/config/ft32/ft32.md -@@ -851,7 +851,7 @@ - "stpcpy %b1,%b2 # %0 %b1 %b2" - ) - --(define_insn "movmemsi" -+(define_insn "cpymemsi" - [(set (match_operand:BLK 0 "memory_operand" "=W,BW") - (match_operand:BLK 1 "memory_operand" "W,BW")) - (use (match_operand:SI 2 "ft32_imm_operand" "KA,KA")) -diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c -index eb06ff9e0..480bb22ee 100644 ---- a/gcc/config/gcn/gcn.c -+++ b/gcc/config/gcn/gcn.c -@@ -2495,7 +2495,7 @@ gcn_gimplify_va_arg_expr (tree valist, tree type, - tree t, u; - bool indirect; - -- indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0); -+ indirect = pass_va_arg_by_reference (type); - if (indirect) - { - type = ptr; -diff --git a/gcc/config/h8300/h8300.md b/gcc/config/h8300/h8300.md -index eb0ae835f..42610fddb 100644 ---- a/gcc/config/h8300/h8300.md -+++ b/gcc/config/h8300/h8300.md -@@ -474,11 +474,11 @@ - (set_attr "length_table" "*,movl") - (set_attr "cc" "set_zn,set_znv")]) - --;; Implement block moves using movmd. Defining movmemsi allows the full -+;; Implement block copies using movmd. Defining cpymemsi allows the full - ;; range of constant lengths (up to 0x40000 bytes when using movmd.l). - ;; See h8sx_emit_movmd for details. - --(define_expand "movmemsi" -+(define_expand "cpymemsi" - [(use (match_operand:BLK 0 "memory_operand" "")) - (use (match_operand:BLK 1 "memory_operand" "")) - (use (match_operand:SI 2 "" "")) -diff --git a/gcc/config/i386/i386-builtins.c b/gcc/config/i386/i386-builtins.c -new file mode 100644 -index 000000000..6afb246eb ---- /dev/null -+++ b/gcc/config/i386/i386-builtins.c -@@ -0,0 +1,2539 @@ -+/* Copyright (C) 1988-2019 Free Software Foundation, Inc. -+ -+This file is part of GCC. 
-+ -+GCC is free software; you can redistribute it and/or modify -+it under the terms of the GNU General Public License as published by -+the Free Software Foundation; either version 3, or (at your option) -+any later version. -+ -+GCC is distributed in the hope that it will be useful, -+but WITHOUT ANY WARRANTY; without even the implied warranty of -+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+GNU General Public License for more details. -+ -+You should have received a copy of the GNU General Public License -+along with GCC; see the file COPYING3. If not see -+. */ -+ -+#define IN_TARGET_CODE 1 -+ -+#include "config.h" -+#include "system.h" -+#include "coretypes.h" -+#include "backend.h" -+#include "rtl.h" -+#include "tree.h" -+#include "memmodel.h" -+#include "gimple.h" -+#include "cfghooks.h" -+#include "cfgloop.h" -+#include "df.h" -+#include "tm_p.h" -+#include "stringpool.h" -+#include "expmed.h" -+#include "optabs.h" -+#include "regs.h" -+#include "emit-rtl.h" -+#include "recog.h" -+#include "cgraph.h" -+#include "diagnostic.h" -+#include "cfgbuild.h" -+#include "alias.h" -+#include "fold-const.h" -+#include "attribs.h" -+#include "calls.h" -+#include "stor-layout.h" -+#include "varasm.h" -+#include "output.h" -+#include "insn-attr.h" -+#include "flags.h" -+#include "except.h" -+#include "explow.h" -+#include "expr.h" -+#include "cfgrtl.h" -+#include "common/common-target.h" -+#include "langhooks.h" -+#include "reload.h" -+#include "gimplify.h" -+#include "dwarf2.h" -+#include "tm-constrs.h" -+#include "params.h" -+#include "cselib.h" -+#include "sched-int.h" -+#include "opts.h" -+#include "tree-pass.h" -+#include "context.h" -+#include "pass_manager.h" -+#include "target-globals.h" -+#include "gimple-iterator.h" -+#include "tree-vectorizer.h" -+#include "shrink-wrap.h" -+#include "builtins.h" -+#include "rtl-iter.h" -+#include "tree-iterator.h" -+#include "dbgcnt.h" -+#include "case-cfn-macros.h" -+#include "dojump.h" -+#include "fold-const-call.h" -+#include "tree-vrp.h" -+#include "tree-ssanames.h" -+#include "selftest.h" -+#include "selftest-rtl.h" -+#include "print-rtl.h" -+#include "intl.h" -+#include "ifcvt.h" -+#include "symbol-summary.h" -+#include "ipa-prop.h" -+#include "ipa-fnsummary.h" -+#include "wide-int-bitmask.h" -+#include "tree-vector-builder.h" -+#include "debug.h" -+#include "dwarf2out.h" -+#include "i386-builtins.h" -+ -+#undef BDESC -+#undef BDESC_FIRST -+#undef BDESC_END -+ -+/* Macros for verification of enum ix86_builtins order. 
*/ -+#define BDESC_VERIFY(x, y, z) \ -+ gcc_checking_assert ((x) == (enum ix86_builtins) ((y) + (z))) -+#define BDESC_VERIFYS(x, y, z) \ -+ STATIC_ASSERT ((x) == (enum ix86_builtins) ((y) + (z))) -+ -+BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPESTR_FIRST, -+ IX86_BUILTIN__BDESC_COMI_LAST, 1); -+BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPISTR_FIRST, -+ IX86_BUILTIN__BDESC_PCMPESTR_LAST, 1); -+BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST, -+ IX86_BUILTIN__BDESC_PCMPISTR_LAST, 1); -+BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS_FIRST, -+ IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST, 1); -+BDESC_VERIFYS (IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST, -+ IX86_BUILTIN__BDESC_ARGS_LAST, 1); -+BDESC_VERIFYS (IX86_BUILTIN__BDESC_MULTI_ARG_FIRST, -+ IX86_BUILTIN__BDESC_ROUND_ARGS_LAST, 1); -+BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_FIRST, -+ IX86_BUILTIN__BDESC_MULTI_ARG_LAST, 1); -+BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_NORMAL_FIRST, -+ IX86_BUILTIN__BDESC_CET_LAST, 1); -+BDESC_VERIFYS (IX86_BUILTIN_MAX, -+ IX86_BUILTIN__BDESC_CET_NORMAL_LAST, 1); -+ -+ -+/* Table for the ix86 builtin non-function types. */ -+static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1]; -+ -+/* Retrieve an element from the above table, building some of -+ the types lazily. */ -+ -+static tree -+ix86_get_builtin_type (enum ix86_builtin_type tcode) -+{ -+ unsigned int index; -+ tree type, itype; -+ -+ gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab)); -+ -+ type = ix86_builtin_type_tab[(int) tcode]; -+ if (type != NULL) -+ return type; -+ -+ gcc_assert (tcode > IX86_BT_LAST_PRIM); -+ if (tcode <= IX86_BT_LAST_VECT) -+ { -+ machine_mode mode; -+ -+ index = tcode - IX86_BT_LAST_PRIM - 1; -+ itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]); -+ mode = ix86_builtin_type_vect_mode[index]; -+ -+ type = build_vector_type_for_mode (itype, mode); -+ } -+ else -+ { -+ int quals; -+ -+ index = tcode - IX86_BT_LAST_VECT - 1; -+ if (tcode <= IX86_BT_LAST_PTR) -+ quals = TYPE_UNQUALIFIED; -+ else -+ quals = TYPE_QUAL_CONST; -+ -+ itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]); -+ if (quals != TYPE_UNQUALIFIED) -+ itype = build_qualified_type (itype, quals); -+ -+ type = build_pointer_type (itype); -+ } -+ -+ ix86_builtin_type_tab[(int) tcode] = type; -+ return type; -+} -+ -+/* Table for the ix86 builtin function types. */ -+static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1]; -+ -+/* Retrieve an element from the above table, building some of -+ the types lazily. 
*/ -+ -+static tree -+ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode) -+{ -+ tree type; -+ -+ gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab)); -+ -+ type = ix86_builtin_func_type_tab[(int) tcode]; -+ if (type != NULL) -+ return type; -+ -+ if (tcode <= IX86_BT_LAST_FUNC) -+ { -+ unsigned start = ix86_builtin_func_start[(int) tcode]; -+ unsigned after = ix86_builtin_func_start[(int) tcode + 1]; -+ tree rtype, atype, args = void_list_node; -+ unsigned i; -+ -+ rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]); -+ for (i = after - 1; i > start; --i) -+ { -+ atype = ix86_get_builtin_type (ix86_builtin_func_args[i]); -+ args = tree_cons (NULL, atype, args); -+ } -+ -+ type = build_function_type (rtype, args); -+ } -+ else -+ { -+ unsigned index = tcode - IX86_BT_LAST_FUNC - 1; -+ enum ix86_builtin_func_type icode; -+ -+ icode = ix86_builtin_func_alias_base[index]; -+ type = ix86_get_builtin_func_type (icode); -+ } -+ -+ ix86_builtin_func_type_tab[(int) tcode] = type; -+ return type; -+} -+ -+/* Table for the ix86 builtin decls. */ -+static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX]; -+ -+struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX]; -+ -+tree get_ix86_builtin (enum ix86_builtins c) -+{ -+ return ix86_builtins[c]; -+} -+ -+/* Bits that can still enable any inclusion of a builtin. */ -+HOST_WIDE_INT deferred_isa_values = 0; -+HOST_WIDE_INT deferred_isa_values2 = 0; -+ -+/* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the -+ MASK and MASK2 of which isa_flags and ix86_isa_flags2 to use in the -+ ix86_builtins_isa array. Stores the function decl in the ix86_builtins -+ array. Returns the function decl or NULL_TREE, if the builtin was not -+ added. -+ -+ If the front end has a special hook for builtin functions, delay adding -+ builtin functions that aren't in the current ISA until the ISA is changed -+ with function specific optimization. Doing so, can save about 300K for the -+ default compiler. When the builtin is expanded, check at that time whether -+ it is valid. -+ -+ If the front end doesn't have a special hook, record all builtins, even if -+ it isn't an instruction set in the current ISA in case the user uses -+ function specific options for a different ISA, so that we don't get scope -+ errors if a builtin is added in the middle of a function scope. */ -+ -+static inline tree -+def_builtin (HOST_WIDE_INT mask, HOST_WIDE_INT mask2, -+ const char *name, -+ enum ix86_builtin_func_type tcode, -+ enum ix86_builtins code) -+{ -+ tree decl = NULL_TREE; -+ -+ /* An instruction may be 64bit only regardless of ISAs. */ -+ if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT) -+ { -+ ix86_builtins_isa[(int) code].isa = mask; -+ ix86_builtins_isa[(int) code].isa2 = mask2; -+ -+ mask &= ~OPTION_MASK_ISA_64BIT; -+ -+ /* Filter out the masks most often ored together with others. 
*/ -+ if ((mask & ix86_isa_flags & OPTION_MASK_ISA_AVX512VL) -+ && mask != OPTION_MASK_ISA_AVX512VL) -+ mask &= ~OPTION_MASK_ISA_AVX512VL; -+ if ((mask & ix86_isa_flags & OPTION_MASK_ISA_AVX512BW) -+ && mask != OPTION_MASK_ISA_AVX512BW) -+ mask &= ~OPTION_MASK_ISA_AVX512BW; -+ -+ if (((mask2 == 0 || (mask2 & ix86_isa_flags2) != 0) -+ && (mask == 0 || (mask & ix86_isa_flags) != 0)) -+ || (lang_hooks.builtin_function -+ == lang_hooks.builtin_function_ext_scope)) -+ { -+ tree type = ix86_get_builtin_func_type (tcode); -+ decl = add_builtin_function (name, type, code, BUILT_IN_MD, -+ NULL, NULL_TREE); -+ ix86_builtins[(int) code] = decl; -+ ix86_builtins_isa[(int) code].set_and_not_built_p = false; -+ } -+ else -+ { -+ /* Just MASK and MASK2 where set_and_not_built_p == true can potentially -+ include a builtin. */ -+ deferred_isa_values |= mask; -+ deferred_isa_values2 |= mask2; -+ ix86_builtins[(int) code] = NULL_TREE; -+ ix86_builtins_isa[(int) code].tcode = tcode; -+ ix86_builtins_isa[(int) code].name = name; -+ ix86_builtins_isa[(int) code].const_p = false; -+ ix86_builtins_isa[(int) code].pure_p = false; -+ ix86_builtins_isa[(int) code].set_and_not_built_p = true; -+ } -+ } -+ -+ return decl; -+} -+ -+/* Like def_builtin, but also marks the function decl "const". */ -+ -+static inline tree -+def_builtin_const (HOST_WIDE_INT mask, HOST_WIDE_INT mask2, const char *name, -+ enum ix86_builtin_func_type tcode, enum ix86_builtins code) -+{ -+ tree decl = def_builtin (mask, mask2, name, tcode, code); -+ if (decl) -+ TREE_READONLY (decl) = 1; -+ else -+ ix86_builtins_isa[(int) code].const_p = true; -+ -+ return decl; -+} -+ -+/* Like def_builtin, but also marks the function decl "pure". */ -+ -+static inline tree -+def_builtin_pure (HOST_WIDE_INT mask, HOST_WIDE_INT mask2, const char *name, -+ enum ix86_builtin_func_type tcode, enum ix86_builtins code) -+{ -+ tree decl = def_builtin (mask, mask2, name, tcode, code); -+ if (decl) -+ DECL_PURE_P (decl) = 1; -+ else -+ ix86_builtins_isa[(int) code].pure_p = true; -+ -+ return decl; -+} -+ -+/* Add any new builtin functions for a given ISA that may not have been -+ declared. This saves a bit of space compared to adding all of the -+ declarations to the tree, even if we didn't use them. */ -+ -+void -+ix86_add_new_builtins (HOST_WIDE_INT isa, HOST_WIDE_INT isa2) -+{ -+ isa &= ~OPTION_MASK_ISA_64BIT; -+ -+ if ((isa & deferred_isa_values) == 0 -+ && (isa2 & deferred_isa_values2) == 0) -+ return; -+ -+ /* Bits in ISA value can be removed from potential isa values. */ -+ deferred_isa_values &= ~isa; -+ deferred_isa_values2 &= ~isa2; -+ -+ int i; -+ tree saved_current_target_pragma = current_target_pragma; -+ current_target_pragma = NULL_TREE; -+ -+ for (i = 0; i < (int)IX86_BUILTIN_MAX; i++) -+ { -+ if (((ix86_builtins_isa[i].isa & isa) != 0 -+ || (ix86_builtins_isa[i].isa2 & isa2) != 0) -+ && ix86_builtins_isa[i].set_and_not_built_p) -+ { -+ tree decl, type; -+ -+ /* Don't define the builtin again. */ -+ ix86_builtins_isa[i].set_and_not_built_p = false; -+ -+ type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode); -+ decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name, -+ type, i, BUILT_IN_MD, NULL, -+ NULL_TREE); -+ -+ ix86_builtins[i] = decl; -+ if (ix86_builtins_isa[i].const_p) -+ TREE_READONLY (decl) = 1; -+ } -+ } -+ -+ current_target_pragma = saved_current_target_pragma; -+} -+ -+/* TM vector builtins. */ -+ -+/* Reuse the existing x86-specific `struct builtin_description' cause -+ we're lazy. Add casts to make them fit. 
*/ -+static const struct builtin_description bdesc_tm[] = -+{ -+ { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI }, -+ { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI }, -+ { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI }, -+ { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, -+ { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, -+ { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, -+ { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, -+ -+ { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF }, -+ { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF }, -+ { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF }, -+ { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF }, -+ { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF }, -+ { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF }, -+ { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF }, -+ -+ { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF }, -+ { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF }, -+ { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF }, -+ { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF }, -+ { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF }, -+ { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF }, -+ { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF }, -+ -+ { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID }, -+ { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID }, -+ { OPTION_MASK_ISA_AVX, 0, 
CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID }, -+}; -+ -+/* Initialize the transactional memory vector load/store builtins. */ -+ -+static void -+ix86_init_tm_builtins (void) -+{ -+ enum ix86_builtin_func_type ftype; -+ const struct builtin_description *d; -+ size_t i; -+ tree decl; -+ tree attrs_load, attrs_type_load, attrs_store, attrs_type_store; -+ tree attrs_log, attrs_type_log; -+ -+ if (!flag_tm) -+ return; -+ -+ /* If there are no builtins defined, we must be compiling in a -+ language without trans-mem support. */ -+ if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1)) -+ return; -+ -+ /* Use whatever attributes a normal TM load has. */ -+ decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1); -+ attrs_load = DECL_ATTRIBUTES (decl); -+ attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl)); -+ /* Use whatever attributes a normal TM store has. */ -+ decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1); -+ attrs_store = DECL_ATTRIBUTES (decl); -+ attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl)); -+ /* Use whatever attributes a normal TM log has. */ -+ decl = builtin_decl_explicit (BUILT_IN_TM_LOG); -+ attrs_log = DECL_ATTRIBUTES (decl); -+ attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl)); -+ -+ for (i = 0, d = bdesc_tm; -+ i < ARRAY_SIZE (bdesc_tm); -+ i++, d++) -+ { -+ if ((d->mask & ix86_isa_flags) != 0 -+ || (lang_hooks.builtin_function -+ == lang_hooks.builtin_function_ext_scope)) -+ { -+ tree type, attrs, attrs_type; -+ enum built_in_function code = (enum built_in_function) d->code; -+ -+ ftype = (enum ix86_builtin_func_type) d->flag; -+ type = ix86_get_builtin_func_type (ftype); -+ -+ if (BUILTIN_TM_LOAD_P (code)) -+ { -+ attrs = attrs_load; -+ attrs_type = attrs_type_load; -+ } -+ else if (BUILTIN_TM_STORE_P (code)) -+ { -+ attrs = attrs_store; -+ attrs_type = attrs_type_store; -+ } -+ else -+ { -+ attrs = attrs_log; -+ attrs_type = attrs_type_log; -+ } -+ decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL, -+ /* The builtin without the prefix for -+ calling it directly. */ -+ d->name + strlen ("__builtin_"), -+ attrs); -+ /* add_builtin_function() will set the DECL_ATTRIBUTES, now -+ set the TYPE_ATTRIBUTES. */ -+ decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN); -+ -+ set_builtin_decl (code, decl, false); -+ } -+ } -+} -+ -+/* Set up all the MMX/SSE builtins, even builtins for instructions that are not -+ in the current target ISA to allow the user to compile particular modules -+ with different target specific options that differ from the command line -+ options. */ -+static void -+ix86_init_mmx_sse_builtins (void) -+{ -+ const struct builtin_description * d; -+ enum ix86_builtin_func_type ftype; -+ size_t i; -+ -+ /* Add all special builtins with variable number of operands. */ -+ for (i = 0, d = bdesc_special_args; -+ i < ARRAY_SIZE (bdesc_special_args); -+ i++, d++) -+ { -+ BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST, i); -+ if (d->name == 0) -+ continue; -+ -+ ftype = (enum ix86_builtin_func_type) d->flag; -+ def_builtin (d->mask, d->mask2, d->name, ftype, d->code); -+ } -+ BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST, -+ IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST, -+ ARRAY_SIZE (bdesc_special_args) - 1); -+ -+ /* Add all builtins with variable number of operands. 
*/ -+ for (i = 0, d = bdesc_args; -+ i < ARRAY_SIZE (bdesc_args); -+ i++, d++) -+ { -+ BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ARGS_FIRST, i); -+ if (d->name == 0) -+ continue; -+ -+ ftype = (enum ix86_builtin_func_type) d->flag; -+ def_builtin_const (d->mask, d->mask2, d->name, ftype, d->code); -+ } -+ BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS_LAST, -+ IX86_BUILTIN__BDESC_ARGS_FIRST, -+ ARRAY_SIZE (bdesc_args) - 1); -+ -+ /* Add all builtins with rounding. */ -+ for (i = 0, d = bdesc_round_args; -+ i < ARRAY_SIZE (bdesc_round_args); -+ i++, d++) -+ { -+ BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST, i); -+ if (d->name == 0) -+ continue; -+ -+ ftype = (enum ix86_builtin_func_type) d->flag; -+ def_builtin_const (d->mask, d->mask2, d->name, ftype, d->code); -+ } -+ BDESC_VERIFYS (IX86_BUILTIN__BDESC_ROUND_ARGS_LAST, -+ IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST, -+ ARRAY_SIZE (bdesc_round_args) - 1); -+ -+ /* pcmpestr[im] insns. */ -+ for (i = 0, d = bdesc_pcmpestr; -+ i < ARRAY_SIZE (bdesc_pcmpestr); -+ i++, d++) -+ { -+ BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_PCMPESTR_FIRST, i); -+ if (d->code == IX86_BUILTIN_PCMPESTRM128) -+ ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT; -+ else -+ ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT; -+ def_builtin_const (d->mask, d->mask2, d->name, ftype, d->code); -+ } -+ BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPESTR_LAST, -+ IX86_BUILTIN__BDESC_PCMPESTR_FIRST, -+ ARRAY_SIZE (bdesc_pcmpestr) - 1); -+ -+ /* pcmpistr[im] insns. */ -+ for (i = 0, d = bdesc_pcmpistr; -+ i < ARRAY_SIZE (bdesc_pcmpistr); -+ i++, d++) -+ { -+ BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_PCMPISTR_FIRST, i); -+ if (d->code == IX86_BUILTIN_PCMPISTRM128) -+ ftype = V16QI_FTYPE_V16QI_V16QI_INT; -+ else -+ ftype = INT_FTYPE_V16QI_V16QI_INT; -+ def_builtin_const (d->mask, d->mask2, d->name, ftype, d->code); -+ } -+ BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPISTR_LAST, -+ IX86_BUILTIN__BDESC_PCMPISTR_FIRST, -+ ARRAY_SIZE (bdesc_pcmpistr) - 1); -+ -+ /* comi/ucomi insns. */ -+ for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++) -+ { -+ BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_COMI_FIRST, i); -+ if (d->mask == OPTION_MASK_ISA_SSE2) -+ ftype = INT_FTYPE_V2DF_V2DF; -+ else -+ ftype = INT_FTYPE_V4SF_V4SF; -+ def_builtin_const (d->mask, d->mask2, d->name, ftype, d->code); -+ } -+ BDESC_VERIFYS (IX86_BUILTIN__BDESC_COMI_LAST, -+ IX86_BUILTIN__BDESC_COMI_FIRST, -+ ARRAY_SIZE (bdesc_comi) - 1); -+ -+ /* SSE */ -+ def_builtin (OPTION_MASK_ISA_SSE, 0, "__builtin_ia32_ldmxcsr", -+ VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR); -+ def_builtin_pure (OPTION_MASK_ISA_SSE, 0, "__builtin_ia32_stmxcsr", -+ UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR); -+ -+ /* SSE or 3DNow!A */ -+ def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A -+ /* As it uses V4HImode, we have to require -mmmx too. */ -+ | OPTION_MASK_ISA_MMX, 0, -+ "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR, -+ IX86_BUILTIN_MASKMOVQ); -+ -+ /* SSE2 */ -+ def_builtin (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_maskmovdqu", -+ VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU); -+ -+ def_builtin (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_clflush", -+ VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH); -+ x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_mfence", -+ VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE); -+ -+ /* SSE3. 
*/ -+ def_builtin (OPTION_MASK_ISA_SSE3, 0, "__builtin_ia32_monitor", -+ VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR); -+ def_builtin (OPTION_MASK_ISA_SSE3, 0, "__builtin_ia32_mwait", -+ VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT); -+ -+ /* AES */ -+ def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0, -+ "__builtin_ia32_aesenc128", -+ V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128); -+ def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0, -+ "__builtin_ia32_aesenclast128", -+ V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128); -+ def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0, -+ "__builtin_ia32_aesdec128", -+ V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128); -+ def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0, -+ "__builtin_ia32_aesdeclast128", -+ V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128); -+ def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0, -+ "__builtin_ia32_aesimc128", -+ V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128); -+ def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0, -+ "__builtin_ia32_aeskeygenassist128", -+ V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128); -+ -+ /* PCLMUL */ -+ def_builtin_const (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2, 0, -+ "__builtin_ia32_pclmulqdq128", -+ V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128); -+ -+ /* RDRND */ -+ def_builtin (OPTION_MASK_ISA_RDRND, 0, "__builtin_ia32_rdrand16_step", -+ INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP); -+ def_builtin (OPTION_MASK_ISA_RDRND, 0, "__builtin_ia32_rdrand32_step", -+ INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP); -+ def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT, 0, -+ "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG, -+ IX86_BUILTIN_RDRAND64_STEP); -+ -+ /* AVX2 */ -+ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv2df", -+ V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT, -+ IX86_BUILTIN_GATHERSIV2DF); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv4df", -+ V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT, -+ IX86_BUILTIN_GATHERSIV4DF); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv2df", -+ V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT, -+ IX86_BUILTIN_GATHERDIV2DF); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv4df", -+ V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT, -+ IX86_BUILTIN_GATHERDIV4DF); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv4sf", -+ V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT, -+ IX86_BUILTIN_GATHERSIV4SF); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv8sf", -+ V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT, -+ IX86_BUILTIN_GATHERSIV8SF); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv4sf", -+ V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT, -+ IX86_BUILTIN_GATHERDIV4SF); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv4sf256", -+ V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT, -+ IX86_BUILTIN_GATHERDIV8SF); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv2di", -+ V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT, -+ IX86_BUILTIN_GATHERSIV2DI); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv4di", -+ V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT, -+ IX86_BUILTIN_GATHERSIV4DI); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv2di", -+ V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT, -+ IX86_BUILTIN_GATHERDIV2DI); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX2, 
0, "__builtin_ia32_gatherdiv4di", -+ V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT, -+ IX86_BUILTIN_GATHERDIV4DI); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv4si", -+ V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT, -+ IX86_BUILTIN_GATHERSIV4SI); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv8si", -+ V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT, -+ IX86_BUILTIN_GATHERSIV8SI); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv4si", -+ V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT, -+ IX86_BUILTIN_GATHERDIV4SI); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv4si256", -+ V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT, -+ IX86_BUILTIN_GATHERDIV8SI); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatheraltsiv4df ", -+ V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT, -+ IX86_BUILTIN_GATHERALTSIV4DF); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatheraltdiv8sf ", -+ V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT, -+ IX86_BUILTIN_GATHERALTDIV8SF); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatheraltsiv4di ", -+ V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT, -+ IX86_BUILTIN_GATHERALTSIV4DI); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatheraltdiv8si ", -+ V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT, -+ IX86_BUILTIN_GATHERALTDIV8SI); -+ -+ /* AVX512F */ -+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gathersiv16sf", -+ V16SF_FTYPE_V16SF_PCVOID_V16SI_HI_INT, -+ IX86_BUILTIN_GATHER3SIV16SF); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gathersiv8df", -+ V8DF_FTYPE_V8DF_PCVOID_V8SI_QI_INT, -+ IX86_BUILTIN_GATHER3SIV8DF); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gatherdiv16sf", -+ V8SF_FTYPE_V8SF_PCVOID_V8DI_QI_INT, -+ IX86_BUILTIN_GATHER3DIV16SF); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gatherdiv8df", -+ V8DF_FTYPE_V8DF_PCVOID_V8DI_QI_INT, -+ IX86_BUILTIN_GATHER3DIV8DF); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gathersiv16si", -+ V16SI_FTYPE_V16SI_PCVOID_V16SI_HI_INT, -+ IX86_BUILTIN_GATHER3SIV16SI); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gathersiv8di", -+ V8DI_FTYPE_V8DI_PCVOID_V8SI_QI_INT, -+ IX86_BUILTIN_GATHER3SIV8DI); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gatherdiv16si", -+ V8SI_FTYPE_V8SI_PCVOID_V8DI_QI_INT, -+ IX86_BUILTIN_GATHER3DIV16SI); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gatherdiv8di", -+ V8DI_FTYPE_V8DI_PCVOID_V8DI_QI_INT, -+ IX86_BUILTIN_GATHER3DIV8DI); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gather3altsiv8df ", -+ V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT, -+ IX86_BUILTIN_GATHER3ALTSIV8DF); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gather3altdiv16sf ", -+ V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT, -+ IX86_BUILTIN_GATHER3ALTDIV16SF); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gather3altsiv8di ", -+ V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT, -+ IX86_BUILTIN_GATHER3ALTSIV8DI); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gather3altdiv16si ", -+ V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT, -+ IX86_BUILTIN_GATHER3ALTDIV16SI); -+ -+ def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scattersiv16sf", -+ VOID_FTYPE_PVOID_HI_V16SI_V16SF_INT, -+ IX86_BUILTIN_SCATTERSIV16SF); -+ -+ def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scattersiv8df", -+ VOID_FTYPE_PVOID_QI_V8SI_V8DF_INT, -+ 
IX86_BUILTIN_SCATTERSIV8DF); -+ -+ def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatterdiv16sf", -+ VOID_FTYPE_PVOID_QI_V8DI_V8SF_INT, -+ IX86_BUILTIN_SCATTERDIV16SF); -+ -+ def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatterdiv8df", -+ VOID_FTYPE_PVOID_QI_V8DI_V8DF_INT, -+ IX86_BUILTIN_SCATTERDIV8DF); -+ -+ def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scattersiv16si", -+ VOID_FTYPE_PVOID_HI_V16SI_V16SI_INT, -+ IX86_BUILTIN_SCATTERSIV16SI); -+ -+ def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scattersiv8di", -+ VOID_FTYPE_PVOID_QI_V8SI_V8DI_INT, -+ IX86_BUILTIN_SCATTERSIV8DI); -+ -+ def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatterdiv16si", -+ VOID_FTYPE_PVOID_QI_V8DI_V8SI_INT, -+ IX86_BUILTIN_SCATTERDIV16SI); -+ -+ def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatterdiv8di", -+ VOID_FTYPE_PVOID_QI_V8DI_V8DI_INT, -+ IX86_BUILTIN_SCATTERDIV8DI); -+ -+ /* AVX512VL */ -+ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv2df", -+ V2DF_FTYPE_V2DF_PCVOID_V4SI_QI_INT, -+ IX86_BUILTIN_GATHER3SIV2DF); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv4df", -+ V4DF_FTYPE_V4DF_PCVOID_V4SI_QI_INT, -+ IX86_BUILTIN_GATHER3SIV4DF); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div2df", -+ V2DF_FTYPE_V2DF_PCVOID_V2DI_QI_INT, -+ IX86_BUILTIN_GATHER3DIV2DF); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div4df", -+ V4DF_FTYPE_V4DF_PCVOID_V4DI_QI_INT, -+ IX86_BUILTIN_GATHER3DIV4DF); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv4sf", -+ V4SF_FTYPE_V4SF_PCVOID_V4SI_QI_INT, -+ IX86_BUILTIN_GATHER3SIV4SF); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv8sf", -+ V8SF_FTYPE_V8SF_PCVOID_V8SI_QI_INT, -+ IX86_BUILTIN_GATHER3SIV8SF); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div4sf", -+ V4SF_FTYPE_V4SF_PCVOID_V2DI_QI_INT, -+ IX86_BUILTIN_GATHER3DIV4SF); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div8sf", -+ V4SF_FTYPE_V4SF_PCVOID_V4DI_QI_INT, -+ IX86_BUILTIN_GATHER3DIV8SF); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv2di", -+ V2DI_FTYPE_V2DI_PCVOID_V4SI_QI_INT, -+ IX86_BUILTIN_GATHER3SIV2DI); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv4di", -+ V4DI_FTYPE_V4DI_PCVOID_V4SI_QI_INT, -+ IX86_BUILTIN_GATHER3SIV4DI); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div2di", -+ V2DI_FTYPE_V2DI_PCVOID_V2DI_QI_INT, -+ IX86_BUILTIN_GATHER3DIV2DI); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div4di", -+ V4DI_FTYPE_V4DI_PCVOID_V4DI_QI_INT, -+ IX86_BUILTIN_GATHER3DIV4DI); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv4si", -+ V4SI_FTYPE_V4SI_PCVOID_V4SI_QI_INT, -+ IX86_BUILTIN_GATHER3SIV4SI); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv8si", -+ V8SI_FTYPE_V8SI_PCVOID_V8SI_QI_INT, -+ IX86_BUILTIN_GATHER3SIV8SI); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div4si", -+ V4SI_FTYPE_V4SI_PCVOID_V2DI_QI_INT, -+ IX86_BUILTIN_GATHER3DIV4SI); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div8si", -+ V4SI_FTYPE_V4SI_PCVOID_V4DI_QI_INT, -+ IX86_BUILTIN_GATHER3DIV8SI); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3altsiv4df ", -+ 
V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT, -+ IX86_BUILTIN_GATHER3ALTSIV4DF); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3altdiv8sf ", -+ V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT, -+ IX86_BUILTIN_GATHER3ALTDIV8SF); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3altsiv4di ", -+ V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT, -+ IX86_BUILTIN_GATHER3ALTSIV4DI); -+ -+ def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3altdiv8si ", -+ V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT, -+ IX86_BUILTIN_GATHER3ALTDIV8SI); -+ -+ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv8sf", -+ VOID_FTYPE_PVOID_QI_V8SI_V8SF_INT, -+ IX86_BUILTIN_SCATTERSIV8SF); -+ -+ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv4sf", -+ VOID_FTYPE_PVOID_QI_V4SI_V4SF_INT, -+ IX86_BUILTIN_SCATTERSIV4SF); -+ -+ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv4df", -+ VOID_FTYPE_PVOID_QI_V4SI_V4DF_INT, -+ IX86_BUILTIN_SCATTERSIV4DF); -+ -+ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv2df", -+ VOID_FTYPE_PVOID_QI_V4SI_V2DF_INT, -+ IX86_BUILTIN_SCATTERSIV2DF); -+ -+ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv8sf", -+ VOID_FTYPE_PVOID_QI_V4DI_V4SF_INT, -+ IX86_BUILTIN_SCATTERDIV8SF); -+ -+ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv4sf", -+ VOID_FTYPE_PVOID_QI_V2DI_V4SF_INT, -+ IX86_BUILTIN_SCATTERDIV4SF); -+ -+ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv4df", -+ VOID_FTYPE_PVOID_QI_V4DI_V4DF_INT, -+ IX86_BUILTIN_SCATTERDIV4DF); -+ -+ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv2df", -+ VOID_FTYPE_PVOID_QI_V2DI_V2DF_INT, -+ IX86_BUILTIN_SCATTERDIV2DF); -+ -+ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv8si", -+ VOID_FTYPE_PVOID_QI_V8SI_V8SI_INT, -+ IX86_BUILTIN_SCATTERSIV8SI); -+ -+ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv4si", -+ VOID_FTYPE_PVOID_QI_V4SI_V4SI_INT, -+ IX86_BUILTIN_SCATTERSIV4SI); -+ -+ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv4di", -+ VOID_FTYPE_PVOID_QI_V4SI_V4DI_INT, -+ IX86_BUILTIN_SCATTERSIV4DI); -+ -+ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv2di", -+ VOID_FTYPE_PVOID_QI_V4SI_V2DI_INT, -+ IX86_BUILTIN_SCATTERSIV2DI); -+ -+ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv8si", -+ VOID_FTYPE_PVOID_QI_V4DI_V4SI_INT, -+ IX86_BUILTIN_SCATTERDIV8SI); -+ -+ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv4si", -+ VOID_FTYPE_PVOID_QI_V2DI_V4SI_INT, -+ IX86_BUILTIN_SCATTERDIV4SI); -+ -+ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv4di", -+ VOID_FTYPE_PVOID_QI_V4DI_V4DI_INT, -+ IX86_BUILTIN_SCATTERDIV4DI); -+ -+ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv2di", -+ VOID_FTYPE_PVOID_QI_V2DI_V2DI_INT, -+ IX86_BUILTIN_SCATTERDIV2DI); -+ -+ def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatteraltsiv8df ", -+ VOID_FTYPE_PDOUBLE_QI_V16SI_V8DF_INT, -+ IX86_BUILTIN_SCATTERALTSIV8DF); -+ -+ def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatteraltdiv16sf ", -+ VOID_FTYPE_PFLOAT_HI_V8DI_V16SF_INT, -+ IX86_BUILTIN_SCATTERALTDIV16SF); -+ -+ def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatteraltsiv8di ", -+ VOID_FTYPE_PLONGLONG_QI_V16SI_V8DI_INT, -+ IX86_BUILTIN_SCATTERALTSIV8DI); -+ -+ def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatteraltdiv16si ", -+ 
VOID_FTYPE_PINT_HI_V8DI_V16SI_INT, -+ IX86_BUILTIN_SCATTERALTDIV16SI); -+ -+ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltsiv4df ", -+ VOID_FTYPE_PDOUBLE_QI_V8SI_V4DF_INT, -+ IX86_BUILTIN_SCATTERALTSIV4DF); -+ -+ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltdiv8sf ", -+ VOID_FTYPE_PFLOAT_QI_V4DI_V8SF_INT, -+ IX86_BUILTIN_SCATTERALTDIV8SF); -+ -+ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltsiv4di ", -+ VOID_FTYPE_PLONGLONG_QI_V8SI_V4DI_INT, -+ IX86_BUILTIN_SCATTERALTSIV4DI); -+ -+ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltdiv8si ", -+ VOID_FTYPE_PINT_QI_V4DI_V8SI_INT, -+ IX86_BUILTIN_SCATTERALTDIV8SI); -+ -+ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltsiv2df ", -+ VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT, -+ IX86_BUILTIN_SCATTERALTSIV2DF); -+ -+ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltdiv4sf ", -+ VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT, -+ IX86_BUILTIN_SCATTERALTDIV4SF); -+ -+ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltsiv2di ", -+ VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT, -+ IX86_BUILTIN_SCATTERALTSIV2DI); -+ -+ def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltdiv4si ", -+ VOID_FTYPE_PINT_QI_V2DI_V4SI_INT, -+ IX86_BUILTIN_SCATTERALTDIV4SI); -+ -+ /* AVX512PF */ -+ def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_gatherpfdpd", -+ VOID_FTYPE_QI_V8SI_PCVOID_INT_INT, -+ IX86_BUILTIN_GATHERPFDPD); -+ def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_gatherpfdps", -+ VOID_FTYPE_HI_V16SI_PCVOID_INT_INT, -+ IX86_BUILTIN_GATHERPFDPS); -+ def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_gatherpfqpd", -+ VOID_FTYPE_QI_V8DI_PCVOID_INT_INT, -+ IX86_BUILTIN_GATHERPFQPD); -+ def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_gatherpfqps", -+ VOID_FTYPE_QI_V8DI_PCVOID_INT_INT, -+ IX86_BUILTIN_GATHERPFQPS); -+ def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_scatterpfdpd", -+ VOID_FTYPE_QI_V8SI_PCVOID_INT_INT, -+ IX86_BUILTIN_SCATTERPFDPD); -+ def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_scatterpfdps", -+ VOID_FTYPE_HI_V16SI_PCVOID_INT_INT, -+ IX86_BUILTIN_SCATTERPFDPS); -+ def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_scatterpfqpd", -+ VOID_FTYPE_QI_V8DI_PCVOID_INT_INT, -+ IX86_BUILTIN_SCATTERPFQPD); -+ def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_scatterpfqps", -+ VOID_FTYPE_QI_V8DI_PCVOID_INT_INT, -+ IX86_BUILTIN_SCATTERPFQPS); -+ -+ /* SHA */ -+ def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha1msg1", -+ V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1); -+ def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha1msg2", -+ V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2); -+ def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha1nexte", -+ V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE); -+ def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha1rnds4", -+ V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4); -+ def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha256msg1", -+ V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1); -+ def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha256msg2", -+ V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2); -+ def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha256rnds2", -+ V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2); -+ -+ /* RTM. 
*/ -+ def_builtin (OPTION_MASK_ISA_RTM, 0, "__builtin_ia32_xabort", -+ VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT); -+ -+ /* MMX access to the vec_init patterns. */ -+ def_builtin_const (OPTION_MASK_ISA_MMX, 0, "__builtin_ia32_vec_init_v2si", -+ V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI); -+ -+ def_builtin_const (OPTION_MASK_ISA_MMX, 0, "__builtin_ia32_vec_init_v4hi", -+ V4HI_FTYPE_HI_HI_HI_HI, -+ IX86_BUILTIN_VEC_INIT_V4HI); -+ -+ def_builtin_const (OPTION_MASK_ISA_MMX, 0, "__builtin_ia32_vec_init_v8qi", -+ V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI, -+ IX86_BUILTIN_VEC_INIT_V8QI); -+ -+ /* Access to the vec_extract patterns. */ -+ def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_ext_v2df", -+ DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF); -+ def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_ext_v2di", -+ DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI); -+ def_builtin_const (OPTION_MASK_ISA_SSE, 0, "__builtin_ia32_vec_ext_v4sf", -+ FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF); -+ def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_ext_v4si", -+ SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI); -+ def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_ext_v8hi", -+ HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI); -+ -+ def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A -+ /* As it uses V4HImode, we have to require -mmmx too. */ -+ | OPTION_MASK_ISA_MMX, 0, -+ "__builtin_ia32_vec_ext_v4hi", -+ HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI); -+ -+ def_builtin_const (OPTION_MASK_ISA_MMX, 0, "__builtin_ia32_vec_ext_v2si", -+ SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI); -+ -+ def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_ext_v16qi", -+ QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI); -+ -+ /* Access to the vec_set patterns. */ -+ def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, 0, -+ "__builtin_ia32_vec_set_v2di", -+ V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI); -+ -+ def_builtin_const (OPTION_MASK_ISA_SSE4_1, 0, "__builtin_ia32_vec_set_v4sf", -+ V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF); -+ -+ def_builtin_const (OPTION_MASK_ISA_SSE4_1, 0, "__builtin_ia32_vec_set_v4si", -+ V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI); -+ -+ def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_set_v8hi", -+ V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI); -+ -+ def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A -+ /* As it uses V4HImode, we have to require -mmmx too. 
*/ -+ | OPTION_MASK_ISA_MMX, 0, -+ "__builtin_ia32_vec_set_v4hi", -+ V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI); -+ -+ def_builtin_const (OPTION_MASK_ISA_SSE4_1, 0, "__builtin_ia32_vec_set_v16qi", -+ V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI); -+ -+ /* RDSEED */ -+ def_builtin (OPTION_MASK_ISA_RDSEED, 0, "__builtin_ia32_rdseed_hi_step", -+ INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP); -+ def_builtin (OPTION_MASK_ISA_RDSEED, 0, "__builtin_ia32_rdseed_si_step", -+ INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP); -+ def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT, 0, -+ "__builtin_ia32_rdseed_di_step", -+ INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP); -+ -+ /* ADCX */ -+ def_builtin (0, 0, "__builtin_ia32_addcarryx_u32", -+ UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32); -+ def_builtin (OPTION_MASK_ISA_64BIT, 0, -+ "__builtin_ia32_addcarryx_u64", -+ UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG, -+ IX86_BUILTIN_ADDCARRYX64); -+ -+ /* SBB */ -+ def_builtin (0, 0, "__builtin_ia32_sbb_u32", -+ UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32); -+ def_builtin (OPTION_MASK_ISA_64BIT, 0, -+ "__builtin_ia32_sbb_u64", -+ UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG, -+ IX86_BUILTIN_SBB64); -+ -+ /* Read/write FLAGS. */ -+ if (TARGET_64BIT) -+ { -+ def_builtin (OPTION_MASK_ISA_64BIT, 0, "__builtin_ia32_readeflags_u64", -+ UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS); -+ def_builtin (OPTION_MASK_ISA_64BIT, 0, "__builtin_ia32_writeeflags_u64", -+ VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS); -+ } -+ else -+ { -+ def_builtin (0, 0, "__builtin_ia32_readeflags_u32", -+ UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS); -+ def_builtin (0, 0, "__builtin_ia32_writeeflags_u32", -+ VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS); -+ } -+ -+ /* CLFLUSHOPT. */ -+ def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, 0, "__builtin_ia32_clflushopt", -+ VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT); -+ -+ /* CLWB. */ -+ def_builtin (OPTION_MASK_ISA_CLWB, 0, "__builtin_ia32_clwb", -+ VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB); -+ -+ /* MONITORX and MWAITX. */ -+ def_builtin (0, OPTION_MASK_ISA_MWAITX, "__builtin_ia32_monitorx", -+ VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX); -+ def_builtin (0, OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx", -+ VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX); -+ -+ /* CLZERO. */ -+ def_builtin (0, OPTION_MASK_ISA_CLZERO, "__builtin_ia32_clzero", -+ VOID_FTYPE_PCVOID, IX86_BUILTIN_CLZERO); -+ -+ /* WAITPKG. */ -+ def_builtin (0, OPTION_MASK_ISA_WAITPKG, "__builtin_ia32_umonitor", -+ VOID_FTYPE_PVOID, IX86_BUILTIN_UMONITOR); -+ def_builtin (0, OPTION_MASK_ISA_WAITPKG, "__builtin_ia32_umwait", -+ UINT8_FTYPE_UNSIGNED_UINT64, IX86_BUILTIN_UMWAIT); -+ def_builtin (0, OPTION_MASK_ISA_WAITPKG, "__builtin_ia32_tpause", -+ UINT8_FTYPE_UNSIGNED_UINT64, IX86_BUILTIN_TPAUSE); -+ -+ /* CLDEMOTE. 
*/ -+ def_builtin (0, OPTION_MASK_ISA_CLDEMOTE, "__builtin_ia32_cldemote", -+ VOID_FTYPE_PCVOID, IX86_BUILTIN_CLDEMOTE); -+ -+ /* Add FMA4 multi-arg argument instructions */ -+ for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++) -+ { -+ BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_MULTI_ARG_FIRST, i); -+ if (d->name == 0) -+ continue; -+ -+ ftype = (enum ix86_builtin_func_type) d->flag; -+ def_builtin_const (d->mask, d->mask2, d->name, ftype, d->code); -+ } -+ BDESC_VERIFYS (IX86_BUILTIN__BDESC_MULTI_ARG_LAST, -+ IX86_BUILTIN__BDESC_MULTI_ARG_FIRST, -+ ARRAY_SIZE (bdesc_multi_arg) - 1); -+ -+ /* Add CET inrinsics. */ -+ for (i = 0, d = bdesc_cet; i < ARRAY_SIZE (bdesc_cet); i++, d++) -+ { -+ BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_CET_FIRST, i); -+ if (d->name == 0) -+ continue; -+ -+ ftype = (enum ix86_builtin_func_type) d->flag; -+ def_builtin (d->mask, d->mask2, d->name, ftype, d->code); -+ } -+ BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_LAST, -+ IX86_BUILTIN__BDESC_CET_FIRST, -+ ARRAY_SIZE (bdesc_cet) - 1); -+ -+ for (i = 0, d = bdesc_cet_rdssp; -+ i < ARRAY_SIZE (bdesc_cet_rdssp); -+ i++, d++) -+ { -+ BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_CET_NORMAL_FIRST, i); -+ if (d->name == 0) -+ continue; -+ -+ ftype = (enum ix86_builtin_func_type) d->flag; -+ def_builtin (d->mask, d->mask2, d->name, ftype, d->code); -+ } -+ BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_NORMAL_LAST, -+ IX86_BUILTIN__BDESC_CET_NORMAL_FIRST, -+ ARRAY_SIZE (bdesc_cet_rdssp) - 1); -+} -+ -+#undef BDESC_VERIFY -+#undef BDESC_VERIFYS -+ -+/* Make builtins to detect cpu type and features supported. NAME is -+ the builtin name, CODE is the builtin code, and FTYPE is the function -+ type of the builtin. */ -+ -+static void -+make_cpu_type_builtin (const char* name, int code, -+ enum ix86_builtin_func_type ftype, bool is_const) -+{ -+ tree decl; -+ tree type; -+ -+ type = ix86_get_builtin_func_type (ftype); -+ decl = add_builtin_function (name, type, code, BUILT_IN_MD, -+ NULL, NULL_TREE); -+ gcc_assert (decl != NULL_TREE); -+ ix86_builtins[(int) code] = decl; -+ TREE_READONLY (decl) = is_const; -+} -+ -+/* Make builtins to get CPU type and features supported. The created -+ builtins are : -+ -+ __builtin_cpu_init (), to detect cpu type and features, -+ __builtin_cpu_is (""), to check if cpu is of type , -+ __builtin_cpu_supports (""), to check if cpu supports -+ */ -+ -+static void -+ix86_init_platform_type_builtins (void) -+{ -+ make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT, -+ INT_FTYPE_VOID, false); -+ make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS, -+ INT_FTYPE_PCCHAR, true); -+ make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS, -+ INT_FTYPE_PCCHAR, true); -+} -+ -+/* Internal method for ix86_init_builtins. 
*/ -+ -+static void -+ix86_init_builtins_va_builtins_abi (void) -+{ -+ tree ms_va_ref, sysv_va_ref; -+ tree fnvoid_va_end_ms, fnvoid_va_end_sysv; -+ tree fnvoid_va_start_ms, fnvoid_va_start_sysv; -+ tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv; -+ tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE; -+ -+ if (!TARGET_64BIT) -+ return; -+ fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE); -+ fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE); -+ ms_va_ref = build_reference_type (ms_va_list_type_node); -+ sysv_va_ref = build_pointer_type (TREE_TYPE (sysv_va_list_type_node)); -+ -+ fnvoid_va_end_ms = build_function_type_list (void_type_node, ms_va_ref, -+ NULL_TREE); -+ fnvoid_va_start_ms -+ = build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE); -+ fnvoid_va_end_sysv -+ = build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE); -+ fnvoid_va_start_sysv -+ = build_varargs_function_type_list (void_type_node, sysv_va_ref, -+ NULL_TREE); -+ fnvoid_va_copy_ms -+ = build_function_type_list (void_type_node, ms_va_ref, -+ ms_va_list_type_node, NULL_TREE); -+ fnvoid_va_copy_sysv -+ = build_function_type_list (void_type_node, sysv_va_ref, -+ sysv_va_ref, NULL_TREE); -+ -+ add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms, -+ BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms); -+ add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms, -+ BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms); -+ add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms, -+ BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms); -+ add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv, -+ BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv); -+ add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv, -+ BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv); -+ add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv, -+ BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv); -+} -+ -+static void -+ix86_init_builtin_types (void) -+{ -+ tree float80_type_node, const_string_type_node; -+ -+ /* The __float80 type. */ -+ float80_type_node = long_double_type_node; -+ if (TYPE_MODE (float80_type_node) != XFmode) -+ { -+ if (float64x_type_node != NULL_TREE -+ && TYPE_MODE (float64x_type_node) == XFmode) -+ float80_type_node = float64x_type_node; -+ else -+ { -+ /* The __float80 type. */ -+ float80_type_node = make_node (REAL_TYPE); -+ -+ TYPE_PRECISION (float80_type_node) = 80; -+ layout_type (float80_type_node); -+ } -+ } -+ lang_hooks.types.register_builtin_type (float80_type_node, "__float80"); -+ -+ /* The __float128 type. The node has already been created as -+ _Float128, so we only need to register the __float128 name for -+ it. */ -+ lang_hooks.types.register_builtin_type (float128_type_node, "__float128"); -+ -+ const_string_type_node -+ = build_pointer_type (build_qualified_type -+ (char_type_node, TYPE_QUAL_CONST)); -+ -+ /* This macro is built by i386-builtin-types.awk. */ -+ DEFINE_BUILTIN_PRIMITIVE_TYPES; -+} -+ -+void -+ix86_init_builtins (void) -+{ -+ tree ftype, decl; -+ -+ ix86_init_builtin_types (); -+ -+ /* Builtins to get CPU type and features. */ -+ ix86_init_platform_type_builtins (); -+ -+ /* TFmode support builtins. 
*/ -+ def_builtin_const (0, 0, "__builtin_infq", -+ FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ); -+ def_builtin_const (0, 0, "__builtin_huge_valq", -+ FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ); -+ -+ ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_CONST_STRING); -+ decl = add_builtin_function ("__builtin_nanq", ftype, IX86_BUILTIN_NANQ, -+ BUILT_IN_MD, "nanq", NULL_TREE); -+ TREE_READONLY (decl) = 1; -+ ix86_builtins[(int) IX86_BUILTIN_NANQ] = decl; -+ -+ decl = add_builtin_function ("__builtin_nansq", ftype, IX86_BUILTIN_NANSQ, -+ BUILT_IN_MD, "nansq", NULL_TREE); -+ TREE_READONLY (decl) = 1; -+ ix86_builtins[(int) IX86_BUILTIN_NANSQ] = decl; -+ -+ /* We will expand them to normal call if SSE isn't available since -+ they are used by libgcc. */ -+ ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128); -+ decl = add_builtin_function ("__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ, -+ BUILT_IN_MD, "__fabstf2", NULL_TREE); -+ TREE_READONLY (decl) = 1; -+ ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl; -+ -+ ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128); -+ decl = add_builtin_function ("__builtin_copysignq", ftype, -+ IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD, -+ "__copysigntf3", NULL_TREE); -+ TREE_READONLY (decl) = 1; -+ ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl; -+ -+ ix86_init_tm_builtins (); -+ ix86_init_mmx_sse_builtins (); -+ -+ if (TARGET_LP64) -+ ix86_init_builtins_va_builtins_abi (); -+ -+#ifdef SUBTARGET_INIT_BUILTINS -+ SUBTARGET_INIT_BUILTINS; -+#endif -+} -+ -+/* Return the ix86 builtin for CODE. */ -+ -+tree -+ix86_builtin_decl (unsigned code, bool) -+{ -+ if (code >= IX86_BUILTIN_MAX) -+ return error_mark_node; -+ -+ return ix86_builtins[code]; -+} -+ -+/* This returns the target-specific builtin with code CODE if -+ current_function_decl has visibility on this builtin, which is checked -+ using isa flags. Returns NULL_TREE otherwise. */ -+ -+static tree ix86_get_builtin (enum ix86_builtins code) -+{ -+ struct cl_target_option *opts; -+ tree target_tree = NULL_TREE; -+ -+ /* Determine the isa flags of current_function_decl. */ -+ -+ if (current_function_decl) -+ target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl); -+ -+ if (target_tree == NULL) -+ target_tree = target_option_default_node; -+ -+ opts = TREE_TARGET_OPTION (target_tree); -+ -+ if ((ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags) -+ || (ix86_builtins_isa[(int) code].isa2 & opts->x_ix86_isa_flags2)) -+ return ix86_builtin_decl (code, true); -+ else -+ return NULL_TREE; -+} -+ -+/* Vectorization library interface and handlers. */ -+tree (*ix86_veclib_handler) (combined_fn, tree, tree); -+ -+/* Returns a function decl for a vectorized version of the combined function -+ with combined_fn code FN and the result vector type TYPE, or NULL_TREE -+ if it is not available. 
*/ -+ -+tree -+ix86_builtin_vectorized_function (unsigned int fn, tree type_out, -+ tree type_in) -+{ -+ machine_mode in_mode, out_mode; -+ int in_n, out_n; -+ -+ if (TREE_CODE (type_out) != VECTOR_TYPE -+ || TREE_CODE (type_in) != VECTOR_TYPE) -+ return NULL_TREE; -+ -+ out_mode = TYPE_MODE (TREE_TYPE (type_out)); -+ out_n = TYPE_VECTOR_SUBPARTS (type_out); -+ in_mode = TYPE_MODE (TREE_TYPE (type_in)); -+ in_n = TYPE_VECTOR_SUBPARTS (type_in); -+ -+ switch (fn) -+ { -+ CASE_CFN_EXP2: -+ if (out_mode == SFmode && in_mode == SFmode) -+ { -+ if (out_n == 16 && in_n == 16) -+ return ix86_get_builtin (IX86_BUILTIN_EXP2PS); -+ } -+ break; -+ -+ CASE_CFN_IFLOOR: -+ CASE_CFN_LFLOOR: -+ CASE_CFN_LLFLOOR: -+ /* The round insn does not trap on denormals. */ -+ if (flag_trapping_math || !TARGET_SSE4_1) -+ break; -+ -+ if (out_mode == SImode && in_mode == DFmode) -+ { -+ if (out_n == 4 && in_n == 2) -+ return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX); -+ else if (out_n == 8 && in_n == 4) -+ return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256); -+ else if (out_n == 16 && in_n == 8) -+ return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512); -+ } -+ if (out_mode == SImode && in_mode == SFmode) -+ { -+ if (out_n == 4 && in_n == 4) -+ return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX); -+ else if (out_n == 8 && in_n == 8) -+ return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256); -+ else if (out_n == 16 && in_n == 16) -+ return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX512); -+ } -+ break; -+ -+ CASE_CFN_ICEIL: -+ CASE_CFN_LCEIL: -+ CASE_CFN_LLCEIL: -+ /* The round insn does not trap on denormals. */ -+ if (flag_trapping_math || !TARGET_SSE4_1) -+ break; -+ -+ if (out_mode == SImode && in_mode == DFmode) -+ { -+ if (out_n == 4 && in_n == 2) -+ return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX); -+ else if (out_n == 8 && in_n == 4) -+ return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256); -+ else if (out_n == 16 && in_n == 8) -+ return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512); -+ } -+ if (out_mode == SImode && in_mode == SFmode) -+ { -+ if (out_n == 4 && in_n == 4) -+ return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX); -+ else if (out_n == 8 && in_n == 8) -+ return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256); -+ else if (out_n == 16 && in_n == 16) -+ return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX512); -+ } -+ break; -+ -+ CASE_CFN_IRINT: -+ CASE_CFN_LRINT: -+ CASE_CFN_LLRINT: -+ if (out_mode == SImode && in_mode == DFmode) -+ { -+ if (out_n == 4 && in_n == 2) -+ return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX); -+ else if (out_n == 8 && in_n == 4) -+ return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256); -+ else if (out_n == 16 && in_n == 8) -+ return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX512); -+ } -+ if (out_mode == SImode && in_mode == SFmode) -+ { -+ if (out_n == 4 && in_n == 4) -+ return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ); -+ else if (out_n == 8 && in_n == 8) -+ return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256); -+ else if (out_n == 16 && in_n == 16) -+ return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ512); -+ } -+ break; -+ -+ CASE_CFN_IROUND: -+ CASE_CFN_LROUND: -+ CASE_CFN_LLROUND: -+ /* The round insn does not trap on denormals. 
*/ -+ if (flag_trapping_math || !TARGET_SSE4_1) -+ break; -+ -+ if (out_mode == SImode && in_mode == DFmode) -+ { -+ if (out_n == 4 && in_n == 2) -+ return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX); -+ else if (out_n == 8 && in_n == 4) -+ return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256); -+ else if (out_n == 16 && in_n == 8) -+ return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512); -+ } -+ if (out_mode == SImode && in_mode == SFmode) -+ { -+ if (out_n == 4 && in_n == 4) -+ return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX); -+ else if (out_n == 8 && in_n == 8) -+ return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256); -+ else if (out_n == 16 && in_n == 16) -+ return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX512); -+ } -+ break; -+ -+ CASE_CFN_FLOOR: -+ /* The round insn does not trap on denormals. */ -+ if (flag_trapping_math || !TARGET_SSE4_1) -+ break; -+ -+ if (out_mode == DFmode && in_mode == DFmode) -+ { -+ if (out_n == 2 && in_n == 2) -+ return ix86_get_builtin (IX86_BUILTIN_FLOORPD); -+ else if (out_n == 4 && in_n == 4) -+ return ix86_get_builtin (IX86_BUILTIN_FLOORPD256); -+ else if (out_n == 8 && in_n == 8) -+ return ix86_get_builtin (IX86_BUILTIN_FLOORPD512); -+ } -+ if (out_mode == SFmode && in_mode == SFmode) -+ { -+ if (out_n == 4 && in_n == 4) -+ return ix86_get_builtin (IX86_BUILTIN_FLOORPS); -+ else if (out_n == 8 && in_n == 8) -+ return ix86_get_builtin (IX86_BUILTIN_FLOORPS256); -+ else if (out_n == 16 && in_n == 16) -+ return ix86_get_builtin (IX86_BUILTIN_FLOORPS512); -+ } -+ break; -+ -+ CASE_CFN_CEIL: -+ /* The round insn does not trap on denormals. */ -+ if (flag_trapping_math || !TARGET_SSE4_1) -+ break; -+ -+ if (out_mode == DFmode && in_mode == DFmode) -+ { -+ if (out_n == 2 && in_n == 2) -+ return ix86_get_builtin (IX86_BUILTIN_CEILPD); -+ else if (out_n == 4 && in_n == 4) -+ return ix86_get_builtin (IX86_BUILTIN_CEILPD256); -+ else if (out_n == 8 && in_n == 8) -+ return ix86_get_builtin (IX86_BUILTIN_CEILPD512); -+ } -+ if (out_mode == SFmode && in_mode == SFmode) -+ { -+ if (out_n == 4 && in_n == 4) -+ return ix86_get_builtin (IX86_BUILTIN_CEILPS); -+ else if (out_n == 8 && in_n == 8) -+ return ix86_get_builtin (IX86_BUILTIN_CEILPS256); -+ else if (out_n == 16 && in_n == 16) -+ return ix86_get_builtin (IX86_BUILTIN_CEILPS512); -+ } -+ break; -+ -+ CASE_CFN_TRUNC: -+ /* The round insn does not trap on denormals. */ -+ if (flag_trapping_math || !TARGET_SSE4_1) -+ break; -+ -+ if (out_mode == DFmode && in_mode == DFmode) -+ { -+ if (out_n == 2 && in_n == 2) -+ return ix86_get_builtin (IX86_BUILTIN_TRUNCPD); -+ else if (out_n == 4 && in_n == 4) -+ return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256); -+ else if (out_n == 8 && in_n == 8) -+ return ix86_get_builtin (IX86_BUILTIN_TRUNCPD512); -+ } -+ if (out_mode == SFmode && in_mode == SFmode) -+ { -+ if (out_n == 4 && in_n == 4) -+ return ix86_get_builtin (IX86_BUILTIN_TRUNCPS); -+ else if (out_n == 8 && in_n == 8) -+ return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256); -+ else if (out_n == 16 && in_n == 16) -+ return ix86_get_builtin (IX86_BUILTIN_TRUNCPS512); -+ } -+ break; -+ -+ CASE_CFN_RINT: -+ /* The round insn does not trap on denormals. 
*/ -+ if (flag_trapping_math || !TARGET_SSE4_1) -+ break; -+ -+ if (out_mode == DFmode && in_mode == DFmode) -+ { -+ if (out_n == 2 && in_n == 2) -+ return ix86_get_builtin (IX86_BUILTIN_RINTPD); -+ else if (out_n == 4 && in_n == 4) -+ return ix86_get_builtin (IX86_BUILTIN_RINTPD256); -+ } -+ if (out_mode == SFmode && in_mode == SFmode) -+ { -+ if (out_n == 4 && in_n == 4) -+ return ix86_get_builtin (IX86_BUILTIN_RINTPS); -+ else if (out_n == 8 && in_n == 8) -+ return ix86_get_builtin (IX86_BUILTIN_RINTPS256); -+ } -+ break; -+ -+ CASE_CFN_FMA: -+ if (out_mode == DFmode && in_mode == DFmode) -+ { -+ if (out_n == 2 && in_n == 2) -+ return ix86_get_builtin (IX86_BUILTIN_VFMADDPD); -+ if (out_n == 4 && in_n == 4) -+ return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256); -+ } -+ if (out_mode == SFmode && in_mode == SFmode) -+ { -+ if (out_n == 4 && in_n == 4) -+ return ix86_get_builtin (IX86_BUILTIN_VFMADDPS); -+ if (out_n == 8 && in_n == 8) -+ return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256); -+ } -+ break; -+ -+ default: -+ break; -+ } -+ -+ /* Dispatch to a handler for a vectorization library. */ -+ if (ix86_veclib_handler) -+ return ix86_veclib_handler (combined_fn (fn), type_out, type_in); -+ -+ return NULL_TREE; -+} -+ -+/* Returns a decl of a function that implements gather load with -+ memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE. -+ Return NULL_TREE if it is not available. */ -+ -+tree -+ix86_vectorize_builtin_gather (const_tree mem_vectype, -+ const_tree index_type, int scale) -+{ -+ bool si; -+ enum ix86_builtins code; -+ -+ if (! TARGET_AVX2 || !TARGET_USE_GATHER) -+ return NULL_TREE; -+ -+ if ((TREE_CODE (index_type) != INTEGER_TYPE -+ && !POINTER_TYPE_P (index_type)) -+ || (TYPE_MODE (index_type) != SImode -+ && TYPE_MODE (index_type) != DImode)) -+ return NULL_TREE; -+ -+ if (TYPE_PRECISION (index_type) > POINTER_SIZE) -+ return NULL_TREE; -+ -+ /* v*gather* insn sign extends index to pointer mode. */ -+ if (TYPE_PRECISION (index_type) < POINTER_SIZE -+ && TYPE_UNSIGNED (index_type)) -+ return NULL_TREE; -+ -+ if (scale <= 0 -+ || scale > 8 -+ || (scale & (scale - 1)) != 0) -+ return NULL_TREE; -+ -+ si = TYPE_MODE (index_type) == SImode; -+ switch (TYPE_MODE (mem_vectype)) -+ { -+ case E_V2DFmode: -+ if (TARGET_AVX512VL) -+ code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF; -+ else -+ code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF; -+ break; -+ case E_V4DFmode: -+ if (TARGET_AVX512VL) -+ code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF; -+ else -+ code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF; -+ break; -+ case E_V2DImode: -+ if (TARGET_AVX512VL) -+ code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI; -+ else -+ code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI; -+ break; -+ case E_V4DImode: -+ if (TARGET_AVX512VL) -+ code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI; -+ else -+ code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI; -+ break; -+ case E_V4SFmode: -+ if (TARGET_AVX512VL) -+ code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF; -+ else -+ code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF; -+ break; -+ case E_V8SFmode: -+ if (TARGET_AVX512VL) -+ code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF; -+ else -+ code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF; -+ break; -+ case E_V4SImode: -+ if (TARGET_AVX512VL) -+ code = si ? 
IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI; -+ else -+ code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI; -+ break; -+ case E_V8SImode: -+ if (TARGET_AVX512VL) -+ code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI; -+ else -+ code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI; -+ break; -+ case E_V8DFmode: -+ if (TARGET_AVX512F) -+ code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF; -+ else -+ return NULL_TREE; -+ break; -+ case E_V8DImode: -+ if (TARGET_AVX512F) -+ code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI; -+ else -+ return NULL_TREE; -+ break; -+ case E_V16SFmode: -+ if (TARGET_AVX512F) -+ code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF; -+ else -+ return NULL_TREE; -+ break; -+ case E_V16SImode: -+ if (TARGET_AVX512F) -+ code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI; -+ else -+ return NULL_TREE; -+ break; -+ default: -+ return NULL_TREE; -+ } -+ -+ return ix86_get_builtin (code); -+} -+ -+/* Returns a code for a target-specific builtin that implements -+ reciprocal of the function, or NULL_TREE if not available. */ -+ -+tree -+ix86_builtin_reciprocal (tree fndecl) -+{ -+ enum ix86_builtins fn_code -+ = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl); -+ switch (fn_code) -+ { -+ /* Vectorized version of sqrt to rsqrt conversion. */ -+ case IX86_BUILTIN_SQRTPS_NR: -+ return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR); -+ -+ case IX86_BUILTIN_SQRTPS_NR256: -+ return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256); -+ -+ default: -+ return NULL_TREE; -+ } -+} -+ -+/* Priority of i386 features, greater value is higher priority. This is -+ used to decide the order in which function dispatch must happen. For -+ instance, a version specialized for SSE4.2 should be checked for dispatch -+ before a version for SSE3, as SSE4.2 implies SSE3. */ -+enum feature_priority -+{ -+ P_ZERO = 0, -+ P_MMX, -+ P_SSE, -+ P_SSE2, -+ P_SSE3, -+ P_SSSE3, -+ P_PROC_SSSE3, -+ P_SSE4_A, -+ P_PROC_SSE4_A, -+ P_SSE4_1, -+ P_SSE4_2, -+ P_PROC_SSE4_2, -+ P_POPCNT, -+ P_AES, -+ P_PCLMUL, -+ P_AVX, -+ P_PROC_AVX, -+ P_BMI, -+ P_PROC_BMI, -+ P_FMA4, -+ P_XOP, -+ P_PROC_XOP, -+ P_FMA, -+ P_PROC_FMA, -+ P_BMI2, -+ P_AVX2, -+ P_PROC_AVX2, -+ P_AVX512F, -+ P_PROC_AVX512F -+}; -+ -+/* This is the order of bit-fields in __processor_features in cpuinfo.c */ -+enum processor_features -+{ -+ F_CMOV = 0, -+ F_MMX, -+ F_POPCNT, -+ F_SSE, -+ F_SSE2, -+ F_SSE3, -+ F_SSSE3, -+ F_SSE4_1, -+ F_SSE4_2, -+ F_AVX, -+ F_AVX2, -+ F_SSE4_A, -+ F_FMA4, -+ F_XOP, -+ F_FMA, -+ F_AVX512F, -+ F_BMI, -+ F_BMI2, -+ F_AES, -+ F_PCLMUL, -+ F_AVX512VL, -+ F_AVX512BW, -+ F_AVX512DQ, -+ F_AVX512CD, -+ F_AVX512ER, -+ F_AVX512PF, -+ F_AVX512VBMI, -+ F_AVX512IFMA, -+ F_AVX5124VNNIW, -+ F_AVX5124FMAPS, -+ F_AVX512VPOPCNTDQ, -+ F_AVX512VBMI2, -+ F_GFNI, -+ F_VPCLMULQDQ, -+ F_AVX512VNNI, -+ F_AVX512BITALG, -+ F_MAX -+}; -+ -+/* These are the values for vendor types and cpu types and subtypes -+ in cpuinfo.c. Cpu types and subtypes should be subtracted by -+ the corresponding start value. 
*/ -+enum processor_model -+{ -+ M_INTEL = 1, -+ M_AMD, -+ M_CPU_TYPE_START, -+ M_INTEL_BONNELL, -+ M_INTEL_CORE2, -+ M_INTEL_COREI7, -+ M_AMDFAM10H, -+ M_AMDFAM15H, -+ M_INTEL_SILVERMONT, -+ M_INTEL_KNL, -+ M_AMD_BTVER1, -+ M_AMD_BTVER2, -+ M_AMDFAM17H, -+ M_INTEL_KNM, -+ M_INTEL_GOLDMONT, -+ M_INTEL_GOLDMONT_PLUS, -+ M_INTEL_TREMONT, -+ M_CPU_SUBTYPE_START, -+ M_INTEL_COREI7_NEHALEM, -+ M_INTEL_COREI7_WESTMERE, -+ M_INTEL_COREI7_SANDYBRIDGE, -+ M_AMDFAM10H_BARCELONA, -+ M_AMDFAM10H_SHANGHAI, -+ M_AMDFAM10H_ISTANBUL, -+ M_AMDFAM15H_BDVER1, -+ M_AMDFAM15H_BDVER2, -+ M_AMDFAM15H_BDVER3, -+ M_AMDFAM15H_BDVER4, -+ M_AMDFAM17H_ZNVER1, -+ M_INTEL_COREI7_IVYBRIDGE, -+ M_INTEL_COREI7_HASWELL, -+ M_INTEL_COREI7_BROADWELL, -+ M_INTEL_COREI7_SKYLAKE, -+ M_INTEL_COREI7_SKYLAKE_AVX512, -+ M_INTEL_COREI7_CANNONLAKE, -+ M_INTEL_COREI7_ICELAKE_CLIENT, -+ M_INTEL_COREI7_ICELAKE_SERVER, -+ M_AMDFAM17H_ZNVER2, -+ M_INTEL_COREI7_CASCADELAKE -+}; -+ -+struct _arch_names_table -+{ -+ const char *const name; -+ const enum processor_model model; -+}; -+ -+static const _arch_names_table arch_names_table[] = -+{ -+ {"amd", M_AMD}, -+ {"intel", M_INTEL}, -+ {"atom", M_INTEL_BONNELL}, -+ {"slm", M_INTEL_SILVERMONT}, -+ {"core2", M_INTEL_CORE2}, -+ {"corei7", M_INTEL_COREI7}, -+ {"nehalem", M_INTEL_COREI7_NEHALEM}, -+ {"westmere", M_INTEL_COREI7_WESTMERE}, -+ {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE}, -+ {"ivybridge", M_INTEL_COREI7_IVYBRIDGE}, -+ {"haswell", M_INTEL_COREI7_HASWELL}, -+ {"broadwell", M_INTEL_COREI7_BROADWELL}, -+ {"skylake", M_INTEL_COREI7_SKYLAKE}, -+ {"skylake-avx512", M_INTEL_COREI7_SKYLAKE_AVX512}, -+ {"cannonlake", M_INTEL_COREI7_CANNONLAKE}, -+ {"icelake-client", M_INTEL_COREI7_ICELAKE_CLIENT}, -+ {"icelake-server", M_INTEL_COREI7_ICELAKE_SERVER}, -+ {"cascadelake", M_INTEL_COREI7_CASCADELAKE}, -+ {"bonnell", M_INTEL_BONNELL}, -+ {"silvermont", M_INTEL_SILVERMONT}, -+ {"goldmont", M_INTEL_GOLDMONT}, -+ {"goldmont-plus", M_INTEL_GOLDMONT_PLUS}, -+ {"tremont", M_INTEL_TREMONT}, -+ {"knl", M_INTEL_KNL}, -+ {"knm", M_INTEL_KNM}, -+ {"amdfam10h", M_AMDFAM10H}, -+ {"barcelona", M_AMDFAM10H_BARCELONA}, -+ {"shanghai", M_AMDFAM10H_SHANGHAI}, -+ {"istanbul", M_AMDFAM10H_ISTANBUL}, -+ {"btver1", M_AMD_BTVER1}, -+ {"amdfam15h", M_AMDFAM15H}, -+ {"bdver1", M_AMDFAM15H_BDVER1}, -+ {"bdver2", M_AMDFAM15H_BDVER2}, -+ {"bdver3", M_AMDFAM15H_BDVER3}, -+ {"bdver4", M_AMDFAM15H_BDVER4}, -+ {"btver2", M_AMD_BTVER2}, -+ {"amdfam17h", M_AMDFAM17H}, -+ {"znver1", M_AMDFAM17H_ZNVER1}, -+ {"znver2", M_AMDFAM17H_ZNVER2}, -+}; -+ -+/* These are the target attribute strings for which a dispatcher is -+ available, from fold_builtin_cpu. 
*/ -+struct _isa_names_table -+{ -+ const char *const name; -+ const enum processor_features feature; -+ const enum feature_priority priority; -+}; -+ -+static const _isa_names_table isa_names_table[] = -+{ -+ {"cmov", F_CMOV, P_ZERO}, -+ {"mmx", F_MMX, P_MMX}, -+ {"popcnt", F_POPCNT, P_POPCNT}, -+ {"sse", F_SSE, P_SSE}, -+ {"sse2", F_SSE2, P_SSE2}, -+ {"sse3", F_SSE3, P_SSE3}, -+ {"ssse3", F_SSSE3, P_SSSE3}, -+ {"sse4a", F_SSE4_A, P_SSE4_A}, -+ {"sse4.1", F_SSE4_1, P_SSE4_1}, -+ {"sse4.2", F_SSE4_2, P_SSE4_2}, -+ {"avx", F_AVX, P_AVX}, -+ {"fma4", F_FMA4, P_FMA4}, -+ {"xop", F_XOP, P_XOP}, -+ {"fma", F_FMA, P_FMA}, -+ {"avx2", F_AVX2, P_AVX2}, -+ {"avx512f", F_AVX512F, P_AVX512F}, -+ {"bmi", F_BMI, P_BMI}, -+ {"bmi2", F_BMI2, P_BMI2}, -+ {"aes", F_AES, P_AES}, -+ {"pclmul", F_PCLMUL, P_PCLMUL}, -+ {"avx512vl",F_AVX512VL, P_ZERO}, -+ {"avx512bw",F_AVX512BW, P_ZERO}, -+ {"avx512dq",F_AVX512DQ, P_ZERO}, -+ {"avx512cd",F_AVX512CD, P_ZERO}, -+ {"avx512er",F_AVX512ER, P_ZERO}, -+ {"avx512pf",F_AVX512PF, P_ZERO}, -+ {"avx512vbmi",F_AVX512VBMI, P_ZERO}, -+ {"avx512ifma",F_AVX512IFMA, P_ZERO}, -+ {"avx5124vnniw",F_AVX5124VNNIW, P_ZERO}, -+ {"avx5124fmaps",F_AVX5124FMAPS, P_ZERO}, -+ {"avx512vpopcntdq",F_AVX512VPOPCNTDQ, P_ZERO}, -+ {"avx512vbmi2", F_AVX512VBMI2, P_ZERO}, -+ {"gfni", F_GFNI, P_ZERO}, -+ {"vpclmulqdq", F_VPCLMULQDQ, P_ZERO}, -+ {"avx512vnni", F_AVX512VNNI, P_ZERO}, -+ {"avx512bitalg", F_AVX512BITALG, P_ZERO} -+}; -+ -+/* This parses the attribute arguments to target in DECL and determines -+ the right builtin to use to match the platform specification. -+ It returns the priority value for this version decl. If PREDICATE_LIST -+ is not NULL, it stores the list of cpu features that need to be checked -+ before dispatching this function. */ -+ -+unsigned int -+get_builtin_code_for_version (tree decl, tree *predicate_list) -+{ -+ tree attrs; -+ struct cl_target_option cur_target; -+ tree target_node; -+ struct cl_target_option *new_target; -+ const char *arg_str = NULL; -+ const char *attrs_str = NULL; -+ char *tok_str = NULL; -+ char *token; -+ -+ enum feature_priority priority = P_ZERO; -+ -+ static unsigned int NUM_FEATURES -+ = sizeof (isa_names_table) / sizeof (_isa_names_table); -+ -+ unsigned int i; -+ -+ tree predicate_chain = NULL_TREE; -+ tree predicate_decl, predicate_arg; -+ -+ attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl)); -+ gcc_assert (attrs != NULL); -+ -+ attrs = TREE_VALUE (TREE_VALUE (attrs)); -+ -+ gcc_assert (TREE_CODE (attrs) == STRING_CST); -+ attrs_str = TREE_STRING_POINTER (attrs); -+ -+ /* Return priority zero for default function. */ -+ if (strcmp (attrs_str, "default") == 0) -+ return 0; -+ -+ /* Handle arch= if specified. For priority, set it to be 1 more than -+ the best instruction set the processor can handle. For instance, if -+ there is a version for atom and a version for ssse3 (the highest ISA -+ priority for atom), the atom version must be checked for dispatch -+ before the ssse3 version. 
*/ -+ if (strstr (attrs_str, "arch=") != NULL) -+ { -+ cl_target_option_save (&cur_target, &global_options); -+ target_node -+ = ix86_valid_target_attribute_tree (decl, attrs, &global_options, -+ &global_options_set, 0); -+ -+ gcc_assert (target_node); -+ if (target_node == error_mark_node) -+ return 0; -+ new_target = TREE_TARGET_OPTION (target_node); -+ gcc_assert (new_target); -+ -+ if (new_target->arch_specified && new_target->arch > 0) -+ { -+ switch (new_target->arch) -+ { -+ case PROCESSOR_CORE2: -+ arg_str = "core2"; -+ priority = P_PROC_SSSE3; -+ break; -+ case PROCESSOR_NEHALEM: -+ if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_PCLMUL) -+ { -+ arg_str = "westmere"; -+ priority = P_PCLMUL; -+ } -+ else -+ { -+ /* We translate "arch=corei7" and "arch=nehalem" to -+ "corei7" so that it will be mapped to M_INTEL_COREI7 -+ as cpu type to cover all M_INTEL_COREI7_XXXs. */ -+ arg_str = "corei7"; -+ priority = P_PROC_SSE4_2; -+ } -+ break; -+ case PROCESSOR_SANDYBRIDGE: -+ if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C) -+ arg_str = "ivybridge"; -+ else -+ arg_str = "sandybridge"; -+ priority = P_PROC_AVX; -+ break; -+ case PROCESSOR_HASWELL: -+ if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX) -+ arg_str = "broadwell"; -+ else -+ arg_str = "haswell"; -+ priority = P_PROC_AVX2; -+ break; -+ case PROCESSOR_SKYLAKE: -+ arg_str = "skylake"; -+ priority = P_PROC_AVX2; -+ break; -+ case PROCESSOR_SKYLAKE_AVX512: -+ arg_str = "skylake-avx512"; -+ priority = P_PROC_AVX512F; -+ break; -+ case PROCESSOR_CANNONLAKE: -+ arg_str = "cannonlake"; -+ priority = P_PROC_AVX512F; -+ break; -+ case PROCESSOR_ICELAKE_CLIENT: -+ arg_str = "icelake-client"; -+ priority = P_PROC_AVX512F; -+ break; -+ case PROCESSOR_ICELAKE_SERVER: -+ arg_str = "icelake-server"; -+ priority = P_PROC_AVX512F; -+ break; -+ case PROCESSOR_CASCADELAKE: -+ arg_str = "cascadelake"; -+ priority = P_PROC_AVX512F; -+ break; -+ case PROCESSOR_BONNELL: -+ arg_str = "bonnell"; -+ priority = P_PROC_SSSE3; -+ break; -+ case PROCESSOR_KNL: -+ arg_str = "knl"; -+ priority = P_PROC_AVX512F; -+ break; -+ case PROCESSOR_KNM: -+ arg_str = "knm"; -+ priority = P_PROC_AVX512F; -+ break; -+ case PROCESSOR_SILVERMONT: -+ arg_str = "silvermont"; -+ priority = P_PROC_SSE4_2; -+ break; -+ case PROCESSOR_GOLDMONT: -+ arg_str = "goldmont"; -+ priority = P_PROC_SSE4_2; -+ break; -+ case PROCESSOR_GOLDMONT_PLUS: -+ arg_str = "goldmont-plus"; -+ priority = P_PROC_SSE4_2; -+ break; -+ case PROCESSOR_TREMONT: -+ arg_str = "tremont"; -+ priority = P_PROC_SSE4_2; -+ break; -+ case PROCESSOR_AMDFAM10: -+ arg_str = "amdfam10h"; -+ priority = P_PROC_SSE4_A; -+ break; -+ case PROCESSOR_BTVER1: -+ arg_str = "btver1"; -+ priority = P_PROC_SSE4_A; -+ break; -+ case PROCESSOR_BTVER2: -+ arg_str = "btver2"; -+ priority = P_PROC_BMI; -+ break; -+ case PROCESSOR_BDVER1: -+ arg_str = "bdver1"; -+ priority = P_PROC_XOP; -+ break; -+ case PROCESSOR_BDVER2: -+ arg_str = "bdver2"; -+ priority = P_PROC_FMA; -+ break; -+ case PROCESSOR_BDVER3: -+ arg_str = "bdver3"; -+ priority = P_PROC_FMA; -+ break; -+ case PROCESSOR_BDVER4: -+ arg_str = "bdver4"; -+ priority = P_PROC_AVX2; -+ break; -+ case PROCESSOR_ZNVER1: -+ arg_str = "znver1"; -+ priority = P_PROC_AVX2; -+ break; -+ case PROCESSOR_ZNVER2: -+ arg_str = "znver2"; -+ priority = P_PROC_AVX2; -+ break; -+ } -+ } -+ -+ cl_target_option_restore (&global_options, &cur_target); -+ -+ if (predicate_list && arg_str == NULL) -+ { -+ error_at (DECL_SOURCE_LOCATION (decl), -+ "no dispatcher found for the versioning 
attributes"); -+ return 0; -+ } -+ -+ if (predicate_list) -+ { -+ predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS]; -+ /* For a C string literal the length includes the trailing NULL. */ -+ predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str); -+ predicate_chain = tree_cons (predicate_decl, predicate_arg, -+ predicate_chain); -+ } -+ } -+ -+ /* Process feature name. */ -+ tok_str = (char *) xmalloc (strlen (attrs_str) + 1); -+ strcpy (tok_str, attrs_str); -+ token = strtok (tok_str, ","); -+ predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS]; -+ -+ while (token != NULL) -+ { -+ /* Do not process "arch=" */ -+ if (strncmp (token, "arch=", 5) == 0) -+ { -+ token = strtok (NULL, ","); -+ continue; -+ } -+ for (i = 0; i < NUM_FEATURES; ++i) -+ { -+ if (strcmp (token, isa_names_table[i].name) == 0) -+ { -+ if (predicate_list) -+ { -+ predicate_arg = build_string_literal ( -+ strlen (isa_names_table[i].name) + 1, -+ isa_names_table[i].name); -+ predicate_chain = tree_cons (predicate_decl, predicate_arg, -+ predicate_chain); -+ } -+ /* Find the maximum priority feature. */ -+ if (isa_names_table[i].priority > priority) -+ priority = isa_names_table[i].priority; -+ -+ break; -+ } -+ } -+ if (predicate_list && priority == P_ZERO) -+ { -+ error_at (DECL_SOURCE_LOCATION (decl), -+ "ISA %qs is not supported in % attribute, " -+ "use % syntax", token); -+ return 0; -+ } -+ token = strtok (NULL, ","); -+ } -+ free (tok_str); -+ -+ if (predicate_list && predicate_chain == NULL_TREE) -+ { -+ error_at (DECL_SOURCE_LOCATION (decl), -+ "no dispatcher found for the versioning attributes: %s", -+ attrs_str); -+ return 0; -+ } -+ else if (predicate_list) -+ { -+ predicate_chain = nreverse (predicate_chain); -+ *predicate_list = predicate_chain; -+ } -+ -+ return priority; -+} -+ -+/* This builds the processor_model struct type defined in -+ libgcc/config/i386/cpuinfo.c */ -+ -+static tree -+build_processor_model_struct (void) -+{ -+ const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype", -+ "__cpu_features"}; -+ tree field = NULL_TREE, field_chain = NULL_TREE; -+ int i; -+ tree type = make_node (RECORD_TYPE); -+ -+ /* The first 3 fields are unsigned int. */ -+ for (i = 0; i < 3; ++i) -+ { -+ field = build_decl (UNKNOWN_LOCATION, FIELD_DECL, -+ get_identifier (field_name[i]), unsigned_type_node); -+ if (field_chain != NULL_TREE) -+ DECL_CHAIN (field) = field_chain; -+ field_chain = field; -+ } -+ -+ /* The last field is an array of unsigned integers of size one. */ -+ field = build_decl (UNKNOWN_LOCATION, FIELD_DECL, -+ get_identifier (field_name[3]), -+ build_array_type (unsigned_type_node, -+ build_index_type (size_one_node))); -+ if (field_chain != NULL_TREE) -+ DECL_CHAIN (field) = field_chain; -+ field_chain = field; -+ -+ finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE); -+ return type; -+} -+ -+/* Returns a extern, comdat VAR_DECL of type TYPE and name NAME. 
*/ -+ -+static tree -+make_var_decl (tree type, const char *name) -+{ -+ tree new_decl; -+ -+ new_decl = build_decl (UNKNOWN_LOCATION, -+ VAR_DECL, -+ get_identifier(name), -+ type); -+ -+ DECL_EXTERNAL (new_decl) = 1; -+ TREE_STATIC (new_decl) = 1; -+ TREE_PUBLIC (new_decl) = 1; -+ DECL_INITIAL (new_decl) = 0; -+ DECL_ARTIFICIAL (new_decl) = 0; -+ DECL_PRESERVE_P (new_decl) = 1; -+ -+ make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl)); -+ assemble_variable (new_decl, 0, 0, 0); -+ -+ return new_decl; -+} -+ -+/* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded -+ into an integer defined in libgcc/config/i386/cpuinfo.c */ -+ -+tree -+fold_builtin_cpu (tree fndecl, tree *args) -+{ -+ unsigned int i; -+ enum ix86_builtins fn_code -+ = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl); -+ tree param_string_cst = NULL; -+ -+ tree __processor_model_type = build_processor_model_struct (); -+ tree __cpu_model_var = make_var_decl (__processor_model_type, -+ "__cpu_model"); -+ -+ -+ varpool_node::add (__cpu_model_var); -+ -+ gcc_assert ((args != NULL) && (*args != NULL)); -+ -+ param_string_cst = *args; -+ while (param_string_cst -+ && TREE_CODE (param_string_cst) != STRING_CST) -+ { -+ /* *args must be a expr that can contain other EXPRS leading to a -+ STRING_CST. */ -+ if (!EXPR_P (param_string_cst)) -+ { -+ error ("parameter to builtin must be a string constant or literal"); -+ return integer_zero_node; -+ } -+ param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0); -+ } -+ -+ gcc_assert (param_string_cst); -+ -+ if (fn_code == IX86_BUILTIN_CPU_IS) -+ { -+ tree ref; -+ tree field; -+ tree final; -+ -+ unsigned int field_val = 0; -+ unsigned int NUM_ARCH_NAMES -+ = sizeof (arch_names_table) / sizeof (struct _arch_names_table); -+ -+ for (i = 0; i < NUM_ARCH_NAMES; i++) -+ if (strcmp (arch_names_table[i].name, -+ TREE_STRING_POINTER (param_string_cst)) == 0) -+ break; -+ -+ if (i == NUM_ARCH_NAMES) -+ { -+ error ("parameter to builtin not valid: %s", -+ TREE_STRING_POINTER (param_string_cst)); -+ return integer_zero_node; -+ } -+ -+ field = TYPE_FIELDS (__processor_model_type); -+ field_val = arch_names_table[i].model; -+ -+ /* CPU types are stored in the next field. */ -+ if (field_val > M_CPU_TYPE_START -+ && field_val < M_CPU_SUBTYPE_START) -+ { -+ field = DECL_CHAIN (field); -+ field_val -= M_CPU_TYPE_START; -+ } -+ -+ /* CPU subtypes are stored in the next field. */ -+ if (field_val > M_CPU_SUBTYPE_START) -+ { -+ field = DECL_CHAIN ( DECL_CHAIN (field)); -+ field_val -= M_CPU_SUBTYPE_START; -+ } -+ -+ /* Get the appropriate field in __cpu_model. */ -+ ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var, -+ field, NULL_TREE); -+ -+ /* Check the value. 
*/ -+ final = build2 (EQ_EXPR, unsigned_type_node, ref, -+ build_int_cstu (unsigned_type_node, field_val)); -+ return build1 (CONVERT_EXPR, integer_type_node, final); -+ } -+ else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS) -+ { -+ tree ref; -+ tree array_elt; -+ tree field; -+ tree final; -+ -+ unsigned int field_val = 0; -+ unsigned int NUM_ISA_NAMES -+ = sizeof (isa_names_table) / sizeof (struct _isa_names_table); -+ -+ for (i = 0; i < NUM_ISA_NAMES; i++) -+ if (strcmp (isa_names_table[i].name, -+ TREE_STRING_POINTER (param_string_cst)) == 0) -+ break; -+ -+ if (i == NUM_ISA_NAMES) -+ { -+ error ("parameter to builtin not valid: %s", -+ TREE_STRING_POINTER (param_string_cst)); -+ return integer_zero_node; -+ } -+ -+ if (isa_names_table[i].feature >= 32) -+ { -+ tree __cpu_features2_var = make_var_decl (unsigned_type_node, -+ "__cpu_features2"); -+ -+ varpool_node::add (__cpu_features2_var); -+ field_val = (1U << (isa_names_table[i].feature - 32)); -+ /* Return __cpu_features2 & field_val */ -+ final = build2 (BIT_AND_EXPR, unsigned_type_node, -+ __cpu_features2_var, -+ build_int_cstu (unsigned_type_node, field_val)); -+ return build1 (CONVERT_EXPR, integer_type_node, final); -+ } -+ -+ field = TYPE_FIELDS (__processor_model_type); -+ /* Get the last field, which is __cpu_features. */ -+ while (DECL_CHAIN (field)) -+ field = DECL_CHAIN (field); -+ -+ /* Get the appropriate field: __cpu_model.__cpu_features */ -+ ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var, -+ field, NULL_TREE); -+ -+ /* Access the 0th element of __cpu_features array. */ -+ array_elt = build4 (ARRAY_REF, unsigned_type_node, ref, -+ integer_zero_node, NULL_TREE, NULL_TREE); -+ -+ field_val = (1U << isa_names_table[i].feature); -+ /* Return __cpu_model.__cpu_features[0] & field_val */ -+ final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt, -+ build_int_cstu (unsigned_type_node, field_val)); -+ return build1 (CONVERT_EXPR, integer_type_node, final); -+ } -+ gcc_unreachable (); -+} -+ -+#include "gt-i386-builtins.h" -diff --git a/gcc/config/i386/i386-builtins.h b/gcc/config/i386/i386-builtins.h -new file mode 100644 -index 000000000..c0264e5bf ---- /dev/null -+++ b/gcc/config/i386/i386-builtins.h -@@ -0,0 +1,330 @@ -+/* Copyright (C) 1988-2019 Free Software Foundation, Inc. -+ -+This file is part of GCC. -+ -+GCC is free software; you can redistribute it and/or modify -+it under the terms of the GNU General Public License as published by -+the Free Software Foundation; either version 3, or (at your option) -+any later version. -+ -+GCC is distributed in the hope that it will be useful, -+but WITHOUT ANY WARRANTY; without even the implied warranty of -+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+GNU General Public License for more details. -+ -+You should have received a copy of the GNU General Public License -+along with GCC; see the file COPYING3. If not see -+. */ -+ -+#ifndef GCC_I386_BUILTINS_H -+#define GCC_I386_BUILTINS_H -+ -+/* The following file contains several enumerations and data structures -+ built from the definitions in i386-builtin-types.def. */ -+ -+#include "i386-builtin-types.inc" -+ -+/* Codes for all the SSE/MMX builtins. Builtins not mentioned in any -+ bdesc_* arrays below should come first, then builtins for each bdesc_* -+ array in ascending order, so that we can use direct array accesses. 
*/ -+enum ix86_builtins -+{ -+ IX86_BUILTIN_MASKMOVQ, -+ IX86_BUILTIN_LDMXCSR, -+ IX86_BUILTIN_STMXCSR, -+ IX86_BUILTIN_MASKMOVDQU, -+ IX86_BUILTIN_PSLLDQ128, -+ IX86_BUILTIN_CLFLUSH, -+ IX86_BUILTIN_MONITOR, -+ IX86_BUILTIN_MWAIT, -+ IX86_BUILTIN_UMONITOR, -+ IX86_BUILTIN_UMWAIT, -+ IX86_BUILTIN_TPAUSE, -+ IX86_BUILTIN_CLZERO, -+ IX86_BUILTIN_CLDEMOTE, -+ IX86_BUILTIN_VEC_INIT_V2SI, -+ IX86_BUILTIN_VEC_INIT_V4HI, -+ IX86_BUILTIN_VEC_INIT_V8QI, -+ IX86_BUILTIN_VEC_EXT_V2DF, -+ IX86_BUILTIN_VEC_EXT_V2DI, -+ IX86_BUILTIN_VEC_EXT_V4SF, -+ IX86_BUILTIN_VEC_EXT_V4SI, -+ IX86_BUILTIN_VEC_EXT_V8HI, -+ IX86_BUILTIN_VEC_EXT_V2SI, -+ IX86_BUILTIN_VEC_EXT_V4HI, -+ IX86_BUILTIN_VEC_EXT_V16QI, -+ IX86_BUILTIN_VEC_SET_V2DI, -+ IX86_BUILTIN_VEC_SET_V4SF, -+ IX86_BUILTIN_VEC_SET_V4SI, -+ IX86_BUILTIN_VEC_SET_V8HI, -+ IX86_BUILTIN_VEC_SET_V4HI, -+ IX86_BUILTIN_VEC_SET_V16QI, -+ IX86_BUILTIN_GATHERSIV2DF, -+ IX86_BUILTIN_GATHERSIV4DF, -+ IX86_BUILTIN_GATHERDIV2DF, -+ IX86_BUILTIN_GATHERDIV4DF, -+ IX86_BUILTIN_GATHERSIV4SF, -+ IX86_BUILTIN_GATHERSIV8SF, -+ IX86_BUILTIN_GATHERDIV4SF, -+ IX86_BUILTIN_GATHERDIV8SF, -+ IX86_BUILTIN_GATHERSIV2DI, -+ IX86_BUILTIN_GATHERSIV4DI, -+ IX86_BUILTIN_GATHERDIV2DI, -+ IX86_BUILTIN_GATHERDIV4DI, -+ IX86_BUILTIN_GATHERSIV4SI, -+ IX86_BUILTIN_GATHERSIV8SI, -+ IX86_BUILTIN_GATHERDIV4SI, -+ IX86_BUILTIN_GATHERDIV8SI, -+ IX86_BUILTIN_GATHER3SIV8SF, -+ IX86_BUILTIN_GATHER3SIV4SF, -+ IX86_BUILTIN_GATHER3SIV4DF, -+ IX86_BUILTIN_GATHER3SIV2DF, -+ IX86_BUILTIN_GATHER3DIV8SF, -+ IX86_BUILTIN_GATHER3DIV4SF, -+ IX86_BUILTIN_GATHER3DIV4DF, -+ IX86_BUILTIN_GATHER3DIV2DF, -+ IX86_BUILTIN_GATHER3SIV8SI, -+ IX86_BUILTIN_GATHER3SIV4SI, -+ IX86_BUILTIN_GATHER3SIV4DI, -+ IX86_BUILTIN_GATHER3SIV2DI, -+ IX86_BUILTIN_GATHER3DIV8SI, -+ IX86_BUILTIN_GATHER3DIV4SI, -+ IX86_BUILTIN_GATHER3DIV4DI, -+ IX86_BUILTIN_GATHER3DIV2DI, -+ IX86_BUILTIN_SCATTERSIV8SF, -+ IX86_BUILTIN_SCATTERSIV4SF, -+ IX86_BUILTIN_SCATTERSIV4DF, -+ IX86_BUILTIN_SCATTERSIV2DF, -+ IX86_BUILTIN_SCATTERDIV8SF, -+ IX86_BUILTIN_SCATTERDIV4SF, -+ IX86_BUILTIN_SCATTERDIV4DF, -+ IX86_BUILTIN_SCATTERDIV2DF, -+ IX86_BUILTIN_SCATTERSIV8SI, -+ IX86_BUILTIN_SCATTERSIV4SI, -+ IX86_BUILTIN_SCATTERSIV4DI, -+ IX86_BUILTIN_SCATTERSIV2DI, -+ IX86_BUILTIN_SCATTERDIV8SI, -+ IX86_BUILTIN_SCATTERDIV4SI, -+ IX86_BUILTIN_SCATTERDIV4DI, -+ IX86_BUILTIN_SCATTERDIV2DI, -+ /* Alternate 4 and 8 element gather/scatter for the vectorizer -+ where all operands are 32-byte or 64-byte wide respectively. 
*/ -+ IX86_BUILTIN_GATHERALTSIV4DF, -+ IX86_BUILTIN_GATHERALTDIV8SF, -+ IX86_BUILTIN_GATHERALTSIV4DI, -+ IX86_BUILTIN_GATHERALTDIV8SI, -+ IX86_BUILTIN_GATHER3ALTDIV16SF, -+ IX86_BUILTIN_GATHER3ALTDIV16SI, -+ IX86_BUILTIN_GATHER3ALTSIV4DF, -+ IX86_BUILTIN_GATHER3ALTDIV8SF, -+ IX86_BUILTIN_GATHER3ALTSIV4DI, -+ IX86_BUILTIN_GATHER3ALTDIV8SI, -+ IX86_BUILTIN_GATHER3ALTSIV8DF, -+ IX86_BUILTIN_GATHER3ALTSIV8DI, -+ IX86_BUILTIN_GATHER3DIV16SF, -+ IX86_BUILTIN_GATHER3DIV16SI, -+ IX86_BUILTIN_GATHER3DIV8DF, -+ IX86_BUILTIN_GATHER3DIV8DI, -+ IX86_BUILTIN_GATHER3SIV16SF, -+ IX86_BUILTIN_GATHER3SIV16SI, -+ IX86_BUILTIN_GATHER3SIV8DF, -+ IX86_BUILTIN_GATHER3SIV8DI, -+ IX86_BUILTIN_SCATTERALTSIV8DF, -+ IX86_BUILTIN_SCATTERALTDIV16SF, -+ IX86_BUILTIN_SCATTERALTSIV8DI, -+ IX86_BUILTIN_SCATTERALTDIV16SI, -+ IX86_BUILTIN_SCATTERALTSIV4DF, -+ IX86_BUILTIN_SCATTERALTDIV8SF, -+ IX86_BUILTIN_SCATTERALTSIV4DI, -+ IX86_BUILTIN_SCATTERALTDIV8SI, -+ IX86_BUILTIN_SCATTERALTSIV2DF, -+ IX86_BUILTIN_SCATTERALTDIV4SF, -+ IX86_BUILTIN_SCATTERALTSIV2DI, -+ IX86_BUILTIN_SCATTERALTDIV4SI, -+ IX86_BUILTIN_SCATTERDIV16SF, -+ IX86_BUILTIN_SCATTERDIV16SI, -+ IX86_BUILTIN_SCATTERDIV8DF, -+ IX86_BUILTIN_SCATTERDIV8DI, -+ IX86_BUILTIN_SCATTERSIV16SF, -+ IX86_BUILTIN_SCATTERSIV16SI, -+ IX86_BUILTIN_SCATTERSIV8DF, -+ IX86_BUILTIN_SCATTERSIV8DI, -+ IX86_BUILTIN_GATHERPFQPD, -+ IX86_BUILTIN_GATHERPFDPS, -+ IX86_BUILTIN_GATHERPFDPD, -+ IX86_BUILTIN_GATHERPFQPS, -+ IX86_BUILTIN_SCATTERPFDPD, -+ IX86_BUILTIN_SCATTERPFDPS, -+ IX86_BUILTIN_SCATTERPFQPD, -+ IX86_BUILTIN_SCATTERPFQPS, -+ IX86_BUILTIN_CLWB, -+ IX86_BUILTIN_CLFLUSHOPT, -+ IX86_BUILTIN_INFQ, -+ IX86_BUILTIN_HUGE_VALQ, -+ IX86_BUILTIN_NANQ, -+ IX86_BUILTIN_NANSQ, -+ IX86_BUILTIN_XABORT, -+ IX86_BUILTIN_ADDCARRYX32, -+ IX86_BUILTIN_ADDCARRYX64, -+ IX86_BUILTIN_SBB32, -+ IX86_BUILTIN_SBB64, -+ IX86_BUILTIN_RDRAND16_STEP, -+ IX86_BUILTIN_RDRAND32_STEP, -+ IX86_BUILTIN_RDRAND64_STEP, -+ IX86_BUILTIN_RDSEED16_STEP, -+ IX86_BUILTIN_RDSEED32_STEP, -+ IX86_BUILTIN_RDSEED64_STEP, -+ IX86_BUILTIN_MONITORX, -+ IX86_BUILTIN_MWAITX, -+ IX86_BUILTIN_CFSTRING, -+ IX86_BUILTIN_CPU_INIT, -+ IX86_BUILTIN_CPU_IS, -+ IX86_BUILTIN_CPU_SUPPORTS, -+ IX86_BUILTIN_READ_FLAGS, -+ IX86_BUILTIN_WRITE_FLAGS, -+ -+ /* All the remaining builtins are tracked in bdesc_* arrays in -+ i386-builtin.def. Don't add any IX86_BUILTIN_* enumerators after -+ this point. */ -+#define BDESC(mask, mask2, icode, name, code, comparison, flag) \ -+ code, -+#define BDESC_FIRST(kind, kindu, mask, mask2, icode, name, code, comparison, flag) \ -+ code, \ -+ IX86_BUILTIN__BDESC_##kindu##_FIRST = code, -+#define BDESC_END(kind, next_kind) -+ -+#include "i386-builtin.def" -+ -+#undef BDESC -+#undef BDESC_FIRST -+#undef BDESC_END -+ -+ IX86_BUILTIN_MAX, -+ -+ IX86_BUILTIN__BDESC_MAX_FIRST = IX86_BUILTIN_MAX, -+ -+ /* Now just the aliases for bdesc_* start/end. */ -+#define BDESC(mask, mask2, icode, name, code, comparison, flag) -+#define BDESC_FIRST(kind, kindu, mask, mask2, icode, name, code, comparison, flag) -+#define BDESC_END(kind, next_kind) \ -+ IX86_BUILTIN__BDESC_##kind##_LAST \ -+ = IX86_BUILTIN__BDESC_##next_kind##_FIRST - 1, -+ -+#include "i386-builtin.def" -+ -+#undef BDESC -+#undef BDESC_FIRST -+#undef BDESC_END -+ -+ /* Just to make sure there is no comma after the last enumerator. */ -+ IX86_BUILTIN__BDESC_MAX_LAST = IX86_BUILTIN__BDESC_MAX_FIRST -+}; -+ -+/* Table of all of the builtin functions that are possible with different ISA's -+ but are waiting to be built until a function is declared to use that -+ ISA. 
*/ -+struct builtin_isa { -+ HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */ -+ HOST_WIDE_INT isa2; /* additional isa_flags this builtin is defined for */ -+ const char *name; /* function name */ -+ enum ix86_builtin_func_type tcode; /* type to use in the declaration */ -+ unsigned char const_p:1; /* true if the declaration is constant */ -+ unsigned char pure_p:1; /* true if the declaration has pure attribute */ -+ bool set_and_not_built_p; -+}; -+ -+/* Bits for builtin_description.flag. */ -+ -+/* Set when we don't support the comparison natively, and should -+ swap_comparison in order to support it. */ -+#define BUILTIN_DESC_SWAP_OPERANDS 1 -+ -+struct builtin_description -+{ -+ const HOST_WIDE_INT mask; -+ const HOST_WIDE_INT mask2; -+ const enum insn_code icode; -+ const char *const name; -+ const enum ix86_builtins code; -+ const enum rtx_code comparison; -+ const int flag; -+}; -+ -+#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT -+#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT -+#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT -+#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT -+#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF -+#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF -+#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF -+#define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF -+#define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI -+#define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI -+#define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI -+#define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI -+#define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI -+#define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI -+#define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI -+#define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI -+#define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI -+#define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI -+#define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF -+#define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF -+#define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI -+#define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI -+#define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI -+#define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI -+#define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI -+#define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI -+#define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI -+#define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI -+#define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP -+#define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP -+#define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP -+#define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP -+#define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF -+#define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF -+#define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF -+#define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF -+#define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF -+#define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF -+#define MULTI_ARG_1_SF V4SF_FTYPE_V4SF -+#define MULTI_ARG_1_DF V2DF_FTYPE_V2DF -+#define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF -+#define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF -+#define MULTI_ARG_1_DI V2DI_FTYPE_V2DI -+#define MULTI_ARG_1_SI V4SI_FTYPE_V4SI -+#define MULTI_ARG_1_HI V8HI_FTYPE_V8HI -+#define MULTI_ARG_1_QI V16QI_FTYPE_V16QI -+#define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI -+#define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI -+#define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI -+#define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI -+#define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI -+#define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI -+ -+#define BDESC(mask, mask2, icode, name, 
code, comparison, flag) \ -+ { mask, mask2, icode, name, code, comparison, flag }, -+#define BDESC_FIRST(kind, kindu, mask, mask2, icode, name, code, comparison, flag) \ -+static const struct builtin_description bdesc_##kind[] = \ -+{ \ -+ BDESC (mask, mask2, icode, name, code, comparison, flag) -+#define BDESC_END(kind, next_kind) \ -+}; -+ -+#include "i386-builtin.def" -+ -+extern builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX]; -+ -+tree ix86_builtin_vectorized_function (unsigned int fn, tree type_out, -+ tree type_in); -+void ix86_init_builtins (void); -+tree ix86_vectorize_builtin_gather (const_tree mem_vectype, -+ const_tree index_type, int scale); -+tree ix86_builtin_decl (unsigned code, bool); -+tree ix86_builtin_reciprocal (tree fndecl); -+unsigned int get_builtin_code_for_version (tree decl, tree *predicate_list); -+tree fold_builtin_cpu (tree fndecl, tree *args); -+tree get_ix86_builtin (enum ix86_builtins c); -+ -+#endif /* GCC_I386_BUILTINS_H */ -diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c -index 5e7e46fce..50cac3b1a 100644 ---- a/gcc/config/i386/i386-c.c -+++ b/gcc/config/i386/i386-c.c -@@ -586,8 +586,9 @@ ix86_pragma_target_parse (tree args, tree pop_target) - } - else - { -- cur_tree = ix86_valid_target_attribute_tree (args, &global_options, -- &global_options_set); -+ cur_tree = ix86_valid_target_attribute_tree (NULL_TREE, args, -+ &global_options, -+ &global_options_set, 0); - if (!cur_tree || cur_tree == error_mark_node) - { - cl_target_option_restore (&global_options, -diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c -new file mode 100644 -index 000000000..01f38b9ea ---- /dev/null -+++ b/gcc/config/i386/i386-expand.c -@@ -0,0 +1,19842 @@ -+/* Copyright (C) 1988-2019 Free Software Foundation, Inc. -+ -+This file is part of GCC. -+ -+GCC is free software; you can redistribute it and/or modify -+it under the terms of the GNU General Public License as published by -+the Free Software Foundation; either version 3, or (at your option) -+any later version. -+ -+GCC is distributed in the hope that it will be useful, -+but WITHOUT ANY WARRANTY; without even the implied warranty of -+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+GNU General Public License for more details. -+ -+You should have received a copy of the GNU General Public License -+along with GCC; see the file COPYING3. If not see -+. 
*/ -+ -+#define IN_TARGET_CODE 1 -+ -+#include "config.h" -+#include "system.h" -+#include "coretypes.h" -+#include "backend.h" -+#include "rtl.h" -+#include "tree.h" -+#include "memmodel.h" -+#include "gimple.h" -+#include "cfghooks.h" -+#include "cfgloop.h" -+#include "df.h" -+#include "tm_p.h" -+#include "stringpool.h" -+#include "expmed.h" -+#include "optabs.h" -+#include "regs.h" -+#include "emit-rtl.h" -+#include "recog.h" -+#include "cgraph.h" -+#include "diagnostic.h" -+#include "cfgbuild.h" -+#include "alias.h" -+#include "fold-const.h" -+#include "attribs.h" -+#include "calls.h" -+#include "stor-layout.h" -+#include "varasm.h" -+#include "output.h" -+#include "insn-attr.h" -+#include "flags.h" -+#include "except.h" -+#include "explow.h" -+#include "expr.h" -+#include "cfgrtl.h" -+#include "common/common-target.h" -+#include "langhooks.h" -+#include "reload.h" -+#include "gimplify.h" -+#include "dwarf2.h" -+#include "tm-constrs.h" -+#include "params.h" -+#include "cselib.h" -+#include "sched-int.h" -+#include "opts.h" -+#include "tree-pass.h" -+#include "context.h" -+#include "pass_manager.h" -+#include "target-globals.h" -+#include "gimple-iterator.h" -+#include "tree-vectorizer.h" -+#include "shrink-wrap.h" -+#include "builtins.h" -+#include "rtl-iter.h" -+#include "tree-iterator.h" -+#include "dbgcnt.h" -+#include "case-cfn-macros.h" -+#include "dojump.h" -+#include "fold-const-call.h" -+#include "tree-vrp.h" -+#include "tree-ssanames.h" -+#include "selftest.h" -+#include "selftest-rtl.h" -+#include "print-rtl.h" -+#include "intl.h" -+#include "ifcvt.h" -+#include "symbol-summary.h" -+#include "ipa-prop.h" -+#include "ipa-fnsummary.h" -+#include "wide-int-bitmask.h" -+#include "tree-vector-builder.h" -+#include "debug.h" -+#include "dwarf2out.h" -+#include "i386-options.h" -+#include "i386-builtins.h" -+#include "i386-expand.h" -+ -+/* Split one or more double-mode RTL references into pairs of half-mode -+ references. The RTL can be REG, offsettable MEM, integer constant, or -+ CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to -+ split and "num" is its length. lo_half and hi_half are output arrays -+ that parallel "operands". */ -+ -+void -+split_double_mode (machine_mode mode, rtx operands[], -+ int num, rtx lo_half[], rtx hi_half[]) -+{ -+ machine_mode half_mode; -+ unsigned int byte; -+ -+ switch (mode) -+ { -+ case E_TImode: -+ half_mode = DImode; -+ break; -+ case E_DImode: -+ half_mode = SImode; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ -+ byte = GET_MODE_SIZE (half_mode); -+ -+ while (num--) -+ { -+ rtx op = operands[num]; -+ -+ /* simplify_subreg refuse to split volatile memory addresses, -+ but we still have to handle it. */ -+ if (MEM_P (op)) -+ { -+ lo_half[num] = adjust_address (op, half_mode, 0); -+ hi_half[num] = adjust_address (op, half_mode, byte); -+ } -+ else -+ { -+ lo_half[num] = simplify_gen_subreg (half_mode, op, -+ GET_MODE (op) == VOIDmode -+ ? mode : GET_MODE (op), 0); -+ hi_half[num] = simplify_gen_subreg (half_mode, op, -+ GET_MODE (op) == VOIDmode -+ ? mode : GET_MODE (op), byte); -+ } -+ } -+} -+ -+/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate -+ for the target. */ -+ -+void -+ix86_expand_clear (rtx dest) -+{ -+ rtx tmp; -+ -+ /* We play register width games, which are only valid after reload. */ -+ gcc_assert (reload_completed); -+ -+ /* Avoid HImode and its attendant prefix byte. 
*/ -+ if (GET_MODE_SIZE (GET_MODE (dest)) < 4) -+ dest = gen_rtx_REG (SImode, REGNO (dest)); -+ tmp = gen_rtx_SET (dest, const0_rtx); -+ -+ if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ()) -+ { -+ rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); -+ tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob)); -+ } -+ -+ emit_insn (tmp); -+} -+ -+void -+ix86_expand_move (machine_mode mode, rtx operands[]) -+{ -+ rtx op0, op1; -+ rtx tmp, addend = NULL_RTX; -+ enum tls_model model; -+ -+ op0 = operands[0]; -+ op1 = operands[1]; -+ -+ switch (GET_CODE (op1)) -+ { -+ case CONST: -+ tmp = XEXP (op1, 0); -+ -+ if (GET_CODE (tmp) != PLUS -+ || GET_CODE (XEXP (tmp, 0)) != SYMBOL_REF) -+ break; -+ -+ op1 = XEXP (tmp, 0); -+ addend = XEXP (tmp, 1); -+ /* FALLTHRU */ -+ -+ case SYMBOL_REF: -+ model = SYMBOL_REF_TLS_MODEL (op1); -+ -+ if (model) -+ op1 = legitimize_tls_address (op1, model, true); -+ else if (ix86_force_load_from_GOT_p (op1)) -+ { -+ /* Load the external function address via GOT slot to avoid PLT. */ -+ op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1), -+ (TARGET_64BIT -+ ? UNSPEC_GOTPCREL -+ : UNSPEC_GOT)); -+ op1 = gen_rtx_CONST (Pmode, op1); -+ op1 = gen_const_mem (Pmode, op1); -+ set_mem_alias_set (op1, ix86_GOT_alias_set ()); -+ } -+ else -+ { -+ tmp = legitimize_pe_coff_symbol (op1, addend != NULL_RTX); -+ if (tmp) -+ { -+ op1 = tmp; -+ if (!addend) -+ break; -+ } -+ else -+ { -+ op1 = operands[1]; -+ break; -+ } -+ } -+ -+ if (addend) -+ { -+ op1 = force_operand (op1, NULL_RTX); -+ op1 = expand_simple_binop (Pmode, PLUS, op1, addend, -+ op0, 1, OPTAB_DIRECT); -+ } -+ else -+ op1 = force_operand (op1, op0); -+ -+ if (op1 == op0) -+ return; -+ -+ op1 = convert_to_mode (mode, op1, 1); -+ -+ default: -+ break; -+ } -+ -+ if ((flag_pic || MACHOPIC_INDIRECT) -+ && symbolic_operand (op1, mode)) -+ { -+ if (TARGET_MACHO && !TARGET_64BIT) -+ { -+#if TARGET_MACHO -+ /* dynamic-no-pic */ -+ if (MACHOPIC_INDIRECT) -+ { -+ rtx temp = (op0 && REG_P (op0) && mode == Pmode) -+ ? op0 : gen_reg_rtx (Pmode); -+ op1 = machopic_indirect_data_reference (op1, temp); -+ if (MACHOPIC_PURE) -+ op1 = machopic_legitimize_pic_address (op1, mode, -+ temp == op1 ? 0 : temp); -+ } -+ if (op0 != op1 && GET_CODE (op0) != MEM) -+ { -+ rtx insn = gen_rtx_SET (op0, op1); -+ emit_insn (insn); -+ return; -+ } -+ if (GET_CODE (op0) == MEM) -+ op1 = force_reg (Pmode, op1); -+ else -+ { -+ rtx temp = op0; -+ if (GET_CODE (temp) != REG) -+ temp = gen_reg_rtx (Pmode); -+ temp = legitimize_pic_address (op1, temp); -+ if (temp == op0) -+ return; -+ op1 = temp; -+ } -+ /* dynamic-no-pic */ -+#endif -+ } -+ else -+ { -+ if (MEM_P (op0)) -+ op1 = force_reg (mode, op1); -+ else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode))) -+ { -+ rtx reg = can_create_pseudo_p () ? NULL_RTX : op0; -+ op1 = legitimize_pic_address (op1, reg); -+ if (op0 == op1) -+ return; -+ op1 = convert_to_mode (mode, op1, 1); -+ } -+ } -+ } -+ else -+ { -+ if (MEM_P (op0) -+ && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode) -+ || !push_operand (op0, mode)) -+ && MEM_P (op1)) -+ op1 = force_reg (mode, op1); -+ -+ if (push_operand (op0, mode) -+ && ! general_no_elim_operand (op1, mode)) -+ op1 = copy_to_mode_reg (mode, op1); -+ -+ /* Force large constants in 64bit compilation into register -+ to get them CSEed. 
*/ -+ if (can_create_pseudo_p () -+ && (mode == DImode) && TARGET_64BIT -+ && immediate_operand (op1, mode) -+ && !x86_64_zext_immediate_operand (op1, VOIDmode) -+ && !register_operand (op0, mode) -+ && optimize) -+ op1 = copy_to_mode_reg (mode, op1); -+ -+ if (can_create_pseudo_p () -+ && CONST_DOUBLE_P (op1)) -+ { -+ /* If we are loading a floating point constant to a register, -+ force the value to memory now, since we'll get better code -+ out the back end. */ -+ -+ op1 = validize_mem (force_const_mem (mode, op1)); -+ if (!register_operand (op0, mode)) -+ { -+ rtx temp = gen_reg_rtx (mode); -+ emit_insn (gen_rtx_SET (temp, op1)); -+ emit_move_insn (op0, temp); -+ return; -+ } -+ } -+ } -+ -+ emit_insn (gen_rtx_SET (op0, op1)); -+} -+ -+void -+ix86_expand_vector_move (machine_mode mode, rtx operands[]) -+{ -+ rtx op0 = operands[0], op1 = operands[1]; -+ /* Use GET_MODE_BITSIZE instead of GET_MODE_ALIGNMENT for IA MCU -+ psABI since the biggest alignment is 4 byte for IA MCU psABI. */ -+ unsigned int align = (TARGET_IAMCU -+ ? GET_MODE_BITSIZE (mode) -+ : GET_MODE_ALIGNMENT (mode)); -+ -+ if (push_operand (op0, VOIDmode)) -+ op0 = emit_move_resolve_push (mode, op0); -+ -+ /* Force constants other than zero into memory. We do not know how -+ the instructions used to build constants modify the upper 64 bits -+ of the register, once we have that information we may be able -+ to handle some of them more efficiently. */ -+ if (can_create_pseudo_p () -+ && (CONSTANT_P (op1) -+ || (SUBREG_P (op1) -+ && CONSTANT_P (SUBREG_REG (op1)))) -+ && ((register_operand (op0, mode) -+ && !standard_sse_constant_p (op1, mode)) -+ /* ix86_expand_vector_move_misalign() does not like constants. */ -+ || (SSE_REG_MODE_P (mode) -+ && MEM_P (op0) -+ && MEM_ALIGN (op0) < align))) -+ { -+ if (SUBREG_P (op1)) -+ { -+ machine_mode imode = GET_MODE (SUBREG_REG (op1)); -+ rtx r = force_const_mem (imode, SUBREG_REG (op1)); -+ if (r) -+ r = validize_mem (r); -+ else -+ r = force_reg (imode, SUBREG_REG (op1)); -+ op1 = simplify_gen_subreg (mode, r, imode, SUBREG_BYTE (op1)); -+ } -+ else -+ op1 = validize_mem (force_const_mem (mode, op1)); -+ } -+ -+ /* We need to check memory alignment for SSE mode since attribute -+ can make operands unaligned. */ -+ if (can_create_pseudo_p () -+ && SSE_REG_MODE_P (mode) -+ && ((MEM_P (op0) && (MEM_ALIGN (op0) < align)) -+ || (MEM_P (op1) && (MEM_ALIGN (op1) < align)))) -+ { -+ rtx tmp[2]; -+ -+ /* ix86_expand_vector_move_misalign() does not like both -+ arguments in memory. */ -+ if (!register_operand (op0, mode) -+ && !register_operand (op1, mode)) -+ op1 = force_reg (mode, op1); -+ -+ tmp[0] = op0; tmp[1] = op1; -+ ix86_expand_vector_move_misalign (mode, tmp); -+ return; -+ } -+ -+ /* Make operand1 a register if it isn't already. */ -+ if (can_create_pseudo_p () -+ && !register_operand (op0, mode) -+ && !register_operand (op1, mode)) -+ { -+ emit_move_insn (op0, force_reg (GET_MODE (op0), op1)); -+ return; -+ } -+ -+ emit_insn (gen_rtx_SET (op0, op1)); -+} -+ -+/* Split 32-byte AVX unaligned load and store if needed. 
*/ -+ -+static void -+ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1) -+{ -+ rtx m; -+ rtx (*extract) (rtx, rtx, rtx); -+ machine_mode mode; -+ -+ if ((MEM_P (op1) && !TARGET_AVX256_SPLIT_UNALIGNED_LOAD) -+ || (MEM_P (op0) && !TARGET_AVX256_SPLIT_UNALIGNED_STORE)) -+ { -+ emit_insn (gen_rtx_SET (op0, op1)); -+ return; -+ } -+ -+ rtx orig_op0 = NULL_RTX; -+ mode = GET_MODE (op0); -+ switch (GET_MODE_CLASS (mode)) -+ { -+ case MODE_VECTOR_INT: -+ case MODE_INT: -+ if (mode != V32QImode) -+ { -+ if (!MEM_P (op0)) -+ { -+ orig_op0 = op0; -+ op0 = gen_reg_rtx (V32QImode); -+ } -+ else -+ op0 = gen_lowpart (V32QImode, op0); -+ op1 = gen_lowpart (V32QImode, op1); -+ mode = V32QImode; -+ } -+ break; -+ case MODE_VECTOR_FLOAT: -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ -+ switch (mode) -+ { -+ default: -+ gcc_unreachable (); -+ case E_V32QImode: -+ extract = gen_avx_vextractf128v32qi; -+ mode = V16QImode; -+ break; -+ case E_V8SFmode: -+ extract = gen_avx_vextractf128v8sf; -+ mode = V4SFmode; -+ break; -+ case E_V4DFmode: -+ extract = gen_avx_vextractf128v4df; -+ mode = V2DFmode; -+ break; -+ } -+ -+ if (MEM_P (op1)) -+ { -+ rtx r = gen_reg_rtx (mode); -+ m = adjust_address (op1, mode, 0); -+ emit_move_insn (r, m); -+ m = adjust_address (op1, mode, 16); -+ r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m); -+ emit_move_insn (op0, r); -+ } -+ else if (MEM_P (op0)) -+ { -+ m = adjust_address (op0, mode, 0); -+ emit_insn (extract (m, op1, const0_rtx)); -+ m = adjust_address (op0, mode, 16); -+ emit_insn (extract (m, copy_rtx (op1), const1_rtx)); -+ } -+ else -+ gcc_unreachable (); -+ -+ if (orig_op0) -+ emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0)); -+} -+ -+/* Implement the movmisalign patterns for SSE. Non-SSE modes go -+ straight to ix86_expand_vector_move. */ -+/* Code generation for scalar reg-reg moves of single and double precision data: -+ if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true) -+ movaps reg, reg -+ else -+ movss reg, reg -+ if (x86_sse_partial_reg_dependency == true) -+ movapd reg, reg -+ else -+ movsd reg, reg -+ -+ Code generation for scalar loads of double precision data: -+ if (x86_sse_split_regs == true) -+ movlpd mem, reg (gas syntax) -+ else -+ movsd mem, reg -+ -+ Code generation for unaligned packed loads of single precision data -+ (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency): -+ if (x86_sse_unaligned_move_optimal) -+ movups mem, reg -+ -+ if (x86_sse_partial_reg_dependency == true) -+ { -+ xorps reg, reg -+ movlps mem, reg -+ movhps mem+8, reg -+ } -+ else -+ { -+ movlps mem, reg -+ movhps mem+8, reg -+ } -+ -+ Code generation for unaligned packed loads of double precision data -+ (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs): -+ if (x86_sse_unaligned_move_optimal) -+ movupd mem, reg -+ -+ if (x86_sse_split_regs == true) -+ { -+ movlpd mem, reg -+ movhpd mem+8, reg -+ } -+ else -+ { -+ movsd mem, reg -+ movhpd mem+8, reg -+ } -+ */ -+ -+void -+ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[]) -+{ -+ rtx op0, op1, m; -+ -+ op0 = operands[0]; -+ op1 = operands[1]; -+ -+ /* Use unaligned load/store for AVX512 or when optimizing for size. */ -+ if (GET_MODE_SIZE (mode) == 64 || optimize_insn_for_size_p ()) -+ { -+ emit_insn (gen_rtx_SET (op0, op1)); -+ return; -+ } -+ -+ if (TARGET_AVX) -+ { -+ if (GET_MODE_SIZE (mode) == 32) -+ ix86_avx256_split_vector_move_misalign (op0, op1); -+ else -+ /* Always use 128-bit mov_internal pattern for AVX. 
*/ -+ emit_insn (gen_rtx_SET (op0, op1)); -+ return; -+ } -+ -+ if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL -+ || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) -+ { -+ emit_insn (gen_rtx_SET (op0, op1)); -+ return; -+ } -+ -+ /* ??? If we have typed data, then it would appear that using -+ movdqu is the only way to get unaligned data loaded with -+ integer type. */ -+ if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT) -+ { -+ emit_insn (gen_rtx_SET (op0, op1)); -+ return; -+ } -+ -+ if (MEM_P (op1)) -+ { -+ if (TARGET_SSE2 && mode == V2DFmode) -+ { -+ rtx zero; -+ -+ /* When SSE registers are split into halves, we can avoid -+ writing to the top half twice. */ -+ if (TARGET_SSE_SPLIT_REGS) -+ { -+ emit_clobber (op0); -+ zero = op0; -+ } -+ else -+ { -+ /* ??? Not sure about the best option for the Intel chips. -+ The following would seem to satisfy; the register is -+ entirely cleared, breaking the dependency chain. We -+ then store to the upper half, with a dependency depth -+ of one. A rumor has it that Intel recommends two movsd -+ followed by an unpacklpd, but this is unconfirmed. And -+ given that the dependency depth of the unpacklpd would -+ still be one, I'm not sure why this would be better. */ -+ zero = CONST0_RTX (V2DFmode); -+ } -+ -+ m = adjust_address (op1, DFmode, 0); -+ emit_insn (gen_sse2_loadlpd (op0, zero, m)); -+ m = adjust_address (op1, DFmode, 8); -+ emit_insn (gen_sse2_loadhpd (op0, op0, m)); -+ } -+ else -+ { -+ rtx t; -+ -+ if (mode != V4SFmode) -+ t = gen_reg_rtx (V4SFmode); -+ else -+ t = op0; -+ -+ if (TARGET_SSE_PARTIAL_REG_DEPENDENCY) -+ emit_move_insn (t, CONST0_RTX (V4SFmode)); -+ else -+ emit_clobber (t); -+ -+ m = adjust_address (op1, V2SFmode, 0); -+ emit_insn (gen_sse_loadlps (t, t, m)); -+ m = adjust_address (op1, V2SFmode, 8); -+ emit_insn (gen_sse_loadhps (t, t, m)); -+ if (mode != V4SFmode) -+ emit_move_insn (op0, gen_lowpart (mode, t)); -+ } -+ } -+ else if (MEM_P (op0)) -+ { -+ if (TARGET_SSE2 && mode == V2DFmode) -+ { -+ m = adjust_address (op0, DFmode, 0); -+ emit_insn (gen_sse2_storelpd (m, op1)); -+ m = adjust_address (op0, DFmode, 8); -+ emit_insn (gen_sse2_storehpd (m, op1)); -+ } -+ else -+ { -+ if (mode != V4SFmode) -+ op1 = gen_lowpart (V4SFmode, op1); -+ -+ m = adjust_address (op0, V2SFmode, 0); -+ emit_insn (gen_sse_storelps (m, op1)); -+ m = adjust_address (op0, V2SFmode, 8); -+ emit_insn (gen_sse_storehps (m, copy_rtx (op1))); -+ } -+ } -+ else -+ gcc_unreachable (); -+} -+ -+/* Helper function of ix86_fixup_binary_operands to canonicalize -+ operand order. Returns true if the operands should be swapped. */ -+ -+static bool -+ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode, -+ rtx operands[]) -+{ -+ rtx dst = operands[0]; -+ rtx src1 = operands[1]; -+ rtx src2 = operands[2]; -+ -+ /* If the operation is not commutative, we can't do anything. */ -+ if (GET_RTX_CLASS (code) != RTX_COMM_ARITH -+ && GET_RTX_CLASS (code) != RTX_COMM_COMPARE) -+ return false; -+ -+ /* Highest priority is that src1 should match dst. */ -+ if (rtx_equal_p (dst, src1)) -+ return false; -+ if (rtx_equal_p (dst, src2)) -+ return true; -+ -+ /* Next highest priority is that immediate constants come second. */ -+ if (immediate_operand (src2, mode)) -+ return false; -+ if (immediate_operand (src1, mode)) -+ return true; -+ -+ /* Lowest priority is that memory references should come second. 
*/ -+ if (MEM_P (src2)) -+ return false; -+ if (MEM_P (src1)) -+ return true; -+ -+ return false; -+} -+ -+ -+/* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the -+ destination to use for the operation. If different from the true -+ destination in operands[0], a copy operation will be required. */ -+ -+rtx -+ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode, -+ rtx operands[]) -+{ -+ rtx dst = operands[0]; -+ rtx src1 = operands[1]; -+ rtx src2 = operands[2]; -+ -+ /* Canonicalize operand order. */ -+ if (ix86_swap_binary_operands_p (code, mode, operands)) -+ { -+ /* It is invalid to swap operands of different modes. */ -+ gcc_assert (GET_MODE (src1) == GET_MODE (src2)); -+ -+ std::swap (src1, src2); -+ } -+ -+ /* Both source operands cannot be in memory. */ -+ if (MEM_P (src1) && MEM_P (src2)) -+ { -+ /* Optimization: Only read from memory once. */ -+ if (rtx_equal_p (src1, src2)) -+ { -+ src2 = force_reg (mode, src2); -+ src1 = src2; -+ } -+ else if (rtx_equal_p (dst, src1)) -+ src2 = force_reg (mode, src2); -+ else -+ src1 = force_reg (mode, src1); -+ } -+ -+ /* If the destination is memory, and we do not have matching source -+ operands, do things in registers. */ -+ if (MEM_P (dst) && !rtx_equal_p (dst, src1)) -+ dst = gen_reg_rtx (mode); -+ -+ /* Source 1 cannot be a constant. */ -+ if (CONSTANT_P (src1)) -+ src1 = force_reg (mode, src1); -+ -+ /* Source 1 cannot be a non-matching memory. */ -+ if (MEM_P (src1) && !rtx_equal_p (dst, src1)) -+ src1 = force_reg (mode, src1); -+ -+ /* Improve address combine. */ -+ if (code == PLUS -+ && GET_MODE_CLASS (mode) == MODE_INT -+ && MEM_P (src2)) -+ src2 = force_reg (mode, src2); -+ -+ operands[1] = src1; -+ operands[2] = src2; -+ return dst; -+} -+ -+/* Similarly, but assume that the destination has already been -+ set up properly. */ -+ -+void -+ix86_fixup_binary_operands_no_copy (enum rtx_code code, -+ machine_mode mode, rtx operands[]) -+{ -+ rtx dst = ix86_fixup_binary_operands (code, mode, operands); -+ gcc_assert (dst == operands[0]); -+} -+ -+/* Attempt to expand a binary operator. Make the expansion closer to the -+ actual machine, then just general_operand, which will allow 3 separate -+ memory references (one output, two input) in a single insn. */ -+ -+void -+ix86_expand_binary_operator (enum rtx_code code, machine_mode mode, -+ rtx operands[]) -+{ -+ rtx src1, src2, dst, op, clob; -+ -+ dst = ix86_fixup_binary_operands (code, mode, operands); -+ src1 = operands[1]; -+ src2 = operands[2]; -+ -+ /* Emit the instruction. */ -+ -+ op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, src1, src2)); -+ -+ if (reload_completed -+ && code == PLUS -+ && !rtx_equal_p (dst, src1)) -+ { -+ /* This is going to be an LEA; avoid splitting it later. */ -+ emit_insn (op); -+ } -+ else -+ { -+ clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); -+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); -+ } -+ -+ /* Fix up the destination if needed. */ -+ if (dst != operands[0]) -+ emit_move_insn (operands[0], dst); -+} -+ -+/* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with -+ the given OPERANDS. 
*/ -+ -+void -+ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode, -+ rtx operands[]) -+{ -+ rtx op1 = NULL_RTX, op2 = NULL_RTX; -+ if (SUBREG_P (operands[1])) -+ { -+ op1 = operands[1]; -+ op2 = operands[2]; -+ } -+ else if (SUBREG_P (operands[2])) -+ { -+ op1 = operands[2]; -+ op2 = operands[1]; -+ } -+ /* Optimize (__m128i) d | (__m128i) e and similar code -+ when d and e are float vectors into float vector logical -+ insn. In C/C++ without using intrinsics there is no other way -+ to express vector logical operation on float vectors than -+ to cast them temporarily to integer vectors. */ -+ if (op1 -+ && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL -+ && (SUBREG_P (op2) || GET_CODE (op2) == CONST_VECTOR) -+ && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT -+ && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode) -+ && SUBREG_BYTE (op1) == 0 -+ && (GET_CODE (op2) == CONST_VECTOR -+ || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2)) -+ && SUBREG_BYTE (op2) == 0)) -+ && can_create_pseudo_p ()) -+ { -+ rtx dst; -+ switch (GET_MODE (SUBREG_REG (op1))) -+ { -+ case E_V4SFmode: -+ case E_V8SFmode: -+ case E_V16SFmode: -+ case E_V2DFmode: -+ case E_V4DFmode: -+ case E_V8DFmode: -+ dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1))); -+ if (GET_CODE (op2) == CONST_VECTOR) -+ { -+ op2 = gen_lowpart (GET_MODE (dst), op2); -+ op2 = force_reg (GET_MODE (dst), op2); -+ } -+ else -+ { -+ op1 = operands[1]; -+ op2 = SUBREG_REG (operands[2]); -+ if (!vector_operand (op2, GET_MODE (dst))) -+ op2 = force_reg (GET_MODE (dst), op2); -+ } -+ op1 = SUBREG_REG (op1); -+ if (!vector_operand (op1, GET_MODE (dst))) -+ op1 = force_reg (GET_MODE (dst), op1); -+ emit_insn (gen_rtx_SET (dst, -+ gen_rtx_fmt_ee (code, GET_MODE (dst), -+ op1, op2))); -+ emit_move_insn (operands[0], gen_lowpart (mode, dst)); -+ return; -+ default: -+ break; -+ } -+ } -+ if (!vector_operand (operands[1], mode)) -+ operands[1] = force_reg (mode, operands[1]); -+ if (!vector_operand (operands[2], mode)) -+ operands[2] = force_reg (mode, operands[2]); -+ ix86_fixup_binary_operands_no_copy (code, mode, operands); -+ emit_insn (gen_rtx_SET (operands[0], -+ gen_rtx_fmt_ee (code, mode, operands[1], -+ operands[2]))); -+} -+ -+/* Return TRUE or FALSE depending on whether the binary operator meets the -+ appropriate constraints. */ -+ -+bool -+ix86_binary_operator_ok (enum rtx_code code, machine_mode mode, -+ rtx operands[3]) -+{ -+ rtx dst = operands[0]; -+ rtx src1 = operands[1]; -+ rtx src2 = operands[2]; -+ -+ /* Both source operands cannot be in memory. */ -+ if (MEM_P (src1) && MEM_P (src2)) -+ return false; -+ -+ /* Canonicalize operand order for commutative operators. */ -+ if (ix86_swap_binary_operands_p (code, mode, operands)) -+ std::swap (src1, src2); -+ -+ /* If the destination is memory, we must have a matching source operand. */ -+ if (MEM_P (dst) && !rtx_equal_p (dst, src1)) -+ return false; -+ -+ /* Source 1 cannot be a constant. */ -+ if (CONSTANT_P (src1)) -+ return false; -+ -+ /* Source 1 cannot be a non-matching memory. */ -+ if (MEM_P (src1) && !rtx_equal_p (dst, src1)) -+ /* Support "andhi/andsi/anddi" as a zero-extending move. */ -+ return (code == AND -+ && (mode == HImode -+ || mode == SImode -+ || (TARGET_64BIT && mode == DImode)) -+ && satisfies_constraint_L (src2)); -+ -+ return true; -+} -+ -+/* Attempt to expand a unary operator. 
Make the expansion closer to the -+ actual machine, then just general_operand, which will allow 2 separate -+ memory references (one output, one input) in a single insn. */ -+ -+void -+ix86_expand_unary_operator (enum rtx_code code, machine_mode mode, -+ rtx operands[]) -+{ -+ bool matching_memory = false; -+ rtx src, dst, op, clob; -+ -+ dst = operands[0]; -+ src = operands[1]; -+ -+ /* If the destination is memory, and we do not have matching source -+ operands, do things in registers. */ -+ if (MEM_P (dst)) -+ { -+ if (rtx_equal_p (dst, src)) -+ matching_memory = true; -+ else -+ dst = gen_reg_rtx (mode); -+ } -+ -+ /* When source operand is memory, destination must match. */ -+ if (MEM_P (src) && !matching_memory) -+ src = force_reg (mode, src); -+ -+ /* Emit the instruction. */ -+ -+ op = gen_rtx_SET (dst, gen_rtx_fmt_e (code, mode, src)); -+ -+ if (code == NOT) -+ emit_insn (op); -+ else -+ { -+ clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); -+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); -+ } -+ -+ /* Fix up the destination if needed. */ -+ if (dst != operands[0]) -+ emit_move_insn (operands[0], dst); -+} -+ -+/* Predict just emitted jump instruction to be taken with probability PROB. */ -+ -+static void -+predict_jump (int prob) -+{ -+ rtx_insn *insn = get_last_insn (); -+ gcc_assert (JUMP_P (insn)); -+ add_reg_br_prob_note (insn, profile_probability::from_reg_br_prob_base (prob)); -+} -+ -+/* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and -+ divisor are within the range [0-255]. */ -+ -+void -+ix86_split_idivmod (machine_mode mode, rtx operands[], -+ bool signed_p) -+{ -+ rtx_code_label *end_label, *qimode_label; -+ rtx div, mod; -+ rtx_insn *insn; -+ rtx scratch, tmp0, tmp1, tmp2; -+ rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx); -+ rtx (*gen_zero_extend) (rtx, rtx); -+ rtx (*gen_test_ccno_1) (rtx, rtx); -+ -+ switch (mode) -+ { -+ case E_SImode: -+ if (GET_MODE (operands[0]) == SImode) -+ { -+ if (GET_MODE (operands[1]) == SImode) -+ gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1; -+ else -+ gen_divmod4_1 -+ = signed_p ? gen_divmodsi4_zext_2 : gen_udivmodsi4_zext_2; -+ gen_zero_extend = gen_zero_extendqisi2; -+ } -+ else -+ { -+ gen_divmod4_1 -+ = signed_p ? gen_divmodsi4_zext_1 : gen_udivmodsi4_zext_1; -+ gen_zero_extend = gen_zero_extendqidi2; -+ } -+ gen_test_ccno_1 = gen_testsi_ccno_1; -+ break; -+ case E_DImode: -+ gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1; -+ gen_test_ccno_1 = gen_testdi_ccno_1; -+ gen_zero_extend = gen_zero_extendqidi2; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ -+ end_label = gen_label_rtx (); -+ qimode_label = gen_label_rtx (); -+ -+ scratch = gen_reg_rtx (mode); -+ -+ /* Use 8bit unsigned divimod if dividend and divisor are within -+ the range [0-255]. */ -+ emit_move_insn (scratch, operands[2]); -+ scratch = expand_simple_binop (mode, IOR, scratch, operands[3], -+ scratch, 1, OPTAB_DIRECT); -+ emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100))); -+ tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG); -+ tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx); -+ tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0, -+ gen_rtx_LABEL_REF (VOIDmode, qimode_label), -+ pc_rtx); -+ insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp0)); -+ predict_jump (REG_BR_PROB_BASE * 50 / 100); -+ JUMP_LABEL (insn) = qimode_label; -+ -+ /* Generate original signed/unsigned divimod. 
*/ -+ div = gen_divmod4_1 (operands[0], operands[1], -+ operands[2], operands[3]); -+ emit_insn (div); -+ -+ /* Branch to the end. */ -+ emit_jump_insn (gen_jump (end_label)); -+ emit_barrier (); -+ -+ /* Generate 8bit unsigned divide. */ -+ emit_label (qimode_label); -+ /* Don't use operands[0] for result of 8bit divide since not all -+ registers support QImode ZERO_EXTRACT. */ -+ tmp0 = lowpart_subreg (HImode, scratch, mode); -+ tmp1 = lowpart_subreg (HImode, operands[2], mode); -+ tmp2 = lowpart_subreg (QImode, operands[3], mode); -+ emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2)); -+ -+ if (signed_p) -+ { -+ div = gen_rtx_DIV (mode, operands[2], operands[3]); -+ mod = gen_rtx_MOD (mode, operands[2], operands[3]); -+ } -+ else -+ { -+ div = gen_rtx_UDIV (mode, operands[2], operands[3]); -+ mod = gen_rtx_UMOD (mode, operands[2], operands[3]); -+ } -+ if (mode == SImode) -+ { -+ if (GET_MODE (operands[0]) != SImode) -+ div = gen_rtx_ZERO_EXTEND (DImode, div); -+ if (GET_MODE (operands[1]) != SImode) -+ mod = gen_rtx_ZERO_EXTEND (DImode, mod); -+ } -+ -+ /* Extract remainder from AH. */ -+ tmp1 = gen_rtx_ZERO_EXTRACT (GET_MODE (operands[1]), -+ tmp0, GEN_INT (8), GEN_INT (8)); -+ if (REG_P (operands[1])) -+ insn = emit_move_insn (operands[1], tmp1); -+ else -+ { -+ /* Need a new scratch register since the old one has result -+ of 8bit divide. */ -+ scratch = gen_reg_rtx (GET_MODE (operands[1])); -+ emit_move_insn (scratch, tmp1); -+ insn = emit_move_insn (operands[1], scratch); -+ } -+ set_unique_reg_note (insn, REG_EQUAL, mod); -+ -+ /* Zero extend quotient from AL. */ -+ tmp1 = gen_lowpart (QImode, tmp0); -+ insn = emit_insn (gen_zero_extend (operands[0], tmp1)); -+ set_unique_reg_note (insn, REG_EQUAL, div); -+ -+ emit_label (end_label); -+} -+ -+/* Emit x86 binary operand CODE in mode MODE, where the first operand -+ matches destination. RTX includes clobber of FLAGS_REG. */ -+ -+void -+ix86_emit_binop (enum rtx_code code, machine_mode mode, -+ rtx dst, rtx src) -+{ -+ rtx op, clob; -+ -+ op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, dst, src)); -+ clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); -+ -+ emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); -+} -+ -+/* Return true if regno1 def is nearest to the insn. */ -+ -+static bool -+find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2) -+{ -+ rtx_insn *prev = insn; -+ rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn)); -+ -+ if (insn == start) -+ return false; -+ while (prev && prev != start) -+ { -+ if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev)) -+ { -+ prev = PREV_INSN (prev); -+ continue; -+ } -+ if (insn_defines_reg (regno1, INVALID_REGNUM, prev)) -+ return true; -+ else if (insn_defines_reg (regno2, INVALID_REGNUM, prev)) -+ return false; -+ prev = PREV_INSN (prev); -+ } -+ -+ /* None of the regs is defined in the bb. */ -+ return false; -+} -+ -+/* Split lea instructions into a sequence of instructions -+ which are executed on ALU to avoid AGU stalls. -+ It is assumed that it is allowed to clobber flags register -+ at lea position. 
*/ -+ -+void -+ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode) -+{ -+ unsigned int regno0, regno1, regno2; -+ struct ix86_address parts; -+ rtx target, tmp; -+ int ok, adds; -+ -+ ok = ix86_decompose_address (operands[1], &parts); -+ gcc_assert (ok); -+ -+ target = gen_lowpart (mode, operands[0]); -+ -+ regno0 = true_regnum (target); -+ regno1 = INVALID_REGNUM; -+ regno2 = INVALID_REGNUM; -+ -+ if (parts.base) -+ { -+ parts.base = gen_lowpart (mode, parts.base); -+ regno1 = true_regnum (parts.base); -+ } -+ -+ if (parts.index) -+ { -+ parts.index = gen_lowpart (mode, parts.index); -+ regno2 = true_regnum (parts.index); -+ } -+ -+ if (parts.disp) -+ parts.disp = gen_lowpart (mode, parts.disp); -+ -+ if (parts.scale > 1) -+ { -+ /* Case r1 = r1 + ... */ -+ if (regno1 == regno0) -+ { -+ /* If we have a case r1 = r1 + C * r2 then we -+ should use multiplication which is very -+ expensive. Assume cost model is wrong if we -+ have such case here. */ -+ gcc_assert (regno2 != regno0); -+ -+ for (adds = parts.scale; adds > 0; adds--) -+ ix86_emit_binop (PLUS, mode, target, parts.index); -+ } -+ else -+ { -+ /* r1 = r2 + r3 * C case. Need to move r3 into r1. */ -+ if (regno0 != regno2) -+ emit_insn (gen_rtx_SET (target, parts.index)); -+ -+ /* Use shift for scaling. */ -+ ix86_emit_binop (ASHIFT, mode, target, -+ GEN_INT (exact_log2 (parts.scale))); -+ -+ if (parts.base) -+ ix86_emit_binop (PLUS, mode, target, parts.base); -+ -+ if (parts.disp && parts.disp != const0_rtx) -+ ix86_emit_binop (PLUS, mode, target, parts.disp); -+ } -+ } -+ else if (!parts.base && !parts.index) -+ { -+ gcc_assert(parts.disp); -+ emit_insn (gen_rtx_SET (target, parts.disp)); -+ } -+ else -+ { -+ if (!parts.base) -+ { -+ if (regno0 != regno2) -+ emit_insn (gen_rtx_SET (target, parts.index)); -+ } -+ else if (!parts.index) -+ { -+ if (regno0 != regno1) -+ emit_insn (gen_rtx_SET (target, parts.base)); -+ } -+ else -+ { -+ if (regno0 == regno1) -+ tmp = parts.index; -+ else if (regno0 == regno2) -+ tmp = parts.base; -+ else -+ { -+ rtx tmp1; -+ -+ /* Find better operand for SET instruction, depending -+ on which definition is farther from the insn. */ -+ if (find_nearest_reg_def (insn, regno1, regno2)) -+ tmp = parts.index, tmp1 = parts.base; -+ else -+ tmp = parts.base, tmp1 = parts.index; -+ -+ emit_insn (gen_rtx_SET (target, tmp)); -+ -+ if (parts.disp && parts.disp != const0_rtx) -+ ix86_emit_binop (PLUS, mode, target, parts.disp); -+ -+ ix86_emit_binop (PLUS, mode, target, tmp1); -+ return; -+ } -+ -+ ix86_emit_binop (PLUS, mode, target, tmp); -+ } -+ -+ if (parts.disp && parts.disp != const0_rtx) -+ ix86_emit_binop (PLUS, mode, target, parts.disp); -+ } -+} -+ -+/* Post-reload splitter for converting an SF or DFmode value in an -+ SSE register into an unsigned SImode. */ -+ -+void -+ix86_split_convert_uns_si_sse (rtx operands[]) -+{ -+ machine_mode vecmode; -+ rtx value, large, zero_or_two31, input, two31, x; -+ -+ large = operands[1]; -+ zero_or_two31 = operands[2]; -+ input = operands[3]; -+ two31 = operands[4]; -+ vecmode = GET_MODE (large); -+ value = gen_rtx_REG (vecmode, REGNO (operands[0])); -+ -+ /* Load up the value into the low element. We must ensure that the other -+ elements are valid floats -- zero is the easiest such value. 
*/ -+ if (MEM_P (input)) -+ { -+ if (vecmode == V4SFmode) -+ emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input)); -+ else -+ emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input)); -+ } -+ else -+ { -+ input = gen_rtx_REG (vecmode, REGNO (input)); -+ emit_move_insn (value, CONST0_RTX (vecmode)); -+ if (vecmode == V4SFmode) -+ emit_insn (gen_sse_movss (value, value, input)); -+ else -+ emit_insn (gen_sse2_movsd (value, value, input)); -+ } -+ -+ emit_move_insn (large, two31); -+ emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31); -+ -+ x = gen_rtx_fmt_ee (LE, vecmode, large, value); -+ emit_insn (gen_rtx_SET (large, x)); -+ -+ x = gen_rtx_AND (vecmode, zero_or_two31, large); -+ emit_insn (gen_rtx_SET (zero_or_two31, x)); -+ -+ x = gen_rtx_MINUS (vecmode, value, zero_or_two31); -+ emit_insn (gen_rtx_SET (value, x)); -+ -+ large = gen_rtx_REG (V4SImode, REGNO (large)); -+ emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31))); -+ -+ x = gen_rtx_REG (V4SImode, REGNO (value)); -+ if (vecmode == V4SFmode) -+ emit_insn (gen_fix_truncv4sfv4si2 (x, value)); -+ else -+ emit_insn (gen_sse2_cvttpd2dq (x, value)); -+ value = x; -+ -+ emit_insn (gen_xorv4si3 (value, value, large)); -+} -+ -+static bool ix86_expand_vector_init_one_nonzero (bool mmx_ok, -+ machine_mode mode, rtx target, -+ rtx var, int one_var); -+ -+/* Convert an unsigned DImode value into a DFmode, using only SSE. -+ Expects the 64-bit DImode to be supplied in a pair of integral -+ registers. Requires SSE2; will use SSE3 if available. For x86_32, -+ -mfpmath=sse, !optimize_size only. */ -+ -+void -+ix86_expand_convert_uns_didf_sse (rtx target, rtx input) -+{ -+ REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt; -+ rtx int_xmm, fp_xmm; -+ rtx biases, exponents; -+ rtx x; -+ -+ int_xmm = gen_reg_rtx (V4SImode); -+ if (TARGET_INTER_UNIT_MOVES_TO_VEC) -+ emit_insn (gen_movdi_to_sse (int_xmm, input)); -+ else if (TARGET_SSE_SPLIT_REGS) -+ { -+ emit_clobber (int_xmm); -+ emit_move_insn (gen_lowpart (DImode, int_xmm), input); -+ } -+ else -+ { -+ x = gen_reg_rtx (V2DImode); -+ ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0); -+ emit_move_insn (int_xmm, gen_lowpart (V4SImode, x)); -+ } -+ -+ x = gen_rtx_CONST_VECTOR (V4SImode, -+ gen_rtvec (4, GEN_INT (0x43300000UL), -+ GEN_INT (0x45300000UL), -+ const0_rtx, const0_rtx)); -+ exponents = validize_mem (force_const_mem (V4SImode, x)); -+ -+ /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */ -+ emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents)); -+ -+ /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm) -+ yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)). -+ Similarly (0x45300000UL ## fp_value_hi_xmm) yields -+ (0x1.0p84 + double(fp_value_hi_xmm)). -+ Note these exponents differ by 32. */ -+ -+ fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm)); -+ -+ /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values -+ in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */ -+ real_ldexp (&bias_lo_rvt, &dconst1, 52); -+ real_ldexp (&bias_hi_rvt, &dconst1, 84); -+ biases = const_double_from_real_value (bias_lo_rvt, DFmode); -+ x = const_double_from_real_value (bias_hi_rvt, DFmode); -+ biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x)); -+ biases = validize_mem (force_const_mem (V2DFmode, biases)); -+ emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases)); -+ -+ /* Add the upper and lower DFmode values together. 
*/ -+ if (TARGET_SSE3) -+ emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm)); -+ else -+ { -+ x = copy_to_mode_reg (V2DFmode, fp_xmm); -+ emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm)); -+ emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x)); -+ } -+ -+ ix86_expand_vector_extract (false, target, fp_xmm, 0); -+} -+ -+/* Not used, but eases macroization of patterns. */ -+void -+ix86_expand_convert_uns_sixf_sse (rtx, rtx) -+{ -+ gcc_unreachable (); -+} -+ -+/* Convert an unsigned SImode value into a DFmode. Only currently used -+ for SSE, but applicable anywhere. */ -+ -+void -+ix86_expand_convert_uns_sidf_sse (rtx target, rtx input) -+{ -+ REAL_VALUE_TYPE TWO31r; -+ rtx x, fp; -+ -+ x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1), -+ NULL, 1, OPTAB_DIRECT); -+ -+ fp = gen_reg_rtx (DFmode); -+ emit_insn (gen_floatsidf2 (fp, x)); -+ -+ real_ldexp (&TWO31r, &dconst1, 31); -+ x = const_double_from_real_value (TWO31r, DFmode); -+ -+ x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT); -+ if (x != target) -+ emit_move_insn (target, x); -+} -+ -+/* Convert a signed DImode value into a DFmode. Only used for SSE in -+ 32-bit mode; otherwise we have a direct convert instruction. */ -+ -+void -+ix86_expand_convert_sign_didf_sse (rtx target, rtx input) -+{ -+ REAL_VALUE_TYPE TWO32r; -+ rtx fp_lo, fp_hi, x; -+ -+ fp_lo = gen_reg_rtx (DFmode); -+ fp_hi = gen_reg_rtx (DFmode); -+ -+ emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input))); -+ -+ real_ldexp (&TWO32r, &dconst1, 32); -+ x = const_double_from_real_value (TWO32r, DFmode); -+ fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT); -+ -+ ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input)); -+ -+ x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target, -+ 0, OPTAB_DIRECT); -+ if (x != target) -+ emit_move_insn (target, x); -+} -+ -+/* Convert an unsigned SImode value into a SFmode, using only SSE. -+ For x86_32, -mfpmath=sse, !optimize_size only. */ -+void -+ix86_expand_convert_uns_sisf_sse (rtx target, rtx input) -+{ -+ REAL_VALUE_TYPE ONE16r; -+ rtx fp_hi, fp_lo, int_hi, int_lo, x; -+ -+ real_ldexp (&ONE16r, &dconst1, 16); -+ x = const_double_from_real_value (ONE16r, SFmode); -+ int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff), -+ NULL, 0, OPTAB_DIRECT); -+ int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16), -+ NULL, 0, OPTAB_DIRECT); -+ fp_hi = gen_reg_rtx (SFmode); -+ fp_lo = gen_reg_rtx (SFmode); -+ emit_insn (gen_floatsisf2 (fp_hi, int_hi)); -+ emit_insn (gen_floatsisf2 (fp_lo, int_lo)); -+ fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi, -+ 0, OPTAB_DIRECT); -+ fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target, -+ 0, OPTAB_DIRECT); -+ if (!rtx_equal_p (target, fp_hi)) -+ emit_move_insn (target, fp_hi); -+} -+ -+/* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert -+ a vector of unsigned ints VAL to vector of floats TARGET. 
*/ -+ -+void -+ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val) -+{ -+ rtx tmp[8]; -+ REAL_VALUE_TYPE TWO16r; -+ machine_mode intmode = GET_MODE (val); -+ machine_mode fltmode = GET_MODE (target); -+ rtx (*cvt) (rtx, rtx); -+ -+ if (intmode == V4SImode) -+ cvt = gen_floatv4siv4sf2; -+ else -+ cvt = gen_floatv8siv8sf2; -+ tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff)); -+ tmp[0] = force_reg (intmode, tmp[0]); -+ tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1, -+ OPTAB_DIRECT); -+ tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16), -+ NULL_RTX, 1, OPTAB_DIRECT); -+ tmp[3] = gen_reg_rtx (fltmode); -+ emit_insn (cvt (tmp[3], tmp[1])); -+ tmp[4] = gen_reg_rtx (fltmode); -+ emit_insn (cvt (tmp[4], tmp[2])); -+ real_ldexp (&TWO16r, &dconst1, 16); -+ tmp[5] = const_double_from_real_value (TWO16r, SFmode); -+ tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5])); -+ tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1, -+ OPTAB_DIRECT); -+ tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1, -+ OPTAB_DIRECT); -+ if (tmp[7] != target) -+ emit_move_insn (target, tmp[7]); -+} -+ -+/* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc* -+ pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*. -+ This is done by doing just signed conversion if < 0x1p31, and otherwise by -+ subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */ -+ -+rtx -+ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp) -+{ -+ REAL_VALUE_TYPE TWO31r; -+ rtx two31r, tmp[4]; -+ machine_mode mode = GET_MODE (val); -+ machine_mode scalarmode = GET_MODE_INNER (mode); -+ machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode; -+ rtx (*cmp) (rtx, rtx, rtx, rtx); -+ int i; -+ -+ for (i = 0; i < 3; i++) -+ tmp[i] = gen_reg_rtx (mode); -+ real_ldexp (&TWO31r, &dconst1, 31); -+ two31r = const_double_from_real_value (TWO31r, scalarmode); -+ two31r = ix86_build_const_vector (mode, 1, two31r); -+ two31r = force_reg (mode, two31r); -+ switch (mode) -+ { -+ case E_V8SFmode: cmp = gen_avx_maskcmpv8sf3; break; -+ case E_V4SFmode: cmp = gen_sse_maskcmpv4sf3; break; -+ case E_V4DFmode: cmp = gen_avx_maskcmpv4df3; break; -+ case E_V2DFmode: cmp = gen_sse2_maskcmpv2df3; break; -+ default: gcc_unreachable (); -+ } -+ tmp[3] = gen_rtx_LE (mode, two31r, val); -+ emit_insn (cmp (tmp[0], two31r, val, tmp[3])); -+ tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1], -+ 0, OPTAB_DIRECT); -+ if (intmode == V4SImode || TARGET_AVX2) -+ *xorp = expand_simple_binop (intmode, ASHIFT, -+ gen_lowpart (intmode, tmp[0]), -+ GEN_INT (31), NULL_RTX, 0, -+ OPTAB_DIRECT); -+ else -+ { -+ rtx two31 = GEN_INT (HOST_WIDE_INT_1U << 31); -+ two31 = ix86_build_const_vector (intmode, 1, two31); -+ *xorp = expand_simple_binop (intmode, AND, -+ gen_lowpart (intmode, tmp[0]), -+ two31, NULL_RTX, 0, -+ OPTAB_DIRECT); -+ } -+ return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2], -+ 0, OPTAB_DIRECT); -+} -+ -+/* Generate code for floating point ABS or NEG. 
*/ -+ -+void -+ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode, -+ rtx operands[]) -+{ -+ rtx mask, set, dst, src; -+ bool use_sse = false; -+ bool vector_mode = VECTOR_MODE_P (mode); -+ machine_mode vmode = mode; -+ -+ if (vector_mode) -+ use_sse = true; -+ else if (mode == TFmode) -+ use_sse = true; -+ else if (TARGET_SSE_MATH) -+ { -+ use_sse = SSE_FLOAT_MODE_P (mode); -+ if (mode == SFmode) -+ vmode = V4SFmode; -+ else if (mode == DFmode) -+ vmode = V2DFmode; -+ } -+ -+ /* NEG and ABS performed with SSE use bitwise mask operations. -+ Create the appropriate mask now. */ -+ if (use_sse) -+ mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS); -+ else -+ mask = NULL_RTX; -+ -+ dst = operands[0]; -+ src = operands[1]; -+ -+ set = gen_rtx_fmt_e (code, mode, src); -+ set = gen_rtx_SET (dst, set); -+ -+ if (mask) -+ { -+ rtx use, clob; -+ rtvec par; -+ -+ use = gen_rtx_USE (VOIDmode, mask); -+ if (vector_mode) -+ par = gen_rtvec (2, set, use); -+ else -+ { -+ clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); -+ par = gen_rtvec (3, set, use, clob); -+ } -+ emit_insn (gen_rtx_PARALLEL (VOIDmode, par)); -+ } -+ else -+ emit_insn (set); -+} -+ -+/* Expand a copysign operation. Special case operand 0 being a constant. */ -+ -+void -+ix86_expand_copysign (rtx operands[]) -+{ -+ machine_mode mode, vmode; -+ rtx dest, op0, op1, mask, nmask; -+ -+ dest = operands[0]; -+ op0 = operands[1]; -+ op1 = operands[2]; -+ -+ mode = GET_MODE (dest); -+ -+ if (mode == SFmode) -+ vmode = V4SFmode; -+ else if (mode == DFmode) -+ vmode = V2DFmode; -+ else -+ vmode = mode; -+ -+ if (CONST_DOUBLE_P (op0)) -+ { -+ rtx (*copysign_insn)(rtx, rtx, rtx, rtx); -+ -+ if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0))) -+ op0 = simplify_unary_operation (ABS, mode, op0, mode); -+ -+ if (mode == SFmode || mode == DFmode) -+ { -+ if (op0 == CONST0_RTX (mode)) -+ op0 = CONST0_RTX (vmode); -+ else -+ { -+ rtx v = ix86_build_const_vector (vmode, false, op0); -+ -+ op0 = force_reg (vmode, v); -+ } -+ } -+ else if (op0 != CONST0_RTX (mode)) -+ op0 = force_reg (mode, op0); -+ -+ mask = ix86_build_signbit_mask (vmode, 0, 0); -+ -+ if (mode == SFmode) -+ copysign_insn = gen_copysignsf3_const; -+ else if (mode == DFmode) -+ copysign_insn = gen_copysigndf3_const; -+ else -+ copysign_insn = gen_copysigntf3_const; -+ -+ emit_insn (copysign_insn (dest, op0, op1, mask)); -+ } -+ else -+ { -+ rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx); -+ -+ nmask = ix86_build_signbit_mask (vmode, 0, 1); -+ mask = ix86_build_signbit_mask (vmode, 0, 0); -+ -+ if (mode == SFmode) -+ copysign_insn = gen_copysignsf3_var; -+ else if (mode == DFmode) -+ copysign_insn = gen_copysigndf3_var; -+ else -+ copysign_insn = gen_copysigntf3_var; -+ -+ emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask)); -+ } -+} -+ -+/* Deconstruct a copysign operation into bit masks. Operand 0 is known to -+ be a constant, and so has already been expanded into a vector constant. 
*/ -+ -+void -+ix86_split_copysign_const (rtx operands[]) -+{ -+ machine_mode mode, vmode; -+ rtx dest, op0, mask, x; -+ -+ dest = operands[0]; -+ op0 = operands[1]; -+ mask = operands[3]; -+ -+ mode = GET_MODE (dest); -+ vmode = GET_MODE (mask); -+ -+ dest = lowpart_subreg (vmode, dest, mode); -+ x = gen_rtx_AND (vmode, dest, mask); -+ emit_insn (gen_rtx_SET (dest, x)); -+ -+ if (op0 != CONST0_RTX (vmode)) -+ { -+ x = gen_rtx_IOR (vmode, dest, op0); -+ emit_insn (gen_rtx_SET (dest, x)); -+ } -+} -+ -+/* Deconstruct a copysign operation into bit masks. Operand 0 is variable, -+ so we have to do two masks. */ -+ -+void -+ix86_split_copysign_var (rtx operands[]) -+{ -+ machine_mode mode, vmode; -+ rtx dest, scratch, op0, op1, mask, nmask, x; -+ -+ dest = operands[0]; -+ scratch = operands[1]; -+ op0 = operands[2]; -+ op1 = operands[3]; -+ nmask = operands[4]; -+ mask = operands[5]; -+ -+ mode = GET_MODE (dest); -+ vmode = GET_MODE (mask); -+ -+ if (rtx_equal_p (op0, op1)) -+ { -+ /* Shouldn't happen often (it's useless, obviously), but when it does -+ we'd generate incorrect code if we continue below. */ -+ emit_move_insn (dest, op0); -+ return; -+ } -+ -+ if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */ -+ { -+ gcc_assert (REGNO (op1) == REGNO (scratch)); -+ -+ x = gen_rtx_AND (vmode, scratch, mask); -+ emit_insn (gen_rtx_SET (scratch, x)); -+ -+ dest = mask; -+ op0 = lowpart_subreg (vmode, op0, mode); -+ x = gen_rtx_NOT (vmode, dest); -+ x = gen_rtx_AND (vmode, x, op0); -+ emit_insn (gen_rtx_SET (dest, x)); -+ } -+ else -+ { -+ if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */ -+ { -+ x = gen_rtx_AND (vmode, scratch, mask); -+ } -+ else /* alternative 2,4 */ -+ { -+ gcc_assert (REGNO (mask) == REGNO (scratch)); -+ op1 = lowpart_subreg (vmode, op1, mode); -+ x = gen_rtx_AND (vmode, scratch, op1); -+ } -+ emit_insn (gen_rtx_SET (scratch, x)); -+ -+ if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */ -+ { -+ dest = lowpart_subreg (vmode, op0, mode); -+ x = gen_rtx_AND (vmode, dest, nmask); -+ } -+ else /* alternative 3,4 */ -+ { -+ gcc_assert (REGNO (nmask) == REGNO (dest)); -+ dest = nmask; -+ op0 = lowpart_subreg (vmode, op0, mode); -+ x = gen_rtx_AND (vmode, dest, op0); -+ } -+ emit_insn (gen_rtx_SET (dest, x)); -+ } -+ -+ x = gen_rtx_IOR (vmode, dest, scratch); -+ emit_insn (gen_rtx_SET (dest, x)); -+} -+ -+/* Expand an xorsign operation. */ -+ -+void -+ix86_expand_xorsign (rtx operands[]) -+{ -+ rtx (*xorsign_insn)(rtx, rtx, rtx, rtx); -+ machine_mode mode, vmode; -+ rtx dest, op0, op1, mask; -+ -+ dest = operands[0]; -+ op0 = operands[1]; -+ op1 = operands[2]; -+ -+ mode = GET_MODE (dest); -+ -+ if (mode == SFmode) -+ { -+ xorsign_insn = gen_xorsignsf3_1; -+ vmode = V4SFmode; -+ } -+ else if (mode == DFmode) -+ { -+ xorsign_insn = gen_xorsigndf3_1; -+ vmode = V2DFmode; -+ } -+ else -+ gcc_unreachable (); -+ -+ mask = ix86_build_signbit_mask (vmode, 0, 0); -+ -+ emit_insn (xorsign_insn (dest, op0, op1, mask)); -+} -+ -+/* Deconstruct an xorsign operation into bit masks. 
*/ -+ -+void -+ix86_split_xorsign (rtx operands[]) -+{ -+ machine_mode mode, vmode; -+ rtx dest, op0, mask, x; -+ -+ dest = operands[0]; -+ op0 = operands[1]; -+ mask = operands[3]; -+ -+ mode = GET_MODE (dest); -+ vmode = GET_MODE (mask); -+ -+ dest = lowpart_subreg (vmode, dest, mode); -+ x = gen_rtx_AND (vmode, dest, mask); -+ emit_insn (gen_rtx_SET (dest, x)); -+ -+ op0 = lowpart_subreg (vmode, op0, mode); -+ x = gen_rtx_XOR (vmode, dest, op0); -+ emit_insn (gen_rtx_SET (dest, x)); -+} -+ -+static rtx ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1); -+ -+void -+ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label) -+{ -+ machine_mode mode = GET_MODE (op0); -+ rtx tmp; -+ -+ /* Handle special case - vector comparsion with boolean result, transform -+ it using ptest instruction. */ -+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) -+ { -+ rtx flag = gen_rtx_REG (CCZmode, FLAGS_REG); -+ machine_mode p_mode = GET_MODE_SIZE (mode) == 32 ? V4DImode : V2DImode; -+ -+ gcc_assert (code == EQ || code == NE); -+ /* Generate XOR since we can't check that one operand is zero vector. */ -+ tmp = gen_reg_rtx (mode); -+ emit_insn (gen_rtx_SET (tmp, gen_rtx_XOR (mode, op0, op1))); -+ tmp = gen_lowpart (p_mode, tmp); -+ emit_insn (gen_rtx_SET (gen_rtx_REG (CCmode, FLAGS_REG), -+ gen_rtx_UNSPEC (CCmode, -+ gen_rtvec (2, tmp, tmp), -+ UNSPEC_PTEST))); -+ tmp = gen_rtx_fmt_ee (code, VOIDmode, flag, const0_rtx); -+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, -+ gen_rtx_LABEL_REF (VOIDmode, label), -+ pc_rtx); -+ emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); -+ return; -+ } -+ -+ switch (mode) -+ { -+ case E_SFmode: -+ case E_DFmode: -+ case E_XFmode: -+ case E_QImode: -+ case E_HImode: -+ case E_SImode: -+ simple: -+ tmp = ix86_expand_compare (code, op0, op1); -+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, -+ gen_rtx_LABEL_REF (VOIDmode, label), -+ pc_rtx); -+ emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); -+ return; -+ -+ case E_DImode: -+ if (TARGET_64BIT) -+ goto simple; -+ /* For 32-bit target DI comparison may be performed on -+ SSE registers. To allow this we should avoid split -+ to SI mode which is achieved by doing xor in DI mode -+ and then comparing with zero (which is recognized by -+ STV pass). We don't compare using xor when optimizing -+ for size. */ -+ if (!optimize_insn_for_size_p () -+ && TARGET_STV -+ && (code == EQ || code == NE)) -+ { -+ op0 = force_reg (mode, gen_rtx_XOR (mode, op0, op1)); -+ op1 = const0_rtx; -+ } -+ /* FALLTHRU */ -+ case E_TImode: -+ /* Expand DImode branch into multiple compare+branch. */ -+ { -+ rtx lo[2], hi[2]; -+ rtx_code_label *label2; -+ enum rtx_code code1, code2, code3; -+ machine_mode submode; -+ -+ if (CONSTANT_P (op0) && !CONSTANT_P (op1)) -+ { -+ std::swap (op0, op1); -+ code = swap_condition (code); -+ } -+ -+ split_double_mode (mode, &op0, 1, lo+0, hi+0); -+ split_double_mode (mode, &op1, 1, lo+1, hi+1); -+ -+ submode = mode == DImode ? SImode : DImode; -+ -+ /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to -+ avoid two branches. This costs one extra insn, so disable when -+ optimizing for size. 
*/ -+ -+ if ((code == EQ || code == NE) -+ && (!optimize_insn_for_size_p () -+ || hi[1] == const0_rtx || lo[1] == const0_rtx)) -+ { -+ rtx xor0, xor1; -+ -+ xor1 = hi[0]; -+ if (hi[1] != const0_rtx) -+ xor1 = expand_binop (submode, xor_optab, xor1, hi[1], -+ NULL_RTX, 0, OPTAB_WIDEN); -+ -+ xor0 = lo[0]; -+ if (lo[1] != const0_rtx) -+ xor0 = expand_binop (submode, xor_optab, xor0, lo[1], -+ NULL_RTX, 0, OPTAB_WIDEN); -+ -+ tmp = expand_binop (submode, ior_optab, xor1, xor0, -+ NULL_RTX, 0, OPTAB_WIDEN); -+ -+ ix86_expand_branch (code, tmp, const0_rtx, label); -+ return; -+ } -+ -+ /* Otherwise, if we are doing less-than or greater-or-equal-than, -+ op1 is a constant and the low word is zero, then we can just -+ examine the high word. Similarly for low word -1 and -+ less-or-equal-than or greater-than. */ -+ -+ if (CONST_INT_P (hi[1])) -+ switch (code) -+ { -+ case LT: case LTU: case GE: case GEU: -+ if (lo[1] == const0_rtx) -+ { -+ ix86_expand_branch (code, hi[0], hi[1], label); -+ return; -+ } -+ break; -+ case LE: case LEU: case GT: case GTU: -+ if (lo[1] == constm1_rtx) -+ { -+ ix86_expand_branch (code, hi[0], hi[1], label); -+ return; -+ } -+ break; -+ default: -+ break; -+ } -+ -+ /* Emulate comparisons that do not depend on Zero flag with -+ double-word subtraction. Note that only Overflow, Sign -+ and Carry flags are valid, so swap arguments and condition -+ of comparisons that would otherwise test Zero flag. */ -+ -+ switch (code) -+ { -+ case LE: case LEU: case GT: case GTU: -+ std::swap (lo[0], lo[1]); -+ std::swap (hi[0], hi[1]); -+ code = swap_condition (code); -+ /* FALLTHRU */ -+ -+ case LT: case LTU: case GE: case GEU: -+ { -+ rtx (*cmp_insn) (rtx, rtx); -+ rtx (*sbb_insn) (rtx, rtx, rtx); -+ bool uns = (code == LTU || code == GEU); -+ -+ if (TARGET_64BIT) -+ { -+ cmp_insn = gen_cmpdi_1; -+ sbb_insn -+ = uns ? gen_subdi3_carry_ccc : gen_subdi3_carry_ccgz; -+ } -+ else -+ { -+ cmp_insn = gen_cmpsi_1; -+ sbb_insn -+ = uns ? gen_subsi3_carry_ccc : gen_subsi3_carry_ccgz; -+ } -+ -+ if (!nonimmediate_operand (lo[0], submode)) -+ lo[0] = force_reg (submode, lo[0]); -+ if (!x86_64_general_operand (lo[1], submode)) -+ lo[1] = force_reg (submode, lo[1]); -+ -+ if (!register_operand (hi[0], submode)) -+ hi[0] = force_reg (submode, hi[0]); -+ if ((uns && !nonimmediate_operand (hi[1], submode)) -+ || (!uns && !x86_64_general_operand (hi[1], submode))) -+ hi[1] = force_reg (submode, hi[1]); -+ -+ emit_insn (cmp_insn (lo[0], lo[1])); -+ emit_insn (sbb_insn (gen_rtx_SCRATCH (submode), hi[0], hi[1])); -+ -+ tmp = gen_rtx_REG (uns ? CCCmode : CCGZmode, FLAGS_REG); -+ -+ ix86_expand_branch (code, tmp, const0_rtx, label); -+ return; -+ } -+ -+ default: -+ break; -+ } -+ -+ /* Otherwise, we need two or three jumps. 
*/ -+ -+ label2 = gen_label_rtx (); -+ -+ code1 = code; -+ code2 = swap_condition (code); -+ code3 = unsigned_condition (code); -+ -+ switch (code) -+ { -+ case LT: case GT: case LTU: case GTU: -+ break; -+ -+ case LE: code1 = LT; code2 = GT; break; -+ case GE: code1 = GT; code2 = LT; break; -+ case LEU: code1 = LTU; code2 = GTU; break; -+ case GEU: code1 = GTU; code2 = LTU; break; -+ -+ case EQ: code1 = UNKNOWN; code2 = NE; break; -+ case NE: code2 = UNKNOWN; break; -+ -+ default: -+ gcc_unreachable (); -+ } -+ -+ /* -+ * a < b => -+ * if (hi(a) < hi(b)) goto true; -+ * if (hi(a) > hi(b)) goto false; -+ * if (lo(a) < lo(b)) goto true; -+ * false: -+ */ -+ -+ if (code1 != UNKNOWN) -+ ix86_expand_branch (code1, hi[0], hi[1], label); -+ if (code2 != UNKNOWN) -+ ix86_expand_branch (code2, hi[0], hi[1], label2); -+ -+ ix86_expand_branch (code3, lo[0], lo[1], label); -+ -+ if (code2 != UNKNOWN) -+ emit_label (label2); -+ return; -+ } -+ -+ default: -+ gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC); -+ goto simple; -+ } -+} -+ -+/* Figure out whether to use unordered fp comparisons. */ -+ -+static bool -+ix86_unordered_fp_compare (enum rtx_code code) -+{ -+ if (!TARGET_IEEE_FP) -+ return false; -+ -+ switch (code) -+ { -+ case GT: -+ case GE: -+ case LT: -+ case LE: -+ return false; -+ -+ case EQ: -+ case NE: -+ -+ case LTGT: -+ case UNORDERED: -+ case ORDERED: -+ case UNLT: -+ case UNLE: -+ case UNGT: -+ case UNGE: -+ case UNEQ: -+ return true; -+ -+ default: -+ gcc_unreachable (); -+ } -+} -+ -+/* Return a comparison we can do and that it is equivalent to -+ swap_condition (code) apart possibly from orderedness. -+ But, never change orderedness if TARGET_IEEE_FP, returning -+ UNKNOWN in that case if necessary. */ -+ -+static enum rtx_code -+ix86_fp_swap_condition (enum rtx_code code) -+{ -+ switch (code) -+ { -+ case GT: /* GTU - CF=0 & ZF=0 */ -+ return TARGET_IEEE_FP ? UNKNOWN : UNLT; -+ case GE: /* GEU - CF=0 */ -+ return TARGET_IEEE_FP ? UNKNOWN : UNLE; -+ case UNLT: /* LTU - CF=1 */ -+ return TARGET_IEEE_FP ? UNKNOWN : GT; -+ case UNLE: /* LEU - CF=1 | ZF=1 */ -+ return TARGET_IEEE_FP ? UNKNOWN : GE; -+ default: -+ return swap_condition (code); -+ } -+} -+ -+/* Return cost of comparison CODE using the best strategy for performance. -+ All following functions do use number of instructions as a cost metrics. -+ In future this should be tweaked to compute bytes for optimize_size and -+ take into account performance of various instructions on various CPUs. */ -+ -+static int -+ix86_fp_comparison_cost (enum rtx_code code) -+{ -+ int arith_cost; -+ -+ /* The cost of code using bit-twiddling on %ah. */ -+ switch (code) -+ { -+ case UNLE: -+ case UNLT: -+ case LTGT: -+ case GT: -+ case GE: -+ case UNORDERED: -+ case ORDERED: -+ case UNEQ: -+ arith_cost = 4; -+ break; -+ case LT: -+ case NE: -+ case EQ: -+ case UNGE: -+ arith_cost = TARGET_IEEE_FP ? 5 : 4; -+ break; -+ case LE: -+ case UNGT: -+ arith_cost = TARGET_IEEE_FP ? 6 : 4; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ -+ switch (ix86_fp_comparison_strategy (code)) -+ { -+ case IX86_FPCMP_COMI: -+ return arith_cost > 4 ? 3 : 2; -+ case IX86_FPCMP_SAHF: -+ return arith_cost > 4 ? 4 : 3; -+ default: -+ return arith_cost; -+ } -+} -+ -+/* Swap, force into registers, or otherwise massage the two operands -+ to a fp comparison. The operands are updated in place; the new -+ comparison code is returned. 
*/ -+ -+static enum rtx_code -+ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1) -+{ -+ bool unordered_compare = ix86_unordered_fp_compare (code); -+ rtx op0 = *pop0, op1 = *pop1; -+ machine_mode op_mode = GET_MODE (op0); -+ bool is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode); -+ -+ /* All of the unordered compare instructions only work on registers. -+ The same is true of the fcomi compare instructions. The XFmode -+ compare instructions require registers except when comparing -+ against zero or when converting operand 1 from fixed point to -+ floating point. */ -+ -+ if (!is_sse -+ && (unordered_compare -+ || (op_mode == XFmode -+ && ! (standard_80387_constant_p (op0) == 1 -+ || standard_80387_constant_p (op1) == 1) -+ && GET_CODE (op1) != FLOAT) -+ || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI)) -+ { -+ op0 = force_reg (op_mode, op0); -+ op1 = force_reg (op_mode, op1); -+ } -+ else -+ { -+ /* %%% We only allow op1 in memory; op0 must be st(0). So swap -+ things around if they appear profitable, otherwise force op0 -+ into a register. */ -+ -+ if (standard_80387_constant_p (op0) == 0 -+ || (MEM_P (op0) -+ && ! (standard_80387_constant_p (op1) == 0 -+ || MEM_P (op1)))) -+ { -+ enum rtx_code new_code = ix86_fp_swap_condition (code); -+ if (new_code != UNKNOWN) -+ { -+ std::swap (op0, op1); -+ code = new_code; -+ } -+ } -+ -+ if (!REG_P (op0)) -+ op0 = force_reg (op_mode, op0); -+ -+ if (CONSTANT_P (op1)) -+ { -+ int tmp = standard_80387_constant_p (op1); -+ if (tmp == 0) -+ op1 = validize_mem (force_const_mem (op_mode, op1)); -+ else if (tmp == 1) -+ { -+ if (TARGET_CMOVE) -+ op1 = force_reg (op_mode, op1); -+ } -+ else -+ op1 = force_reg (op_mode, op1); -+ } -+ } -+ -+ /* Try to rearrange the comparison to make it cheaper. */ -+ if (ix86_fp_comparison_cost (code) -+ > ix86_fp_comparison_cost (swap_condition (code)) -+ && (REG_P (op1) || can_create_pseudo_p ())) -+ { -+ std::swap (op0, op1); -+ code = swap_condition (code); -+ if (!REG_P (op0)) -+ op0 = force_reg (op_mode, op0); -+ } -+ -+ *pop0 = op0; -+ *pop1 = op1; -+ return code; -+} -+ -+/* Generate insn patterns to do a floating point compare of OPERANDS. */ -+ -+static rtx -+ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1) -+{ -+ bool unordered_compare = ix86_unordered_fp_compare (code); -+ machine_mode cmp_mode; -+ rtx tmp, scratch; -+ -+ code = ix86_prepare_fp_compare_args (code, &op0, &op1); -+ -+ tmp = gen_rtx_COMPARE (CCFPmode, op0, op1); -+ if (unordered_compare) -+ tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP); -+ -+ /* Do fcomi/sahf based test when profitable. */ -+ switch (ix86_fp_comparison_strategy (code)) -+ { -+ case IX86_FPCMP_COMI: -+ cmp_mode = CCFPmode; -+ emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, FLAGS_REG), tmp)); -+ break; -+ -+ case IX86_FPCMP_SAHF: -+ cmp_mode = CCFPmode; -+ tmp = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW); -+ scratch = gen_reg_rtx (HImode); -+ emit_insn (gen_rtx_SET (scratch, tmp)); -+ emit_insn (gen_x86_sahf_1 (scratch)); -+ break; -+ -+ case IX86_FPCMP_ARITH: -+ cmp_mode = CCNOmode; -+ tmp = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW); -+ scratch = gen_reg_rtx (HImode); -+ emit_insn (gen_rtx_SET (scratch, tmp)); -+ -+ /* In the unordered case, we have to check C2 for NaN's, which -+ doesn't happen to work out to anything nice combination-wise. -+ So do some bit twiddling on the value we've got in AH to come -+ up with an appropriate set of condition codes. 
*/ -+ -+ switch (code) -+ { -+ case GT: -+ case UNGT: -+ if (code == GT || !TARGET_IEEE_FP) -+ { -+ emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x45))); -+ code = EQ; -+ } -+ else -+ { -+ emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45))); -+ emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx)); -+ emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44))); -+ cmp_mode = CCmode; -+ code = GEU; -+ } -+ break; -+ case LT: -+ case UNLT: -+ if (code == LT && TARGET_IEEE_FP) -+ { -+ emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45))); -+ emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx)); -+ cmp_mode = CCmode; -+ code = EQ; -+ } -+ else -+ { -+ emit_insn (gen_testqi_ext_1_ccno (scratch, const1_rtx)); -+ code = NE; -+ } -+ break; -+ case GE: -+ case UNGE: -+ if (code == GE || !TARGET_IEEE_FP) -+ { -+ emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x05))); -+ code = EQ; -+ } -+ else -+ { -+ emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45))); -+ emit_insn (gen_xorqi_ext_1_cc (scratch, scratch, const1_rtx)); -+ code = NE; -+ } -+ break; -+ case LE: -+ case UNLE: -+ if (code == LE && TARGET_IEEE_FP) -+ { -+ emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45))); -+ emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx)); -+ emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40))); -+ cmp_mode = CCmode; -+ code = LTU; -+ } -+ else -+ { -+ emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x45))); -+ code = NE; -+ } -+ break; -+ case EQ: -+ case UNEQ: -+ if (code == EQ && TARGET_IEEE_FP) -+ { -+ emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45))); -+ emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40))); -+ cmp_mode = CCmode; -+ code = EQ; -+ } -+ else -+ { -+ emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x40))); -+ code = NE; -+ } -+ break; -+ case NE: -+ case LTGT: -+ if (code == NE && TARGET_IEEE_FP) -+ { -+ emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45))); -+ emit_insn (gen_xorqi_ext_1_cc (scratch, scratch, -+ GEN_INT (0x40))); -+ code = NE; -+ } -+ else -+ { -+ emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x40))); -+ code = EQ; -+ } -+ break; -+ -+ case UNORDERED: -+ emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x04))); -+ code = NE; -+ break; -+ case ORDERED: -+ emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x04))); -+ code = EQ; -+ break; -+ -+ default: -+ gcc_unreachable (); -+ } -+ break; -+ -+ default: -+ gcc_unreachable(); -+ } -+ -+ /* Return the test that should be put into the flags user, i.e. -+ the bcc, scc, or cmov instruction. */ -+ return gen_rtx_fmt_ee (code, VOIDmode, -+ gen_rtx_REG (cmp_mode, FLAGS_REG), -+ const0_rtx); -+} -+ -+/* Generate insn patterns to do an integer compare of OPERANDS. */ -+ -+static rtx -+ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1) -+{ -+ machine_mode cmpmode; -+ rtx tmp, flags; -+ -+ cmpmode = SELECT_CC_MODE (code, op0, op1); -+ flags = gen_rtx_REG (cmpmode, FLAGS_REG); -+ -+ /* This is very simple, but making the interface the same as in the -+ FP case makes the rest of the code easier. */ -+ tmp = gen_rtx_COMPARE (cmpmode, op0, op1); -+ emit_insn (gen_rtx_SET (flags, tmp)); -+ -+ /* Return the test that should be put into the flags user, i.e. -+ the bcc, scc, or cmov instruction. 
*/ -+ return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx); -+} -+ -+static rtx -+ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1) -+{ -+ rtx ret; -+ -+ if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC) -+ ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1); -+ -+ else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0))) -+ { -+ gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0))); -+ ret = ix86_expand_fp_compare (code, op0, op1); -+ } -+ else -+ ret = ix86_expand_int_compare (code, op0, op1); -+ -+ return ret; -+} -+ -+void -+ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1) -+{ -+ rtx ret; -+ -+ gcc_assert (GET_MODE (dest) == QImode); -+ -+ ret = ix86_expand_compare (code, op0, op1); -+ PUT_MODE (ret, QImode); -+ emit_insn (gen_rtx_SET (dest, ret)); -+} -+ -+/* Expand comparison setting or clearing carry flag. Return true when -+ successful and set pop for the operation. */ -+static bool -+ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop) -+{ -+ machine_mode mode -+ = GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1); -+ -+ /* Do not handle double-mode compares that go through special path. */ -+ if (mode == (TARGET_64BIT ? TImode : DImode)) -+ return false; -+ -+ if (SCALAR_FLOAT_MODE_P (mode)) -+ { -+ rtx compare_op; -+ rtx_insn *compare_seq; -+ -+ gcc_assert (!DECIMAL_FLOAT_MODE_P (mode)); -+ -+ /* Shortcut: following common codes never translate -+ into carry flag compares. */ -+ if (code == EQ || code == NE || code == UNEQ || code == LTGT -+ || code == ORDERED || code == UNORDERED) -+ return false; -+ -+ /* These comparisons require zero flag; swap operands so they won't. */ -+ if ((code == GT || code == UNLE || code == LE || code == UNGT) -+ && !TARGET_IEEE_FP) -+ { -+ std::swap (op0, op1); -+ code = swap_condition (code); -+ } -+ -+ /* Try to expand the comparison and verify that we end up with -+ carry flag based comparison. This fails to be true only when -+ we decide to expand comparison using arithmetic that is not -+ too common scenario. */ -+ start_sequence (); -+ compare_op = ix86_expand_fp_compare (code, op0, op1); -+ compare_seq = get_insns (); -+ end_sequence (); -+ -+ if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode) -+ code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op)); -+ else -+ code = GET_CODE (compare_op); -+ -+ if (code != LTU && code != GEU) -+ return false; -+ -+ emit_insn (compare_seq); -+ *pop = compare_op; -+ return true; -+ } -+ -+ if (!INTEGRAL_MODE_P (mode)) -+ return false; -+ -+ switch (code) -+ { -+ case LTU: -+ case GEU: -+ break; -+ -+ /* Convert a==0 into (unsigned)a<1. */ -+ case EQ: -+ case NE: -+ if (op1 != const0_rtx) -+ return false; -+ op1 = const1_rtx; -+ code = (code == EQ ? LTU : GEU); -+ break; -+ -+ /* Convert a>b into b=b-1. */ -+ case GTU: -+ case LEU: -+ if (CONST_INT_P (op1)) -+ { -+ op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0)); -+ /* Bail out on overflow. We still can swap operands but that -+ would force loading of the constant into register. */ -+ if (op1 == const0_rtx -+ || !x86_64_immediate_operand (op1, GET_MODE (op1))) -+ return false; -+ code = (code == GTU ? GEU : LTU); -+ } -+ else -+ { -+ std::swap (op0, op1); -+ code = (code == GTU ? LTU : GEU); -+ } -+ break; -+ -+ /* Convert a>=0 into (unsigned)a<0x80000000. */ -+ case LT: -+ case GE: -+ if (mode == DImode || op1 != const0_rtx) -+ return false; -+ op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode); -+ code = (code == LT ? 
GEU : LTU); -+ break; -+ case LE: -+ case GT: -+ if (mode == DImode || op1 != constm1_rtx) -+ return false; -+ op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode); -+ code = (code == LE ? GEU : LTU); -+ break; -+ -+ default: -+ return false; -+ } -+ /* Swapping operands may cause constant to appear as first operand. */ -+ if (!nonimmediate_operand (op0, VOIDmode)) -+ { -+ if (!can_create_pseudo_p ()) -+ return false; -+ op0 = force_reg (mode, op0); -+ } -+ *pop = ix86_expand_compare (code, op0, op1); -+ gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU); -+ return true; -+} -+ -+/* Expand conditional increment or decrement using adb/sbb instructions. -+ The default case using setcc followed by the conditional move can be -+ done by generic code. */ -+bool -+ix86_expand_int_addcc (rtx operands[]) -+{ -+ enum rtx_code code = GET_CODE (operands[1]); -+ rtx flags; -+ rtx (*insn)(rtx, rtx, rtx, rtx, rtx); -+ rtx compare_op; -+ rtx val = const0_rtx; -+ bool fpcmp = false; -+ machine_mode mode; -+ rtx op0 = XEXP (operands[1], 0); -+ rtx op1 = XEXP (operands[1], 1); -+ -+ if (operands[3] != const1_rtx -+ && operands[3] != constm1_rtx) -+ return false; -+ if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op)) -+ return false; -+ code = GET_CODE (compare_op); -+ -+ flags = XEXP (compare_op, 0); -+ -+ if (GET_MODE (flags) == CCFPmode) -+ { -+ fpcmp = true; -+ code = ix86_fp_compare_code_to_integer (code); -+ } -+ -+ if (code != LTU) -+ { -+ val = constm1_rtx; -+ if (fpcmp) -+ PUT_CODE (compare_op, -+ reverse_condition_maybe_unordered -+ (GET_CODE (compare_op))); -+ else -+ PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op))); -+ } -+ -+ mode = GET_MODE (operands[0]); -+ -+ /* Construct either adc or sbb insn. */ -+ if ((code == LTU) == (operands[3] == constm1_rtx)) -+ { -+ switch (mode) -+ { -+ case E_QImode: -+ insn = gen_subqi3_carry; -+ break; -+ case E_HImode: -+ insn = gen_subhi3_carry; -+ break; -+ case E_SImode: -+ insn = gen_subsi3_carry; -+ break; -+ case E_DImode: -+ insn = gen_subdi3_carry; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ } -+ else -+ { -+ switch (mode) -+ { -+ case E_QImode: -+ insn = gen_addqi3_carry; -+ break; -+ case E_HImode: -+ insn = gen_addhi3_carry; -+ break; -+ case E_SImode: -+ insn = gen_addsi3_carry; -+ break; -+ case E_DImode: -+ insn = gen_adddi3_carry; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ } -+ emit_insn (insn (operands[0], operands[2], val, flags, compare_op)); -+ -+ return true; -+} -+ -+bool -+ix86_expand_int_movcc (rtx operands[]) -+{ -+ enum rtx_code code = GET_CODE (operands[1]), compare_code; -+ rtx_insn *compare_seq; -+ rtx compare_op; -+ machine_mode mode = GET_MODE (operands[0]); -+ bool sign_bit_compare_p = false; -+ rtx op0 = XEXP (operands[1], 0); -+ rtx op1 = XEXP (operands[1], 1); -+ -+ if (GET_MODE (op0) == TImode -+ || (GET_MODE (op0) == DImode -+ && !TARGET_64BIT)) -+ return false; -+ -+ start_sequence (); -+ compare_op = ix86_expand_compare (code, op0, op1); -+ compare_seq = get_insns (); -+ end_sequence (); -+ -+ compare_code = GET_CODE (compare_op); -+ -+ if ((op1 == const0_rtx && (code == GE || code == LT)) -+ || (op1 == constm1_rtx && (code == GT || code == LE))) -+ sign_bit_compare_p = true; -+ -+ /* Don't attempt mode expansion here -- if we had to expand 5 or 6 -+ HImode insns, we'd be swallowed in word prefix ops. */ -+ -+ if ((mode != HImode || TARGET_FAST_PREFIX) -+ && (mode != (TARGET_64BIT ? 
TImode : DImode)) -+ && CONST_INT_P (operands[2]) -+ && CONST_INT_P (operands[3])) -+ { -+ rtx out = operands[0]; -+ HOST_WIDE_INT ct = INTVAL (operands[2]); -+ HOST_WIDE_INT cf = INTVAL (operands[3]); -+ HOST_WIDE_INT diff; -+ -+ diff = ct - cf; -+ /* Sign bit compares are better done using shifts than we do by using -+ sbb. */ -+ if (sign_bit_compare_p -+ || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op)) -+ { -+ /* Detect overlap between destination and compare sources. */ -+ rtx tmp = out; -+ -+ if (!sign_bit_compare_p) -+ { -+ rtx flags; -+ bool fpcmp = false; -+ -+ compare_code = GET_CODE (compare_op); -+ -+ flags = XEXP (compare_op, 0); -+ -+ if (GET_MODE (flags) == CCFPmode) -+ { -+ fpcmp = true; -+ compare_code -+ = ix86_fp_compare_code_to_integer (compare_code); -+ } -+ -+ /* To simplify rest of code, restrict to the GEU case. */ -+ if (compare_code == LTU) -+ { -+ std::swap (ct, cf); -+ compare_code = reverse_condition (compare_code); -+ code = reverse_condition (code); -+ } -+ else -+ { -+ if (fpcmp) -+ PUT_CODE (compare_op, -+ reverse_condition_maybe_unordered -+ (GET_CODE (compare_op))); -+ else -+ PUT_CODE (compare_op, -+ reverse_condition (GET_CODE (compare_op))); -+ } -+ diff = ct - cf; -+ -+ if (reg_overlap_mentioned_p (out, op0) -+ || reg_overlap_mentioned_p (out, op1)) -+ tmp = gen_reg_rtx (mode); -+ -+ if (mode == DImode) -+ emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op)); -+ else -+ emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), -+ flags, compare_op)); -+ } -+ else -+ { -+ if (code == GT || code == GE) -+ code = reverse_condition (code); -+ else -+ { -+ std::swap (ct, cf); -+ diff = ct - cf; -+ } -+ tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1); -+ } -+ -+ if (diff == 1) -+ { -+ /* -+ * cmpl op0,op1 -+ * sbbl dest,dest -+ * [addl dest, ct] -+ * -+ * Size 5 - 8. -+ */ -+ if (ct) -+ tmp = expand_simple_binop (mode, PLUS, -+ tmp, GEN_INT (ct), -+ copy_rtx (tmp), 1, OPTAB_DIRECT); -+ } -+ else if (cf == -1) -+ { -+ /* -+ * cmpl op0,op1 -+ * sbbl dest,dest -+ * orl $ct, dest -+ * -+ * Size 8. -+ */ -+ tmp = expand_simple_binop (mode, IOR, -+ tmp, GEN_INT (ct), -+ copy_rtx (tmp), 1, OPTAB_DIRECT); -+ } -+ else if (diff == -1 && ct) -+ { -+ /* -+ * cmpl op0,op1 -+ * sbbl dest,dest -+ * notl dest -+ * [addl dest, cf] -+ * -+ * Size 8 - 11. -+ */ -+ tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1); -+ if (cf) -+ tmp = expand_simple_binop (mode, PLUS, -+ copy_rtx (tmp), GEN_INT (cf), -+ copy_rtx (tmp), 1, OPTAB_DIRECT); -+ } -+ else -+ { -+ /* -+ * cmpl op0,op1 -+ * sbbl dest,dest -+ * [notl dest] -+ * andl cf - ct, dest -+ * [addl dest, ct] -+ * -+ * Size 8 - 11. 
-+ */ -+ -+ if (cf == 0) -+ { -+ cf = ct; -+ ct = 0; -+ tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1); -+ } -+ -+ tmp = expand_simple_binop (mode, AND, -+ copy_rtx (tmp), -+ gen_int_mode (cf - ct, mode), -+ copy_rtx (tmp), 1, OPTAB_DIRECT); -+ if (ct) -+ tmp = expand_simple_binop (mode, PLUS, -+ copy_rtx (tmp), GEN_INT (ct), -+ copy_rtx (tmp), 1, OPTAB_DIRECT); -+ } -+ -+ if (!rtx_equal_p (tmp, out)) -+ emit_move_insn (copy_rtx (out), copy_rtx (tmp)); -+ -+ return true; -+ } -+ -+ if (diff < 0) -+ { -+ machine_mode cmp_mode = GET_MODE (op0); -+ enum rtx_code new_code; -+ -+ if (SCALAR_FLOAT_MODE_P (cmp_mode)) -+ { -+ gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode)); -+ -+ /* We may be reversing unordered compare to normal compare, that -+ is not valid in general (we may convert non-trapping condition -+ to trapping one), however on i386 we currently emit all -+ comparisons unordered. */ -+ new_code = reverse_condition_maybe_unordered (code); -+ } -+ else -+ new_code = ix86_reverse_condition (code, cmp_mode); -+ if (new_code != UNKNOWN) -+ { -+ std::swap (ct, cf); -+ diff = -diff; -+ code = new_code; -+ } -+ } -+ -+ compare_code = UNKNOWN; -+ if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT -+ && CONST_INT_P (op1)) -+ { -+ if (op1 == const0_rtx -+ && (code == LT || code == GE)) -+ compare_code = code; -+ else if (op1 == constm1_rtx) -+ { -+ if (code == LE) -+ compare_code = LT; -+ else if (code == GT) -+ compare_code = GE; -+ } -+ } -+ -+ /* Optimize dest = (op0 < 0) ? -1 : cf. */ -+ if (compare_code != UNKNOWN -+ && GET_MODE (op0) == GET_MODE (out) -+ && (cf == -1 || ct == -1)) -+ { -+ /* If lea code below could be used, only optimize -+ if it results in a 2 insn sequence. */ -+ -+ if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8 -+ || diff == 3 || diff == 5 || diff == 9) -+ || (compare_code == LT && ct == -1) -+ || (compare_code == GE && cf == -1)) -+ { -+ /* -+ * notl op1 (if necessary) -+ * sarl $31, op1 -+ * orl cf, op1 -+ */ -+ if (ct != -1) -+ { -+ cf = ct; -+ ct = -1; -+ code = reverse_condition (code); -+ } -+ -+ out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1); -+ -+ out = expand_simple_binop (mode, IOR, -+ out, GEN_INT (cf), -+ out, 1, OPTAB_DIRECT); -+ if (out != operands[0]) -+ emit_move_insn (operands[0], out); -+ -+ return true; -+ } -+ } -+ -+ -+ if ((diff == 1 || diff == 2 || diff == 4 || diff == 8 -+ || diff == 3 || diff == 5 || diff == 9) -+ && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL) -+ && (mode != DImode -+ || x86_64_immediate_operand (GEN_INT (cf), VOIDmode))) -+ { -+ /* -+ * xorl dest,dest -+ * cmpl op1,op2 -+ * setcc dest -+ * lea cf(dest*(ct-cf)),dest -+ * -+ * Size 14. -+ * -+ * This also catches the degenerate setcc-only case. -+ */ -+ -+ rtx tmp; -+ int nops; -+ -+ out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1); -+ -+ nops = 0; -+ /* On x86_64 the lea instruction operates on Pmode, so we need -+ to get arithmetics done in proper mode to match. 
*/ -+ if (diff == 1) -+ tmp = copy_rtx (out); -+ else -+ { -+ rtx out1; -+ out1 = copy_rtx (out); -+ tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1)); -+ nops++; -+ if (diff & 1) -+ { -+ tmp = gen_rtx_PLUS (mode, tmp, out1); -+ nops++; -+ } -+ } -+ if (cf != 0) -+ { -+ tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf)); -+ nops++; -+ } -+ if (!rtx_equal_p (tmp, out)) -+ { -+ if (nops == 1) -+ out = force_operand (tmp, copy_rtx (out)); -+ else -+ emit_insn (gen_rtx_SET (copy_rtx (out), copy_rtx (tmp))); -+ } -+ if (!rtx_equal_p (out, operands[0])) -+ emit_move_insn (operands[0], copy_rtx (out)); -+ -+ return true; -+ } -+ -+ /* -+ * General case: Jumpful: -+ * xorl dest,dest cmpl op1, op2 -+ * cmpl op1, op2 movl ct, dest -+ * setcc dest jcc 1f -+ * decl dest movl cf, dest -+ * andl (cf-ct),dest 1: -+ * addl ct,dest -+ * -+ * Size 20. Size 14. -+ * -+ * This is reasonably steep, but branch mispredict costs are -+ * high on modern cpus, so consider failing only if optimizing -+ * for space. -+ */ -+ -+ if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL)) -+ && BRANCH_COST (optimize_insn_for_speed_p (), -+ false) >= 2) -+ { -+ if (cf == 0) -+ { -+ machine_mode cmp_mode = GET_MODE (op0); -+ enum rtx_code new_code; -+ -+ if (SCALAR_FLOAT_MODE_P (cmp_mode)) -+ { -+ gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode)); -+ -+ /* We may be reversing unordered compare to normal compare, -+ that is not valid in general (we may convert non-trapping -+ condition to trapping one), however on i386 we currently -+ emit all comparisons unordered. */ -+ new_code = reverse_condition_maybe_unordered (code); -+ } -+ else -+ { -+ new_code = ix86_reverse_condition (code, cmp_mode); -+ if (compare_code != UNKNOWN && new_code != UNKNOWN) -+ compare_code = reverse_condition (compare_code); -+ } -+ -+ if (new_code != UNKNOWN) -+ { -+ cf = ct; -+ ct = 0; -+ code = new_code; -+ } -+ } -+ -+ if (compare_code != UNKNOWN) -+ { -+ /* notl op1 (if needed) -+ sarl $31, op1 -+ andl (cf-ct), op1 -+ addl ct, op1 -+ -+ For x < 0 (resp. x <= -1) there will be no notl, -+ so if possible swap the constants to get rid of the -+ complement. -+ True/false will be -1/0 while code below (store flag -+ followed by decrement) is 0/-1, so the constants need -+ to be exchanged once more. */ -+ -+ if (compare_code == GE || !cf) -+ { -+ code = reverse_condition (code); -+ compare_code = LT; -+ } -+ else -+ std::swap (ct, cf); -+ -+ out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1); -+ } -+ else -+ { -+ out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1); -+ -+ out = expand_simple_binop (mode, PLUS, copy_rtx (out), -+ constm1_rtx, -+ copy_rtx (out), 1, OPTAB_DIRECT); -+ } -+ -+ out = expand_simple_binop (mode, AND, copy_rtx (out), -+ gen_int_mode (cf - ct, mode), -+ copy_rtx (out), 1, OPTAB_DIRECT); -+ if (ct) -+ out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct), -+ copy_rtx (out), 1, OPTAB_DIRECT); -+ if (!rtx_equal_p (out, operands[0])) -+ emit_move_insn (operands[0], copy_rtx (out)); -+ -+ return true; -+ } -+ } -+ -+ if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL)) -+ { -+ /* Try a few things more with specific constants and a variable. */ -+ -+ optab op; -+ rtx var, orig_out, out, tmp; -+ -+ if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2) -+ return false; -+ -+ /* If one of the two operands is an interesting constant, load a -+ constant with the above and mask it in with a logical operation. 
*/ -+ -+ if (CONST_INT_P (operands[2])) -+ { -+ var = operands[3]; -+ if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx) -+ operands[3] = constm1_rtx, op = and_optab; -+ else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx) -+ operands[3] = const0_rtx, op = ior_optab; -+ else -+ return false; -+ } -+ else if (CONST_INT_P (operands[3])) -+ { -+ var = operands[2]; -+ if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx) -+ operands[2] = constm1_rtx, op = and_optab; -+ else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx) -+ operands[2] = const0_rtx, op = ior_optab; -+ else -+ return false; -+ } -+ else -+ return false; -+ -+ orig_out = operands[0]; -+ tmp = gen_reg_rtx (mode); -+ operands[0] = tmp; -+ -+ /* Recurse to get the constant loaded. */ -+ if (!ix86_expand_int_movcc (operands)) -+ return false; -+ -+ /* Mask in the interesting variable. */ -+ out = expand_binop (mode, op, var, tmp, orig_out, 0, -+ OPTAB_WIDEN); -+ if (!rtx_equal_p (out, orig_out)) -+ emit_move_insn (copy_rtx (orig_out), copy_rtx (out)); -+ -+ return true; -+ } -+ -+ /* -+ * For comparison with above, -+ * -+ * movl cf,dest -+ * movl ct,tmp -+ * cmpl op1,op2 -+ * cmovcc tmp,dest -+ * -+ * Size 15. -+ */ -+ -+ if (! nonimmediate_operand (operands[2], mode)) -+ operands[2] = force_reg (mode, operands[2]); -+ if (! nonimmediate_operand (operands[3], mode)) -+ operands[3] = force_reg (mode, operands[3]); -+ -+ if (! register_operand (operands[2], VOIDmode) -+ && (mode == QImode -+ || ! register_operand (operands[3], VOIDmode))) -+ operands[2] = force_reg (mode, operands[2]); -+ -+ if (mode == QImode -+ && ! register_operand (operands[3], VOIDmode)) -+ operands[3] = force_reg (mode, operands[3]); -+ -+ emit_insn (compare_seq); -+ emit_insn (gen_rtx_SET (operands[0], -+ gen_rtx_IF_THEN_ELSE (mode, -+ compare_op, operands[2], -+ operands[3]))); -+ return true; -+} -+ -+/* Detect conditional moves that exactly match min/max operational -+ semantics. Note that this is IEEE safe, as long as we don't -+ interchange the operands. -+ -+ Returns FALSE if this conditional move doesn't match a MIN/MAX, -+ and TRUE if the operation is successful and instructions are emitted. */ -+ -+static bool -+ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0, -+ rtx cmp_op1, rtx if_true, rtx if_false) -+{ -+ machine_mode mode; -+ bool is_min; -+ rtx tmp; -+ -+ if (code == LT) -+ ; -+ else if (code == UNGE) -+ std::swap (if_true, if_false); -+ else -+ return false; -+ -+ if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false)) -+ is_min = true; -+ else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false)) -+ is_min = false; -+ else -+ return false; -+ -+ mode = GET_MODE (dest); -+ -+ /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here, -+ but MODE may be a vector mode and thus not appropriate. */ -+ if (!flag_finite_math_only || flag_signed_zeros) -+ { -+ int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX; -+ rtvec v; -+ -+ if_true = force_reg (mode, if_true); -+ v = gen_rtvec (2, if_true, if_false); -+ tmp = gen_rtx_UNSPEC (mode, v, u); -+ } -+ else -+ { -+ code = is_min ? SMIN : SMAX; -+ if (MEM_P (if_true) && MEM_P (if_false)) -+ if_true = force_reg (mode, if_true); -+ tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false); -+ } -+ -+ emit_insn (gen_rtx_SET (dest, tmp)); -+ return true; -+} -+ -+/* Expand an SSE comparison. Return the register with the result. 
*/ -+ -+static rtx -+ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1, -+ rtx op_true, rtx op_false) -+{ -+ machine_mode mode = GET_MODE (dest); -+ machine_mode cmp_ops_mode = GET_MODE (cmp_op0); -+ -+ /* In general case result of comparison can differ from operands' type. */ -+ machine_mode cmp_mode; -+ -+ /* In AVX512F the result of comparison is an integer mask. */ -+ bool maskcmp = false; -+ rtx x; -+ -+ if (GET_MODE_SIZE (cmp_ops_mode) == 64) -+ { -+ unsigned int nbits = GET_MODE_NUNITS (cmp_ops_mode); -+ cmp_mode = int_mode_for_size (nbits, 0).require (); -+ maskcmp = true; -+ } -+ else -+ cmp_mode = cmp_ops_mode; -+ -+ cmp_op0 = force_reg (cmp_ops_mode, cmp_op0); -+ -+ int (*op1_predicate)(rtx, machine_mode) -+ = VECTOR_MODE_P (cmp_ops_mode) ? vector_operand : nonimmediate_operand; -+ -+ if (!op1_predicate (cmp_op1, cmp_ops_mode)) -+ cmp_op1 = force_reg (cmp_ops_mode, cmp_op1); -+ -+ if (optimize -+ || (maskcmp && cmp_mode != mode) -+ || (op_true && reg_overlap_mentioned_p (dest, op_true)) -+ || (op_false && reg_overlap_mentioned_p (dest, op_false))) -+ dest = gen_reg_rtx (maskcmp ? cmp_mode : mode); -+ -+ /* Compare patterns for int modes are unspec in AVX512F only. */ -+ if (maskcmp && (code == GT || code == EQ)) -+ { -+ rtx (*gen)(rtx, rtx, rtx); -+ -+ switch (cmp_ops_mode) -+ { -+ case E_V64QImode: -+ gcc_assert (TARGET_AVX512BW); -+ gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1; -+ break; -+ case E_V32HImode: -+ gcc_assert (TARGET_AVX512BW); -+ gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1; -+ break; -+ case E_V16SImode: -+ gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1; -+ break; -+ case E_V8DImode: -+ gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1; -+ break; -+ default: -+ gen = NULL; -+ } -+ -+ if (gen) -+ { -+ emit_insn (gen (dest, cmp_op0, cmp_op1)); -+ return dest; -+ } -+ } -+ x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1); -+ -+ if (cmp_mode != mode && !maskcmp) -+ { -+ x = force_reg (cmp_ops_mode, x); -+ convert_move (dest, x, false); -+ } -+ else -+ emit_insn (gen_rtx_SET (dest, x)); -+ -+ return dest; -+} -+ -+/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical -+ operations. This is used for both scalar and vector conditional moves. */ -+ -+void -+ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) -+{ -+ machine_mode mode = GET_MODE (dest); -+ machine_mode cmpmode = GET_MODE (cmp); -+ -+ /* In AVX512F the result of comparison is an integer mask. */ -+ bool maskcmp = (mode != cmpmode && TARGET_AVX512F); -+ -+ rtx t2, t3, x; -+ -+ /* If we have an integer mask and FP value then we need -+ to cast mask to FP mode. 
*/ -+ if (mode != cmpmode && VECTOR_MODE_P (cmpmode)) -+ { -+ cmp = force_reg (cmpmode, cmp); -+ cmp = gen_rtx_SUBREG (mode, cmp, 0); -+ } -+ -+ if (maskcmp) -+ { -+ rtx (*gen) (rtx, rtx) = NULL; -+ if ((op_true == CONST0_RTX (mode) -+ && vector_all_ones_operand (op_false, mode)) -+ || (op_false == CONST0_RTX (mode) -+ && vector_all_ones_operand (op_true, mode))) -+ switch (mode) -+ { -+ case E_V64QImode: -+ if (TARGET_AVX512BW) -+ gen = gen_avx512bw_cvtmask2bv64qi; -+ break; -+ case E_V32QImode: -+ if (TARGET_AVX512VL && TARGET_AVX512BW) -+ gen = gen_avx512vl_cvtmask2bv32qi; -+ break; -+ case E_V16QImode: -+ if (TARGET_AVX512VL && TARGET_AVX512BW) -+ gen = gen_avx512vl_cvtmask2bv16qi; -+ break; -+ case E_V32HImode: -+ if (TARGET_AVX512BW) -+ gen = gen_avx512bw_cvtmask2wv32hi; -+ break; -+ case E_V16HImode: -+ if (TARGET_AVX512VL && TARGET_AVX512BW) -+ gen = gen_avx512vl_cvtmask2wv16hi; -+ break; -+ case E_V8HImode: -+ if (TARGET_AVX512VL && TARGET_AVX512BW) -+ gen = gen_avx512vl_cvtmask2wv8hi; -+ break; -+ case E_V16SImode: -+ if (TARGET_AVX512DQ) -+ gen = gen_avx512f_cvtmask2dv16si; -+ break; -+ case E_V8SImode: -+ if (TARGET_AVX512VL && TARGET_AVX512DQ) -+ gen = gen_avx512vl_cvtmask2dv8si; -+ break; -+ case E_V4SImode: -+ if (TARGET_AVX512VL && TARGET_AVX512DQ) -+ gen = gen_avx512vl_cvtmask2dv4si; -+ break; -+ case E_V8DImode: -+ if (TARGET_AVX512DQ) -+ gen = gen_avx512f_cvtmask2qv8di; -+ break; -+ case E_V4DImode: -+ if (TARGET_AVX512VL && TARGET_AVX512DQ) -+ gen = gen_avx512vl_cvtmask2qv4di; -+ break; -+ case E_V2DImode: -+ if (TARGET_AVX512VL && TARGET_AVX512DQ) -+ gen = gen_avx512vl_cvtmask2qv2di; -+ break; -+ default: -+ break; -+ } -+ if (gen && SCALAR_INT_MODE_P (cmpmode)) -+ { -+ cmp = force_reg (cmpmode, cmp); -+ if (op_true == CONST0_RTX (mode)) -+ { -+ rtx (*gen_not) (rtx, rtx); -+ switch (cmpmode) -+ { -+ case E_QImode: gen_not = gen_knotqi; break; -+ case E_HImode: gen_not = gen_knothi; break; -+ case E_SImode: gen_not = gen_knotsi; break; -+ case E_DImode: gen_not = gen_knotdi; break; -+ default: gcc_unreachable (); -+ } -+ rtx n = gen_reg_rtx (cmpmode); -+ emit_insn (gen_not (n, cmp)); -+ cmp = n; -+ } -+ emit_insn (gen (dest, cmp)); -+ return; -+ } -+ } -+ else if (vector_all_ones_operand (op_true, mode) -+ && op_false == CONST0_RTX (mode)) -+ { -+ emit_insn (gen_rtx_SET (dest, cmp)); -+ return; -+ } -+ else if (op_false == CONST0_RTX (mode)) -+ { -+ op_true = force_reg (mode, op_true); -+ x = gen_rtx_AND (mode, cmp, op_true); -+ emit_insn (gen_rtx_SET (dest, x)); -+ return; -+ } -+ else if (op_true == CONST0_RTX (mode)) -+ { -+ op_false = force_reg (mode, op_false); -+ x = gen_rtx_NOT (mode, cmp); -+ x = gen_rtx_AND (mode, x, op_false); -+ emit_insn (gen_rtx_SET (dest, x)); -+ return; -+ } -+ else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)) -+ { -+ op_false = force_reg (mode, op_false); -+ x = gen_rtx_IOR (mode, cmp, op_false); -+ emit_insn (gen_rtx_SET (dest, x)); -+ return; -+ } -+ else if (TARGET_XOP) -+ { -+ op_true = force_reg (mode, op_true); -+ -+ if (!nonimmediate_operand (op_false, mode)) -+ op_false = force_reg (mode, op_false); -+ -+ emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cmp, -+ op_true, -+ op_false))); -+ return; -+ } -+ -+ rtx (*gen) (rtx, rtx, rtx, rtx) = NULL; -+ rtx d = dest; -+ -+ if (!vector_operand (op_true, mode)) -+ op_true = force_reg (mode, op_true); -+ -+ op_false = force_reg (mode, op_false); -+ -+ switch (mode) -+ { -+ case E_V4SFmode: -+ if (TARGET_SSE4_1) -+ gen = gen_sse4_1_blendvps; -+ break; -+ case 
E_V2DFmode: -+ if (TARGET_SSE4_1) -+ gen = gen_sse4_1_blendvpd; -+ break; -+ case E_SFmode: -+ if (TARGET_SSE4_1) -+ { -+ gen = gen_sse4_1_blendvss; -+ op_true = force_reg (mode, op_true); -+ } -+ break; -+ case E_DFmode: -+ if (TARGET_SSE4_1) -+ { -+ gen = gen_sse4_1_blendvsd; -+ op_true = force_reg (mode, op_true); -+ } -+ break; -+ case E_V16QImode: -+ case E_V8HImode: -+ case E_V4SImode: -+ case E_V2DImode: -+ if (TARGET_SSE4_1) -+ { -+ gen = gen_sse4_1_pblendvb; -+ if (mode != V16QImode) -+ d = gen_reg_rtx (V16QImode); -+ op_false = gen_lowpart (V16QImode, op_false); -+ op_true = gen_lowpart (V16QImode, op_true); -+ cmp = gen_lowpart (V16QImode, cmp); -+ } -+ break; -+ case E_V8SFmode: -+ if (TARGET_AVX) -+ gen = gen_avx_blendvps256; -+ break; -+ case E_V4DFmode: -+ if (TARGET_AVX) -+ gen = gen_avx_blendvpd256; -+ break; -+ case E_V32QImode: -+ case E_V16HImode: -+ case E_V8SImode: -+ case E_V4DImode: -+ if (TARGET_AVX2) -+ { -+ gen = gen_avx2_pblendvb; -+ if (mode != V32QImode) -+ d = gen_reg_rtx (V32QImode); -+ op_false = gen_lowpart (V32QImode, op_false); -+ op_true = gen_lowpart (V32QImode, op_true); -+ cmp = gen_lowpart (V32QImode, cmp); -+ } -+ break; -+ -+ case E_V64QImode: -+ gen = gen_avx512bw_blendmv64qi; -+ break; -+ case E_V32HImode: -+ gen = gen_avx512bw_blendmv32hi; -+ break; -+ case E_V16SImode: -+ gen = gen_avx512f_blendmv16si; -+ break; -+ case E_V8DImode: -+ gen = gen_avx512f_blendmv8di; -+ break; -+ case E_V8DFmode: -+ gen = gen_avx512f_blendmv8df; -+ break; -+ case E_V16SFmode: -+ gen = gen_avx512f_blendmv16sf; -+ break; -+ -+ default: -+ break; -+ } -+ -+ if (gen != NULL) -+ { -+ emit_insn (gen (d, op_false, op_true, cmp)); -+ if (d != dest) -+ emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d)); -+ } -+ else -+ { -+ op_true = force_reg (mode, op_true); -+ -+ t2 = gen_reg_rtx (mode); -+ if (optimize) -+ t3 = gen_reg_rtx (mode); -+ else -+ t3 = dest; -+ -+ x = gen_rtx_AND (mode, op_true, cmp); -+ emit_insn (gen_rtx_SET (t2, x)); -+ -+ x = gen_rtx_NOT (mode, cmp); -+ x = gen_rtx_AND (mode, x, op_false); -+ emit_insn (gen_rtx_SET (t3, x)); -+ -+ x = gen_rtx_IOR (mode, t3, t2); -+ emit_insn (gen_rtx_SET (dest, x)); -+ } -+} -+ -+/* Swap, force into registers, or otherwise massage the two operands -+ to an sse comparison with a mask result. Thus we differ a bit from -+ ix86_prepare_fp_compare_args which expects to produce a flags result. -+ -+ The DEST operand exists to help determine whether to commute commutative -+ operators. The POP0/POP1 operands are updated in place. The new -+ comparison code is returned, or UNKNOWN if not implementable. */ -+ -+static enum rtx_code -+ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code, -+ rtx *pop0, rtx *pop1) -+{ -+ switch (code) -+ { -+ case LTGT: -+ case UNEQ: -+ /* AVX supports all the needed comparisons. */ -+ if (TARGET_AVX) -+ break; -+ /* We have no LTGT as an operator. We could implement it with -+ NE & ORDERED, but this requires an extra temporary. It's -+ not clear that it's worth it. */ -+ return UNKNOWN; -+ -+ case LT: -+ case LE: -+ case UNGT: -+ case UNGE: -+ /* These are supported directly. */ -+ break; -+ -+ case EQ: -+ case NE: -+ case UNORDERED: -+ case ORDERED: -+ /* AVX has 3 operand comparisons, no need to swap anything. */ -+ if (TARGET_AVX) -+ break; -+ /* For commutative operators, try to canonicalize the destination -+ operand to be first in the comparison - this helps reload to -+ avoid extra moves. 
*/ -+ if (!dest || !rtx_equal_p (dest, *pop1)) -+ break; -+ /* FALLTHRU */ -+ -+ case GE: -+ case GT: -+ case UNLE: -+ case UNLT: -+ /* These are not supported directly before AVX, and furthermore -+ ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the -+ comparison operands to transform into something that is -+ supported. */ -+ std::swap (*pop0, *pop1); -+ code = swap_condition (code); -+ break; -+ -+ default: -+ gcc_unreachable (); -+ } -+ -+ return code; -+} -+ -+/* Expand a floating-point conditional move. Return true if successful. */ -+ -+bool -+ix86_expand_fp_movcc (rtx operands[]) -+{ -+ machine_mode mode = GET_MODE (operands[0]); -+ enum rtx_code code = GET_CODE (operands[1]); -+ rtx tmp, compare_op; -+ rtx op0 = XEXP (operands[1], 0); -+ rtx op1 = XEXP (operands[1], 1); -+ -+ if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode)) -+ { -+ machine_mode cmode; -+ -+ /* Since we've no cmove for sse registers, don't force bad register -+ allocation just to gain access to it. Deny movcc when the -+ comparison mode doesn't match the move mode. */ -+ cmode = GET_MODE (op0); -+ if (cmode == VOIDmode) -+ cmode = GET_MODE (op1); -+ if (cmode != mode) -+ return false; -+ -+ code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1); -+ if (code == UNKNOWN) -+ return false; -+ -+ if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1, -+ operands[2], operands[3])) -+ return true; -+ -+ tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1, -+ operands[2], operands[3]); -+ ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]); -+ return true; -+ } -+ -+ if (GET_MODE (op0) == TImode -+ || (GET_MODE (op0) == DImode -+ && !TARGET_64BIT)) -+ return false; -+ -+ /* The floating point conditional move instructions don't directly -+ support conditions resulting from a signed integer comparison. */ -+ -+ compare_op = ix86_expand_compare (code, op0, op1); -+ if (!fcmov_comparison_operator (compare_op, VOIDmode)) -+ { -+ tmp = gen_reg_rtx (QImode); -+ ix86_expand_setcc (tmp, code, op0, op1); -+ -+ compare_op = ix86_expand_compare (NE, tmp, const0_rtx); -+ } -+ -+ emit_insn (gen_rtx_SET (operands[0], -+ gen_rtx_IF_THEN_ELSE (mode, compare_op, -+ operands[2], operands[3]))); -+ -+ return true; -+} -+ -+/* Helper for ix86_cmp_code_to_pcmp_immediate for int modes. */ -+ -+static int -+ix86_int_cmp_code_to_pcmp_immediate (enum rtx_code code) -+{ -+ switch (code) -+ { -+ case EQ: -+ return 0; -+ case LT: -+ case LTU: -+ return 1; -+ case LE: -+ case LEU: -+ return 2; -+ case NE: -+ return 4; -+ case GE: -+ case GEU: -+ return 5; -+ case GT: -+ case GTU: -+ return 6; -+ default: -+ gcc_unreachable (); -+ } -+} -+ -+/* Helper for ix86_cmp_code_to_pcmp_immediate for fp modes. */ -+ -+static int -+ix86_fp_cmp_code_to_pcmp_immediate (enum rtx_code code) -+{ -+ switch (code) -+ { -+ case EQ: -+ return 0x00; -+ case NE: -+ return 0x04; -+ case GT: -+ return 0x0e; -+ case LE: -+ return 0x02; -+ case GE: -+ return 0x0d; -+ case LT: -+ return 0x01; -+ case UNLE: -+ return 0x0a; -+ case UNLT: -+ return 0x09; -+ case UNGE: -+ return 0x05; -+ case UNGT: -+ return 0x06; -+ case UNEQ: -+ return 0x18; -+ case LTGT: -+ return 0x0c; -+ case ORDERED: -+ return 0x07; -+ case UNORDERED: -+ return 0x03; -+ default: -+ gcc_unreachable (); -+ } -+} -+ -+/* Return immediate value to be used in UNSPEC_PCMP -+ for comparison CODE in MODE. 
*/ -+ -+static int -+ix86_cmp_code_to_pcmp_immediate (enum rtx_code code, machine_mode mode) -+{ -+ if (FLOAT_MODE_P (mode)) -+ return ix86_fp_cmp_code_to_pcmp_immediate (code); -+ return ix86_int_cmp_code_to_pcmp_immediate (code); -+} -+ -+/* Expand AVX-512 vector comparison. */ -+ -+bool -+ix86_expand_mask_vec_cmp (rtx operands[]) -+{ -+ machine_mode mask_mode = GET_MODE (operands[0]); -+ machine_mode cmp_mode = GET_MODE (operands[2]); -+ enum rtx_code code = GET_CODE (operands[1]); -+ rtx imm = GEN_INT (ix86_cmp_code_to_pcmp_immediate (code, cmp_mode)); -+ int unspec_code; -+ rtx unspec; -+ -+ switch (code) -+ { -+ case LEU: -+ case GTU: -+ case GEU: -+ case LTU: -+ unspec_code = UNSPEC_UNSIGNED_PCMP; -+ break; -+ -+ default: -+ unspec_code = UNSPEC_PCMP; -+ } -+ -+ unspec = gen_rtx_UNSPEC (mask_mode, gen_rtvec (3, operands[2], -+ operands[3], imm), -+ unspec_code); -+ emit_insn (gen_rtx_SET (operands[0], unspec)); -+ -+ return true; -+} -+ -+/* Expand fp vector comparison. */ -+ -+bool -+ix86_expand_fp_vec_cmp (rtx operands[]) -+{ -+ enum rtx_code code = GET_CODE (operands[1]); -+ rtx cmp; -+ -+ code = ix86_prepare_sse_fp_compare_args (operands[0], code, -+ &operands[2], &operands[3]); -+ if (code == UNKNOWN) -+ { -+ rtx temp; -+ switch (GET_CODE (operands[1])) -+ { -+ case LTGT: -+ temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[2], -+ operands[3], NULL, NULL); -+ cmp = ix86_expand_sse_cmp (operands[0], NE, operands[2], -+ operands[3], NULL, NULL); -+ code = AND; -+ break; -+ case UNEQ: -+ temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[2], -+ operands[3], NULL, NULL); -+ cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[2], -+ operands[3], NULL, NULL); -+ code = IOR; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1, -+ OPTAB_DIRECT); -+ } -+ else -+ cmp = ix86_expand_sse_cmp (operands[0], code, operands[2], operands[3], -+ operands[1], operands[2]); -+ -+ if (operands[0] != cmp) -+ emit_move_insn (operands[0], cmp); -+ -+ return true; -+} -+ -+static rtx -+ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1, -+ rtx op_true, rtx op_false, bool *negate) -+{ -+ machine_mode data_mode = GET_MODE (dest); -+ machine_mode mode = GET_MODE (cop0); -+ rtx x; -+ -+ *negate = false; -+ -+ /* XOP supports all of the comparisons on all 128-bit vector int types. */ -+ if (TARGET_XOP -+ && (mode == V16QImode || mode == V8HImode -+ || mode == V4SImode || mode == V2DImode)) -+ ; -+ else -+ { -+ /* Canonicalize the comparison to EQ, GT, GTU. */ -+ switch (code) -+ { -+ case EQ: -+ case GT: -+ case GTU: -+ break; -+ -+ case NE: -+ case LE: -+ case LEU: -+ code = reverse_condition (code); -+ *negate = true; -+ break; -+ -+ case GE: -+ case GEU: -+ code = reverse_condition (code); -+ *negate = true; -+ /* FALLTHRU */ -+ -+ case LT: -+ case LTU: -+ std::swap (cop0, cop1); -+ code = swap_condition (code); -+ break; -+ -+ default: -+ gcc_unreachable (); -+ } -+ -+ /* Only SSE4.1/SSE4.2 supports V2DImode. */ -+ if (mode == V2DImode) -+ { -+ switch (code) -+ { -+ case EQ: -+ /* SSE4.1 supports EQ. */ -+ if (!TARGET_SSE4_1) -+ return NULL; -+ break; -+ -+ case GT: -+ case GTU: -+ /* SSE4.2 supports GT/GTU. */ -+ if (!TARGET_SSE4_2) -+ return NULL; -+ break; -+ -+ default: -+ gcc_unreachable (); -+ } -+ } -+ -+ rtx optrue = op_true ? op_true : CONSTM1_RTX (data_mode); -+ rtx opfalse = op_false ? 
op_false : CONST0_RTX (data_mode); -+ if (*negate) -+ std::swap (optrue, opfalse); -+ -+ /* Transform x > y ? 0 : -1 (i.e. x <= y ? -1 : 0 or x <= y) when -+ not using integer masks into min (x, y) == x ? -1 : 0 (i.e. -+ min (x, y) == x). While we add one instruction (the minimum), -+ we remove the need for two instructions in the negation, as the -+ result is done this way. -+ When using masks, do it for SI/DImode element types, as it is shorter -+ than the two subtractions. */ -+ if ((code != EQ -+ && GET_MODE_SIZE (mode) != 64 -+ && vector_all_ones_operand (opfalse, data_mode) -+ && optrue == CONST0_RTX (data_mode)) -+ || (code == GTU -+ && GET_MODE_SIZE (GET_MODE_INNER (mode)) >= 4 -+ /* Don't do it if not using integer masks and we'd end up with -+ the right values in the registers though. */ -+ && (GET_MODE_SIZE (mode) == 64 -+ || !vector_all_ones_operand (optrue, data_mode) -+ || opfalse != CONST0_RTX (data_mode)))) -+ { -+ rtx (*gen) (rtx, rtx, rtx) = NULL; -+ -+ switch (mode) -+ { -+ case E_V16SImode: -+ gen = (code == GTU) ? gen_uminv16si3 : gen_sminv16si3; -+ break; -+ case E_V8DImode: -+ gen = (code == GTU) ? gen_uminv8di3 : gen_sminv8di3; -+ cop0 = force_reg (mode, cop0); -+ cop1 = force_reg (mode, cop1); -+ break; -+ case E_V32QImode: -+ if (TARGET_AVX2) -+ gen = (code == GTU) ? gen_uminv32qi3 : gen_sminv32qi3; -+ break; -+ case E_V16HImode: -+ if (TARGET_AVX2) -+ gen = (code == GTU) ? gen_uminv16hi3 : gen_sminv16hi3; -+ break; -+ case E_V8SImode: -+ if (TARGET_AVX2) -+ gen = (code == GTU) ? gen_uminv8si3 : gen_sminv8si3; -+ break; -+ case E_V4DImode: -+ if (TARGET_AVX512VL) -+ { -+ gen = (code == GTU) ? gen_uminv4di3 : gen_sminv4di3; -+ cop0 = force_reg (mode, cop0); -+ cop1 = force_reg (mode, cop1); -+ } -+ break; -+ case E_V16QImode: -+ if (code == GTU && TARGET_SSE2) -+ gen = gen_uminv16qi3; -+ else if (code == GT && TARGET_SSE4_1) -+ gen = gen_sminv16qi3; -+ break; -+ case E_V8HImode: -+ if (code == GTU && TARGET_SSE4_1) -+ gen = gen_uminv8hi3; -+ else if (code == GT && TARGET_SSE2) -+ gen = gen_sminv8hi3; -+ break; -+ case E_V4SImode: -+ if (TARGET_SSE4_1) -+ gen = (code == GTU) ? gen_uminv4si3 : gen_sminv4si3; -+ break; -+ case E_V2DImode: -+ if (TARGET_AVX512VL) -+ { -+ gen = (code == GTU) ? gen_uminv2di3 : gen_sminv2di3; -+ cop0 = force_reg (mode, cop0); -+ cop1 = force_reg (mode, cop1); -+ } -+ break; -+ default: -+ break; -+ } -+ -+ if (gen) -+ { -+ rtx tem = gen_reg_rtx (mode); -+ if (!vector_operand (cop0, mode)) -+ cop0 = force_reg (mode, cop0); -+ if (!vector_operand (cop1, mode)) -+ cop1 = force_reg (mode, cop1); -+ *negate = !*negate; -+ emit_insn (gen (tem, cop0, cop1)); -+ cop1 = tem; -+ code = EQ; -+ } -+ } -+ -+ /* Unsigned parallel compare is not supported by the hardware. -+ Play some tricks to turn this into a signed comparison -+ against 0. */ -+ if (code == GTU) -+ { -+ cop0 = force_reg (mode, cop0); -+ -+ switch (mode) -+ { -+ case E_V16SImode: -+ case E_V8DImode: -+ case E_V8SImode: -+ case E_V4DImode: -+ case E_V4SImode: -+ case E_V2DImode: -+ { -+ rtx t1, t2, mask; -+ rtx (*gen_sub3) (rtx, rtx, rtx); -+ -+ switch (mode) -+ { -+ case E_V16SImode: gen_sub3 = gen_subv16si3; break; -+ case E_V8DImode: gen_sub3 = gen_subv8di3; break; -+ case E_V8SImode: gen_sub3 = gen_subv8si3; break; -+ case E_V4DImode: gen_sub3 = gen_subv4di3; break; -+ case E_V4SImode: gen_sub3 = gen_subv4si3; break; -+ case E_V2DImode: gen_sub3 = gen_subv2di3; break; -+ default: -+ gcc_unreachable (); -+ } -+ /* Subtract (-(INT MAX) - 1) from both operands to make -+ them signed. 
*/ -+ mask = ix86_build_signbit_mask (mode, true, false); -+ t1 = gen_reg_rtx (mode); -+ emit_insn (gen_sub3 (t1, cop0, mask)); -+ -+ t2 = gen_reg_rtx (mode); -+ emit_insn (gen_sub3 (t2, cop1, mask)); -+ -+ cop0 = t1; -+ cop1 = t2; -+ code = GT; -+ } -+ break; -+ -+ case E_V64QImode: -+ case E_V32HImode: -+ case E_V32QImode: -+ case E_V16HImode: -+ case E_V16QImode: -+ case E_V8HImode: -+ /* Perform a parallel unsigned saturating subtraction. */ -+ x = gen_reg_rtx (mode); -+ emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0, -+ cop1))); -+ -+ cop0 = x; -+ cop1 = CONST0_RTX (mode); -+ code = EQ; -+ *negate = !*negate; -+ break; -+ -+ default: -+ gcc_unreachable (); -+ } -+ } -+ } -+ -+ if (*negate) -+ std::swap (op_true, op_false); -+ -+ /* Allow the comparison to be done in one mode, but the movcc to -+ happen in another mode. */ -+ if (data_mode == mode) -+ { -+ x = ix86_expand_sse_cmp (dest, code, cop0, cop1, -+ op_true, op_false); -+ } -+ else -+ { -+ gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode)); -+ x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1, -+ op_true, op_false); -+ if (GET_MODE (x) == mode) -+ x = gen_lowpart (data_mode, x); -+ } -+ -+ return x; -+} -+ -+/* Expand integer vector comparison. */ -+ -+bool -+ix86_expand_int_vec_cmp (rtx operands[]) -+{ -+ rtx_code code = GET_CODE (operands[1]); -+ bool negate = false; -+ rtx cmp = ix86_expand_int_sse_cmp (operands[0], code, operands[2], -+ operands[3], NULL, NULL, &negate); -+ -+ if (!cmp) -+ return false; -+ -+ if (negate) -+ cmp = ix86_expand_int_sse_cmp (operands[0], EQ, cmp, -+ CONST0_RTX (GET_MODE (cmp)), -+ NULL, NULL, &negate); -+ -+ gcc_assert (!negate); -+ -+ if (operands[0] != cmp) -+ emit_move_insn (operands[0], cmp); -+ -+ return true; -+} -+ -+/* Expand a floating-point vector conditional move; a vcond operation -+ rather than a movcc operation. */ -+ -+bool -+ix86_expand_fp_vcond (rtx operands[]) -+{ -+ enum rtx_code code = GET_CODE (operands[3]); -+ rtx cmp; -+ -+ code = ix86_prepare_sse_fp_compare_args (operands[0], code, -+ &operands[4], &operands[5]); -+ if (code == UNKNOWN) -+ { -+ rtx temp; -+ switch (GET_CODE (operands[3])) -+ { -+ case LTGT: -+ temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4], -+ operands[5], operands[0], operands[0]); -+ cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4], -+ operands[5], operands[1], operands[2]); -+ code = AND; -+ break; -+ case UNEQ: -+ temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4], -+ operands[5], operands[0], operands[0]); -+ cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4], -+ operands[5], operands[1], operands[2]); -+ code = IOR; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1, -+ OPTAB_DIRECT); -+ ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]); -+ return true; -+ } -+ -+ if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4], -+ operands[5], operands[1], operands[2])) -+ return true; -+ -+ cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5], -+ operands[1], operands[2]); -+ ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]); -+ return true; -+} -+ -+/* Expand a signed/unsigned integral vector conditional move. 
*/ -+ -+bool -+ix86_expand_int_vcond (rtx operands[]) -+{ -+ machine_mode data_mode = GET_MODE (operands[0]); -+ machine_mode mode = GET_MODE (operands[4]); -+ enum rtx_code code = GET_CODE (operands[3]); -+ bool negate = false; -+ rtx x, cop0, cop1; -+ -+ cop0 = operands[4]; -+ cop1 = operands[5]; -+ -+ /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31 -+ and x < 0 ? 1 : 0 into (unsigned) x >> 31. */ -+ if ((code == LT || code == GE) -+ && data_mode == mode -+ && cop1 == CONST0_RTX (mode) -+ && operands[1 + (code == LT)] == CONST0_RTX (data_mode) -+ && GET_MODE_UNIT_SIZE (data_mode) > 1 -+ && GET_MODE_UNIT_SIZE (data_mode) <= 8 -+ && (GET_MODE_SIZE (data_mode) == 16 -+ || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32))) -+ { -+ rtx negop = operands[2 - (code == LT)]; -+ int shift = GET_MODE_UNIT_BITSIZE (data_mode) - 1; -+ if (negop == CONST1_RTX (data_mode)) -+ { -+ rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift), -+ operands[0], 1, OPTAB_DIRECT); -+ if (res != operands[0]) -+ emit_move_insn (operands[0], res); -+ return true; -+ } -+ else if (GET_MODE_INNER (data_mode) != DImode -+ && vector_all_ones_operand (negop, data_mode)) -+ { -+ rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift), -+ operands[0], 0, OPTAB_DIRECT); -+ if (res != operands[0]) -+ emit_move_insn (operands[0], res); -+ return true; -+ } -+ } -+ -+ if (!nonimmediate_operand (cop1, mode)) -+ cop1 = force_reg (mode, cop1); -+ if (!general_operand (operands[1], data_mode)) -+ operands[1] = force_reg (data_mode, operands[1]); -+ if (!general_operand (operands[2], data_mode)) -+ operands[2] = force_reg (data_mode, operands[2]); -+ -+ x = ix86_expand_int_sse_cmp (operands[0], code, cop0, cop1, -+ operands[1], operands[2], &negate); -+ -+ if (!x) -+ return false; -+ -+ ix86_expand_sse_movcc (operands[0], x, operands[1+negate], -+ operands[2-negate]); -+ return true; -+} -+ -+static bool -+ix86_expand_vec_perm_vpermt2 (rtx target, rtx mask, rtx op0, rtx op1, -+ struct expand_vec_perm_d *d) -+{ -+ /* ix86_expand_vec_perm_vpermt2 is called from both const and non-const -+ expander, so args are either in d, or in op0, op1 etc. */ -+ machine_mode mode = GET_MODE (d ? 
d->op0 : op0); -+ machine_mode maskmode = mode; -+ rtx (*gen) (rtx, rtx, rtx, rtx) = NULL; -+ -+ switch (mode) -+ { -+ case E_V8HImode: -+ if (TARGET_AVX512VL && TARGET_AVX512BW) -+ gen = gen_avx512vl_vpermt2varv8hi3; -+ break; -+ case E_V16HImode: -+ if (TARGET_AVX512VL && TARGET_AVX512BW) -+ gen = gen_avx512vl_vpermt2varv16hi3; -+ break; -+ case E_V64QImode: -+ if (TARGET_AVX512VBMI) -+ gen = gen_avx512bw_vpermt2varv64qi3; -+ break; -+ case E_V32HImode: -+ if (TARGET_AVX512BW) -+ gen = gen_avx512bw_vpermt2varv32hi3; -+ break; -+ case E_V4SImode: -+ if (TARGET_AVX512VL) -+ gen = gen_avx512vl_vpermt2varv4si3; -+ break; -+ case E_V8SImode: -+ if (TARGET_AVX512VL) -+ gen = gen_avx512vl_vpermt2varv8si3; -+ break; -+ case E_V16SImode: -+ if (TARGET_AVX512F) -+ gen = gen_avx512f_vpermt2varv16si3; -+ break; -+ case E_V4SFmode: -+ if (TARGET_AVX512VL) -+ { -+ gen = gen_avx512vl_vpermt2varv4sf3; -+ maskmode = V4SImode; -+ } -+ break; -+ case E_V8SFmode: -+ if (TARGET_AVX512VL) -+ { -+ gen = gen_avx512vl_vpermt2varv8sf3; -+ maskmode = V8SImode; -+ } -+ break; -+ case E_V16SFmode: -+ if (TARGET_AVX512F) -+ { -+ gen = gen_avx512f_vpermt2varv16sf3; -+ maskmode = V16SImode; -+ } -+ break; -+ case E_V2DImode: -+ if (TARGET_AVX512VL) -+ gen = gen_avx512vl_vpermt2varv2di3; -+ break; -+ case E_V4DImode: -+ if (TARGET_AVX512VL) -+ gen = gen_avx512vl_vpermt2varv4di3; -+ break; -+ case E_V8DImode: -+ if (TARGET_AVX512F) -+ gen = gen_avx512f_vpermt2varv8di3; -+ break; -+ case E_V2DFmode: -+ if (TARGET_AVX512VL) -+ { -+ gen = gen_avx512vl_vpermt2varv2df3; -+ maskmode = V2DImode; -+ } -+ break; -+ case E_V4DFmode: -+ if (TARGET_AVX512VL) -+ { -+ gen = gen_avx512vl_vpermt2varv4df3; -+ maskmode = V4DImode; -+ } -+ break; -+ case E_V8DFmode: -+ if (TARGET_AVX512F) -+ { -+ gen = gen_avx512f_vpermt2varv8df3; -+ maskmode = V8DImode; -+ } -+ break; -+ default: -+ break; -+ } -+ -+ if (gen == NULL) -+ return false; -+ -+ /* ix86_expand_vec_perm_vpermt2 is called from both const and non-const -+ expander, so args are either in d, or in op0, op1 etc. */ -+ if (d) -+ { -+ rtx vec[64]; -+ target = d->target; -+ op0 = d->op0; -+ op1 = d->op1; -+ for (int i = 0; i < d->nelt; ++i) -+ vec[i] = GEN_INT (d->perm[i]); -+ mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec)); -+ } -+ -+ emit_insn (gen (target, force_reg (maskmode, mask), op0, op1)); -+ return true; -+} -+ -+/* Expand a variable vector permutation. */ -+ -+void -+ix86_expand_vec_perm (rtx operands[]) -+{ -+ rtx target = operands[0]; -+ rtx op0 = operands[1]; -+ rtx op1 = operands[2]; -+ rtx mask = operands[3]; -+ rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32]; -+ machine_mode mode = GET_MODE (op0); -+ machine_mode maskmode = GET_MODE (mask); -+ int w, e, i; -+ bool one_operand_shuffle = rtx_equal_p (op0, op1); -+ -+ /* Number of elements in the vector. 
*/ -+ w = GET_MODE_NUNITS (mode); -+ e = GET_MODE_UNIT_SIZE (mode); -+ gcc_assert (w <= 64); -+ -+ if (TARGET_AVX512F && one_operand_shuffle) -+ { -+ rtx (*gen) (rtx, rtx, rtx) = NULL; -+ switch (mode) -+ { -+ case E_V16SImode: -+ gen =gen_avx512f_permvarv16si; -+ break; -+ case E_V16SFmode: -+ gen = gen_avx512f_permvarv16sf; -+ break; -+ case E_V8DImode: -+ gen = gen_avx512f_permvarv8di; -+ break; -+ case E_V8DFmode: -+ gen = gen_avx512f_permvarv8df; -+ break; -+ default: -+ break; -+ } -+ if (gen != NULL) -+ { -+ emit_insn (gen (target, op0, mask)); -+ return; -+ } -+ } -+ -+ if (ix86_expand_vec_perm_vpermt2 (target, mask, op0, op1, NULL)) -+ return; -+ -+ if (TARGET_AVX2) -+ { -+ if (mode == V4DImode || mode == V4DFmode || mode == V16HImode) -+ { -+ /* Unfortunately, the VPERMQ and VPERMPD instructions only support -+ an constant shuffle operand. With a tiny bit of effort we can -+ use VPERMD instead. A re-interpretation stall for V4DFmode is -+ unfortunate but there's no avoiding it. -+ Similarly for V16HImode we don't have instructions for variable -+ shuffling, while for V32QImode we can use after preparing suitable -+ masks vpshufb; vpshufb; vpermq; vpor. */ -+ -+ if (mode == V16HImode) -+ { -+ maskmode = mode = V32QImode; -+ w = 32; -+ e = 1; -+ } -+ else -+ { -+ maskmode = mode = V8SImode; -+ w = 8; -+ e = 4; -+ } -+ t1 = gen_reg_rtx (maskmode); -+ -+ /* Replicate the low bits of the V4DImode mask into V8SImode: -+ mask = { A B C D } -+ t1 = { A A B B C C D D }. */ -+ for (i = 0; i < w / 2; ++i) -+ vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2); -+ vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec)); -+ vt = force_reg (maskmode, vt); -+ mask = gen_lowpart (maskmode, mask); -+ if (maskmode == V8SImode) -+ emit_insn (gen_avx2_permvarv8si (t1, mask, vt)); -+ else -+ emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt)); -+ -+ /* Multiply the shuffle indicies by two. */ -+ t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1, -+ OPTAB_DIRECT); -+ -+ /* Add one to the odd shuffle indicies: -+ t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */ -+ for (i = 0; i < w / 2; ++i) -+ { -+ vec[i * 2] = const0_rtx; -+ vec[i * 2 + 1] = const1_rtx; -+ } -+ vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec)); -+ vt = validize_mem (force_const_mem (maskmode, vt)); -+ t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1, -+ OPTAB_DIRECT); -+ -+ /* Continue as if V8SImode (resp. V32QImode) was used initially. */ -+ operands[3] = mask = t1; -+ target = gen_reg_rtx (mode); -+ op0 = gen_lowpart (mode, op0); -+ op1 = gen_lowpart (mode, op1); -+ } -+ -+ switch (mode) -+ { -+ case E_V8SImode: -+ /* The VPERMD and VPERMPS instructions already properly ignore -+ the high bits of the shuffle elements. No need for us to -+ perform an AND ourselves. 
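(A hedged scalar model of the control-vector widening described above: to emulate a variable VPERMQ/VPERMPD with VPERMD, each 64-bit element index d is expanded into the 32-bit index pair 2*d and 2*d+1. The helper name is hypothetical and only shows the index arithmetic, not the emitted RTL.)

    /* mask = { A B C D }  -->  { 2A, 2A+1, 2B, 2B+1, 2C, 2C+1, 2D, 2D+1 }.  */
    static void
    qword_control_to_dword_control (const unsigned idx64[4], unsigned idx32[8])
    {
      for (int j = 0; j < 4; j++)
        {
          idx32[2 * j]     = 2 * idx64[j];
          idx32[2 * j + 1] = 2 * idx64[j] + 1;
        }
    }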
*/ -+ if (one_operand_shuffle) -+ { -+ emit_insn (gen_avx2_permvarv8si (target, op0, mask)); -+ if (target != operands[0]) -+ emit_move_insn (operands[0], -+ gen_lowpart (GET_MODE (operands[0]), target)); -+ } -+ else -+ { -+ t1 = gen_reg_rtx (V8SImode); -+ t2 = gen_reg_rtx (V8SImode); -+ emit_insn (gen_avx2_permvarv8si (t1, op0, mask)); -+ emit_insn (gen_avx2_permvarv8si (t2, op1, mask)); -+ goto merge_two; -+ } -+ return; -+ -+ case E_V8SFmode: -+ mask = gen_lowpart (V8SImode, mask); -+ if (one_operand_shuffle) -+ emit_insn (gen_avx2_permvarv8sf (target, op0, mask)); -+ else -+ { -+ t1 = gen_reg_rtx (V8SFmode); -+ t2 = gen_reg_rtx (V8SFmode); -+ emit_insn (gen_avx2_permvarv8sf (t1, op0, mask)); -+ emit_insn (gen_avx2_permvarv8sf (t2, op1, mask)); -+ goto merge_two; -+ } -+ return; -+ -+ case E_V4SImode: -+ /* By combining the two 128-bit input vectors into one 256-bit -+ input vector, we can use VPERMD and VPERMPS for the full -+ two-operand shuffle. */ -+ t1 = gen_reg_rtx (V8SImode); -+ t2 = gen_reg_rtx (V8SImode); -+ emit_insn (gen_avx_vec_concatv8si (t1, op0, op1)); -+ emit_insn (gen_avx_vec_concatv8si (t2, mask, mask)); -+ emit_insn (gen_avx2_permvarv8si (t1, t1, t2)); -+ emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx)); -+ return; -+ -+ case E_V4SFmode: -+ t1 = gen_reg_rtx (V8SFmode); -+ t2 = gen_reg_rtx (V8SImode); -+ mask = gen_lowpart (V4SImode, mask); -+ emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1)); -+ emit_insn (gen_avx_vec_concatv8si (t2, mask, mask)); -+ emit_insn (gen_avx2_permvarv8sf (t1, t1, t2)); -+ emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx)); -+ return; -+ -+ case E_V32QImode: -+ t1 = gen_reg_rtx (V32QImode); -+ t2 = gen_reg_rtx (V32QImode); -+ t3 = gen_reg_rtx (V32QImode); -+ vt2 = GEN_INT (-128); -+ vt = gen_const_vec_duplicate (V32QImode, vt2); -+ vt = force_reg (V32QImode, vt); -+ for (i = 0; i < 32; i++) -+ vec[i] = i < 16 ? vt2 : const0_rtx; -+ vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec)); -+ vt2 = force_reg (V32QImode, vt2); -+ /* From mask create two adjusted masks, which contain the same -+ bits as mask in the low 7 bits of each vector element. -+ The first mask will have the most significant bit clear -+ if it requests element from the same 128-bit lane -+ and MSB set if it requests element from the other 128-bit lane. -+ The second mask will have the opposite values of the MSB, -+ and additionally will have its 128-bit lanes swapped. -+ E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have -+ t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and -+ t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ... -+ stands for other 12 bytes. */ -+ /* The bit whether element is from the same lane or the other -+ lane is bit 4, so shift it up by 3 to the MSB position. */ -+ t5 = gen_reg_rtx (V4DImode); -+ emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask), -+ GEN_INT (3))); -+ /* Clear MSB bits from the mask just in case it had them set. */ -+ emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask)); -+ /* After this t1 will have MSB set for elements from other lane. */ -+ emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2)); -+ /* Clear bits other than MSB. */ -+ emit_insn (gen_andv32qi3 (t1, t1, vt)); -+ /* Or in the lower bits from mask into t3. */ -+ emit_insn (gen_iorv32qi3 (t3, t1, t2)); -+ /* And invert MSB bits in t1, so MSB is set for elements from the same -+ lane. */ -+ emit_insn (gen_xorv32qi3 (t1, t1, vt)); -+ /* Swap 128-bit lanes in t3. 
*/ -+ t6 = gen_reg_rtx (V4DImode); -+ emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3), -+ const2_rtx, GEN_INT (3), -+ const0_rtx, const1_rtx)); -+ /* And or in the lower bits from mask into t1. */ -+ emit_insn (gen_iorv32qi3 (t1, t1, t2)); -+ if (one_operand_shuffle) -+ { -+ /* Each of these shuffles will put 0s in places where -+ element from the other 128-bit lane is needed, otherwise -+ will shuffle in the requested value. */ -+ emit_insn (gen_avx2_pshufbv32qi3 (t3, op0, -+ gen_lowpart (V32QImode, t6))); -+ emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1)); -+ /* For t3 the 128-bit lanes are swapped again. */ -+ t7 = gen_reg_rtx (V4DImode); -+ emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3), -+ const2_rtx, GEN_INT (3), -+ const0_rtx, const1_rtx)); -+ /* And oring both together leads to the result. */ -+ emit_insn (gen_iorv32qi3 (target, t1, -+ gen_lowpart (V32QImode, t7))); -+ if (target != operands[0]) -+ emit_move_insn (operands[0], -+ gen_lowpart (GET_MODE (operands[0]), target)); -+ return; -+ } -+ -+ t4 = gen_reg_rtx (V32QImode); -+ /* Similarly to the above one_operand_shuffle code, -+ just for repeated twice for each operand. merge_two: -+ code will merge the two results together. */ -+ emit_insn (gen_avx2_pshufbv32qi3 (t4, op0, -+ gen_lowpart (V32QImode, t6))); -+ emit_insn (gen_avx2_pshufbv32qi3 (t3, op1, -+ gen_lowpart (V32QImode, t6))); -+ emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1)); -+ emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1)); -+ t7 = gen_reg_rtx (V4DImode); -+ emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4), -+ const2_rtx, GEN_INT (3), -+ const0_rtx, const1_rtx)); -+ t8 = gen_reg_rtx (V4DImode); -+ emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3), -+ const2_rtx, GEN_INT (3), -+ const0_rtx, const1_rtx)); -+ emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7))); -+ emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8))); -+ t1 = t4; -+ t2 = t3; -+ goto merge_two; -+ -+ default: -+ gcc_assert (GET_MODE_SIZE (mode) <= 16); -+ break; -+ } -+ } -+ -+ if (TARGET_XOP) -+ { -+ /* The XOP VPPERM insn supports three inputs. By ignoring the -+ one_operand_shuffle special case, we avoid creating another -+ set of constant vectors in memory. */ -+ one_operand_shuffle = false; -+ -+ /* mask = mask & {2*w-1, ...} */ -+ vt = GEN_INT (2*w - 1); -+ } -+ else -+ { -+ /* mask = mask & {w-1, ...} */ -+ vt = GEN_INT (w - 1); -+ } -+ -+ vt = gen_const_vec_duplicate (maskmode, vt); -+ mask = expand_simple_binop (maskmode, AND, mask, vt, -+ NULL_RTX, 0, OPTAB_DIRECT); -+ -+ /* For non-QImode operations, convert the word permutation control -+ into a byte permutation control. */ -+ if (mode != V16QImode) -+ { -+ mask = expand_simple_binop (maskmode, ASHIFT, mask, -+ GEN_INT (exact_log2 (e)), -+ NULL_RTX, 0, OPTAB_DIRECT); -+ -+ /* Convert mask to vector of chars. */ -+ mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask)); -+ -+ /* Replicate each of the input bytes into byte positions: -+ (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8} -+ (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12} -+ (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. 
*/ -+ for (i = 0; i < 16; ++i) -+ vec[i] = GEN_INT (i/e * e); -+ vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec)); -+ vt = validize_mem (force_const_mem (V16QImode, vt)); -+ if (TARGET_XOP) -+ emit_insn (gen_xop_pperm (mask, mask, mask, vt)); -+ else -+ emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt)); -+ -+ /* Convert it into the byte positions by doing -+ mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */ -+ for (i = 0; i < 16; ++i) -+ vec[i] = GEN_INT (i % e); -+ vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec)); -+ vt = validize_mem (force_const_mem (V16QImode, vt)); -+ emit_insn (gen_addv16qi3 (mask, mask, vt)); -+ } -+ -+ /* The actual shuffle operations all operate on V16QImode. */ -+ op0 = gen_lowpart (V16QImode, op0); -+ op1 = gen_lowpart (V16QImode, op1); -+ -+ if (TARGET_XOP) -+ { -+ if (GET_MODE (target) != V16QImode) -+ target = gen_reg_rtx (V16QImode); -+ emit_insn (gen_xop_pperm (target, op0, op1, mask)); -+ if (target != operands[0]) -+ emit_move_insn (operands[0], -+ gen_lowpart (GET_MODE (operands[0]), target)); -+ } -+ else if (one_operand_shuffle) -+ { -+ if (GET_MODE (target) != V16QImode) -+ target = gen_reg_rtx (V16QImode); -+ emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask)); -+ if (target != operands[0]) -+ emit_move_insn (operands[0], -+ gen_lowpart (GET_MODE (operands[0]), target)); -+ } -+ else -+ { -+ rtx xops[6]; -+ bool ok; -+ -+ /* Shuffle the two input vectors independently. */ -+ t1 = gen_reg_rtx (V16QImode); -+ t2 = gen_reg_rtx (V16QImode); -+ emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask)); -+ emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask)); -+ -+ merge_two: -+ /* Then merge them together. The key is whether any given control -+ element contained a bit set that indicates the second word. */ -+ mask = operands[3]; -+ vt = GEN_INT (w); -+ if (maskmode == V2DImode && !TARGET_SSE4_1) -+ { -+ /* Without SSE4.1, we don't have V2DImode EQ. Perform one -+ more shuffle to convert the V2DI input mask into a V4SI -+ input mask. At which point the masking that expand_int_vcond -+ will work as desired. */ -+ rtx t3 = gen_reg_rtx (V4SImode); -+ emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask), -+ const0_rtx, const0_rtx, -+ const2_rtx, const2_rtx)); -+ mask = t3; -+ maskmode = V4SImode; -+ e = w = 4; -+ } -+ -+ vt = gen_const_vec_duplicate (maskmode, vt); -+ vt = force_reg (maskmode, vt); -+ mask = expand_simple_binop (maskmode, AND, mask, vt, -+ NULL_RTX, 0, OPTAB_DIRECT); -+ -+ if (GET_MODE (target) != mode) -+ target = gen_reg_rtx (mode); -+ xops[0] = target; -+ xops[1] = gen_lowpart (mode, t2); -+ xops[2] = gen_lowpart (mode, t1); -+ xops[3] = gen_rtx_EQ (maskmode, mask, vt); -+ xops[4] = mask; -+ xops[5] = vt; -+ ok = ix86_expand_int_vcond (xops); -+ gcc_assert (ok); -+ if (target != operands[0]) -+ emit_move_insn (operands[0], -+ gen_lowpart (GET_MODE (operands[0]), target)); -+ } -+} -+ -+/* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is -+ true if we should do zero extension, else sign extension. HIGH_P is -+ true if we want the N/2 high elements, else the low elements. 
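(The shift, pshufb replication and add sequence above amounts to the following index arithmetic, shown as a minimal scalar sketch that is not part of the patch: a word-permutation control with element size e bytes becomes a byte-permutation control usable by pshufb.)

    /* byte_control[i] = word_control[i / e] * e + (i % e), for i = 0..15.  */
    static void
    word_control_to_byte_control (const unsigned *word_control,
                                  unsigned char byte_control[16], int e)
    {
      for (int i = 0; i < 16; i++)
        byte_control[i] = (unsigned char) (word_control[i / e] * e + i % e);
    }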
*/ -+ -+void -+ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p) -+{ -+ machine_mode imode = GET_MODE (src); -+ rtx tmp; -+ -+ if (TARGET_SSE4_1) -+ { -+ rtx (*unpack)(rtx, rtx); -+ rtx (*extract)(rtx, rtx) = NULL; -+ machine_mode halfmode = BLKmode; -+ -+ switch (imode) -+ { -+ case E_V64QImode: -+ if (unsigned_p) -+ unpack = gen_avx512bw_zero_extendv32qiv32hi2; -+ else -+ unpack = gen_avx512bw_sign_extendv32qiv32hi2; -+ halfmode = V32QImode; -+ extract -+ = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi; -+ break; -+ case E_V32QImode: -+ if (unsigned_p) -+ unpack = gen_avx2_zero_extendv16qiv16hi2; -+ else -+ unpack = gen_avx2_sign_extendv16qiv16hi2; -+ halfmode = V16QImode; -+ extract -+ = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi; -+ break; -+ case E_V32HImode: -+ if (unsigned_p) -+ unpack = gen_avx512f_zero_extendv16hiv16si2; -+ else -+ unpack = gen_avx512f_sign_extendv16hiv16si2; -+ halfmode = V16HImode; -+ extract -+ = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi; -+ break; -+ case E_V16HImode: -+ if (unsigned_p) -+ unpack = gen_avx2_zero_extendv8hiv8si2; -+ else -+ unpack = gen_avx2_sign_extendv8hiv8si2; -+ halfmode = V8HImode; -+ extract -+ = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi; -+ break; -+ case E_V16SImode: -+ if (unsigned_p) -+ unpack = gen_avx512f_zero_extendv8siv8di2; -+ else -+ unpack = gen_avx512f_sign_extendv8siv8di2; -+ halfmode = V8SImode; -+ extract -+ = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si; -+ break; -+ case E_V8SImode: -+ if (unsigned_p) -+ unpack = gen_avx2_zero_extendv4siv4di2; -+ else -+ unpack = gen_avx2_sign_extendv4siv4di2; -+ halfmode = V4SImode; -+ extract -+ = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si; -+ break; -+ case E_V16QImode: -+ if (unsigned_p) -+ unpack = gen_sse4_1_zero_extendv8qiv8hi2; -+ else -+ unpack = gen_sse4_1_sign_extendv8qiv8hi2; -+ break; -+ case E_V8HImode: -+ if (unsigned_p) -+ unpack = gen_sse4_1_zero_extendv4hiv4si2; -+ else -+ unpack = gen_sse4_1_sign_extendv4hiv4si2; -+ break; -+ case E_V4SImode: -+ if (unsigned_p) -+ unpack = gen_sse4_1_zero_extendv2siv2di2; -+ else -+ unpack = gen_sse4_1_sign_extendv2siv2di2; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ -+ if (GET_MODE_SIZE (imode) >= 32) -+ { -+ tmp = gen_reg_rtx (halfmode); -+ emit_insn (extract (tmp, src)); -+ } -+ else if (high_p) -+ { -+ /* Shift higher 8 bytes to lower 8 bytes. 
*/ -+ tmp = gen_reg_rtx (V1TImode); -+ emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src), -+ GEN_INT (64))); -+ tmp = gen_lowpart (imode, tmp); -+ } -+ else -+ tmp = src; -+ -+ emit_insn (unpack (dest, tmp)); -+ } -+ else -+ { -+ rtx (*unpack)(rtx, rtx, rtx); -+ -+ switch (imode) -+ { -+ case E_V16QImode: -+ if (high_p) -+ unpack = gen_vec_interleave_highv16qi; -+ else -+ unpack = gen_vec_interleave_lowv16qi; -+ break; -+ case E_V8HImode: -+ if (high_p) -+ unpack = gen_vec_interleave_highv8hi; -+ else -+ unpack = gen_vec_interleave_lowv8hi; -+ break; -+ case E_V4SImode: -+ if (high_p) -+ unpack = gen_vec_interleave_highv4si; -+ else -+ unpack = gen_vec_interleave_lowv4si; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ -+ if (unsigned_p) -+ tmp = force_reg (imode, CONST0_RTX (imode)); -+ else -+ tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode), -+ src, pc_rtx, pc_rtx); -+ -+ rtx tmp2 = gen_reg_rtx (imode); -+ emit_insn (unpack (tmp2, src, tmp)); -+ emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2)); -+ } -+} -+ -+/* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode, -+ but works for floating pointer parameters and nonoffsetable memories. -+ For pushes, it returns just stack offsets; the values will be saved -+ in the right order. Maximally three parts are generated. */ -+ -+static int -+ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode) -+{ -+ int size; -+ -+ if (!TARGET_64BIT) -+ size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4; -+ else -+ size = (GET_MODE_SIZE (mode) + 4) / 8; -+ -+ gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand))); -+ gcc_assert (size >= 2 && size <= 4); -+ -+ /* Optimize constant pool reference to immediates. This is used by fp -+ moves, that force all constants to memory to allow combining. */ -+ if (MEM_P (operand) && MEM_READONLY_P (operand)) -+ operand = avoid_constant_pool_reference (operand); -+ -+ if (MEM_P (operand) && !offsettable_memref_p (operand)) -+ { -+ /* The only non-offsetable memories we handle are pushes. */ -+ int ok = push_operand (operand, VOIDmode); -+ -+ gcc_assert (ok); -+ -+ operand = copy_rtx (operand); -+ PUT_MODE (operand, word_mode); -+ parts[0] = parts[1] = parts[2] = parts[3] = operand; -+ return size; -+ } -+ -+ if (GET_CODE (operand) == CONST_VECTOR) -+ { -+ scalar_int_mode imode = int_mode_for_mode (mode).require (); -+ /* Caution: if we looked through a constant pool memory above, -+ the operand may actually have a different mode now. That's -+ ok, since we want to pun this all the way back to an integer. 
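(For the pre-SSE4.1 unpack path completed above, widening works by interleaving each narrow element with either zero, for zero extension, or with the all-ones lane produced by the 0 > src comparison, for sign extension. A minimal scalar analogue assuming 8-bit to 16-bit widening; the function name is illustrative only.)

    #include <stdint.h>

    static int16_t
    widen_byte (int8_t x, int unsigned_p)
    {
      /* High half is 0 for zero extension, or 0xff when x is negative,
         the scalar counterpart of the "0 > src" compare mask.  */
      uint8_t high = unsigned_p ? 0u : (x < 0 ? 0xffu : 0u);
      return (int16_t) (((uint16_t) high << 8) | (uint8_t) x);
    }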
*/ -+ operand = simplify_subreg (imode, operand, GET_MODE (operand), 0); -+ gcc_assert (operand != NULL); -+ mode = imode; -+ } -+ -+ if (!TARGET_64BIT) -+ { -+ if (mode == DImode) -+ split_double_mode (mode, &operand, 1, &parts[0], &parts[1]); -+ else -+ { -+ int i; -+ -+ if (REG_P (operand)) -+ { -+ gcc_assert (reload_completed); -+ for (i = 0; i < size; i++) -+ parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i); -+ } -+ else if (offsettable_memref_p (operand)) -+ { -+ operand = adjust_address (operand, SImode, 0); -+ parts[0] = operand; -+ for (i = 1; i < size; i++) -+ parts[i] = adjust_address (operand, SImode, 4 * i); -+ } -+ else if (CONST_DOUBLE_P (operand)) -+ { -+ const REAL_VALUE_TYPE *r; -+ long l[4]; -+ -+ r = CONST_DOUBLE_REAL_VALUE (operand); -+ switch (mode) -+ { -+ case E_TFmode: -+ real_to_target (l, r, mode); -+ parts[3] = gen_int_mode (l[3], SImode); -+ parts[2] = gen_int_mode (l[2], SImode); -+ break; -+ case E_XFmode: -+ /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since -+ long double may not be 80-bit. */ -+ real_to_target (l, r, mode); -+ parts[2] = gen_int_mode (l[2], SImode); -+ break; -+ case E_DFmode: -+ REAL_VALUE_TO_TARGET_DOUBLE (*r, l); -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ parts[1] = gen_int_mode (l[1], SImode); -+ parts[0] = gen_int_mode (l[0], SImode); -+ } -+ else -+ gcc_unreachable (); -+ } -+ } -+ else -+ { -+ if (mode == TImode) -+ split_double_mode (mode, &operand, 1, &parts[0], &parts[1]); -+ if (mode == XFmode || mode == TFmode) -+ { -+ machine_mode upper_mode = mode==XFmode ? SImode : DImode; -+ if (REG_P (operand)) -+ { -+ gcc_assert (reload_completed); -+ parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0); -+ parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1); -+ } -+ else if (offsettable_memref_p (operand)) -+ { -+ operand = adjust_address (operand, DImode, 0); -+ parts[0] = operand; -+ parts[1] = adjust_address (operand, upper_mode, 8); -+ } -+ else if (CONST_DOUBLE_P (operand)) -+ { -+ long l[4]; -+ -+ real_to_target (l, CONST_DOUBLE_REAL_VALUE (operand), mode); -+ -+ /* real_to_target puts 32-bit pieces in each long. */ -+ parts[0] = gen_int_mode ((l[0] & HOST_WIDE_INT_C (0xffffffff)) -+ | ((l[1] & HOST_WIDE_INT_C (0xffffffff)) -+ << 32), DImode); -+ -+ if (upper_mode == SImode) -+ parts[1] = gen_int_mode (l[2], SImode); -+ else -+ parts[1] -+ = gen_int_mode ((l[2] & HOST_WIDE_INT_C (0xffffffff)) -+ | ((l[3] & HOST_WIDE_INT_C (0xffffffff)) -+ << 32), DImode); -+ } -+ else -+ gcc_unreachable (); -+ } -+ } -+ -+ return size; -+} -+ -+/* Emit insns to perform a move or push of DI, DF, XF, and TF values. -+ Return false when normal moves are needed; true when all required -+ insns have been emitted. Operands 2-4 contain the input values -+ int the correct order; operands 5-7 contain the output values. */ -+ -+void -+ix86_split_long_move (rtx operands[]) -+{ -+ rtx part[2][4]; -+ int nparts, i, j; -+ int push = 0; -+ int collisions = 0; -+ machine_mode mode = GET_MODE (operands[0]); -+ bool collisionparts[4]; -+ -+ /* The DFmode expanders may ask us to move double. -+ For 64bit target this is single move. By hiding the fact -+ here we simplify i386.md splitters. */ -+ if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8) -+ { -+ /* Optimize constant pool reference to immediates. This is used by -+ fp moves, that force all constants to memory to allow combining. 
*/ -+ -+ if (MEM_P (operands[1]) -+ && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF -+ && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0))) -+ operands[1] = get_pool_constant (XEXP (operands[1], 0)); -+ if (push_operand (operands[0], VOIDmode)) -+ { -+ operands[0] = copy_rtx (operands[0]); -+ PUT_MODE (operands[0], word_mode); -+ } -+ else -+ operands[0] = gen_lowpart (DImode, operands[0]); -+ operands[1] = gen_lowpart (DImode, operands[1]); -+ emit_move_insn (operands[0], operands[1]); -+ return; -+ } -+ -+ /* The only non-offsettable memory we handle is push. */ -+ if (push_operand (operands[0], VOIDmode)) -+ push = 1; -+ else -+ gcc_assert (!MEM_P (operands[0]) -+ || offsettable_memref_p (operands[0])); -+ -+ nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0])); -+ ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0])); -+ -+ /* When emitting push, take care for source operands on the stack. */ -+ if (push && MEM_P (operands[1]) -+ && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1])) -+ { -+ rtx src_base = XEXP (part[1][nparts - 1], 0); -+ -+ /* Compensate for the stack decrement by 4. */ -+ if (!TARGET_64BIT && nparts == 3 -+ && mode == XFmode && TARGET_128BIT_LONG_DOUBLE) -+ src_base = plus_constant (Pmode, src_base, 4); -+ -+ /* src_base refers to the stack pointer and is -+ automatically decreased by emitted push. */ -+ for (i = 0; i < nparts; i++) -+ part[1][i] = change_address (part[1][i], -+ GET_MODE (part[1][i]), src_base); -+ } -+ -+ /* We need to do copy in the right order in case an address register -+ of the source overlaps the destination. */ -+ if (REG_P (part[0][0]) && MEM_P (part[1][0])) -+ { -+ rtx tmp; -+ -+ for (i = 0; i < nparts; i++) -+ { -+ collisionparts[i] -+ = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0)); -+ if (collisionparts[i]) -+ collisions++; -+ } -+ -+ /* Collision in the middle part can be handled by reordering. */ -+ if (collisions == 1 && nparts == 3 && collisionparts [1]) -+ { -+ std::swap (part[0][1], part[0][2]); -+ std::swap (part[1][1], part[1][2]); -+ } -+ else if (collisions == 1 -+ && nparts == 4 -+ && (collisionparts [1] || collisionparts [2])) -+ { -+ if (collisionparts [1]) -+ { -+ std::swap (part[0][1], part[0][2]); -+ std::swap (part[1][1], part[1][2]); -+ } -+ else -+ { -+ std::swap (part[0][2], part[0][3]); -+ std::swap (part[1][2], part[1][3]); -+ } -+ } -+ -+ /* If there are more collisions, we can't handle it by reordering. -+ Do an lea to the last part and use only one colliding move. */ -+ else if (collisions > 1) -+ { -+ rtx base, addr; -+ -+ collisions = 1; -+ -+ base = part[0][nparts - 1]; -+ -+ /* Handle the case when the last part isn't valid for lea. -+ Happens in 64-bit mode storing the 12-byte XFmode. */ -+ if (GET_MODE (base) != Pmode) -+ base = gen_rtx_REG (Pmode, REGNO (base)); -+ -+ addr = XEXP (part[1][0], 0); -+ if (TARGET_TLS_DIRECT_SEG_REFS) -+ { -+ struct ix86_address parts; -+ int ok = ix86_decompose_address (addr, &parts); -+ gcc_assert (ok); -+ /* It is not valid to use %gs: or %fs: in lea. 
*/ -+ gcc_assert (parts.seg == ADDR_SPACE_GENERIC); -+ } -+ emit_insn (gen_rtx_SET (base, addr)); -+ part[1][0] = replace_equiv_address (part[1][0], base); -+ for (i = 1; i < nparts; i++) -+ { -+ tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i); -+ part[1][i] = replace_equiv_address (part[1][i], tmp); -+ } -+ } -+ } -+ -+ if (push) -+ { -+ if (!TARGET_64BIT) -+ { -+ if (nparts == 3) -+ { -+ if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode) -+ emit_insn (ix86_gen_add3 (stack_pointer_rtx, -+ stack_pointer_rtx, GEN_INT (-4))); -+ emit_move_insn (part[0][2], part[1][2]); -+ } -+ else if (nparts == 4) -+ { -+ emit_move_insn (part[0][3], part[1][3]); -+ emit_move_insn (part[0][2], part[1][2]); -+ } -+ } -+ else -+ { -+ /* In 64bit mode we don't have 32bit push available. In case this is -+ register, it is OK - we will just use larger counterpart. We also -+ retype memory - these comes from attempt to avoid REX prefix on -+ moving of second half of TFmode value. */ -+ if (GET_MODE (part[1][1]) == SImode) -+ { -+ switch (GET_CODE (part[1][1])) -+ { -+ case MEM: -+ part[1][1] = adjust_address (part[1][1], DImode, 0); -+ break; -+ -+ case REG: -+ part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1])); -+ break; -+ -+ default: -+ gcc_unreachable (); -+ } -+ -+ if (GET_MODE (part[1][0]) == SImode) -+ part[1][0] = part[1][1]; -+ } -+ } -+ emit_move_insn (part[0][1], part[1][1]); -+ emit_move_insn (part[0][0], part[1][0]); -+ return; -+ } -+ -+ /* Choose correct order to not overwrite the source before it is copied. */ -+ if ((REG_P (part[0][0]) -+ && REG_P (part[1][1]) -+ && (REGNO (part[0][0]) == REGNO (part[1][1]) -+ || (nparts == 3 -+ && REGNO (part[0][0]) == REGNO (part[1][2])) -+ || (nparts == 4 -+ && REGNO (part[0][0]) == REGNO (part[1][3])))) -+ || (collisions > 0 -+ && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))) -+ { -+ for (i = 0, j = nparts - 1; i < nparts; i++, j--) -+ { -+ operands[2 + i] = part[0][j]; -+ operands[6 + i] = part[1][j]; -+ } -+ } -+ else -+ { -+ for (i = 0; i < nparts; i++) -+ { -+ operands[2 + i] = part[0][i]; -+ operands[6 + i] = part[1][i]; -+ } -+ } -+ -+ /* If optimizing for size, attempt to locally unCSE nonzero constants. */ -+ if (optimize_insn_for_size_p ()) -+ { -+ for (j = 0; j < nparts - 1; j++) -+ if (CONST_INT_P (operands[6 + j]) -+ && operands[6 + j] != const0_rtx -+ && REG_P (operands[2 + j])) -+ for (i = j; i < nparts - 1; i++) -+ if (CONST_INT_P (operands[7 + i]) -+ && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j])) -+ operands[7 + i] = operands[2 + j]; -+ } -+ -+ for (i = 0; i < nparts; i++) -+ emit_move_insn (operands[2 + i], operands[6 + i]); -+ -+ return; -+} -+ -+/* Helper function of ix86_split_ashl used to generate an SImode/DImode -+ left shift by a constant, either using a single shift or -+ a sequence of add instructions. */ -+ -+static void -+ix86_expand_ashl_const (rtx operand, int count, machine_mode mode) -+{ -+ rtx (*insn)(rtx, rtx, rtx); -+ -+ if (count == 1 -+ || (count * ix86_cost->add <= ix86_cost->shift_const -+ && !optimize_insn_for_size_p ())) -+ { -+ insn = mode == DImode ? gen_addsi3 : gen_adddi3; -+ while (count-- > 0) -+ emit_insn (insn (operand, operand, operand)); -+ } -+ else -+ { -+ insn = mode == DImode ? 
gen_ashlsi3 : gen_ashldi3; -+ emit_insn (insn (operand, operand, GEN_INT (count))); -+ } -+} -+ -+void -+ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode) -+{ -+ rtx (*gen_ashl3)(rtx, rtx, rtx); -+ rtx (*gen_shld)(rtx, rtx, rtx); -+ int half_width = GET_MODE_BITSIZE (mode) >> 1; -+ -+ rtx low[2], high[2]; -+ int count; -+ -+ if (CONST_INT_P (operands[2])) -+ { -+ split_double_mode (mode, operands, 2, low, high); -+ count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1); -+ -+ if (count >= half_width) -+ { -+ emit_move_insn (high[0], low[1]); -+ emit_move_insn (low[0], const0_rtx); -+ -+ if (count > half_width) -+ ix86_expand_ashl_const (high[0], count - half_width, mode); -+ } -+ else -+ { -+ gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld; -+ -+ if (!rtx_equal_p (operands[0], operands[1])) -+ emit_move_insn (operands[0], operands[1]); -+ -+ emit_insn (gen_shld (high[0], low[0], GEN_INT (count))); -+ ix86_expand_ashl_const (low[0], count, mode); -+ } -+ return; -+ } -+ -+ split_double_mode (mode, operands, 1, low, high); -+ -+ gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3; -+ -+ if (operands[1] == const1_rtx) -+ { -+ /* Assuming we've chosen a QImode capable registers, then 1 << N -+ can be done with two 32/64-bit shifts, no branches, no cmoves. */ -+ if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0])) -+ { -+ rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG); -+ -+ ix86_expand_clear (low[0]); -+ ix86_expand_clear (high[0]); -+ emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width))); -+ -+ d = gen_lowpart (QImode, low[0]); -+ d = gen_rtx_STRICT_LOW_PART (VOIDmode, d); -+ s = gen_rtx_EQ (QImode, flags, const0_rtx); -+ emit_insn (gen_rtx_SET (d, s)); -+ -+ d = gen_lowpart (QImode, high[0]); -+ d = gen_rtx_STRICT_LOW_PART (VOIDmode, d); -+ s = gen_rtx_NE (QImode, flags, const0_rtx); -+ emit_insn (gen_rtx_SET (d, s)); -+ } -+ -+ /* Otherwise, we can get the same results by manually performing -+ a bit extract operation on bit 5/6, and then performing the two -+ shifts. The two methods of getting 0/1 into low/high are exactly -+ the same size. Avoiding the shift in the bit extract case helps -+ pentium4 a bit; no one else seems to care much either way. */ -+ else -+ { -+ machine_mode half_mode; -+ rtx (*gen_lshr3)(rtx, rtx, rtx); -+ rtx (*gen_and3)(rtx, rtx, rtx); -+ rtx (*gen_xor3)(rtx, rtx, rtx); -+ HOST_WIDE_INT bits; -+ rtx x; -+ -+ if (mode == DImode) -+ { -+ half_mode = SImode; -+ gen_lshr3 = gen_lshrsi3; -+ gen_and3 = gen_andsi3; -+ gen_xor3 = gen_xorsi3; -+ bits = 5; -+ } -+ else -+ { -+ half_mode = DImode; -+ gen_lshr3 = gen_lshrdi3; -+ gen_and3 = gen_anddi3; -+ gen_xor3 = gen_xordi3; -+ bits = 6; -+ } -+ -+ if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ()) -+ x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]); -+ else -+ x = gen_lowpart (half_mode, operands[2]); -+ emit_insn (gen_rtx_SET (high[0], x)); -+ -+ emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits))); -+ emit_insn (gen_and3 (high[0], high[0], const1_rtx)); -+ emit_move_insn (low[0], high[0]); -+ emit_insn (gen_xor3 (low[0], low[0], const1_rtx)); -+ } -+ -+ emit_insn (gen_ashl3 (low[0], low[0], operands[2])); -+ emit_insn (gen_ashl3 (high[0], high[0], operands[2])); -+ return; -+ } -+ -+ if (operands[1] == constm1_rtx) -+ { -+ /* For -1 << N, we can avoid the shld instruction, because we -+ know that we're shifting 0...31/63 ones into a -1. 
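(The constant-count case handled above splits a double-word left shift into operations on the two halves. A hedged scalar sketch for a 64-bit value built from 32-bit halves, assuming 0 < count < 64; the helper shows the arithmetic, not the emitted RTL.)

    #include <stdint.h>

    static void
    ashl_double_word (uint32_t *lo, uint32_t *hi, int count)
    {
      if (count >= 32)
        {
          *hi = *lo << (count - 32);   /* high half comes from the low half */
          *lo = 0;
        }
      else
        {
          /* The shld step: high keeps its own bits, filled in from low.  */
          *hi = (*hi << count) | (*lo >> (32 - count));
          *lo <<= count;
        }
    }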
*/ -+ emit_move_insn (low[0], constm1_rtx); -+ if (optimize_insn_for_size_p ()) -+ emit_move_insn (high[0], low[0]); -+ else -+ emit_move_insn (high[0], constm1_rtx); -+ } -+ else -+ { -+ gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld; -+ -+ if (!rtx_equal_p (operands[0], operands[1])) -+ emit_move_insn (operands[0], operands[1]); -+ -+ split_double_mode (mode, operands, 1, low, high); -+ emit_insn (gen_shld (high[0], low[0], operands[2])); -+ } -+ -+ emit_insn (gen_ashl3 (low[0], low[0], operands[2])); -+ -+ if (TARGET_CMOVE && scratch) -+ { -+ rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx) -+ = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1; -+ -+ ix86_expand_clear (scratch); -+ emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch)); -+ } -+ else -+ { -+ rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx) -+ = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2; -+ -+ emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2])); -+ } -+} -+ -+void -+ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode) -+{ -+ rtx (*gen_ashr3)(rtx, rtx, rtx) -+ = mode == DImode ? gen_ashrsi3 : gen_ashrdi3; -+ rtx (*gen_shrd)(rtx, rtx, rtx); -+ int half_width = GET_MODE_BITSIZE (mode) >> 1; -+ -+ rtx low[2], high[2]; -+ int count; -+ -+ if (CONST_INT_P (operands[2])) -+ { -+ split_double_mode (mode, operands, 2, low, high); -+ count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1); -+ -+ if (count == GET_MODE_BITSIZE (mode) - 1) -+ { -+ emit_move_insn (high[0], high[1]); -+ emit_insn (gen_ashr3 (high[0], high[0], -+ GEN_INT (half_width - 1))); -+ emit_move_insn (low[0], high[0]); -+ -+ } -+ else if (count >= half_width) -+ { -+ emit_move_insn (low[0], high[1]); -+ emit_move_insn (high[0], low[0]); -+ emit_insn (gen_ashr3 (high[0], high[0], -+ GEN_INT (half_width - 1))); -+ -+ if (count > half_width) -+ emit_insn (gen_ashr3 (low[0], low[0], -+ GEN_INT (count - half_width))); -+ } -+ else -+ { -+ gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd; -+ -+ if (!rtx_equal_p (operands[0], operands[1])) -+ emit_move_insn (operands[0], operands[1]); -+ -+ emit_insn (gen_shrd (low[0], high[0], GEN_INT (count))); -+ emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count))); -+ } -+ } -+ else -+ { -+ gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd; -+ -+ if (!rtx_equal_p (operands[0], operands[1])) -+ emit_move_insn (operands[0], operands[1]); -+ -+ split_double_mode (mode, operands, 1, low, high); -+ -+ emit_insn (gen_shrd (low[0], high[0], operands[2])); -+ emit_insn (gen_ashr3 (high[0], high[0], operands[2])); -+ -+ if (TARGET_CMOVE && scratch) -+ { -+ rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx) -+ = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1; -+ -+ emit_move_insn (scratch, high[0]); -+ emit_insn (gen_ashr3 (scratch, scratch, -+ GEN_INT (half_width - 1))); -+ emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2], -+ scratch)); -+ } -+ else -+ { -+ rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx) -+ = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3; -+ -+ emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2])); -+ } -+ } -+} -+ -+void -+ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode) -+{ -+ rtx (*gen_lshr3)(rtx, rtx, rtx) -+ = mode == DImode ? 
gen_lshrsi3 : gen_lshrdi3; -+ rtx (*gen_shrd)(rtx, rtx, rtx); -+ int half_width = GET_MODE_BITSIZE (mode) >> 1; -+ -+ rtx low[2], high[2]; -+ int count; -+ -+ if (CONST_INT_P (operands[2])) -+ { -+ split_double_mode (mode, operands, 2, low, high); -+ count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1); -+ -+ if (count >= half_width) -+ { -+ emit_move_insn (low[0], high[1]); -+ ix86_expand_clear (high[0]); -+ -+ if (count > half_width) -+ emit_insn (gen_lshr3 (low[0], low[0], -+ GEN_INT (count - half_width))); -+ } -+ else -+ { -+ gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd; -+ -+ if (!rtx_equal_p (operands[0], operands[1])) -+ emit_move_insn (operands[0], operands[1]); -+ -+ emit_insn (gen_shrd (low[0], high[0], GEN_INT (count))); -+ emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count))); -+ } -+ } -+ else -+ { -+ gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd; -+ -+ if (!rtx_equal_p (operands[0], operands[1])) -+ emit_move_insn (operands[0], operands[1]); -+ -+ split_double_mode (mode, operands, 1, low, high); -+ -+ emit_insn (gen_shrd (low[0], high[0], operands[2])); -+ emit_insn (gen_lshr3 (high[0], high[0], operands[2])); -+ -+ if (TARGET_CMOVE && scratch) -+ { -+ rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx) -+ = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1; -+ -+ ix86_expand_clear (scratch); -+ emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2], -+ scratch)); -+ } -+ else -+ { -+ rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx) -+ = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2; -+ -+ emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2])); -+ } -+ } -+} -+ -+/* Return mode for the memcpy/memset loop counter. Prefer SImode over -+ DImode for constant loop counts. */ -+ -+static machine_mode -+counter_mode (rtx count_exp) -+{ -+ if (GET_MODE (count_exp) != VOIDmode) -+ return GET_MODE (count_exp); -+ if (!CONST_INT_P (count_exp)) -+ return Pmode; -+ if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff)) -+ return DImode; -+ return SImode; -+} -+ -+/* When ISSETMEM is FALSE, output simple loop to move memory pointer to SRCPTR -+ to DESTPTR via chunks of MODE unrolled UNROLL times, overall size is COUNT -+ specified in bytes. When ISSETMEM is TRUE, output the equivalent loop to set -+ memory by VALUE (supposed to be in MODE). -+ -+ The size is rounded down to whole number of chunk size moved at once. -+ SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */ -+ -+ -+static void -+expand_set_or_cpymem_via_loop (rtx destmem, rtx srcmem, -+ rtx destptr, rtx srcptr, rtx value, -+ rtx count, machine_mode mode, int unroll, -+ int expected_size, bool issetmem) -+{ -+ rtx_code_label *out_label, *top_label; -+ rtx iter, tmp; -+ machine_mode iter_mode = counter_mode (count); -+ int piece_size_n = GET_MODE_SIZE (mode) * unroll; -+ rtx piece_size = GEN_INT (piece_size_n); -+ rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1)); -+ rtx size; -+ int i; -+ -+ top_label = gen_label_rtx (); -+ out_label = gen_label_rtx (); -+ iter = gen_reg_rtx (iter_mode); -+ -+ size = expand_simple_binop (iter_mode, AND, count, piece_size_mask, -+ NULL, 1, OPTAB_DIRECT); -+ /* Those two should combine. 
*/ -+ if (piece_size == const1_rtx) -+ { -+ emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode, -+ true, out_label); -+ predict_jump (REG_BR_PROB_BASE * 10 / 100); -+ } -+ emit_move_insn (iter, const0_rtx); -+ -+ emit_label (top_label); -+ -+ tmp = convert_modes (Pmode, iter_mode, iter, true); -+ -+ /* This assert could be relaxed - in this case we'll need to compute -+ smallest power of two, containing in PIECE_SIZE_N and pass it to -+ offset_address. */ -+ gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0); -+ destmem = offset_address (destmem, tmp, piece_size_n); -+ destmem = adjust_address (destmem, mode, 0); -+ -+ if (!issetmem) -+ { -+ srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n); -+ srcmem = adjust_address (srcmem, mode, 0); -+ -+ /* When unrolling for chips that reorder memory reads and writes, -+ we can save registers by using single temporary. -+ Also using 4 temporaries is overkill in 32bit mode. */ -+ if (!TARGET_64BIT && 0) -+ { -+ for (i = 0; i < unroll; i++) -+ { -+ if (i) -+ { -+ destmem = adjust_address (copy_rtx (destmem), mode, -+ GET_MODE_SIZE (mode)); -+ srcmem = adjust_address (copy_rtx (srcmem), mode, -+ GET_MODE_SIZE (mode)); -+ } -+ emit_move_insn (destmem, srcmem); -+ } -+ } -+ else -+ { -+ rtx tmpreg[4]; -+ gcc_assert (unroll <= 4); -+ for (i = 0; i < unroll; i++) -+ { -+ tmpreg[i] = gen_reg_rtx (mode); -+ if (i) -+ srcmem = adjust_address (copy_rtx (srcmem), mode, -+ GET_MODE_SIZE (mode)); -+ emit_move_insn (tmpreg[i], srcmem); -+ } -+ for (i = 0; i < unroll; i++) -+ { -+ if (i) -+ destmem = adjust_address (copy_rtx (destmem), mode, -+ GET_MODE_SIZE (mode)); -+ emit_move_insn (destmem, tmpreg[i]); -+ } -+ } -+ } -+ else -+ for (i = 0; i < unroll; i++) -+ { -+ if (i) -+ destmem = adjust_address (copy_rtx (destmem), mode, -+ GET_MODE_SIZE (mode)); -+ emit_move_insn (destmem, value); -+ } -+ -+ tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter, -+ true, OPTAB_LIB_WIDEN); -+ if (tmp != iter) -+ emit_move_insn (iter, tmp); -+ -+ emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode, -+ true, top_label); -+ if (expected_size != -1) -+ { -+ expected_size /= GET_MODE_SIZE (mode) * unroll; -+ if (expected_size == 0) -+ predict_jump (0); -+ else if (expected_size > REG_BR_PROB_BASE) -+ predict_jump (REG_BR_PROB_BASE - 1); -+ else -+ predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) -+ / expected_size); -+ } -+ else -+ predict_jump (REG_BR_PROB_BASE * 80 / 100); -+ iter = ix86_zero_extend_to_Pmode (iter); -+ tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr, -+ true, OPTAB_LIB_WIDEN); -+ if (tmp != destptr) -+ emit_move_insn (destptr, tmp); -+ if (!issetmem) -+ { -+ tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr, -+ true, OPTAB_LIB_WIDEN); -+ if (tmp != srcptr) -+ emit_move_insn (srcptr, tmp); -+ } -+ emit_label (out_label); -+} -+ -+/* Divide COUNTREG by SCALE. */ -+static rtx -+scale_counter (rtx countreg, int scale) -+{ -+ rtx sc; -+ -+ if (scale == 1) -+ return countreg; -+ if (CONST_INT_P (countreg)) -+ return GEN_INT (INTVAL (countreg) / scale); -+ gcc_assert (REG_P (countreg)); -+ -+ sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg, -+ GEN_INT (exact_log2 (scale)), -+ NULL, 1, OPTAB_DIRECT); -+ return sc; -+} -+ -+/* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument. -+ When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored. -+ When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored. 
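(A hedged C analogue of the loop emitted by expand_set_or_cpymem_via_loop above: the trip count is rounded down to a whole number of chunks, mode size times the unroll factor, and the tail bytes are left for the epilogue expanders. The function is an illustration of the control flow only; the per-iteration unrolling is folded into a single memcpy of the whole chunk.)

    #include <stddef.h>
    #include <string.h>

    static void
    copy_via_loop (char *dst, const char *src, size_t count, size_t chunk)
    {
      size_t size = count & ~(chunk - 1);   /* chunk is a power of two */
      for (size_t iter = 0; iter < size; iter += chunk)
        memcpy (dst + iter, src + iter, chunk);
      /* count - size tail bytes remain for the epilogue code.  */
    }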
-+ For setmem case, VALUE is a promoted to a wider size ORIG_VALUE. -+ ORIG_VALUE is the original value passed to memset to fill the memory with. -+ Other arguments have same meaning as for previous function. */ -+ -+static void -+expand_set_or_cpymem_via_rep (rtx destmem, rtx srcmem, -+ rtx destptr, rtx srcptr, rtx value, rtx orig_value, -+ rtx count, -+ machine_mode mode, bool issetmem) -+{ -+ rtx destexp; -+ rtx srcexp; -+ rtx countreg; -+ HOST_WIDE_INT rounded_count; -+ -+ /* If possible, it is shorter to use rep movs. -+ TODO: Maybe it is better to move this logic to decide_alg. */ -+ if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3) -+ && (!issetmem || orig_value == const0_rtx)) -+ mode = SImode; -+ -+ if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode) -+ destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0); -+ -+ countreg = ix86_zero_extend_to_Pmode (scale_counter (count, -+ GET_MODE_SIZE (mode))); -+ if (mode != QImode) -+ { -+ destexp = gen_rtx_ASHIFT (Pmode, countreg, -+ GEN_INT (exact_log2 (GET_MODE_SIZE (mode)))); -+ destexp = gen_rtx_PLUS (Pmode, destexp, destptr); -+ } -+ else -+ destexp = gen_rtx_PLUS (Pmode, destptr, countreg); -+ if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count)) -+ { -+ rounded_count -+ = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode)); -+ destmem = shallow_copy_rtx (destmem); -+ set_mem_size (destmem, rounded_count); -+ } -+ else if (MEM_SIZE_KNOWN_P (destmem)) -+ clear_mem_size (destmem); -+ -+ if (issetmem) -+ { -+ value = force_reg (mode, gen_lowpart (mode, value)); -+ emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp)); -+ } -+ else -+ { -+ if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode) -+ srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0); -+ if (mode != QImode) -+ { -+ srcexp = gen_rtx_ASHIFT (Pmode, countreg, -+ GEN_INT (exact_log2 (GET_MODE_SIZE (mode)))); -+ srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr); -+ } -+ else -+ srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg); -+ if (CONST_INT_P (count)) -+ { -+ rounded_count -+ = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode)); -+ srcmem = shallow_copy_rtx (srcmem); -+ set_mem_size (srcmem, rounded_count); -+ } -+ else -+ { -+ if (MEM_SIZE_KNOWN_P (srcmem)) -+ clear_mem_size (srcmem); -+ } -+ emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg, -+ destexp, srcexp)); -+ } -+} -+ -+/* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to -+ DESTMEM. -+ SRC is passed by pointer to be updated on return. -+ Return value is updated DST. */ -+static rtx -+emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr, -+ HOST_WIDE_INT size_to_move) -+{ -+ rtx dst = destmem, src = *srcmem, adjust, tempreg; -+ enum insn_code code; -+ machine_mode move_mode; -+ int piece_size, i; -+ -+ /* Find the widest mode in which we could perform moves. -+ Start with the biggest power of 2 less than SIZE_TO_MOVE and half -+ it until move of such size is supported. */ -+ piece_size = 1 << floor_log2 (size_to_move); -+ while (!int_mode_for_size (piece_size * BITS_PER_UNIT, 0).exists (&move_mode) -+ || (code = optab_handler (mov_optab, move_mode)) == CODE_FOR_nothing) -+ { -+ gcc_assert (piece_size > 1); -+ piece_size >>= 1; -+ } -+ -+ /* Find the corresponding vector mode with the same size as MOVE_MODE. -+ MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). 
*/ -+ if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode)) -+ { -+ int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode); -+ if (!mode_for_vector (word_mode, nunits).exists (&move_mode) -+ || (code = optab_handler (mov_optab, move_mode)) == CODE_FOR_nothing) -+ { -+ move_mode = word_mode; -+ piece_size = GET_MODE_SIZE (move_mode); -+ code = optab_handler (mov_optab, move_mode); -+ } -+ } -+ gcc_assert (code != CODE_FOR_nothing); -+ -+ dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0); -+ src = adjust_automodify_address_nv (src, move_mode, srcptr, 0); -+ -+ /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */ -+ gcc_assert (size_to_move % piece_size == 0); -+ adjust = GEN_INT (piece_size); -+ for (i = 0; i < size_to_move; i += piece_size) -+ { -+ /* We move from memory to memory, so we'll need to do it via -+ a temporary register. */ -+ tempreg = gen_reg_rtx (move_mode); -+ emit_insn (GEN_FCN (code) (tempreg, src)); -+ emit_insn (GEN_FCN (code) (dst, tempreg)); -+ -+ emit_move_insn (destptr, -+ gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust)); -+ emit_move_insn (srcptr, -+ gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust)); -+ -+ dst = adjust_automodify_address_nv (dst, move_mode, destptr, -+ piece_size); -+ src = adjust_automodify_address_nv (src, move_mode, srcptr, -+ piece_size); -+ } -+ -+ /* Update DST and SRC rtx. */ -+ *srcmem = src; -+ return dst; -+} -+ -+/* Helper function for the string operations below. Dest VARIABLE whether -+ it is aligned to VALUE bytes. If true, jump to the label. */ -+ -+static rtx_code_label * -+ix86_expand_aligntest (rtx variable, int value, bool epilogue) -+{ -+ rtx_code_label *label = gen_label_rtx (); -+ rtx tmpcount = gen_reg_rtx (GET_MODE (variable)); -+ if (GET_MODE (variable) == DImode) -+ emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value))); -+ else -+ emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value))); -+ emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable), -+ 1, label); -+ if (epilogue) -+ predict_jump (REG_BR_PROB_BASE * 50 / 100); -+ else -+ predict_jump (REG_BR_PROB_BASE * 90 / 100); -+ return label; -+} -+ -+ -+/* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */ -+ -+static void -+expand_cpymem_epilogue (rtx destmem, rtx srcmem, -+ rtx destptr, rtx srcptr, rtx count, int max_size) -+{ -+ rtx src, dest; -+ if (CONST_INT_P (count)) -+ { -+ HOST_WIDE_INT countval = INTVAL (count); -+ HOST_WIDE_INT epilogue_size = countval % max_size; -+ int i; -+ -+ /* For now MAX_SIZE should be a power of 2. This assert could be -+ relaxed, but it'll require a bit more complicated epilogue -+ expanding. */ -+ gcc_assert ((max_size & (max_size - 1)) == 0); -+ for (i = max_size; i >= 1; i >>= 1) -+ { -+ if (epilogue_size & i) -+ destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i); -+ } -+ return; -+ } -+ if (max_size > 8) -+ { -+ count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1), -+ count, 1, OPTAB_DIRECT); -+ expand_set_or_cpymem_via_loop (destmem, srcmem, destptr, srcptr, NULL, -+ count, QImode, 1, 4, false); -+ return; -+ } -+ -+ /* When there are stringops, we can cheaply increase dest and src pointers. -+ Otherwise we save code size by maintaining offset (zero is readily -+ available from preceding rep operation) and using x86 addressing modes. 
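(For a constant count, the epilogue above decomposes the tail, count modulo max_size, into power-of-two pieces, so at most log2(max_size) moves are emitted. A minimal scalar sketch under the assumption that dst and src already point past the bulk copy; the function name is hypothetical.)

    #include <stddef.h>
    #include <string.h>

    static void
    copy_tail (char *dst, const char *src, size_t count, size_t max_size)
    {
      size_t tail = count % max_size;       /* max_size is a power of two */
      size_t off = 0;
      for (size_t i = max_size; i >= 1; i >>= 1)
        if (tail & i)
          {
            memcpy (dst + off, src + off, i);
            off += i;
          }
    }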
-+ */ -+ if (TARGET_SINGLE_STRINGOP) -+ { -+ if (max_size > 4) -+ { -+ rtx_code_label *label = ix86_expand_aligntest (count, 4, true); -+ src = change_address (srcmem, SImode, srcptr); -+ dest = change_address (destmem, SImode, destptr); -+ emit_insn (gen_strmov (destptr, dest, srcptr, src)); -+ emit_label (label); -+ LABEL_NUSES (label) = 1; -+ } -+ if (max_size > 2) -+ { -+ rtx_code_label *label = ix86_expand_aligntest (count, 2, true); -+ src = change_address (srcmem, HImode, srcptr); -+ dest = change_address (destmem, HImode, destptr); -+ emit_insn (gen_strmov (destptr, dest, srcptr, src)); -+ emit_label (label); -+ LABEL_NUSES (label) = 1; -+ } -+ if (max_size > 1) -+ { -+ rtx_code_label *label = ix86_expand_aligntest (count, 1, true); -+ src = change_address (srcmem, QImode, srcptr); -+ dest = change_address (destmem, QImode, destptr); -+ emit_insn (gen_strmov (destptr, dest, srcptr, src)); -+ emit_label (label); -+ LABEL_NUSES (label) = 1; -+ } -+ } -+ else -+ { -+ rtx offset = force_reg (Pmode, const0_rtx); -+ rtx tmp; -+ -+ if (max_size > 4) -+ { -+ rtx_code_label *label = ix86_expand_aligntest (count, 4, true); -+ src = change_address (srcmem, SImode, srcptr); -+ dest = change_address (destmem, SImode, destptr); -+ emit_move_insn (dest, src); -+ tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL, -+ true, OPTAB_LIB_WIDEN); -+ if (tmp != offset) -+ emit_move_insn (offset, tmp); -+ emit_label (label); -+ LABEL_NUSES (label) = 1; -+ } -+ if (max_size > 2) -+ { -+ rtx_code_label *label = ix86_expand_aligntest (count, 2, true); -+ tmp = gen_rtx_PLUS (Pmode, srcptr, offset); -+ src = change_address (srcmem, HImode, tmp); -+ tmp = gen_rtx_PLUS (Pmode, destptr, offset); -+ dest = change_address (destmem, HImode, tmp); -+ emit_move_insn (dest, src); -+ tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp, -+ true, OPTAB_LIB_WIDEN); -+ if (tmp != offset) -+ emit_move_insn (offset, tmp); -+ emit_label (label); -+ LABEL_NUSES (label) = 1; -+ } -+ if (max_size > 1) -+ { -+ rtx_code_label *label = ix86_expand_aligntest (count, 1, true); -+ tmp = gen_rtx_PLUS (Pmode, srcptr, offset); -+ src = change_address (srcmem, QImode, tmp); -+ tmp = gen_rtx_PLUS (Pmode, destptr, offset); -+ dest = change_address (destmem, QImode, tmp); -+ emit_move_insn (dest, src); -+ emit_label (label); -+ LABEL_NUSES (label) = 1; -+ } -+ } -+} -+ -+/* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM -+ with value PROMOTED_VAL. -+ SRC is passed by pointer to be updated on return. -+ Return value is updated DST. */ -+static rtx -+emit_memset (rtx destmem, rtx destptr, rtx promoted_val, -+ HOST_WIDE_INT size_to_move) -+{ -+ rtx dst = destmem, adjust; -+ enum insn_code code; -+ machine_mode move_mode; -+ int piece_size, i; -+ -+ /* Find the widest mode in which we could perform moves. -+ Start with the biggest power of 2 less than SIZE_TO_MOVE and half -+ it until move of such size is supported. */ -+ move_mode = GET_MODE (promoted_val); -+ if (move_mode == VOIDmode) -+ move_mode = QImode; -+ if (size_to_move < GET_MODE_SIZE (move_mode)) -+ { -+ unsigned int move_bits = size_to_move * BITS_PER_UNIT; -+ move_mode = int_mode_for_size (move_bits, 0).require (); -+ promoted_val = gen_lowpart (move_mode, promoted_val); -+ } -+ piece_size = GET_MODE_SIZE (move_mode); -+ code = optab_handler (mov_optab, move_mode); -+ gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX); -+ -+ dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0); -+ -+ /* Emit moves. 
We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */ -+ gcc_assert (size_to_move % piece_size == 0); -+ adjust = GEN_INT (piece_size); -+ for (i = 0; i < size_to_move; i += piece_size) -+ { -+ if (piece_size <= GET_MODE_SIZE (word_mode)) -+ { -+ emit_insn (gen_strset (destptr, dst, promoted_val)); -+ dst = adjust_automodify_address_nv (dst, move_mode, destptr, -+ piece_size); -+ continue; -+ } -+ -+ emit_insn (GEN_FCN (code) (dst, promoted_val)); -+ -+ emit_move_insn (destptr, -+ gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust)); -+ -+ dst = adjust_automodify_address_nv (dst, move_mode, destptr, -+ piece_size); -+ } -+ -+ /* Update DST rtx. */ -+ return dst; -+} -+/* Output code to set at most count & (max_size - 1) bytes starting by DEST. */ -+static void -+expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value, -+ rtx count, int max_size) -+{ -+ count = expand_simple_binop (counter_mode (count), AND, count, -+ GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT); -+ expand_set_or_cpymem_via_loop (destmem, NULL, destptr, NULL, -+ gen_lowpart (QImode, value), count, QImode, -+ 1, max_size / 2, true); -+} -+ -+/* Output code to set at most count & (max_size - 1) bytes starting by DEST. */ -+static void -+expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value, -+ rtx count, int max_size) -+{ -+ rtx dest; -+ -+ if (CONST_INT_P (count)) -+ { -+ HOST_WIDE_INT countval = INTVAL (count); -+ HOST_WIDE_INT epilogue_size = countval % max_size; -+ int i; -+ -+ /* For now MAX_SIZE should be a power of 2. This assert could be -+ relaxed, but it'll require a bit more complicated epilogue -+ expanding. */ -+ gcc_assert ((max_size & (max_size - 1)) == 0); -+ for (i = max_size; i >= 1; i >>= 1) -+ { -+ if (epilogue_size & i) -+ { -+ if (vec_value && i > GET_MODE_SIZE (GET_MODE (value))) -+ destmem = emit_memset (destmem, destptr, vec_value, i); -+ else -+ destmem = emit_memset (destmem, destptr, value, i); -+ } -+ } -+ return; -+ } -+ if (max_size > 32) -+ { -+ expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size); -+ return; -+ } -+ if (max_size > 16) -+ { -+ rtx_code_label *label = ix86_expand_aligntest (count, 16, true); -+ if (TARGET_64BIT) -+ { -+ dest = change_address (destmem, DImode, destptr); -+ emit_insn (gen_strset (destptr, dest, value)); -+ dest = adjust_automodify_address_nv (dest, DImode, destptr, 8); -+ emit_insn (gen_strset (destptr, dest, value)); -+ } -+ else -+ { -+ dest = change_address (destmem, SImode, destptr); -+ emit_insn (gen_strset (destptr, dest, value)); -+ dest = adjust_automodify_address_nv (dest, SImode, destptr, 4); -+ emit_insn (gen_strset (destptr, dest, value)); -+ dest = adjust_automodify_address_nv (dest, SImode, destptr, 8); -+ emit_insn (gen_strset (destptr, dest, value)); -+ dest = adjust_automodify_address_nv (dest, SImode, destptr, 12); -+ emit_insn (gen_strset (destptr, dest, value)); -+ } -+ emit_label (label); -+ LABEL_NUSES (label) = 1; -+ } -+ if (max_size > 8) -+ { -+ rtx_code_label *label = ix86_expand_aligntest (count, 8, true); -+ if (TARGET_64BIT) -+ { -+ dest = change_address (destmem, DImode, destptr); -+ emit_insn (gen_strset (destptr, dest, value)); -+ } -+ else -+ { -+ dest = change_address (destmem, SImode, destptr); -+ emit_insn (gen_strset (destptr, dest, value)); -+ dest = adjust_automodify_address_nv (dest, SImode, destptr, 4); -+ emit_insn (gen_strset (destptr, dest, value)); -+ } -+ emit_label (label); -+ LABEL_NUSES (label) = 1; -+ } -+ if (max_size > 4) -+ { -+ rtx_code_label *label = 
ix86_expand_aligntest (count, 4, true); -+ dest = change_address (destmem, SImode, destptr); -+ emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value))); -+ emit_label (label); -+ LABEL_NUSES (label) = 1; -+ } -+ if (max_size > 2) -+ { -+ rtx_code_label *label = ix86_expand_aligntest (count, 2, true); -+ dest = change_address (destmem, HImode, destptr); -+ emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value))); -+ emit_label (label); -+ LABEL_NUSES (label) = 1; -+ } -+ if (max_size > 1) -+ { -+ rtx_code_label *label = ix86_expand_aligntest (count, 1, true); -+ dest = change_address (destmem, QImode, destptr); -+ emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value))); -+ emit_label (label); -+ LABEL_NUSES (label) = 1; -+ } -+} -+ -+/* Adjust COUNTER by the VALUE. */ -+static void -+ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value) -+{ -+ rtx (*gen_add)(rtx, rtx, rtx) -+ = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3; -+ -+ emit_insn (gen_add (countreg, countreg, GEN_INT (-value))); -+} -+ -+/* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to -+ DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN. -+ Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are -+ ignored. -+ Return value is updated DESTMEM. */ -+ -+static rtx -+expand_set_or_cpymem_prologue (rtx destmem, rtx srcmem, -+ rtx destptr, rtx srcptr, rtx value, -+ rtx vec_value, rtx count, int align, -+ int desired_alignment, bool issetmem) -+{ -+ int i; -+ for (i = 1; i < desired_alignment; i <<= 1) -+ { -+ if (align <= i) -+ { -+ rtx_code_label *label = ix86_expand_aligntest (destptr, i, false); -+ if (issetmem) -+ { -+ if (vec_value && i > GET_MODE_SIZE (GET_MODE (value))) -+ destmem = emit_memset (destmem, destptr, vec_value, i); -+ else -+ destmem = emit_memset (destmem, destptr, value, i); -+ } -+ else -+ destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i); -+ ix86_adjust_counter (count, i); -+ emit_label (label); -+ LABEL_NUSES (label) = 1; -+ set_mem_align (destmem, i * 2 * BITS_PER_UNIT); -+ } -+ } -+ return destmem; -+} -+ -+/* Test if COUNT&SIZE is nonzero and if so, expand movme -+ or setmem sequence that is valid for SIZE..2*SIZE-1 bytes -+ and jump to DONE_LABEL. */ -+static void -+expand_small_cpymem_or_setmem (rtx destmem, rtx srcmem, -+ rtx destptr, rtx srcptr, -+ rtx value, rtx vec_value, -+ rtx count, int size, -+ rtx done_label, bool issetmem) -+{ -+ rtx_code_label *label = ix86_expand_aligntest (count, size, false); -+ machine_mode mode = int_mode_for_size (size * BITS_PER_UNIT, 1).else_blk (); -+ rtx modesize; -+ int n; -+ -+ /* If we do not have vector value to copy, we must reduce size. */ -+ if (issetmem) -+ { -+ if (!vec_value) -+ { -+ if (GET_MODE (value) == VOIDmode && size > 8) -+ mode = Pmode; -+ else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value))) -+ mode = GET_MODE (value); -+ } -+ else -+ mode = GET_MODE (vec_value), value = vec_value; -+ } -+ else -+ { -+ /* Choose appropriate vector mode. */ -+ if (size >= 32) -+ mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode; -+ else if (size >= 16) -+ mode = TARGET_SSE ? 
V16QImode : DImode; -+ srcmem = change_address (srcmem, mode, srcptr); -+ } -+ destmem = change_address (destmem, mode, destptr); -+ modesize = GEN_INT (GET_MODE_SIZE (mode)); -+ gcc_assert (GET_MODE_SIZE (mode) <= size); -+ for (n = 0; n * GET_MODE_SIZE (mode) < size; n++) -+ { -+ if (issetmem) -+ emit_move_insn (destmem, gen_lowpart (mode, value)); -+ else -+ { -+ emit_move_insn (destmem, srcmem); -+ srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode)); -+ } -+ destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode)); -+ } -+ -+ destmem = offset_address (destmem, count, 1); -+ destmem = offset_address (destmem, GEN_INT (-2 * size), -+ GET_MODE_SIZE (mode)); -+ if (!issetmem) -+ { -+ srcmem = offset_address (srcmem, count, 1); -+ srcmem = offset_address (srcmem, GEN_INT (-2 * size), -+ GET_MODE_SIZE (mode)); -+ } -+ for (n = 0; n * GET_MODE_SIZE (mode) < size; n++) -+ { -+ if (issetmem) -+ emit_move_insn (destmem, gen_lowpart (mode, value)); -+ else -+ { -+ emit_move_insn (destmem, srcmem); -+ srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode)); -+ } -+ destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode)); -+ } -+ emit_jump_insn (gen_jump (done_label)); -+ emit_barrier (); -+ -+ emit_label (label); -+ LABEL_NUSES (label) = 1; -+} -+ -+/* Handle small memcpy (up to SIZE that is supposed to be small power of 2. -+ and get ready for the main memcpy loop by copying iniital DESIRED_ALIGN-ALIGN -+ bytes and last SIZE bytes adjusitng DESTPTR/SRCPTR/COUNT in a way we can -+ proceed with an loop copying SIZE bytes at once. Do moves in MODE. -+ DONE_LABEL is a label after the whole copying sequence. The label is created -+ on demand if *DONE_LABEL is NULL. -+ MIN_SIZE is minimal size of block copied. This value gets adjusted for new -+ bounds after the initial copies. -+ -+ DESTMEM/SRCMEM are memory expressions pointing to the copies block, -+ DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicate whether -+ we will dispatch to a library call for large blocks. -+ -+ In pseudocode we do: -+ -+ if (COUNT < SIZE) -+ { -+ Assume that SIZE is 4. 
Bigger sizes are handled analogously -+ if (COUNT & 4) -+ { -+ copy 4 bytes from SRCPTR to DESTPTR -+ copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4 -+ goto done_label -+ } -+ if (!COUNT) -+ goto done_label; -+ copy 1 byte from SRCPTR to DESTPTR -+ if (COUNT & 2) -+ { -+ copy 2 bytes from SRCPTR to DESTPTR -+ copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2 -+ } -+ } -+ else -+ { -+ copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR -+ copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE -+ -+ OLD_DESPTR = DESTPTR; -+ Align DESTPTR up to DESIRED_ALIGN -+ SRCPTR += DESTPTR - OLD_DESTPTR -+ COUNT -= DEST_PTR - OLD_DESTPTR -+ if (DYNAMIC_CHECK) -+ Round COUNT down to multiple of SIZE -+ << optional caller supplied zero size guard is here >> -+ << optional caller supplied dynamic check is here >> -+ << caller supplied main copy loop is here >> -+ } -+ done_label: -+ */ -+static void -+expand_set_or_cpymem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem, -+ rtx *destptr, rtx *srcptr, -+ machine_mode mode, -+ rtx value, rtx vec_value, -+ rtx *count, -+ rtx_code_label **done_label, -+ int size, -+ int desired_align, -+ int align, -+ unsigned HOST_WIDE_INT *min_size, -+ bool dynamic_check, -+ bool issetmem) -+{ -+ rtx_code_label *loop_label = NULL, *label; -+ int n; -+ rtx modesize; -+ int prolog_size = 0; -+ rtx mode_value; -+ -+ /* Chose proper value to copy. */ -+ if (issetmem && VECTOR_MODE_P (mode)) -+ mode_value = vec_value; -+ else -+ mode_value = value; -+ gcc_assert (GET_MODE_SIZE (mode) <= size); -+ -+ /* See if block is big or small, handle small blocks. */ -+ if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size) -+ { -+ int size2 = size; -+ loop_label = gen_label_rtx (); -+ -+ if (!*done_label) -+ *done_label = gen_label_rtx (); -+ -+ emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count), -+ 1, loop_label); -+ size2 >>= 1; -+ -+ /* Handle sizes > 3. */ -+ for (;size2 > 2; size2 >>= 1) -+ expand_small_cpymem_or_setmem (destmem, srcmem, -+ *destptr, *srcptr, -+ value, vec_value, -+ *count, -+ size2, *done_label, issetmem); -+ /* Nothing to copy? Jump to DONE_LABEL if so */ -+ emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count), -+ 1, *done_label); -+ -+ /* Do a byte copy. */ -+ destmem = change_address (destmem, QImode, *destptr); -+ if (issetmem) -+ emit_move_insn (destmem, gen_lowpart (QImode, value)); -+ else -+ { -+ srcmem = change_address (srcmem, QImode, *srcptr); -+ emit_move_insn (destmem, srcmem); -+ } -+ -+ /* Handle sizes 2 and 3. */ -+ label = ix86_expand_aligntest (*count, 2, false); -+ destmem = change_address (destmem, HImode, *destptr); -+ destmem = offset_address (destmem, *count, 1); -+ destmem = offset_address (destmem, GEN_INT (-2), 2); -+ if (issetmem) -+ emit_move_insn (destmem, gen_lowpart (HImode, value)); -+ else -+ { -+ srcmem = change_address (srcmem, HImode, *srcptr); -+ srcmem = offset_address (srcmem, *count, 1); -+ srcmem = offset_address (srcmem, GEN_INT (-2), 2); -+ emit_move_insn (destmem, srcmem); -+ } -+ -+ emit_label (label); -+ LABEL_NUSES (label) = 1; -+ emit_jump_insn (gen_jump (*done_label)); -+ emit_barrier (); -+ } -+ else -+ gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size -+ || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size); -+ -+ /* Start memcpy for COUNT >= SIZE. */ -+ if (loop_label) -+ { -+ emit_label (loop_label); -+ LABEL_NUSES (loop_label) = 1; -+ } -+ -+ /* Copy first desired_align bytes. 
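   For example, with MODE being a 16-byte vector mode, ALIGN == 1 and
   DESIRED_ALIGN == 16, the loop below emits a single possibly misaligned
   16-byte move and leaves PROLOG_SIZE == 16; further down DESTPTR is then
   rounded up to a 16-byte boundary and SRCPTR/COUNT are adjusted by the
   number of bytes actually skipped.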
*/ -+ if (!issetmem) -+ srcmem = change_address (srcmem, mode, *srcptr); -+ destmem = change_address (destmem, mode, *destptr); -+ modesize = GEN_INT (GET_MODE_SIZE (mode)); -+ for (n = 0; prolog_size < desired_align - align; n++) -+ { -+ if (issetmem) -+ emit_move_insn (destmem, mode_value); -+ else -+ { -+ emit_move_insn (destmem, srcmem); -+ srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode)); -+ } -+ destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode)); -+ prolog_size += GET_MODE_SIZE (mode); -+ } -+ -+ -+ /* Copy last SIZE bytes. */ -+ destmem = offset_address (destmem, *count, 1); -+ destmem = offset_address (destmem, -+ GEN_INT (-size - prolog_size), -+ 1); -+ if (issetmem) -+ emit_move_insn (destmem, mode_value); -+ else -+ { -+ srcmem = offset_address (srcmem, *count, 1); -+ srcmem = offset_address (srcmem, -+ GEN_INT (-size - prolog_size), -+ 1); -+ emit_move_insn (destmem, srcmem); -+ } -+ for (n = 1; n * GET_MODE_SIZE (mode) < size; n++) -+ { -+ destmem = offset_address (destmem, modesize, 1); -+ if (issetmem) -+ emit_move_insn (destmem, mode_value); -+ else -+ { -+ srcmem = offset_address (srcmem, modesize, 1); -+ emit_move_insn (destmem, srcmem); -+ } -+ } -+ -+ /* Align destination. */ -+ if (desired_align > 1 && desired_align > align) -+ { -+ rtx saveddest = *destptr; -+ -+ gcc_assert (desired_align <= size); -+ /* Align destptr up, place it to new register. */ -+ *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr, -+ GEN_INT (prolog_size), -+ NULL_RTX, 1, OPTAB_DIRECT); -+ if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest)) -+ REG_POINTER (*destptr) = 1; -+ *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr, -+ GEN_INT (-desired_align), -+ *destptr, 1, OPTAB_DIRECT); -+ /* See how many bytes we skipped. */ -+ saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest, -+ *destptr, -+ saveddest, 1, OPTAB_DIRECT); -+ /* Adjust srcptr and count. */ -+ if (!issetmem) -+ *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, -+ saveddest, *srcptr, 1, OPTAB_DIRECT); -+ *count = expand_simple_binop (GET_MODE (*count), PLUS, *count, -+ saveddest, *count, 1, OPTAB_DIRECT); -+ /* We copied at most size + prolog_size. */ -+ if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size)) -+ *min_size -+ = ROUND_DOWN (*min_size - size, (unsigned HOST_WIDE_INT)size); -+ else -+ *min_size = 0; -+ -+ /* Our loops always round down the block size, but for dispatch to -+ library we need precise value. */ -+ if (dynamic_check) -+ *count = expand_simple_binop (GET_MODE (*count), AND, *count, -+ GEN_INT (-size), *count, 1, OPTAB_DIRECT); -+ } -+ else -+ { -+ gcc_assert (prolog_size == 0); -+ /* Decrease count, so we won't end up copying last word twice. */ -+ if (!CONST_INT_P (*count)) -+ *count = expand_simple_binop (GET_MODE (*count), PLUS, *count, -+ constm1_rtx, *count, 1, OPTAB_DIRECT); -+ else -+ *count = GEN_INT (ROUND_DOWN (UINTVAL (*count) - 1, -+ (unsigned HOST_WIDE_INT)size)); -+ if (*min_size) -+ *min_size = ROUND_DOWN (*min_size - 1, (unsigned HOST_WIDE_INT)size); -+ } -+} -+ -+ -+/* This function is like the previous one, except here we know how many bytes -+ need to be copied. That allows us to update alignment not only of DST, which -+ is returned, but also of SRC, which is passed as a pointer for that -+ reason. 
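   For example, with DESIRED_ALIGN == 8 and ALIGN_BYTES == 7 the loop
   below emits a 1-byte, a 2-byte and a 4-byte move (one move per set bit
   of ALIGN_BYTES), after which DST is known to be 8-byte aligned and its
   MEM_ALIGN and MEM_SIZE can be updated precisely.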
*/ -+static rtx -+expand_set_or_cpymem_constant_prologue (rtx dst, rtx *srcp, rtx destreg, -+ rtx srcreg, rtx value, rtx vec_value, -+ int desired_align, int align_bytes, -+ bool issetmem) -+{ -+ rtx src = NULL; -+ rtx orig_dst = dst; -+ rtx orig_src = NULL; -+ int piece_size = 1; -+ int copied_bytes = 0; -+ -+ if (!issetmem) -+ { -+ gcc_assert (srcp != NULL); -+ src = *srcp; -+ orig_src = src; -+ } -+ -+ for (piece_size = 1; -+ piece_size <= desired_align && copied_bytes < align_bytes; -+ piece_size <<= 1) -+ { -+ if (align_bytes & piece_size) -+ { -+ if (issetmem) -+ { -+ if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value))) -+ dst = emit_memset (dst, destreg, vec_value, piece_size); -+ else -+ dst = emit_memset (dst, destreg, value, piece_size); -+ } -+ else -+ dst = emit_memmov (dst, &src, destreg, srcreg, piece_size); -+ copied_bytes += piece_size; -+ } -+ } -+ if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT) -+ set_mem_align (dst, desired_align * BITS_PER_UNIT); -+ if (MEM_SIZE_KNOWN_P (orig_dst)) -+ set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes); -+ -+ if (!issetmem) -+ { -+ int src_align_bytes = get_mem_align_offset (src, desired_align -+ * BITS_PER_UNIT); -+ if (src_align_bytes >= 0) -+ src_align_bytes = desired_align - src_align_bytes; -+ if (src_align_bytes >= 0) -+ { -+ unsigned int src_align; -+ for (src_align = desired_align; src_align >= 2; src_align >>= 1) -+ { -+ if ((src_align_bytes & (src_align - 1)) -+ == (align_bytes & (src_align - 1))) -+ break; -+ } -+ if (src_align > (unsigned int) desired_align) -+ src_align = desired_align; -+ if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT) -+ set_mem_align (src, src_align * BITS_PER_UNIT); -+ } -+ if (MEM_SIZE_KNOWN_P (orig_src)) -+ set_mem_size (src, MEM_SIZE (orig_src) - align_bytes); -+ *srcp = src; -+ } -+ -+ return dst; -+} -+ -+/* Return true if ALG can be used in current context. -+ Assume we expand memset if MEMSET is true. */ -+static bool -+alg_usable_p (enum stringop_alg alg, bool memset, bool have_as) -+{ -+ if (alg == no_stringop) -+ return false; -+ if (alg == vector_loop) -+ return TARGET_SSE || TARGET_AVX; -+ /* Algorithms using the rep prefix want at least edi and ecx; -+ additionally, memset wants eax and memcpy wants esi. Don't -+ consider such algorithms if the user has appropriated those -+ registers for their own purposes, or if we have a non-default -+ address space, since some string insns cannot override the segment. */ -+ if (alg == rep_prefix_1_byte -+ || alg == rep_prefix_4_byte -+ || alg == rep_prefix_8_byte) -+ { -+ if (have_as) -+ return false; -+ if (fixed_regs[CX_REG] -+ || fixed_regs[DI_REG] -+ || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG])) -+ return false; -+ } -+ return true; -+} -+ -+/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */ -+static enum stringop_alg -+decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, -+ unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size, -+ bool memset, bool zero_memset, bool have_as, -+ int *dynamic_check, bool *noalign, bool recur) -+{ -+ const struct stringop_algs *algs; -+ bool optimize_for_speed; -+ int max = 0; -+ const struct processor_costs *cost; -+ int i; -+ bool any_alg_usable_p = false; -+ -+ *noalign = false; -+ *dynamic_check = -1; -+ -+ /* Even if the string operation call is cold, we still might spend a lot -+ of time processing large blocks. 
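   For instance, a cold stringop whose size is only known to be bounded by
   4 kB is still expanded with the speed-tuned cost tables, while one whose
   maximum (or expected) size is below 256 bytes, or any stringop in a
   function optimized for size, falls back to the size-tuned tables.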
*/ -+ if (optimize_function_for_size_p (cfun) -+ || (optimize_insn_for_size_p () -+ && (max_size < 256 -+ || (expected_size != -1 && expected_size < 256)))) -+ optimize_for_speed = false; -+ else -+ optimize_for_speed = true; -+ -+ cost = optimize_for_speed ? ix86_cost : &ix86_size_cost; -+ if (memset) -+ algs = &cost->memset[TARGET_64BIT != 0]; -+ else -+ algs = &cost->memcpy[TARGET_64BIT != 0]; -+ -+ /* See maximal size for user defined algorithm. */ -+ for (i = 0; i < MAX_STRINGOP_ALGS; i++) -+ { -+ enum stringop_alg candidate = algs->size[i].alg; -+ bool usable = alg_usable_p (candidate, memset, have_as); -+ any_alg_usable_p |= usable; -+ -+ if (candidate != libcall && candidate && usable) -+ max = algs->size[i].max; -+ } -+ -+ /* If expected size is not known but max size is small enough -+ so inline version is a win, set expected size into -+ the range. */ -+ if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1) -+ && expected_size == -1) -+ expected_size = min_size / 2 + max_size / 2; -+ -+ /* If user specified the algorithm, honor it if possible. */ -+ if (ix86_stringop_alg != no_stringop -+ && alg_usable_p (ix86_stringop_alg, memset, have_as)) -+ return ix86_stringop_alg; -+ /* rep; movq or rep; movl is the smallest variant. */ -+ else if (!optimize_for_speed) -+ { -+ *noalign = true; -+ if (!count || (count & 3) || (memset && !zero_memset)) -+ return alg_usable_p (rep_prefix_1_byte, memset, have_as) -+ ? rep_prefix_1_byte : loop_1_byte; -+ else -+ return alg_usable_p (rep_prefix_4_byte, memset, have_as) -+ ? rep_prefix_4_byte : loop; -+ } -+ /* Very tiny blocks are best handled via the loop, REP is expensive to -+ setup. */ -+ else if (expected_size != -1 && expected_size < 4) -+ return loop_1_byte; -+ else if (expected_size != -1) -+ { -+ enum stringop_alg alg = libcall; -+ bool alg_noalign = false; -+ for (i = 0; i < MAX_STRINGOP_ALGS; i++) -+ { -+ /* We get here if the algorithms that were not libcall-based -+ were rep-prefix based and we are unable to use rep prefixes -+ based on global register usage. Break out of the loop and -+ use the heuristic below. */ -+ if (algs->size[i].max == 0) -+ break; -+ if (algs->size[i].max >= expected_size || algs->size[i].max == -1) -+ { -+ enum stringop_alg candidate = algs->size[i].alg; -+ -+ if (candidate != libcall -+ && alg_usable_p (candidate, memset, have_as)) -+ { -+ alg = candidate; -+ alg_noalign = algs->size[i].noalign; -+ } -+ /* Honor TARGET_INLINE_ALL_STRINGOPS by picking -+ last non-libcall inline algorithm. */ -+ if (TARGET_INLINE_ALL_STRINGOPS) -+ { -+ /* When the current size is best to be copied by a libcall, -+ but we are still forced to inline, run the heuristic below -+ that will pick code for medium sized blocks. */ -+ if (alg != libcall) -+ { -+ *noalign = alg_noalign; -+ return alg; -+ } -+ else if (!any_alg_usable_p) -+ break; -+ } -+ else if (alg_usable_p (candidate, memset, have_as)) -+ { -+ *noalign = algs->size[i].noalign; -+ return candidate; -+ } -+ } -+ } -+ } -+ /* When asked to inline the call anyway, try to pick meaningful choice. -+ We look for maximal size of block that is faster to copy by hand and -+ take blocks of at most of that size guessing that average size will -+ be roughly half of the block. -+ -+ If this turns out to be bad, we might simply specify the preferred -+ choice in ix86_costs. 
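   Concretely, if the enabled cost tables top out at max == 1024 bytes,
   the recursive call below is made with expected_size == 512 (half of the
   largest block considered worth inlining), and with
   -minline-stringops-dynamically the runtime cutoff *DYNAMIC_CHECK is set
   to that same maximum.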
*/ -+ if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY) -+ && (algs->unknown_size == libcall -+ || !alg_usable_p (algs->unknown_size, memset, have_as))) -+ { -+ enum stringop_alg alg; -+ HOST_WIDE_INT new_expected_size = (max > 0 ? max : 4096) / 2; -+ -+ /* If there aren't any usable algorithms or if recursing already, -+ then recursing on smaller sizes or same size isn't going to -+ find anything. Just return the simple byte-at-a-time copy loop. */ -+ if (!any_alg_usable_p || recur) -+ { -+ /* Pick something reasonable. */ -+ if (TARGET_INLINE_STRINGOPS_DYNAMICALLY && !recur) -+ *dynamic_check = 128; -+ return loop_1_byte; -+ } -+ alg = decide_alg (count, new_expected_size, min_size, max_size, memset, -+ zero_memset, have_as, dynamic_check, noalign, true); -+ gcc_assert (*dynamic_check == -1); -+ if (TARGET_INLINE_STRINGOPS_DYNAMICALLY) -+ *dynamic_check = max; -+ else -+ gcc_assert (alg != libcall); -+ return alg; -+ } -+ return (alg_usable_p (algs->unknown_size, memset, have_as) -+ ? algs->unknown_size : libcall); -+} -+ -+/* Decide on alignment. We know that the operand is already aligned to ALIGN -+ (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */ -+static int -+decide_alignment (int align, -+ enum stringop_alg alg, -+ int expected_size, -+ machine_mode move_mode) -+{ -+ int desired_align = 0; -+ -+ gcc_assert (alg != no_stringop); -+ -+ if (alg == libcall) -+ return 0; -+ if (move_mode == VOIDmode) -+ return 0; -+ -+ desired_align = GET_MODE_SIZE (move_mode); -+ /* PentiumPro has special logic triggering for 8 byte aligned blocks. -+ copying whole cacheline at once. */ -+ if (TARGET_PENTIUMPRO -+ && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte)) -+ desired_align = 8; -+ -+ if (optimize_size) -+ desired_align = 1; -+ if (desired_align < align) -+ desired_align = align; -+ if (expected_size != -1 && expected_size < 4) -+ desired_align = align; -+ -+ return desired_align; -+} -+ -+ -+/* Helper function for memcpy. For QImode value 0xXY produce -+ 0xXYXYXYXY of wide specified by MODE. This is essentially -+ a * 0x10101010, but we can do slightly better than -+ synth_mult by unwinding the sequence by hand on CPUs with -+ slow multiply. */ -+static rtx -+promote_duplicated_reg (machine_mode mode, rtx val) -+{ -+ machine_mode valmode = GET_MODE (val); -+ rtx tmp; -+ int nops = mode == DImode ? 3 : 2; -+ -+ gcc_assert (mode == SImode || mode == DImode || val == const0_rtx); -+ if (val == const0_rtx) -+ return copy_to_mode_reg (mode, CONST0_RTX (mode)); -+ if (CONST_INT_P (val)) -+ { -+ HOST_WIDE_INT v = INTVAL (val) & 255; -+ -+ v |= v << 8; -+ v |= v << 16; -+ if (mode == DImode) -+ v |= (v << 16) << 16; -+ return copy_to_mode_reg (mode, gen_int_mode (v, mode)); -+ } -+ -+ if (valmode == VOIDmode) -+ valmode = QImode; -+ if (valmode != QImode) -+ val = gen_lowpart (QImode, val); -+ if (mode == QImode) -+ return val; -+ if (!TARGET_PARTIAL_REG_STALL) -+ nops--; -+ if (ix86_cost->mult_init[mode == DImode ? 3 : 2] -+ + ix86_cost->mult_bit * (mode == DImode ? 
8 : 4) -+ <= (ix86_cost->shift_const + ix86_cost->add) * nops -+ + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0))) -+ { -+ rtx reg = convert_modes (mode, QImode, val, true); -+ tmp = promote_duplicated_reg (mode, const1_rtx); -+ return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1, -+ OPTAB_DIRECT); -+ } -+ else -+ { -+ rtx reg = convert_modes (mode, QImode, val, true); -+ -+ if (!TARGET_PARTIAL_REG_STALL) -+ if (mode == SImode) -+ emit_insn (gen_insvsi_1 (reg, reg)); -+ else -+ emit_insn (gen_insvdi_1 (reg, reg)); -+ else -+ { -+ tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8), -+ NULL, 1, OPTAB_DIRECT); -+ reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, -+ OPTAB_DIRECT); -+ } -+ tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16), -+ NULL, 1, OPTAB_DIRECT); -+ reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT); -+ if (mode == SImode) -+ return reg; -+ tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32), -+ NULL, 1, OPTAB_DIRECT); -+ reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT); -+ return reg; -+ } -+} -+ -+/* Duplicate value VAL using promote_duplicated_reg into maximal size that will -+ be needed by main loop copying SIZE_NEEDED chunks and prologue getting -+ alignment from ALIGN to DESIRED_ALIGN. */ -+static rtx -+promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, -+ int align) -+{ -+ rtx promoted_val; -+ -+ if (TARGET_64BIT -+ && (size_needed > 4 || (desired_align > align && desired_align > 4))) -+ promoted_val = promote_duplicated_reg (DImode, val); -+ else if (size_needed > 2 || (desired_align > align && desired_align > 2)) -+ promoted_val = promote_duplicated_reg (SImode, val); -+ else if (size_needed > 1 || (desired_align > align && desired_align > 1)) -+ promoted_val = promote_duplicated_reg (HImode, val); -+ else -+ promoted_val = val; -+ -+ return promoted_val; -+} -+ -+/* Copy the address to a Pmode register. This is used for x32 to -+ truncate DImode TLS address to a SImode register. */ -+ -+static rtx -+ix86_copy_addr_to_reg (rtx addr) -+{ -+ rtx reg; -+ if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode) -+ { -+ reg = copy_addr_to_reg (addr); -+ REG_POINTER (reg) = 1; -+ return reg; -+ } -+ else -+ { -+ gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode); -+ reg = copy_to_mode_reg (DImode, addr); -+ REG_POINTER (reg) = 1; -+ return gen_rtx_SUBREG (SImode, reg, 0); -+ } -+} -+ -+/* Expand string move (memcpy) ot store (memset) operation. Use i386 string -+ operations when profitable. The code depends upon architecture, block size -+ and alignment, but always has one of the following overall structures: -+ -+ Aligned move sequence: -+ -+ 1) Prologue guard: Conditional that jumps up to epilogues for small -+ blocks that can be handled by epilogue alone. This is faster -+ but also needed for correctness, since prologue assume the block -+ is larger than the desired alignment. -+ -+ Optional dynamic check for size and libcall for large -+ blocks is emitted here too, with -minline-stringops-dynamically. -+ -+ 2) Prologue: copy first few bytes in order to get destination -+ aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less -+ than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be -+ copied. We emit either a jump tree on power of two sized -+ blocks, or a byte loop. -+ -+ 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks -+ with specified algorithm. 
-+ -+ 4) Epilogue: code copying tail of the block that is too small to be -+ handled by main body (or up to size guarded by prologue guard). -+ -+ Misaligned move sequence -+ -+ 1) missaligned move prologue/epilogue containing: -+ a) Prologue handling small memory blocks and jumping to done_label -+ (skipped if blocks are known to be large enough) -+ b) Signle move copying first DESIRED_ALIGN-ALIGN bytes if alignment is -+ needed by single possibly misaligned move -+ (skipped if alignment is not needed) -+ c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves -+ -+ 2) Zero size guard dispatching to done_label, if needed -+ -+ 3) dispatch to library call, if needed, -+ -+ 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks -+ with specified algorithm. */ -+bool -+ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp, -+ rtx align_exp, rtx expected_align_exp, -+ rtx expected_size_exp, rtx min_size_exp, -+ rtx max_size_exp, rtx probable_max_size_exp, -+ bool issetmem) -+{ -+ rtx destreg; -+ rtx srcreg = NULL; -+ rtx_code_label *label = NULL; -+ rtx tmp; -+ rtx_code_label *jump_around_label = NULL; -+ HOST_WIDE_INT align = 1; -+ unsigned HOST_WIDE_INT count = 0; -+ HOST_WIDE_INT expected_size = -1; -+ int size_needed = 0, epilogue_size_needed; -+ int desired_align = 0, align_bytes = 0; -+ enum stringop_alg alg; -+ rtx promoted_val = NULL; -+ rtx vec_promoted_val = NULL; -+ bool force_loopy_epilogue = false; -+ int dynamic_check; -+ bool need_zero_guard = false; -+ bool noalign; -+ machine_mode move_mode = VOIDmode; -+ machine_mode wider_mode; -+ int unroll_factor = 1; -+ /* TODO: Once value ranges are available, fill in proper data. */ -+ unsigned HOST_WIDE_INT min_size = 0; -+ unsigned HOST_WIDE_INT max_size = -1; -+ unsigned HOST_WIDE_INT probable_max_size = -1; -+ bool misaligned_prologue_used = false; -+ bool have_as; -+ -+ if (CONST_INT_P (align_exp)) -+ align = INTVAL (align_exp); -+ /* i386 can do misaligned access on reasonably increased cost. */ -+ if (CONST_INT_P (expected_align_exp) -+ && INTVAL (expected_align_exp) > align) -+ align = INTVAL (expected_align_exp); -+ /* ALIGN is the minimum of destination and source alignment, but we care here -+ just about destination alignment. */ -+ else if (!issetmem -+ && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT) -+ align = MEM_ALIGN (dst) / BITS_PER_UNIT; -+ -+ if (CONST_INT_P (count_exp)) -+ { -+ min_size = max_size = probable_max_size = count = expected_size -+ = INTVAL (count_exp); -+ /* When COUNT is 0, there is nothing to do. */ -+ if (!count) -+ return true; -+ } -+ else -+ { -+ if (min_size_exp) -+ min_size = INTVAL (min_size_exp); -+ if (max_size_exp) -+ max_size = INTVAL (max_size_exp); -+ if (probable_max_size_exp) -+ probable_max_size = INTVAL (probable_max_size_exp); -+ if (CONST_INT_P (expected_size_exp)) -+ expected_size = INTVAL (expected_size_exp); -+ } -+ -+ /* Make sure we don't need to care about overflow later on. */ -+ if (count > (HOST_WIDE_INT_1U << 30)) -+ return false; -+ -+ have_as = !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst)); -+ if (!issetmem) -+ have_as |= !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src)); -+ -+ /* Step 0: Decide on preferred algorithm, desired alignment and -+ size of chunks to be copied by main loop. 
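   (SIZE_NEEDED computed below is GET_MODE_SIZE (move_mode) times the
   unroll factor, e.g. 32 bytes for the four-way unrolled word-mode loop
   on 64-bit targets.)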
*/ -+ alg = decide_alg (count, expected_size, min_size, probable_max_size, -+ issetmem, -+ issetmem && val_exp == const0_rtx, have_as, -+ &dynamic_check, &noalign, false); -+ -+ if (dump_file) -+ fprintf (dump_file, "Selected stringop expansion strategy: %s\n", -+ stringop_alg_names[alg]); -+ -+ if (alg == libcall) -+ return false; -+ gcc_assert (alg != no_stringop); -+ -+ /* For now vector-version of memset is generated only for memory zeroing, as -+ creating of promoted vector value is very cheap in this case. */ -+ if (issetmem && alg == vector_loop && val_exp != const0_rtx) -+ alg = unrolled_loop; -+ -+ if (!count) -+ count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp); -+ destreg = ix86_copy_addr_to_reg (XEXP (dst, 0)); -+ if (!issetmem) -+ srcreg = ix86_copy_addr_to_reg (XEXP (src, 0)); -+ -+ unroll_factor = 1; -+ move_mode = word_mode; -+ switch (alg) -+ { -+ case libcall: -+ case no_stringop: -+ case last_alg: -+ gcc_unreachable (); -+ case loop_1_byte: -+ need_zero_guard = true; -+ move_mode = QImode; -+ break; -+ case loop: -+ need_zero_guard = true; -+ break; -+ case unrolled_loop: -+ need_zero_guard = true; -+ unroll_factor = (TARGET_64BIT ? 4 : 2); -+ break; -+ case vector_loop: -+ need_zero_guard = true; -+ unroll_factor = 4; -+ /* Find the widest supported mode. */ -+ move_mode = word_mode; -+ while (GET_MODE_WIDER_MODE (move_mode).exists (&wider_mode) -+ && optab_handler (mov_optab, wider_mode) != CODE_FOR_nothing) -+ move_mode = wider_mode; -+ -+ if (TARGET_AVX128_OPTIMAL && GET_MODE_BITSIZE (move_mode) > 128) -+ move_mode = TImode; -+ -+ /* Find the corresponding vector mode with the same size as MOVE_MODE. -+ MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */ -+ if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode)) -+ { -+ int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode); -+ if (!mode_for_vector (word_mode, nunits).exists (&move_mode) -+ || optab_handler (mov_optab, move_mode) == CODE_FOR_nothing) -+ move_mode = word_mode; -+ } -+ gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing); -+ break; -+ case rep_prefix_8_byte: -+ move_mode = DImode; -+ break; -+ case rep_prefix_4_byte: -+ move_mode = SImode; -+ break; -+ case rep_prefix_1_byte: -+ move_mode = QImode; -+ break; -+ } -+ size_needed = GET_MODE_SIZE (move_mode) * unroll_factor; -+ epilogue_size_needed = size_needed; -+ -+ /* If we are going to call any library calls conditionally, make sure any -+ pending stack adjustment happen before the first conditional branch, -+ otherwise they will be emitted before the library call only and won't -+ happen from the other branches. */ -+ if (dynamic_check != -1) -+ do_pending_stack_adjust (); -+ -+ desired_align = decide_alignment (align, alg, expected_size, move_mode); -+ if (!TARGET_ALIGN_STRINGOPS || noalign) -+ align = desired_align; -+ -+ /* Step 1: Prologue guard. */ -+ -+ /* Alignment code needs count to be in register. */ -+ if (CONST_INT_P (count_exp) && desired_align > align) -+ { -+ if (INTVAL (count_exp) > desired_align -+ && INTVAL (count_exp) > size_needed) -+ { -+ align_bytes -+ = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT); -+ if (align_bytes <= 0) -+ align_bytes = 0; -+ else -+ align_bytes = desired_align - align_bytes; -+ } -+ if (align_bytes == 0) -+ count_exp = force_reg (counter_mode (count_exp), count_exp); -+ } -+ gcc_assert (desired_align >= 1 && align >= 1); -+ -+ /* Misaligned move sequences handle both prologue and epilogue at once. 
-+ Default code generation results in a smaller code for large alignments -+ and also avoids redundant job when sizes are known precisely. */ -+ misaligned_prologue_used -+ = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES -+ && MAX (desired_align, epilogue_size_needed) <= 32 -+ && desired_align <= epilogue_size_needed -+ && ((desired_align > align && !align_bytes) -+ || (!count && epilogue_size_needed > 1))); -+ -+ /* Do the cheap promotion to allow better CSE across the -+ main loop and epilogue (ie one load of the big constant in the -+ front of all code. -+ For now the misaligned move sequences do not have fast path -+ without broadcasting. */ -+ if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used))) -+ { -+ if (alg == vector_loop) -+ { -+ gcc_assert (val_exp == const0_rtx); -+ vec_promoted_val = promote_duplicated_reg (move_mode, val_exp); -+ promoted_val = promote_duplicated_reg_to_size (val_exp, -+ GET_MODE_SIZE (word_mode), -+ desired_align, align); -+ } -+ else -+ { -+ promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed, -+ desired_align, align); -+ } -+ } -+ /* Misaligned move sequences handles both prologues and epilogues at once. -+ Default code generation results in smaller code for large alignments and -+ also avoids redundant job when sizes are known precisely. */ -+ if (misaligned_prologue_used) -+ { -+ /* Misaligned move prologue handled small blocks by itself. */ -+ expand_set_or_cpymem_prologue_epilogue_by_misaligned_moves -+ (dst, src, &destreg, &srcreg, -+ move_mode, promoted_val, vec_promoted_val, -+ &count_exp, -+ &jump_around_label, -+ desired_align < align -+ ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed, -+ desired_align, align, &min_size, dynamic_check, issetmem); -+ if (!issetmem) -+ src = change_address (src, BLKmode, srcreg); -+ dst = change_address (dst, BLKmode, destreg); -+ set_mem_align (dst, desired_align * BITS_PER_UNIT); -+ epilogue_size_needed = 0; -+ if (need_zero_guard -+ && min_size < (unsigned HOST_WIDE_INT) size_needed) -+ { -+ /* It is possible that we copied enough so the main loop will not -+ execute. */ -+ gcc_assert (size_needed > 1); -+ if (jump_around_label == NULL_RTX) -+ jump_around_label = gen_label_rtx (); -+ emit_cmp_and_jump_insns (count_exp, -+ GEN_INT (size_needed), -+ LTU, 0, counter_mode (count_exp), 1, jump_around_label); -+ if (expected_size == -1 -+ || expected_size < (desired_align - align) / 2 + size_needed) -+ predict_jump (REG_BR_PROB_BASE * 20 / 100); -+ else -+ predict_jump (REG_BR_PROB_BASE * 60 / 100); -+ } -+ } -+ /* Ensure that alignment prologue won't copy past end of block. */ -+ else if (size_needed > 1 || (desired_align > 1 && desired_align > align)) -+ { -+ epilogue_size_needed = MAX (size_needed - 1, desired_align - align); -+ /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes. -+ Make sure it is power of 2. */ -+ epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1); -+ -+ /* To improve performance of small blocks, we jump around the VAL -+ promoting mode. This mean that if the promoted VAL is not constant, -+ we might not use it in the epilogue and have to use byte -+ loop variant. */ -+ if (issetmem && epilogue_size_needed > 2 && !promoted_val) -+ force_loopy_epilogue = true; -+ if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed) -+ || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed) -+ { -+ /* If main algorithm works on QImode, no epilogue is needed. -+ For small sizes just don't align anything. 
*/ -+ if (size_needed == 1) -+ desired_align = align; -+ else -+ goto epilogue; -+ } -+ else if (!count -+ && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed) -+ { -+ label = gen_label_rtx (); -+ emit_cmp_and_jump_insns (count_exp, -+ GEN_INT (epilogue_size_needed), -+ LTU, 0, counter_mode (count_exp), 1, label); -+ if (expected_size == -1 || expected_size < epilogue_size_needed) -+ predict_jump (REG_BR_PROB_BASE * 60 / 100); -+ else -+ predict_jump (REG_BR_PROB_BASE * 20 / 100); -+ } -+ } -+ -+ /* Emit code to decide on runtime whether library call or inline should be -+ used. */ -+ if (dynamic_check != -1) -+ { -+ if (!issetmem && CONST_INT_P (count_exp)) -+ { -+ if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check) -+ { -+ emit_block_copy_via_libcall (dst, src, count_exp); -+ count_exp = const0_rtx; -+ goto epilogue; -+ } -+ } -+ else -+ { -+ rtx_code_label *hot_label = gen_label_rtx (); -+ if (jump_around_label == NULL_RTX) -+ jump_around_label = gen_label_rtx (); -+ emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1), -+ LEU, 0, counter_mode (count_exp), -+ 1, hot_label); -+ predict_jump (REG_BR_PROB_BASE * 90 / 100); -+ if (issetmem) -+ set_storage_via_libcall (dst, count_exp, val_exp); -+ else -+ emit_block_copy_via_libcall (dst, src, count_exp); -+ emit_jump (jump_around_label); -+ emit_label (hot_label); -+ } -+ } -+ -+ /* Step 2: Alignment prologue. */ -+ /* Do the expensive promotion once we branched off the small blocks. */ -+ if (issetmem && !promoted_val) -+ promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed, -+ desired_align, align); -+ -+ if (desired_align > align && !misaligned_prologue_used) -+ { -+ if (align_bytes == 0) -+ { -+ /* Except for the first move in prologue, we no longer know -+ constant offset in aliasing info. It don't seems to worth -+ the pain to maintain it for the first move, so throw away -+ the info early. */ -+ dst = change_address (dst, BLKmode, destreg); -+ if (!issetmem) -+ src = change_address (src, BLKmode, srcreg); -+ dst = expand_set_or_cpymem_prologue (dst, src, destreg, srcreg, -+ promoted_val, vec_promoted_val, -+ count_exp, align, desired_align, -+ issetmem); -+ /* At most desired_align - align bytes are copied. */ -+ if (min_size < (unsigned)(desired_align - align)) -+ min_size = 0; -+ else -+ min_size -= desired_align - align; -+ } -+ else -+ { -+ /* If we know how many bytes need to be stored before dst is -+ sufficiently aligned, maintain aliasing info accurately. */ -+ dst = expand_set_or_cpymem_constant_prologue (dst, &src, destreg, -+ srcreg, -+ promoted_val, -+ vec_promoted_val, -+ desired_align, -+ align_bytes, -+ issetmem); -+ -+ count_exp = plus_constant (counter_mode (count_exp), -+ count_exp, -align_bytes); -+ count -= align_bytes; -+ min_size -= align_bytes; -+ max_size -= align_bytes; -+ } -+ if (need_zero_guard -+ && min_size < (unsigned HOST_WIDE_INT) size_needed -+ && (count < (unsigned HOST_WIDE_INT) size_needed -+ || (align_bytes == 0 -+ && count < ((unsigned HOST_WIDE_INT) size_needed -+ + desired_align - align)))) -+ { -+ /* It is possible that we copied enough so the main loop will not -+ execute. 
*/ -+ gcc_assert (size_needed > 1); -+ if (label == NULL_RTX) -+ label = gen_label_rtx (); -+ emit_cmp_and_jump_insns (count_exp, -+ GEN_INT (size_needed), -+ LTU, 0, counter_mode (count_exp), 1, label); -+ if (expected_size == -1 -+ || expected_size < (desired_align - align) / 2 + size_needed) -+ predict_jump (REG_BR_PROB_BASE * 20 / 100); -+ else -+ predict_jump (REG_BR_PROB_BASE * 60 / 100); -+ } -+ } -+ if (label && size_needed == 1) -+ { -+ emit_label (label); -+ LABEL_NUSES (label) = 1; -+ label = NULL; -+ epilogue_size_needed = 1; -+ if (issetmem) -+ promoted_val = val_exp; -+ } -+ else if (label == NULL_RTX && !misaligned_prologue_used) -+ epilogue_size_needed = size_needed; -+ -+ /* Step 3: Main loop. */ -+ -+ switch (alg) -+ { -+ case libcall: -+ case no_stringop: -+ case last_alg: -+ gcc_unreachable (); -+ case loop_1_byte: -+ case loop: -+ case unrolled_loop: -+ expand_set_or_cpymem_via_loop (dst, src, destreg, srcreg, promoted_val, -+ count_exp, move_mode, unroll_factor, -+ expected_size, issetmem); -+ break; -+ case vector_loop: -+ expand_set_or_cpymem_via_loop (dst, src, destreg, srcreg, -+ vec_promoted_val, count_exp, move_mode, -+ unroll_factor, expected_size, issetmem); -+ break; -+ case rep_prefix_8_byte: -+ case rep_prefix_4_byte: -+ case rep_prefix_1_byte: -+ expand_set_or_cpymem_via_rep (dst, src, destreg, srcreg, promoted_val, -+ val_exp, count_exp, move_mode, issetmem); -+ break; -+ } -+ /* Adjust properly the offset of src and dest memory for aliasing. */ -+ if (CONST_INT_P (count_exp)) -+ { -+ if (!issetmem) -+ src = adjust_automodify_address_nv (src, BLKmode, srcreg, -+ (count / size_needed) * size_needed); -+ dst = adjust_automodify_address_nv (dst, BLKmode, destreg, -+ (count / size_needed) * size_needed); -+ } -+ else -+ { -+ if (!issetmem) -+ src = change_address (src, BLKmode, srcreg); -+ dst = change_address (dst, BLKmode, destreg); -+ } -+ -+ /* Step 4: Epilogue to copy the remaining bytes. */ -+ epilogue: -+ if (label) -+ { -+ /* When the main loop is done, COUNT_EXP might hold original count, -+ while we want to copy only COUNT_EXP & SIZE_NEEDED bytes. -+ Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED -+ bytes. Compensate if needed. */ -+ -+ if (size_needed < epilogue_size_needed) -+ { -+ tmp = expand_simple_binop (counter_mode (count_exp), AND, count_exp, -+ GEN_INT (size_needed - 1), count_exp, 1, -+ OPTAB_DIRECT); -+ if (tmp != count_exp) -+ emit_move_insn (count_exp, tmp); -+ } -+ emit_label (label); -+ LABEL_NUSES (label) = 1; -+ } -+ -+ if (count_exp != const0_rtx && epilogue_size_needed > 1) -+ { -+ if (force_loopy_epilogue) -+ expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp, -+ epilogue_size_needed); -+ else -+ { -+ if (issetmem) -+ expand_setmem_epilogue (dst, destreg, promoted_val, -+ vec_promoted_val, count_exp, -+ epilogue_size_needed); -+ else -+ expand_cpymem_epilogue (dst, src, destreg, srcreg, count_exp, -+ epilogue_size_needed); -+ } -+ } -+ if (jump_around_label) -+ emit_label (jump_around_label); -+ return true; -+} -+ -+ -+/* Expand the appropriate insns for doing strlen if not just doing -+ repnz; scasb -+ -+ out = result, initialized with the start address -+ align_rtx = alignment of the address. -+ scratch = scratch register, initialized with the startaddress when -+ not aligned, otherwise undefined -+ -+ This is just the body. It needs the initializations mentioned above and -+ some address computing at the end. These things are done in i386.md. 
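   The 4-bytes-at-a-time loop generated below relies on the classic
   zero-byte test; as a stand-alone C sketch (the function name is only
   for illustration), the emitted add/not/and sequence computes

     static int has_zero_byte (unsigned int v)
     {
       return ((v - 0x01010101u) & ~v & 0x80808080u) != 0;
     }

   which is nonzero exactly when one of the four bytes of V is zero, e.g.
   has_zero_byte (0x11002233) != 0 while has_zero_byte (0x11223344) == 0.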
*/ -+ -+static void -+ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx) -+{ -+ int align; -+ rtx tmp; -+ rtx_code_label *align_2_label = NULL; -+ rtx_code_label *align_3_label = NULL; -+ rtx_code_label *align_4_label = gen_label_rtx (); -+ rtx_code_label *end_0_label = gen_label_rtx (); -+ rtx mem; -+ rtx tmpreg = gen_reg_rtx (SImode); -+ rtx scratch = gen_reg_rtx (SImode); -+ rtx cmp; -+ -+ align = 0; -+ if (CONST_INT_P (align_rtx)) -+ align = INTVAL (align_rtx); -+ -+ /* Loop to check 1..3 bytes for null to get an aligned pointer. */ -+ -+ /* Is there a known alignment and is it less than 4? */ -+ if (align < 4) -+ { -+ rtx scratch1 = gen_reg_rtx (Pmode); -+ emit_move_insn (scratch1, out); -+ /* Is there a known alignment and is it not 2? */ -+ if (align != 2) -+ { -+ align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */ -+ align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */ -+ -+ /* Leave just the 3 lower bits. */ -+ align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3), -+ NULL_RTX, 0, OPTAB_WIDEN); -+ -+ emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL, -+ Pmode, 1, align_4_label); -+ emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL, -+ Pmode, 1, align_2_label); -+ emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL, -+ Pmode, 1, align_3_label); -+ } -+ else -+ { -+ /* Since the alignment is 2, we have to check 2 or 0 bytes; -+ check if is aligned to 4 - byte. */ -+ -+ align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx, -+ NULL_RTX, 0, OPTAB_WIDEN); -+ -+ emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL, -+ Pmode, 1, align_4_label); -+ } -+ -+ mem = change_address (src, QImode, out); -+ -+ /* Now compare the bytes. */ -+ -+ /* Compare the first n unaligned byte on a byte per byte basis. */ -+ emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, -+ QImode, 1, end_0_label); -+ -+ /* Increment the address. */ -+ emit_insn (ix86_gen_add3 (out, out, const1_rtx)); -+ -+ /* Not needed with an alignment of 2 */ -+ if (align != 2) -+ { -+ emit_label (align_2_label); -+ -+ emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1, -+ end_0_label); -+ -+ emit_insn (ix86_gen_add3 (out, out, const1_rtx)); -+ -+ emit_label (align_3_label); -+ } -+ -+ emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1, -+ end_0_label); -+ -+ emit_insn (ix86_gen_add3 (out, out, const1_rtx)); -+ } -+ -+ /* Generate loop to check 4 bytes at a time. It is not a good idea to -+ align this loop. It gives only huge programs, but does not help to -+ speed up. */ -+ emit_label (align_4_label); -+ -+ mem = change_address (src, SImode, out); -+ emit_move_insn (scratch, mem); -+ emit_insn (ix86_gen_add3 (out, out, GEN_INT (4))); -+ -+ /* This formula yields a nonzero result iff one of the bytes is zero. -+ This saves three branches inside loop and many cycles. */ -+ -+ emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101))); -+ emit_insn (gen_one_cmplsi2 (scratch, scratch)); -+ emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch)); -+ emit_insn (gen_andsi3 (tmpreg, tmpreg, -+ gen_int_mode (0x80808080, SImode))); -+ emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1, -+ align_4_label); -+ -+ if (TARGET_CMOVE) -+ { -+ rtx reg = gen_reg_rtx (SImode); -+ rtx reg2 = gen_reg_rtx (Pmode); -+ emit_move_insn (reg, tmpreg); -+ emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16))); -+ -+ /* If zero is not in the first two bytes, move two bytes forward. 
*/ -+ emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080))); -+ tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); -+ tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); -+ emit_insn (gen_rtx_SET (tmpreg, -+ gen_rtx_IF_THEN_ELSE (SImode, tmp, -+ reg, -+ tmpreg))); -+ /* Emit lea manually to avoid clobbering of flags. */ -+ emit_insn (gen_rtx_SET (reg2, gen_rtx_PLUS (Pmode, out, const2_rtx))); -+ -+ tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); -+ tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); -+ emit_insn (gen_rtx_SET (out, -+ gen_rtx_IF_THEN_ELSE (Pmode, tmp, -+ reg2, -+ out))); -+ } -+ else -+ { -+ rtx_code_label *end_2_label = gen_label_rtx (); -+ /* Is zero in the first two bytes? */ -+ -+ emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080))); -+ tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); -+ tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx); -+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, -+ gen_rtx_LABEL_REF (VOIDmode, end_2_label), -+ pc_rtx); -+ tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); -+ JUMP_LABEL (tmp) = end_2_label; -+ -+ /* Not in the first two. Move two bytes forward. */ -+ emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16))); -+ emit_insn (ix86_gen_add3 (out, out, const2_rtx)); -+ -+ emit_label (end_2_label); -+ -+ } -+ -+ /* Avoid branch in fixing the byte. */ -+ tmpreg = gen_lowpart (QImode, tmpreg); -+ emit_insn (gen_addqi3_cconly_overflow (tmpreg, tmpreg)); -+ tmp = gen_rtx_REG (CCmode, FLAGS_REG); -+ cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx); -+ emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp)); -+ -+ emit_label (end_0_label); -+} -+ -+/* Expand strlen. */ -+ -+bool -+ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align) -+{ -+if (TARGET_UNROLL_STRLEN -+ && TARGET_INLINE_ALL_STRINGOPS -+ && eoschar == const0_rtx -+ && optimize > 1) -+ { -+ /* The generic case of strlen expander is long. Avoid it's -+ expanding unless TARGET_INLINE_ALL_STRINGOPS. */ -+ rtx addr = force_reg (Pmode, XEXP (src, 0)); -+ /* Well it seems that some optimizer does not combine a call like -+ foo(strlen(bar), strlen(bar)); -+ when the move and the subtraction is done here. It does calculate -+ the length just once when these instructions are done inside of -+ output_strlen_unroll(). But I think since &bar[strlen(bar)] is -+ often used and I use one fewer register for the lifetime of -+ output_strlen_unroll() this is better. */ -+ -+ emit_move_insn (out, addr); -+ -+ ix86_expand_strlensi_unroll_1 (out, src, align); -+ -+ /* strlensi_unroll_1 returns the address of the zero at the end of -+ the string, like memchr(), so compute the length by subtracting -+ the start address. */ -+ emit_insn (ix86_gen_sub3 (out, out, addr)); -+ return true; -+ } -+ else -+ return false; -+} -+ -+/* For given symbol (function) construct code to compute address of it's PLT -+ entry in large x86-64 PIC model. */ -+ -+static rtx -+construct_plt_address (rtx symbol) -+{ -+ rtx tmp, unspec; -+ -+ gcc_assert (GET_CODE (symbol) == SYMBOL_REF); -+ gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF); -+ gcc_assert (Pmode == DImode); -+ -+ tmp = gen_reg_rtx (Pmode); -+ unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF); -+ -+ emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec)); -+ emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx)); -+ return tmp; -+} -+ -+/* Additional registers that are clobbered by SYSV calls. 
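   (These are the registers that the Microsoft x64 ABI treats as
   call-saved but the System V ABI clobbers, namely rsi, rdi and
   xmm6-xmm15, so an MS-ABI caller must assume they are destroyed across
   such a call.)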
*/ -+ -+static int const x86_64_ms_sysv_extra_clobbered_registers -+ [NUM_X86_64_MS_CLOBBERED_REGS] = -+{ -+ SI_REG, DI_REG, -+ XMM6_REG, XMM7_REG, -+ XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG, -+ XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG -+}; -+ -+rtx_insn * -+ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, -+ rtx callarg2, -+ rtx pop, bool sibcall) -+{ -+ rtx vec[3]; -+ rtx use = NULL, call; -+ unsigned int vec_len = 0; -+ tree fndecl; -+ -+ if (GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF) -+ { -+ fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0)); -+ if (fndecl -+ && (lookup_attribute ("interrupt", -+ TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))) -+ error ("interrupt service routine cannot be called directly"); -+ } -+ else -+ fndecl = NULL_TREE; -+ -+ if (pop == const0_rtx) -+ pop = NULL; -+ gcc_assert (!TARGET_64BIT || !pop); -+ -+ if (TARGET_MACHO && !TARGET_64BIT) -+ { -+#if TARGET_MACHO -+ if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF) -+ fnaddr = machopic_indirect_call_target (fnaddr); -+#endif -+ } -+ else -+ { -+ /* Static functions and indirect calls don't need the pic register. Also, -+ check if PLT was explicitly avoided via no-plt or "noplt" attribute, making -+ it an indirect call. */ -+ rtx addr = XEXP (fnaddr, 0); -+ if (flag_pic -+ && GET_CODE (addr) == SYMBOL_REF -+ && !SYMBOL_REF_LOCAL_P (addr)) -+ { -+ if (flag_plt -+ && (SYMBOL_REF_DECL (addr) == NULL_TREE -+ || !lookup_attribute ("noplt", -+ DECL_ATTRIBUTES (SYMBOL_REF_DECL (addr))))) -+ { -+ if (!TARGET_64BIT -+ || (ix86_cmodel == CM_LARGE_PIC -+ && DEFAULT_ABI != MS_ABI)) -+ { -+ use_reg (&use, gen_rtx_REG (Pmode, -+ REAL_PIC_OFFSET_TABLE_REGNUM)); -+ if (ix86_use_pseudo_pic_reg ()) -+ emit_move_insn (gen_rtx_REG (Pmode, -+ REAL_PIC_OFFSET_TABLE_REGNUM), -+ pic_offset_table_rtx); -+ } -+ } -+ else if (!TARGET_PECOFF && !TARGET_MACHO) -+ { -+ if (TARGET_64BIT) -+ { -+ fnaddr = gen_rtx_UNSPEC (Pmode, -+ gen_rtvec (1, addr), -+ UNSPEC_GOTPCREL); -+ fnaddr = gen_rtx_CONST (Pmode, fnaddr); -+ } -+ else -+ { -+ fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), -+ UNSPEC_GOT); -+ fnaddr = gen_rtx_CONST (Pmode, fnaddr); -+ fnaddr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, -+ fnaddr); -+ } -+ fnaddr = gen_const_mem (Pmode, fnaddr); -+ /* Pmode may not be the same as word_mode for x32, which -+ doesn't support indirect branch via 32-bit memory slot. -+ Since x32 GOT slot is 64 bit with zero upper 32 bits, -+ indirect branch via x32 GOT slot is OK. */ -+ if (GET_MODE (fnaddr) != word_mode) -+ fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr); -+ fnaddr = gen_rtx_MEM (QImode, fnaddr); -+ } -+ } -+ } -+ -+ /* Skip setting up RAX register for -mskip-rax-setup when there are no -+ parameters passed in vector registers. */ -+ if (TARGET_64BIT -+ && (INTVAL (callarg2) > 0 -+ || (INTVAL (callarg2) == 0 -+ && (TARGET_SSE || !flag_skip_rax_setup)))) -+ { -+ rtx al = gen_rtx_REG (QImode, AX_REG); -+ emit_move_insn (al, callarg2); -+ use_reg (&use, al); -+ } -+ -+ if (ix86_cmodel == CM_LARGE_PIC -+ && !TARGET_PECOFF -+ && MEM_P (fnaddr) -+ && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF -+ && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode)) -+ fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0))); -+ /* Since x32 GOT slot is 64 bit with zero upper 32 bits, indirect -+ branch via x32 GOT slot is OK. */ -+ else if (!(TARGET_X32 -+ && MEM_P (fnaddr) -+ && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND -+ && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode)) -+ && (sibcall -+ ? 
!sibcall_insn_operand (XEXP (fnaddr, 0), word_mode) -+ : !call_insn_operand (XEXP (fnaddr, 0), word_mode))) -+ { -+ fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1); -+ fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr)); -+ } -+ -+ call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1); -+ -+ if (retval) -+ call = gen_rtx_SET (retval, call); -+ vec[vec_len++] = call; -+ -+ if (pop) -+ { -+ pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop); -+ pop = gen_rtx_SET (stack_pointer_rtx, pop); -+ vec[vec_len++] = pop; -+ } -+ -+ if (cfun->machine->no_caller_saved_registers -+ && (!fndecl -+ || (!TREE_THIS_VOLATILE (fndecl) -+ && !lookup_attribute ("no_caller_saved_registers", -+ TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))))) -+ { -+ static const char ix86_call_used_regs[] = CALL_USED_REGISTERS; -+ bool is_64bit_ms_abi = (TARGET_64BIT -+ && ix86_function_abi (fndecl) == MS_ABI); -+ char c_mask = CALL_USED_REGISTERS_MASK (is_64bit_ms_abi); -+ -+ /* If there are no caller-saved registers, add all registers -+ that are clobbered by the call which returns. */ -+ for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++) -+ if (!fixed_regs[i] -+ && (ix86_call_used_regs[i] == 1 -+ || (ix86_call_used_regs[i] & c_mask)) -+ && !STACK_REGNO_P (i) -+ && !MMX_REGNO_P (i)) -+ clobber_reg (&use, -+ gen_rtx_REG (GET_MODE (regno_reg_rtx[i]), i)); -+ } -+ else if (TARGET_64BIT_MS_ABI -+ && (!callarg2 || INTVAL (callarg2) != -2)) -+ { -+ unsigned i; -+ -+ for (i = 0; i < NUM_X86_64_MS_CLOBBERED_REGS; i++) -+ { -+ int regno = x86_64_ms_sysv_extra_clobbered_registers[i]; -+ machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode; -+ -+ clobber_reg (&use, gen_rtx_REG (mode, regno)); -+ } -+ -+ /* Set here, but it may get cleared later. */ -+ if (TARGET_CALL_MS2SYSV_XLOGUES) -+ { -+ if (!TARGET_SSE) -+ ; -+ -+ /* Don't break hot-patched functions. */ -+ else if (ix86_function_ms_hook_prologue (current_function_decl)) -+ ; -+ -+ /* TODO: Cases not yet examined. */ -+ else if (flag_split_stack) -+ warn_once_call_ms2sysv_xlogues ("-fsplit-stack"); -+ -+ else -+ { -+ gcc_assert (!reload_completed); -+ cfun->machine->call_ms2sysv = true; -+ } -+ } -+ } -+ -+ if (vec_len > 1) -+ call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec)); -+ rtx_insn *call_insn = emit_call_insn (call); -+ if (use) -+ CALL_INSN_FUNCTION_USAGE (call_insn) = use; -+ -+ return call_insn; -+} -+ -+/* Split simple return with popping POPC bytes from stack to indirect -+ branch with stack adjustment . */ -+ -+void -+ix86_split_simple_return_pop_internal (rtx popc) -+{ -+ struct machine_function *m = cfun->machine; -+ rtx ecx = gen_rtx_REG (SImode, CX_REG); -+ rtx_insn *insn; -+ -+ /* There is no "pascal" calling convention in any 64bit ABI. */ -+ gcc_assert (!TARGET_64BIT); -+ -+ insn = emit_insn (gen_pop (ecx)); -+ m->fs.cfa_offset -= UNITS_PER_WORD; -+ m->fs.sp_offset -= UNITS_PER_WORD; -+ -+ rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); -+ x = gen_rtx_SET (stack_pointer_rtx, x); -+ add_reg_note (insn, REG_CFA_ADJUST_CFA, x); -+ add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx)); -+ RTX_FRAME_RELATED_P (insn) = 1; -+ -+ x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, popc); -+ x = gen_rtx_SET (stack_pointer_rtx, x); -+ insn = emit_insn (x); -+ add_reg_note (insn, REG_CFA_ADJUST_CFA, x); -+ RTX_FRAME_RELATED_P (insn) = 1; -+ -+ /* Now return address is in ECX. 
*/ -+ emit_jump_insn (gen_simple_return_indirect_internal (ecx)); -+} -+ -+/* Errors in the source file can cause expand_expr to return const0_rtx -+ where we expect a vector. To avoid crashing, use one of the vector -+ clear instructions. */ -+ -+static rtx -+safe_vector_operand (rtx x, machine_mode mode) -+{ -+ if (x == const0_rtx) -+ x = CONST0_RTX (mode); -+ return x; -+} -+ -+/* Subroutine of ix86_expand_builtin to take care of binop insns. */ -+ -+static rtx -+ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target) -+{ -+ rtx pat; -+ tree arg0 = CALL_EXPR_ARG (exp, 0); -+ tree arg1 = CALL_EXPR_ARG (exp, 1); -+ rtx op0 = expand_normal (arg0); -+ rtx op1 = expand_normal (arg1); -+ machine_mode tmode = insn_data[icode].operand[0].mode; -+ machine_mode mode0 = insn_data[icode].operand[1].mode; -+ machine_mode mode1 = insn_data[icode].operand[2].mode; -+ -+ if (VECTOR_MODE_P (mode0)) -+ op0 = safe_vector_operand (op0, mode0); -+ if (VECTOR_MODE_P (mode1)) -+ op1 = safe_vector_operand (op1, mode1); -+ -+ if (optimize || !target -+ || GET_MODE (target) != tmode -+ || !insn_data[icode].operand[0].predicate (target, tmode)) -+ target = gen_reg_rtx (tmode); -+ -+ if (GET_MODE (op1) == SImode && mode1 == TImode) -+ { -+ rtx x = gen_reg_rtx (V4SImode); -+ emit_insn (gen_sse2_loadd (x, op1)); -+ op1 = gen_lowpart (TImode, x); -+ } -+ -+ if (!insn_data[icode].operand[1].predicate (op0, mode0)) -+ op0 = copy_to_mode_reg (mode0, op0); -+ if (!insn_data[icode].operand[2].predicate (op1, mode1)) -+ op1 = copy_to_mode_reg (mode1, op1); -+ -+ pat = GEN_FCN (icode) (target, op0, op1); -+ if (! pat) -+ return 0; -+ -+ emit_insn (pat); -+ -+ return target; -+} -+ -+/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */ -+ -+static rtx -+ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target, -+ enum ix86_builtin_func_type m_type, -+ enum rtx_code sub_code) -+{ -+ rtx pat; -+ int i; -+ int nargs; -+ bool comparison_p = false; -+ bool tf_p = false; -+ bool last_arg_constant = false; -+ int num_memory = 0; -+ struct { -+ rtx op; -+ machine_mode mode; -+ } args[4]; -+ -+ machine_mode tmode = insn_data[icode].operand[0].mode; -+ -+ switch (m_type) -+ { -+ case MULTI_ARG_4_DF2_DI_I: -+ case MULTI_ARG_4_DF2_DI_I1: -+ case MULTI_ARG_4_SF2_SI_I: -+ case MULTI_ARG_4_SF2_SI_I1: -+ nargs = 4; -+ last_arg_constant = true; -+ break; -+ -+ case MULTI_ARG_3_SF: -+ case MULTI_ARG_3_DF: -+ case MULTI_ARG_3_SF2: -+ case MULTI_ARG_3_DF2: -+ case MULTI_ARG_3_DI: -+ case MULTI_ARG_3_SI: -+ case MULTI_ARG_3_SI_DI: -+ case MULTI_ARG_3_HI: -+ case MULTI_ARG_3_HI_SI: -+ case MULTI_ARG_3_QI: -+ case MULTI_ARG_3_DI2: -+ case MULTI_ARG_3_SI2: -+ case MULTI_ARG_3_HI2: -+ case MULTI_ARG_3_QI2: -+ nargs = 3; -+ break; -+ -+ case MULTI_ARG_2_SF: -+ case MULTI_ARG_2_DF: -+ case MULTI_ARG_2_DI: -+ case MULTI_ARG_2_SI: -+ case MULTI_ARG_2_HI: -+ case MULTI_ARG_2_QI: -+ nargs = 2; -+ break; -+ -+ case MULTI_ARG_2_DI_IMM: -+ case MULTI_ARG_2_SI_IMM: -+ case MULTI_ARG_2_HI_IMM: -+ case MULTI_ARG_2_QI_IMM: -+ nargs = 2; -+ last_arg_constant = true; -+ break; -+ -+ case MULTI_ARG_1_SF: -+ case MULTI_ARG_1_DF: -+ case MULTI_ARG_1_SF2: -+ case MULTI_ARG_1_DF2: -+ case MULTI_ARG_1_DI: -+ case MULTI_ARG_1_SI: -+ case MULTI_ARG_1_HI: -+ case MULTI_ARG_1_QI: -+ case MULTI_ARG_1_SI_DI: -+ case MULTI_ARG_1_HI_DI: -+ case MULTI_ARG_1_HI_SI: -+ case MULTI_ARG_1_QI_DI: -+ case MULTI_ARG_1_QI_SI: -+ case MULTI_ARG_1_QI_HI: -+ nargs = 1; -+ break; -+ -+ case MULTI_ARG_2_DI_CMP: -+ case MULTI_ARG_2_SI_CMP: 
-+ case MULTI_ARG_2_HI_CMP: -+ case MULTI_ARG_2_QI_CMP: -+ nargs = 2; -+ comparison_p = true; -+ break; -+ -+ case MULTI_ARG_2_SF_TF: -+ case MULTI_ARG_2_DF_TF: -+ case MULTI_ARG_2_DI_TF: -+ case MULTI_ARG_2_SI_TF: -+ case MULTI_ARG_2_HI_TF: -+ case MULTI_ARG_2_QI_TF: -+ nargs = 2; -+ tf_p = true; -+ break; -+ -+ default: -+ gcc_unreachable (); -+ } -+ -+ if (optimize || !target -+ || GET_MODE (target) != tmode -+ || !insn_data[icode].operand[0].predicate (target, tmode)) -+ target = gen_reg_rtx (tmode); -+ else if (memory_operand (target, tmode)) -+ num_memory++; -+ -+ gcc_assert (nargs <= 4); -+ -+ for (i = 0; i < nargs; i++) -+ { -+ tree arg = CALL_EXPR_ARG (exp, i); -+ rtx op = expand_normal (arg); -+ int adjust = (comparison_p) ? 1 : 0; -+ machine_mode mode = insn_data[icode].operand[i+adjust+1].mode; -+ -+ if (last_arg_constant && i == nargs - 1) -+ { -+ if (!insn_data[icode].operand[i + 1].predicate (op, mode)) -+ { -+ enum insn_code new_icode = icode; -+ switch (icode) -+ { -+ case CODE_FOR_xop_vpermil2v2df3: -+ case CODE_FOR_xop_vpermil2v4sf3: -+ case CODE_FOR_xop_vpermil2v4df3: -+ case CODE_FOR_xop_vpermil2v8sf3: -+ error ("the last argument must be a 2-bit immediate"); -+ return gen_reg_rtx (tmode); -+ case CODE_FOR_xop_rotlv2di3: -+ new_icode = CODE_FOR_rotlv2di3; -+ goto xop_rotl; -+ case CODE_FOR_xop_rotlv4si3: -+ new_icode = CODE_FOR_rotlv4si3; -+ goto xop_rotl; -+ case CODE_FOR_xop_rotlv8hi3: -+ new_icode = CODE_FOR_rotlv8hi3; -+ goto xop_rotl; -+ case CODE_FOR_xop_rotlv16qi3: -+ new_icode = CODE_FOR_rotlv16qi3; -+ xop_rotl: -+ if (CONST_INT_P (op)) -+ { -+ int mask = GET_MODE_UNIT_BITSIZE (tmode) - 1; -+ op = GEN_INT (INTVAL (op) & mask); -+ gcc_checking_assert -+ (insn_data[icode].operand[i + 1].predicate (op, mode)); -+ } -+ else -+ { -+ gcc_checking_assert -+ (nargs == 2 -+ && insn_data[new_icode].operand[0].mode == tmode -+ && insn_data[new_icode].operand[1].mode == tmode -+ && insn_data[new_icode].operand[2].mode == mode -+ && insn_data[new_icode].operand[0].predicate -+ == insn_data[icode].operand[0].predicate -+ && insn_data[new_icode].operand[1].predicate -+ == insn_data[icode].operand[1].predicate); -+ icode = new_icode; -+ goto non_constant; -+ } -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ } -+ } -+ else -+ { -+ non_constant: -+ if (VECTOR_MODE_P (mode)) -+ op = safe_vector_operand (op, mode); -+ -+ /* If we aren't optimizing, only allow one memory operand to be -+ generated. */ -+ if (memory_operand (op, mode)) -+ num_memory++; -+ -+ gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode); -+ -+ if (optimize -+ || !insn_data[icode].operand[i+adjust+1].predicate (op, mode) -+ || num_memory > 1) -+ op = force_reg (mode, op); -+ } -+ -+ args[i].op = op; -+ args[i].mode = mode; -+ } -+ -+ switch (nargs) -+ { -+ case 1: -+ pat = GEN_FCN (icode) (target, args[0].op); -+ break; -+ -+ case 2: -+ if (tf_p) -+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op, -+ GEN_INT ((int)sub_code)); -+ else if (! comparison_p) -+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op); -+ else -+ { -+ rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target), -+ args[0].op, -+ args[1].op); -+ -+ pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op); -+ } -+ break; -+ -+ case 3: -+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op); -+ break; -+ -+ case 4: -+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op); -+ break; -+ -+ default: -+ gcc_unreachable (); -+ } -+ -+ if (! 
pat) -+ return 0; -+ -+ emit_insn (pat); -+ return target; -+} -+ -+/* Subroutine of ix86_expand_args_builtin to take care of scalar unop -+ insns with vec_merge. */ -+ -+static rtx -+ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp, -+ rtx target) -+{ -+ rtx pat; -+ tree arg0 = CALL_EXPR_ARG (exp, 0); -+ rtx op1, op0 = expand_normal (arg0); -+ machine_mode tmode = insn_data[icode].operand[0].mode; -+ machine_mode mode0 = insn_data[icode].operand[1].mode; -+ -+ if (optimize || !target -+ || GET_MODE (target) != tmode -+ || !insn_data[icode].operand[0].predicate (target, tmode)) -+ target = gen_reg_rtx (tmode); -+ -+ if (VECTOR_MODE_P (mode0)) -+ op0 = safe_vector_operand (op0, mode0); -+ -+ if ((optimize && !register_operand (op0, mode0)) -+ || !insn_data[icode].operand[1].predicate (op0, mode0)) -+ op0 = copy_to_mode_reg (mode0, op0); -+ -+ op1 = op0; -+ if (!insn_data[icode].operand[2].predicate (op1, mode0)) -+ op1 = copy_to_mode_reg (mode0, op1); -+ -+ pat = GEN_FCN (icode) (target, op0, op1); -+ if (! pat) -+ return 0; -+ emit_insn (pat); -+ return target; -+} -+ -+/* Subroutine of ix86_expand_builtin to take care of comparison insns. */ -+ -+static rtx -+ix86_expand_sse_compare (const struct builtin_description *d, -+ tree exp, rtx target, bool swap) -+{ -+ rtx pat; -+ tree arg0 = CALL_EXPR_ARG (exp, 0); -+ tree arg1 = CALL_EXPR_ARG (exp, 1); -+ rtx op0 = expand_normal (arg0); -+ rtx op1 = expand_normal (arg1); -+ rtx op2; -+ machine_mode tmode = insn_data[d->icode].operand[0].mode; -+ machine_mode mode0 = insn_data[d->icode].operand[1].mode; -+ machine_mode mode1 = insn_data[d->icode].operand[2].mode; -+ enum rtx_code comparison = d->comparison; -+ -+ if (VECTOR_MODE_P (mode0)) -+ op0 = safe_vector_operand (op0, mode0); -+ if (VECTOR_MODE_P (mode1)) -+ op1 = safe_vector_operand (op1, mode1); -+ -+ /* Swap operands if we have a comparison that isn't available in -+ hardware. */ -+ if (swap) -+ std::swap (op0, op1); -+ -+ if (optimize || !target -+ || GET_MODE (target) != tmode -+ || !insn_data[d->icode].operand[0].predicate (target, tmode)) -+ target = gen_reg_rtx (tmode); -+ -+ if ((optimize && !register_operand (op0, mode0)) -+ || !insn_data[d->icode].operand[1].predicate (op0, mode0)) -+ op0 = copy_to_mode_reg (mode0, op0); -+ if ((optimize && !register_operand (op1, mode1)) -+ || !insn_data[d->icode].operand[2].predicate (op1, mode1)) -+ op1 = copy_to_mode_reg (mode1, op1); -+ -+ op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1); -+ pat = GEN_FCN (d->icode) (target, op0, op1, op2); -+ if (! pat) -+ return 0; -+ emit_insn (pat); -+ return target; -+} -+ -+/* Subroutine of ix86_expand_builtin to take care of comi insns. */ -+ -+static rtx -+ix86_expand_sse_comi (const struct builtin_description *d, tree exp, -+ rtx target) -+{ -+ rtx pat; -+ tree arg0 = CALL_EXPR_ARG (exp, 0); -+ tree arg1 = CALL_EXPR_ARG (exp, 1); -+ rtx op0 = expand_normal (arg0); -+ rtx op1 = expand_normal (arg1); -+ machine_mode mode0 = insn_data[d->icode].operand[0].mode; -+ machine_mode mode1 = insn_data[d->icode].operand[1].mode; -+ enum rtx_code comparison = d->comparison; -+ -+ if (VECTOR_MODE_P (mode0)) -+ op0 = safe_vector_operand (op0, mode0); -+ if (VECTOR_MODE_P (mode1)) -+ op1 = safe_vector_operand (op1, mode1); -+ -+ /* Swap operands if we have a comparison that isn't available in -+ hardware. 
*/ -+ if (d->flag & BUILTIN_DESC_SWAP_OPERANDS) -+ std::swap (op0, op1); -+ -+ target = gen_reg_rtx (SImode); -+ emit_move_insn (target, const0_rtx); -+ target = gen_rtx_SUBREG (QImode, target, 0); -+ -+ if ((optimize && !register_operand (op0, mode0)) -+ || !insn_data[d->icode].operand[0].predicate (op0, mode0)) -+ op0 = copy_to_mode_reg (mode0, op0); -+ if ((optimize && !register_operand (op1, mode1)) -+ || !insn_data[d->icode].operand[1].predicate (op1, mode1)) -+ op1 = copy_to_mode_reg (mode1, op1); -+ -+ pat = GEN_FCN (d->icode) (op0, op1); -+ if (! pat) -+ return 0; -+ emit_insn (pat); -+ emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target), -+ gen_rtx_fmt_ee (comparison, QImode, -+ SET_DEST (pat), -+ const0_rtx))); -+ -+ return SUBREG_REG (target); -+} -+ -+/* Subroutines of ix86_expand_args_builtin to take care of round insns. */ -+ -+static rtx -+ix86_expand_sse_round (const struct builtin_description *d, tree exp, -+ rtx target) -+{ -+ rtx pat; -+ tree arg0 = CALL_EXPR_ARG (exp, 0); -+ rtx op1, op0 = expand_normal (arg0); -+ machine_mode tmode = insn_data[d->icode].operand[0].mode; -+ machine_mode mode0 = insn_data[d->icode].operand[1].mode; -+ -+ if (optimize || target == 0 -+ || GET_MODE (target) != tmode -+ || !insn_data[d->icode].operand[0].predicate (target, tmode)) -+ target = gen_reg_rtx (tmode); -+ -+ if (VECTOR_MODE_P (mode0)) -+ op0 = safe_vector_operand (op0, mode0); -+ -+ if ((optimize && !register_operand (op0, mode0)) -+ || !insn_data[d->icode].operand[0].predicate (op0, mode0)) -+ op0 = copy_to_mode_reg (mode0, op0); -+ -+ op1 = GEN_INT (d->comparison); -+ -+ pat = GEN_FCN (d->icode) (target, op0, op1); -+ if (! pat) -+ return 0; -+ emit_insn (pat); -+ return target; -+} -+ -+static rtx -+ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d, -+ tree exp, rtx target) -+{ -+ rtx pat; -+ tree arg0 = CALL_EXPR_ARG (exp, 0); -+ tree arg1 = CALL_EXPR_ARG (exp, 1); -+ rtx op0 = expand_normal (arg0); -+ rtx op1 = expand_normal (arg1); -+ rtx op2; -+ machine_mode tmode = insn_data[d->icode].operand[0].mode; -+ machine_mode mode0 = insn_data[d->icode].operand[1].mode; -+ machine_mode mode1 = insn_data[d->icode].operand[2].mode; -+ -+ if (optimize || target == 0 -+ || GET_MODE (target) != tmode -+ || !insn_data[d->icode].operand[0].predicate (target, tmode)) -+ target = gen_reg_rtx (tmode); -+ -+ op0 = safe_vector_operand (op0, mode0); -+ op1 = safe_vector_operand (op1, mode1); -+ -+ if ((optimize && !register_operand (op0, mode0)) -+ || !insn_data[d->icode].operand[0].predicate (op0, mode0)) -+ op0 = copy_to_mode_reg (mode0, op0); -+ if ((optimize && !register_operand (op1, mode1)) -+ || !insn_data[d->icode].operand[1].predicate (op1, mode1)) -+ op1 = copy_to_mode_reg (mode1, op1); -+ -+ op2 = GEN_INT (d->comparison); -+ -+ pat = GEN_FCN (d->icode) (target, op0, op1, op2); -+ if (! pat) -+ return 0; -+ emit_insn (pat); -+ return target; -+} -+ -+/* Subroutine of ix86_expand_builtin to take care of ptest insns. 
*/ -+ -+static rtx -+ix86_expand_sse_ptest (const struct builtin_description *d, tree exp, -+ rtx target) -+{ -+ rtx pat; -+ tree arg0 = CALL_EXPR_ARG (exp, 0); -+ tree arg1 = CALL_EXPR_ARG (exp, 1); -+ rtx op0 = expand_normal (arg0); -+ rtx op1 = expand_normal (arg1); -+ machine_mode mode0 = insn_data[d->icode].operand[0].mode; -+ machine_mode mode1 = insn_data[d->icode].operand[1].mode; -+ enum rtx_code comparison = d->comparison; -+ -+ if (VECTOR_MODE_P (mode0)) -+ op0 = safe_vector_operand (op0, mode0); -+ if (VECTOR_MODE_P (mode1)) -+ op1 = safe_vector_operand (op1, mode1); -+ -+ target = gen_reg_rtx (SImode); -+ emit_move_insn (target, const0_rtx); -+ target = gen_rtx_SUBREG (QImode, target, 0); -+ -+ if ((optimize && !register_operand (op0, mode0)) -+ || !insn_data[d->icode].operand[0].predicate (op0, mode0)) -+ op0 = copy_to_mode_reg (mode0, op0); -+ if ((optimize && !register_operand (op1, mode1)) -+ || !insn_data[d->icode].operand[1].predicate (op1, mode1)) -+ op1 = copy_to_mode_reg (mode1, op1); -+ -+ pat = GEN_FCN (d->icode) (op0, op1); -+ if (! pat) -+ return 0; -+ emit_insn (pat); -+ emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target), -+ gen_rtx_fmt_ee (comparison, QImode, -+ SET_DEST (pat), -+ const0_rtx))); -+ -+ return SUBREG_REG (target); -+} -+ -+/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */ -+ -+static rtx -+ix86_expand_sse_pcmpestr (const struct builtin_description *d, -+ tree exp, rtx target) -+{ -+ rtx pat; -+ tree arg0 = CALL_EXPR_ARG (exp, 0); -+ tree arg1 = CALL_EXPR_ARG (exp, 1); -+ tree arg2 = CALL_EXPR_ARG (exp, 2); -+ tree arg3 = CALL_EXPR_ARG (exp, 3); -+ tree arg4 = CALL_EXPR_ARG (exp, 4); -+ rtx scratch0, scratch1; -+ rtx op0 = expand_normal (arg0); -+ rtx op1 = expand_normal (arg1); -+ rtx op2 = expand_normal (arg2); -+ rtx op3 = expand_normal (arg3); -+ rtx op4 = expand_normal (arg4); -+ machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm; -+ -+ tmode0 = insn_data[d->icode].operand[0].mode; -+ tmode1 = insn_data[d->icode].operand[1].mode; -+ modev2 = insn_data[d->icode].operand[2].mode; -+ modei3 = insn_data[d->icode].operand[3].mode; -+ modev4 = insn_data[d->icode].operand[4].mode; -+ modei5 = insn_data[d->icode].operand[5].mode; -+ modeimm = insn_data[d->icode].operand[6].mode; -+ -+ if (VECTOR_MODE_P (modev2)) -+ op0 = safe_vector_operand (op0, modev2); -+ if (VECTOR_MODE_P (modev4)) -+ op2 = safe_vector_operand (op2, modev4); -+ -+ if (!insn_data[d->icode].operand[2].predicate (op0, modev2)) -+ op0 = copy_to_mode_reg (modev2, op0); -+ if (!insn_data[d->icode].operand[3].predicate (op1, modei3)) -+ op1 = copy_to_mode_reg (modei3, op1); -+ if ((optimize && !register_operand (op2, modev4)) -+ || !insn_data[d->icode].operand[4].predicate (op2, modev4)) -+ op2 = copy_to_mode_reg (modev4, op2); -+ if (!insn_data[d->icode].operand[5].predicate (op3, modei5)) -+ op3 = copy_to_mode_reg (modei5, op3); -+ -+ if (!insn_data[d->icode].operand[6].predicate (op4, modeimm)) -+ { -+ error ("the fifth argument must be an 8-bit immediate"); -+ return const0_rtx; -+ } -+ -+ if (d->code == IX86_BUILTIN_PCMPESTRI128) -+ { -+ if (optimize || !target -+ || GET_MODE (target) != tmode0 -+ || !insn_data[d->icode].operand[0].predicate (target, tmode0)) -+ target = gen_reg_rtx (tmode0); -+ -+ scratch1 = gen_reg_rtx (tmode1); -+ -+ pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4); -+ } -+ else if (d->code == IX86_BUILTIN_PCMPESTRM128) -+ { -+ if (optimize || !target -+ || GET_MODE (target) != tmode1 
-+ || !insn_data[d->icode].operand[1].predicate (target, tmode1)) -+ target = gen_reg_rtx (tmode1); -+ -+ scratch0 = gen_reg_rtx (tmode0); -+ -+ pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4); -+ } -+ else -+ { -+ gcc_assert (d->flag); -+ -+ scratch0 = gen_reg_rtx (tmode0); -+ scratch1 = gen_reg_rtx (tmode1); -+ -+ pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4); -+ } -+ -+ if (! pat) -+ return 0; -+ -+ emit_insn (pat); -+ -+ if (d->flag) -+ { -+ target = gen_reg_rtx (SImode); -+ emit_move_insn (target, const0_rtx); -+ target = gen_rtx_SUBREG (QImode, target, 0); -+ -+ emit_insn -+ (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target), -+ gen_rtx_fmt_ee (EQ, QImode, -+ gen_rtx_REG ((machine_mode) d->flag, -+ FLAGS_REG), -+ const0_rtx))); -+ return SUBREG_REG (target); -+ } -+ else -+ return target; -+} -+ -+ -+/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */ -+ -+static rtx -+ix86_expand_sse_pcmpistr (const struct builtin_description *d, -+ tree exp, rtx target) -+{ -+ rtx pat; -+ tree arg0 = CALL_EXPR_ARG (exp, 0); -+ tree arg1 = CALL_EXPR_ARG (exp, 1); -+ tree arg2 = CALL_EXPR_ARG (exp, 2); -+ rtx scratch0, scratch1; -+ rtx op0 = expand_normal (arg0); -+ rtx op1 = expand_normal (arg1); -+ rtx op2 = expand_normal (arg2); -+ machine_mode tmode0, tmode1, modev2, modev3, modeimm; -+ -+ tmode0 = insn_data[d->icode].operand[0].mode; -+ tmode1 = insn_data[d->icode].operand[1].mode; -+ modev2 = insn_data[d->icode].operand[2].mode; -+ modev3 = insn_data[d->icode].operand[3].mode; -+ modeimm = insn_data[d->icode].operand[4].mode; -+ -+ if (VECTOR_MODE_P (modev2)) -+ op0 = safe_vector_operand (op0, modev2); -+ if (VECTOR_MODE_P (modev3)) -+ op1 = safe_vector_operand (op1, modev3); -+ -+ if (!insn_data[d->icode].operand[2].predicate (op0, modev2)) -+ op0 = copy_to_mode_reg (modev2, op0); -+ if ((optimize && !register_operand (op1, modev3)) -+ || !insn_data[d->icode].operand[3].predicate (op1, modev3)) -+ op1 = copy_to_mode_reg (modev3, op1); -+ -+ if (!insn_data[d->icode].operand[4].predicate (op2, modeimm)) -+ { -+ error ("the third argument must be an 8-bit immediate"); -+ return const0_rtx; -+ } -+ -+ if (d->code == IX86_BUILTIN_PCMPISTRI128) -+ { -+ if (optimize || !target -+ || GET_MODE (target) != tmode0 -+ || !insn_data[d->icode].operand[0].predicate (target, tmode0)) -+ target = gen_reg_rtx (tmode0); -+ -+ scratch1 = gen_reg_rtx (tmode1); -+ -+ pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2); -+ } -+ else if (d->code == IX86_BUILTIN_PCMPISTRM128) -+ { -+ if (optimize || !target -+ || GET_MODE (target) != tmode1 -+ || !insn_data[d->icode].operand[1].predicate (target, tmode1)) -+ target = gen_reg_rtx (tmode1); -+ -+ scratch0 = gen_reg_rtx (tmode0); -+ -+ pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2); -+ } -+ else -+ { -+ gcc_assert (d->flag); -+ -+ scratch0 = gen_reg_rtx (tmode0); -+ scratch1 = gen_reg_rtx (tmode1); -+ -+ pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2); -+ } -+ -+ if (! pat) -+ return 0; -+ -+ emit_insn (pat); -+ -+ if (d->flag) -+ { -+ target = gen_reg_rtx (SImode); -+ emit_move_insn (target, const0_rtx); -+ target = gen_rtx_SUBREG (QImode, target, 0); -+ -+ emit_insn -+ (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target), -+ gen_rtx_fmt_ee (EQ, QImode, -+ gen_rtx_REG ((machine_mode) d->flag, -+ FLAGS_REG), -+ const0_rtx))); -+ return SUBREG_REG (target); -+ } -+ else -+ return target; -+} -+ -+/* Fixup modeless constants to fit required mode. 
*/ -+ -+static rtx -+fixup_modeless_constant (rtx x, machine_mode mode) -+{ -+ if (GET_MODE (x) == VOIDmode) -+ x = convert_to_mode (mode, x, 1); -+ return x; -+} -+ -+/* Subroutine of ix86_expand_builtin to take care of insns with -+ variable number of operands. */ -+ -+static rtx -+ix86_expand_args_builtin (const struct builtin_description *d, -+ tree exp, rtx target) -+{ -+ rtx pat, real_target; -+ unsigned int i, nargs; -+ unsigned int nargs_constant = 0; -+ unsigned int mask_pos = 0; -+ int num_memory = 0; -+ struct -+ { -+ rtx op; -+ machine_mode mode; -+ } args[6]; -+ bool second_arg_count = false; -+ enum insn_code icode = d->icode; -+ const struct insn_data_d *insn_p = &insn_data[icode]; -+ machine_mode tmode = insn_p->operand[0].mode; -+ machine_mode rmode = VOIDmode; -+ bool swap = false; -+ enum rtx_code comparison = d->comparison; -+ -+ switch ((enum ix86_builtin_func_type) d->flag) -+ { -+ case V2DF_FTYPE_V2DF_ROUND: -+ case V4DF_FTYPE_V4DF_ROUND: -+ case V8DF_FTYPE_V8DF_ROUND: -+ case V4SF_FTYPE_V4SF_ROUND: -+ case V8SF_FTYPE_V8SF_ROUND: -+ case V16SF_FTYPE_V16SF_ROUND: -+ case V4SI_FTYPE_V4SF_ROUND: -+ case V8SI_FTYPE_V8SF_ROUND: -+ case V16SI_FTYPE_V16SF_ROUND: -+ return ix86_expand_sse_round (d, exp, target); -+ case V4SI_FTYPE_V2DF_V2DF_ROUND: -+ case V8SI_FTYPE_V4DF_V4DF_ROUND: -+ case V16SI_FTYPE_V8DF_V8DF_ROUND: -+ return ix86_expand_sse_round_vec_pack_sfix (d, exp, target); -+ case INT_FTYPE_V8SF_V8SF_PTEST: -+ case INT_FTYPE_V4DI_V4DI_PTEST: -+ case INT_FTYPE_V4DF_V4DF_PTEST: -+ case INT_FTYPE_V4SF_V4SF_PTEST: -+ case INT_FTYPE_V2DI_V2DI_PTEST: -+ case INT_FTYPE_V2DF_V2DF_PTEST: -+ return ix86_expand_sse_ptest (d, exp, target); -+ case FLOAT128_FTYPE_FLOAT128: -+ case FLOAT_FTYPE_FLOAT: -+ case INT_FTYPE_INT: -+ case UINT_FTYPE_UINT: -+ case UINT16_FTYPE_UINT16: -+ case UINT64_FTYPE_INT: -+ case UINT64_FTYPE_UINT64: -+ case INT64_FTYPE_INT64: -+ case INT64_FTYPE_V4SF: -+ case INT64_FTYPE_V2DF: -+ case INT_FTYPE_V16QI: -+ case INT_FTYPE_V8QI: -+ case INT_FTYPE_V8SF: -+ case INT_FTYPE_V4DF: -+ case INT_FTYPE_V4SF: -+ case INT_FTYPE_V2DF: -+ case INT_FTYPE_V32QI: -+ case V16QI_FTYPE_V16QI: -+ case V8SI_FTYPE_V8SF: -+ case V8SI_FTYPE_V4SI: -+ case V8HI_FTYPE_V8HI: -+ case V8HI_FTYPE_V16QI: -+ case V8QI_FTYPE_V8QI: -+ case V8SF_FTYPE_V8SF: -+ case V8SF_FTYPE_V8SI: -+ case V8SF_FTYPE_V4SF: -+ case V8SF_FTYPE_V8HI: -+ case V4SI_FTYPE_V4SI: -+ case V4SI_FTYPE_V16QI: -+ case V4SI_FTYPE_V4SF: -+ case V4SI_FTYPE_V8SI: -+ case V4SI_FTYPE_V8HI: -+ case V4SI_FTYPE_V4DF: -+ case V4SI_FTYPE_V2DF: -+ case V4HI_FTYPE_V4HI: -+ case V4DF_FTYPE_V4DF: -+ case V4DF_FTYPE_V4SI: -+ case V4DF_FTYPE_V4SF: -+ case V4DF_FTYPE_V2DF: -+ case V4SF_FTYPE_V4SF: -+ case V4SF_FTYPE_V4SI: -+ case V4SF_FTYPE_V8SF: -+ case V4SF_FTYPE_V4DF: -+ case V4SF_FTYPE_V8HI: -+ case V4SF_FTYPE_V2DF: -+ case V2DI_FTYPE_V2DI: -+ case V2DI_FTYPE_V16QI: -+ case V2DI_FTYPE_V8HI: -+ case V2DI_FTYPE_V4SI: -+ case V2DF_FTYPE_V2DF: -+ case V2DF_FTYPE_V4SI: -+ case V2DF_FTYPE_V4DF: -+ case V2DF_FTYPE_V4SF: -+ case V2DF_FTYPE_V2SI: -+ case V2SI_FTYPE_V2SI: -+ case V2SI_FTYPE_V4SF: -+ case V2SI_FTYPE_V2SF: -+ case V2SI_FTYPE_V2DF: -+ case V2SF_FTYPE_V2SF: -+ case V2SF_FTYPE_V2SI: -+ case V32QI_FTYPE_V32QI: -+ case V32QI_FTYPE_V16QI: -+ case V16HI_FTYPE_V16HI: -+ case V16HI_FTYPE_V8HI: -+ case V8SI_FTYPE_V8SI: -+ case V16HI_FTYPE_V16QI: -+ case V8SI_FTYPE_V16QI: -+ case V4DI_FTYPE_V16QI: -+ case V8SI_FTYPE_V8HI: -+ case V4DI_FTYPE_V8HI: -+ case V4DI_FTYPE_V4SI: -+ case V4DI_FTYPE_V2DI: -+ case UQI_FTYPE_UQI: -+ case 
UHI_FTYPE_UHI: -+ case USI_FTYPE_USI: -+ case USI_FTYPE_UQI: -+ case USI_FTYPE_UHI: -+ case UDI_FTYPE_UDI: -+ case UHI_FTYPE_V16QI: -+ case USI_FTYPE_V32QI: -+ case UDI_FTYPE_V64QI: -+ case V16QI_FTYPE_UHI: -+ case V32QI_FTYPE_USI: -+ case V64QI_FTYPE_UDI: -+ case V8HI_FTYPE_UQI: -+ case V16HI_FTYPE_UHI: -+ case V32HI_FTYPE_USI: -+ case V4SI_FTYPE_UQI: -+ case V8SI_FTYPE_UQI: -+ case V4SI_FTYPE_UHI: -+ case V8SI_FTYPE_UHI: -+ case UQI_FTYPE_V8HI: -+ case UHI_FTYPE_V16HI: -+ case USI_FTYPE_V32HI: -+ case UQI_FTYPE_V4SI: -+ case UQI_FTYPE_V8SI: -+ case UHI_FTYPE_V16SI: -+ case UQI_FTYPE_V2DI: -+ case UQI_FTYPE_V4DI: -+ case UQI_FTYPE_V8DI: -+ case V16SI_FTYPE_UHI: -+ case V2DI_FTYPE_UQI: -+ case V4DI_FTYPE_UQI: -+ case V16SI_FTYPE_INT: -+ case V16SF_FTYPE_V8SF: -+ case V16SI_FTYPE_V8SI: -+ case V16SF_FTYPE_V4SF: -+ case V16SI_FTYPE_V4SI: -+ case V16SI_FTYPE_V16SF: -+ case V16SI_FTYPE_V16SI: -+ case V64QI_FTYPE_V64QI: -+ case V32HI_FTYPE_V32HI: -+ case V16SF_FTYPE_V16SF: -+ case V8DI_FTYPE_UQI: -+ case V8DI_FTYPE_V8DI: -+ case V8DF_FTYPE_V4DF: -+ case V8DF_FTYPE_V2DF: -+ case V8DF_FTYPE_V8DF: -+ case V4DI_FTYPE_V4DI: -+ nargs = 1; -+ break; -+ case V4SF_FTYPE_V4SF_VEC_MERGE: -+ case V2DF_FTYPE_V2DF_VEC_MERGE: -+ return ix86_expand_unop_vec_merge_builtin (icode, exp, target); -+ case FLOAT128_FTYPE_FLOAT128_FLOAT128: -+ case V16QI_FTYPE_V16QI_V16QI: -+ case V16QI_FTYPE_V8HI_V8HI: -+ case V16SF_FTYPE_V16SF_V16SF: -+ case V8QI_FTYPE_V8QI_V8QI: -+ case V8QI_FTYPE_V4HI_V4HI: -+ case V8HI_FTYPE_V8HI_V8HI: -+ case V8HI_FTYPE_V16QI_V16QI: -+ case V8HI_FTYPE_V4SI_V4SI: -+ case V8SF_FTYPE_V8SF_V8SF: -+ case V8SF_FTYPE_V8SF_V8SI: -+ case V8DF_FTYPE_V8DF_V8DF: -+ case V4SI_FTYPE_V4SI_V4SI: -+ case V4SI_FTYPE_V8HI_V8HI: -+ case V4SI_FTYPE_V2DF_V2DF: -+ case V4HI_FTYPE_V4HI_V4HI: -+ case V4HI_FTYPE_V8QI_V8QI: -+ case V4HI_FTYPE_V2SI_V2SI: -+ case V4DF_FTYPE_V4DF_V4DF: -+ case V4DF_FTYPE_V4DF_V4DI: -+ case V4SF_FTYPE_V4SF_V4SF: -+ case V4SF_FTYPE_V4SF_V4SI: -+ case V4SF_FTYPE_V4SF_V2SI: -+ case V4SF_FTYPE_V4SF_V2DF: -+ case V4SF_FTYPE_V4SF_UINT: -+ case V4SF_FTYPE_V4SF_DI: -+ case V4SF_FTYPE_V4SF_SI: -+ case V2DI_FTYPE_V2DI_V2DI: -+ case V2DI_FTYPE_V16QI_V16QI: -+ case V2DI_FTYPE_V4SI_V4SI: -+ case V2DI_FTYPE_V2DI_V16QI: -+ case V2SI_FTYPE_V2SI_V2SI: -+ case V2SI_FTYPE_V4HI_V4HI: -+ case V2SI_FTYPE_V2SF_V2SF: -+ case V2DF_FTYPE_V2DF_V2DF: -+ case V2DF_FTYPE_V2DF_V4SF: -+ case V2DF_FTYPE_V2DF_V2DI: -+ case V2DF_FTYPE_V2DF_DI: -+ case V2DF_FTYPE_V2DF_SI: -+ case V2DF_FTYPE_V2DF_UINT: -+ case V2SF_FTYPE_V2SF_V2SF: -+ case V1DI_FTYPE_V1DI_V1DI: -+ case V1DI_FTYPE_V8QI_V8QI: -+ case V1DI_FTYPE_V2SI_V2SI: -+ case V32QI_FTYPE_V16HI_V16HI: -+ case V16HI_FTYPE_V8SI_V8SI: -+ case V64QI_FTYPE_V64QI_V64QI: -+ case V32QI_FTYPE_V32QI_V32QI: -+ case V16HI_FTYPE_V32QI_V32QI: -+ case V16HI_FTYPE_V16HI_V16HI: -+ case V8SI_FTYPE_V4DF_V4DF: -+ case V8SI_FTYPE_V8SI_V8SI: -+ case V8SI_FTYPE_V16HI_V16HI: -+ case V4DI_FTYPE_V4DI_V4DI: -+ case V4DI_FTYPE_V8SI_V8SI: -+ case V8DI_FTYPE_V64QI_V64QI: -+ if (comparison == UNKNOWN) -+ return ix86_expand_binop_builtin (icode, exp, target); -+ nargs = 2; -+ break; -+ case V4SF_FTYPE_V4SF_V4SF_SWAP: -+ case V2DF_FTYPE_V2DF_V2DF_SWAP: -+ gcc_assert (comparison != UNKNOWN); -+ nargs = 2; -+ swap = true; -+ break; -+ case V16HI_FTYPE_V16HI_V8HI_COUNT: -+ case V16HI_FTYPE_V16HI_SI_COUNT: -+ case V8SI_FTYPE_V8SI_V4SI_COUNT: -+ case V8SI_FTYPE_V8SI_SI_COUNT: -+ case V4DI_FTYPE_V4DI_V2DI_COUNT: -+ case V4DI_FTYPE_V4DI_INT_COUNT: -+ case V8HI_FTYPE_V8HI_V8HI_COUNT: -+ case V8HI_FTYPE_V8HI_SI_COUNT: 
-+ case V4SI_FTYPE_V4SI_V4SI_COUNT: -+ case V4SI_FTYPE_V4SI_SI_COUNT: -+ case V4HI_FTYPE_V4HI_V4HI_COUNT: -+ case V4HI_FTYPE_V4HI_SI_COUNT: -+ case V2DI_FTYPE_V2DI_V2DI_COUNT: -+ case V2DI_FTYPE_V2DI_SI_COUNT: -+ case V2SI_FTYPE_V2SI_V2SI_COUNT: -+ case V2SI_FTYPE_V2SI_SI_COUNT: -+ case V1DI_FTYPE_V1DI_V1DI_COUNT: -+ case V1DI_FTYPE_V1DI_SI_COUNT: -+ nargs = 2; -+ second_arg_count = true; -+ break; -+ case V16HI_FTYPE_V16HI_INT_V16HI_UHI_COUNT: -+ case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI_COUNT: -+ case V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT: -+ case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT: -+ case V2DI_FTYPE_V2DI_INT_V2DI_UQI_COUNT: -+ case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI_COUNT: -+ case V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT: -+ case V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT: -+ case V4DI_FTYPE_V4DI_INT_V4DI_UQI_COUNT: -+ case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI_COUNT: -+ case V4SI_FTYPE_V4SI_INT_V4SI_UQI_COUNT: -+ case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI_COUNT: -+ case V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT: -+ case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT: -+ case V8HI_FTYPE_V8HI_INT_V8HI_UQI_COUNT: -+ case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI_COUNT: -+ case V8SI_FTYPE_V8SI_INT_V8SI_UQI_COUNT: -+ case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI_COUNT: -+ nargs = 4; -+ second_arg_count = true; -+ break; -+ case UINT64_FTYPE_UINT64_UINT64: -+ case UINT_FTYPE_UINT_UINT: -+ case UINT_FTYPE_UINT_USHORT: -+ case UINT_FTYPE_UINT_UCHAR: -+ case UINT16_FTYPE_UINT16_INT: -+ case UINT8_FTYPE_UINT8_INT: -+ case UQI_FTYPE_UQI_UQI: -+ case UHI_FTYPE_UHI_UHI: -+ case USI_FTYPE_USI_USI: -+ case UDI_FTYPE_UDI_UDI: -+ case V16SI_FTYPE_V8DF_V8DF: -+ nargs = 2; -+ break; -+ case V2DI_FTYPE_V2DI_INT_CONVERT: -+ nargs = 2; -+ rmode = V1TImode; -+ nargs_constant = 1; -+ break; -+ case V4DI_FTYPE_V4DI_INT_CONVERT: -+ nargs = 2; -+ rmode = V2TImode; -+ nargs_constant = 1; -+ break; -+ case V8DI_FTYPE_V8DI_INT_CONVERT: -+ nargs = 2; -+ rmode = V4TImode; -+ nargs_constant = 1; -+ break; -+ case V8HI_FTYPE_V8HI_INT: -+ case V8HI_FTYPE_V8SF_INT: -+ case V16HI_FTYPE_V16SF_INT: -+ case V8HI_FTYPE_V4SF_INT: -+ case V8SF_FTYPE_V8SF_INT: -+ case V4SF_FTYPE_V16SF_INT: -+ case V16SF_FTYPE_V16SF_INT: -+ case V4SI_FTYPE_V4SI_INT: -+ case V4SI_FTYPE_V8SI_INT: -+ case V4HI_FTYPE_V4HI_INT: -+ case V4DF_FTYPE_V4DF_INT: -+ case V4DF_FTYPE_V8DF_INT: -+ case V4SF_FTYPE_V4SF_INT: -+ case V4SF_FTYPE_V8SF_INT: -+ case V2DI_FTYPE_V2DI_INT: -+ case V2DF_FTYPE_V2DF_INT: -+ case V2DF_FTYPE_V4DF_INT: -+ case V16HI_FTYPE_V16HI_INT: -+ case V8SI_FTYPE_V8SI_INT: -+ case V16SI_FTYPE_V16SI_INT: -+ case V4SI_FTYPE_V16SI_INT: -+ case V4DI_FTYPE_V4DI_INT: -+ case V2DI_FTYPE_V4DI_INT: -+ case V4DI_FTYPE_V8DI_INT: -+ case QI_FTYPE_V4SF_INT: -+ case QI_FTYPE_V2DF_INT: -+ case UQI_FTYPE_UQI_UQI_CONST: -+ case UHI_FTYPE_UHI_UQI: -+ case USI_FTYPE_USI_UQI: -+ case UDI_FTYPE_UDI_UQI: -+ nargs = 2; -+ nargs_constant = 1; -+ break; -+ case V16QI_FTYPE_V16QI_V16QI_V16QI: -+ case V8SF_FTYPE_V8SF_V8SF_V8SF: -+ case V4DF_FTYPE_V4DF_V4DF_V4DF: -+ case V4SF_FTYPE_V4SF_V4SF_V4SF: -+ case V2DF_FTYPE_V2DF_V2DF_V2DF: -+ case V32QI_FTYPE_V32QI_V32QI_V32QI: -+ case UHI_FTYPE_V16SI_V16SI_UHI: -+ case UQI_FTYPE_V8DI_V8DI_UQI: -+ case V16HI_FTYPE_V16SI_V16HI_UHI: -+ case V16QI_FTYPE_V16SI_V16QI_UHI: -+ case V16QI_FTYPE_V8DI_V16QI_UQI: -+ case V16SF_FTYPE_V16SF_V16SF_UHI: -+ case V16SF_FTYPE_V4SF_V16SF_UHI: -+ case V16SI_FTYPE_SI_V16SI_UHI: -+ case V16SI_FTYPE_V16HI_V16SI_UHI: -+ case V16SI_FTYPE_V16QI_V16SI_UHI: -+ case V8SF_FTYPE_V4SF_V8SF_UQI: -+ case V4DF_FTYPE_V2DF_V4DF_UQI: -+ case V8SI_FTYPE_V4SI_V8SI_UQI: -+ case 
V8SI_FTYPE_SI_V8SI_UQI: -+ case V4SI_FTYPE_V4SI_V4SI_UQI: -+ case V4SI_FTYPE_SI_V4SI_UQI: -+ case V4DI_FTYPE_V2DI_V4DI_UQI: -+ case V4DI_FTYPE_DI_V4DI_UQI: -+ case V2DI_FTYPE_V2DI_V2DI_UQI: -+ case V2DI_FTYPE_DI_V2DI_UQI: -+ case V64QI_FTYPE_V64QI_V64QI_UDI: -+ case V64QI_FTYPE_V16QI_V64QI_UDI: -+ case V64QI_FTYPE_QI_V64QI_UDI: -+ case V32QI_FTYPE_V32QI_V32QI_USI: -+ case V32QI_FTYPE_V16QI_V32QI_USI: -+ case V32QI_FTYPE_QI_V32QI_USI: -+ case V16QI_FTYPE_V16QI_V16QI_UHI: -+ case V16QI_FTYPE_QI_V16QI_UHI: -+ case V32HI_FTYPE_V8HI_V32HI_USI: -+ case V32HI_FTYPE_HI_V32HI_USI: -+ case V16HI_FTYPE_V8HI_V16HI_UHI: -+ case V16HI_FTYPE_HI_V16HI_UHI: -+ case V8HI_FTYPE_V8HI_V8HI_UQI: -+ case V8HI_FTYPE_HI_V8HI_UQI: -+ case V8SF_FTYPE_V8HI_V8SF_UQI: -+ case V4SF_FTYPE_V8HI_V4SF_UQI: -+ case V8SI_FTYPE_V8SF_V8SI_UQI: -+ case V4SI_FTYPE_V4SF_V4SI_UQI: -+ case V4DI_FTYPE_V4SF_V4DI_UQI: -+ case V2DI_FTYPE_V4SF_V2DI_UQI: -+ case V4SF_FTYPE_V4DI_V4SF_UQI: -+ case V4SF_FTYPE_V2DI_V4SF_UQI: -+ case V4DF_FTYPE_V4DI_V4DF_UQI: -+ case V2DF_FTYPE_V2DI_V2DF_UQI: -+ case V16QI_FTYPE_V8HI_V16QI_UQI: -+ case V16QI_FTYPE_V16HI_V16QI_UHI: -+ case V16QI_FTYPE_V4SI_V16QI_UQI: -+ case V16QI_FTYPE_V8SI_V16QI_UQI: -+ case V8HI_FTYPE_V4SI_V8HI_UQI: -+ case V8HI_FTYPE_V8SI_V8HI_UQI: -+ case V16QI_FTYPE_V2DI_V16QI_UQI: -+ case V16QI_FTYPE_V4DI_V16QI_UQI: -+ case V8HI_FTYPE_V2DI_V8HI_UQI: -+ case V8HI_FTYPE_V4DI_V8HI_UQI: -+ case V4SI_FTYPE_V2DI_V4SI_UQI: -+ case V4SI_FTYPE_V4DI_V4SI_UQI: -+ case V32QI_FTYPE_V32HI_V32QI_USI: -+ case UHI_FTYPE_V16QI_V16QI_UHI: -+ case USI_FTYPE_V32QI_V32QI_USI: -+ case UDI_FTYPE_V64QI_V64QI_UDI: -+ case UQI_FTYPE_V8HI_V8HI_UQI: -+ case UHI_FTYPE_V16HI_V16HI_UHI: -+ case USI_FTYPE_V32HI_V32HI_USI: -+ case UQI_FTYPE_V4SI_V4SI_UQI: -+ case UQI_FTYPE_V8SI_V8SI_UQI: -+ case UQI_FTYPE_V2DI_V2DI_UQI: -+ case UQI_FTYPE_V4DI_V4DI_UQI: -+ case V4SF_FTYPE_V2DF_V4SF_UQI: -+ case V4SF_FTYPE_V4DF_V4SF_UQI: -+ case V16SI_FTYPE_V16SI_V16SI_UHI: -+ case V16SI_FTYPE_V4SI_V16SI_UHI: -+ case V2DI_FTYPE_V4SI_V2DI_UQI: -+ case V2DI_FTYPE_V8HI_V2DI_UQI: -+ case V2DI_FTYPE_V16QI_V2DI_UQI: -+ case V4DI_FTYPE_V4DI_V4DI_UQI: -+ case V4DI_FTYPE_V4SI_V4DI_UQI: -+ case V4DI_FTYPE_V8HI_V4DI_UQI: -+ case V4DI_FTYPE_V16QI_V4DI_UQI: -+ case V4DI_FTYPE_V4DF_V4DI_UQI: -+ case V2DI_FTYPE_V2DF_V2DI_UQI: -+ case V4SI_FTYPE_V4DF_V4SI_UQI: -+ case V4SI_FTYPE_V2DF_V4SI_UQI: -+ case V4SI_FTYPE_V8HI_V4SI_UQI: -+ case V4SI_FTYPE_V16QI_V4SI_UQI: -+ case V4DI_FTYPE_V4DI_V4DI_V4DI: -+ case V8DF_FTYPE_V2DF_V8DF_UQI: -+ case V8DF_FTYPE_V4DF_V8DF_UQI: -+ case V8DF_FTYPE_V8DF_V8DF_UQI: -+ case V8SF_FTYPE_V8SF_V8SF_UQI: -+ case V8SF_FTYPE_V8SI_V8SF_UQI: -+ case V4DF_FTYPE_V4DF_V4DF_UQI: -+ case V4SF_FTYPE_V4SF_V4SF_UQI: -+ case V2DF_FTYPE_V2DF_V2DF_UQI: -+ case V2DF_FTYPE_V4SF_V2DF_UQI: -+ case V2DF_FTYPE_V4SI_V2DF_UQI: -+ case V4SF_FTYPE_V4SI_V4SF_UQI: -+ case V4DF_FTYPE_V4SF_V4DF_UQI: -+ case V4DF_FTYPE_V4SI_V4DF_UQI: -+ case V8SI_FTYPE_V8SI_V8SI_UQI: -+ case V8SI_FTYPE_V8HI_V8SI_UQI: -+ case V8SI_FTYPE_V16QI_V8SI_UQI: -+ case V8DF_FTYPE_V8SI_V8DF_UQI: -+ case V8DI_FTYPE_DI_V8DI_UQI: -+ case V16SF_FTYPE_V8SF_V16SF_UHI: -+ case V16SI_FTYPE_V8SI_V16SI_UHI: -+ case V16HI_FTYPE_V16HI_V16HI_UHI: -+ case V8HI_FTYPE_V16QI_V8HI_UQI: -+ case V16HI_FTYPE_V16QI_V16HI_UHI: -+ case V32HI_FTYPE_V32HI_V32HI_USI: -+ case V32HI_FTYPE_V32QI_V32HI_USI: -+ case V8DI_FTYPE_V16QI_V8DI_UQI: -+ case V8DI_FTYPE_V2DI_V8DI_UQI: -+ case V8DI_FTYPE_V4DI_V8DI_UQI: -+ case V8DI_FTYPE_V8DI_V8DI_UQI: -+ case V8DI_FTYPE_V8HI_V8DI_UQI: -+ case V8DI_FTYPE_V8SI_V8DI_UQI: -+ case 
V8HI_FTYPE_V8DI_V8HI_UQI: -+ case V8SI_FTYPE_V8DI_V8SI_UQI: -+ case V4SI_FTYPE_V4SI_V4SI_V4SI: -+ case V16SI_FTYPE_V16SI_V16SI_V16SI: -+ case V8DI_FTYPE_V8DI_V8DI_V8DI: -+ case V32HI_FTYPE_V32HI_V32HI_V32HI: -+ case V2DI_FTYPE_V2DI_V2DI_V2DI: -+ case V16HI_FTYPE_V16HI_V16HI_V16HI: -+ case V8SI_FTYPE_V8SI_V8SI_V8SI: -+ case V8HI_FTYPE_V8HI_V8HI_V8HI: -+ nargs = 3; -+ break; -+ case V32QI_FTYPE_V32QI_V32QI_INT: -+ case V16HI_FTYPE_V16HI_V16HI_INT: -+ case V16QI_FTYPE_V16QI_V16QI_INT: -+ case V4DI_FTYPE_V4DI_V4DI_INT: -+ case V8HI_FTYPE_V8HI_V8HI_INT: -+ case V8SI_FTYPE_V8SI_V8SI_INT: -+ case V8SI_FTYPE_V8SI_V4SI_INT: -+ case V8SF_FTYPE_V8SF_V8SF_INT: -+ case V8SF_FTYPE_V8SF_V4SF_INT: -+ case V4SI_FTYPE_V4SI_V4SI_INT: -+ case V4DF_FTYPE_V4DF_V4DF_INT: -+ case V16SF_FTYPE_V16SF_V16SF_INT: -+ case V16SF_FTYPE_V16SF_V4SF_INT: -+ case V16SI_FTYPE_V16SI_V4SI_INT: -+ case V4DF_FTYPE_V4DF_V2DF_INT: -+ case V4SF_FTYPE_V4SF_V4SF_INT: -+ case V2DI_FTYPE_V2DI_V2DI_INT: -+ case V4DI_FTYPE_V4DI_V2DI_INT: -+ case V2DF_FTYPE_V2DF_V2DF_INT: -+ case UQI_FTYPE_V8DI_V8UDI_INT: -+ case UQI_FTYPE_V8DF_V8DF_INT: -+ case UQI_FTYPE_V2DF_V2DF_INT: -+ case UQI_FTYPE_V4SF_V4SF_INT: -+ case UHI_FTYPE_V16SI_V16SI_INT: -+ case UHI_FTYPE_V16SF_V16SF_INT: -+ case V64QI_FTYPE_V64QI_V64QI_INT: -+ case V32HI_FTYPE_V32HI_V32HI_INT: -+ case V16SI_FTYPE_V16SI_V16SI_INT: -+ case V8DI_FTYPE_V8DI_V8DI_INT: -+ nargs = 3; -+ nargs_constant = 1; -+ break; -+ case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT: -+ nargs = 3; -+ rmode = V4DImode; -+ nargs_constant = 1; -+ break; -+ case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT: -+ nargs = 3; -+ rmode = V2DImode; -+ nargs_constant = 1; -+ break; -+ case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT: -+ nargs = 3; -+ rmode = DImode; -+ nargs_constant = 1; -+ break; -+ case V2DI_FTYPE_V2DI_UINT_UINT: -+ nargs = 3; -+ nargs_constant = 2; -+ break; -+ case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT: -+ nargs = 3; -+ rmode = V8DImode; -+ nargs_constant = 1; -+ break; -+ case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT: -+ nargs = 5; -+ rmode = V8DImode; -+ mask_pos = 2; -+ nargs_constant = 1; -+ break; -+ case QI_FTYPE_V8DF_INT_UQI: -+ case QI_FTYPE_V4DF_INT_UQI: -+ case QI_FTYPE_V2DF_INT_UQI: -+ case HI_FTYPE_V16SF_INT_UHI: -+ case QI_FTYPE_V8SF_INT_UQI: -+ case QI_FTYPE_V4SF_INT_UQI: -+ case V4SI_FTYPE_V4SI_V4SI_UHI: -+ case V8SI_FTYPE_V8SI_V8SI_UHI: -+ nargs = 3; -+ mask_pos = 1; -+ nargs_constant = 1; -+ break; -+ case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT: -+ nargs = 5; -+ rmode = V4DImode; -+ mask_pos = 2; -+ nargs_constant = 1; -+ break; -+ case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT: -+ nargs = 5; -+ rmode = V2DImode; -+ mask_pos = 2; -+ nargs_constant = 1; -+ break; -+ case V32QI_FTYPE_V32QI_V32QI_V32QI_USI: -+ case V32HI_FTYPE_V32HI_V32HI_V32HI_USI: -+ case V32HI_FTYPE_V64QI_V64QI_V32HI_USI: -+ case V16SI_FTYPE_V32HI_V32HI_V16SI_UHI: -+ case V64QI_FTYPE_V64QI_V64QI_V64QI_UDI: -+ case V32HI_FTYPE_V32HI_V8HI_V32HI_USI: -+ case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI: -+ case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI: -+ case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI: -+ case V64QI_FTYPE_V32HI_V32HI_V64QI_UDI: -+ case V32QI_FTYPE_V16HI_V16HI_V32QI_USI: -+ case V16QI_FTYPE_V8HI_V8HI_V16QI_UHI: -+ case V32HI_FTYPE_V16SI_V16SI_V32HI_USI: -+ case V16HI_FTYPE_V8SI_V8SI_V16HI_UHI: -+ case V8HI_FTYPE_V4SI_V4SI_V8HI_UQI: -+ case V4DF_FTYPE_V4DF_V4DI_V4DF_UQI: -+ case V8SF_FTYPE_V8SF_V8SI_V8SF_UQI: -+ case V4SF_FTYPE_V4SF_V4SI_V4SF_UQI: -+ case V2DF_FTYPE_V2DF_V2DI_V2DF_UQI: -+ case V2DI_FTYPE_V4SI_V4SI_V2DI_UQI: -+ case V4DI_FTYPE_V8SI_V8SI_V4DI_UQI: -+ case 
V4DF_FTYPE_V4DI_V4DF_V4DF_UQI: -+ case V8SF_FTYPE_V8SI_V8SF_V8SF_UQI: -+ case V2DF_FTYPE_V2DI_V2DF_V2DF_UQI: -+ case V4SF_FTYPE_V4SI_V4SF_V4SF_UQI: -+ case V16SF_FTYPE_V16SF_V16SF_V16SF_UHI: -+ case V16SF_FTYPE_V16SF_V16SI_V16SF_UHI: -+ case V16SF_FTYPE_V16SI_V16SF_V16SF_UHI: -+ case V16SI_FTYPE_V16SI_V16SI_V16SI_UHI: -+ case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI: -+ case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI: -+ case V8SI_FTYPE_V8SI_V8SI_V8SI_UQI: -+ case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI: -+ case V8SF_FTYPE_V8SF_V8SF_V8SF_UQI: -+ case V16QI_FTYPE_V16QI_V16QI_V16QI_UHI: -+ case V16HI_FTYPE_V16HI_V16HI_V16HI_UHI: -+ case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI: -+ case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI: -+ case V4DI_FTYPE_V4DI_V4DI_V4DI_UQI: -+ case V4DF_FTYPE_V4DF_V4DF_V4DF_UQI: -+ case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI: -+ case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI: -+ case V8DF_FTYPE_V8DF_V8DI_V8DF_UQI: -+ case V8DF_FTYPE_V8DI_V8DF_V8DF_UQI: -+ case V8DI_FTYPE_V16SI_V16SI_V8DI_UQI: -+ case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI: -+ case V8DI_FTYPE_V8DI_V8DI_V8DI_UQI: -+ case V8HI_FTYPE_V16QI_V16QI_V8HI_UQI: -+ case V16HI_FTYPE_V32QI_V32QI_V16HI_UHI: -+ case V8SI_FTYPE_V16HI_V16HI_V8SI_UQI: -+ case V4SI_FTYPE_V8HI_V8HI_V4SI_UQI: -+ nargs = 4; -+ break; -+ case V2DF_FTYPE_V2DF_V2DF_V2DI_INT: -+ case V4DF_FTYPE_V4DF_V4DF_V4DI_INT: -+ case V4SF_FTYPE_V4SF_V4SF_V4SI_INT: -+ case V8SF_FTYPE_V8SF_V8SF_V8SI_INT: -+ case V16SF_FTYPE_V16SF_V16SF_V16SI_INT: -+ nargs = 4; -+ nargs_constant = 1; -+ break; -+ case UQI_FTYPE_V4DI_V4DI_INT_UQI: -+ case UQI_FTYPE_V8SI_V8SI_INT_UQI: -+ case QI_FTYPE_V4DF_V4DF_INT_UQI: -+ case QI_FTYPE_V8SF_V8SF_INT_UQI: -+ case UQI_FTYPE_V2DI_V2DI_INT_UQI: -+ case UQI_FTYPE_V4SI_V4SI_INT_UQI: -+ case UQI_FTYPE_V2DF_V2DF_INT_UQI: -+ case UQI_FTYPE_V4SF_V4SF_INT_UQI: -+ case UDI_FTYPE_V64QI_V64QI_INT_UDI: -+ case USI_FTYPE_V32QI_V32QI_INT_USI: -+ case UHI_FTYPE_V16QI_V16QI_INT_UHI: -+ case USI_FTYPE_V32HI_V32HI_INT_USI: -+ case UHI_FTYPE_V16HI_V16HI_INT_UHI: -+ case UQI_FTYPE_V8HI_V8HI_INT_UQI: -+ case V32HI_FTYPE_V32HI_V32HI_V32HI_INT: -+ case V16HI_FTYPE_V16HI_V16HI_V16HI_INT: -+ case V8HI_FTYPE_V8HI_V8HI_V8HI_INT: -+ case V8SI_FTYPE_V8SI_V8SI_V8SI_INT: -+ case V4DI_FTYPE_V4DI_V4DI_V4DI_INT: -+ case V8DI_FTYPE_V8DI_V8DI_V8DI_INT: -+ case V16SI_FTYPE_V16SI_V16SI_V16SI_INT: -+ case V2DI_FTYPE_V2DI_V2DI_V2DI_INT: -+ case V4SI_FTYPE_V4SI_V4SI_V4SI_INT: -+ nargs = 4; -+ mask_pos = 1; -+ nargs_constant = 1; -+ break; -+ case V2DI_FTYPE_V2DI_V2DI_UINT_UINT: -+ nargs = 4; -+ nargs_constant = 2; -+ break; -+ case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED: -+ case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG: -+ nargs = 4; -+ break; -+ case UQI_FTYPE_V8DI_V8DI_INT_UQI: -+ case UHI_FTYPE_V16SI_V16SI_INT_UHI: -+ mask_pos = 1; -+ nargs = 4; -+ nargs_constant = 1; -+ break; -+ case V8SF_FTYPE_V8SF_INT_V8SF_UQI: -+ case V4SF_FTYPE_V4SF_INT_V4SF_UQI: -+ case V2DF_FTYPE_V4DF_INT_V2DF_UQI: -+ case V2DI_FTYPE_V4DI_INT_V2DI_UQI: -+ case V8SF_FTYPE_V16SF_INT_V8SF_UQI: -+ case V8SI_FTYPE_V16SI_INT_V8SI_UQI: -+ case V2DF_FTYPE_V8DF_INT_V2DF_UQI: -+ case V2DI_FTYPE_V8DI_INT_V2DI_UQI: -+ case V4SF_FTYPE_V8SF_INT_V4SF_UQI: -+ case V4SI_FTYPE_V8SI_INT_V4SI_UQI: -+ case V8HI_FTYPE_V8SF_INT_V8HI_UQI: -+ case V8HI_FTYPE_V4SF_INT_V8HI_UQI: -+ case V32HI_FTYPE_V32HI_INT_V32HI_USI: -+ case V16HI_FTYPE_V16HI_INT_V16HI_UHI: -+ case V8HI_FTYPE_V8HI_INT_V8HI_UQI: -+ case V4DI_FTYPE_V4DI_INT_V4DI_UQI: -+ case V2DI_FTYPE_V2DI_INT_V2DI_UQI: -+ case V8SI_FTYPE_V8SI_INT_V8SI_UQI: -+ case V4SI_FTYPE_V4SI_INT_V4SI_UQI: -+ case V4DF_FTYPE_V4DF_INT_V4DF_UQI: -+ case 
V2DF_FTYPE_V2DF_INT_V2DF_UQI: -+ case V8DF_FTYPE_V8DF_INT_V8DF_UQI: -+ case V16SF_FTYPE_V16SF_INT_V16SF_UHI: -+ case V16HI_FTYPE_V16SF_INT_V16HI_UHI: -+ case V16SI_FTYPE_V16SI_INT_V16SI_UHI: -+ case V4SI_FTYPE_V16SI_INT_V4SI_UQI: -+ case V4DI_FTYPE_V8DI_INT_V4DI_UQI: -+ case V4DF_FTYPE_V8DF_INT_V4DF_UQI: -+ case V4SF_FTYPE_V16SF_INT_V4SF_UQI: -+ case V8DI_FTYPE_V8DI_INT_V8DI_UQI: -+ nargs = 4; -+ mask_pos = 2; -+ nargs_constant = 1; -+ break; -+ case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI: -+ case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI: -+ case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI: -+ case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI: -+ case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI: -+ case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI: -+ case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI: -+ case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI: -+ case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI: -+ case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI: -+ case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI: -+ case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI: -+ case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI: -+ case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI: -+ case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI: -+ case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI: -+ case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI: -+ case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI: -+ case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI: -+ case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI: -+ case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI: -+ case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI: -+ case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI: -+ case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI: -+ case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI: -+ case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI: -+ case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI: -+ nargs = 5; -+ mask_pos = 2; -+ nargs_constant = 1; -+ break; -+ case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI: -+ case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI: -+ case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI: -+ case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI: -+ case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI: -+ case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI: -+ case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI: -+ case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI: -+ case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI: -+ case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI: -+ nargs = 5; -+ mask_pos = 1; -+ nargs_constant = 1; -+ break; -+ case V64QI_FTYPE_V64QI_V64QI_INT_V64QI_UDI: -+ case V32QI_FTYPE_V32QI_V32QI_INT_V32QI_USI: -+ case V16QI_FTYPE_V16QI_V16QI_INT_V16QI_UHI: -+ case V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT: -+ case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_INT: -+ case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_INT: -+ case V16HI_FTYPE_V16HI_V16HI_INT_V16HI_INT: -+ case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_INT: -+ case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_INT: -+ case V8HI_FTYPE_V8HI_V8HI_INT_V8HI_INT: -+ case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_INT: -+ case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_INT: -+ nargs = 5; -+ mask_pos = 1; -+ nargs_constant = 2; -+ break; -+ -+ default: -+ gcc_unreachable (); -+ } -+ -+ gcc_assert (nargs <= ARRAY_SIZE (args)); -+ -+ if (comparison != UNKNOWN) -+ { -+ gcc_assert (nargs == 2); -+ return ix86_expand_sse_compare (d, exp, target, swap); -+ } -+ -+ if (rmode == VOIDmode || rmode == tmode) -+ { -+ if (optimize -+ || target == 0 -+ || GET_MODE (target) != tmode -+ || !insn_p->operand[0].predicate (target, tmode)) -+ target = gen_reg_rtx (tmode); -+ else if (memory_operand (target, tmode)) -+ num_memory++; -+ real_target = target; -+ } -+ else -+ { -+ real_target = gen_reg_rtx (tmode); -+ target = lowpart_subreg (rmode, real_target, tmode); -+ } -+ -+ for (i = 0; i < nargs; i++) -+ { -+ tree arg = CALL_EXPR_ARG (exp, i); -+ rtx op = expand_normal (arg); -+ 
machine_mode mode = insn_p->operand[i + 1].mode; -+ bool match = insn_p->operand[i + 1].predicate (op, mode); -+ -+ if (second_arg_count && i == 1) -+ { -+ /* SIMD shift insns take either an 8-bit immediate or -+ register as count. But builtin functions take int as -+ count. If count doesn't match, we put it in register. -+ The instructions are using 64-bit count, if op is just -+ 32-bit, zero-extend it, as negative shift counts -+ are undefined behavior and zero-extension is more -+ efficient. */ -+ if (!match) -+ { -+ if (SCALAR_INT_MODE_P (GET_MODE (op))) -+ op = convert_modes (mode, GET_MODE (op), op, 1); -+ else -+ op = lowpart_subreg (mode, op, GET_MODE (op)); -+ if (!insn_p->operand[i + 1].predicate (op, mode)) -+ op = copy_to_reg (op); -+ } -+ } -+ else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) || -+ (!mask_pos && (nargs - i) <= nargs_constant)) -+ { -+ if (!match) -+ switch (icode) -+ { -+ case CODE_FOR_avx_vinsertf128v4di: -+ case CODE_FOR_avx_vextractf128v4di: -+ error ("the last argument must be an 1-bit immediate"); -+ return const0_rtx; -+ -+ case CODE_FOR_avx512f_cmpv8di3_mask: -+ case CODE_FOR_avx512f_cmpv16si3_mask: -+ case CODE_FOR_avx512f_ucmpv8di3_mask: -+ case CODE_FOR_avx512f_ucmpv16si3_mask: -+ case CODE_FOR_avx512vl_cmpv4di3_mask: -+ case CODE_FOR_avx512vl_cmpv8si3_mask: -+ case CODE_FOR_avx512vl_ucmpv4di3_mask: -+ case CODE_FOR_avx512vl_ucmpv8si3_mask: -+ case CODE_FOR_avx512vl_cmpv2di3_mask: -+ case CODE_FOR_avx512vl_cmpv4si3_mask: -+ case CODE_FOR_avx512vl_ucmpv2di3_mask: -+ case CODE_FOR_avx512vl_ucmpv4si3_mask: -+ error ("the last argument must be a 3-bit immediate"); -+ return const0_rtx; -+ -+ case CODE_FOR_sse4_1_roundsd: -+ case CODE_FOR_sse4_1_roundss: -+ -+ case CODE_FOR_sse4_1_roundpd: -+ case CODE_FOR_sse4_1_roundps: -+ case CODE_FOR_avx_roundpd256: -+ case CODE_FOR_avx_roundps256: -+ -+ case CODE_FOR_sse4_1_roundpd_vec_pack_sfix: -+ case CODE_FOR_sse4_1_roundps_sfix: -+ case CODE_FOR_avx_roundpd_vec_pack_sfix256: -+ case CODE_FOR_avx_roundps_sfix256: -+ -+ case CODE_FOR_sse4_1_blendps: -+ case CODE_FOR_avx_blendpd256: -+ case CODE_FOR_avx_vpermilv4df: -+ case CODE_FOR_avx_vpermilv4df_mask: -+ case CODE_FOR_avx512f_getmantv8df_mask: -+ case CODE_FOR_avx512f_getmantv16sf_mask: -+ case CODE_FOR_avx512vl_getmantv8sf_mask: -+ case CODE_FOR_avx512vl_getmantv4df_mask: -+ case CODE_FOR_avx512vl_getmantv4sf_mask: -+ case CODE_FOR_avx512vl_getmantv2df_mask: -+ case CODE_FOR_avx512dq_rangepv8df_mask_round: -+ case CODE_FOR_avx512dq_rangepv16sf_mask_round: -+ case CODE_FOR_avx512dq_rangepv4df_mask: -+ case CODE_FOR_avx512dq_rangepv8sf_mask: -+ case CODE_FOR_avx512dq_rangepv2df_mask: -+ case CODE_FOR_avx512dq_rangepv4sf_mask: -+ case CODE_FOR_avx_shufpd256_mask: -+ error ("the last argument must be a 4-bit immediate"); -+ return const0_rtx; -+ -+ case CODE_FOR_sha1rnds4: -+ case CODE_FOR_sse4_1_blendpd: -+ case CODE_FOR_avx_vpermilv2df: -+ case CODE_FOR_avx_vpermilv2df_mask: -+ case CODE_FOR_xop_vpermil2v2df3: -+ case CODE_FOR_xop_vpermil2v4sf3: -+ case CODE_FOR_xop_vpermil2v4df3: -+ case CODE_FOR_xop_vpermil2v8sf3: -+ case CODE_FOR_avx512f_vinsertf32x4_mask: -+ case CODE_FOR_avx512f_vinserti32x4_mask: -+ case CODE_FOR_avx512f_vextractf32x4_mask: -+ case CODE_FOR_avx512f_vextracti32x4_mask: -+ case CODE_FOR_sse2_shufpd: -+ case CODE_FOR_sse2_shufpd_mask: -+ case CODE_FOR_avx512dq_shuf_f64x2_mask: -+ case CODE_FOR_avx512dq_shuf_i64x2_mask: -+ case CODE_FOR_avx512vl_shuf_i32x4_mask: -+ case CODE_FOR_avx512vl_shuf_f32x4_mask: -+ error ("the last 
argument must be a 2-bit immediate"); -+ return const0_rtx; -+ -+ case CODE_FOR_avx_vextractf128v4df: -+ case CODE_FOR_avx_vextractf128v8sf: -+ case CODE_FOR_avx_vextractf128v8si: -+ case CODE_FOR_avx_vinsertf128v4df: -+ case CODE_FOR_avx_vinsertf128v8sf: -+ case CODE_FOR_avx_vinsertf128v8si: -+ case CODE_FOR_avx512f_vinsertf64x4_mask: -+ case CODE_FOR_avx512f_vinserti64x4_mask: -+ case CODE_FOR_avx512f_vextractf64x4_mask: -+ case CODE_FOR_avx512f_vextracti64x4_mask: -+ case CODE_FOR_avx512dq_vinsertf32x8_mask: -+ case CODE_FOR_avx512dq_vinserti32x8_mask: -+ case CODE_FOR_avx512vl_vinsertv4df: -+ case CODE_FOR_avx512vl_vinsertv4di: -+ case CODE_FOR_avx512vl_vinsertv8sf: -+ case CODE_FOR_avx512vl_vinsertv8si: -+ error ("the last argument must be a 1-bit immediate"); -+ return const0_rtx; -+ -+ case CODE_FOR_avx_vmcmpv2df3: -+ case CODE_FOR_avx_vmcmpv4sf3: -+ case CODE_FOR_avx_cmpv2df3: -+ case CODE_FOR_avx_cmpv4sf3: -+ case CODE_FOR_avx_cmpv4df3: -+ case CODE_FOR_avx_cmpv8sf3: -+ case CODE_FOR_avx512f_cmpv8df3_mask: -+ case CODE_FOR_avx512f_cmpv16sf3_mask: -+ case CODE_FOR_avx512f_vmcmpv2df3_mask: -+ case CODE_FOR_avx512f_vmcmpv4sf3_mask: -+ error ("the last argument must be a 5-bit immediate"); -+ return const0_rtx; -+ -+ default: -+ switch (nargs_constant) -+ { -+ case 2: -+ if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) || -+ (!mask_pos && (nargs - i) == nargs_constant)) -+ { -+ error ("the next to last argument must be an 8-bit immediate"); -+ break; -+ } -+ /* FALLTHRU */ -+ case 1: -+ error ("the last argument must be an 8-bit immediate"); -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ return const0_rtx; -+ } -+ } -+ else -+ { -+ if (VECTOR_MODE_P (mode)) -+ op = safe_vector_operand (op, mode); -+ -+ /* If we aren't optimizing, only allow one memory operand to -+ be generated. */ -+ if (memory_operand (op, mode)) -+ num_memory++; -+ -+ op = fixup_modeless_constant (op, mode); -+ -+ if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode) -+ { -+ if (optimize || !match || num_memory > 1) -+ op = copy_to_mode_reg (mode, op); -+ } -+ else -+ { -+ op = copy_to_reg (op); -+ op = lowpart_subreg (mode, op, GET_MODE (op)); -+ } -+ } -+ -+ args[i].op = op; -+ args[i].mode = mode; -+ } -+ -+ switch (nargs) -+ { -+ case 1: -+ pat = GEN_FCN (icode) (real_target, args[0].op); -+ break; -+ case 2: -+ pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op); -+ break; -+ case 3: -+ pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op, -+ args[2].op); -+ break; -+ case 4: -+ pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op, -+ args[2].op, args[3].op); -+ break; -+ case 5: -+ pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op, -+ args[2].op, args[3].op, args[4].op); -+ break; -+ case 6: -+ pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op, -+ args[2].op, args[3].op, args[4].op, -+ args[5].op); -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ -+ if (! 
pat) -+ return 0; -+ -+ emit_insn (pat); -+ return target; -+} -+ -+/* Transform pattern of following layout: -+ (set A -+ (unspec [B C] UNSPEC_EMBEDDED_ROUNDING)) -+ ) -+ into: -+ (set (A B)) */ -+ -+static rtx -+ix86_erase_embedded_rounding (rtx pat) -+{ -+ if (GET_CODE (pat) == INSN) -+ pat = PATTERN (pat); -+ -+ gcc_assert (GET_CODE (pat) == SET); -+ rtx src = SET_SRC (pat); -+ gcc_assert (XVECLEN (src, 0) == 2); -+ rtx p0 = XVECEXP (src, 0, 0); -+ gcc_assert (GET_CODE (src) == UNSPEC -+ && XINT (src, 1) == UNSPEC_EMBEDDED_ROUNDING); -+ rtx res = gen_rtx_SET (SET_DEST (pat), p0); -+ return res; -+} -+ -+/* Subroutine of ix86_expand_round_builtin to take care of comi insns -+ with rounding. */ -+static rtx -+ix86_expand_sse_comi_round (const struct builtin_description *d, -+ tree exp, rtx target) -+{ -+ rtx pat, set_dst; -+ tree arg0 = CALL_EXPR_ARG (exp, 0); -+ tree arg1 = CALL_EXPR_ARG (exp, 1); -+ tree arg2 = CALL_EXPR_ARG (exp, 2); -+ tree arg3 = CALL_EXPR_ARG (exp, 3); -+ rtx op0 = expand_normal (arg0); -+ rtx op1 = expand_normal (arg1); -+ rtx op2 = expand_normal (arg2); -+ rtx op3 = expand_normal (arg3); -+ enum insn_code icode = d->icode; -+ const struct insn_data_d *insn_p = &insn_data[icode]; -+ machine_mode mode0 = insn_p->operand[0].mode; -+ machine_mode mode1 = insn_p->operand[1].mode; -+ enum rtx_code comparison = UNEQ; -+ bool need_ucomi = false; -+ -+ /* See avxintrin.h for values. */ -+ enum rtx_code comi_comparisons[32] = -+ { -+ UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT, -+ UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE, -+ UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT -+ }; -+ bool need_ucomi_values[32] = -+ { -+ true, false, false, true, true, false, false, true, -+ true, false, false, true, true, false, false, true, -+ false, true, true, false, false, true, true, false, -+ false, true, true, false, false, true, true, false -+ }; -+ -+ if (!CONST_INT_P (op2)) -+ { -+ error ("the third argument must be comparison constant"); -+ return const0_rtx; -+ } -+ if (INTVAL (op2) < 0 || INTVAL (op2) >= 32) -+ { -+ error ("incorrect comparison mode"); -+ return const0_rtx; -+ } -+ -+ if (!insn_p->operand[2].predicate (op3, SImode)) -+ { -+ error ("incorrect rounding operand"); -+ return const0_rtx; -+ } -+ -+ comparison = comi_comparisons[INTVAL (op2)]; -+ need_ucomi = need_ucomi_values[INTVAL (op2)]; -+ -+ if (VECTOR_MODE_P (mode0)) -+ op0 = safe_vector_operand (op0, mode0); -+ if (VECTOR_MODE_P (mode1)) -+ op1 = safe_vector_operand (op1, mode1); -+ -+ target = gen_reg_rtx (SImode); -+ emit_move_insn (target, const0_rtx); -+ target = gen_rtx_SUBREG (QImode, target, 0); -+ -+ if ((optimize && !register_operand (op0, mode0)) -+ || !insn_p->operand[0].predicate (op0, mode0)) -+ op0 = copy_to_mode_reg (mode0, op0); -+ if ((optimize && !register_operand (op1, mode1)) -+ || !insn_p->operand[1].predicate (op1, mode1)) -+ op1 = copy_to_mode_reg (mode1, op1); -+ -+ if (need_ucomi) -+ icode = icode == CODE_FOR_sse_comi_round -+ ? CODE_FOR_sse_ucomi_round -+ : CODE_FOR_sse2_ucomi_round; -+ -+ pat = GEN_FCN (icode) (op0, op1, op3); -+ if (! pat) -+ return 0; -+ -+ /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */ -+ if (INTVAL (op3) == NO_ROUND) -+ { -+ pat = ix86_erase_embedded_rounding (pat); -+ if (! 
pat) -+ return 0; -+ -+ set_dst = SET_DEST (pat); -+ } -+ else -+ { -+ gcc_assert (GET_CODE (pat) == SET); -+ set_dst = SET_DEST (pat); -+ } -+ -+ emit_insn (pat); -+ emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target), -+ gen_rtx_fmt_ee (comparison, QImode, -+ set_dst, -+ const0_rtx))); -+ -+ return SUBREG_REG (target); -+} -+ -+static rtx -+ix86_expand_round_builtin (const struct builtin_description *d, -+ tree exp, rtx target) -+{ -+ rtx pat; -+ unsigned int i, nargs; -+ struct -+ { -+ rtx op; -+ machine_mode mode; -+ } args[6]; -+ enum insn_code icode = d->icode; -+ const struct insn_data_d *insn_p = &insn_data[icode]; -+ machine_mode tmode = insn_p->operand[0].mode; -+ unsigned int nargs_constant = 0; -+ unsigned int redundant_embed_rnd = 0; -+ -+ switch ((enum ix86_builtin_func_type) d->flag) -+ { -+ case UINT64_FTYPE_V2DF_INT: -+ case UINT64_FTYPE_V4SF_INT: -+ case UINT_FTYPE_V2DF_INT: -+ case UINT_FTYPE_V4SF_INT: -+ case INT64_FTYPE_V2DF_INT: -+ case INT64_FTYPE_V4SF_INT: -+ case INT_FTYPE_V2DF_INT: -+ case INT_FTYPE_V4SF_INT: -+ nargs = 2; -+ break; -+ case V4SF_FTYPE_V4SF_UINT_INT: -+ case V4SF_FTYPE_V4SF_UINT64_INT: -+ case V2DF_FTYPE_V2DF_UINT64_INT: -+ case V4SF_FTYPE_V4SF_INT_INT: -+ case V4SF_FTYPE_V4SF_INT64_INT: -+ case V2DF_FTYPE_V2DF_INT64_INT: -+ case V4SF_FTYPE_V4SF_V4SF_INT: -+ case V2DF_FTYPE_V2DF_V2DF_INT: -+ case V4SF_FTYPE_V4SF_V2DF_INT: -+ case V2DF_FTYPE_V2DF_V4SF_INT: -+ nargs = 3; -+ break; -+ case V8SF_FTYPE_V8DF_V8SF_QI_INT: -+ case V8DF_FTYPE_V8DF_V8DF_QI_INT: -+ case V8SI_FTYPE_V8DF_V8SI_QI_INT: -+ case V8DI_FTYPE_V8DF_V8DI_QI_INT: -+ case V8SF_FTYPE_V8DI_V8SF_QI_INT: -+ case V8DF_FTYPE_V8DI_V8DF_QI_INT: -+ case V16SF_FTYPE_V16SF_V16SF_HI_INT: -+ case V8DI_FTYPE_V8SF_V8DI_QI_INT: -+ case V16SF_FTYPE_V16SI_V16SF_HI_INT: -+ case V16SI_FTYPE_V16SF_V16SI_HI_INT: -+ case V8DF_FTYPE_V8SF_V8DF_QI_INT: -+ case V16SF_FTYPE_V16HI_V16SF_HI_INT: -+ case V2DF_FTYPE_V2DF_V2DF_V2DF_INT: -+ case V4SF_FTYPE_V4SF_V4SF_V4SF_INT: -+ nargs = 4; -+ break; -+ case V4SF_FTYPE_V4SF_V4SF_INT_INT: -+ case V2DF_FTYPE_V2DF_V2DF_INT_INT: -+ nargs_constant = 2; -+ nargs = 4; -+ break; -+ case INT_FTYPE_V4SF_V4SF_INT_INT: -+ case INT_FTYPE_V2DF_V2DF_INT_INT: -+ return ix86_expand_sse_comi_round (d, exp, target); -+ case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT: -+ case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT: -+ case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT: -+ case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT: -+ case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT: -+ case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT: -+ case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT: -+ case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT: -+ nargs = 5; -+ break; -+ case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT: -+ case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT: -+ nargs_constant = 4; -+ nargs = 5; -+ break; -+ case UQI_FTYPE_V8DF_V8DF_INT_UQI_INT: -+ case UQI_FTYPE_V2DF_V2DF_INT_UQI_INT: -+ case UHI_FTYPE_V16SF_V16SF_INT_UHI_INT: -+ case UQI_FTYPE_V4SF_V4SF_INT_UQI_INT: -+ nargs_constant = 3; -+ nargs = 5; -+ break; -+ case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT: -+ case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT: -+ case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT: -+ case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT: -+ case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT: -+ case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT: -+ nargs = 6; -+ nargs_constant = 4; -+ break; -+ case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT: -+ case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT: -+ case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT: -+ case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT: -+ nargs = 6; -+ nargs_constant = 3; -+ break; -+ 
default: -+ gcc_unreachable (); -+ } -+ gcc_assert (nargs <= ARRAY_SIZE (args)); -+ -+ if (optimize -+ || target == 0 -+ || GET_MODE (target) != tmode -+ || !insn_p->operand[0].predicate (target, tmode)) -+ target = gen_reg_rtx (tmode); -+ -+ for (i = 0; i < nargs; i++) -+ { -+ tree arg = CALL_EXPR_ARG (exp, i); -+ rtx op = expand_normal (arg); -+ machine_mode mode = insn_p->operand[i + 1].mode; -+ bool match = insn_p->operand[i + 1].predicate (op, mode); -+ -+ if (i == nargs - nargs_constant) -+ { -+ if (!match) -+ { -+ switch (icode) -+ { -+ case CODE_FOR_avx512f_getmantv8df_mask_round: -+ case CODE_FOR_avx512f_getmantv16sf_mask_round: -+ case CODE_FOR_avx512f_vgetmantv2df_round: -+ case CODE_FOR_avx512f_vgetmantv2df_mask_round: -+ case CODE_FOR_avx512f_vgetmantv4sf_round: -+ case CODE_FOR_avx512f_vgetmantv4sf_mask_round: -+ error ("the immediate argument must be a 4-bit immediate"); -+ return const0_rtx; -+ case CODE_FOR_avx512f_cmpv8df3_mask_round: -+ case CODE_FOR_avx512f_cmpv16sf3_mask_round: -+ case CODE_FOR_avx512f_vmcmpv2df3_mask_round: -+ case CODE_FOR_avx512f_vmcmpv4sf3_mask_round: -+ error ("the immediate argument must be a 5-bit immediate"); -+ return const0_rtx; -+ default: -+ error ("the immediate argument must be an 8-bit immediate"); -+ return const0_rtx; -+ } -+ } -+ } -+ else if (i == nargs-1) -+ { -+ if (!insn_p->operand[nargs].predicate (op, SImode)) -+ { -+ error ("incorrect rounding operand"); -+ return const0_rtx; -+ } -+ -+ /* If there is no rounding use normal version of the pattern. */ -+ if (INTVAL (op) == NO_ROUND) -+ redundant_embed_rnd = 1; -+ } -+ else -+ { -+ if (VECTOR_MODE_P (mode)) -+ op = safe_vector_operand (op, mode); -+ -+ op = fixup_modeless_constant (op, mode); -+ -+ if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode) -+ { -+ if (optimize || !match) -+ op = copy_to_mode_reg (mode, op); -+ } -+ else -+ { -+ op = copy_to_reg (op); -+ op = lowpart_subreg (mode, op, GET_MODE (op)); -+ } -+ } -+ -+ args[i].op = op; -+ args[i].mode = mode; -+ } -+ -+ switch (nargs) -+ { -+ case 1: -+ pat = GEN_FCN (icode) (target, args[0].op); -+ break; -+ case 2: -+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op); -+ break; -+ case 3: -+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op, -+ args[2].op); -+ break; -+ case 4: -+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op, -+ args[2].op, args[3].op); -+ break; -+ case 5: -+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op, -+ args[2].op, args[3].op, args[4].op); -+ break; -+ case 6: -+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op, -+ args[2].op, args[3].op, args[4].op, -+ args[5].op); -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ -+ if (!pat) -+ return 0; -+ -+ if (redundant_embed_rnd) -+ pat = ix86_erase_embedded_rounding (pat); -+ -+ emit_insn (pat); -+ return target; -+} -+ -+/* Subroutine of ix86_expand_builtin to take care of special insns -+ with variable number of operands. 
*/ -+ -+static rtx -+ix86_expand_special_args_builtin (const struct builtin_description *d, -+ tree exp, rtx target) -+{ -+ tree arg; -+ rtx pat, op; -+ unsigned int i, nargs, arg_adjust, memory; -+ bool aligned_mem = false; -+ struct -+ { -+ rtx op; -+ machine_mode mode; -+ } args[3]; -+ enum insn_code icode = d->icode; -+ bool last_arg_constant = false; -+ const struct insn_data_d *insn_p = &insn_data[icode]; -+ machine_mode tmode = insn_p->operand[0].mode; -+ enum { load, store } klass; -+ -+ switch ((enum ix86_builtin_func_type) d->flag) -+ { -+ case VOID_FTYPE_VOID: -+ emit_insn (GEN_FCN (icode) (target)); -+ return 0; -+ case VOID_FTYPE_UINT64: -+ case VOID_FTYPE_UNSIGNED: -+ nargs = 0; -+ klass = store; -+ memory = 0; -+ break; -+ -+ case INT_FTYPE_VOID: -+ case USHORT_FTYPE_VOID: -+ case UINT64_FTYPE_VOID: -+ case UINT_FTYPE_VOID: -+ case UNSIGNED_FTYPE_VOID: -+ nargs = 0; -+ klass = load; -+ memory = 0; -+ break; -+ case UINT64_FTYPE_PUNSIGNED: -+ case V2DI_FTYPE_PV2DI: -+ case V4DI_FTYPE_PV4DI: -+ case V32QI_FTYPE_PCCHAR: -+ case V16QI_FTYPE_PCCHAR: -+ case V8SF_FTYPE_PCV4SF: -+ case V8SF_FTYPE_PCFLOAT: -+ case V4SF_FTYPE_PCFLOAT: -+ case V4DF_FTYPE_PCV2DF: -+ case V4DF_FTYPE_PCDOUBLE: -+ case V2DF_FTYPE_PCDOUBLE: -+ case VOID_FTYPE_PVOID: -+ case V8DI_FTYPE_PV8DI: -+ nargs = 1; -+ klass = load; -+ memory = 0; -+ switch (icode) -+ { -+ case CODE_FOR_sse4_1_movntdqa: -+ case CODE_FOR_avx2_movntdqa: -+ case CODE_FOR_avx512f_movntdqa: -+ aligned_mem = true; -+ break; -+ default: -+ break; -+ } -+ break; -+ case VOID_FTYPE_PV2SF_V4SF: -+ case VOID_FTYPE_PV8DI_V8DI: -+ case VOID_FTYPE_PV4DI_V4DI: -+ case VOID_FTYPE_PV2DI_V2DI: -+ case VOID_FTYPE_PCHAR_V32QI: -+ case VOID_FTYPE_PCHAR_V16QI: -+ case VOID_FTYPE_PFLOAT_V16SF: -+ case VOID_FTYPE_PFLOAT_V8SF: -+ case VOID_FTYPE_PFLOAT_V4SF: -+ case VOID_FTYPE_PDOUBLE_V8DF: -+ case VOID_FTYPE_PDOUBLE_V4DF: -+ case VOID_FTYPE_PDOUBLE_V2DF: -+ case VOID_FTYPE_PLONGLONG_LONGLONG: -+ case VOID_FTYPE_PULONGLONG_ULONGLONG: -+ case VOID_FTYPE_PUNSIGNED_UNSIGNED: -+ case VOID_FTYPE_PINT_INT: -+ nargs = 1; -+ klass = store; -+ /* Reserve memory operand for target. */ -+ memory = ARRAY_SIZE (args); -+ switch (icode) -+ { -+ /* These builtins and instructions require the memory -+ to be properly aligned. 
*/ -+ case CODE_FOR_avx_movntv4di: -+ case CODE_FOR_sse2_movntv2di: -+ case CODE_FOR_avx_movntv8sf: -+ case CODE_FOR_sse_movntv4sf: -+ case CODE_FOR_sse4a_vmmovntv4sf: -+ case CODE_FOR_avx_movntv4df: -+ case CODE_FOR_sse2_movntv2df: -+ case CODE_FOR_sse4a_vmmovntv2df: -+ case CODE_FOR_sse2_movntidi: -+ case CODE_FOR_sse_movntq: -+ case CODE_FOR_sse2_movntisi: -+ case CODE_FOR_avx512f_movntv16sf: -+ case CODE_FOR_avx512f_movntv8df: -+ case CODE_FOR_avx512f_movntv8di: -+ aligned_mem = true; -+ break; -+ default: -+ break; -+ } -+ break; -+ case VOID_FTYPE_PVOID_PCVOID: -+ nargs = 1; -+ klass = store; -+ memory = 0; -+ -+ break; -+ case V4SF_FTYPE_V4SF_PCV2SF: -+ case V2DF_FTYPE_V2DF_PCDOUBLE: -+ nargs = 2; -+ klass = load; -+ memory = 1; -+ break; -+ case V8SF_FTYPE_PCV8SF_V8SI: -+ case V4DF_FTYPE_PCV4DF_V4DI: -+ case V4SF_FTYPE_PCV4SF_V4SI: -+ case V2DF_FTYPE_PCV2DF_V2DI: -+ case V8SI_FTYPE_PCV8SI_V8SI: -+ case V4DI_FTYPE_PCV4DI_V4DI: -+ case V4SI_FTYPE_PCV4SI_V4SI: -+ case V2DI_FTYPE_PCV2DI_V2DI: -+ case VOID_FTYPE_INT_INT64: -+ nargs = 2; -+ klass = load; -+ memory = 0; -+ break; -+ case VOID_FTYPE_PV8DF_V8DF_UQI: -+ case VOID_FTYPE_PV4DF_V4DF_UQI: -+ case VOID_FTYPE_PV2DF_V2DF_UQI: -+ case VOID_FTYPE_PV16SF_V16SF_UHI: -+ case VOID_FTYPE_PV8SF_V8SF_UQI: -+ case VOID_FTYPE_PV4SF_V4SF_UQI: -+ case VOID_FTYPE_PV8DI_V8DI_UQI: -+ case VOID_FTYPE_PV4DI_V4DI_UQI: -+ case VOID_FTYPE_PV2DI_V2DI_UQI: -+ case VOID_FTYPE_PV16SI_V16SI_UHI: -+ case VOID_FTYPE_PV8SI_V8SI_UQI: -+ case VOID_FTYPE_PV4SI_V4SI_UQI: -+ case VOID_FTYPE_PV64QI_V64QI_UDI: -+ case VOID_FTYPE_PV32HI_V32HI_USI: -+ case VOID_FTYPE_PV32QI_V32QI_USI: -+ case VOID_FTYPE_PV16QI_V16QI_UHI: -+ case VOID_FTYPE_PV16HI_V16HI_UHI: -+ case VOID_FTYPE_PV8HI_V8HI_UQI: -+ switch (icode) -+ { -+ /* These builtins and instructions require the memory -+ to be properly aligned. 
*/ -+ case CODE_FOR_avx512f_storev16sf_mask: -+ case CODE_FOR_avx512f_storev16si_mask: -+ case CODE_FOR_avx512f_storev8df_mask: -+ case CODE_FOR_avx512f_storev8di_mask: -+ case CODE_FOR_avx512vl_storev8sf_mask: -+ case CODE_FOR_avx512vl_storev8si_mask: -+ case CODE_FOR_avx512vl_storev4df_mask: -+ case CODE_FOR_avx512vl_storev4di_mask: -+ case CODE_FOR_avx512vl_storev4sf_mask: -+ case CODE_FOR_avx512vl_storev4si_mask: -+ case CODE_FOR_avx512vl_storev2df_mask: -+ case CODE_FOR_avx512vl_storev2di_mask: -+ aligned_mem = true; -+ break; -+ default: -+ break; -+ } -+ /* FALLTHRU */ -+ case VOID_FTYPE_PV8SF_V8SI_V8SF: -+ case VOID_FTYPE_PV4DF_V4DI_V4DF: -+ case VOID_FTYPE_PV4SF_V4SI_V4SF: -+ case VOID_FTYPE_PV2DF_V2DI_V2DF: -+ case VOID_FTYPE_PV8SI_V8SI_V8SI: -+ case VOID_FTYPE_PV4DI_V4DI_V4DI: -+ case VOID_FTYPE_PV4SI_V4SI_V4SI: -+ case VOID_FTYPE_PV2DI_V2DI_V2DI: -+ case VOID_FTYPE_PV8SI_V8DI_UQI: -+ case VOID_FTYPE_PV8HI_V8DI_UQI: -+ case VOID_FTYPE_PV16HI_V16SI_UHI: -+ case VOID_FTYPE_PV16QI_V8DI_UQI: -+ case VOID_FTYPE_PV16QI_V16SI_UHI: -+ case VOID_FTYPE_PV4SI_V4DI_UQI: -+ case VOID_FTYPE_PV4SI_V2DI_UQI: -+ case VOID_FTYPE_PV8HI_V4DI_UQI: -+ case VOID_FTYPE_PV8HI_V2DI_UQI: -+ case VOID_FTYPE_PV8HI_V8SI_UQI: -+ case VOID_FTYPE_PV8HI_V4SI_UQI: -+ case VOID_FTYPE_PV16QI_V4DI_UQI: -+ case VOID_FTYPE_PV16QI_V2DI_UQI: -+ case VOID_FTYPE_PV16QI_V8SI_UQI: -+ case VOID_FTYPE_PV16QI_V4SI_UQI: -+ case VOID_FTYPE_PCHAR_V64QI_UDI: -+ case VOID_FTYPE_PCHAR_V32QI_USI: -+ case VOID_FTYPE_PCHAR_V16QI_UHI: -+ case VOID_FTYPE_PSHORT_V32HI_USI: -+ case VOID_FTYPE_PSHORT_V16HI_UHI: -+ case VOID_FTYPE_PSHORT_V8HI_UQI: -+ case VOID_FTYPE_PINT_V16SI_UHI: -+ case VOID_FTYPE_PINT_V8SI_UQI: -+ case VOID_FTYPE_PINT_V4SI_UQI: -+ case VOID_FTYPE_PINT64_V8DI_UQI: -+ case VOID_FTYPE_PINT64_V4DI_UQI: -+ case VOID_FTYPE_PINT64_V2DI_UQI: -+ case VOID_FTYPE_PDOUBLE_V8DF_UQI: -+ case VOID_FTYPE_PDOUBLE_V4DF_UQI: -+ case VOID_FTYPE_PDOUBLE_V2DF_UQI: -+ case VOID_FTYPE_PFLOAT_V16SF_UHI: -+ case VOID_FTYPE_PFLOAT_V8SF_UQI: -+ case VOID_FTYPE_PFLOAT_V4SF_UQI: -+ case VOID_FTYPE_PV32QI_V32HI_USI: -+ case VOID_FTYPE_PV16QI_V16HI_UHI: -+ case VOID_FTYPE_PV8QI_V8HI_UQI: -+ nargs = 2; -+ klass = store; -+ /* Reserve memory operand for target. */ -+ memory = ARRAY_SIZE (args); -+ break; -+ case V4SF_FTYPE_PCV4SF_V4SF_UQI: -+ case V8SF_FTYPE_PCV8SF_V8SF_UQI: -+ case V16SF_FTYPE_PCV16SF_V16SF_UHI: -+ case V4SI_FTYPE_PCV4SI_V4SI_UQI: -+ case V8SI_FTYPE_PCV8SI_V8SI_UQI: -+ case V16SI_FTYPE_PCV16SI_V16SI_UHI: -+ case V2DF_FTYPE_PCV2DF_V2DF_UQI: -+ case V4DF_FTYPE_PCV4DF_V4DF_UQI: -+ case V8DF_FTYPE_PCV8DF_V8DF_UQI: -+ case V2DI_FTYPE_PCV2DI_V2DI_UQI: -+ case V4DI_FTYPE_PCV4DI_V4DI_UQI: -+ case V8DI_FTYPE_PCV8DI_V8DI_UQI: -+ case V64QI_FTYPE_PCV64QI_V64QI_UDI: -+ case V32HI_FTYPE_PCV32HI_V32HI_USI: -+ case V32QI_FTYPE_PCV32QI_V32QI_USI: -+ case V16QI_FTYPE_PCV16QI_V16QI_UHI: -+ case V16HI_FTYPE_PCV16HI_V16HI_UHI: -+ case V8HI_FTYPE_PCV8HI_V8HI_UQI: -+ switch (icode) -+ { -+ /* These builtins and instructions require the memory -+ to be properly aligned. 
*/ -+ case CODE_FOR_avx512f_loadv16sf_mask: -+ case CODE_FOR_avx512f_loadv16si_mask: -+ case CODE_FOR_avx512f_loadv8df_mask: -+ case CODE_FOR_avx512f_loadv8di_mask: -+ case CODE_FOR_avx512vl_loadv8sf_mask: -+ case CODE_FOR_avx512vl_loadv8si_mask: -+ case CODE_FOR_avx512vl_loadv4df_mask: -+ case CODE_FOR_avx512vl_loadv4di_mask: -+ case CODE_FOR_avx512vl_loadv4sf_mask: -+ case CODE_FOR_avx512vl_loadv4si_mask: -+ case CODE_FOR_avx512vl_loadv2df_mask: -+ case CODE_FOR_avx512vl_loadv2di_mask: -+ case CODE_FOR_avx512bw_loadv64qi_mask: -+ case CODE_FOR_avx512vl_loadv32qi_mask: -+ case CODE_FOR_avx512vl_loadv16qi_mask: -+ case CODE_FOR_avx512bw_loadv32hi_mask: -+ case CODE_FOR_avx512vl_loadv16hi_mask: -+ case CODE_FOR_avx512vl_loadv8hi_mask: -+ aligned_mem = true; -+ break; -+ default: -+ break; -+ } -+ /* FALLTHRU */ -+ case V64QI_FTYPE_PCCHAR_V64QI_UDI: -+ case V32QI_FTYPE_PCCHAR_V32QI_USI: -+ case V16QI_FTYPE_PCCHAR_V16QI_UHI: -+ case V32HI_FTYPE_PCSHORT_V32HI_USI: -+ case V16HI_FTYPE_PCSHORT_V16HI_UHI: -+ case V8HI_FTYPE_PCSHORT_V8HI_UQI: -+ case V16SI_FTYPE_PCINT_V16SI_UHI: -+ case V8SI_FTYPE_PCINT_V8SI_UQI: -+ case V4SI_FTYPE_PCINT_V4SI_UQI: -+ case V8DI_FTYPE_PCINT64_V8DI_UQI: -+ case V4DI_FTYPE_PCINT64_V4DI_UQI: -+ case V2DI_FTYPE_PCINT64_V2DI_UQI: -+ case V8DF_FTYPE_PCDOUBLE_V8DF_UQI: -+ case V4DF_FTYPE_PCDOUBLE_V4DF_UQI: -+ case V2DF_FTYPE_PCDOUBLE_V2DF_UQI: -+ case V16SF_FTYPE_PCFLOAT_V16SF_UHI: -+ case V8SF_FTYPE_PCFLOAT_V8SF_UQI: -+ case V4SF_FTYPE_PCFLOAT_V4SF_UQI: -+ nargs = 3; -+ klass = load; -+ memory = 0; -+ break; -+ case VOID_FTYPE_UINT_UINT_UINT: -+ case VOID_FTYPE_UINT64_UINT_UINT: -+ case UCHAR_FTYPE_UINT_UINT_UINT: -+ case UCHAR_FTYPE_UINT64_UINT_UINT: -+ nargs = 3; -+ klass = load; -+ memory = ARRAY_SIZE (args); -+ last_arg_constant = true; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ -+ gcc_assert (nargs <= ARRAY_SIZE (args)); -+ -+ if (klass == store) -+ { -+ arg = CALL_EXPR_ARG (exp, 0); -+ op = expand_normal (arg); -+ gcc_assert (target == 0); -+ if (memory) -+ { -+ op = ix86_zero_extend_to_Pmode (op); -+ target = gen_rtx_MEM (tmode, op); -+ /* target at this point has just BITS_PER_UNIT MEM_ALIGN -+ on it. Try to improve it using get_pointer_alignment, -+ and if the special builtin is one that requires strict -+ mode alignment, also from it's GET_MODE_ALIGNMENT. -+ Failure to do so could lead to ix86_legitimate_combined_insn -+ rejecting all changes to such insns. 
*/ -+ unsigned int align = get_pointer_alignment (arg); -+ if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode)) -+ align = GET_MODE_ALIGNMENT (tmode); -+ if (MEM_ALIGN (target) < align) -+ set_mem_align (target, align); -+ } -+ else -+ target = force_reg (tmode, op); -+ arg_adjust = 1; -+ } -+ else -+ { -+ arg_adjust = 0; -+ if (optimize -+ || target == 0 -+ || !register_operand (target, tmode) -+ || GET_MODE (target) != tmode) -+ target = gen_reg_rtx (tmode); -+ } -+ -+ for (i = 0; i < nargs; i++) -+ { -+ machine_mode mode = insn_p->operand[i + 1].mode; -+ bool match; -+ -+ arg = CALL_EXPR_ARG (exp, i + arg_adjust); -+ op = expand_normal (arg); -+ match = insn_p->operand[i + 1].predicate (op, mode); -+ -+ if (last_arg_constant && (i + 1) == nargs) -+ { -+ if (!match) -+ { -+ if (icode == CODE_FOR_lwp_lwpvalsi3 -+ || icode == CODE_FOR_lwp_lwpinssi3 -+ || icode == CODE_FOR_lwp_lwpvaldi3 -+ || icode == CODE_FOR_lwp_lwpinsdi3) -+ error ("the last argument must be a 32-bit immediate"); -+ else -+ error ("the last argument must be an 8-bit immediate"); -+ return const0_rtx; -+ } -+ } -+ else -+ { -+ if (i == memory) -+ { -+ /* This must be the memory operand. */ -+ op = ix86_zero_extend_to_Pmode (op); -+ op = gen_rtx_MEM (mode, op); -+ /* op at this point has just BITS_PER_UNIT MEM_ALIGN -+ on it. Try to improve it using get_pointer_alignment, -+ and if the special builtin is one that requires strict -+ mode alignment, also from it's GET_MODE_ALIGNMENT. -+ Failure to do so could lead to ix86_legitimate_combined_insn -+ rejecting all changes to such insns. */ -+ unsigned int align = get_pointer_alignment (arg); -+ if (aligned_mem && align < GET_MODE_ALIGNMENT (mode)) -+ align = GET_MODE_ALIGNMENT (mode); -+ if (MEM_ALIGN (op) < align) -+ set_mem_align (op, align); -+ } -+ else -+ { -+ /* This must be register. */ -+ if (VECTOR_MODE_P (mode)) -+ op = safe_vector_operand (op, mode); -+ -+ op = fixup_modeless_constant (op, mode); -+ -+ if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode) -+ op = copy_to_mode_reg (mode, op); -+ else -+ { -+ op = copy_to_reg (op); -+ op = lowpart_subreg (mode, op, GET_MODE (op)); -+ } -+ } -+ } -+ -+ args[i].op = op; -+ args[i].mode = mode; -+ } -+ -+ switch (nargs) -+ { -+ case 0: -+ pat = GEN_FCN (icode) (target); -+ break; -+ case 1: -+ pat = GEN_FCN (icode) (target, args[0].op); -+ break; -+ case 2: -+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op); -+ break; -+ case 3: -+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op); -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ -+ if (! pat) -+ return 0; -+ emit_insn (pat); -+ return klass == store ? 0 : target; -+} -+ -+/* Return the integer constant in ARG. Constrain it to be in the range -+ of the subparts of VEC_TYPE; issue an error if not. */ -+ -+static int -+get_element_number (tree vec_type, tree arg) -+{ -+ unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1; -+ -+ if (!tree_fits_uhwi_p (arg) -+ || (elt = tree_to_uhwi (arg), elt > max)) -+ { -+ error ("selector must be an integer constant in the range " -+ "[0, %wi]", max); -+ return 0; -+ } -+ -+ return elt; -+} -+ -+/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around -+ ix86_expand_vector_init. We DO have language-level syntax for this, in -+ the form of (type){ init-list }. Except that since we can't place emms -+ instructions from inside the compiler, we can't allow the use of MMX -+ registers unless the user explicitly asks for it. 
So we do *not* define -+ vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead -+ we have builtins invoked by mmintrin.h that gives us license to emit -+ these sorts of instructions. */ -+ -+static rtx -+ix86_expand_vec_init_builtin (tree type, tree exp, rtx target) -+{ -+ machine_mode tmode = TYPE_MODE (type); -+ machine_mode inner_mode = GET_MODE_INNER (tmode); -+ int i, n_elt = GET_MODE_NUNITS (tmode); -+ rtvec v = rtvec_alloc (n_elt); -+ -+ gcc_assert (VECTOR_MODE_P (tmode)); -+ gcc_assert (call_expr_nargs (exp) == n_elt); -+ -+ for (i = 0; i < n_elt; ++i) -+ { -+ rtx x = expand_normal (CALL_EXPR_ARG (exp, i)); -+ RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x); -+ } -+ -+ if (!target || !register_operand (target, tmode)) -+ target = gen_reg_rtx (tmode); -+ -+ ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v)); -+ return target; -+} -+ -+/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around -+ ix86_expand_vector_extract. They would be redundant (for non-MMX) if we -+ had a language-level syntax for referencing vector elements. */ -+ -+static rtx -+ix86_expand_vec_ext_builtin (tree exp, rtx target) -+{ -+ machine_mode tmode, mode0; -+ tree arg0, arg1; -+ int elt; -+ rtx op0; -+ -+ arg0 = CALL_EXPR_ARG (exp, 0); -+ arg1 = CALL_EXPR_ARG (exp, 1); -+ -+ op0 = expand_normal (arg0); -+ elt = get_element_number (TREE_TYPE (arg0), arg1); -+ -+ tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); -+ mode0 = TYPE_MODE (TREE_TYPE (arg0)); -+ gcc_assert (VECTOR_MODE_P (mode0)); -+ -+ op0 = force_reg (mode0, op0); -+ -+ if (optimize || !target || !register_operand (target, tmode)) -+ target = gen_reg_rtx (tmode); -+ -+ ix86_expand_vector_extract (true, target, op0, elt); -+ -+ return target; -+} -+ -+/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around -+ ix86_expand_vector_set. They would be redundant (for non-MMX) if we had -+ a language-level syntax for referencing vector elements. */ -+ -+static rtx -+ix86_expand_vec_set_builtin (tree exp) -+{ -+ machine_mode tmode, mode1; -+ tree arg0, arg1, arg2; -+ int elt; -+ rtx op0, op1, target; -+ -+ arg0 = CALL_EXPR_ARG (exp, 0); -+ arg1 = CALL_EXPR_ARG (exp, 1); -+ arg2 = CALL_EXPR_ARG (exp, 2); -+ -+ tmode = TYPE_MODE (TREE_TYPE (arg0)); -+ mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); -+ gcc_assert (VECTOR_MODE_P (tmode)); -+ -+ op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL); -+ op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL); -+ elt = get_element_number (TREE_TYPE (arg0), arg2); -+ -+ if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode) -+ op1 = convert_modes (mode1, GET_MODE (op1), op1, true); -+ -+ op0 = force_reg (tmode, op0); -+ op1 = force_reg (mode1, op1); -+ -+ /* OP0 is the source of these builtin functions and shouldn't be -+ modified. Create a copy, use it and return it as target. */ -+ target = gen_reg_rtx (tmode); -+ emit_move_insn (target, op0); -+ ix86_expand_vector_set (true, target, op1, elt); -+ -+ return target; -+} -+ -+/* Expand an expression EXP that calls a built-in function, -+ with result going to TARGET if that's convenient -+ (and in mode MODE if that's convenient). -+ SUBTARGET may be used as the target for computing one of EXP's operands. -+ IGNORE is nonzero if the value is to be ignored. 
*/ -+ -+rtx -+ix86_expand_builtin (tree exp, rtx target, rtx subtarget, -+ machine_mode mode, int ignore) -+{ -+ size_t i; -+ enum insn_code icode, icode2; -+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); -+ tree arg0, arg1, arg2, arg3, arg4; -+ rtx op0, op1, op2, op3, op4, pat, pat2, insn; -+ machine_mode mode0, mode1, mode2, mode3, mode4; -+ unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl); -+ -+ /* For CPU builtins that can be folded, fold first and expand the fold. */ -+ switch (fcode) -+ { -+ case IX86_BUILTIN_CPU_INIT: -+ { -+ /* Make it call __cpu_indicator_init in libgcc. */ -+ tree call_expr, fndecl, type; -+ type = build_function_type_list (integer_type_node, NULL_TREE); -+ fndecl = build_fn_decl ("__cpu_indicator_init", type); -+ call_expr = build_call_expr (fndecl, 0); -+ return expand_expr (call_expr, target, mode, EXPAND_NORMAL); -+ } -+ case IX86_BUILTIN_CPU_IS: -+ case IX86_BUILTIN_CPU_SUPPORTS: -+ { -+ tree arg0 = CALL_EXPR_ARG (exp, 0); -+ tree fold_expr = fold_builtin_cpu (fndecl, &arg0); -+ gcc_assert (fold_expr != NULL_TREE); -+ return expand_expr (fold_expr, target, mode, EXPAND_NORMAL); -+ } -+ } -+ -+ HOST_WIDE_INT isa = ix86_isa_flags; -+ HOST_WIDE_INT isa2 = ix86_isa_flags2; -+ HOST_WIDE_INT bisa = ix86_builtins_isa[fcode].isa; -+ HOST_WIDE_INT bisa2 = ix86_builtins_isa[fcode].isa2; -+ /* The general case is we require all the ISAs specified in bisa{,2} -+ to be enabled. -+ The exceptions are: -+ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A -+ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 -+ OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4 -+ where for each this pair it is sufficient if either of the ISAs is -+ enabled, plus if it is ored with other options also those others. */ -+ if (((bisa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A)) -+ == (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A)) -+ && (isa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A)) != 0) -+ isa |= (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A); -+ if (((bisa & (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32)) -+ == (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32)) -+ && (isa & (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32)) != 0) -+ isa |= (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32); -+ if (((bisa & (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4)) -+ == (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4)) -+ && (isa & (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4)) != 0) -+ isa |= (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4); -+ if ((bisa & isa) != bisa || (bisa2 & isa2) != bisa2) -+ { -+ bool add_abi_p = bisa & OPTION_MASK_ISA_64BIT; -+ if (TARGET_ABI_X32) -+ bisa |= OPTION_MASK_ABI_X32; -+ else -+ bisa |= OPTION_MASK_ABI_64; -+ char *opts = ix86_target_string (bisa, bisa2, 0, 0, NULL, NULL, -+ (enum fpmath_unit) 0, false, add_abi_p); -+ if (!opts) -+ error ("%qE needs unknown isa option", fndecl); -+ else -+ { -+ gcc_assert (opts != NULL); -+ error ("%qE needs isa option %s", fndecl, opts); -+ free (opts); -+ } -+ return expand_call (exp, target, ignore); -+ } -+ -+ switch (fcode) -+ { -+ case IX86_BUILTIN_MASKMOVQ: -+ case IX86_BUILTIN_MASKMOVDQU: -+ icode = (fcode == IX86_BUILTIN_MASKMOVQ -+ ? CODE_FOR_mmx_maskmovq -+ : CODE_FOR_sse2_maskmovdqu); -+ /* Note the arg order is different from the operand order. 
*/ -+ arg1 = CALL_EXPR_ARG (exp, 0); -+ arg2 = CALL_EXPR_ARG (exp, 1); -+ arg0 = CALL_EXPR_ARG (exp, 2); -+ op0 = expand_normal (arg0); -+ op1 = expand_normal (arg1); -+ op2 = expand_normal (arg2); -+ mode0 = insn_data[icode].operand[0].mode; -+ mode1 = insn_data[icode].operand[1].mode; -+ mode2 = insn_data[icode].operand[2].mode; -+ -+ op0 = ix86_zero_extend_to_Pmode (op0); -+ op0 = gen_rtx_MEM (mode1, op0); -+ -+ if (!insn_data[icode].operand[0].predicate (op0, mode0)) -+ op0 = copy_to_mode_reg (mode0, op0); -+ if (!insn_data[icode].operand[1].predicate (op1, mode1)) -+ op1 = copy_to_mode_reg (mode1, op1); -+ if (!insn_data[icode].operand[2].predicate (op2, mode2)) -+ op2 = copy_to_mode_reg (mode2, op2); -+ pat = GEN_FCN (icode) (op0, op1, op2); -+ if (! pat) -+ return 0; -+ emit_insn (pat); -+ return 0; -+ -+ case IX86_BUILTIN_LDMXCSR: -+ op0 = expand_normal (CALL_EXPR_ARG (exp, 0)); -+ target = assign_386_stack_local (SImode, SLOT_TEMP); -+ emit_move_insn (target, op0); -+ emit_insn (gen_sse_ldmxcsr (target)); -+ return 0; -+ -+ case IX86_BUILTIN_STMXCSR: -+ target = assign_386_stack_local (SImode, SLOT_TEMP); -+ emit_insn (gen_sse_stmxcsr (target)); -+ return copy_to_mode_reg (SImode, target); -+ -+ case IX86_BUILTIN_CLFLUSH: -+ arg0 = CALL_EXPR_ARG (exp, 0); -+ op0 = expand_normal (arg0); -+ icode = CODE_FOR_sse2_clflush; -+ if (!insn_data[icode].operand[0].predicate (op0, Pmode)) -+ op0 = ix86_zero_extend_to_Pmode (op0); -+ -+ emit_insn (gen_sse2_clflush (op0)); -+ return 0; -+ -+ case IX86_BUILTIN_CLWB: -+ arg0 = CALL_EXPR_ARG (exp, 0); -+ op0 = expand_normal (arg0); -+ icode = CODE_FOR_clwb; -+ if (!insn_data[icode].operand[0].predicate (op0, Pmode)) -+ op0 = ix86_zero_extend_to_Pmode (op0); -+ -+ emit_insn (gen_clwb (op0)); -+ return 0; -+ -+ case IX86_BUILTIN_CLFLUSHOPT: -+ arg0 = CALL_EXPR_ARG (exp, 0); -+ op0 = expand_normal (arg0); -+ icode = CODE_FOR_clflushopt; -+ if (!insn_data[icode].operand[0].predicate (op0, Pmode)) -+ op0 = ix86_zero_extend_to_Pmode (op0); -+ -+ emit_insn (gen_clflushopt (op0)); -+ return 0; -+ -+ case IX86_BUILTIN_MONITOR: -+ case IX86_BUILTIN_MONITORX: -+ arg0 = CALL_EXPR_ARG (exp, 0); -+ arg1 = CALL_EXPR_ARG (exp, 1); -+ arg2 = CALL_EXPR_ARG (exp, 2); -+ op0 = expand_normal (arg0); -+ op1 = expand_normal (arg1); -+ op2 = expand_normal (arg2); -+ if (!REG_P (op0)) -+ op0 = ix86_zero_extend_to_Pmode (op0); -+ if (!REG_P (op1)) -+ op1 = copy_to_mode_reg (SImode, op1); -+ if (!REG_P (op2)) -+ op2 = copy_to_mode_reg (SImode, op2); -+ -+ emit_insn (fcode == IX86_BUILTIN_MONITOR -+ ? 
ix86_gen_monitor (op0, op1, op2) -+ : ix86_gen_monitorx (op0, op1, op2)); -+ return 0; -+ -+ case IX86_BUILTIN_MWAIT: -+ arg0 = CALL_EXPR_ARG (exp, 0); -+ arg1 = CALL_EXPR_ARG (exp, 1); -+ op0 = expand_normal (arg0); -+ op1 = expand_normal (arg1); -+ if (!REG_P (op0)) -+ op0 = copy_to_mode_reg (SImode, op0); -+ if (!REG_P (op1)) -+ op1 = copy_to_mode_reg (SImode, op1); -+ emit_insn (gen_sse3_mwait (op0, op1)); -+ return 0; -+ -+ case IX86_BUILTIN_MWAITX: -+ arg0 = CALL_EXPR_ARG (exp, 0); -+ arg1 = CALL_EXPR_ARG (exp, 1); -+ arg2 = CALL_EXPR_ARG (exp, 2); -+ op0 = expand_normal (arg0); -+ op1 = expand_normal (arg1); -+ op2 = expand_normal (arg2); -+ if (!REG_P (op0)) -+ op0 = copy_to_mode_reg (SImode, op0); -+ if (!REG_P (op1)) -+ op1 = copy_to_mode_reg (SImode, op1); -+ if (!REG_P (op2)) -+ op2 = copy_to_mode_reg (SImode, op2); -+ emit_insn (gen_mwaitx (op0, op1, op2)); -+ return 0; -+ -+ case IX86_BUILTIN_UMONITOR: -+ arg0 = CALL_EXPR_ARG (exp, 0); -+ op0 = expand_normal (arg0); -+ -+ op0 = ix86_zero_extend_to_Pmode (op0); -+ -+ insn = (TARGET_64BIT -+ ? gen_umonitor_di (op0) -+ : gen_umonitor_si (op0)); -+ -+ emit_insn (insn); -+ return 0; -+ -+ case IX86_BUILTIN_UMWAIT: -+ case IX86_BUILTIN_TPAUSE: -+ arg0 = CALL_EXPR_ARG (exp, 0); -+ arg1 = CALL_EXPR_ARG (exp, 1); -+ op0 = expand_normal (arg0); -+ op1 = expand_normal (arg1); -+ -+ if (!REG_P (op0)) -+ op0 = copy_to_mode_reg (SImode, op0); -+ -+ op1 = force_reg (DImode, op1); -+ -+ if (TARGET_64BIT) -+ { -+ op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32), -+ NULL, 1, OPTAB_DIRECT); -+ switch (fcode) -+ { -+ case IX86_BUILTIN_UMWAIT: -+ icode = CODE_FOR_umwait_rex64; -+ break; -+ case IX86_BUILTIN_TPAUSE: -+ icode = CODE_FOR_tpause_rex64; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ -+ op2 = gen_lowpart (SImode, op2); -+ op1 = gen_lowpart (SImode, op1); -+ pat = GEN_FCN (icode) (op0, op1, op2); -+ } -+ else -+ { -+ switch (fcode) -+ { -+ case IX86_BUILTIN_UMWAIT: -+ icode = CODE_FOR_umwait; -+ break; -+ case IX86_BUILTIN_TPAUSE: -+ icode = CODE_FOR_tpause; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ pat = GEN_FCN (icode) (op0, op1); -+ } -+ -+ if (!pat) -+ return 0; -+ -+ emit_insn (pat); -+ -+ if (target == 0 -+ || !register_operand (target, QImode)) -+ target = gen_reg_rtx (QImode); -+ -+ pat = gen_rtx_EQ (QImode, gen_rtx_REG (CCCmode, FLAGS_REG), -+ const0_rtx); -+ emit_insn (gen_rtx_SET (target, pat)); -+ -+ return target; -+ -+ case IX86_BUILTIN_CLZERO: -+ arg0 = CALL_EXPR_ARG (exp, 0); -+ op0 = expand_normal (arg0); -+ if (!REG_P (op0)) -+ op0 = ix86_zero_extend_to_Pmode (op0); -+ emit_insn (ix86_gen_clzero (op0)); -+ return 0; -+ -+ case IX86_BUILTIN_CLDEMOTE: -+ arg0 = CALL_EXPR_ARG (exp, 0); -+ op0 = expand_normal (arg0); -+ icode = CODE_FOR_cldemote; -+ if (!insn_data[icode].operand[0].predicate (op0, Pmode)) -+ op0 = ix86_zero_extend_to_Pmode (op0); -+ -+ emit_insn (gen_cldemote (op0)); -+ return 0; -+ -+ case IX86_BUILTIN_VEC_INIT_V2SI: -+ case IX86_BUILTIN_VEC_INIT_V4HI: -+ case IX86_BUILTIN_VEC_INIT_V8QI: -+ return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target); -+ -+ case IX86_BUILTIN_VEC_EXT_V2DF: -+ case IX86_BUILTIN_VEC_EXT_V2DI: -+ case IX86_BUILTIN_VEC_EXT_V4SF: -+ case IX86_BUILTIN_VEC_EXT_V4SI: -+ case IX86_BUILTIN_VEC_EXT_V8HI: -+ case IX86_BUILTIN_VEC_EXT_V2SI: -+ case IX86_BUILTIN_VEC_EXT_V4HI: -+ case IX86_BUILTIN_VEC_EXT_V16QI: -+ return ix86_expand_vec_ext_builtin (exp, target); -+ -+ case IX86_BUILTIN_VEC_SET_V2DI: -+ case IX86_BUILTIN_VEC_SET_V4SF: -+ case 
IX86_BUILTIN_VEC_SET_V4SI: -+ case IX86_BUILTIN_VEC_SET_V8HI: -+ case IX86_BUILTIN_VEC_SET_V4HI: -+ case IX86_BUILTIN_VEC_SET_V16QI: -+ return ix86_expand_vec_set_builtin (exp); -+ -+ case IX86_BUILTIN_NANQ: -+ case IX86_BUILTIN_NANSQ: -+ return expand_call (exp, target, ignore); -+ -+ case IX86_BUILTIN_RDPID: -+ -+ op0 = gen_reg_rtx (word_mode); -+ -+ if (TARGET_64BIT) -+ { -+ insn = gen_rdpid_rex64 (op0); -+ op0 = convert_to_mode (SImode, op0, 1); -+ } -+ else -+ insn = gen_rdpid (op0); -+ -+ emit_insn (insn); -+ -+ if (target == 0 -+ || !register_operand (target, SImode)) -+ target = gen_reg_rtx (SImode); -+ -+ emit_move_insn (target, op0); -+ return target; -+ -+ case IX86_BUILTIN_RDPMC: -+ case IX86_BUILTIN_RDTSC: -+ case IX86_BUILTIN_RDTSCP: -+ case IX86_BUILTIN_XGETBV: -+ -+ op0 = gen_reg_rtx (DImode); -+ op1 = gen_reg_rtx (DImode); -+ -+ if (fcode == IX86_BUILTIN_RDPMC) -+ { -+ arg0 = CALL_EXPR_ARG (exp, 0); -+ op2 = expand_normal (arg0); -+ if (!register_operand (op2, SImode)) -+ op2 = copy_to_mode_reg (SImode, op2); -+ -+ insn = (TARGET_64BIT -+ ? gen_rdpmc_rex64 (op0, op1, op2) -+ : gen_rdpmc (op0, op2)); -+ emit_insn (insn); -+ } -+ else if (fcode == IX86_BUILTIN_XGETBV) -+ { -+ arg0 = CALL_EXPR_ARG (exp, 0); -+ op2 = expand_normal (arg0); -+ if (!register_operand (op2, SImode)) -+ op2 = copy_to_mode_reg (SImode, op2); -+ -+ insn = (TARGET_64BIT -+ ? gen_xgetbv_rex64 (op0, op1, op2) -+ : gen_xgetbv (op0, op2)); -+ emit_insn (insn); -+ } -+ else if (fcode == IX86_BUILTIN_RDTSC) -+ { -+ insn = (TARGET_64BIT -+ ? gen_rdtsc_rex64 (op0, op1) -+ : gen_rdtsc (op0)); -+ emit_insn (insn); -+ } -+ else -+ { -+ op2 = gen_reg_rtx (SImode); -+ -+ insn = (TARGET_64BIT -+ ? gen_rdtscp_rex64 (op0, op1, op2) -+ : gen_rdtscp (op0, op2)); -+ emit_insn (insn); -+ -+ arg0 = CALL_EXPR_ARG (exp, 0); -+ op4 = expand_normal (arg0); -+ if (!address_operand (op4, VOIDmode)) -+ { -+ op4 = convert_memory_address (Pmode, op4); -+ op4 = copy_addr_to_reg (op4); -+ } -+ emit_move_insn (gen_rtx_MEM (SImode, op4), op2); -+ } -+ -+ if (target == 0 -+ || !register_operand (target, DImode)) -+ target = gen_reg_rtx (DImode); -+ -+ if (TARGET_64BIT) -+ { -+ op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32), -+ op1, 1, OPTAB_DIRECT); -+ op0 = expand_simple_binop (DImode, IOR, op0, op1, -+ op0, 1, OPTAB_DIRECT); -+ } -+ -+ emit_move_insn (target, op0); -+ return target; -+ -+ case IX86_BUILTIN_MOVDIR64B: -+ -+ arg0 = CALL_EXPR_ARG (exp, 0); -+ arg1 = CALL_EXPR_ARG (exp, 1); -+ op0 = expand_normal (arg0); -+ op1 = expand_normal (arg1); -+ -+ op0 = ix86_zero_extend_to_Pmode (op0); -+ if (!address_operand (op1, VOIDmode)) -+ { -+ op1 = convert_memory_address (Pmode, op1); -+ op1 = copy_addr_to_reg (op1); -+ } -+ op1 = gen_rtx_MEM (XImode, op1); -+ -+ insn = (TARGET_64BIT -+ ? 
gen_movdir64b_di (op0, op1) -+ : gen_movdir64b_si (op0, op1)); -+ emit_insn (insn); -+ return 0; -+ -+ case IX86_BUILTIN_FXSAVE: -+ case IX86_BUILTIN_FXRSTOR: -+ case IX86_BUILTIN_FXSAVE64: -+ case IX86_BUILTIN_FXRSTOR64: -+ case IX86_BUILTIN_FNSTENV: -+ case IX86_BUILTIN_FLDENV: -+ mode0 = BLKmode; -+ switch (fcode) -+ { -+ case IX86_BUILTIN_FXSAVE: -+ icode = CODE_FOR_fxsave; -+ break; -+ case IX86_BUILTIN_FXRSTOR: -+ icode = CODE_FOR_fxrstor; -+ break; -+ case IX86_BUILTIN_FXSAVE64: -+ icode = CODE_FOR_fxsave64; -+ break; -+ case IX86_BUILTIN_FXRSTOR64: -+ icode = CODE_FOR_fxrstor64; -+ break; -+ case IX86_BUILTIN_FNSTENV: -+ icode = CODE_FOR_fnstenv; -+ break; -+ case IX86_BUILTIN_FLDENV: -+ icode = CODE_FOR_fldenv; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ -+ arg0 = CALL_EXPR_ARG (exp, 0); -+ op0 = expand_normal (arg0); -+ -+ if (!address_operand (op0, VOIDmode)) -+ { -+ op0 = convert_memory_address (Pmode, op0); -+ op0 = copy_addr_to_reg (op0); -+ } -+ op0 = gen_rtx_MEM (mode0, op0); -+ -+ pat = GEN_FCN (icode) (op0); -+ if (pat) -+ emit_insn (pat); -+ return 0; -+ -+ case IX86_BUILTIN_XSETBV: -+ arg0 = CALL_EXPR_ARG (exp, 0); -+ arg1 = CALL_EXPR_ARG (exp, 1); -+ op0 = expand_normal (arg0); -+ op1 = expand_normal (arg1); -+ -+ if (!REG_P (op0)) -+ op0 = copy_to_mode_reg (SImode, op0); -+ -+ op1 = force_reg (DImode, op1); -+ -+ if (TARGET_64BIT) -+ { -+ op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32), -+ NULL, 1, OPTAB_DIRECT); -+ -+ icode = CODE_FOR_xsetbv_rex64; -+ -+ op2 = gen_lowpart (SImode, op2); -+ op1 = gen_lowpart (SImode, op1); -+ pat = GEN_FCN (icode) (op0, op1, op2); -+ } -+ else -+ { -+ icode = CODE_FOR_xsetbv; -+ -+ pat = GEN_FCN (icode) (op0, op1); -+ } -+ if (pat) -+ emit_insn (pat); -+ return 0; -+ -+ case IX86_BUILTIN_XSAVE: -+ case IX86_BUILTIN_XRSTOR: -+ case IX86_BUILTIN_XSAVE64: -+ case IX86_BUILTIN_XRSTOR64: -+ case IX86_BUILTIN_XSAVEOPT: -+ case IX86_BUILTIN_XSAVEOPT64: -+ case IX86_BUILTIN_XSAVES: -+ case IX86_BUILTIN_XRSTORS: -+ case IX86_BUILTIN_XSAVES64: -+ case IX86_BUILTIN_XRSTORS64: -+ case IX86_BUILTIN_XSAVEC: -+ case IX86_BUILTIN_XSAVEC64: -+ arg0 = CALL_EXPR_ARG (exp, 0); -+ arg1 = CALL_EXPR_ARG (exp, 1); -+ op0 = expand_normal (arg0); -+ op1 = expand_normal (arg1); -+ -+ if (!address_operand (op0, VOIDmode)) -+ { -+ op0 = convert_memory_address (Pmode, op0); -+ op0 = copy_addr_to_reg (op0); -+ } -+ op0 = gen_rtx_MEM (BLKmode, op0); -+ -+ op1 = force_reg (DImode, op1); -+ -+ if (TARGET_64BIT) -+ { -+ op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32), -+ NULL, 1, OPTAB_DIRECT); -+ switch (fcode) -+ { -+ case IX86_BUILTIN_XSAVE: -+ icode = CODE_FOR_xsave_rex64; -+ break; -+ case IX86_BUILTIN_XRSTOR: -+ icode = CODE_FOR_xrstor_rex64; -+ break; -+ case IX86_BUILTIN_XSAVE64: -+ icode = CODE_FOR_xsave64; -+ break; -+ case IX86_BUILTIN_XRSTOR64: -+ icode = CODE_FOR_xrstor64; -+ break; -+ case IX86_BUILTIN_XSAVEOPT: -+ icode = CODE_FOR_xsaveopt_rex64; -+ break; -+ case IX86_BUILTIN_XSAVEOPT64: -+ icode = CODE_FOR_xsaveopt64; -+ break; -+ case IX86_BUILTIN_XSAVES: -+ icode = CODE_FOR_xsaves_rex64; -+ break; -+ case IX86_BUILTIN_XRSTORS: -+ icode = CODE_FOR_xrstors_rex64; -+ break; -+ case IX86_BUILTIN_XSAVES64: -+ icode = CODE_FOR_xsaves64; -+ break; -+ case IX86_BUILTIN_XRSTORS64: -+ icode = CODE_FOR_xrstors64; -+ break; -+ case IX86_BUILTIN_XSAVEC: -+ icode = CODE_FOR_xsavec_rex64; -+ break; -+ case IX86_BUILTIN_XSAVEC64: -+ icode = CODE_FOR_xsavec64; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ -+ op2 = 
gen_lowpart (SImode, op2); -+ op1 = gen_lowpart (SImode, op1); -+ pat = GEN_FCN (icode) (op0, op1, op2); -+ } -+ else -+ { -+ switch (fcode) -+ { -+ case IX86_BUILTIN_XSAVE: -+ icode = CODE_FOR_xsave; -+ break; -+ case IX86_BUILTIN_XRSTOR: -+ icode = CODE_FOR_xrstor; -+ break; -+ case IX86_BUILTIN_XSAVEOPT: -+ icode = CODE_FOR_xsaveopt; -+ break; -+ case IX86_BUILTIN_XSAVES: -+ icode = CODE_FOR_xsaves; -+ break; -+ case IX86_BUILTIN_XRSTORS: -+ icode = CODE_FOR_xrstors; -+ break; -+ case IX86_BUILTIN_XSAVEC: -+ icode = CODE_FOR_xsavec; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ pat = GEN_FCN (icode) (op0, op1); -+ } -+ -+ if (pat) -+ emit_insn (pat); -+ return 0; -+ -+ case IX86_BUILTIN_LLWPCB: -+ arg0 = CALL_EXPR_ARG (exp, 0); -+ op0 = expand_normal (arg0); -+ icode = CODE_FOR_lwp_llwpcb; -+ if (!insn_data[icode].operand[0].predicate (op0, Pmode)) -+ op0 = ix86_zero_extend_to_Pmode (op0); -+ emit_insn (gen_lwp_llwpcb (op0)); -+ return 0; -+ -+ case IX86_BUILTIN_SLWPCB: -+ icode = CODE_FOR_lwp_slwpcb; -+ if (!target -+ || !insn_data[icode].operand[0].predicate (target, Pmode)) -+ target = gen_reg_rtx (Pmode); -+ emit_insn (gen_lwp_slwpcb (target)); -+ return target; -+ -+ case IX86_BUILTIN_BEXTRI32: -+ case IX86_BUILTIN_BEXTRI64: -+ arg0 = CALL_EXPR_ARG (exp, 0); -+ arg1 = CALL_EXPR_ARG (exp, 1); -+ op0 = expand_normal (arg0); -+ op1 = expand_normal (arg1); -+ icode = (fcode == IX86_BUILTIN_BEXTRI32 -+ ? CODE_FOR_tbm_bextri_si -+ : CODE_FOR_tbm_bextri_di); -+ if (!CONST_INT_P (op1)) -+ { -+ error ("last argument must be an immediate"); -+ return const0_rtx; -+ } -+ else -+ { -+ unsigned char length = (INTVAL (op1) >> 8) & 0xFF; -+ unsigned char lsb_index = INTVAL (op1) & 0xFF; -+ op1 = GEN_INT (length); -+ op2 = GEN_INT (lsb_index); -+ -+ mode1 = insn_data[icode].operand[1].mode; -+ if (!insn_data[icode].operand[1].predicate (op0, mode1)) -+ op0 = copy_to_mode_reg (mode1, op0); -+ -+ mode0 = insn_data[icode].operand[0].mode; -+ if (target == 0 -+ || !register_operand (target, mode0)) -+ target = gen_reg_rtx (mode0); -+ -+ pat = GEN_FCN (icode) (target, op0, op1, op2); -+ if (pat) -+ emit_insn (pat); -+ return target; -+ } -+ -+ case IX86_BUILTIN_RDRAND16_STEP: -+ icode = CODE_FOR_rdrandhi_1; -+ mode0 = HImode; -+ goto rdrand_step; -+ -+ case IX86_BUILTIN_RDRAND32_STEP: -+ icode = CODE_FOR_rdrandsi_1; -+ mode0 = SImode; -+ goto rdrand_step; -+ -+ case IX86_BUILTIN_RDRAND64_STEP: -+ icode = CODE_FOR_rdranddi_1; -+ mode0 = DImode; -+ -+rdrand_step: -+ arg0 = CALL_EXPR_ARG (exp, 0); -+ op1 = expand_normal (arg0); -+ if (!address_operand (op1, VOIDmode)) -+ { -+ op1 = convert_memory_address (Pmode, op1); -+ op1 = copy_addr_to_reg (op1); -+ } -+ -+ op0 = gen_reg_rtx (mode0); -+ emit_insn (GEN_FCN (icode) (op0)); -+ -+ emit_move_insn (gen_rtx_MEM (mode0, op1), op0); -+ -+ op1 = gen_reg_rtx (SImode); -+ emit_move_insn (op1, CONST1_RTX (SImode)); -+ -+ /* Emit SImode conditional move. 
*/ -+ if (mode0 == HImode) -+ { -+ if (TARGET_ZERO_EXTEND_WITH_AND -+ && optimize_function_for_speed_p (cfun)) -+ { -+ op2 = force_reg (SImode, const0_rtx); -+ -+ emit_insn (gen_movstricthi -+ (gen_lowpart (HImode, op2), op0)); -+ } -+ else -+ { -+ op2 = gen_reg_rtx (SImode); -+ -+ emit_insn (gen_zero_extendhisi2 (op2, op0)); -+ } -+ } -+ else if (mode0 == SImode) -+ op2 = op0; -+ else -+ op2 = gen_rtx_SUBREG (SImode, op0, 0); -+ -+ if (target == 0 -+ || !register_operand (target, SImode)) -+ target = gen_reg_rtx (SImode); -+ -+ pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG), -+ const0_rtx); -+ emit_insn (gen_rtx_SET (target, -+ gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1))); -+ return target; -+ -+ case IX86_BUILTIN_RDSEED16_STEP: -+ icode = CODE_FOR_rdseedhi_1; -+ mode0 = HImode; -+ goto rdseed_step; -+ -+ case IX86_BUILTIN_RDSEED32_STEP: -+ icode = CODE_FOR_rdseedsi_1; -+ mode0 = SImode; -+ goto rdseed_step; -+ -+ case IX86_BUILTIN_RDSEED64_STEP: -+ icode = CODE_FOR_rdseeddi_1; -+ mode0 = DImode; -+ -+rdseed_step: -+ arg0 = CALL_EXPR_ARG (exp, 0); -+ op1 = expand_normal (arg0); -+ if (!address_operand (op1, VOIDmode)) -+ { -+ op1 = convert_memory_address (Pmode, op1); -+ op1 = copy_addr_to_reg (op1); -+ } -+ -+ op0 = gen_reg_rtx (mode0); -+ emit_insn (GEN_FCN (icode) (op0)); -+ -+ emit_move_insn (gen_rtx_MEM (mode0, op1), op0); -+ -+ op2 = gen_reg_rtx (QImode); -+ -+ pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG), -+ const0_rtx); -+ emit_insn (gen_rtx_SET (op2, pat)); -+ -+ if (target == 0 -+ || !register_operand (target, SImode)) -+ target = gen_reg_rtx (SImode); -+ -+ emit_insn (gen_zero_extendqisi2 (target, op2)); -+ return target; -+ -+ case IX86_BUILTIN_SBB32: -+ icode = CODE_FOR_subborrowsi; -+ icode2 = CODE_FOR_subborrowsi_0; -+ mode0 = SImode; -+ mode1 = DImode; -+ mode2 = CCmode; -+ goto handlecarry; -+ -+ case IX86_BUILTIN_SBB64: -+ icode = CODE_FOR_subborrowdi; -+ icode2 = CODE_FOR_subborrowdi_0; -+ mode0 = DImode; -+ mode1 = TImode; -+ mode2 = CCmode; -+ goto handlecarry; -+ -+ case IX86_BUILTIN_ADDCARRYX32: -+ icode = CODE_FOR_addcarrysi; -+ icode2 = CODE_FOR_addcarrysi_0; -+ mode0 = SImode; -+ mode1 = DImode; -+ mode2 = CCCmode; -+ goto handlecarry; -+ -+ case IX86_BUILTIN_ADDCARRYX64: -+ icode = CODE_FOR_addcarrydi; -+ icode2 = CODE_FOR_addcarrydi_0; -+ mode0 = DImode; -+ mode1 = TImode; -+ mode2 = CCCmode; -+ -+ handlecarry: -+ arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */ -+ arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */ -+ arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */ -+ arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */ -+ -+ op1 = expand_normal (arg0); -+ if (!integer_zerop (arg0)) -+ op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1)); -+ -+ op2 = expand_normal (arg1); -+ if (!register_operand (op2, mode0)) -+ op2 = copy_to_mode_reg (mode0, op2); -+ -+ op3 = expand_normal (arg2); -+ if (!register_operand (op3, mode0)) -+ op3 = copy_to_mode_reg (mode0, op3); -+ -+ op4 = expand_normal (arg3); -+ if (!address_operand (op4, VOIDmode)) -+ { -+ op4 = convert_memory_address (Pmode, op4); -+ op4 = copy_addr_to_reg (op4); -+ } -+ -+ op0 = gen_reg_rtx (mode0); -+ if (integer_zerop (arg0)) -+ { -+ /* If arg0 is 0, optimize right away into add or sub -+ instruction that sets CCCmode flags. */ -+ op1 = gen_rtx_REG (mode2, FLAGS_REG); -+ emit_insn (GEN_FCN (icode2) (op0, op2, op3)); -+ } -+ else -+ { -+ /* Generate CF from input operand. 
*/ -+ emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx)); -+ -+ /* Generate instruction that consumes CF. */ -+ op1 = gen_rtx_REG (CCCmode, FLAGS_REG); -+ pat = gen_rtx_LTU (mode1, op1, const0_rtx); -+ pat2 = gen_rtx_LTU (mode0, op1, const0_rtx); -+ emit_insn (GEN_FCN (icode) (op0, op2, op3, op1, pat, pat2)); -+ } -+ -+ /* Return current CF value. */ -+ if (target == 0) -+ target = gen_reg_rtx (QImode); -+ -+ pat = gen_rtx_LTU (QImode, op1, const0_rtx); -+ emit_insn (gen_rtx_SET (target, pat)); -+ -+ /* Store the result. */ -+ emit_move_insn (gen_rtx_MEM (mode0, op4), op0); -+ -+ return target; -+ -+ case IX86_BUILTIN_READ_FLAGS: -+ emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG))); -+ -+ if (optimize -+ || target == NULL_RTX -+ || !nonimmediate_operand (target, word_mode) -+ || GET_MODE (target) != word_mode) -+ target = gen_reg_rtx (word_mode); -+ -+ emit_insn (gen_pop (target)); -+ return target; -+ -+ case IX86_BUILTIN_WRITE_FLAGS: -+ -+ arg0 = CALL_EXPR_ARG (exp, 0); -+ op0 = expand_normal (arg0); -+ if (!general_no_elim_operand (op0, word_mode)) -+ op0 = copy_to_mode_reg (word_mode, op0); -+ -+ emit_insn (gen_push (op0)); -+ emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG))); -+ return 0; -+ -+ case IX86_BUILTIN_KTESTC8: -+ icode = CODE_FOR_ktestqi; -+ mode3 = CCCmode; -+ goto kortest; -+ -+ case IX86_BUILTIN_KTESTZ8: -+ icode = CODE_FOR_ktestqi; -+ mode3 = CCZmode; -+ goto kortest; -+ -+ case IX86_BUILTIN_KTESTC16: -+ icode = CODE_FOR_ktesthi; -+ mode3 = CCCmode; -+ goto kortest; -+ -+ case IX86_BUILTIN_KTESTZ16: -+ icode = CODE_FOR_ktesthi; -+ mode3 = CCZmode; -+ goto kortest; -+ -+ case IX86_BUILTIN_KTESTC32: -+ icode = CODE_FOR_ktestsi; -+ mode3 = CCCmode; -+ goto kortest; -+ -+ case IX86_BUILTIN_KTESTZ32: -+ icode = CODE_FOR_ktestsi; -+ mode3 = CCZmode; -+ goto kortest; -+ -+ case IX86_BUILTIN_KTESTC64: -+ icode = CODE_FOR_ktestdi; -+ mode3 = CCCmode; -+ goto kortest; -+ -+ case IX86_BUILTIN_KTESTZ64: -+ icode = CODE_FOR_ktestdi; -+ mode3 = CCZmode; -+ goto kortest; -+ -+ case IX86_BUILTIN_KORTESTC8: -+ icode = CODE_FOR_kortestqi; -+ mode3 = CCCmode; -+ goto kortest; -+ -+ case IX86_BUILTIN_KORTESTZ8: -+ icode = CODE_FOR_kortestqi; -+ mode3 = CCZmode; -+ goto kortest; -+ -+ case IX86_BUILTIN_KORTESTC16: -+ icode = CODE_FOR_kortesthi; -+ mode3 = CCCmode; -+ goto kortest; -+ -+ case IX86_BUILTIN_KORTESTZ16: -+ icode = CODE_FOR_kortesthi; -+ mode3 = CCZmode; -+ goto kortest; -+ -+ case IX86_BUILTIN_KORTESTC32: -+ icode = CODE_FOR_kortestsi; -+ mode3 = CCCmode; -+ goto kortest; -+ -+ case IX86_BUILTIN_KORTESTZ32: -+ icode = CODE_FOR_kortestsi; -+ mode3 = CCZmode; -+ goto kortest; -+ -+ case IX86_BUILTIN_KORTESTC64: -+ icode = CODE_FOR_kortestdi; -+ mode3 = CCCmode; -+ goto kortest; -+ -+ case IX86_BUILTIN_KORTESTZ64: -+ icode = CODE_FOR_kortestdi; -+ mode3 = CCZmode; -+ -+ kortest: -+ arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */ -+ arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. 
*/ -+ op0 = expand_normal (arg0); -+ op1 = expand_normal (arg1); -+ -+ mode0 = insn_data[icode].operand[0].mode; -+ mode1 = insn_data[icode].operand[1].mode; -+ -+ if (GET_MODE (op0) != VOIDmode) -+ op0 = force_reg (GET_MODE (op0), op0); -+ -+ op0 = gen_lowpart (mode0, op0); -+ -+ if (!insn_data[icode].operand[0].predicate (op0, mode0)) -+ op0 = copy_to_mode_reg (mode0, op0); -+ -+ if (GET_MODE (op1) != VOIDmode) -+ op1 = force_reg (GET_MODE (op1), op1); -+ -+ op1 = gen_lowpart (mode1, op1); -+ -+ if (!insn_data[icode].operand[1].predicate (op1, mode1)) -+ op1 = copy_to_mode_reg (mode1, op1); -+ -+ target = gen_reg_rtx (QImode); -+ -+ /* Emit kortest. */ -+ emit_insn (GEN_FCN (icode) (op0, op1)); -+ /* And use setcc to return result from flags. */ -+ ix86_expand_setcc (target, EQ, -+ gen_rtx_REG (mode3, FLAGS_REG), const0_rtx); -+ return target; -+ -+ case IX86_BUILTIN_GATHERSIV2DF: -+ icode = CODE_FOR_avx2_gathersiv2df; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHERSIV4DF: -+ icode = CODE_FOR_avx2_gathersiv4df; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHERDIV2DF: -+ icode = CODE_FOR_avx2_gatherdiv2df; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHERDIV4DF: -+ icode = CODE_FOR_avx2_gatherdiv4df; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHERSIV4SF: -+ icode = CODE_FOR_avx2_gathersiv4sf; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHERSIV8SF: -+ icode = CODE_FOR_avx2_gathersiv8sf; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHERDIV4SF: -+ icode = CODE_FOR_avx2_gatherdiv4sf; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHERDIV8SF: -+ icode = CODE_FOR_avx2_gatherdiv8sf; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHERSIV2DI: -+ icode = CODE_FOR_avx2_gathersiv2di; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHERSIV4DI: -+ icode = CODE_FOR_avx2_gathersiv4di; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHERDIV2DI: -+ icode = CODE_FOR_avx2_gatherdiv2di; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHERDIV4DI: -+ icode = CODE_FOR_avx2_gatherdiv4di; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHERSIV4SI: -+ icode = CODE_FOR_avx2_gathersiv4si; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHERSIV8SI: -+ icode = CODE_FOR_avx2_gathersiv8si; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHERDIV4SI: -+ icode = CODE_FOR_avx2_gatherdiv4si; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHERDIV8SI: -+ icode = CODE_FOR_avx2_gatherdiv8si; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHERALTSIV4DF: -+ icode = CODE_FOR_avx2_gathersiv4df; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHERALTDIV8SF: -+ icode = CODE_FOR_avx2_gatherdiv8sf; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHERALTSIV4DI: -+ icode = CODE_FOR_avx2_gathersiv4di; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHERALTDIV8SI: -+ icode = CODE_FOR_avx2_gatherdiv8si; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHER3SIV16SF: -+ icode = CODE_FOR_avx512f_gathersiv16sf; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHER3SIV8DF: -+ icode = CODE_FOR_avx512f_gathersiv8df; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHER3DIV16SF: -+ icode = CODE_FOR_avx512f_gatherdiv16sf; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHER3DIV8DF: -+ icode = CODE_FOR_avx512f_gatherdiv8df; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHER3SIV16SI: -+ icode = CODE_FOR_avx512f_gathersiv16si; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHER3SIV8DI: -+ icode = CODE_FOR_avx512f_gathersiv8di; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHER3DIV16SI: -+ icode = CODE_FOR_avx512f_gatherdiv16si; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHER3DIV8DI: -+ icode = CODE_FOR_avx512f_gatherdiv8di; -+ goto gather_gen; -+ case 
IX86_BUILTIN_GATHER3ALTSIV8DF: -+ icode = CODE_FOR_avx512f_gathersiv8df; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHER3ALTDIV16SF: -+ icode = CODE_FOR_avx512f_gatherdiv16sf; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHER3ALTSIV8DI: -+ icode = CODE_FOR_avx512f_gathersiv8di; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHER3ALTDIV16SI: -+ icode = CODE_FOR_avx512f_gatherdiv16si; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHER3SIV2DF: -+ icode = CODE_FOR_avx512vl_gathersiv2df; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHER3SIV4DF: -+ icode = CODE_FOR_avx512vl_gathersiv4df; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHER3DIV2DF: -+ icode = CODE_FOR_avx512vl_gatherdiv2df; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHER3DIV4DF: -+ icode = CODE_FOR_avx512vl_gatherdiv4df; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHER3SIV4SF: -+ icode = CODE_FOR_avx512vl_gathersiv4sf; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHER3SIV8SF: -+ icode = CODE_FOR_avx512vl_gathersiv8sf; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHER3DIV4SF: -+ icode = CODE_FOR_avx512vl_gatherdiv4sf; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHER3DIV8SF: -+ icode = CODE_FOR_avx512vl_gatherdiv8sf; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHER3SIV2DI: -+ icode = CODE_FOR_avx512vl_gathersiv2di; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHER3SIV4DI: -+ icode = CODE_FOR_avx512vl_gathersiv4di; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHER3DIV2DI: -+ icode = CODE_FOR_avx512vl_gatherdiv2di; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHER3DIV4DI: -+ icode = CODE_FOR_avx512vl_gatherdiv4di; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHER3SIV4SI: -+ icode = CODE_FOR_avx512vl_gathersiv4si; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHER3SIV8SI: -+ icode = CODE_FOR_avx512vl_gathersiv8si; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHER3DIV4SI: -+ icode = CODE_FOR_avx512vl_gatherdiv4si; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHER3DIV8SI: -+ icode = CODE_FOR_avx512vl_gatherdiv8si; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHER3ALTSIV4DF: -+ icode = CODE_FOR_avx512vl_gathersiv4df; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHER3ALTDIV8SF: -+ icode = CODE_FOR_avx512vl_gatherdiv8sf; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHER3ALTSIV4DI: -+ icode = CODE_FOR_avx512vl_gathersiv4di; -+ goto gather_gen; -+ case IX86_BUILTIN_GATHER3ALTDIV8SI: -+ icode = CODE_FOR_avx512vl_gatherdiv8si; -+ goto gather_gen; -+ case IX86_BUILTIN_SCATTERSIV16SF: -+ icode = CODE_FOR_avx512f_scattersiv16sf; -+ goto scatter_gen; -+ case IX86_BUILTIN_SCATTERSIV8DF: -+ icode = CODE_FOR_avx512f_scattersiv8df; -+ goto scatter_gen; -+ case IX86_BUILTIN_SCATTERDIV16SF: -+ icode = CODE_FOR_avx512f_scatterdiv16sf; -+ goto scatter_gen; -+ case IX86_BUILTIN_SCATTERDIV8DF: -+ icode = CODE_FOR_avx512f_scatterdiv8df; -+ goto scatter_gen; -+ case IX86_BUILTIN_SCATTERSIV16SI: -+ icode = CODE_FOR_avx512f_scattersiv16si; -+ goto scatter_gen; -+ case IX86_BUILTIN_SCATTERSIV8DI: -+ icode = CODE_FOR_avx512f_scattersiv8di; -+ goto scatter_gen; -+ case IX86_BUILTIN_SCATTERDIV16SI: -+ icode = CODE_FOR_avx512f_scatterdiv16si; -+ goto scatter_gen; -+ case IX86_BUILTIN_SCATTERDIV8DI: -+ icode = CODE_FOR_avx512f_scatterdiv8di; -+ goto scatter_gen; -+ case IX86_BUILTIN_SCATTERSIV8SF: -+ icode = CODE_FOR_avx512vl_scattersiv8sf; -+ goto scatter_gen; -+ case IX86_BUILTIN_SCATTERSIV4SF: -+ icode = CODE_FOR_avx512vl_scattersiv4sf; -+ goto scatter_gen; -+ case IX86_BUILTIN_SCATTERSIV4DF: -+ icode = CODE_FOR_avx512vl_scattersiv4df; -+ goto scatter_gen; -+ case IX86_BUILTIN_SCATTERSIV2DF: -+ icode = 
CODE_FOR_avx512vl_scattersiv2df; -+ goto scatter_gen; -+ case IX86_BUILTIN_SCATTERDIV8SF: -+ icode = CODE_FOR_avx512vl_scatterdiv8sf; -+ goto scatter_gen; -+ case IX86_BUILTIN_SCATTERDIV4SF: -+ icode = CODE_FOR_avx512vl_scatterdiv4sf; -+ goto scatter_gen; -+ case IX86_BUILTIN_SCATTERDIV4DF: -+ icode = CODE_FOR_avx512vl_scatterdiv4df; -+ goto scatter_gen; -+ case IX86_BUILTIN_SCATTERDIV2DF: -+ icode = CODE_FOR_avx512vl_scatterdiv2df; -+ goto scatter_gen; -+ case IX86_BUILTIN_SCATTERSIV8SI: -+ icode = CODE_FOR_avx512vl_scattersiv8si; -+ goto scatter_gen; -+ case IX86_BUILTIN_SCATTERSIV4SI: -+ icode = CODE_FOR_avx512vl_scattersiv4si; -+ goto scatter_gen; -+ case IX86_BUILTIN_SCATTERSIV4DI: -+ icode = CODE_FOR_avx512vl_scattersiv4di; -+ goto scatter_gen; -+ case IX86_BUILTIN_SCATTERSIV2DI: -+ icode = CODE_FOR_avx512vl_scattersiv2di; -+ goto scatter_gen; -+ case IX86_BUILTIN_SCATTERDIV8SI: -+ icode = CODE_FOR_avx512vl_scatterdiv8si; -+ goto scatter_gen; -+ case IX86_BUILTIN_SCATTERDIV4SI: -+ icode = CODE_FOR_avx512vl_scatterdiv4si; -+ goto scatter_gen; -+ case IX86_BUILTIN_SCATTERDIV4DI: -+ icode = CODE_FOR_avx512vl_scatterdiv4di; -+ goto scatter_gen; -+ case IX86_BUILTIN_SCATTERDIV2DI: -+ icode = CODE_FOR_avx512vl_scatterdiv2di; -+ goto scatter_gen; -+ case IX86_BUILTIN_GATHERPFDPD: -+ icode = CODE_FOR_avx512pf_gatherpfv8sidf; -+ goto vec_prefetch_gen; -+ case IX86_BUILTIN_SCATTERALTSIV8DF: -+ icode = CODE_FOR_avx512f_scattersiv8df; -+ goto scatter_gen; -+ case IX86_BUILTIN_SCATTERALTDIV16SF: -+ icode = CODE_FOR_avx512f_scatterdiv16sf; -+ goto scatter_gen; -+ case IX86_BUILTIN_SCATTERALTSIV8DI: -+ icode = CODE_FOR_avx512f_scattersiv8di; -+ goto scatter_gen; -+ case IX86_BUILTIN_SCATTERALTDIV16SI: -+ icode = CODE_FOR_avx512f_scatterdiv16si; -+ goto scatter_gen; -+ case IX86_BUILTIN_SCATTERALTSIV4DF: -+ icode = CODE_FOR_avx512vl_scattersiv4df; -+ goto scatter_gen; -+ case IX86_BUILTIN_SCATTERALTDIV8SF: -+ icode = CODE_FOR_avx512vl_scatterdiv8sf; -+ goto scatter_gen; -+ case IX86_BUILTIN_SCATTERALTSIV4DI: -+ icode = CODE_FOR_avx512vl_scattersiv4di; -+ goto scatter_gen; -+ case IX86_BUILTIN_SCATTERALTDIV8SI: -+ icode = CODE_FOR_avx512vl_scatterdiv8si; -+ goto scatter_gen; -+ case IX86_BUILTIN_SCATTERALTSIV2DF: -+ icode = CODE_FOR_avx512vl_scattersiv2df; -+ goto scatter_gen; -+ case IX86_BUILTIN_SCATTERALTDIV4SF: -+ icode = CODE_FOR_avx512vl_scatterdiv4sf; -+ goto scatter_gen; -+ case IX86_BUILTIN_SCATTERALTSIV2DI: -+ icode = CODE_FOR_avx512vl_scattersiv2di; -+ goto scatter_gen; -+ case IX86_BUILTIN_SCATTERALTDIV4SI: -+ icode = CODE_FOR_avx512vl_scatterdiv4si; -+ goto scatter_gen; -+ case IX86_BUILTIN_GATHERPFDPS: -+ icode = CODE_FOR_avx512pf_gatherpfv16sisf; -+ goto vec_prefetch_gen; -+ case IX86_BUILTIN_GATHERPFQPD: -+ icode = CODE_FOR_avx512pf_gatherpfv8didf; -+ goto vec_prefetch_gen; -+ case IX86_BUILTIN_GATHERPFQPS: -+ icode = CODE_FOR_avx512pf_gatherpfv8disf; -+ goto vec_prefetch_gen; -+ case IX86_BUILTIN_SCATTERPFDPD: -+ icode = CODE_FOR_avx512pf_scatterpfv8sidf; -+ goto vec_prefetch_gen; -+ case IX86_BUILTIN_SCATTERPFDPS: -+ icode = CODE_FOR_avx512pf_scatterpfv16sisf; -+ goto vec_prefetch_gen; -+ case IX86_BUILTIN_SCATTERPFQPD: -+ icode = CODE_FOR_avx512pf_scatterpfv8didf; -+ goto vec_prefetch_gen; -+ case IX86_BUILTIN_SCATTERPFQPS: -+ icode = CODE_FOR_avx512pf_scatterpfv8disf; -+ goto vec_prefetch_gen; -+ -+ gather_gen: -+ rtx half; -+ rtx (*gen) (rtx, rtx); -+ -+ arg0 = CALL_EXPR_ARG (exp, 0); -+ arg1 = CALL_EXPR_ARG (exp, 1); -+ arg2 = CALL_EXPR_ARG (exp, 2); -+ arg3 = CALL_EXPR_ARG 
(exp, 3); -+ arg4 = CALL_EXPR_ARG (exp, 4); -+ op0 = expand_normal (arg0); -+ op1 = expand_normal (arg1); -+ op2 = expand_normal (arg2); -+ op3 = expand_normal (arg3); -+ op4 = expand_normal (arg4); -+ /* Note the arg order is different from the operand order. */ -+ mode0 = insn_data[icode].operand[1].mode; -+ mode2 = insn_data[icode].operand[3].mode; -+ mode3 = insn_data[icode].operand[4].mode; -+ mode4 = insn_data[icode].operand[5].mode; -+ -+ if (target == NULL_RTX -+ || GET_MODE (target) != insn_data[icode].operand[0].mode -+ || !insn_data[icode].operand[0].predicate (target, -+ GET_MODE (target))) -+ subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode); -+ else -+ subtarget = target; -+ -+ switch (fcode) -+ { -+ case IX86_BUILTIN_GATHER3ALTSIV8DF: -+ case IX86_BUILTIN_GATHER3ALTSIV8DI: -+ half = gen_reg_rtx (V8SImode); -+ if (!nonimmediate_operand (op2, V16SImode)) -+ op2 = copy_to_mode_reg (V16SImode, op2); -+ emit_insn (gen_vec_extract_lo_v16si (half, op2)); -+ op2 = half; -+ break; -+ case IX86_BUILTIN_GATHER3ALTSIV4DF: -+ case IX86_BUILTIN_GATHER3ALTSIV4DI: -+ case IX86_BUILTIN_GATHERALTSIV4DF: -+ case IX86_BUILTIN_GATHERALTSIV4DI: -+ half = gen_reg_rtx (V4SImode); -+ if (!nonimmediate_operand (op2, V8SImode)) -+ op2 = copy_to_mode_reg (V8SImode, op2); -+ emit_insn (gen_vec_extract_lo_v8si (half, op2)); -+ op2 = half; -+ break; -+ case IX86_BUILTIN_GATHER3ALTDIV16SF: -+ case IX86_BUILTIN_GATHER3ALTDIV16SI: -+ half = gen_reg_rtx (mode0); -+ if (mode0 == V8SFmode) -+ gen = gen_vec_extract_lo_v16sf; -+ else -+ gen = gen_vec_extract_lo_v16si; -+ if (!nonimmediate_operand (op0, GET_MODE (op0))) -+ op0 = copy_to_mode_reg (GET_MODE (op0), op0); -+ emit_insn (gen (half, op0)); -+ op0 = half; -+ op3 = lowpart_subreg (QImode, op3, HImode); -+ break; -+ case IX86_BUILTIN_GATHER3ALTDIV8SF: -+ case IX86_BUILTIN_GATHER3ALTDIV8SI: -+ case IX86_BUILTIN_GATHERALTDIV8SF: -+ case IX86_BUILTIN_GATHERALTDIV8SI: -+ half = gen_reg_rtx (mode0); -+ if (mode0 == V4SFmode) -+ gen = gen_vec_extract_lo_v8sf; -+ else -+ gen = gen_vec_extract_lo_v8si; -+ if (!nonimmediate_operand (op0, GET_MODE (op0))) -+ op0 = copy_to_mode_reg (GET_MODE (op0), op0); -+ emit_insn (gen (half, op0)); -+ op0 = half; -+ if (VECTOR_MODE_P (GET_MODE (op3))) -+ { -+ half = gen_reg_rtx (mode0); -+ if (!nonimmediate_operand (op3, GET_MODE (op3))) -+ op3 = copy_to_mode_reg (GET_MODE (op3), op3); -+ emit_insn (gen (half, op3)); -+ op3 = half; -+ } -+ break; -+ default: -+ break; -+ } -+ -+ /* Force memory operand only with base register here. But we -+ don't want to do it on memory operand for other builtin -+ functions. */ -+ op1 = ix86_zero_extend_to_Pmode (op1); -+ -+ if (!insn_data[icode].operand[1].predicate (op0, mode0)) -+ op0 = copy_to_mode_reg (mode0, op0); -+ if (!insn_data[icode].operand[2].predicate (op1, Pmode)) -+ op1 = copy_to_mode_reg (Pmode, op1); -+ if (!insn_data[icode].operand[3].predicate (op2, mode2)) -+ op2 = copy_to_mode_reg (mode2, op2); -+ -+ op3 = fixup_modeless_constant (op3, mode3); -+ -+ if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode) -+ { -+ if (!insn_data[icode].operand[4].predicate (op3, mode3)) -+ op3 = copy_to_mode_reg (mode3, op3); -+ } -+ else -+ { -+ op3 = copy_to_reg (op3); -+ op3 = lowpart_subreg (mode3, op3, GET_MODE (op3)); -+ } -+ if (!insn_data[icode].operand[5].predicate (op4, mode4)) -+ { -+ error ("the last argument must be scale 1, 2, 4, 8"); -+ return const0_rtx; -+ } -+ -+ /* Optimize. 
If mask is known to have all high bits set, -+ replace op0 with pc_rtx to signal that the instruction -+ overwrites the whole destination and doesn't use its -+ previous contents. */ -+ if (optimize) -+ { -+ if (TREE_CODE (arg3) == INTEGER_CST) -+ { -+ if (integer_all_onesp (arg3)) -+ op0 = pc_rtx; -+ } -+ else if (TREE_CODE (arg3) == VECTOR_CST) -+ { -+ unsigned int negative = 0; -+ for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i) -+ { -+ tree cst = VECTOR_CST_ELT (arg3, i); -+ if (TREE_CODE (cst) == INTEGER_CST -+ && tree_int_cst_sign_bit (cst)) -+ negative++; -+ else if (TREE_CODE (cst) == REAL_CST -+ && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst))) -+ negative++; -+ } -+ if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3))) -+ op0 = pc_rtx; -+ } -+ else if (TREE_CODE (arg3) == SSA_NAME -+ && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE) -+ { -+ /* Recognize also when mask is like: -+ __v2df src = _mm_setzero_pd (); -+ __v2df mask = _mm_cmpeq_pd (src, src); -+ or -+ __v8sf src = _mm256_setzero_ps (); -+ __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ); -+ as that is a cheaper way to load all ones into -+ a register than having to load a constant from -+ memory. */ -+ gimple *def_stmt = SSA_NAME_DEF_STMT (arg3); -+ if (is_gimple_call (def_stmt)) -+ { -+ tree fndecl = gimple_call_fndecl (def_stmt); -+ if (fndecl -+ && fndecl_built_in_p (fndecl, BUILT_IN_MD)) -+ switch (DECL_MD_FUNCTION_CODE (fndecl)) -+ { -+ case IX86_BUILTIN_CMPPD: -+ case IX86_BUILTIN_CMPPS: -+ case IX86_BUILTIN_CMPPD256: -+ case IX86_BUILTIN_CMPPS256: -+ if (!integer_zerop (gimple_call_arg (def_stmt, 2))) -+ break; -+ /* FALLTHRU */ -+ case IX86_BUILTIN_CMPEQPD: -+ case IX86_BUILTIN_CMPEQPS: -+ if (initializer_zerop (gimple_call_arg (def_stmt, 0)) -+ && initializer_zerop (gimple_call_arg (def_stmt, -+ 1))) -+ op0 = pc_rtx; -+ break; -+ default: -+ break; -+ } -+ } -+ } -+ } -+ -+ pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4); -+ if (! pat) -+ return const0_rtx; -+ emit_insn (pat); -+ -+ switch (fcode) -+ { -+ case IX86_BUILTIN_GATHER3DIV16SF: -+ if (target == NULL_RTX) -+ target = gen_reg_rtx (V8SFmode); -+ emit_insn (gen_vec_extract_lo_v16sf (target, subtarget)); -+ break; -+ case IX86_BUILTIN_GATHER3DIV16SI: -+ if (target == NULL_RTX) -+ target = gen_reg_rtx (V8SImode); -+ emit_insn (gen_vec_extract_lo_v16si (target, subtarget)); -+ break; -+ case IX86_BUILTIN_GATHER3DIV8SF: -+ case IX86_BUILTIN_GATHERDIV8SF: -+ if (target == NULL_RTX) -+ target = gen_reg_rtx (V4SFmode); -+ emit_insn (gen_vec_extract_lo_v8sf (target, subtarget)); -+ break; -+ case IX86_BUILTIN_GATHER3DIV8SI: -+ case IX86_BUILTIN_GATHERDIV8SI: -+ if (target == NULL_RTX) -+ target = gen_reg_rtx (V4SImode); -+ emit_insn (gen_vec_extract_lo_v8si (target, subtarget)); -+ break; -+ default: -+ target = subtarget; -+ break; -+ } -+ return target; -+ -+ scatter_gen: -+ arg0 = CALL_EXPR_ARG (exp, 0); -+ arg1 = CALL_EXPR_ARG (exp, 1); -+ arg2 = CALL_EXPR_ARG (exp, 2); -+ arg3 = CALL_EXPR_ARG (exp, 3); -+ arg4 = CALL_EXPR_ARG (exp, 4); -+ op0 = expand_normal (arg0); -+ op1 = expand_normal (arg1); -+ op2 = expand_normal (arg2); -+ op3 = expand_normal (arg3); -+ op4 = expand_normal (arg4); -+ mode1 = insn_data[icode].operand[1].mode; -+ mode2 = insn_data[icode].operand[2].mode; -+ mode3 = insn_data[icode].operand[3].mode; -+ mode4 = insn_data[icode].operand[4].mode; -+ -+ /* Scatter instruction stores operand op3 to memory with -+ indices from op2 and scale from op4 under writemask op1. 
-+ If index operand op2 has more elements then source operand -+ op3 one need to use only its low half. And vice versa. */ -+ switch (fcode) -+ { -+ case IX86_BUILTIN_SCATTERALTSIV8DF: -+ case IX86_BUILTIN_SCATTERALTSIV8DI: -+ half = gen_reg_rtx (V8SImode); -+ if (!nonimmediate_operand (op2, V16SImode)) -+ op2 = copy_to_mode_reg (V16SImode, op2); -+ emit_insn (gen_vec_extract_lo_v16si (half, op2)); -+ op2 = half; -+ break; -+ case IX86_BUILTIN_SCATTERALTDIV16SF: -+ case IX86_BUILTIN_SCATTERALTDIV16SI: -+ half = gen_reg_rtx (mode3); -+ if (mode3 == V8SFmode) -+ gen = gen_vec_extract_lo_v16sf; -+ else -+ gen = gen_vec_extract_lo_v16si; -+ if (!nonimmediate_operand (op3, GET_MODE (op3))) -+ op3 = copy_to_mode_reg (GET_MODE (op3), op3); -+ emit_insn (gen (half, op3)); -+ op3 = half; -+ break; -+ case IX86_BUILTIN_SCATTERALTSIV4DF: -+ case IX86_BUILTIN_SCATTERALTSIV4DI: -+ half = gen_reg_rtx (V4SImode); -+ if (!nonimmediate_operand (op2, V8SImode)) -+ op2 = copy_to_mode_reg (V8SImode, op2); -+ emit_insn (gen_vec_extract_lo_v8si (half, op2)); -+ op2 = half; -+ break; -+ case IX86_BUILTIN_SCATTERALTDIV8SF: -+ case IX86_BUILTIN_SCATTERALTDIV8SI: -+ half = gen_reg_rtx (mode3); -+ if (mode3 == V4SFmode) -+ gen = gen_vec_extract_lo_v8sf; -+ else -+ gen = gen_vec_extract_lo_v8si; -+ if (!nonimmediate_operand (op3, GET_MODE (op3))) -+ op3 = copy_to_mode_reg (GET_MODE (op3), op3); -+ emit_insn (gen (half, op3)); -+ op3 = half; -+ break; -+ case IX86_BUILTIN_SCATTERALTSIV2DF: -+ case IX86_BUILTIN_SCATTERALTSIV2DI: -+ if (!nonimmediate_operand (op2, V4SImode)) -+ op2 = copy_to_mode_reg (V4SImode, op2); -+ break; -+ case IX86_BUILTIN_SCATTERALTDIV4SF: -+ case IX86_BUILTIN_SCATTERALTDIV4SI: -+ if (!nonimmediate_operand (op3, GET_MODE (op3))) -+ op3 = copy_to_mode_reg (GET_MODE (op3), op3); -+ break; -+ default: -+ break; -+ } -+ -+ /* Force memory operand only with base register here. But we -+ don't want to do it on memory operand for other builtin -+ functions. */ -+ op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1)); -+ -+ if (!insn_data[icode].operand[0].predicate (op0, Pmode)) -+ op0 = copy_to_mode_reg (Pmode, op0); -+ -+ op1 = fixup_modeless_constant (op1, mode1); -+ -+ if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode) -+ { -+ if (!insn_data[icode].operand[1].predicate (op1, mode1)) -+ op1 = copy_to_mode_reg (mode1, op1); -+ } -+ else -+ { -+ op1 = copy_to_reg (op1); -+ op1 = lowpart_subreg (mode1, op1, GET_MODE (op1)); -+ } -+ -+ if (!insn_data[icode].operand[2].predicate (op2, mode2)) -+ op2 = copy_to_mode_reg (mode2, op2); -+ -+ if (!insn_data[icode].operand[3].predicate (op3, mode3)) -+ op3 = copy_to_mode_reg (mode3, op3); -+ -+ if (!insn_data[icode].operand[4].predicate (op4, mode4)) -+ { -+ error ("the last argument must be scale 1, 2, 4, 8"); -+ return const0_rtx; -+ } -+ -+ pat = GEN_FCN (icode) (op0, op1, op2, op3, op4); -+ if (! 
pat) -+ return const0_rtx; -+ -+ emit_insn (pat); -+ return 0; -+ -+ vec_prefetch_gen: -+ arg0 = CALL_EXPR_ARG (exp, 0); -+ arg1 = CALL_EXPR_ARG (exp, 1); -+ arg2 = CALL_EXPR_ARG (exp, 2); -+ arg3 = CALL_EXPR_ARG (exp, 3); -+ arg4 = CALL_EXPR_ARG (exp, 4); -+ op0 = expand_normal (arg0); -+ op1 = expand_normal (arg1); -+ op2 = expand_normal (arg2); -+ op3 = expand_normal (arg3); -+ op4 = expand_normal (arg4); -+ mode0 = insn_data[icode].operand[0].mode; -+ mode1 = insn_data[icode].operand[1].mode; -+ mode3 = insn_data[icode].operand[3].mode; -+ mode4 = insn_data[icode].operand[4].mode; -+ -+ op0 = fixup_modeless_constant (op0, mode0); -+ -+ if (GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode) -+ { -+ if (!insn_data[icode].operand[0].predicate (op0, mode0)) -+ op0 = copy_to_mode_reg (mode0, op0); -+ } -+ else -+ { -+ op0 = copy_to_reg (op0); -+ op0 = lowpart_subreg (mode0, op0, GET_MODE (op0)); -+ } -+ -+ if (!insn_data[icode].operand[1].predicate (op1, mode1)) -+ op1 = copy_to_mode_reg (mode1, op1); -+ -+ /* Force memory operand only with base register here. But we -+ don't want to do it on memory operand for other builtin -+ functions. */ -+ op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1)); -+ -+ if (!insn_data[icode].operand[2].predicate (op2, Pmode)) -+ op2 = copy_to_mode_reg (Pmode, op2); -+ -+ if (!insn_data[icode].operand[3].predicate (op3, mode3)) -+ { -+ error ("the forth argument must be scale 1, 2, 4, 8"); -+ return const0_rtx; -+ } -+ -+ if (!insn_data[icode].operand[4].predicate (op4, mode4)) -+ { -+ error ("incorrect hint operand"); -+ return const0_rtx; -+ } -+ -+ pat = GEN_FCN (icode) (op0, op1, op2, op3, op4); -+ if (! pat) -+ return const0_rtx; -+ -+ emit_insn (pat); -+ -+ return 0; -+ -+ case IX86_BUILTIN_XABORT: -+ icode = CODE_FOR_xabort; -+ arg0 = CALL_EXPR_ARG (exp, 0); -+ op0 = expand_normal (arg0); -+ mode0 = insn_data[icode].operand[0].mode; -+ if (!insn_data[icode].operand[0].predicate (op0, mode0)) -+ { -+ error ("the argument to % intrinsic must " -+ "be an 8-bit immediate"); -+ return const0_rtx; -+ } -+ emit_insn (gen_xabort (op0)); -+ return 0; -+ -+ case IX86_BUILTIN_RSTORSSP: -+ case IX86_BUILTIN_CLRSSBSY: -+ arg0 = CALL_EXPR_ARG (exp, 0); -+ op0 = expand_normal (arg0); -+ icode = (fcode == IX86_BUILTIN_RSTORSSP -+ ? 
CODE_FOR_rstorssp -+ : CODE_FOR_clrssbsy); -+ if (!address_operand (op0, VOIDmode)) -+ { -+ op1 = convert_memory_address (Pmode, op0); -+ op0 = copy_addr_to_reg (op1); -+ } -+ emit_insn (GEN_FCN (icode) (gen_rtx_MEM (Pmode, op0))); -+ return 0; -+ -+ case IX86_BUILTIN_WRSSD: -+ case IX86_BUILTIN_WRSSQ: -+ case IX86_BUILTIN_WRUSSD: -+ case IX86_BUILTIN_WRUSSQ: -+ arg0 = CALL_EXPR_ARG (exp, 0); -+ op0 = expand_normal (arg0); -+ arg1 = CALL_EXPR_ARG (exp, 1); -+ op1 = expand_normal (arg1); -+ switch (fcode) -+ { -+ case IX86_BUILTIN_WRSSD: -+ icode = CODE_FOR_wrsssi; -+ mode = SImode; -+ break; -+ case IX86_BUILTIN_WRSSQ: -+ icode = CODE_FOR_wrssdi; -+ mode = DImode; -+ break; -+ case IX86_BUILTIN_WRUSSD: -+ icode = CODE_FOR_wrusssi; -+ mode = SImode; -+ break; -+ case IX86_BUILTIN_WRUSSQ: -+ icode = CODE_FOR_wrussdi; -+ mode = DImode; -+ break; -+ } -+ op0 = force_reg (mode, op0); -+ if (!address_operand (op1, VOIDmode)) -+ { -+ op2 = convert_memory_address (Pmode, op1); -+ op1 = copy_addr_to_reg (op2); -+ } -+ emit_insn (GEN_FCN (icode) (op0, gen_rtx_MEM (mode, op1))); -+ return 0; -+ -+ default: -+ break; -+ } -+ -+ if (fcode >= IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST -+ && fcode <= IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST) -+ { -+ i = fcode - IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST; -+ return ix86_expand_special_args_builtin (bdesc_special_args + i, exp, -+ target); -+ } -+ -+ if (fcode >= IX86_BUILTIN__BDESC_ARGS_FIRST -+ && fcode <= IX86_BUILTIN__BDESC_ARGS_LAST) -+ { -+ i = fcode - IX86_BUILTIN__BDESC_ARGS_FIRST; -+ rtx (*fcn) (rtx, rtx, rtx, rtx) = NULL; -+ rtx (*fcn_mask) (rtx, rtx, rtx, rtx, rtx); -+ rtx (*fcn_maskz) (rtx, rtx, rtx, rtx, rtx, rtx); -+ int masked = 1; -+ machine_mode mode, wide_mode, nar_mode; -+ -+ nar_mode = V4SFmode; -+ mode = V16SFmode; -+ wide_mode = V64SFmode; -+ fcn_mask = gen_avx5124fmaddps_4fmaddps_mask; -+ fcn_maskz = gen_avx5124fmaddps_4fmaddps_maskz; -+ -+ switch (fcode) -+ { -+ case IX86_BUILTIN_4FMAPS: -+ fcn = gen_avx5124fmaddps_4fmaddps; -+ masked = 0; -+ goto v4fma_expand; -+ -+ case IX86_BUILTIN_4DPWSSD: -+ nar_mode = V4SImode; -+ mode = V16SImode; -+ wide_mode = V64SImode; -+ fcn = gen_avx5124vnniw_vp4dpwssd; -+ masked = 0; -+ goto v4fma_expand; -+ -+ case IX86_BUILTIN_4DPWSSDS: -+ nar_mode = V4SImode; -+ mode = V16SImode; -+ wide_mode = V64SImode; -+ fcn = gen_avx5124vnniw_vp4dpwssds; -+ masked = 0; -+ goto v4fma_expand; -+ -+ case IX86_BUILTIN_4FNMAPS: -+ fcn = gen_avx5124fmaddps_4fnmaddps; -+ masked = 0; -+ goto v4fma_expand; -+ -+ case IX86_BUILTIN_4FNMAPS_MASK: -+ fcn_mask = gen_avx5124fmaddps_4fnmaddps_mask; -+ fcn_maskz = gen_avx5124fmaddps_4fnmaddps_maskz; -+ goto v4fma_expand; -+ -+ case IX86_BUILTIN_4DPWSSD_MASK: -+ nar_mode = V4SImode; -+ mode = V16SImode; -+ wide_mode = V64SImode; -+ fcn_mask = gen_avx5124vnniw_vp4dpwssd_mask; -+ fcn_maskz = gen_avx5124vnniw_vp4dpwssd_maskz; -+ goto v4fma_expand; -+ -+ case IX86_BUILTIN_4DPWSSDS_MASK: -+ nar_mode = V4SImode; -+ mode = V16SImode; -+ wide_mode = V64SImode; -+ fcn_mask = gen_avx5124vnniw_vp4dpwssds_mask; -+ fcn_maskz = gen_avx5124vnniw_vp4dpwssds_maskz; -+ goto v4fma_expand; -+ -+ case IX86_BUILTIN_4FMAPS_MASK: -+ { -+ tree args[4]; -+ rtx ops[4]; -+ rtx wide_reg; -+ rtx accum; -+ rtx addr; -+ rtx mem; -+ -+v4fma_expand: -+ wide_reg = gen_reg_rtx (wide_mode); -+ for (i = 0; i < 4; i++) -+ { -+ args[i] = CALL_EXPR_ARG (exp, i); -+ ops[i] = expand_normal (args[i]); -+ -+ emit_move_insn (gen_rtx_SUBREG (mode, wide_reg, i * 64), -+ ops[i]); -+ } -+ -+ accum = expand_normal (CALL_EXPR_ARG (exp, 4)); 
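For readers following the masked AVX512_4FMAPS/4VNNIW expansion below: it emits the zero-masked pattern when the merge operand is constant zero, and the merge-masked pattern when the merge operand is the same tree as the accumulator. A minimal user-level sketch of that same merge- vs. zero-masking distinction, using the standard <immintrin.h> AVX-512F add intrinsics purely for illustration (the 4FMAPS builtins expanded by this hunk are not shown; the function names here are the editor's, not part of the patch):

  #include <immintrin.h>

  /* Merge-masking: elements whose mask bit is clear keep their value
     from ACC (the "src" operand of the intrinsic).  */
  __m512
  merge_masked (__m512 acc, __m512 a, __m512 b, __mmask16 k)
  {
    return _mm512_mask_add_ps (acc, k, a, b);
  }

  /* Zero-masking: elements whose mask bit is clear are zeroed instead.  */
  __m512
  zero_masked (__m512 a, __m512 b, __mmask16 k)
  {
    return _mm512_maskz_add_ps (k, a, b);
  }

Compile with -mavx512f; only the analogy to the masked 4FMA patterns is assumed, the intrinsic names and signatures are the documented ones.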
-+ accum = force_reg (mode, accum); -+ -+ addr = expand_normal (CALL_EXPR_ARG (exp, 5)); -+ addr = force_reg (Pmode, addr); -+ -+ mem = gen_rtx_MEM (nar_mode, addr); -+ -+ target = gen_reg_rtx (mode); -+ -+ emit_move_insn (target, accum); -+ -+ if (! masked) -+ emit_insn (fcn (target, accum, wide_reg, mem)); -+ else -+ { -+ rtx merge, mask; -+ merge = expand_normal (CALL_EXPR_ARG (exp, 6)); -+ -+ mask = expand_normal (CALL_EXPR_ARG (exp, 7)); -+ -+ if (CONST_INT_P (mask)) -+ mask = fixup_modeless_constant (mask, HImode); -+ -+ mask = force_reg (HImode, mask); -+ -+ if (GET_MODE (mask) != HImode) -+ mask = gen_rtx_SUBREG (HImode, mask, 0); -+ -+ /* If merge is 0 then we're about to emit z-masked variant. */ -+ if (const0_operand (merge, mode)) -+ emit_insn (fcn_maskz (target, accum, wide_reg, mem, merge, mask)); -+ /* If merge is the same as accum then emit merge-masked variant. */ -+ else if (CALL_EXPR_ARG (exp, 6) == CALL_EXPR_ARG (exp, 4)) -+ { -+ merge = force_reg (mode, merge); -+ emit_insn (fcn_mask (target, wide_reg, mem, merge, mask)); -+ } -+ /* Merge with something unknown might happen if we z-mask w/ -O0. */ -+ else -+ { -+ target = gen_reg_rtx (mode); -+ emit_move_insn (target, merge); -+ emit_insn (fcn_mask (target, wide_reg, mem, target, mask)); -+ } -+ } -+ return target; -+ } -+ -+ case IX86_BUILTIN_4FNMASS: -+ fcn = gen_avx5124fmaddps_4fnmaddss; -+ masked = 0; -+ goto s4fma_expand; -+ -+ case IX86_BUILTIN_4FMASS: -+ fcn = gen_avx5124fmaddps_4fmaddss; -+ masked = 0; -+ goto s4fma_expand; -+ -+ case IX86_BUILTIN_4FNMASS_MASK: -+ fcn_mask = gen_avx5124fmaddps_4fnmaddss_mask; -+ fcn_maskz = gen_avx5124fmaddps_4fnmaddss_maskz; -+ goto s4fma_expand; -+ -+ case IX86_BUILTIN_4FMASS_MASK: -+ { -+ tree args[4]; -+ rtx ops[4]; -+ rtx wide_reg; -+ rtx accum; -+ rtx addr; -+ rtx mem; -+ -+ fcn_mask = gen_avx5124fmaddps_4fmaddss_mask; -+ fcn_maskz = gen_avx5124fmaddps_4fmaddss_maskz; -+ -+s4fma_expand: -+ mode = V4SFmode; -+ wide_reg = gen_reg_rtx (V64SFmode); -+ for (i = 0; i < 4; i++) -+ { -+ rtx tmp; -+ args[i] = CALL_EXPR_ARG (exp, i); -+ ops[i] = expand_normal (args[i]); -+ -+ tmp = gen_reg_rtx (SFmode); -+ emit_move_insn (tmp, gen_rtx_SUBREG (SFmode, ops[i], 0)); -+ -+ emit_move_insn (gen_rtx_SUBREG (V16SFmode, wide_reg, i * 64), -+ gen_rtx_SUBREG (V16SFmode, tmp, 0)); -+ } -+ -+ accum = expand_normal (CALL_EXPR_ARG (exp, 4)); -+ accum = force_reg (V4SFmode, accum); -+ -+ addr = expand_normal (CALL_EXPR_ARG (exp, 5)); -+ addr = force_reg (Pmode, addr); -+ -+ mem = gen_rtx_MEM (V4SFmode, addr); -+ -+ target = gen_reg_rtx (V4SFmode); -+ -+ emit_move_insn (target, accum); -+ -+ if (! masked) -+ emit_insn (fcn (target, accum, wide_reg, mem)); -+ else -+ { -+ rtx merge, mask; -+ merge = expand_normal (CALL_EXPR_ARG (exp, 6)); -+ -+ mask = expand_normal (CALL_EXPR_ARG (exp, 7)); -+ -+ if (CONST_INT_P (mask)) -+ mask = fixup_modeless_constant (mask, QImode); -+ -+ mask = force_reg (QImode, mask); -+ -+ if (GET_MODE (mask) != QImode) -+ mask = gen_rtx_SUBREG (QImode, mask, 0); -+ -+ /* If merge is 0 then we're about to emit z-masked variant. */ -+ if (const0_operand (merge, mode)) -+ emit_insn (fcn_maskz (target, accum, wide_reg, mem, merge, mask)); -+ /* If merge is the same as accum then emit merge-masked -+ variant. */ -+ else if (CALL_EXPR_ARG (exp, 6) == CALL_EXPR_ARG (exp, 4)) -+ { -+ merge = force_reg (mode, merge); -+ emit_insn (fcn_mask (target, wide_reg, mem, merge, mask)); -+ } -+ /* Merge with something unknown might happen if we z-mask -+ w/ -O0. 
*/ -+ else -+ { -+ target = gen_reg_rtx (mode); -+ emit_move_insn (target, merge); -+ emit_insn (fcn_mask (target, wide_reg, mem, target, mask)); -+ } -+ } -+ return target; -+ } -+ case IX86_BUILTIN_RDPID: -+ return ix86_expand_special_args_builtin (bdesc_args + i, exp, -+ target); -+ case IX86_BUILTIN_FABSQ: -+ case IX86_BUILTIN_COPYSIGNQ: -+ if (!TARGET_SSE) -+ /* Emit a normal call if SSE isn't available. */ -+ return expand_call (exp, target, ignore); -+ /* FALLTHRU */ -+ default: -+ return ix86_expand_args_builtin (bdesc_args + i, exp, target); -+ } -+ } -+ -+ if (fcode >= IX86_BUILTIN__BDESC_COMI_FIRST -+ && fcode <= IX86_BUILTIN__BDESC_COMI_LAST) -+ { -+ i = fcode - IX86_BUILTIN__BDESC_COMI_FIRST; -+ return ix86_expand_sse_comi (bdesc_comi + i, exp, target); -+ } -+ -+ if (fcode >= IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST -+ && fcode <= IX86_BUILTIN__BDESC_ROUND_ARGS_LAST) -+ { -+ i = fcode - IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST; -+ return ix86_expand_round_builtin (bdesc_round_args + i, exp, target); -+ } -+ -+ if (fcode >= IX86_BUILTIN__BDESC_PCMPESTR_FIRST -+ && fcode <= IX86_BUILTIN__BDESC_PCMPESTR_LAST) -+ { -+ i = fcode - IX86_BUILTIN__BDESC_PCMPESTR_FIRST; -+ return ix86_expand_sse_pcmpestr (bdesc_pcmpestr + i, exp, target); -+ } -+ -+ if (fcode >= IX86_BUILTIN__BDESC_PCMPISTR_FIRST -+ && fcode <= IX86_BUILTIN__BDESC_PCMPISTR_LAST) -+ { -+ i = fcode - IX86_BUILTIN__BDESC_PCMPISTR_FIRST; -+ return ix86_expand_sse_pcmpistr (bdesc_pcmpistr + i, exp, target); -+ } -+ -+ if (fcode >= IX86_BUILTIN__BDESC_MULTI_ARG_FIRST -+ && fcode <= IX86_BUILTIN__BDESC_MULTI_ARG_LAST) -+ { -+ i = fcode - IX86_BUILTIN__BDESC_MULTI_ARG_FIRST; -+ const struct builtin_description *d = bdesc_multi_arg + i; -+ return ix86_expand_multi_arg_builtin (d->icode, exp, target, -+ (enum ix86_builtin_func_type) -+ d->flag, d->comparison); -+ } -+ -+ if (fcode >= IX86_BUILTIN__BDESC_CET_FIRST -+ && fcode <= IX86_BUILTIN__BDESC_CET_LAST) -+ { -+ i = fcode - IX86_BUILTIN__BDESC_CET_FIRST; -+ return ix86_expand_special_args_builtin (bdesc_cet + i, exp, -+ target); -+ } -+ -+ if (fcode >= IX86_BUILTIN__BDESC_CET_NORMAL_FIRST -+ && fcode <= IX86_BUILTIN__BDESC_CET_NORMAL_LAST) -+ { -+ i = fcode - IX86_BUILTIN__BDESC_CET_NORMAL_FIRST; -+ return ix86_expand_special_args_builtin (bdesc_cet_rdssp + i, exp, -+ target); -+ } -+ -+ gcc_unreachable (); -+} -+ -+/* A subroutine of ix86_expand_vector_init_duplicate. Tries to -+ fill target with val via vec_duplicate. */ -+ -+static bool -+ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val) -+{ -+ bool ok; -+ rtx_insn *insn; -+ rtx dup; -+ -+ /* First attempt to recognize VAL as-is. */ -+ dup = gen_vec_duplicate (mode, val); -+ insn = emit_insn (gen_rtx_SET (target, dup)); -+ if (recog_memoized (insn) < 0) -+ { -+ rtx_insn *seq; -+ machine_mode innermode = GET_MODE_INNER (mode); -+ rtx reg; -+ -+ /* If that fails, force VAL into a register. */ -+ -+ start_sequence (); -+ reg = force_reg (innermode, val); -+ if (GET_MODE (reg) != innermode) -+ reg = gen_lowpart (innermode, reg); -+ SET_SRC (PATTERN (insn)) = gen_vec_duplicate (mode, reg); -+ seq = get_insns (); -+ end_sequence (); -+ if (seq) -+ emit_insn_before (seq, insn); -+ -+ ok = recog_memoized (insn) >= 0; -+ gcc_assert (ok); -+ } -+ return true; -+} -+ -+/* Get a vector mode of the same size as the original but with elements -+ twice as wide. This is only guaranteed to apply to integral vectors. */ -+ -+static machine_mode -+get_mode_wider_vector (machine_mode o) -+{ -+ /* ??? 
Rely on the ordering that genmodes.c gives to vectors. */ -+ machine_mode n = GET_MODE_WIDER_MODE (o).require (); -+ gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2); -+ gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n)); -+ return n; -+} -+ -+static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d); -+static bool expand_vec_perm_1 (struct expand_vec_perm_d *d); -+ -+/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector -+ with all elements equal to VAR. Return true if successful. */ -+ -+static bool -+ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode, -+ rtx target, rtx val) -+{ -+ bool ok; -+ -+ switch (mode) -+ { -+ case E_V2SImode: -+ case E_V2SFmode: -+ if (!mmx_ok) -+ return false; -+ /* FALLTHRU */ -+ -+ case E_V4DFmode: -+ case E_V4DImode: -+ case E_V8SFmode: -+ case E_V8SImode: -+ case E_V2DFmode: -+ case E_V2DImode: -+ case E_V4SFmode: -+ case E_V4SImode: -+ case E_V16SImode: -+ case E_V8DImode: -+ case E_V16SFmode: -+ case E_V8DFmode: -+ return ix86_vector_duplicate_value (mode, target, val); -+ -+ case E_V4HImode: -+ if (!mmx_ok) -+ return false; -+ if (TARGET_SSE || TARGET_3DNOW_A) -+ { -+ rtx x; -+ -+ val = gen_lowpart (SImode, val); -+ x = gen_rtx_TRUNCATE (HImode, val); -+ x = gen_rtx_VEC_DUPLICATE (mode, x); -+ emit_insn (gen_rtx_SET (target, x)); -+ return true; -+ } -+ goto widen; -+ -+ case E_V8QImode: -+ if (!mmx_ok) -+ return false; -+ goto widen; -+ -+ case E_V8HImode: -+ if (TARGET_AVX2) -+ return ix86_vector_duplicate_value (mode, target, val); -+ -+ if (TARGET_SSE2) -+ { -+ struct expand_vec_perm_d dperm; -+ rtx tmp1, tmp2; -+ -+ permute: -+ memset (&dperm, 0, sizeof (dperm)); -+ dperm.target = target; -+ dperm.vmode = mode; -+ dperm.nelt = GET_MODE_NUNITS (mode); -+ dperm.op0 = dperm.op1 = gen_reg_rtx (mode); -+ dperm.one_operand_p = true; -+ -+ /* Extend to SImode using a paradoxical SUBREG. */ -+ tmp1 = gen_reg_rtx (SImode); -+ emit_move_insn (tmp1, gen_lowpart (SImode, val)); -+ -+ /* Insert the SImode value as low element of a V4SImode vector. */ -+ tmp2 = gen_reg_rtx (V4SImode); -+ emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1)); -+ emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2)); -+ -+ ok = (expand_vec_perm_1 (&dperm) -+ || expand_vec_perm_broadcast_1 (&dperm)); -+ gcc_assert (ok); -+ return ok; -+ } -+ goto widen; -+ -+ case E_V16QImode: -+ if (TARGET_AVX2) -+ return ix86_vector_duplicate_value (mode, target, val); -+ -+ if (TARGET_SSE2) -+ goto permute; -+ goto widen; -+ -+ widen: -+ /* Replicate the value once into the next wider mode and recurse. */ -+ { -+ machine_mode smode, wsmode, wvmode; -+ rtx x; -+ -+ smode = GET_MODE_INNER (mode); -+ wvmode = get_mode_wider_vector (mode); -+ wsmode = GET_MODE_INNER (wvmode); -+ -+ val = convert_modes (wsmode, smode, val, true); -+ x = expand_simple_binop (wsmode, ASHIFT, val, -+ GEN_INT (GET_MODE_BITSIZE (smode)), -+ NULL_RTX, 1, OPTAB_LIB_WIDEN); -+ val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN); -+ -+ x = gen_reg_rtx (wvmode); -+ ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val); -+ gcc_assert (ok); -+ emit_move_insn (target, gen_lowpart (GET_MODE (target), x)); -+ return ok; -+ } -+ -+ case E_V16HImode: -+ case E_V32QImode: -+ if (TARGET_AVX2) -+ return ix86_vector_duplicate_value (mode, target, val); -+ else -+ { -+ machine_mode hvmode = (mode == V16HImode ? 
V8HImode : V16QImode); -+ rtx x = gen_reg_rtx (hvmode); -+ -+ ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val); -+ gcc_assert (ok); -+ -+ x = gen_rtx_VEC_CONCAT (mode, x, x); -+ emit_insn (gen_rtx_SET (target, x)); -+ } -+ return true; -+ -+ case E_V64QImode: -+ case E_V32HImode: -+ if (TARGET_AVX512BW) -+ return ix86_vector_duplicate_value (mode, target, val); -+ else -+ { -+ machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode); -+ rtx x = gen_reg_rtx (hvmode); -+ -+ ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val); -+ gcc_assert (ok); -+ -+ x = gen_rtx_VEC_CONCAT (mode, x, x); -+ emit_insn (gen_rtx_SET (target, x)); -+ } -+ return true; -+ -+ default: -+ return false; -+ } -+} -+ -+/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector -+ whose ONE_VAR element is VAR, and other elements are zero. Return true -+ if successful. */ -+ -+static bool -+ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode, -+ rtx target, rtx var, int one_var) -+{ -+ machine_mode vsimode; -+ rtx new_target; -+ rtx x, tmp; -+ bool use_vector_set = false; -+ rtx (*gen_vec_set_0) (rtx, rtx, rtx) = NULL; -+ -+ switch (mode) -+ { -+ case E_V2DImode: -+ /* For SSE4.1, we normally use vector set. But if the second -+ element is zero and inter-unit moves are OK, we use movq -+ instead. */ -+ use_vector_set = (TARGET_64BIT && TARGET_SSE4_1 -+ && !(TARGET_INTER_UNIT_MOVES_TO_VEC -+ && one_var == 0)); -+ break; -+ case E_V16QImode: -+ case E_V4SImode: -+ case E_V4SFmode: -+ use_vector_set = TARGET_SSE4_1; -+ break; -+ case E_V8HImode: -+ use_vector_set = TARGET_SSE2; -+ break; -+ case E_V4HImode: -+ use_vector_set = TARGET_SSE || TARGET_3DNOW_A; -+ break; -+ case E_V32QImode: -+ case E_V16HImode: -+ use_vector_set = TARGET_AVX; -+ break; -+ case E_V8SImode: -+ use_vector_set = TARGET_AVX; -+ gen_vec_set_0 = gen_vec_setv8si_0; -+ break; -+ case E_V8SFmode: -+ use_vector_set = TARGET_AVX; -+ gen_vec_set_0 = gen_vec_setv8sf_0; -+ break; -+ case E_V4DFmode: -+ use_vector_set = TARGET_AVX; -+ gen_vec_set_0 = gen_vec_setv4df_0; -+ break; -+ case E_V4DImode: -+ /* Use ix86_expand_vector_set in 64bit mode only. */ -+ use_vector_set = TARGET_AVX && TARGET_64BIT; -+ gen_vec_set_0 = gen_vec_setv4di_0; -+ break; -+ case E_V16SImode: -+ use_vector_set = TARGET_AVX512F && one_var == 0; -+ gen_vec_set_0 = gen_vec_setv16si_0; -+ break; -+ case E_V16SFmode: -+ use_vector_set = TARGET_AVX512F && one_var == 0; -+ gen_vec_set_0 = gen_vec_setv16sf_0; -+ break; -+ case E_V8DFmode: -+ use_vector_set = TARGET_AVX512F && one_var == 0; -+ gen_vec_set_0 = gen_vec_setv8df_0; -+ break; -+ case E_V8DImode: -+ /* Use ix86_expand_vector_set in 64bit mode only. 
*/ -+ use_vector_set = TARGET_AVX512F && TARGET_64BIT && one_var == 0; -+ gen_vec_set_0 = gen_vec_setv8di_0; -+ break; -+ default: -+ break; -+ } -+ -+ if (use_vector_set) -+ { -+ if (gen_vec_set_0 && one_var == 0) -+ { -+ var = force_reg (GET_MODE_INNER (mode), var); -+ emit_insn (gen_vec_set_0 (target, CONST0_RTX (mode), var)); -+ return true; -+ } -+ emit_insn (gen_rtx_SET (target, CONST0_RTX (mode))); -+ var = force_reg (GET_MODE_INNER (mode), var); -+ ix86_expand_vector_set (mmx_ok, target, var, one_var); -+ return true; -+ } -+ -+ switch (mode) -+ { -+ case E_V2SFmode: -+ case E_V2SImode: -+ if (!mmx_ok) -+ return false; -+ /* FALLTHRU */ -+ -+ case E_V2DFmode: -+ case E_V2DImode: -+ if (one_var != 0) -+ return false; -+ var = force_reg (GET_MODE_INNER (mode), var); -+ x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode))); -+ emit_insn (gen_rtx_SET (target, x)); -+ return true; -+ -+ case E_V4SFmode: -+ case E_V4SImode: -+ if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER) -+ new_target = gen_reg_rtx (mode); -+ else -+ new_target = target; -+ var = force_reg (GET_MODE_INNER (mode), var); -+ x = gen_rtx_VEC_DUPLICATE (mode, var); -+ x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx); -+ emit_insn (gen_rtx_SET (new_target, x)); -+ if (one_var != 0) -+ { -+ /* We need to shuffle the value to the correct position, so -+ create a new pseudo to store the intermediate result. */ -+ -+ /* With SSE2, we can use the integer shuffle insns. */ -+ if (mode != V4SFmode && TARGET_SSE2) -+ { -+ emit_insn (gen_sse2_pshufd_1 (new_target, new_target, -+ const1_rtx, -+ GEN_INT (one_var == 1 ? 0 : 1), -+ GEN_INT (one_var == 2 ? 0 : 1), -+ GEN_INT (one_var == 3 ? 0 : 1))); -+ if (target != new_target) -+ emit_move_insn (target, new_target); -+ return true; -+ } -+ -+ /* Otherwise convert the intermediate result to V4SFmode and -+ use the SSE1 shuffle instructions. */ -+ if (mode != V4SFmode) -+ { -+ tmp = gen_reg_rtx (V4SFmode); -+ emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target)); -+ } -+ else -+ tmp = new_target; -+ -+ emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp, -+ const1_rtx, -+ GEN_INT (one_var == 1 ? 0 : 1), -+ GEN_INT (one_var == 2 ? 0+4 : 1+4), -+ GEN_INT (one_var == 3 ? 0+4 : 1+4))); -+ -+ if (mode != V4SFmode) -+ emit_move_insn (target, gen_lowpart (V4SImode, tmp)); -+ else if (tmp != target) -+ emit_move_insn (target, tmp); -+ } -+ else if (target != new_target) -+ emit_move_insn (target, new_target); -+ return true; -+ -+ case E_V8HImode: -+ case E_V16QImode: -+ vsimode = V4SImode; -+ goto widen; -+ case E_V4HImode: -+ case E_V8QImode: -+ if (!mmx_ok) -+ return false; -+ vsimode = V2SImode; -+ goto widen; -+ widen: -+ if (one_var != 0) -+ return false; -+ -+ /* Zero extend the variable element to SImode and recurse. */ -+ var = convert_modes (SImode, GET_MODE_INNER (mode), var, true); -+ -+ x = gen_reg_rtx (vsimode); -+ if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x, -+ var, one_var)) -+ gcc_unreachable (); -+ -+ emit_move_insn (target, gen_lowpart (mode, x)); -+ return true; -+ -+ default: -+ return false; -+ } -+} -+ -+/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector -+ consisting of the values in VALS. It is known that all elements -+ except ONE_VAR are constants. Return true if successful. 
*/ -+ -+static bool -+ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode, -+ rtx target, rtx vals, int one_var) -+{ -+ rtx var = XVECEXP (vals, 0, one_var); -+ machine_mode wmode; -+ rtx const_vec, x; -+ -+ const_vec = copy_rtx (vals); -+ XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode)); -+ const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0)); -+ -+ switch (mode) -+ { -+ case E_V2DFmode: -+ case E_V2DImode: -+ case E_V2SFmode: -+ case E_V2SImode: -+ /* For the two element vectors, it's just as easy to use -+ the general case. */ -+ return false; -+ -+ case E_V4DImode: -+ /* Use ix86_expand_vector_set in 64bit mode only. */ -+ if (!TARGET_64BIT) -+ return false; -+ /* FALLTHRU */ -+ case E_V4DFmode: -+ case E_V8SFmode: -+ case E_V8SImode: -+ case E_V16HImode: -+ case E_V32QImode: -+ case E_V4SFmode: -+ case E_V4SImode: -+ case E_V8HImode: -+ case E_V4HImode: -+ break; -+ -+ case E_V16QImode: -+ if (TARGET_SSE4_1) -+ break; -+ wmode = V8HImode; -+ goto widen; -+ case E_V8QImode: -+ wmode = V4HImode; -+ goto widen; -+ widen: -+ /* There's no way to set one QImode entry easily. Combine -+ the variable value with its adjacent constant value, and -+ promote to an HImode set. */ -+ x = XVECEXP (vals, 0, one_var ^ 1); -+ if (one_var & 1) -+ { -+ var = convert_modes (HImode, QImode, var, true); -+ var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8), -+ NULL_RTX, 1, OPTAB_LIB_WIDEN); -+ x = GEN_INT (INTVAL (x) & 0xff); -+ } -+ else -+ { -+ var = convert_modes (HImode, QImode, var, true); -+ x = gen_int_mode (UINTVAL (x) << 8, HImode); -+ } -+ if (x != const0_rtx) -+ var = expand_simple_binop (HImode, IOR, var, x, var, -+ 1, OPTAB_LIB_WIDEN); -+ -+ x = gen_reg_rtx (wmode); -+ emit_move_insn (x, gen_lowpart (wmode, const_vec)); -+ ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1); -+ -+ emit_move_insn (target, gen_lowpart (mode, x)); -+ return true; -+ -+ default: -+ return false; -+ } -+ -+ emit_move_insn (target, const_vec); -+ ix86_expand_vector_set (mmx_ok, target, var, one_var); -+ return true; -+} -+ -+/* A subroutine of ix86_expand_vector_init_general. Use vector -+ concatenate to handle the most general case: all values variable, -+ and none identical. 
*/ -+ -+static void -+ix86_expand_vector_init_concat (machine_mode mode, -+ rtx target, rtx *ops, int n) -+{ -+ machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode; -+ rtx first[16], second[8], third[4]; -+ rtvec v; -+ int i, j; -+ -+ switch (n) -+ { -+ case 2: -+ switch (mode) -+ { -+ case E_V16SImode: -+ cmode = V8SImode; -+ break; -+ case E_V16SFmode: -+ cmode = V8SFmode; -+ break; -+ case E_V8DImode: -+ cmode = V4DImode; -+ break; -+ case E_V8DFmode: -+ cmode = V4DFmode; -+ break; -+ case E_V8SImode: -+ cmode = V4SImode; -+ break; -+ case E_V8SFmode: -+ cmode = V4SFmode; -+ break; -+ case E_V4DImode: -+ cmode = V2DImode; -+ break; -+ case E_V4DFmode: -+ cmode = V2DFmode; -+ break; -+ case E_V4SImode: -+ cmode = V2SImode; -+ break; -+ case E_V4SFmode: -+ cmode = V2SFmode; -+ break; -+ case E_V2DImode: -+ cmode = DImode; -+ break; -+ case E_V2SImode: -+ cmode = SImode; -+ break; -+ case E_V2DFmode: -+ cmode = DFmode; -+ break; -+ case E_V2SFmode: -+ cmode = SFmode; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ -+ if (!register_operand (ops[1], cmode)) -+ ops[1] = force_reg (cmode, ops[1]); -+ if (!register_operand (ops[0], cmode)) -+ ops[0] = force_reg (cmode, ops[0]); -+ emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, ops[0], -+ ops[1]))); -+ break; -+ -+ case 4: -+ switch (mode) -+ { -+ case E_V4DImode: -+ cmode = V2DImode; -+ break; -+ case E_V4DFmode: -+ cmode = V2DFmode; -+ break; -+ case E_V4SImode: -+ cmode = V2SImode; -+ break; -+ case E_V4SFmode: -+ cmode = V2SFmode; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ goto half; -+ -+ case 8: -+ switch (mode) -+ { -+ case E_V8DImode: -+ cmode = V2DImode; -+ hmode = V4DImode; -+ break; -+ case E_V8DFmode: -+ cmode = V2DFmode; -+ hmode = V4DFmode; -+ break; -+ case E_V8SImode: -+ cmode = V2SImode; -+ hmode = V4SImode; -+ break; -+ case E_V8SFmode: -+ cmode = V2SFmode; -+ hmode = V4SFmode; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ goto half; -+ -+ case 16: -+ switch (mode) -+ { -+ case E_V16SImode: -+ cmode = V2SImode; -+ hmode = V4SImode; -+ gmode = V8SImode; -+ break; -+ case E_V16SFmode: -+ cmode = V2SFmode; -+ hmode = V4SFmode; -+ gmode = V8SFmode; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ goto half; -+ -+half: -+ /* FIXME: We process inputs backward to help RA. PR 36222. */ -+ i = n - 1; -+ j = (n >> 1) - 1; -+ for (; i > 0; i -= 2, j--) -+ { -+ first[j] = gen_reg_rtx (cmode); -+ v = gen_rtvec (2, ops[i - 1], ops[i]); -+ ix86_expand_vector_init (false, first[j], -+ gen_rtx_PARALLEL (cmode, v)); -+ } -+ -+ n >>= 1; -+ if (n > 4) -+ { -+ gcc_assert (hmode != VOIDmode); -+ gcc_assert (gmode != VOIDmode); -+ for (i = j = 0; i < n; i += 2, j++) -+ { -+ second[j] = gen_reg_rtx (hmode); -+ ix86_expand_vector_init_concat (hmode, second [j], -+ &first [i], 2); -+ } -+ n >>= 1; -+ for (i = j = 0; i < n; i += 2, j++) -+ { -+ third[j] = gen_reg_rtx (gmode); -+ ix86_expand_vector_init_concat (gmode, third[j], -+ &second[i], 2); -+ } -+ n >>= 1; -+ ix86_expand_vector_init_concat (mode, target, third, n); -+ } -+ else if (n > 2) -+ { -+ gcc_assert (hmode != VOIDmode); -+ for (i = j = 0; i < n; i += 2, j++) -+ { -+ second[j] = gen_reg_rtx (hmode); -+ ix86_expand_vector_init_concat (hmode, second [j], -+ &first [i], 2); -+ } -+ n >>= 1; -+ ix86_expand_vector_init_concat (mode, target, second, n); -+ } -+ else -+ ix86_expand_vector_init_concat (mode, target, first, n); -+ break; -+ -+ default: -+ gcc_unreachable (); -+ } -+} -+ -+/* A subroutine of ix86_expand_vector_init_general. 
Use vector -+ interleave to handle the most general case: all values variable, -+ and none identical. */ -+ -+static void -+ix86_expand_vector_init_interleave (machine_mode mode, -+ rtx target, rtx *ops, int n) -+{ -+ machine_mode first_imode, second_imode, third_imode, inner_mode; -+ int i, j; -+ rtx op0, op1; -+ rtx (*gen_load_even) (rtx, rtx, rtx); -+ rtx (*gen_interleave_first_low) (rtx, rtx, rtx); -+ rtx (*gen_interleave_second_low) (rtx, rtx, rtx); -+ -+ switch (mode) -+ { -+ case E_V8HImode: -+ gen_load_even = gen_vec_setv8hi; -+ gen_interleave_first_low = gen_vec_interleave_lowv4si; -+ gen_interleave_second_low = gen_vec_interleave_lowv2di; -+ inner_mode = HImode; -+ first_imode = V4SImode; -+ second_imode = V2DImode; -+ third_imode = VOIDmode; -+ break; -+ case E_V16QImode: -+ gen_load_even = gen_vec_setv16qi; -+ gen_interleave_first_low = gen_vec_interleave_lowv8hi; -+ gen_interleave_second_low = gen_vec_interleave_lowv4si; -+ inner_mode = QImode; -+ first_imode = V8HImode; -+ second_imode = V4SImode; -+ third_imode = V2DImode; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ -+ for (i = 0; i < n; i++) -+ { -+ /* Extend the odd elment to SImode using a paradoxical SUBREG. */ -+ op0 = gen_reg_rtx (SImode); -+ emit_move_insn (op0, gen_lowpart (SImode, ops [i + i])); -+ -+ /* Insert the SImode value as low element of V4SImode vector. */ -+ op1 = gen_reg_rtx (V4SImode); -+ op0 = gen_rtx_VEC_MERGE (V4SImode, -+ gen_rtx_VEC_DUPLICATE (V4SImode, -+ op0), -+ CONST0_RTX (V4SImode), -+ const1_rtx); -+ emit_insn (gen_rtx_SET (op1, op0)); -+ -+ /* Cast the V4SImode vector back to a vector in orignal mode. */ -+ op0 = gen_reg_rtx (mode); -+ emit_move_insn (op0, gen_lowpart (mode, op1)); -+ -+ /* Load even elements into the second position. */ -+ emit_insn (gen_load_even (op0, -+ force_reg (inner_mode, -+ ops [i + i + 1]), -+ const1_rtx)); -+ -+ /* Cast vector to FIRST_IMODE vector. */ -+ ops[i] = gen_reg_rtx (first_imode); -+ emit_move_insn (ops[i], gen_lowpart (first_imode, op0)); -+ } -+ -+ /* Interleave low FIRST_IMODE vectors. */ -+ for (i = j = 0; i < n; i += 2, j++) -+ { -+ op0 = gen_reg_rtx (first_imode); -+ emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1])); -+ -+ /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */ -+ ops[j] = gen_reg_rtx (second_imode); -+ emit_move_insn (ops[j], gen_lowpart (second_imode, op0)); -+ } -+ -+ /* Interleave low SECOND_IMODE vectors. */ -+ switch (second_imode) -+ { -+ case E_V4SImode: -+ for (i = j = 0; i < n / 2; i += 2, j++) -+ { -+ op0 = gen_reg_rtx (second_imode); -+ emit_insn (gen_interleave_second_low (op0, ops[i], -+ ops[i + 1])); -+ -+ /* Cast the SECOND_IMODE vector to the THIRD_IMODE -+ vector. */ -+ ops[j] = gen_reg_rtx (third_imode); -+ emit_move_insn (ops[j], gen_lowpart (third_imode, op0)); -+ } -+ second_imode = V2DImode; -+ gen_interleave_second_low = gen_vec_interleave_lowv2di; -+ /* FALLTHRU */ -+ -+ case E_V2DImode: -+ op0 = gen_reg_rtx (second_imode); -+ emit_insn (gen_interleave_second_low (op0, ops[0], -+ ops[1])); -+ -+ /* Cast the SECOND_IMODE vector back to a vector on original -+ mode. */ -+ emit_insn (gen_rtx_SET (target, gen_lowpart (mode, op0))); -+ break; -+ -+ default: -+ gcc_unreachable (); -+ } -+} -+ -+/* A subroutine of ix86_expand_vector_init. Handle the most general case: -+ all values variable, and none identical. 
*/ -+ -+static void -+ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode, -+ rtx target, rtx vals) -+{ -+ rtx ops[64], op0, op1, op2, op3, op4, op5; -+ machine_mode half_mode = VOIDmode; -+ machine_mode quarter_mode = VOIDmode; -+ int n, i; -+ -+ switch (mode) -+ { -+ case E_V2SFmode: -+ case E_V2SImode: -+ if (!mmx_ok && !TARGET_SSE) -+ break; -+ /* FALLTHRU */ -+ -+ case E_V16SImode: -+ case E_V16SFmode: -+ case E_V8DFmode: -+ case E_V8DImode: -+ case E_V8SFmode: -+ case E_V8SImode: -+ case E_V4DFmode: -+ case E_V4DImode: -+ case E_V4SFmode: -+ case E_V4SImode: -+ case E_V2DFmode: -+ case E_V2DImode: -+ n = GET_MODE_NUNITS (mode); -+ for (i = 0; i < n; i++) -+ ops[i] = XVECEXP (vals, 0, i); -+ ix86_expand_vector_init_concat (mode, target, ops, n); -+ return; -+ -+ case E_V2TImode: -+ for (i = 0; i < 2; i++) -+ ops[i] = gen_lowpart (V2DImode, XVECEXP (vals, 0, i)); -+ op0 = gen_reg_rtx (V4DImode); -+ ix86_expand_vector_init_concat (V4DImode, op0, ops, 2); -+ emit_move_insn (target, gen_lowpart (GET_MODE (target), op0)); -+ return; -+ -+ case E_V4TImode: -+ for (i = 0; i < 4; i++) -+ ops[i] = gen_lowpart (V2DImode, XVECEXP (vals, 0, i)); -+ ops[4] = gen_reg_rtx (V4DImode); -+ ix86_expand_vector_init_concat (V4DImode, ops[4], ops, 2); -+ ops[5] = gen_reg_rtx (V4DImode); -+ ix86_expand_vector_init_concat (V4DImode, ops[5], ops + 2, 2); -+ op0 = gen_reg_rtx (V8DImode); -+ ix86_expand_vector_init_concat (V8DImode, op0, ops + 4, 2); -+ emit_move_insn (target, gen_lowpart (GET_MODE (target), op0)); -+ return; -+ -+ case E_V32QImode: -+ half_mode = V16QImode; -+ goto half; -+ -+ case E_V16HImode: -+ half_mode = V8HImode; -+ goto half; -+ -+half: -+ n = GET_MODE_NUNITS (mode); -+ for (i = 0; i < n; i++) -+ ops[i] = XVECEXP (vals, 0, i); -+ op0 = gen_reg_rtx (half_mode); -+ op1 = gen_reg_rtx (half_mode); -+ ix86_expand_vector_init_interleave (half_mode, op0, ops, -+ n >> 2); -+ ix86_expand_vector_init_interleave (half_mode, op1, -+ &ops [n >> 1], n >> 2); -+ emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op0, op1))); -+ return; -+ -+ case E_V64QImode: -+ quarter_mode = V16QImode; -+ half_mode = V32QImode; -+ goto quarter; -+ -+ case E_V32HImode: -+ quarter_mode = V8HImode; -+ half_mode = V16HImode; -+ goto quarter; -+ -+quarter: -+ n = GET_MODE_NUNITS (mode); -+ for (i = 0; i < n; i++) -+ ops[i] = XVECEXP (vals, 0, i); -+ op0 = gen_reg_rtx (quarter_mode); -+ op1 = gen_reg_rtx (quarter_mode); -+ op2 = gen_reg_rtx (quarter_mode); -+ op3 = gen_reg_rtx (quarter_mode); -+ op4 = gen_reg_rtx (half_mode); -+ op5 = gen_reg_rtx (half_mode); -+ ix86_expand_vector_init_interleave (quarter_mode, op0, ops, -+ n >> 3); -+ ix86_expand_vector_init_interleave (quarter_mode, op1, -+ &ops [n >> 2], n >> 3); -+ ix86_expand_vector_init_interleave (quarter_mode, op2, -+ &ops [n >> 1], n >> 3); -+ ix86_expand_vector_init_interleave (quarter_mode, op3, -+ &ops [(n >> 1) | (n >> 2)], n >> 3); -+ emit_insn (gen_rtx_SET (op4, gen_rtx_VEC_CONCAT (half_mode, op0, op1))); -+ emit_insn (gen_rtx_SET (op5, gen_rtx_VEC_CONCAT (half_mode, op2, op3))); -+ emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op4, op5))); -+ return; -+ -+ case E_V16QImode: -+ if (!TARGET_SSE4_1) -+ break; -+ /* FALLTHRU */ -+ -+ case E_V8HImode: -+ if (!TARGET_SSE2) -+ break; -+ -+ /* Don't use ix86_expand_vector_init_interleave if we can't -+ move from GPR to SSE register directly. 
*/ -+ if (!TARGET_INTER_UNIT_MOVES_TO_VEC) -+ break; -+ -+ n = GET_MODE_NUNITS (mode); -+ for (i = 0; i < n; i++) -+ ops[i] = XVECEXP (vals, 0, i); -+ ix86_expand_vector_init_interleave (mode, target, ops, n >> 1); -+ return; -+ -+ case E_V4HImode: -+ case E_V8QImode: -+ break; -+ -+ default: -+ gcc_unreachable (); -+ } -+ -+ { -+ int i, j, n_elts, n_words, n_elt_per_word; -+ machine_mode inner_mode; -+ rtx words[4], shift; -+ -+ inner_mode = GET_MODE_INNER (mode); -+ n_elts = GET_MODE_NUNITS (mode); -+ n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD; -+ n_elt_per_word = n_elts / n_words; -+ shift = GEN_INT (GET_MODE_BITSIZE (inner_mode)); -+ -+ for (i = 0; i < n_words; ++i) -+ { -+ rtx word = NULL_RTX; -+ -+ for (j = 0; j < n_elt_per_word; ++j) -+ { -+ rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1); -+ elt = convert_modes (word_mode, inner_mode, elt, true); -+ -+ if (j == 0) -+ word = elt; -+ else -+ { -+ word = expand_simple_binop (word_mode, ASHIFT, word, shift, -+ word, 1, OPTAB_LIB_WIDEN); -+ word = expand_simple_binop (word_mode, IOR, word, elt, -+ word, 1, OPTAB_LIB_WIDEN); -+ } -+ } -+ -+ words[i] = word; -+ } -+ -+ if (n_words == 1) -+ emit_move_insn (target, gen_lowpart (mode, words[0])); -+ else if (n_words == 2) -+ { -+ rtx tmp = gen_reg_rtx (mode); -+ emit_clobber (tmp); -+ emit_move_insn (gen_lowpart (word_mode, tmp), words[0]); -+ emit_move_insn (gen_highpart (word_mode, tmp), words[1]); -+ emit_move_insn (target, tmp); -+ } -+ else if (n_words == 4) -+ { -+ rtx tmp = gen_reg_rtx (V4SImode); -+ gcc_assert (word_mode == SImode); -+ vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words)); -+ ix86_expand_vector_init_general (false, V4SImode, tmp, vals); -+ emit_move_insn (target, gen_lowpart (mode, tmp)); -+ } -+ else -+ gcc_unreachable (); -+ } -+} -+ -+/* Initialize vector TARGET via VALS. Suppress the use of MMX -+ instructions unless MMX_OK is true. */ -+ -+void -+ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals) -+{ -+ machine_mode mode = GET_MODE (target); -+ machine_mode inner_mode = GET_MODE_INNER (mode); -+ int n_elts = GET_MODE_NUNITS (mode); -+ int n_var = 0, one_var = -1; -+ bool all_same = true, all_const_zero = true; -+ int i; -+ rtx x; -+ -+ /* Handle first initialization from vector elts. */ -+ if (n_elts != XVECLEN (vals, 0)) -+ { -+ rtx subtarget = target; -+ x = XVECEXP (vals, 0, 0); -+ gcc_assert (GET_MODE_INNER (GET_MODE (x)) == inner_mode); -+ if (GET_MODE_NUNITS (GET_MODE (x)) * 2 == n_elts) -+ { -+ rtx ops[2] = { XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1) }; -+ if (inner_mode == QImode || inner_mode == HImode) -+ { -+ unsigned int n_bits = n_elts * GET_MODE_SIZE (inner_mode); -+ mode = mode_for_vector (SImode, n_bits / 4).require (); -+ inner_mode = mode_for_vector (SImode, n_bits / 8).require (); -+ ops[0] = gen_lowpart (inner_mode, ops[0]); -+ ops[1] = gen_lowpart (inner_mode, ops[1]); -+ subtarget = gen_reg_rtx (mode); -+ } -+ ix86_expand_vector_init_concat (mode, subtarget, ops, 2); -+ if (subtarget != target) -+ emit_move_insn (target, gen_lowpart (GET_MODE (target), subtarget)); -+ return; -+ } -+ gcc_unreachable (); -+ } -+ -+ for (i = 0; i < n_elts; ++i) -+ { -+ x = XVECEXP (vals, 0, i); -+ if (!(CONST_SCALAR_INT_P (x) -+ || CONST_DOUBLE_P (x) -+ || CONST_FIXED_P (x))) -+ n_var++, one_var = i; -+ else if (x != CONST0_RTX (inner_mode)) -+ all_const_zero = false; -+ if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) -+ all_same = false; -+ } -+ -+ /* Constants are best loaded from the constant pool. 
*/ -+ if (n_var == 0) -+ { -+ emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))); -+ return; -+ } -+ -+ /* If all values are identical, broadcast the value. */ -+ if (all_same -+ && ix86_expand_vector_init_duplicate (mmx_ok, mode, target, -+ XVECEXP (vals, 0, 0))) -+ return; -+ -+ /* Values where only one field is non-constant are best loaded from -+ the pool and overwritten via move later. */ -+ if (n_var == 1) -+ { -+ if (all_const_zero -+ && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target, -+ XVECEXP (vals, 0, one_var), -+ one_var)) -+ return; -+ -+ if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var)) -+ return; -+ } -+ -+ ix86_expand_vector_init_general (mmx_ok, mode, target, vals); -+} -+ -+void -+ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt) -+{ -+ machine_mode mode = GET_MODE (target); -+ machine_mode inner_mode = GET_MODE_INNER (mode); -+ machine_mode half_mode; -+ bool use_vec_merge = false; -+ rtx tmp; -+ static rtx (*gen_extract[6][2]) (rtx, rtx) -+ = { -+ { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi }, -+ { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi }, -+ { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si }, -+ { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di }, -+ { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf }, -+ { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df } -+ }; -+ static rtx (*gen_insert[6][2]) (rtx, rtx, rtx) -+ = { -+ { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi }, -+ { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi }, -+ { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si }, -+ { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di }, -+ { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf }, -+ { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df } -+ }; -+ int i, j, n; -+ machine_mode mmode = VOIDmode; -+ rtx (*gen_blendm) (rtx, rtx, rtx, rtx); -+ -+ switch (mode) -+ { -+ case E_V2SFmode: -+ case E_V2SImode: -+ if (mmx_ok) -+ { -+ tmp = gen_reg_rtx (GET_MODE_INNER (mode)); -+ ix86_expand_vector_extract (true, tmp, target, 1 - elt); -+ if (elt == 0) -+ tmp = gen_rtx_VEC_CONCAT (mode, val, tmp); -+ else -+ tmp = gen_rtx_VEC_CONCAT (mode, tmp, val); -+ emit_insn (gen_rtx_SET (target, tmp)); -+ return; -+ } -+ break; -+ -+ case E_V2DImode: -+ use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT; -+ if (use_vec_merge) -+ break; -+ -+ tmp = gen_reg_rtx (GET_MODE_INNER (mode)); -+ ix86_expand_vector_extract (false, tmp, target, 1 - elt); -+ if (elt == 0) -+ tmp = gen_rtx_VEC_CONCAT (mode, val, tmp); -+ else -+ tmp = gen_rtx_VEC_CONCAT (mode, tmp, val); -+ emit_insn (gen_rtx_SET (target, tmp)); -+ return; -+ -+ case E_V2DFmode: -+ { -+ rtx op0, op1; -+ -+ /* For the two element vectors, we implement a VEC_CONCAT with -+ the extraction of the other element. 
*/ -+ -+ tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt))); -+ tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp); -+ -+ if (elt == 0) -+ op0 = val, op1 = tmp; -+ else -+ op0 = tmp, op1 = val; -+ -+ tmp = gen_rtx_VEC_CONCAT (mode, op0, op1); -+ emit_insn (gen_rtx_SET (target, tmp)); -+ } -+ return; -+ -+ case E_V4SFmode: -+ use_vec_merge = TARGET_SSE4_1; -+ if (use_vec_merge) -+ break; -+ -+ switch (elt) -+ { -+ case 0: -+ use_vec_merge = true; -+ break; -+ -+ case 1: -+ /* tmp = target = A B C D */ -+ tmp = copy_to_reg (target); -+ /* target = A A B B */ -+ emit_insn (gen_vec_interleave_lowv4sf (target, target, target)); -+ /* target = X A B B */ -+ ix86_expand_vector_set (false, target, val, 0); -+ /* target = A X C D */ -+ emit_insn (gen_sse_shufps_v4sf (target, target, tmp, -+ const1_rtx, const0_rtx, -+ GEN_INT (2+4), GEN_INT (3+4))); -+ return; -+ -+ case 2: -+ /* tmp = target = A B C D */ -+ tmp = copy_to_reg (target); -+ /* tmp = X B C D */ -+ ix86_expand_vector_set (false, tmp, val, 0); -+ /* target = A B X D */ -+ emit_insn (gen_sse_shufps_v4sf (target, target, tmp, -+ const0_rtx, const1_rtx, -+ GEN_INT (0+4), GEN_INT (3+4))); -+ return; -+ -+ case 3: -+ /* tmp = target = A B C D */ -+ tmp = copy_to_reg (target); -+ /* tmp = X B C D */ -+ ix86_expand_vector_set (false, tmp, val, 0); -+ /* target = A B X D */ -+ emit_insn (gen_sse_shufps_v4sf (target, target, tmp, -+ const0_rtx, const1_rtx, -+ GEN_INT (2+4), GEN_INT (0+4))); -+ return; -+ -+ default: -+ gcc_unreachable (); -+ } -+ break; -+ -+ case E_V4SImode: -+ use_vec_merge = TARGET_SSE4_1; -+ if (use_vec_merge) -+ break; -+ -+ /* Element 0 handled by vec_merge below. */ -+ if (elt == 0) -+ { -+ use_vec_merge = true; -+ break; -+ } -+ -+ if (TARGET_SSE2) -+ { -+ /* With SSE2, use integer shuffles to swap element 0 and ELT, -+ store into element 0, then shuffle them back. */ -+ -+ rtx order[4]; -+ -+ order[0] = GEN_INT (elt); -+ order[1] = const1_rtx; -+ order[2] = const2_rtx; -+ order[3] = GEN_INT (3); -+ order[elt] = const0_rtx; -+ -+ emit_insn (gen_sse2_pshufd_1 (target, target, order[0], -+ order[1], order[2], order[3])); -+ -+ ix86_expand_vector_set (false, target, val, 0); -+ -+ emit_insn (gen_sse2_pshufd_1 (target, target, order[0], -+ order[1], order[2], order[3])); -+ } -+ else -+ { -+ /* For SSE1, we have to reuse the V4SF code. */ -+ rtx t = gen_reg_rtx (V4SFmode); -+ emit_move_insn (t, gen_lowpart (V4SFmode, target)); -+ ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt); -+ emit_move_insn (target, gen_lowpart (mode, t)); -+ } -+ return; -+ -+ case E_V8HImode: -+ use_vec_merge = TARGET_SSE2; -+ break; -+ case E_V4HImode: -+ use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A); -+ break; -+ -+ case E_V16QImode: -+ use_vec_merge = TARGET_SSE4_1; -+ break; -+ -+ case E_V8QImode: -+ break; -+ -+ case E_V32QImode: -+ half_mode = V16QImode; -+ j = 0; -+ n = 16; -+ goto half; -+ -+ case E_V16HImode: -+ half_mode = V8HImode; -+ j = 1; -+ n = 8; -+ goto half; -+ -+ case E_V8SImode: -+ half_mode = V4SImode; -+ j = 2; -+ n = 4; -+ goto half; -+ -+ case E_V4DImode: -+ half_mode = V2DImode; -+ j = 3; -+ n = 2; -+ goto half; -+ -+ case E_V8SFmode: -+ half_mode = V4SFmode; -+ j = 4; -+ n = 4; -+ goto half; -+ -+ case E_V4DFmode: -+ half_mode = V2DFmode; -+ j = 5; -+ n = 2; -+ goto half; -+ -+half: -+ /* Compute offset. */ -+ i = elt / n; -+ elt %= n; -+ -+ gcc_assert (i <= 1); -+ -+ /* Extract the half. 
*/ -+ tmp = gen_reg_rtx (half_mode); -+ emit_insn (gen_extract[j][i] (tmp, target)); -+ -+ /* Put val in tmp at elt. */ -+ ix86_expand_vector_set (false, tmp, val, elt); -+ -+ /* Put it back. */ -+ emit_insn (gen_insert[j][i] (target, target, tmp)); -+ return; -+ -+ case E_V8DFmode: -+ if (TARGET_AVX512F) -+ { -+ mmode = QImode; -+ gen_blendm = gen_avx512f_blendmv8df; -+ } -+ break; -+ -+ case E_V8DImode: -+ if (TARGET_AVX512F) -+ { -+ mmode = QImode; -+ gen_blendm = gen_avx512f_blendmv8di; -+ } -+ break; -+ -+ case E_V16SFmode: -+ if (TARGET_AVX512F) -+ { -+ mmode = HImode; -+ gen_blendm = gen_avx512f_blendmv16sf; -+ } -+ break; -+ -+ case E_V16SImode: -+ if (TARGET_AVX512F) -+ { -+ mmode = HImode; -+ gen_blendm = gen_avx512f_blendmv16si; -+ } -+ break; -+ -+ case E_V32HImode: -+ if (TARGET_AVX512BW) -+ { -+ mmode = SImode; -+ gen_blendm = gen_avx512bw_blendmv32hi; -+ } -+ else if (TARGET_AVX512F) -+ { -+ half_mode = E_V8HImode; -+ n = 8; -+ goto quarter; -+ } -+ break; -+ -+ case E_V64QImode: -+ if (TARGET_AVX512BW) -+ { -+ mmode = DImode; -+ gen_blendm = gen_avx512bw_blendmv64qi; -+ } -+ else if (TARGET_AVX512F) -+ { -+ half_mode = E_V16QImode; -+ n = 16; -+ goto quarter; -+ } -+ break; -+ -+quarter: -+ /* Compute offset. */ -+ i = elt / n; -+ elt %= n; -+ -+ gcc_assert (i <= 3); -+ -+ { -+ /* Extract the quarter. */ -+ tmp = gen_reg_rtx (V4SImode); -+ rtx tmp2 = gen_lowpart (V16SImode, target); -+ rtx mask = gen_reg_rtx (QImode); -+ -+ emit_move_insn (mask, constm1_rtx); -+ emit_insn (gen_avx512f_vextracti32x4_mask (tmp, tmp2, GEN_INT (i), -+ tmp, mask)); -+ -+ tmp2 = gen_reg_rtx (half_mode); -+ emit_move_insn (tmp2, gen_lowpart (half_mode, tmp)); -+ tmp = tmp2; -+ -+ /* Put val in tmp at elt. */ -+ ix86_expand_vector_set (false, tmp, val, elt); -+ -+ /* Put it back. */ -+ tmp2 = gen_reg_rtx (V16SImode); -+ rtx tmp3 = gen_lowpart (V16SImode, target); -+ mask = gen_reg_rtx (HImode); -+ emit_move_insn (mask, constm1_rtx); -+ tmp = gen_lowpart (V4SImode, tmp); -+ emit_insn (gen_avx512f_vinserti32x4_mask (tmp2, tmp3, tmp, GEN_INT (i), -+ tmp3, mask)); -+ emit_move_insn (target, gen_lowpart (mode, tmp2)); -+ } -+ return; -+ -+ default: -+ break; -+ } -+ -+ if (mmode != VOIDmode) -+ { -+ tmp = gen_reg_rtx (mode); -+ emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val))); -+ /* The avx512*_blendm expanders have different operand order -+ from VEC_MERGE. In VEC_MERGE, the first input operand is used for -+ elements where the mask is set and second input operand otherwise, -+ in {sse,avx}*_*blend* the first input operand is used for elements -+ where the mask is clear and second input operand otherwise. 
*/ -+ emit_insn (gen_blendm (target, target, tmp, -+ force_reg (mmode, -+ gen_int_mode (HOST_WIDE_INT_1U << elt, -+ mmode)))); -+ } -+ else if (use_vec_merge) -+ { -+ tmp = gen_rtx_VEC_DUPLICATE (mode, val); -+ tmp = gen_rtx_VEC_MERGE (mode, tmp, target, -+ GEN_INT (HOST_WIDE_INT_1U << elt)); -+ emit_insn (gen_rtx_SET (target, tmp)); -+ } -+ else -+ { -+ rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode)); -+ -+ emit_move_insn (mem, target); -+ -+ tmp = adjust_address (mem, inner_mode, elt * GET_MODE_SIZE (inner_mode)); -+ emit_move_insn (tmp, val); -+ -+ emit_move_insn (target, mem); -+ } -+} -+ -+void -+ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt) -+{ -+ machine_mode mode = GET_MODE (vec); -+ machine_mode inner_mode = GET_MODE_INNER (mode); -+ bool use_vec_extr = false; -+ rtx tmp; -+ -+ switch (mode) -+ { -+ case E_V2SImode: -+ case E_V2SFmode: -+ if (!mmx_ok) -+ break; -+ /* FALLTHRU */ -+ -+ case E_V2DFmode: -+ case E_V2DImode: -+ case E_V2TImode: -+ case E_V4TImode: -+ use_vec_extr = true; -+ break; -+ -+ case E_V4SFmode: -+ use_vec_extr = TARGET_SSE4_1; -+ if (use_vec_extr) -+ break; -+ -+ switch (elt) -+ { -+ case 0: -+ tmp = vec; -+ break; -+ -+ case 1: -+ case 3: -+ tmp = gen_reg_rtx (mode); -+ emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec, -+ GEN_INT (elt), GEN_INT (elt), -+ GEN_INT (elt+4), GEN_INT (elt+4))); -+ break; -+ -+ case 2: -+ tmp = gen_reg_rtx (mode); -+ emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec)); -+ break; -+ -+ default: -+ gcc_unreachable (); -+ } -+ vec = tmp; -+ use_vec_extr = true; -+ elt = 0; -+ break; -+ -+ case E_V4SImode: -+ use_vec_extr = TARGET_SSE4_1; -+ if (use_vec_extr) -+ break; -+ -+ if (TARGET_SSE2) -+ { -+ switch (elt) -+ { -+ case 0: -+ tmp = vec; -+ break; -+ -+ case 1: -+ case 3: -+ tmp = gen_reg_rtx (mode); -+ emit_insn (gen_sse2_pshufd_1 (tmp, vec, -+ GEN_INT (elt), GEN_INT (elt), -+ GEN_INT (elt), GEN_INT (elt))); -+ break; -+ -+ case 2: -+ tmp = gen_reg_rtx (mode); -+ emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec)); -+ break; -+ -+ default: -+ gcc_unreachable (); -+ } -+ vec = tmp; -+ use_vec_extr = true; -+ elt = 0; -+ } -+ else -+ { -+ /* For SSE1, we have to reuse the V4SF code. 
*/ -+ ix86_expand_vector_extract (false, gen_lowpart (SFmode, target), -+ gen_lowpart (V4SFmode, vec), elt); -+ return; -+ } -+ break; -+ -+ case E_V8HImode: -+ use_vec_extr = TARGET_SSE2; -+ break; -+ case E_V4HImode: -+ use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A); -+ break; -+ -+ case E_V16QImode: -+ use_vec_extr = TARGET_SSE4_1; -+ break; -+ -+ case E_V8SFmode: -+ if (TARGET_AVX) -+ { -+ tmp = gen_reg_rtx (V4SFmode); -+ if (elt < 4) -+ emit_insn (gen_vec_extract_lo_v8sf (tmp, vec)); -+ else -+ emit_insn (gen_vec_extract_hi_v8sf (tmp, vec)); -+ ix86_expand_vector_extract (false, target, tmp, elt & 3); -+ return; -+ } -+ break; -+ -+ case E_V4DFmode: -+ if (TARGET_AVX) -+ { -+ tmp = gen_reg_rtx (V2DFmode); -+ if (elt < 2) -+ emit_insn (gen_vec_extract_lo_v4df (tmp, vec)); -+ else -+ emit_insn (gen_vec_extract_hi_v4df (tmp, vec)); -+ ix86_expand_vector_extract (false, target, tmp, elt & 1); -+ return; -+ } -+ break; -+ -+ case E_V32QImode: -+ if (TARGET_AVX) -+ { -+ tmp = gen_reg_rtx (V16QImode); -+ if (elt < 16) -+ emit_insn (gen_vec_extract_lo_v32qi (tmp, vec)); -+ else -+ emit_insn (gen_vec_extract_hi_v32qi (tmp, vec)); -+ ix86_expand_vector_extract (false, target, tmp, elt & 15); -+ return; -+ } -+ break; -+ -+ case E_V16HImode: -+ if (TARGET_AVX) -+ { -+ tmp = gen_reg_rtx (V8HImode); -+ if (elt < 8) -+ emit_insn (gen_vec_extract_lo_v16hi (tmp, vec)); -+ else -+ emit_insn (gen_vec_extract_hi_v16hi (tmp, vec)); -+ ix86_expand_vector_extract (false, target, tmp, elt & 7); -+ return; -+ } -+ break; -+ -+ case E_V8SImode: -+ if (TARGET_AVX) -+ { -+ tmp = gen_reg_rtx (V4SImode); -+ if (elt < 4) -+ emit_insn (gen_vec_extract_lo_v8si (tmp, vec)); -+ else -+ emit_insn (gen_vec_extract_hi_v8si (tmp, vec)); -+ ix86_expand_vector_extract (false, target, tmp, elt & 3); -+ return; -+ } -+ break; -+ -+ case E_V4DImode: -+ if (TARGET_AVX) -+ { -+ tmp = gen_reg_rtx (V2DImode); -+ if (elt < 2) -+ emit_insn (gen_vec_extract_lo_v4di (tmp, vec)); -+ else -+ emit_insn (gen_vec_extract_hi_v4di (tmp, vec)); -+ ix86_expand_vector_extract (false, target, tmp, elt & 1); -+ return; -+ } -+ break; -+ -+ case E_V32HImode: -+ if (TARGET_AVX512BW) -+ { -+ tmp = gen_reg_rtx (V16HImode); -+ if (elt < 16) -+ emit_insn (gen_vec_extract_lo_v32hi (tmp, vec)); -+ else -+ emit_insn (gen_vec_extract_hi_v32hi (tmp, vec)); -+ ix86_expand_vector_extract (false, target, tmp, elt & 15); -+ return; -+ } -+ break; -+ -+ case E_V64QImode: -+ if (TARGET_AVX512BW) -+ { -+ tmp = gen_reg_rtx (V32QImode); -+ if (elt < 32) -+ emit_insn (gen_vec_extract_lo_v64qi (tmp, vec)); -+ else -+ emit_insn (gen_vec_extract_hi_v64qi (tmp, vec)); -+ ix86_expand_vector_extract (false, target, tmp, elt & 31); -+ return; -+ } -+ break; -+ -+ case E_V16SFmode: -+ tmp = gen_reg_rtx (V8SFmode); -+ if (elt < 8) -+ emit_insn (gen_vec_extract_lo_v16sf (tmp, vec)); -+ else -+ emit_insn (gen_vec_extract_hi_v16sf (tmp, vec)); -+ ix86_expand_vector_extract (false, target, tmp, elt & 7); -+ return; -+ -+ case E_V8DFmode: -+ tmp = gen_reg_rtx (V4DFmode); -+ if (elt < 4) -+ emit_insn (gen_vec_extract_lo_v8df (tmp, vec)); -+ else -+ emit_insn (gen_vec_extract_hi_v8df (tmp, vec)); -+ ix86_expand_vector_extract (false, target, tmp, elt & 3); -+ return; -+ -+ case E_V16SImode: -+ tmp = gen_reg_rtx (V8SImode); -+ if (elt < 8) -+ emit_insn (gen_vec_extract_lo_v16si (tmp, vec)); -+ else -+ emit_insn (gen_vec_extract_hi_v16si (tmp, vec)); -+ ix86_expand_vector_extract (false, target, tmp, elt & 7); -+ return; -+ -+ case E_V8DImode: -+ tmp = gen_reg_rtx (V4DImode); 
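The 256-bit and 512-bit cases of ix86_expand_vector_extract above all follow the same halving strategy: extract the low or high half into a narrower vector register, then recurse with the element index reduced to an offset within that half. A minimal sketch of the same idea at the intrinsics level, assuming AVX and a fixed element index (the function name and the choice of element 5 are illustrative only, not taken from the patch):

  #include <immintrin.h>

  /* Extract element 5 of an 8-float vector: take the high 128-bit half
     (elements 4..7), move the wanted lane down to position 0, read it out.  */
  float
  extract_elt5 (__m256 v)
  {
    __m128 hi = _mm256_extractf128_ps (v, 1);  /* elements 4..7 */
    hi = _mm_permute_ps (hi, 1);               /* lane 1 of the half -> lane 0 */
    return _mm_cvtss_f32 (hi);
  }

Compile with -mavx; element 5 of the full vector is lane 1 of the high half, mirroring the "elt & 3" / "elt & 7" reduction used by the expander.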
-+ if (elt < 4) -+ emit_insn (gen_vec_extract_lo_v8di (tmp, vec)); -+ else -+ emit_insn (gen_vec_extract_hi_v8di (tmp, vec)); -+ ix86_expand_vector_extract (false, target, tmp, elt & 3); -+ return; -+ -+ case E_V8QImode: -+ /* ??? Could extract the appropriate HImode element and shift. */ -+ default: -+ break; -+ } -+ -+ if (use_vec_extr) -+ { -+ tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt))); -+ tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp); -+ -+ /* Let the rtl optimizers know about the zero extension performed. */ -+ if (inner_mode == QImode || inner_mode == HImode) -+ { -+ tmp = gen_rtx_ZERO_EXTEND (SImode, tmp); -+ target = gen_lowpart (SImode, target); -+ } -+ -+ emit_insn (gen_rtx_SET (target, tmp)); -+ } -+ else -+ { -+ rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode)); -+ -+ emit_move_insn (mem, vec); -+ -+ tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode)); -+ emit_move_insn (target, tmp); -+ } -+} -+ -+/* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC -+ to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode. -+ The upper bits of DEST are undefined, though they shouldn't cause -+ exceptions (some bits from src or all zeros are ok). */ -+ -+static void -+emit_reduc_half (rtx dest, rtx src, int i) -+{ -+ rtx tem, d = dest; -+ switch (GET_MODE (src)) -+ { -+ case E_V4SFmode: -+ if (i == 128) -+ tem = gen_sse_movhlps (dest, src, src); -+ else -+ tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx, -+ GEN_INT (1 + 4), GEN_INT (1 + 4)); -+ break; -+ case E_V2DFmode: -+ tem = gen_vec_interleave_highv2df (dest, src, src); -+ break; -+ case E_V16QImode: -+ case E_V8HImode: -+ case E_V4SImode: -+ case E_V2DImode: -+ d = gen_reg_rtx (V1TImode); -+ tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src), -+ GEN_INT (i / 2)); -+ break; -+ case E_V8SFmode: -+ if (i == 256) -+ tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx); -+ else -+ tem = gen_avx_shufps256 (dest, src, src, -+ GEN_INT (i == 128 ? 2 + (3 << 2) : 1)); -+ break; -+ case E_V4DFmode: -+ if (i == 256) -+ tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx); -+ else -+ tem = gen_avx_shufpd256 (dest, src, src, const1_rtx); -+ break; -+ case E_V32QImode: -+ case E_V16HImode: -+ case E_V8SImode: -+ case E_V4DImode: -+ if (i == 256) -+ { -+ if (GET_MODE (dest) != V4DImode) -+ d = gen_reg_rtx (V4DImode); -+ tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src), -+ gen_lowpart (V4DImode, src), -+ const1_rtx); -+ } -+ else -+ { -+ d = gen_reg_rtx (V2TImode); -+ tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src), -+ GEN_INT (i / 2)); -+ } -+ break; -+ case E_V64QImode: -+ case E_V32HImode: -+ case E_V16SImode: -+ case E_V16SFmode: -+ case E_V8DImode: -+ case E_V8DFmode: -+ if (i > 128) -+ tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest), -+ gen_lowpart (V16SImode, src), -+ gen_lowpart (V16SImode, src), -+ GEN_INT (0x4 + (i == 512 ? 4 : 0)), -+ GEN_INT (0x5 + (i == 512 ? 4 : 0)), -+ GEN_INT (0x6 + (i == 512 ? 4 : 0)), -+ GEN_INT (0x7 + (i == 512 ? 4 : 0)), -+ GEN_INT (0xC), GEN_INT (0xD), -+ GEN_INT (0xE), GEN_INT (0xF), -+ GEN_INT (0x10), GEN_INT (0x11), -+ GEN_INT (0x12), GEN_INT (0x13), -+ GEN_INT (0x14), GEN_INT (0x15), -+ GEN_INT (0x16), GEN_INT (0x17)); -+ else -+ tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest), -+ gen_lowpart (V16SImode, src), -+ GEN_INT (i == 128 ? 0x2 : 0x1), -+ GEN_INT (0x3), -+ GEN_INT (0x3), -+ GEN_INT (0x3), -+ GEN_INT (i == 128 ? 
0x6 : 0x5), -+ GEN_INT (0x7), -+ GEN_INT (0x7), -+ GEN_INT (0x7), -+ GEN_INT (i == 128 ? 0xA : 0x9), -+ GEN_INT (0xB), -+ GEN_INT (0xB), -+ GEN_INT (0xB), -+ GEN_INT (i == 128 ? 0xE : 0xD), -+ GEN_INT (0xF), -+ GEN_INT (0xF), -+ GEN_INT (0xF)); -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ emit_insn (tem); -+ if (d != dest) -+ emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d)); -+} -+ -+/* Expand a vector reduction. FN is the binary pattern to reduce; -+ DEST is the destination; IN is the input vector. */ -+ -+void -+ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in) -+{ -+ rtx half, dst, vec = in; -+ machine_mode mode = GET_MODE (in); -+ int i; -+ -+ /* SSE4 has a special instruction for V8HImode UMIN reduction. */ -+ if (TARGET_SSE4_1 -+ && mode == V8HImode -+ && fn == gen_uminv8hi3) -+ { -+ emit_insn (gen_sse4_1_phminposuw (dest, in)); -+ return; -+ } -+ -+ for (i = GET_MODE_BITSIZE (mode); -+ i > GET_MODE_UNIT_BITSIZE (mode); -+ i >>= 1) -+ { -+ half = gen_reg_rtx (mode); -+ emit_reduc_half (half, vec, i); -+ if (i == GET_MODE_UNIT_BITSIZE (mode) * 2) -+ dst = dest; -+ else -+ dst = gen_reg_rtx (mode); -+ emit_insn (fn (dst, half, vec)); -+ vec = dst; -+ } -+} -+ -+/* Output code to perform a conditional jump to LABEL, if C2 flag in -+ FP status register is set. */ -+ -+void -+ix86_emit_fp_unordered_jump (rtx label) -+{ -+ rtx reg = gen_reg_rtx (HImode); -+ rtx_insn *insn; -+ rtx temp; -+ -+ emit_insn (gen_x86_fnstsw_1 (reg)); -+ -+ if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())) -+ { -+ emit_insn (gen_x86_sahf_1 (reg)); -+ -+ temp = gen_rtx_REG (CCmode, FLAGS_REG); -+ temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx); -+ } -+ else -+ { -+ emit_insn (gen_testqi_ext_1_ccno (reg, GEN_INT (0x04))); -+ -+ temp = gen_rtx_REG (CCNOmode, FLAGS_REG); -+ temp = gen_rtx_NE (VOIDmode, temp, const0_rtx); -+ } -+ -+ temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp, -+ gen_rtx_LABEL_REF (VOIDmode, label), -+ pc_rtx); -+ insn = emit_jump_insn (gen_rtx_SET (pc_rtx, temp)); -+ predict_jump (REG_BR_PROB_BASE * 10 / 100); -+ JUMP_LABEL (insn) = label; -+} -+ -+/* Output code to perform an sinh XFmode calculation. 
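As a rough stand-alone model of the halving strategy implemented by emit_reduc_half and the loop in ix86_expand_reduc above, the C sketch below reduces n elements in log2(n) combining steps; a plain array stands in for the vector register and the helper name is invented:

    #include <stdio.h>

    /* Fold the upper half of v[0..n) into the lower half until one
       element remains; n must be a power of two and + stands for
       whatever associative operation is being reduced.  */
    static void
    reduce_halves (float *v, int n)
    {
      for (int half = n / 2; half >= 1; half /= 2)
        for (int i = 0; i < half; i++)
          v[i] = v[i] + v[i + half];
    }

    int
    main (void)
    {
      float v[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
      reduce_halves (v, 8);
      printf ("%g\n", v[0]);   /* prints 36 */
      return 0;
    }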
*/ -+ -+void ix86_emit_i387_sinh (rtx op0, rtx op1) -+{ -+ rtx e1 = gen_reg_rtx (XFmode); -+ rtx e2 = gen_reg_rtx (XFmode); -+ rtx scratch = gen_reg_rtx (HImode); -+ rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG); -+ rtx half = const_double_from_real_value (dconsthalf, XFmode); -+ rtx cst1, tmp; -+ rtx_code_label *jump_label = gen_label_rtx (); -+ rtx_insn *insn; -+ -+ /* scratch = fxam (op1) */ -+ emit_insn (gen_fxamxf2_i387 (scratch, op1)); -+ -+ /* e1 = expm1 (|op1|) */ -+ emit_insn (gen_absxf2 (e2, op1)); -+ emit_insn (gen_expm1xf2 (e1, e2)); -+ -+ /* e2 = e1 / (e1 + 1.0) + e1 */ -+ cst1 = force_reg (XFmode, CONST1_RTX (XFmode)); -+ emit_insn (gen_addxf3 (e2, e1, cst1)); -+ emit_insn (gen_divxf3 (e2, e1, e2)); -+ emit_insn (gen_addxf3 (e2, e2, e1)); -+ -+ /* flags = signbit (op1) */ -+ emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02))); -+ -+ /* if (flags) then e2 = -e2 */ -+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, -+ gen_rtx_EQ (VOIDmode, flags, const0_rtx), -+ gen_rtx_LABEL_REF (VOIDmode, jump_label), -+ pc_rtx); -+ insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); -+ predict_jump (REG_BR_PROB_BASE * 50 / 100); -+ JUMP_LABEL (insn) = jump_label; -+ -+ emit_insn (gen_negxf2 (e2, e2)); -+ -+ emit_label (jump_label); -+ LABEL_NUSES (jump_label) = 1; -+ -+ /* op0 = 0.5 * e2 */ -+ half = force_reg (XFmode, half); -+ emit_insn (gen_mulxf3 (op0, e2, half)); -+} -+ -+/* Output code to perform an cosh XFmode calculation. */ -+ -+void ix86_emit_i387_cosh (rtx op0, rtx op1) -+{ -+ rtx e1 = gen_reg_rtx (XFmode); -+ rtx e2 = gen_reg_rtx (XFmode); -+ rtx half = const_double_from_real_value (dconsthalf, XFmode); -+ rtx cst1; -+ -+ /* e1 = exp (op1) */ -+ emit_insn (gen_expxf2 (e1, op1)); -+ -+ /* e2 = e1 + 1.0 / e1 */ -+ cst1 = force_reg (XFmode, CONST1_RTX (XFmode)); -+ emit_insn (gen_divxf3 (e2, cst1, e1)); -+ emit_insn (gen_addxf3 (e2, e1, e2)); -+ -+ /* op0 = 0.5 * e2 */ -+ half = force_reg (XFmode, half); -+ emit_insn (gen_mulxf3 (op0, e2, half)); -+} -+ -+/* Output code to perform an tanh XFmode calculation. */ -+ -+void ix86_emit_i387_tanh (rtx op0, rtx op1) -+{ -+ rtx e1 = gen_reg_rtx (XFmode); -+ rtx e2 = gen_reg_rtx (XFmode); -+ rtx scratch = gen_reg_rtx (HImode); -+ rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG); -+ rtx cst2, tmp; -+ rtx_code_label *jump_label = gen_label_rtx (); -+ rtx_insn *insn; -+ -+ /* scratch = fxam (op1) */ -+ emit_insn (gen_fxamxf2_i387 (scratch, op1)); -+ -+ /* e1 = expm1 (-|2 * op1|) */ -+ emit_insn (gen_addxf3 (e2, op1, op1)); -+ emit_insn (gen_absxf2 (e2, e2)); -+ emit_insn (gen_negxf2 (e2, e2)); -+ emit_insn (gen_expm1xf2 (e1, e2)); -+ -+ /* e2 = e1 / (e1 + 2.0) */ -+ cst2 = force_reg (XFmode, CONST2_RTX (XFmode)); -+ emit_insn (gen_addxf3 (e2, e1, cst2)); -+ emit_insn (gen_divxf3 (e2, e1, e2)); -+ -+ /* flags = signbit (op1) */ -+ emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02))); -+ -+ /* if (!flags) then e2 = -e2 */ -+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, -+ gen_rtx_NE (VOIDmode, flags, const0_rtx), -+ gen_rtx_LABEL_REF (VOIDmode, jump_label), -+ pc_rtx); -+ insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); -+ predict_jump (REG_BR_PROB_BASE * 50 / 100); -+ JUMP_LABEL (insn) = jump_label; -+ -+ emit_insn (gen_negxf2 (e2, e2)); -+ -+ emit_label (jump_label); -+ LABEL_NUSES (jump_label) = 1; -+ -+ emit_move_insn (op0, e2); -+} -+ -+/* Output code to perform an asinh XFmode calculation. 
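The expm1-based identity that ix86_emit_i387_sinh spells out in its comments can be checked with a small scalar sketch (standard math.h only; the helper name is made up):

    #include <math.h>
    #include <stdio.h>

    /* sinh via e1 = expm1 (|x|):  sinh (|x|) = 0.5 * (e1 / (e1 + 1) + e1),
       with the sign of x copied back afterwards.  */
    static double
    sinh_via_expm1 (double x)
    {
      double e1 = expm1 (fabs (x));
      double r = 0.5 * (e1 / (e1 + 1.0) + e1);
      return signbit (x) ? -r : r;
    }

    int
    main (void)
    {
      printf ("%.17g\n%.17g\n", sinh_via_expm1 (0.1), sinh (0.1));
      return 0;
    }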
*/ -+ -+void ix86_emit_i387_asinh (rtx op0, rtx op1) -+{ -+ rtx e1 = gen_reg_rtx (XFmode); -+ rtx e2 = gen_reg_rtx (XFmode); -+ rtx scratch = gen_reg_rtx (HImode); -+ rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG); -+ rtx cst1, tmp; -+ rtx_code_label *jump_label = gen_label_rtx (); -+ rtx_insn *insn; -+ -+ /* e2 = sqrt (op1^2 + 1.0) + 1.0 */ -+ emit_insn (gen_mulxf3 (e1, op1, op1)); -+ cst1 = force_reg (XFmode, CONST1_RTX (XFmode)); -+ emit_insn (gen_addxf3 (e2, e1, cst1)); -+ emit_insn (gen_sqrtxf2 (e2, e2)); -+ emit_insn (gen_addxf3 (e2, e2, cst1)); -+ -+ /* e1 = e1 / e2 */ -+ emit_insn (gen_divxf3 (e1, e1, e2)); -+ -+ /* scratch = fxam (op1) */ -+ emit_insn (gen_fxamxf2_i387 (scratch, op1)); -+ -+ /* e1 = e1 + |op1| */ -+ emit_insn (gen_absxf2 (e2, op1)); -+ emit_insn (gen_addxf3 (e1, e1, e2)); -+ -+ /* e2 = log1p (e1) */ -+ ix86_emit_i387_log1p (e2, e1); -+ -+ /* flags = signbit (op1) */ -+ emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02))); -+ -+ /* if (flags) then e2 = -e2 */ -+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, -+ gen_rtx_EQ (VOIDmode, flags, const0_rtx), -+ gen_rtx_LABEL_REF (VOIDmode, jump_label), -+ pc_rtx); -+ insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); -+ predict_jump (REG_BR_PROB_BASE * 50 / 100); -+ JUMP_LABEL (insn) = jump_label; -+ -+ emit_insn (gen_negxf2 (e2, e2)); -+ -+ emit_label (jump_label); -+ LABEL_NUSES (jump_label) = 1; -+ -+ emit_move_insn (op0, e2); -+} -+ -+/* Output code to perform an acosh XFmode calculation. */ -+ -+void ix86_emit_i387_acosh (rtx op0, rtx op1) -+{ -+ rtx e1 = gen_reg_rtx (XFmode); -+ rtx e2 = gen_reg_rtx (XFmode); -+ rtx cst1 = force_reg (XFmode, CONST1_RTX (XFmode)); -+ -+ /* e2 = sqrt (op1 + 1.0) */ -+ emit_insn (gen_addxf3 (e2, op1, cst1)); -+ emit_insn (gen_sqrtxf2 (e2, e2)); -+ -+ /* e1 = sqrt (op1 - 1.0) */ -+ emit_insn (gen_subxf3 (e1, op1, cst1)); -+ emit_insn (gen_sqrtxf2 (e1, e1)); -+ -+ /* e1 = e1 * e2 */ -+ emit_insn (gen_mulxf3 (e1, e1, e2)); -+ -+ /* e1 = e1 + op1 */ -+ emit_insn (gen_addxf3 (e1, e1, op1)); -+ -+ /* op0 = log (e1) */ -+ emit_insn (gen_logxf2 (op0, e1)); -+} -+ -+/* Output code to perform an atanh XFmode calculation. 
*/ -+ -+void ix86_emit_i387_atanh (rtx op0, rtx op1) -+{ -+ rtx e1 = gen_reg_rtx (XFmode); -+ rtx e2 = gen_reg_rtx (XFmode); -+ rtx scratch = gen_reg_rtx (HImode); -+ rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG); -+ rtx half = const_double_from_real_value (dconsthalf, XFmode); -+ rtx cst1, tmp; -+ rtx_code_label *jump_label = gen_label_rtx (); -+ rtx_insn *insn; -+ -+ /* scratch = fxam (op1) */ -+ emit_insn (gen_fxamxf2_i387 (scratch, op1)); -+ -+ /* e2 = |op1| */ -+ emit_insn (gen_absxf2 (e2, op1)); -+ -+ /* e1 = -(e2 + e2) / (e2 + 1.0) */ -+ cst1 = force_reg (XFmode, CONST1_RTX (XFmode)); -+ emit_insn (gen_addxf3 (e1, e2, cst1)); -+ emit_insn (gen_addxf3 (e2, e2, e2)); -+ emit_insn (gen_negxf2 (e2, e2)); -+ emit_insn (gen_divxf3 (e1, e2, e1)); -+ -+ /* e2 = log1p (e1) */ -+ ix86_emit_i387_log1p (e2, e1); -+ -+ /* flags = signbit (op1) */ -+ emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02))); -+ -+ /* if (!flags) then e2 = -e2 */ -+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, -+ gen_rtx_NE (VOIDmode, flags, const0_rtx), -+ gen_rtx_LABEL_REF (VOIDmode, jump_label), -+ pc_rtx); -+ insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); -+ predict_jump (REG_BR_PROB_BASE * 50 / 100); -+ JUMP_LABEL (insn) = jump_label; -+ -+ emit_insn (gen_negxf2 (e2, e2)); -+ -+ emit_label (jump_label); -+ LABEL_NUSES (jump_label) = 1; -+ -+ /* op0 = 0.5 * e2 */ -+ half = force_reg (XFmode, half); -+ emit_insn (gen_mulxf3 (op0, e2, half)); -+} -+ -+/* Output code to perform a log1p XFmode calculation. */ -+ -+void ix86_emit_i387_log1p (rtx op0, rtx op1) -+{ -+ rtx_code_label *label1 = gen_label_rtx (); -+ rtx_code_label *label2 = gen_label_rtx (); -+ -+ rtx tmp = gen_reg_rtx (XFmode); -+ rtx res = gen_reg_rtx (XFmode); -+ rtx cst, cstln2, cst1; -+ rtx_insn *insn; -+ -+ cst = const_double_from_real_value -+ (REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode), XFmode); -+ cstln2 = force_reg (XFmode, standard_80387_constant_rtx (4)); /* fldln2 */ -+ -+ emit_insn (gen_absxf2 (tmp, op1)); -+ -+ cst = force_reg (XFmode, cst); -+ ix86_expand_branch (GE, tmp, cst, label1); -+ predict_jump (REG_BR_PROB_BASE * 10 / 100); -+ insn = get_last_insn (); -+ JUMP_LABEL (insn) = label1; -+ -+ emit_insn (gen_fyl2xp1xf3_i387 (res, op1, cstln2)); -+ emit_jump (label2); -+ -+ emit_label (label1); -+ LABEL_NUSES (label1) = 1; -+ -+ cst1 = force_reg (XFmode, CONST1_RTX (XFmode)); -+ emit_insn (gen_rtx_SET (tmp, gen_rtx_PLUS (XFmode, op1, cst1))); -+ emit_insn (gen_fyl2xxf3_i387 (res, tmp, cstln2)); -+ -+ emit_label (label2); -+ LABEL_NUSES (label2) = 1; -+ -+ emit_move_insn (op0, res); -+} -+ -+/* Emit code for round calculation. 
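The cutover constant in ix86_emit_i387_log1p above is 1 - sqrt(1/2): below that magnitude the fyl2xp1 path is taken, above it the argument is bumped by 1.0 and fyl2x is used instead. A tiny check of the constant (standard math.h; sketch only):

    #include <math.h>
    #include <stdio.h>

    int
    main (void)
    {
      /* The literal the expander loads ...  */
      double cst = 0.29289321881345247561810596348408353;
      printf ("%.17g\n%.17g\n", cst, 1.0 - sqrt (0.5));  /* same value twice */
      return 0;
    }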
*/ -+void ix86_emit_i387_round (rtx op0, rtx op1) -+{ -+ machine_mode inmode = GET_MODE (op1); -+ machine_mode outmode = GET_MODE (op0); -+ rtx e1 = gen_reg_rtx (XFmode); -+ rtx e2 = gen_reg_rtx (XFmode); -+ rtx scratch = gen_reg_rtx (HImode); -+ rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG); -+ rtx half = const_double_from_real_value (dconsthalf, XFmode); -+ rtx res = gen_reg_rtx (outmode); -+ rtx_code_label *jump_label = gen_label_rtx (); -+ rtx (*floor_insn) (rtx, rtx); -+ rtx (*neg_insn) (rtx, rtx); -+ rtx_insn *insn; -+ rtx tmp; -+ -+ switch (inmode) -+ { -+ case E_SFmode: -+ case E_DFmode: -+ tmp = gen_reg_rtx (XFmode); -+ -+ emit_insn (gen_rtx_SET (tmp, gen_rtx_FLOAT_EXTEND (XFmode, op1))); -+ op1 = tmp; -+ break; -+ case E_XFmode: -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ -+ switch (outmode) -+ { -+ case E_SFmode: -+ floor_insn = gen_frndintxf2_floor; -+ neg_insn = gen_negsf2; -+ break; -+ case E_DFmode: -+ floor_insn = gen_frndintxf2_floor; -+ neg_insn = gen_negdf2; -+ break; -+ case E_XFmode: -+ floor_insn = gen_frndintxf2_floor; -+ neg_insn = gen_negxf2; -+ break; -+ case E_HImode: -+ floor_insn = gen_lfloorxfhi2; -+ neg_insn = gen_neghi2; -+ break; -+ case E_SImode: -+ floor_insn = gen_lfloorxfsi2; -+ neg_insn = gen_negsi2; -+ break; -+ case E_DImode: -+ floor_insn = gen_lfloorxfdi2; -+ neg_insn = gen_negdi2; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ -+ /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */ -+ -+ /* scratch = fxam(op1) */ -+ emit_insn (gen_fxamxf2_i387 (scratch, op1)); -+ -+ /* e1 = fabs(op1) */ -+ emit_insn (gen_absxf2 (e1, op1)); -+ -+ /* e2 = e1 + 0.5 */ -+ half = force_reg (XFmode, half); -+ emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (XFmode, e1, half))); -+ -+ /* res = floor(e2) */ -+ switch (outmode) -+ { -+ case E_SFmode: -+ case E_DFmode: -+ { -+ tmp = gen_reg_rtx (XFmode); -+ -+ emit_insn (floor_insn (tmp, e2)); -+ emit_insn (gen_rtx_SET (res, -+ gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp), -+ UNSPEC_TRUNC_NOOP))); -+ } -+ break; -+ default: -+ emit_insn (floor_insn (res, e2)); -+ } -+ -+ /* flags = signbit(a) */ -+ emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02))); -+ -+ /* if (flags) then res = -res */ -+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, -+ gen_rtx_EQ (VOIDmode, flags, const0_rtx), -+ gen_rtx_LABEL_REF (VOIDmode, jump_label), -+ pc_rtx); -+ insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); -+ predict_jump (REG_BR_PROB_BASE * 50 / 100); -+ JUMP_LABEL (insn) = jump_label; -+ -+ emit_insn (neg_insn (res, res)); -+ -+ emit_label (jump_label); -+ LABEL_NUSES (jump_label) = 1; -+ -+ emit_move_insn (op0, res); -+} -+ -+/* Output code to perform a Newton-Rhapson approximation of a single precision -+ floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. 
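A scalar model of the sequence ix86_emit_i387_round emits, i.e. round (a) computed as sgn (a) * floor (fabs (a) + 0.5) with halfway cases going away from zero (helper name invented; sketch only):

    #include <math.h>
    #include <stdio.h>

    static double
    round_via_floor (double a)
    {
      double r = floor (fabs (a) + 0.5);
      return signbit (a) ? -r : r;
    }

    int
    main (void)
    {
      printf ("%g %g %g\n", round_via_floor (2.5), round_via_floor (-2.5),
              round_via_floor (-0.4));   /* 3 -3 -0 */
      return 0;
    }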
*/ -+ -+void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode) -+{ -+ rtx x0, x1, e0, e1; -+ -+ x0 = gen_reg_rtx (mode); -+ e0 = gen_reg_rtx (mode); -+ e1 = gen_reg_rtx (mode); -+ x1 = gen_reg_rtx (mode); -+ -+ /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */ -+ -+ b = force_reg (mode, b); -+ -+ /* x0 = rcp(b) estimate */ -+ if (mode == V16SFmode || mode == V8DFmode) -+ { -+ if (TARGET_AVX512ER) -+ { -+ emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b), -+ UNSPEC_RCP28))); -+ /* res = a * x0 */ -+ emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x0))); -+ return; -+ } -+ else -+ emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b), -+ UNSPEC_RCP14))); -+ } -+ else -+ emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b), -+ UNSPEC_RCP))); -+ -+ /* e0 = x0 * b */ -+ emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b))); -+ -+ /* e0 = x0 * e0 */ -+ emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0))); -+ -+ /* e1 = x0 + x0 */ -+ emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0))); -+ -+ /* x1 = e1 - e0 */ -+ emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0))); -+ -+ /* res = a * x1 */ -+ emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1))); -+} -+ -+/* Output code to perform a Newton-Rhapson approximation of a -+ single precision floating point [reciprocal] square root. */ -+ -+void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip) -+{ -+ rtx x0, e0, e1, e2, e3, mthree, mhalf; -+ REAL_VALUE_TYPE r; -+ int unspec; -+ -+ x0 = gen_reg_rtx (mode); -+ e0 = gen_reg_rtx (mode); -+ e1 = gen_reg_rtx (mode); -+ e2 = gen_reg_rtx (mode); -+ e3 = gen_reg_rtx (mode); -+ -+ if (TARGET_AVX512ER && mode == V16SFmode) -+ { -+ if (recip) -+ /* res = rsqrt28(a) estimate */ -+ emit_insn (gen_rtx_SET (res, gen_rtx_UNSPEC (mode, gen_rtvec (1, a), -+ UNSPEC_RSQRT28))); -+ else -+ { -+ /* x0 = rsqrt28(a) estimate */ -+ emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a), -+ UNSPEC_RSQRT28))); -+ /* res = rcp28(x0) estimate */ -+ emit_insn (gen_rtx_SET (res, gen_rtx_UNSPEC (mode, gen_rtvec (1, x0), -+ UNSPEC_RCP28))); -+ } -+ return; -+ } -+ -+ real_from_integer (&r, VOIDmode, -3, SIGNED); -+ mthree = const_double_from_real_value (r, SFmode); -+ -+ real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL); -+ mhalf = const_double_from_real_value (r, SFmode); -+ unspec = UNSPEC_RSQRT; -+ -+ if (VECTOR_MODE_P (mode)) -+ { -+ mthree = ix86_build_const_vector (mode, true, mthree); -+ mhalf = ix86_build_const_vector (mode, true, mhalf); -+ /* There is no 512-bit rsqrt. There is however rsqrt14. */ -+ if (GET_MODE_SIZE (mode) == 64) -+ unspec = UNSPEC_RSQRT14; -+ } -+ -+ /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) -+ rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */ -+ -+ a = force_reg (mode, a); -+ -+ /* x0 = rsqrt(a) estimate */ -+ emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a), -+ unspec))); -+ -+ /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */ -+ if (!recip) -+ { -+ rtx zero = force_reg (mode, CONST0_RTX(mode)); -+ rtx mask; -+ -+ /* Handle masked compare. */ -+ if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64) -+ { -+ mask = gen_reg_rtx (HImode); -+ /* Imm value 0x4 corresponds to not-equal comparison. 
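The refinement ix86_emit_swdivsf performs, a / b ~= a * ((x0 + x0) - b * x0 * x0) with x0 an estimate of 1/b, can be tried in plain C; the perturbed reciprocal below merely stands in for the hardware rcpps estimate (sketch only):

    #include <stdio.h>

    static float
    div_nr (float a, float b)
    {
      float x0 = (1.0f / b) * 1.01f;        /* crude stand-in for rcpps  */
      float x1 = (x0 + x0) - b * x0 * x0;   /* one Newton-Raphson step   */
      return a * x1;                        /* a / b, to a few more bits */
    }

    int
    main (void)
    {
      printf ("%.7g vs %.7g\n", div_nr (1.0f, 3.0f), 1.0f / 3.0f);
      return 0;
    }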
*/ -+ emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4))); -+ emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask)); -+ } -+ else -+ { -+ mask = gen_reg_rtx (mode); -+ emit_insn (gen_rtx_SET (mask, gen_rtx_NE (mode, zero, a))); -+ emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0, mask))); -+ } -+ } -+ -+ /* e0 = x0 * a */ -+ emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a))); -+ /* e1 = e0 * x0 */ -+ emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0))); -+ -+ /* e2 = e1 - 3. */ -+ mthree = force_reg (mode, mthree); -+ emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree))); -+ -+ mhalf = force_reg (mode, mhalf); -+ if (recip) -+ /* e3 = -.5 * x0 */ -+ emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, x0, mhalf))); -+ else -+ /* e3 = -.5 * e0 */ -+ emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, e0, mhalf))); -+ /* ret = e2 * e3 */ -+ emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e3))); -+} -+ -+/* Expand fabs (OP0) and return a new rtx that holds the result. The -+ mask for masking out the sign-bit is stored in *SMASK, if that is -+ non-null. */ -+ -+static rtx -+ix86_expand_sse_fabs (rtx op0, rtx *smask) -+{ -+ machine_mode vmode, mode = GET_MODE (op0); -+ rtx xa, mask; -+ -+ xa = gen_reg_rtx (mode); -+ if (mode == SFmode) -+ vmode = V4SFmode; -+ else if (mode == DFmode) -+ vmode = V2DFmode; -+ else -+ vmode = mode; -+ mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true); -+ if (!VECTOR_MODE_P (mode)) -+ { -+ /* We need to generate a scalar mode mask in this case. */ -+ rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx)); -+ tmp = gen_rtx_VEC_SELECT (mode, mask, tmp); -+ mask = gen_reg_rtx (mode); -+ emit_insn (gen_rtx_SET (mask, tmp)); -+ } -+ emit_insn (gen_rtx_SET (xa, gen_rtx_AND (mode, op0, mask))); -+ -+ if (smask) -+ *smask = mask; -+ -+ return xa; -+} -+ -+/* Expands a comparison of OP0 with OP1 using comparison code CODE, -+ swapping the operands if SWAP_OPERANDS is true. The expanded -+ code is a forward jump to a newly created label in case the -+ comparison is true. The generated label rtx is returned. */ -+static rtx_code_label * -+ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1, -+ bool swap_operands) -+{ -+ bool unordered_compare = ix86_unordered_fp_compare (code); -+ rtx_code_label *label; -+ rtx tmp, reg; -+ -+ if (swap_operands) -+ std::swap (op0, op1); -+ -+ label = gen_label_rtx (); -+ tmp = gen_rtx_COMPARE (CCFPmode, op0, op1); -+ if (unordered_compare) -+ tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP); -+ reg = gen_rtx_REG (CCFPmode, FLAGS_REG); -+ emit_insn (gen_rtx_SET (reg, tmp)); -+ tmp = gen_rtx_fmt_ee (code, VOIDmode, reg, const0_rtx); -+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, -+ gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx); -+ tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); -+ JUMP_LABEL (tmp) = label; -+ -+ return label; -+} -+ -+/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1 -+ using comparison code CODE. Operands are swapped for the comparison if -+ SWAP_OPERANDS is true. Returns a rtx for the generated mask. */ -+static rtx -+ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1, -+ bool swap_operands) -+{ -+ rtx (*insn)(rtx, rtx, rtx, rtx); -+ machine_mode mode = GET_MODE (op0); -+ rtx mask = gen_reg_rtx (mode); -+ -+ if (swap_operands) -+ std::swap (op0, op1); -+ -+ insn = mode == DFmode ? 
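The same idea for the reciprocal square root, following the formula in the comment above (sqrt (a) = -0.5 * (a*x0) * (a*x0*x0 - 3.0) with x0 ~ 1/sqrt(a)); the perturbed estimate again stands in for rsqrtps (sketch only):

    #include <math.h>
    #include <stdio.h>

    static float
    sqrt_nr (float a)
    {
      float x0 = (1.0f / sqrtf (a)) * 1.01f;   /* stand-in for rsqrtps */
      float e0 = a * x0;                       /* a * x0               */
      float e1 = e0 * x0;                      /* a * x0 * x0          */
      return -0.5f * e0 * (e1 - 3.0f);         /* ~ sqrt (a)           */
    }

    int
    main (void)
    {
      printf ("%.7g vs %.7g\n", sqrt_nr (2.0f), sqrtf (2.0f));
      return 0;
    }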
gen_setcc_df_sse : gen_setcc_sf_sse; -+ -+ emit_insn (insn (mask, op0, op1, -+ gen_rtx_fmt_ee (code, mode, op0, op1))); -+ return mask; -+} -+ -+/* Expand copysign from SIGN to the positive value ABS_VALUE -+ storing in RESULT. If MASK is non-null, it shall be a mask to mask out -+ the sign-bit. */ -+ -+static void -+ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask) -+{ -+ machine_mode mode = GET_MODE (sign); -+ rtx sgn = gen_reg_rtx (mode); -+ if (mask == NULL_RTX) -+ { -+ machine_mode vmode; -+ -+ if (mode == SFmode) -+ vmode = V4SFmode; -+ else if (mode == DFmode) -+ vmode = V2DFmode; -+ else -+ vmode = mode; -+ -+ mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false); -+ if (!VECTOR_MODE_P (mode)) -+ { -+ /* We need to generate a scalar mode mask in this case. */ -+ rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx)); -+ tmp = gen_rtx_VEC_SELECT (mode, mask, tmp); -+ mask = gen_reg_rtx (mode); -+ emit_insn (gen_rtx_SET (mask, tmp)); -+ } -+ } -+ else -+ mask = gen_rtx_NOT (mode, mask); -+ emit_insn (gen_rtx_SET (sgn, gen_rtx_AND (mode, mask, sign))); -+ emit_insn (gen_rtx_SET (result, gen_rtx_IOR (mode, abs_value, sgn))); -+} -+ -+/* Expand SSE sequence for computing lround from OP1 storing -+ into OP0. */ -+ -+void -+ix86_expand_lround (rtx op0, rtx op1) -+{ -+ /* C code for the stuff we're doing below: -+ tmp = op1 + copysign (nextafter (0.5, 0.0), op1) -+ return (long)tmp; -+ */ -+ machine_mode mode = GET_MODE (op1); -+ const struct real_format *fmt; -+ REAL_VALUE_TYPE pred_half, half_minus_pred_half; -+ rtx adj; -+ -+ /* load nextafter (0.5, 0.0) */ -+ fmt = REAL_MODE_FORMAT (mode); -+ real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode); -+ real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half); -+ -+ /* adj = copysign (0.5, op1) */ -+ adj = force_reg (mode, const_double_from_real_value (pred_half, mode)); -+ ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX); -+ -+ /* adj = op1 + adj */ -+ adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT); -+ -+ /* op0 = (imode)adj */ -+ expand_fix (op0, adj, 0); -+} -+ -+/* Expand SSE2 sequence for computing lround from OPERAND1 storing -+ into OPERAND0. */ -+ -+void -+ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor) -+{ -+ /* C code for the stuff we're doing below (for do_floor): -+ xi = (long)op1; -+ xi -= (double)xi > op1 ? 1 : 0; -+ return xi; -+ */ -+ machine_mode fmode = GET_MODE (op1); -+ machine_mode imode = GET_MODE (op0); -+ rtx ireg, freg, tmp; -+ rtx_code_label *label; -+ -+ /* reg = (long)op1 */ -+ ireg = gen_reg_rtx (imode); -+ expand_fix (ireg, op1, 0); -+ -+ /* freg = (double)reg */ -+ freg = gen_reg_rtx (fmode); -+ expand_float (freg, ireg, 0); -+ -+ /* ireg = (freg > op1) ? ireg - 1 : ireg */ -+ label = ix86_expand_sse_compare_and_jump (UNLE, -+ freg, op1, !do_floor); -+ tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS, -+ ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT); -+ emit_move_insn (ireg, tmp); -+ -+ emit_label (label); -+ LABEL_NUSES (label) = 1; -+ -+ emit_move_insn (op0, ireg); -+} -+ -+/* Generate and return a rtx of mode MODE for 2**n where n is the number -+ of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */ -+ -+static rtx -+ix86_gen_TWO52 (machine_mode mode) -+{ -+ REAL_VALUE_TYPE TWO52r; -+ rtx TWO52; -+ -+ real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 
52 : 23); -+ TWO52 = const_double_from_real_value (TWO52r, mode); -+ TWO52 = force_reg (mode, TWO52); -+ -+ return TWO52; -+} -+ -+/* Expand rint rounding OPERAND1 and storing the result in OPERAND0. */ -+ -+void -+ix86_expand_rint (rtx operand0, rtx operand1) -+{ -+ /* C code for the stuff we're doing below: -+ xa = fabs (operand1); -+ if (!isless (xa, 2**52)) -+ return operand1; -+ two52 = 2**52; -+ if (flag_rounding_math) -+ { -+ two52 = copysign (two52, operand1); -+ xa = operand1; -+ } -+ xa = xa + two52 - two52; -+ return copysign (xa, operand1); -+ */ -+ machine_mode mode = GET_MODE (operand0); -+ rtx res, xa, TWO52, two52, mask; -+ rtx_code_label *label; -+ -+ res = gen_reg_rtx (mode); -+ emit_move_insn (res, operand1); -+ -+ /* xa = abs (operand1) */ -+ xa = ix86_expand_sse_fabs (res, &mask); -+ -+ /* if (!isless (xa, TWO52)) goto label; */ -+ TWO52 = ix86_gen_TWO52 (mode); -+ label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); -+ -+ two52 = TWO52; -+ if (flag_rounding_math) -+ { -+ two52 = gen_reg_rtx (mode); -+ ix86_sse_copysign_to_positive (two52, TWO52, res, mask); -+ xa = res; -+ } -+ -+ xa = expand_simple_binop (mode, PLUS, xa, two52, NULL_RTX, 0, OPTAB_DIRECT); -+ xa = expand_simple_binop (mode, MINUS, xa, two52, xa, 0, OPTAB_DIRECT); -+ -+ ix86_sse_copysign_to_positive (res, xa, res, mask); -+ -+ emit_label (label); -+ LABEL_NUSES (label) = 1; -+ -+ emit_move_insn (operand0, res); -+} -+ -+/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing -+ into OPERAND0. */ -+void -+ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor) -+{ -+ /* C code for the stuff we expand below. -+ double xa = fabs (x), x2; -+ if (!isless (xa, TWO52)) -+ return x; -+ xa = xa + TWO52 - TWO52; -+ x2 = copysign (xa, x); -+ Compensate. Floor: -+ if (x2 > x) -+ x2 -= 1; -+ Compensate. Ceil: -+ if (x2 < x) -+ x2 -= -1; -+ return x2; -+ */ -+ machine_mode mode = GET_MODE (operand0); -+ rtx xa, TWO52, tmp, one, res, mask; -+ rtx_code_label *label; -+ -+ TWO52 = ix86_gen_TWO52 (mode); -+ -+ /* Temporary for holding the result, initialized to the input -+ operand to ease control flow. */ -+ res = gen_reg_rtx (mode); -+ emit_move_insn (res, operand1); -+ -+ /* xa = abs (operand1) */ -+ xa = ix86_expand_sse_fabs (res, &mask); -+ -+ /* if (!isless (xa, TWO52)) goto label; */ -+ label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); -+ -+ /* xa = xa + TWO52 - TWO52; */ -+ xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT); -+ xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT); -+ -+ /* xa = copysign (xa, operand1) */ -+ ix86_sse_copysign_to_positive (xa, xa, res, mask); -+ -+ /* generate 1.0 or -1.0 */ -+ one = force_reg (mode, -+ const_double_from_real_value (do_floor -+ ? dconst1 : dconstm1, mode)); -+ -+ /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */ -+ tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor); -+ emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp))); -+ /* We always need to subtract here to preserve signed zero. */ -+ tmp = expand_simple_binop (mode, MINUS, -+ xa, tmp, NULL_RTX, 0, OPTAB_DIRECT); -+ emit_move_insn (res, tmp); -+ -+ emit_label (label); -+ LABEL_NUSES (label) = 1; -+ -+ emit_move_insn (operand0, res); -+} -+ -+/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing -+ into OPERAND0. */ -+void -+ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor) -+{ -+ /* C code for the stuff we expand below. 
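The 2**52 add/subtract trick that ix86_expand_rint (and several helpers below) relies on can be seen in isolation: once 2**52 is added, the sum has no fraction bits left, so the rounding of the addition itself snaps the value to the nearest integer. A minimal sketch assuming 0 <= x < 2**52 and round-to-nearest:

    #include <stdio.h>

    static double
    rint_via_two52 (double x)
    {
      volatile double t = x + 0x1p52;   /* volatile: keep the add from being folded away */
      return t - 0x1p52;                /* nearest integer, ties to even */
    }

    int
    main (void)
    {
      printf ("%g %g\n", rint_via_two52 (2.3), rint_via_two52 (2.7));  /* 2 3 */
      return 0;
    }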
-+ double xa = fabs (x), x2; -+ if (!isless (xa, TWO52)) -+ return x; -+ x2 = (double)(long)x; -+ Compensate. Floor: -+ if (x2 > x) -+ x2 -= 1; -+ Compensate. Ceil: -+ if (x2 < x) -+ x2 += 1; -+ if (HONOR_SIGNED_ZEROS (mode)) -+ return copysign (x2, x); -+ return x2; -+ */ -+ machine_mode mode = GET_MODE (operand0); -+ rtx xa, xi, TWO52, tmp, one, res, mask; -+ rtx_code_label *label; -+ -+ TWO52 = ix86_gen_TWO52 (mode); -+ -+ /* Temporary for holding the result, initialized to the input -+ operand to ease control flow. */ -+ res = gen_reg_rtx (mode); -+ emit_move_insn (res, operand1); -+ -+ /* xa = abs (operand1) */ -+ xa = ix86_expand_sse_fabs (res, &mask); -+ -+ /* if (!isless (xa, TWO52)) goto label; */ -+ label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); -+ -+ /* xa = (double)(long)x */ -+ xi = gen_reg_rtx (mode == DFmode ? DImode : SImode); -+ expand_fix (xi, res, 0); -+ expand_float (xa, xi, 0); -+ -+ /* generate 1.0 */ -+ one = force_reg (mode, const_double_from_real_value (dconst1, mode)); -+ -+ /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */ -+ tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor); -+ emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp))); -+ tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS, -+ xa, tmp, NULL_RTX, 0, OPTAB_DIRECT); -+ emit_move_insn (res, tmp); -+ -+ if (HONOR_SIGNED_ZEROS (mode)) -+ ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask); -+ -+ emit_label (label); -+ LABEL_NUSES (label) = 1; -+ -+ emit_move_insn (operand0, res); -+} -+ -+/* Expand SSE sequence for computing round from OPERAND1 storing -+ into OPERAND0. Sequence that works without relying on DImode truncation -+ via cvttsd2siq that is only available on 64bit targets. */ -+void -+ix86_expand_rounddf_32 (rtx operand0, rtx operand1) -+{ -+ /* C code for the stuff we expand below. -+ double xa = fabs (x), xa2, x2; -+ if (!isless (xa, TWO52)) -+ return x; -+ Using the absolute value and copying back sign makes -+ -0.0 -> -0.0 correct. -+ xa2 = xa + TWO52 - TWO52; -+ Compensate. -+ dxa = xa2 - xa; -+ if (dxa <= -0.5) -+ xa2 += 1; -+ else if (dxa > 0.5) -+ xa2 -= 1; -+ x2 = copysign (xa2, x); -+ return x2; -+ */ -+ machine_mode mode = GET_MODE (operand0); -+ rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask; -+ rtx_code_label *label; -+ -+ TWO52 = ix86_gen_TWO52 (mode); -+ -+ /* Temporary for holding the result, initialized to the input -+ operand to ease control flow. */ -+ res = gen_reg_rtx (mode); -+ emit_move_insn (res, operand1); -+ -+ /* xa = abs (operand1) */ -+ xa = ix86_expand_sse_fabs (res, &mask); -+ -+ /* if (!isless (xa, TWO52)) goto label; */ -+ label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); -+ -+ /* xa2 = xa + TWO52 - TWO52; */ -+ xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT); -+ xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT); -+ -+ /* dxa = xa2 - xa; */ -+ dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT); -+ -+ /* generate 0.5, 1.0 and -0.5 */ -+ half = force_reg (mode, const_double_from_real_value (dconsthalf, mode)); -+ one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT); -+ mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX, -+ 0, OPTAB_DIRECT); -+ -+ /* Compensate. */ -+ tmp = gen_reg_rtx (mode); -+ /* xa2 = xa2 - (dxa > 0.5 ? 
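The truncate-and-compensate step described in ix86_expand_floorceil's comment is easy to model in scalar C (assuming |x| fits in a long; helper name invented):

    #include <stdio.h>

    static double
    floor_via_trunc (double x)
    {
      double x2 = (double) (long) x;   /* truncates toward zero         */
      if (x2 > x)                      /* overshot: only for negative x */
        x2 -= 1.0;
      return x2;
    }

    int
    main (void)
    {
      printf ("%g %g\n", floor_via_trunc (1.7), floor_via_trunc (-1.7));  /* 1 -2 */
      return 0;
    }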
1 : 0) */ -+ tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false); -+ emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp))); -+ xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT); -+ /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */ -+ tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false); -+ emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp))); -+ xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT); -+ -+ /* res = copysign (xa2, operand1) */ -+ ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask); -+ -+ emit_label (label); -+ LABEL_NUSES (label) = 1; -+ -+ emit_move_insn (operand0, res); -+} -+ -+/* Expand SSE sequence for computing trunc from OPERAND1 storing -+ into OPERAND0. */ -+void -+ix86_expand_trunc (rtx operand0, rtx operand1) -+{ -+ /* C code for SSE variant we expand below. -+ double xa = fabs (x), x2; -+ if (!isless (xa, TWO52)) -+ return x; -+ x2 = (double)(long)x; -+ if (HONOR_SIGNED_ZEROS (mode)) -+ return copysign (x2, x); -+ return x2; -+ */ -+ machine_mode mode = GET_MODE (operand0); -+ rtx xa, xi, TWO52, res, mask; -+ rtx_code_label *label; -+ -+ TWO52 = ix86_gen_TWO52 (mode); -+ -+ /* Temporary for holding the result, initialized to the input -+ operand to ease control flow. */ -+ res = gen_reg_rtx (mode); -+ emit_move_insn (res, operand1); -+ -+ /* xa = abs (operand1) */ -+ xa = ix86_expand_sse_fabs (res, &mask); -+ -+ /* if (!isless (xa, TWO52)) goto label; */ -+ label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); -+ -+ /* x = (double)(long)x */ -+ xi = gen_reg_rtx (mode == DFmode ? DImode : SImode); -+ expand_fix (xi, res, 0); -+ expand_float (res, xi, 0); -+ -+ if (HONOR_SIGNED_ZEROS (mode)) -+ ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask); -+ -+ emit_label (label); -+ LABEL_NUSES (label) = 1; -+ -+ emit_move_insn (operand0, res); -+} -+ -+/* Expand SSE sequence for computing trunc from OPERAND1 storing -+ into OPERAND0. */ -+void -+ix86_expand_truncdf_32 (rtx operand0, rtx operand1) -+{ -+ machine_mode mode = GET_MODE (operand0); -+ rtx xa, mask, TWO52, one, res, smask, tmp; -+ rtx_code_label *label; -+ -+ /* C code for SSE variant we expand below. -+ double xa = fabs (x), x2; -+ if (!isless (xa, TWO52)) -+ return x; -+ xa2 = xa + TWO52 - TWO52; -+ Compensate: -+ if (xa2 > xa) -+ xa2 -= 1.0; -+ x2 = copysign (xa2, x); -+ return x2; -+ */ -+ -+ TWO52 = ix86_gen_TWO52 (mode); -+ -+ /* Temporary for holding the result, initialized to the input -+ operand to ease control flow. */ -+ res = gen_reg_rtx (mode); -+ emit_move_insn (res, operand1); -+ -+ /* xa = abs (operand1) */ -+ xa = ix86_expand_sse_fabs (res, &smask); -+ -+ /* if (!isless (xa, TWO52)) goto label; */ -+ label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); -+ -+ /* res = xa + TWO52 - TWO52; */ -+ tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT); -+ tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT); -+ emit_move_insn (res, tmp); -+ -+ /* generate 1.0 */ -+ one = force_reg (mode, const_double_from_real_value (dconst1, mode)); -+ -+ /* Compensate: res = xa2 - (res > xa ? 
1 : 0) */ -+ mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false); -+ emit_insn (gen_rtx_SET (mask, gen_rtx_AND (mode, mask, one))); -+ tmp = expand_simple_binop (mode, MINUS, -+ res, mask, NULL_RTX, 0, OPTAB_DIRECT); -+ emit_move_insn (res, tmp); -+ -+ /* res = copysign (res, operand1) */ -+ ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask); -+ -+ emit_label (label); -+ LABEL_NUSES (label) = 1; -+ -+ emit_move_insn (operand0, res); -+} -+ -+/* Expand SSE sequence for computing round from OPERAND1 storing -+ into OPERAND0. */ -+void -+ix86_expand_round (rtx operand0, rtx operand1) -+{ -+ /* C code for the stuff we're doing below: -+ double xa = fabs (x); -+ if (!isless (xa, TWO52)) -+ return x; -+ xa = (double)(long)(xa + nextafter (0.5, 0.0)); -+ return copysign (xa, x); -+ */ -+ machine_mode mode = GET_MODE (operand0); -+ rtx res, TWO52, xa, xi, half, mask; -+ rtx_code_label *label; -+ const struct real_format *fmt; -+ REAL_VALUE_TYPE pred_half, half_minus_pred_half; -+ -+ /* Temporary for holding the result, initialized to the input -+ operand to ease control flow. */ -+ res = gen_reg_rtx (mode); -+ emit_move_insn (res, operand1); -+ -+ TWO52 = ix86_gen_TWO52 (mode); -+ xa = ix86_expand_sse_fabs (res, &mask); -+ label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); -+ -+ /* load nextafter (0.5, 0.0) */ -+ fmt = REAL_MODE_FORMAT (mode); -+ real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode); -+ real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half); -+ -+ /* xa = xa + 0.5 */ -+ half = force_reg (mode, const_double_from_real_value (pred_half, mode)); -+ xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT); -+ -+ /* xa = (double)(int64_t)xa */ -+ xi = gen_reg_rtx (mode == DFmode ? DImode : SImode); -+ expand_fix (xi, xa, 0); -+ expand_float (xa, xi, 0); -+ -+ /* res = copysign (xa, operand1) */ -+ ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask); -+ -+ emit_label (label); -+ LABEL_NUSES (label) = 1; -+ -+ emit_move_insn (operand0, res); -+} -+ -+/* Expand SSE sequence for computing round -+ from OP1 storing into OP0 using sse4 round insn. 
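Why ix86_expand_round loads nextafter (0.5, 0.0) instead of plain 0.5 can be shown with the largest double below 0.5: adding exactly 0.5 to it rounds up to 1.0, so the subsequent truncation would round it the wrong way, while adding the predecessor of 0.5 does not (standard math.h; sketch only):

    #include <math.h>
    #include <stdio.h>

    int
    main (void)
    {
      double pred_half = nextafter (0.5, 0.0);   /* the constant the expander loads */
      double x = pred_half;                      /* largest double below 0.5        */
      printf ("%ld %ld\n", (long) (x + 0.5), (long) (x + pred_half));   /* 1 0 */
      return 0;
    }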
*/ -+void -+ix86_expand_round_sse4 (rtx op0, rtx op1) -+{ -+ machine_mode mode = GET_MODE (op0); -+ rtx e1, e2, res, half; -+ const struct real_format *fmt; -+ REAL_VALUE_TYPE pred_half, half_minus_pred_half; -+ rtx (*gen_copysign) (rtx, rtx, rtx); -+ rtx (*gen_round) (rtx, rtx, rtx); -+ -+ switch (mode) -+ { -+ case E_SFmode: -+ gen_copysign = gen_copysignsf3; -+ gen_round = gen_sse4_1_roundsf2; -+ break; -+ case E_DFmode: -+ gen_copysign = gen_copysigndf3; -+ gen_round = gen_sse4_1_rounddf2; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ -+ /* round (a) = trunc (a + copysign (0.5, a)) */ -+ -+ /* load nextafter (0.5, 0.0) */ -+ fmt = REAL_MODE_FORMAT (mode); -+ real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode); -+ real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half); -+ half = const_double_from_real_value (pred_half, mode); -+ -+ /* e1 = copysign (0.5, op1) */ -+ e1 = gen_reg_rtx (mode); -+ emit_insn (gen_copysign (e1, half, op1)); -+ -+ /* e2 = op1 + e1 */ -+ e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT); -+ -+ /* res = trunc (e2) */ -+ res = gen_reg_rtx (mode); -+ emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC))); -+ -+ emit_move_insn (op0, res); -+} -+ -+/* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel []))) -+ insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh -+ insn every time. */ -+ -+static GTY(()) rtx_insn *vselect_insn; -+ -+/* Initialize vselect_insn. */ -+ -+static void -+init_vselect_insn (void) -+{ -+ unsigned i; -+ rtx x; -+ -+ x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN)); -+ for (i = 0; i < MAX_VECT_LEN; ++i) -+ XVECEXP (x, 0, i) = const0_rtx; -+ x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx, -+ const0_rtx), x); -+ x = gen_rtx_SET (const0_rtx, x); -+ start_sequence (); -+ vselect_insn = emit_insn (x); -+ end_sequence (); -+} -+ -+/* Construct (set target (vec_select op0 (parallel perm))) and -+ return true if that's a valid instruction in the active ISA. */ -+ -+static bool -+expand_vselect (rtx target, rtx op0, const unsigned char *perm, -+ unsigned nelt, bool testing_p) -+{ -+ unsigned int i; -+ rtx x, save_vconcat; -+ int icode; -+ -+ if (vselect_insn == NULL_RTX) -+ init_vselect_insn (); -+ -+ x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1); -+ PUT_NUM_ELEM (XVEC (x, 0), nelt); -+ for (i = 0; i < nelt; ++i) -+ XVECEXP (x, 0, i) = GEN_INT (perm[i]); -+ save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0); -+ XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0; -+ PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target)); -+ SET_DEST (PATTERN (vselect_insn)) = target; -+ icode = recog_memoized (vselect_insn); -+ -+ if (icode >= 0 && !testing_p) -+ emit_insn (copy_rtx (PATTERN (vselect_insn))); -+ -+ SET_DEST (PATTERN (vselect_insn)) = const0_rtx; -+ XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat; -+ INSN_CODE (vselect_insn) = -1; -+ -+ return icode >= 0; -+} -+ -+/* Similar, but generate a vec_concat from op0 and op1 as well. 
*/ -+ -+static bool -+expand_vselect_vconcat (rtx target, rtx op0, rtx op1, -+ const unsigned char *perm, unsigned nelt, -+ bool testing_p) -+{ -+ machine_mode v2mode; -+ rtx x; -+ bool ok; -+ -+ if (vselect_insn == NULL_RTX) -+ init_vselect_insn (); -+ -+ if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode)) -+ return false; -+ x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0); -+ PUT_MODE (x, v2mode); -+ XEXP (x, 0) = op0; -+ XEXP (x, 1) = op1; -+ ok = expand_vselect (target, x, perm, nelt, testing_p); -+ XEXP (x, 0) = const0_rtx; -+ XEXP (x, 1) = const0_rtx; -+ return ok; -+} -+ -+/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D -+ using movss or movsd. */ -+static bool -+expand_vec_perm_movs (struct expand_vec_perm_d *d) -+{ -+ machine_mode vmode = d->vmode; -+ unsigned i, nelt = d->nelt; -+ rtx x; -+ -+ if (d->one_operand_p) -+ return false; -+ -+ if (!(TARGET_SSE && vmode == V4SFmode) -+ && !(TARGET_SSE2 && vmode == V2DFmode)) -+ return false; -+ -+ /* Only the first element is changed. */ -+ if (d->perm[0] != nelt && d->perm[0] != 0) -+ return false; -+ for (i = 1; i < nelt; ++i) -+ if (d->perm[i] != i + nelt - d->perm[0]) -+ return false; -+ -+ if (d->testing_p) -+ return true; -+ -+ if (d->perm[0] == nelt) -+ x = gen_rtx_VEC_MERGE (vmode, d->op1, d->op0, GEN_INT (1)); -+ else -+ x = gen_rtx_VEC_MERGE (vmode, d->op0, d->op1, GEN_INT (1)); -+ -+ emit_insn (gen_rtx_SET (d->target, x)); -+ -+ return true; -+} -+ -+/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D -+ in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */ -+ -+static bool -+expand_vec_perm_blend (struct expand_vec_perm_d *d) -+{ -+ machine_mode mmode, vmode = d->vmode; -+ unsigned i, nelt = d->nelt; -+ unsigned HOST_WIDE_INT mask; -+ rtx target, op0, op1, maskop, x; -+ rtx rperm[32], vperm; -+ -+ if (d->one_operand_p) -+ return false; -+ if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64 -+ && (TARGET_AVX512BW -+ || GET_MODE_UNIT_SIZE (vmode) >= 4)) -+ ; -+ else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32) -+ ; -+ else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode)) -+ ; -+ else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16) -+ ; -+ else -+ return false; -+ -+ /* This is a blend, not a permute. Elements must stay in their -+ respective lanes. */ -+ for (i = 0; i < nelt; ++i) -+ { -+ unsigned e = d->perm[i]; -+ if (!(e == i || e == i + nelt)) -+ return false; -+ } -+ -+ if (d->testing_p) -+ return true; -+ -+ /* ??? Without SSE4.1, we could implement this with and/andn/or. This -+ decision should be extracted elsewhere, so that we only try that -+ sequence once all budget==3 options have been tried. */ -+ target = d->target; -+ op0 = d->op0; -+ op1 = d->op1; -+ mask = 0; -+ -+ switch (vmode) -+ { -+ case E_V8DFmode: -+ case E_V16SFmode: -+ case E_V4DFmode: -+ case E_V8SFmode: -+ case E_V2DFmode: -+ case E_V4SFmode: -+ case E_V8HImode: -+ case E_V8SImode: -+ case E_V32HImode: -+ case E_V64QImode: -+ case E_V16SImode: -+ case E_V8DImode: -+ for (i = 0; i < nelt; ++i) -+ mask |= ((unsigned HOST_WIDE_INT) (d->perm[i] >= nelt)) << i; -+ break; -+ -+ case E_V2DImode: -+ for (i = 0; i < 2; ++i) -+ mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4); -+ vmode = V8HImode; -+ goto do_subreg; -+ -+ case E_V4SImode: -+ for (i = 0; i < 4; ++i) -+ mask |= (d->perm[i] >= 4 ? 
3 : 0) << (i * 2); -+ vmode = V8HImode; -+ goto do_subreg; -+ -+ case E_V16QImode: -+ /* See if bytes move in pairs so we can use pblendw with -+ an immediate argument, rather than pblendvb with a vector -+ argument. */ -+ for (i = 0; i < 16; i += 2) -+ if (d->perm[i] + 1 != d->perm[i + 1]) -+ { -+ use_pblendvb: -+ for (i = 0; i < nelt; ++i) -+ rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx); -+ -+ finish_pblendvb: -+ vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm)); -+ vperm = force_reg (vmode, vperm); -+ -+ if (GET_MODE_SIZE (vmode) == 16) -+ emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm)); -+ else -+ emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm)); -+ if (target != d->target) -+ emit_move_insn (d->target, gen_lowpart (d->vmode, target)); -+ return true; -+ } -+ -+ for (i = 0; i < 8; ++i) -+ mask |= (d->perm[i * 2] >= 16) << i; -+ vmode = V8HImode; -+ /* FALLTHRU */ -+ -+ do_subreg: -+ target = gen_reg_rtx (vmode); -+ op0 = gen_lowpart (vmode, op0); -+ op1 = gen_lowpart (vmode, op1); -+ break; -+ -+ case E_V32QImode: -+ /* See if bytes move in pairs. If not, vpblendvb must be used. */ -+ for (i = 0; i < 32; i += 2) -+ if (d->perm[i] + 1 != d->perm[i + 1]) -+ goto use_pblendvb; -+ /* See if bytes move in quadruplets. If yes, vpblendd -+ with immediate can be used. */ -+ for (i = 0; i < 32; i += 4) -+ if (d->perm[i] + 2 != d->perm[i + 2]) -+ break; -+ if (i < 32) -+ { -+ /* See if bytes move the same in both lanes. If yes, -+ vpblendw with immediate can be used. */ -+ for (i = 0; i < 16; i += 2) -+ if (d->perm[i] + 16 != d->perm[i + 16]) -+ goto use_pblendvb; -+ -+ /* Use vpblendw. */ -+ for (i = 0; i < 16; ++i) -+ mask |= (d->perm[i * 2] >= 32) << i; -+ vmode = V16HImode; -+ goto do_subreg; -+ } -+ -+ /* Use vpblendd. */ -+ for (i = 0; i < 8; ++i) -+ mask |= (d->perm[i * 4] >= 32) << i; -+ vmode = V8SImode; -+ goto do_subreg; -+ -+ case E_V16HImode: -+ /* See if words move in pairs. If yes, vpblendd can be used. */ -+ for (i = 0; i < 16; i += 2) -+ if (d->perm[i] + 1 != d->perm[i + 1]) -+ break; -+ if (i < 16) -+ { -+ /* See if words move the same in both lanes. If not, -+ vpblendvb must be used. */ -+ for (i = 0; i < 8; i++) -+ if (d->perm[i] + 8 != d->perm[i + 8]) -+ { -+ /* Use vpblendvb. */ -+ for (i = 0; i < 32; ++i) -+ rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx); -+ -+ vmode = V32QImode; -+ nelt = 32; -+ target = gen_reg_rtx (vmode); -+ op0 = gen_lowpart (vmode, op0); -+ op1 = gen_lowpart (vmode, op1); -+ goto finish_pblendvb; -+ } -+ -+ /* Use vpblendw. */ -+ for (i = 0; i < 16; ++i) -+ mask |= (d->perm[i] >= 16) << i; -+ break; -+ } -+ -+ /* Use vpblendd. */ -+ for (i = 0; i < 8; ++i) -+ mask |= (d->perm[i * 2] >= 16) << i; -+ vmode = V8SImode; -+ goto do_subreg; -+ -+ case E_V4DImode: -+ /* Use vpblendd. */ -+ for (i = 0; i < 4; ++i) -+ mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2); -+ vmode = V8SImode; -+ goto do_subreg; -+ -+ default: -+ gcc_unreachable (); -+ } -+ -+ switch (vmode) -+ { -+ case E_V8DFmode: -+ case E_V8DImode: -+ mmode = QImode; -+ break; -+ case E_V16SFmode: -+ case E_V16SImode: -+ mmode = HImode; -+ break; -+ case E_V32HImode: -+ mmode = SImode; -+ break; -+ case E_V64QImode: -+ mmode = DImode; -+ break; -+ default: -+ mmode = VOIDmode; -+ } -+ -+ if (mmode != VOIDmode) -+ maskop = force_reg (mmode, gen_int_mode (mask, mmode)); -+ else -+ maskop = GEN_INT (mask); -+ -+ /* This matches five different patterns with the different modes. 
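The immediate that expand_vec_perm_blend assembles is just one bit per element saying "take this lane from op1"; a small model with invented names (sketch only):

    #include <stdio.h>

    static unsigned
    blend_mask (const unsigned char *perm, unsigned nelt)
    {
      unsigned mask = 0;
      for (unsigned i = 0; i < nelt; i++)
        mask |= (unsigned) (perm[i] >= nelt) << i;   /* bit set: element from op1 */
      return mask;
    }

    int
    main (void)
    {
      unsigned char perm[4] = { 0, 5, 2, 7 };     /* lanes 1 and 3 come from op1 */
      printf ("0x%x\n", blend_mask (perm, 4));    /* prints 0xa */
      return 0;
    }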
*/ -+ x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop); -+ x = gen_rtx_SET (target, x); -+ emit_insn (x); -+ if (target != d->target) -+ emit_move_insn (d->target, gen_lowpart (d->vmode, target)); -+ -+ return true; -+} -+ -+/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D -+ in terms of the variable form of vpermilps. -+ -+ Note that we will have already failed the immediate input vpermilps, -+ which requires that the high and low part shuffle be identical; the -+ variable form doesn't require that. */ -+ -+static bool -+expand_vec_perm_vpermil (struct expand_vec_perm_d *d) -+{ -+ rtx rperm[8], vperm; -+ unsigned i; -+ -+ if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p) -+ return false; -+ -+ /* We can only permute within the 128-bit lane. */ -+ for (i = 0; i < 8; ++i) -+ { -+ unsigned e = d->perm[i]; -+ if (i < 4 ? e >= 4 : e < 4) -+ return false; -+ } -+ -+ if (d->testing_p) -+ return true; -+ -+ for (i = 0; i < 8; ++i) -+ { -+ unsigned e = d->perm[i]; -+ -+ /* Within each 128-bit lane, the elements of op0 are numbered -+ from 0 and the elements of op1 are numbered from 4. */ -+ if (e >= 8 + 4) -+ e -= 8; -+ else if (e >= 4) -+ e -= 4; -+ -+ rperm[i] = GEN_INT (e); -+ } -+ -+ vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm)); -+ vperm = force_reg (V8SImode, vperm); -+ emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm)); -+ -+ return true; -+} -+ -+/* Return true if permutation D can be performed as VMODE permutation -+ instead. */ -+ -+static bool -+valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d) -+{ -+ unsigned int i, j, chunk; -+ -+ if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT -+ || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT -+ || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode)) -+ return false; -+ -+ if (GET_MODE_NUNITS (vmode) >= d->nelt) -+ return true; -+ -+ chunk = d->nelt / GET_MODE_NUNITS (vmode); -+ for (i = 0; i < d->nelt; i += chunk) -+ if (d->perm[i] & (chunk - 1)) -+ return false; -+ else -+ for (j = 1; j < chunk; ++j) -+ if (d->perm[i] + j != d->perm[i + j]) -+ return false; -+ -+ return true; -+} -+ -+/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D -+ in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */ -+ -+static bool -+expand_vec_perm_pshufb (struct expand_vec_perm_d *d) -+{ -+ unsigned i, nelt, eltsz, mask; -+ unsigned char perm[64]; -+ machine_mode vmode = V16QImode; -+ rtx rperm[64], vperm, target, op0, op1; -+ -+ nelt = d->nelt; -+ -+ if (!d->one_operand_p) -+ { -+ if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16) -+ { -+ if (TARGET_AVX2 -+ && valid_perm_using_mode_p (V2TImode, d)) -+ { -+ if (d->testing_p) -+ return true; -+ -+ /* Use vperm2i128 insn. The pattern uses -+ V4DImode instead of V2TImode. 
*/ -+ target = d->target; -+ if (d->vmode != V4DImode) -+ target = gen_reg_rtx (V4DImode); -+ op0 = gen_lowpart (V4DImode, d->op0); -+ op1 = gen_lowpart (V4DImode, d->op1); -+ rperm[0] -+ = GEN_INT ((d->perm[0] / (nelt / 2)) -+ | ((d->perm[nelt / 2] / (nelt / 2)) * 16)); -+ emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0])); -+ if (target != d->target) -+ emit_move_insn (d->target, gen_lowpart (d->vmode, target)); -+ return true; -+ } -+ return false; -+ } -+ } -+ else -+ { -+ if (GET_MODE_SIZE (d->vmode) == 16) -+ { -+ if (!TARGET_SSSE3) -+ return false; -+ } -+ else if (GET_MODE_SIZE (d->vmode) == 32) -+ { -+ if (!TARGET_AVX2) -+ return false; -+ -+ /* V4DImode should be already handled through -+ expand_vselect by vpermq instruction. */ -+ gcc_assert (d->vmode != V4DImode); -+ -+ vmode = V32QImode; -+ if (d->vmode == V8SImode -+ || d->vmode == V16HImode -+ || d->vmode == V32QImode) -+ { -+ /* First see if vpermq can be used for -+ V8SImode/V16HImode/V32QImode. */ -+ if (valid_perm_using_mode_p (V4DImode, d)) -+ { -+ for (i = 0; i < 4; i++) -+ perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3; -+ if (d->testing_p) -+ return true; -+ target = gen_reg_rtx (V4DImode); -+ if (expand_vselect (target, gen_lowpart (V4DImode, d->op0), -+ perm, 4, false)) -+ { -+ emit_move_insn (d->target, -+ gen_lowpart (d->vmode, target)); -+ return true; -+ } -+ return false; -+ } -+ -+ /* Next see if vpermd can be used. */ -+ if (valid_perm_using_mode_p (V8SImode, d)) -+ vmode = V8SImode; -+ } -+ /* Or if vpermps can be used. */ -+ else if (d->vmode == V8SFmode) -+ vmode = V8SImode; -+ -+ if (vmode == V32QImode) -+ { -+ /* vpshufb only works intra lanes, it is not -+ possible to shuffle bytes in between the lanes. */ -+ for (i = 0; i < nelt; ++i) -+ if ((d->perm[i] ^ i) & (nelt / 2)) -+ return false; -+ } -+ } -+ else if (GET_MODE_SIZE (d->vmode) == 64) -+ { -+ if (!TARGET_AVX512BW) -+ return false; -+ -+ /* If vpermq didn't work, vpshufb won't work either. */ -+ if (d->vmode == V8DFmode || d->vmode == V8DImode) -+ return false; -+ -+ vmode = V64QImode; -+ if (d->vmode == V16SImode -+ || d->vmode == V32HImode -+ || d->vmode == V64QImode) -+ { -+ /* First see if vpermq can be used for -+ V16SImode/V32HImode/V64QImode. */ -+ if (valid_perm_using_mode_p (V8DImode, d)) -+ { -+ for (i = 0; i < 8; i++) -+ perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7; -+ if (d->testing_p) -+ return true; -+ target = gen_reg_rtx (V8DImode); -+ if (expand_vselect (target, gen_lowpart (V8DImode, d->op0), -+ perm, 8, false)) -+ { -+ emit_move_insn (d->target, -+ gen_lowpart (d->vmode, target)); -+ return true; -+ } -+ return false; -+ } -+ -+ /* Next see if vpermd can be used. */ -+ if (valid_perm_using_mode_p (V16SImode, d)) -+ vmode = V16SImode; -+ } -+ /* Or if vpermps can be used. */ -+ else if (d->vmode == V16SFmode) -+ vmode = V16SImode; -+ if (vmode == V64QImode) -+ { -+ /* vpshufb only works intra lanes, it is not -+ possible to shuffle bytes in between the lanes. 
*/ -+ for (i = 0; i < nelt; ++i) -+ if ((d->perm[i] ^ i) & (nelt / 4)) -+ return false; -+ } -+ } -+ else -+ return false; -+ } -+ -+ if (d->testing_p) -+ return true; -+ -+ if (vmode == V8SImode) -+ for (i = 0; i < 8; ++i) -+ rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7); -+ else if (vmode == V16SImode) -+ for (i = 0; i < 16; ++i) -+ rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15); -+ else -+ { -+ eltsz = GET_MODE_UNIT_SIZE (d->vmode); -+ if (!d->one_operand_p) -+ mask = 2 * nelt - 1; -+ else if (vmode == V16QImode) -+ mask = nelt - 1; -+ else if (vmode == V64QImode) -+ mask = nelt / 4 - 1; -+ else -+ mask = nelt / 2 - 1; -+ -+ for (i = 0; i < nelt; ++i) -+ { -+ unsigned j, e = d->perm[i] & mask; -+ for (j = 0; j < eltsz; ++j) -+ rperm[i * eltsz + j] = GEN_INT (e * eltsz + j); -+ } -+ } -+ -+ vperm = gen_rtx_CONST_VECTOR (vmode, -+ gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm)); -+ vperm = force_reg (vmode, vperm); -+ -+ target = d->target; -+ if (d->vmode != vmode) -+ target = gen_reg_rtx (vmode); -+ op0 = gen_lowpart (vmode, d->op0); -+ if (d->one_operand_p) -+ { -+ if (vmode == V16QImode) -+ emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm)); -+ else if (vmode == V32QImode) -+ emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm)); -+ else if (vmode == V64QImode) -+ emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm)); -+ else if (vmode == V8SFmode) -+ emit_insn (gen_avx2_permvarv8sf (target, op0, vperm)); -+ else if (vmode == V8SImode) -+ emit_insn (gen_avx2_permvarv8si (target, op0, vperm)); -+ else if (vmode == V16SFmode) -+ emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm)); -+ else if (vmode == V16SImode) -+ emit_insn (gen_avx512f_permvarv16si (target, op0, vperm)); -+ else -+ gcc_unreachable (); -+ } -+ else -+ { -+ op1 = gen_lowpart (vmode, d->op1); -+ emit_insn (gen_xop_pperm (target, op0, op1, vperm)); -+ } -+ if (target != d->target) -+ emit_move_insn (d->target, gen_lowpart (d->vmode, target)); -+ -+ return true; -+} -+ -+/* For V*[QHS]Imode permutations, check if the same permutation -+ can't be performed in a 2x, 4x or 8x wider inner mode. 
*/ -+ -+static bool -+canonicalize_vector_int_perm (const struct expand_vec_perm_d *d, -+ struct expand_vec_perm_d *nd) -+{ -+ int i; -+ machine_mode mode = VOIDmode; -+ -+ switch (d->vmode) -+ { -+ case E_V16QImode: mode = V8HImode; break; -+ case E_V32QImode: mode = V16HImode; break; -+ case E_V64QImode: mode = V32HImode; break; -+ case E_V8HImode: mode = V4SImode; break; -+ case E_V16HImode: mode = V8SImode; break; -+ case E_V32HImode: mode = V16SImode; break; -+ case E_V4SImode: mode = V2DImode; break; -+ case E_V8SImode: mode = V4DImode; break; -+ case E_V16SImode: mode = V8DImode; break; -+ default: return false; -+ } -+ for (i = 0; i < d->nelt; i += 2) -+ if ((d->perm[i] & 1) || d->perm[i + 1] != d->perm[i] + 1) -+ return false; -+ nd->vmode = mode; -+ nd->nelt = d->nelt / 2; -+ for (i = 0; i < nd->nelt; i++) -+ nd->perm[i] = d->perm[2 * i] / 2; -+ if (GET_MODE_INNER (mode) != DImode) -+ canonicalize_vector_int_perm (nd, nd); -+ if (nd != d) -+ { -+ nd->one_operand_p = d->one_operand_p; -+ nd->testing_p = d->testing_p; -+ if (d->op0 == d->op1) -+ nd->op0 = nd->op1 = gen_lowpart (nd->vmode, d->op0); -+ else -+ { -+ nd->op0 = gen_lowpart (nd->vmode, d->op0); -+ nd->op1 = gen_lowpart (nd->vmode, d->op1); -+ } -+ if (d->testing_p) -+ nd->target = gen_raw_REG (nd->vmode, LAST_VIRTUAL_REGISTER + 1); -+ else -+ nd->target = gen_reg_rtx (nd->vmode); -+ } -+ return true; -+} -+ -+/* Try to expand one-operand permutation with constant mask. */ -+ -+static bool -+ix86_expand_vec_one_operand_perm_avx512 (struct expand_vec_perm_d *d) -+{ -+ machine_mode mode = GET_MODE (d->op0); -+ machine_mode maskmode = mode; -+ rtx (*gen) (rtx, rtx, rtx) = NULL; -+ rtx target, op0, mask; -+ rtx vec[64]; -+ -+ if (!rtx_equal_p (d->op0, d->op1)) -+ return false; -+ -+ if (!TARGET_AVX512F) -+ return false; -+ -+ switch (mode) -+ { -+ case E_V16SImode: -+ gen = gen_avx512f_permvarv16si; -+ break; -+ case E_V16SFmode: -+ gen = gen_avx512f_permvarv16sf; -+ maskmode = V16SImode; -+ break; -+ case E_V8DImode: -+ gen = gen_avx512f_permvarv8di; -+ break; -+ case E_V8DFmode: -+ gen = gen_avx512f_permvarv8df; -+ maskmode = V8DImode; -+ break; -+ default: -+ return false; -+ } -+ -+ target = d->target; -+ op0 = d->op0; -+ for (int i = 0; i < d->nelt; ++i) -+ vec[i] = GEN_INT (d->perm[i]); -+ mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec)); -+ emit_insn (gen (target, op0, force_reg (maskmode, mask))); -+ return true; -+} -+ -+static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool); -+ -+/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D -+ in a single instruction. */ -+ -+static bool -+expand_vec_perm_1 (struct expand_vec_perm_d *d) -+{ -+ unsigned i, nelt = d->nelt; -+ struct expand_vec_perm_d nd; -+ -+ /* Check plain VEC_SELECT first, because AVX has instructions that could -+ match both SEL and SEL+CONCAT, but the plain SEL will allow a memory -+ input where SEL+CONCAT may not. */ -+ if (d->one_operand_p) -+ { -+ int mask = nelt - 1; -+ bool identity_perm = true; -+ bool broadcast_perm = true; -+ -+ for (i = 0; i < nelt; i++) -+ { -+ nd.perm[i] = d->perm[i] & mask; -+ if (nd.perm[i] != i) -+ identity_perm = false; -+ if (nd.perm[i]) -+ broadcast_perm = false; -+ } -+ -+ if (identity_perm) -+ { -+ if (!d->testing_p) -+ emit_move_insn (d->target, d->op0); -+ return true; -+ } -+ else if (broadcast_perm && TARGET_AVX2) -+ { -+ /* Use vpbroadcast{b,w,d}. 
*/ -+ rtx (*gen) (rtx, rtx) = NULL; -+ switch (d->vmode) -+ { -+ case E_V64QImode: -+ if (TARGET_AVX512BW) -+ gen = gen_avx512bw_vec_dupv64qi_1; -+ break; -+ case E_V32QImode: -+ gen = gen_avx2_pbroadcastv32qi_1; -+ break; -+ case E_V32HImode: -+ if (TARGET_AVX512BW) -+ gen = gen_avx512bw_vec_dupv32hi_1; -+ break; -+ case E_V16HImode: -+ gen = gen_avx2_pbroadcastv16hi_1; -+ break; -+ case E_V16SImode: -+ if (TARGET_AVX512F) -+ gen = gen_avx512f_vec_dupv16si_1; -+ break; -+ case E_V8SImode: -+ gen = gen_avx2_pbroadcastv8si_1; -+ break; -+ case E_V16QImode: -+ gen = gen_avx2_pbroadcastv16qi; -+ break; -+ case E_V8HImode: -+ gen = gen_avx2_pbroadcastv8hi; -+ break; -+ case E_V16SFmode: -+ if (TARGET_AVX512F) -+ gen = gen_avx512f_vec_dupv16sf_1; -+ break; -+ case E_V8SFmode: -+ gen = gen_avx2_vec_dupv8sf_1; -+ break; -+ case E_V8DFmode: -+ if (TARGET_AVX512F) -+ gen = gen_avx512f_vec_dupv8df_1; -+ break; -+ case E_V8DImode: -+ if (TARGET_AVX512F) -+ gen = gen_avx512f_vec_dupv8di_1; -+ break; -+ /* For other modes prefer other shuffles this function creates. */ -+ default: break; -+ } -+ if (gen != NULL) -+ { -+ if (!d->testing_p) -+ emit_insn (gen (d->target, d->op0)); -+ return true; -+ } -+ } -+ -+ if (expand_vselect (d->target, d->op0, nd.perm, nelt, d->testing_p)) -+ return true; -+ -+ /* There are plenty of patterns in sse.md that are written for -+ SEL+CONCAT and are not replicated for a single op. Perhaps -+ that should be changed, to avoid the nastiness here. */ -+ -+ /* Recognize interleave style patterns, which means incrementing -+ every other permutation operand. */ -+ for (i = 0; i < nelt; i += 2) -+ { -+ nd.perm[i] = d->perm[i] & mask; -+ nd.perm[i + 1] = (d->perm[i + 1] & mask) + nelt; -+ } -+ if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt, -+ d->testing_p)) -+ return true; -+ -+ /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */ -+ if (nelt >= 4) -+ { -+ for (i = 0; i < nelt; i += 4) -+ { -+ nd.perm[i + 0] = d->perm[i + 0] & mask; -+ nd.perm[i + 1] = d->perm[i + 1] & mask; -+ nd.perm[i + 2] = (d->perm[i + 2] & mask) + nelt; -+ nd.perm[i + 3] = (d->perm[i + 3] & mask) + nelt; -+ } -+ -+ if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt, -+ d->testing_p)) -+ return true; -+ } -+ } -+ -+ /* Try movss/movsd instructions. */ -+ if (expand_vec_perm_movs (d)) -+ return true; -+ -+ /* Finally, try the fully general two operand permute. */ -+ if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt, -+ d->testing_p)) -+ return true; -+ -+ /* Recognize interleave style patterns with reversed operands. */ -+ if (!d->one_operand_p) -+ { -+ for (i = 0; i < nelt; ++i) -+ { -+ unsigned e = d->perm[i]; -+ if (e >= nelt) -+ e -= nelt; -+ else -+ e += nelt; -+ nd.perm[i] = e; -+ } -+ -+ if (expand_vselect_vconcat (d->target, d->op1, d->op0, nd.perm, nelt, -+ d->testing_p)) -+ return true; -+ } -+ -+ /* Try the SSE4.1 blend variable merge instructions. */ -+ if (expand_vec_perm_blend (d)) -+ return true; -+ -+ /* Try one of the AVX vpermil variable permutations. */ -+ if (expand_vec_perm_vpermil (d)) -+ return true; -+ -+ /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128, -+ vpshufb, vpermd, vpermps or vpermq variable permutation. */ -+ if (expand_vec_perm_pshufb (d)) -+ return true; -+ -+ /* Try the AVX2 vpalignr instruction. */ -+ if (expand_vec_perm_palignr (d, true)) -+ return true; -+ -+ /* Try the AVX512F vperm{s,d} instructions. 
*/ -+ if (ix86_expand_vec_one_operand_perm_avx512 (d)) -+ return true; -+ -+ /* Try the AVX512F vpermt2/vpermi2 instructions. */ -+ if (ix86_expand_vec_perm_vpermt2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d)) -+ return true; -+ -+ /* See if we can get the same permutation in different vector integer -+ mode. */ -+ if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd)) -+ { -+ if (!d->testing_p) -+ emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target)); -+ return true; -+ } -+ return false; -+} -+ -+/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D -+ in terms of a pair of pshuflw + pshufhw instructions. */ -+ -+static bool -+expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d) -+{ -+ unsigned char perm2[MAX_VECT_LEN]; -+ unsigned i; -+ bool ok; -+ -+ if (d->vmode != V8HImode || !d->one_operand_p) -+ return false; -+ -+ /* The two permutations only operate in 64-bit lanes. */ -+ for (i = 0; i < 4; ++i) -+ if (d->perm[i] >= 4) -+ return false; -+ for (i = 4; i < 8; ++i) -+ if (d->perm[i] < 4) -+ return false; -+ -+ if (d->testing_p) -+ return true; -+ -+ /* Emit the pshuflw. */ -+ memcpy (perm2, d->perm, 4); -+ for (i = 4; i < 8; ++i) -+ perm2[i] = i; -+ ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p); -+ gcc_assert (ok); -+ -+ /* Emit the pshufhw. */ -+ memcpy (perm2 + 4, d->perm + 4, 4); -+ for (i = 0; i < 4; ++i) -+ perm2[i] = i; -+ ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p); -+ gcc_assert (ok); -+ -+ return true; -+} -+ -+/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify -+ the permutation using the SSSE3 palignr instruction. This succeeds -+ when all of the elements in PERM fit within one vector and we merely -+ need to shift them down so that a single vector permutation has a -+ chance to succeed. If SINGLE_INSN_ONLY_P, succeed if only -+ the vpalignr instruction itself can perform the requested permutation. */ -+ -+static bool -+expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p) -+{ -+ unsigned i, nelt = d->nelt; -+ unsigned min, max, minswap, maxswap; -+ bool in_order, ok, swap = false; -+ rtx shift, target; -+ struct expand_vec_perm_d dcopy; -+ -+ /* Even with AVX, palignr only operates on 128-bit vectors, -+ in AVX2 palignr operates on both 128-bit lanes. */ -+ if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16) -+ && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32)) -+ return false; -+ -+ min = 2 * nelt; -+ max = 0; -+ minswap = 2 * nelt; -+ maxswap = 0; -+ for (i = 0; i < nelt; ++i) -+ { -+ unsigned e = d->perm[i]; -+ unsigned eswap = d->perm[i] ^ nelt; -+ if (GET_MODE_SIZE (d->vmode) == 32) -+ { -+ e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1); -+ eswap = e ^ (nelt / 2); -+ } -+ if (e < min) -+ min = e; -+ if (e > max) -+ max = e; -+ if (eswap < minswap) -+ minswap = eswap; -+ if (eswap > maxswap) -+ maxswap = eswap; -+ } -+ if (min == 0 -+ || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt)) -+ { -+ if (d->one_operand_p -+ || minswap == 0 -+ || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32 -+ ? nelt / 2 : nelt)) -+ return false; -+ swap = true; -+ min = minswap; -+ max = maxswap; -+ } -+ -+ /* Given that we have SSSE3, we know we'll be able to implement the -+ single operand permutation after the palignr with pshufb for -+ 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed -+ first. 
*/ -+ if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p) -+ return true; -+ -+ dcopy = *d; -+ if (swap) -+ { -+ dcopy.op0 = d->op1; -+ dcopy.op1 = d->op0; -+ for (i = 0; i < nelt; ++i) -+ dcopy.perm[i] ^= nelt; -+ } -+ -+ in_order = true; -+ for (i = 0; i < nelt; ++i) -+ { -+ unsigned e = dcopy.perm[i]; -+ if (GET_MODE_SIZE (d->vmode) == 32 -+ && e >= nelt -+ && (e & (nelt / 2 - 1)) < min) -+ e = e - min - (nelt / 2); -+ else -+ e = e - min; -+ if (e != i) -+ in_order = false; -+ dcopy.perm[i] = e; -+ } -+ dcopy.one_operand_p = true; -+ -+ if (single_insn_only_p && !in_order) -+ return false; -+ -+ /* For AVX2, test whether we can permute the result in one instruction. */ -+ if (d->testing_p) -+ { -+ if (in_order) -+ return true; -+ dcopy.op1 = dcopy.op0; -+ return expand_vec_perm_1 (&dcopy); -+ } -+ -+ shift = GEN_INT (min * GET_MODE_UNIT_BITSIZE (d->vmode)); -+ if (GET_MODE_SIZE (d->vmode) == 16) -+ { -+ target = gen_reg_rtx (TImode); -+ emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1), -+ gen_lowpart (TImode, dcopy.op0), shift)); -+ } -+ else -+ { -+ target = gen_reg_rtx (V2TImode); -+ emit_insn (gen_avx2_palignrv2ti (target, -+ gen_lowpart (V2TImode, dcopy.op1), -+ gen_lowpart (V2TImode, dcopy.op0), -+ shift)); -+ } -+ -+ dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target); -+ -+ /* Test for the degenerate case where the alignment by itself -+ produces the desired permutation. */ -+ if (in_order) -+ { -+ emit_move_insn (d->target, dcopy.op0); -+ return true; -+ } -+ -+ ok = expand_vec_perm_1 (&dcopy); -+ gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32); -+ -+ return ok; -+} -+ -+/* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify -+ the permutation using the SSE4_1 pblendv instruction. Potentially -+ reduces permutation from 2 pshufb and or to 1 pshufb and pblendv. */ -+ -+static bool -+expand_vec_perm_pblendv (struct expand_vec_perm_d *d) -+{ -+ unsigned i, which, nelt = d->nelt; -+ struct expand_vec_perm_d dcopy, dcopy1; -+ machine_mode vmode = d->vmode; -+ bool ok; -+ -+ /* Use the same checks as in expand_vec_perm_blend. */ -+ if (d->one_operand_p) -+ return false; -+ if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32) -+ ; -+ else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode)) -+ ; -+ else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16) -+ ; -+ else -+ return false; -+ -+ /* Figure out where permutation elements stay not in their -+ respective lanes. */ -+ for (i = 0, which = 0; i < nelt; ++i) -+ { -+ unsigned e = d->perm[i]; -+ if (e != i) -+ which |= (e < nelt ? 1 : 2); -+ } -+ /* We can pblend the part where elements stay not in their -+ respective lanes only when these elements are all in one -+ half of a permutation. -+ {0 1 8 3 4 5 9 7} is ok as 8, 9 are at not at their respective -+ lanes, but both 8 and 9 >= 8 -+ {0 1 8 3 4 5 2 7} is not ok as 2 and 8 are not at their -+ respective lanes and 8 >= 8, but 2 not. */ -+ if (which != 1 && which != 2) -+ return false; -+ if (d->testing_p && GET_MODE_SIZE (vmode) == 16) -+ return true; -+ -+ /* First we apply one operand permutation to the part where -+ elements stay not in their respective lanes. 
*/ -+ dcopy = *d; -+ if (which == 2) -+ dcopy.op0 = dcopy.op1 = d->op1; -+ else -+ dcopy.op0 = dcopy.op1 = d->op0; -+ if (!d->testing_p) -+ dcopy.target = gen_reg_rtx (vmode); -+ dcopy.one_operand_p = true; -+ -+ for (i = 0; i < nelt; ++i) -+ dcopy.perm[i] = d->perm[i] & (nelt - 1); -+ -+ ok = expand_vec_perm_1 (&dcopy); -+ if (GET_MODE_SIZE (vmode) != 16 && !ok) -+ return false; -+ else -+ gcc_assert (ok); -+ if (d->testing_p) -+ return true; -+ -+ /* Next we put permuted elements into their positions. */ -+ dcopy1 = *d; -+ if (which == 2) -+ dcopy1.op1 = dcopy.target; -+ else -+ dcopy1.op0 = dcopy.target; -+ -+ for (i = 0; i < nelt; ++i) -+ dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i); -+ -+ ok = expand_vec_perm_blend (&dcopy1); -+ gcc_assert (ok); -+ -+ return true; -+} -+ -+static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d); -+ -+/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify -+ a two vector permutation into a single vector permutation by using -+ an interleave operation to merge the vectors. */ -+ -+static bool -+expand_vec_perm_interleave2 (struct expand_vec_perm_d *d) -+{ -+ struct expand_vec_perm_d dremap, dfinal; -+ unsigned i, nelt = d->nelt, nelt2 = nelt / 2; -+ unsigned HOST_WIDE_INT contents; -+ unsigned char remap[2 * MAX_VECT_LEN]; -+ rtx_insn *seq; -+ bool ok, same_halves = false; -+ -+ if (GET_MODE_SIZE (d->vmode) == 16) -+ { -+ if (d->one_operand_p) -+ return false; -+ } -+ else if (GET_MODE_SIZE (d->vmode) == 32) -+ { -+ if (!TARGET_AVX) -+ return false; -+ /* For 32-byte modes allow even d->one_operand_p. -+ The lack of cross-lane shuffling in some instructions -+ might prevent a single insn shuffle. */ -+ dfinal = *d; -+ dfinal.testing_p = true; -+ /* If expand_vec_perm_interleave3 can expand this into -+ a 3 insn sequence, give up and let it be expanded as -+ 3 insn sequence. While that is one insn longer, -+ it doesn't need a memory operand and in the common -+ case that both interleave low and high permutations -+ with the same operands are adjacent needs 4 insns -+ for both after CSE. */ -+ if (expand_vec_perm_interleave3 (&dfinal)) -+ return false; -+ } -+ else -+ return false; -+ -+ /* Examine from whence the elements come. */ -+ contents = 0; -+ for (i = 0; i < nelt; ++i) -+ contents |= HOST_WIDE_INT_1U << d->perm[i]; -+ -+ memset (remap, 0xff, sizeof (remap)); -+ dremap = *d; -+ -+ if (GET_MODE_SIZE (d->vmode) == 16) -+ { -+ unsigned HOST_WIDE_INT h1, h2, h3, h4; -+ -+ /* Split the two input vectors into 4 halves. */ -+ h1 = (HOST_WIDE_INT_1U << nelt2) - 1; -+ h2 = h1 << nelt2; -+ h3 = h2 << nelt2; -+ h4 = h3 << nelt2; -+ -+ /* If the elements from the low halves use interleave low, and similarly -+ for interleave high. If the elements are from mis-matched halves, we -+ can use shufps for V4SF/V4SI or do a DImode shuffle. 
*/ -+ if ((contents & (h1 | h3)) == contents) -+ { -+ /* punpckl* */ -+ for (i = 0; i < nelt2; ++i) -+ { -+ remap[i] = i * 2; -+ remap[i + nelt] = i * 2 + 1; -+ dremap.perm[i * 2] = i; -+ dremap.perm[i * 2 + 1] = i + nelt; -+ } -+ if (!TARGET_SSE2 && d->vmode == V4SImode) -+ dremap.vmode = V4SFmode; -+ } -+ else if ((contents & (h2 | h4)) == contents) -+ { -+ /* punpckh* */ -+ for (i = 0; i < nelt2; ++i) -+ { -+ remap[i + nelt2] = i * 2; -+ remap[i + nelt + nelt2] = i * 2 + 1; -+ dremap.perm[i * 2] = i + nelt2; -+ dremap.perm[i * 2 + 1] = i + nelt + nelt2; -+ } -+ if (!TARGET_SSE2 && d->vmode == V4SImode) -+ dremap.vmode = V4SFmode; -+ } -+ else if ((contents & (h1 | h4)) == contents) -+ { -+ /* shufps */ -+ for (i = 0; i < nelt2; ++i) -+ { -+ remap[i] = i; -+ remap[i + nelt + nelt2] = i + nelt2; -+ dremap.perm[i] = i; -+ dremap.perm[i + nelt2] = i + nelt + nelt2; -+ } -+ if (nelt != 4) -+ { -+ /* shufpd */ -+ dremap.vmode = V2DImode; -+ dremap.nelt = 2; -+ dremap.perm[0] = 0; -+ dremap.perm[1] = 3; -+ } -+ } -+ else if ((contents & (h2 | h3)) == contents) -+ { -+ /* shufps */ -+ for (i = 0; i < nelt2; ++i) -+ { -+ remap[i + nelt2] = i; -+ remap[i + nelt] = i + nelt2; -+ dremap.perm[i] = i + nelt2; -+ dremap.perm[i + nelt2] = i + nelt; -+ } -+ if (nelt != 4) -+ { -+ /* shufpd */ -+ dremap.vmode = V2DImode; -+ dremap.nelt = 2; -+ dremap.perm[0] = 1; -+ dremap.perm[1] = 2; -+ } -+ } -+ else -+ return false; -+ } -+ else -+ { -+ unsigned int nelt4 = nelt / 4, nzcnt = 0; -+ unsigned HOST_WIDE_INT q[8]; -+ unsigned int nonzero_halves[4]; -+ -+ /* Split the two input vectors into 8 quarters. */ -+ q[0] = (HOST_WIDE_INT_1U << nelt4) - 1; -+ for (i = 1; i < 8; ++i) -+ q[i] = q[0] << (nelt4 * i); -+ for (i = 0; i < 4; ++i) -+ if (((q[2 * i] | q[2 * i + 1]) & contents) != 0) -+ { -+ nonzero_halves[nzcnt] = i; -+ ++nzcnt; -+ } -+ -+ if (nzcnt == 1) -+ { -+ gcc_assert (d->one_operand_p); -+ nonzero_halves[1] = nonzero_halves[0]; -+ same_halves = true; -+ } -+ else if (d->one_operand_p) -+ { -+ gcc_assert (nonzero_halves[0] == 0); -+ gcc_assert (nonzero_halves[1] == 1); -+ } -+ -+ if (nzcnt <= 2) -+ { -+ if (d->perm[0] / nelt2 == nonzero_halves[1]) -+ { -+ /* Attempt to increase the likelihood that dfinal -+ shuffle will be intra-lane. */ -+ std::swap (nonzero_halves[0], nonzero_halves[1]); -+ } -+ -+ /* vperm2f128 or vperm2i128. 
*/ -+ for (i = 0; i < nelt2; ++i) -+ { -+ remap[i + nonzero_halves[1] * nelt2] = i + nelt2; -+ remap[i + nonzero_halves[0] * nelt2] = i; -+ dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2; -+ dremap.perm[i] = i + nonzero_halves[0] * nelt2; -+ } -+ -+ if (d->vmode != V8SFmode -+ && d->vmode != V4DFmode -+ && d->vmode != V8SImode) -+ { -+ dremap.vmode = V8SImode; -+ dremap.nelt = 8; -+ for (i = 0; i < 4; ++i) -+ { -+ dremap.perm[i] = i + nonzero_halves[0] * 4; -+ dremap.perm[i + 4] = i + nonzero_halves[1] * 4; -+ } -+ } -+ } -+ else if (d->one_operand_p) -+ return false; -+ else if (TARGET_AVX2 -+ && (contents & (q[0] | q[2] | q[4] | q[6])) == contents) -+ { -+ /* vpunpckl* */ -+ for (i = 0; i < nelt4; ++i) -+ { -+ remap[i] = i * 2; -+ remap[i + nelt] = i * 2 + 1; -+ remap[i + nelt2] = i * 2 + nelt2; -+ remap[i + nelt + nelt2] = i * 2 + nelt2 + 1; -+ dremap.perm[i * 2] = i; -+ dremap.perm[i * 2 + 1] = i + nelt; -+ dremap.perm[i * 2 + nelt2] = i + nelt2; -+ dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2; -+ } -+ } -+ else if (TARGET_AVX2 -+ && (contents & (q[1] | q[3] | q[5] | q[7])) == contents) -+ { -+ /* vpunpckh* */ -+ for (i = 0; i < nelt4; ++i) -+ { -+ remap[i + nelt4] = i * 2; -+ remap[i + nelt + nelt4] = i * 2 + 1; -+ remap[i + nelt2 + nelt4] = i * 2 + nelt2; -+ remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1; -+ dremap.perm[i * 2] = i + nelt4; -+ dremap.perm[i * 2 + 1] = i + nelt + nelt4; -+ dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4; -+ dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4; -+ } -+ } -+ else -+ return false; -+ } -+ -+ /* Use the remapping array set up above to move the elements from their -+ swizzled locations into their final destinations. */ -+ dfinal = *d; -+ for (i = 0; i < nelt; ++i) -+ { -+ unsigned e = remap[d->perm[i]]; -+ gcc_assert (e < nelt); -+ /* If same_halves is true, both halves of the remapped vector are the -+ same. Avoid cross-lane accesses if possible. */ -+ if (same_halves && i >= nelt2) -+ { -+ gcc_assert (e < nelt2); -+ dfinal.perm[i] = e + nelt2; -+ } -+ else -+ dfinal.perm[i] = e; -+ } -+ if (!d->testing_p) -+ { -+ dremap.target = gen_reg_rtx (dremap.vmode); -+ dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target); -+ } -+ dfinal.op1 = dfinal.op0; -+ dfinal.one_operand_p = true; -+ -+ /* Test if the final remap can be done with a single insn. For V4SFmode or -+ V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */ -+ start_sequence (); -+ ok = expand_vec_perm_1 (&dfinal); -+ seq = get_insns (); -+ end_sequence (); -+ -+ if (!ok) -+ return false; -+ -+ if (d->testing_p) -+ return true; -+ -+ if (dremap.vmode != dfinal.vmode) -+ { -+ dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0); -+ dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1); -+ } -+ -+ ok = expand_vec_perm_1 (&dremap); -+ gcc_assert (ok); -+ -+ emit_insn (seq); -+ return true; -+} -+ -+/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify -+ a single vector cross-lane permutation into vpermq followed -+ by any of the single insn permutations. 
*/ -+ -+static bool -+expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d) -+{ -+ struct expand_vec_perm_d dremap, dfinal; -+ unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4; -+ unsigned contents[2]; -+ bool ok; -+ -+ if (!(TARGET_AVX2 -+ && (d->vmode == V32QImode || d->vmode == V16HImode) -+ && d->one_operand_p)) -+ return false; -+ -+ contents[0] = 0; -+ contents[1] = 0; -+ for (i = 0; i < nelt2; ++i) -+ { -+ contents[0] |= 1u << (d->perm[i] / nelt4); -+ contents[1] |= 1u << (d->perm[i + nelt2] / nelt4); -+ } -+ -+ for (i = 0; i < 2; ++i) -+ { -+ unsigned int cnt = 0; -+ for (j = 0; j < 4; ++j) -+ if ((contents[i] & (1u << j)) != 0 && ++cnt > 2) -+ return false; -+ } -+ -+ if (d->testing_p) -+ return true; -+ -+ dremap = *d; -+ dremap.vmode = V4DImode; -+ dremap.nelt = 4; -+ dremap.target = gen_reg_rtx (V4DImode); -+ dremap.op0 = gen_lowpart (V4DImode, d->op0); -+ dremap.op1 = dremap.op0; -+ dremap.one_operand_p = true; -+ for (i = 0; i < 2; ++i) -+ { -+ unsigned int cnt = 0; -+ for (j = 0; j < 4; ++j) -+ if ((contents[i] & (1u << j)) != 0) -+ dremap.perm[2 * i + cnt++] = j; -+ for (; cnt < 2; ++cnt) -+ dremap.perm[2 * i + cnt] = 0; -+ } -+ -+ dfinal = *d; -+ dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target); -+ dfinal.op1 = dfinal.op0; -+ dfinal.one_operand_p = true; -+ for (i = 0, j = 0; i < nelt; ++i) -+ { -+ if (i == nelt2) -+ j = 2; -+ dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0); -+ if ((d->perm[i] / nelt4) == dremap.perm[j]) -+ ; -+ else if ((d->perm[i] / nelt4) == dremap.perm[j + 1]) -+ dfinal.perm[i] |= nelt4; -+ else -+ gcc_unreachable (); -+ } -+ -+ ok = expand_vec_perm_1 (&dremap); -+ gcc_assert (ok); -+ -+ ok = expand_vec_perm_1 (&dfinal); -+ gcc_assert (ok); -+ -+ return true; -+} -+ -+static bool canonicalize_perm (struct expand_vec_perm_d *d); -+ -+/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand -+ a vector permutation using two instructions, vperm2f128 resp. -+ vperm2i128 followed by any single in-lane permutation. */ -+ -+static bool -+expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d) -+{ -+ struct expand_vec_perm_d dfirst, dsecond; -+ unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm; -+ bool ok; -+ -+ if (!TARGET_AVX -+ || GET_MODE_SIZE (d->vmode) != 32 -+ || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2)) -+ return false; -+ -+ dsecond = *d; -+ dsecond.one_operand_p = false; -+ dsecond.testing_p = true; -+ -+ /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128 -+ immediate. For perm < 16 the second permutation uses -+ d->op0 as first operand, for perm >= 16 it uses d->op1 -+ as first operand. The second operand is the result of -+ vperm2[fi]128. */ -+ for (perm = 0; perm < 32; perm++) -+ { -+ /* Ignore permutations which do not move anything cross-lane. */ -+ if (perm < 16) -+ { -+ /* The second shuffle for e.g. V4DFmode has -+ 0123 and ABCD operands. -+ Ignore AB23, as 23 is already in the second lane -+ of the first operand. */ -+ if ((perm & 0xc) == (1 << 2)) continue; -+ /* And 01CD, as 01 is in the first lane of the first -+ operand. */ -+ if ((perm & 3) == 0) continue; -+ /* And 4567, as then the vperm2[fi]128 doesn't change -+ anything on the original 4567 second operand. */ -+ if ((perm & 0xf) == ((3 << 2) | 2)) continue; -+ } -+ else -+ { -+ /* The second shuffle for e.g. V4DFmode has -+ 4567 and ABCD operands. -+ Ignore AB67, as 67 is already in the second lane -+ of the first operand. 
*/ -+ if ((perm & 0xc) == (3 << 2)) continue; -+ /* And 45CD, as 45 is in the first lane of the first -+ operand. */ -+ if ((perm & 3) == 2) continue; -+ /* And 0123, as then the vperm2[fi]128 doesn't change -+ anything on the original 0123 first operand. */ -+ if ((perm & 0xf) == (1 << 2)) continue; -+ } -+ -+ for (i = 0; i < nelt; i++) -+ { -+ j = d->perm[i] / nelt2; -+ if (j == ((perm >> (2 * (i >= nelt2))) & 3)) -+ dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1)); -+ else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16)) -+ dsecond.perm[i] = d->perm[i] & (nelt - 1); -+ else -+ break; -+ } -+ -+ if (i == nelt) -+ { -+ start_sequence (); -+ ok = expand_vec_perm_1 (&dsecond); -+ end_sequence (); -+ } -+ else -+ ok = false; -+ -+ if (ok) -+ { -+ if (d->testing_p) -+ return true; -+ -+ /* Found a usable second shuffle. dfirst will be -+ vperm2f128 on d->op0 and d->op1. */ -+ dsecond.testing_p = false; -+ dfirst = *d; -+ dfirst.target = gen_reg_rtx (d->vmode); -+ for (i = 0; i < nelt; i++) -+ dfirst.perm[i] = (i & (nelt2 - 1)) -+ + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2; -+ -+ canonicalize_perm (&dfirst); -+ ok = expand_vec_perm_1 (&dfirst); -+ gcc_assert (ok); -+ -+ /* And dsecond is some single insn shuffle, taking -+ d->op0 and result of vperm2f128 (if perm < 16) or -+ d->op1 and result of vperm2f128 (otherwise). */ -+ if (perm >= 16) -+ dsecond.op0 = dsecond.op1; -+ dsecond.op1 = dfirst.target; -+ -+ ok = expand_vec_perm_1 (&dsecond); -+ gcc_assert (ok); -+ -+ return true; -+ } -+ -+ /* For one operand, the only useful vperm2f128 permutation is 0x01 -+ aka lanes swap. */ -+ if (d->one_operand_p) -+ return false; -+ } -+ -+ return false; -+} -+ -+/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify -+ a two vector permutation using 2 intra-lane interleave insns -+ and cross-lane shuffle for 32-byte vectors. */ -+ -+static bool -+expand_vec_perm_interleave3 (struct expand_vec_perm_d *d) -+{ -+ unsigned i, nelt; -+ rtx (*gen) (rtx, rtx, rtx); -+ -+ if (d->one_operand_p) -+ return false; -+ if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32) -+ ; -+ else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode)) -+ ; -+ else -+ return false; -+ -+ nelt = d->nelt; -+ if (d->perm[0] != 0 && d->perm[0] != nelt / 2) -+ return false; -+ for (i = 0; i < nelt; i += 2) -+ if (d->perm[i] != d->perm[0] + i / 2 -+ || d->perm[i + 1] != d->perm[0] + i / 2 + nelt) -+ return false; -+ -+ if (d->testing_p) -+ return true; -+ -+ switch (d->vmode) -+ { -+ case E_V32QImode: -+ if (d->perm[0]) -+ gen = gen_vec_interleave_highv32qi; -+ else -+ gen = gen_vec_interleave_lowv32qi; -+ break; -+ case E_V16HImode: -+ if (d->perm[0]) -+ gen = gen_vec_interleave_highv16hi; -+ else -+ gen = gen_vec_interleave_lowv16hi; -+ break; -+ case E_V8SImode: -+ if (d->perm[0]) -+ gen = gen_vec_interleave_highv8si; -+ else -+ gen = gen_vec_interleave_lowv8si; -+ break; -+ case E_V4DImode: -+ if (d->perm[0]) -+ gen = gen_vec_interleave_highv4di; -+ else -+ gen = gen_vec_interleave_lowv4di; -+ break; -+ case E_V8SFmode: -+ if (d->perm[0]) -+ gen = gen_vec_interleave_highv8sf; -+ else -+ gen = gen_vec_interleave_lowv8sf; -+ break; -+ case E_V4DFmode: -+ if (d->perm[0]) -+ gen = gen_vec_interleave_highv4df; -+ else -+ gen = gen_vec_interleave_lowv4df; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ -+ emit_insn (gen (d->target, d->op0, d->op1)); -+ return true; -+} -+ -+/* A subroutine of ix86_expand_vec_perm_builtin_1. 
Try to implement -+ a single vector permutation using a single intra-lane vector -+ permutation, vperm2f128 swapping the lanes and vblend* insn blending -+ the non-swapped and swapped vectors together. */ -+ -+static bool -+expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d) -+{ -+ struct expand_vec_perm_d dfirst, dsecond; -+ unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2; -+ rtx_insn *seq; -+ bool ok; -+ rtx (*blend) (rtx, rtx, rtx, rtx) = NULL; -+ -+ if (!TARGET_AVX -+ || TARGET_AVX2 -+ || (d->vmode != V8SFmode && d->vmode != V4DFmode) -+ || !d->one_operand_p) -+ return false; -+ -+ dfirst = *d; -+ for (i = 0; i < nelt; i++) -+ dfirst.perm[i] = 0xff; -+ for (i = 0, msk = 0; i < nelt; i++) -+ { -+ j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2; -+ if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i]) -+ return false; -+ dfirst.perm[j] = d->perm[i]; -+ if (j != i) -+ msk |= (1 << i); -+ } -+ for (i = 0; i < nelt; i++) -+ if (dfirst.perm[i] == 0xff) -+ dfirst.perm[i] = i; -+ -+ if (!d->testing_p) -+ dfirst.target = gen_reg_rtx (dfirst.vmode); -+ -+ start_sequence (); -+ ok = expand_vec_perm_1 (&dfirst); -+ seq = get_insns (); -+ end_sequence (); -+ -+ if (!ok) -+ return false; -+ -+ if (d->testing_p) -+ return true; -+ -+ emit_insn (seq); -+ -+ dsecond = *d; -+ dsecond.op0 = dfirst.target; -+ dsecond.op1 = dfirst.target; -+ dsecond.one_operand_p = true; -+ dsecond.target = gen_reg_rtx (dsecond.vmode); -+ for (i = 0; i < nelt; i++) -+ dsecond.perm[i] = i ^ nelt2; -+ -+ ok = expand_vec_perm_1 (&dsecond); -+ gcc_assert (ok); -+ -+ blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256; -+ emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk))); -+ return true; -+} -+ -+/* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF -+ permutation using two vperm2f128, followed by a vshufpd insn blending -+ the two vectors together. */ -+ -+static bool -+expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d) -+{ -+ struct expand_vec_perm_d dfirst, dsecond, dthird; -+ bool ok; -+ -+ if (!TARGET_AVX || (d->vmode != V4DFmode)) -+ return false; -+ -+ if (d->testing_p) -+ return true; -+ -+ dfirst = *d; -+ dsecond = *d; -+ dthird = *d; -+ -+ dfirst.perm[0] = (d->perm[0] & ~1); -+ dfirst.perm[1] = (d->perm[0] & ~1) + 1; -+ dfirst.perm[2] = (d->perm[2] & ~1); -+ dfirst.perm[3] = (d->perm[2] & ~1) + 1; -+ dsecond.perm[0] = (d->perm[1] & ~1); -+ dsecond.perm[1] = (d->perm[1] & ~1) + 1; -+ dsecond.perm[2] = (d->perm[3] & ~1); -+ dsecond.perm[3] = (d->perm[3] & ~1) + 1; -+ dthird.perm[0] = (d->perm[0] % 2); -+ dthird.perm[1] = (d->perm[1] % 2) + 4; -+ dthird.perm[2] = (d->perm[2] % 2) + 2; -+ dthird.perm[3] = (d->perm[3] % 2) + 6; -+ -+ dfirst.target = gen_reg_rtx (dfirst.vmode); -+ dsecond.target = gen_reg_rtx (dsecond.vmode); -+ dthird.op0 = dfirst.target; -+ dthird.op1 = dsecond.target; -+ dthird.one_operand_p = false; -+ -+ canonicalize_perm (&dfirst); -+ canonicalize_perm (&dsecond); -+ -+ ok = expand_vec_perm_1 (&dfirst) -+ && expand_vec_perm_1 (&dsecond) -+ && expand_vec_perm_1 (&dthird); -+ -+ gcc_assert (ok); -+ -+ return true; -+} -+ -+/* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word -+ permutation with two pshufb insns and an ior. We should have already -+ failed all two instruction sequences. 
*/ -+ -+static bool -+expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d) -+{ -+ rtx rperm[2][16], vperm, l, h, op, m128; -+ unsigned int i, nelt, eltsz; -+ -+ if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16) -+ return false; -+ gcc_assert (!d->one_operand_p); -+ -+ if (d->testing_p) -+ return true; -+ -+ nelt = d->nelt; -+ eltsz = GET_MODE_UNIT_SIZE (d->vmode); -+ -+ /* Generate two permutation masks. If the required element is within -+ the given vector it is shuffled into the proper lane. If the required -+ element is in the other vector, force a zero into the lane by setting -+ bit 7 in the permutation mask. */ -+ m128 = GEN_INT (-128); -+ for (i = 0; i < nelt; ++i) -+ { -+ unsigned j, e = d->perm[i]; -+ unsigned which = (e >= nelt); -+ if (e >= nelt) -+ e -= nelt; -+ -+ for (j = 0; j < eltsz; ++j) -+ { -+ rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j); -+ rperm[1-which][i*eltsz + j] = m128; -+ } -+ } -+ -+ vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0])); -+ vperm = force_reg (V16QImode, vperm); -+ -+ l = gen_reg_rtx (V16QImode); -+ op = gen_lowpart (V16QImode, d->op0); -+ emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm)); -+ -+ vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1])); -+ vperm = force_reg (V16QImode, vperm); -+ -+ h = gen_reg_rtx (V16QImode); -+ op = gen_lowpart (V16QImode, d->op1); -+ emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm)); -+ -+ op = d->target; -+ if (d->vmode != V16QImode) -+ op = gen_reg_rtx (V16QImode); -+ emit_insn (gen_iorv16qi3 (op, l, h)); -+ if (op != d->target) -+ emit_move_insn (d->target, gen_lowpart (d->vmode, op)); -+ -+ return true; -+} -+ -+/* Implement arbitrary permutation of one V32QImode and V16QImode operand -+ with two vpshufb insns, vpermq and vpor. We should have already failed -+ all two or three instruction sequences. */ -+ -+static bool -+expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d) -+{ -+ rtx rperm[2][32], vperm, l, h, hp, op, m128; -+ unsigned int i, nelt, eltsz; -+ -+ if (!TARGET_AVX2 -+ || !d->one_operand_p -+ || (d->vmode != V32QImode && d->vmode != V16HImode)) -+ return false; -+ -+ if (d->testing_p) -+ return true; -+ -+ nelt = d->nelt; -+ eltsz = GET_MODE_UNIT_SIZE (d->vmode); -+ -+ /* Generate two permutation masks. If the required element is within -+ the same lane, it is shuffled in. If the required element from the -+ other lane, force a zero by setting bit 7 in the permutation mask. -+ In the other mask the mask has non-negative elements if element -+ is requested from the other lane, but also moved to the other lane, -+ so that the result of vpshufb can have the two V2TImode halves -+ swapped. */ -+ m128 = GEN_INT (-128); -+ for (i = 0; i < nelt; ++i) -+ { -+ unsigned j, e = d->perm[i] & (nelt / 2 - 1); -+ unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz; -+ -+ for (j = 0; j < eltsz; ++j) -+ { -+ rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j); -+ rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128; -+ } -+ } -+ -+ vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1])); -+ vperm = force_reg (V32QImode, vperm); -+ -+ h = gen_reg_rtx (V32QImode); -+ op = gen_lowpart (V32QImode, d->op0); -+ emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm)); -+ -+ /* Swap the 128-byte lanes of h into hp. 
*/ -+ hp = gen_reg_rtx (V4DImode); -+ op = gen_lowpart (V4DImode, h); -+ emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx, -+ const1_rtx)); -+ -+ vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0])); -+ vperm = force_reg (V32QImode, vperm); -+ -+ l = gen_reg_rtx (V32QImode); -+ op = gen_lowpart (V32QImode, d->op0); -+ emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm)); -+ -+ op = d->target; -+ if (d->vmode != V32QImode) -+ op = gen_reg_rtx (V32QImode); -+ emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp))); -+ if (op != d->target) -+ emit_move_insn (d->target, gen_lowpart (d->vmode, op)); -+ -+ return true; -+} -+ -+/* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even -+ and extract-odd permutations of two V32QImode and V16QImode operand -+ with two vpshufb insns, vpor and vpermq. We should have already -+ failed all two or three instruction sequences. */ -+ -+static bool -+expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d) -+{ -+ rtx rperm[2][32], vperm, l, h, ior, op, m128; -+ unsigned int i, nelt, eltsz; -+ -+ if (!TARGET_AVX2 -+ || d->one_operand_p -+ || (d->vmode != V32QImode && d->vmode != V16HImode)) -+ return false; -+ -+ for (i = 0; i < d->nelt; ++i) -+ if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2)) -+ return false; -+ -+ if (d->testing_p) -+ return true; -+ -+ nelt = d->nelt; -+ eltsz = GET_MODE_UNIT_SIZE (d->vmode); -+ -+ /* Generate two permutation masks. In the first permutation mask -+ the first quarter will contain indexes for the first half -+ of the op0, the second quarter will contain bit 7 set, third quarter -+ will contain indexes for the second half of the op0 and the -+ last quarter bit 7 set. In the second permutation mask -+ the first quarter will contain bit 7 set, the second quarter -+ indexes for the first half of the op1, the third quarter bit 7 set -+ and last quarter indexes for the second half of the op1. -+ I.e. the first mask e.g. for V32QImode extract even will be: -+ 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128 -+ (all values masked with 0xf except for -128) and second mask -+ for extract even will be -+ -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */ -+ m128 = GEN_INT (-128); -+ for (i = 0; i < nelt; ++i) -+ { -+ unsigned j, e = d->perm[i] & (nelt / 2 - 1); -+ unsigned which = d->perm[i] >= nelt; -+ unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0; -+ -+ for (j = 0; j < eltsz; ++j) -+ { -+ rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j); -+ rperm[1 - which][(i * eltsz + j) ^ xorv] = m128; -+ } -+ } -+ -+ vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0])); -+ vperm = force_reg (V32QImode, vperm); -+ -+ l = gen_reg_rtx (V32QImode); -+ op = gen_lowpart (V32QImode, d->op0); -+ emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm)); -+ -+ vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1])); -+ vperm = force_reg (V32QImode, vperm); -+ -+ h = gen_reg_rtx (V32QImode); -+ op = gen_lowpart (V32QImode, d->op1); -+ emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm)); -+ -+ ior = gen_reg_rtx (V32QImode); -+ emit_insn (gen_iorv32qi3 (ior, l, h)); -+ -+ /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. 
*/ -+ op = gen_reg_rtx (V4DImode); -+ ior = gen_lowpart (V4DImode, ior); -+ emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx, -+ const1_rtx, GEN_INT (3))); -+ emit_move_insn (d->target, gen_lowpart (d->vmode, op)); -+ -+ return true; -+} -+ -+/* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even -+ and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands -+ with two "and" and "pack" or two "shift" and "pack" insns. We should -+ have already failed all two instruction sequences. */ -+ -+static bool -+expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d) -+{ -+ rtx op, dop0, dop1, t; -+ unsigned i, odd, c, s, nelt = d->nelt; -+ bool end_perm = false; -+ machine_mode half_mode; -+ rtx (*gen_and) (rtx, rtx, rtx); -+ rtx (*gen_pack) (rtx, rtx, rtx); -+ rtx (*gen_shift) (rtx, rtx, rtx); -+ -+ if (d->one_operand_p) -+ return false; -+ -+ switch (d->vmode) -+ { -+ case E_V8HImode: -+ /* Required for "pack". */ -+ if (!TARGET_SSE4_1) -+ return false; -+ c = 0xffff; -+ s = 16; -+ half_mode = V4SImode; -+ gen_and = gen_andv4si3; -+ gen_pack = gen_sse4_1_packusdw; -+ gen_shift = gen_lshrv4si3; -+ break; -+ case E_V16QImode: -+ /* No check as all instructions are SSE2. */ -+ c = 0xff; -+ s = 8; -+ half_mode = V8HImode; -+ gen_and = gen_andv8hi3; -+ gen_pack = gen_sse2_packuswb; -+ gen_shift = gen_lshrv8hi3; -+ break; -+ case E_V16HImode: -+ if (!TARGET_AVX2) -+ return false; -+ c = 0xffff; -+ s = 16; -+ half_mode = V8SImode; -+ gen_and = gen_andv8si3; -+ gen_pack = gen_avx2_packusdw; -+ gen_shift = gen_lshrv8si3; -+ end_perm = true; -+ break; -+ case E_V32QImode: -+ if (!TARGET_AVX2) -+ return false; -+ c = 0xff; -+ s = 8; -+ half_mode = V16HImode; -+ gen_and = gen_andv16hi3; -+ gen_pack = gen_avx2_packuswb; -+ gen_shift = gen_lshrv16hi3; -+ end_perm = true; -+ break; -+ default: -+ /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than -+ general shuffles. */ -+ return false; -+ } -+ -+ /* Check that permutation is even or odd. */ -+ odd = d->perm[0]; -+ if (odd > 1) -+ return false; -+ -+ for (i = 1; i < nelt; ++i) -+ if (d->perm[i] != 2 * i + odd) -+ return false; -+ -+ if (d->testing_p) -+ return true; -+ -+ dop0 = gen_reg_rtx (half_mode); -+ dop1 = gen_reg_rtx (half_mode); -+ if (odd == 0) -+ { -+ t = gen_const_vec_duplicate (half_mode, GEN_INT (c)); -+ t = force_reg (half_mode, t); -+ emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0))); -+ emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1))); -+ } -+ else -+ { -+ emit_insn (gen_shift (dop0, -+ gen_lowpart (half_mode, d->op0), -+ GEN_INT (s))); -+ emit_insn (gen_shift (dop1, -+ gen_lowpart (half_mode, d->op1), -+ GEN_INT (s))); -+ } -+ /* In AVX2 for 256 bit case we need to permute pack result. */ -+ if (TARGET_AVX2 && end_perm) -+ { -+ op = gen_reg_rtx (d->vmode); -+ t = gen_reg_rtx (V4DImode); -+ emit_insn (gen_pack (op, dop0, dop1)); -+ emit_insn (gen_avx2_permv4di_1 (t, -+ gen_lowpart (V4DImode, op), -+ const0_rtx, -+ const2_rtx, -+ const1_rtx, -+ GEN_INT (3))); -+ emit_move_insn (d->target, gen_lowpart (d->vmode, t)); -+ } -+ else -+ emit_insn (gen_pack (d->target, dop0, dop1)); -+ -+ return true; -+} -+ -+/* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even -+ and extract-odd permutations of two V64QI operands -+ with two "shifts", two "truncs" and one "concat" insns for "odd" -+ and two "truncs" and one concat insn for "even." -+ Have already failed all two instruction sequences. 
*/ -+ -+static bool -+expand_vec_perm_even_odd_trunc (struct expand_vec_perm_d *d) -+{ -+ rtx t1, t2, t3, t4; -+ unsigned i, odd, nelt = d->nelt; -+ -+ if (!TARGET_AVX512BW -+ || d->one_operand_p -+ || d->vmode != V64QImode) -+ return false; -+ -+ /* Check that permutation is even or odd. */ -+ odd = d->perm[0]; -+ if (odd > 1) -+ return false; -+ -+ for (i = 1; i < nelt; ++i) -+ if (d->perm[i] != 2 * i + odd) -+ return false; -+ -+ if (d->testing_p) -+ return true; -+ -+ -+ if (odd) -+ { -+ t1 = gen_reg_rtx (V32HImode); -+ t2 = gen_reg_rtx (V32HImode); -+ emit_insn (gen_lshrv32hi3 (t1, -+ gen_lowpart (V32HImode, d->op0), -+ GEN_INT (8))); -+ emit_insn (gen_lshrv32hi3 (t2, -+ gen_lowpart (V32HImode, d->op1), -+ GEN_INT (8))); -+ } -+ else -+ { -+ t1 = gen_lowpart (V32HImode, d->op0); -+ t2 = gen_lowpart (V32HImode, d->op1); -+ } -+ -+ t3 = gen_reg_rtx (V32QImode); -+ t4 = gen_reg_rtx (V32QImode); -+ emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t3, t1)); -+ emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t4, t2)); -+ emit_insn (gen_avx_vec_concatv64qi (d->target, t3, t4)); -+ -+ return true; -+} -+ -+/* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even -+ and extract-odd permutations. */ -+ -+static bool -+expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd) -+{ -+ rtx t1, t2, t3, t4, t5; -+ -+ switch (d->vmode) -+ { -+ case E_V4DFmode: -+ if (d->testing_p) -+ break; -+ t1 = gen_reg_rtx (V4DFmode); -+ t2 = gen_reg_rtx (V4DFmode); -+ -+ /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */ -+ emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20))); -+ emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31))); -+ -+ /* Now an unpck[lh]pd will produce the result required. */ -+ if (odd) -+ t3 = gen_avx_unpckhpd256 (d->target, t1, t2); -+ else -+ t3 = gen_avx_unpcklpd256 (d->target, t1, t2); -+ emit_insn (t3); -+ break; -+ -+ case E_V8SFmode: -+ { -+ int mask = odd ? 0xdd : 0x88; -+ -+ if (d->testing_p) -+ break; -+ t1 = gen_reg_rtx (V8SFmode); -+ t2 = gen_reg_rtx (V8SFmode); -+ t3 = gen_reg_rtx (V8SFmode); -+ -+ /* Shuffle within the 128-bit lanes to produce: -+ { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */ -+ emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1, -+ GEN_INT (mask))); -+ -+ /* Shuffle the lanes around to produce: -+ { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */ -+ emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1, -+ GEN_INT (0x3))); -+ -+ /* Shuffle within the 128-bit lanes to produce: -+ { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */ -+ emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44))); -+ -+ /* Shuffle within the 128-bit lanes to produce: -+ { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */ -+ emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee))); -+ -+ /* Shuffle the lanes around to produce: -+ { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */ -+ emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2, -+ GEN_INT (0x20))); -+ } -+ break; -+ -+ case E_V2DFmode: -+ case E_V4SFmode: -+ case E_V2DImode: -+ case E_V4SImode: -+ /* These are always directly implementable by expand_vec_perm_1. */ -+ gcc_unreachable (); -+ -+ case E_V8HImode: -+ if (TARGET_SSE4_1) -+ return expand_vec_perm_even_odd_pack (d); -+ else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB) -+ return expand_vec_perm_pshufb2 (d); -+ else -+ { -+ if (d->testing_p) -+ break; -+ /* We need 2*log2(N)-1 operations to achieve odd/even -+ with interleave. 
*/ -+ t1 = gen_reg_rtx (V8HImode); -+ t2 = gen_reg_rtx (V8HImode); -+ emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1)); -+ emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1)); -+ emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1)); -+ emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1)); -+ if (odd) -+ t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2); -+ else -+ t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2); -+ emit_insn (t3); -+ } -+ break; -+ -+ case E_V16QImode: -+ return expand_vec_perm_even_odd_pack (d); -+ -+ case E_V16HImode: -+ case E_V32QImode: -+ return expand_vec_perm_even_odd_pack (d); -+ -+ case E_V64QImode: -+ return expand_vec_perm_even_odd_trunc (d); -+ -+ case E_V4DImode: -+ if (!TARGET_AVX2) -+ { -+ struct expand_vec_perm_d d_copy = *d; -+ d_copy.vmode = V4DFmode; -+ if (d->testing_p) -+ d_copy.target = gen_raw_REG (V4DFmode, LAST_VIRTUAL_REGISTER + 1); -+ else -+ d_copy.target = gen_reg_rtx (V4DFmode); -+ d_copy.op0 = gen_lowpart (V4DFmode, d->op0); -+ d_copy.op1 = gen_lowpart (V4DFmode, d->op1); -+ if (expand_vec_perm_even_odd_1 (&d_copy, odd)) -+ { -+ if (!d->testing_p) -+ emit_move_insn (d->target, -+ gen_lowpart (V4DImode, d_copy.target)); -+ return true; -+ } -+ return false; -+ } -+ -+ if (d->testing_p) -+ break; -+ -+ t1 = gen_reg_rtx (V4DImode); -+ t2 = gen_reg_rtx (V4DImode); -+ -+ /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */ -+ emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20))); -+ emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31))); -+ -+ /* Now an vpunpck[lh]qdq will produce the result required. */ -+ if (odd) -+ t3 = gen_avx2_interleave_highv4di (d->target, t1, t2); -+ else -+ t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2); -+ emit_insn (t3); -+ break; -+ -+ case E_V8SImode: -+ if (!TARGET_AVX2) -+ { -+ struct expand_vec_perm_d d_copy = *d; -+ d_copy.vmode = V8SFmode; -+ if (d->testing_p) -+ d_copy.target = gen_raw_REG (V8SFmode, LAST_VIRTUAL_REGISTER + 1); -+ else -+ d_copy.target = gen_reg_rtx (V8SFmode); -+ d_copy.op0 = gen_lowpart (V8SFmode, d->op0); -+ d_copy.op1 = gen_lowpart (V8SFmode, d->op1); -+ if (expand_vec_perm_even_odd_1 (&d_copy, odd)) -+ { -+ if (!d->testing_p) -+ emit_move_insn (d->target, -+ gen_lowpart (V8SImode, d_copy.target)); -+ return true; -+ } -+ return false; -+ } -+ -+ if (d->testing_p) -+ break; -+ -+ t1 = gen_reg_rtx (V8SImode); -+ t2 = gen_reg_rtx (V8SImode); -+ t3 = gen_reg_rtx (V4DImode); -+ t4 = gen_reg_rtx (V4DImode); -+ t5 = gen_reg_rtx (V4DImode); -+ -+ /* Shuffle the lanes around into -+ { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */ -+ emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0), -+ gen_lowpart (V4DImode, d->op1), -+ GEN_INT (0x20))); -+ emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0), -+ gen_lowpart (V4DImode, d->op1), -+ GEN_INT (0x31))); -+ -+ /* Swap the 2nd and 3rd position in each lane into -+ { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */ -+ emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3), -+ GEN_INT (2 * 4 + 1 * 16 + 3 * 64))); -+ emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4), -+ GEN_INT (2 * 4 + 1 * 16 + 3 * 64))); -+ -+ /* Now an vpunpck[lh]qdq will produce -+ { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. 
*/ -+ if (odd) -+ t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1), -+ gen_lowpart (V4DImode, t2)); -+ else -+ t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1), -+ gen_lowpart (V4DImode, t2)); -+ emit_insn (t3); -+ emit_move_insn (d->target, gen_lowpart (V8SImode, t5)); -+ break; -+ -+ default: -+ gcc_unreachable (); -+ } -+ -+ return true; -+} -+ -+/* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match -+ extract-even and extract-odd permutations. */ -+ -+static bool -+expand_vec_perm_even_odd (struct expand_vec_perm_d *d) -+{ -+ unsigned i, odd, nelt = d->nelt; -+ -+ odd = d->perm[0]; -+ if (odd != 0 && odd != 1) -+ return false; -+ -+ for (i = 1; i < nelt; ++i) -+ if (d->perm[i] != 2 * i + odd) -+ return false; -+ -+ return expand_vec_perm_even_odd_1 (d, odd); -+} -+ -+/* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast -+ permutations. We assume that expand_vec_perm_1 has already failed. */ -+ -+static bool -+expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d) -+{ -+ unsigned elt = d->perm[0], nelt2 = d->nelt / 2; -+ machine_mode vmode = d->vmode; -+ unsigned char perm2[4]; -+ rtx op0 = d->op0, dest; -+ bool ok; -+ -+ switch (vmode) -+ { -+ case E_V4DFmode: -+ case E_V8SFmode: -+ /* These are special-cased in sse.md so that we can optionally -+ use the vbroadcast instruction. They expand to two insns -+ if the input happens to be in a register. */ -+ gcc_unreachable (); -+ -+ case E_V2DFmode: -+ case E_V2DImode: -+ case E_V4SFmode: -+ case E_V4SImode: -+ /* These are always implementable using standard shuffle patterns. */ -+ gcc_unreachable (); -+ -+ case E_V8HImode: -+ case E_V16QImode: -+ /* These can be implemented via interleave. We save one insn by -+ stopping once we have promoted to V4SImode and then use pshufd. */ -+ if (d->testing_p) -+ return true; -+ do -+ { -+ rtx dest; -+ rtx (*gen) (rtx, rtx, rtx) -+ = vmode == V16QImode ? gen_vec_interleave_lowv16qi -+ : gen_vec_interleave_lowv8hi; -+ -+ if (elt >= nelt2) -+ { -+ gen = vmode == V16QImode ? gen_vec_interleave_highv16qi -+ : gen_vec_interleave_highv8hi; -+ elt -= nelt2; -+ } -+ nelt2 /= 2; -+ -+ dest = gen_reg_rtx (vmode); -+ emit_insn (gen (dest, op0, op0)); -+ vmode = get_mode_wider_vector (vmode); -+ op0 = gen_lowpart (vmode, dest); -+ } -+ while (vmode != V4SImode); -+ -+ memset (perm2, elt, 4); -+ dest = gen_reg_rtx (V4SImode); -+ ok = expand_vselect (dest, op0, perm2, 4, d->testing_p); -+ gcc_assert (ok); -+ if (!d->testing_p) -+ emit_move_insn (d->target, gen_lowpart (d->vmode, dest)); -+ return true; -+ -+ case E_V64QImode: -+ case E_V32QImode: -+ case E_V16HImode: -+ case E_V8SImode: -+ case E_V4DImode: -+ /* For AVX2 broadcasts of the first element vpbroadcast* or -+ vpermq should be used by expand_vec_perm_1. */ -+ gcc_assert (!TARGET_AVX2 || d->perm[0]); -+ return false; -+ -+ default: -+ gcc_unreachable (); -+ } -+} -+ -+/* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match -+ broadcast permutations. */ -+ -+static bool -+expand_vec_perm_broadcast (struct expand_vec_perm_d *d) -+{ -+ unsigned i, elt, nelt = d->nelt; -+ -+ if (!d->one_operand_p) -+ return false; -+ -+ elt = d->perm[0]; -+ for (i = 1; i < nelt; ++i) -+ if (d->perm[i] != elt) -+ return false; -+ -+ return expand_vec_perm_broadcast_1 (d); -+} -+ -+/* Implement arbitrary permutations of two V64QImode operands -+ with 2 vperm[it]2w, 2 vpshufb and one vpor instruction. 
*/ -+static bool -+expand_vec_perm_vpermt2_vpshub2 (struct expand_vec_perm_d *d) -+{ -+ if (!TARGET_AVX512BW || !(d->vmode == V64QImode)) -+ return false; -+ -+ if (d->testing_p) -+ return true; -+ -+ struct expand_vec_perm_d ds[2]; -+ rtx rperm[128], vperm, target0, target1; -+ unsigned int i, nelt; -+ machine_mode vmode; -+ -+ nelt = d->nelt; -+ vmode = V64QImode; -+ -+ for (i = 0; i < 2; i++) -+ { -+ ds[i] = *d; -+ ds[i].vmode = V32HImode; -+ ds[i].nelt = 32; -+ ds[i].target = gen_reg_rtx (V32HImode); -+ ds[i].op0 = gen_lowpart (V32HImode, d->op0); -+ ds[i].op1 = gen_lowpart (V32HImode, d->op1); -+ } -+ -+ /* Prepare permutations such that the first one takes care of -+ putting the even bytes into the right positions or one higher -+ positions (ds[0]) and the second one takes care of -+ putting the odd bytes into the right positions or one below -+ (ds[1]). */ -+ -+ for (i = 0; i < nelt; i++) -+ { -+ ds[i & 1].perm[i / 2] = d->perm[i] / 2; -+ if (i & 1) -+ { -+ rperm[i] = constm1_rtx; -+ rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1)); -+ } -+ else -+ { -+ rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1)); -+ rperm[i + 64] = constm1_rtx; -+ } -+ } -+ -+ bool ok = expand_vec_perm_1 (&ds[0]); -+ gcc_assert (ok); -+ ds[0].target = gen_lowpart (V64QImode, ds[0].target); -+ -+ ok = expand_vec_perm_1 (&ds[1]); -+ gcc_assert (ok); -+ ds[1].target = gen_lowpart (V64QImode, ds[1].target); -+ -+ vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm)); -+ vperm = force_reg (vmode, vperm); -+ target0 = gen_reg_rtx (V64QImode); -+ emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm)); -+ -+ vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64)); -+ vperm = force_reg (vmode, vperm); -+ target1 = gen_reg_rtx (V64QImode); -+ emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm)); -+ -+ emit_insn (gen_iorv64qi3 (d->target, target0, target1)); -+ return true; -+} -+ -+/* Implement arbitrary permutation of two V32QImode and V16QImode operands -+ with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed -+ all the shorter instruction sequences. */ -+ -+static bool -+expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d) -+{ -+ rtx rperm[4][32], vperm, l[2], h[2], op, m128; -+ unsigned int i, nelt, eltsz; -+ bool used[4]; -+ -+ if (!TARGET_AVX2 -+ || d->one_operand_p -+ || (d->vmode != V32QImode && d->vmode != V16HImode)) -+ return false; -+ -+ if (d->testing_p) -+ return true; -+ -+ nelt = d->nelt; -+ eltsz = GET_MODE_UNIT_SIZE (d->vmode); -+ -+ /* Generate 4 permutation masks. If the required element is within -+ the same lane, it is shuffled in. If the required element from the -+ other lane, force a zero by setting bit 7 in the permutation mask. -+ In the other mask the mask has non-negative elements if element -+ is requested from the other lane, but also moved to the other lane, -+ so that the result of vpshufb can have the two V2TImode halves -+ swapped. */ -+ m128 = GEN_INT (-128); -+ for (i = 0; i < 32; ++i) -+ { -+ rperm[0][i] = m128; -+ rperm[1][i] = m128; -+ rperm[2][i] = m128; -+ rperm[3][i] = m128; -+ } -+ used[0] = false; -+ used[1] = false; -+ used[2] = false; -+ used[3] = false; -+ for (i = 0; i < nelt; ++i) -+ { -+ unsigned j, e = d->perm[i] & (nelt / 2 - 1); -+ unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz; -+ unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 
1 : 0); -+ -+ for (j = 0; j < eltsz; ++j) -+ rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j); -+ used[which] = true; -+ } -+ -+ for (i = 0; i < 2; ++i) -+ { -+ if (!used[2 * i + 1]) -+ { -+ h[i] = NULL_RTX; -+ continue; -+ } -+ vperm = gen_rtx_CONST_VECTOR (V32QImode, -+ gen_rtvec_v (32, rperm[2 * i + 1])); -+ vperm = force_reg (V32QImode, vperm); -+ h[i] = gen_reg_rtx (V32QImode); -+ op = gen_lowpart (V32QImode, i ? d->op1 : d->op0); -+ emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm)); -+ } -+ -+ /* Swap the 128-byte lanes of h[X]. */ -+ for (i = 0; i < 2; ++i) -+ { -+ if (h[i] == NULL_RTX) -+ continue; -+ op = gen_reg_rtx (V4DImode); -+ emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]), -+ const2_rtx, GEN_INT (3), const0_rtx, -+ const1_rtx)); -+ h[i] = gen_lowpart (V32QImode, op); -+ } -+ -+ for (i = 0; i < 2; ++i) -+ { -+ if (!used[2 * i]) -+ { -+ l[i] = NULL_RTX; -+ continue; -+ } -+ vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i])); -+ vperm = force_reg (V32QImode, vperm); -+ l[i] = gen_reg_rtx (V32QImode); -+ op = gen_lowpart (V32QImode, i ? d->op1 : d->op0); -+ emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm)); -+ } -+ -+ for (i = 0; i < 2; ++i) -+ { -+ if (h[i] && l[i]) -+ { -+ op = gen_reg_rtx (V32QImode); -+ emit_insn (gen_iorv32qi3 (op, l[i], h[i])); -+ l[i] = op; -+ } -+ else if (h[i]) -+ l[i] = h[i]; -+ } -+ -+ gcc_assert (l[0] && l[1]); -+ op = d->target; -+ if (d->vmode != V32QImode) -+ op = gen_reg_rtx (V32QImode); -+ emit_insn (gen_iorv32qi3 (op, l[0], l[1])); -+ if (op != d->target) -+ emit_move_insn (d->target, gen_lowpart (d->vmode, op)); -+ return true; -+} -+ -+/* The guts of ix86_vectorize_vec_perm_const. With all of the interface bits -+ taken care of, perform the expansion in D and return true on success. */ -+ -+static bool -+ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) -+{ -+ /* Try a single instruction expansion. */ -+ if (expand_vec_perm_1 (d)) -+ return true; -+ -+ /* Try sequences of two instructions. */ -+ -+ if (expand_vec_perm_pshuflw_pshufhw (d)) -+ return true; -+ -+ if (expand_vec_perm_palignr (d, false)) -+ return true; -+ -+ if (expand_vec_perm_interleave2 (d)) -+ return true; -+ -+ if (expand_vec_perm_broadcast (d)) -+ return true; -+ -+ if (expand_vec_perm_vpermq_perm_1 (d)) -+ return true; -+ -+ if (expand_vec_perm_vperm2f128 (d)) -+ return true; -+ -+ if (expand_vec_perm_pblendv (d)) -+ return true; -+ -+ /* Try sequences of three instructions. */ -+ -+ if (expand_vec_perm_even_odd_pack (d)) -+ return true; -+ -+ if (expand_vec_perm_2vperm2f128_vshuf (d)) -+ return true; -+ -+ if (expand_vec_perm_pshufb2 (d)) -+ return true; -+ -+ if (expand_vec_perm_interleave3 (d)) -+ return true; -+ -+ if (expand_vec_perm_vperm2f128_vblend (d)) -+ return true; -+ -+ /* Try sequences of four instructions. */ -+ -+ if (expand_vec_perm_even_odd_trunc (d)) -+ return true; -+ if (expand_vec_perm_vpshufb2_vpermq (d)) -+ return true; -+ -+ if (expand_vec_perm_vpshufb2_vpermq_even_odd (d)) -+ return true; -+ -+ if (expand_vec_perm_vpermt2_vpshub2 (d)) -+ return true; -+ -+ /* ??? Look for narrow permutations whose element orderings would -+ allow the promotion to a wider mode. */ -+ -+ /* ??? Look for sequences of interleave or a wider permute that place -+ the data into the correct lanes for a half-vector shuffle like -+ pshuf[lh]w or vpermilps. */ -+ -+ /* ??? Look for sequences of interleave that produce the desired results. -+ The combinatorics of punpck[lh] get pretty ugly... 
*/ -+ -+ if (expand_vec_perm_even_odd (d)) -+ return true; -+ -+ /* Even longer sequences. */ -+ if (expand_vec_perm_vpshufb4_vpermq2 (d)) -+ return true; -+ -+ /* See if we can get the same permutation in different vector integer -+ mode. */ -+ struct expand_vec_perm_d nd; -+ if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd)) -+ { -+ if (!d->testing_p) -+ emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target)); -+ return true; -+ } -+ -+ return false; -+} -+ -+/* If a permutation only uses one operand, make it clear. Returns true -+ if the permutation references both operands. */ -+ -+static bool -+canonicalize_perm (struct expand_vec_perm_d *d) -+{ -+ int i, which, nelt = d->nelt; -+ -+ for (i = which = 0; i < nelt; ++i) -+ which |= (d->perm[i] < nelt ? 1 : 2); -+ -+ d->one_operand_p = true; -+ switch (which) -+ { -+ default: -+ gcc_unreachable(); -+ -+ case 3: -+ if (!rtx_equal_p (d->op0, d->op1)) -+ { -+ d->one_operand_p = false; -+ break; -+ } -+ /* The elements of PERM do not suggest that only the first operand -+ is used, but both operands are identical. Allow easier matching -+ of the permutation by folding the permutation into the single -+ input vector. */ -+ /* FALLTHRU */ -+ -+ case 2: -+ for (i = 0; i < nelt; ++i) -+ d->perm[i] &= nelt - 1; -+ d->op0 = d->op1; -+ break; -+ -+ case 1: -+ d->op1 = d->op0; -+ break; -+ } -+ -+ return (which == 3); -+} -+ -+/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */ -+ -+bool -+ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, -+ rtx op1, const vec_perm_indices &sel) -+{ -+ struct expand_vec_perm_d d; -+ unsigned char perm[MAX_VECT_LEN]; -+ unsigned int i, nelt, which; -+ bool two_args; -+ -+ d.target = target; -+ d.op0 = op0; -+ d.op1 = op1; -+ -+ d.vmode = vmode; -+ gcc_assert (VECTOR_MODE_P (d.vmode)); -+ d.nelt = nelt = GET_MODE_NUNITS (d.vmode); -+ d.testing_p = !target; -+ -+ gcc_assert (sel.length () == nelt); -+ gcc_checking_assert (sizeof (d.perm) == sizeof (perm)); -+ -+ /* Given sufficient ISA support we can just return true here -+ for selected vector modes. */ -+ switch (d.vmode) -+ { -+ case E_V16SFmode: -+ case E_V16SImode: -+ case E_V8DImode: -+ case E_V8DFmode: -+ if (!TARGET_AVX512F) -+ return false; -+ /* All implementable with a single vperm[it]2 insn. */ -+ if (d.testing_p) -+ return true; -+ break; -+ case E_V32HImode: -+ if (!TARGET_AVX512BW) -+ return false; -+ if (d.testing_p) -+ /* All implementable with a single vperm[it]2 insn. */ -+ return true; -+ break; -+ case E_V64QImode: -+ if (!TARGET_AVX512BW) -+ return false; -+ if (d.testing_p) -+ /* Implementable with 2 vperm[it]2, 2 vpshufb and 1 or insn. */ -+ return true; -+ break; -+ case E_V8SImode: -+ case E_V8SFmode: -+ case E_V4DFmode: -+ case E_V4DImode: -+ if (!TARGET_AVX) -+ return false; -+ if (d.testing_p && TARGET_AVX512VL) -+ /* All implementable with a single vperm[it]2 insn. */ -+ return true; -+ break; -+ case E_V16HImode: -+ if (!TARGET_SSE2) -+ return false; -+ if (d.testing_p && TARGET_AVX2) -+ /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */ -+ return true; -+ break; -+ case E_V32QImode: -+ if (!TARGET_SSE2) -+ return false; -+ if (d.testing_p && TARGET_AVX2) -+ /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */ -+ return true; -+ break; -+ case E_V8HImode: -+ case E_V16QImode: -+ if (!TARGET_SSE2) -+ return false; -+ /* Fall through. */ -+ case E_V4SImode: -+ case E_V4SFmode: -+ if (!TARGET_SSE) -+ return false; -+ /* All implementable with a single vpperm insn. 
*/ -+ if (d.testing_p && TARGET_XOP) -+ return true; -+ /* All implementable with 2 pshufb + 1 ior. */ -+ if (d.testing_p && TARGET_SSSE3) -+ return true; -+ break; -+ case E_V2DImode: -+ case E_V2DFmode: -+ if (!TARGET_SSE) -+ return false; -+ /* All implementable with shufpd or unpck[lh]pd. */ -+ if (d.testing_p) -+ return true; -+ break; -+ default: -+ return false; -+ } -+ -+ for (i = which = 0; i < nelt; ++i) -+ { -+ unsigned char e = sel[i]; -+ gcc_assert (e < 2 * nelt); -+ d.perm[i] = e; -+ perm[i] = e; -+ which |= (e < nelt ? 1 : 2); -+ } -+ -+ if (d.testing_p) -+ { -+ /* For all elements from second vector, fold the elements to first. */ -+ if (which == 2) -+ for (i = 0; i < nelt; ++i) -+ d.perm[i] -= nelt; -+ -+ /* Check whether the mask can be applied to the vector type. */ -+ d.one_operand_p = (which != 3); -+ -+ /* Implementable with shufps or pshufd. */ -+ if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode)) -+ return true; -+ -+ /* Otherwise we have to go through the motions and see if we can -+ figure out how to generate the requested permutation. */ -+ d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1); -+ d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2); -+ if (!d.one_operand_p) -+ d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); -+ -+ start_sequence (); -+ bool ret = ix86_expand_vec_perm_const_1 (&d); -+ end_sequence (); -+ -+ return ret; -+ } -+ -+ two_args = canonicalize_perm (&d); -+ -+ if (ix86_expand_vec_perm_const_1 (&d)) -+ return true; -+ -+ /* If the selector says both arguments are needed, but the operands are the -+ same, the above tried to expand with one_operand_p and flattened selector. -+ If that didn't work, retry without one_operand_p; we succeeded with that -+ during testing. */ -+ if (two_args && d.one_operand_p) -+ { -+ d.one_operand_p = false; -+ memcpy (d.perm, perm, sizeof (perm)); -+ return ix86_expand_vec_perm_const_1 (&d); -+ } -+ -+ return false; -+} -+ -+void -+ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd) -+{ -+ struct expand_vec_perm_d d; -+ unsigned i, nelt; -+ -+ d.target = targ; -+ d.op0 = op0; -+ d.op1 = op1; -+ d.vmode = GET_MODE (targ); -+ d.nelt = nelt = GET_MODE_NUNITS (d.vmode); -+ d.one_operand_p = false; -+ d.testing_p = false; -+ -+ for (i = 0; i < nelt; ++i) -+ d.perm[i] = i * 2 + odd; -+ -+ /* We'll either be able to implement the permutation directly... */ -+ if (expand_vec_perm_1 (&d)) -+ return; -+ -+ /* ... or we use the special-case patterns. */ -+ expand_vec_perm_even_odd_1 (&d, odd); -+} -+ -+static void -+ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p) -+{ -+ struct expand_vec_perm_d d; -+ unsigned i, nelt, base; -+ bool ok; -+ -+ d.target = targ; -+ d.op0 = op0; -+ d.op1 = op1; -+ d.vmode = GET_MODE (targ); -+ d.nelt = nelt = GET_MODE_NUNITS (d.vmode); -+ d.one_operand_p = false; -+ d.testing_p = false; -+ -+ base = high_p ? nelt / 2 : 0; -+ for (i = 0; i < nelt / 2; ++i) -+ { -+ d.perm[i * 2] = i + base; -+ d.perm[i * 2 + 1] = i + base + nelt; -+ } -+ -+ /* Note that for AVX this isn't one instruction. */ -+ ok = ix86_expand_vec_perm_const_1 (&d); -+ gcc_assert (ok); -+} -+ -+ -+/* Expand a vector operation CODE for a V*QImode in terms of the -+ same operation on V*HImode. 
*/ -+ -+void -+ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2) -+{ -+ machine_mode qimode = GET_MODE (dest); -+ machine_mode himode; -+ rtx (*gen_il) (rtx, rtx, rtx); -+ rtx (*gen_ih) (rtx, rtx, rtx); -+ rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h; -+ struct expand_vec_perm_d d; -+ bool ok, full_interleave; -+ bool uns_p = false; -+ int i; -+ -+ switch (qimode) -+ { -+ case E_V16QImode: -+ himode = V8HImode; -+ gen_il = gen_vec_interleave_lowv16qi; -+ gen_ih = gen_vec_interleave_highv16qi; -+ break; -+ case E_V32QImode: -+ himode = V16HImode; -+ gen_il = gen_avx2_interleave_lowv32qi; -+ gen_ih = gen_avx2_interleave_highv32qi; -+ break; -+ case E_V64QImode: -+ himode = V32HImode; -+ gen_il = gen_avx512bw_interleave_lowv64qi; -+ gen_ih = gen_avx512bw_interleave_highv64qi; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ -+ op2_l = op2_h = op2; -+ switch (code) -+ { -+ case MULT: -+ /* Unpack data such that we've got a source byte in each low byte of -+ each word. We don't care what goes into the high byte of each word. -+ Rather than trying to get zero in there, most convenient is to let -+ it be a copy of the low byte. */ -+ op2_l = gen_reg_rtx (qimode); -+ op2_h = gen_reg_rtx (qimode); -+ emit_insn (gen_il (op2_l, op2, op2)); -+ emit_insn (gen_ih (op2_h, op2, op2)); -+ -+ op1_l = gen_reg_rtx (qimode); -+ op1_h = gen_reg_rtx (qimode); -+ emit_insn (gen_il (op1_l, op1, op1)); -+ emit_insn (gen_ih (op1_h, op1, op1)); -+ full_interleave = qimode == V16QImode; -+ break; -+ -+ case ASHIFT: -+ case LSHIFTRT: -+ uns_p = true; -+ /* FALLTHRU */ -+ case ASHIFTRT: -+ op1_l = gen_reg_rtx (himode); -+ op1_h = gen_reg_rtx (himode); -+ ix86_expand_sse_unpack (op1_l, op1, uns_p, false); -+ ix86_expand_sse_unpack (op1_h, op1, uns_p, true); -+ full_interleave = true; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ -+ /* Perform the operation. */ -+ res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX, -+ 1, OPTAB_DIRECT); -+ res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX, -+ 1, OPTAB_DIRECT); -+ gcc_assert (res_l && res_h); -+ -+ /* Merge the data back into the right place. */ -+ d.target = dest; -+ d.op0 = gen_lowpart (qimode, res_l); -+ d.op1 = gen_lowpart (qimode, res_h); -+ d.vmode = qimode; -+ d.nelt = GET_MODE_NUNITS (qimode); -+ d.one_operand_p = false; -+ d.testing_p = false; -+ -+ if (full_interleave) -+ { -+ /* For SSE2, we used an full interleave, so the desired -+ results are in the even elements. */ -+ for (i = 0; i < d.nelt; ++i) -+ d.perm[i] = i * 2; -+ } -+ else -+ { -+ /* For AVX, the interleave used above was not cross-lane. So the -+ extraction is evens but with the second and third quarter swapped. -+ Happily, that is even one insn shorter than even extraction. -+ For AVX512BW we have 4 lanes. We extract evens from within a lane, -+ always first from the first and then from the second source operand, -+ the index bits above the low 4 bits remains the same. -+ Thus, for d.nelt == 32 we want permutation -+ 0,2,4,..14, 32,34,36,..46, 16,18,20,..30, 48,50,52,..62 -+ and for d.nelt == 64 we want permutation -+ 0,2,4,..14, 64,66,68,..78, 16,18,20,..30, 80,82,84,..94, -+ 32,34,36,..46, 96,98,100,..110, 48,50,52,..62, 112,114,116,..126. */ -+ for (i = 0; i < d.nelt; ++i) -+ d.perm[i] = ((i * 2) & 14) + ((i & 8) ? 
d.nelt : 0) + (i & ~15); -+ } -+ -+ ok = ix86_expand_vec_perm_const_1 (&d); -+ gcc_assert (ok); -+ -+ set_unique_reg_note (get_last_insn (), REG_EQUAL, -+ gen_rtx_fmt_ee (code, qimode, op1, op2)); -+} -+ -+/* Helper function of ix86_expand_mul_widen_evenodd. Return true -+ if op is CONST_VECTOR with all odd elements equal to their -+ preceding element. */ -+ -+static bool -+const_vector_equal_evenodd_p (rtx op) -+{ -+ machine_mode mode = GET_MODE (op); -+ int i, nunits = GET_MODE_NUNITS (mode); -+ if (GET_CODE (op) != CONST_VECTOR -+ || nunits != CONST_VECTOR_NUNITS (op)) -+ return false; -+ for (i = 0; i < nunits; i += 2) -+ if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1)) -+ return false; -+ return true; -+} -+ -+void -+ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2, -+ bool uns_p, bool odd_p) -+{ -+ machine_mode mode = GET_MODE (op1); -+ machine_mode wmode = GET_MODE (dest); -+ rtx x; -+ rtx orig_op1 = op1, orig_op2 = op2; -+ -+ if (!nonimmediate_operand (op1, mode)) -+ op1 = force_reg (mode, op1); -+ if (!nonimmediate_operand (op2, mode)) -+ op2 = force_reg (mode, op2); -+ -+ /* We only play even/odd games with vectors of SImode. */ -+ gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode); -+ -+ /* If we're looking for the odd results, shift those members down to -+ the even slots. For some cpus this is faster than a PSHUFD. */ -+ if (odd_p) -+ { -+ /* For XOP use vpmacsdqh, but only for smult, as it is only -+ signed. */ -+ if (TARGET_XOP && mode == V4SImode && !uns_p) -+ { -+ x = force_reg (wmode, CONST0_RTX (wmode)); -+ emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x)); -+ return; -+ } -+ -+ x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode)); -+ if (!const_vector_equal_evenodd_p (orig_op1)) -+ op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1), -+ x, NULL, 1, OPTAB_DIRECT); -+ if (!const_vector_equal_evenodd_p (orig_op2)) -+ op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2), -+ x, NULL, 1, OPTAB_DIRECT); -+ op1 = gen_lowpart (mode, op1); -+ op2 = gen_lowpart (mode, op2); -+ } -+ -+ if (mode == V16SImode) -+ { -+ if (uns_p) -+ x = gen_vec_widen_umult_even_v16si (dest, op1, op2); -+ else -+ x = gen_vec_widen_smult_even_v16si (dest, op1, op2); -+ } -+ else if (mode == V8SImode) -+ { -+ if (uns_p) -+ x = gen_vec_widen_umult_even_v8si (dest, op1, op2); -+ else -+ x = gen_vec_widen_smult_even_v8si (dest, op1, op2); -+ } -+ else if (uns_p) -+ x = gen_vec_widen_umult_even_v4si (dest, op1, op2); -+ else if (TARGET_SSE4_1) -+ x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2); -+ else -+ { -+ rtx s1, s2, t0, t1, t2; -+ -+ /* The easiest way to implement this without PMULDQ is to go through -+ the motions as if we are performing a full 64-bit multiply. With -+ the exception that we need to do less shuffling of the elements. */ -+ -+ /* Compute the sign-extension, aka highparts, of the two operands. */ -+ s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode), -+ op1, pc_rtx, pc_rtx); -+ s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode), -+ op2, pc_rtx, pc_rtx); -+ -+ /* Multiply LO(A) * HI(B), and vice-versa. */ -+ t1 = gen_reg_rtx (wmode); -+ t2 = gen_reg_rtx (wmode); -+ emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2)); -+ emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1)); -+ -+ /* Multiply LO(A) * LO(B). */ -+ t0 = gen_reg_rtx (wmode); -+ emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2)); -+ -+ /* Combine and shift the highparts into place. 
*/ -+ t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT); -+ t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1, -+ 1, OPTAB_DIRECT); -+ -+ /* Combine high and low parts. */ -+ force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT); -+ return; -+ } -+ emit_insn (x); -+} -+ -+void -+ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2, -+ bool uns_p, bool high_p) -+{ -+ machine_mode wmode = GET_MODE (dest); -+ machine_mode mode = GET_MODE (op1); -+ rtx t1, t2, t3, t4, mask; -+ -+ switch (mode) -+ { -+ case E_V4SImode: -+ t1 = gen_reg_rtx (mode); -+ t2 = gen_reg_rtx (mode); -+ if (TARGET_XOP && !uns_p) -+ { -+ /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case, -+ shuffle the elements once so that all elements are in the right -+ place for immediate use: { A C B D }. */ -+ emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx, -+ const1_rtx, GEN_INT (3))); -+ emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx, -+ const1_rtx, GEN_INT (3))); -+ } -+ else -+ { -+ /* Put the elements into place for the multiply. */ -+ ix86_expand_vec_interleave (t1, op1, op1, high_p); -+ ix86_expand_vec_interleave (t2, op2, op2, high_p); -+ high_p = false; -+ } -+ ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p); -+ break; -+ -+ case E_V8SImode: -+ /* Shuffle the elements between the lanes. After this we -+ have { A B E F | C D G H } for each operand. */ -+ t1 = gen_reg_rtx (V4DImode); -+ t2 = gen_reg_rtx (V4DImode); -+ emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1), -+ const0_rtx, const2_rtx, -+ const1_rtx, GEN_INT (3))); -+ emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2), -+ const0_rtx, const2_rtx, -+ const1_rtx, GEN_INT (3))); -+ -+ /* Shuffle the elements within the lanes. After this we -+ have { A A B B | C C D D } or { E E F F | G G H H }. */ -+ t3 = gen_reg_rtx (V8SImode); -+ t4 = gen_reg_rtx (V8SImode); -+ mask = GEN_INT (high_p -+ ? 2 + (2 << 2) + (3 << 4) + (3 << 6) -+ : 0 + (0 << 2) + (1 << 4) + (1 << 6)); -+ emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask)); -+ emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask)); -+ -+ ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false); -+ break; -+ -+ case E_V8HImode: -+ case E_V16HImode: -+ t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX, -+ uns_p, OPTAB_DIRECT); -+ t2 = expand_binop (mode, -+ uns_p ? 
umul_highpart_optab : smul_highpart_optab, -+ op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT); -+ gcc_assert (t1 && t2); -+ -+ t3 = gen_reg_rtx (mode); -+ ix86_expand_vec_interleave (t3, t1, t2, high_p); -+ emit_move_insn (dest, gen_lowpart (wmode, t3)); -+ break; -+ -+ case E_V16QImode: -+ case E_V32QImode: -+ case E_V32HImode: -+ case E_V16SImode: -+ case E_V64QImode: -+ t1 = gen_reg_rtx (wmode); -+ t2 = gen_reg_rtx (wmode); -+ ix86_expand_sse_unpack (t1, op1, uns_p, high_p); -+ ix86_expand_sse_unpack (t2, op2, uns_p, high_p); -+ -+ emit_insn (gen_rtx_SET (dest, gen_rtx_MULT (wmode, t1, t2))); -+ break; -+ -+ default: -+ gcc_unreachable (); -+ } -+} -+ -+void -+ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2) -+{ -+ rtx res_1, res_2, res_3, res_4; -+ -+ res_1 = gen_reg_rtx (V4SImode); -+ res_2 = gen_reg_rtx (V4SImode); -+ res_3 = gen_reg_rtx (V2DImode); -+ res_4 = gen_reg_rtx (V2DImode); -+ ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false); -+ ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true); -+ -+ /* Move the results in element 2 down to element 1; we don't care -+ what goes in elements 2 and 3. Then we can merge the parts -+ back together with an interleave. -+ -+ Note that two other sequences were tried: -+ (1) Use interleaves at the start instead of psrldq, which allows -+ us to use a single shufps to merge things back at the end. -+ (2) Use shufps here to combine the two vectors, then pshufd to -+ put the elements in the correct order. -+ In both cases the cost of the reformatting stall was too high -+ and the overall sequence slower. */ -+ -+ emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3), -+ const0_rtx, const2_rtx, -+ const0_rtx, const0_rtx)); -+ emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4), -+ const0_rtx, const2_rtx, -+ const0_rtx, const0_rtx)); -+ res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2)); -+ -+ set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2)); -+} -+ -+void -+ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2) -+{ -+ machine_mode mode = GET_MODE (op0); -+ rtx t1, t2, t3, t4, t5, t6; -+ -+ if (TARGET_AVX512DQ && mode == V8DImode) -+ emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2)); -+ else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode) -+ emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2)); -+ else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode) -+ emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2)); -+ else if (TARGET_XOP && mode == V2DImode) -+ { -+ /* op1: A,B,C,D, op2: E,F,G,H */ -+ op1 = gen_lowpart (V4SImode, op1); -+ op2 = gen_lowpart (V4SImode, op2); -+ -+ t1 = gen_reg_rtx (V4SImode); -+ t2 = gen_reg_rtx (V4SImode); -+ t3 = gen_reg_rtx (V2DImode); -+ t4 = gen_reg_rtx (V2DImode); -+ -+ /* t1: B,A,D,C */ -+ emit_insn (gen_sse2_pshufd_1 (t1, op1, -+ GEN_INT (1), -+ GEN_INT (0), -+ GEN_INT (3), -+ GEN_INT (2))); -+ -+ /* t2: (B*E),(A*F),(D*G),(C*H) */ -+ emit_insn (gen_mulv4si3 (t2, t1, op2)); -+ -+ /* t3: (B*E)+(A*F), (D*G)+(C*H) */ -+ emit_insn (gen_xop_phadddq (t3, t2)); -+ -+ /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */ -+ emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32))); -+ -+ /* Multiply lower parts and add all */ -+ t5 = gen_reg_rtx (V2DImode); -+ emit_insn (gen_vec_widen_umult_even_v4si (t5, -+ gen_lowpart (V4SImode, op1), -+ gen_lowpart (V4SImode, op2))); -+ op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT); -+ -+ } -+ else -+ { -+ machine_mode nmode; -+ rtx (*umul) (rtx, rtx, rtx); -+ -+ if (mode == V2DImode) -+ 
{ -+ umul = gen_vec_widen_umult_even_v4si; -+ nmode = V4SImode; -+ } -+ else if (mode == V4DImode) -+ { -+ umul = gen_vec_widen_umult_even_v8si; -+ nmode = V8SImode; -+ } -+ else if (mode == V8DImode) -+ { -+ umul = gen_vec_widen_umult_even_v16si; -+ nmode = V16SImode; -+ } -+ else -+ gcc_unreachable (); -+ -+ -+ /* Multiply low parts. */ -+ t1 = gen_reg_rtx (mode); -+ emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2))); -+ -+ /* Shift input vectors right 32 bits so we can multiply high parts. */ -+ t6 = GEN_INT (32); -+ t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT); -+ t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT); -+ -+ /* Multiply high parts by low parts. */ -+ t4 = gen_reg_rtx (mode); -+ t5 = gen_reg_rtx (mode); -+ emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2))); -+ emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1))); -+ -+ /* Combine and shift the highparts back. */ -+ t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT); -+ t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT); -+ -+ /* Combine high and low parts. */ -+ force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT); -+ } -+ -+ set_unique_reg_note (get_last_insn (), REG_EQUAL, -+ gen_rtx_MULT (mode, op1, op2)); -+} -+ -+/* Return 1 if control tansfer instruction INSN -+ should be encoded with notrack prefix. */ -+ -+bool -+ix86_notrack_prefixed_insn_p (rtx insn) -+{ -+ if (!insn || !((flag_cf_protection & CF_BRANCH))) -+ return false; -+ -+ if (CALL_P (insn)) -+ { -+ rtx call = get_call_rtx_from (insn); -+ gcc_assert (call != NULL_RTX); -+ rtx addr = XEXP (call, 0); -+ -+ /* Do not emit 'notrack' if it's not an indirect call. */ -+ if (MEM_P (addr) -+ && GET_CODE (XEXP (addr, 0)) == SYMBOL_REF) -+ return false; -+ else -+ return find_reg_note (insn, REG_CALL_NOCF_CHECK, 0); -+ } -+ -+ if (JUMP_P (insn) && !flag_cet_switch) -+ { -+ rtx target = JUMP_LABEL (insn); -+ if (target == NULL_RTX || ANY_RETURN_P (target)) -+ return false; -+ -+ /* Check the jump is a switch table. */ -+ rtx_insn *label = as_a (target); -+ rtx_insn *table = next_insn (label); -+ if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table)) -+ return false; -+ else -+ return true; -+ } -+ return false; -+} -+ -+/* Calculate integer abs() using only SSE2 instructions. */ -+ -+void -+ix86_expand_sse2_abs (rtx target, rtx input) -+{ -+ machine_mode mode = GET_MODE (target); -+ rtx tmp0, tmp1, x; -+ -+ switch (mode) -+ { -+ case E_V2DImode: -+ case E_V4DImode: -+ /* For 64-bit signed integer X, with SSE4.2 use -+ pxor t0, t0; pcmpgtq X, t0; pxor t0, X; psubq t0, X. -+ Otherwise handle it similarly to V4SImode, except use 64 as W instead of -+ 32 and use logical instead of arithmetic right shift (which is -+ unimplemented) and subtract. 
*/ -+ if (TARGET_SSE4_2) -+ { -+ tmp0 = gen_reg_rtx (mode); -+ tmp1 = gen_reg_rtx (mode); -+ emit_move_insn (tmp1, CONST0_RTX (mode)); -+ if (mode == E_V2DImode) -+ emit_insn (gen_sse4_2_gtv2di3 (tmp0, tmp1, input)); -+ else -+ emit_insn (gen_avx2_gtv4di3 (tmp0, tmp1, input)); -+ } -+ else -+ { -+ tmp0 = expand_simple_binop (mode, LSHIFTRT, input, -+ GEN_INT (GET_MODE_UNIT_BITSIZE (mode) -+ - 1), NULL, 0, OPTAB_DIRECT); -+ tmp0 = expand_simple_unop (mode, NEG, tmp0, NULL, false); -+ } -+ -+ tmp1 = expand_simple_binop (mode, XOR, tmp0, input, -+ NULL, 0, OPTAB_DIRECT); -+ x = expand_simple_binop (mode, MINUS, tmp1, tmp0, -+ target, 0, OPTAB_DIRECT); -+ break; -+ -+ case E_V4SImode: -+ /* For 32-bit signed integer X, the best way to calculate the absolute -+ value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */ -+ tmp0 = expand_simple_binop (mode, ASHIFTRT, input, -+ GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1), -+ NULL, 0, OPTAB_DIRECT); -+ tmp1 = expand_simple_binop (mode, XOR, tmp0, input, -+ NULL, 0, OPTAB_DIRECT); -+ x = expand_simple_binop (mode, MINUS, tmp1, tmp0, -+ target, 0, OPTAB_DIRECT); -+ break; -+ -+ case E_V8HImode: -+ /* For 16-bit signed integer X, the best way to calculate the absolute -+ value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */ -+ tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0); -+ -+ x = expand_simple_binop (mode, SMAX, tmp0, input, -+ target, 0, OPTAB_DIRECT); -+ break; -+ -+ case E_V16QImode: -+ /* For 8-bit signed integer X, the best way to calculate the absolute -+ value of X is min ((unsigned char) X, (unsigned char) (-X)), -+ as SSE2 provides the PMINUB insn. */ -+ tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0); -+ -+ x = expand_simple_binop (V16QImode, UMIN, tmp0, input, -+ target, 0, OPTAB_DIRECT); -+ break; -+ -+ default: -+ gcc_unreachable (); -+ } -+ -+ if (x != target) -+ emit_move_insn (target, x); -+} -+ -+/* Expand an extract from a vector register through pextr insn. -+ Return true if successful. */ -+ -+bool -+ix86_expand_pextr (rtx *operands) -+{ -+ rtx dst = operands[0]; -+ rtx src = operands[1]; -+ -+ unsigned int size = INTVAL (operands[2]); -+ unsigned int pos = INTVAL (operands[3]); -+ -+ if (SUBREG_P (dst)) -+ { -+ /* Reject non-lowpart subregs. */ -+ if (SUBREG_BYTE (dst) > 0) -+ return false; -+ dst = SUBREG_REG (dst); -+ } -+ -+ if (SUBREG_P (src)) -+ { -+ pos += SUBREG_BYTE (src) * BITS_PER_UNIT; -+ src = SUBREG_REG (src); -+ } -+ -+ switch (GET_MODE (src)) -+ { -+ case E_V16QImode: -+ case E_V8HImode: -+ case E_V4SImode: -+ case E_V2DImode: -+ case E_V1TImode: -+ case E_TImode: -+ { -+ machine_mode srcmode, dstmode; -+ rtx d, pat; -+ -+ if (!int_mode_for_size (size, 0).exists (&dstmode)) -+ return false; -+ -+ switch (dstmode) -+ { -+ case E_QImode: -+ if (!TARGET_SSE4_1) -+ return false; -+ srcmode = V16QImode; -+ break; -+ -+ case E_HImode: -+ if (!TARGET_SSE2) -+ return false; -+ srcmode = V8HImode; -+ break; -+ -+ case E_SImode: -+ if (!TARGET_SSE4_1) -+ return false; -+ srcmode = V4SImode; -+ break; -+ -+ case E_DImode: -+ gcc_assert (TARGET_64BIT); -+ if (!TARGET_SSE4_1) -+ return false; -+ srcmode = V2DImode; -+ break; -+ -+ default: -+ return false; -+ } -+ -+ /* Reject extractions from misaligned positions. */ -+ if (pos & (size-1)) -+ return false; -+ -+ if (GET_MODE (dst) == dstmode) -+ d = dst; -+ else -+ d = gen_reg_rtx (dstmode); -+ -+ /* Construct insn pattern. 
*/ -+ pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (pos / size))); -+ pat = gen_rtx_VEC_SELECT (dstmode, gen_lowpart (srcmode, src), pat); -+ -+ /* Let the rtl optimizers know about the zero extension performed. */ -+ if (dstmode == QImode || dstmode == HImode) -+ { -+ pat = gen_rtx_ZERO_EXTEND (SImode, pat); -+ d = gen_lowpart (SImode, d); -+ } -+ -+ emit_insn (gen_rtx_SET (d, pat)); -+ -+ if (d != dst) -+ emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d)); -+ return true; -+ } -+ -+ default: -+ return false; -+ } -+} -+ -+/* Expand an insert into a vector register through pinsr insn. -+ Return true if successful. */ -+ -+bool -+ix86_expand_pinsr (rtx *operands) -+{ -+ rtx dst = operands[0]; -+ rtx src = operands[3]; -+ -+ unsigned int size = INTVAL (operands[1]); -+ unsigned int pos = INTVAL (operands[2]); -+ -+ if (SUBREG_P (dst)) -+ { -+ pos += SUBREG_BYTE (dst) * BITS_PER_UNIT; -+ dst = SUBREG_REG (dst); -+ } -+ -+ switch (GET_MODE (dst)) -+ { -+ case E_V16QImode: -+ case E_V8HImode: -+ case E_V4SImode: -+ case E_V2DImode: -+ case E_V1TImode: -+ case E_TImode: -+ { -+ machine_mode srcmode, dstmode; -+ rtx (*pinsr)(rtx, rtx, rtx, rtx); -+ rtx d; -+ -+ if (!int_mode_for_size (size, 0).exists (&srcmode)) -+ return false; -+ -+ switch (srcmode) -+ { -+ case E_QImode: -+ if (!TARGET_SSE4_1) -+ return false; -+ dstmode = V16QImode; -+ pinsr = gen_sse4_1_pinsrb; -+ break; -+ -+ case E_HImode: -+ if (!TARGET_SSE2) -+ return false; -+ dstmode = V8HImode; -+ pinsr = gen_sse2_pinsrw; -+ break; -+ -+ case E_SImode: -+ if (!TARGET_SSE4_1) -+ return false; -+ dstmode = V4SImode; -+ pinsr = gen_sse4_1_pinsrd; -+ break; -+ -+ case E_DImode: -+ gcc_assert (TARGET_64BIT); -+ if (!TARGET_SSE4_1) -+ return false; -+ dstmode = V2DImode; -+ pinsr = gen_sse4_1_pinsrq; -+ break; -+ -+ default: -+ return false; -+ } -+ -+ /* Reject insertions to misaligned positions. */ -+ if (pos & (size-1)) -+ return false; -+ -+ if (SUBREG_P (src)) -+ { -+ unsigned int srcpos = SUBREG_BYTE (src); -+ -+ if (srcpos > 0) -+ { -+ rtx extr_ops[4]; -+ -+ extr_ops[0] = gen_reg_rtx (srcmode); -+ extr_ops[1] = gen_lowpart (srcmode, SUBREG_REG (src)); -+ extr_ops[2] = GEN_INT (size); -+ extr_ops[3] = GEN_INT (srcpos * BITS_PER_UNIT); -+ -+ if (!ix86_expand_pextr (extr_ops)) -+ return false; -+ -+ src = extr_ops[0]; -+ } -+ else -+ src = gen_lowpart (srcmode, SUBREG_REG (src)); -+ } -+ -+ if (GET_MODE (dst) == dstmode) -+ d = dst; -+ else -+ d = gen_reg_rtx (dstmode); -+ -+ emit_insn (pinsr (d, gen_lowpart (dstmode, dst), -+ gen_lowpart (srcmode, src), -+ GEN_INT (1 << (pos / size)))); -+ if (d != dst) -+ emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d)); -+ return true; -+ } -+ -+ default: -+ return false; -+ } -+} -+ -+/* All CPUs prefer to avoid cross-lane operations so perform reductions -+ upper against lower halves up to SSE reg size. */ -+ -+machine_mode -+ix86_split_reduction (machine_mode mode) -+{ -+ /* Reduce lowpart against highpart until we reach SSE reg width to -+ avoid cross-lane operations. */ -+ switch (mode) -+ { -+ case E_V8DImode: -+ case E_V4DImode: -+ return V2DImode; -+ case E_V16SImode: -+ case E_V8SImode: -+ return V4SImode; -+ case E_V32HImode: -+ case E_V16HImode: -+ return V8HImode; -+ case E_V64QImode: -+ case E_V32QImode: -+ return V16QImode; -+ case E_V16SFmode: -+ case E_V8SFmode: -+ return V4SFmode; -+ case E_V8DFmode: -+ case E_V4DFmode: -+ return V2DFmode; -+ default: -+ return mode; -+ } -+} -+ -+/* Generate call to __divmoddi4. 
*/ -+ -+void -+ix86_expand_divmod_libfunc (rtx libfunc, machine_mode mode, -+ rtx op0, rtx op1, -+ rtx *quot_p, rtx *rem_p) -+{ -+ rtx rem = assign_386_stack_local (mode, SLOT_TEMP); -+ -+ rtx quot = emit_library_call_value (libfunc, NULL_RTX, LCT_NORMAL, -+ mode, op0, mode, op1, mode, -+ XEXP (rem, 0), Pmode); -+ *quot_p = quot; -+ *rem_p = rem; -+} -+ -+#include "gt-i386-expand.h" -diff --git a/gcc/config/i386/i386-expand.h b/gcc/config/i386/i386-expand.h -new file mode 100644 -index 000000000..9271bb85a ---- /dev/null -+++ b/gcc/config/i386/i386-expand.h -@@ -0,0 +1,58 @@ -+/* Copyright (C) 1988-2019 Free Software Foundation, Inc. -+ -+This file is part of GCC. -+ -+GCC is free software; you can redistribute it and/or modify -+it under the terms of the GNU General Public License as published by -+the Free Software Foundation; either version 3, or (at your option) -+any later version. -+ -+GCC is distributed in the hope that it will be useful, -+but WITHOUT ANY WARRANTY; without even the implied warranty of -+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+GNU General Public License for more details. -+ -+You should have received a copy of the GNU General Public License -+along with GCC; see the file COPYING3. If not see -+. */ -+ -+#ifndef GCC_I386_EXPAND_H -+#define GCC_I386_EXPAND_H -+ -+/* AVX512F does support 64-byte integer vector operations, -+ thus the longest vector we are faced with is V64QImode. */ -+#define MAX_VECT_LEN 64 -+ -+struct expand_vec_perm_d -+{ -+ rtx target, op0, op1; -+ unsigned char perm[MAX_VECT_LEN]; -+ machine_mode vmode; -+ unsigned char nelt; -+ bool one_operand_p; -+ bool testing_p; -+}; -+ -+rtx legitimize_tls_address (rtx x, enum tls_model model, bool for_mov); -+alias_set_type ix86_GOT_alias_set (void); -+rtx legitimize_pic_address (rtx orig, rtx reg); -+rtx legitimize_pe_coff_symbol (rtx addr, bool inreg); -+ -+bool insn_defines_reg (unsigned int regno1, unsigned int regno2, -+ rtx_insn *insn); -+void ix86_emit_binop (enum rtx_code code, machine_mode mode, rtx dst, rtx src); -+enum calling_abi ix86_function_abi (const_tree fndecl); -+bool ix86_function_ms_hook_prologue (const_tree fn); -+void warn_once_call_ms2sysv_xlogues (const char *feature); -+rtx gen_push (rtx arg); -+rtx gen_pop (rtx arg); -+rtx ix86_expand_builtin (tree exp, rtx target, rtx subtarget, -+ machine_mode mode, int ignore); -+bool ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, -+ rtx op1, const vec_perm_indices &sel); -+bool ix86_notrack_prefixed_insn_p (rtx insn); -+machine_mode ix86_split_reduction (machine_mode mode); -+void ix86_expand_divmod_libfunc (rtx libfunc, machine_mode mode, rtx op0, -+ rtx op1, rtx *quot_p, rtx *rem_p); -+ -+#endif /* GCC_I386_EXPAND_H */ -diff --git a/gcc/config/i386/i386-features.c b/gcc/config/i386/i386-features.c -new file mode 100644 -index 000000000..60a120f4d ---- /dev/null -+++ b/gcc/config/i386/i386-features.c -@@ -0,0 +1,2742 @@ -+/* Copyright (C) 1988-2019 Free Software Foundation, Inc. -+ -+This file is part of GCC. -+ -+GCC is free software; you can redistribute it and/or modify -+it under the terms of the GNU General Public License as published by -+the Free Software Foundation; either version 3, or (at your option) -+any later version. -+ -+GCC is distributed in the hope that it will be useful, -+but WITHOUT ANY WARRANTY; without even the implied warranty of -+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+GNU General Public License for more details. 
-+ -+You should have received a copy of the GNU General Public License -+along with GCC; see the file COPYING3. If not see -+. */ -+ -+#define IN_TARGET_CODE 1 -+ -+#include "config.h" -+#include "system.h" -+#include "coretypes.h" -+#include "backend.h" -+#include "rtl.h" -+#include "tree.h" -+#include "memmodel.h" -+#include "gimple.h" -+#include "cfghooks.h" -+#include "cfgloop.h" -+#include "df.h" -+#include "tm_p.h" -+#include "stringpool.h" -+#include "expmed.h" -+#include "optabs.h" -+#include "regs.h" -+#include "emit-rtl.h" -+#include "recog.h" -+#include "cgraph.h" -+#include "diagnostic.h" -+#include "cfgbuild.h" -+#include "alias.h" -+#include "fold-const.h" -+#include "attribs.h" -+#include "calls.h" -+#include "stor-layout.h" -+#include "varasm.h" -+#include "output.h" -+#include "insn-attr.h" -+#include "flags.h" -+#include "except.h" -+#include "explow.h" -+#include "expr.h" -+#include "cfgrtl.h" -+#include "common/common-target.h" -+#include "langhooks.h" -+#include "reload.h" -+#include "gimplify.h" -+#include "dwarf2.h" -+#include "tm-constrs.h" -+#include "params.h" -+#include "cselib.h" -+#include "sched-int.h" -+#include "opts.h" -+#include "tree-pass.h" -+#include "context.h" -+#include "pass_manager.h" -+#include "target-globals.h" -+#include "gimple-iterator.h" -+#include "tree-vectorizer.h" -+#include "shrink-wrap.h" -+#include "builtins.h" -+#include "rtl-iter.h" -+#include "tree-iterator.h" -+#include "dbgcnt.h" -+#include "case-cfn-macros.h" -+#include "dojump.h" -+#include "fold-const-call.h" -+#include "tree-vrp.h" -+#include "tree-ssanames.h" -+#include "selftest.h" -+#include "selftest-rtl.h" -+#include "print-rtl.h" -+#include "intl.h" -+#include "ifcvt.h" -+#include "symbol-summary.h" -+#include "ipa-prop.h" -+#include "ipa-fnsummary.h" -+#include "wide-int-bitmask.h" -+#include "tree-vector-builder.h" -+#include "debug.h" -+#include "dwarf2out.h" -+#include "i386-builtins.h" -+#include "i386-features.h" -+ -+const char * const xlogue_layout::STUB_BASE_NAMES[XLOGUE_STUB_COUNT] = { -+ "savms64", -+ "resms64", -+ "resms64x", -+ "savms64f", -+ "resms64f", -+ "resms64fx" -+}; -+ -+const unsigned xlogue_layout::REG_ORDER[xlogue_layout::MAX_REGS] = { -+/* The below offset values are where each register is stored for the layout -+ relative to incoming stack pointer. The value of each m_regs[].offset will -+ be relative to the incoming base pointer (rax or rsi) used by the stub. -+ -+ s_instances: 0 1 2 3 -+ Offset: realigned or aligned + 8 -+ Register aligned aligned + 8 aligned w/HFP w/HFP */ -+ XMM15_REG, /* 0x10 0x18 0x10 0x18 */ -+ XMM14_REG, /* 0x20 0x28 0x20 0x28 */ -+ XMM13_REG, /* 0x30 0x38 0x30 0x38 */ -+ XMM12_REG, /* 0x40 0x48 0x40 0x48 */ -+ XMM11_REG, /* 0x50 0x58 0x50 0x58 */ -+ XMM10_REG, /* 0x60 0x68 0x60 0x68 */ -+ XMM9_REG, /* 0x70 0x78 0x70 0x78 */ -+ XMM8_REG, /* 0x80 0x88 0x80 0x88 */ -+ XMM7_REG, /* 0x90 0x98 0x90 0x98 */ -+ XMM6_REG, /* 0xa0 0xa8 0xa0 0xa8 */ -+ SI_REG, /* 0xa8 0xb0 0xa8 0xb0 */ -+ DI_REG, /* 0xb0 0xb8 0xb0 0xb8 */ -+ BX_REG, /* 0xb8 0xc0 0xb8 0xc0 */ -+ BP_REG, /* 0xc0 0xc8 N/A N/A */ -+ R12_REG, /* 0xc8 0xd0 0xc0 0xc8 */ -+ R13_REG, /* 0xd0 0xd8 0xc8 0xd0 */ -+ R14_REG, /* 0xd8 0xe0 0xd0 0xd8 */ -+ R15_REG, /* 0xe0 0xe8 0xd8 0xe0 */ -+}; -+ -+/* Instantiate static const values. 
*/ -+const HOST_WIDE_INT xlogue_layout::STUB_INDEX_OFFSET; -+const unsigned xlogue_layout::MIN_REGS; -+const unsigned xlogue_layout::MAX_REGS; -+const unsigned xlogue_layout::MAX_EXTRA_REGS; -+const unsigned xlogue_layout::VARIANT_COUNT; -+const unsigned xlogue_layout::STUB_NAME_MAX_LEN; -+ -+/* Initialize xlogue_layout::s_stub_names to zero. */ -+char xlogue_layout::s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT] -+ [STUB_NAME_MAX_LEN]; -+ -+/* Instantiates all xlogue_layout instances. */ -+const xlogue_layout xlogue_layout::s_instances[XLOGUE_SET_COUNT] = { -+ xlogue_layout (0, false), -+ xlogue_layout (8, false), -+ xlogue_layout (0, true), -+ xlogue_layout (8, true) -+}; -+ -+/* Return an appropriate const instance of xlogue_layout based upon values -+ in cfun->machine and crtl. */ -+const struct xlogue_layout & -+xlogue_layout::get_instance () -+{ -+ enum xlogue_stub_sets stub_set; -+ bool aligned_plus_8 = cfun->machine->call_ms2sysv_pad_in; -+ -+ if (stack_realign_fp) -+ stub_set = XLOGUE_SET_HFP_ALIGNED_OR_REALIGN; -+ else if (frame_pointer_needed) -+ stub_set = aligned_plus_8 -+ ? XLOGUE_SET_HFP_ALIGNED_PLUS_8 -+ : XLOGUE_SET_HFP_ALIGNED_OR_REALIGN; -+ else -+ stub_set = aligned_plus_8 ? XLOGUE_SET_ALIGNED_PLUS_8 : XLOGUE_SET_ALIGNED; -+ -+ return s_instances[stub_set]; -+} -+ -+/* Determine how many clobbered registers can be saved by the stub. -+ Returns the count of registers the stub will save and restore. */ -+unsigned -+xlogue_layout::count_stub_managed_regs () -+{ -+ bool hfp = frame_pointer_needed || stack_realign_fp; -+ unsigned i, count; -+ unsigned regno; -+ -+ for (count = i = MIN_REGS; i < MAX_REGS; ++i) -+ { -+ regno = REG_ORDER[i]; -+ if (regno == BP_REG && hfp) -+ continue; -+ if (!ix86_save_reg (regno, false, false)) -+ break; -+ ++count; -+ } -+ return count; -+} -+ -+/* Determine if register REGNO is a stub managed register given the -+ total COUNT of stub managed registers. */ -+bool -+xlogue_layout::is_stub_managed_reg (unsigned regno, unsigned count) -+{ -+ bool hfp = frame_pointer_needed || stack_realign_fp; -+ unsigned i; -+ -+ for (i = 0; i < count; ++i) -+ { -+ gcc_assert (i < MAX_REGS); -+ if (REG_ORDER[i] == BP_REG && hfp) -+ ++count; -+ else if (REG_ORDER[i] == regno) -+ return true; -+ } -+ return false; -+} -+ -+/* Constructor for xlogue_layout. */ -+xlogue_layout::xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp) -+ : m_hfp (hfp) , m_nregs (hfp ? 17 : 18), -+ m_stack_align_off_in (stack_align_off_in) -+{ -+ HOST_WIDE_INT offset = stack_align_off_in; -+ unsigned i, j; -+ -+ for (i = j = 0; i < MAX_REGS; ++i) -+ { -+ unsigned regno = REG_ORDER[i]; -+ -+ if (regno == BP_REG && hfp) -+ continue; -+ if (SSE_REGNO_P (regno)) -+ { -+ offset += 16; -+ /* Verify that SSE regs are always aligned. */ -+ gcc_assert (!((stack_align_off_in + offset) & 15)); -+ } -+ else -+ offset += 8; -+ -+ m_regs[j].regno = regno; -+ m_regs[j++].offset = offset - STUB_INDEX_OFFSET; -+ } -+ gcc_assert (j == m_nregs); -+} -+ -+const char * -+xlogue_layout::get_stub_name (enum xlogue_stub stub, -+ unsigned n_extra_regs) -+{ -+ const int have_avx = TARGET_AVX; -+ char *name = s_stub_names[!!have_avx][stub][n_extra_regs]; -+ -+ /* Lazy init */ -+ if (!*name) -+ { -+ int res = snprintf (name, STUB_NAME_MAX_LEN, "__%s_%s_%u", -+ (have_avx ? 
"avx" : "sse"), -+ STUB_BASE_NAMES[stub], -+ MIN_REGS + n_extra_regs); -+ gcc_checking_assert (res < (int)STUB_NAME_MAX_LEN); -+ } -+ -+ return name; -+} -+ -+/* Return rtx of a symbol ref for the entry point (based upon -+ cfun->machine->call_ms2sysv_extra_regs) of the specified stub. */ -+rtx -+xlogue_layout::get_stub_rtx (enum xlogue_stub stub) -+{ -+ const unsigned n_extra_regs = cfun->machine->call_ms2sysv_extra_regs; -+ gcc_checking_assert (n_extra_regs <= MAX_EXTRA_REGS); -+ gcc_assert (stub < XLOGUE_STUB_COUNT); -+ gcc_assert (crtl->stack_realign_finalized); -+ -+ return gen_rtx_SYMBOL_REF (Pmode, get_stub_name (stub, n_extra_regs)); -+} -+ -+unsigned scalar_chain::max_id = 0; -+ -+/* Initialize new chain. */ -+ -+scalar_chain::scalar_chain () -+{ -+ chain_id = ++max_id; -+ -+ if (dump_file) -+ fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id); -+ -+ bitmap_obstack_initialize (NULL); -+ insns = BITMAP_ALLOC (NULL); -+ defs = BITMAP_ALLOC (NULL); -+ defs_conv = BITMAP_ALLOC (NULL); -+ queue = NULL; -+} -+ -+/* Free chain's data. */ -+ -+scalar_chain::~scalar_chain () -+{ -+ BITMAP_FREE (insns); -+ BITMAP_FREE (defs); -+ BITMAP_FREE (defs_conv); -+ bitmap_obstack_release (NULL); -+} -+ -+/* Add instruction into chains' queue. */ -+ -+void -+scalar_chain::add_to_queue (unsigned insn_uid) -+{ -+ if (bitmap_bit_p (insns, insn_uid) -+ || bitmap_bit_p (queue, insn_uid)) -+ return; -+ -+ if (dump_file) -+ fprintf (dump_file, " Adding insn %d into chain's #%d queue\n", -+ insn_uid, chain_id); -+ bitmap_set_bit (queue, insn_uid); -+} -+ -+/* For DImode conversion, mark register defined by DEF as requiring -+ conversion. */ -+ -+void -+dimode_scalar_chain::mark_dual_mode_def (df_ref def) -+{ -+ gcc_assert (DF_REF_REG_DEF_P (def)); -+ -+ if (bitmap_bit_p (defs_conv, DF_REF_REGNO (def))) -+ return; -+ -+ if (dump_file) -+ fprintf (dump_file, -+ " Mark r%d def in insn %d as requiring both modes in chain #%d\n", -+ DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id); -+ -+ bitmap_set_bit (defs_conv, DF_REF_REGNO (def)); -+} -+ -+/* For TImode conversion, it is unused. */ -+ -+void -+timode_scalar_chain::mark_dual_mode_def (df_ref) -+{ -+ gcc_unreachable (); -+} -+ -+/* Check REF's chain to add new insns into a queue -+ and find registers requiring conversion. */ -+ -+void -+scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref) -+{ -+ df_link *chain; -+ -+ gcc_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)) -+ || bitmap_bit_p (candidates, DF_REF_INSN_UID (ref))); -+ add_to_queue (DF_REF_INSN_UID (ref)); -+ -+ for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next) -+ { -+ unsigned uid = DF_REF_INSN_UID (chain->ref); -+ -+ if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref))) -+ continue; -+ -+ if (!DF_REF_REG_MEM_P (chain->ref)) -+ { -+ if (bitmap_bit_p (insns, uid)) -+ continue; -+ -+ if (bitmap_bit_p (candidates, uid)) -+ { -+ add_to_queue (uid); -+ continue; -+ } -+ } -+ -+ if (DF_REF_REG_DEF_P (chain->ref)) -+ { -+ if (dump_file) -+ fprintf (dump_file, " r%d def in insn %d isn't convertible\n", -+ DF_REF_REGNO (chain->ref), uid); -+ mark_dual_mode_def (chain->ref); -+ } -+ else -+ { -+ if (dump_file) -+ fprintf (dump_file, " r%d use in insn %d isn't convertible\n", -+ DF_REF_REGNO (chain->ref), uid); -+ mark_dual_mode_def (ref); -+ } -+ } -+} -+ -+/* Add instruction into a chain. 
*/ -+ -+void -+scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid) -+{ -+ if (bitmap_bit_p (insns, insn_uid)) -+ return; -+ -+ if (dump_file) -+ fprintf (dump_file, " Adding insn %d to chain #%d\n", insn_uid, chain_id); -+ -+ bitmap_set_bit (insns, insn_uid); -+ -+ rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn; -+ rtx def_set = single_set (insn); -+ if (def_set && REG_P (SET_DEST (def_set)) -+ && !HARD_REGISTER_P (SET_DEST (def_set))) -+ bitmap_set_bit (defs, REGNO (SET_DEST (def_set))); -+ -+ df_ref ref; -+ df_ref def; -+ for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref)) -+ if (!HARD_REGISTER_P (DF_REF_REG (ref))) -+ for (def = DF_REG_DEF_CHAIN (DF_REF_REGNO (ref)); -+ def; -+ def = DF_REF_NEXT_REG (def)) -+ analyze_register_chain (candidates, def); -+ for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref)) -+ if (!DF_REF_REG_MEM_P (ref)) -+ analyze_register_chain (candidates, ref); -+} -+ -+/* Build new chain starting from insn INSN_UID recursively -+ adding all dependent uses and definitions. */ -+ -+void -+scalar_chain::build (bitmap candidates, unsigned insn_uid) -+{ -+ queue = BITMAP_ALLOC (NULL); -+ bitmap_set_bit (queue, insn_uid); -+ -+ if (dump_file) -+ fprintf (dump_file, "Building chain #%d...\n", chain_id); -+ -+ while (!bitmap_empty_p (queue)) -+ { -+ insn_uid = bitmap_first_set_bit (queue); -+ bitmap_clear_bit (queue, insn_uid); -+ bitmap_clear_bit (candidates, insn_uid); -+ add_insn (candidates, insn_uid); -+ } -+ -+ if (dump_file) -+ { -+ fprintf (dump_file, "Collected chain #%d...\n", chain_id); -+ fprintf (dump_file, " insns: "); -+ dump_bitmap (dump_file, insns); -+ if (!bitmap_empty_p (defs_conv)) -+ { -+ bitmap_iterator bi; -+ unsigned id; -+ const char *comma = ""; -+ fprintf (dump_file, " defs to convert: "); -+ EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi) -+ { -+ fprintf (dump_file, "%sr%d", comma, id); -+ comma = ", "; -+ } -+ fprintf (dump_file, "\n"); -+ } -+ } -+ -+ BITMAP_FREE (queue); -+} -+ -+/* Return a cost of building a vector costant -+ instead of using a scalar one. */ -+ -+int -+dimode_scalar_chain::vector_const_cost (rtx exp) -+{ -+ gcc_assert (CONST_INT_P (exp)); -+ -+ if (standard_sse_constant_p (exp, V2DImode)) -+ return COSTS_N_INSNS (1); -+ return ix86_cost->sse_load[1]; -+} -+ -+/* Compute a gain for chain conversion. 
*/ -+ -+int -+dimode_scalar_chain::compute_convert_gain () -+{ -+ bitmap_iterator bi; -+ unsigned insn_uid; -+ int gain = 0; -+ int cost = 0; -+ -+ if (dump_file) -+ fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id); -+ -+ EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi) -+ { -+ rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn; -+ rtx def_set = single_set (insn); -+ rtx src = SET_SRC (def_set); -+ rtx dst = SET_DEST (def_set); -+ -+ if (REG_P (src) && REG_P (dst)) -+ gain += COSTS_N_INSNS (2) - ix86_cost->xmm_move; -+ else if (REG_P (src) && MEM_P (dst)) -+ gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1]; -+ else if (MEM_P (src) && REG_P (dst)) -+ gain += 2 * ix86_cost->int_load[2] - ix86_cost->sse_load[1]; -+ else if (GET_CODE (src) == ASHIFT -+ || GET_CODE (src) == ASHIFTRT -+ || GET_CODE (src) == LSHIFTRT) -+ { -+ if (CONST_INT_P (XEXP (src, 0))) -+ gain -= vector_const_cost (XEXP (src, 0)); -+ gain += ix86_cost->shift_const; -+ if (INTVAL (XEXP (src, 1)) >= 32) -+ gain -= COSTS_N_INSNS (1); -+ } -+ else if (GET_CODE (src) == PLUS -+ || GET_CODE (src) == MINUS -+ || GET_CODE (src) == IOR -+ || GET_CODE (src) == XOR -+ || GET_CODE (src) == AND) -+ { -+ gain += ix86_cost->add; -+ /* Additional gain for andnot for targets without BMI. */ -+ if (GET_CODE (XEXP (src, 0)) == NOT -+ && !TARGET_BMI) -+ gain += 2 * ix86_cost->add; -+ -+ if (CONST_INT_P (XEXP (src, 0))) -+ gain -= vector_const_cost (XEXP (src, 0)); -+ if (CONST_INT_P (XEXP (src, 1))) -+ gain -= vector_const_cost (XEXP (src, 1)); -+ } -+ else if (GET_CODE (src) == NEG -+ || GET_CODE (src) == NOT) -+ gain += ix86_cost->add - COSTS_N_INSNS (1); -+ else if (GET_CODE (src) == COMPARE) -+ { -+ /* Assume comparison cost is the same. */ -+ } -+ else if (CONST_INT_P (src)) -+ { -+ if (REG_P (dst)) -+ gain += COSTS_N_INSNS (2); -+ else if (MEM_P (dst)) -+ gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1]; -+ gain -= vector_const_cost (src); -+ } -+ else -+ gcc_unreachable (); -+ } -+ -+ if (dump_file) -+ fprintf (dump_file, " Instruction conversion gain: %d\n", gain); -+ -+ EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, insn_uid, bi) -+ cost += DF_REG_DEF_COUNT (insn_uid) * ix86_cost->mmxsse_to_integer; -+ -+ if (dump_file) -+ fprintf (dump_file, " Registers conversion cost: %d\n", cost); -+ -+ gain -= cost; -+ -+ if (dump_file) -+ fprintf (dump_file, " Total gain: %d\n", gain); -+ -+ return gain; -+} -+ -+/* Replace REG in X with a V2DI subreg of NEW_REG. */ -+ -+rtx -+dimode_scalar_chain::replace_with_subreg (rtx x, rtx reg, rtx new_reg) -+{ -+ if (x == reg) -+ return gen_rtx_SUBREG (V2DImode, new_reg, 0); -+ -+ const char *fmt = GET_RTX_FORMAT (GET_CODE (x)); -+ int i, j; -+ for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) -+ { -+ if (fmt[i] == 'e') -+ XEXP (x, i) = replace_with_subreg (XEXP (x, i), reg, new_reg); -+ else if (fmt[i] == 'E') -+ for (j = XVECLEN (x, i) - 1; j >= 0; j--) -+ XVECEXP (x, i, j) = replace_with_subreg (XVECEXP (x, i, j), -+ reg, new_reg); -+ } -+ -+ return x; -+} -+ -+/* Replace REG in INSN with a V2DI subreg of NEW_REG. */ -+ -+void -+dimode_scalar_chain::replace_with_subreg_in_insn (rtx_insn *insn, -+ rtx reg, rtx new_reg) -+{ -+ replace_with_subreg (single_set (insn), reg, new_reg); -+} -+ -+/* Insert generated conversion instruction sequence INSNS -+ after instruction AFTER. New BB may be required in case -+ instruction has EH region attached. 
*/ -+ -+void -+scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after) -+{ -+ if (!control_flow_insn_p (after)) -+ { -+ emit_insn_after (insns, after); -+ return; -+ } -+ -+ basic_block bb = BLOCK_FOR_INSN (after); -+ edge e = find_fallthru_edge (bb->succs); -+ gcc_assert (e); -+ -+ basic_block new_bb = split_edge (e); -+ emit_insn_after (insns, BB_HEAD (new_bb)); -+} -+ -+/* Make vector copies for all register REGNO definitions -+ and replace its uses in a chain. */ -+ -+void -+dimode_scalar_chain::make_vector_copies (unsigned regno) -+{ -+ rtx reg = regno_reg_rtx[regno]; -+ rtx vreg = gen_reg_rtx (DImode); -+ df_ref ref; -+ -+ for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref)) -+ if (!bitmap_bit_p (insns, DF_REF_INSN_UID (ref))) -+ { -+ start_sequence (); -+ if (!TARGET_INTER_UNIT_MOVES_TO_VEC) -+ { -+ rtx tmp = assign_386_stack_local (DImode, SLOT_STV_TEMP); -+ emit_move_insn (adjust_address (tmp, SImode, 0), -+ gen_rtx_SUBREG (SImode, reg, 0)); -+ emit_move_insn (adjust_address (tmp, SImode, 4), -+ gen_rtx_SUBREG (SImode, reg, 4)); -+ emit_move_insn (vreg, tmp); -+ } -+ else if (TARGET_SSE4_1) -+ { -+ emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0), -+ CONST0_RTX (V4SImode), -+ gen_rtx_SUBREG (SImode, reg, 0))); -+ emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0), -+ gen_rtx_SUBREG (V4SImode, vreg, 0), -+ gen_rtx_SUBREG (SImode, reg, 4), -+ GEN_INT (2))); -+ } -+ else -+ { -+ rtx tmp = gen_reg_rtx (DImode); -+ emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0), -+ CONST0_RTX (V4SImode), -+ gen_rtx_SUBREG (SImode, reg, 0))); -+ emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0), -+ CONST0_RTX (V4SImode), -+ gen_rtx_SUBREG (SImode, reg, 4))); -+ emit_insn (gen_vec_interleave_lowv4si -+ (gen_rtx_SUBREG (V4SImode, vreg, 0), -+ gen_rtx_SUBREG (V4SImode, vreg, 0), -+ gen_rtx_SUBREG (V4SImode, tmp, 0))); -+ } -+ rtx_insn *seq = get_insns (); -+ end_sequence (); -+ rtx_insn *insn = DF_REF_INSN (ref); -+ emit_conversion_insns (seq, insn); -+ -+ if (dump_file) -+ fprintf (dump_file, -+ " Copied r%d to a vector register r%d for insn %d\n", -+ regno, REGNO (vreg), INSN_UID (insn)); -+ } -+ -+ for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref)) -+ if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))) -+ { -+ rtx_insn *insn = DF_REF_INSN (ref); -+ replace_with_subreg_in_insn (insn, reg, vreg); -+ -+ if (dump_file) -+ fprintf (dump_file, " Replaced r%d with r%d in insn %d\n", -+ regno, REGNO (vreg), INSN_UID (insn)); -+ } -+} -+ -+/* Convert all definitions of register REGNO -+ and fix its uses. Scalar copies may be created -+ in case register is used in not convertible insn. 
*/ -+ -+void -+dimode_scalar_chain::convert_reg (unsigned regno) -+{ -+ bool scalar_copy = bitmap_bit_p (defs_conv, regno); -+ rtx reg = regno_reg_rtx[regno]; -+ rtx scopy = NULL_RTX; -+ df_ref ref; -+ bitmap conv; -+ -+ conv = BITMAP_ALLOC (NULL); -+ bitmap_copy (conv, insns); -+ -+ if (scalar_copy) -+ scopy = gen_reg_rtx (DImode); -+ -+ for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref)) -+ { -+ rtx_insn *insn = DF_REF_INSN (ref); -+ rtx def_set = single_set (insn); -+ rtx src = SET_SRC (def_set); -+ rtx reg = DF_REF_REG (ref); -+ -+ if (!MEM_P (src)) -+ { -+ replace_with_subreg_in_insn (insn, reg, reg); -+ bitmap_clear_bit (conv, INSN_UID (insn)); -+ } -+ -+ if (scalar_copy) -+ { -+ start_sequence (); -+ if (!TARGET_INTER_UNIT_MOVES_FROM_VEC) -+ { -+ rtx tmp = assign_386_stack_local (DImode, SLOT_STV_TEMP); -+ emit_move_insn (tmp, reg); -+ emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0), -+ adjust_address (tmp, SImode, 0)); -+ emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4), -+ adjust_address (tmp, SImode, 4)); -+ } -+ else if (TARGET_SSE4_1) -+ { -+ rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx)); -+ emit_insn -+ (gen_rtx_SET -+ (gen_rtx_SUBREG (SImode, scopy, 0), -+ gen_rtx_VEC_SELECT (SImode, -+ gen_rtx_SUBREG (V4SImode, reg, 0), tmp))); -+ -+ tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const1_rtx)); -+ emit_insn -+ (gen_rtx_SET -+ (gen_rtx_SUBREG (SImode, scopy, 4), -+ gen_rtx_VEC_SELECT (SImode, -+ gen_rtx_SUBREG (V4SImode, reg, 0), tmp))); -+ } -+ else -+ { -+ rtx vcopy = gen_reg_rtx (V2DImode); -+ emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, reg, 0)); -+ emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0), -+ gen_rtx_SUBREG (SImode, vcopy, 0)); -+ emit_move_insn (vcopy, -+ gen_rtx_LSHIFTRT (V2DImode, vcopy, GEN_INT (32))); -+ emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4), -+ gen_rtx_SUBREG (SImode, vcopy, 0)); -+ } -+ rtx_insn *seq = get_insns (); -+ end_sequence (); -+ emit_conversion_insns (seq, insn); -+ -+ if (dump_file) -+ fprintf (dump_file, -+ " Copied r%d to a scalar register r%d for insn %d\n", -+ regno, REGNO (scopy), INSN_UID (insn)); -+ } -+ } -+ -+ for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref)) -+ if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))) -+ { -+ if (bitmap_bit_p (conv, DF_REF_INSN_UID (ref))) -+ { -+ rtx_insn *insn = DF_REF_INSN (ref); -+ -+ rtx def_set = single_set (insn); -+ gcc_assert (def_set); -+ -+ rtx src = SET_SRC (def_set); -+ rtx dst = SET_DEST (def_set); -+ -+ if (!MEM_P (dst) || !REG_P (src)) -+ replace_with_subreg_in_insn (insn, reg, reg); -+ -+ bitmap_clear_bit (conv, INSN_UID (insn)); -+ } -+ } -+ /* Skip debug insns and uninitialized uses. */ -+ else if (DF_REF_CHAIN (ref) -+ && NONDEBUG_INSN_P (DF_REF_INSN (ref))) -+ { -+ gcc_assert (scopy); -+ replace_rtx (DF_REF_INSN (ref), reg, scopy); -+ df_insn_rescan (DF_REF_INSN (ref)); -+ } -+ -+ BITMAP_FREE (conv); -+} -+ -+/* Convert operand OP in INSN. We should handle -+ memory operands and uninitialized registers. -+ All other register uses are converted during -+ registers conversion. 
*/ -+ -+void -+dimode_scalar_chain::convert_op (rtx *op, rtx_insn *insn) -+{ -+ *op = copy_rtx_if_shared (*op); -+ -+ if (GET_CODE (*op) == NOT) -+ { -+ convert_op (&XEXP (*op, 0), insn); -+ PUT_MODE (*op, V2DImode); -+ } -+ else if (MEM_P (*op)) -+ { -+ rtx tmp = gen_reg_rtx (DImode); -+ -+ emit_insn_before (gen_move_insn (tmp, *op), insn); -+ *op = gen_rtx_SUBREG (V2DImode, tmp, 0); -+ -+ if (dump_file) -+ fprintf (dump_file, " Preloading operand for insn %d into r%d\n", -+ INSN_UID (insn), REGNO (tmp)); -+ } -+ else if (REG_P (*op)) -+ { -+ /* We may have not converted register usage in case -+ this register has no definition. Otherwise it -+ should be converted in convert_reg. */ -+ df_ref ref; -+ FOR_EACH_INSN_USE (ref, insn) -+ if (DF_REF_REGNO (ref) == REGNO (*op)) -+ { -+ gcc_assert (!DF_REF_CHAIN (ref)); -+ break; -+ } -+ *op = gen_rtx_SUBREG (V2DImode, *op, 0); -+ } -+ else if (CONST_INT_P (*op)) -+ { -+ rtx vec_cst; -+ rtx tmp = gen_rtx_SUBREG (V2DImode, gen_reg_rtx (DImode), 0); -+ -+ /* Prefer all ones vector in case of -1. */ -+ if (constm1_operand (*op, GET_MODE (*op))) -+ vec_cst = CONSTM1_RTX (V2DImode); -+ else -+ vec_cst = gen_rtx_CONST_VECTOR (V2DImode, -+ gen_rtvec (2, *op, const0_rtx)); -+ -+ if (!standard_sse_constant_p (vec_cst, V2DImode)) -+ { -+ start_sequence (); -+ vec_cst = validize_mem (force_const_mem (V2DImode, vec_cst)); -+ rtx_insn *seq = get_insns (); -+ end_sequence (); -+ emit_insn_before (seq, insn); -+ } -+ -+ emit_insn_before (gen_move_insn (copy_rtx (tmp), vec_cst), insn); -+ *op = tmp; -+ } -+ else -+ { -+ gcc_assert (SUBREG_P (*op)); -+ gcc_assert (GET_MODE (*op) == V2DImode); -+ } -+} -+ -+/* Convert INSN to vector mode. */ -+ -+void -+dimode_scalar_chain::convert_insn (rtx_insn *insn) -+{ -+ rtx def_set = single_set (insn); -+ rtx src = SET_SRC (def_set); -+ rtx dst = SET_DEST (def_set); -+ rtx subreg; -+ -+ if (MEM_P (dst) && !REG_P (src)) -+ { -+ /* There are no scalar integer instructions and therefore -+ temporary register usage is required. 
*/ -+ rtx tmp = gen_reg_rtx (DImode); -+ emit_conversion_insns (gen_move_insn (dst, tmp), insn); -+ dst = gen_rtx_SUBREG (V2DImode, tmp, 0); -+ } -+ -+ switch (GET_CODE (src)) -+ { -+ case ASHIFT: -+ case ASHIFTRT: -+ case LSHIFTRT: -+ convert_op (&XEXP (src, 0), insn); -+ PUT_MODE (src, V2DImode); -+ break; -+ -+ case PLUS: -+ case MINUS: -+ case IOR: -+ case XOR: -+ case AND: -+ convert_op (&XEXP (src, 0), insn); -+ convert_op (&XEXP (src, 1), insn); -+ PUT_MODE (src, V2DImode); -+ break; -+ -+ case NEG: -+ src = XEXP (src, 0); -+ convert_op (&src, insn); -+ subreg = gen_reg_rtx (V2DImode); -+ emit_insn_before (gen_move_insn (subreg, CONST0_RTX (V2DImode)), insn); -+ src = gen_rtx_MINUS (V2DImode, subreg, src); -+ break; -+ -+ case NOT: -+ src = XEXP (src, 0); -+ convert_op (&src, insn); -+ subreg = gen_reg_rtx (V2DImode); -+ emit_insn_before (gen_move_insn (subreg, CONSTM1_RTX (V2DImode)), insn); -+ src = gen_rtx_XOR (V2DImode, src, subreg); -+ break; -+ -+ case MEM: -+ if (!REG_P (dst)) -+ convert_op (&src, insn); -+ break; -+ -+ case REG: -+ if (!MEM_P (dst)) -+ convert_op (&src, insn); -+ break; -+ -+ case SUBREG: -+ gcc_assert (GET_MODE (src) == V2DImode); -+ break; -+ -+ case COMPARE: -+ src = SUBREG_REG (XEXP (XEXP (src, 0), 0)); -+ -+ gcc_assert ((REG_P (src) && GET_MODE (src) == DImode) -+ || (SUBREG_P (src) && GET_MODE (src) == V2DImode)); -+ -+ if (REG_P (src)) -+ subreg = gen_rtx_SUBREG (V2DImode, src, 0); -+ else -+ subreg = copy_rtx_if_shared (src); -+ emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg), -+ copy_rtx_if_shared (subreg), -+ copy_rtx_if_shared (subreg)), -+ insn); -+ dst = gen_rtx_REG (CCmode, FLAGS_REG); -+ src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (src), -+ copy_rtx_if_shared (src)), -+ UNSPEC_PTEST); -+ break; -+ -+ case CONST_INT: -+ convert_op (&src, insn); -+ break; -+ -+ default: -+ gcc_unreachable (); -+ } -+ -+ SET_SRC (def_set) = src; -+ SET_DEST (def_set) = dst; -+ -+ /* Drop possible dead definitions. */ -+ PATTERN (insn) = def_set; -+ -+ INSN_CODE (insn) = -1; -+ recog_memoized (insn); -+ df_insn_rescan (insn); -+} -+ -+/* Fix uses of converted REG in debug insns. */ -+ -+void -+timode_scalar_chain::fix_debug_reg_uses (rtx reg) -+{ -+ if (!flag_var_tracking) -+ return; -+ -+ df_ref ref, next; -+ for (ref = DF_REG_USE_CHAIN (REGNO (reg)); ref; ref = next) -+ { -+ rtx_insn *insn = DF_REF_INSN (ref); -+ /* Make sure the next ref is for a different instruction, -+ so that we're not affected by the rescan. */ -+ next = DF_REF_NEXT_REG (ref); -+ while (next && DF_REF_INSN (next) == insn) -+ next = DF_REF_NEXT_REG (next); -+ -+ if (DEBUG_INSN_P (insn)) -+ { -+ /* It may be a debug insn with a TImode variable in -+ register. */ -+ bool changed = false; -+ for (; ref != next; ref = DF_REF_NEXT_REG (ref)) -+ { -+ rtx *loc = DF_REF_LOC (ref); -+ if (REG_P (*loc) && GET_MODE (*loc) == V1TImode) -+ { -+ *loc = gen_rtx_SUBREG (TImode, *loc, 0); -+ changed = true; -+ } -+ } -+ if (changed) -+ df_insn_rescan (insn); -+ } -+ } -+} -+ -+/* Convert INSN from TImode to V1T1mode. 
*/ -+ -+void -+timode_scalar_chain::convert_insn (rtx_insn *insn) -+{ -+ rtx def_set = single_set (insn); -+ rtx src = SET_SRC (def_set); -+ rtx dst = SET_DEST (def_set); -+ -+ switch (GET_CODE (dst)) -+ { -+ case REG: -+ { -+ rtx tmp = find_reg_equal_equiv_note (insn); -+ if (tmp) -+ PUT_MODE (XEXP (tmp, 0), V1TImode); -+ PUT_MODE (dst, V1TImode); -+ fix_debug_reg_uses (dst); -+ } -+ break; -+ case MEM: -+ PUT_MODE (dst, V1TImode); -+ break; -+ -+ default: -+ gcc_unreachable (); -+ } -+ -+ switch (GET_CODE (src)) -+ { -+ case REG: -+ PUT_MODE (src, V1TImode); -+ /* Call fix_debug_reg_uses only if SRC is never defined. */ -+ if (!DF_REG_DEF_CHAIN (REGNO (src))) -+ fix_debug_reg_uses (src); -+ break; -+ -+ case MEM: -+ PUT_MODE (src, V1TImode); -+ break; -+ -+ case CONST_WIDE_INT: -+ if (NONDEBUG_INSN_P (insn)) -+ { -+ /* Since there are no instructions to store 128-bit constant, -+ temporary register usage is required. */ -+ rtx tmp = gen_reg_rtx (V1TImode); -+ start_sequence (); -+ src = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src)); -+ src = validize_mem (force_const_mem (V1TImode, src)); -+ rtx_insn *seq = get_insns (); -+ end_sequence (); -+ if (seq) -+ emit_insn_before (seq, insn); -+ emit_conversion_insns (gen_rtx_SET (dst, tmp), insn); -+ dst = tmp; -+ } -+ break; -+ -+ case CONST_INT: -+ switch (standard_sse_constant_p (src, TImode)) -+ { -+ case 1: -+ src = CONST0_RTX (GET_MODE (dst)); -+ break; -+ case 2: -+ src = CONSTM1_RTX (GET_MODE (dst)); -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ if (NONDEBUG_INSN_P (insn)) -+ { -+ rtx tmp = gen_reg_rtx (V1TImode); -+ /* Since there are no instructions to store standard SSE -+ constant, temporary register usage is required. */ -+ emit_conversion_insns (gen_rtx_SET (dst, tmp), insn); -+ dst = tmp; -+ } -+ break; -+ -+ default: -+ gcc_unreachable (); -+ } -+ -+ SET_SRC (def_set) = src; -+ SET_DEST (def_set) = dst; -+ -+ /* Drop possible dead definitions. */ -+ PATTERN (insn) = def_set; -+ -+ INSN_CODE (insn) = -1; -+ recog_memoized (insn); -+ df_insn_rescan (insn); -+} -+ -+void -+dimode_scalar_chain::convert_registers () -+{ -+ bitmap_iterator bi; -+ unsigned id; -+ -+ EXECUTE_IF_SET_IN_BITMAP (defs, 0, id, bi) -+ convert_reg (id); -+ -+ EXECUTE_IF_AND_COMPL_IN_BITMAP (defs_conv, defs, 0, id, bi) -+ make_vector_copies (id); -+} -+ -+/* Convert whole chain creating required register -+ conversions and copies. */ -+ -+int -+scalar_chain::convert () -+{ -+ bitmap_iterator bi; -+ unsigned id; -+ int converted_insns = 0; -+ -+ if (!dbg_cnt (stv_conversion)) -+ return 0; -+ -+ if (dump_file) -+ fprintf (dump_file, "Converting chain #%d...\n", chain_id); -+ -+ convert_registers (); -+ -+ EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi) -+ { -+ convert_insn (DF_INSN_UID_GET (id)->insn); -+ converted_insns++; -+ } -+ -+ return converted_insns; -+} -+ -+/* Return 1 if INSN uses or defines a hard register. -+ Hard register uses in a memory address are ignored. -+ Clobbers and flags definitions are ignored. */ -+ -+static bool -+has_non_address_hard_reg (rtx_insn *insn) -+{ -+ df_ref ref; -+ FOR_EACH_INSN_DEF (ref, insn) -+ if (HARD_REGISTER_P (DF_REF_REAL_REG (ref)) -+ && !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER) -+ && DF_REF_REGNO (ref) != FLAGS_REG) -+ return true; -+ -+ FOR_EACH_INSN_USE (ref, insn) -+ if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref))) -+ return true; -+ -+ return false; -+} -+ -+/* Check if comparison INSN may be transformed -+ into vector comparison. 
Currently we transform -+ zero checks only which look like: -+ -+ (set (reg:CCZ 17 flags) -+ (compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4) -+ (subreg:SI (reg:DI x) 0)) -+ (const_int 0 [0]))) */ -+ -+static bool -+convertible_comparison_p (rtx_insn *insn) -+{ -+ if (!TARGET_SSE4_1) -+ return false; -+ -+ rtx def_set = single_set (insn); -+ -+ gcc_assert (def_set); -+ -+ rtx src = SET_SRC (def_set); -+ rtx dst = SET_DEST (def_set); -+ -+ gcc_assert (GET_CODE (src) == COMPARE); -+ -+ if (GET_CODE (dst) != REG -+ || REGNO (dst) != FLAGS_REG -+ || GET_MODE (dst) != CCZmode) -+ return false; -+ -+ rtx op1 = XEXP (src, 0); -+ rtx op2 = XEXP (src, 1); -+ -+ if (op2 != CONST0_RTX (GET_MODE (op2))) -+ return false; -+ -+ if (GET_CODE (op1) != IOR) -+ return false; -+ -+ op2 = XEXP (op1, 1); -+ op1 = XEXP (op1, 0); -+ -+ if (!SUBREG_P (op1) -+ || !SUBREG_P (op2) -+ || GET_MODE (op1) != SImode -+ || GET_MODE (op2) != SImode -+ || ((SUBREG_BYTE (op1) != 0 -+ || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode)) -+ && (SUBREG_BYTE (op2) != 0 -+ || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode)))) -+ return false; -+ -+ op1 = SUBREG_REG (op1); -+ op2 = SUBREG_REG (op2); -+ -+ if (op1 != op2 -+ || !REG_P (op1) -+ || GET_MODE (op1) != DImode) -+ return false; -+ -+ return true; -+} -+ -+/* The DImode version of scalar_to_vector_candidate_p. */ -+ -+static bool -+dimode_scalar_to_vector_candidate_p (rtx_insn *insn) -+{ -+ rtx def_set = single_set (insn); -+ -+ if (!def_set) -+ return false; -+ -+ if (has_non_address_hard_reg (insn)) -+ return false; -+ -+ rtx src = SET_SRC (def_set); -+ rtx dst = SET_DEST (def_set); -+ -+ if (GET_CODE (src) == COMPARE) -+ return convertible_comparison_p (insn); -+ -+ /* We are interested in DImode promotion only. */ -+ if ((GET_MODE (src) != DImode -+ && !CONST_INT_P (src)) -+ || GET_MODE (dst) != DImode) -+ return false; -+ -+ if (!REG_P (dst) && !MEM_P (dst)) -+ return false; -+ -+ switch (GET_CODE (src)) -+ { -+ case ASHIFTRT: -+ if (!TARGET_AVX512VL) -+ return false; -+ /* FALLTHRU */ -+ -+ case ASHIFT: -+ case LSHIFTRT: -+ if (!CONST_INT_P (XEXP (src, 1)) -+ || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, 63)) -+ return false; -+ break; -+ -+ case PLUS: -+ case MINUS: -+ case IOR: -+ case XOR: -+ case AND: -+ if (!REG_P (XEXP (src, 1)) -+ && !MEM_P (XEXP (src, 1)) -+ && !CONST_INT_P (XEXP (src, 1))) -+ return false; -+ -+ if (GET_MODE (XEXP (src, 1)) != DImode -+ && !CONST_INT_P (XEXP (src, 1))) -+ return false; -+ break; -+ -+ case NEG: -+ case NOT: -+ break; -+ -+ case REG: -+ return true; -+ -+ case MEM: -+ case CONST_INT: -+ return REG_P (dst); -+ -+ default: -+ return false; -+ } -+ -+ if (!REG_P (XEXP (src, 0)) -+ && !MEM_P (XEXP (src, 0)) -+ && !CONST_INT_P (XEXP (src, 0)) -+ /* Check for andnot case. */ -+ && (GET_CODE (src) != AND -+ || GET_CODE (XEXP (src, 0)) != NOT -+ || !REG_P (XEXP (XEXP (src, 0), 0)))) -+ return false; -+ -+ if (GET_MODE (XEXP (src, 0)) != DImode -+ && !CONST_INT_P (XEXP (src, 0))) -+ return false; -+ -+ return true; -+} -+ -+/* The TImode version of scalar_to_vector_candidate_p. */ -+ -+static bool -+timode_scalar_to_vector_candidate_p (rtx_insn *insn) -+{ -+ rtx def_set = single_set (insn); -+ -+ if (!def_set) -+ return false; -+ -+ if (has_non_address_hard_reg (insn)) -+ return false; -+ -+ rtx src = SET_SRC (def_set); -+ rtx dst = SET_DEST (def_set); -+ -+ /* Only TImode load and store are allowed. */ -+ if (GET_MODE (dst) != TImode) -+ return false; -+ -+ if (MEM_P (dst)) -+ { -+ /* Check for store. 
Memory must be aligned or unaligned store -+ is optimal. Only support store from register, standard SSE -+ constant or CONST_WIDE_INT generated from piecewise store. -+ -+ ??? Verify performance impact before enabling CONST_INT for -+ __int128 store. */ -+ if (misaligned_operand (dst, TImode) -+ && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL) -+ return false; -+ -+ switch (GET_CODE (src)) -+ { -+ default: -+ return false; -+ -+ case REG: -+ case CONST_WIDE_INT: -+ return true; -+ -+ case CONST_INT: -+ return standard_sse_constant_p (src, TImode); -+ } -+ } -+ else if (MEM_P (src)) -+ { -+ /* Check for load. Memory must be aligned or unaligned load is -+ optimal. */ -+ return (REG_P (dst) -+ && (!misaligned_operand (src, TImode) -+ || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)); -+ } -+ -+ return false; -+} -+ -+/* Return 1 if INSN may be converted into vector -+ instruction. */ -+ -+static bool -+scalar_to_vector_candidate_p (rtx_insn *insn) -+{ -+ if (TARGET_64BIT) -+ return timode_scalar_to_vector_candidate_p (insn); -+ else -+ return dimode_scalar_to_vector_candidate_p (insn); -+} -+ -+/* The DImode version of remove_non_convertible_regs. */ -+ -+static void -+dimode_remove_non_convertible_regs (bitmap candidates) -+{ -+ bitmap_iterator bi; -+ unsigned id; -+ bitmap regs = BITMAP_ALLOC (NULL); -+ -+ EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi) -+ { -+ rtx def_set = single_set (DF_INSN_UID_GET (id)->insn); -+ rtx reg = SET_DEST (def_set); -+ -+ if (!REG_P (reg) -+ || bitmap_bit_p (regs, REGNO (reg)) -+ || HARD_REGISTER_P (reg)) -+ continue; -+ -+ for (df_ref def = DF_REG_DEF_CHAIN (REGNO (reg)); -+ def; -+ def = DF_REF_NEXT_REG (def)) -+ { -+ if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def))) -+ { -+ if (dump_file) -+ fprintf (dump_file, -+ "r%d has non convertible definition in insn %d\n", -+ REGNO (reg), DF_REF_INSN_UID (def)); -+ -+ bitmap_set_bit (regs, REGNO (reg)); -+ break; -+ } -+ } -+ } -+ -+ EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi) -+ { -+ for (df_ref def = DF_REG_DEF_CHAIN (id); -+ def; -+ def = DF_REF_NEXT_REG (def)) -+ if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def))) -+ { -+ if (dump_file) -+ fprintf (dump_file, "Removing insn %d from candidates list\n", -+ DF_REF_INSN_UID (def)); -+ -+ bitmap_clear_bit (candidates, DF_REF_INSN_UID (def)); -+ } -+ } -+ -+ BITMAP_FREE (regs); -+} -+ -+/* For a register REGNO, scan instructions for its defs and uses. -+ Put REGNO in REGS if a def or use isn't in CANDIDATES. */ -+ -+static void -+timode_check_non_convertible_regs (bitmap candidates, bitmap regs, -+ unsigned int regno) -+{ -+ for (df_ref def = DF_REG_DEF_CHAIN (regno); -+ def; -+ def = DF_REF_NEXT_REG (def)) -+ { -+ if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def))) -+ { -+ if (dump_file) -+ fprintf (dump_file, -+ "r%d has non convertible def in insn %d\n", -+ regno, DF_REF_INSN_UID (def)); -+ -+ bitmap_set_bit (regs, regno); -+ break; -+ } -+ } -+ -+ for (df_ref ref = DF_REG_USE_CHAIN (regno); -+ ref; -+ ref = DF_REF_NEXT_REG (ref)) -+ { -+ /* Debug instructions are skipped. */ -+ if (NONDEBUG_INSN_P (DF_REF_INSN (ref)) -+ && !bitmap_bit_p (candidates, DF_REF_INSN_UID (ref))) -+ { -+ if (dump_file) -+ fprintf (dump_file, -+ "r%d has non convertible use in insn %d\n", -+ regno, DF_REF_INSN_UID (ref)); -+ -+ bitmap_set_bit (regs, regno); -+ break; -+ } -+ } -+} -+ -+/* The TImode version of remove_non_convertible_regs. 
*/ -+ -+static void -+timode_remove_non_convertible_regs (bitmap candidates) -+{ -+ bitmap_iterator bi; -+ unsigned id; -+ bitmap regs = BITMAP_ALLOC (NULL); -+ -+ EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi) -+ { -+ rtx def_set = single_set (DF_INSN_UID_GET (id)->insn); -+ rtx dest = SET_DEST (def_set); -+ rtx src = SET_SRC (def_set); -+ -+ if ((!REG_P (dest) -+ || bitmap_bit_p (regs, REGNO (dest)) -+ || HARD_REGISTER_P (dest)) -+ && (!REG_P (src) -+ || bitmap_bit_p (regs, REGNO (src)) -+ || HARD_REGISTER_P (src))) -+ continue; -+ -+ if (REG_P (dest)) -+ timode_check_non_convertible_regs (candidates, regs, -+ REGNO (dest)); -+ -+ if (REG_P (src)) -+ timode_check_non_convertible_regs (candidates, regs, -+ REGNO (src)); -+ } -+ -+ EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi) -+ { -+ for (df_ref def = DF_REG_DEF_CHAIN (id); -+ def; -+ def = DF_REF_NEXT_REG (def)) -+ if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def))) -+ { -+ if (dump_file) -+ fprintf (dump_file, "Removing insn %d from candidates list\n", -+ DF_REF_INSN_UID (def)); -+ -+ bitmap_clear_bit (candidates, DF_REF_INSN_UID (def)); -+ } -+ -+ for (df_ref ref = DF_REG_USE_CHAIN (id); -+ ref; -+ ref = DF_REF_NEXT_REG (ref)) -+ if (bitmap_bit_p (candidates, DF_REF_INSN_UID (ref))) -+ { -+ if (dump_file) -+ fprintf (dump_file, "Removing insn %d from candidates list\n", -+ DF_REF_INSN_UID (ref)); -+ -+ bitmap_clear_bit (candidates, DF_REF_INSN_UID (ref)); -+ } -+ } -+ -+ BITMAP_FREE (regs); -+} -+ -+/* For a given bitmap of insn UIDs scans all instruction and -+ remove insn from CANDIDATES in case it has both convertible -+ and not convertible definitions. -+ -+ All insns in a bitmap are conversion candidates according to -+ scalar_to_vector_candidate_p. Currently it implies all insns -+ are single_set. */ -+ -+static void -+remove_non_convertible_regs (bitmap candidates) -+{ -+ if (TARGET_64BIT) -+ timode_remove_non_convertible_regs (candidates); -+ else -+ dimode_remove_non_convertible_regs (candidates); -+} -+ -+/* Main STV pass function. Find and convert scalar -+ instructions into vector mode when profitable. */ -+ -+static unsigned int -+convert_scalars_to_vector () -+{ -+ basic_block bb; -+ bitmap candidates; -+ int converted_insns = 0; -+ -+ bitmap_obstack_initialize (NULL); -+ candidates = BITMAP_ALLOC (NULL); -+ -+ calculate_dominance_info (CDI_DOMINATORS); -+ df_set_flags (DF_DEFER_INSN_RESCAN); -+ df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN); -+ df_md_add_problem (); -+ df_analyze (); -+ -+ /* Find all instructions we want to convert into vector mode. */ -+ if (dump_file) -+ fprintf (dump_file, "Searching for mode conversion candidates...\n"); -+ -+ FOR_EACH_BB_FN (bb, cfun) -+ { -+ rtx_insn *insn; -+ FOR_BB_INSNS (bb, insn) -+ if (scalar_to_vector_candidate_p (insn)) -+ { -+ if (dump_file) -+ fprintf (dump_file, " insn %d is marked as a candidate\n", -+ INSN_UID (insn)); -+ -+ bitmap_set_bit (candidates, INSN_UID (insn)); -+ } -+ } -+ -+ remove_non_convertible_regs (candidates); -+ -+ if (bitmap_empty_p (candidates)) -+ if (dump_file) -+ fprintf (dump_file, "There are no candidates for optimization.\n"); -+ -+ while (!bitmap_empty_p (candidates)) -+ { -+ unsigned uid = bitmap_first_set_bit (candidates); -+ scalar_chain *chain; -+ -+ if (TARGET_64BIT) -+ chain = new timode_scalar_chain; -+ else -+ chain = new dimode_scalar_chain; -+ -+ /* Find instructions chain we want to convert to vector mode. -+ Check all uses and definitions to estimate all required -+ conversions. 
*/ -+ chain->build (candidates, uid); -+ -+ if (chain->compute_convert_gain () > 0) -+ converted_insns += chain->convert (); -+ else -+ if (dump_file) -+ fprintf (dump_file, "Chain #%d conversion is not profitable\n", -+ chain->chain_id); -+ -+ delete chain; -+ } -+ -+ if (dump_file) -+ fprintf (dump_file, "Total insns converted: %d\n", converted_insns); -+ -+ BITMAP_FREE (candidates); -+ bitmap_obstack_release (NULL); -+ df_process_deferred_rescans (); -+ -+ /* Conversion means we may have 128bit register spills/fills -+ which require aligned stack. */ -+ if (converted_insns) -+ { -+ if (crtl->stack_alignment_needed < 128) -+ crtl->stack_alignment_needed = 128; -+ if (crtl->stack_alignment_estimated < 128) -+ crtl->stack_alignment_estimated = 128; -+ /* Fix up DECL_RTL/DECL_INCOMING_RTL of arguments. */ -+ if (TARGET_64BIT) -+ for (tree parm = DECL_ARGUMENTS (current_function_decl); -+ parm; parm = DECL_CHAIN (parm)) -+ { -+ if (TYPE_MODE (TREE_TYPE (parm)) != TImode) -+ continue; -+ if (DECL_RTL_SET_P (parm) -+ && GET_MODE (DECL_RTL (parm)) == V1TImode) -+ { -+ rtx r = DECL_RTL (parm); -+ if (REG_P (r)) -+ SET_DECL_RTL (parm, gen_rtx_SUBREG (TImode, r, 0)); -+ } -+ if (DECL_INCOMING_RTL (parm) -+ && GET_MODE (DECL_INCOMING_RTL (parm)) == V1TImode) -+ { -+ rtx r = DECL_INCOMING_RTL (parm); -+ if (REG_P (r)) -+ DECL_INCOMING_RTL (parm) = gen_rtx_SUBREG (TImode, r, 0); -+ } -+ } -+ } -+ -+ return 0; -+} -+ -+static unsigned int -+rest_of_handle_insert_vzeroupper (void) -+{ -+ int i; -+ -+ /* vzeroupper instructions are inserted immediately after reload to -+ account for possible spills from 256bit or 512bit registers. The pass -+ reuses mode switching infrastructure by re-running mode insertion -+ pass, so disable entities that have already been processed. */ -+ for (i = 0; i < MAX_386_ENTITIES; i++) -+ ix86_optimize_mode_switching[i] = 0; -+ -+ ix86_optimize_mode_switching[AVX_U128] = 1; -+ -+ /* Call optimize_mode_switching. 
*/ -+ g->get_passes ()->execute_pass_mode_switching (); -+ return 0; -+} -+ -+namespace { -+ -+const pass_data pass_data_insert_vzeroupper = -+{ -+ RTL_PASS, /* type */ -+ "vzeroupper", /* name */ -+ OPTGROUP_NONE, /* optinfo_flags */ -+ TV_MACH_DEP, /* tv_id */ -+ 0, /* properties_required */ -+ 0, /* properties_provided */ -+ 0, /* properties_destroyed */ -+ 0, /* todo_flags_start */ -+ TODO_df_finish, /* todo_flags_finish */ -+}; -+ -+class pass_insert_vzeroupper : public rtl_opt_pass -+{ -+public: -+ pass_insert_vzeroupper(gcc::context *ctxt) -+ : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt) -+ {} -+ -+ /* opt_pass methods: */ -+ virtual bool gate (function *) -+ { -+ return TARGET_AVX -+ && TARGET_VZEROUPPER && flag_expensive_optimizations -+ && !optimize_size; -+ } -+ -+ virtual unsigned int execute (function *) -+ { -+ return rest_of_handle_insert_vzeroupper (); -+ } -+ -+}; // class pass_insert_vzeroupper -+ -+const pass_data pass_data_stv = -+{ -+ RTL_PASS, /* type */ -+ "stv", /* name */ -+ OPTGROUP_NONE, /* optinfo_flags */ -+ TV_MACH_DEP, /* tv_id */ -+ 0, /* properties_required */ -+ 0, /* properties_provided */ -+ 0, /* properties_destroyed */ -+ 0, /* todo_flags_start */ -+ TODO_df_finish, /* todo_flags_finish */ -+}; -+ -+class pass_stv : public rtl_opt_pass -+{ -+public: -+ pass_stv (gcc::context *ctxt) -+ : rtl_opt_pass (pass_data_stv, ctxt), -+ timode_p (false) -+ {} -+ -+ /* opt_pass methods: */ -+ virtual bool gate (function *) -+ { -+ return (timode_p == !!TARGET_64BIT -+ && TARGET_STV && TARGET_SSE2 && optimize > 1); -+ } -+ -+ virtual unsigned int execute (function *) -+ { -+ return convert_scalars_to_vector (); -+ } -+ -+ opt_pass *clone () -+ { -+ return new pass_stv (m_ctxt); -+ } -+ -+ void set_pass_param (unsigned int n, bool param) -+ { -+ gcc_assert (n == 0); -+ timode_p = param; -+ } -+ -+private: -+ bool timode_p; -+}; // class pass_stv -+ -+} // anon namespace -+ -+rtl_opt_pass * -+make_pass_insert_vzeroupper (gcc::context *ctxt) -+{ -+ return new pass_insert_vzeroupper (ctxt); -+} -+ -+rtl_opt_pass * -+make_pass_stv (gcc::context *ctxt) -+{ -+ return new pass_stv (ctxt); -+} -+ -+/* Inserting ENDBRANCH instructions. */ -+ -+static unsigned int -+rest_of_insert_endbranch (void) -+{ -+ timevar_push (TV_MACH_DEP); -+ -+ rtx cet_eb; -+ rtx_insn *insn; -+ basic_block bb; -+ -+ /* Currently emit EB if it's a tracking function, i.e. 'nocf_check' is -+ absent among function attributes. Later an optimization will be -+ introduced to make analysis if an address of a static function is -+ taken. A static function whose address is not taken will get a -+ nocf_check attribute. This will allow to reduce the number of EB. */ -+ -+ if (!lookup_attribute ("nocf_check", -+ TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))) -+ && (!flag_manual_endbr -+ || lookup_attribute ("cf_check", -+ DECL_ATTRIBUTES (cfun->decl))) -+ && !cgraph_node::get (cfun->decl)->only_called_directly_p ()) -+ { -+ /* Queue ENDBR insertion to x86_function_profiler. 
*/
-+      if (crtl->profile && flag_fentry)
-+        cfun->machine->endbr_queued_at_entrance = true;
-+      else
-+        {
-+          cet_eb = gen_nop_endbr ();
-+
-+          bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
-+          insn = BB_HEAD (bb);
-+          emit_insn_before (cet_eb, insn);
-+        }
-+    }
-+
-+  bb = 0;
-+  FOR_EACH_BB_FN (bb, cfun)
-+    {
-+      for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
-+           insn = NEXT_INSN (insn))
-+        {
-+          if (CALL_P (insn))
-+            {
-+              bool need_endbr;
-+              need_endbr = find_reg_note (insn, REG_SETJMP, NULL) != NULL;
-+              if (!need_endbr && !SIBLING_CALL_P (insn))
-+                {
-+                  rtx call = get_call_rtx_from (insn);
-+                  rtx fnaddr = XEXP (call, 0);
-+                  tree fndecl = NULL_TREE;
-+
-+                  /* Also generate ENDBRANCH for non-tail call which
-+                     may return via indirect branch. */
-+                  if (GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
-+                    fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0));
-+                  if (fndecl == NULL_TREE)
-+                    fndecl = MEM_EXPR (fnaddr);
-+                  if (fndecl
-+                      && TREE_CODE (TREE_TYPE (fndecl)) != FUNCTION_TYPE
-+                      && TREE_CODE (TREE_TYPE (fndecl)) != METHOD_TYPE)
-+                    fndecl = NULL_TREE;
-+                  if (fndecl && TYPE_ARG_TYPES (TREE_TYPE (fndecl)))
-+                    {
-+                      tree fntype = TREE_TYPE (fndecl);
-+                      if (lookup_attribute ("indirect_return",
-+                                            TYPE_ATTRIBUTES (fntype)))
-+                        need_endbr = true;
-+                    }
-+                }
-+              if (!need_endbr)
-+                continue;
-+              /* Generate ENDBRANCH after CALL, which can return more than
-+                 twice, setjmp-like functions. */
-+
-+              cet_eb = gen_nop_endbr ();
-+              emit_insn_after_setloc (cet_eb, insn, INSN_LOCATION (insn));
-+              continue;
-+            }
-+
-+          if (JUMP_P (insn) && flag_cet_switch)
-+            {
-+              rtx target = JUMP_LABEL (insn);
-+              if (target == NULL_RTX || ANY_RETURN_P (target))
-+                continue;
-+
-+              /* Check the jump is a switch table. */
-+              rtx_insn *label = as_a <rtx_insn *> (target);
-+              rtx_insn *table = next_insn (label);
-+              if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table))
-+                continue;
-+
-+              /* For the indirect jump find out all places it jumps and insert
-+                 ENDBRANCH there. It should be done under a special flag to
-+                 control ENDBRANCH generation for switch stmts. */
-+              edge_iterator ei;
-+              edge e;
-+              basic_block dest_blk;
-+
-+              FOR_EACH_EDGE (e, ei, bb->succs)
-+                {
-+                  rtx_insn *insn;
-+
-+                  dest_blk = e->dest;
-+                  insn = BB_HEAD (dest_blk);
-+                  gcc_assert (LABEL_P (insn));
-+                  cet_eb = gen_nop_endbr ();
-+                  emit_insn_after (cet_eb, insn);
-+                }
-+              continue;
-+            }
-+
-+          if ((LABEL_P (insn) && LABEL_PRESERVE_P (insn))
-+              || (NOTE_P (insn)
-+                  && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
-+            /* TODO. Check /s bit also. */
-+            {
-+              cet_eb = gen_nop_endbr ();
-+              emit_insn_after (cet_eb, insn);
-+              continue;
-+            }
-+        }
-+    }
-+
-+  timevar_pop (TV_MACH_DEP);
-+  return 0;
-+}
-+
-+namespace {
-+
-+const pass_data pass_data_insert_endbranch =
-+{
-+  RTL_PASS, /* type. */
-+  "cet", /* name. */
-+  OPTGROUP_NONE, /* optinfo_flags. */
-+  TV_MACH_DEP, /* tv_id. */
-+  0, /* properties_required. */
-+  0, /* properties_provided. */
-+  0, /* properties_destroyed. */
-+  0, /* todo_flags_start. */
-+  0, /* todo_flags_finish.
*/ -+}; -+ -+class pass_insert_endbranch : public rtl_opt_pass -+{ -+public: -+ pass_insert_endbranch (gcc::context *ctxt) -+ : rtl_opt_pass (pass_data_insert_endbranch, ctxt) -+ {} -+ -+ /* opt_pass methods: */ -+ virtual bool gate (function *) -+ { -+ return ((flag_cf_protection & CF_BRANCH)); -+ } -+ -+ virtual unsigned int execute (function *) -+ { -+ return rest_of_insert_endbranch (); -+ } -+ -+}; // class pass_insert_endbranch -+ -+} // anon namespace -+ -+rtl_opt_pass * -+make_pass_insert_endbranch (gcc::context *ctxt) -+{ -+ return new pass_insert_endbranch (ctxt); -+} -+ -+/* At entry of the nearest common dominator for basic blocks with -+ conversions, generate a single -+ vxorps %xmmN, %xmmN, %xmmN -+ for all -+ vcvtss2sd op, %xmmN, %xmmX -+ vcvtsd2ss op, %xmmN, %xmmX -+ vcvtsi2ss op, %xmmN, %xmmX -+ vcvtsi2sd op, %xmmN, %xmmX -+ -+ NB: We want to generate only a single vxorps to cover the whole -+ function. The LCM algorithm isn't appropriate here since it may -+ place a vxorps inside the loop. */ -+ -+static unsigned int -+remove_partial_avx_dependency (void) -+{ -+ timevar_push (TV_MACH_DEP); -+ -+ bitmap_obstack_initialize (NULL); -+ bitmap convert_bbs = BITMAP_ALLOC (NULL); -+ -+ basic_block bb; -+ rtx_insn *insn, *set_insn; -+ rtx set; -+ rtx v4sf_const0 = NULL_RTX; -+ -+ auto_vec control_flow_insns; -+ -+ FOR_EACH_BB_FN (bb, cfun) -+ { -+ FOR_BB_INSNS (bb, insn) -+ { -+ if (!NONDEBUG_INSN_P (insn)) -+ continue; -+ -+ set = single_set (insn); -+ if (!set) -+ continue; -+ -+ if (get_attr_avx_partial_xmm_update (insn) -+ != AVX_PARTIAL_XMM_UPDATE_TRUE) -+ continue; -+ -+ if (!v4sf_const0) -+ { -+ calculate_dominance_info (CDI_DOMINATORS); -+ df_set_flags (DF_DEFER_INSN_RESCAN); -+ df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN); -+ df_md_add_problem (); -+ df_analyze (); -+ v4sf_const0 = gen_reg_rtx (V4SFmode); -+ } -+ -+ /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF, -+ SI -> SF, SI -> DF, DI -> SF, DI -> DF, to vec_dup and -+ vec_merge with subreg. */ -+ rtx src = SET_SRC (set); -+ rtx dest = SET_DEST (set); -+ machine_mode dest_mode = GET_MODE (dest); -+ -+ rtx zero; -+ machine_mode dest_vecmode; -+ if (dest_mode == E_SFmode) -+ { -+ dest_vecmode = V4SFmode; -+ zero = v4sf_const0; -+ } -+ else -+ { -+ dest_vecmode = V2DFmode; -+ zero = gen_rtx_SUBREG (V2DFmode, v4sf_const0, 0); -+ } -+ -+ /* Change source to vector mode. */ -+ src = gen_rtx_VEC_DUPLICATE (dest_vecmode, src); -+ src = gen_rtx_VEC_MERGE (dest_vecmode, src, zero, -+ GEN_INT (HOST_WIDE_INT_1U)); -+ /* Change destination to vector mode. */ -+ rtx vec = gen_reg_rtx (dest_vecmode); -+ /* Generate an XMM vector SET. */ -+ set = gen_rtx_SET (vec, src); -+ set_insn = emit_insn_before (set, insn); -+ df_insn_rescan (set_insn); -+ -+ if (cfun->can_throw_non_call_exceptions) -+ { -+ /* Handle REG_EH_REGION note. */ -+ rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX); -+ if (note) -+ { -+ control_flow_insns.safe_push (set_insn); -+ add_reg_note (set_insn, REG_EH_REGION, XEXP (note, 0)); -+ } -+ } -+ -+ src = gen_rtx_SUBREG (dest_mode, vec, 0); -+ set = gen_rtx_SET (dest, src); -+ -+ /* Drop possible dead definitions. */ -+ PATTERN (insn) = set; -+ -+ INSN_CODE (insn) = -1; -+ recog_memoized (insn); -+ df_insn_rescan (insn); -+ bitmap_set_bit (convert_bbs, bb->index); -+ } -+ } -+ -+ if (v4sf_const0) -+ { -+ /* (Re-)discover loops so that bb->loop_father can be used in the -+ analysis below. 
*/ -+ loop_optimizer_init (AVOID_CFG_MODIFICATIONS); -+ -+ /* Generate a vxorps at entry of the nearest dominator for basic -+ blocks with conversions, which is in the the fake loop that -+ contains the whole function, so that there is only a single -+ vxorps in the whole function. */ -+ bb = nearest_common_dominator_for_set (CDI_DOMINATORS, -+ convert_bbs); -+ while (bb->loop_father->latch -+ != EXIT_BLOCK_PTR_FOR_FN (cfun)) -+ bb = get_immediate_dominator (CDI_DOMINATORS, -+ bb->loop_father->header); -+ -+ set = gen_rtx_SET (v4sf_const0, CONST0_RTX (V4SFmode)); -+ -+ insn = BB_HEAD (bb); -+ while (insn && !NONDEBUG_INSN_P (insn)) -+ { -+ if (insn == BB_END (bb)) -+ { -+ insn = NULL; -+ break; -+ } -+ insn = NEXT_INSN (insn); -+ } -+ if (insn == BB_HEAD (bb)) -+ set_insn = emit_insn_before (set, insn); -+ else -+ set_insn = emit_insn_after (set, -+ insn ? PREV_INSN (insn) : BB_END (bb)); -+ df_insn_rescan (set_insn); -+ df_process_deferred_rescans (); -+ loop_optimizer_finalize (); -+ -+ if (!control_flow_insns.is_empty ()) -+ { -+ free_dominance_info (CDI_DOMINATORS); -+ -+ unsigned int i; -+ FOR_EACH_VEC_ELT (control_flow_insns, i, insn) -+ if (control_flow_insn_p (insn)) -+ { -+ /* Split the block after insn. There will be a fallthru -+ edge, which is OK so we keep it. We have to create -+ the exception edges ourselves. */ -+ bb = BLOCK_FOR_INSN (insn); -+ split_block (bb, insn); -+ rtl_make_eh_edge (NULL, bb, BB_END (bb)); -+ } -+ } -+ } -+ -+ bitmap_obstack_release (NULL); -+ BITMAP_FREE (convert_bbs); -+ -+ timevar_pop (TV_MACH_DEP); -+ return 0; -+} -+ -+namespace { -+ -+const pass_data pass_data_remove_partial_avx_dependency = -+{ -+ RTL_PASS, /* type */ -+ "rpad", /* name */ -+ OPTGROUP_NONE, /* optinfo_flags */ -+ TV_MACH_DEP, /* tv_id */ -+ 0, /* properties_required */ -+ 0, /* properties_provided */ -+ 0, /* properties_destroyed */ -+ 0, /* todo_flags_start */ -+ TODO_df_finish, /* todo_flags_finish */ -+}; -+ -+class pass_remove_partial_avx_dependency : public rtl_opt_pass -+{ -+public: -+ pass_remove_partial_avx_dependency (gcc::context *ctxt) -+ : rtl_opt_pass (pass_data_remove_partial_avx_dependency, ctxt) -+ {} -+ -+ /* opt_pass methods: */ -+ virtual bool gate (function *) -+ { -+ return (TARGET_AVX -+ && TARGET_SSE_PARTIAL_REG_DEPENDENCY -+ && TARGET_SSE_MATH -+ && optimize -+ && optimize_function_for_speed_p (cfun)); -+ } -+ -+ virtual unsigned int execute (function *) -+ { -+ return remove_partial_avx_dependency (); -+ } -+}; // class pass_rpad -+ -+} // anon namespace -+ -+rtl_opt_pass * -+make_pass_remove_partial_avx_dependency (gcc::context *ctxt) -+{ -+ return new pass_remove_partial_avx_dependency (ctxt); -+} -+ -+/* This compares the priority of target features in function DECL1 -+ and DECL2. It returns positive value if DECL1 is higher priority, -+ negative value if DECL2 is higher priority and 0 if they are the -+ same. */ -+ -+int -+ix86_compare_version_priority (tree decl1, tree decl2) -+{ -+ unsigned int priority1 = get_builtin_code_for_version (decl1, NULL); -+ unsigned int priority2 = get_builtin_code_for_version (decl2, NULL); -+ -+ return (int)priority1 - (int)priority2; -+} -+ -+/* V1 and V2 point to function versions with different priorities -+ based on the target ISA. This function compares their priorities. 
*/ -+ -+static int -+feature_compare (const void *v1, const void *v2) -+{ -+ typedef struct _function_version_info -+ { -+ tree version_decl; -+ tree predicate_chain; -+ unsigned int dispatch_priority; -+ } function_version_info; -+ -+ const function_version_info c1 = *(const function_version_info *)v1; -+ const function_version_info c2 = *(const function_version_info *)v2; -+ return (c2.dispatch_priority - c1.dispatch_priority); -+} -+ -+/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL -+ to return a pointer to VERSION_DECL if the outcome of the expression -+ formed by PREDICATE_CHAIN is true. This function will be called during -+ version dispatch to decide which function version to execute. It returns -+ the basic block at the end, to which more conditions can be added. */ -+ -+static basic_block -+add_condition_to_bb (tree function_decl, tree version_decl, -+ tree predicate_chain, basic_block new_bb) -+{ -+ gimple *return_stmt; -+ tree convert_expr, result_var; -+ gimple *convert_stmt; -+ gimple *call_cond_stmt; -+ gimple *if_else_stmt; -+ -+ basic_block bb1, bb2, bb3; -+ edge e12, e23; -+ -+ tree cond_var, and_expr_var = NULL_TREE; -+ gimple_seq gseq; -+ -+ tree predicate_decl, predicate_arg; -+ -+ push_cfun (DECL_STRUCT_FUNCTION (function_decl)); -+ -+ gcc_assert (new_bb != NULL); -+ gseq = bb_seq (new_bb); -+ -+ -+ convert_expr = build1 (CONVERT_EXPR, ptr_type_node, -+ build_fold_addr_expr (version_decl)); -+ result_var = create_tmp_var (ptr_type_node); -+ convert_stmt = gimple_build_assign (result_var, convert_expr); -+ return_stmt = gimple_build_return (result_var); -+ -+ if (predicate_chain == NULL_TREE) -+ { -+ gimple_seq_add_stmt (&gseq, convert_stmt); -+ gimple_seq_add_stmt (&gseq, return_stmt); -+ set_bb_seq (new_bb, gseq); -+ gimple_set_bb (convert_stmt, new_bb); -+ gimple_set_bb (return_stmt, new_bb); -+ pop_cfun (); -+ return new_bb; -+ } -+ -+ while (predicate_chain != NULL) -+ { -+ cond_var = create_tmp_var (integer_type_node); -+ predicate_decl = TREE_PURPOSE (predicate_chain); -+ predicate_arg = TREE_VALUE (predicate_chain); -+ call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg); -+ gimple_call_set_lhs (call_cond_stmt, cond_var); -+ -+ gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl)); -+ gimple_set_bb (call_cond_stmt, new_bb); -+ gimple_seq_add_stmt (&gseq, call_cond_stmt); -+ -+ predicate_chain = TREE_CHAIN (predicate_chain); -+ -+ if (and_expr_var == NULL) -+ and_expr_var = cond_var; -+ else -+ { -+ gimple *assign_stmt; -+ /* Use MIN_EXPR to check if any integer is zero?. 
-+ and_expr_var = min_expr */ -+ assign_stmt = gimple_build_assign (and_expr_var, -+ build2 (MIN_EXPR, integer_type_node, -+ cond_var, and_expr_var)); -+ -+ gimple_set_block (assign_stmt, DECL_INITIAL (function_decl)); -+ gimple_set_bb (assign_stmt, new_bb); -+ gimple_seq_add_stmt (&gseq, assign_stmt); -+ } -+ } -+ -+ if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var, -+ integer_zero_node, -+ NULL_TREE, NULL_TREE); -+ gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl)); -+ gimple_set_bb (if_else_stmt, new_bb); -+ gimple_seq_add_stmt (&gseq, if_else_stmt); -+ -+ gimple_seq_add_stmt (&gseq, convert_stmt); -+ gimple_seq_add_stmt (&gseq, return_stmt); -+ set_bb_seq (new_bb, gseq); -+ -+ bb1 = new_bb; -+ e12 = split_block (bb1, if_else_stmt); -+ bb2 = e12->dest; -+ e12->flags &= ~EDGE_FALLTHRU; -+ e12->flags |= EDGE_TRUE_VALUE; -+ -+ e23 = split_block (bb2, return_stmt); -+ -+ gimple_set_bb (convert_stmt, bb2); -+ gimple_set_bb (return_stmt, bb2); -+ -+ bb3 = e23->dest; -+ make_edge (bb1, bb3, EDGE_FALSE_VALUE); -+ -+ remove_edge (e23); -+ make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0); -+ -+ pop_cfun (); -+ -+ return bb3; -+} -+ -+/* This function generates the dispatch function for -+ multi-versioned functions. DISPATCH_DECL is the function which will -+ contain the dispatch logic. FNDECLS are the function choices for -+ dispatch, and is a tree chain. EMPTY_BB is the basic block pointer -+ in DISPATCH_DECL in which the dispatch code is generated. */ -+ -+static int -+dispatch_function_versions (tree dispatch_decl, -+ void *fndecls_p, -+ basic_block *empty_bb) -+{ -+ tree default_decl; -+ gimple *ifunc_cpu_init_stmt; -+ gimple_seq gseq; -+ int ix; -+ tree ele; -+ vec *fndecls; -+ unsigned int num_versions = 0; -+ unsigned int actual_versions = 0; -+ unsigned int i; -+ -+ struct _function_version_info -+ { -+ tree version_decl; -+ tree predicate_chain; -+ unsigned int dispatch_priority; -+ }*function_version_info; -+ -+ gcc_assert (dispatch_decl != NULL -+ && fndecls_p != NULL -+ && empty_bb != NULL); -+ -+ /*fndecls_p is actually a vector. */ -+ fndecls = static_cast *> (fndecls_p); -+ -+ /* At least one more version other than the default. */ -+ num_versions = fndecls->length (); -+ gcc_assert (num_versions >= 2); -+ -+ function_version_info = (struct _function_version_info *) -+ XNEWVEC (struct _function_version_info, (num_versions - 1)); -+ -+ /* The first version in the vector is the default decl. */ -+ default_decl = (*fndecls)[0]; -+ -+ push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl)); -+ -+ gseq = bb_seq (*empty_bb); -+ /* Function version dispatch is via IFUNC. IFUNC resolvers fire before -+ constructors, so explicity call __builtin_cpu_init here. */ -+ ifunc_cpu_init_stmt -+ = gimple_build_call_vec (get_ix86_builtin (IX86_BUILTIN_CPU_INIT), vNULL); -+ gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt); -+ gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb); -+ set_bb_seq (*empty_bb, gseq); -+ -+ pop_cfun (); -+ -+ -+ for (ix = 1; fndecls->iterate (ix, &ele); ++ix) -+ { -+ tree version_decl = ele; -+ tree predicate_chain = NULL_TREE; -+ unsigned int priority; -+ /* Get attribute string, parse it and find the right predicate decl. -+ The predicate function could be a lengthy combination of many -+ features, like arch-type and various isa-variants. 
*/ -+ priority = get_builtin_code_for_version (version_decl, -+ &predicate_chain); -+ -+ if (predicate_chain == NULL_TREE) -+ continue; -+ -+ function_version_info [actual_versions].version_decl = version_decl; -+ function_version_info [actual_versions].predicate_chain -+ = predicate_chain; -+ function_version_info [actual_versions].dispatch_priority = priority; -+ actual_versions++; -+ } -+ -+ /* Sort the versions according to descending order of dispatch priority. The -+ priority is based on the ISA. This is not a perfect solution. There -+ could still be ambiguity. If more than one function version is suitable -+ to execute, which one should be dispatched? In future, allow the user -+ to specify a dispatch priority next to the version. */ -+ qsort (function_version_info, actual_versions, -+ sizeof (struct _function_version_info), feature_compare); -+ -+ for (i = 0; i < actual_versions; ++i) -+ *empty_bb = add_condition_to_bb (dispatch_decl, -+ function_version_info[i].version_decl, -+ function_version_info[i].predicate_chain, -+ *empty_bb); -+ -+ /* dispatch default version at the end. */ -+ *empty_bb = add_condition_to_bb (dispatch_decl, default_decl, -+ NULL, *empty_bb); -+ -+ free (function_version_info); -+ return 0; -+} -+ -+/* This function changes the assembler name for functions that are -+ versions. If DECL is a function version and has a "target" -+ attribute, it appends the attribute string to its assembler name. */ -+ -+static tree -+ix86_mangle_function_version_assembler_name (tree decl, tree id) -+{ -+ tree version_attr; -+ const char *orig_name, *version_string; -+ char *attr_str, *assembler_name; -+ -+ if (DECL_DECLARED_INLINE_P (decl) -+ && lookup_attribute ("gnu_inline", -+ DECL_ATTRIBUTES (decl))) -+ error_at (DECL_SOURCE_LOCATION (decl), -+ "function versions cannot be marked as %," -+ " bodies have to be generated"); -+ -+ if (DECL_VIRTUAL_P (decl) -+ || DECL_VINDEX (decl)) -+ sorry ("virtual function multiversioning not supported"); -+ -+ version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl)); -+ -+ /* target attribute string cannot be NULL. */ -+ gcc_assert (version_attr != NULL_TREE); -+ -+ orig_name = IDENTIFIER_POINTER (id); -+ version_string -+ = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr))); -+ -+ if (strcmp (version_string, "default") == 0) -+ return id; -+ -+ attr_str = sorted_attr_string (TREE_VALUE (version_attr)); -+ assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2); -+ -+ sprintf (assembler_name, "%s.%s", orig_name, attr_str); -+ -+ /* Allow assembler name to be modified if already set. */ -+ if (DECL_ASSEMBLER_NAME_SET_P (decl)) -+ SET_DECL_RTL (decl, NULL); -+ -+ tree ret = get_identifier (assembler_name); -+ XDELETEVEC (attr_str); -+ XDELETEVEC (assembler_name); -+ return ret; -+} -+ -+tree -+ix86_mangle_decl_assembler_name (tree decl, tree id) -+{ -+ /* For function version, add the target suffix to the assembler name. */ -+ if (TREE_CODE (decl) == FUNCTION_DECL -+ && DECL_FUNCTION_VERSIONED (decl)) -+ id = ix86_mangle_function_version_assembler_name (decl, id); -+#ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME -+ id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id); -+#endif -+ -+ return id; -+} -+ -+/* Make a dispatcher declaration for the multi-versioned function DECL. -+ Calls to DECL function will be replaced with calls to the dispatcher -+ by the front-end. Returns the decl of the dispatcher function. 
*/ -+ -+tree -+ix86_get_function_versions_dispatcher (void *decl) -+{ -+ tree fn = (tree) decl; -+ struct cgraph_node *node = NULL; -+ struct cgraph_node *default_node = NULL; -+ struct cgraph_function_version_info *node_v = NULL; -+ struct cgraph_function_version_info *first_v = NULL; -+ -+ tree dispatch_decl = NULL; -+ -+ struct cgraph_function_version_info *default_version_info = NULL; -+ -+ gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn)); -+ -+ node = cgraph_node::get (fn); -+ gcc_assert (node != NULL); -+ -+ node_v = node->function_version (); -+ gcc_assert (node_v != NULL); -+ -+ if (node_v->dispatcher_resolver != NULL) -+ return node_v->dispatcher_resolver; -+ -+ /* Find the default version and make it the first node. */ -+ first_v = node_v; -+ /* Go to the beginning of the chain. */ -+ while (first_v->prev != NULL) -+ first_v = first_v->prev; -+ default_version_info = first_v; -+ while (default_version_info != NULL) -+ { -+ if (is_function_default_version -+ (default_version_info->this_node->decl)) -+ break; -+ default_version_info = default_version_info->next; -+ } -+ -+ /* If there is no default node, just return NULL. */ -+ if (default_version_info == NULL) -+ return NULL; -+ -+ /* Make default info the first node. */ -+ if (first_v != default_version_info) -+ { -+ default_version_info->prev->next = default_version_info->next; -+ if (default_version_info->next) -+ default_version_info->next->prev = default_version_info->prev; -+ first_v->prev = default_version_info; -+ default_version_info->next = first_v; -+ default_version_info->prev = NULL; -+ } -+ -+ default_node = default_version_info->this_node; -+ -+#if defined (ASM_OUTPUT_TYPE_DIRECTIVE) -+ if (targetm.has_ifunc_p ()) -+ { -+ struct cgraph_function_version_info *it_v = NULL; -+ struct cgraph_node *dispatcher_node = NULL; -+ struct cgraph_function_version_info *dispatcher_version_info = NULL; -+ -+ /* Right now, the dispatching is done via ifunc. */ -+ dispatch_decl = make_dispatcher_decl (default_node->decl); -+ -+ dispatcher_node = cgraph_node::get_create (dispatch_decl); -+ gcc_assert (dispatcher_node != NULL); -+ dispatcher_node->dispatcher_function = 1; -+ dispatcher_version_info -+ = dispatcher_node->insert_new_function_version (); -+ dispatcher_version_info->next = default_version_info; -+ dispatcher_node->definition = 1; -+ -+ /* Set the dispatcher for all the versions. */ -+ it_v = default_version_info; -+ while (it_v != NULL) -+ { -+ it_v->dispatcher_resolver = dispatch_decl; -+ it_v = it_v->next; -+ } -+ } -+ else -+#endif -+ { -+ error_at (DECL_SOURCE_LOCATION (default_node->decl), -+ "multiversioning needs ifunc which is not supported " -+ "on this target"); -+ } -+ -+ return dispatch_decl; -+} -+ -+/* Make the resolver function decl to dispatch the versions of -+ a multi-versioned function, DEFAULT_DECL. IFUNC_ALIAS_DECL is -+ ifunc alias that will point to the created resolver. Create an -+ empty basic block in the resolver and store the pointer in -+ EMPTY_BB. Return the decl of the resolver function. */ -+ -+static tree -+make_resolver_func (const tree default_decl, -+ const tree ifunc_alias_decl, -+ basic_block *empty_bb) -+{ -+ char *resolver_name; -+ tree decl, type, decl_name, t; -+ -+ /* IFUNC's have to be globally visible. So, if the default_decl is -+ not, then the name of the IFUNC should be made unique. 
*/ -+ if (TREE_PUBLIC (default_decl) == 0) -+ { -+ char *ifunc_name = make_unique_name (default_decl, "ifunc", true); -+ symtab->change_decl_assembler_name (ifunc_alias_decl, -+ get_identifier (ifunc_name)); -+ XDELETEVEC (ifunc_name); -+ } -+ -+ resolver_name = make_unique_name (default_decl, "resolver", false); -+ -+ /* The resolver function should return a (void *). */ -+ type = build_function_type_list (ptr_type_node, NULL_TREE); -+ -+ decl = build_fn_decl (resolver_name, type); -+ decl_name = get_identifier (resolver_name); -+ SET_DECL_ASSEMBLER_NAME (decl, decl_name); -+ -+ DECL_NAME (decl) = decl_name; -+ TREE_USED (decl) = 1; -+ DECL_ARTIFICIAL (decl) = 1; -+ DECL_IGNORED_P (decl) = 1; -+ TREE_PUBLIC (decl) = 0; -+ DECL_UNINLINABLE (decl) = 1; -+ -+ /* Resolver is not external, body is generated. */ -+ DECL_EXTERNAL (decl) = 0; -+ DECL_EXTERNAL (ifunc_alias_decl) = 0; -+ -+ DECL_CONTEXT (decl) = NULL_TREE; -+ DECL_INITIAL (decl) = make_node (BLOCK); -+ DECL_STATIC_CONSTRUCTOR (decl) = 0; -+ -+ if (DECL_COMDAT_GROUP (default_decl) -+ || TREE_PUBLIC (default_decl)) -+ { -+ /* In this case, each translation unit with a call to this -+ versioned function will put out a resolver. Ensure it -+ is comdat to keep just one copy. */ -+ DECL_COMDAT (decl) = 1; -+ make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl)); -+ } -+ /* Build result decl and add to function_decl. */ -+ t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node); -+ DECL_CONTEXT (t) = decl; -+ DECL_ARTIFICIAL (t) = 1; -+ DECL_IGNORED_P (t) = 1; -+ DECL_RESULT (decl) = t; -+ -+ gimplify_function_tree (decl); -+ push_cfun (DECL_STRUCT_FUNCTION (decl)); -+ *empty_bb = init_lowered_empty_function (decl, false, -+ profile_count::uninitialized ()); -+ -+ cgraph_node::add_new_function (decl, true); -+ symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl)); -+ -+ pop_cfun (); -+ -+ gcc_assert (ifunc_alias_decl != NULL); -+ /* Mark ifunc_alias_decl as "ifunc" with resolver as resolver_name. */ -+ DECL_ATTRIBUTES (ifunc_alias_decl) -+ = make_attribute ("ifunc", resolver_name, -+ DECL_ATTRIBUTES (ifunc_alias_decl)); -+ -+ /* Create the alias for dispatch to resolver here. */ -+ cgraph_node::create_same_body_alias (ifunc_alias_decl, decl); -+ XDELETEVEC (resolver_name); -+ return decl; -+} -+ -+/* Generate the dispatching code body to dispatch multi-versioned function -+ DECL. The target hook is called to process the "target" attributes and -+ provide the code to dispatch the right function at run-time. NODE points -+ to the dispatcher decl whose body will be created. */ -+ -+tree -+ix86_generate_version_dispatcher_body (void *node_p) -+{ -+ tree resolver_decl; -+ basic_block empty_bb; -+ tree default_ver_decl; -+ struct cgraph_node *versn; -+ struct cgraph_node *node; -+ -+ struct cgraph_function_version_info *node_version_info = NULL; -+ struct cgraph_function_version_info *versn_info = NULL; -+ -+ node = (cgraph_node *)node_p; -+ -+ node_version_info = node->function_version (); -+ gcc_assert (node->dispatcher_function -+ && node_version_info != NULL); -+ -+ if (node_version_info->dispatcher_resolver) -+ return node_version_info->dispatcher_resolver; -+ -+ /* The first version in the chain corresponds to the default version. */ -+ default_ver_decl = node_version_info->next->this_node->decl; -+ -+ /* node is going to be an alias, so remove the finalized bit. 
*/ -+ node->definition = false; -+ -+ resolver_decl = make_resolver_func (default_ver_decl, -+ node->decl, &empty_bb); -+ -+ node_version_info->dispatcher_resolver = resolver_decl; -+ -+ push_cfun (DECL_STRUCT_FUNCTION (resolver_decl)); -+ -+ auto_vec fn_ver_vec; -+ -+ for (versn_info = node_version_info->next; versn_info; -+ versn_info = versn_info->next) -+ { -+ versn = versn_info->this_node; -+ /* Check for virtual functions here again, as by this time it should -+ have been determined if this function needs a vtable index or -+ not. This happens for methods in derived classes that override -+ virtual methods in base classes but are not explicitly marked as -+ virtual. */ -+ if (DECL_VINDEX (versn->decl)) -+ sorry ("virtual function multiversioning not supported"); -+ -+ fn_ver_vec.safe_push (versn->decl); -+ } -+ -+ dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb); -+ cgraph_edge::rebuild_edges (); -+ pop_cfun (); -+ return resolver_decl; -+} -+ -+ -diff --git a/gcc/config/i386/i386-features.h b/gcc/config/i386/i386-features.h -new file mode 100644 -index 000000000..358122249 ---- /dev/null -+++ b/gcc/config/i386/i386-features.h -@@ -0,0 +1,201 @@ -+/* Copyright (C) 1988-2019 Free Software Foundation, Inc. -+ -+This file is part of GCC. -+ -+GCC is free software; you can redistribute it and/or modify -+it under the terms of the GNU General Public License as published by -+the Free Software Foundation; either version 3, or (at your option) -+any later version. -+ -+GCC is distributed in the hope that it will be useful, -+but WITHOUT ANY WARRANTY; without even the implied warranty of -+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+GNU General Public License for more details. -+ -+You should have received a copy of the GNU General Public License -+along with GCC; see the file COPYING3. If not see -+. */ -+ -+#ifndef GCC_I386_FEATURES_H -+#define GCC_I386_FEATURES_H -+ -+enum xlogue_stub { -+ XLOGUE_STUB_SAVE, -+ XLOGUE_STUB_RESTORE, -+ XLOGUE_STUB_RESTORE_TAIL, -+ XLOGUE_STUB_SAVE_HFP, -+ XLOGUE_STUB_RESTORE_HFP, -+ XLOGUE_STUB_RESTORE_HFP_TAIL, -+ -+ XLOGUE_STUB_COUNT -+}; -+ -+enum xlogue_stub_sets { -+ XLOGUE_SET_ALIGNED, -+ XLOGUE_SET_ALIGNED_PLUS_8, -+ XLOGUE_SET_HFP_ALIGNED_OR_REALIGN, -+ XLOGUE_SET_HFP_ALIGNED_PLUS_8, -+ -+ XLOGUE_SET_COUNT -+}; -+ -+/* Register save/restore layout used by out-of-line stubs. */ -+class xlogue_layout { -+public: -+ struct reginfo -+ { -+ unsigned regno; -+ HOST_WIDE_INT offset; /* Offset used by stub base pointer (rax or -+ rsi) to where each register is stored. */ -+ }; -+ -+ unsigned get_nregs () const {return m_nregs;} -+ HOST_WIDE_INT get_stack_align_off_in () const {return m_stack_align_off_in;} -+ -+ const reginfo &get_reginfo (unsigned reg) const -+ { -+ gcc_assert (reg < m_nregs); -+ return m_regs[reg]; -+ } -+ -+ static const char *get_stub_name (enum xlogue_stub stub, -+ unsigned n_extra_args); -+ -+ /* Returns an rtx for the stub's symbol based upon -+ 1.) the specified stub (save, restore or restore_ret) and -+ 2.) the value of cfun->machine->call_ms2sysv_extra_regs and -+ 3.) rather or not stack alignment is being performed. */ -+ static rtx get_stub_rtx (enum xlogue_stub stub); -+ -+ /* Returns the amount of stack space (including padding) that the stub -+ needs to store registers based upon data in the machine_function. 
*/ -+ HOST_WIDE_INT get_stack_space_used () const -+ { -+ const struct machine_function *m = cfun->machine; -+ unsigned last_reg = m->call_ms2sysv_extra_regs + MIN_REGS - 1; -+ -+ gcc_assert (m->call_ms2sysv_extra_regs <= MAX_EXTRA_REGS); -+ return m_regs[last_reg].offset + STUB_INDEX_OFFSET; -+ } -+ -+ /* Returns the offset for the base pointer used by the stub. */ -+ HOST_WIDE_INT get_stub_ptr_offset () const -+ { -+ return STUB_INDEX_OFFSET + m_stack_align_off_in; -+ } -+ -+ static const struct xlogue_layout &get_instance (); -+ static unsigned count_stub_managed_regs (); -+ static bool is_stub_managed_reg (unsigned regno, unsigned count); -+ -+ static const HOST_WIDE_INT STUB_INDEX_OFFSET = 0x70; -+ static const unsigned MIN_REGS = NUM_X86_64_MS_CLOBBERED_REGS; -+ static const unsigned MAX_REGS = 18; -+ static const unsigned MAX_EXTRA_REGS = MAX_REGS - MIN_REGS; -+ static const unsigned VARIANT_COUNT = MAX_EXTRA_REGS + 1; -+ static const unsigned STUB_NAME_MAX_LEN = 20; -+ static const char * const STUB_BASE_NAMES[XLOGUE_STUB_COUNT]; -+ static const unsigned REG_ORDER[MAX_REGS]; -+ static const unsigned REG_ORDER_REALIGN[MAX_REGS]; -+ -+private: -+ xlogue_layout (); -+ xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp); -+ xlogue_layout (const xlogue_layout &); -+ -+ /* True if hard frame pointer is used. */ -+ bool m_hfp; -+ -+ /* Max number of register this layout manages. */ -+ unsigned m_nregs; -+ -+ /* Incoming offset from 16-byte alignment. */ -+ HOST_WIDE_INT m_stack_align_off_in; -+ -+ /* Register order and offsets. */ -+ struct reginfo m_regs[MAX_REGS]; -+ -+ /* Lazy-inited cache of symbol names for stubs. */ -+ static char s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT] -+ [STUB_NAME_MAX_LEN]; -+ -+ static const xlogue_layout s_instances[XLOGUE_SET_COUNT]; -+}; -+ -+namespace { -+ -+class scalar_chain -+{ -+ public: -+ scalar_chain (); -+ virtual ~scalar_chain (); -+ -+ static unsigned max_id; -+ -+ /* ID of a chain. */ -+ unsigned int chain_id; -+ /* A queue of instructions to be included into a chain. */ -+ bitmap queue; -+ /* Instructions included into a chain. */ -+ bitmap insns; -+ /* All registers defined by a chain. */ -+ bitmap defs; -+ /* Registers used in both vector and sclar modes. */ -+ bitmap defs_conv; -+ -+ void build (bitmap candidates, unsigned insn_uid); -+ virtual int compute_convert_gain () = 0; -+ int convert (); -+ -+ protected: -+ void add_to_queue (unsigned insn_uid); -+ void emit_conversion_insns (rtx insns, rtx_insn *pos); -+ -+ private: -+ void add_insn (bitmap candidates, unsigned insn_uid); -+ void analyze_register_chain (bitmap candidates, df_ref ref); -+ virtual void mark_dual_mode_def (df_ref def) = 0; -+ virtual void convert_insn (rtx_insn *insn) = 0; -+ virtual void convert_registers () = 0; -+}; -+ -+class dimode_scalar_chain : public scalar_chain -+{ -+ public: -+ int compute_convert_gain (); -+ private: -+ void mark_dual_mode_def (df_ref def); -+ rtx replace_with_subreg (rtx x, rtx reg, rtx subreg); -+ void replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx subreg); -+ void convert_insn (rtx_insn *insn); -+ void convert_op (rtx *op, rtx_insn *insn); -+ void convert_reg (unsigned regno); -+ void make_vector_copies (unsigned regno); -+ void convert_registers (); -+ int vector_const_cost (rtx exp); -+}; -+ -+class timode_scalar_chain : public scalar_chain -+{ -+ public: -+ /* Convert from TImode to V1TImode is always faster. 
*/ -+ int compute_convert_gain () { return 1; } -+ -+ private: -+ void mark_dual_mode_def (df_ref def); -+ void fix_debug_reg_uses (rtx reg); -+ void convert_insn (rtx_insn *insn); -+ /* We don't convert registers to difference size. */ -+ void convert_registers () {} -+}; -+ -+} // anon namespace -+ -+bool ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined); -+int ix86_compare_version_priority (tree decl1, tree decl2); -+tree ix86_generate_version_dispatcher_body (void *node_p); -+tree ix86_get_function_versions_dispatcher (void *decl); -+tree ix86_mangle_decl_assembler_name (tree decl, tree id); -+ -+ -+#endif /* GCC_I386_FEATURES_H */ -diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c -new file mode 100644 -index 000000000..4a03bead8 ---- /dev/null -+++ b/gcc/config/i386/i386-options.c -@@ -0,0 +1,3707 @@ -+/* Copyright (C) 1988-2019 Free Software Foundation, Inc. -+ -+This file is part of GCC. -+ -+GCC is free software; you can redistribute it and/or modify -+it under the terms of the GNU General Public License as published by -+the Free Software Foundation; either version 3, or (at your option) -+any later version. -+ -+GCC is distributed in the hope that it will be useful, -+but WITHOUT ANY WARRANTY; without even the implied warranty of -+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+GNU General Public License for more details. -+ -+You should have received a copy of the GNU General Public License -+along with GCC; see the file COPYING3. If not see -+. */ -+ -+#define IN_TARGET_CODE 1 -+ -+#include "config.h" -+#include "system.h" -+#include "coretypes.h" -+#include "backend.h" -+#include "rtl.h" -+#include "tree.h" -+#include "memmodel.h" -+#include "gimple.h" -+#include "cfghooks.h" -+#include "cfgloop.h" -+#include "df.h" -+#include "tm_p.h" -+#include "stringpool.h" -+#include "expmed.h" -+#include "optabs.h" -+#include "regs.h" -+#include "emit-rtl.h" -+#include "recog.h" -+#include "cgraph.h" -+#include "diagnostic.h" -+#include "cfgbuild.h" -+#include "alias.h" -+#include "fold-const.h" -+#include "attribs.h" -+#include "calls.h" -+#include "stor-layout.h" -+#include "varasm.h" -+#include "output.h" -+#include "insn-attr.h" -+#include "flags.h" -+#include "except.h" -+#include "explow.h" -+#include "expr.h" -+#include "cfgrtl.h" -+#include "common/common-target.h" -+#include "langhooks.h" -+#include "reload.h" -+#include "gimplify.h" -+#include "dwarf2.h" -+#include "tm-constrs.h" -+#include "params.h" -+#include "cselib.h" -+#include "sched-int.h" -+#include "opts.h" -+#include "tree-pass.h" -+#include "context.h" -+#include "pass_manager.h" -+#include "target-globals.h" -+#include "gimple-iterator.h" -+#include "tree-vectorizer.h" -+#include "shrink-wrap.h" -+#include "builtins.h" -+#include "rtl-iter.h" -+#include "tree-iterator.h" -+#include "dbgcnt.h" -+#include "case-cfn-macros.h" -+#include "dojump.h" -+#include "fold-const-call.h" -+#include "tree-vrp.h" -+#include "tree-ssanames.h" -+#include "selftest.h" -+#include "selftest-rtl.h" -+#include "print-rtl.h" -+#include "intl.h" -+#include "ifcvt.h" -+#include "symbol-summary.h" -+#include "ipa-prop.h" -+#include "ipa-fnsummary.h" -+#include "wide-int-bitmask.h" -+#include "tree-vector-builder.h" -+#include "debug.h" -+#include "dwarf2out.h" -+#include "i386-options.h" -+ -+#include "x86-tune-costs.h" -+ -+#ifndef SUBTARGET32_DEFAULT_CPU -+#define SUBTARGET32_DEFAULT_CPU "i386" -+#endif -+ -+/* Processor feature/optimization bitmasks. 
*/ -+#define m_386 (HOST_WIDE_INT_1U< 70) -+ { -+ *ptr++ = '\\'; -+ *ptr++ = '\n'; -+ line_len = 0; -+ } -+ } -+ -+ for (j = 0; j < 2; j++) -+ if (opts[i][j]) -+ { -+ memcpy (ptr, opts[i][j], len2[j]); -+ ptr += len2[j]; -+ line_len += len2[j]; -+ } -+ } -+ -+ *ptr = '\0'; -+ gcc_assert (ret + len >= ptr); -+ -+ return ret; -+} -+ -+/* Function that is callable from the debugger to print the current -+ options. */ -+void ATTRIBUTE_UNUSED -+ix86_debug_options (void) -+{ -+ char *opts = ix86_target_string (ix86_isa_flags, ix86_isa_flags2, -+ target_flags, ix86_target_flags, -+ ix86_arch_string,ix86_tune_string, -+ ix86_fpmath, true, true); -+ -+ if (opts) -+ { -+ fprintf (stderr, "%s\n\n", opts); -+ free (opts); -+ } -+ else -+ fputs ("\n\n", stderr); -+ -+ return; -+} -+ -+/* Save the current options */ -+ -+void -+ix86_function_specific_save (struct cl_target_option *ptr, -+ struct gcc_options *opts) -+{ -+ ptr->arch = ix86_arch; -+ ptr->schedule = ix86_schedule; -+ ptr->prefetch_sse = x86_prefetch_sse; -+ ptr->tune = ix86_tune; -+ ptr->branch_cost = ix86_branch_cost; -+ ptr->tune_defaulted = ix86_tune_defaulted; -+ ptr->arch_specified = ix86_arch_specified; -+ ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit; -+ ptr->x_ix86_isa_flags2_explicit = opts->x_ix86_isa_flags2_explicit; -+ ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit; -+ ptr->x_ix86_arch_string = opts->x_ix86_arch_string; -+ ptr->x_ix86_tune_string = opts->x_ix86_tune_string; -+ ptr->x_ix86_cmodel = opts->x_ix86_cmodel; -+ ptr->x_ix86_abi = opts->x_ix86_abi; -+ ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect; -+ ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost; -+ ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes; -+ ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer; -+ ptr->x_ix86_force_drap = opts->x_ix86_force_drap; -+ ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg; -+ ptr->x_ix86_pmode = opts->x_ix86_pmode; -+ ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg; -+ ptr->x_ix86_recip_name = opts->x_ix86_recip_name; -+ ptr->x_ix86_regparm = opts->x_ix86_regparm; -+ ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold; -+ ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx; -+ ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard; -+ ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg; -+ ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect; -+ ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string; -+ ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy; -+ ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy; -+ ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default; -+ ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type; -+ -+ /* The fields are char but the variables are not; make sure the -+ values fit in the fields. */ -+ gcc_assert (ptr->arch == ix86_arch); -+ gcc_assert (ptr->schedule == ix86_schedule); -+ gcc_assert (ptr->tune == ix86_tune); -+ gcc_assert (ptr->branch_cost == ix86_branch_cost); -+} -+ -+/* Feature tests against the various architecture variations, used to create -+ ix86_arch_features based on the processor mask. */ -+static unsigned HOST_WIDE_INT initial_ix86_arch_features[X86_ARCH_LAST] = { -+ /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */ -+ ~(m_386 | m_486 | m_PENT | m_LAKEMONT | m_K6), -+ -+ /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. 
*/ -+ ~m_386, -+ -+ /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */ -+ ~(m_386 | m_486), -+ -+ /* X86_ARCH_XADD: Exchange and add was added for 80486. */ -+ ~m_386, -+ -+ /* X86_ARCH_BSWAP: Byteswap was added for 80486. */ -+ ~m_386, -+}; -+ -+/* This table must be in sync with enum processor_type in i386.h. */ -+static const struct processor_costs *processor_cost_table[] = -+{ -+ &generic_cost, -+ &i386_cost, -+ &i486_cost, -+ &pentium_cost, -+ &lakemont_cost, -+ &pentiumpro_cost, -+ &pentium4_cost, -+ &nocona_cost, -+ &core_cost, -+ &core_cost, -+ &core_cost, -+ &core_cost, -+ &atom_cost, -+ &slm_cost, -+ &slm_cost, -+ &slm_cost, -+ &slm_cost, -+ &slm_cost, -+ &slm_cost, -+ &skylake_cost, -+ &skylake_cost, -+ &skylake_cost, -+ &skylake_cost, -+ &skylake_cost, -+ &skylake_cost, -+ &intel_cost, -+ &geode_cost, -+ &k6_cost, -+ &athlon_cost, -+ &k8_cost, -+ &amdfam10_cost, -+ &bdver_cost, -+ &bdver_cost, -+ &bdver_cost, -+ &bdver_cost, -+ &btver1_cost, -+ &btver2_cost, -+ &znver1_cost, -+ &znver2_cost -+}; -+ -+/* Guarantee that the array is aligned with enum processor_type. */ -+STATIC_ASSERT (ARRAY_SIZE (processor_cost_table) == PROCESSOR_max); -+ -+static bool -+ix86_option_override_internal (bool main_args_p, -+ struct gcc_options *opts, -+ struct gcc_options *opts_set); -+static void -+set_ix86_tune_features (enum processor_type ix86_tune, bool dump); -+ -+/* Restore the current options */ -+ -+void -+ix86_function_specific_restore (struct gcc_options *opts, -+ struct cl_target_option *ptr) -+{ -+ enum processor_type old_tune = ix86_tune; -+ enum processor_type old_arch = ix86_arch; -+ unsigned HOST_WIDE_INT ix86_arch_mask; -+ int i; -+ -+ /* We don't change -fPIC. */ -+ opts->x_flag_pic = flag_pic; -+ -+ ix86_arch = (enum processor_type) ptr->arch; -+ ix86_schedule = (enum attr_cpu) ptr->schedule; -+ ix86_tune = (enum processor_type) ptr->tune; -+ x86_prefetch_sse = ptr->prefetch_sse; -+ opts->x_ix86_branch_cost = ptr->branch_cost; -+ ix86_tune_defaulted = ptr->tune_defaulted; -+ ix86_arch_specified = ptr->arch_specified; -+ opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit; -+ opts->x_ix86_isa_flags2_explicit = ptr->x_ix86_isa_flags2_explicit; -+ opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit; -+ opts->x_ix86_arch_string = ptr->x_ix86_arch_string; -+ opts->x_ix86_tune_string = ptr->x_ix86_tune_string; -+ opts->x_ix86_cmodel = ptr->x_ix86_cmodel; -+ opts->x_ix86_abi = ptr->x_ix86_abi; -+ opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect; -+ opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost; -+ opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes; -+ opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer; -+ opts->x_ix86_force_drap = ptr->x_ix86_force_drap; -+ opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg; -+ opts->x_ix86_pmode = ptr->x_ix86_pmode; -+ opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg; -+ opts->x_ix86_recip_name = ptr->x_ix86_recip_name; -+ opts->x_ix86_regparm = ptr->x_ix86_regparm; -+ opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold; -+ opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx; -+ opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard; -+ opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg; -+ opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect; -+ opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string; -+ opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy; -+ 
opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy; -+ opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default; -+ opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type; -+ ix86_tune_cost = processor_cost_table[ix86_tune]; -+ /* TODO: ix86_cost should be chosen at instruction or function granuality -+ so for cold code we use size_cost even in !optimize_size compilation. */ -+ if (opts->x_optimize_size) -+ ix86_cost = &ix86_size_cost; -+ else -+ ix86_cost = ix86_tune_cost; -+ -+ /* Recreate the arch feature tests if the arch changed */ -+ if (old_arch != ix86_arch) -+ { -+ ix86_arch_mask = HOST_WIDE_INT_1U << ix86_arch; -+ for (i = 0; i < X86_ARCH_LAST; ++i) -+ ix86_arch_features[i] -+ = !!(initial_ix86_arch_features[i] & ix86_arch_mask); -+ } -+ -+ /* Recreate the tune optimization tests */ -+ if (old_tune != ix86_tune) -+ set_ix86_tune_features (ix86_tune, false); -+} -+ -+/* Adjust target options after streaming them in. This is mainly about -+ reconciling them with global options. */ -+ -+void -+ix86_function_specific_post_stream_in (struct cl_target_option *ptr) -+{ -+ /* flag_pic is a global option, but ix86_cmodel is target saved option -+ partly computed from flag_pic. If flag_pic is on, adjust x_ix86_cmodel -+ for PIC, or error out. */ -+ if (flag_pic) -+ switch (ptr->x_ix86_cmodel) -+ { -+ case CM_SMALL: -+ ptr->x_ix86_cmodel = CM_SMALL_PIC; -+ break; -+ -+ case CM_MEDIUM: -+ ptr->x_ix86_cmodel = CM_MEDIUM_PIC; -+ break; -+ -+ case CM_LARGE: -+ ptr->x_ix86_cmodel = CM_LARGE_PIC; -+ break; -+ -+ case CM_KERNEL: -+ error ("code model %s does not support PIC mode", "kernel"); -+ break; -+ -+ default: -+ break; -+ } -+ else -+ switch (ptr->x_ix86_cmodel) -+ { -+ case CM_SMALL_PIC: -+ ptr->x_ix86_cmodel = CM_SMALL; -+ break; -+ -+ case CM_MEDIUM_PIC: -+ ptr->x_ix86_cmodel = CM_MEDIUM; -+ break; -+ -+ case CM_LARGE_PIC: -+ ptr->x_ix86_cmodel = CM_LARGE; -+ break; -+ -+ default: -+ break; -+ } -+} -+ -+/* Print the current options */ -+ -+void -+ix86_function_specific_print (FILE *file, int indent, -+ struct cl_target_option *ptr) -+{ -+ char *target_string -+ = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_ix86_isa_flags2, -+ ptr->x_target_flags, ptr->x_ix86_target_flags, -+ NULL, NULL, ptr->x_ix86_fpmath, false, true); -+ -+ gcc_assert (ptr->arch < PROCESSOR_max); -+ fprintf (file, "%*sarch = %d (%s)\n", -+ indent, "", -+ ptr->arch, processor_names[ptr->arch]); -+ -+ gcc_assert (ptr->tune < PROCESSOR_max); -+ fprintf (file, "%*stune = %d (%s)\n", -+ indent, "", -+ ptr->tune, processor_names[ptr->tune]); -+ -+ fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost); -+ -+ if (target_string) -+ { -+ fprintf (file, "%*s%s\n", indent, "", target_string); -+ free (target_string); -+ } -+} -+ -+ -+/* Inner function to process the attribute((target(...))), take an argument and -+ set the current options from the argument. If we have a list, recursively go -+ over the list. 
*/ -+ -+static bool -+ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[], -+ struct gcc_options *opts, -+ struct gcc_options *opts_set, -+ struct gcc_options *enum_opts_set, -+ bool target_clone_attr) -+{ -+ char *next_optstr; -+ bool ret = true; -+ -+#define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 } -+#define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 } -+#define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 } -+#define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M } -+#define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M } -+ -+ enum ix86_opt_type -+ { -+ ix86_opt_unknown, -+ ix86_opt_yes, -+ ix86_opt_no, -+ ix86_opt_str, -+ ix86_opt_enum, -+ ix86_opt_isa -+ }; -+ -+ static const struct -+ { -+ const char *string; -+ size_t len; -+ enum ix86_opt_type type; -+ int opt; -+ int mask; -+ } attrs[] = { -+ /* isa options */ -+ IX86_ATTR_ISA ("pconfig", OPT_mpconfig), -+ IX86_ATTR_ISA ("wbnoinvd", OPT_mwbnoinvd), -+ IX86_ATTR_ISA ("sgx", OPT_msgx), -+ IX86_ATTR_ISA ("avx5124fmaps", OPT_mavx5124fmaps), -+ IX86_ATTR_ISA ("avx5124vnniw", OPT_mavx5124vnniw), -+ IX86_ATTR_ISA ("avx512vpopcntdq", OPT_mavx512vpopcntdq), -+ IX86_ATTR_ISA ("avx512vbmi2", OPT_mavx512vbmi2), -+ IX86_ATTR_ISA ("avx512vnni", OPT_mavx512vnni), -+ IX86_ATTR_ISA ("avx512bitalg", OPT_mavx512bitalg), -+ -+ IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi), -+ IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma), -+ IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl), -+ IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw), -+ IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq), -+ IX86_ATTR_ISA ("avx512er", OPT_mavx512er), -+ IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf), -+ IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd), -+ IX86_ATTR_ISA ("avx512f", OPT_mavx512f), -+ IX86_ATTR_ISA ("avx2", OPT_mavx2), -+ IX86_ATTR_ISA ("fma", OPT_mfma), -+ IX86_ATTR_ISA ("xop", OPT_mxop), -+ IX86_ATTR_ISA ("fma4", OPT_mfma4), -+ IX86_ATTR_ISA ("f16c", OPT_mf16c), -+ IX86_ATTR_ISA ("avx", OPT_mavx), -+ IX86_ATTR_ISA ("sse4", OPT_msse4), -+ IX86_ATTR_ISA ("sse4.2", OPT_msse4_2), -+ IX86_ATTR_ISA ("sse4.1", OPT_msse4_1), -+ IX86_ATTR_ISA ("sse4a", OPT_msse4a), -+ IX86_ATTR_ISA ("ssse3", OPT_mssse3), -+ IX86_ATTR_ISA ("sse3", OPT_msse3), -+ IX86_ATTR_ISA ("aes", OPT_maes), -+ IX86_ATTR_ISA ("sha", OPT_msha), -+ IX86_ATTR_ISA ("pclmul", OPT_mpclmul), -+ IX86_ATTR_ISA ("sse2", OPT_msse2), -+ IX86_ATTR_ISA ("sse", OPT_msse), -+ IX86_ATTR_ISA ("3dnowa", OPT_m3dnowa), -+ IX86_ATTR_ISA ("3dnow", OPT_m3dnow), -+ IX86_ATTR_ISA ("mmx", OPT_mmmx), -+ IX86_ATTR_ISA ("rtm", OPT_mrtm), -+ IX86_ATTR_ISA ("prfchw", OPT_mprfchw), -+ IX86_ATTR_ISA ("rdseed", OPT_mrdseed), -+ IX86_ATTR_ISA ("adx", OPT_madx), -+ IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1), -+ IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt), -+ IX86_ATTR_ISA ("xsaves", OPT_mxsaves), -+ IX86_ATTR_ISA ("xsavec", OPT_mxsavec), -+ IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt), -+ IX86_ATTR_ISA ("xsave", OPT_mxsave), -+ IX86_ATTR_ISA ("abm", OPT_mabm), -+ IX86_ATTR_ISA ("bmi", OPT_mbmi), -+ IX86_ATTR_ISA ("bmi2", OPT_mbmi2), -+ IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt), -+ IX86_ATTR_ISA ("tbm", OPT_mtbm), -+ IX86_ATTR_ISA ("popcnt", OPT_mpopcnt), -+ IX86_ATTR_ISA ("cx16", OPT_mcx16), -+ IX86_ATTR_ISA ("sahf", OPT_msahf), -+ IX86_ATTR_ISA ("movbe", OPT_mmovbe), -+ IX86_ATTR_ISA ("crc32", OPT_mcrc32), -+ IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase), -+ IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd), -+ IX86_ATTR_ISA ("mwaitx", OPT_mmwaitx), -+ IX86_ATTR_ISA ("clzero", OPT_mclzero), -+ 
IX86_ATTR_ISA ("pku", OPT_mpku), -+ IX86_ATTR_ISA ("lwp", OPT_mlwp), -+ IX86_ATTR_ISA ("hle", OPT_mhle), -+ IX86_ATTR_ISA ("fxsr", OPT_mfxsr), -+ IX86_ATTR_ISA ("clwb", OPT_mclwb), -+ IX86_ATTR_ISA ("rdpid", OPT_mrdpid), -+ IX86_ATTR_ISA ("gfni", OPT_mgfni), -+ IX86_ATTR_ISA ("shstk", OPT_mshstk), -+ IX86_ATTR_ISA ("vaes", OPT_mvaes), -+ IX86_ATTR_ISA ("vpclmulqdq", OPT_mvpclmulqdq), -+ IX86_ATTR_ISA ("movdiri", OPT_mmovdiri), -+ IX86_ATTR_ISA ("movdir64b", OPT_mmovdir64b), -+ IX86_ATTR_ISA ("waitpkg", OPT_mwaitpkg), -+ IX86_ATTR_ISA ("cldemote", OPT_mcldemote), -+ IX86_ATTR_ISA ("ptwrite", OPT_mptwrite), -+ -+ /* enum options */ -+ IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), -+ -+ /* string options */ -+ IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH), -+ IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE), -+ -+ /* flag options */ -+ IX86_ATTR_YES ("cld", -+ OPT_mcld, -+ MASK_CLD), -+ -+ IX86_ATTR_NO ("fancy-math-387", -+ OPT_mfancy_math_387, -+ MASK_NO_FANCY_MATH_387), -+ -+ IX86_ATTR_YES ("ieee-fp", -+ OPT_mieee_fp, -+ MASK_IEEE_FP), -+ -+ IX86_ATTR_YES ("inline-all-stringops", -+ OPT_minline_all_stringops, -+ MASK_INLINE_ALL_STRINGOPS), -+ -+ IX86_ATTR_YES ("inline-stringops-dynamically", -+ OPT_minline_stringops_dynamically, -+ MASK_INLINE_STRINGOPS_DYNAMICALLY), -+ -+ IX86_ATTR_NO ("align-stringops", -+ OPT_mno_align_stringops, -+ MASK_NO_ALIGN_STRINGOPS), -+ -+ IX86_ATTR_YES ("recip", -+ OPT_mrecip, -+ MASK_RECIP), -+ }; -+ -+ location_t loc -+ = fndecl == NULL ? UNKNOWN_LOCATION : DECL_SOURCE_LOCATION (fndecl); -+ const char *attr_name = target_clone_attr ? "target_clone" : "target"; -+ -+ /* If this is a list, recurse to get the options. */ -+ if (TREE_CODE (args) == TREE_LIST) -+ { -+ bool ret = true; -+ -+ for (; args; args = TREE_CHAIN (args)) -+ if (TREE_VALUE (args) -+ && !ix86_valid_target_attribute_inner_p (fndecl, TREE_VALUE (args), -+ p_strings, opts, opts_set, -+ enum_opts_set, -+ target_clone_attr)) -+ ret = false; -+ -+ return ret; -+ } -+ -+ else if (TREE_CODE (args) != STRING_CST) -+ { -+ error_at (loc, "attribute %qs argument is not a string", attr_name); -+ return false; -+ } -+ -+ /* Handle multiple arguments separated by commas. */ -+ next_optstr = ASTRDUP (TREE_STRING_POINTER (args)); -+ -+ while (next_optstr && *next_optstr != '\0') -+ { -+ char *p = next_optstr; -+ char *orig_p = p; -+ char *comma = strchr (next_optstr, ','); -+ size_t len, opt_len; -+ int opt; -+ bool opt_set_p; -+ char ch; -+ unsigned i; -+ enum ix86_opt_type type = ix86_opt_unknown; -+ int mask = 0; -+ -+ if (comma) -+ { -+ *comma = '\0'; -+ len = comma - next_optstr; -+ next_optstr = comma + 1; -+ } -+ else -+ { -+ len = strlen (p); -+ next_optstr = NULL; -+ } -+ -+ /* Recognize no-xxx. */ -+ if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-') -+ { -+ opt_set_p = false; -+ p += 3; -+ len -= 3; -+ } -+ else -+ opt_set_p = true; -+ -+ /* Find the option. */ -+ ch = *p; -+ opt = N_OPTS; -+ for (i = 0; i < ARRAY_SIZE (attrs); i++) -+ { -+ type = attrs[i].type; -+ opt_len = attrs[i].len; -+ if (ch == attrs[i].string[0] -+ && ((type != ix86_opt_str && type != ix86_opt_enum) -+ ? len == opt_len -+ : len > opt_len) -+ && memcmp (p, attrs[i].string, opt_len) == 0) -+ { -+ opt = attrs[i].opt; -+ mask = attrs[i].mask; -+ break; -+ } -+ } -+ -+ /* Process the option. 
*/ -+ if (opt == N_OPTS) -+ { -+ error_at (loc, "attribute %qs argument %qs is unknown", -+ orig_p, attr_name); -+ ret = false; -+ } -+ -+ else if (type == ix86_opt_isa) -+ { -+ struct cl_decoded_option decoded; -+ -+ generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded); -+ ix86_handle_option (opts, opts_set, -+ &decoded, input_location); -+ } -+ -+ else if (type == ix86_opt_yes || type == ix86_opt_no) -+ { -+ if (type == ix86_opt_no) -+ opt_set_p = !opt_set_p; -+ -+ if (opt_set_p) -+ opts->x_target_flags |= mask; -+ else -+ opts->x_target_flags &= ~mask; -+ } -+ -+ else if (type == ix86_opt_str) -+ { -+ if (p_strings[opt]) -+ { -+ error_at (loc, "attribute value %qs was already specified " -+ "in %qs attribute", orig_p, attr_name); -+ ret = false; -+ } -+ else -+ { -+ p_strings[opt] = xstrdup (p + opt_len); -+ if (opt == IX86_FUNCTION_SPECIFIC_ARCH) -+ { -+ /* If arch= is set, clear all bits in x_ix86_isa_flags, -+ except for ISA_64BIT, ABI_64, ABI_X32, and CODE16 -+ and all bits in x_ix86_isa_flags2. */ -+ opts->x_ix86_isa_flags &= (OPTION_MASK_ISA_64BIT -+ | OPTION_MASK_ABI_64 -+ | OPTION_MASK_ABI_X32 -+ | OPTION_MASK_CODE16); -+ opts->x_ix86_isa_flags_explicit &= (OPTION_MASK_ISA_64BIT -+ | OPTION_MASK_ABI_64 -+ | OPTION_MASK_ABI_X32 -+ | OPTION_MASK_CODE16); -+ opts->x_ix86_isa_flags2 = 0; -+ opts->x_ix86_isa_flags2_explicit = 0; -+ } -+ } -+ } -+ -+ else if (type == ix86_opt_enum) -+ { -+ bool arg_ok; -+ int value; -+ -+ arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET); -+ if (arg_ok) -+ set_option (opts, enum_opts_set, opt, value, -+ p + opt_len, DK_UNSPECIFIED, input_location, -+ global_dc); -+ else -+ { -+ error_at (loc, "attribute value %qs is unknown in %qs attribute", -+ orig_p, attr_name); -+ ret = false; -+ } -+ } -+ -+ else -+ gcc_unreachable (); -+ } -+ -+ return ret; -+} -+ -+/* Release allocated strings. */ -+static void -+release_options_strings (char **option_strings) -+{ -+ /* Free up memory allocated to hold the strings */ -+ for (unsigned i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++) -+ free (option_strings[i]); -+} -+ -+/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */ -+ -+tree -+ix86_valid_target_attribute_tree (tree fndecl, tree args, -+ struct gcc_options *opts, -+ struct gcc_options *opts_set, -+ bool target_clone_attr) -+{ -+ const char *orig_arch_string = opts->x_ix86_arch_string; -+ const char *orig_tune_string = opts->x_ix86_tune_string; -+ enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath; -+ int orig_tune_defaulted = ix86_tune_defaulted; -+ int orig_arch_specified = ix86_arch_specified; -+ char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL }; -+ tree t = NULL_TREE; -+ struct cl_target_option *def -+ = TREE_TARGET_OPTION (target_option_default_node); -+ struct gcc_options enum_opts_set; -+ -+ memset (&enum_opts_set, 0, sizeof (enum_opts_set)); -+ -+ /* Process each of the options on the chain. */ -+ if (!ix86_valid_target_attribute_inner_p (fndecl, args, option_strings, opts, -+ opts_set, &enum_opts_set, -+ target_clone_attr)) -+ return error_mark_node; -+ -+ /* If the changed options are different from the default, rerun -+ ix86_option_override_internal, and then save the options away. -+ The string options are attribute options, and will be undone -+ when we copy the save structure. 
*/ -+ if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags -+ || opts->x_ix86_isa_flags2 != def->x_ix86_isa_flags2 -+ || opts->x_target_flags != def->x_target_flags -+ || option_strings[IX86_FUNCTION_SPECIFIC_ARCH] -+ || option_strings[IX86_FUNCTION_SPECIFIC_TUNE] -+ || enum_opts_set.x_ix86_fpmath) -+ { -+ /* If we are using the default tune= or arch=, undo the string assigned, -+ and use the default. */ -+ if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]) -+ opts->x_ix86_arch_string -+ = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]); -+ else if (!orig_arch_specified) -+ opts->x_ix86_arch_string = NULL; -+ -+ if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]) -+ opts->x_ix86_tune_string -+ = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]); -+ else if (orig_tune_defaulted) -+ opts->x_ix86_tune_string = NULL; -+ -+ /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */ -+ if (enum_opts_set.x_ix86_fpmath) -+ opts_set->x_ix86_fpmath = (enum fpmath_unit) 1; -+ -+ /* Do any overrides, such as arch=xxx, or tune=xxx support. */ -+ bool r = ix86_option_override_internal (false, opts, opts_set); -+ if (!r) -+ { -+ release_options_strings (option_strings); -+ return error_mark_node; -+ } -+ -+ /* Add any builtin functions with the new isa if any. */ -+ ix86_add_new_builtins (opts->x_ix86_isa_flags, opts->x_ix86_isa_flags2); -+ -+ /* Save the current options unless we are validating options for -+ #pragma. */ -+ t = build_target_option_node (opts); -+ -+ opts->x_ix86_arch_string = orig_arch_string; -+ opts->x_ix86_tune_string = orig_tune_string; -+ opts_set->x_ix86_fpmath = orig_fpmath_set; -+ -+ release_options_strings (option_strings); -+ } -+ -+ return t; -+} -+ -+/* Hook to validate attribute((target("string"))). */ -+ -+bool -+ix86_valid_target_attribute_p (tree fndecl, -+ tree ARG_UNUSED (name), -+ tree args, -+ int flags) -+{ -+ struct gcc_options func_options; -+ tree new_target, new_optimize; -+ bool ret = true; -+ -+ /* attribute((target("default"))) does nothing, beyond -+ affecting multi-versioning. */ -+ if (TREE_VALUE (args) -+ && TREE_CODE (TREE_VALUE (args)) == STRING_CST -+ && TREE_CHAIN (args) == NULL_TREE -+ && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0) -+ return true; -+ -+ tree old_optimize = build_optimization_node (&global_options); -+ -+ /* Get the optimization options of the current function. */ -+ tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl); -+ -+ if (!func_optimize) -+ func_optimize = old_optimize; -+ -+ /* Init func_options. */ -+ memset (&func_options, 0, sizeof (func_options)); -+ init_options_struct (&func_options, NULL); -+ lang_hooks.init_options_struct (&func_options); -+ -+ cl_optimization_restore (&func_options, -+ TREE_OPTIMIZATION (func_optimize)); -+ -+ /* Initialize func_options to the default before its target options can -+ be set. */ -+ cl_target_option_restore (&func_options, -+ TREE_TARGET_OPTION (target_option_default_node)); -+ -+ /* FLAGS == 1 is used for target_clones attribute. 
*/ -+ new_target -+ = ix86_valid_target_attribute_tree (fndecl, args, &func_options, -+ &global_options_set, flags == 1); -+ -+ new_optimize = build_optimization_node (&func_options); -+ -+ if (new_target == error_mark_node) -+ ret = false; -+ -+ else if (fndecl && new_target) -+ { -+ DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target; -+ -+ if (old_optimize != new_optimize) -+ DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize; -+ } -+ -+ finalize_options_struct (&func_options); -+ -+ return ret; -+} -+ -+const char *stringop_alg_names[] = { -+#define DEF_ENUM -+#define DEF_ALG(alg, name) #name, -+#include "stringop.def" -+#undef DEF_ENUM -+#undef DEF_ALG -+}; -+ -+/* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=. -+ The string is of the following form (or comma separated list of it): -+ -+ strategy_alg:max_size:[align|noalign] -+ -+ where the full size range for the strategy is either [0, max_size] or -+ [min_size, max_size], in which min_size is the max_size + 1 of the -+ preceding range. The last size range must have max_size == -1. -+ -+ Examples: -+ -+ 1. -+ -mmemcpy-strategy=libcall:-1:noalign -+ -+ this is equivalent to (for known size memcpy) -mstringop-strategy=libcall -+ -+ -+ 2. -+ -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign -+ -+ This is to tell the compiler to use the following strategy for memset -+ 1) when the expected size is between [1, 16], use rep_8byte strategy; -+ 2) when the size is between [17, 2048], use vector_loop; -+ 3) when the size is > 2048, use libcall. */ -+ -+struct stringop_size_range -+{ -+ int max; -+ stringop_alg alg; -+ bool noalign; -+}; -+ -+static void -+ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset) -+{ -+ const struct stringop_algs *default_algs; -+ stringop_size_range input_ranges[MAX_STRINGOP_ALGS]; -+ char *curr_range_str, *next_range_str; -+ const char *opt = is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy="; -+ int i = 0, n = 0; -+ -+ if (is_memset) -+ default_algs = &ix86_cost->memset[TARGET_64BIT != 0]; -+ else -+ default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0]; -+ -+ curr_range_str = strategy_str; -+ -+ do -+ { -+ int maxs; -+ char alg_name[128]; -+ char align[16]; -+ next_range_str = strchr (curr_range_str, ','); -+ if (next_range_str) -+ *next_range_str++ = '\0'; -+ -+ if (sscanf (curr_range_str, "%20[^:]:%d:%10s", alg_name, &maxs, -+ align) != 3) -+ { -+ error ("wrong argument %qs to option %qs", curr_range_str, opt); -+ return; -+ } -+ -+ if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1)) -+ { -+ error ("size ranges of option %qs should be increasing", opt); -+ return; -+ } -+ -+ for (i = 0; i < last_alg; i++) -+ if (!strcmp (alg_name, stringop_alg_names[i])) -+ break; -+ -+ if (i == last_alg) -+ { -+ error ("wrong strategy name %qs specified for option %qs", -+ alg_name, opt); -+ -+ auto_vec candidates; -+ for (i = 0; i < last_alg; i++) -+ if ((stringop_alg) i != rep_prefix_8_byte || TARGET_64BIT) -+ candidates.safe_push (stringop_alg_names[i]); -+ -+ char *s; -+ const char *hint -+ = candidates_list_and_hint (alg_name, s, candidates); -+ if (hint) -+ inform (input_location, -+ "valid arguments to %qs are: %s; did you mean %qs?", -+ opt, s, hint); -+ else -+ inform (input_location, "valid arguments to %qs are: %s", -+ opt, s); -+ XDELETEVEC (s); -+ return; -+ } -+ -+ if ((stringop_alg) i == rep_prefix_8_byte -+ && !TARGET_64BIT) -+ { -+ /* rep; movq isn't available in 32-bit code. 
*/ -+ error ("strategy name %qs specified for option %qs " -+ "not supported for 32-bit code", alg_name, opt); -+ return; -+ } -+ -+ input_ranges[n].max = maxs; -+ input_ranges[n].alg = (stringop_alg) i; -+ if (!strcmp (align, "align")) -+ input_ranges[n].noalign = false; -+ else if (!strcmp (align, "noalign")) -+ input_ranges[n].noalign = true; -+ else -+ { -+ error ("unknown alignment %qs specified for option %qs", align, opt); -+ return; -+ } -+ n++; -+ curr_range_str = next_range_str; -+ } -+ while (curr_range_str); -+ -+ if (input_ranges[n - 1].max != -1) -+ { -+ error ("the max value for the last size range should be -1" -+ " for option %qs", opt); -+ return; -+ } -+ -+ if (n > MAX_STRINGOP_ALGS) -+ { -+ error ("too many size ranges specified in option %qs", opt); -+ return; -+ } -+ -+ /* Now override the default algs array. */ -+ for (i = 0; i < n; i++) -+ { -+ *const_cast(&default_algs->size[i].max) = input_ranges[i].max; -+ *const_cast(&default_algs->size[i].alg) -+ = input_ranges[i].alg; -+ *const_cast(&default_algs->size[i].noalign) -+ = input_ranges[i].noalign; -+ } -+} -+ -+ -+/* parse -mtune-ctrl= option. When DUMP is true, -+ print the features that are explicitly set. */ -+ -+static void -+parse_mtune_ctrl_str (bool dump) -+{ -+ if (!ix86_tune_ctrl_string) -+ return; -+ -+ char *next_feature_string = NULL; -+ char *curr_feature_string = xstrdup (ix86_tune_ctrl_string); -+ char *orig = curr_feature_string; -+ int i; -+ do -+ { -+ bool clear = false; -+ -+ next_feature_string = strchr (curr_feature_string, ','); -+ if (next_feature_string) -+ *next_feature_string++ = '\0'; -+ if (*curr_feature_string == '^') -+ { -+ curr_feature_string++; -+ clear = true; -+ } -+ for (i = 0; i < X86_TUNE_LAST; i++) -+ { -+ if (!strcmp (curr_feature_string, ix86_tune_feature_names[i])) -+ { -+ ix86_tune_features[i] = !clear; -+ if (dump) -+ fprintf (stderr, "Explicitly %s feature %s\n", -+ clear ? "clear" : "set", ix86_tune_feature_names[i]); -+ break; -+ } -+ } -+ if (i == X86_TUNE_LAST) -+ error ("unknown parameter to option %<-mtune-ctrl%>: %s", -+ clear ? curr_feature_string - 1 : curr_feature_string); -+ curr_feature_string = next_feature_string; -+ } -+ while (curr_feature_string); -+ free (orig); -+} -+ -+/* Helper function to set ix86_tune_features. IX86_TUNE is the -+ processor type. */ -+ -+static void -+set_ix86_tune_features (enum processor_type ix86_tune, bool dump) -+{ -+ unsigned HOST_WIDE_INT ix86_tune_mask = HOST_WIDE_INT_1U << ix86_tune; -+ int i; -+ -+ for (i = 0; i < X86_TUNE_LAST; ++i) -+ { -+ if (ix86_tune_no_default) -+ ix86_tune_features[i] = 0; -+ else -+ ix86_tune_features[i] -+ = !!(initial_ix86_tune_features[i] & ix86_tune_mask); -+ } -+ -+ if (dump) -+ { -+ fprintf (stderr, "List of x86 specific tuning parameter names:\n"); -+ for (i = 0; i < X86_TUNE_LAST; i++) -+ fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i], -+ ix86_tune_features[i] ? "on" : "off"); -+ } -+ -+ parse_mtune_ctrl_str (dump); -+} -+ -+ -+/* Default align_* from the processor table. */ -+ -+static void -+ix86_default_align (struct gcc_options *opts) -+{ -+ /* -falign-foo without argument: supply one. 
*/ -+ if (opts->x_flag_align_loops && !opts->x_str_align_loops) -+ opts->x_str_align_loops = processor_cost_table[ix86_tune]->align_loop; -+ if (opts->x_flag_align_jumps && !opts->x_str_align_jumps) -+ opts->x_str_align_jumps = processor_cost_table[ix86_tune]->align_jump; -+ if (opts->x_flag_align_labels && !opts->x_str_align_labels) -+ opts->x_str_align_labels = processor_cost_table[ix86_tune]->align_label; -+ if (opts->x_flag_align_functions && !opts->x_str_align_functions) -+ opts->x_str_align_functions = processor_cost_table[ix86_tune]->align_func; -+} -+ -+/* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */ -+ -+void -+ix86_override_options_after_change (void) -+{ -+ ix86_default_align (&global_options); -+} -+ -+/* Clear stack slot assignments remembered from previous functions. -+ This is called from INIT_EXPANDERS once before RTL is emitted for each -+ function. */ -+ -+static struct machine_function * -+ix86_init_machine_status (void) -+{ -+ struct machine_function *f; -+ -+ f = ggc_cleared_alloc (); -+ f->call_abi = ix86_abi; -+ -+ return f; -+} -+ -+/* Override various settings based on options. If MAIN_ARGS_P, the -+ options are from the command line, otherwise they are from -+ attributes. Return true if there's an error related to march -+ option. */ -+ -+static bool -+ix86_option_override_internal (bool main_args_p, -+ struct gcc_options *opts, -+ struct gcc_options *opts_set) -+{ -+ int i; -+ unsigned HOST_WIDE_INT ix86_arch_mask; -+ const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL); -+ -+ /* -mrecip options. */ -+ static struct -+ { -+ const char *string; /* option name */ -+ unsigned int mask; /* mask bits to set */ -+ } -+ const recip_options[] = -+ { -+ { "all", RECIP_MASK_ALL }, -+ { "none", RECIP_MASK_NONE }, -+ { "div", RECIP_MASK_DIV }, -+ { "sqrt", RECIP_MASK_SQRT }, -+ { "vec-div", RECIP_MASK_VEC_DIV }, -+ { "vec-sqrt", RECIP_MASK_VEC_SQRT }, -+ }; -+ -+ -+ /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if -+ TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */ -+ if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags)) -+ opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32); -+#ifdef TARGET_BI_ARCH -+ else -+ { -+#if TARGET_BI_ARCH == 1 -+ /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64 -+ is on and OPTION_MASK_ABI_X32 is off. We turn off -+ OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by -+ -mx32. */ -+ if (TARGET_X32_P (opts->x_ix86_isa_flags)) -+ opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64; -+#else -+ /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is -+ on and OPTION_MASK_ABI_64 is off. We turn off -+ OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by -+ -m64 or OPTION_MASK_CODE16 is turned on by -m16. */ -+ if (TARGET_LP64_P (opts->x_ix86_isa_flags) -+ || TARGET_16BIT_P (opts->x_ix86_isa_flags)) -+ opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32; -+#endif -+ if (TARGET_64BIT_P (opts->x_ix86_isa_flags) -+ && TARGET_IAMCU_P (opts->x_target_flags)) -+ sorry ("Intel MCU psABI isn%'t supported in %s mode", -+ TARGET_X32_P (opts->x_ix86_isa_flags) ? "x32" : "64-bit"); -+ } -+#endif -+ -+ if (TARGET_X32_P (opts->x_ix86_isa_flags)) -+ { -+ /* Always turn on OPTION_MASK_ISA_64BIT and turn off -+ OPTION_MASK_ABI_64 for TARGET_X32. 
*/ -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT; -+ opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64; -+ } -+ else if (TARGET_16BIT_P (opts->x_ix86_isa_flags)) -+ opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT -+ | OPTION_MASK_ABI_X32 -+ | OPTION_MASK_ABI_64); -+ else if (TARGET_LP64_P (opts->x_ix86_isa_flags)) -+ { -+ /* Always turn on OPTION_MASK_ISA_64BIT and turn off -+ OPTION_MASK_ABI_X32 for TARGET_LP64. */ -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT; -+ opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32; -+ } -+ -+#ifdef SUBTARGET_OVERRIDE_OPTIONS -+ SUBTARGET_OVERRIDE_OPTIONS; -+#endif -+ -+#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS -+ SUBSUBTARGET_OVERRIDE_OPTIONS; -+#endif -+ -+ /* -fPIC is the default for x86_64. */ -+ if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags)) -+ opts->x_flag_pic = 2; -+ -+ /* Need to check -mtune=generic first. */ -+ if (opts->x_ix86_tune_string) -+ { -+ /* As special support for cross compilers we read -mtune=native -+ as -mtune=generic. With native compilers we won't see the -+ -mtune=native, as it was changed by the driver. */ -+ if (!strcmp (opts->x_ix86_tune_string, "native")) -+ { -+ opts->x_ix86_tune_string = "generic"; -+ } -+ else if (!strcmp (opts->x_ix86_tune_string, "x86-64")) -+ warning (OPT_Wdeprecated, -+ main_args_p -+ ? G_("%<-mtune=x86-64%> is deprecated; use %<-mtune=k8%> " -+ "or %<-mtune=generic%> instead as appropriate") -+ : G_("% is deprecated; use " -+ "% or %" -+ " instead as appropriate")); -+ } -+ else -+ { -+ if (opts->x_ix86_arch_string) -+ opts->x_ix86_tune_string = opts->x_ix86_arch_string; -+ if (!opts->x_ix86_tune_string) -+ { -+ opts->x_ix86_tune_string = processor_names[TARGET_CPU_DEFAULT]; -+ ix86_tune_defaulted = 1; -+ } -+ -+ /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string -+ or defaulted. We need to use a sensible tune option. */ -+ if (!strcmp (opts->x_ix86_tune_string, "x86-64")) -+ { -+ opts->x_ix86_tune_string = "generic"; -+ } -+ } -+ -+ if (opts->x_ix86_stringop_alg == rep_prefix_8_byte -+ && !TARGET_64BIT_P (opts->x_ix86_isa_flags)) -+ { -+ /* rep; movq isn't available in 32-bit code. */ -+ error ("%<-mstringop-strategy=rep_8byte%> not supported for 32-bit code"); -+ opts->x_ix86_stringop_alg = no_stringop; -+ } -+ -+ if (!opts->x_ix86_arch_string) -+ opts->x_ix86_arch_string -+ = TARGET_64BIT_P (opts->x_ix86_isa_flags) -+ ? "x86-64" : SUBTARGET32_DEFAULT_CPU; -+ else -+ ix86_arch_specified = 1; -+ -+ if (opts_set->x_ix86_pmode) -+ { -+ if ((TARGET_LP64_P (opts->x_ix86_isa_flags) -+ && opts->x_ix86_pmode == PMODE_SI) -+ || (!TARGET_64BIT_P (opts->x_ix86_isa_flags) -+ && opts->x_ix86_pmode == PMODE_DI)) -+ error ("address mode %qs not supported in the %s bit mode", -+ TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long", -+ TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32"); -+ } -+ else -+ opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags) -+ ? PMODE_DI : PMODE_SI; -+ -+ if (!opts_set->x_ix86_abi) -+ opts->x_ix86_abi = DEFAULT_ABI; -+ -+ if (opts->x_ix86_abi == MS_ABI && TARGET_X32_P (opts->x_ix86_isa_flags)) -+ error ("%<-mabi=ms%> not supported with X32 ABI"); -+ gcc_assert (opts->x_ix86_abi == SYSV_ABI || opts->x_ix86_abi == MS_ABI); -+ -+ const char *abi_name = opts->x_ix86_abi == MS_ABI ? 
"ms" : "sysv"; -+ if ((opts->x_flag_sanitize & SANITIZE_USER_ADDRESS) -+ && opts->x_ix86_abi != DEFAULT_ABI) -+ error ("%<-mabi=%s%> not supported with %<-fsanitize=address%>", abi_name); -+ if ((opts->x_flag_sanitize & SANITIZE_KERNEL_ADDRESS) -+ && opts->x_ix86_abi != DEFAULT_ABI) -+ error ("%<-mabi=%s%> not supported with %<-fsanitize=kernel-address%>", -+ abi_name); -+ if ((opts->x_flag_sanitize & SANITIZE_THREAD) -+ && opts->x_ix86_abi != DEFAULT_ABI) -+ error ("%<-mabi=%s%> not supported with %<-fsanitize=thread%>", abi_name); -+ -+ /* For targets using ms ABI enable ms-extensions, if not -+ explicit turned off. For non-ms ABI we turn off this -+ option. */ -+ if (!opts_set->x_flag_ms_extensions) -+ opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI); -+ -+ if (opts_set->x_ix86_cmodel) -+ { -+ switch (opts->x_ix86_cmodel) -+ { -+ case CM_SMALL: -+ case CM_SMALL_PIC: -+ if (opts->x_flag_pic) -+ opts->x_ix86_cmodel = CM_SMALL_PIC; -+ if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)) -+ error ("code model %qs not supported in the %s bit mode", -+ "small", "32"); -+ break; -+ -+ case CM_MEDIUM: -+ case CM_MEDIUM_PIC: -+ if (opts->x_flag_pic) -+ opts->x_ix86_cmodel = CM_MEDIUM_PIC; -+ if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)) -+ error ("code model %qs not supported in the %s bit mode", -+ "medium", "32"); -+ else if (TARGET_X32_P (opts->x_ix86_isa_flags)) -+ error ("code model %qs not supported in x32 mode", -+ "medium"); -+ break; -+ -+ case CM_LARGE: -+ case CM_LARGE_PIC: -+ if (opts->x_flag_pic) -+ opts->x_ix86_cmodel = CM_LARGE_PIC; -+ if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)) -+ error ("code model %qs not supported in the %s bit mode", -+ "large", "32"); -+ else if (TARGET_X32_P (opts->x_ix86_isa_flags)) -+ error ("code model %qs not supported in x32 mode", -+ "large"); -+ break; -+ -+ case CM_32: -+ if (opts->x_flag_pic) -+ error ("code model %s does not support PIC mode", "32"); -+ if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) -+ error ("code model %qs not supported in the %s bit mode", -+ "32", "64"); -+ break; -+ -+ case CM_KERNEL: -+ if (opts->x_flag_pic) -+ { -+ error ("code model %s does not support PIC mode", "kernel"); -+ opts->x_ix86_cmodel = CM_32; -+ } -+ if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)) -+ error ("code model %qs not supported in the %s bit mode", -+ "kernel", "32"); -+ break; -+ -+ default: -+ gcc_unreachable (); -+ } -+ } -+ else -+ { -+ /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the -+ use of rip-relative addressing. This eliminates fixups that -+ would otherwise be needed if this object is to be placed in a -+ DLL, and is essentially just as efficient as direct addressing. */ -+ if (TARGET_64BIT_P (opts->x_ix86_isa_flags) -+ && (TARGET_RDOS || TARGET_PECOFF)) -+ opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1; -+ else if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) -+ opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL; -+ else -+ opts->x_ix86_cmodel = CM_32; -+ } -+ if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL) -+ { -+ error ("%<-masm=intel%> not supported in this configuration"); -+ opts->x_ix86_asm_dialect = ASM_ATT; -+ } -+ if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0) -+ != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0)) -+ sorry ("%i-bit mode not compiled in", -+ (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32); -+ -+ for (i = 0; i < pta_size; i++) -+ if (! 
strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name)) -+ { -+ if (!strcmp (opts->x_ix86_arch_string, "generic")) -+ { -+ error (main_args_p -+ ? G_("% CPU can be used only for %<-mtune=%> " -+ "switch") -+ : G_("% CPU can be used only for " -+ "% attribute")); -+ return false; -+ } -+ else if (!strcmp (opts->x_ix86_arch_string, "intel")) -+ { -+ error (main_args_p -+ ? G_("% CPU can be used only for %<-mtune=%> " -+ "switch") -+ : G_("% CPU can be used only for " -+ "% attribute")); -+ return false; -+ } -+ -+ if (TARGET_64BIT_P (opts->x_ix86_isa_flags) -+ && !((processor_alias_table[i].flags & PTA_64BIT) != 0)) -+ { -+ error ("CPU you selected does not support x86-64 " -+ "instruction set"); -+ return false; -+ } -+ -+ ix86_schedule = processor_alias_table[i].schedule; -+ ix86_arch = processor_alias_table[i].processor; -+ /* Default cpu tuning to the architecture. */ -+ ix86_tune = ix86_arch; -+ -+ if (((processor_alias_table[i].flags & PTA_MMX) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX; -+ if (((processor_alias_table[i].flags & PTA_3DNOW) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW; -+ if (((processor_alias_table[i].flags & PTA_3DNOW_A) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A; -+ if (((processor_alias_table[i].flags & PTA_SSE) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE; -+ if (((processor_alias_table[i].flags & PTA_SSE2) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2; -+ if (((processor_alias_table[i].flags & PTA_SSE3) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3; -+ if (((processor_alias_table[i].flags & PTA_SSSE3) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3; -+ if (((processor_alias_table[i].flags & PTA_SSE4_1) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1; -+ if (((processor_alias_table[i].flags & PTA_SSE4_2) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2; -+ if (((processor_alias_table[i].flags & PTA_AVX) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX; -+ if (((processor_alias_table[i].flags & PTA_AVX2) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2; -+ if (((processor_alias_table[i].flags & PTA_FMA) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA; -+ if (((processor_alias_table[i].flags & PTA_SSE4A) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A; -+ if (((processor_alias_table[i].flags & PTA_FMA4) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4; -+ if (((processor_alias_table[i].flags & PTA_XOP) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP; -+ if 
(((processor_alias_table[i].flags & PTA_LWP) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP; -+ if (((processor_alias_table[i].flags & PTA_ABM) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM; -+ if (((processor_alias_table[i].flags & PTA_BMI) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI; -+ if (((processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT; -+ if (((processor_alias_table[i].flags & PTA_TBM) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM; -+ if (((processor_alias_table[i].flags & PTA_BMI2) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2; -+ if (((processor_alias_table[i].flags & PTA_CX16) != 0) -+ && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_CX16)) -+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_CX16; -+ if (((processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT; -+ if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags) -+ && ((processor_alias_table[i].flags & PTA_NO_SAHF) != 0)) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF; -+ if (((processor_alias_table[i].flags & PTA_MOVBE) != 0) -+ && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_MOVBE)) -+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_MOVBE; -+ if (((processor_alias_table[i].flags & PTA_AES) != 0) -+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES)) -+ ix86_isa_flags |= OPTION_MASK_ISA_AES; -+ if (((processor_alias_table[i].flags & PTA_SHA) != 0) -+ && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA)) -+ ix86_isa_flags |= OPTION_MASK_ISA_SHA; -+ if (((processor_alias_table[i].flags & PTA_PCLMUL) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL; -+ if (((processor_alias_table[i].flags & PTA_FSGSBASE) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE; -+ if (((processor_alias_table[i].flags & PTA_RDRND) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND; -+ if (((processor_alias_table[i].flags & PTA_F16C) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C; -+ if (((processor_alias_table[i].flags & PTA_RTM) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM; -+ if (((processor_alias_table[i].flags & PTA_HLE) != 0) -+ && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_HLE)) -+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_HLE; -+ if (((processor_alias_table[i].flags & PTA_PRFCHW) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW; -+ if (((processor_alias_table[i].flags & PTA_RDSEED) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED; -+ if 
(((processor_alias_table[i].flags & PTA_ADX) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX; -+ if (((processor_alias_table[i].flags & PTA_FXSR) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR; -+ if (((processor_alias_table[i].flags & PTA_XSAVE) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE; -+ if (((processor_alias_table[i].flags & PTA_XSAVEOPT) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT; -+ if (((processor_alias_table[i].flags & PTA_AVX512F) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F; -+ if (((processor_alias_table[i].flags & PTA_AVX512ER) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER; -+ if (((processor_alias_table[i].flags & PTA_AVX512PF) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF; -+ if (((processor_alias_table[i].flags & PTA_AVX512CD) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD; -+ if (((processor_alias_table[i].flags & PTA_PREFETCHWT1) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1; -+ if (((processor_alias_table[i].flags & PTA_CLWB) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB; -+ if (((processor_alias_table[i].flags & PTA_CLFLUSHOPT) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT; -+ if (((processor_alias_table[i].flags & PTA_CLZERO) != 0) -+ && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_CLZERO)) -+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_CLZERO; -+ if (((processor_alias_table[i].flags & PTA_XSAVEC) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC; -+ if (((processor_alias_table[i].flags & PTA_XSAVES) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES; -+ if (((processor_alias_table[i].flags & PTA_AVX512DQ) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ; -+ if (((processor_alias_table[i].flags & PTA_AVX512BW) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW; -+ if (((processor_alias_table[i].flags & PTA_AVX512VL) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL; -+ if (((processor_alias_table[i].flags & PTA_AVX512VBMI) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI; -+ if (((processor_alias_table[i].flags & PTA_AVX512IFMA) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA; -+ if (((processor_alias_table[i].flags & PTA_AVX512VNNI) != 0) -+ 
&& !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VNNI)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VNNI; -+ if (((processor_alias_table[i].flags & PTA_GFNI) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_GFNI)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_GFNI; -+ if (((processor_alias_table[i].flags & PTA_AVX512VBMI2) != 0) -+ && !(opts->x_ix86_isa_flags_explicit -+ & OPTION_MASK_ISA_AVX512VBMI2)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI2; -+ if (((processor_alias_table[i].flags & PTA_VPCLMULQDQ) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_VPCLMULQDQ)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_VPCLMULQDQ; -+ if (((processor_alias_table[i].flags & PTA_AVX512BITALG) != 0) -+ && !(opts->x_ix86_isa_flags_explicit -+ & OPTION_MASK_ISA_AVX512BITALG)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BITALG; -+ -+ if (((processor_alias_table[i].flags & PTA_AVX5124VNNIW) != 0) -+ && !(opts->x_ix86_isa_flags2_explicit -+ & OPTION_MASK_ISA_AVX5124VNNIW)) -+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX5124VNNIW; -+ if (((processor_alias_table[i].flags & PTA_AVX5124FMAPS) != 0) -+ && !(opts->x_ix86_isa_flags2_explicit -+ & OPTION_MASK_ISA_AVX5124FMAPS)) -+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX5124FMAPS; -+ if (((processor_alias_table[i].flags & PTA_AVX512VPOPCNTDQ) != 0) -+ && !(opts->x_ix86_isa_flags_explicit -+ & OPTION_MASK_ISA_AVX512VPOPCNTDQ)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VPOPCNTDQ; -+ if (((processor_alias_table[i].flags & PTA_SGX) != 0) -+ && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_SGX)) -+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_SGX; -+ if (((processor_alias_table[i].flags & PTA_VAES) != 0) -+ && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_VAES)) -+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_VAES; -+ if (((processor_alias_table[i].flags & PTA_RDPID) != 0) -+ && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_RDPID)) -+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_RDPID; -+ if (((processor_alias_table[i].flags & PTA_PCONFIG) != 0) -+ && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_PCONFIG)) -+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_PCONFIG; -+ if (((processor_alias_table[i].flags & PTA_WBNOINVD) != 0) -+ && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_WBNOINVD)) -+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_WBNOINVD; -+ if (((processor_alias_table[i].flags & PTA_PTWRITE) != 0) -+ && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_PTWRITE)) -+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_PTWRITE; -+ -+ if ((processor_alias_table[i].flags -+ & (PTA_PREFETCH_SSE | PTA_SSE)) != 0) -+ x86_prefetch_sse = true; -+ if (((processor_alias_table[i].flags & PTA_MWAITX) != 0) -+ && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_MWAITX)) -+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_MWAITX; -+ if (((processor_alias_table[i].flags & PTA_PKU) != 0) -+ && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PKU)) -+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PKU; -+ -+ /* Don't enable x87 instructions if only -+ general registers are allowed. */ -+ if (!(opts_set->x_ix86_target_flags & OPTION_MASK_GENERAL_REGS_ONLY) -+ && !(opts_set->x_target_flags & MASK_80387)) -+ { -+ if (((processor_alias_table[i].flags & PTA_NO_80387) != 0)) -+ opts->x_target_flags &= ~MASK_80387; -+ else -+ opts->x_target_flags |= MASK_80387; -+ } -+ break; -+ } -+ -+ if (i == pta_size) -+ { -+ error (main_args_p -+ ? 
G_("bad value (%qs) for %<-march=%> switch") -+ : G_("bad value (%qs) for % attribute"), -+ opts->x_ix86_arch_string); -+ -+ auto_vec candidates; -+ for (i = 0; i < pta_size; i++) -+ if (strcmp (processor_alias_table[i].name, "generic") -+ && strcmp (processor_alias_table[i].name, "intel") -+ && (!TARGET_64BIT_P (opts->x_ix86_isa_flags) -+ || ((processor_alias_table[i].flags & PTA_64BIT) != 0))) -+ candidates.safe_push (processor_alias_table[i].name); -+ -+#ifdef HAVE_LOCAL_CPU_DETECT -+ /* Add also "native" as possible value. */ -+ candidates.safe_push ("native"); -+#endif -+ -+ char *s; -+ const char *hint -+ = candidates_list_and_hint (opts->x_ix86_arch_string, s, candidates); -+ if (hint) -+ inform (input_location, -+ main_args_p -+ ? G_("valid arguments to %<-march=%> switch are: " -+ "%s; did you mean %qs?") -+ : G_("valid arguments to % attribute are: " -+ "%s; did you mean %qs?"), s, hint); -+ else -+ inform (input_location, -+ main_args_p -+ ? G_("valid arguments to %<-march=%> switch are: %s") -+ : G_("valid arguments to % attribute " -+ "are: %s"), s); -+ XDELETEVEC (s); -+ } -+ -+ ix86_arch_mask = HOST_WIDE_INT_1U << ix86_arch; -+ for (i = 0; i < X86_ARCH_LAST; ++i) -+ ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask); -+ -+ for (i = 0; i < pta_size; i++) -+ if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name)) -+ { -+ ix86_schedule = processor_alias_table[i].schedule; -+ ix86_tune = processor_alias_table[i].processor; -+ if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) -+ { -+ if (!((processor_alias_table[i].flags & PTA_64BIT) != 0)) -+ { -+ if (ix86_tune_defaulted) -+ { -+ opts->x_ix86_tune_string = "x86-64"; -+ for (i = 0; i < pta_size; i++) -+ if (! strcmp (opts->x_ix86_tune_string, -+ processor_alias_table[i].name)) -+ break; -+ ix86_schedule = processor_alias_table[i].schedule; -+ ix86_tune = processor_alias_table[i].processor; -+ } -+ else -+ error ("CPU you selected does not support x86-64 " -+ "instruction set"); -+ } -+ } -+ /* Intel CPUs have always interpreted SSE prefetch instructions as -+ NOPs; so, we can enable SSE prefetch instructions even when -+ -mtune (rather than -march) points us to a processor that has them. -+ However, the VIA C3 gives a SIGILL, so we only do that for i686 and -+ higher processors. */ -+ if (TARGET_CMOV -+ && ((processor_alias_table[i].flags -+ & (PTA_PREFETCH_SSE | PTA_SSE)) != 0)) -+ x86_prefetch_sse = true; -+ break; -+ } -+ -+ if (ix86_tune_specified && i == pta_size) -+ { -+ error (main_args_p -+ ? G_("bad value (%qs) for %<-mtune=%> switch") -+ : G_("bad value (%qs) for % attribute"), -+ opts->x_ix86_tune_string); -+ -+ auto_vec candidates; -+ for (i = 0; i < pta_size; i++) -+ if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) -+ || ((processor_alias_table[i].flags & PTA_64BIT) != 0)) -+ candidates.safe_push (processor_alias_table[i].name); -+ -+#ifdef HAVE_LOCAL_CPU_DETECT -+ /* Add also "native" as possible value. */ -+ candidates.safe_push ("native"); -+#endif -+ -+ char *s; -+ const char *hint -+ = candidates_list_and_hint (opts->x_ix86_tune_string, s, candidates); -+ if (hint) -+ inform (input_location, -+ main_args_p -+ ? G_("valid arguments to %<-mtune=%> switch are: " -+ "%s; did you mean %qs?") -+ : G_("valid arguments to % attribute are: " -+ "%s; did you mean %qs?"), s, hint); -+ else -+ inform (input_location, -+ main_args_p -+ ? 
G_("valid arguments to %<-mtune=%> switch are: %s") -+ : G_("valid arguments to % attribute " -+ "are: %s"), s); -+ XDELETEVEC (s); -+ } -+ -+ set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes); -+ -+#ifndef USE_IX86_FRAME_POINTER -+#define USE_IX86_FRAME_POINTER 0 -+#endif -+ -+#ifndef USE_X86_64_FRAME_POINTER -+#define USE_X86_64_FRAME_POINTER 0 -+#endif -+ -+ /* Set the default values for switches whose default depends on TARGET_64BIT -+ in case they weren't overwritten by command line options. */ -+ if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) -+ { -+ if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer) -+ opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER; -+ if (opts->x_flag_asynchronous_unwind_tables -+ && !opts_set->x_flag_unwind_tables -+ && TARGET_64BIT_MS_ABI) -+ opts->x_flag_unwind_tables = 1; -+ if (opts->x_flag_asynchronous_unwind_tables == 2) -+ opts->x_flag_unwind_tables -+ = opts->x_flag_asynchronous_unwind_tables = 1; -+ if (opts->x_flag_pcc_struct_return == 2) -+ opts->x_flag_pcc_struct_return = 0; -+ } -+ else -+ { -+ if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer) -+ opts->x_flag_omit_frame_pointer -+ = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size); -+ if (opts->x_flag_asynchronous_unwind_tables == 2) -+ opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER; -+ if (opts->x_flag_pcc_struct_return == 2) -+ { -+ /* Intel MCU psABI specifies that -freg-struct-return should -+ be on. Instead of setting DEFAULT_PCC_STRUCT_RETURN to 1, -+ we check -miamcu so that -freg-struct-return is always -+ turned on if -miamcu is used. */ -+ if (TARGET_IAMCU_P (opts->x_target_flags)) -+ opts->x_flag_pcc_struct_return = 0; -+ else -+ opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN; -+ } -+ } -+ -+ ix86_tune_cost = processor_cost_table[ix86_tune]; -+ /* TODO: ix86_cost should be chosen at instruction or function granuality -+ so for cold code we use size_cost even in !optimize_size compilation. */ -+ if (opts->x_optimize_size) -+ ix86_cost = &ix86_size_cost; -+ else -+ ix86_cost = ix86_tune_cost; -+ -+ /* Arrange to set up i386_stack_locals for all functions. */ -+ init_machine_status = ix86_init_machine_status; -+ -+ /* Validate -mregparm= value. */ -+ if (opts_set->x_ix86_regparm) -+ { -+ if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) -+ warning (0, "%<-mregparm%> is ignored in 64-bit mode"); -+ else if (TARGET_IAMCU_P (opts->x_target_flags)) -+ warning (0, "%<-mregparm%> is ignored for Intel MCU psABI"); -+ if (opts->x_ix86_regparm > REGPARM_MAX) -+ { -+ error ("%<-mregparm=%d%> is not between 0 and %d", -+ opts->x_ix86_regparm, REGPARM_MAX); -+ opts->x_ix86_regparm = 0; -+ } -+ } -+ if (TARGET_IAMCU_P (opts->x_target_flags) -+ || TARGET_64BIT_P (opts->x_ix86_isa_flags)) -+ opts->x_ix86_regparm = REGPARM_MAX; -+ -+ /* Default align_* from the processor table. */ -+ ix86_default_align (opts); -+ -+ /* Provide default for -mbranch-cost= value. */ -+ if (!opts_set->x_ix86_branch_cost) -+ opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost; -+ -+ if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) -+ { -+ opts->x_target_flags -+ |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags; -+ -+ if (!ix86_arch_specified) -+ opts->x_ix86_isa_flags -+ |= TARGET_SUBTARGET64_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit; -+ -+ if (TARGET_RTD_P (opts->x_target_flags)) -+ warning (0, -+ main_args_p -+ ? 
G_("%<-mrtd%> is ignored in 64bit mode") -+ : G_("% is ignored in 64bit mode")); -+ } -+ else -+ { -+ opts->x_target_flags -+ |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags; -+ -+ if (!ix86_arch_specified) -+ opts->x_ix86_isa_flags -+ |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit; -+ -+ /* i386 ABI does not specify red zone. It still makes sense to use it -+ when programmer takes care to stack from being destroyed. */ -+ if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE)) -+ opts->x_target_flags |= MASK_NO_RED_ZONE; -+ } -+ -+ /* Keep nonleaf frame pointers. */ -+ if (opts->x_flag_omit_frame_pointer) -+ opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER; -+ else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags)) -+ opts->x_flag_omit_frame_pointer = 1; -+ -+ /* If we're doing fast math, we don't care about comparison order -+ wrt NaNs. This lets us use a shorter comparison sequence. */ -+ if (opts->x_flag_finite_math_only) -+ opts->x_target_flags &= ~MASK_IEEE_FP; -+ -+ /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387, -+ since the insns won't need emulation. */ -+ if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387]) -+ opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387; -+ -+ /* Likewise, if the target doesn't have a 387, or we've specified -+ software floating point, don't use 387 inline intrinsics. */ -+ if (!TARGET_80387_P (opts->x_target_flags)) -+ opts->x_target_flags |= MASK_NO_FANCY_MATH_387; -+ -+ /* Turn on MMX builtins for -msse. */ -+ if (TARGET_SSE_P (opts->x_ix86_isa_flags)) -+ opts->x_ix86_isa_flags -+ |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit; -+ -+ /* Enable SSE prefetch. */ -+ if (TARGET_SSE_P (opts->x_ix86_isa_flags) -+ || (TARGET_PRFCHW_P (opts->x_ix86_isa_flags) -+ && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)) -+ || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags)) -+ x86_prefetch_sse = true; -+ -+ /* Enable popcnt instruction for -msse4.2 or -mabm. */ -+ if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags) -+ || TARGET_ABM_P (opts->x_ix86_isa_flags)) -+ opts->x_ix86_isa_flags -+ |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit; -+ -+ /* Enable lzcnt instruction for -mabm. */ -+ if (TARGET_ABM_P(opts->x_ix86_isa_flags)) -+ opts->x_ix86_isa_flags -+ |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit; -+ -+ /* Disable BMI, BMI2 and TBM instructions for -m16. */ -+ if (TARGET_16BIT_P(opts->x_ix86_isa_flags)) -+ opts->x_ix86_isa_flags -+ &= ~((OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_TBM) -+ & ~opts->x_ix86_isa_flags_explicit); -+ -+ /* Validate -mpreferred-stack-boundary= value or default it to -+ PREFERRED_STACK_BOUNDARY_DEFAULT. */ -+ ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT; -+ if (opts_set->x_ix86_preferred_stack_boundary_arg) -+ { -+ int min = TARGET_64BIT_P (opts->x_ix86_isa_flags)? 3 : 2; -+ int max = TARGET_SEH ? 4 : 12; -+ -+ if (opts->x_ix86_preferred_stack_boundary_arg < min -+ || opts->x_ix86_preferred_stack_boundary_arg > max) -+ { -+ if (min == max) -+ error ("%<-mpreferred-stack-boundary%> is not supported " -+ "for this target"); -+ else -+ error ("%<-mpreferred-stack-boundary=%d%> is not between %d and %d", -+ opts->x_ix86_preferred_stack_boundary_arg, min, max); -+ } -+ else -+ ix86_preferred_stack_boundary -+ = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT; -+ } -+ -+ /* Set the default value for -mstackrealign. 
*/ -+ if (!opts_set->x_ix86_force_align_arg_pointer) -+ opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT; -+ -+ ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY; -+ -+ /* Validate -mincoming-stack-boundary= value or default it to -+ MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */ -+ ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary; -+ if (opts_set->x_ix86_incoming_stack_boundary_arg) -+ { -+ int min = TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2; -+ -+ if (opts->x_ix86_incoming_stack_boundary_arg < min -+ || opts->x_ix86_incoming_stack_boundary_arg > 12) -+ error ("%<-mincoming-stack-boundary=%d%> is not between %d and 12", -+ opts->x_ix86_incoming_stack_boundary_arg, min); -+ else -+ { -+ ix86_user_incoming_stack_boundary -+ = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT; -+ ix86_incoming_stack_boundary -+ = ix86_user_incoming_stack_boundary; -+ } -+ } -+ -+#ifndef NO_PROFILE_COUNTERS -+ if (flag_nop_mcount) -+ error ("%<-mnop-mcount%> is not compatible with this target"); -+#endif -+ if (flag_nop_mcount && flag_pic) -+ error ("%<-mnop-mcount%> is not implemented for %<-fPIC%>"); -+ -+ /* Accept -msseregparm only if at least SSE support is enabled. */ -+ if (TARGET_SSEREGPARM_P (opts->x_target_flags) -+ && ! TARGET_SSE_P (opts->x_ix86_isa_flags)) -+ error (main_args_p -+ ? G_("%<-msseregparm%> used without SSE enabled") -+ : G_("% used without SSE enabled")); -+ -+ if (opts_set->x_ix86_fpmath) -+ { -+ if (opts->x_ix86_fpmath & FPMATH_SSE) -+ { -+ if (!TARGET_SSE_P (opts->x_ix86_isa_flags)) -+ { -+ if (TARGET_80387_P (opts->x_target_flags)) -+ { -+ warning (0, "SSE instruction set disabled, using 387 arithmetics"); -+ opts->x_ix86_fpmath = FPMATH_387; -+ } -+ } -+ else if ((opts->x_ix86_fpmath & FPMATH_387) -+ && !TARGET_80387_P (opts->x_target_flags)) -+ { -+ warning (0, "387 instruction set disabled, using SSE arithmetics"); -+ opts->x_ix86_fpmath = FPMATH_SSE; -+ } -+ } -+ } -+ /* For all chips supporting SSE2, -mfpmath=sse performs better than -+ fpmath=387. The second is however default at many targets since the -+ extra 80bit precision of temporaries is considered to be part of ABI. -+ Overwrite the default at least for -ffast-math. -+ TODO: -mfpmath=both seems to produce same performing code with bit -+ smaller binaries. It is however not clear if register allocation is -+ ready for this setting. -+ Also -mfpmath=387 is overall a lot more compact (bout 4-5%) than SSE -+ codegen. We may switch to 387 with -ffast-math for size optimized -+ functions. */ -+ else if (fast_math_flags_set_p (&global_options) -+ && TARGET_SSE2_P (opts->x_ix86_isa_flags)) -+ opts->x_ix86_fpmath = FPMATH_SSE; -+ else -+ opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags); -+ -+ /* Use external vectorized library in vectorizing intrinsics. 
*/ -+ if (opts_set->x_ix86_veclibabi_type) -+ switch (opts->x_ix86_veclibabi_type) -+ { -+ case ix86_veclibabi_type_svml: -+ ix86_veclib_handler = &ix86_veclibabi_svml; -+ break; -+ -+ case ix86_veclibabi_type_acml: -+ ix86_veclib_handler = &ix86_veclibabi_acml; -+ break; -+ -+ default: -+ gcc_unreachable (); -+ } -+ -+ if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS] -+ && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)) -+ opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; -+ -+ /* If stack probes are required, the space used for large function -+ arguments on the stack must also be probed, so enable -+ -maccumulate-outgoing-args so this happens in the prologue. */ -+ if (TARGET_STACK_PROBE_P (opts->x_target_flags) -+ && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)) -+ { -+ if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS) -+ warning (0, -+ main_args_p -+ ? G_("stack probing requires %<-maccumulate-outgoing-args%> " -+ "for correctness") -+ : G_("stack probing requires " -+ "% for " -+ "correctness")); -+ opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; -+ } -+ -+ /* Stack realignment without -maccumulate-outgoing-args requires %ebp, -+ so enable -maccumulate-outgoing-args when %ebp is fixed. */ -+ if (fixed_regs[BP_REG] -+ && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)) -+ { -+ if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS) -+ warning (0, -+ main_args_p -+ ? G_("fixed ebp register requires " -+ "%<-maccumulate-outgoing-args%>") -+ : G_("fixed ebp register requires " -+ "%")); -+ opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; -+ } -+ -+ /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */ -+ { -+ char *p; -+ ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0); -+ p = strchr (internal_label_prefix, 'X'); -+ internal_label_prefix_len = p - internal_label_prefix; -+ *p = '\0'; -+ } -+ -+ /* When scheduling description is not available, disable scheduler pass -+ so it won't slow down the compilation and make x87 code slower. */ -+ if (!TARGET_SCHEDULE) -+ opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0; -+ -+ maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, -+ ix86_tune_cost->simultaneous_prefetches, -+ opts->x_param_values, -+ opts_set->x_param_values); -+ maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, -+ ix86_tune_cost->prefetch_block, -+ opts->x_param_values, -+ opts_set->x_param_values); -+ maybe_set_param_value (PARAM_L1_CACHE_SIZE, -+ ix86_tune_cost->l1_cache_size, -+ opts->x_param_values, -+ opts_set->x_param_values); -+ maybe_set_param_value (PARAM_L2_CACHE_SIZE, -+ ix86_tune_cost->l2_cache_size, -+ opts->x_param_values, -+ opts_set->x_param_values); -+ -+ /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */ -+ if (opts->x_flag_prefetch_loop_arrays < 0 -+ && HAVE_prefetch -+ && (opts->x_optimize >= 3 || opts->x_flag_profile_use) -+ && !opts->x_optimize_size -+ && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL) -+ opts->x_flag_prefetch_loop_arrays = 1; -+ -+ /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0) -+ can be opts->x_optimized to ap = __builtin_next_arg (0). 
*/ -+ if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack) -+ targetm.expand_builtin_va_start = NULL; -+ -+ if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) -+ { -+ ix86_gen_leave = gen_leave_rex64; -+ if (Pmode == DImode) -+ { -+ ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di; -+ ix86_gen_tls_local_dynamic_base_64 -+ = gen_tls_local_dynamic_base_64_di; -+ } -+ else -+ { -+ ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si; -+ ix86_gen_tls_local_dynamic_base_64 -+ = gen_tls_local_dynamic_base_64_si; -+ } -+ } -+ else -+ ix86_gen_leave = gen_leave; -+ -+ if (Pmode == DImode) -+ { -+ ix86_gen_add3 = gen_adddi3; -+ ix86_gen_sub3 = gen_subdi3; -+ ix86_gen_sub3_carry = gen_subdi3_carry; -+ ix86_gen_one_cmpl2 = gen_one_cmpldi2; -+ ix86_gen_andsp = gen_anddi3; -+ ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di; -+ ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi; -+ ix86_gen_probe_stack_range = gen_probe_stack_rangedi; -+ ix86_gen_monitor = gen_sse3_monitor_di; -+ ix86_gen_monitorx = gen_monitorx_di; -+ ix86_gen_clzero = gen_clzero_di; -+ } -+ else -+ { -+ ix86_gen_add3 = gen_addsi3; -+ ix86_gen_sub3 = gen_subsi3; -+ ix86_gen_sub3_carry = gen_subsi3_carry; -+ ix86_gen_one_cmpl2 = gen_one_cmplsi2; -+ ix86_gen_andsp = gen_andsi3; -+ ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si; -+ ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi; -+ ix86_gen_probe_stack_range = gen_probe_stack_rangesi; -+ ix86_gen_monitor = gen_sse3_monitor_si; -+ ix86_gen_monitorx = gen_monitorx_si; -+ ix86_gen_clzero = gen_clzero_si; -+ } -+ -+#ifdef USE_IX86_CLD -+ /* Use -mcld by default for 32-bit code if configured with --enable-cld. */ -+ if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)) -+ opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags; -+#endif -+ -+ /* Set the default value for -mfentry. */ -+ if (!opts_set->x_flag_fentry) -+ opts->x_flag_fentry = TARGET_SEH; -+ else -+ { -+ if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic -+ && opts->x_flag_fentry) -+ sorry ("%<-mfentry%> isn%'t supported for 32-bit in combination " -+ "with %<-fpic%>"); -+ else if (TARGET_SEH && !opts->x_flag_fentry) -+ sorry ("%<-mno-fentry%> isn%'t compatible with SEH"); -+ } -+ -+ if (TARGET_SEH && TARGET_CALL_MS2SYSV_XLOGUES) -+ sorry ("%<-mcall-ms2sysv-xlogues%> isn%'t currently supported with SEH"); -+ -+ if (!(opts_set->x_target_flags & MASK_VZEROUPPER) -+ && TARGET_EMIT_VZEROUPPER) -+ opts->x_target_flags |= MASK_VZEROUPPER; -+ if (!(opts_set->x_target_flags & MASK_STV)) -+ opts->x_target_flags |= MASK_STV; -+ /* Disable STV if -mpreferred-stack-boundary={2,3} or -+ -mincoming-stack-boundary={2,3} or -mstackrealign - the needed -+ stack realignment will be extra cost the pass doesn't take into -+ account and the pass can't realign the stack. */ -+ if (ix86_preferred_stack_boundary < 128 -+ || ix86_incoming_stack_boundary < 128 -+ || opts->x_ix86_force_align_arg_pointer) -+ opts->x_target_flags &= ~MASK_STV; -+ if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL] -+ && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD)) -+ opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD; -+ if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL] -+ && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE)) -+ opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE; -+ -+ /* Enable 128-bit AVX instruction generation -+ for the auto-vectorizer. 
*/ -+ if (TARGET_AVX128_OPTIMAL -+ && (opts_set->x_prefer_vector_width_type == PVW_NONE)) -+ opts->x_prefer_vector_width_type = PVW_AVX128; -+ -+ /* Use 256-bit AVX instruction generation -+ in the auto-vectorizer. */ -+ if (ix86_tune_features[X86_TUNE_AVX256_OPTIMAL] -+ && (opts_set->x_prefer_vector_width_type == PVW_NONE)) -+ opts->x_prefer_vector_width_type = PVW_AVX256; -+ -+ if (opts->x_ix86_recip_name) -+ { -+ char *p = ASTRDUP (opts->x_ix86_recip_name); -+ char *q; -+ unsigned int mask, i; -+ bool invert; -+ -+ while ((q = strtok (p, ",")) != NULL) -+ { -+ p = NULL; -+ if (*q == '!') -+ { -+ invert = true; -+ q++; -+ } -+ else -+ invert = false; -+ -+ if (!strcmp (q, "default")) -+ mask = RECIP_MASK_ALL; -+ else -+ { -+ for (i = 0; i < ARRAY_SIZE (recip_options); i++) -+ if (!strcmp (q, recip_options[i].string)) -+ { -+ mask = recip_options[i].mask; -+ break; -+ } -+ -+ if (i == ARRAY_SIZE (recip_options)) -+ { -+ error ("unknown option for %<-mrecip=%s%>", q); -+ invert = false; -+ mask = RECIP_MASK_NONE; -+ } -+ } -+ -+ opts->x_recip_mask_explicit |= mask; -+ if (invert) -+ opts->x_recip_mask &= ~mask; -+ else -+ opts->x_recip_mask |= mask; -+ } -+ } -+ -+ if (TARGET_RECIP_P (opts->x_target_flags)) -+ opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit; -+ else if (opts_set->x_target_flags & MASK_RECIP) -+ opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit); -+ -+ /* Default long double to 64-bit for 32-bit Bionic and to __float128 -+ for 64-bit Bionic. Also default long double to 64-bit for Intel -+ MCU psABI. */ -+ if ((TARGET_HAS_BIONIC || TARGET_IAMCU) -+ && !(opts_set->x_target_flags -+ & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128))) -+ opts->x_target_flags |= (TARGET_64BIT -+ ? MASK_LONG_DOUBLE_128 -+ : MASK_LONG_DOUBLE_64); -+ -+ /* Only one of them can be active. */ -+ gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0 -+ || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0); -+ -+ /* Handle stack protector */ -+ if (!opts_set->x_ix86_stack_protector_guard) -+ { -+#ifdef TARGET_THREAD_SSP_OFFSET -+ if (!TARGET_HAS_BIONIC) -+ opts->x_ix86_stack_protector_guard = SSP_TLS; -+ else -+#endif -+ opts->x_ix86_stack_protector_guard = SSP_GLOBAL; -+ } -+ -+ if (opts_set->x_ix86_stack_protector_guard_offset_str) -+ { -+ char *endp; -+ const char *str = opts->x_ix86_stack_protector_guard_offset_str; -+ -+ errno = 0; -+ int64_t offset; -+ -+#if defined(INT64_T_IS_LONG) -+ offset = strtol (str, &endp, 0); -+#else -+ offset = strtoll (str, &endp, 0); -+#endif -+ -+ if (!*str || *endp || errno) -+ error ("%qs is not a valid number " -+ "in %<-mstack-protector-guard-offset=%>", str); -+ -+ if (!IN_RANGE (offset, HOST_WIDE_INT_C (-0x80000000), -+ HOST_WIDE_INT_C (0x7fffffff))) -+ error ("%qs is not a valid offset " -+ "in %<-mstack-protector-guard-offset=%>", str); -+ -+ opts->x_ix86_stack_protector_guard_offset = offset; -+ } -+#ifdef TARGET_THREAD_SSP_OFFSET -+ else -+ opts->x_ix86_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET; -+#endif -+ -+ if (opts_set->x_ix86_stack_protector_guard_reg_str) -+ { -+ const char *str = opts->x_ix86_stack_protector_guard_reg_str; -+ addr_space_t seg = ADDR_SPACE_GENERIC; -+ -+ /* Discard optional register prefix. 
*/ -+ if (str[0] == '%') -+ str++; -+ -+ if (strlen (str) == 2 && str[1] == 's') -+ { -+ if (str[0] == 'f') -+ seg = ADDR_SPACE_SEG_FS; -+ else if (str[0] == 'g') -+ seg = ADDR_SPACE_SEG_GS; -+ } -+ -+ if (seg == ADDR_SPACE_GENERIC) -+ error ("%qs is not a valid base register " -+ "in %<-mstack-protector-guard-reg=%>", -+ opts->x_ix86_stack_protector_guard_reg_str); -+ -+ opts->x_ix86_stack_protector_guard_reg = seg; -+ } -+ else -+ { -+ opts->x_ix86_stack_protector_guard_reg = DEFAULT_TLS_SEG_REG; -+ -+ /* The kernel uses a different segment register for performance -+ reasons; a system call would not have to trash the userspace -+ segment register, which would be expensive. */ -+ if (opts->x_ix86_cmodel == CM_KERNEL) -+ opts->x_ix86_stack_protector_guard_reg = ADDR_SPACE_SEG_GS; -+ } -+ -+ /* Handle -mmemcpy-strategy= and -mmemset-strategy= */ -+ if (opts->x_ix86_tune_memcpy_strategy) -+ { -+ char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy); -+ ix86_parse_stringop_strategy_string (str, false); -+ free (str); -+ } -+ -+ if (opts->x_ix86_tune_memset_strategy) -+ { -+ char *str = xstrdup (opts->x_ix86_tune_memset_strategy); -+ ix86_parse_stringop_strategy_string (str, true); -+ free (str); -+ } -+ -+ /* Save the initial options in case the user does function specific -+ options. */ -+ if (main_args_p) -+ target_option_default_node = target_option_current_node -+ = build_target_option_node (opts); -+ -+ if (opts->x_flag_cf_protection != CF_NONE) -+ opts->x_flag_cf_protection -+ = (cf_protection_level) (opts->x_flag_cf_protection | CF_SET); -+ -+ if (ix86_tune_features [X86_TUNE_AVOID_256FMA_CHAINS]) -+ maybe_set_param_value (PARAM_AVOID_FMA_MAX_BITS, 256, -+ opts->x_param_values, -+ opts_set->x_param_values); -+ else if (ix86_tune_features [X86_TUNE_AVOID_128FMA_CHAINS]) -+ maybe_set_param_value (PARAM_AVOID_FMA_MAX_BITS, 128, -+ opts->x_param_values, -+ opts_set->x_param_values); -+ -+ /* PR86952: jump table usage with retpolines is slow. -+ The PR provides some numbers about the slowness. */ -+ if (ix86_indirect_branch != indirect_branch_keep -+ && !opts_set->x_flag_jump_tables) -+ opts->x_flag_jump_tables = 0; -+ -+ return true; -+} -+ -+/* Implement the TARGET_OPTION_OVERRIDE hook. */ -+ -+void -+ix86_option_override (void) -+{ -+ ix86_option_override_internal (true, &global_options, &global_options_set); -+} -+ -+/* Remember the last target of ix86_set_current_function. */ -+static GTY(()) tree ix86_previous_fndecl; -+ -+/* Set targets globals to the default (or current #pragma GCC target -+ if active). Invalidate ix86_previous_fndecl cache. */ -+ -+void -+ix86_reset_previous_fndecl (void) -+{ -+ tree new_tree = target_option_current_node; -+ cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree)); -+ if (TREE_TARGET_GLOBALS (new_tree)) -+ restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); -+ else if (new_tree == target_option_default_node) -+ restore_target_globals (&default_target_globals); -+ else -+ TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts (); -+ ix86_previous_fndecl = NULL_TREE; -+} -+ -+/* Add target attribute to SIMD clone NODE if needed. */ -+ -+void -+ix86_simd_clone_adjust (struct cgraph_node *node) -+{ -+ const char *str = NULL; -+ -+ /* Attributes need to be adjusted for definitions, not declarations. 
*/ -+ if (!node->definition) -+ return; -+ -+ gcc_assert (node->decl == cfun->decl); -+ switch (node->simdclone->vecsize_mangle) -+ { -+ case 'b': -+ if (!TARGET_SSE2) -+ str = "sse2"; -+ break; -+ case 'c': -+ if (!TARGET_AVX) -+ str = "avx"; -+ break; -+ case 'd': -+ if (!TARGET_AVX2) -+ str = "avx2"; -+ break; -+ case 'e': -+ if (!TARGET_AVX512F) -+ str = "avx512f"; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ if (str == NULL) -+ return; -+ push_cfun (NULL); -+ tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str)); -+ bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0); -+ gcc_assert (ok); -+ pop_cfun (); -+ ix86_reset_previous_fndecl (); -+ ix86_set_current_function (node->decl); -+} -+ -+ -+ -+/* Set the func_type field from the function FNDECL. */ -+ -+static void -+ix86_set_func_type (tree fndecl) -+{ -+ if (cfun->machine->func_type == TYPE_UNKNOWN) -+ { -+ if (lookup_attribute ("interrupt", -+ TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))) -+ { -+ if (ix86_function_naked (fndecl)) -+ error_at (DECL_SOURCE_LOCATION (fndecl), -+ "interrupt and naked attributes are not compatible"); -+ -+ int nargs = 0; -+ for (tree arg = DECL_ARGUMENTS (fndecl); -+ arg; -+ arg = TREE_CHAIN (arg)) -+ nargs++; -+ cfun->machine->no_caller_saved_registers = true; -+ cfun->machine->func_type -+ = nargs == 2 ? TYPE_EXCEPTION : TYPE_INTERRUPT; -+ -+ ix86_optimize_mode_switching[X86_DIRFLAG] = 1; -+ -+ /* Only dwarf2out.c can handle -WORD(AP) as a pointer argument. */ -+ if (write_symbols != NO_DEBUG && write_symbols != DWARF2_DEBUG) -+ sorry ("only DWARF debug format is supported for interrupt " -+ "service routine"); -+ } -+ else -+ { -+ cfun->machine->func_type = TYPE_NORMAL; -+ if (lookup_attribute ("no_caller_saved_registers", -+ TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))) -+ cfun->machine->no_caller_saved_registers = true; -+ } -+ } -+} -+ -+/* Set the indirect_branch_type field from the function FNDECL. */ -+ -+static void -+ix86_set_indirect_branch_type (tree fndecl) -+{ -+ if (cfun->machine->indirect_branch_type == indirect_branch_unset) -+ { -+ tree attr = lookup_attribute ("indirect_branch", -+ DECL_ATTRIBUTES (fndecl)); -+ if (attr != NULL) -+ { -+ tree args = TREE_VALUE (attr); -+ if (args == NULL) -+ gcc_unreachable (); -+ tree cst = TREE_VALUE (args); -+ if (strcmp (TREE_STRING_POINTER (cst), "keep") == 0) -+ cfun->machine->indirect_branch_type = indirect_branch_keep; -+ else if (strcmp (TREE_STRING_POINTER (cst), "thunk") == 0) -+ cfun->machine->indirect_branch_type = indirect_branch_thunk; -+ else if (strcmp (TREE_STRING_POINTER (cst), "thunk-inline") == 0) -+ cfun->machine->indirect_branch_type = indirect_branch_thunk_inline; -+ else if (strcmp (TREE_STRING_POINTER (cst), "thunk-extern") == 0) -+ cfun->machine->indirect_branch_type = indirect_branch_thunk_extern; -+ else -+ gcc_unreachable (); -+ } -+ else -+ cfun->machine->indirect_branch_type = ix86_indirect_branch; -+ -+ /* -mcmodel=large is not compatible with -mindirect-branch=thunk -+ nor -mindirect-branch=thunk-extern. */ -+ if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC) -+ && ((cfun->machine->indirect_branch_type -+ == indirect_branch_thunk_extern) -+ || (cfun->machine->indirect_branch_type -+ == indirect_branch_thunk))) -+ error ("%<-mindirect-branch=%s%> and %<-mcmodel=large%> are not " -+ "compatible", -+ ((cfun->machine->indirect_branch_type -+ == indirect_branch_thunk_extern) -+ ? 
"thunk-extern" : "thunk")); -+ -+ if (cfun->machine->indirect_branch_type != indirect_branch_keep -+ && (flag_cf_protection & CF_RETURN)) -+ error ("%<-mindirect-branch%> and %<-fcf-protection%> are not " -+ "compatible"); -+ } -+ -+ if (cfun->machine->function_return_type == indirect_branch_unset) -+ { -+ tree attr = lookup_attribute ("function_return", -+ DECL_ATTRIBUTES (fndecl)); -+ if (attr != NULL) -+ { -+ tree args = TREE_VALUE (attr); -+ if (args == NULL) -+ gcc_unreachable (); -+ tree cst = TREE_VALUE (args); -+ if (strcmp (TREE_STRING_POINTER (cst), "keep") == 0) -+ cfun->machine->function_return_type = indirect_branch_keep; -+ else if (strcmp (TREE_STRING_POINTER (cst), "thunk") == 0) -+ cfun->machine->function_return_type = indirect_branch_thunk; -+ else if (strcmp (TREE_STRING_POINTER (cst), "thunk-inline") == 0) -+ cfun->machine->function_return_type = indirect_branch_thunk_inline; -+ else if (strcmp (TREE_STRING_POINTER (cst), "thunk-extern") == 0) -+ cfun->machine->function_return_type = indirect_branch_thunk_extern; -+ else -+ gcc_unreachable (); -+ } -+ else -+ cfun->machine->function_return_type = ix86_function_return; -+ -+ /* -mcmodel=large is not compatible with -mfunction-return=thunk -+ nor -mfunction-return=thunk-extern. */ -+ if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC) -+ && ((cfun->machine->function_return_type -+ == indirect_branch_thunk_extern) -+ || (cfun->machine->function_return_type -+ == indirect_branch_thunk))) -+ error ("%<-mfunction-return=%s%> and %<-mcmodel=large%> are not " -+ "compatible", -+ ((cfun->machine->function_return_type -+ == indirect_branch_thunk_extern) -+ ? "thunk-extern" : "thunk")); -+ -+ if (cfun->machine->function_return_type != indirect_branch_keep -+ && (flag_cf_protection & CF_RETURN)) -+ error ("%<-mfunction-return%> and %<-fcf-protection%> are not " -+ "compatible"); -+ } -+} -+ -+/* Establish appropriate back-end context for processing the function -+ FNDECL. The argument might be NULL to indicate processing at top -+ level, outside of any function scope. */ -+void -+ix86_set_current_function (tree fndecl) -+{ -+ /* Only change the context if the function changes. This hook is called -+ several times in the course of compiling a function, and we don't want to -+ slow things down too much or call target_reinit when it isn't safe. */ -+ if (fndecl == ix86_previous_fndecl) -+ { -+ /* There may be 2 function bodies for the same function FNDECL, -+ one is extern inline and one isn't. Call ix86_set_func_type -+ to set the func_type field. 
*/ -+ if (fndecl != NULL_TREE) -+ { -+ ix86_set_func_type (fndecl); -+ ix86_set_indirect_branch_type (fndecl); -+ } -+ return; -+ } -+ -+ tree old_tree; -+ if (ix86_previous_fndecl == NULL_TREE) -+ old_tree = target_option_current_node; -+ else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)) -+ old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl); -+ else -+ old_tree = target_option_default_node; -+ -+ if (fndecl == NULL_TREE) -+ { -+ if (old_tree != target_option_current_node) -+ ix86_reset_previous_fndecl (); -+ return; -+ } -+ -+ ix86_set_func_type (fndecl); -+ ix86_set_indirect_branch_type (fndecl); -+ -+ tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl); -+ if (new_tree == NULL_TREE) -+ new_tree = target_option_default_node; -+ -+ if (old_tree != new_tree) -+ { -+ cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree)); -+ if (TREE_TARGET_GLOBALS (new_tree)) -+ restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); -+ else if (new_tree == target_option_default_node) -+ restore_target_globals (&default_target_globals); -+ else -+ TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts (); -+ } -+ ix86_previous_fndecl = fndecl; -+ -+ static bool prev_no_caller_saved_registers; -+ -+ /* 64-bit MS and SYSV ABI have different set of call used registers. -+ Avoid expensive re-initialization of init_regs each time we switch -+ function context. */ -+ if (TARGET_64BIT -+ && (call_used_or_fixed_reg_p (SI_REG) -+ == (cfun->machine->call_abi == MS_ABI))) -+ reinit_regs (); -+ /* Need to re-initialize init_regs if caller-saved registers are -+ changed. */ -+ else if (prev_no_caller_saved_registers -+ != cfun->machine->no_caller_saved_registers) -+ reinit_regs (); -+ -+ if (cfun->machine->func_type != TYPE_NORMAL -+ || cfun->machine->no_caller_saved_registers) -+ { -+ /* Don't allow SSE, MMX nor x87 instructions since they -+ may change processor state. */ -+ const char *isa; -+ if (TARGET_SSE) -+ isa = "SSE"; -+ else if (TARGET_MMX) -+ isa = "MMX/3Dnow"; -+ else if (TARGET_80387) -+ isa = "80387"; -+ else -+ isa = NULL; -+ if (isa != NULL) -+ { -+ if (cfun->machine->func_type != TYPE_NORMAL) -+ sorry (cfun->machine->func_type == TYPE_EXCEPTION -+ ? G_("%s instructions aren%'t allowed in an" -+ " exception service routine") -+ : G_("%s instructions aren%'t allowed in an" -+ " interrupt service routine"), -+ isa); -+ else -+ sorry ("%s instructions aren%'t allowed in a function with " -+ "the % attribute", isa); -+ /* Don't issue the same error twice. */ -+ cfun->machine->func_type = TYPE_NORMAL; -+ cfun->machine->no_caller_saved_registers = false; -+ } -+ } -+ -+ prev_no_caller_saved_registers -+ = cfun->machine->no_caller_saved_registers; -+} -+ -+/* Implement the TARGET_OFFLOAD_OPTIONS hook. */ -+char * -+ix86_offload_options (void) -+{ -+ if (TARGET_LP64) -+ return xstrdup ("-foffload-abi=lp64"); -+ return xstrdup ("-foffload-abi=ilp32"); -+} -+ -+/* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall", -+ and "sseregparm" calling convention attributes; -+ arguments as in struct attribute_spec.handler. 
*/ -+ -+static tree -+ix86_handle_cconv_attribute (tree *node, tree name, tree args, int, -+ bool *no_add_attrs) -+{ -+ if (TREE_CODE (*node) != FUNCTION_TYPE -+ && TREE_CODE (*node) != METHOD_TYPE -+ && TREE_CODE (*node) != FIELD_DECL -+ && TREE_CODE (*node) != TYPE_DECL) -+ { -+ warning (OPT_Wattributes, "%qE attribute only applies to functions", -+ name); -+ *no_add_attrs = true; -+ return NULL_TREE; -+ } -+ -+ /* Can combine regparm with all attributes but fastcall, and thiscall. */ -+ if (is_attribute_p ("regparm", name)) -+ { -+ tree cst; -+ -+ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) -+ { -+ error ("fastcall and regparm attributes are not compatible"); -+ } -+ -+ if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node))) -+ { -+ error ("regparam and thiscall attributes are not compatible"); -+ } -+ -+ cst = TREE_VALUE (args); -+ if (TREE_CODE (cst) != INTEGER_CST) -+ { -+ warning (OPT_Wattributes, -+ "%qE attribute requires an integer constant argument", -+ name); -+ *no_add_attrs = true; -+ } -+ else if (compare_tree_int (cst, REGPARM_MAX) > 0) -+ { -+ warning (OPT_Wattributes, "argument to %qE attribute larger than %d", -+ name, REGPARM_MAX); -+ *no_add_attrs = true; -+ } -+ -+ return NULL_TREE; -+ } -+ -+ if (TARGET_64BIT) -+ { -+ /* Do not warn when emulating the MS ABI. */ -+ if ((TREE_CODE (*node) != FUNCTION_TYPE -+ && TREE_CODE (*node) != METHOD_TYPE) -+ || ix86_function_type_abi (*node) != MS_ABI) -+ warning (OPT_Wattributes, "%qE attribute ignored", -+ name); -+ *no_add_attrs = true; -+ return NULL_TREE; -+ } -+ -+ /* Can combine fastcall with stdcall (redundant) and sseregparm. */ -+ if (is_attribute_p ("fastcall", name)) -+ { -+ if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node))) -+ { -+ error ("fastcall and cdecl attributes are not compatible"); -+ } -+ if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node))) -+ { -+ error ("fastcall and stdcall attributes are not compatible"); -+ } -+ if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node))) -+ { -+ error ("fastcall and regparm attributes are not compatible"); -+ } -+ if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node))) -+ { -+ error ("fastcall and thiscall attributes are not compatible"); -+ } -+ } -+ -+ /* Can combine stdcall with fastcall (redundant), regparm and -+ sseregparm. */ -+ else if (is_attribute_p ("stdcall", name)) -+ { -+ if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node))) -+ { -+ error ("stdcall and cdecl attributes are not compatible"); -+ } -+ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) -+ { -+ error ("stdcall and fastcall attributes are not compatible"); -+ } -+ if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node))) -+ { -+ error ("stdcall and thiscall attributes are not compatible"); -+ } -+ } -+ -+ /* Can combine cdecl with regparm and sseregparm. 
*/ -+ else if (is_attribute_p ("cdecl", name)) -+ { -+ if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node))) -+ { -+ error ("stdcall and cdecl attributes are not compatible"); -+ } -+ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) -+ { -+ error ("fastcall and cdecl attributes are not compatible"); -+ } -+ if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node))) -+ { -+ error ("cdecl and thiscall attributes are not compatible"); -+ } -+ } -+ else if (is_attribute_p ("thiscall", name)) -+ { -+ if (TREE_CODE (*node) != METHOD_TYPE && pedantic) -+ warning (OPT_Wattributes, "%qE attribute is used for non-class method", -+ name); -+ if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node))) -+ { -+ error ("stdcall and thiscall attributes are not compatible"); -+ } -+ if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) -+ { -+ error ("fastcall and thiscall attributes are not compatible"); -+ } -+ if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node))) -+ { -+ error ("cdecl and thiscall attributes are not compatible"); -+ } -+ } -+ -+ /* Can combine sseregparm with all attributes. */ -+ -+ return NULL_TREE; -+} -+ -+#ifndef CHECK_STACK_LIMIT -+#define CHECK_STACK_LIMIT (-1) -+#endif -+ -+/* The transactional memory builtins are implicitly regparm or fastcall -+ depending on the ABI. Override the generic do-nothing attribute that -+ these builtins were declared with, and replace it with one of the two -+ attributes that we expect elsewhere. */ -+ -+static tree -+ix86_handle_tm_regparm_attribute (tree *node, tree, tree, -+ int flags, bool *no_add_attrs) -+{ -+ tree alt; -+ -+ /* In no case do we want to add the placeholder attribute. */ -+ *no_add_attrs = true; -+ -+ /* The 64-bit ABI is unchanged for transactional memory. */ -+ if (TARGET_64BIT) -+ return NULL_TREE; -+ -+ /* ??? Is there a better way to validate 32-bit windows? We have -+ cfun->machine->call_abi, but that seems to be set only for 64-bit. */ -+ if (CHECK_STACK_LIMIT > 0) -+ alt = tree_cons (get_identifier ("fastcall"), NULL, NULL); -+ else -+ { -+ alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL); -+ alt = tree_cons (get_identifier ("regparm"), alt, NULL); -+ } -+ decl_attributes (node, alt, flags); -+ -+ return NULL_TREE; -+} -+ -+/* Handle a "force_align_arg_pointer" attribute. */ -+ -+static tree -+ix86_handle_force_align_arg_pointer_attribute (tree *node, tree name, -+ tree, int, bool *no_add_attrs) -+{ -+ if (TREE_CODE (*node) != FUNCTION_TYPE -+ && TREE_CODE (*node) != METHOD_TYPE -+ && TREE_CODE (*node) != FIELD_DECL -+ && TREE_CODE (*node) != TYPE_DECL) -+ { -+ warning (OPT_Wattributes, "%qE attribute only applies to functions", -+ name); -+ *no_add_attrs = true; -+ } -+ -+ return NULL_TREE; -+} -+ -+/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in -+ struct attribute_spec.handler. 
*/ -+ -+static tree -+ix86_handle_struct_attribute (tree *node, tree name, tree, int, -+ bool *no_add_attrs) -+{ -+ tree *type = NULL; -+ if (DECL_P (*node)) -+ { -+ if (TREE_CODE (*node) == TYPE_DECL) -+ type = &TREE_TYPE (*node); -+ } -+ else -+ type = node; -+ -+ if (!(type && RECORD_OR_UNION_TYPE_P (*type))) -+ { -+ warning (OPT_Wattributes, "%qE attribute ignored", -+ name); -+ *no_add_attrs = true; -+ } -+ -+ else if ((is_attribute_p ("ms_struct", name) -+ && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type))) -+ || ((is_attribute_p ("gcc_struct", name) -+ && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type))))) -+ { -+ warning (OPT_Wattributes, "%qE incompatible attribute ignored", -+ name); -+ *no_add_attrs = true; -+ } -+ -+ return NULL_TREE; -+} -+ -+/* Handle a "callee_pop_aggregate_return" attribute; arguments as -+ in struct attribute_spec handler. */ -+ -+static tree -+ix86_handle_callee_pop_aggregate_return (tree *node, tree name, tree args, int, -+ bool *no_add_attrs) -+{ -+ if (TREE_CODE (*node) != FUNCTION_TYPE -+ && TREE_CODE (*node) != METHOD_TYPE -+ && TREE_CODE (*node) != FIELD_DECL -+ && TREE_CODE (*node) != TYPE_DECL) -+ { -+ warning (OPT_Wattributes, "%qE attribute only applies to functions", -+ name); -+ *no_add_attrs = true; -+ return NULL_TREE; -+ } -+ if (TARGET_64BIT) -+ { -+ warning (OPT_Wattributes, "%qE attribute only available for 32-bit", -+ name); -+ *no_add_attrs = true; -+ return NULL_TREE; -+ } -+ if (is_attribute_p ("callee_pop_aggregate_return", name)) -+ { -+ tree cst; -+ -+ cst = TREE_VALUE (args); -+ if (TREE_CODE (cst) != INTEGER_CST) -+ { -+ warning (OPT_Wattributes, -+ "%qE attribute requires an integer constant argument", -+ name); -+ *no_add_attrs = true; -+ } -+ else if (compare_tree_int (cst, 0) != 0 -+ && compare_tree_int (cst, 1) != 0) -+ { -+ warning (OPT_Wattributes, -+ "argument to %qE attribute is neither zero, nor one", -+ name); -+ *no_add_attrs = true; -+ } -+ -+ return NULL_TREE; -+ } -+ -+ return NULL_TREE; -+} -+ -+/* Handle a "ms_abi" or "sysv" attribute; arguments as in -+ struct attribute_spec.handler. */ -+ -+static tree -+ix86_handle_abi_attribute (tree *node, tree name, tree, int, -+ bool *no_add_attrs) -+{ -+ if (TREE_CODE (*node) != FUNCTION_TYPE -+ && TREE_CODE (*node) != METHOD_TYPE -+ && TREE_CODE (*node) != FIELD_DECL -+ && TREE_CODE (*node) != TYPE_DECL) -+ { -+ warning (OPT_Wattributes, "%qE attribute only applies to functions", -+ name); -+ *no_add_attrs = true; -+ return NULL_TREE; -+ } -+ -+ /* Can combine regparm with all attributes but fastcall. 
*/ -+ if (is_attribute_p ("ms_abi", name)) -+ { -+ if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node))) -+ { -+ error ("%qs and %qs attributes are not compatible", -+ "ms_abi", "sysv_abi"); -+ } -+ -+ return NULL_TREE; -+ } -+ else if (is_attribute_p ("sysv_abi", name)) -+ { -+ if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node))) -+ { -+ error ("%qs and %qs attributes are not compatible", -+ "ms_abi", "sysv_abi"); -+ } -+ -+ return NULL_TREE; -+ } -+ -+ return NULL_TREE; -+} -+ -+static tree -+ix86_handle_fndecl_attribute (tree *node, tree name, tree args, int, -+ bool *no_add_attrs) -+{ -+ if (TREE_CODE (*node) != FUNCTION_DECL) -+ { -+ warning (OPT_Wattributes, "%qE attribute only applies to functions", -+ name); -+ *no_add_attrs = true; -+ } -+ -+ if (is_attribute_p ("indirect_branch", name)) -+ { -+ tree cst = TREE_VALUE (args); -+ if (TREE_CODE (cst) != STRING_CST) -+ { -+ warning (OPT_Wattributes, -+ "%qE attribute requires a string constant argument", -+ name); -+ *no_add_attrs = true; -+ } -+ else if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0 -+ && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0 -+ && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0 -+ && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0) -+ { -+ warning (OPT_Wattributes, -+ "argument to %qE attribute is not " -+ "(keep|thunk|thunk-inline|thunk-extern)", name); -+ *no_add_attrs = true; -+ } -+ } -+ -+ if (is_attribute_p ("function_return", name)) -+ { -+ tree cst = TREE_VALUE (args); -+ if (TREE_CODE (cst) != STRING_CST) -+ { -+ warning (OPT_Wattributes, -+ "%qE attribute requires a string constant argument", -+ name); -+ *no_add_attrs = true; -+ } -+ else if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0 -+ && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0 -+ && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0 -+ && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0) -+ { -+ warning (OPT_Wattributes, -+ "argument to %qE attribute is not " -+ "(keep|thunk|thunk-inline|thunk-extern)", name); -+ *no_add_attrs = true; -+ } -+ } -+ -+ return NULL_TREE; -+} -+ -+static tree -+ix86_handle_no_caller_saved_registers_attribute (tree *, tree, tree, -+ int, bool *) -+{ -+ return NULL_TREE; -+} -+ -+static tree -+ix86_handle_interrupt_attribute (tree *node, tree, tree, int, bool *) -+{ -+ /* DECL_RESULT and DECL_ARGUMENTS do not exist there yet, -+ but the function type contains args and return type data. */ -+ tree func_type = *node; -+ tree return_type = TREE_TYPE (func_type); -+ -+ int nargs = 0; -+ tree current_arg_type = TYPE_ARG_TYPES (func_type); -+ while (current_arg_type -+ && ! VOID_TYPE_P (TREE_VALUE (current_arg_type))) -+ { -+ if (nargs == 0) -+ { -+ if (! POINTER_TYPE_P (TREE_VALUE (current_arg_type))) -+ error ("interrupt service routine should have a pointer " -+ "as the first argument"); -+ } -+ else if (nargs == 1) -+ { -+ if (TREE_CODE (TREE_VALUE (current_arg_type)) != INTEGER_TYPE -+ || TYPE_MODE (TREE_VALUE (current_arg_type)) != word_mode) -+ error ("interrupt service routine should have %qs " -+ "as the second argument", -+ TARGET_64BIT -+ ? (TARGET_X32 ? "unsigned long long int" -+ : "unsigned long int") -+ : "unsigned int"); -+ } -+ nargs++; -+ current_arg_type = TREE_CHAIN (current_arg_type); -+ } -+ if (!nargs || nargs > 2) -+ error ("interrupt service routine can only have a pointer argument " -+ "and an optional integer argument"); -+ if (! 
VOID_TYPE_P (return_type)) -+ error ("interrupt service routine must return %"); -+ -+ return NULL_TREE; -+} -+ -+/* Handle fentry_name / fentry_section attribute. */ -+ -+static tree -+ix86_handle_fentry_name (tree *node, tree name, tree args, -+ int, bool *no_add_attrs) -+{ -+ if (TREE_CODE (*node) == FUNCTION_DECL -+ && TREE_CODE (TREE_VALUE (args)) == STRING_CST) -+ /* Do nothing else, just set the attribute. We'll get at -+ it later with lookup_attribute. */ -+ ; -+ else -+ { -+ warning (OPT_Wattributes, "%qE attribute ignored", name); -+ *no_add_attrs = true; -+ } -+ -+ return NULL_TREE; -+} -+ -+/* Table of valid machine attributes. */ -+const struct attribute_spec ix86_attribute_table[] = -+{ -+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, -+ affects_type_identity, handler, exclude } */ -+ /* Stdcall attribute says callee is responsible for popping arguments -+ if they are not variable. */ -+ { "stdcall", 0, 0, false, true, true, true, ix86_handle_cconv_attribute, -+ NULL }, -+ /* Fastcall attribute says callee is responsible for popping arguments -+ if they are not variable. */ -+ { "fastcall", 0, 0, false, true, true, true, ix86_handle_cconv_attribute, -+ NULL }, -+ /* Thiscall attribute says callee is responsible for popping arguments -+ if they are not variable. */ -+ { "thiscall", 0, 0, false, true, true, true, ix86_handle_cconv_attribute, -+ NULL }, -+ /* Cdecl attribute says the callee is a normal C declaration */ -+ { "cdecl", 0, 0, false, true, true, true, ix86_handle_cconv_attribute, -+ NULL }, -+ /* Regparm attribute specifies how many integer arguments are to be -+ passed in registers. */ -+ { "regparm", 1, 1, false, true, true, true, ix86_handle_cconv_attribute, -+ NULL }, -+ /* Sseregparm attribute says we are using x86_64 calling conventions -+ for FP arguments. */ -+ { "sseregparm", 0, 0, false, true, true, true, ix86_handle_cconv_attribute, -+ NULL }, -+ /* The transactional memory builtins are implicitly regparm or fastcall -+ depending on the ABI. Override the generic do-nothing attribute that -+ these builtins were declared with. */ -+ { "*tm regparm", 0, 0, false, true, true, true, -+ ix86_handle_tm_regparm_attribute, NULL }, -+ /* force_align_arg_pointer says this function realigns the stack at entry. */ -+ { "force_align_arg_pointer", 0, 0, -+ false, true, true, false, ix86_handle_force_align_arg_pointer_attribute, -+ NULL }, -+#if TARGET_DLLIMPORT_DECL_ATTRIBUTES -+ { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute, -+ NULL }, -+ { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute, -+ NULL }, -+ { "shared", 0, 0, true, false, false, false, -+ ix86_handle_shared_attribute, NULL }, -+#endif -+ { "ms_struct", 0, 0, false, false, false, false, -+ ix86_handle_struct_attribute, NULL }, -+ { "gcc_struct", 0, 0, false, false, false, false, -+ ix86_handle_struct_attribute, NULL }, -+#ifdef SUBTARGET_ATTRIBUTE_TABLE -+ SUBTARGET_ATTRIBUTE_TABLE, -+#endif -+ /* ms_abi and sysv_abi calling convention function attributes. 
*/ -+ { "ms_abi", 0, 0, false, true, true, true, ix86_handle_abi_attribute, NULL }, -+ { "sysv_abi", 0, 0, false, true, true, true, ix86_handle_abi_attribute, -+ NULL }, -+ { "ms_abi va_list", 0, 0, false, false, false, false, NULL, NULL }, -+ { "sysv_abi va_list", 0, 0, false, false, false, false, NULL, NULL }, -+ { "ms_hook_prologue", 0, 0, true, false, false, false, -+ ix86_handle_fndecl_attribute, NULL }, -+ { "callee_pop_aggregate_return", 1, 1, false, true, true, true, -+ ix86_handle_callee_pop_aggregate_return, NULL }, -+ { "interrupt", 0, 0, false, true, true, false, -+ ix86_handle_interrupt_attribute, NULL }, -+ { "no_caller_saved_registers", 0, 0, false, true, true, false, -+ ix86_handle_no_caller_saved_registers_attribute, NULL }, -+ { "naked", 0, 0, true, false, false, false, -+ ix86_handle_fndecl_attribute, NULL }, -+ { "indirect_branch", 1, 1, true, false, false, false, -+ ix86_handle_fndecl_attribute, NULL }, -+ { "function_return", 1, 1, true, false, false, false, -+ ix86_handle_fndecl_attribute, NULL }, -+ { "indirect_return", 0, 0, false, true, true, false, -+ NULL, NULL }, -+ { "fentry_name", 1, 1, true, false, false, false, -+ ix86_handle_fentry_name, NULL }, -+ { "fentry_section", 1, 1, true, false, false, false, -+ ix86_handle_fentry_name, NULL }, -+ { "cf_check", 0, 0, true, false, false, false, -+ ix86_handle_fndecl_attribute, NULL }, -+ -+ /* End element. */ -+ { NULL, 0, 0, false, false, false, false, NULL, NULL } -+}; -+ -+#include "gt-i386-options.h" -diff --git a/gcc/config/i386/i386-options.h b/gcc/config/i386/i386-options.h -new file mode 100644 -index 000000000..817ddda5c ---- /dev/null -+++ b/gcc/config/i386/i386-options.h -@@ -0,0 +1,95 @@ -+/* Copyright (C) 1988-2019 Free Software Foundation, Inc. -+ -+This file is part of GCC. -+ -+GCC is free software; you can redistribute it and/or modify -+it under the terms of the GNU General Public License as published by -+the Free Software Foundation; either version 3, or (at your option) -+any later version. -+ -+GCC is distributed in the hope that it will be useful, -+but WITHOUT ANY WARRANTY; without even the implied warranty of -+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+GNU General Public License for more details. -+ -+You should have received a copy of the GNU General Public License -+along with GCC; see the file COPYING3. If not see -+. 
*/ -+ -+#ifndef GCC_I386_OPTIONS_H -+#define GCC_I386_OPTIONS_H -+ -+char *ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2, -+ int flags, int flags2, -+ const char *arch, const char *tune, -+ enum fpmath_unit fpmath, bool add_nl_p, -+ bool add_abi_p); -+ -+extern enum attr_cpu ix86_schedule; -+ -+extern enum processor_type ix86_tune; -+extern enum processor_type ix86_arch; -+extern unsigned char x86_prefetch_sse; -+extern const struct processor_costs *ix86_tune_cost; -+ -+extern int ix86_tune_defaulted; -+extern int ix86_arch_specified; -+ -+extern unsigned int ix86_default_incoming_stack_boundary; -+extern HOST_WIDE_INT deferred_isa_values; -+extern HOST_WIDE_INT deferred_isa_values2; -+ -+extern unsigned int ix86_preferred_stack_boundary; -+extern unsigned int ix86_user_incoming_stack_boundary; -+extern unsigned int ix86_default_incoming_stack_boundary; -+extern unsigned int ix86_incoming_stack_boundary; -+ -+extern char *ix86_offload_options (void); -+extern void ix86_option_override (void); -+extern void ix86_override_options_after_change (void); -+void ix86_set_current_function (tree fndecl); -+bool ix86_function_naked (const_tree fn); -+void ix86_simd_clone_adjust (struct cgraph_node *node); -+ -+extern tree (*ix86_veclib_handler) (combined_fn, tree, tree); -+extern tree ix86_veclibabi_svml (combined_fn, tree, tree); -+extern tree ix86_veclibabi_acml (combined_fn, tree, tree); -+ -+extern rtx (*ix86_gen_leave) (void); -+extern rtx (*ix86_gen_add3) (rtx, rtx, rtx); -+extern rtx (*ix86_gen_sub3) (rtx, rtx, rtx); -+extern rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx); -+extern rtx (*ix86_gen_one_cmpl2) (rtx, rtx); -+extern rtx (*ix86_gen_monitor) (rtx, rtx, rtx); -+extern rtx (*ix86_gen_monitorx) (rtx, rtx, rtx); -+extern rtx (*ix86_gen_clzero) (rtx); -+extern rtx (*ix86_gen_andsp) (rtx, rtx, rtx); -+extern rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx); -+extern rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx); -+extern rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx); -+extern rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx); -+extern rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx); -+ -+enum ix86_function_specific_strings -+{ -+ IX86_FUNCTION_SPECIFIC_ARCH, -+ IX86_FUNCTION_SPECIFIC_TUNE, -+ IX86_FUNCTION_SPECIFIC_MAX -+}; -+ -+extern const char *stringop_alg_names[]; -+ -+void ix86_add_new_builtins (HOST_WIDE_INT isa, HOST_WIDE_INT isa2); -+void ix86_function_specific_save (struct cl_target_option *, -+ struct gcc_options *opts); -+void ix86_function_specific_restore (struct gcc_options *opts, -+ struct cl_target_option *); -+void ix86_function_specific_post_stream_in (struct cl_target_option *); -+void ix86_function_specific_print (FILE *, int, -+ struct cl_target_option *); -+bool ix86_valid_target_attribute_p (tree, tree, tree, int); -+ -+extern const struct attribute_spec ix86_attribute_table[]; -+ -+ -+#endif /* GCC_I386_OPTIONS_H */ -diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h -index 83645e89a..4afba5bc2 100644 ---- a/gcc/config/i386/i386-protos.h -+++ b/gcc/config/i386/i386-protos.h -@@ -65,7 +65,7 @@ extern int avx_vpermilp_parallel (rtx par, machine_mode mode); - extern int avx_vperm2f128_parallel (rtx par, machine_mode mode); - - extern bool ix86_expand_strlen (rtx, rtx, rtx, rtx); --extern bool ix86_expand_set_or_movmem (rtx, rtx, rtx, rtx, rtx, rtx, -+extern bool ix86_expand_set_or_cpymem (rtx, rtx, rtx, rtx, rtx, rtx, - rtx, rtx, rtx, rtx, bool); - - extern bool constant_address_p (rtx); -@@ -207,7 
+207,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int); - #endif /* RTX_CODE */ - - #ifdef TREE_CODE --extern int ix86_data_alignment (tree, int, bool); -+extern int ix86_data_alignment (tree, unsigned int, bool); - extern unsigned int ix86_local_alignment (tree, machine_mode, - unsigned int); - extern unsigned int ix86_minimum_alignment (tree, machine_mode, -@@ -215,9 +215,9 @@ extern unsigned int ix86_minimum_alignment (tree, machine_mode, - extern tree ix86_handle_shared_attribute (tree *, tree, tree, int, bool *); - extern tree ix86_handle_selectany_attribute (tree *, tree, tree, int, bool *); - extern int x86_field_alignment (tree, int); --extern tree ix86_valid_target_attribute_tree (tree, -+extern tree ix86_valid_target_attribute_tree (tree, tree, - struct gcc_options *, -- struct gcc_options *); -+ struct gcc_options *, bool); - extern unsigned int ix86_get_callcvt (const_tree); - - #endif -diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c -index 5a0f8a0eb..9282a8fb6 100644 ---- a/gcc/config/i386/i386.c -+++ b/gcc/config/i386/i386.c -@@ -91,19 +91,17 @@ along with GCC; see the file COPYING3. If not see - #include "tree-vector-builder.h" - #include "debug.h" - #include "dwarf2out.h" -+#include "i386-options.h" -+#include "i386-builtins.h" -+#include "i386-expand.h" -+#include "i386-features.h" - - /* This file should be included last. */ - #include "target-def.h" - --#include "x86-tune-costs.h" -- - static rtx legitimize_dllimport_symbol (rtx, bool); - static rtx legitimize_pe_coff_extern_decl (rtx, bool); --static rtx legitimize_pe_coff_symbol (rtx, bool); - static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool); --static bool ix86_save_reg (unsigned int, bool, bool); --static bool ix86_function_naked (const_tree); --static bool ix86_notrack_prefixed_insn_p (rtx); - static void ix86_emit_restore_reg_using_pop (rtx); - - -@@ -126,102 +124,6 @@ const struct processor_costs *ix86_tune_cost = NULL; - /* Set by -mtune or -Os. */ - const struct processor_costs *ix86_cost = NULL; - --/* Processor feature/optimization bitmasks. */ --#define m_386 (HOST_WIDE_INT_1U<machine->call_ms2sysv_extra_regs and -- 3.) rather or not stack alignment is being performed. */ -- static rtx get_stub_rtx (enum xlogue_stub stub); -- -- /* Returns the amount of stack space (including padding) that the stub -- needs to store registers based upon data in the machine_function. */ -- HOST_WIDE_INT get_stack_space_used () const -- { -- const struct machine_function *m = cfun->machine; -- unsigned last_reg = m->call_ms2sysv_extra_regs + MIN_REGS - 1; -- -- gcc_assert (m->call_ms2sysv_extra_regs <= MAX_EXTRA_REGS); -- return m_regs[last_reg].offset + STUB_INDEX_OFFSET; -- } -- -- /* Returns the offset for the base pointer used by the stub. 
*/ -- HOST_WIDE_INT get_stub_ptr_offset () const -- { -- return STUB_INDEX_OFFSET + m_stack_align_off_in; -- } -- -- static const struct xlogue_layout &get_instance (); -- static unsigned count_stub_managed_regs (); -- static bool is_stub_managed_reg (unsigned regno, unsigned count); -- -- static const HOST_WIDE_INT STUB_INDEX_OFFSET = 0x70; -- static const unsigned MIN_REGS = NUM_X86_64_MS_CLOBBERED_REGS; -- static const unsigned MAX_REGS = 18; -- static const unsigned MAX_EXTRA_REGS = MAX_REGS - MIN_REGS; -- static const unsigned VARIANT_COUNT = MAX_EXTRA_REGS + 1; -- static const unsigned STUB_NAME_MAX_LEN = 20; -- static const char * const STUB_BASE_NAMES[XLOGUE_STUB_COUNT]; -- static const unsigned REG_ORDER[MAX_REGS]; -- static const unsigned REG_ORDER_REALIGN[MAX_REGS]; -- --private: -- xlogue_layout (); -- xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp); -- xlogue_layout (const xlogue_layout &); -- -- /* True if hard frame pointer is used. */ -- bool m_hfp; -- -- /* Max number of register this layout manages. */ -- unsigned m_nregs; -- -- /* Incoming offset from 16-byte alignment. */ -- HOST_WIDE_INT m_stack_align_off_in; -- -- /* Register order and offsets. */ -- struct reginfo m_regs[MAX_REGS]; -- -- /* Lazy-inited cache of symbol names for stubs. */ -- static char s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT] -- [STUB_NAME_MAX_LEN]; -- -- static const xlogue_layout s_instances[XLOGUE_SET_COUNT]; --}; -- --const char * const xlogue_layout::STUB_BASE_NAMES[XLOGUE_STUB_COUNT] = { -- "savms64", -- "resms64", -- "resms64x", -- "savms64f", -- "resms64f", -- "resms64fx" --}; -- --const unsigned xlogue_layout::REG_ORDER[xlogue_layout::MAX_REGS] = { --/* The below offset values are where each register is stored for the layout -- relative to incoming stack pointer. The value of each m_regs[].offset will -- be relative to the incoming base pointer (rax or rsi) used by the stub. -- -- s_instances: 0 1 2 3 -- Offset: realigned or aligned + 8 -- Register aligned aligned + 8 aligned w/HFP w/HFP */ -- XMM15_REG, /* 0x10 0x18 0x10 0x18 */ -- XMM14_REG, /* 0x20 0x28 0x20 0x28 */ -- XMM13_REG, /* 0x30 0x38 0x30 0x38 */ -- XMM12_REG, /* 0x40 0x48 0x40 0x48 */ -- XMM11_REG, /* 0x50 0x58 0x50 0x58 */ -- XMM10_REG, /* 0x60 0x68 0x60 0x68 */ -- XMM9_REG, /* 0x70 0x78 0x70 0x78 */ -- XMM8_REG, /* 0x80 0x88 0x80 0x88 */ -- XMM7_REG, /* 0x90 0x98 0x90 0x98 */ -- XMM6_REG, /* 0xa0 0xa8 0xa0 0xa8 */ -- SI_REG, /* 0xa8 0xb0 0xa8 0xb0 */ -- DI_REG, /* 0xb0 0xb8 0xb0 0xb8 */ -- BX_REG, /* 0xb8 0xc0 0xb8 0xc0 */ -- BP_REG, /* 0xc0 0xc8 N/A N/A */ -- R12_REG, /* 0xc8 0xd0 0xc0 0xc8 */ -- R13_REG, /* 0xd0 0xd8 0xc8 0xd0 */ -- R14_REG, /* 0xd8 0xe0 0xd0 0xd8 */ -- R15_REG, /* 0xe0 0xe8 0xd8 0xe0 */ --}; -- --/* Instantiate static const values. */ --const HOST_WIDE_INT xlogue_layout::STUB_INDEX_OFFSET; --const unsigned xlogue_layout::MIN_REGS; --const unsigned xlogue_layout::MAX_REGS; --const unsigned xlogue_layout::MAX_EXTRA_REGS; --const unsigned xlogue_layout::VARIANT_COUNT; --const unsigned xlogue_layout::STUB_NAME_MAX_LEN; -- --/* Initialize xlogue_layout::s_stub_names to zero. */ --char xlogue_layout::s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT] -- [STUB_NAME_MAX_LEN]; -- --/* Instantiates all xlogue_layout instances. 
*/ --const xlogue_layout xlogue_layout::s_instances[XLOGUE_SET_COUNT] = { -- xlogue_layout (0, false), -- xlogue_layout (8, false), -- xlogue_layout (0, true), -- xlogue_layout (8, true) --}; -- --/* Return an appropriate const instance of xlogue_layout based upon values -- in cfun->machine and crtl. */ --const struct xlogue_layout & --xlogue_layout::get_instance () --{ -- enum xlogue_stub_sets stub_set; -- bool aligned_plus_8 = cfun->machine->call_ms2sysv_pad_in; -- -- if (stack_realign_fp) -- stub_set = XLOGUE_SET_HFP_ALIGNED_OR_REALIGN; -- else if (frame_pointer_needed) -- stub_set = aligned_plus_8 -- ? XLOGUE_SET_HFP_ALIGNED_PLUS_8 -- : XLOGUE_SET_HFP_ALIGNED_OR_REALIGN; -- else -- stub_set = aligned_plus_8 ? XLOGUE_SET_ALIGNED_PLUS_8 : XLOGUE_SET_ALIGNED; -- -- return s_instances[stub_set]; --} -- --/* Determine how many clobbered registers can be saved by the stub. -- Returns the count of registers the stub will save and restore. */ --unsigned --xlogue_layout::count_stub_managed_regs () --{ -- bool hfp = frame_pointer_needed || stack_realign_fp; -- unsigned i, count; -- unsigned regno; -- -- for (count = i = MIN_REGS; i < MAX_REGS; ++i) -- { -- regno = REG_ORDER[i]; -- if (regno == BP_REG && hfp) -- continue; -- if (!ix86_save_reg (regno, false, false)) -- break; -- ++count; -- } -- return count; --} -- --/* Determine if register REGNO is a stub managed register given the -- total COUNT of stub managed registers. */ --bool --xlogue_layout::is_stub_managed_reg (unsigned regno, unsigned count) --{ -- bool hfp = frame_pointer_needed || stack_realign_fp; -- unsigned i; -- -- for (i = 0; i < count; ++i) -- { -- gcc_assert (i < MAX_REGS); -- if (REG_ORDER[i] == BP_REG && hfp) -- ++count; -- else if (REG_ORDER[i] == regno) -- return true; -- } -- return false; --} -- --/* Constructor for xlogue_layout. */ --xlogue_layout::xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp) -- : m_hfp (hfp) , m_nregs (hfp ? 17 : 18), -- m_stack_align_off_in (stack_align_off_in) --{ -- HOST_WIDE_INT offset = stack_align_off_in; -- unsigned i, j; -- -- for (i = j = 0; i < MAX_REGS; ++i) -- { -- unsigned regno = REG_ORDER[i]; -- -- if (regno == BP_REG && hfp) -- continue; -- if (SSE_REGNO_P (regno)) -- { -- offset += 16; -- /* Verify that SSE regs are always aligned. */ -- gcc_assert (!((stack_align_off_in + offset) & 15)); -- } -- else -- offset += 8; -- -- m_regs[j].regno = regno; -- m_regs[j++].offset = offset - STUB_INDEX_OFFSET; -- } -- gcc_assert (j == m_nregs); --} -- --const char * --xlogue_layout::get_stub_name (enum xlogue_stub stub, -- unsigned n_extra_regs) --{ -- const int have_avx = TARGET_AVX; -- char *name = s_stub_names[!!have_avx][stub][n_extra_regs]; -- -- /* Lazy init */ -- if (!*name) -- { -- int res = snprintf (name, STUB_NAME_MAX_LEN, "__%s_%s_%u", -- (have_avx ? "avx" : "sse"), -- STUB_BASE_NAMES[stub], -- MIN_REGS + n_extra_regs); -- gcc_checking_assert (res < (int)STUB_NAME_MAX_LEN); -- } -- -- return name; --} -- --/* Return rtx of a symbol ref for the entry point (based upon -- cfun->machine->call_ms2sysv_extra_regs) of the specified stub. */ --rtx --xlogue_layout::get_stub_rtx (enum xlogue_stub stub) --{ -- const unsigned n_extra_regs = cfun->machine->call_ms2sysv_extra_regs; -- gcc_checking_assert (n_extra_regs <= MAX_EXTRA_REGS); -- gcc_assert (stub < XLOGUE_STUB_COUNT); -- gcc_assert (crtl->stack_realign_finalized); -- -- return gen_rtx_SYMBOL_REF (Pmode, get_stub_name (stub, n_extra_regs)); --} -- - /* Define the structure for the machine field in struct function. 
*/ - - struct GTY(()) stack_local_entry { -@@ -741,41 +349,37 @@ enum processor_type ix86_arch; - /* True if processor has SSE prefetch instruction. */ - unsigned char x86_prefetch_sse; - --/* -mstackrealign option */ --static const char ix86_force_align_arg_pointer_string[] -- = "force_align_arg_pointer"; -- --static rtx (*ix86_gen_leave) (void); --static rtx (*ix86_gen_add3) (rtx, rtx, rtx); --static rtx (*ix86_gen_sub3) (rtx, rtx, rtx); --static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx); --static rtx (*ix86_gen_one_cmpl2) (rtx, rtx); --static rtx (*ix86_gen_monitor) (rtx, rtx, rtx); --static rtx (*ix86_gen_monitorx) (rtx, rtx, rtx); --static rtx (*ix86_gen_clzero) (rtx); --static rtx (*ix86_gen_andsp) (rtx, rtx, rtx); --static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx); --static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx); --static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx); --static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx); --static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx); -+rtx (*ix86_gen_leave) (void); -+rtx (*ix86_gen_add3) (rtx, rtx, rtx); -+rtx (*ix86_gen_sub3) (rtx, rtx, rtx); -+rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx); -+rtx (*ix86_gen_one_cmpl2) (rtx, rtx); -+rtx (*ix86_gen_monitor) (rtx, rtx, rtx); -+rtx (*ix86_gen_monitorx) (rtx, rtx, rtx); -+rtx (*ix86_gen_clzero) (rtx); -+rtx (*ix86_gen_andsp) (rtx, rtx, rtx); -+rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx); -+rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx); -+rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx); -+rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx); -+rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx); - - /* Preferred alignment for stack boundary in bits. */ - unsigned int ix86_preferred_stack_boundary; - - /* Alignment for incoming stack boundary in bits specified at - command line. */ --static unsigned int ix86_user_incoming_stack_boundary; -+unsigned int ix86_user_incoming_stack_boundary; - - /* Default alignment for incoming stack boundary in bits. */ --static unsigned int ix86_default_incoming_stack_boundary; -+unsigned int ix86_default_incoming_stack_boundary; - - /* Alignment for incoming stack boundary in bits. */ - unsigned int ix86_incoming_stack_boundary; - - /* Calling abi specific va_list type nodes. */ --static GTY(()) tree sysv_va_list_type_node; --static GTY(()) tree ms_va_list_type_node; -+tree sysv_va_list_type_node; -+tree ms_va_list_type_node; - - /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. 
*/ - char internal_label_prefix[16]; -@@ -813,7 +417,6 @@ static REAL_VALUE_TYPE ext_80387_constants_table [5]; - static bool ext_80387_constants_init; - - --static struct machine_function * ix86_init_machine_status (void); - static rtx ix86_function_value (const_tree, const_tree, bool); - static bool ix86_function_value_regno_p (const unsigned int); - static unsigned int ix86_function_arg_boundary (machine_mode, -@@ -821,49173 +424,20710 @@ static unsigned int ix86_function_arg_boundary (machine_mode, - static rtx ix86_static_chain (const_tree, bool); - static int ix86_function_regparm (const_tree, const_tree); - static void ix86_compute_frame_layout (void); --static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode, -- rtx, rtx, int); --static void ix86_add_new_builtins (HOST_WIDE_INT, HOST_WIDE_INT); - static tree ix86_canonical_va_list_type (tree); --static void predict_jump (int); - static unsigned int split_stack_prologue_scratch_regno (void); - static bool i386_asm_output_addr_const_extra (FILE *, rtx); - --enum ix86_function_specific_strings --{ -- IX86_FUNCTION_SPECIFIC_ARCH, -- IX86_FUNCTION_SPECIFIC_TUNE, -- IX86_FUNCTION_SPECIFIC_MAX --}; -- --static char *ix86_target_string (HOST_WIDE_INT, HOST_WIDE_INT, int, int, -- const char *, const char *, enum fpmath_unit, -- bool, bool); --static void ix86_function_specific_save (struct cl_target_option *, -- struct gcc_options *opts); --static void ix86_function_specific_restore (struct gcc_options *opts, -- struct cl_target_option *); --static void ix86_function_specific_post_stream_in (struct cl_target_option *); --static void ix86_function_specific_print (FILE *, int, -- struct cl_target_option *); --static bool ix86_valid_target_attribute_p (tree, tree, tree, int); --static bool ix86_valid_target_attribute_inner_p (tree, char *[], -- struct gcc_options *, -- struct gcc_options *, -- struct gcc_options *); - static bool ix86_can_inline_p (tree, tree); --static void ix86_set_current_function (tree); - static unsigned int ix86_minimum_incoming_stack_boundary (bool); - --static enum calling_abi ix86_function_abi (const_tree); -- - --#ifndef SUBTARGET32_DEFAULT_CPU --#define SUBTARGET32_DEFAULT_CPU "i386" --#endif -- - /* Whether -mtune= or -march= were specified */ --static int ix86_tune_defaulted; --static int ix86_arch_specified; -- --/* Vectorization library interface and handlers. */ --static tree (*ix86_veclib_handler) (combined_fn, tree, tree); -- --static tree ix86_veclibabi_svml (combined_fn, tree, tree); --static tree ix86_veclibabi_acml (combined_fn, tree, tree); -- --/* This table must be in sync with enum processor_type in i386.h. */ --static const struct processor_costs *processor_cost_table[] = --{ -- &generic_cost, -- &i386_cost, -- &i486_cost, -- &pentium_cost, -- &lakemont_cost, -- &pentiumpro_cost, -- &pentium4_cost, -- &nocona_cost, -- &core_cost, -- &core_cost, -- &core_cost, -- &core_cost, -- &atom_cost, -- &slm_cost, -- &slm_cost, -- &slm_cost, -- &slm_cost, -- &slm_cost, -- &slm_cost, -- &skylake_cost, -- &skylake_cost, -- &skylake_cost, -- &skylake_cost, -- &skylake_cost, -- &skylake_cost, -- &intel_cost, -- &geode_cost, -- &k6_cost, -- &athlon_cost, -- &k8_cost, -- &amdfam10_cost, -- &bdver_cost, -- &bdver_cost, -- &bdver_cost, -- &bdver_cost, -- &btver1_cost, -- &btver2_cost, -- &znver1_cost, -- &znver2_cost --}; -- --/* Guarantee that the array is aligned with enum processor_type. 
*/ --STATIC_ASSERT (ARRAY_SIZE (processor_cost_table) == PROCESSOR_max); -+int ix86_tune_defaulted; -+int ix86_arch_specified; - --static unsigned int --rest_of_handle_insert_vzeroupper (void) --{ -- int i; -- -- /* vzeroupper instructions are inserted immediately after reload to -- account for possible spills from 256bit or 512bit registers. The pass -- reuses mode switching infrastructure by re-running mode insertion -- pass, so disable entities that have already been processed. */ -- for (i = 0; i < MAX_386_ENTITIES; i++) -- ix86_optimize_mode_switching[i] = 0; -+/* Return true if a red-zone is in use. We can't use red-zone when -+ there are local indirect jumps, like "indirect_jump" or "tablejump", -+ which jumps to another place in the function, since "call" in the -+ indirect thunk pushes the return address onto stack, destroying -+ red-zone. - -- ix86_optimize_mode_switching[AVX_U128] = 1; -+ TODO: If we can reserve the first 2 WORDs, for PUSH and, another -+ for CALL, in red-zone, we can allow local indirect jumps with -+ indirect thunk. */ - -- /* Call optimize_mode_switching. */ -- g->get_passes ()->execute_pass_mode_switching (); -- return 0; -+bool -+ix86_using_red_zone (void) -+{ -+ return (TARGET_RED_ZONE -+ && !TARGET_64BIT_MS_ABI -+ && (!cfun->machine->has_local_indirect_jump -+ || cfun->machine->indirect_branch_type == indirect_branch_keep)); - } -- --/* Return 1 if INSN uses or defines a hard register. -- Hard register uses in a memory address are ignored. -- Clobbers and flags definitions are ignored. */ -- -+ -+/* Return true, if profiling code should be emitted before -+ prologue. Otherwise it returns false. -+ Note: For x86 with "hotfix" it is sorried. */ - static bool --has_non_address_hard_reg (rtx_insn *insn) -+ix86_profile_before_prologue (void) - { -- df_ref ref; -- FOR_EACH_INSN_DEF (ref, insn) -- if (HARD_REGISTER_P (DF_REF_REAL_REG (ref)) -- && !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER) -- && DF_REF_REGNO (ref) != FLAGS_REG) -- return true; -- -- FOR_EACH_INSN_USE (ref, insn) -- if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref))) -- return true; -- -- return false; -+ return flag_fentry != 0; - } - --/* Check if comparison INSN may be transformed -- into vector comparison. Currently we transform -- zero checks only which look like: -- -- (set (reg:CCZ 17 flags) -- (compare:CCZ (ior:SI (subreg:SI (reg:DI x) 4) -- (subreg:SI (reg:DI x) 0)) -- (const_int 0 [0]))) */ -+/* Update register usage after having seen the compiler flags. */ - --static bool --convertible_comparison_p (rtx_insn *insn) -+static void -+ix86_conditional_register_usage (void) - { -- if (!TARGET_SSE4_1) -- return false; -+ int i, c_mask; - -- rtx def_set = single_set (insn); -+ /* If there are no caller-saved registers, preserve all registers. -+ except fixed_regs and registers used for function return value -+ since aggregate_value_p checks call_used_regs[regno] on return -+ value. */ -+ if (cfun && cfun->machine->no_caller_saved_registers) -+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) -+ if (!fixed_regs[i] && !ix86_function_value_regno_p (i)) -+ call_used_regs[i] = 0; - -- gcc_assert (def_set); -+ /* For 32-bit targets, disable the REX registers. */ -+ if (! 
TARGET_64BIT) -+ { -+ for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++) -+ CLEAR_HARD_REG_BIT (accessible_reg_set, i); -+ for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) -+ CLEAR_HARD_REG_BIT (accessible_reg_set, i); -+ for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++) -+ CLEAR_HARD_REG_BIT (accessible_reg_set, i); -+ } - -- rtx src = SET_SRC (def_set); -- rtx dst = SET_DEST (def_set); -+ /* See the definition of CALL_USED_REGISTERS in i386.h. */ -+ c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI); -+ -+ CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]); - -- gcc_assert (GET_CODE (src) == COMPARE); -+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) -+ { -+ /* Set/reset conditionally defined registers from -+ CALL_USED_REGISTERS initializer. */ -+ if (call_used_regs[i] > 1) -+ call_used_regs[i] = !!(call_used_regs[i] & c_mask); - -- if (GET_CODE (dst) != REG -- || REGNO (dst) != FLAGS_REG -- || GET_MODE (dst) != CCZmode) -- return false; -+ /* Calculate registers of CLOBBERED_REGS register set -+ as call used registers from GENERAL_REGS register set. */ -+ if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i) -+ && call_used_regs[i]) -+ SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i); -+ } - -- rtx op1 = XEXP (src, 0); -- rtx op2 = XEXP (src, 1); -+ /* If MMX is disabled, disable the registers. */ -+ if (! TARGET_MMX) -+ accessible_reg_set &= ~reg_class_contents[MMX_REGS]; - -- if (op2 != CONST0_RTX (GET_MODE (op2))) -- return false; -+ /* If SSE is disabled, disable the registers. */ -+ if (! TARGET_SSE) -+ accessible_reg_set &= ~reg_class_contents[ALL_SSE_REGS]; - -- if (GET_CODE (op1) != IOR) -- return false; -+ /* If the FPU is disabled, disable the registers. */ -+ if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387)) -+ accessible_reg_set &= ~reg_class_contents[FLOAT_REGS]; - -- op2 = XEXP (op1, 1); -- op1 = XEXP (op1, 0); -- -- if (!SUBREG_P (op1) -- || !SUBREG_P (op2) -- || GET_MODE (op1) != SImode -- || GET_MODE (op2) != SImode -- || ((SUBREG_BYTE (op1) != 0 -- || SUBREG_BYTE (op2) != GET_MODE_SIZE (SImode)) -- && (SUBREG_BYTE (op2) != 0 -- || SUBREG_BYTE (op1) != GET_MODE_SIZE (SImode)))) -- return false; -+ /* If AVX512F is disabled, disable the registers. */ -+ if (! TARGET_AVX512F) -+ { -+ for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++) -+ CLEAR_HARD_REG_BIT (accessible_reg_set, i); - -- op1 = SUBREG_REG (op1); -- op2 = SUBREG_REG (op2); -+ accessible_reg_set &= ~reg_class_contents[ALL_MASK_REGS]; -+ } -+} - -- if (op1 != op2 -- || !REG_P (op1) -- || GET_MODE (op1) != DImode) -- return false; -+/* Canonicalize a comparison from one we don't have to one we do have. */ - -- return true; --} -+static void -+ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1, -+ bool op0_preserve_value) -+{ -+ /* The order of operands in x87 ficom compare is forced by combine in -+ simplify_comparison () function. Float operator is treated as RTX_OBJ -+ with a precedence over other operators and is always put in the first -+ place. Swap condition and operands to match ficom instruction. */ -+ if (!op0_preserve_value -+ && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1)) -+ { -+ enum rtx_code scode = swap_condition ((enum rtx_code) *code); - --/* The DImode version of scalar_to_vector_candidate_p. */ -+ /* We are called only for compares that are split to SAHF instruction. -+ Ensure that we have setcc/jcc insn for the swapped condition. 
*/ -+ if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN) -+ { -+ std::swap (*op0, *op1); -+ *code = (int) scode; -+ } -+ } -+} -+ -+ -+/* Hook to determine if one function can safely inline another. */ - - static bool --dimode_scalar_to_vector_candidate_p (rtx_insn *insn) -+ix86_can_inline_p (tree caller, tree callee) - { -- rtx def_set = single_set (insn); -- -- if (!def_set) -- return false; -+ tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller); -+ tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee); - -- if (has_non_address_hard_reg (insn)) -- return false; -+ /* Changes of those flags can be tolerated for always inlines. Lets hope -+ user knows what he is doing. */ -+ const unsigned HOST_WIDE_INT always_inline_safe_mask -+ = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS -+ | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD -+ | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD -+ | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS -+ | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE -+ | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER -+ | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER); - -- rtx src = SET_SRC (def_set); -- rtx dst = SET_DEST (def_set); - -- if (GET_CODE (src) == COMPARE) -- return convertible_comparison_p (insn); -+ if (!callee_tree) -+ callee_tree = target_option_default_node; -+ if (!caller_tree) -+ caller_tree = target_option_default_node; -+ if (callee_tree == caller_tree) -+ return true; - -- /* We are interested in DImode promotion only. */ -- if ((GET_MODE (src) != DImode -- && !CONST_INT_P (src)) -- || GET_MODE (dst) != DImode) -- return false; -+ struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree); -+ struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree); -+ bool ret = false; -+ bool always_inline -+ = (DECL_DISREGARD_INLINE_LIMITS (callee) -+ && lookup_attribute ("always_inline", -+ DECL_ATTRIBUTES (callee))); - -- if (!REG_P (dst) && !MEM_P (dst)) -- return false; -- -- switch (GET_CODE (src)) -- { -- case ASHIFTRT: -- if (!TARGET_AVX512VL) -- return false; -- /* FALLTHRU */ -- -- case ASHIFT: -- case LSHIFTRT: -- if (!CONST_INT_P (XEXP (src, 1)) -- || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, 63)) -- return false; -- break; -- -- case PLUS: -- case MINUS: -- case IOR: -- case XOR: -- case AND: -- if (!REG_P (XEXP (src, 1)) -- && !MEM_P (XEXP (src, 1)) -- && !CONST_INT_P (XEXP (src, 1))) -- return false; -- -- if (GET_MODE (XEXP (src, 1)) != DImode -- && !CONST_INT_P (XEXP (src, 1))) -- return false; -- break; -+ cgraph_node *callee_node = cgraph_node::get (callee); -+ /* Callee's isa options should be a subset of the caller's, i.e. a SSE4 -+ function can inline a SSE2 function but a SSE2 function can't inline -+ a SSE4 function. */ -+ if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags) -+ != callee_opts->x_ix86_isa_flags) -+ || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2) -+ != callee_opts->x_ix86_isa_flags2)) -+ ret = false; - -- case NEG: -- case NOT: -- break; -+ /* See if we have the same non-isa options. */ -+ else if ((!always_inline -+ && caller_opts->x_target_flags != callee_opts->x_target_flags) -+ || (caller_opts->x_target_flags & ~always_inline_safe_mask) -+ != (callee_opts->x_target_flags & ~always_inline_safe_mask)) -+ ret = false; - -- case REG: -- return true; -+ /* See if arch, tune, etc. are the same. 
*/ -+ else if (caller_opts->arch != callee_opts->arch) -+ ret = false; - -- case MEM: -- case CONST_INT: -- return REG_P (dst); -+ else if (!always_inline && caller_opts->tune != callee_opts->tune) -+ ret = false; - -- default: -- return false; -- } -+ else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath -+ /* If the calle doesn't use FP expressions differences in -+ ix86_fpmath can be ignored. We are called from FEs -+ for multi-versioning call optimization, so beware of -+ ipa_fn_summaries not available. */ -+ && (! ipa_fn_summaries -+ || ipa_fn_summaries->get (callee_node) == NULL -+ || ipa_fn_summaries->get (callee_node)->fp_expressions)) -+ ret = false; - -- if (!REG_P (XEXP (src, 0)) -- && !MEM_P (XEXP (src, 0)) -- && !CONST_INT_P (XEXP (src, 0)) -- /* Check for andnot case. */ -- && (GET_CODE (src) != AND -- || GET_CODE (XEXP (src, 0)) != NOT -- || !REG_P (XEXP (XEXP (src, 0), 0)))) -- return false; -+ else if (!always_inline -+ && caller_opts->branch_cost != callee_opts->branch_cost) -+ ret = false; - -- if (GET_MODE (XEXP (src, 0)) != DImode -- && !CONST_INT_P (XEXP (src, 0))) -- return false; -+ else -+ ret = true; - -- return true; -+ return ret; - } -- --/* The TImode version of scalar_to_vector_candidate_p. */ -+ -+/* Return true if this goes in large data/bss. */ - - static bool --timode_scalar_to_vector_candidate_p (rtx_insn *insn) -+ix86_in_large_data_p (tree exp) - { -- rtx def_set = single_set (insn); -- -- if (!def_set) -+ if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC) - return false; - -- if (has_non_address_hard_reg (insn)) -+ if (exp == NULL_TREE) - return false; - -- rtx src = SET_SRC (def_set); -- rtx dst = SET_DEST (def_set); -- -- /* Only TImode load and store are allowed. */ -- if (GET_MODE (dst) != TImode) -+ /* Functions are never large data. */ -+ if (TREE_CODE (exp) == FUNCTION_DECL) - return false; - -- if (MEM_P (dst)) -- { -- /* Check for store. Memory must be aligned or unaligned store -- is optimal. Only support store from register, standard SSE -- constant or CONST_WIDE_INT generated from piecewise store. -- -- ??? Verify performance impact before enabling CONST_INT for -- __int128 store. */ -- if (misaligned_operand (dst, TImode) -- && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL) -- return false; -- -- switch (GET_CODE (src)) -- { -- default: -- return false; -- -- case REG: -- case CONST_WIDE_INT: -- return true; -+ /* Automatic variables are never large data. */ -+ if (VAR_P (exp) && !is_global_var (exp)) -+ return false; - -- case CONST_INT: -- return standard_sse_constant_p (src, TImode); -- } -- } -- else if (MEM_P (src)) -+ if (VAR_P (exp) && DECL_SECTION_NAME (exp)) - { -- /* Check for load. Memory must be aligned or unaligned load is -- optimal. */ -- return (REG_P (dst) -- && (!misaligned_operand (src, TImode) -- || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)); -+ const char *section = DECL_SECTION_NAME (exp); -+ if (strcmp (section, ".ldata") == 0 -+ || strcmp (section, ".lbss") == 0) -+ return true; -+ return false; - } -- -- return false; --} -- --/* Return 1 if INSN may be converted into vector -- instruction. */ -- --static bool --scalar_to_vector_candidate_p (rtx_insn *insn) --{ -- if (TARGET_64BIT) -- return timode_scalar_to_vector_candidate_p (insn); - else -- return dimode_scalar_to_vector_candidate_p (insn); --} -- --/* The DImode version of remove_non_convertible_regs. 
*/ -- --static void --dimode_remove_non_convertible_regs (bitmap candidates) --{ -- bitmap_iterator bi; -- unsigned id; -- bitmap regs = BITMAP_ALLOC (NULL); -- -- EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi) -- { -- rtx def_set = single_set (DF_INSN_UID_GET (id)->insn); -- rtx reg = SET_DEST (def_set); -- -- if (!REG_P (reg) -- || bitmap_bit_p (regs, REGNO (reg)) -- || HARD_REGISTER_P (reg)) -- continue; -- -- for (df_ref def = DF_REG_DEF_CHAIN (REGNO (reg)); -- def; -- def = DF_REF_NEXT_REG (def)) -- { -- if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def))) -- { -- if (dump_file) -- fprintf (dump_file, -- "r%d has non convertible definition in insn %d\n", -- REGNO (reg), DF_REF_INSN_UID (def)); -- -- bitmap_set_bit (regs, REGNO (reg)); -- break; -- } -- } -- } -- -- EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi) - { -- for (df_ref def = DF_REG_DEF_CHAIN (id); -- def; -- def = DF_REF_NEXT_REG (def)) -- if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def))) -- { -- if (dump_file) -- fprintf (dump_file, "Removing insn %d from candidates list\n", -- DF_REF_INSN_UID (def)); -+ HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp)); - -- bitmap_clear_bit (candidates, DF_REF_INSN_UID (def)); -- } -+ /* If this is an incomplete type with size 0, then we can't put it -+ in data because it might be too big when completed. Also, -+ int_size_in_bytes returns -1 if size can vary or is larger than -+ an integer in which case also it is safer to assume that it goes in -+ large data. */ -+ if (size <= 0 || size > ix86_section_threshold) -+ return true; - } - -- BITMAP_FREE (regs); -+ return false; - } - --/* For a register REGNO, scan instructions for its defs and uses. -- Put REGNO in REGS if a def or use isn't in CANDIDATES. */ -+/* i386-specific section flag to mark large sections. */ -+#define SECTION_LARGE SECTION_MACH_DEP -+ -+/* Switch to the appropriate section for output of DECL. -+ DECL is either a `VAR_DECL' node or a constant of some sort. -+ RELOC indicates whether forming the initial value of DECL requires -+ link-time relocations. 
*/ - --static void --timode_check_non_convertible_regs (bitmap candidates, bitmap regs, -- unsigned int regno) -+ATTRIBUTE_UNUSED static section * -+x86_64_elf_select_section (tree decl, int reloc, -+ unsigned HOST_WIDE_INT align) - { -- for (df_ref def = DF_REG_DEF_CHAIN (regno); -- def; -- def = DF_REF_NEXT_REG (def)) -+ if (ix86_in_large_data_p (decl)) - { -- if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def))) -+ const char *sname = NULL; -+ unsigned int flags = SECTION_WRITE | SECTION_LARGE; -+ switch (categorize_decl_for_section (decl, reloc)) - { -- if (dump_file) -- fprintf (dump_file, -- "r%d has non convertible def in insn %d\n", -- regno, DF_REF_INSN_UID (def)); -- -- bitmap_set_bit (regs, regno); -+ case SECCAT_DATA: -+ sname = ".ldata"; -+ break; -+ case SECCAT_DATA_REL: -+ sname = ".ldata.rel"; -+ break; -+ case SECCAT_DATA_REL_LOCAL: -+ sname = ".ldata.rel.local"; -+ break; -+ case SECCAT_DATA_REL_RO: -+ sname = ".ldata.rel.ro"; -+ break; -+ case SECCAT_DATA_REL_RO_LOCAL: -+ sname = ".ldata.rel.ro.local"; -+ break; -+ case SECCAT_BSS: -+ sname = ".lbss"; -+ flags |= SECTION_BSS; -+ break; -+ case SECCAT_RODATA: -+ case SECCAT_RODATA_MERGE_STR: -+ case SECCAT_RODATA_MERGE_STR_INIT: -+ case SECCAT_RODATA_MERGE_CONST: -+ sname = ".lrodata"; -+ flags &= ~SECTION_WRITE; -+ break; -+ case SECCAT_SRODATA: -+ case SECCAT_SDATA: -+ case SECCAT_SBSS: -+ gcc_unreachable (); -+ case SECCAT_TEXT: -+ case SECCAT_TDATA: -+ case SECCAT_TBSS: -+ /* We don't split these for medium model. Place them into -+ default sections and hope for best. */ - break; - } -- } -- -- for (df_ref ref = DF_REG_USE_CHAIN (regno); -- ref; -- ref = DF_REF_NEXT_REG (ref)) -- { -- /* Debug instructions are skipped. */ -- if (NONDEBUG_INSN_P (DF_REF_INSN (ref)) -- && !bitmap_bit_p (candidates, DF_REF_INSN_UID (ref))) -+ if (sname) - { -- if (dump_file) -- fprintf (dump_file, -- "r%d has non convertible use in insn %d\n", -- regno, DF_REF_INSN_UID (ref)); -- -- bitmap_set_bit (regs, regno); -- break; -+ /* We might get called with string constants, but get_named_section -+ doesn't like them as they are not DECLs. Also, we need to set -+ flags in that case. */ -+ if (!DECL_P (decl)) -+ return get_section (sname, flags, NULL); -+ return get_named_section (decl, sname, reloc); - } - } -+ return default_elf_select_section (decl, reloc, align); - } - --/* The TImode version of remove_non_convertible_regs. */ -+/* Select a set of attributes for section NAME based on the properties -+ of DECL and whether or not RELOC indicates that DECL's initializer -+ might contain runtime relocations. 
*/ - --static void --timode_remove_non_convertible_regs (bitmap candidates) -+static unsigned int ATTRIBUTE_UNUSED -+x86_64_elf_section_type_flags (tree decl, const char *name, int reloc) - { -- bitmap_iterator bi; -- unsigned id; -- bitmap regs = BITMAP_ALLOC (NULL); -- -- EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi) -- { -- rtx def_set = single_set (DF_INSN_UID_GET (id)->insn); -- rtx dest = SET_DEST (def_set); -- rtx src = SET_SRC (def_set); -- -- if ((!REG_P (dest) -- || bitmap_bit_p (regs, REGNO (dest)) -- || HARD_REGISTER_P (dest)) -- && (!REG_P (src) -- || bitmap_bit_p (regs, REGNO (src)) -- || HARD_REGISTER_P (src))) -- continue; -- -- if (REG_P (dest)) -- timode_check_non_convertible_regs (candidates, regs, -- REGNO (dest)); -- -- if (REG_P (src)) -- timode_check_non_convertible_regs (candidates, regs, -- REGNO (src)); -- } -- -- EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi) -- { -- for (df_ref def = DF_REG_DEF_CHAIN (id); -- def; -- def = DF_REF_NEXT_REG (def)) -- if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def))) -- { -- if (dump_file) -- fprintf (dump_file, "Removing insn %d from candidates list\n", -- DF_REF_INSN_UID (def)); -+ unsigned int flags = default_section_type_flags (decl, name, reloc); - -- bitmap_clear_bit (candidates, DF_REF_INSN_UID (def)); -- } -+ if (ix86_in_large_data_p (decl)) -+ flags |= SECTION_LARGE; - -- for (df_ref ref = DF_REG_USE_CHAIN (id); -- ref; -- ref = DF_REF_NEXT_REG (ref)) -- if (bitmap_bit_p (candidates, DF_REF_INSN_UID (ref))) -- { -- if (dump_file) -- fprintf (dump_file, "Removing insn %d from candidates list\n", -- DF_REF_INSN_UID (ref)); -+ if (decl == NULL_TREE -+ && (strcmp (name, ".ldata.rel.ro") == 0 -+ || strcmp (name, ".ldata.rel.ro.local") == 0)) -+ flags |= SECTION_RELRO; - -- bitmap_clear_bit (candidates, DF_REF_INSN_UID (ref)); -- } -- } -+ if (strcmp (name, ".lbss") == 0 -+ || strncmp (name, ".lbss.", 5) == 0 -+ || strncmp (name, ".gnu.linkonce.lb.", 16) == 0) -+ flags |= SECTION_BSS; - -- BITMAP_FREE (regs); -+ return flags; - } - --/* For a given bitmap of insn UIDs scans all instruction and -- remove insn from CANDIDATES in case it has both convertible -- and not convertible definitions. -- -- All insns in a bitmap are conversion candidates according to -- scalar_to_vector_candidate_p. Currently it implies all insns -- are single_set. */ -- --static void --remove_non_convertible_regs (bitmap candidates) --{ -- if (TARGET_64BIT) -- timode_remove_non_convertible_regs (candidates); -- else -- dimode_remove_non_convertible_regs (candidates); --} -- --class scalar_chain --{ -- public: -- scalar_chain (); -- virtual ~scalar_chain (); -- -- static unsigned max_id; -- -- /* ID of a chain. */ -- unsigned int chain_id; -- /* A queue of instructions to be included into a chain. */ -- bitmap queue; -- /* Instructions included into a chain. */ -- bitmap insns; -- /* All registers defined by a chain. */ -- bitmap defs; -- /* Registers used in both vector and sclar modes. 
*/ -- bitmap defs_conv; -- -- void build (bitmap candidates, unsigned insn_uid); -- virtual int compute_convert_gain () = 0; -- int convert (); -- -- protected: -- void add_to_queue (unsigned insn_uid); -- void emit_conversion_insns (rtx insns, rtx_insn *pos); -- -- private: -- void add_insn (bitmap candidates, unsigned insn_uid); -- void analyze_register_chain (bitmap candidates, df_ref ref); -- virtual void mark_dual_mode_def (df_ref def) = 0; -- virtual void convert_insn (rtx_insn *insn) = 0; -- virtual void convert_registers () = 0; --}; -- --class dimode_scalar_chain : public scalar_chain --{ -- public: -- int compute_convert_gain (); -- private: -- void mark_dual_mode_def (df_ref def); -- rtx replace_with_subreg (rtx x, rtx reg, rtx subreg); -- void replace_with_subreg_in_insn (rtx_insn *insn, rtx reg, rtx subreg); -- void convert_insn (rtx_insn *insn); -- void convert_op (rtx *op, rtx_insn *insn); -- void convert_reg (unsigned regno); -- void make_vector_copies (unsigned regno); -- void convert_registers (); -- int vector_const_cost (rtx exp); --}; -+/* Build up a unique section name, expressed as a -+ STRING_CST node, and assign it to DECL_SECTION_NAME (decl). -+ RELOC indicates whether the initial value of EXP requires -+ link-time relocations. */ - --class timode_scalar_chain : public scalar_chain -+static void ATTRIBUTE_UNUSED -+x86_64_elf_unique_section (tree decl, int reloc) - { -- public: -- /* Convert from TImode to V1TImode is always faster. */ -- int compute_convert_gain () { return 1; } -- -- private: -- void mark_dual_mode_def (df_ref def); -- void fix_debug_reg_uses (rtx reg); -- void convert_insn (rtx_insn *insn); -- /* We don't convert registers to difference size. */ -- void convert_registers () {} --}; -- --unsigned scalar_chain::max_id = 0; -- --/* Initialize new chain. */ -+ if (ix86_in_large_data_p (decl)) -+ { -+ const char *prefix = NULL; -+ /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */ -+ bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP; - --scalar_chain::scalar_chain () --{ -- chain_id = ++max_id; -+ switch (categorize_decl_for_section (decl, reloc)) -+ { -+ case SECCAT_DATA: -+ case SECCAT_DATA_REL: -+ case SECCAT_DATA_REL_LOCAL: -+ case SECCAT_DATA_REL_RO: -+ case SECCAT_DATA_REL_RO_LOCAL: -+ prefix = one_only ? ".ld" : ".ldata"; -+ break; -+ case SECCAT_BSS: -+ prefix = one_only ? ".lb" : ".lbss"; -+ break; -+ case SECCAT_RODATA: -+ case SECCAT_RODATA_MERGE_STR: -+ case SECCAT_RODATA_MERGE_STR_INIT: -+ case SECCAT_RODATA_MERGE_CONST: -+ prefix = one_only ? ".lr" : ".lrodata"; -+ break; -+ case SECCAT_SRODATA: -+ case SECCAT_SDATA: -+ case SECCAT_SBSS: -+ gcc_unreachable (); -+ case SECCAT_TEXT: -+ case SECCAT_TDATA: -+ case SECCAT_TBSS: -+ /* We don't split these for medium model. Place them into -+ default sections and hope for best. */ -+ break; -+ } -+ if (prefix) -+ { -+ const char *name, *linkonce; -+ char *string; - -- if (dump_file) -- fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id); -+ name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); -+ name = targetm.strip_name_encoding (name); - -- bitmap_obstack_initialize (NULL); -- insns = BITMAP_ALLOC (NULL); -- defs = BITMAP_ALLOC (NULL); -- defs_conv = BITMAP_ALLOC (NULL); -- queue = NULL; --} -+ /* If we're using one_only, then there needs to be a .gnu.linkonce -+ prefix to the section name. */ -+ linkonce = one_only ? ".gnu.linkonce" : ""; - --/* Free chain's data. 
*/ -+ string = ACONCAT ((linkonce, prefix, ".", name, NULL)); - --scalar_chain::~scalar_chain () --{ -- BITMAP_FREE (insns); -- BITMAP_FREE (defs); -- BITMAP_FREE (defs_conv); -- bitmap_obstack_release (NULL); -+ set_decl_section_name (decl, string); -+ return; -+ } -+ } -+ default_unique_section (decl, reloc); - } - --/* Add instruction into chains' queue. */ -- --void --scalar_chain::add_to_queue (unsigned insn_uid) --{ -- if (bitmap_bit_p (insns, insn_uid) -- || bitmap_bit_p (queue, insn_uid)) -- return; -+#ifdef COMMON_ASM_OP - -- if (dump_file) -- fprintf (dump_file, " Adding insn %d into chain's #%d queue\n", -- insn_uid, chain_id); -- bitmap_set_bit (queue, insn_uid); --} -+#ifndef LARGECOMM_SECTION_ASM_OP -+#define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t" -+#endif - --/* For DImode conversion, mark register defined by DEF as requiring -- conversion. */ -+/* This says how to output assembler code to declare an -+ uninitialized external linkage data object. - -+ For medium model x86-64 we need to use LARGECOMM_SECTION_ASM_OP opcode for -+ large objects. */ - void --dimode_scalar_chain::mark_dual_mode_def (df_ref def) -+x86_elf_aligned_decl_common (FILE *file, tree decl, -+ const char *name, unsigned HOST_WIDE_INT size, -+ int align) - { -- gcc_assert (DF_REF_REG_DEF_P (def)); -- -- if (bitmap_bit_p (defs_conv, DF_REF_REGNO (def))) -- return; -- -- if (dump_file) -- fprintf (dump_file, -- " Mark r%d def in insn %d as requiring both modes in chain #%d\n", -- DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id); -- -- bitmap_set_bit (defs_conv, DF_REF_REGNO (def)); -+ if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) -+ && size > (unsigned int)ix86_section_threshold) -+ { -+ switch_to_section (get_named_section (decl, ".lbss", 0)); -+ fputs (LARGECOMM_SECTION_ASM_OP, file); -+ } -+ else -+ fputs (COMMON_ASM_OP, file); -+ assemble_name (file, name); -+ fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n", -+ size, align / BITS_PER_UNIT); - } -+#endif - --/* For TImode conversion, it is unused. */ -+/* Utility function for targets to use in implementing -+ ASM_OUTPUT_ALIGNED_BSS. */ - - void --timode_scalar_chain::mark_dual_mode_def (df_ref) -+x86_output_aligned_bss (FILE *file, tree decl, const char *name, -+ unsigned HOST_WIDE_INT size, int align) - { -- gcc_unreachable (); -+ if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) -+ && size > (unsigned int)ix86_section_threshold) -+ switch_to_section (get_named_section (decl, ".lbss", 0)); -+ else -+ switch_to_section (bss_section); -+ ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT)); -+#ifdef ASM_DECLARE_OBJECT_NAME -+ last_assemble_variable_decl = decl; -+ ASM_DECLARE_OBJECT_NAME (file, name, decl); -+#else -+ /* Standard thing is just output label for the object. */ -+ ASM_OUTPUT_LABEL (file, name); -+#endif /* ASM_DECLARE_OBJECT_NAME */ -+ ASM_OUTPUT_SKIP (file, size ? size : 1); - } -+ -+/* Decide whether we must probe the stack before any space allocation -+ on this target. It's essentially TARGET_STACK_PROBE except when -+ -fstack-check causes the stack to be already probed differently. */ - --/* Check REF's chain to add new insns into a queue -- and find registers requiring conversion. 
*/ -- --void --scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref) -+bool -+ix86_target_stack_probe (void) - { -- df_link *chain; -- -- gcc_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)) -- || bitmap_bit_p (candidates, DF_REF_INSN_UID (ref))); -- add_to_queue (DF_REF_INSN_UID (ref)); -- -- for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next) -- { -- unsigned uid = DF_REF_INSN_UID (chain->ref); -- -- if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref))) -- continue; -- -- if (!DF_REF_REG_MEM_P (chain->ref)) -- { -- if (bitmap_bit_p (insns, uid)) -- continue; -- -- if (bitmap_bit_p (candidates, uid)) -- { -- add_to_queue (uid); -- continue; -- } -- } -+ /* Do not probe the stack twice if static stack checking is enabled. */ -+ if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK) -+ return false; - -- if (DF_REF_REG_DEF_P (chain->ref)) -- { -- if (dump_file) -- fprintf (dump_file, " r%d def in insn %d isn't convertible\n", -- DF_REF_REGNO (chain->ref), uid); -- mark_dual_mode_def (chain->ref); -- } -- else -- { -- if (dump_file) -- fprintf (dump_file, " r%d use in insn %d isn't convertible\n", -- DF_REF_REGNO (chain->ref), uid); -- mark_dual_mode_def (ref); -- } -- } -+ return TARGET_STACK_PROBE; - } -+ -+/* Decide whether we can make a sibling call to a function. DECL is the -+ declaration of the function being targeted by the call and EXP is the -+ CALL_EXPR representing the call. */ - --/* Add instruction into a chain. */ -- --void --scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid) -+static bool -+ix86_function_ok_for_sibcall (tree decl, tree exp) - { -- if (bitmap_bit_p (insns, insn_uid)) -- return; -- -- if (dump_file) -- fprintf (dump_file, " Adding insn %d to chain #%d\n", insn_uid, chain_id); -- -- bitmap_set_bit (insns, insn_uid); -+ tree type, decl_or_type; -+ rtx a, b; -+ bool bind_global = decl && !targetm.binds_local_p (decl); - -- rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn; -- rtx def_set = single_set (insn); -- if (def_set && REG_P (SET_DEST (def_set)) -- && !HARD_REGISTER_P (SET_DEST (def_set))) -- bitmap_set_bit (defs, REGNO (SET_DEST (def_set))); -+ if (ix86_function_naked (current_function_decl)) -+ return false; - -- df_ref ref; -- df_ref def; -- for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref)) -- if (!HARD_REGISTER_P (DF_REF_REG (ref))) -- for (def = DF_REG_DEF_CHAIN (DF_REF_REGNO (ref)); -- def; -- def = DF_REF_NEXT_REG (def)) -- analyze_register_chain (candidates, def); -- for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref)) -- if (!DF_REF_REG_MEM_P (ref)) -- analyze_register_chain (candidates, ref); --} -+ /* Sibling call isn't OK if there are no caller-saved registers -+ since all registers must be preserved before return. */ -+ if (cfun->machine->no_caller_saved_registers) -+ return false; - --/* Build new chain starting from insn INSN_UID recursively -- adding all dependent uses and definitions. */ -+ /* If we are generating position-independent code, we cannot sibcall -+ optimize direct calls to global functions, as the PLT requires -+ %ebx be live. (Darwin does not have a PLT.) */ -+ if (!TARGET_MACHO -+ && !TARGET_64BIT -+ && flag_pic -+ && flag_plt -+ && bind_global) -+ return false; - --void --scalar_chain::build (bitmap candidates, unsigned insn_uid) --{ -- queue = BITMAP_ALLOC (NULL); -- bitmap_set_bit (queue, insn_uid); -+ /* If we need to align the outgoing stack, then sibcalling would -+ unalign the stack, which may break the called function. 
*/ -+ if (ix86_minimum_incoming_stack_boundary (true) -+ < PREFERRED_STACK_BOUNDARY) -+ return false; - -- if (dump_file) -- fprintf (dump_file, "Building chain #%d...\n", chain_id); -+ if (decl) -+ { -+ decl_or_type = decl; -+ type = TREE_TYPE (decl); -+ } -+ else -+ { -+ /* We're looking at the CALL_EXPR, we need the type of the function. */ -+ type = CALL_EXPR_FN (exp); /* pointer expression */ -+ type = TREE_TYPE (type); /* pointer type */ -+ type = TREE_TYPE (type); /* function type */ -+ decl_or_type = type; -+ } - -- while (!bitmap_empty_p (queue)) -+ /* Check that the return value locations are the same. Like -+ if we are returning floats on the 80387 register stack, we cannot -+ make a sibcall from a function that doesn't return a float to a -+ function that does or, conversely, from a function that does return -+ a float to a function that doesn't; the necessary stack adjustment -+ would not be executed. This is also the place we notice -+ differences in the return value ABI. Note that it is ok for one -+ of the functions to have void return type as long as the return -+ value of the other is passed in a register. */ -+ a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false); -+ b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)), -+ cfun->decl, false); -+ if (STACK_REG_P (a) || STACK_REG_P (b)) - { -- insn_uid = bitmap_first_set_bit (queue); -- bitmap_clear_bit (queue, insn_uid); -- bitmap_clear_bit (candidates, insn_uid); -- add_insn (candidates, insn_uid); -+ if (!rtx_equal_p (a, b)) -+ return false; - } -+ else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl)))) -+ ; -+ else if (!rtx_equal_p (a, b)) -+ return false; - -- if (dump_file) -+ if (TARGET_64BIT) -+ { -+ /* The SYSV ABI has more call-clobbered registers; -+ disallow sibcalls from MS to SYSV. */ -+ if (cfun->machine->call_abi == MS_ABI -+ && ix86_function_type_abi (type) == SYSV_ABI) -+ return false; -+ } -+ else - { -- fprintf (dump_file, "Collected chain #%d...\n", chain_id); -- fprintf (dump_file, " insns: "); -- dump_bitmap (dump_file, insns); -- if (!bitmap_empty_p (defs_conv)) -+ /* If this call is indirect, we'll need to be able to use a -+ call-clobbered register for the address of the target function. -+ Make sure that all such registers are not used for passing -+ parameters. Note that DLLIMPORT functions and call to global -+ function via GOT slot are indirect. */ -+ if (!decl -+ || (bind_global && flag_pic && !flag_plt) -+ || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)) -+ || flag_force_indirect_call) - { -- bitmap_iterator bi; -- unsigned id; -- const char *comma = ""; -- fprintf (dump_file, " defs to convert: "); -- EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi) -- { -- fprintf (dump_file, "%sr%d", comma, id); -- comma = ", "; -- } -- fprintf (dump_file, "\n"); -+ /* Check if regparm >= 3 since arg_reg_available is set to -+ false if regparm == 0. If regparm is 1 or 2, there is -+ always a call-clobbered register available. -+ -+ ??? The symbol indirect call doesn't need a call-clobbered -+ register. But we don't know if this is a symbol indirect -+ call or not here. */ -+ if (ix86_function_regparm (type, decl) >= 3 -+ && !cfun->machine->arg_reg_available) -+ return false; - } - } - -- BITMAP_FREE (queue); -+ /* Otherwise okay. That also includes certain types of indirect calls. */ -+ return true; - } - --/* Return a cost of building a vector costant -- instead of using a scalar one. */ -+/* This function determines from TYPE the calling-convention. 
*/ - --int --dimode_scalar_chain::vector_const_cost (rtx exp) -+unsigned int -+ix86_get_callcvt (const_tree type) - { -- gcc_assert (CONST_INT_P (exp)); -+ unsigned int ret = 0; -+ bool is_stdarg; -+ tree attrs; - -- if (standard_sse_constant_p (exp, V2DImode)) -- return COSTS_N_INSNS (1); -- return ix86_cost->sse_load[1]; --} -+ if (TARGET_64BIT) -+ return IX86_CALLCVT_CDECL; - --/* Compute a gain for chain conversion. */ -+ attrs = TYPE_ATTRIBUTES (type); -+ if (attrs != NULL_TREE) -+ { -+ if (lookup_attribute ("cdecl", attrs)) -+ ret |= IX86_CALLCVT_CDECL; -+ else if (lookup_attribute ("stdcall", attrs)) -+ ret |= IX86_CALLCVT_STDCALL; -+ else if (lookup_attribute ("fastcall", attrs)) -+ ret |= IX86_CALLCVT_FASTCALL; -+ else if (lookup_attribute ("thiscall", attrs)) -+ ret |= IX86_CALLCVT_THISCALL; - --int --dimode_scalar_chain::compute_convert_gain () --{ -- bitmap_iterator bi; -- unsigned insn_uid; -- int gain = 0; -- int cost = 0; -- -- if (dump_file) -- fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id); -- -- EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi) -- { -- rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn; -- rtx def_set = single_set (insn); -- rtx src = SET_SRC (def_set); -- rtx dst = SET_DEST (def_set); -- -- if (REG_P (src) && REG_P (dst)) -- gain += COSTS_N_INSNS (2) - ix86_cost->xmm_move; -- else if (REG_P (src) && MEM_P (dst)) -- gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1]; -- else if (MEM_P (src) && REG_P (dst)) -- gain += 2 * ix86_cost->int_load[2] - ix86_cost->sse_load[1]; -- else if (GET_CODE (src) == ASHIFT -- || GET_CODE (src) == ASHIFTRT -- || GET_CODE (src) == LSHIFTRT) -- { -- if (CONST_INT_P (XEXP (src, 0))) -- gain -= vector_const_cost (XEXP (src, 0)); -- -- gain += ix86_cost->shift_const; -- if (INTVAL (XEXP (src, 1)) >= 32) -- gain -= COSTS_N_INSNS (1); -- } -- else if (GET_CODE (src) == PLUS -- || GET_CODE (src) == MINUS -- || GET_CODE (src) == IOR -- || GET_CODE (src) == XOR -- || GET_CODE (src) == AND) -- { -- gain += ix86_cost->add; -- /* Additional gain for andnot for targets without BMI. */ -- if (GET_CODE (XEXP (src, 0)) == NOT -- && !TARGET_BMI) -- gain += 2 * ix86_cost->add; -- -- if (CONST_INT_P (XEXP (src, 0))) -- gain -= vector_const_cost (XEXP (src, 0)); -- if (CONST_INT_P (XEXP (src, 1))) -- gain -= vector_const_cost (XEXP (src, 1)); -- } -- else if (GET_CODE (src) == NEG -- || GET_CODE (src) == NOT) -- gain += ix86_cost->add - COSTS_N_INSNS (1); -- else if (GET_CODE (src) == COMPARE) -- { -- /* Assume comparison cost is the same. */ -- } -- else if (CONST_INT_P (src)) -- { -- if (REG_P (dst)) -- gain += COSTS_N_INSNS (2); -- else if (MEM_P (dst)) -- gain += 2 * ix86_cost->int_store[2] - ix86_cost->sse_store[1]; -- gain -= vector_const_cost (src); -- } -- else -- gcc_unreachable (); -- } -- -- if (dump_file) -- fprintf (dump_file, " Instruction conversion gain: %d\n", gain); -- -- EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, insn_uid, bi) -- cost += DF_REG_DEF_COUNT (insn_uid) * ix86_cost->mmxsse_to_integer; -+ /* Regparam isn't allowed for thiscall and fastcall. 
*/ -+ if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0) -+ { -+ if (lookup_attribute ("regparm", attrs)) -+ ret |= IX86_CALLCVT_REGPARM; -+ if (lookup_attribute ("sseregparm", attrs)) -+ ret |= IX86_CALLCVT_SSEREGPARM; -+ } - -- if (dump_file) -- fprintf (dump_file, " Registers conversion cost: %d\n", cost); -+ if (IX86_BASE_CALLCVT(ret) != 0) -+ return ret; -+ } - -- gain -= cost; -+ is_stdarg = stdarg_p (type); -+ if (TARGET_RTD && !is_stdarg) -+ return IX86_CALLCVT_STDCALL | ret; - -- if (dump_file) -- fprintf (dump_file, " Total gain: %d\n", gain); -+ if (ret != 0 -+ || is_stdarg -+ || TREE_CODE (type) != METHOD_TYPE -+ || ix86_function_type_abi (type) != MS_ABI) -+ return IX86_CALLCVT_CDECL | ret; - -- return gain; -+ return IX86_CALLCVT_THISCALL; - } - --/* Replace REG in X with a V2DI subreg of NEW_REG. */ -+/* Return 0 if the attributes for two types are incompatible, 1 if they -+ are compatible, and 2 if they are nearly compatible (which causes a -+ warning to be generated). */ - --rtx --dimode_scalar_chain::replace_with_subreg (rtx x, rtx reg, rtx new_reg) -+static int -+ix86_comp_type_attributes (const_tree type1, const_tree type2) - { -- if (x == reg) -- return gen_rtx_SUBREG (V2DImode, new_reg, 0); -+ unsigned int ccvt1, ccvt2; - -- const char *fmt = GET_RTX_FORMAT (GET_CODE (x)); -- int i, j; -- for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) -- { -- if (fmt[i] == 'e') -- XEXP (x, i) = replace_with_subreg (XEXP (x, i), reg, new_reg); -- else if (fmt[i] == 'E') -- for (j = XVECLEN (x, i) - 1; j >= 0; j--) -- XVECEXP (x, i, j) = replace_with_subreg (XVECEXP (x, i, j), -- reg, new_reg); -- } -+ if (TREE_CODE (type1) != FUNCTION_TYPE -+ && TREE_CODE (type1) != METHOD_TYPE) -+ return 1; - -- return x; --} -+ ccvt1 = ix86_get_callcvt (type1); -+ ccvt2 = ix86_get_callcvt (type2); -+ if (ccvt1 != ccvt2) -+ return 0; -+ if (ix86_function_regparm (type1, NULL) -+ != ix86_function_regparm (type2, NULL)) -+ return 0; - --/* Replace REG in INSN with a V2DI subreg of NEW_REG. */ -+ return 1; -+} -+ -+/* Return the regparm value for a function with the indicated TYPE and DECL. -+ DECL may be NULL when calling function indirectly -+ or considering a libcall. */ - --void --dimode_scalar_chain::replace_with_subreg_in_insn (rtx_insn *insn, -- rtx reg, rtx new_reg) -+static int -+ix86_function_regparm (const_tree type, const_tree decl) - { -- replace_with_subreg (single_set (insn), reg, new_reg); --} -+ tree attr; -+ int regparm; -+ unsigned int ccvt; - --/* Insert generated conversion instruction sequence INSNS -- after instruction AFTER. New BB may be required in case -- instruction has EH region attached. */ -+ if (TARGET_64BIT) -+ return (ix86_function_type_abi (type) == SYSV_ABI -+ ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX); -+ ccvt = ix86_get_callcvt (type); -+ regparm = ix86_regparm; - --void --scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after) --{ -- if (!control_flow_insn_p (after)) -+ if ((ccvt & IX86_CALLCVT_REGPARM) != 0) - { -- emit_insn_after (insns, after); -- return; -+ attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type)); -+ if (attr) -+ { -+ regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))); -+ return regparm; -+ } - } -+ else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) -+ return 2; -+ else if ((ccvt & IX86_CALLCVT_THISCALL) != 0) -+ return 1; - -- basic_block bb = BLOCK_FOR_INSN (after); -- edge e = find_fallthru_edge (bb->succs); -- gcc_assert (e); -+ /* Use register calling convention for local functions when possible. 
*/ -+ if (decl -+ && TREE_CODE (decl) == FUNCTION_DECL) -+ { -+ cgraph_node *target = cgraph_node::get (decl); -+ if (target) -+ target = target->function_symbol (); - -- basic_block new_bb = split_edge (e); -- emit_insn_after (insns, BB_HEAD (new_bb)); --} -+ /* Caller and callee must agree on the calling convention, so -+ checking here just optimize means that with -+ __attribute__((optimize (...))) caller could use regparm convention -+ and callee not, or vice versa. Instead look at whether the callee -+ is optimized or not. */ -+ if (target && opt_for_fn (target->decl, optimize) -+ && !(profile_flag && !flag_fentry)) -+ { -+ cgraph_local_info *i = &target->local; -+ if (i && i->local && i->can_change_signature) -+ { -+ int local_regparm, globals = 0, regno; - --/* Make vector copies for all register REGNO definitions -- and replace its uses in a chain. */ -+ /* Make sure no regparm register is taken by a -+ fixed register variable. */ -+ for (local_regparm = 0; local_regparm < REGPARM_MAX; -+ local_regparm++) -+ if (fixed_regs[local_regparm]) -+ break; - --void --dimode_scalar_chain::make_vector_copies (unsigned regno) --{ -- rtx reg = regno_reg_rtx[regno]; -- rtx vreg = gen_reg_rtx (DImode); -- df_ref ref; -+ /* We don't want to use regparm(3) for nested functions as -+ these use a static chain pointer in the third argument. */ -+ if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl)) -+ local_regparm = 2; - -- for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref)) -- if (!bitmap_bit_p (insns, DF_REF_INSN_UID (ref))) -- { -- start_sequence (); -+ /* Save a register for the split stack. */ -+ if (flag_split_stack) -+ { -+ if (local_regparm == 3) -+ local_regparm = 2; -+ else if (local_regparm == 2 -+ && DECL_STATIC_CHAIN (target->decl)) -+ local_regparm = 1; -+ } - -- if (!TARGET_INTER_UNIT_MOVES_TO_VEC) -- { -- rtx tmp = assign_386_stack_local (DImode, SLOT_STV_TEMP); -- emit_move_insn (adjust_address (tmp, SImode, 0), -- gen_rtx_SUBREG (SImode, reg, 0)); -- emit_move_insn (adjust_address (tmp, SImode, 4), -- gen_rtx_SUBREG (SImode, reg, 4)); -- emit_move_insn (vreg, tmp); -- } -- else if (TARGET_SSE4_1) -- { -- emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0), -- CONST0_RTX (V4SImode), -- gen_rtx_SUBREG (SImode, reg, 0))); -- emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0), -- gen_rtx_SUBREG (V4SImode, vreg, 0), -- gen_rtx_SUBREG (SImode, reg, 4), -- GEN_INT (2))); -- } -- else -- { -- rtx tmp = gen_reg_rtx (DImode); -- emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0), -- CONST0_RTX (V4SImode), -- gen_rtx_SUBREG (SImode, reg, 0))); -- emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0), -- CONST0_RTX (V4SImode), -- gen_rtx_SUBREG (SImode, reg, 4))); -- emit_insn (gen_vec_interleave_lowv4si -- (gen_rtx_SUBREG (V4SImode, vreg, 0), -- gen_rtx_SUBREG (V4SImode, vreg, 0), -- gen_rtx_SUBREG (V4SImode, tmp, 0))); -- } -- rtx_insn *seq = get_insns (); -- end_sequence (); -- rtx_insn *insn = DF_REF_INSN (ref); -- emit_conversion_insns (seq, insn); -- -- if (dump_file) -- fprintf (dump_file, -- " Copied r%d to a vector register r%d for insn %d\n", -- regno, REGNO (vreg), INSN_UID (insn)); -- } -+ /* Each fixed register usage increases register pressure, -+ so less registers should be used for argument passing. -+ This functionality can be overriden by an explicit -+ regparm value. 
*/ -+ for (regno = AX_REG; regno <= DI_REG; regno++) -+ if (fixed_regs[regno]) -+ globals++; - -- for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref)) -- if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))) -- { -- rtx_insn *insn = DF_REF_INSN (ref); -+ local_regparm -+ = globals < local_regparm ? local_regparm - globals : 0; - -- replace_with_subreg_in_insn (insn, reg, vreg); -+ if (local_regparm > regparm) -+ regparm = local_regparm; -+ } -+ } -+ } - -- if (dump_file) -- fprintf (dump_file, " Replaced r%d with r%d in insn %d\n", -- regno, REGNO (vreg), INSN_UID (insn)); -- } -+ return regparm; - } - --/* Convert all definitions of register REGNO -- and fix its uses. Scalar copies may be created -- in case register is used in not convertible insn. */ -+/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and -+ DFmode (2) arguments in SSE registers for a function with the -+ indicated TYPE and DECL. DECL may be NULL when calling function -+ indirectly or considering a libcall. Return -1 if any FP parameter -+ should be rejected by error. This is used in siutation we imply SSE -+ calling convetion but the function is called from another function with -+ SSE disabled. Otherwise return 0. */ - --void --dimode_scalar_chain::convert_reg (unsigned regno) -+static int -+ix86_function_sseregparm (const_tree type, const_tree decl, bool warn) - { -- bool scalar_copy = bitmap_bit_p (defs_conv, regno); -- rtx reg = regno_reg_rtx[regno]; -- rtx scopy = NULL_RTX; -- df_ref ref; -- bitmap conv; -- -- conv = BITMAP_ALLOC (NULL); -- bitmap_copy (conv, insns); -- -- if (scalar_copy) -- scopy = gen_reg_rtx (DImode); -+ gcc_assert (!TARGET_64BIT); - -- for (ref = DF_REG_DEF_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref)) -+ /* Use SSE registers to pass SFmode and DFmode arguments if requested -+ by the sseregparm attribute. 
*/ -+ if (TARGET_SSEREGPARM -+ || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type)))) - { -- rtx_insn *insn = DF_REF_INSN (ref); -- rtx def_set = single_set (insn); -- rtx src = SET_SRC (def_set); -- rtx reg = DF_REF_REG (ref); -- -- if (!MEM_P (src)) -- { -- replace_with_subreg_in_insn (insn, reg, reg); -- bitmap_clear_bit (conv, INSN_UID (insn)); -- } -- -- if (scalar_copy) -+ if (!TARGET_SSE) - { -- start_sequence (); -- if (!TARGET_INTER_UNIT_MOVES_FROM_VEC) -- { -- rtx tmp = assign_386_stack_local (DImode, SLOT_STV_TEMP); -- emit_move_insn (tmp, reg); -- emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0), -- adjust_address (tmp, SImode, 0)); -- emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4), -- adjust_address (tmp, SImode, 4)); -- } -- else if (TARGET_SSE4_1) -- { -- rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx)); -- emit_insn -- (gen_rtx_SET -- (gen_rtx_SUBREG (SImode, scopy, 0), -- gen_rtx_VEC_SELECT (SImode, -- gen_rtx_SUBREG (V4SImode, reg, 0), tmp))); -- -- tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const1_rtx)); -- emit_insn -- (gen_rtx_SET -- (gen_rtx_SUBREG (SImode, scopy, 4), -- gen_rtx_VEC_SELECT (SImode, -- gen_rtx_SUBREG (V4SImode, reg, 0), tmp))); -- } -- else -+ if (warn) - { -- rtx vcopy = gen_reg_rtx (V2DImode); -- emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, reg, 0)); -- emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 0), -- gen_rtx_SUBREG (SImode, vcopy, 0)); -- emit_move_insn (vcopy, -- gen_rtx_LSHIFTRT (V2DImode, vcopy, GEN_INT (32))); -- emit_move_insn (gen_rtx_SUBREG (SImode, scopy, 4), -- gen_rtx_SUBREG (SImode, vcopy, 0)); -+ if (decl) -+ error ("calling %qD with attribute sseregparm without " -+ "SSE/SSE2 enabled", decl); -+ else -+ error ("calling %qT with attribute sseregparm without " -+ "SSE/SSE2 enabled", type); - } -- rtx_insn *seq = get_insns (); -- end_sequence (); -- emit_conversion_insns (seq, insn); -- -- if (dump_file) -- fprintf (dump_file, -- " Copied r%d to a scalar register r%d for insn %d\n", -- regno, REGNO (scopy), INSN_UID (insn)); -+ return 0; - } -+ -+ return 2; - } - -- for (ref = DF_REG_USE_CHAIN (regno); ref; ref = DF_REF_NEXT_REG (ref)) -- if (bitmap_bit_p (insns, DF_REF_INSN_UID (ref))) -- { -- if (bitmap_bit_p (conv, DF_REF_INSN_UID (ref))) -- { -- rtx_insn *insn = DF_REF_INSN (ref); -+ if (!decl) -+ return 0; - -- rtx def_set = single_set (insn); -- gcc_assert (def_set); -+ cgraph_node *target = cgraph_node::get (decl); -+ if (target) -+ target = target->function_symbol (); - -- rtx src = SET_SRC (def_set); -- rtx dst = SET_DEST (def_set); -+ /* For local functions, pass up to SSE_REGPARM_MAX SFmode -+ (and DFmode for SSE2) arguments in SSE registers. */ -+ if (target -+ /* TARGET_SSE_MATH */ -+ && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE) -+ && opt_for_fn (target->decl, optimize) -+ && !(profile_flag && !flag_fentry)) -+ { -+ cgraph_local_info *i = &target->local; -+ if (i && i->local && i->can_change_signature) -+ { -+ /* Refuse to produce wrong code when local function with SSE enabled -+ is called from SSE disabled function. -+ FIXME: We need a way to detect these cases cross-ltrans partition -+ and avoid using SSE calling conventions on local functions called -+ from function with SSE disabled. For now at least delay the -+ warning until we know we are going to produce wrong code. -+ See PR66047 */ -+ if (!TARGET_SSE && warn) -+ return -1; -+ return TARGET_SSE2_P (target_opts_for_fn (target->decl) -+ ->x_ix86_isa_flags) ? 
2 : 1; -+ } -+ } - -- if (!MEM_P (dst) || !REG_P (src)) -- replace_with_subreg_in_insn (insn, reg, reg); -+ return 0; -+} - -- bitmap_clear_bit (conv, INSN_UID (insn)); -- } -- } -- /* Skip debug insns and uninitialized uses. */ -- else if (DF_REF_CHAIN (ref) -- && NONDEBUG_INSN_P (DF_REF_INSN (ref))) -- { -- gcc_assert (scopy); -- replace_rtx (DF_REF_INSN (ref), reg, scopy); -- df_insn_rescan (DF_REF_INSN (ref)); -- } -+/* Return true if EAX is live at the start of the function. Used by -+ ix86_expand_prologue to determine if we need special help before -+ calling allocate_stack_worker. */ - -- BITMAP_FREE (conv); -+static bool -+ix86_eax_live_at_start_p (void) -+{ -+ /* Cheat. Don't bother working forward from ix86_function_regparm -+ to the function type to whether an actual argument is located in -+ eax. Instead just look at cfg info, which is still close enough -+ to correct at this point. This gives false positives for broken -+ functions that might use uninitialized data that happens to be -+ allocated in eax, but who cares? */ -+ return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0); - } - --/* Convert operand OP in INSN. We should handle -- memory operands and uninitialized registers. -- All other register uses are converted during -- registers conversion. */ -- --void --dimode_scalar_chain::convert_op (rtx *op, rtx_insn *insn) -+static bool -+ix86_keep_aggregate_return_pointer (tree fntype) - { -- *op = copy_rtx_if_shared (*op); -+ tree attr; - -- if (GET_CODE (*op) == NOT) -- { -- convert_op (&XEXP (*op, 0), insn); -- PUT_MODE (*op, V2DImode); -- } -- else if (MEM_P (*op)) -+ if (!TARGET_64BIT) - { -- rtx tmp = gen_reg_rtx (DImode); -- -- emit_insn_before (gen_move_insn (tmp, *op), insn); -- *op = gen_rtx_SUBREG (V2DImode, tmp, 0); -+ attr = lookup_attribute ("callee_pop_aggregate_return", -+ TYPE_ATTRIBUTES (fntype)); -+ if (attr) -+ return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0); - -- if (dump_file) -- fprintf (dump_file, " Preloading operand for insn %d into r%d\n", -- INSN_UID (insn), REGNO (tmp)); -- } -- else if (REG_P (*op)) -- { -- /* We may have not converted register usage in case -- this register has no definition. Otherwise it -- should be converted in convert_reg. */ -- df_ref ref; -- FOR_EACH_INSN_USE (ref, insn) -- if (DF_REF_REGNO (ref) == REGNO (*op)) -- { -- gcc_assert (!DF_REF_CHAIN (ref)); -- break; -- } -- *op = gen_rtx_SUBREG (V2DImode, *op, 0); -- } -- else if (CONST_INT_P (*op)) -- { -- rtx vec_cst; -- rtx tmp = gen_rtx_SUBREG (V2DImode, gen_reg_rtx (DImode), 0); -- -- /* Prefer all ones vector in case of -1. */ -- if (constm1_operand (*op, GET_MODE (*op))) -- vec_cst = CONSTM1_RTX (V2DImode); -- else -- vec_cst = gen_rtx_CONST_VECTOR (V2DImode, -- gen_rtvec (2, *op, const0_rtx)); -- -- if (!standard_sse_constant_p (vec_cst, V2DImode)) -- { -- start_sequence (); -- vec_cst = validize_mem (force_const_mem (V2DImode, vec_cst)); -- rtx_insn *seq = get_insns (); -- end_sequence (); -- emit_insn_before (seq, insn); -- } -- -- emit_insn_before (gen_move_insn (copy_rtx (tmp), vec_cst), insn); -- *op = tmp; -- } -- else -- { -- gcc_assert (SUBREG_P (*op)); -- gcc_assert (GET_MODE (*op) == V2DImode); -+ /* For 32-bit MS-ABI the default is to keep aggregate -+ return pointer. */ -+ if (ix86_function_type_abi (fntype) == MS_ABI) -+ return true; - } -+ return KEEP_AGGREGATE_RETURN_POINTER != 0; - } - --/* Convert INSN to vector mode. 
*/ -- --void --dimode_scalar_chain::convert_insn (rtx_insn *insn) --{ -- rtx def_set = single_set (insn); -- rtx src = SET_SRC (def_set); -- rtx dst = SET_DEST (def_set); -- rtx subreg; -+/* Value is the number of bytes of arguments automatically -+ popped when returning from a subroutine call. -+ FUNDECL is the declaration node of the function (as a tree), -+ FUNTYPE is the data type of the function (as a tree), -+ or for a library call it is an identifier node for the subroutine name. -+ SIZE is the number of bytes of arguments passed on the stack. - -- if (MEM_P (dst) && !REG_P (src)) -- { -- /* There are no scalar integer instructions and therefore -- temporary register usage is required. */ -- rtx tmp = gen_reg_rtx (DImode); -- emit_conversion_insns (gen_move_insn (dst, tmp), insn); -- dst = gen_rtx_SUBREG (V2DImode, tmp, 0); -- } -+ On the 80386, the RTD insn may be used to pop them if the number -+ of args is fixed, but if the number is variable then the caller -+ must pop them all. RTD can't be used for library calls now -+ because the library is compiled with the Unix compiler. -+ Use of RTD is a selectable option, since it is incompatible with -+ standard Unix calling sequences. If the option is not selected, -+ the caller must always pop the args. - -- switch (GET_CODE (src)) -- { -- case ASHIFT: -- case ASHIFTRT: -- case LSHIFTRT: -- convert_op (&XEXP (src, 0), insn); -- PUT_MODE (src, V2DImode); -- break; -+ The attribute stdcall is equivalent to RTD on a per module basis. */ - -- case PLUS: -- case MINUS: -- case IOR: -- case XOR: -- case AND: -- convert_op (&XEXP (src, 0), insn); -- convert_op (&XEXP (src, 1), insn); -- PUT_MODE (src, V2DImode); -- break; -+static poly_int64 -+ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size) -+{ -+ unsigned int ccvt; - -- case NEG: -- src = XEXP (src, 0); -- convert_op (&src, insn); -- subreg = gen_reg_rtx (V2DImode); -- emit_insn_before (gen_move_insn (subreg, CONST0_RTX (V2DImode)), insn); -- src = gen_rtx_MINUS (V2DImode, subreg, src); -- break; -+ /* None of the 64-bit ABIs pop arguments. */ -+ if (TARGET_64BIT) -+ return 0; - -- case NOT: -- src = XEXP (src, 0); -- convert_op (&src, insn); -- subreg = gen_reg_rtx (V2DImode); -- emit_insn_before (gen_move_insn (subreg, CONSTM1_RTX (V2DImode)), insn); -- src = gen_rtx_XOR (V2DImode, src, subreg); -- break; -+ ccvt = ix86_get_callcvt (funtype); - -- case MEM: -- if (!REG_P (dst)) -- convert_op (&src, insn); -- break; -+ if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL -+ | IX86_CALLCVT_THISCALL)) != 0 -+ && ! stdarg_p (funtype)) -+ return size; - -- case REG: -- if (!MEM_P (dst)) -- convert_op (&src, insn); -- break; -+ /* Lose any fake structure return argument if it is passed on the stack. */ -+ if (aggregate_value_p (TREE_TYPE (funtype), fundecl) -+ && !ix86_keep_aggregate_return_pointer (funtype)) -+ { -+ int nregs = ix86_function_regparm (funtype, fundecl); -+ if (nregs == 0) -+ return GET_MODE_SIZE (Pmode); -+ } - -- case SUBREG: -- gcc_assert (GET_MODE (src) == V2DImode); -- break; -+ return 0; -+} - -- case COMPARE: -- src = SUBREG_REG (XEXP (XEXP (src, 0), 0)); -+/* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. 
*/ - -- gcc_assert ((REG_P (src) && GET_MODE (src) == DImode) -- || (SUBREG_P (src) && GET_MODE (src) == V2DImode)); -+static bool -+ix86_legitimate_combined_insn (rtx_insn *insn) -+{ -+ int i; - -- if (REG_P (src)) -- subreg = gen_rtx_SUBREG (V2DImode, src, 0); -- else -- subreg = copy_rtx_if_shared (src); -- emit_insn_before (gen_vec_interleave_lowv2di (copy_rtx_if_shared (subreg), -- copy_rtx_if_shared (subreg), -- copy_rtx_if_shared (subreg)), -- insn); -- dst = gen_rtx_REG (CCmode, FLAGS_REG); -- src = gen_rtx_UNSPEC (CCmode, gen_rtvec (2, copy_rtx_if_shared (src), -- copy_rtx_if_shared (src)), -- UNSPEC_PTEST); -- break; -+ /* Check operand constraints in case hard registers were propagated -+ into insn pattern. This check prevents combine pass from -+ generating insn patterns with invalid hard register operands. -+ These invalid insns can eventually confuse reload to error out -+ with a spill failure. See also PRs 46829 and 46843. */ - -- case CONST_INT: -- convert_op (&src, insn); -- break; -+ gcc_assert (INSN_CODE (insn) >= 0); - -- default: -- gcc_unreachable (); -- } -+ extract_insn (insn); -+ preprocess_constraints (insn); - -- SET_SRC (def_set) = src; -- SET_DEST (def_set) = dst; -+ int n_operands = recog_data.n_operands; -+ int n_alternatives = recog_data.n_alternatives; -+ for (i = 0; i < n_operands; i++) -+ { -+ rtx op = recog_data.operand[i]; -+ machine_mode mode = GET_MODE (op); -+ const operand_alternative *op_alt; -+ int offset = 0; -+ bool win; -+ int j; - -- /* Drop possible dead definitions. */ -- PATTERN (insn) = def_set; -+ /* A unary operator may be accepted by the predicate, but it -+ is irrelevant for matching constraints. */ -+ if (UNARY_P (op)) -+ op = XEXP (op, 0); - -- INSN_CODE (insn) = -1; -- recog_memoized (insn); -- df_insn_rescan (insn); --} -+ if (SUBREG_P (op)) -+ { -+ if (REG_P (SUBREG_REG (op)) -+ && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER) -+ offset = subreg_regno_offset (REGNO (SUBREG_REG (op)), -+ GET_MODE (SUBREG_REG (op)), -+ SUBREG_BYTE (op), -+ GET_MODE (op)); -+ op = SUBREG_REG (op); -+ } - --/* Fix uses of converted REG in debug insns. */ -+ if (!(REG_P (op) && HARD_REGISTER_P (op))) -+ continue; - --void --timode_scalar_chain::fix_debug_reg_uses (rtx reg) --{ -- if (!flag_var_tracking) -- return; -+ op_alt = recog_op_alt; - -- df_ref ref, next; -- for (ref = DF_REG_USE_CHAIN (REGNO (reg)); ref; ref = next) -- { -- rtx_insn *insn = DF_REF_INSN (ref); -- /* Make sure the next ref is for a different instruction, -- so that we're not affected by the rescan. */ -- next = DF_REF_NEXT_REG (ref); -- while (next && DF_REF_INSN (next) == insn) -- next = DF_REF_NEXT_REG (next); -+ /* Operand has no constraints, anything is OK. */ -+ win = !n_alternatives; - -- if (DEBUG_INSN_P (insn)) -+ alternative_mask preferred = get_preferred_alternatives (insn); -+ for (j = 0; j < n_alternatives; j++, op_alt += n_operands) - { -- /* It may be a debug insn with a TImode variable in -- register. 
*/ -- bool changed = false; -- for (; ref != next; ref = DF_REF_NEXT_REG (ref)) -+ if (!TEST_BIT (preferred, j)) -+ continue; -+ if (op_alt[i].anything_ok -+ || (op_alt[i].matches != -1 -+ && operands_match_p -+ (recog_data.operand[i], -+ recog_data.operand[op_alt[i].matches])) -+ || reg_fits_class_p (op, op_alt[i].cl, offset, mode)) - { -- rtx *loc = DF_REF_LOC (ref); -- if (REG_P (*loc) && GET_MODE (*loc) == V1TImode) -- { -- *loc = gen_rtx_SUBREG (TImode, *loc, 0); -- changed = true; -- } -+ win = true; -+ break; - } -- if (changed) -- df_insn_rescan (insn); - } -+ -+ if (!win) -+ return false; - } -+ -+ return true; - } -+ -+/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */ - --/* Convert INSN from TImode to V1T1mode. */ -+static unsigned HOST_WIDE_INT -+ix86_asan_shadow_offset (void) -+{ -+ return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44) -+ : HOST_WIDE_INT_C (0x7fff8000)) -+ : (HOST_WIDE_INT_1 << 29); -+} -+ -+/* Argument support functions. */ - --void --timode_scalar_chain::convert_insn (rtx_insn *insn) -+/* Return true when register may be used to pass function parameters. */ -+bool -+ix86_function_arg_regno_p (int regno) - { -- rtx def_set = single_set (insn); -- rtx src = SET_SRC (def_set); -- rtx dst = SET_DEST (def_set); -+ int i; -+ enum calling_abi call_abi; -+ const int *parm_regs; - -- switch (GET_CODE (dst)) -+ if (!TARGET_64BIT) - { -- case REG: -- { -- rtx tmp = find_reg_equal_equiv_note (insn); -- if (tmp) -- PUT_MODE (XEXP (tmp, 0), V1TImode); -- PUT_MODE (dst, V1TImode); -- fix_debug_reg_uses (dst); -- } -- break; -- case MEM: -- PUT_MODE (dst, V1TImode); -- break; -- -- default: -- gcc_unreachable (); -+ if (TARGET_MACHO) -+ return (regno < REGPARM_MAX -+ || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno])); -+ else -+ return (regno < REGPARM_MAX -+ || (TARGET_MMX && MMX_REGNO_P (regno) -+ && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX)) -+ || (TARGET_SSE && SSE_REGNO_P (regno) -+ && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))); - } - -- switch (GET_CODE (src)) -- { -- case REG: -- PUT_MODE (src, V1TImode); -- /* Call fix_debug_reg_uses only if SRC is never defined. */ -- if (!DF_REG_DEF_CHAIN (REGNO (src))) -- fix_debug_reg_uses (src); -- break; -- -- case MEM: -- PUT_MODE (src, V1TImode); -- break; -- -- case CONST_WIDE_INT: -- if (NONDEBUG_INSN_P (insn)) -- { -- /* Since there are no instructions to store 128-bit constant, -- temporary register usage is required. */ -- rtx tmp = gen_reg_rtx (V1TImode); -- start_sequence (); -- src = gen_rtx_CONST_VECTOR (V1TImode, gen_rtvec (1, src)); -- src = validize_mem (force_const_mem (V1TImode, src)); -- rtx_insn *seq = get_insns (); -- end_sequence (); -- if (seq) -- emit_insn_before (seq, insn); -- emit_conversion_insns (gen_rtx_SET (dst, tmp), insn); -- dst = tmp; -- } -- break; -- -- case CONST_INT: -- switch (standard_sse_constant_p (src, TImode)) -- { -- case 1: -- src = CONST0_RTX (GET_MODE (dst)); -- break; -- case 2: -- src = CONSTM1_RTX (GET_MODE (dst)); -- break; -- default: -- gcc_unreachable (); -- } -- if (NONDEBUG_INSN_P (insn)) -- { -- rtx tmp = gen_reg_rtx (V1TImode); -- /* Since there are no instructions to store standard SSE -- constant, temporary register usage is required. 
*/ -- emit_conversion_insns (gen_rtx_SET (dst, tmp), insn); -- dst = tmp; -- } -- break; -+ if (TARGET_SSE && SSE_REGNO_P (regno) -+ && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)) -+ return true; - -- default: -- gcc_unreachable (); -- } -+ /* TODO: The function should depend on current function ABI but -+ builtins.c would need updating then. Therefore we use the -+ default ABI. */ -+ call_abi = ix86_cfun_abi (); - -- SET_SRC (def_set) = src; -- SET_DEST (def_set) = dst; -+ /* RAX is used as hidden argument to va_arg functions. */ -+ if (call_abi == SYSV_ABI && regno == AX_REG) -+ return true; - -- /* Drop possible dead definitions. */ -- PATTERN (insn) = def_set; -+ if (call_abi == MS_ABI) -+ parm_regs = x86_64_ms_abi_int_parameter_registers; -+ else -+ parm_regs = x86_64_int_parameter_registers; - -- INSN_CODE (insn) = -1; -- recog_memoized (insn); -- df_insn_rescan (insn); -+ for (i = 0; i < (call_abi == MS_ABI -+ ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++) -+ if (regno == parm_regs[i]) -+ return true; -+ return false; - } - --void --dimode_scalar_chain::convert_registers () --{ -- bitmap_iterator bi; -- unsigned id; -+/* Return if we do not know how to pass ARG solely in registers. */ - -- EXECUTE_IF_SET_IN_BITMAP (defs, 0, id, bi) -- convert_reg (id); -+static bool -+ix86_must_pass_in_stack (const function_arg_info &arg) -+{ -+ if (must_pass_in_stack_var_size_or_pad (arg)) -+ return true; - -- EXECUTE_IF_AND_COMPL_IN_BITMAP (defs_conv, defs, 0, id, bi) -- make_vector_copies (id); -+ /* For 32-bit, we want TImode aggregates to go on the stack. But watch out! -+ The layout_type routine is crafty and tries to trick us into passing -+ currently unsupported vector types on the stack by using TImode. */ -+ return (!TARGET_64BIT && arg.mode == TImode -+ && arg.type && TREE_CODE (arg.type) != VECTOR_TYPE); - } - --/* Convert whole chain creating required register -- conversions and copies. */ -- -+/* It returns the size, in bytes, of the area reserved for arguments passed -+ in registers for the function represented by fndecl dependent to the used -+ abi format. */ - int --scalar_chain::convert () -+ix86_reg_parm_stack_space (const_tree fndecl) - { -- bitmap_iterator bi; -- unsigned id; -- int converted_insns = 0; -- -- if (!dbg_cnt (stv_conversion)) -- return 0; -- -- if (dump_file) -- fprintf (dump_file, "Converting chain #%d...\n", chain_id); -+ enum calling_abi call_abi = SYSV_ABI; -+ if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL) -+ call_abi = ix86_function_abi (fndecl); -+ else -+ call_abi = ix86_function_type_abi (fndecl); -+ if (TARGET_64BIT && call_abi == MS_ABI) -+ return 32; -+ return 0; -+} - -- convert_registers (); -- -- EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi) -- { -- convert_insn (DF_INSN_UID_GET (id)->insn); -- converted_insns++; -- } -- -- return converted_insns; -+/* We add this as a workaround in order to use libc_has_function -+ hook in i386.md. */ -+bool -+ix86_libc_has_function (enum function_class fn_class) -+{ -+ return targetm.libc_has_function (fn_class); - } - --/* Main STV pass function. Find and convert scalar -- instructions into vector mode when profitable. */ -- --static unsigned int --convert_scalars_to_vector () -+/* Returns value SYSV_ABI, MS_ABI dependent on fntype, -+ specifying the call abi used. 
*/ -+enum calling_abi -+ix86_function_type_abi (const_tree fntype) - { -- basic_block bb; -- bitmap candidates; -- int converted_insns = 0; -- -- bitmap_obstack_initialize (NULL); -- candidates = BITMAP_ALLOC (NULL); -- -- calculate_dominance_info (CDI_DOMINATORS); -- df_set_flags (DF_DEFER_INSN_RESCAN); -- df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN); -- df_md_add_problem (); -- df_analyze (); -- -- /* Find all instructions we want to convert into vector mode. */ -- if (dump_file) -- fprintf (dump_file, "Searching for mode conversion candidates...\n"); -- -- FOR_EACH_BB_FN (bb, cfun) -- { -- rtx_insn *insn; -- FOR_BB_INSNS (bb, insn) -- if (scalar_to_vector_candidate_p (insn)) -- { -- if (dump_file) -- fprintf (dump_file, " insn %d is marked as a candidate\n", -- INSN_UID (insn)); -- -- bitmap_set_bit (candidates, INSN_UID (insn)); -- } -- } -- -- remove_non_convertible_regs (candidates); -+ enum calling_abi abi = ix86_abi; - -- if (bitmap_empty_p (candidates)) -- if (dump_file) -- fprintf (dump_file, "There are no candidates for optimization.\n"); -+ if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE) -+ return abi; - -- while (!bitmap_empty_p (candidates)) -+ if (abi == SYSV_ABI -+ && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype))) - { -- unsigned uid = bitmap_first_set_bit (candidates); -- scalar_chain *chain; -- -- if (TARGET_64BIT) -- chain = new timode_scalar_chain; -- else -- chain = new dimode_scalar_chain; -- -- /* Find instructions chain we want to convert to vector mode. -- Check all uses and definitions to estimate all required -- conversions. */ -- chain->build (candidates, uid); -- -- if (chain->compute_convert_gain () > 0) -- converted_insns += chain->convert (); -- else -- if (dump_file) -- fprintf (dump_file, "Chain #%d conversion is not profitable\n", -- chain->chain_id); -- -- delete chain; -- } -- -- if (dump_file) -- fprintf (dump_file, "Total insns converted: %d\n", converted_insns); -- -- BITMAP_FREE (candidates); -- bitmap_obstack_release (NULL); -- df_process_deferred_rescans (); -+ static int warned; -+ if (TARGET_X32 && !warned) -+ { -+ error ("X32 does not support % attribute"); -+ warned = 1; -+ } - -- /* Conversion means we may have 128bit register spills/fills -- which require aligned stack. */ -- if (converted_insns) -- { -- if (crtl->stack_alignment_needed < 128) -- crtl->stack_alignment_needed = 128; -- if (crtl->stack_alignment_estimated < 128) -- crtl->stack_alignment_estimated = 128; -- /* Fix up DECL_RTL/DECL_INCOMING_RTL of arguments. 
*/ -- if (TARGET_64BIT) -- for (tree parm = DECL_ARGUMENTS (current_function_decl); -- parm; parm = DECL_CHAIN (parm)) -- { -- if (TYPE_MODE (TREE_TYPE (parm)) != TImode) -- continue; -- if (DECL_RTL_SET_P (parm) -- && GET_MODE (DECL_RTL (parm)) == V1TImode) -- { -- rtx r = DECL_RTL (parm); -- if (REG_P (r)) -- SET_DECL_RTL (parm, gen_rtx_SUBREG (TImode, r, 0)); -- } -- if (DECL_INCOMING_RTL (parm) -- && GET_MODE (DECL_INCOMING_RTL (parm)) == V1TImode) -- { -- rtx r = DECL_INCOMING_RTL (parm); -- if (REG_P (r)) -- DECL_INCOMING_RTL (parm) = gen_rtx_SUBREG (TImode, r, 0); -- } -- } -+ abi = MS_ABI; - } -+ else if (abi == MS_ABI -+ && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype))) -+ abi = SYSV_ABI; - -- return 0; -+ return abi; - } - --namespace { -- --const pass_data pass_data_insert_vzeroupper = -+enum calling_abi -+ix86_function_abi (const_tree fndecl) - { -- RTL_PASS, /* type */ -- "vzeroupper", /* name */ -- OPTGROUP_NONE, /* optinfo_flags */ -- TV_MACH_DEP, /* tv_id */ -- 0, /* properties_required */ -- 0, /* properties_provided */ -- 0, /* properties_destroyed */ -- 0, /* todo_flags_start */ -- TODO_df_finish, /* todo_flags_finish */ --}; -+ return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi; -+} - --class pass_insert_vzeroupper : public rtl_opt_pass -+/* Returns value SYSV_ABI, MS_ABI dependent on cfun, -+ specifying the call abi used. */ -+enum calling_abi -+ix86_cfun_abi (void) - { --public: -- pass_insert_vzeroupper(gcc::context *ctxt) -- : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt) -- {} -+ return cfun ? cfun->machine->call_abi : ix86_abi; -+} - -- /* opt_pass methods: */ -- virtual bool gate (function *) -+bool -+ix86_function_ms_hook_prologue (const_tree fn) -+{ -+ if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn))) - { -- return TARGET_AVX -- && TARGET_VZEROUPPER && flag_expensive_optimizations -- && !optimize_size; -+ if (decl_function_context (fn) != NULL_TREE) -+ error_at (DECL_SOURCE_LOCATION (fn), -+ "% attribute is not compatible " -+ "with nested function"); -+ else -+ return true; - } -+ return false; -+} - -- virtual unsigned int execute (function *) -- { -- return rest_of_handle_insert_vzeroupper (); -- } -+bool -+ix86_function_naked (const_tree fn) -+{ -+ if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn))) -+ return true; - --}; // class pass_insert_vzeroupper -+ return false; -+} - --const pass_data pass_data_stv = --{ -- RTL_PASS, /* type */ -- "stv", /* name */ -- OPTGROUP_NONE, /* optinfo_flags */ -- TV_MACH_DEP, /* tv_id */ -- 0, /* properties_required */ -- 0, /* properties_provided */ -- 0, /* properties_destroyed */ -- 0, /* todo_flags_start */ -- TODO_df_finish, /* todo_flags_finish */ --}; -+/* Write the extra assembler code needed to declare a function properly. */ - --class pass_stv : public rtl_opt_pass -+void -+ix86_asm_output_function_label (FILE *asm_out_file, const char *fname, -+ tree decl) - { --public: -- pass_stv (gcc::context *ctxt) -- : rtl_opt_pass (pass_data_stv, ctxt), -- timode_p (false) -- {} -+ bool is_ms_hook = ix86_function_ms_hook_prologue (decl); - -- /* opt_pass methods: */ -- virtual bool gate (function *) -+ if (is_ms_hook) - { -- return (timode_p == !!TARGET_64BIT -- && TARGET_STV && TARGET_SSE2 && optimize > 1); -- } -+ int i, filler_count = (TARGET_64BIT ? 
32 : 16); -+ unsigned int filler_cc = 0xcccccccc; - -- virtual unsigned int execute (function *) -- { -- return convert_scalars_to_vector (); -+ for (i = 0; i < filler_count; i += 4) -+ fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc); - } - -- opt_pass *clone () -- { -- return new pass_stv (m_ctxt); -- } -+#ifdef SUBTARGET_ASM_UNWIND_INIT -+ SUBTARGET_ASM_UNWIND_INIT (asm_out_file); -+#endif -+ -+ ASM_OUTPUT_LABEL (asm_out_file, fname); - -- void set_pass_param (unsigned int n, bool param) -+ /* Output magic byte marker, if hot-patch attribute is set. */ -+ if (is_ms_hook) - { -- gcc_assert (n == 0); -- timode_p = param; -+ if (TARGET_64BIT) -+ { -+ /* leaq [%rsp + 0], %rsp */ -+ fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n", -+ asm_out_file); -+ } -+ else -+ { -+ /* movl.s %edi, %edi -+ push %ebp -+ movl.s %esp, %ebp */ -+ fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", asm_out_file); -+ } - } -+} - --private: -- bool timode_p; --}; // class pass_stv -- --} // anon namespace -- --rtl_opt_pass * --make_pass_insert_vzeroupper (gcc::context *ctxt) -+/* Implementation of call abi switching target hook. Specific to FNDECL -+ the specific call register sets are set. See also -+ ix86_conditional_register_usage for more details. */ -+void -+ix86_call_abi_override (const_tree fndecl) - { -- return new pass_insert_vzeroupper (ctxt); -+ cfun->machine->call_abi = ix86_function_abi (fndecl); - } - --rtl_opt_pass * --make_pass_stv (gcc::context *ctxt) -+/* Return 1 if pseudo register should be created and used to hold -+ GOT address for PIC code. */ -+bool -+ix86_use_pseudo_pic_reg (void) - { -- return new pass_stv (ctxt); -+ if ((TARGET_64BIT -+ && (ix86_cmodel == CM_SMALL_PIC -+ || TARGET_PECOFF)) -+ || !flag_pic) -+ return false; -+ return true; - } - --/* Inserting ENDBRANCH instructions. */ -+/* Initialize large model PIC register. */ - --static unsigned int --rest_of_insert_endbranch (void) -+static void -+ix86_init_large_pic_reg (unsigned int tmp_regno) - { -- timevar_push (TV_MACH_DEP); -- -- rtx cet_eb; -- rtx_insn *insn; -- basic_block bb; -- -- /* Currently emit EB if it's a tracking function, i.e. 'nocf_check' is -- absent among function attributes. Later an optimization will be -- introduced to make analysis if an address of a static function is -- taken. A static function whose address is not taken will get a -- nocf_check attribute. This will allow to reduce the number of EB. */ -- -- if (!lookup_attribute ("nocf_check", -- TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))) -- && (!flag_manual_endbr -- || lookup_attribute ("cf_check", -- DECL_ATTRIBUTES (cfun->decl))) -- && !cgraph_node::get (cfun->decl)->only_called_directly_p ()) -- { -- /* Queue ENDBR insertion to x86_function_profiler. */ -- if (crtl->profile && flag_fentry) -- cfun->machine->endbr_queued_at_entrance = true; -- else -- { -- cet_eb = gen_nop_endbr (); -- -- bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb; -- insn = BB_HEAD (bb); -- emit_insn_before (cet_eb, insn); -- } -- } -- -- bb = 0; -- FOR_EACH_BB_FN (bb, cfun) -- { -- for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); -- insn = NEXT_INSN (insn)) -- { -- if (CALL_P (insn)) -- { -- bool need_endbr; -- need_endbr = find_reg_note (insn, REG_SETJMP, NULL) != NULL; -- if (!need_endbr && !SIBLING_CALL_P (insn)) -- { -- rtx call = get_call_rtx_from (insn); -- rtx fnaddr = XEXP (call, 0); -- tree fndecl = NULL_TREE; -- -- /* Also generate ENDBRANCH for non-tail call which -- may return via indirect branch. 
*/ -- if (GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF) -- fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0)); -- if (fndecl == NULL_TREE) -- fndecl = MEM_EXPR (fnaddr); -- if (fndecl -- && TREE_CODE (TREE_TYPE (fndecl)) != FUNCTION_TYPE -- && TREE_CODE (TREE_TYPE (fndecl)) != METHOD_TYPE) -- fndecl = NULL_TREE; -- if (fndecl && TYPE_ARG_TYPES (TREE_TYPE (fndecl))) -- { -- tree fntype = TREE_TYPE (fndecl); -- if (lookup_attribute ("indirect_return", -- TYPE_ATTRIBUTES (fntype))) -- need_endbr = true; -- } -- } -- if (!need_endbr) -- continue; -- /* Generate ENDBRANCH after CALL, which can return more than -- twice, setjmp-like functions. */ -- -- cet_eb = gen_nop_endbr (); -- emit_insn_after_setloc (cet_eb, insn, INSN_LOCATION (insn)); -- continue; -- } -- -- if (JUMP_P (insn) && flag_cet_switch) -- { -- rtx target = JUMP_LABEL (insn); -- if (target == NULL_RTX || ANY_RETURN_P (target)) -- continue; -- -- /* Check the jump is a switch table. */ -- rtx_insn *label = as_a (target); -- rtx_insn *table = next_insn (label); -- if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table)) -- continue; -- -- /* For the indirect jump find out all places it jumps and insert -- ENDBRANCH there. It should be done under a special flag to -- control ENDBRANCH generation for switch stmts. */ -- edge_iterator ei; -- edge e; -- basic_block dest_blk; -- -- FOR_EACH_EDGE (e, ei, bb->succs) -- { -- rtx_insn *insn; -- -- dest_blk = e->dest; -- insn = BB_HEAD (dest_blk); -- gcc_assert (LABEL_P (insn)); -- cet_eb = gen_nop_endbr (); -- emit_insn_after (cet_eb, insn); -- } -- continue; -- } -- -- if ((LABEL_P (insn) && LABEL_PRESERVE_P (insn)) -- || (NOTE_P (insn) -- && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)) -- /* TODO. Check /s bit also. */ -- { -- cet_eb = gen_nop_endbr (); -- emit_insn_after (cet_eb, insn); -- continue; -- } -- } -- } -+ rtx_code_label *label; -+ rtx tmp_reg; - -- timevar_pop (TV_MACH_DEP); -- return 0; -+ gcc_assert (Pmode == DImode); -+ label = gen_label_rtx (); -+ emit_label (label); -+ LABEL_PRESERVE_P (label) = 1; -+ tmp_reg = gen_rtx_REG (Pmode, tmp_regno); -+ gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno); -+ emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, -+ label)); -+ emit_insn (gen_set_got_offset_rex64 (tmp_reg, label)); -+ emit_insn (ix86_gen_add3 (pic_offset_table_rtx, -+ pic_offset_table_rtx, tmp_reg)); -+ const char *name = LABEL_NAME (label); -+ PUT_CODE (label, NOTE); -+ NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL; -+ NOTE_DELETED_LABEL_NAME (label) = name; - } - --namespace { -- --const pass_data pass_data_insert_endbranch = -+/* Create and initialize PIC register if required. */ -+static void -+ix86_init_pic_reg (void) - { -- RTL_PASS, /* type. */ -- "cet", /* name. */ -- OPTGROUP_NONE, /* optinfo_flags. */ -- TV_MACH_DEP, /* tv_id. */ -- 0, /* properties_required. */ -- 0, /* properties_provided. */ -- 0, /* properties_destroyed. */ -- 0, /* todo_flags_start. */ -- 0, /* todo_flags_finish. 
*/ --}; -+ edge entry_edge; -+ rtx_insn *seq; - --class pass_insert_endbranch : public rtl_opt_pass --{ --public: -- pass_insert_endbranch (gcc::context *ctxt) -- : rtl_opt_pass (pass_data_insert_endbranch, ctxt) -- {} -+ if (!ix86_use_pseudo_pic_reg ()) -+ return; -+ -+ start_sequence (); - -- /* opt_pass methods: */ -- virtual bool gate (function *) -+ if (TARGET_64BIT) - { -- return ((flag_cf_protection & CF_BRANCH)); -+ if (ix86_cmodel == CM_LARGE_PIC) -+ ix86_init_large_pic_reg (R11_REG); -+ else -+ emit_insn (gen_set_got_rex64 (pic_offset_table_rtx)); - } -- -- virtual unsigned int execute (function *) -+ else - { -- return rest_of_insert_endbranch (); -+ /* If there is future mcount call in the function it is more profitable -+ to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM. */ -+ rtx reg = crtl->profile -+ ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM) -+ : pic_offset_table_rtx; -+ rtx_insn *insn = emit_insn (gen_set_got (reg)); -+ RTX_FRAME_RELATED_P (insn) = 1; -+ if (crtl->profile) -+ emit_move_insn (pic_offset_table_rtx, reg); -+ add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX); - } - --}; // class pass_insert_endbranch -- --} // anon namespace -+ seq = get_insns (); -+ end_sequence (); - --rtl_opt_pass * --make_pass_insert_endbranch (gcc::context *ctxt) --{ -- return new pass_insert_endbranch (ctxt); -+ entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)); -+ insert_insn_on_edge (seq, entry_edge); -+ commit_one_edge_insertion (entry_edge); - } - --/* At entry of the nearest common dominator for basic blocks with -- conversions, generate a single -- vxorps %xmmN, %xmmN, %xmmN -- for all -- vcvtss2sd op, %xmmN, %xmmX -- vcvtsd2ss op, %xmmN, %xmmX -- vcvtsi2ss op, %xmmN, %xmmX -- vcvtsi2sd op, %xmmN, %xmmX -- -- NB: We want to generate only a single vxorps to cover the whole -- function. The LCM algorithm isn't appropriate here since it may -- place a vxorps inside the loop. */ -- --static unsigned int --remove_partial_avx_dependency (void) --{ -- timevar_push (TV_MACH_DEP); -- -- bitmap_obstack_initialize (NULL); -- bitmap convert_bbs = BITMAP_ALLOC (NULL); -+/* Initialize a variable CUM of type CUMULATIVE_ARGS -+ for a call to a function whose data type is FNTYPE. -+ For a library call, FNTYPE is 0. 
*/ - -- basic_block bb; -- rtx_insn *insn, *set_insn; -- rtx set; -- rtx v4sf_const0 = NULL_RTX; -+void -+init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ -+ tree fntype, /* tree ptr for function decl */ -+ rtx libname, /* SYMBOL_REF of library name or 0 */ -+ tree fndecl, -+ int caller) -+{ -+ struct cgraph_local_info *i = NULL; -+ struct cgraph_node *target = NULL; - -- auto_vec control_flow_insns; -+ memset (cum, 0, sizeof (*cum)); - -- FOR_EACH_BB_FN (bb, cfun) -+ if (fndecl) - { -- FOR_BB_INSNS (bb, insn) -+ target = cgraph_node::get (fndecl); -+ if (target) - { -- if (!NONDEBUG_INSN_P (insn)) -- continue; -- -- set = single_set (insn); -- if (!set) -- continue; -+ target = target->function_symbol (); -+ i = cgraph_node::local_info (target->decl); -+ cum->call_abi = ix86_function_abi (target->decl); -+ } -+ else -+ cum->call_abi = ix86_function_abi (fndecl); -+ } -+ else -+ cum->call_abi = ix86_function_type_abi (fntype); - -- if (get_attr_avx_partial_xmm_update (insn) -- != AVX_PARTIAL_XMM_UPDATE_TRUE) -- continue; -+ cum->caller = caller; - -- if (!v4sf_const0) -- { -- calculate_dominance_info (CDI_DOMINATORS); -- df_set_flags (DF_DEFER_INSN_RESCAN); -- df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN); -- df_md_add_problem (); -- df_analyze (); -- v4sf_const0 = gen_reg_rtx (V4SFmode); -- } -+ /* Set up the number of registers to use for passing arguments. */ -+ cum->nregs = ix86_regparm; -+ if (TARGET_64BIT) -+ { -+ cum->nregs = (cum->call_abi == SYSV_ABI -+ ? X86_64_REGPARM_MAX -+ : X86_64_MS_REGPARM_MAX); -+ } -+ if (TARGET_SSE) -+ { -+ cum->sse_nregs = SSE_REGPARM_MAX; -+ if (TARGET_64BIT) -+ { -+ cum->sse_nregs = (cum->call_abi == SYSV_ABI -+ ? X86_64_SSE_REGPARM_MAX -+ : X86_64_MS_SSE_REGPARM_MAX); -+ } -+ } -+ if (TARGET_MMX) -+ cum->mmx_nregs = MMX_REGPARM_MAX; -+ cum->warn_avx512f = true; -+ cum->warn_avx = true; -+ cum->warn_sse = true; -+ cum->warn_mmx = true; - -- /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF, -- SI -> SF, SI -> DF, DI -> SF, DI -> DF, to vec_dup and -- vec_merge with subreg. */ -- rtx src = SET_SRC (set); -- rtx dest = SET_DEST (set); -- machine_mode dest_mode = GET_MODE (dest); -+ /* Because type might mismatch in between caller and callee, we need to -+ use actual type of function for local calls. -+ FIXME: cgraph_analyze can be told to actually record if function uses -+ va_start so for local functions maybe_vaarg can be made aggressive -+ helping K&R code. -+ FIXME: once typesytem is fixed, we won't need this code anymore. */ -+ if (i && i->local && i->can_change_signature) -+ fntype = TREE_TYPE (target->decl); -+ cum->stdarg = stdarg_p (fntype); -+ cum->maybe_vaarg = (fntype -+ ? (!prototype_p (fntype) || stdarg_p (fntype)) -+ : !libname); - -- rtx zero; -- machine_mode dest_vecmode; -- if (dest_mode == E_SFmode) -- { -- dest_vecmode = V4SFmode; -- zero = v4sf_const0; -- } -- else -- { -- dest_vecmode = V2DFmode; -- zero = gen_rtx_SUBREG (V2DFmode, v4sf_const0, 0); -- } -+ cum->decl = fndecl; - -- /* Change source to vector mode. */ -- src = gen_rtx_VEC_DUPLICATE (dest_vecmode, src); -- src = gen_rtx_VEC_MERGE (dest_vecmode, src, zero, -- GEN_INT (HOST_WIDE_INT_1U)); -- /* Change destination to vector mode. */ -- rtx vec = gen_reg_rtx (dest_vecmode); -- /* Generate an XMM vector SET. 
*/ -- set = gen_rtx_SET (vec, src); -- set_insn = emit_insn_before (set, insn); -- df_insn_rescan (set_insn); -- -- if (cfun->can_throw_non_call_exceptions) -+ cum->warn_empty = !warn_abi || cum->stdarg; -+ if (!cum->warn_empty && fntype) -+ { -+ function_args_iterator iter; -+ tree argtype; -+ bool seen_empty_type = false; -+ FOREACH_FUNCTION_ARGS (fntype, argtype, iter) -+ { -+ if (argtype == error_mark_node || VOID_TYPE_P (argtype)) -+ break; -+ if (TYPE_EMPTY_P (argtype)) -+ seen_empty_type = true; -+ else if (seen_empty_type) - { -- /* Handle REG_EH_REGION note. */ -- rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX); -- if (note) -- { -- control_flow_insns.safe_push (set_insn); -- add_reg_note (set_insn, REG_EH_REGION, XEXP (note, 0)); -- } -+ cum->warn_empty = true; -+ break; - } -- -- src = gen_rtx_SUBREG (dest_mode, vec, 0); -- set = gen_rtx_SET (dest, src); -- -- /* Drop possible dead definitions. */ -- PATTERN (insn) = set; -- -- INSN_CODE (insn) = -1; -- recog_memoized (insn); -- df_insn_rescan (insn); -- bitmap_set_bit (convert_bbs, bb->index); - } - } - -- if (v4sf_const0) -+ if (!TARGET_64BIT) - { -- /* (Re-)discover loops so that bb->loop_father can be used in the -- analysis below. */ -- loop_optimizer_init (AVOID_CFG_MODIFICATIONS); -- -- /* Generate a vxorps at entry of the nearest dominator for basic -- blocks with conversions, which is in the the fake loop that -- contains the whole function, so that there is only a single -- vxorps in the whole function. */ -- bb = nearest_common_dominator_for_set (CDI_DOMINATORS, -- convert_bbs); -- while (bb->loop_father->latch -- != EXIT_BLOCK_PTR_FOR_FN (cfun)) -- bb = get_immediate_dominator (CDI_DOMINATORS, -- bb->loop_father->header); -- -- set = gen_rtx_SET (v4sf_const0, CONST0_RTX (V4SFmode)); -+ /* If there are variable arguments, then we won't pass anything -+ in registers in 32-bit mode. */ -+ if (stdarg_p (fntype)) -+ { -+ cum->nregs = 0; -+ /* Since in 32-bit, variable arguments are always passed on -+ stack, there is scratch register available for indirect -+ sibcall. */ -+ cfun->machine->arg_reg_available = true; -+ cum->sse_nregs = 0; -+ cum->mmx_nregs = 0; -+ cum->warn_avx512f = false; -+ cum->warn_avx = false; -+ cum->warn_sse = false; -+ cum->warn_mmx = false; -+ return; -+ } - -- insn = BB_HEAD (bb); -- while (insn && !NONDEBUG_INSN_P (insn)) -+ /* Use ecx and edx registers if function has fastcall attribute, -+ else look for regparm information. */ -+ if (fntype) - { -- if (insn == BB_END (bb)) -+ unsigned int ccvt = ix86_get_callcvt (fntype); -+ if ((ccvt & IX86_CALLCVT_THISCALL) != 0) - { -- insn = NULL; -- break; -+ cum->nregs = 1; -+ cum->fastcall = 1; /* Same first register as in fastcall. */ -+ } -+ else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) -+ { -+ cum->nregs = 2; -+ cum->fastcall = 1; - } -- insn = NEXT_INSN (insn); -+ else -+ cum->nregs = ix86_function_regparm (fntype, fndecl); - } -- if (insn == BB_HEAD (bb)) -- set_insn = emit_insn_before (set, insn); -- else -- set_insn = emit_insn_after (set, -- insn ? PREV_INSN (insn) : BB_END (bb)); -- df_insn_rescan (set_insn); -- df_process_deferred_rescans (); -- loop_optimizer_finalize (); -- -- if (!control_flow_insns.is_empty ()) -- { -- free_dominance_info (CDI_DOMINATORS); - -- unsigned int i; -- FOR_EACH_VEC_ELT (control_flow_insns, i, insn) -- if (control_flow_insn_p (insn)) -- { -- /* Split the block after insn. There will be a fallthru -- edge, which is OK so we keep it. We have to create -- the exception edges ourselves. 
*/ -- bb = BLOCK_FOR_INSN (insn); -- split_block (bb, insn); -- rtl_make_eh_edge (NULL, bb, BB_END (bb)); -- } -- } -+ /* Set up the number of SSE registers used for passing SFmode -+ and DFmode arguments. Warn for mismatching ABI. */ -+ cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true); - } - -- bitmap_obstack_release (NULL); -- BITMAP_FREE (convert_bbs); -- -- timevar_pop (TV_MACH_DEP); -- return 0; -+ cfun->machine->arg_reg_available = (cum->nregs > 0); - } - --namespace { -- --const pass_data pass_data_remove_partial_avx_dependency = --{ -- RTL_PASS, /* type */ -- "rpad", /* name */ -- OPTGROUP_NONE, /* optinfo_flags */ -- TV_MACH_DEP, /* tv_id */ -- 0, /* properties_required */ -- 0, /* properties_provided */ -- 0, /* properties_destroyed */ -- 0, /* todo_flags_start */ -- TODO_df_finish, /* todo_flags_finish */ --}; -- --class pass_remove_partial_avx_dependency : public rtl_opt_pass --{ --public: -- pass_remove_partial_avx_dependency (gcc::context *ctxt) -- : rtl_opt_pass (pass_data_remove_partial_avx_dependency, ctxt) -- {} -- -- /* opt_pass methods: */ -- virtual bool gate (function *) -- { -- return (TARGET_AVX -- && TARGET_SSE_PARTIAL_REG_DEPENDENCY -- && TARGET_SSE_MATH -- && optimize -- && optimize_function_for_speed_p (cfun)); -- } -- -- virtual unsigned int execute (function *) -- { -- return remove_partial_avx_dependency (); -- } --}; // class pass_rpad -- --} // anon namespace -+/* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE. -+ But in the case of vector types, it is some vector mode. - --rtl_opt_pass * --make_pass_remove_partial_avx_dependency (gcc::context *ctxt) --{ -- return new pass_remove_partial_avx_dependency (ctxt); --} -+ When we have only some of our vector isa extensions enabled, then there -+ are some modes for which vector_mode_supported_p is false. For these -+ modes, the generic vector support in gcc will choose some non-vector mode -+ in order to implement the type. By computing the natural mode, we'll -+ select the proper ABI location for the operand and not depend on whatever -+ the middle-end decides to do with these vector types. - --/* Return true if a red-zone is in use. We can't use red-zone when -- there are local indirect jumps, like "indirect_jump" or "tablejump", -- which jumps to another place in the function, since "call" in the -- indirect thunk pushes the return address onto stack, destroying -- red-zone. -+ The midde-end can't deal with the vector types > 16 bytes. In this -+ case, we return the original mode and warn ABI change if CUM isn't -+ NULL. - -- TODO: If we can reserve the first 2 WORDs, for PUSH and, another -- for CALL, in red-zone, we can allow local indirect jumps with -- indirect thunk. */ -+ If INT_RETURN is true, warn ABI change if the vector mode isn't -+ available for function return value. */ - --bool --ix86_using_red_zone (void) -+static machine_mode -+type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum, -+ bool in_return) - { -- return (TARGET_RED_ZONE -- && !TARGET_64BIT_MS_ABI -- && (!cfun->machine->has_local_indirect_jump -- || cfun->machine->indirect_branch_type == indirect_branch_keep)); --} -- --/* Return a string that documents the current -m options. The caller is -- responsible for freeing the string. 
*/ -+ machine_mode mode = TYPE_MODE (type); - --static char * --ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2, -- int flags, int flags2, -- const char *arch, const char *tune, -- enum fpmath_unit fpmath, bool add_nl_p, bool add_abi_p) --{ -- struct ix86_target_opts -- { -- const char *option; /* option string */ -- HOST_WIDE_INT mask; /* isa mask options */ -- }; -+ if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode)) -+ { -+ HOST_WIDE_INT size = int_size_in_bytes (type); -+ if ((size == 8 || size == 16 || size == 32 || size == 64) -+ /* ??? Generic code allows us to create width 1 vectors. Ignore. */ -+ && TYPE_VECTOR_SUBPARTS (type) > 1) -+ { -+ machine_mode innermode = TYPE_MODE (TREE_TYPE (type)); - -- /* This table is ordered so that options like -msse4.2 that imply other -- ISAs come first. Target string will be displayed in the same order. */ -- static struct ix86_target_opts isa2_opts[] = -- { -- { "-mcx16", OPTION_MASK_ISA_CX16 }, -- { "-mvaes", OPTION_MASK_ISA_VAES }, -- { "-mrdpid", OPTION_MASK_ISA_RDPID }, -- { "-mpconfig", OPTION_MASK_ISA_PCONFIG }, -- { "-mwbnoinvd", OPTION_MASK_ISA_WBNOINVD }, -- { "-msgx", OPTION_MASK_ISA_SGX }, -- { "-mavx5124vnniw", OPTION_MASK_ISA_AVX5124VNNIW }, -- { "-mavx5124fmaps", OPTION_MASK_ISA_AVX5124FMAPS }, -- { "-mhle", OPTION_MASK_ISA_HLE }, -- { "-mmovbe", OPTION_MASK_ISA_MOVBE }, -- { "-mclzero", OPTION_MASK_ISA_CLZERO }, -- { "-mmwaitx", OPTION_MASK_ISA_MWAITX }, -- { "-mmovdir64b", OPTION_MASK_ISA_MOVDIR64B }, -- { "-mwaitpkg", OPTION_MASK_ISA_WAITPKG }, -- { "-mcldemote", OPTION_MASK_ISA_CLDEMOTE }, -- { "-mptwrite", OPTION_MASK_ISA_PTWRITE } -- }; -- static struct ix86_target_opts isa_opts[] = -- { -- { "-mavx512vpopcntdq", OPTION_MASK_ISA_AVX512VPOPCNTDQ }, -- { "-mavx512bitalg", OPTION_MASK_ISA_AVX512BITALG }, -- { "-mvpclmulqdq", OPTION_MASK_ISA_VPCLMULQDQ }, -- { "-mgfni", OPTION_MASK_ISA_GFNI }, -- { "-mavx512vnni", OPTION_MASK_ISA_AVX512VNNI }, -- { "-mavx512vbmi2", OPTION_MASK_ISA_AVX512VBMI2 }, -- { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI }, -- { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA }, -- { "-mavx512vl", OPTION_MASK_ISA_AVX512VL }, -- { "-mavx512bw", OPTION_MASK_ISA_AVX512BW }, -- { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ }, -- { "-mavx512er", OPTION_MASK_ISA_AVX512ER }, -- { "-mavx512pf", OPTION_MASK_ISA_AVX512PF }, -- { "-mavx512cd", OPTION_MASK_ISA_AVX512CD }, -- { "-mavx512f", OPTION_MASK_ISA_AVX512F }, -- { "-mavx2", OPTION_MASK_ISA_AVX2 }, -- { "-mfma", OPTION_MASK_ISA_FMA }, -- { "-mxop", OPTION_MASK_ISA_XOP }, -- { "-mfma4", OPTION_MASK_ISA_FMA4 }, -- { "-mf16c", OPTION_MASK_ISA_F16C }, -- { "-mavx", OPTION_MASK_ISA_AVX }, --/* { "-msse4" OPTION_MASK_ISA_SSE4 }, */ -- { "-msse4.2", OPTION_MASK_ISA_SSE4_2 }, -- { "-msse4.1", OPTION_MASK_ISA_SSE4_1 }, -- { "-msse4a", OPTION_MASK_ISA_SSE4A }, -- { "-mssse3", OPTION_MASK_ISA_SSSE3 }, -- { "-msse3", OPTION_MASK_ISA_SSE3 }, -- { "-maes", OPTION_MASK_ISA_AES }, -- { "-msha", OPTION_MASK_ISA_SHA }, -- { "-mpclmul", OPTION_MASK_ISA_PCLMUL }, -- { "-msse2", OPTION_MASK_ISA_SSE2 }, -- { "-msse", OPTION_MASK_ISA_SSE }, -- { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A }, -- { "-m3dnow", OPTION_MASK_ISA_3DNOW }, -- { "-mmmx", OPTION_MASK_ISA_MMX }, -- { "-mrtm", OPTION_MASK_ISA_RTM }, -- { "-mprfchw", OPTION_MASK_ISA_PRFCHW }, -- { "-mrdseed", OPTION_MASK_ISA_RDSEED }, -- { "-madx", OPTION_MASK_ISA_ADX }, -- { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 }, -- { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT }, -- { "-mxsaves", OPTION_MASK_ISA_XSAVES }, -- { 
"-mxsavec", OPTION_MASK_ISA_XSAVEC }, -- { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT }, -- { "-mxsave", OPTION_MASK_ISA_XSAVE }, -- { "-mabm", OPTION_MASK_ISA_ABM }, -- { "-mbmi", OPTION_MASK_ISA_BMI }, -- { "-mbmi2", OPTION_MASK_ISA_BMI2 }, -- { "-mlzcnt", OPTION_MASK_ISA_LZCNT }, -- { "-mtbm", OPTION_MASK_ISA_TBM }, -- { "-mpopcnt", OPTION_MASK_ISA_POPCNT }, -- { "-msahf", OPTION_MASK_ISA_SAHF }, -- { "-mcrc32", OPTION_MASK_ISA_CRC32 }, -- { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE }, -- { "-mrdrnd", OPTION_MASK_ISA_RDRND }, -- { "-mpku", OPTION_MASK_ISA_PKU }, -- { "-mlwp", OPTION_MASK_ISA_LWP }, -- { "-mfxsr", OPTION_MASK_ISA_FXSR }, -- { "-mclwb", OPTION_MASK_ISA_CLWB }, -- { "-mshstk", OPTION_MASK_ISA_SHSTK }, -- { "-mmovdiri", OPTION_MASK_ISA_MOVDIRI } -- }; -+ /* There are no XFmode vector modes. */ -+ if (innermode == XFmode) -+ return mode; - -- /* Flag options. */ -- static struct ix86_target_opts flag_opts[] = -- { -- { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE }, -- { "-mlong-double-128", MASK_LONG_DOUBLE_128 }, -- { "-mlong-double-64", MASK_LONG_DOUBLE_64 }, -- { "-m80387", MASK_80387 }, -- { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS }, -- { "-malign-double", MASK_ALIGN_DOUBLE }, -- { "-mcld", MASK_CLD }, -- { "-mfp-ret-in-387", MASK_FLOAT_RETURNS }, -- { "-mieee-fp", MASK_IEEE_FP }, -- { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS }, -- { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY }, -- { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT }, -- { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS }, -- { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 }, -- { "-mno-push-args", MASK_NO_PUSH_ARGS }, -- { "-mno-red-zone", MASK_NO_RED_ZONE }, -- { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER }, -- { "-mrecip", MASK_RECIP }, -- { "-mrtd", MASK_RTD }, -- { "-msseregparm", MASK_SSEREGPARM }, -- { "-mstack-arg-probe", MASK_STACK_PROBE }, -- { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS }, -- { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS }, -- { "-m8bit-idiv", MASK_USE_8BIT_IDIV }, -- { "-mvzeroupper", MASK_VZEROUPPER }, -- { "-mstv", MASK_STV }, -- { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD }, -- { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE }, -- { "-mcall-ms2sysv-xlogues", MASK_CALL_MS2SYSV_XLOGUES } -- }; -+ if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE) -+ mode = MIN_MODE_VECTOR_FLOAT; -+ else -+ mode = MIN_MODE_VECTOR_INT; - -- /* Additional flag options. */ -- static struct ix86_target_opts flag2_opts[] = -- { -- { "-mgeneral-regs-only", OPTION_MASK_GENERAL_REGS_ONLY } -- }; -+ /* Get the mode which has this inner mode and number of units. 
*/ -+ FOR_EACH_MODE_FROM (mode, mode) -+ if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type) -+ && GET_MODE_INNER (mode) == innermode) -+ { -+ if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU) -+ { -+ static bool warnedavx512f; -+ static bool warnedavx512f_ret; - -- const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (isa2_opts) -- + ARRAY_SIZE (flag_opts) + ARRAY_SIZE (flag2_opts) + 6][2]; -+ if (cum && cum->warn_avx512f && !warnedavx512f) -+ { -+ if (warning (OPT_Wpsabi, "AVX512F vector argument " -+ "without AVX512F enabled changes the ABI")) -+ warnedavx512f = true; -+ } -+ else if (in_return && !warnedavx512f_ret) -+ { -+ if (warning (OPT_Wpsabi, "AVX512F vector return " -+ "without AVX512F enabled changes the ABI")) -+ warnedavx512f_ret = true; -+ } - -- char isa_other[40]; -- char isa2_other[40]; -- char flags_other[40]; -- char flags2_other[40]; -- unsigned num = 0; -- unsigned i, j; -- char *ret; -- char *ptr; -- size_t len; -- size_t line_len; -- size_t sep_len; -- const char *abi; -+ return TYPE_MODE (type); -+ } -+ else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU) -+ { -+ static bool warnedavx; -+ static bool warnedavx_ret; - -- memset (opts, '\0', sizeof (opts)); -+ if (cum && cum->warn_avx && !warnedavx) -+ { -+ if (warning (OPT_Wpsabi, "AVX vector argument " -+ "without AVX enabled changes the ABI")) -+ warnedavx = true; -+ } -+ else if (in_return && !warnedavx_ret) -+ { -+ if (warning (OPT_Wpsabi, "AVX vector return " -+ "without AVX enabled changes the ABI")) -+ warnedavx_ret = true; -+ } - -- /* Add -march= option. */ -- if (arch) -- { -- opts[num][0] = "-march="; -- opts[num++][1] = arch; -- } -- -- /* Add -mtune= option. */ -- if (tune) -- { -- opts[num][0] = "-mtune="; -- opts[num++][1] = tune; -- } -- -- /* Add -m32/-m64/-mx32. */ -- if (add_abi_p) -- { -- if ((isa & OPTION_MASK_ISA_64BIT) != 0) -- { -- if ((isa & OPTION_MASK_ABI_64) != 0) -- abi = "-m64"; -- else -- abi = "-mx32"; -- } -- else -- abi = "-m32"; -- opts[num++][0] = abi; -- } -- isa &= ~(OPTION_MASK_ISA_64BIT | OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32); -- -- /* Pick out the options in isa2 options. */ -- for (i = 0; i < ARRAY_SIZE (isa2_opts); i++) -- { -- if ((isa2 & isa2_opts[i].mask) != 0) -- { -- opts[num++][0] = isa2_opts[i].option; -- isa2 &= ~ isa2_opts[i].mask; -- } -- } -- -- if (isa2 && add_nl_p) -- { -- opts[num++][0] = isa2_other; -- sprintf (isa2_other, "(other isa2: %#" HOST_WIDE_INT_PRINT "x)", isa2); -- } -- -- /* Pick out the options in isa options. */ -- for (i = 0; i < ARRAY_SIZE (isa_opts); i++) -- { -- if ((isa & isa_opts[i].mask) != 0) -- { -- opts[num++][0] = isa_opts[i].option; -- isa &= ~ isa_opts[i].mask; -- } -- } -- -- if (isa && add_nl_p) -- { -- opts[num++][0] = isa_other; -- sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)", isa); -- } -- -- /* Add flag options. */ -- for (i = 0; i < ARRAY_SIZE (flag_opts); i++) -- { -- if ((flags & flag_opts[i].mask) != 0) -- { -- opts[num++][0] = flag_opts[i].option; -- flags &= ~ flag_opts[i].mask; -- } -- } -- -- if (flags && add_nl_p) -- { -- opts[num++][0] = flags_other; -- sprintf (flags_other, "(other flags: %#x)", flags); -- } -- -- /* Add additional flag options. 
*/ -- for (i = 0; i < ARRAY_SIZE (flag2_opts); i++) -- { -- if ((flags2 & flag2_opts[i].mask) != 0) -- { -- opts[num++][0] = flag2_opts[i].option; -- flags2 &= ~ flag2_opts[i].mask; -- } -- } -- -- if (flags2 && add_nl_p) -- { -- opts[num++][0] = flags2_other; -- sprintf (flags2_other, "(other flags2: %#x)", flags2); -- } -- -- /* Add -fpmath= option. */ -- if (fpmath) -- { -- opts[num][0] = "-mfpmath="; -- switch ((int) fpmath) -- { -- case FPMATH_387: -- opts[num++][1] = "387"; -- break; -+ return TYPE_MODE (type); -+ } -+ else if (((size == 8 && TARGET_64BIT) || size == 16) -+ && !TARGET_SSE -+ && !TARGET_IAMCU) -+ { -+ static bool warnedsse; -+ static bool warnedsse_ret; - -- case FPMATH_SSE: -- opts[num++][1] = "sse"; -- break; -+ if (cum && cum->warn_sse && !warnedsse) -+ { -+ if (warning (OPT_Wpsabi, "SSE vector argument " -+ "without SSE enabled changes the ABI")) -+ warnedsse = true; -+ } -+ else if (!TARGET_64BIT && in_return && !warnedsse_ret) -+ { -+ if (warning (OPT_Wpsabi, "SSE vector return " -+ "without SSE enabled changes the ABI")) -+ warnedsse_ret = true; -+ } -+ } -+ else if ((size == 8 && !TARGET_64BIT) -+ && (!cfun -+ || cfun->machine->func_type == TYPE_NORMAL) -+ && !TARGET_MMX -+ && !TARGET_IAMCU) -+ { -+ static bool warnedmmx; -+ static bool warnedmmx_ret; - -- case FPMATH_387 | FPMATH_SSE: -- opts[num++][1] = "sse+387"; -- break; -+ if (cum && cum->warn_mmx && !warnedmmx) -+ { -+ if (warning (OPT_Wpsabi, "MMX vector argument " -+ "without MMX enabled changes the ABI")) -+ warnedmmx = true; -+ } -+ else if (in_return && !warnedmmx_ret) -+ { -+ if (warning (OPT_Wpsabi, "MMX vector return " -+ "without MMX enabled changes the ABI")) -+ warnedmmx_ret = true; -+ } -+ } -+ return mode; -+ } - -- default: - gcc_unreachable (); - } - } - -- /* Any options? */ -- if (num == 0) -- return NULL; -- -- gcc_assert (num < ARRAY_SIZE (opts)); -- -- /* Size the string. */ -- len = 0; -- sep_len = (add_nl_p) ? 3 : 1; -- for (i = 0; i < num; i++) -- { -- len += sep_len; -- for (j = 0; j < 2; j++) -- if (opts[i][j]) -- len += strlen (opts[i][j]); -- } -- -- /* Build the string. */ -- ret = ptr = (char *) xmalloc (len); -- line_len = 0; -- -- for (i = 0; i < num; i++) -- { -- size_t len2[2]; -- -- for (j = 0; j < 2; j++) -- len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0; -- -- if (i != 0) -- { -- *ptr++ = ' '; -- line_len++; -- -- if (add_nl_p && line_len + len2[0] + len2[1] > 70) -- { -- *ptr++ = '\\'; -- *ptr++ = '\n'; -- line_len = 0; -- } -- } -- -- for (j = 0; j < 2; j++) -- if (opts[i][j]) -- { -- memcpy (ptr, opts[i][j], len2[j]); -- ptr += len2[j]; -- line_len += len2[j]; -- } -- } -- -- *ptr = '\0'; -- gcc_assert (ret + len >= ptr); -- -- return ret; -+ return mode; - } - --/* Return true, if profiling code should be emitted before -- prologue. Otherwise it returns false. -- Note: For x86 with "hotfix" it is sorried. */ --static bool --ix86_profile_before_prologue (void) --{ -- return flag_fentry != 0; --} -+/* We want to pass a value in REGNO whose "natural" mode is MODE. However, -+ this may not agree with the mode that the type system has chosen for the -+ register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can -+ go ahead and use it. Otherwise we have to build a PARALLEL instead. */ - --/* Function that is callable from the debugger to print the current -- options. 
*/ --void ATTRIBUTE_UNUSED --ix86_debug_options (void) -+static rtx -+gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode, -+ unsigned int regno) - { -- char *opts = ix86_target_string (ix86_isa_flags, ix86_isa_flags2, -- target_flags, ix86_target_flags, -- ix86_arch_string,ix86_tune_string, -- ix86_fpmath, true, true); -+ rtx tmp; - -- if (opts) -+ if (orig_mode != BLKmode) -+ tmp = gen_rtx_REG (orig_mode, regno); -+ else - { -- fprintf (stderr, "%s\n\n", opts); -- free (opts); -+ tmp = gen_rtx_REG (mode, regno); -+ tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx); -+ tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp)); - } -- else -- fputs ("\n\n", stderr); - -- return; -+ return tmp; - } - --static const char *stringop_alg_names[] = { --#define DEF_ENUM --#define DEF_ALG(alg, name) #name, --#include "stringop.def" --#undef DEF_ENUM --#undef DEF_ALG --}; -+/* x86-64 register passing implementation. See x86-64 ABI for details. Goal -+ of this code is to classify each 8bytes of incoming argument by the register -+ class and assign registers accordingly. */ - --/* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=. -- The string is of the following form (or comma separated list of it): -+/* Return the union class of CLASS1 and CLASS2. -+ See the x86-64 PS ABI for details. */ - -- strategy_alg:max_size:[align|noalign] -+static enum x86_64_reg_class -+merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2) -+{ -+ /* Rule #1: If both classes are equal, this is the resulting class. */ -+ if (class1 == class2) -+ return class1; - -- where the full size range for the strategy is either [0, max_size] or -- [min_size, max_size], in which min_size is the max_size + 1 of the -- preceding range. The last size range must have max_size == -1. -+ /* Rule #2: If one of the classes is NO_CLASS, the resulting class is -+ the other class. */ -+ if (class1 == X86_64_NO_CLASS) -+ return class2; -+ if (class2 == X86_64_NO_CLASS) -+ return class1; - -- Examples: -+ /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */ -+ if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS) -+ return X86_64_MEMORY_CLASS; - -- 1. -- -mmemcpy-strategy=libcall:-1:noalign -+ /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */ -+ if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS) -+ || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS)) -+ return X86_64_INTEGERSI_CLASS; -+ if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS -+ || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS) -+ return X86_64_INTEGER_CLASS; - -- this is equivalent to (for known size memcpy) -mstringop-strategy=libcall -+ /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, -+ MEMORY is used. */ -+ if (class1 == X86_64_X87_CLASS -+ || class1 == X86_64_X87UP_CLASS -+ || class1 == X86_64_COMPLEX_X87_CLASS -+ || class2 == X86_64_X87_CLASS -+ || class2 == X86_64_X87UP_CLASS -+ || class2 == X86_64_COMPLEX_X87_CLASS) -+ return X86_64_MEMORY_CLASS; - -+ /* Rule #6: Otherwise class SSE is used. */ -+ return X86_64_SSE_CLASS; -+} - -- 2. -- -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign -+/* Classify the argument of type TYPE and mode MODE. -+ CLASSES will be filled by the register class used to pass each word -+ of the operand. The number of words is returned. In case the parameter -+ should be passed in memory, 0 is returned. 
As a special case for zero -+ sized containers, classes[0] will be NO_CLASS and 1 is returned. - -- This is to tell the compiler to use the following strategy for memset -- 1) when the expected size is between [1, 16], use rep_8byte strategy; -- 2) when the size is between [17, 2048], use vector_loop; -- 3) when the size is > 2048, use libcall. */ -+ BIT_OFFSET is used internally for handling records and specifies offset -+ of the offset in bits modulo 512 to avoid overflow cases. - --struct stringop_size_range --{ -- int max; -- stringop_alg alg; -- bool noalign; --}; -+ See the x86-64 PS ABI for details. -+*/ - --static void --ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset) -+static int -+classify_argument (machine_mode mode, const_tree type, -+ enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset) - { -- const struct stringop_algs *default_algs; -- stringop_size_range input_ranges[MAX_STRINGOP_ALGS]; -- char *curr_range_str, *next_range_str; -- const char *opt = is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy="; -- int i = 0, n = 0; -- -- if (is_memset) -- default_algs = &ix86_cost->memset[TARGET_64BIT != 0]; -- else -- default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0]; -+ HOST_WIDE_INT bytes -+ = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); -+ int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD); - -- curr_range_str = strategy_str; -+ /* Variable sized entities are always passed/returned in memory. */ -+ if (bytes < 0) -+ return 0; - -- do -+ if (mode != VOIDmode) - { -- int maxs; -- char alg_name[128]; -- char align[16]; -- next_range_str = strchr (curr_range_str, ','); -- if (next_range_str) -- *next_range_str++ = '\0'; -- -- if (sscanf (curr_range_str, "%20[^:]:%d:%10s", alg_name, &maxs, -- align) != 3) -- { -- error ("wrong argument %qs to option %qs", curr_range_str, opt); -- return; -- } -+ /* The value of "named" doesn't matter. */ -+ function_arg_info arg (const_cast (type), mode, /*named=*/true); -+ if (targetm.calls.must_pass_in_stack (arg)) -+ return 0; -+ } - -- if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1)) -- { -- error ("size ranges of option %qs should be increasing", opt); -- return; -- } -+ if (type && AGGREGATE_TYPE_P (type)) -+ { -+ int i; -+ tree field; -+ enum x86_64_reg_class subclasses[MAX_CLASSES]; - -- for (i = 0; i < last_alg; i++) -- if (!strcmp (alg_name, stringop_alg_names[i])) -- break; -+ /* On x86-64 we pass structures larger than 64 bytes on the stack. */ -+ if (bytes > 64) -+ return 0; - -- if (i == last_alg) -- { -- error ("wrong strategy name %qs specified for option %qs", -- alg_name, opt); -- -- auto_vec candidates; -- for (i = 0; i < last_alg; i++) -- if ((stringop_alg) i != rep_prefix_8_byte || TARGET_64BIT) -- candidates.safe_push (stringop_alg_names[i]); -- -- char *s; -- const char *hint -- = candidates_list_and_hint (alg_name, s, candidates); -- if (hint) -- inform (input_location, -- "valid arguments to %qs are: %s; did you mean %qs?", -- opt, s, hint); -- else -- inform (input_location, "valid arguments to %qs are: %s", -- opt, s); -- XDELETEVEC (s); -- return; -- } -+ for (i = 0; i < words; i++) -+ classes[i] = X86_64_NO_CLASS; - -- if ((stringop_alg) i == rep_prefix_8_byte -- && !TARGET_64BIT) -+ /* Zero sized arrays or structures are NO_CLASS. We return 0 to -+ signalize memory class, so handle it as special case. */ -+ if (!words) - { -- /* rep; movq isn't available in 32-bit code. 
*/ -- error ("strategy name %qs specified for option %qs " -- "not supported for 32-bit code", alg_name, opt); -- return; -+ classes[0] = X86_64_NO_CLASS; -+ return 1; - } - -- input_ranges[n].max = maxs; -- input_ranges[n].alg = (stringop_alg) i; -- if (!strcmp (align, "align")) -- input_ranges[n].noalign = false; -- else if (!strcmp (align, "noalign")) -- input_ranges[n].noalign = true; -- else -- { -- error ("unknown alignment %qs specified for option %qs", align, opt); -- return; -- } -- n++; -- curr_range_str = next_range_str; -- } -- while (curr_range_str); -- -- if (input_ranges[n - 1].max != -1) -- { -- error ("the max value for the last size range should be -1" -- " for option %qs", opt); -- return; -- } -+ /* Classify each field of record and merge classes. */ -+ switch (TREE_CODE (type)) -+ { -+ case RECORD_TYPE: -+ /* And now merge the fields of structure. */ -+ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) -+ { -+ if (TREE_CODE (field) == FIELD_DECL) -+ { -+ int num; - -- if (n > MAX_STRINGOP_ALGS) -- { -- error ("too many size ranges specified in option %qs", opt); -- return; -- } -+ if (TREE_TYPE (field) == error_mark_node) -+ continue; - -- /* Now override the default algs array. */ -- for (i = 0; i < n; i++) -- { -- *const_cast(&default_algs->size[i].max) = input_ranges[i].max; -- *const_cast(&default_algs->size[i].alg) -- = input_ranges[i].alg; -- *const_cast(&default_algs->size[i].noalign) -- = input_ranges[i].noalign; -- } --} -- -- --/* parse -mtune-ctrl= option. When DUMP is true, -- print the features that are explicitly set. */ -- --static void --parse_mtune_ctrl_str (bool dump) --{ -- if (!ix86_tune_ctrl_string) -- return; -- -- char *next_feature_string = NULL; -- char *curr_feature_string = xstrdup (ix86_tune_ctrl_string); -- char *orig = curr_feature_string; -- int i; -- do -- { -- bool clear = false; -- -- next_feature_string = strchr (curr_feature_string, ','); -- if (next_feature_string) -- *next_feature_string++ = '\0'; -- if (*curr_feature_string == '^') -- { -- curr_feature_string++; -- clear = true; -- } -- for (i = 0; i < X86_TUNE_LAST; i++) -- { -- if (!strcmp (curr_feature_string, ix86_tune_feature_names[i])) -- { -- ix86_tune_features[i] = !clear; -- if (dump) -- fprintf (stderr, "Explicitly %s feature %s\n", -- clear ? "clear" : "set", ix86_tune_feature_names[i]); -- break; -- } -- } -- if (i == X86_TUNE_LAST) -- error ("unknown parameter to option %<-mtune-ctrl%>: %s", -- clear ? curr_feature_string - 1 : curr_feature_string); -- curr_feature_string = next_feature_string; -- } -- while (curr_feature_string); -- free (orig); --} -- --/* Helper function to set ix86_tune_features. IX86_TUNE is the -- processor type. */ -- --static void --set_ix86_tune_features (enum processor_type ix86_tune, bool dump) --{ -- unsigned HOST_WIDE_INT ix86_tune_mask = HOST_WIDE_INT_1U << ix86_tune; -- int i; -- -- for (i = 0; i < X86_TUNE_LAST; ++i) -- { -- if (ix86_tune_no_default) -- ix86_tune_features[i] = 0; -- else -- ix86_tune_features[i] -- = !!(initial_ix86_tune_features[i] & ix86_tune_mask); -- } -- -- if (dump) -- { -- fprintf (stderr, "List of x86 specific tuning parameter names:\n"); -- for (i = 0; i < X86_TUNE_LAST; i++) -- fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i], -- ix86_tune_features[i] ? "on" : "off"); -- } -- -- parse_mtune_ctrl_str (dump); --} -- -- --/* Default align_* from the processor table. */ -+ /* Bitfields are always classified as integer. 
Handle them -+ early, since later code would consider them to be -+ misaligned integers. */ -+ if (DECL_BIT_FIELD (field)) -+ { -+ for (i = (int_bit_position (field) -+ + (bit_offset % 64)) / 8 / 8; -+ i < ((int_bit_position (field) + (bit_offset % 64)) -+ + tree_to_shwi (DECL_SIZE (field)) -+ + 63) / 8 / 8; i++) -+ classes[i] -+ = merge_classes (X86_64_INTEGER_CLASS, classes[i]); -+ } -+ else -+ { -+ int pos; - --static void --ix86_default_align (struct gcc_options *opts) --{ -- /* -falign-foo without argument: supply one. */ -- if (opts->x_flag_align_loops && !opts->x_str_align_loops) -- opts->x_str_align_loops = processor_cost_table[ix86_tune]->align_loop; -- if (opts->x_flag_align_jumps && !opts->x_str_align_jumps) -- opts->x_str_align_jumps = processor_cost_table[ix86_tune]->align_jump; -- if (opts->x_flag_align_labels && !opts->x_str_align_labels) -- opts->x_str_align_labels = processor_cost_table[ix86_tune]->align_label; -- if (opts->x_flag_align_functions && !opts->x_str_align_functions) -- opts->x_str_align_functions = processor_cost_table[ix86_tune]->align_func; --} -+ type = TREE_TYPE (field); - --/* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */ -+ /* Flexible array member is ignored. */ -+ if (TYPE_MODE (type) == BLKmode -+ && TREE_CODE (type) == ARRAY_TYPE -+ && TYPE_SIZE (type) == NULL_TREE -+ && TYPE_DOMAIN (type) != NULL_TREE -+ && (TYPE_MAX_VALUE (TYPE_DOMAIN (type)) -+ == NULL_TREE)) -+ { -+ static bool warned; - --static void --ix86_override_options_after_change (void) --{ -- ix86_default_align (&global_options); --} -+ if (!warned && warn_psabi) -+ { -+ warned = true; -+ inform (input_location, -+ "the ABI of passing struct with" -+ " a flexible array member has" -+ " changed in GCC 4.4"); -+ } -+ continue; -+ } -+ num = classify_argument (TYPE_MODE (type), type, -+ subclasses, -+ (int_bit_position (field) -+ + bit_offset) % 512); -+ if (!num) -+ return 0; -+ pos = (int_bit_position (field) -+ + (bit_offset % 64)) / 8 / 8; -+ for (i = 0; i < num && (i + pos) < words; i++) -+ classes[i + pos] -+ = merge_classes (subclasses[i], classes[i + pos]); -+ } -+ } -+ } -+ break; - -+ case ARRAY_TYPE: -+ /* Arrays are handled as small records. */ -+ { -+ int num; -+ num = classify_argument (TYPE_MODE (TREE_TYPE (type)), -+ TREE_TYPE (type), subclasses, bit_offset); -+ if (!num) -+ return 0; - -+ /* The partial classes are now full classes. */ -+ if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4) -+ subclasses[0] = X86_64_SSE_CLASS; -+ if (subclasses[0] == X86_64_INTEGERSI_CLASS -+ && !((bit_offset % 64) == 0 && bytes == 4)) -+ subclasses[0] = X86_64_INTEGER_CLASS; - --/* Override various settings based on options. If MAIN_ARGS_P, the -- options are from the command line, otherwise they are from -- attributes. Return true if there's an error related to march -- option. */ -+ for (i = 0; i < words; i++) -+ classes[i] = subclasses[i % num]; - --static bool --ix86_option_override_internal (bool main_args_p, -- struct gcc_options *opts, -- struct gcc_options *opts_set) --{ -- int i; -- unsigned HOST_WIDE_INT ix86_arch_mask; -- const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL); -+ break; -+ } -+ case UNION_TYPE: -+ case QUAL_UNION_TYPE: -+ /* Unions are similar to RECORD_TYPE but offset is always 0. -+ */ -+ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) -+ { -+ if (TREE_CODE (field) == FIELD_DECL) -+ { -+ int num; - -- /* -mrecip options. 
*/ -- static struct -- { -- const char *string; /* option name */ -- unsigned int mask; /* mask bits to set */ -- } -- const recip_options[] = -- { -- { "all", RECIP_MASK_ALL }, -- { "none", RECIP_MASK_NONE }, -- { "div", RECIP_MASK_DIV }, -- { "sqrt", RECIP_MASK_SQRT }, -- { "vec-div", RECIP_MASK_VEC_DIV }, -- { "vec-sqrt", RECIP_MASK_VEC_SQRT }, -- }; -+ if (TREE_TYPE (field) == error_mark_node) -+ continue; - -+ num = classify_argument (TYPE_MODE (TREE_TYPE (field)), -+ TREE_TYPE (field), subclasses, -+ bit_offset); -+ if (!num) -+ return 0; -+ for (i = 0; i < num && i < words; i++) -+ classes[i] = merge_classes (subclasses[i], classes[i]); -+ } -+ } -+ break; - -- /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if -- TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */ -- if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags)) -- opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32); --#ifdef TARGET_BI_ARCH -- else -- { --#if TARGET_BI_ARCH == 1 -- /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64 -- is on and OPTION_MASK_ABI_X32 is off. We turn off -- OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by -- -mx32. */ -- if (TARGET_X32_P (opts->x_ix86_isa_flags)) -- opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64; --#else -- /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is -- on and OPTION_MASK_ABI_64 is off. We turn off -- OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by -- -m64 or OPTION_MASK_CODE16 is turned on by -m16. */ -- if (TARGET_LP64_P (opts->x_ix86_isa_flags) -- || TARGET_16BIT_P (opts->x_ix86_isa_flags)) -- opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32; --#endif -- if (TARGET_64BIT_P (opts->x_ix86_isa_flags) -- && TARGET_IAMCU_P (opts->x_target_flags)) -- sorry ("Intel MCU psABI isn%'t supported in %s mode", -- TARGET_X32_P (opts->x_ix86_isa_flags) ? "x32" : "64-bit"); -- } --#endif -+ default: -+ gcc_unreachable (); -+ } - -- if (TARGET_X32_P (opts->x_ix86_isa_flags)) -- { -- /* Always turn on OPTION_MASK_ISA_64BIT and turn off -- OPTION_MASK_ABI_64 for TARGET_X32. */ -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT; -- opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64; -- } -- else if (TARGET_16BIT_P (opts->x_ix86_isa_flags)) -- opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT -- | OPTION_MASK_ABI_X32 -- | OPTION_MASK_ABI_64); -- else if (TARGET_LP64_P (opts->x_ix86_isa_flags)) -- { -- /* Always turn on OPTION_MASK_ISA_64BIT and turn off -- OPTION_MASK_ABI_X32 for TARGET_LP64. */ -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT; -- opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32; -- } -+ if (words > 2) -+ { -+ /* When size > 16 bytes, if the first one isn't -+ X86_64_SSE_CLASS or any other ones aren't -+ X86_64_SSEUP_CLASS, everything should be passed in -+ memory. */ -+ if (classes[0] != X86_64_SSE_CLASS) -+ return 0; - --#ifdef SUBTARGET_OVERRIDE_OPTIONS -- SUBTARGET_OVERRIDE_OPTIONS; --#endif -+ for (i = 1; i < words; i++) -+ if (classes[i] != X86_64_SSEUP_CLASS) -+ return 0; -+ } - --#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS -- SUBSUBTARGET_OVERRIDE_OPTIONS; --#endif -+ /* Final merger cleanup. */ -+ for (i = 0; i < words; i++) -+ { -+ /* If one class is MEMORY, everything should be passed in -+ memory. */ -+ if (classes[i] == X86_64_MEMORY_CLASS) -+ return 0; - -- /* -fPIC is the default for x86_64. 
*/ -- if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags)) -- opts->x_flag_pic = 2; -+ /* The X86_64_SSEUP_CLASS should be always preceded by -+ X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */ -+ if (classes[i] == X86_64_SSEUP_CLASS -+ && classes[i - 1] != X86_64_SSE_CLASS -+ && classes[i - 1] != X86_64_SSEUP_CLASS) -+ { -+ /* The first one should never be X86_64_SSEUP_CLASS. */ -+ gcc_assert (i != 0); -+ classes[i] = X86_64_SSE_CLASS; -+ } - -- /* Need to check -mtune=generic first. */ -- if (opts->x_ix86_tune_string) -- { -- /* As special support for cross compilers we read -mtune=native -- as -mtune=generic. With native compilers we won't see the -- -mtune=native, as it was changed by the driver. */ -- if (!strcmp (opts->x_ix86_tune_string, "native")) -- { -- opts->x_ix86_tune_string = "generic"; -- } -- else if (!strcmp (opts->x_ix86_tune_string, "x86-64")) -- warning (OPT_Wdeprecated, -- main_args_p -- ? G_("%<-mtune=x86-64%> is deprecated; use %<-mtune=k8%> " -- "or %<-mtune=generic%> instead as appropriate") -- : G_("% is deprecated; use " -- "% or %" -- " instead as appropriate")); -- } -- else -- { -- if (opts->x_ix86_arch_string) -- opts->x_ix86_tune_string = opts->x_ix86_arch_string; -- if (!opts->x_ix86_tune_string) -- { -- opts->x_ix86_tune_string = processor_names[TARGET_CPU_DEFAULT]; -- ix86_tune_defaulted = 1; -- } -+ /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS, -+ everything should be passed in memory. */ -+ if (classes[i] == X86_64_X87UP_CLASS -+ && (classes[i - 1] != X86_64_X87_CLASS)) -+ { -+ static bool warned; - -- /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string -- or defaulted. We need to use a sensible tune option. */ -- if (!strcmp (opts->x_ix86_tune_string, "x86-64")) -- { -- opts->x_ix86_tune_string = "generic"; -+ /* The first one should never be X86_64_X87UP_CLASS. */ -+ gcc_assert (i != 0); -+ if (!warned && warn_psabi) -+ { -+ warned = true; -+ inform (input_location, -+ "the ABI of passing union with %" -+ " has changed in GCC 4.4"); -+ } -+ return 0; -+ } - } -+ return words; - } - -- if (opts->x_ix86_stringop_alg == rep_prefix_8_byte -- && !TARGET_64BIT_P (opts->x_ix86_isa_flags)) -+ /* Compute alignment needed. We align all types to natural boundaries with -+ exception of XFmode that is aligned to 64bits. */ -+ if (mode != VOIDmode && mode != BLKmode) - { -- /* rep; movq isn't available in 32-bit code. */ -- error ("%<-mstringop-strategy=rep_8byte%> not supported for 32-bit code"); -- opts->x_ix86_stringop_alg = no_stringop; -- } -- -- if (!opts->x_ix86_arch_string) -- opts->x_ix86_arch_string -- = TARGET_64BIT_P (opts->x_ix86_isa_flags) -- ? "x86-64" : SUBTARGET32_DEFAULT_CPU; -- else -- ix86_arch_specified = 1; -+ int mode_alignment = GET_MODE_BITSIZE (mode); - -- if (opts_set->x_ix86_pmode) -- { -- if ((TARGET_LP64_P (opts->x_ix86_isa_flags) -- && opts->x_ix86_pmode == PMODE_SI) -- || (!TARGET_64BIT_P (opts->x_ix86_isa_flags) -- && opts->x_ix86_pmode == PMODE_DI)) -- error ("address mode %qs not supported in the %s bit mode", -- TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long", -- TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32"); -+ if (mode == XFmode) -+ mode_alignment = 128; -+ else if (mode == XCmode) -+ mode_alignment = 256; -+ if (COMPLEX_MODE_P (mode)) -+ mode_alignment /= 2; -+ /* Misaligned fields are always returned in memory. */ -+ if (bit_offset % mode_alignment) -+ return 0; - } -- else -- opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags) -- ? 
PMODE_DI : PMODE_SI; -- -- if (!opts_set->x_ix86_abi) -- opts->x_ix86_abi = DEFAULT_ABI; -- -- if (opts->x_ix86_abi == MS_ABI && TARGET_X32_P (opts->x_ix86_isa_flags)) -- error ("%<-mabi=ms%> not supported with X32 ABI"); -- gcc_assert (opts->x_ix86_abi == SYSV_ABI || opts->x_ix86_abi == MS_ABI); -- -- const char *abi_name = opts->x_ix86_abi == MS_ABI ? "ms" : "sysv"; -- if ((opts->x_flag_sanitize & SANITIZE_USER_ADDRESS) -- && opts->x_ix86_abi != DEFAULT_ABI) -- error ("%<-mabi=%s%> not supported with %<-fsanitize=address%>", abi_name); -- if ((opts->x_flag_sanitize & SANITIZE_KERNEL_ADDRESS) -- && opts->x_ix86_abi != DEFAULT_ABI) -- error ("%<-mabi=%s%> not supported with %<-fsanitize=kernel-address%>", -- abi_name); -- if ((opts->x_flag_sanitize & SANITIZE_THREAD) -- && opts->x_ix86_abi != DEFAULT_ABI) -- error ("%<-mabi=%s%> not supported with %<-fsanitize=thread%>", abi_name); -- -- /* For targets using ms ABI enable ms-extensions, if not -- explicit turned off. For non-ms ABI we turn off this -- option. */ -- if (!opts_set->x_flag_ms_extensions) -- opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI); -- -- if (opts_set->x_ix86_cmodel) -- { -- switch (opts->x_ix86_cmodel) -- { -- case CM_SMALL: -- case CM_SMALL_PIC: -- if (opts->x_flag_pic) -- opts->x_ix86_cmodel = CM_SMALL_PIC; -- if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)) -- error ("code model %qs not supported in the %s bit mode", -- "small", "32"); -- break; - -- case CM_MEDIUM: -- case CM_MEDIUM_PIC: -- if (opts->x_flag_pic) -- opts->x_ix86_cmodel = CM_MEDIUM_PIC; -- if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)) -- error ("code model %qs not supported in the %s bit mode", -- "medium", "32"); -- else if (TARGET_X32_P (opts->x_ix86_isa_flags)) -- error ("code model %qs not supported in x32 mode", -- "medium"); -- break; -+ /* for V1xx modes, just use the base mode */ -+ if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode -+ && GET_MODE_UNIT_SIZE (mode) == bytes) -+ mode = GET_MODE_INNER (mode); - -- case CM_LARGE: -- case CM_LARGE_PIC: -- if (opts->x_flag_pic) -- opts->x_ix86_cmodel = CM_LARGE_PIC; -- if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)) -- error ("code model %qs not supported in the %s bit mode", -- "large", "32"); -- else if (TARGET_X32_P (opts->x_ix86_isa_flags)) -- error ("code model %qs not supported in x32 mode", -- "large"); -- break; -+ /* Classification of atomic types. */ -+ switch (mode) -+ { -+ case E_SDmode: -+ case E_DDmode: -+ classes[0] = X86_64_SSE_CLASS; -+ return 1; -+ case E_TDmode: -+ classes[0] = X86_64_SSE_CLASS; -+ classes[1] = X86_64_SSEUP_CLASS; -+ return 2; -+ case E_DImode: -+ case E_SImode: -+ case E_HImode: -+ case E_QImode: -+ case E_CSImode: -+ case E_CHImode: -+ case E_CQImode: -+ { -+ int size = bit_offset + (int) GET_MODE_BITSIZE (mode); - -- case CM_32: -- if (opts->x_flag_pic) -- error ("code model %s does not support PIC mode", "32"); -- if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) -- error ("code model %qs not supported in the %s bit mode", -- "32", "64"); -- break; -+ /* Analyze last 128 bits only. 
*/ -+ size = (size - 1) & 0x7f; - -- case CM_KERNEL: -- if (opts->x_flag_pic) -- { -- error ("code model %s does not support PIC mode", "kernel"); -- opts->x_ix86_cmodel = CM_32; -- } -- if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)) -- error ("code model %qs not supported in the %s bit mode", -- "kernel", "32"); -- break; -- -- default: -- gcc_unreachable (); -- } -- } -- else -- { -- /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the -- use of rip-relative addressing. This eliminates fixups that -- would otherwise be needed if this object is to be placed in a -- DLL, and is essentially just as efficient as direct addressing. */ -- if (TARGET_64BIT_P (opts->x_ix86_isa_flags) -- && (TARGET_RDOS || TARGET_PECOFF)) -- opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1; -- else if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) -- opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL; -- else -- opts->x_ix86_cmodel = CM_32; -- } -- if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL) -- { -- error ("%<-masm=intel%> not supported in this configuration"); -- opts->x_ix86_asm_dialect = ASM_ATT; -- } -- if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0) -- != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0)) -- sorry ("%i-bit mode not compiled in", -- (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32); -- -- for (i = 0; i < pta_size; i++) -- if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name)) -- { -- if (!strcmp (opts->x_ix86_arch_string, "generic")) -+ if (size < 32) - { -- error (main_args_p -- ? G_("% CPU can be used only for %<-mtune=%> " -- "switch") -- : G_("% CPU can be used only for " -- "% attribute")); -- return false; -+ classes[0] = X86_64_INTEGERSI_CLASS; -+ return 1; - } -- else if (!strcmp (opts->x_ix86_arch_string, "intel")) -+ else if (size < 64) - { -- error (main_args_p -- ? G_("% CPU can be used only for %<-mtune=%> " -- "switch") -- : G_("% CPU can be used only for " -- "% attribute")); -- return false; -+ classes[0] = X86_64_INTEGER_CLASS; -+ return 1; - } -- -- if (TARGET_64BIT_P (opts->x_ix86_isa_flags) -- && !((processor_alias_table[i].flags & PTA_64BIT) != 0)) -+ else if (size < 64+32) - { -- error ("CPU you selected does not support x86-64 " -- "instruction set"); -- return false; -+ classes[0] = X86_64_INTEGER_CLASS; -+ classes[1] = X86_64_INTEGERSI_CLASS; -+ return 2; - } -- -- ix86_schedule = processor_alias_table[i].schedule; -- ix86_arch = processor_alias_table[i].processor; -- /* Default cpu tuning to the architecture. 
*/ -- ix86_tune = ix86_arch; -- -- if (((processor_alias_table[i].flags & PTA_MMX) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX; -- if (((processor_alias_table[i].flags & PTA_3DNOW) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW; -- if (((processor_alias_table[i].flags & PTA_3DNOW_A) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A; -- if (((processor_alias_table[i].flags & PTA_SSE) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE; -- if (((processor_alias_table[i].flags & PTA_SSE2) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2; -- if (((processor_alias_table[i].flags & PTA_SSE3) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3; -- if (((processor_alias_table[i].flags & PTA_SSSE3) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3; -- if (((processor_alias_table[i].flags & PTA_SSE4_1) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1; -- if (((processor_alias_table[i].flags & PTA_SSE4_2) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2; -- if (((processor_alias_table[i].flags & PTA_AVX) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX; -- if (((processor_alias_table[i].flags & PTA_AVX2) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2; -- if (((processor_alias_table[i].flags & PTA_FMA) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA; -- if (((processor_alias_table[i].flags & PTA_SSE4A) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A; -- if (((processor_alias_table[i].flags & PTA_FMA4) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4; -- if (((processor_alias_table[i].flags & PTA_XOP) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP; -- if (((processor_alias_table[i].flags & PTA_LWP) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP; -- if (((processor_alias_table[i].flags & PTA_ABM) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM; -- if (((processor_alias_table[i].flags & PTA_BMI) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI; -- if (((processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT; -- if (((processor_alias_table[i].flags & PTA_TBM) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM; -- if 
(((processor_alias_table[i].flags & PTA_BMI2) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2; -- if (((processor_alias_table[i].flags & PTA_CX16) != 0) -- && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_CX16)) -- opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_CX16; -- if (((processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT; -- if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags) -- && ((processor_alias_table[i].flags & PTA_NO_SAHF) != 0)) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF; -- if (((processor_alias_table[i].flags & PTA_MOVBE) != 0) -- && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_MOVBE)) -- opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_MOVBE; -- if (((processor_alias_table[i].flags & PTA_AES) != 0) -- && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES)) -- ix86_isa_flags |= OPTION_MASK_ISA_AES; -- if (((processor_alias_table[i].flags & PTA_SHA) != 0) -- && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA)) -- ix86_isa_flags |= OPTION_MASK_ISA_SHA; -- if (((processor_alias_table[i].flags & PTA_PCLMUL) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL; -- if (((processor_alias_table[i].flags & PTA_FSGSBASE) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE; -- if (((processor_alias_table[i].flags & PTA_RDRND) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND; -- if (((processor_alias_table[i].flags & PTA_F16C) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C; -- if (((processor_alias_table[i].flags & PTA_RTM) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM; -- if (((processor_alias_table[i].flags & PTA_HLE) != 0) -- && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_HLE)) -- opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_HLE; -- if (((processor_alias_table[i].flags & PTA_PRFCHW) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW; -- if (((processor_alias_table[i].flags & PTA_RDSEED) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED; -- if (((processor_alias_table[i].flags & PTA_ADX) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX; -- if (((processor_alias_table[i].flags & PTA_FXSR) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR; -- if (((processor_alias_table[i].flags & PTA_XSAVE) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE; -- if (((processor_alias_table[i].flags & PTA_XSAVEOPT) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT; -- if (((processor_alias_table[i].flags & PTA_AVX512F) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F)) -- opts->x_ix86_isa_flags |= 
OPTION_MASK_ISA_AVX512F; -- if (((processor_alias_table[i].flags & PTA_AVX512ER) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER; -- if (((processor_alias_table[i].flags & PTA_AVX512PF) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF; -- if (((processor_alias_table[i].flags & PTA_AVX512CD) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD; -- if (((processor_alias_table[i].flags & PTA_PREFETCHWT1) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1; -- if (((processor_alias_table[i].flags & PTA_CLWB) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB; -- if (((processor_alias_table[i].flags & PTA_CLFLUSHOPT) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT; -- if (((processor_alias_table[i].flags & PTA_CLZERO) != 0) -- && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_CLZERO)) -- opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_CLZERO; -- if (((processor_alias_table[i].flags & PTA_XSAVEC) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC; -- if (((processor_alias_table[i].flags & PTA_XSAVES) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES; -- if (((processor_alias_table[i].flags & PTA_AVX512DQ) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ; -- if (((processor_alias_table[i].flags & PTA_AVX512BW) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW; -- if (((processor_alias_table[i].flags & PTA_AVX512VL) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL; -- if (((processor_alias_table[i].flags & PTA_AVX512VBMI) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI; -- if (((processor_alias_table[i].flags & PTA_AVX512IFMA) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA; -- if (((processor_alias_table[i].flags & PTA_AVX512VNNI) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VNNI)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VNNI; -- if (((processor_alias_table[i].flags & PTA_GFNI) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_GFNI)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_GFNI; -- if (((processor_alias_table[i].flags & PTA_AVX512VBMI2) != 0) -- && !(opts->x_ix86_isa_flags_explicit -- & OPTION_MASK_ISA_AVX512VBMI2)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI2; -- if (((processor_alias_table[i].flags & PTA_VPCLMULQDQ) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_VPCLMULQDQ)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_VPCLMULQDQ; -- if (((processor_alias_table[i].flags & PTA_AVX512BITALG) != 0) -- && !(opts->x_ix86_isa_flags_explicit -- & OPTION_MASK_ISA_AVX512BITALG)) -- opts->x_ix86_isa_flags |= 
OPTION_MASK_ISA_AVX512BITALG; -- -- if (((processor_alias_table[i].flags & PTA_AVX5124VNNIW) != 0) -- && !(opts->x_ix86_isa_flags2_explicit -- & OPTION_MASK_ISA_AVX5124VNNIW)) -- opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX5124VNNIW; -- if (((processor_alias_table[i].flags & PTA_AVX5124FMAPS) != 0) -- && !(opts->x_ix86_isa_flags2_explicit -- & OPTION_MASK_ISA_AVX5124FMAPS)) -- opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX5124FMAPS; -- if (((processor_alias_table[i].flags & PTA_AVX512VPOPCNTDQ) != 0) -- && !(opts->x_ix86_isa_flags_explicit -- & OPTION_MASK_ISA_AVX512VPOPCNTDQ)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VPOPCNTDQ; -- if (((processor_alias_table[i].flags & PTA_SGX) != 0) -- && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_SGX)) -- opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_SGX; -- if (((processor_alias_table[i].flags & PTA_VAES) != 0) -- && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_VAES)) -- opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_VAES; -- if (((processor_alias_table[i].flags & PTA_RDPID) != 0) -- && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_RDPID)) -- opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_RDPID; -- if (((processor_alias_table[i].flags & PTA_PCONFIG) != 0) -- && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_PCONFIG)) -- opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_PCONFIG; -- if (((processor_alias_table[i].flags & PTA_WBNOINVD) != 0) -- && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_WBNOINVD)) -- opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_WBNOINVD; -- if (((processor_alias_table[i].flags & PTA_PTWRITE) != 0) -- && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_PTWRITE)) -- opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_PTWRITE; -- -- if ((processor_alias_table[i].flags -- & (PTA_PREFETCH_SSE | PTA_SSE)) != 0) -- x86_prefetch_sse = true; -- if (((processor_alias_table[i].flags & PTA_MWAITX) != 0) -- && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_MWAITX)) -- opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_MWAITX; -- if (((processor_alias_table[i].flags & PTA_PKU) != 0) -- && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PKU)) -- opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PKU; -- -- /* Don't enable x87 instructions if only -- general registers are allowed. */ -- if (!(opts_set->x_ix86_target_flags & OPTION_MASK_GENERAL_REGS_ONLY) -- && !(opts_set->x_target_flags & MASK_80387)) -+ else if (size < 64+64) - { -- if (((processor_alias_table[i].flags & PTA_NO_80387) != 0)) -- opts->x_target_flags &= ~MASK_80387; -- else -- opts->x_target_flags |= MASK_80387; -+ classes[0] = classes[1] = X86_64_INTEGER_CLASS; -+ return 2; - } -- break; -+ else -+ gcc_unreachable (); - } -+ case E_CDImode: -+ case E_TImode: -+ classes[0] = classes[1] = X86_64_INTEGER_CLASS; -+ return 2; -+ case E_COImode: -+ case E_OImode: -+ /* OImode shouldn't be used directly. */ -+ gcc_unreachable (); -+ case E_CTImode: -+ return 0; -+ case E_SFmode: -+ if (!(bit_offset % 64)) -+ classes[0] = X86_64_SSESF_CLASS; -+ else -+ classes[0] = X86_64_SSE_CLASS; -+ return 1; -+ case E_DFmode: -+ classes[0] = X86_64_SSEDF_CLASS; -+ return 1; -+ case E_XFmode: -+ classes[0] = X86_64_X87_CLASS; -+ classes[1] = X86_64_X87UP_CLASS; -+ return 2; -+ case E_TFmode: -+ classes[0] = X86_64_SSE_CLASS; -+ classes[1] = X86_64_SSEUP_CLASS; -+ return 2; -+ case E_SCmode: -+ classes[0] = X86_64_SSE_CLASS; -+ if (!(bit_offset % 64)) -+ return 1; -+ else -+ { -+ static bool warned; - -- if (i == pta_size) -- { -- error (main_args_p -- ? 
G_("bad value (%qs) for %<-march=%> switch") -- : G_("bad value (%qs) for % attribute"), -- opts->x_ix86_arch_string); -+ if (!warned && warn_psabi) -+ { -+ warned = true; -+ inform (input_location, -+ "the ABI of passing structure with %" -+ " member has changed in GCC 4.4"); -+ } -+ classes[1] = X86_64_SSESF_CLASS; -+ return 2; -+ } -+ case E_DCmode: -+ classes[0] = X86_64_SSEDF_CLASS; -+ classes[1] = X86_64_SSEDF_CLASS; -+ return 2; -+ case E_XCmode: -+ classes[0] = X86_64_COMPLEX_X87_CLASS; -+ return 1; -+ case E_TCmode: -+ /* This modes is larger than 16 bytes. */ -+ return 0; -+ case E_V8SFmode: -+ case E_V8SImode: -+ case E_V32QImode: -+ case E_V16HImode: -+ case E_V4DFmode: -+ case E_V4DImode: -+ classes[0] = X86_64_SSE_CLASS; -+ classes[1] = X86_64_SSEUP_CLASS; -+ classes[2] = X86_64_SSEUP_CLASS; -+ classes[3] = X86_64_SSEUP_CLASS; -+ return 4; -+ case E_V8DFmode: -+ case E_V16SFmode: -+ case E_V8DImode: -+ case E_V16SImode: -+ case E_V32HImode: -+ case E_V64QImode: -+ classes[0] = X86_64_SSE_CLASS; -+ classes[1] = X86_64_SSEUP_CLASS; -+ classes[2] = X86_64_SSEUP_CLASS; -+ classes[3] = X86_64_SSEUP_CLASS; -+ classes[4] = X86_64_SSEUP_CLASS; -+ classes[5] = X86_64_SSEUP_CLASS; -+ classes[6] = X86_64_SSEUP_CLASS; -+ classes[7] = X86_64_SSEUP_CLASS; -+ return 8; -+ case E_V4SFmode: -+ case E_V4SImode: -+ case E_V16QImode: -+ case E_V8HImode: -+ case E_V2DFmode: -+ case E_V2DImode: -+ classes[0] = X86_64_SSE_CLASS; -+ classes[1] = X86_64_SSEUP_CLASS; -+ return 2; -+ case E_V1TImode: -+ case E_V1DImode: -+ case E_V2SFmode: -+ case E_V2SImode: -+ case E_V4HImode: -+ case E_V8QImode: -+ classes[0] = X86_64_SSE_CLASS; -+ return 1; -+ case E_BLKmode: -+ case E_VOIDmode: -+ return 0; -+ default: -+ gcc_assert (VECTOR_MODE_P (mode)); - -- auto_vec candidates; -- for (i = 0; i < pta_size; i++) -- if (strcmp (processor_alias_table[i].name, "generic") -- && strcmp (processor_alias_table[i].name, "intel") -- && (!TARGET_64BIT_P (opts->x_ix86_isa_flags) -- || ((processor_alias_table[i].flags & PTA_64BIT) != 0))) -- candidates.safe_push (processor_alias_table[i].name); -+ if (bytes > 16) -+ return 0; - --#ifdef HAVE_LOCAL_CPU_DETECT -- /* Add also "native" as possible value. */ -- candidates.safe_push ("native"); --#endif -+ gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT); - -- char *s; -- const char *hint -- = candidates_list_and_hint (opts->x_ix86_arch_string, s, candidates); -- if (hint) -- inform (input_location, -- main_args_p -- ? G_("valid arguments to %<-march=%> switch are: " -- "%s; did you mean %qs?") -- : G_("valid arguments to % attribute are: " -- "%s; did you mean %qs?"), s, hint); -+ if (bit_offset + GET_MODE_BITSIZE (mode) <= 32) -+ classes[0] = X86_64_INTEGERSI_CLASS; - else -- inform (input_location, -- main_args_p -- ? G_("valid arguments to %<-march=%> switch are: %s") -- : G_("valid arguments to % attribute " -- "are: %s"), s); -- XDELETEVEC (s); -+ classes[0] = X86_64_INTEGER_CLASS; -+ classes[1] = X86_64_INTEGER_CLASS; -+ return 1 + (bytes > 8); - } -+} -+ -+/* Examine the argument and return set number of register required in each -+ class. Return true iff parameter should be passed in memory. 
*/ -+ -+static bool -+examine_argument (machine_mode mode, const_tree type, int in_return, -+ int *int_nregs, int *sse_nregs) -+{ -+ enum x86_64_reg_class regclass[MAX_CLASSES]; -+ int n = classify_argument (mode, type, regclass, 0); - -- ix86_arch_mask = HOST_WIDE_INT_1U << ix86_arch; -- for (i = 0; i < X86_ARCH_LAST; ++i) -- ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask); -+ *int_nregs = 0; -+ *sse_nregs = 0; - -- for (i = 0; i < pta_size; i++) -- if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name)) -+ if (!n) -+ return true; -+ for (n--; n >= 0; n--) -+ switch (regclass[n]) - { -- ix86_schedule = processor_alias_table[i].schedule; -- ix86_tune = processor_alias_table[i].processor; -- if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) -- { -- if (!((processor_alias_table[i].flags & PTA_64BIT) != 0)) -- { -- if (ix86_tune_defaulted) -- { -- opts->x_ix86_tune_string = "x86-64"; -- for (i = 0; i < pta_size; i++) -- if (! strcmp (opts->x_ix86_tune_string, -- processor_alias_table[i].name)) -- break; -- ix86_schedule = processor_alias_table[i].schedule; -- ix86_tune = processor_alias_table[i].processor; -- } -- else -- error ("CPU you selected does not support x86-64 " -- "instruction set"); -- } -- } -- /* Intel CPUs have always interpreted SSE prefetch instructions as -- NOPs; so, we can enable SSE prefetch instructions even when -- -mtune (rather than -march) points us to a processor that has them. -- However, the VIA C3 gives a SIGILL, so we only do that for i686 and -- higher processors. */ -- if (TARGET_CMOV -- && ((processor_alias_table[i].flags -- & (PTA_PREFETCH_SSE | PTA_SSE)) != 0)) -- x86_prefetch_sse = true; -+ case X86_64_INTEGER_CLASS: -+ case X86_64_INTEGERSI_CLASS: -+ (*int_nregs)++; - break; -+ case X86_64_SSE_CLASS: -+ case X86_64_SSESF_CLASS: -+ case X86_64_SSEDF_CLASS: -+ (*sse_nregs)++; -+ break; -+ case X86_64_NO_CLASS: -+ case X86_64_SSEUP_CLASS: -+ break; -+ case X86_64_X87_CLASS: -+ case X86_64_X87UP_CLASS: -+ case X86_64_COMPLEX_X87_CLASS: -+ if (!in_return) -+ return true; -+ break; -+ case X86_64_MEMORY_CLASS: -+ gcc_unreachable (); - } - -- if (ix86_tune_specified && i == pta_size) -- { -- error (main_args_p -- ? G_("bad value (%qs) for %<-mtune=%> switch") -- : G_("bad value (%qs) for % attribute"), -- opts->x_ix86_tune_string); -- -- auto_vec candidates; -- for (i = 0; i < pta_size; i++) -- if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) -- || ((processor_alias_table[i].flags & PTA_64BIT) != 0)) -- candidates.safe_push (processor_alias_table[i].name); -- --#ifdef HAVE_LOCAL_CPU_DETECT -- /* Add also "native" as possible value. */ -- candidates.safe_push ("native"); --#endif -+ return false; -+} - -- char *s; -- const char *hint -- = candidates_list_and_hint (opts->x_ix86_tune_string, s, candidates); -- if (hint) -- inform (input_location, -- main_args_p -- ? G_("valid arguments to %<-mtune=%> switch are: " -- "%s; did you mean %qs?") -- : G_("valid arguments to % attribute are: " -- "%s; did you mean %qs?"), s, hint); -- else -- inform (input_location, -- main_args_p -- ? G_("valid arguments to %<-mtune=%> switch are: %s") -- : G_("valid arguments to % attribute " -- "are: %s"), s); -- XDELETEVEC (s); -- } -+/* Construct container for the argument used by GCC interface. See -+ FUNCTION_ARG for the detailed description. 
*/ - -- set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes); -+static rtx -+construct_container (machine_mode mode, machine_mode orig_mode, -+ const_tree type, int in_return, int nintregs, int nsseregs, -+ const int *intreg, int sse_regno) -+{ -+ /* The following variables hold the static issued_error state. */ -+ static bool issued_sse_arg_error; -+ static bool issued_sse_ret_error; -+ static bool issued_x87_ret_error; - --#ifndef USE_IX86_FRAME_POINTER --#define USE_IX86_FRAME_POINTER 0 --#endif -+ machine_mode tmpmode; -+ int bytes -+ = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); -+ enum x86_64_reg_class regclass[MAX_CLASSES]; -+ int n; -+ int i; -+ int nexps = 0; -+ int needed_sseregs, needed_intregs; -+ rtx exp[MAX_CLASSES]; -+ rtx ret; - --#ifndef USE_X86_64_FRAME_POINTER --#define USE_X86_64_FRAME_POINTER 0 --#endif -+ n = classify_argument (mode, type, regclass, 0); -+ if (!n) -+ return NULL; -+ if (examine_argument (mode, type, in_return, &needed_intregs, -+ &needed_sseregs)) -+ return NULL; -+ if (needed_intregs > nintregs || needed_sseregs > nsseregs) -+ return NULL; - -- /* Set the default values for switches whose default depends on TARGET_64BIT -- in case they weren't overwritten by command line options. */ -- if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) -- { -- if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer) -- opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER; -- if (opts->x_flag_asynchronous_unwind_tables -- && !opts_set->x_flag_unwind_tables -- && TARGET_64BIT_MS_ABI) -- opts->x_flag_unwind_tables = 1; -- if (opts->x_flag_asynchronous_unwind_tables == 2) -- opts->x_flag_unwind_tables -- = opts->x_flag_asynchronous_unwind_tables = 1; -- if (opts->x_flag_pcc_struct_return == 2) -- opts->x_flag_pcc_struct_return = 0; -- } -- else -+ /* We allowed the user to turn off SSE for kernel mode. Don't crash if -+ some less clueful developer tries to use floating-point anyway. */ -+ if (needed_sseregs && !TARGET_SSE) - { -- if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer) -- opts->x_flag_omit_frame_pointer -- = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size); -- if (opts->x_flag_asynchronous_unwind_tables == 2) -- opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER; -- if (opts->x_flag_pcc_struct_return == 2) -- { -- /* Intel MCU psABI specifies that -freg-struct-return should -- be on. Instead of setting DEFAULT_PCC_STRUCT_RETURN to 1, -- we check -miamcu so that -freg-struct-return is always -- turned on if -miamcu is used. */ -- if (TARGET_IAMCU_P (opts->x_target_flags)) -- opts->x_flag_pcc_struct_return = 0; -- else -- opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN; -+ if (in_return) -+ { -+ if (!issued_sse_ret_error) -+ { -+ error ("SSE register return with SSE disabled"); -+ issued_sse_ret_error = true; -+ } - } -- } -- -- ix86_tune_cost = processor_cost_table[ix86_tune]; -- /* TODO: ix86_cost should be chosen at instruction or function granuality -- so for cold code we use size_cost even in !optimize_size compilation. */ -- if (opts->x_optimize_size) -- ix86_cost = &ix86_size_cost; -- else -- ix86_cost = ix86_tune_cost; -- -- /* Arrange to set up i386_stack_locals for all functions. */ -- init_machine_status = ix86_init_machine_status; -- -- /* Validate -mregparm= value. 
*/ -- if (opts_set->x_ix86_regparm) -- { -- if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) -- warning (0, "%<-mregparm%> is ignored in 64-bit mode"); -- else if (TARGET_IAMCU_P (opts->x_target_flags)) -- warning (0, "%<-mregparm%> is ignored for Intel MCU psABI"); -- if (opts->x_ix86_regparm > REGPARM_MAX) -+ else if (!issued_sse_arg_error) - { -- error ("%<-mregparm=%d%> is not between 0 and %d", -- opts->x_ix86_regparm, REGPARM_MAX); -- opts->x_ix86_regparm = 0; -+ error ("SSE register argument with SSE disabled"); -+ issued_sse_arg_error = true; - } -+ return NULL; - } -- if (TARGET_IAMCU_P (opts->x_target_flags) -- || TARGET_64BIT_P (opts->x_ix86_isa_flags)) -- opts->x_ix86_regparm = REGPARM_MAX; -- -- /* Default align_* from the processor table. */ -- ix86_default_align (opts); -- -- /* Provide default for -mbranch-cost= value. */ -- if (!opts_set->x_ix86_branch_cost) -- opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost; - -- if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) -- { -- opts->x_target_flags -- |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags; -- -- if (!ix86_arch_specified) -- opts->x_ix86_isa_flags -- |= TARGET_SUBTARGET64_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit; -- -- if (TARGET_RTD_P (opts->x_target_flags)) -- warning (0, -- main_args_p -- ? G_("%<-mrtd%> is ignored in 64bit mode") -- : G_("% is ignored in 64bit mode")); -- } -- else -- { -- opts->x_target_flags -- |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags; -- -- if (!ix86_arch_specified) -- opts->x_ix86_isa_flags -- |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit; -- -- /* i386 ABI does not specify red zone. It still makes sense to use it -- when programmer takes care to stack from being destroyed. */ -- if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE)) -- opts->x_target_flags |= MASK_NO_RED_ZONE; -- } -- -- /* Keep nonleaf frame pointers. */ -- if (opts->x_flag_omit_frame_pointer) -- opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER; -- else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags)) -- opts->x_flag_omit_frame_pointer = 1; -- -- /* If we're doing fast math, we don't care about comparison order -- wrt NaNs. This lets us use a shorter comparison sequence. */ -- if (opts->x_flag_finite_math_only) -- opts->x_target_flags &= ~MASK_IEEE_FP; -- -- /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387, -- since the insns won't need emulation. */ -- if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387]) -- opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387; -- -- /* Likewise, if the target doesn't have a 387, or we've specified -- software floating point, don't use 387 inline intrinsics. */ -- if (!TARGET_80387_P (opts->x_target_flags)) -- opts->x_target_flags |= MASK_NO_FANCY_MATH_387; -- -- /* Turn on MMX builtins for -msse. */ -- if (TARGET_SSE_P (opts->x_ix86_isa_flags)) -- opts->x_ix86_isa_flags -- |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit; -- -- /* Enable SSE prefetch. */ -- if (TARGET_SSE_P (opts->x_ix86_isa_flags) -- || (TARGET_PRFCHW_P (opts->x_ix86_isa_flags) -- && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)) -- || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags)) -- x86_prefetch_sse = true; -- -- /* Enable popcnt instruction for -msse4.2 or -mabm. */ -- if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags) -- || TARGET_ABM_P (opts->x_ix86_isa_flags)) -- opts->x_ix86_isa_flags -- |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit; -- -- /* Enable lzcnt instruction for -mabm. 
*/ -- if (TARGET_ABM_P(opts->x_ix86_isa_flags)) -- opts->x_ix86_isa_flags -- |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit; -- -- /* Disable BMI, BMI2 and TBM instructions for -m16. */ -- if (TARGET_16BIT_P(opts->x_ix86_isa_flags)) -- opts->x_ix86_isa_flags -- &= ~((OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_TBM) -- & ~opts->x_ix86_isa_flags_explicit); -- -- /* Validate -mpreferred-stack-boundary= value or default it to -- PREFERRED_STACK_BOUNDARY_DEFAULT. */ -- ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT; -- if (opts_set->x_ix86_preferred_stack_boundary_arg) -- { -- int min = TARGET_64BIT_P (opts->x_ix86_isa_flags)? 3 : 2; -- int max = TARGET_SEH ? 4 : 12; -- -- if (opts->x_ix86_preferred_stack_boundary_arg < min -- || opts->x_ix86_preferred_stack_boundary_arg > max) -- { -- if (min == max) -- error ("%<-mpreferred-stack-boundary%> is not supported " -- "for this target"); -- else -- error ("%<-mpreferred-stack-boundary=%d%> is not between %d and %d", -- opts->x_ix86_preferred_stack_boundary_arg, min, max); -- } -- else -- ix86_preferred_stack_boundary -- = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT; -- } -- -- /* Set the default value for -mstackrealign. */ -- if (!opts_set->x_ix86_force_align_arg_pointer) -- opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT; -- -- ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY; -- -- /* Validate -mincoming-stack-boundary= value or default it to -- MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */ -- ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary; -- if (opts_set->x_ix86_incoming_stack_boundary_arg) -- { -- int min = TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2; -- -- if (opts->x_ix86_incoming_stack_boundary_arg < min -- || opts->x_ix86_incoming_stack_boundary_arg > 12) -- error ("%<-mincoming-stack-boundary=%d%> is not between %d and 12", -- opts->x_ix86_incoming_stack_boundary_arg, min); -- else -- { -- ix86_user_incoming_stack_boundary -- = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT; -- ix86_incoming_stack_boundary -- = ix86_user_incoming_stack_boundary; -- } -- } -- --#ifndef NO_PROFILE_COUNTERS -- if (flag_nop_mcount) -- error ("%<-mnop-mcount%> is not compatible with this target"); --#endif -- if (flag_nop_mcount && flag_pic) -- error ("%<-mnop-mcount%> is not implemented for %<-fPIC%>"); -- -- /* Accept -msseregparm only if at least SSE support is enabled. */ -- if (TARGET_SSEREGPARM_P (opts->x_target_flags) -- && ! TARGET_SSE_P (opts->x_ix86_isa_flags)) -- error (main_args_p -- ? G_("%<-msseregparm%> used without SSE enabled") -- : G_("% used without SSE enabled")); -- -- if (opts_set->x_ix86_fpmath) -- { -- if (opts->x_ix86_fpmath & FPMATH_SSE) -+ /* Likewise, error if the ABI requires us to return values in the -+ x87 registers and the user specified -mno-80387. 
*/ -+ if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return) -+ for (i = 0; i < n; i++) -+ if (regclass[i] == X86_64_X87_CLASS -+ || regclass[i] == X86_64_X87UP_CLASS -+ || regclass[i] == X86_64_COMPLEX_X87_CLASS) - { -- if (!TARGET_SSE_P (opts->x_ix86_isa_flags)) -- { -- if (TARGET_80387_P (opts->x_target_flags)) -- { -- warning (0, "SSE instruction set disabled, using 387 arithmetics"); -- opts->x_ix86_fpmath = FPMATH_387; -- } -- } -- else if ((opts->x_ix86_fpmath & FPMATH_387) -- && !TARGET_80387_P (opts->x_target_flags)) -+ if (!issued_x87_ret_error) - { -- warning (0, "387 instruction set disabled, using SSE arithmetics"); -- opts->x_ix86_fpmath = FPMATH_SSE; -+ error ("x87 register return with x87 disabled"); -+ issued_x87_ret_error = true; - } -+ return NULL; - } -- } -- /* For all chips supporting SSE2, -mfpmath=sse performs better than -- fpmath=387. The second is however default at many targets since the -- extra 80bit precision of temporaries is considered to be part of ABI. -- Overwrite the default at least for -ffast-math. -- TODO: -mfpmath=both seems to produce same performing code with bit -- smaller binaries. It is however not clear if register allocation is -- ready for this setting. -- Also -mfpmath=387 is overall a lot more compact (bout 4-5%) than SSE -- codegen. We may switch to 387 with -ffast-math for size optimized -- functions. */ -- else if (fast_math_flags_set_p (&global_options) -- && TARGET_SSE2_P (opts->x_ix86_isa_flags)) -- opts->x_ix86_fpmath = FPMATH_SSE; -- else -- opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags); - -- /* Use external vectorized library in vectorizing intrinsics. */ -- if (opts_set->x_ix86_veclibabi_type) -- switch (opts->x_ix86_veclibabi_type) -+ /* First construct simple cases. Avoid SCmode, since we want to use -+ single register to pass this type. */ -+ if (n == 1 && mode != SCmode) -+ switch (regclass[0]) - { -- case ix86_veclibabi_type_svml: -- ix86_veclib_handler = ix86_veclibabi_svml; -- break; -- -- case ix86_veclibabi_type_acml: -- ix86_veclib_handler = ix86_veclibabi_acml; -+ case X86_64_INTEGER_CLASS: -+ case X86_64_INTEGERSI_CLASS: -+ return gen_rtx_REG (mode, intreg[0]); -+ case X86_64_SSE_CLASS: -+ case X86_64_SSESF_CLASS: -+ case X86_64_SSEDF_CLASS: -+ if (mode != BLKmode) -+ return gen_reg_or_parallel (mode, orig_mode, -+ GET_SSE_REGNO (sse_regno)); - break; -- -+ case X86_64_X87_CLASS: -+ case X86_64_COMPLEX_X87_CLASS: -+ return gen_rtx_REG (mode, FIRST_STACK_REG); -+ case X86_64_NO_CLASS: -+ /* Zero sized array, struct or class. 
*/ -+ return NULL; - default: - gcc_unreachable (); - } -+ if (n == 2 -+ && regclass[0] == X86_64_SSE_CLASS -+ && regclass[1] == X86_64_SSEUP_CLASS -+ && mode != BLKmode) -+ return gen_reg_or_parallel (mode, orig_mode, -+ GET_SSE_REGNO (sse_regno)); -+ if (n == 4 -+ && regclass[0] == X86_64_SSE_CLASS -+ && regclass[1] == X86_64_SSEUP_CLASS -+ && regclass[2] == X86_64_SSEUP_CLASS -+ && regclass[3] == X86_64_SSEUP_CLASS -+ && mode != BLKmode) -+ return gen_reg_or_parallel (mode, orig_mode, -+ GET_SSE_REGNO (sse_regno)); -+ if (n == 8 -+ && regclass[0] == X86_64_SSE_CLASS -+ && regclass[1] == X86_64_SSEUP_CLASS -+ && regclass[2] == X86_64_SSEUP_CLASS -+ && regclass[3] == X86_64_SSEUP_CLASS -+ && regclass[4] == X86_64_SSEUP_CLASS -+ && regclass[5] == X86_64_SSEUP_CLASS -+ && regclass[6] == X86_64_SSEUP_CLASS -+ && regclass[7] == X86_64_SSEUP_CLASS -+ && mode != BLKmode) -+ return gen_reg_or_parallel (mode, orig_mode, -+ GET_SSE_REGNO (sse_regno)); -+ if (n == 2 -+ && regclass[0] == X86_64_X87_CLASS -+ && regclass[1] == X86_64_X87UP_CLASS) -+ return gen_rtx_REG (XFmode, FIRST_STACK_REG); - -- if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS] -- && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)) -- opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; -- -- /* If stack probes are required, the space used for large function -- arguments on the stack must also be probed, so enable -- -maccumulate-outgoing-args so this happens in the prologue. */ -- if (TARGET_STACK_PROBE_P (opts->x_target_flags) -- && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)) -- { -- if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS) -- warning (0, -- main_args_p -- ? G_("stack probing requires %<-maccumulate-outgoing-args%> " -- "for correctness") -- : G_("stack probing requires " -- "% for " -- "correctness")); -- opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; -- } -- -- /* Stack realignment without -maccumulate-outgoing-args requires %ebp, -- so enable -maccumulate-outgoing-args when %ebp is fixed. */ -- if (fixed_regs[BP_REG] -- && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)) -- { -- if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS) -- warning (0, -- main_args_p -- ? G_("fixed ebp register requires " -- "%<-maccumulate-outgoing-args%>") -- : G_("fixed ebp register requires " -- "%")); -- opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; -- } -- -- /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */ -- { -- char *p; -- ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0); -- p = strchr (internal_label_prefix, 'X'); -- internal_label_prefix_len = p - internal_label_prefix; -- *p = '\0'; -- } -- -- /* When scheduling description is not available, disable scheduler pass -- so it won't slow down the compilation and make x87 code slower. 
*/ -- if (!TARGET_SCHEDULE) -- opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0; -- -- maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, -- ix86_tune_cost->simultaneous_prefetches, -- opts->x_param_values, -- opts_set->x_param_values); -- maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, -- ix86_tune_cost->prefetch_block, -- opts->x_param_values, -- opts_set->x_param_values); -- maybe_set_param_value (PARAM_L1_CACHE_SIZE, -- ix86_tune_cost->l1_cache_size, -- opts->x_param_values, -- opts_set->x_param_values); -- maybe_set_param_value (PARAM_L2_CACHE_SIZE, -- ix86_tune_cost->l2_cache_size, -- opts->x_param_values, -- opts_set->x_param_values); -- -- /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */ -- if (opts->x_flag_prefetch_loop_arrays < 0 -- && HAVE_prefetch -- && (opts->x_optimize >= 3 || opts->x_flag_profile_use) -- && !opts->x_optimize_size -- && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL) -- opts->x_flag_prefetch_loop_arrays = 1; -- -- /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0) -- can be opts->x_optimized to ap = __builtin_next_arg (0). */ -- if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack) -- targetm.expand_builtin_va_start = NULL; -- -- if (TARGET_64BIT_P (opts->x_ix86_isa_flags)) -- { -- ix86_gen_leave = gen_leave_rex64; -- if (Pmode == DImode) -- { -- ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di; -- ix86_gen_tls_local_dynamic_base_64 -- = gen_tls_local_dynamic_base_64_di; -- } -- else -+ if (n == 2 -+ && regclass[0] == X86_64_INTEGER_CLASS -+ && regclass[1] == X86_64_INTEGER_CLASS -+ && (mode == CDImode || mode == TImode || mode == BLKmode) -+ && intreg[0] + 1 == intreg[1]) -+ { -+ if (mode == BLKmode) - { -- ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si; -- ix86_gen_tls_local_dynamic_base_64 -- = gen_tls_local_dynamic_base_64_si; -+ /* Use TImode for BLKmode values in 2 integer registers. */ -+ exp[0] = gen_rtx_EXPR_LIST (VOIDmode, -+ gen_rtx_REG (TImode, intreg[0]), -+ GEN_INT (0)); -+ ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1)); -+ XVECEXP (ret, 0, 0) = exp[0]; -+ return ret; - } -+ else -+ return gen_rtx_REG (mode, intreg[0]); - } -- else -- ix86_gen_leave = gen_leave; -- -- if (Pmode == DImode) -- { -- ix86_gen_add3 = gen_adddi3; -- ix86_gen_sub3 = gen_subdi3; -- ix86_gen_sub3_carry = gen_subdi3_carry; -- ix86_gen_one_cmpl2 = gen_one_cmpldi2; -- ix86_gen_andsp = gen_anddi3; -- ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di; -- ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi; -- ix86_gen_probe_stack_range = gen_probe_stack_rangedi; -- ix86_gen_monitor = gen_sse3_monitor_di; -- ix86_gen_monitorx = gen_monitorx_di; -- ix86_gen_clzero = gen_clzero_di; -- } -- else -- { -- ix86_gen_add3 = gen_addsi3; -- ix86_gen_sub3 = gen_subsi3; -- ix86_gen_sub3_carry = gen_subsi3_carry; -- ix86_gen_one_cmpl2 = gen_one_cmplsi2; -- ix86_gen_andsp = gen_andsi3; -- ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si; -- ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi; -- ix86_gen_probe_stack_range = gen_probe_stack_rangesi; -- ix86_gen_monitor = gen_sse3_monitor_si; -- ix86_gen_monitorx = gen_monitorx_si; -- ix86_gen_clzero = gen_clzero_si; -- } -- --#ifdef USE_IX86_CLD -- /* Use -mcld by default for 32-bit code if configured with --enable-cld. 
*/ -- if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)) -- opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags; --#endif - -- /* Set the default value for -mfentry. */ -- if (!opts_set->x_flag_fentry) -- opts->x_flag_fentry = TARGET_SEH; -- else -+ /* Otherwise figure out the entries of the PARALLEL. */ -+ for (i = 0; i < n; i++) - { -- if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic -- && opts->x_flag_fentry) -- sorry ("%<-mfentry%> isn%'t supported for 32-bit in combination " -- "with %<-fpic%>"); -- else if (TARGET_SEH && !opts->x_flag_fentry) -- sorry ("%<-mno-fentry%> isn%'t compatible with SEH"); -- } -- -- if (TARGET_SEH && TARGET_CALL_MS2SYSV_XLOGUES) -- sorry ("%<-mcall-ms2sysv-xlogues%> isn%'t currently supported with SEH"); -- -- if (!(opts_set->x_target_flags & MASK_VZEROUPPER) -- && TARGET_EMIT_VZEROUPPER) -- opts->x_target_flags |= MASK_VZEROUPPER; -- if (!(opts_set->x_target_flags & MASK_STV)) -- opts->x_target_flags |= MASK_STV; -- /* Disable STV if -mpreferred-stack-boundary={2,3} or -- -mincoming-stack-boundary={2,3} or -mstackrealign - the needed -- stack realignment will be extra cost the pass doesn't take into -- account and the pass can't realign the stack. */ -- if (ix86_preferred_stack_boundary < 128 -- || ix86_incoming_stack_boundary < 128 -- || opts->x_ix86_force_align_arg_pointer) -- opts->x_target_flags &= ~MASK_STV; -- if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL] -- && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD)) -- opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD; -- if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL] -- && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE)) -- opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE; -- -- /* Enable 128-bit AVX instruction generation -- for the auto-vectorizer. */ -- if (TARGET_AVX128_OPTIMAL -- && (opts_set->x_prefer_vector_width_type == PVW_NONE)) -- opts->x_prefer_vector_width_type = PVW_AVX128; -- -- /* Use 256-bit AVX instruction generation -- in the auto-vectorizer. */ -- if (ix86_tune_features[X86_TUNE_AVX256_OPTIMAL] -- && (opts_set->x_prefer_vector_width_type == PVW_NONE)) -- opts->x_prefer_vector_width_type = PVW_AVX256; -- -- if (opts->x_ix86_recip_name) -- { -- char *p = ASTRDUP (opts->x_ix86_recip_name); -- char *q; -- unsigned int mask, i; -- bool invert; -- -- while ((q = strtok (p, ",")) != NULL) -- { -- p = NULL; -- if (*q == '!') -- { -- invert = true; -- q++; -- } -- else -- invert = false; -+ int pos; - -- if (!strcmp (q, "default")) -- mask = RECIP_MASK_ALL; -- else -- { -- for (i = 0; i < ARRAY_SIZE (recip_options); i++) -- if (!strcmp (q, recip_options[i].string)) -+ switch (regclass[i]) -+ { -+ case X86_64_NO_CLASS: -+ break; -+ case X86_64_INTEGER_CLASS: -+ case X86_64_INTEGERSI_CLASS: -+ /* Merge TImodes on aligned occasions here too. */ -+ if (i * 8 + 8 > bytes) -+ { -+ unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT; -+ if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode)) -+ /* We've requested 24 bytes we -+ don't have mode for. Use DImode. 
*/ -+ tmpmode = DImode; -+ } -+ else if (regclass[i] == X86_64_INTEGERSI_CLASS) -+ tmpmode = SImode; -+ else -+ tmpmode = DImode; -+ exp [nexps++] -+ = gen_rtx_EXPR_LIST (VOIDmode, -+ gen_rtx_REG (tmpmode, *intreg), -+ GEN_INT (i*8)); -+ intreg++; -+ break; -+ case X86_64_SSESF_CLASS: -+ exp [nexps++] -+ = gen_rtx_EXPR_LIST (VOIDmode, -+ gen_rtx_REG (SFmode, -+ GET_SSE_REGNO (sse_regno)), -+ GEN_INT (i*8)); -+ sse_regno++; -+ break; -+ case X86_64_SSEDF_CLASS: -+ exp [nexps++] -+ = gen_rtx_EXPR_LIST (VOIDmode, -+ gen_rtx_REG (DFmode, -+ GET_SSE_REGNO (sse_regno)), -+ GEN_INT (i*8)); -+ sse_regno++; -+ break; -+ case X86_64_SSE_CLASS: -+ pos = i; -+ switch (n) -+ { -+ case 1: -+ tmpmode = DImode; -+ break; -+ case 2: -+ if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS) - { -- mask = recip_options[i].mask; -- break; -+ tmpmode = TImode; -+ i++; - } -- -- if (i == ARRAY_SIZE (recip_options)) -- { -- error ("unknown option for %<-mrecip=%s%>", q); -- invert = false; -- mask = RECIP_MASK_NONE; -- } -- } -- -- opts->x_recip_mask_explicit |= mask; -- if (invert) -- opts->x_recip_mask &= ~mask; -- else -- opts->x_recip_mask |= mask; -+ else -+ tmpmode = DImode; -+ break; -+ case 4: -+ gcc_assert (i == 0 -+ && regclass[1] == X86_64_SSEUP_CLASS -+ && regclass[2] == X86_64_SSEUP_CLASS -+ && regclass[3] == X86_64_SSEUP_CLASS); -+ tmpmode = OImode; -+ i += 3; -+ break; -+ case 8: -+ gcc_assert (i == 0 -+ && regclass[1] == X86_64_SSEUP_CLASS -+ && regclass[2] == X86_64_SSEUP_CLASS -+ && regclass[3] == X86_64_SSEUP_CLASS -+ && regclass[4] == X86_64_SSEUP_CLASS -+ && regclass[5] == X86_64_SSEUP_CLASS -+ && regclass[6] == X86_64_SSEUP_CLASS -+ && regclass[7] == X86_64_SSEUP_CLASS); -+ tmpmode = XImode; -+ i += 7; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ exp [nexps++] -+ = gen_rtx_EXPR_LIST (VOIDmode, -+ gen_rtx_REG (tmpmode, -+ GET_SSE_REGNO (sse_regno)), -+ GEN_INT (pos*8)); -+ sse_regno++; -+ break; -+ default: -+ gcc_unreachable (); - } - } - -- if (TARGET_RECIP_P (opts->x_target_flags)) -- opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit; -- else if (opts_set->x_target_flags & MASK_RECIP) -- opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit); -+ /* Empty aligned struct, union or class. */ -+ if (nexps == 0) -+ return NULL; -+ -+ ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps)); -+ for (i = 0; i < nexps; i++) -+ XVECEXP (ret, 0, i) = exp [i]; -+ return ret; -+} -+ -+/* Update the data in CUM to advance over an argument of mode MODE -+ and data type TYPE. (TYPE is null for libcalls where that information -+ may not be available.) - -- /* Default long double to 64-bit for 32-bit Bionic and to __float128 -- for 64-bit Bionic. Also default long double to 64-bit for Intel -- MCU psABI. */ -- if ((TARGET_HAS_BIONIC || TARGET_IAMCU) -- && !(opts_set->x_target_flags -- & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128))) -- opts->x_target_flags |= (TARGET_64BIT -- ? MASK_LONG_DOUBLE_128 -- : MASK_LONG_DOUBLE_64); -+ Return a number of integer regsiters advanced over. */ - -- /* Only one of them can be active. 
*/ -- gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0 -- || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0); -+static int -+function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode, -+ const_tree type, HOST_WIDE_INT bytes, -+ HOST_WIDE_INT words) -+{ -+ int res = 0; -+ bool error_p = false; - -- /* Handle stack protector */ -- if (!opts_set->x_ix86_stack_protector_guard) -+ if (TARGET_IAMCU) - { --#ifdef TARGET_THREAD_SSP_OFFSET -- if (!TARGET_HAS_BIONIC) -- opts->x_ix86_stack_protector_guard = SSP_TLS; -- else --#endif -- opts->x_ix86_stack_protector_guard = SSP_GLOBAL; -+ /* Intel MCU psABI passes scalars and aggregates no larger than 8 -+ bytes in registers. */ -+ if (!VECTOR_MODE_P (mode) && bytes <= 8) -+ goto pass_in_reg; -+ return res; - } - -- if (opts_set->x_ix86_stack_protector_guard_offset_str) -+ switch (mode) - { -- char *endp; -- const char *str = opts->x_ix86_stack_protector_guard_offset_str; -- -- errno = 0; -- int64_t offset; -- --#if defined(INT64_T_IS_LONG) -- offset = strtol (str, &endp, 0); --#else -- offset = strtoll (str, &endp, 0); --#endif -- -- if (!*str || *endp || errno) -- error ("%qs is not a valid number " -- "in %<-mstack-protector-guard-offset=%>", str); -- -- if (!IN_RANGE (offset, HOST_WIDE_INT_C (-0x80000000), -- HOST_WIDE_INT_C (0x7fffffff))) -- error ("%qs is not a valid offset " -- "in %<-mstack-protector-guard-offset=%>", str); -- -- opts->x_ix86_stack_protector_guard_offset = offset; -- } --#ifdef TARGET_THREAD_SSP_OFFSET -- else -- opts->x_ix86_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET; --#endif -- -- if (opts_set->x_ix86_stack_protector_guard_reg_str) -- { -- const char *str = opts->x_ix86_stack_protector_guard_reg_str; -- addr_space_t seg = ADDR_SPACE_GENERIC; -+ default: -+ break; - -- /* Discard optional register prefix. */ -- if (str[0] == '%') -- str++; -+ case E_BLKmode: -+ if (bytes < 0) -+ break; -+ /* FALLTHRU */ - -- if (strlen (str) == 2 && str[1] == 's') -+ case E_DImode: -+ case E_SImode: -+ case E_HImode: -+ case E_QImode: -+pass_in_reg: -+ cum->words += words; -+ cum->nregs -= words; -+ cum->regno += words; -+ if (cum->nregs >= 0) -+ res = words; -+ if (cum->nregs <= 0) - { -- if (str[0] == 'f') -- seg = ADDR_SPACE_SEG_FS; -- else if (str[0] == 'g') -- seg = ADDR_SPACE_SEG_GS; -+ cum->nregs = 0; -+ cfun->machine->arg_reg_available = false; -+ cum->regno = 0; - } -+ break; - -- if (seg == ADDR_SPACE_GENERIC) -- error ("%qs is not a valid base register " -- "in %<-mstack-protector-guard-reg=%>", -- opts->x_ix86_stack_protector_guard_reg_str); -+ case E_OImode: -+ /* OImode shouldn't be used directly. */ -+ gcc_unreachable (); - -- opts->x_ix86_stack_protector_guard_reg = seg; -- } -- else -- { -- opts->x_ix86_stack_protector_guard_reg = DEFAULT_TLS_SEG_REG; -+ case E_DFmode: -+ if (cum->float_in_sse == -1) -+ error_p = true; -+ if (cum->float_in_sse < 2) -+ break; -+ /* FALLTHRU */ -+ case E_SFmode: -+ if (cum->float_in_sse == -1) -+ error_p = true; -+ if (cum->float_in_sse < 1) -+ break; -+ /* FALLTHRU */ - -- /* The kernel uses a different segment register for performance -- reasons; a system call would not have to trash the userspace -- segment register, which would be expensive. 
*/ -- if (opts->x_ix86_cmodel == CM_KERNEL) -- opts->x_ix86_stack_protector_guard_reg = ADDR_SPACE_SEG_GS; -- } -+ case E_V8SFmode: -+ case E_V8SImode: -+ case E_V64QImode: -+ case E_V32HImode: -+ case E_V16SImode: -+ case E_V8DImode: -+ case E_V16SFmode: -+ case E_V8DFmode: -+ case E_V32QImode: -+ case E_V16HImode: -+ case E_V4DFmode: -+ case E_V4DImode: -+ case E_TImode: -+ case E_V16QImode: -+ case E_V8HImode: -+ case E_V4SImode: -+ case E_V2DImode: -+ case E_V4SFmode: -+ case E_V2DFmode: -+ if (!type || !AGGREGATE_TYPE_P (type)) -+ { -+ cum->sse_words += words; -+ cum->sse_nregs -= 1; -+ cum->sse_regno += 1; -+ if (cum->sse_nregs <= 0) -+ { -+ cum->sse_nregs = 0; -+ cum->sse_regno = 0; -+ } -+ } -+ break; - -- /* Handle -mmemcpy-strategy= and -mmemset-strategy= */ -- if (opts->x_ix86_tune_memcpy_strategy) -- { -- char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy); -- ix86_parse_stringop_strategy_string (str, false); -- free (str); -+ case E_V8QImode: -+ case E_V4HImode: -+ case E_V2SImode: -+ case E_V2SFmode: -+ case E_V1TImode: -+ case E_V1DImode: -+ if (!type || !AGGREGATE_TYPE_P (type)) -+ { -+ cum->mmx_words += words; -+ cum->mmx_nregs -= 1; -+ cum->mmx_regno += 1; -+ if (cum->mmx_nregs <= 0) -+ { -+ cum->mmx_nregs = 0; -+ cum->mmx_regno = 0; -+ } -+ } -+ break; - } -- -- if (opts->x_ix86_tune_memset_strategy) -+ if (error_p) - { -- char *str = xstrdup (opts->x_ix86_tune_memset_strategy); -- ix86_parse_stringop_strategy_string (str, true); -- free (str); -+ cum->float_in_sse = 0; -+ error ("calling %qD with SSE calling convention without " -+ "SSE/SSE2 enabled", cum->decl); -+ sorry ("this is a GCC bug that can be worked around by adding " -+ "attribute used to function called"); - } - -- /* Save the initial options in case the user does function specific -- options. */ -- if (main_args_p) -- target_option_default_node = target_option_current_node -- = build_target_option_node (opts); -- -- if (opts->x_flag_cf_protection != CF_NONE) -- opts->x_flag_cf_protection -- = (cf_protection_level) (opts->x_flag_cf_protection | CF_SET); -- -- if (ix86_tune_features [X86_TUNE_AVOID_256FMA_CHAINS]) -- maybe_set_param_value (PARAM_AVOID_FMA_MAX_BITS, 256, -- opts->x_param_values, -- opts_set->x_param_values); -- else if (ix86_tune_features [X86_TUNE_AVOID_128FMA_CHAINS]) -- maybe_set_param_value (PARAM_AVOID_FMA_MAX_BITS, 128, -- opts->x_param_values, -- opts_set->x_param_values); -- -- /* PR86952: jump table usage with retpolines is slow. -- The PR provides some numbers about the slowness. */ -- if (ix86_indirect_branch != indirect_branch_keep -- && !opts_set->x_flag_jump_tables) -- opts->x_flag_jump_tables = 0; -- -- return true; --} -- --/* Implement the TARGET_OPTION_OVERRIDE hook. */ -- --static void --ix86_option_override (void) --{ -- ix86_option_override_internal (true, &global_options, &global_options_set); --} -- --/* Implement the TARGET_OFFLOAD_OPTIONS hook. */ --static char * --ix86_offload_options (void) --{ -- if (TARGET_LP64) -- return xstrdup ("-foffload-abi=lp64"); -- return xstrdup ("-foffload-abi=ilp32"); -+ return res; - } - --/* Update register usage after having seen the compiler flags. */ -- --static void --ix86_conditional_register_usage (void) -+static int -+function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode, -+ const_tree type, HOST_WIDE_INT words, bool named) - { -- int i, c_mask; -+ int int_nregs, sse_nregs; - -- /* If there are no caller-saved registers, preserve all registers. 
-- except fixed_regs and registers used for function return value -- since aggregate_value_p checks call_used_regs[regno] on return -- value. */ -- if (cfun && cfun->machine->no_caller_saved_registers) -- for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) -- if (!fixed_regs[i] && !ix86_function_value_regno_p (i)) -- call_used_regs[i] = 0; -+ /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */ -+ if (!named && (VALID_AVX512F_REG_MODE (mode) -+ || VALID_AVX256_REG_MODE (mode))) -+ return 0; - -- /* For 32-bit targets, squash the REX registers. */ -- if (! TARGET_64BIT) -+ if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs) -+ && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs) - { -- for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++) -- fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; -- for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) -- fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; -- for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++) -- fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; -+ cum->nregs -= int_nregs; -+ cum->sse_nregs -= sse_nregs; -+ cum->regno += int_nregs; -+ cum->sse_regno += sse_nregs; -+ return int_nregs; - } -- -- /* See the definition of CALL_USED_REGISTERS in i386.h. */ -- c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI); -- -- CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]); -- -- for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) -+ else - { -- /* Set/reset conditionally defined registers from -- CALL_USED_REGISTERS initializer. */ -- if (call_used_regs[i] > 1) -- call_used_regs[i] = !!(call_used_regs[i] & c_mask); -- -- /* Calculate registers of CLOBBERED_REGS register set -- as call used registers from GENERAL_REGS register set. */ -- if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i) -- && call_used_regs[i]) -- SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i); -+ int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD; -+ cum->words = ROUND_UP (cum->words, align); -+ cum->words += words; -+ return 0; - } -+} - -- /* If MMX is disabled, squash the registers. */ -- if (! TARGET_MMX) -- for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) -- if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i)) -- fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; -- -- /* If SSE is disabled, squash the registers. */ -- if (! TARGET_SSE) -- for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) -- if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i)) -- fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; -- -- /* If the FPU is disabled, squash the registers. */ -- if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387)) -- for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) -- if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i)) -- fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; -+static int -+function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes, -+ HOST_WIDE_INT words) -+{ -+ /* Otherwise, this should be passed indirect. */ -+ gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8); - -- /* If AVX512F is disabled, squash the registers. */ -- if (! 
TARGET_AVX512F) -+ cum->words += words; -+ if (cum->nregs > 0) - { -- for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++) -- fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; -- -- for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++) -- fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; -+ cum->nregs -= 1; -+ cum->regno += 1; -+ return 1; - } -+ return 0; - } - --/* Canonicalize a comparison from one we don't have to one we do have. */ -+/* Update the data in CUM to advance over argument ARG. */ - - static void --ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1, -- bool op0_preserve_value) -+ix86_function_arg_advance (cumulative_args_t cum_v, -+ const function_arg_info &arg) - { -- /* The order of operands in x87 ficom compare is forced by combine in -- simplify_comparison () function. Float operator is treated as RTX_OBJ -- with a precedence over other operators and is always put in the first -- place. Swap condition and operands to match ficom instruction. */ -- if (!op0_preserve_value -- && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1)) -- { -- enum rtx_code scode = swap_condition ((enum rtx_code) *code); -+ CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); -+ machine_mode mode = arg.mode; -+ HOST_WIDE_INT bytes, words; -+ int nregs; - -- /* We are called only for compares that are split to SAHF instruction. -- Ensure that we have setcc/jcc insn for the swapped condition. */ -- if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN) -- { -- std::swap (*op0, *op1); -- *code = (int) scode; -- } -- } --} -- --/* Save the current options */ -+ /* The argument of interrupt handler is a special case and is -+ handled in ix86_function_arg. */ -+ if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL) -+ return; - --static void --ix86_function_specific_save (struct cl_target_option *ptr, -- struct gcc_options *opts) --{ -- ptr->arch = ix86_arch; -- ptr->schedule = ix86_schedule; -- ptr->prefetch_sse = x86_prefetch_sse; -- ptr->tune = ix86_tune; -- ptr->branch_cost = ix86_branch_cost; -- ptr->tune_defaulted = ix86_tune_defaulted; -- ptr->arch_specified = ix86_arch_specified; -- ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit; -- ptr->x_ix86_isa_flags2_explicit = opts->x_ix86_isa_flags2_explicit; -- ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit; -- ptr->x_ix86_arch_string = opts->x_ix86_arch_string; -- ptr->x_ix86_tune_string = opts->x_ix86_tune_string; -- ptr->x_ix86_cmodel = opts->x_ix86_cmodel; -- ptr->x_ix86_abi = opts->x_ix86_abi; -- ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect; -- ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost; -- ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes; -- ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer; -- ptr->x_ix86_force_drap = opts->x_ix86_force_drap; -- ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg; -- ptr->x_ix86_pmode = opts->x_ix86_pmode; -- ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg; -- ptr->x_ix86_recip_name = opts->x_ix86_recip_name; -- ptr->x_ix86_regparm = opts->x_ix86_regparm; -- ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold; -- ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx; -- ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard; -- ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg; -- ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect; -- ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string; -- 
ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy; -- ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy; -- ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default; -- ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type; -- -- /* The fields are char but the variables are not; make sure the -- values fit in the fields. */ -- gcc_assert (ptr->arch == ix86_arch); -- gcc_assert (ptr->schedule == ix86_schedule); -- gcc_assert (ptr->tune == ix86_tune); -- gcc_assert (ptr->branch_cost == ix86_branch_cost); --} -- --/* Restore the current options */ -+ bytes = arg.promoted_size_in_bytes (); -+ words = CEIL (bytes, UNITS_PER_WORD); - --static void --ix86_function_specific_restore (struct gcc_options *opts, -- struct cl_target_option *ptr) --{ -- enum processor_type old_tune = ix86_tune; -- enum processor_type old_arch = ix86_arch; -- unsigned HOST_WIDE_INT ix86_arch_mask; -- int i; -+ if (arg.type) -+ mode = type_natural_mode (arg.type, NULL, false); -+ -+ if (TARGET_64BIT) -+ { -+ enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi; - -- /* We don't change -fPIC. */ -- opts->x_flag_pic = flag_pic; -- -- ix86_arch = (enum processor_type) ptr->arch; -- ix86_schedule = (enum attr_cpu) ptr->schedule; -- ix86_tune = (enum processor_type) ptr->tune; -- x86_prefetch_sse = ptr->prefetch_sse; -- opts->x_ix86_branch_cost = ptr->branch_cost; -- ix86_tune_defaulted = ptr->tune_defaulted; -- ix86_arch_specified = ptr->arch_specified; -- opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit; -- opts->x_ix86_isa_flags2_explicit = ptr->x_ix86_isa_flags2_explicit; -- opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit; -- opts->x_ix86_arch_string = ptr->x_ix86_arch_string; -- opts->x_ix86_tune_string = ptr->x_ix86_tune_string; -- opts->x_ix86_cmodel = ptr->x_ix86_cmodel; -- opts->x_ix86_abi = ptr->x_ix86_abi; -- opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect; -- opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost; -- opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes; -- opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer; -- opts->x_ix86_force_drap = ptr->x_ix86_force_drap; -- opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg; -- opts->x_ix86_pmode = ptr->x_ix86_pmode; -- opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg; -- opts->x_ix86_recip_name = ptr->x_ix86_recip_name; -- opts->x_ix86_regparm = ptr->x_ix86_regparm; -- opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold; -- opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx; -- opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard; -- opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg; -- opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect; -- opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string; -- opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy; -- opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy; -- opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default; -- opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type; -- ix86_tune_cost = processor_cost_table[ix86_tune]; -- /* TODO: ix86_cost should be chosen at instruction or function granuality -- so for cold code we use size_cost even in !optimize_size compilation. 
*/ -- if (opts->x_optimize_size) -- ix86_cost = &ix86_size_cost; -+ if (call_abi == MS_ABI) -+ nregs = function_arg_advance_ms_64 (cum, bytes, words); -+ else -+ nregs = function_arg_advance_64 (cum, mode, arg.type, words, -+ arg.named); -+ } - else -- ix86_cost = ix86_tune_cost; -+ nregs = function_arg_advance_32 (cum, mode, arg.type, bytes, words); - -- /* Recreate the arch feature tests if the arch changed */ -- if (old_arch != ix86_arch) -+ if (!nregs) - { -- ix86_arch_mask = HOST_WIDE_INT_1U << ix86_arch; -- for (i = 0; i < X86_ARCH_LAST; ++i) -- ix86_arch_features[i] -- = !!(initial_ix86_arch_features[i] & ix86_arch_mask); -+ /* Track if there are outgoing arguments on stack. */ -+ if (cum->caller) -+ cfun->machine->outgoing_args_on_stack = true; - } -- -- /* Recreate the tune optimization tests */ -- if (old_tune != ix86_tune) -- set_ix86_tune_features (ix86_tune, false); - } - --/* Adjust target options after streaming them in. This is mainly about -- reconciling them with global options. */ -- --static void --ix86_function_specific_post_stream_in (struct cl_target_option *ptr) --{ -- /* flag_pic is a global option, but ix86_cmodel is target saved option -- partly computed from flag_pic. If flag_pic is on, adjust x_ix86_cmodel -- for PIC, or error out. */ -- if (flag_pic) -- switch (ptr->x_ix86_cmodel) -- { -- case CM_SMALL: -- ptr->x_ix86_cmodel = CM_SMALL_PIC; -- break; -+/* Define where to put the arguments to a function. -+ Value is zero to push the argument on the stack, -+ or a hard register in which to store the argument. - -- case CM_MEDIUM: -- ptr->x_ix86_cmodel = CM_MEDIUM_PIC; -- break; -- -- case CM_LARGE: -- ptr->x_ix86_cmodel = CM_LARGE_PIC; -- break; -- -- case CM_KERNEL: -- error ("code model %s does not support PIC mode", "kernel"); -- break; -- -- default: -- break; -- } -- else -- switch (ptr->x_ix86_cmodel) -- { -- case CM_SMALL_PIC: -- ptr->x_ix86_cmodel = CM_SMALL; -- break; -- -- case CM_MEDIUM_PIC: -- ptr->x_ix86_cmodel = CM_MEDIUM; -- break; -- -- case CM_LARGE_PIC: -- ptr->x_ix86_cmodel = CM_LARGE; -- break; -- -- default: -- break; -- } --} -- --/* Print the current options */ -- --static void --ix86_function_specific_print (FILE *file, int indent, -- struct cl_target_option *ptr) --{ -- char *target_string -- = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_ix86_isa_flags2, -- ptr->x_target_flags, ptr->x_ix86_target_flags, -- NULL, NULL, ptr->x_ix86_fpmath, false, true); -- -- gcc_assert (ptr->arch < PROCESSOR_max); -- fprintf (file, "%*sarch = %d (%s)\n", -- indent, "", -- ptr->arch, processor_names[ptr->arch]); -- -- gcc_assert (ptr->tune < PROCESSOR_max); -- fprintf (file, "%*stune = %d (%s)\n", -- indent, "", -- ptr->tune, processor_names[ptr->tune]); -- -- fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost); -- -- if (target_string) -- { -- fprintf (file, "%*s%s\n", indent, "", target_string); -- free (target_string); -- } --} -- -- --/* Inner function to process the attribute((target(...))), take an argument and -- set the current options from the argument. If we have a list, recursively go -- over the list. */ -+ MODE is the argument's machine mode. -+ TYPE is the data type of the argument (as a tree). -+ This is null for libcalls where that information may -+ not be available. -+ CUM is a variable of type CUMULATIVE_ARGS which gives info about -+ the preceding args and about the function being called. -+ NAMED is nonzero if this argument is a named parameter -+ (otherwise it is an extra parameter matching an ellipsis). 
*/ - --static bool --ix86_valid_target_attribute_inner_p (tree args, char *p_strings[], -- struct gcc_options *opts, -- struct gcc_options *opts_set, -- struct gcc_options *enum_opts_set) -+static rtx -+function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode, -+ machine_mode orig_mode, const_tree type, -+ HOST_WIDE_INT bytes, HOST_WIDE_INT words) - { -- char *next_optstr; -- bool ret = true; -- --#define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 } --#define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 } --#define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 } --#define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M } --#define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M } -- -- enum ix86_opt_type -- { -- ix86_opt_unknown, -- ix86_opt_yes, -- ix86_opt_no, -- ix86_opt_str, -- ix86_opt_enum, -- ix86_opt_isa -- }; -- -- static const struct -- { -- const char *string; -- size_t len; -- enum ix86_opt_type type; -- int opt; -- int mask; -- } attrs[] = { -- /* isa options */ -- IX86_ATTR_ISA ("pconfig", OPT_mpconfig), -- IX86_ATTR_ISA ("wbnoinvd", OPT_mwbnoinvd), -- IX86_ATTR_ISA ("sgx", OPT_msgx), -- IX86_ATTR_ISA ("avx5124fmaps", OPT_mavx5124fmaps), -- IX86_ATTR_ISA ("avx5124vnniw", OPT_mavx5124vnniw), -- IX86_ATTR_ISA ("avx512vpopcntdq", OPT_mavx512vpopcntdq), -- IX86_ATTR_ISA ("avx512vbmi2", OPT_mavx512vbmi2), -- IX86_ATTR_ISA ("avx512vnni", OPT_mavx512vnni), -- IX86_ATTR_ISA ("avx512bitalg", OPT_mavx512bitalg), -- -- IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi), -- IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma), -- IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl), -- IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw), -- IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq), -- IX86_ATTR_ISA ("avx512er", OPT_mavx512er), -- IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf), -- IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd), -- IX86_ATTR_ISA ("avx512f", OPT_mavx512f), -- IX86_ATTR_ISA ("avx2", OPT_mavx2), -- IX86_ATTR_ISA ("fma", OPT_mfma), -- IX86_ATTR_ISA ("xop", OPT_mxop), -- IX86_ATTR_ISA ("fma4", OPT_mfma4), -- IX86_ATTR_ISA ("f16c", OPT_mf16c), -- IX86_ATTR_ISA ("avx", OPT_mavx), -- IX86_ATTR_ISA ("sse4", OPT_msse4), -- IX86_ATTR_ISA ("sse4.2", OPT_msse4_2), -- IX86_ATTR_ISA ("sse4.1", OPT_msse4_1), -- IX86_ATTR_ISA ("sse4a", OPT_msse4a), -- IX86_ATTR_ISA ("ssse3", OPT_mssse3), -- IX86_ATTR_ISA ("sse3", OPT_msse3), -- IX86_ATTR_ISA ("aes", OPT_maes), -- IX86_ATTR_ISA ("sha", OPT_msha), -- IX86_ATTR_ISA ("pclmul", OPT_mpclmul), -- IX86_ATTR_ISA ("sse2", OPT_msse2), -- IX86_ATTR_ISA ("sse", OPT_msse), -- IX86_ATTR_ISA ("3dnowa", OPT_m3dnowa), -- IX86_ATTR_ISA ("3dnow", OPT_m3dnow), -- IX86_ATTR_ISA ("mmx", OPT_mmmx), -- IX86_ATTR_ISA ("rtm", OPT_mrtm), -- IX86_ATTR_ISA ("prfchw", OPT_mprfchw), -- IX86_ATTR_ISA ("rdseed", OPT_mrdseed), -- IX86_ATTR_ISA ("adx", OPT_madx), -- IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1), -- IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt), -- IX86_ATTR_ISA ("xsaves", OPT_mxsaves), -- IX86_ATTR_ISA ("xsavec", OPT_mxsavec), -- IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt), -- IX86_ATTR_ISA ("xsave", OPT_mxsave), -- IX86_ATTR_ISA ("abm", OPT_mabm), -- IX86_ATTR_ISA ("bmi", OPT_mbmi), -- IX86_ATTR_ISA ("bmi2", OPT_mbmi2), -- IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt), -- IX86_ATTR_ISA ("tbm", OPT_mtbm), -- IX86_ATTR_ISA ("popcnt", OPT_mpopcnt), -- IX86_ATTR_ISA ("cx16", OPT_mcx16), -- IX86_ATTR_ISA ("sahf", OPT_msahf), -- IX86_ATTR_ISA ("movbe", OPT_mmovbe), -- IX86_ATTR_ISA ("crc32", OPT_mcrc32), -- IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase), -- 
IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd), -- IX86_ATTR_ISA ("mwaitx", OPT_mmwaitx), -- IX86_ATTR_ISA ("clzero", OPT_mclzero), -- IX86_ATTR_ISA ("pku", OPT_mpku), -- IX86_ATTR_ISA ("lwp", OPT_mlwp), -- IX86_ATTR_ISA ("hle", OPT_mhle), -- IX86_ATTR_ISA ("fxsr", OPT_mfxsr), -- IX86_ATTR_ISA ("clwb", OPT_mclwb), -- IX86_ATTR_ISA ("rdpid", OPT_mrdpid), -- IX86_ATTR_ISA ("gfni", OPT_mgfni), -- IX86_ATTR_ISA ("shstk", OPT_mshstk), -- IX86_ATTR_ISA ("vaes", OPT_mvaes), -- IX86_ATTR_ISA ("vpclmulqdq", OPT_mvpclmulqdq), -- IX86_ATTR_ISA ("movdiri", OPT_mmovdiri), -- IX86_ATTR_ISA ("movdir64b", OPT_mmovdir64b), -- IX86_ATTR_ISA ("waitpkg", OPT_mwaitpkg), -- IX86_ATTR_ISA ("cldemote", OPT_mcldemote), -- IX86_ATTR_ISA ("ptwrite", OPT_mptwrite), -- -- /* enum options */ -- IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), -- -- /* string options */ -- IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH), -- IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE), -- -- /* flag options */ -- IX86_ATTR_YES ("cld", -- OPT_mcld, -- MASK_CLD), -- -- IX86_ATTR_NO ("fancy-math-387", -- OPT_mfancy_math_387, -- MASK_NO_FANCY_MATH_387), -- -- IX86_ATTR_YES ("ieee-fp", -- OPT_mieee_fp, -- MASK_IEEE_FP), -- -- IX86_ATTR_YES ("inline-all-stringops", -- OPT_minline_all_stringops, -- MASK_INLINE_ALL_STRINGOPS), -- -- IX86_ATTR_YES ("inline-stringops-dynamically", -- OPT_minline_stringops_dynamically, -- MASK_INLINE_STRINGOPS_DYNAMICALLY), -- -- IX86_ATTR_NO ("align-stringops", -- OPT_mno_align_stringops, -- MASK_NO_ALIGN_STRINGOPS), -- -- IX86_ATTR_YES ("recip", -- OPT_mrecip, -- MASK_RECIP), -- -- }; -- -- /* If this is a list, recurse to get the options. */ -- if (TREE_CODE (args) == TREE_LIST) -- { -- bool ret = true; -- -- for (; args; args = TREE_CHAIN (args)) -- if (TREE_VALUE (args) -- && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), -- p_strings, opts, opts_set, -- enum_opts_set)) -- ret = false; -+ bool error_p = false; - -- return ret; -- } -+ /* Avoid the AL settings for the Unix64 ABI. */ -+ if (mode == VOIDmode) -+ return constm1_rtx; - -- else if (TREE_CODE (args) != STRING_CST) -+ if (TARGET_IAMCU) - { -- error ("attribute % argument not a string"); -- return false; -+ /* Intel MCU psABI passes scalars and aggregates no larger than 8 -+ bytes in registers. */ -+ if (!VECTOR_MODE_P (mode) && bytes <= 8) -+ goto pass_in_reg; -+ return NULL_RTX; - } - -- /* Handle multiple arguments separated by commas. */ -- next_optstr = ASTRDUP (TREE_STRING_POINTER (args)); -- -- while (next_optstr && *next_optstr != '\0') -+ switch (mode) - { -- char *p = next_optstr; -- char *orig_p = p; -- char *comma = strchr (next_optstr, ','); -- const char *opt_string; -- size_t len, opt_len; -- int opt; -- bool opt_set_p; -- char ch; -- unsigned i; -- enum ix86_opt_type type = ix86_opt_unknown; -- int mask = 0; -+ default: -+ break; - -- if (comma) -- { -- *comma = '\0'; -- len = comma - next_optstr; -- next_optstr = comma + 1; -- } -- else -+ case E_BLKmode: -+ if (bytes < 0) -+ break; -+ /* FALLTHRU */ -+ case E_DImode: -+ case E_SImode: -+ case E_HImode: -+ case E_QImode: -+pass_in_reg: -+ if (words <= cum->nregs) - { -- len = strlen (p); -- next_optstr = NULL; -- } -+ int regno = cum->regno; - -- /* Recognize no-xxx. */ -- if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-') -- { -- opt_set_p = false; -- p += 3; -- len -= 3; -- } -- else -- opt_set_p = true; -- -- /* Find the option. 
*/ -- ch = *p; -- opt = N_OPTS; -- for (i = 0; i < ARRAY_SIZE (attrs); i++) -- { -- type = attrs[i].type; -- opt_len = attrs[i].len; -- if (ch == attrs[i].string[0] -- && ((type != ix86_opt_str && type != ix86_opt_enum) -- ? len == opt_len -- : len > opt_len) -- && memcmp (p, attrs[i].string, opt_len) == 0) -+ /* Fastcall allocates the first two DWORD (SImode) or -+ smaller arguments to ECX and EDX if it isn't an -+ aggregate type . */ -+ if (cum->fastcall) - { -- opt = attrs[i].opt; -- mask = attrs[i].mask; -- opt_string = attrs[i].string; -- break; -- } -- } -+ if (mode == BLKmode -+ || mode == DImode -+ || (type && AGGREGATE_TYPE_P (type))) -+ break; - -- /* Process the option. */ -- if (opt == N_OPTS) -- { -- error ("attribute(target(\"%s\")) is unknown", orig_p); -- ret = false; -+ /* ECX not EAX is the first allocated register. */ -+ if (regno == AX_REG) -+ regno = CX_REG; -+ } -+ return gen_rtx_REG (mode, regno); - } -+ break; - -- else if (type == ix86_opt_isa) -+ case E_DFmode: -+ if (cum->float_in_sse == -1) -+ error_p = true; -+ if (cum->float_in_sse < 2) -+ break; -+ /* FALLTHRU */ -+ case E_SFmode: -+ if (cum->float_in_sse == -1) -+ error_p = true; -+ if (cum->float_in_sse < 1) -+ break; -+ /* FALLTHRU */ -+ case E_TImode: -+ /* In 32bit, we pass TImode in xmm registers. */ -+ case E_V16QImode: -+ case E_V8HImode: -+ case E_V4SImode: -+ case E_V2DImode: -+ case E_V4SFmode: -+ case E_V2DFmode: -+ if (!type || !AGGREGATE_TYPE_P (type)) - { -- struct cl_decoded_option decoded; -- -- generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded); -- ix86_handle_option (opts, opts_set, -- &decoded, input_location); -+ if (cum->sse_nregs) -+ return gen_reg_or_parallel (mode, orig_mode, -+ cum->sse_regno + FIRST_SSE_REG); - } -+ break; - -- else if (type == ix86_opt_yes || type == ix86_opt_no) -- { -- if (type == ix86_opt_no) -- opt_set_p = !opt_set_p; -- -- if (opt_set_p) -- opts->x_target_flags |= mask; -- else -- opts->x_target_flags &= ~mask; -- } -+ case E_OImode: -+ case E_XImode: -+ /* OImode and XImode shouldn't be used directly. */ -+ gcc_unreachable (); - -- else if (type == ix86_opt_str) -+ case E_V64QImode: -+ case E_V32HImode: -+ case E_V16SImode: -+ case E_V8DImode: -+ case E_V16SFmode: -+ case E_V8DFmode: -+ case E_V8SFmode: -+ case E_V8SImode: -+ case E_V32QImode: -+ case E_V16HImode: -+ case E_V4DFmode: -+ case E_V4DImode: -+ if (!type || !AGGREGATE_TYPE_P (type)) - { -- if (p_strings[opt]) -- { -- error ("option(\"%s\") was already specified", opt_string); -- ret = false; -- } -- else -- { -- p_strings[opt] = xstrdup (p + opt_len); -- if (opt == IX86_FUNCTION_SPECIFIC_ARCH) -- { -- /* If arch= is set, clear all bits in x_ix86_isa_flags, -- except for ISA_64BIT, ABI_64, ABI_X32, and CODE16 -- and all bits in x_ix86_isa_flags2. 
*/ -- opts->x_ix86_isa_flags &= (OPTION_MASK_ISA_64BIT -- | OPTION_MASK_ABI_64 -- | OPTION_MASK_ABI_X32 -- | OPTION_MASK_CODE16); -- opts->x_ix86_isa_flags_explicit &= (OPTION_MASK_ISA_64BIT -- | OPTION_MASK_ABI_64 -- | OPTION_MASK_ABI_X32 -- | OPTION_MASK_CODE16); -- opts->x_ix86_isa_flags2 = 0; -- opts->x_ix86_isa_flags2_explicit = 0; -- } -- } -+ if (cum->sse_nregs) -+ return gen_reg_or_parallel (mode, orig_mode, -+ cum->sse_regno + FIRST_SSE_REG); - } -+ break; - -- else if (type == ix86_opt_enum) -+ case E_V8QImode: -+ case E_V4HImode: -+ case E_V2SImode: -+ case E_V2SFmode: -+ case E_V1TImode: -+ case E_V1DImode: -+ if (!type || !AGGREGATE_TYPE_P (type)) - { -- bool arg_ok; -- int value; -- -- arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET); -- if (arg_ok) -- set_option (opts, enum_opts_set, opt, value, -- p + opt_len, DK_UNSPECIFIED, input_location, -- global_dc); -- else -- { -- error ("attribute(target(\"%s\")) is unknown", orig_p); -- ret = false; -- } -+ if (cum->mmx_nregs) -+ return gen_reg_or_parallel (mode, orig_mode, -+ cum->mmx_regno + FIRST_MMX_REG); - } -- -- else -- gcc_unreachable (); -+ break; -+ } -+ if (error_p) -+ { -+ cum->float_in_sse = 0; -+ error ("calling %qD with SSE calling convention without " -+ "SSE/SSE2 enabled", cum->decl); -+ sorry ("this is a GCC bug that can be worked around by adding " -+ "attribute used to function called"); - } - -- return ret; --} -- --/* Release allocated strings. */ --static void --release_options_strings (char **option_strings) --{ -- /* Free up memory allocated to hold the strings */ -- for (unsigned i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++) -- free (option_strings[i]); -+ return NULL_RTX; - } - --/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */ -- --tree --ix86_valid_target_attribute_tree (tree args, -- struct gcc_options *opts, -- struct gcc_options *opts_set) -+static rtx -+function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode, -+ machine_mode orig_mode, const_tree type, bool named) - { -- const char *orig_arch_string = opts->x_ix86_arch_string; -- const char *orig_tune_string = opts->x_ix86_tune_string; -- enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath; -- int orig_tune_defaulted = ix86_tune_defaulted; -- int orig_arch_specified = ix86_arch_specified; -- char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL }; -- tree t = NULL_TREE; -- struct cl_target_option *def -- = TREE_TARGET_OPTION (target_option_default_node); -- struct gcc_options enum_opts_set; -- -- memset (&enum_opts_set, 0, sizeof (enum_opts_set)); -- -- /* Process each of the options on the chain. */ -- if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts, -- opts_set, &enum_opts_set)) -- return error_mark_node; -+ /* Handle a hidden AL argument containing number of registers -+ for varargs x86-64 functions. */ -+ if (mode == VOIDmode) -+ return GEN_INT (cum->maybe_vaarg -+ ? (cum->sse_nregs < 0 -+ ? X86_64_SSE_REGPARM_MAX -+ : cum->sse_regno) -+ : -1); - -- /* If the changed options are different from the default, rerun -- ix86_option_override_internal, and then save the options away. -- The string options are attribute options, and will be undone -- when we copy the save structure. 
*/ -- if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags -- || opts->x_ix86_isa_flags2 != def->x_ix86_isa_flags2 -- || opts->x_target_flags != def->x_target_flags -- || option_strings[IX86_FUNCTION_SPECIFIC_ARCH] -- || option_strings[IX86_FUNCTION_SPECIFIC_TUNE] -- || enum_opts_set.x_ix86_fpmath) -+ switch (mode) - { -- /* If we are using the default tune= or arch=, undo the string assigned, -- and use the default. */ -- if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]) -- opts->x_ix86_arch_string -- = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]); -- else if (!orig_arch_specified) -- opts->x_ix86_arch_string = NULL; -- -- if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]) -- opts->x_ix86_tune_string -- = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]); -- else if (orig_tune_defaulted) -- opts->x_ix86_tune_string = NULL; -- -- /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */ -- if (enum_opts_set.x_ix86_fpmath) -- opts_set->x_ix86_fpmath = (enum fpmath_unit) 1; -- -- /* Do any overrides, such as arch=xxx, or tune=xxx support. */ -- bool r = ix86_option_override_internal (false, opts, opts_set); -- if (!r) -- { -- release_options_strings (option_strings); -- return error_mark_node; -- } -- -- /* Add any builtin functions with the new isa if any. */ -- ix86_add_new_builtins (opts->x_ix86_isa_flags, opts->x_ix86_isa_flags2); -- -- /* Save the current options unless we are validating options for -- #pragma. */ -- t = build_target_option_node (opts); -- -- opts->x_ix86_arch_string = orig_arch_string; -- opts->x_ix86_tune_string = orig_tune_string; -- opts_set->x_ix86_fpmath = orig_fpmath_set; -+ default: -+ break; - -- release_options_strings (option_strings); -+ case E_V8SFmode: -+ case E_V8SImode: -+ case E_V32QImode: -+ case E_V16HImode: -+ case E_V4DFmode: -+ case E_V4DImode: -+ case E_V16SFmode: -+ case E_V16SImode: -+ case E_V64QImode: -+ case E_V32HImode: -+ case E_V8DFmode: -+ case E_V8DImode: -+ /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */ -+ if (!named) -+ return NULL; -+ break; - } - -- return t; -+ return construct_container (mode, orig_mode, type, 0, cum->nregs, -+ cum->sse_nregs, -+ &x86_64_int_parameter_registers [cum->regno], -+ cum->sse_regno); - } - --/* Hook to validate attribute((target("string"))). */ -- --static bool --ix86_valid_target_attribute_p (tree fndecl, -- tree ARG_UNUSED (name), -- tree args, -- int ARG_UNUSED (flags)) --{ -- struct gcc_options func_options; -- tree new_target, new_optimize; -- bool ret = true; -- -- /* attribute((target("default"))) does nothing, beyond -- affecting multi-versioning. */ -- if (TREE_VALUE (args) -- && TREE_CODE (TREE_VALUE (args)) == STRING_CST -- && TREE_CHAIN (args) == NULL_TREE -- && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0) -- return true; -+static rtx -+function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode, -+ machine_mode orig_mode, bool named, const_tree type, -+ HOST_WIDE_INT bytes) -+{ -+ unsigned int regno; - -- tree old_optimize = build_optimization_node (&global_options); -- -- /* Get the optimization options of the current function. */ -- tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl); -- -- if (!func_optimize) -- func_optimize = old_optimize; -- -- /* Init func_options. 
*/ -- memset (&func_options, 0, sizeof (func_options)); -- init_options_struct (&func_options, NULL); -- lang_hooks.init_options_struct (&func_options); -- -- cl_optimization_restore (&func_options, -- TREE_OPTIMIZATION (func_optimize)); -- -- /* Initialize func_options to the default before its target options can -- be set. */ -- cl_target_option_restore (&func_options, -- TREE_TARGET_OPTION (target_option_default_node)); -- -- new_target = ix86_valid_target_attribute_tree (args, &func_options, -- &global_options_set); -- -- new_optimize = build_optimization_node (&func_options); -- -- if (new_target == error_mark_node) -- ret = false; -- -- else if (fndecl && new_target) -- { -- DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target; -- -- if (old_optimize != new_optimize) -- DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize; -- } -- -- finalize_options_struct (&func_options); -- -- return ret; --} -- -- --/* Hook to determine if one function can safely inline another. */ -- --static bool --ix86_can_inline_p (tree caller, tree callee) --{ -- tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller); -- tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee); -- -- /* Changes of those flags can be tolerated for always inlines. Lets hope -- user knows what he is doing. */ -- const unsigned HOST_WIDE_INT always_inline_safe_mask -- = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS -- | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD -- | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD -- | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS -- | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE -- | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER -- | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER); -- -- -- if (!callee_tree) -- callee_tree = target_option_default_node; -- if (!caller_tree) -- caller_tree = target_option_default_node; -- if (callee_tree == caller_tree) -- return true; -- -- struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree); -- struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree); -- bool ret = false; -- bool always_inline -- = (DECL_DISREGARD_INLINE_LIMITS (callee) -- && lookup_attribute ("always_inline", -- DECL_ATTRIBUTES (callee))); -- -- cgraph_node *callee_node = cgraph_node::get (callee); -- /* Callee's isa options should be a subset of the caller's, i.e. a SSE4 -- function can inline a SSE2 function but a SSE2 function can't inline -- a SSE4 function. */ -- if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags) -- != callee_opts->x_ix86_isa_flags) -- || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2) -- != callee_opts->x_ix86_isa_flags2)) -- ret = false; -- -- /* See if we have the same non-isa options. */ -- else if ((!always_inline -- && caller_opts->x_target_flags != callee_opts->x_target_flags) -- || (caller_opts->x_target_flags & ~always_inline_safe_mask) -- != (callee_opts->x_target_flags & ~always_inline_safe_mask)) -- ret = false; -- -- /* See if arch, tune, etc. are the same. */ -- else if (caller_opts->arch != callee_opts->arch) -- ret = false; -- -- else if (!always_inline && caller_opts->tune != callee_opts->tune) -- ret = false; -- -- else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath -- /* If the calle doesn't use FP expressions differences in -- ix86_fpmath can be ignored. We are called from FEs -- for multi-versioning call optimization, so beware of -- ipa_fn_summaries not available. */ -- && (! 
ipa_fn_summaries -- || ipa_fn_summaries->get (callee_node) == NULL -- || ipa_fn_summaries->get (callee_node)->fp_expressions)) -- ret = false; -- -- else if (!always_inline -- && caller_opts->branch_cost != callee_opts->branch_cost) -- ret = false; -- -- else -- ret = true; -- -- return ret; --} -- -- --/* Remember the last target of ix86_set_current_function. */ --static GTY(()) tree ix86_previous_fndecl; -- --/* Set targets globals to the default (or current #pragma GCC target -- if active). Invalidate ix86_previous_fndecl cache. */ -+ /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call. -+ We use value of -2 to specify that current function call is MSABI. */ -+ if (mode == VOIDmode) -+ return GEN_INT (-2); - --void --ix86_reset_previous_fndecl (void) --{ -- tree new_tree = target_option_current_node; -- cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree)); -- if (TREE_TARGET_GLOBALS (new_tree)) -- restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); -- else if (new_tree == target_option_default_node) -- restore_target_globals (&default_target_globals); -- else -- TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts (); -- ix86_previous_fndecl = NULL_TREE; --} -+ /* If we've run out of registers, it goes on the stack. */ -+ if (cum->nregs == 0) -+ return NULL_RTX; - --/* Set the func_type field from the function FNDECL. */ -+ regno = x86_64_ms_abi_int_parameter_registers[cum->regno]; - --static void --ix86_set_func_type (tree fndecl) --{ -- if (cfun->machine->func_type == TYPE_UNKNOWN) -+ /* Only floating point modes are passed in anything but integer regs. */ -+ if (TARGET_SSE && (mode == SFmode || mode == DFmode)) - { -- if (lookup_attribute ("interrupt", -- TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))) -+ if (named) - { -- if (ix86_function_naked (fndecl)) -- error_at (DECL_SOURCE_LOCATION (fndecl), -- "interrupt and naked attributes are not compatible"); -- -- int nargs = 0; -- for (tree arg = DECL_ARGUMENTS (fndecl); -- arg; -- arg = TREE_CHAIN (arg)) -- nargs++; -- cfun->machine->no_caller_saved_registers = true; -- cfun->machine->func_type -- = nargs == 2 ? TYPE_EXCEPTION : TYPE_INTERRUPT; -- -- ix86_optimize_mode_switching[X86_DIRFLAG] = 1; -- -- /* Only dwarf2out.c can handle -WORD(AP) as a pointer argument. */ -- if (write_symbols != NO_DEBUG && write_symbols != DWARF2_DEBUG) -- sorry ("only DWARF debug format is supported for interrupt " -- "service routine"); -+ if (type == NULL_TREE || !AGGREGATE_TYPE_P (type)) -+ regno = cum->regno + FIRST_SSE_REG; - } - else - { -- cfun->machine->func_type = TYPE_NORMAL; -- if (lookup_attribute ("no_caller_saved_registers", -- TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))) -- cfun->machine->no_caller_saved_registers = true; -+ rtx t1, t2; -+ -+ /* Unnamed floating parameters are passed in both the -+ SSE and integer registers. */ -+ t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG); -+ t2 = gen_rtx_REG (mode, regno); -+ t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx); -+ t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx); -+ return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2)); - } - } -+ /* Handle aggregated types passed in register. */ -+ if (orig_mode == BLKmode) -+ { -+ if (bytes > 0 && bytes <= 8) -+ mode = (bytes > 4 ? DImode : SImode); -+ if (mode == BLKmode) -+ mode = DImode; -+ } -+ -+ return gen_reg_or_parallel (mode, orig_mode, regno); - } - --/* Set the indirect_branch_type field from the function FNDECL. */ -+/* Return where to put the arguments to a function. 
-+ Return zero to push the argument on the stack, or a hard register in which to store the argument. - --static void --ix86_set_indirect_branch_type (tree fndecl) -+ ARG describes the argument while CUM gives information about the -+ preceding args and about the function being called. */ -+ -+static rtx -+ix86_function_arg (cumulative_args_t cum_v, const function_arg_info &arg) - { -- if (cfun->machine->indirect_branch_type == indirect_branch_unset) -+ CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); -+ machine_mode mode = arg.mode; -+ HOST_WIDE_INT bytes, words; -+ rtx reg; -+ -+ if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL) - { -- tree attr = lookup_attribute ("indirect_branch", -- DECL_ATTRIBUTES (fndecl)); -- if (attr != NULL) -+ gcc_assert (arg.type != NULL_TREE); -+ if (POINTER_TYPE_P (arg.type)) - { -- tree args = TREE_VALUE (attr); -- if (args == NULL) -- gcc_unreachable (); -- tree cst = TREE_VALUE (args); -- if (strcmp (TREE_STRING_POINTER (cst), "keep") == 0) -- cfun->machine->indirect_branch_type = indirect_branch_keep; -- else if (strcmp (TREE_STRING_POINTER (cst), "thunk") == 0) -- cfun->machine->indirect_branch_type = indirect_branch_thunk; -- else if (strcmp (TREE_STRING_POINTER (cst), "thunk-inline") == 0) -- cfun->machine->indirect_branch_type = indirect_branch_thunk_inline; -- else if (strcmp (TREE_STRING_POINTER (cst), "thunk-extern") == 0) -- cfun->machine->indirect_branch_type = indirect_branch_thunk_extern; -- else -- gcc_unreachable (); -- } -- else -- cfun->machine->indirect_branch_type = ix86_indirect_branch; -- -- /* -mcmodel=large is not compatible with -mindirect-branch=thunk -- nor -mindirect-branch=thunk-extern. */ -- if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC) -- && ((cfun->machine->indirect_branch_type -- == indirect_branch_thunk_extern) -- || (cfun->machine->indirect_branch_type -- == indirect_branch_thunk))) -- error ("%<-mindirect-branch=%s%> and %<-mcmodel=large%> are not " -- "compatible", -- ((cfun->machine->indirect_branch_type -- == indirect_branch_thunk_extern) -- ? "thunk-extern" : "thunk")); -- -- if (cfun->machine->indirect_branch_type != indirect_branch_keep -- && (flag_cf_protection & CF_RETURN)) -- error ("%<-mindirect-branch%> and %<-fcf-protection%> are not " -- "compatible"); -- } -- -- if (cfun->machine->function_return_type == indirect_branch_unset) -- { -- tree attr = lookup_attribute ("function_return", -- DECL_ATTRIBUTES (fndecl)); -- if (attr != NULL) -- { -- tree args = TREE_VALUE (attr); -- if (args == NULL) -- gcc_unreachable (); -- tree cst = TREE_VALUE (args); -- if (strcmp (TREE_STRING_POINTER (cst), "keep") == 0) -- cfun->machine->function_return_type = indirect_branch_keep; -- else if (strcmp (TREE_STRING_POINTER (cst), "thunk") == 0) -- cfun->machine->function_return_type = indirect_branch_thunk; -- else if (strcmp (TREE_STRING_POINTER (cst), "thunk-inline") == 0) -- cfun->machine->function_return_type = indirect_branch_thunk_inline; -- else if (strcmp (TREE_STRING_POINTER (cst), "thunk-extern") == 0) -- cfun->machine->function_return_type = indirect_branch_thunk_extern; -- else -- gcc_unreachable (); -+ /* This is the pointer argument. */ -+ gcc_assert (TYPE_MODE (arg.type) == Pmode); -+ /* It is at -WORD(AP) in the current frame in interrupt and -+ exception handlers. 
*/ -+ reg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD); - } - else -- cfun->machine->function_return_type = ix86_function_return; -- -- /* -mcmodel=large is not compatible with -mfunction-return=thunk -- nor -mfunction-return=thunk-extern. */ -- if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC) -- && ((cfun->machine->function_return_type -- == indirect_branch_thunk_extern) -- || (cfun->machine->function_return_type -- == indirect_branch_thunk))) -- error ("%<-mfunction-return=%s%> and %<-mcmodel=large%> are not " -- "compatible", -- ((cfun->machine->function_return_type -- == indirect_branch_thunk_extern) -- ? "thunk-extern" : "thunk")); -- -- if (cfun->machine->function_return_type != indirect_branch_keep -- && (flag_cf_protection & CF_RETURN)) -- error ("%<-mfunction-return%> and %<-fcf-protection%> are not " -- "compatible"); -- } --} -- --/* Establish appropriate back-end context for processing the function -- FNDECL. The argument might be NULL to indicate processing at top -- level, outside of any function scope. */ --static void --ix86_set_current_function (tree fndecl) --{ -- /* Only change the context if the function changes. This hook is called -- several times in the course of compiling a function, and we don't want to -- slow things down too much or call target_reinit when it isn't safe. */ -- if (fndecl == ix86_previous_fndecl) -- { -- /* There may be 2 function bodies for the same function FNDECL, -- one is extern inline and one isn't. Call ix86_set_func_type -- to set the func_type field. */ -- if (fndecl != NULL_TREE) - { -- ix86_set_func_type (fndecl); -- ix86_set_indirect_branch_type (fndecl); -+ gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION -+ && TREE_CODE (arg.type) == INTEGER_TYPE -+ && TYPE_MODE (arg.type) == word_mode); -+ /* The error code is the word-mode integer argument at -+ -2 * WORD(AP) in the current frame of the exception -+ handler. */ -+ reg = gen_rtx_MEM (word_mode, -+ plus_constant (Pmode, -+ arg_pointer_rtx, -+ -2 * UNITS_PER_WORD)); - } -- return; -- } -- -- tree old_tree; -- if (ix86_previous_fndecl == NULL_TREE) -- old_tree = target_option_current_node; -- else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)) -- old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl); -- else -- old_tree = target_option_default_node; -- -- if (fndecl == NULL_TREE) -- { -- if (old_tree != target_option_current_node) -- ix86_reset_previous_fndecl (); -- return; -+ return reg; - } - -- ix86_set_func_type (fndecl); -- ix86_set_indirect_branch_type (fndecl); -+ bytes = arg.promoted_size_in_bytes (); -+ words = CEIL (bytes, UNITS_PER_WORD); - -- tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl); -- if (new_tree == NULL_TREE) -- new_tree = target_option_default_node; -+ /* To simplify the code below, represent vector types with a vector mode -+ even if MMX/SSE are not active. */ -+ if (arg.type && TREE_CODE (arg.type) == VECTOR_TYPE) -+ mode = type_natural_mode (arg.type, cum, false); - -- if (old_tree != new_tree) -+ if (TARGET_64BIT) - { -- cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree)); -- if (TREE_TARGET_GLOBALS (new_tree)) -- restore_target_globals (TREE_TARGET_GLOBALS (new_tree)); -- else if (new_tree == target_option_default_node) -- restore_target_globals (&default_target_globals); -- else -- TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts (); -- } -- ix86_previous_fndecl = fndecl; -- -- static bool prev_no_caller_saved_registers; -+ enum calling_abi call_abi = cum ? 
cum->call_abi : ix86_abi; - -- /* 64-bit MS and SYSV ABI have different set of call used registers. -- Avoid expensive re-initialization of init_regs each time we switch -- function context. */ -- if (TARGET_64BIT -- && (call_used_regs[SI_REG] -- == (cfun->machine->call_abi == MS_ABI))) -- reinit_regs (); -- /* Need to re-initialize init_regs if caller-saved registers are -- changed. */ -- else if (prev_no_caller_saved_registers -- != cfun->machine->no_caller_saved_registers) -- reinit_regs (); -- -- if (cfun->machine->func_type != TYPE_NORMAL -- || cfun->machine->no_caller_saved_registers) -- { -- /* Don't allow SSE, MMX nor x87 instructions since they -- may change processor state. */ -- const char *isa; -- if (TARGET_SSE) -- isa = "SSE"; -- else if (TARGET_MMX) -- isa = "MMX/3Dnow"; -- else if (TARGET_80387) -- isa = "80387"; -+ if (call_abi == MS_ABI) -+ reg = function_arg_ms_64 (cum, mode, arg.mode, arg.named, -+ arg.type, bytes); - else -- isa = NULL; -- if (isa != NULL) -- { -- if (cfun->machine->func_type != TYPE_NORMAL) -- sorry (cfun->machine->func_type == TYPE_EXCEPTION -- ? G_("%s instructions aren%'t allowed in an" -- " exception service routine") -- : G_("%s instructions aren%'t allowed in an" -- " interrupt service routine"), -- isa); -- else -- sorry ("%s instructions aren%'t allowed in a function with " -- "the % attribute", isa); -- /* Don't issue the same error twice. */ -- cfun->machine->func_type = TYPE_NORMAL; -- cfun->machine->no_caller_saved_registers = false; -- } -+ reg = function_arg_64 (cum, mode, arg.mode, arg.type, arg.named); - } -+ else -+ reg = function_arg_32 (cum, mode, arg.mode, arg.type, bytes, words); - -- prev_no_caller_saved_registers -- = cfun->machine->no_caller_saved_registers; -+ /* Track if there are outgoing arguments on stack. */ -+ if (reg == NULL_RTX && cum->caller) -+ cfun->machine->outgoing_args_on_stack = true; -+ -+ return reg; - } - -- --/* Return true if this goes in large data/bss. */ -+/* A C expression that indicates when an argument must be passed by -+ reference. If nonzero for an argument, a copy of that argument is -+ made in memory and a pointer to the argument is passed instead of -+ the argument itself. The pointer is passed in whatever way is -+ appropriate for passing a pointer to that type. */ - - static bool --ix86_in_large_data_p (tree exp) -+ix86_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg) - { -- if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC) -- return false; -+ CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); - -- if (exp == NULL_TREE) -- return false; -+ if (TARGET_64BIT) -+ { -+ enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi; - -- /* Functions are never large data. */ -- if (TREE_CODE (exp) == FUNCTION_DECL) -- return false; -+ /* See Windows x64 Software Convention. */ -+ if (call_abi == MS_ABI) -+ { -+ HOST_WIDE_INT msize = GET_MODE_SIZE (arg.mode); - -- /* Automatic variables are never large data. */ -- if (VAR_P (exp) && !is_global_var (exp)) -- return false; -+ if (tree type = arg.type) -+ { -+ /* Arrays are passed by reference. 
*/ -+ if (TREE_CODE (type) == ARRAY_TYPE) -+ return true; - -- if (VAR_P (exp) && DECL_SECTION_NAME (exp)) -- { -- const char *section = DECL_SECTION_NAME (exp); -- if (strcmp (section, ".ldata") == 0 -- || strcmp (section, ".lbss") == 0) -- return true; -- return false; -- } -- else -- { -- HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp)); -+ if (RECORD_OR_UNION_TYPE_P (type)) -+ { -+ /* Structs/unions of sizes other than 8, 16, 32, or 64 bits -+ are passed by reference. */ -+ msize = int_size_in_bytes (type); -+ } -+ } - -- /* If this is an incomplete type with size 0, then we can't put it -- in data because it might be too big when completed. Also, -- int_size_in_bytes returns -1 if size can vary or is larger than -- an integer in which case also it is safer to assume that it goes in -- large data. */ -- if (size <= 0 || size > ix86_section_threshold) -+ /* __m128 is passed by reference. */ -+ return msize != 1 && msize != 2 && msize != 4 && msize != 8; -+ } -+ else if (arg.type && int_size_in_bytes (arg.type) == -1) - return true; - } - - return false; - } - --/* i386-specific section flag to mark large sections. */ --#define SECTION_LARGE SECTION_MACH_DEP -- --/* Switch to the appropriate section for output of DECL. -- DECL is either a `VAR_DECL' node or a constant of some sort. -- RELOC indicates whether forming the initial value of DECL requires -- link-time relocations. */ -+/* Return true when TYPE should be 128bit aligned for 32bit argument -+ passing ABI. XXX: This function is obsolete and is only used for -+ checking psABI compatibility with previous versions of GCC. */ - --ATTRIBUTE_UNUSED static section * --x86_64_elf_select_section (tree decl, int reloc, -- unsigned HOST_WIDE_INT align) -+static bool -+ix86_compat_aligned_value_p (const_tree type) - { -- if (ix86_in_large_data_p (decl)) -+ machine_mode mode = TYPE_MODE (type); -+ if (((TARGET_SSE && SSE_REG_MODE_P (mode)) -+ || mode == TDmode -+ || mode == TFmode -+ || mode == TCmode) -+ && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128)) -+ return true; -+ if (TYPE_ALIGN (type) < 128) -+ return false; -+ -+ if (AGGREGATE_TYPE_P (type)) - { -- const char *sname = NULL; -- unsigned int flags = SECTION_WRITE | SECTION_LARGE; -- switch (categorize_decl_for_section (decl, reloc)) -+ /* Walk the aggregates recursively. */ -+ switch (TREE_CODE (type)) - { -- case SECCAT_DATA: -- sname = ".ldata"; -- break; -- case SECCAT_DATA_REL: -- sname = ".ldata.rel"; -- break; -- case SECCAT_DATA_REL_LOCAL: -- sname = ".ldata.rel.local"; -- break; -- case SECCAT_DATA_REL_RO: -- sname = ".ldata.rel.ro"; -- break; -- case SECCAT_DATA_REL_RO_LOCAL: -- sname = ".ldata.rel.ro.local"; -- break; -- case SECCAT_BSS: -- sname = ".lbss"; -- flags |= SECTION_BSS; -- break; -- case SECCAT_RODATA: -- case SECCAT_RODATA_MERGE_STR: -- case SECCAT_RODATA_MERGE_STR_INIT: -- case SECCAT_RODATA_MERGE_CONST: -- sname = ".lrodata"; -- flags &= ~SECTION_WRITE; -+ case RECORD_TYPE: -+ case UNION_TYPE: -+ case QUAL_UNION_TYPE: -+ { -+ tree field; -+ -+ /* Walk all the structure fields. */ -+ for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) -+ { -+ if (TREE_CODE (field) == FIELD_DECL -+ && ix86_compat_aligned_value_p (TREE_TYPE (field))) -+ return true; -+ } -+ break; -+ } -+ -+ case ARRAY_TYPE: -+ /* Just for use if some languages passes arrays by value. 
*/ -+ if (ix86_compat_aligned_value_p (TREE_TYPE (type))) -+ return true; - break; -- case SECCAT_SRODATA: -- case SECCAT_SDATA: -- case SECCAT_SBSS: -+ -+ default: - gcc_unreachable (); -- case SECCAT_TEXT: -- case SECCAT_TDATA: -- case SECCAT_TBSS: -- /* We don't split these for medium model. Place them into -- default sections and hope for best. */ -- break; -- } -- if (sname) -- { -- /* We might get called with string constants, but get_named_section -- doesn't like them as they are not DECLs. Also, we need to set -- flags in that case. */ -- if (!DECL_P (decl)) -- return get_section (sname, flags, NULL); -- return get_named_section (decl, sname, reloc); - } - } -- return default_elf_select_section (decl, reloc, align); -+ return false; - } - --/* Select a set of attributes for section NAME based on the properties -- of DECL and whether or not RELOC indicates that DECL's initializer -- might contain runtime relocations. */ -+/* Return the alignment boundary for MODE and TYPE with alignment ALIGN. -+ XXX: This function is obsolete and is only used for checking psABI -+ compatibility with previous versions of GCC. */ - --static unsigned int ATTRIBUTE_UNUSED --x86_64_elf_section_type_flags (tree decl, const char *name, int reloc) -+static unsigned int -+ix86_compat_function_arg_boundary (machine_mode mode, -+ const_tree type, unsigned int align) - { -- unsigned int flags = default_section_type_flags (decl, name, reloc); -+ /* In 32bit, only _Decimal128 and __float128 are aligned to their -+ natural boundaries. */ -+ if (!TARGET_64BIT && mode != TDmode && mode != TFmode) -+ { -+ /* i386 ABI defines all arguments to be 4 byte aligned. We have to -+ make an exception for SSE modes since these require 128bit -+ alignment. - -- if (ix86_in_large_data_p (decl)) -- flags |= SECTION_LARGE; -+ The handling here differs from field_alignment. ICC aligns MMX -+ arguments to 4 byte boundaries, while structure fields are aligned -+ to 8 byte boundaries. */ -+ if (!type) -+ { -+ if (!(TARGET_SSE && SSE_REG_MODE_P (mode))) -+ align = PARM_BOUNDARY; -+ } -+ else -+ { -+ if (!ix86_compat_aligned_value_p (type)) -+ align = PARM_BOUNDARY; -+ } -+ } -+ if (align > BIGGEST_ALIGNMENT) -+ align = BIGGEST_ALIGNMENT; -+ return align; -+} - -- if (decl == NULL_TREE -- && (strcmp (name, ".ldata.rel.ro") == 0 -- || strcmp (name, ".ldata.rel.ro.local") == 0)) -- flags |= SECTION_RELRO; -+/* Return true when TYPE should be 128bit aligned for 32bit argument -+ passing ABI. */ - -- if (strcmp (name, ".lbss") == 0 -- || strncmp (name, ".lbss.", 5) == 0 -- || strncmp (name, ".gnu.linkonce.lb.", 16) == 0) -- flags |= SECTION_BSS; -+static bool -+ix86_contains_aligned_value_p (const_tree type) -+{ -+ machine_mode mode = TYPE_MODE (type); - -- return flags; --} -+ if (mode == XFmode || mode == XCmode) -+ return false; - --/* Build up a unique section name, expressed as a -- STRING_CST node, and assign it to DECL_SECTION_NAME (decl). -- RELOC indicates whether the initial value of EXP requires -- link-time relocations. */ -+ if (TYPE_ALIGN (type) < 128) -+ return false; - --static void ATTRIBUTE_UNUSED --x86_64_elf_unique_section (tree decl, int reloc) --{ -- if (ix86_in_large_data_p (decl)) -+ if (AGGREGATE_TYPE_P (type)) - { -- const char *prefix = NULL; -- /* We only need to use .gnu.linkonce if we don't have COMDAT groups. 
*/ -- bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP; -- -- switch (categorize_decl_for_section (decl, reloc)) -- { -- case SECCAT_DATA: -- case SECCAT_DATA_REL: -- case SECCAT_DATA_REL_LOCAL: -- case SECCAT_DATA_REL_RO: -- case SECCAT_DATA_REL_RO_LOCAL: -- prefix = one_only ? ".ld" : ".ldata"; -- break; -- case SECCAT_BSS: -- prefix = one_only ? ".lb" : ".lbss"; -- break; -- case SECCAT_RODATA: -- case SECCAT_RODATA_MERGE_STR: -- case SECCAT_RODATA_MERGE_STR_INIT: -- case SECCAT_RODATA_MERGE_CONST: -- prefix = one_only ? ".lr" : ".lrodata"; -- break; -- case SECCAT_SRODATA: -- case SECCAT_SDATA: -- case SECCAT_SBSS: -- gcc_unreachable (); -- case SECCAT_TEXT: -- case SECCAT_TDATA: -- case SECCAT_TBSS: -- /* We don't split these for medium model. Place them into -- default sections and hope for best. */ -- break; -- } -- if (prefix) -+ /* Walk the aggregates recursively. */ -+ switch (TREE_CODE (type)) - { -- const char *name, *linkonce; -- char *string; -- -- name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); -- name = targetm.strip_name_encoding (name); -+ case RECORD_TYPE: -+ case UNION_TYPE: -+ case QUAL_UNION_TYPE: -+ { -+ tree field; - -- /* If we're using one_only, then there needs to be a .gnu.linkonce -- prefix to the section name. */ -- linkonce = one_only ? ".gnu.linkonce" : ""; -+ /* Walk all the structure fields. */ -+ for (field = TYPE_FIELDS (type); -+ field; -+ field = DECL_CHAIN (field)) -+ { -+ if (TREE_CODE (field) == FIELD_DECL -+ && ix86_contains_aligned_value_p (TREE_TYPE (field))) -+ return true; -+ } -+ break; -+ } - -- string = ACONCAT ((linkonce, prefix, ".", name, NULL)); -+ case ARRAY_TYPE: -+ /* Just for use if some languages passes arrays by value. */ -+ if (ix86_contains_aligned_value_p (TREE_TYPE (type))) -+ return true; -+ break; - -- set_decl_section_name (decl, string); -- return; -+ default: -+ gcc_unreachable (); - } - } -- default_unique_section (decl, reloc); --} -- --#ifdef COMMON_ASM_OP -+ else -+ return TYPE_ALIGN (type) >= 128; - --#ifndef LARGECOMM_SECTION_ASM_OP --#define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t" --#endif -+ return false; -+} - --/* This says how to output assembler code to declare an -- uninitialized external linkage data object. -+/* Gives the alignment boundary, in bits, of an argument with the -+ specified mode and type. */ - -- For medium model x86-64 we need to use LARGECOMM_SECTION_ASM_OP opcode for -- large objects. */ --void --x86_elf_aligned_decl_common (FILE *file, tree decl, -- const char *name, unsigned HOST_WIDE_INT size, -- int align) -+static unsigned int -+ix86_function_arg_boundary (machine_mode mode, const_tree type) - { -- if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) -- && size > (unsigned int)ix86_section_threshold) -+ unsigned int align; -+ if (type) - { -- switch_to_section (get_named_section (decl, ".lbss", 0)); -- fputs (LARGECOMM_SECTION_ASM_OP, file); -+ /* Since the main variant type is used for call, we convert it to -+ the main variant type. 
*/ -+ type = TYPE_MAIN_VARIANT (type); -+ align = TYPE_ALIGN (type); -+ if (TYPE_EMPTY_P (type)) -+ return PARM_BOUNDARY; - } - else -- fputs (COMMON_ASM_OP, file); -- assemble_name (file, name); -- fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n", -- size, align / BITS_PER_UNIT); --} --#endif -+ align = GET_MODE_ALIGNMENT (mode); -+ if (align < PARM_BOUNDARY) -+ align = PARM_BOUNDARY; -+ else -+ { -+ static bool warned; -+ unsigned int saved_align = align; - --/* Utility function for targets to use in implementing -- ASM_OUTPUT_ALIGNED_BSS. */ -+ if (!TARGET_64BIT) -+ { -+ /* i386 ABI defines XFmode arguments to be 4 byte aligned. */ -+ if (!type) -+ { -+ if (mode == XFmode || mode == XCmode) -+ align = PARM_BOUNDARY; -+ } -+ else if (!ix86_contains_aligned_value_p (type)) -+ align = PARM_BOUNDARY; - --void --x86_output_aligned_bss (FILE *file, tree decl, const char *name, -- unsigned HOST_WIDE_INT size, int align) --{ -- if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) -- && size > (unsigned int)ix86_section_threshold) -- switch_to_section (get_named_section (decl, ".lbss", 0)); -- else -- switch_to_section (bss_section); -- ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT)); --#ifdef ASM_DECLARE_OBJECT_NAME -- last_assemble_variable_decl = decl; -- ASM_DECLARE_OBJECT_NAME (file, name, decl); --#else -- /* Standard thing is just output label for the object. */ -- ASM_OUTPUT_LABEL (file, name); --#endif /* ASM_DECLARE_OBJECT_NAME */ -- ASM_OUTPUT_SKIP (file, size ? size : 1); --} -- --/* Decide whether we must probe the stack before any space allocation -- on this target. It's essentially TARGET_STACK_PROBE except when -- -fstack-check causes the stack to be already probed differently. */ -+ if (align < 128) -+ align = PARM_BOUNDARY; -+ } - --bool --ix86_target_stack_probe (void) --{ -- /* Do not probe the stack twice if static stack checking is enabled. */ -- if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK) -- return false; -+ if (warn_psabi -+ && !warned -+ && align != ix86_compat_function_arg_boundary (mode, type, -+ saved_align)) -+ { -+ warned = true; -+ inform (input_location, -+ "the ABI for passing parameters with %d-byte" -+ " alignment has changed in GCC 4.6", -+ align / BITS_PER_UNIT); -+ } -+ } - -- return TARGET_STACK_PROBE; -+ return align; - } -- --/* Decide whether we can make a sibling call to a function. DECL is the -- declaration of the function being targeted by the call and EXP is the -- CALL_EXPR representing the call. */ -+ -+/* Return true if N is a possible register number of function value. */ - - static bool --ix86_function_ok_for_sibcall (tree decl, tree exp) -+ix86_function_value_regno_p (const unsigned int regno) - { -- tree type, decl_or_type; -- rtx a, b; -- bool bind_global = decl && !targetm.binds_local_p (decl); -- -- if (ix86_function_naked (current_function_decl)) -- return false; -- -- /* Sibling call isn't OK if there are no caller-saved registers -- since all registers must be preserved before return. */ -- if (cfun->machine->no_caller_saved_registers) -- return false; -- -- /* If we are generating position-independent code, we cannot sibcall -- optimize direct calls to global functions, as the PLT requires -- %ebx be live. (Darwin does not have a PLT.) */ -- if (!TARGET_MACHO -- && !TARGET_64BIT -- && flag_pic -- && flag_plt -- && bind_global) -- return false; -- -- /* If we need to align the outgoing stack, then sibcalling would -- unalign the stack, which may break the called function. 
*/ -- if (ix86_minimum_incoming_stack_boundary (true) -- < PREFERRED_STACK_BOUNDARY) -- return false; -- -- if (decl) -- { -- decl_or_type = decl; -- type = TREE_TYPE (decl); -- } -- else -+ switch (regno) - { -- /* We're looking at the CALL_EXPR, we need the type of the function. */ -- type = CALL_EXPR_FN (exp); /* pointer expression */ -- type = TREE_TYPE (type); /* pointer type */ -- type = TREE_TYPE (type); /* function type */ -- decl_or_type = type; -- } -+ case AX_REG: -+ return true; -+ case DX_REG: -+ return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI); -+ case DI_REG: -+ case SI_REG: -+ return TARGET_64BIT && ix86_cfun_abi () != MS_ABI; - -- /* Check that the return value locations are the same. Like -- if we are returning floats on the 80387 register stack, we cannot -- make a sibcall from a function that doesn't return a float to a -- function that does or, conversely, from a function that does return -- a float to a function that doesn't; the necessary stack adjustment -- would not be executed. This is also the place we notice -- differences in the return value ABI. Note that it is ok for one -- of the functions to have void return type as long as the return -- value of the other is passed in a register. */ -- a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false); -- b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)), -- cfun->decl, false); -- if (STACK_REG_P (a) || STACK_REG_P (b)) -- { -- if (!rtx_equal_p (a, b)) -+ /* Complex values are returned in %st(0)/%st(1) pair. */ -+ case ST0_REG: -+ case ST1_REG: -+ /* TODO: The function should depend on current function ABI but -+ builtins.c would need updating then. Therefore we use the -+ default ABI. */ -+ if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI) - return false; -- } -- else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl)))) -- ; -- else if (!rtx_equal_p (a, b)) -- return false; -+ return TARGET_FLOAT_RETURNS_IN_80387; - -- if (TARGET_64BIT) -- { -- /* The SYSV ABI has more call-clobbered registers; -- disallow sibcalls from MS to SYSV. */ -- if (cfun->machine->call_abi == MS_ABI -- && ix86_function_type_abi (type) == SYSV_ABI) -- return false; -- } -- else -- { -- /* If this call is indirect, we'll need to be able to use a -- call-clobbered register for the address of the target function. -- Make sure that all such registers are not used for passing -- parameters. Note that DLLIMPORT functions and call to global -- function via GOT slot are indirect. */ -- if (!decl -- || (bind_global && flag_pic && !flag_plt) -- || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)) -- || flag_force_indirect_call) -- { -- /* Check if regparm >= 3 since arg_reg_available is set to -- false if regparm == 0. If regparm is 1 or 2, there is -- always a call-clobbered register available. -+ /* Complex values are returned in %xmm0/%xmm1 pair. */ -+ case XMM0_REG: -+ case XMM1_REG: -+ return TARGET_SSE; - -- ??? The symbol indirect call doesn't need a call-clobbered -- register. But we don't know if this is a symbol indirect -- call or not here. */ -- if (ix86_function_regparm (type, decl) >= 3 -- && !cfun->machine->arg_reg_available) -- return false; -- } -+ case MM0_REG: -+ if (TARGET_MACHO || TARGET_64BIT) -+ return false; -+ return TARGET_MMX; - } - -- /* Otherwise okay. That also includes certain types of indirect calls. 
*/ -- return true; -+ return false; - } - --/* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall", -- and "sseregparm" calling convention attributes; -- arguments as in struct attribute_spec.handler. */ -+/* Define how to find the value returned by a function. -+ VALTYPE is the data type of the value (as a tree). -+ If the precise function being called is known, FUNC is its FUNCTION_DECL; -+ otherwise, FUNC is 0. */ - --static tree --ix86_handle_cconv_attribute (tree *node, tree name, tree args, int, -- bool *no_add_attrs) -+static rtx -+function_value_32 (machine_mode orig_mode, machine_mode mode, -+ const_tree fntype, const_tree fn) - { -- if (TREE_CODE (*node) != FUNCTION_TYPE -- && TREE_CODE (*node) != METHOD_TYPE -- && TREE_CODE (*node) != FIELD_DECL -- && TREE_CODE (*node) != TYPE_DECL) -- { -- warning (OPT_Wattributes, "%qE attribute only applies to functions", -- name); -- *no_add_attrs = true; -- return NULL_TREE; -- } -- -- /* Can combine regparm with all attributes but fastcall, and thiscall. */ -- if (is_attribute_p ("regparm", name)) -- { -- tree cst; -+ unsigned int regno; - -- if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) -- { -- error ("fastcall and regparm attributes are not compatible"); -- } -+ /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where -+ we normally prevent this case when mmx is not available. However -+ some ABIs may require the result to be returned like DImode. */ -+ if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8) -+ regno = FIRST_MMX_REG; - -- if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node))) -- { -- error ("regparam and thiscall attributes are not compatible"); -- } -+ /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where -+ we prevent this case when sse is not available. However some ABIs -+ may require the result to be returned like integer TImode. */ -+ else if (mode == TImode -+ || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16)) -+ regno = FIRST_SSE_REG; - -- cst = TREE_VALUE (args); -- if (TREE_CODE (cst) != INTEGER_CST) -- { -- warning (OPT_Wattributes, -- "%qE attribute requires an integer constant argument", -- name); -- *no_add_attrs = true; -- } -- else if (compare_tree_int (cst, REGPARM_MAX) > 0) -- { -- warning (OPT_Wattributes, "argument to %qE attribute larger than %d", -- name, REGPARM_MAX); -- *no_add_attrs = true; -- } -+ /* 32-byte vector modes in %ymm0. */ -+ else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32) -+ regno = FIRST_SSE_REG; - -- return NULL_TREE; -- } -+ /* 64-byte vector modes in %zmm0. */ -+ else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64) -+ regno = FIRST_SSE_REG; - -- if (TARGET_64BIT) -- { -- /* Do not warn when emulating the MS ABI. */ -- if ((TREE_CODE (*node) != FUNCTION_TYPE -- && TREE_CODE (*node) != METHOD_TYPE) -- || ix86_function_type_abi (*node) != MS_ABI) -- warning (OPT_Wattributes, "%qE attribute ignored", -- name); -- *no_add_attrs = true; -- return NULL_TREE; -- } -+ /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */ -+ else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387) -+ regno = FIRST_FLOAT_REG; -+ else -+ /* Most things go in %eax. */ -+ regno = AX_REG; - -- /* Can combine fastcall with stdcall (redundant) and sseregparm. */ -- if (is_attribute_p ("fastcall", name)) -+ /* Override FP return register with %xmm0 for local functions when -+ SSE math is enabled or for functions with sseregparm attribute. 
*/ -+ if ((fn || fntype) && (mode == SFmode || mode == DFmode)) - { -- if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node))) -- { -- error ("fastcall and cdecl attributes are not compatible"); -- } -- if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node))) -- { -- error ("fastcall and stdcall attributes are not compatible"); -- } -- if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node))) -- { -- error ("fastcall and regparm attributes are not compatible"); -- } -- if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node))) -+ int sse_level = ix86_function_sseregparm (fntype, fn, false); -+ if (sse_level == -1) - { -- error ("fastcall and thiscall attributes are not compatible"); -+ error ("calling %qD with SSE calling convention without " -+ "SSE/SSE2 enabled", fn); -+ sorry ("this is a GCC bug that can be worked around by adding " -+ "attribute used to function called"); - } -+ else if ((sse_level >= 1 && mode == SFmode) -+ || (sse_level == 2 && mode == DFmode)) -+ regno = FIRST_SSE_REG; - } - -- /* Can combine stdcall with fastcall (redundant), regparm and -- sseregparm. */ -- else if (is_attribute_p ("stdcall", name)) -- { -- if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node))) -- { -- error ("stdcall and cdecl attributes are not compatible"); -- } -- if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) -- { -- error ("stdcall and fastcall attributes are not compatible"); -- } -- if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node))) -- { -- error ("stdcall and thiscall attributes are not compatible"); -- } -- } -+ /* OImode shouldn't be used directly. */ -+ gcc_assert (mode != OImode); -+ -+ return gen_rtx_REG (orig_mode, regno); -+} -+ -+static rtx -+function_value_64 (machine_mode orig_mode, machine_mode mode, -+ const_tree valtype) -+{ -+ rtx ret; - -- /* Can combine cdecl with regparm and sseregparm. */ -- else if (is_attribute_p ("cdecl", name)) -+ /* Handle libcalls, which don't provide a type node. */ -+ if (valtype == NULL) - { -- if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node))) -- { -- error ("stdcall and cdecl attributes are not compatible"); -- } -- if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) -- { -- error ("fastcall and cdecl attributes are not compatible"); -- } -- if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node))) -+ unsigned int regno; -+ -+ switch (mode) - { -- error ("cdecl and thiscall attributes are not compatible"); -+ case E_SFmode: -+ case E_SCmode: -+ case E_DFmode: -+ case E_DCmode: -+ case E_TFmode: -+ case E_SDmode: -+ case E_DDmode: -+ case E_TDmode: -+ regno = FIRST_SSE_REG; -+ break; -+ case E_XFmode: -+ case E_XCmode: -+ regno = FIRST_FLOAT_REG; -+ break; -+ case E_TCmode: -+ return NULL; -+ default: -+ regno = AX_REG; - } -+ -+ return gen_rtx_REG (mode, regno); - } -- else if (is_attribute_p ("thiscall", name)) -+ else if (POINTER_TYPE_P (valtype)) - { -- if (TREE_CODE (*node) != METHOD_TYPE && pedantic) -- warning (OPT_Wattributes, "%qE attribute is used for non-class method", -- name); -- if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node))) -- { -- error ("stdcall and thiscall attributes are not compatible"); -- } -- if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) -- { -- error ("fastcall and thiscall attributes are not compatible"); -- } -- if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node))) -- { -- error ("cdecl and thiscall attributes are not compatible"); -- } -+ /* Pointers are always returned in word_mode. 
*/ -+ mode = word_mode; - } - -- /* Can combine sseregparm with all attributes. */ -+ ret = construct_container (mode, orig_mode, valtype, 1, -+ X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX, -+ x86_64_int_return_registers, 0); - -- return NULL_TREE; --} -+ /* For zero sized structures, construct_container returns NULL, but we -+ need to keep rest of compiler happy by returning meaningful value. */ -+ if (!ret) -+ ret = gen_rtx_REG (orig_mode, AX_REG); - --/* The transactional memory builtins are implicitly regparm or fastcall -- depending on the ABI. Override the generic do-nothing attribute that -- these builtins were declared with, and replace it with one of the two -- attributes that we expect elsewhere. */ -+ return ret; -+} - --static tree --ix86_handle_tm_regparm_attribute (tree *node, tree, tree, -- int flags, bool *no_add_attrs) -+static rtx -+function_value_ms_32 (machine_mode orig_mode, machine_mode mode, -+ const_tree fntype, const_tree fn, const_tree valtype) - { -- tree alt; -+ unsigned int regno; - -- /* In no case do we want to add the placeholder attribute. */ -- *no_add_attrs = true; -+ /* Floating point return values in %st(0) -+ (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes). */ -+ if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387 -+ && (GET_MODE_SIZE (mode) > 8 -+ || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype))) -+ { -+ regno = FIRST_FLOAT_REG; -+ return gen_rtx_REG (orig_mode, regno); -+ } -+ else -+ return function_value_32(orig_mode, mode, fntype,fn); -+} - -- /* The 64-bit ABI is unchanged for transactional memory. */ -- if (TARGET_64BIT) -- return NULL_TREE; -+static rtx -+function_value_ms_64 (machine_mode orig_mode, machine_mode mode, -+ const_tree valtype) -+{ -+ unsigned int regno = AX_REG; - -- /* ??? Is there a better way to validate 32-bit windows? We have -- cfun->machine->call_abi, but that seems to be set only for 64-bit. */ -- if (CHECK_STACK_LIMIT > 0) -- alt = tree_cons (get_identifier ("fastcall"), NULL, NULL); -- else -+ if (TARGET_SSE) - { -- alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL); -- alt = tree_cons (get_identifier ("regparm"), alt, NULL); -+ switch (GET_MODE_SIZE (mode)) -+ { -+ case 16: -+ if (valtype != NULL_TREE -+ && !VECTOR_INTEGER_TYPE_P (valtype) -+ && !VECTOR_INTEGER_TYPE_P (valtype) -+ && !INTEGRAL_TYPE_P (valtype) -+ && !VECTOR_FLOAT_TYPE_P (valtype)) -+ break; -+ if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode)) -+ && !COMPLEX_MODE_P (mode)) -+ regno = FIRST_SSE_REG; -+ break; -+ case 8: -+ case 4: -+ if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype)) -+ break; -+ if (mode == SFmode || mode == DFmode) -+ regno = FIRST_SSE_REG; -+ break; -+ default: -+ break; -+ } - } -- decl_attributes (node, alt, flags); -- -- return NULL_TREE; -+ return gen_rtx_REG (orig_mode, regno); - } - --/* This function determines from TYPE the calling-convention. */ -- --unsigned int --ix86_get_callcvt (const_tree type) -+static rtx -+ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl, -+ machine_mode orig_mode, machine_mode mode) - { -- unsigned int ret = 0; -- bool is_stdarg; -- tree attrs; -- -- if (TARGET_64BIT) -- return IX86_CALLCVT_CDECL; -+ const_tree fn, fntype; - -- attrs = TYPE_ATTRIBUTES (type); -- if (attrs != NULL_TREE) -+ fn = NULL_TREE; -+ if (fntype_or_decl && DECL_P (fntype_or_decl)) -+ fn = fntype_or_decl; -+ fntype = fn ? 
TREE_TYPE (fn) : fntype_or_decl; -+ -+ if (ix86_function_type_abi (fntype) == MS_ABI) - { -- if (lookup_attribute ("cdecl", attrs)) -- ret |= IX86_CALLCVT_CDECL; -- else if (lookup_attribute ("stdcall", attrs)) -- ret |= IX86_CALLCVT_STDCALL; -- else if (lookup_attribute ("fastcall", attrs)) -- ret |= IX86_CALLCVT_FASTCALL; -- else if (lookup_attribute ("thiscall", attrs)) -- ret |= IX86_CALLCVT_THISCALL; -- -- /* Regparam isn't allowed for thiscall and fastcall. */ -- if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0) -- { -- if (lookup_attribute ("regparm", attrs)) -- ret |= IX86_CALLCVT_REGPARM; -- if (lookup_attribute ("sseregparm", attrs)) -- ret |= IX86_CALLCVT_SSEREGPARM; -- } -- -- if (IX86_BASE_CALLCVT(ret) != 0) -- return ret; -+ if (TARGET_64BIT) -+ return function_value_ms_64 (orig_mode, mode, valtype); -+ else -+ return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype); - } -+ else if (TARGET_64BIT) -+ return function_value_64 (orig_mode, mode, valtype); -+ else -+ return function_value_32 (orig_mode, mode, fntype, fn); -+} - -- is_stdarg = stdarg_p (type); -- if (TARGET_RTD && !is_stdarg) -- return IX86_CALLCVT_STDCALL | ret; -- -- if (ret != 0 -- || is_stdarg -- || TREE_CODE (type) != METHOD_TYPE -- || ix86_function_type_abi (type) != MS_ABI) -- return IX86_CALLCVT_CDECL | ret; -+static rtx -+ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool) -+{ -+ machine_mode mode, orig_mode; - -- return IX86_CALLCVT_THISCALL; -+ orig_mode = TYPE_MODE (valtype); -+ mode = type_natural_mode (valtype, NULL, true); -+ return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode); - } - --/* Return 0 if the attributes for two types are incompatible, 1 if they -- are compatible, and 2 if they are nearly compatible (which causes a -- warning to be generated). */ -+/* Pointer function arguments and return values are promoted to -+ word_mode for normal functions. */ - --static int --ix86_comp_type_attributes (const_tree type1, const_tree type2) -+static machine_mode -+ix86_promote_function_mode (const_tree type, machine_mode mode, -+ int *punsignedp, const_tree fntype, -+ int for_return) - { -- unsigned int ccvt1, ccvt2; -- -- if (TREE_CODE (type1) != FUNCTION_TYPE -- && TREE_CODE (type1) != METHOD_TYPE) -- return 1; -+ if (cfun->machine->func_type == TYPE_NORMAL -+ && type != NULL_TREE -+ && POINTER_TYPE_P (type)) -+ { -+ *punsignedp = POINTERS_EXTEND_UNSIGNED; -+ return word_mode; -+ } -+ return default_promote_function_mode (type, mode, punsignedp, fntype, -+ for_return); -+} - -- ccvt1 = ix86_get_callcvt (type1); -- ccvt2 = ix86_get_callcvt (type2); -- if (ccvt1 != ccvt2) -- return 0; -- if (ix86_function_regparm (type1, NULL) -- != ix86_function_regparm (type2, NULL)) -- return 0; -+/* Return true if a structure, union or array with MODE containing FIELD -+ should be accessed using BLKmode. */ - -- return 1; -+static bool -+ix86_member_type_forces_blk (const_tree field, machine_mode mode) -+{ -+ /* Union with XFmode must be in BLKmode. */ -+ return (mode == XFmode -+ && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE -+ || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE)); - } -- --/* Return the regparm value for a function with the indicated TYPE and DECL. -- DECL may be NULL when calling function indirectly -- or considering a libcall. 
*/ - --static int --ix86_function_regparm (const_tree type, const_tree decl) -+rtx -+ix86_libcall_value (machine_mode mode) - { -- tree attr; -- int regparm; -- unsigned int ccvt; -+ return ix86_function_value_1 (NULL, NULL, mode, mode); -+} - -- if (TARGET_64BIT) -- return (ix86_function_type_abi (type) == SYSV_ABI -- ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX); -- ccvt = ix86_get_callcvt (type); -- regparm = ix86_regparm; -+/* Return true iff type is returned in memory. */ - -- if ((ccvt & IX86_CALLCVT_REGPARM) != 0) -+static bool -+ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) -+{ -+#ifdef SUBTARGET_RETURN_IN_MEMORY -+ return SUBTARGET_RETURN_IN_MEMORY (type, fntype); -+#else -+ const machine_mode mode = type_natural_mode (type, NULL, true); -+ HOST_WIDE_INT size; -+ -+ if (TARGET_64BIT) - { -- attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type)); -- if (attr) -+ if (ix86_function_type_abi (fntype) == MS_ABI) - { -- regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))); -- return regparm; -- } -- } -- else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) -- return 2; -- else if ((ccvt & IX86_CALLCVT_THISCALL) != 0) -- return 1; -+ size = int_size_in_bytes (type); - -- /* Use register calling convention for local functions when possible. */ -- if (decl -- && TREE_CODE (decl) == FUNCTION_DECL) -- { -- cgraph_node *target = cgraph_node::get (decl); -- if (target) -- target = target->function_symbol (); -+ /* __m128 is returned in xmm0. */ -+ if ((!type || VECTOR_INTEGER_TYPE_P (type) -+ || INTEGRAL_TYPE_P (type) -+ || VECTOR_FLOAT_TYPE_P (type)) -+ && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode)) -+ && !COMPLEX_MODE_P (mode) -+ && (GET_MODE_SIZE (mode) == 16 || size == 16)) -+ return false; - -- /* Caller and callee must agree on the calling convention, so -- checking here just optimize means that with -- __attribute__((optimize (...))) caller could use regparm convention -- and callee not, or vice versa. Instead look at whether the callee -- is optimized or not. */ -- if (target && opt_for_fn (target->decl, optimize) -- && !(profile_flag && !flag_fentry)) -+ /* Otherwise, the size must be exactly in [1248]. */ -+ return size != 1 && size != 2 && size != 4 && size != 8; -+ } -+ else - { -- cgraph_local_info *i = &target->local; -- if (i && i->local && i->can_change_signature) -- { -- int local_regparm, globals = 0, regno; -- -- /* Make sure no regparm register is taken by a -- fixed register variable. */ -- for (local_regparm = 0; local_regparm < REGPARM_MAX; -- local_regparm++) -- if (fixed_regs[local_regparm]) -- break; -+ int needed_intregs, needed_sseregs; - -- /* We don't want to use regparm(3) for nested functions as -- these use a static chain pointer in the third argument. */ -- if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl)) -- local_regparm = 2; -+ return examine_argument (mode, type, 1, -+ &needed_intregs, &needed_sseregs); -+ } -+ } -+ else -+ { -+ size = int_size_in_bytes (type); - -- /* Save a register for the split stack. */ -- if (flag_split_stack) -- { -- if (local_regparm == 3) -- local_regparm = 2; -- else if (local_regparm == 2 -- && DECL_STATIC_CHAIN (target->decl)) -- local_regparm = 1; -- } -+ /* Intel MCU psABI returns scalars and aggregates no larger than 8 -+ bytes in registers. */ -+ if (TARGET_IAMCU) -+ return VECTOR_MODE_P (mode) || size < 0 || size > 8; - -- /* Each fixed register usage increases register pressure, -- so less registers should be used for argument passing. 
-- This functionality can be overriden by an explicit -- regparm value. */ -- for (regno = AX_REG; regno <= DI_REG; regno++) -- if (fixed_regs[regno]) -- globals++; -+ if (mode == BLKmode) -+ return true; - -- local_regparm -- = globals < local_regparm ? local_regparm - globals : 0; -+ if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8) -+ return false; - -- if (local_regparm > regparm) -- regparm = local_regparm; -- } -- } -- } -+ if (VECTOR_MODE_P (mode) || mode == TImode) -+ { -+ /* User-created vectors small enough to fit in EAX. */ -+ if (size < 8) -+ return false; - -- return regparm; --} -+ /* Unless ABI prescibes otherwise, -+ MMX/3dNow values are returned in MM0 if available. */ -+ -+ if (size == 8) -+ return TARGET_VECT8_RETURNS || !TARGET_MMX; - --/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and -- DFmode (2) arguments in SSE registers for a function with the -- indicated TYPE and DECL. DECL may be NULL when calling function -- indirectly or considering a libcall. Return -1 if any FP parameter -- should be rejected by error. This is used in siutation we imply SSE -- calling convetion but the function is called from another function with -- SSE disabled. Otherwise return 0. */ -+ /* SSE values are returned in XMM0 if available. */ -+ if (size == 16) -+ return !TARGET_SSE; - --static int --ix86_function_sseregparm (const_tree type, const_tree decl, bool warn) --{ -- gcc_assert (!TARGET_64BIT); -+ /* AVX values are returned in YMM0 if available. */ -+ if (size == 32) -+ return !TARGET_AVX; - -- /* Use SSE registers to pass SFmode and DFmode arguments if requested -- by the sseregparm attribute. */ -- if (TARGET_SSEREGPARM -- || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type)))) -- { -- if (!TARGET_SSE) -- { -- if (warn) -- { -- if (decl) -- error ("calling %qD with attribute sseregparm without " -- "SSE/SSE2 enabled", decl); -- else -- error ("calling %qT with attribute sseregparm without " -- "SSE/SSE2 enabled", type); -- } -- return 0; -+ /* AVX512F values are returned in ZMM0 if available. */ -+ if (size == 64) -+ return !TARGET_AVX512F; - } - -- return 2; -- } -+ if (mode == XFmode) -+ return false; - -- if (!decl) -- return 0; -+ if (size > 12) -+ return true; - -- cgraph_node *target = cgraph_node::get (decl); -- if (target) -- target = target->function_symbol (); -+ /* OImode shouldn't be used directly. */ -+ gcc_assert (mode != OImode); - -- /* For local functions, pass up to SSE_REGPARM_MAX SFmode -- (and DFmode for SSE2) arguments in SSE registers. */ -- if (target -- /* TARGET_SSE_MATH */ -- && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE) -- && opt_for_fn (target->decl, optimize) -- && !(profile_flag && !flag_fentry)) -- { -- cgraph_local_info *i = &target->local; -- if (i && i->local && i->can_change_signature) -- { -- /* Refuse to produce wrong code when local function with SSE enabled -- is called from SSE disabled function. -- FIXME: We need a way to detect these cases cross-ltrans partition -- and avoid using SSE calling conventions on local functions called -- from function with SSE disabled. For now at least delay the -- warning until we know we are going to produce wrong code. -- See PR66047 */ -- if (!TARGET_SSE && warn) -- return -1; -- return TARGET_SSE2_P (target_opts_for_fn (target->decl) -- ->x_ix86_isa_flags) ? 2 : 1; -- } -+ return false; - } -- -- return 0; -+#endif - } - --/* Return true if EAX is live at the start of the function. 
Used by -- ix86_expand_prologue to determine if we need special help before -- calling allocate_stack_worker. */ -+ -+/* Create the va_list data type. */ - --static bool --ix86_eax_live_at_start_p (void) -+static tree -+ix86_build_builtin_va_list_64 (void) - { -- /* Cheat. Don't bother working forward from ix86_function_regparm -- to the function type to whether an actual argument is located in -- eax. Instead just look at cfg info, which is still close enough -- to correct at this point. This gives false positives for broken -- functions that might use uninitialized data that happens to be -- allocated in eax, but who cares? */ -- return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0); --} -+ tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl; - --static bool --ix86_keep_aggregate_return_pointer (tree fntype) --{ -- tree attr; -+ record = lang_hooks.types.make_type (RECORD_TYPE); -+ type_decl = build_decl (BUILTINS_LOCATION, -+ TYPE_DECL, get_identifier ("__va_list_tag"), record); - -- if (!TARGET_64BIT) -- { -- attr = lookup_attribute ("callee_pop_aggregate_return", -- TYPE_ATTRIBUTES (fntype)); -- if (attr) -- return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0); -+ f_gpr = build_decl (BUILTINS_LOCATION, -+ FIELD_DECL, get_identifier ("gp_offset"), -+ unsigned_type_node); -+ f_fpr = build_decl (BUILTINS_LOCATION, -+ FIELD_DECL, get_identifier ("fp_offset"), -+ unsigned_type_node); -+ f_ovf = build_decl (BUILTINS_LOCATION, -+ FIELD_DECL, get_identifier ("overflow_arg_area"), -+ ptr_type_node); -+ f_sav = build_decl (BUILTINS_LOCATION, -+ FIELD_DECL, get_identifier ("reg_save_area"), -+ ptr_type_node); - -- /* For 32-bit MS-ABI the default is to keep aggregate -- return pointer. */ -- if (ix86_function_type_abi (fntype) == MS_ABI) -- return true; -- } -- return KEEP_AGGREGATE_RETURN_POINTER != 0; --} -+ va_list_gpr_counter_field = f_gpr; -+ va_list_fpr_counter_field = f_fpr; - --/* Value is the number of bytes of arguments automatically -- popped when returning from a subroutine call. -- FUNDECL is the declaration node of the function (as a tree), -- FUNTYPE is the data type of the function (as a tree), -- or for a library call it is an identifier node for the subroutine name. -- SIZE is the number of bytes of arguments passed on the stack. -+ DECL_FIELD_CONTEXT (f_gpr) = record; -+ DECL_FIELD_CONTEXT (f_fpr) = record; -+ DECL_FIELD_CONTEXT (f_ovf) = record; -+ DECL_FIELD_CONTEXT (f_sav) = record; - -- On the 80386, the RTD insn may be used to pop them if the number -- of args is fixed, but if the number is variable then the caller -- must pop them all. RTD can't be used for library calls now -- because the library is compiled with the Unix compiler. -- Use of RTD is a selectable option, since it is incompatible with -- standard Unix calling sequences. If the option is not selected, -- the caller must always pop the args. -+ TYPE_STUB_DECL (record) = type_decl; -+ TYPE_NAME (record) = type_decl; -+ TYPE_FIELDS (record) = f_gpr; -+ DECL_CHAIN (f_gpr) = f_fpr; -+ DECL_CHAIN (f_fpr) = f_ovf; -+ DECL_CHAIN (f_ovf) = f_sav; - -- The attribute stdcall is equivalent to RTD on a per module basis. */ -+ layout_type (record); - --static poly_int64 --ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size) --{ -- unsigned int ccvt; -+ TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"), -+ NULL_TREE, TYPE_ATTRIBUTES (record)); - -- /* None of the 64-bit ABIs pop arguments. */ -+ /* The correct type is an array type of one element. 
*/ -+ return build_array_type (record, build_index_type (size_zero_node)); -+} -+ -+/* Setup the builtin va_list data type and for 64-bit the additional -+ calling convention specific va_list data types. */ -+ -+static tree -+ix86_build_builtin_va_list (void) -+{ - if (TARGET_64BIT) -- return 0; -+ { -+ /* Initialize ABI specific va_list builtin types. - -- ccvt = ix86_get_callcvt (funtype); -+ In lto1, we can encounter two va_list types: -+ - one as a result of the type-merge across TUs, and -+ - the one constructed here. -+ These two types will not have the same TYPE_MAIN_VARIANT, and therefore -+ a type identity check in canonical_va_list_type based on -+ TYPE_MAIN_VARIANT (which we used to have) will not work. -+ Instead, we tag each va_list_type_node with its unique attribute, and -+ look for the attribute in the type identity check in -+ canonical_va_list_type. - -- if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL -- | IX86_CALLCVT_THISCALL)) != 0 -- && ! stdarg_p (funtype)) -- return size; -+ Tagging sysv_va_list_type_node directly with the attribute is -+ problematic since it's a array of one record, which will degrade into a -+ pointer to record when used as parameter (see build_va_arg comments for -+ an example), dropping the attribute in the process. So we tag the -+ record instead. */ - -- /* Lose any fake structure return argument if it is passed on the stack. */ -- if (aggregate_value_p (TREE_TYPE (funtype), fundecl) -- && !ix86_keep_aggregate_return_pointer (funtype)) -+ /* For SYSV_ABI we use an array of one record. */ -+ sysv_va_list_type_node = ix86_build_builtin_va_list_64 (); -+ -+ /* For MS_ABI we use plain pointer to argument area. */ -+ tree char_ptr_type = build_pointer_type (char_type_node); -+ tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE, -+ TYPE_ATTRIBUTES (char_ptr_type)); -+ ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr); -+ -+ return ((ix86_abi == MS_ABI) -+ ? ms_va_list_type_node -+ : sysv_va_list_type_node); -+ } -+ else - { -- int nregs = ix86_function_regparm (funtype, fundecl); -- if (nregs == 0) -- return GET_MODE_SIZE (Pmode); -+ /* For i386 we use plain pointer to argument area. */ -+ return build_pointer_type (char_type_node); - } -- -- return 0; - } - --/* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */ -+/* Worker function for TARGET_SETUP_INCOMING_VARARGS. */ - --static bool --ix86_legitimate_combined_insn (rtx_insn *insn) -+static void -+setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum) - { -- int i; -+ rtx save_area, mem; -+ alias_set_type set; -+ int i, max; - -- /* Check operand constraints in case hard registers were propagated -- into insn pattern. This check prevents combine pass from -- generating insn patterns with invalid hard register operands. -- These invalid insns can eventually confuse reload to error out -- with a spill failure. See also PRs 46829 and 46843. */ -+ /* GPR size of varargs save area. */ -+ if (cfun->va_list_gpr_size) -+ ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD; -+ else -+ ix86_varargs_gpr_size = 0; - -- gcc_assert (INSN_CODE (insn) >= 0); -+ /* FPR size of varargs save area. We don't need it if we don't pass -+ anything in SSE registers. */ -+ if (TARGET_SSE && cfun->va_list_fpr_size) -+ ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16; -+ else -+ ix86_varargs_fpr_size = 0; - -- extract_insn (insn); -- preprocess_constraints (insn); -+ if (! ix86_varargs_gpr_size && ! 
ix86_varargs_fpr_size) -+ return; - -- int n_operands = recog_data.n_operands; -- int n_alternatives = recog_data.n_alternatives; -- for (i = 0; i < n_operands; i++) -+ save_area = frame_pointer_rtx; -+ set = get_varargs_alias_set (); -+ -+ max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD; -+ if (max > X86_64_REGPARM_MAX) -+ max = X86_64_REGPARM_MAX; -+ -+ for (i = cum->regno; i < max; i++) - { -- rtx op = recog_data.operand[i]; -- machine_mode mode = GET_MODE (op); -- const operand_alternative *op_alt; -- int offset = 0; -- bool win; -- int j; -+ mem = gen_rtx_MEM (word_mode, -+ plus_constant (Pmode, save_area, i * UNITS_PER_WORD)); -+ MEM_NOTRAP_P (mem) = 1; -+ set_mem_alias_set (mem, set); -+ emit_move_insn (mem, -+ gen_rtx_REG (word_mode, -+ x86_64_int_parameter_registers[i])); -+ } - -- /* A unary operator may be accepted by the predicate, but it -- is irrelevant for matching constraints. */ -- if (UNARY_P (op)) -- op = XEXP (op, 0); -+ if (ix86_varargs_fpr_size) -+ { -+ machine_mode smode; -+ rtx_code_label *label; -+ rtx test; - -- if (SUBREG_P (op)) -- { -- if (REG_P (SUBREG_REG (op)) -- && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER) -- offset = subreg_regno_offset (REGNO (SUBREG_REG (op)), -- GET_MODE (SUBREG_REG (op)), -- SUBREG_BYTE (op), -- GET_MODE (op)); -- op = SUBREG_REG (op); -- } -+ /* Now emit code to save SSE registers. The AX parameter contains number -+ of SSE parameter registers used to call this function, though all we -+ actually check here is the zero/non-zero status. */ - -- if (!(REG_P (op) && HARD_REGISTER_P (op))) -- continue; -+ label = gen_label_rtx (); -+ test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx); -+ emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1), -+ label)); - -- op_alt = recog_op_alt; -+ /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if -+ we used movdqa (i.e. TImode) instead? Perhaps even better would -+ be if we could determine the real mode of the data, via a hook -+ into pass_stdarg. Ignore all that for now. */ -+ smode = V4SFmode; -+ if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode)) -+ crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode); - -- /* Operand has no constraints, anything is OK. */ -- win = !n_alternatives; -+ max = cum->sse_regno + cfun->va_list_fpr_size / 16; -+ if (max > X86_64_SSE_REGPARM_MAX) -+ max = X86_64_SSE_REGPARM_MAX; - -- alternative_mask preferred = get_preferred_alternatives (insn); -- for (j = 0; j < n_alternatives; j++, op_alt += n_operands) -+ for (i = cum->sse_regno; i < max; ++i) - { -- if (!TEST_BIT (preferred, j)) -- continue; -- if (op_alt[i].anything_ok -- || (op_alt[i].matches != -1 -- && operands_match_p -- (recog_data.operand[i], -- recog_data.operand[op_alt[i].matches])) -- || reg_fits_class_p (op, op_alt[i].cl, offset, mode)) -- { -- win = true; -- break; -- } -+ mem = plus_constant (Pmode, save_area, -+ i * 16 + ix86_varargs_gpr_size); -+ mem = gen_rtx_MEM (smode, mem); -+ MEM_NOTRAP_P (mem) = 1; -+ set_mem_alias_set (mem, set); -+ set_mem_align (mem, GET_MODE_ALIGNMENT (smode)); -+ -+ emit_move_insn (mem, gen_rtx_REG (smode, GET_SSE_REGNO (i))); - } - -- if (!win) -- return false; -+ emit_label (label); - } -- -- return true; --} -- --/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */ -- --static unsigned HOST_WIDE_INT --ix86_asan_shadow_offset (void) --{ -- return TARGET_LP64 ? (TARGET_MACHO ? 
(HOST_WIDE_INT_1 << 44) -- : HOST_WIDE_INT_C (0x7fff8000)) -- : (HOST_WIDE_INT_1 << 29); - } -- --/* Argument support functions. */ - --/* Return true when register may be used to pass function parameters. */ --bool --ix86_function_arg_regno_p (int regno) -+static void -+setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum) - { -+ alias_set_type set = get_varargs_alias_set (); - int i; -- enum calling_abi call_abi; -- const int *parm_regs; - -- if (!TARGET_64BIT) -- { -- if (TARGET_MACHO) -- return (regno < REGPARM_MAX -- || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno])); -- else -- return (regno < REGPARM_MAX -- || (TARGET_MMX && MMX_REGNO_P (regno) -- && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX)) -- || (TARGET_SSE && SSE_REGNO_P (regno) -- && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))); -- } -+ /* Reset to zero, as there might be a sysv vaarg used -+ before. */ -+ ix86_varargs_gpr_size = 0; -+ ix86_varargs_fpr_size = 0; - -- if (TARGET_SSE && SSE_REGNO_P (regno) -- && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)) -- return true; -- -- /* TODO: The function should depend on current function ABI but -- builtins.c would need updating then. Therefore we use the -- default ABI. */ -- call_abi = ix86_cfun_abi (); -- -- /* RAX is used as hidden argument to va_arg functions. */ -- if (call_abi == SYSV_ABI && regno == AX_REG) -- return true; -- -- if (call_abi == MS_ABI) -- parm_regs = x86_64_ms_abi_int_parameter_registers; -- else -- parm_regs = x86_64_int_parameter_registers; -- -- for (i = 0; i < (call_abi == MS_ABI -- ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++) -- if (regno == parm_regs[i]) -- return true; -- return false; --} -- --/* Return if we do not know how to pass TYPE solely in registers. */ -- --static bool --ix86_must_pass_in_stack (machine_mode mode, const_tree type) --{ -- if (must_pass_in_stack_var_size_or_pad (mode, type)) -- return true; -- -- /* For 32-bit, we want TImode aggregates to go on the stack. But watch out! -- The layout_type routine is crafty and tries to trick us into passing -- currently unsupported vector types on the stack by using TImode. */ -- return (!TARGET_64BIT && mode == TImode -- && type && TREE_CODE (type) != VECTOR_TYPE); --} -+ for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++) -+ { -+ rtx reg, mem; - --/* It returns the size, in bytes, of the area reserved for arguments passed -- in registers for the function represented by fndecl dependent to the used -- abi format. */ --int --ix86_reg_parm_stack_space (const_tree fndecl) --{ -- enum calling_abi call_abi = SYSV_ABI; -- if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL) -- call_abi = ix86_function_abi (fndecl); -- else -- call_abi = ix86_function_type_abi (fndecl); -- if (TARGET_64BIT && call_abi == MS_ABI) -- return 32; -- return 0; --} -+ mem = gen_rtx_MEM (Pmode, -+ plus_constant (Pmode, virtual_incoming_args_rtx, -+ i * UNITS_PER_WORD)); -+ MEM_NOTRAP_P (mem) = 1; -+ set_mem_alias_set (mem, set); - --/* We add this as a workaround in order to use libc_has_function -- hook in i386.md. */ --bool --ix86_libc_has_function (enum function_class fn_class) --{ -- return targetm.libc_has_function (fn_class); -+ reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]); -+ emit_move_insn (mem, reg); -+ } - } - --/* Returns value SYSV_ABI, MS_ABI dependent on fntype, -- specifying the call abi used. 
*/ --enum calling_abi --ix86_function_type_abi (const_tree fntype) -+static void -+ix86_setup_incoming_varargs (cumulative_args_t cum_v, -+ const function_arg_info &arg, -+ int *, int no_rtl) - { -- enum calling_abi abi = ix86_abi; -+ CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); -+ CUMULATIVE_ARGS next_cum; -+ tree fntype; - -- if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE) -- return abi; -+ /* This argument doesn't appear to be used anymore. Which is good, -+ because the old code here didn't suppress rtl generation. */ -+ gcc_assert (!no_rtl); - -- if (abi == SYSV_ABI -- && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype))) -- { -- static int warned; -- if (TARGET_X32 && !warned) -- { -- error ("X32 does not support ms_abi attribute"); -- warned = 1; -- } -+ if (!TARGET_64BIT) -+ return; - -- abi = MS_ABI; -- } -- else if (abi == MS_ABI -- && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype))) -- abi = SYSV_ABI; -+ fntype = TREE_TYPE (current_function_decl); - -- return abi; --} -+ /* For varargs, we do not want to skip the dummy va_dcl argument. -+ For stdargs, we do want to skip the last named argument. */ -+ next_cum = *cum; -+ if (stdarg_p (fntype)) -+ ix86_function_arg_advance (pack_cumulative_args (&next_cum), arg); - --static enum calling_abi --ix86_function_abi (const_tree fndecl) --{ -- return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi; -+ if (cum->call_abi == MS_ABI) -+ setup_incoming_varargs_ms_64 (&next_cum); -+ else -+ setup_incoming_varargs_64 (&next_cum); - } - --/* Returns value SYSV_ABI, MS_ABI dependent on cfun, -- specifying the call abi used. */ --enum calling_abi --ix86_cfun_abi (void) --{ -- return cfun ? cfun->machine->call_abi : ix86_abi; --} -+/* Checks if TYPE is of kind va_list char *. */ - - static bool --ix86_function_ms_hook_prologue (const_tree fn) -+is_va_list_char_pointer (tree type) - { -- if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn))) -- { -- if (decl_function_context (fn) != NULL_TREE) -- error_at (DECL_SOURCE_LOCATION (fn), -- "ms_hook_prologue is not compatible with nested function"); -- else -- return true; -- } -- return false; --} -+ tree canonic; - --static bool --ix86_function_naked (const_tree fn) --{ -- if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn))) -+ /* For 32-bit it is always true. */ -+ if (!TARGET_64BIT) - return true; -- -- return false; -+ canonic = ix86_canonical_va_list_type (type); -+ return (canonic == ms_va_list_type_node -+ || (ix86_abi == MS_ABI && canonic == va_list_type_node)); - } - --/* Write the extra assembler code needed to declare a function properly. */ -+/* Implement va_start. */ - --void --ix86_asm_output_function_label (FILE *asm_out_file, const char *fname, -- tree decl) -+static void -+ix86_va_start (tree valist, rtx nextarg) - { -- bool is_ms_hook = ix86_function_ms_hook_prologue (decl); -+ HOST_WIDE_INT words, n_gpr, n_fpr; -+ tree f_gpr, f_fpr, f_ovf, f_sav; -+ tree gpr, fpr, ovf, sav, t; -+ tree type; -+ rtx ovf_rtx; - -- if (is_ms_hook) -+ if (flag_split_stack -+ && cfun->machine->split_stack_varargs_pointer == NULL_RTX) - { -- int i, filler_count = (TARGET_64BIT ? 32 : 16); -- unsigned int filler_cc = 0xcccccccc; -+ unsigned int scratch_regno; - -- for (i = 0; i < filler_count; i += 4) -- fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc); -- } -+ /* When we are splitting the stack, we can't refer to the stack -+ arguments using internal_arg_pointer, because they may be on -+ the old stack. 
The split stack prologue will arrange to -+ leave a pointer to the old stack arguments in a scratch -+ register, which we here copy to a pseudo-register. The split -+ stack prologue can't set the pseudo-register directly because -+ it (the prologue) runs before any registers have been saved. */ - --#ifdef SUBTARGET_ASM_UNWIND_INIT -- SUBTARGET_ASM_UNWIND_INIT (asm_out_file); --#endif -+ scratch_regno = split_stack_prologue_scratch_regno (); -+ if (scratch_regno != INVALID_REGNUM) -+ { -+ rtx reg; -+ rtx_insn *seq; - -- ASM_OUTPUT_LABEL (asm_out_file, fname); -+ reg = gen_reg_rtx (Pmode); -+ cfun->machine->split_stack_varargs_pointer = reg; - -- /* Output magic byte marker, if hot-patch attribute is set. */ -- if (is_ms_hook) -- { -- if (TARGET_64BIT) -- { -- /* leaq [%rsp + 0], %rsp */ -- fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n", -- asm_out_file); -+ start_sequence (); -+ emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno)); -+ seq = get_insns (); -+ end_sequence (); -+ -+ push_topmost_sequence (); -+ emit_insn_after (seq, entry_of_function ()); -+ pop_topmost_sequence (); - } -+ } -+ -+ /* Only 64bit target needs something special. */ -+ if (is_va_list_char_pointer (TREE_TYPE (valist))) -+ { -+ if (cfun->machine->split_stack_varargs_pointer == NULL_RTX) -+ std_expand_builtin_va_start (valist, nextarg); - else - { -- /* movl.s %edi, %edi -- push %ebp -- movl.s %esp, %ebp */ -- fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", asm_out_file); -+ rtx va_r, next; -+ -+ va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE); -+ next = expand_binop (ptr_mode, add_optab, -+ cfun->machine->split_stack_varargs_pointer, -+ crtl->args.arg_offset_rtx, -+ NULL_RTX, 0, OPTAB_LIB_WIDEN); -+ convert_move (va_r, next, 0); - } -+ return; - } --} - --/* Implementation of call abi switching target hook. Specific to FNDECL -- the specific call register sets are set. See also -- ix86_conditional_register_usage for more details. */ --void --ix86_call_abi_override (const_tree fndecl) --{ -- cfun->machine->call_abi = ix86_function_abi (fndecl); --} -+ f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node)); -+ f_fpr = DECL_CHAIN (f_gpr); -+ f_ovf = DECL_CHAIN (f_fpr); -+ f_sav = DECL_CHAIN (f_ovf); - --/* Return 1 if pseudo register should be created and used to hold -- GOT address for PIC code. */ --bool --ix86_use_pseudo_pic_reg (void) --{ -- if ((TARGET_64BIT -- && (ix86_cmodel == CM_SMALL_PIC -- || TARGET_PECOFF)) -- || !flag_pic) -- return false; -- return true; --} -+ valist = build_simple_mem_ref (valist); -+ TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node); -+ /* The following should be folded into the MEM_REF offset. */ -+ gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist), -+ f_gpr, NULL_TREE); -+ fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist), -+ f_fpr, NULL_TREE); -+ ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist), -+ f_ovf, NULL_TREE); -+ sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist), -+ f_sav, NULL_TREE); - --/* Initialize large model PIC register. */ -+ /* Count number of gp and fp argument registers used. 
*/ -+ words = crtl->args.info.words; -+ n_gpr = crtl->args.info.regno; -+ n_fpr = crtl->args.info.sse_regno; - --static void --ix86_init_large_pic_reg (unsigned int tmp_regno) --{ -- rtx_code_label *label; -- rtx tmp_reg; -- -- gcc_assert (Pmode == DImode); -- label = gen_label_rtx (); -- emit_label (label); -- LABEL_PRESERVE_P (label) = 1; -- tmp_reg = gen_rtx_REG (Pmode, tmp_regno); -- gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno); -- emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, -- label)); -- emit_insn (gen_set_got_offset_rex64 (tmp_reg, label)); -- emit_insn (ix86_gen_add3 (pic_offset_table_rtx, -- pic_offset_table_rtx, tmp_reg)); -- const char *name = LABEL_NAME (label); -- PUT_CODE (label, NOTE); -- NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL; -- NOTE_DELETED_LABEL_NAME (label) = name; --} -- --/* Create and initialize PIC register if required. */ --static void --ix86_init_pic_reg (void) --{ -- edge entry_edge; -- rtx_insn *seq; -- -- if (!ix86_use_pseudo_pic_reg ()) -- return; -- -- start_sequence (); -- -- if (TARGET_64BIT) -+ if (cfun->va_list_gpr_size) - { -- if (ix86_cmodel == CM_LARGE_PIC) -- ix86_init_large_pic_reg (R11_REG); -- else -- emit_insn (gen_set_got_rex64 (pic_offset_table_rtx)); -+ type = TREE_TYPE (gpr); -+ t = build2 (MODIFY_EXPR, type, -+ gpr, build_int_cst (type, n_gpr * 8)); -+ TREE_SIDE_EFFECTS (t) = 1; -+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); - } -- else -+ -+ if (TARGET_SSE && cfun->va_list_fpr_size) - { -- /* If there is future mcount call in the function it is more profitable -- to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM. */ -- rtx reg = crtl->profile -- ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM) -- : pic_offset_table_rtx; -- rtx_insn *insn = emit_insn (gen_set_got (reg)); -- RTX_FRAME_RELATED_P (insn) = 1; -- if (crtl->profile) -- emit_move_insn (pic_offset_table_rtx, reg); -- add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX); -+ type = TREE_TYPE (fpr); -+ t = build2 (MODIFY_EXPR, type, fpr, -+ build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX)); -+ TREE_SIDE_EFFECTS (t) = 1; -+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); - } - -- seq = get_insns (); -- end_sequence (); -+ /* Find the overflow area. */ -+ type = TREE_TYPE (ovf); -+ if (cfun->machine->split_stack_varargs_pointer == NULL_RTX) -+ ovf_rtx = crtl->args.internal_arg_pointer; -+ else -+ ovf_rtx = cfun->machine->split_stack_varargs_pointer; -+ t = make_tree (type, ovf_rtx); -+ if (words != 0) -+ t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD); - -- entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)); -- insert_insn_on_edge (seq, entry_edge); -- commit_one_edge_insertion (entry_edge); -+ t = build2 (MODIFY_EXPR, type, ovf, t); -+ TREE_SIDE_EFFECTS (t) = 1; -+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); -+ -+ if (ix86_varargs_gpr_size || ix86_varargs_fpr_size) -+ { -+ /* Find the register save area. -+ Prologue of the function save it right above stack frame. */ -+ type = TREE_TYPE (sav); -+ t = make_tree (type, frame_pointer_rtx); -+ if (!ix86_varargs_gpr_size) -+ t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX); -+ -+ t = build2 (MODIFY_EXPR, type, sav, t); -+ TREE_SIDE_EFFECTS (t) = 1; -+ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); -+ } - } - --/* Initialize a variable CUM of type CUMULATIVE_ARGS -- for a call to a function whose data type is FNTYPE. -- For a library call, FNTYPE is 0. */ -+/* Implement va_arg. 
*/ - --void --init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ -- tree fntype, /* tree ptr for function decl */ -- rtx libname, /* SYMBOL_REF of library name or 0 */ -- tree fndecl, -- int caller) -+static tree -+ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, -+ gimple_seq *post_p) - { -- struct cgraph_local_info *i = NULL; -- struct cgraph_node *target = NULL; -+ static const int intreg[6] = { 0, 1, 2, 3, 4, 5 }; -+ tree f_gpr, f_fpr, f_ovf, f_sav; -+ tree gpr, fpr, ovf, sav, t; -+ int size, rsize; -+ tree lab_false, lab_over = NULL_TREE; -+ tree addr, t2; -+ rtx container; -+ int indirect_p = 0; -+ tree ptrtype; -+ machine_mode nat_mode; -+ unsigned int arg_boundary; -+ unsigned int type_align; - -- memset (cum, 0, sizeof (*cum)); -+ /* Only 64bit target needs something special. */ -+ if (is_va_list_char_pointer (TREE_TYPE (valist))) -+ return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); - -- if (fndecl) -+ f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node)); -+ f_fpr = DECL_CHAIN (f_gpr); -+ f_ovf = DECL_CHAIN (f_fpr); -+ f_sav = DECL_CHAIN (f_ovf); -+ -+ gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), -+ valist, f_gpr, NULL_TREE); -+ -+ fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE); -+ ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE); -+ sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE); -+ -+ indirect_p = pass_va_arg_by_reference (type); -+ if (indirect_p) -+ type = build_pointer_type (type); -+ size = arg_int_size_in_bytes (type); -+ rsize = CEIL (size, UNITS_PER_WORD); -+ -+ nat_mode = type_natural_mode (type, NULL, false); -+ switch (nat_mode) - { -- target = cgraph_node::get (fndecl); -- if (target) -+ case E_V8SFmode: -+ case E_V8SImode: -+ case E_V32QImode: -+ case E_V16HImode: -+ case E_V4DFmode: -+ case E_V4DImode: -+ case E_V16SFmode: -+ case E_V16SImode: -+ case E_V64QImode: -+ case E_V32HImode: -+ case E_V8DFmode: -+ case E_V8DImode: -+ /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */ -+ if (!TARGET_64BIT_MS_ABI) - { -- target = target->function_symbol (); -- i = cgraph_node::local_info (target->decl); -- cum->call_abi = ix86_function_abi (target->decl); -+ container = NULL; -+ break; - } -- else -- cum->call_abi = ix86_function_abi (fndecl); -+ /* FALLTHRU */ -+ -+ default: -+ container = construct_container (nat_mode, TYPE_MODE (type), -+ type, 0, X86_64_REGPARM_MAX, -+ X86_64_SSE_REGPARM_MAX, intreg, -+ 0); -+ break; - } -- else -- cum->call_abi = ix86_function_type_abi (fntype); - -- cum->caller = caller; -+ /* Pull the value out of the saved registers. */ - -- /* Set up the number of registers to use for passing arguments. */ -- cum->nregs = ix86_regparm; -- if (TARGET_64BIT) -- { -- cum->nregs = (cum->call_abi == SYSV_ABI -- ? X86_64_REGPARM_MAX -- : X86_64_MS_REGPARM_MAX); -- } -- if (TARGET_SSE) -+ addr = create_tmp_var (ptr_type_node, "addr"); -+ type_align = TYPE_ALIGN (type); -+ -+ if (container) - { -- cum->sse_nregs = SSE_REGPARM_MAX; -- if (TARGET_64BIT) -- { -- cum->sse_nregs = (cum->call_abi == SYSV_ABI -- ? 
X86_64_SSE_REGPARM_MAX -- : X86_64_MS_SSE_REGPARM_MAX); -- } -- } -- if (TARGET_MMX) -- cum->mmx_nregs = MMX_REGPARM_MAX; -- cum->warn_avx512f = true; -- cum->warn_avx = true; -- cum->warn_sse = true; -- cum->warn_mmx = true; -+ int needed_intregs, needed_sseregs; -+ bool need_temp; -+ tree int_addr, sse_addr; - -- /* Because type might mismatch in between caller and callee, we need to -- use actual type of function for local calls. -- FIXME: cgraph_analyze can be told to actually record if function uses -- va_start so for local functions maybe_vaarg can be made aggressive -- helping K&R code. -- FIXME: once typesytem is fixed, we won't need this code anymore. */ -- if (i && i->local && i->can_change_signature) -- fntype = TREE_TYPE (target->decl); -- cum->stdarg = stdarg_p (fntype); -- cum->maybe_vaarg = (fntype -- ? (!prototype_p (fntype) || stdarg_p (fntype)) -- : !libname); -+ lab_false = create_artificial_label (UNKNOWN_LOCATION); -+ lab_over = create_artificial_label (UNKNOWN_LOCATION); - -- cum->decl = fndecl; -+ examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs); - -- cum->warn_empty = !warn_abi || cum->stdarg; -- if (!cum->warn_empty && fntype) -- { -- function_args_iterator iter; -- tree argtype; -- bool seen_empty_type = false; -- FOREACH_FUNCTION_ARGS (fntype, argtype, iter) -+ need_temp = (!REG_P (container) -+ && ((needed_intregs && TYPE_ALIGN (type) > 64) -+ || TYPE_ALIGN (type) > 128)); -+ -+ /* In case we are passing structure, verify that it is consecutive block -+ on the register save area. If not we need to do moves. */ -+ if (!need_temp && !REG_P (container)) - { -- if (argtype == error_mark_node || VOID_TYPE_P (argtype)) -- break; -- if (TYPE_EMPTY_P (argtype)) -- seen_empty_type = true; -- else if (seen_empty_type) -+ /* Verify that all registers are strictly consecutive */ -+ if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0)))) - { -- cum->warn_empty = true; -- break; -- } -- } -- } -+ int i; - -- if (!TARGET_64BIT) -- { -- /* If there are variable arguments, then we won't pass anything -- in registers in 32-bit mode. */ -- if (stdarg_p (fntype)) -+ for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++) -+ { -+ rtx slot = XVECEXP (container, 0, i); -+ if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i -+ || INTVAL (XEXP (slot, 1)) != i * 16) -+ need_temp = true; -+ } -+ } -+ else -+ { -+ int i; -+ -+ for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++) -+ { -+ rtx slot = XVECEXP (container, 0, i); -+ if (REGNO (XEXP (slot, 0)) != (unsigned int) i -+ || INTVAL (XEXP (slot, 1)) != i * 8) -+ need_temp = true; -+ } -+ } -+ } -+ if (!need_temp) - { -- cum->nregs = 0; -- /* Since in 32-bit, variable arguments are always passed on -- stack, there is scratch register available for indirect -- sibcall. */ -- cfun->machine->arg_reg_available = true; -- cum->sse_nregs = 0; -- cum->mmx_nregs = 0; -- cum->warn_avx512f = false; -- cum->warn_avx = false; -- cum->warn_sse = false; -- cum->warn_mmx = false; -- return; -+ int_addr = addr; -+ sse_addr = addr; -+ } -+ else -+ { -+ int_addr = create_tmp_var (ptr_type_node, "int_addr"); -+ sse_addr = create_tmp_var (ptr_type_node, "sse_addr"); - } - -- /* Use ecx and edx registers if function has fastcall attribute, -- else look for regparm information. */ -- if (fntype) -+ /* First ensure that we fit completely in registers. 
*/ -+ if (needed_intregs) - { -- unsigned int ccvt = ix86_get_callcvt (fntype); -- if ((ccvt & IX86_CALLCVT_THISCALL) != 0) -- { -- cum->nregs = 1; -- cum->fastcall = 1; /* Same first register as in fastcall. */ -- } -- else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) -- { -- cum->nregs = 2; -- cum->fastcall = 1; -- } -- else -- cum->nregs = ix86_function_regparm (fntype, fndecl); -+ t = build_int_cst (TREE_TYPE (gpr), -+ (X86_64_REGPARM_MAX - needed_intregs + 1) * 8); -+ t = build2 (GE_EXPR, boolean_type_node, gpr, t); -+ t2 = build1 (GOTO_EXPR, void_type_node, lab_false); -+ t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE); -+ gimplify_and_add (t, pre_p); -+ } -+ if (needed_sseregs) -+ { -+ t = build_int_cst (TREE_TYPE (fpr), -+ (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16 -+ + X86_64_REGPARM_MAX * 8); -+ t = build2 (GE_EXPR, boolean_type_node, fpr, t); -+ t2 = build1 (GOTO_EXPR, void_type_node, lab_false); -+ t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE); -+ gimplify_and_add (t, pre_p); - } - -- /* Set up the number of SSE registers used for passing SFmode -- and DFmode arguments. Warn for mismatching ABI. */ -- cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true); -- } -+ /* Compute index to start of area used for integer regs. */ -+ if (needed_intregs) -+ { -+ /* int_addr = gpr + sav; */ -+ t = fold_build_pointer_plus (sav, gpr); -+ gimplify_assign (int_addr, t, pre_p); -+ } -+ if (needed_sseregs) -+ { -+ /* sse_addr = fpr + sav; */ -+ t = fold_build_pointer_plus (sav, fpr); -+ gimplify_assign (sse_addr, t, pre_p); -+ } -+ if (need_temp) -+ { -+ int i, prev_size = 0; -+ tree temp = create_tmp_var (type, "va_arg_tmp"); - -- cfun->machine->arg_reg_available = (cum->nregs > 0); --} -+ /* addr = &temp; */ -+ t = build1 (ADDR_EXPR, build_pointer_type (type), temp); -+ gimplify_assign (addr, t, pre_p); - --/* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE. -- But in the case of vector types, it is some vector mode. -+ for (i = 0; i < XVECLEN (container, 0); i++) -+ { -+ rtx slot = XVECEXP (container, 0, i); -+ rtx reg = XEXP (slot, 0); -+ machine_mode mode = GET_MODE (reg); -+ tree piece_type; -+ tree addr_type; -+ tree daddr_type; -+ tree src_addr, src; -+ int src_offset; -+ tree dest_addr, dest; -+ int cur_size = GET_MODE_SIZE (mode); - -- When we have only some of our vector isa extensions enabled, then there -- are some modes for which vector_mode_supported_p is false. For these -- modes, the generic vector support in gcc will choose some non-vector mode -- in order to implement the type. By computing the natural mode, we'll -- select the proper ABI location for the operand and not depend on whatever -- the middle-end decides to do with these vector types. -+ gcc_assert (prev_size <= INTVAL (XEXP (slot, 1))); -+ prev_size = INTVAL (XEXP (slot, 1)); -+ if (prev_size + cur_size > size) -+ { -+ cur_size = size - prev_size; -+ unsigned int nbits = cur_size * BITS_PER_UNIT; -+ if (!int_mode_for_size (nbits, 1).exists (&mode)) -+ mode = QImode; -+ } -+ piece_type = lang_hooks.types.type_for_mode (mode, 1); -+ if (mode == GET_MODE (reg)) -+ addr_type = build_pointer_type (piece_type); -+ else -+ addr_type = build_pointer_type_for_mode (piece_type, ptr_mode, -+ true); -+ daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode, -+ true); - -- The midde-end can't deal with the vector types > 16 bytes. In this -- case, we return the original mode and warn ABI change if CUM isn't -- NULL. 
-+ if (SSE_REGNO_P (REGNO (reg))) -+ { -+ src_addr = sse_addr; -+ src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16; -+ } -+ else -+ { -+ src_addr = int_addr; -+ src_offset = REGNO (reg) * 8; -+ } -+ src_addr = fold_convert (addr_type, src_addr); -+ src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset); - -- If INT_RETURN is true, warn ABI change if the vector mode isn't -- available for function return value. */ -+ dest_addr = fold_convert (daddr_type, addr); -+ dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size); -+ if (cur_size == GET_MODE_SIZE (mode)) -+ { -+ src = build_va_arg_indirect_ref (src_addr); -+ dest = build_va_arg_indirect_ref (dest_addr); - --static machine_mode --type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum, -- bool in_return) --{ -- machine_mode mode = TYPE_MODE (type); -+ gimplify_assign (dest, src, pre_p); -+ } -+ else -+ { -+ tree copy -+ = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY), -+ 3, dest_addr, src_addr, -+ size_int (cur_size)); -+ gimplify_and_add (copy, pre_p); -+ } -+ prev_size += cur_size; -+ } -+ } - -- if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode)) -- { -- HOST_WIDE_INT size = int_size_in_bytes (type); -- if ((size == 8 || size == 16 || size == 32 || size == 64) -- /* ??? Generic code allows us to create width 1 vectors. Ignore. */ -- && TYPE_VECTOR_SUBPARTS (type) > 1) -+ if (needed_intregs) - { -- machine_mode innermode = TYPE_MODE (TREE_TYPE (type)); -+ t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr, -+ build_int_cst (TREE_TYPE (gpr), needed_intregs * 8)); -+ gimplify_assign (gpr, t, pre_p); -+ /* The GPR save area guarantees only 8-byte alignment. */ -+ if (!need_temp) -+ type_align = MIN (type_align, 64); -+ } - -- /* There are no XFmode vector modes. */ -- if (innermode == XFmode) -- return mode; -+ if (needed_sseregs) -+ { -+ t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr, -+ build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16)); -+ gimplify_assign (unshare_expr (fpr), t, pre_p); -+ } - -- if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE) -- mode = MIN_MODE_VECTOR_FLOAT; -- else -- mode = MIN_MODE_VECTOR_INT; -+ gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over)); - -- /* Get the mode which has this inner mode and number of units. */ -- FOR_EACH_MODE_FROM (mode, mode) -- if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type) -- && GET_MODE_INNER (mode) == innermode) -- { -- if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU) -- { -- static bool warnedavx512f; -- static bool warnedavx512f_ret; -+ gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false)); -+ } - -- if (cum && cum->warn_avx512f && !warnedavx512f) -- { -- if (warning (OPT_Wpsabi, "AVX512F vector argument " -- "without AVX512F enabled changes the ABI")) -- warnedavx512f = true; -- } -- else if (in_return && !warnedavx512f_ret) -- { -- if (warning (OPT_Wpsabi, "AVX512F vector return " -- "without AVX512F enabled changes the ABI")) -- warnedavx512f_ret = true; -- } -+ /* ... otherwise out of the overflow area. */ - -- return TYPE_MODE (type); -- } -- else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU) -- { -- static bool warnedavx; -- static bool warnedavx_ret; -+ /* When we align parameter on stack for caller, if the parameter -+ alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be -+ aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee -+ here with caller. 
*/ -+ arg_boundary = ix86_function_arg_boundary (VOIDmode, type); -+ if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT) -+ arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT; - -- if (cum && cum->warn_avx && !warnedavx) -- { -- if (warning (OPT_Wpsabi, "AVX vector argument " -- "without AVX enabled changes the ABI")) -- warnedavx = true; -- } -- else if (in_return && !warnedavx_ret) -- { -- if (warning (OPT_Wpsabi, "AVX vector return " -- "without AVX enabled changes the ABI")) -- warnedavx_ret = true; -- } -+ /* Care for on-stack alignment if needed. */ -+ if (arg_boundary <= 64 || size == 0) -+ t = ovf; -+ else -+ { -+ HOST_WIDE_INT align = arg_boundary / 8; -+ t = fold_build_pointer_plus_hwi (ovf, align - 1); -+ t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, -+ build_int_cst (TREE_TYPE (t), -align)); -+ } - -- return TYPE_MODE (type); -- } -- else if (((size == 8 && TARGET_64BIT) || size == 16) -- && !TARGET_SSE -- && !TARGET_IAMCU) -- { -- static bool warnedsse; -- static bool warnedsse_ret; -+ gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue); -+ gimplify_assign (addr, t, pre_p); - -- if (cum && cum->warn_sse && !warnedsse) -- { -- if (warning (OPT_Wpsabi, "SSE vector argument " -- "without SSE enabled changes the ABI")) -- warnedsse = true; -- } -- else if (!TARGET_64BIT && in_return && !warnedsse_ret) -- { -- if (warning (OPT_Wpsabi, "SSE vector return " -- "without SSE enabled changes the ABI")) -- warnedsse_ret = true; -- } -- } -- else if ((size == 8 && !TARGET_64BIT) -- && (!cfun -- || cfun->machine->func_type == TYPE_NORMAL) -- && !TARGET_MMX -- && !TARGET_IAMCU) -- { -- static bool warnedmmx; -- static bool warnedmmx_ret; -+ t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD); -+ gimplify_assign (unshare_expr (ovf), t, pre_p); - -- if (cum && cum->warn_mmx && !warnedmmx) -- { -- if (warning (OPT_Wpsabi, "MMX vector argument " -- "without MMX enabled changes the ABI")) -- warnedmmx = true; -- } -- else if (in_return && !warnedmmx_ret) -- { -- if (warning (OPT_Wpsabi, "MMX vector return " -- "without MMX enabled changes the ABI")) -- warnedmmx_ret = true; -- } -- } -- return mode; -- } -+ if (container) -+ gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over)); - -- gcc_unreachable (); -- } -- } -+ type = build_aligned_type (type, type_align); -+ ptrtype = build_pointer_type_for_mode (type, ptr_mode, true); -+ addr = fold_convert (ptrtype, addr); - -- return mode; -+ if (indirect_p) -+ addr = build_va_arg_indirect_ref (addr); -+ return build_va_arg_indirect_ref (addr); - } -+ -+/* Return true if OPNUM's MEM should be matched -+ in movabs* patterns. */ - --/* We want to pass a value in REGNO whose "natural" mode is MODE. However, -- this may not agree with the mode that the type system has chosen for the -- register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can -- go ahead and use it. Otherwise we have to build a PARALLEL instead. 
*/ -- --static rtx --gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode, -- unsigned int regno) -+bool -+ix86_check_movabs (rtx insn, int opnum) - { -- rtx tmp; -+ rtx set, mem; - -- if (orig_mode != BLKmode) -- tmp = gen_rtx_REG (orig_mode, regno); -- else -+ set = PATTERN (insn); -+ if (GET_CODE (set) == PARALLEL) -+ set = XVECEXP (set, 0, 0); -+ gcc_assert (GET_CODE (set) == SET); -+ mem = XEXP (set, opnum); -+ while (SUBREG_P (mem)) -+ mem = SUBREG_REG (mem); -+ gcc_assert (MEM_P (mem)); -+ return volatile_ok || !MEM_VOLATILE_P (mem); -+} -+ -+/* Return false if INSN contains a MEM with a non-default address space. */ -+bool -+ix86_check_no_addr_space (rtx insn) -+{ -+ subrtx_var_iterator::array_type array; -+ FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL) - { -- tmp = gen_rtx_REG (mode, regno); -- tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx); -- tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp)); -+ rtx x = *iter; -+ if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x))) -+ return false; - } -- -- return tmp; -+ return true; - } -+ -+/* Initialize the table of extra 80387 mathematical constants. */ - --/* x86-64 register passing implementation. See x86-64 ABI for details. Goal -- of this code is to classify each 8bytes of incoming argument by the register -- class and assign registers accordingly. */ -- --/* Return the union class of CLASS1 and CLASS2. -- See the x86-64 PS ABI for details. */ -- --static enum x86_64_reg_class --merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2) -+static void -+init_ext_80387_constants (void) - { -- /* Rule #1: If both classes are equal, this is the resulting class. */ -- if (class1 == class2) -- return class1; -- -- /* Rule #2: If one of the classes is NO_CLASS, the resulting class is -- the other class. */ -- if (class1 == X86_64_NO_CLASS) -- return class2; -- if (class2 == X86_64_NO_CLASS) -- return class1; -- -- /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */ -- if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS) -- return X86_64_MEMORY_CLASS; -- -- /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */ -- if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS) -- || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS)) -- return X86_64_INTEGERSI_CLASS; -- if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS -- || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS) -- return X86_64_INTEGER_CLASS; -+ static const char * cst[5] = -+ { -+ "0.3010299956639811952256464283594894482", /* 0: fldlg2 */ -+ "0.6931471805599453094286904741849753009", /* 1: fldln2 */ -+ "1.4426950408889634073876517827983434472", /* 2: fldl2e */ -+ "3.3219280948873623478083405569094566090", /* 3: fldl2t */ -+ "3.1415926535897932385128089594061862044", /* 4: fldpi */ -+ }; -+ int i; - -- /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, -- MEMORY is used. */ -- if (class1 == X86_64_X87_CLASS -- || class1 == X86_64_X87UP_CLASS -- || class1 == X86_64_COMPLEX_X87_CLASS -- || class2 == X86_64_X87_CLASS -- || class2 == X86_64_X87UP_CLASS -- || class2 == X86_64_COMPLEX_X87_CLASS) -- return X86_64_MEMORY_CLASS; -+ for (i = 0; i < 5; i++) -+ { -+ real_from_string (&ext_80387_constants_table[i], cst[i]); -+ /* Ensure each constant is rounded to XFmode precision. 
*/ -+ real_convert (&ext_80387_constants_table[i], -+ XFmode, &ext_80387_constants_table[i]); -+ } - -- /* Rule #6: Otherwise class SSE is used. */ -- return X86_64_SSE_CLASS; -+ ext_80387_constants_init = 1; - } - --/* Classify the argument of type TYPE and mode MODE. -- CLASSES will be filled by the register class used to pass each word -- of the operand. The number of words is returned. In case the parameter -- should be passed in memory, 0 is returned. As a special case for zero -- sized containers, classes[0] will be NO_CLASS and 1 is returned. -+/* Return non-zero if the constant is something that -+ can be loaded with a special instruction. */ - -- BIT_OFFSET is used internally for handling records and specifies offset -- of the offset in bits modulo 512 to avoid overflow cases. -+int -+standard_80387_constant_p (rtx x) -+{ -+ machine_mode mode = GET_MODE (x); - -- See the x86-64 PS ABI for details. --*/ -+ const REAL_VALUE_TYPE *r; - --static int --classify_argument (machine_mode mode, const_tree type, -- enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset) --{ -- HOST_WIDE_INT bytes -- = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); -- int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD); -+ if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode))) -+ return -1; - -- /* Variable sized entities are always passed/returned in memory. */ -- if (bytes < 0) -- return 0; -+ if (x == CONST0_RTX (mode)) -+ return 1; -+ if (x == CONST1_RTX (mode)) -+ return 2; - -- if (mode != VOIDmode -- && targetm.calls.must_pass_in_stack (mode, type)) -- return 0; -+ r = CONST_DOUBLE_REAL_VALUE (x); - -- if (type && AGGREGATE_TYPE_P (type)) -+ /* For XFmode constants, try to find a special 80387 instruction when -+ optimizing for size or on those CPUs that benefit from them. */ -+ if (mode == XFmode -+ && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS)) - { - int i; -- tree field; -- enum x86_64_reg_class subclasses[MAX_CLASSES]; - -- /* On x86-64 we pass structures larger than 64 bytes on the stack. */ -- if (bytes > 64) -- return 0; -+ if (! ext_80387_constants_init) -+ init_ext_80387_constants (); - -- for (i = 0; i < words; i++) -- classes[i] = X86_64_NO_CLASS; -+ for (i = 0; i < 5; i++) -+ if (real_identical (r, &ext_80387_constants_table[i])) -+ return i + 3; -+ } - -- /* Zero sized arrays or structures are NO_CLASS. We return 0 to -- signalize memory class, so handle it as special case. */ -- if (!words) -- { -- classes[0] = X86_64_NO_CLASS; -- return 1; -- } -+ /* Load of the constant -0.0 or -1.0 will be split as -+ fldz;fchs or fld1;fchs sequence. */ -+ if (real_isnegzero (r)) -+ return 8; -+ if (real_identical (r, &dconstm1)) -+ return 9; - -- /* Classify each field of record and merge classes. */ -- switch (TREE_CODE (type)) -- { -- case RECORD_TYPE: -- /* And now merge the fields of structure. */ -- for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) -- { -- if (TREE_CODE (field) == FIELD_DECL) -- { -- int num; -+ return 0; -+} - -- if (TREE_TYPE (field) == error_mark_node) -- continue; -+/* Return the opcode of the special instruction to be used to load -+ the constant X. */ - -- /* Bitfields are always classified as integer. Handle them -- early, since later code would consider them to be -- misaligned integers. 
*/ -- if (DECL_BIT_FIELD (field)) -- { -- for (i = (int_bit_position (field) -- + (bit_offset % 64)) / 8 / 8; -- i < ((int_bit_position (field) + (bit_offset % 64)) -- + tree_to_shwi (DECL_SIZE (field)) -- + 63) / 8 / 8; i++) -- classes[i] -- = merge_classes (X86_64_INTEGER_CLASS, classes[i]); -- } -- else -- { -- int pos; -+const char * -+standard_80387_constant_opcode (rtx x) -+{ -+ switch (standard_80387_constant_p (x)) -+ { -+ case 1: -+ return "fldz"; -+ case 2: -+ return "fld1"; -+ case 3: -+ return "fldlg2"; -+ case 4: -+ return "fldln2"; -+ case 5: -+ return "fldl2e"; -+ case 6: -+ return "fldl2t"; -+ case 7: -+ return "fldpi"; -+ case 8: -+ case 9: -+ return "#"; -+ default: -+ gcc_unreachable (); -+ } -+} - -- type = TREE_TYPE (field); -+/* Return the CONST_DOUBLE representing the 80387 constant that is -+ loaded by the specified special instruction. The argument IDX -+ matches the return value from standard_80387_constant_p. */ - -- /* Flexible array member is ignored. */ -- if (TYPE_MODE (type) == BLKmode -- && TREE_CODE (type) == ARRAY_TYPE -- && TYPE_SIZE (type) == NULL_TREE -- && TYPE_DOMAIN (type) != NULL_TREE -- && (TYPE_MAX_VALUE (TYPE_DOMAIN (type)) -- == NULL_TREE)) -- { -- static bool warned; -+rtx -+standard_80387_constant_rtx (int idx) -+{ -+ int i; - -- if (!warned && warn_psabi) -- { -- warned = true; -- inform (input_location, -- "the ABI of passing struct with" -- " a flexible array member has" -- " changed in GCC 4.4"); -- } -- continue; -- } -- num = classify_argument (TYPE_MODE (type), type, -- subclasses, -- (int_bit_position (field) -- + bit_offset) % 512); -- if (!num) -- return 0; -- pos = (int_bit_position (field) -- + (bit_offset % 64)) / 8 / 8; -- for (i = 0; i < num && (i + pos) < words; i++) -- classes[i + pos] -- = merge_classes (subclasses[i], classes[i + pos]); -- } -- } -- } -- break; -+ if (! ext_80387_constants_init) -+ init_ext_80387_constants (); - -- case ARRAY_TYPE: -- /* Arrays are handled as small records. */ -- { -- int num; -- num = classify_argument (TYPE_MODE (TREE_TYPE (type)), -- TREE_TYPE (type), subclasses, bit_offset); -- if (!num) -- return 0; -+ switch (idx) -+ { -+ case 3: -+ case 4: -+ case 5: -+ case 6: -+ case 7: -+ i = idx - 3; -+ break; - -- /* The partial classes are now full classes. */ -- if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4) -- subclasses[0] = X86_64_SSE_CLASS; -- if (subclasses[0] == X86_64_INTEGERSI_CLASS -- && !((bit_offset % 64) == 0 && bytes == 4)) -- subclasses[0] = X86_64_INTEGER_CLASS; -+ default: -+ gcc_unreachable (); -+ } - -- for (i = 0; i < words; i++) -- classes[i] = subclasses[i % num]; -+ return const_double_from_real_value (ext_80387_constants_table[i], -+ XFmode); -+} - -- break; -- } -- case UNION_TYPE: -- case QUAL_UNION_TYPE: -- /* Unions are similar to RECORD_TYPE but offset is always 0. -- */ -- for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) -- { -- if (TREE_CODE (field) == FIELD_DECL) -- { -- int num; -+/* Return 1 if X is all bits 0 and 2 if X is all bits 1 -+ in supported SSE/AVX vector mode. 
*/ - -- if (TREE_TYPE (field) == error_mark_node) -- continue; -+int -+standard_sse_constant_p (rtx x, machine_mode pred_mode) -+{ -+ machine_mode mode; - -- num = classify_argument (TYPE_MODE (TREE_TYPE (field)), -- TREE_TYPE (field), subclasses, -- bit_offset); -- if (!num) -- return 0; -- for (i = 0; i < num && i < words; i++) -- classes[i] = merge_classes (subclasses[i], classes[i]); -- } -- } -- break; -+ if (!TARGET_SSE) -+ return 0; - -- default: -- gcc_unreachable (); -- } -+ mode = GET_MODE (x); - -- if (words > 2) -- { -- /* When size > 16 bytes, if the first one isn't -- X86_64_SSE_CLASS or any other ones aren't -- X86_64_SSEUP_CLASS, everything should be passed in -- memory. */ -- if (classes[0] != X86_64_SSE_CLASS) -- return 0; -+ if (x == const0_rtx || const0_operand (x, mode)) -+ return 1; - -- for (i = 1; i < words; i++) -- if (classes[i] != X86_64_SSEUP_CLASS) -- return 0; -- } -+ if (x == constm1_rtx || vector_all_ones_operand (x, mode)) -+ { -+ /* VOIDmode integer constant, get mode from the predicate. */ -+ if (mode == VOIDmode) -+ mode = pred_mode; - -- /* Final merger cleanup. */ -- for (i = 0; i < words; i++) -+ switch (GET_MODE_SIZE (mode)) - { -- /* If one class is MEMORY, everything should be passed in -- memory. */ -- if (classes[i] == X86_64_MEMORY_CLASS) -- return 0; -- -- /* The X86_64_SSEUP_CLASS should be always preceded by -- X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */ -- if (classes[i] == X86_64_SSEUP_CLASS -- && classes[i - 1] != X86_64_SSE_CLASS -- && classes[i - 1] != X86_64_SSEUP_CLASS) -- { -- /* The first one should never be X86_64_SSEUP_CLASS. */ -- gcc_assert (i != 0); -- classes[i] = X86_64_SSE_CLASS; -- } -- -- /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS, -- everything should be passed in memory. */ -- if (classes[i] == X86_64_X87UP_CLASS -- && (classes[i - 1] != X86_64_X87_CLASS)) -- { -- static bool warned; -- -- /* The first one should never be X86_64_X87UP_CLASS. */ -- gcc_assert (i != 0); -- if (!warned && warn_psabi) -- { -- warned = true; -- inform (input_location, -- "the ABI of passing union with long double" -- " has changed in GCC 4.4"); -- } -- return 0; -- } -+ case 64: -+ if (TARGET_AVX512F) -+ return 2; -+ break; -+ case 32: -+ if (TARGET_AVX2) -+ return 2; -+ break; -+ case 16: -+ if (TARGET_SSE2) -+ return 2; -+ break; -+ case 0: -+ /* VOIDmode */ -+ gcc_unreachable (); -+ default: -+ break; - } -- return words; - } - -- /* Compute alignment needed. We align all types to natural boundaries with -- exception of XFmode that is aligned to 64bits. */ -- if (mode != VOIDmode && mode != BLKmode) -- { -- int mode_alignment = GET_MODE_BITSIZE (mode); -- -- if (mode == XFmode) -- mode_alignment = 128; -- else if (mode == XCmode) -- mode_alignment = 256; -- if (COMPLEX_MODE_P (mode)) -- mode_alignment /= 2; -- /* Misaligned fields are always returned in memory. */ -- if (bit_offset % mode_alignment) -- return 0; -- } -+ return 0; -+} - -- /* for V1xx modes, just use the base mode */ -- if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode -- && GET_MODE_UNIT_SIZE (mode) == bytes) -- mode = GET_MODE_INNER (mode); -+/* Return the opcode of the special instruction to be used to load -+ the constant operands[1] into operands[0]. */ - -- /* Classification of atomic types. 
*/ -- switch (mode) -- { -- case E_SDmode: -- case E_DDmode: -- classes[0] = X86_64_SSE_CLASS; -- return 1; -- case E_TDmode: -- classes[0] = X86_64_SSE_CLASS; -- classes[1] = X86_64_SSEUP_CLASS; -- return 2; -- case E_DImode: -- case E_SImode: -- case E_HImode: -- case E_QImode: -- case E_CSImode: -- case E_CHImode: -- case E_CQImode: -- { -- int size = bit_offset + (int) GET_MODE_BITSIZE (mode); -+const char * -+standard_sse_constant_opcode (rtx_insn *insn, rtx *operands) -+{ -+ machine_mode mode; -+ rtx x = operands[1]; - -- /* Analyze last 128 bits only. */ -- size = (size - 1) & 0x7f; -+ gcc_assert (TARGET_SSE); - -- if (size < 32) -- { -- classes[0] = X86_64_INTEGERSI_CLASS; -- return 1; -- } -- else if (size < 64) -- { -- classes[0] = X86_64_INTEGER_CLASS; -- return 1; -- } -- else if (size < 64+32) -- { -- classes[0] = X86_64_INTEGER_CLASS; -- classes[1] = X86_64_INTEGERSI_CLASS; -- return 2; -- } -- else if (size < 64+64) -- { -- classes[0] = classes[1] = X86_64_INTEGER_CLASS; -- return 2; -- } -- else -- gcc_unreachable (); -- } -- case E_CDImode: -- case E_TImode: -- classes[0] = classes[1] = X86_64_INTEGER_CLASS; -- return 2; -- case E_COImode: -- case E_OImode: -- /* OImode shouldn't be used directly. */ -- gcc_unreachable (); -- case E_CTImode: -- return 0; -- case E_SFmode: -- if (!(bit_offset % 64)) -- classes[0] = X86_64_SSESF_CLASS; -- else -- classes[0] = X86_64_SSE_CLASS; -- return 1; -- case E_DFmode: -- classes[0] = X86_64_SSEDF_CLASS; -- return 1; -- case E_XFmode: -- classes[0] = X86_64_X87_CLASS; -- classes[1] = X86_64_X87UP_CLASS; -- return 2; -- case E_TFmode: -- classes[0] = X86_64_SSE_CLASS; -- classes[1] = X86_64_SSEUP_CLASS; -- return 2; -- case E_SCmode: -- classes[0] = X86_64_SSE_CLASS; -- if (!(bit_offset % 64)) -- return 1; -- else -+ mode = GET_MODE (x); -+ -+ if (x == const0_rtx || const0_operand (x, mode)) -+ { -+ switch (get_attr_mode (insn)) - { -- static bool warned; -+ case MODE_TI: -+ if (!EXT_REX_SSE_REG_P (operands[0])) -+ return "%vpxor\t%0, %d0"; -+ /* FALLTHRU */ -+ case MODE_XI: -+ case MODE_OI: -+ if (EXT_REX_SSE_REG_P (operands[0])) -+ return (TARGET_AVX512VL -+ ? "vpxord\t%x0, %x0, %x0" -+ : "vpxord\t%g0, %g0, %g0"); -+ return "vpxor\t%x0, %x0, %x0"; - -- if (!warned && warn_psabi) -- { -- warned = true; -- inform (input_location, -- "the ABI of passing structure with complex float" -- " member has changed in GCC 4.4"); -- } -- classes[1] = X86_64_SSESF_CLASS; -- return 2; -+ case MODE_V2DF: -+ if (!EXT_REX_SSE_REG_P (operands[0])) -+ return "%vxorpd\t%0, %d0"; -+ /* FALLTHRU */ -+ case MODE_V8DF: -+ case MODE_V4DF: -+ if (!EXT_REX_SSE_REG_P (operands[0])) -+ return "vxorpd\t%x0, %x0, %x0"; -+ else if (TARGET_AVX512DQ) -+ return (TARGET_AVX512VL -+ ? "vxorpd\t%x0, %x0, %x0" -+ : "vxorpd\t%g0, %g0, %g0"); -+ else -+ return (TARGET_AVX512VL -+ ? "vpxorq\t%x0, %x0, %x0" -+ : "vpxorq\t%g0, %g0, %g0"); -+ -+ case MODE_V4SF: -+ if (!EXT_REX_SSE_REG_P (operands[0])) -+ return "%vxorps\t%0, %d0"; -+ /* FALLTHRU */ -+ case MODE_V16SF: -+ case MODE_V8SF: -+ if (!EXT_REX_SSE_REG_P (operands[0])) -+ return "vxorps\t%x0, %x0, %x0"; -+ else if (TARGET_AVX512DQ) -+ return (TARGET_AVX512VL -+ ? "vxorps\t%x0, %x0, %x0" -+ : "vxorps\t%g0, %g0, %g0"); -+ else -+ return (TARGET_AVX512VL -+ ? 
"vpxord\t%x0, %x0, %x0" -+ : "vpxord\t%g0, %g0, %g0"); -+ -+ default: -+ gcc_unreachable (); - } -- case E_DCmode: -- classes[0] = X86_64_SSEDF_CLASS; -- classes[1] = X86_64_SSEDF_CLASS; -- return 2; -- case E_XCmode: -- classes[0] = X86_64_COMPLEX_X87_CLASS; -- return 1; -- case E_TCmode: -- /* This modes is larger than 16 bytes. */ -- return 0; -- case E_V8SFmode: -- case E_V8SImode: -- case E_V32QImode: -- case E_V16HImode: -- case E_V4DFmode: -- case E_V4DImode: -- classes[0] = X86_64_SSE_CLASS; -- classes[1] = X86_64_SSEUP_CLASS; -- classes[2] = X86_64_SSEUP_CLASS; -- classes[3] = X86_64_SSEUP_CLASS; -- return 4; -- case E_V8DFmode: -- case E_V16SFmode: -- case E_V8DImode: -- case E_V16SImode: -- case E_V32HImode: -- case E_V64QImode: -- classes[0] = X86_64_SSE_CLASS; -- classes[1] = X86_64_SSEUP_CLASS; -- classes[2] = X86_64_SSEUP_CLASS; -- classes[3] = X86_64_SSEUP_CLASS; -- classes[4] = X86_64_SSEUP_CLASS; -- classes[5] = X86_64_SSEUP_CLASS; -- classes[6] = X86_64_SSEUP_CLASS; -- classes[7] = X86_64_SSEUP_CLASS; -- return 8; -- case E_V4SFmode: -- case E_V4SImode: -- case E_V16QImode: -- case E_V8HImode: -- case E_V2DFmode: -- case E_V2DImode: -- classes[0] = X86_64_SSE_CLASS; -- classes[1] = X86_64_SSEUP_CLASS; -- return 2; -- case E_V1TImode: -- case E_V1DImode: -- case E_V2SFmode: -- case E_V2SImode: -- case E_V4HImode: -- case E_V8QImode: -- classes[0] = X86_64_SSE_CLASS; -- return 1; -- case E_BLKmode: -- case E_VOIDmode: -- return 0; -- default: -- gcc_assert (VECTOR_MODE_P (mode)); -+ } -+ else if (x == constm1_rtx || vector_all_ones_operand (x, mode)) -+ { -+ enum attr_mode insn_mode = get_attr_mode (insn); -+ -+ switch (insn_mode) -+ { -+ case MODE_XI: -+ case MODE_V8DF: -+ case MODE_V16SF: -+ gcc_assert (TARGET_AVX512F); -+ return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}"; - -- if (bytes > 16) -- return 0; -+ case MODE_OI: -+ case MODE_V4DF: -+ case MODE_V8SF: -+ gcc_assert (TARGET_AVX2); -+ /* FALLTHRU */ -+ case MODE_TI: -+ case MODE_V2DF: -+ case MODE_V4SF: -+ gcc_assert (TARGET_SSE2); -+ if (!EXT_REX_SSE_REG_P (operands[0])) -+ return (TARGET_AVX -+ ? "vpcmpeqd\t%0, %0, %0" -+ : "pcmpeqd\t%0, %0"); -+ else if (TARGET_AVX512VL) -+ return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}"; -+ else -+ return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}"; - -- gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT); -+ default: -+ gcc_unreachable (); -+ } -+ } - -- if (bit_offset + GET_MODE_BITSIZE (mode) <= 32) -- classes[0] = X86_64_INTEGERSI_CLASS; -- else -- classes[0] = X86_64_INTEGER_CLASS; -- classes[1] = X86_64_INTEGER_CLASS; -- return 1 + (bytes > 8); -- } -+ gcc_unreachable (); - } - --/* Examine the argument and return set number of register required in each -- class. Return true iff parameter should be passed in memory. */ -+/* Returns true if INSN can be transformed from a memory load -+ to a supported FP constant load. 
*/ - --static bool --examine_argument (machine_mode mode, const_tree type, int in_return, -- int *int_nregs, int *sse_nregs) -+bool -+ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst) - { -- enum x86_64_reg_class regclass[MAX_CLASSES]; -- int n = classify_argument (mode, type, regclass, 0); -+ rtx src = find_constant_src (insn); - -- *int_nregs = 0; -- *sse_nregs = 0; -+ gcc_assert (REG_P (dst)); - -- if (!n) -- return true; -- for (n--; n >= 0; n--) -- switch (regclass[n]) -- { -- case X86_64_INTEGER_CLASS: -- case X86_64_INTEGERSI_CLASS: -- (*int_nregs)++; -- break; -- case X86_64_SSE_CLASS: -- case X86_64_SSESF_CLASS: -- case X86_64_SSEDF_CLASS: -- (*sse_nregs)++; -- break; -- case X86_64_NO_CLASS: -- case X86_64_SSEUP_CLASS: -- break; -- case X86_64_X87_CLASS: -- case X86_64_X87UP_CLASS: -- case X86_64_COMPLEX_X87_CLASS: -- if (!in_return) -- return true; -- break; -- case X86_64_MEMORY_CLASS: -- gcc_unreachable (); -- } -+ if (src == NULL -+ || (SSE_REGNO_P (REGNO (dst)) -+ && standard_sse_constant_p (src, GET_MODE (dst)) != 1) -+ || (STACK_REGNO_P (REGNO (dst)) -+ && standard_80387_constant_p (src) < 1)) -+ return false; - -- return false; -+ return true; - } - --/* Construct container for the argument used by GCC interface. See -- FUNCTION_ARG for the detailed description. */ -+/* Returns true if OP contains a symbol reference */ - --static rtx --construct_container (machine_mode mode, machine_mode orig_mode, -- const_tree type, int in_return, int nintregs, int nsseregs, -- const int *intreg, int sse_regno) -+bool -+symbolic_reference_mentioned_p (rtx op) - { -- /* The following variables hold the static issued_error state. */ -- static bool issued_sse_arg_error; -- static bool issued_sse_ret_error; -- static bool issued_x87_ret_error; -- -- machine_mode tmpmode; -- int bytes -- = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); -- enum x86_64_reg_class regclass[MAX_CLASSES]; -- int n; -+ const char *fmt; - int i; -- int nexps = 0; -- int needed_sseregs, needed_intregs; -- rtx exp[MAX_CLASSES]; -- rtx ret; - -- n = classify_argument (mode, type, regclass, 0); -- if (!n) -- return NULL; -- if (examine_argument (mode, type, in_return, &needed_intregs, -- &needed_sseregs)) -- return NULL; -- if (needed_intregs > nintregs || needed_sseregs > nsseregs) -- return NULL; -+ if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF) -+ return true; - -- /* We allowed the user to turn off SSE for kernel mode. Don't crash if -- some less clueful developer tries to use floating-point anyway. */ -- if (needed_sseregs && !TARGET_SSE) -+ fmt = GET_RTX_FORMAT (GET_CODE (op)); -+ for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) - { -- if (in_return) -- { -- if (!issued_sse_ret_error) -- { -- error ("SSE register return with SSE disabled"); -- issued_sse_ret_error = true; -- } -- } -- else if (!issued_sse_arg_error) -+ if (fmt[i] == 'E') - { -- error ("SSE register argument with SSE disabled"); -- issued_sse_arg_error = true; -+ int j; -+ -+ for (j = XVECLEN (op, i) - 1; j >= 0; j--) -+ if (symbolic_reference_mentioned_p (XVECEXP (op, i, j))) -+ return true; - } -- return NULL; -+ -+ else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i))) -+ return true; - } - -- /* Likewise, error if the ABI requires us to return values in the -- x87 registers and the user specified -mno-80387. 
*/ -- if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return) -- for (i = 0; i < n; i++) -- if (regclass[i] == X86_64_X87_CLASS -- || regclass[i] == X86_64_X87UP_CLASS -- || regclass[i] == X86_64_COMPLEX_X87_CLASS) -- { -- if (!issued_x87_ret_error) -- { -- error ("x87 register return with x87 disabled"); -- issued_x87_ret_error = true; -- } -- return NULL; -- } -+ return false; -+} - -- /* First construct simple cases. Avoid SCmode, since we want to use -- single register to pass this type. */ -- if (n == 1 && mode != SCmode) -- switch (regclass[0]) -- { -- case X86_64_INTEGER_CLASS: -- case X86_64_INTEGERSI_CLASS: -- return gen_rtx_REG (mode, intreg[0]); -- case X86_64_SSE_CLASS: -- case X86_64_SSESF_CLASS: -- case X86_64_SSEDF_CLASS: -- if (mode != BLKmode) -- return gen_reg_or_parallel (mode, orig_mode, -- GET_SSE_REGNO (sse_regno)); -- break; -- case X86_64_X87_CLASS: -- case X86_64_COMPLEX_X87_CLASS: -- return gen_rtx_REG (mode, FIRST_STACK_REG); -- case X86_64_NO_CLASS: -- /* Zero sized array, struct or class. */ -- return NULL; -- default: -- gcc_unreachable (); -- } -- if (n == 2 -- && regclass[0] == X86_64_SSE_CLASS -- && regclass[1] == X86_64_SSEUP_CLASS -- && mode != BLKmode) -- return gen_reg_or_parallel (mode, orig_mode, -- GET_SSE_REGNO (sse_regno)); -- if (n == 4 -- && regclass[0] == X86_64_SSE_CLASS -- && regclass[1] == X86_64_SSEUP_CLASS -- && regclass[2] == X86_64_SSEUP_CLASS -- && regclass[3] == X86_64_SSEUP_CLASS -- && mode != BLKmode) -- return gen_reg_or_parallel (mode, orig_mode, -- GET_SSE_REGNO (sse_regno)); -- if (n == 8 -- && regclass[0] == X86_64_SSE_CLASS -- && regclass[1] == X86_64_SSEUP_CLASS -- && regclass[2] == X86_64_SSEUP_CLASS -- && regclass[3] == X86_64_SSEUP_CLASS -- && regclass[4] == X86_64_SSEUP_CLASS -- && regclass[5] == X86_64_SSEUP_CLASS -- && regclass[6] == X86_64_SSEUP_CLASS -- && regclass[7] == X86_64_SSEUP_CLASS -- && mode != BLKmode) -- return gen_reg_or_parallel (mode, orig_mode, -- GET_SSE_REGNO (sse_regno)); -- if (n == 2 -- && regclass[0] == X86_64_X87_CLASS -- && regclass[1] == X86_64_X87UP_CLASS) -- return gen_rtx_REG (XFmode, FIRST_STACK_REG); -+/* Return true if it is appropriate to emit `ret' instructions in the -+ body of a function. Do this only if the epilogue is simple, needing a -+ couple of insns. Prior to reloading, we can't tell how many registers -+ must be saved, so return false then. Return false if there is no frame -+ marker to de-allocate. */ - -- if (n == 2 -- && regclass[0] == X86_64_INTEGER_CLASS -- && regclass[1] == X86_64_INTEGER_CLASS -- && (mode == CDImode || mode == TImode || mode == BLKmode) -- && intreg[0] + 1 == intreg[1]) -- { -- if (mode == BLKmode) -- { -- /* Use TImode for BLKmode values in 2 integer registers. */ -- exp[0] = gen_rtx_EXPR_LIST (VOIDmode, -- gen_rtx_REG (TImode, intreg[0]), -- GEN_INT (0)); -- ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1)); -- XVECEXP (ret, 0, 0) = exp[0]; -- return ret; -- } -- else -- return gen_rtx_REG (mode, intreg[0]); -- } -+bool -+ix86_can_use_return_insn_p (void) -+{ -+ if (ix86_function_naked (current_function_decl)) -+ return false; - -- /* Otherwise figure out the entries of the PARALLEL. */ -- for (i = 0; i < n; i++) -- { -- int pos; -+ /* Don't use `ret' instruction in interrupt handler. */ -+ if (! 
reload_completed -+ || frame_pointer_needed -+ || cfun->machine->func_type != TYPE_NORMAL) -+ return 0; - -- switch (regclass[i]) -- { -- case X86_64_NO_CLASS: -- break; -- case X86_64_INTEGER_CLASS: -- case X86_64_INTEGERSI_CLASS: -- /* Merge TImodes on aligned occasions here too. */ -- if (i * 8 + 8 > bytes) -- { -- unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT; -- if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode)) -- /* We've requested 24 bytes we -- don't have mode for. Use DImode. */ -- tmpmode = DImode; -- } -- else if (regclass[i] == X86_64_INTEGERSI_CLASS) -- tmpmode = SImode; -- else -- tmpmode = DImode; -- exp [nexps++] -- = gen_rtx_EXPR_LIST (VOIDmode, -- gen_rtx_REG (tmpmode, *intreg), -- GEN_INT (i*8)); -- intreg++; -- break; -- case X86_64_SSESF_CLASS: -- exp [nexps++] -- = gen_rtx_EXPR_LIST (VOIDmode, -- gen_rtx_REG (SFmode, -- GET_SSE_REGNO (sse_regno)), -- GEN_INT (i*8)); -- sse_regno++; -- break; -- case X86_64_SSEDF_CLASS: -- exp [nexps++] -- = gen_rtx_EXPR_LIST (VOIDmode, -- gen_rtx_REG (DFmode, -- GET_SSE_REGNO (sse_regno)), -- GEN_INT (i*8)); -- sse_regno++; -- break; -- case X86_64_SSE_CLASS: -- pos = i; -- switch (n) -- { -- case 1: -- tmpmode = DImode; -- break; -- case 2: -- if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS) -- { -- tmpmode = TImode; -- i++; -- } -- else -- tmpmode = DImode; -- break; -- case 4: -- gcc_assert (i == 0 -- && regclass[1] == X86_64_SSEUP_CLASS -- && regclass[2] == X86_64_SSEUP_CLASS -- && regclass[3] == X86_64_SSEUP_CLASS); -- tmpmode = OImode; -- i += 3; -- break; -- case 8: -- gcc_assert (i == 0 -- && regclass[1] == X86_64_SSEUP_CLASS -- && regclass[2] == X86_64_SSEUP_CLASS -- && regclass[3] == X86_64_SSEUP_CLASS -- && regclass[4] == X86_64_SSEUP_CLASS -- && regclass[5] == X86_64_SSEUP_CLASS -- && regclass[6] == X86_64_SSEUP_CLASS -- && regclass[7] == X86_64_SSEUP_CLASS); -- tmpmode = XImode; -- i += 7; -- break; -- default: -- gcc_unreachable (); -- } -- exp [nexps++] -- = gen_rtx_EXPR_LIST (VOIDmode, -- gen_rtx_REG (tmpmode, -- GET_SSE_REGNO (sse_regno)), -- GEN_INT (pos*8)); -- sse_regno++; -- break; -- default: -- gcc_unreachable (); -- } -- } -+ /* Don't allow more than 32k pop, since that's all we can do -+ with one instruction. */ -+ if (crtl->args.pops_args && crtl->args.size >= 32768) -+ return 0; - -- /* Empty aligned struct, union or class. */ -- if (nexps == 0) -- return NULL; -+ struct ix86_frame &frame = cfun->machine->frame; -+ return (frame.stack_pointer_offset == UNITS_PER_WORD -+ && (frame.nregs + frame.nsseregs) == 0); -+} -+ -+/* Value should be nonzero if functions must have frame pointers. -+ Zero means the frame pointer need not be set up (and parms may -+ be accessed via the stack pointer) in functions that seem suitable. */ - -- ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps)); -- for (i = 0; i < nexps; i++) -- XVECEXP (ret, 0, i) = exp [i]; -- return ret; -+static bool -+ix86_frame_pointer_required (void) -+{ -+ /* If we accessed previous frames, then the generated code expects -+ to be able to access the saved ebp value in our frame. */ -+ if (cfun->machine->accesses_prev_frame) -+ return true; -+ -+ /* Several x86 os'es need a frame pointer for other reasons, -+ usually pertaining to setjmp. */ -+ if (SUBTARGET_FRAME_POINTER_REQUIRED) -+ return true; -+ -+ /* For older 32-bit runtimes setjmp requires valid frame-pointer. */ -+ if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp) -+ return true; -+ -+ /* Win64 SEH, very large frames need a frame-pointer as maximum stack -+ allocation is 4GB. 
*/ -+ if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE) -+ return true; -+ -+ /* SSE saves require frame-pointer when stack is misaligned. */ -+ if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128) -+ return true; -+ -+ /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER -+ turns off the frame pointer by default. Turn it back on now if -+ we've not got a leaf function. */ -+ if (TARGET_OMIT_LEAF_FRAME_POINTER -+ && (!crtl->is_leaf -+ || ix86_current_function_calls_tls_descriptor)) -+ return true; -+ -+ if (crtl->profile && !flag_fentry) -+ return true; -+ -+ return false; - } - --/* Update the data in CUM to advance over an argument of mode MODE -- and data type TYPE. (TYPE is null for libcalls where that information -- may not be available.) -+/* Record that the current function accesses previous call frames. */ - -- Return a number of integer regsiters advanced over. */ -+void -+ix86_setup_frame_addresses (void) -+{ -+ cfun->machine->accesses_prev_frame = 1; -+} -+ -+#ifndef USE_HIDDEN_LINKONCE -+# if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0) -+# define USE_HIDDEN_LINKONCE 1 -+# else -+# define USE_HIDDEN_LINKONCE 0 -+# endif -+#endif - --static int --function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode, -- const_tree type, HOST_WIDE_INT bytes, -- HOST_WIDE_INT words) -+/* Label count for call and return thunks. It is used to make unique -+ labels in call and return thunks. */ -+static int indirectlabelno; -+ -+/* True if call thunk function is needed. */ -+static bool indirect_thunk_needed = false; -+ -+/* Bit masks of integer registers, which contain branch target, used -+ by call thunk functions. */ -+static int indirect_thunks_used; -+ -+/* True if return thunk function is needed. */ -+static bool indirect_return_needed = false; -+ -+/* True if return thunk function via CX is needed. */ -+static bool indirect_return_via_cx; -+ -+#ifndef INDIRECT_LABEL -+# define INDIRECT_LABEL "LIND" -+#endif -+ -+/* Indicate what prefix is needed for an indirect branch. */ -+enum indirect_thunk_prefix - { -- int res = 0; -- bool error_p = false; -+ indirect_thunk_prefix_none, -+ indirect_thunk_prefix_nt -+}; - -- if (TARGET_IAMCU) -+/* Return the prefix needed for an indirect branch INSN. */ -+ -+enum indirect_thunk_prefix -+indirect_thunk_need_prefix (rtx_insn *insn) -+{ -+ enum indirect_thunk_prefix need_prefix; -+ if ((cfun->machine->indirect_branch_type -+ == indirect_branch_thunk_extern) -+ && ix86_notrack_prefixed_insn_p (insn)) - { -- /* Intel MCU psABI passes scalars and aggregates no larger than 8 -- bytes in registers. */ -- if (!VECTOR_MODE_P (mode) && bytes <= 8) -- goto pass_in_reg; -- return res; -+ /* NOTRACK prefix is only used with external thunk so that it -+ can be properly updated to support CET at run-time. */ -+ need_prefix = indirect_thunk_prefix_nt; - } -+ else -+ need_prefix = indirect_thunk_prefix_none; -+ return need_prefix; -+} - -- switch (mode) -- { -- default: -- break; -+/* Fills in the label name that should be used for the indirect thunk. 
*/ - -- case E_BLKmode: -- if (bytes < 0) -- break; -- /* FALLTHRU */ -+static void -+indirect_thunk_name (char name[32], unsigned int regno, -+ enum indirect_thunk_prefix need_prefix, -+ bool ret_p) -+{ -+ if (regno != INVALID_REGNUM && regno != CX_REG && ret_p) -+ gcc_unreachable (); - -- case E_DImode: -- case E_SImode: -- case E_HImode: -- case E_QImode: --pass_in_reg: -- cum->words += words; -- cum->nregs -= words; -- cum->regno += words; -- if (cum->nregs >= 0) -- res = words; -- if (cum->nregs <= 0) -+ if (USE_HIDDEN_LINKONCE) -+ { -+ const char *prefix; -+ -+ if (need_prefix == indirect_thunk_prefix_nt -+ && regno != INVALID_REGNUM) - { -- cum->nregs = 0; -- cfun->machine->arg_reg_available = false; -- cum->regno = 0; -+ /* NOTRACK prefix is only used with external thunk via -+ register so that NOTRACK prefix can be added to indirect -+ branch via register to support CET at run-time. */ -+ prefix = "_nt"; - } -- break; -- -- case E_OImode: -- /* OImode shouldn't be used directly. */ -- gcc_unreachable (); -+ else -+ prefix = ""; - -- case E_DFmode: -- if (cum->float_in_sse == -1) -- error_p = true; -- if (cum->float_in_sse < 2) -- break; -- /* FALLTHRU */ -- case E_SFmode: -- if (cum->float_in_sse == -1) -- error_p = true; -- if (cum->float_in_sse < 1) -- break; -- /* FALLTHRU */ -+ const char *ret = ret_p ? "return" : "indirect"; - -- case E_V8SFmode: -- case E_V8SImode: -- case E_V64QImode: -- case E_V32HImode: -- case E_V16SImode: -- case E_V8DImode: -- case E_V16SFmode: -- case E_V8DFmode: -- case E_V32QImode: -- case E_V16HImode: -- case E_V4DFmode: -- case E_V4DImode: -- case E_TImode: -- case E_V16QImode: -- case E_V8HImode: -- case E_V4SImode: -- case E_V2DImode: -- case E_V4SFmode: -- case E_V2DFmode: -- if (!type || !AGGREGATE_TYPE_P (type)) -+ if (regno != INVALID_REGNUM) - { -- cum->sse_words += words; -- cum->sse_nregs -= 1; -- cum->sse_regno += 1; -- if (cum->sse_nregs <= 0) -- { -- cum->sse_nregs = 0; -- cum->sse_regno = 0; -- } -- } -- break; -- -- case E_V8QImode: -- case E_V4HImode: -- case E_V2SImode: -- case E_V2SFmode: -- case E_V1TImode: -- case E_V1DImode: -- if (!type || !AGGREGATE_TYPE_P (type)) -- { -- cum->mmx_words += words; -- cum->mmx_nregs -= 1; -- cum->mmx_regno += 1; -- if (cum->mmx_nregs <= 0) -- { -- cum->mmx_nregs = 0; -- cum->mmx_regno = 0; -- } -+ const char *reg_prefix; -+ if (LEGACY_INT_REGNO_P (regno)) -+ reg_prefix = TARGET_64BIT ? "r" : "e"; -+ else -+ reg_prefix = ""; -+ sprintf (name, "__x86_%s_thunk%s_%s%s", -+ ret, prefix, reg_prefix, reg_names[regno]); - } -- break; -+ else -+ sprintf (name, "__x86_%s_thunk%s", ret, prefix); - } -- if (error_p) -+ else - { -- cum->float_in_sse = 0; -- error ("calling %qD with SSE calling convention without " -- "SSE/SSE2 enabled", cum->decl); -- sorry ("this is a GCC bug that can be worked around by adding " -- "attribute used to function called"); -+ if (regno != INVALID_REGNUM) -+ ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno); -+ else -+ { -+ if (ret_p) -+ ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0); -+ else -+ ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0); -+ } - } -- -- return res; - } - --static int --function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode, -- const_tree type, HOST_WIDE_INT words, bool named) --{ -- int int_nregs, sse_nregs; -+/* Output a call and return thunk for indirect branch. If REGNO != -1, -+ the function address is in REGNO and the call and return thunk looks like: - -- /* Unnamed 512 and 256bit vector mode parameters are passed on stack. 
*/ -- if (!named && (VALID_AVX512F_REG_MODE (mode) -- || VALID_AVX256_REG_MODE (mode))) -- return 0; -+ call L2 -+ L1: -+ pause -+ lfence -+ jmp L1 -+ L2: -+ mov %REG, (%sp) -+ ret - -- if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs) -- && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs) -- { -- cum->nregs -= int_nregs; -- cum->sse_nregs -= sse_nregs; -- cum->regno += int_nregs; -- cum->sse_regno += sse_nregs; -- return int_nregs; -- } -- else -- { -- int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD; -- cum->words = ROUND_UP (cum->words, align); -- cum->words += words; -- return 0; -- } --} -+ Otherwise, the function address is on the top of stack and the -+ call and return thunk looks like: - --static int --function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes, -- HOST_WIDE_INT words) -+ call L2 -+ L1: -+ pause -+ lfence -+ jmp L1 -+ L2: -+ lea WORD_SIZE(%sp), %sp -+ ret -+ */ -+ -+static void -+output_indirect_thunk (unsigned int regno) - { -- /* Otherwise, this should be passed indirect. */ -- gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8); -+ char indirectlabel1[32]; -+ char indirectlabel2[32]; - -- cum->words += words; -- if (cum->nregs > 0) -- { -- cum->nregs -= 1; -- cum->regno += 1; -- return 1; -- } -- return 0; --} -+ ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL, -+ indirectlabelno++); -+ ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL, -+ indirectlabelno++); - --/* Update the data in CUM to advance over an argument of mode MODE and -- data type TYPE. (TYPE is null for libcalls where that information -- may not be available.) */ -+ /* Call */ -+ fputs ("\tcall\t", asm_out_file); -+ assemble_name_raw (asm_out_file, indirectlabel2); -+ fputc ('\n', asm_out_file); - --static void --ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode, -- const_tree type, bool named) --{ -- CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); -- HOST_WIDE_INT bytes, words; -- int nregs; -+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1); - -- /* The argument of interrupt handler is a special case and is -- handled in ix86_function_arg. */ -- if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL) -- return; -+ /* AMD and Intel CPUs prefer each a different instruction as loop filler. -+ Usage of both pause + lfence is compromise solution. */ -+ fprintf (asm_out_file, "\tpause\n\tlfence\n"); - -- if (mode == BLKmode) -- bytes = int_size_in_bytes (type); -- else -- bytes = GET_MODE_SIZE (mode); -- words = CEIL (bytes, UNITS_PER_WORD); -+ /* Jump. */ -+ fputs ("\tjmp\t", asm_out_file); -+ assemble_name_raw (asm_out_file, indirectlabel1); -+ fputc ('\n', asm_out_file); - -- if (type) -- mode = type_natural_mode (type, NULL, false); -+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2); - -- if (TARGET_64BIT) -+ /* The above call insn pushed a word to stack. Adjust CFI info. */ -+ if (flag_asynchronous_unwind_tables && dwarf2out_do_frame ()) - { -- enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi; -+ if (! 
dwarf2out_do_cfi_asm ()) -+ { -+ dw_cfi_ref xcfi = ggc_cleared_alloc (); -+ xcfi->dw_cfi_opc = DW_CFA_advance_loc4; -+ xcfi->dw_cfi_oprnd1.dw_cfi_addr = ggc_strdup (indirectlabel2); -+ vec_safe_push (cfun->fde->dw_fde_cfi, xcfi); -+ } -+ dw_cfi_ref xcfi = ggc_cleared_alloc (); -+ xcfi->dw_cfi_opc = DW_CFA_def_cfa_offset; -+ xcfi->dw_cfi_oprnd1.dw_cfi_offset = 2 * UNITS_PER_WORD; -+ vec_safe_push (cfun->fde->dw_fde_cfi, xcfi); -+ dwarf2out_emit_cfi (xcfi); -+ } - -- if (call_abi == MS_ABI) -- nregs = function_arg_advance_ms_64 (cum, bytes, words); -- else -- nregs = function_arg_advance_64 (cum, mode, type, words, named); -+ if (regno != INVALID_REGNUM) -+ { -+ /* MOV. */ -+ rtx xops[2]; -+ xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx); -+ xops[1] = gen_rtx_REG (word_mode, regno); -+ output_asm_insn ("mov\t{%1, %0|%0, %1}", xops); - } - else -- nregs = function_arg_advance_32 (cum, mode, type, bytes, words); -- -- if (!nregs) - { -- /* Track if there are outgoing arguments on stack. */ -- if (cum->caller) -- cfun->machine->outgoing_args_on_stack = true; -+ /* LEA. */ -+ rtx xops[2]; -+ xops[0] = stack_pointer_rtx; -+ xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); -+ output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops); - } --} - --/* Define where to put the arguments to a function. -- Value is zero to push the argument on the stack, -- or a hard register in which to store the argument. -+ fputs ("\tret\n", asm_out_file); -+} - -- MODE is the argument's machine mode. -- TYPE is the data type of the argument (as a tree). -- This is null for libcalls where that information may -- not be available. -- CUM is a variable of type CUMULATIVE_ARGS which gives info about -- the preceding args and about the function being called. -- NAMED is nonzero if this argument is a named parameter -- (otherwise it is an extra parameter matching an ellipsis). */ -+/* Output a funtion with a call and return thunk for indirect branch. -+ If REGNO != INVALID_REGNUM, the function address is in REGNO. -+ Otherwise, the function address is on the top of stack. Thunk is -+ used for function return if RET_P is true. */ - --static rtx --function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode, -- machine_mode orig_mode, const_tree type, -- HOST_WIDE_INT bytes, HOST_WIDE_INT words) -+static void -+output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix, -+ unsigned int regno, bool ret_p) - { -- bool error_p = false; -+ char name[32]; -+ tree decl; - -- /* Avoid the AL settings for the Unix64 ABI. */ -- if (mode == VOIDmode) -- return constm1_rtx; -+ /* Create __x86_indirect_thunk. */ -+ indirect_thunk_name (name, regno, need_prefix, ret_p); -+ decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, -+ get_identifier (name), -+ build_function_type_list (void_type_node, NULL_TREE)); -+ DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL, -+ NULL_TREE, void_type_node); -+ TREE_PUBLIC (decl) = 1; -+ TREE_STATIC (decl) = 1; -+ DECL_IGNORED_P (decl) = 1; - -- if (TARGET_IAMCU) -+#if TARGET_MACHO -+ if (TARGET_MACHO) - { -- /* Intel MCU psABI passes scalars and aggregates no larger than 8 -- bytes in registers. 
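As a rough illustration (not part of the patch): once the thunk bodies above are emitted, an ordinary indirect call gets re-routed through one of the __x86_indirect_thunk_<reg> symbols built by indirect_thunk_name, whose body output_indirect_thunk fills in (a call over a pause/lfence capture loop, then mov %reg, (%rsp) and ret). The option spelling and the register chosen in the sketch below are assumptions based on the usual GCC retpoline support, not text quoted from this patch.

    /* Illustration only; -mindirect-branch=thunk and the use of %rax are
       assumptions, not taken from the patch.  */
    void
    call_through_pointer (void (*fn) (void))
    {
      /* Normally expands to "call *%rax"; with the thunks above it becomes
         "call __x86_indirect_thunk_rax", the name indirect_thunk_name
         produces for a legacy integer register in 64-bit mode.  */
      fn ();
    }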
*/ -- if (!VECTOR_MODE_P (mode) && bytes <= 8) -- goto pass_in_reg; -- return NULL_RTX; -+ switch_to_section (darwin_sections[picbase_thunk_section]); -+ fputs ("\t.weak_definition\t", asm_out_file); -+ assemble_name (asm_out_file, name); -+ fputs ("\n\t.private_extern\t", asm_out_file); -+ assemble_name (asm_out_file, name); -+ putc ('\n', asm_out_file); -+ ASM_OUTPUT_LABEL (asm_out_file, name); -+ DECL_WEAK (decl) = 1; - } -+ else -+#endif -+ if (USE_HIDDEN_LINKONCE) -+ { -+ cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl)); - -- switch (mode) -- { -- default: -- break; -+ targetm.asm_out.unique_section (decl, 0); -+ switch_to_section (get_named_section (decl, NULL, 0)); - -- case E_BLKmode: -- if (bytes < 0) -- break; -- /* FALLTHRU */ -- case E_DImode: -- case E_SImode: -- case E_HImode: -- case E_QImode: --pass_in_reg: -- if (words <= cum->nregs) -- { -- int regno = cum->regno; -+ targetm.asm_out.globalize_label (asm_out_file, name); -+ fputs ("\t.hidden\t", asm_out_file); -+ assemble_name (asm_out_file, name); -+ putc ('\n', asm_out_file); -+ ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl); -+ } -+ else -+ { -+ switch_to_section (text_section); -+ ASM_OUTPUT_LABEL (asm_out_file, name); -+ } - -- /* Fastcall allocates the first two DWORD (SImode) or -- smaller arguments to ECX and EDX if it isn't an -- aggregate type . */ -- if (cum->fastcall) -- { -- if (mode == BLKmode -- || mode == DImode -- || (type && AGGREGATE_TYPE_P (type))) -- break; -+ DECL_INITIAL (decl) = make_node (BLOCK); -+ current_function_decl = decl; -+ allocate_struct_function (decl, false); -+ init_function_start (decl); -+ /* We're about to hide the function body from callees of final_* by -+ emitting it directly; tell them we're a thunk, if they care. */ -+ cfun->is_thunk = true; -+ first_function_block_is_cold = false; -+ /* Make sure unwind info is emitted for the thunk if needed. */ -+ final_start_function (emit_barrier (), asm_out_file, 1); - -- /* ECX not EAX is the first allocated register. */ -- if (regno == AX_REG) -- regno = CX_REG; -- } -- return gen_rtx_REG (mode, regno); -- } -- break; -+ output_indirect_thunk (regno); - -- case E_DFmode: -- if (cum->float_in_sse == -1) -- error_p = true; -- if (cum->float_in_sse < 2) -- break; -- /* FALLTHRU */ -- case E_SFmode: -- if (cum->float_in_sse == -1) -- error_p = true; -- if (cum->float_in_sse < 1) -- break; -- /* FALLTHRU */ -- case E_TImode: -- /* In 32bit, we pass TImode in xmm registers. */ -- case E_V16QImode: -- case E_V8HImode: -- case E_V4SImode: -- case E_V2DImode: -- case E_V4SFmode: -- case E_V2DFmode: -- if (!type || !AGGREGATE_TYPE_P (type)) -- { -- if (cum->sse_nregs) -- return gen_reg_or_parallel (mode, orig_mode, -- cum->sse_regno + FIRST_SSE_REG); -- } -- break; -+ final_end_function (); -+ init_insn_lengths (); -+ free_after_compilation (cfun); -+ set_cfun (NULL); -+ current_function_decl = NULL; -+} - -- case E_OImode: -- case E_XImode: -- /* OImode and XImode shouldn't be used directly. 
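The same helper also materialises the return-side thunk selected by the ret_p argument above (and requested through indirect_return_needed further below). A minimal sketch of the user-visible effect, assuming the conventional -mfunction-return=thunk option controls it (the option name is not quoted from this patch):

    /* Illustration only; -mfunction-return=thunk is an assumption here.
       Each "ret" in the function is replaced by a jump to the
       __x86_return_thunk symbol that output_indirect_thunk_function emits
       when called with ret_p == true.  */
    int
    answer (void)
    {
      return 42;   /* emitted roughly as: mov $42, %eax ; jmp __x86_return_thunk */
    }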
*/ -- gcc_unreachable (); -+static int pic_labels_used; - -- case E_V64QImode: -- case E_V32HImode: -- case E_V16SImode: -- case E_V8DImode: -- case E_V16SFmode: -- case E_V8DFmode: -- case E_V8SFmode: -- case E_V8SImode: -- case E_V32QImode: -- case E_V16HImode: -- case E_V4DFmode: -- case E_V4DImode: -- if (!type || !AGGREGATE_TYPE_P (type)) -- { -- if (cum->sse_nregs) -- return gen_reg_or_parallel (mode, orig_mode, -- cum->sse_regno + FIRST_SSE_REG); -- } -- break; -+/* Fills in the label name that should be used for a pc thunk for -+ the given register. */ - -- case E_V8QImode: -- case E_V4HImode: -- case E_V2SImode: -- case E_V2SFmode: -- case E_V1TImode: -- case E_V1DImode: -- if (!type || !AGGREGATE_TYPE_P (type)) -- { -- if (cum->mmx_nregs) -- return gen_reg_or_parallel (mode, orig_mode, -- cum->mmx_regno + FIRST_MMX_REG); -- } -- break; -- } -- if (error_p) -- { -- cum->float_in_sse = 0; -- error ("calling %qD with SSE calling convention without " -- "SSE/SSE2 enabled", cum->decl); -- sorry ("this is a GCC bug that can be worked around by adding " -- "attribute used to function called"); -- } -+static void -+get_pc_thunk_name (char name[32], unsigned int regno) -+{ -+ gcc_assert (!TARGET_64BIT); - -- return NULL_RTX; -+ if (USE_HIDDEN_LINKONCE) -+ sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]); -+ else -+ ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno); - } - --static rtx --function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode, -- machine_mode orig_mode, const_tree type, bool named) -+ -+/* This function generates code for -fpic that loads %ebx with -+ the return address of the caller and then returns. */ -+ -+static void -+ix86_code_end (void) - { -- /* Handle a hidden AL argument containing number of registers -- for varargs x86-64 functions. */ -- if (mode == VOIDmode) -- return GEN_INT (cum->maybe_vaarg -- ? (cum->sse_nregs < 0 -- ? X86_64_SSE_REGPARM_MAX -- : cum->sse_regno) -- : -1); -+ rtx xops[2]; -+ unsigned int regno; - -- switch (mode) -- { -- default: -- break; -+ if (indirect_return_needed) -+ output_indirect_thunk_function (indirect_thunk_prefix_none, -+ INVALID_REGNUM, true); -+ if (indirect_return_via_cx) -+ output_indirect_thunk_function (indirect_thunk_prefix_none, -+ CX_REG, true); -+ if (indirect_thunk_needed) -+ output_indirect_thunk_function (indirect_thunk_prefix_none, -+ INVALID_REGNUM, false); - -- case E_V8SFmode: -- case E_V8SImode: -- case E_V32QImode: -- case E_V16HImode: -- case E_V4DFmode: -- case E_V4DImode: -- case E_V16SFmode: -- case E_V16SImode: -- case E_V64QImode: -- case E_V32HImode: -- case E_V8DFmode: -- case E_V8DImode: -- /* Unnamed 256 and 512bit vector mode parameters are passed on stack. 
*/ -- if (!named) -- return NULL; -- break; -+ for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++) -+ { -+ unsigned int i = regno - FIRST_REX_INT_REG + LAST_INT_REG + 1; -+ if ((indirect_thunks_used & (1 << i))) -+ output_indirect_thunk_function (indirect_thunk_prefix_none, -+ regno, false); - } - -- return construct_container (mode, orig_mode, type, 0, cum->nregs, -- cum->sse_nregs, -- &x86_64_int_parameter_registers [cum->regno], -- cum->sse_regno); --} -+ for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++) -+ { -+ char name[32]; -+ tree decl; - --static rtx --function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode, -- machine_mode orig_mode, bool named, const_tree type, -- HOST_WIDE_INT bytes) --{ -- unsigned int regno; -+ if ((indirect_thunks_used & (1 << regno))) -+ output_indirect_thunk_function (indirect_thunk_prefix_none, -+ regno, false); - -- /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call. -- We use value of -2 to specify that current function call is MSABI. */ -- if (mode == VOIDmode) -- return GEN_INT (-2); -+ if (!(pic_labels_used & (1 << regno))) -+ continue; - -- /* If we've run out of registers, it goes on the stack. */ -- if (cum->nregs == 0) -- return NULL_RTX; -+ get_pc_thunk_name (name, regno); - -- regno = x86_64_ms_abi_int_parameter_registers[cum->regno]; -+ decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, -+ get_identifier (name), -+ build_function_type_list (void_type_node, NULL_TREE)); -+ DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL, -+ NULL_TREE, void_type_node); -+ TREE_PUBLIC (decl) = 1; -+ TREE_STATIC (decl) = 1; -+ DECL_IGNORED_P (decl) = 1; - -- /* Only floating point modes are passed in anything but integer regs. */ -- if (TARGET_SSE && (mode == SFmode || mode == DFmode)) -- { -- if (named) -+#if TARGET_MACHO -+ if (TARGET_MACHO) - { -- if (type == NULL_TREE || !AGGREGATE_TYPE_P (type)) -- regno = cum->regno + FIRST_SSE_REG; -+ switch_to_section (darwin_sections[picbase_thunk_section]); -+ fputs ("\t.weak_definition\t", asm_out_file); -+ assemble_name (asm_out_file, name); -+ fputs ("\n\t.private_extern\t", asm_out_file); -+ assemble_name (asm_out_file, name); -+ putc ('\n', asm_out_file); -+ ASM_OUTPUT_LABEL (asm_out_file, name); -+ DECL_WEAK (decl) = 1; - } - else -+#endif -+ if (USE_HIDDEN_LINKONCE) - { -- rtx t1, t2; -+ cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl)); - -- /* Unnamed floating parameters are passed in both the -- SSE and integer registers. */ -- t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG); -- t2 = gen_rtx_REG (mode, regno); -- t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx); -- t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx); -- return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2)); -- } -- } -- /* Handle aggregated types passed in register. */ -- if (orig_mode == BLKmode) -- { -- if (bytes > 0 && bytes <= 8) -- mode = (bytes > 4 ? DImode : SImode); -- if (mode == BLKmode) -- mode = DImode; -- } -- -- return gen_reg_or_parallel (mode, orig_mode, regno); --} -- --/* Return where to put the arguments to a function. -- Return zero to push the argument on the stack, or a hard register in which to store the argument. -- -- MODE is the argument's machine mode. TYPE is the data type of the -- argument. It is null for libcalls where that information may not be -- available. CUM gives information about the preceding args and about -- the function being called. 
NAMED is nonzero if this argument is a -- named parameter (otherwise it is an extra parameter matching an -- ellipsis). */ -- --static rtx --ix86_function_arg (cumulative_args_t cum_v, machine_mode omode, -- const_tree type, bool named) --{ -- CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); -- machine_mode mode = omode; -- HOST_WIDE_INT bytes, words; -- rtx arg; -+ targetm.asm_out.unique_section (decl, 0); -+ switch_to_section (get_named_section (decl, NULL, 0)); - -- if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL) -- { -- gcc_assert (type != NULL_TREE); -- if (POINTER_TYPE_P (type)) -- { -- /* This is the pointer argument. */ -- gcc_assert (TYPE_MODE (type) == Pmode); -- /* It is at -WORD(AP) in the current frame in interrupt and -- exception handlers. */ -- arg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD); -+ targetm.asm_out.globalize_label (asm_out_file, name); -+ fputs ("\t.hidden\t", asm_out_file); -+ assemble_name (asm_out_file, name); -+ putc ('\n', asm_out_file); -+ ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl); - } - else - { -- gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION -- && TREE_CODE (type) == INTEGER_TYPE -- && TYPE_MODE (type) == word_mode); -- /* The error code is the word-mode integer argument at -- -2 * WORD(AP) in the current frame of the exception -- handler. */ -- arg = gen_rtx_MEM (word_mode, -- plus_constant (Pmode, -- arg_pointer_rtx, -- -2 * UNITS_PER_WORD)); -+ switch_to_section (text_section); -+ ASM_OUTPUT_LABEL (asm_out_file, name); - } -- return arg; -- } - -- if (mode == BLKmode) -- bytes = int_size_in_bytes (type); -- else -- bytes = GET_MODE_SIZE (mode); -- words = CEIL (bytes, UNITS_PER_WORD); -+ DECL_INITIAL (decl) = make_node (BLOCK); -+ current_function_decl = decl; -+ allocate_struct_function (decl, false); -+ init_function_start (decl); -+ /* We're about to hide the function body from callees of final_* by -+ emitting it directly; tell them we're a thunk, if they care. */ -+ cfun->is_thunk = true; -+ first_function_block_is_cold = false; -+ /* Make sure unwind info is emitted for the thunk if needed. */ -+ final_start_function (emit_barrier (), asm_out_file, 1); - -- /* To simplify the code below, represent vector types with a vector mode -- even if MMX/SSE are not active. */ -- if (type && TREE_CODE (type) == VECTOR_TYPE) -- mode = type_natural_mode (type, cum, false); -+ /* Pad stack IP move with 4 instructions (two NOPs count -+ as one instruction). */ -+ if (TARGET_PAD_SHORT_FUNCTION) -+ { -+ int i = 8; - -- if (TARGET_64BIT) -- { -- enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi; -+ while (i--) -+ fputs ("\tnop\n", asm_out_file); -+ } - -- if (call_abi == MS_ABI) -- arg = function_arg_ms_64 (cum, mode, omode, named, type, bytes); -- else -- arg = function_arg_64 (cum, mode, omode, type, named); -+ xops[0] = gen_rtx_REG (Pmode, regno); -+ xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx); -+ output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops); -+ output_asm_insn ("%!ret", NULL); -+ final_end_function (); -+ init_insn_lengths (); -+ free_after_compilation (cfun); -+ set_cfun (NULL); -+ current_function_decl = NULL; - } -- else -- arg = function_arg_32 (cum, mode, omode, type, bytes, words); - -- /* Track if there are outgoing arguments on stack. 
*/ -- if (arg == NULL_RTX && cum->caller) -- cfun->machine->outgoing_args_on_stack = true; -- -- return arg; -+ if (flag_split_stack) -+ file_end_indicate_split_stack (); - } - --/* A C expression that indicates when an argument must be passed by -- reference. If nonzero for an argument, a copy of that argument is -- made in memory and a pointer to the argument is passed instead of -- the argument itself. The pointer is passed in whatever way is -- appropriate for passing a pointer to that type. */ -+/* Emit code for the SET_GOT patterns. */ - --static bool --ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode, -- const_tree type, bool) -+const char * -+output_set_got (rtx dest, rtx label) - { -- CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); -+ rtx xops[3]; - -- if (TARGET_64BIT) -+ xops[0] = dest; -+ -+ if (TARGET_VXWORKS_RTP && flag_pic) - { -- enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi; -+ /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */ -+ xops[2] = gen_rtx_MEM (Pmode, -+ gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE)); -+ output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops); - -- /* See Windows x64 Software Convention. */ -- if (call_abi == MS_ABI) -- { -- HOST_WIDE_INT msize = GET_MODE_SIZE (mode); -+ /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register. -+ Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as -+ an unadorned address. */ -+ xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX); -+ SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL; -+ output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops); -+ return ""; -+ } - -- if (type) -- { -- /* Arrays are passed by reference. */ -- if (TREE_CODE (type) == ARRAY_TYPE) -- return true; -+ xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME); - -- if (RECORD_OR_UNION_TYPE_P (type)) -- { -- /* Structs/unions of sizes other than 8, 16, 32, or 64 bits -- are passed by reference. */ -- msize = int_size_in_bytes (type); -- } -- } -+ if (flag_pic) -+ { -+ char name[32]; -+ get_pc_thunk_name (name, REGNO (dest)); -+ pic_labels_used |= 1 << REGNO (dest); - -- /* __m128 is passed by reference. */ -- return msize != 1 && msize != 2 && msize != 4 && msize != 8; -- } -- else if (type && int_size_in_bytes (type) == -1) -- return true; -+ xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name)); -+ xops[2] = gen_rtx_MEM (QImode, xops[2]); -+ output_asm_insn ("%!call\t%X2", xops); -+ -+#if TARGET_MACHO -+ /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here. -+ This is what will be referenced by the Mach-O PIC subsystem. */ -+ if (machopic_should_output_picbase_label () || !label) -+ ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME); -+ -+ /* When we are restoring the pic base at the site of a nonlocal label, -+ and we decided to emit the pic base above, we will still output a -+ local label used for calculating the correction offset (even though -+ the offset will be 0 in that case). */ -+ if (label) -+ targetm.asm_out.internal_label (asm_out_file, "L", -+ CODE_LABEL_NUMBER (label)); -+#endif -+ } -+ else -+ { -+ if (TARGET_MACHO) -+ /* We don't need a pic base, we're not producing pic. */ -+ gcc_unreachable (); -+ -+ xops[2] = gen_rtx_LABEL_REF (Pmode, label ? 
label : gen_label_rtx ()); -+ output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops); -+ targetm.asm_out.internal_label (asm_out_file, "L", -+ CODE_LABEL_NUMBER (XEXP (xops[2], 0))); - } - -- return false; -+ if (!TARGET_MACHO) -+ output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops); -+ -+ return ""; - } - --/* Return true when TYPE should be 128bit aligned for 32bit argument -- passing ABI. XXX: This function is obsolete and is only used for -- checking psABI compatibility with previous versions of GCC. */ -+/* Generate an "push" pattern for input ARG. */ - --static bool --ix86_compat_aligned_value_p (const_tree type) -+rtx -+gen_push (rtx arg) - { -- machine_mode mode = TYPE_MODE (type); -- if (((TARGET_SSE && SSE_REG_MODE_P (mode)) -- || mode == TDmode -- || mode == TFmode -- || mode == TCmode) -- && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128)) -- return true; -- if (TYPE_ALIGN (type) < 128) -- return false; -+ struct machine_function *m = cfun->machine; - -- if (AGGREGATE_TYPE_P (type)) -- { -- /* Walk the aggregates recursively. */ -- switch (TREE_CODE (type)) -- { -- case RECORD_TYPE: -- case UNION_TYPE: -- case QUAL_UNION_TYPE: -- { -- tree field; -+ if (m->fs.cfa_reg == stack_pointer_rtx) -+ m->fs.cfa_offset += UNITS_PER_WORD; -+ m->fs.sp_offset += UNITS_PER_WORD; - -- /* Walk all the structure fields. */ -- for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) -- { -- if (TREE_CODE (field) == FIELD_DECL -- && ix86_compat_aligned_value_p (TREE_TYPE (field))) -- return true; -- } -- break; -- } -+ if (REG_P (arg) && GET_MODE (arg) != word_mode) -+ arg = gen_rtx_REG (word_mode, REGNO (arg)); - -- case ARRAY_TYPE: -- /* Just for use if some languages passes arrays by value. */ -- if (ix86_compat_aligned_value_p (TREE_TYPE (type))) -- return true; -- break; -+ return gen_rtx_SET (gen_rtx_MEM (word_mode, -+ gen_rtx_PRE_DEC (Pmode, -+ stack_pointer_rtx)), -+ arg); -+} - -- default: -- gcc_unreachable (); -- } -- } -- return false; -+/* Generate an "pop" pattern for input ARG. */ -+ -+rtx -+gen_pop (rtx arg) -+{ -+ if (REG_P (arg) && GET_MODE (arg) != word_mode) -+ arg = gen_rtx_REG (word_mode, REGNO (arg)); -+ -+ return gen_rtx_SET (arg, -+ gen_rtx_MEM (word_mode, -+ gen_rtx_POST_INC (Pmode, -+ stack_pointer_rtx))); - } - --/* Return the alignment boundary for MODE and TYPE with alignment ALIGN. -- XXX: This function is obsolete and is only used for checking psABI -- compatibility with previous versions of GCC. */ -+/* Return >= 0 if there is an unused call-clobbered register available -+ for the entire function. */ - - static unsigned int --ix86_compat_function_arg_boundary (machine_mode mode, -- const_tree type, unsigned int align) -+ix86_select_alt_pic_regnum (void) - { -- /* In 32bit, only _Decimal128 and __float128 are aligned to their -- natural boundaries. */ -- if (!TARGET_64BIT && mode != TDmode && mode != TFmode) -- { -- /* i386 ABI defines all arguments to be 4 byte aligned. We have to -- make an exception for SSE modes since these require 128bit -- alignment. -- -- The handling here differs from field_alignment. ICC aligns MMX -- arguments to 4 byte boundaries, while structure fields are aligned -- to 8 byte boundaries. */ -- if (!type) -- { -- if (!(TARGET_SSE && SSE_REG_MODE_P (mode))) -- align = PARM_BOUNDARY; -- } -+ if (ix86_use_pseudo_pic_reg ()) -+ return INVALID_REGNUM; -+ -+ if (crtl->is_leaf -+ && !crtl->profile -+ && !ix86_current_function_calls_tls_descriptor) -+ { -+ int i, drap; -+ /* Can't use the same register for both PIC and DRAP. 
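Taken together, get_pc_thunk_name and output_set_got implement the classic 32-bit PIC prologue: a call to __x86.get_pc_thunk.<reg> (whose body, emitted later in ix86_code_end, is just "mov (%esp), %reg; ret") followed by adding _GLOBAL_OFFSET_TABLE_ to that register. A hedged example of source code that exercises it; the -m32 -fPIC flags and the function/variable names below are illustrative assumptions, not taken from the patch:

    /* Illustration only; compile with something like -m32 -fPIC.  */
    extern int counter;

    int
    bump (void)
    {
      /* Accessing a global through the GOT first needs the PIC register:
           call  __x86.get_pc_thunk.bx
           addl  $_GLOBAL_OFFSET_TABLE_, %ebx
         which is exactly the sequence output_set_got prints above.  */
      return ++counter;
    }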
*/ -+ if (crtl->drap_reg) -+ drap = REGNO (crtl->drap_reg); - else -- { -- if (!ix86_compat_aligned_value_p (type)) -- align = PARM_BOUNDARY; -- } -+ drap = -1; -+ for (i = 2; i >= 0; --i) -+ if (i != drap && !df_regs_ever_live_p (i)) -+ return i; - } -- if (align > BIGGEST_ALIGNMENT) -- align = BIGGEST_ALIGNMENT; -- return align; -+ -+ return INVALID_REGNUM; - } - --/* Return true when TYPE should be 128bit aligned for 32bit argument -- passing ABI. */ -+/* Return true if REGNO is used by the epilogue. */ - --static bool --ix86_contains_aligned_value_p (const_tree type) -+bool -+ix86_epilogue_uses (int regno) - { -- machine_mode mode = TYPE_MODE (type); -- -- if (mode == XFmode || mode == XCmode) -- return false; -- -- if (TYPE_ALIGN (type) < 128) -- return false; -- -- if (AGGREGATE_TYPE_P (type)) -- { -- /* Walk the aggregates recursively. */ -- switch (TREE_CODE (type)) -- { -- case RECORD_TYPE: -- case UNION_TYPE: -- case QUAL_UNION_TYPE: -- { -- tree field; -- -- /* Walk all the structure fields. */ -- for (field = TYPE_FIELDS (type); -- field; -- field = DECL_CHAIN (field)) -- { -- if (TREE_CODE (field) == FIELD_DECL -- && ix86_contains_aligned_value_p (TREE_TYPE (field))) -- return true; -- } -- break; -- } -- -- case ARRAY_TYPE: -- /* Just for use if some languages passes arrays by value. */ -- if (ix86_contains_aligned_value_p (TREE_TYPE (type))) -- return true; -- break; -+ /* If there are no caller-saved registers, we preserve all registers, -+ except for MMX and x87 registers which aren't supported when saving -+ and restoring registers. Don't explicitly save SP register since -+ it is always preserved. */ -+ return (epilogue_completed -+ && cfun->machine->no_caller_saved_registers -+ && !fixed_regs[regno] -+ && !STACK_REGNO_P (regno) -+ && !MMX_REGNO_P (regno)); -+} - -- default: -- gcc_unreachable (); -- } -- } -- else -- return TYPE_ALIGN (type) >= 128; -+/* Return nonzero if register REGNO can be used as a scratch register -+ in peephole2. */ - -- return false; -+static bool -+ix86_hard_regno_scratch_ok (unsigned int regno) -+{ -+ /* If there are no caller-saved registers, we can't use any register -+ as a scratch register after epilogue and use REGNO as scratch -+ register only if it has been used before to avoid saving and -+ restoring it. */ -+ return (!cfun->machine->no_caller_saved_registers -+ || (!epilogue_completed -+ && df_regs_ever_live_p (regno))); - } - --/* Gives the alignment boundary, in bits, of an argument with the -- specified mode and type. */ -+/* Return TRUE if we need to save REGNO. */ - --static unsigned int --ix86_function_arg_boundary (machine_mode mode, const_tree type) -+bool -+ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined) - { -- unsigned int align; -- if (type) -+ /* If there are no caller-saved registers, we preserve all registers, -+ except for MMX and x87 registers which aren't supported when saving -+ and restoring registers. Don't explicitly save SP register since -+ it is always preserved. */ -+ if (cfun->machine->no_caller_saved_registers) - { -- /* Since the main variant type is used for call, we convert it to -- the main variant type. */ -- type = TYPE_MAIN_VARIANT (type); -- align = TYPE_ALIGN (type); -- if (TYPE_EMPTY_P (type)) -- return PARM_BOUNDARY; -+ /* Don't preserve registers used for function return value. 
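The no_caller_saved_registers checks in ix86_epilogue_uses and ix86_hard_regno_scratch_ok above exist for functions compiled so that no register is caller-saved, typically interrupt handlers or helpers called from them. A sketch of the usual way that mode is requested; the attribute spelling is the standard GCC one and is an assumption here, not text from the patch:

    /* Illustration only.  With no_caller_saved_registers in effect,
       ix86_save_reg/ix86_epilogue_uses force every live register (except
       MMX/x87 and the return-value registers) to be saved in the prologue
       and restored in the epilogue.  */
    void __attribute__ ((no_caller_saved_registers))
    helper_called_from_interrupt (void)
    {
      /* Body may clobber any register; prologue/epilogue preserve them.  */
    }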
*/ -+ rtx reg = crtl->return_rtx; -+ if (reg) -+ { -+ unsigned int i = REGNO (reg); -+ unsigned int nregs = REG_NREGS (reg); -+ while (nregs-- > 0) -+ if ((i + nregs) == regno) -+ return false; -+ } -+ -+ return (df_regs_ever_live_p (regno) -+ && !fixed_regs[regno] -+ && !STACK_REGNO_P (regno) -+ && !MMX_REGNO_P (regno) -+ && (regno != HARD_FRAME_POINTER_REGNUM -+ || !frame_pointer_needed)); - } -- else -- align = GET_MODE_ALIGNMENT (mode); -- if (align < PARM_BOUNDARY) -- align = PARM_BOUNDARY; -- else -- { -- static bool warned; -- unsigned int saved_align = align; - -- if (!TARGET_64BIT) -+ if (regno == REAL_PIC_OFFSET_TABLE_REGNUM -+ && pic_offset_table_rtx) -+ { -+ if (ix86_use_pseudo_pic_reg ()) - { -- /* i386 ABI defines XFmode arguments to be 4 byte aligned. */ -- if (!type) -- { -- if (mode == XFmode || mode == XCmode) -- align = PARM_BOUNDARY; -- } -- else if (!ix86_contains_aligned_value_p (type)) -- align = PARM_BOUNDARY; -- -- if (align < 128) -- align = PARM_BOUNDARY; -+ /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to -+ _mcount in prologue. */ -+ if (!TARGET_64BIT && flag_pic && crtl->profile) -+ return true; - } -+ else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM) -+ || crtl->profile -+ || crtl->calls_eh_return -+ || crtl->uses_const_pool -+ || cfun->has_nonlocal_label) -+ return ix86_select_alt_pic_regnum () == INVALID_REGNUM; -+ } - -- if (warn_psabi -- && !warned -- && align != ix86_compat_function_arg_boundary (mode, type, -- saved_align)) -+ if (crtl->calls_eh_return && maybe_eh_return) -+ { -+ unsigned i; -+ for (i = 0; ; i++) - { -- warned = true; -- inform (input_location, -- "the ABI for passing parameters with %d-byte" -- " alignment has changed in GCC 4.6", -- align / BITS_PER_UNIT); -+ unsigned test = EH_RETURN_DATA_REGNO (i); -+ if (test == INVALID_REGNUM) -+ break; -+ if (test == regno) -+ return true; - } - } - -- return align; --} -- --/* Return true if N is a possible register number of function value. */ -- --static bool --ix86_function_value_regno_p (const unsigned int regno) --{ -- switch (regno) -+ if (ignore_outlined && cfun->machine->call_ms2sysv) - { -- case AX_REG: -- return true; -- case DX_REG: -- return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI); -- case DI_REG: -- case SI_REG: -- return TARGET_64BIT && ix86_cfun_abi () != MS_ABI; -- -- /* Complex values are returned in %st(0)/%st(1) pair. */ -- case ST0_REG: -- case ST1_REG: -- /* TODO: The function should depend on current function ABI but -- builtins.c would need updating then. Therefore we use the -- default ABI. */ -- if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI) -- return false; -- return TARGET_FLOAT_RETURNS_IN_80387; -- -- /* Complex values are returned in %xmm0/%xmm1 pair. */ -- case XMM0_REG: -- case XMM1_REG: -- return TARGET_SSE; -- -- case MM0_REG: -- if (TARGET_MACHO || TARGET_64BIT) -+ unsigned count = cfun->machine->call_ms2sysv_extra_regs -+ + xlogue_layout::MIN_REGS; -+ if (xlogue_layout::is_stub_managed_reg (regno, count)) - return false; -- return TARGET_MMX; - } - -- return false; -+ if (crtl->drap_reg -+ && regno == REGNO (crtl->drap_reg) -+ && !cfun->machine->no_drap_save_restore) -+ return true; -+ -+ return (df_regs_ever_live_p (regno) -+ && !call_used_or_fixed_reg_p (regno) -+ && !fixed_regs[regno] -+ && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed)); - } - --/* Define how to find the value returned by a function. -- VALTYPE is the data type of the value (as a tree). 
-- If the precise function being called is known, FUNC is its FUNCTION_DECL; -- otherwise, FUNC is 0. */ -+/* Return number of saved general prupose registers. */ - --static rtx --function_value_32 (machine_mode orig_mode, machine_mode mode, -- const_tree fntype, const_tree fn) -+static int -+ix86_nsaved_regs (void) - { -- unsigned int regno; -+ int nregs = 0; -+ int regno; - -- /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where -- we normally prevent this case when mmx is not available. However -- some ABIs may require the result to be returned like DImode. */ -- if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8) -- regno = FIRST_MMX_REG; -- -- /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where -- we prevent this case when sse is not available. However some ABIs -- may require the result to be returned like integer TImode. */ -- else if (mode == TImode -- || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16)) -- regno = FIRST_SSE_REG; -- -- /* 32-byte vector modes in %ymm0. */ -- else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32) -- regno = FIRST_SSE_REG; -+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) -+ if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true)) -+ nregs ++; -+ return nregs; -+} - -- /* 64-byte vector modes in %zmm0. */ -- else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64) -- regno = FIRST_SSE_REG; -+/* Return number of saved SSE registers. */ - -- /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */ -- else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387) -- regno = FIRST_FLOAT_REG; -- else -- /* Most things go in %eax. */ -- regno = AX_REG; -+static int -+ix86_nsaved_sseregs (void) -+{ -+ int nregs = 0; -+ int regno; - -- /* Override FP return register with %xmm0 for local functions when -- SSE math is enabled or for functions with sseregparm attribute. */ -- if ((fn || fntype) && (mode == SFmode || mode == DFmode)) -- { -- int sse_level = ix86_function_sseregparm (fntype, fn, false); -- if (sse_level == -1) -- { -- error ("calling %qD with SSE calling convention without " -- "SSE/SSE2 enabled", fn); -- sorry ("this is a GCC bug that can be worked around by adding " -- "attribute used to function called"); -- } -- else if ((sse_level >= 1 && mode == SFmode) -- || (sse_level == 2 && mode == DFmode)) -- regno = FIRST_SSE_REG; -- } -+ if (!TARGET_64BIT_MS_ABI) -+ return 0; -+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) -+ if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true)) -+ nregs ++; -+ return nregs; -+} - -- /* OImode shouldn't be used directly. */ -- gcc_assert (mode != OImode); -+/* Given FROM and TO register numbers, say whether this elimination is -+ allowed. If stack alignment is needed, we can only replace argument -+ pointer with hard frame pointer, or replace frame pointer with stack -+ pointer. Otherwise, frame pointer elimination is automatically -+ handled and all other eliminations are valid. */ - -- return gen_rtx_REG (orig_mode, regno); -+static bool -+ix86_can_eliminate (const int from, const int to) -+{ -+ if (stack_realign_fp) -+ return ((from == ARG_POINTER_REGNUM -+ && to == HARD_FRAME_POINTER_REGNUM) -+ || (from == FRAME_POINTER_REGNUM -+ && to == STACK_POINTER_REGNUM)); -+ else -+ return to == STACK_POINTER_REGNUM ? 
!frame_pointer_needed : true; - } - --static rtx --function_value_64 (machine_mode orig_mode, machine_mode mode, -- const_tree valtype) -+/* Return the offset between two registers, one to be eliminated, and the other -+ its replacement, at the start of a routine. */ -+ -+HOST_WIDE_INT -+ix86_initial_elimination_offset (int from, int to) - { -- rtx ret; -+ struct ix86_frame &frame = cfun->machine->frame; - -- /* Handle libcalls, which don't provide a type node. */ -- if (valtype == NULL) -+ if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) -+ return frame.hard_frame_pointer_offset; -+ else if (from == FRAME_POINTER_REGNUM -+ && to == HARD_FRAME_POINTER_REGNUM) -+ return frame.hard_frame_pointer_offset - frame.frame_pointer_offset; -+ else - { -- unsigned int regno; -+ gcc_assert (to == STACK_POINTER_REGNUM); - -- switch (mode) -- { -- case E_SFmode: -- case E_SCmode: -- case E_DFmode: -- case E_DCmode: -- case E_TFmode: -- case E_SDmode: -- case E_DDmode: -- case E_TDmode: -- regno = FIRST_SSE_REG; -- break; -- case E_XFmode: -- case E_XCmode: -- regno = FIRST_FLOAT_REG; -- break; -- case E_TCmode: -- return NULL; -- default: -- regno = AX_REG; -- } -+ if (from == ARG_POINTER_REGNUM) -+ return frame.stack_pointer_offset; - -- return gen_rtx_REG (mode, regno); -- } -- else if (POINTER_TYPE_P (valtype)) -- { -- /* Pointers are always returned in word_mode. */ -- mode = word_mode; -+ gcc_assert (from == FRAME_POINTER_REGNUM); -+ return frame.stack_pointer_offset - frame.frame_pointer_offset; - } -- -- ret = construct_container (mode, orig_mode, valtype, 1, -- X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX, -- x86_64_int_return_registers, 0); -- -- /* For zero sized structures, construct_container returns NULL, but we -- need to keep rest of compiler happy by returning meaningful value. */ -- if (!ret) -- ret = gen_rtx_REG (orig_mode, AX_REG); -- -- return ret; - } - -+/* In a dynamically-aligned function, we can't know the offset from -+ stack pointer to frame pointer, so we must ensure that setjmp -+ eliminates fp against the hard fp (%ebp) rather than trying to -+ index from %esp up to the top of the frame across a gap that is -+ of unknown (at compile-time) size. */ - static rtx --function_value_ms_32 (machine_mode orig_mode, machine_mode mode, -- const_tree fntype, const_tree fn, const_tree valtype) -+ix86_builtin_setjmp_frame_value (void) - { -- unsigned int regno; -- -- /* Floating point return values in %st(0) -- (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes). */ -- if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387 -- && (GET_MODE_SIZE (mode) > 8 -- || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype))) -- { -- regno = FIRST_FLOAT_REG; -- return gen_rtx_REG (orig_mode, regno); -- } -- else -- return function_value_32(orig_mode, mode, fntype,fn); -+ return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx; - } - --static rtx --function_value_ms_64 (machine_mode orig_mode, machine_mode mode, -- const_tree valtype) -+/* Emits a warning for unsupported msabi to sysv pro/epilogues. 
*/ -+void warn_once_call_ms2sysv_xlogues (const char *feature) - { -- unsigned int regno = AX_REG; -- -- if (TARGET_SSE) -+ static bool warned_once = false; -+ if (!warned_once) - { -- switch (GET_MODE_SIZE (mode)) -- { -- case 16: -- if (valtype != NULL_TREE -- && !VECTOR_INTEGER_TYPE_P (valtype) -- && !VECTOR_INTEGER_TYPE_P (valtype) -- && !INTEGRAL_TYPE_P (valtype) -- && !VECTOR_FLOAT_TYPE_P (valtype)) -- break; -- if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode)) -- && !COMPLEX_MODE_P (mode)) -- regno = FIRST_SSE_REG; -- break; -- case 8: -- case 4: -- if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype)) -- break; -- if (mode == SFmode || mode == DFmode) -- regno = FIRST_SSE_REG; -- break; -- default: -- break; -- } -+ warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s", -+ feature); -+ warned_once = true; - } -- return gen_rtx_REG (orig_mode, regno); - } - --static rtx --ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl, -- machine_mode orig_mode, machine_mode mode) --{ -- const_tree fn, fntype; -+/* Return the probing interval for -fstack-clash-protection. */ - -- fn = NULL_TREE; -- if (fntype_or_decl && DECL_P (fntype_or_decl)) -- fn = fntype_or_decl; -- fntype = fn ? TREE_TYPE (fn) : fntype_or_decl; -- -- if (ix86_function_type_abi (fntype) == MS_ABI) -- { -- if (TARGET_64BIT) -- return function_value_ms_64 (orig_mode, mode, valtype); -- else -- return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype); -- } -- else if (TARGET_64BIT) -- return function_value_64 (orig_mode, mode, valtype); -+static HOST_WIDE_INT -+get_probe_interval (void) -+{ -+ if (flag_stack_clash_protection) -+ return (HOST_WIDE_INT_1U -+ << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL)); - else -- return function_value_32 (orig_mode, mode, fntype, fn); -+ return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP); - } - --static rtx --ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool) --{ -- machine_mode mode, orig_mode; -+/* When using -fsplit-stack, the allocation routines set a field in -+ the TCB to the bottom of the stack plus this much space, measured -+ in bytes. */ - -- orig_mode = TYPE_MODE (valtype); -- mode = type_natural_mode (valtype, NULL, true); -- return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode); --} -+#define SPLIT_STACK_AVAILABLE 256 - --/* Pointer function arguments and return values are promoted to -- word_mode for normal functions. */ -+/* Fill structure ix86_frame about frame of currently computed function. */ - --static machine_mode --ix86_promote_function_mode (const_tree type, machine_mode mode, -- int *punsignedp, const_tree fntype, -- int for_return) -+static void -+ix86_compute_frame_layout (void) - { -- if (cfun->machine->func_type == TYPE_NORMAL -- && type != NULL_TREE -- && POINTER_TYPE_P (type)) -- { -- *punsignedp = POINTERS_EXTEND_UNSIGNED; -- return word_mode; -- } -- return default_promote_function_mode (type, mode, punsignedp, fntype, -- for_return); --} -- --/* Return true if a structure, union or array with MODE containing FIELD -- should be accessed using BLKmode. */ -- --static bool --ix86_member_type_forces_blk (const_tree field, machine_mode mode) --{ -- /* Union with XFmode must be in BLKmode. 
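get_probe_interval above returns the stack-probing stride as a power of two: the stack-clash parameter when -fstack-clash-protection is on, otherwise STACK_CHECK_PROBE_INTERVAL_EXP. Assuming the usual default parameter value of 12 (neither the flag spelling nor the default is quoted from this patch), that is a 4 KiB stride; frames larger than one stride need explicit probing, which is also why the frame-layout code below drops save_regs_using_mov for such frames.

    /* Illustration only; assumes -fstack-clash-protection and a 1 << 12 =
       4096-byte probe interval.  The function name is hypothetical.  */
    void
    big_frame (void)
    {
      volatile char buf[16384];   /* spans several probe intervals of locals */
      buf[0] = 0;
    }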
*/ -- return (mode == XFmode -- && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE -- || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE)); --} -- --rtx --ix86_libcall_value (machine_mode mode) --{ -- return ix86_function_value_1 (NULL, NULL, mode, mode); --} -- --/* Return true iff type is returned in memory. */ -- --static bool --ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) --{ --#ifdef SUBTARGET_RETURN_IN_MEMORY -- return SUBTARGET_RETURN_IN_MEMORY (type, fntype); --#else -- const machine_mode mode = type_natural_mode (type, NULL, true); -- HOST_WIDE_INT size; -+ struct ix86_frame *frame = &cfun->machine->frame; -+ struct machine_function *m = cfun->machine; -+ unsigned HOST_WIDE_INT stack_alignment_needed; -+ HOST_WIDE_INT offset; -+ unsigned HOST_WIDE_INT preferred_alignment; -+ HOST_WIDE_INT size = get_frame_size (); -+ HOST_WIDE_INT to_allocate; - -- if (TARGET_64BIT) -+ /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit -+ * ms_abi functions that call a sysv function. We now need to prune away -+ * cases where it should be disabled. */ -+ if (TARGET_64BIT && m->call_ms2sysv) - { -- if (ix86_function_type_abi (fntype) == MS_ABI) -- { -- size = int_size_in_bytes (type); -+ gcc_assert (TARGET_64BIT_MS_ABI); -+ gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES); -+ gcc_assert (!TARGET_SEH); -+ gcc_assert (TARGET_SSE); -+ gcc_assert (!ix86_using_red_zone ()); - -- /* __m128 is returned in xmm0. */ -- if ((!type || VECTOR_INTEGER_TYPE_P (type) -- || INTEGRAL_TYPE_P (type) -- || VECTOR_FLOAT_TYPE_P (type)) -- && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode)) -- && !COMPLEX_MODE_P (mode) -- && (GET_MODE_SIZE (mode) == 16 || size == 16)) -- return false; -+ if (crtl->calls_eh_return) -+ { -+ gcc_assert (!reload_completed); -+ m->call_ms2sysv = false; -+ warn_once_call_ms2sysv_xlogues ("__builtin_eh_return"); -+ } - -- /* Otherwise, the size must be exactly in [1248]. */ -- return size != 1 && size != 2 && size != 4 && size != 8; -+ else if (ix86_static_chain_on_stack) -+ { -+ gcc_assert (!reload_completed); -+ m->call_ms2sysv = false; -+ warn_once_call_ms2sysv_xlogues ("static call chains"); - } -+ -+ /* Finally, compute which registers the stub will manage. */ - else - { -- int needed_intregs, needed_sseregs; -- -- return examine_argument (mode, type, 1, -- &needed_intregs, &needed_sseregs); -+ unsigned count = xlogue_layout::count_stub_managed_regs (); -+ m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS; -+ m->call_ms2sysv_pad_in = 0; - } - } -- else -- { -- size = int_size_in_bytes (type); -- -- /* Intel MCU psABI returns scalars and aggregates no larger than 8 -- bytes in registers. */ -- if (TARGET_IAMCU) -- return VECTOR_MODE_P (mode) || size < 0 || size > 8; -- -- if (mode == BLKmode) -- return true; -- -- if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8) -- return false; - -- if (VECTOR_MODE_P (mode) || mode == TImode) -- { -- /* User-created vectors small enough to fit in EAX. */ -- if (size < 8) -- return false; -+ frame->nregs = ix86_nsaved_regs (); -+ frame->nsseregs = ix86_nsaved_sseregs (); - -- /* Unless ABI prescibes otherwise, -- MMX/3dNow values are returned in MM0 if available. */ -- -- if (size == 8) -- return TARGET_VECT8_RETURNS || !TARGET_MMX; -+ /* 64-bit MS ABI seem to require stack alignment to be always 16, -+ except for function prologues, leaf functions and when the defult -+ incoming stack boundary is overriden at command line or via -+ force_align_arg_pointer attribute. 
- -- /* SSE values are returned in XMM0 if available. */ -- if (size == 16) -- return !TARGET_SSE; -+ Darwin's ABI specifies 128b alignment for both 32 and 64 bit variants -+ at call sites, including profile function calls. -+ */ -+ if (((TARGET_64BIT_MS_ABI || TARGET_MACHO) -+ && crtl->preferred_stack_boundary < 128) -+ && (!crtl->is_leaf || cfun->calls_alloca != 0 -+ || ix86_current_function_calls_tls_descriptor -+ || (TARGET_MACHO && crtl->profile) -+ || ix86_incoming_stack_boundary < 128)) -+ { -+ crtl->preferred_stack_boundary = 128; -+ crtl->stack_alignment_needed = 128; -+ } - -- /* AVX values are returned in YMM0 if available. */ -- if (size == 32) -- return !TARGET_AVX; -+ stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT; -+ preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT; - -- /* AVX512F values are returned in ZMM0 if available. */ -- if (size == 64) -- return !TARGET_AVX512F; -- } -+ gcc_assert (!size || stack_alignment_needed); -+ gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT); -+ gcc_assert (preferred_alignment <= stack_alignment_needed); - -- if (mode == XFmode) -- return false; -+ /* The only ABI saving SSE regs should be 64-bit ms_abi. */ -+ gcc_assert (TARGET_64BIT || !frame->nsseregs); -+ if (TARGET_64BIT && m->call_ms2sysv) -+ { -+ gcc_assert (stack_alignment_needed >= 16); -+ gcc_assert (!frame->nsseregs); -+ } - -- if (size > 12) -- return true; -+ /* For SEH we have to limit the amount of code movement into the prologue. -+ At present we do this via a BLOCKAGE, at which point there's very little -+ scheduling that can be done, which means that there's very little point -+ in doing anything except PUSHs. */ -+ if (TARGET_SEH) -+ m->use_fast_prologue_epilogue = false; -+ else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))) -+ { -+ int count = frame->nregs; -+ struct cgraph_node *node = cgraph_node::get (current_function_decl); - -- /* OImode shouldn't be used directly. */ -- gcc_assert (mode != OImode); -+ /* The fast prologue uses move instead of push to save registers. This -+ is significantly longer, but also executes faster as modern hardware -+ can execute the moves in parallel, but can't do that for push/pop. - -- return false; -+ Be careful about choosing what prologue to emit: When function takes -+ many instructions to execute we may use slow version as well as in -+ case function is known to be outside hot spot (this is known with -+ feedback only). Weight the size of function by number of registers -+ to save as it is cheap to use one or two push instructions but very -+ slow to use many of them. */ -+ if (count) -+ count = (count - 1) * FAST_PROLOGUE_INSN_COUNT; -+ if (node->frequency < NODE_FREQUENCY_NORMAL -+ || (flag_branch_probabilities -+ && node->frequency < NODE_FREQUENCY_HOT)) -+ m->use_fast_prologue_epilogue = false; -+ else -+ m->use_fast_prologue_epilogue -+ = !expensive_function_p (count); - } --#endif --} - -- --/* Create the va_list data type. */ -+ frame->save_regs_using_mov -+ = (TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue -+ /* If static stack checking is enabled and done with probes, -+ the registers need to be saved before allocating the frame. */ -+ && flag_stack_check != STATIC_BUILTIN_STACK_CHECK); - --static tree --ix86_build_builtin_va_list_64 (void) --{ -- tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl; -+ /* Skip return address and error code in exception handler. 
*/ -+ offset = INCOMING_FRAME_SP_OFFSET; - -- record = lang_hooks.types.make_type (RECORD_TYPE); -- type_decl = build_decl (BUILTINS_LOCATION, -- TYPE_DECL, get_identifier ("__va_list_tag"), record); -+ /* Skip pushed static chain. */ -+ if (ix86_static_chain_on_stack) -+ offset += UNITS_PER_WORD; - -- f_gpr = build_decl (BUILTINS_LOCATION, -- FIELD_DECL, get_identifier ("gp_offset"), -- unsigned_type_node); -- f_fpr = build_decl (BUILTINS_LOCATION, -- FIELD_DECL, get_identifier ("fp_offset"), -- unsigned_type_node); -- f_ovf = build_decl (BUILTINS_LOCATION, -- FIELD_DECL, get_identifier ("overflow_arg_area"), -- ptr_type_node); -- f_sav = build_decl (BUILTINS_LOCATION, -- FIELD_DECL, get_identifier ("reg_save_area"), -- ptr_type_node); -+ /* Skip saved base pointer. */ -+ if (frame_pointer_needed) -+ offset += UNITS_PER_WORD; -+ frame->hfp_save_offset = offset; - -- va_list_gpr_counter_field = f_gpr; -- va_list_fpr_counter_field = f_fpr; -+ /* The traditional frame pointer location is at the top of the frame. */ -+ frame->hard_frame_pointer_offset = offset; - -- DECL_FIELD_CONTEXT (f_gpr) = record; -- DECL_FIELD_CONTEXT (f_fpr) = record; -- DECL_FIELD_CONTEXT (f_ovf) = record; -- DECL_FIELD_CONTEXT (f_sav) = record; -+ /* Register save area */ -+ offset += frame->nregs * UNITS_PER_WORD; -+ frame->reg_save_offset = offset; - -- TYPE_STUB_DECL (record) = type_decl; -- TYPE_NAME (record) = type_decl; -- TYPE_FIELDS (record) = f_gpr; -- DECL_CHAIN (f_gpr) = f_fpr; -- DECL_CHAIN (f_fpr) = f_ovf; -- DECL_CHAIN (f_ovf) = f_sav; -+ /* On SEH target, registers are pushed just before the frame pointer -+ location. */ -+ if (TARGET_SEH) -+ frame->hard_frame_pointer_offset = offset; - -- layout_type (record); -+ /* Calculate the size of the va-arg area (not including padding, if any). */ -+ frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size; - -- TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"), -- NULL_TREE, TYPE_ATTRIBUTES (record)); -+ /* Also adjust stack_realign_offset for the largest alignment of -+ stack slot actually used. */ -+ if (stack_realign_fp -+ || (cfun->machine->max_used_stack_alignment != 0 -+ && (offset % cfun->machine->max_used_stack_alignment) != 0)) -+ { -+ /* We may need a 16-byte aligned stack for the remainder of the -+ register save area, but the stack frame for the local function -+ may require a greater alignment if using AVX/2/512. In order -+ to avoid wasting space, we first calculate the space needed for -+ the rest of the register saves, add that to the stack pointer, -+ and then realign the stack to the boundary of the start of the -+ frame for the local function. */ -+ HOST_WIDE_INT space_needed = 0; -+ HOST_WIDE_INT sse_reg_space_needed = 0; - -- /* The correct type is an array type of one element. */ -- return build_array_type (record, build_index_type (size_zero_node)); --} -+ if (TARGET_64BIT) -+ { -+ if (m->call_ms2sysv) -+ { -+ m->call_ms2sysv_pad_in = 0; -+ space_needed = xlogue_layout::get_instance ().get_stack_space_used (); -+ } - --/* Setup the builtin va_list data type and for 64-bit the additional -- calling convention specific va_list data types. */ -+ else if (frame->nsseregs) -+ /* The only ABI that has saved SSE registers (Win64) also has a -+ 16-byte aligned default stack. However, many programs violate -+ the ABI, and Wine64 forces stack realignment to compensate. 
*/ -+ space_needed = frame->nsseregs * 16; - --static tree --ix86_build_builtin_va_list (void) --{ -- if (TARGET_64BIT) -- { -- /* Initialize ABI specific va_list builtin types. -+ sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16); - -- In lto1, we can encounter two va_list types: -- - one as a result of the type-merge across TUs, and -- - the one constructed here. -- These two types will not have the same TYPE_MAIN_VARIANT, and therefore -- a type identity check in canonical_va_list_type based on -- TYPE_MAIN_VARIANT (which we used to have) will not work. -- Instead, we tag each va_list_type_node with its unique attribute, and -- look for the attribute in the type identity check in -- canonical_va_list_type. -+ /* 64-bit frame->va_arg_size should always be a multiple of 16, but -+ rounding to be pedantic. */ -+ space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16); -+ } -+ else -+ space_needed = frame->va_arg_size; - -- Tagging sysv_va_list_type_node directly with the attribute is -- problematic since it's a array of one record, which will degrade into a -- pointer to record when used as parameter (see build_va_arg comments for -- an example), dropping the attribute in the process. So we tag the -- record instead. */ -+ /* Record the allocation size required prior to the realignment AND. */ -+ frame->stack_realign_allocate = space_needed; - -- /* For SYSV_ABI we use an array of one record. */ -- sysv_va_list_type_node = ix86_build_builtin_va_list_64 (); -- -- /* For MS_ABI we use plain pointer to argument area. */ -- tree char_ptr_type = build_pointer_type (char_type_node); -- tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE, -- TYPE_ATTRIBUTES (char_ptr_type)); -- ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr); -+ /* The re-aligned stack starts at frame->stack_realign_offset. Values -+ before this point are not directly comparable with values below -+ this point. Use sp_valid_at to determine if the stack pointer is -+ valid for a given offset, fp_valid_at for the frame pointer, or -+ choose_baseaddr to have a base register chosen for you. - -- return ((ix86_abi == MS_ABI) -- ? ms_va_list_type_node -- : sysv_va_list_type_node); -+ Note that the result of (frame->stack_realign_offset -+ & (stack_alignment_needed - 1)) may not equal zero. */ -+ offset = ROUND_UP (offset + space_needed, stack_alignment_needed); -+ frame->stack_realign_offset = offset - space_needed; -+ frame->sse_reg_save_offset = frame->stack_realign_offset -+ + sse_reg_space_needed; - } - else - { -- /* For i386 we use plain pointer to argument area. */ -- return build_pointer_type (char_type_node); -+ frame->stack_realign_offset = offset; -+ -+ if (TARGET_64BIT && m->call_ms2sysv) -+ { -+ m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD); -+ offset += xlogue_layout::get_instance ().get_stack_space_used (); -+ } -+ -+ /* Align and set SSE register save area. */ -+ else if (frame->nsseregs) -+ { -+ /* If the incoming stack boundary is at least 16 bytes, or DRAP is -+ required and the DRAP re-alignment boundary is at least 16 bytes, -+ then we want the SSE register save area properly aligned. */ -+ if (ix86_incoming_stack_boundary >= 128 -+ || (stack_realign_drap && stack_alignment_needed >= 16)) -+ offset = ROUND_UP (offset, 16); -+ offset += frame->nsseregs * 16; -+ } -+ frame->sse_reg_save_offset = offset; -+ offset += frame->va_arg_size; - } --} - --/* Worker function for TARGET_SETUP_INCOMING_VARARGS. 
*/ -+ /* Align start of frame for local function. When a function call -+ is removed, it may become a leaf function. But if argument may -+ be passed on stack, we need to align the stack when there is no -+ tail call. */ -+ if (m->call_ms2sysv -+ || frame->va_arg_size != 0 -+ || size != 0 -+ || !crtl->is_leaf -+ || (!crtl->tail_call_emit -+ && cfun->machine->outgoing_args_on_stack) -+ || cfun->calls_alloca -+ || ix86_current_function_calls_tls_descriptor) -+ offset = ROUND_UP (offset, stack_alignment_needed); - --static void --setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum) --{ -- rtx save_area, mem; -- alias_set_type set; -- int i, max; -+ /* Frame pointer points here. */ -+ frame->frame_pointer_offset = offset; - -- /* GPR size of varargs save area. */ -- if (cfun->va_list_gpr_size) -- ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD; -- else -- ix86_varargs_gpr_size = 0; -+ offset += size; - -- /* FPR size of varargs save area. We don't need it if we don't pass -- anything in SSE registers. */ -- if (TARGET_SSE && cfun->va_list_fpr_size) -- ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16; -+ /* Add outgoing arguments area. Can be skipped if we eliminated -+ all the function calls as dead code. -+ Skipping is however impossible when function calls alloca. Alloca -+ expander assumes that last crtl->outgoing_args_size -+ of stack frame are unused. */ -+ if (ACCUMULATE_OUTGOING_ARGS -+ && (!crtl->is_leaf || cfun->calls_alloca -+ || ix86_current_function_calls_tls_descriptor)) -+ { -+ offset += crtl->outgoing_args_size; -+ frame->outgoing_arguments_size = crtl->outgoing_args_size; -+ } - else -- ix86_varargs_fpr_size = 0; -+ frame->outgoing_arguments_size = 0; - -- if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size) -- return; -+ /* Align stack boundary. Only needed if we're calling another function -+ or using alloca. */ -+ if (!crtl->is_leaf || cfun->calls_alloca -+ || ix86_current_function_calls_tls_descriptor) -+ offset = ROUND_UP (offset, preferred_alignment); - -- save_area = frame_pointer_rtx; -- set = get_varargs_alias_set (); -+ /* We've reached end of stack frame. */ -+ frame->stack_pointer_offset = offset; - -- max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD; -- if (max > X86_64_REGPARM_MAX) -- max = X86_64_REGPARM_MAX; -+ /* Size prologue needs to allocate. */ -+ to_allocate = offset - frame->sse_reg_save_offset; - -- for (i = cum->regno; i < max; i++) -+ if ((!to_allocate && frame->nregs <= 1) -+ || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000)) -+ /* If stack clash probing needs a loop, then it needs a -+ scratch register. But the returned register is only guaranteed -+ to be safe to use after register saves are complete. So if -+ stack clash protections are enabled and the allocated frame is -+ larger than the probe interval, then use pushes to save -+ callee saved registers. 
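A purely illustrative walk through the bookkeeping above, with made-up inputs (64-bit, frame pointer needed, two callee-saved GPRs, 40 bytes of locals, no varargs, SSE saves, realignment or outgoing arguments, and a non-leaf function so both ROUND_UPs apply). Every number and name below is hypothetical; the snippet only mirrors the arithmetic, it is not part of the patch.

    #include <stdio.h>

    #define ROUND_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

    int
    main (void)
    {
      long offset = 8;                                    /* return address */
      offset += 8;                                        /* saved frame pointer */
      long hard_frame_pointer_offset = offset;            /* 16 */
      offset += 2 * 8;                                    /* two saved GPRs */
      long reg_save_offset = offset;                      /* 32 */
      long sse_reg_save_offset = offset;                  /* no SSE/va_arg area */
      long frame_pointer_offset = ROUND_UP (offset, 16);  /* 32 */
      offset = frame_pointer_offset + 40;                 /* locals */
      long stack_pointer_offset = ROUND_UP (offset, 16);  /* 80 */
      long to_allocate = stack_pointer_offset - sse_reg_save_offset;  /* 48 */

      /* Prologue sketch: push %rbp, push two GPRs, then "sub $48, %rsp",
         which lands the stack pointer 80 bytes below the incoming CFA.  */
      printf ("hfp=%ld regs=%ld fp=%ld sp=%ld sub=%ld\n",
              hard_frame_pointer_offset, reg_save_offset,
              frame_pointer_offset, stack_pointer_offset, to_allocate);
      return 0;
    }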
*/ -+ || (flag_stack_clash_protection && to_allocate > get_probe_interval ())) -+ frame->save_regs_using_mov = false; -+ -+ if (ix86_using_red_zone () -+ && crtl->sp_is_unchanging -+ && crtl->is_leaf -+ && !ix86_pc_thunk_call_expanded -+ && !ix86_current_function_calls_tls_descriptor) - { -- mem = gen_rtx_MEM (word_mode, -- plus_constant (Pmode, save_area, i * UNITS_PER_WORD)); -- MEM_NOTRAP_P (mem) = 1; -- set_mem_alias_set (mem, set); -- emit_move_insn (mem, -- gen_rtx_REG (word_mode, -- x86_64_int_parameter_registers[i])); -+ frame->red_zone_size = to_allocate; -+ if (frame->save_regs_using_mov) -+ frame->red_zone_size += frame->nregs * UNITS_PER_WORD; -+ if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE) -+ frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE; - } -+ else -+ frame->red_zone_size = 0; -+ frame->stack_pointer_offset -= frame->red_zone_size; - -- if (ix86_varargs_fpr_size) -+ /* The SEH frame pointer location is near the bottom of the frame. -+ This is enforced by the fact that the difference between the -+ stack pointer and the frame pointer is limited to 240 bytes in -+ the unwind data structure. */ -+ if (TARGET_SEH) - { -- machine_mode smode; -- rtx_code_label *label; -- rtx test; -- -- /* Now emit code to save SSE registers. The AX parameter contains number -- of SSE parameter registers used to call this function, though all we -- actually check here is the zero/non-zero status. */ -- -- label = gen_label_rtx (); -- test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx); -- emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1), -- label)); -- -- /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if -- we used movdqa (i.e. TImode) instead? Perhaps even better would -- be if we could determine the real mode of the data, via a hook -- into pass_stdarg. Ignore all that for now. */ -- smode = V4SFmode; -- if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode)) -- crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode); -- -- max = cum->sse_regno + cfun->va_list_fpr_size / 16; -- if (max > X86_64_SSE_REGPARM_MAX) -- max = X86_64_SSE_REGPARM_MAX; -+ HOST_WIDE_INT diff; - -- for (i = cum->sse_regno; i < max; ++i) -+ /* If we can leave the frame pointer where it is, do so. Also, returns -+ the establisher frame for __builtin_frame_address (0). */ -+ diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset; -+ if (diff <= SEH_MAX_FRAME_SIZE -+ && (diff > 240 || (diff & 15) != 0) -+ && !crtl->accesses_prior_frames) - { -- mem = plus_constant (Pmode, save_area, -- i * 16 + ix86_varargs_gpr_size); -- mem = gen_rtx_MEM (smode, mem); -- MEM_NOTRAP_P (mem) = 1; -- set_mem_alias_set (mem, set); -- set_mem_align (mem, GET_MODE_ALIGNMENT (smode)); -- -- emit_move_insn (mem, gen_rtx_REG (smode, GET_SSE_REGNO (i))); -+ /* Ideally we'd determine what portion of the local stack frame -+ (within the constraint of the lowest 240) is most heavily used. -+ But without that complication, simply bias the frame pointer -+ by 128 bytes so as to maximize the amount of the local stack -+ frame that is addressable with 8-bit offsets. 
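The red_zone_size computation above is aimed at small leaf functions on the 64-bit SysV ABI, which may keep their locals in the 128-byte red zone below %rsp and allocate nothing in the prologue. A hedged example of such a function (the function itself is hypothetical):

    /* Illustration only: a leaf function whose few locals fit in the red
       zone, so to_allocate ends up folded into red_zone_size and the
       prologue emits no stack adjustment.  */
    int
    sum3 (int a, int b, int c)
    {
      int tmp[3] = { a, b, c };
      return tmp[0] + tmp[1] + tmp[2];
    }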
*/ -+ frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128; - } -- -- emit_label (label); - } - } - --static void --setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum) --{ -- alias_set_type set = get_varargs_alias_set (); -- int i; -+/* This is semi-inlined memory_address_length, but simplified -+ since we know that we're always dealing with reg+offset, and -+ to avoid having to create and discard all that rtl. */ - -- /* Reset to zero, as there might be a sysv vaarg used -- before. */ -- ix86_varargs_gpr_size = 0; -- ix86_varargs_fpr_size = 0; -+static inline int -+choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset) -+{ -+ int len = 4; - -- for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++) -+ if (offset == 0) - { -- rtx reg, mem; -+ /* EBP and R13 cannot be encoded without an offset. */ -+ len = (regno == BP_REG || regno == R13_REG); -+ } -+ else if (IN_RANGE (offset, -128, 127)) -+ len = 1; - -- mem = gen_rtx_MEM (Pmode, -- plus_constant (Pmode, virtual_incoming_args_rtx, -- i * UNITS_PER_WORD)); -- MEM_NOTRAP_P (mem) = 1; -- set_mem_alias_set (mem, set); -+ /* ESP and R12 must be encoded with a SIB byte. */ -+ if (regno == SP_REG || regno == R12_REG) -+ len++; - -- reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]); -- emit_move_insn (mem, reg); -+ return len; -+} -+ -+/* Determine if the stack pointer is valid for accessing the CFA_OFFSET in -+ the frame save area. The register is saved at CFA - CFA_OFFSET. */ -+ -+static bool -+sp_valid_at (HOST_WIDE_INT cfa_offset) -+{ -+ const struct machine_frame_state &fs = cfun->machine->fs; -+ if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset) -+ { -+ /* Validate that the cfa_offset isn't in a "no-man's land". */ -+ gcc_assert (cfa_offset <= fs.sp_realigned_fp_last); -+ return false; -+ } -+ return fs.sp_valid; -+} -+ -+/* Determine if the frame pointer is valid for accessing the CFA_OFFSET in -+ the frame save area. The register is saved at CFA - CFA_OFFSET. */ -+ -+static inline bool -+fp_valid_at (HOST_WIDE_INT cfa_offset) -+{ -+ const struct machine_frame_state &fs = cfun->machine->fs; -+ if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last) -+ { -+ /* Validate that the cfa_offset isn't in a "no-man's land". */ -+ gcc_assert (cfa_offset >= fs.sp_realigned_offset); -+ return false; - } -+ return fs.fp_valid; - } - -+/* Choose a base register based upon alignment requested, speed and/or -+ size. */ -+ - static void --ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode, -- tree type, int *, int no_rtl) -+choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg, -+ HOST_WIDE_INT &base_offset, -+ unsigned int align_reqested, unsigned int *align) - { -- CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); -- CUMULATIVE_ARGS next_cum; -- tree fntype; -+ const struct machine_function *m = cfun->machine; -+ unsigned int hfp_align; -+ unsigned int drap_align; -+ unsigned int sp_align; -+ bool hfp_ok = fp_valid_at (cfa_offset); -+ bool drap_ok = m->fs.drap_valid; -+ bool sp_ok = sp_valid_at (cfa_offset); - -- /* This argument doesn't appear to be used anymore. Which is good, -- because the old code here didn't suppress rtl generation. */ -- gcc_assert (!no_rtl); -+ hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY; - -- if (!TARGET_64BIT) -- return; -+ /* Filter out any registers that don't meet the requested alignment -+ criteria. 
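The choose_baseaddr_len helper above encodes the x86 addressing-mode size rule: a zero displacement costs nothing except for EBP/R13, a displacement in [-128, 127] costs one byte, anything else costs four, and ESP/R12 always add a SIB byte. A self-contained sketch of the same accounting follows; the register numbers are illustrative stand-ins, not the backend's BP_REG/SP_REG macros.

  #include <assert.h>

  /* Illustrative register numbers; stand-ins for the backend's *_REG values.  */
  enum { AX = 0, SP = 4, BP = 5, R12 = 12, R13 = 13 };

  /* Bytes of displacement/SIB needed for a reg+offset address.  */
  static int
  disp_bytes (int regno, long offset)
  {
    int len = 4;                              /* default: 32-bit displacement */
    if (offset == 0)
      len = (regno == BP || regno == R13);    /* only (%rbp)/(%r13) force a disp8 */
    else if (offset >= -128 && offset <= 127)
      len = 1;                                /* fits in a signed 8-bit disp */
    if (regno == SP || regno == R12)
      len++;                                  /* (%rsp)/(%r12) need a SIB byte */
    return len;
  }

  int
  main (void)
  {
    assert (disp_bytes (AX, 0) == 0);     /* (%rax): no extra bytes */
    assert (disp_bytes (BP, 0) == 1);     /* 0(%rbp): forced disp8 */
    assert (disp_bytes (SP, 8) == 2);     /* 8(%rsp): disp8 + SIB */
    assert (disp_bytes (AX, 512) == 4);   /* 512(%rax): disp32 */
    return 0;
  }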
*/ -+ if (align_reqested) -+ { -+ if (m->fs.realigned) -+ hfp_align = drap_align = sp_align = crtl->stack_alignment_needed; -+ /* SEH unwind code does do not currently support REG_CFA_EXPRESSION -+ notes (which we would need to use a realigned stack pointer), -+ so disable on SEH targets. */ -+ else if (m->fs.sp_realigned) -+ sp_align = crtl->stack_alignment_needed; - -- fntype = TREE_TYPE (current_function_decl); -+ hfp_ok = hfp_ok && hfp_align >= align_reqested; -+ drap_ok = drap_ok && drap_align >= align_reqested; -+ sp_ok = sp_ok && sp_align >= align_reqested; -+ } - -- /* For varargs, we do not want to skip the dummy va_dcl argument. -- For stdargs, we do want to skip the last named argument. */ -- next_cum = *cum; -- if (stdarg_p (fntype)) -- ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type, -- true); -+ if (m->use_fast_prologue_epilogue) -+ { -+ /* Choose the base register most likely to allow the most scheduling -+ opportunities. Generally FP is valid throughout the function, -+ while DRAP must be reloaded within the epilogue. But choose either -+ over the SP due to increased encoding size. */ - -- if (cum->call_abi == MS_ABI) -- setup_incoming_varargs_ms_64 (&next_cum); -+ if (hfp_ok) -+ { -+ base_reg = hard_frame_pointer_rtx; -+ base_offset = m->fs.fp_offset - cfa_offset; -+ } -+ else if (drap_ok) -+ { -+ base_reg = crtl->drap_reg; -+ base_offset = 0 - cfa_offset; -+ } -+ else if (sp_ok) -+ { -+ base_reg = stack_pointer_rtx; -+ base_offset = m->fs.sp_offset - cfa_offset; -+ } -+ } - else -- setup_incoming_varargs_64 (&next_cum); -+ { -+ HOST_WIDE_INT toffset; -+ int len = 16, tlen; -+ -+ /* Choose the base register with the smallest address encoding. -+ With a tie, choose FP > DRAP > SP. */ -+ if (sp_ok) -+ { -+ base_reg = stack_pointer_rtx; -+ base_offset = m->fs.sp_offset - cfa_offset; -+ len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset); -+ } -+ if (drap_ok) -+ { -+ toffset = 0 - cfa_offset; -+ tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset); -+ if (tlen <= len) -+ { -+ base_reg = crtl->drap_reg; -+ base_offset = toffset; -+ len = tlen; -+ } -+ } -+ if (hfp_ok) -+ { -+ toffset = m->fs.fp_offset - cfa_offset; -+ tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset); -+ if (tlen <= len) -+ { -+ base_reg = hard_frame_pointer_rtx; -+ base_offset = toffset; -+ len = tlen; -+ } -+ } -+ } -+ -+ /* Set the align return value. */ -+ if (align) -+ { -+ if (base_reg == stack_pointer_rtx) -+ *align = sp_align; -+ else if (base_reg == crtl->drap_reg) -+ *align = drap_align; -+ else if (base_reg == hard_frame_pointer_rtx) -+ *align = hfp_align; -+ } - } - --static void --ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v, -- machine_mode mode, -- tree type, -- int *pretend_size ATTRIBUTE_UNUSED, -- int no_rtl) -+/* Return an RTX that points to CFA_OFFSET within the stack frame and -+ the alignment of address. If ALIGN is non-null, it should point to -+ an alignment value (in bits) that is preferred or zero and will -+ recieve the alignment of the base register that was selected, -+ irrespective of rather or not CFA_OFFSET is a multiple of that -+ alignment value. If it is possible for the base register offset to be -+ non-immediate then SCRATCH_REGNO should specify a scratch register to -+ use. -+ -+ The valid base registers are taken from CFUN->MACHINE->FS. 
*/ -+ -+static rtx -+choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align, -+ unsigned int scratch_regno = INVALID_REGNUM) - { -- CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); -- CUMULATIVE_ARGS next_cum; -- tree fntype; -+ rtx base_reg = NULL; -+ HOST_WIDE_INT base_offset = 0; - -- gcc_assert (!no_rtl); -+ /* If a specific alignment is requested, try to get a base register -+ with that alignment first. */ -+ if (align && *align) -+ choose_basereg (cfa_offset, base_reg, base_offset, *align, align); - -- /* Do nothing if we use plain pointer to argument area. */ -- if (!TARGET_64BIT || cum->call_abi == MS_ABI) -- return; -+ if (!base_reg) -+ choose_basereg (cfa_offset, base_reg, base_offset, 0, align); - -- fntype = TREE_TYPE (current_function_decl); -+ gcc_assert (base_reg != NULL); - -- /* For varargs, we do not want to skip the dummy va_dcl argument. -- For stdargs, we do want to skip the last named argument. */ -- next_cum = *cum; -- if (stdarg_p (fntype)) -- ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type, -- true); --} -+ rtx base_offset_rtx = GEN_INT (base_offset); - -+ if (!x86_64_immediate_operand (base_offset_rtx, Pmode)) -+ { -+ gcc_assert (scratch_regno != INVALID_REGNUM); - --/* Checks if TYPE is of kind va_list char *. */ -+ rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno); -+ emit_move_insn (scratch_reg, base_offset_rtx); - --static bool --is_va_list_char_pointer (tree type) --{ -- tree canonic; -+ return gen_rtx_PLUS (Pmode, base_reg, scratch_reg); -+ } - -- /* For 32-bit it is always true. */ -- if (!TARGET_64BIT) -- return true; -- canonic = ix86_canonical_va_list_type (type); -- return (canonic == ms_va_list_type_node -- || (ix86_abi == MS_ABI && canonic == va_list_type_node)); -+ return plus_constant (Pmode, base_reg, base_offset); - } - --/* Implement va_start. */ -+/* Emit code to save registers in the prologue. */ - - static void --ix86_va_start (tree valist, rtx nextarg) -+ix86_emit_save_regs (void) - { -- HOST_WIDE_INT words, n_gpr, n_fpr; -- tree f_gpr, f_fpr, f_ovf, f_sav; -- tree gpr, fpr, ovf, sav, t; -- tree type; -- rtx ovf_rtx; -- -- if (flag_split_stack -- && cfun->machine->split_stack_varargs_pointer == NULL_RTX) -- { -- unsigned int scratch_regno; -+ unsigned int regno; -+ rtx_insn *insn; - -- /* When we are splitting the stack, we can't refer to the stack -- arguments using internal_arg_pointer, because they may be on -- the old stack. The split stack prologue will arrange to -- leave a pointer to the old stack arguments in a scratch -- register, which we here copy to a pseudo-register. The split -- stack prologue can't set the pseudo-register directly because -- it (the prologue) runs before any registers have been saved. */ -+ for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; ) -+ if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true)) -+ { -+ insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno))); -+ RTX_FRAME_RELATED_P (insn) = 1; -+ } -+} - -- scratch_regno = split_stack_prologue_scratch_regno (); -- if (scratch_regno != INVALID_REGNUM) -- { -- rtx reg; -- rtx_insn *seq; -+/* Emit a single register save at CFA - CFA_OFFSET. 
*/ - -- reg = gen_reg_rtx (Pmode); -- cfun->machine->split_stack_varargs_pointer = reg; -+static void -+ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno, -+ HOST_WIDE_INT cfa_offset) -+{ -+ struct machine_function *m = cfun->machine; -+ rtx reg = gen_rtx_REG (mode, regno); -+ rtx mem, addr, base, insn; -+ unsigned int align = GET_MODE_ALIGNMENT (mode); - -- start_sequence (); -- emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno)); -- seq = get_insns (); -- end_sequence (); -+ addr = choose_baseaddr (cfa_offset, &align); -+ mem = gen_frame_mem (mode, addr); - -- push_topmost_sequence (); -- emit_insn_after (seq, entry_of_function ()); -- pop_topmost_sequence (); -- } -- } -+ /* The location aligment depends upon the base register. */ -+ align = MIN (GET_MODE_ALIGNMENT (mode), align); -+ gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1))); -+ set_mem_align (mem, align); - -- /* Only 64bit target needs something special. */ -- if (is_va_list_char_pointer (TREE_TYPE (valist))) -+ insn = emit_insn (gen_rtx_SET (mem, reg)); -+ RTX_FRAME_RELATED_P (insn) = 1; -+ -+ base = addr; -+ if (GET_CODE (base) == PLUS) -+ base = XEXP (base, 0); -+ gcc_checking_assert (REG_P (base)); -+ -+ /* When saving registers into a re-aligned local stack frame, avoid -+ any tricky guessing by dwarf2out. */ -+ if (m->fs.realigned) - { -- if (cfun->machine->split_stack_varargs_pointer == NULL_RTX) -- std_expand_builtin_va_start (valist, nextarg); -+ gcc_checking_assert (stack_realign_drap); -+ -+ if (regno == REGNO (crtl->drap_reg)) -+ { -+ /* A bit of a hack. We force the DRAP register to be saved in -+ the re-aligned stack frame, which provides us with a copy -+ of the CFA that will last past the prologue. Install it. */ -+ gcc_checking_assert (cfun->machine->fs.fp_valid); -+ addr = plus_constant (Pmode, hard_frame_pointer_rtx, -+ cfun->machine->fs.fp_offset - cfa_offset); -+ mem = gen_rtx_MEM (mode, addr); -+ add_reg_note (insn, REG_CFA_DEF_CFA, mem); -+ } - else - { -- rtx va_r, next; -- -- va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE); -- next = expand_binop (ptr_mode, add_optab, -- cfun->machine->split_stack_varargs_pointer, -- crtl->args.arg_offset_rtx, -- NULL_RTX, 0, OPTAB_LIB_WIDEN); -- convert_move (va_r, next, 0); -+ /* The frame pointer is a stable reference within the -+ aligned frame. Use it. */ -+ gcc_checking_assert (cfun->machine->fs.fp_valid); -+ addr = plus_constant (Pmode, hard_frame_pointer_rtx, -+ cfun->machine->fs.fp_offset - cfa_offset); -+ mem = gen_rtx_MEM (mode, addr); -+ add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg)); - } -- return; - } - -- f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node)); -- f_fpr = DECL_CHAIN (f_gpr); -- f_ovf = DECL_CHAIN (f_fpr); -- f_sav = DECL_CHAIN (f_ovf); -- -- valist = build_simple_mem_ref (valist); -- TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node); -- /* The following should be folded into the MEM_REF offset. */ -- gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist), -- f_gpr, NULL_TREE); -- fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist), -- f_fpr, NULL_TREE); -- ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist), -- f_ovf, NULL_TREE); -- sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist), -- f_sav, NULL_TREE); -- -- /* Count number of gp and fp argument registers used. 
*/ -- words = crtl->args.info.words; -- n_gpr = crtl->args.info.regno; -- n_fpr = crtl->args.info.sse_regno; -- -- if (cfun->va_list_gpr_size) -+ else if (base == stack_pointer_rtx && m->fs.sp_realigned -+ && cfa_offset >= m->fs.sp_realigned_offset) - { -- type = TREE_TYPE (gpr); -- t = build2 (MODIFY_EXPR, type, -- gpr, build_int_cst (type, n_gpr * 8)); -- TREE_SIDE_EFFECTS (t) = 1; -- expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); -+ gcc_checking_assert (stack_realign_fp); -+ add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg)); - } - -- if (TARGET_SSE && cfun->va_list_fpr_size) -+ /* The memory may not be relative to the current CFA register, -+ which means that we may need to generate a new pattern for -+ use by the unwind info. */ -+ else if (base != m->fs.cfa_reg) - { -- type = TREE_TYPE (fpr); -- t = build2 (MODIFY_EXPR, type, fpr, -- build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX)); -- TREE_SIDE_EFFECTS (t) = 1; -- expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); -+ addr = plus_constant (Pmode, m->fs.cfa_reg, -+ m->fs.cfa_offset - cfa_offset); -+ mem = gen_rtx_MEM (mode, addr); -+ add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg)); - } -+} - -- /* Find the overflow area. */ -- type = TREE_TYPE (ovf); -- if (cfun->machine->split_stack_varargs_pointer == NULL_RTX) -- ovf_rtx = crtl->args.internal_arg_pointer; -- else -- ovf_rtx = cfun->machine->split_stack_varargs_pointer; -- t = make_tree (type, ovf_rtx); -- if (words != 0) -- t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD); -+/* Emit code to save registers using MOV insns. -+ First register is stored at CFA - CFA_OFFSET. */ -+static void -+ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset) -+{ -+ unsigned int regno; - -- t = build2 (MODIFY_EXPR, type, ovf, t); -- TREE_SIDE_EFFECTS (t) = 1; -- expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); -+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) -+ if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true)) -+ { -+ ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset); -+ cfa_offset -= UNITS_PER_WORD; -+ } -+} - -- if (ix86_varargs_gpr_size || ix86_varargs_fpr_size) -- { -- /* Find the register save area. -- Prologue of the function save it right above stack frame. */ -- type = TREE_TYPE (sav); -- t = make_tree (type, frame_pointer_rtx); -- if (!ix86_varargs_gpr_size) -- t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX); -+/* Emit code to save SSE registers using MOV insns. -+ First register is stored at CFA - CFA_OFFSET. */ -+static void -+ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset) -+{ -+ unsigned int regno; - -- t = build2 (MODIFY_EXPR, type, sav, t); -- TREE_SIDE_EFFECTS (t) = 1; -- expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); -- } -+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) -+ if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true)) -+ { -+ ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset); -+ cfa_offset -= GET_MODE_SIZE (V4SFmode); -+ } - } - --/* Implement va_arg. */ -+static GTY(()) rtx queued_cfa_restores; - --static tree --ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, -- gimple_seq *post_p) -+/* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack -+ manipulation insn. The value is on the stack at CFA - CFA_OFFSET. 
-+ Don't add the note if the previously saved value will be left untouched -+ within stack red-zone till return, as unwinders can find the same value -+ in the register and on the stack. */ -+ -+static void -+ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset) - { -- static const int intreg[6] = { 0, 1, 2, 3, 4, 5 }; -- tree f_gpr, f_fpr, f_ovf, f_sav; -- tree gpr, fpr, ovf, sav, t; -- int size, rsize; -- tree lab_false, lab_over = NULL_TREE; -- tree addr, t2; -- rtx container; -- int indirect_p = 0; -- tree ptrtype; -- machine_mode nat_mode; -- unsigned int arg_boundary; -- unsigned int type_align; -+ if (!crtl->shrink_wrapped -+ && cfa_offset <= cfun->machine->fs.red_zone_offset) -+ return; - -- /* Only 64bit target needs something special. */ -- if (is_va_list_char_pointer (TREE_TYPE (valist))) -- return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); -+ if (insn) -+ { -+ add_reg_note (insn, REG_CFA_RESTORE, reg); -+ RTX_FRAME_RELATED_P (insn) = 1; -+ } -+ else -+ queued_cfa_restores -+ = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores); -+} - -- f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node)); -- f_fpr = DECL_CHAIN (f_gpr); -- f_ovf = DECL_CHAIN (f_fpr); -- f_sav = DECL_CHAIN (f_ovf); -+/* Add queued REG_CFA_RESTORE notes if any to INSN. */ - -- gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), -- valist, f_gpr, NULL_TREE); -+static void -+ix86_add_queued_cfa_restore_notes (rtx insn) -+{ -+ rtx last; -+ if (!queued_cfa_restores) -+ return; -+ for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1)) -+ ; -+ XEXP (last, 1) = REG_NOTES (insn); -+ REG_NOTES (insn) = queued_cfa_restores; -+ queued_cfa_restores = NULL_RTX; -+ RTX_FRAME_RELATED_P (insn) = 1; -+} - -- fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE); -- ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE); -- sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE); -+/* Expand prologue or epilogue stack adjustment. -+ The pattern exist to put a dependency on all ebp-based memory accesses. -+ STYLE should be negative if instructions should be marked as frame related, -+ zero if %r11 register is live and cannot be freely used and positive -+ otherwise. */ - -- indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false); -- if (indirect_p) -- type = build_pointer_type (type); -- size = arg_int_size_in_bytes (type); -- rsize = CEIL (size, UNITS_PER_WORD); -+static rtx -+pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, -+ int style, bool set_cfa) -+{ -+ struct machine_function *m = cfun->machine; -+ rtx insn; -+ bool add_frame_related_expr = false; - -- nat_mode = type_natural_mode (type, NULL, false); -- switch (nat_mode) -+ if (Pmode == SImode) -+ insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset); -+ else if (x86_64_immediate_operand (offset, DImode)) -+ insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset); -+ else - { -- case E_V8SFmode: -- case E_V8SImode: -- case E_V32QImode: -- case E_V16HImode: -- case E_V4DFmode: -- case E_V4DImode: -- case E_V16SFmode: -- case E_V16SImode: -- case E_V64QImode: -- case E_V32HImode: -- case E_V8DFmode: -- case E_V8DImode: -- /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */ -- if (!TARGET_64BIT_MS_ABI) -+ rtx tmp; -+ /* r11 is used by indirect sibcall return as well, set before the -+ epilogue and used after the epilogue. 
*/ -+ if (style) -+ tmp = gen_rtx_REG (DImode, R11_REG); -+ else - { -- container = NULL; -- break; -+ gcc_assert (src != hard_frame_pointer_rtx -+ && dest != hard_frame_pointer_rtx); -+ tmp = hard_frame_pointer_rtx; - } -- /* FALLTHRU */ -+ insn = emit_insn (gen_rtx_SET (tmp, offset)); -+ if (style < 0) -+ add_frame_related_expr = true; - -- default: -- container = construct_container (nat_mode, TYPE_MODE (type), -- type, 0, X86_64_REGPARM_MAX, -- X86_64_SSE_REGPARM_MAX, intreg, -- 0); -- break; -+ insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp); - } - -- /* Pull the value out of the saved registers. */ -- -- addr = create_tmp_var (ptr_type_node, "addr"); -- type_align = TYPE_ALIGN (type); -+ insn = emit_insn (insn); -+ if (style >= 0) -+ ix86_add_queued_cfa_restore_notes (insn); - -- if (container) -+ if (set_cfa) - { -- int needed_intregs, needed_sseregs; -- bool need_temp; -- tree int_addr, sse_addr; -- -- lab_false = create_artificial_label (UNKNOWN_LOCATION); -- lab_over = create_artificial_label (UNKNOWN_LOCATION); -- -- examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs); -+ rtx r; - -- need_temp = (!REG_P (container) -- && ((needed_intregs && TYPE_ALIGN (type) > 64) -- || TYPE_ALIGN (type) > 128)); -+ gcc_assert (m->fs.cfa_reg == src); -+ m->fs.cfa_offset += INTVAL (offset); -+ m->fs.cfa_reg = dest; - -- /* In case we are passing structure, verify that it is consecutive block -- on the register save area. If not we need to do moves. */ -- if (!need_temp && !REG_P (container)) -+ r = gen_rtx_PLUS (Pmode, src, offset); -+ r = gen_rtx_SET (dest, r); -+ add_reg_note (insn, REG_CFA_ADJUST_CFA, r); -+ RTX_FRAME_RELATED_P (insn) = 1; -+ } -+ else if (style < 0) -+ { -+ RTX_FRAME_RELATED_P (insn) = 1; -+ if (add_frame_related_expr) - { -- /* Verify that all registers are strictly consecutive */ -- if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0)))) -- { -- int i; -+ rtx r = gen_rtx_PLUS (Pmode, src, offset); -+ r = gen_rtx_SET (dest, r); -+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, r); -+ } -+ } - -- for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++) -- { -- rtx slot = XVECEXP (container, 0, i); -- if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i -- || INTVAL (XEXP (slot, 1)) != i * 16) -- need_temp = true; -- } -- } -- else -- { -- int i; -+ if (dest == stack_pointer_rtx) -+ { -+ HOST_WIDE_INT ooffset = m->fs.sp_offset; -+ bool valid = m->fs.sp_valid; -+ bool realigned = m->fs.sp_realigned; - -- for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++) -- { -- rtx slot = XVECEXP (container, 0, i); -- if (REGNO (XEXP (slot, 0)) != (unsigned int) i -- || INTVAL (XEXP (slot, 1)) != i * 8) -- need_temp = true; -- } -- } -- } -- if (!need_temp) -- { -- int_addr = addr; -- sse_addr = addr; -- } -- else -+ if (src == hard_frame_pointer_rtx) - { -- int_addr = create_tmp_var (ptr_type_node, "int_addr"); -- sse_addr = create_tmp_var (ptr_type_node, "sse_addr"); -+ valid = m->fs.fp_valid; -+ realigned = false; -+ ooffset = m->fs.fp_offset; - } -- -- /* First ensure that we fit completely in registers. 
*/ -- if (needed_intregs) -+ else if (src == crtl->drap_reg) - { -- t = build_int_cst (TREE_TYPE (gpr), -- (X86_64_REGPARM_MAX - needed_intregs + 1) * 8); -- t = build2 (GE_EXPR, boolean_type_node, gpr, t); -- t2 = build1 (GOTO_EXPR, void_type_node, lab_false); -- t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE); -- gimplify_and_add (t, pre_p); -+ valid = m->fs.drap_valid; -+ realigned = false; -+ ooffset = 0; - } -- if (needed_sseregs) -+ else - { -- t = build_int_cst (TREE_TYPE (fpr), -- (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16 -- + X86_64_REGPARM_MAX * 8); -- t = build2 (GE_EXPR, boolean_type_node, fpr, t); -- t2 = build1 (GOTO_EXPR, void_type_node, lab_false); -- t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE); -- gimplify_and_add (t, pre_p); -+ /* Else there are two possibilities: SP itself, which we set -+ up as the default above. Or EH_RETURN_STACKADJ_RTX, which is -+ taken care of this by hand along the eh_return path. */ -+ gcc_checking_assert (src == stack_pointer_rtx -+ || offset == const0_rtx); - } - -- /* Compute index to start of area used for integer regs. */ -- if (needed_intregs) -- { -- /* int_addr = gpr + sav; */ -- t = fold_build_pointer_plus (sav, gpr); -- gimplify_assign (int_addr, t, pre_p); -- } -- if (needed_sseregs) -- { -- /* sse_addr = fpr + sav; */ -- t = fold_build_pointer_plus (sav, fpr); -- gimplify_assign (sse_addr, t, pre_p); -- } -- if (need_temp) -- { -- int i, prev_size = 0; -- tree temp = create_tmp_var (type, "va_arg_tmp"); -+ m->fs.sp_offset = ooffset - INTVAL (offset); -+ m->fs.sp_valid = valid; -+ m->fs.sp_realigned = realigned; -+ } -+ return insn; -+} - -- /* addr = &temp; */ -- t = build1 (ADDR_EXPR, build_pointer_type (type), temp); -- gimplify_assign (addr, t, pre_p); -+/* Find an available register to be used as dynamic realign argument -+ pointer regsiter. Such a register will be written in prologue and -+ used in begin of body, so it must not be -+ 1. parameter passing register. -+ 2. GOT pointer. -+ We reuse static-chain register if it is available. Otherwise, we -+ use DI for i386 and R13 for x86-64. We chose R13 since it has -+ shorter encoding. - -- for (i = 0; i < XVECLEN (container, 0); i++) -- { -- rtx slot = XVECEXP (container, 0, i); -- rtx reg = XEXP (slot, 0); -- machine_mode mode = GET_MODE (reg); -- tree piece_type; -- tree addr_type; -- tree daddr_type; -- tree src_addr, src; -- int src_offset; -- tree dest_addr, dest; -- int cur_size = GET_MODE_SIZE (mode); -+ Return: the regno of chosen register. 
*/ - -- gcc_assert (prev_size <= INTVAL (XEXP (slot, 1))); -- prev_size = INTVAL (XEXP (slot, 1)); -- if (prev_size + cur_size > size) -- { -- cur_size = size - prev_size; -- unsigned int nbits = cur_size * BITS_PER_UNIT; -- if (!int_mode_for_size (nbits, 1).exists (&mode)) -- mode = QImode; -- } -- piece_type = lang_hooks.types.type_for_mode (mode, 1); -- if (mode == GET_MODE (reg)) -- addr_type = build_pointer_type (piece_type); -- else -- addr_type = build_pointer_type_for_mode (piece_type, ptr_mode, -- true); -- daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode, -- true); -+static unsigned int -+find_drap_reg (void) -+{ -+ tree decl = cfun->decl; - -- if (SSE_REGNO_P (REGNO (reg))) -- { -- src_addr = sse_addr; -- src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16; -- } -- else -- { -- src_addr = int_addr; -- src_offset = REGNO (reg) * 8; -- } -- src_addr = fold_convert (addr_type, src_addr); -- src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset); -+ /* Always use callee-saved register if there are no caller-saved -+ registers. */ -+ if (TARGET_64BIT) -+ { -+ /* Use R13 for nested function or function need static chain. -+ Since function with tail call may use any caller-saved -+ registers in epilogue, DRAP must not use caller-saved -+ register in such case. */ -+ if (DECL_STATIC_CHAIN (decl) -+ || cfun->machine->no_caller_saved_registers -+ || crtl->tail_call_emit) -+ return R13_REG; - -- dest_addr = fold_convert (daddr_type, addr); -- dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size); -- if (cur_size == GET_MODE_SIZE (mode)) -- { -- src = build_va_arg_indirect_ref (src_addr); -- dest = build_va_arg_indirect_ref (dest_addr); -+ return R10_REG; -+ } -+ else -+ { -+ /* Use DI for nested function or function need static chain. -+ Since function with tail call may use any caller-saved -+ registers in epilogue, DRAP must not use caller-saved -+ register in such case. */ -+ if (DECL_STATIC_CHAIN (decl) -+ || cfun->machine->no_caller_saved_registers -+ || crtl->tail_call_emit) -+ return DI_REG; - -- gimplify_assign (dest, src, pre_p); -- } -- else -- { -- tree copy -- = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY), -- 3, dest_addr, src_addr, -- size_int (cur_size)); -- gimplify_and_add (copy, pre_p); -- } -- prev_size += cur_size; -- } -- } -- -- if (needed_intregs) -- { -- t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr, -- build_int_cst (TREE_TYPE (gpr), needed_intregs * 8)); -- gimplify_assign (gpr, t, pre_p); -- /* The GPR save area guarantees only 8-byte alignment. */ -- if (!need_temp) -- type_align = MIN (type_align, 64); -- } -- -- if (needed_sseregs) -+ /* Reuse static chain register if it isn't used for parameter -+ passing. */ -+ if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2) - { -- t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr, -- build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16)); -- gimplify_assign (unshare_expr (fpr), t, pre_p); -+ unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl)); -+ if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0) -+ return CX_REG; - } -- -- gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over)); -- -- gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false)); -- } -- -- /* ... otherwise out of the overflow area. */ -- -- /* When we align parameter on stack for caller, if the parameter -- alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be -- aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee -- here with caller. 
*/ -- arg_boundary = ix86_function_arg_boundary (VOIDmode, type); -- if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT) -- arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT; -- -- /* Care for on-stack alignment if needed. */ -- if (arg_boundary <= 64 || size == 0) -- t = ovf; -- else -- { -- HOST_WIDE_INT align = arg_boundary / 8; -- t = fold_build_pointer_plus_hwi (ovf, align - 1); -- t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, -- build_int_cst (TREE_TYPE (t), -align)); -+ return DI_REG; - } -+} - -- gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue); -- gimplify_assign (addr, t, pre_p); -+/* Return minimum incoming stack alignment. */ - -- t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD); -- gimplify_assign (unshare_expr (ovf), t, pre_p); -+static unsigned int -+ix86_minimum_incoming_stack_boundary (bool sibcall) -+{ -+ unsigned int incoming_stack_boundary; - -- if (container) -- gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over)); -+ /* Stack of interrupt handler is aligned to 128 bits in 64bit mode. */ -+ if (cfun->machine->func_type != TYPE_NORMAL) -+ incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY; -+ /* Prefer the one specified at command line. */ -+ else if (ix86_user_incoming_stack_boundary) -+ incoming_stack_boundary = ix86_user_incoming_stack_boundary; -+ /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary -+ if -mstackrealign is used, it isn't used for sibcall check and -+ estimated stack alignment is 128bit. */ -+ else if (!sibcall -+ && ix86_force_align_arg_pointer -+ && crtl->stack_alignment_estimated == 128) -+ incoming_stack_boundary = MIN_STACK_BOUNDARY; -+ else -+ incoming_stack_boundary = ix86_default_incoming_stack_boundary; - -- type = build_aligned_type (type, type_align); -- ptrtype = build_pointer_type_for_mode (type, ptr_mode, true); -- addr = fold_convert (ptrtype, addr); -+ /* Incoming stack alignment can be changed on individual functions -+ via force_align_arg_pointer attribute. We use the smallest -+ incoming stack boundary. */ -+ if (incoming_stack_boundary > MIN_STACK_BOUNDARY -+ && lookup_attribute ("force_align_arg_pointer", -+ TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))) -+ incoming_stack_boundary = MIN_STACK_BOUNDARY; - -- if (indirect_p) -- addr = build_va_arg_indirect_ref (addr); -- return build_va_arg_indirect_ref (addr); --} -- --/* Return true if OPNUM's MEM should be matched -- in movabs* patterns. */ -+ /* The incoming stack frame has to be aligned at least at -+ parm_stack_boundary. */ -+ if (incoming_stack_boundary < crtl->parm_stack_boundary) -+ incoming_stack_boundary = crtl->parm_stack_boundary; - --bool --ix86_check_movabs (rtx insn, int opnum) --{ -- rtx set, mem; -+ /* Stack at entrance of main is aligned by runtime. We use the -+ smallest incoming stack boundary. */ -+ if (incoming_stack_boundary > MAIN_STACK_BOUNDARY -+ && DECL_NAME (current_function_decl) -+ && MAIN_NAME_P (DECL_NAME (current_function_decl)) -+ && DECL_FILE_SCOPE_P (current_function_decl)) -+ incoming_stack_boundary = MAIN_STACK_BOUNDARY; - -- set = PATTERN (insn); -- if (GET_CODE (set) == PARALLEL) -- set = XVECEXP (set, 0, 0); -- gcc_assert (GET_CODE (set) == SET); -- mem = XEXP (set, opnum); -- while (SUBREG_P (mem)) -- mem = SUBREG_REG (mem); -- gcc_assert (MEM_P (mem)); -- return volatile_ok || !MEM_VOLATILE_P (mem); -+ return incoming_stack_boundary; - } - --/* Return false if INSN contains a MEM with a non-default address space. 
*/ --bool --ix86_check_no_addr_space (rtx insn) --{ -- subrtx_var_iterator::array_type array; -- FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL) -- { -- rtx x = *iter; -- if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x))) -- return false; -- } -- return true; --} -- --/* Initialize the table of extra 80387 mathematical constants. */ -+/* Update incoming stack boundary and estimated stack alignment. */ - - static void --init_ext_80387_constants (void) -+ix86_update_stack_boundary (void) - { -- static const char * cst[5] = -- { -- "0.3010299956639811952256464283594894482", /* 0: fldlg2 */ -- "0.6931471805599453094286904741849753009", /* 1: fldln2 */ -- "1.4426950408889634073876517827983434472", /* 2: fldl2e */ -- "3.3219280948873623478083405569094566090", /* 3: fldl2t */ -- "3.1415926535897932385128089594061862044", /* 4: fldpi */ -- }; -- int i; -+ ix86_incoming_stack_boundary -+ = ix86_minimum_incoming_stack_boundary (false); - -- for (i = 0; i < 5; i++) -- { -- real_from_string (&ext_80387_constants_table[i], cst[i]); -- /* Ensure each constant is rounded to XFmode precision. */ -- real_convert (&ext_80387_constants_table[i], -- XFmode, &ext_80387_constants_table[i]); -- } -+ /* x86_64 vararg needs 16byte stack alignment for register save area. */ -+ if (TARGET_64BIT -+ && cfun->stdarg -+ && crtl->stack_alignment_estimated < 128) -+ crtl->stack_alignment_estimated = 128; - -- ext_80387_constants_init = 1; -+ /* __tls_get_addr needs to be called with 16-byte aligned stack. */ -+ if (ix86_tls_descriptor_calls_expanded_in_cfun -+ && crtl->preferred_stack_boundary < 128) -+ crtl->preferred_stack_boundary = 128; - } - --/* Return non-zero if the constant is something that -- can be loaded with a special instruction. */ -+/* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is -+ needed or an rtx for DRAP otherwise. */ - --int --standard_80387_constant_p (rtx x) -+static rtx -+ix86_get_drap_rtx (void) - { -- machine_mode mode = GET_MODE (x); -- -- const REAL_VALUE_TYPE *r; -- -- if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode))) -- return -1; -- -- if (x == CONST0_RTX (mode)) -- return 1; -- if (x == CONST1_RTX (mode)) -- return 2; -- -- r = CONST_DOUBLE_REAL_VALUE (x); -+ /* We must use DRAP if there are outgoing arguments on stack and -+ ACCUMULATE_OUTGOING_ARGS is false. */ -+ if (ix86_force_drap -+ || (cfun->machine->outgoing_args_on_stack -+ && !ACCUMULATE_OUTGOING_ARGS)) -+ crtl->need_drap = true; - -- /* For XFmode constants, try to find a special 80387 instruction when -- optimizing for size or on those CPUs that benefit from them. */ -- if (mode == XFmode -- && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS)) -+ if (stack_realign_drap) - { -- int i; -+ /* Assign DRAP to vDRAP and returns vDRAP */ -+ unsigned int regno = find_drap_reg (); -+ rtx drap_vreg; -+ rtx arg_ptr; -+ rtx_insn *seq, *insn; - -- if (! 
ext_80387_constants_init) -- init_ext_80387_constants (); -+ arg_ptr = gen_rtx_REG (Pmode, regno); -+ crtl->drap_reg = arg_ptr; - -- for (i = 0; i < 5; i++) -- if (real_identical (r, &ext_80387_constants_table[i])) -- return i + 3; -+ start_sequence (); -+ drap_vreg = copy_to_reg (arg_ptr); -+ seq = get_insns (); -+ end_sequence (); -+ -+ insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ())); -+ if (!optimize) -+ { -+ add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg); -+ RTX_FRAME_RELATED_P (insn) = 1; -+ } -+ return drap_vreg; - } -+ else -+ return NULL; -+} - -- /* Load of the constant -0.0 or -1.0 will be split as -- fldz;fchs or fld1;fchs sequence. */ -- if (real_isnegzero (r)) -- return 8; -- if (real_identical (r, &dconstm1)) -- return 9; -+/* Handle the TARGET_INTERNAL_ARG_POINTER hook. */ - -- return 0; -+static rtx -+ix86_internal_arg_pointer (void) -+{ -+ return virtual_incoming_args_rtx; - } - --/* Return the opcode of the special instruction to be used to load -- the constant X. */ -+struct scratch_reg { -+ rtx reg; -+ bool saved; -+}; - --const char * --standard_80387_constant_opcode (rtx x) -+/* Return a short-lived scratch register for use on function entry. -+ In 32-bit mode, it is valid only after the registers are saved -+ in the prologue. This register must be released by means of -+ release_scratch_register_on_entry once it is dead. */ -+ -+static void -+get_scratch_register_on_entry (struct scratch_reg *sr) - { -- switch (standard_80387_constant_p (x)) -- { -- case 1: -- return "fldz"; -- case 2: -- return "fld1"; -- case 3: -- return "fldlg2"; -- case 4: -- return "fldln2"; -- case 5: -- return "fldl2e"; -- case 6: -- return "fldl2t"; -- case 7: -- return "fldpi"; -- case 8: -- case 9: -- return "#"; -- default: -- gcc_unreachable (); -- } --} -- --/* Return the CONST_DOUBLE representing the 80387 constant that is -- loaded by the specified special instruction. The argument IDX -- matches the return value from standard_80387_constant_p. */ -- --rtx --standard_80387_constant_rtx (int idx) --{ -- int i; -+ int regno; - -- if (! ext_80387_constants_init) -- init_ext_80387_constants (); -+ sr->saved = false; - -- switch (idx) -+ if (TARGET_64BIT) - { -- case 3: -- case 4: -- case 5: -- case 6: -- case 7: -- i = idx - 3; -- break; -+ /* We always use R11 in 64-bit mode. */ -+ regno = R11_REG; -+ } -+ else -+ { -+ tree decl = current_function_decl, fntype = TREE_TYPE (decl); -+ bool fastcall_p -+ = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE; -+ bool thiscall_p -+ = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE; -+ bool static_chain_p = DECL_STATIC_CHAIN (decl); -+ int regparm = ix86_function_regparm (fntype, decl); -+ int drap_regno -+ = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM; - -- default: -- gcc_unreachable (); -+ /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax -+ for the static chain register. */ -+ if ((regparm < 1 || (fastcall_p && !static_chain_p)) -+ && drap_regno != AX_REG) -+ regno = AX_REG; -+ /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx -+ for the static chain register. */ -+ else if (thiscall_p && !static_chain_p && drap_regno != AX_REG) -+ regno = AX_REG; -+ else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG) -+ regno = DX_REG; -+ /* ecx is the static chain register. 
*/ -+ else if (regparm < 3 && !fastcall_p && !thiscall_p -+ && !static_chain_p -+ && drap_regno != CX_REG) -+ regno = CX_REG; -+ else if (ix86_save_reg (BX_REG, true, false)) -+ regno = BX_REG; -+ /* esi is the static chain register. */ -+ else if (!(regparm == 3 && static_chain_p) -+ && ix86_save_reg (SI_REG, true, false)) -+ regno = SI_REG; -+ else if (ix86_save_reg (DI_REG, true, false)) -+ regno = DI_REG; -+ else -+ { -+ regno = (drap_regno == AX_REG ? DX_REG : AX_REG); -+ sr->saved = true; -+ } - } - -- return const_double_from_real_value (ext_80387_constants_table[i], -- XFmode); -+ sr->reg = gen_rtx_REG (Pmode, regno); -+ if (sr->saved) -+ { -+ rtx_insn *insn = emit_insn (gen_push (sr->reg)); -+ RTX_FRAME_RELATED_P (insn) = 1; -+ } - } - --/* Return 1 if X is all bits 0 and 2 if X is all bits 1 -- in supported SSE/AVX vector mode. */ -- --int --standard_sse_constant_p (rtx x, machine_mode pred_mode) --{ -- machine_mode mode; -- -- if (!TARGET_SSE) -- return 0; -+/* Release a scratch register obtained from the preceding function. - -- mode = GET_MODE (x); -+ If RELEASE_VIA_POP is true, we just pop the register off the stack -+ to release it. This is what non-Linux systems use with -fstack-check. - -- if (x == const0_rtx || const0_operand (x, mode)) -- return 1; -+ Otherwise we use OFFSET to locate the saved register and the -+ allocated stack space becomes part of the local frame and is -+ deallocated by the epilogue. */ - -- if (x == constm1_rtx || vector_all_ones_operand (x, mode)) -+static void -+release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset, -+ bool release_via_pop) -+{ -+ if (sr->saved) - { -- /* VOIDmode integer constant, get mode from the predicate. */ -- if (mode == VOIDmode) -- mode = pred_mode; -+ if (release_via_pop) -+ { -+ struct machine_function *m = cfun->machine; -+ rtx x, insn = emit_insn (gen_pop (sr->reg)); - -- switch (GET_MODE_SIZE (mode)) -+ /* The RX FRAME_RELATED_P mechanism doesn't know about pop. */ -+ RTX_FRAME_RELATED_P (insn) = 1; -+ x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD)); -+ x = gen_rtx_SET (stack_pointer_rtx, x); -+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, x); -+ m->fs.sp_offset -= UNITS_PER_WORD; -+ } -+ else - { -- case 64: -- if (TARGET_AVX512F) -- return 2; -- break; -- case 32: -- if (TARGET_AVX2) -- return 2; -- break; -- case 16: -- if (TARGET_SSE2) -- return 2; -- break; -- case 0: -- /* VOIDmode */ -- gcc_unreachable (); -- default: -- break; -+ rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset)); -+ x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x)); -+ emit_insn (x); - } - } -- -- return 0; - } - --/* Return the opcode of the special instruction to be used to load -- the constant operands[1] into operands[0]. */ -+/* Emit code to adjust the stack pointer by SIZE bytes while probing it. - --const char * --standard_sse_constant_opcode (rtx_insn *insn, rtx *operands) --{ -- machine_mode mode; -- rtx x = operands[1]; -+ This differs from the next routine in that it tries hard to prevent -+ attacks that jump the stack guard. Thus it is never allowed to allocate -+ more than PROBE_INTERVAL bytes of stack space without a suitable -+ probe. - -- gcc_assert (TARGET_SSE); -+ INT_REGISTERS_SAVED is true if integer registers have already been -+ pushed on the stack. 
*/ - -- mode = GET_MODE (x); -+static void -+ix86_adjust_stack_and_probe_stack_clash (HOST_WIDE_INT size, -+ const bool int_registers_saved) -+{ -+ struct machine_function *m = cfun->machine; - -- if (x == const0_rtx || const0_operand (x, mode)) -+ /* If this function does not statically allocate stack space, then -+ no probes are needed. */ -+ if (!size) - { -- switch (get_attr_mode (insn)) -- { -- case MODE_TI: -- if (!EXT_REX_SSE_REG_P (operands[0])) -- return "%vpxor\t%0, %d0"; -- /* FALLTHRU */ -- case MODE_XI: -- case MODE_OI: -- if (EXT_REX_SSE_REG_P (operands[0])) -- return (TARGET_AVX512VL -- ? "vpxord\t%x0, %x0, %x0" -- : "vpxord\t%g0, %g0, %g0"); -- return "vpxor\t%x0, %x0, %x0"; -+ /* However, the allocation of space via pushes for register -+ saves could be viewed as allocating space, but without the -+ need to probe. */ -+ if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed) -+ dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true); -+ else -+ dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false); -+ return; -+ } - -- case MODE_V2DF: -- if (!EXT_REX_SSE_REG_P (operands[0])) -- return "%vxorpd\t%0, %d0"; -- /* FALLTHRU */ -- case MODE_V8DF: -- case MODE_V4DF: -- if (!EXT_REX_SSE_REG_P (operands[0])) -- return "vxorpd\t%x0, %x0, %x0"; -- else if (TARGET_AVX512DQ) -- return (TARGET_AVX512VL -- ? "vxorpd\t%x0, %x0, %x0" -- : "vxorpd\t%g0, %g0, %g0"); -- else -- return (TARGET_AVX512VL -- ? "vpxorq\t%x0, %x0, %x0" -- : "vpxorq\t%g0, %g0, %g0"); -+ /* If we are a noreturn function, then we have to consider the -+ possibility that we're called via a jump rather than a call. - -- case MODE_V4SF: -- if (!EXT_REX_SSE_REG_P (operands[0])) -- return "%vxorps\t%0, %d0"; -- /* FALLTHRU */ -- case MODE_V16SF: -- case MODE_V8SF: -- if (!EXT_REX_SSE_REG_P (operands[0])) -- return "vxorps\t%x0, %x0, %x0"; -- else if (TARGET_AVX512DQ) -- return (TARGET_AVX512VL -- ? "vxorps\t%x0, %x0, %x0" -- : "vxorps\t%g0, %g0, %g0"); -- else -- return (TARGET_AVX512VL -- ? "vpxord\t%x0, %x0, %x0" -- : "vpxord\t%g0, %g0, %g0"); -+ Thus we don't have the implicit probe generated by saving the -+ return address into the stack at the call. Thus, the stack -+ pointer could be anywhere in the guard page. The safe thing -+ to do is emit a probe now. - -- default: -- gcc_unreachable (); -- } -- } -- else if (x == constm1_rtx || vector_all_ones_operand (x, mode)) -- { -- enum attr_mode insn_mode = get_attr_mode (insn); -- -- switch (insn_mode) -- { -- case MODE_XI: -- case MODE_V8DF: -- case MODE_V16SF: -- gcc_assert (TARGET_AVX512F); -- return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}"; -- -- case MODE_OI: -- case MODE_V4DF: -- case MODE_V8SF: -- gcc_assert (TARGET_AVX2); -- /* FALLTHRU */ -- case MODE_TI: -- case MODE_V2DF: -- case MODE_V4SF: -- gcc_assert (TARGET_SSE2); -- if (!EXT_REX_SSE_REG_P (operands[0])) -- return (TARGET_AVX -- ? "vpcmpeqd\t%0, %0, %0" -- : "pcmpeqd\t%0, %0"); -- else if (TARGET_AVX512VL) -- return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}"; -- else -- return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}"; -+ The probe can be avoided if we have already emitted any callee -+ register saves into the stack or have a frame pointer (which will -+ have been saved as well). Those saves will function as implicit -+ probes. - -- default: -- gcc_unreachable (); -+ ?!? This should be revamped to work like aarch64 and s390 where -+ we track the offset from the most recent probe. Normally that -+ offset would be zero. 
For a noreturn function we would reset -+ it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then -+ we just probe when we cross PROBE_INTERVAL. */ -+ if (TREE_THIS_VOLATILE (cfun->decl) -+ && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)) -+ { -+ /* We can safely use any register here since we're just going to push -+ its value and immediately pop it back. But we do try and avoid -+ argument passing registers so as not to introduce dependencies in -+ the pipeline. For 32 bit we use %esi and for 64 bit we use %rax. */ -+ rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG); -+ rtx_insn *insn_push = emit_insn (gen_push (dummy_reg)); -+ rtx_insn *insn_pop = emit_insn (gen_pop (dummy_reg)); -+ m->fs.sp_offset -= UNITS_PER_WORD; -+ if (m->fs.cfa_reg == stack_pointer_rtx) -+ { -+ m->fs.cfa_offset -= UNITS_PER_WORD; -+ rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD); -+ x = gen_rtx_SET (stack_pointer_rtx, x); -+ add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x); -+ RTX_FRAME_RELATED_P (insn_push) = 1; -+ x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); -+ x = gen_rtx_SET (stack_pointer_rtx, x); -+ add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x); -+ RTX_FRAME_RELATED_P (insn_pop) = 1; - } -- } -- -- gcc_unreachable (); --} -- --/* Returns true if INSN can be transformed from a memory load -- to a supported FP constant load. */ -+ emit_insn (gen_blockage ()); -+ } - --bool --ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst) --{ -- rtx src = find_constant_src (insn); -+ /* If we allocate less than the size of the guard statically, -+ then no probing is necessary, but we do need to allocate -+ the stack. */ -+ if (size < (1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE))) -+ { -+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, -+ GEN_INT (-size), -1, -+ m->fs.cfa_reg == stack_pointer_rtx); -+ dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true); -+ return; -+ } - -- gcc_assert (REG_P (dst)); -+ /* We're allocating a large enough stack frame that we need to -+ emit probes. Either emit them inline or in a loop depending -+ on the size. */ -+ HOST_WIDE_INT probe_interval = get_probe_interval (); -+ if (size <= 4 * probe_interval) -+ { -+ HOST_WIDE_INT i; -+ for (i = probe_interval; i <= size; i += probe_interval) -+ { -+ /* Allocate PROBE_INTERVAL bytes. */ -+ rtx insn -+ = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, -+ GEN_INT (-probe_interval), -1, -+ m->fs.cfa_reg == stack_pointer_rtx); -+ add_reg_note (insn, REG_STACK_CHECK, const0_rtx); - -- if (src == NULL -- || (SSE_REGNO_P (REGNO (dst)) -- && standard_sse_constant_p (src, GET_MODE (dst)) != 1) -- || (STACK_REGNO_P (REGNO (dst)) -- && standard_80387_constant_p (src) < 1)) -- return false; -+ /* And probe at *sp. */ -+ emit_stack_probe (stack_pointer_rtx); -+ emit_insn (gen_blockage ()); -+ } - -- return true; --} -+ /* We need to allocate space for the residual, but we do not need -+ to probe the residual. */ -+ HOST_WIDE_INT residual = (i - probe_interval - size); -+ if (residual) -+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, -+ GEN_INT (residual), -1, -+ m->fs.cfa_reg == stack_pointer_rtx); -+ dump_stack_clash_frame_info (PROBE_INLINE, residual != 0); -+ } -+ else -+ { -+ /* We expect the GP registers to be saved when probes are used -+ as the probing sequences might need a scratch register and -+ the routine to allocate one assumes the integer registers -+ have already been saved. 
*/ -+ gcc_assert (int_registers_saved); - --/* Returns true if OP contains a symbol reference */ -+ struct scratch_reg sr; -+ get_scratch_register_on_entry (&sr); - --bool --symbolic_reference_mentioned_p (rtx op) --{ -- const char *fmt; -- int i; -+ /* If we needed to save a register, then account for any space -+ that was pushed (we are not going to pop the register when -+ we do the restore). */ -+ if (sr.saved) -+ size -= UNITS_PER_WORD; - -- if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF) -- return true; -+ /* Step 1: round SIZE down to a multiple of the interval. */ -+ HOST_WIDE_INT rounded_size = size & -probe_interval; - -- fmt = GET_RTX_FORMAT (GET_CODE (op)); -- for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) -- { -- if (fmt[i] == 'E') -+ /* Step 2: compute final value of the loop counter. Use lea if -+ possible. */ -+ rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size); -+ rtx insn; -+ if (address_no_seg_operand (addr, Pmode)) -+ insn = emit_insn (gen_rtx_SET (sr.reg, addr)); -+ else - { -- int j; -- -- for (j = XVECLEN (op, i) - 1; j >= 0; j--) -- if (symbolic_reference_mentioned_p (XVECEXP (op, i, j))) -- return true; -+ emit_move_insn (sr.reg, GEN_INT (-rounded_size)); -+ insn = emit_insn (gen_rtx_SET (sr.reg, -+ gen_rtx_PLUS (Pmode, sr.reg, -+ stack_pointer_rtx))); -+ } -+ if (m->fs.cfa_reg == stack_pointer_rtx) -+ { -+ add_reg_note (insn, REG_CFA_DEF_CFA, -+ plus_constant (Pmode, sr.reg, -+ m->fs.cfa_offset + rounded_size)); -+ RTX_FRAME_RELATED_P (insn) = 1; - } - -- else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i))) -- return true; -- } -+ /* Step 3: the loop. */ -+ rtx size_rtx = GEN_INT (rounded_size); -+ insn = emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, -+ size_rtx)); -+ if (m->fs.cfa_reg == stack_pointer_rtx) -+ { -+ m->fs.cfa_offset += rounded_size; -+ add_reg_note (insn, REG_CFA_DEF_CFA, -+ plus_constant (Pmode, stack_pointer_rtx, -+ m->fs.cfa_offset)); -+ RTX_FRAME_RELATED_P (insn) = 1; -+ } -+ m->fs.sp_offset += rounded_size; -+ emit_insn (gen_blockage ()); - -- return false; --} -+ /* Step 4: adjust SP if we cannot assert at compile-time that SIZE -+ is equal to ROUNDED_SIZE. */ - --/* Return true if it is appropriate to emit `ret' instructions in the -- body of a function. Do this only if the epilogue is simple, needing a -- couple of insns. Prior to reloading, we can't tell how many registers -- must be saved, so return false then. Return false if there is no frame -- marker to de-allocate. */ -+ if (size != rounded_size) -+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, -+ GEN_INT (rounded_size - size), -1, -+ m->fs.cfa_reg == stack_pointer_rtx); -+ dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size); - --bool --ix86_can_use_return_insn_p (void) --{ -- if (ix86_function_naked (current_function_decl)) -- return false; -+ /* This does not deallocate the space reserved for the scratch -+ register. That will be deallocated in the epilogue. */ -+ release_scratch_register_on_entry (&sr, size, false); -+ } - -- /* Don't use `ret' instruction in interrupt handler. */ -- if (! reload_completed -- || frame_pointer_needed -- || cfun->machine->func_type != TYPE_NORMAL) -- return 0; -+ /* Make sure nothing is scheduled before we are done. */ -+ emit_insn (gen_blockage ()); -+} - -- /* Don't allow more than 32k pop, since that's all we can do -- with one instruction. 
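Taken together, the stack-clash path above produces one of three allocation shapes: frames smaller than the guard are allocated without probing, frames of up to four probe intervals get inline sub/probe pairs plus an unprobed residual, and larger frames use a probing loop driven by a scratch register. A rough model of that schedule is sketched below, assuming a 4 KiB probe interval and guard size (in reality both come from target parameters) and omitting the scratch-register bookkeeping.

  #include <stdio.h>

  /* Assumed values; in the compiler both are target/param dependent.  */
  #define PROBE_INTERVAL 4096L
  #define GUARD_SIZE     4096L

  /* Print the allocation/probe schedule for a frame of SIZE bytes.  */
  static void
  plan_allocation (long size)
  {
    printf ("frame of %ld bytes:\n", size);
    if (size < GUARD_SIZE)
      printf ("  sub rsp, %ld            (no probe needed)\n", size);
    else if (size <= 4 * PROBE_INTERVAL)
      {
        long i, allocated = 0;
        for (i = PROBE_INTERVAL; i <= size; i += PROBE_INTERVAL)
          {
            printf ("  sub rsp, %ld ; or [rsp], 0   (inline probe)\n",
                    PROBE_INTERVAL);
            allocated += PROBE_INTERVAL;
          }
        if (allocated != size)
          printf ("  sub rsp, %ld            (unprobed residual)\n",
                  size - allocated);
      }
    else
      printf ("  probing loop: %ld iterations of sub rsp, %ld ; or [rsp], 0,"
              " then residual %ld\n",
              size / PROBE_INTERVAL, PROBE_INTERVAL, size % PROBE_INTERVAL);
  }

  int
  main (void)
  {
    plan_allocation (512);        /* below the guard */
    plan_allocation (10000);      /* a few inline probes */
    plan_allocation (1L << 20);   /* large frame: loop */
    return 0;
  }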
*/ -- if (crtl->args.pops_args && crtl->args.size >= 32768) -- return 0; -+/* Emit code to adjust the stack pointer by SIZE bytes while probing it. - -- struct ix86_frame &frame = cfun->machine->frame; -- return (frame.stack_pointer_offset == UNITS_PER_WORD -- && (frame.nregs + frame.nsseregs) == 0); --} -- --/* Value should be nonzero if functions must have frame pointers. -- Zero means the frame pointer need not be set up (and parms may -- be accessed via the stack pointer) in functions that seem suitable. */ -+ INT_REGISTERS_SAVED is true if integer registers have already been -+ pushed on the stack. */ - --static bool --ix86_frame_pointer_required (void) -+static void -+ix86_adjust_stack_and_probe (HOST_WIDE_INT size, -+ const bool int_registers_saved) - { -- /* If we accessed previous frames, then the generated code expects -- to be able to access the saved ebp value in our frame. */ -- if (cfun->machine->accesses_prev_frame) -- return true; -+ /* We skip the probe for the first interval + a small dope of 4 words and -+ probe that many bytes past the specified size to maintain a protection -+ area at the botton of the stack. */ -+ const int dope = 4 * UNITS_PER_WORD; -+ rtx size_rtx = GEN_INT (size), last; - -- /* Several x86 os'es need a frame pointer for other reasons, -- usually pertaining to setjmp. */ -- if (SUBTARGET_FRAME_POINTER_REQUIRED) -- return true; -+ /* See if we have a constant small number of probes to generate. If so, -+ that's the easy case. The run-time loop is made up of 9 insns in the -+ generic case while the compile-time loop is made up of 3+2*(n-1) insns -+ for n # of intervals. */ -+ if (size <= 4 * get_probe_interval ()) -+ { -+ HOST_WIDE_INT i, adjust; -+ bool first_probe = true; - -- /* For older 32-bit runtimes setjmp requires valid frame-pointer. */ -- if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp) -- return true; -+ /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for -+ values of N from 1 until it exceeds SIZE. If only one probe is -+ needed, this will not generate any code. Then adjust and probe -+ to PROBE_INTERVAL + SIZE. */ -+ for (i = get_probe_interval (); i < size; i += get_probe_interval ()) -+ { -+ if (first_probe) -+ { -+ adjust = 2 * get_probe_interval () + dope; -+ first_probe = false; -+ } -+ else -+ adjust = get_probe_interval (); - -- /* Win64 SEH, very large frames need a frame-pointer as maximum stack -- allocation is 4GB. */ -- if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE) -- return true; -+ emit_insn (gen_rtx_SET (stack_pointer_rtx, -+ plus_constant (Pmode, stack_pointer_rtx, -+ -adjust))); -+ emit_stack_probe (stack_pointer_rtx); -+ } - -- /* SSE saves require frame-pointer when stack is misaligned. */ -- if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128) -- return true; -- -- /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER -- turns off the frame pointer by default. Turn it back on now if -- we've not got a leaf function. */ -- if (TARGET_OMIT_LEAF_FRAME_POINTER -- && (!crtl->is_leaf -- || ix86_current_function_calls_tls_descriptor)) -- return true; -+ if (first_probe) -+ adjust = size + get_probe_interval () + dope; -+ else -+ adjust = size + get_probe_interval () - i; - -- if (crtl->profile && !flag_fentry) -- return true; -+ emit_insn (gen_rtx_SET (stack_pointer_rtx, -+ plus_constant (Pmode, stack_pointer_rtx, -+ -adjust))); -+ emit_stack_probe (stack_pointer_rtx); - -- return false; --} -+ /* Adjust back to account for the additional first interval. 
*/ -+ last = emit_insn (gen_rtx_SET (stack_pointer_rtx, -+ plus_constant (Pmode, stack_pointer_rtx, -+ (get_probe_interval () -+ + dope)))); -+ } - --/* Record that the current function accesses previous call frames. */ -+ /* Otherwise, do the same as above, but in a loop. Note that we must be -+ extra careful with variables wrapping around because we might be at -+ the very top (or the very bottom) of the address space and we have -+ to be able to handle this case properly; in particular, we use an -+ equality test for the loop condition. */ -+ else -+ { -+ /* We expect the GP registers to be saved when probes are used -+ as the probing sequences might need a scratch register and -+ the routine to allocate one assumes the integer registers -+ have already been saved. */ -+ gcc_assert (int_registers_saved); - --void --ix86_setup_frame_addresses (void) --{ -- cfun->machine->accesses_prev_frame = 1; --} -- --#ifndef USE_HIDDEN_LINKONCE --# if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0) --# define USE_HIDDEN_LINKONCE 1 --# else --# define USE_HIDDEN_LINKONCE 0 --# endif --#endif -+ HOST_WIDE_INT rounded_size; -+ struct scratch_reg sr; - --/* Label count for call and return thunks. It is used to make unique -- labels in call and return thunks. */ --static int indirectlabelno; -+ get_scratch_register_on_entry (&sr); - --/* True if call thunk function is needed. */ --static bool indirect_thunk_needed = false; -+ /* If we needed to save a register, then account for any space -+ that was pushed (we are not going to pop the register when -+ we do the restore). */ -+ if (sr.saved) -+ size -= UNITS_PER_WORD; - --/* Bit masks of integer registers, which contain branch target, used -- by call thunk functions. */ --static int indirect_thunks_used; -+ /* Step 1: round SIZE to the previous multiple of the interval. */ - --/* True if return thunk function is needed. */ --static bool indirect_return_needed = false; -+ rounded_size = ROUND_DOWN (size, get_probe_interval ()); - --/* True if return thunk function via CX is needed. */ --static bool indirect_return_via_cx; - --#ifndef INDIRECT_LABEL --# define INDIRECT_LABEL "LIND" --#endif -+ /* Step 2: compute initial and final value of the loop counter. */ - --/* Indicate what prefix is needed for an indirect branch. */ --enum indirect_thunk_prefix --{ -- indirect_thunk_prefix_none, -- indirect_thunk_prefix_nt --}; -+ /* SP = SP_0 + PROBE_INTERVAL. */ -+ emit_insn (gen_rtx_SET (stack_pointer_rtx, -+ plus_constant (Pmode, stack_pointer_rtx, -+ - (get_probe_interval () + dope)))); - --/* Return the prefix needed for an indirect branch INSN. */ -+ /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */ -+ if (rounded_size <= (HOST_WIDE_INT_1 << 31)) -+ emit_insn (gen_rtx_SET (sr.reg, -+ plus_constant (Pmode, stack_pointer_rtx, -+ -rounded_size))); -+ else -+ { -+ emit_move_insn (sr.reg, GEN_INT (-rounded_size)); -+ emit_insn (gen_rtx_SET (sr.reg, -+ gen_rtx_PLUS (Pmode, sr.reg, -+ stack_pointer_rtx))); -+ } - --enum indirect_thunk_prefix --indirect_thunk_need_prefix (rtx_insn *insn) --{ -- enum indirect_thunk_prefix need_prefix; -- if ((cfun->machine->indirect_branch_type -- == indirect_branch_thunk_extern) -- && ix86_notrack_prefixed_insn_p (insn)) -- { -- /* NOTRACK prefix is only used with external thunk so that it -- can be properly updated to support CET at run-time. 
*/ -- need_prefix = indirect_thunk_prefix_nt; -- } -- else -- need_prefix = indirect_thunk_prefix_none; -- return need_prefix; --} - --/* Fills in the label name that should be used for the indirect thunk. */ -+ /* Step 3: the loop - --static void --indirect_thunk_name (char name[32], unsigned int regno, -- enum indirect_thunk_prefix need_prefix, -- bool ret_p) --{ -- if (regno != INVALID_REGNUM && regno != CX_REG && ret_p) -- gcc_unreachable (); -+ do -+ { -+ SP = SP + PROBE_INTERVAL -+ probe at SP -+ } -+ while (SP != LAST_ADDR) - -- if (USE_HIDDEN_LINKONCE) -- { -- const char *prefix; -+ adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for -+ values of N from 1 until it is equal to ROUNDED_SIZE. */ - -- if (need_prefix == indirect_thunk_prefix_nt -- && regno != INVALID_REGNUM) -- { -- /* NOTRACK prefix is only used with external thunk via -- register so that NOTRACK prefix can be added to indirect -- branch via register to support CET at run-time. */ -- prefix = "_nt"; -- } -- else -- prefix = ""; -+ emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx)); - -- const char *ret = ret_p ? "return" : "indirect"; - -- if (regno != INVALID_REGNUM) -+ /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot -+ assert at compile-time that SIZE is equal to ROUNDED_SIZE. */ -+ -+ if (size != rounded_size) - { -- const char *reg_prefix; -- if (LEGACY_INT_REGNO_P (regno)) -- reg_prefix = TARGET_64BIT ? "r" : "e"; -- else -- reg_prefix = ""; -- sprintf (name, "__x86_%s_thunk%s_%s%s", -- ret, prefix, reg_prefix, reg_names[regno]); -+ emit_insn (gen_rtx_SET (stack_pointer_rtx, -+ plus_constant (Pmode, stack_pointer_rtx, -+ rounded_size - size))); -+ emit_stack_probe (stack_pointer_rtx); - } -- else -- sprintf (name, "__x86_%s_thunk%s", ret, prefix); -+ -+ /* Adjust back to account for the additional first interval. */ -+ last = emit_insn (gen_rtx_SET (stack_pointer_rtx, -+ plus_constant (Pmode, stack_pointer_rtx, -+ (get_probe_interval () -+ + dope)))); -+ -+ /* This does not deallocate the space reserved for the scratch -+ register. That will be deallocated in the epilogue. */ -+ release_scratch_register_on_entry (&sr, size, false); - } -- else -+ -+ /* Even if the stack pointer isn't the CFA register, we need to correctly -+ describe the adjustments made to it, in particular differentiate the -+ frame-related ones from the frame-unrelated ones. */ -+ if (size > 0) - { -- if (regno != INVALID_REGNUM) -- ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno); -- else -- { -- if (ret_p) -- ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0); -- else -- ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0); -- } -+ rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2)); -+ XVECEXP (expr, 0, 0) -+ = gen_rtx_SET (stack_pointer_rtx, -+ plus_constant (Pmode, stack_pointer_rtx, -size)); -+ XVECEXP (expr, 0, 1) -+ = gen_rtx_SET (stack_pointer_rtx, -+ plus_constant (Pmode, stack_pointer_rtx, -+ get_probe_interval () + dope + size)); -+ add_reg_note (last, REG_FRAME_RELATED_EXPR, expr); -+ RTX_FRAME_RELATED_P (last) = 1; -+ -+ cfun->machine->fs.sp_offset += size; - } -+ -+ /* Make sure nothing is scheduled before we are done. */ -+ emit_insn (gen_blockage ()); - } - --/* Output a call and return thunk for indirect branch. If REGNO != -1, -- the function address is in REGNO and the call and return thunk looks like: -+/* Adjust the stack pointer up to REG while probing it. 
*/ - -- call L2 -- L1: -- pause -- lfence -- jmp L1 -- L2: -- mov %REG, (%sp) -- ret -+const char * -+output_adjust_stack_and_probe (rtx reg) -+{ -+ static int labelno = 0; -+ char loop_lab[32]; -+ rtx xops[2]; - -- Otherwise, the function address is on the top of stack and the -- call and return thunk looks like: -+ ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++); - -- call L2 -- L1: -- pause -- lfence -- jmp L1 -- L2: -- lea WORD_SIZE(%sp), %sp -- ret -- */ -+ /* Loop. */ -+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab); - --static void --output_indirect_thunk (unsigned int regno) --{ -- char indirectlabel1[32]; -- char indirectlabel2[32]; -+ /* SP = SP + PROBE_INTERVAL. */ -+ xops[0] = stack_pointer_rtx; -+ xops[1] = GEN_INT (get_probe_interval ()); -+ output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops); - -- ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL, -- indirectlabelno++); -- ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL, -- indirectlabelno++); -+ /* Probe at SP. */ -+ xops[1] = const0_rtx; -+ output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops); - -- /* Call */ -- fputs ("\tcall\t", asm_out_file); -- assemble_name_raw (asm_out_file, indirectlabel2); -- fputc ('\n', asm_out_file); -+ /* Test if SP == LAST_ADDR. */ -+ xops[0] = stack_pointer_rtx; -+ xops[1] = reg; -+ output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops); - -- ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1); -+ /* Branch. */ -+ fputs ("\tjne\t", asm_out_file); -+ assemble_name_raw (asm_out_file, loop_lab); -+ fputc ('\n', asm_out_file); - -- /* AMD and Intel CPUs prefer each a different instruction as loop filler. -- Usage of both pause + lfence is compromise solution. */ -- fprintf (asm_out_file, "\tpause\n\tlfence\n"); -+ return ""; -+} - -- /* Jump. */ -- fputs ("\tjmp\t", asm_out_file); -- assemble_name_raw (asm_out_file, indirectlabel1); -- fputc ('\n', asm_out_file); -+/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE, -+ inclusive. These are offsets from the current stack pointer. - -- ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2); -+ INT_REGISTERS_SAVED is true if integer registers have already been -+ pushed on the stack. */ - -- /* The above call insn pushed a word to stack. Adjust CFI info. */ -- if (flag_asynchronous_unwind_tables && dwarf2out_do_frame ()) -+static void -+ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size, -+ const bool int_registers_saved) -+{ -+ /* See if we have a constant small number of probes to generate. If so, -+ that's the easy case. The run-time loop is made up of 6 insns in the -+ generic case while the compile-time loop is made up of n insns for n # -+ of intervals. */ -+ if (size <= 6 * get_probe_interval ()) - { -- if (! dwarf2out_do_cfi_asm ()) -- { -- dw_cfi_ref xcfi = ggc_cleared_alloc (); -- xcfi->dw_cfi_opc = DW_CFA_advance_loc4; -- xcfi->dw_cfi_oprnd1.dw_cfi_addr = ggc_strdup (indirectlabel2); -- vec_safe_push (cfun->fde->dw_fde_cfi, xcfi); -- } -- dw_cfi_ref xcfi = ggc_cleared_alloc (); -- xcfi->dw_cfi_opc = DW_CFA_def_cfa_offset; -- xcfi->dw_cfi_oprnd1.dw_cfi_offset = 2 * UNITS_PER_WORD; -- vec_safe_push (cfun->fde->dw_fde_cfi, xcfi); -- dwarf2out_emit_cfi (xcfi); -- } -+ HOST_WIDE_INT i; - -- if (regno != INVALID_REGNUM) -- { -- /* MOV. 
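
The assembly loop printed by output_adjust_stack_and_probe above walks SP down one interval at a time, probes the newly exposed word, and terminates on an equality comparison so the code stays well-defined even if the addresses wrap around the ends of the address space. A rough, self-contained C model of that loop (names such as model_probe_loop are invented; an ordinary array stands in for the stack) looks like this:

#include <assert.h>
#include <stddef.h>

#define INTERVAL 4096

/* Walk SP from TOP down to TOP - ROUNDED_SIZE, touching each new
   interval, mirroring the sub/or/cmp/jne sequence emitted above.  */
static void
model_probe_loop (unsigned char *stack, size_t top, size_t rounded_size)
{
  size_t sp = top;                    /* SP before the loop            */
  size_t last = top - rounded_size;   /* LAST_ADDR computed in step 2  */

  do
    {
      sp -= INTERVAL;                 /* sub  $INTERVAL, %sp           */
      stack[sp] |= 0;                 /* or   $0, (%sp)  -- the probe  */
    }
  while (sp != last);                 /* cmp/jne on equality           */

  assert (sp == top - rounded_size);
}

int
main (void)
{
  static unsigned char stack[1 << 20];
  model_probe_loop (stack, sizeof stack, 8 * INTERVAL);
  return 0;
}
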
*/ -- rtx xops[2]; -- xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx); -- xops[1] = gen_rtx_REG (word_mode, regno); -- output_asm_insn ("mov\t{%1, %0|%0, %1}", xops); -+ /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until -+ it exceeds SIZE. If only one probe is needed, this will not -+ generate any code. Then probe at FIRST + SIZE. */ -+ for (i = get_probe_interval (); i < size; i += get_probe_interval ()) -+ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, -+ -(first + i))); -+ -+ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, -+ -(first + size))); - } -+ -+ /* Otherwise, do the same as above, but in a loop. Note that we must be -+ extra careful with variables wrapping around because we might be at -+ the very top (or the very bottom) of the address space and we have -+ to be able to handle this case properly; in particular, we use an -+ equality test for the loop condition. */ - else - { -- /* LEA. */ -- rtx xops[2]; -- xops[0] = stack_pointer_rtx; -- xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); -- output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops); -- } -- -- fputs ("\tret\n", asm_out_file); --} -+ /* We expect the GP registers to be saved when probes are used -+ as the probing sequences might need a scratch register and -+ the routine to allocate one assumes the integer registers -+ have already been saved. */ -+ gcc_assert (int_registers_saved); - --/* Output a funtion with a call and return thunk for indirect branch. -- If REGNO != INVALID_REGNUM, the function address is in REGNO. -- Otherwise, the function address is on the top of stack. Thunk is -- used for function return if RET_P is true. */ -+ HOST_WIDE_INT rounded_size, last; -+ struct scratch_reg sr; - --static void --output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix, -- unsigned int regno, bool ret_p) --{ -- char name[32]; -- tree decl; -+ get_scratch_register_on_entry (&sr); - -- /* Create __x86_indirect_thunk. */ -- indirect_thunk_name (name, regno, need_prefix, ret_p); -- decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, -- get_identifier (name), -- build_function_type_list (void_type_node, NULL_TREE)); -- DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL, -- NULL_TREE, void_type_node); -- TREE_PUBLIC (decl) = 1; -- TREE_STATIC (decl) = 1; -- DECL_IGNORED_P (decl) = 1; - --#if TARGET_MACHO -- if (TARGET_MACHO) -- { -- switch_to_section (darwin_sections[picbase_thunk_section]); -- fputs ("\t.weak_definition\t", asm_out_file); -- assemble_name (asm_out_file, name); -- fputs ("\n\t.private_extern\t", asm_out_file); -- assemble_name (asm_out_file, name); -- putc ('\n', asm_out_file); -- ASM_OUTPUT_LABEL (asm_out_file, name); -- DECL_WEAK (decl) = 1; -- } -- else --#endif -- if (USE_HIDDEN_LINKONCE) -- { -- cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl)); -+ /* Step 1: round SIZE to the previous multiple of the interval. 
*/ - -- targetm.asm_out.unique_section (decl, 0); -- switch_to_section (get_named_section (decl, NULL, 0)); -+ rounded_size = ROUND_DOWN (size, get_probe_interval ()); - -- targetm.asm_out.globalize_label (asm_out_file, name); -- fputs ("\t.hidden\t", asm_out_file); -- assemble_name (asm_out_file, name); -- putc ('\n', asm_out_file); -- ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl); -- } -- else -- { -- switch_to_section (text_section); -- ASM_OUTPUT_LABEL (asm_out_file, name); -- } - -- DECL_INITIAL (decl) = make_node (BLOCK); -- current_function_decl = decl; -- allocate_struct_function (decl, false); -- init_function_start (decl); -- /* We're about to hide the function body from callees of final_* by -- emitting it directly; tell them we're a thunk, if they care. */ -- cfun->is_thunk = true; -- first_function_block_is_cold = false; -- /* Make sure unwind info is emitted for the thunk if needed. */ -- final_start_function (emit_barrier (), asm_out_file, 1); -+ /* Step 2: compute initial and final value of the loop counter. */ - -- output_indirect_thunk (regno); -+ /* TEST_OFFSET = FIRST. */ -+ emit_move_insn (sr.reg, GEN_INT (-first)); - -- final_end_function (); -- init_insn_lengths (); -- free_after_compilation (cfun); -- set_cfun (NULL); -- current_function_decl = NULL; --} -+ /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */ -+ last = first + rounded_size; - --static int pic_labels_used; - --/* Fills in the label name that should be used for a pc thunk for -- the given register. */ -+ /* Step 3: the loop - --static void --get_pc_thunk_name (char name[32], unsigned int regno) --{ -- gcc_assert (!TARGET_64BIT); -+ do -+ { -+ TEST_ADDR = TEST_ADDR + PROBE_INTERVAL -+ probe at TEST_ADDR -+ } -+ while (TEST_ADDR != LAST_ADDR) - -- if (USE_HIDDEN_LINKONCE) -- sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]); -- else -- ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno); --} -+ probes at FIRST + N * PROBE_INTERVAL for values of N from 1 -+ until it is equal to ROUNDED_SIZE. */ - -+ emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last))); - --/* This function generates code for -fpic that loads %ebx with -- the return address of the caller and then returns. */ - --static void --ix86_code_end (void) --{ -- rtx xops[2]; -- unsigned int regno; -+ /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time -+ that SIZE is equal to ROUNDED_SIZE. */ - -- if (indirect_return_needed) -- output_indirect_thunk_function (indirect_thunk_prefix_none, -- INVALID_REGNUM, true); -- if (indirect_return_via_cx) -- output_indirect_thunk_function (indirect_thunk_prefix_none, -- CX_REG, true); -- if (indirect_thunk_needed) -- output_indirect_thunk_function (indirect_thunk_prefix_none, -- INVALID_REGNUM, false); -+ if (size != rounded_size) -+ emit_stack_probe (plus_constant (Pmode, -+ gen_rtx_PLUS (Pmode, -+ stack_pointer_rtx, -+ sr.reg), -+ rounded_size - size)); - -- for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++) -- { -- unsigned int i = regno - FIRST_REX_INT_REG + LAST_INT_REG + 1; -- if ((indirect_thunks_used & (1 << i))) -- output_indirect_thunk_function (indirect_thunk_prefix_none, -- regno, false); -+ release_scratch_register_on_entry (&sr, size, true); - } - -- for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++) -- { -- char name[32]; -- tree decl; -+ /* Make sure nothing is scheduled before we are done. 
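
For ix86_emit_probe_stack_range the comments above describe the same two shapes: a fully unrolled sequence of probes at FIRST + N * interval followed by a final probe at FIRST + SIZE, or, for larger areas, a loop over ROUNDED_SIZE plus one trailing probe when SIZE is not a multiple of the interval. A small C sketch of the unrolled case (list_probe_offsets is an invented name; the interval and FIRST values are only examples) just enumerates those offsets:

#include <stdio.h>

/* Print the offsets below the incoming SP that the unrolled case
   touches: FIRST + N * interval while below FIRST + SIZE, and then
   FIRST + SIZE itself.  */
static void
list_probe_offsets (long first, long size, long interval)
{
  long i;

  for (i = interval; i < size; i += interval)
    printf ("  probe at sp - %ld\n", first + i);

  printf ("  probe at sp - %ld\n", first + size);   /* the final probe */
}

int
main (void)
{
  /* FIRST would typically be the protection area size.  */
  list_probe_offsets (12288, 20000, 4096);
  return 0;
}
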
*/ -+ emit_insn (gen_blockage ()); -+} - -- if ((indirect_thunks_used & (1 << regno))) -- output_indirect_thunk_function (indirect_thunk_prefix_none, -- regno, false); -- -- if (!(pic_labels_used & (1 << regno))) -- continue; -- -- get_pc_thunk_name (name, regno); -- -- decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, -- get_identifier (name), -- build_function_type_list (void_type_node, NULL_TREE)); -- DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL, -- NULL_TREE, void_type_node); -- TREE_PUBLIC (decl) = 1; -- TREE_STATIC (decl) = 1; -- DECL_IGNORED_P (decl) = 1; -- --#if TARGET_MACHO -- if (TARGET_MACHO) -- { -- switch_to_section (darwin_sections[picbase_thunk_section]); -- fputs ("\t.weak_definition\t", asm_out_file); -- assemble_name (asm_out_file, name); -- fputs ("\n\t.private_extern\t", asm_out_file); -- assemble_name (asm_out_file, name); -- putc ('\n', asm_out_file); -- ASM_OUTPUT_LABEL (asm_out_file, name); -- DECL_WEAK (decl) = 1; -- } -- else --#endif -- if (USE_HIDDEN_LINKONCE) -- { -- cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl)); -- -- targetm.asm_out.unique_section (decl, 0); -- switch_to_section (get_named_section (decl, NULL, 0)); -- -- targetm.asm_out.globalize_label (asm_out_file, name); -- fputs ("\t.hidden\t", asm_out_file); -- assemble_name (asm_out_file, name); -- putc ('\n', asm_out_file); -- ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl); -- } -- else -- { -- switch_to_section (text_section); -- ASM_OUTPUT_LABEL (asm_out_file, name); -- } -- -- DECL_INITIAL (decl) = make_node (BLOCK); -- current_function_decl = decl; -- allocate_struct_function (decl, false); -- init_function_start (decl); -- /* We're about to hide the function body from callees of final_* by -- emitting it directly; tell them we're a thunk, if they care. */ -- cfun->is_thunk = true; -- first_function_block_is_cold = false; -- /* Make sure unwind info is emitted for the thunk if needed. */ -- final_start_function (emit_barrier (), asm_out_file, 1); -- -- /* Pad stack IP move with 4 instructions (two NOPs count -- as one instruction). */ -- if (TARGET_PAD_SHORT_FUNCTION) -- { -- int i = 8; -- -- while (i--) -- fputs ("\tnop\n", asm_out_file); -- } -- -- xops[0] = gen_rtx_REG (Pmode, regno); -- xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx); -- output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops); -- output_asm_insn ("%!ret", NULL); -- final_end_function (); -- init_insn_lengths (); -- free_after_compilation (cfun); -- set_cfun (NULL); -- current_function_decl = NULL; -- } -- -- if (flag_split_stack) -- file_end_indicate_split_stack (); --} -- --/* Emit code for the SET_GOT patterns. */ -+/* Probe a range of stack addresses from REG to END, inclusive. These are -+ offsets from the current stack pointer. */ - - const char * --output_set_got (rtx dest, rtx label) -+output_probe_stack_range (rtx reg, rtx end) - { -+ static int labelno = 0; -+ char loop_lab[32]; - rtx xops[3]; - -- xops[0] = dest; -- -- if (TARGET_VXWORKS_RTP && flag_pic) -- { -- /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */ -- xops[2] = gen_rtx_MEM (Pmode, -- gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE)); -- output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops); -- -- /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register. -- Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as -- an unadorned address. 
*/ -- xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX); -- SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL; -- output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops); -- return ""; -- } -- -- xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME); -- -- if (flag_pic) -- { -- char name[32]; -- get_pc_thunk_name (name, REGNO (dest)); -- pic_labels_used |= 1 << REGNO (dest); -+ ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++); - -- xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name)); -- xops[2] = gen_rtx_MEM (QImode, xops[2]); -- output_asm_insn ("%!call\t%X2", xops); -+ /* Loop. */ -+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab); - --#if TARGET_MACHO -- /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here. -- This is what will be referenced by the Mach-O PIC subsystem. */ -- if (machopic_should_output_picbase_label () || !label) -- ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME); -+ /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */ -+ xops[0] = reg; -+ xops[1] = GEN_INT (get_probe_interval ()); -+ output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops); - -- /* When we are restoring the pic base at the site of a nonlocal label, -- and we decided to emit the pic base above, we will still output a -- local label used for calculating the correction offset (even though -- the offset will be 0 in that case). */ -- if (label) -- targetm.asm_out.internal_label (asm_out_file, "L", -- CODE_LABEL_NUMBER (label)); --#endif -- } -- else -- { -- if (TARGET_MACHO) -- /* We don't need a pic base, we're not producing pic. */ -- gcc_unreachable (); -+ /* Probe at TEST_ADDR. */ -+ xops[0] = stack_pointer_rtx; -+ xops[1] = reg; -+ xops[2] = const0_rtx; -+ output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops); - -- xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ()); -- output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops); -- targetm.asm_out.internal_label (asm_out_file, "L", -- CODE_LABEL_NUMBER (XEXP (xops[2], 0))); -- } -+ /* Test if TEST_ADDR == LAST_ADDR. */ -+ xops[0] = reg; -+ xops[1] = end; -+ output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops); - -- if (!TARGET_MACHO) -- output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops); -+ /* Branch. */ -+ fputs ("\tjne\t", asm_out_file); -+ assemble_name_raw (asm_out_file, loop_lab); -+ fputc ('\n', asm_out_file); - - return ""; - } - --/* Generate an "push" pattern for input ARG. */ -+/* Return true if stack frame is required. Update STACK_ALIGNMENT -+ to the largest alignment, in bits, of stack slot used if stack -+ frame is required and CHECK_STACK_SLOT is true. */ - --static rtx --gen_push (rtx arg) -+static bool -+ix86_find_max_used_stack_alignment (unsigned int &stack_alignment, -+ bool check_stack_slot) - { -- struct machine_function *m = cfun->machine; -+ HARD_REG_SET set_up_by_prologue, prologue_used; -+ basic_block bb; - -- if (m->fs.cfa_reg == stack_pointer_rtx) -- m->fs.cfa_offset += UNITS_PER_WORD; -- m->fs.sp_offset += UNITS_PER_WORD; -+ CLEAR_HARD_REG_SET (prologue_used); -+ CLEAR_HARD_REG_SET (set_up_by_prologue); -+ add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM); -+ add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM); -+ add_to_hard_reg_set (&set_up_by_prologue, Pmode, -+ HARD_FRAME_POINTER_REGNUM); - -- if (REG_P (arg) && GET_MODE (arg) != word_mode) -- arg = gen_rtx_REG (word_mode, REGNO (arg)); -+ /* The preferred stack alignment is the minimum stack alignment. 
*/ -+ if (stack_alignment > crtl->preferred_stack_boundary) -+ stack_alignment = crtl->preferred_stack_boundary; - -- return gen_rtx_SET (gen_rtx_MEM (word_mode, -- gen_rtx_PRE_DEC (Pmode, -- stack_pointer_rtx)), -- arg); --} -+ bool require_stack_frame = false; - --/* Generate an "pop" pattern for input ARG. */ -+ FOR_EACH_BB_FN (bb, cfun) -+ { -+ rtx_insn *insn; -+ FOR_BB_INSNS (bb, insn) -+ if (NONDEBUG_INSN_P (insn) -+ && requires_stack_frame_p (insn, prologue_used, -+ set_up_by_prologue)) -+ { -+ require_stack_frame = true; - --static rtx --gen_pop (rtx arg) --{ -- if (REG_P (arg) && GET_MODE (arg) != word_mode) -- arg = gen_rtx_REG (word_mode, REGNO (arg)); -+ if (check_stack_slot) -+ { -+ /* Find the maximum stack alignment. */ -+ subrtx_iterator::array_type array; -+ FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL) -+ if (MEM_P (*iter) -+ && (reg_mentioned_p (stack_pointer_rtx, -+ *iter) -+ || reg_mentioned_p (frame_pointer_rtx, -+ *iter))) -+ { -+ unsigned int alignment = MEM_ALIGN (*iter); -+ if (alignment > stack_alignment) -+ stack_alignment = alignment; -+ } -+ } -+ } -+ } - -- return gen_rtx_SET (arg, -- gen_rtx_MEM (word_mode, -- gen_rtx_POST_INC (Pmode, -- stack_pointer_rtx))); -+ return require_stack_frame; - } - --/* Return >= 0 if there is an unused call-clobbered register available -- for the entire function. */ -+/* Finalize stack_realign_needed and frame_pointer_needed flags, which -+ will guide prologue/epilogue to be generated in correct form. */ - --static unsigned int --ix86_select_alt_pic_regnum (void) -+static void -+ix86_finalize_stack_frame_flags (void) - { -- if (ix86_use_pseudo_pic_reg ()) -- return INVALID_REGNUM; -+ /* Check if stack realign is really needed after reload, and -+ stores result in cfun */ -+ unsigned int incoming_stack_boundary -+ = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary -+ ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary); -+ unsigned int stack_alignment -+ = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor -+ ? crtl->max_used_stack_slot_alignment -+ : crtl->stack_alignment_needed); -+ unsigned int stack_realign -+ = (incoming_stack_boundary < stack_alignment); -+ bool recompute_frame_layout_p = false; - -- if (crtl->is_leaf -- && !crtl->profile -- && !ix86_current_function_calls_tls_descriptor) -+ if (crtl->stack_realign_finalized) - { -- int i, drap; -- /* Can't use the same register for both PIC and DRAP. */ -- if (crtl->drap_reg) -- drap = REGNO (crtl->drap_reg); -- else -- drap = -1; -- for (i = 2; i >= 0; --i) -- if (i != drap && !df_regs_ever_live_p (i)) -- return i; -+ /* After stack_realign_needed is finalized, we can't no longer -+ change it. */ -+ gcc_assert (crtl->stack_realign_needed == stack_realign); -+ return; - } - -- return INVALID_REGNUM; --} -- --/* Return true if REGNO is used by the epilogue. */ -+ /* If the only reason for frame_pointer_needed is that we conservatively -+ assumed stack realignment might be needed or -fno-omit-frame-pointer -+ is used, but in the end nothing that needed the stack alignment had -+ been spilled nor stack access, clear frame_pointer_needed and say we -+ don't need stack realignment. */ -+ if ((stack_realign || (!flag_omit_frame_pointer && optimize)) -+ && frame_pointer_needed -+ && crtl->is_leaf -+ && crtl->sp_is_unchanging -+ && !ix86_current_function_calls_tls_descriptor -+ && !crtl->accesses_prior_frames -+ && !cfun->calls_alloca -+ && !crtl->calls_eh_return -+ /* See ira_setup_eliminable_regset for the rationale. 
*/ -+ && !(STACK_CHECK_MOVING_SP -+ && flag_stack_check -+ && flag_exceptions -+ && cfun->can_throw_non_call_exceptions) -+ && !ix86_frame_pointer_required () -+ && get_frame_size () == 0 -+ && ix86_nsaved_sseregs () == 0 -+ && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0) -+ { -+ if (ix86_find_max_used_stack_alignment (stack_alignment, -+ stack_realign)) -+ { -+ /* Stack frame is required. If stack alignment needed is less -+ than incoming stack boundary, don't realign stack. */ -+ stack_realign = incoming_stack_boundary < stack_alignment; -+ if (!stack_realign) -+ { -+ crtl->max_used_stack_slot_alignment -+ = incoming_stack_boundary; -+ crtl->stack_alignment_needed -+ = incoming_stack_boundary; -+ /* Also update preferred_stack_boundary for leaf -+ functions. */ -+ crtl->preferred_stack_boundary -+ = incoming_stack_boundary; -+ } -+ } -+ else -+ { -+ /* If drap has been set, but it actually isn't live at the -+ start of the function, there is no reason to set it up. */ -+ if (crtl->drap_reg) -+ { -+ basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb; -+ if (! REGNO_REG_SET_P (DF_LR_IN (bb), -+ REGNO (crtl->drap_reg))) -+ { -+ crtl->drap_reg = NULL_RTX; -+ crtl->need_drap = false; -+ } -+ } -+ else -+ cfun->machine->no_drap_save_restore = true; - --bool --ix86_epilogue_uses (int regno) --{ -- /* If there are no caller-saved registers, we preserve all registers, -- except for MMX and x87 registers which aren't supported when saving -- and restoring registers. Don't explicitly save SP register since -- it is always preserved. */ -- return (epilogue_completed -- && cfun->machine->no_caller_saved_registers -- && !fixed_regs[regno] -- && !STACK_REGNO_P (regno) -- && !MMX_REGNO_P (regno)); --} -+ frame_pointer_needed = false; -+ stack_realign = false; -+ crtl->max_used_stack_slot_alignment = incoming_stack_boundary; -+ crtl->stack_alignment_needed = incoming_stack_boundary; -+ crtl->stack_alignment_estimated = incoming_stack_boundary; -+ if (crtl->preferred_stack_boundary > incoming_stack_boundary) -+ crtl->preferred_stack_boundary = incoming_stack_boundary; -+ df_finish_pass (true); -+ df_scan_alloc (NULL); -+ df_scan_blocks (); -+ df_compute_regs_ever_live (true); -+ df_analyze (); - --/* Return nonzero if register REGNO can be used as a scratch register -- in peephole2. */ -+ if (flag_var_tracking) -+ { -+ /* Since frame pointer is no longer available, replace it with -+ stack pointer - UNITS_PER_WORD in debug insns. */ -+ df_ref ref, next; -+ for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM); -+ ref; ref = next) -+ { -+ next = DF_REF_NEXT_REG (ref); -+ if (!DF_REF_INSN_INFO (ref)) -+ continue; - --static bool --ix86_hard_regno_scratch_ok (unsigned int regno) --{ -- /* If there are no caller-saved registers, we can't use any register -- as a scratch register after epilogue and use REGNO as scratch -- register only if it has been used before to avoid saving and -- restoring it. */ -- return (!cfun->machine->no_caller_saved_registers -- || (!epilogue_completed -- && df_regs_ever_live_p (regno))); --} -+ /* Make sure the next ref is for a different instruction, -+ so that we're not affected by the rescan. */ -+ rtx_insn *insn = DF_REF_INSN (ref); -+ while (next && DF_REF_INSN (next) == insn) -+ next = DF_REF_NEXT_REG (next); - --/* Return TRUE if we need to save REGNO. 
*/ -+ if (DEBUG_INSN_P (insn)) -+ { -+ bool changed = false; -+ for (; ref != next; ref = DF_REF_NEXT_REG (ref)) -+ { -+ rtx *loc = DF_REF_LOC (ref); -+ if (*loc == hard_frame_pointer_rtx) -+ { -+ *loc = plus_constant (Pmode, -+ stack_pointer_rtx, -+ -UNITS_PER_WORD); -+ changed = true; -+ } -+ } -+ if (changed) -+ df_insn_rescan (insn); -+ } -+ } -+ } - --static bool --ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined) --{ -- /* If there are no caller-saved registers, we preserve all registers, -- except for MMX and x87 registers which aren't supported when saving -- and restoring registers. Don't explicitly save SP register since -- it is always preserved. */ -- if (cfun->machine->no_caller_saved_registers) -- { -- /* Don't preserve registers used for function return value. */ -- rtx reg = crtl->return_rtx; -- if (reg) -- { -- unsigned int i = REGNO (reg); -- unsigned int nregs = REG_NREGS (reg); -- while (nregs-- > 0) -- if ((i + nregs) == regno) -- return false; -+ recompute_frame_layout_p = true; - } -- -- return (df_regs_ever_live_p (regno) -- && !fixed_regs[regno] -- && !STACK_REGNO_P (regno) -- && !MMX_REGNO_P (regno) -- && (regno != HARD_FRAME_POINTER_REGNUM -- || !frame_pointer_needed)); - } -- -- if (regno == REAL_PIC_OFFSET_TABLE_REGNUM -- && pic_offset_table_rtx) -+ else if (crtl->max_used_stack_slot_alignment >= 128) - { -- if (ix86_use_pseudo_pic_reg ()) -- { -- /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to -- _mcount in prologue. */ -- if (!TARGET_64BIT && flag_pic && crtl->profile) -- return true; -- } -- else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM) -- || crtl->profile -- || crtl->calls_eh_return -- || crtl->uses_const_pool -- || cfun->has_nonlocal_label) -- return ix86_select_alt_pic_regnum () == INVALID_REGNUM; -+ /* We don't need to realign stack. max_used_stack_alignment is -+ used to decide how stack frame should be aligned. This is -+ independent of any psABIs nor 32-bit vs 64-bit. It is always -+ safe to compute max_used_stack_alignment. We compute it only -+ if 128-bit aligned load/store may be generated on misaligned -+ stack slot which will lead to segfault. */ -+ if (ix86_find_max_used_stack_alignment (stack_alignment, true)) -+ cfun->machine->max_used_stack_alignment -+ = stack_alignment / BITS_PER_UNIT; - } - -- if (crtl->calls_eh_return && maybe_eh_return) -+ if (crtl->stack_realign_needed != stack_realign) -+ recompute_frame_layout_p = true; -+ crtl->stack_realign_needed = stack_realign; -+ crtl->stack_realign_finalized = true; -+ if (recompute_frame_layout_p) -+ ix86_compute_frame_layout (); -+} -+ -+/* Delete SET_GOT right after entry block if it is allocated to reg. 
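
The decision made in ix86_finalize_stack_frame_flags above boils down to a comparison of boundaries: realignment is needed only when the larger of the parameter-passing boundary and the incoming stack boundary is still smaller than the alignment the function's stack slots actually require. A hedged, self-contained C sketch of just that comparison (the parameter names are descriptive only, not GCC fields):

#include <assert.h>

/* Return nonzero when the ABI-provided boundary is too small for the
   largest alignment (all values in bits) the stack slots need.  */
static int
model_stack_realign_needed (unsigned parm_boundary,
                            unsigned incoming_boundary,
                            unsigned alignment_needed)
{
  unsigned boundary = parm_boundary > incoming_boundary
                      ? parm_boundary : incoming_boundary;
  return boundary < alignment_needed;
}

int
main (void)
{
  assert (model_stack_realign_needed (32, 128, 256));   /* 32-byte slots */
  assert (!model_stack_realign_needed (32, 128, 128));  /* 16-byte slots */
  return 0;
}
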
*/ -+ -+static void -+ix86_elim_entry_set_got (rtx reg) -+{ -+ basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb; -+ rtx_insn *c_insn = BB_HEAD (bb); -+ if (!NONDEBUG_INSN_P (c_insn)) -+ c_insn = next_nonnote_nondebug_insn (c_insn); -+ if (c_insn && NONJUMP_INSN_P (c_insn)) - { -- unsigned i; -- for (i = 0; ; i++) -+ rtx pat = PATTERN (c_insn); -+ if (GET_CODE (pat) == PARALLEL) - { -- unsigned test = EH_RETURN_DATA_REGNO (i); -- if (test == INVALID_REGNUM) -- break; -- if (test == regno) -- return true; -+ rtx vec = XVECEXP (pat, 0, 0); -+ if (GET_CODE (vec) == SET -+ && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT -+ && REGNO (XEXP (vec, 0)) == REGNO (reg)) -+ delete_insn (c_insn); - } - } -- -- if (ignore_outlined && cfun->machine->call_ms2sysv) -- { -- unsigned count = cfun->machine->call_ms2sysv_extra_regs -- + xlogue_layout::MIN_REGS; -- if (xlogue_layout::is_stub_managed_reg (regno, count)) -- return false; -- } -- -- if (crtl->drap_reg -- && regno == REGNO (crtl->drap_reg) -- && !cfun->machine->no_drap_save_restore) -- return true; -- -- return (df_regs_ever_live_p (regno) -- && !call_used_regs[regno] -- && !fixed_regs[regno] -- && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed)); - } - --/* Return number of saved general prupose registers. */ -- --static int --ix86_nsaved_regs (void) -+static rtx -+gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store) - { -- int nregs = 0; -- int regno; -+ rtx addr, mem; - -- for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) -- if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true)) -- nregs ++; -- return nregs; -+ if (offset) -+ addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset)); -+ mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg); -+ return gen_rtx_SET (store ? mem : reg, store ? reg : mem); - } - --/* Return number of saved SSE registers. */ -- --static int --ix86_nsaved_sseregs (void) -+static inline rtx -+gen_frame_load (rtx reg, rtx frame_reg, int offset) - { -- int nregs = 0; -- int regno; -- -- if (!TARGET_64BIT_MS_ABI) -- return 0; -- for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) -- if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true)) -- nregs ++; -- return nregs; -+ return gen_frame_set (reg, frame_reg, offset, false); - } - --/* Given FROM and TO register numbers, say whether this elimination is -- allowed. If stack alignment is needed, we can only replace argument -- pointer with hard frame pointer, or replace frame pointer with stack -- pointer. Otherwise, frame pointer elimination is automatically -- handled and all other eliminations are valid. */ -- --static bool --ix86_can_eliminate (const int from, const int to) -+static inline rtx -+gen_frame_store (rtx reg, rtx frame_reg, int offset) - { -- if (stack_realign_fp) -- return ((from == ARG_POINTER_REGNUM -- && to == HARD_FRAME_POINTER_REGNUM) -- || (from == FRAME_POINTER_REGNUM -- && to == STACK_POINTER_REGNUM)); -- else -- return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true; -+ return gen_frame_set (reg, frame_reg, offset, true); - } - --/* Return the offset between two registers, one to be eliminated, and the other -- its replacement, at the start of a routine. 
*/ -- --HOST_WIDE_INT --ix86_initial_elimination_offset (int from, int to) -+static void -+ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame) - { -- struct ix86_frame &frame = cfun->machine->frame; -+ struct machine_function *m = cfun->machine; -+ const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS -+ + m->call_ms2sysv_extra_regs; -+ rtvec v = rtvec_alloc (ncregs + 1); -+ unsigned int align, i, vi = 0; -+ rtx_insn *insn; -+ rtx sym, addr; -+ rtx rax = gen_rtx_REG (word_mode, AX_REG); -+ const struct xlogue_layout &xlogue = xlogue_layout::get_instance (); - -- if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) -- return frame.hard_frame_pointer_offset; -- else if (from == FRAME_POINTER_REGNUM -- && to == HARD_FRAME_POINTER_REGNUM) -- return frame.hard_frame_pointer_offset - frame.frame_pointer_offset; -- else -- { -- gcc_assert (to == STACK_POINTER_REGNUM); -+ /* AL should only be live with sysv_abi. */ -+ gcc_assert (!ix86_eax_live_at_start_p ()); -+ gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset); - -- if (from == ARG_POINTER_REGNUM) -- return frame.stack_pointer_offset; -+ /* Setup RAX as the stub's base pointer. We use stack_realign_offset rather -+ we've actually realigned the stack or not. */ -+ align = GET_MODE_ALIGNMENT (V4SFmode); -+ addr = choose_baseaddr (frame.stack_realign_offset -+ + xlogue.get_stub_ptr_offset (), &align, AX_REG); -+ gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode)); - -- gcc_assert (from == FRAME_POINTER_REGNUM); -- return frame.stack_pointer_offset - frame.frame_pointer_offset; -- } --} -+ emit_insn (gen_rtx_SET (rax, addr)); - --/* In a dynamically-aligned function, we can't know the offset from -- stack pointer to frame pointer, so we must ensure that setjmp -- eliminates fp against the hard fp (%ebp) rather than trying to -- index from %esp up to the top of the frame across a gap that is -- of unknown (at compile-time) size. */ --static rtx --ix86_builtin_setjmp_frame_value (void) --{ -- return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx; --} -+ /* Get the stub symbol. */ -+ sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP -+ : XLOGUE_STUB_SAVE); -+ RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym); - --/* Emits a warning for unsupported msabi to sysv pro/epilogues. */ --static void warn_once_call_ms2sysv_xlogues (const char *feature) --{ -- static bool warned_once = false; -- if (!warned_once) -+ for (i = 0; i < ncregs; ++i) - { -- warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s", -- feature); -- warned_once = true; -+ const xlogue_layout::reginfo &r = xlogue.get_reginfo (i); -+ rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode), -+ r.regno); -+ RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset); - } --} - --/* Return the probing interval for -fstack-clash-protection. */ -+ gcc_assert (vi == (unsigned)GET_NUM_ELEM (v)); - --static HOST_WIDE_INT --get_probe_interval (void) --{ -- if (flag_stack_clash_protection) -- return (HOST_WIDE_INT_1U -- << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL)); -- else -- return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP); -+ insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v)); -+ RTX_FRAME_RELATED_P (insn) = true; - } - --/* When using -fsplit-stack, the allocation routines set a field in -- the TCB to the bottom of the stack plus this much space, measured -- in bytes. 
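
get_probe_interval, shown above, selects the probing granularity: a power of two taken from the stack-clash probe-interval parameter under -fstack-clash-protection and from STACK_CHECK_PROBE_INTERVAL_EXP otherwise. A trivial C sketch of that selection (the exponent 12, i.e. a 4 KiB page, is only an assumed example value):

#include <stdio.h>

/* Pick the probe interval as 1 << exponent, where the exponent comes
   from the stack-clash parameter or the generic stack-check macro.  */
static long
model_probe_interval (int stack_clash_protection,
                      int clash_interval_exp, int generic_interval_exp)
{
  int exp = stack_clash_protection ? clash_interval_exp
                                   : generic_interval_exp;
  return 1L << exp;
}

int
main (void)
{
  printf ("clash probing interval:   %ld\n", model_probe_interval (1, 12, 12));
  printf ("generic probing interval: %ld\n", model_probe_interval (0, 12, 12));
  return 0;
}
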
*/ -- --#define SPLIT_STACK_AVAILABLE 256 -- --/* Fill structure ix86_frame about frame of currently computed function. */ -+/* Expand the prologue into a bunch of separate insns. */ - --static void --ix86_compute_frame_layout (void) -+void -+ix86_expand_prologue (void) - { -- struct ix86_frame *frame = &cfun->machine->frame; - struct machine_function *m = cfun->machine; -- unsigned HOST_WIDE_INT stack_alignment_needed; -- HOST_WIDE_INT offset; -- unsigned HOST_WIDE_INT preferred_alignment; -- HOST_WIDE_INT size = get_frame_size (); -- HOST_WIDE_INT to_allocate; -+ rtx insn, t; -+ HOST_WIDE_INT allocate; -+ bool int_registers_saved; -+ bool sse_registers_saved; -+ bool save_stub_call_needed; -+ rtx static_chain = NULL_RTX; - -- /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit -- * ms_abi functions that call a sysv function. We now need to prune away -- * cases where it should be disabled. */ -- if (TARGET_64BIT && m->call_ms2sysv) -- { -- gcc_assert (TARGET_64BIT_MS_ABI); -- gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES); -- gcc_assert (!TARGET_SEH); -- gcc_assert (TARGET_SSE); -- gcc_assert (!ix86_using_red_zone ()); -+ if (ix86_function_naked (current_function_decl)) -+ return; - -- if (crtl->calls_eh_return) -- { -- gcc_assert (!reload_completed); -- m->call_ms2sysv = false; -- warn_once_call_ms2sysv_xlogues ("__builtin_eh_return"); -- } -+ ix86_finalize_stack_frame_flags (); - -- else if (ix86_static_chain_on_stack) -- { -- gcc_assert (!reload_completed); -- m->call_ms2sysv = false; -- warn_once_call_ms2sysv_xlogues ("static call chains"); -- } -+ /* DRAP should not coexist with stack_realign_fp */ -+ gcc_assert (!(crtl->drap_reg && stack_realign_fp)); - -- /* Finally, compute which registers the stub will manage. */ -- else -- { -- unsigned count = xlogue_layout::count_stub_managed_regs (); -- m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS; -- m->call_ms2sysv_pad_in = 0; -- } -- } -+ memset (&m->fs, 0, sizeof (m->fs)); - -- frame->nregs = ix86_nsaved_regs (); -- frame->nsseregs = ix86_nsaved_sseregs (); -+ /* Initialize CFA state for before the prologue. */ -+ m->fs.cfa_reg = stack_pointer_rtx; -+ m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET; - -- /* 64-bit MS ABI seem to require stack alignment to be always 16, -- except for function prologues, leaf functions and when the defult -- incoming stack boundary is overriden at command line or via -- force_align_arg_pointer attribute. -+ /* Track SP offset to the CFA. We continue tracking this after we've -+ swapped the CFA register away from SP. In the case of re-alignment -+ this is fudged; we're interested to offsets within the local frame. */ -+ m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET; -+ m->fs.sp_valid = true; -+ m->fs.sp_realigned = false; - -- Darwin's ABI specifies 128b alignment for both 32 and 64 bit variants -- at call sites, including profile function calls. -- */ -- if (((TARGET_64BIT_MS_ABI || TARGET_MACHO) -- && crtl->preferred_stack_boundary < 128) -- && (!crtl->is_leaf || cfun->calls_alloca != 0 -- || ix86_current_function_calls_tls_descriptor -- || (TARGET_MACHO && crtl->profile) -- || ix86_incoming_stack_boundary < 128)) -+ const struct ix86_frame &frame = cfun->machine->frame; -+ -+ if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl)) - { -- crtl->preferred_stack_boundary = 128; -- crtl->stack_alignment_needed = 128; -- } -+ /* We should have already generated an error for any use of -+ ms_hook on a nested function. 
*/ -+ gcc_checking_assert (!ix86_static_chain_on_stack); - -- stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT; -- preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT; -+ /* Check if profiling is active and we shall use profiling before -+ prologue variant. If so sorry. */ -+ if (crtl->profile && flag_fentry != 0) -+ sorry ("% attribute is not compatible " -+ "with %<-mfentry%> for 32-bit"); - -- gcc_assert (!size || stack_alignment_needed); -- gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT); -- gcc_assert (preferred_alignment <= stack_alignment_needed); -+ /* In ix86_asm_output_function_label we emitted: -+ 8b ff movl.s %edi,%edi -+ 55 push %ebp -+ 8b ec movl.s %esp,%ebp - -- /* The only ABI saving SSE regs should be 64-bit ms_abi. */ -- gcc_assert (TARGET_64BIT || !frame->nsseregs); -- if (TARGET_64BIT && m->call_ms2sysv) -- { -- gcc_assert (stack_alignment_needed >= 16); -- gcc_assert (!frame->nsseregs); -- } -+ This matches the hookable function prologue in Win32 API -+ functions in Microsoft Windows XP Service Pack 2 and newer. -+ Wine uses this to enable Windows apps to hook the Win32 API -+ functions provided by Wine. - -- /* For SEH we have to limit the amount of code movement into the prologue. -- At present we do this via a BLOCKAGE, at which point there's very little -- scheduling that can be done, which means that there's very little point -- in doing anything except PUSHs. */ -- if (TARGET_SEH) -- m->use_fast_prologue_epilogue = false; -- else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))) -- { -- int count = frame->nregs; -- struct cgraph_node *node = cgraph_node::get (current_function_decl); -+ What that means is that we've already set up the frame pointer. */ - -- /* The fast prologue uses move instead of push to save registers. This -- is significantly longer, but also executes faster as modern hardware -- can execute the moves in parallel, but can't do that for push/pop. -+ if (frame_pointer_needed -+ && !(crtl->drap_reg && crtl->stack_realign_needed)) -+ { -+ rtx push, mov; - -- Be careful about choosing what prologue to emit: When function takes -- many instructions to execute we may use slow version as well as in -- case function is known to be outside hot spot (this is known with -- feedback only). Weight the size of function by number of registers -- to save as it is cheap to use one or two push instructions but very -- slow to use many of them. */ -- if (count) -- count = (count - 1) * FAST_PROLOGUE_INSN_COUNT; -- if (node->frequency < NODE_FREQUENCY_NORMAL -- || (flag_branch_probabilities -- && node->frequency < NODE_FREQUENCY_HOT)) -- m->use_fast_prologue_epilogue = false; -- else -- m->use_fast_prologue_epilogue -- = !expensive_function_p (count); -- } -+ /* We've decided to use the frame pointer already set up. -+ Describe this to the unwinder by pretending that both -+ push and mov insns happen right here. - -- frame->save_regs_using_mov -- = (TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue -- /* If static stack checking is enabled and done with probes, -- the registers need to be saved before allocating the frame. */ -- && flag_stack_check != STATIC_BUILTIN_STACK_CHECK); -+ Putting the unwind info here at the end of the ms_hook -+ is done so that we can make absolutely certain we get -+ the required byte sequence at the start of the function, -+ rather than relying on an assembler that can produce -+ the exact encoding required. 
- -- /* Skip return address and error code in exception handler. */ -- offset = INCOMING_FRAME_SP_OFFSET; -+ However it does mean (in the unpatched case) that we have -+ a 1 insn window where the asynchronous unwind info is -+ incorrect. However, if we placed the unwind info at -+ its correct location we would have incorrect unwind info -+ in the patched case. Which is probably all moot since -+ I don't expect Wine generates dwarf2 unwind info for the -+ system libraries that use this feature. */ - -- /* Skip pushed static chain. */ -- if (ix86_static_chain_on_stack) -- offset += UNITS_PER_WORD; -+ insn = emit_insn (gen_blockage ()); - -- /* Skip saved base pointer. */ -- if (frame_pointer_needed) -- offset += UNITS_PER_WORD; -- frame->hfp_save_offset = offset; -+ push = gen_push (hard_frame_pointer_rtx); -+ mov = gen_rtx_SET (hard_frame_pointer_rtx, -+ stack_pointer_rtx); -+ RTX_FRAME_RELATED_P (push) = 1; -+ RTX_FRAME_RELATED_P (mov) = 1; - -- /* The traditional frame pointer location is at the top of the frame. */ -- frame->hard_frame_pointer_offset = offset; -+ RTX_FRAME_RELATED_P (insn) = 1; -+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, -+ gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov))); - -- /* Register save area */ -- offset += frame->nregs * UNITS_PER_WORD; -- frame->reg_save_offset = offset; -+ /* Note that gen_push incremented m->fs.cfa_offset, even -+ though we didn't emit the push insn here. */ -+ m->fs.cfa_reg = hard_frame_pointer_rtx; -+ m->fs.fp_offset = m->fs.cfa_offset; -+ m->fs.fp_valid = true; -+ } -+ else -+ { -+ /* The frame pointer is not needed so pop %ebp again. -+ This leaves us with a pristine state. */ -+ emit_insn (gen_pop (hard_frame_pointer_rtx)); -+ } -+ } - -- /* On SEH target, registers are pushed just before the frame pointer -- location. */ -- if (TARGET_SEH) -- frame->hard_frame_pointer_offset = offset; -+ /* The first insn of a function that accepts its static chain on the -+ stack is to push the register that would be filled in by a direct -+ call. This insn will be skipped by the trampoline. */ -+ else if (ix86_static_chain_on_stack) -+ { -+ static_chain = ix86_static_chain (cfun->decl, false); -+ insn = emit_insn (gen_push (static_chain)); -+ emit_insn (gen_blockage ()); - -- /* Calculate the size of the va-arg area (not including padding, if any). */ -- frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size; -+ /* We don't want to interpret this push insn as a register save, -+ only as a stack adjustment. The real copy of the register as -+ a save will be done later, if needed. */ -+ t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD); -+ t = gen_rtx_SET (stack_pointer_rtx, t); -+ add_reg_note (insn, REG_CFA_ADJUST_CFA, t); -+ RTX_FRAME_RELATED_P (insn) = 1; -+ } - -- /* Also adjust stack_realign_offset for the largest alignment of -- stack slot actually used. */ -- if (stack_realign_fp -- || (cfun->machine->max_used_stack_alignment != 0 -- && (offset % cfun->machine->max_used_stack_alignment) != 0)) -+ /* Emit prologue code to adjust stack alignment and setup DRAP, in case -+ of DRAP is needed and stack realignment is really needed after reload */ -+ if (stack_realign_drap) - { -- /* We may need a 16-byte aligned stack for the remainder of the -- register save area, but the stack frame for the local function -- may require a greater alignment if using AVX/2/512. 
In order -- to avoid wasting space, we first calculate the space needed for -- the rest of the register saves, add that to the stack pointer, -- and then realign the stack to the boundary of the start of the -- frame for the local function. */ -- HOST_WIDE_INT space_needed = 0; -- HOST_WIDE_INT sse_reg_space_needed = 0; -+ int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT; - -- if (TARGET_64BIT) -- { -- if (m->call_ms2sysv) -- { -- m->call_ms2sysv_pad_in = 0; -- space_needed = xlogue_layout::get_instance ().get_stack_space_used (); -- } -+ /* Can't use DRAP in interrupt function. */ -+ if (cfun->machine->func_type != TYPE_NORMAL) -+ sorry ("Dynamic Realign Argument Pointer (DRAP) not supported " -+ "in interrupt service routine. This may be worked " -+ "around by avoiding functions with aggregate return."); - -- else if (frame->nsseregs) -- /* The only ABI that has saved SSE registers (Win64) also has a -- 16-byte aligned default stack. However, many programs violate -- the ABI, and Wine64 forces stack realignment to compensate. */ -- space_needed = frame->nsseregs * 16; -+ /* Only need to push parameter pointer reg if it is caller saved. */ -+ if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg))) -+ { -+ /* Push arg pointer reg */ -+ insn = emit_insn (gen_push (crtl->drap_reg)); -+ RTX_FRAME_RELATED_P (insn) = 1; -+ } - -- sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16); -+ /* Grab the argument pointer. */ -+ t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset); -+ insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t)); -+ RTX_FRAME_RELATED_P (insn) = 1; -+ m->fs.cfa_reg = crtl->drap_reg; -+ m->fs.cfa_offset = 0; - -- /* 64-bit frame->va_arg_size should always be a multiple of 16, but -- rounding to be pedantic. */ -- space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16); -- } -- else -- space_needed = frame->va_arg_size; -+ /* Align the stack. */ -+ insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx, -+ stack_pointer_rtx, -+ GEN_INT (-align_bytes))); -+ RTX_FRAME_RELATED_P (insn) = 1; - -- /* Record the allocation size required prior to the realignment AND. */ -- frame->stack_realign_allocate = space_needed; -+ /* Replicate the return address on the stack so that return -+ address can be reached via (argp - 1) slot. This is needed -+ to implement macro RETURN_ADDR_RTX and intrinsic function -+ expand_builtin_return_addr etc. */ -+ t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD); -+ t = gen_frame_mem (word_mode, t); -+ insn = emit_insn (gen_push (t)); -+ RTX_FRAME_RELATED_P (insn) = 1; - -- /* The re-aligned stack starts at frame->stack_realign_offset. Values -- before this point are not directly comparable with values below -- this point. Use sp_valid_at to determine if the stack pointer is -- valid for a given offset, fp_valid_at for the frame pointer, or -- choose_baseaddr to have a base register chosen for you. -+ /* For the purposes of frame and register save area addressing, -+ we've started over with a new frame. */ -+ m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET; -+ m->fs.realigned = true; - -- Note that the result of (frame->stack_realign_offset -- & (stack_alignment_needed - 1)) may not equal zero. 
*/ -- offset = ROUND_UP (offset + space_needed, stack_alignment_needed); -- frame->stack_realign_offset = offset - space_needed; -- frame->sse_reg_save_offset = frame->stack_realign_offset -- + sse_reg_space_needed; -+ if (static_chain) -+ { -+ /* Replicate static chain on the stack so that static chain -+ can be reached via (argp - 2) slot. This is needed for -+ nested function with stack realignment. */ -+ insn = emit_insn (gen_push (static_chain)); -+ RTX_FRAME_RELATED_P (insn) = 1; -+ } - } -- else -+ -+ int_registers_saved = (frame.nregs == 0); -+ sse_registers_saved = (frame.nsseregs == 0); -+ save_stub_call_needed = (m->call_ms2sysv); -+ gcc_assert (sse_registers_saved || !save_stub_call_needed); -+ -+ if (frame_pointer_needed && !m->fs.fp_valid) - { -- frame->stack_realign_offset = offset; -+ /* Note: AT&T enter does NOT have reversed args. Enter is probably -+ slower on all targets. Also sdb didn't like it. */ -+ insn = emit_insn (gen_push (hard_frame_pointer_rtx)); -+ RTX_FRAME_RELATED_P (insn) = 1; - -- if (TARGET_64BIT && m->call_ms2sysv) -+ /* Push registers now, before setting the frame pointer -+ on SEH target. */ -+ if (!int_registers_saved -+ && TARGET_SEH -+ && !frame.save_regs_using_mov) - { -- m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD); -- offset += xlogue_layout::get_instance ().get_stack_space_used (); -+ ix86_emit_save_regs (); -+ int_registers_saved = true; -+ gcc_assert (m->fs.sp_offset == frame.reg_save_offset); - } - -- /* Align and set SSE register save area. */ -- else if (frame->nsseregs) -+ if (m->fs.sp_offset == frame.hard_frame_pointer_offset) - { -- /* If the incoming stack boundary is at least 16 bytes, or DRAP is -- required and the DRAP re-alignment boundary is at least 16 bytes, -- then we want the SSE register save area properly aligned. */ -- if (ix86_incoming_stack_boundary >= 128 -- || (stack_realign_drap && stack_alignment_needed >= 16)) -- offset = ROUND_UP (offset, 16); -- offset += frame->nsseregs * 16; -+ insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); -+ RTX_FRAME_RELATED_P (insn) = 1; -+ -+ if (m->fs.cfa_reg == stack_pointer_rtx) -+ m->fs.cfa_reg = hard_frame_pointer_rtx; -+ m->fs.fp_offset = m->fs.sp_offset; -+ m->fs.fp_valid = true; - } -- frame->sse_reg_save_offset = offset; -- offset += frame->va_arg_size; - } - -- /* Align start of frame for local function. When a function call -- is removed, it may become a leaf function. But if argument may -- be passed on stack, we need to align the stack when there is no -- tail call. */ -- if (m->call_ms2sysv -- || frame->va_arg_size != 0 -- || size != 0 -- || !crtl->is_leaf -- || (!crtl->tail_call_emit -- && cfun->machine->outgoing_args_on_stack) -- || cfun->calls_alloca -- || ix86_current_function_calls_tls_descriptor) -- offset = ROUND_UP (offset, stack_alignment_needed); -- -- /* Frame pointer points here. */ -- frame->frame_pointer_offset = offset; -- -- offset += size; -- -- /* Add outgoing arguments area. Can be skipped if we eliminated -- all the function calls as dead code. -- Skipping is however impossible when function calls alloca. Alloca -- expander assumes that last crtl->outgoing_args_size -- of stack frame are unused. 
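
ix86_compute_frame_layout, above, is essentially a running offset from the CFA that is bumped for each saved area and rounded up at the alignment boundaries. The following simplified, self-contained C model (for the common 64-bit case with no realignment, va-args or ms2sysv stub; all struct and function names are invented) mirrors only that ROUND_UP arithmetic:

#include <stdio.h>

#define WORD 8
#define ROUND_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

struct model_frame
{
  long reg_save_offset;      /* CFA - here: end of saved GPRs          */
  long frame_pointer_offset; /* CFA - here: start of local variables   */
  long stack_pointer_offset; /* CFA - here: SP after the prologue      */
};

/* Accumulate the frame offsets: return address, pushed GPRs, aligned
   locals, outgoing arguments, then a final preferred-alignment round.  */
static struct model_frame
model_layout (int nregs, long locals, long outgoing,
              long slot_align, long preferred_align)
{
  struct model_frame f;
  long offset = WORD;                     /* return address            */

  offset += nregs * WORD;                 /* pushed call-saved GPRs    */
  f.reg_save_offset = offset;

  offset = ROUND_UP (offset, slot_align); /* align the local frame     */
  f.frame_pointer_offset = offset;

  offset += locals;                       /* local variables           */
  offset += outgoing;                     /* outgoing argument area    */
  offset = ROUND_UP (offset, preferred_align);
  f.stack_pointer_offset = offset;
  return f;
}

int
main (void)
{
  struct model_frame f = model_layout (3, 40, 32, 16, 16);
  printf ("regs end at -%ld, locals at -%ld, sp at -%ld\n",
          f.reg_save_offset, f.frame_pointer_offset, f.stack_pointer_offset);
  return 0;
}
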
*/ -- if (ACCUMULATE_OUTGOING_ARGS -- && (!crtl->is_leaf || cfun->calls_alloca -- || ix86_current_function_calls_tls_descriptor)) -+ if (!int_registers_saved) - { -- offset += crtl->outgoing_args_size; -- frame->outgoing_arguments_size = crtl->outgoing_args_size; -- } -- else -- frame->outgoing_arguments_size = 0; -+ /* If saving registers via PUSH, do so now. */ -+ if (!frame.save_regs_using_mov) -+ { -+ ix86_emit_save_regs (); -+ int_registers_saved = true; -+ gcc_assert (m->fs.sp_offset == frame.reg_save_offset); -+ } - -- /* Align stack boundary. Only needed if we're calling another function -- or using alloca. */ -- if (!crtl->is_leaf || cfun->calls_alloca -- || ix86_current_function_calls_tls_descriptor) -- offset = ROUND_UP (offset, preferred_alignment); -+ /* When using red zone we may start register saving before allocating -+ the stack frame saving one cycle of the prologue. However, avoid -+ doing this if we have to probe the stack; at least on x86_64 the -+ stack probe can turn into a call that clobbers a red zone location. */ -+ else if (ix86_using_red_zone () -+ && (! TARGET_STACK_PROBE -+ || frame.stack_pointer_offset < CHECK_STACK_LIMIT)) -+ { -+ ix86_emit_save_regs_using_mov (frame.reg_save_offset); -+ int_registers_saved = true; -+ } -+ } - -- /* We've reached end of stack frame. */ -- frame->stack_pointer_offset = offset; -+ if (stack_realign_fp) -+ { -+ int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT; -+ gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT); - -- /* Size prologue needs to allocate. */ -- to_allocate = offset - frame->sse_reg_save_offset; -+ /* Record last valid frame pointer offset. */ -+ m->fs.sp_realigned_fp_last = frame.reg_save_offset; - -- if ((!to_allocate && frame->nregs <= 1) -- || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000)) -- /* If stack clash probing needs a loop, then it needs a -- scratch register. But the returned register is only guaranteed -- to be safe to use after register saves are complete. So if -- stack clash protections are enabled and the allocated frame is -- larger than the probe interval, then use pushes to save -- callee saved registers. */ -- || (flag_stack_clash_protection && to_allocate > get_probe_interval ())) -- frame->save_regs_using_mov = false; -+ /* The computation of the size of the re-aligned stack frame means -+ that we must allocate the size of the register save area before -+ performing the actual alignment. Otherwise we cannot guarantee -+ that there's enough storage above the realignment point. */ -+ allocate = frame.reg_save_offset - m->fs.sp_offset -+ + frame.stack_realign_allocate; -+ if (allocate) -+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, -+ GEN_INT (-allocate), -1, false); - -- if (ix86_using_red_zone () -- && crtl->sp_is_unchanging -- && crtl->is_leaf -- && !ix86_pc_thunk_call_expanded -- && !ix86_current_function_calls_tls_descriptor) -- { -- frame->red_zone_size = to_allocate; -- if (frame->save_regs_using_mov) -- frame->red_zone_size += frame->nregs * UNITS_PER_WORD; -- if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE) -- frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE; -- } -- else -- frame->red_zone_size = 0; -- frame->stack_pointer_offset -= frame->red_zone_size; -+ /* Align the stack. 
*/ -+ insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx, -+ stack_pointer_rtx, -+ GEN_INT (-align_bytes))); -+ m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes); -+ m->fs.sp_realigned_offset = m->fs.sp_offset -+ - frame.stack_realign_allocate; -+ /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset. -+ Beyond this point, stack access should be done via choose_baseaddr or -+ by using sp_valid_at and fp_valid_at to determine the correct base -+ register. Henceforth, any CFA offset should be thought of as logical -+ and not physical. */ -+ gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last); -+ gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset); -+ m->fs.sp_realigned = true; - -- /* The SEH frame pointer location is near the bottom of the frame. -- This is enforced by the fact that the difference between the -- stack pointer and the frame pointer is limited to 240 bytes in -- the unwind data structure. */ -- if (TARGET_SEH) -- { -- HOST_WIDE_INT diff; -+ /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which -+ is needed to describe where a register is saved using a realigned -+ stack pointer, so we need to invalidate the stack pointer for that -+ target. */ -+ if (TARGET_SEH) -+ m->fs.sp_valid = false; - -- /* If we can leave the frame pointer where it is, do so. Also, returns -- the establisher frame for __builtin_frame_address (0). */ -- diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset; -- if (diff <= SEH_MAX_FRAME_SIZE -- && (diff > 240 || (diff & 15) != 0) -- && !crtl->accesses_prior_frames) -+ /* If SP offset is non-immediate after allocation of the stack frame, -+ then emit SSE saves or stub call prior to allocating the rest of the -+ stack frame. This is less efficient for the out-of-line stub because -+ we can't combine allocations across the call barrier, but it's better -+ than using a scratch register. */ -+ else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset -+ - m->fs.sp_realigned_offset), -+ Pmode)) - { -- /* Ideally we'd determine what portion of the local stack frame -- (within the constraint of the lowest 240) is most heavily used. -- But without that complication, simply bias the frame pointer -- by 128 bytes so as to maximize the amount of the local stack -- frame that is addressable with 8-bit offsets. */ -- frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128; -+ if (!sse_registers_saved) -+ { -+ ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset); -+ sse_registers_saved = true; -+ } -+ else if (save_stub_call_needed) -+ { -+ ix86_emit_outlined_ms2sysv_save (frame); -+ save_stub_call_needed = false; -+ } - } - } --} -- --/* This is semi-inlined memory_address_length, but simplified -- since we know that we're always dealing with reg+offset, and -- to avoid having to create and discard all that rtl. */ - --static inline int --choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset) --{ -- int len = 4; -+ allocate = frame.stack_pointer_offset - m->fs.sp_offset; - -- if (offset == 0) -+ if (flag_stack_usage_info) - { -- /* EBP and R13 cannot be encoded without an offset. */ -- len = (regno == BP_REG || regno == R13_REG); -- } -- else if (IN_RANGE (offset, -128, 127)) -- len = 1; -+ /* We start to count from ARG_POINTER. */ -+ HOST_WIDE_INT stack_size = frame.stack_pointer_offset; - -- /* ESP and R12 must be encoded with a SIB byte. 
*/ -- if (regno == SP_REG || regno == R12_REG) -- len++; -+ /* If it was realigned, take into account the fake frame. */ -+ if (stack_realign_drap) -+ { -+ if (ix86_static_chain_on_stack) -+ stack_size += UNITS_PER_WORD; - -- return len; --} -+ if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg))) -+ stack_size += UNITS_PER_WORD; - --/* Determine if the stack pointer is valid for accessing the CFA_OFFSET in -- the frame save area. The register is saved at CFA - CFA_OFFSET. */ -+ /* This over-estimates by 1 minimal-stack-alignment-unit but -+ mitigates that by counting in the new return address slot. */ -+ current_function_dynamic_stack_size -+ += crtl->stack_alignment_needed / BITS_PER_UNIT; -+ } - --static bool --sp_valid_at (HOST_WIDE_INT cfa_offset) --{ -- const struct machine_frame_state &fs = cfun->machine->fs; -- if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset) -- { -- /* Validate that the cfa_offset isn't in a "no-man's land". */ -- gcc_assert (cfa_offset <= fs.sp_realigned_fp_last); -- return false; -+ current_function_static_stack_size = stack_size; - } -- return fs.sp_valid; --} - --/* Determine if the frame pointer is valid for accessing the CFA_OFFSET in -- the frame save area. The register is saved at CFA - CFA_OFFSET. */ -- --static inline bool --fp_valid_at (HOST_WIDE_INT cfa_offset) --{ -- const struct machine_frame_state &fs = cfun->machine->fs; -- if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last) -+ /* On SEH target with very large frame size, allocate an area to save -+ SSE registers (as the very large allocation won't be described). */ -+ if (TARGET_SEH -+ && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE -+ && !sse_registers_saved) - { -- /* Validate that the cfa_offset isn't in a "no-man's land". */ -- gcc_assert (cfa_offset >= fs.sp_realigned_offset); -- return false; -- } -- return fs.fp_valid; --} -- --/* Choose a base register based upon alignment requested, speed and/or -- size. */ -- --static void --choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg, -- HOST_WIDE_INT &base_offset, -- unsigned int align_reqested, unsigned int *align) --{ -- const struct machine_function *m = cfun->machine; -- unsigned int hfp_align; -- unsigned int drap_align; -- unsigned int sp_align; -- bool hfp_ok = fp_valid_at (cfa_offset); -- bool drap_ok = m->fs.drap_valid; -- bool sp_ok = sp_valid_at (cfa_offset); -- -- hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY; -+ HOST_WIDE_INT sse_size -+ = frame.sse_reg_save_offset - frame.reg_save_offset; - -- /* Filter out any registers that don't meet the requested alignment -- criteria. */ -- if (align_reqested) -- { -- if (m->fs.realigned) -- hfp_align = drap_align = sp_align = crtl->stack_alignment_needed; -- /* SEH unwind code does do not currently support REG_CFA_EXPRESSION -- notes (which we would need to use a realigned stack pointer), -- so disable on SEH targets. */ -- else if (m->fs.sp_realigned) -- sp_align = crtl->stack_alignment_needed; -+ gcc_assert (int_registers_saved); - -- hfp_ok = hfp_ok && hfp_align >= align_reqested; -- drap_ok = drap_ok && drap_align >= align_reqested; -- sp_ok = sp_ok && sp_align >= align_reqested; -+ /* No need to do stack checking as the area will be immediately -+ written. 
*/ -+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, -+ GEN_INT (-sse_size), -1, -+ m->fs.cfa_reg == stack_pointer_rtx); -+ allocate -= sse_size; -+ ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset); -+ sse_registers_saved = true; - } - -- if (m->use_fast_prologue_epilogue) -+ /* The stack has already been decremented by the instruction calling us -+ so probe if the size is non-negative to preserve the protection area. */ -+ if (allocate >= 0 -+ && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK -+ || flag_stack_clash_protection)) - { -- /* Choose the base register most likely to allow the most scheduling -- opportunities. Generally FP is valid throughout the function, -- while DRAP must be reloaded within the epilogue. But choose either -- over the SP due to increased encoding size. */ -- -- if (hfp_ok) -+ if (flag_stack_clash_protection) - { -- base_reg = hard_frame_pointer_rtx; -- base_offset = m->fs.fp_offset - cfa_offset; -+ ix86_adjust_stack_and_probe_stack_clash (allocate, -+ int_registers_saved); -+ allocate = 0; - } -- else if (drap_ok) -+ else if (STACK_CHECK_MOVING_SP) - { -- base_reg = crtl->drap_reg; -- base_offset = 0 - cfa_offset; -+ if (!(crtl->is_leaf && !cfun->calls_alloca -+ && allocate <= get_probe_interval ())) -+ { -+ ix86_adjust_stack_and_probe (allocate, int_registers_saved); -+ allocate = 0; -+ } - } -- else if (sp_ok) -+ else - { -- base_reg = stack_pointer_rtx; -- base_offset = m->fs.sp_offset - cfa_offset; -+ HOST_WIDE_INT size = allocate; -+ -+ if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000)) -+ size = 0x80000000 - get_stack_check_protect () - 1; -+ -+ if (TARGET_STACK_PROBE) -+ { -+ if (crtl->is_leaf && !cfun->calls_alloca) -+ { -+ if (size > get_probe_interval ()) -+ ix86_emit_probe_stack_range (0, size, int_registers_saved); -+ } -+ else -+ ix86_emit_probe_stack_range (0, -+ size + get_stack_check_protect (), -+ int_registers_saved); -+ } -+ else -+ { -+ if (crtl->is_leaf && !cfun->calls_alloca) -+ { -+ if (size > get_probe_interval () -+ && size > get_stack_check_protect ()) -+ ix86_emit_probe_stack_range (get_stack_check_protect (), -+ (size -+ - get_stack_check_protect ()), -+ int_registers_saved); -+ } -+ else -+ ix86_emit_probe_stack_range (get_stack_check_protect (), size, -+ int_registers_saved); -+ } - } - } -+ -+ if (allocate == 0) -+ ; -+ else if (!ix86_target_stack_probe () -+ || frame.stack_pointer_offset < CHECK_STACK_LIMIT) -+ { -+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, -+ GEN_INT (-allocate), -1, -+ m->fs.cfa_reg == stack_pointer_rtx); -+ } - else - { -- HOST_WIDE_INT toffset; -- int len = 16, tlen; -+ rtx eax = gen_rtx_REG (Pmode, AX_REG); -+ rtx r10 = NULL; -+ rtx (*adjust_stack_insn)(rtx, rtx, rtx); -+ const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx); -+ bool eax_live = ix86_eax_live_at_start_p (); -+ bool r10_live = false; - -- /* Choose the base register with the smallest address encoding. -- With a tie, choose FP > DRAP > SP. 
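The branch introduced by the comment just above scans the candidates in the order SP, DRAP, FP and accepts a later candidate whenever its encoding is no longer than the current best (tlen <= len), which is how the FP > DRAP > SP tie-break is realised. A compact standalone sketch of that selection loop (plain C; the candidate list and the per-candidate encoding lengths are made-up example inputs):

#include <stdio.h>

struct cand { const char *name; int enc_len; int usable; };

int
main (void)
{
  /* Listed from lowest to highest preference: SP, then DRAP, then FP.  */
  struct cand c[3] = { { "SP", 1, 1 }, { "DRAP", 4, 1 }, { "FP", 1, 1 } };
  const char *best = "none";
  int best_len = 16;

  for (int i = 0; i < 3; i++)
    if (c[i].usable && c[i].enc_len <= best_len)   /* <=, so later wins ties */
      {
        best = c[i].name;
        best_len = c[i].enc_len;
      }

  printf ("chosen base: %s (%d-byte displacement)\n", best, best_len);
  return 0;
}
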
*/ -- if (sp_ok) -- { -- base_reg = stack_pointer_rtx; -- base_offset = m->fs.sp_offset - cfa_offset; -- len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset); -- } -- if (drap_ok) -+ if (TARGET_64BIT) -+ r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0); -+ -+ if (eax_live) - { -- toffset = 0 - cfa_offset; -- tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset); -- if (tlen <= len) -+ insn = emit_insn (gen_push (eax)); -+ allocate -= UNITS_PER_WORD; -+ /* Note that SEH directives need to continue tracking the stack -+ pointer even after the frame pointer has been set up. */ -+ if (sp_is_cfa_reg || TARGET_SEH) - { -- base_reg = crtl->drap_reg; -- base_offset = toffset; -- len = tlen; -+ if (sp_is_cfa_reg) -+ m->fs.cfa_offset += UNITS_PER_WORD; -+ RTX_FRAME_RELATED_P (insn) = 1; -+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, -+ gen_rtx_SET (stack_pointer_rtx, -+ plus_constant (Pmode, stack_pointer_rtx, -+ -UNITS_PER_WORD))); - } - } -- if (hfp_ok) -+ -+ if (r10_live) - { -- toffset = m->fs.fp_offset - cfa_offset; -- tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset); -- if (tlen <= len) -+ r10 = gen_rtx_REG (Pmode, R10_REG); -+ insn = emit_insn (gen_push (r10)); -+ allocate -= UNITS_PER_WORD; -+ if (sp_is_cfa_reg || TARGET_SEH) - { -- base_reg = hard_frame_pointer_rtx; -- base_offset = toffset; -+ if (sp_is_cfa_reg) -+ m->fs.cfa_offset += UNITS_PER_WORD; -+ RTX_FRAME_RELATED_P (insn) = 1; -+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, -+ gen_rtx_SET (stack_pointer_rtx, -+ plus_constant (Pmode, stack_pointer_rtx, -+ -UNITS_PER_WORD))); - } - } -- } - -- /* Set the align return value. */ -- if (align) -- { -- if (base_reg == stack_pointer_rtx) -- *align = sp_align; -- else if (base_reg == crtl->drap_reg) -- *align = drap_align; -- else if (base_reg == hard_frame_pointer_rtx) -- *align = hfp_align; -- } --} -+ emit_move_insn (eax, GEN_INT (allocate)); -+ emit_insn (ix86_gen_allocate_stack_worker (eax, eax)); - --/* Return an RTX that points to CFA_OFFSET within the stack frame and -- the alignment of address. If ALIGN is non-null, it should point to -- an alignment value (in bits) that is preferred or zero and will -- recieve the alignment of the base register that was selected, -- irrespective of rather or not CFA_OFFSET is a multiple of that -- alignment value. If it is possible for the base register offset to be -- non-immediate then SCRATCH_REGNO should specify a scratch register to -- use. -+ /* Use the fact that AX still contains ALLOCATE. */ -+ adjust_stack_insn = (Pmode == DImode -+ ? gen_pro_epilogue_adjust_stack_di_sub -+ : gen_pro_epilogue_adjust_stack_si_sub); - -- The valid base registers are taken from CFUN->MACHINE->FS. */ -+ insn = emit_insn (adjust_stack_insn (stack_pointer_rtx, -+ stack_pointer_rtx, eax)); - --static rtx --choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align, -- unsigned int scratch_regno = INVALID_REGNUM) --{ -- rtx base_reg = NULL; -- HOST_WIDE_INT base_offset = 0; -+ if (sp_is_cfa_reg || TARGET_SEH) -+ { -+ if (sp_is_cfa_reg) -+ m->fs.cfa_offset += allocate; -+ RTX_FRAME_RELATED_P (insn) = 1; -+ add_reg_note (insn, REG_FRAME_RELATED_EXPR, -+ gen_rtx_SET (stack_pointer_rtx, -+ plus_constant (Pmode, stack_pointer_rtx, -+ -allocate))); -+ } -+ m->fs.sp_offset += allocate; - -- /* If a specific alignment is requested, try to get a base register -- with that alignment first. 
*/ -- if (align && *align) -- choose_basereg (cfa_offset, base_reg, base_offset, *align, align); -+ /* Use stack_pointer_rtx for relative addressing so that code works for -+ realigned stack. But this means that we need a blockage to prevent -+ stores based on the frame pointer from being scheduled before. */ -+ if (r10_live && eax_live) -+ { -+ t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax); -+ emit_move_insn (gen_rtx_REG (word_mode, R10_REG), -+ gen_frame_mem (word_mode, t)); -+ t = plus_constant (Pmode, t, UNITS_PER_WORD); -+ emit_move_insn (gen_rtx_REG (word_mode, AX_REG), -+ gen_frame_mem (word_mode, t)); -+ emit_insn (gen_memory_blockage ()); -+ } -+ else if (eax_live || r10_live) -+ { -+ t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax); -+ emit_move_insn (gen_rtx_REG (word_mode, -+ (eax_live ? AX_REG : R10_REG)), -+ gen_frame_mem (word_mode, t)); -+ emit_insn (gen_memory_blockage ()); -+ } -+ } -+ gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset); - -- if (!base_reg) -- choose_basereg (cfa_offset, base_reg, base_offset, 0, align); -+ /* If we havn't already set up the frame pointer, do so now. */ -+ if (frame_pointer_needed && !m->fs.fp_valid) -+ { -+ insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx, -+ GEN_INT (frame.stack_pointer_offset -+ - frame.hard_frame_pointer_offset)); -+ insn = emit_insn (insn); -+ RTX_FRAME_RELATED_P (insn) = 1; -+ add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL); - -- gcc_assert (base_reg != NULL); -+ if (m->fs.cfa_reg == stack_pointer_rtx) -+ m->fs.cfa_reg = hard_frame_pointer_rtx; -+ m->fs.fp_offset = frame.hard_frame_pointer_offset; -+ m->fs.fp_valid = true; -+ } - -- rtx base_offset_rtx = GEN_INT (base_offset); -+ if (!int_registers_saved) -+ ix86_emit_save_regs_using_mov (frame.reg_save_offset); -+ if (!sse_registers_saved) -+ ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset); -+ else if (save_stub_call_needed) -+ ix86_emit_outlined_ms2sysv_save (frame); - -- if (!x86_64_immediate_operand (base_offset_rtx, Pmode)) -+ /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT -+ in PROLOGUE. */ -+ if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry) - { -- gcc_assert (scratch_regno != INVALID_REGNUM); -- -- rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno); -- emit_move_insn (scratch_reg, base_offset_rtx); -- -- return gen_rtx_PLUS (Pmode, base_reg, scratch_reg); -+ rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM); -+ insn = emit_insn (gen_set_got (pic)); -+ RTX_FRAME_RELATED_P (insn) = 1; -+ add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX); -+ emit_insn (gen_prologue_use (pic)); -+ /* Deleting already emmitted SET_GOT if exist and allocated to -+ REAL_PIC_OFFSET_TABLE_REGNUM. */ -+ ix86_elim_entry_set_got (pic); - } - -- return plus_constant (Pmode, base_reg, base_offset); --} -- --/* Emit code to save registers in the prologue. */ -+ if (crtl->drap_reg && !crtl->stack_realign_needed) -+ { -+ /* vDRAP is setup but after reload it turns out stack realign -+ isn't necessary, here we will emit prologue to setup DRAP -+ without stack realign adjustment */ -+ t = choose_baseaddr (0, NULL); -+ emit_insn (gen_rtx_SET (crtl->drap_reg, t)); -+ } - --static void --ix86_emit_save_regs (void) --{ -- unsigned int regno; -- rtx_insn *insn; -+ /* Prevent instructions from being scheduled into register save push -+ sequence when access to the redzone area is done through frame pointer. 
-+ The offset between the frame pointer and the stack pointer is calculated -+ relative to the value of the stack pointer at the end of the function -+ prologue, and moving instructions that access redzone area via frame -+ pointer inside push sequence violates this assumption. */ -+ if (frame_pointer_needed && frame.red_zone_size) -+ emit_insn (gen_memory_blockage ()); - -- for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; ) -- if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true)) -- { -- insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno))); -- RTX_FRAME_RELATED_P (insn) = 1; -- } -+ /* SEH requires that the prologue end within 256 bytes of the start of -+ the function. Prevent instruction schedules that would extend that. -+ Further, prevent alloca modifications to the stack pointer from being -+ combined with prologue modifications. */ -+ if (TARGET_SEH) -+ emit_insn (gen_prologue_use (stack_pointer_rtx)); - } - --/* Emit a single register save at CFA - CFA_OFFSET. */ -+/* Emit code to restore REG using a POP insn. */ - - static void --ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno, -- HOST_WIDE_INT cfa_offset) -+ix86_emit_restore_reg_using_pop (rtx reg) - { - struct machine_function *m = cfun->machine; -- rtx reg = gen_rtx_REG (mode, regno); -- rtx mem, addr, base, insn; -- unsigned int align = GET_MODE_ALIGNMENT (mode); -- -- addr = choose_baseaddr (cfa_offset, &align); -- mem = gen_frame_mem (mode, addr); -+ rtx_insn *insn = emit_insn (gen_pop (reg)); - -- /* The location aligment depends upon the base register. */ -- align = MIN (GET_MODE_ALIGNMENT (mode), align); -- gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1))); -- set_mem_align (mem, align); -+ ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset); -+ m->fs.sp_offset -= UNITS_PER_WORD; - -- insn = emit_insn (gen_rtx_SET (mem, reg)); -- RTX_FRAME_RELATED_P (insn) = 1; -+ if (m->fs.cfa_reg == crtl->drap_reg -+ && REGNO (reg) == REGNO (crtl->drap_reg)) -+ { -+ /* Previously we'd represented the CFA as an expression -+ like *(%ebp - 8). We've just popped that value from -+ the stack, which means we need to reset the CFA to -+ the drap register. This will remain until we restore -+ the stack pointer. */ -+ add_reg_note (insn, REG_CFA_DEF_CFA, reg); -+ RTX_FRAME_RELATED_P (insn) = 1; - -- base = addr; -- if (GET_CODE (base) == PLUS) -- base = XEXP (base, 0); -- gcc_checking_assert (REG_P (base)); -+ /* This means that the DRAP register is valid for addressing too. */ -+ m->fs.drap_valid = true; -+ return; -+ } - -- /* When saving registers into a re-aligned local stack frame, avoid -- any tricky guessing by dwarf2out. */ -- if (m->fs.realigned) -+ if (m->fs.cfa_reg == stack_pointer_rtx) - { -- gcc_checking_assert (stack_realign_drap); -+ rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); -+ x = gen_rtx_SET (stack_pointer_rtx, x); -+ add_reg_note (insn, REG_CFA_ADJUST_CFA, x); -+ RTX_FRAME_RELATED_P (insn) = 1; - -- if (regno == REGNO (crtl->drap_reg)) -- { -- /* A bit of a hack. We force the DRAP register to be saved in -- the re-aligned stack frame, which provides us with a copy -- of the CFA that will last past the prologue. Install it. */ -- gcc_checking_assert (cfun->machine->fs.fp_valid); -- addr = plus_constant (Pmode, hard_frame_pointer_rtx, -- cfun->machine->fs.fp_offset - cfa_offset); -- mem = gen_rtx_MEM (mode, addr); -- add_reg_note (insn, REG_CFA_DEF_CFA, mem); -- } -- else -- { -- /* The frame pointer is a stable reference within the -- aligned frame. 
Use it. */ -- gcc_checking_assert (cfun->machine->fs.fp_valid); -- addr = plus_constant (Pmode, hard_frame_pointer_rtx, -- cfun->machine->fs.fp_offset - cfa_offset); -- mem = gen_rtx_MEM (mode, addr); -- add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg)); -- } -+ m->fs.cfa_offset -= UNITS_PER_WORD; - } - -- else if (base == stack_pointer_rtx && m->fs.sp_realigned -- && cfa_offset >= m->fs.sp_realigned_offset) -+ /* When the frame pointer is the CFA, and we pop it, we are -+ swapping back to the stack pointer as the CFA. This happens -+ for stack frames that don't allocate other data, so we assume -+ the stack pointer is now pointing at the return address, i.e. -+ the function entry state, which makes the offset be 1 word. */ -+ if (reg == hard_frame_pointer_rtx) - { -- gcc_checking_assert (stack_realign_fp); -- add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg)); -+ m->fs.fp_valid = false; -+ if (m->fs.cfa_reg == hard_frame_pointer_rtx) -+ { -+ m->fs.cfa_reg = stack_pointer_rtx; -+ m->fs.cfa_offset -= UNITS_PER_WORD; -+ -+ add_reg_note (insn, REG_CFA_DEF_CFA, -+ gen_rtx_PLUS (Pmode, stack_pointer_rtx, -+ GEN_INT (m->fs.cfa_offset))); -+ RTX_FRAME_RELATED_P (insn) = 1; -+ } - } -+} - -- /* The memory may not be relative to the current CFA register, -- which means that we may need to generate a new pattern for -- use by the unwind info. */ -- else if (base != m->fs.cfa_reg) -+/* Emit code to restore saved registers using POP insns. */ -+ -+static void -+ix86_emit_restore_regs_using_pop (void) -+{ -+ unsigned int regno; -+ -+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) -+ if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true)) -+ ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno)); -+} -+ -+/* Emit code and notes for the LEAVE instruction. If insn is non-null, -+ omits the emit and only attaches the notes. */ -+ -+static void -+ix86_emit_leave (rtx_insn *insn) -+{ -+ struct machine_function *m = cfun->machine; -+ if (!insn) -+ insn = emit_insn (ix86_gen_leave ()); -+ -+ ix86_add_queued_cfa_restore_notes (insn); -+ -+ gcc_assert (m->fs.fp_valid); -+ m->fs.sp_valid = true; -+ m->fs.sp_realigned = false; -+ m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD; -+ m->fs.fp_valid = false; -+ -+ if (m->fs.cfa_reg == hard_frame_pointer_rtx) - { -- addr = plus_constant (Pmode, m->fs.cfa_reg, -- m->fs.cfa_offset - cfa_offset); -- mem = gen_rtx_MEM (mode, addr); -- add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg)); -+ m->fs.cfa_reg = stack_pointer_rtx; -+ m->fs.cfa_offset = m->fs.sp_offset; -+ -+ add_reg_note (insn, REG_CFA_DEF_CFA, -+ plus_constant (Pmode, stack_pointer_rtx, -+ m->fs.sp_offset)); -+ RTX_FRAME_RELATED_P (insn) = 1; - } -+ ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx, -+ m->fs.fp_offset); - } - --/* Emit code to save registers using MOV insns. -- First register is stored at CFA - CFA_OFFSET. */ -+/* Emit code to restore saved registers using MOV insns. -+ First register is restored from CFA - CFA_OFFSET. 
*/ - static void --ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset) -+ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset, -+ bool maybe_eh_return) - { -+ struct machine_function *m = cfun->machine; - unsigned int regno; - - for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) -- if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true)) -+ if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true)) - { -- ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset); -+ rtx reg = gen_rtx_REG (word_mode, regno); -+ rtx mem; -+ rtx_insn *insn; -+ -+ mem = choose_baseaddr (cfa_offset, NULL); -+ mem = gen_frame_mem (word_mode, mem); -+ insn = emit_move_insn (reg, mem); -+ -+ if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg)) -+ { -+ /* Previously we'd represented the CFA as an expression -+ like *(%ebp - 8). We've just popped that value from -+ the stack, which means we need to reset the CFA to -+ the drap register. This will remain until we restore -+ the stack pointer. */ -+ add_reg_note (insn, REG_CFA_DEF_CFA, reg); -+ RTX_FRAME_RELATED_P (insn) = 1; -+ -+ /* This means that the DRAP register is valid for addressing. */ -+ m->fs.drap_valid = true; -+ } -+ else -+ ix86_add_cfa_restore_note (NULL, reg, cfa_offset); -+ - cfa_offset -= UNITS_PER_WORD; - } - } - --/* Emit code to save SSE registers using MOV insns. -- First register is stored at CFA - CFA_OFFSET. */ -+/* Emit code to restore saved registers using MOV insns. -+ First register is restored from CFA - CFA_OFFSET. */ - static void --ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset) -+ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset, -+ bool maybe_eh_return) - { - unsigned int regno; - - for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) -- if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true)) -+ if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true)) - { -- ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset); -- cfa_offset -= GET_MODE_SIZE (V4SFmode); -- } --} -+ rtx reg = gen_rtx_REG (V4SFmode, regno); -+ rtx mem; -+ unsigned int align = GET_MODE_ALIGNMENT (V4SFmode); - --static GTY(()) rtx queued_cfa_restores; -+ mem = choose_baseaddr (cfa_offset, &align); -+ mem = gen_rtx_MEM (V4SFmode, mem); - --/* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack -- manipulation insn. The value is on the stack at CFA - CFA_OFFSET. -- Don't add the note if the previously saved value will be left untouched -- within stack red-zone till return, as unwinders can find the same value -- in the register and on the stack. */ -+ /* The location aligment depends upon the base register. */ -+ align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align); -+ gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1))); -+ set_mem_align (mem, align); -+ emit_insn (gen_rtx_SET (reg, mem)); - --static void --ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset) --{ -- if (!crtl->shrink_wrapped -- && cfa_offset <= cfun->machine->fs.red_zone_offset) -- return; -+ ix86_add_cfa_restore_note (NULL, reg, cfa_offset); - -- if (insn) -- { -- add_reg_note (insn, REG_CFA_RESTORE, reg); -- RTX_FRAME_RELATED_P (insn) = 1; -- } -- else -- queued_cfa_restores -- = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores); -+ cfa_offset -= GET_MODE_SIZE (V4SFmode); -+ } - } - --/* Add queued REG_CFA_RESTORE notes if any to INSN. 
*/ -- - static void --ix86_add_queued_cfa_restore_notes (rtx insn) --{ -- rtx last; -- if (!queued_cfa_restores) -- return; -- for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1)) -- ; -- XEXP (last, 1) = REG_NOTES (insn); -- REG_NOTES (insn) = queued_cfa_restores; -- queued_cfa_restores = NULL_RTX; -- RTX_FRAME_RELATED_P (insn) = 1; --} -- --/* Expand prologue or epilogue stack adjustment. -- The pattern exist to put a dependency on all ebp-based memory accesses. -- STYLE should be negative if instructions should be marked as frame related, -- zero if %r11 register is live and cannot be freely used and positive -- otherwise. */ -- --static rtx --pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, -- int style, bool set_cfa) -+ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame, -+ bool use_call, int style) - { - struct machine_function *m = cfun->machine; -- rtx insn; -- bool add_frame_related_expr = false; -+ const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS -+ + m->call_ms2sysv_extra_regs; -+ rtvec v; -+ unsigned int elems_needed, align, i, vi = 0; -+ rtx_insn *insn; -+ rtx sym, tmp; -+ rtx rsi = gen_rtx_REG (word_mode, SI_REG); -+ rtx r10 = NULL_RTX; -+ const struct xlogue_layout &xlogue = xlogue_layout::get_instance (); -+ HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset (); -+ HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset; -+ rtx rsi_frame_load = NULL_RTX; -+ HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1; -+ enum xlogue_stub stub; - -- if (Pmode == SImode) -- insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset); -- else if (x86_64_immediate_operand (offset, DImode)) -- insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset); -- else -- { -- rtx tmp; -- /* r11 is used by indirect sibcall return as well, set before the -- epilogue and used after the epilogue. */ -- if (style) -- tmp = gen_rtx_REG (DImode, R11_REG); -- else -- { -- gcc_assert (src != hard_frame_pointer_rtx -- && dest != hard_frame_pointer_rtx); -- tmp = hard_frame_pointer_rtx; -- } -- insn = emit_insn (gen_rtx_SET (tmp, offset)); -- if (style < 0) -- add_frame_related_expr = true; -+ gcc_assert (!m->fs.fp_valid || frame_pointer_needed); - -- insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp); -- } -+ /* If using a realigned stack, we should never start with padding. */ -+ gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ()); - -- insn = emit_insn (insn); -- if (style >= 0) -- ix86_add_queued_cfa_restore_notes (insn); -+ /* Setup RSI as the stub's base pointer. */ -+ align = GET_MODE_ALIGNMENT (V4SFmode); -+ tmp = choose_baseaddr (rsi_offset, &align, SI_REG); -+ gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode)); - -- if (set_cfa) -- { -- rtx r; -+ emit_insn (gen_rtx_SET (rsi, tmp)); - -- gcc_assert (m->fs.cfa_reg == src); -- m->fs.cfa_offset += INTVAL (offset); -- m->fs.cfa_reg = dest; -+ /* Get a symbol for the stub. */ -+ if (frame_pointer_needed) -+ stub = use_call ? XLOGUE_STUB_RESTORE_HFP -+ : XLOGUE_STUB_RESTORE_HFP_TAIL; -+ else -+ stub = use_call ? XLOGUE_STUB_RESTORE -+ : XLOGUE_STUB_RESTORE_TAIL; -+ sym = xlogue.get_stub_rtx (stub); - -- r = gen_rtx_PLUS (Pmode, src, offset); -- r = gen_rtx_SET (dest, r); -- add_reg_note (insn, REG_CFA_ADJUST_CFA, r); -- RTX_FRAME_RELATED_P (insn) = 1; -- } -- else if (style < 0) -+ elems_needed = ncregs; -+ if (use_call) -+ elems_needed += 1; -+ else -+ elems_needed += frame_pointer_needed ? 
5 : 3; -+ v = rtvec_alloc (elems_needed); -+ -+ /* We call the epilogue stub when we need to pop incoming args or we are -+ doing a sibling call as the tail. Otherwise, we will emit a jmp to the -+ epilogue stub and it is the tail-call. */ -+ if (use_call) -+ RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym); -+ else - { -- RTX_FRAME_RELATED_P (insn) = 1; -- if (add_frame_related_expr) -+ RTVEC_ELT (v, vi++) = ret_rtx; -+ RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym); -+ if (frame_pointer_needed) - { -- rtx r = gen_rtx_PLUS (Pmode, src, offset); -- r = gen_rtx_SET (dest, r); -- add_reg_note (insn, REG_FRAME_RELATED_EXPR, r); -+ rtx rbp = gen_rtx_REG (DImode, BP_REG); -+ gcc_assert (m->fs.fp_valid); -+ gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx); -+ -+ tmp = gen_rtx_PLUS (DImode, rbp, GEN_INT (8)); -+ RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp); -+ RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp)); -+ tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode)); -+ RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp); -+ } -+ else -+ { -+ /* If no hard frame pointer, we set R10 to the SP restore value. */ -+ gcc_assert (!m->fs.fp_valid); -+ gcc_assert (m->fs.cfa_reg == stack_pointer_rtx); -+ gcc_assert (m->fs.sp_valid); -+ -+ r10 = gen_rtx_REG (DImode, R10_REG); -+ tmp = gen_rtx_PLUS (Pmode, rsi, GEN_INT (stub_ptr_offset)); -+ emit_insn (gen_rtx_SET (r10, tmp)); -+ -+ RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10); - } - } - -- if (dest == stack_pointer_rtx) -+ /* Generate frame load insns and restore notes. */ -+ for (i = 0; i < ncregs; ++i) - { -- HOST_WIDE_INT ooffset = m->fs.sp_offset; -- bool valid = m->fs.sp_valid; -- bool realigned = m->fs.sp_realigned; -+ const xlogue_layout::reginfo &r = xlogue.get_reginfo (i); -+ machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode; -+ rtx reg, frame_load; - -- if (src == hard_frame_pointer_rtx) -- { -- valid = m->fs.fp_valid; -- realigned = false; -- ooffset = m->fs.fp_offset; -- } -- else if (src == crtl->drap_reg) -+ reg = gen_rtx_REG (mode, r.regno); -+ frame_load = gen_frame_load (reg, rsi, r.offset); -+ -+ /* Save RSI frame load insn & note to add last. */ -+ if (r.regno == SI_REG) - { -- valid = m->fs.drap_valid; -- realigned = false; -- ooffset = 0; -+ gcc_assert (!rsi_frame_load); -+ rsi_frame_load = frame_load; -+ rsi_restore_offset = r.offset; - } - else - { -- /* Else there are two possibilities: SP itself, which we set -- up as the default above. Or EH_RETURN_STACKADJ_RTX, which is -- taken care of this by hand along the eh_return path. */ -- gcc_checking_assert (src == stack_pointer_rtx -- || offset == const0_rtx); -+ RTVEC_ELT (v, vi++) = frame_load; -+ ix86_add_cfa_restore_note (NULL, reg, r.offset); - } -- -- m->fs.sp_offset = ooffset - INTVAL (offset); -- m->fs.sp_valid = valid; -- m->fs.sp_realigned = realigned; - } -- return insn; --} -- --/* Find an available register to be used as dynamic realign argument -- pointer regsiter. Such a register will be written in prologue and -- used in begin of body, so it must not be -- 1. parameter passing register. -- 2. GOT pointer. -- We reuse static-chain register if it is available. Otherwise, we -- use DI for i386 and R13 for x86-64. We chose R13 since it has -- shorter encoding. -- -- Return: the regno of chosen register. */ - --static unsigned int --find_drap_reg (void) --{ -- tree decl = cfun->decl; -+ /* Add RSI frame load & restore note at the end. 
*/ -+ gcc_assert (rsi_frame_load); -+ gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1); -+ RTVEC_ELT (v, vi++) = rsi_frame_load; -+ ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG), -+ rsi_restore_offset); - -- /* Always use callee-saved register if there are no caller-saved -- registers. */ -- if (TARGET_64BIT) -+ /* Finally, for tail-call w/o a hard frame pointer, set SP to R10. */ -+ if (!use_call && !frame_pointer_needed) - { -- /* Use R13 for nested function or function need static chain. -- Since function with tail call may use any caller-saved -- registers in epilogue, DRAP must not use caller-saved -- register in such case. */ -- if (DECL_STATIC_CHAIN (decl) -- || cfun->machine->no_caller_saved_registers -- || crtl->tail_call_emit) -- return R13_REG; -+ gcc_assert (m->fs.sp_valid); -+ gcc_assert (!m->fs.sp_realigned); - -- return R10_REG; -+ /* At this point, R10 should point to frame.stack_realign_offset. */ -+ if (m->fs.cfa_reg == stack_pointer_rtx) -+ m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset; -+ m->fs.sp_offset = frame.stack_realign_offset; - } -+ -+ gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v)); -+ tmp = gen_rtx_PARALLEL (VOIDmode, v); -+ if (use_call) -+ insn = emit_insn (tmp); - else - { -- /* Use DI for nested function or function need static chain. -- Since function with tail call may use any caller-saved -- registers in epilogue, DRAP must not use caller-saved -- register in such case. */ -- if (DECL_STATIC_CHAIN (decl) -- || cfun->machine->no_caller_saved_registers -- || crtl->tail_call_emit) -- return DI_REG; -+ insn = emit_jump_insn (tmp); -+ JUMP_LABEL (insn) = ret_rtx; - -- /* Reuse static chain register if it isn't used for parameter -- passing. */ -- if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2) -+ if (frame_pointer_needed) -+ ix86_emit_leave (insn); -+ else - { -- unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl)); -- if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0) -- return CX_REG; -+ /* Need CFA adjust note. */ -+ tmp = gen_rtx_SET (stack_pointer_rtx, r10); -+ add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp); - } -- return DI_REG; - } --} - --/* Handle a "force_align_arg_pointer" attribute. */ -+ RTX_FRAME_RELATED_P (insn) = true; -+ ix86_add_queued_cfa_restore_notes (insn); - --static tree --ix86_handle_force_align_arg_pointer_attribute (tree *node, tree name, -- tree, int, bool *no_add_attrs) --{ -- if (TREE_CODE (*node) != FUNCTION_TYPE -- && TREE_CODE (*node) != METHOD_TYPE -- && TREE_CODE (*node) != FIELD_DECL -- && TREE_CODE (*node) != TYPE_DECL) -+ /* If we're not doing a tail-call, we need to adjust the stack. */ -+ if (use_call && m->fs.sp_valid) - { -- warning (OPT_Wattributes, "%qE attribute only applies to functions", -- name); -- *no_add_attrs = true; -+ HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset; -+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, -+ GEN_INT (dealloc), style, -+ m->fs.cfa_reg == stack_pointer_rtx); - } -- -- return NULL_TREE; - } - --/* Return minimum incoming stack alignment. */ -+/* Restore function stack, frame, and registers. */ - --static unsigned int --ix86_minimum_incoming_stack_boundary (bool sibcall) -+void -+ix86_expand_epilogue (int style) - { -- unsigned int incoming_stack_boundary; -- -- /* Stack of interrupt handler is aligned to 128 bits in 64bit mode. */ -- if (cfun->machine->func_type != TYPE_NORMAL) -- incoming_stack_boundary = TARGET_64BIT ? 
128 : MIN_STACK_BOUNDARY; -- /* Prefer the one specified at command line. */ -- else if (ix86_user_incoming_stack_boundary) -- incoming_stack_boundary = ix86_user_incoming_stack_boundary; -- /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary -- if -mstackrealign is used, it isn't used for sibcall check and -- estimated stack alignment is 128bit. */ -- else if (!sibcall -- && ix86_force_align_arg_pointer -- && crtl->stack_alignment_estimated == 128) -- incoming_stack_boundary = MIN_STACK_BOUNDARY; -- else -- incoming_stack_boundary = ix86_default_incoming_stack_boundary; -+ struct machine_function *m = cfun->machine; -+ struct machine_frame_state frame_state_save = m->fs; -+ bool restore_regs_via_mov; -+ bool using_drap; -+ bool restore_stub_is_tail = false; - -- /* Incoming stack alignment can be changed on individual functions -- via force_align_arg_pointer attribute. We use the smallest -- incoming stack boundary. */ -- if (incoming_stack_boundary > MIN_STACK_BOUNDARY -- && lookup_attribute (ix86_force_align_arg_pointer_string, -- TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))) -- incoming_stack_boundary = MIN_STACK_BOUNDARY; -+ if (ix86_function_naked (current_function_decl)) -+ { -+ /* The program should not reach this point. */ -+ emit_insn (gen_ud2 ()); -+ return; -+ } - -- /* The incoming stack frame has to be aligned at least at -- parm_stack_boundary. */ -- if (incoming_stack_boundary < crtl->parm_stack_boundary) -- incoming_stack_boundary = crtl->parm_stack_boundary; -+ ix86_finalize_stack_frame_flags (); -+ const struct ix86_frame &frame = cfun->machine->frame; - -- /* Stack at entrance of main is aligned by runtime. We use the -- smallest incoming stack boundary. */ -- if (incoming_stack_boundary > MAIN_STACK_BOUNDARY -- && DECL_NAME (current_function_decl) -- && MAIN_NAME_P (DECL_NAME (current_function_decl)) -- && DECL_FILE_SCOPE_P (current_function_decl)) -- incoming_stack_boundary = MAIN_STACK_BOUNDARY; -+ m->fs.sp_realigned = stack_realign_fp; -+ m->fs.sp_valid = stack_realign_fp -+ || !frame_pointer_needed -+ || crtl->sp_is_unchanging; -+ gcc_assert (!m->fs.sp_valid -+ || m->fs.sp_offset == frame.stack_pointer_offset); - -- return incoming_stack_boundary; --} -+ /* The FP must be valid if the frame pointer is present. */ -+ gcc_assert (frame_pointer_needed == m->fs.fp_valid); -+ gcc_assert (!m->fs.fp_valid -+ || m->fs.fp_offset == frame.hard_frame_pointer_offset); - --/* Update incoming stack boundary and estimated stack alignment. */ -+ /* We must have *some* valid pointer to the stack frame. */ -+ gcc_assert (m->fs.sp_valid || m->fs.fp_valid); - --static void --ix86_update_stack_boundary (void) --{ -- ix86_incoming_stack_boundary -- = ix86_minimum_incoming_stack_boundary (false); -+ /* The DRAP is never valid at this point. */ -+ gcc_assert (!m->fs.drap_valid); - -- /* x86_64 vararg needs 16byte stack alignment for register save area. */ -- if (TARGET_64BIT -- && cfun->stdarg -- && crtl->stack_alignment_estimated < 128) -- crtl->stack_alignment_estimated = 128; -+ /* See the comment about red zone and frame -+ pointer usage in ix86_expand_prologue. */ -+ if (frame_pointer_needed && frame.red_zone_size) -+ emit_insn (gen_memory_blockage ()); - -- /* __tls_get_addr needs to be called with 16-byte aligned stack. 
*/ -- if (ix86_tls_descriptor_calls_expanded_in_cfun -- && crtl->preferred_stack_boundary < 128) -- crtl->preferred_stack_boundary = 128; --} -+ using_drap = crtl->drap_reg && crtl->stack_realign_needed; -+ gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg); - --/* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is -- needed or an rtx for DRAP otherwise. */ -+ /* Determine the CFA offset of the end of the red-zone. */ -+ m->fs.red_zone_offset = 0; -+ if (ix86_using_red_zone () && crtl->args.pops_args < 65536) -+ { -+ /* The red-zone begins below return address and error code in -+ exception handler. */ -+ m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET; - --static rtx --ix86_get_drap_rtx (void) --{ -- /* We must use DRAP if there are outgoing arguments on stack and -- ACCUMULATE_OUTGOING_ARGS is false. */ -- if (ix86_force_drap -- || (cfun->machine->outgoing_args_on_stack -- && !ACCUMULATE_OUTGOING_ARGS)) -- crtl->need_drap = true; -+ /* When the register save area is in the aligned portion of -+ the stack, determine the maximum runtime displacement that -+ matches up with the aligned frame. */ -+ if (stack_realign_drap) -+ m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT -+ + UNITS_PER_WORD); -+ } - -- if (stack_realign_drap) -- { -- /* Assign DRAP to vDRAP and returns vDRAP */ -- unsigned int regno = find_drap_reg (); -- rtx drap_vreg; -- rtx arg_ptr; -- rtx_insn *seq, *insn; -+ HOST_WIDE_INT reg_save_offset = frame.reg_save_offset; - -- arg_ptr = gen_rtx_REG (Pmode, regno); -- crtl->drap_reg = arg_ptr; -+ /* Special care must be taken for the normal return case of a function -+ using eh_return: the eax and edx registers are marked as saved, but -+ not restored along this path. Adjust the save location to match. */ -+ if (crtl->calls_eh_return && style != 2) -+ reg_save_offset -= 2 * UNITS_PER_WORD; - -- start_sequence (); -- drap_vreg = copy_to_reg (arg_ptr); -- seq = get_insns (); -- end_sequence (); -+ /* EH_RETURN requires the use of moves to function properly. */ -+ if (crtl->calls_eh_return) -+ restore_regs_via_mov = true; -+ /* SEH requires the use of pops to identify the epilogue. */ -+ else if (TARGET_SEH) -+ restore_regs_via_mov = false; -+ /* If we're only restoring one register and sp cannot be used then -+ using a move instruction to restore the register since it's -+ less work than reloading sp and popping the register. */ -+ else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1) -+ restore_regs_via_mov = true; -+ else if (TARGET_EPILOGUE_USING_MOVE -+ && cfun->machine->use_fast_prologue_epilogue -+ && (frame.nregs > 1 -+ || m->fs.sp_offset != reg_save_offset)) -+ restore_regs_via_mov = true; -+ else if (frame_pointer_needed -+ && !frame.nregs -+ && m->fs.sp_offset != reg_save_offset) -+ restore_regs_via_mov = true; -+ else if (frame_pointer_needed -+ && TARGET_USE_LEAVE -+ && cfun->machine->use_fast_prologue_epilogue -+ && frame.nregs == 1) -+ restore_regs_via_mov = true; -+ else -+ restore_regs_via_mov = false; - -- insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ())); -- if (!optimize) -+ if (restore_regs_via_mov || frame.nsseregs) -+ { -+ /* Ensure that the entire register save area is addressable via -+ the stack pointer, if we will restore SSE regs via sp. 
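The check that follows compares the current sp_offset against 0x7fffffff because an x86-64 memory operand only carries a signed 32-bit displacement; if the distance from the stack pointer to the save area is larger than that, it cannot be encoded directly and the epilogue first moves the stack pointer closer. A trivial standalone check of that encodability limit (plain C; the sample offsets are arbitrary):

#include <stdio.h>
#include <stdint.h>

/* Can OFFSET be used as a displacement in an x86-64 memory operand?  */
static int
fits_in_simm32 (int64_t offset)
{
  return offset >= INT32_MIN && offset <= INT32_MAX;
}

int
main (void)
{
  printf ("0x7fffffff: %s\n",
          fits_in_simm32 (0x7fffffffLL) ? "encodable" : "needs SP adjustment");
  printf ("0x80000000: %s\n",
          fits_in_simm32 (0x80000000LL) ? "encodable" : "needs SP adjustment");
  return 0;
}
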
*/ -+ if (TARGET_64BIT -+ && m->fs.sp_offset > 0x7fffffff -+ && sp_valid_at (frame.stack_realign_offset + 1) -+ && (frame.nsseregs + frame.nregs) != 0) - { -- add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg); -- RTX_FRAME_RELATED_P (insn) = 1; -+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, -+ GEN_INT (m->fs.sp_offset -+ - frame.sse_reg_save_offset), -+ style, -+ m->fs.cfa_reg == stack_pointer_rtx); - } -- return drap_vreg; - } -- else -- return NULL; --} -- --/* Handle the TARGET_INTERNAL_ARG_POINTER hook. */ - --static rtx --ix86_internal_arg_pointer (void) --{ -- return virtual_incoming_args_rtx; --} -+ /* If there are any SSE registers to restore, then we have to do it -+ via moves, since there's obviously no pop for SSE regs. */ -+ if (frame.nsseregs) -+ ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset, -+ style == 2); - --struct scratch_reg { -- rtx reg; -- bool saved; --}; -+ if (m->call_ms2sysv) -+ { -+ int pop_incoming_args = crtl->args.pops_args && crtl->args.size; - --/* Return a short-lived scratch register for use on function entry. -- In 32-bit mode, it is valid only after the registers are saved -- in the prologue. This register must be released by means of -- release_scratch_register_on_entry once it is dead. */ -+ /* We cannot use a tail-call for the stub if: -+ 1. We have to pop incoming args, -+ 2. We have additional int regs to restore, or -+ 3. A sibling call will be the tail-call, or -+ 4. We are emitting an eh_return_internal epilogue. - --static void --get_scratch_register_on_entry (struct scratch_reg *sr) --{ -- int regno; -+ TODO: Item 4 has not yet tested! - -- sr->saved = false; -+ If any of the above are true, we will call the stub rather than -+ jump to it. */ -+ restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1); -+ ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style); -+ } - -- if (TARGET_64BIT) -+ /* If using out-of-line stub that is a tail-call, then...*/ -+ if (m->call_ms2sysv && restore_stub_is_tail) - { -- /* We always use R11 in 64-bit mode. */ -- regno = R11_REG; -+ /* TODO: parinoid tests. (remove eventually) */ -+ gcc_assert (m->fs.sp_valid); -+ gcc_assert (!m->fs.sp_realigned); -+ gcc_assert (!m->fs.fp_valid); -+ gcc_assert (!m->fs.realigned); -+ gcc_assert (m->fs.sp_offset == UNITS_PER_WORD); -+ gcc_assert (!crtl->drap_reg); -+ gcc_assert (!frame.nregs); - } -- else -+ else if (restore_regs_via_mov) - { -- tree decl = current_function_decl, fntype = TREE_TYPE (decl); -- bool fastcall_p -- = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE; -- bool thiscall_p -- = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE; -- bool static_chain_p = DECL_STATIC_CHAIN (decl); -- int regparm = ix86_function_regparm (fntype, decl); -- int drap_regno -- = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM; -+ rtx t; - -- /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax -- for the static chain register. */ -- if ((regparm < 1 || (fastcall_p && !static_chain_p)) -- && drap_regno != AX_REG) -- regno = AX_REG; -- /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx -- for the static chain register. */ -- else if (thiscall_p && !static_chain_p && drap_regno != AX_REG) -- regno = AX_REG; -- else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG) -- regno = DX_REG; -- /* ecx is the static chain register. 
*/ -- else if (regparm < 3 && !fastcall_p && !thiscall_p -- && !static_chain_p -- && drap_regno != CX_REG) -- regno = CX_REG; -- else if (ix86_save_reg (BX_REG, true, false)) -- regno = BX_REG; -- /* esi is the static chain register. */ -- else if (!(regparm == 3 && static_chain_p) -- && ix86_save_reg (SI_REG, true, false)) -- regno = SI_REG; -- else if (ix86_save_reg (DI_REG, true, false)) -- regno = DI_REG; -- else -+ if (frame.nregs) -+ ix86_emit_restore_regs_using_mov (reg_save_offset, style == 2); -+ -+ /* eh_return epilogues need %ecx added to the stack pointer. */ -+ if (style == 2) - { -- regno = (drap_regno == AX_REG ? DX_REG : AX_REG); -- sr->saved = true; -- } -- } -+ rtx sa = EH_RETURN_STACKADJ_RTX; -+ rtx_insn *insn; - -- sr->reg = gen_rtx_REG (Pmode, regno); -- if (sr->saved) -- { -- rtx_insn *insn = emit_insn (gen_push (sr->reg)); -- RTX_FRAME_RELATED_P (insn) = 1; -- } --} -+ /* %ecx can't be used for both DRAP register and eh_return. */ -+ if (crtl->drap_reg) -+ gcc_assert (REGNO (crtl->drap_reg) != CX_REG); - --/* Release a scratch register obtained from the preceding function. -+ /* regparm nested functions don't work with eh_return. */ -+ gcc_assert (!ix86_static_chain_on_stack); - -- If RELEASE_VIA_POP is true, we just pop the register off the stack -- to release it. This is what non-Linux systems use with -fstack-check. -+ if (frame_pointer_needed) -+ { -+ t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa); -+ t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD); -+ emit_insn (gen_rtx_SET (sa, t)); - -- Otherwise we use OFFSET to locate the saved register and the -- allocated stack space becomes part of the local frame and is -- deallocated by the epilogue. */ -+ t = gen_frame_mem (Pmode, hard_frame_pointer_rtx); -+ insn = emit_move_insn (hard_frame_pointer_rtx, t); - --static void --release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset, -- bool release_via_pop) --{ -- if (sr->saved) -+ /* Note that we use SA as a temporary CFA, as the return -+ address is at the proper place relative to it. We -+ pretend this happens at the FP restore insn because -+ prior to this insn the FP would be stored at the wrong -+ offset relative to SA, and after this insn we have no -+ other reasonable register to use for the CFA. We don't -+ bother resetting the CFA to the SP for the duration of -+ the return insn, unless the control flow instrumentation -+ is done. In this case the SP is used later and we have -+ to reset CFA to SP. 
*/ -+ add_reg_note (insn, REG_CFA_DEF_CFA, -+ plus_constant (Pmode, sa, UNITS_PER_WORD)); -+ ix86_add_queued_cfa_restore_notes (insn); -+ add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx); -+ RTX_FRAME_RELATED_P (insn) = 1; -+ -+ m->fs.cfa_reg = sa; -+ m->fs.cfa_offset = UNITS_PER_WORD; -+ m->fs.fp_valid = false; -+ -+ pro_epilogue_adjust_stack (stack_pointer_rtx, sa, -+ const0_rtx, style, -+ flag_cf_protection); -+ } -+ else -+ { -+ t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa); -+ t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD); -+ insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t)); -+ ix86_add_queued_cfa_restore_notes (insn); -+ -+ gcc_assert (m->fs.cfa_reg == stack_pointer_rtx); -+ if (m->fs.cfa_offset != UNITS_PER_WORD) -+ { -+ m->fs.cfa_offset = UNITS_PER_WORD; -+ add_reg_note (insn, REG_CFA_DEF_CFA, -+ plus_constant (Pmode, stack_pointer_rtx, -+ UNITS_PER_WORD)); -+ RTX_FRAME_RELATED_P (insn) = 1; -+ } -+ } -+ m->fs.sp_offset = UNITS_PER_WORD; -+ m->fs.sp_valid = true; -+ m->fs.sp_realigned = false; -+ } -+ } -+ else - { -- if (release_via_pop) -+ /* SEH requires that the function end with (1) a stack adjustment -+ if necessary, (2) a sequence of pops, and (3) a return or -+ jump instruction. Prevent insns from the function body from -+ being scheduled into this sequence. */ -+ if (TARGET_SEH) - { -- struct machine_function *m = cfun->machine; -- rtx x, insn = emit_insn (gen_pop (sr->reg)); -+ /* Prevent a catch region from being adjacent to the standard -+ epilogue sequence. Unfortunately neither crtl->uses_eh_lsda -+ nor several other flags that would be interesting to test are -+ set up yet. */ -+ if (flag_non_call_exceptions) -+ emit_insn (gen_nops (const1_rtx)); -+ else -+ emit_insn (gen_blockage ()); -+ } - -- /* The RX FRAME_RELATED_P mechanism doesn't know about pop. */ -- RTX_FRAME_RELATED_P (insn) = 1; -- x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD)); -- x = gen_rtx_SET (stack_pointer_rtx, x); -- add_reg_note (insn, REG_FRAME_RELATED_EXPR, x); -- m->fs.sp_offset -= UNITS_PER_WORD; -+ /* First step is to deallocate the stack frame so that we can -+ pop the registers. If the stack pointer was realigned, it needs -+ to be restored now. Also do it on SEH target for very large -+ frame as the emitted instructions aren't allowed by the ABI -+ in epilogues. */ -+ if (!m->fs.sp_valid || m->fs.sp_realigned -+ || (TARGET_SEH -+ && (m->fs.sp_offset - reg_save_offset -+ >= SEH_MAX_FRAME_SIZE))) -+ { -+ pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx, -+ GEN_INT (m->fs.fp_offset -+ - reg_save_offset), -+ style, false); - } -- else -+ else if (m->fs.sp_offset != reg_save_offset) - { -- rtx x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (offset)); -- x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x)); -- emit_insn (x); -+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, -+ GEN_INT (m->fs.sp_offset -+ - reg_save_offset), -+ style, -+ m->fs.cfa_reg == stack_pointer_rtx); - } -+ -+ ix86_emit_restore_regs_using_pop (); - } --} - --/* Emit code to adjust the stack pointer by SIZE bytes while probing it. -+ /* If we used a stack pointer and haven't already got rid of it, -+ then do so now. */ -+ if (m->fs.fp_valid) -+ { -+ /* If the stack pointer is valid and pointing at the frame -+ pointer store address, then we only need a pop. 
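The branch that follows chooses among three ways to tear down the frame pointer: a bare pop when the stack pointer already sits on the saved slot, a leave when the target favours it or we optimise for size, and otherwise an explicit stack-pointer reload followed by a pop. A compact sketch of that decision order (plain C; the two flags stand in for the target checks made by the real code, and the printed instruction strings are only illustrative):

#include <stdio.h>

/* Pick how an epilogue would restore the frame pointer.
   sp_at_fp_save: stack pointer already points at the saved FP slot.
   prefer_leave:  target favours 'leave', or we are optimising for size.  */
static const char *
fp_teardown (int sp_at_fp_save, int prefer_leave)
{
  if (sp_at_fp_save)
    return "pop  %rbp";                          /* cheapest: just pop it */
  if (prefer_leave)
    return "leave";                              /* mov %rbp,%rsp ; pop %rbp */
  return "mov  %rbp,%rsp ; pop  %rbp";           /* explicit reload, then pop */
}

int
main (void)
{
  printf ("%s\n", fp_teardown (1, 0));
  printf ("%s\n", fp_teardown (0, 1));
  printf ("%s\n", fp_teardown (0, 0));
  return 0;
}
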
*/ -+ if (sp_valid_at (frame.hfp_save_offset) -+ && m->fs.sp_offset == frame.hfp_save_offset) -+ ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx); -+ /* Leave results in shorter dependency chains on CPUs that are -+ able to grok it fast. */ -+ else if (TARGET_USE_LEAVE -+ || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun)) -+ || !cfun->machine->use_fast_prologue_epilogue) -+ ix86_emit_leave (NULL); -+ else -+ { -+ pro_epilogue_adjust_stack (stack_pointer_rtx, -+ hard_frame_pointer_rtx, -+ const0_rtx, style, !using_drap); -+ ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx); -+ } -+ } - -- This differs from the next routine in that it tries hard to prevent -- attacks that jump the stack guard. Thus it is never allowed to allocate -- more than PROBE_INTERVAL bytes of stack space without a suitable -- probe. -+ if (using_drap) -+ { -+ int param_ptr_offset = UNITS_PER_WORD; -+ rtx_insn *insn; - -- INT_REGISTERS_SAVED is true if integer registers have already been -- pushed on the stack. */ -+ gcc_assert (stack_realign_drap); - --static void --ix86_adjust_stack_and_probe_stack_clash (HOST_WIDE_INT size, -- const bool int_registers_saved) --{ -- struct machine_function *m = cfun->machine; -+ if (ix86_static_chain_on_stack) -+ param_ptr_offset += UNITS_PER_WORD; -+ if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg))) -+ param_ptr_offset += UNITS_PER_WORD; - -- /* If this function does not statically allocate stack space, then -- no probes are needed. */ -- if (!size) -+ insn = emit_insn (gen_rtx_SET -+ (stack_pointer_rtx, -+ gen_rtx_PLUS (Pmode, -+ crtl->drap_reg, -+ GEN_INT (-param_ptr_offset)))); -+ m->fs.cfa_reg = stack_pointer_rtx; -+ m->fs.cfa_offset = param_ptr_offset; -+ m->fs.sp_offset = param_ptr_offset; -+ m->fs.realigned = false; -+ -+ add_reg_note (insn, REG_CFA_DEF_CFA, -+ gen_rtx_PLUS (Pmode, stack_pointer_rtx, -+ GEN_INT (param_ptr_offset))); -+ RTX_FRAME_RELATED_P (insn) = 1; -+ -+ if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg))) -+ ix86_emit_restore_reg_using_pop (crtl->drap_reg); -+ } -+ -+ /* At this point the stack pointer must be valid, and we must have -+ restored all of the registers. We may not have deallocated the -+ entire stack frame. We've delayed this until now because it may -+ be possible to merge the local stack deallocation with the -+ deallocation forced by ix86_static_chain_on_stack. */ -+ gcc_assert (m->fs.sp_valid); -+ gcc_assert (!m->fs.sp_realigned); -+ gcc_assert (!m->fs.fp_valid); -+ gcc_assert (!m->fs.realigned); -+ if (m->fs.sp_offset != UNITS_PER_WORD) - { -- /* However, the allocation of space via pushes for register -- saves could be viewed as allocating space, but without the -- need to probe. */ -- if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed) -- dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true); -- else -- dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false); -- return; -+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, -+ GEN_INT (m->fs.sp_offset - UNITS_PER_WORD), -+ style, true); - } -+ else -+ ix86_add_queued_cfa_restore_notes (get_last_insn ()); - -- /* If we are a noreturn function, then we have to consider the -- possibility that we're called via a jump rather than a call. -+ /* Sibcall epilogues don't want a return instruction. */ -+ if (style == 0) -+ { -+ m->fs = frame_state_save; -+ return; -+ } - -- Thus we don't have the implicit probe generated by saving the -- return address into the stack at the call. 
Thus, the stack -- pointer could be anywhere in the guard page. The safe thing -- to do is emit a probe now. -+ if (cfun->machine->func_type != TYPE_NORMAL) -+ emit_jump_insn (gen_interrupt_return ()); -+ else if (crtl->args.pops_args && crtl->args.size) -+ { -+ rtx popc = GEN_INT (crtl->args.pops_args); - -- The probe can be avoided if we have already emitted any callee -- register saves into the stack or have a frame pointer (which will -- have been saved as well). Those saves will function as implicit -- probes. -+ /* i386 can only pop 64K bytes. If asked to pop more, pop return -+ address, do explicit add, and jump indirectly to the caller. */ - -- ?!? This should be revamped to work like aarch64 and s390 where -- we track the offset from the most recent probe. Normally that -- offset would be zero. For a noreturn function we would reset -- it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT). Then -- we just probe when we cross PROBE_INTERVAL. */ -- if (TREE_THIS_VOLATILE (cfun->decl) -- && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)) -- { -- /* We can safely use any register here since we're just going to push -- its value and immediately pop it back. But we do try and avoid -- argument passing registers so as not to introduce dependencies in -- the pipeline. For 32 bit we use %esi and for 64 bit we use %rax. */ -- rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG); -- rtx_insn *insn_push = emit_insn (gen_push (dummy_reg)); -- rtx_insn *insn_pop = emit_insn (gen_pop (dummy_reg)); -- m->fs.sp_offset -= UNITS_PER_WORD; -- if (m->fs.cfa_reg == stack_pointer_rtx) -+ if (crtl->args.pops_args >= 65536) - { -- m->fs.cfa_offset -= UNITS_PER_WORD; -- rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD); -- x = gen_rtx_SET (stack_pointer_rtx, x); -- add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x); -- RTX_FRAME_RELATED_P (insn_push) = 1; -- x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); -- x = gen_rtx_SET (stack_pointer_rtx, x); -- add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x); -- RTX_FRAME_RELATED_P (insn_pop) = 1; -- } -- emit_insn (gen_blockage ()); -- } -+ rtx ecx = gen_rtx_REG (SImode, CX_REG); -+ rtx_insn *insn; - -- /* If we allocate less than the size of the guard statically, -- then no probing is necessary, but we do need to allocate -- the stack. */ -- if (size < (1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE))) -- { -- pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, -- GEN_INT (-size), -1, -- m->fs.cfa_reg == stack_pointer_rtx); -- dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true); -- return; -- } -+ /* There is no "pascal" calling convention in any 64bit ABI. */ -+ gcc_assert (!TARGET_64BIT); - -- /* We're allocating a large enough stack frame that we need to -- emit probes. Either emit them inline or in a loop depending -- on the size. */ -- HOST_WIDE_INT probe_interval = get_probe_interval (); -- if (size <= 4 * probe_interval) -- { -- HOST_WIDE_INT i; -- for (i = probe_interval; i <= size; i += probe_interval) -- { -- /* Allocate PROBE_INTERVAL bytes. */ -- rtx insn -- = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, -- GEN_INT (-probe_interval), -1, -- m->fs.cfa_reg == stack_pointer_rtx); -- add_reg_note (insn, REG_STACK_CHECK, const0_rtx); -+ insn = emit_insn (gen_pop (ecx)); -+ m->fs.cfa_offset -= UNITS_PER_WORD; -+ m->fs.sp_offset -= UNITS_PER_WORD; - -- /* And probe at *sp. 
*/ -- emit_stack_probe (stack_pointer_rtx); -- emit_insn (gen_blockage ()); -- } -+ rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); -+ x = gen_rtx_SET (stack_pointer_rtx, x); -+ add_reg_note (insn, REG_CFA_ADJUST_CFA, x); -+ add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx)); -+ RTX_FRAME_RELATED_P (insn) = 1; - -- /* We need to allocate space for the residual, but we do not need -- to probe the residual. */ -- HOST_WIDE_INT residual = (i - probe_interval - size); -- if (residual) -- pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, -- GEN_INT (residual), -1, -- m->fs.cfa_reg == stack_pointer_rtx); -- dump_stack_clash_frame_info (PROBE_INLINE, residual != 0); -+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, -+ popc, -1, true); -+ emit_jump_insn (gen_simple_return_indirect_internal (ecx)); -+ } -+ else -+ emit_jump_insn (gen_simple_return_pop_internal (popc)); - } -- else -+ else if (!m->call_ms2sysv || !restore_stub_is_tail) - { -- /* We expect the GP registers to be saved when probes are used -- as the probing sequences might need a scratch register and -- the routine to allocate one assumes the integer registers -- have already been saved. */ -- gcc_assert (int_registers_saved); -- -- struct scratch_reg sr; -- get_scratch_register_on_entry (&sr); -- -- /* If we needed to save a register, then account for any space -- that was pushed (we are not going to pop the register when -- we do the restore). */ -- if (sr.saved) -- size -= UNITS_PER_WORD; -+ /* In case of return from EH a simple return cannot be used -+ as a return address will be compared with a shadow stack -+ return address. Use indirect jump instead. */ -+ if (style == 2 && flag_cf_protection) -+ { -+ /* Register used in indirect jump must be in word_mode. But -+ Pmode may not be the same as word_mode for x32. */ -+ rtx ecx = gen_rtx_REG (word_mode, CX_REG); -+ rtx_insn *insn; - -- /* Step 1: round SIZE down to a multiple of the interval. */ -- HOST_WIDE_INT rounded_size = size & -probe_interval; -+ insn = emit_insn (gen_pop (ecx)); -+ m->fs.cfa_offset -= UNITS_PER_WORD; -+ m->fs.sp_offset -= UNITS_PER_WORD; - -- /* Step 2: compute final value of the loop counter. Use lea if -- possible. */ -- rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size); -- rtx insn; -- if (address_no_seg_operand (addr, Pmode)) -- insn = emit_insn (gen_rtx_SET (sr.reg, addr)); -- else -- { -- emit_move_insn (sr.reg, GEN_INT (-rounded_size)); -- insn = emit_insn (gen_rtx_SET (sr.reg, -- gen_rtx_PLUS (Pmode, sr.reg, -- stack_pointer_rtx))); -- } -- if (m->fs.cfa_reg == stack_pointer_rtx) -- { -- add_reg_note (insn, REG_CFA_DEF_CFA, -- plus_constant (Pmode, sr.reg, -- m->fs.cfa_offset + rounded_size)); -+ rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); -+ x = gen_rtx_SET (stack_pointer_rtx, x); -+ add_reg_note (insn, REG_CFA_ADJUST_CFA, x); -+ add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx)); - RTX_FRAME_RELATED_P (insn) = 1; -- } - -- /* Step 3: the loop. 
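The stack probing sequences around this point, inline and loop-based, follow the same arithmetic: round the requested allocation down to the probe interval, touch the stack once per interval, and allocate the residual, which is smaller than one interval, without a probe. A standalone sketch of that probing plan (plain C; the 10000-byte frame and the 4096-byte interval are example values only):

#include <stdio.h>

int
main (void)
{
  long size = 10000;                    /* bytes the prologue must allocate */
  long interval = 4096;                 /* example probe interval */
  long rounded = size & -interval;      /* round down to a multiple */

  for (long off = interval; off <= rounded; off += interval)
    printf ("allocate %ld bytes, then probe the word at the new SP (-%ld)\n",
            interval, off);

  if (size != rounded)
    printf ("allocate the final %ld bytes, no probe needed\n", size - rounded);

  return 0;
}
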
*/ -- rtx size_rtx = GEN_INT (rounded_size); -- insn = emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, -- size_rtx)); -- if (m->fs.cfa_reg == stack_pointer_rtx) -- { -- m->fs.cfa_offset += rounded_size; -- add_reg_note (insn, REG_CFA_DEF_CFA, -- plus_constant (Pmode, stack_pointer_rtx, -- m->fs.cfa_offset)); -- RTX_FRAME_RELATED_P (insn) = 1; -+ emit_jump_insn (gen_simple_return_indirect_internal (ecx)); - } -- m->fs.sp_offset += rounded_size; -- emit_insn (gen_blockage ()); -- -- /* Step 4: adjust SP if we cannot assert at compile-time that SIZE -- is equal to ROUNDED_SIZE. */ -- -- if (size != rounded_size) -- pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, -- GEN_INT (rounded_size - size), -1, -- m->fs.cfa_reg == stack_pointer_rtx); -- dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size); -- -- /* This does not deallocate the space reserved for the scratch -- register. That will be deallocated in the epilogue. */ -- release_scratch_register_on_entry (&sr, size, false); -+ else -+ emit_jump_insn (gen_simple_return_internal ()); - } - -- /* Make sure nothing is scheduled before we are done. */ -- emit_insn (gen_blockage ()); -+ /* Restore the state back to the state from the prologue, -+ so that it's correct for the next epilogue. */ -+ m->fs = frame_state_save; - } - --/* Emit code to adjust the stack pointer by SIZE bytes while probing it. -- -- INT_REGISTERS_SAVED is true if integer registers have already been -- pushed on the stack. */ -+/* Reset from the function's potential modifications. */ - - static void --ix86_adjust_stack_and_probe (HOST_WIDE_INT size, -- const bool int_registers_saved) -+ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED) - { -- /* We skip the probe for the first interval + a small dope of 4 words and -- probe that many bytes past the specified size to maintain a protection -- area at the botton of the stack. */ -- const int dope = 4 * UNITS_PER_WORD; -- rtx size_rtx = GEN_INT (size), last; -+ if (pic_offset_table_rtx -+ && !ix86_use_pseudo_pic_reg ()) -+ SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM); - -- /* See if we have a constant small number of probes to generate. If so, -- that's the easy case. The run-time loop is made up of 9 insns in the -- generic case while the compile-time loop is made up of 3+2*(n-1) insns -- for n # of intervals. */ -- if (size <= 4 * get_probe_interval ()) -+ if (TARGET_MACHO) - { -- HOST_WIDE_INT i, adjust; -- bool first_probe = true; -+ rtx_insn *insn = get_last_insn (); -+ rtx_insn *deleted_debug_label = NULL; - -- /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for -- values of N from 1 until it exceeds SIZE. If only one probe is -- needed, this will not generate any code. Then adjust and probe -- to PROBE_INTERVAL + SIZE. */ -- for (i = get_probe_interval (); i < size; i += get_probe_interval ()) -+ /* Mach-O doesn't support labels at the end of objects, so if -+ it looks like we might want one, take special action. -+ First, collect any sequence of deleted debug labels. 
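As a rough model of the interval probing that the deleted hunks above implement (both the inline sequence and the loop form allocate the frame one probe interval at a time and touch the new stack bottom after each step, leaving any sub-interval residual unprobed), here is a minimal sketch; the 4096-byte interval and the textual output are assumptions for illustration, not GCC's implementation.

#include <stdio.h>

#define PROBE_INTERVAL 4096L

static void
probe_plan (long size)
{
  long done = 0;

  /* One probe per full interval: each step stays within one guard page
     of the previous probe.  */
  while (done + PROBE_INTERVAL <= size)
    {
      done += PROBE_INTERVAL;
      printf ("sub $%ld, sp ; probe *sp   (offset %ld)\n",
              PROBE_INTERVAL, done);
    }

  /* The residual is smaller than one interval, so no probe is needed.  */
  if (done < size)
    printf ("sub $%ld, sp               (residual, no probe)\n",
            size - done);
}

int
main (void)
{
  probe_plan (3 * PROBE_INTERVAL + 512);
  return 0;
}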
*/ -+ while (insn -+ && NOTE_P (insn) -+ && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL) - { -- if (first_probe) -- { -- adjust = 2 * get_probe_interval () + dope; -- first_probe = false; -- } -- else -- adjust = get_probe_interval (); -- -- emit_insn (gen_rtx_SET (stack_pointer_rtx, -- plus_constant (Pmode, stack_pointer_rtx, -- -adjust))); -- emit_stack_probe (stack_pointer_rtx); -+ /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL -+ notes only, instead set their CODE_LABEL_NUMBER to -1, -+ otherwise there would be code generation differences -+ in between -g and -g0. */ -+ if (NOTE_P (insn) && NOTE_KIND (insn) -+ == NOTE_INSN_DELETED_DEBUG_LABEL) -+ deleted_debug_label = insn; -+ insn = PREV_INSN (insn); - } - -- if (first_probe) -- adjust = size + get_probe_interval () + dope; -- else -- adjust = size + get_probe_interval () - i; -- -- emit_insn (gen_rtx_SET (stack_pointer_rtx, -- plus_constant (Pmode, stack_pointer_rtx, -- -adjust))); -- emit_stack_probe (stack_pointer_rtx); -+ /* If we have: -+ label: -+ barrier -+ then this needs to be detected, so skip past the barrier. */ - -- /* Adjust back to account for the additional first interval. */ -- last = emit_insn (gen_rtx_SET (stack_pointer_rtx, -- plus_constant (Pmode, stack_pointer_rtx, -- (get_probe_interval () -- + dope)))); -- } -+ if (insn && BARRIER_P (insn)) -+ insn = PREV_INSN (insn); - -- /* Otherwise, do the same as above, but in a loop. Note that we must be -- extra careful with variables wrapping around because we might be at -- the very top (or the very bottom) of the address space and we have -- to be able to handle this case properly; in particular, we use an -- equality test for the loop condition. */ -- else -- { -- /* We expect the GP registers to be saved when probes are used -- as the probing sequences might need a scratch register and -- the routine to allocate one assumes the integer registers -- have already been saved. */ -- gcc_assert (int_registers_saved); -- -- HOST_WIDE_INT rounded_size; -- struct scratch_reg sr; -- -- get_scratch_register_on_entry (&sr); -- -- /* If we needed to save a register, then account for any space -- that was pushed (we are not going to pop the register when -- we do the restore). */ -- if (sr.saved) -- size -= UNITS_PER_WORD; -- -- /* Step 1: round SIZE to the previous multiple of the interval. */ -- -- rounded_size = ROUND_DOWN (size, get_probe_interval ()); -+ /* Up to now we've only seen notes or barriers. */ -+ if (insn) -+ { -+ if (LABEL_P (insn) -+ || (NOTE_P (insn) -+ && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)) -+ /* Trailing label. */ -+ fputs ("\tnop\n", file); -+ else if (cfun && ! cfun->is_thunk) -+ { -+ /* See if we have a completely empty function body, skipping -+ the special case of the picbase thunk emitted as asm. */ -+ while (insn && ! INSN_P (insn)) -+ insn = PREV_INSN (insn); -+ /* If we don't find any insns, we've got an empty function body; -+ I.e. completely empty - without a return or branch. This is -+ taken as the case where a function body has been removed -+ because it contains an inline __builtin_unreachable(). GCC -+ declares that reaching __builtin_unreachable() means UB so -+ we're not obliged to do anything special; however, we want -+ non-zero-sized function bodies. To meet this, and help the -+ user out, let's trap the case. 
*/ -+ if (insn == NULL) -+ fputs ("\tud2\n", file); -+ } -+ } -+ else if (deleted_debug_label) -+ for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn)) -+ if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL) -+ CODE_LABEL_NUMBER (insn) = -1; -+ } -+} - -+/* Return a scratch register to use in the split stack prologue. The -+ split stack prologue is used for -fsplit-stack. It is the first -+ instructions in the function, even before the regular prologue. -+ The scratch register can be any caller-saved register which is not -+ used for parameters or for the static chain. */ - -- /* Step 2: compute initial and final value of the loop counter. */ -+static unsigned int -+split_stack_prologue_scratch_regno (void) -+{ -+ if (TARGET_64BIT) -+ return R11_REG; -+ else -+ { -+ bool is_fastcall, is_thiscall; -+ int regparm; - -- /* SP = SP_0 + PROBE_INTERVAL. */ -- emit_insn (gen_rtx_SET (stack_pointer_rtx, -- plus_constant (Pmode, stack_pointer_rtx, -- - (get_probe_interval () + dope)))); -+ is_fastcall = (lookup_attribute ("fastcall", -+ TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))) -+ != NULL); -+ is_thiscall = (lookup_attribute ("thiscall", -+ TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))) -+ != NULL); -+ regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl); - -- /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */ -- if (rounded_size <= (HOST_WIDE_INT_1 << 31)) -- emit_insn (gen_rtx_SET (sr.reg, -- plus_constant (Pmode, stack_pointer_rtx, -- -rounded_size))); -+ if (is_fastcall) -+ { -+ if (DECL_STATIC_CHAIN (cfun->decl)) -+ { -+ sorry ("%<-fsplit-stack%> does not support fastcall with " -+ "nested function"); -+ return INVALID_REGNUM; -+ } -+ return AX_REG; -+ } -+ else if (is_thiscall) -+ { -+ if (!DECL_STATIC_CHAIN (cfun->decl)) -+ return DX_REG; -+ return AX_REG; -+ } -+ else if (regparm < 3) -+ { -+ if (!DECL_STATIC_CHAIN (cfun->decl)) -+ return CX_REG; -+ else -+ { -+ if (regparm >= 2) -+ { -+ sorry ("%<-fsplit-stack%> does not support 2 register " -+ "parameters for a nested function"); -+ return INVALID_REGNUM; -+ } -+ return DX_REG; -+ } -+ } - else - { -- emit_move_insn (sr.reg, GEN_INT (-rounded_size)); -- emit_insn (gen_rtx_SET (sr.reg, -- gen_rtx_PLUS (Pmode, sr.reg, -- stack_pointer_rtx))); -+ /* FIXME: We could make this work by pushing a register -+ around the addition and comparison. */ -+ sorry ("%<-fsplit-stack%> does not support 3 register parameters"); -+ return INVALID_REGNUM; - } -+ } -+} - -+/* A SYMBOL_REF for the function which allocates new stackspace for -+ -fsplit-stack. */ - -- /* Step 3: the loop -- -- do -- { -- SP = SP + PROBE_INTERVAL -- probe at SP -- } -- while (SP != LAST_ADDR) -- -- adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for -- values of N from 1 until it is equal to ROUNDED_SIZE. */ -- -- emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx)); -+static GTY(()) rtx split_stack_fn; - -+/* A SYMBOL_REF for the more stack function when using the large -+ model. */ - -- /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot -- assert at compile-time that SIZE is equal to ROUNDED_SIZE. */ -+static GTY(()) rtx split_stack_fn_large; - -- if (size != rounded_size) -- { -- emit_insn (gen_rtx_SET (stack_pointer_rtx, -- plus_constant (Pmode, stack_pointer_rtx, -- rounded_size - size))); -- emit_stack_probe (stack_pointer_rtx); -- } -+/* Return location of the stack guard value in the TLS block. */ - -- /* Adjust back to account for the additional first interval. 
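Restating the scratch-register selection added above as a plain decision table may help: 64-bit code always has %r11 free, while 32-bit code must dodge whichever of %eax/%ecx/%edx the calling convention and a live static chain occupy. This is a sketch, not the GCC routine; the string return values stand in for the register numbers it returns.

#include <stdio.h>
#include <stdbool.h>

static const char *
scratch_reg_for (bool is_64bit, bool is_fastcall, bool is_thiscall,
                 int regparm, bool has_static_chain)
{
  if (is_64bit)
    return "r11";
  if (is_fastcall)
    return has_static_chain ? "unsupported" : "eax";
  if (is_thiscall)
    return has_static_chain ? "eax" : "edx";
  if (regparm < 3)
    {
      if (!has_static_chain)
        return "ecx";
      return regparm >= 2 ? "unsupported" : "edx";
    }
  /* All three of eax/ecx/edx carry parameters: no scratch available.  */
  return "unsupported";
}

int
main (void)
{
  /* Plain cdecl, no static chain: %ecx is the scratch register.  */
  printf ("%s\n", scratch_reg_for (false, false, false, 0, false));
  return 0;
}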
*/ -- last = emit_insn (gen_rtx_SET (stack_pointer_rtx, -- plus_constant (Pmode, stack_pointer_rtx, -- (get_probe_interval () -- + dope)))); -+rtx -+ix86_split_stack_guard (void) -+{ -+ int offset; -+ addr_space_t as = DEFAULT_TLS_SEG_REG; -+ rtx r; - -- /* This does not deallocate the space reserved for the scratch -- register. That will be deallocated in the epilogue. */ -- release_scratch_register_on_entry (&sr, size, false); -- } -+ gcc_assert (flag_split_stack); - -- /* Even if the stack pointer isn't the CFA register, we need to correctly -- describe the adjustments made to it, in particular differentiate the -- frame-related ones from the frame-unrelated ones. */ -- if (size > 0) -- { -- rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2)); -- XVECEXP (expr, 0, 0) -- = gen_rtx_SET (stack_pointer_rtx, -- plus_constant (Pmode, stack_pointer_rtx, -size)); -- XVECEXP (expr, 0, 1) -- = gen_rtx_SET (stack_pointer_rtx, -- plus_constant (Pmode, stack_pointer_rtx, -- get_probe_interval () + dope + size)); -- add_reg_note (last, REG_FRAME_RELATED_EXPR, expr); -- RTX_FRAME_RELATED_P (last) = 1; -+#ifdef TARGET_THREAD_SPLIT_STACK_OFFSET -+ offset = TARGET_THREAD_SPLIT_STACK_OFFSET; -+#else -+ gcc_unreachable (); -+#endif - -- cfun->machine->fs.sp_offset += size; -- } -+ r = GEN_INT (offset); -+ r = gen_const_mem (Pmode, r); -+ set_mem_addr_space (r, as); - -- /* Make sure nothing is scheduled before we are done. */ -- emit_insn (gen_blockage ()); -+ return r; - } - --/* Adjust the stack pointer up to REG while probing it. */ -+/* Handle -fsplit-stack. These are the first instructions in the -+ function, even before the regular prologue. */ - --const char * --output_adjust_stack_and_probe (rtx reg) -+void -+ix86_expand_split_stack_prologue (void) - { -- static int labelno = 0; -- char loop_lab[32]; -- rtx xops[2]; -- -- ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++); -+ HOST_WIDE_INT allocate; -+ unsigned HOST_WIDE_INT args_size; -+ rtx_code_label *label; -+ rtx limit, current, allocate_rtx, call_fusage; -+ rtx_insn *call_insn; -+ rtx scratch_reg = NULL_RTX; -+ rtx_code_label *varargs_label = NULL; -+ rtx fn; - -- /* Loop. */ -- ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab); -+ gcc_assert (flag_split_stack && reload_completed); - -- /* SP = SP + PROBE_INTERVAL. */ -- xops[0] = stack_pointer_rtx; -- xops[1] = GEN_INT (get_probe_interval ()); -- output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops); -+ ix86_finalize_stack_frame_flags (); -+ struct ix86_frame &frame = cfun->machine->frame; -+ allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET; - -- /* Probe at SP. */ -- xops[1] = const0_rtx; -- output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops); -+ /* This is the label we will branch to if we have enough stack -+ space. We expect the basic block reordering pass to reverse this -+ branch if optimizing, so that we branch in the unlikely case. */ -+ label = gen_label_rtx (); - -- /* Test if SP == LAST_ADDR. */ -- xops[0] = stack_pointer_rtx; -- xops[1] = reg; -- output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops); -+ /* We need to compare the stack pointer minus the frame size with -+ the stack boundary in the TCB. The stack boundary always gives -+ us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we -+ can compare directly. Otherwise we need to do an addition. */ - -- /* Branch. 
*/ -- fputs ("\tjne\t", asm_out_file); -- assemble_name_raw (asm_out_file, loop_lab); -- fputc ('\n', asm_out_file); -+ limit = ix86_split_stack_guard (); - -- return ""; --} -+ if (allocate < SPLIT_STACK_AVAILABLE) -+ current = stack_pointer_rtx; -+ else -+ { -+ unsigned int scratch_regno; -+ rtx offset; - --/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE, -- inclusive. These are offsets from the current stack pointer. -- -- INT_REGISTERS_SAVED is true if integer registers have already been -- pushed on the stack. */ -+ /* We need a scratch register to hold the stack pointer minus -+ the required frame size. Since this is the very start of the -+ function, the scratch register can be any caller-saved -+ register which is not used for parameters. */ -+ offset = GEN_INT (- allocate); -+ scratch_regno = split_stack_prologue_scratch_regno (); -+ if (scratch_regno == INVALID_REGNUM) -+ return; -+ scratch_reg = gen_rtx_REG (Pmode, scratch_regno); -+ if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode)) -+ { -+ /* We don't use ix86_gen_add3 in this case because it will -+ want to split to lea, but when not optimizing the insn -+ will not be split after this point. */ -+ emit_insn (gen_rtx_SET (scratch_reg, -+ gen_rtx_PLUS (Pmode, stack_pointer_rtx, -+ offset))); -+ } -+ else -+ { -+ emit_move_insn (scratch_reg, offset); -+ emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg, -+ stack_pointer_rtx)); -+ } -+ current = scratch_reg; -+ } - --static void --ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size, -- const bool int_registers_saved) --{ -- /* See if we have a constant small number of probes to generate. If so, -- that's the easy case. The run-time loop is made up of 6 insns in the -- generic case while the compile-time loop is made up of n insns for n # -- of intervals. */ -- if (size <= 6 * get_probe_interval ()) -- { -- HOST_WIDE_INT i; -+ ix86_expand_branch (GEU, current, limit, label); -+ rtx_insn *jump_insn = get_last_insn (); -+ JUMP_LABEL (jump_insn) = label; - -- /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until -- it exceeds SIZE. If only one probe is needed, this will not -- generate any code. Then probe at FIRST + SIZE. */ -- for (i = get_probe_interval (); i < size; i += get_probe_interval ()) -- emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, -- -(first + i))); -+ /* Mark the jump as very likely to be taken. */ -+ add_reg_br_prob_note (jump_insn, profile_probability::very_likely ()); - -- emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, -- -(first + size))); -+ if (split_stack_fn == NULL_RTX) -+ { -+ split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack"); -+ SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL; - } -+ fn = split_stack_fn; - -- /* Otherwise, do the same as above, but in a loop. Note that we must be -- extra careful with variables wrapping around because we might be at -- the very top (or the very bottom) of the address space and we have -- to be able to handle this case properly; in particular, we use an -- equality test for the loop condition. */ -- else -+ /* Get more stack space. We pass in the desired stack space and the -+ size of the arguments to copy to the new stack. In 32-bit mode -+ we push the parameters; __morestack will return on a new stack -+ anyhow. In 64-bit mode we pass the parameters in r10 and -+ r11. */ -+ allocate_rtx = GEN_INT (allocate); -+ args_size = crtl->args.size >= 0 ? 
(HOST_WIDE_INT) crtl->args.size : 0; -+ call_fusage = NULL_RTX; -+ rtx pop = NULL_RTX; -+ if (TARGET_64BIT) - { -- /* We expect the GP registers to be saved when probes are used -- as the probing sequences might need a scratch register and -- the routine to allocate one assumes the integer registers -- have already been saved. */ -- gcc_assert (int_registers_saved); -+ rtx reg10, reg11; - -- HOST_WIDE_INT rounded_size, last; -- struct scratch_reg sr; -+ reg10 = gen_rtx_REG (Pmode, R10_REG); -+ reg11 = gen_rtx_REG (Pmode, R11_REG); - -- get_scratch_register_on_entry (&sr); -+ /* If this function uses a static chain, it will be in %r10. -+ Preserve it across the call to __morestack. */ -+ if (DECL_STATIC_CHAIN (cfun->decl)) -+ { -+ rtx rax; - -+ rax = gen_rtx_REG (word_mode, AX_REG); -+ emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG)); -+ use_reg (&call_fusage, rax); -+ } - -- /* Step 1: round SIZE to the previous multiple of the interval. */ -+ if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC) -+ && !TARGET_PECOFF) -+ { -+ HOST_WIDE_INT argval; - -- rounded_size = ROUND_DOWN (size, get_probe_interval ()); -+ gcc_assert (Pmode == DImode); -+ /* When using the large model we need to load the address -+ into a register, and we've run out of registers. So we -+ switch to a different calling convention, and we call a -+ different function: __morestack_large. We pass the -+ argument size in the upper 32 bits of r10 and pass the -+ frame size in the lower 32 bits. */ -+ gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate); -+ gcc_assert ((args_size & 0xffffffff) == args_size); - -+ if (split_stack_fn_large == NULL_RTX) -+ { -+ split_stack_fn_large -+ = gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model"); -+ SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL; -+ } -+ if (ix86_cmodel == CM_LARGE_PIC) -+ { -+ rtx_code_label *label; -+ rtx x; - -- /* Step 2: compute initial and final value of the loop counter. */ -+ label = gen_label_rtx (); -+ emit_label (label); -+ LABEL_PRESERVE_P (label) = 1; -+ emit_insn (gen_set_rip_rex64 (reg10, label)); -+ emit_insn (gen_set_got_offset_rex64 (reg11, label)); -+ emit_insn (ix86_gen_add3 (reg10, reg10, reg11)); -+ x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large), -+ UNSPEC_GOT); -+ x = gen_rtx_CONST (Pmode, x); -+ emit_move_insn (reg11, x); -+ x = gen_rtx_PLUS (Pmode, reg10, reg11); -+ x = gen_const_mem (Pmode, x); -+ emit_move_insn (reg11, x); -+ } -+ else -+ emit_move_insn (reg11, split_stack_fn_large); - -- /* TEST_OFFSET = FIRST. */ -- emit_move_insn (sr.reg, GEN_INT (-first)); -+ fn = reg11; - -- /* LAST_OFFSET = FIRST + ROUNDED_SIZE. 
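The large-model path above packs both values into a single register because no other register is free; in plain C, the packing it performs (argument size in the upper 32 bits of %r10, frame size in the lower 32 bits) looks like the sketch below, with the concrete values invented for illustration.

#include <stdint.h>
#include <stdio.h>
#include <assert.h>

int
main (void)
{
  uint64_t allocate  = 0x2000;   /* frame bytes to allocate */
  uint64_t args_size = 0x40;     /* bytes of stack arguments to copy */

  /* Mirrors the gcc_asserts above: both values must fit in 32 bits.  */
  assert (allocate  <= 0xffffffffu);
  assert (args_size <= 0xffffffffu);

  /* Equivalent of ((args_size << 16) << 16) + allocate in the patch.  */
  uint64_t r10 = (args_size << 32) | allocate;

  printf ("low 32 (frame) = %#llx, high 32 (args) = %#llx\n",
          (unsigned long long) (r10 & 0xffffffffu),
          (unsigned long long) (r10 >> 32));
  return 0;
}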
*/ -- last = first + rounded_size; -+ argval = ((args_size << 16) << 16) + allocate; -+ emit_move_insn (reg10, GEN_INT (argval)); -+ } -+ else -+ { -+ emit_move_insn (reg10, allocate_rtx); -+ emit_move_insn (reg11, GEN_INT (args_size)); -+ use_reg (&call_fusage, reg11); -+ } - -+ use_reg (&call_fusage, reg10); -+ } -+ else -+ { -+ rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size))); -+ add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD)); -+ insn = emit_insn (gen_push (allocate_rtx)); -+ add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD)); -+ pop = GEN_INT (2 * UNITS_PER_WORD); -+ } -+ call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn), -+ GEN_INT (UNITS_PER_WORD), constm1_rtx, -+ pop, false); -+ add_function_usage_to (call_insn, call_fusage); -+ if (!TARGET_64BIT) -+ add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0)); -+ /* Indicate that this function can't jump to non-local gotos. */ -+ make_reg_eh_region_note_nothrow_nononlocal (call_insn); - -- /* Step 3: the loop -+ /* In order to make call/return prediction work right, we now need -+ to execute a return instruction. See -+ libgcc/config/i386/morestack.S for the details on how this works. - -- do -- { -- TEST_ADDR = TEST_ADDR + PROBE_INTERVAL -- probe at TEST_ADDR -- } -- while (TEST_ADDR != LAST_ADDR) -+ For flow purposes gcc must not see this as a return -+ instruction--we need control flow to continue at the subsequent -+ label. Therefore, we use an unspec. */ -+ gcc_assert (crtl->args.pops_args < 65536); -+ rtx_insn *ret_insn -+ = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args))); - -- probes at FIRST + N * PROBE_INTERVAL for values of N from 1 -- until it is equal to ROUNDED_SIZE. */ -+ if ((flag_cf_protection & CF_BRANCH)) -+ { -+ /* Insert ENDBR since __morestack will jump back here via indirect -+ call. */ -+ rtx cet_eb = gen_nop_endbr (); -+ emit_insn_after (cet_eb, ret_insn); -+ } - -- emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last))); -+ /* If we are in 64-bit mode and this function uses a static chain, -+ we saved %r10 in %rax before calling _morestack. */ -+ if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl)) -+ emit_move_insn (gen_rtx_REG (word_mode, R10_REG), -+ gen_rtx_REG (word_mode, AX_REG)); - -+ /* If this function calls va_start, we need to store a pointer to -+ the arguments on the old stack, because they may not have been -+ all copied to the new stack. At this point the old stack can be -+ found at the frame pointer value used by __morestack, because -+ __morestack has set that up before calling back to us. Here we -+ store that pointer in a scratch register, and in -+ ix86_expand_prologue we store the scratch register in a stack -+ slot. */ -+ if (cfun->machine->split_stack_varargs_pointer != NULL_RTX) -+ { -+ unsigned int scratch_regno; -+ rtx frame_reg; -+ int words; - -- /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time -- that SIZE is equal to ROUNDED_SIZE. */ -+ scratch_regno = split_stack_prologue_scratch_regno (); -+ scratch_reg = gen_rtx_REG (Pmode, scratch_regno); -+ frame_reg = gen_rtx_REG (Pmode, BP_REG); - -- if (size != rounded_size) -- emit_stack_probe (plus_constant (Pmode, -- gen_rtx_PLUS (Pmode, -- stack_pointer_rtx, -- sr.reg), -- rounded_size - size)); -+ /* 64-bit: -+ fp -> old fp value -+ return address within this function -+ return address of caller of this function -+ stack arguments -+ So we add three words to get to the stack arguments. 
- -- release_scratch_register_on_entry (&sr, size, true); -- } -+ 32-bit: -+ fp -> old fp value -+ return address within this function -+ first argument to __morestack -+ second argument to __morestack -+ return address of caller of this function -+ stack arguments -+ So we add five words to get to the stack arguments. -+ */ -+ words = TARGET_64BIT ? 3 : 5; -+ emit_insn (gen_rtx_SET (scratch_reg, -+ gen_rtx_PLUS (Pmode, frame_reg, -+ GEN_INT (words * UNITS_PER_WORD)))); - -- /* Make sure nothing is scheduled before we are done. */ -- emit_insn (gen_blockage ()); --} -+ varargs_label = gen_label_rtx (); -+ emit_jump_insn (gen_jump (varargs_label)); -+ JUMP_LABEL (get_last_insn ()) = varargs_label; - --/* Probe a range of stack addresses from REG to END, inclusive. These are -- offsets from the current stack pointer. */ -+ emit_barrier (); -+ } - --const char * --output_probe_stack_range (rtx reg, rtx end) --{ -- static int labelno = 0; -- char loop_lab[32]; -- rtx xops[3]; -- -- ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++); -- -- /* Loop. */ -- ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab); -- -- /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */ -- xops[0] = reg; -- xops[1] = GEN_INT (get_probe_interval ()); -- output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops); -- -- /* Probe at TEST_ADDR. */ -- xops[0] = stack_pointer_rtx; -- xops[1] = reg; -- xops[2] = const0_rtx; -- output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops); -- -- /* Test if TEST_ADDR == LAST_ADDR. */ -- xops[0] = reg; -- xops[1] = end; -- output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops); -+ emit_label (label); -+ LABEL_NUSES (label) = 1; - -- /* Branch. */ -- fputs ("\tjne\t", asm_out_file); -- assemble_name_raw (asm_out_file, loop_lab); -- fputc ('\n', asm_out_file); -+ /* If this function calls va_start, we now have to set the scratch -+ register for the case where we do not call __morestack. In this -+ case we need to set it based on the stack pointer. */ -+ if (cfun->machine->split_stack_varargs_pointer != NULL_RTX) -+ { -+ emit_insn (gen_rtx_SET (scratch_reg, -+ gen_rtx_PLUS (Pmode, stack_pointer_rtx, -+ GEN_INT (UNITS_PER_WORD)))); - -- return ""; -+ emit_label (varargs_label); -+ LABEL_NUSES (varargs_label) = 1; -+ } - } - --/* Return true if stack frame is required. Update STACK_ALIGNMENT -- to the largest alignment, in bits, of stack slot used if stack -- frame is required and CHECK_STACK_SLOT is true. */ -+/* We may have to tell the dataflow pass that the split stack prologue -+ is initializing a scratch register. */ - --static bool --ix86_find_max_used_stack_alignment (unsigned int &stack_alignment, -- bool check_stack_slot) -+static void -+ix86_live_on_entry (bitmap regs) - { -- HARD_REG_SET set_up_by_prologue, prologue_used; -- basic_block bb; -- -- CLEAR_HARD_REG_SET (prologue_used); -- CLEAR_HARD_REG_SET (set_up_by_prologue); -- add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM); -- add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM); -- add_to_hard_reg_set (&set_up_by_prologue, Pmode, -- HARD_FRAME_POINTER_REGNUM); -- -- /* The preferred stack alignment is the minimum stack alignment. 
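Put in plain C, the check that the split-stack prologue above emits ahead of the regular prologue amounts to the following sketch: compare the prospective stack pointer against the per-thread guard fetched from the TCB and only call the __morestack analogue when it would dip below the guard. The 256-byte SPLIT_STACK_AVAILABLE slack and the stand-in guard variable are assumptions for the example.

#include <stdint.h>
#include <stdio.h>

#define SPLIT_STACK_AVAILABLE 256      /* assumed guaranteed slack */

static uintptr_t tcb_stack_guard;      /* stand-in for the TLS guard slot */

static void
more_stack (uintptr_t frame_size, uintptr_t args_size)
{
  printf ("call __morestack (frame %zu, args %zu)\n",
          (size_t) frame_size, (size_t) args_size);
}

static void
split_stack_check (uintptr_t sp, uintptr_t frame_size, uintptr_t args_size)
{
  /* Small frames are covered by the guaranteed slack, so compare SP itself;
     larger frames subtract the frame size first.  */
  uintptr_t current = frame_size < SPLIT_STACK_AVAILABLE
                      ? sp : sp - frame_size;

  if (current < tcb_stack_guard)       /* the GEU branch not taken */
    more_stack (frame_size, args_size);
}

int
main (void)
{
  tcb_stack_guard = 0x10000;
  split_stack_check (0x10800, 0x4000, 64);   /* would dip below: grow */
  split_stack_check (0x20000, 0x4000, 64);   /* enough room: fall through */
  return 0;
}

The generated branch is marked very likely taken, so the common no-growth case falls straight through to the regular prologue.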
*/ -- if (stack_alignment > crtl->preferred_stack_boundary) -- stack_alignment = crtl->preferred_stack_boundary; -- -- bool require_stack_frame = false; -- -- FOR_EACH_BB_FN (bb, cfun) -+ if (cfun->machine->split_stack_varargs_pointer != NULL_RTX) - { -- rtx_insn *insn; -- FOR_BB_INSNS (bb, insn) -- if (NONDEBUG_INSN_P (insn) -- && requires_stack_frame_p (insn, prologue_used, -- set_up_by_prologue)) -- { -- require_stack_frame = true; -- -- if (check_stack_slot) -- { -- /* Find the maximum stack alignment. */ -- subrtx_iterator::array_type array; -- FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL) -- if (MEM_P (*iter) -- && (reg_mentioned_p (stack_pointer_rtx, -- *iter) -- || reg_mentioned_p (frame_pointer_rtx, -- *iter))) -- { -- unsigned int alignment = MEM_ALIGN (*iter); -- if (alignment > stack_alignment) -- stack_alignment = alignment; -- } -- } -- } -+ gcc_assert (flag_split_stack); -+ bitmap_set_bit (regs, split_stack_prologue_scratch_regno ()); - } -- -- return require_stack_frame; - } -+ -+/* Extract the parts of an RTL expression that is a valid memory address -+ for an instruction. Return 0 if the structure of the address is -+ grossly off. Return -1 if the address contains ASHIFT, so it is not -+ strictly valid, but still used for computing length of lea instruction. */ - --/* Finalize stack_realign_needed and frame_pointer_needed flags, which -- will guide prologue/epilogue to be generated in correct form. */ -- --static void --ix86_finalize_stack_frame_flags (void) -+int -+ix86_decompose_address (rtx addr, struct ix86_address *out) - { -- /* Check if stack realign is really needed after reload, and -- stores result in cfun */ -- unsigned int incoming_stack_boundary -- = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary -- ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary); -- unsigned int stack_alignment -- = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor -- ? crtl->max_used_stack_slot_alignment -- : crtl->stack_alignment_needed); -- unsigned int stack_realign -- = (incoming_stack_boundary < stack_alignment); -- bool recompute_frame_layout_p = false; -+ rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX; -+ rtx base_reg, index_reg; -+ HOST_WIDE_INT scale = 1; -+ rtx scale_rtx = NULL_RTX; -+ rtx tmp; -+ int retval = 1; -+ addr_space_t seg = ADDR_SPACE_GENERIC; - -- if (crtl->stack_realign_finalized) -+ /* Allow zero-extended SImode addresses, -+ they will be emitted with addr32 prefix. */ -+ if (TARGET_64BIT && GET_MODE (addr) == DImode) - { -- /* After stack_realign_needed is finalized, we can't no longer -- change it. */ -- gcc_assert (crtl->stack_realign_needed == stack_realign); -- return; -+ if (GET_CODE (addr) == ZERO_EXTEND -+ && GET_MODE (XEXP (addr, 0)) == SImode) -+ { -+ addr = XEXP (addr, 0); -+ if (CONST_INT_P (addr)) -+ return 0; -+ } -+ else if (GET_CODE (addr) == AND -+ && const_32bit_mask (XEXP (addr, 1), DImode)) -+ { -+ addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode); -+ if (addr == NULL_RTX) -+ return 0; -+ -+ if (CONST_INT_P (addr)) -+ return 0; -+ } - } - -- /* If the only reason for frame_pointer_needed is that we conservatively -- assumed stack realignment might be needed or -fno-omit-frame-pointer -- is used, but in the end nothing that needed the stack alignment had -- been spilled nor stack access, clear frame_pointer_needed and say we -- don't need stack realignment. 
*/ -- if ((stack_realign || (!flag_omit_frame_pointer && optimize)) -- && frame_pointer_needed -- && crtl->is_leaf -- && crtl->sp_is_unchanging -- && !ix86_current_function_calls_tls_descriptor -- && !crtl->accesses_prior_frames -- && !cfun->calls_alloca -- && !crtl->calls_eh_return -- /* See ira_setup_eliminable_regset for the rationale. */ -- && !(STACK_CHECK_MOVING_SP -- && flag_stack_check -- && flag_exceptions -- && cfun->can_throw_non_call_exceptions) -- && !ix86_frame_pointer_required () -- && get_frame_size () == 0 -- && ix86_nsaved_sseregs () == 0 -- && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0) -+ /* Allow SImode subregs of DImode addresses, -+ they will be emitted with addr32 prefix. */ -+ if (TARGET_64BIT && GET_MODE (addr) == SImode) - { -- if (ix86_find_max_used_stack_alignment (stack_alignment, -- stack_realign)) -+ if (SUBREG_P (addr) -+ && GET_MODE (SUBREG_REG (addr)) == DImode) - { -- /* Stack frame is required. If stack alignment needed is less -- than incoming stack boundary, don't realign stack. */ -- stack_realign = incoming_stack_boundary < stack_alignment; -- if (!stack_realign) -- { -- crtl->max_used_stack_slot_alignment -- = incoming_stack_boundary; -- crtl->stack_alignment_needed -- = incoming_stack_boundary; -- /* Also update preferred_stack_boundary for leaf -- functions. */ -- crtl->preferred_stack_boundary -- = incoming_stack_boundary; -- } -+ addr = SUBREG_REG (addr); -+ if (CONST_INT_P (addr)) -+ return 0; - } -+ } -+ -+ if (REG_P (addr)) -+ base = addr; -+ else if (SUBREG_P (addr)) -+ { -+ if (REG_P (SUBREG_REG (addr))) -+ base = addr; - else -+ return 0; -+ } -+ else if (GET_CODE (addr) == PLUS) -+ { -+ rtx addends[4], op; -+ int n = 0, i; -+ -+ op = addr; -+ do - { -- /* If drap has been set, but it actually isn't live at the -- start of the function, there is no reason to set it up. */ -- if (crtl->drap_reg) -+ if (n >= 4) -+ return 0; -+ addends[n++] = XEXP (op, 1); -+ op = XEXP (op, 0); -+ } -+ while (GET_CODE (op) == PLUS); -+ if (n >= 4) -+ return 0; -+ addends[n] = op; -+ -+ for (i = n; i >= 0; --i) -+ { -+ op = addends[i]; -+ switch (GET_CODE (op)) - { -- basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb; -- if (! REGNO_REG_SET_P (DF_LR_IN (bb), -- REGNO (crtl->drap_reg))) -- { -- crtl->drap_reg = NULL_RTX; -- crtl->need_drap = false; -- } -- } -- else -- cfun->machine->no_drap_save_restore = true; -+ case MULT: -+ if (index) -+ return 0; -+ index = XEXP (op, 0); -+ scale_rtx = XEXP (op, 1); -+ break; - -- frame_pointer_needed = false; -- stack_realign = false; -- crtl->max_used_stack_slot_alignment = incoming_stack_boundary; -- crtl->stack_alignment_needed = incoming_stack_boundary; -- crtl->stack_alignment_estimated = incoming_stack_boundary; -- if (crtl->preferred_stack_boundary > incoming_stack_boundary) -- crtl->preferred_stack_boundary = incoming_stack_boundary; -- df_finish_pass (true); -- df_scan_alloc (NULL); -- df_scan_blocks (); -- df_compute_regs_ever_live (true); -- df_analyze (); -+ case ASHIFT: -+ if (index) -+ return 0; -+ index = XEXP (op, 0); -+ tmp = XEXP (op, 1); -+ if (!CONST_INT_P (tmp)) -+ return 0; -+ scale = INTVAL (tmp); -+ if ((unsigned HOST_WIDE_INT) scale > 3) -+ return 0; -+ scale = 1 << scale; -+ break; - -- if (flag_var_tracking) -- { -- /* Since frame pointer is no longer available, replace it with -- stack pointer - UNITS_PER_WORD in debug insns. 
*/ -- df_ref ref, next; -- for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM); -- ref; ref = next) -- { -- next = DF_REF_NEXT_REG (ref); -- if (!DF_REF_INSN_INFO (ref)) -- continue; -+ case ZERO_EXTEND: -+ op = XEXP (op, 0); -+ if (GET_CODE (op) != UNSPEC) -+ return 0; -+ /* FALLTHRU */ - -- /* Make sure the next ref is for a different instruction, -- so that we're not affected by the rescan. */ -- rtx_insn *insn = DF_REF_INSN (ref); -- while (next && DF_REF_INSN (next) == insn) -- next = DF_REF_NEXT_REG (next); -+ case UNSPEC: -+ if (XINT (op, 1) == UNSPEC_TP -+ && TARGET_TLS_DIRECT_SEG_REFS -+ && seg == ADDR_SPACE_GENERIC) -+ seg = DEFAULT_TLS_SEG_REG; -+ else -+ return 0; -+ break; - -- if (DEBUG_INSN_P (insn)) -- { -- bool changed = false; -- for (; ref != next; ref = DF_REF_NEXT_REG (ref)) -- { -- rtx *loc = DF_REF_LOC (ref); -- if (*loc == hard_frame_pointer_rtx) -- { -- *loc = plus_constant (Pmode, -- stack_pointer_rtx, -- -UNITS_PER_WORD); -- changed = true; -- } -- } -- if (changed) -- df_insn_rescan (insn); -- } -- } -- } -+ case SUBREG: -+ if (!REG_P (SUBREG_REG (op))) -+ return 0; -+ /* FALLTHRU */ - -- recompute_frame_layout_p = true; -+ case REG: -+ if (!base) -+ base = op; -+ else if (!index) -+ index = op; -+ else -+ return 0; -+ break; -+ -+ case CONST: -+ case CONST_INT: -+ case SYMBOL_REF: -+ case LABEL_REF: -+ if (disp) -+ return 0; -+ disp = op; -+ break; -+ -+ default: -+ return 0; -+ } - } - } -- else if (crtl->max_used_stack_slot_alignment >= 128) -+ else if (GET_CODE (addr) == MULT) - { -- /* We don't need to realign stack. max_used_stack_alignment is -- used to decide how stack frame should be aligned. This is -- independent of any psABIs nor 32-bit vs 64-bit. It is always -- safe to compute max_used_stack_alignment. We compute it only -- if 128-bit aligned load/store may be generated on misaligned -- stack slot which will lead to segfault. */ -- if (ix86_find_max_used_stack_alignment (stack_alignment, true)) -- cfun->machine->max_used_stack_alignment -- = stack_alignment / BITS_PER_UNIT; -+ index = XEXP (addr, 0); /* index*scale */ -+ scale_rtx = XEXP (addr, 1); - } -+ else if (GET_CODE (addr) == ASHIFT) -+ { -+ /* We're called for lea too, which implements ashift on occasion. */ -+ index = XEXP (addr, 0); -+ tmp = XEXP (addr, 1); -+ if (!CONST_INT_P (tmp)) -+ return 0; -+ scale = INTVAL (tmp); -+ if ((unsigned HOST_WIDE_INT) scale > 3) -+ return 0; -+ scale = 1 << scale; -+ retval = -1; -+ } -+ else -+ disp = addr; /* displacement */ - -- if (crtl->stack_realign_needed != stack_realign) -- recompute_frame_layout_p = true; -- crtl->stack_realign_needed = stack_realign; -- crtl->stack_realign_finalized = true; -- if (recompute_frame_layout_p) -- ix86_compute_frame_layout (); --} -- --/* Delete SET_GOT right after entry block if it is allocated to reg. 
*/ -- --static void --ix86_elim_entry_set_got (rtx reg) --{ -- basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb; -- rtx_insn *c_insn = BB_HEAD (bb); -- if (!NONDEBUG_INSN_P (c_insn)) -- c_insn = next_nonnote_nondebug_insn (c_insn); -- if (c_insn && NONJUMP_INSN_P (c_insn)) -+ if (index) - { -- rtx pat = PATTERN (c_insn); -- if (GET_CODE (pat) == PARALLEL) -- { -- rtx vec = XVECEXP (pat, 0, 0); -- if (GET_CODE (vec) == SET -- && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT -- && REGNO (XEXP (vec, 0)) == REGNO (reg)) -- delete_insn (c_insn); -- } -+ if (REG_P (index)) -+ ; -+ else if (SUBREG_P (index) -+ && REG_P (SUBREG_REG (index))) -+ ; -+ else -+ return 0; - } --} - --static rtx --gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store) --{ -- rtx addr, mem; -+ /* Extract the integral value of scale. */ -+ if (scale_rtx) -+ { -+ if (!CONST_INT_P (scale_rtx)) -+ return 0; -+ scale = INTVAL (scale_rtx); -+ } - -- if (offset) -- addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset)); -- mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg); -- return gen_rtx_SET (store ? mem : reg, store ? reg : mem); --} -+ base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base; -+ index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index; - --static inline rtx --gen_frame_load (rtx reg, rtx frame_reg, int offset) --{ -- return gen_frame_set (reg, frame_reg, offset, false); --} -+ /* Avoid useless 0 displacement. */ -+ if (disp == const0_rtx && (base || index)) -+ disp = NULL_RTX; - --static inline rtx --gen_frame_store (rtx reg, rtx frame_reg, int offset) --{ -- return gen_frame_set (reg, frame_reg, offset, true); --} -+ /* Allow arg pointer and stack pointer as index if there is not scaling. */ -+ if (base_reg && index_reg && scale == 1 -+ && (REGNO (index_reg) == ARG_POINTER_REGNUM -+ || REGNO (index_reg) == FRAME_POINTER_REGNUM -+ || REGNO (index_reg) == SP_REG)) -+ { -+ std::swap (base, index); -+ std::swap (base_reg, index_reg); -+ } - --static void --ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame) --{ -- struct machine_function *m = cfun->machine; -- const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS -- + m->call_ms2sysv_extra_regs; -- rtvec v = rtvec_alloc (ncregs + 1); -- unsigned int align, i, vi = 0; -- rtx_insn *insn; -- rtx sym, addr; -- rtx rax = gen_rtx_REG (word_mode, AX_REG); -- const struct xlogue_layout &xlogue = xlogue_layout::get_instance (); -+ /* Special case: %ebp cannot be encoded as a base without a displacement. -+ Similarly %r13. */ -+ if (!disp && base_reg -+ && (REGNO (base_reg) == ARG_POINTER_REGNUM -+ || REGNO (base_reg) == FRAME_POINTER_REGNUM -+ || REGNO (base_reg) == BP_REG -+ || REGNO (base_reg) == R13_REG)) -+ disp = const0_rtx; - -- /* AL should only be live with sysv_abi. */ -- gcc_assert (!ix86_eax_live_at_start_p ()); -- gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset); -+ /* Special case: on K6, [%esi] makes the instruction vector decoded. -+ Avoid this by transforming to [%esi+0]. -+ Reload calls address legitimization without cfun defined, so we need -+ to test cfun for being non-NULL. */ -+ if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun) -+ && base_reg && !index_reg && !disp -+ && REGNO (base_reg) == SI_REG) -+ disp = const0_rtx; - -- /* Setup RAX as the stub's base pointer. We use stack_realign_offset rather -- we've actually realigned the stack or not. 
*/ -- align = GET_MODE_ALIGNMENT (V4SFmode); -- addr = choose_baseaddr (frame.stack_realign_offset -- + xlogue.get_stub_ptr_offset (), &align, AX_REG); -- gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode)); -+ /* Special case: encode reg+reg instead of reg*2. */ -+ if (!base && index && scale == 2) -+ base = index, base_reg = index_reg, scale = 1; - -- emit_insn (gen_rtx_SET (rax, addr)); -+ /* Special case: scaling cannot be encoded without base or displacement. */ -+ if (!base && !disp && index && scale != 1) -+ disp = const0_rtx; - -- /* Get the stub symbol. */ -- sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP -- : XLOGUE_STUB_SAVE); -- RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym); -+ out->base = base; -+ out->index = index; -+ out->disp = disp; -+ out->scale = scale; -+ out->seg = seg; - -- for (i = 0; i < ncregs; ++i) -- { -- const xlogue_layout::reginfo &r = xlogue.get_reginfo (i); -- rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode), -- r.regno); -- RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset); -- } -- -- gcc_assert (vi == (unsigned)GET_NUM_ELEM (v)); -- -- insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v)); -- RTX_FRAME_RELATED_P (insn) = true; -+ return retval; - } -+ -+/* Return cost of the memory address x. -+ For i386, it is better to use a complex address than let gcc copy -+ the address into a reg and make a new pseudo. But not if the address -+ requires to two regs - that would mean more pseudos with longer -+ lifetimes. */ -+static int -+ix86_address_cost (rtx x, machine_mode, addr_space_t, bool) -+{ -+ struct ix86_address parts; -+ int cost = 1; -+ int ok = ix86_decompose_address (x, &parts); - --/* Expand the prologue into a bunch of separate insns. */ -+ gcc_assert (ok); - --void --ix86_expand_prologue (void) --{ -- struct machine_function *m = cfun->machine; -- rtx insn, t; -- HOST_WIDE_INT allocate; -- bool int_registers_saved; -- bool sse_registers_saved; -- bool save_stub_call_needed; -- rtx static_chain = NULL_RTX; -+ if (parts.base && SUBREG_P (parts.base)) -+ parts.base = SUBREG_REG (parts.base); -+ if (parts.index && SUBREG_P (parts.index)) -+ parts.index = SUBREG_REG (parts.index); - -- if (ix86_function_naked (current_function_decl)) -- return; -+ /* Attempt to minimize number of registers in the address by increasing -+ address cost for each used register. We don't increase address cost -+ for "pic_offset_table_rtx". When a memopt with "pic_offset_table_rtx" -+ is not invariant itself it most likely means that base or index is not -+ invariant. Therefore only "pic_offset_table_rtx" could be hoisted out, -+ which is not profitable for x86. */ -+ if (parts.base -+ && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER) -+ && (current_pass->type == GIMPLE_PASS -+ || !pic_offset_table_rtx -+ || !REG_P (parts.base) -+ || REGNO (pic_offset_table_rtx) != REGNO (parts.base))) -+ cost++; - -- ix86_finalize_stack_frame_flags (); -+ if (parts.index -+ && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER) -+ && (current_pass->type == GIMPLE_PASS -+ || !pic_offset_table_rtx -+ || !REG_P (parts.index) -+ || REGNO (pic_offset_table_rtx) != REGNO (parts.index))) -+ cost++; - -- /* DRAP should not coexist with stack_realign_fp */ -- gcc_assert (!(crtl->drap_reg && stack_realign_fp)); -+ /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b, -+ since it's predecode logic can't detect the length of instructions -+ and it degenerates to vector decoded. 
Increase cost of such -+ addresses here. The penalty is minimally 2 cycles. It may be worthwhile -+ to split such addresses or even refuse such addresses at all. - -- memset (&m->fs, 0, sizeof (m->fs)); -+ Following addressing modes are affected: -+ [base+scale*index] -+ [scale*index+disp] -+ [base+index] - -- /* Initialize CFA state for before the prologue. */ -- m->fs.cfa_reg = stack_pointer_rtx; -- m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET; -+ The first and last case may be avoidable by explicitly coding the zero in -+ memory address, but I don't have AMD-K6 machine handy to check this -+ theory. */ - -- /* Track SP offset to the CFA. We continue tracking this after we've -- swapped the CFA register away from SP. In the case of re-alignment -- this is fudged; we're interested to offsets within the local frame. */ -- m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET; -- m->fs.sp_valid = true; -- m->fs.sp_realigned = false; -+ if (TARGET_K6 -+ && ((!parts.disp && parts.base && parts.index && parts.scale != 1) -+ || (parts.disp && !parts.base && parts.index && parts.scale != 1) -+ || (!parts.disp && parts.base && parts.index && parts.scale == 1))) -+ cost += 10; - -- const struct ix86_frame &frame = cfun->machine->frame; -+ return cost; -+} -+ -+/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as -+ this is used for to form addresses to local data when -fPIC is in -+ use. */ - -- if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl)) -- { -- /* We should have already generated an error for any use of -- ms_hook on a nested function. */ -- gcc_checking_assert (!ix86_static_chain_on_stack); -+static bool -+darwin_local_data_pic (rtx disp) -+{ -+ return (GET_CODE (disp) == UNSPEC -+ && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET); -+} - -- /* Check if profiling is active and we shall use profiling before -- prologue variant. If so sorry. */ -- if (crtl->profile && flag_fentry != 0) -- sorry ("ms_hook_prologue attribute isn%'t compatible " -- "with %<-mfentry%> for 32-bit"); -+/* True if operand X should be loaded from GOT. */ - -- /* In ix86_asm_output_function_label we emitted: -- 8b ff movl.s %edi,%edi -- 55 push %ebp -- 8b ec movl.s %esp,%ebp -+bool -+ix86_force_load_from_GOT_p (rtx x) -+{ -+ return ((TARGET_64BIT || HAVE_AS_IX86_GOT32X) -+ && !TARGET_PECOFF && !TARGET_MACHO -+ && !flag_pic -+ && ix86_cmodel != CM_LARGE -+ && GET_CODE (x) == SYMBOL_REF -+ && SYMBOL_REF_FUNCTION_P (x) -+ && (!flag_plt -+ || (SYMBOL_REF_DECL (x) -+ && lookup_attribute ("noplt", -+ DECL_ATTRIBUTES (SYMBOL_REF_DECL (x))))) -+ && !SYMBOL_REF_LOCAL_P (x)); -+} - -- This matches the hookable function prologue in Win32 API -- functions in Microsoft Windows XP Service Pack 2 and newer. -- Wine uses this to enable Windows apps to hook the Win32 API -- functions provided by Wine. -+/* Determine if a given RTX is a valid constant. We already know this -+ satisfies CONSTANT_P. */ - -- What that means is that we've already set up the frame pointer. */ -+static bool -+ix86_legitimate_constant_p (machine_mode mode, rtx x) -+{ -+ switch (GET_CODE (x)) -+ { -+ case CONST: -+ x = XEXP (x, 0); - -- if (frame_pointer_needed -- && !(crtl->drap_reg && crtl->stack_realign_needed)) -+ if (GET_CODE (x) == PLUS) - { -- rtx push, mov; -+ if (!CONST_INT_P (XEXP (x, 1))) -+ return false; -+ x = XEXP (x, 0); -+ } - -- /* We've decided to use the frame pointer already set up. -- Describe this to the unwinder by pretending that both -- push and mov insns happen right here. 
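The decomposition and address-cost code above revolves around the single addressing form x86 supports; the sketch below spells that form out with a toy structure. The field names echo struct ix86_address, but this is not the GCC type, and the example operand is invented.

#include <stdio.h>

struct addr_parts
{
  long base;    /* contents of the base register, if any */
  long index;   /* contents of the index register, if any */
  int  scale;   /* 1, 2, 4 or 8 */
  long disp;    /* constant displacement */
};

static long
effective_address (const struct addr_parts *p)
{
  /* base + index*scale + disp: the form every valid operand must fit.  */
  return p->base + p->index * p->scale + p->disp;
}

int
main (void)
{
  struct addr_parts a = { 0x1000, 4, 8, 16 };   /* like 16(%rax,%rcx,8) */
  printf ("0x%lx\n", effective_address (&a));
  return 0;
}

The seg field of the real structure covers the %fs/%gs-relative forms used for TLS, which this toy version leaves out.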
-+ if (TARGET_MACHO && darwin_local_data_pic (x)) -+ return true; - -- Putting the unwind info here at the end of the ms_hook -- is done so that we can make absolutely certain we get -- the required byte sequence at the start of the function, -- rather than relying on an assembler that can produce -- the exact encoding required. -+ /* Only some unspecs are valid as "constants". */ -+ if (GET_CODE (x) == UNSPEC) -+ switch (XINT (x, 1)) -+ { -+ case UNSPEC_GOT: -+ case UNSPEC_GOTOFF: -+ case UNSPEC_PLTOFF: -+ return TARGET_64BIT; -+ case UNSPEC_TPOFF: -+ case UNSPEC_NTPOFF: -+ x = XVECEXP (x, 0, 0); -+ return (GET_CODE (x) == SYMBOL_REF -+ && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC); -+ case UNSPEC_DTPOFF: -+ x = XVECEXP (x, 0, 0); -+ return (GET_CODE (x) == SYMBOL_REF -+ && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC); -+ default: -+ return false; -+ } - -- However it does mean (in the unpatched case) that we have -- a 1 insn window where the asynchronous unwind info is -- incorrect. However, if we placed the unwind info at -- its correct location we would have incorrect unwind info -- in the patched case. Which is probably all moot since -- I don't expect Wine generates dwarf2 unwind info for the -- system libraries that use this feature. */ -+ /* We must have drilled down to a symbol. */ -+ if (GET_CODE (x) == LABEL_REF) -+ return true; -+ if (GET_CODE (x) != SYMBOL_REF) -+ return false; -+ /* FALLTHRU */ - -- insn = emit_insn (gen_blockage ()); -+ case SYMBOL_REF: -+ /* TLS symbols are never valid. */ -+ if (SYMBOL_REF_TLS_MODEL (x)) -+ return false; - -- push = gen_push (hard_frame_pointer_rtx); -- mov = gen_rtx_SET (hard_frame_pointer_rtx, -- stack_pointer_rtx); -- RTX_FRAME_RELATED_P (push) = 1; -- RTX_FRAME_RELATED_P (mov) = 1; -+ /* DLLIMPORT symbols are never valid. */ -+ if (TARGET_DLLIMPORT_DECL_ATTRIBUTES -+ && SYMBOL_REF_DLLIMPORT_P (x)) -+ return false; - -- RTX_FRAME_RELATED_P (insn) = 1; -- add_reg_note (insn, REG_FRAME_RELATED_EXPR, -- gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov))); -+#if TARGET_MACHO -+ /* mdynamic-no-pic */ -+ if (MACHO_DYNAMIC_NO_PIC_P) -+ return machopic_symbol_defined_p (x); -+#endif - -- /* Note that gen_push incremented m->fs.cfa_offset, even -- though we didn't emit the push insn here. */ -- m->fs.cfa_reg = hard_frame_pointer_rtx; -- m->fs.fp_offset = m->fs.cfa_offset; -- m->fs.fp_valid = true; -- } -- else -+ /* External function address should be loaded -+ via the GOT slot to avoid PLT. */ -+ if (ix86_force_load_from_GOT_p (x)) -+ return false; -+ -+ break; -+ -+ CASE_CONST_SCALAR_INT: -+ switch (mode) - { -- /* The frame pointer is not needed so pop %ebp again. -- This leaves us with a pristine state. */ -- emit_insn (gen_pop (hard_frame_pointer_rtx)); -+ case E_TImode: -+ if (TARGET_64BIT) -+ return true; -+ /* FALLTHRU */ -+ case E_OImode: -+ case E_XImode: -+ if (!standard_sse_constant_p (x, mode)) -+ return false; -+ default: -+ break; - } -+ break; -+ -+ case CONST_VECTOR: -+ if (!standard_sse_constant_p (x, mode)) -+ return false; -+ -+ default: -+ break; - } - -- /* The first insn of a function that accepts its static chain on the -- stack is to push the register that would be filled in by a direct -- call. This insn will be skipped by the trampoline. */ -- else if (ix86_static_chain_on_stack) -- { -- static_chain = ix86_static_chain (cfun->decl, false); -- insn = emit_insn (gen_push (static_chain)); -- emit_insn (gen_blockage ()); -+ /* Otherwise we handle everything else in the move patterns. 
*/ -+ return true; -+} - -- /* We don't want to interpret this push insn as a register save, -- only as a stack adjustment. The real copy of the register as -- a save will be done later, if needed. */ -- t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD); -- t = gen_rtx_SET (stack_pointer_rtx, t); -- add_reg_note (insn, REG_CFA_ADJUST_CFA, t); -- RTX_FRAME_RELATED_P (insn) = 1; -- } -+/* Determine if it's legal to put X into the constant pool. This -+ is not possible for the address of thread-local symbols, which -+ is checked above. */ - -- /* Emit prologue code to adjust stack alignment and setup DRAP, in case -- of DRAP is needed and stack realignment is really needed after reload */ -- if (stack_realign_drap) -+static bool -+ix86_cannot_force_const_mem (machine_mode mode, rtx x) -+{ -+ /* We can put any immediate constant in memory. */ -+ switch (GET_CODE (x)) - { -- int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT; -+ CASE_CONST_ANY: -+ return false; - -- /* Can't use DRAP in interrupt function. */ -- if (cfun->machine->func_type != TYPE_NORMAL) -- sorry ("Dynamic Realign Argument Pointer (DRAP) not supported " -- "in interrupt service routine. This may be worked " -- "around by avoiding functions with aggregate return."); -+ default: -+ break; -+ } - -- /* Only need to push parameter pointer reg if it is caller saved. */ -- if (!call_used_regs[REGNO (crtl->drap_reg)]) -- { -- /* Push arg pointer reg */ -- insn = emit_insn (gen_push (crtl->drap_reg)); -- RTX_FRAME_RELATED_P (insn) = 1; -- } -+ return !ix86_legitimate_constant_p (mode, x); -+} - -- /* Grab the argument pointer. */ -- t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset); -- insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t)); -- RTX_FRAME_RELATED_P (insn) = 1; -- m->fs.cfa_reg = crtl->drap_reg; -- m->fs.cfa_offset = 0; -+/* Nonzero if the symbol is marked as dllimport, or as stub-variable, -+ otherwise zero. */ - -- /* Align the stack. */ -- insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx, -- stack_pointer_rtx, -- GEN_INT (-align_bytes))); -- RTX_FRAME_RELATED_P (insn) = 1; -+static bool -+is_imported_p (rtx x) -+{ -+ if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES -+ || GET_CODE (x) != SYMBOL_REF) -+ return false; - -- /* Replicate the return address on the stack so that return -- address can be reached via (argp - 1) slot. This is needed -- to implement macro RETURN_ADDR_RTX and intrinsic function -- expand_builtin_return_addr etc. */ -- t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD); -- t = gen_frame_mem (word_mode, t); -- insn = emit_insn (gen_push (t)); -- RTX_FRAME_RELATED_P (insn) = 1; -+ return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x); -+} - -- /* For the purposes of frame and register save area addressing, -- we've started over with a new frame. */ -- m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET; -- m->fs.realigned = true; - -- if (static_chain) -- { -- /* Replicate static chain on the stack so that static chain -- can be reached via (argp - 2) slot. This is needed for -- nested function with stack realignment. */ -- insn = emit_insn (gen_push (static_chain)); -- RTX_FRAME_RELATED_P (insn) = 1; -- } -- } -+/* Nonzero if the constant value X is a legitimate general operand -+ when generating PIC code. It is given that flag_pic is on and -+ that X satisfies CONSTANT_P. 
*/ - -- int_registers_saved = (frame.nregs == 0); -- sse_registers_saved = (frame.nsseregs == 0); -- save_stub_call_needed = (m->call_ms2sysv); -- gcc_assert (sse_registers_saved || !save_stub_call_needed); -+bool -+legitimate_pic_operand_p (rtx x) -+{ -+ rtx inner; - -- if (frame_pointer_needed && !m->fs.fp_valid) -+ switch (GET_CODE (x)) - { -- /* Note: AT&T enter does NOT have reversed args. Enter is probably -- slower on all targets. Also sdb didn't like it. */ -- insn = emit_insn (gen_push (hard_frame_pointer_rtx)); -- RTX_FRAME_RELATED_P (insn) = 1; -+ case CONST: -+ inner = XEXP (x, 0); -+ if (GET_CODE (inner) == PLUS -+ && CONST_INT_P (XEXP (inner, 1))) -+ inner = XEXP (inner, 0); - -- /* Push registers now, before setting the frame pointer -- on SEH target. */ -- if (!int_registers_saved -- && TARGET_SEH -- && !frame.save_regs_using_mov) -- { -- ix86_emit_save_regs (); -- int_registers_saved = true; -- gcc_assert (m->fs.sp_offset == frame.reg_save_offset); -- } -+ /* Only some unspecs are valid as "constants". */ -+ if (GET_CODE (inner) == UNSPEC) -+ switch (XINT (inner, 1)) -+ { -+ case UNSPEC_GOT: -+ case UNSPEC_GOTOFF: -+ case UNSPEC_PLTOFF: -+ return TARGET_64BIT; -+ case UNSPEC_TPOFF: -+ x = XVECEXP (inner, 0, 0); -+ return (GET_CODE (x) == SYMBOL_REF -+ && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC); -+ case UNSPEC_MACHOPIC_OFFSET: -+ return legitimate_pic_address_disp_p (x); -+ default: -+ return false; -+ } -+ /* FALLTHRU */ - -- if (m->fs.sp_offset == frame.hard_frame_pointer_offset) -- { -- insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); -- RTX_FRAME_RELATED_P (insn) = 1; -+ case SYMBOL_REF: -+ case LABEL_REF: -+ return legitimate_pic_address_disp_p (x); - -- if (m->fs.cfa_reg == stack_pointer_rtx) -- m->fs.cfa_reg = hard_frame_pointer_rtx; -- m->fs.fp_offset = m->fs.sp_offset; -- m->fs.fp_valid = true; -- } -+ default: -+ return true; - } -+} - -- if (!int_registers_saved) -- { -- /* If saving registers via PUSH, do so now. */ -- if (!frame.save_regs_using_mov) -- { -- ix86_emit_save_regs (); -- int_registers_saved = true; -- gcc_assert (m->fs.sp_offset == frame.reg_save_offset); -- } -+/* Determine if a given CONST RTX is a valid memory displacement -+ in PIC mode. */ - -- /* When using red zone we may start register saving before allocating -- the stack frame saving one cycle of the prologue. However, avoid -- doing this if we have to probe the stack; at least on x86_64 the -- stack probe can turn into a call that clobbers a red zone location. */ -- else if (ix86_using_red_zone () -- && (! TARGET_STACK_PROBE -- || frame.stack_pointer_offset < CHECK_STACK_LIMIT)) -- { -- ix86_emit_save_regs_using_mov (frame.reg_save_offset); -- int_registers_saved = true; -- } -- } -+bool -+legitimate_pic_address_disp_p (rtx disp) -+{ -+ bool saw_plus; - -- if (stack_realign_fp) -+ /* In 64bit mode we can allow direct addresses of symbols and labels -+ when they are not dynamic symbols. */ -+ if (TARGET_64BIT) - { -- int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT; -- gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT); -+ rtx op0 = disp, op1; - -- /* Record last valid frame pointer offset. */ -- m->fs.sp_realigned_fp_last = frame.reg_save_offset; -+ switch (GET_CODE (disp)) -+ { -+ case LABEL_REF: -+ return true; - -- /* The computation of the size of the re-aligned stack frame means -- that we must allocate the size of the register save area before -- performing the actual alignment. 
Otherwise we cannot guarantee -- that there's enough storage above the realignment point. */ -- allocate = frame.reg_save_offset - m->fs.sp_offset -- + frame.stack_realign_allocate; -- if (allocate) -- pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, -- GEN_INT (-allocate), -1, false); -+ case CONST: -+ if (GET_CODE (XEXP (disp, 0)) != PLUS) -+ break; -+ op0 = XEXP (XEXP (disp, 0), 0); -+ op1 = XEXP (XEXP (disp, 0), 1); -+ if (!CONST_INT_P (op1)) -+ break; -+ if (GET_CODE (op0) == UNSPEC -+ && (XINT (op0, 1) == UNSPEC_DTPOFF -+ || XINT (op0, 1) == UNSPEC_NTPOFF) -+ && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1)) -+ return true; -+ if (INTVAL (op1) >= 16*1024*1024 -+ || INTVAL (op1) < -16*1024*1024) -+ break; -+ if (GET_CODE (op0) == LABEL_REF) -+ return true; -+ if (GET_CODE (op0) == CONST -+ && GET_CODE (XEXP (op0, 0)) == UNSPEC -+ && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL) -+ return true; -+ if (GET_CODE (op0) == UNSPEC -+ && XINT (op0, 1) == UNSPEC_PCREL) -+ return true; -+ if (GET_CODE (op0) != SYMBOL_REF) -+ break; -+ /* FALLTHRU */ - -- /* Align the stack. */ -- insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx, -- stack_pointer_rtx, -- GEN_INT (-align_bytes))); -- m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes); -- m->fs.sp_realigned_offset = m->fs.sp_offset -- - frame.stack_realign_allocate; -- /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset. -- Beyond this point, stack access should be done via choose_baseaddr or -- by using sp_valid_at and fp_valid_at to determine the correct base -- register. Henceforth, any CFA offset should be thought of as logical -- and not physical. */ -- gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last); -- gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset); -- m->fs.sp_realigned = true; -+ case SYMBOL_REF: -+ /* TLS references should always be enclosed in UNSPEC. -+ The dllimported symbol needs always to be resolved. */ -+ if (SYMBOL_REF_TLS_MODEL (op0) -+ || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0))) -+ return false; - -- /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which -- is needed to describe where a register is saved using a realigned -- stack pointer, so we need to invalidate the stack pointer for that -- target. */ -- if (TARGET_SEH) -- m->fs.sp_valid = false; -- -- /* If SP offset is non-immediate after allocation of the stack frame, -- then emit SSE saves or stub call prior to allocating the rest of the -- stack frame. This is less efficient for the out-of-line stub because -- we can't combine allocations across the call barrier, but it's better -- than using a scratch register. */ -- else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset -- - m->fs.sp_realigned_offset), -- Pmode)) -- { -- if (!sse_registers_saved) -- { -- ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset); -- sse_registers_saved = true; -- } -- else if (save_stub_call_needed) -+ if (TARGET_PECOFF) - { -- ix86_emit_outlined_ms2sysv_save (frame); -- save_stub_call_needed = false; -+ if (is_imported_p (op0)) -+ return true; -+ -+ if (SYMBOL_REF_FAR_ADDR_P (op0) -+ || !SYMBOL_REF_LOCAL_P (op0)) -+ break; -+ -+ /* Function-symbols need to be resolved only for -+ large-model. -+ For the small-model we don't need to resolve anything -+ here. 
*/ -+ if ((ix86_cmodel != CM_LARGE_PIC -+ && SYMBOL_REF_FUNCTION_P (op0)) -+ || ix86_cmodel == CM_SMALL_PIC) -+ return true; -+ /* Non-external symbols don't need to be resolved for -+ large, and medium-model. */ -+ if ((ix86_cmodel == CM_LARGE_PIC -+ || ix86_cmodel == CM_MEDIUM_PIC) -+ && !SYMBOL_REF_EXTERNAL_P (op0)) -+ return true; - } -+ else if (!SYMBOL_REF_FAR_ADDR_P (op0) -+ && (SYMBOL_REF_LOCAL_P (op0) -+ || (HAVE_LD_PIE_COPYRELOC -+ && flag_pie -+ && !SYMBOL_REF_WEAK (op0) -+ && !SYMBOL_REF_FUNCTION_P (op0))) -+ && ix86_cmodel != CM_LARGE_PIC) -+ return true; -+ break; -+ -+ default: -+ break; - } - } -+ if (GET_CODE (disp) != CONST) -+ return false; -+ disp = XEXP (disp, 0); - -- allocate = frame.stack_pointer_offset - m->fs.sp_offset; -- -- if (flag_stack_usage_info) -+ if (TARGET_64BIT) - { -- /* We start to count from ARG_POINTER. */ -- HOST_WIDE_INT stack_size = frame.stack_pointer_offset; -+ /* We are unsafe to allow PLUS expressions. This limit allowed distance -+ of GOT tables. We should not need these anyway. */ -+ if (GET_CODE (disp) != UNSPEC -+ || (XINT (disp, 1) != UNSPEC_GOTPCREL -+ && XINT (disp, 1) != UNSPEC_GOTOFF -+ && XINT (disp, 1) != UNSPEC_PCREL -+ && XINT (disp, 1) != UNSPEC_PLTOFF)) -+ return false; - -- /* If it was realigned, take into account the fake frame. */ -- if (stack_realign_drap) -- { -- if (ix86_static_chain_on_stack) -- stack_size += UNITS_PER_WORD; -+ if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF -+ && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF) -+ return false; -+ return true; -+ } - -- if (!call_used_regs[REGNO (crtl->drap_reg)]) -- stack_size += UNITS_PER_WORD; -+ saw_plus = false; -+ if (GET_CODE (disp) == PLUS) -+ { -+ if (!CONST_INT_P (XEXP (disp, 1))) -+ return false; -+ disp = XEXP (disp, 0); -+ saw_plus = true; -+ } - -- /* This over-estimates by 1 minimal-stack-alignment-unit but -- mitigates that by counting in the new return address slot. */ -- current_function_dynamic_stack_size -- += crtl->stack_alignment_needed / BITS_PER_UNIT; -- } -+ if (TARGET_MACHO && darwin_local_data_pic (disp)) -+ return true; - -- current_function_static_stack_size = stack_size; -- } -+ if (GET_CODE (disp) != UNSPEC) -+ return false; - -- /* On SEH target with very large frame size, allocate an area to save -- SSE registers (as the very large allocation won't be described). */ -- if (TARGET_SEH -- && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE -- && !sse_registers_saved) -+ switch (XINT (disp, 1)) - { -- HOST_WIDE_INT sse_size -- = frame.sse_reg_save_offset - frame.reg_save_offset; -+ case UNSPEC_GOT: -+ if (saw_plus) -+ return false; -+ /* We need to check for both symbols and labels because VxWorks loads -+ text labels with @GOT rather than @GOTOFF. See gotoff_operand for -+ details. */ -+ return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF -+ || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF); -+ case UNSPEC_GOTOFF: -+ /* Refuse GOTOFF in 64bit mode since it is always 64bit when used. -+ While ABI specify also 32bit relocation but we don't produce it in -+ small PIC model at all. 
*/ -+ if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF -+ || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF) -+ && !TARGET_64BIT) -+ return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode); -+ return false; -+ case UNSPEC_GOTTPOFF: -+ case UNSPEC_GOTNTPOFF: -+ case UNSPEC_INDNTPOFF: -+ if (saw_plus) -+ return false; -+ disp = XVECEXP (disp, 0, 0); -+ return (GET_CODE (disp) == SYMBOL_REF -+ && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC); -+ case UNSPEC_NTPOFF: -+ disp = XVECEXP (disp, 0, 0); -+ return (GET_CODE (disp) == SYMBOL_REF -+ && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC); -+ case UNSPEC_DTPOFF: -+ disp = XVECEXP (disp, 0, 0); -+ return (GET_CODE (disp) == SYMBOL_REF -+ && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC); -+ } - -- gcc_assert (int_registers_saved); -+ return false; -+} - -- /* No need to do stack checking as the area will be immediately -- written. */ -- pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, -- GEN_INT (-sse_size), -1, -- m->fs.cfa_reg == stack_pointer_rtx); -- allocate -= sse_size; -- ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset); -- sse_registers_saved = true; -- } -+/* Determine if op is suitable RTX for an address register. -+ Return naked register if a register or a register subreg is -+ found, otherwise return NULL_RTX. */ - -- /* The stack has already been decremented by the instruction calling us -- so probe if the size is non-negative to preserve the protection area. */ -- if (allocate >= 0 -- && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK -- || flag_stack_clash_protection)) -+static rtx -+ix86_validate_address_register (rtx op) -+{ -+ machine_mode mode = GET_MODE (op); -+ -+ /* Only SImode or DImode registers can form the address. 
*/ -+ if (mode != SImode && mode != DImode) -+ return NULL_RTX; -+ -+ if (REG_P (op)) -+ return op; -+ else if (SUBREG_P (op)) - { -- if (flag_stack_clash_protection) -- { -- ix86_adjust_stack_and_probe_stack_clash (allocate, -- int_registers_saved); -- allocate = 0; -- } -- else if (STACK_CHECK_MOVING_SP) -- { -- if (!(crtl->is_leaf && !cfun->calls_alloca -- && allocate <= get_probe_interval ())) -- { -- ix86_adjust_stack_and_probe (allocate, int_registers_saved); -- allocate = 0; -- } -- } -- else -- { -- HOST_WIDE_INT size = allocate; -+ rtx reg = SUBREG_REG (op); - -- if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000)) -- size = 0x80000000 - get_stack_check_protect () - 1; -+ if (!REG_P (reg)) -+ return NULL_RTX; - -- if (TARGET_STACK_PROBE) -- { -- if (crtl->is_leaf && !cfun->calls_alloca) -- { -- if (size > get_probe_interval ()) -- ix86_emit_probe_stack_range (0, size, int_registers_saved); -- } -- else -- ix86_emit_probe_stack_range (0, -- size + get_stack_check_protect (), -- int_registers_saved); -- } -- else -- { -- if (crtl->is_leaf && !cfun->calls_alloca) -- { -- if (size > get_probe_interval () -- && size > get_stack_check_protect ()) -- ix86_emit_probe_stack_range (get_stack_check_protect (), -- (size -- - get_stack_check_protect ()), -- int_registers_saved); -- } -- else -- ix86_emit_probe_stack_range (get_stack_check_protect (), size, -- int_registers_saved); -- } -- } -- } -+ mode = GET_MODE (reg); - -- if (allocate == 0) -- ; -- else if (!ix86_target_stack_probe () -- || frame.stack_pointer_offset < CHECK_STACK_LIMIT) -- { -- pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, -- GEN_INT (-allocate), -1, -- m->fs.cfa_reg == stack_pointer_rtx); -- } -- else -- { -- rtx eax = gen_rtx_REG (Pmode, AX_REG); -- rtx r10 = NULL; -- rtx (*adjust_stack_insn)(rtx, rtx, rtx); -- const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx); -- bool eax_live = ix86_eax_live_at_start_p (); -- bool r10_live = false; -+ /* Don't allow SUBREGs that span more than a word. It can -+ lead to spill failures when the register is one word out -+ of a two word structure. */ -+ if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) -+ return NULL_RTX; - -- if (TARGET_64BIT) -- r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0); -+ /* Allow only SUBREGs of non-eliminable hard registers. */ -+ if (register_no_elim_operand (reg, mode)) -+ return reg; -+ } - -- if (eax_live) -- { -- insn = emit_insn (gen_push (eax)); -- allocate -= UNITS_PER_WORD; -- /* Note that SEH directives need to continue tracking the stack -- pointer even after the frame pointer has been set up. */ -- if (sp_is_cfa_reg || TARGET_SEH) -- { -- if (sp_is_cfa_reg) -- m->fs.cfa_offset += UNITS_PER_WORD; -- RTX_FRAME_RELATED_P (insn) = 1; -- add_reg_note (insn, REG_FRAME_RELATED_EXPR, -- gen_rtx_SET (stack_pointer_rtx, -- plus_constant (Pmode, stack_pointer_rtx, -- -UNITS_PER_WORD))); -- } -- } -- -- if (r10_live) -- { -- r10 = gen_rtx_REG (Pmode, R10_REG); -- insn = emit_insn (gen_push (r10)); -- allocate -= UNITS_PER_WORD; -- if (sp_is_cfa_reg || TARGET_SEH) -- { -- if (sp_is_cfa_reg) -- m->fs.cfa_offset += UNITS_PER_WORD; -- RTX_FRAME_RELATED_P (insn) = 1; -- add_reg_note (insn, REG_FRAME_RELATED_EXPR, -- gen_rtx_SET (stack_pointer_rtx, -- plus_constant (Pmode, stack_pointer_rtx, -- -UNITS_PER_WORD))); -- } -- } -+ /* Op is not a register. 
*/ -+ return NULL_RTX; -+} - -- emit_move_insn (eax, GEN_INT (allocate)); -- emit_insn (ix86_gen_allocate_stack_worker (eax, eax)); -+/* Recognizes RTL expressions that are valid memory addresses for an -+ instruction. The MODE argument is the machine mode for the MEM -+ expression that wants to use this address. - -- /* Use the fact that AX still contains ALLOCATE. */ -- adjust_stack_insn = (Pmode == DImode -- ? gen_pro_epilogue_adjust_stack_di_sub -- : gen_pro_epilogue_adjust_stack_si_sub); -+ It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should -+ convert common non-canonical forms to canonical form so that they will -+ be recognized. */ - -- insn = emit_insn (adjust_stack_insn (stack_pointer_rtx, -- stack_pointer_rtx, eax)); -+static bool -+ix86_legitimate_address_p (machine_mode, rtx addr, bool strict) -+{ -+ struct ix86_address parts; -+ rtx base, index, disp; -+ HOST_WIDE_INT scale; -+ addr_space_t seg; - -- if (sp_is_cfa_reg || TARGET_SEH) -- { -- if (sp_is_cfa_reg) -- m->fs.cfa_offset += allocate; -- RTX_FRAME_RELATED_P (insn) = 1; -- add_reg_note (insn, REG_FRAME_RELATED_EXPR, -- gen_rtx_SET (stack_pointer_rtx, -- plus_constant (Pmode, stack_pointer_rtx, -- -allocate))); -- } -- m->fs.sp_offset += allocate; -+ if (ix86_decompose_address (addr, &parts) <= 0) -+ /* Decomposition failed. */ -+ return false; - -- /* Use stack_pointer_rtx for relative addressing so that code works for -- realigned stack. But this means that we need a blockage to prevent -- stores based on the frame pointer from being scheduled before. */ -- if (r10_live && eax_live) -- { -- t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax); -- emit_move_insn (gen_rtx_REG (word_mode, R10_REG), -- gen_frame_mem (word_mode, t)); -- t = plus_constant (Pmode, t, UNITS_PER_WORD); -- emit_move_insn (gen_rtx_REG (word_mode, AX_REG), -- gen_frame_mem (word_mode, t)); -- emit_insn (gen_memory_blockage ()); -- } -- else if (eax_live || r10_live) -- { -- t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax); -- emit_move_insn (gen_rtx_REG (word_mode, -- (eax_live ? AX_REG : R10_REG)), -- gen_frame_mem (word_mode, t)); -- emit_insn (gen_memory_blockage ()); -- } -- } -- gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset); -+ base = parts.base; -+ index = parts.index; -+ disp = parts.disp; -+ scale = parts.scale; -+ seg = parts.seg; - -- /* If we havn't already set up the frame pointer, do so now. */ -- if (frame_pointer_needed && !m->fs.fp_valid) -+ /* Validate base register. */ -+ if (base) - { -- insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx, -- GEN_INT (frame.stack_pointer_offset -- - frame.hard_frame_pointer_offset)); -- insn = emit_insn (insn); -- RTX_FRAME_RELATED_P (insn) = 1; -- add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL); -- -- if (m->fs.cfa_reg == stack_pointer_rtx) -- m->fs.cfa_reg = hard_frame_pointer_rtx; -- m->fs.fp_offset = frame.hard_frame_pointer_offset; -- m->fs.fp_valid = true; -- } -+ rtx reg = ix86_validate_address_register (base); - -- if (!int_registers_saved) -- ix86_emit_save_regs_using_mov (frame.reg_save_offset); -- if (!sse_registers_saved) -- ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset); -- else if (save_stub_call_needed) -- ix86_emit_outlined_ms2sysv_save (frame); -+ if (reg == NULL_RTX) -+ return false; - -- /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT -- in PROLOGUE. 
*/ -- if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry) -- { -- rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM); -- insn = emit_insn (gen_set_got (pic)); -- RTX_FRAME_RELATED_P (insn) = 1; -- add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX); -- emit_insn (gen_prologue_use (pic)); -- /* Deleting already emmitted SET_GOT if exist and allocated to -- REAL_PIC_OFFSET_TABLE_REGNUM. */ -- ix86_elim_entry_set_got (pic); -+ if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg)) -+ || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg))) -+ /* Base is not valid. */ -+ return false; - } - -- if (crtl->drap_reg && !crtl->stack_realign_needed) -+ /* Validate index register. */ -+ if (index) - { -- /* vDRAP is setup but after reload it turns out stack realign -- isn't necessary, here we will emit prologue to setup DRAP -- without stack realign adjustment */ -- t = choose_baseaddr (0, NULL); -- emit_insn (gen_rtx_SET (crtl->drap_reg, t)); -- } -- -- /* Prevent instructions from being scheduled into register save push -- sequence when access to the redzone area is done through frame pointer. -- The offset between the frame pointer and the stack pointer is calculated -- relative to the value of the stack pointer at the end of the function -- prologue, and moving instructions that access redzone area via frame -- pointer inside push sequence violates this assumption. */ -- if (frame_pointer_needed && frame.red_zone_size) -- emit_insn (gen_memory_blockage ()); -+ rtx reg = ix86_validate_address_register (index); - -- /* SEH requires that the prologue end within 256 bytes of the start of -- the function. Prevent instruction schedules that would extend that. -- Further, prevent alloca modifications to the stack pointer from being -- combined with prologue modifications. */ -- if (TARGET_SEH) -- emit_insn (gen_prologue_use (stack_pointer_rtx)); --} -+ if (reg == NULL_RTX) -+ return false; - --/* Emit code to restore REG using a POP insn. */ -+ if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg)) -+ || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg))) -+ /* Index is not valid. */ -+ return false; -+ } - --static void --ix86_emit_restore_reg_using_pop (rtx reg) --{ -- struct machine_function *m = cfun->machine; -- rtx_insn *insn = emit_insn (gen_pop (reg)); -+ /* Index and base should have the same mode. */ -+ if (base && index -+ && GET_MODE (base) != GET_MODE (index)) -+ return false; - -- ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset); -- m->fs.sp_offset -= UNITS_PER_WORD; -+ /* Address override works only on the (%reg) part of %fs:(%reg). */ -+ if (seg != ADDR_SPACE_GENERIC -+ && ((base && GET_MODE (base) != word_mode) -+ || (index && GET_MODE (index) != word_mode))) -+ return false; - -- if (m->fs.cfa_reg == crtl->drap_reg -- && REGNO (reg) == REGNO (crtl->drap_reg)) -+ /* Validate scale factor. */ -+ if (scale != 1) - { -- /* Previously we'd represented the CFA as an expression -- like *(%ebp - 8). We've just popped that value from -- the stack, which means we need to reset the CFA to -- the drap register. This will remain until we restore -- the stack pointer. */ -- add_reg_note (insn, REG_CFA_DEF_CFA, reg); -- RTX_FRAME_RELATED_P (insn) = 1; -+ if (!index) -+ /* Scale without index. */ -+ return false; - -- /* This means that the DRAP register is valid for addressing too. */ -- m->fs.drap_valid = true; -- return; -+ if (scale != 2 && scale != 4 && scale != 8) -+ /* Scale is not a valid multiplier. 
*/ -+ return false; - } - -- if (m->fs.cfa_reg == stack_pointer_rtx) -+ /* Validate displacement. */ -+ if (disp) - { -- rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); -- x = gen_rtx_SET (stack_pointer_rtx, x); -- add_reg_note (insn, REG_CFA_ADJUST_CFA, x); -- RTX_FRAME_RELATED_P (insn) = 1; -- -- m->fs.cfa_offset -= UNITS_PER_WORD; -- } -+ if (GET_CODE (disp) == CONST -+ && GET_CODE (XEXP (disp, 0)) == UNSPEC -+ && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET) -+ switch (XINT (XEXP (disp, 0), 1)) -+ { -+ /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit -+ when used. While ABI specify also 32bit relocations, we -+ don't produce them at all and use IP relative instead. -+ Allow GOT in 32bit mode for both PIC and non-PIC if symbol -+ should be loaded via GOT. */ -+ case UNSPEC_GOT: -+ if (!TARGET_64BIT -+ && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0))) -+ goto is_legitimate_pic; -+ /* FALLTHRU */ -+ case UNSPEC_GOTOFF: -+ gcc_assert (flag_pic); -+ if (!TARGET_64BIT) -+ goto is_legitimate_pic; - -- /* When the frame pointer is the CFA, and we pop it, we are -- swapping back to the stack pointer as the CFA. This happens -- for stack frames that don't allocate other data, so we assume -- the stack pointer is now pointing at the return address, i.e. -- the function entry state, which makes the offset be 1 word. */ -- if (reg == hard_frame_pointer_rtx) -- { -- m->fs.fp_valid = false; -- if (m->fs.cfa_reg == hard_frame_pointer_rtx) -- { -- m->fs.cfa_reg = stack_pointer_rtx; -- m->fs.cfa_offset -= UNITS_PER_WORD; -+ /* 64bit address unspec. */ -+ return false; - -- add_reg_note (insn, REG_CFA_DEF_CFA, -- gen_rtx_PLUS (Pmode, stack_pointer_rtx, -- GEN_INT (m->fs.cfa_offset))); -- RTX_FRAME_RELATED_P (insn) = 1; -- } -- } --} -- --/* Emit code to restore saved registers using POP insns. */ -+ case UNSPEC_GOTPCREL: -+ if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0))) -+ goto is_legitimate_pic; -+ /* FALLTHRU */ -+ case UNSPEC_PCREL: -+ gcc_assert (flag_pic); -+ goto is_legitimate_pic; - --static void --ix86_emit_restore_regs_using_pop (void) --{ -- unsigned int regno; -+ case UNSPEC_GOTTPOFF: -+ case UNSPEC_GOTNTPOFF: -+ case UNSPEC_INDNTPOFF: -+ case UNSPEC_NTPOFF: -+ case UNSPEC_DTPOFF: -+ break; - -- for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) -- if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true)) -- ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno)); --} -+ default: -+ /* Invalid address unspec. */ -+ return false; -+ } - --/* Emit code and notes for the LEAVE instruction. If insn is non-null, -- omits the emit and only attaches the notes. */ -+ else if (SYMBOLIC_CONST (disp) -+ && (flag_pic -+ || (TARGET_MACHO -+#if TARGET_MACHO -+ && MACHOPIC_INDIRECT -+ && !machopic_operand_p (disp) -+#endif -+ ))) -+ { - --static void --ix86_emit_leave (rtx_insn *insn) --{ -- struct machine_function *m = cfun->machine; -- if (!insn) -- insn = emit_insn (ix86_gen_leave ()); -+ is_legitimate_pic: -+ if (TARGET_64BIT && (index || base)) -+ { -+ /* foo@dtpoff(%rX) is ok. */ -+ if (GET_CODE (disp) != CONST -+ || GET_CODE (XEXP (disp, 0)) != PLUS -+ || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC -+ || !CONST_INT_P (XEXP (XEXP (disp, 0), 1)) -+ || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF -+ && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF)) -+ /* Non-constant pic memory reference. */ -+ return false; -+ } -+ else if ((!TARGET_MACHO || flag_pic) -+ && ! 
legitimate_pic_address_disp_p (disp)) -+ /* Displacement is an invalid pic construct. */ -+ return false; -+#if TARGET_MACHO -+ else if (MACHO_DYNAMIC_NO_PIC_P -+ && !ix86_legitimate_constant_p (Pmode, disp)) -+ /* displacment must be referenced via non_lazy_pointer */ -+ return false; -+#endif - -- ix86_add_queued_cfa_restore_notes (insn); -+ /* This code used to verify that a symbolic pic displacement -+ includes the pic_offset_table_rtx register. - -- gcc_assert (m->fs.fp_valid); -- m->fs.sp_valid = true; -- m->fs.sp_realigned = false; -- m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD; -- m->fs.fp_valid = false; -+ While this is good idea, unfortunately these constructs may -+ be created by "adds using lea" optimization for incorrect -+ code like: - -- if (m->fs.cfa_reg == hard_frame_pointer_rtx) -- { -- m->fs.cfa_reg = stack_pointer_rtx; -- m->fs.cfa_offset = m->fs.sp_offset; -+ int a; -+ int foo(int i) -+ { -+ return *(&a+i); -+ } - -- add_reg_note (insn, REG_CFA_DEF_CFA, -- plus_constant (Pmode, stack_pointer_rtx, -- m->fs.sp_offset)); -- RTX_FRAME_RELATED_P (insn) = 1; -+ This code is nonsensical, but results in addressing -+ GOT table with pic_offset_table_rtx base. We can't -+ just refuse it easily, since it gets matched by -+ "addsi3" pattern, that later gets split to lea in the -+ case output register differs from input. While this -+ can be handled by separate addsi pattern for this case -+ that never results in lea, this seems to be easier and -+ correct fix for crash to disable this test. */ -+ } -+ else if (GET_CODE (disp) != LABEL_REF -+ && !CONST_INT_P (disp) -+ && (GET_CODE (disp) != CONST -+ || !ix86_legitimate_constant_p (Pmode, disp)) -+ && (GET_CODE (disp) != SYMBOL_REF -+ || !ix86_legitimate_constant_p (Pmode, disp))) -+ /* Displacement is not constant. */ -+ return false; -+ else if (TARGET_64BIT -+ && !x86_64_immediate_operand (disp, VOIDmode)) -+ /* Displacement is out of range. */ -+ return false; -+ /* In x32 mode, constant addresses are sign extended to 64bit, so -+ we have to prevent addresses from 0x80000000 to 0xffffffff. */ -+ else if (TARGET_X32 && !(index || base) -+ && CONST_INT_P (disp) -+ && val_signbit_known_set_p (SImode, INTVAL (disp))) -+ return false; - } -- ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx, -- m->fs.fp_offset); --} -- --/* Emit code to restore saved registers using MOV insns. -- First register is restored from CFA - CFA_OFFSET. */ --static void --ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset, -- bool maybe_eh_return) --{ -- struct machine_function *m = cfun->machine; -- unsigned int regno; -- -- for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) -- if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true)) -- { -- rtx reg = gen_rtx_REG (word_mode, regno); -- rtx mem; -- rtx_insn *insn; -- -- mem = choose_baseaddr (cfa_offset, NULL); -- mem = gen_frame_mem (word_mode, mem); -- insn = emit_move_insn (reg, mem); - -- if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg)) -- { -- /* Previously we'd represented the CFA as an expression -- like *(%ebp - 8). We've just popped that value from -- the stack, which means we need to reset the CFA to -- the drap register. This will remain until we restore -- the stack pointer. */ -- add_reg_note (insn, REG_CFA_DEF_CFA, reg); -- RTX_FRAME_RELATED_P (insn) = 1; -+ /* Everything looks valid. */ -+ return true; -+} - -- /* This means that the DRAP register is valid for addressing. 
*/ -- m->fs.drap_valid = true; -- } -- else -- ix86_add_cfa_restore_note (NULL, reg, cfa_offset); -+/* Determine if a given RTX is a valid constant address. */ - -- cfa_offset -= UNITS_PER_WORD; -- } -+bool -+constant_address_p (rtx x) -+{ -+ return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1); - } -+ -+/* Return a unique alias set for the GOT. */ - --/* Emit code to restore saved registers using MOV insns. -- First register is restored from CFA - CFA_OFFSET. */ --static void --ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset, -- bool maybe_eh_return) -+alias_set_type -+ix86_GOT_alias_set (void) - { -- unsigned int regno; -+ static alias_set_type set = -1; -+ if (set == -1) -+ set = new_alias_set (); -+ return set; -+} - -- for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) -- if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true)) -- { -- rtx reg = gen_rtx_REG (V4SFmode, regno); -- rtx mem; -- unsigned int align = GET_MODE_ALIGNMENT (V4SFmode); -+/* Return a legitimate reference for ORIG (an address) using the -+ register REG. If REG is 0, a new pseudo is generated. - -- mem = choose_baseaddr (cfa_offset, &align); -- mem = gen_rtx_MEM (V4SFmode, mem); -+ There are two types of references that must be handled: - -- /* The location aligment depends upon the base register. */ -- align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align); -- gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1))); -- set_mem_align (mem, align); -- emit_insn (gen_rtx_SET (reg, mem)); -+ 1. Global data references must load the address from the GOT, via -+ the PIC reg. An insn is emitted to do this load, and the reg is -+ returned. - -- ix86_add_cfa_restore_note (NULL, reg, cfa_offset); -+ 2. Static data references, constant pool addresses, and code labels -+ compute the address as an offset from the GOT, whose base is in -+ the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to -+ differentiate them from global data objects. The returned -+ address is the PIC reg + an unspec constant. - -- cfa_offset -= GET_MODE_SIZE (V4SFmode); -- } --} -+ TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC -+ reg also appears in the address. */ - --static void --ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame, -- bool use_call, int style) -+rtx -+legitimize_pic_address (rtx orig, rtx reg) - { -- struct machine_function *m = cfun->machine; -- const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS -- + m->call_ms2sysv_extra_regs; -- rtvec v; -- unsigned int elems_needed, align, i, vi = 0; -- rtx_insn *insn; -- rtx sym, tmp; -- rtx rsi = gen_rtx_REG (word_mode, SI_REG); -- rtx r10 = NULL_RTX; -- const struct xlogue_layout &xlogue = xlogue_layout::get_instance (); -- HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset (); -- HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset; -- rtx rsi_frame_load = NULL_RTX; -- HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1; -- enum xlogue_stub stub; -+ rtx addr = orig; -+ rtx new_rtx = orig; - -- gcc_assert (!m->fs.fp_valid || frame_pointer_needed); -+#if TARGET_MACHO -+ if (TARGET_MACHO && !TARGET_64BIT) -+ { -+ if (reg == 0) -+ reg = gen_reg_rtx (Pmode); -+ /* Use the generic Mach-O PIC machinery. */ -+ return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg); -+ } -+#endif - -- /* If using a realigned stack, we should never start with padding. 
*/ -- gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ()); -+ if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES) -+ { -+ rtx tmp = legitimize_pe_coff_symbol (addr, true); -+ if (tmp) -+ return tmp; -+ } - -- /* Setup RSI as the stub's base pointer. */ -- align = GET_MODE_ALIGNMENT (V4SFmode); -- tmp = choose_baseaddr (rsi_offset, &align, SI_REG); -- gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode)); -+ if (TARGET_64BIT && legitimate_pic_address_disp_p (addr)) -+ new_rtx = addr; -+ else if ((!TARGET_64BIT -+ || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC) -+ && !TARGET_PECOFF -+ && gotoff_operand (addr, Pmode)) -+ { -+ /* This symbol may be referenced via a displacement -+ from the PIC base address (@GOTOFF). */ -+ if (GET_CODE (addr) == CONST) -+ addr = XEXP (addr, 0); - -- emit_insn (gen_rtx_SET (rsi, tmp)); -+ if (GET_CODE (addr) == PLUS) -+ { -+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), -+ UNSPEC_GOTOFF); -+ new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1)); -+ } -+ else -+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF); - -- /* Get a symbol for the stub. */ -- if (frame_pointer_needed) -- stub = use_call ? XLOGUE_STUB_RESTORE_HFP -- : XLOGUE_STUB_RESTORE_HFP_TAIL; -- else -- stub = use_call ? XLOGUE_STUB_RESTORE -- : XLOGUE_STUB_RESTORE_TAIL; -- sym = xlogue.get_stub_rtx (stub); -+ new_rtx = gen_rtx_CONST (Pmode, new_rtx); - -- elems_needed = ncregs; -- if (use_call) -- elems_needed += 1; -- else -- elems_needed += frame_pointer_needed ? 5 : 3; -- v = rtvec_alloc (elems_needed); -+ if (TARGET_64BIT) -+ new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode); - -- /* We call the epilogue stub when we need to pop incoming args or we are -- doing a sibling call as the tail. Otherwise, we will emit a jmp to the -- epilogue stub and it is the tail-call. */ -- if (use_call) -- RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym); -- else -- { -- RTVEC_ELT (v, vi++) = ret_rtx; -- RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym); -- if (frame_pointer_needed) -+ if (reg != 0) - { -- rtx rbp = gen_rtx_REG (DImode, BP_REG); -- gcc_assert (m->fs.fp_valid); -- gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx); -- -- tmp = gen_rtx_PLUS (DImode, rbp, GEN_INT (8)); -- RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp); -- RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp)); -- tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode)); -- RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp); -- } -+ gcc_assert (REG_P (reg)); -+ new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx, -+ new_rtx, reg, 1, OPTAB_DIRECT); -+ } - else -- { -- /* If no hard frame pointer, we set R10 to the SP restore value. */ -- gcc_assert (!m->fs.fp_valid); -- gcc_assert (m->fs.cfa_reg == stack_pointer_rtx); -- gcc_assert (m->fs.sp_valid); -- -- r10 = gen_rtx_REG (DImode, R10_REG); -- tmp = gen_rtx_PLUS (Pmode, rsi, GEN_INT (stub_ptr_offset)); -- emit_insn (gen_rtx_SET (r10, tmp)); -- -- RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10); -- } -+ new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx); - } -- -- /* Generate frame load insns and restore notes. */ -- for (i = 0; i < ncregs; ++i) -+ else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0) -+ /* We can't use @GOTOFF for text labels -+ on VxWorks, see gotoff_operand. 
*/ -+ || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF)) - { -- const xlogue_layout::reginfo &r = xlogue.get_reginfo (i); -- machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode; -- rtx reg, frame_load; -- -- reg = gen_rtx_REG (mode, r.regno); -- frame_load = gen_frame_load (reg, rsi, r.offset); -+ rtx tmp = legitimize_pe_coff_symbol (addr, true); -+ if (tmp) -+ return tmp; - -- /* Save RSI frame load insn & note to add last. */ -- if (r.regno == SI_REG) -+ /* For x64 PE-COFF there is no GOT table, -+ so we use address directly. */ -+ if (TARGET_64BIT && TARGET_PECOFF) - { -- gcc_assert (!rsi_frame_load); -- rsi_frame_load = frame_load; -- rsi_restore_offset = r.offset; -+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL); -+ new_rtx = gen_rtx_CONST (Pmode, new_rtx); -+ } -+ else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC) -+ { -+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), -+ UNSPEC_GOTPCREL); -+ new_rtx = gen_rtx_CONST (Pmode, new_rtx); -+ new_rtx = gen_const_mem (Pmode, new_rtx); -+ set_mem_alias_set (new_rtx, ix86_GOT_alias_set ()); - } - else - { -- RTVEC_ELT (v, vi++) = frame_load; -- ix86_add_cfa_restore_note (NULL, reg, r.offset); -+ /* This symbol must be referenced via a load -+ from the Global Offset Table (@GOT). */ -+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT); -+ new_rtx = gen_rtx_CONST (Pmode, new_rtx); -+ if (TARGET_64BIT) -+ new_rtx = force_reg (Pmode, new_rtx); -+ new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx); -+ new_rtx = gen_const_mem (Pmode, new_rtx); -+ set_mem_alias_set (new_rtx, ix86_GOT_alias_set ()); - } -- } -- -- /* Add RSI frame load & restore note at the end. */ -- gcc_assert (rsi_frame_load); -- gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1); -- RTVEC_ELT (v, vi++) = rsi_frame_load; -- ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG), -- rsi_restore_offset); -- -- /* Finally, for tail-call w/o a hard frame pointer, set SP to R10. */ -- if (!use_call && !frame_pointer_needed) -- { -- gcc_assert (m->fs.sp_valid); -- gcc_assert (!m->fs.sp_realigned); - -- /* At this point, R10 should point to frame.stack_realign_offset. */ -- if (m->fs.cfa_reg == stack_pointer_rtx) -- m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset; -- m->fs.sp_offset = frame.stack_realign_offset; -+ new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode); - } -- -- gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v)); -- tmp = gen_rtx_PARALLEL (VOIDmode, v); -- if (use_call) -- insn = emit_insn (tmp); - else - { -- insn = emit_jump_insn (tmp); -- JUMP_LABEL (insn) = ret_rtx; -- -- if (frame_pointer_needed) -- ix86_emit_leave (insn); -- else -+ if (CONST_INT_P (addr) -+ && !x86_64_immediate_operand (addr, VOIDmode)) -+ new_rtx = copy_to_suggested_reg (addr, reg, Pmode); -+ else if (GET_CODE (addr) == CONST) - { -- /* Need CFA adjust note. */ -- tmp = gen_rtx_SET (stack_pointer_rtx, r10); -- add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp); -- } -- } -+ addr = XEXP (addr, 0); - -- RTX_FRAME_RELATED_P (insn) = true; -- ix86_add_queued_cfa_restore_notes (insn); -+ /* We must match stuff we generate before. Assume the only -+ unspecs that can get here are ours. Not that we could do -+ anything with them anyway.... */ -+ if (GET_CODE (addr) == UNSPEC -+ || (GET_CODE (addr) == PLUS -+ && GET_CODE (XEXP (addr, 0)) == UNSPEC)) -+ return orig; -+ gcc_assert (GET_CODE (addr) == PLUS); -+ } - -- /* If we're not doing a tail-call, we need to adjust the stack. 
*/ -- if (use_call && m->fs.sp_valid) -- { -- HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset; -- pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, -- GEN_INT (dealloc), style, -- m->fs.cfa_reg == stack_pointer_rtx); -- } --} -+ if (GET_CODE (addr) == PLUS) -+ { -+ rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1); - --/* Restore function stack, frame, and registers. */ -+ /* Check first to see if this is a constant -+ offset from a @GOTOFF symbol reference. */ -+ if (!TARGET_PECOFF -+ && gotoff_operand (op0, Pmode) -+ && CONST_INT_P (op1)) -+ { -+ if (!TARGET_64BIT) -+ { -+ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), -+ UNSPEC_GOTOFF); -+ new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1); -+ new_rtx = gen_rtx_CONST (Pmode, new_rtx); - --void --ix86_expand_epilogue (int style) --{ -- struct machine_function *m = cfun->machine; -- struct machine_frame_state frame_state_save = m->fs; -- bool restore_regs_via_mov; -- bool using_drap; -- bool restore_stub_is_tail = false; -+ if (reg != 0) -+ { -+ gcc_assert (REG_P (reg)); -+ new_rtx = expand_simple_binop (Pmode, PLUS, -+ pic_offset_table_rtx, -+ new_rtx, reg, 1, -+ OPTAB_DIRECT); -+ } -+ else -+ new_rtx -+ = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx); -+ } -+ else -+ { -+ if (INTVAL (op1) < -16*1024*1024 -+ || INTVAL (op1) >= 16*1024*1024) -+ { -+ if (!x86_64_immediate_operand (op1, Pmode)) -+ op1 = force_reg (Pmode, op1); - -- if (ix86_function_naked (current_function_decl)) -- { -- /* The program should not reach this point. */ -- emit_insn (gen_ud2 ()); -- return; -- } -+ new_rtx -+ = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1); -+ } -+ } -+ } -+ else -+ { -+ rtx base = legitimize_pic_address (op0, reg); -+ machine_mode mode = GET_MODE (base); -+ new_rtx -+ = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg); - -- ix86_finalize_stack_frame_flags (); -- const struct ix86_frame &frame = cfun->machine->frame; -+ if (CONST_INT_P (new_rtx)) -+ { -+ if (INTVAL (new_rtx) < -16*1024*1024 -+ || INTVAL (new_rtx) >= 16*1024*1024) -+ { -+ if (!x86_64_immediate_operand (new_rtx, mode)) -+ new_rtx = force_reg (mode, new_rtx); - -- m->fs.sp_realigned = stack_realign_fp; -- m->fs.sp_valid = stack_realign_fp -- || !frame_pointer_needed -- || crtl->sp_is_unchanging; -- gcc_assert (!m->fs.sp_valid -- || m->fs.sp_offset == frame.stack_pointer_offset); -+ new_rtx -+ = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx); -+ } -+ else -+ new_rtx = plus_constant (mode, base, INTVAL (new_rtx)); -+ } -+ else -+ { -+ /* For %rip addressing, we have to use -+ just disp32, not base nor index. */ -+ if (TARGET_64BIT -+ && (GET_CODE (base) == SYMBOL_REF -+ || GET_CODE (base) == LABEL_REF)) -+ base = force_reg (mode, base); -+ if (GET_CODE (new_rtx) == PLUS -+ && CONSTANT_P (XEXP (new_rtx, 1))) -+ { -+ base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0)); -+ new_rtx = XEXP (new_rtx, 1); -+ } -+ new_rtx = gen_rtx_PLUS (mode, base, new_rtx); -+ } -+ } -+ } -+ } -+ return new_rtx; -+} -+ -+/* Load the thread pointer. If TO_REG is true, force it into a register. */ - -- /* The FP must be valid if the frame pointer is present. */ -- gcc_assert (frame_pointer_needed == m->fs.fp_valid); -- gcc_assert (!m->fs.fp_valid -- || m->fs.fp_offset == frame.hard_frame_pointer_offset); -+static rtx -+get_thread_pointer (machine_mode tp_mode, bool to_reg) -+{ -+ rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP); - -- /* We must have *some* valid pointer to the stack frame. 
*/ -- gcc_assert (m->fs.sp_valid || m->fs.fp_valid); -+ if (GET_MODE (tp) != tp_mode) -+ { -+ gcc_assert (GET_MODE (tp) == SImode); -+ gcc_assert (tp_mode == DImode); - -- /* The DRAP is never valid at this point. */ -- gcc_assert (!m->fs.drap_valid); -+ tp = gen_rtx_ZERO_EXTEND (tp_mode, tp); -+ } - -- /* See the comment about red zone and frame -- pointer usage in ix86_expand_prologue. */ -- if (frame_pointer_needed && frame.red_zone_size) -- emit_insn (gen_memory_blockage ()); -+ if (to_reg) -+ tp = copy_to_mode_reg (tp_mode, tp); - -- using_drap = crtl->drap_reg && crtl->stack_realign_needed; -- gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg); -+ return tp; -+} - -- /* Determine the CFA offset of the end of the red-zone. */ -- m->fs.red_zone_offset = 0; -- if (ix86_using_red_zone () && crtl->args.pops_args < 65536) -+/* Construct the SYMBOL_REF for the tls_get_addr function. */ -+ -+static GTY(()) rtx ix86_tls_symbol; -+ -+static rtx -+ix86_tls_get_addr (void) -+{ -+ if (!ix86_tls_symbol) - { -- /* The red-zone begins below return address and error code in -- exception handler. */ -- m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET; -+ const char *sym -+ = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT) -+ ? "___tls_get_addr" : "__tls_get_addr"); - -- /* When the register save area is in the aligned portion of -- the stack, determine the maximum runtime displacement that -- matches up with the aligned frame. */ -- if (stack_realign_drap) -- m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT -- + UNITS_PER_WORD); -+ ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym); - } - -- HOST_WIDE_INT reg_save_offset = frame.reg_save_offset; -+ if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF) -+ { -+ rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol), -+ UNSPEC_PLTOFF); -+ return gen_rtx_PLUS (Pmode, pic_offset_table_rtx, -+ gen_rtx_CONST (Pmode, unspec)); -+ } - -- /* Special care must be taken for the normal return case of a function -- using eh_return: the eax and edx registers are marked as saved, but -- not restored along this path. Adjust the save location to match. */ -- if (crtl->calls_eh_return && style != 2) -- reg_save_offset -= 2 * UNITS_PER_WORD; -+ return ix86_tls_symbol; -+} - -- /* EH_RETURN requires the use of moves to function properly. */ -- if (crtl->calls_eh_return) -- restore_regs_via_mov = true; -- /* SEH requires the use of pops to identify the epilogue. */ -- else if (TARGET_SEH) -- restore_regs_via_mov = false; -- /* If we're only restoring one register and sp cannot be used then -- using a move instruction to restore the register since it's -- less work than reloading sp and popping the register. */ -- else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1) -- restore_regs_via_mov = true; -- else if (TARGET_EPILOGUE_USING_MOVE -- && cfun->machine->use_fast_prologue_epilogue -- && (frame.nregs > 1 -- || m->fs.sp_offset != reg_save_offset)) -- restore_regs_via_mov = true; -- else if (frame_pointer_needed -- && !frame.nregs -- && m->fs.sp_offset != reg_save_offset) -- restore_regs_via_mov = true; -- else if (frame_pointer_needed -- && TARGET_USE_LEAVE -- && cfun->machine->use_fast_prologue_epilogue -- && frame.nregs == 1) -- restore_regs_via_mov = true; -- else -- restore_regs_via_mov = false; -+/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. 
*/ - -- if (restore_regs_via_mov || frame.nsseregs) -+static GTY(()) rtx ix86_tls_module_base_symbol; -+ -+rtx -+ix86_tls_module_base (void) -+{ -+ if (!ix86_tls_module_base_symbol) - { -- /* Ensure that the entire register save area is addressable via -- the stack pointer, if we will restore SSE regs via sp. */ -- if (TARGET_64BIT -- && m->fs.sp_offset > 0x7fffffff -- && sp_valid_at (frame.stack_realign_offset + 1) -- && (frame.nsseregs + frame.nregs) != 0) -- { -- pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, -- GEN_INT (m->fs.sp_offset -- - frame.sse_reg_save_offset), -- style, -- m->fs.cfa_reg == stack_pointer_rtx); -- } -- } -+ ix86_tls_module_base_symbol -+ = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_"); - -- /* If there are any SSE registers to restore, then we have to do it -- via moves, since there's obviously no pop for SSE regs. */ -- if (frame.nsseregs) -- ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset, -- style == 2); -+ SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol) -+ |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT; -+ } - -- if (m->call_ms2sysv) -- { -- int pop_incoming_args = crtl->args.pops_args && crtl->args.size; -+ return ix86_tls_module_base_symbol; -+} - -- /* We cannot use a tail-call for the stub if: -- 1. We have to pop incoming args, -- 2. We have additional int regs to restore, or -- 3. A sibling call will be the tail-call, or -- 4. We are emitting an eh_return_internal epilogue. -+/* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is -+ false if we expect this to be used for a memory address and true if -+ we expect to load the address into a register. */ - -- TODO: Item 4 has not yet tested! -+rtx -+legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) -+{ -+ rtx dest, base, off; -+ rtx pic = NULL_RTX, tp = NULL_RTX; -+ machine_mode tp_mode = Pmode; -+ int type; - -- If any of the above are true, we will call the stub rather than -- jump to it. */ -- restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1); -- ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style); -- } -+ /* Fall back to global dynamic model if tool chain cannot support local -+ dynamic. */ -+ if (TARGET_SUN_TLS && !TARGET_64BIT -+ && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM -+ && model == TLS_MODEL_LOCAL_DYNAMIC) -+ model = TLS_MODEL_GLOBAL_DYNAMIC; - -- /* If using out-of-line stub that is a tail-call, then...*/ -- if (m->call_ms2sysv && restore_stub_is_tail) -- { -- /* TODO: parinoid tests. (remove eventually) */ -- gcc_assert (m->fs.sp_valid); -- gcc_assert (!m->fs.sp_realigned); -- gcc_assert (!m->fs.fp_valid); -- gcc_assert (!m->fs.realigned); -- gcc_assert (m->fs.sp_offset == UNITS_PER_WORD); -- gcc_assert (!crtl->drap_reg); -- gcc_assert (!frame.nregs); -- } -- else if (restore_regs_via_mov) -+ switch (model) - { -- rtx t; -+ case TLS_MODEL_GLOBAL_DYNAMIC: -+ dest = gen_reg_rtx (Pmode); - -- if (frame.nregs) -- ix86_emit_restore_regs_using_mov (reg_save_offset, style == 2); -+ if (!TARGET_64BIT) -+ { -+ if (flag_pic && !TARGET_PECOFF) -+ pic = pic_offset_table_rtx; -+ else -+ { -+ pic = gen_reg_rtx (Pmode); -+ emit_insn (gen_set_got (pic)); -+ } -+ } - -- /* eh_return epilogues need %ecx added to the stack pointer. 
*/ -- if (style == 2) -+ if (TARGET_GNU2_TLS) - { -- rtx sa = EH_RETURN_STACKADJ_RTX; -- rtx_insn *insn; -+ if (TARGET_64BIT) -+ emit_insn (gen_tls_dynamic_gnu2_64 (dest, x)); -+ else -+ emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic)); - -- /* %ecx can't be used for both DRAP register and eh_return. */ -- if (crtl->drap_reg) -- gcc_assert (REGNO (crtl->drap_reg) != CX_REG); -+ tp = get_thread_pointer (Pmode, true); -+ dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest)); - -- /* regparm nested functions don't work with eh_return. */ -- gcc_assert (!ix86_static_chain_on_stack); -+ if (GET_MODE (x) != Pmode) -+ x = gen_rtx_ZERO_EXTEND (Pmode, x); - -- if (frame_pointer_needed) -+ set_unique_reg_note (get_last_insn (), REG_EQUAL, x); -+ } -+ else -+ { -+ rtx caddr = ix86_tls_get_addr (); -+ -+ if (TARGET_64BIT) - { -- t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa); -- t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD); -- emit_insn (gen_rtx_SET (sa, t)); -+ rtx rax = gen_rtx_REG (Pmode, AX_REG); -+ rtx_insn *insns; - -- t = gen_frame_mem (Pmode, hard_frame_pointer_rtx); -- insn = emit_move_insn (hard_frame_pointer_rtx, t); -+ start_sequence (); -+ emit_call_insn -+ (ix86_gen_tls_global_dynamic_64 (rax, x, caddr)); -+ insns = get_insns (); -+ end_sequence (); - -- /* Note that we use SA as a temporary CFA, as the return -- address is at the proper place relative to it. We -- pretend this happens at the FP restore insn because -- prior to this insn the FP would be stored at the wrong -- offset relative to SA, and after this insn we have no -- other reasonable register to use for the CFA. We don't -- bother resetting the CFA to the SP for the duration of -- the return insn, unless the control flow instrumentation -- is done. In this case the SP is used later and we have -- to reset CFA to SP. */ -- add_reg_note (insn, REG_CFA_DEF_CFA, -- plus_constant (Pmode, sa, UNITS_PER_WORD)); -- ix86_add_queued_cfa_restore_notes (insn); -- add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx); -- RTX_FRAME_RELATED_P (insn) = 1; -- -- m->fs.cfa_reg = sa; -- m->fs.cfa_offset = UNITS_PER_WORD; -- m->fs.fp_valid = false; -+ if (GET_MODE (x) != Pmode) -+ x = gen_rtx_ZERO_EXTEND (Pmode, x); - -- pro_epilogue_adjust_stack (stack_pointer_rtx, sa, -- const0_rtx, style, -- flag_cf_protection); -+ RTL_CONST_CALL_P (insns) = 1; -+ emit_libcall_block (insns, dest, rax, x); - } - else -- { -- t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa); -- t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD); -- insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t)); -- ix86_add_queued_cfa_restore_notes (insn); -- -- gcc_assert (m->fs.cfa_reg == stack_pointer_rtx); -- if (m->fs.cfa_offset != UNITS_PER_WORD) -- { -- m->fs.cfa_offset = UNITS_PER_WORD; -- add_reg_note (insn, REG_CFA_DEF_CFA, -- plus_constant (Pmode, stack_pointer_rtx, -- UNITS_PER_WORD)); -- RTX_FRAME_RELATED_P (insn) = 1; -- } -- } -- m->fs.sp_offset = UNITS_PER_WORD; -- m->fs.sp_valid = true; -- m->fs.sp_realigned = false; -+ emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr)); - } -- } -- else -- { -- /* SEH requires that the function end with (1) a stack adjustment -- if necessary, (2) a sequence of pops, and (3) a return or -- jump instruction. Prevent insns from the function body from -- being scheduled into this sequence. 
*/ -- if (TARGET_SEH) -+ break; -+ -+ case TLS_MODEL_LOCAL_DYNAMIC: -+ base = gen_reg_rtx (Pmode); -+ -+ if (!TARGET_64BIT) - { -- /* Prevent a catch region from being adjacent to the standard -- epilogue sequence. Unfortunately neither crtl->uses_eh_lsda -- nor several other flags that would be interesting to test are -- set up yet. */ -- if (flag_non_call_exceptions) -- emit_insn (gen_nops (const1_rtx)); -+ if (flag_pic) -+ pic = pic_offset_table_rtx; - else -- emit_insn (gen_blockage ()); -+ { -+ pic = gen_reg_rtx (Pmode); -+ emit_insn (gen_set_got (pic)); -+ } - } - -- /* First step is to deallocate the stack frame so that we can -- pop the registers. If the stack pointer was realigned, it needs -- to be restored now. Also do it on SEH target for very large -- frame as the emitted instructions aren't allowed by the ABI -- in epilogues. */ -- if (!m->fs.sp_valid || m->fs.sp_realigned -- || (TARGET_SEH -- && (m->fs.sp_offset - reg_save_offset -- >= SEH_MAX_FRAME_SIZE))) -- { -- pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx, -- GEN_INT (m->fs.fp_offset -- - reg_save_offset), -- style, false); -- } -- else if (m->fs.sp_offset != reg_save_offset) -+ if (TARGET_GNU2_TLS) - { -- pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, -- GEN_INT (m->fs.sp_offset -- - reg_save_offset), -- style, -- m->fs.cfa_reg == stack_pointer_rtx); -- } -+ rtx tmp = ix86_tls_module_base (); - -- ix86_emit_restore_regs_using_pop (); -- } -+ if (TARGET_64BIT) -+ emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp)); -+ else -+ emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic)); - -- /* If we used a stack pointer and haven't already got rid of it, -- then do so now. */ -- if (m->fs.fp_valid) -- { -- /* If the stack pointer is valid and pointing at the frame -- pointer store address, then we only need a pop. */ -- if (sp_valid_at (frame.hfp_save_offset) -- && m->fs.sp_offset == frame.hfp_save_offset) -- ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx); -- /* Leave results in shorter dependency chains on CPUs that are -- able to grok it fast. 
*/ -- else if (TARGET_USE_LEAVE -- || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun)) -- || !cfun->machine->use_fast_prologue_epilogue) -- ix86_emit_leave (NULL); -+ tp = get_thread_pointer (Pmode, true); -+ set_unique_reg_note (get_last_insn (), REG_EQUAL, -+ gen_rtx_MINUS (Pmode, tmp, tp)); -+ } - else -- { -- pro_epilogue_adjust_stack (stack_pointer_rtx, -- hard_frame_pointer_rtx, -- const0_rtx, style, !using_drap); -- ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx); -- } -- } -- -- if (using_drap) -- { -- int param_ptr_offset = UNITS_PER_WORD; -- rtx_insn *insn; -+ { -+ rtx caddr = ix86_tls_get_addr (); - -- gcc_assert (stack_realign_drap); -+ if (TARGET_64BIT) -+ { -+ rtx rax = gen_rtx_REG (Pmode, AX_REG); -+ rtx_insn *insns; -+ rtx eqv; - -- if (ix86_static_chain_on_stack) -- param_ptr_offset += UNITS_PER_WORD; -- if (!call_used_regs[REGNO (crtl->drap_reg)]) -- param_ptr_offset += UNITS_PER_WORD; -+ start_sequence (); -+ emit_call_insn -+ (ix86_gen_tls_local_dynamic_base_64 (rax, caddr)); -+ insns = get_insns (); -+ end_sequence (); - -- insn = emit_insn (gen_rtx_SET -- (stack_pointer_rtx, -- gen_rtx_PLUS (Pmode, -- crtl->drap_reg, -- GEN_INT (-param_ptr_offset)))); -- m->fs.cfa_reg = stack_pointer_rtx; -- m->fs.cfa_offset = param_ptr_offset; -- m->fs.sp_offset = param_ptr_offset; -- m->fs.realigned = false; -+ /* Attach a unique REG_EQUAL, to allow the RTL optimizers to -+ share the LD_BASE result with other LD model accesses. */ -+ eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), -+ UNSPEC_TLS_LD_BASE); - -- add_reg_note (insn, REG_CFA_DEF_CFA, -- gen_rtx_PLUS (Pmode, stack_pointer_rtx, -- GEN_INT (param_ptr_offset))); -- RTX_FRAME_RELATED_P (insn) = 1; -+ RTL_CONST_CALL_P (insns) = 1; -+ emit_libcall_block (insns, base, rax, eqv); -+ } -+ else -+ emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr)); -+ } - -- if (!call_used_regs[REGNO (crtl->drap_reg)]) -- ix86_emit_restore_reg_using_pop (crtl->drap_reg); -- } -+ off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF); -+ off = gen_rtx_CONST (Pmode, off); - -- /* At this point the stack pointer must be valid, and we must have -- restored all of the registers. We may not have deallocated the -- entire stack frame. We've delayed this until now because it may -- be possible to merge the local stack deallocation with the -- deallocation forced by ix86_static_chain_on_stack. */ -- gcc_assert (m->fs.sp_valid); -- gcc_assert (!m->fs.sp_realigned); -- gcc_assert (!m->fs.fp_valid); -- gcc_assert (!m->fs.realigned); -- if (m->fs.sp_offset != UNITS_PER_WORD) -- { -- pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, -- GEN_INT (m->fs.sp_offset - UNITS_PER_WORD), -- style, true); -- } -- else -- ix86_add_queued_cfa_restore_notes (get_last_insn ()); -+ dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off)); - -- /* Sibcall epilogues don't want a return instruction. */ -- if (style == 0) -- { -- m->fs = frame_state_save; -- return; -- } -+ if (TARGET_GNU2_TLS) -+ { -+ dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp)); - -- if (cfun->machine->func_type != TYPE_NORMAL) -- emit_jump_insn (gen_interrupt_return ()); -- else if (crtl->args.pops_args && crtl->args.size) -- { -- rtx popc = GEN_INT (crtl->args.pops_args); -+ if (GET_MODE (x) != Pmode) -+ x = gen_rtx_ZERO_EXTEND (Pmode, x); - -- /* i386 can only pop 64K bytes. If asked to pop more, pop return -- address, do explicit add, and jump indirectly to the caller. 
*/ -+ set_unique_reg_note (get_last_insn (), REG_EQUAL, x); -+ } -+ break; - -- if (crtl->args.pops_args >= 65536) -+ case TLS_MODEL_INITIAL_EXEC: -+ if (TARGET_64BIT) - { -- rtx ecx = gen_rtx_REG (SImode, CX_REG); -- rtx_insn *insn; -- -- /* There is no "pascal" calling convention in any 64bit ABI. */ -- gcc_assert (!TARGET_64BIT); -- -- insn = emit_insn (gen_pop (ecx)); -- m->fs.cfa_offset -= UNITS_PER_WORD; -- m->fs.sp_offset -= UNITS_PER_WORD; -+ if (TARGET_SUN_TLS && !TARGET_X32) -+ { -+ /* The Sun linker took the AMD64 TLS spec literally -+ and can only handle %rax as destination of the -+ initial executable code sequence. */ - -- rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); -- x = gen_rtx_SET (stack_pointer_rtx, x); -- add_reg_note (insn, REG_CFA_ADJUST_CFA, x); -- add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx)); -- RTX_FRAME_RELATED_P (insn) = 1; -+ dest = gen_reg_rtx (DImode); -+ emit_insn (gen_tls_initial_exec_64_sun (dest, x)); -+ return dest; -+ } - -- pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, -- popc, -1, true); -- emit_jump_insn (gen_simple_return_indirect_internal (ecx)); -+ /* Generate DImode references to avoid %fs:(%reg32) -+ problems and linker IE->LE relaxation bug. */ -+ tp_mode = DImode; -+ pic = NULL; -+ type = UNSPEC_GOTNTPOFF; - } -- else -- emit_jump_insn (gen_simple_return_pop_internal (popc)); -- } -- else if (!m->call_ms2sysv || !restore_stub_is_tail) -- { -- /* In case of return from EH a simple return cannot be used -- as a return address will be compared with a shadow stack -- return address. Use indirect jump instead. */ -- if (style == 2 && flag_cf_protection) -+ else if (flag_pic) - { -- /* Register used in indirect jump must be in word_mode. But -- Pmode may not be the same as word_mode for x32. */ -- rtx ecx = gen_rtx_REG (word_mode, CX_REG); -- rtx_insn *insn; -- -- insn = emit_insn (gen_pop (ecx)); -- m->fs.cfa_offset -= UNITS_PER_WORD; -- m->fs.sp_offset -= UNITS_PER_WORD; -- -- rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); -- x = gen_rtx_SET (stack_pointer_rtx, x); -- add_reg_note (insn, REG_CFA_ADJUST_CFA, x); -- add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx)); -- RTX_FRAME_RELATED_P (insn) = 1; -- -- emit_jump_insn (gen_simple_return_indirect_internal (ecx)); -+ pic = pic_offset_table_rtx; -+ type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF; -+ } -+ else if (!TARGET_ANY_GNU_TLS) -+ { -+ pic = gen_reg_rtx (Pmode); -+ emit_insn (gen_set_got (pic)); -+ type = UNSPEC_GOTTPOFF; - } - else -- emit_jump_insn (gen_simple_return_internal ()); -- } -- -- /* Restore the state back to the state from the prologue, -- so that it's correct for the next epilogue. */ -- m->fs = frame_state_save; --} -+ { -+ pic = NULL; -+ type = UNSPEC_INDNTPOFF; -+ } - --/* Reset from the function's potential modifications. 
*/ -+ off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type); -+ off = gen_rtx_CONST (tp_mode, off); -+ if (pic) -+ off = gen_rtx_PLUS (tp_mode, pic, off); -+ off = gen_const_mem (tp_mode, off); -+ set_mem_alias_set (off, ix86_GOT_alias_set ()); - --static void --ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED) --{ -- if (pic_offset_table_rtx -- && !ix86_use_pseudo_pic_reg ()) -- SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM); -+ if (TARGET_64BIT || TARGET_ANY_GNU_TLS) -+ { -+ base = get_thread_pointer (tp_mode, -+ for_mov || !TARGET_TLS_DIRECT_SEG_REFS); -+ off = force_reg (tp_mode, off); -+ dest = gen_rtx_PLUS (tp_mode, base, off); -+ if (tp_mode != Pmode) -+ dest = convert_to_mode (Pmode, dest, 1); -+ } -+ else -+ { -+ base = get_thread_pointer (Pmode, true); -+ dest = gen_reg_rtx (Pmode); -+ emit_insn (ix86_gen_sub3 (dest, base, off)); -+ } -+ break; - -- if (TARGET_MACHO) -- { -- rtx_insn *insn = get_last_insn (); -- rtx_insn *deleted_debug_label = NULL; -+ case TLS_MODEL_LOCAL_EXEC: -+ off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), -+ (TARGET_64BIT || TARGET_ANY_GNU_TLS) -+ ? UNSPEC_NTPOFF : UNSPEC_TPOFF); -+ off = gen_rtx_CONST (Pmode, off); - -- /* Mach-O doesn't support labels at the end of objects, so if -- it looks like we might want one, take special action. -- First, collect any sequence of deleted debug labels. */ -- while (insn -- && NOTE_P (insn) -- && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL) -+ if (TARGET_64BIT || TARGET_ANY_GNU_TLS) - { -- /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL -- notes only, instead set their CODE_LABEL_NUMBER to -1, -- otherwise there would be code generation differences -- in between -g and -g0. */ -- if (NOTE_P (insn) && NOTE_KIND (insn) -- == NOTE_INSN_DELETED_DEBUG_LABEL) -- deleted_debug_label = insn; -- insn = PREV_INSN (insn); -+ base = get_thread_pointer (Pmode, -+ for_mov || !TARGET_TLS_DIRECT_SEG_REFS); -+ return gen_rtx_PLUS (Pmode, base, off); -+ } -+ else -+ { -+ base = get_thread_pointer (Pmode, true); -+ dest = gen_reg_rtx (Pmode); -+ emit_insn (ix86_gen_sub3 (dest, base, off)); - } -+ break; - -- /* If we have: -- label: -- barrier -- then this needs to be detected, so skip past the barrier. */ -+ default: -+ gcc_unreachable (); -+ } - -- if (insn && BARRIER_P (insn)) -- insn = PREV_INSN (insn); -+ return dest; -+} - -- /* Up to now we've only seen notes or barriers. */ -- if (insn) -+/* Return true if OP refers to a TLS address. */ -+bool -+ix86_tls_address_pattern_p (rtx op) -+{ -+ subrtx_var_iterator::array_type array; -+ FOR_EACH_SUBRTX_VAR (iter, array, op, ALL) -+ { -+ rtx op = *iter; -+ if (MEM_P (op)) - { -- if (LABEL_P (insn) -- || (NOTE_P (insn) -- && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)) -- /* Trailing label. */ -- fputs ("\tnop\n", file); -- else if (cfun && ! cfun->is_thunk) -+ rtx *x = &XEXP (op, 0); -+ while (GET_CODE (*x) == PLUS) - { -- /* See if we have a completely empty function body, skipping -- the special case of the picbase thunk emitted as asm. */ -- while (insn && ! INSN_P (insn)) -- insn = PREV_INSN (insn); -- /* If we don't find any insns, we've got an empty function body; -- I.e. completely empty - without a return or branch. This is -- taken as the case where a function body has been removed -- because it contains an inline __builtin_unreachable(). GCC -- declares that reaching __builtin_unreachable() means UB so -- we're not obliged to do anything special; however, we want -- non-zero-sized function bodies. 
To meet this, and help the -- user out, let's trap the case. */ -- if (insn == NULL) -- fputs ("\tud2\n", file); -+ int i; -+ for (i = 0; i < 2; i++) -+ { -+ rtx u = XEXP (*x, i); -+ if (GET_CODE (u) == ZERO_EXTEND) -+ u = XEXP (u, 0); -+ if (GET_CODE (u) == UNSPEC -+ && XINT (u, 1) == UNSPEC_TP) -+ return true; -+ } -+ x = &XEXP (*x, 0); - } -+ -+ iter.skip_subrtxes (); - } -- else if (deleted_debug_label) -- for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn)) -- if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL) -- CODE_LABEL_NUMBER (insn) = -1; - } --} - --/* Return a scratch register to use in the split stack prologue. The -- split stack prologue is used for -fsplit-stack. It is the first -- instructions in the function, even before the regular prologue. -- The scratch register can be any caller-saved register which is not -- used for parameters or for the static chain. */ -+ return false; -+} - --static unsigned int --split_stack_prologue_scratch_regno (void) -+/* Rewrite *LOC so that it refers to a default TLS address space. */ -+void -+ix86_rewrite_tls_address_1 (rtx *loc) - { -- if (TARGET_64BIT) -- return R11_REG; -- else -+ subrtx_ptr_iterator::array_type array; -+ FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL) - { -- bool is_fastcall, is_thiscall; -- int regparm; -- -- is_fastcall = (lookup_attribute ("fastcall", -- TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))) -- != NULL); -- is_thiscall = (lookup_attribute ("thiscall", -- TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))) -- != NULL); -- regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl); -- -- if (is_fastcall) -- { -- if (DECL_STATIC_CHAIN (cfun->decl)) -- { -- sorry ("%<-fsplit-stack%> does not support fastcall with " -- "nested function"); -- return INVALID_REGNUM; -- } -- return AX_REG; -- } -- else if (is_thiscall) -- { -- if (!DECL_STATIC_CHAIN (cfun->decl)) -- return DX_REG; -- return AX_REG; -- } -- else if (regparm < 3) -+ rtx *loc = *iter; -+ if (MEM_P (*loc)) - { -- if (!DECL_STATIC_CHAIN (cfun->decl)) -- return CX_REG; -- else -+ rtx addr = XEXP (*loc, 0); -+ rtx *x = &addr; -+ while (GET_CODE (*x) == PLUS) - { -- if (regparm >= 2) -+ int i; -+ for (i = 0; i < 2; i++) - { -- sorry ("%<-fsplit-stack%> does not support 2 register " -- "parameters for a nested function"); -- return INVALID_REGNUM; -+ rtx u = XEXP (*x, i); -+ if (GET_CODE (u) == ZERO_EXTEND) -+ u = XEXP (u, 0); -+ if (GET_CODE (u) == UNSPEC -+ && XINT (u, 1) == UNSPEC_TP) -+ { -+ addr_space_t as = DEFAULT_TLS_SEG_REG; -+ -+ *x = XEXP (*x, 1 - i); -+ -+ *loc = replace_equiv_address_nv (*loc, addr, true); -+ set_mem_addr_space (*loc, as); -+ return; -+ } - } -- return DX_REG; -+ x = &XEXP (*x, 0); - } -- } -- else -- { -- /* FIXME: We could make this work by pushing a register -- around the addition and comparison. */ -- sorry ("%<-fsplit-stack%> does not support 3 register parameters"); -- return INVALID_REGNUM; -+ -+ iter.skip_subrtxes (); - } - } - } - --/* A SYMBOL_REF for the function which allocates new stackspace for -- -fsplit-stack. */ -- --static GTY(()) rtx split_stack_fn; -- --/* A SYMBOL_REF for the more stack function when using the large -- model. */ -- --static GTY(()) rtx split_stack_fn_large; -+/* Rewrite instruction pattern involvning TLS address -+ so that it refers to a default TLS address space. */ -+rtx -+ix86_rewrite_tls_address (rtx pattern) -+{ -+ pattern = copy_insn (pattern); -+ ix86_rewrite_tls_address_1 (&pattern); -+ return pattern; -+} - --/* Return location of the stack guard value in the TLS block. 
*/ -+/* Create or return the unique __imp_DECL dllimport symbol corresponding -+ to symbol DECL if BEIMPORT is true. Otherwise create or return the -+ unique refptr-DECL symbol corresponding to symbol DECL. */ - --rtx --ix86_split_stack_guard (void) -+struct dllimport_hasher : ggc_cache_ptr_hash - { -- int offset; -- addr_space_t as = DEFAULT_TLS_SEG_REG; -- rtx r; -+ static inline hashval_t hash (tree_map *m) { return m->hash; } -+ static inline bool -+ equal (tree_map *a, tree_map *b) -+ { -+ return a->base.from == b->base.from; -+ } - -- gcc_assert (flag_split_stack); -+ static int -+ keep_cache_entry (tree_map *&m) -+ { -+ return ggc_marked_p (m->base.from); -+ } -+}; - --#ifdef TARGET_THREAD_SPLIT_STACK_OFFSET -- offset = TARGET_THREAD_SPLIT_STACK_OFFSET; --#else -- gcc_unreachable (); -+static GTY((cache)) hash_table *dllimport_map; -+ -+static tree -+get_dllimport_decl (tree decl, bool beimport) -+{ -+ struct tree_map *h, in; -+ const char *name; -+ const char *prefix; -+ size_t namelen, prefixlen; -+ char *imp_name; -+ tree to; -+ rtx rtl; -+ -+ if (!dllimport_map) -+ dllimport_map = hash_table::create_ggc (512); -+ -+ in.hash = htab_hash_pointer (decl); -+ in.base.from = decl; -+ tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT); -+ h = *loc; -+ if (h) -+ return h->to; -+ -+ *loc = h = ggc_alloc (); -+ h->hash = in.hash; -+ h->base.from = decl; -+ h->to = to = build_decl (DECL_SOURCE_LOCATION (decl), -+ VAR_DECL, NULL, ptr_type_node); -+ DECL_ARTIFICIAL (to) = 1; -+ DECL_IGNORED_P (to) = 1; -+ DECL_EXTERNAL (to) = 1; -+ TREE_READONLY (to) = 1; -+ -+ name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); -+ name = targetm.strip_name_encoding (name); -+ if (beimport) -+ prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0 -+ ? "*__imp_" : "*__imp__"; -+ else -+ prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr."; -+ namelen = strlen (name); -+ prefixlen = strlen (prefix); -+ imp_name = (char *) alloca (namelen + prefixlen + 1); -+ memcpy (imp_name, prefix, prefixlen); -+ memcpy (imp_name + prefixlen, name, namelen + 1); -+ -+ name = ggc_alloc_string (imp_name, namelen + prefixlen); -+ rtl = gen_rtx_SYMBOL_REF (Pmode, name); -+ SET_SYMBOL_REF_DECL (rtl, to); -+ SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR; -+ if (!beimport) -+ { -+ SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL; -+#ifdef SUB_TARGET_RECORD_STUB -+ SUB_TARGET_RECORD_STUB (name); - #endif -+ } - -- r = GEN_INT (offset); -- r = gen_const_mem (Pmode, r); -- set_mem_addr_space (r, as); -+ rtl = gen_const_mem (Pmode, rtl); -+ set_mem_alias_set (rtl, ix86_GOT_alias_set ()); - -- return r; -+ SET_DECL_RTL (to, rtl); -+ SET_DECL_ASSEMBLER_NAME (to, get_identifier (name)); -+ -+ return to; - } - --/* Handle -fsplit-stack. These are the first instructions in the -- function, even before the regular prologue. */ -+/* Expand SYMBOL into its corresponding far-address symbol. -+ WANT_REG is true if we require the result be a register. 
*/ - --void --ix86_expand_split_stack_prologue (void) -+static rtx -+legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg) - { -- HOST_WIDE_INT allocate; -- unsigned HOST_WIDE_INT args_size; -- rtx_code_label *label; -- rtx limit, current, allocate_rtx, call_fusage; -- rtx_insn *call_insn; -- rtx scratch_reg = NULL_RTX; -- rtx_code_label *varargs_label = NULL; -- rtx fn; -+ tree imp_decl; -+ rtx x; - -- gcc_assert (flag_split_stack && reload_completed); -+ gcc_assert (SYMBOL_REF_DECL (symbol)); -+ imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false); - -- ix86_finalize_stack_frame_flags (); -- struct ix86_frame &frame = cfun->machine->frame; -- allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET; -+ x = DECL_RTL (imp_decl); -+ if (want_reg) -+ x = force_reg (Pmode, x); -+ return x; -+} - -- /* This is the label we will branch to if we have enough stack -- space. We expect the basic block reordering pass to reverse this -- branch if optimizing, so that we branch in the unlikely case. */ -- label = gen_label_rtx (); -+/* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is -+ true if we require the result be a register. */ - -- /* We need to compare the stack pointer minus the frame size with -- the stack boundary in the TCB. The stack boundary always gives -- us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we -- can compare directly. Otherwise we need to do an addition. */ -+static rtx -+legitimize_dllimport_symbol (rtx symbol, bool want_reg) -+{ -+ tree imp_decl; -+ rtx x; - -- limit = ix86_split_stack_guard (); -+ gcc_assert (SYMBOL_REF_DECL (symbol)); -+ imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true); - -- if (allocate < SPLIT_STACK_AVAILABLE) -- current = stack_pointer_rtx; -- else -- { -- unsigned int scratch_regno; -- rtx offset; -+ x = DECL_RTL (imp_decl); -+ if (want_reg) -+ x = force_reg (Pmode, x); -+ return x; -+} - -- /* We need a scratch register to hold the stack pointer minus -- the required frame size. Since this is the very start of the -- function, the scratch register can be any caller-saved -- register which is not used for parameters. */ -- offset = GEN_INT (- allocate); -- scratch_regno = split_stack_prologue_scratch_regno (); -- if (scratch_regno == INVALID_REGNUM) -- return; -- scratch_reg = gen_rtx_REG (Pmode, scratch_regno); -- if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode)) -- { -- /* We don't use ix86_gen_add3 in this case because it will -- want to split to lea, but when not optimizing the insn -- will not be split after this point. */ -- emit_insn (gen_rtx_SET (scratch_reg, -- gen_rtx_PLUS (Pmode, stack_pointer_rtx, -- offset))); -- } -- else -+/* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG -+ is true if we require the result be a register. 
*/ -+ -+rtx -+legitimize_pe_coff_symbol (rtx addr, bool inreg) -+{ -+ if (!TARGET_PECOFF) -+ return NULL_RTX; -+ -+ if (TARGET_DLLIMPORT_DECL_ATTRIBUTES) -+ { -+ if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr)) -+ return legitimize_dllimport_symbol (addr, inreg); -+ if (GET_CODE (addr) == CONST -+ && GET_CODE (XEXP (addr, 0)) == PLUS -+ && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF -+ && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0))) - { -- emit_move_insn (scratch_reg, offset); -- emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg, -- stack_pointer_rtx)); -+ rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg); -+ return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1)); - } -- current = scratch_reg; - } - -- ix86_expand_branch (GEU, current, limit, label); -- rtx_insn *jump_insn = get_last_insn (); -- JUMP_LABEL (jump_insn) = label; -- -- /* Mark the jump as very likely to be taken. */ -- add_reg_br_prob_note (jump_insn, profile_probability::very_likely ()); -+ if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC) -+ return NULL_RTX; -+ if (GET_CODE (addr) == SYMBOL_REF -+ && !is_imported_p (addr) -+ && SYMBOL_REF_EXTERNAL_P (addr) -+ && SYMBOL_REF_DECL (addr)) -+ return legitimize_pe_coff_extern_decl (addr, inreg); - -- if (split_stack_fn == NULL_RTX) -+ if (GET_CODE (addr) == CONST -+ && GET_CODE (XEXP (addr, 0)) == PLUS -+ && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF -+ && !is_imported_p (XEXP (XEXP (addr, 0), 0)) -+ && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0)) -+ && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0))) - { -- split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack"); -- SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL; -+ rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg); -+ return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1)); - } -- fn = split_stack_fn; -+ return NULL_RTX; -+} - -- /* Get more stack space. We pass in the desired stack space and the -- size of the arguments to copy to the new stack. In 32-bit mode -- we push the parameters; __morestack will return on a new stack -- anyhow. In 64-bit mode we pass the parameters in r10 and -- r11. */ -- allocate_rtx = GEN_INT (allocate); -- args_size = crtl->args.size >= 0 ? (HOST_WIDE_INT) crtl->args.size : 0; -- call_fusage = NULL_RTX; -- rtx pop = NULL_RTX; -- if (TARGET_64BIT) -- { -- rtx reg10, reg11; -+/* Try machine-dependent ways of modifying an illegitimate address -+ to be legitimate. If we find one, return the new, valid address. -+ This macro is used in only one place: `memory_address' in explow.c. - -- reg10 = gen_rtx_REG (Pmode, R10_REG); -- reg11 = gen_rtx_REG (Pmode, R11_REG); -+ OLDX is the address as it was before break_out_memory_refs was called. -+ In some cases it is useful to look at this to decide what needs to be done. - -- /* If this function uses a static chain, it will be in %r10. -- Preserve it across the call to __morestack. */ -- if (DECL_STATIC_CHAIN (cfun->decl)) -- { -- rtx rax; -+ It is always safe for this macro to do nothing. It exists to recognize -+ opportunities to optimize the output. - -- rax = gen_rtx_REG (word_mode, AX_REG); -- emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG)); -- use_reg (&call_fusage, rax); -- } -- -- if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC) -- && !TARGET_PECOFF) -- { -- HOST_WIDE_INT argval; -- -- gcc_assert (Pmode == DImode); -- /* When using the large model we need to load the address -- into a register, and we've run out of registers. 
So we -- switch to a different calling convention, and we call a -- different function: __morestack_large. We pass the -- argument size in the upper 32 bits of r10 and pass the -- frame size in the lower 32 bits. */ -- gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate); -- gcc_assert ((args_size & 0xffffffff) == args_size); -- -- if (split_stack_fn_large == NULL_RTX) -- { -- split_stack_fn_large -- = gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model"); -- SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL; -- } -- if (ix86_cmodel == CM_LARGE_PIC) -- { -- rtx_code_label *label; -- rtx x; -- -- label = gen_label_rtx (); -- emit_label (label); -- LABEL_PRESERVE_P (label) = 1; -- emit_insn (gen_set_rip_rex64 (reg10, label)); -- emit_insn (gen_set_got_offset_rex64 (reg11, label)); -- emit_insn (ix86_gen_add3 (reg10, reg10, reg11)); -- x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large), -- UNSPEC_GOT); -- x = gen_rtx_CONST (Pmode, x); -- emit_move_insn (reg11, x); -- x = gen_rtx_PLUS (Pmode, reg10, reg11); -- x = gen_const_mem (Pmode, x); -- emit_move_insn (reg11, x); -- } -- else -- emit_move_insn (reg11, split_stack_fn_large); -+ For the 80386, we handle X+REG by loading X into a register R and -+ using R+REG. R will go in a general reg and indexing will be used. -+ However, if REG is a broken-out memory address or multiplication, -+ nothing needs to be done because REG can certainly go in a general reg. - -- fn = reg11; -+ When -fpic is used, special handling is needed for symbolic references. -+ See comments by legitimize_pic_address in i386.c for details. */ - -- argval = ((args_size << 16) << 16) + allocate; -- emit_move_insn (reg10, GEN_INT (argval)); -- } -- else -- { -- emit_move_insn (reg10, allocate_rtx); -- emit_move_insn (reg11, GEN_INT (args_size)); -- use_reg (&call_fusage, reg11); -- } -+static rtx -+ix86_legitimize_address (rtx x, rtx, machine_mode mode) -+{ -+ bool changed = false; -+ unsigned log; - -- use_reg (&call_fusage, reg10); -+ log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0; -+ if (log) -+ return legitimize_tls_address (x, (enum tls_model) log, false); -+ if (GET_CODE (x) == CONST -+ && GET_CODE (XEXP (x, 0)) == PLUS -+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF -+ && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)))) -+ { -+ rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), -+ (enum tls_model) log, false); -+ return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1)); - } -- else -+ -+ if (TARGET_DLLIMPORT_DECL_ATTRIBUTES) - { -- rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size))); -- add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD)); -- insn = emit_insn (gen_push (allocate_rtx)); -- add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD)); -- pop = GEN_INT (2 * UNITS_PER_WORD); -+ rtx tmp = legitimize_pe_coff_symbol (x, true); -+ if (tmp) -+ return tmp; - } -- call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn), -- GEN_INT (UNITS_PER_WORD), constm1_rtx, -- pop, false); -- add_function_usage_to (call_insn, call_fusage); -- if (!TARGET_64BIT) -- add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0)); -- /* Indicate that this function can't jump to non-local gotos. */ -- make_reg_eh_region_note_nothrow_nononlocal (call_insn); - -- /* In order to make call/return prediction work right, we now need -- to execute a return instruction. See -- libgcc/config/i386/morestack.S for the details on how this works. 
-+ if (flag_pic && SYMBOLIC_CONST (x)) -+ return legitimize_pic_address (x, 0); - -- For flow purposes gcc must not see this as a return -- instruction--we need control flow to continue at the subsequent -- label. Therefore, we use an unspec. */ -- gcc_assert (crtl->args.pops_args < 65536); -- rtx_insn *ret_insn -- = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args))); -+#if TARGET_MACHO -+ if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x)) -+ return machopic_indirect_data_reference (x, 0); -+#endif - -- if ((flag_cf_protection & CF_BRANCH)) -+ /* Canonicalize shifts by 0, 1, 2, 3 into multiply */ -+ if (GET_CODE (x) == ASHIFT -+ && CONST_INT_P (XEXP (x, 1)) -+ && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4) - { -- /* Insert ENDBR since __morestack will jump back here via indirect -- call. */ -- rtx cet_eb = gen_nop_endbr (); -- emit_insn_after (cet_eb, ret_insn); -+ changed = true; -+ log = INTVAL (XEXP (x, 1)); -+ x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)), -+ GEN_INT (1 << log)); - } - -- /* If we are in 64-bit mode and this function uses a static chain, -- we saved %r10 in %rax before calling _morestack. */ -- if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl)) -- emit_move_insn (gen_rtx_REG (word_mode, R10_REG), -- gen_rtx_REG (word_mode, AX_REG)); -- -- /* If this function calls va_start, we need to store a pointer to -- the arguments on the old stack, because they may not have been -- all copied to the new stack. At this point the old stack can be -- found at the frame pointer value used by __morestack, because -- __morestack has set that up before calling back to us. Here we -- store that pointer in a scratch register, and in -- ix86_expand_prologue we store the scratch register in a stack -- slot. */ -- if (cfun->machine->split_stack_varargs_pointer != NULL_RTX) -+ if (GET_CODE (x) == PLUS) - { -- unsigned int scratch_regno; -- rtx frame_reg; -- int words; -+ /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */ - -- scratch_regno = split_stack_prologue_scratch_regno (); -- scratch_reg = gen_rtx_REG (Pmode, scratch_regno); -- frame_reg = gen_rtx_REG (Pmode, BP_REG); -+ if (GET_CODE (XEXP (x, 0)) == ASHIFT -+ && CONST_INT_P (XEXP (XEXP (x, 0), 1)) -+ && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4) -+ { -+ changed = true; -+ log = INTVAL (XEXP (XEXP (x, 0), 1)); -+ XEXP (x, 0) = gen_rtx_MULT (Pmode, -+ force_reg (Pmode, XEXP (XEXP (x, 0), 0)), -+ GEN_INT (1 << log)); -+ } - -- /* 64-bit: -- fp -> old fp value -- return address within this function -- return address of caller of this function -- stack arguments -- So we add three words to get to the stack arguments. -+ if (GET_CODE (XEXP (x, 1)) == ASHIFT -+ && CONST_INT_P (XEXP (XEXP (x, 1), 1)) -+ && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4) -+ { -+ changed = true; -+ log = INTVAL (XEXP (XEXP (x, 1), 1)); -+ XEXP (x, 1) = gen_rtx_MULT (Pmode, -+ force_reg (Pmode, XEXP (XEXP (x, 1), 0)), -+ GEN_INT (1 << log)); -+ } - -- 32-bit: -- fp -> old fp value -- return address within this function -- first argument to __morestack -- second argument to __morestack -- return address of caller of this function -- stack arguments -- So we add five words to get to the stack arguments. -- */ -- words = TARGET_64BIT ? 3 : 5; -- emit_insn (gen_rtx_SET (scratch_reg, -- gen_rtx_PLUS (Pmode, frame_reg, -- GEN_INT (words * UNITS_PER_WORD)))); -+ /* Put multiply first if it isn't already. 
*/ -+ if (GET_CODE (XEXP (x, 1)) == MULT) -+ { -+ std::swap (XEXP (x, 0), XEXP (x, 1)); -+ changed = true; -+ } - -- varargs_label = gen_label_rtx (); -- emit_jump_insn (gen_jump (varargs_label)); -- JUMP_LABEL (get_last_insn ()) = varargs_label; -+ /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const))) -+ into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be -+ created by virtual register instantiation, register elimination, and -+ similar optimizations. */ -+ if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS) -+ { -+ changed = true; -+ x = gen_rtx_PLUS (Pmode, -+ gen_rtx_PLUS (Pmode, XEXP (x, 0), -+ XEXP (XEXP (x, 1), 0)), -+ XEXP (XEXP (x, 1), 1)); -+ } - -- emit_barrier (); -- } -+ /* Canonicalize -+ (plus (plus (mult (reg) (const)) (plus (reg) (const))) const) -+ into (plus (plus (mult (reg) (const)) (reg)) (const)). */ -+ else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS -+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT -+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS -+ && CONSTANT_P (XEXP (x, 1))) -+ { -+ rtx constant; -+ rtx other = NULL_RTX; - -- emit_label (label); -- LABEL_NUSES (label) = 1; -+ if (CONST_INT_P (XEXP (x, 1))) -+ { -+ constant = XEXP (x, 1); -+ other = XEXP (XEXP (XEXP (x, 0), 1), 1); -+ } -+ else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1))) -+ { -+ constant = XEXP (XEXP (XEXP (x, 0), 1), 1); -+ other = XEXP (x, 1); -+ } -+ else -+ constant = 0; - -- /* If this function calls va_start, we now have to set the scratch -- register for the case where we do not call __morestack. In this -- case we need to set it based on the stack pointer. */ -- if (cfun->machine->split_stack_varargs_pointer != NULL_RTX) -- { -- emit_insn (gen_rtx_SET (scratch_reg, -- gen_rtx_PLUS (Pmode, stack_pointer_rtx, -- GEN_INT (UNITS_PER_WORD)))); -+ if (constant) -+ { -+ changed = true; -+ x = gen_rtx_PLUS (Pmode, -+ gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0), -+ XEXP (XEXP (XEXP (x, 0), 1), 0)), -+ plus_constant (Pmode, other, -+ INTVAL (constant))); -+ } -+ } - -- emit_label (varargs_label); -- LABEL_NUSES (varargs_label) = 1; -- } --} -- --/* We may have to tell the dataflow pass that the split stack prologue -- is initializing a scratch register. */ -- --static void --ix86_live_on_entry (bitmap regs) --{ -- if (cfun->machine->split_stack_varargs_pointer != NULL_RTX) -- { -- gcc_assert (flag_split_stack); -- bitmap_set_bit (regs, split_stack_prologue_scratch_regno ()); -- } --} -- --/* Extract the parts of an RTL expression that is a valid memory address -- for an instruction. Return 0 if the structure of the address is -- grossly off. Return -1 if the address contains ASHIFT, so it is not -- strictly valid, but still used for computing length of lea instruction. */ -- --int --ix86_decompose_address (rtx addr, struct ix86_address *out) --{ -- rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX; -- rtx base_reg, index_reg; -- HOST_WIDE_INT scale = 1; -- rtx scale_rtx = NULL_RTX; -- rtx tmp; -- int retval = 1; -- addr_space_t seg = ADDR_SPACE_GENERIC; -+ if (changed && ix86_legitimate_address_p (mode, x, false)) -+ return x; - -- /* Allow zero-extended SImode addresses, -- they will be emitted with addr32 prefix. 
*/ -- if (TARGET_64BIT && GET_MODE (addr) == DImode) -- { -- if (GET_CODE (addr) == ZERO_EXTEND -- && GET_MODE (XEXP (addr, 0)) == SImode) -- { -- addr = XEXP (addr, 0); -- if (CONST_INT_P (addr)) -- return 0; -- } -- else if (GET_CODE (addr) == AND -- && const_32bit_mask (XEXP (addr, 1), DImode)) -+ if (GET_CODE (XEXP (x, 0)) == MULT) - { -- addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode); -- if (addr == NULL_RTX) -- return 0; -- -- if (CONST_INT_P (addr)) -- return 0; -+ changed = true; -+ XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0)); - } -- } - -- /* Allow SImode subregs of DImode addresses, -- they will be emitted with addr32 prefix. */ -- if (TARGET_64BIT && GET_MODE (addr) == SImode) -- { -- if (SUBREG_P (addr) -- && GET_MODE (SUBREG_REG (addr)) == DImode) -+ if (GET_CODE (XEXP (x, 1)) == MULT) - { -- addr = SUBREG_REG (addr); -- if (CONST_INT_P (addr)) -- return 0; -+ changed = true; -+ XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1)); - } -- } - -- if (REG_P (addr)) -- base = addr; -- else if (SUBREG_P (addr)) -- { -- if (REG_P (SUBREG_REG (addr))) -- base = addr; -- else -- return 0; -- } -- else if (GET_CODE (addr) == PLUS) -- { -- rtx addends[4], op; -- int n = 0, i; -+ if (changed -+ && REG_P (XEXP (x, 1)) -+ && REG_P (XEXP (x, 0))) -+ return x; - -- op = addr; -- do -+ if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1))) - { -- if (n >= 4) -- return 0; -- addends[n++] = XEXP (op, 1); -- op = XEXP (op, 0); -+ changed = true; -+ x = legitimize_pic_address (x, 0); - } -- while (GET_CODE (op) == PLUS); -- if (n >= 4) -- return 0; -- addends[n] = op; - -- for (i = n; i >= 0; --i) -+ if (changed && ix86_legitimate_address_p (mode, x, false)) -+ return x; -+ -+ if (REG_P (XEXP (x, 0))) - { -- op = addends[i]; -- switch (GET_CODE (op)) -+ rtx temp = gen_reg_rtx (Pmode); -+ rtx val = force_operand (XEXP (x, 1), temp); -+ if (val != temp) - { -- case MULT: -- if (index) -- return 0; -- index = XEXP (op, 0); -- scale_rtx = XEXP (op, 1); -- break; -- -- case ASHIFT: -- if (index) -- return 0; -- index = XEXP (op, 0); -- tmp = XEXP (op, 1); -- if (!CONST_INT_P (tmp)) -- return 0; -- scale = INTVAL (tmp); -- if ((unsigned HOST_WIDE_INT) scale > 3) -- return 0; -- scale = 1 << scale; -- break; -- -- case ZERO_EXTEND: -- op = XEXP (op, 0); -- if (GET_CODE (op) != UNSPEC) -- return 0; -- /* FALLTHRU */ -- -- case UNSPEC: -- if (XINT (op, 1) == UNSPEC_TP -- && TARGET_TLS_DIRECT_SEG_REFS -- && seg == ADDR_SPACE_GENERIC) -- seg = DEFAULT_TLS_SEG_REG; -- else -- return 0; -- break; -- -- case SUBREG: -- if (!REG_P (SUBREG_REG (op))) -- return 0; -- /* FALLTHRU */ -- -- case REG: -- if (!base) -- base = op; -- else if (!index) -- index = op; -- else -- return 0; -- break; -+ val = convert_to_mode (Pmode, val, 1); -+ emit_move_insn (temp, val); -+ } - -- case CONST: -- case CONST_INT: -- case SYMBOL_REF: -- case LABEL_REF: -- if (disp) -- return 0; -- disp = op; -- break; -+ XEXP (x, 1) = temp; -+ return x; -+ } - -- default: -- return 0; -+ else if (REG_P (XEXP (x, 1))) -+ { -+ rtx temp = gen_reg_rtx (Pmode); -+ rtx val = force_operand (XEXP (x, 0), temp); -+ if (val != temp) -+ { -+ val = convert_to_mode (Pmode, val, 1); -+ emit_move_insn (temp, val); - } -+ -+ XEXP (x, 0) = temp; -+ return x; - } - } -- else if (GET_CODE (addr) == MULT) -- { -- index = XEXP (addr, 0); /* index*scale */ -- scale_rtx = XEXP (addr, 1); -- } -- else if (GET_CODE (addr) == ASHIFT) -- { -- /* We're called for lea too, which implements ashift on occasion. 
*/ -- index = XEXP (addr, 0); -- tmp = XEXP (addr, 1); -- if (!CONST_INT_P (tmp)) -- return 0; -- scale = INTVAL (tmp); -- if ((unsigned HOST_WIDE_INT) scale > 3) -- return 0; -- scale = 1 << scale; -- retval = -1; -- } -- else -- disp = addr; /* displacement */ - -- if (index) -- { -- if (REG_P (index)) -- ; -- else if (SUBREG_P (index) -- && REG_P (SUBREG_REG (index))) -- ; -- else -- return 0; -- } -+ return x; -+} -+ -+/* Print an integer constant expression in assembler syntax. Addition -+ and subtraction are the only arithmetic that may appear in these -+ expressions. FILE is the stdio stream to write to, X is the rtx, and -+ CODE is the operand print code from the output string. */ - -- /* Extract the integral value of scale. */ -- if (scale_rtx) -+static void -+output_pic_addr_const (FILE *file, rtx x, int code) -+{ -+ char buf[256]; -+ -+ switch (GET_CODE (x)) - { -- if (!CONST_INT_P (scale_rtx)) -- return 0; -- scale = INTVAL (scale_rtx); -- } -+ case PC: -+ gcc_assert (flag_pic); -+ putc ('.', file); -+ break; - -- base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base; -- index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index; -+ case SYMBOL_REF: -+ if (TARGET_64BIT || ! TARGET_MACHO_SYMBOL_STUBS) -+ output_addr_const (file, x); -+ else -+ { -+ const char *name = XSTR (x, 0); - -- /* Avoid useless 0 displacement. */ -- if (disp == const0_rtx && (base || index)) -- disp = NULL_RTX; -+ /* Mark the decl as referenced so that cgraph will -+ output the function. */ -+ if (SYMBOL_REF_DECL (x)) -+ mark_decl_referenced (SYMBOL_REF_DECL (x)); - -- /* Allow arg pointer and stack pointer as index if there is not scaling. */ -- if (base_reg && index_reg && scale == 1 -- && (REGNO (index_reg) == ARG_POINTER_REGNUM -- || REGNO (index_reg) == FRAME_POINTER_REGNUM -- || REGNO (index_reg) == SP_REG)) -- { -- std::swap (base, index); -- std::swap (base_reg, index_reg); -- } -- -- /* Special case: %ebp cannot be encoded as a base without a displacement. -- Similarly %r13. */ -- if (!disp && base_reg -- && (REGNO (base_reg) == ARG_POINTER_REGNUM -- || REGNO (base_reg) == FRAME_POINTER_REGNUM -- || REGNO (base_reg) == BP_REG -- || REGNO (base_reg) == R13_REG)) -- disp = const0_rtx; -- -- /* Special case: on K6, [%esi] makes the instruction vector decoded. -- Avoid this by transforming to [%esi+0]. -- Reload calls address legitimization without cfun defined, so we need -- to test cfun for being non-NULL. */ -- if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun) -- && base_reg && !index_reg && !disp -- && REGNO (base_reg) == SI_REG) -- disp = const0_rtx; -- -- /* Special case: encode reg+reg instead of reg*2. */ -- if (!base && index && scale == 2) -- base = index, base_reg = index_reg, scale = 1; -- -- /* Special case: scaling cannot be encoded without base or displacement. */ -- if (!base && !disp && index && scale != 1) -- disp = const0_rtx; -- -- out->base = base; -- out->index = index; -- out->disp = disp; -- out->scale = scale; -- out->seg = seg; -- -- return retval; --} -- --/* Return cost of the memory address x. -- For i386, it is better to use a complex address than let gcc copy -- the address into a reg and make a new pseudo. But not if the address -- requires to two regs - that would mean more pseudos with longer -- lifetimes. 
*/ --static int --ix86_address_cost (rtx x, machine_mode, addr_space_t, bool) --{ -- struct ix86_address parts; -- int cost = 1; -- int ok = ix86_decompose_address (x, &parts); -- -- gcc_assert (ok); -+#if TARGET_MACHO -+ if (MACHOPIC_INDIRECT -+ && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION) -+ name = machopic_indirection_name (x, /*stub_p=*/true); -+#endif -+ assemble_name (file, name); -+ } -+ if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF) -+ && code == 'P' && ! SYMBOL_REF_LOCAL_P (x)) -+ fputs ("@PLT", file); -+ break; - -- if (parts.base && SUBREG_P (parts.base)) -- parts.base = SUBREG_REG (parts.base); -- if (parts.index && SUBREG_P (parts.index)) -- parts.index = SUBREG_REG (parts.index); -+ case LABEL_REF: -+ x = XEXP (x, 0); -+ /* FALLTHRU */ -+ case CODE_LABEL: -+ ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x)); -+ assemble_name (asm_out_file, buf); -+ break; - -- /* Attempt to minimize number of registers in the address by increasing -- address cost for each used register. We don't increase address cost -- for "pic_offset_table_rtx". When a memopt with "pic_offset_table_rtx" -- is not invariant itself it most likely means that base or index is not -- invariant. Therefore only "pic_offset_table_rtx" could be hoisted out, -- which is not profitable for x86. */ -- if (parts.base -- && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER) -- && (current_pass->type == GIMPLE_PASS -- || !pic_offset_table_rtx -- || !REG_P (parts.base) -- || REGNO (pic_offset_table_rtx) != REGNO (parts.base))) -- cost++; -+ case CONST_INT: -+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); -+ break; - -- if (parts.index -- && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER) -- && (current_pass->type == GIMPLE_PASS -- || !pic_offset_table_rtx -- || !REG_P (parts.index) -- || REGNO (pic_offset_table_rtx) != REGNO (parts.index))) -- cost++; -+ case CONST: -+ /* This used to output parentheses around the expression, -+ but that does not work on the 386 (either ATT or BSD assembler). */ -+ output_pic_addr_const (file, XEXP (x, 0), code); -+ break; - -- /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b, -- since it's predecode logic can't detect the length of instructions -- and it degenerates to vector decoded. Increase cost of such -- addresses here. The penalty is minimally 2 cycles. It may be worthwhile -- to split such addresses or even refuse such addresses at all. -+ case CONST_DOUBLE: -+ /* We can't handle floating point constants; -+ TARGET_PRINT_OPERAND must handle them. */ -+ output_operand_lossage ("floating constant misused"); -+ break; - -- Following addressing modes are affected: -- [base+scale*index] -- [scale*index+disp] -- [base+index] -+ case PLUS: -+ /* Some assemblers need integer constants to appear first. */ -+ if (CONST_INT_P (XEXP (x, 0))) -+ { -+ output_pic_addr_const (file, XEXP (x, 0), code); -+ putc ('+', file); -+ output_pic_addr_const (file, XEXP (x, 1), code); -+ } -+ else -+ { -+ gcc_assert (CONST_INT_P (XEXP (x, 1))); -+ output_pic_addr_const (file, XEXP (x, 1), code); -+ putc ('+', file); -+ output_pic_addr_const (file, XEXP (x, 0), code); -+ } -+ break; - -- The first and last case may be avoidable by explicitly coding the zero in -- memory address, but I don't have AMD-K6 machine handy to check this -- theory. */ -+ case MINUS: -+ if (!TARGET_MACHO) -+ putc (ASSEMBLER_DIALECT == ASM_INTEL ? 
'(' : '[', file); -+ output_pic_addr_const (file, XEXP (x, 0), code); -+ putc ('-', file); -+ output_pic_addr_const (file, XEXP (x, 1), code); -+ if (!TARGET_MACHO) -+ putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file); -+ break; - -- if (TARGET_K6 -- && ((!parts.disp && parts.base && parts.index && parts.scale != 1) -- || (parts.disp && !parts.base && parts.index && parts.scale != 1) -- || (!parts.disp && parts.base && parts.index && parts.scale == 1))) -- cost += 10; -+ case UNSPEC: -+ gcc_assert (XVECLEN (x, 0) == 1); -+ output_pic_addr_const (file, XVECEXP (x, 0, 0), code); -+ switch (XINT (x, 1)) -+ { -+ case UNSPEC_GOT: -+ fputs ("@GOT", file); -+ break; -+ case UNSPEC_GOTOFF: -+ fputs ("@GOTOFF", file); -+ break; -+ case UNSPEC_PLTOFF: -+ fputs ("@PLTOFF", file); -+ break; -+ case UNSPEC_PCREL: -+ fputs (ASSEMBLER_DIALECT == ASM_ATT ? -+ "(%rip)" : "[rip]", file); -+ break; -+ case UNSPEC_GOTPCREL: -+ fputs (ASSEMBLER_DIALECT == ASM_ATT ? -+ "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file); -+ break; -+ case UNSPEC_GOTTPOFF: -+ /* FIXME: This might be @TPOFF in Sun ld too. */ -+ fputs ("@gottpoff", file); -+ break; -+ case UNSPEC_TPOFF: -+ fputs ("@tpoff", file); -+ break; -+ case UNSPEC_NTPOFF: -+ if (TARGET_64BIT) -+ fputs ("@tpoff", file); -+ else -+ fputs ("@ntpoff", file); -+ break; -+ case UNSPEC_DTPOFF: -+ fputs ("@dtpoff", file); -+ break; -+ case UNSPEC_GOTNTPOFF: -+ if (TARGET_64BIT) -+ fputs (ASSEMBLER_DIALECT == ASM_ATT ? -+ "@gottpoff(%rip)": "@gottpoff[rip]", file); -+ else -+ fputs ("@gotntpoff", file); -+ break; -+ case UNSPEC_INDNTPOFF: -+ fputs ("@indntpoff", file); -+ break; -+#if TARGET_MACHO -+ case UNSPEC_MACHOPIC_OFFSET: -+ putc ('-', file); -+ machopic_output_function_base_name (file); -+ break; -+#endif -+ default: -+ output_operand_lossage ("invalid UNSPEC as operand"); -+ break; -+ } -+ break; - -- return cost; -+ default: -+ output_operand_lossage ("invalid expression as operand"); -+ } - } -- --/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as -- this is used for to form addresses to local data when -fPIC is in -- use. */ - --static bool --darwin_local_data_pic (rtx disp) -+/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL. -+ We need to emit DTP-relative relocations. */ -+ -+static void ATTRIBUTE_UNUSED -+i386_output_dwarf_dtprel (FILE *file, int size, rtx x) - { -- return (GET_CODE (disp) == UNSPEC -- && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET); -+ fputs (ASM_LONG, file); -+ output_addr_const (file, x); -+ fputs ("@dtpoff", file); -+ switch (size) -+ { -+ case 4: -+ break; -+ case 8: -+ fputs (", 0", file); -+ break; -+ default: -+ gcc_unreachable (); -+ } - } - --/* True if operand X should be loaded from GOT. */ -+/* Return true if X is a representation of the PIC register. This copes -+ with calls from ix86_find_base_term, where the register might have -+ been replaced by a cselib value. 
*/ - --bool --ix86_force_load_from_GOT_p (rtx x) -+static bool -+ix86_pic_register_p (rtx x) - { -- return ((TARGET_64BIT || HAVE_AS_IX86_GOT32X) -- && !TARGET_PECOFF && !TARGET_MACHO -- && !flag_pic -- && ix86_cmodel != CM_LARGE -- && GET_CODE (x) == SYMBOL_REF -- && SYMBOL_REF_FUNCTION_P (x) -- && (!flag_plt -- || (SYMBOL_REF_DECL (x) -- && lookup_attribute ("noplt", -- DECL_ATTRIBUTES (SYMBOL_REF_DECL (x))))) -- && !SYMBOL_REF_LOCAL_P (x)); -+ if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x)) -+ return (pic_offset_table_rtx -+ && rtx_equal_for_cselib_p (x, pic_offset_table_rtx)); -+ else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SET_GOT) -+ return true; -+ else if (!REG_P (x)) -+ return false; -+ else if (pic_offset_table_rtx) -+ { -+ if (REGNO (x) == REGNO (pic_offset_table_rtx)) -+ return true; -+ if (HARD_REGISTER_P (x) -+ && !HARD_REGISTER_P (pic_offset_table_rtx) -+ && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx)) -+ return true; -+ return false; -+ } -+ else -+ return REGNO (x) == PIC_OFFSET_TABLE_REGNUM; - } - --/* Determine if a given RTX is a valid constant. We already know this -- satisfies CONSTANT_P. */ -+/* Helper function for ix86_delegitimize_address. -+ Attempt to delegitimize TLS local-exec accesses. */ - --static bool --ix86_legitimate_constant_p (machine_mode mode, rtx x) -+static rtx -+ix86_delegitimize_tls_address (rtx orig_x) - { -- switch (GET_CODE (x)) -- { -- case CONST: -- x = XEXP (x, 0); -- -- if (GET_CODE (x) == PLUS) -- { -- if (!CONST_INT_P (XEXP (x, 1))) -- return false; -- x = XEXP (x, 0); -- } -+ rtx x = orig_x, unspec; -+ struct ix86_address addr; - -- if (TARGET_MACHO && darwin_local_data_pic (x)) -- return true; -+ if (!TARGET_TLS_DIRECT_SEG_REFS) -+ return orig_x; -+ if (MEM_P (x)) -+ x = XEXP (x, 0); -+ if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode) -+ return orig_x; -+ if (ix86_decompose_address (x, &addr) == 0 -+ || addr.seg != DEFAULT_TLS_SEG_REG -+ || addr.disp == NULL_RTX -+ || GET_CODE (addr.disp) != CONST) -+ return orig_x; -+ unspec = XEXP (addr.disp, 0); -+ if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1))) -+ unspec = XEXP (unspec, 0); -+ if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF) -+ return orig_x; -+ x = XVECEXP (unspec, 0, 0); -+ gcc_assert (GET_CODE (x) == SYMBOL_REF); -+ if (unspec != XEXP (addr.disp, 0)) -+ x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1)); -+ if (addr.index) -+ { -+ rtx idx = addr.index; -+ if (addr.scale != 1) -+ idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale)); -+ x = gen_rtx_PLUS (Pmode, idx, x); -+ } -+ if (addr.base) -+ x = gen_rtx_PLUS (Pmode, addr.base, x); -+ if (MEM_P (orig_x)) -+ x = replace_equiv_address_nv (orig_x, x); -+ return x; -+} - -- /* Only some unspecs are valid as "constants". */ -- if (GET_CODE (x) == UNSPEC) -- switch (XINT (x, 1)) -- { -- case UNSPEC_GOT: -- case UNSPEC_GOTOFF: -- case UNSPEC_PLTOFF: -- return TARGET_64BIT; -- case UNSPEC_TPOFF: -- case UNSPEC_NTPOFF: -- x = XVECEXP (x, 0, 0); -- return (GET_CODE (x) == SYMBOL_REF -- && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC); -- case UNSPEC_DTPOFF: -- x = XVECEXP (x, 0, 0); -- return (GET_CODE (x) == SYMBOL_REF -- && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC); -- default: -- return false; -- } -+/* In the name of slightly smaller debug output, and to cater to -+ general assembler lossage, recognize PIC+GOTOFF and turn it back -+ into a direct symbol reference. - -- /* We must have drilled down to a symbol. 
*/ -- if (GET_CODE (x) == LABEL_REF) -- return true; -- if (GET_CODE (x) != SYMBOL_REF) -- return false; -- /* FALLTHRU */ -+ On Darwin, this is necessary to avoid a crash, because Darwin -+ has a different PIC label for each routine but the DWARF debugging -+ information is not associated with any particular routine, so it's -+ necessary to remove references to the PIC label from RTL stored by -+ the DWARF output code. - -- case SYMBOL_REF: -- /* TLS symbols are never valid. */ -- if (SYMBOL_REF_TLS_MODEL (x)) -- return false; -+ This helper is used in the normal ix86_delegitimize_address -+ entrypoint (e.g. used in the target delegitimization hook) and -+ in ix86_find_base_term. As compile time memory optimization, we -+ avoid allocating rtxes that will not change anything on the outcome -+ of the callers (find_base_value and find_base_term). */ - -- /* DLLIMPORT symbols are never valid. */ -- if (TARGET_DLLIMPORT_DECL_ATTRIBUTES -- && SYMBOL_REF_DLLIMPORT_P (x)) -- return false; -+static inline rtx -+ix86_delegitimize_address_1 (rtx x, bool base_term_p) -+{ -+ rtx orig_x = delegitimize_mem_from_attrs (x); -+ /* addend is NULL or some rtx if x is something+GOTOFF where -+ something doesn't include the PIC register. */ -+ rtx addend = NULL_RTX; -+ /* reg_addend is NULL or a multiple of some register. */ -+ rtx reg_addend = NULL_RTX; -+ /* const_addend is NULL or a const_int. */ -+ rtx const_addend = NULL_RTX; -+ /* This is the result, or NULL. */ -+ rtx result = NULL_RTX; - --#if TARGET_MACHO -- /* mdynamic-no-pic */ -- if (MACHO_DYNAMIC_NO_PIC_P) -- return machopic_symbol_defined_p (x); --#endif -+ x = orig_x; - -- /* External function address should be loaded -- via the GOT slot to avoid PLT. */ -- if (ix86_force_load_from_GOT_p (x)) -- return false; -+ if (MEM_P (x)) -+ x = XEXP (x, 0); - -- break; -+ if (TARGET_64BIT) -+ { -+ if (GET_CODE (x) == CONST -+ && GET_CODE (XEXP (x, 0)) == PLUS -+ && GET_MODE (XEXP (x, 0)) == Pmode -+ && CONST_INT_P (XEXP (XEXP (x, 0), 1)) -+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC -+ && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL) -+ { -+ /* find_base_{value,term} only care about MEMs with arg_pointer_rtx -+ base. A CONST can't be arg_pointer_rtx based. */ -+ if (base_term_p && MEM_P (orig_x)) -+ return orig_x; -+ rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0); -+ x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2); -+ if (MEM_P (orig_x)) -+ x = replace_equiv_address_nv (orig_x, x); -+ return x; -+ } - -- CASE_CONST_SCALAR_INT: -- switch (mode) -+ if (GET_CODE (x) == CONST -+ && GET_CODE (XEXP (x, 0)) == UNSPEC -+ && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL -+ || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL) -+ && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)) - { -- case E_TImode: -- if (TARGET_64BIT) -- return true; -- /* FALLTHRU */ -- case E_OImode: -- case E_XImode: -- if (!standard_sse_constant_p (x, mode)) -- return false; -- default: -- break; -+ x = XVECEXP (XEXP (x, 0), 0, 0); -+ if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x)) -+ { -+ x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x)); -+ if (x == NULL_RTX) -+ return orig_x; -+ } -+ return x; - } -- break; - -- case CONST_VECTOR: -- if (!standard_sse_constant_p (x, mode)) -- return false; -+ if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC) -+ return ix86_delegitimize_tls_address (orig_x); - -- default: -- break; -+ /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic -+ and -mcmodel=medium -fpic. 
*/ - } - -- /* Otherwise we handle everything else in the move patterns. */ -- return true; --} -- --/* Determine if it's legal to put X into the constant pool. This -- is not possible for the address of thread-local symbols, which -- is checked above. */ -+ if (GET_CODE (x) != PLUS -+ || GET_CODE (XEXP (x, 1)) != CONST) -+ return ix86_delegitimize_tls_address (orig_x); - --static bool --ix86_cannot_force_const_mem (machine_mode mode, rtx x) --{ -- /* We can put any immediate constant in memory. */ -- switch (GET_CODE (x)) -+ if (ix86_pic_register_p (XEXP (x, 0))) -+ /* %ebx + GOT/GOTOFF */ -+ ; -+ else if (GET_CODE (XEXP (x, 0)) == PLUS) - { -- CASE_CONST_ANY: -- return false; -+ /* %ebx + %reg * scale + GOT/GOTOFF */ -+ reg_addend = XEXP (x, 0); -+ if (ix86_pic_register_p (XEXP (reg_addend, 0))) -+ reg_addend = XEXP (reg_addend, 1); -+ else if (ix86_pic_register_p (XEXP (reg_addend, 1))) -+ reg_addend = XEXP (reg_addend, 0); -+ else -+ { -+ reg_addend = NULL_RTX; -+ addend = XEXP (x, 0); -+ } -+ } -+ else -+ addend = XEXP (x, 0); - -- default: -- break; -+ x = XEXP (XEXP (x, 1), 0); -+ if (GET_CODE (x) == PLUS -+ && CONST_INT_P (XEXP (x, 1))) -+ { -+ const_addend = XEXP (x, 1); -+ x = XEXP (x, 0); - } - -- return !ix86_legitimate_constant_p (mode, x); --} -+ if (GET_CODE (x) == UNSPEC -+ && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend) -+ || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x)) -+ || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC -+ && !MEM_P (orig_x) && !addend))) -+ result = XVECEXP (x, 0, 0); - --/* Nonzero if the symbol is marked as dllimport, or as stub-variable, -- otherwise zero. */ -+ if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x) -+ && !MEM_P (orig_x)) -+ result = XVECEXP (x, 0, 0); - --static bool --is_imported_p (rtx x) --{ -- if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES -- || GET_CODE (x) != SYMBOL_REF) -- return false; -+ if (! result) -+ return ix86_delegitimize_tls_address (orig_x); - -- return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x); -+ /* For (PLUS something CONST_INT) both find_base_{value,term} just -+ recurse on the first operand. */ -+ if (const_addend && !base_term_p) -+ result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend)); -+ if (reg_addend) -+ result = gen_rtx_PLUS (Pmode, reg_addend, result); -+ if (addend) -+ { -+ /* If the rest of original X doesn't involve the PIC register, add -+ addend and subtract pic_offset_table_rtx. This can happen e.g. -+ for code like: -+ leal (%ebx, %ecx, 4), %ecx -+ ... -+ movl foo@GOTOFF(%ecx), %edx -+ in which case we return (%ecx - %ebx) + foo -+ or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg -+ and reload has completed. Don't do the latter for debug, -+ as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly. 
*/ -+ if (pic_offset_table_rtx -+ && (!reload_completed || !ix86_use_pseudo_pic_reg ())) -+ result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend), -+ pic_offset_table_rtx), -+ result); -+ else if (base_term_p -+ && pic_offset_table_rtx -+ && !TARGET_MACHO -+ && !TARGET_VXWORKS_RTP) -+ { -+ rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME); -+ tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp); -+ result = gen_rtx_PLUS (Pmode, tmp, result); -+ } -+ else -+ return orig_x; -+ } -+ if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x)) -+ { -+ result = lowpart_subreg (GET_MODE (orig_x), result, Pmode); -+ if (result == NULL_RTX) -+ return orig_x; -+ } -+ return result; - } - -+/* The normal instantiation of the above template. */ - --/* Nonzero if the constant value X is a legitimate general operand -- when generating PIC code. It is given that flag_pic is on and -- that X satisfies CONSTANT_P. */ -- --bool --legitimate_pic_operand_p (rtx x) -+static rtx -+ix86_delegitimize_address (rtx x) - { -- rtx inner; -- -- switch (GET_CODE (x)) -- { -- case CONST: -- inner = XEXP (x, 0); -- if (GET_CODE (inner) == PLUS -- && CONST_INT_P (XEXP (inner, 1))) -- inner = XEXP (inner, 0); -- -- /* Only some unspecs are valid as "constants". */ -- if (GET_CODE (inner) == UNSPEC) -- switch (XINT (inner, 1)) -- { -- case UNSPEC_GOT: -- case UNSPEC_GOTOFF: -- case UNSPEC_PLTOFF: -- return TARGET_64BIT; -- case UNSPEC_TPOFF: -- x = XVECEXP (inner, 0, 0); -- return (GET_CODE (x) == SYMBOL_REF -- && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC); -- case UNSPEC_MACHOPIC_OFFSET: -- return legitimate_pic_address_disp_p (x); -- default: -- return false; -- } -- /* FALLTHRU */ -- -- case SYMBOL_REF: -- case LABEL_REF: -- return legitimate_pic_address_disp_p (x); -- -- default: -- return true; -- } -+ return ix86_delegitimize_address_1 (x, false); - } - --/* Determine if a given CONST RTX is a valid memory displacement -- in PIC mode. */ -+/* If X is a machine specific address (i.e. a symbol or label being -+ referenced as a displacement from the GOT implemented using an -+ UNSPEC), then return the base term. Otherwise return X. */ - --bool --legitimate_pic_address_disp_p (rtx disp) -+rtx -+ix86_find_base_term (rtx x) - { -- bool saw_plus; -+ rtx term; - -- /* In 64bit mode we can allow direct addresses of symbols and labels -- when they are not dynamic symbols. 
*/ - if (TARGET_64BIT) - { -- rtx op0 = disp, op1; -- -- switch (GET_CODE (disp)) -- { -- case LABEL_REF: -- return true; -+ if (GET_CODE (x) != CONST) -+ return x; -+ term = XEXP (x, 0); -+ if (GET_CODE (term) == PLUS -+ && CONST_INT_P (XEXP (term, 1))) -+ term = XEXP (term, 0); -+ if (GET_CODE (term) != UNSPEC -+ || (XINT (term, 1) != UNSPEC_GOTPCREL -+ && XINT (term, 1) != UNSPEC_PCREL)) -+ return x; - -- case CONST: -- if (GET_CODE (XEXP (disp, 0)) != PLUS) -- break; -- op0 = XEXP (XEXP (disp, 0), 0); -- op1 = XEXP (XEXP (disp, 0), 1); -- if (!CONST_INT_P (op1)) -- break; -- if (GET_CODE (op0) == UNSPEC -- && (XINT (op0, 1) == UNSPEC_DTPOFF -- || XINT (op0, 1) == UNSPEC_NTPOFF) -- && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1)) -- return true; -- if (INTVAL (op1) >= 16*1024*1024 -- || INTVAL (op1) < -16*1024*1024) -- break; -- if (GET_CODE (op0) == LABEL_REF) -- return true; -- if (GET_CODE (op0) == CONST -- && GET_CODE (XEXP (op0, 0)) == UNSPEC -- && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL) -- return true; -- if (GET_CODE (op0) == UNSPEC -- && XINT (op0, 1) == UNSPEC_PCREL) -- return true; -- if (GET_CODE (op0) != SYMBOL_REF) -- break; -- /* FALLTHRU */ -+ return XVECEXP (term, 0, 0); -+ } - -- case SYMBOL_REF: -- /* TLS references should always be enclosed in UNSPEC. -- The dllimported symbol needs always to be resolved. */ -- if (SYMBOL_REF_TLS_MODEL (op0) -- || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0))) -- return false; -+ return ix86_delegitimize_address_1 (x, true); -+} - -- if (TARGET_PECOFF) -- { -- if (is_imported_p (op0)) -- return true; -+/* Return true if X shouldn't be emitted into the debug info. -+ Disallow UNSPECs other than @gotoff - we can't emit _GLOBAL_OFFSET_TABLE_ -+ symbol easily into the .debug_info section, so we need not to -+ delegitimize, but instead assemble as @gotoff. -+ Disallow _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically -+ assembles that as _GLOBAL_OFFSET_TABLE_-. expression. */ - -- if (SYMBOL_REF_FAR_ADDR_P (op0) -- || !SYMBOL_REF_LOCAL_P (op0)) -- break; -+static bool -+ix86_const_not_ok_for_debug_p (rtx x) -+{ -+ if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF) -+ return true; - -- /* Function-symbols need to be resolved only for -- large-model. -- For the small-model we don't need to resolve anything -- here. */ -- if ((ix86_cmodel != CM_LARGE_PIC -- && SYMBOL_REF_FUNCTION_P (op0)) -- || ix86_cmodel == CM_SMALL_PIC) -- return true; -- /* Non-external symbols don't need to be resolved for -- large, and medium-model. */ -- if ((ix86_cmodel == CM_LARGE_PIC -- || ix86_cmodel == CM_MEDIUM_PIC) -- && !SYMBOL_REF_EXTERNAL_P (op0)) -- return true; -- } -- else if (!SYMBOL_REF_FAR_ADDR_P (op0) -- && (SYMBOL_REF_LOCAL_P (op0) -- || (HAVE_LD_PIE_COPYRELOC -- && flag_pie -- && !SYMBOL_REF_WEAK (op0) -- && !SYMBOL_REF_FUNCTION_P (op0))) -- && ix86_cmodel != CM_LARGE_PIC) -- return true; -- break; -+ if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0) -+ return true; - -- default: -- break; -- } -- } -- if (GET_CODE (disp) != CONST) -- return false; -- disp = XEXP (disp, 0); -+ return false; -+} -+ -+static void -+put_condition_code (enum rtx_code code, machine_mode mode, bool reverse, -+ bool fp, FILE *file) -+{ -+ const char *suffix; - -- if (TARGET_64BIT) -+ if (mode == CCFPmode) - { -- /* We are unsafe to allow PLUS expressions. This limit allowed distance -- of GOT tables. We should not need these anyway. 
*/ -- if (GET_CODE (disp) != UNSPEC -- || (XINT (disp, 1) != UNSPEC_GOTPCREL -- && XINT (disp, 1) != UNSPEC_GOTOFF -- && XINT (disp, 1) != UNSPEC_PCREL -- && XINT (disp, 1) != UNSPEC_PLTOFF)) -- return false; -- -- if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF -- && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF) -- return false; -- return true; -+ code = ix86_fp_compare_code_to_integer (code); -+ mode = CCmode; - } -+ if (reverse) -+ code = reverse_condition (code); - -- saw_plus = false; -- if (GET_CODE (disp) == PLUS) -+ switch (code) - { -- if (!CONST_INT_P (XEXP (disp, 1))) -- return false; -- disp = XEXP (disp, 0); -- saw_plus = true; -- } -- -- if (TARGET_MACHO && darwin_local_data_pic (disp)) -- return true; -+ case EQ: -+ gcc_assert (mode != CCGZmode); -+ switch (mode) -+ { -+ case E_CCAmode: -+ suffix = "a"; -+ break; -+ case E_CCCmode: -+ suffix = "c"; -+ break; -+ case E_CCOmode: -+ suffix = "o"; -+ break; -+ case E_CCPmode: -+ suffix = "p"; -+ break; -+ case E_CCSmode: -+ suffix = "s"; -+ break; -+ default: -+ suffix = "e"; -+ break; -+ } -+ break; -+ case NE: -+ gcc_assert (mode != CCGZmode); -+ switch (mode) -+ { -+ case E_CCAmode: -+ suffix = "na"; -+ break; -+ case E_CCCmode: -+ suffix = "nc"; -+ break; -+ case E_CCOmode: -+ suffix = "no"; -+ break; -+ case E_CCPmode: -+ suffix = "np"; -+ break; -+ case E_CCSmode: -+ suffix = "ns"; -+ break; -+ default: -+ suffix = "ne"; -+ break; -+ } -+ break; -+ case GT: -+ gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode); -+ suffix = "g"; -+ break; -+ case GTU: -+ /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers. -+ Those same assemblers have the same but opposite lossage on cmov. */ -+ if (mode == CCmode) -+ suffix = fp ? "nbe" : "a"; -+ else -+ gcc_unreachable (); -+ break; -+ case LT: -+ switch (mode) -+ { -+ case E_CCNOmode: -+ case E_CCGOCmode: -+ suffix = "s"; -+ break; - -- if (GET_CODE (disp) != UNSPEC) -- return false; -+ case E_CCmode: -+ case E_CCGCmode: -+ case E_CCGZmode: -+ suffix = "l"; -+ break; - -- switch (XINT (disp, 1)) -- { -- case UNSPEC_GOT: -- if (saw_plus) -- return false; -- /* We need to check for both symbols and labels because VxWorks loads -- text labels with @GOT rather than @GOTOFF. See gotoff_operand for -- details. */ -- return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF -- || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF); -- case UNSPEC_GOTOFF: -- /* Refuse GOTOFF in 64bit mode since it is always 64bit when used. -- While ABI specify also 32bit relocation but we don't produce it in -- small PIC model at all. */ -- if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF -- || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF) -- && !TARGET_64BIT) -- return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode); -- return false; -- case UNSPEC_GOTTPOFF: -- case UNSPEC_GOTNTPOFF: -- case UNSPEC_INDNTPOFF: -- if (saw_plus) -- return false; -- disp = XVECEXP (disp, 0, 0); -- return (GET_CODE (disp) == SYMBOL_REF -- && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC); -- case UNSPEC_NTPOFF: -- disp = XVECEXP (disp, 0, 0); -- return (GET_CODE (disp) == SYMBOL_REF -- && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC); -- case UNSPEC_DTPOFF: -- disp = XVECEXP (disp, 0, 0); -- return (GET_CODE (disp) == SYMBOL_REF -- && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC); -- } -+ default: -+ gcc_unreachable (); -+ } -+ break; -+ case LTU: -+ if (mode == CCmode || mode == CCGZmode) -+ suffix = "b"; -+ else if (mode == CCCmode) -+ suffix = fp ? 
"b" : "c"; -+ else -+ gcc_unreachable (); -+ break; -+ case GE: -+ switch (mode) -+ { -+ case E_CCNOmode: -+ case E_CCGOCmode: -+ suffix = "ns"; -+ break; - -- return false; -+ case E_CCmode: -+ case E_CCGCmode: -+ case E_CCGZmode: -+ suffix = "ge"; -+ break; -+ -+ default: -+ gcc_unreachable (); -+ } -+ break; -+ case GEU: -+ if (mode == CCmode || mode == CCGZmode) -+ suffix = "nb"; -+ else if (mode == CCCmode) -+ suffix = fp ? "nb" : "nc"; -+ else -+ gcc_unreachable (); -+ break; -+ case LE: -+ gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode); -+ suffix = "le"; -+ break; -+ case LEU: -+ if (mode == CCmode) -+ suffix = "be"; -+ else -+ gcc_unreachable (); -+ break; -+ case UNORDERED: -+ suffix = fp ? "u" : "p"; -+ break; -+ case ORDERED: -+ suffix = fp ? "nu" : "np"; -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ fputs (suffix, file); - } - --/* Determine if op is suitable RTX for an address register. -- Return naked register if a register or a register subreg is -- found, otherwise return NULL_RTX. */ -+/* Print the name of register X to FILE based on its machine mode and number. -+ If CODE is 'w', pretend the mode is HImode. -+ If CODE is 'b', pretend the mode is QImode. -+ If CODE is 'k', pretend the mode is SImode. -+ If CODE is 'q', pretend the mode is DImode. -+ If CODE is 'x', pretend the mode is V4SFmode. -+ If CODE is 't', pretend the mode is V8SFmode. -+ If CODE is 'g', pretend the mode is V16SFmode. -+ If CODE is 'h', pretend the reg is the 'high' byte register. -+ If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. -+ If CODE is 'd', duplicate the operand for AVX instruction. -+ If CODE is 'V', print naked full integer register name without %. -+ */ - --static rtx --ix86_validate_address_register (rtx op) -+void -+print_reg (rtx x, int code, FILE *file) - { -- machine_mode mode = GET_MODE (op); -+ const char *reg; -+ int msize; -+ unsigned int regno; -+ bool duplicated; - -- /* Only SImode or DImode registers can form the address. */ -- if (mode != SImode && mode != DImode) -- return NULL_RTX; -+ if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V') -+ putc ('%', file); - -- if (REG_P (op)) -- return op; -- else if (SUBREG_P (op)) -+ if (x == pc_rtx) - { -- rtx reg = SUBREG_REG (op); -+ gcc_assert (TARGET_64BIT); -+ fputs ("rip", file); -+ return; -+ } - -- if (!REG_P (reg)) -- return NULL_RTX; -+ if (code == 'y' && STACK_TOP_P (x)) -+ { -+ fputs ("st(0)", file); -+ return; -+ } - -- mode = GET_MODE (reg); -+ if (code == 'w') -+ msize = 2; -+ else if (code == 'b') -+ msize = 1; -+ else if (code == 'k') -+ msize = 4; -+ else if (code == 'q') -+ msize = 8; -+ else if (code == 'h') -+ msize = 0; -+ else if (code == 'x') -+ msize = 16; -+ else if (code == 't') -+ msize = 32; -+ else if (code == 'g') -+ msize = 64; -+ else -+ msize = GET_MODE_SIZE (GET_MODE (x)); - -- /* Don't allow SUBREGs that span more than a word. It can -- lead to spill failures when the register is one word out -- of a two word structure. */ -- if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) -- return NULL_RTX; -+ regno = REGNO (x); - -- /* Allow only SUBREGs of non-eliminable hard registers. 
*/ -- if (register_no_elim_operand (reg, mode)) -- return reg; -+ if (regno == ARG_POINTER_REGNUM -+ || regno == FRAME_POINTER_REGNUM -+ || regno == FPSR_REG) -+ { -+ output_operand_lossage -+ ("invalid use of register '%s'", reg_names[regno]); -+ return; -+ } -+ else if (regno == FLAGS_REG) -+ { -+ output_operand_lossage ("invalid use of asm flag output"); -+ return; - } - -- /* Op is not a register. */ -- return NULL_RTX; --} -- --/* Recognizes RTL expressions that are valid memory addresses for an -- instruction. The MODE argument is the machine mode for the MEM -- expression that wants to use this address. -- -- It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should -- convert common non-canonical forms to canonical form so that they will -- be recognized. */ -- --static bool --ix86_legitimate_address_p (machine_mode, rtx addr, bool strict) --{ -- struct ix86_address parts; -- rtx base, index, disp; -- HOST_WIDE_INT scale; -- addr_space_t seg; -- -- if (ix86_decompose_address (addr, &parts) <= 0) -- /* Decomposition failed. */ -- return false; -+ if (code == 'V') -+ { -+ if (GENERAL_REGNO_P (regno)) -+ msize = GET_MODE_SIZE (word_mode); -+ else -+ error ("% modifier on non-integer register"); -+ } - -- base = parts.base; -- index = parts.index; -- disp = parts.disp; -- scale = parts.scale; -- seg = parts.seg; -+ duplicated = code == 'd' && TARGET_AVX; - -- /* Validate base register. */ -- if (base) -+ switch (msize) - { -- rtx reg = ix86_validate_address_register (base); -+ case 16: -+ case 12: -+ case 8: -+ if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode)) -+ warning (0, "unsupported size for integer register"); -+ /* FALLTHRU */ -+ case 4: -+ if (LEGACY_INT_REGNO_P (regno)) -+ putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file); -+ /* FALLTHRU */ -+ case 2: -+ normal: -+ reg = hi_reg_name[regno]; -+ break; -+ case 1: -+ if (regno >= ARRAY_SIZE (qi_reg_name)) -+ goto normal; -+ if (!ANY_QI_REGNO_P (regno)) -+ error ("unsupported size for integer register"); -+ reg = qi_reg_name[regno]; -+ break; -+ case 0: -+ if (regno >= ARRAY_SIZE (qi_high_reg_name)) -+ goto normal; -+ reg = qi_high_reg_name[regno]; -+ break; -+ case 32: -+ case 64: -+ if (SSE_REGNO_P (regno)) -+ { -+ gcc_assert (!duplicated); -+ putc (msize == 32 ? 'y' : 'z', file); -+ reg = hi_reg_name[regno] + 1; -+ break; -+ } -+ goto normal; -+ default: -+ gcc_unreachable (); -+ } - -- if (reg == NULL_RTX) -- return false; -+ fputs (reg, file); - -- if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg)) -- || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg))) -- /* Base is not valid. */ -- return false; -+ /* Irritatingly, AMD extended registers use -+ different naming convention: "r%d[bwd]" */ -+ if (REX_INT_REGNO_P (regno)) -+ { -+ gcc_assert (TARGET_64BIT); -+ switch (msize) -+ { -+ case 0: -+ error ("extended registers have no high halves"); -+ break; -+ case 1: -+ putc ('b', file); -+ break; -+ case 2: -+ putc ('w', file); -+ break; -+ case 4: -+ putc ('d', file); -+ break; -+ case 8: -+ /* no suffix */ -+ break; -+ default: -+ error ("unsupported operand size for extended register"); -+ break; -+ } -+ return; - } - -- /* Validate index register. */ -- if (index) -+ if (duplicated) - { -- rtx reg = ix86_validate_address_register (index); -- -- if (reg == NULL_RTX) -- return false; -- -- if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg)) -- || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg))) -- /* Index is not valid. 
*/ -- return false; -+ if (ASSEMBLER_DIALECT == ASM_ATT) -+ fprintf (file, ", %%%s", reg); -+ else -+ fprintf (file, ", %s", reg); - } -+} - -- /* Index and base should have the same mode. */ -- if (base && index -- && GET_MODE (base) != GET_MODE (index)) -- return false; -- -- /* Address override works only on the (%reg) part of %fs:(%reg). */ -- if (seg != ADDR_SPACE_GENERIC -- && ((base && GET_MODE (base) != word_mode) -- || (index && GET_MODE (index) != word_mode))) -- return false; -+/* Meaning of CODE: -+ L,W,B,Q,S,T -- print the opcode suffix for specified size of operand. -+ C -- print opcode suffix for set/cmov insn. -+ c -- like C, but print reversed condition -+ F,f -- likewise, but for floating-point. -+ O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.", -+ otherwise nothing -+ R -- print embedded rounding and sae. -+ r -- print only sae. -+ z -- print the opcode suffix for the size of the current operand. -+ Z -- likewise, with special suffixes for x87 instructions. -+ * -- print a star (in certain assembler syntax) -+ A -- print an absolute memory reference. -+ E -- print address with DImode register names if TARGET_64BIT. -+ w -- print the operand as if it's a "word" (HImode) even if it isn't. -+ s -- print a shift double count, followed by the assemblers argument -+ delimiter. -+ b -- print the QImode name of the register for the indicated operand. -+ %b0 would print %al if operands[0] is reg 0. -+ w -- likewise, print the HImode name of the register. -+ k -- likewise, print the SImode name of the register. -+ q -- likewise, print the DImode name of the register. -+ x -- likewise, print the V4SFmode name of the register. -+ t -- likewise, print the V8SFmode name of the register. -+ g -- likewise, print the V16SFmode name of the register. -+ h -- print the QImode name for a "high" register, either ah, bh, ch or dh. -+ y -- print "st(0)" instead of "st" as a register. -+ d -- print duplicated register operand for AVX instruction. -+ D -- print condition for SSE cmp instruction. -+ P -- if PIC, print an @PLT suffix. -+ p -- print raw symbol name. -+ X -- don't print any sort of PIC '@' suffix for a symbol. -+ & -- print some in-use local-dynamic symbol name. -+ H -- print a memory address offset by 8; used for sse high-parts -+ Y -- print condition for XOP pcom* instruction. -+ V -- print naked full integer register name without %. -+ + -- print a branch hint as 'cs' or 'ds' prefix -+ ; -- print a semicolon (after prefixes due to bug in older gas). -+ ~ -- print "i" if TARGET_AVX2, "f" otherwise. -+ ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode -+ M -- print addr32 prefix for TARGET_X32 with VSIB address. -+ ! -- print NOTRACK prefix for jxx/call/ret instructions if required. -+ */ - -- /* Validate scale factor. */ -- if (scale != 1) -+void -+ix86_print_operand (FILE *file, rtx x, int code) -+{ -+ if (code) - { -- if (!index) -- /* Scale without index. */ -- return false; -+ switch (code) -+ { -+ case 'A': -+ switch (ASSEMBLER_DIALECT) -+ { -+ case ASM_ATT: -+ putc ('*', file); -+ break; - -- if (scale != 2 && scale != 4 && scale != 8) -- /* Scale is not a valid multiplier. */ -- return false; -- } -+ case ASM_INTEL: -+ /* Intel syntax. For absolute addresses, registers should not -+ be surrounded by braces. */ -+ if (!REG_P (x)) -+ { -+ putc ('[', file); -+ ix86_print_operand (file, x, 0); -+ putc (']', file); -+ return; -+ } -+ break; - -- /* Validate displacement. 
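The operand-code table above also backs the size-override modifiers usable from extended asm. The helper below is a hypothetical sketch (the name low_byte and the snippet are illustrative, not from the patch) showing %b selecting the byte register name, i.e. the 'b' / msize == 1 path of print_reg.

/* Hypothetical helper, illustration only.  %b1 prints the byte register
   name (e.g. %al) for operand 1; the "q" constraints keep both operands
   in byte-addressable registers on 32-bit targets.  */
static inline unsigned char low_byte (unsigned int x)
{
  unsigned char b;
  __asm__ ("movb %b1, %0" : "=q" (b) : "q" (x));
  return b;
}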
*/ -- if (disp) -- { -- if (GET_CODE (disp) == CONST -- && GET_CODE (XEXP (disp, 0)) == UNSPEC -- && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET) -- switch (XINT (XEXP (disp, 0), 1)) -- { -- /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit -- when used. While ABI specify also 32bit relocations, we -- don't produce them at all and use IP relative instead. -- Allow GOT in 32bit mode for both PIC and non-PIC if symbol -- should be loaded via GOT. */ -- case UNSPEC_GOT: -- if (!TARGET_64BIT -- && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0))) -- goto is_legitimate_pic; -- /* FALLTHRU */ -- case UNSPEC_GOTOFF: -- gcc_assert (flag_pic); -- if (!TARGET_64BIT) -- goto is_legitimate_pic; -+ default: -+ gcc_unreachable (); -+ } - -- /* 64bit address unspec. */ -- return false; -+ ix86_print_operand (file, x, 0); -+ return; - -- case UNSPEC_GOTPCREL: -- if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0))) -- goto is_legitimate_pic; -- /* FALLTHRU */ -- case UNSPEC_PCREL: -- gcc_assert (flag_pic); -- goto is_legitimate_pic; -+ case 'E': -+ /* Wrap address in an UNSPEC to declare special handling. */ -+ if (TARGET_64BIT) -+ x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR); - -- case UNSPEC_GOTTPOFF: -- case UNSPEC_GOTNTPOFF: -- case UNSPEC_INDNTPOFF: -- case UNSPEC_NTPOFF: -- case UNSPEC_DTPOFF: -- break; -- -- default: -- /* Invalid address unspec. */ -- return false; -- } -+ output_address (VOIDmode, x); -+ return; - -- else if (SYMBOLIC_CONST (disp) -- && (flag_pic -- || (TARGET_MACHO --#if TARGET_MACHO -- && MACHOPIC_INDIRECT -- && !machopic_operand_p (disp) --#endif -- ))) -- { -+ case 'L': -+ if (ASSEMBLER_DIALECT == ASM_ATT) -+ putc ('l', file); -+ return; - -- is_legitimate_pic: -- if (TARGET_64BIT && (index || base)) -- { -- /* foo@dtpoff(%rX) is ok. */ -- if (GET_CODE (disp) != CONST -- || GET_CODE (XEXP (disp, 0)) != PLUS -- || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC -- || !CONST_INT_P (XEXP (XEXP (disp, 0), 1)) -- || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF -- && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF)) -- /* Non-constant pic memory reference. */ -- return false; -- } -- else if ((!TARGET_MACHO || flag_pic) -- && ! legitimate_pic_address_disp_p (disp)) -- /* Displacement is an invalid pic construct. */ -- return false; --#if TARGET_MACHO -- else if (MACHO_DYNAMIC_NO_PIC_P -- && !ix86_legitimate_constant_p (Pmode, disp)) -- /* displacment must be referenced via non_lazy_pointer */ -- return false; --#endif -+ case 'W': -+ if (ASSEMBLER_DIALECT == ASM_ATT) -+ putc ('w', file); -+ return; - -- /* This code used to verify that a symbolic pic displacement -- includes the pic_offset_table_rtx register. -+ case 'B': -+ if (ASSEMBLER_DIALECT == ASM_ATT) -+ putc ('b', file); -+ return; - -- While this is good idea, unfortunately these constructs may -- be created by "adds using lea" optimization for incorrect -- code like: -+ case 'Q': -+ if (ASSEMBLER_DIALECT == ASM_ATT) -+ putc ('l', file); -+ return; - -- int a; -- int foo(int i) -- { -- return *(&a+i); -- } -+ case 'S': -+ if (ASSEMBLER_DIALECT == ASM_ATT) -+ putc ('s', file); -+ return; - -- This code is nonsensical, but results in addressing -- GOT table with pic_offset_table_rtx base. We can't -- just refuse it easily, since it gets matched by -- "addsi3" pattern, that later gets split to lea in the -- case output register differs from input. 
While this -- can be handled by separate addsi pattern for this case -- that never results in lea, this seems to be easier and -- correct fix for crash to disable this test. */ -- } -- else if (GET_CODE (disp) != LABEL_REF -- && !CONST_INT_P (disp) -- && (GET_CODE (disp) != CONST -- || !ix86_legitimate_constant_p (Pmode, disp)) -- && (GET_CODE (disp) != SYMBOL_REF -- || !ix86_legitimate_constant_p (Pmode, disp))) -- /* Displacement is not constant. */ -- return false; -- else if (TARGET_64BIT -- && !x86_64_immediate_operand (disp, VOIDmode)) -- /* Displacement is out of range. */ -- return false; -- /* In x32 mode, constant addresses are sign extended to 64bit, so -- we have to prevent addresses from 0x80000000 to 0xffffffff. */ -- else if (TARGET_X32 && !(index || base) -- && CONST_INT_P (disp) -- && val_signbit_known_set_p (SImode, INTVAL (disp))) -- return false; -- } -+ case 'T': -+ if (ASSEMBLER_DIALECT == ASM_ATT) -+ putc ('t', file); -+ return; - -- /* Everything looks valid. */ -- return true; --} -+ case 'O': -+#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX -+ if (ASSEMBLER_DIALECT != ASM_ATT) -+ return; - --/* Determine if a given RTX is a valid constant address. */ -+ switch (GET_MODE_SIZE (GET_MODE (x))) -+ { -+ case 2: -+ putc ('w', file); -+ break; -+ -+ case 4: -+ putc ('l', file); -+ break; - --bool --constant_address_p (rtx x) --{ -- return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1); --} -- --/* Return a unique alias set for the GOT. */ -+ case 8: -+ putc ('q', file); -+ break; - --static alias_set_type --ix86_GOT_alias_set (void) --{ -- static alias_set_type set = -1; -- if (set == -1) -- set = new_alias_set (); -- return set; --} -+ default: -+ output_operand_lossage ("invalid operand size for operand " -+ "code 'O'"); -+ return; -+ } - --/* Return a legitimate reference for ORIG (an address) using the -- register REG. If REG is 0, a new pseudo is generated. -+ putc ('.', file); -+#endif -+ return; - -- There are two types of references that must be handled: -+ case 'z': -+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) -+ { -+ /* Opcodes don't get size suffixes if using Intel opcodes. */ -+ if (ASSEMBLER_DIALECT == ASM_INTEL) -+ return; - -- 1. Global data references must load the address from the GOT, via -- the PIC reg. An insn is emitted to do this load, and the reg is -- returned. -+ switch (GET_MODE_SIZE (GET_MODE (x))) -+ { -+ case 1: -+ putc ('b', file); -+ return; - -- 2. Static data references, constant pool addresses, and code labels -- compute the address as an offset from the GOT, whose base is in -- the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to -- differentiate them from global data objects. The returned -- address is the PIC reg + an unspec constant. -+ case 2: -+ putc ('w', file); -+ return; - -- TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC -- reg also appears in the address. */ -+ case 4: -+ putc ('l', file); -+ return; - --static rtx --legitimize_pic_address (rtx orig, rtx reg) --{ -- rtx addr = orig; -- rtx new_rtx = orig; -+ case 8: -+ putc ('q', file); -+ return; - --#if TARGET_MACHO -- if (TARGET_MACHO && !TARGET_64BIT) -- { -- if (reg == 0) -- reg = gen_reg_rtx (Pmode); -- /* Use the generic Mach-O PIC machinery. 
*/ -- return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg); -- } --#endif -+ default: -+ output_operand_lossage ("invalid operand size for operand " -+ "code 'z'"); -+ return; -+ } -+ } - -- if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES) -- { -- rtx tmp = legitimize_pe_coff_symbol (addr, true); -- if (tmp) -- return tmp; -- } -+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) -+ warning (0, "non-integer operand used with operand code %"); -+ /* FALLTHRU */ - -- if (TARGET_64BIT && legitimate_pic_address_disp_p (addr)) -- new_rtx = addr; -- else if ((!TARGET_64BIT -- || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC) -- && !TARGET_PECOFF -- && gotoff_operand (addr, Pmode)) -- { -- /* This symbol may be referenced via a displacement -- from the PIC base address (@GOTOFF). */ -- if (GET_CODE (addr) == CONST) -- addr = XEXP (addr, 0); -+ case 'Z': -+ /* 387 opcodes don't get size suffixes if using Intel opcodes. */ -+ if (ASSEMBLER_DIALECT == ASM_INTEL) -+ return; - -- if (GET_CODE (addr) == PLUS) -- { -- new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), -- UNSPEC_GOTOFF); -- new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1)); -- } -- else -- new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF); -+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) -+ { -+ switch (GET_MODE_SIZE (GET_MODE (x))) -+ { -+ case 2: -+#ifdef HAVE_AS_IX86_FILDS -+ putc ('s', file); -+#endif -+ return; - -- new_rtx = gen_rtx_CONST (Pmode, new_rtx); -+ case 4: -+ putc ('l', file); -+ return; - -- if (TARGET_64BIT) -- new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode); -+ case 8: -+#ifdef HAVE_AS_IX86_FILDQ -+ putc ('q', file); -+#else -+ fputs ("ll", file); -+#endif -+ return; - -- if (reg != 0) -- { -- gcc_assert (REG_P (reg)); -- new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx, -- new_rtx, reg, 1, OPTAB_DIRECT); -- } -- else -- new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx); -- } -- else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0) -- /* We can't use @GOTOFF for text labels -- on VxWorks, see gotoff_operand. */ -- || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF)) -- { -- rtx tmp = legitimize_pe_coff_symbol (addr, true); -- if (tmp) -- return tmp; -- -- /* For x64 PE-COFF there is no GOT table, -- so we use address directly. */ -- if (TARGET_64BIT && TARGET_PECOFF) -- { -- new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL); -- new_rtx = gen_rtx_CONST (Pmode, new_rtx); -- } -- else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC) -- { -- new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), -- UNSPEC_GOTPCREL); -- new_rtx = gen_rtx_CONST (Pmode, new_rtx); -- new_rtx = gen_const_mem (Pmode, new_rtx); -- set_mem_alias_set (new_rtx, ix86_GOT_alias_set ()); -- } -- else -- { -- /* This symbol must be referenced via a load -- from the Global Offset Table (@GOT). 
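A small source-level illustration of the two reference kinds distinguished here (the names g_ext and g_loc are made up): under -fPIC an interposable global is reached through a GOT load (@GOT/@GOTPCREL), while a symbol with local binding can be addressed @GOTOFF or RIP-relative without one.

/* Illustration only, compiled with -fPIC.  */
extern int g_ext;          /* preemptible: loaded via the GOT        */
static int g_loc = 42;     /* local binding: @GOTOFF / %rip-relative */

int read_both (void)
{
  return g_ext + g_loc;
}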
*/ -- new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT); -- new_rtx = gen_rtx_CONST (Pmode, new_rtx); -- if (TARGET_64BIT) -- new_rtx = force_reg (Pmode, new_rtx); -- new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx); -- new_rtx = gen_const_mem (Pmode, new_rtx); -- set_mem_alias_set (new_rtx, ix86_GOT_alias_set ()); -- } -- -- new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode); -- } -- else -- { -- if (CONST_INT_P (addr) -- && !x86_64_immediate_operand (addr, VOIDmode)) -- new_rtx = copy_to_suggested_reg (addr, reg, Pmode); -- else if (GET_CODE (addr) == CONST) -- { -- addr = XEXP (addr, 0); -- -- /* We must match stuff we generate before. Assume the only -- unspecs that can get here are ours. Not that we could do -- anything with them anyway.... */ -- if (GET_CODE (addr) == UNSPEC -- || (GET_CODE (addr) == PLUS -- && GET_CODE (XEXP (addr, 0)) == UNSPEC)) -- return orig; -- gcc_assert (GET_CODE (addr) == PLUS); -- } -- -- if (GET_CODE (addr) == PLUS) -- { -- rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1); -- -- /* Check first to see if this is a constant -- offset from a @GOTOFF symbol reference. */ -- if (!TARGET_PECOFF -- && gotoff_operand (op0, Pmode) -- && CONST_INT_P (op1)) -+ default: -+ break; -+ } -+ } -+ else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) - { -- if (!TARGET_64BIT) -- { -- new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), -- UNSPEC_GOTOFF); -- new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1); -- new_rtx = gen_rtx_CONST (Pmode, new_rtx); -+ /* 387 opcodes don't get size suffixes -+ if the operands are registers. */ -+ if (STACK_REG_P (x)) -+ return; - -- if (reg != 0) -- { -- gcc_assert (REG_P (reg)); -- new_rtx = expand_simple_binop (Pmode, PLUS, -- pic_offset_table_rtx, -- new_rtx, reg, 1, -- OPTAB_DIRECT); -- } -- else -- new_rtx -- = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx); -- } -- else -+ switch (GET_MODE_SIZE (GET_MODE (x))) - { -- if (INTVAL (op1) < -16*1024*1024 -- || INTVAL (op1) >= 16*1024*1024) -- { -- if (!x86_64_immediate_operand (op1, Pmode)) -- op1 = force_reg (Pmode, op1); -+ case 4: -+ putc ('s', file); -+ return; - -- new_rtx -- = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1); -- } -+ case 8: -+ putc ('l', file); -+ return; -+ -+ case 12: -+ case 16: -+ putc ('t', file); -+ return; -+ -+ default: -+ break; - } - } - else - { -- rtx base = legitimize_pic_address (op0, reg); -- machine_mode mode = GET_MODE (base); -- new_rtx -- = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg); -- -- if (CONST_INT_P (new_rtx)) -- { -- if (INTVAL (new_rtx) < -16*1024*1024 -- || INTVAL (new_rtx) >= 16*1024*1024) -- { -- if (!x86_64_immediate_operand (new_rtx, mode)) -- new_rtx = force_reg (mode, new_rtx); -- -- new_rtx -- = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx); -- } -- else -- new_rtx = plus_constant (mode, base, INTVAL (new_rtx)); -- } -- else -- { -- /* For %rip addressing, we have to use -- just disp32, not base nor index. */ -- if (TARGET_64BIT -- && (GET_CODE (base) == SYMBOL_REF -- || GET_CODE (base) == LABEL_REF)) -- base = force_reg (mode, base); -- if (GET_CODE (new_rtx) == PLUS -- && CONSTANT_P (XEXP (new_rtx, 1))) -- { -- base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0)); -- new_rtx = XEXP (new_rtx, 1); -- } -- new_rtx = gen_rtx_PLUS (mode, base, new_rtx); -- } -+ output_operand_lossage ("invalid operand type used with " -+ "operand code 'Z'"); -+ return; - } -- } -- } -- return new_rtx; --} -- --/* Load the thread pointer. If TO_REG is true, force it into a register. 
*/ -- --static rtx --get_thread_pointer (machine_mode tp_mode, bool to_reg) --{ -- rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP); -- -- if (GET_MODE (tp) != tp_mode) -- { -- gcc_assert (GET_MODE (tp) == SImode); -- gcc_assert (tp_mode == DImode); -- -- tp = gen_rtx_ZERO_EXTEND (tp_mode, tp); -- } -- -- if (to_reg) -- tp = copy_to_mode_reg (tp_mode, tp); -- -- return tp; --} -- --/* Construct the SYMBOL_REF for the tls_get_addr function. */ -- --static GTY(()) rtx ix86_tls_symbol; -- --static rtx --ix86_tls_get_addr (void) --{ -- if (!ix86_tls_symbol) -- { -- const char *sym -- = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT) -- ? "___tls_get_addr" : "__tls_get_addr"); -- -- ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym); -- } -- -- if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF) -- { -- rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol), -- UNSPEC_PLTOFF); -- return gen_rtx_PLUS (Pmode, pic_offset_table_rtx, -- gen_rtx_CONST (Pmode, unspec)); -- } -- -- return ix86_tls_symbol; --} -- --/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */ - --static GTY(()) rtx ix86_tls_module_base_symbol; -- --rtx --ix86_tls_module_base (void) --{ -- if (!ix86_tls_module_base_symbol) -- { -- ix86_tls_module_base_symbol -- = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_"); -+ output_operand_lossage ("invalid operand size for operand code 'Z'"); -+ return; - -- SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol) -- |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT; -- } -+ case 'd': -+ case 'b': -+ case 'w': -+ case 'k': -+ case 'q': -+ case 'h': -+ case 't': -+ case 'g': -+ case 'y': -+ case 'x': -+ case 'X': -+ case 'P': -+ case 'p': -+ case 'V': -+ break; - -- return ix86_tls_module_base_symbol; --} -+ case 's': -+ if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT) -+ { -+ ix86_print_operand (file, x, 0); -+ fputs (", ", file); -+ } -+ return; - --/* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is -- false if we expect this to be used for a memory address and true if -- we expect to load the address into a register. */ -- --static rtx --legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) --{ -- rtx dest, base, off; -- rtx pic = NULL_RTX, tp = NULL_RTX; -- machine_mode tp_mode = Pmode; -- int type; -+ case 'Y': -+ switch (GET_CODE (x)) -+ { -+ case NE: -+ fputs ("neq", file); -+ break; -+ case EQ: -+ fputs ("eq", file); -+ break; -+ case GE: -+ case GEU: -+ fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file); -+ break; -+ case GT: -+ case GTU: -+ fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file); -+ break; -+ case LE: -+ case LEU: -+ fputs ("le", file); -+ break; -+ case LT: -+ case LTU: -+ fputs ("lt", file); -+ break; -+ case UNORDERED: -+ fputs ("unord", file); -+ break; -+ case ORDERED: -+ fputs ("ord", file); -+ break; -+ case UNEQ: -+ fputs ("ueq", file); -+ break; -+ case UNGE: -+ fputs ("nlt", file); -+ break; -+ case UNGT: -+ fputs ("nle", file); -+ break; -+ case UNLE: -+ fputs ("ule", file); -+ break; -+ case UNLT: -+ fputs ("ult", file); -+ break; -+ case LTGT: -+ fputs ("une", file); -+ break; -+ default: -+ output_operand_lossage ("operand is not a condition code, " -+ "invalid operand code 'Y'"); -+ return; -+ } -+ return; - -- /* Fall back to global dynamic model if tool chain cannot support local -- dynamic. 
*/ -- if (TARGET_SUN_TLS && !TARGET_64BIT -- && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM -- && model == TLS_MODEL_LOCAL_DYNAMIC) -- model = TLS_MODEL_GLOBAL_DYNAMIC; -+ case 'D': -+ /* Little bit of braindamage here. The SSE compare instructions -+ does use completely different names for the comparisons that the -+ fp conditional moves. */ -+ switch (GET_CODE (x)) -+ { -+ case UNEQ: -+ if (TARGET_AVX) -+ { -+ fputs ("eq_us", file); -+ break; -+ } -+ /* FALLTHRU */ -+ case EQ: -+ fputs ("eq", file); -+ break; -+ case UNLT: -+ if (TARGET_AVX) -+ { -+ fputs ("nge", file); -+ break; -+ } -+ /* FALLTHRU */ -+ case LT: -+ fputs ("lt", file); -+ break; -+ case UNLE: -+ if (TARGET_AVX) -+ { -+ fputs ("ngt", file); -+ break; -+ } -+ /* FALLTHRU */ -+ case LE: -+ fputs ("le", file); -+ break; -+ case UNORDERED: -+ fputs ("unord", file); -+ break; -+ case LTGT: -+ if (TARGET_AVX) -+ { -+ fputs ("neq_oq", file); -+ break; -+ } -+ /* FALLTHRU */ -+ case NE: -+ fputs ("neq", file); -+ break; -+ case GE: -+ if (TARGET_AVX) -+ { -+ fputs ("ge", file); -+ break; -+ } -+ /* FALLTHRU */ -+ case UNGE: -+ fputs ("nlt", file); -+ break; -+ case GT: -+ if (TARGET_AVX) -+ { -+ fputs ("gt", file); -+ break; -+ } -+ /* FALLTHRU */ -+ case UNGT: -+ fputs ("nle", file); -+ break; -+ case ORDERED: -+ fputs ("ord", file); -+ break; -+ default: -+ output_operand_lossage ("operand is not a condition code, " -+ "invalid operand code 'D'"); -+ return; -+ } -+ return; - -- switch (model) -- { -- case TLS_MODEL_GLOBAL_DYNAMIC: -- dest = gen_reg_rtx (Pmode); -+ case 'F': -+ case 'f': -+#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX -+ if (ASSEMBLER_DIALECT == ASM_ATT) -+ putc ('.', file); -+ gcc_fallthrough (); -+#endif - -- if (!TARGET_64BIT) -- { -- if (flag_pic && !TARGET_PECOFF) -- pic = pic_offset_table_rtx; -- else -+ case 'C': -+ case 'c': -+ if (!COMPARISON_P (x)) - { -- pic = gen_reg_rtx (Pmode); -- emit_insn (gen_set_got (pic)); -+ output_operand_lossage ("operand is not a condition code, " -+ "invalid operand code '%c'", code); -+ return; - } -- } -+ put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), -+ code == 'c' || code == 'f', -+ code == 'F' || code == 'f', -+ file); -+ return; - -- if (TARGET_GNU2_TLS) -- { -- if (TARGET_64BIT) -- emit_insn (gen_tls_dynamic_gnu2_64 (dest, x)); -- else -- emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic)); -+ case 'H': -+ if (!offsettable_memref_p (x)) -+ { -+ output_operand_lossage ("operand is not an offsettable memory " -+ "reference, invalid operand code 'H'"); -+ return; -+ } -+ /* It doesn't actually matter what mode we use here, as we're -+ only going to use this for printing. */ -+ x = adjust_address_nv (x, DImode, 8); -+ /* Output 'qword ptr' for intel assembler dialect. */ -+ if (ASSEMBLER_DIALECT == ASM_INTEL) -+ code = 'q'; -+ break; - -- tp = get_thread_pointer (Pmode, true); -- dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest)); -+ case 'K': -+ if (!CONST_INT_P (x)) -+ { -+ output_operand_lossage ("operand is not an integer, invalid " -+ "operand code 'K'"); -+ return; -+ } - -- if (GET_MODE (x) != Pmode) -- x = gen_rtx_ZERO_EXTEND (Pmode, x); -+ if (INTVAL (x) & IX86_HLE_ACQUIRE) -+#ifdef HAVE_AS_IX86_HLE -+ fputs ("xacquire ", file); -+#else -+ fputs ("\n" ASM_BYTE "0xf2\n\t", file); -+#endif -+ else if (INTVAL (x) & IX86_HLE_RELEASE) -+#ifdef HAVE_AS_IX86_HLE -+ fputs ("xrelease ", file); -+#else -+ fputs ("\n" ASM_BYTE "0xf3\n\t", file); -+#endif -+ /* We do not want to print value of the operand. 
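The IX86_HLE_ACQUIRE/IX86_HLE_RELEASE bits tested just above come from the HLE flags a caller can OR into an atomic operation's memory model. A minimal sketch follows (assumes -mhle; the lock/spin_lock/spin_unlock names are invented) of what ends up printed as the xacquire/xrelease prefixes.

/* Illustration only; requires -mhle.  */
static int lock;

void spin_lock (void)
{
  /* The elided lock acquisition is printed with an xacquire prefix.  */
  while (__atomic_exchange_n (&lock, 1, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
    ;
}

void spin_unlock (void)
{
  /* The release store is printed with an xrelease prefix.  */
  __atomic_store_n (&lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);
}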
*/ -+ return; - -- set_unique_reg_note (get_last_insn (), REG_EQUAL, x); -- } -- else -- { -- rtx caddr = ix86_tls_get_addr (); -+ case 'N': -+ if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x))) -+ fputs ("{z}", file); -+ return; - -- if (TARGET_64BIT) -+ case 'r': -+ if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE) - { -- rtx rax = gen_rtx_REG (Pmode, AX_REG); -- rtx_insn *insns; -+ output_operand_lossage ("operand is not a specific integer, " -+ "invalid operand code 'r'"); -+ return; -+ } - -- start_sequence (); -- emit_call_insn -- (ix86_gen_tls_global_dynamic_64 (rax, x, caddr)); -- insns = get_insns (); -- end_sequence (); -+ if (ASSEMBLER_DIALECT == ASM_INTEL) -+ fputs (", ", file); - -- if (GET_MODE (x) != Pmode) -- x = gen_rtx_ZERO_EXTEND (Pmode, x); -+ fputs ("{sae}", file); - -- RTL_CONST_CALL_P (insns) = 1; -- emit_libcall_block (insns, dest, rax, x); -- } -- else -- emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr)); -- } -- break; -+ if (ASSEMBLER_DIALECT == ASM_ATT) -+ fputs (", ", file); - -- case TLS_MODEL_LOCAL_DYNAMIC: -- base = gen_reg_rtx (Pmode); -+ return; - -- if (!TARGET_64BIT) -- { -- if (flag_pic) -- pic = pic_offset_table_rtx; -- else -+ case 'R': -+ if (!CONST_INT_P (x)) - { -- pic = gen_reg_rtx (Pmode); -- emit_insn (gen_set_got (pic)); -+ output_operand_lossage ("operand is not an integer, invalid " -+ "operand code 'R'"); -+ return; - } -- } - -- if (TARGET_GNU2_TLS) -- { -- rtx tmp = ix86_tls_module_base (); -+ if (ASSEMBLER_DIALECT == ASM_INTEL) -+ fputs (", ", file); - -- if (TARGET_64BIT) -- emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp)); -- else -- emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic)); -+ switch (INTVAL (x)) -+ { -+ case ROUND_NEAREST_INT | ROUND_SAE: -+ fputs ("{rn-sae}", file); -+ break; -+ case ROUND_NEG_INF | ROUND_SAE: -+ fputs ("{rd-sae}", file); -+ break; -+ case ROUND_POS_INF | ROUND_SAE: -+ fputs ("{ru-sae}", file); -+ break; -+ case ROUND_ZERO | ROUND_SAE: -+ fputs ("{rz-sae}", file); -+ break; -+ default: -+ output_operand_lossage ("operand is not a specific integer, " -+ "invalid operand code 'R'"); -+ } - -- tp = get_thread_pointer (Pmode, true); -- set_unique_reg_note (get_last_insn (), REG_EQUAL, -- gen_rtx_MINUS (Pmode, tmp, tp)); -- } -- else -- { -- rtx caddr = ix86_tls_get_addr (); -+ if (ASSEMBLER_DIALECT == ASM_ATT) -+ fputs (", ", file); - -- if (TARGET_64BIT) -- { -- rtx rax = gen_rtx_REG (Pmode, AX_REG); -- rtx_insn *insns; -- rtx eqv; -+ return; - -- start_sequence (); -- emit_call_insn -- (ix86_gen_tls_local_dynamic_base_64 (rax, caddr)); -- insns = get_insns (); -- end_sequence (); -+ case '*': -+ if (ASSEMBLER_DIALECT == ASM_ATT) -+ putc ('*', file); -+ return; - -- /* Attach a unique REG_EQUAL, to allow the RTL optimizers to -- share the LD_BASE result with other LD model accesses. 
*/ -- eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), -- UNSPEC_TLS_LD_BASE); -+ case '&': -+ { -+ const char *name = get_some_local_dynamic_name (); -+ if (name == NULL) -+ output_operand_lossage ("'%%&' used without any " -+ "local dynamic TLS references"); -+ else -+ assemble_name (file, name); -+ return; -+ } - -- RTL_CONST_CALL_P (insns) = 1; -- emit_libcall_block (insns, base, rax, eqv); -- } -- else -- emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr)); -- } -+ case '+': -+ { -+ rtx x; - -- off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF); -- off = gen_rtx_CONST (Pmode, off); -+ if (!optimize -+ || optimize_function_for_size_p (cfun) -+ || !TARGET_BRANCH_PREDICTION_HINTS) -+ return; - -- dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off)); -+ x = find_reg_note (current_output_insn, REG_BR_PROB, 0); -+ if (x) -+ { -+ int pred_val = profile_probability::from_reg_br_prob_note -+ (XINT (x, 0)).to_reg_br_prob_base (); - -- if (TARGET_GNU2_TLS) -- { -- dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp)); -+ if (pred_val < REG_BR_PROB_BASE * 45 / 100 -+ || pred_val > REG_BR_PROB_BASE * 55 / 100) -+ { -+ bool taken = pred_val > REG_BR_PROB_BASE / 2; -+ bool cputaken -+ = final_forward_branch_p (current_output_insn) == 0; - -- if (GET_MODE (x) != Pmode) -- x = gen_rtx_ZERO_EXTEND (Pmode, x); -+ /* Emit hints only in the case default branch prediction -+ heuristics would fail. */ -+ if (taken != cputaken) -+ { -+ /* We use 3e (DS) prefix for taken branches and -+ 2e (CS) prefix for not taken branches. */ -+ if (taken) -+ fputs ("ds ; ", file); -+ else -+ fputs ("cs ; ", file); -+ } -+ } -+ } -+ return; -+ } - -- set_unique_reg_note (get_last_insn (), REG_EQUAL, x); -- } -- break; -+ case ';': -+#ifndef HAVE_AS_IX86_REP_LOCK_PREFIX -+ putc (';', file); -+#endif -+ return; - -- case TLS_MODEL_INITIAL_EXEC: -- if (TARGET_64BIT) -- { -- if (TARGET_SUN_TLS && !TARGET_X32) -- { -- /* The Sun linker took the AMD64 TLS spec literally -- and can only handle %rax as destination of the -- initial executable code sequence. */ -+ case '~': -+ putc (TARGET_AVX2 ? 'i' : 'f', file); -+ return; - -- dest = gen_reg_rtx (DImode); -- emit_insn (gen_tls_initial_exec_64_sun (dest, x)); -- return dest; -+ case 'M': -+ if (TARGET_X32) -+ { -+ /* NB: 32-bit indices in VSIB address are sign-extended -+ to 64 bits. In x32, if 32-bit address 0xf7fa3010 is -+ sign-extended to 0xfffffffff7fa3010 which is invalid -+ address. Add addr32 prefix if there is no base -+ register nor symbol. */ -+ bool ok; -+ struct ix86_address parts; -+ ok = ix86_decompose_address (x, &parts); -+ gcc_assert (ok && parts.index == NULL_RTX); -+ if (parts.base == NULL_RTX -+ && (parts.disp == NULL_RTX -+ || !symbolic_operand (parts.disp, -+ GET_MODE (parts.disp)))) -+ fputs ("addr32 ", file); - } -+ return; - -- /* Generate DImode references to avoid %fs:(%reg32) -- problems and linker IE->LE relaxation bug. */ -- tp_mode = DImode; -- pic = NULL; -- type = UNSPEC_GOTNTPOFF; -- } -- else if (flag_pic) -- { -- pic = pic_offset_table_rtx; -- type = TARGET_ANY_GNU_TLS ? 
UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF; -- } -- else if (!TARGET_ANY_GNU_TLS) -- { -- pic = gen_reg_rtx (Pmode); -- emit_insn (gen_set_got (pic)); -- type = UNSPEC_GOTTPOFF; -- } -- else -- { -- pic = NULL; -- type = UNSPEC_INDNTPOFF; -- } -+ case '^': -+ if (TARGET_64BIT && Pmode != word_mode) -+ fputs ("addr32 ", file); -+ return; - -- off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type); -- off = gen_rtx_CONST (tp_mode, off); -- if (pic) -- off = gen_rtx_PLUS (tp_mode, pic, off); -- off = gen_const_mem (tp_mode, off); -- set_mem_alias_set (off, ix86_GOT_alias_set ()); -+ case '!': -+ if (ix86_notrack_prefixed_insn_p (current_output_insn)) -+ fputs ("notrack ", file); -+ return; - -- if (TARGET_64BIT || TARGET_ANY_GNU_TLS) -- { -- base = get_thread_pointer (tp_mode, -- for_mov || !TARGET_TLS_DIRECT_SEG_REFS); -- off = force_reg (tp_mode, off); -- dest = gen_rtx_PLUS (tp_mode, base, off); -- if (tp_mode != Pmode) -- dest = convert_to_mode (Pmode, dest, 1); -- } -- else -- { -- base = get_thread_pointer (Pmode, true); -- dest = gen_reg_rtx (Pmode); -- emit_insn (ix86_gen_sub3 (dest, base, off)); -+ default: -+ output_operand_lossage ("invalid operand code '%c'", code); - } -- break; -+ } - -- case TLS_MODEL_LOCAL_EXEC: -- off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), -- (TARGET_64BIT || TARGET_ANY_GNU_TLS) -- ? UNSPEC_NTPOFF : UNSPEC_TPOFF); -- off = gen_rtx_CONST (Pmode, off); -+ if (REG_P (x)) -+ print_reg (x, code, file); - -- if (TARGET_64BIT || TARGET_ANY_GNU_TLS) -- { -- base = get_thread_pointer (Pmode, -- for_mov || !TARGET_TLS_DIRECT_SEG_REFS); -- return gen_rtx_PLUS (Pmode, base, off); -- } -- else -+ else if (MEM_P (x)) -+ { -+ rtx addr = XEXP (x, 0); -+ -+ /* No `byte ptr' prefix for call instructions ... */ -+ if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P') - { -- base = get_thread_pointer (Pmode, true); -- dest = gen_reg_rtx (Pmode); -- emit_insn (ix86_gen_sub3 (dest, base, off)); -- } -- break; -+ machine_mode mode = GET_MODE (x); -+ const char *size; - -- default: -- gcc_unreachable (); -- } -+ /* Check for explicit size override codes. */ -+ if (code == 'b') -+ size = "BYTE"; -+ else if (code == 'w') -+ size = "WORD"; -+ else if (code == 'k') -+ size = "DWORD"; -+ else if (code == 'q') -+ size = "QWORD"; -+ else if (code == 'x') -+ size = "XMMWORD"; -+ else if (code == 't') -+ size = "YMMWORD"; -+ else if (code == 'g') -+ size = "ZMMWORD"; -+ else if (mode == BLKmode) -+ /* ... or BLKmode operands, when not overridden. */ -+ size = NULL; -+ else -+ switch (GET_MODE_SIZE (mode)) -+ { -+ case 1: size = "BYTE"; break; -+ case 2: size = "WORD"; break; -+ case 4: size = "DWORD"; break; -+ case 8: size = "QWORD"; break; -+ case 12: size = "TBYTE"; break; -+ case 16: -+ if (mode == XFmode) -+ size = "TBYTE"; -+ else -+ size = "XMMWORD"; -+ break; -+ case 32: size = "YMMWORD"; break; -+ case 64: size = "ZMMWORD"; break; -+ default: -+ gcc_unreachable (); -+ } -+ if (size) -+ { -+ fputs (size, file); -+ fputs (" PTR ", file); -+ } -+ } - -- return dest; --} -+ if (this_is_asm_operands && ! address_operand (addr, VOIDmode)) -+ output_operand_lossage ("invalid constraints for operand"); -+ else -+ ix86_print_operand_address_as -+ (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P'); -+ } - --/* Return true if OP refers to a TLS address. 
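For orientation, the TLS access models that legitimize_tls_address handles can also be forced from source; a hedged sketch (variable and function names invented) that makes each code path easy to inspect in the generated assembly:

/* Illustration only.  Without an attribute the compiler picks a model
   from -fPIC/-ftls-model; the attribute pins it explicitly.  */
__thread int t_default;          /* typically global- or local-dynamic under -fPIC */
__thread int t_ie __attribute__ ((tls_model ("initial-exec")));
__thread int t_le __attribute__ ((tls_model ("local-exec")));

int bump (void) { return ++t_default + t_ie + t_le; }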
*/ --bool --ix86_tls_address_pattern_p (rtx op) --{ -- subrtx_var_iterator::array_type array; -- FOR_EACH_SUBRTX_VAR (iter, array, op, ALL) -+ else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode) - { -- rtx op = *iter; -- if (MEM_P (op)) -- { -- rtx *x = &XEXP (op, 0); -- while (GET_CODE (*x) == PLUS) -- { -- int i; -- for (i = 0; i < 2; i++) -- { -- rtx u = XEXP (*x, i); -- if (GET_CODE (u) == ZERO_EXTEND) -- u = XEXP (u, 0); -- if (GET_CODE (u) == UNSPEC -- && XINT (u, 1) == UNSPEC_TP) -- return true; -- } -- x = &XEXP (*x, 0); -- } -+ long l; - -- iter.skip_subrtxes (); -- } -+ REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l); -+ -+ if (ASSEMBLER_DIALECT == ASM_ATT) -+ putc ('$', file); -+ /* Sign extend 32bit SFmode immediate to 8 bytes. */ -+ if (code == 'q') -+ fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x", -+ (unsigned long long) (int) l); -+ else -+ fprintf (file, "0x%08x", (unsigned int) l); - } - -- return false; --} -+ else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode) -+ { -+ long l[2]; - --/* Rewrite *LOC so that it refers to a default TLS address space. */ --void --ix86_rewrite_tls_address_1 (rtx *loc) --{ -- subrtx_ptr_iterator::array_type array; -- FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL) -+ REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l); -+ -+ if (ASSEMBLER_DIALECT == ASM_ATT) -+ putc ('$', file); -+ fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff); -+ } -+ -+ /* These float cases don't actually occur as immediate operands. */ -+ else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode) - { -- rtx *loc = *iter; -- if (MEM_P (*loc)) -- { -- rtx addr = XEXP (*loc, 0); -- rtx *x = &addr; -- while (GET_CODE (*x) == PLUS) -- { -- int i; -- for (i = 0; i < 2; i++) -- { -- rtx u = XEXP (*x, i); -- if (GET_CODE (u) == ZERO_EXTEND) -- u = XEXP (u, 0); -- if (GET_CODE (u) == UNSPEC -- && XINT (u, 1) == UNSPEC_TP) -- { -- addr_space_t as = DEFAULT_TLS_SEG_REG; -+ char dstr[30]; - -- *x = XEXP (*x, 1 - i); -+ real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1); -+ fputs (dstr, file); -+ } - -- *loc = replace_equiv_address_nv (*loc, addr, true); -- set_mem_addr_space (*loc, as); -- return; -- } -- } -- x = &XEXP (*x, 0); -- } -+ else -+ { -+ /* We have patterns that allow zero sets of memory, for instance. -+ In 64-bit mode, we should probably support all 8-byte vectors, -+ since we can in fact encode that into an immediate. */ -+ if (GET_CODE (x) == CONST_VECTOR) -+ { -+ if (x != CONST0_RTX (GET_MODE (x))) -+ output_operand_lossage ("invalid vector immediate"); -+ x = const0_rtx; -+ } - -- iter.skip_subrtxes (); -+ if (code != 'P' && code != 'p') -+ { -+ if (CONST_INT_P (x)) -+ { -+ if (ASSEMBLER_DIALECT == ASM_ATT) -+ putc ('$', file); -+ } -+ else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF -+ || GET_CODE (x) == LABEL_REF) -+ { -+ if (ASSEMBLER_DIALECT == ASM_ATT) -+ putc ('$', file); -+ else -+ fputs ("OFFSET FLAT:", file); -+ } - } -+ if (CONST_INT_P (x)) -+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); -+ else if (flag_pic || MACHOPIC_INDIRECT) -+ output_pic_addr_const (file, x, code); -+ else -+ output_addr_const (file, x); - } - } - --/* Rewrite instruction pattern involvning TLS address -- so that it refers to a default TLS address space. 
*/ --rtx --ix86_rewrite_tls_address (rtx pattern) -+static bool -+ix86_print_operand_punct_valid_p (unsigned char code) - { -- pattern = copy_insn (pattern); -- ix86_rewrite_tls_address_1 (&pattern); -- return pattern; -+ return (code == '*' || code == '+' || code == '&' || code == ';' -+ || code == '~' || code == '^' || code == '!'); - } -+ -+/* Print a memory operand whose address is ADDR. */ - --/* Create or return the unique __imp_DECL dllimport symbol corresponding -- to symbol DECL if BEIMPORT is true. Otherwise create or return the -- unique refptr-DECL symbol corresponding to symbol DECL. */ -- --struct dllimport_hasher : ggc_cache_ptr_hash -+static void -+ix86_print_operand_address_as (FILE *file, rtx addr, -+ addr_space_t as, bool no_rip) - { -- static inline hashval_t hash (tree_map *m) { return m->hash; } -- static inline bool -- equal (tree_map *a, tree_map *b) -- { -- return a->base.from == b->base.from; -- } -- -- static int -- keep_cache_entry (tree_map *&m) -- { -- return ggc_marked_p (m->base.from); -- } --}; -+ struct ix86_address parts; -+ rtx base, index, disp; -+ int scale; -+ int ok; -+ bool vsib = false; -+ int code = 0; - --static GTY((cache)) hash_table *dllimport_map; -+ if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR) -+ { -+ ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts); -+ gcc_assert (parts.index == NULL_RTX); -+ parts.index = XVECEXP (addr, 0, 1); -+ parts.scale = INTVAL (XVECEXP (addr, 0, 2)); -+ addr = XVECEXP (addr, 0, 0); -+ vsib = true; -+ } -+ else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR) -+ { -+ gcc_assert (TARGET_64BIT); -+ ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts); -+ code = 'q'; -+ } -+ else -+ ok = ix86_decompose_address (addr, &parts); - --static tree --get_dllimport_decl (tree decl, bool beimport) --{ -- struct tree_map *h, in; -- const char *name; -- const char *prefix; -- size_t namelen, prefixlen; -- char *imp_name; -- tree to; -- rtx rtl; -+ gcc_assert (ok); - -- if (!dllimport_map) -- dllimport_map = hash_table::create_ggc (512); -+ base = parts.base; -+ index = parts.index; -+ disp = parts.disp; -+ scale = parts.scale; - -- in.hash = htab_hash_pointer (decl); -- in.base.from = decl; -- tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT); -- h = *loc; -- if (h) -- return h->to; -+ if (ADDR_SPACE_GENERIC_P (as)) -+ as = parts.seg; -+ else -+ gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg)); - -- *loc = h = ggc_alloc (); -- h->hash = in.hash; -- h->base.from = decl; -- h->to = to = build_decl (DECL_SOURCE_LOCATION (decl), -- VAR_DECL, NULL, ptr_type_node); -- DECL_ARTIFICIAL (to) = 1; -- DECL_IGNORED_P (to) = 1; -- DECL_EXTERNAL (to) = 1; -- TREE_READONLY (to) = 1; -+ if (!ADDR_SPACE_GENERIC_P (as)) -+ { -+ if (ASSEMBLER_DIALECT == ASM_ATT) -+ putc ('%', file); - -- name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); -- name = targetm.strip_name_encoding (name); -- if (beimport) -- prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0 -- ? "*__imp_" : "*__imp__"; -- else -- prefix = user_label_prefix[0] == 0 ? "*.refptr." 
: "*refptr."; -- namelen = strlen (name); -- prefixlen = strlen (prefix); -- imp_name = (char *) alloca (namelen + prefixlen + 1); -- memcpy (imp_name, prefix, prefixlen); -- memcpy (imp_name + prefixlen, name, namelen + 1); -+ switch (as) -+ { -+ case ADDR_SPACE_SEG_FS: -+ fputs ("fs:", file); -+ break; -+ case ADDR_SPACE_SEG_GS: -+ fputs ("gs:", file); -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ } - -- name = ggc_alloc_string (imp_name, namelen + prefixlen); -- rtl = gen_rtx_SYMBOL_REF (Pmode, name); -- SET_SYMBOL_REF_DECL (rtl, to); -- SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR; -- if (!beimport) -+ /* Use one byte shorter RIP relative addressing for 64bit mode. */ -+ if (TARGET_64BIT && !base && !index && !no_rip) - { -- SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL; --#ifdef SUB_TARGET_RECORD_STUB -- SUB_TARGET_RECORD_STUB (name); --#endif -- } -+ rtx symbol = disp; - -- rtl = gen_const_mem (Pmode, rtl); -- set_mem_alias_set (rtl, ix86_GOT_alias_set ()); -+ if (GET_CODE (disp) == CONST -+ && GET_CODE (XEXP (disp, 0)) == PLUS -+ && CONST_INT_P (XEXP (XEXP (disp, 0), 1))) -+ symbol = XEXP (XEXP (disp, 0), 0); - -- SET_DECL_RTL (to, rtl); -- SET_DECL_ASSEMBLER_NAME (to, get_identifier (name)); -+ if (GET_CODE (symbol) == LABEL_REF -+ || (GET_CODE (symbol) == SYMBOL_REF -+ && SYMBOL_REF_TLS_MODEL (symbol) == 0)) -+ base = pc_rtx; -+ } - -- return to; --} -+ if (!base && !index) -+ { -+ /* Displacement only requires special attention. */ -+ if (CONST_INT_P (disp)) -+ { -+ if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as)) -+ fputs ("ds:", file); -+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp)); -+ } -+ /* Load the external function address via the GOT slot to avoid PLT. */ -+ else if (GET_CODE (disp) == CONST -+ && GET_CODE (XEXP (disp, 0)) == UNSPEC -+ && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL -+ || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT) -+ && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0))) -+ output_pic_addr_const (file, disp, 0); -+ else if (flag_pic) -+ output_pic_addr_const (file, disp, 0); -+ else -+ output_addr_const (file, disp); -+ } -+ else -+ { -+ /* Print SImode register names to force addr32 prefix. */ -+ if (SImode_address_operand (addr, VOIDmode)) -+ { -+ if (flag_checking) -+ { -+ gcc_assert (TARGET_64BIT); -+ switch (GET_CODE (addr)) -+ { -+ case SUBREG: -+ gcc_assert (GET_MODE (addr) == SImode); -+ gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode); -+ break; -+ case ZERO_EXTEND: -+ case AND: -+ gcc_assert (GET_MODE (addr) == DImode); -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ } -+ gcc_assert (!code); -+ code = 'k'; -+ } -+ else if (code == 0 -+ && TARGET_X32 -+ && disp -+ && CONST_INT_P (disp) -+ && INTVAL (disp) < -16*1024*1024) -+ { -+ /* X32 runs in 64-bit mode, where displacement, DISP, in -+ address DISP(%r64), is encoded as 32-bit immediate sign- -+ extended from 32-bit to 64-bit. For -0x40000300(%r64), -+ address is %r64 + 0xffffffffbffffd00. When %r64 < -+ 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64, -+ which is invalid for x32. The correct address is %r64 -+ - 0x40000300 == 0xf7ffdd64. To properly encode -+ -0x40000300(%r64) for x32, we zero-extend negative -+ displacement by forcing addr32 prefix which truncates -+ 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should -+ zero-extend all negative displacements, including -1(%rsp). -+ However, for small negative displacements, sign-extension -+ won't cause overflow. 
We only zero-extend negative -+ displacements if they < -16*1024*1024, which is also used -+ to check legitimate address displacements for PIC. */ -+ code = 'k'; -+ } - --/* Expand SYMBOL into its corresponding far-address symbol. -- WANT_REG is true if we require the result be a register. */ -+ /* Since the upper 32 bits of RSP are always zero for x32, -+ we can encode %esp as %rsp to avoid 0x67 prefix if -+ there is no index register. */ -+ if (TARGET_X32 && Pmode == SImode -+ && !index && base && REG_P (base) && REGNO (base) == SP_REG) -+ code = 'q'; - --static rtx --legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg) --{ -- tree imp_decl; -- rtx x; -+ if (ASSEMBLER_DIALECT == ASM_ATT) -+ { -+ if (disp) -+ { -+ if (flag_pic) -+ output_pic_addr_const (file, disp, 0); -+ else if (GET_CODE (disp) == LABEL_REF) -+ output_asm_label (disp); -+ else -+ output_addr_const (file, disp); -+ } - -- gcc_assert (SYMBOL_REF_DECL (symbol)); -- imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false); -+ putc ('(', file); -+ if (base) -+ print_reg (base, code, file); -+ if (index) -+ { -+ putc (',', file); -+ print_reg (index, vsib ? 0 : code, file); -+ if (scale != 1 || vsib) -+ fprintf (file, ",%d", scale); -+ } -+ putc (')', file); -+ } -+ else -+ { -+ rtx offset = NULL_RTX; - -- x = DECL_RTL (imp_decl); -- if (want_reg) -- x = force_reg (Pmode, x); -- return x; --} -+ if (disp) -+ { -+ /* Pull out the offset of a symbol; print any symbol itself. */ -+ if (GET_CODE (disp) == CONST -+ && GET_CODE (XEXP (disp, 0)) == PLUS -+ && CONST_INT_P (XEXP (XEXP (disp, 0), 1))) -+ { -+ offset = XEXP (XEXP (disp, 0), 1); -+ disp = gen_rtx_CONST (VOIDmode, -+ XEXP (XEXP (disp, 0), 0)); -+ } - --/* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is -- true if we require the result be a register. */ -+ if (flag_pic) -+ output_pic_addr_const (file, disp, 0); -+ else if (GET_CODE (disp) == LABEL_REF) -+ output_asm_label (disp); -+ else if (CONST_INT_P (disp)) -+ offset = disp; -+ else -+ output_addr_const (file, disp); -+ } - --static rtx --legitimize_dllimport_symbol (rtx symbol, bool want_reg) --{ -- tree imp_decl; -- rtx x; -+ putc ('[', file); -+ if (base) -+ { -+ print_reg (base, code, file); -+ if (offset) -+ { -+ if (INTVAL (offset) >= 0) -+ putc ('+', file); -+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); -+ } -+ } -+ else if (offset) -+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); -+ else -+ putc ('0', file); - -- gcc_assert (SYMBOL_REF_DECL (symbol)); -- imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true); -+ if (index) -+ { -+ putc ('+', file); -+ print_reg (index, vsib ? 0 : code, file); -+ if (scale != 1 || vsib) -+ fprintf (file, "*%d", scale); -+ } -+ putc (']', file); -+ } -+ } -+} - -- x = DECL_RTL (imp_decl); -- if (want_reg) -- x = force_reg (Pmode, x); -- return x; -+static void -+ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr) -+{ -+ ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false); - } - --/* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG -- is true if we require the result be a register. */ -+/* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. 
*/ - --static rtx --legitimize_pe_coff_symbol (rtx addr, bool inreg) -+static bool -+i386_asm_output_addr_const_extra (FILE *file, rtx x) - { -- if (!TARGET_PECOFF) -- return NULL_RTX; -+ rtx op; - -- if (TARGET_DLLIMPORT_DECL_ATTRIBUTES) -- { -- if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr)) -- return legitimize_dllimport_symbol (addr, inreg); -- if (GET_CODE (addr) == CONST -- && GET_CODE (XEXP (addr, 0)) == PLUS -- && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF -- && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0))) -- { -- rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg); -- return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1)); -- } -- } -- -- if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC) -- return NULL_RTX; -- if (GET_CODE (addr) == SYMBOL_REF -- && !is_imported_p (addr) -- && SYMBOL_REF_EXTERNAL_P (addr) -- && SYMBOL_REF_DECL (addr)) -- return legitimize_pe_coff_extern_decl (addr, inreg); -+ if (GET_CODE (x) != UNSPEC) -+ return false; - -- if (GET_CODE (addr) == CONST -- && GET_CODE (XEXP (addr, 0)) == PLUS -- && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF -- && !is_imported_p (XEXP (XEXP (addr, 0), 0)) -- && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0)) -- && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0))) -+ op = XVECEXP (x, 0, 0); -+ switch (XINT (x, 1)) - { -- rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg); -- return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1)); -+ case UNSPEC_GOTOFF: -+ output_addr_const (file, op); -+ fputs ("@gotoff", file); -+ break; -+ case UNSPEC_GOTTPOFF: -+ output_addr_const (file, op); -+ /* FIXME: This might be @TPOFF in Sun ld. */ -+ fputs ("@gottpoff", file); -+ break; -+ case UNSPEC_TPOFF: -+ output_addr_const (file, op); -+ fputs ("@tpoff", file); -+ break; -+ case UNSPEC_NTPOFF: -+ output_addr_const (file, op); -+ if (TARGET_64BIT) -+ fputs ("@tpoff", file); -+ else -+ fputs ("@ntpoff", file); -+ break; -+ case UNSPEC_DTPOFF: -+ output_addr_const (file, op); -+ fputs ("@dtpoff", file); -+ break; -+ case UNSPEC_GOTNTPOFF: -+ output_addr_const (file, op); -+ if (TARGET_64BIT) -+ fputs (ASSEMBLER_DIALECT == ASM_ATT ? -+ "@gottpoff(%rip)" : "@gottpoff[rip]", file); -+ else -+ fputs ("@gotntpoff", file); -+ break; -+ case UNSPEC_INDNTPOFF: -+ output_addr_const (file, op); -+ fputs ("@indntpoff", file); -+ break; -+#if TARGET_MACHO -+ case UNSPEC_MACHOPIC_OFFSET: -+ output_addr_const (file, op); -+ putc ('-', file); -+ machopic_output_function_base_name (file); -+ break; -+#endif -+ -+ default: -+ return false; - } -- return NULL_RTX; --} - --/* Try machine-dependent ways of modifying an illegitimate address -- to be legitimate. If we find one, return the new, valid address. -- This macro is used in only one place: `memory_address' in explow.c. -+ return true; -+} -+ -+ -+/* Output code to perform a 387 binary operation in INSN, one of PLUS, -+ MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3] -+ is the expression of the binary operation. The output may either be -+ emitted here, or returned to the caller, like all output_* functions. - -- OLDX is the address as it was before break_out_memory_refs was called. -- In some cases it is useful to look at this to decide what needs to be done. -+ There is no guarantee that the operands are the same mode, as they -+ might be within FLOAT or FLOAT_EXTEND expressions. */ - -- It is always safe for this macro to do nothing. It exists to recognize -- opportunities to optimize the output. 
-+#ifndef SYSV386_COMPAT -+/* Set to 1 for compatibility with brain-damaged assemblers. No-one -+ wants to fix the assemblers because that causes incompatibility -+ with gcc. No-one wants to fix gcc because that causes -+ incompatibility with assemblers... You can use the option of -+ -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */ -+#define SYSV386_COMPAT 1 -+#endif - -- For the 80386, we handle X+REG by loading X into a register R and -- using R+REG. R will go in a general reg and indexing will be used. -- However, if REG is a broken-out memory address or multiplication, -- nothing needs to be done because REG can certainly go in a general reg. -+const char * -+output_387_binary_op (rtx_insn *insn, rtx *operands) -+{ -+ static char buf[40]; -+ const char *p; -+ bool is_sse -+ = (SSE_REG_P (operands[0]) -+ || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2])); - -- When -fpic is used, special handling is needed for symbolic references. -- See comments by legitimize_pic_address in i386.c for details. */ -+ if (is_sse) -+ p = "%v"; -+ else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT -+ || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) -+ p = "fi"; -+ else -+ p = "f"; - --static rtx --ix86_legitimize_address (rtx x, rtx, machine_mode mode) --{ -- bool changed = false; -- unsigned log; -+ strcpy (buf, p); - -- log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0; -- if (log) -- return legitimize_tls_address (x, (enum tls_model) log, false); -- if (GET_CODE (x) == CONST -- && GET_CODE (XEXP (x, 0)) == PLUS -- && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF -- && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)))) -+ switch (GET_CODE (operands[3])) - { -- rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), -- (enum tls_model) log, false); -- return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1)); -+ case PLUS: -+ p = "add"; break; -+ case MINUS: -+ p = "sub"; break; -+ case MULT: -+ p = "mul"; break; -+ case DIV: -+ p = "div"; break; -+ default: -+ gcc_unreachable (); - } - -- if (TARGET_DLLIMPORT_DECL_ATTRIBUTES) -- { -- rtx tmp = legitimize_pe_coff_symbol (x, true); -- if (tmp) -- return tmp; -- } -+ strcat (buf, p); - -- if (flag_pic && SYMBOLIC_CONST (x)) -- return legitimize_pic_address (x, 0); -+ if (is_sse) -+ { -+ p = (GET_MODE (operands[0]) == SFmode) ? "ss" : "sd"; -+ strcat (buf, p); - --#if TARGET_MACHO -- if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x)) -- return machopic_indirect_data_reference (x, 0); --#endif -+ if (TARGET_AVX) -+ p = "\t{%2, %1, %0|%0, %1, %2}"; -+ else -+ p = "\t{%2, %0|%0, %2}"; - -- /* Canonicalize shifts by 0, 1, 2, 3 into multiply */ -- if (GET_CODE (x) == ASHIFT -- && CONST_INT_P (XEXP (x, 1)) -- && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4) -+ strcat (buf, p); -+ return buf; -+ } -+ -+ /* Even if we do not want to check the inputs, this documents input -+ constraints. Which helps in understanding the following code. 
*/ -+ if (flag_checking) - { -- changed = true; -- log = INTVAL (XEXP (x, 1)); -- x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)), -- GEN_INT (1 << log)); -+ if (STACK_REG_P (operands[0]) -+ && ((REG_P (operands[1]) -+ && REGNO (operands[0]) == REGNO (operands[1]) -+ && (STACK_REG_P (operands[2]) || MEM_P (operands[2]))) -+ || (REG_P (operands[2]) -+ && REGNO (operands[0]) == REGNO (operands[2]) -+ && (STACK_REG_P (operands[1]) || MEM_P (operands[1])))) -+ && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2]))) -+ ; /* ok */ -+ else -+ gcc_unreachable (); - } - -- if (GET_CODE (x) == PLUS) -+ switch (GET_CODE (operands[3])) - { -- /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */ -+ case MULT: -+ case PLUS: -+ if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2])) -+ std::swap (operands[1], operands[2]); - -- if (GET_CODE (XEXP (x, 0)) == ASHIFT -- && CONST_INT_P (XEXP (XEXP (x, 0), 1)) -- && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4) -+ /* know operands[0] == operands[1]. */ -+ -+ if (MEM_P (operands[2])) - { -- changed = true; -- log = INTVAL (XEXP (XEXP (x, 0), 1)); -- XEXP (x, 0) = gen_rtx_MULT (Pmode, -- force_reg (Pmode, XEXP (XEXP (x, 0), 0)), -- GEN_INT (1 << log)); -+ p = "%Z2\t%2"; -+ break; - } - -- if (GET_CODE (XEXP (x, 1)) == ASHIFT -- && CONST_INT_P (XEXP (XEXP (x, 1), 1)) -- && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4) -+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) - { -- changed = true; -- log = INTVAL (XEXP (XEXP (x, 1), 1)); -- XEXP (x, 1) = gen_rtx_MULT (Pmode, -- force_reg (Pmode, XEXP (XEXP (x, 1), 0)), -- GEN_INT (1 << log)); -+ if (STACK_TOP_P (operands[0])) -+ /* How is it that we are storing to a dead operand[2]? -+ Well, presumably operands[1] is dead too. We can't -+ store the result to st(0) as st(0) gets popped on this -+ instruction. Instead store to operands[2] (which I -+ think has to be st(1)). st(1) will be popped later. -+ gcc <= 2.8.1 didn't have this check and generated -+ assembly code that the Unixware assembler rejected. */ -+ p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */ -+ else -+ p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */ -+ break; - } - -- /* Put multiply first if it isn't already. */ -- if (GET_CODE (XEXP (x, 1)) == MULT) -+ if (STACK_TOP_P (operands[0])) -+ p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */ -+ else -+ p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */ -+ break; -+ -+ case MINUS: -+ case DIV: -+ if (MEM_P (operands[1])) - { -- std::swap (XEXP (x, 0), XEXP (x, 1)); -- changed = true; -+ p = "r%Z1\t%1"; -+ break; - } - -- /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const))) -- into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be -- created by virtual register instantiation, register elimination, and -- similar optimizations. */ -- if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS) -+ if (MEM_P (operands[2])) - { -- changed = true; -- x = gen_rtx_PLUS (Pmode, -- gen_rtx_PLUS (Pmode, XEXP (x, 0), -- XEXP (XEXP (x, 1), 0)), -- XEXP (XEXP (x, 1), 1)); -+ p = "%Z2\t%2"; -+ break; - } - -- /* Canonicalize -- (plus (plus (mult (reg) (const)) (plus (reg) (const))) const) -- into (plus (plus (mult (reg) (const)) (reg)) (const)). 
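As a source-level counterpart to the shift-into-multiply canonicalization shown above: a scaled array access is the common way (plus (ashift reg 2) reg) addresses arise, and after canonicalization they can be matched as a single base + index*scale operand. Sketch only; element is an invented name.

/* Illustration only: p[i] computes p + i*4, which fits one
   base + index*scale address (e.g. movl (%rdi,%rsi,4), %eax on x86-64).  */
int element (int *p, long i)
{
  return p[i];
}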
*/ -- else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS -- && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT -- && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS -- && CONSTANT_P (XEXP (x, 1))) -+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) - { -- rtx constant; -- rtx other = NULL_RTX; -- -- if (CONST_INT_P (XEXP (x, 1))) -- { -- constant = XEXP (x, 1); -- other = XEXP (XEXP (XEXP (x, 0), 1), 1); -- } -- else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1))) -- { -- constant = XEXP (XEXP (XEXP (x, 0), 1), 1); -- other = XEXP (x, 1); -- } -+#if SYSV386_COMPAT -+ /* The SystemV/386 SVR3.2 assembler, and probably all AT&T -+ derived assemblers, confusingly reverse the direction of -+ the operation for fsub{r} and fdiv{r} when the -+ destination register is not st(0). The Intel assembler -+ doesn't have this brain damage. Read !SYSV386_COMPAT to -+ figure out what the hardware really does. */ -+ if (STACK_TOP_P (operands[0])) -+ p = "{p\t%0, %2|rp\t%2, %0}"; - else -- constant = 0; -- -- if (constant) -- { -- changed = true; -- x = gen_rtx_PLUS (Pmode, -- gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0), -- XEXP (XEXP (XEXP (x, 0), 1), 0)), -- plus_constant (Pmode, other, -- INTVAL (constant))); -- } -+ p = "{rp\t%2, %0|p\t%0, %2}"; -+#else -+ if (STACK_TOP_P (operands[0])) -+ /* As above for fmul/fadd, we can't store to st(0). */ -+ p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */ -+ else -+ p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */ -+#endif -+ break; - } - -- if (changed && ix86_legitimate_address_p (mode, x, false)) -- return x; -- -- if (GET_CODE (XEXP (x, 0)) == MULT) -+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) - { -- changed = true; -- XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0)); -+#if SYSV386_COMPAT -+ if (STACK_TOP_P (operands[0])) -+ p = "{rp\t%0, %1|p\t%1, %0}"; -+ else -+ p = "{p\t%1, %0|rp\t%0, %1}"; -+#else -+ if (STACK_TOP_P (operands[0])) -+ p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */ -+ else -+ p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */ -+#endif -+ break; - } - -- if (GET_CODE (XEXP (x, 1)) == MULT) -+ if (STACK_TOP_P (operands[0])) - { -- changed = true; -- XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1)); -+ if (STACK_TOP_P (operands[1])) -+ p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */ -+ else -+ p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */ -+ break; - } -- -- if (changed -- && REG_P (XEXP (x, 1)) -- && REG_P (XEXP (x, 0))) -- return x; -- -- if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1))) -+ else if (STACK_TOP_P (operands[1])) - { -- changed = true; -- x = legitimize_pic_address (x, 0); -+#if SYSV386_COMPAT -+ p = "{\t%1, %0|r\t%0, %1}"; -+#else -+ p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */ -+#endif -+ } -+ else -+ { -+#if SYSV386_COMPAT -+ p = "{r\t%2, %0|\t%0, %2}"; -+#else -+ p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */ -+#endif - } -+ break; - -- if (changed && ix86_legitimate_address_p (mode, x, false)) -- return x; -+ default: -+ gcc_unreachable (); -+ } - -- if (REG_P (XEXP (x, 0))) -- { -- rtx temp = gen_reg_rtx (Pmode); -- rtx val = force_operand (XEXP (x, 1), temp); -- if (val != temp) -- { -- val = convert_to_mode (Pmode, val, 1); -- emit_move_insn (temp, val); -- } -+ strcat (buf, p); -+ return buf; -+} - -- XEXP (x, 1) = temp; -- return x; -- } -+/* Return needed mode for entity in optimize_mode_switching pass. 
*/ - -- else if (REG_P (XEXP (x, 1))) -- { -- rtx temp = gen_reg_rtx (Pmode); -- rtx val = force_operand (XEXP (x, 0), temp); -- if (val != temp) -- { -- val = convert_to_mode (Pmode, val, 1); -- emit_move_insn (temp, val); -- } -+static int -+ix86_dirflag_mode_needed (rtx_insn *insn) -+{ -+ if (CALL_P (insn)) -+ { -+ if (cfun->machine->func_type == TYPE_NORMAL) -+ return X86_DIRFLAG_ANY; -+ else -+ /* No need to emit CLD in interrupt handler for TARGET_CLD. */ -+ return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET; -+ } - -- XEXP (x, 0) = temp; -- return x; -- } -+ if (recog_memoized (insn) < 0) -+ return X86_DIRFLAG_ANY; -+ -+ if (get_attr_type (insn) == TYPE_STR) -+ { -+ /* Emit cld instruction if stringops are used in the function. */ -+ if (cfun->machine->func_type == TYPE_NORMAL) -+ return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY; -+ else -+ return X86_DIRFLAG_RESET; - } - -- return x; -+ return X86_DIRFLAG_ANY; - } -- --/* Print an integer constant expression in assembler syntax. Addition -- and subtraction are the only arithmetic that may appear in these -- expressions. FILE is the stdio stream to write to, X is the rtx, and -- CODE is the operand print code from the output string. */ - --static void --output_pic_addr_const (FILE *file, rtx x, int code) -+/* Check if a 256bit or 512 bit AVX register is referenced inside of EXP. */ -+ -+static bool -+ix86_check_avx_upper_register (const_rtx exp) - { -- char buf[256]; -+ return SSE_REG_P (exp) && GET_MODE_BITSIZE (GET_MODE (exp)) > 128; -+} - -- switch (GET_CODE (x)) -+/* Return needed mode for entity in optimize_mode_switching pass. */ -+ -+static int -+ix86_avx_u128_mode_needed (rtx_insn *insn) -+{ -+ if (CALL_P (insn)) - { -- case PC: -- gcc_assert (flag_pic); -- putc ('.', file); -- break; -+ rtx link; - -- case SYMBOL_REF: -- if (TARGET_64BIT || ! TARGET_MACHO_SYMBOL_STUBS) -- output_addr_const (file, x); -- else -+ /* Needed mode is set to AVX_U128_CLEAN if there are -+ no 256bit or 512bit modes used in function arguments. */ -+ for (link = CALL_INSN_FUNCTION_USAGE (insn); -+ link; -+ link = XEXP (link, 1)) - { -- const char *name = XSTR (x, 0); -- -- /* Mark the decl as referenced so that cgraph will -- output the function. */ -- if (SYMBOL_REF_DECL (x)) -- mark_decl_referenced (SYMBOL_REF_DECL (x)); -+ if (GET_CODE (XEXP (link, 0)) == USE) -+ { -+ rtx arg = XEXP (XEXP (link, 0), 0); - --#if TARGET_MACHO -- if (MACHOPIC_INDIRECT -- && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION) -- name = machopic_indirection_name (x, /*stub_p=*/true); --#endif -- assemble_name (file, name); -+ if (ix86_check_avx_upper_register (arg)) -+ return AVX_U128_DIRTY; -+ } - } -- if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF) -- && code == 'P' && ! SYMBOL_REF_LOCAL_P (x)) -- fputs ("@PLT", file); -- break; - -- case LABEL_REF: -- x = XEXP (x, 0); -- /* FALLTHRU */ -- case CODE_LABEL: -- ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x)); -- assemble_name (asm_out_file, buf); -- break; -+ return AVX_U128_CLEAN; -+ } - -- case CONST_INT: -- fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); -- break; -+ /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced. -+ Hardware changes state only when a 256bit register is written to, -+ but we need to prevent the compiler from moving optimal insertion -+ point above eventual read from 256bit or 512 bit register. 
*/ -+ subrtx_iterator::array_type array; -+ FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST) -+ if (ix86_check_avx_upper_register (*iter)) -+ return AVX_U128_DIRTY; - -- case CONST: -- /* This used to output parentheses around the expression, -- but that does not work on the 386 (either ATT or BSD assembler). */ -- output_pic_addr_const (file, XEXP (x, 0), code); -- break; -+ return AVX_U128_ANY; -+} - -- case CONST_DOUBLE: -- /* We can't handle floating point constants; -- TARGET_PRINT_OPERAND must handle them. */ -- output_operand_lossage ("floating constant misused"); -- break; -+/* Return mode that i387 must be switched into -+ prior to the execution of insn. */ - -- case PLUS: -- /* Some assemblers need integer constants to appear first. */ -- if (CONST_INT_P (XEXP (x, 0))) -- { -- output_pic_addr_const (file, XEXP (x, 0), code); -- putc ('+', file); -- output_pic_addr_const (file, XEXP (x, 1), code); -- } -- else -- { -- gcc_assert (CONST_INT_P (XEXP (x, 1))); -- output_pic_addr_const (file, XEXP (x, 1), code); -- putc ('+', file); -- output_pic_addr_const (file, XEXP (x, 0), code); -- } -+static int -+ix86_i387_mode_needed (int entity, rtx_insn *insn) -+{ -+ enum attr_i387_cw mode; -+ -+ /* The mode UNINITIALIZED is used to store control word after a -+ function call or ASM pattern. The mode ANY specify that function -+ has no requirements on the control word and make no changes in the -+ bits we are interested in. */ -+ -+ if (CALL_P (insn) -+ || (NONJUMP_INSN_P (insn) -+ && (asm_noperands (PATTERN (insn)) >= 0 -+ || GET_CODE (PATTERN (insn)) == ASM_INPUT))) -+ return I387_CW_UNINITIALIZED; -+ -+ if (recog_memoized (insn) < 0) -+ return I387_CW_ANY; -+ -+ mode = get_attr_i387_cw (insn); -+ -+ switch (entity) -+ { -+ case I387_TRUNC: -+ if (mode == I387_CW_TRUNC) -+ return mode; - break; - -- case MINUS: -- if (!TARGET_MACHO) -- putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file); -- output_pic_addr_const (file, XEXP (x, 0), code); -- putc ('-', file); -- output_pic_addr_const (file, XEXP (x, 1), code); -- if (!TARGET_MACHO) -- putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file); -+ case I387_FLOOR: -+ if (mode == I387_CW_FLOOR) -+ return mode; - break; - -- case UNSPEC: -- gcc_assert (XVECLEN (x, 0) == 1); -- output_pic_addr_const (file, XVECEXP (x, 0, 0), code); -- switch (XINT (x, 1)) -- { -- case UNSPEC_GOT: -- fputs ("@GOT", file); -- break; -- case UNSPEC_GOTOFF: -- fputs ("@GOTOFF", file); -- break; -- case UNSPEC_PLTOFF: -- fputs ("@PLTOFF", file); -- break; -- case UNSPEC_PCREL: -- fputs (ASSEMBLER_DIALECT == ASM_ATT ? -- "(%rip)" : "[rip]", file); -- break; -- case UNSPEC_GOTPCREL: -- fputs (ASSEMBLER_DIALECT == ASM_ATT ? -- "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file); -- break; -- case UNSPEC_GOTTPOFF: -- /* FIXME: This might be @TPOFF in Sun ld too. */ -- fputs ("@gottpoff", file); -- break; -- case UNSPEC_TPOFF: -- fputs ("@tpoff", file); -- break; -- case UNSPEC_NTPOFF: -- if (TARGET_64BIT) -- fputs ("@tpoff", file); -- else -- fputs ("@ntpoff", file); -- break; -- case UNSPEC_DTPOFF: -- fputs ("@dtpoff", file); -- break; -- case UNSPEC_GOTNTPOFF: -- if (TARGET_64BIT) -- fputs (ASSEMBLER_DIALECT == ASM_ATT ? 
-- "@gottpoff(%rip)": "@gottpoff[rip]", file); -- else -- fputs ("@gotntpoff", file); -- break; -- case UNSPEC_INDNTPOFF: -- fputs ("@indntpoff", file); -- break; --#if TARGET_MACHO -- case UNSPEC_MACHOPIC_OFFSET: -- putc ('-', file); -- machopic_output_function_base_name (file); -- break; --#endif -- default: -- output_operand_lossage ("invalid UNSPEC as operand"); -- break; -- } -- break; -+ case I387_CEIL: -+ if (mode == I387_CW_CEIL) -+ return mode; -+ break; - - default: -- output_operand_lossage ("invalid expression as operand"); -+ gcc_unreachable (); - } -+ -+ return I387_CW_ANY; - } - --/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL. -- We need to emit DTP-relative relocations. */ -+/* Return mode that entity must be switched into -+ prior to the execution of insn. */ - --static void ATTRIBUTE_UNUSED --i386_output_dwarf_dtprel (FILE *file, int size, rtx x) -+static int -+ix86_mode_needed (int entity, rtx_insn *insn) - { -- fputs (ASM_LONG, file); -- output_addr_const (file, x); -- fputs ("@dtpoff", file); -- switch (size) -+ switch (entity) - { -- case 4: -- break; -- case 8: -- fputs (", 0", file); -- break; -+ case X86_DIRFLAG: -+ return ix86_dirflag_mode_needed (insn); -+ case AVX_U128: -+ return ix86_avx_u128_mode_needed (insn); -+ case I387_TRUNC: -+ case I387_FLOOR: -+ case I387_CEIL: -+ return ix86_i387_mode_needed (entity, insn); - default: - gcc_unreachable (); -- } -+ } -+ return 0; - } - --/* Return true if X is a representation of the PIC register. This copes -- with calls from ix86_find_base_term, where the register might have -- been replaced by a cselib value. */ -- --static bool --ix86_pic_register_p (rtx x) --{ -- if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x)) -- return (pic_offset_table_rtx -- && rtx_equal_for_cselib_p (x, pic_offset_table_rtx)); -- else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SET_GOT) -- return true; -- else if (!REG_P (x)) -- return false; -- else if (pic_offset_table_rtx) -+/* Check if a 256bit or 512bit AVX register is referenced in stores. */ -+ -+static void -+ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data) -+ { -+ if (ix86_check_avx_upper_register (dest)) - { -- if (REGNO (x) == REGNO (pic_offset_table_rtx)) -- return true; -- if (HARD_REGISTER_P (x) -- && !HARD_REGISTER_P (pic_offset_table_rtx) -- && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx)) -- return true; -- return false; -+ bool *used = (bool *) data; -+ *used = true; - } -- else -- return REGNO (x) == PIC_OFFSET_TABLE_REGNUM; --} -+ } - --/* Helper function for ix86_delegitimize_address. -- Attempt to delegitimize TLS local-exec accesses. */ -+/* Calculate mode of upper 128bit AVX registers after the insn. 
*/ - --static rtx --ix86_delegitimize_tls_address (rtx orig_x) -+static int -+ix86_avx_u128_mode_after (int mode, rtx_insn *insn) - { -- rtx x = orig_x, unspec; -- struct ix86_address addr; -+ rtx pat = PATTERN (insn); - -- if (!TARGET_TLS_DIRECT_SEG_REFS) -- return orig_x; -- if (MEM_P (x)) -- x = XEXP (x, 0); -- if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode) -- return orig_x; -- if (ix86_decompose_address (x, &addr) == 0 -- || addr.seg != DEFAULT_TLS_SEG_REG -- || addr.disp == NULL_RTX -- || GET_CODE (addr.disp) != CONST) -- return orig_x; -- unspec = XEXP (addr.disp, 0); -- if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1))) -- unspec = XEXP (unspec, 0); -- if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF) -- return orig_x; -- x = XVECEXP (unspec, 0, 0); -- gcc_assert (GET_CODE (x) == SYMBOL_REF); -- if (unspec != XEXP (addr.disp, 0)) -- x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1)); -- if (addr.index) -+ if (vzeroupper_pattern (pat, VOIDmode) -+ || vzeroall_pattern (pat, VOIDmode)) -+ return AVX_U128_CLEAN; -+ -+ /* We know that state is clean after CALL insn if there are no -+ 256bit or 512bit registers used in the function return register. */ -+ if (CALL_P (insn)) - { -- rtx idx = addr.index; -- if (addr.scale != 1) -- idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale)); -- x = gen_rtx_PLUS (Pmode, idx, x); -+ bool avx_upper_reg_found = false; -+ note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found); -+ -+ return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN; - } -- if (addr.base) -- x = gen_rtx_PLUS (Pmode, addr.base, x); -- if (MEM_P (orig_x)) -- x = replace_equiv_address_nv (orig_x, x); -- return x; --} - --/* In the name of slightly smaller debug output, and to cater to -- general assembler lossage, recognize PIC+GOTOFF and turn it back -- into a direct symbol reference. -+ /* Otherwise, return current mode. Remember that if insn -+ references AVX 256bit or 512bit registers, the mode was already -+ changed to DIRTY from MODE_NEEDED. */ -+ return mode; -+} - -- On Darwin, this is necessary to avoid a crash, because Darwin -- has a different PIC label for each routine but the DWARF debugging -- information is not associated with any particular routine, so it's -- necessary to remove references to the PIC label from RTL stored by -- the DWARF output code. -+/* Return the mode that an insn results in. */ - -- This helper is used in the normal ix86_delegitimize_address -- entrypoint (e.g. used in the target delegitimization hook) and -- in ix86_find_base_term. As compile time memory optimization, we -- avoid allocating rtxes that will not change anything on the outcome -- of the callers (find_base_value and find_base_term). */ -+static int -+ix86_mode_after (int entity, int mode, rtx_insn *insn) -+{ -+ switch (entity) -+ { -+ case X86_DIRFLAG: -+ return mode; -+ case AVX_U128: -+ return ix86_avx_u128_mode_after (mode, insn); -+ case I387_TRUNC: -+ case I387_FLOOR: -+ case I387_CEIL: -+ return mode; -+ default: -+ gcc_unreachable (); -+ } -+} - --static inline rtx --ix86_delegitimize_address_1 (rtx x, bool base_term_p) -+static int -+ix86_dirflag_mode_entry (void) - { -- rtx orig_x = delegitimize_mem_from_attrs (x); -- /* addend is NULL or some rtx if x is something+GOTOFF where -- something doesn't include the PIC register. */ -- rtx addend = NULL_RTX; -- /* reg_addend is NULL or a multiple of some register. */ -- rtx reg_addend = NULL_RTX; -- /* const_addend is NULL or a const_int. 
*/ -- rtx const_addend = NULL_RTX; -- /* This is the result, or NULL. */ -- rtx result = NULL_RTX; -+ /* For TARGET_CLD or in the interrupt handler we can't assume -+ direction flag state at function entry. */ -+ if (TARGET_CLD -+ || cfun->machine->func_type != TYPE_NORMAL) -+ return X86_DIRFLAG_ANY; - -- x = orig_x; -+ return X86_DIRFLAG_RESET; -+} - -- if (MEM_P (x)) -- x = XEXP (x, 0); -+static int -+ix86_avx_u128_mode_entry (void) -+{ -+ tree arg; - -- if (TARGET_64BIT) -+ /* Entry mode is set to AVX_U128_DIRTY if there are -+ 256bit or 512bit modes used in function arguments. */ -+ for (arg = DECL_ARGUMENTS (current_function_decl); arg; -+ arg = TREE_CHAIN (arg)) - { -- if (GET_CODE (x) == CONST -- && GET_CODE (XEXP (x, 0)) == PLUS -- && GET_MODE (XEXP (x, 0)) == Pmode -- && CONST_INT_P (XEXP (XEXP (x, 0), 1)) -- && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC -- && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL) -- { -- /* find_base_{value,term} only care about MEMs with arg_pointer_rtx -- base. A CONST can't be arg_pointer_rtx based. */ -- if (base_term_p && MEM_P (orig_x)) -- return orig_x; -- rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0); -- x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2); -- if (MEM_P (orig_x)) -- x = replace_equiv_address_nv (orig_x, x); -- return x; -- } -- -- if (GET_CODE (x) == CONST -- && GET_CODE (XEXP (x, 0)) == UNSPEC -- && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL -- || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL) -- && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)) -- { -- x = XVECEXP (XEXP (x, 0), 0, 0); -- if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x)) -- { -- x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x)); -- if (x == NULL_RTX) -- return orig_x; -- } -- return x; -- } -- -- if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC) -- return ix86_delegitimize_tls_address (orig_x); -+ rtx incoming = DECL_INCOMING_RTL (arg); - -- /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic -- and -mcmodel=medium -fpic. */ -+ if (incoming && ix86_check_avx_upper_register (incoming)) -+ return AVX_U128_DIRTY; - } - -- if (GET_CODE (x) != PLUS -- || GET_CODE (XEXP (x, 1)) != CONST) -- return ix86_delegitimize_tls_address (orig_x); -+ return AVX_U128_CLEAN; -+} - -- if (ix86_pic_register_p (XEXP (x, 0))) -- /* %ebx + GOT/GOTOFF */ -- ; -- else if (GET_CODE (XEXP (x, 0)) == PLUS) -- { -- /* %ebx + %reg * scale + GOT/GOTOFF */ -- reg_addend = XEXP (x, 0); -- if (ix86_pic_register_p (XEXP (reg_addend, 0))) -- reg_addend = XEXP (reg_addend, 1); -- else if (ix86_pic_register_p (XEXP (reg_addend, 1))) -- reg_addend = XEXP (reg_addend, 0); -- else -- { -- reg_addend = NULL_RTX; -- addend = XEXP (x, 0); -- } -- } -- else -- addend = XEXP (x, 0); -+/* Return a mode that ENTITY is assumed to be -+ switched to at function entry. 
*/ - -- x = XEXP (XEXP (x, 1), 0); -- if (GET_CODE (x) == PLUS -- && CONST_INT_P (XEXP (x, 1))) -+static int -+ix86_mode_entry (int entity) -+{ -+ switch (entity) - { -- const_addend = XEXP (x, 1); -- x = XEXP (x, 0); -+ case X86_DIRFLAG: -+ return ix86_dirflag_mode_entry (); -+ case AVX_U128: -+ return ix86_avx_u128_mode_entry (); -+ case I387_TRUNC: -+ case I387_FLOOR: -+ case I387_CEIL: -+ return I387_CW_ANY; -+ default: -+ gcc_unreachable (); - } -+} - -- if (GET_CODE (x) == UNSPEC -- && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend) -- || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x)) -- || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC -- && !MEM_P (orig_x) && !addend))) -- result = XVECEXP (x, 0, 0); -+static int -+ix86_avx_u128_mode_exit (void) -+{ -+ rtx reg = crtl->return_rtx; - -- if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x) -- && !MEM_P (orig_x)) -- result = XVECEXP (x, 0, 0); -+ /* Exit mode is set to AVX_U128_DIRTY if there are 256bit -+ or 512 bit modes used in the function return register. */ -+ if (reg && ix86_check_avx_upper_register (reg)) -+ return AVX_U128_DIRTY; - -- if (! result) -- return ix86_delegitimize_tls_address (orig_x); -+ /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit -+ modes used in function arguments, otherwise return AVX_U128_CLEAN. -+ */ -+ return ix86_avx_u128_mode_entry (); -+} - -- /* For (PLUS something CONST_INT) both find_base_{value,term} just -- recurse on the first operand. */ -- if (const_addend && !base_term_p) -- result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend)); -- if (reg_addend) -- result = gen_rtx_PLUS (Pmode, reg_addend, result); -- if (addend) -- { -- /* If the rest of original X doesn't involve the PIC register, add -- addend and subtract pic_offset_table_rtx. This can happen e.g. -- for code like: -- leal (%ebx, %ecx, 4), %ecx -- ... -- movl foo@GOTOFF(%ecx), %edx -- in which case we return (%ecx - %ebx) + foo -- or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg -- and reload has completed. Don't do the latter for debug, -- as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly. */ -- if (pic_offset_table_rtx -- && (!reload_completed || !ix86_use_pseudo_pic_reg ())) -- result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend), -- pic_offset_table_rtx), -- result); -- else if (base_term_p -- && pic_offset_table_rtx -- && !TARGET_MACHO -- && !TARGET_VXWORKS_RTP) -- { -- rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME); -- tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp); -- result = gen_rtx_PLUS (Pmode, tmp, result); -- } -- else -- return orig_x; -- } -- if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x)) -+/* Return a mode that ENTITY is assumed to be -+ switched to at function exit. */ -+ -+static int -+ix86_mode_exit (int entity) -+{ -+ switch (entity) - { -- result = lowpart_subreg (GET_MODE (orig_x), result, Pmode); -- if (result == NULL_RTX) -- return orig_x; -+ case X86_DIRFLAG: -+ return X86_DIRFLAG_ANY; -+ case AVX_U128: -+ return ix86_avx_u128_mode_exit (); -+ case I387_TRUNC: -+ case I387_FLOOR: -+ case I387_CEIL: -+ return I387_CW_ANY; -+ default: -+ gcc_unreachable (); - } -- return result; - } - --/* The normal instantiation of the above template. */ -- --static rtx --ix86_delegitimize_address (rtx x) -+static int -+ix86_mode_priority (int, int n) - { -- return ix86_delegitimize_address_1 (x, false); -+ return n; - } - --/* If X is a machine specific address (i.e. 
a symbol or label being -- referenced as a displacement from the GOT implemented using an -- UNSPEC), then return the base term. Otherwise return X. */ -+/* Output code to initialize control word copies used by trunc?f?i and -+ rounding patterns. CURRENT_MODE is set to current control word, -+ while NEW_MODE is set to new control word. */ - --rtx --ix86_find_base_term (rtx x) -+static void -+emit_i387_cw_initialization (int mode) - { -- rtx term; -+ rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED); -+ rtx new_mode; - -- if (TARGET_64BIT) -- { -- if (GET_CODE (x) != CONST) -- return x; -- term = XEXP (x, 0); -- if (GET_CODE (term) == PLUS -- && CONST_INT_P (XEXP (term, 1))) -- term = XEXP (term, 0); -- if (GET_CODE (term) != UNSPEC -- || (XINT (term, 1) != UNSPEC_GOTPCREL -- && XINT (term, 1) != UNSPEC_PCREL)) -- return x; -+ enum ix86_stack_slot slot; - -- return XVECEXP (term, 0, 0); -- } -+ rtx reg = gen_reg_rtx (HImode); - -- return ix86_delegitimize_address_1 (x, true); --} -- --/* Return true if X shouldn't be emitted into the debug info. -- Disallow UNSPECs other than @gotoff - we can't emit _GLOBAL_OFFSET_TABLE_ -- symbol easily into the .debug_info section, so we need not to -- delegitimize, but instead assemble as @gotoff. -- Disallow _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically -- assembles that as _GLOBAL_OFFSET_TABLE_-. expression. */ -- --static bool --ix86_const_not_ok_for_debug_p (rtx x) --{ -- if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF) -- return true; -- -- if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0) -- return true; -- -- return false; --} -- --static void --put_condition_code (enum rtx_code code, machine_mode mode, bool reverse, -- bool fp, FILE *file) --{ -- const char *suffix; -- -- if (mode == CCFPmode) -- { -- code = ix86_fp_compare_code_to_integer (code); -- mode = CCmode; -- } -- if (reverse) -- code = reverse_condition (code); -+ emit_insn (gen_x86_fnstcw_1 (stored_mode)); -+ emit_move_insn (reg, copy_rtx (stored_mode)); - -- switch (code) -+ switch (mode) - { -- case EQ: -- gcc_assert (mode != CCGZmode); -- switch (mode) -- { -- case E_CCAmode: -- suffix = "a"; -- break; -- case E_CCCmode: -- suffix = "c"; -- break; -- case E_CCOmode: -- suffix = "o"; -- break; -- case E_CCPmode: -- suffix = "p"; -- break; -- case E_CCSmode: -- suffix = "s"; -- break; -- default: -- suffix = "e"; -- break; -- } -- break; -- case NE: -- gcc_assert (mode != CCGZmode); -- switch (mode) -- { -- case E_CCAmode: -- suffix = "na"; -- break; -- case E_CCCmode: -- suffix = "nc"; -- break; -- case E_CCOmode: -- suffix = "no"; -- break; -- case E_CCPmode: -- suffix = "np"; -- break; -- case E_CCSmode: -- suffix = "ns"; -- break; -- default: -- suffix = "ne"; -- break; -- } -+ case I387_CW_TRUNC: -+ /* round toward zero (truncate) */ -+ emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00))); -+ slot = SLOT_CW_TRUNC; - break; -- case GT: -- gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode); -- suffix = "g"; -+ -+ case I387_CW_FLOOR: -+ /* round down toward -oo */ -+ emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00))); -+ emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400))); -+ slot = SLOT_CW_FLOOR; - break; -- case GTU: -- /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers. -- Those same assemblers have the same but opposite lossage on cmov. */ -- if (mode == CCmode) -- suffix = fp ? 
"nbe" : "a"; -- else -- gcc_unreachable (); -+ -+ case I387_CW_CEIL: -+ /* round up toward +oo */ -+ emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00))); -+ emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800))); -+ slot = SLOT_CW_CEIL; - break; -- case LT: -- switch (mode) -- { -- case E_CCNOmode: -- case E_CCGOCmode: -- suffix = "s"; -- break; - -- case E_CCmode: -- case E_CCGCmode: -- case E_CCGZmode: -- suffix = "l"; -- break; -+ default: -+ gcc_unreachable (); -+ } - -- default: -- gcc_unreachable (); -- } -- break; -- case LTU: -- if (mode == CCmode || mode == CCGZmode) -- suffix = "b"; -- else if (mode == CCCmode) -- suffix = fp ? "b" : "c"; -- else -- gcc_unreachable (); -- break; -- case GE: -- switch (mode) -- { -- case E_CCNOmode: -- case E_CCGOCmode: -- suffix = "ns"; -- break; -+ gcc_assert (slot < MAX_386_STACK_LOCALS); - -- case E_CCmode: -- case E_CCGCmode: -- case E_CCGZmode: -- suffix = "ge"; -- break; -+ new_mode = assign_386_stack_local (HImode, slot); -+ emit_move_insn (new_mode, reg); -+} - -- default: -- gcc_unreachable (); -- } -- break; -- case GEU: -- if (mode == CCmode || mode == CCGZmode) -- suffix = "nb"; -- else if (mode == CCCmode) -- suffix = fp ? "nb" : "nc"; -- else -- gcc_unreachable (); -- break; -- case LE: -- gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode); -- suffix = "le"; -- break; -- case LEU: -- if (mode == CCmode) -- suffix = "be"; -- else -- gcc_unreachable (); -+/* Generate one or more insns to set ENTITY to MODE. */ -+ -+static void -+ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED, -+ HARD_REG_SET regs_live ATTRIBUTE_UNUSED) -+{ -+ switch (entity) -+ { -+ case X86_DIRFLAG: -+ if (mode == X86_DIRFLAG_RESET) -+ emit_insn (gen_cld ()); - break; -- case UNORDERED: -- suffix = fp ? "u" : "p"; -+ case AVX_U128: -+ if (mode == AVX_U128_CLEAN) -+ emit_insn (gen_avx_vzeroupper ()); - break; -- case ORDERED: -- suffix = fp ? "nu" : "np"; -+ case I387_TRUNC: -+ case I387_FLOOR: -+ case I387_CEIL: -+ if (mode != I387_CW_ANY -+ && mode != I387_CW_UNINITIALIZED) -+ emit_i387_cw_initialization (mode); - break; - default: - gcc_unreachable (); - } -- fputs (suffix, file); - } - --/* Print the name of register X to FILE based on its machine mode and number. -- If CODE is 'w', pretend the mode is HImode. -- If CODE is 'b', pretend the mode is QImode. -- If CODE is 'k', pretend the mode is SImode. -- If CODE is 'q', pretend the mode is DImode. -- If CODE is 'x', pretend the mode is V4SFmode. -- If CODE is 't', pretend the mode is V8SFmode. -- If CODE is 'g', pretend the mode is V16SFmode. -- If CODE is 'h', pretend the reg is the 'high' byte register. -- If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. -- If CODE is 'd', duplicate the operand for AVX instruction. -- If CODE is 'V', print naked full integer register name without %. -- */ -+/* Output code for INSN to convert a float to a signed int. OPERANDS -+ are the insn operands. The output may be [HSD]Imode and the input -+ operand may be [SDX]Fmode. 
*/ - --void --print_reg (rtx x, int code, FILE *file) -+const char * -+output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp) - { -- const char *reg; -- int msize; -- unsigned int regno; -- bool duplicated; -+ bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG); -+ bool dimode_p = GET_MODE (operands[0]) == DImode; -+ int round_mode = get_attr_i387_cw (insn); - -- if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V') -- putc ('%', file); -+ static char buf[40]; -+ const char *p; - -- if (x == pc_rtx) -- { -- gcc_assert (TARGET_64BIT); -- fputs ("rip", file); -- return; -- } -+ /* Jump through a hoop or two for DImode, since the hardware has no -+ non-popping instruction. We used to do this a different way, but -+ that was somewhat fragile and broke with post-reload splitters. */ -+ if ((dimode_p || fisttp) && !stack_top_dies) -+ output_asm_insn ("fld\t%y1", operands); - -- if (code == 'y' && STACK_TOP_P (x)) -- { -- fputs ("st(0)", file); -- return; -- } -+ gcc_assert (STACK_TOP_P (operands[1])); -+ gcc_assert (MEM_P (operands[0])); -+ gcc_assert (GET_MODE (operands[1]) != TFmode); - -- if (code == 'w') -- msize = 2; -- else if (code == 'b') -- msize = 1; -- else if (code == 'k') -- msize = 4; -- else if (code == 'q') -- msize = 8; -- else if (code == 'h') -- msize = 0; -- else if (code == 'x') -- msize = 16; -- else if (code == 't') -- msize = 32; -- else if (code == 'g') -- msize = 64; -- else -- msize = GET_MODE_SIZE (GET_MODE (x)); -- -- regno = REGNO (x); -- -- if (regno == ARG_POINTER_REGNUM -- || regno == FRAME_POINTER_REGNUM -- || regno == FPSR_REG) -- { -- output_operand_lossage -- ("invalid use of register '%s'", reg_names[regno]); -- return; -- } -- else if (regno == FLAGS_REG) -- { -- output_operand_lossage ("invalid use of asm flag output"); -- return; -- } -+ if (fisttp) -+ return "fisttp%Z0\t%0"; - -- if (code == 'V') -- { -- if (GENERAL_REGNO_P (regno)) -- msize = GET_MODE_SIZE (word_mode); -- else -- error ("% modifier on non-integer register"); -- } -+ strcpy (buf, "fist"); - -- duplicated = code == 'd' && TARGET_AVX; -+ if (round_mode != I387_CW_ANY) -+ output_asm_insn ("fldcw\t%3", operands); - -- switch (msize) -- { -- case 16: -- case 12: -- case 8: -- if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode)) -- warning (0, "unsupported size for integer register"); -- /* FALLTHRU */ -- case 4: -- if (LEGACY_INT_REGNO_P (regno)) -- putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file); -- /* FALLTHRU */ -- case 2: -- normal: -- reg = hi_reg_name[regno]; -- break; -- case 1: -- if (regno >= ARRAY_SIZE (qi_reg_name)) -- goto normal; -- if (!ANY_QI_REGNO_P (regno)) -- error ("unsupported size for integer register"); -- reg = qi_reg_name[regno]; -- break; -- case 0: -- if (regno >= ARRAY_SIZE (qi_high_reg_name)) -- goto normal; -- reg = qi_high_reg_name[regno]; -- break; -- case 32: -- case 64: -- if (SSE_REGNO_P (regno)) -- { -- gcc_assert (!duplicated); -- putc (msize == 32 ? 
'y' : 'z', file); -- reg = hi_reg_name[regno] + 1; -- break; -- } -- goto normal; -- default: -- gcc_unreachable (); -- } -+ p = "p%Z0\t%0"; -+ strcat (buf, p + !(stack_top_dies || dimode_p)); - -- fputs (reg, file); -+ output_asm_insn (buf, operands); - -- /* Irritatingly, AMD extended registers use -- different naming convention: "r%d[bwd]" */ -- if (REX_INT_REGNO_P (regno)) -- { -- gcc_assert (TARGET_64BIT); -- switch (msize) -- { -- case 0: -- error ("extended registers have no high halves"); -- break; -- case 1: -- putc ('b', file); -- break; -- case 2: -- putc ('w', file); -- break; -- case 4: -- putc ('d', file); -- break; -- case 8: -- /* no suffix */ -- break; -- default: -- error ("unsupported operand size for extended register"); -- break; -- } -- return; -- } -+ if (round_mode != I387_CW_ANY) -+ output_asm_insn ("fldcw\t%2", operands); - -- if (duplicated) -- { -- if (ASSEMBLER_DIALECT == ASM_ATT) -- fprintf (file, ", %%%s", reg); -- else -- fprintf (file, ", %s", reg); -- } -+ return ""; - } - --/* Meaning of CODE: -- L,W,B,Q,S,T -- print the opcode suffix for specified size of operand. -- C -- print opcode suffix for set/cmov insn. -- c -- like C, but print reversed condition -- F,f -- likewise, but for floating-point. -- O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.", -- otherwise nothing -- R -- print embedded rounding and sae. -- r -- print only sae. -- z -- print the opcode suffix for the size of the current operand. -- Z -- likewise, with special suffixes for x87 instructions. -- * -- print a star (in certain assembler syntax) -- A -- print an absolute memory reference. -- E -- print address with DImode register names if TARGET_64BIT. -- w -- print the operand as if it's a "word" (HImode) even if it isn't. -- s -- print a shift double count, followed by the assemblers argument -- delimiter. -- b -- print the QImode name of the register for the indicated operand. -- %b0 would print %al if operands[0] is reg 0. -- w -- likewise, print the HImode name of the register. -- k -- likewise, print the SImode name of the register. -- q -- likewise, print the DImode name of the register. -- x -- likewise, print the V4SFmode name of the register. -- t -- likewise, print the V8SFmode name of the register. -- g -- likewise, print the V16SFmode name of the register. -- h -- print the QImode name for a "high" register, either ah, bh, ch or dh. -- y -- print "st(0)" instead of "st" as a register. -- d -- print duplicated register operand for AVX instruction. -- D -- print condition for SSE cmp instruction. -- P -- if PIC, print an @PLT suffix. -- p -- print raw symbol name. -- X -- don't print any sort of PIC '@' suffix for a symbol. -- & -- print some in-use local-dynamic symbol name. -- H -- print a memory address offset by 8; used for sse high-parts -- Y -- print condition for XOP pcom* instruction. -- V -- print naked full integer register name without %. -- + -- print a branch hint as 'cs' or 'ds' prefix -- ; -- print a semicolon (after prefixes due to bug in older gas). -- ~ -- print "i" if TARGET_AVX2, "f" otherwise. -- ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode -- M -- print addr32 prefix for TARGET_X32 with VSIB address. -- ! -- print NOTRACK prefix for jxx/call/ret instructions if required. -- */ -+/* Output code for x87 ffreep insn. The OPNO argument, which may only -+ have the values zero or one, indicates the ffreep insn's operand -+ from the OPERANDS array. 
*/ - --void --ix86_print_operand (FILE *file, rtx x, int code) -+static const char * -+output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno) - { -- if (code) -+ if (TARGET_USE_FFREEP) -+#ifdef HAVE_AS_IX86_FFREEP -+ return opno ? "ffreep\t%y1" : "ffreep\t%y0"; -+#else - { -- switch (code) -- { -- case 'A': -- switch (ASSEMBLER_DIALECT) -- { -- case ASM_ATT: -- putc ('*', file); -- break; -- -- case ASM_INTEL: -- /* Intel syntax. For absolute addresses, registers should not -- be surrounded by braces. */ -- if (!REG_P (x)) -- { -- putc ('[', file); -- ix86_print_operand (file, x, 0); -- putc (']', file); -- return; -- } -- break; -+ static char retval[32]; -+ int regno = REGNO (operands[opno]); - -- default: -- gcc_unreachable (); -- } -+ gcc_assert (STACK_REGNO_P (regno)); - -- ix86_print_operand (file, x, 0); -- return; -+ regno -= FIRST_STACK_REG; - -- case 'E': -- /* Wrap address in an UNSPEC to declare special handling. */ -- if (TARGET_64BIT) -- x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR); -+ snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno); -+ return retval; -+ } -+#endif - -- output_address (VOIDmode, x); -- return; -+ return opno ? "fstp\t%y1" : "fstp\t%y0"; -+} - -- case 'L': -- if (ASSEMBLER_DIALECT == ASM_ATT) -- putc ('l', file); -- return; - -- case 'W': -- if (ASSEMBLER_DIALECT == ASM_ATT) -- putc ('w', file); -- return; -+/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi -+ should be used. UNORDERED_P is true when fucom should be used. */ - -- case 'B': -- if (ASSEMBLER_DIALECT == ASM_ATT) -- putc ('b', file); -- return; -+const char * -+output_fp_compare (rtx_insn *insn, rtx *operands, -+ bool eflags_p, bool unordered_p) -+{ -+ rtx *xops = eflags_p ? &operands[0] : &operands[1]; -+ bool stack_top_dies; - -- case 'Q': -- if (ASSEMBLER_DIALECT == ASM_ATT) -- putc ('l', file); -- return; -+ static char buf[40]; -+ const char *p; - -- case 'S': -- if (ASSEMBLER_DIALECT == ASM_ATT) -- putc ('s', file); -- return; -+ gcc_assert (STACK_TOP_P (xops[0])); - -- case 'T': -- if (ASSEMBLER_DIALECT == ASM_ATT) -- putc ('t', file); -- return; -+ stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG); - -- case 'O': --#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX -- if (ASSEMBLER_DIALECT != ASM_ATT) -- return; -- -- switch (GET_MODE_SIZE (GET_MODE (x))) -- { -- case 2: -- putc ('w', file); -- break; -- -- case 4: -- putc ('l', file); -- break; -+ if (eflags_p) -+ { -+ p = unordered_p ? "fucomi" : "fcomi"; -+ strcpy (buf, p); - -- case 8: -- putc ('q', file); -- break; -+ p = "p\t{%y1, %0|%0, %y1}"; -+ strcat (buf, p + !stack_top_dies); - -- default: -- output_operand_lossage ("invalid operand size for operand " -- "code 'O'"); -- return; -- } -+ return buf; -+ } - -- putc ('.', file); --#endif -- return; -+ if (STACK_REG_P (xops[1]) -+ && stack_top_dies -+ && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1)) -+ { -+ gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1); - -- case 'z': -- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) -- { -- /* Opcodes don't get size suffixes if using Intel opcodes. */ -- if (ASSEMBLER_DIALECT == ASM_INTEL) -- return; -+ /* If both the top of the 387 stack die, and the other operand -+ is also a stack register that dies, then this must be a -+ `fcompp' float compare. */ -+ p = unordered_p ? 
"fucompp" : "fcompp"; -+ strcpy (buf, p); -+ } -+ else if (const0_operand (xops[1], VOIDmode)) -+ { -+ gcc_assert (!unordered_p); -+ strcpy (buf, "ftst"); -+ } -+ else -+ { -+ if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT) -+ { -+ gcc_assert (!unordered_p); -+ p = "ficom"; -+ } -+ else -+ p = unordered_p ? "fucom" : "fcom"; - -- switch (GET_MODE_SIZE (GET_MODE (x))) -- { -- case 1: -- putc ('b', file); -- return; -+ strcpy (buf, p); - -- case 2: -- putc ('w', file); -- return; -+ p = "p%Z2\t%y2"; -+ strcat (buf, p + !stack_top_dies); -+ } - -- case 4: -- putc ('l', file); -- return; -+ output_asm_insn (buf, operands); -+ return "fnstsw\t%0"; -+} - -- case 8: -- putc ('q', file); -- return; -+void -+ix86_output_addr_vec_elt (FILE *file, int value) -+{ -+ const char *directive = ASM_LONG; - -- default: -- output_operand_lossage ("invalid operand size for operand " -- "code 'z'"); -- return; -- } -- } -+#ifdef ASM_QUAD -+ if (TARGET_LP64) -+ directive = ASM_QUAD; -+#else -+ gcc_assert (!TARGET_64BIT); -+#endif - -- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) -- warning (0, "non-integer operand used with operand code %"); -- /* FALLTHRU */ -+ fprintf (file, "%s%s%d\n", directive, LPREFIX, value); -+} - -- case 'Z': -- /* 387 opcodes don't get size suffixes if using Intel opcodes. */ -- if (ASSEMBLER_DIALECT == ASM_INTEL) -- return; -+void -+ix86_output_addr_diff_elt (FILE *file, int value, int rel) -+{ -+ const char *directive = ASM_LONG; - -- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) -- { -- switch (GET_MODE_SIZE (GET_MODE (x))) -- { -- case 2: --#ifdef HAVE_AS_IX86_FILDS -- putc ('s', file); -+#ifdef ASM_QUAD -+ if (TARGET_64BIT && CASE_VECTOR_MODE == DImode) -+ directive = ASM_QUAD; -+#else -+ gcc_assert (!TARGET_64BIT); - #endif -- return; -+ /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */ -+ if (TARGET_64BIT || TARGET_VXWORKS_RTP) -+ fprintf (file, "%s%s%d-%s%d\n", -+ directive, LPREFIX, value, LPREFIX, rel); -+#if TARGET_MACHO -+ else if (TARGET_MACHO) -+ { -+ fprintf (file, ASM_LONG "%s%d-", LPREFIX, value); -+ machopic_output_function_base_name (file); -+ putc ('\n', file); -+ } -+#endif -+ else if (HAVE_AS_GOTOFF_IN_DATA) -+ fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value); -+ else -+ asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n", -+ GOT_SYMBOL_NAME, LPREFIX, value); -+} -+ -+#define LEA_MAX_STALL (3) -+#define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1) - -- case 4: -- putc ('l', file); -- return; -+/* Increase given DISTANCE in half-cycles according to -+ dependencies between PREV and NEXT instructions. -+ Add 1 half-cycle if there is no dependency and -+ go to next cycle if there is some dependecy. */ - -- case 8: --#ifdef HAVE_AS_IX86_FILDQ -- putc ('q', file); --#else -- fputs ("ll", file); --#endif -- return; -+static unsigned int -+increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance) -+{ -+ df_ref def, use; - -- default: -- break; -- } -- } -- else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) -- { -- /* 387 opcodes don't get size suffixes -- if the operands are registers. 
*/ -- if (STACK_REG_P (x)) -- return; -+ if (!prev || !next) -+ return distance + (distance & 1) + 2; - -- switch (GET_MODE_SIZE (GET_MODE (x))) -- { -- case 4: -- putc ('s', file); -- return; -+ if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev)) -+ return distance + 1; - -- case 8: -- putc ('l', file); -- return; -+ FOR_EACH_INSN_USE (use, next) -+ FOR_EACH_INSN_DEF (def, prev) -+ if (!DF_REF_IS_ARTIFICIAL (def) -+ && DF_REF_REGNO (use) == DF_REF_REGNO (def)) -+ return distance + (distance & 1) + 2; - -- case 12: -- case 16: -- putc ('t', file); -- return; -+ return distance + 1; -+} - -- default: -- break; -- } -- } -- else -- { -- output_operand_lossage ("invalid operand type used with " -- "operand code 'Z'"); -- return; -- } -+/* Function checks if instruction INSN defines register number -+ REGNO1 or REGNO2. */ - -- output_operand_lossage ("invalid operand size for operand code 'Z'"); -- return; -+bool -+insn_defines_reg (unsigned int regno1, unsigned int regno2, -+ rtx_insn *insn) -+{ -+ df_ref def; - -- case 'd': -- case 'b': -- case 'w': -- case 'k': -- case 'q': -- case 'h': -- case 't': -- case 'g': -- case 'y': -- case 'x': -- case 'X': -- case 'P': -- case 'p': -- case 'V': -- break; -+ FOR_EACH_INSN_DEF (def, insn) -+ if (DF_REF_REG_DEF_P (def) -+ && !DF_REF_IS_ARTIFICIAL (def) -+ && (regno1 == DF_REF_REGNO (def) -+ || regno2 == DF_REF_REGNO (def))) -+ return true; - -- case 's': -- if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT) -- { -- ix86_print_operand (file, x, 0); -- fputs (", ", file); -- } -- return; -+ return false; -+} - -- case 'Y': -- switch (GET_CODE (x)) -- { -- case NE: -- fputs ("neq", file); -- break; -- case EQ: -- fputs ("eq", file); -- break; -- case GE: -- case GEU: -- fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file); -- break; -- case GT: -- case GTU: -- fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file); -- break; -- case LE: -- case LEU: -- fputs ("le", file); -- break; -- case LT: -- case LTU: -- fputs ("lt", file); -- break; -- case UNORDERED: -- fputs ("unord", file); -- break; -- case ORDERED: -- fputs ("ord", file); -- break; -- case UNEQ: -- fputs ("ueq", file); -- break; -- case UNGE: -- fputs ("nlt", file); -- break; -- case UNGT: -- fputs ("nle", file); -- break; -- case UNLE: -- fputs ("ule", file); -- break; -- case UNLT: -- fputs ("ult", file); -- break; -- case LTGT: -- fputs ("une", file); -- break; -- default: -- output_operand_lossage ("operand is not a condition code, " -- "invalid operand code 'Y'"); -- return; -- } -- return; -+/* Function checks if instruction INSN uses register number -+ REGNO as a part of address expression. */ - -- case 'D': -- /* Little bit of braindamage here. The SSE compare instructions -- does use completely different names for the comparisons that the -- fp conditional moves. 
*/ -- switch (GET_CODE (x)) -- { -- case UNEQ: -- if (TARGET_AVX) -- { -- fputs ("eq_us", file); -- break; -- } -- /* FALLTHRU */ -- case EQ: -- fputs ("eq", file); -- break; -- case UNLT: -- if (TARGET_AVX) -- { -- fputs ("nge", file); -- break; -- } -- /* FALLTHRU */ -- case LT: -- fputs ("lt", file); -- break; -- case UNLE: -- if (TARGET_AVX) -- { -- fputs ("ngt", file); -- break; -- } -- /* FALLTHRU */ -- case LE: -- fputs ("le", file); -- break; -- case UNORDERED: -- fputs ("unord", file); -- break; -- case LTGT: -- if (TARGET_AVX) -- { -- fputs ("neq_oq", file); -- break; -- } -- /* FALLTHRU */ -- case NE: -- fputs ("neq", file); -- break; -- case GE: -- if (TARGET_AVX) -- { -- fputs ("ge", file); -- break; -- } -- /* FALLTHRU */ -- case UNGE: -- fputs ("nlt", file); -- break; -- case GT: -- if (TARGET_AVX) -- { -- fputs ("gt", file); -- break; -- } -- /* FALLTHRU */ -- case UNGT: -- fputs ("nle", file); -- break; -- case ORDERED: -- fputs ("ord", file); -- break; -- default: -- output_operand_lossage ("operand is not a condition code, " -- "invalid operand code 'D'"); -- return; -- } -- return; -+static bool -+insn_uses_reg_mem (unsigned int regno, rtx insn) -+{ -+ df_ref use; - -- case 'F': -- case 'f': --#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX -- if (ASSEMBLER_DIALECT == ASM_ATT) -- putc ('.', file); -- gcc_fallthrough (); --#endif -+ FOR_EACH_INSN_USE (use, insn) -+ if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use)) -+ return true; - -- case 'C': -- case 'c': -- if (!COMPARISON_P (x)) -- { -- output_operand_lossage ("operand is not a condition code, " -- "invalid operand code '%c'", code); -- return; -- } -- put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), -- code == 'c' || code == 'f', -- code == 'F' || code == 'f', -- file); -- return; -+ return false; -+} - -- case 'H': -- if (!offsettable_memref_p (x)) -- { -- output_operand_lossage ("operand is not an offsettable memory " -- "reference, invalid operand code 'H'"); -- return; -- } -- /* It doesn't actually matter what mode we use here, as we're -- only going to use this for printing. */ -- x = adjust_address_nv (x, DImode, 8); -- /* Output 'qword ptr' for intel assembler dialect. */ -- if (ASSEMBLER_DIALECT == ASM_INTEL) -- code = 'q'; -- break; -+/* Search backward for non-agu definition of register number REGNO1 -+ or register number REGNO2 in basic block starting from instruction -+ START up to head of basic block or instruction INSN. - -- case 'K': -- if (!CONST_INT_P (x)) -- { -- output_operand_lossage ("operand is not an integer, invalid " -- "operand code 'K'"); -- return; -- } -+ Function puts true value into *FOUND var if definition was found -+ and false otherwise. - -- if (INTVAL (x) & IX86_HLE_ACQUIRE) --#ifdef HAVE_AS_IX86_HLE -- fputs ("xacquire ", file); --#else -- fputs ("\n" ASM_BYTE "0xf2\n\t", file); --#endif -- else if (INTVAL (x) & IX86_HLE_RELEASE) --#ifdef HAVE_AS_IX86_HLE -- fputs ("xrelease ", file); --#else -- fputs ("\n" ASM_BYTE "0xf3\n\t", file); --#endif -- /* We do not want to print value of the operand. */ -- return; -+ Distance in half-cycles between START and found instruction or head -+ of BB is added to DISTANCE and returned. */ - -- case 'N': -- if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x))) -- fputs ("{z}", file); -- return; -+static int -+distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2, -+ rtx_insn *insn, int distance, -+ rtx_insn *start, bool *found) -+{ -+ basic_block bb = start ? 
BLOCK_FOR_INSN (start) : NULL; -+ rtx_insn *prev = start; -+ rtx_insn *next = NULL; - -- case 'r': -- if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE) -+ *found = false; -+ -+ while (prev -+ && prev != insn -+ && distance < LEA_SEARCH_THRESHOLD) -+ { -+ if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev)) -+ { -+ distance = increase_distance (prev, next, distance); -+ if (insn_defines_reg (regno1, regno2, prev)) - { -- output_operand_lossage ("operand is not a specific integer, " -- "invalid operand code 'r'"); -- return; -+ if (recog_memoized (prev) < 0 -+ || get_attr_type (prev) != TYPE_LEA) -+ { -+ *found = true; -+ return distance; -+ } - } - -- if (ASSEMBLER_DIALECT == ASM_INTEL) -- fputs (", ", file); -+ next = prev; -+ } -+ if (prev == BB_HEAD (bb)) -+ break; - -- fputs ("{sae}", file); -+ prev = PREV_INSN (prev); -+ } - -- if (ASSEMBLER_DIALECT == ASM_ATT) -- fputs (", ", file); -+ return distance; -+} - -- return; -+/* Search backward for non-agu definition of register number REGNO1 -+ or register number REGNO2 in INSN's basic block until -+ 1. Pass LEA_SEARCH_THRESHOLD instructions, or -+ 2. Reach neighbor BBs boundary, or -+ 3. Reach agu definition. -+ Returns the distance between the non-agu definition point and INSN. -+ If no definition point, returns -1. */ - -- case 'R': -- if (!CONST_INT_P (x)) -- { -- output_operand_lossage ("operand is not an integer, invalid " -- "operand code 'R'"); -- return; -- } -+static int -+distance_non_agu_define (unsigned int regno1, unsigned int regno2, -+ rtx_insn *insn) -+{ -+ basic_block bb = BLOCK_FOR_INSN (insn); -+ int distance = 0; -+ bool found = false; - -- if (ASSEMBLER_DIALECT == ASM_INTEL) -- fputs (", ", file); -+ if (insn != BB_HEAD (bb)) -+ distance = distance_non_agu_define_in_bb (regno1, regno2, insn, -+ distance, PREV_INSN (insn), -+ &found); - -- switch (INTVAL (x)) -+ if (!found && distance < LEA_SEARCH_THRESHOLD) -+ { -+ edge e; -+ edge_iterator ei; -+ bool simple_loop = false; -+ -+ FOR_EACH_EDGE (e, ei, bb->preds) -+ if (e->src == bb) -+ { -+ simple_loop = true; -+ break; -+ } -+ -+ if (simple_loop) -+ distance = distance_non_agu_define_in_bb (regno1, regno2, -+ insn, distance, -+ BB_END (bb), &found); -+ else -+ { -+ int shortest_dist = -1; -+ bool found_in_bb = false; -+ -+ FOR_EACH_EDGE (e, ei, bb->preds) - { -- case ROUND_NEAREST_INT | ROUND_SAE: -- fputs ("{rn-sae}", file); -- break; -- case ROUND_NEG_INF | ROUND_SAE: -- fputs ("{rd-sae}", file); -- break; -- case ROUND_POS_INF | ROUND_SAE: -- fputs ("{ru-sae}", file); -- break; -- case ROUND_ZERO | ROUND_SAE: -- fputs ("{rz-sae}", file); -- break; -- default: -- output_operand_lossage ("operand is not a specific integer, " -- "invalid operand code 'R'"); -- } -+ int bb_dist -+ = distance_non_agu_define_in_bb (regno1, regno2, -+ insn, distance, -+ BB_END (e->src), -+ &found_in_bb); -+ if (found_in_bb) -+ { -+ if (shortest_dist < 0) -+ shortest_dist = bb_dist; -+ else if (bb_dist > 0) -+ shortest_dist = MIN (bb_dist, shortest_dist); - -- if (ASSEMBLER_DIALECT == ASM_ATT) -- fputs (", ", file); -+ found = true; -+ } -+ } - -- return; -+ distance = shortest_dist; -+ } -+ } - -- case '*': -- if (ASSEMBLER_DIALECT == ASM_ATT) -- putc ('*', file); -- return; -+ /* get_attr_type may modify recog data. We want to make sure -+ that recog data is valid for instruction INSN, on which -+ distance_non_agu_define is called. INSN is unchanged here. 
*/ -+ extract_insn_cached (insn); - -- case '&': -- { -- const char *name = get_some_local_dynamic_name (); -- if (name == NULL) -- output_operand_lossage ("'%%&' used without any " -- "local dynamic TLS references"); -- else -- assemble_name (file, name); -- return; -- } -+ if (!found) -+ return -1; - -- case '+': -- { -- rtx x; -+ return distance >> 1; -+} - -- if (!optimize -- || optimize_function_for_size_p (cfun) -- || !TARGET_BRANCH_PREDICTION_HINTS) -- return; -+/* Return the distance in half-cycles between INSN and the next -+ insn that uses register number REGNO in memory address added -+ to DISTANCE. Return -1 if REGNO0 is set. - -- x = find_reg_note (current_output_insn, REG_BR_PROB, 0); -- if (x) -- { -- int pred_val = profile_probability::from_reg_br_prob_note -- (XINT (x, 0)).to_reg_br_prob_base (); -+ Put true value into *FOUND if register usage was found and -+ false otherwise. -+ Put true value into *REDEFINED if register redefinition was -+ found and false otherwise. */ - -- if (pred_val < REG_BR_PROB_BASE * 45 / 100 -- || pred_val > REG_BR_PROB_BASE * 55 / 100) -- { -- bool taken = pred_val > REG_BR_PROB_BASE / 2; -- bool cputaken -- = final_forward_branch_p (current_output_insn) == 0; -+static int -+distance_agu_use_in_bb (unsigned int regno, -+ rtx_insn *insn, int distance, rtx_insn *start, -+ bool *found, bool *redefined) -+{ -+ basic_block bb = NULL; -+ rtx_insn *next = start; -+ rtx_insn *prev = NULL; - -- /* Emit hints only in the case default branch prediction -- heuristics would fail. */ -- if (taken != cputaken) -- { -- /* We use 3e (DS) prefix for taken branches and -- 2e (CS) prefix for not taken branches. */ -- if (taken) -- fputs ("ds ; ", file); -- else -- fputs ("cs ; ", file); -- } -- } -- } -- return; -- } -+ *found = false; -+ *redefined = false; - -- case ';': --#ifndef HAVE_AS_IX86_REP_LOCK_PREFIX -- putc (';', file); --#endif -- return; -+ if (start != NULL_RTX) -+ { -+ bb = BLOCK_FOR_INSN (start); -+ if (start != BB_HEAD (bb)) -+ /* If insn and start belong to the same bb, set prev to insn, -+ so the call to increase_distance will increase the distance -+ between insns by 1. */ -+ prev = insn; -+ } - -- case '~': -- putc (TARGET_AVX2 ? 'i' : 'f', file); -- return; -+ while (next -+ && next != insn -+ && distance < LEA_SEARCH_THRESHOLD) -+ { -+ if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next)) -+ { -+ distance = increase_distance(prev, next, distance); -+ if (insn_uses_reg_mem (regno, next)) -+ { -+ /* Return DISTANCE if OP0 is used in memory -+ address in NEXT. */ -+ *found = true; -+ return distance; -+ } - -- case 'M': -- if (TARGET_X32) -+ if (insn_defines_reg (regno, INVALID_REGNUM, next)) - { -- /* NB: 32-bit indices in VSIB address are sign-extended -- to 64 bits. In x32, if 32-bit address 0xf7fa3010 is -- sign-extended to 0xfffffffff7fa3010 which is invalid -- address. Add addr32 prefix if there is no base -- register nor symbol. */ -- bool ok; -- struct ix86_address parts; -- ok = ix86_decompose_address (x, &parts); -- gcc_assert (ok && parts.index == NULL_RTX); -- if (parts.base == NULL_RTX -- && (parts.disp == NULL_RTX -- || !symbolic_operand (parts.disp, -- GET_MODE (parts.disp)))) -- fputs ("addr32 ", file); -+ /* Return -1 if OP0 is set in NEXT. 
*/ -+ *redefined = true; -+ return -1; - } -- return; - -- case '^': -- if (TARGET_64BIT && Pmode != word_mode) -- fputs ("addr32 ", file); -- return; -+ prev = next; -+ } - -- case '!': -- if (ix86_notrack_prefixed_insn_p (current_output_insn)) -- fputs ("notrack ", file); -- return; -+ if (next == BB_END (bb)) -+ break; - -- default: -- output_operand_lossage ("invalid operand code '%c'", code); -- } -+ next = NEXT_INSN (next); - } - -- if (REG_P (x)) -- print_reg (x, code, file); -+ return distance; -+} - -- else if (MEM_P (x)) -+/* Return the distance between INSN and the next insn that uses -+ register number REGNO0 in memory address. Return -1 if no such -+ a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */ -+ -+static int -+distance_agu_use (unsigned int regno0, rtx_insn *insn) -+{ -+ basic_block bb = BLOCK_FOR_INSN (insn); -+ int distance = 0; -+ bool found = false; -+ bool redefined = false; -+ -+ if (insn != BB_END (bb)) -+ distance = distance_agu_use_in_bb (regno0, insn, distance, -+ NEXT_INSN (insn), -+ &found, &redefined); -+ -+ if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD) - { -- rtx addr = XEXP (x, 0); -+ edge e; -+ edge_iterator ei; -+ bool simple_loop = false; - -- /* No `byte ptr' prefix for call instructions ... */ -- if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P') -- { -- machine_mode mode = GET_MODE (x); -- const char *size; -+ FOR_EACH_EDGE (e, ei, bb->succs) -+ if (e->dest == bb) -+ { -+ simple_loop = true; -+ break; -+ } - -- /* Check for explicit size override codes. */ -- if (code == 'b') -- size = "BYTE"; -- else if (code == 'w') -- size = "WORD"; -- else if (code == 'k') -- size = "DWORD"; -- else if (code == 'q') -- size = "QWORD"; -- else if (code == 'x') -- size = "XMMWORD"; -- else if (code == 't') -- size = "YMMWORD"; -- else if (code == 'g') -- size = "ZMMWORD"; -- else if (mode == BLKmode) -- /* ... or BLKmode operands, when not overridden. */ -- size = NULL; -- else -- switch (GET_MODE_SIZE (mode)) -- { -- case 1: size = "BYTE"; break; -- case 2: size = "WORD"; break; -- case 4: size = "DWORD"; break; -- case 8: size = "QWORD"; break; -- case 12: size = "TBYTE"; break; -- case 16: -- if (mode == XFmode) -- size = "TBYTE"; -- else -- size = "XMMWORD"; -- break; -- case 32: size = "YMMWORD"; break; -- case 64: size = "ZMMWORD"; break; -- default: -- gcc_unreachable (); -- } -- if (size) -+ if (simple_loop) -+ distance = distance_agu_use_in_bb (regno0, insn, -+ distance, BB_HEAD (bb), -+ &found, &redefined); -+ else -+ { -+ int shortest_dist = -1; -+ bool found_in_bb = false; -+ bool redefined_in_bb = false; -+ -+ FOR_EACH_EDGE (e, ei, bb->succs) - { -- fputs (size, file); -- fputs (" PTR ", file); -+ int bb_dist -+ = distance_agu_use_in_bb (regno0, insn, -+ distance, BB_HEAD (e->dest), -+ &found_in_bb, &redefined_in_bb); -+ if (found_in_bb) -+ { -+ if (shortest_dist < 0) -+ shortest_dist = bb_dist; -+ else if (bb_dist > 0) -+ shortest_dist = MIN (bb_dist, shortest_dist); -+ -+ found = true; -+ } - } -- } - -- if (this_is_asm_operands && ! 
address_operand (addr, VOIDmode)) -- output_operand_lossage ("invalid constraints for operand"); -- else -- ix86_print_operand_address_as -- (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P'); -+ distance = shortest_dist; -+ } - } - -- else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode) -+ if (!found || redefined) -+ return -1; -+ -+ return distance >> 1; -+} -+ -+/* Define this macro to tune LEA priority vs ADD, it take effect when -+ there is a dilemma of choicing LEA or ADD -+ Negative value: ADD is more preferred than LEA -+ Zero: Netrual -+ Positive value: LEA is more preferred than ADD*/ -+#define IX86_LEA_PRIORITY 0 -+ -+/* Return true if usage of lea INSN has performance advantage -+ over a sequence of instructions. Instructions sequence has -+ SPLIT_COST cycles higher latency than lea latency. */ -+ -+static bool -+ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1, -+ unsigned int regno2, int split_cost, bool has_scale) -+{ -+ int dist_define, dist_use; -+ -+ /* For Silvermont if using a 2-source or 3-source LEA for -+ non-destructive destination purposes, or due to wanting -+ ability to use SCALE, the use of LEA is justified. */ -+ if (TARGET_SILVERMONT || TARGET_GOLDMONT || TARGET_GOLDMONT_PLUS -+ || TARGET_TREMONT || TARGET_INTEL) - { -- long l; -+ if (has_scale) -+ return true; -+ if (split_cost < 1) -+ return false; -+ if (regno0 == regno1 || regno0 == regno2) -+ return false; -+ return true; -+ } - -- REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l); -+ dist_define = distance_non_agu_define (regno1, regno2, insn); -+ dist_use = distance_agu_use (regno0, insn); - -- if (ASSEMBLER_DIALECT == ASM_ATT) -- putc ('$', file); -- /* Sign extend 32bit SFmode immediate to 8 bytes. */ -- if (code == 'q') -- fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x", -- (unsigned long long) (int) l); -+ if (dist_define < 0 || dist_define >= LEA_MAX_STALL) -+ { -+ /* If there is no non AGU operand definition, no AGU -+ operand usage and split cost is 0 then both lea -+ and non lea variants have same priority. Currently -+ we prefer lea for 64 bit code and non lea on 32 bit -+ code. */ -+ if (dist_use < 0 && split_cost == 0) -+ return TARGET_64BIT || IX86_LEA_PRIORITY; - else -- fprintf (file, "0x%08x", (unsigned int) l); -+ return true; - } - -- else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode) -- { -- long l[2]; -+ /* With longer definitions distance lea is more preferable. -+ Here we change it to take into account splitting cost and -+ lea priority. */ -+ dist_define += split_cost + IX86_LEA_PRIORITY; - -- REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l); -+ /* If there is no use in memory addess then we just check -+ that split cost exceeds AGU stall. */ -+ if (dist_use < 0) -+ return dist_define > LEA_MAX_STALL; - -- if (ASSEMBLER_DIALECT == ASM_ATT) -- putc ('$', file); -- fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff); -- } -+ /* If this insn has both backward non-agu dependence and forward -+ agu dependence, the one with short distance takes effect. */ -+ return dist_define >= dist_use; -+} - -- /* These float cases don't actually occur as immediate operands. */ -- else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode) -- { -- char dstr[30]; -+/* Return true if it is legal to clobber flags by INSN and -+ false otherwise. 
*/ - -- real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1); -- fputs (dstr, file); -- } -+static bool -+ix86_ok_to_clobber_flags (rtx_insn *insn) -+{ -+ basic_block bb = BLOCK_FOR_INSN (insn); -+ df_ref use; -+ bitmap live; - -- else -+ while (insn) - { -- /* We have patterns that allow zero sets of memory, for instance. -- In 64-bit mode, we should probably support all 8-byte vectors, -- since we can in fact encode that into an immediate. */ -- if (GET_CODE (x) == CONST_VECTOR) -+ if (NONDEBUG_INSN_P (insn)) - { -- if (x != CONST0_RTX (GET_MODE (x))) -- output_operand_lossage ("invalid vector immediate"); -- x = const0_rtx; -- } -+ FOR_EACH_INSN_USE (use, insn) -+ if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG) -+ return false; - -- if (code != 'P' && code != 'p') -- { -- if (CONST_INT_P (x)) -- { -- if (ASSEMBLER_DIALECT == ASM_ATT) -- putc ('$', file); -- } -- else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF -- || GET_CODE (x) == LABEL_REF) -- { -- if (ASSEMBLER_DIALECT == ASM_ATT) -- putc ('$', file); -- else -- fputs ("OFFSET FLAT:", file); -- } -+ if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn)) -+ return true; - } -- if (CONST_INT_P (x)) -- fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); -- else if (flag_pic || MACHOPIC_INDIRECT) -- output_pic_addr_const (file, x, code); -- else -- output_addr_const (file, x); -+ -+ if (insn == BB_END (bb)) -+ break; -+ -+ insn = NEXT_INSN (insn); - } --} - --static bool --ix86_print_operand_punct_valid_p (unsigned char code) --{ -- return (code == '*' || code == '+' || code == '&' || code == ';' -- || code == '~' || code == '^' || code == '!'); -+ live = df_get_live_out(bb); -+ return !REGNO_REG_SET_P (live, FLAGS_REG); - } -- --/* Print a memory operand whose address is ADDR. */ - --static void --ix86_print_operand_address_as (FILE *file, rtx addr, -- addr_space_t as, bool no_rip) -+/* Return true if we need to split op0 = op1 + op2 into a sequence of -+ move and add to avoid AGU stalls. */ -+ -+bool -+ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[]) - { -- struct ix86_address parts; -- rtx base, index, disp; -- int scale; -- int ok; -- bool vsib = false; -- int code = 0; -+ unsigned int regno0, regno1, regno2; - -- if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR) -- { -- ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts); -- gcc_assert (parts.index == NULL_RTX); -- parts.index = XVECEXP (addr, 0, 1); -- parts.scale = INTVAL (XVECEXP (addr, 0, 2)); -- addr = XVECEXP (addr, 0, 0); -- vsib = true; -- } -- else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR) -- { -- gcc_assert (TARGET_64BIT); -- ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts); -- code = 'q'; -- } -- else -- ok = ix86_decompose_address (addr, &parts); -+ /* Check if we need to optimize. */ -+ if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) -+ return false; - -- gcc_assert (ok); -+ /* Check it is correct to split here. */ -+ if (!ix86_ok_to_clobber_flags(insn)) -+ return false; - -- base = parts.base; -- index = parts.index; -- disp = parts.disp; -- scale = parts.scale; -+ regno0 = true_regnum (operands[0]); -+ regno1 = true_regnum (operands[1]); -+ regno2 = true_regnum (operands[2]); - -- if (ADDR_SPACE_GENERIC_P (as)) -- as = parts.seg; -+ /* We need to split only adds with non destructive -+ destination operand. 
*/ -+ if (regno0 == regno1 || regno0 == regno2) -+ return false; - else -- gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg)); -- -- if (!ADDR_SPACE_GENERIC_P (as)) -- { -- if (ASSEMBLER_DIALECT == ASM_ATT) -- putc ('%', file); -- -- switch (as) -- { -- case ADDR_SPACE_SEG_FS: -- fputs ("fs:", file); -- break; -- case ADDR_SPACE_SEG_GS: -- fputs ("gs:", file); -- break; -- default: -- gcc_unreachable (); -- } -- } -- -- /* Use one byte shorter RIP relative addressing for 64bit mode. */ -- if (TARGET_64BIT && !base && !index && !no_rip) -- { -- rtx symbol = disp; -+ return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false); -+} - -- if (GET_CODE (disp) == CONST -- && GET_CODE (XEXP (disp, 0)) == PLUS -- && CONST_INT_P (XEXP (XEXP (disp, 0), 1))) -- symbol = XEXP (XEXP (disp, 0), 0); -+/* Return true if we should emit lea instruction instead of mov -+ instruction. */ - -- if (GET_CODE (symbol) == LABEL_REF -- || (GET_CODE (symbol) == SYMBOL_REF -- && SYMBOL_REF_TLS_MODEL (symbol) == 0)) -- base = pc_rtx; -- } -+bool -+ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[]) -+{ -+ unsigned int regno0, regno1; - -- if (!base && !index) -- { -- /* Displacement only requires special attention. */ -- if (CONST_INT_P (disp)) -- { -- if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as)) -- fputs ("ds:", file); -- fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp)); -- } -- /* Load the external function address via the GOT slot to avoid PLT. */ -- else if (GET_CODE (disp) == CONST -- && GET_CODE (XEXP (disp, 0)) == UNSPEC -- && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL -- || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT) -- && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0))) -- output_pic_addr_const (file, disp, 0); -- else if (flag_pic) -- output_pic_addr_const (file, disp, 0); -- else -- output_addr_const (file, disp); -- } -- else -- { -- /* Print SImode register names to force addr32 prefix. */ -- if (SImode_address_operand (addr, VOIDmode)) -- { -- if (flag_checking) -- { -- gcc_assert (TARGET_64BIT); -- switch (GET_CODE (addr)) -- { -- case SUBREG: -- gcc_assert (GET_MODE (addr) == SImode); -- gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode); -- break; -- case ZERO_EXTEND: -- case AND: -- gcc_assert (GET_MODE (addr) == DImode); -- break; -- default: -- gcc_unreachable (); -- } -- } -- gcc_assert (!code); -- code = 'k'; -- } -- else if (code == 0 -- && TARGET_X32 -- && disp -- && CONST_INT_P (disp) -- && INTVAL (disp) < -16*1024*1024) -- { -- /* X32 runs in 64-bit mode, where displacement, DISP, in -- address DISP(%r64), is encoded as 32-bit immediate sign- -- extended from 32-bit to 64-bit. For -0x40000300(%r64), -- address is %r64 + 0xffffffffbffffd00. When %r64 < -- 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64, -- which is invalid for x32. The correct address is %r64 -- - 0x40000300 == 0xf7ffdd64. To properly encode -- -0x40000300(%r64) for x32, we zero-extend negative -- displacement by forcing addr32 prefix which truncates -- 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should -- zero-extend all negative displacements, including -1(%rsp). -- However, for small negative displacements, sign-extension -- won't cause overflow. We only zero-extend negative -- displacements if they < -16*1024*1024, which is also used -- to check legitimate address displacements for PIC. */ -- code = 'k'; -- } -+ /* Check if we need to optimize. 
*/ -+ if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) -+ return false; - -- /* Since the upper 32 bits of RSP are always zero for x32, -- we can encode %esp as %rsp to avoid 0x67 prefix if -- there is no index register. */ -- if (TARGET_X32 && Pmode == SImode -- && !index && base && REG_P (base) && REGNO (base) == SP_REG) -- code = 'q'; -+ /* Use lea for reg to reg moves only. */ -+ if (!REG_P (operands[0]) || !REG_P (operands[1])) -+ return false; - -- if (ASSEMBLER_DIALECT == ASM_ATT) -- { -- if (disp) -- { -- if (flag_pic) -- output_pic_addr_const (file, disp, 0); -- else if (GET_CODE (disp) == LABEL_REF) -- output_asm_label (disp); -- else -- output_addr_const (file, disp); -- } -+ regno0 = true_regnum (operands[0]); -+ regno1 = true_regnum (operands[1]); - -- putc ('(', file); -- if (base) -- print_reg (base, code, file); -- if (index) -- { -- putc (',', file); -- print_reg (index, vsib ? 0 : code, file); -- if (scale != 1 || vsib) -- fprintf (file, ",%d", scale); -- } -- putc (')', file); -- } -- else -- { -- rtx offset = NULL_RTX; -+ return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false); -+} - -- if (disp) -- { -- /* Pull out the offset of a symbol; print any symbol itself. */ -- if (GET_CODE (disp) == CONST -- && GET_CODE (XEXP (disp, 0)) == PLUS -- && CONST_INT_P (XEXP (XEXP (disp, 0), 1))) -- { -- offset = XEXP (XEXP (disp, 0), 1); -- disp = gen_rtx_CONST (VOIDmode, -- XEXP (XEXP (disp, 0), 0)); -- } -+/* Return true if we need to split lea into a sequence of -+ instructions to avoid AGU stalls. */ - -- if (flag_pic) -- output_pic_addr_const (file, disp, 0); -- else if (GET_CODE (disp) == LABEL_REF) -- output_asm_label (disp); -- else if (CONST_INT_P (disp)) -- offset = disp; -- else -- output_addr_const (file, disp); -- } -+bool -+ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[]) -+{ -+ unsigned int regno0, regno1, regno2; -+ int split_cost; -+ struct ix86_address parts; -+ int ok; - -- putc ('[', file); -- if (base) -- { -- print_reg (base, code, file); -- if (offset) -- { -- if (INTVAL (offset) >= 0) -- putc ('+', file); -- fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); -- } -- } -- else if (offset) -- fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); -- else -- putc ('0', file); -+ /* Check we need to optimize. */ -+ if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun)) -+ return false; - -- if (index) -- { -- putc ('+', file); -- print_reg (index, vsib ? 0 : code, file); -- if (scale != 1 || vsib) -- fprintf (file, "*%d", scale); -- } -- putc (']', file); -- } -- } --} -+ /* The "at least two components" test below might not catch simple -+ move or zero extension insns if parts.base is non-NULL and parts.disp -+ is const0_rtx as the only components in the address, e.g. if the -+ register is %rbp or %r13. As this test is much cheaper and moves or -+ zero extensions are the common case, do this check first. */ -+ if (REG_P (operands[1]) -+ || (SImode_address_operand (operands[1], VOIDmode) -+ && REG_P (XEXP (operands[1], 0)))) -+ return false; - --static void --ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr) --{ -- ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false); --} -+ /* Check if it is OK to split here. */ -+ if (!ix86_ok_to_clobber_flags (insn)) -+ return false; - --/* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. 
*/ -+ ok = ix86_decompose_address (operands[1], &parts); -+ gcc_assert (ok); - --static bool --i386_asm_output_addr_const_extra (FILE *file, rtx x) --{ -- rtx op; -+ /* There should be at least two components in the address. */ -+ if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX) -+ + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2) -+ return false; - -- if (GET_CODE (x) != UNSPEC) -+ /* We should not split into add if non legitimate pic -+ operand is used as displacement. */ -+ if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp)) - return false; - -- op = XVECEXP (x, 0, 0); -- switch (XINT (x, 1)) -- { -- case UNSPEC_GOTOFF: -- output_addr_const (file, op); -- fputs ("@gotoff", file); -- break; -- case UNSPEC_GOTTPOFF: -- output_addr_const (file, op); -- /* FIXME: This might be @TPOFF in Sun ld. */ -- fputs ("@gottpoff", file); -- break; -- case UNSPEC_TPOFF: -- output_addr_const (file, op); -- fputs ("@tpoff", file); -- break; -- case UNSPEC_NTPOFF: -- output_addr_const (file, op); -- if (TARGET_64BIT) -- fputs ("@tpoff", file); -- else -- fputs ("@ntpoff", file); -- break; -- case UNSPEC_DTPOFF: -- output_addr_const (file, op); -- fputs ("@dtpoff", file); -- break; -- case UNSPEC_GOTNTPOFF: -- output_addr_const (file, op); -- if (TARGET_64BIT) -- fputs (ASSEMBLER_DIALECT == ASM_ATT ? -- "@gottpoff(%rip)" : "@gottpoff[rip]", file); -- else -- fputs ("@gotntpoff", file); -- break; -- case UNSPEC_INDNTPOFF: -- output_addr_const (file, op); -- fputs ("@indntpoff", file); -- break; --#if TARGET_MACHO -- case UNSPEC_MACHOPIC_OFFSET: -- output_addr_const (file, op); -- putc ('-', file); -- machopic_output_function_base_name (file); -- break; --#endif -- -- default: -- return false; -- } -+ regno0 = true_regnum (operands[0]) ; -+ regno1 = INVALID_REGNUM; -+ regno2 = INVALID_REGNUM; - -- return true; --} -- --/* Split one or more double-mode RTL references into pairs of half-mode -- references. The RTL can be REG, offsettable MEM, integer constant, or -- CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to -- split and "num" is its length. lo_half and hi_half are output arrays -- that parallel "operands". */ -+ if (parts.base) -+ regno1 = true_regnum (parts.base); -+ if (parts.index) -+ regno2 = true_regnum (parts.index); - --void --split_double_mode (machine_mode mode, rtx operands[], -- int num, rtx lo_half[], rtx hi_half[]) --{ -- machine_mode half_mode; -- unsigned int byte; -+ split_cost = 0; - -- switch (mode) -+ /* Compute how many cycles we will add to execution time -+ if split lea into a sequence of instructions. */ -+ if (parts.base || parts.index) - { -- case E_TImode: -- half_mode = DImode; -- break; -- case E_DImode: -- half_mode = SImode; -- break; -- default: -- gcc_unreachable (); -- } -- -- byte = GET_MODE_SIZE (half_mode); -+ /* Have to use mov instruction if non desctructive -+ destination form is used. */ -+ if (regno1 != regno0 && regno2 != regno0) -+ split_cost += 1; - -- while (num--) -- { -- rtx op = operands[num]; -+ /* Have to add index to base if both exist. */ -+ if (parts.base && parts.index) -+ split_cost += 1; - -- /* simplify_subreg refuse to split volatile memory addresses, -- but we still have to handle it. */ -- if (MEM_P (op)) -- { -- lo_half[num] = adjust_address (op, half_mode, 0); -- hi_half[num] = adjust_address (op, half_mode, byte); -- } -- else -+ /* Have to use shift and adds if scale is 2 or greater. 
*/ -+ if (parts.scale > 1) - { -- lo_half[num] = simplify_gen_subreg (half_mode, op, -- GET_MODE (op) == VOIDmode -- ? mode : GET_MODE (op), 0); -- hi_half[num] = simplify_gen_subreg (half_mode, op, -- GET_MODE (op) == VOIDmode -- ? mode : GET_MODE (op), byte); -+ if (regno0 != regno1) -+ split_cost += 1; -+ else if (regno2 == regno0) -+ split_cost += 4; -+ else -+ split_cost += parts.scale; - } -- } --} -- --/* Output code to perform a 387 binary operation in INSN, one of PLUS, -- MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3] -- is the expression of the binary operation. The output may either be -- emitted here, or returned to the caller, like all output_* functions. - -- There is no guarantee that the operands are the same mode, as they -- might be within FLOAT or FLOAT_EXTEND expressions. */ -+ /* Have to use add instruction with immediate if -+ disp is non zero. */ -+ if (parts.disp && parts.disp != const0_rtx) -+ split_cost += 1; - --#ifndef SYSV386_COMPAT --/* Set to 1 for compatibility with brain-damaged assemblers. No-one -- wants to fix the assemblers because that causes incompatibility -- with gcc. No-one wants to fix gcc because that causes -- incompatibility with assemblers... You can use the option of -- -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */ --#define SYSV386_COMPAT 1 --#endif -+ /* Subtract the price of lea. */ -+ split_cost -= 1; -+ } - --const char * --output_387_binary_op (rtx_insn *insn, rtx *operands) --{ -- static char buf[40]; -- const char *p; -- bool is_sse -- = (SSE_REG_P (operands[0]) -- || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2])); -+ return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost, -+ parts.scale > 1); -+} - -- if (is_sse) -- p = "%v"; -- else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT -- || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) -- p = "fi"; -- else -- p = "f"; -+/* Return true if it is ok to optimize an ADD operation to LEA -+ operation to avoid flag register consumation. For most processors, -+ ADD is faster than LEA. For the processors like BONNELL, if the -+ destination register of LEA holds an actual address which will be -+ used soon, LEA is better and otherwise ADD is better. */ - -- strcpy (buf, p); -+bool -+ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[]) -+{ -+ unsigned int regno0 = true_regnum (operands[0]); -+ unsigned int regno1 = true_regnum (operands[1]); -+ unsigned int regno2 = true_regnum (operands[2]); - -- switch (GET_CODE (operands[3])) -- { -- case PLUS: -- p = "add"; break; -- case MINUS: -- p = "sub"; break; -- case MULT: -- p = "mul"; break; -- case DIV: -- p = "div"; break; -- default: -- gcc_unreachable (); -- } -+ /* If a = b + c, (a!=b && a!=c), must use lea form. */ -+ if (regno0 != regno1 && regno0 != regno2) -+ return true; - -- strcat (buf, p); -+ if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) -+ return false; - -- if (is_sse) -- { -- p = (GET_MODE (operands[0]) == SFmode) ? "ss" : "sd"; -- strcat (buf, p); -+ return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false); -+} - -- if (TARGET_AVX) -- p = "\t{%2, %1, %0|%0, %1, %2}"; -- else -- p = "\t{%2, %0|%0, %2}"; -+/* Return true if destination reg of SET_BODY is shift count of -+ USE_BODY. 
*/ - -- strcat (buf, p); -- return buf; -- } -+static bool -+ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body) -+{ -+ rtx set_dest; -+ rtx shift_rtx; -+ int i; - -- /* Even if we do not want to check the inputs, this documents input -- constraints. Which helps in understanding the following code. */ -- if (flag_checking) -+ /* Retrieve destination of SET_BODY. */ -+ switch (GET_CODE (set_body)) - { -- if (STACK_REG_P (operands[0]) -- && ((REG_P (operands[1]) -- && REGNO (operands[0]) == REGNO (operands[1]) -- && (STACK_REG_P (operands[2]) || MEM_P (operands[2]))) -- || (REG_P (operands[2]) -- && REGNO (operands[0]) == REGNO (operands[2]) -- && (STACK_REG_P (operands[1]) || MEM_P (operands[1])))) -- && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2]))) -- ; /* ok */ -- else -- gcc_unreachable (); -+ case SET: -+ set_dest = SET_DEST (set_body); -+ if (!set_dest || !REG_P (set_dest)) -+ return false; -+ break; -+ case PARALLEL: -+ for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--) -+ if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i), -+ use_body)) -+ return true; -+ /* FALLTHROUGH */ -+ default: -+ return false; - } - -- switch (GET_CODE (operands[3])) -+ /* Retrieve shift count of USE_BODY. */ -+ switch (GET_CODE (use_body)) - { -- case MULT: -- case PLUS: -- if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2])) -- std::swap (operands[1], operands[2]); -+ case SET: -+ shift_rtx = XEXP (use_body, 1); -+ break; -+ case PARALLEL: -+ for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--) -+ if (ix86_dep_by_shift_count_body (set_body, -+ XVECEXP (use_body, 0, i))) -+ return true; -+ /* FALLTHROUGH */ -+ default: -+ return false; -+ } - -- /* know operands[0] == operands[1]. */ -+ if (shift_rtx -+ && (GET_CODE (shift_rtx) == ASHIFT -+ || GET_CODE (shift_rtx) == LSHIFTRT -+ || GET_CODE (shift_rtx) == ASHIFTRT -+ || GET_CODE (shift_rtx) == ROTATE -+ || GET_CODE (shift_rtx) == ROTATERT)) -+ { -+ rtx shift_count = XEXP (shift_rtx, 1); - -- if (MEM_P (operands[2])) -+ /* Return true if shift count is dest of SET_BODY. */ -+ if (REG_P (shift_count)) - { -- p = "%Z2\t%2"; -- break; -+ /* Add check since it can be invoked before register -+ allocation in pre-reload schedule. */ -+ if (reload_completed -+ && true_regnum (set_dest) == true_regnum (shift_count)) -+ return true; -+ else if (REGNO(set_dest) == REGNO(shift_count)) -+ return true; - } -+ } - -- if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) -- { -- if (STACK_TOP_P (operands[0])) -- /* How is it that we are storing to a dead operand[2]? -- Well, presumably operands[1] is dead too. We can't -- store the result to st(0) as st(0) gets popped on this -- instruction. Instead store to operands[2] (which I -- think has to be st(1)). st(1) will be popped later. -- gcc <= 2.8.1 didn't have this check and generated -- assembly code that the Unixware assembler rejected. 
*/ -- p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */ -- else -- p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */ -- break; -- } -- -- if (STACK_TOP_P (operands[0])) -- p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */ -- else -- p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */ -- break; -- -- case MINUS: -- case DIV: -- if (MEM_P (operands[1])) -- { -- p = "r%Z1\t%1"; -- break; -- } -- -- if (MEM_P (operands[2])) -- { -- p = "%Z2\t%2"; -- break; -- } -- -- if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) -- { --#if SYSV386_COMPAT -- /* The SystemV/386 SVR3.2 assembler, and probably all AT&T -- derived assemblers, confusingly reverse the direction of -- the operation for fsub{r} and fdiv{r} when the -- destination register is not st(0). The Intel assembler -- doesn't have this brain damage. Read !SYSV386_COMPAT to -- figure out what the hardware really does. */ -- if (STACK_TOP_P (operands[0])) -- p = "{p\t%0, %2|rp\t%2, %0}"; -- else -- p = "{rp\t%2, %0|p\t%0, %2}"; --#else -- if (STACK_TOP_P (operands[0])) -- /* As above for fmul/fadd, we can't store to st(0). */ -- p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */ -- else -- p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */ --#endif -- break; -- } -- -- if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) -- { --#if SYSV386_COMPAT -- if (STACK_TOP_P (operands[0])) -- p = "{rp\t%0, %1|p\t%1, %0}"; -- else -- p = "{p\t%1, %0|rp\t%0, %1}"; --#else -- if (STACK_TOP_P (operands[0])) -- p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */ -- else -- p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */ --#endif -- break; -- } -- -- if (STACK_TOP_P (operands[0])) -- { -- if (STACK_TOP_P (operands[1])) -- p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */ -- else -- p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */ -- break; -- } -- else if (STACK_TOP_P (operands[1])) -- { --#if SYSV386_COMPAT -- p = "{\t%1, %0|r\t%0, %1}"; --#else -- p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */ --#endif -- } -- else -- { --#if SYSV386_COMPAT -- p = "{r\t%2, %0|\t%0, %2}"; --#else -- p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */ --#endif -- } -- break; -- -- default: -- gcc_unreachable (); -- } -- -- strcat (buf, p); -- return buf; -+ return false; - } - --/* Return needed mode for entity in optimize_mode_switching pass. */ -+/* Return true if destination reg of SET_INSN is shift count of -+ USE_INSN. */ - --static int --ix86_dirflag_mode_needed (rtx_insn *insn) -+bool -+ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn) - { -- if (CALL_P (insn)) -- { -- if (cfun->machine->func_type == TYPE_NORMAL) -- return X86_DIRFLAG_ANY; -- else -- /* No need to emit CLD in interrupt handler for TARGET_CLD. */ -- return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET; -- } -- -- if (recog_memoized (insn) < 0) -- return X86_DIRFLAG_ANY; -+ return ix86_dep_by_shift_count_body (PATTERN (set_insn), -+ PATTERN (use_insn)); -+} - -- if (get_attr_type (insn) == TYPE_STR) -- { -- /* Emit cld instruction if stringops are used in the function. */ -- if (cfun->machine->func_type == TYPE_NORMAL) -- return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY; -- else -- return X86_DIRFLAG_RESET; -- } -+/* Return TRUE or FALSE depending on whether the unary operator meets the -+ appropriate constraints. 
*/ - -- return X86_DIRFLAG_ANY; -+bool -+ix86_unary_operator_ok (enum rtx_code, -+ machine_mode, -+ rtx operands[2]) -+{ -+ /* If one of operands is memory, source and destination must match. */ -+ if ((MEM_P (operands[0]) -+ || MEM_P (operands[1])) -+ && ! rtx_equal_p (operands[0], operands[1])) -+ return false; -+ return true; - } - --/* Check if a 256bit or 512 bit AVX register is referenced inside of EXP. */ -+/* Return TRUE if the operands to a vec_interleave_{high,low}v2df -+ are ok, keeping in mind the possible movddup alternative. */ - --static bool --ix86_check_avx_upper_register (const_rtx exp) -+bool -+ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high) - { -- return SSE_REG_P (exp) && GET_MODE_BITSIZE (GET_MODE (exp)) > 128; -+ if (MEM_P (operands[0])) -+ return rtx_equal_p (operands[0], operands[1 + high]); -+ if (MEM_P (operands[1]) && MEM_P (operands[2])) -+ return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]); -+ return true; - } - --/* Return needed mode for entity in optimize_mode_switching pass. */ -+/* A subroutine of ix86_build_signbit_mask. If VECT is true, -+ then replicate the value for all elements of the vector -+ register. */ - --static int --ix86_avx_u128_mode_needed (rtx_insn *insn) -+rtx -+ix86_build_const_vector (machine_mode mode, bool vect, rtx value) - { -- if (CALL_P (insn)) -- { -- rtx link; -+ int i, n_elt; -+ rtvec v; -+ machine_mode scalar_mode; - -- /* Needed mode is set to AVX_U128_CLEAN if there are -- no 256bit or 512bit modes used in function arguments. */ -- for (link = CALL_INSN_FUNCTION_USAGE (insn); -- link; -- link = XEXP (link, 1)) -- { -- if (GET_CODE (XEXP (link, 0)) == USE) -- { -- rtx arg = XEXP (XEXP (link, 0), 0); -+ switch (mode) -+ { -+ case E_V64QImode: -+ case E_V32QImode: -+ case E_V16QImode: -+ case E_V32HImode: -+ case E_V16HImode: -+ case E_V8HImode: -+ case E_V16SImode: -+ case E_V8SImode: -+ case E_V4SImode: -+ case E_V8DImode: -+ case E_V4DImode: -+ case E_V2DImode: -+ gcc_assert (vect); -+ /* FALLTHRU */ -+ case E_V16SFmode: -+ case E_V8SFmode: -+ case E_V4SFmode: -+ case E_V8DFmode: -+ case E_V4DFmode: -+ case E_V2DFmode: -+ n_elt = GET_MODE_NUNITS (mode); -+ v = rtvec_alloc (n_elt); -+ scalar_mode = GET_MODE_INNER (mode); - -- if (ix86_check_avx_upper_register (arg)) -- return AVX_U128_DIRTY; -- } -- } -+ RTVEC_ELT (v, 0) = value; - -- return AVX_U128_CLEAN; -- } -+ for (i = 1; i < n_elt; ++i) -+ RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode); - -- /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced. -- Hardware changes state only when a 256bit register is written to, -- but we need to prevent the compiler from moving optimal insertion -- point above eventual read from 256bit or 512 bit register. */ -- subrtx_iterator::array_type array; -- FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST) -- if (ix86_check_avx_upper_register (*iter)) -- return AVX_U128_DIRTY; -+ return gen_rtx_CONST_VECTOR (mode, v); - -- return AVX_U128_ANY; -+ default: -+ gcc_unreachable (); -+ } - } - --/* Return mode that i387 must be switched into -- prior to the execution of insn. */ -+/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders -+ and ix86_expand_int_vcond. Create a mask for the sign bit in MODE -+ for an SSE register. If VECT is true, then replicate the mask for -+ all elements of the vector register. If INVERT is true, then create -+ a mask excluding the sign bit. 
*/ - --static int --ix86_i387_mode_needed (int entity, rtx_insn *insn) -+rtx -+ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert) - { -- enum attr_i387_cw mode; -- -- /* The mode UNINITIALIZED is used to store control word after a -- function call or ASM pattern. The mode ANY specify that function -- has no requirements on the control word and make no changes in the -- bits we are interested in. */ -+ machine_mode vec_mode, imode; -+ wide_int w; -+ rtx mask, v; - -- if (CALL_P (insn) -- || (NONJUMP_INSN_P (insn) -- && (asm_noperands (PATTERN (insn)) >= 0 -- || GET_CODE (PATTERN (insn)) == ASM_INPUT))) -- return I387_CW_UNINITIALIZED; -- -- if (recog_memoized (insn) < 0) -- return I387_CW_ANY; -- -- mode = get_attr_i387_cw (insn); -- -- switch (entity) -+ switch (mode) - { -- case I387_TRUNC: -- if (mode == I387_CW_TRUNC) -- return mode; -+ case E_V16SImode: -+ case E_V16SFmode: -+ case E_V8SImode: -+ case E_V4SImode: -+ case E_V8SFmode: -+ case E_V4SFmode: -+ vec_mode = mode; -+ imode = SImode; - break; - -- case I387_FLOOR: -- if (mode == I387_CW_FLOOR) -- return mode; -+ case E_V8DImode: -+ case E_V4DImode: -+ case E_V2DImode: -+ case E_V8DFmode: -+ case E_V4DFmode: -+ case E_V2DFmode: -+ vec_mode = mode; -+ imode = DImode; - break; - -- case I387_CEIL: -- if (mode == I387_CW_CEIL) -- return mode; -+ case E_TImode: -+ case E_TFmode: -+ vec_mode = VOIDmode; -+ imode = TImode; - break; - - default: - gcc_unreachable (); - } - -- return I387_CW_ANY; --} -+ machine_mode inner_mode = GET_MODE_INNER (mode); -+ w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1, -+ GET_MODE_BITSIZE (inner_mode)); -+ if (invert) -+ w = wi::bit_not (w); - --/* Return mode that entity must be switched into -- prior to the execution of insn. */ -+ /* Force this value into the low part of a fp vector constant. */ -+ mask = immed_wide_int_const (w, imode); -+ mask = gen_lowpart (inner_mode, mask); - --static int --ix86_mode_needed (int entity, rtx_insn *insn) --{ -- switch (entity) -- { -- case X86_DIRFLAG: -- return ix86_dirflag_mode_needed (insn); -- case AVX_U128: -- return ix86_avx_u128_mode_needed (insn); -- case I387_TRUNC: -- case I387_FLOOR: -- case I387_CEIL: -- return ix86_i387_mode_needed (entity, insn); -- default: -- gcc_unreachable (); -- } -- return 0; --} -+ if (vec_mode == VOIDmode) -+ return force_reg (inner_mode, mask); - --/* Check if a 256bit or 512bit AVX register is referenced in stores. */ -- --static void --ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data) -- { -- if (ix86_check_avx_upper_register (dest)) -- { -- bool *used = (bool *) data; -- *used = true; -- } -- } -+ v = ix86_build_const_vector (vec_mode, vect, mask); -+ return force_reg (vec_mode, v); -+} - --/* Calculate mode of upper 128bit AVX registers after the insn. */ -+/* Return TRUE or FALSE depending on whether the first SET in INSN -+ has source and destination with matching CC modes, and that the -+ CC mode is at least as constrained as REQ_MODE. 
*/ - --static int --ix86_avx_u128_mode_after (int mode, rtx_insn *insn) -+bool -+ix86_match_ccmode (rtx insn, machine_mode req_mode) - { -- rtx pat = PATTERN (insn); -+ rtx set; -+ machine_mode set_mode; - -- if (vzeroupper_pattern (pat, VOIDmode) -- || vzeroall_pattern (pat, VOIDmode)) -- return AVX_U128_CLEAN; -+ set = PATTERN (insn); -+ if (GET_CODE (set) == PARALLEL) -+ set = XVECEXP (set, 0, 0); -+ gcc_assert (GET_CODE (set) == SET); -+ gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE); - -- /* We know that state is clean after CALL insn if there are no -- 256bit or 512bit registers used in the function return register. */ -- if (CALL_P (insn)) -+ set_mode = GET_MODE (SET_DEST (set)); -+ switch (set_mode) - { -- bool avx_upper_reg_found = false; -- note_stores (pat, ix86_check_avx_upper_stores, &avx_upper_reg_found); -- -- return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN; -- } -+ case E_CCNOmode: -+ if (req_mode != CCNOmode -+ && (req_mode != CCmode -+ || XEXP (SET_SRC (set), 1) != const0_rtx)) -+ return false; -+ break; -+ case E_CCmode: -+ if (req_mode == CCGCmode) -+ return false; -+ /* FALLTHRU */ -+ case E_CCGCmode: -+ if (req_mode == CCGOCmode || req_mode == CCNOmode) -+ return false; -+ /* FALLTHRU */ -+ case E_CCGOCmode: -+ if (req_mode == CCZmode) -+ return false; -+ /* FALLTHRU */ -+ case E_CCZmode: -+ break; - -- /* Otherwise, return current mode. Remember that if insn -- references AVX 256bit or 512bit registers, the mode was already -- changed to DIRTY from MODE_NEEDED. */ -- return mode; --} -+ case E_CCGZmode: - --/* Return the mode that an insn results in. */ -+ case E_CCAmode: -+ case E_CCCmode: -+ case E_CCOmode: -+ case E_CCPmode: -+ case E_CCSmode: -+ if (set_mode != req_mode) -+ return false; -+ break; - --static int --ix86_mode_after (int entity, int mode, rtx_insn *insn) --{ -- switch (entity) -- { -- case X86_DIRFLAG: -- return mode; -- case AVX_U128: -- return ix86_avx_u128_mode_after (mode, insn); -- case I387_TRUNC: -- case I387_FLOOR: -- case I387_CEIL: -- return mode; - default: - gcc_unreachable (); - } --} -- --static int --ix86_dirflag_mode_entry (void) --{ -- /* For TARGET_CLD or in the interrupt handler we can't assume -- direction flag state at function entry. */ -- if (TARGET_CLD -- || cfun->machine->func_type != TYPE_NORMAL) -- return X86_DIRFLAG_ANY; - -- return X86_DIRFLAG_RESET; -+ return GET_MODE (SET_SRC (set)) == set_mode; - } - --static int --ix86_avx_u128_mode_entry (void) -+machine_mode -+ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1) - { -- tree arg; -+ machine_mode mode = GET_MODE (op0); - -- /* Entry mode is set to AVX_U128_DIRTY if there are -- 256bit or 512bit modes used in function arguments. */ -- for (arg = DECL_ARGUMENTS (current_function_decl); arg; -- arg = TREE_CHAIN (arg)) -+ if (SCALAR_FLOAT_MODE_P (mode)) - { -- rtx incoming = DECL_INCOMING_RTL (arg); -- -- if (incoming && ix86_check_avx_upper_register (incoming)) -- return AVX_U128_DIRTY; -+ gcc_assert (!DECIMAL_FLOAT_MODE_P (mode)); -+ return CCFPmode; - } - -- return AVX_U128_CLEAN; --} -- --/* Return a mode that ENTITY is assumed to be -- switched to at function entry. */ -- --static int --ix86_mode_entry (int entity) --{ -- switch (entity) -+ switch (code) - { -- case X86_DIRFLAG: -- return ix86_dirflag_mode_entry (); -- case AVX_U128: -- return ix86_avx_u128_mode_entry (); -- case I387_TRUNC: -- case I387_FLOOR: -- case I387_CEIL: -- return I387_CW_ANY; -+ /* Only zero flag is needed. 
*/ -+ case EQ: /* ZF=0 */ -+ case NE: /* ZF!=0 */ -+ return CCZmode; -+ /* Codes needing carry flag. */ -+ case GEU: /* CF=0 */ -+ case LTU: /* CF=1 */ -+ /* Detect overflow checks. They need just the carry flag. */ -+ if (GET_CODE (op0) == PLUS -+ && (rtx_equal_p (op1, XEXP (op0, 0)) -+ || rtx_equal_p (op1, XEXP (op0, 1)))) -+ return CCCmode; -+ else -+ return CCmode; -+ case GTU: /* CF=0 & ZF=0 */ -+ case LEU: /* CF=1 | ZF=1 */ -+ return CCmode; -+ /* Codes possibly doable only with sign flag when -+ comparing against zero. */ -+ case GE: /* SF=OF or SF=0 */ -+ case LT: /* SF<>OF or SF=1 */ -+ if (op1 == const0_rtx) -+ return CCGOCmode; -+ else -+ /* For other cases Carry flag is not required. */ -+ return CCGCmode; -+ /* Codes doable only with sign flag when comparing -+ against zero, but we miss jump instruction for it -+ so we need to use relational tests against overflow -+ that thus needs to be zero. */ -+ case GT: /* ZF=0 & SF=OF */ -+ case LE: /* ZF=1 | SF<>OF */ -+ if (op1 == const0_rtx) -+ return CCNOmode; -+ else -+ return CCGCmode; -+ /* strcmp pattern do (use flags) and combine may ask us for proper -+ mode. */ -+ case USE: -+ return CCmode; - default: - gcc_unreachable (); - } - } - --static int --ix86_avx_u128_mode_exit (void) --{ -- rtx reg = crtl->return_rtx; -+/* Return the fixed registers used for condition codes. */ - -- /* Exit mode is set to AVX_U128_DIRTY if there are 256bit -- or 512 bit modes used in the function return register. */ -- if (reg && ix86_check_avx_upper_register (reg)) -- return AVX_U128_DIRTY; -- -- /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit -- modes used in function arguments, otherwise return AVX_U128_CLEAN. -- */ -- return ix86_avx_u128_mode_entry (); --} -- --/* Return a mode that ENTITY is assumed to be -- switched to at function exit. */ -- --static int --ix86_mode_exit (int entity) --{ -- switch (entity) -- { -- case X86_DIRFLAG: -- return X86_DIRFLAG_ANY; -- case AVX_U128: -- return ix86_avx_u128_mode_exit (); -- case I387_TRUNC: -- case I387_FLOOR: -- case I387_CEIL: -- return I387_CW_ANY; -- default: -- gcc_unreachable (); -- } --} -- --static int --ix86_mode_priority (int, int n) -+static bool -+ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2) - { -- return n; -+ *p1 = FLAGS_REG; -+ *p2 = INVALID_REGNUM; -+ return true; - } - --/* Output code to initialize control word copies used by trunc?f?i and -- rounding patterns. CURRENT_MODE is set to current control word, -- while NEW_MODE is set to new control word. */ -+/* If two condition code modes are compatible, return a condition code -+ mode which is compatible with both. Otherwise, return -+ VOIDmode. 
*/ - --static void --emit_i387_cw_initialization (int mode) -+static machine_mode -+ix86_cc_modes_compatible (machine_mode m1, machine_mode m2) - { -- rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED); -- rtx new_mode; -+ if (m1 == m2) -+ return m1; - -- enum ix86_stack_slot slot; -+ if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC) -+ return VOIDmode; - -- rtx reg = gen_reg_rtx (HImode); -+ if ((m1 == CCGCmode && m2 == CCGOCmode) -+ || (m1 == CCGOCmode && m2 == CCGCmode)) -+ return CCGCmode; - -- emit_insn (gen_x86_fnstcw_1 (stored_mode)); -- emit_move_insn (reg, copy_rtx (stored_mode)); -+ if ((m1 == CCNOmode && m2 == CCGOCmode) -+ || (m1 == CCGOCmode && m2 == CCNOmode)) -+ return CCNOmode; - -- switch (mode) -+ if (m1 == CCZmode -+ && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode)) -+ return m2; -+ else if (m2 == CCZmode -+ && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode)) -+ return m1; -+ -+ switch (m1) - { -- case I387_CW_TRUNC: -- /* round toward zero (truncate) */ -- emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00))); -- slot = SLOT_CW_TRUNC; -- break; -+ default: -+ gcc_unreachable (); - -- case I387_CW_FLOOR: -- /* round down toward -oo */ -- emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00))); -- emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400))); -- slot = SLOT_CW_FLOOR; -- break; -+ case E_CCmode: -+ case E_CCGCmode: -+ case E_CCGOCmode: -+ case E_CCNOmode: -+ case E_CCAmode: -+ case E_CCCmode: -+ case E_CCOmode: -+ case E_CCPmode: -+ case E_CCSmode: -+ case E_CCZmode: -+ switch (m2) -+ { -+ default: -+ return VOIDmode; - -- case I387_CW_CEIL: -- /* round up toward +oo */ -- emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00))); -- emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800))); -- slot = SLOT_CW_CEIL; -- break; -+ case E_CCmode: -+ case E_CCGCmode: -+ case E_CCGOCmode: -+ case E_CCNOmode: -+ case E_CCAmode: -+ case E_CCCmode: -+ case E_CCOmode: -+ case E_CCPmode: -+ case E_CCSmode: -+ case E_CCZmode: -+ return CCmode; -+ } - -- default: -- gcc_unreachable (); -+ case E_CCFPmode: -+ /* These are only compatible with themselves, which we already -+ checked above. */ -+ return VOIDmode; - } -+} - -- gcc_assert (slot < MAX_386_STACK_LOCALS); -+/* Return strategy to use for floating-point. We assume that fcomi is always -+ preferrable where available, since that is also true when looking at size -+ (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */ - -- new_mode = assign_386_stack_local (HImode, slot); -- emit_move_insn (new_mode, reg); -+enum ix86_fpcmp_strategy -+ix86_fp_comparison_strategy (enum rtx_code) -+{ -+ /* Do fcomi/sahf based test when profitable. */ -+ -+ if (TARGET_CMOVE) -+ return IX86_FPCMP_COMI; -+ -+ if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())) -+ return IX86_FPCMP_SAHF; -+ -+ return IX86_FPCMP_ARITH; - } - --/* Generate one or more insns to set ENTITY to MODE. */ -+/* Convert comparison codes we use to represent FP comparison to integer -+ code that will result in proper branch. Return UNKNOWN if no such code -+ is available. 
*/ - --static void --ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED, -- HARD_REG_SET regs_live ATTRIBUTE_UNUSED) -+enum rtx_code -+ix86_fp_compare_code_to_integer (enum rtx_code code) - { -- switch (entity) -+ switch (code) - { -- case X86_DIRFLAG: -- if (mode == X86_DIRFLAG_RESET) -- emit_insn (gen_cld ()); -- break; -- case AVX_U128: -- if (mode == AVX_U128_CLEAN) -- emit_insn (gen_avx_vzeroupper ()); -- break; -- case I387_TRUNC: -- case I387_FLOOR: -- case I387_CEIL: -- if (mode != I387_CW_ANY -- && mode != I387_CW_UNINITIALIZED) -- emit_i387_cw_initialization (mode); -- break; -+ case GT: -+ return GTU; -+ case GE: -+ return GEU; -+ case ORDERED: -+ case UNORDERED: -+ return code; -+ case UNEQ: -+ return EQ; -+ case UNLT: -+ return LTU; -+ case UNLE: -+ return LEU; -+ case LTGT: -+ return NE; - default: -- gcc_unreachable (); -+ return UNKNOWN; - } - } - --/* Output code for INSN to convert a float to a signed int. OPERANDS -- are the insn operands. The output may be [HSD]Imode and the input -- operand may be [SDX]Fmode. */ -- --const char * --output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp) -+/* Zero extend possibly SImode EXP to Pmode register. */ -+rtx -+ix86_zero_extend_to_Pmode (rtx exp) - { -- bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG); -- bool dimode_p = GET_MODE (operands[0]) == DImode; -- int round_mode = get_attr_i387_cw (insn); -- -- static char buf[40]; -- const char *p; -- -- /* Jump through a hoop or two for DImode, since the hardware has no -- non-popping instruction. We used to do this a different way, but -- that was somewhat fragile and broke with post-reload splitters. */ -- if ((dimode_p || fisttp) && !stack_top_dies) -- output_asm_insn ("fld\t%y1", operands); -- -- gcc_assert (STACK_TOP_P (operands[1])); -- gcc_assert (MEM_P (operands[0])); -- gcc_assert (GET_MODE (operands[1]) != TFmode); -- -- if (fisttp) -- return "fisttp%Z0\t%0"; -- -- strcpy (buf, "fist"); -+ return force_reg (Pmode, convert_to_mode (Pmode, exp, 1)); -+} - -- if (round_mode != I387_CW_ANY) -- output_asm_insn ("fldcw\t%3", operands); -+/* Return true if the function being called was marked with attribute -+ "noplt" or using -fno-plt and we are compiling for non-PIC. We need -+ to handle the non-PIC case in the backend because there is no easy -+ interface for the front-end to force non-PLT calls to use the GOT. -+ This is currently used only with 64-bit or 32-bit GOT32X ELF targets -+ to call the function marked "noplt" indirectly. */ - -- p = "p%Z0\t%0"; -- strcat (buf, p + !(stack_top_dies || dimode_p)); -+static bool -+ix86_nopic_noplt_attribute_p (rtx call_op) -+{ -+ if (flag_pic || ix86_cmodel == CM_LARGE -+ || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X) -+ || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF -+ || SYMBOL_REF_LOCAL_P (call_op)) -+ return false; - -- output_asm_insn (buf, operands); -+ tree symbol_decl = SYMBOL_REF_DECL (call_op); - -- if (round_mode != I387_CW_ANY) -- output_asm_insn ("fldcw\t%2", operands); -+ if (!flag_plt -+ || (symbol_decl != NULL_TREE -+ && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl)))) -+ return true; - -- return ""; -+ return false; - } - --/* Output code for x87 ffreep insn. The OPNO argument, which may only -- have the values zero or one, indicates the ffreep insn's operand -- from the OPERANDS array. */ -- --static const char * --output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno) -+/* Helper to output the jmp/call. 
*/ -+static void -+ix86_output_jmp_thunk_or_indirect (const char *thunk_name, const int regno) - { -- if (TARGET_USE_FFREEP) --#ifdef HAVE_AS_IX86_FFREEP -- return opno ? "ffreep\t%y1" : "ffreep\t%y0"; --#else -+ if (thunk_name != NULL) - { -- static char retval[32]; -- int regno = REGNO (operands[opno]); -- -- gcc_assert (STACK_REGNO_P (regno)); -- -- regno -= FIRST_STACK_REG; -- -- snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno); -- return retval; -+ fprintf (asm_out_file, "\tjmp\t"); -+ assemble_name (asm_out_file, thunk_name); -+ putc ('\n', asm_out_file); - } --#endif -- -- return opno ? "fstp\t%y1" : "fstp\t%y0"; -+ else -+ output_indirect_thunk (regno); - } - -+/* Output indirect branch via a call and return thunk. CALL_OP is a -+ register which contains the branch target. XASM is the assembly -+ template for CALL_OP. Branch is a tail call if SIBCALL_P is true. -+ A normal call is converted to: - --/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi -- should be used. UNORDERED_P is true when fucom should be used. */ -- --const char * --output_fp_compare (rtx_insn *insn, rtx *operands, -- bool eflags_p, bool unordered_p) --{ -- rtx *xops = eflags_p ? &operands[0] : &operands[1]; -- bool stack_top_dies; -+ call __x86_indirect_thunk_reg - -- static char buf[40]; -- const char *p; -+ and a tail call is converted to: - -- gcc_assert (STACK_TOP_P (xops[0])); -+ jmp __x86_indirect_thunk_reg -+ */ - -- stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG); -+static void -+ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p) -+{ -+ char thunk_name_buf[32]; -+ char *thunk_name; -+ enum indirect_thunk_prefix need_prefix -+ = indirect_thunk_need_prefix (current_output_insn); -+ int regno = REGNO (call_op); - -- if (eflags_p) -+ if (cfun->machine->indirect_branch_type -+ != indirect_branch_thunk_inline) - { -- p = unordered_p ? "fucomi" : "fcomi"; -- strcpy (buf, p); -- -- p = "p\t{%y1, %0|%0, %y1}"; -- strcat (buf, p + !stack_top_dies); -- -- return buf; -+ if (cfun->machine->indirect_branch_type == indirect_branch_thunk) -+ { -+ int i = regno; -+ if (i >= FIRST_REX_INT_REG) -+ i -= (FIRST_REX_INT_REG - LAST_INT_REG - 1); -+ indirect_thunks_used |= 1 << i; -+ } -+ indirect_thunk_name (thunk_name_buf, regno, need_prefix, false); -+ thunk_name = thunk_name_buf; - } -+ else -+ thunk_name = NULL; - -- if (STACK_REG_P (xops[1]) -- && stack_top_dies -- && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1)) -- { -- gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1); -- -- /* If both the top of the 387 stack die, and the other operand -- is also a stack register that dies, then this must be a -- `fcompp' float compare. */ -- p = unordered_p ? "fucompp" : "fcompp"; -- strcpy (buf, p); -- } -- else if (const0_operand (xops[1], VOIDmode)) -- { -- gcc_assert (!unordered_p); -- strcpy (buf, "ftst"); -- } -+ if (sibcall_p) -+ ix86_output_jmp_thunk_or_indirect (thunk_name, regno); - else - { -- if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT) -+ if (thunk_name != NULL) - { -- gcc_assert (!unordered_p); -- p = "ficom"; -+ fprintf (asm_out_file, "\tcall\t"); -+ assemble_name (asm_out_file, thunk_name); -+ putc ('\n', asm_out_file); -+ return; - } -- else -- p = unordered_p ? 
"fucom" : "fcom"; -- -- strcpy (buf, p); - -- p = "p%Z2\t%y2"; -- strcat (buf, p + !stack_top_dies); -- } -+ char indirectlabel1[32]; -+ char indirectlabel2[32]; - -- output_asm_insn (buf, operands); -- return "fnstsw\t%0"; --} -+ ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, -+ INDIRECT_LABEL, -+ indirectlabelno++); -+ ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, -+ INDIRECT_LABEL, -+ indirectlabelno++); - --void --ix86_output_addr_vec_elt (FILE *file, int value) --{ -- const char *directive = ASM_LONG; -+ /* Jump. */ -+ fputs ("\tjmp\t", asm_out_file); -+ assemble_name_raw (asm_out_file, indirectlabel2); -+ fputc ('\n', asm_out_file); - --#ifdef ASM_QUAD -- if (TARGET_LP64) -- directive = ASM_QUAD; --#else -- gcc_assert (!TARGET_64BIT); --#endif -+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1); - -- fprintf (file, "%s%s%d\n", directive, LPREFIX, value); --} -+ ix86_output_jmp_thunk_or_indirect (thunk_name, regno); - --void --ix86_output_addr_diff_elt (FILE *file, int value, int rel) --{ -- const char *directive = ASM_LONG; -+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2); - --#ifdef ASM_QUAD -- if (TARGET_64BIT && CASE_VECTOR_MODE == DImode) -- directive = ASM_QUAD; --#else -- gcc_assert (!TARGET_64BIT); --#endif -- /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */ -- if (TARGET_64BIT || TARGET_VXWORKS_RTP) -- fprintf (file, "%s%s%d-%s%d\n", -- directive, LPREFIX, value, LPREFIX, rel); --#if TARGET_MACHO -- else if (TARGET_MACHO) -- { -- fprintf (file, ASM_LONG "%s%d-", LPREFIX, value); -- machopic_output_function_base_name (file); -- putc ('\n', file); -+ /* Call. */ -+ fputs ("\tcall\t", asm_out_file); -+ assemble_name_raw (asm_out_file, indirectlabel1); -+ fputc ('\n', asm_out_file); - } --#endif -- else if (HAVE_AS_GOTOFF_IN_DATA) -- fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value); -- else -- asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n", -- GOT_SYMBOL_NAME, LPREFIX, value); - } -- --/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate -- for the target. */ -- --void --ix86_expand_clear (rtx dest) --{ -- rtx tmp; - -- /* We play register width games, which are only valid after reload. */ -- gcc_assert (reload_completed); -+/* Output indirect branch via a call and return thunk. CALL_OP is -+ the branch target. XASM is the assembly template for CALL_OP. -+ Branch is a tail call if SIBCALL_P is true. A normal call is -+ converted to: - -- /* Avoid HImode and its attendant prefix byte. 
*/ -- if (GET_MODE_SIZE (GET_MODE (dest)) < 4) -- dest = gen_rtx_REG (SImode, REGNO (dest)); -- tmp = gen_rtx_SET (dest, const0_rtx); -+ jmp L2 -+ L1: -+ push CALL_OP -+ jmp __x86_indirect_thunk -+ L2: -+ call L1 - -- if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ()) -- { -- rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); -- tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob)); -- } -+ and a tail call is converted to: - -- emit_insn (tmp); --} -+ push CALL_OP -+ jmp __x86_indirect_thunk -+ */ - --void --ix86_expand_move (machine_mode mode, rtx operands[]) -+static void -+ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm, -+ bool sibcall_p) - { -- rtx op0, op1; -- rtx tmp, addend = NULL_RTX; -- enum tls_model model; -+ char thunk_name_buf[32]; -+ char *thunk_name; -+ char push_buf[64]; -+ enum indirect_thunk_prefix need_prefix -+ = indirect_thunk_need_prefix (current_output_insn); -+ int regno = -1; -+ -+ if (cfun->machine->indirect_branch_type -+ != indirect_branch_thunk_inline) -+ { -+ if (cfun->machine->indirect_branch_type == indirect_branch_thunk) -+ indirect_thunk_needed = true; -+ indirect_thunk_name (thunk_name_buf, regno, need_prefix, false); -+ thunk_name = thunk_name_buf; -+ } -+ else -+ thunk_name = NULL; - -- op0 = operands[0]; -- op1 = operands[1]; -+ snprintf (push_buf, sizeof (push_buf), "push{%c}\t%s", -+ TARGET_64BIT ? 'q' : 'l', xasm); - -- switch (GET_CODE (op1)) -+ if (sibcall_p) - { -- case CONST: -- tmp = XEXP (op1, 0); -+ output_asm_insn (push_buf, &call_op); -+ ix86_output_jmp_thunk_or_indirect (thunk_name, regno); -+ } -+ else -+ { -+ char indirectlabel1[32]; -+ char indirectlabel2[32]; - -- if (GET_CODE (tmp) != PLUS -- || GET_CODE (XEXP (tmp, 0)) != SYMBOL_REF) -- break; -+ ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, -+ INDIRECT_LABEL, -+ indirectlabelno++); -+ ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, -+ INDIRECT_LABEL, -+ indirectlabelno++); - -- op1 = XEXP (tmp, 0); -- addend = XEXP (tmp, 1); -- /* FALLTHRU */ -+ /* Jump. */ -+ fputs ("\tjmp\t", asm_out_file); -+ assemble_name_raw (asm_out_file, indirectlabel2); -+ fputc ('\n', asm_out_file); - -- case SYMBOL_REF: -- model = SYMBOL_REF_TLS_MODEL (op1); -+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1); - -- if (model) -- op1 = legitimize_tls_address (op1, model, true); -- else if (ix86_force_load_from_GOT_p (op1)) -- { -- /* Load the external function address via GOT slot to avoid PLT. */ -- op1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op1), -- (TARGET_64BIT -- ? UNSPEC_GOTPCREL -- : UNSPEC_GOT)); -- op1 = gen_rtx_CONST (Pmode, op1); -- op1 = gen_const_mem (Pmode, op1); -- set_mem_alias_set (op1, ix86_GOT_alias_set ()); -- } -- else -+ /* An external function may be called via GOT, instead of PLT. 
*/ -+ if (MEM_P (call_op)) - { -- tmp = legitimize_pe_coff_symbol (op1, addend != NULL_RTX); -- if (tmp) -- { -- op1 = tmp; -- if (!addend) -- break; -- } -- else -+ struct ix86_address parts; -+ rtx addr = XEXP (call_op, 0); -+ if (ix86_decompose_address (addr, &parts) -+ && parts.base == stack_pointer_rtx) - { -- op1 = operands[1]; -- break; -- } -- } -- -- if (addend) -- { -- op1 = force_operand (op1, NULL_RTX); -- op1 = expand_simple_binop (Pmode, PLUS, op1, addend, -- op0, 1, OPTAB_DIRECT); -- } -- else -- op1 = force_operand (op1, op0); -- -- if (op1 == op0) -- return; -- -- op1 = convert_to_mode (mode, op1, 1); -+ /* Since call will adjust stack by -UNITS_PER_WORD, -+ we must convert "disp(stack, index, scale)" to -+ "disp+UNITS_PER_WORD(stack, index, scale)". */ -+ if (parts.index) -+ { -+ addr = gen_rtx_MULT (Pmode, parts.index, -+ GEN_INT (parts.scale)); -+ addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx, -+ addr); -+ } -+ else -+ addr = stack_pointer_rtx; - -- default: -- break; -- } -+ rtx disp; -+ if (parts.disp != NULL_RTX) -+ disp = plus_constant (Pmode, parts.disp, -+ UNITS_PER_WORD); -+ else -+ disp = GEN_INT (UNITS_PER_WORD); - -- if ((flag_pic || MACHOPIC_INDIRECT) -- && symbolic_operand (op1, mode)) -- { -- if (TARGET_MACHO && !TARGET_64BIT) -- { --#if TARGET_MACHO -- /* dynamic-no-pic */ -- if (MACHOPIC_INDIRECT) -- { -- rtx temp = (op0 && REG_P (op0) && mode == Pmode) -- ? op0 : gen_reg_rtx (Pmode); -- op1 = machopic_indirect_data_reference (op1, temp); -- if (MACHOPIC_PURE) -- op1 = machopic_legitimize_pic_address (op1, mode, -- temp == op1 ? 0 : temp); -- } -- if (op0 != op1 && GET_CODE (op0) != MEM) -- { -- rtx insn = gen_rtx_SET (op0, op1); -- emit_insn (insn); -- return; -- } -- if (GET_CODE (op0) == MEM) -- op1 = force_reg (Pmode, op1); -- else -- { -- rtx temp = op0; -- if (GET_CODE (temp) != REG) -- temp = gen_reg_rtx (Pmode); -- temp = legitimize_pic_address (op1, temp); -- if (temp == op0) -- return; -- op1 = temp; -- } -- /* dynamic-no-pic */ --#endif -- } -- else -- { -- if (MEM_P (op0)) -- op1 = force_reg (mode, op1); -- else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode))) -- { -- rtx reg = can_create_pseudo_p () ? NULL_RTX : op0; -- op1 = legitimize_pic_address (op1, reg); -- if (op0 == op1) -- return; -- op1 = convert_to_mode (mode, op1, 1); -- } -- } -- } -- else -- { -- if (MEM_P (op0) -- && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode) -- || !push_operand (op0, mode)) -- && MEM_P (op1)) -- op1 = force_reg (mode, op1); -- -- if (push_operand (op0, mode) -- && ! general_no_elim_operand (op1, mode)) -- op1 = copy_to_mode_reg (mode, op1); -- -- /* Force large constants in 64bit compilation into register -- to get them CSEed. */ -- if (can_create_pseudo_p () -- && (mode == DImode) && TARGET_64BIT -- && immediate_operand (op1, mode) -- && !x86_64_zext_immediate_operand (op1, VOIDmode) -- && !register_operand (op0, mode) -- && optimize) -- op1 = copy_to_mode_reg (mode, op1); -- -- if (can_create_pseudo_p () -- && CONST_DOUBLE_P (op1)) -- { -- /* If we are loading a floating point constant to a register, -- force the value to memory now, since we'll get better code -- out the back end. 
*/ -- -- op1 = validize_mem (force_const_mem (mode, op1)); -- if (!register_operand (op0, mode)) -- { -- rtx temp = gen_reg_rtx (mode); -- emit_insn (gen_rtx_SET (temp, op1)); -- emit_move_insn (op0, temp); -- return; -+ addr = gen_rtx_PLUS (Pmode, addr, disp); -+ call_op = gen_rtx_MEM (GET_MODE (call_op), addr); - } - } -- } -- -- emit_insn (gen_rtx_SET (op0, op1)); --} -- --void --ix86_expand_vector_move (machine_mode mode, rtx operands[]) --{ -- rtx op0 = operands[0], op1 = operands[1]; -- /* Use GET_MODE_BITSIZE instead of GET_MODE_ALIGNMENT for IA MCU -- psABI since the biggest alignment is 4 byte for IA MCU psABI. */ -- unsigned int align = (TARGET_IAMCU -- ? GET_MODE_BITSIZE (mode) -- : GET_MODE_ALIGNMENT (mode)); -- -- if (push_operand (op0, VOIDmode)) -- op0 = emit_move_resolve_push (mode, op0); -- -- /* Force constants other than zero into memory. We do not know how -- the instructions used to build constants modify the upper 64 bits -- of the register, once we have that information we may be able -- to handle some of them more efficiently. */ -- if (can_create_pseudo_p () -- && (CONSTANT_P (op1) -- || (SUBREG_P (op1) -- && CONSTANT_P (SUBREG_REG (op1)))) -- && ((register_operand (op0, mode) -- && !standard_sse_constant_p (op1, mode)) -- /* ix86_expand_vector_move_misalign() does not like constants. */ -- || (SSE_REG_MODE_P (mode) -- && MEM_P (op0) -- && MEM_ALIGN (op0) < align))) -- { -- if (SUBREG_P (op1)) -- { -- machine_mode imode = GET_MODE (SUBREG_REG (op1)); -- rtx r = force_const_mem (imode, SUBREG_REG (op1)); -- if (r) -- r = validize_mem (r); -- else -- r = force_reg (imode, SUBREG_REG (op1)); -- op1 = simplify_gen_subreg (mode, r, imode, SUBREG_BYTE (op1)); -- } -- else -- op1 = validize_mem (force_const_mem (mode, op1)); -- } - -- /* We need to check memory alignment for SSE mode since attribute -- can make operands unaligned. */ -- if (can_create_pseudo_p () -- && SSE_REG_MODE_P (mode) -- && ((MEM_P (op0) && (MEM_ALIGN (op0) < align)) -- || (MEM_P (op1) && (MEM_ALIGN (op1) < align)))) -- { -- rtx tmp[2]; -+ output_asm_insn (push_buf, &call_op); - -- /* ix86_expand_vector_move_misalign() does not like both -- arguments in memory. */ -- if (!register_operand (op0, mode) -- && !register_operand (op1, mode)) -- op1 = force_reg (mode, op1); -+ ix86_output_jmp_thunk_or_indirect (thunk_name, regno); - -- tmp[0] = op0; tmp[1] = op1; -- ix86_expand_vector_move_misalign (mode, tmp); -- return; -- } -+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2); - -- /* Make operand1 a register if it isn't already. */ -- if (can_create_pseudo_p () -- && !register_operand (op0, mode) -- && !register_operand (op1, mode)) -- { -- emit_move_insn (op0, force_reg (GET_MODE (op0), op1)); -- return; -+ /* Call. */ -+ fputs ("\tcall\t", asm_out_file); -+ assemble_name_raw (asm_out_file, indirectlabel1); -+ fputc ('\n', asm_out_file); - } -- -- emit_insn (gen_rtx_SET (op0, op1)); - } - --/* Split 32-byte AVX unaligned load and store if needed. */ -+/* Output indirect branch via a call and return thunk. CALL_OP is -+ the branch target. XASM is the assembly template for CALL_OP. -+ Branch is a tail call if SIBCALL_P is true. 
*/ - - static void --ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1) -+ix86_output_indirect_branch (rtx call_op, const char *xasm, -+ bool sibcall_p) - { -- rtx m; -- rtx (*extract) (rtx, rtx, rtx); -- machine_mode mode; -+ if (REG_P (call_op)) -+ ix86_output_indirect_branch_via_reg (call_op, sibcall_p); -+ else -+ ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p); -+} -+ -+/* Output indirect jump. CALL_OP is the jump target. */ - -- if ((MEM_P (op1) && !TARGET_AVX256_SPLIT_UNALIGNED_LOAD) -- || (MEM_P (op0) && !TARGET_AVX256_SPLIT_UNALIGNED_STORE)) -+const char * -+ix86_output_indirect_jmp (rtx call_op) -+{ -+ if (cfun->machine->indirect_branch_type != indirect_branch_keep) - { -- emit_insn (gen_rtx_SET (op0, op1)); -- return; -+ /* We can't have red-zone since "call" in the indirect thunk -+ pushes the return address onto stack, destroying red-zone. */ -+ if (ix86_red_zone_size != 0) -+ gcc_unreachable (); -+ -+ ix86_output_indirect_branch (call_op, "%0", true); -+ return ""; - } -+ else -+ return "%!jmp\t%A0"; -+} - -- rtx orig_op0 = NULL_RTX; -- mode = GET_MODE (op0); -- switch (GET_MODE_CLASS (mode)) -+/* Output return instrumentation for current function if needed. */ -+ -+static void -+output_return_instrumentation (void) -+{ -+ if (ix86_instrument_return != instrument_return_none -+ && flag_fentry -+ && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl)) - { -- case MODE_VECTOR_INT: -- case MODE_INT: -- if (mode != V32QImode) -+ if (ix86_flag_record_return) -+ fprintf (asm_out_file, "1:\n"); -+ switch (ix86_instrument_return) - { -- if (!MEM_P (op0)) -- { -- orig_op0 = op0; -- op0 = gen_reg_rtx (V32QImode); -- } -- else -- op0 = gen_lowpart (V32QImode, op0); -- op1 = gen_lowpart (V32QImode, op1); -- mode = V32QImode; -+ case instrument_return_call: -+ fprintf (asm_out_file, "\tcall\t__return__\n"); -+ break; -+ case instrument_return_nop5: -+ /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */ -+ fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n"); -+ break; -+ case instrument_return_none: -+ break; - } -- break; -- case MODE_VECTOR_FLOAT: -- break; -- default: -- gcc_unreachable (); -- } -- -- switch (mode) -- { -- default: -- gcc_unreachable (); -- case E_V32QImode: -- extract = gen_avx_vextractf128v32qi; -- mode = V16QImode; -- break; -- case E_V8SFmode: -- extract = gen_avx_vextractf128v8sf; -- mode = V4SFmode; -- break; -- case E_V4DFmode: -- extract = gen_avx_vextractf128v4df; -- mode = V2DFmode; -- break; -- } - -- if (MEM_P (op1)) -- { -- rtx r = gen_reg_rtx (mode); -- m = adjust_address (op1, mode, 0); -- emit_move_insn (r, m); -- m = adjust_address (op1, mode, 16); -- r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m); -- emit_move_insn (op0, r); -- } -- else if (MEM_P (op0)) -- { -- m = adjust_address (op0, mode, 0); -- emit_insn (extract (m, op1, const0_rtx)); -- m = adjust_address (op0, mode, 16); -- emit_insn (extract (m, copy_rtx (op1), const1_rtx)); -+ if (ix86_flag_record_return) -+ { -+ fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n"); -+ fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long"); -+ fprintf (asm_out_file, "\t.previous\n"); -+ } - } -- else -- gcc_unreachable (); -- -- if (orig_op0) -- emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0)); - } - --/* Implement the movmisalign patterns for SSE. Non-SSE modes go -- straight to ix86_expand_vector_move. 
*/ --/* Code generation for scalar reg-reg moves of single and double precision data: -- if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true) -- movaps reg, reg -- else -- movss reg, reg -- if (x86_sse_partial_reg_dependency == true) -- movapd reg, reg -- else -- movsd reg, reg -- -- Code generation for scalar loads of double precision data: -- if (x86_sse_split_regs == true) -- movlpd mem, reg (gas syntax) -- else -- movsd mem, reg -- -- Code generation for unaligned packed loads of single precision data -- (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency): -- if (x86_sse_unaligned_move_optimal) -- movups mem, reg -- -- if (x86_sse_partial_reg_dependency == true) -- { -- xorps reg, reg -- movlps mem, reg -- movhps mem+8, reg -- } -- else -- { -- movlps mem, reg -- movhps mem+8, reg -- } -- -- Code generation for unaligned packed loads of double precision data -- (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs): -- if (x86_sse_unaligned_move_optimal) -- movupd mem, reg -- -- if (x86_sse_split_regs == true) -- { -- movlpd mem, reg -- movhpd mem+8, reg -- } -- else -- { -- movsd mem, reg -- movhpd mem+8, reg -- } -- */ -+/* Output function return. CALL_OP is the jump target. Add a REP -+ prefix to RET if LONG_P is true and function return is kept. */ - --void --ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[]) -+const char * -+ix86_output_function_return (bool long_p) - { -- rtx op0, op1, m; -- -- op0 = operands[0]; -- op1 = operands[1]; -+ output_return_instrumentation (); - -- /* Use unaligned load/store for AVX512 or when optimizing for size. */ -- if (GET_MODE_SIZE (mode) == 64 || optimize_insn_for_size_p ()) -+ if (cfun->machine->function_return_type != indirect_branch_keep) - { -- emit_insn (gen_rtx_SET (op0, op1)); -- return; -- } -+ char thunk_name[32]; -+ enum indirect_thunk_prefix need_prefix -+ = indirect_thunk_need_prefix (current_output_insn); - -- if (TARGET_AVX) -- { -- if (GET_MODE_SIZE (mode) == 32) -- ix86_avx256_split_vector_move_misalign (op0, op1); -+ if (cfun->machine->function_return_type -+ != indirect_branch_thunk_inline) -+ { -+ bool need_thunk = (cfun->machine->function_return_type -+ == indirect_branch_thunk); -+ indirect_thunk_name (thunk_name, INVALID_REGNUM, need_prefix, -+ true); -+ indirect_return_needed |= need_thunk; -+ fprintf (asm_out_file, "\tjmp\t"); -+ assemble_name (asm_out_file, thunk_name); -+ putc ('\n', asm_out_file); -+ } - else -- /* Always use 128-bit mov_internal pattern for AVX. */ -- emit_insn (gen_rtx_SET (op0, op1)); -- return; -- } -- -- if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL -- || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) -- { -- emit_insn (gen_rtx_SET (op0, op1)); -- return; -- } -+ output_indirect_thunk (INVALID_REGNUM); - -- /* ??? If we have typed data, then it would appear that using -- movdqu is the only way to get unaligned data loaded with -- integer type. */ -- if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT) -- { -- emit_insn (gen_rtx_SET (op0, op1)); -- return; -+ return ""; - } - -- if (MEM_P (op1)) -- { -- if (TARGET_SSE2 && mode == V2DFmode) -- { -- rtx zero; -- -- /* When SSE registers are split into halves, we can avoid -- writing to the top half twice. */ -- if (TARGET_SSE_SPLIT_REGS) -- { -- emit_clobber (op0); -- zero = op0; -- } -- else -- { -- /* ??? Not sure about the best option for the Intel chips. -- The following would seem to satisfy; the register is -- entirely cleared, breaking the dependency chain. 
We -- then store to the upper half, with a dependency depth -- of one. A rumor has it that Intel recommends two movsd -- followed by an unpacklpd, but this is unconfirmed. And -- given that the dependency depth of the unpacklpd would -- still be one, I'm not sure why this would be better. */ -- zero = CONST0_RTX (V2DFmode); -- } -+ if (!long_p) -+ return "%!ret"; - -- m = adjust_address (op1, DFmode, 0); -- emit_insn (gen_sse2_loadlpd (op0, zero, m)); -- m = adjust_address (op1, DFmode, 8); -- emit_insn (gen_sse2_loadhpd (op0, op0, m)); -- } -- else -- { -- rtx t; -+ return "rep%; ret"; -+} - -- if (mode != V4SFmode) -- t = gen_reg_rtx (V4SFmode); -- else -- t = op0; -- -- if (TARGET_SSE_PARTIAL_REG_DEPENDENCY) -- emit_move_insn (t, CONST0_RTX (V4SFmode)); -- else -- emit_clobber (t); -+/* Output indirect function return. RET_OP is the function return -+ target. */ - -- m = adjust_address (op1, V2SFmode, 0); -- emit_insn (gen_sse_loadlps (t, t, m)); -- m = adjust_address (op1, V2SFmode, 8); -- emit_insn (gen_sse_loadhps (t, t, m)); -- if (mode != V4SFmode) -- emit_move_insn (op0, gen_lowpart (mode, t)); -- } -- } -- else if (MEM_P (op0)) -+const char * -+ix86_output_indirect_function_return (rtx ret_op) -+{ -+ if (cfun->machine->function_return_type != indirect_branch_keep) - { -- if (TARGET_SSE2 && mode == V2DFmode) -+ char thunk_name[32]; -+ enum indirect_thunk_prefix need_prefix -+ = indirect_thunk_need_prefix (current_output_insn); -+ unsigned int regno = REGNO (ret_op); -+ gcc_assert (regno == CX_REG); -+ -+ if (cfun->machine->function_return_type -+ != indirect_branch_thunk_inline) - { -- m = adjust_address (op0, DFmode, 0); -- emit_insn (gen_sse2_storelpd (m, op1)); -- m = adjust_address (op0, DFmode, 8); -- emit_insn (gen_sse2_storehpd (m, op1)); -+ bool need_thunk = (cfun->machine->function_return_type -+ == indirect_branch_thunk); -+ indirect_thunk_name (thunk_name, regno, need_prefix, true); -+ -+ if (need_thunk) -+ { -+ indirect_return_via_cx = true; -+ indirect_thunks_used |= 1 << CX_REG; -+ } -+ fprintf (asm_out_file, "\tjmp\t"); -+ assemble_name (asm_out_file, thunk_name); -+ putc ('\n', asm_out_file); - } - else -- { -- if (mode != V4SFmode) -- op1 = gen_lowpart (V4SFmode, op1); -+ output_indirect_thunk (regno); - -- m = adjust_address (op0, V2SFmode, 0); -- emit_insn (gen_sse_storelps (m, op1)); -- m = adjust_address (op0, V2SFmode, 8); -- emit_insn (gen_sse_storehps (m, copy_rtx (op1))); -- } -+ return ""; - } - else -- gcc_unreachable (); -+ return "%!jmp\t%A0"; - } - --/* Helper function of ix86_fixup_binary_operands to canonicalize -- operand order. Returns true if the operands should be swapped. */ -+/* Output the assembly for a call instruction. */ - --static bool --ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode, -- rtx operands[]) -+const char * -+ix86_output_call_insn (rtx_insn *insn, rtx call_op) - { -- rtx dst = operands[0]; -- rtx src1 = operands[1]; -- rtx src2 = operands[2]; -- -- /* If the operation is not commutative, we can't do anything. */ -- if (GET_RTX_CLASS (code) != RTX_COMM_ARITH -- && GET_RTX_CLASS (code) != RTX_COMM_COMPARE) -- return false; -- -- /* Highest priority is that src1 should match dst. */ -- if (rtx_equal_p (dst, src1)) -- return false; -- if (rtx_equal_p (dst, src2)) -- return true; -- -- /* Next highest priority is that immediate constants come second. 
*/ -- if (immediate_operand (src2, mode)) -- return false; -- if (immediate_operand (src1, mode)) -- return true; -- -- /* Lowest priority is that memory references should come second. */ -- if (MEM_P (src2)) -- return false; -- if (MEM_P (src1)) -- return true; -+ bool direct_p = constant_call_address_operand (call_op, VOIDmode); -+ bool output_indirect_p -+ = (!TARGET_SEH -+ && cfun->machine->indirect_branch_type != indirect_branch_keep); -+ bool seh_nop_p = false; -+ const char *xasm; - -- return false; --} -+ if (SIBLING_CALL_P (insn)) -+ { -+ output_return_instrumentation (); -+ if (direct_p) -+ { -+ if (ix86_nopic_noplt_attribute_p (call_op)) -+ { -+ direct_p = false; -+ if (TARGET_64BIT) -+ { -+ if (output_indirect_p) -+ xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}"; -+ else -+ xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}"; -+ } -+ else -+ { -+ if (output_indirect_p) -+ xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}"; -+ else -+ xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}"; -+ } -+ } -+ else -+ xasm = "%!jmp\t%P0"; -+ } -+ /* SEH epilogue detection requires the indirect branch case -+ to include REX.W. */ -+ else if (TARGET_SEH) -+ xasm = "%!rex.W jmp\t%A0"; -+ else -+ { -+ if (output_indirect_p) -+ xasm = "%0"; -+ else -+ xasm = "%!jmp\t%A0"; -+ } - -+ if (output_indirect_p && !direct_p) -+ ix86_output_indirect_branch (call_op, xasm, true); -+ else -+ output_asm_insn (xasm, &call_op); -+ return ""; -+ } - --/* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the -- destination to use for the operation. If different from the true -- destination in operands[0], a copy operation will be required. */ -+ /* SEH unwinding can require an extra nop to be emitted in several -+ circumstances. Determine if we have one of those. */ -+ if (TARGET_SEH) -+ { -+ rtx_insn *i; - --rtx --ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode, -- rtx operands[]) --{ -- rtx dst = operands[0]; -- rtx src1 = operands[1]; -- rtx src2 = operands[2]; -+ for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i)) -+ { -+ /* Prevent a catch region from being adjacent to a jump that would -+ be interpreted as an epilogue sequence by the unwinder. */ -+ if (JUMP_P(i) && CROSSING_JUMP_P (i)) -+ { -+ seh_nop_p = true; -+ break; -+ } -+ -+ /* If we get to another real insn, we don't need the nop. */ -+ if (INSN_P (i)) -+ break; - -- /* Canonicalize operand order. */ -- if (ix86_swap_binary_operands_p (code, mode, operands)) -- { -- /* It is invalid to swap operands of different modes. */ -- gcc_assert (GET_MODE (src1) == GET_MODE (src2)); -+ /* If we get to the epilogue note, prevent a catch region from -+ being adjacent to the standard epilogue sequence. If non- -+ call-exceptions, we'll have done this during epilogue emission. */ -+ if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG -+ && !flag_non_call_exceptions -+ && !can_throw_internal (insn)) -+ { -+ seh_nop_p = true; -+ break; -+ } -+ } - -- std::swap (src1, src2); -+ /* If we didn't find a real insn following the call, prevent the -+ unwinder from looking into the next function. */ -+ if (i == NULL) -+ seh_nop_p = true; - } - -- /* Both source operands cannot be in memory. */ -- if (MEM_P (src1) && MEM_P (src2)) -+ if (direct_p) - { -- /* Optimization: Only read from memory once. 
*/ -- if (rtx_equal_p (src1, src2)) -+ if (ix86_nopic_noplt_attribute_p (call_op)) - { -- src2 = force_reg (mode, src2); -- src1 = src2; -+ direct_p = false; -+ if (TARGET_64BIT) -+ { -+ if (output_indirect_p) -+ xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}"; -+ else -+ xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}"; -+ } -+ else -+ { -+ if (output_indirect_p) -+ xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}"; -+ else -+ xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}"; -+ } - } -- else if (rtx_equal_p (dst, src1)) -- src2 = force_reg (mode, src2); - else -- src1 = force_reg (mode, src1); -+ xasm = "%!call\t%P0"; -+ } -+ else -+ { -+ if (output_indirect_p) -+ xasm = "%0"; -+ else -+ xasm = "%!call\t%A0"; - } - -- /* If the destination is memory, and we do not have matching source -- operands, do things in registers. */ -- if (MEM_P (dst) && !rtx_equal_p (dst, src1)) -- dst = gen_reg_rtx (mode); -- -- /* Source 1 cannot be a constant. */ -- if (CONSTANT_P (src1)) -- src1 = force_reg (mode, src1); -- -- /* Source 1 cannot be a non-matching memory. */ -- if (MEM_P (src1) && !rtx_equal_p (dst, src1)) -- src1 = force_reg (mode, src1); -- -- /* Improve address combine. */ -- if (code == PLUS -- && GET_MODE_CLASS (mode) == MODE_INT -- && MEM_P (src2)) -- src2 = force_reg (mode, src2); -- -- operands[1] = src1; -- operands[2] = src2; -- return dst; --} -+ if (output_indirect_p && !direct_p) -+ ix86_output_indirect_branch (call_op, xasm, false); -+ else -+ output_asm_insn (xasm, &call_op); - --/* Similarly, but assume that the destination has already been -- set up properly. */ -+ if (seh_nop_p) -+ return "nop"; - --void --ix86_fixup_binary_operands_no_copy (enum rtx_code code, -- machine_mode mode, rtx operands[]) --{ -- rtx dst = ix86_fixup_binary_operands (code, mode, operands); -- gcc_assert (dst == operands[0]); -+ return ""; - } -+ -+/* Return a MEM corresponding to a stack slot with mode MODE. -+ Allocate a new slot if necessary. - --/* Attempt to expand a binary operator. Make the expansion closer to the -- actual machine, then just general_operand, which will allow 3 separate -- memory references (one output, two input) in a single insn. */ -+ The RTL for a function can have several slots available: N is -+ which slot to use. */ - --void --ix86_expand_binary_operator (enum rtx_code code, machine_mode mode, -- rtx operands[]) -+rtx -+assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n) - { -- rtx src1, src2, dst, op, clob; -- -- dst = ix86_fixup_binary_operands (code, mode, operands); -- src1 = operands[1]; -- src2 = operands[2]; -+ struct stack_local_entry *s; - -- /* Emit the instruction. */ -+ gcc_assert (n < MAX_386_STACK_LOCALS); - -- op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, src1, src2)); -+ for (s = ix86_stack_locals; s; s = s->next) -+ if (s->mode == mode && s->n == n) -+ return validize_mem (copy_rtx (s->rtl)); - -- if (reload_completed -- && code == PLUS -- && !rtx_equal_p (dst, src1)) -- { -- /* This is going to be an LEA; avoid splitting it later. */ -- emit_insn (op); -- } -- else -- { -- clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); -- emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); -- } -+ s = ggc_alloc (); -+ s->n = n; -+ s->mode = mode; -+ s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0); - -- /* Fix up the destination if needed. 
*/ -- if (dst != operands[0]) -- emit_move_insn (operands[0], dst); -+ s->next = ix86_stack_locals; -+ ix86_stack_locals = s; -+ return validize_mem (copy_rtx (s->rtl)); - } - --/* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with -- the given OPERANDS. */ -+static void -+ix86_instantiate_decls (void) -+{ -+ struct stack_local_entry *s; - --void --ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode, -- rtx operands[]) --{ -- rtx op1 = NULL_RTX, op2 = NULL_RTX; -- if (SUBREG_P (operands[1])) -- { -- op1 = operands[1]; -- op2 = operands[2]; -- } -- else if (SUBREG_P (operands[2])) -- { -- op1 = operands[2]; -- op2 = operands[1]; -- } -- /* Optimize (__m128i) d | (__m128i) e and similar code -- when d and e are float vectors into float vector logical -- insn. In C/C++ without using intrinsics there is no other way -- to express vector logical operation on float vectors than -- to cast them temporarily to integer vectors. */ -- if (op1 -- && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL -- && (SUBREG_P (op2) || GET_CODE (op2) == CONST_VECTOR) -- && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT -- && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode) -- && SUBREG_BYTE (op1) == 0 -- && (GET_CODE (op2) == CONST_VECTOR -- || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2)) -- && SUBREG_BYTE (op2) == 0)) -- && can_create_pseudo_p ()) -- { -- rtx dst; -- switch (GET_MODE (SUBREG_REG (op1))) -- { -- case E_V4SFmode: -- case E_V8SFmode: -- case E_V16SFmode: -- case E_V2DFmode: -- case E_V4DFmode: -- case E_V8DFmode: -- dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1))); -- if (GET_CODE (op2) == CONST_VECTOR) -- { -- op2 = gen_lowpart (GET_MODE (dst), op2); -- op2 = force_reg (GET_MODE (dst), op2); -- } -- else -- { -- op1 = operands[1]; -- op2 = SUBREG_REG (operands[2]); -- if (!vector_operand (op2, GET_MODE (dst))) -- op2 = force_reg (GET_MODE (dst), op2); -- } -- op1 = SUBREG_REG (op1); -- if (!vector_operand (op1, GET_MODE (dst))) -- op1 = force_reg (GET_MODE (dst), op1); -- emit_insn (gen_rtx_SET (dst, -- gen_rtx_fmt_ee (code, GET_MODE (dst), -- op1, op2))); -- emit_move_insn (operands[0], gen_lowpart (mode, dst)); -- return; -- default: -- break; -- } -- } -- if (!vector_operand (operands[1], mode)) -- operands[1] = force_reg (mode, operands[1]); -- if (!vector_operand (operands[2], mode)) -- operands[2] = force_reg (mode, operands[2]); -- ix86_fixup_binary_operands_no_copy (code, mode, operands); -- emit_insn (gen_rtx_SET (operands[0], -- gen_rtx_fmt_ee (code, mode, operands[1], -- operands[2]))); -+ for (s = ix86_stack_locals; s; s = s->next) -+ if (s->rtl != NULL_RTX) -+ instantiate_decl_rtl (s->rtl); - } -- --/* Return TRUE or FALSE depending on whether the binary operator meets the -- appropriate constraints. */ -+ -+/* Check whether x86 address PARTS is a pc-relative address. */ - - bool --ix86_binary_operator_ok (enum rtx_code code, machine_mode mode, -- rtx operands[3]) -+ix86_rip_relative_addr_p (struct ix86_address *parts) - { -- rtx dst = operands[0]; -- rtx src1 = operands[1]; -- rtx src2 = operands[2]; -- -- /* Both source operands cannot be in memory. */ -- if (MEM_P (src1) && MEM_P (src2)) -- return false; -- -- /* Canonicalize operand order for commutative operators. */ -- if (ix86_swap_binary_operands_p (code, mode, operands)) -- std::swap (src1, src2); -+ rtx base, index, disp; - -- /* If the destination is memory, we must have a matching source operand. 
*/ -- if (MEM_P (dst) && !rtx_equal_p (dst, src1)) -- return false; -+ base = parts->base; -+ index = parts->index; -+ disp = parts->disp; - -- /* Source 1 cannot be a constant. */ -- if (CONSTANT_P (src1)) -- return false; -+ if (disp && !base && !index) -+ { -+ if (TARGET_64BIT) -+ { -+ rtx symbol = disp; - -- /* Source 1 cannot be a non-matching memory. */ -- if (MEM_P (src1) && !rtx_equal_p (dst, src1)) -- /* Support "andhi/andsi/anddi" as a zero-extending move. */ -- return (code == AND -- && (mode == HImode -- || mode == SImode -- || (TARGET_64BIT && mode == DImode)) -- && satisfies_constraint_L (src2)); -+ if (GET_CODE (disp) == CONST) -+ symbol = XEXP (disp, 0); -+ if (GET_CODE (symbol) == PLUS -+ && CONST_INT_P (XEXP (symbol, 1))) -+ symbol = XEXP (symbol, 0); - -- return true; -+ if (GET_CODE (symbol) == LABEL_REF -+ || (GET_CODE (symbol) == SYMBOL_REF -+ && SYMBOL_REF_TLS_MODEL (symbol) == 0) -+ || (GET_CODE (symbol) == UNSPEC -+ && (XINT (symbol, 1) == UNSPEC_GOTPCREL -+ || XINT (symbol, 1) == UNSPEC_PCREL -+ || XINT (symbol, 1) == UNSPEC_GOTNTPOFF))) -+ return true; -+ } -+ } -+ return false; - } - --/* Attempt to expand a unary operator. Make the expansion closer to the -- actual machine, then just general_operand, which will allow 2 separate -- memory references (one output, one input) in a single insn. */ -+/* Calculate the length of the memory address in the instruction encoding. -+ Includes addr32 prefix, does not include the one-byte modrm, opcode, -+ or other prefixes. We never generate addr32 prefix for LEA insn. */ - --void --ix86_expand_unary_operator (enum rtx_code code, machine_mode mode, -- rtx operands[]) -+int -+memory_address_length (rtx addr, bool lea) - { -- bool matching_memory = false; -- rtx src, dst, op, clob; -- -- dst = operands[0]; -- src = operands[1]; -+ struct ix86_address parts; -+ rtx base, index, disp; -+ int len; -+ int ok; - -- /* If the destination is memory, and we do not have matching source -- operands, do things in registers. */ -- if (MEM_P (dst)) -- { -- if (rtx_equal_p (dst, src)) -- matching_memory = true; -- else -- dst = gen_reg_rtx (mode); -- } -+ if (GET_CODE (addr) == PRE_DEC -+ || GET_CODE (addr) == POST_INC -+ || GET_CODE (addr) == PRE_MODIFY -+ || GET_CODE (addr) == POST_MODIFY) -+ return 0; - -- /* When source operand is memory, destination must match. */ -- if (MEM_P (src) && !matching_memory) -- src = force_reg (mode, src); -+ ok = ix86_decompose_address (addr, &parts); -+ gcc_assert (ok); - -- /* Emit the instruction. */ -+ len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1; - -- op = gen_rtx_SET (dst, gen_rtx_fmt_e (code, mode, src)); -+ /* If this is not LEA instruction, add the length of addr32 prefix. */ -+ if (TARGET_64BIT && !lea -+ && (SImode_address_operand (addr, VOIDmode) -+ || (parts.base && GET_MODE (parts.base) == SImode) -+ || (parts.index && GET_MODE (parts.index) == SImode))) -+ len++; - -- if (code == NOT) -- emit_insn (op); -- else -- { -- clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); -- emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); -- } -+ base = parts.base; -+ index = parts.index; -+ disp = parts.disp; - -- /* Fix up the destination if needed. */ -- if (dst != operands[0]) -- emit_move_insn (operands[0], dst); --} -+ if (base && SUBREG_P (base)) -+ base = SUBREG_REG (base); -+ if (index && SUBREG_P (index)) -+ index = SUBREG_REG (index); - --/* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and -- divisor are within the range [0-255]. 
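[Editorial aside, illustration only -- not part of the deleted patch. The split described above emits "or dividend, divisor" and tests the result against -0x100: if no bit above bit 7 is set, both values lie in [0, 255] and an 8-bit unsigned divide (quotient in AL, remainder in AH) gives the same results as the full-width divide. A self-contained C sketch of that fast path; divmod32 is an invented name.]

#include <assert.h>
#include <stdint.h>

static void
divmod32 (uint32_t a, uint32_t b, uint32_t *q, uint32_t *r)
{
  if (((a | b) & ~0xffu) == 0)
    {
      /* Both operands fit in 8 bits: the cheap 8-bit divide path.  */
      uint8_t q8 = (uint8_t) a / (uint8_t) b;
      uint8_t r8 = (uint8_t) a % (uint8_t) b;
      *q = q8;
      *r = r8;
    }
  else
    {
      /* General full-width path.  */
      *q = a / b;
      *r = a % b;
    }
}

int
main (void)
{
  uint32_t q, r;
  divmod32 (200, 7, &q, &r);        /* takes the 8-bit path */
  assert (q == 28 && r == 4);
  divmod32 (100000, 300, &q, &r);   /* takes the full-width path */
  assert (q == 333 && r == 100);
  return 0;
}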
*/ -+ gcc_assert (base == NULL_RTX || REG_P (base)); -+ gcc_assert (index == NULL_RTX || REG_P (index)); - --void --ix86_split_idivmod (machine_mode mode, rtx operands[], -- bool signed_p) --{ -- rtx_code_label *end_label, *qimode_label; -- rtx div, mod; -- rtx_insn *insn; -- rtx scratch, tmp0, tmp1, tmp2; -- rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx); -- rtx (*gen_zero_extend) (rtx, rtx); -- rtx (*gen_test_ccno_1) (rtx, rtx); -+ /* Rule of thumb: -+ - esp as the base always wants an index, -+ - ebp as the base always wants a displacement, -+ - r12 as the base always wants an index, -+ - r13 as the base always wants a displacement. */ - -- switch (mode) -+ /* Register Indirect. */ -+ if (base && !index && !disp) - { -- case E_SImode: -- if (GET_MODE (operands[0]) == SImode) -- { -- if (GET_MODE (operands[1]) == SImode) -- gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1; -- else -- gen_divmod4_1 -- = signed_p ? gen_divmodsi4_zext_2 : gen_udivmodsi4_zext_2; -- gen_zero_extend = gen_zero_extendqisi2; -- } -- else -- { -- gen_divmod4_1 -- = signed_p ? gen_divmodsi4_zext_1 : gen_udivmodsi4_zext_1; -- gen_zero_extend = gen_zero_extendqidi2; -- } -- gen_test_ccno_1 = gen_testsi_ccno_1; -- break; -- case E_DImode: -- gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1; -- gen_test_ccno_1 = gen_testdi_ccno_1; -- gen_zero_extend = gen_zero_extendqidi2; -- break; -- default: -- gcc_unreachable (); -+ /* esp (for its index) and ebp (for its displacement) need -+ the two-byte modrm form. Similarly for r12 and r13 in 64-bit -+ code. */ -+ if (base == arg_pointer_rtx -+ || base == frame_pointer_rtx -+ || REGNO (base) == SP_REG -+ || REGNO (base) == BP_REG -+ || REGNO (base) == R12_REG -+ || REGNO (base) == R13_REG) -+ len++; - } - -- end_label = gen_label_rtx (); -- qimode_label = gen_label_rtx (); -- -- scratch = gen_reg_rtx (mode); -- -- /* Use 8bit unsigned divimod if dividend and divisor are within -- the range [0-255]. */ -- emit_move_insn (scratch, operands[2]); -- scratch = expand_simple_binop (mode, IOR, scratch, operands[3], -- scratch, 1, OPTAB_DIRECT); -- emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100))); -- tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG); -- tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx); -- tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0, -- gen_rtx_LABEL_REF (VOIDmode, qimode_label), -- pc_rtx); -- insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp0)); -- predict_jump (REG_BR_PROB_BASE * 50 / 100); -- JUMP_LABEL (insn) = qimode_label; -- -- /* Generate original signed/unsigned divimod. */ -- div = gen_divmod4_1 (operands[0], operands[1], -- operands[2], operands[3]); -- emit_insn (div); -- -- /* Branch to the end. */ -- emit_jump_insn (gen_jump (end_label)); -- emit_barrier (); -- -- /* Generate 8bit unsigned divide. */ -- emit_label (qimode_label); -- /* Don't use operands[0] for result of 8bit divide since not all -- registers support QImode ZERO_EXTRACT. */ -- tmp0 = lowpart_subreg (HImode, scratch, mode); -- tmp1 = lowpart_subreg (HImode, operands[2], mode); -- tmp2 = lowpart_subreg (QImode, operands[3], mode); -- emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2)); -- -- if (signed_p) -+ /* Direct Addressing. In 64-bit mode mod 00 r/m 5 -+ is not disp32, but disp32(%rip), so for disp32 -+ SIB byte is needed, unless print_operand_address -+ optimizes it into disp32(%rip) or (%rip) is implied -+ by UNSPEC. 
*/ -+ else if (disp && !base && !index) - { -- div = gen_rtx_DIV (mode, operands[2], operands[3]); -- mod = gen_rtx_MOD (mode, operands[2], operands[3]); -+ len += 4; -+ if (!ix86_rip_relative_addr_p (&parts)) -+ len++; - } - else - { -- div = gen_rtx_UDIV (mode, operands[2], operands[3]); -- mod = gen_rtx_UMOD (mode, operands[2], operands[3]); -- } -- if (mode == SImode) -- { -- if (GET_MODE (operands[0]) != SImode) -- div = gen_rtx_ZERO_EXTEND (DImode, div); -- if (GET_MODE (operands[1]) != SImode) -- mod = gen_rtx_ZERO_EXTEND (DImode, mod); -- } -+ /* Find the length of the displacement constant. */ -+ if (disp) -+ { -+ if (base && satisfies_constraint_K (disp)) -+ len += 1; -+ else -+ len += 4; -+ } -+ /* ebp always wants a displacement. Similarly r13. */ -+ else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG)) -+ len++; - -- /* Extract remainder from AH. */ -- tmp1 = gen_rtx_ZERO_EXTRACT (GET_MODE (operands[1]), -- tmp0, GEN_INT (8), GEN_INT (8)); -- if (REG_P (operands[1])) -- insn = emit_move_insn (operands[1], tmp1); -- else -- { -- /* Need a new scratch register since the old one has result -- of 8bit divide. */ -- scratch = gen_reg_rtx (GET_MODE (operands[1])); -- emit_move_insn (scratch, tmp1); -- insn = emit_move_insn (operands[1], scratch); -+ /* An index requires the two-byte modrm form.... */ -+ if (index -+ /* ...like esp (or r12), which always wants an index. */ -+ || base == arg_pointer_rtx -+ || base == frame_pointer_rtx -+ || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG))) -+ len++; - } -- set_unique_reg_note (insn, REG_EQUAL, mod); - -- /* Zero extend quotient from AL. */ -- tmp1 = gen_lowpart (QImode, tmp0); -- insn = emit_insn (gen_zero_extend (operands[0], tmp1)); -- set_unique_reg_note (insn, REG_EQUAL, div); -- -- emit_label (end_label); -+ return len; - } - --#define LEA_MAX_STALL (3) --#define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1) -+/* Compute default value for "length_immediate" attribute. When SHORTFORM -+ is set, expect that insn have 8bit immediate alternative. */ -+int -+ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform) -+{ -+ int len = 0; -+ int i; -+ extract_insn_cached (insn); -+ for (i = recog_data.n_operands - 1; i >= 0; --i) -+ if (CONSTANT_P (recog_data.operand[i])) -+ { -+ enum attr_mode mode = get_attr_mode (insn); - --/* Increase given DISTANCE in half-cycles according to -- dependencies between PREV and NEXT instructions. -- Add 1 half-cycle if there is no dependency and -- go to next cycle if there is some dependecy. */ -+ gcc_assert (!len); -+ if (shortform && CONST_INT_P (recog_data.operand[i])) -+ { -+ HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]); -+ switch (mode) -+ { -+ case MODE_QI: -+ len = 1; -+ continue; -+ case MODE_HI: -+ ival = trunc_int_for_mode (ival, HImode); -+ break; -+ case MODE_SI: -+ ival = trunc_int_for_mode (ival, SImode); -+ break; -+ default: -+ break; -+ } -+ if (IN_RANGE (ival, -128, 127)) -+ { -+ len = 1; -+ continue; -+ } -+ } -+ switch (mode) -+ { -+ case MODE_QI: -+ len = 1; -+ break; -+ case MODE_HI: -+ len = 2; -+ break; -+ case MODE_SI: -+ len = 4; -+ break; -+ /* Immediates for DImode instructions are encoded -+ as 32bit sign extended values. */ -+ case MODE_DI: -+ len = 4; -+ break; -+ default: -+ fatal_insn ("unknown insn mode", insn); -+ } -+ } -+ return len; -+} - --static unsigned int --increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance) -+/* Compute default value for "length_address" attribute. 
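[Editorial aside, illustration only -- not part of the deleted patch. memory_address_length above applies these rules of thumb: %esp/%r12 as a base force a SIB byte, %ebp/%r13 as a base force a displacement, a displacement that fits in a signed byte costs 1 byte (disp8) and otherwise 4 (disp32), and an absolute address in 64-bit mode needs a SIB byte unless it can be emitted rip-relative. A simplified standalone version, counting only SIB plus displacement bytes and assuming 64-bit rules; the enum and function names are invented for the sketch.]

#include <stdbool.h>
#include <stdio.h>

enum reg { NONE = -1, AX, CX, DX, BX, SP, BP, SI, DI, R12 = 12, R13 = 13 };

static int
addr_extra_bytes (enum reg base, enum reg index, bool has_disp,
		  long disp, bool rip_relative)
{
  int len = 0;

  if (base == NONE && index == NONE)	/* absolute or rip-relative */
    return rip_relative ? 4 : 4 + 1;	/* disp32, plus SIB if not rip */

  if (has_disp)
    len += (base != NONE && disp >= -128 && disp <= 127) ? 1 : 4;
  else if (base == BP || base == R13)	/* must encode a disp8 of zero */
    len += 1;

  if (index != NONE || base == SP || base == R12)	/* SIB byte */
    len += 1;

  return len;
}

int
main (void)
{
  printf ("(%%rax)    -> %d\n", addr_extra_bytes (AX, NONE, false, 0, false)); /* 0 */
  printf ("8(%%rsp)   -> %d\n", addr_extra_bytes (SP, NONE, true, 8, false));  /* 2 */
  printf ("(%%rbp)    -> %d\n", addr_extra_bytes (BP, NONE, false, 0, false)); /* 1 */
  printf ("sym(%%rip) -> %d\n", addr_extra_bytes (NONE, NONE, true, 0, true)); /* 4 */
  return 0;
}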
*/ -+int -+ix86_attr_length_address_default (rtx_insn *insn) - { -- df_ref def, use; -+ int i; - -- if (!prev || !next) -- return distance + (distance & 1) + 2; -+ if (get_attr_type (insn) == TYPE_LEA) -+ { -+ rtx set = PATTERN (insn), addr; - -- if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev)) -- return distance + 1; -+ if (GET_CODE (set) == PARALLEL) -+ set = XVECEXP (set, 0, 0); - -- FOR_EACH_INSN_USE (use, next) -- FOR_EACH_INSN_DEF (def, prev) -- if (!DF_REF_IS_ARTIFICIAL (def) -- && DF_REF_REGNO (use) == DF_REF_REGNO (def)) -- return distance + (distance & 1) + 2; -+ gcc_assert (GET_CODE (set) == SET); - -- return distance + 1; --} -+ addr = SET_SRC (set); - --/* Function checks if instruction INSN defines register number -- REGNO1 or REGNO2. */ -+ return memory_address_length (addr, true); -+ } - --static bool --insn_defines_reg (unsigned int regno1, unsigned int regno2, -- rtx_insn *insn) --{ -- df_ref def; -+ extract_insn_cached (insn); -+ for (i = recog_data.n_operands - 1; i >= 0; --i) -+ { -+ rtx op = recog_data.operand[i]; -+ if (MEM_P (op)) -+ { -+ constrain_operands_cached (insn, reload_completed); -+ if (which_alternative != -1) -+ { -+ const char *constraints = recog_data.constraints[i]; -+ int alt = which_alternative; - -- FOR_EACH_INSN_DEF (def, insn) -- if (DF_REF_REG_DEF_P (def) -- && !DF_REF_IS_ARTIFICIAL (def) -- && (regno1 == DF_REF_REGNO (def) -- || regno2 == DF_REF_REGNO (def))) -- return true; -+ while (*constraints == '=' || *constraints == '+') -+ constraints++; -+ while (alt-- > 0) -+ while (*constraints++ != ',') -+ ; -+ /* Skip ignored operands. */ -+ if (*constraints == 'X') -+ continue; -+ } - -- return false; -+ int len = memory_address_length (XEXP (op, 0), false); -+ -+ /* Account for segment prefix for non-default addr spaces. */ -+ if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op))) -+ len++; -+ -+ return len; -+ } -+ } -+ return 0; - } - --/* Function checks if instruction INSN uses register number -- REGNO as a part of address expression. */ -+/* Compute default value for "length_vex" attribute. It includes -+ 2 or 3 byte VEX prefix and 1 opcode byte. */ - --static bool --insn_uses_reg_mem (unsigned int regno, rtx insn) -+int -+ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode, -+ bool has_vex_w) - { -- df_ref use; -+ int i; - -- FOR_EACH_INSN_USE (use, insn) -- if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use)) -- return true; -+ /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3 -+ byte VEX prefix. */ -+ if (!has_0f_opcode || has_vex_w) -+ return 3 + 1; - -- return false; --} -+ /* We can always use 2 byte VEX prefix in 32bit. */ -+ if (!TARGET_64BIT) -+ return 2 + 1; - --/* Search backward for non-agu definition of register number REGNO1 -- or register number REGNO2 in basic block starting from instruction -- START up to head of basic block or instruction INSN. -+ extract_insn_cached (insn); - -- Function puts true value into *FOUND var if definition was found -- and false otherwise. -+ for (i = recog_data.n_operands - 1; i >= 0; --i) -+ if (REG_P (recog_data.operand[i])) -+ { -+ /* REX.W bit uses 3 byte VEX prefix. */ -+ if (GET_MODE (recog_data.operand[i]) == DImode -+ && GENERAL_REG_P (recog_data.operand[i])) -+ return 3 + 1; -+ } -+ else -+ { -+ /* REX.X or REX.B bits use 3 byte VEX prefix. 
*/ -+ if (MEM_P (recog_data.operand[i]) -+ && x86_extended_reg_mentioned_p (recog_data.operand[i])) -+ return 3 + 1; -+ } - -- Distance in half-cycles between START and found instruction or head -- of BB is added to DISTANCE and returned. */ -+ return 2 + 1; -+} -+ - --static int --distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2, -- rtx_insn *insn, int distance, -- rtx_insn *start, bool *found) --{ -- basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL; -- rtx_insn *prev = start; -- rtx_insn *next = NULL; -+static bool -+ix86_class_likely_spilled_p (reg_class_t); - -- *found = false; -+/* Returns true if lhs of insn is HW function argument register and set up -+ is_spilled to true if it is likely spilled HW register. */ -+static bool -+insn_is_function_arg (rtx insn, bool* is_spilled) -+{ -+ rtx dst; - -- while (prev -- && prev != insn -- && distance < LEA_SEARCH_THRESHOLD) -+ if (!NONDEBUG_INSN_P (insn)) -+ return false; -+ /* Call instructions are not movable, ignore it. */ -+ if (CALL_P (insn)) -+ return false; -+ insn = PATTERN (insn); -+ if (GET_CODE (insn) == PARALLEL) -+ insn = XVECEXP (insn, 0, 0); -+ if (GET_CODE (insn) != SET) -+ return false; -+ dst = SET_DEST (insn); -+ if (REG_P (dst) && HARD_REGISTER_P (dst) -+ && ix86_function_arg_regno_p (REGNO (dst))) - { -- if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev)) -- { -- distance = increase_distance (prev, next, distance); -- if (insn_defines_reg (regno1, regno2, prev)) -- { -- if (recog_memoized (prev) < 0 -- || get_attr_type (prev) != TYPE_LEA) -- { -- *found = true; -- return distance; -- } -- } -- -- next = prev; -- } -- if (prev == BB_HEAD (bb)) -- break; -- -- prev = PREV_INSN (prev); -+ /* Is it likely spilled HW register? */ -+ if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst)) -+ && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst)))) -+ *is_spilled = true; -+ return true; - } -- -- return distance; -+ return false; - } - --/* Search backward for non-agu definition of register number REGNO1 -- or register number REGNO2 in INSN's basic block until -- 1. Pass LEA_SEARCH_THRESHOLD instructions, or -- 2. Reach neighbor BBs boundary, or -- 3. Reach agu definition. -- Returns the distance between the non-agu definition point and INSN. -- If no definition point, returns -1. */ -- --static int --distance_non_agu_define (unsigned int regno1, unsigned int regno2, -- rtx_insn *insn) -+/* Add output dependencies for chain of function adjacent arguments if only -+ there is a move to likely spilled HW register. Return first argument -+ if at least one dependence was added or NULL otherwise. */ -+static rtx_insn * -+add_parameter_dependencies (rtx_insn *call, rtx_insn *head) - { -- basic_block bb = BLOCK_FOR_INSN (insn); -- int distance = 0; -- bool found = false; -+ rtx_insn *insn; -+ rtx_insn *last = call; -+ rtx_insn *first_arg = NULL; -+ bool is_spilled = false; - -- if (insn != BB_HEAD (bb)) -- distance = distance_non_agu_define_in_bb (regno1, regno2, insn, -- distance, PREV_INSN (insn), -- &found); -+ head = PREV_INSN (head); - -- if (!found && distance < LEA_SEARCH_THRESHOLD) -+ /* Find nearest to call argument passing instruction. 
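[Editorial aside, illustration only -- not part of the deleted patch. It mirrors the decision in ix86_attr_length_vex_default earlier in this hunk: the 2-byte VEX form can only encode opcodes from the 0F map and cannot carry VEX.W, VEX.X or VEX.B, so any of those forces the 3-byte form; either way one opcode byte is added, as the attribute counts it. The function name and flag parameters are invented for the sketch.]

#include <stdbool.h>

static int
vex_prefix_plus_opcode_len (bool opcode_in_0f_map, bool needs_w,
			    bool needs_x_or_b)
{
  if (!opcode_in_0f_map || needs_w || needs_x_or_b)
    return 3 + 1;	/* 3-byte VEX prefix + opcode byte */
  return 2 + 1;		/* 2-byte VEX prefix + opcode byte */
}

/* For example, a VEX-encoded 0F-map instruction on xmm0..xmm7 with no
   64-bit operand gets the short form: vex_prefix_plus_opcode_len (true,
   false, false) == 3.  */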
*/ -+ while (true) - { -- edge e; -- edge_iterator ei; -- bool simple_loop = false; -- -- FOR_EACH_EDGE (e, ei, bb->preds) -- if (e->src == bb) -- { -- simple_loop = true; -- break; -- } -+ last = PREV_INSN (last); -+ if (last == head) -+ return NULL; -+ if (!NONDEBUG_INSN_P (last)) -+ continue; -+ if (insn_is_function_arg (last, &is_spilled)) -+ break; -+ return NULL; -+ } - -- if (simple_loop) -- distance = distance_non_agu_define_in_bb (regno1, regno2, -- insn, distance, -- BB_END (bb), &found); -- else -+ first_arg = last; -+ while (true) -+ { -+ insn = PREV_INSN (last); -+ if (!INSN_P (insn)) -+ break; -+ if (insn == head) -+ break; -+ if (!NONDEBUG_INSN_P (insn)) - { -- int shortest_dist = -1; -- bool found_in_bb = false; -- -- FOR_EACH_EDGE (e, ei, bb->preds) -- { -- int bb_dist -- = distance_non_agu_define_in_bb (regno1, regno2, -- insn, distance, -- BB_END (e->src), -- &found_in_bb); -- if (found_in_bb) -- { -- if (shortest_dist < 0) -- shortest_dist = bb_dist; -- else if (bb_dist > 0) -- shortest_dist = MIN (bb_dist, shortest_dist); -- -- found = true; -- } -- } -- -- distance = shortest_dist; -+ last = insn; -+ continue; -+ } -+ if (insn_is_function_arg (insn, &is_spilled)) -+ { -+ /* Add output depdendence between two function arguments if chain -+ of output arguments contains likely spilled HW registers. */ -+ if (is_spilled) -+ add_dependence (first_arg, insn, REG_DEP_OUTPUT); -+ first_arg = last = insn; - } -+ else -+ break; - } -- -- /* get_attr_type may modify recog data. We want to make sure -- that recog data is valid for instruction INSN, on which -- distance_non_agu_define is called. INSN is unchanged here. */ -- extract_insn_cached (insn); -- -- if (!found) -- return -1; -- -- return distance >> 1; -+ if (!is_spilled) -+ return NULL; -+ return first_arg; - } - --/* Return the distance in half-cycles between INSN and the next -- insn that uses register number REGNO in memory address added -- to DISTANCE. Return -1 if REGNO0 is set. -- -- Put true value into *FOUND if register usage was found and -- false otherwise. -- Put true value into *REDEFINED if register redefinition was -- found and false otherwise. */ -- --static int --distance_agu_use_in_bb (unsigned int regno, -- rtx_insn *insn, int distance, rtx_insn *start, -- bool *found, bool *redefined) -+/* Add output or anti dependency from insn to first_arg to restrict its code -+ motion. */ -+static void -+avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn) - { -- basic_block bb = NULL; -- rtx_insn *next = start; -- rtx_insn *prev = NULL; -- -- *found = false; -- *redefined = false; -+ rtx set; -+ rtx tmp; - -- if (start != NULL_RTX) -+ set = single_set (insn); -+ if (!set) -+ return; -+ tmp = SET_DEST (set); -+ if (REG_P (tmp)) - { -- bb = BLOCK_FOR_INSN (start); -- if (start != BB_HEAD (bb)) -- /* If insn and start belong to the same bb, set prev to insn, -- so the call to increase_distance will increase the distance -- between insns by 1. */ -- prev = insn; -+ /* Add output dependency to the first function argument. */ -+ add_dependence (first_arg, insn, REG_DEP_OUTPUT); -+ return; - } -+ /* Add anti dependency. */ -+ add_dependence (first_arg, insn, REG_DEP_ANTI); -+} - -- while (next -- && next != insn -- && distance < LEA_SEARCH_THRESHOLD) -+/* Avoid cross block motion of function argument through adding dependency -+ from the first non-jump instruction in bb. 
*/ -+static void -+add_dependee_for_func_arg (rtx_insn *arg, basic_block bb) -+{ -+ rtx_insn *insn = BB_END (bb); -+ -+ while (insn) - { -- if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next)) -+ if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn)) - { -- distance = increase_distance(prev, next, distance); -- if (insn_uses_reg_mem (regno, next)) -- { -- /* Return DISTANCE if OP0 is used in memory -- address in NEXT. */ -- *found = true; -- return distance; -- } -- -- if (insn_defines_reg (regno, INVALID_REGNUM, next)) -+ rtx set = single_set (insn); -+ if (set) - { -- /* Return -1 if OP0 is set in NEXT. */ -- *redefined = true; -- return -1; -+ avoid_func_arg_motion (arg, insn); -+ return; - } -- -- prev = next; - } -- -- if (next == BB_END (bb)) -- break; -- -- next = NEXT_INSN (next); -+ if (insn == BB_HEAD (bb)) -+ return; -+ insn = PREV_INSN (insn); - } -- -- return distance; - } - --/* Return the distance between INSN and the next insn that uses -- register number REGNO0 in memory address. Return -1 if no such -- a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */ -+/* Hook for pre-reload schedule - avoid motion of function arguments -+ passed in likely spilled HW registers. */ -+static void -+ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail) -+{ -+ rtx_insn *insn; -+ rtx_insn *first_arg = NULL; -+ if (reload_completed) -+ return; -+ while (head != tail && DEBUG_INSN_P (head)) -+ head = NEXT_INSN (head); -+ for (insn = tail; insn != head; insn = PREV_INSN (insn)) -+ if (INSN_P (insn) && CALL_P (insn)) -+ { -+ first_arg = add_parameter_dependencies (insn, head); -+ if (first_arg) -+ { -+ /* Add dependee for first argument to predecessors if only -+ region contains more than one block. */ -+ basic_block bb = BLOCK_FOR_INSN (insn); -+ int rgn = CONTAINING_RGN (bb->index); -+ int nr_blks = RGN_NR_BLOCKS (rgn); -+ /* Skip trivial regions and region head blocks that can have -+ predecessors outside of region. */ -+ if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0) -+ { -+ edge e; -+ edge_iterator ei; -+ -+ /* Regions are SCCs with the exception of selective -+ scheduling with pipelining of outer blocks enabled. -+ So also check that immediate predecessors of a non-head -+ block are in the same region. */ -+ FOR_EACH_EDGE (e, ei, bb->preds) -+ { -+ /* Avoid creating of loop-carried dependencies through -+ using topological ordering in the region. */ -+ if (rgn == CONTAINING_RGN (e->src->index) -+ && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index)) -+ add_dependee_for_func_arg (first_arg, e->src); -+ } -+ } -+ insn = first_arg; -+ if (insn == head) -+ break; -+ } -+ } -+ else if (first_arg) -+ avoid_func_arg_motion (first_arg, insn); -+} - -+/* Hook for pre-reload schedule - set priority of moves from likely spilled -+ HW registers to maximum, to schedule them at soon as possible. These are -+ moves from function argument registers at the top of the function entry -+ and moves from function return value registers after call. 
*/ - static int --distance_agu_use (unsigned int regno0, rtx_insn *insn) -+ix86_adjust_priority (rtx_insn *insn, int priority) - { -- basic_block bb = BLOCK_FOR_INSN (insn); -- int distance = 0; -- bool found = false; -- bool redefined = false; -+ rtx set; - -- if (insn != BB_END (bb)) -- distance = distance_agu_use_in_bb (regno0, insn, distance, -- NEXT_INSN (insn), -- &found, &redefined); -+ if (reload_completed) -+ return priority; - -- if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD) -+ if (!NONDEBUG_INSN_P (insn)) -+ return priority; -+ -+ set = single_set (insn); -+ if (set) - { -- edge e; -- edge_iterator ei; -- bool simple_loop = false; -+ rtx tmp = SET_SRC (set); -+ if (REG_P (tmp) -+ && HARD_REGISTER_P (tmp) -+ && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp)) -+ && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp)))) -+ return current_sched_info->sched_max_insns_priority; -+ } - -- FOR_EACH_EDGE (e, ei, bb->succs) -- if (e->dest == bb) -- { -- simple_loop = true; -- break; -- } -+ return priority; -+} - -- if (simple_loop) -- distance = distance_agu_use_in_bb (regno0, insn, -- distance, BB_HEAD (bb), -- &found, &redefined); -- else -+/* Prepare for scheduling pass. */ -+static void -+ix86_sched_init_global (FILE *, int, int) -+{ -+ /* Install scheduling hooks for current CPU. Some of these hooks are used -+ in time-critical parts of the scheduler, so we only set them up when -+ they are actually used. */ -+ switch (ix86_tune) -+ { -+ case PROCESSOR_CORE2: -+ case PROCESSOR_NEHALEM: -+ case PROCESSOR_SANDYBRIDGE: -+ case PROCESSOR_HASWELL: -+ case PROCESSOR_GENERIC: -+ /* Do not perform multipass scheduling for pre-reload schedule -+ to save compile time. */ -+ if (reload_completed) - { -- int shortest_dist = -1; -- bool found_in_bb = false; -- bool redefined_in_bb = false; -- -- FOR_EACH_EDGE (e, ei, bb->succs) -- { -- int bb_dist -- = distance_agu_use_in_bb (regno0, insn, -- distance, BB_HEAD (e->dest), -- &found_in_bb, &redefined_in_bb); -- if (found_in_bb) -- { -- if (shortest_dist < 0) -- shortest_dist = bb_dist; -- else if (bb_dist > 0) -- shortest_dist = MIN (bb_dist, shortest_dist); -- -- found = true; -- } -- } -- -- distance = shortest_dist; -+ ix86_core2i7_init_hooks (); -+ break; - } -+ /* Fall through. */ -+ default: -+ targetm.sched.dfa_post_advance_cycle = NULL; -+ targetm.sched.first_cycle_multipass_init = NULL; -+ targetm.sched.first_cycle_multipass_begin = NULL; -+ targetm.sched.first_cycle_multipass_issue = NULL; -+ targetm.sched.first_cycle_multipass_backtrack = NULL; -+ targetm.sched.first_cycle_multipass_end = NULL; -+ targetm.sched.first_cycle_multipass_fini = NULL; -+ break; - } -- -- if (!found || redefined) -- return -1; -- -- return distance >> 1; - } - --/* Define this macro to tune LEA priority vs ADD, it take effect when -- there is a dilemma of choicing LEA or ADD -- Negative value: ADD is more preferred than LEA -- Zero: Netrual -- Positive value: LEA is more preferred than ADD*/ --#define IX86_LEA_PRIORITY 0 -- --/* Return true if usage of lea INSN has performance advantage -- over a sequence of instructions. Instructions sequence has -- SPLIT_COST cycles higher latency than lea latency. */ -+ -+/* Implement TARGET_STATIC_RTX_ALIGNMENT. 
*/ - --static bool --ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1, -- unsigned int regno2, int split_cost, bool has_scale) -+static HOST_WIDE_INT -+ix86_static_rtx_alignment (machine_mode mode) - { -- int dist_define, dist_use; -- -- /* For Silvermont if using a 2-source or 3-source LEA for -- non-destructive destination purposes, or due to wanting -- ability to use SCALE, the use of LEA is justified. */ -- if (TARGET_SILVERMONT || TARGET_GOLDMONT || TARGET_GOLDMONT_PLUS -- || TARGET_TREMONT || TARGET_INTEL) -- { -- if (has_scale) -- return true; -- if (split_cost < 1) -- return false; -- if (regno0 == regno1 || regno0 == regno2) -- return false; -- return true; -- } -+ if (mode == DFmode) -+ return 64; -+ if (ALIGN_MODE_128 (mode)) -+ return MAX (128, GET_MODE_ALIGNMENT (mode)); -+ return GET_MODE_ALIGNMENT (mode); -+} - -- dist_define = distance_non_agu_define (regno1, regno2, insn); -- dist_use = distance_agu_use (regno0, insn); -+/* Implement TARGET_CONSTANT_ALIGNMENT. */ - -- if (dist_define < 0 || dist_define >= LEA_MAX_STALL) -+static HOST_WIDE_INT -+ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align) -+{ -+ if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST -+ || TREE_CODE (exp) == INTEGER_CST) - { -- /* If there is no non AGU operand definition, no AGU -- operand usage and split cost is 0 then both lea -- and non lea variants have same priority. Currently -- we prefer lea for 64 bit code and non lea on 32 bit -- code. */ -- if (dist_use < 0 && split_cost == 0) -- return TARGET_64BIT || IX86_LEA_PRIORITY; -- else -- return true; -+ machine_mode mode = TYPE_MODE (TREE_TYPE (exp)); -+ HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode); -+ return MAX (mode_align, align); - } -+ else if (!optimize_size && TREE_CODE (exp) == STRING_CST -+ && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD) -+ return BITS_PER_WORD; - -- /* With longer definitions distance lea is more preferable. -- Here we change it to take into account splitting cost and -- lea priority. */ -- dist_define += split_cost + IX86_LEA_PRIORITY; -+ return align; -+} - -- /* If there is no use in memory addess then we just check -- that split cost exceeds AGU stall. */ -- if (dist_use < 0) -- return dist_define > LEA_MAX_STALL; -+/* Implement TARGET_EMPTY_RECORD_P. */ - -- /* If this insn has both backward non-agu dependence and forward -- agu dependence, the one with short distance takes effect. */ -- return dist_define >= dist_use; -+static bool -+ix86_is_empty_record (const_tree type) -+{ -+ if (!TARGET_64BIT) -+ return false; -+ return default_is_empty_record (type); - } - --/* Return true if it is legal to clobber flags by INSN and -- false otherwise. */ -+/* Implement TARGET_WARN_PARAMETER_PASSING_ABI. 
*/ - --static bool --ix86_ok_to_clobber_flags (rtx_insn *insn) -+static void -+ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type) - { -- basic_block bb = BLOCK_FOR_INSN (insn); -- df_ref use; -- bitmap live; -+ CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); - -- while (insn) -- { -- if (NONDEBUG_INSN_P (insn)) -- { -- FOR_EACH_INSN_USE (use, insn) -- if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG) -- return false; -+ if (!cum->warn_empty) -+ return; - -- if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn)) -- return true; -- } -- -- if (insn == BB_END (bb)) -- break; -- -- insn = NEXT_INSN (insn); -- } -- -- live = df_get_live_out(bb); -- return !REGNO_REG_SET_P (live, FLAGS_REG); --} -- --/* Return true if we need to split op0 = op1 + op2 into a sequence of -- move and add to avoid AGU stalls. */ -+ if (!TYPE_EMPTY_P (type)) -+ return; - --bool --ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[]) --{ -- unsigned int regno0, regno1, regno2; -+ /* Don't warn if the function isn't visible outside of the TU. */ -+ if (cum->decl && !TREE_PUBLIC (cum->decl)) -+ return; - -- /* Check if we need to optimize. */ -- if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) -- return false; -+ const_tree ctx = get_ultimate_context (cum->decl); -+ if (ctx != NULL_TREE -+ && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx)) -+ return; - -- /* Check it is correct to split here. */ -- if (!ix86_ok_to_clobber_flags(insn)) -- return false; -+ /* If the actual size of the type is zero, then there is no change -+ in how objects of this size are passed. */ -+ if (int_size_in_bytes (type) == 0) -+ return; - -- regno0 = true_regnum (operands[0]); -- regno1 = true_regnum (operands[1]); -- regno2 = true_regnum (operands[2]); -+ warning (OPT_Wabi, "empty class %qT parameter passing ABI " -+ "changes in %<-fabi-version=12%> (GCC 8)", type); - -- /* We need to split only adds with non destructive -- destination operand. */ -- if (regno0 == regno1 || regno0 == regno2) -- return false; -- else -- return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false); -+ /* Only warn once. */ -+ cum->warn_empty = false; - } - --/* Return true if we should emit lea instruction instead of mov -- instruction. */ -+/* This hook returns name of multilib ABI. */ - --bool --ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[]) -+static const char * -+ix86_get_multilib_abi_name (void) - { -- unsigned int regno0, regno1; -- -- /* Check if we need to optimize. */ -- if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) -- return false; -- -- /* Use lea for reg to reg moves only. */ -- if (!REG_P (operands[0]) || !REG_P (operands[1])) -- return false; -- -- regno0 = true_regnum (operands[0]); -- regno1 = true_regnum (operands[1]); -- -- return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false); -+ if (!(TARGET_64BIT_P (ix86_isa_flags))) -+ return "i386"; -+ else if (TARGET_X32_P (ix86_isa_flags)) -+ return "x32"; -+ else -+ return "x86_64"; - } - --/* Return true if we need to split lea into a sequence of -- instructions to avoid AGU stalls. */ -+/* Compute the alignment for a variable for Intel MCU psABI. TYPE is -+ the data type, and ALIGN is the alignment that the object would -+ ordinarily have. */ - --bool --ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[]) -+static int -+iamcu_alignment (tree type, int align) - { -- unsigned int regno0, regno1, regno2; -- int split_cost; -- struct ix86_address parts; -- int ok; -- -- /* Check we need to optimize. 
*/ -- if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun)) -- return false; -- -- /* The "at least two components" test below might not catch simple -- move or zero extension insns if parts.base is non-NULL and parts.disp -- is const0_rtx as the only components in the address, e.g. if the -- register is %rbp or %r13. As this test is much cheaper and moves or -- zero extensions are the common case, do this check first. */ -- if (REG_P (operands[1]) -- || (SImode_address_operand (operands[1], VOIDmode) -- && REG_P (XEXP (operands[1], 0)))) -- return false; -- -- /* Check if it is OK to split here. */ -- if (!ix86_ok_to_clobber_flags (insn)) -- return false; -- -- ok = ix86_decompose_address (operands[1], &parts); -- gcc_assert (ok); -- -- /* There should be at least two components in the address. */ -- if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX) -- + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2) -- return false; -- -- /* We should not split into add if non legitimate pic -- operand is used as displacement. */ -- if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp)) -- return false; -- -- regno0 = true_regnum (operands[0]) ; -- regno1 = INVALID_REGNUM; -- regno2 = INVALID_REGNUM; -- -- if (parts.base) -- regno1 = true_regnum (parts.base); -- if (parts.index) -- regno2 = true_regnum (parts.index); -+ machine_mode mode; - -- split_cost = 0; -+ if (align < 32 || TYPE_USER_ALIGN (type)) -+ return align; - -- /* Compute how many cycles we will add to execution time -- if split lea into a sequence of instructions. */ -- if (parts.base || parts.index) -+ /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4 -+ bytes. */ -+ mode = TYPE_MODE (strip_array_types (type)); -+ switch (GET_MODE_CLASS (mode)) - { -- /* Have to use mov instruction if non desctructive -- destination form is used. */ -- if (regno1 != regno0 && regno2 != regno0) -- split_cost += 1; -- -- /* Have to add index to base if both exist. */ -- if (parts.base && parts.index) -- split_cost += 1; -- -- /* Have to use shift and adds if scale is 2 or greater. */ -- if (parts.scale > 1) -- { -- if (regno0 != regno1) -- split_cost += 1; -- else if (regno2 == regno0) -- split_cost += 4; -- else -- split_cost += parts.scale; -- } -- -- /* Have to use add instruction with immediate if -- disp is non zero. */ -- if (parts.disp && parts.disp != const0_rtx) -- split_cost += 1; -- -- /* Subtract the price of lea. */ -- split_cost -= 1; -+ case MODE_INT: -+ case MODE_COMPLEX_INT: -+ case MODE_COMPLEX_FLOAT: -+ case MODE_FLOAT: -+ case MODE_DECIMAL_FLOAT: -+ return 32; -+ default: -+ return align; - } -- -- return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost, -- parts.scale > 1); - } - --/* Emit x86 binary operand CODE in mode MODE, where the first operand -- matches destination. RTX includes clobber of FLAGS_REG. */ -+/* Compute the alignment for a static variable. -+ TYPE is the data type, and ALIGN is the alignment that -+ the object would ordinarily have. The value of this function is used -+ instead of that alignment to align the object. */ - --static void --ix86_emit_binop (enum rtx_code code, machine_mode mode, -- rtx dst, rtx src) -+int -+ix86_data_alignment (tree type, unsigned int align, bool opt) - { -- rtx op, clob; -+ /* GCC 4.8 and earlier used to incorrectly assume this alignment even -+ for symbols from other compilation units or symbols that don't need -+ to bind locally. 
In order to preserve some ABI compatibility with -+ those compilers, ensure we don't decrease alignment from what we -+ used to assume. */ - -- op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, dst, src)); -- clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); -- -- emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); --} -+ unsigned int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT); - --/* Return true if regno1 def is nearest to the insn. */ -+ /* A data structure, equal or greater than the size of a cache line -+ (64 bytes in the Pentium 4 and other recent Intel processors, including -+ processors based on Intel Core microarchitecture) should be aligned -+ so that its base address is a multiple of a cache line size. */ - --static bool --find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2) --{ -- rtx_insn *prev = insn; -- rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn)); -+ unsigned int max_align -+ = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT); - -- if (insn == start) -- return false; -- while (prev && prev != start) -+ if (max_align < BITS_PER_WORD) -+ max_align = BITS_PER_WORD; -+ -+ switch (ix86_align_data_type) - { -- if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev)) -- { -- prev = PREV_INSN (prev); -- continue; -- } -- if (insn_defines_reg (regno1, INVALID_REGNUM, prev)) -- return true; -- else if (insn_defines_reg (regno2, INVALID_REGNUM, prev)) -- return false; -- prev = PREV_INSN (prev); -+ case ix86_align_data_type_abi: opt = false; break; -+ case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break; -+ case ix86_align_data_type_cacheline: break; - } - -- /* None of the regs is defined in the bb. */ -- return false; --} -- --/* Split lea instructions into a sequence of instructions -- which are executed on ALU to avoid AGU stalls. -- It is assumed that it is allowed to clobber flags register -- at lea position. */ -- --void --ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode) --{ -- unsigned int regno0, regno1, regno2; -- struct ix86_address parts; -- rtx target, tmp; -- int ok, adds; -- -- ok = ix86_decompose_address (operands[1], &parts); -- gcc_assert (ok); -- -- target = gen_lowpart (mode, operands[0]); -- -- regno0 = true_regnum (target); -- regno1 = INVALID_REGNUM; -- regno2 = INVALID_REGNUM; -+ if (TARGET_IAMCU) -+ align = iamcu_alignment (type, align); - -- if (parts.base) -+ if (opt -+ && AGGREGATE_TYPE_P (type) -+ && TYPE_SIZE (type) -+ && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST) - { -- parts.base = gen_lowpart (mode, parts.base); -- regno1 = true_regnum (parts.base); -+ if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat) -+ && align < max_align_compat) -+ align = max_align_compat; -+ if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align) -+ && align < max_align) -+ align = max_align; - } - -- if (parts.index) -+ /* x86-64 ABI requires arrays greater than 16 bytes to be aligned -+ to 16byte boundary. */ -+ if (TARGET_64BIT) - { -- parts.index = gen_lowpart (mode, parts.index); -- regno2 = true_regnum (parts.index); -+ if ((opt ? 
AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE) -+ && TYPE_SIZE (type) -+ && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST -+ && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128) -+ && align < 128) -+ return 128; - } - -- if (parts.disp) -- parts.disp = gen_lowpart (mode, parts.disp); -+ if (!opt) -+ return align; - -- if (parts.scale > 1) -+ if (TREE_CODE (type) == ARRAY_TYPE) -+ { -+ if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64) -+ return 64; -+ if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128) -+ return 128; -+ } -+ else if (TREE_CODE (type) == COMPLEX_TYPE) - { -- /* Case r1 = r1 + ... */ -- if (regno1 == regno0) -- { -- /* If we have a case r1 = r1 + C * r2 then we -- should use multiplication which is very -- expensive. Assume cost model is wrong if we -- have such case here. */ -- gcc_assert (regno2 != regno0); -- -- for (adds = parts.scale; adds > 0; adds--) -- ix86_emit_binop (PLUS, mode, target, parts.index); -- } -- else -- { -- /* r1 = r2 + r3 * C case. Need to move r3 into r1. */ -- if (regno0 != regno2) -- emit_insn (gen_rtx_SET (target, parts.index)); -- -- /* Use shift for scaling. */ -- ix86_emit_binop (ASHIFT, mode, target, -- GEN_INT (exact_log2 (parts.scale))); -- -- if (parts.base) -- ix86_emit_binop (PLUS, mode, target, parts.base); - -- if (parts.disp && parts.disp != const0_rtx) -- ix86_emit_binop (PLUS, mode, target, parts.disp); -- } -+ if (TYPE_MODE (type) == DCmode && align < 64) -+ return 64; -+ if ((TYPE_MODE (type) == XCmode -+ || TYPE_MODE (type) == TCmode) && align < 128) -+ return 128; - } -- else if (!parts.base && !parts.index) -+ else if ((TREE_CODE (type) == RECORD_TYPE -+ || TREE_CODE (type) == UNION_TYPE -+ || TREE_CODE (type) == QUAL_UNION_TYPE) -+ && TYPE_FIELDS (type)) - { -- gcc_assert(parts.disp); -- emit_insn (gen_rtx_SET (target, parts.disp)); -+ if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64) -+ return 64; -+ if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128) -+ return 128; - } -- else -+ else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE -+ || TREE_CODE (type) == INTEGER_TYPE) - { -- if (!parts.base) -- { -- if (regno0 != regno2) -- emit_insn (gen_rtx_SET (target, parts.index)); -- } -- else if (!parts.index) -- { -- if (regno0 != regno1) -- emit_insn (gen_rtx_SET (target, parts.base)); -- } -- else -- { -- if (regno0 == regno1) -- tmp = parts.index; -- else if (regno0 == regno2) -- tmp = parts.base; -- else -- { -- rtx tmp1; -+ if (TYPE_MODE (type) == DFmode && align < 64) -+ return 64; -+ if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128) -+ return 128; -+ } - -- /* Find better operand for SET instruction, depending -- on which definition is farther from the insn. */ -- if (find_nearest_reg_def (insn, regno1, regno2)) -- tmp = parts.index, tmp1 = parts.base; -- else -- tmp = parts.base, tmp1 = parts.index; -+ return align; -+} - -- emit_insn (gen_rtx_SET (target, tmp)); -+/* Compute the alignment for a local variable or a stack slot. EXP is -+ the data type or decl itself, MODE is the widest mode available and -+ ALIGN is the alignment that the object would ordinarily have. The -+ value of this macro is used instead of that alignment to align the -+ object. 
*/ - -- if (parts.disp && parts.disp != const0_rtx) -- ix86_emit_binop (PLUS, mode, target, parts.disp); -+unsigned int -+ix86_local_alignment (tree exp, machine_mode mode, -+ unsigned int align) -+{ -+ tree type, decl; - -- ix86_emit_binop (PLUS, mode, target, tmp1); -- return; -- } -+ if (exp && DECL_P (exp)) -+ { -+ type = TREE_TYPE (exp); -+ decl = exp; -+ } -+ else -+ { -+ type = exp; -+ decl = NULL; -+ } - -- ix86_emit_binop (PLUS, mode, target, tmp); -- } -+ /* Don't do dynamic stack realignment for long long objects with -+ -mpreferred-stack-boundary=2. */ -+ if (!TARGET_64BIT -+ && align == 64 -+ && ix86_preferred_stack_boundary < 64 -+ && (mode == DImode || (type && TYPE_MODE (type) == DImode)) -+ && (!type || !TYPE_USER_ALIGN (type)) -+ && (!decl || !DECL_USER_ALIGN (decl))) -+ align = 32; - -- if (parts.disp && parts.disp != const0_rtx) -- ix86_emit_binop (PLUS, mode, target, parts.disp); -+ /* If TYPE is NULL, we are allocating a stack slot for caller-save -+ register in MODE. We will return the largest alignment of XF -+ and DF. */ -+ if (!type) -+ { -+ if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode)) -+ align = GET_MODE_ALIGNMENT (DFmode); -+ return align; - } --} - --/* Return true if it is ok to optimize an ADD operation to LEA -- operation to avoid flag register consumation. For most processors, -- ADD is faster than LEA. For the processors like BONNELL, if the -- destination register of LEA holds an actual address which will be -- used soon, LEA is better and otherwise ADD is better. */ -+ /* Don't increase alignment for Intel MCU psABI. */ -+ if (TARGET_IAMCU) -+ return align; - --bool --ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[]) --{ -- unsigned int regno0 = true_regnum (operands[0]); -- unsigned int regno1 = true_regnum (operands[1]); -- unsigned int regno2 = true_regnum (operands[2]); -+ /* x86-64 ABI requires arrays greater than 16 bytes to be aligned -+ to 16byte boundary. Exact wording is: - -- /* If a = b + c, (a!=b && a!=c), must use lea form. */ -- if (regno0 != regno1 && regno0 != regno2) -- return true; -+ An array uses the same alignment as its elements, except that a local or -+ global array variable of length at least 16 bytes or -+ a C99 variable-length array variable always has alignment of at least 16 bytes. - -- if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun)) -- return false; -+ This was added to allow use of aligned SSE instructions at arrays. This -+ rule is meant for static storage (where compiler cannot do the analysis -+ by itself). We follow it for automatic variables only when convenient. -+ We fully control everything in the function compiled and functions from -+ other unit cannot rely on the alignment. - -- return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false); -+ Exclude va_list type. It is the common case of local array where -+ we cannot benefit from the alignment. -+ -+ TODO: Probably one should optimize for size only when var is not escaping. 
*/ -+ if (TARGET_64BIT && optimize_function_for_speed_p (cfun) -+ && TARGET_SSE) -+ { -+ if (AGGREGATE_TYPE_P (type) -+ && (va_list_type_node == NULL_TREE -+ || (TYPE_MAIN_VARIANT (type) -+ != TYPE_MAIN_VARIANT (va_list_type_node))) -+ && TYPE_SIZE (type) -+ && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST -+ && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128) -+ && align < 128) -+ return 128; -+ } -+ if (TREE_CODE (type) == ARRAY_TYPE) -+ { -+ if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64) -+ return 64; -+ if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128) -+ return 128; -+ } -+ else if (TREE_CODE (type) == COMPLEX_TYPE) -+ { -+ if (TYPE_MODE (type) == DCmode && align < 64) -+ return 64; -+ if ((TYPE_MODE (type) == XCmode -+ || TYPE_MODE (type) == TCmode) && align < 128) -+ return 128; -+ } -+ else if ((TREE_CODE (type) == RECORD_TYPE -+ || TREE_CODE (type) == UNION_TYPE -+ || TREE_CODE (type) == QUAL_UNION_TYPE) -+ && TYPE_FIELDS (type)) -+ { -+ if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64) -+ return 64; -+ if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128) -+ return 128; -+ } -+ else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE -+ || TREE_CODE (type) == INTEGER_TYPE) -+ { -+ -+ if (TYPE_MODE (type) == DFmode && align < 64) -+ return 64; -+ if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128) -+ return 128; -+ } -+ return align; - } - --/* Return true if destination reg of SET_BODY is shift count of -- USE_BODY. */ -+/* Compute the minimum required alignment for dynamic stack realignment -+ purposes for a local variable, parameter or a stack slot. EXP is -+ the data type or decl itself, MODE is its mode and ALIGN is the -+ alignment that the object would ordinarily have. */ - --static bool --ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body) -+unsigned int -+ix86_minimum_alignment (tree exp, machine_mode mode, -+ unsigned int align) - { -- rtx set_dest; -- rtx shift_rtx; -- int i; -+ tree type, decl; - -- /* Retrieve destination of SET_BODY. */ -- switch (GET_CODE (set_body)) -+ if (exp && DECL_P (exp)) - { -- case SET: -- set_dest = SET_DEST (set_body); -- if (!set_dest || !REG_P (set_dest)) -- return false; -- break; -- case PARALLEL: -- for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--) -- if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i), -- use_body)) -- return true; -- /* FALLTHROUGH */ -- default: -- return false; -+ type = TREE_TYPE (exp); -+ decl = exp; - } -- -- /* Retrieve shift count of USE_BODY. */ -- switch (GET_CODE (use_body)) -+ else - { -- case SET: -- shift_rtx = XEXP (use_body, 1); -- break; -- case PARALLEL: -- for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--) -- if (ix86_dep_by_shift_count_body (set_body, -- XVECEXP (use_body, 0, i))) -- return true; -- /* FALLTHROUGH */ -- default: -- return false; -+ type = exp; -+ decl = NULL; - } - -- if (shift_rtx -- && (GET_CODE (shift_rtx) == ASHIFT -- || GET_CODE (shift_rtx) == LSHIFTRT -- || GET_CODE (shift_rtx) == ASHIFTRT -- || GET_CODE (shift_rtx) == ROTATE -- || GET_CODE (shift_rtx) == ROTATERT)) -- { -- rtx shift_count = XEXP (shift_rtx, 1); -+ if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64) -+ return align; - -- /* Return true if shift count is dest of SET_BODY. */ -- if (REG_P (shift_count)) -- { -- /* Add check since it can be invoked before register -- allocation in pre-reload schedule. 
*/ -- if (reload_completed -- && true_regnum (set_dest) == true_regnum (shift_count)) -- return true; -- else if (REGNO(set_dest) == REGNO(shift_count)) -- return true; -- } -+ /* Don't do dynamic stack realignment for long long objects with -+ -mpreferred-stack-boundary=2. */ -+ if ((mode == DImode || (type && TYPE_MODE (type) == DImode)) -+ && (!type || !TYPE_USER_ALIGN (type)) -+ && (!decl || !DECL_USER_ALIGN (decl))) -+ { -+ gcc_checking_assert (!TARGET_STV); -+ return 32; - } - -- return false; -+ return align; - } -+ -+/* Find a location for the static chain incoming to a nested function. -+ This is a register, unless all free registers are used by arguments. */ - --/* Return true if destination reg of SET_INSN is shift count of -- USE_INSN. */ -- --bool --ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn) -+static rtx -+ix86_static_chain (const_tree fndecl_or_type, bool incoming_p) - { -- return ix86_dep_by_shift_count_body (PATTERN (set_insn), -- PATTERN (use_insn)); --} -+ unsigned regno; - --/* Return TRUE or FALSE depending on whether the unary operator meets the -- appropriate constraints. */ -+ if (TARGET_64BIT) -+ { -+ /* We always use R10 in 64-bit mode. */ -+ regno = R10_REG; -+ } -+ else -+ { -+ const_tree fntype, fndecl; -+ unsigned int ccvt; - --bool --ix86_unary_operator_ok (enum rtx_code, -- machine_mode, -- rtx operands[2]) --{ -- /* If one of operands is memory, source and destination must match. */ -- if ((MEM_P (operands[0]) -- || MEM_P (operands[1])) -- && ! rtx_equal_p (operands[0], operands[1])) -- return false; -- return true; --} -+ /* By default in 32-bit mode we use ECX to pass the static chain. */ -+ regno = CX_REG; - --/* Return TRUE if the operands to a vec_interleave_{high,low}v2df -- are ok, keeping in mind the possible movddup alternative. */ -+ if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL) -+ { -+ fntype = TREE_TYPE (fndecl_or_type); -+ fndecl = fndecl_or_type; -+ } -+ else -+ { -+ fntype = fndecl_or_type; -+ fndecl = NULL; -+ } - --bool --ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high) --{ -- if (MEM_P (operands[0])) -- return rtx_equal_p (operands[0], operands[1 + high]); -- if (MEM_P (operands[1]) && MEM_P (operands[2])) -- return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]); -- return true; -+ ccvt = ix86_get_callcvt (fntype); -+ if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) -+ { -+ /* Fastcall functions use ecx/edx for arguments, which leaves -+ us with EAX for the static chain. -+ Thiscall functions use ecx for arguments, which also -+ leaves us with EAX for the static chain. */ -+ regno = AX_REG; -+ } -+ else if ((ccvt & IX86_CALLCVT_THISCALL) != 0) -+ { -+ /* Thiscall functions use ecx for arguments, which leaves -+ us with EAX and EDX for the static chain. -+ We are using for abi-compatibility EAX. */ -+ regno = AX_REG; -+ } -+ else if (ix86_function_regparm (fntype, fndecl) == 3) -+ { -+ /* For regparm 3, we have no free call-clobbered registers in -+ which to store the static chain. In order to implement this, -+ we have the trampoline push the static chain to the stack. -+ However, we can't push a value below the return address when -+ we call the nested function directly, so we have to use an -+ alternate entry point. For this we use ESI, and have the -+ alternate entry point push ESI, so that things appear the -+ same once we're executing the nested function. 
*/ -+ if (incoming_p) -+ { -+ if (fndecl == current_function_decl -+ && !ix86_static_chain_on_stack) -+ { -+ gcc_assert (!reload_completed); -+ ix86_static_chain_on_stack = true; -+ } -+ return gen_frame_mem (SImode, -+ plus_constant (Pmode, -+ arg_pointer_rtx, -8)); -+ } -+ regno = SI_REG; -+ } -+ } -+ -+ return gen_rtx_REG (Pmode, regno); - } - --/* Post-reload splitter for converting an SF or DFmode value in an -- SSE register into an unsigned SImode. */ -+/* Emit RTL insns to initialize the variable parts of a trampoline. -+ FNDECL is the decl of the target address; M_TRAMP is a MEM for -+ the trampoline, and CHAIN_VALUE is an RTX for the static chain -+ to be passed to the target function. */ - --void --ix86_split_convert_uns_si_sse (rtx operands[]) -+static void -+ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) - { -- machine_mode vecmode; -- rtx value, large, zero_or_two31, input, two31, x; -+ rtx mem, fnaddr; -+ int opcode; -+ int offset = 0; -+ bool need_endbr = (flag_cf_protection & CF_BRANCH); - -- large = operands[1]; -- zero_or_two31 = operands[2]; -- input = operands[3]; -- two31 = operands[4]; -- vecmode = GET_MODE (large); -- value = gen_rtx_REG (vecmode, REGNO (operands[0])); -+ fnaddr = XEXP (DECL_RTL (fndecl), 0); - -- /* Load up the value into the low element. We must ensure that the other -- elements are valid floats -- zero is the easiest such value. */ -- if (MEM_P (input)) -- { -- if (vecmode == V4SFmode) -- emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input)); -- else -- emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input)); -- } -- else -+ if (TARGET_64BIT) - { -- input = gen_rtx_REG (vecmode, REGNO (input)); -- emit_move_insn (value, CONST0_RTX (vecmode)); -- if (vecmode == V4SFmode) -- emit_insn (gen_sse_movss (value, value, input)); -- else -- emit_insn (gen_sse2_movsd (value, value, input)); -- } -- -- emit_move_insn (large, two31); -- emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31); -+ int size; - -- x = gen_rtx_fmt_ee (LE, vecmode, large, value); -- emit_insn (gen_rtx_SET (large, x)); -+ if (need_endbr) -+ { -+ /* Insert ENDBR64. */ -+ mem = adjust_address (m_tramp, SImode, offset); -+ emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode)); -+ offset += 4; -+ } - -- x = gen_rtx_AND (vecmode, zero_or_two31, large); -- emit_insn (gen_rtx_SET (zero_or_two31, x)); -+ /* Load the function address to r11. Try to load address using -+ the shorter movl instead of movabs. We may want to support -+ movq for kernel mode, but kernel does not use trampolines at -+ the moment. FNADDR is a 32bit address and may not be in -+ DImode when ptr_mode == SImode. Always use movl in this -+ case. 
*/ -+ if (ptr_mode == SImode -+ || x86_64_zext_immediate_operand (fnaddr, VOIDmode)) -+ { -+ fnaddr = copy_addr_to_reg (fnaddr); - -- x = gen_rtx_MINUS (vecmode, value, zero_or_two31); -- emit_insn (gen_rtx_SET (value, x)); -+ mem = adjust_address (m_tramp, HImode, offset); -+ emit_move_insn (mem, gen_int_mode (0xbb41, HImode)); - -- large = gen_rtx_REG (V4SImode, REGNO (large)); -- emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31))); -+ mem = adjust_address (m_tramp, SImode, offset + 2); -+ emit_move_insn (mem, gen_lowpart (SImode, fnaddr)); -+ offset += 6; -+ } -+ else -+ { -+ mem = adjust_address (m_tramp, HImode, offset); -+ emit_move_insn (mem, gen_int_mode (0xbb49, HImode)); - -- x = gen_rtx_REG (V4SImode, REGNO (value)); -- if (vecmode == V4SFmode) -- emit_insn (gen_fix_truncv4sfv4si2 (x, value)); -- else -- emit_insn (gen_sse2_cvttpd2dq (x, value)); -- value = x; -+ mem = adjust_address (m_tramp, DImode, offset + 2); -+ emit_move_insn (mem, fnaddr); -+ offset += 10; -+ } - -- emit_insn (gen_xorv4si3 (value, value, large)); --} -+ /* Load static chain using movabs to r10. Use the shorter movl -+ instead of movabs when ptr_mode == SImode. */ -+ if (ptr_mode == SImode) -+ { -+ opcode = 0xba41; -+ size = 6; -+ } -+ else -+ { -+ opcode = 0xba49; -+ size = 10; -+ } - --/* Convert an unsigned DImode value into a DFmode, using only SSE. -- Expects the 64-bit DImode to be supplied in a pair of integral -- registers. Requires SSE2; will use SSE3 if available. For x86_32, -- -mfpmath=sse, !optimize_size only. */ -+ mem = adjust_address (m_tramp, HImode, offset); -+ emit_move_insn (mem, gen_int_mode (opcode, HImode)); - --void --ix86_expand_convert_uns_didf_sse (rtx target, rtx input) --{ -- REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt; -- rtx int_xmm, fp_xmm; -- rtx biases, exponents; -- rtx x; -+ mem = adjust_address (m_tramp, ptr_mode, offset + 2); -+ emit_move_insn (mem, chain_value); -+ offset += size; - -- int_xmm = gen_reg_rtx (V4SImode); -- if (TARGET_INTER_UNIT_MOVES_TO_VEC) -- emit_insn (gen_movdi_to_sse (int_xmm, input)); -- else if (TARGET_SSE_SPLIT_REGS) -- { -- emit_clobber (int_xmm); -- emit_move_insn (gen_lowpart (DImode, int_xmm), input); -+ /* Jump to r11; the last (unused) byte is a nop, only there to -+ pad the write out to a single 32-bit store. */ -+ mem = adjust_address (m_tramp, SImode, offset); -+ emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode)); -+ offset += 4; - } - else - { -- x = gen_reg_rtx (V2DImode); -- ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0); -- emit_move_insn (int_xmm, gen_lowpart (V4SImode, x)); -- } -- -- x = gen_rtx_CONST_VECTOR (V4SImode, -- gen_rtvec (4, GEN_INT (0x43300000UL), -- GEN_INT (0x45300000UL), -- const0_rtx, const0_rtx)); -- exponents = validize_mem (force_const_mem (V4SImode, x)); -- -- /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */ -- emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents)); -- -- /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm) -- yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)). -- Similarly (0x45300000UL ## fp_value_hi_xmm) yields -- (0x1.0p84 + double(fp_value_hi_xmm)). -- Note these exponents differ by 32. */ -- -- fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm)); -- -- /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values -- in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. 
*/ -- real_ldexp (&bias_lo_rvt, &dconst1, 52); -- real_ldexp (&bias_hi_rvt, &dconst1, 84); -- biases = const_double_from_real_value (bias_lo_rvt, DFmode); -- x = const_double_from_real_value (bias_hi_rvt, DFmode); -- biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x)); -- biases = validize_mem (force_const_mem (V2DFmode, biases)); -- emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases)); -- -- /* Add the upper and lower DFmode values together. */ -- if (TARGET_SSE3) -- emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm)); -- else -- { -- x = copy_to_mode_reg (V2DFmode, fp_xmm); -- emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm)); -- emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x)); -- } -+ rtx disp, chain; - -- ix86_expand_vector_extract (false, target, fp_xmm, 0); --} -+ /* Depending on the static chain location, either load a register -+ with a constant, or push the constant to the stack. All of the -+ instructions are the same size. */ -+ chain = ix86_static_chain (fndecl, true); -+ if (REG_P (chain)) -+ { -+ switch (REGNO (chain)) -+ { -+ case AX_REG: -+ opcode = 0xb8; break; -+ case CX_REG: -+ opcode = 0xb9; break; -+ default: -+ gcc_unreachable (); -+ } -+ } -+ else -+ opcode = 0x68; - --/* Not used, but eases macroization of patterns. */ --void --ix86_expand_convert_uns_sixf_sse (rtx, rtx) --{ -- gcc_unreachable (); --} -+ if (need_endbr) -+ { -+ /* Insert ENDBR32. */ -+ mem = adjust_address (m_tramp, SImode, offset); -+ emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode)); -+ offset += 4; -+ } - --/* Convert an unsigned SImode value into a DFmode. Only currently used -- for SSE, but applicable anywhere. */ -+ mem = adjust_address (m_tramp, QImode, offset); -+ emit_move_insn (mem, gen_int_mode (opcode, QImode)); - --void --ix86_expand_convert_uns_sidf_sse (rtx target, rtx input) --{ -- REAL_VALUE_TYPE TWO31r; -- rtx x, fp; -+ mem = adjust_address (m_tramp, SImode, offset + 1); -+ emit_move_insn (mem, chain_value); -+ offset += 5; - -- x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1), -- NULL, 1, OPTAB_DIRECT); -+ mem = adjust_address (m_tramp, QImode, offset); -+ emit_move_insn (mem, gen_int_mode (0xe9, QImode)); -+ -+ mem = adjust_address (m_tramp, SImode, offset + 1); - -- fp = gen_reg_rtx (DFmode); -- emit_insn (gen_floatsidf2 (fp, x)); -+ /* Compute offset from the end of the jmp to the target function. -+ In the case in which the trampoline stores the static chain on -+ the stack, we need to skip the first insn which pushes the -+ (call-saved) register static chain; this push is 1 byte. */ -+ offset += 5; -+ int skip = MEM_P (chain) ? 1 : 0; -+ /* Skip ENDBR32 at the entry of the target function. 
*/ -+ if (need_endbr -+ && !cgraph_node::get (fndecl)->only_called_directly_p ()) -+ skip += 4; -+ disp = expand_binop (SImode, sub_optab, fnaddr, -+ plus_constant (Pmode, XEXP (m_tramp, 0), -+ offset - skip), -+ NULL_RTX, 1, OPTAB_DIRECT); -+ emit_move_insn (mem, disp); -+ } - -- real_ldexp (&TWO31r, &dconst1, 31); -- x = const_double_from_real_value (TWO31r, DFmode); -+ gcc_assert (offset <= TRAMPOLINE_SIZE); - -- x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT); -- if (x != target) -- emit_move_insn (target, x); -+#ifdef HAVE_ENABLE_EXECUTE_STACK -+#ifdef CHECK_EXECUTE_STACK_ENABLED -+ if (CHECK_EXECUTE_STACK_ENABLED) -+#endif -+ emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"), -+ LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode); -+#endif - } - --/* Convert a signed DImode value into a DFmode. Only used for SSE in -- 32-bit mode; otherwise we have a direct convert instruction. */ -- --void --ix86_expand_convert_sign_didf_sse (rtx target, rtx input) -+static bool -+ix86_allocate_stack_slots_for_args (void) - { -- REAL_VALUE_TYPE TWO32r; -- rtx fp_lo, fp_hi, x; -- -- fp_lo = gen_reg_rtx (DFmode); -- fp_hi = gen_reg_rtx (DFmode); -+ /* Naked functions should not allocate stack slots for arguments. */ -+ return !ix86_function_naked (current_function_decl); -+} - -- emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input))); -- -- real_ldexp (&TWO32r, &dconst1, 32); -- x = const_double_from_real_value (TWO32r, DFmode); -- fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT); -- -- ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input)); -- -- x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target, -- 0, OPTAB_DIRECT); -- if (x != target) -- emit_move_insn (target, x); --} -- --/* Convert an unsigned SImode value into a SFmode, using only SSE. -- For x86_32, -mfpmath=sse, !optimize_size only. */ --void --ix86_expand_convert_uns_sisf_sse (rtx target, rtx input) --{ -- REAL_VALUE_TYPE ONE16r; -- rtx fp_hi, fp_lo, int_hi, int_lo, x; -- -- real_ldexp (&ONE16r, &dconst1, 16); -- x = const_double_from_real_value (ONE16r, SFmode); -- int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff), -- NULL, 0, OPTAB_DIRECT); -- int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16), -- NULL, 0, OPTAB_DIRECT); -- fp_hi = gen_reg_rtx (SFmode); -- fp_lo = gen_reg_rtx (SFmode); -- emit_insn (gen_floatsisf2 (fp_hi, int_hi)); -- emit_insn (gen_floatsisf2 (fp_lo, int_lo)); -- fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi, -- 0, OPTAB_DIRECT); -- fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target, -- 0, OPTAB_DIRECT); -- if (!rtx_equal_p (target, fp_hi)) -- emit_move_insn (target, fp_hi); --} -- --/* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert -- a vector of unsigned ints VAL to vector of floats TARGET. 
*/ -- --void --ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val) --{ -- rtx tmp[8]; -- REAL_VALUE_TYPE TWO16r; -- machine_mode intmode = GET_MODE (val); -- machine_mode fltmode = GET_MODE (target); -- rtx (*cvt) (rtx, rtx); -- -- if (intmode == V4SImode) -- cvt = gen_floatv4siv4sf2; -- else -- cvt = gen_floatv8siv8sf2; -- tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff)); -- tmp[0] = force_reg (intmode, tmp[0]); -- tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1, -- OPTAB_DIRECT); -- tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16), -- NULL_RTX, 1, OPTAB_DIRECT); -- tmp[3] = gen_reg_rtx (fltmode); -- emit_insn (cvt (tmp[3], tmp[1])); -- tmp[4] = gen_reg_rtx (fltmode); -- emit_insn (cvt (tmp[4], tmp[2])); -- real_ldexp (&TWO16r, &dconst1, 16); -- tmp[5] = const_double_from_real_value (TWO16r, SFmode); -- tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5])); -- tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1, -- OPTAB_DIRECT); -- tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1, -- OPTAB_DIRECT); -- if (tmp[7] != target) -- emit_move_insn (target, tmp[7]); --} -- --/* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc* -- pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*. -- This is done by doing just signed conversion if < 0x1p31, and otherwise by -- subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */ -- --rtx --ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp) --{ -- REAL_VALUE_TYPE TWO31r; -- rtx two31r, tmp[4]; -- machine_mode mode = GET_MODE (val); -- machine_mode scalarmode = GET_MODE_INNER (mode); -- machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode; -- rtx (*cmp) (rtx, rtx, rtx, rtx); -- int i; -- -- for (i = 0; i < 3; i++) -- tmp[i] = gen_reg_rtx (mode); -- real_ldexp (&TWO31r, &dconst1, 31); -- two31r = const_double_from_real_value (TWO31r, scalarmode); -- two31r = ix86_build_const_vector (mode, 1, two31r); -- two31r = force_reg (mode, two31r); -- switch (mode) -- { -- case E_V8SFmode: cmp = gen_avx_maskcmpv8sf3; break; -- case E_V4SFmode: cmp = gen_sse_maskcmpv4sf3; break; -- case E_V4DFmode: cmp = gen_avx_maskcmpv4df3; break; -- case E_V2DFmode: cmp = gen_sse2_maskcmpv2df3; break; -- default: gcc_unreachable (); -- } -- tmp[3] = gen_rtx_LE (mode, two31r, val); -- emit_insn (cmp (tmp[0], two31r, val, tmp[3])); -- tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1], -- 0, OPTAB_DIRECT); -- if (intmode == V4SImode || TARGET_AVX2) -- *xorp = expand_simple_binop (intmode, ASHIFT, -- gen_lowpart (intmode, tmp[0]), -- GEN_INT (31), NULL_RTX, 0, -- OPTAB_DIRECT); -- else -- { -- rtx two31 = GEN_INT (HOST_WIDE_INT_1U << 31); -- two31 = ix86_build_const_vector (intmode, 1, two31); -- *xorp = expand_simple_binop (intmode, AND, -- gen_lowpart (intmode, tmp[0]), -- two31, NULL_RTX, 0, -- OPTAB_DIRECT); -- } -- return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2], -- 0, OPTAB_DIRECT); --} -- --/* A subroutine of ix86_build_signbit_mask. If VECT is true, -- then replicate the value for all elements of the vector -- register. 
*/ -- --rtx --ix86_build_const_vector (machine_mode mode, bool vect, rtx value) -+static bool -+ix86_warn_func_return (tree decl) - { -- int i, n_elt; -- rtvec v; -- machine_mode scalar_mode; -- -- switch (mode) -- { -- case E_V64QImode: -- case E_V32QImode: -- case E_V16QImode: -- case E_V32HImode: -- case E_V16HImode: -- case E_V8HImode: -- case E_V16SImode: -- case E_V8SImode: -- case E_V4SImode: -- case E_V8DImode: -- case E_V4DImode: -- case E_V2DImode: -- gcc_assert (vect); -- /* FALLTHRU */ -- case E_V16SFmode: -- case E_V8SFmode: -- case E_V4SFmode: -- case E_V8DFmode: -- case E_V4DFmode: -- case E_V2DFmode: -- n_elt = GET_MODE_NUNITS (mode); -- v = rtvec_alloc (n_elt); -- scalar_mode = GET_MODE_INNER (mode); -- -- RTVEC_ELT (v, 0) = value; -- -- for (i = 1; i < n_elt; ++i) -- RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode); -- -- return gen_rtx_CONST_VECTOR (mode, v); -- -- default: -- gcc_unreachable (); -- } -+ /* Naked functions are implemented entirely in assembly, including the -+ return sequence, so suppress warnings about this. */ -+ return !ix86_function_naked (decl); - } -- --/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders -- and ix86_expand_int_vcond. Create a mask for the sign bit in MODE -- for an SSE register. If VECT is true, then replicate the mask for -- all elements of the vector register. If INVERT is true, then create -- a mask excluding the sign bit. */ -- --rtx --ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert) -+ -+/* Return the shift count of a vector by scalar shift builtin second argument -+ ARG1. */ -+static tree -+ix86_vector_shift_count (tree arg1) - { -- machine_mode vec_mode, imode; -- wide_int w; -- rtx mask, v; -- -- switch (mode) -+ if (tree_fits_uhwi_p (arg1)) -+ return arg1; -+ else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8) - { -- case E_V16SImode: -- case E_V16SFmode: -- case E_V8SImode: -- case E_V4SImode: -- case E_V8SFmode: -- case E_V4SFmode: -- vec_mode = mode; -- imode = SImode; -- break; -- -- case E_V8DImode: -- case E_V4DImode: -- case E_V2DImode: -- case E_V8DFmode: -- case E_V4DFmode: -- case E_V2DFmode: -- vec_mode = mode; -- imode = DImode; -- break; -- -- case E_TImode: -- case E_TFmode: -- vec_mode = VOIDmode; -- imode = TImode; -- break; -- -- default: -- gcc_unreachable (); -+ /* The count argument is weird, passed in as various 128-bit -+ (or 64-bit) vectors, the low 64 bits from it are the count. */ -+ unsigned char buf[16]; -+ int len = native_encode_expr (arg1, buf, 16); -+ if (len == 0) -+ return NULL_TREE; -+ tree t = native_interpret_expr (uint64_type_node, buf, len); -+ if (t && tree_fits_uhwi_p (t)) -+ return t; - } -- -- machine_mode inner_mode = GET_MODE_INNER (mode); -- w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1, -- GET_MODE_BITSIZE (inner_mode)); -- if (invert) -- w = wi::bit_not (w); -- -- /* Force this value into the low part of a fp vector constant. */ -- mask = immed_wide_int_const (w, imode); -- mask = gen_lowpart (inner_mode, mask); -- -- if (vec_mode == VOIDmode) -- return force_reg (inner_mode, mask); -- -- v = ix86_build_const_vector (vec_mode, vect, mask); -- return force_reg (vec_mode, v); -+ return NULL_TREE; - } - --/* Generate code for floating point ABS or NEG. 
*/ -- --void --ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode, -- rtx operands[]) -+static tree -+ix86_fold_builtin (tree fndecl, int n_args, -+ tree *args, bool ignore ATTRIBUTE_UNUSED) - { -- rtx mask, set, dst, src; -- bool use_sse = false; -- bool vector_mode = VECTOR_MODE_P (mode); -- machine_mode vmode = mode; -- -- if (vector_mode) -- use_sse = true; -- else if (mode == TFmode) -- use_sse = true; -- else if (TARGET_SSE_MATH) -- { -- use_sse = SSE_FLOAT_MODE_P (mode); -- if (mode == SFmode) -- vmode = V4SFmode; -- else if (mode == DFmode) -- vmode = V2DFmode; -- } -- -- /* NEG and ABS performed with SSE use bitwise mask operations. -- Create the appropriate mask now. */ -- if (use_sse) -- mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS); -- else -- mask = NULL_RTX; -- -- dst = operands[0]; -- src = operands[1]; -- -- set = gen_rtx_fmt_e (code, mode, src); -- set = gen_rtx_SET (dst, set); -- -- if (mask) -+ if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD) - { -- rtx use, clob; -- rtvec par; -+ enum ix86_builtins fn_code -+ = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl); -+ enum rtx_code rcode; -+ bool is_vshift; -+ unsigned HOST_WIDE_INT mask; - -- use = gen_rtx_USE (VOIDmode, mask); -- if (vector_mode) -- par = gen_rtvec (2, set, use); -- else -+ switch (fn_code) - { -- clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); -- par = gen_rtvec (3, set, use, clob); -- } -- emit_insn (gen_rtx_PARALLEL (VOIDmode, par)); -- } -- else -- emit_insn (set); --} -- --/* Expand a copysign operation. Special case operand 0 being a constant. */ -- --void --ix86_expand_copysign (rtx operands[]) --{ -- machine_mode mode, vmode; -- rtx dest, op0, op1, mask, nmask; -- -- dest = operands[0]; -- op0 = operands[1]; -- op1 = operands[2]; -- -- mode = GET_MODE (dest); -+ case IX86_BUILTIN_CPU_IS: -+ case IX86_BUILTIN_CPU_SUPPORTS: -+ gcc_assert (n_args == 1); -+ return fold_builtin_cpu (fndecl, args); - -- if (mode == SFmode) -- vmode = V4SFmode; -- else if (mode == DFmode) -- vmode = V2DFmode; -- else -- vmode = mode; -+ case IX86_BUILTIN_NANQ: -+ case IX86_BUILTIN_NANSQ: -+ { -+ tree type = TREE_TYPE (TREE_TYPE (fndecl)); -+ const char *str = c_getstr (*args); -+ int quiet = fn_code == IX86_BUILTIN_NANQ; -+ REAL_VALUE_TYPE real; - -- if (CONST_DOUBLE_P (op0)) -- { -- rtx (*copysign_insn)(rtx, rtx, rtx, rtx); -+ if (str && real_nan (&real, str, quiet, TYPE_MODE (type))) -+ return build_real (type, real); -+ return NULL_TREE; -+ } - -- if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0))) -- op0 = simplify_unary_operation (ABS, mode, op0, mode); -+ case IX86_BUILTIN_INFQ: -+ case IX86_BUILTIN_HUGE_VALQ: -+ { -+ tree type = TREE_TYPE (TREE_TYPE (fndecl)); -+ REAL_VALUE_TYPE inf; -+ real_inf (&inf); -+ return build_real (type, inf); -+ } - -- if (mode == SFmode || mode == DFmode) -- { -- if (op0 == CONST0_RTX (mode)) -- op0 = CONST0_RTX (vmode); -- else -+ case IX86_BUILTIN_TZCNT16: -+ case IX86_BUILTIN_CTZS: -+ case IX86_BUILTIN_TZCNT32: -+ case IX86_BUILTIN_TZCNT64: -+ gcc_assert (n_args == 1); -+ if (TREE_CODE (args[0]) == INTEGER_CST) - { -- rtx v = ix86_build_const_vector (vmode, false, op0); -- -- op0 = force_reg (vmode, v); -+ tree type = TREE_TYPE (TREE_TYPE (fndecl)); -+ tree arg = args[0]; -+ if (fn_code == IX86_BUILTIN_TZCNT16 -+ || fn_code == IX86_BUILTIN_CTZS) -+ arg = fold_convert (short_unsigned_type_node, arg); -+ if (integer_zerop (arg)) -+ return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg))); -+ else -+ return fold_const_call 
(CFN_CTZ, type, arg); - } -- } -- else if (op0 != CONST0_RTX (mode)) -- op0 = force_reg (mode, op0); -- -- mask = ix86_build_signbit_mask (vmode, 0, 0); -- -- if (mode == SFmode) -- copysign_insn = gen_copysignsf3_const; -- else if (mode == DFmode) -- copysign_insn = gen_copysigndf3_const; -- else -- copysign_insn = gen_copysigntf3_const; -- -- emit_insn (copysign_insn (dest, op0, op1, mask)); -- } -- else -- { -- rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx); -- -- nmask = ix86_build_signbit_mask (vmode, 0, 1); -- mask = ix86_build_signbit_mask (vmode, 0, 0); -- -- if (mode == SFmode) -- copysign_insn = gen_copysignsf3_var; -- else if (mode == DFmode) -- copysign_insn = gen_copysigndf3_var; -- else -- copysign_insn = gen_copysigntf3_var; -- -- emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask)); -- } --} -+ break; - --/* Deconstruct a copysign operation into bit masks. Operand 0 is known to -- be a constant, and so has already been expanded into a vector constant. */ -+ case IX86_BUILTIN_LZCNT16: -+ case IX86_BUILTIN_CLZS: -+ case IX86_BUILTIN_LZCNT32: -+ case IX86_BUILTIN_LZCNT64: -+ gcc_assert (n_args == 1); -+ if (TREE_CODE (args[0]) == INTEGER_CST) -+ { -+ tree type = TREE_TYPE (TREE_TYPE (fndecl)); -+ tree arg = args[0]; -+ if (fn_code == IX86_BUILTIN_LZCNT16 -+ || fn_code == IX86_BUILTIN_CLZS) -+ arg = fold_convert (short_unsigned_type_node, arg); -+ if (integer_zerop (arg)) -+ return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg))); -+ else -+ return fold_const_call (CFN_CLZ, type, arg); -+ } -+ break; - --void --ix86_split_copysign_const (rtx operands[]) --{ -- machine_mode mode, vmode; -- rtx dest, op0, mask, x; -- -- dest = operands[0]; -- op0 = operands[1]; -- mask = operands[3]; -- -- mode = GET_MODE (dest); -- vmode = GET_MODE (mask); -- -- dest = lowpart_subreg (vmode, dest, mode); -- x = gen_rtx_AND (vmode, dest, mask); -- emit_insn (gen_rtx_SET (dest, x)); -- -- if (op0 != CONST0_RTX (vmode)) -- { -- x = gen_rtx_IOR (vmode, dest, op0); -- emit_insn (gen_rtx_SET (dest, x)); -- } --} -- --/* Deconstruct a copysign operation into bit masks. Operand 0 is variable, -- so we have to do two masks. */ -- --void --ix86_split_copysign_var (rtx operands[]) --{ -- machine_mode mode, vmode; -- rtx dest, scratch, op0, op1, mask, nmask, x; -- -- dest = operands[0]; -- scratch = operands[1]; -- op0 = operands[2]; -- op1 = operands[3]; -- nmask = operands[4]; -- mask = operands[5]; -- -- mode = GET_MODE (dest); -- vmode = GET_MODE (mask); -- -- if (rtx_equal_p (op0, op1)) -- { -- /* Shouldn't happen often (it's useless, obviously), but when it does -- we'd generate incorrect code if we continue below. 
*/ -- emit_move_insn (dest, op0); -- return; -- } -- -- if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */ -- { -- gcc_assert (REGNO (op1) == REGNO (scratch)); -- -- x = gen_rtx_AND (vmode, scratch, mask); -- emit_insn (gen_rtx_SET (scratch, x)); -- -- dest = mask; -- op0 = lowpart_subreg (vmode, op0, mode); -- x = gen_rtx_NOT (vmode, dest); -- x = gen_rtx_AND (vmode, x, op0); -- emit_insn (gen_rtx_SET (dest, x)); -- } -- else -- { -- if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */ -- { -- x = gen_rtx_AND (vmode, scratch, mask); -- } -- else /* alternative 2,4 */ -- { -- gcc_assert (REGNO (mask) == REGNO (scratch)); -- op1 = lowpart_subreg (vmode, op1, mode); -- x = gen_rtx_AND (vmode, scratch, op1); -- } -- emit_insn (gen_rtx_SET (scratch, x)); -- -- if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */ -- { -- dest = lowpart_subreg (vmode, op0, mode); -- x = gen_rtx_AND (vmode, dest, nmask); -- } -- else /* alternative 3,4 */ -- { -- gcc_assert (REGNO (nmask) == REGNO (dest)); -- dest = nmask; -- op0 = lowpart_subreg (vmode, op0, mode); -- x = gen_rtx_AND (vmode, dest, op0); -- } -- emit_insn (gen_rtx_SET (dest, x)); -- } -- -- x = gen_rtx_IOR (vmode, dest, scratch); -- emit_insn (gen_rtx_SET (dest, x)); --} -- --/* Expand an xorsign operation. */ -- --void --ix86_expand_xorsign (rtx operands[]) --{ -- rtx (*xorsign_insn)(rtx, rtx, rtx, rtx); -- machine_mode mode, vmode; -- rtx dest, op0, op1, mask; -- -- dest = operands[0]; -- op0 = operands[1]; -- op1 = operands[2]; -- -- mode = GET_MODE (dest); -- -- if (mode == SFmode) -- { -- xorsign_insn = gen_xorsignsf3_1; -- vmode = V4SFmode; -- } -- else if (mode == DFmode) -- { -- xorsign_insn = gen_xorsigndf3_1; -- vmode = V2DFmode; -- } -- else -- gcc_unreachable (); -- -- mask = ix86_build_signbit_mask (vmode, 0, 0); -- -- emit_insn (xorsign_insn (dest, op0, op1, mask)); --} -- --/* Deconstruct an xorsign operation into bit masks. */ -- --void --ix86_split_xorsign (rtx operands[]) --{ -- machine_mode mode, vmode; -- rtx dest, op0, mask, x; -- -- dest = operands[0]; -- op0 = operands[1]; -- mask = operands[3]; -- -- mode = GET_MODE (dest); -- vmode = GET_MODE (mask); -- -- dest = lowpart_subreg (vmode, dest, mode); -- x = gen_rtx_AND (vmode, dest, mask); -- emit_insn (gen_rtx_SET (dest, x)); -- -- op0 = lowpart_subreg (vmode, op0, mode); -- x = gen_rtx_XOR (vmode, dest, op0); -- emit_insn (gen_rtx_SET (dest, x)); --} -- --/* Return TRUE or FALSE depending on whether the first SET in INSN -- has source and destination with matching CC modes, and that the -- CC mode is at least as constrained as REQ_MODE. 
*/ -- --bool --ix86_match_ccmode (rtx insn, machine_mode req_mode) --{ -- rtx set; -- machine_mode set_mode; -- -- set = PATTERN (insn); -- if (GET_CODE (set) == PARALLEL) -- set = XVECEXP (set, 0, 0); -- gcc_assert (GET_CODE (set) == SET); -- gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE); -- -- set_mode = GET_MODE (SET_DEST (set)); -- switch (set_mode) -- { -- case E_CCNOmode: -- if (req_mode != CCNOmode -- && (req_mode != CCmode -- || XEXP (SET_SRC (set), 1) != const0_rtx)) -- return false; -- break; -- case E_CCmode: -- if (req_mode == CCGCmode) -- return false; -- /* FALLTHRU */ -- case E_CCGCmode: -- if (req_mode == CCGOCmode || req_mode == CCNOmode) -- return false; -- /* FALLTHRU */ -- case E_CCGOCmode: -- if (req_mode == CCZmode) -- return false; -- /* FALLTHRU */ -- case E_CCZmode: -- break; -- -- case E_CCGZmode: -- -- case E_CCAmode: -- case E_CCCmode: -- case E_CCOmode: -- case E_CCPmode: -- case E_CCSmode: -- if (set_mode != req_mode) -- return false; -- break; -- -- default: -- gcc_unreachable (); -- } -- -- return GET_MODE (SET_SRC (set)) == set_mode; --} -- --/* Generate insn patterns to do an integer compare of OPERANDS. */ -- --static rtx --ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1) --{ -- machine_mode cmpmode; -- rtx tmp, flags; -- -- cmpmode = SELECT_CC_MODE (code, op0, op1); -- flags = gen_rtx_REG (cmpmode, FLAGS_REG); -- -- /* This is very simple, but making the interface the same as in the -- FP case makes the rest of the code easier. */ -- tmp = gen_rtx_COMPARE (cmpmode, op0, op1); -- emit_insn (gen_rtx_SET (flags, tmp)); -- -- /* Return the test that should be put into the flags user, i.e. -- the bcc, scc, or cmov instruction. */ -- return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx); --} -+ case IX86_BUILTIN_BEXTR32: -+ case IX86_BUILTIN_BEXTR64: -+ case IX86_BUILTIN_BEXTRI32: -+ case IX86_BUILTIN_BEXTRI64: -+ gcc_assert (n_args == 2); -+ if (tree_fits_uhwi_p (args[1])) -+ { -+ unsigned HOST_WIDE_INT res = 0; -+ unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0])); -+ unsigned int start = tree_to_uhwi (args[1]); -+ unsigned int len = (start & 0xff00) >> 8; -+ start &= 0xff; -+ if (start >= prec || len == 0) -+ res = 0; -+ else if (!tree_fits_uhwi_p (args[0])) -+ break; -+ else -+ res = tree_to_uhwi (args[0]) >> start; -+ if (len > prec) -+ len = prec; -+ if (len < HOST_BITS_PER_WIDE_INT) -+ res &= (HOST_WIDE_INT_1U << len) - 1; -+ return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res); -+ } -+ break; - --/* Figure out whether to use unordered fp comparisons. 
*/ -+ case IX86_BUILTIN_BZHI32: -+ case IX86_BUILTIN_BZHI64: -+ gcc_assert (n_args == 2); -+ if (tree_fits_uhwi_p (args[1])) -+ { -+ unsigned int idx = tree_to_uhwi (args[1]) & 0xff; -+ if (idx >= TYPE_PRECISION (TREE_TYPE (args[0]))) -+ return args[0]; -+ if (idx == 0) -+ return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), 0); -+ if (!tree_fits_uhwi_p (args[0])) -+ break; -+ unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]); -+ res &= ~(HOST_WIDE_INT_M1U << idx); -+ return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res); -+ } -+ break; - --static bool --ix86_unordered_fp_compare (enum rtx_code code) --{ -- if (!TARGET_IEEE_FP) -- return false; -+ case IX86_BUILTIN_PDEP32: -+ case IX86_BUILTIN_PDEP64: -+ gcc_assert (n_args == 2); -+ if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1])) -+ { -+ unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]); -+ unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]); -+ unsigned HOST_WIDE_INT res = 0; -+ unsigned HOST_WIDE_INT m, k = 1; -+ for (m = 1; m; m <<= 1) -+ if ((mask & m) != 0) -+ { -+ if ((src & k) != 0) -+ res |= m; -+ k <<= 1; -+ } -+ return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res); -+ } -+ break; - -- switch (code) -- { -- case GT: -- case GE: -- case LT: -- case LE: -- return false; -+ case IX86_BUILTIN_PEXT32: -+ case IX86_BUILTIN_PEXT64: -+ gcc_assert (n_args == 2); -+ if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1])) -+ { -+ unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]); -+ unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]); -+ unsigned HOST_WIDE_INT res = 0; -+ unsigned HOST_WIDE_INT m, k = 1; -+ for (m = 1; m; m <<= 1) -+ if ((mask & m) != 0) -+ { -+ if ((src & m) != 0) -+ res |= k; -+ k <<= 1; -+ } -+ return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res); -+ } -+ break; - -- case EQ: -- case NE: -+ case IX86_BUILTIN_MOVMSKPS: -+ case IX86_BUILTIN_PMOVMSKB: -+ case IX86_BUILTIN_MOVMSKPD: -+ case IX86_BUILTIN_PMOVMSKB128: -+ case IX86_BUILTIN_MOVMSKPD256: -+ case IX86_BUILTIN_MOVMSKPS256: -+ case IX86_BUILTIN_PMOVMSKB256: -+ gcc_assert (n_args == 1); -+ if (TREE_CODE (args[0]) == VECTOR_CST) -+ { -+ HOST_WIDE_INT res = 0; -+ for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i) -+ { -+ tree e = VECTOR_CST_ELT (args[0], i); -+ if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e)) -+ { -+ if (wi::neg_p (wi::to_wide (e))) -+ res |= HOST_WIDE_INT_1 << i; -+ } -+ else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW (e)) -+ { -+ if (TREE_REAL_CST (e).sign) -+ res |= HOST_WIDE_INT_1 << i; -+ } -+ else -+ return NULL_TREE; -+ } -+ return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res); -+ } -+ break; - -- case LTGT: -- case UNORDERED: -- case ORDERED: -- case UNLT: -- case UNLE: -- case UNGT: -- case UNGE: -- case UNEQ: -- return true; -- -- default: -- gcc_unreachable (); -- } --} -- --machine_mode --ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1) --{ -- machine_mode mode = GET_MODE (op0); -- -- if (SCALAR_FLOAT_MODE_P (mode)) -- { -- gcc_assert (!DECIMAL_FLOAT_MODE_P (mode)); -- return CCFPmode; -- } -- -- switch (code) -- { -- /* Only zero flag is needed. */ -- case EQ: /* ZF=0 */ -- case NE: /* ZF!=0 */ -- return CCZmode; -- /* Codes needing carry flag. */ -- case GEU: /* CF=0 */ -- case LTU: /* CF=1 */ -- /* Detect overflow checks. They need just the carry flag. 
*/ -- if (GET_CODE (op0) == PLUS -- && (rtx_equal_p (op1, XEXP (op0, 0)) -- || rtx_equal_p (op1, XEXP (op0, 1)))) -- return CCCmode; -- else -- return CCmode; -- case GTU: /* CF=0 & ZF=0 */ -- case LEU: /* CF=1 | ZF=1 */ -- return CCmode; -- /* Codes possibly doable only with sign flag when -- comparing against zero. */ -- case GE: /* SF=OF or SF=0 */ -- case LT: /* SF<>OF or SF=1 */ -- if (op1 == const0_rtx) -- return CCGOCmode; -- else -- /* For other cases Carry flag is not required. */ -- return CCGCmode; -- /* Codes doable only with sign flag when comparing -- against zero, but we miss jump instruction for it -- so we need to use relational tests against overflow -- that thus needs to be zero. */ -- case GT: /* ZF=0 & SF=OF */ -- case LE: /* ZF=1 | SF<>OF */ -- if (op1 == const0_rtx) -- return CCNOmode; -- else -- return CCGCmode; -- /* strcmp pattern do (use flags) and combine may ask us for proper -- mode. */ -- case USE: -- return CCmode; -- default: -- gcc_unreachable (); -- } --} -- --/* Return the fixed registers used for condition codes. */ -- --static bool --ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2) --{ -- *p1 = FLAGS_REG; -- *p2 = INVALID_REGNUM; -- return true; --} -- --/* If two condition code modes are compatible, return a condition code -- mode which is compatible with both. Otherwise, return -- VOIDmode. */ -- --static machine_mode --ix86_cc_modes_compatible (machine_mode m1, machine_mode m2) --{ -- if (m1 == m2) -- return m1; -- -- if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC) -- return VOIDmode; -- -- if ((m1 == CCGCmode && m2 == CCGOCmode) -- || (m1 == CCGOCmode && m2 == CCGCmode)) -- return CCGCmode; -- -- if ((m1 == CCNOmode && m2 == CCGOCmode) -- || (m1 == CCGOCmode && m2 == CCNOmode)) -- return CCNOmode; -- -- if (m1 == CCZmode -- && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode)) -- return m2; -- else if (m2 == CCZmode -- && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode)) -- return m1; -- -- switch (m1) -- { -- default: -- gcc_unreachable (); -- -- case E_CCmode: -- case E_CCGCmode: -- case E_CCGOCmode: -- case E_CCNOmode: -- case E_CCAmode: -- case E_CCCmode: -- case E_CCOmode: -- case E_CCPmode: -- case E_CCSmode: -- case E_CCZmode: -- switch (m2) -- { -- default: -- return VOIDmode; -- -- case E_CCmode: -- case E_CCGCmode: -- case E_CCGOCmode: -- case E_CCNOmode: -- case E_CCAmode: -- case E_CCCmode: -- case E_CCOmode: -- case E_CCPmode: -- case E_CCSmode: -- case E_CCZmode: -- return CCmode; -- } -- -- case E_CCFPmode: -- /* These are only compatible with themselves, which we already -- checked above. */ -- return VOIDmode; -- } --} -- -- --/* Return a comparison we can do and that it is equivalent to -- swap_condition (code) apart possibly from orderedness. -- But, never change orderedness if TARGET_IEEE_FP, returning -- UNKNOWN in that case if necessary. */ -- --static enum rtx_code --ix86_fp_swap_condition (enum rtx_code code) --{ -- switch (code) -- { -- case GT: /* GTU - CF=0 & ZF=0 */ -- return TARGET_IEEE_FP ? UNKNOWN : UNLT; -- case GE: /* GEU - CF=0 */ -- return TARGET_IEEE_FP ? UNKNOWN : UNLE; -- case UNLT: /* LTU - CF=1 */ -- return TARGET_IEEE_FP ? UNKNOWN : GT; -- case UNLE: /* LEU - CF=1 | ZF=1 */ -- return TARGET_IEEE_FP ? UNKNOWN : GE; -- default: -- return swap_condition (code); -- } --} -- --/* Return cost of comparison CODE using the best strategy for performance. -- All following functions do use number of instructions as a cost metrics. 
-- In future this should be tweaked to compute bytes for optimize_size and -- take into account performance of various instructions on various CPUs. */ -- --static int --ix86_fp_comparison_cost (enum rtx_code code) --{ -- int arith_cost; -- -- /* The cost of code using bit-twiddling on %ah. */ -- switch (code) -- { -- case UNLE: -- case UNLT: -- case LTGT: -- case GT: -- case GE: -- case UNORDERED: -- case ORDERED: -- case UNEQ: -- arith_cost = 4; -- break; -- case LT: -- case NE: -- case EQ: -- case UNGE: -- arith_cost = TARGET_IEEE_FP ? 5 : 4; -- break; -- case LE: -- case UNGT: -- arith_cost = TARGET_IEEE_FP ? 6 : 4; -- break; -- default: -- gcc_unreachable (); -- } -- -- switch (ix86_fp_comparison_strategy (code)) -- { -- case IX86_FPCMP_COMI: -- return arith_cost > 4 ? 3 : 2; -- case IX86_FPCMP_SAHF: -- return arith_cost > 4 ? 4 : 3; -- default: -- return arith_cost; -- } --} -- --/* Return strategy to use for floating-point. We assume that fcomi is always -- preferrable where available, since that is also true when looking at size -- (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */ -- --enum ix86_fpcmp_strategy --ix86_fp_comparison_strategy (enum rtx_code) --{ -- /* Do fcomi/sahf based test when profitable. */ -- -- if (TARGET_CMOVE) -- return IX86_FPCMP_COMI; -- -- if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())) -- return IX86_FPCMP_SAHF; -- -- return IX86_FPCMP_ARITH; --} -- --/* Swap, force into registers, or otherwise massage the two operands -- to a fp comparison. The operands are updated in place; the new -- comparison code is returned. */ -- --static enum rtx_code --ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1) --{ -- bool unordered_compare = ix86_unordered_fp_compare (code); -- rtx op0 = *pop0, op1 = *pop1; -- machine_mode op_mode = GET_MODE (op0); -- bool is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode); -- -- /* All of the unordered compare instructions only work on registers. -- The same is true of the fcomi compare instructions. The XFmode -- compare instructions require registers except when comparing -- against zero or when converting operand 1 from fixed point to -- floating point. */ -- -- if (!is_sse -- && (unordered_compare -- || (op_mode == XFmode -- && ! (standard_80387_constant_p (op0) == 1 -- || standard_80387_constant_p (op1) == 1) -- && GET_CODE (op1) != FLOAT) -- || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI)) -- { -- op0 = force_reg (op_mode, op0); -- op1 = force_reg (op_mode, op1); -- } -- else -- { -- /* %%% We only allow op1 in memory; op0 must be st(0). So swap -- things around if they appear profitable, otherwise force op0 -- into a register. */ -- -- if (standard_80387_constant_p (op0) == 0 -- || (MEM_P (op0) -- && ! (standard_80387_constant_p (op1) == 0 -- || MEM_P (op1)))) -- { -- enum rtx_code new_code = ix86_fp_swap_condition (code); -- if (new_code != UNKNOWN) -- { -- std::swap (op0, op1); -- code = new_code; -- } -- } -- -- if (!REG_P (op0)) -- op0 = force_reg (op_mode, op0); -- -- if (CONSTANT_P (op1)) -- { -- int tmp = standard_80387_constant_p (op1); -- if (tmp == 0) -- op1 = validize_mem (force_const_mem (op_mode, op1)); -- else if (tmp == 1) -- { -- if (TARGET_CMOVE) -- op1 = force_reg (op_mode, op1); -- } -- else -- op1 = force_reg (op_mode, op1); -- } -- } -- -- /* Try to rearrange the comparison to make it cheaper. 
*/ -- if (ix86_fp_comparison_cost (code) -- > ix86_fp_comparison_cost (swap_condition (code)) -- && (REG_P (op1) || can_create_pseudo_p ())) -- { -- std::swap (op0, op1); -- code = swap_condition (code); -- if (!REG_P (op0)) -- op0 = force_reg (op_mode, op0); -- } -- -- *pop0 = op0; -- *pop1 = op1; -- return code; --} -- --/* Convert comparison codes we use to represent FP comparison to integer -- code that will result in proper branch. Return UNKNOWN if no such code -- is available. */ -- --enum rtx_code --ix86_fp_compare_code_to_integer (enum rtx_code code) --{ -- switch (code) -- { -- case GT: -- return GTU; -- case GE: -- return GEU; -- case ORDERED: -- case UNORDERED: -- return code; -- case UNEQ: -- return EQ; -- case UNLT: -- return LTU; -- case UNLE: -- return LEU; -- case LTGT: -- return NE; -- default: -- return UNKNOWN; -- } --} -- --/* Generate insn patterns to do a floating point compare of OPERANDS. */ -- --static rtx --ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1) --{ -- bool unordered_compare = ix86_unordered_fp_compare (code); -- machine_mode cmp_mode; -- rtx tmp, scratch; -- -- code = ix86_prepare_fp_compare_args (code, &op0, &op1); -- -- tmp = gen_rtx_COMPARE (CCFPmode, op0, op1); -- if (unordered_compare) -- tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP); -- -- /* Do fcomi/sahf based test when profitable. */ -- switch (ix86_fp_comparison_strategy (code)) -- { -- case IX86_FPCMP_COMI: -- cmp_mode = CCFPmode; -- emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, FLAGS_REG), tmp)); -- break; -- -- case IX86_FPCMP_SAHF: -- cmp_mode = CCFPmode; -- tmp = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW); -- scratch = gen_reg_rtx (HImode); -- emit_insn (gen_rtx_SET (scratch, tmp)); -- emit_insn (gen_x86_sahf_1 (scratch)); -- break; -- -- case IX86_FPCMP_ARITH: -- cmp_mode = CCNOmode; -- tmp = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW); -- scratch = gen_reg_rtx (HImode); -- emit_insn (gen_rtx_SET (scratch, tmp)); -- -- /* In the unordered case, we have to check C2 for NaN's, which -- doesn't happen to work out to anything nice combination-wise. -- So do some bit twiddling on the value we've got in AH to come -- up with an appropriate set of condition codes. 
*/ -- -- switch (code) -- { -- case GT: -- case UNGT: -- if (code == GT || !TARGET_IEEE_FP) -- { -- emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x45))); -- code = EQ; -- } -- else -- { -- emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45))); -- emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx)); -- emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44))); -- cmp_mode = CCmode; -- code = GEU; -- } -- break; -- case LT: -- case UNLT: -- if (code == LT && TARGET_IEEE_FP) -- { -- emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45))); -- emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx)); -- cmp_mode = CCmode; -- code = EQ; -- } -- else -- { -- emit_insn (gen_testqi_ext_1_ccno (scratch, const1_rtx)); -- code = NE; -- } -- break; -- case GE: -- case UNGE: -- if (code == GE || !TARGET_IEEE_FP) -- { -- emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x05))); -- code = EQ; -- } -- else -- { -- emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45))); -- emit_insn (gen_xorqi_ext_1_cc (scratch, scratch, const1_rtx)); -- code = NE; -- } -- break; -- case LE: -- case UNLE: -- if (code == LE && TARGET_IEEE_FP) -- { -- emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45))); -- emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx)); -- emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40))); -- cmp_mode = CCmode; -- code = LTU; -- } -- else -- { -- emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x45))); -- code = NE; -- } -- break; -- case EQ: -- case UNEQ: -- if (code == EQ && TARGET_IEEE_FP) -- { -- emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45))); -- emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40))); -- cmp_mode = CCmode; -- code = EQ; -- } -- else -- { -- emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x40))); -- code = NE; -- } -- break; -- case NE: -- case LTGT: -- if (code == NE && TARGET_IEEE_FP) -- { -- emit_insn (gen_andqi_ext_1 (scratch, scratch, GEN_INT (0x45))); -- emit_insn (gen_xorqi_ext_1_cc (scratch, scratch, -- GEN_INT (0x40))); -- code = NE; -- } -- else -- { -- emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x40))); -- code = EQ; -- } -- break; -- -- case UNORDERED: -- emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x04))); -- code = NE; -- break; -- case ORDERED: -- emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x04))); -- code = EQ; -- break; -- -- default: -- gcc_unreachable (); -- } -- break; -- -- default: -- gcc_unreachable(); -- } -- -- /* Return the test that should be put into the flags user, i.e. -- the bcc, scc, or cmov instruction. */ -- return gen_rtx_fmt_ee (code, VOIDmode, -- gen_rtx_REG (cmp_mode, FLAGS_REG), -- const0_rtx); --} -- --static rtx --ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1) --{ -- rtx ret; -- -- if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC) -- ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1); -- -- else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0))) -- { -- gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0))); -- ret = ix86_expand_fp_compare (code, op0, op1); -- } -- else -- ret = ix86_expand_int_compare (code, op0, op1); -- -- return ret; --} -- --void --ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label) --{ -- machine_mode mode = GET_MODE (op0); -- rtx tmp; -- -- /* Handle special case - vector comparsion with boolean result, transform -- it using ptest instruction. */ -- if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) -- { -- rtx flag = gen_rtx_REG (CCZmode, FLAGS_REG); -- machine_mode p_mode = GET_MODE_SIZE (mode) == 32 ? 
V4DImode : V2DImode; -- -- gcc_assert (code == EQ || code == NE); -- /* Generate XOR since we can't check that one operand is zero vector. */ -- tmp = gen_reg_rtx (mode); -- emit_insn (gen_rtx_SET (tmp, gen_rtx_XOR (mode, op0, op1))); -- tmp = gen_lowpart (p_mode, tmp); -- emit_insn (gen_rtx_SET (gen_rtx_REG (CCmode, FLAGS_REG), -- gen_rtx_UNSPEC (CCmode, -- gen_rtvec (2, tmp, tmp), -- UNSPEC_PTEST))); -- tmp = gen_rtx_fmt_ee (code, VOIDmode, flag, const0_rtx); -- tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, -- gen_rtx_LABEL_REF (VOIDmode, label), -- pc_rtx); -- emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); -- return; -- } -- -- switch (mode) -- { -- case E_SFmode: -- case E_DFmode: -- case E_XFmode: -- case E_QImode: -- case E_HImode: -- case E_SImode: -- simple: -- tmp = ix86_expand_compare (code, op0, op1); -- tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, -- gen_rtx_LABEL_REF (VOIDmode, label), -- pc_rtx); -- emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); -- return; -- -- case E_DImode: -- if (TARGET_64BIT) -- goto simple; -- /* For 32-bit target DI comparison may be performed on -- SSE registers. To allow this we should avoid split -- to SI mode which is achieved by doing xor in DI mode -- and then comparing with zero (which is recognized by -- STV pass). We don't compare using xor when optimizing -- for size. */ -- if (!optimize_insn_for_size_p () -- && TARGET_STV -- && (code == EQ || code == NE)) -- { -- op0 = force_reg (mode, gen_rtx_XOR (mode, op0, op1)); -- op1 = const0_rtx; -- } -- /* FALLTHRU */ -- case E_TImode: -- /* Expand DImode branch into multiple compare+branch. */ -- { -- rtx lo[2], hi[2]; -- rtx_code_label *label2; -- enum rtx_code code1, code2, code3; -- machine_mode submode; -- -- if (CONSTANT_P (op0) && !CONSTANT_P (op1)) -- { -- std::swap (op0, op1); -- code = swap_condition (code); -- } -- -- split_double_mode (mode, &op0, 1, lo+0, hi+0); -- split_double_mode (mode, &op1, 1, lo+1, hi+1); -- -- submode = mode == DImode ? SImode : DImode; -- -- /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to -- avoid two branches. This costs one extra insn, so disable when -- optimizing for size. */ -- -- if ((code == EQ || code == NE) -- && (!optimize_insn_for_size_p () -- || hi[1] == const0_rtx || lo[1] == const0_rtx)) -- { -- rtx xor0, xor1; -- -- xor1 = hi[0]; -- if (hi[1] != const0_rtx) -- xor1 = expand_binop (submode, xor_optab, xor1, hi[1], -- NULL_RTX, 0, OPTAB_WIDEN); -- -- xor0 = lo[0]; -- if (lo[1] != const0_rtx) -- xor0 = expand_binop (submode, xor_optab, xor0, lo[1], -- NULL_RTX, 0, OPTAB_WIDEN); -- -- tmp = expand_binop (submode, ior_optab, xor1, xor0, -- NULL_RTX, 0, OPTAB_WIDEN); -- -- ix86_expand_branch (code, tmp, const0_rtx, label); -- return; -- } -- -- /* Otherwise, if we are doing less-than or greater-or-equal-than, -- op1 is a constant and the low word is zero, then we can just -- examine the high word. Similarly for low word -1 and -- less-or-equal-than or greater-than. */ -- -- if (CONST_INT_P (hi[1])) -- switch (code) -- { -- case LT: case LTU: case GE: case GEU: -- if (lo[1] == const0_rtx) -- { -- ix86_expand_branch (code, hi[0], hi[1], label); -- return; -- } -- break; -- case LE: case LEU: case GT: case GTU: -- if (lo[1] == constm1_rtx) -- { -- ix86_expand_branch (code, hi[0], hi[1], label); -- return; -- } -- break; -- default: -- break; -- } -- -- /* Emulate comparisons that do not depend on Zero flag with -- double-word subtraction. 
Note that only Overflow, Sign -- and Carry flags are valid, so swap arguments and condition -- of comparisons that would otherwise test Zero flag. */ -- -- switch (code) -- { -- case LE: case LEU: case GT: case GTU: -- std::swap (lo[0], lo[1]); -- std::swap (hi[0], hi[1]); -- code = swap_condition (code); -- /* FALLTHRU */ -- -- case LT: case LTU: case GE: case GEU: -- { -- rtx (*cmp_insn) (rtx, rtx); -- rtx (*sbb_insn) (rtx, rtx, rtx); -- bool uns = (code == LTU || code == GEU); -- -- if (TARGET_64BIT) -- { -- cmp_insn = gen_cmpdi_1; -- sbb_insn -- = uns ? gen_subdi3_carry_ccc : gen_subdi3_carry_ccgz; -- } -- else -- { -- cmp_insn = gen_cmpsi_1; -- sbb_insn -- = uns ? gen_subsi3_carry_ccc : gen_subsi3_carry_ccgz; -- } -- -- if (!nonimmediate_operand (lo[0], submode)) -- lo[0] = force_reg (submode, lo[0]); -- if (!x86_64_general_operand (lo[1], submode)) -- lo[1] = force_reg (submode, lo[1]); -- -- if (!register_operand (hi[0], submode)) -- hi[0] = force_reg (submode, hi[0]); -- if ((uns && !nonimmediate_operand (hi[1], submode)) -- || (!uns && !x86_64_general_operand (hi[1], submode))) -- hi[1] = force_reg (submode, hi[1]); -- -- emit_insn (cmp_insn (lo[0], lo[1])); -- emit_insn (sbb_insn (gen_rtx_SCRATCH (submode), hi[0], hi[1])); -- -- tmp = gen_rtx_REG (uns ? CCCmode : CCGZmode, FLAGS_REG); -- -- ix86_expand_branch (code, tmp, const0_rtx, label); -- return; -- } -- -- default: -- break; -- } -- -- /* Otherwise, we need two or three jumps. */ -- -- label2 = gen_label_rtx (); -- -- code1 = code; -- code2 = swap_condition (code); -- code3 = unsigned_condition (code); -- -- switch (code) -- { -- case LT: case GT: case LTU: case GTU: -- break; -- -- case LE: code1 = LT; code2 = GT; break; -- case GE: code1 = GT; code2 = LT; break; -- case LEU: code1 = LTU; code2 = GTU; break; -- case GEU: code1 = GTU; code2 = LTU; break; -- -- case EQ: code1 = UNKNOWN; code2 = NE; break; -- case NE: code2 = UNKNOWN; break; -- -- default: -- gcc_unreachable (); -- } -- -- /* -- * a < b => -- * if (hi(a) < hi(b)) goto true; -- * if (hi(a) > hi(b)) goto false; -- * if (lo(a) < lo(b)) goto true; -- * false: -- */ -- -- if (code1 != UNKNOWN) -- ix86_expand_branch (code1, hi[0], hi[1], label); -- if (code2 != UNKNOWN) -- ix86_expand_branch (code2, hi[0], hi[1], label2); -- -- ix86_expand_branch (code3, lo[0], lo[1], label); -- -- if (code2 != UNKNOWN) -- emit_label (label2); -- return; -- } -- -- default: -- gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC); -- goto simple; -- } --} -- --void --ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1) --{ -- rtx ret; -- -- gcc_assert (GET_MODE (dest) == QImode); -- -- ret = ix86_expand_compare (code, op0, op1); -- PUT_MODE (ret, QImode); -- emit_insn (gen_rtx_SET (dest, ret)); --} -- --/* Expand comparison setting or clearing carry flag. Return true when -- successful and set pop for the operation. */ --static bool --ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop) --{ -- machine_mode mode -- = GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1); -- -- /* Do not handle double-mode compares that go through special path. */ -- if (mode == (TARGET_64BIT ? TImode : DImode)) -- return false; -- -- if (SCALAR_FLOAT_MODE_P (mode)) -- { -- rtx compare_op; -- rtx_insn *compare_seq; -- -- gcc_assert (!DECIMAL_FLOAT_MODE_P (mode)); -- -- /* Shortcut: following common codes never translate -- into carry flag compares. 
*/ -- if (code == EQ || code == NE || code == UNEQ || code == LTGT -- || code == ORDERED || code == UNORDERED) -- return false; -- -- /* These comparisons require zero flag; swap operands so they won't. */ -- if ((code == GT || code == UNLE || code == LE || code == UNGT) -- && !TARGET_IEEE_FP) -- { -- std::swap (op0, op1); -- code = swap_condition (code); -- } -- -- /* Try to expand the comparison and verify that we end up with -- carry flag based comparison. This fails to be true only when -- we decide to expand comparison using arithmetic that is not -- too common scenario. */ -- start_sequence (); -- compare_op = ix86_expand_fp_compare (code, op0, op1); -- compare_seq = get_insns (); -- end_sequence (); -- -- if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode) -- code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op)); -- else -- code = GET_CODE (compare_op); -- -- if (code != LTU && code != GEU) -- return false; -- -- emit_insn (compare_seq); -- *pop = compare_op; -- return true; -- } -- -- if (!INTEGRAL_MODE_P (mode)) -- return false; -- -- switch (code) -- { -- case LTU: -- case GEU: -- break; -- -- /* Convert a==0 into (unsigned)a<1. */ -- case EQ: -- case NE: -- if (op1 != const0_rtx) -- return false; -- op1 = const1_rtx; -- code = (code == EQ ? LTU : GEU); -- break; -- -- /* Convert a>b into b=b-1. */ -- case GTU: -- case LEU: -- if (CONST_INT_P (op1)) -- { -- op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0)); -- /* Bail out on overflow. We still can swap operands but that -- would force loading of the constant into register. */ -- if (op1 == const0_rtx -- || !x86_64_immediate_operand (op1, GET_MODE (op1))) -- return false; -- code = (code == GTU ? GEU : LTU); -- } -- else -- { -- std::swap (op0, op1); -- code = (code == GTU ? LTU : GEU); -- } -- break; -- -- /* Convert a>=0 into (unsigned)a<0x80000000. */ -- case LT: -- case GE: -- if (mode == DImode || op1 != const0_rtx) -- return false; -- op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode); -- code = (code == LT ? GEU : LTU); -- break; -- case LE: -- case GT: -- if (mode == DImode || op1 != constm1_rtx) -- return false; -- op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode); -- code = (code == LE ? GEU : LTU); -- break; -- -- default: -- return false; -- } -- /* Swapping operands may cause constant to appear as first operand. */ -- if (!nonimmediate_operand (op0, VOIDmode)) -- { -- if (!can_create_pseudo_p ()) -- return false; -- op0 = force_reg (mode, op0); -- } -- *pop = ix86_expand_compare (code, op0, op1); -- gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU); -- return true; --} -- --bool --ix86_expand_int_movcc (rtx operands[]) --{ -- enum rtx_code code = GET_CODE (operands[1]), compare_code; -- rtx_insn *compare_seq; -- rtx compare_op; -- machine_mode mode = GET_MODE (operands[0]); -- bool sign_bit_compare_p = false; -- rtx op0 = XEXP (operands[1], 0); -- rtx op1 = XEXP (operands[1], 1); -- -- if (GET_MODE (op0) == TImode -- || (GET_MODE (op0) == DImode -- && !TARGET_64BIT)) -- return false; -- -- start_sequence (); -- compare_op = ix86_expand_compare (code, op0, op1); -- compare_seq = get_insns (); -- end_sequence (); -- -- compare_code = GET_CODE (compare_op); -- -- if ((op1 == const0_rtx && (code == GE || code == LT)) -- || (op1 == constm1_rtx && (code == GT || code == LE))) -- sign_bit_compare_p = true; -- -- /* Don't attempt mode expansion here -- if we had to expand 5 or 6 -- HImode insns, we'd be swallowed in word prefix ops. 
*/ -- -- if ((mode != HImode || TARGET_FAST_PREFIX) -- && (mode != (TARGET_64BIT ? TImode : DImode)) -- && CONST_INT_P (operands[2]) -- && CONST_INT_P (operands[3])) -- { -- rtx out = operands[0]; -- HOST_WIDE_INT ct = INTVAL (operands[2]); -- HOST_WIDE_INT cf = INTVAL (operands[3]); -- HOST_WIDE_INT diff; -- -- diff = ct - cf; -- /* Sign bit compares are better done using shifts than we do by using -- sbb. */ -- if (sign_bit_compare_p -- || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op)) -- { -- /* Detect overlap between destination and compare sources. */ -- rtx tmp = out; -- -- if (!sign_bit_compare_p) -- { -- rtx flags; -- bool fpcmp = false; -- -- compare_code = GET_CODE (compare_op); -- -- flags = XEXP (compare_op, 0); -- -- if (GET_MODE (flags) == CCFPmode) -- { -- fpcmp = true; -- compare_code -- = ix86_fp_compare_code_to_integer (compare_code); -- } -- -- /* To simplify rest of code, restrict to the GEU case. */ -- if (compare_code == LTU) -- { -- std::swap (ct, cf); -- compare_code = reverse_condition (compare_code); -- code = reverse_condition (code); -- } -- else -- { -- if (fpcmp) -- PUT_CODE (compare_op, -- reverse_condition_maybe_unordered -- (GET_CODE (compare_op))); -- else -- PUT_CODE (compare_op, -- reverse_condition (GET_CODE (compare_op))); -- } -- diff = ct - cf; -- -- if (reg_overlap_mentioned_p (out, op0) -- || reg_overlap_mentioned_p (out, op1)) -- tmp = gen_reg_rtx (mode); -- -- if (mode == DImode) -- emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op)); -- else -- emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), -- flags, compare_op)); -- } -- else -- { -- if (code == GT || code == GE) -- code = reverse_condition (code); -- else -- { -- std::swap (ct, cf); -- diff = ct - cf; -- } -- tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1); -- } -- -- if (diff == 1) -- { -- /* -- * cmpl op0,op1 -- * sbbl dest,dest -- * [addl dest, ct] -- * -- * Size 5 - 8. -- */ -- if (ct) -- tmp = expand_simple_binop (mode, PLUS, -- tmp, GEN_INT (ct), -- copy_rtx (tmp), 1, OPTAB_DIRECT); -- } -- else if (cf == -1) -- { -- /* -- * cmpl op0,op1 -- * sbbl dest,dest -- * orl $ct, dest -- * -- * Size 8. -- */ -- tmp = expand_simple_binop (mode, IOR, -- tmp, GEN_INT (ct), -- copy_rtx (tmp), 1, OPTAB_DIRECT); -- } -- else if (diff == -1 && ct) -- { -- /* -- * cmpl op0,op1 -- * sbbl dest,dest -- * notl dest -- * [addl dest, cf] -- * -- * Size 8 - 11. -- */ -- tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1); -- if (cf) -- tmp = expand_simple_binop (mode, PLUS, -- copy_rtx (tmp), GEN_INT (cf), -- copy_rtx (tmp), 1, OPTAB_DIRECT); -- } -- else -- { -- /* -- * cmpl op0,op1 -- * sbbl dest,dest -- * [notl dest] -- * andl cf - ct, dest -- * [addl dest, ct] -- * -- * Size 8 - 11. 
-- */ -- -- if (cf == 0) -- { -- cf = ct; -- ct = 0; -- tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1); -- } -- -- tmp = expand_simple_binop (mode, AND, -- copy_rtx (tmp), -- gen_int_mode (cf - ct, mode), -- copy_rtx (tmp), 1, OPTAB_DIRECT); -- if (ct) -- tmp = expand_simple_binop (mode, PLUS, -- copy_rtx (tmp), GEN_INT (ct), -- copy_rtx (tmp), 1, OPTAB_DIRECT); -- } -- -- if (!rtx_equal_p (tmp, out)) -- emit_move_insn (copy_rtx (out), copy_rtx (tmp)); -- -- return true; -- } -- -- if (diff < 0) -- { -- machine_mode cmp_mode = GET_MODE (op0); -- enum rtx_code new_code; -- -- if (SCALAR_FLOAT_MODE_P (cmp_mode)) -- { -- gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode)); -- -- /* We may be reversing unordered compare to normal compare, that -- is not valid in general (we may convert non-trapping condition -- to trapping one), however on i386 we currently emit all -- comparisons unordered. */ -- new_code = reverse_condition_maybe_unordered (code); -- } -- else -- new_code = ix86_reverse_condition (code, cmp_mode); -- if (new_code != UNKNOWN) -- { -- std::swap (ct, cf); -- diff = -diff; -- code = new_code; -- } -- } -- -- compare_code = UNKNOWN; -- if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT -- && CONST_INT_P (op1)) -- { -- if (op1 == const0_rtx -- && (code == LT || code == GE)) -- compare_code = code; -- else if (op1 == constm1_rtx) -- { -- if (code == LE) -- compare_code = LT; -- else if (code == GT) -- compare_code = GE; -- } -- } -- -- /* Optimize dest = (op0 < 0) ? -1 : cf. */ -- if (compare_code != UNKNOWN -- && GET_MODE (op0) == GET_MODE (out) -- && (cf == -1 || ct == -1)) -- { -- /* If lea code below could be used, only optimize -- if it results in a 2 insn sequence. */ -- -- if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8 -- || diff == 3 || diff == 5 || diff == 9) -- || (compare_code == LT && ct == -1) -- || (compare_code == GE && cf == -1)) -- { -- /* -- * notl op1 (if necessary) -- * sarl $31, op1 -- * orl cf, op1 -- */ -- if (ct != -1) -- { -- cf = ct; -- ct = -1; -- code = reverse_condition (code); -- } -- -- out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1); -- -- out = expand_simple_binop (mode, IOR, -- out, GEN_INT (cf), -- out, 1, OPTAB_DIRECT); -- if (out != operands[0]) -- emit_move_insn (operands[0], out); -- -- return true; -- } -- } -- -- -- if ((diff == 1 || diff == 2 || diff == 4 || diff == 8 -- || diff == 3 || diff == 5 || diff == 9) -- && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL) -- && (mode != DImode -- || x86_64_immediate_operand (GEN_INT (cf), VOIDmode))) -- { -- /* -- * xorl dest,dest -- * cmpl op1,op2 -- * setcc dest -- * lea cf(dest*(ct-cf)),dest -- * -- * Size 14. -- * -- * This also catches the degenerate setcc-only case. -- */ -- -- rtx tmp; -- int nops; -- -- out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1); -- -- nops = 0; -- /* On x86_64 the lea instruction operates on Pmode, so we need -- to get arithmetics done in proper mode to match. 
*/ -- if (diff == 1) -- tmp = copy_rtx (out); -- else -- { -- rtx out1; -- out1 = copy_rtx (out); -- tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1)); -- nops++; -- if (diff & 1) -- { -- tmp = gen_rtx_PLUS (mode, tmp, out1); -- nops++; -- } -- } -- if (cf != 0) -- { -- tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf)); -- nops++; -- } -- if (!rtx_equal_p (tmp, out)) -- { -- if (nops == 1) -- out = force_operand (tmp, copy_rtx (out)); -- else -- emit_insn (gen_rtx_SET (copy_rtx (out), copy_rtx (tmp))); -- } -- if (!rtx_equal_p (out, operands[0])) -- emit_move_insn (operands[0], copy_rtx (out)); -- -- return true; -- } -- -- /* -- * General case: Jumpful: -- * xorl dest,dest cmpl op1, op2 -- * cmpl op1, op2 movl ct, dest -- * setcc dest jcc 1f -- * decl dest movl cf, dest -- * andl (cf-ct),dest 1: -- * addl ct,dest -- * -- * Size 20. Size 14. -- * -- * This is reasonably steep, but branch mispredict costs are -- * high on modern cpus, so consider failing only if optimizing -- * for space. -- */ -- -- if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL)) -- && BRANCH_COST (optimize_insn_for_speed_p (), -- false) >= 2) -- { -- if (cf == 0) -- { -- machine_mode cmp_mode = GET_MODE (op0); -- enum rtx_code new_code; -- -- if (SCALAR_FLOAT_MODE_P (cmp_mode)) -- { -- gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode)); -- -- /* We may be reversing unordered compare to normal compare, -- that is not valid in general (we may convert non-trapping -- condition to trapping one), however on i386 we currently -- emit all comparisons unordered. */ -- new_code = reverse_condition_maybe_unordered (code); -- } -- else -- { -- new_code = ix86_reverse_condition (code, cmp_mode); -- if (compare_code != UNKNOWN && new_code != UNKNOWN) -- compare_code = reverse_condition (compare_code); -- } -- -- if (new_code != UNKNOWN) -- { -- cf = ct; -- ct = 0; -- code = new_code; -- } -- } -- -- if (compare_code != UNKNOWN) -- { -- /* notl op1 (if needed) -- sarl $31, op1 -- andl (cf-ct), op1 -- addl ct, op1 -- -- For x < 0 (resp. x <= -1) there will be no notl, -- so if possible swap the constants to get rid of the -- complement. -- True/false will be -1/0 while code below (store flag -- followed by decrement) is 0/-1, so the constants need -- to be exchanged once more. */ -- -- if (compare_code == GE || !cf) -- { -- code = reverse_condition (code); -- compare_code = LT; -- } -- else -- std::swap (ct, cf); -- -- out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1); -- } -- else -- { -- out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1); -- -- out = expand_simple_binop (mode, PLUS, copy_rtx (out), -- constm1_rtx, -- copy_rtx (out), 1, OPTAB_DIRECT); -- } -- -- out = expand_simple_binop (mode, AND, copy_rtx (out), -- gen_int_mode (cf - ct, mode), -- copy_rtx (out), 1, OPTAB_DIRECT); -- if (ct) -- out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct), -- copy_rtx (out), 1, OPTAB_DIRECT); -- if (!rtx_equal_p (out, operands[0])) -- emit_move_insn (operands[0], copy_rtx (out)); -- -- return true; -- } -- } -- -- if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL)) -- { -- /* Try a few things more with specific constants and a variable. */ -- -- optab op; -- rtx var, orig_out, out, tmp; -- -- if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2) -- return false; -- -- /* If one of the two operands is an interesting constant, load a -- constant with the above and mask it in with a logical operation. 
*/ -- -- if (CONST_INT_P (operands[2])) -- { -- var = operands[3]; -- if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx) -- operands[3] = constm1_rtx, op = and_optab; -- else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx) -- operands[3] = const0_rtx, op = ior_optab; -- else -- return false; -- } -- else if (CONST_INT_P (operands[3])) -- { -- var = operands[2]; -- if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx) -- operands[2] = constm1_rtx, op = and_optab; -- else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx) -- operands[2] = const0_rtx, op = ior_optab; -- else -- return false; -- } -- else -- return false; -- -- orig_out = operands[0]; -- tmp = gen_reg_rtx (mode); -- operands[0] = tmp; -- -- /* Recurse to get the constant loaded. */ -- if (!ix86_expand_int_movcc (operands)) -- return false; -- -- /* Mask in the interesting variable. */ -- out = expand_binop (mode, op, var, tmp, orig_out, 0, -- OPTAB_WIDEN); -- if (!rtx_equal_p (out, orig_out)) -- emit_move_insn (copy_rtx (orig_out), copy_rtx (out)); -- -- return true; -- } -- -- /* -- * For comparison with above, -- * -- * movl cf,dest -- * movl ct,tmp -- * cmpl op1,op2 -- * cmovcc tmp,dest -- * -- * Size 15. -- */ -- -- if (! nonimmediate_operand (operands[2], mode)) -- operands[2] = force_reg (mode, operands[2]); -- if (! nonimmediate_operand (operands[3], mode)) -- operands[3] = force_reg (mode, operands[3]); -- -- if (! register_operand (operands[2], VOIDmode) -- && (mode == QImode -- || ! register_operand (operands[3], VOIDmode))) -- operands[2] = force_reg (mode, operands[2]); -- -- if (mode == QImode -- && ! register_operand (operands[3], VOIDmode)) -- operands[3] = force_reg (mode, operands[3]); -- -- emit_insn (compare_seq); -- emit_insn (gen_rtx_SET (operands[0], -- gen_rtx_IF_THEN_ELSE (mode, -- compare_op, operands[2], -- operands[3]))); -- return true; --} -- --/* Swap, force into registers, or otherwise massage the two operands -- to an sse comparison with a mask result. Thus we differ a bit from -- ix86_prepare_fp_compare_args which expects to produce a flags result. -- -- The DEST operand exists to help determine whether to commute commutative -- operators. The POP0/POP1 operands are updated in place. The new -- comparison code is returned, or UNKNOWN if not implementable. */ -- --static enum rtx_code --ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code, -- rtx *pop0, rtx *pop1) --{ -- switch (code) -- { -- case LTGT: -- case UNEQ: -- /* AVX supports all the needed comparisons. */ -- if (TARGET_AVX) -- break; -- /* We have no LTGT as an operator. We could implement it with -- NE & ORDERED, but this requires an extra temporary. It's -- not clear that it's worth it. */ -- return UNKNOWN; -- -- case LT: -- case LE: -- case UNGT: -- case UNGE: -- /* These are supported directly. */ -- break; -- -- case EQ: -- case NE: -- case UNORDERED: -- case ORDERED: -- /* AVX has 3 operand comparisons, no need to swap anything. */ -- if (TARGET_AVX) -- break; -- /* For commutative operators, try to canonicalize the destination -- operand to be first in the comparison - this helps reload to -- avoid extra moves. */ -- if (!dest || !rtx_equal_p (dest, *pop1)) -- break; -- /* FALLTHRU */ -- -- case GE: -- case GT: -- case UNLE: -- case UNLT: -- /* These are not supported directly before AVX, and furthermore -- ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the -- comparison operands to transform into something that is -- supported. 
*/ -- std::swap (*pop0, *pop1); -- code = swap_condition (code); -- break; -- -- default: -- gcc_unreachable (); -- } -- -- return code; --} -- --/* Detect conditional moves that exactly match min/max operational -- semantics. Note that this is IEEE safe, as long as we don't -- interchange the operands. -- -- Returns FALSE if this conditional move doesn't match a MIN/MAX, -- and TRUE if the operation is successful and instructions are emitted. */ -- --static bool --ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0, -- rtx cmp_op1, rtx if_true, rtx if_false) --{ -- machine_mode mode; -- bool is_min; -- rtx tmp; -- -- if (code == LT) -- ; -- else if (code == UNGE) -- std::swap (if_true, if_false); -- else -- return false; -- -- if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false)) -- is_min = true; -- else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false)) -- is_min = false; -- else -- return false; -- -- mode = GET_MODE (dest); -- -- /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here, -- but MODE may be a vector mode and thus not appropriate. */ -- if (!flag_finite_math_only || flag_signed_zeros) -- { -- int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX; -- rtvec v; -- -- if_true = force_reg (mode, if_true); -- v = gen_rtvec (2, if_true, if_false); -- tmp = gen_rtx_UNSPEC (mode, v, u); -- } -- else -- { -- code = is_min ? SMIN : SMAX; -- if (MEM_P (if_true) && MEM_P (if_false)) -- if_true = force_reg (mode, if_true); -- tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false); -- } -- -- emit_insn (gen_rtx_SET (dest, tmp)); -- return true; --} -- --/* Expand an SSE comparison. Return the register with the result. */ -- --static rtx --ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1, -- rtx op_true, rtx op_false) --{ -- machine_mode mode = GET_MODE (dest); -- machine_mode cmp_ops_mode = GET_MODE (cmp_op0); -- -- /* In general case result of comparison can differ from operands' type. */ -- machine_mode cmp_mode; -- -- /* In AVX512F the result of comparison is an integer mask. */ -- bool maskcmp = false; -- rtx x; -- -- if (GET_MODE_SIZE (cmp_ops_mode) == 64) -- { -- unsigned int nbits = GET_MODE_NUNITS (cmp_ops_mode); -- cmp_mode = int_mode_for_size (nbits, 0).require (); -- maskcmp = true; -- } -- else -- cmp_mode = cmp_ops_mode; -- -- cmp_op0 = force_reg (cmp_ops_mode, cmp_op0); -- -- int (*op1_predicate)(rtx, machine_mode) -- = VECTOR_MODE_P (cmp_ops_mode) ? vector_operand : nonimmediate_operand; -- -- if (!op1_predicate (cmp_op1, cmp_ops_mode)) -- cmp_op1 = force_reg (cmp_ops_mode, cmp_op1); -- -- if (optimize -- || (maskcmp && cmp_mode != mode) -- || (op_true && reg_overlap_mentioned_p (dest, op_true)) -- || (op_false && reg_overlap_mentioned_p (dest, op_false))) -- dest = gen_reg_rtx (maskcmp ? cmp_mode : mode); -- -- /* Compare patterns for int modes are unspec in AVX512F only. */ -- if (maskcmp && (code == GT || code == EQ)) -- { -- rtx (*gen)(rtx, rtx, rtx); -- -- switch (cmp_ops_mode) -- { -- case E_V64QImode: -- gcc_assert (TARGET_AVX512BW); -- gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1; -- break; -- case E_V32HImode: -- gcc_assert (TARGET_AVX512BW); -- gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1; -- break; -- case E_V16SImode: -- gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1; -- break; -- case E_V8DImode: -- gen = code == GT ? 
gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1; -- break; -- default: -- gen = NULL; -- } -- -- if (gen) -- { -- emit_insn (gen (dest, cmp_op0, cmp_op1)); -- return dest; -- } -- } -- x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1); -- -- if (cmp_mode != mode && !maskcmp) -- { -- x = force_reg (cmp_ops_mode, x); -- convert_move (dest, x, false); -- } -- else -- emit_insn (gen_rtx_SET (dest, x)); -- -- return dest; --} -- --/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical -- operations. This is used for both scalar and vector conditional moves. */ -- --void --ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) --{ -- machine_mode mode = GET_MODE (dest); -- machine_mode cmpmode = GET_MODE (cmp); -- -- /* In AVX512F the result of comparison is an integer mask. */ -- bool maskcmp = (mode != cmpmode && TARGET_AVX512F); -- -- rtx t2, t3, x; -- -- /* If we have an integer mask and FP value then we need -- to cast mask to FP mode. */ -- if (mode != cmpmode && VECTOR_MODE_P (cmpmode)) -- { -- cmp = force_reg (cmpmode, cmp); -- cmp = gen_rtx_SUBREG (mode, cmp, 0); -- } -- -- if (maskcmp) -- { -- rtx (*gen) (rtx, rtx) = NULL; -- if ((op_true == CONST0_RTX (mode) -- && vector_all_ones_operand (op_false, mode)) -- || (op_false == CONST0_RTX (mode) -- && vector_all_ones_operand (op_true, mode))) -- switch (mode) -- { -- case E_V64QImode: -- if (TARGET_AVX512BW) -- gen = gen_avx512bw_cvtmask2bv64qi; -- break; -- case E_V32QImode: -- if (TARGET_AVX512VL && TARGET_AVX512BW) -- gen = gen_avx512vl_cvtmask2bv32qi; -- break; -- case E_V16QImode: -- if (TARGET_AVX512VL && TARGET_AVX512BW) -- gen = gen_avx512vl_cvtmask2bv16qi; -- break; -- case E_V32HImode: -- if (TARGET_AVX512BW) -- gen = gen_avx512bw_cvtmask2wv32hi; -- break; -- case E_V16HImode: -- if (TARGET_AVX512VL && TARGET_AVX512BW) -- gen = gen_avx512vl_cvtmask2wv16hi; -- break; -- case E_V8HImode: -- if (TARGET_AVX512VL && TARGET_AVX512BW) -- gen = gen_avx512vl_cvtmask2wv8hi; -- break; -- case E_V16SImode: -- if (TARGET_AVX512DQ) -- gen = gen_avx512f_cvtmask2dv16si; -- break; -- case E_V8SImode: -- if (TARGET_AVX512VL && TARGET_AVX512DQ) -- gen = gen_avx512vl_cvtmask2dv8si; -- break; -- case E_V4SImode: -- if (TARGET_AVX512VL && TARGET_AVX512DQ) -- gen = gen_avx512vl_cvtmask2dv4si; -- break; -- case E_V8DImode: -- if (TARGET_AVX512DQ) -- gen = gen_avx512f_cvtmask2qv8di; -- break; -- case E_V4DImode: -- if (TARGET_AVX512VL && TARGET_AVX512DQ) -- gen = gen_avx512vl_cvtmask2qv4di; -- break; -- case E_V2DImode: -- if (TARGET_AVX512VL && TARGET_AVX512DQ) -- gen = gen_avx512vl_cvtmask2qv2di; -- break; -- default: -- break; -- } -- if (gen && SCALAR_INT_MODE_P (cmpmode)) -- { -- cmp = force_reg (cmpmode, cmp); -- if (op_true == CONST0_RTX (mode)) -- { -- rtx (*gen_not) (rtx, rtx); -- switch (cmpmode) -- { -- case E_QImode: gen_not = gen_knotqi; break; -- case E_HImode: gen_not = gen_knothi; break; -- case E_SImode: gen_not = gen_knotsi; break; -- case E_DImode: gen_not = gen_knotdi; break; -- default: gcc_unreachable (); -- } -- rtx n = gen_reg_rtx (cmpmode); -- emit_insn (gen_not (n, cmp)); -- cmp = n; -- } -- emit_insn (gen (dest, cmp)); -- return; -- } -- } -- else if (vector_all_ones_operand (op_true, mode) -- && op_false == CONST0_RTX (mode)) -- { -- emit_insn (gen_rtx_SET (dest, cmp)); -- return; -- } -- else if (op_false == CONST0_RTX (mode)) -- { -- op_true = force_reg (mode, op_true); -- x = gen_rtx_AND (mode, cmp, op_true); -- emit_insn (gen_rtx_SET (dest, x)); -- return; -- } -- else if (op_true == CONST0_RTX 
(mode)) -- { -- op_false = force_reg (mode, op_false); -- x = gen_rtx_NOT (mode, cmp); -- x = gen_rtx_AND (mode, x, op_false); -- emit_insn (gen_rtx_SET (dest, x)); -- return; -- } -- else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)) -- { -- op_false = force_reg (mode, op_false); -- x = gen_rtx_IOR (mode, cmp, op_false); -- emit_insn (gen_rtx_SET (dest, x)); -- return; -- } -- else if (TARGET_XOP) -- { -- op_true = force_reg (mode, op_true); -- -- if (!nonimmediate_operand (op_false, mode)) -- op_false = force_reg (mode, op_false); -- -- emit_insn (gen_rtx_SET (dest, gen_rtx_IF_THEN_ELSE (mode, cmp, -- op_true, -- op_false))); -- return; -- } -- -- rtx (*gen) (rtx, rtx, rtx, rtx) = NULL; -- rtx d = dest; -- -- if (!vector_operand (op_true, mode)) -- op_true = force_reg (mode, op_true); -- -- op_false = force_reg (mode, op_false); -- -- switch (mode) -- { -- case E_V4SFmode: -- if (TARGET_SSE4_1) -- gen = gen_sse4_1_blendvps; -- break; -- case E_V2DFmode: -- if (TARGET_SSE4_1) -- gen = gen_sse4_1_blendvpd; -- break; -- case E_SFmode: -- if (TARGET_SSE4_1) -- { -- gen = gen_sse4_1_blendvss; -- op_true = force_reg (mode, op_true); -- } -- break; -- case E_DFmode: -- if (TARGET_SSE4_1) -- { -- gen = gen_sse4_1_blendvsd; -- op_true = force_reg (mode, op_true); -- } -- break; -- case E_V16QImode: -- case E_V8HImode: -- case E_V4SImode: -- case E_V2DImode: -- if (TARGET_SSE4_1) -- { -- gen = gen_sse4_1_pblendvb; -- if (mode != V16QImode) -- d = gen_reg_rtx (V16QImode); -- op_false = gen_lowpart (V16QImode, op_false); -- op_true = gen_lowpart (V16QImode, op_true); -- cmp = gen_lowpart (V16QImode, cmp); -- } -- break; -- case E_V8SFmode: -- if (TARGET_AVX) -- gen = gen_avx_blendvps256; -- break; -- case E_V4DFmode: -- if (TARGET_AVX) -- gen = gen_avx_blendvpd256; -- break; -- case E_V32QImode: -- case E_V16HImode: -- case E_V8SImode: -- case E_V4DImode: -- if (TARGET_AVX2) -- { -- gen = gen_avx2_pblendvb; -- if (mode != V32QImode) -- d = gen_reg_rtx (V32QImode); -- op_false = gen_lowpart (V32QImode, op_false); -- op_true = gen_lowpart (V32QImode, op_true); -- cmp = gen_lowpart (V32QImode, cmp); -- } -- break; -- -- case E_V64QImode: -- gen = gen_avx512bw_blendmv64qi; -- break; -- case E_V32HImode: -- gen = gen_avx512bw_blendmv32hi; -- break; -- case E_V16SImode: -- gen = gen_avx512f_blendmv16si; -- break; -- case E_V8DImode: -- gen = gen_avx512f_blendmv8di; -- break; -- case E_V8DFmode: -- gen = gen_avx512f_blendmv8df; -- break; -- case E_V16SFmode: -- gen = gen_avx512f_blendmv16sf; -- break; -- -- default: -- break; -- } -- -- if (gen != NULL) -- { -- emit_insn (gen (d, op_false, op_true, cmp)); -- if (d != dest) -- emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d)); -- } -- else -- { -- op_true = force_reg (mode, op_true); -- -- t2 = gen_reg_rtx (mode); -- if (optimize) -- t3 = gen_reg_rtx (mode); -- else -- t3 = dest; -- -- x = gen_rtx_AND (mode, op_true, cmp); -- emit_insn (gen_rtx_SET (t2, x)); -- -- x = gen_rtx_NOT (mode, cmp); -- x = gen_rtx_AND (mode, x, op_false); -- emit_insn (gen_rtx_SET (t3, x)); -- -- x = gen_rtx_IOR (mode, t3, t2); -- emit_insn (gen_rtx_SET (dest, x)); -- } --} -- --/* Expand a floating-point conditional move. Return true if successful. 
*/ -- --bool --ix86_expand_fp_movcc (rtx operands[]) --{ -- machine_mode mode = GET_MODE (operands[0]); -- enum rtx_code code = GET_CODE (operands[1]); -- rtx tmp, compare_op; -- rtx op0 = XEXP (operands[1], 0); -- rtx op1 = XEXP (operands[1], 1); -- -- if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode)) -- { -- machine_mode cmode; -- -- /* Since we've no cmove for sse registers, don't force bad register -- allocation just to gain access to it. Deny movcc when the -- comparison mode doesn't match the move mode. */ -- cmode = GET_MODE (op0); -- if (cmode == VOIDmode) -- cmode = GET_MODE (op1); -- if (cmode != mode) -- return false; -- -- code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1); -- if (code == UNKNOWN) -- return false; -- -- if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1, -- operands[2], operands[3])) -- return true; -- -- tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1, -- operands[2], operands[3]); -- ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]); -- return true; -- } -- -- if (GET_MODE (op0) == TImode -- || (GET_MODE (op0) == DImode -- && !TARGET_64BIT)) -- return false; -- -- /* The floating point conditional move instructions don't directly -- support conditions resulting from a signed integer comparison. */ -- -- compare_op = ix86_expand_compare (code, op0, op1); -- if (!fcmov_comparison_operator (compare_op, VOIDmode)) -- { -- tmp = gen_reg_rtx (QImode); -- ix86_expand_setcc (tmp, code, op0, op1); -- -- compare_op = ix86_expand_compare (NE, tmp, const0_rtx); -- } -- -- emit_insn (gen_rtx_SET (operands[0], -- gen_rtx_IF_THEN_ELSE (mode, compare_op, -- operands[2], operands[3]))); -- -- return true; --} -- --/* Helper for ix86_cmp_code_to_pcmp_immediate for int modes. */ -- --static int --ix86_int_cmp_code_to_pcmp_immediate (enum rtx_code code) --{ -- switch (code) -- { -- case EQ: -- return 0; -- case LT: -- case LTU: -- return 1; -- case LE: -- case LEU: -- return 2; -- case NE: -- return 4; -- case GE: -- case GEU: -- return 5; -- case GT: -- case GTU: -- return 6; -- default: -- gcc_unreachable (); -- } --} -- --/* Helper for ix86_cmp_code_to_pcmp_immediate for fp modes. */ -- --static int --ix86_fp_cmp_code_to_pcmp_immediate (enum rtx_code code) --{ -- switch (code) -- { -- case EQ: -- return 0x00; -- case NE: -- return 0x04; -- case GT: -- return 0x0e; -- case LE: -- return 0x02; -- case GE: -- return 0x0d; -- case LT: -- return 0x01; -- case UNLE: -- return 0x0a; -- case UNLT: -- return 0x09; -- case UNGE: -- return 0x05; -- case UNGT: -- return 0x06; -- case UNEQ: -- return 0x18; -- case LTGT: -- return 0x0c; -- case ORDERED: -- return 0x07; -- case UNORDERED: -- return 0x03; -- default: -- gcc_unreachable (); -- } --} -- --/* Return immediate value to be used in UNSPEC_PCMP -- for comparison CODE in MODE. */ -- --static int --ix86_cmp_code_to_pcmp_immediate (enum rtx_code code, machine_mode mode) --{ -- if (FLOAT_MODE_P (mode)) -- return ix86_fp_cmp_code_to_pcmp_immediate (code); -- return ix86_int_cmp_code_to_pcmp_immediate (code); --} -- --/* Expand AVX-512 vector comparison. 
*/ -- --bool --ix86_expand_mask_vec_cmp (rtx operands[]) --{ -- machine_mode mask_mode = GET_MODE (operands[0]); -- machine_mode cmp_mode = GET_MODE (operands[2]); -- enum rtx_code code = GET_CODE (operands[1]); -- rtx imm = GEN_INT (ix86_cmp_code_to_pcmp_immediate (code, cmp_mode)); -- int unspec_code; -- rtx unspec; -- -- switch (code) -- { -- case LEU: -- case GTU: -- case GEU: -- case LTU: -- unspec_code = UNSPEC_UNSIGNED_PCMP; -- break; -- -- default: -- unspec_code = UNSPEC_PCMP; -- } -- -- unspec = gen_rtx_UNSPEC (mask_mode, gen_rtvec (3, operands[2], -- operands[3], imm), -- unspec_code); -- emit_insn (gen_rtx_SET (operands[0], unspec)); -- -- return true; --} -- --/* Expand fp vector comparison. */ -- --bool --ix86_expand_fp_vec_cmp (rtx operands[]) --{ -- enum rtx_code code = GET_CODE (operands[1]); -- rtx cmp; -- -- code = ix86_prepare_sse_fp_compare_args (operands[0], code, -- &operands[2], &operands[3]); -- if (code == UNKNOWN) -- { -- rtx temp; -- switch (GET_CODE (operands[1])) -- { -- case LTGT: -- temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[2], -- operands[3], NULL, NULL); -- cmp = ix86_expand_sse_cmp (operands[0], NE, operands[2], -- operands[3], NULL, NULL); -- code = AND; -- break; -- case UNEQ: -- temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[2], -- operands[3], NULL, NULL); -- cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[2], -- operands[3], NULL, NULL); -- code = IOR; -- break; -- default: -- gcc_unreachable (); -- } -- cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1, -- OPTAB_DIRECT); -- } -- else -- cmp = ix86_expand_sse_cmp (operands[0], code, operands[2], operands[3], -- operands[1], operands[2]); -- -- if (operands[0] != cmp) -- emit_move_insn (operands[0], cmp); -- -- return true; --} -- --static rtx --ix86_expand_int_sse_cmp (rtx dest, enum rtx_code code, rtx cop0, rtx cop1, -- rtx op_true, rtx op_false, bool *negate) --{ -- machine_mode data_mode = GET_MODE (dest); -- machine_mode mode = GET_MODE (cop0); -- rtx x; -- -- *negate = false; -- -- /* XOP supports all of the comparisons on all 128-bit vector int types. */ -- if (TARGET_XOP -- && (mode == V16QImode || mode == V8HImode -- || mode == V4SImode || mode == V2DImode)) -- ; -- else -- { -- /* Canonicalize the comparison to EQ, GT, GTU. */ -- switch (code) -- { -- case EQ: -- case GT: -- case GTU: -- break; -- -- case NE: -- case LE: -- case LEU: -- code = reverse_condition (code); -- *negate = true; -- break; -- -- case GE: -- case GEU: -- code = reverse_condition (code); -- *negate = true; -- /* FALLTHRU */ -- -- case LT: -- case LTU: -- std::swap (cop0, cop1); -- code = swap_condition (code); -- break; -- -- default: -- gcc_unreachable (); -- } -- -- /* Only SSE4.1/SSE4.2 supports V2DImode. */ -- if (mode == V2DImode) -- { -- switch (code) -- { -- case EQ: -- /* SSE4.1 supports EQ. */ -- if (!TARGET_SSE4_1) -- return NULL; -- break; -- -- case GT: -- case GTU: -- /* SSE4.2 supports GT/GTU. */ -- if (!TARGET_SSE4_2) -- return NULL; -- break; -- -- default: -- gcc_unreachable (); -- } -- } -- -- rtx optrue = op_true ? op_true : CONSTM1_RTX (data_mode); -- rtx opfalse = op_false ? op_false : CONST0_RTX (data_mode); -- if (*negate) -- std::swap (optrue, opfalse); -- -- /* Transform x > y ? 0 : -1 (i.e. x <= y ? -1 : 0 or x <= y) when -- not using integer masks into min (x, y) == x ? -1 : 0 (i.e. -- min (x, y) == x). While we add one instruction (the minimum), -- we remove the need for two instructions in the negation, as the -- result is done this way. 
-- When using masks, do it for SI/DImode element types, as it is shorter -- than the two subtractions. */ -- if ((code != EQ -- && GET_MODE_SIZE (mode) != 64 -- && vector_all_ones_operand (opfalse, data_mode) -- && optrue == CONST0_RTX (data_mode)) -- || (code == GTU -- && GET_MODE_SIZE (GET_MODE_INNER (mode)) >= 4 -- /* Don't do it if not using integer masks and we'd end up with -- the right values in the registers though. */ -- && (GET_MODE_SIZE (mode) == 64 -- || !vector_all_ones_operand (optrue, data_mode) -- || opfalse != CONST0_RTX (data_mode)))) -- { -- rtx (*gen) (rtx, rtx, rtx) = NULL; -- -- switch (mode) -- { -- case E_V16SImode: -- gen = (code == GTU) ? gen_uminv16si3 : gen_sminv16si3; -- break; -- case E_V8DImode: -- gen = (code == GTU) ? gen_uminv8di3 : gen_sminv8di3; -- cop0 = force_reg (mode, cop0); -- cop1 = force_reg (mode, cop1); -- break; -- case E_V32QImode: -- if (TARGET_AVX2) -- gen = (code == GTU) ? gen_uminv32qi3 : gen_sminv32qi3; -- break; -- case E_V16HImode: -- if (TARGET_AVX2) -- gen = (code == GTU) ? gen_uminv16hi3 : gen_sminv16hi3; -- break; -- case E_V8SImode: -- if (TARGET_AVX2) -- gen = (code == GTU) ? gen_uminv8si3 : gen_sminv8si3; -- break; -- case E_V4DImode: -- if (TARGET_AVX512VL) -- { -- gen = (code == GTU) ? gen_uminv4di3 : gen_sminv4di3; -- cop0 = force_reg (mode, cop0); -- cop1 = force_reg (mode, cop1); -- } -- break; -- case E_V16QImode: -- if (code == GTU && TARGET_SSE2) -- gen = gen_uminv16qi3; -- else if (code == GT && TARGET_SSE4_1) -- gen = gen_sminv16qi3; -- break; -- case E_V8HImode: -- if (code == GTU && TARGET_SSE4_1) -- gen = gen_uminv8hi3; -- else if (code == GT && TARGET_SSE2) -- gen = gen_sminv8hi3; -- break; -- case E_V4SImode: -- if (TARGET_SSE4_1) -- gen = (code == GTU) ? gen_uminv4si3 : gen_sminv4si3; -- break; -- case E_V2DImode: -- if (TARGET_AVX512VL) -- { -- gen = (code == GTU) ? gen_uminv2di3 : gen_sminv2di3; -- cop0 = force_reg (mode, cop0); -- cop1 = force_reg (mode, cop1); -- } -- break; -- default: -- break; -- } -- -- if (gen) -- { -- rtx tem = gen_reg_rtx (mode); -- if (!vector_operand (cop0, mode)) -- cop0 = force_reg (mode, cop0); -- if (!vector_operand (cop1, mode)) -- cop1 = force_reg (mode, cop1); -- *negate = !*negate; -- emit_insn (gen (tem, cop0, cop1)); -- cop1 = tem; -- code = EQ; -- } -- } -- -- /* Unsigned parallel compare is not supported by the hardware. -- Play some tricks to turn this into a signed comparison -- against 0. */ -- if (code == GTU) -- { -- cop0 = force_reg (mode, cop0); -- -- switch (mode) -- { -- case E_V16SImode: -- case E_V8DImode: -- case E_V8SImode: -- case E_V4DImode: -- case E_V4SImode: -- case E_V2DImode: -- { -- rtx t1, t2, mask; -- rtx (*gen_sub3) (rtx, rtx, rtx); -- -- switch (mode) -- { -- case E_V16SImode: gen_sub3 = gen_subv16si3; break; -- case E_V8DImode: gen_sub3 = gen_subv8di3; break; -- case E_V8SImode: gen_sub3 = gen_subv8si3; break; -- case E_V4DImode: gen_sub3 = gen_subv4di3; break; -- case E_V4SImode: gen_sub3 = gen_subv4si3; break; -- case E_V2DImode: gen_sub3 = gen_subv2di3; break; -- default: -- gcc_unreachable (); -- } -- /* Subtract (-(INT MAX) - 1) from both operands to make -- them signed. 
*/ -- mask = ix86_build_signbit_mask (mode, true, false); -- t1 = gen_reg_rtx (mode); -- emit_insn (gen_sub3 (t1, cop0, mask)); -- -- t2 = gen_reg_rtx (mode); -- emit_insn (gen_sub3 (t2, cop1, mask)); -- -- cop0 = t1; -- cop1 = t2; -- code = GT; -- } -- break; -- -- case E_V64QImode: -- case E_V32HImode: -- case E_V32QImode: -- case E_V16HImode: -- case E_V16QImode: -- case E_V8HImode: -- /* Perform a parallel unsigned saturating subtraction. */ -- x = gen_reg_rtx (mode); -- emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, cop0, -- cop1))); -- -- cop0 = x; -- cop1 = CONST0_RTX (mode); -- code = EQ; -- *negate = !*negate; -- break; -- -- default: -- gcc_unreachable (); -- } -- } -- } -- -- if (*negate) -- std::swap (op_true, op_false); -- -- /* Allow the comparison to be done in one mode, but the movcc to -- happen in another mode. */ -- if (data_mode == mode) -- { -- x = ix86_expand_sse_cmp (dest, code, cop0, cop1, -- op_true, op_false); -- } -- else -- { -- gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode)); -- x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1, -- op_true, op_false); -- if (GET_MODE (x) == mode) -- x = gen_lowpart (data_mode, x); -- } -- -- return x; --} -- --/* Expand integer vector comparison. */ -- --bool --ix86_expand_int_vec_cmp (rtx operands[]) --{ -- rtx_code code = GET_CODE (operands[1]); -- bool negate = false; -- rtx cmp = ix86_expand_int_sse_cmp (operands[0], code, operands[2], -- operands[3], NULL, NULL, &negate); -- -- if (!cmp) -- return false; -- -- if (negate) -- cmp = ix86_expand_int_sse_cmp (operands[0], EQ, cmp, -- CONST0_RTX (GET_MODE (cmp)), -- NULL, NULL, &negate); -- -- gcc_assert (!negate); -- -- if (operands[0] != cmp) -- emit_move_insn (operands[0], cmp); -- -- return true; --} -- --/* Expand a floating-point vector conditional move; a vcond operation -- rather than a movcc operation. */ -- --bool --ix86_expand_fp_vcond (rtx operands[]) --{ -- enum rtx_code code = GET_CODE (operands[3]); -- rtx cmp; -- -- code = ix86_prepare_sse_fp_compare_args (operands[0], code, -- &operands[4], &operands[5]); -- if (code == UNKNOWN) -- { -- rtx temp; -- switch (GET_CODE (operands[3])) -- { -- case LTGT: -- temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4], -- operands[5], operands[0], operands[0]); -- cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4], -- operands[5], operands[1], operands[2]); -- code = AND; -- break; -- case UNEQ: -- temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4], -- operands[5], operands[0], operands[0]); -- cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4], -- operands[5], operands[1], operands[2]); -- code = IOR; -- break; -- default: -- gcc_unreachable (); -- } -- cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1, -- OPTAB_DIRECT); -- ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]); -- return true; -- } -- -- if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4], -- operands[5], operands[1], operands[2])) -- return true; -- -- cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5], -- operands[1], operands[2]); -- ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]); -- return true; --} -- --/* Expand a signed/unsigned integral vector conditional move. 
*/ -- --bool --ix86_expand_int_vcond (rtx operands[]) --{ -- machine_mode data_mode = GET_MODE (operands[0]); -- machine_mode mode = GET_MODE (operands[4]); -- enum rtx_code code = GET_CODE (operands[3]); -- bool negate = false; -- rtx x, cop0, cop1; -- -- cop0 = operands[4]; -- cop1 = operands[5]; -- -- /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31 -- and x < 0 ? 1 : 0 into (unsigned) x >> 31. */ -- if ((code == LT || code == GE) -- && data_mode == mode -- && cop1 == CONST0_RTX (mode) -- && operands[1 + (code == LT)] == CONST0_RTX (data_mode) -- && GET_MODE_UNIT_SIZE (data_mode) > 1 -- && GET_MODE_UNIT_SIZE (data_mode) <= 8 -- && (GET_MODE_SIZE (data_mode) == 16 -- || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32))) -- { -- rtx negop = operands[2 - (code == LT)]; -- int shift = GET_MODE_UNIT_BITSIZE (data_mode) - 1; -- if (negop == CONST1_RTX (data_mode)) -- { -- rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift), -- operands[0], 1, OPTAB_DIRECT); -- if (res != operands[0]) -- emit_move_insn (operands[0], res); -- return true; -- } -- else if (GET_MODE_INNER (data_mode) != DImode -- && vector_all_ones_operand (negop, data_mode)) -- { -- rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift), -- operands[0], 0, OPTAB_DIRECT); -- if (res != operands[0]) -- emit_move_insn (operands[0], res); -- return true; -- } -- } -- -- if (!nonimmediate_operand (cop1, mode)) -- cop1 = force_reg (mode, cop1); -- if (!general_operand (operands[1], data_mode)) -- operands[1] = force_reg (data_mode, operands[1]); -- if (!general_operand (operands[2], data_mode)) -- operands[2] = force_reg (data_mode, operands[2]); -- -- x = ix86_expand_int_sse_cmp (operands[0], code, cop0, cop1, -- operands[1], operands[2], &negate); -- -- if (!x) -- return false; -- -- ix86_expand_sse_movcc (operands[0], x, operands[1+negate], -- operands[2-negate]); -- return true; --} -- --/* AVX512F does support 64-byte integer vector operations, -- thus the longest vector we are faced with is V64QImode. */ --#define MAX_VECT_LEN 64 -- --struct expand_vec_perm_d --{ -- rtx target, op0, op1; -- unsigned char perm[MAX_VECT_LEN]; -- machine_mode vmode; -- unsigned char nelt; -- bool one_operand_p; -- bool testing_p; --}; -- --static bool --ix86_expand_vec_perm_vpermt2 (rtx target, rtx mask, rtx op0, rtx op1, -- struct expand_vec_perm_d *d) --{ -- /* ix86_expand_vec_perm_vpermt2 is called from both const and non-const -- expander, so args are either in d, or in op0, op1 etc. */ -- machine_mode mode = GET_MODE (d ? 
d->op0 : op0); -- machine_mode maskmode = mode; -- rtx (*gen) (rtx, rtx, rtx, rtx) = NULL; -- -- switch (mode) -- { -- case E_V8HImode: -- if (TARGET_AVX512VL && TARGET_AVX512BW) -- gen = gen_avx512vl_vpermt2varv8hi3; -- break; -- case E_V16HImode: -- if (TARGET_AVX512VL && TARGET_AVX512BW) -- gen = gen_avx512vl_vpermt2varv16hi3; -- break; -- case E_V64QImode: -- if (TARGET_AVX512VBMI) -- gen = gen_avx512bw_vpermt2varv64qi3; -- break; -- case E_V32HImode: -- if (TARGET_AVX512BW) -- gen = gen_avx512bw_vpermt2varv32hi3; -- break; -- case E_V4SImode: -- if (TARGET_AVX512VL) -- gen = gen_avx512vl_vpermt2varv4si3; -- break; -- case E_V8SImode: -- if (TARGET_AVX512VL) -- gen = gen_avx512vl_vpermt2varv8si3; -- break; -- case E_V16SImode: -- if (TARGET_AVX512F) -- gen = gen_avx512f_vpermt2varv16si3; -- break; -- case E_V4SFmode: -- if (TARGET_AVX512VL) -- { -- gen = gen_avx512vl_vpermt2varv4sf3; -- maskmode = V4SImode; -- } -- break; -- case E_V8SFmode: -- if (TARGET_AVX512VL) -- { -- gen = gen_avx512vl_vpermt2varv8sf3; -- maskmode = V8SImode; -- } -- break; -- case E_V16SFmode: -- if (TARGET_AVX512F) -- { -- gen = gen_avx512f_vpermt2varv16sf3; -- maskmode = V16SImode; -- } -- break; -- case E_V2DImode: -- if (TARGET_AVX512VL) -- gen = gen_avx512vl_vpermt2varv2di3; -- break; -- case E_V4DImode: -- if (TARGET_AVX512VL) -- gen = gen_avx512vl_vpermt2varv4di3; -- break; -- case E_V8DImode: -- if (TARGET_AVX512F) -- gen = gen_avx512f_vpermt2varv8di3; -- break; -- case E_V2DFmode: -- if (TARGET_AVX512VL) -- { -- gen = gen_avx512vl_vpermt2varv2df3; -- maskmode = V2DImode; -- } -- break; -- case E_V4DFmode: -- if (TARGET_AVX512VL) -- { -- gen = gen_avx512vl_vpermt2varv4df3; -- maskmode = V4DImode; -- } -- break; -- case E_V8DFmode: -- if (TARGET_AVX512F) -- { -- gen = gen_avx512f_vpermt2varv8df3; -- maskmode = V8DImode; -- } -- break; -- default: -- break; -- } -- -- if (gen == NULL) -- return false; -- -- /* ix86_expand_vec_perm_vpermt2 is called from both const and non-const -- expander, so args are either in d, or in op0, op1 etc. */ -- if (d) -- { -- rtx vec[64]; -- target = d->target; -- op0 = d->op0; -- op1 = d->op1; -- for (int i = 0; i < d->nelt; ++i) -- vec[i] = GEN_INT (d->perm[i]); -- mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec)); -- } -- -- emit_insn (gen (target, force_reg (maskmode, mask), op0, op1)); -- return true; --} -- --/* Expand a variable vector permutation. */ -- --void --ix86_expand_vec_perm (rtx operands[]) --{ -- rtx target = operands[0]; -- rtx op0 = operands[1]; -- rtx op1 = operands[2]; -- rtx mask = operands[3]; -- rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32]; -- machine_mode mode = GET_MODE (op0); -- machine_mode maskmode = GET_MODE (mask); -- int w, e, i; -- bool one_operand_shuffle = rtx_equal_p (op0, op1); -- -- /* Number of elements in the vector. 
*/ -- w = GET_MODE_NUNITS (mode); -- e = GET_MODE_UNIT_SIZE (mode); -- gcc_assert (w <= 64); -- -- if (TARGET_AVX512F && one_operand_shuffle) -- { -- rtx (*gen) (rtx, rtx, rtx) = NULL; -- switch (mode) -- { -- case E_V16SImode: -- gen =gen_avx512f_permvarv16si; -- break; -- case E_V16SFmode: -- gen = gen_avx512f_permvarv16sf; -- break; -- case E_V8DImode: -- gen = gen_avx512f_permvarv8di; -- break; -- case E_V8DFmode: -- gen = gen_avx512f_permvarv8df; -- break; -- default: -- break; -- } -- if (gen != NULL) -- { -- emit_insn (gen (target, op0, mask)); -- return; -- } -- } -- -- if (ix86_expand_vec_perm_vpermt2 (target, mask, op0, op1, NULL)) -- return; -- -- if (TARGET_AVX2) -- { -- if (mode == V4DImode || mode == V4DFmode || mode == V16HImode) -- { -- /* Unfortunately, the VPERMQ and VPERMPD instructions only support -- an constant shuffle operand. With a tiny bit of effort we can -- use VPERMD instead. A re-interpretation stall for V4DFmode is -- unfortunate but there's no avoiding it. -- Similarly for V16HImode we don't have instructions for variable -- shuffling, while for V32QImode we can use after preparing suitable -- masks vpshufb; vpshufb; vpermq; vpor. */ -- -- if (mode == V16HImode) -- { -- maskmode = mode = V32QImode; -- w = 32; -- e = 1; -- } -- else -- { -- maskmode = mode = V8SImode; -- w = 8; -- e = 4; -- } -- t1 = gen_reg_rtx (maskmode); -- -- /* Replicate the low bits of the V4DImode mask into V8SImode: -- mask = { A B C D } -- t1 = { A A B B C C D D }. */ -- for (i = 0; i < w / 2; ++i) -- vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2); -- vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec)); -- vt = force_reg (maskmode, vt); -- mask = gen_lowpart (maskmode, mask); -- if (maskmode == V8SImode) -- emit_insn (gen_avx2_permvarv8si (t1, mask, vt)); -- else -- emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt)); -- -- /* Multiply the shuffle indicies by two. */ -- t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1, -- OPTAB_DIRECT); -- -- /* Add one to the odd shuffle indicies: -- t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */ -- for (i = 0; i < w / 2; ++i) -- { -- vec[i * 2] = const0_rtx; -- vec[i * 2 + 1] = const1_rtx; -- } -- vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec)); -- vt = validize_mem (force_const_mem (maskmode, vt)); -- t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1, -- OPTAB_DIRECT); -- -- /* Continue as if V8SImode (resp. V32QImode) was used initially. */ -- operands[3] = mask = t1; -- target = gen_reg_rtx (mode); -- op0 = gen_lowpart (mode, op0); -- op1 = gen_lowpart (mode, op1); -- } -- -- switch (mode) -- { -- case E_V8SImode: -- /* The VPERMD and VPERMPS instructions already properly ignore -- the high bits of the shuffle elements. No need for us to -- perform an AND ourselves. 
*/ -- if (one_operand_shuffle) -- { -- emit_insn (gen_avx2_permvarv8si (target, op0, mask)); -- if (target != operands[0]) -- emit_move_insn (operands[0], -- gen_lowpart (GET_MODE (operands[0]), target)); -- } -- else -- { -- t1 = gen_reg_rtx (V8SImode); -- t2 = gen_reg_rtx (V8SImode); -- emit_insn (gen_avx2_permvarv8si (t1, op0, mask)); -- emit_insn (gen_avx2_permvarv8si (t2, op1, mask)); -- goto merge_two; -- } -- return; -- -- case E_V8SFmode: -- mask = gen_lowpart (V8SImode, mask); -- if (one_operand_shuffle) -- emit_insn (gen_avx2_permvarv8sf (target, op0, mask)); -- else -- { -- t1 = gen_reg_rtx (V8SFmode); -- t2 = gen_reg_rtx (V8SFmode); -- emit_insn (gen_avx2_permvarv8sf (t1, op0, mask)); -- emit_insn (gen_avx2_permvarv8sf (t2, op1, mask)); -- goto merge_two; -- } -- return; -- -- case E_V4SImode: -- /* By combining the two 128-bit input vectors into one 256-bit -- input vector, we can use VPERMD and VPERMPS for the full -- two-operand shuffle. */ -- t1 = gen_reg_rtx (V8SImode); -- t2 = gen_reg_rtx (V8SImode); -- emit_insn (gen_avx_vec_concatv8si (t1, op0, op1)); -- emit_insn (gen_avx_vec_concatv8si (t2, mask, mask)); -- emit_insn (gen_avx2_permvarv8si (t1, t1, t2)); -- emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx)); -- return; -- -- case E_V4SFmode: -- t1 = gen_reg_rtx (V8SFmode); -- t2 = gen_reg_rtx (V8SImode); -- mask = gen_lowpart (V4SImode, mask); -- emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1)); -- emit_insn (gen_avx_vec_concatv8si (t2, mask, mask)); -- emit_insn (gen_avx2_permvarv8sf (t1, t1, t2)); -- emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx)); -- return; -- -- case E_V32QImode: -- t1 = gen_reg_rtx (V32QImode); -- t2 = gen_reg_rtx (V32QImode); -- t3 = gen_reg_rtx (V32QImode); -- vt2 = GEN_INT (-128); -- vt = gen_const_vec_duplicate (V32QImode, vt2); -- vt = force_reg (V32QImode, vt); -- for (i = 0; i < 32; i++) -- vec[i] = i < 16 ? vt2 : const0_rtx; -- vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec)); -- vt2 = force_reg (V32QImode, vt2); -- /* From mask create two adjusted masks, which contain the same -- bits as mask in the low 7 bits of each vector element. -- The first mask will have the most significant bit clear -- if it requests element from the same 128-bit lane -- and MSB set if it requests element from the other 128-bit lane. -- The second mask will have the opposite values of the MSB, -- and additionally will have its 128-bit lanes swapped. -- E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have -- t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and -- t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ... -- stands for other 12 bytes. */ -- /* The bit whether element is from the same lane or the other -- lane is bit 4, so shift it up by 3 to the MSB position. */ -- t5 = gen_reg_rtx (V4DImode); -- emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask), -- GEN_INT (3))); -- /* Clear MSB bits from the mask just in case it had them set. */ -- emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask)); -- /* After this t1 will have MSB set for elements from other lane. */ -- emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2)); -- /* Clear bits other than MSB. */ -- emit_insn (gen_andv32qi3 (t1, t1, vt)); -- /* Or in the lower bits from mask into t3. */ -- emit_insn (gen_iorv32qi3 (t3, t1, t2)); -- /* And invert MSB bits in t1, so MSB is set for elements from the same -- lane. */ -- emit_insn (gen_xorv32qi3 (t1, t1, vt)); -- /* Swap 128-bit lanes in t3. 
*/ -- t6 = gen_reg_rtx (V4DImode); -- emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3), -- const2_rtx, GEN_INT (3), -- const0_rtx, const1_rtx)); -- /* And or in the lower bits from mask into t1. */ -- emit_insn (gen_iorv32qi3 (t1, t1, t2)); -- if (one_operand_shuffle) -- { -- /* Each of these shuffles will put 0s in places where -- element from the other 128-bit lane is needed, otherwise -- will shuffle in the requested value. */ -- emit_insn (gen_avx2_pshufbv32qi3 (t3, op0, -- gen_lowpart (V32QImode, t6))); -- emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1)); -- /* For t3 the 128-bit lanes are swapped again. */ -- t7 = gen_reg_rtx (V4DImode); -- emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3), -- const2_rtx, GEN_INT (3), -- const0_rtx, const1_rtx)); -- /* And oring both together leads to the result. */ -- emit_insn (gen_iorv32qi3 (target, t1, -- gen_lowpart (V32QImode, t7))); -- if (target != operands[0]) -- emit_move_insn (operands[0], -- gen_lowpart (GET_MODE (operands[0]), target)); -- return; -- } -- -- t4 = gen_reg_rtx (V32QImode); -- /* Similarly to the above one_operand_shuffle code, -- just for repeated twice for each operand. merge_two: -- code will merge the two results together. */ -- emit_insn (gen_avx2_pshufbv32qi3 (t4, op0, -- gen_lowpart (V32QImode, t6))); -- emit_insn (gen_avx2_pshufbv32qi3 (t3, op1, -- gen_lowpart (V32QImode, t6))); -- emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1)); -- emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1)); -- t7 = gen_reg_rtx (V4DImode); -- emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4), -- const2_rtx, GEN_INT (3), -- const0_rtx, const1_rtx)); -- t8 = gen_reg_rtx (V4DImode); -- emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3), -- const2_rtx, GEN_INT (3), -- const0_rtx, const1_rtx)); -- emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7))); -- emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8))); -- t1 = t4; -- t2 = t3; -- goto merge_two; -- -- default: -- gcc_assert (GET_MODE_SIZE (mode) <= 16); -- break; -- } -- } -- -- if (TARGET_XOP) -- { -- /* The XOP VPPERM insn supports three inputs. By ignoring the -- one_operand_shuffle special case, we avoid creating another -- set of constant vectors in memory. */ -- one_operand_shuffle = false; -- -- /* mask = mask & {2*w-1, ...} */ -- vt = GEN_INT (2*w - 1); -- } -- else -- { -- /* mask = mask & {w-1, ...} */ -- vt = GEN_INT (w - 1); -- } -- -- vt = gen_const_vec_duplicate (maskmode, vt); -- mask = expand_simple_binop (maskmode, AND, mask, vt, -- NULL_RTX, 0, OPTAB_DIRECT); -- -- /* For non-QImode operations, convert the word permutation control -- into a byte permutation control. */ -- if (mode != V16QImode) -- { -- mask = expand_simple_binop (maskmode, ASHIFT, mask, -- GEN_INT (exact_log2 (e)), -- NULL_RTX, 0, OPTAB_DIRECT); -- -- /* Convert mask to vector of chars. */ -- mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask)); -- -- /* Replicate each of the input bytes into byte positions: -- (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8} -- (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12} -- (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. 
*/ -- for (i = 0; i < 16; ++i) -- vec[i] = GEN_INT (i/e * e); -- vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec)); -- vt = validize_mem (force_const_mem (V16QImode, vt)); -- if (TARGET_XOP) -- emit_insn (gen_xop_pperm (mask, mask, mask, vt)); -- else -- emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt)); -- -- /* Convert it into the byte positions by doing -- mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */ -- for (i = 0; i < 16; ++i) -- vec[i] = GEN_INT (i % e); -- vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec)); -- vt = validize_mem (force_const_mem (V16QImode, vt)); -- emit_insn (gen_addv16qi3 (mask, mask, vt)); -- } -- -- /* The actual shuffle operations all operate on V16QImode. */ -- op0 = gen_lowpart (V16QImode, op0); -- op1 = gen_lowpart (V16QImode, op1); -- -- if (TARGET_XOP) -- { -- if (GET_MODE (target) != V16QImode) -- target = gen_reg_rtx (V16QImode); -- emit_insn (gen_xop_pperm (target, op0, op1, mask)); -- if (target != operands[0]) -- emit_move_insn (operands[0], -- gen_lowpart (GET_MODE (operands[0]), target)); -- } -- else if (one_operand_shuffle) -- { -- if (GET_MODE (target) != V16QImode) -- target = gen_reg_rtx (V16QImode); -- emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask)); -- if (target != operands[0]) -- emit_move_insn (operands[0], -- gen_lowpart (GET_MODE (operands[0]), target)); -- } -- else -- { -- rtx xops[6]; -- bool ok; -- -- /* Shuffle the two input vectors independently. */ -- t1 = gen_reg_rtx (V16QImode); -- t2 = gen_reg_rtx (V16QImode); -- emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask)); -- emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask)); -- -- merge_two: -- /* Then merge them together. The key is whether any given control -- element contained a bit set that indicates the second word. */ -- mask = operands[3]; -- vt = GEN_INT (w); -- if (maskmode == V2DImode && !TARGET_SSE4_1) -- { -- /* Without SSE4.1, we don't have V2DImode EQ. Perform one -- more shuffle to convert the V2DI input mask into a V4SI -- input mask. At which point the masking that expand_int_vcond -- will work as desired. */ -- rtx t3 = gen_reg_rtx (V4SImode); -- emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask), -- const0_rtx, const0_rtx, -- const2_rtx, const2_rtx)); -- mask = t3; -- maskmode = V4SImode; -- e = w = 4; -- } -- -- vt = gen_const_vec_duplicate (maskmode, vt); -- vt = force_reg (maskmode, vt); -- mask = expand_simple_binop (maskmode, AND, mask, vt, -- NULL_RTX, 0, OPTAB_DIRECT); -- -- if (GET_MODE (target) != mode) -- target = gen_reg_rtx (mode); -- xops[0] = target; -- xops[1] = gen_lowpart (mode, t2); -- xops[2] = gen_lowpart (mode, t1); -- xops[3] = gen_rtx_EQ (maskmode, mask, vt); -- xops[4] = mask; -- xops[5] = vt; -- ok = ix86_expand_int_vcond (xops); -- gcc_assert (ok); -- if (target != operands[0]) -- emit_move_insn (operands[0], -- gen_lowpart (GET_MODE (operands[0]), target)); -- } --} -- --/* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is -- true if we should do zero extension, else sign extension. HIGH_P is -- true if we want the N/2 high elements, else the low elements. 
*/ -- --void --ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p) --{ -- machine_mode imode = GET_MODE (src); -- rtx tmp; -- -- if (TARGET_SSE4_1) -- { -- rtx (*unpack)(rtx, rtx); -- rtx (*extract)(rtx, rtx) = NULL; -- machine_mode halfmode = BLKmode; -- -- switch (imode) -- { -- case E_V64QImode: -- if (unsigned_p) -- unpack = gen_avx512bw_zero_extendv32qiv32hi2; -- else -- unpack = gen_avx512bw_sign_extendv32qiv32hi2; -- halfmode = V32QImode; -- extract -- = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi; -- break; -- case E_V32QImode: -- if (unsigned_p) -- unpack = gen_avx2_zero_extendv16qiv16hi2; -- else -- unpack = gen_avx2_sign_extendv16qiv16hi2; -- halfmode = V16QImode; -- extract -- = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi; -- break; -- case E_V32HImode: -- if (unsigned_p) -- unpack = gen_avx512f_zero_extendv16hiv16si2; -- else -- unpack = gen_avx512f_sign_extendv16hiv16si2; -- halfmode = V16HImode; -- extract -- = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi; -- break; -- case E_V16HImode: -- if (unsigned_p) -- unpack = gen_avx2_zero_extendv8hiv8si2; -- else -- unpack = gen_avx2_sign_extendv8hiv8si2; -- halfmode = V8HImode; -- extract -- = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi; -- break; -- case E_V16SImode: -- if (unsigned_p) -- unpack = gen_avx512f_zero_extendv8siv8di2; -- else -- unpack = gen_avx512f_sign_extendv8siv8di2; -- halfmode = V8SImode; -- extract -- = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si; -- break; -- case E_V8SImode: -- if (unsigned_p) -- unpack = gen_avx2_zero_extendv4siv4di2; -- else -- unpack = gen_avx2_sign_extendv4siv4di2; -- halfmode = V4SImode; -- extract -- = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si; -- break; -- case E_V16QImode: -- if (unsigned_p) -- unpack = gen_sse4_1_zero_extendv8qiv8hi2; -- else -- unpack = gen_sse4_1_sign_extendv8qiv8hi2; -- break; -- case E_V8HImode: -- if (unsigned_p) -- unpack = gen_sse4_1_zero_extendv4hiv4si2; -- else -- unpack = gen_sse4_1_sign_extendv4hiv4si2; -- break; -- case E_V4SImode: -- if (unsigned_p) -- unpack = gen_sse4_1_zero_extendv2siv2di2; -- else -- unpack = gen_sse4_1_sign_extendv2siv2di2; -- break; -- default: -- gcc_unreachable (); -- } -- -- if (GET_MODE_SIZE (imode) >= 32) -- { -- tmp = gen_reg_rtx (halfmode); -- emit_insn (extract (tmp, src)); -- } -- else if (high_p) -- { -- /* Shift higher 8 bytes to lower 8 bytes. 
*/ -- tmp = gen_reg_rtx (V1TImode); -- emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src), -- GEN_INT (64))); -- tmp = gen_lowpart (imode, tmp); -- } -- else -- tmp = src; -- -- emit_insn (unpack (dest, tmp)); -- } -- else -- { -- rtx (*unpack)(rtx, rtx, rtx); -- -- switch (imode) -- { -- case E_V16QImode: -- if (high_p) -- unpack = gen_vec_interleave_highv16qi; -- else -- unpack = gen_vec_interleave_lowv16qi; -- break; -- case E_V8HImode: -- if (high_p) -- unpack = gen_vec_interleave_highv8hi; -- else -- unpack = gen_vec_interleave_lowv8hi; -- break; -- case E_V4SImode: -- if (high_p) -- unpack = gen_vec_interleave_highv4si; -- else -- unpack = gen_vec_interleave_lowv4si; -- break; -- default: -- gcc_unreachable (); -- } -- -- if (unsigned_p) -- tmp = force_reg (imode, CONST0_RTX (imode)); -- else -- tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode), -- src, pc_rtx, pc_rtx); -- -- rtx tmp2 = gen_reg_rtx (imode); -- emit_insn (unpack (tmp2, src, tmp)); -- emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2)); -- } --} -- --/* Expand conditional increment or decrement using adb/sbb instructions. -- The default case using setcc followed by the conditional move can be -- done by generic code. */ --bool --ix86_expand_int_addcc (rtx operands[]) --{ -- enum rtx_code code = GET_CODE (operands[1]); -- rtx flags; -- rtx (*insn)(rtx, rtx, rtx, rtx, rtx); -- rtx compare_op; -- rtx val = const0_rtx; -- bool fpcmp = false; -- machine_mode mode; -- rtx op0 = XEXP (operands[1], 0); -- rtx op1 = XEXP (operands[1], 1); -- -- if (operands[3] != const1_rtx -- && operands[3] != constm1_rtx) -- return false; -- if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op)) -- return false; -- code = GET_CODE (compare_op); -- -- flags = XEXP (compare_op, 0); -- -- if (GET_MODE (flags) == CCFPmode) -- { -- fpcmp = true; -- code = ix86_fp_compare_code_to_integer (code); -- } -- -- if (code != LTU) -- { -- val = constm1_rtx; -- if (fpcmp) -- PUT_CODE (compare_op, -- reverse_condition_maybe_unordered -- (GET_CODE (compare_op))); -- else -- PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op))); -- } -- -- mode = GET_MODE (operands[0]); -- -- /* Construct either adc or sbb insn. */ -- if ((code == LTU) == (operands[3] == constm1_rtx)) -- { -- switch (mode) -- { -- case E_QImode: -- insn = gen_subqi3_carry; -- break; -- case E_HImode: -- insn = gen_subhi3_carry; -- break; -- case E_SImode: -- insn = gen_subsi3_carry; -- break; -- case E_DImode: -- insn = gen_subdi3_carry; -- break; -- default: -- gcc_unreachable (); -- } -- } -- else -- { -- switch (mode) -- { -- case E_QImode: -- insn = gen_addqi3_carry; -- break; -- case E_HImode: -- insn = gen_addhi3_carry; -- break; -- case E_SImode: -- insn = gen_addsi3_carry; -- break; -- case E_DImode: -- insn = gen_adddi3_carry; -- break; -- default: -- gcc_unreachable (); -- } -- } -- emit_insn (insn (operands[0], operands[2], val, flags, compare_op)); -- -- return true; --} -- -- --/* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode, -- but works for floating pointer parameters and nonoffsetable memories. -- For pushes, it returns just stack offsets; the values will be saved -- in the right order. Maximally three parts are generated. */ -- --static int --ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode) --{ -- int size; -- -- if (!TARGET_64BIT) -- size = mode==XFmode ? 
3 : GET_MODE_SIZE (mode) / 4; -- else -- size = (GET_MODE_SIZE (mode) + 4) / 8; -- -- gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand))); -- gcc_assert (size >= 2 && size <= 4); -- -- /* Optimize constant pool reference to immediates. This is used by fp -- moves, that force all constants to memory to allow combining. */ -- if (MEM_P (operand) && MEM_READONLY_P (operand)) -- operand = avoid_constant_pool_reference (operand); -- -- if (MEM_P (operand) && !offsettable_memref_p (operand)) -- { -- /* The only non-offsetable memories we handle are pushes. */ -- int ok = push_operand (operand, VOIDmode); -- -- gcc_assert (ok); -- -- operand = copy_rtx (operand); -- PUT_MODE (operand, word_mode); -- parts[0] = parts[1] = parts[2] = parts[3] = operand; -- return size; -- } -- -- if (GET_CODE (operand) == CONST_VECTOR) -- { -- scalar_int_mode imode = int_mode_for_mode (mode).require (); -- /* Caution: if we looked through a constant pool memory above, -- the operand may actually have a different mode now. That's -- ok, since we want to pun this all the way back to an integer. */ -- operand = simplify_subreg (imode, operand, GET_MODE (operand), 0); -- gcc_assert (operand != NULL); -- mode = imode; -- } -- -- if (!TARGET_64BIT) -- { -- if (mode == DImode) -- split_double_mode (mode, &operand, 1, &parts[0], &parts[1]); -- else -- { -- int i; -- -- if (REG_P (operand)) -- { -- gcc_assert (reload_completed); -- for (i = 0; i < size; i++) -- parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i); -- } -- else if (offsettable_memref_p (operand)) -- { -- operand = adjust_address (operand, SImode, 0); -- parts[0] = operand; -- for (i = 1; i < size; i++) -- parts[i] = adjust_address (operand, SImode, 4 * i); -- } -- else if (CONST_DOUBLE_P (operand)) -- { -- const REAL_VALUE_TYPE *r; -- long l[4]; -- -- r = CONST_DOUBLE_REAL_VALUE (operand); -- switch (mode) -- { -- case E_TFmode: -- real_to_target (l, r, mode); -- parts[3] = gen_int_mode (l[3], SImode); -- parts[2] = gen_int_mode (l[2], SImode); -- break; -- case E_XFmode: -- /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since -- long double may not be 80-bit. */ -- real_to_target (l, r, mode); -- parts[2] = gen_int_mode (l[2], SImode); -- break; -- case E_DFmode: -- REAL_VALUE_TO_TARGET_DOUBLE (*r, l); -- break; -- default: -- gcc_unreachable (); -- } -- parts[1] = gen_int_mode (l[1], SImode); -- parts[0] = gen_int_mode (l[0], SImode); -- } -- else -- gcc_unreachable (); -- } -- } -- else -- { -- if (mode == TImode) -- split_double_mode (mode, &operand, 1, &parts[0], &parts[1]); -- if (mode == XFmode || mode == TFmode) -- { -- machine_mode upper_mode = mode==XFmode ? SImode : DImode; -- if (REG_P (operand)) -- { -- gcc_assert (reload_completed); -- parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0); -- parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1); -- } -- else if (offsettable_memref_p (operand)) -- { -- operand = adjust_address (operand, DImode, 0); -- parts[0] = operand; -- parts[1] = adjust_address (operand, upper_mode, 8); -- } -- else if (CONST_DOUBLE_P (operand)) -- { -- long l[4]; -- -- real_to_target (l, CONST_DOUBLE_REAL_VALUE (operand), mode); -- -- /* real_to_target puts 32-bit pieces in each long. 
*/ -- parts[0] = gen_int_mode ((l[0] & HOST_WIDE_INT_C (0xffffffff)) -- | ((l[1] & HOST_WIDE_INT_C (0xffffffff)) -- << 32), DImode); -- -- if (upper_mode == SImode) -- parts[1] = gen_int_mode (l[2], SImode); -- else -- parts[1] -- = gen_int_mode ((l[2] & HOST_WIDE_INT_C (0xffffffff)) -- | ((l[3] & HOST_WIDE_INT_C (0xffffffff)) -- << 32), DImode); -- } -- else -- gcc_unreachable (); -- } -- } -- -- return size; --} -- --/* Emit insns to perform a move or push of DI, DF, XF, and TF values. -- Return false when normal moves are needed; true when all required -- insns have been emitted. Operands 2-4 contain the input values -- int the correct order; operands 5-7 contain the output values. */ -- --void --ix86_split_long_move (rtx operands[]) --{ -- rtx part[2][4]; -- int nparts, i, j; -- int push = 0; -- int collisions = 0; -- machine_mode mode = GET_MODE (operands[0]); -- bool collisionparts[4]; -- -- /* The DFmode expanders may ask us to move double. -- For 64bit target this is single move. By hiding the fact -- here we simplify i386.md splitters. */ -- if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8) -- { -- /* Optimize constant pool reference to immediates. This is used by -- fp moves, that force all constants to memory to allow combining. */ -- -- if (MEM_P (operands[1]) -- && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF -- && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0))) -- operands[1] = get_pool_constant (XEXP (operands[1], 0)); -- if (push_operand (operands[0], VOIDmode)) -- { -- operands[0] = copy_rtx (operands[0]); -- PUT_MODE (operands[0], word_mode); -- } -- else -- operands[0] = gen_lowpart (DImode, operands[0]); -- operands[1] = gen_lowpart (DImode, operands[1]); -- emit_move_insn (operands[0], operands[1]); -- return; -- } -- -- /* The only non-offsettable memory we handle is push. */ -- if (push_operand (operands[0], VOIDmode)) -- push = 1; -- else -- gcc_assert (!MEM_P (operands[0]) -- || offsettable_memref_p (operands[0])); -- -- nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0])); -- ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0])); -- -- /* When emitting push, take care for source operands on the stack. */ -- if (push && MEM_P (operands[1]) -- && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1])) -- { -- rtx src_base = XEXP (part[1][nparts - 1], 0); -- -- /* Compensate for the stack decrement by 4. */ -- if (!TARGET_64BIT && nparts == 3 -- && mode == XFmode && TARGET_128BIT_LONG_DOUBLE) -- src_base = plus_constant (Pmode, src_base, 4); -- -- /* src_base refers to the stack pointer and is -- automatically decreased by emitted push. */ -- for (i = 0; i < nparts; i++) -- part[1][i] = change_address (part[1][i], -- GET_MODE (part[1][i]), src_base); -- } -- -- /* We need to do copy in the right order in case an address register -- of the source overlaps the destination. */ -- if (REG_P (part[0][0]) && MEM_P (part[1][0])) -- { -- rtx tmp; -- -- for (i = 0; i < nparts; i++) -- { -- collisionparts[i] -- = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0)); -- if (collisionparts[i]) -- collisions++; -- } -- -- /* Collision in the middle part can be handled by reordering. 
*/ -- if (collisions == 1 && nparts == 3 && collisionparts [1]) -- { -- std::swap (part[0][1], part[0][2]); -- std::swap (part[1][1], part[1][2]); -- } -- else if (collisions == 1 -- && nparts == 4 -- && (collisionparts [1] || collisionparts [2])) -- { -- if (collisionparts [1]) -- { -- std::swap (part[0][1], part[0][2]); -- std::swap (part[1][1], part[1][2]); -- } -- else -- { -- std::swap (part[0][2], part[0][3]); -- std::swap (part[1][2], part[1][3]); -- } -- } -- -- /* If there are more collisions, we can't handle it by reordering. -- Do an lea to the last part and use only one colliding move. */ -- else if (collisions > 1) -- { -- rtx base, addr; -- -- collisions = 1; -- -- base = part[0][nparts - 1]; -- -- /* Handle the case when the last part isn't valid for lea. -- Happens in 64-bit mode storing the 12-byte XFmode. */ -- if (GET_MODE (base) != Pmode) -- base = gen_rtx_REG (Pmode, REGNO (base)); -- -- addr = XEXP (part[1][0], 0); -- if (TARGET_TLS_DIRECT_SEG_REFS) -- { -- struct ix86_address parts; -- int ok = ix86_decompose_address (addr, &parts); -- gcc_assert (ok); -- /* It is not valid to use %gs: or %fs: in lea. */ -- gcc_assert (parts.seg == ADDR_SPACE_GENERIC); -- } -- emit_insn (gen_rtx_SET (base, addr)); -- part[1][0] = replace_equiv_address (part[1][0], base); -- for (i = 1; i < nparts; i++) -- { -- tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i); -- part[1][i] = replace_equiv_address (part[1][i], tmp); -- } -- } -- } -- -- if (push) -- { -- if (!TARGET_64BIT) -- { -- if (nparts == 3) -- { -- if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode) -- emit_insn (ix86_gen_add3 (stack_pointer_rtx, -- stack_pointer_rtx, GEN_INT (-4))); -- emit_move_insn (part[0][2], part[1][2]); -- } -- else if (nparts == 4) -- { -- emit_move_insn (part[0][3], part[1][3]); -- emit_move_insn (part[0][2], part[1][2]); -- } -- } -- else -- { -- /* In 64bit mode we don't have 32bit push available. In case this is -- register, it is OK - we will just use larger counterpart. We also -- retype memory - these comes from attempt to avoid REX prefix on -- moving of second half of TFmode value. */ -- if (GET_MODE (part[1][1]) == SImode) -- { -- switch (GET_CODE (part[1][1])) -- { -- case MEM: -- part[1][1] = adjust_address (part[1][1], DImode, 0); -- break; -- -- case REG: -- part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1])); -- break; -- -- default: -- gcc_unreachable (); -- } -- -- if (GET_MODE (part[1][0]) == SImode) -- part[1][0] = part[1][1]; -- } -- } -- emit_move_insn (part[0][1], part[1][1]); -- emit_move_insn (part[0][0], part[1][0]); -- return; -- } -- -- /* Choose correct order to not overwrite the source before it is copied. */ -- if ((REG_P (part[0][0]) -- && REG_P (part[1][1]) -- && (REGNO (part[0][0]) == REGNO (part[1][1]) -- || (nparts == 3 -- && REGNO (part[0][0]) == REGNO (part[1][2])) -- || (nparts == 4 -- && REGNO (part[0][0]) == REGNO (part[1][3])))) -- || (collisions > 0 -- && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))) -- { -- for (i = 0, j = nparts - 1; i < nparts; i++, j--) -- { -- operands[2 + i] = part[0][j]; -- operands[6 + i] = part[1][j]; -- } -- } -- else -- { -- for (i = 0; i < nparts; i++) -- { -- operands[2 + i] = part[0][i]; -- operands[6 + i] = part[1][i]; -- } -- } -- -- /* If optimizing for size, attempt to locally unCSE nonzero constants. 
*/ -- if (optimize_insn_for_size_p ()) -- { -- for (j = 0; j < nparts - 1; j++) -- if (CONST_INT_P (operands[6 + j]) -- && operands[6 + j] != const0_rtx -- && REG_P (operands[2 + j])) -- for (i = j; i < nparts - 1; i++) -- if (CONST_INT_P (operands[7 + i]) -- && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j])) -- operands[7 + i] = operands[2 + j]; -- } -- -- for (i = 0; i < nparts; i++) -- emit_move_insn (operands[2 + i], operands[6 + i]); -- -- return; --} -- --/* Helper function of ix86_split_ashl used to generate an SImode/DImode -- left shift by a constant, either using a single shift or -- a sequence of add instructions. */ -- --static void --ix86_expand_ashl_const (rtx operand, int count, machine_mode mode) --{ -- rtx (*insn)(rtx, rtx, rtx); -- -- if (count == 1 -- || (count * ix86_cost->add <= ix86_cost->shift_const -- && !optimize_insn_for_size_p ())) -- { -- insn = mode == DImode ? gen_addsi3 : gen_adddi3; -- while (count-- > 0) -- emit_insn (insn (operand, operand, operand)); -- } -- else -- { -- insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3; -- emit_insn (insn (operand, operand, GEN_INT (count))); -- } --} -- --void --ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode) --{ -- rtx (*gen_ashl3)(rtx, rtx, rtx); -- rtx (*gen_shld)(rtx, rtx, rtx); -- int half_width = GET_MODE_BITSIZE (mode) >> 1; -- -- rtx low[2], high[2]; -- int count; -- -- if (CONST_INT_P (operands[2])) -- { -- split_double_mode (mode, operands, 2, low, high); -- count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1); -- -- if (count >= half_width) -- { -- emit_move_insn (high[0], low[1]); -- emit_move_insn (low[0], const0_rtx); -- -- if (count > half_width) -- ix86_expand_ashl_const (high[0], count - half_width, mode); -- } -- else -- { -- gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld; -- -- if (!rtx_equal_p (operands[0], operands[1])) -- emit_move_insn (operands[0], operands[1]); -- -- emit_insn (gen_shld (high[0], low[0], GEN_INT (count))); -- ix86_expand_ashl_const (low[0], count, mode); -- } -- return; -- } -- -- split_double_mode (mode, operands, 1, low, high); -- -- gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3; -- -- if (operands[1] == const1_rtx) -- { -- /* Assuming we've chosen a QImode capable registers, then 1 << N -- can be done with two 32/64-bit shifts, no branches, no cmoves. */ -- if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0])) -- { -- rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG); -- -- ix86_expand_clear (low[0]); -- ix86_expand_clear (high[0]); -- emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width))); -- -- d = gen_lowpart (QImode, low[0]); -- d = gen_rtx_STRICT_LOW_PART (VOIDmode, d); -- s = gen_rtx_EQ (QImode, flags, const0_rtx); -- emit_insn (gen_rtx_SET (d, s)); -- -- d = gen_lowpart (QImode, high[0]); -- d = gen_rtx_STRICT_LOW_PART (VOIDmode, d); -- s = gen_rtx_NE (QImode, flags, const0_rtx); -- emit_insn (gen_rtx_SET (d, s)); -- } -- -- /* Otherwise, we can get the same results by manually performing -- a bit extract operation on bit 5/6, and then performing the two -- shifts. The two methods of getting 0/1 into low/high are exactly -- the same size. Avoiding the shift in the bit extract case helps -- pentium4 a bit; no one else seems to care much either way. 
*/ -- else -- { -- machine_mode half_mode; -- rtx (*gen_lshr3)(rtx, rtx, rtx); -- rtx (*gen_and3)(rtx, rtx, rtx); -- rtx (*gen_xor3)(rtx, rtx, rtx); -- HOST_WIDE_INT bits; -- rtx x; -- -- if (mode == DImode) -- { -- half_mode = SImode; -- gen_lshr3 = gen_lshrsi3; -- gen_and3 = gen_andsi3; -- gen_xor3 = gen_xorsi3; -- bits = 5; -- } -- else -- { -- half_mode = DImode; -- gen_lshr3 = gen_lshrdi3; -- gen_and3 = gen_anddi3; -- gen_xor3 = gen_xordi3; -- bits = 6; -- } -- -- if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ()) -- x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]); -- else -- x = gen_lowpart (half_mode, operands[2]); -- emit_insn (gen_rtx_SET (high[0], x)); -- -- emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits))); -- emit_insn (gen_and3 (high[0], high[0], const1_rtx)); -- emit_move_insn (low[0], high[0]); -- emit_insn (gen_xor3 (low[0], low[0], const1_rtx)); -- } -- -- emit_insn (gen_ashl3 (low[0], low[0], operands[2])); -- emit_insn (gen_ashl3 (high[0], high[0], operands[2])); -- return; -- } -- -- if (operands[1] == constm1_rtx) -- { -- /* For -1 << N, we can avoid the shld instruction, because we -- know that we're shifting 0...31/63 ones into a -1. */ -- emit_move_insn (low[0], constm1_rtx); -- if (optimize_insn_for_size_p ()) -- emit_move_insn (high[0], low[0]); -- else -- emit_move_insn (high[0], constm1_rtx); -- } -- else -- { -- gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld; -- -- if (!rtx_equal_p (operands[0], operands[1])) -- emit_move_insn (operands[0], operands[1]); -- -- split_double_mode (mode, operands, 1, low, high); -- emit_insn (gen_shld (high[0], low[0], operands[2])); -- } -- -- emit_insn (gen_ashl3 (low[0], low[0], operands[2])); -- -- if (TARGET_CMOVE && scratch) -- { -- rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx) -- = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1; -- -- ix86_expand_clear (scratch); -- emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch)); -- } -- else -- { -- rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx) -- = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2; -- -- emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2])); -- } --} -- --void --ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode) --{ -- rtx (*gen_ashr3)(rtx, rtx, rtx) -- = mode == DImode ? gen_ashrsi3 : gen_ashrdi3; -- rtx (*gen_shrd)(rtx, rtx, rtx); -- int half_width = GET_MODE_BITSIZE (mode) >> 1; -- -- rtx low[2], high[2]; -- int count; -- -- if (CONST_INT_P (operands[2])) -- { -- split_double_mode (mode, operands, 2, low, high); -- count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1); -- -- if (count == GET_MODE_BITSIZE (mode) - 1) -- { -- emit_move_insn (high[0], high[1]); -- emit_insn (gen_ashr3 (high[0], high[0], -- GEN_INT (half_width - 1))); -- emit_move_insn (low[0], high[0]); -- -- } -- else if (count >= half_width) -- { -- emit_move_insn (low[0], high[1]); -- emit_move_insn (high[0], low[0]); -- emit_insn (gen_ashr3 (high[0], high[0], -- GEN_INT (half_width - 1))); -- -- if (count > half_width) -- emit_insn (gen_ashr3 (low[0], low[0], -- GEN_INT (count - half_width))); -- } -- else -- { -- gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd; -- -- if (!rtx_equal_p (operands[0], operands[1])) -- emit_move_insn (operands[0], operands[1]); -- -- emit_insn (gen_shrd (low[0], high[0], GEN_INT (count))); -- emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count))); -- } -- } -- else -- { -- gen_shrd = mode == DImode ? 
gen_x86_shrd : gen_x86_64_shrd; -- -- if (!rtx_equal_p (operands[0], operands[1])) -- emit_move_insn (operands[0], operands[1]); -- -- split_double_mode (mode, operands, 1, low, high); -- -- emit_insn (gen_shrd (low[0], high[0], operands[2])); -- emit_insn (gen_ashr3 (high[0], high[0], operands[2])); -- -- if (TARGET_CMOVE && scratch) -- { -- rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx) -- = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1; -- -- emit_move_insn (scratch, high[0]); -- emit_insn (gen_ashr3 (scratch, scratch, -- GEN_INT (half_width - 1))); -- emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2], -- scratch)); -- } -- else -- { -- rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx) -- = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3; -- -- emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2])); -- } -- } --} -- --void --ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode) --{ -- rtx (*gen_lshr3)(rtx, rtx, rtx) -- = mode == DImode ? gen_lshrsi3 : gen_lshrdi3; -- rtx (*gen_shrd)(rtx, rtx, rtx); -- int half_width = GET_MODE_BITSIZE (mode) >> 1; -- -- rtx low[2], high[2]; -- int count; -- -- if (CONST_INT_P (operands[2])) -- { -- split_double_mode (mode, operands, 2, low, high); -- count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1); -- -- if (count >= half_width) -- { -- emit_move_insn (low[0], high[1]); -- ix86_expand_clear (high[0]); -- -- if (count > half_width) -- emit_insn (gen_lshr3 (low[0], low[0], -- GEN_INT (count - half_width))); -- } -- else -- { -- gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd; -- -- if (!rtx_equal_p (operands[0], operands[1])) -- emit_move_insn (operands[0], operands[1]); -- -- emit_insn (gen_shrd (low[0], high[0], GEN_INT (count))); -- emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count))); -- } -- } -- else -- { -- gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd; -- -- if (!rtx_equal_p (operands[0], operands[1])) -- emit_move_insn (operands[0], operands[1]); -- -- split_double_mode (mode, operands, 1, low, high); -- -- emit_insn (gen_shrd (low[0], high[0], operands[2])); -- emit_insn (gen_lshr3 (high[0], high[0], operands[2])); -- -- if (TARGET_CMOVE && scratch) -- { -- rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx) -- = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1; -- -- ix86_expand_clear (scratch); -- emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2], -- scratch)); -- } -- else -- { -- rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx) -- = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2; -- -- emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2])); -- } -- } --} -- --/* Predict just emitted jump instruction to be taken with probability PROB. */ --static void --predict_jump (int prob) --{ -- rtx_insn *insn = get_last_insn (); -- gcc_assert (JUMP_P (insn)); -- add_reg_br_prob_note (insn, profile_probability::from_reg_br_prob_base (prob)); --} -- --/* Helper function for the string operations below. Dest VARIABLE whether -- it is aligned to VALUE bytes. If true, jump to the label. 
*/ --static rtx_code_label * --ix86_expand_aligntest (rtx variable, int value, bool epilogue) --{ -- rtx_code_label *label = gen_label_rtx (); -- rtx tmpcount = gen_reg_rtx (GET_MODE (variable)); -- if (GET_MODE (variable) == DImode) -- emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value))); -- else -- emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value))); -- emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable), -- 1, label); -- if (epilogue) -- predict_jump (REG_BR_PROB_BASE * 50 / 100); -- else -- predict_jump (REG_BR_PROB_BASE * 90 / 100); -- return label; --} -- --/* Adjust COUNTER by the VALUE. */ --static void --ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value) --{ -- rtx (*gen_add)(rtx, rtx, rtx) -- = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3; -- -- emit_insn (gen_add (countreg, countreg, GEN_INT (-value))); --} -- --/* Zero extend possibly SImode EXP to Pmode register. */ --rtx --ix86_zero_extend_to_Pmode (rtx exp) --{ -- return force_reg (Pmode, convert_to_mode (Pmode, exp, 1)); --} -- --/* Divide COUNTREG by SCALE. */ --static rtx --scale_counter (rtx countreg, int scale) --{ -- rtx sc; -- -- if (scale == 1) -- return countreg; -- if (CONST_INT_P (countreg)) -- return GEN_INT (INTVAL (countreg) / scale); -- gcc_assert (REG_P (countreg)); -- -- sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg, -- GEN_INT (exact_log2 (scale)), -- NULL, 1, OPTAB_DIRECT); -- return sc; --} -- --/* Return mode for the memcpy/memset loop counter. Prefer SImode over -- DImode for constant loop counts. */ -- --static machine_mode --counter_mode (rtx count_exp) --{ -- if (GET_MODE (count_exp) != VOIDmode) -- return GET_MODE (count_exp); -- if (!CONST_INT_P (count_exp)) -- return Pmode; -- if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff)) -- return DImode; -- return SImode; --} -- --/* Copy the address to a Pmode register. This is used for x32 to -- truncate DImode TLS address to a SImode register. */ -- --static rtx --ix86_copy_addr_to_reg (rtx addr) --{ -- rtx reg; -- if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode) -- { -- reg = copy_addr_to_reg (addr); -- REG_POINTER (reg) = 1; -- return reg; -- } -- else -- { -- gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode); -- reg = copy_to_mode_reg (DImode, addr); -- REG_POINTER (reg) = 1; -- return gen_rtx_SUBREG (SImode, reg, 0); -- } --} -- --/* When ISSETMEM is FALSE, output simple loop to move memory pointer to SRCPTR -- to DESTPTR via chunks of MODE unrolled UNROLL times, overall size is COUNT -- specified in bytes. When ISSETMEM is TRUE, output the equivalent loop to set -- memory by VALUE (supposed to be in MODE). -- -- The size is rounded down to whole number of chunk size moved at once. -- SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. 
*/ -- -- --static void --expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem, -- rtx destptr, rtx srcptr, rtx value, -- rtx count, machine_mode mode, int unroll, -- int expected_size, bool issetmem) --{ -- rtx_code_label *out_label, *top_label; -- rtx iter, tmp; -- machine_mode iter_mode = counter_mode (count); -- int piece_size_n = GET_MODE_SIZE (mode) * unroll; -- rtx piece_size = GEN_INT (piece_size_n); -- rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1)); -- rtx size; -- int i; -- -- top_label = gen_label_rtx (); -- out_label = gen_label_rtx (); -- iter = gen_reg_rtx (iter_mode); -- -- size = expand_simple_binop (iter_mode, AND, count, piece_size_mask, -- NULL, 1, OPTAB_DIRECT); -- /* Those two should combine. */ -- if (piece_size == const1_rtx) -- { -- emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode, -- true, out_label); -- predict_jump (REG_BR_PROB_BASE * 10 / 100); -- } -- emit_move_insn (iter, const0_rtx); -- -- emit_label (top_label); -- -- tmp = convert_modes (Pmode, iter_mode, iter, true); -- -- /* This assert could be relaxed - in this case we'll need to compute -- smallest power of two, containing in PIECE_SIZE_N and pass it to -- offset_address. */ -- gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0); -- destmem = offset_address (destmem, tmp, piece_size_n); -- destmem = adjust_address (destmem, mode, 0); -- -- if (!issetmem) -- { -- srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n); -- srcmem = adjust_address (srcmem, mode, 0); -- -- /* When unrolling for chips that reorder memory reads and writes, -- we can save registers by using single temporary. -- Also using 4 temporaries is overkill in 32bit mode. */ -- if (!TARGET_64BIT && 0) -- { -- for (i = 0; i < unroll; i++) -- { -- if (i) -- { -- destmem = adjust_address (copy_rtx (destmem), mode, -- GET_MODE_SIZE (mode)); -- srcmem = adjust_address (copy_rtx (srcmem), mode, -- GET_MODE_SIZE (mode)); -- } -- emit_move_insn (destmem, srcmem); -- } -- } -- else -- { -- rtx tmpreg[4]; -- gcc_assert (unroll <= 4); -- for (i = 0; i < unroll; i++) -- { -- tmpreg[i] = gen_reg_rtx (mode); -- if (i) -- srcmem = adjust_address (copy_rtx (srcmem), mode, -- GET_MODE_SIZE (mode)); -- emit_move_insn (tmpreg[i], srcmem); -- } -- for (i = 0; i < unroll; i++) -- { -- if (i) -- destmem = adjust_address (copy_rtx (destmem), mode, -- GET_MODE_SIZE (mode)); -- emit_move_insn (destmem, tmpreg[i]); -- } -- } -- } -- else -- for (i = 0; i < unroll; i++) -- { -- if (i) -- destmem = adjust_address (copy_rtx (destmem), mode, -- GET_MODE_SIZE (mode)); -- emit_move_insn (destmem, value); -- } -- -- tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter, -- true, OPTAB_LIB_WIDEN); -- if (tmp != iter) -- emit_move_insn (iter, tmp); -- -- emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode, -- true, top_label); -- if (expected_size != -1) -- { -- expected_size /= GET_MODE_SIZE (mode) * unroll; -- if (expected_size == 0) -- predict_jump (0); -- else if (expected_size > REG_BR_PROB_BASE) -- predict_jump (REG_BR_PROB_BASE - 1); -- else -- predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) -- / expected_size); -- } -- else -- predict_jump (REG_BR_PROB_BASE * 80 / 100); -- iter = ix86_zero_extend_to_Pmode (iter); -- tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr, -- true, OPTAB_LIB_WIDEN); -- if (tmp != destptr) -- emit_move_insn (destptr, tmp); -- if (!issetmem) -- { -- tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr, -- true, 
OPTAB_LIB_WIDEN); -- if (tmp != srcptr) -- emit_move_insn (srcptr, tmp); -- } -- emit_label (out_label); --} -- --/* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument. -- When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored. -- When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored. -- For setmem case, VALUE is a promoted to a wider size ORIG_VALUE. -- ORIG_VALUE is the original value passed to memset to fill the memory with. -- Other arguments have same meaning as for previous function. */ -- --static void --expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem, -- rtx destptr, rtx srcptr, rtx value, rtx orig_value, -- rtx count, -- machine_mode mode, bool issetmem) --{ -- rtx destexp; -- rtx srcexp; -- rtx countreg; -- HOST_WIDE_INT rounded_count; -- -- /* If possible, it is shorter to use rep movs. -- TODO: Maybe it is better to move this logic to decide_alg. */ -- if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3) -- && (!issetmem || orig_value == const0_rtx)) -- mode = SImode; -- -- if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode) -- destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0); -- -- countreg = ix86_zero_extend_to_Pmode (scale_counter (count, -- GET_MODE_SIZE (mode))); -- if (mode != QImode) -- { -- destexp = gen_rtx_ASHIFT (Pmode, countreg, -- GEN_INT (exact_log2 (GET_MODE_SIZE (mode)))); -- destexp = gen_rtx_PLUS (Pmode, destexp, destptr); -- } -- else -- destexp = gen_rtx_PLUS (Pmode, destptr, countreg); -- if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count)) -- { -- rounded_count -- = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode)); -- destmem = shallow_copy_rtx (destmem); -- set_mem_size (destmem, rounded_count); -- } -- else if (MEM_SIZE_KNOWN_P (destmem)) -- clear_mem_size (destmem); -- -- if (issetmem) -- { -- value = force_reg (mode, gen_lowpart (mode, value)); -- emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp)); -- } -- else -- { -- if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode) -- srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0); -- if (mode != QImode) -- { -- srcexp = gen_rtx_ASHIFT (Pmode, countreg, -- GEN_INT (exact_log2 (GET_MODE_SIZE (mode)))); -- srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr); -- } -- else -- srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg); -- if (CONST_INT_P (count)) -- { -- rounded_count -- = ROUND_DOWN (INTVAL (count), (HOST_WIDE_INT) GET_MODE_SIZE (mode)); -- srcmem = shallow_copy_rtx (srcmem); -- set_mem_size (srcmem, rounded_count); -- } -- else -- { -- if (MEM_SIZE_KNOWN_P (srcmem)) -- clear_mem_size (srcmem); -- } -- emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg, -- destexp, srcexp)); -- } --} -- --/* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to -- DESTMEM. -- SRC is passed by pointer to be updated on return. -- Return value is updated DST. */ --static rtx --emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr, -- HOST_WIDE_INT size_to_move) --{ -- rtx dst = destmem, src = *srcmem, adjust, tempreg; -- enum insn_code code; -- machine_mode move_mode; -- int piece_size, i; -- -- /* Find the widest mode in which we could perform moves. -- Start with the biggest power of 2 less than SIZE_TO_MOVE and half -- it until move of such size is supported. 
*/ -- piece_size = 1 << floor_log2 (size_to_move); -- while (!int_mode_for_size (piece_size * BITS_PER_UNIT, 0).exists (&move_mode) -- || (code = optab_handler (mov_optab, move_mode)) == CODE_FOR_nothing) -- { -- gcc_assert (piece_size > 1); -- piece_size >>= 1; -- } -- -- /* Find the corresponding vector mode with the same size as MOVE_MODE. -- MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */ -- if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode)) -- { -- int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode); -- if (!mode_for_vector (word_mode, nunits).exists (&move_mode) -- || (code = optab_handler (mov_optab, move_mode)) == CODE_FOR_nothing) -- { -- move_mode = word_mode; -- piece_size = GET_MODE_SIZE (move_mode); -- code = optab_handler (mov_optab, move_mode); -- } -- } -- gcc_assert (code != CODE_FOR_nothing); -- -- dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0); -- src = adjust_automodify_address_nv (src, move_mode, srcptr, 0); -- -- /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */ -- gcc_assert (size_to_move % piece_size == 0); -- adjust = GEN_INT (piece_size); -- for (i = 0; i < size_to_move; i += piece_size) -- { -- /* We move from memory to memory, so we'll need to do it via -- a temporary register. */ -- tempreg = gen_reg_rtx (move_mode); -- emit_insn (GEN_FCN (code) (tempreg, src)); -- emit_insn (GEN_FCN (code) (dst, tempreg)); -- -- emit_move_insn (destptr, -- gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust)); -- emit_move_insn (srcptr, -- gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust)); -- -- dst = adjust_automodify_address_nv (dst, move_mode, destptr, -- piece_size); -- src = adjust_automodify_address_nv (src, move_mode, srcptr, -- piece_size); -- } -- -- /* Update DST and SRC rtx. */ -- *srcmem = src; -- return dst; --} -- --/* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */ --static void --expand_movmem_epilogue (rtx destmem, rtx srcmem, -- rtx destptr, rtx srcptr, rtx count, int max_size) --{ -- rtx src, dest; -- if (CONST_INT_P (count)) -- { -- HOST_WIDE_INT countval = INTVAL (count); -- HOST_WIDE_INT epilogue_size = countval % max_size; -- int i; -- -- /* For now MAX_SIZE should be a power of 2. This assert could be -- relaxed, but it'll require a bit more complicated epilogue -- expanding. */ -- gcc_assert ((max_size & (max_size - 1)) == 0); -- for (i = max_size; i >= 1; i >>= 1) -- { -- if (epilogue_size & i) -- destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i); -- } -- return; -- } -- if (max_size > 8) -- { -- count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1), -- count, 1, OPTAB_DIRECT); -- expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL, -- count, QImode, 1, 4, false); -- return; -- } -- -- /* When there are stringops, we can cheaply increase dest and src pointers. -- Otherwise we save code size by maintaining offset (zero is readily -- available from preceding rep operation) and using x86 addressing modes. 
-- */ -- if (TARGET_SINGLE_STRINGOP) -- { -- if (max_size > 4) -- { -- rtx_code_label *label = ix86_expand_aligntest (count, 4, true); -- src = change_address (srcmem, SImode, srcptr); -- dest = change_address (destmem, SImode, destptr); -- emit_insn (gen_strmov (destptr, dest, srcptr, src)); -- emit_label (label); -- LABEL_NUSES (label) = 1; -- } -- if (max_size > 2) -- { -- rtx_code_label *label = ix86_expand_aligntest (count, 2, true); -- src = change_address (srcmem, HImode, srcptr); -- dest = change_address (destmem, HImode, destptr); -- emit_insn (gen_strmov (destptr, dest, srcptr, src)); -- emit_label (label); -- LABEL_NUSES (label) = 1; -- } -- if (max_size > 1) -- { -- rtx_code_label *label = ix86_expand_aligntest (count, 1, true); -- src = change_address (srcmem, QImode, srcptr); -- dest = change_address (destmem, QImode, destptr); -- emit_insn (gen_strmov (destptr, dest, srcptr, src)); -- emit_label (label); -- LABEL_NUSES (label) = 1; -- } -- } -- else -- { -- rtx offset = force_reg (Pmode, const0_rtx); -- rtx tmp; -- -- if (max_size > 4) -- { -- rtx_code_label *label = ix86_expand_aligntest (count, 4, true); -- src = change_address (srcmem, SImode, srcptr); -- dest = change_address (destmem, SImode, destptr); -- emit_move_insn (dest, src); -- tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL, -- true, OPTAB_LIB_WIDEN); -- if (tmp != offset) -- emit_move_insn (offset, tmp); -- emit_label (label); -- LABEL_NUSES (label) = 1; -- } -- if (max_size > 2) -- { -- rtx_code_label *label = ix86_expand_aligntest (count, 2, true); -- tmp = gen_rtx_PLUS (Pmode, srcptr, offset); -- src = change_address (srcmem, HImode, tmp); -- tmp = gen_rtx_PLUS (Pmode, destptr, offset); -- dest = change_address (destmem, HImode, tmp); -- emit_move_insn (dest, src); -- tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp, -- true, OPTAB_LIB_WIDEN); -- if (tmp != offset) -- emit_move_insn (offset, tmp); -- emit_label (label); -- LABEL_NUSES (label) = 1; -- } -- if (max_size > 1) -- { -- rtx_code_label *label = ix86_expand_aligntest (count, 1, true); -- tmp = gen_rtx_PLUS (Pmode, srcptr, offset); -- src = change_address (srcmem, QImode, tmp); -- tmp = gen_rtx_PLUS (Pmode, destptr, offset); -- dest = change_address (destmem, QImode, tmp); -- emit_move_insn (dest, src); -- emit_label (label); -- LABEL_NUSES (label) = 1; -- } -- } --} -- --/* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM -- with value PROMOTED_VAL. -- SRC is passed by pointer to be updated on return. -- Return value is updated DST. */ --static rtx --emit_memset (rtx destmem, rtx destptr, rtx promoted_val, -- HOST_WIDE_INT size_to_move) --{ -- rtx dst = destmem, adjust; -- enum insn_code code; -- machine_mode move_mode; -- int piece_size, i; -- -- /* Find the widest mode in which we could perform moves. -- Start with the biggest power of 2 less than SIZE_TO_MOVE and half -- it until move of such size is supported. */ -- move_mode = GET_MODE (promoted_val); -- if (move_mode == VOIDmode) -- move_mode = QImode; -- if (size_to_move < GET_MODE_SIZE (move_mode)) -- { -- unsigned int move_bits = size_to_move * BITS_PER_UNIT; -- move_mode = int_mode_for_size (move_bits, 0).require (); -- promoted_val = gen_lowpart (move_mode, promoted_val); -- } -- piece_size = GET_MODE_SIZE (move_mode); -- code = optab_handler (mov_optab, move_mode); -- gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX); -- -- dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0); -- -- /* Emit moves. 
We'll need SIZE_TO_MOVE/PIECE_SIZES moves. */ -- gcc_assert (size_to_move % piece_size == 0); -- adjust = GEN_INT (piece_size); -- for (i = 0; i < size_to_move; i += piece_size) -- { -- if (piece_size <= GET_MODE_SIZE (word_mode)) -- { -- emit_insn (gen_strset (destptr, dst, promoted_val)); -- dst = adjust_automodify_address_nv (dst, move_mode, destptr, -- piece_size); -- continue; -- } -- -- emit_insn (GEN_FCN (code) (dst, promoted_val)); -- -- emit_move_insn (destptr, -- gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust)); -- -- dst = adjust_automodify_address_nv (dst, move_mode, destptr, -- piece_size); -- } -- -- /* Update DST rtx. */ -- return dst; --} --/* Output code to set at most count & (max_size - 1) bytes starting by DEST. */ --static void --expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value, -- rtx count, int max_size) --{ -- count = expand_simple_binop (counter_mode (count), AND, count, -- GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT); -- expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL, -- gen_lowpart (QImode, value), count, QImode, -- 1, max_size / 2, true); --} -- --/* Output code to set at most count & (max_size - 1) bytes starting by DEST. */ --static void --expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value, -- rtx count, int max_size) --{ -- rtx dest; -- -- if (CONST_INT_P (count)) -- { -- HOST_WIDE_INT countval = INTVAL (count); -- HOST_WIDE_INT epilogue_size = countval % max_size; -- int i; -- -- /* For now MAX_SIZE should be a power of 2. This assert could be -- relaxed, but it'll require a bit more complicated epilogue -- expanding. */ -- gcc_assert ((max_size & (max_size - 1)) == 0); -- for (i = max_size; i >= 1; i >>= 1) -- { -- if (epilogue_size & i) -- { -- if (vec_value && i > GET_MODE_SIZE (GET_MODE (value))) -- destmem = emit_memset (destmem, destptr, vec_value, i); -- else -- destmem = emit_memset (destmem, destptr, value, i); -- } -- } -- return; -- } -- if (max_size > 32) -- { -- expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size); -- return; -- } -- if (max_size > 16) -- { -- rtx_code_label *label = ix86_expand_aligntest (count, 16, true); -- if (TARGET_64BIT) -- { -- dest = change_address (destmem, DImode, destptr); -- emit_insn (gen_strset (destptr, dest, value)); -- dest = adjust_automodify_address_nv (dest, DImode, destptr, 8); -- emit_insn (gen_strset (destptr, dest, value)); -- } -- else -- { -- dest = change_address (destmem, SImode, destptr); -- emit_insn (gen_strset (destptr, dest, value)); -- dest = adjust_automodify_address_nv (dest, SImode, destptr, 4); -- emit_insn (gen_strset (destptr, dest, value)); -- dest = adjust_automodify_address_nv (dest, SImode, destptr, 8); -- emit_insn (gen_strset (destptr, dest, value)); -- dest = adjust_automodify_address_nv (dest, SImode, destptr, 12); -- emit_insn (gen_strset (destptr, dest, value)); -- } -- emit_label (label); -- LABEL_NUSES (label) = 1; -- } -- if (max_size > 8) -- { -- rtx_code_label *label = ix86_expand_aligntest (count, 8, true); -- if (TARGET_64BIT) -- { -- dest = change_address (destmem, DImode, destptr); -- emit_insn (gen_strset (destptr, dest, value)); -- } -- else -- { -- dest = change_address (destmem, SImode, destptr); -- emit_insn (gen_strset (destptr, dest, value)); -- dest = adjust_automodify_address_nv (dest, SImode, destptr, 4); -- emit_insn (gen_strset (destptr, dest, value)); -- } -- emit_label (label); -- LABEL_NUSES (label) = 1; -- } -- if (max_size > 4) -- { -- rtx_code_label *label = 
ix86_expand_aligntest (count, 4, true); -- dest = change_address (destmem, SImode, destptr); -- emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value))); -- emit_label (label); -- LABEL_NUSES (label) = 1; -- } -- if (max_size > 2) -- { -- rtx_code_label *label = ix86_expand_aligntest (count, 2, true); -- dest = change_address (destmem, HImode, destptr); -- emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value))); -- emit_label (label); -- LABEL_NUSES (label) = 1; -- } -- if (max_size > 1) -- { -- rtx_code_label *label = ix86_expand_aligntest (count, 1, true); -- dest = change_address (destmem, QImode, destptr); -- emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value))); -- emit_label (label); -- LABEL_NUSES (label) = 1; -- } --} -- --/* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to -- DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN. -- Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are -- ignored. -- Return value is updated DESTMEM. */ --static rtx --expand_set_or_movmem_prologue (rtx destmem, rtx srcmem, -- rtx destptr, rtx srcptr, rtx value, -- rtx vec_value, rtx count, int align, -- int desired_alignment, bool issetmem) --{ -- int i; -- for (i = 1; i < desired_alignment; i <<= 1) -- { -- if (align <= i) -- { -- rtx_code_label *label = ix86_expand_aligntest (destptr, i, false); -- if (issetmem) -- { -- if (vec_value && i > GET_MODE_SIZE (GET_MODE (value))) -- destmem = emit_memset (destmem, destptr, vec_value, i); -- else -- destmem = emit_memset (destmem, destptr, value, i); -- } -- else -- destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i); -- ix86_adjust_counter (count, i); -- emit_label (label); -- LABEL_NUSES (label) = 1; -- set_mem_align (destmem, i * 2 * BITS_PER_UNIT); -- } -- } -- return destmem; --} -- --/* Test if COUNT&SIZE is nonzero and if so, expand movme -- or setmem sequence that is valid for SIZE..2*SIZE-1 bytes -- and jump to DONE_LABEL. */ --static void --expand_small_movmem_or_setmem (rtx destmem, rtx srcmem, -- rtx destptr, rtx srcptr, -- rtx value, rtx vec_value, -- rtx count, int size, -- rtx done_label, bool issetmem) --{ -- rtx_code_label *label = ix86_expand_aligntest (count, size, false); -- machine_mode mode = int_mode_for_size (size * BITS_PER_UNIT, 1).else_blk (); -- rtx modesize; -- int n; -- -- /* If we do not have vector value to copy, we must reduce size. */ -- if (issetmem) -- { -- if (!vec_value) -- { -- if (GET_MODE (value) == VOIDmode && size > 8) -- mode = Pmode; -- else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value))) -- mode = GET_MODE (value); -- } -- else -- mode = GET_MODE (vec_value), value = vec_value; -- } -- else -- { -- /* Choose appropriate vector mode. */ -- if (size >= 32) -- mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode; -- else if (size >= 16) -- mode = TARGET_SSE ? 
V16QImode : DImode; -- srcmem = change_address (srcmem, mode, srcptr); -- } -- destmem = change_address (destmem, mode, destptr); -- modesize = GEN_INT (GET_MODE_SIZE (mode)); -- gcc_assert (GET_MODE_SIZE (mode) <= size); -- for (n = 0; n * GET_MODE_SIZE (mode) < size; n++) -- { -- if (issetmem) -- emit_move_insn (destmem, gen_lowpart (mode, value)); -- else -- { -- emit_move_insn (destmem, srcmem); -- srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode)); -- } -- destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode)); -- } -- -- destmem = offset_address (destmem, count, 1); -- destmem = offset_address (destmem, GEN_INT (-2 * size), -- GET_MODE_SIZE (mode)); -- if (!issetmem) -- { -- srcmem = offset_address (srcmem, count, 1); -- srcmem = offset_address (srcmem, GEN_INT (-2 * size), -- GET_MODE_SIZE (mode)); -- } -- for (n = 0; n * GET_MODE_SIZE (mode) < size; n++) -- { -- if (issetmem) -- emit_move_insn (destmem, gen_lowpart (mode, value)); -- else -- { -- emit_move_insn (destmem, srcmem); -- srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode)); -- } -- destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode)); -- } -- emit_jump_insn (gen_jump (done_label)); -- emit_barrier (); -- -- emit_label (label); -- LABEL_NUSES (label) = 1; --} -- --/* Handle small memcpy (up to SIZE that is supposed to be small power of 2. -- and get ready for the main memcpy loop by copying iniital DESIRED_ALIGN-ALIGN -- bytes and last SIZE bytes adjusitng DESTPTR/SRCPTR/COUNT in a way we can -- proceed with an loop copying SIZE bytes at once. Do moves in MODE. -- DONE_LABEL is a label after the whole copying sequence. The label is created -- on demand if *DONE_LABEL is NULL. -- MIN_SIZE is minimal size of block copied. This value gets adjusted for new -- bounds after the initial copies. -- -- DESTMEM/SRCMEM are memory expressions pointing to the copies block, -- DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicate whether -- we will dispatch to a library call for large blocks. -- -- In pseudocode we do: -- -- if (COUNT < SIZE) -- { -- Assume that SIZE is 4. 
Bigger sizes are handled analogously -- if (COUNT & 4) -- { -- copy 4 bytes from SRCPTR to DESTPTR -- copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4 -- goto done_label -- } -- if (!COUNT) -- goto done_label; -- copy 1 byte from SRCPTR to DESTPTR -- if (COUNT & 2) -- { -- copy 2 bytes from SRCPTR to DESTPTR -- copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2 -- } -- } -- else -- { -- copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR -- copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE -- -- OLD_DESPTR = DESTPTR; -- Align DESTPTR up to DESIRED_ALIGN -- SRCPTR += DESTPTR - OLD_DESTPTR -- COUNT -= DEST_PTR - OLD_DESTPTR -- if (DYNAMIC_CHECK) -- Round COUNT down to multiple of SIZE -- << optional caller supplied zero size guard is here >> -- << optional caller supplied dynamic check is here >> -- << caller supplied main copy loop is here >> -- } -- done_label: -- */ --static void --expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem, -- rtx *destptr, rtx *srcptr, -- machine_mode mode, -- rtx value, rtx vec_value, -- rtx *count, -- rtx_code_label **done_label, -- int size, -- int desired_align, -- int align, -- unsigned HOST_WIDE_INT *min_size, -- bool dynamic_check, -- bool issetmem) --{ -- rtx_code_label *loop_label = NULL, *label; -- int n; -- rtx modesize; -- int prolog_size = 0; -- rtx mode_value; -- -- /* Chose proper value to copy. */ -- if (issetmem && VECTOR_MODE_P (mode)) -- mode_value = vec_value; -- else -- mode_value = value; -- gcc_assert (GET_MODE_SIZE (mode) <= size); -- -- /* See if block is big or small, handle small blocks. */ -- if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size) -- { -- int size2 = size; -- loop_label = gen_label_rtx (); -- -- if (!*done_label) -- *done_label = gen_label_rtx (); -- -- emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count), -- 1, loop_label); -- size2 >>= 1; -- -- /* Handle sizes > 3. */ -- for (;size2 > 2; size2 >>= 1) -- expand_small_movmem_or_setmem (destmem, srcmem, -- *destptr, *srcptr, -- value, vec_value, -- *count, -- size2, *done_label, issetmem); -- /* Nothing to copy? Jump to DONE_LABEL if so */ -- emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count), -- 1, *done_label); -- -- /* Do a byte copy. */ -- destmem = change_address (destmem, QImode, *destptr); -- if (issetmem) -- emit_move_insn (destmem, gen_lowpart (QImode, value)); -- else -- { -- srcmem = change_address (srcmem, QImode, *srcptr); -- emit_move_insn (destmem, srcmem); -- } -- -- /* Handle sizes 2 and 3. */ -- label = ix86_expand_aligntest (*count, 2, false); -- destmem = change_address (destmem, HImode, *destptr); -- destmem = offset_address (destmem, *count, 1); -- destmem = offset_address (destmem, GEN_INT (-2), 2); -- if (issetmem) -- emit_move_insn (destmem, gen_lowpart (HImode, value)); -- else -- { -- srcmem = change_address (srcmem, HImode, *srcptr); -- srcmem = offset_address (srcmem, *count, 1); -- srcmem = offset_address (srcmem, GEN_INT (-2), 2); -- emit_move_insn (destmem, srcmem); -- } -- -- emit_label (label); -- LABEL_NUSES (label) = 1; -- emit_jump_insn (gen_jump (*done_label)); -- emit_barrier (); -- } -- else -- gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size -- || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size); -- -- /* Start memcpy for COUNT >= SIZE. */ -- if (loop_label) -- { -- emit_label (loop_label); -- LABEL_NUSES (loop_label) = 1; -- } -- -- /* Copy first desired_align bytes. 
*/ -- if (!issetmem) -- srcmem = change_address (srcmem, mode, *srcptr); -- destmem = change_address (destmem, mode, *destptr); -- modesize = GEN_INT (GET_MODE_SIZE (mode)); -- for (n = 0; prolog_size < desired_align - align; n++) -- { -- if (issetmem) -- emit_move_insn (destmem, mode_value); -- else -- { -- emit_move_insn (destmem, srcmem); -- srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode)); -- } -- destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode)); -- prolog_size += GET_MODE_SIZE (mode); -- } -- -- -- /* Copy last SIZE bytes. */ -- destmem = offset_address (destmem, *count, 1); -- destmem = offset_address (destmem, -- GEN_INT (-size - prolog_size), -- 1); -- if (issetmem) -- emit_move_insn (destmem, mode_value); -- else -- { -- srcmem = offset_address (srcmem, *count, 1); -- srcmem = offset_address (srcmem, -- GEN_INT (-size - prolog_size), -- 1); -- emit_move_insn (destmem, srcmem); -- } -- for (n = 1; n * GET_MODE_SIZE (mode) < size; n++) -- { -- destmem = offset_address (destmem, modesize, 1); -- if (issetmem) -- emit_move_insn (destmem, mode_value); -- else -- { -- srcmem = offset_address (srcmem, modesize, 1); -- emit_move_insn (destmem, srcmem); -- } -- } -- -- /* Align destination. */ -- if (desired_align > 1 && desired_align > align) -- { -- rtx saveddest = *destptr; -- -- gcc_assert (desired_align <= size); -- /* Align destptr up, place it to new register. */ -- *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr, -- GEN_INT (prolog_size), -- NULL_RTX, 1, OPTAB_DIRECT); -- if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest)) -- REG_POINTER (*destptr) = 1; -- *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr, -- GEN_INT (-desired_align), -- *destptr, 1, OPTAB_DIRECT); -- /* See how many bytes we skipped. */ -- saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest, -- *destptr, -- saveddest, 1, OPTAB_DIRECT); -- /* Adjust srcptr and count. */ -- if (!issetmem) -- *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, -- saveddest, *srcptr, 1, OPTAB_DIRECT); -- *count = expand_simple_binop (GET_MODE (*count), PLUS, *count, -- saveddest, *count, 1, OPTAB_DIRECT); -- /* We copied at most size + prolog_size. */ -- if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size)) -- *min_size -- = ROUND_DOWN (*min_size - size, (unsigned HOST_WIDE_INT)size); -- else -- *min_size = 0; -- -- /* Our loops always round down the block size, but for dispatch to -- library we need precise value. */ -- if (dynamic_check) -- *count = expand_simple_binop (GET_MODE (*count), AND, *count, -- GEN_INT (-size), *count, 1, OPTAB_DIRECT); -- } -- else -- { -- gcc_assert (prolog_size == 0); -- /* Decrease count, so we won't end up copying last word twice. */ -- if (!CONST_INT_P (*count)) -- *count = expand_simple_binop (GET_MODE (*count), PLUS, *count, -- constm1_rtx, *count, 1, OPTAB_DIRECT); -- else -- *count = GEN_INT (ROUND_DOWN (UINTVAL (*count) - 1, -- (unsigned HOST_WIDE_INT)size)); -- if (*min_size) -- *min_size = ROUND_DOWN (*min_size - 1, (unsigned HOST_WIDE_INT)size); -- } --} -- -- --/* This function is like the previous one, except here we know how many bytes -- need to be copied. That allows us to update alignment not only of DST, which -- is returned, but also of SRC, which is passed as a pointer for that -- reason. 
*/ --static rtx --expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg, -- rtx srcreg, rtx value, rtx vec_value, -- int desired_align, int align_bytes, -- bool issetmem) --{ -- rtx src = NULL; -- rtx orig_dst = dst; -- rtx orig_src = NULL; -- int piece_size = 1; -- int copied_bytes = 0; -- -- if (!issetmem) -- { -- gcc_assert (srcp != NULL); -- src = *srcp; -- orig_src = src; -- } -- -- for (piece_size = 1; -- piece_size <= desired_align && copied_bytes < align_bytes; -- piece_size <<= 1) -- { -- if (align_bytes & piece_size) -- { -- if (issetmem) -- { -- if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value))) -- dst = emit_memset (dst, destreg, vec_value, piece_size); -- else -- dst = emit_memset (dst, destreg, value, piece_size); -- } -- else -- dst = emit_memmov (dst, &src, destreg, srcreg, piece_size); -- copied_bytes += piece_size; -- } -- } -- if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT) -- set_mem_align (dst, desired_align * BITS_PER_UNIT); -- if (MEM_SIZE_KNOWN_P (orig_dst)) -- set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes); -- -- if (!issetmem) -- { -- int src_align_bytes = get_mem_align_offset (src, desired_align -- * BITS_PER_UNIT); -- if (src_align_bytes >= 0) -- src_align_bytes = desired_align - src_align_bytes; -- if (src_align_bytes >= 0) -- { -- unsigned int src_align; -- for (src_align = desired_align; src_align >= 2; src_align >>= 1) -- { -- if ((src_align_bytes & (src_align - 1)) -- == (align_bytes & (src_align - 1))) -- break; -- } -- if (src_align > (unsigned int) desired_align) -- src_align = desired_align; -- if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT) -- set_mem_align (src, src_align * BITS_PER_UNIT); -- } -- if (MEM_SIZE_KNOWN_P (orig_src)) -- set_mem_size (src, MEM_SIZE (orig_src) - align_bytes); -- *srcp = src; -- } -- -- return dst; --} -- --/* Return true if ALG can be used in current context. -- Assume we expand memset if MEMSET is true. */ --static bool --alg_usable_p (enum stringop_alg alg, bool memset, bool have_as) --{ -- if (alg == no_stringop) -- return false; -- if (alg == vector_loop) -- return TARGET_SSE || TARGET_AVX; -- /* Algorithms using the rep prefix want at least edi and ecx; -- additionally, memset wants eax and memcpy wants esi. Don't -- consider such algorithms if the user has appropriated those -- registers for their own purposes, or if we have a non-default -- address space, since some string insns cannot override the segment. */ -- if (alg == rep_prefix_1_byte -- || alg == rep_prefix_4_byte -- || alg == rep_prefix_8_byte) -- { -- if (have_as) -- return false; -- if (fixed_regs[CX_REG] -- || fixed_regs[DI_REG] -- || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG])) -- return false; -- } -- return true; --} -- --/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */ --static enum stringop_alg --decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, -- unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size, -- bool memset, bool zero_memset, bool have_as, -- int *dynamic_check, bool *noalign, bool recur) --{ -- const struct stringop_algs *algs; -- bool optimize_for_speed; -- int max = 0; -- const struct processor_costs *cost; -- int i; -- bool any_alg_usable_p = false; -- -- *noalign = false; -- *dynamic_check = -1; -- -- /* Even if the string operation call is cold, we still might spend a lot -- of time processing large blocks. 
*/ -- if (optimize_function_for_size_p (cfun) -- || (optimize_insn_for_size_p () -- && (max_size < 256 -- || (expected_size != -1 && expected_size < 256)))) -- optimize_for_speed = false; -- else -- optimize_for_speed = true; -- -- cost = optimize_for_speed ? ix86_cost : &ix86_size_cost; -- if (memset) -- algs = &cost->memset[TARGET_64BIT != 0]; -- else -- algs = &cost->memcpy[TARGET_64BIT != 0]; -- -- /* See maximal size for user defined algorithm. */ -- for (i = 0; i < MAX_STRINGOP_ALGS; i++) -- { -- enum stringop_alg candidate = algs->size[i].alg; -- bool usable = alg_usable_p (candidate, memset, have_as); -- any_alg_usable_p |= usable; -- -- if (candidate != libcall && candidate && usable) -- max = algs->size[i].max; -- } -- -- /* If expected size is not known but max size is small enough -- so inline version is a win, set expected size into -- the range. */ -- if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1) -- && expected_size == -1) -- expected_size = min_size / 2 + max_size / 2; -- -- /* If user specified the algorithm, honor it if possible. */ -- if (ix86_stringop_alg != no_stringop -- && alg_usable_p (ix86_stringop_alg, memset, have_as)) -- return ix86_stringop_alg; -- /* rep; movq or rep; movl is the smallest variant. */ -- else if (!optimize_for_speed) -- { -- *noalign = true; -- if (!count || (count & 3) || (memset && !zero_memset)) -- return alg_usable_p (rep_prefix_1_byte, memset, have_as) -- ? rep_prefix_1_byte : loop_1_byte; -- else -- return alg_usable_p (rep_prefix_4_byte, memset, have_as) -- ? rep_prefix_4_byte : loop; -- } -- /* Very tiny blocks are best handled via the loop, REP is expensive to -- setup. */ -- else if (expected_size != -1 && expected_size < 4) -- return loop_1_byte; -- else if (expected_size != -1) -- { -- enum stringop_alg alg = libcall; -- bool alg_noalign = false; -- for (i = 0; i < MAX_STRINGOP_ALGS; i++) -- { -- /* We get here if the algorithms that were not libcall-based -- were rep-prefix based and we are unable to use rep prefixes -- based on global register usage. Break out of the loop and -- use the heuristic below. */ -- if (algs->size[i].max == 0) -- break; -- if (algs->size[i].max >= expected_size || algs->size[i].max == -1) -- { -- enum stringop_alg candidate = algs->size[i].alg; -- -- if (candidate != libcall -- && alg_usable_p (candidate, memset, have_as)) -- { -- alg = candidate; -- alg_noalign = algs->size[i].noalign; -- } -- /* Honor TARGET_INLINE_ALL_STRINGOPS by picking -- last non-libcall inline algorithm. */ -- if (TARGET_INLINE_ALL_STRINGOPS) -- { -- /* When the current size is best to be copied by a libcall, -- but we are still forced to inline, run the heuristic below -- that will pick code for medium sized blocks. */ -- if (alg != libcall) -- { -- *noalign = alg_noalign; -- return alg; -- } -- else if (!any_alg_usable_p) -- break; -- } -- else if (alg_usable_p (candidate, memset, have_as)) -- { -- *noalign = algs->size[i].noalign; -- return candidate; -- } -- } -- } -- } -- /* When asked to inline the call anyway, try to pick meaningful choice. -- We look for maximal size of block that is faster to copy by hand and -- take blocks of at most of that size guessing that average size will -- be roughly half of the block. -- -- If this turns out to be bad, we might simply specify the preferred -- choice in ix86_costs. 
*/ -- if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY) -- && (algs->unknown_size == libcall -- || !alg_usable_p (algs->unknown_size, memset, have_as))) -- { -- enum stringop_alg alg; -- HOST_WIDE_INT new_expected_size = (max > 0 ? max : 4096) / 2; -- -- /* If there aren't any usable algorithms or if recursing already, -- then recursing on smaller sizes or same size isn't going to -- find anything. Just return the simple byte-at-a-time copy loop. */ -- if (!any_alg_usable_p || recur) -- { -- /* Pick something reasonable. */ -- if (TARGET_INLINE_STRINGOPS_DYNAMICALLY && !recur) -- *dynamic_check = 128; -- return loop_1_byte; -- } -- alg = decide_alg (count, new_expected_size, min_size, max_size, memset, -- zero_memset, have_as, dynamic_check, noalign, true); -- gcc_assert (*dynamic_check == -1); -- if (TARGET_INLINE_STRINGOPS_DYNAMICALLY) -- *dynamic_check = max; -- else -- gcc_assert (alg != libcall); -- return alg; -- } -- return (alg_usable_p (algs->unknown_size, memset, have_as) -- ? algs->unknown_size : libcall); --} -- --/* Decide on alignment. We know that the operand is already aligned to ALIGN -- (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */ --static int --decide_alignment (int align, -- enum stringop_alg alg, -- int expected_size, -- machine_mode move_mode) --{ -- int desired_align = 0; -- -- gcc_assert (alg != no_stringop); -- -- if (alg == libcall) -- return 0; -- if (move_mode == VOIDmode) -- return 0; -- -- desired_align = GET_MODE_SIZE (move_mode); -- /* PentiumPro has special logic triggering for 8 byte aligned blocks. -- copying whole cacheline at once. */ -- if (TARGET_PENTIUMPRO -- && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte)) -- desired_align = 8; -- -- if (optimize_size) -- desired_align = 1; -- if (desired_align < align) -- desired_align = align; -- if (expected_size != -1 && expected_size < 4) -- desired_align = align; -- -- return desired_align; --} -- -- --/* Helper function for memcpy. For QImode value 0xXY produce -- 0xXYXYXYXY of wide specified by MODE. This is essentially -- a * 0x10101010, but we can do slightly better than -- synth_mult by unwinding the sequence by hand on CPUs with -- slow multiply. */ --static rtx --promote_duplicated_reg (machine_mode mode, rtx val) --{ -- machine_mode valmode = GET_MODE (val); -- rtx tmp; -- int nops = mode == DImode ? 3 : 2; -- -- gcc_assert (mode == SImode || mode == DImode || val == const0_rtx); -- if (val == const0_rtx) -- return copy_to_mode_reg (mode, CONST0_RTX (mode)); -- if (CONST_INT_P (val)) -- { -- HOST_WIDE_INT v = INTVAL (val) & 255; -- -- v |= v << 8; -- v |= v << 16; -- if (mode == DImode) -- v |= (v << 16) << 16; -- return copy_to_mode_reg (mode, gen_int_mode (v, mode)); -- } -- -- if (valmode == VOIDmode) -- valmode = QImode; -- if (valmode != QImode) -- val = gen_lowpart (QImode, val); -- if (mode == QImode) -- return val; -- if (!TARGET_PARTIAL_REG_STALL) -- nops--; -- if (ix86_cost->mult_init[mode == DImode ? 3 : 2] -- + ix86_cost->mult_bit * (mode == DImode ? 
8 : 4) -- <= (ix86_cost->shift_const + ix86_cost->add) * nops -- + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0))) -- { -- rtx reg = convert_modes (mode, QImode, val, true); -- tmp = promote_duplicated_reg (mode, const1_rtx); -- return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1, -- OPTAB_DIRECT); -- } -- else -- { -- rtx reg = convert_modes (mode, QImode, val, true); -- -- if (!TARGET_PARTIAL_REG_STALL) -- if (mode == SImode) -- emit_insn (gen_insvsi_1 (reg, reg)); -- else -- emit_insn (gen_insvdi_1 (reg, reg)); -- else -- { -- tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8), -- NULL, 1, OPTAB_DIRECT); -- reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, -- OPTAB_DIRECT); -- } -- tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16), -- NULL, 1, OPTAB_DIRECT); -- reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT); -- if (mode == SImode) -- return reg; -- tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32), -- NULL, 1, OPTAB_DIRECT); -- reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT); -- return reg; -- } --} -- --/* Duplicate value VAL using promote_duplicated_reg into maximal size that will -- be needed by main loop copying SIZE_NEEDED chunks and prologue getting -- alignment from ALIGN to DESIRED_ALIGN. */ --static rtx --promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, -- int align) --{ -- rtx promoted_val; -- -- if (TARGET_64BIT -- && (size_needed > 4 || (desired_align > align && desired_align > 4))) -- promoted_val = promote_duplicated_reg (DImode, val); -- else if (size_needed > 2 || (desired_align > align && desired_align > 2)) -- promoted_val = promote_duplicated_reg (SImode, val); -- else if (size_needed > 1 || (desired_align > align && desired_align > 1)) -- promoted_val = promote_duplicated_reg (HImode, val); -- else -- promoted_val = val; -- -- return promoted_val; --} -- --/* Expand string move (memcpy) ot store (memset) operation. Use i386 string -- operations when profitable. The code depends upon architecture, block size -- and alignment, but always has one of the following overall structures: -- -- Aligned move sequence: -- -- 1) Prologue guard: Conditional that jumps up to epilogues for small -- blocks that can be handled by epilogue alone. This is faster -- but also needed for correctness, since prologue assume the block -- is larger than the desired alignment. -- -- Optional dynamic check for size and libcall for large -- blocks is emitted here too, with -minline-stringops-dynamically. -- -- 2) Prologue: copy first few bytes in order to get destination -- aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less -- than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be -- copied. We emit either a jump tree on power of two sized -- blocks, or a byte loop. -- -- 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks -- with specified algorithm. -- -- 4) Epilogue: code copying tail of the block that is too small to be -- handled by main body (or up to size guarded by prologue guard). 
-- -- Misaligned move sequence -- -- 1) missaligned move prologue/epilogue containing: -- a) Prologue handling small memory blocks and jumping to done_label -- (skipped if blocks are known to be large enough) -- b) Signle move copying first DESIRED_ALIGN-ALIGN bytes if alignment is -- needed by single possibly misaligned move -- (skipped if alignment is not needed) -- c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves -- -- 2) Zero size guard dispatching to done_label, if needed -- -- 3) dispatch to library call, if needed, -- -- 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks -- with specified algorithm. */ --bool --ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp, -- rtx align_exp, rtx expected_align_exp, -- rtx expected_size_exp, rtx min_size_exp, -- rtx max_size_exp, rtx probable_max_size_exp, -- bool issetmem) --{ -- rtx destreg; -- rtx srcreg = NULL; -- rtx_code_label *label = NULL; -- rtx tmp; -- rtx_code_label *jump_around_label = NULL; -- HOST_WIDE_INT align = 1; -- unsigned HOST_WIDE_INT count = 0; -- HOST_WIDE_INT expected_size = -1; -- int size_needed = 0, epilogue_size_needed; -- int desired_align = 0, align_bytes = 0; -- enum stringop_alg alg; -- rtx promoted_val = NULL; -- rtx vec_promoted_val = NULL; -- bool force_loopy_epilogue = false; -- int dynamic_check; -- bool need_zero_guard = false; -- bool noalign; -- machine_mode move_mode = VOIDmode; -- machine_mode wider_mode; -- int unroll_factor = 1; -- /* TODO: Once value ranges are available, fill in proper data. */ -- unsigned HOST_WIDE_INT min_size = 0; -- unsigned HOST_WIDE_INT max_size = -1; -- unsigned HOST_WIDE_INT probable_max_size = -1; -- bool misaligned_prologue_used = false; -- bool have_as; -- -- if (CONST_INT_P (align_exp)) -- align = INTVAL (align_exp); -- /* i386 can do misaligned access on reasonably increased cost. */ -- if (CONST_INT_P (expected_align_exp) -- && INTVAL (expected_align_exp) > align) -- align = INTVAL (expected_align_exp); -- /* ALIGN is the minimum of destination and source alignment, but we care here -- just about destination alignment. */ -- else if (!issetmem -- && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT) -- align = MEM_ALIGN (dst) / BITS_PER_UNIT; -- -- if (CONST_INT_P (count_exp)) -- { -- min_size = max_size = probable_max_size = count = expected_size -- = INTVAL (count_exp); -- /* When COUNT is 0, there is nothing to do. */ -- if (!count) -- return true; -- } -- else -- { -- if (min_size_exp) -- min_size = INTVAL (min_size_exp); -- if (max_size_exp) -- max_size = INTVAL (max_size_exp); -- if (probable_max_size_exp) -- probable_max_size = INTVAL (probable_max_size_exp); -- if (CONST_INT_P (expected_size_exp)) -- expected_size = INTVAL (expected_size_exp); -- } -- -- /* Make sure we don't need to care about overflow later on. */ -- if (count > (HOST_WIDE_INT_1U << 30)) -- return false; -- -- have_as = !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst)); -- if (!issetmem) -- have_as |= !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src)); -- -- /* Step 0: Decide on preferred algorithm, desired alignment and -- size of chunks to be copied by main loop. 
*/ -- alg = decide_alg (count, expected_size, min_size, probable_max_size, -- issetmem, -- issetmem && val_exp == const0_rtx, have_as, -- &dynamic_check, &noalign, false); -- -- if (dump_file) -- fprintf (dump_file, "Selected stringop expansion strategy: %s\n", -- stringop_alg_names[alg]); -- -- if (alg == libcall) -- return false; -- gcc_assert (alg != no_stringop); -- -- /* For now vector-version of memset is generated only for memory zeroing, as -- creating of promoted vector value is very cheap in this case. */ -- if (issetmem && alg == vector_loop && val_exp != const0_rtx) -- alg = unrolled_loop; -- -- if (!count) -- count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp); -- destreg = ix86_copy_addr_to_reg (XEXP (dst, 0)); -- if (!issetmem) -- srcreg = ix86_copy_addr_to_reg (XEXP (src, 0)); -- -- unroll_factor = 1; -- move_mode = word_mode; -- switch (alg) -- { -- case libcall: -- case no_stringop: -- case last_alg: -- gcc_unreachable (); -- case loop_1_byte: -- need_zero_guard = true; -- move_mode = QImode; -- break; -- case loop: -- need_zero_guard = true; -- break; -- case unrolled_loop: -- need_zero_guard = true; -- unroll_factor = (TARGET_64BIT ? 4 : 2); -- break; -- case vector_loop: -- need_zero_guard = true; -- unroll_factor = 4; -- /* Find the widest supported mode. */ -- move_mode = word_mode; -- while (GET_MODE_WIDER_MODE (move_mode).exists (&wider_mode) -- && optab_handler (mov_optab, wider_mode) != CODE_FOR_nothing) -- move_mode = wider_mode; -- -- if (TARGET_AVX128_OPTIMAL && GET_MODE_BITSIZE (move_mode) > 128) -- move_mode = TImode; -- -- /* Find the corresponding vector mode with the same size as MOVE_MODE. -- MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */ -- if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode)) -- { -- int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode); -- if (!mode_for_vector (word_mode, nunits).exists (&move_mode) -- || optab_handler (mov_optab, move_mode) == CODE_FOR_nothing) -- move_mode = word_mode; -- } -- gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing); -- break; -- case rep_prefix_8_byte: -- move_mode = DImode; -- break; -- case rep_prefix_4_byte: -- move_mode = SImode; -- break; -- case rep_prefix_1_byte: -- move_mode = QImode; -- break; -- } -- size_needed = GET_MODE_SIZE (move_mode) * unroll_factor; -- epilogue_size_needed = size_needed; -- -- /* If we are going to call any library calls conditionally, make sure any -- pending stack adjustment happen before the first conditional branch, -- otherwise they will be emitted before the library call only and won't -- happen from the other branches. */ -- if (dynamic_check != -1) -- do_pending_stack_adjust (); -- -- desired_align = decide_alignment (align, alg, expected_size, move_mode); -- if (!TARGET_ALIGN_STRINGOPS || noalign) -- align = desired_align; -- -- /* Step 1: Prologue guard. */ -- -- /* Alignment code needs count to be in register. */ -- if (CONST_INT_P (count_exp) && desired_align > align) -- { -- if (INTVAL (count_exp) > desired_align -- && INTVAL (count_exp) > size_needed) -- { -- align_bytes -- = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT); -- if (align_bytes <= 0) -- align_bytes = 0; -- else -- align_bytes = desired_align - align_bytes; -- } -- if (align_bytes == 0) -- count_exp = force_reg (counter_mode (count_exp), count_exp); -- } -- gcc_assert (desired_align >= 1 && align >= 1); -- -- /* Misaligned move sequences handle both prologue and epilogue at once. 
-- Default code generation results in a smaller code for large alignments -- and also avoids redundant job when sizes are known precisely. */ -- misaligned_prologue_used -- = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES -- && MAX (desired_align, epilogue_size_needed) <= 32 -- && desired_align <= epilogue_size_needed -- && ((desired_align > align && !align_bytes) -- || (!count && epilogue_size_needed > 1))); -- -- /* Do the cheap promotion to allow better CSE across the -- main loop and epilogue (ie one load of the big constant in the -- front of all code. -- For now the misaligned move sequences do not have fast path -- without broadcasting. */ -- if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used))) -- { -- if (alg == vector_loop) -- { -- gcc_assert (val_exp == const0_rtx); -- vec_promoted_val = promote_duplicated_reg (move_mode, val_exp); -- promoted_val = promote_duplicated_reg_to_size (val_exp, -- GET_MODE_SIZE (word_mode), -- desired_align, align); -- } -- else -- { -- promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed, -- desired_align, align); -- } -- } -- /* Misaligned move sequences handles both prologues and epilogues at once. -- Default code generation results in smaller code for large alignments and -- also avoids redundant job when sizes are known precisely. */ -- if (misaligned_prologue_used) -- { -- /* Misaligned move prologue handled small blocks by itself. */ -- expand_set_or_movmem_prologue_epilogue_by_misaligned_moves -- (dst, src, &destreg, &srcreg, -- move_mode, promoted_val, vec_promoted_val, -- &count_exp, -- &jump_around_label, -- desired_align < align -- ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed, -- desired_align, align, &min_size, dynamic_check, issetmem); -- if (!issetmem) -- src = change_address (src, BLKmode, srcreg); -- dst = change_address (dst, BLKmode, destreg); -- set_mem_align (dst, desired_align * BITS_PER_UNIT); -- epilogue_size_needed = 0; -- if (need_zero_guard -- && min_size < (unsigned HOST_WIDE_INT) size_needed) -- { -- /* It is possible that we copied enough so the main loop will not -- execute. */ -- gcc_assert (size_needed > 1); -- if (jump_around_label == NULL_RTX) -- jump_around_label = gen_label_rtx (); -- emit_cmp_and_jump_insns (count_exp, -- GEN_INT (size_needed), -- LTU, 0, counter_mode (count_exp), 1, jump_around_label); -- if (expected_size == -1 -- || expected_size < (desired_align - align) / 2 + size_needed) -- predict_jump (REG_BR_PROB_BASE * 20 / 100); -- else -- predict_jump (REG_BR_PROB_BASE * 60 / 100); -- } -- } -- /* Ensure that alignment prologue won't copy past end of block. */ -- else if (size_needed > 1 || (desired_align > 1 && desired_align > align)) -- { -- epilogue_size_needed = MAX (size_needed - 1, desired_align - align); -- /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes. -- Make sure it is power of 2. */ -- epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1); -- -- /* To improve performance of small blocks, we jump around the VAL -- promoting mode. This mean that if the promoted VAL is not constant, -- we might not use it in the epilogue and have to use byte -- loop variant. */ -- if (issetmem && epilogue_size_needed > 2 && !promoted_val) -- force_loopy_epilogue = true; -- if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed) -- || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed) -- { -- /* If main algorithm works on QImode, no epilogue is needed. -- For small sizes just don't align anything. 
*/ -- if (size_needed == 1) -- desired_align = align; -- else -- goto epilogue; -- } -- else if (!count -- && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed) -- { -- label = gen_label_rtx (); -- emit_cmp_and_jump_insns (count_exp, -- GEN_INT (epilogue_size_needed), -- LTU, 0, counter_mode (count_exp), 1, label); -- if (expected_size == -1 || expected_size < epilogue_size_needed) -- predict_jump (REG_BR_PROB_BASE * 60 / 100); -- else -- predict_jump (REG_BR_PROB_BASE * 20 / 100); -- } -- } -- -- /* Emit code to decide on runtime whether library call or inline should be -- used. */ -- if (dynamic_check != -1) -- { -- if (!issetmem && CONST_INT_P (count_exp)) -- { -- if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check) -- { -- emit_block_copy_via_libcall (dst, src, count_exp); -- count_exp = const0_rtx; -- goto epilogue; -- } -- } -- else -- { -- rtx_code_label *hot_label = gen_label_rtx (); -- if (jump_around_label == NULL_RTX) -- jump_around_label = gen_label_rtx (); -- emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1), -- LEU, 0, counter_mode (count_exp), -- 1, hot_label); -- predict_jump (REG_BR_PROB_BASE * 90 / 100); -- if (issetmem) -- set_storage_via_libcall (dst, count_exp, val_exp); -- else -- emit_block_copy_via_libcall (dst, src, count_exp); -- emit_jump (jump_around_label); -- emit_label (hot_label); -- } -- } -- -- /* Step 2: Alignment prologue. */ -- /* Do the expensive promotion once we branched off the small blocks. */ -- if (issetmem && !promoted_val) -- promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed, -- desired_align, align); -- -- if (desired_align > align && !misaligned_prologue_used) -- { -- if (align_bytes == 0) -- { -- /* Except for the first move in prologue, we no longer know -- constant offset in aliasing info. It don't seems to worth -- the pain to maintain it for the first move, so throw away -- the info early. */ -- dst = change_address (dst, BLKmode, destreg); -- if (!issetmem) -- src = change_address (src, BLKmode, srcreg); -- dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg, -- promoted_val, vec_promoted_val, -- count_exp, align, desired_align, -- issetmem); -- /* At most desired_align - align bytes are copied. */ -- if (min_size < (unsigned)(desired_align - align)) -- min_size = 0; -- else -- min_size -= desired_align - align; -- } -- else -- { -- /* If we know how many bytes need to be stored before dst is -- sufficiently aligned, maintain aliasing info accurately. */ -- dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg, -- srcreg, -- promoted_val, -- vec_promoted_val, -- desired_align, -- align_bytes, -- issetmem); -- -- count_exp = plus_constant (counter_mode (count_exp), -- count_exp, -align_bytes); -- count -= align_bytes; -- min_size -= align_bytes; -- max_size -= align_bytes; -- } -- if (need_zero_guard -- && min_size < (unsigned HOST_WIDE_INT) size_needed -- && (count < (unsigned HOST_WIDE_INT) size_needed -- || (align_bytes == 0 -- && count < ((unsigned HOST_WIDE_INT) size_needed -- + desired_align - align)))) -- { -- /* It is possible that we copied enough so the main loop will not -- execute. 
*/ -- gcc_assert (size_needed > 1); -- if (label == NULL_RTX) -- label = gen_label_rtx (); -- emit_cmp_and_jump_insns (count_exp, -- GEN_INT (size_needed), -- LTU, 0, counter_mode (count_exp), 1, label); -- if (expected_size == -1 -- || expected_size < (desired_align - align) / 2 + size_needed) -- predict_jump (REG_BR_PROB_BASE * 20 / 100); -- else -- predict_jump (REG_BR_PROB_BASE * 60 / 100); -- } -- } -- if (label && size_needed == 1) -- { -- emit_label (label); -- LABEL_NUSES (label) = 1; -- label = NULL; -- epilogue_size_needed = 1; -- if (issetmem) -- promoted_val = val_exp; -- } -- else if (label == NULL_RTX && !misaligned_prologue_used) -- epilogue_size_needed = size_needed; -- -- /* Step 3: Main loop. */ -- -- switch (alg) -- { -- case libcall: -- case no_stringop: -- case last_alg: -- gcc_unreachable (); -- case loop_1_byte: -- case loop: -- case unrolled_loop: -- expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val, -- count_exp, move_mode, unroll_factor, -- expected_size, issetmem); -- break; -- case vector_loop: -- expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, -- vec_promoted_val, count_exp, move_mode, -- unroll_factor, expected_size, issetmem); -- break; -- case rep_prefix_8_byte: -- case rep_prefix_4_byte: -- case rep_prefix_1_byte: -- expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val, -- val_exp, count_exp, move_mode, issetmem); -- break; -- } -- /* Adjust properly the offset of src and dest memory for aliasing. */ -- if (CONST_INT_P (count_exp)) -- { -- if (!issetmem) -- src = adjust_automodify_address_nv (src, BLKmode, srcreg, -- (count / size_needed) * size_needed); -- dst = adjust_automodify_address_nv (dst, BLKmode, destreg, -- (count / size_needed) * size_needed); -- } -- else -- { -- if (!issetmem) -- src = change_address (src, BLKmode, srcreg); -- dst = change_address (dst, BLKmode, destreg); -- } -- -- /* Step 4: Epilogue to copy the remaining bytes. */ -- epilogue: -- if (label) -- { -- /* When the main loop is done, COUNT_EXP might hold original count, -- while we want to copy only COUNT_EXP & SIZE_NEEDED bytes. -- Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED -- bytes. Compensate if needed. */ -- -- if (size_needed < epilogue_size_needed) -- { -- tmp = expand_simple_binop (counter_mode (count_exp), AND, count_exp, -- GEN_INT (size_needed - 1), count_exp, 1, -- OPTAB_DIRECT); -- if (tmp != count_exp) -- emit_move_insn (count_exp, tmp); -- } -- emit_label (label); -- LABEL_NUSES (label) = 1; -- } -- -- if (count_exp != const0_rtx && epilogue_size_needed > 1) -- { -- if (force_loopy_epilogue) -- expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp, -- epilogue_size_needed); -- else -- { -- if (issetmem) -- expand_setmem_epilogue (dst, destreg, promoted_val, -- vec_promoted_val, count_exp, -- epilogue_size_needed); -- else -- expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp, -- epilogue_size_needed); -- } -- } -- if (jump_around_label) -- emit_label (jump_around_label); -- return true; --} -- -- --/* Expand the appropriate insns for doing strlen if not just doing -- repnz; scasb -- -- out = result, initialized with the start address -- align_rtx = alignment of the address. -- scratch = scratch register, initialized with the startaddress when -- not aligned, otherwise undefined -- -- This is just the body. It needs the initializations mentioned above and -- some address computing at the end. These things are done in i386.md. 
*/ -- --static void --ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx) --{ -- int align; -- rtx tmp; -- rtx_code_label *align_2_label = NULL; -- rtx_code_label *align_3_label = NULL; -- rtx_code_label *align_4_label = gen_label_rtx (); -- rtx_code_label *end_0_label = gen_label_rtx (); -- rtx mem; -- rtx tmpreg = gen_reg_rtx (SImode); -- rtx scratch = gen_reg_rtx (SImode); -- rtx cmp; -- -- align = 0; -- if (CONST_INT_P (align_rtx)) -- align = INTVAL (align_rtx); -- -- /* Loop to check 1..3 bytes for null to get an aligned pointer. */ -- -- /* Is there a known alignment and is it less than 4? */ -- if (align < 4) -- { -- rtx scratch1 = gen_reg_rtx (Pmode); -- emit_move_insn (scratch1, out); -- /* Is there a known alignment and is it not 2? */ -- if (align != 2) -- { -- align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */ -- align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */ -- -- /* Leave just the 3 lower bits. */ -- align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3), -- NULL_RTX, 0, OPTAB_WIDEN); -- -- emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL, -- Pmode, 1, align_4_label); -- emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL, -- Pmode, 1, align_2_label); -- emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL, -- Pmode, 1, align_3_label); -- } -- else -- { -- /* Since the alignment is 2, we have to check 2 or 0 bytes; -- check if is aligned to 4 - byte. */ -- -- align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx, -- NULL_RTX, 0, OPTAB_WIDEN); -- -- emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL, -- Pmode, 1, align_4_label); -- } -- -- mem = change_address (src, QImode, out); -- -- /* Now compare the bytes. */ -- -- /* Compare the first n unaligned byte on a byte per byte basis. */ -- emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, -- QImode, 1, end_0_label); -- -- /* Increment the address. */ -- emit_insn (ix86_gen_add3 (out, out, const1_rtx)); -- -- /* Not needed with an alignment of 2 */ -- if (align != 2) -- { -- emit_label (align_2_label); -- -- emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1, -- end_0_label); -- -- emit_insn (ix86_gen_add3 (out, out, const1_rtx)); -- -- emit_label (align_3_label); -- } -- -- emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1, -- end_0_label); -- -- emit_insn (ix86_gen_add3 (out, out, const1_rtx)); -- } -- -- /* Generate loop to check 4 bytes at a time. It is not a good idea to -- align this loop. It gives only huge programs, but does not help to -- speed up. */ -- emit_label (align_4_label); -- -- mem = change_address (src, SImode, out); -- emit_move_insn (scratch, mem); -- emit_insn (ix86_gen_add3 (out, out, GEN_INT (4))); -- -- /* This formula yields a nonzero result iff one of the bytes is zero. -- This saves three branches inside loop and many cycles. */ -- -- emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101))); -- emit_insn (gen_one_cmplsi2 (scratch, scratch)); -- emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch)); -- emit_insn (gen_andsi3 (tmpreg, tmpreg, -- gen_int_mode (0x80808080, SImode))); -- emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1, -- align_4_label); -- -- if (TARGET_CMOVE) -- { -- rtx reg = gen_reg_rtx (SImode); -- rtx reg2 = gen_reg_rtx (Pmode); -- emit_move_insn (reg, tmpreg); -- emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16))); -- -- /* If zero is not in the first two bytes, move two bytes forward. 
*/ -- emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080))); -- tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); -- tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); -- emit_insn (gen_rtx_SET (tmpreg, -- gen_rtx_IF_THEN_ELSE (SImode, tmp, -- reg, -- tmpreg))); -- /* Emit lea manually to avoid clobbering of flags. */ -- emit_insn (gen_rtx_SET (reg2, gen_rtx_PLUS (Pmode, out, const2_rtx))); -- -- tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); -- tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); -- emit_insn (gen_rtx_SET (out, -- gen_rtx_IF_THEN_ELSE (Pmode, tmp, -- reg2, -- out))); -- } -- else -- { -- rtx_code_label *end_2_label = gen_label_rtx (); -- /* Is zero in the first two bytes? */ -- -- emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080))); -- tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); -- tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx); -- tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, -- gen_rtx_LABEL_REF (VOIDmode, end_2_label), -- pc_rtx); -- tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); -- JUMP_LABEL (tmp) = end_2_label; -- -- /* Not in the first two. Move two bytes forward. */ -- emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16))); -- emit_insn (ix86_gen_add3 (out, out, const2_rtx)); -- -- emit_label (end_2_label); -- -- } -- -- /* Avoid branch in fixing the byte. */ -- tmpreg = gen_lowpart (QImode, tmpreg); -- emit_insn (gen_addqi3_cconly_overflow (tmpreg, tmpreg)); -- tmp = gen_rtx_REG (CCmode, FLAGS_REG); -- cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx); -- emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp)); -- -- emit_label (end_0_label); --} -- --/* Expand strlen. */ -- --bool --ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align) --{ -- rtx addr, scratch1, scratch2, scratch3, scratch4; -- -- /* The generic case of strlen expander is long. Avoid it's -- expanding unless TARGET_INLINE_ALL_STRINGOPS. */ -- -- if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1 -- && !TARGET_INLINE_ALL_STRINGOPS -- && !optimize_insn_for_size_p () -- && (!CONST_INT_P (align) || INTVAL (align) < 4)) -- return false; -- -- addr = force_reg (Pmode, XEXP (src, 0)); -- scratch1 = gen_reg_rtx (Pmode); -- -- if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1 -- && !optimize_insn_for_size_p ()) -- { -- /* Well it seems that some optimizer does not combine a call like -- foo(strlen(bar), strlen(bar)); -- when the move and the subtraction is done here. It does calculate -- the length just once when these instructions are done inside of -- output_strlen_unroll(). But I think since &bar[strlen(bar)] is -- often used and I use one fewer register for the lifetime of -- output_strlen_unroll() this is better. */ -- -- emit_move_insn (out, addr); -- -- ix86_expand_strlensi_unroll_1 (out, src, align); -- -- /* strlensi_unroll_1 returns the address of the zero at the end of -- the string, like memchr(), so compute the length by subtracting -- the start address. */ -- emit_insn (ix86_gen_sub3 (out, out, addr)); -- } -- else -- { -- rtx unspec; -- -- /* Can't use this if the user has appropriated eax, ecx, or edi. */ -- if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG]) -- return false; -- /* Can't use this for non-default address spaces. 
*/ -- if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src))) -- return false; -- -- scratch2 = gen_reg_rtx (Pmode); -- scratch3 = gen_reg_rtx (Pmode); -- scratch4 = force_reg (Pmode, constm1_rtx); -- -- emit_move_insn (scratch3, addr); -- eoschar = force_reg (QImode, eoschar); -- -- src = replace_equiv_address_nv (src, scratch3); -- -- /* If .md starts supporting :P, this can be done in .md. */ -- unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align, -- scratch4), UNSPEC_SCAS); -- emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec)); -- emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1)); -- emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx)); -- } -- return true; --} -- --/* For given symbol (function) construct code to compute address of it's PLT -- entry in large x86-64 PIC model. */ --static rtx --construct_plt_address (rtx symbol) --{ -- rtx tmp, unspec; -- -- gcc_assert (GET_CODE (symbol) == SYMBOL_REF); -- gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF); -- gcc_assert (Pmode == DImode); -- -- tmp = gen_reg_rtx (Pmode); -- unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF); -- -- emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec)); -- emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx)); -- return tmp; --} -- --rtx_insn * --ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, -- rtx callarg2, -- rtx pop, bool sibcall) --{ -- rtx vec[3]; -- rtx use = NULL, call; -- unsigned int vec_len = 0; -- tree fndecl; -- -- if (GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF) -- { -- fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0)); -- if (fndecl -- && (lookup_attribute ("interrupt", -- TYPE_ATTRIBUTES (TREE_TYPE (fndecl))))) -- error ("interrupt service routine can%'t be called directly"); -- } -- else -- fndecl = NULL_TREE; -- -- if (pop == const0_rtx) -- pop = NULL; -- gcc_assert (!TARGET_64BIT || !pop); -- -- if (TARGET_MACHO && !TARGET_64BIT) -- { --#if TARGET_MACHO -- if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF) -- fnaddr = machopic_indirect_call_target (fnaddr); --#endif -- } -- else -- { -- /* Static functions and indirect calls don't need the pic register. Also, -- check if PLT was explicitly avoided via no-plt or "noplt" attribute, making -- it an indirect call. */ -- rtx addr = XEXP (fnaddr, 0); -- if (flag_pic -- && GET_CODE (addr) == SYMBOL_REF -- && !SYMBOL_REF_LOCAL_P (addr)) -- { -- if (flag_plt -- && (SYMBOL_REF_DECL (addr) == NULL_TREE -- || !lookup_attribute ("noplt", -- DECL_ATTRIBUTES (SYMBOL_REF_DECL (addr))))) -- { -- if (!TARGET_64BIT -- || (ix86_cmodel == CM_LARGE_PIC -- && DEFAULT_ABI != MS_ABI)) -- { -- use_reg (&use, gen_rtx_REG (Pmode, -- REAL_PIC_OFFSET_TABLE_REGNUM)); -- if (ix86_use_pseudo_pic_reg ()) -- emit_move_insn (gen_rtx_REG (Pmode, -- REAL_PIC_OFFSET_TABLE_REGNUM), -- pic_offset_table_rtx); -- } -- } -- else if (!TARGET_PECOFF && !TARGET_MACHO) -- { -- if (TARGET_64BIT) -- { -- fnaddr = gen_rtx_UNSPEC (Pmode, -- gen_rtvec (1, addr), -- UNSPEC_GOTPCREL); -- fnaddr = gen_rtx_CONST (Pmode, fnaddr); -- } -- else -- { -- fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), -- UNSPEC_GOT); -- fnaddr = gen_rtx_CONST (Pmode, fnaddr); -- fnaddr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, -- fnaddr); -- } -- fnaddr = gen_const_mem (Pmode, fnaddr); -- /* Pmode may not be the same as word_mode for x32, which -- doesn't support indirect branch via 32-bit memory slot. -- Since x32 GOT slot is 64 bit with zero upper 32 bits, -- indirect branch via x32 GOT slot is OK. 
*/ -- if (GET_MODE (fnaddr) != word_mode) -- fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr); -- fnaddr = gen_rtx_MEM (QImode, fnaddr); -- } -- } -- } -- -- /* Skip setting up RAX register for -mskip-rax-setup when there are no -- parameters passed in vector registers. */ -- if (TARGET_64BIT -- && (INTVAL (callarg2) > 0 -- || (INTVAL (callarg2) == 0 -- && (TARGET_SSE || !flag_skip_rax_setup)))) -- { -- rtx al = gen_rtx_REG (QImode, AX_REG); -- emit_move_insn (al, callarg2); -- use_reg (&use, al); -- } -- -- if (ix86_cmodel == CM_LARGE_PIC -- && !TARGET_PECOFF -- && MEM_P (fnaddr) -- && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF -- && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode)) -- fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0))); -- /* Since x32 GOT slot is 64 bit with zero upper 32 bits, indirect -- branch via x32 GOT slot is OK. */ -- else if (!(TARGET_X32 -- && MEM_P (fnaddr) -- && GET_CODE (XEXP (fnaddr, 0)) == ZERO_EXTEND -- && GOT_memory_operand (XEXP (XEXP (fnaddr, 0), 0), Pmode)) -- && (sibcall -- ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode) -- : !call_insn_operand (XEXP (fnaddr, 0), word_mode))) -- { -- fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1); -- fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr)); -- } -- -- call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1); -- -- if (retval) -- call = gen_rtx_SET (retval, call); -- vec[vec_len++] = call; -- -- if (pop) -- { -- pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop); -- pop = gen_rtx_SET (stack_pointer_rtx, pop); -- vec[vec_len++] = pop; -- } -- -- if (cfun->machine->no_caller_saved_registers -- && (!fndecl -- || (!TREE_THIS_VOLATILE (fndecl) -- && !lookup_attribute ("no_caller_saved_registers", -- TYPE_ATTRIBUTES (TREE_TYPE (fndecl)))))) -- { -- static const char ix86_call_used_regs[] = CALL_USED_REGISTERS; -- bool is_64bit_ms_abi = (TARGET_64BIT -- && ix86_function_abi (fndecl) == MS_ABI); -- char c_mask = CALL_USED_REGISTERS_MASK (is_64bit_ms_abi); -- -- /* If there are no caller-saved registers, add all registers -- that are clobbered by the call which returns. */ -- for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++) -- if (!fixed_regs[i] -- && (ix86_call_used_regs[i] == 1 -- || (ix86_call_used_regs[i] & c_mask)) -- && !STACK_REGNO_P (i) -- && !MMX_REGNO_P (i)) -- clobber_reg (&use, -- gen_rtx_REG (GET_MODE (regno_reg_rtx[i]), i)); -- } -- else if (TARGET_64BIT_MS_ABI -- && (!callarg2 || INTVAL (callarg2) != -2)) -- { -- unsigned i; -- -- for (i = 0; i < NUM_X86_64_MS_CLOBBERED_REGS; i++) -- { -- int regno = x86_64_ms_sysv_extra_clobbered_registers[i]; -- machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode; -- -- clobber_reg (&use, gen_rtx_REG (mode, regno)); -- } -- -- /* Set here, but it may get cleared later. */ -- if (TARGET_CALL_MS2SYSV_XLOGUES) -- { -- if (!TARGET_SSE) -- ; -- -- /* Don't break hot-patched functions. */ -- else if (ix86_function_ms_hook_prologue (current_function_decl)) -- ; -- -- /* TODO: Cases not yet examined. 
*/ -- else if (flag_split_stack) -- warn_once_call_ms2sysv_xlogues ("-fsplit-stack"); -- -- else -- { -- gcc_assert (!reload_completed); -- cfun->machine->call_ms2sysv = true; -- } -- } -- } -- -- if (vec_len > 1) -- call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec)); -- rtx_insn *call_insn = emit_call_insn (call); -- if (use) -- CALL_INSN_FUNCTION_USAGE (call_insn) = use; -- -- return call_insn; --} -- --/* Return true if the function being called was marked with attribute -- "noplt" or using -fno-plt and we are compiling for non-PIC. We need -- to handle the non-PIC case in the backend because there is no easy -- interface for the front-end to force non-PLT calls to use the GOT. -- This is currently used only with 64-bit or 32-bit GOT32X ELF targets -- to call the function marked "noplt" indirectly. */ -- --static bool --ix86_nopic_noplt_attribute_p (rtx call_op) --{ -- if (flag_pic || ix86_cmodel == CM_LARGE -- || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X) -- || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF -- || SYMBOL_REF_LOCAL_P (call_op)) -- return false; -- -- tree symbol_decl = SYMBOL_REF_DECL (call_op); -- -- if (!flag_plt -- || (symbol_decl != NULL_TREE -- && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl)))) -- return true; -- -- return false; --} -- --/* Helper to output the jmp/call. */ --static void --ix86_output_jmp_thunk_or_indirect (const char *thunk_name, const int regno) --{ -- if (thunk_name != NULL) -- { -- fprintf (asm_out_file, "\tjmp\t"); -- assemble_name (asm_out_file, thunk_name); -- putc ('\n', asm_out_file); -- } -- else -- output_indirect_thunk (regno); --} -- --/* Output indirect branch via a call and return thunk. CALL_OP is a -- register which contains the branch target. XASM is the assembly -- template for CALL_OP. Branch is a tail call if SIBCALL_P is true. -- A normal call is converted to: -- -- call __x86_indirect_thunk_reg -- -- and a tail call is converted to: -- -- jmp __x86_indirect_thunk_reg -- */ -- --static void --ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p) --{ -- char thunk_name_buf[32]; -- char *thunk_name; -- enum indirect_thunk_prefix need_prefix -- = indirect_thunk_need_prefix (current_output_insn); -- int regno = REGNO (call_op); -- -- if (cfun->machine->indirect_branch_type -- != indirect_branch_thunk_inline) -- { -- if (cfun->machine->indirect_branch_type == indirect_branch_thunk) -- { -- int i = regno; -- if (i >= FIRST_REX_INT_REG) -- i -= (FIRST_REX_INT_REG - LAST_INT_REG - 1); -- indirect_thunks_used |= 1 << i; -- } -- indirect_thunk_name (thunk_name_buf, regno, need_prefix, false); -- thunk_name = thunk_name_buf; -- } -- else -- thunk_name = NULL; -- -- if (sibcall_p) -- ix86_output_jmp_thunk_or_indirect (thunk_name, regno); -- else -- { -- if (thunk_name != NULL) -- { -- fprintf (asm_out_file, "\tcall\t"); -- assemble_name (asm_out_file, thunk_name); -- putc ('\n', asm_out_file); -- return; -- } -- -- char indirectlabel1[32]; -- char indirectlabel2[32]; -- -- ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, -- INDIRECT_LABEL, -- indirectlabelno++); -- ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, -- INDIRECT_LABEL, -- indirectlabelno++); -- -- /* Jump. */ -- fputs ("\tjmp\t", asm_out_file); -- assemble_name_raw (asm_out_file, indirectlabel2); -- fputc ('\n', asm_out_file); -- -- ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1); -- -- ix86_output_jmp_thunk_or_indirect (thunk_name, regno); -- -- ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2); -- -- /* Call. 
*/ -- fputs ("\tcall\t", asm_out_file); -- assemble_name_raw (asm_out_file, indirectlabel1); -- fputc ('\n', asm_out_file); -- } --} -- --/* Output indirect branch via a call and return thunk. CALL_OP is -- the branch target. XASM is the assembly template for CALL_OP. -- Branch is a tail call if SIBCALL_P is true. A normal call is -- converted to: -- -- jmp L2 -- L1: -- push CALL_OP -- jmp __x86_indirect_thunk -- L2: -- call L1 -- -- and a tail call is converted to: -- -- push CALL_OP -- jmp __x86_indirect_thunk -- */ -- --static void --ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm, -- bool sibcall_p) --{ -- char thunk_name_buf[32]; -- char *thunk_name; -- char push_buf[64]; -- enum indirect_thunk_prefix need_prefix -- = indirect_thunk_need_prefix (current_output_insn); -- int regno = -1; -- -- if (cfun->machine->indirect_branch_type -- != indirect_branch_thunk_inline) -- { -- if (cfun->machine->indirect_branch_type == indirect_branch_thunk) -- indirect_thunk_needed = true; -- indirect_thunk_name (thunk_name_buf, regno, need_prefix, false); -- thunk_name = thunk_name_buf; -- } -- else -- thunk_name = NULL; -- -- snprintf (push_buf, sizeof (push_buf), "push{%c}\t%s", -- TARGET_64BIT ? 'q' : 'l', xasm); -- -- if (sibcall_p) -- { -- output_asm_insn (push_buf, &call_op); -- ix86_output_jmp_thunk_or_indirect (thunk_name, regno); -- } -- else -- { -- char indirectlabel1[32]; -- char indirectlabel2[32]; -- -- ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, -- INDIRECT_LABEL, -- indirectlabelno++); -- ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, -- INDIRECT_LABEL, -- indirectlabelno++); -- -- /* Jump. */ -- fputs ("\tjmp\t", asm_out_file); -- assemble_name_raw (asm_out_file, indirectlabel2); -- fputc ('\n', asm_out_file); -- -- ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1); -- -- /* An external function may be called via GOT, instead of PLT. */ -- if (MEM_P (call_op)) -- { -- struct ix86_address parts; -- rtx addr = XEXP (call_op, 0); -- if (ix86_decompose_address (addr, &parts) -- && parts.base == stack_pointer_rtx) -- { -- /* Since call will adjust stack by -UNITS_PER_WORD, -- we must convert "disp(stack, index, scale)" to -- "disp+UNITS_PER_WORD(stack, index, scale)". */ -- if (parts.index) -- { -- addr = gen_rtx_MULT (Pmode, parts.index, -- GEN_INT (parts.scale)); -- addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx, -- addr); -- } -- else -- addr = stack_pointer_rtx; -- -- rtx disp; -- if (parts.disp != NULL_RTX) -- disp = plus_constant (Pmode, parts.disp, -- UNITS_PER_WORD); -- else -- disp = GEN_INT (UNITS_PER_WORD); -- -- addr = gen_rtx_PLUS (Pmode, addr, disp); -- call_op = gen_rtx_MEM (GET_MODE (call_op), addr); -- } -- } -- -- output_asm_insn (push_buf, &call_op); -- -- ix86_output_jmp_thunk_or_indirect (thunk_name, regno); -- -- ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2); -- -- /* Call. */ -- fputs ("\tcall\t", asm_out_file); -- assemble_name_raw (asm_out_file, indirectlabel1); -- fputc ('\n', asm_out_file); -- } --} -- --/* Output indirect branch via a call and return thunk. CALL_OP is -- the branch target. XASM is the assembly template for CALL_OP. -- Branch is a tail call if SIBCALL_P is true. */ -- --static void --ix86_output_indirect_branch (rtx call_op, const char *xasm, -- bool sibcall_p) --{ -- if (REG_P (call_op)) -- ix86_output_indirect_branch_via_reg (call_op, sibcall_p); -- else -- ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p); --} -- --/* Output indirect jump. CALL_OP is the jump target. 
*/ -- --const char * --ix86_output_indirect_jmp (rtx call_op) --{ -- if (cfun->machine->indirect_branch_type != indirect_branch_keep) -- { -- /* We can't have red-zone since "call" in the indirect thunk -- pushes the return address onto stack, destroying red-zone. */ -- if (ix86_red_zone_size != 0) -- gcc_unreachable (); -- -- ix86_output_indirect_branch (call_op, "%0", true); -- return ""; -- } -- else -- return "%!jmp\t%A0"; --} -- --/* Output return instrumentation for current function if needed. */ -- --static void --output_return_instrumentation (void) --{ -- if (ix86_instrument_return != instrument_return_none -- && flag_fentry -- && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl)) -- { -- if (ix86_flag_record_return) -- fprintf (asm_out_file, "1:\n"); -- switch (ix86_instrument_return) -- { -- case instrument_return_call: -- fprintf (asm_out_file, "\tcall\t__return__\n"); -- break; -- case instrument_return_nop5: -- /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */ -- fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n"); -- break; -- case instrument_return_none: -- break; -- } -- -- if (ix86_flag_record_return) -- { -- fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n"); -- fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long"); -- fprintf (asm_out_file, "\t.previous\n"); -- } -- } --} -- --/* Output function return. CALL_OP is the jump target. Add a REP -- prefix to RET if LONG_P is true and function return is kept. */ -- --const char * --ix86_output_function_return (bool long_p) --{ -- output_return_instrumentation (); -- -- if (cfun->machine->function_return_type != indirect_branch_keep) -- { -- char thunk_name[32]; -- enum indirect_thunk_prefix need_prefix -- = indirect_thunk_need_prefix (current_output_insn); -- -- if (cfun->machine->function_return_type -- != indirect_branch_thunk_inline) -- { -- bool need_thunk = (cfun->machine->function_return_type -- == indirect_branch_thunk); -- indirect_thunk_name (thunk_name, INVALID_REGNUM, need_prefix, -- true); -- indirect_return_needed |= need_thunk; -- fprintf (asm_out_file, "\tjmp\t"); -- assemble_name (asm_out_file, thunk_name); -- putc ('\n', asm_out_file); -- } -- else -- output_indirect_thunk (INVALID_REGNUM); -- -- return ""; -- } -- -- if (!long_p) -- return "%!ret"; -- -- return "rep%; ret"; --} -- --/* Output indirect function return. RET_OP is the function return -- target. */ -- --const char * --ix86_output_indirect_function_return (rtx ret_op) --{ -- if (cfun->machine->function_return_type != indirect_branch_keep) -- { -- char thunk_name[32]; -- enum indirect_thunk_prefix need_prefix -- = indirect_thunk_need_prefix (current_output_insn); -- unsigned int regno = REGNO (ret_op); -- gcc_assert (regno == CX_REG); -- -- if (cfun->machine->function_return_type -- != indirect_branch_thunk_inline) -- { -- bool need_thunk = (cfun->machine->function_return_type -- == indirect_branch_thunk); -- indirect_thunk_name (thunk_name, regno, need_prefix, true); -- -- if (need_thunk) -- { -- indirect_return_via_cx = true; -- indirect_thunks_used |= 1 << CX_REG; -- } -- fprintf (asm_out_file, "\tjmp\t"); -- assemble_name (asm_out_file, thunk_name); -- putc ('\n', asm_out_file); -- } -- else -- output_indirect_thunk (regno); -- -- return ""; -- } -- else -- return "%!jmp\t%A0"; --} -- --/* Split simple return with popping POPC bytes from stack to indirect -- branch with stack adjustment . 
*/ -- --void --ix86_split_simple_return_pop_internal (rtx popc) --{ -- struct machine_function *m = cfun->machine; -- rtx ecx = gen_rtx_REG (SImode, CX_REG); -- rtx_insn *insn; -- -- /* There is no "pascal" calling convention in any 64bit ABI. */ -- gcc_assert (!TARGET_64BIT); -- -- insn = emit_insn (gen_pop (ecx)); -- m->fs.cfa_offset -= UNITS_PER_WORD; -- m->fs.sp_offset -= UNITS_PER_WORD; -- -- rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD); -- x = gen_rtx_SET (stack_pointer_rtx, x); -- add_reg_note (insn, REG_CFA_ADJUST_CFA, x); -- add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx)); -- RTX_FRAME_RELATED_P (insn) = 1; -- -- x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, popc); -- x = gen_rtx_SET (stack_pointer_rtx, x); -- insn = emit_insn (x); -- add_reg_note (insn, REG_CFA_ADJUST_CFA, x); -- RTX_FRAME_RELATED_P (insn) = 1; -- -- /* Now return address is in ECX. */ -- emit_jump_insn (gen_simple_return_indirect_internal (ecx)); --} -- --/* Output the assembly for a call instruction. */ -- --const char * --ix86_output_call_insn (rtx_insn *insn, rtx call_op) --{ -- bool direct_p = constant_call_address_operand (call_op, VOIDmode); -- bool output_indirect_p -- = (!TARGET_SEH -- && cfun->machine->indirect_branch_type != indirect_branch_keep); -- bool seh_nop_p = false; -- const char *xasm; -- -- if (SIBLING_CALL_P (insn)) -- { -- output_return_instrumentation (); -- if (direct_p) -- { -- if (ix86_nopic_noplt_attribute_p (call_op)) -- { -- direct_p = false; -- if (TARGET_64BIT) -- { -- if (output_indirect_p) -- xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}"; -- else -- xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}"; -- } -- else -- { -- if (output_indirect_p) -- xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}"; -- else -- xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}"; -- } -- } -- else -- xasm = "%!jmp\t%P0"; -- } -- /* SEH epilogue detection requires the indirect branch case -- to include REX.W. */ -- else if (TARGET_SEH) -- xasm = "%!rex.W jmp\t%A0"; -- else -- { -- if (output_indirect_p) -- xasm = "%0"; -- else -- xasm = "%!jmp\t%A0"; -- } -- -- if (output_indirect_p && !direct_p) -- ix86_output_indirect_branch (call_op, xasm, true); -- else -- output_asm_insn (xasm, &call_op); -- return ""; -- } -- -- /* SEH unwinding can require an extra nop to be emitted in several -- circumstances. Determine if we have one of those. */ -- if (TARGET_SEH) -- { -- rtx_insn *i; -- -- for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i)) -- { -- /* Prevent a catch region from being adjacent to a jump that would -- be interpreted as an epilogue sequence by the unwinder. */ -- if (JUMP_P(i) && CROSSING_JUMP_P (i)) -- { -- seh_nop_p = true; -- break; -- } -- -- /* If we get to another real insn, we don't need the nop. */ -- if (INSN_P (i)) -- break; -- -- /* If we get to the epilogue note, prevent a catch region from -- being adjacent to the standard epilogue sequence. If non- -- call-exceptions, we'll have done this during epilogue emission. */ -- if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG -- && !flag_non_call_exceptions -- && !can_throw_internal (insn)) -- { -- seh_nop_p = true; -- break; -- } -- } -- -- /* If we didn't find a real insn following the call, prevent the -- unwinder from looking into the next function. 
*/ -- if (i == NULL) -- seh_nop_p = true; -- } -- -- if (direct_p) -- { -- if (ix86_nopic_noplt_attribute_p (call_op)) -- { -- direct_p = false; -- if (TARGET_64BIT) -- { -- if (output_indirect_p) -- xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}"; -- else -- xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}"; -- } -- else -- { -- if (output_indirect_p) -- xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}"; -- else -- xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}"; -- } -- } -- else -- xasm = "%!call\t%P0"; -- } -- else -- { -- if (output_indirect_p) -- xasm = "%0"; -- else -- xasm = "%!call\t%A0"; -- } -- -- if (output_indirect_p && !direct_p) -- ix86_output_indirect_branch (call_op, xasm, false); -- else -- output_asm_insn (xasm, &call_op); -- -- if (seh_nop_p) -- return "nop"; -- -- return ""; --} -- --/* Clear stack slot assignments remembered from previous functions. -- This is called from INIT_EXPANDERS once before RTL is emitted for each -- function. */ -- --static struct machine_function * --ix86_init_machine_status (void) --{ -- struct machine_function *f; -- -- f = ggc_cleared_alloc (); -- f->call_abi = ix86_abi; -- -- return f; --} -- --/* Return a MEM corresponding to a stack slot with mode MODE. -- Allocate a new slot if necessary. -- -- The RTL for a function can have several slots available: N is -- which slot to use. */ -- --rtx --assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n) --{ -- struct stack_local_entry *s; -- -- gcc_assert (n < MAX_386_STACK_LOCALS); -- -- for (s = ix86_stack_locals; s; s = s->next) -- if (s->mode == mode && s->n == n) -- return validize_mem (copy_rtx (s->rtl)); -- -- s = ggc_alloc (); -- s->n = n; -- s->mode = mode; -- s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0); -- -- s->next = ix86_stack_locals; -- ix86_stack_locals = s; -- return validize_mem (copy_rtx (s->rtl)); --} -- --static void --ix86_instantiate_decls (void) --{ -- struct stack_local_entry *s; -- -- for (s = ix86_stack_locals; s; s = s->next) -- if (s->rtl != NULL_RTX) -- instantiate_decl_rtl (s->rtl); --} -- --/* Check whether x86 address PARTS is a pc-relative address. */ -- --bool --ix86_rip_relative_addr_p (struct ix86_address *parts) --{ -- rtx base, index, disp; -- -- base = parts->base; -- index = parts->index; -- disp = parts->disp; -- -- if (disp && !base && !index) -- { -- if (TARGET_64BIT) -- { -- rtx symbol = disp; -- -- if (GET_CODE (disp) == CONST) -- symbol = XEXP (disp, 0); -- if (GET_CODE (symbol) == PLUS -- && CONST_INT_P (XEXP (symbol, 1))) -- symbol = XEXP (symbol, 0); -- -- if (GET_CODE (symbol) == LABEL_REF -- || (GET_CODE (symbol) == SYMBOL_REF -- && SYMBOL_REF_TLS_MODEL (symbol) == 0) -- || (GET_CODE (symbol) == UNSPEC -- && (XINT (symbol, 1) == UNSPEC_GOTPCREL -- || XINT (symbol, 1) == UNSPEC_PCREL -- || XINT (symbol, 1) == UNSPEC_GOTNTPOFF))) -- return true; -- } -- } -- return false; --} -- --/* Calculate the length of the memory address in the instruction encoding. -- Includes addr32 prefix, does not include the one-byte modrm, opcode, -- or other prefixes. We never generate addr32 prefix for LEA insn. 
*/ -- --int --memory_address_length (rtx addr, bool lea) --{ -- struct ix86_address parts; -- rtx base, index, disp; -- int len; -- int ok; -- -- if (GET_CODE (addr) == PRE_DEC -- || GET_CODE (addr) == POST_INC -- || GET_CODE (addr) == PRE_MODIFY -- || GET_CODE (addr) == POST_MODIFY) -- return 0; -- -- ok = ix86_decompose_address (addr, &parts); -- gcc_assert (ok); -- -- len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1; -- -- /* If this is not LEA instruction, add the length of addr32 prefix. */ -- if (TARGET_64BIT && !lea -- && (SImode_address_operand (addr, VOIDmode) -- || (parts.base && GET_MODE (parts.base) == SImode) -- || (parts.index && GET_MODE (parts.index) == SImode))) -- len++; -- -- base = parts.base; -- index = parts.index; -- disp = parts.disp; -- -- if (base && SUBREG_P (base)) -- base = SUBREG_REG (base); -- if (index && SUBREG_P (index)) -- index = SUBREG_REG (index); -- -- gcc_assert (base == NULL_RTX || REG_P (base)); -- gcc_assert (index == NULL_RTX || REG_P (index)); -- -- /* Rule of thumb: -- - esp as the base always wants an index, -- - ebp as the base always wants a displacement, -- - r12 as the base always wants an index, -- - r13 as the base always wants a displacement. */ -- -- /* Register Indirect. */ -- if (base && !index && !disp) -- { -- /* esp (for its index) and ebp (for its displacement) need -- the two-byte modrm form. Similarly for r12 and r13 in 64-bit -- code. */ -- if (base == arg_pointer_rtx -- || base == frame_pointer_rtx -- || REGNO (base) == SP_REG -- || REGNO (base) == BP_REG -- || REGNO (base) == R12_REG -- || REGNO (base) == R13_REG) -- len++; -- } -- -- /* Direct Addressing. In 64-bit mode mod 00 r/m 5 -- is not disp32, but disp32(%rip), so for disp32 -- SIB byte is needed, unless print_operand_address -- optimizes it into disp32(%rip) or (%rip) is implied -- by UNSPEC. */ -- else if (disp && !base && !index) -- { -- len += 4; -- if (!ix86_rip_relative_addr_p (&parts)) -- len++; -- } -- else -- { -- /* Find the length of the displacement constant. */ -- if (disp) -- { -- if (base && satisfies_constraint_K (disp)) -- len += 1; -- else -- len += 4; -- } -- /* ebp always wants a displacement. Similarly r13. */ -- else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG)) -- len++; -- -- /* An index requires the two-byte modrm form.... */ -- if (index -- /* ...like esp (or r12), which always wants an index. */ -- || base == arg_pointer_rtx -- || base == frame_pointer_rtx -- || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG))) -- len++; -- } -- -- return len; --} -- --/* Compute default value for "length_immediate" attribute. When SHORTFORM -- is set, expect that insn have 8bit immediate alternative. 
*/ --int --ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform) --{ -- int len = 0; -- int i; -- extract_insn_cached (insn); -- for (i = recog_data.n_operands - 1; i >= 0; --i) -- if (CONSTANT_P (recog_data.operand[i])) -- { -- enum attr_mode mode = get_attr_mode (insn); -- -- gcc_assert (!len); -- if (shortform && CONST_INT_P (recog_data.operand[i])) -- { -- HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]); -- switch (mode) -- { -- case MODE_QI: -- len = 1; -- continue; -- case MODE_HI: -- ival = trunc_int_for_mode (ival, HImode); -- break; -- case MODE_SI: -- ival = trunc_int_for_mode (ival, SImode); -- break; -- default: -- break; -- } -- if (IN_RANGE (ival, -128, 127)) -- { -- len = 1; -- continue; -- } -- } -- switch (mode) -- { -- case MODE_QI: -- len = 1; -- break; -- case MODE_HI: -- len = 2; -- break; -- case MODE_SI: -- len = 4; -- break; -- /* Immediates for DImode instructions are encoded -- as 32bit sign extended values. */ -- case MODE_DI: -- len = 4; -- break; -- default: -- fatal_insn ("unknown insn mode", insn); -- } -- } -- return len; --} -- --/* Compute default value for "length_address" attribute. */ --int --ix86_attr_length_address_default (rtx_insn *insn) --{ -- int i; -- -- if (get_attr_type (insn) == TYPE_LEA) -- { -- rtx set = PATTERN (insn), addr; -- -- if (GET_CODE (set) == PARALLEL) -- set = XVECEXP (set, 0, 0); -- -- gcc_assert (GET_CODE (set) == SET); -- -- addr = SET_SRC (set); -- -- return memory_address_length (addr, true); -- } -- -- extract_insn_cached (insn); -- for (i = recog_data.n_operands - 1; i >= 0; --i) -- { -- rtx op = recog_data.operand[i]; -- if (MEM_P (op)) -- { -- constrain_operands_cached (insn, reload_completed); -- if (which_alternative != -1) -- { -- const char *constraints = recog_data.constraints[i]; -- int alt = which_alternative; -- -- while (*constraints == '=' || *constraints == '+') -- constraints++; -- while (alt-- > 0) -- while (*constraints++ != ',') -- ; -- /* Skip ignored operands. */ -- if (*constraints == 'X') -- continue; -- } -- -- int len = memory_address_length (XEXP (op, 0), false); -- -- /* Account for segment prefix for non-default addr spaces. */ -- if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op))) -- len++; -- -- return len; -- } -- } -- return 0; --} -- --/* Compute default value for "length_vex" attribute. It includes -- 2 or 3 byte VEX prefix and 1 opcode byte. */ -- --int --ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode, -- bool has_vex_w) --{ -- int i; -- -- /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3 -- byte VEX prefix. */ -- if (!has_0f_opcode || has_vex_w) -- return 3 + 1; -- -- /* We can always use 2 byte VEX prefix in 32bit. */ -- if (!TARGET_64BIT) -- return 2 + 1; -- -- extract_insn_cached (insn); -- -- for (i = recog_data.n_operands - 1; i >= 0; --i) -- if (REG_P (recog_data.operand[i])) -- { -- /* REX.W bit uses 3 byte VEX prefix. */ -- if (GET_MODE (recog_data.operand[i]) == DImode -- && GENERAL_REG_P (recog_data.operand[i])) -- return 3 + 1; -- } -- else -- { -- /* REX.X or REX.B bits use 3 byte VEX prefix. */ -- if (MEM_P (recog_data.operand[i]) -- && x86_extended_reg_mentioned_p (recog_data.operand[i])) -- return 3 + 1; -- } -- -- return 2 + 1; --} -- -- --static bool --ix86_class_likely_spilled_p (reg_class_t); -- --/* Returns true if lhs of insn is HW function argument register and set up -- is_spilled to true if it is likely spilled HW register. 
*/ --static bool --insn_is_function_arg (rtx insn, bool* is_spilled) --{ -- rtx dst; -- -- if (!NONDEBUG_INSN_P (insn)) -- return false; -- /* Call instructions are not movable, ignore it. */ -- if (CALL_P (insn)) -- return false; -- insn = PATTERN (insn); -- if (GET_CODE (insn) == PARALLEL) -- insn = XVECEXP (insn, 0, 0); -- if (GET_CODE (insn) != SET) -- return false; -- dst = SET_DEST (insn); -- if (REG_P (dst) && HARD_REGISTER_P (dst) -- && ix86_function_arg_regno_p (REGNO (dst))) -- { -- /* Is it likely spilled HW register? */ -- if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst)) -- && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst)))) -- *is_spilled = true; -- return true; -- } -- return false; --} -- --/* Add output dependencies for chain of function adjacent arguments if only -- there is a move to likely spilled HW register. Return first argument -- if at least one dependence was added or NULL otherwise. */ --static rtx_insn * --add_parameter_dependencies (rtx_insn *call, rtx_insn *head) --{ -- rtx_insn *insn; -- rtx_insn *last = call; -- rtx_insn *first_arg = NULL; -- bool is_spilled = false; -- -- head = PREV_INSN (head); -- -- /* Find nearest to call argument passing instruction. */ -- while (true) -- { -- last = PREV_INSN (last); -- if (last == head) -- return NULL; -- if (!NONDEBUG_INSN_P (last)) -- continue; -- if (insn_is_function_arg (last, &is_spilled)) -- break; -- return NULL; -- } -- -- first_arg = last; -- while (true) -- { -- insn = PREV_INSN (last); -- if (!INSN_P (insn)) -- break; -- if (insn == head) -- break; -- if (!NONDEBUG_INSN_P (insn)) -- { -- last = insn; -- continue; -- } -- if (insn_is_function_arg (insn, &is_spilled)) -- { -- /* Add output depdendence between two function arguments if chain -- of output arguments contains likely spilled HW registers. */ -- if (is_spilled) -- add_dependence (first_arg, insn, REG_DEP_OUTPUT); -- first_arg = last = insn; -- } -- else -- break; -- } -- if (!is_spilled) -- return NULL; -- return first_arg; --} -- --/* Add output or anti dependency from insn to first_arg to restrict its code -- motion. */ --static void --avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn) --{ -- rtx set; -- rtx tmp; -- -- set = single_set (insn); -- if (!set) -- return; -- tmp = SET_DEST (set); -- if (REG_P (tmp)) -- { -- /* Add output dependency to the first function argument. */ -- add_dependence (first_arg, insn, REG_DEP_OUTPUT); -- return; -- } -- /* Add anti dependency. */ -- add_dependence (first_arg, insn, REG_DEP_ANTI); --} -- --/* Avoid cross block motion of function argument through adding dependency -- from the first non-jump instruction in bb. */ --static void --add_dependee_for_func_arg (rtx_insn *arg, basic_block bb) --{ -- rtx_insn *insn = BB_END (bb); -- -- while (insn) -- { -- if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn)) -- { -- rtx set = single_set (insn); -- if (set) -- { -- avoid_func_arg_motion (arg, insn); -- return; -- } -- } -- if (insn == BB_HEAD (bb)) -- return; -- insn = PREV_INSN (insn); -- } --} -- --/* Hook for pre-reload schedule - avoid motion of function arguments -- passed in likely spilled HW registers. 
*/ --static void --ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail) --{ -- rtx_insn *insn; -- rtx_insn *first_arg = NULL; -- if (reload_completed) -- return; -- while (head != tail && DEBUG_INSN_P (head)) -- head = NEXT_INSN (head); -- for (insn = tail; insn != head; insn = PREV_INSN (insn)) -- if (INSN_P (insn) && CALL_P (insn)) -- { -- first_arg = add_parameter_dependencies (insn, head); -- if (first_arg) -- { -- /* Add dependee for first argument to predecessors if only -- region contains more than one block. */ -- basic_block bb = BLOCK_FOR_INSN (insn); -- int rgn = CONTAINING_RGN (bb->index); -- int nr_blks = RGN_NR_BLOCKS (rgn); -- /* Skip trivial regions and region head blocks that can have -- predecessors outside of region. */ -- if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0) -- { -- edge e; -- edge_iterator ei; -- -- /* Regions are SCCs with the exception of selective -- scheduling with pipelining of outer blocks enabled. -- So also check that immediate predecessors of a non-head -- block are in the same region. */ -- FOR_EACH_EDGE (e, ei, bb->preds) -- { -- /* Avoid creating of loop-carried dependencies through -- using topological ordering in the region. */ -- if (rgn == CONTAINING_RGN (e->src->index) -- && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index)) -- add_dependee_for_func_arg (first_arg, e->src); -- } -- } -- insn = first_arg; -- if (insn == head) -- break; -- } -- } -- else if (first_arg) -- avoid_func_arg_motion (first_arg, insn); --} -- --/* Hook for pre-reload schedule - set priority of moves from likely spilled -- HW registers to maximum, to schedule them at soon as possible. These are -- moves from function argument registers at the top of the function entry -- and moves from function return value registers after call. */ --static int --ix86_adjust_priority (rtx_insn *insn, int priority) --{ -- rtx set; -- -- if (reload_completed) -- return priority; -- -- if (!NONDEBUG_INSN_P (insn)) -- return priority; -- -- set = single_set (insn); -- if (set) -- { -- rtx tmp = SET_SRC (set); -- if (REG_P (tmp) -- && HARD_REGISTER_P (tmp) -- && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp)) -- && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp)))) -- return current_sched_info->sched_max_insns_priority; -- } -- -- return priority; --} -- --/* Prepare for scheduling pass. */ --static void --ix86_sched_init_global (FILE *, int, int) --{ -- /* Install scheduling hooks for current CPU. Some of these hooks are used -- in time-critical parts of the scheduler, so we only set them up when -- they are actually used. */ -- switch (ix86_tune) -- { -- case PROCESSOR_CORE2: -- case PROCESSOR_NEHALEM: -- case PROCESSOR_SANDYBRIDGE: -- case PROCESSOR_HASWELL: -- case PROCESSOR_GENERIC: -- /* Do not perform multipass scheduling for pre-reload schedule -- to save compile time. */ -- if (reload_completed) -- { -- ix86_core2i7_init_hooks (); -- break; -- } -- /* Fall through. */ -- default: -- targetm.sched.dfa_post_advance_cycle = NULL; -- targetm.sched.first_cycle_multipass_init = NULL; -- targetm.sched.first_cycle_multipass_begin = NULL; -- targetm.sched.first_cycle_multipass_issue = NULL; -- targetm.sched.first_cycle_multipass_backtrack = NULL; -- targetm.sched.first_cycle_multipass_end = NULL; -- targetm.sched.first_cycle_multipass_fini = NULL; -- break; -- } --} -- -- --/* Implement TARGET_STATIC_RTX_ALIGNMENT. 
*/ -- --static HOST_WIDE_INT --ix86_static_rtx_alignment (machine_mode mode) --{ -- if (mode == DFmode) -- return 64; -- if (ALIGN_MODE_128 (mode)) -- return MAX (128, GET_MODE_ALIGNMENT (mode)); -- return GET_MODE_ALIGNMENT (mode); --} -- --/* Implement TARGET_CONSTANT_ALIGNMENT. */ -- --static HOST_WIDE_INT --ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align) --{ -- if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST -- || TREE_CODE (exp) == INTEGER_CST) -- { -- machine_mode mode = TYPE_MODE (TREE_TYPE (exp)); -- HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode); -- return MAX (mode_align, align); -- } -- else if (!optimize_size && TREE_CODE (exp) == STRING_CST -- && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD) -- return BITS_PER_WORD; -- -- return align; --} -- --/* Implement TARGET_EMPTY_RECORD_P. */ -- --static bool --ix86_is_empty_record (const_tree type) --{ -- if (!TARGET_64BIT) -- return false; -- return default_is_empty_record (type); --} -- --/* Implement TARGET_WARN_PARAMETER_PASSING_ABI. */ -- --static void --ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type) --{ -- CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); -- -- if (!cum->warn_empty) -- return; -- -- if (!TYPE_EMPTY_P (type)) -- return; -- -- /* Don't warn if the function isn't visible outside of the TU. */ -- if (cum->decl && !TREE_PUBLIC (cum->decl)) -- return; -- -- const_tree ctx = get_ultimate_context (cum->decl); -- if (ctx != NULL_TREE -- && !TRANSLATION_UNIT_WARN_EMPTY_P (ctx)) -- return; -- -- /* If the actual size of the type is zero, then there is no change -- in how objects of this size are passed. */ -- if (int_size_in_bytes (type) == 0) -- return; -- -- warning (OPT_Wabi, "empty class %qT parameter passing ABI " -- "changes in %<-fabi-version=12%> (GCC 8)", type); -- -- /* Only warn once. */ -- cum->warn_empty = false; --} -- --/* This hook returns name of multilib ABI. */ -- --static const char * --ix86_get_multilib_abi_name (void) --{ -- if (!(TARGET_64BIT_P (ix86_isa_flags))) -- return "i386"; -- else if (TARGET_X32_P (ix86_isa_flags)) -- return "x32"; -- else -- return "x86_64"; --} -- --/* Compute the alignment for a variable for Intel MCU psABI. TYPE is -- the data type, and ALIGN is the alignment that the object would -- ordinarily have. */ -- --static int --iamcu_alignment (tree type, int align) --{ -- machine_mode mode; -- -- if (align < 32 || TYPE_USER_ALIGN (type)) -- return align; -- -- /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4 -- bytes. */ -- mode = TYPE_MODE (strip_array_types (type)); -- switch (GET_MODE_CLASS (mode)) -- { -- case MODE_INT: -- case MODE_COMPLEX_INT: -- case MODE_COMPLEX_FLOAT: -- case MODE_FLOAT: -- case MODE_DECIMAL_FLOAT: -- return 32; -- default: -- return align; -- } --} -- --/* Compute the alignment for a static variable. -- TYPE is the data type, and ALIGN is the alignment that -- the object would ordinarily have. The value of this function is used -- instead of that alignment to align the object. */ -- --int --ix86_data_alignment (tree type, int align, bool opt) --{ -- /* GCC 4.8 and earlier used to incorrectly assume this alignment even -- for symbols from other compilation units or symbols that don't need -- to bind locally. In order to preserve some ABI compatibility with -- those compilers, ensure we don't decrease alignment from what we -- used to assume. 
*/ -- -- int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT); -- -- /* A data structure, equal or greater than the size of a cache line -- (64 bytes in the Pentium 4 and other recent Intel processors, including -- processors based on Intel Core microarchitecture) should be aligned -- so that its base address is a multiple of a cache line size. */ -- -- int max_align -- = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT); -- -- if (max_align < BITS_PER_WORD) -- max_align = BITS_PER_WORD; -- -- switch (ix86_align_data_type) -- { -- case ix86_align_data_type_abi: opt = false; break; -- case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break; -- case ix86_align_data_type_cacheline: break; -- } -- -- if (TARGET_IAMCU) -- align = iamcu_alignment (type, align); -- -- if (opt -- && AGGREGATE_TYPE_P (type) -- && TYPE_SIZE (type) -- && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST) -- { -- if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat) -- && align < max_align_compat) -- align = max_align_compat; -- if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align) -- && align < max_align) -- align = max_align; -- } -- -- /* x86-64 ABI requires arrays greater than 16 bytes to be aligned -- to 16byte boundary. */ -- if (TARGET_64BIT) -- { -- if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE) -- && TYPE_SIZE (type) -- && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST -- && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128) -- && align < 128) -- return 128; -- } -- -- if (!opt) -- return align; -- -- if (TREE_CODE (type) == ARRAY_TYPE) -- { -- if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64) -- return 64; -- if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128) -- return 128; -- } -- else if (TREE_CODE (type) == COMPLEX_TYPE) -- { -- -- if (TYPE_MODE (type) == DCmode && align < 64) -- return 64; -- if ((TYPE_MODE (type) == XCmode -- || TYPE_MODE (type) == TCmode) && align < 128) -- return 128; -- } -- else if ((TREE_CODE (type) == RECORD_TYPE -- || TREE_CODE (type) == UNION_TYPE -- || TREE_CODE (type) == QUAL_UNION_TYPE) -- && TYPE_FIELDS (type)) -- { -- if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64) -- return 64; -- if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128) -- return 128; -- } -- else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE -- || TREE_CODE (type) == INTEGER_TYPE) -- { -- if (TYPE_MODE (type) == DFmode && align < 64) -- return 64; -- if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128) -- return 128; -- } -- -- return align; --} -- --/* Compute the alignment for a local variable or a stack slot. EXP is -- the data type or decl itself, MODE is the widest mode available and -- ALIGN is the alignment that the object would ordinarily have. The -- value of this macro is used instead of that alignment to align the -- object. */ -- --unsigned int --ix86_local_alignment (tree exp, machine_mode mode, -- unsigned int align) --{ -- tree type, decl; -- -- if (exp && DECL_P (exp)) -- { -- type = TREE_TYPE (exp); -- decl = exp; -- } -- else -- { -- type = exp; -- decl = NULL; -- } -- -- /* Don't do dynamic stack realignment for long long objects with -- -mpreferred-stack-boundary=2. 
*/ -- if (!TARGET_64BIT -- && align == 64 -- && ix86_preferred_stack_boundary < 64 -- && (mode == DImode || (type && TYPE_MODE (type) == DImode)) -- && (!type || !TYPE_USER_ALIGN (type)) -- && (!decl || !DECL_USER_ALIGN (decl))) -- align = 32; -- -- /* If TYPE is NULL, we are allocating a stack slot for caller-save -- register in MODE. We will return the largest alignment of XF -- and DF. */ -- if (!type) -- { -- if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode)) -- align = GET_MODE_ALIGNMENT (DFmode); -- return align; -- } -- -- /* Don't increase alignment for Intel MCU psABI. */ -- if (TARGET_IAMCU) -- return align; -- -- /* x86-64 ABI requires arrays greater than 16 bytes to be aligned -- to 16byte boundary. Exact wording is: -- -- An array uses the same alignment as its elements, except that a local or -- global array variable of length at least 16 bytes or -- a C99 variable-length array variable always has alignment of at least 16 bytes. -- -- This was added to allow use of aligned SSE instructions at arrays. This -- rule is meant for static storage (where compiler cannot do the analysis -- by itself). We follow it for automatic variables only when convenient. -- We fully control everything in the function compiled and functions from -- other unit cannot rely on the alignment. -- -- Exclude va_list type. It is the common case of local array where -- we cannot benefit from the alignment. -- -- TODO: Probably one should optimize for size only when var is not escaping. */ -- if (TARGET_64BIT && optimize_function_for_speed_p (cfun) -- && TARGET_SSE) -- { -- if (AGGREGATE_TYPE_P (type) -- && (va_list_type_node == NULL_TREE -- || (TYPE_MAIN_VARIANT (type) -- != TYPE_MAIN_VARIANT (va_list_type_node))) -- && TYPE_SIZE (type) -- && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST -- && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128) -- && align < 128) -- return 128; -- } -- if (TREE_CODE (type) == ARRAY_TYPE) -- { -- if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64) -- return 64; -- if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128) -- return 128; -- } -- else if (TREE_CODE (type) == COMPLEX_TYPE) -- { -- if (TYPE_MODE (type) == DCmode && align < 64) -- return 64; -- if ((TYPE_MODE (type) == XCmode -- || TYPE_MODE (type) == TCmode) && align < 128) -- return 128; -- } -- else if ((TREE_CODE (type) == RECORD_TYPE -- || TREE_CODE (type) == UNION_TYPE -- || TREE_CODE (type) == QUAL_UNION_TYPE) -- && TYPE_FIELDS (type)) -- { -- if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64) -- return 64; -- if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128) -- return 128; -- } -- else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE -- || TREE_CODE (type) == INTEGER_TYPE) -- { -- -- if (TYPE_MODE (type) == DFmode && align < 64) -- return 64; -- if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128) -- return 128; -- } -- return align; --} -- --/* Compute the minimum required alignment for dynamic stack realignment -- purposes for a local variable, parameter or a stack slot. EXP is -- the data type or decl itself, MODE is its mode and ALIGN is the -- alignment that the object would ordinarily have. 
*/ -- --unsigned int --ix86_minimum_alignment (tree exp, machine_mode mode, -- unsigned int align) --{ -- tree type, decl; -- -- if (exp && DECL_P (exp)) -- { -- type = TREE_TYPE (exp); -- decl = exp; -- } -- else -- { -- type = exp; -- decl = NULL; -- } -- -- if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64) -- return align; -- -- /* Don't do dynamic stack realignment for long long objects with -- -mpreferred-stack-boundary=2. */ -- if ((mode == DImode || (type && TYPE_MODE (type) == DImode)) -- && (!type || !TYPE_USER_ALIGN (type)) -- && (!decl || !DECL_USER_ALIGN (decl))) -- { -- gcc_checking_assert (!TARGET_STV); -- return 32; -- } -- -- return align; --} -- --/* Find a location for the static chain incoming to a nested function. -- This is a register, unless all free registers are used by arguments. */ -- --static rtx --ix86_static_chain (const_tree fndecl_or_type, bool incoming_p) --{ -- unsigned regno; -- -- if (TARGET_64BIT) -- { -- /* We always use R10 in 64-bit mode. */ -- regno = R10_REG; -- } -- else -- { -- const_tree fntype, fndecl; -- unsigned int ccvt; -- -- /* By default in 32-bit mode we use ECX to pass the static chain. */ -- regno = CX_REG; -- -- if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL) -- { -- fntype = TREE_TYPE (fndecl_or_type); -- fndecl = fndecl_or_type; -- } -- else -- { -- fntype = fndecl_or_type; -- fndecl = NULL; -- } -- -- ccvt = ix86_get_callcvt (fntype); -- if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) -- { -- /* Fastcall functions use ecx/edx for arguments, which leaves -- us with EAX for the static chain. -- Thiscall functions use ecx for arguments, which also -- leaves us with EAX for the static chain. */ -- regno = AX_REG; -- } -- else if ((ccvt & IX86_CALLCVT_THISCALL) != 0) -- { -- /* Thiscall functions use ecx for arguments, which leaves -- us with EAX and EDX for the static chain. -- We are using for abi-compatibility EAX. */ -- regno = AX_REG; -- } -- else if (ix86_function_regparm (fntype, fndecl) == 3) -- { -- /* For regparm 3, we have no free call-clobbered registers in -- which to store the static chain. In order to implement this, -- we have the trampoline push the static chain to the stack. -- However, we can't push a value below the return address when -- we call the nested function directly, so we have to use an -- alternate entry point. For this we use ESI, and have the -- alternate entry point push ESI, so that things appear the -- same once we're executing the nested function. */ -- if (incoming_p) -- { -- if (fndecl == current_function_decl -- && !ix86_static_chain_on_stack) -- { -- gcc_assert (!reload_completed); -- ix86_static_chain_on_stack = true; -- } -- return gen_frame_mem (SImode, -- plus_constant (Pmode, -- arg_pointer_rtx, -8)); -- } -- regno = SI_REG; -- } -- } -- -- return gen_rtx_REG (Pmode, regno); --} -- --/* Emit RTL insns to initialize the variable parts of a trampoline. -- FNDECL is the decl of the target address; M_TRAMP is a MEM for -- the trampoline, and CHAIN_VALUE is an RTX for the static chain -- to be passed to the target function. */ -- --static void --ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) --{ -- rtx mem, fnaddr; -- int opcode; -- int offset = 0; -- bool need_endbr = (flag_cf_protection & CF_BRANCH); -- -- fnaddr = XEXP (DECL_RTL (fndecl), 0); -- -- if (TARGET_64BIT) -- { -- int size; -- -- if (need_endbr) -- { -- /* Insert ENDBR64. 
*/ -- mem = adjust_address (m_tramp, SImode, offset); -- emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode)); -- offset += 4; -- } -- -- /* Load the function address to r11. Try to load address using -- the shorter movl instead of movabs. We may want to support -- movq for kernel mode, but kernel does not use trampolines at -- the moment. FNADDR is a 32bit address and may not be in -- DImode when ptr_mode == SImode. Always use movl in this -- case. */ -- if (ptr_mode == SImode -- || x86_64_zext_immediate_operand (fnaddr, VOIDmode)) -- { -- fnaddr = copy_addr_to_reg (fnaddr); -- -- mem = adjust_address (m_tramp, HImode, offset); -- emit_move_insn (mem, gen_int_mode (0xbb41, HImode)); -- -- mem = adjust_address (m_tramp, SImode, offset + 2); -- emit_move_insn (mem, gen_lowpart (SImode, fnaddr)); -- offset += 6; -- } -- else -- { -- mem = adjust_address (m_tramp, HImode, offset); -- emit_move_insn (mem, gen_int_mode (0xbb49, HImode)); -- -- mem = adjust_address (m_tramp, DImode, offset + 2); -- emit_move_insn (mem, fnaddr); -- offset += 10; -- } -- -- /* Load static chain using movabs to r10. Use the shorter movl -- instead of movabs when ptr_mode == SImode. */ -- if (ptr_mode == SImode) -- { -- opcode = 0xba41; -- size = 6; -- } -- else -- { -- opcode = 0xba49; -- size = 10; -- } -- -- mem = adjust_address (m_tramp, HImode, offset); -- emit_move_insn (mem, gen_int_mode (opcode, HImode)); -- -- mem = adjust_address (m_tramp, ptr_mode, offset + 2); -- emit_move_insn (mem, chain_value); -- offset += size; -- -- /* Jump to r11; the last (unused) byte is a nop, only there to -- pad the write out to a single 32-bit store. */ -- mem = adjust_address (m_tramp, SImode, offset); -- emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode)); -- offset += 4; -- } -- else -- { -- rtx disp, chain; -- -- /* Depending on the static chain location, either load a register -- with a constant, or push the constant to the stack. All of the -- instructions are the same size. */ -- chain = ix86_static_chain (fndecl, true); -- if (REG_P (chain)) -- { -- switch (REGNO (chain)) -- { -- case AX_REG: -- opcode = 0xb8; break; -- case CX_REG: -- opcode = 0xb9; break; -- default: -- gcc_unreachable (); -- } -- } -- else -- opcode = 0x68; -- -- if (need_endbr) -- { -- /* Insert ENDBR32. */ -- mem = adjust_address (m_tramp, SImode, offset); -- emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode)); -- offset += 4; -- } -- -- mem = adjust_address (m_tramp, QImode, offset); -- emit_move_insn (mem, gen_int_mode (opcode, QImode)); -- -- mem = adjust_address (m_tramp, SImode, offset + 1); -- emit_move_insn (mem, chain_value); -- offset += 5; -- -- mem = adjust_address (m_tramp, QImode, offset); -- emit_move_insn (mem, gen_int_mode (0xe9, QImode)); -- -- mem = adjust_address (m_tramp, SImode, offset + 1); -- -- /* Compute offset from the end of the jmp to the target function. -- In the case in which the trampoline stores the static chain on -- the stack, we need to skip the first insn which pushes the -- (call-saved) register static chain; this push is 1 byte. */ -- offset += 5; -- int skip = MEM_P (chain) ? 1 : 0; -- /* Skip ENDBR32 at the entry of the target function. 
*/ -- if (need_endbr -- && !cgraph_node::get (fndecl)->only_called_directly_p ()) -- skip += 4; -- disp = expand_binop (SImode, sub_optab, fnaddr, -- plus_constant (Pmode, XEXP (m_tramp, 0), -- offset - skip), -- NULL_RTX, 1, OPTAB_DIRECT); -- emit_move_insn (mem, disp); -- } -- -- gcc_assert (offset <= TRAMPOLINE_SIZE); -- --#ifdef HAVE_ENABLE_EXECUTE_STACK --#ifdef CHECK_EXECUTE_STACK_ENABLED -- if (CHECK_EXECUTE_STACK_ENABLED) --#endif -- emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"), -- LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode); --#endif --} -- --static bool --ix86_allocate_stack_slots_for_args (void) --{ -- /* Naked functions should not allocate stack slots for arguments. */ -- return !ix86_function_naked (current_function_decl); --} -- --static bool --ix86_warn_func_return (tree decl) --{ -- /* Naked functions are implemented entirely in assembly, including the -- return sequence, so suppress warnings about this. */ -- return !ix86_function_naked (decl); --} -- --/* The following file contains several enumerations and data structures -- built from the definitions in i386-builtin-types.def. */ -- --#include "i386-builtin-types.inc" -- --/* Table for the ix86 builtin non-function types. */ --static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1]; -- --/* Retrieve an element from the above table, building some of -- the types lazily. */ -- --static tree --ix86_get_builtin_type (enum ix86_builtin_type tcode) --{ -- unsigned int index; -- tree type, itype; -- -- gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab)); -- -- type = ix86_builtin_type_tab[(int) tcode]; -- if (type != NULL) -- return type; -- -- gcc_assert (tcode > IX86_BT_LAST_PRIM); -- if (tcode <= IX86_BT_LAST_VECT) -- { -- machine_mode mode; -- -- index = tcode - IX86_BT_LAST_PRIM - 1; -- itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]); -- mode = ix86_builtin_type_vect_mode[index]; -- -- type = build_vector_type_for_mode (itype, mode); -- } -- else -- { -- int quals; -- -- index = tcode - IX86_BT_LAST_VECT - 1; -- if (tcode <= IX86_BT_LAST_PTR) -- quals = TYPE_UNQUALIFIED; -- else -- quals = TYPE_QUAL_CONST; -- -- itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]); -- if (quals != TYPE_UNQUALIFIED) -- itype = build_qualified_type (itype, quals); -- -- type = build_pointer_type (itype); -- } -- -- ix86_builtin_type_tab[(int) tcode] = type; -- return type; --} -- --/* Table for the ix86 builtin function types. */ --static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1]; -- --/* Retrieve an element from the above table, building some of -- the types lazily. 
*/ -- --static tree --ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode) --{ -- tree type; -- -- gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab)); -- -- type = ix86_builtin_func_type_tab[(int) tcode]; -- if (type != NULL) -- return type; -- -- if (tcode <= IX86_BT_LAST_FUNC) -- { -- unsigned start = ix86_builtin_func_start[(int) tcode]; -- unsigned after = ix86_builtin_func_start[(int) tcode + 1]; -- tree rtype, atype, args = void_list_node; -- unsigned i; -- -- rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]); -- for (i = after - 1; i > start; --i) -- { -- atype = ix86_get_builtin_type (ix86_builtin_func_args[i]); -- args = tree_cons (NULL, atype, args); -- } -- -- type = build_function_type (rtype, args); -- } -- else -- { -- unsigned index = tcode - IX86_BT_LAST_FUNC - 1; -- enum ix86_builtin_func_type icode; -- -- icode = ix86_builtin_func_alias_base[index]; -- type = ix86_get_builtin_func_type (icode); -- } -- -- ix86_builtin_func_type_tab[(int) tcode] = type; -- return type; --} -- -- --/* Codes for all the SSE/MMX builtins. Builtins not mentioned in any -- bdesc_* arrays below should come first, then builtins for each bdesc_* -- array in ascending order, so that we can use direct array accesses. */ --enum ix86_builtins --{ -- IX86_BUILTIN_MASKMOVQ, -- IX86_BUILTIN_LDMXCSR, -- IX86_BUILTIN_STMXCSR, -- IX86_BUILTIN_MASKMOVDQU, -- IX86_BUILTIN_PSLLDQ128, -- IX86_BUILTIN_CLFLUSH, -- IX86_BUILTIN_MONITOR, -- IX86_BUILTIN_MWAIT, -- IX86_BUILTIN_UMONITOR, -- IX86_BUILTIN_UMWAIT, -- IX86_BUILTIN_TPAUSE, -- IX86_BUILTIN_CLZERO, -- IX86_BUILTIN_CLDEMOTE, -- IX86_BUILTIN_VEC_INIT_V2SI, -- IX86_BUILTIN_VEC_INIT_V4HI, -- IX86_BUILTIN_VEC_INIT_V8QI, -- IX86_BUILTIN_VEC_EXT_V2DF, -- IX86_BUILTIN_VEC_EXT_V2DI, -- IX86_BUILTIN_VEC_EXT_V4SF, -- IX86_BUILTIN_VEC_EXT_V4SI, -- IX86_BUILTIN_VEC_EXT_V8HI, -- IX86_BUILTIN_VEC_EXT_V2SI, -- IX86_BUILTIN_VEC_EXT_V4HI, -- IX86_BUILTIN_VEC_EXT_V16QI, -- IX86_BUILTIN_VEC_SET_V2DI, -- IX86_BUILTIN_VEC_SET_V4SF, -- IX86_BUILTIN_VEC_SET_V4SI, -- IX86_BUILTIN_VEC_SET_V8HI, -- IX86_BUILTIN_VEC_SET_V4HI, -- IX86_BUILTIN_VEC_SET_V16QI, -- IX86_BUILTIN_GATHERSIV2DF, -- IX86_BUILTIN_GATHERSIV4DF, -- IX86_BUILTIN_GATHERDIV2DF, -- IX86_BUILTIN_GATHERDIV4DF, -- IX86_BUILTIN_GATHERSIV4SF, -- IX86_BUILTIN_GATHERSIV8SF, -- IX86_BUILTIN_GATHERDIV4SF, -- IX86_BUILTIN_GATHERDIV8SF, -- IX86_BUILTIN_GATHERSIV2DI, -- IX86_BUILTIN_GATHERSIV4DI, -- IX86_BUILTIN_GATHERDIV2DI, -- IX86_BUILTIN_GATHERDIV4DI, -- IX86_BUILTIN_GATHERSIV4SI, -- IX86_BUILTIN_GATHERSIV8SI, -- IX86_BUILTIN_GATHERDIV4SI, -- IX86_BUILTIN_GATHERDIV8SI, -- IX86_BUILTIN_GATHER3SIV8SF, -- IX86_BUILTIN_GATHER3SIV4SF, -- IX86_BUILTIN_GATHER3SIV4DF, -- IX86_BUILTIN_GATHER3SIV2DF, -- IX86_BUILTIN_GATHER3DIV8SF, -- IX86_BUILTIN_GATHER3DIV4SF, -- IX86_BUILTIN_GATHER3DIV4DF, -- IX86_BUILTIN_GATHER3DIV2DF, -- IX86_BUILTIN_GATHER3SIV8SI, -- IX86_BUILTIN_GATHER3SIV4SI, -- IX86_BUILTIN_GATHER3SIV4DI, -- IX86_BUILTIN_GATHER3SIV2DI, -- IX86_BUILTIN_GATHER3DIV8SI, -- IX86_BUILTIN_GATHER3DIV4SI, -- IX86_BUILTIN_GATHER3DIV4DI, -- IX86_BUILTIN_GATHER3DIV2DI, -- IX86_BUILTIN_SCATTERSIV8SF, -- IX86_BUILTIN_SCATTERSIV4SF, -- IX86_BUILTIN_SCATTERSIV4DF, -- IX86_BUILTIN_SCATTERSIV2DF, -- IX86_BUILTIN_SCATTERDIV8SF, -- IX86_BUILTIN_SCATTERDIV4SF, -- IX86_BUILTIN_SCATTERDIV4DF, -- IX86_BUILTIN_SCATTERDIV2DF, -- IX86_BUILTIN_SCATTERSIV8SI, -- IX86_BUILTIN_SCATTERSIV4SI, -- IX86_BUILTIN_SCATTERSIV4DI, -- IX86_BUILTIN_SCATTERSIV2DI, -- IX86_BUILTIN_SCATTERDIV8SI, -- IX86_BUILTIN_SCATTERDIV4SI, -- 
IX86_BUILTIN_SCATTERDIV4DI, -- IX86_BUILTIN_SCATTERDIV2DI, -- /* Alternate 4 and 8 element gather/scatter for the vectorizer -- where all operands are 32-byte or 64-byte wide respectively. */ -- IX86_BUILTIN_GATHERALTSIV4DF, -- IX86_BUILTIN_GATHERALTDIV8SF, -- IX86_BUILTIN_GATHERALTSIV4DI, -- IX86_BUILTIN_GATHERALTDIV8SI, -- IX86_BUILTIN_GATHER3ALTDIV16SF, -- IX86_BUILTIN_GATHER3ALTDIV16SI, -- IX86_BUILTIN_GATHER3ALTSIV4DF, -- IX86_BUILTIN_GATHER3ALTDIV8SF, -- IX86_BUILTIN_GATHER3ALTSIV4DI, -- IX86_BUILTIN_GATHER3ALTDIV8SI, -- IX86_BUILTIN_GATHER3ALTSIV8DF, -- IX86_BUILTIN_GATHER3ALTSIV8DI, -- IX86_BUILTIN_GATHER3DIV16SF, -- IX86_BUILTIN_GATHER3DIV16SI, -- IX86_BUILTIN_GATHER3DIV8DF, -- IX86_BUILTIN_GATHER3DIV8DI, -- IX86_BUILTIN_GATHER3SIV16SF, -- IX86_BUILTIN_GATHER3SIV16SI, -- IX86_BUILTIN_GATHER3SIV8DF, -- IX86_BUILTIN_GATHER3SIV8DI, -- IX86_BUILTIN_SCATTERALTSIV8DF, -- IX86_BUILTIN_SCATTERALTDIV16SF, -- IX86_BUILTIN_SCATTERALTSIV8DI, -- IX86_BUILTIN_SCATTERALTDIV16SI, -- IX86_BUILTIN_SCATTERALTSIV4DF, -- IX86_BUILTIN_SCATTERALTDIV8SF, -- IX86_BUILTIN_SCATTERALTSIV4DI, -- IX86_BUILTIN_SCATTERALTDIV8SI, -- IX86_BUILTIN_SCATTERALTSIV2DF, -- IX86_BUILTIN_SCATTERALTDIV4SF, -- IX86_BUILTIN_SCATTERALTSIV2DI, -- IX86_BUILTIN_SCATTERALTDIV4SI, -- IX86_BUILTIN_SCATTERDIV16SF, -- IX86_BUILTIN_SCATTERDIV16SI, -- IX86_BUILTIN_SCATTERDIV8DF, -- IX86_BUILTIN_SCATTERDIV8DI, -- IX86_BUILTIN_SCATTERSIV16SF, -- IX86_BUILTIN_SCATTERSIV16SI, -- IX86_BUILTIN_SCATTERSIV8DF, -- IX86_BUILTIN_SCATTERSIV8DI, -- IX86_BUILTIN_GATHERPFQPD, -- IX86_BUILTIN_GATHERPFDPS, -- IX86_BUILTIN_GATHERPFDPD, -- IX86_BUILTIN_GATHERPFQPS, -- IX86_BUILTIN_SCATTERPFDPD, -- IX86_BUILTIN_SCATTERPFDPS, -- IX86_BUILTIN_SCATTERPFQPD, -- IX86_BUILTIN_SCATTERPFQPS, -- IX86_BUILTIN_CLWB, -- IX86_BUILTIN_CLFLUSHOPT, -- IX86_BUILTIN_INFQ, -- IX86_BUILTIN_HUGE_VALQ, -- IX86_BUILTIN_NANQ, -- IX86_BUILTIN_NANSQ, -- IX86_BUILTIN_XABORT, -- IX86_BUILTIN_ADDCARRYX32, -- IX86_BUILTIN_ADDCARRYX64, -- IX86_BUILTIN_SBB32, -- IX86_BUILTIN_SBB64, -- IX86_BUILTIN_RDRAND16_STEP, -- IX86_BUILTIN_RDRAND32_STEP, -- IX86_BUILTIN_RDRAND64_STEP, -- IX86_BUILTIN_RDSEED16_STEP, -- IX86_BUILTIN_RDSEED32_STEP, -- IX86_BUILTIN_RDSEED64_STEP, -- IX86_BUILTIN_MONITORX, -- IX86_BUILTIN_MWAITX, -- IX86_BUILTIN_CFSTRING, -- IX86_BUILTIN_CPU_INIT, -- IX86_BUILTIN_CPU_IS, -- IX86_BUILTIN_CPU_SUPPORTS, -- IX86_BUILTIN_READ_FLAGS, -- IX86_BUILTIN_WRITE_FLAGS, -- -- /* All the remaining builtins are tracked in bdesc_* arrays in -- i386-builtin.def. Don't add any IX86_BUILTIN_* enumerators after -- this point. */ --#define BDESC(mask, mask2, icode, name, code, comparison, flag) \ -- code, --#define BDESC_FIRST(kind, kindu, mask, mask2, icode, name, code, comparison, flag) \ -- code, \ -- IX86_BUILTIN__BDESC_##kindu##_FIRST = code, --#define BDESC_END(kind, next_kind) -- --#include "i386-builtin.def" -- --#undef BDESC --#undef BDESC_FIRST --#undef BDESC_END -- -- IX86_BUILTIN_MAX, -- -- IX86_BUILTIN__BDESC_MAX_FIRST = IX86_BUILTIN_MAX, -- -- /* Now just the aliases for bdesc_* start/end. */ --#define BDESC(mask, mask2, icode, name, code, comparison, flag) --#define BDESC_FIRST(kind, kindu, mask, mask2, icode, name, code, comparison, flag) --#define BDESC_END(kind, next_kind) \ -- IX86_BUILTIN__BDESC_##kind##_LAST \ -- = IX86_BUILTIN__BDESC_##next_kind##_FIRST - 1, -- --#include "i386-builtin.def" -- --#undef BDESC --#undef BDESC_FIRST --#undef BDESC_END -- -- /* Just to make sure there is no comma after the last enumerator. 
*/ -- IX86_BUILTIN__BDESC_MAX_LAST = IX86_BUILTIN__BDESC_MAX_FIRST --}; -- --/* Table for the ix86 builtin decls. */ --static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX]; -- --/* Table of all of the builtin functions that are possible with different ISA's -- but are waiting to be built until a function is declared to use that -- ISA. */ --struct builtin_isa { -- HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */ -- HOST_WIDE_INT isa2; /* additional isa_flags this builtin is defined for */ -- const char *name; /* function name */ -- enum ix86_builtin_func_type tcode; /* type to use in the declaration */ -- unsigned char const_p:1; /* true if the declaration is constant */ -- unsigned char pure_p:1; /* true if the declaration has pure attribute */ -- bool set_and_not_built_p; --}; -- --static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX]; -- --/* Bits that can still enable any inclusion of a builtin. */ --static HOST_WIDE_INT deferred_isa_values = 0; --static HOST_WIDE_INT deferred_isa_values2 = 0; -- --/* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the -- MASK and MASK2 of which isa_flags and ix86_isa_flags2 to use in the -- ix86_builtins_isa array. Stores the function decl in the ix86_builtins -- array. Returns the function decl or NULL_TREE, if the builtin was not -- added. -- -- If the front end has a special hook for builtin functions, delay adding -- builtin functions that aren't in the current ISA until the ISA is changed -- with function specific optimization. Doing so, can save about 300K for the -- default compiler. When the builtin is expanded, check at that time whether -- it is valid. -- -- If the front end doesn't have a special hook, record all builtins, even if -- it isn't an instruction set in the current ISA in case the user uses -- function specific options for a different ISA, so that we don't get scope -- errors if a builtin is added in the middle of a function scope. */ -- --static inline tree --def_builtin (HOST_WIDE_INT mask, HOST_WIDE_INT mask2, -- const char *name, -- enum ix86_builtin_func_type tcode, -- enum ix86_builtins code) --{ -- tree decl = NULL_TREE; -- -- /* An instruction may be 64bit only regardless of ISAs. */ -- if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT) -- { -- ix86_builtins_isa[(int) code].isa = mask; -- ix86_builtins_isa[(int) code].isa2 = mask2; -- -- mask &= ~OPTION_MASK_ISA_64BIT; -- -- /* Filter out the masks most often ored together with others. */ -- if ((mask & ix86_isa_flags & OPTION_MASK_ISA_AVX512VL) -- && mask != OPTION_MASK_ISA_AVX512VL) -- mask &= ~OPTION_MASK_ISA_AVX512VL; -- if ((mask & ix86_isa_flags & OPTION_MASK_ISA_AVX512BW) -- && mask != OPTION_MASK_ISA_AVX512BW) -- mask &= ~OPTION_MASK_ISA_AVX512BW; -- -- if (((mask2 == 0 || (mask2 & ix86_isa_flags2) != 0) -- && (mask == 0 || (mask & ix86_isa_flags) != 0)) -- || (lang_hooks.builtin_function -- == lang_hooks.builtin_function_ext_scope)) -- { -- tree type = ix86_get_builtin_func_type (tcode); -- decl = add_builtin_function (name, type, code, BUILT_IN_MD, -- NULL, NULL_TREE); -- ix86_builtins[(int) code] = decl; -- ix86_builtins_isa[(int) code].set_and_not_built_p = false; -- } -- else -- { -- /* Just MASK and MASK2 where set_and_not_built_p == true can potentially -- include a builtin. 
*/ -- deferred_isa_values |= mask; -- deferred_isa_values2 |= mask2; -- ix86_builtins[(int) code] = NULL_TREE; -- ix86_builtins_isa[(int) code].tcode = tcode; -- ix86_builtins_isa[(int) code].name = name; -- ix86_builtins_isa[(int) code].const_p = false; -- ix86_builtins_isa[(int) code].pure_p = false; -- ix86_builtins_isa[(int) code].set_and_not_built_p = true; -- } -- } -- -- return decl; --} -- --/* Like def_builtin, but also marks the function decl "const". */ -- --static inline tree --def_builtin_const (HOST_WIDE_INT mask, HOST_WIDE_INT mask2, const char *name, -- enum ix86_builtin_func_type tcode, enum ix86_builtins code) --{ -- tree decl = def_builtin (mask, mask2, name, tcode, code); -- if (decl) -- TREE_READONLY (decl) = 1; -- else -- ix86_builtins_isa[(int) code].const_p = true; -- -- return decl; --} -- --/* Like def_builtin, but also marks the function decl "pure". */ -- --static inline tree --def_builtin_pure (HOST_WIDE_INT mask, HOST_WIDE_INT mask2, const char *name, -- enum ix86_builtin_func_type tcode, enum ix86_builtins code) --{ -- tree decl = def_builtin (mask, mask2, name, tcode, code); -- if (decl) -- DECL_PURE_P (decl) = 1; -- else -- ix86_builtins_isa[(int) code].pure_p = true; -- -- return decl; --} -- --/* Add any new builtin functions for a given ISA that may not have been -- declared. This saves a bit of space compared to adding all of the -- declarations to the tree, even if we didn't use them. */ -- --static void --ix86_add_new_builtins (HOST_WIDE_INT isa, HOST_WIDE_INT isa2) --{ -- isa &= ~OPTION_MASK_ISA_64BIT; -- -- if ((isa & deferred_isa_values) == 0 -- && (isa2 & deferred_isa_values2) == 0) -- return; -- -- /* Bits in ISA value can be removed from potential isa values. */ -- deferred_isa_values &= ~isa; -- deferred_isa_values2 &= ~isa2; -- -- int i; -- tree saved_current_target_pragma = current_target_pragma; -- current_target_pragma = NULL_TREE; -- -- for (i = 0; i < (int)IX86_BUILTIN_MAX; i++) -- { -- if (((ix86_builtins_isa[i].isa & isa) != 0 -- || (ix86_builtins_isa[i].isa2 & isa2) != 0) -- && ix86_builtins_isa[i].set_and_not_built_p) -- { -- tree decl, type; -- -- /* Don't define the builtin again. */ -- ix86_builtins_isa[i].set_and_not_built_p = false; -- -- type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode); -- decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name, -- type, i, BUILT_IN_MD, NULL, -- NULL_TREE); -- -- ix86_builtins[i] = decl; -- if (ix86_builtins_isa[i].const_p) -- TREE_READONLY (decl) = 1; -- } -- } -- -- current_target_pragma = saved_current_target_pragma; --} -- --/* Bits for builtin_description.flag. */ -- --/* Set when we don't support the comparison natively, and should -- swap_comparison in order to support it. 
*/ --#define BUILTIN_DESC_SWAP_OPERANDS 1 -- --struct builtin_description --{ -- const HOST_WIDE_INT mask; -- const HOST_WIDE_INT mask2; -- const enum insn_code icode; -- const char *const name; -- const enum ix86_builtins code; -- const enum rtx_code comparison; -- const int flag; --}; -- --#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT --#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT --#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT --#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT --#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF --#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF --#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF --#define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF --#define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI --#define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI --#define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI --#define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI --#define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI --#define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI --#define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI --#define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI --#define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI --#define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI --#define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF --#define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF --#define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI --#define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI --#define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI --#define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI --#define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI --#define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI --#define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI --#define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI --#define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP --#define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP --#define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP --#define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP --#define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF --#define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF --#define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF --#define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF --#define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF --#define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF --#define MULTI_ARG_1_SF V4SF_FTYPE_V4SF --#define MULTI_ARG_1_DF V2DF_FTYPE_V2DF --#define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF --#define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF --#define MULTI_ARG_1_DI V2DI_FTYPE_V2DI --#define MULTI_ARG_1_SI V4SI_FTYPE_V4SI --#define MULTI_ARG_1_HI V8HI_FTYPE_V8HI --#define MULTI_ARG_1_QI V16QI_FTYPE_V16QI --#define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI --#define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI --#define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI --#define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI --#define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI --#define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI -- --#define BDESC(mask, mask2, icode, name, code, comparison, flag) \ -- { mask, mask2, icode, name, code, comparison, flag }, --#define BDESC_FIRST(kind, kindu, mask, mask2, icode, name, code, comparison, flag) \ --static const struct builtin_description bdesc_##kind[] = \ --{ \ -- BDESC (mask, mask2, icode, name, code, comparison, flag) --#define BDESC_END(kind, next_kind) \ --}; -- --#include "i386-builtin.def" -- --#undef BDESC --#undef BDESC_FIRST --#undef BDESC_END -- -- --/* TM vector builtins. */ -- --/* Reuse the existing x86-specific `struct builtin_description' cause -- we're lazy. Add casts to make them fit. 
*/ --static const struct builtin_description bdesc_tm[] = --{ -- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI }, -- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI }, -- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI }, -- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, -- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, -- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, -- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI }, -- -- { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF }, -- { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF }, -- { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF }, -- { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF }, -- { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF }, -- { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF }, -- { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF }, -- -- { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF }, -- { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF }, -- { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF }, -- { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF }, -- { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF }, -- { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF }, -- { OPTION_MASK_ISA_AVX, 0, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF }, -- -- { OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID }, -- { OPTION_MASK_ISA_SSE, 0, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID }, -- { OPTION_MASK_ISA_AVX, 0, 
CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID }, --}; -- --/* Initialize the transactional memory vector load/store builtins. */ -- --static void --ix86_init_tm_builtins (void) --{ -- enum ix86_builtin_func_type ftype; -- const struct builtin_description *d; -- size_t i; -- tree decl; -- tree attrs_load, attrs_type_load, attrs_store, attrs_type_store; -- tree attrs_log, attrs_type_log; -- -- if (!flag_tm) -- return; -- -- /* If there are no builtins defined, we must be compiling in a -- language without trans-mem support. */ -- if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1)) -- return; -- -- /* Use whatever attributes a normal TM load has. */ -- decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1); -- attrs_load = DECL_ATTRIBUTES (decl); -- attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl)); -- /* Use whatever attributes a normal TM store has. */ -- decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1); -- attrs_store = DECL_ATTRIBUTES (decl); -- attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl)); -- /* Use whatever attributes a normal TM log has. */ -- decl = builtin_decl_explicit (BUILT_IN_TM_LOG); -- attrs_log = DECL_ATTRIBUTES (decl); -- attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl)); -- -- for (i = 0, d = bdesc_tm; -- i < ARRAY_SIZE (bdesc_tm); -- i++, d++) -- { -- if ((d->mask & ix86_isa_flags) != 0 -- || (lang_hooks.builtin_function -- == lang_hooks.builtin_function_ext_scope)) -- { -- tree type, attrs, attrs_type; -- enum built_in_function code = (enum built_in_function) d->code; -- -- ftype = (enum ix86_builtin_func_type) d->flag; -- type = ix86_get_builtin_func_type (ftype); -- -- if (BUILTIN_TM_LOAD_P (code)) -- { -- attrs = attrs_load; -- attrs_type = attrs_type_load; -- } -- else if (BUILTIN_TM_STORE_P (code)) -- { -- attrs = attrs_store; -- attrs_type = attrs_type_store; -- } -- else -- { -- attrs = attrs_log; -- attrs_type = attrs_type_log; -- } -- decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL, -- /* The builtin without the prefix for -- calling it directly. */ -- d->name + strlen ("__builtin_"), -- attrs); -- /* add_builtin_function() will set the DECL_ATTRIBUTES, now -- set the TYPE_ATTRIBUTES. */ -- decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN); -- -- set_builtin_decl (code, decl, false); -- } -- } --} -- --/* Macros for verification of enum ix86_builtins order. 
*/ --#define BDESC_VERIFY(x, y, z) \ -- gcc_checking_assert ((x) == (enum ix86_builtins) ((y) + (z))) --#define BDESC_VERIFYS(x, y, z) \ -- STATIC_ASSERT ((x) == (enum ix86_builtins) ((y) + (z))) -- --BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPESTR_FIRST, -- IX86_BUILTIN__BDESC_COMI_LAST, 1); --BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPISTR_FIRST, -- IX86_BUILTIN__BDESC_PCMPESTR_LAST, 1); --BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST, -- IX86_BUILTIN__BDESC_PCMPISTR_LAST, 1); --BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS_FIRST, -- IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST, 1); --BDESC_VERIFYS (IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST, -- IX86_BUILTIN__BDESC_ARGS_LAST, 1); --BDESC_VERIFYS (IX86_BUILTIN__BDESC_MULTI_ARG_FIRST, -- IX86_BUILTIN__BDESC_ROUND_ARGS_LAST, 1); --BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_FIRST, -- IX86_BUILTIN__BDESC_MULTI_ARG_LAST, 1); --BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_NORMAL_FIRST, -- IX86_BUILTIN__BDESC_CET_LAST, 1); --BDESC_VERIFYS (IX86_BUILTIN_MAX, -- IX86_BUILTIN__BDESC_CET_NORMAL_LAST, 1); -- --/* Set up all the MMX/SSE builtins, even builtins for instructions that are not -- in the current target ISA to allow the user to compile particular modules -- with different target specific options that differ from the command line -- options. */ --static void --ix86_init_mmx_sse_builtins (void) --{ -- const struct builtin_description * d; -- enum ix86_builtin_func_type ftype; -- size_t i; -- -- /* Add all special builtins with variable number of operands. */ -- for (i = 0, d = bdesc_special_args; -- i < ARRAY_SIZE (bdesc_special_args); -- i++, d++) -- { -- BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST, i); -- if (d->name == 0) -- continue; -- -- ftype = (enum ix86_builtin_func_type) d->flag; -- def_builtin (d->mask, d->mask2, d->name, ftype, d->code); -- } -- BDESC_VERIFYS (IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST, -- IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST, -- ARRAY_SIZE (bdesc_special_args) - 1); -- -- /* Add all builtins with variable number of operands. */ -- for (i = 0, d = bdesc_args; -- i < ARRAY_SIZE (bdesc_args); -- i++, d++) -- { -- BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ARGS_FIRST, i); -- if (d->name == 0) -- continue; -- -- ftype = (enum ix86_builtin_func_type) d->flag; -- def_builtin_const (d->mask, d->mask2, d->name, ftype, d->code); -- } -- BDESC_VERIFYS (IX86_BUILTIN__BDESC_ARGS_LAST, -- IX86_BUILTIN__BDESC_ARGS_FIRST, -- ARRAY_SIZE (bdesc_args) - 1); -- -- /* Add all builtins with rounding. */ -- for (i = 0, d = bdesc_round_args; -- i < ARRAY_SIZE (bdesc_round_args); -- i++, d++) -- { -- BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST, i); -- if (d->name == 0) -- continue; -- -- ftype = (enum ix86_builtin_func_type) d->flag; -- def_builtin_const (d->mask, d->mask2, d->name, ftype, d->code); -- } -- BDESC_VERIFYS (IX86_BUILTIN__BDESC_ROUND_ARGS_LAST, -- IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST, -- ARRAY_SIZE (bdesc_round_args) - 1); -- -- /* pcmpestr[im] insns. */ -- for (i = 0, d = bdesc_pcmpestr; -- i < ARRAY_SIZE (bdesc_pcmpestr); -- i++, d++) -- { -- BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_PCMPESTR_FIRST, i); -- if (d->code == IX86_BUILTIN_PCMPESTRM128) -- ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT; -- else -- ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT; -- def_builtin_const (d->mask, d->mask2, d->name, ftype, d->code); -- } -- BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPESTR_LAST, -- IX86_BUILTIN__BDESC_PCMPESTR_FIRST, -- ARRAY_SIZE (bdesc_pcmpestr) - 1); -- -- /* pcmpistr[im] insns. 
*/ -- for (i = 0, d = bdesc_pcmpistr; -- i < ARRAY_SIZE (bdesc_pcmpistr); -- i++, d++) -- { -- BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_PCMPISTR_FIRST, i); -- if (d->code == IX86_BUILTIN_PCMPISTRM128) -- ftype = V16QI_FTYPE_V16QI_V16QI_INT; -- else -- ftype = INT_FTYPE_V16QI_V16QI_INT; -- def_builtin_const (d->mask, d->mask2, d->name, ftype, d->code); -- } -- BDESC_VERIFYS (IX86_BUILTIN__BDESC_PCMPISTR_LAST, -- IX86_BUILTIN__BDESC_PCMPISTR_FIRST, -- ARRAY_SIZE (bdesc_pcmpistr) - 1); -- -- /* comi/ucomi insns. */ -- for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++) -- { -- BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_COMI_FIRST, i); -- if (d->mask == OPTION_MASK_ISA_SSE2) -- ftype = INT_FTYPE_V2DF_V2DF; -- else -- ftype = INT_FTYPE_V4SF_V4SF; -- def_builtin_const (d->mask, d->mask2, d->name, ftype, d->code); -- } -- BDESC_VERIFYS (IX86_BUILTIN__BDESC_COMI_LAST, -- IX86_BUILTIN__BDESC_COMI_FIRST, -- ARRAY_SIZE (bdesc_comi) - 1); -- -- /* SSE */ -- def_builtin (OPTION_MASK_ISA_SSE, 0, "__builtin_ia32_ldmxcsr", -- VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR); -- def_builtin_pure (OPTION_MASK_ISA_SSE, 0, "__builtin_ia32_stmxcsr", -- UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR); -- -- /* SSE or 3DNow!A */ -- def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A -- /* As it uses V4HImode, we have to require -mmmx too. */ -- | OPTION_MASK_ISA_MMX, 0, -- "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR, -- IX86_BUILTIN_MASKMOVQ); -- -- /* SSE2 */ -- def_builtin (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_maskmovdqu", -- VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU); -- -- def_builtin (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_clflush", -- VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH); -- x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_mfence", -- VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE); -- -- /* SSE3. 
*/ -- def_builtin (OPTION_MASK_ISA_SSE3, 0, "__builtin_ia32_monitor", -- VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR); -- def_builtin (OPTION_MASK_ISA_SSE3, 0, "__builtin_ia32_mwait", -- VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT); -- -- /* AES */ -- def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0, -- "__builtin_ia32_aesenc128", -- V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128); -- def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0, -- "__builtin_ia32_aesenclast128", -- V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128); -- def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0, -- "__builtin_ia32_aesdec128", -- V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128); -- def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0, -- "__builtin_ia32_aesdeclast128", -- V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128); -- def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0, -- "__builtin_ia32_aesimc128", -- V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128); -- def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0, -- "__builtin_ia32_aeskeygenassist128", -- V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128); -- -- /* PCLMUL */ -- def_builtin_const (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2, 0, -- "__builtin_ia32_pclmulqdq128", -- V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128); -- -- /* RDRND */ -- def_builtin (OPTION_MASK_ISA_RDRND, 0, "__builtin_ia32_rdrand16_step", -- INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP); -- def_builtin (OPTION_MASK_ISA_RDRND, 0, "__builtin_ia32_rdrand32_step", -- INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP); -- def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT, 0, -- "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG, -- IX86_BUILTIN_RDRAND64_STEP); -- -- /* AVX2 */ -- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv2df", -- V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT, -- IX86_BUILTIN_GATHERSIV2DF); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv4df", -- V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT, -- IX86_BUILTIN_GATHERSIV4DF); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv2df", -- V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT, -- IX86_BUILTIN_GATHERDIV2DF); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv4df", -- V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT, -- IX86_BUILTIN_GATHERDIV4DF); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv4sf", -- V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT, -- IX86_BUILTIN_GATHERSIV4SF); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv8sf", -- V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT, -- IX86_BUILTIN_GATHERSIV8SF); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv4sf", -- V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT, -- IX86_BUILTIN_GATHERDIV4SF); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv4sf256", -- V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT, -- IX86_BUILTIN_GATHERDIV8SF); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv2di", -- V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT, -- IX86_BUILTIN_GATHERSIV2DI); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv4di", -- V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT, -- IX86_BUILTIN_GATHERSIV4DI); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv2di", -- V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT, -- IX86_BUILTIN_GATHERDIV2DI); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX2, 
0, "__builtin_ia32_gatherdiv4di", -- V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT, -- IX86_BUILTIN_GATHERDIV4DI); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv4si", -- V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT, -- IX86_BUILTIN_GATHERSIV4SI); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gathersiv8si", -- V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT, -- IX86_BUILTIN_GATHERSIV8SI); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv4si", -- V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT, -- IX86_BUILTIN_GATHERDIV4SI); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatherdiv4si256", -- V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT, -- IX86_BUILTIN_GATHERDIV8SI); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatheraltsiv4df ", -- V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT, -- IX86_BUILTIN_GATHERALTSIV4DF); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatheraltdiv8sf ", -- V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT, -- IX86_BUILTIN_GATHERALTDIV8SF); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatheraltsiv4di ", -- V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT, -- IX86_BUILTIN_GATHERALTSIV4DI); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX2, 0, "__builtin_ia32_gatheraltdiv8si ", -- V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT, -- IX86_BUILTIN_GATHERALTDIV8SI); -- -- /* AVX512F */ -- def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gathersiv16sf", -- V16SF_FTYPE_V16SF_PCVOID_V16SI_HI_INT, -- IX86_BUILTIN_GATHER3SIV16SF); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gathersiv8df", -- V8DF_FTYPE_V8DF_PCVOID_V8SI_QI_INT, -- IX86_BUILTIN_GATHER3SIV8DF); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gatherdiv16sf", -- V8SF_FTYPE_V8SF_PCVOID_V8DI_QI_INT, -- IX86_BUILTIN_GATHER3DIV16SF); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gatherdiv8df", -- V8DF_FTYPE_V8DF_PCVOID_V8DI_QI_INT, -- IX86_BUILTIN_GATHER3DIV8DF); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gathersiv16si", -- V16SI_FTYPE_V16SI_PCVOID_V16SI_HI_INT, -- IX86_BUILTIN_GATHER3SIV16SI); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gathersiv8di", -- V8DI_FTYPE_V8DI_PCVOID_V8SI_QI_INT, -- IX86_BUILTIN_GATHER3SIV8DI); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gatherdiv16si", -- V8SI_FTYPE_V8SI_PCVOID_V8DI_QI_INT, -- IX86_BUILTIN_GATHER3DIV16SI); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gatherdiv8di", -- V8DI_FTYPE_V8DI_PCVOID_V8DI_QI_INT, -- IX86_BUILTIN_GATHER3DIV8DI); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gather3altsiv8df ", -- V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT, -- IX86_BUILTIN_GATHER3ALTSIV8DF); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gather3altdiv16sf ", -- V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT, -- IX86_BUILTIN_GATHER3ALTDIV16SF); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gather3altsiv8di ", -- V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT, -- IX86_BUILTIN_GATHER3ALTSIV8DI); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_gather3altdiv16si ", -- V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT, -- IX86_BUILTIN_GATHER3ALTDIV16SI); -- -- def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scattersiv16sf", -- VOID_FTYPE_PVOID_HI_V16SI_V16SF_INT, -- IX86_BUILTIN_SCATTERSIV16SF); -- -- def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scattersiv8df", -- VOID_FTYPE_PVOID_QI_V8SI_V8DF_INT, -- 
IX86_BUILTIN_SCATTERSIV8DF); -- -- def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatterdiv16sf", -- VOID_FTYPE_PVOID_QI_V8DI_V8SF_INT, -- IX86_BUILTIN_SCATTERDIV16SF); -- -- def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatterdiv8df", -- VOID_FTYPE_PVOID_QI_V8DI_V8DF_INT, -- IX86_BUILTIN_SCATTERDIV8DF); -- -- def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scattersiv16si", -- VOID_FTYPE_PVOID_HI_V16SI_V16SI_INT, -- IX86_BUILTIN_SCATTERSIV16SI); -- -- def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scattersiv8di", -- VOID_FTYPE_PVOID_QI_V8SI_V8DI_INT, -- IX86_BUILTIN_SCATTERSIV8DI); -- -- def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatterdiv16si", -- VOID_FTYPE_PVOID_QI_V8DI_V8SI_INT, -- IX86_BUILTIN_SCATTERDIV16SI); -- -- def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatterdiv8di", -- VOID_FTYPE_PVOID_QI_V8DI_V8DI_INT, -- IX86_BUILTIN_SCATTERDIV8DI); -- -- /* AVX512VL */ -- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv2df", -- V2DF_FTYPE_V2DF_PCVOID_V4SI_QI_INT, -- IX86_BUILTIN_GATHER3SIV2DF); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv4df", -- V4DF_FTYPE_V4DF_PCVOID_V4SI_QI_INT, -- IX86_BUILTIN_GATHER3SIV4DF); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div2df", -- V2DF_FTYPE_V2DF_PCVOID_V2DI_QI_INT, -- IX86_BUILTIN_GATHER3DIV2DF); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div4df", -- V4DF_FTYPE_V4DF_PCVOID_V4DI_QI_INT, -- IX86_BUILTIN_GATHER3DIV4DF); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv4sf", -- V4SF_FTYPE_V4SF_PCVOID_V4SI_QI_INT, -- IX86_BUILTIN_GATHER3SIV4SF); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv8sf", -- V8SF_FTYPE_V8SF_PCVOID_V8SI_QI_INT, -- IX86_BUILTIN_GATHER3SIV8SF); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div4sf", -- V4SF_FTYPE_V4SF_PCVOID_V2DI_QI_INT, -- IX86_BUILTIN_GATHER3DIV4SF); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div8sf", -- V4SF_FTYPE_V4SF_PCVOID_V4DI_QI_INT, -- IX86_BUILTIN_GATHER3DIV8SF); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv2di", -- V2DI_FTYPE_V2DI_PCVOID_V4SI_QI_INT, -- IX86_BUILTIN_GATHER3SIV2DI); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv4di", -- V4DI_FTYPE_V4DI_PCVOID_V4SI_QI_INT, -- IX86_BUILTIN_GATHER3SIV4DI); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div2di", -- V2DI_FTYPE_V2DI_PCVOID_V2DI_QI_INT, -- IX86_BUILTIN_GATHER3DIV2DI); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div4di", -- V4DI_FTYPE_V4DI_PCVOID_V4DI_QI_INT, -- IX86_BUILTIN_GATHER3DIV4DI); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv4si", -- V4SI_FTYPE_V4SI_PCVOID_V4SI_QI_INT, -- IX86_BUILTIN_GATHER3SIV4SI); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3siv8si", -- V8SI_FTYPE_V8SI_PCVOID_V8SI_QI_INT, -- IX86_BUILTIN_GATHER3SIV8SI); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div4si", -- V4SI_FTYPE_V4SI_PCVOID_V2DI_QI_INT, -- IX86_BUILTIN_GATHER3DIV4SI); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3div8si", -- V4SI_FTYPE_V4SI_PCVOID_V4DI_QI_INT, -- IX86_BUILTIN_GATHER3DIV8SI); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3altsiv4df ", -- 
V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT, -- IX86_BUILTIN_GATHER3ALTSIV4DF); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3altdiv8sf ", -- V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT, -- IX86_BUILTIN_GATHER3ALTDIV8SF); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3altsiv4di ", -- V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT, -- IX86_BUILTIN_GATHER3ALTSIV4DI); -- -- def_builtin_pure (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_gather3altdiv8si ", -- V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT, -- IX86_BUILTIN_GATHER3ALTDIV8SI); -- -- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv8sf", -- VOID_FTYPE_PVOID_QI_V8SI_V8SF_INT, -- IX86_BUILTIN_SCATTERSIV8SF); -- -- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv4sf", -- VOID_FTYPE_PVOID_QI_V4SI_V4SF_INT, -- IX86_BUILTIN_SCATTERSIV4SF); -- -- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv4df", -- VOID_FTYPE_PVOID_QI_V4SI_V4DF_INT, -- IX86_BUILTIN_SCATTERSIV4DF); -- -- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv2df", -- VOID_FTYPE_PVOID_QI_V4SI_V2DF_INT, -- IX86_BUILTIN_SCATTERSIV2DF); -- -- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv8sf", -- VOID_FTYPE_PVOID_QI_V4DI_V4SF_INT, -- IX86_BUILTIN_SCATTERDIV8SF); -- -- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv4sf", -- VOID_FTYPE_PVOID_QI_V2DI_V4SF_INT, -- IX86_BUILTIN_SCATTERDIV4SF); -- -- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv4df", -- VOID_FTYPE_PVOID_QI_V4DI_V4DF_INT, -- IX86_BUILTIN_SCATTERDIV4DF); -- -- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv2df", -- VOID_FTYPE_PVOID_QI_V2DI_V2DF_INT, -- IX86_BUILTIN_SCATTERDIV2DF); -- -- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv8si", -- VOID_FTYPE_PVOID_QI_V8SI_V8SI_INT, -- IX86_BUILTIN_SCATTERSIV8SI); -- -- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv4si", -- VOID_FTYPE_PVOID_QI_V4SI_V4SI_INT, -- IX86_BUILTIN_SCATTERSIV4SI); -- -- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv4di", -- VOID_FTYPE_PVOID_QI_V4SI_V4DI_INT, -- IX86_BUILTIN_SCATTERSIV4DI); -- -- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scattersiv2di", -- VOID_FTYPE_PVOID_QI_V4SI_V2DI_INT, -- IX86_BUILTIN_SCATTERSIV2DI); -- -- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv8si", -- VOID_FTYPE_PVOID_QI_V4DI_V4SI_INT, -- IX86_BUILTIN_SCATTERDIV8SI); -- -- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv4si", -- VOID_FTYPE_PVOID_QI_V2DI_V4SI_INT, -- IX86_BUILTIN_SCATTERDIV4SI); -- -- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv4di", -- VOID_FTYPE_PVOID_QI_V4DI_V4DI_INT, -- IX86_BUILTIN_SCATTERDIV4DI); -- -- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatterdiv2di", -- VOID_FTYPE_PVOID_QI_V2DI_V2DI_INT, -- IX86_BUILTIN_SCATTERDIV2DI); -- -- def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatteraltsiv8df ", -- VOID_FTYPE_PDOUBLE_QI_V16SI_V8DF_INT, -- IX86_BUILTIN_SCATTERALTSIV8DF); -- -- def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatteraltdiv16sf ", -- VOID_FTYPE_PFLOAT_HI_V8DI_V16SF_INT, -- IX86_BUILTIN_SCATTERALTDIV16SF); -- -- def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatteraltsiv8di ", -- VOID_FTYPE_PLONGLONG_QI_V16SI_V8DI_INT, -- IX86_BUILTIN_SCATTERALTSIV8DI); -- -- def_builtin (OPTION_MASK_ISA_AVX512F, 0, "__builtin_ia32_scatteraltdiv16si ", -- 
VOID_FTYPE_PINT_HI_V8DI_V16SI_INT, -- IX86_BUILTIN_SCATTERALTDIV16SI); -- -- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltsiv4df ", -- VOID_FTYPE_PDOUBLE_QI_V8SI_V4DF_INT, -- IX86_BUILTIN_SCATTERALTSIV4DF); -- -- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltdiv8sf ", -- VOID_FTYPE_PFLOAT_QI_V4DI_V8SF_INT, -- IX86_BUILTIN_SCATTERALTDIV8SF); -- -- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltsiv4di ", -- VOID_FTYPE_PLONGLONG_QI_V8SI_V4DI_INT, -- IX86_BUILTIN_SCATTERALTSIV4DI); -- -- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltdiv8si ", -- VOID_FTYPE_PINT_QI_V4DI_V8SI_INT, -- IX86_BUILTIN_SCATTERALTDIV8SI); -- -- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltsiv2df ", -- VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT, -- IX86_BUILTIN_SCATTERALTSIV2DF); -- -- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltdiv4sf ", -- VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT, -- IX86_BUILTIN_SCATTERALTDIV4SF); -- -- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltsiv2di ", -- VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT, -- IX86_BUILTIN_SCATTERALTSIV2DI); -- -- def_builtin (OPTION_MASK_ISA_AVX512VL, 0, "__builtin_ia32_scatteraltdiv4si ", -- VOID_FTYPE_PINT_QI_V2DI_V4SI_INT, -- IX86_BUILTIN_SCATTERALTDIV4SI); -- -- /* AVX512PF */ -- def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_gatherpfdpd", -- VOID_FTYPE_QI_V8SI_PCVOID_INT_INT, -- IX86_BUILTIN_GATHERPFDPD); -- def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_gatherpfdps", -- VOID_FTYPE_HI_V16SI_PCVOID_INT_INT, -- IX86_BUILTIN_GATHERPFDPS); -- def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_gatherpfqpd", -- VOID_FTYPE_QI_V8DI_PCVOID_INT_INT, -- IX86_BUILTIN_GATHERPFQPD); -- def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_gatherpfqps", -- VOID_FTYPE_QI_V8DI_PCVOID_INT_INT, -- IX86_BUILTIN_GATHERPFQPS); -- def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_scatterpfdpd", -- VOID_FTYPE_QI_V8SI_PCVOID_INT_INT, -- IX86_BUILTIN_SCATTERPFDPD); -- def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_scatterpfdps", -- VOID_FTYPE_HI_V16SI_PCVOID_INT_INT, -- IX86_BUILTIN_SCATTERPFDPS); -- def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_scatterpfqpd", -- VOID_FTYPE_QI_V8DI_PCVOID_INT_INT, -- IX86_BUILTIN_SCATTERPFQPD); -- def_builtin (OPTION_MASK_ISA_AVX512PF, 0, "__builtin_ia32_scatterpfqps", -- VOID_FTYPE_QI_V8DI_PCVOID_INT_INT, -- IX86_BUILTIN_SCATTERPFQPS); -- -- /* SHA */ -- def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha1msg1", -- V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1); -- def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha1msg2", -- V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2); -- def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha1nexte", -- V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE); -- def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha1rnds4", -- V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4); -- def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha256msg1", -- V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1); -- def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha256msg2", -- V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2); -- def_builtin_const (OPTION_MASK_ISA_SHA, 0, "__builtin_ia32_sha256rnds2", -- V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2); -- -- /* RTM. 
*/ -- def_builtin (OPTION_MASK_ISA_RTM, 0, "__builtin_ia32_xabort", -- VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT); -- -- /* MMX access to the vec_init patterns. */ -- def_builtin_const (OPTION_MASK_ISA_MMX, 0, "__builtin_ia32_vec_init_v2si", -- V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI); -- -- def_builtin_const (OPTION_MASK_ISA_MMX, 0, "__builtin_ia32_vec_init_v4hi", -- V4HI_FTYPE_HI_HI_HI_HI, -- IX86_BUILTIN_VEC_INIT_V4HI); -- -- def_builtin_const (OPTION_MASK_ISA_MMX, 0, "__builtin_ia32_vec_init_v8qi", -- V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI, -- IX86_BUILTIN_VEC_INIT_V8QI); -- -- /* Access to the vec_extract patterns. */ -- def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_ext_v2df", -- DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF); -- def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_ext_v2di", -- DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI); -- def_builtin_const (OPTION_MASK_ISA_SSE, 0, "__builtin_ia32_vec_ext_v4sf", -- FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF); -- def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_ext_v4si", -- SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI); -- def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_ext_v8hi", -- HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI); -- -- def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A -- /* As it uses V4HImode, we have to require -mmmx too. */ -- | OPTION_MASK_ISA_MMX, 0, -- "__builtin_ia32_vec_ext_v4hi", -- HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI); -- -- def_builtin_const (OPTION_MASK_ISA_MMX, 0, "__builtin_ia32_vec_ext_v2si", -- SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI); -- -- def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_ext_v16qi", -- QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI); -- -- /* Access to the vec_set patterns. */ -- def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, 0, -- "__builtin_ia32_vec_set_v2di", -- V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI); -- -- def_builtin_const (OPTION_MASK_ISA_SSE4_1, 0, "__builtin_ia32_vec_set_v4sf", -- V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF); -- -- def_builtin_const (OPTION_MASK_ISA_SSE4_1, 0, "__builtin_ia32_vec_set_v4si", -- V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI); -- -- def_builtin_const (OPTION_MASK_ISA_SSE2, 0, "__builtin_ia32_vec_set_v8hi", -- V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI); -- -- def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A -- /* As it uses V4HImode, we have to require -mmmx too. 
*/ -- | OPTION_MASK_ISA_MMX, 0, -- "__builtin_ia32_vec_set_v4hi", -- V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI); -- -- def_builtin_const (OPTION_MASK_ISA_SSE4_1, 0, "__builtin_ia32_vec_set_v16qi", -- V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI); -- -- /* RDSEED */ -- def_builtin (OPTION_MASK_ISA_RDSEED, 0, "__builtin_ia32_rdseed_hi_step", -- INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP); -- def_builtin (OPTION_MASK_ISA_RDSEED, 0, "__builtin_ia32_rdseed_si_step", -- INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP); -- def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT, 0, -- "__builtin_ia32_rdseed_di_step", -- INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP); -- -- /* ADCX */ -- def_builtin (0, 0, "__builtin_ia32_addcarryx_u32", -- UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32); -- def_builtin (OPTION_MASK_ISA_64BIT, 0, -- "__builtin_ia32_addcarryx_u64", -- UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG, -- IX86_BUILTIN_ADDCARRYX64); -- -- /* SBB */ -- def_builtin (0, 0, "__builtin_ia32_sbb_u32", -- UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32); -- def_builtin (OPTION_MASK_ISA_64BIT, 0, -- "__builtin_ia32_sbb_u64", -- UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG, -- IX86_BUILTIN_SBB64); -- -- /* Read/write FLAGS. */ -- if (TARGET_64BIT) -- { -- def_builtin (OPTION_MASK_ISA_64BIT, 0, "__builtin_ia32_readeflags_u64", -- UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS); -- def_builtin (OPTION_MASK_ISA_64BIT, 0, "__builtin_ia32_writeeflags_u64", -- VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS); -- } -- else -- { -- def_builtin (0, 0, "__builtin_ia32_readeflags_u32", -- UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS); -- def_builtin (0, 0, "__builtin_ia32_writeeflags_u32", -- VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS); -- } -- -- /* CLFLUSHOPT. */ -- def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, 0, "__builtin_ia32_clflushopt", -- VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT); -- -- /* CLWB. */ -- def_builtin (OPTION_MASK_ISA_CLWB, 0, "__builtin_ia32_clwb", -- VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB); -- -- /* MONITORX and MWAITX. */ -- def_builtin (0, OPTION_MASK_ISA_MWAITX, "__builtin_ia32_monitorx", -- VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX); -- def_builtin (0, OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx", -- VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX); -- -- /* CLZERO. */ -- def_builtin (0, OPTION_MASK_ISA_CLZERO, "__builtin_ia32_clzero", -- VOID_FTYPE_PCVOID, IX86_BUILTIN_CLZERO); -- -- /* WAITPKG. */ -- def_builtin (0, OPTION_MASK_ISA_WAITPKG, "__builtin_ia32_umonitor", -- VOID_FTYPE_PVOID, IX86_BUILTIN_UMONITOR); -- def_builtin (0, OPTION_MASK_ISA_WAITPKG, "__builtin_ia32_umwait", -- UINT8_FTYPE_UNSIGNED_UINT64, IX86_BUILTIN_UMWAIT); -- def_builtin (0, OPTION_MASK_ISA_WAITPKG, "__builtin_ia32_tpause", -- UINT8_FTYPE_UNSIGNED_UINT64, IX86_BUILTIN_TPAUSE); -- -- /* CLDEMOTE. 
*/ -- def_builtin (0, OPTION_MASK_ISA_CLDEMOTE, "__builtin_ia32_cldemote", -- VOID_FTYPE_PCVOID, IX86_BUILTIN_CLDEMOTE); -- -- /* Add FMA4 multi-arg argument instructions */ -- for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++) -- { -- BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_MULTI_ARG_FIRST, i); -- if (d->name == 0) -- continue; -- -- ftype = (enum ix86_builtin_func_type) d->flag; -- def_builtin_const (d->mask, d->mask2, d->name, ftype, d->code); -- } -- BDESC_VERIFYS (IX86_BUILTIN__BDESC_MULTI_ARG_LAST, -- IX86_BUILTIN__BDESC_MULTI_ARG_FIRST, -- ARRAY_SIZE (bdesc_multi_arg) - 1); -- -- /* Add CET inrinsics. */ -- for (i = 0, d = bdesc_cet; i < ARRAY_SIZE (bdesc_cet); i++, d++) -- { -- BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_CET_FIRST, i); -- if (d->name == 0) -- continue; -- -- ftype = (enum ix86_builtin_func_type) d->flag; -- def_builtin (d->mask, d->mask2, d->name, ftype, d->code); -- } -- BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_LAST, -- IX86_BUILTIN__BDESC_CET_FIRST, -- ARRAY_SIZE (bdesc_cet) - 1); -- -- for (i = 0, d = bdesc_cet_rdssp; -- i < ARRAY_SIZE (bdesc_cet_rdssp); -- i++, d++) -- { -- BDESC_VERIFY (d->code, IX86_BUILTIN__BDESC_CET_NORMAL_FIRST, i); -- if (d->name == 0) -- continue; -- -- ftype = (enum ix86_builtin_func_type) d->flag; -- def_builtin (d->mask, d->mask2, d->name, ftype, d->code); -- } -- BDESC_VERIFYS (IX86_BUILTIN__BDESC_CET_NORMAL_LAST, -- IX86_BUILTIN__BDESC_CET_NORMAL_FIRST, -- ARRAY_SIZE (bdesc_cet_rdssp) - 1); --} -- --#undef BDESC_VERIFY --#undef BDESC_VERIFYS -- --/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL -- to return a pointer to VERSION_DECL if the outcome of the expression -- formed by PREDICATE_CHAIN is true. This function will be called during -- version dispatch to decide which function version to execute. It returns -- the basic block at the end, to which more conditions can be added. 
*/ -- --static basic_block --add_condition_to_bb (tree function_decl, tree version_decl, -- tree predicate_chain, basic_block new_bb) --{ -- gimple *return_stmt; -- tree convert_expr, result_var; -- gimple *convert_stmt; -- gimple *call_cond_stmt; -- gimple *if_else_stmt; -- -- basic_block bb1, bb2, bb3; -- edge e12, e23; -- -- tree cond_var, and_expr_var = NULL_TREE; -- gimple_seq gseq; -- -- tree predicate_decl, predicate_arg; -- -- push_cfun (DECL_STRUCT_FUNCTION (function_decl)); -- -- gcc_assert (new_bb != NULL); -- gseq = bb_seq (new_bb); -- -- -- convert_expr = build1 (CONVERT_EXPR, ptr_type_node, -- build_fold_addr_expr (version_decl)); -- result_var = create_tmp_var (ptr_type_node); -- convert_stmt = gimple_build_assign (result_var, convert_expr); -- return_stmt = gimple_build_return (result_var); -- -- if (predicate_chain == NULL_TREE) -- { -- gimple_seq_add_stmt (&gseq, convert_stmt); -- gimple_seq_add_stmt (&gseq, return_stmt); -- set_bb_seq (new_bb, gseq); -- gimple_set_bb (convert_stmt, new_bb); -- gimple_set_bb (return_stmt, new_bb); -- pop_cfun (); -- return new_bb; -- } -- -- while (predicate_chain != NULL) -- { -- cond_var = create_tmp_var (integer_type_node); -- predicate_decl = TREE_PURPOSE (predicate_chain); -- predicate_arg = TREE_VALUE (predicate_chain); -- call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg); -- gimple_call_set_lhs (call_cond_stmt, cond_var); -- -- gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl)); -- gimple_set_bb (call_cond_stmt, new_bb); -- gimple_seq_add_stmt (&gseq, call_cond_stmt); -- -- predicate_chain = TREE_CHAIN (predicate_chain); -- -- if (and_expr_var == NULL) -- and_expr_var = cond_var; -- else -- { -- gimple *assign_stmt; -- /* Use MIN_EXPR to check if any integer is zero?. -- and_expr_var = min_expr */ -- assign_stmt = gimple_build_assign (and_expr_var, -- build2 (MIN_EXPR, integer_type_node, -- cond_var, and_expr_var)); -- -- gimple_set_block (assign_stmt, DECL_INITIAL (function_decl)); -- gimple_set_bb (assign_stmt, new_bb); -- gimple_seq_add_stmt (&gseq, assign_stmt); -- } -- } -- -- if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var, -- integer_zero_node, -- NULL_TREE, NULL_TREE); -- gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl)); -- gimple_set_bb (if_else_stmt, new_bb); -- gimple_seq_add_stmt (&gseq, if_else_stmt); -- -- gimple_seq_add_stmt (&gseq, convert_stmt); -- gimple_seq_add_stmt (&gseq, return_stmt); -- set_bb_seq (new_bb, gseq); -- -- bb1 = new_bb; -- e12 = split_block (bb1, if_else_stmt); -- bb2 = e12->dest; -- e12->flags &= ~EDGE_FALLTHRU; -- e12->flags |= EDGE_TRUE_VALUE; -- -- e23 = split_block (bb2, return_stmt); -- -- gimple_set_bb (convert_stmt, bb2); -- gimple_set_bb (return_stmt, bb2); -- -- bb3 = e23->dest; -- make_edge (bb1, bb3, EDGE_FALSE_VALUE); -- -- remove_edge (e23); -- make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0); -- -- pop_cfun (); -- -- return bb3; --} -- --/* Priority of i386 features, greater value is higher priority. This is -- used to decide the order in which function dispatch must happen. For -- instance, a version specialized for SSE4.2 should be checked for dispatch -- before a version for SSE3, as SSE4.2 implies SSE3. 
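For context, the user-visible feature this priority ordering ultimately serves is GCC function multiversioning; a minimal sketch (illustrative function, not from this patch) where the generated resolver must try the avx2 clone before the sse4.2 one, exactly as the comment above describes:

    /* Illustrative only: target_clones builds one clone per listed target plus
       a resolver, and the resolver tests the clones in descending priority
       order, so avx2 is checked before sse4.2, which is checked before the
       default clone.  Requires a target with IFUNC support.  */
    __attribute__ ((target_clones ("avx2", "sse4.2", "default")))
    int
    dot (const int *a, const int *b, int n)
    {
      int s = 0;
      for (int i = 0; i < n; i++)
        s += a[i] * b[i];
      return s;
    }

The same ordering applies to versions declared with the target attribute; the priority values come from the enum that follows.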
*/ --enum feature_priority --{ -- P_ZERO = 0, -- P_MMX, -- P_SSE, -- P_SSE2, -- P_SSE3, -- P_SSSE3, -- P_PROC_SSSE3, -- P_SSE4_A, -- P_PROC_SSE4_A, -- P_SSE4_1, -- P_SSE4_2, -- P_PROC_SSE4_2, -- P_POPCNT, -- P_AES, -- P_PCLMUL, -- P_AVX, -- P_PROC_AVX, -- P_BMI, -- P_PROC_BMI, -- P_FMA4, -- P_XOP, -- P_PROC_XOP, -- P_FMA, -- P_PROC_FMA, -- P_BMI2, -- P_AVX2, -- P_PROC_AVX2, -- P_AVX512F, -- P_PROC_AVX512F --}; -- --/* This is the order of bit-fields in __processor_features in cpuinfo.c */ --enum processor_features --{ -- F_CMOV = 0, -- F_MMX, -- F_POPCNT, -- F_SSE, -- F_SSE2, -- F_SSE3, -- F_SSSE3, -- F_SSE4_1, -- F_SSE4_2, -- F_AVX, -- F_AVX2, -- F_SSE4_A, -- F_FMA4, -- F_XOP, -- F_FMA, -- F_AVX512F, -- F_BMI, -- F_BMI2, -- F_AES, -- F_PCLMUL, -- F_AVX512VL, -- F_AVX512BW, -- F_AVX512DQ, -- F_AVX512CD, -- F_AVX512ER, -- F_AVX512PF, -- F_AVX512VBMI, -- F_AVX512IFMA, -- F_AVX5124VNNIW, -- F_AVX5124FMAPS, -- F_AVX512VPOPCNTDQ, -- F_AVX512VBMI2, -- F_GFNI, -- F_VPCLMULQDQ, -- F_AVX512VNNI, -- F_AVX512BITALG, -- F_MAX --}; -- --/* These are the values for vendor types and cpu types and subtypes -- in cpuinfo.c. Cpu types and subtypes should be subtracted by -- the corresponding start value. */ --enum processor_model --{ -- M_INTEL = 1, -- M_AMD, -- M_CPU_TYPE_START, -- M_INTEL_BONNELL, -- M_INTEL_CORE2, -- M_INTEL_COREI7, -- M_AMDFAM10H, -- M_AMDFAM15H, -- M_INTEL_SILVERMONT, -- M_INTEL_KNL, -- M_AMD_BTVER1, -- M_AMD_BTVER2, -- M_AMDFAM17H, -- M_INTEL_KNM, -- M_INTEL_GOLDMONT, -- M_INTEL_GOLDMONT_PLUS, -- M_INTEL_TREMONT, -- M_CPU_SUBTYPE_START, -- M_INTEL_COREI7_NEHALEM, -- M_INTEL_COREI7_WESTMERE, -- M_INTEL_COREI7_SANDYBRIDGE, -- M_AMDFAM10H_BARCELONA, -- M_AMDFAM10H_SHANGHAI, -- M_AMDFAM10H_ISTANBUL, -- M_AMDFAM15H_BDVER1, -- M_AMDFAM15H_BDVER2, -- M_AMDFAM15H_BDVER3, -- M_AMDFAM15H_BDVER4, -- M_AMDFAM17H_ZNVER1, -- M_INTEL_COREI7_IVYBRIDGE, -- M_INTEL_COREI7_HASWELL, -- M_INTEL_COREI7_BROADWELL, -- M_INTEL_COREI7_SKYLAKE, -- M_INTEL_COREI7_SKYLAKE_AVX512, -- M_INTEL_COREI7_CANNONLAKE, -- M_INTEL_COREI7_ICELAKE_CLIENT, -- M_INTEL_COREI7_ICELAKE_SERVER, -- M_AMDFAM17H_ZNVER2, -- M_INTEL_COREI7_CASCADELAKE --}; -- --struct _arch_names_table --{ -- const char *const name; -- const enum processor_model model; --}; -- --static const _arch_names_table arch_names_table[] = --{ -- {"amd", M_AMD}, -- {"intel", M_INTEL}, -- {"atom", M_INTEL_BONNELL}, -- {"slm", M_INTEL_SILVERMONT}, -- {"core2", M_INTEL_CORE2}, -- {"corei7", M_INTEL_COREI7}, -- {"nehalem", M_INTEL_COREI7_NEHALEM}, -- {"westmere", M_INTEL_COREI7_WESTMERE}, -- {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE}, -- {"ivybridge", M_INTEL_COREI7_IVYBRIDGE}, -- {"haswell", M_INTEL_COREI7_HASWELL}, -- {"broadwell", M_INTEL_COREI7_BROADWELL}, -- {"skylake", M_INTEL_COREI7_SKYLAKE}, -- {"skylake-avx512", M_INTEL_COREI7_SKYLAKE_AVX512}, -- {"cannonlake", M_INTEL_COREI7_CANNONLAKE}, -- {"icelake-client", M_INTEL_COREI7_ICELAKE_CLIENT}, -- {"icelake-server", M_INTEL_COREI7_ICELAKE_SERVER}, -- {"cascadelake", M_INTEL_COREI7_CASCADELAKE}, -- {"bonnell", M_INTEL_BONNELL}, -- {"silvermont", M_INTEL_SILVERMONT}, -- {"goldmont", M_INTEL_GOLDMONT}, -- {"goldmont-plus", M_INTEL_GOLDMONT_PLUS}, -- {"tremont", M_INTEL_TREMONT}, -- {"knl", M_INTEL_KNL}, -- {"knm", M_INTEL_KNM}, -- {"amdfam10h", M_AMDFAM10H}, -- {"barcelona", M_AMDFAM10H_BARCELONA}, -- {"shanghai", M_AMDFAM10H_SHANGHAI}, -- {"istanbul", M_AMDFAM10H_ISTANBUL}, -- {"btver1", M_AMD_BTVER1}, -- {"amdfam15h", M_AMDFAM15H}, -- {"bdver1", M_AMDFAM15H_BDVER1}, -- {"bdver2", M_AMDFAM15H_BDVER2}, -- 
{"bdver3", M_AMDFAM15H_BDVER3}, -- {"bdver4", M_AMDFAM15H_BDVER4}, -- {"btver2", M_AMD_BTVER2}, -- {"amdfam17h", M_AMDFAM17H}, -- {"znver1", M_AMDFAM17H_ZNVER1}, -- {"znver2", M_AMDFAM17H_ZNVER2}, --}; -- --/* These are the target attribute strings for which a dispatcher is -- available, from fold_builtin_cpu. */ --struct _isa_names_table --{ -- const char *const name; -- const enum processor_features feature; -- const enum feature_priority priority; --}; -- --static const _isa_names_table isa_names_table[] = --{ -- {"cmov", F_CMOV, P_ZERO}, -- {"mmx", F_MMX, P_MMX}, -- {"popcnt", F_POPCNT, P_POPCNT}, -- {"sse", F_SSE, P_SSE}, -- {"sse2", F_SSE2, P_SSE2}, -- {"sse3", F_SSE3, P_SSE3}, -- {"ssse3", F_SSSE3, P_SSSE3}, -- {"sse4a", F_SSE4_A, P_SSE4_A}, -- {"sse4.1", F_SSE4_1, P_SSE4_1}, -- {"sse4.2", F_SSE4_2, P_SSE4_2}, -- {"avx", F_AVX, P_AVX}, -- {"fma4", F_FMA4, P_FMA4}, -- {"xop", F_XOP, P_XOP}, -- {"fma", F_FMA, P_FMA}, -- {"avx2", F_AVX2, P_AVX2}, -- {"avx512f", F_AVX512F, P_AVX512F}, -- {"bmi", F_BMI, P_BMI}, -- {"bmi2", F_BMI2, P_BMI2}, -- {"aes", F_AES, P_AES}, -- {"pclmul", F_PCLMUL, P_PCLMUL}, -- {"avx512vl",F_AVX512VL, P_ZERO}, -- {"avx512bw",F_AVX512BW, P_ZERO}, -- {"avx512dq",F_AVX512DQ, P_ZERO}, -- {"avx512cd",F_AVX512CD, P_ZERO}, -- {"avx512er",F_AVX512ER, P_ZERO}, -- {"avx512pf",F_AVX512PF, P_ZERO}, -- {"avx512vbmi",F_AVX512VBMI, P_ZERO}, -- {"avx512ifma",F_AVX512IFMA, P_ZERO}, -- {"avx5124vnniw",F_AVX5124VNNIW, P_ZERO}, -- {"avx5124fmaps",F_AVX5124FMAPS, P_ZERO}, -- {"avx512vpopcntdq",F_AVX512VPOPCNTDQ, P_ZERO}, -- {"avx512vbmi2", F_AVX512VBMI2, P_ZERO}, -- {"gfni", F_GFNI, P_ZERO}, -- {"vpclmulqdq", F_VPCLMULQDQ, P_ZERO}, -- {"avx512vnni", F_AVX512VNNI, P_ZERO}, -- {"avx512bitalg", F_AVX512BITALG, P_ZERO} --}; -- --/* This parses the attribute arguments to target in DECL and determines -- the right builtin to use to match the platform specification. -- It returns the priority value for this version decl. If PREDICATE_LIST -- is not NULL, it stores the list of cpu features that need to be checked -- before dispatching this function. */ -- --static unsigned int --get_builtin_code_for_version (tree decl, tree *predicate_list) --{ -- tree attrs; -- struct cl_target_option cur_target; -- tree target_node; -- struct cl_target_option *new_target; -- const char *arg_str = NULL; -- const char *attrs_str = NULL; -- char *tok_str = NULL; -- char *token; -- -- enum feature_priority priority = P_ZERO; -- -- static unsigned int NUM_FEATURES -- = sizeof (isa_names_table) / sizeof (_isa_names_table); -- -- unsigned int i; -- -- tree predicate_chain = NULL_TREE; -- tree predicate_decl, predicate_arg; -- -- attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl)); -- gcc_assert (attrs != NULL); -- -- attrs = TREE_VALUE (TREE_VALUE (attrs)); -- -- gcc_assert (TREE_CODE (attrs) == STRING_CST); -- attrs_str = TREE_STRING_POINTER (attrs); -- -- /* Return priority zero for default function. */ -- if (strcmp (attrs_str, "default") == 0) -- return 0; -- -- /* Handle arch= if specified. For priority, set it to be 1 more than -- the best instruction set the processor can handle. For instance, if -- there is a version for atom and a version for ssse3 (the highest ISA -- priority for atom), the atom version must be checked for dispatch -- before the ssse3 version. 
*/ -- if (strstr (attrs_str, "arch=") != NULL) -- { -- cl_target_option_save (&cur_target, &global_options); -- target_node = ix86_valid_target_attribute_tree (attrs, &global_options, -- &global_options_set); -- -- gcc_assert (target_node); -- if (target_node == error_mark_node) -- return 0; -- new_target = TREE_TARGET_OPTION (target_node); -- gcc_assert (new_target); -- -- if (new_target->arch_specified && new_target->arch > 0) -- { -- switch (new_target->arch) -- { -- case PROCESSOR_CORE2: -- arg_str = "core2"; -- priority = P_PROC_SSSE3; -- break; -- case PROCESSOR_NEHALEM: -- if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_PCLMUL) -- { -- arg_str = "westmere"; -- priority = P_PCLMUL; -- } -- else -- { -- /* We translate "arch=corei7" and "arch=nehalem" to -- "corei7" so that it will be mapped to M_INTEL_COREI7 -- as cpu type to cover all M_INTEL_COREI7_XXXs. */ -- arg_str = "corei7"; -- priority = P_PROC_SSE4_2; -- } -- break; -- case PROCESSOR_SANDYBRIDGE: -- if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C) -- arg_str = "ivybridge"; -- else -- arg_str = "sandybridge"; -- priority = P_PROC_AVX; -- break; -- case PROCESSOR_HASWELL: -- if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX) -- arg_str = "broadwell"; -- else -- arg_str = "haswell"; -- priority = P_PROC_AVX2; -- break; -- case PROCESSOR_SKYLAKE: -- arg_str = "skylake"; -- priority = P_PROC_AVX2; -- break; -- case PROCESSOR_SKYLAKE_AVX512: -- arg_str = "skylake-avx512"; -- priority = P_PROC_AVX512F; -- break; -- case PROCESSOR_CANNONLAKE: -- arg_str = "cannonlake"; -- priority = P_PROC_AVX512F; -- break; -- case PROCESSOR_ICELAKE_CLIENT: -- arg_str = "icelake-client"; -- priority = P_PROC_AVX512F; -- break; -- case PROCESSOR_ICELAKE_SERVER: -- arg_str = "icelake-server"; -- priority = P_PROC_AVX512F; -- break; -- case PROCESSOR_CASCADELAKE: -- arg_str = "cascadelake"; -- priority = P_PROC_AVX512F; -- break; -- case PROCESSOR_BONNELL: -- arg_str = "bonnell"; -- priority = P_PROC_SSSE3; -- break; -- case PROCESSOR_KNL: -- arg_str = "knl"; -- priority = P_PROC_AVX512F; -- break; -- case PROCESSOR_KNM: -- arg_str = "knm"; -- priority = P_PROC_AVX512F; -- break; -- case PROCESSOR_SILVERMONT: -- arg_str = "silvermont"; -- priority = P_PROC_SSE4_2; -- break; -- case PROCESSOR_GOLDMONT: -- arg_str = "goldmont"; -- priority = P_PROC_SSE4_2; -- break; -- case PROCESSOR_GOLDMONT_PLUS: -- arg_str = "goldmont-plus"; -- priority = P_PROC_SSE4_2; -- break; -- case PROCESSOR_TREMONT: -- arg_str = "tremont"; -- priority = P_PROC_SSE4_2; -- break; -- case PROCESSOR_AMDFAM10: -- arg_str = "amdfam10h"; -- priority = P_PROC_SSE4_A; -- break; -- case PROCESSOR_BTVER1: -- arg_str = "btver1"; -- priority = P_PROC_SSE4_A; -- break; -- case PROCESSOR_BTVER2: -- arg_str = "btver2"; -- priority = P_PROC_BMI; -- break; -- case PROCESSOR_BDVER1: -- arg_str = "bdver1"; -- priority = P_PROC_XOP; -- break; -- case PROCESSOR_BDVER2: -- arg_str = "bdver2"; -- priority = P_PROC_FMA; -- break; -- case PROCESSOR_BDVER3: -- arg_str = "bdver3"; -- priority = P_PROC_FMA; -- break; -- case PROCESSOR_BDVER4: -- arg_str = "bdver4"; -- priority = P_PROC_AVX2; -- break; -- case PROCESSOR_ZNVER1: -- arg_str = "znver1"; -- priority = P_PROC_AVX2; -- break; -- case PROCESSOR_ZNVER2: -- arg_str = "znver2"; -- priority = P_PROC_AVX2; -- break; -- } -- } -- -- cl_target_option_restore (&global_options, &cur_target); -- -- if (predicate_list && arg_str == NULL) -- { -- error_at (DECL_SOURCE_LOCATION (decl), -- "no dispatcher found for the versioning attributes"); 
-- return 0; -- } -- -- if (predicate_list) -- { -- predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS]; -- /* For a C string literal the length includes the trailing NULL. */ -- predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str); -- predicate_chain = tree_cons (predicate_decl, predicate_arg, -- predicate_chain); -- } -- } -- -- /* Process feature name. */ -- tok_str = (char *) xmalloc (strlen (attrs_str) + 1); -- strcpy (tok_str, attrs_str); -- token = strtok (tok_str, ","); -- predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS]; -- -- while (token != NULL) -- { -- /* Do not process "arch=" */ -- if (strncmp (token, "arch=", 5) == 0) -- { -- token = strtok (NULL, ","); -- continue; -- } -- for (i = 0; i < NUM_FEATURES; ++i) -- { -- if (strcmp (token, isa_names_table[i].name) == 0) -- { -- if (predicate_list) -- { -- predicate_arg = build_string_literal ( -- strlen (isa_names_table[i].name) + 1, -- isa_names_table[i].name); -- predicate_chain = tree_cons (predicate_decl, predicate_arg, -- predicate_chain); -- } -- /* Find the maximum priority feature. */ -- if (isa_names_table[i].priority > priority) -- priority = isa_names_table[i].priority; -- -- break; -- } -- } -- if (predicate_list && priority == P_ZERO) -- { -- error_at (DECL_SOURCE_LOCATION (decl), -- "ISA %qs is not supported in % attribute, " -- "use % syntax", token); -- return 0; -- } -- token = strtok (NULL, ","); -- } -- free (tok_str); -- -- if (predicate_list && predicate_chain == NULL_TREE) -- { -- error_at (DECL_SOURCE_LOCATION (decl), -- "no dispatcher found for the versioning attributes: %s", -- attrs_str); -- return 0; -- } -- else if (predicate_list) -- { -- predicate_chain = nreverse (predicate_chain); -- *predicate_list = predicate_chain; -- } -- -- return priority; --} -- --/* This compares the priority of target features in function DECL1 -- and DECL2. It returns positive value if DECL1 is higher priority, -- negative value if DECL2 is higher priority and 0 if they are the -- same. */ -- --static int --ix86_compare_version_priority (tree decl1, tree decl2) --{ -- unsigned int priority1 = get_builtin_code_for_version (decl1, NULL); -- unsigned int priority2 = get_builtin_code_for_version (decl2, NULL); -- -- return (int)priority1 - (int)priority2; --} -- --/* V1 and V2 point to function versions with different priorities -- based on the target ISA. This function compares their priorities. */ -- --static int --feature_compare (const void *v1, const void *v2) --{ -- typedef struct _function_version_info -- { -- tree version_decl; -- tree predicate_chain; -- unsigned int dispatch_priority; -- } function_version_info; -- -- const function_version_info c1 = *(const function_version_info *)v1; -- const function_version_info c2 = *(const function_version_info *)v2; -- return (c2.dispatch_priority - c1.dispatch_priority); --} -- --/* This function generates the dispatch function for -- multi-versioned functions. DISPATCH_DECL is the function which will -- contain the dispatch logic. FNDECLS are the function choices for -- dispatch, and is a tree chain. EMPTY_BB is the basic block pointer -- in DISPATCH_DECL in which the dispatch code is generated. 
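In source-level terms, the resolver body assembled here (one condition block per version, via add_condition_to_bb) has roughly the following shape. This is a hand-written sketch with made-up version names, not actual GCC output; in the generated GIMPLE each predicate chain is AND-folded with MIN_EXPR over the 0/1 predicate results rather than with &&:

    extern int foo_arch_haswell (int), foo_sse42 (int), foo_default (int);

    static void *
    resolve_foo (void)
    {
      /* IFUNC resolvers run before constructors, hence the explicit
         __builtin_cpu_init call emitted first.  */
      __builtin_cpu_init ();

      /* One condition block per version, highest dispatch priority first.  */
      if (__builtin_cpu_is ("haswell") && __builtin_cpu_supports ("avx2"))
        return (void *) foo_arch_haswell;
      if (__builtin_cpu_supports ("sse4.2"))
        return (void *) foo_sse42;

      /* The default version is dispatched last, unconditionally.  */
      return (void *) foo_default;
    }

make_resolver_func later marks the dispatcher symbol with the "ifunc" attribute naming this resolver, so the choice is made once at load time.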
*/ -- --static int --dispatch_function_versions (tree dispatch_decl, -- void *fndecls_p, -- basic_block *empty_bb) --{ -- tree default_decl; -- gimple *ifunc_cpu_init_stmt; -- gimple_seq gseq; -- int ix; -- tree ele; -- vec *fndecls; -- unsigned int num_versions = 0; -- unsigned int actual_versions = 0; -- unsigned int i; -- -- struct _function_version_info -- { -- tree version_decl; -- tree predicate_chain; -- unsigned int dispatch_priority; -- }*function_version_info; -- -- gcc_assert (dispatch_decl != NULL -- && fndecls_p != NULL -- && empty_bb != NULL); -- -- /*fndecls_p is actually a vector. */ -- fndecls = static_cast *> (fndecls_p); -- -- /* At least one more version other than the default. */ -- num_versions = fndecls->length (); -- gcc_assert (num_versions >= 2); -- -- function_version_info = (struct _function_version_info *) -- XNEWVEC (struct _function_version_info, (num_versions - 1)); -- -- /* The first version in the vector is the default decl. */ -- default_decl = (*fndecls)[0]; -- -- push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl)); -- -- gseq = bb_seq (*empty_bb); -- /* Function version dispatch is via IFUNC. IFUNC resolvers fire before -- constructors, so explicity call __builtin_cpu_init here. */ -- ifunc_cpu_init_stmt = gimple_build_call_vec ( -- ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL); -- gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt); -- gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb); -- set_bb_seq (*empty_bb, gseq); -- -- pop_cfun (); -- -- -- for (ix = 1; fndecls->iterate (ix, &ele); ++ix) -- { -- tree version_decl = ele; -- tree predicate_chain = NULL_TREE; -- unsigned int priority; -- /* Get attribute string, parse it and find the right predicate decl. -- The predicate function could be a lengthy combination of many -- features, like arch-type and various isa-variants. */ -- priority = get_builtin_code_for_version (version_decl, -- &predicate_chain); -- -- if (predicate_chain == NULL_TREE) -- continue; -- -- function_version_info [actual_versions].version_decl = version_decl; -- function_version_info [actual_versions].predicate_chain -- = predicate_chain; -- function_version_info [actual_versions].dispatch_priority = priority; -- actual_versions++; -- } -- -- /* Sort the versions according to descending order of dispatch priority. The -- priority is based on the ISA. This is not a perfect solution. There -- could still be ambiguity. If more than one function version is suitable -- to execute, which one should be dispatched? In future, allow the user -- to specify a dispatch priority next to the version. */ -- qsort (function_version_info, actual_versions, -- sizeof (struct _function_version_info), feature_compare); -- -- for (i = 0; i < actual_versions; ++i) -- *empty_bb = add_condition_to_bb (dispatch_decl, -- function_version_info[i].version_decl, -- function_version_info[i].predicate_chain, -- *empty_bb); -- -- /* dispatch default version at the end. */ -- *empty_bb = add_condition_to_bb (dispatch_decl, default_decl, -- NULL, *empty_bb); -- -- free (function_version_info); -- return 0; --} -- --/* This function changes the assembler name for functions that are -- versions. If DECL is a function version and has a "target" -- attribute, it appends the attribute string to its assembler name. 
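The effect of the renaming described above is easiest to see on a concrete symbol; ignoring C++ name mangling, the "%s.%s" rename in the function below produces names of the form original-name, a dot, then the sorted attribute string (a hypothetical example, requiring the C++ front end for the target form of multiversioning):

    /* Illustrative mapping produced by the rename below:
         __attribute__ ((target ("default"))) int foo (void);  ->  foo        (returned unchanged)
         __attribute__ ((target ("avx2")))    int foo (void);  ->  foo.avx2   */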
*/ -- --static tree --ix86_mangle_function_version_assembler_name (tree decl, tree id) --{ -- tree version_attr; -- const char *orig_name, *version_string; -- char *attr_str, *assembler_name; -- -- if (DECL_DECLARED_INLINE_P (decl) -- && lookup_attribute ("gnu_inline", -- DECL_ATTRIBUTES (decl))) -- error_at (DECL_SOURCE_LOCATION (decl), -- "function versions cannot be marked as gnu_inline," -- " bodies have to be generated"); -- -- if (DECL_VIRTUAL_P (decl) -- || DECL_VINDEX (decl)) -- sorry ("virtual function multiversioning not supported"); -- -- version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl)); -- -- /* target attribute string cannot be NULL. */ -- gcc_assert (version_attr != NULL_TREE); -- -- orig_name = IDENTIFIER_POINTER (id); -- version_string -- = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr))); -- -- if (strcmp (version_string, "default") == 0) -- return id; -- -- attr_str = sorted_attr_string (TREE_VALUE (version_attr)); -- assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2); -- -- sprintf (assembler_name, "%s.%s", orig_name, attr_str); -- -- /* Allow assembler name to be modified if already set. */ -- if (DECL_ASSEMBLER_NAME_SET_P (decl)) -- SET_DECL_RTL (decl, NULL); -- -- tree ret = get_identifier (assembler_name); -- XDELETEVEC (attr_str); -- XDELETEVEC (assembler_name); -- return ret; --} -- -- --static tree --ix86_mangle_decl_assembler_name (tree decl, tree id) --{ -- /* For function version, add the target suffix to the assembler name. */ -- if (TREE_CODE (decl) == FUNCTION_DECL -- && DECL_FUNCTION_VERSIONED (decl)) -- id = ix86_mangle_function_version_assembler_name (decl, id); --#ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME -- id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id); --#endif -- -- return id; --} -- --/* Make a dispatcher declaration for the multi-versioned function DECL. -- Calls to DECL function will be replaced with calls to the dispatcher -- by the front-end. Returns the decl of the dispatcher function. */ -- --static tree --ix86_get_function_versions_dispatcher (void *decl) --{ -- tree fn = (tree) decl; -- struct cgraph_node *node = NULL; -- struct cgraph_node *default_node = NULL; -- struct cgraph_function_version_info *node_v = NULL; -- struct cgraph_function_version_info *first_v = NULL; -- -- tree dispatch_decl = NULL; -- -- struct cgraph_function_version_info *default_version_info = NULL; -- -- gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn)); -- -- node = cgraph_node::get (fn); -- gcc_assert (node != NULL); -- -- node_v = node->function_version (); -- gcc_assert (node_v != NULL); -- -- if (node_v->dispatcher_resolver != NULL) -- return node_v->dispatcher_resolver; -- -- /* Find the default version and make it the first node. */ -- first_v = node_v; -- /* Go to the beginning of the chain. */ -- while (first_v->prev != NULL) -- first_v = first_v->prev; -- default_version_info = first_v; -- while (default_version_info != NULL) -- { -- if (is_function_default_version -- (default_version_info->this_node->decl)) -- break; -- default_version_info = default_version_info->next; -- } -- -- /* If there is no default node, just return NULL. */ -- if (default_version_info == NULL) -- return NULL; -- -- /* Make default info the first node. 
*/ -- if (first_v != default_version_info) -- { -- default_version_info->prev->next = default_version_info->next; -- if (default_version_info->next) -- default_version_info->next->prev = default_version_info->prev; -- first_v->prev = default_version_info; -- default_version_info->next = first_v; -- default_version_info->prev = NULL; -- } -- -- default_node = default_version_info->this_node; -- --#if defined (ASM_OUTPUT_TYPE_DIRECTIVE) -- if (targetm.has_ifunc_p ()) -- { -- struct cgraph_function_version_info *it_v = NULL; -- struct cgraph_node *dispatcher_node = NULL; -- struct cgraph_function_version_info *dispatcher_version_info = NULL; -- -- /* Right now, the dispatching is done via ifunc. */ -- dispatch_decl = make_dispatcher_decl (default_node->decl); -- -- dispatcher_node = cgraph_node::get_create (dispatch_decl); -- gcc_assert (dispatcher_node != NULL); -- dispatcher_node->dispatcher_function = 1; -- dispatcher_version_info -- = dispatcher_node->insert_new_function_version (); -- dispatcher_version_info->next = default_version_info; -- dispatcher_node->definition = 1; -- -- /* Set the dispatcher for all the versions. */ -- it_v = default_version_info; -- while (it_v != NULL) -- { -- it_v->dispatcher_resolver = dispatch_decl; -- it_v = it_v->next; -- } -- } -- else --#endif -- { -- error_at (DECL_SOURCE_LOCATION (default_node->decl), -- "multiversioning needs ifunc which is not supported " -- "on this target"); -- } -- -- return dispatch_decl; --} -- --/* Make the resolver function decl to dispatch the versions of -- a multi-versioned function, DEFAULT_DECL. IFUNC_ALIAS_DECL is -- ifunc alias that will point to the created resolver. Create an -- empty basic block in the resolver and store the pointer in -- EMPTY_BB. Return the decl of the resolver function. */ -- --static tree --make_resolver_func (const tree default_decl, -- const tree ifunc_alias_decl, -- basic_block *empty_bb) --{ -- char *resolver_name; -- tree decl, type, decl_name, t; -- -- /* IFUNC's have to be globally visible. So, if the default_decl is -- not, then the name of the IFUNC should be made unique. */ -- if (TREE_PUBLIC (default_decl) == 0) -- { -- char *ifunc_name = make_unique_name (default_decl, "ifunc", true); -- symtab->change_decl_assembler_name (ifunc_alias_decl, -- get_identifier (ifunc_name)); -- XDELETEVEC (ifunc_name); -- } -- -- resolver_name = make_unique_name (default_decl, "resolver", false); -- -- /* The resolver function should return a (void *). */ -- type = build_function_type_list (ptr_type_node, NULL_TREE); -- -- decl = build_fn_decl (resolver_name, type); -- decl_name = get_identifier (resolver_name); -- SET_DECL_ASSEMBLER_NAME (decl, decl_name); -- -- DECL_NAME (decl) = decl_name; -- TREE_USED (decl) = 1; -- DECL_ARTIFICIAL (decl) = 1; -- DECL_IGNORED_P (decl) = 1; -- TREE_PUBLIC (decl) = 0; -- DECL_UNINLINABLE (decl) = 1; -- -- /* Resolver is not external, body is generated. */ -- DECL_EXTERNAL (decl) = 0; -- DECL_EXTERNAL (ifunc_alias_decl) = 0; -- -- DECL_CONTEXT (decl) = NULL_TREE; -- DECL_INITIAL (decl) = make_node (BLOCK); -- DECL_STATIC_CONSTRUCTOR (decl) = 0; -- -- if (DECL_COMDAT_GROUP (default_decl) -- || TREE_PUBLIC (default_decl)) -- { -- /* In this case, each translation unit with a call to this -- versioned function will put out a resolver. Ensure it -- is comdat to keep just one copy. */ -- DECL_COMDAT (decl) = 1; -- make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl)); -- } -- /* Build result decl and add to function_decl. 
*/ -- t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node); -- DECL_CONTEXT (t) = decl; -- DECL_ARTIFICIAL (t) = 1; -- DECL_IGNORED_P (t) = 1; -- DECL_RESULT (decl) = t; -- -- gimplify_function_tree (decl); -- push_cfun (DECL_STRUCT_FUNCTION (decl)); -- *empty_bb = init_lowered_empty_function (decl, false, -- profile_count::uninitialized ()); -- -- cgraph_node::add_new_function (decl, true); -- symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl)); -- -- pop_cfun (); -- -- gcc_assert (ifunc_alias_decl != NULL); -- /* Mark ifunc_alias_decl as "ifunc" with resolver as resolver_name. */ -- DECL_ATTRIBUTES (ifunc_alias_decl) -- = make_attribute ("ifunc", resolver_name, -- DECL_ATTRIBUTES (ifunc_alias_decl)); -- -- /* Create the alias for dispatch to resolver here. */ -- cgraph_node::create_same_body_alias (ifunc_alias_decl, decl); -- XDELETEVEC (resolver_name); -- return decl; --} -- --/* Generate the dispatching code body to dispatch multi-versioned function -- DECL. The target hook is called to process the "target" attributes and -- provide the code to dispatch the right function at run-time. NODE points -- to the dispatcher decl whose body will be created. */ -- --static tree --ix86_generate_version_dispatcher_body (void *node_p) --{ -- tree resolver_decl; -- basic_block empty_bb; -- tree default_ver_decl; -- struct cgraph_node *versn; -- struct cgraph_node *node; -- -- struct cgraph_function_version_info *node_version_info = NULL; -- struct cgraph_function_version_info *versn_info = NULL; -- -- node = (cgraph_node *)node_p; -- -- node_version_info = node->function_version (); -- gcc_assert (node->dispatcher_function -- && node_version_info != NULL); -- -- if (node_version_info->dispatcher_resolver) -- return node_version_info->dispatcher_resolver; -- -- /* The first version in the chain corresponds to the default version. */ -- default_ver_decl = node_version_info->next->this_node->decl; -- -- /* node is going to be an alias, so remove the finalized bit. */ -- node->definition = false; -- -- resolver_decl = make_resolver_func (default_ver_decl, -- node->decl, &empty_bb); -- -- node_version_info->dispatcher_resolver = resolver_decl; -- -- push_cfun (DECL_STRUCT_FUNCTION (resolver_decl)); -- -- auto_vec fn_ver_vec; -- -- for (versn_info = node_version_info->next; versn_info; -- versn_info = versn_info->next) -- { -- versn = versn_info->this_node; -- /* Check for virtual functions here again, as by this time it should -- have been determined if this function needs a vtable index or -- not. This happens for methods in derived classes that override -- virtual methods in base classes but are not explicitly marked as -- virtual. */ -- if (DECL_VINDEX (versn->decl)) -- sorry ("virtual function multiversioning not supported"); -- -- fn_ver_vec.safe_push (versn->decl); -- } -- -- dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb); -- cgraph_edge::rebuild_edges (); -- pop_cfun (); -- return resolver_decl; --} --/* This builds the processor_model struct type defined in -- libgcc/config/i386/cpuinfo.c */ -- --static tree --build_processor_model_struct (void) --{ -- const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype", -- "__cpu_features"}; -- tree field = NULL_TREE, field_chain = NULL_TREE; -- int i; -- tree type = make_node (RECORD_TYPE); -- -- /* The first 3 fields are unsigned int. 
*/ -- for (i = 0; i < 3; ++i) -- { -- field = build_decl (UNKNOWN_LOCATION, FIELD_DECL, -- get_identifier (field_name[i]), unsigned_type_node); -- if (field_chain != NULL_TREE) -- DECL_CHAIN (field) = field_chain; -- field_chain = field; -- } -- -- /* The last field is an array of unsigned integers of size one. */ -- field = build_decl (UNKNOWN_LOCATION, FIELD_DECL, -- get_identifier (field_name[3]), -- build_array_type (unsigned_type_node, -- build_index_type (size_one_node))); -- if (field_chain != NULL_TREE) -- DECL_CHAIN (field) = field_chain; -- field_chain = field; -- -- finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE); -- return type; --} -- --/* Returns a extern, comdat VAR_DECL of type TYPE and name NAME. */ -- --static tree --make_var_decl (tree type, const char *name) --{ -- tree new_decl; -- -- new_decl = build_decl (UNKNOWN_LOCATION, -- VAR_DECL, -- get_identifier(name), -- type); -- -- DECL_EXTERNAL (new_decl) = 1; -- TREE_STATIC (new_decl) = 1; -- TREE_PUBLIC (new_decl) = 1; -- DECL_INITIAL (new_decl) = 0; -- DECL_ARTIFICIAL (new_decl) = 0; -- DECL_PRESERVE_P (new_decl) = 1; -- -- make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl)); -- assemble_variable (new_decl, 0, 0, 0); -- -- return new_decl; --} -- --/* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded -- into an integer defined in libgcc/config/i386/cpuinfo.c */ -- --static tree --fold_builtin_cpu (tree fndecl, tree *args) --{ -- unsigned int i; -- enum ix86_builtins fn_code = (enum ix86_builtins) -- DECL_FUNCTION_CODE (fndecl); -- tree param_string_cst = NULL; -- -- tree __processor_model_type = build_processor_model_struct (); -- tree __cpu_model_var = make_var_decl (__processor_model_type, -- "__cpu_model"); -- -- -- varpool_node::add (__cpu_model_var); -- -- gcc_assert ((args != NULL) && (*args != NULL)); -- -- param_string_cst = *args; -- while (param_string_cst -- && TREE_CODE (param_string_cst) != STRING_CST) -- { -- /* *args must be a expr that can contain other EXPRS leading to a -- STRING_CST. */ -- if (!EXPR_P (param_string_cst)) -- { -- error ("parameter to builtin must be a string constant or literal"); -- return integer_zero_node; -- } -- param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0); -- } -- -- gcc_assert (param_string_cst); -- -- if (fn_code == IX86_BUILTIN_CPU_IS) -- { -- tree ref; -- tree field; -- tree final; -- -- unsigned int field_val = 0; -- unsigned int NUM_ARCH_NAMES -- = sizeof (arch_names_table) / sizeof (struct _arch_names_table); -- -- for (i = 0; i < NUM_ARCH_NAMES; i++) -- if (strcmp (arch_names_table[i].name, -- TREE_STRING_POINTER (param_string_cst)) == 0) -- break; -- -- if (i == NUM_ARCH_NAMES) -- { -- error ("parameter to builtin not valid: %s", -- TREE_STRING_POINTER (param_string_cst)); -- return integer_zero_node; -- } -- -- field = TYPE_FIELDS (__processor_model_type); -- field_val = arch_names_table[i].model; -- -- /* CPU types are stored in the next field. */ -- if (field_val > M_CPU_TYPE_START -- && field_val < M_CPU_SUBTYPE_START) -- { -- field = DECL_CHAIN (field); -- field_val -= M_CPU_TYPE_START; -- } -- -- /* CPU subtypes are stored in the next field. */ -- if (field_val > M_CPU_SUBTYPE_START) -- { -- field = DECL_CHAIN ( DECL_CHAIN (field)); -- field_val -= M_CPU_SUBTYPE_START; -- } -- -- /* Get the appropriate field in __cpu_model. */ -- ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var, -- field, NULL_TREE); -- -- /* Check the value. 
*/ -- final = build2 (EQ_EXPR, unsigned_type_node, ref, -- build_int_cstu (unsigned_type_node, field_val)); -- return build1 (CONVERT_EXPR, integer_type_node, final); -- } -- else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS) -- { -- tree ref; -- tree array_elt; -- tree field; -- tree final; -- -- unsigned int field_val = 0; -- unsigned int NUM_ISA_NAMES -- = sizeof (isa_names_table) / sizeof (struct _isa_names_table); -- -- for (i = 0; i < NUM_ISA_NAMES; i++) -- if (strcmp (isa_names_table[i].name, -- TREE_STRING_POINTER (param_string_cst)) == 0) -- break; -- -- if (i == NUM_ISA_NAMES) -- { -- error ("parameter to builtin not valid: %s", -- TREE_STRING_POINTER (param_string_cst)); -- return integer_zero_node; -- } -- -- if (isa_names_table[i].feature >= 32) -- { -- tree __cpu_features2_var = make_var_decl (unsigned_type_node, -- "__cpu_features2"); -- -- varpool_node::add (__cpu_features2_var); -- field_val = (1U << (isa_names_table[i].feature - 32)); -- /* Return __cpu_features2 & field_val */ -- final = build2 (BIT_AND_EXPR, unsigned_type_node, -- __cpu_features2_var, -- build_int_cstu (unsigned_type_node, field_val)); -- return build1 (CONVERT_EXPR, integer_type_node, final); -- } -- -- field = TYPE_FIELDS (__processor_model_type); -- /* Get the last field, which is __cpu_features. */ -- while (DECL_CHAIN (field)) -- field = DECL_CHAIN (field); -- -- /* Get the appropriate field: __cpu_model.__cpu_features */ -- ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var, -- field, NULL_TREE); -- -- /* Access the 0th element of __cpu_features array. */ -- array_elt = build4 (ARRAY_REF, unsigned_type_node, ref, -- integer_zero_node, NULL_TREE, NULL_TREE); -- -- field_val = (1U << isa_names_table[i].feature); -- /* Return __cpu_model.__cpu_features[0] & field_val */ -- final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt, -- build_int_cstu (unsigned_type_node, field_val)); -- return build1 (CONVERT_EXPR, integer_type_node, final); -- } -- gcc_unreachable (); --} -- --/* Return the shift count of a vector by scalar shift builtin second argument -- ARG1. */ --static tree --ix86_vector_shift_count (tree arg1) --{ -- if (tree_fits_uhwi_p (arg1)) -- return arg1; -- else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8) -- { -- /* The count argument is weird, passed in as various 128-bit -- (or 64-bit) vectors, the low 64 bits from it are the count. 
*/ -- unsigned char buf[16]; -- int len = native_encode_expr (arg1, buf, 16); -- if (len == 0) -- return NULL_TREE; -- tree t = native_interpret_expr (uint64_type_node, buf, len); -- if (t && tree_fits_uhwi_p (t)) -- return t; -- } -- return NULL_TREE; --} -- --static tree --ix86_fold_builtin (tree fndecl, int n_args, -- tree *args, bool ignore ATTRIBUTE_UNUSED) --{ -- if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD) -- { -- enum ix86_builtins fn_code = (enum ix86_builtins) -- DECL_FUNCTION_CODE (fndecl); -- enum rtx_code rcode; -- bool is_vshift; -- unsigned HOST_WIDE_INT mask; -- -- switch (fn_code) -- { -- case IX86_BUILTIN_CPU_IS: -- case IX86_BUILTIN_CPU_SUPPORTS: -- gcc_assert (n_args == 1); -- return fold_builtin_cpu (fndecl, args); -- -- case IX86_BUILTIN_NANQ: -- case IX86_BUILTIN_NANSQ: -- { -- tree type = TREE_TYPE (TREE_TYPE (fndecl)); -- const char *str = c_getstr (*args); -- int quiet = fn_code == IX86_BUILTIN_NANQ; -- REAL_VALUE_TYPE real; -- -- if (str && real_nan (&real, str, quiet, TYPE_MODE (type))) -- return build_real (type, real); -- return NULL_TREE; -- } -- -- case IX86_BUILTIN_INFQ: -- case IX86_BUILTIN_HUGE_VALQ: -- { -- tree type = TREE_TYPE (TREE_TYPE (fndecl)); -- REAL_VALUE_TYPE inf; -- real_inf (&inf); -- return build_real (type, inf); -- } -- -- case IX86_BUILTIN_TZCNT16: -- case IX86_BUILTIN_CTZS: -- case IX86_BUILTIN_TZCNT32: -- case IX86_BUILTIN_TZCNT64: -- gcc_assert (n_args == 1); -- if (TREE_CODE (args[0]) == INTEGER_CST) -- { -- tree type = TREE_TYPE (TREE_TYPE (fndecl)); -- tree arg = args[0]; -- if (fn_code == IX86_BUILTIN_TZCNT16 -- || fn_code == IX86_BUILTIN_CTZS) -- arg = fold_convert (short_unsigned_type_node, arg); -- if (integer_zerop (arg)) -- return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg))); -- else -- return fold_const_call (CFN_CTZ, type, arg); -- } -- break; -- -- case IX86_BUILTIN_LZCNT16: -- case IX86_BUILTIN_CLZS: -- case IX86_BUILTIN_LZCNT32: -- case IX86_BUILTIN_LZCNT64: -- gcc_assert (n_args == 1); -- if (TREE_CODE (args[0]) == INTEGER_CST) -- { -- tree type = TREE_TYPE (TREE_TYPE (fndecl)); -- tree arg = args[0]; -- if (fn_code == IX86_BUILTIN_LZCNT16 -- || fn_code == IX86_BUILTIN_CLZS) -- arg = fold_convert (short_unsigned_type_node, arg); -- if (integer_zerop (arg)) -- return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg))); -- else -- return fold_const_call (CFN_CLZ, type, arg); -- } -- break; -- -- case IX86_BUILTIN_BEXTR32: -- case IX86_BUILTIN_BEXTR64: -- case IX86_BUILTIN_BEXTRI32: -- case IX86_BUILTIN_BEXTRI64: -- gcc_assert (n_args == 2); -- if (tree_fits_uhwi_p (args[1])) -- { -- unsigned HOST_WIDE_INT res = 0; -- unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0])); -- unsigned int start = tree_to_uhwi (args[1]); -- unsigned int len = (start & 0xff00) >> 8; -- start &= 0xff; -- if (start >= prec || len == 0) -- res = 0; -- else if (!tree_fits_uhwi_p (args[0])) -- break; -- else -- res = tree_to_uhwi (args[0]) >> start; -- if (len > prec) -- len = prec; -- if (len < HOST_BITS_PER_WIDE_INT) -- res &= (HOST_WIDE_INT_1U << len) - 1; -- return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res); -- } -- break; -- -- case IX86_BUILTIN_BZHI32: -- case IX86_BUILTIN_BZHI64: -- gcc_assert (n_args == 2); -- if (tree_fits_uhwi_p (args[1])) -- { -- unsigned int idx = tree_to_uhwi (args[1]) & 0xff; -- if (idx >= TYPE_PRECISION (TREE_TYPE (args[0]))) -- return args[0]; -- if (idx == 0) -- return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), 0); -- if (!tree_fits_uhwi_p (args[0])) -- break; -- unsigned 
HOST_WIDE_INT res = tree_to_uhwi (args[0]); -- res &= ~(HOST_WIDE_INT_M1U << idx); -- return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res); -- } -- break; -- -- case IX86_BUILTIN_PDEP32: -- case IX86_BUILTIN_PDEP64: -- gcc_assert (n_args == 2); -- if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1])) -- { -- unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]); -- unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]); -- unsigned HOST_WIDE_INT res = 0; -- unsigned HOST_WIDE_INT m, k = 1; -- for (m = 1; m; m <<= 1) -- if ((mask & m) != 0) -- { -- if ((src & k) != 0) -- res |= m; -- k <<= 1; -- } -- return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res); -- } -- break; -- -- case IX86_BUILTIN_PEXT32: -- case IX86_BUILTIN_PEXT64: -- gcc_assert (n_args == 2); -- if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1])) -- { -- unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]); -- unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]); -- unsigned HOST_WIDE_INT res = 0; -- unsigned HOST_WIDE_INT m, k = 1; -- for (m = 1; m; m <<= 1) -- if ((mask & m) != 0) -- { -- if ((src & m) != 0) -- res |= k; -- k <<= 1; -- } -- return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res); -- } -- break; -- -- case IX86_BUILTIN_MOVMSKPS: -- case IX86_BUILTIN_PMOVMSKB: -- case IX86_BUILTIN_MOVMSKPD: -- case IX86_BUILTIN_PMOVMSKB128: -- case IX86_BUILTIN_MOVMSKPD256: -- case IX86_BUILTIN_MOVMSKPS256: -- case IX86_BUILTIN_PMOVMSKB256: -- gcc_assert (n_args == 1); -- if (TREE_CODE (args[0]) == VECTOR_CST) -- { -- HOST_WIDE_INT res = 0; -- for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i) -- { -- tree e = VECTOR_CST_ELT (args[0], i); -- if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e)) -- { -- if (wi::neg_p (wi::to_wide (e))) -- res |= HOST_WIDE_INT_1 << i; -- } -- else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW (e)) -- { -- if (TREE_REAL_CST (e).sign) -- res |= HOST_WIDE_INT_1 << i; -- } -- else -- return NULL_TREE; -- } -- return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res); -- } -- break; -- -- case IX86_BUILTIN_PSLLD: -- case IX86_BUILTIN_PSLLD128: -- case IX86_BUILTIN_PSLLD128_MASK: -- case IX86_BUILTIN_PSLLD256: -- case IX86_BUILTIN_PSLLD256_MASK: -- case IX86_BUILTIN_PSLLD512: -- case IX86_BUILTIN_PSLLDI: -- case IX86_BUILTIN_PSLLDI128: -- case IX86_BUILTIN_PSLLDI128_MASK: -- case IX86_BUILTIN_PSLLDI256: -- case IX86_BUILTIN_PSLLDI256_MASK: -- case IX86_BUILTIN_PSLLDI512: -- case IX86_BUILTIN_PSLLQ: -- case IX86_BUILTIN_PSLLQ128: -- case IX86_BUILTIN_PSLLQ128_MASK: -- case IX86_BUILTIN_PSLLQ256: -- case IX86_BUILTIN_PSLLQ256_MASK: -- case IX86_BUILTIN_PSLLQ512: -- case IX86_BUILTIN_PSLLQI: -- case IX86_BUILTIN_PSLLQI128: -- case IX86_BUILTIN_PSLLQI128_MASK: -- case IX86_BUILTIN_PSLLQI256: -- case IX86_BUILTIN_PSLLQI256_MASK: -- case IX86_BUILTIN_PSLLQI512: -- case IX86_BUILTIN_PSLLW: -- case IX86_BUILTIN_PSLLW128: -- case IX86_BUILTIN_PSLLW128_MASK: -- case IX86_BUILTIN_PSLLW256: -- case IX86_BUILTIN_PSLLW256_MASK: -- case IX86_BUILTIN_PSLLW512_MASK: -- case IX86_BUILTIN_PSLLWI: -- case IX86_BUILTIN_PSLLWI128: -- case IX86_BUILTIN_PSLLWI128_MASK: -- case IX86_BUILTIN_PSLLWI256: -- case IX86_BUILTIN_PSLLWI256_MASK: -- case IX86_BUILTIN_PSLLWI512_MASK: -- rcode = ASHIFT; -- is_vshift = false; -- goto do_shift; -- case IX86_BUILTIN_PSRAD: -- case IX86_BUILTIN_PSRAD128: -- case IX86_BUILTIN_PSRAD128_MASK: -- case IX86_BUILTIN_PSRAD256: -- case IX86_BUILTIN_PSRAD256_MASK: -- case IX86_BUILTIN_PSRAD512: -- case IX86_BUILTIN_PSRADI: -- case IX86_BUILTIN_PSRADI128: -- 
case IX86_BUILTIN_PSRADI128_MASK: -- case IX86_BUILTIN_PSRADI256: -- case IX86_BUILTIN_PSRADI256_MASK: -- case IX86_BUILTIN_PSRADI512: -- case IX86_BUILTIN_PSRAQ128_MASK: -- case IX86_BUILTIN_PSRAQ256_MASK: -- case IX86_BUILTIN_PSRAQ512: -- case IX86_BUILTIN_PSRAQI128_MASK: -- case IX86_BUILTIN_PSRAQI256_MASK: -- case IX86_BUILTIN_PSRAQI512: -- case IX86_BUILTIN_PSRAW: -- case IX86_BUILTIN_PSRAW128: -- case IX86_BUILTIN_PSRAW128_MASK: -- case IX86_BUILTIN_PSRAW256: -- case IX86_BUILTIN_PSRAW256_MASK: -- case IX86_BUILTIN_PSRAW512: -- case IX86_BUILTIN_PSRAWI: -- case IX86_BUILTIN_PSRAWI128: -- case IX86_BUILTIN_PSRAWI128_MASK: -- case IX86_BUILTIN_PSRAWI256: -- case IX86_BUILTIN_PSRAWI256_MASK: -- case IX86_BUILTIN_PSRAWI512: -- rcode = ASHIFTRT; -- is_vshift = false; -- goto do_shift; -- case IX86_BUILTIN_PSRLD: -- case IX86_BUILTIN_PSRLD128: -- case IX86_BUILTIN_PSRLD128_MASK: -- case IX86_BUILTIN_PSRLD256: -- case IX86_BUILTIN_PSRLD256_MASK: -- case IX86_BUILTIN_PSRLD512: -- case IX86_BUILTIN_PSRLDI: -- case IX86_BUILTIN_PSRLDI128: -- case IX86_BUILTIN_PSRLDI128_MASK: -- case IX86_BUILTIN_PSRLDI256: -- case IX86_BUILTIN_PSRLDI256_MASK: -- case IX86_BUILTIN_PSRLDI512: -- case IX86_BUILTIN_PSRLQ: -- case IX86_BUILTIN_PSRLQ128: -- case IX86_BUILTIN_PSRLQ128_MASK: -- case IX86_BUILTIN_PSRLQ256: -- case IX86_BUILTIN_PSRLQ256_MASK: -- case IX86_BUILTIN_PSRLQ512: -- case IX86_BUILTIN_PSRLQI: -- case IX86_BUILTIN_PSRLQI128: -- case IX86_BUILTIN_PSRLQI128_MASK: -- case IX86_BUILTIN_PSRLQI256: -- case IX86_BUILTIN_PSRLQI256_MASK: -- case IX86_BUILTIN_PSRLQI512: -- case IX86_BUILTIN_PSRLW: -- case IX86_BUILTIN_PSRLW128: -- case IX86_BUILTIN_PSRLW128_MASK: -- case IX86_BUILTIN_PSRLW256: -- case IX86_BUILTIN_PSRLW256_MASK: -- case IX86_BUILTIN_PSRLW512: -- case IX86_BUILTIN_PSRLWI: -- case IX86_BUILTIN_PSRLWI128: -- case IX86_BUILTIN_PSRLWI128_MASK: -- case IX86_BUILTIN_PSRLWI256: -- case IX86_BUILTIN_PSRLWI256_MASK: -- case IX86_BUILTIN_PSRLWI512: -- rcode = LSHIFTRT; -- is_vshift = false; -- goto do_shift; -- case IX86_BUILTIN_PSLLVV16HI: -- case IX86_BUILTIN_PSLLVV16SI: -- case IX86_BUILTIN_PSLLVV2DI: -- case IX86_BUILTIN_PSLLVV2DI_MASK: -- case IX86_BUILTIN_PSLLVV32HI: -- case IX86_BUILTIN_PSLLVV4DI: -- case IX86_BUILTIN_PSLLVV4DI_MASK: -- case IX86_BUILTIN_PSLLVV4SI: -- case IX86_BUILTIN_PSLLVV4SI_MASK: -- case IX86_BUILTIN_PSLLVV8DI: -- case IX86_BUILTIN_PSLLVV8HI: -- case IX86_BUILTIN_PSLLVV8SI: -- case IX86_BUILTIN_PSLLVV8SI_MASK: -- rcode = ASHIFT; -- is_vshift = true; -- goto do_shift; -- case IX86_BUILTIN_PSRAVQ128: -- case IX86_BUILTIN_PSRAVQ256: -- case IX86_BUILTIN_PSRAVV16HI: -- case IX86_BUILTIN_PSRAVV16SI: -- case IX86_BUILTIN_PSRAVV32HI: -- case IX86_BUILTIN_PSRAVV4SI: -- case IX86_BUILTIN_PSRAVV4SI_MASK: -- case IX86_BUILTIN_PSRAVV8DI: -- case IX86_BUILTIN_PSRAVV8HI: -- case IX86_BUILTIN_PSRAVV8SI: -- case IX86_BUILTIN_PSRAVV8SI_MASK: -- rcode = ASHIFTRT; -- is_vshift = true; -- goto do_shift; -- case IX86_BUILTIN_PSRLVV16HI: -- case IX86_BUILTIN_PSRLVV16SI: -- case IX86_BUILTIN_PSRLVV2DI: -- case IX86_BUILTIN_PSRLVV2DI_MASK: -- case IX86_BUILTIN_PSRLVV32HI: -- case IX86_BUILTIN_PSRLVV4DI: -- case IX86_BUILTIN_PSRLVV4DI_MASK: -- case IX86_BUILTIN_PSRLVV4SI: -- case IX86_BUILTIN_PSRLVV4SI_MASK: -- case IX86_BUILTIN_PSRLVV8DI: -- case IX86_BUILTIN_PSRLVV8HI: -- case IX86_BUILTIN_PSRLVV8SI: -- case IX86_BUILTIN_PSRLVV8SI_MASK: -- rcode = LSHIFTRT; -- is_vshift = true; -- goto do_shift; -- -- do_shift: -- gcc_assert (n_args >= 2); -- if (TREE_CODE (args[0]) != VECTOR_CST) -- break; -- 
mask = HOST_WIDE_INT_M1U; -- if (n_args > 2) -- { -- /* This is masked shift. */ -- if (!tree_fits_uhwi_p (args[n_args - 1]) -- || TREE_SIDE_EFFECTS (args[n_args - 2])) -- break; -- mask = tree_to_uhwi (args[n_args - 1]); -- unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])); -- mask |= HOST_WIDE_INT_M1U << elems; -- if (mask != HOST_WIDE_INT_M1U -- && TREE_CODE (args[n_args - 2]) != VECTOR_CST) -- break; -- if (mask == (HOST_WIDE_INT_M1U << elems)) -- return args[n_args - 2]; -- } -- if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST) -- break; -- if (tree tem = (is_vshift ? integer_one_node -- : ix86_vector_shift_count (args[1]))) -- { -- unsigned HOST_WIDE_INT count = tree_to_uhwi (tem); -- unsigned HOST_WIDE_INT prec -- = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0]))); -- if (count == 0 && mask == HOST_WIDE_INT_M1U) -- return args[0]; -- if (count >= prec) -- { -- if (rcode == ASHIFTRT) -- count = prec - 1; -- else if (mask == HOST_WIDE_INT_M1U) -- return build_zero_cst (TREE_TYPE (args[0])); -- } -- tree countt = NULL_TREE; -- if (!is_vshift) -- { -- if (count >= prec) -- countt = integer_zero_node; -- else -- countt = build_int_cst (integer_type_node, count); -- } -- tree_vector_builder builder; -- if (mask != HOST_WIDE_INT_M1U || is_vshift) -- builder.new_vector (TREE_TYPE (args[0]), -- TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])), -- 1); -- else -- builder.new_unary_operation (TREE_TYPE (args[0]), args[0], -- false); -- unsigned int cnt = builder.encoded_nelts (); -- for (unsigned int i = 0; i < cnt; ++i) -- { -- tree elt = VECTOR_CST_ELT (args[0], i); -- if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt)) -- return NULL_TREE; -- tree type = TREE_TYPE (elt); -- if (rcode == LSHIFTRT) -- elt = fold_convert (unsigned_type_for (type), elt); -- if (is_vshift) -- { -- countt = VECTOR_CST_ELT (args[1], i); -- if (TREE_CODE (countt) != INTEGER_CST -- || TREE_OVERFLOW (countt)) -- return NULL_TREE; -- if (wi::neg_p (wi::to_wide (countt)) -- || wi::to_widest (countt) >= prec) -- { -- if (rcode == ASHIFTRT) -- countt = build_int_cst (TREE_TYPE (countt), -- prec - 1); -- else -- { -- elt = build_zero_cst (TREE_TYPE (elt)); -- countt = build_zero_cst (TREE_TYPE (countt)); -- } -- } -- } -- else if (count >= prec) -- elt = build_zero_cst (TREE_TYPE (elt)); -- elt = const_binop (rcode == ASHIFT -- ? LSHIFT_EXPR : RSHIFT_EXPR, -- TREE_TYPE (elt), elt, countt); -- if (!elt || TREE_CODE (elt) != INTEGER_CST) -- return NULL_TREE; -- if (rcode == LSHIFTRT) -- elt = fold_convert (type, elt); -- if ((mask & (HOST_WIDE_INT_1U << i)) == 0) -- { -- elt = VECTOR_CST_ELT (args[n_args - 2], i); -- if (TREE_CODE (elt) != INTEGER_CST -- || TREE_OVERFLOW (elt)) -- return NULL_TREE; -- } -- builder.quick_push (elt); -- } -- return builder.build (); -- } -- break; -- -- default: -- break; -- } -- } -- --#ifdef SUBTARGET_FOLD_BUILTIN -- return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore); --#endif -- -- return NULL_TREE; --} -- --/* Fold a MD builtin (use ix86_fold_builtin for folding into -- constant) in GIMPLE. 
*/ -- --bool --ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) --{ -- gimple *stmt = gsi_stmt (*gsi); -- tree fndecl = gimple_call_fndecl (stmt); -- gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD)); -- int n_args = gimple_call_num_args (stmt); -- enum ix86_builtins fn_code = (enum ix86_builtins) DECL_FUNCTION_CODE (fndecl); -- tree decl = NULL_TREE; -- tree arg0, arg1; -- enum rtx_code rcode; -- unsigned HOST_WIDE_INT count; -- bool is_vshift; -- -- switch (fn_code) -- { -- case IX86_BUILTIN_TZCNT32: -- decl = builtin_decl_implicit (BUILT_IN_CTZ); -- goto fold_tzcnt_lzcnt; -- -- case IX86_BUILTIN_TZCNT64: -- decl = builtin_decl_implicit (BUILT_IN_CTZLL); -- goto fold_tzcnt_lzcnt; -- -- case IX86_BUILTIN_LZCNT32: -- decl = builtin_decl_implicit (BUILT_IN_CLZ); -- goto fold_tzcnt_lzcnt; -- -- case IX86_BUILTIN_LZCNT64: -- decl = builtin_decl_implicit (BUILT_IN_CLZLL); -- goto fold_tzcnt_lzcnt; -- -- fold_tzcnt_lzcnt: -- gcc_assert (n_args == 1); -- arg0 = gimple_call_arg (stmt, 0); -- if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt)) -- { -- int prec = TYPE_PRECISION (TREE_TYPE (arg0)); -- /* If arg0 is provably non-zero, optimize into generic -- __builtin_c[tl]z{,ll} function the middle-end handles -- better. */ -- if (!expr_not_equal_to (arg0, wi::zero (prec))) -- return false; -- -- location_t loc = gimple_location (stmt); -- gimple *g = gimple_build_call (decl, 1, arg0); -- gimple_set_location (g, loc); -- tree lhs = make_ssa_name (integer_type_node); -- gimple_call_set_lhs (g, lhs); -- gsi_insert_before (gsi, g, GSI_SAME_STMT); -- g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs); -- gimple_set_location (g, loc); -- gsi_replace (gsi, g, false); -- return true; -- } -- break; -- -- case IX86_BUILTIN_BZHI32: -- case IX86_BUILTIN_BZHI64: -- gcc_assert (n_args == 2); -- arg1 = gimple_call_arg (stmt, 1); -- if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt)) -- { -- unsigned int idx = tree_to_uhwi (arg1) & 0xff; -- arg0 = gimple_call_arg (stmt, 0); -- if (idx < TYPE_PRECISION (TREE_TYPE (arg0))) -- break; -- location_t loc = gimple_location (stmt); -- gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0); -- gimple_set_location (g, loc); -- gsi_replace (gsi, g, false); -- return true; -- } -- break; -- -- case IX86_BUILTIN_PDEP32: -- case IX86_BUILTIN_PDEP64: -- case IX86_BUILTIN_PEXT32: -- case IX86_BUILTIN_PEXT64: -- gcc_assert (n_args == 2); -- arg1 = gimple_call_arg (stmt, 1); -- if (integer_all_onesp (arg1) && gimple_call_lhs (stmt)) -- { -- location_t loc = gimple_location (stmt); -- arg0 = gimple_call_arg (stmt, 0); -- gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0); -- gimple_set_location (g, loc); -- gsi_replace (gsi, g, false); -- return true; -- } -- break; -- -- case IX86_BUILTIN_PSLLD: -- case IX86_BUILTIN_PSLLD128: -- case IX86_BUILTIN_PSLLD128_MASK: -- case IX86_BUILTIN_PSLLD256: -- case IX86_BUILTIN_PSLLD256_MASK: -- case IX86_BUILTIN_PSLLD512: -- case IX86_BUILTIN_PSLLDI: -- case IX86_BUILTIN_PSLLDI128: -- case IX86_BUILTIN_PSLLDI128_MASK: -- case IX86_BUILTIN_PSLLDI256: -- case IX86_BUILTIN_PSLLDI256_MASK: -- case IX86_BUILTIN_PSLLDI512: -- case IX86_BUILTIN_PSLLQ: -- case IX86_BUILTIN_PSLLQ128: -- case IX86_BUILTIN_PSLLQ128_MASK: -- case IX86_BUILTIN_PSLLQ256: -- case IX86_BUILTIN_PSLLQ256_MASK: -- case IX86_BUILTIN_PSLLQ512: -- case IX86_BUILTIN_PSLLQI: -- case IX86_BUILTIN_PSLLQI128: -- case IX86_BUILTIN_PSLLQI128_MASK: -- case IX86_BUILTIN_PSLLQI256: -- case 
IX86_BUILTIN_PSLLQI256_MASK: -- case IX86_BUILTIN_PSLLQI512: -- case IX86_BUILTIN_PSLLW: -- case IX86_BUILTIN_PSLLW128: -- case IX86_BUILTIN_PSLLW128_MASK: -- case IX86_BUILTIN_PSLLW256: -- case IX86_BUILTIN_PSLLW256_MASK: -- case IX86_BUILTIN_PSLLW512_MASK: -- case IX86_BUILTIN_PSLLWI: -- case IX86_BUILTIN_PSLLWI128: -- case IX86_BUILTIN_PSLLWI128_MASK: -- case IX86_BUILTIN_PSLLWI256: -- case IX86_BUILTIN_PSLLWI256_MASK: -- case IX86_BUILTIN_PSLLWI512_MASK: -- rcode = ASHIFT; -- is_vshift = false; -- goto do_shift; -- case IX86_BUILTIN_PSRAD: -- case IX86_BUILTIN_PSRAD128: -- case IX86_BUILTIN_PSRAD128_MASK: -- case IX86_BUILTIN_PSRAD256: -- case IX86_BUILTIN_PSRAD256_MASK: -- case IX86_BUILTIN_PSRAD512: -- case IX86_BUILTIN_PSRADI: -- case IX86_BUILTIN_PSRADI128: -- case IX86_BUILTIN_PSRADI128_MASK: -- case IX86_BUILTIN_PSRADI256: -- case IX86_BUILTIN_PSRADI256_MASK: -- case IX86_BUILTIN_PSRADI512: -- case IX86_BUILTIN_PSRAQ128_MASK: -- case IX86_BUILTIN_PSRAQ256_MASK: -- case IX86_BUILTIN_PSRAQ512: -- case IX86_BUILTIN_PSRAQI128_MASK: -- case IX86_BUILTIN_PSRAQI256_MASK: -- case IX86_BUILTIN_PSRAQI512: -- case IX86_BUILTIN_PSRAW: -- case IX86_BUILTIN_PSRAW128: -- case IX86_BUILTIN_PSRAW128_MASK: -- case IX86_BUILTIN_PSRAW256: -- case IX86_BUILTIN_PSRAW256_MASK: -- case IX86_BUILTIN_PSRAW512: -- case IX86_BUILTIN_PSRAWI: -- case IX86_BUILTIN_PSRAWI128: -- case IX86_BUILTIN_PSRAWI128_MASK: -- case IX86_BUILTIN_PSRAWI256: -- case IX86_BUILTIN_PSRAWI256_MASK: -- case IX86_BUILTIN_PSRAWI512: -- rcode = ASHIFTRT; -- is_vshift = false; -- goto do_shift; -- case IX86_BUILTIN_PSRLD: -- case IX86_BUILTIN_PSRLD128: -- case IX86_BUILTIN_PSRLD128_MASK: -- case IX86_BUILTIN_PSRLD256: -- case IX86_BUILTIN_PSRLD256_MASK: -- case IX86_BUILTIN_PSRLD512: -- case IX86_BUILTIN_PSRLDI: -- case IX86_BUILTIN_PSRLDI128: -- case IX86_BUILTIN_PSRLDI128_MASK: -- case IX86_BUILTIN_PSRLDI256: -- case IX86_BUILTIN_PSRLDI256_MASK: -- case IX86_BUILTIN_PSRLDI512: -- case IX86_BUILTIN_PSRLQ: -- case IX86_BUILTIN_PSRLQ128: -- case IX86_BUILTIN_PSRLQ128_MASK: -- case IX86_BUILTIN_PSRLQ256: -- case IX86_BUILTIN_PSRLQ256_MASK: -- case IX86_BUILTIN_PSRLQ512: -- case IX86_BUILTIN_PSRLQI: -- case IX86_BUILTIN_PSRLQI128: -- case IX86_BUILTIN_PSRLQI128_MASK: -- case IX86_BUILTIN_PSRLQI256: -- case IX86_BUILTIN_PSRLQI256_MASK: -- case IX86_BUILTIN_PSRLQI512: -- case IX86_BUILTIN_PSRLW: -- case IX86_BUILTIN_PSRLW128: -- case IX86_BUILTIN_PSRLW128_MASK: -- case IX86_BUILTIN_PSRLW256: -- case IX86_BUILTIN_PSRLW256_MASK: -- case IX86_BUILTIN_PSRLW512: -- case IX86_BUILTIN_PSRLWI: -- case IX86_BUILTIN_PSRLWI128: -- case IX86_BUILTIN_PSRLWI128_MASK: -- case IX86_BUILTIN_PSRLWI256: -- case IX86_BUILTIN_PSRLWI256_MASK: -- case IX86_BUILTIN_PSRLWI512: -- rcode = LSHIFTRT; -- is_vshift = false; -- goto do_shift; -- case IX86_BUILTIN_PSLLVV16HI: -- case IX86_BUILTIN_PSLLVV16SI: -- case IX86_BUILTIN_PSLLVV2DI: -- case IX86_BUILTIN_PSLLVV2DI_MASK: -- case IX86_BUILTIN_PSLLVV32HI: -- case IX86_BUILTIN_PSLLVV4DI: -- case IX86_BUILTIN_PSLLVV4DI_MASK: -- case IX86_BUILTIN_PSLLVV4SI: -- case IX86_BUILTIN_PSLLVV4SI_MASK: -- case IX86_BUILTIN_PSLLVV8DI: -- case IX86_BUILTIN_PSLLVV8HI: -- case IX86_BUILTIN_PSLLVV8SI: -- case IX86_BUILTIN_PSLLVV8SI_MASK: -- rcode = ASHIFT; -- is_vshift = true; -- goto do_shift; -- case IX86_BUILTIN_PSRAVQ128: -- case IX86_BUILTIN_PSRAVQ256: -- case IX86_BUILTIN_PSRAVV16HI: -- case IX86_BUILTIN_PSRAVV16SI: -- case IX86_BUILTIN_PSRAVV32HI: -- case IX86_BUILTIN_PSRAVV4SI: -- case IX86_BUILTIN_PSRAVV4SI_MASK: -- case 
IX86_BUILTIN_PSRAVV8DI: -- case IX86_BUILTIN_PSRAVV8HI: -- case IX86_BUILTIN_PSRAVV8SI: -- case IX86_BUILTIN_PSRAVV8SI_MASK: -- rcode = ASHIFTRT; -- is_vshift = true; -- goto do_shift; -- case IX86_BUILTIN_PSRLVV16HI: -- case IX86_BUILTIN_PSRLVV16SI: -- case IX86_BUILTIN_PSRLVV2DI: -- case IX86_BUILTIN_PSRLVV2DI_MASK: -- case IX86_BUILTIN_PSRLVV32HI: -- case IX86_BUILTIN_PSRLVV4DI: -- case IX86_BUILTIN_PSRLVV4DI_MASK: -- case IX86_BUILTIN_PSRLVV4SI: -- case IX86_BUILTIN_PSRLVV4SI_MASK: -- case IX86_BUILTIN_PSRLVV8DI: -- case IX86_BUILTIN_PSRLVV8HI: -- case IX86_BUILTIN_PSRLVV8SI: -- case IX86_BUILTIN_PSRLVV8SI_MASK: -- rcode = LSHIFTRT; -- is_vshift = true; -- goto do_shift; -- -- do_shift: -- gcc_assert (n_args >= 2); -- arg0 = gimple_call_arg (stmt, 0); -- arg1 = gimple_call_arg (stmt, 1); -- if (n_args > 2) -- { -- /* This is masked shift. Only optimize if the mask is all ones. */ -- tree argl = gimple_call_arg (stmt, n_args - 1); -- if (!tree_fits_uhwi_p (argl)) -- break; -- unsigned HOST_WIDE_INT mask = tree_to_uhwi (argl); -- unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); -- if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U) -- break; -- } -- if (is_vshift) -- { -- if (TREE_CODE (arg1) != VECTOR_CST) -- break; -- count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))); -- if (integer_zerop (arg1)) -- count = 0; -- else if (rcode == ASHIFTRT) -- break; -- else -- for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i) -- { -- tree elt = VECTOR_CST_ELT (arg1, i); -- if (!wi::neg_p (wi::to_wide (elt)) -- && wi::to_widest (elt) < count) -- return false; -- } -- } -- else -- { -- arg1 = ix86_vector_shift_count (arg1); -- if (!arg1) -- break; -- count = tree_to_uhwi (arg1); -- } -- if (count == 0) -- { -- /* Just return the first argument for shift by 0. */ -- location_t loc = gimple_location (stmt); -- gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0); -- gimple_set_location (g, loc); -- gsi_replace (gsi, g, false); -- return true; -- } -- if (rcode != ASHIFTRT -- && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)))) -- { -- /* For shift counts equal or greater than precision, except for -- arithmetic right shift the result is zero. */ -- location_t loc = gimple_location (stmt); -- gimple *g = gimple_build_assign (gimple_call_lhs (stmt), -- build_zero_cst (TREE_TYPE (arg0))); -- gimple_set_location (g, loc); -- gsi_replace (gsi, g, false); -- return true; -- } -- break; -- -- default: -- break; -- } -- -- return false; --} -- --/* Make builtins to detect cpu type and features supported. NAME is -- the builtin name, CODE is the builtin code, and FTYPE is the function -- type of the builtin. */ -- --static void --make_cpu_type_builtin (const char* name, int code, -- enum ix86_builtin_func_type ftype, bool is_const) --{ -- tree decl; -- tree type; -- -- type = ix86_get_builtin_func_type (ftype); -- decl = add_builtin_function (name, type, code, BUILT_IN_MD, -- NULL, NULL_TREE); -- gcc_assert (decl != NULL_TREE); -- ix86_builtins[(int) code] = decl; -- TREE_READONLY (decl) = is_const; --} -- --/* Make builtins to get CPU type and features supported. 
The created -- builtins are : -- -- __builtin_cpu_init (), to detect cpu type and features, -- __builtin_cpu_is (""), to check if cpu is of type , -- __builtin_cpu_supports (""), to check if cpu supports -- */ -- --static void --ix86_init_platform_type_builtins (void) --{ -- make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT, -- INT_FTYPE_VOID, false); -- make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS, -- INT_FTYPE_PCCHAR, true); -- make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS, -- INT_FTYPE_PCCHAR, true); --} -- --/* Internal method for ix86_init_builtins. */ -- --static void --ix86_init_builtins_va_builtins_abi (void) --{ -- tree ms_va_ref, sysv_va_ref; -- tree fnvoid_va_end_ms, fnvoid_va_end_sysv; -- tree fnvoid_va_start_ms, fnvoid_va_start_sysv; -- tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv; -- tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE; -- -- if (!TARGET_64BIT) -- return; -- fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE); -- fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE); -- ms_va_ref = build_reference_type (ms_va_list_type_node); -- sysv_va_ref = build_pointer_type (TREE_TYPE (sysv_va_list_type_node)); -- -- fnvoid_va_end_ms = build_function_type_list (void_type_node, ms_va_ref, -- NULL_TREE); -- fnvoid_va_start_ms -- = build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE); -- fnvoid_va_end_sysv -- = build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE); -- fnvoid_va_start_sysv -- = build_varargs_function_type_list (void_type_node, sysv_va_ref, -- NULL_TREE); -- fnvoid_va_copy_ms -- = build_function_type_list (void_type_node, ms_va_ref, -- ms_va_list_type_node, NULL_TREE); -- fnvoid_va_copy_sysv -- = build_function_type_list (void_type_node, sysv_va_ref, -- sysv_va_ref, NULL_TREE); -- -- add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms, -- BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms); -- add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms, -- BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms); -- add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms, -- BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms); -- add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv, -- BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv); -- add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv, -- BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv); -- add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv, -- BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv); --} -- --static void --ix86_init_builtin_types (void) --{ -- tree float80_type_node, const_string_type_node; -- -- /* The __float80 type. */ -- float80_type_node = long_double_type_node; -- if (TYPE_MODE (float80_type_node) != XFmode) -- { -- if (float64x_type_node != NULL_TREE -- && TYPE_MODE (float64x_type_node) == XFmode) -- float80_type_node = float64x_type_node; -- else -- { -- /* The __float80 type. */ -- float80_type_node = make_node (REAL_TYPE); -- -- TYPE_PRECISION (float80_type_node) = 80; -- layout_type (float80_type_node); -- } -- } -- lang_hooks.types.register_builtin_type (float80_type_node, "__float80"); -- -- /* The __float128 type. The node has already been created as -- _Float128, so we only need to register the __float128 name for -- it. 
*/ -- lang_hooks.types.register_builtin_type (float128_type_node, "__float128"); -- -- const_string_type_node -- = build_pointer_type (build_qualified_type -- (char_type_node, TYPE_QUAL_CONST)); -- -- /* This macro is built by i386-builtin-types.awk. */ -- DEFINE_BUILTIN_PRIMITIVE_TYPES; --} -- --static void --ix86_init_builtins (void) --{ -- tree ftype, decl; -- -- ix86_init_builtin_types (); -- -- /* Builtins to get CPU type and features. */ -- ix86_init_platform_type_builtins (); -- -- /* TFmode support builtins. */ -- def_builtin_const (0, 0, "__builtin_infq", -- FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ); -- def_builtin_const (0, 0, "__builtin_huge_valq", -- FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ); -- -- ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_CONST_STRING); -- decl = add_builtin_function ("__builtin_nanq", ftype, IX86_BUILTIN_NANQ, -- BUILT_IN_MD, "nanq", NULL_TREE); -- TREE_READONLY (decl) = 1; -- ix86_builtins[(int) IX86_BUILTIN_NANQ] = decl; -- -- decl = add_builtin_function ("__builtin_nansq", ftype, IX86_BUILTIN_NANSQ, -- BUILT_IN_MD, "nansq", NULL_TREE); -- TREE_READONLY (decl) = 1; -- ix86_builtins[(int) IX86_BUILTIN_NANSQ] = decl; -- -- /* We will expand them to normal call if SSE isn't available since -- they are used by libgcc. */ -- ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128); -- decl = add_builtin_function ("__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ, -- BUILT_IN_MD, "__fabstf2", NULL_TREE); -- TREE_READONLY (decl) = 1; -- ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl; -- -- ftype = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128); -- decl = add_builtin_function ("__builtin_copysignq", ftype, -- IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD, -- "__copysigntf3", NULL_TREE); -- TREE_READONLY (decl) = 1; -- ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl; -- -- ix86_init_tm_builtins (); -- ix86_init_mmx_sse_builtins (); -- -- if (TARGET_LP64) -- ix86_init_builtins_va_builtins_abi (); -- --#ifdef SUBTARGET_INIT_BUILTINS -- SUBTARGET_INIT_BUILTINS; --#endif --} -- --/* Return the ix86 builtin for CODE. */ -- --static tree --ix86_builtin_decl (unsigned code, bool) --{ -- if (code >= IX86_BUILTIN_MAX) -- return error_mark_node; -- -- return ix86_builtins[code]; --} -- --/* Errors in the source file can cause expand_expr to return const0_rtx -- where we expect a vector. To avoid crashing, use one of the vector -- clear instructions. */ --static rtx --safe_vector_operand (rtx x, machine_mode mode) --{ -- if (x == const0_rtx) -- x = CONST0_RTX (mode); -- return x; --} -- --/* Fixup modeless constants to fit required mode. */ --static rtx --fixup_modeless_constant (rtx x, machine_mode mode) --{ -- if (GET_MODE (x) == VOIDmode) -- x = convert_to_mode (mode, x, 1); -- return x; --} -- --/* Subroutine of ix86_expand_builtin to take care of binop insns. 
*/ -- --static rtx --ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target) --{ -- rtx pat; -- tree arg0 = CALL_EXPR_ARG (exp, 0); -- tree arg1 = CALL_EXPR_ARG (exp, 1); -- rtx op0 = expand_normal (arg0); -- rtx op1 = expand_normal (arg1); -- machine_mode tmode = insn_data[icode].operand[0].mode; -- machine_mode mode0 = insn_data[icode].operand[1].mode; -- machine_mode mode1 = insn_data[icode].operand[2].mode; -- -- if (VECTOR_MODE_P (mode0)) -- op0 = safe_vector_operand (op0, mode0); -- if (VECTOR_MODE_P (mode1)) -- op1 = safe_vector_operand (op1, mode1); -- -- if (optimize || !target -- || GET_MODE (target) != tmode -- || !insn_data[icode].operand[0].predicate (target, tmode)) -- target = gen_reg_rtx (tmode); -- -- if (GET_MODE (op1) == SImode && mode1 == TImode) -- { -- rtx x = gen_reg_rtx (V4SImode); -- emit_insn (gen_sse2_loadd (x, op1)); -- op1 = gen_lowpart (TImode, x); -- } -- -- if (!insn_data[icode].operand[1].predicate (op0, mode0)) -- op0 = copy_to_mode_reg (mode0, op0); -- if (!insn_data[icode].operand[2].predicate (op1, mode1)) -- op1 = copy_to_mode_reg (mode1, op1); -- -- pat = GEN_FCN (icode) (target, op0, op1); -- if (! pat) -- return 0; -- -- emit_insn (pat); -- -- return target; --} -- --/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */ -- --static rtx --ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target, -- enum ix86_builtin_func_type m_type, -- enum rtx_code sub_code) --{ -- rtx pat; -- int i; -- int nargs; -- bool comparison_p = false; -- bool tf_p = false; -- bool last_arg_constant = false; -- int num_memory = 0; -- struct { -- rtx op; -- machine_mode mode; -- } args[4]; -- -- machine_mode tmode = insn_data[icode].operand[0].mode; -- -- switch (m_type) -- { -- case MULTI_ARG_4_DF2_DI_I: -- case MULTI_ARG_4_DF2_DI_I1: -- case MULTI_ARG_4_SF2_SI_I: -- case MULTI_ARG_4_SF2_SI_I1: -- nargs = 4; -- last_arg_constant = true; -- break; -- -- case MULTI_ARG_3_SF: -- case MULTI_ARG_3_DF: -- case MULTI_ARG_3_SF2: -- case MULTI_ARG_3_DF2: -- case MULTI_ARG_3_DI: -- case MULTI_ARG_3_SI: -- case MULTI_ARG_3_SI_DI: -- case MULTI_ARG_3_HI: -- case MULTI_ARG_3_HI_SI: -- case MULTI_ARG_3_QI: -- case MULTI_ARG_3_DI2: -- case MULTI_ARG_3_SI2: -- case MULTI_ARG_3_HI2: -- case MULTI_ARG_3_QI2: -- nargs = 3; -- break; -- -- case MULTI_ARG_2_SF: -- case MULTI_ARG_2_DF: -- case MULTI_ARG_2_DI: -- case MULTI_ARG_2_SI: -- case MULTI_ARG_2_HI: -- case MULTI_ARG_2_QI: -- nargs = 2; -- break; -- -- case MULTI_ARG_2_DI_IMM: -- case MULTI_ARG_2_SI_IMM: -- case MULTI_ARG_2_HI_IMM: -- case MULTI_ARG_2_QI_IMM: -- nargs = 2; -- last_arg_constant = true; -- break; -- -- case MULTI_ARG_1_SF: -- case MULTI_ARG_1_DF: -- case MULTI_ARG_1_SF2: -- case MULTI_ARG_1_DF2: -- case MULTI_ARG_1_DI: -- case MULTI_ARG_1_SI: -- case MULTI_ARG_1_HI: -- case MULTI_ARG_1_QI: -- case MULTI_ARG_1_SI_DI: -- case MULTI_ARG_1_HI_DI: -- case MULTI_ARG_1_HI_SI: -- case MULTI_ARG_1_QI_DI: -- case MULTI_ARG_1_QI_SI: -- case MULTI_ARG_1_QI_HI: -- nargs = 1; -- break; -- -- case MULTI_ARG_2_DI_CMP: -- case MULTI_ARG_2_SI_CMP: -- case MULTI_ARG_2_HI_CMP: -- case MULTI_ARG_2_QI_CMP: -- nargs = 2; -- comparison_p = true; -- break; -- -- case MULTI_ARG_2_SF_TF: -- case MULTI_ARG_2_DF_TF: -- case MULTI_ARG_2_DI_TF: -- case MULTI_ARG_2_SI_TF: -- case MULTI_ARG_2_HI_TF: -- case MULTI_ARG_2_QI_TF: -- nargs = 2; -- tf_p = true; -- break; -- -- default: -- gcc_unreachable (); -- } -- -- if (optimize || !target -- || GET_MODE (target) != tmode -- || 
!insn_data[icode].operand[0].predicate (target, tmode)) -- target = gen_reg_rtx (tmode); -- else if (memory_operand (target, tmode)) -- num_memory++; -- -- gcc_assert (nargs <= 4); -- -- for (i = 0; i < nargs; i++) -- { -- tree arg = CALL_EXPR_ARG (exp, i); -- rtx op = expand_normal (arg); -- int adjust = (comparison_p) ? 1 : 0; -- machine_mode mode = insn_data[icode].operand[i+adjust+1].mode; -- -- if (last_arg_constant && i == nargs - 1) -- { -- if (!insn_data[icode].operand[i + 1].predicate (op, mode)) -- { -- enum insn_code new_icode = icode; -- switch (icode) -- { -- case CODE_FOR_xop_vpermil2v2df3: -- case CODE_FOR_xop_vpermil2v4sf3: -- case CODE_FOR_xop_vpermil2v4df3: -- case CODE_FOR_xop_vpermil2v8sf3: -- error ("the last argument must be a 2-bit immediate"); -- return gen_reg_rtx (tmode); -- case CODE_FOR_xop_rotlv2di3: -- new_icode = CODE_FOR_rotlv2di3; -- goto xop_rotl; -- case CODE_FOR_xop_rotlv4si3: -- new_icode = CODE_FOR_rotlv4si3; -- goto xop_rotl; -- case CODE_FOR_xop_rotlv8hi3: -- new_icode = CODE_FOR_rotlv8hi3; -- goto xop_rotl; -- case CODE_FOR_xop_rotlv16qi3: -- new_icode = CODE_FOR_rotlv16qi3; -- xop_rotl: -- if (CONST_INT_P (op)) -- { -- int mask = GET_MODE_UNIT_BITSIZE (tmode) - 1; -- op = GEN_INT (INTVAL (op) & mask); -- gcc_checking_assert -- (insn_data[icode].operand[i + 1].predicate (op, mode)); -- } -- else -- { -- gcc_checking_assert -- (nargs == 2 -- && insn_data[new_icode].operand[0].mode == tmode -- && insn_data[new_icode].operand[1].mode == tmode -- && insn_data[new_icode].operand[2].mode == mode -- && insn_data[new_icode].operand[0].predicate -- == insn_data[icode].operand[0].predicate -- && insn_data[new_icode].operand[1].predicate -- == insn_data[icode].operand[1].predicate); -- icode = new_icode; -- goto non_constant; -- } -- break; -- default: -- gcc_unreachable (); -- } -- } -- } -- else -- { -- non_constant: -- if (VECTOR_MODE_P (mode)) -- op = safe_vector_operand (op, mode); -- -- /* If we aren't optimizing, only allow one memory operand to be -- generated. */ -- if (memory_operand (op, mode)) -- num_memory++; -- -- gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode); -- -- if (optimize -- || !insn_data[icode].operand[i+adjust+1].predicate (op, mode) -- || num_memory > 1) -- op = force_reg (mode, op); -- } -- -- args[i].op = op; -- args[i].mode = mode; -- } -- -- switch (nargs) -- { -- case 1: -- pat = GEN_FCN (icode) (target, args[0].op); -- break; -- -- case 2: -- if (tf_p) -- pat = GEN_FCN (icode) (target, args[0].op, args[1].op, -- GEN_INT ((int)sub_code)); -- else if (! comparison_p) -- pat = GEN_FCN (icode) (target, args[0].op, args[1].op); -- else -- { -- rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target), -- args[0].op, -- args[1].op); -- -- pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op); -- } -- break; -- -- case 3: -- pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op); -- break; -- -- case 4: -- pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op); -- break; -- -- default: -- gcc_unreachable (); -- } -- -- if (! pat) -- return 0; -- -- emit_insn (pat); -- return target; --} -- --/* Subroutine of ix86_expand_args_builtin to take care of scalar unop -- insns with vec_merge. 
*/ -- --static rtx --ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp, -- rtx target) --{ -- rtx pat; -- tree arg0 = CALL_EXPR_ARG (exp, 0); -- rtx op1, op0 = expand_normal (arg0); -- machine_mode tmode = insn_data[icode].operand[0].mode; -- machine_mode mode0 = insn_data[icode].operand[1].mode; -- -- if (optimize || !target -- || GET_MODE (target) != tmode -- || !insn_data[icode].operand[0].predicate (target, tmode)) -- target = gen_reg_rtx (tmode); -- -- if (VECTOR_MODE_P (mode0)) -- op0 = safe_vector_operand (op0, mode0); -- -- if ((optimize && !register_operand (op0, mode0)) -- || !insn_data[icode].operand[1].predicate (op0, mode0)) -- op0 = copy_to_mode_reg (mode0, op0); -- -- op1 = op0; -- if (!insn_data[icode].operand[2].predicate (op1, mode0)) -- op1 = copy_to_mode_reg (mode0, op1); -- -- pat = GEN_FCN (icode) (target, op0, op1); -- if (! pat) -- return 0; -- emit_insn (pat); -- return target; --} -- --/* Subroutine of ix86_expand_builtin to take care of comparison insns. */ -- --static rtx --ix86_expand_sse_compare (const struct builtin_description *d, -- tree exp, rtx target, bool swap) --{ -- rtx pat; -- tree arg0 = CALL_EXPR_ARG (exp, 0); -- tree arg1 = CALL_EXPR_ARG (exp, 1); -- rtx op0 = expand_normal (arg0); -- rtx op1 = expand_normal (arg1); -- rtx op2; -- machine_mode tmode = insn_data[d->icode].operand[0].mode; -- machine_mode mode0 = insn_data[d->icode].operand[1].mode; -- machine_mode mode1 = insn_data[d->icode].operand[2].mode; -- enum rtx_code comparison = d->comparison; -- -- if (VECTOR_MODE_P (mode0)) -- op0 = safe_vector_operand (op0, mode0); -- if (VECTOR_MODE_P (mode1)) -- op1 = safe_vector_operand (op1, mode1); -- -- /* Swap operands if we have a comparison that isn't available in -- hardware. */ -- if (swap) -- std::swap (op0, op1); -- -- if (optimize || !target -- || GET_MODE (target) != tmode -- || !insn_data[d->icode].operand[0].predicate (target, tmode)) -- target = gen_reg_rtx (tmode); -- -- if ((optimize && !register_operand (op0, mode0)) -- || !insn_data[d->icode].operand[1].predicate (op0, mode0)) -- op0 = copy_to_mode_reg (mode0, op0); -- if ((optimize && !register_operand (op1, mode1)) -- || !insn_data[d->icode].operand[2].predicate (op1, mode1)) -- op1 = copy_to_mode_reg (mode1, op1); -- -- op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1); -- pat = GEN_FCN (d->icode) (target, op0, op1, op2); -- if (! pat) -- return 0; -- emit_insn (pat); -- return target; --} -- --/* Subroutine of ix86_expand_builtin to take care of comi insns. */ -- --static rtx --ix86_expand_sse_comi (const struct builtin_description *d, tree exp, -- rtx target) --{ -- rtx pat; -- tree arg0 = CALL_EXPR_ARG (exp, 0); -- tree arg1 = CALL_EXPR_ARG (exp, 1); -- rtx op0 = expand_normal (arg0); -- rtx op1 = expand_normal (arg1); -- machine_mode mode0 = insn_data[d->icode].operand[0].mode; -- machine_mode mode1 = insn_data[d->icode].operand[1].mode; -- enum rtx_code comparison = d->comparison; -- -- if (VECTOR_MODE_P (mode0)) -- op0 = safe_vector_operand (op0, mode0); -- if (VECTOR_MODE_P (mode1)) -- op1 = safe_vector_operand (op1, mode1); -- -- /* Swap operands if we have a comparison that isn't available in -- hardware. 
*/ -- if (d->flag & BUILTIN_DESC_SWAP_OPERANDS) -- std::swap (op0, op1); -- -- target = gen_reg_rtx (SImode); -- emit_move_insn (target, const0_rtx); -- target = gen_rtx_SUBREG (QImode, target, 0); -- -- if ((optimize && !register_operand (op0, mode0)) -- || !insn_data[d->icode].operand[0].predicate (op0, mode0)) -- op0 = copy_to_mode_reg (mode0, op0); -- if ((optimize && !register_operand (op1, mode1)) -- || !insn_data[d->icode].operand[1].predicate (op1, mode1)) -- op1 = copy_to_mode_reg (mode1, op1); -- -- pat = GEN_FCN (d->icode) (op0, op1); -- if (! pat) -- return 0; -- emit_insn (pat); -- emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target), -- gen_rtx_fmt_ee (comparison, QImode, -- SET_DEST (pat), -- const0_rtx))); -- -- return SUBREG_REG (target); --} -- --/* Subroutines of ix86_expand_args_builtin to take care of round insns. */ -- --static rtx --ix86_expand_sse_round (const struct builtin_description *d, tree exp, -- rtx target) --{ -- rtx pat; -- tree arg0 = CALL_EXPR_ARG (exp, 0); -- rtx op1, op0 = expand_normal (arg0); -- machine_mode tmode = insn_data[d->icode].operand[0].mode; -- machine_mode mode0 = insn_data[d->icode].operand[1].mode; -- -- if (optimize || target == 0 -- || GET_MODE (target) != tmode -- || !insn_data[d->icode].operand[0].predicate (target, tmode)) -- target = gen_reg_rtx (tmode); -- -- if (VECTOR_MODE_P (mode0)) -- op0 = safe_vector_operand (op0, mode0); -- -- if ((optimize && !register_operand (op0, mode0)) -- || !insn_data[d->icode].operand[0].predicate (op0, mode0)) -- op0 = copy_to_mode_reg (mode0, op0); -- -- op1 = GEN_INT (d->comparison); -- -- pat = GEN_FCN (d->icode) (target, op0, op1); -- if (! pat) -- return 0; -- emit_insn (pat); -- return target; --} -- --static rtx --ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d, -- tree exp, rtx target) --{ -- rtx pat; -- tree arg0 = CALL_EXPR_ARG (exp, 0); -- tree arg1 = CALL_EXPR_ARG (exp, 1); -- rtx op0 = expand_normal (arg0); -- rtx op1 = expand_normal (arg1); -- rtx op2; -- machine_mode tmode = insn_data[d->icode].operand[0].mode; -- machine_mode mode0 = insn_data[d->icode].operand[1].mode; -- machine_mode mode1 = insn_data[d->icode].operand[2].mode; -- -- if (optimize || target == 0 -- || GET_MODE (target) != tmode -- || !insn_data[d->icode].operand[0].predicate (target, tmode)) -- target = gen_reg_rtx (tmode); -- -- op0 = safe_vector_operand (op0, mode0); -- op1 = safe_vector_operand (op1, mode1); -- -- if ((optimize && !register_operand (op0, mode0)) -- || !insn_data[d->icode].operand[0].predicate (op0, mode0)) -- op0 = copy_to_mode_reg (mode0, op0); -- if ((optimize && !register_operand (op1, mode1)) -- || !insn_data[d->icode].operand[1].predicate (op1, mode1)) -- op1 = copy_to_mode_reg (mode1, op1); -- -- op2 = GEN_INT (d->comparison); -- -- pat = GEN_FCN (d->icode) (target, op0, op1, op2); -- if (! pat) -- return 0; -- emit_insn (pat); -- return target; --} -- --/* Subroutine of ix86_expand_builtin to take care of ptest insns. 
*/ -- --static rtx --ix86_expand_sse_ptest (const struct builtin_description *d, tree exp, -- rtx target) --{ -- rtx pat; -- tree arg0 = CALL_EXPR_ARG (exp, 0); -- tree arg1 = CALL_EXPR_ARG (exp, 1); -- rtx op0 = expand_normal (arg0); -- rtx op1 = expand_normal (arg1); -- machine_mode mode0 = insn_data[d->icode].operand[0].mode; -- machine_mode mode1 = insn_data[d->icode].operand[1].mode; -- enum rtx_code comparison = d->comparison; -- -- if (VECTOR_MODE_P (mode0)) -- op0 = safe_vector_operand (op0, mode0); -- if (VECTOR_MODE_P (mode1)) -- op1 = safe_vector_operand (op1, mode1); -- -- target = gen_reg_rtx (SImode); -- emit_move_insn (target, const0_rtx); -- target = gen_rtx_SUBREG (QImode, target, 0); -- -- if ((optimize && !register_operand (op0, mode0)) -- || !insn_data[d->icode].operand[0].predicate (op0, mode0)) -- op0 = copy_to_mode_reg (mode0, op0); -- if ((optimize && !register_operand (op1, mode1)) -- || !insn_data[d->icode].operand[1].predicate (op1, mode1)) -- op1 = copy_to_mode_reg (mode1, op1); -- -- pat = GEN_FCN (d->icode) (op0, op1); -- if (! pat) -- return 0; -- emit_insn (pat); -- emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target), -- gen_rtx_fmt_ee (comparison, QImode, -- SET_DEST (pat), -- const0_rtx))); -- -- return SUBREG_REG (target); --} -- --/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */ -- --static rtx --ix86_expand_sse_pcmpestr (const struct builtin_description *d, -- tree exp, rtx target) --{ -- rtx pat; -- tree arg0 = CALL_EXPR_ARG (exp, 0); -- tree arg1 = CALL_EXPR_ARG (exp, 1); -- tree arg2 = CALL_EXPR_ARG (exp, 2); -- tree arg3 = CALL_EXPR_ARG (exp, 3); -- tree arg4 = CALL_EXPR_ARG (exp, 4); -- rtx scratch0, scratch1; -- rtx op0 = expand_normal (arg0); -- rtx op1 = expand_normal (arg1); -- rtx op2 = expand_normal (arg2); -- rtx op3 = expand_normal (arg3); -- rtx op4 = expand_normal (arg4); -- machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm; -- -- tmode0 = insn_data[d->icode].operand[0].mode; -- tmode1 = insn_data[d->icode].operand[1].mode; -- modev2 = insn_data[d->icode].operand[2].mode; -- modei3 = insn_data[d->icode].operand[3].mode; -- modev4 = insn_data[d->icode].operand[4].mode; -- modei5 = insn_data[d->icode].operand[5].mode; -- modeimm = insn_data[d->icode].operand[6].mode; -- -- if (VECTOR_MODE_P (modev2)) -- op0 = safe_vector_operand (op0, modev2); -- if (VECTOR_MODE_P (modev4)) -- op2 = safe_vector_operand (op2, modev4); -- -- if (!insn_data[d->icode].operand[2].predicate (op0, modev2)) -- op0 = copy_to_mode_reg (modev2, op0); -- if (!insn_data[d->icode].operand[3].predicate (op1, modei3)) -- op1 = copy_to_mode_reg (modei3, op1); -- if ((optimize && !register_operand (op2, modev4)) -- || !insn_data[d->icode].operand[4].predicate (op2, modev4)) -- op2 = copy_to_mode_reg (modev4, op2); -- if (!insn_data[d->icode].operand[5].predicate (op3, modei5)) -- op3 = copy_to_mode_reg (modei5, op3); -- -- if (!insn_data[d->icode].operand[6].predicate (op4, modeimm)) -- { -- error ("the fifth argument must be an 8-bit immediate"); -- return const0_rtx; -- } -- -- if (d->code == IX86_BUILTIN_PCMPESTRI128) -- { -- if (optimize || !target -- || GET_MODE (target) != tmode0 -- || !insn_data[d->icode].operand[0].predicate (target, tmode0)) -- target = gen_reg_rtx (tmode0); -- -- scratch1 = gen_reg_rtx (tmode1); -- -- pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4); -- } -- else if (d->code == IX86_BUILTIN_PCMPESTRM128) -- { -- if (optimize || !target -- || GET_MODE (target) != tmode1 
-- || !insn_data[d->icode].operand[1].predicate (target, tmode1)) -- target = gen_reg_rtx (tmode1); -- -- scratch0 = gen_reg_rtx (tmode0); -- -- pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4); -- } -- else -- { -- gcc_assert (d->flag); -- -- scratch0 = gen_reg_rtx (tmode0); -- scratch1 = gen_reg_rtx (tmode1); -- -- pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4); -- } -- -- if (! pat) -- return 0; -- -- emit_insn (pat); -- -- if (d->flag) -- { -- target = gen_reg_rtx (SImode); -- emit_move_insn (target, const0_rtx); -- target = gen_rtx_SUBREG (QImode, target, 0); -- -- emit_insn -- (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target), -- gen_rtx_fmt_ee (EQ, QImode, -- gen_rtx_REG ((machine_mode) d->flag, -- FLAGS_REG), -- const0_rtx))); -- return SUBREG_REG (target); -- } -- else -- return target; --} -- -- --/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */ -- --static rtx --ix86_expand_sse_pcmpistr (const struct builtin_description *d, -- tree exp, rtx target) --{ -- rtx pat; -- tree arg0 = CALL_EXPR_ARG (exp, 0); -- tree arg1 = CALL_EXPR_ARG (exp, 1); -- tree arg2 = CALL_EXPR_ARG (exp, 2); -- rtx scratch0, scratch1; -- rtx op0 = expand_normal (arg0); -- rtx op1 = expand_normal (arg1); -- rtx op2 = expand_normal (arg2); -- machine_mode tmode0, tmode1, modev2, modev3, modeimm; -- -- tmode0 = insn_data[d->icode].operand[0].mode; -- tmode1 = insn_data[d->icode].operand[1].mode; -- modev2 = insn_data[d->icode].operand[2].mode; -- modev3 = insn_data[d->icode].operand[3].mode; -- modeimm = insn_data[d->icode].operand[4].mode; -- -- if (VECTOR_MODE_P (modev2)) -- op0 = safe_vector_operand (op0, modev2); -- if (VECTOR_MODE_P (modev3)) -- op1 = safe_vector_operand (op1, modev3); -- -- if (!insn_data[d->icode].operand[2].predicate (op0, modev2)) -- op0 = copy_to_mode_reg (modev2, op0); -- if ((optimize && !register_operand (op1, modev3)) -- || !insn_data[d->icode].operand[3].predicate (op1, modev3)) -- op1 = copy_to_mode_reg (modev3, op1); -- -- if (!insn_data[d->icode].operand[4].predicate (op2, modeimm)) -- { -- error ("the third argument must be an 8-bit immediate"); -- return const0_rtx; -- } -- -- if (d->code == IX86_BUILTIN_PCMPISTRI128) -- { -- if (optimize || !target -- || GET_MODE (target) != tmode0 -- || !insn_data[d->icode].operand[0].predicate (target, tmode0)) -- target = gen_reg_rtx (tmode0); -- -- scratch1 = gen_reg_rtx (tmode1); -- -- pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2); -- } -- else if (d->code == IX86_BUILTIN_PCMPISTRM128) -- { -- if (optimize || !target -- || GET_MODE (target) != tmode1 -- || !insn_data[d->icode].operand[1].predicate (target, tmode1)) -- target = gen_reg_rtx (tmode1); -- -- scratch0 = gen_reg_rtx (tmode0); -- -- pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2); -- } -- else -- { -- gcc_assert (d->flag); -- -- scratch0 = gen_reg_rtx (tmode0); -- scratch1 = gen_reg_rtx (tmode1); -- -- pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2); -- } -- -- if (! 
pat) -- return 0; -- -- emit_insn (pat); -- -- if (d->flag) -- { -- target = gen_reg_rtx (SImode); -- emit_move_insn (target, const0_rtx); -- target = gen_rtx_SUBREG (QImode, target, 0); -- -- emit_insn -- (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target), -- gen_rtx_fmt_ee (EQ, QImode, -- gen_rtx_REG ((machine_mode) d->flag, -- FLAGS_REG), -- const0_rtx))); -- return SUBREG_REG (target); -- } -- else -- return target; --} -- --/* Subroutine of ix86_expand_builtin to take care of insns with -- variable number of operands. */ -- --static rtx --ix86_expand_args_builtin (const struct builtin_description *d, -- tree exp, rtx target) --{ -- rtx pat, real_target; -- unsigned int i, nargs; -- unsigned int nargs_constant = 0; -- unsigned int mask_pos = 0; -- int num_memory = 0; -- struct -- { -- rtx op; -- machine_mode mode; -- } args[6]; -- bool second_arg_count = false; -- enum insn_code icode = d->icode; -- const struct insn_data_d *insn_p = &insn_data[icode]; -- machine_mode tmode = insn_p->operand[0].mode; -- machine_mode rmode = VOIDmode; -- bool swap = false; -- enum rtx_code comparison = d->comparison; -- -- switch ((enum ix86_builtin_func_type) d->flag) -- { -- case V2DF_FTYPE_V2DF_ROUND: -- case V4DF_FTYPE_V4DF_ROUND: -- case V8DF_FTYPE_V8DF_ROUND: -- case V4SF_FTYPE_V4SF_ROUND: -- case V8SF_FTYPE_V8SF_ROUND: -- case V16SF_FTYPE_V16SF_ROUND: -- case V4SI_FTYPE_V4SF_ROUND: -- case V8SI_FTYPE_V8SF_ROUND: -- case V16SI_FTYPE_V16SF_ROUND: -- return ix86_expand_sse_round (d, exp, target); -- case V4SI_FTYPE_V2DF_V2DF_ROUND: -- case V8SI_FTYPE_V4DF_V4DF_ROUND: -- case V16SI_FTYPE_V8DF_V8DF_ROUND: -- return ix86_expand_sse_round_vec_pack_sfix (d, exp, target); -- case INT_FTYPE_V8SF_V8SF_PTEST: -- case INT_FTYPE_V4DI_V4DI_PTEST: -- case INT_FTYPE_V4DF_V4DF_PTEST: -- case INT_FTYPE_V4SF_V4SF_PTEST: -- case INT_FTYPE_V2DI_V2DI_PTEST: -- case INT_FTYPE_V2DF_V2DF_PTEST: -- return ix86_expand_sse_ptest (d, exp, target); -- case FLOAT128_FTYPE_FLOAT128: -- case FLOAT_FTYPE_FLOAT: -- case INT_FTYPE_INT: -- case UINT_FTYPE_UINT: -- case UINT16_FTYPE_UINT16: -- case UINT64_FTYPE_INT: -- case UINT64_FTYPE_UINT64: -- case INT64_FTYPE_INT64: -- case INT64_FTYPE_V4SF: -- case INT64_FTYPE_V2DF: -- case INT_FTYPE_V16QI: -- case INT_FTYPE_V8QI: -- case INT_FTYPE_V8SF: -- case INT_FTYPE_V4DF: -- case INT_FTYPE_V4SF: -- case INT_FTYPE_V2DF: -- case INT_FTYPE_V32QI: -- case V16QI_FTYPE_V16QI: -- case V8SI_FTYPE_V8SF: -- case V8SI_FTYPE_V4SI: -- case V8HI_FTYPE_V8HI: -- case V8HI_FTYPE_V16QI: -- case V8QI_FTYPE_V8QI: -- case V8SF_FTYPE_V8SF: -- case V8SF_FTYPE_V8SI: -- case V8SF_FTYPE_V4SF: -- case V8SF_FTYPE_V8HI: -- case V4SI_FTYPE_V4SI: -- case V4SI_FTYPE_V16QI: -- case V4SI_FTYPE_V4SF: -- case V4SI_FTYPE_V8SI: -- case V4SI_FTYPE_V8HI: -- case V4SI_FTYPE_V4DF: -- case V4SI_FTYPE_V2DF: -- case V4HI_FTYPE_V4HI: -- case V4DF_FTYPE_V4DF: -- case V4DF_FTYPE_V4SI: -- case V4DF_FTYPE_V4SF: -- case V4DF_FTYPE_V2DF: -- case V4SF_FTYPE_V4SF: -- case V4SF_FTYPE_V4SI: -- case V4SF_FTYPE_V8SF: -- case V4SF_FTYPE_V4DF: -- case V4SF_FTYPE_V8HI: -- case V4SF_FTYPE_V2DF: -- case V2DI_FTYPE_V2DI: -- case V2DI_FTYPE_V16QI: -- case V2DI_FTYPE_V8HI: -- case V2DI_FTYPE_V4SI: -- case V2DF_FTYPE_V2DF: -- case V2DF_FTYPE_V4SI: -- case V2DF_FTYPE_V4DF: -- case V2DF_FTYPE_V4SF: -- case V2DF_FTYPE_V2SI: -- case V2SI_FTYPE_V2SI: -- case V2SI_FTYPE_V4SF: -- case V2SI_FTYPE_V2SF: -- case V2SI_FTYPE_V2DF: -- case V2SF_FTYPE_V2SF: -- case V2SF_FTYPE_V2SI: -- case V32QI_FTYPE_V32QI: -- case V32QI_FTYPE_V16QI: -- case V16HI_FTYPE_V16HI: 
-- case V16HI_FTYPE_V8HI: -- case V8SI_FTYPE_V8SI: -- case V16HI_FTYPE_V16QI: -- case V8SI_FTYPE_V16QI: -- case V4DI_FTYPE_V16QI: -- case V8SI_FTYPE_V8HI: -- case V4DI_FTYPE_V8HI: -- case V4DI_FTYPE_V4SI: -- case V4DI_FTYPE_V2DI: -- case UQI_FTYPE_UQI: -- case UHI_FTYPE_UHI: -- case USI_FTYPE_USI: -- case USI_FTYPE_UQI: -- case USI_FTYPE_UHI: -- case UDI_FTYPE_UDI: -- case UHI_FTYPE_V16QI: -- case USI_FTYPE_V32QI: -- case UDI_FTYPE_V64QI: -- case V16QI_FTYPE_UHI: -- case V32QI_FTYPE_USI: -- case V64QI_FTYPE_UDI: -- case V8HI_FTYPE_UQI: -- case V16HI_FTYPE_UHI: -- case V32HI_FTYPE_USI: -- case V4SI_FTYPE_UQI: -- case V8SI_FTYPE_UQI: -- case V4SI_FTYPE_UHI: -- case V8SI_FTYPE_UHI: -- case UQI_FTYPE_V8HI: -- case UHI_FTYPE_V16HI: -- case USI_FTYPE_V32HI: -- case UQI_FTYPE_V4SI: -- case UQI_FTYPE_V8SI: -- case UHI_FTYPE_V16SI: -- case UQI_FTYPE_V2DI: -- case UQI_FTYPE_V4DI: -- case UQI_FTYPE_V8DI: -- case V16SI_FTYPE_UHI: -- case V2DI_FTYPE_UQI: -- case V4DI_FTYPE_UQI: -- case V16SI_FTYPE_INT: -- case V16SF_FTYPE_V8SF: -- case V16SI_FTYPE_V8SI: -- case V16SF_FTYPE_V4SF: -- case V16SI_FTYPE_V4SI: -- case V16SI_FTYPE_V16SF: -- case V16SI_FTYPE_V16SI: -- case V64QI_FTYPE_V64QI: -- case V32HI_FTYPE_V32HI: -- case V16SF_FTYPE_V16SF: -- case V8DI_FTYPE_UQI: -- case V8DI_FTYPE_V8DI: -- case V8DF_FTYPE_V4DF: -- case V8DF_FTYPE_V2DF: -- case V8DF_FTYPE_V8DF: -- case V4DI_FTYPE_V4DI: -- nargs = 1; -- break; -- case V4SF_FTYPE_V4SF_VEC_MERGE: -- case V2DF_FTYPE_V2DF_VEC_MERGE: -- return ix86_expand_unop_vec_merge_builtin (icode, exp, target); -- case FLOAT128_FTYPE_FLOAT128_FLOAT128: -- case V16QI_FTYPE_V16QI_V16QI: -- case V16QI_FTYPE_V8HI_V8HI: -- case V16SF_FTYPE_V16SF_V16SF: -- case V8QI_FTYPE_V8QI_V8QI: -- case V8QI_FTYPE_V4HI_V4HI: -- case V8HI_FTYPE_V8HI_V8HI: -- case V8HI_FTYPE_V16QI_V16QI: -- case V8HI_FTYPE_V4SI_V4SI: -- case V8SF_FTYPE_V8SF_V8SF: -- case V8SF_FTYPE_V8SF_V8SI: -- case V8DF_FTYPE_V8DF_V8DF: -- case V4SI_FTYPE_V4SI_V4SI: -- case V4SI_FTYPE_V8HI_V8HI: -- case V4SI_FTYPE_V2DF_V2DF: -- case V4HI_FTYPE_V4HI_V4HI: -- case V4HI_FTYPE_V8QI_V8QI: -- case V4HI_FTYPE_V2SI_V2SI: -- case V4DF_FTYPE_V4DF_V4DF: -- case V4DF_FTYPE_V4DF_V4DI: -- case V4SF_FTYPE_V4SF_V4SF: -- case V4SF_FTYPE_V4SF_V4SI: -- case V4SF_FTYPE_V4SF_V2SI: -- case V4SF_FTYPE_V4SF_V2DF: -- case V4SF_FTYPE_V4SF_UINT: -- case V4SF_FTYPE_V4SF_DI: -- case V4SF_FTYPE_V4SF_SI: -- case V2DI_FTYPE_V2DI_V2DI: -- case V2DI_FTYPE_V16QI_V16QI: -- case V2DI_FTYPE_V4SI_V4SI: -- case V2DI_FTYPE_V2DI_V16QI: -- case V2SI_FTYPE_V2SI_V2SI: -- case V2SI_FTYPE_V4HI_V4HI: -- case V2SI_FTYPE_V2SF_V2SF: -- case V2DF_FTYPE_V2DF_V2DF: -- case V2DF_FTYPE_V2DF_V4SF: -- case V2DF_FTYPE_V2DF_V2DI: -- case V2DF_FTYPE_V2DF_DI: -- case V2DF_FTYPE_V2DF_SI: -- case V2DF_FTYPE_V2DF_UINT: -- case V2SF_FTYPE_V2SF_V2SF: -- case V1DI_FTYPE_V1DI_V1DI: -- case V1DI_FTYPE_V8QI_V8QI: -- case V1DI_FTYPE_V2SI_V2SI: -- case V32QI_FTYPE_V16HI_V16HI: -- case V16HI_FTYPE_V8SI_V8SI: -- case V64QI_FTYPE_V64QI_V64QI: -- case V32QI_FTYPE_V32QI_V32QI: -- case V16HI_FTYPE_V32QI_V32QI: -- case V16HI_FTYPE_V16HI_V16HI: -- case V8SI_FTYPE_V4DF_V4DF: -- case V8SI_FTYPE_V8SI_V8SI: -- case V8SI_FTYPE_V16HI_V16HI: -- case V4DI_FTYPE_V4DI_V4DI: -- case V4DI_FTYPE_V8SI_V8SI: -- case V8DI_FTYPE_V64QI_V64QI: -- if (comparison == UNKNOWN) -- return ix86_expand_binop_builtin (icode, exp, target); -- nargs = 2; -- break; -- case V4SF_FTYPE_V4SF_V4SF_SWAP: -- case V2DF_FTYPE_V2DF_V2DF_SWAP: -- gcc_assert (comparison != UNKNOWN); -- nargs = 2; -- swap = true; -- break; -- case 
V16HI_FTYPE_V16HI_V8HI_COUNT: -- case V16HI_FTYPE_V16HI_SI_COUNT: -- case V8SI_FTYPE_V8SI_V4SI_COUNT: -- case V8SI_FTYPE_V8SI_SI_COUNT: -- case V4DI_FTYPE_V4DI_V2DI_COUNT: -- case V4DI_FTYPE_V4DI_INT_COUNT: -- case V8HI_FTYPE_V8HI_V8HI_COUNT: -- case V8HI_FTYPE_V8HI_SI_COUNT: -- case V4SI_FTYPE_V4SI_V4SI_COUNT: -- case V4SI_FTYPE_V4SI_SI_COUNT: -- case V4HI_FTYPE_V4HI_V4HI_COUNT: -- case V4HI_FTYPE_V4HI_SI_COUNT: -- case V2DI_FTYPE_V2DI_V2DI_COUNT: -- case V2DI_FTYPE_V2DI_SI_COUNT: -- case V2SI_FTYPE_V2SI_V2SI_COUNT: -- case V2SI_FTYPE_V2SI_SI_COUNT: -- case V1DI_FTYPE_V1DI_V1DI_COUNT: -- case V1DI_FTYPE_V1DI_SI_COUNT: -- nargs = 2; -- second_arg_count = true; -- break; -- case V16HI_FTYPE_V16HI_INT_V16HI_UHI_COUNT: -- case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI_COUNT: -- case V16SI_FTYPE_V16SI_INT_V16SI_UHI_COUNT: -- case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI_COUNT: -- case V2DI_FTYPE_V2DI_INT_V2DI_UQI_COUNT: -- case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI_COUNT: -- case V32HI_FTYPE_V32HI_INT_V32HI_USI_COUNT: -- case V32HI_FTYPE_V32HI_V8HI_V32HI_USI_COUNT: -- case V4DI_FTYPE_V4DI_INT_V4DI_UQI_COUNT: -- case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI_COUNT: -- case V4SI_FTYPE_V4SI_INT_V4SI_UQI_COUNT: -- case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI_COUNT: -- case V8DI_FTYPE_V8DI_INT_V8DI_UQI_COUNT: -- case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI_COUNT: -- case V8HI_FTYPE_V8HI_INT_V8HI_UQI_COUNT: -- case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI_COUNT: -- case V8SI_FTYPE_V8SI_INT_V8SI_UQI_COUNT: -- case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI_COUNT: -- nargs = 4; -- second_arg_count = true; -- break; -- case UINT64_FTYPE_UINT64_UINT64: -- case UINT_FTYPE_UINT_UINT: -- case UINT_FTYPE_UINT_USHORT: -- case UINT_FTYPE_UINT_UCHAR: -- case UINT16_FTYPE_UINT16_INT: -- case UINT8_FTYPE_UINT8_INT: -- case UQI_FTYPE_UQI_UQI: -- case UHI_FTYPE_UHI_UHI: -- case USI_FTYPE_USI_USI: -- case UDI_FTYPE_UDI_UDI: -- case V16SI_FTYPE_V8DF_V8DF: -- nargs = 2; -- break; -- case V2DI_FTYPE_V2DI_INT_CONVERT: -- nargs = 2; -- rmode = V1TImode; -- nargs_constant = 1; -- break; -- case V4DI_FTYPE_V4DI_INT_CONVERT: -- nargs = 2; -- rmode = V2TImode; -- nargs_constant = 1; -- break; -- case V8DI_FTYPE_V8DI_INT_CONVERT: -- nargs = 2; -- rmode = V4TImode; -- nargs_constant = 1; -- break; -- case V8HI_FTYPE_V8HI_INT: -- case V8HI_FTYPE_V8SF_INT: -- case V16HI_FTYPE_V16SF_INT: -- case V8HI_FTYPE_V4SF_INT: -- case V8SF_FTYPE_V8SF_INT: -- case V4SF_FTYPE_V16SF_INT: -- case V16SF_FTYPE_V16SF_INT: -- case V4SI_FTYPE_V4SI_INT: -- case V4SI_FTYPE_V8SI_INT: -- case V4HI_FTYPE_V4HI_INT: -- case V4DF_FTYPE_V4DF_INT: -- case V4DF_FTYPE_V8DF_INT: -- case V4SF_FTYPE_V4SF_INT: -- case V4SF_FTYPE_V8SF_INT: -- case V2DI_FTYPE_V2DI_INT: -- case V2DF_FTYPE_V2DF_INT: -- case V2DF_FTYPE_V4DF_INT: -- case V16HI_FTYPE_V16HI_INT: -- case V8SI_FTYPE_V8SI_INT: -- case V16SI_FTYPE_V16SI_INT: -- case V4SI_FTYPE_V16SI_INT: -- case V4DI_FTYPE_V4DI_INT: -- case V2DI_FTYPE_V4DI_INT: -- case V4DI_FTYPE_V8DI_INT: -- case QI_FTYPE_V4SF_INT: -- case QI_FTYPE_V2DF_INT: -- case UQI_FTYPE_UQI_UQI_CONST: -- case UHI_FTYPE_UHI_UQI: -- case USI_FTYPE_USI_UQI: -- case UDI_FTYPE_UDI_UQI: -- nargs = 2; -- nargs_constant = 1; -- break; -- case V16QI_FTYPE_V16QI_V16QI_V16QI: -- case V8SF_FTYPE_V8SF_V8SF_V8SF: -- case V4DF_FTYPE_V4DF_V4DF_V4DF: -- case V4SF_FTYPE_V4SF_V4SF_V4SF: -- case V2DF_FTYPE_V2DF_V2DF_V2DF: -- case V32QI_FTYPE_V32QI_V32QI_V32QI: -- case UHI_FTYPE_V16SI_V16SI_UHI: -- case UQI_FTYPE_V8DI_V8DI_UQI: -- case V16HI_FTYPE_V16SI_V16HI_UHI: -- case V16QI_FTYPE_V16SI_V16QI_UHI: -- case V16QI_FTYPE_V8DI_V16QI_UQI: -- case 
V16SF_FTYPE_V16SF_V16SF_UHI: -- case V16SF_FTYPE_V4SF_V16SF_UHI: -- case V16SI_FTYPE_SI_V16SI_UHI: -- case V16SI_FTYPE_V16HI_V16SI_UHI: -- case V16SI_FTYPE_V16QI_V16SI_UHI: -- case V8SF_FTYPE_V4SF_V8SF_UQI: -- case V4DF_FTYPE_V2DF_V4DF_UQI: -- case V8SI_FTYPE_V4SI_V8SI_UQI: -- case V8SI_FTYPE_SI_V8SI_UQI: -- case V4SI_FTYPE_V4SI_V4SI_UQI: -- case V4SI_FTYPE_SI_V4SI_UQI: -- case V4DI_FTYPE_V2DI_V4DI_UQI: -- case V4DI_FTYPE_DI_V4DI_UQI: -- case V2DI_FTYPE_V2DI_V2DI_UQI: -- case V2DI_FTYPE_DI_V2DI_UQI: -- case V64QI_FTYPE_V64QI_V64QI_UDI: -- case V64QI_FTYPE_V16QI_V64QI_UDI: -- case V64QI_FTYPE_QI_V64QI_UDI: -- case V32QI_FTYPE_V32QI_V32QI_USI: -- case V32QI_FTYPE_V16QI_V32QI_USI: -- case V32QI_FTYPE_QI_V32QI_USI: -- case V16QI_FTYPE_V16QI_V16QI_UHI: -- case V16QI_FTYPE_QI_V16QI_UHI: -- case V32HI_FTYPE_V8HI_V32HI_USI: -- case V32HI_FTYPE_HI_V32HI_USI: -- case V16HI_FTYPE_V8HI_V16HI_UHI: -- case V16HI_FTYPE_HI_V16HI_UHI: -- case V8HI_FTYPE_V8HI_V8HI_UQI: -- case V8HI_FTYPE_HI_V8HI_UQI: -- case V8SF_FTYPE_V8HI_V8SF_UQI: -- case V4SF_FTYPE_V8HI_V4SF_UQI: -- case V8SI_FTYPE_V8SF_V8SI_UQI: -- case V4SI_FTYPE_V4SF_V4SI_UQI: -- case V4DI_FTYPE_V4SF_V4DI_UQI: -- case V2DI_FTYPE_V4SF_V2DI_UQI: -- case V4SF_FTYPE_V4DI_V4SF_UQI: -- case V4SF_FTYPE_V2DI_V4SF_UQI: -- case V4DF_FTYPE_V4DI_V4DF_UQI: -- case V2DF_FTYPE_V2DI_V2DF_UQI: -- case V16QI_FTYPE_V8HI_V16QI_UQI: -- case V16QI_FTYPE_V16HI_V16QI_UHI: -- case V16QI_FTYPE_V4SI_V16QI_UQI: -- case V16QI_FTYPE_V8SI_V16QI_UQI: -- case V8HI_FTYPE_V4SI_V8HI_UQI: -- case V8HI_FTYPE_V8SI_V8HI_UQI: -- case V16QI_FTYPE_V2DI_V16QI_UQI: -- case V16QI_FTYPE_V4DI_V16QI_UQI: -- case V8HI_FTYPE_V2DI_V8HI_UQI: -- case V8HI_FTYPE_V4DI_V8HI_UQI: -- case V4SI_FTYPE_V2DI_V4SI_UQI: -- case V4SI_FTYPE_V4DI_V4SI_UQI: -- case V32QI_FTYPE_V32HI_V32QI_USI: -- case UHI_FTYPE_V16QI_V16QI_UHI: -- case USI_FTYPE_V32QI_V32QI_USI: -- case UDI_FTYPE_V64QI_V64QI_UDI: -- case UQI_FTYPE_V8HI_V8HI_UQI: -- case UHI_FTYPE_V16HI_V16HI_UHI: -- case USI_FTYPE_V32HI_V32HI_USI: -- case UQI_FTYPE_V4SI_V4SI_UQI: -- case UQI_FTYPE_V8SI_V8SI_UQI: -- case UQI_FTYPE_V2DI_V2DI_UQI: -- case UQI_FTYPE_V4DI_V4DI_UQI: -- case V4SF_FTYPE_V2DF_V4SF_UQI: -- case V4SF_FTYPE_V4DF_V4SF_UQI: -- case V16SI_FTYPE_V16SI_V16SI_UHI: -- case V16SI_FTYPE_V4SI_V16SI_UHI: -- case V2DI_FTYPE_V4SI_V2DI_UQI: -- case V2DI_FTYPE_V8HI_V2DI_UQI: -- case V2DI_FTYPE_V16QI_V2DI_UQI: -- case V4DI_FTYPE_V4DI_V4DI_UQI: -- case V4DI_FTYPE_V4SI_V4DI_UQI: -- case V4DI_FTYPE_V8HI_V4DI_UQI: -- case V4DI_FTYPE_V16QI_V4DI_UQI: -- case V4DI_FTYPE_V4DF_V4DI_UQI: -- case V2DI_FTYPE_V2DF_V2DI_UQI: -- case V4SI_FTYPE_V4DF_V4SI_UQI: -- case V4SI_FTYPE_V2DF_V4SI_UQI: -- case V4SI_FTYPE_V8HI_V4SI_UQI: -- case V4SI_FTYPE_V16QI_V4SI_UQI: -- case V4DI_FTYPE_V4DI_V4DI_V4DI: -- case V8DF_FTYPE_V2DF_V8DF_UQI: -- case V8DF_FTYPE_V4DF_V8DF_UQI: -- case V8DF_FTYPE_V8DF_V8DF_UQI: -- case V8SF_FTYPE_V8SF_V8SF_UQI: -- case V8SF_FTYPE_V8SI_V8SF_UQI: -- case V4DF_FTYPE_V4DF_V4DF_UQI: -- case V4SF_FTYPE_V4SF_V4SF_UQI: -- case V2DF_FTYPE_V2DF_V2DF_UQI: -- case V2DF_FTYPE_V4SF_V2DF_UQI: -- case V2DF_FTYPE_V4SI_V2DF_UQI: -- case V4SF_FTYPE_V4SI_V4SF_UQI: -- case V4DF_FTYPE_V4SF_V4DF_UQI: -- case V4DF_FTYPE_V4SI_V4DF_UQI: -- case V8SI_FTYPE_V8SI_V8SI_UQI: -- case V8SI_FTYPE_V8HI_V8SI_UQI: -- case V8SI_FTYPE_V16QI_V8SI_UQI: -- case V8DF_FTYPE_V8SI_V8DF_UQI: -- case V8DI_FTYPE_DI_V8DI_UQI: -- case V16SF_FTYPE_V8SF_V16SF_UHI: -- case V16SI_FTYPE_V8SI_V16SI_UHI: -- case V16HI_FTYPE_V16HI_V16HI_UHI: -- case V8HI_FTYPE_V16QI_V8HI_UQI: -- case V16HI_FTYPE_V16QI_V16HI_UHI: -- 
case V32HI_FTYPE_V32HI_V32HI_USI: -- case V32HI_FTYPE_V32QI_V32HI_USI: -- case V8DI_FTYPE_V16QI_V8DI_UQI: -- case V8DI_FTYPE_V2DI_V8DI_UQI: -- case V8DI_FTYPE_V4DI_V8DI_UQI: -- case V8DI_FTYPE_V8DI_V8DI_UQI: -- case V8DI_FTYPE_V8HI_V8DI_UQI: -- case V8DI_FTYPE_V8SI_V8DI_UQI: -- case V8HI_FTYPE_V8DI_V8HI_UQI: -- case V8SI_FTYPE_V8DI_V8SI_UQI: -- case V4SI_FTYPE_V4SI_V4SI_V4SI: -- case V16SI_FTYPE_V16SI_V16SI_V16SI: -- case V8DI_FTYPE_V8DI_V8DI_V8DI: -- case V32HI_FTYPE_V32HI_V32HI_V32HI: -- case V2DI_FTYPE_V2DI_V2DI_V2DI: -- case V16HI_FTYPE_V16HI_V16HI_V16HI: -- case V8SI_FTYPE_V8SI_V8SI_V8SI: -- case V8HI_FTYPE_V8HI_V8HI_V8HI: -- nargs = 3; -- break; -- case V32QI_FTYPE_V32QI_V32QI_INT: -- case V16HI_FTYPE_V16HI_V16HI_INT: -- case V16QI_FTYPE_V16QI_V16QI_INT: -- case V4DI_FTYPE_V4DI_V4DI_INT: -- case V8HI_FTYPE_V8HI_V8HI_INT: -- case V8SI_FTYPE_V8SI_V8SI_INT: -- case V8SI_FTYPE_V8SI_V4SI_INT: -- case V8SF_FTYPE_V8SF_V8SF_INT: -- case V8SF_FTYPE_V8SF_V4SF_INT: -- case V4SI_FTYPE_V4SI_V4SI_INT: -- case V4DF_FTYPE_V4DF_V4DF_INT: -- case V16SF_FTYPE_V16SF_V16SF_INT: -- case V16SF_FTYPE_V16SF_V4SF_INT: -- case V16SI_FTYPE_V16SI_V4SI_INT: -- case V4DF_FTYPE_V4DF_V2DF_INT: -- case V4SF_FTYPE_V4SF_V4SF_INT: -- case V2DI_FTYPE_V2DI_V2DI_INT: -- case V4DI_FTYPE_V4DI_V2DI_INT: -- case V2DF_FTYPE_V2DF_V2DF_INT: -- case UQI_FTYPE_V8DI_V8UDI_INT: -- case UQI_FTYPE_V8DF_V8DF_INT: -- case UQI_FTYPE_V2DF_V2DF_INT: -- case UQI_FTYPE_V4SF_V4SF_INT: -- case UHI_FTYPE_V16SI_V16SI_INT: -- case UHI_FTYPE_V16SF_V16SF_INT: -- case V64QI_FTYPE_V64QI_V64QI_INT: -- case V32HI_FTYPE_V32HI_V32HI_INT: -- case V16SI_FTYPE_V16SI_V16SI_INT: -- case V8DI_FTYPE_V8DI_V8DI_INT: -- nargs = 3; -- nargs_constant = 1; -- break; -- case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT: -- nargs = 3; -- rmode = V4DImode; -- nargs_constant = 1; -- break; -- case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT: -- nargs = 3; -- rmode = V2DImode; -- nargs_constant = 1; -- break; -- case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT: -- nargs = 3; -- rmode = DImode; -- nargs_constant = 1; -- break; -- case V2DI_FTYPE_V2DI_UINT_UINT: -- nargs = 3; -- nargs_constant = 2; -- break; -- case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT: -- nargs = 3; -- rmode = V8DImode; -- nargs_constant = 1; -- break; -- case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UDI_CONVERT: -- nargs = 5; -- rmode = V8DImode; -- mask_pos = 2; -- nargs_constant = 1; -- break; -- case QI_FTYPE_V8DF_INT_UQI: -- case QI_FTYPE_V4DF_INT_UQI: -- case QI_FTYPE_V2DF_INT_UQI: -- case HI_FTYPE_V16SF_INT_UHI: -- case QI_FTYPE_V8SF_INT_UQI: -- case QI_FTYPE_V4SF_INT_UQI: -- case V4SI_FTYPE_V4SI_V4SI_UHI: -- case V8SI_FTYPE_V8SI_V8SI_UHI: -- nargs = 3; -- mask_pos = 1; -- nargs_constant = 1; -- break; -- case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_USI_CONVERT: -- nargs = 5; -- rmode = V4DImode; -- mask_pos = 2; -- nargs_constant = 1; -- break; -- case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UHI_CONVERT: -- nargs = 5; -- rmode = V2DImode; -- mask_pos = 2; -- nargs_constant = 1; -- break; -- case V32QI_FTYPE_V32QI_V32QI_V32QI_USI: -- case V32HI_FTYPE_V32HI_V32HI_V32HI_USI: -- case V32HI_FTYPE_V64QI_V64QI_V32HI_USI: -- case V16SI_FTYPE_V32HI_V32HI_V16SI_UHI: -- case V64QI_FTYPE_V64QI_V64QI_V64QI_UDI: -- case V32HI_FTYPE_V32HI_V8HI_V32HI_USI: -- case V16HI_FTYPE_V16HI_V8HI_V16HI_UHI: -- case V8SI_FTYPE_V8SI_V4SI_V8SI_UQI: -- case V4DI_FTYPE_V4DI_V2DI_V4DI_UQI: -- case V64QI_FTYPE_V32HI_V32HI_V64QI_UDI: -- case V32QI_FTYPE_V16HI_V16HI_V32QI_USI: -- case V16QI_FTYPE_V8HI_V8HI_V16QI_UHI: -- case V32HI_FTYPE_V16SI_V16SI_V32HI_USI: -- case V16HI_FTYPE_V8SI_V8SI_V16HI_UHI: -- 
case V8HI_FTYPE_V4SI_V4SI_V8HI_UQI: -- case V4DF_FTYPE_V4DF_V4DI_V4DF_UQI: -- case V8SF_FTYPE_V8SF_V8SI_V8SF_UQI: -- case V4SF_FTYPE_V4SF_V4SI_V4SF_UQI: -- case V2DF_FTYPE_V2DF_V2DI_V2DF_UQI: -- case V2DI_FTYPE_V4SI_V4SI_V2DI_UQI: -- case V4DI_FTYPE_V8SI_V8SI_V4DI_UQI: -- case V4DF_FTYPE_V4DI_V4DF_V4DF_UQI: -- case V8SF_FTYPE_V8SI_V8SF_V8SF_UQI: -- case V2DF_FTYPE_V2DI_V2DF_V2DF_UQI: -- case V4SF_FTYPE_V4SI_V4SF_V4SF_UQI: -- case V16SF_FTYPE_V16SF_V16SF_V16SF_UHI: -- case V16SF_FTYPE_V16SF_V16SI_V16SF_UHI: -- case V16SF_FTYPE_V16SI_V16SF_V16SF_UHI: -- case V16SI_FTYPE_V16SI_V16SI_V16SI_UHI: -- case V16SI_FTYPE_V16SI_V4SI_V16SI_UHI: -- case V8HI_FTYPE_V8HI_V8HI_V8HI_UQI: -- case V8SI_FTYPE_V8SI_V8SI_V8SI_UQI: -- case V4SI_FTYPE_V4SI_V4SI_V4SI_UQI: -- case V8SF_FTYPE_V8SF_V8SF_V8SF_UQI: -- case V16QI_FTYPE_V16QI_V16QI_V16QI_UHI: -- case V16HI_FTYPE_V16HI_V16HI_V16HI_UHI: -- case V2DI_FTYPE_V2DI_V2DI_V2DI_UQI: -- case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI: -- case V4DI_FTYPE_V4DI_V4DI_V4DI_UQI: -- case V4DF_FTYPE_V4DF_V4DF_V4DF_UQI: -- case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI: -- case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI: -- case V8DF_FTYPE_V8DF_V8DI_V8DF_UQI: -- case V8DF_FTYPE_V8DI_V8DF_V8DF_UQI: -- case V8DI_FTYPE_V16SI_V16SI_V8DI_UQI: -- case V8DI_FTYPE_V8DI_V2DI_V8DI_UQI: -- case V8DI_FTYPE_V8DI_V8DI_V8DI_UQI: -- case V8HI_FTYPE_V16QI_V16QI_V8HI_UQI: -- case V16HI_FTYPE_V32QI_V32QI_V16HI_UHI: -- case V8SI_FTYPE_V16HI_V16HI_V8SI_UQI: -- case V4SI_FTYPE_V8HI_V8HI_V4SI_UQI: -- nargs = 4; -- break; -- case V2DF_FTYPE_V2DF_V2DF_V2DI_INT: -- case V4DF_FTYPE_V4DF_V4DF_V4DI_INT: -- case V4SF_FTYPE_V4SF_V4SF_V4SI_INT: -- case V8SF_FTYPE_V8SF_V8SF_V8SI_INT: -- case V16SF_FTYPE_V16SF_V16SF_V16SI_INT: -- nargs = 4; -- nargs_constant = 1; -- break; -- case UQI_FTYPE_V4DI_V4DI_INT_UQI: -- case UQI_FTYPE_V8SI_V8SI_INT_UQI: -- case QI_FTYPE_V4DF_V4DF_INT_UQI: -- case QI_FTYPE_V8SF_V8SF_INT_UQI: -- case UQI_FTYPE_V2DI_V2DI_INT_UQI: -- case UQI_FTYPE_V4SI_V4SI_INT_UQI: -- case UQI_FTYPE_V2DF_V2DF_INT_UQI: -- case UQI_FTYPE_V4SF_V4SF_INT_UQI: -- case UDI_FTYPE_V64QI_V64QI_INT_UDI: -- case USI_FTYPE_V32QI_V32QI_INT_USI: -- case UHI_FTYPE_V16QI_V16QI_INT_UHI: -- case USI_FTYPE_V32HI_V32HI_INT_USI: -- case UHI_FTYPE_V16HI_V16HI_INT_UHI: -- case UQI_FTYPE_V8HI_V8HI_INT_UQI: -- case V32HI_FTYPE_V32HI_V32HI_V32HI_INT: -- case V16HI_FTYPE_V16HI_V16HI_V16HI_INT: -- case V8HI_FTYPE_V8HI_V8HI_V8HI_INT: -- case V8SI_FTYPE_V8SI_V8SI_V8SI_INT: -- case V4DI_FTYPE_V4DI_V4DI_V4DI_INT: -- case V8DI_FTYPE_V8DI_V8DI_V8DI_INT: -- case V16SI_FTYPE_V16SI_V16SI_V16SI_INT: -- case V2DI_FTYPE_V2DI_V2DI_V2DI_INT: -- case V4SI_FTYPE_V4SI_V4SI_V4SI_INT: -- nargs = 4; -- mask_pos = 1; -- nargs_constant = 1; -- break; -- case V2DI_FTYPE_V2DI_V2DI_UINT_UINT: -- nargs = 4; -- nargs_constant = 2; -- break; -- case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED: -- case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG: -- nargs = 4; -- break; -- case UQI_FTYPE_V8DI_V8DI_INT_UQI: -- case UHI_FTYPE_V16SI_V16SI_INT_UHI: -- mask_pos = 1; -- nargs = 4; -- nargs_constant = 1; -- break; -- case V8SF_FTYPE_V8SF_INT_V8SF_UQI: -- case V4SF_FTYPE_V4SF_INT_V4SF_UQI: -- case V2DF_FTYPE_V4DF_INT_V2DF_UQI: -- case V2DI_FTYPE_V4DI_INT_V2DI_UQI: -- case V8SF_FTYPE_V16SF_INT_V8SF_UQI: -- case V8SI_FTYPE_V16SI_INT_V8SI_UQI: -- case V2DF_FTYPE_V8DF_INT_V2DF_UQI: -- case V2DI_FTYPE_V8DI_INT_V2DI_UQI: -- case V4SF_FTYPE_V8SF_INT_V4SF_UQI: -- case V4SI_FTYPE_V8SI_INT_V4SI_UQI: -- case V8HI_FTYPE_V8SF_INT_V8HI_UQI: -- case V8HI_FTYPE_V4SF_INT_V8HI_UQI: -- case V32HI_FTYPE_V32HI_INT_V32HI_USI: 
-- case V16HI_FTYPE_V16HI_INT_V16HI_UHI: -- case V8HI_FTYPE_V8HI_INT_V8HI_UQI: -- case V4DI_FTYPE_V4DI_INT_V4DI_UQI: -- case V2DI_FTYPE_V2DI_INT_V2DI_UQI: -- case V8SI_FTYPE_V8SI_INT_V8SI_UQI: -- case V4SI_FTYPE_V4SI_INT_V4SI_UQI: -- case V4DF_FTYPE_V4DF_INT_V4DF_UQI: -- case V2DF_FTYPE_V2DF_INT_V2DF_UQI: -- case V8DF_FTYPE_V8DF_INT_V8DF_UQI: -- case V16SF_FTYPE_V16SF_INT_V16SF_UHI: -- case V16HI_FTYPE_V16SF_INT_V16HI_UHI: -- case V16SI_FTYPE_V16SI_INT_V16SI_UHI: -- case V4SI_FTYPE_V16SI_INT_V4SI_UQI: -- case V4DI_FTYPE_V8DI_INT_V4DI_UQI: -- case V4DF_FTYPE_V8DF_INT_V4DF_UQI: -- case V4SF_FTYPE_V16SF_INT_V4SF_UQI: -- case V8DI_FTYPE_V8DI_INT_V8DI_UQI: -- nargs = 4; -- mask_pos = 2; -- nargs_constant = 1; -- break; -- case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_UHI: -- case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_UHI: -- case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI: -- case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_UQI: -- case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI: -- case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_UHI: -- case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI: -- case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI: -- case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_UQI: -- case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_UQI: -- case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_UQI: -- case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_UQI: -- case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_UQI: -- case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_UQI: -- case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_UQI: -- case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_UQI: -- case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_UQI: -- case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_UQI: -- case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_USI: -- case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_UHI: -- case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_UQI: -- case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_UHI: -- case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_UHI: -- case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_UQI: -- case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_UQI: -- case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_UQI: -- case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_UQI: -- nargs = 5; -- mask_pos = 2; -- nargs_constant = 1; -- break; -- case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_UQI: -- case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_UHI: -- case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_UQI: -- case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_UQI: -- case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_UQI: -- case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_UQI: -- case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_UQI: -- case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_UQI: -- case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_UQI: -- case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_UQI: -- nargs = 5; -- mask_pos = 1; -- nargs_constant = 1; -- break; -- case V64QI_FTYPE_V64QI_V64QI_INT_V64QI_UDI: -- case V32QI_FTYPE_V32QI_V32QI_INT_V32QI_USI: -- case V16QI_FTYPE_V16QI_V16QI_INT_V16QI_UHI: -- case V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT: -- case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_INT: -- case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_INT: -- case V16HI_FTYPE_V16HI_V16HI_INT_V16HI_INT: -- case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_INT: -- case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_INT: -- case V8HI_FTYPE_V8HI_V8HI_INT_V8HI_INT: -- case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_INT: -- case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_INT: -- nargs = 5; -- mask_pos = 1; -- nargs_constant = 2; -- break; -- -- default: -- gcc_unreachable (); -- } -- -- gcc_assert (nargs <= ARRAY_SIZE (args)); -- -- if (comparison != UNKNOWN) -- { -- gcc_assert (nargs == 2); -- return ix86_expand_sse_compare (d, exp, target, swap); -- } -- -- if (rmode == VOIDmode || rmode == tmode) -- { -- if (optimize -- || target == 0 -- || GET_MODE (target) != tmode -- || !insn_p->operand[0].predicate (target, tmode)) -- target = gen_reg_rtx (tmode); -- else if (memory_operand (target, tmode)) -- 
num_memory++; -- real_target = target; -- } -- else -- { -- real_target = gen_reg_rtx (tmode); -- target = lowpart_subreg (rmode, real_target, tmode); -- } -- -- for (i = 0; i < nargs; i++) -- { -- tree arg = CALL_EXPR_ARG (exp, i); -- rtx op = expand_normal (arg); -- machine_mode mode = insn_p->operand[i + 1].mode; -- bool match = insn_p->operand[i + 1].predicate (op, mode); -- -- if (second_arg_count && i == 1) -- { -- /* SIMD shift insns take either an 8-bit immediate or -- register as count. But builtin functions take int as -- count. If count doesn't match, we put it in register. -- The instructions are using 64-bit count, if op is just -- 32-bit, zero-extend it, as negative shift counts -- are undefined behavior and zero-extension is more -- efficient. */ -- if (!match) -- { -- if (SCALAR_INT_MODE_P (GET_MODE (op))) -- op = convert_modes (mode, GET_MODE (op), op, 1); -- else -- op = lowpart_subreg (mode, op, GET_MODE (op)); -- if (!insn_p->operand[i + 1].predicate (op, mode)) -- op = copy_to_reg (op); -- } -- } -- else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) || -- (!mask_pos && (nargs - i) <= nargs_constant)) -- { -- if (!match) -- switch (icode) -- { -- case CODE_FOR_avx_vinsertf128v4di: -- case CODE_FOR_avx_vextractf128v4di: -- error ("the last argument must be an 1-bit immediate"); -- return const0_rtx; -- -- case CODE_FOR_avx512f_cmpv8di3_mask: -- case CODE_FOR_avx512f_cmpv16si3_mask: -- case CODE_FOR_avx512f_ucmpv8di3_mask: -- case CODE_FOR_avx512f_ucmpv16si3_mask: -- case CODE_FOR_avx512vl_cmpv4di3_mask: -- case CODE_FOR_avx512vl_cmpv8si3_mask: -- case CODE_FOR_avx512vl_ucmpv4di3_mask: -- case CODE_FOR_avx512vl_ucmpv8si3_mask: -- case CODE_FOR_avx512vl_cmpv2di3_mask: -- case CODE_FOR_avx512vl_cmpv4si3_mask: -- case CODE_FOR_avx512vl_ucmpv2di3_mask: -- case CODE_FOR_avx512vl_ucmpv4si3_mask: -- error ("the last argument must be a 3-bit immediate"); -- return const0_rtx; -- -- case CODE_FOR_sse4_1_roundsd: -- case CODE_FOR_sse4_1_roundss: -- -- case CODE_FOR_sse4_1_roundpd: -- case CODE_FOR_sse4_1_roundps: -- case CODE_FOR_avx_roundpd256: -- case CODE_FOR_avx_roundps256: -- -- case CODE_FOR_sse4_1_roundpd_vec_pack_sfix: -- case CODE_FOR_sse4_1_roundps_sfix: -- case CODE_FOR_avx_roundpd_vec_pack_sfix256: -- case CODE_FOR_avx_roundps_sfix256: -- -- case CODE_FOR_sse4_1_blendps: -- case CODE_FOR_avx_blendpd256: -- case CODE_FOR_avx_vpermilv4df: -- case CODE_FOR_avx_vpermilv4df_mask: -- case CODE_FOR_avx512f_getmantv8df_mask: -- case CODE_FOR_avx512f_getmantv16sf_mask: -- case CODE_FOR_avx512vl_getmantv8sf_mask: -- case CODE_FOR_avx512vl_getmantv4df_mask: -- case CODE_FOR_avx512vl_getmantv4sf_mask: -- case CODE_FOR_avx512vl_getmantv2df_mask: -- case CODE_FOR_avx512dq_rangepv8df_mask_round: -- case CODE_FOR_avx512dq_rangepv16sf_mask_round: -- case CODE_FOR_avx512dq_rangepv4df_mask: -- case CODE_FOR_avx512dq_rangepv8sf_mask: -- case CODE_FOR_avx512dq_rangepv2df_mask: -- case CODE_FOR_avx512dq_rangepv4sf_mask: -- case CODE_FOR_avx_shufpd256_mask: -- error ("the last argument must be a 4-bit immediate"); -- return const0_rtx; -- -- case CODE_FOR_sha1rnds4: -- case CODE_FOR_sse4_1_blendpd: -- case CODE_FOR_avx_vpermilv2df: -- case CODE_FOR_avx_vpermilv2df_mask: -- case CODE_FOR_xop_vpermil2v2df3: -- case CODE_FOR_xop_vpermil2v4sf3: -- case CODE_FOR_xop_vpermil2v4df3: -- case CODE_FOR_xop_vpermil2v8sf3: -- case CODE_FOR_avx512f_vinsertf32x4_mask: -- case CODE_FOR_avx512f_vinserti32x4_mask: -- case CODE_FOR_avx512f_vextractf32x4_mask: -- case 
CODE_FOR_avx512f_vextracti32x4_mask: -- case CODE_FOR_sse2_shufpd: -- case CODE_FOR_sse2_shufpd_mask: -- case CODE_FOR_avx512dq_shuf_f64x2_mask: -- case CODE_FOR_avx512dq_shuf_i64x2_mask: -- case CODE_FOR_avx512vl_shuf_i32x4_mask: -- case CODE_FOR_avx512vl_shuf_f32x4_mask: -- error ("the last argument must be a 2-bit immediate"); -- return const0_rtx; -- -- case CODE_FOR_avx_vextractf128v4df: -- case CODE_FOR_avx_vextractf128v8sf: -- case CODE_FOR_avx_vextractf128v8si: -- case CODE_FOR_avx_vinsertf128v4df: -- case CODE_FOR_avx_vinsertf128v8sf: -- case CODE_FOR_avx_vinsertf128v8si: -- case CODE_FOR_avx512f_vinsertf64x4_mask: -- case CODE_FOR_avx512f_vinserti64x4_mask: -- case CODE_FOR_avx512f_vextractf64x4_mask: -- case CODE_FOR_avx512f_vextracti64x4_mask: -- case CODE_FOR_avx512dq_vinsertf32x8_mask: -- case CODE_FOR_avx512dq_vinserti32x8_mask: -- case CODE_FOR_avx512vl_vinsertv4df: -- case CODE_FOR_avx512vl_vinsertv4di: -- case CODE_FOR_avx512vl_vinsertv8sf: -- case CODE_FOR_avx512vl_vinsertv8si: -- error ("the last argument must be a 1-bit immediate"); -- return const0_rtx; -- -- case CODE_FOR_avx_vmcmpv2df3: -- case CODE_FOR_avx_vmcmpv4sf3: -- case CODE_FOR_avx_cmpv2df3: -- case CODE_FOR_avx_cmpv4sf3: -- case CODE_FOR_avx_cmpv4df3: -- case CODE_FOR_avx_cmpv8sf3: -- case CODE_FOR_avx512f_cmpv8df3_mask: -- case CODE_FOR_avx512f_cmpv16sf3_mask: -- case CODE_FOR_avx512f_vmcmpv2df3_mask: -- case CODE_FOR_avx512f_vmcmpv4sf3_mask: -- error ("the last argument must be a 5-bit immediate"); -- return const0_rtx; -- -- default: -- switch (nargs_constant) -- { -- case 2: -- if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) || -- (!mask_pos && (nargs - i) == nargs_constant)) -- { -- error ("the next to last argument must be an 8-bit immediate"); -- break; -- } -- /* FALLTHRU */ -- case 1: -- error ("the last argument must be an 8-bit immediate"); -- break; -- default: -- gcc_unreachable (); -- } -- return const0_rtx; -- } -- } -- else -- { -- if (VECTOR_MODE_P (mode)) -- op = safe_vector_operand (op, mode); -- -- /* If we aren't optimizing, only allow one memory operand to -- be generated. */ -- if (memory_operand (op, mode)) -- num_memory++; -- -- op = fixup_modeless_constant (op, mode); -- -- if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode) -- { -- if (optimize || !match || num_memory > 1) -- op = copy_to_mode_reg (mode, op); -- } -- else -- { -- op = copy_to_reg (op); -- op = lowpart_subreg (mode, op, GET_MODE (op)); -- } -- } -- -- args[i].op = op; -- args[i].mode = mode; -- } -- -- switch (nargs) -- { -- case 1: -- pat = GEN_FCN (icode) (real_target, args[0].op); -- break; -- case 2: -- pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op); -- break; -- case 3: -- pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op, -- args[2].op); -- break; -- case 4: -- pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op, -- args[2].op, args[3].op); -- break; -- case 5: -- pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op, -- args[2].op, args[3].op, args[4].op); -- break; -- case 6: -- pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op, -- args[2].op, args[3].op, args[4].op, -- args[5].op); -- break; -- default: -- gcc_unreachable (); -- } -- -- if (! 
pat) -- return 0; -- -- emit_insn (pat); -- return target; --} -- --/* Transform pattern of following layout: -- (set A -- (unspec [B C] UNSPEC_EMBEDDED_ROUNDING)) -- ) -- into: -- (set (A B)) */ -- --static rtx --ix86_erase_embedded_rounding (rtx pat) --{ -- if (GET_CODE (pat) == INSN) -- pat = PATTERN (pat); -- -- gcc_assert (GET_CODE (pat) == SET); -- rtx src = SET_SRC (pat); -- gcc_assert (XVECLEN (src, 0) == 2); -- rtx p0 = XVECEXP (src, 0, 0); -- gcc_assert (GET_CODE (src) == UNSPEC -- && XINT (src, 1) == UNSPEC_EMBEDDED_ROUNDING); -- rtx res = gen_rtx_SET (SET_DEST (pat), p0); -- return res; --} -- --/* Subroutine of ix86_expand_round_builtin to take care of comi insns -- with rounding. */ --static rtx --ix86_expand_sse_comi_round (const struct builtin_description *d, -- tree exp, rtx target) --{ -- rtx pat, set_dst; -- tree arg0 = CALL_EXPR_ARG (exp, 0); -- tree arg1 = CALL_EXPR_ARG (exp, 1); -- tree arg2 = CALL_EXPR_ARG (exp, 2); -- tree arg3 = CALL_EXPR_ARG (exp, 3); -- rtx op0 = expand_normal (arg0); -- rtx op1 = expand_normal (arg1); -- rtx op2 = expand_normal (arg2); -- rtx op3 = expand_normal (arg3); -- enum insn_code icode = d->icode; -- const struct insn_data_d *insn_p = &insn_data[icode]; -- machine_mode mode0 = insn_p->operand[0].mode; -- machine_mode mode1 = insn_p->operand[1].mode; -- enum rtx_code comparison = UNEQ; -- bool need_ucomi = false; -- -- /* See avxintrin.h for values. */ -- enum rtx_code comi_comparisons[32] = -- { -- UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT, -- UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE, -- UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT -- }; -- bool need_ucomi_values[32] = -- { -- true, false, false, true, true, false, false, true, -- true, false, false, true, true, false, false, true, -- false, true, true, false, false, true, true, false, -- false, true, true, false, false, true, true, false -- }; -- -- if (!CONST_INT_P (op2)) -- { -- error ("the third argument must be comparison constant"); -- return const0_rtx; -- } -- if (INTVAL (op2) < 0 || INTVAL (op2) >= 32) -- { -- error ("incorrect comparison mode"); -- return const0_rtx; -- } -- -- if (!insn_p->operand[2].predicate (op3, SImode)) -- { -- error ("incorrect rounding operand"); -- return const0_rtx; -- } -- -- comparison = comi_comparisons[INTVAL (op2)]; -- need_ucomi = need_ucomi_values[INTVAL (op2)]; -- -- if (VECTOR_MODE_P (mode0)) -- op0 = safe_vector_operand (op0, mode0); -- if (VECTOR_MODE_P (mode1)) -- op1 = safe_vector_operand (op1, mode1); -- -- target = gen_reg_rtx (SImode); -- emit_move_insn (target, const0_rtx); -- target = gen_rtx_SUBREG (QImode, target, 0); -- -- if ((optimize && !register_operand (op0, mode0)) -- || !insn_p->operand[0].predicate (op0, mode0)) -- op0 = copy_to_mode_reg (mode0, op0); -- if ((optimize && !register_operand (op1, mode1)) -- || !insn_p->operand[1].predicate (op1, mode1)) -- op1 = copy_to_mode_reg (mode1, op1); -- -- if (need_ucomi) -- icode = icode == CODE_FOR_sse_comi_round -- ? CODE_FOR_sse_ucomi_round -- : CODE_FOR_sse2_ucomi_round; -- -- pat = GEN_FCN (icode) (op0, op1, op3); -- if (! pat) -- return 0; -- -- /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */ -- if (INTVAL (op3) == NO_ROUND) -- { -- pat = ix86_erase_embedded_rounding (pat); -- if (! 
pat) -- return 0; -- -- set_dst = SET_DEST (pat); -- } -- else -- { -- gcc_assert (GET_CODE (pat) == SET); -- set_dst = SET_DEST (pat); -- } -- -- emit_insn (pat); -- emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target), -- gen_rtx_fmt_ee (comparison, QImode, -- set_dst, -- const0_rtx))); -- -- return SUBREG_REG (target); --} -- --static rtx --ix86_expand_round_builtin (const struct builtin_description *d, -- tree exp, rtx target) --{ -- rtx pat; -- unsigned int i, nargs; -- struct -- { -- rtx op; -- machine_mode mode; -- } args[6]; -- enum insn_code icode = d->icode; -- const struct insn_data_d *insn_p = &insn_data[icode]; -- machine_mode tmode = insn_p->operand[0].mode; -- unsigned int nargs_constant = 0; -- unsigned int redundant_embed_rnd = 0; -- -- switch ((enum ix86_builtin_func_type) d->flag) -- { -- case UINT64_FTYPE_V2DF_INT: -- case UINT64_FTYPE_V4SF_INT: -- case UINT_FTYPE_V2DF_INT: -- case UINT_FTYPE_V4SF_INT: -- case INT64_FTYPE_V2DF_INT: -- case INT64_FTYPE_V4SF_INT: -- case INT_FTYPE_V2DF_INT: -- case INT_FTYPE_V4SF_INT: -- nargs = 2; -- break; -- case V4SF_FTYPE_V4SF_UINT_INT: -- case V4SF_FTYPE_V4SF_UINT64_INT: -- case V2DF_FTYPE_V2DF_UINT64_INT: -- case V4SF_FTYPE_V4SF_INT_INT: -- case V4SF_FTYPE_V4SF_INT64_INT: -- case V2DF_FTYPE_V2DF_INT64_INT: -- case V4SF_FTYPE_V4SF_V4SF_INT: -- case V2DF_FTYPE_V2DF_V2DF_INT: -- case V4SF_FTYPE_V4SF_V2DF_INT: -- case V2DF_FTYPE_V2DF_V4SF_INT: -- nargs = 3; -- break; -- case V8SF_FTYPE_V8DF_V8SF_QI_INT: -- case V8DF_FTYPE_V8DF_V8DF_QI_INT: -- case V8SI_FTYPE_V8DF_V8SI_QI_INT: -- case V8DI_FTYPE_V8DF_V8DI_QI_INT: -- case V8SF_FTYPE_V8DI_V8SF_QI_INT: -- case V8DF_FTYPE_V8DI_V8DF_QI_INT: -- case V16SF_FTYPE_V16SF_V16SF_HI_INT: -- case V8DI_FTYPE_V8SF_V8DI_QI_INT: -- case V16SF_FTYPE_V16SI_V16SF_HI_INT: -- case V16SI_FTYPE_V16SF_V16SI_HI_INT: -- case V8DF_FTYPE_V8SF_V8DF_QI_INT: -- case V16SF_FTYPE_V16HI_V16SF_HI_INT: -- case V2DF_FTYPE_V2DF_V2DF_V2DF_INT: -- case V4SF_FTYPE_V4SF_V4SF_V4SF_INT: -- nargs = 4; -- break; -- case V4SF_FTYPE_V4SF_V4SF_INT_INT: -- case V2DF_FTYPE_V2DF_V2DF_INT_INT: -- nargs_constant = 2; -- nargs = 4; -- break; -- case INT_FTYPE_V4SF_V4SF_INT_INT: -- case INT_FTYPE_V2DF_V2DF_INT_INT: -- return ix86_expand_sse_comi_round (d, exp, target); -- case V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT: -- case V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT: -- case V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT: -- case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT: -- case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT: -- case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT: -- case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT: -- case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT: -- nargs = 5; -- break; -- case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT: -- case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT: -- nargs_constant = 4; -- nargs = 5; -- break; -- case UQI_FTYPE_V8DF_V8DF_INT_UQI_INT: -- case UQI_FTYPE_V2DF_V2DF_INT_UQI_INT: -- case UHI_FTYPE_V16SF_V16SF_INT_UHI_INT: -- case UQI_FTYPE_V4SF_V4SF_INT_UQI_INT: -- nargs_constant = 3; -- nargs = 5; -- break; -- case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT: -- case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT: -- case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT: -- case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT: -- case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_UQI_INT: -- case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_UQI_INT: -- nargs = 6; -- nargs_constant = 4; -- break; -- case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT: -- case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT: -- case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT: -- case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT: -- nargs = 6; -- nargs_constant = 3; -- break; -- 
default: -- gcc_unreachable (); -- } -- gcc_assert (nargs <= ARRAY_SIZE (args)); -- -- if (optimize -- || target == 0 -- || GET_MODE (target) != tmode -- || !insn_p->operand[0].predicate (target, tmode)) -- target = gen_reg_rtx (tmode); -- -- for (i = 0; i < nargs; i++) -- { -- tree arg = CALL_EXPR_ARG (exp, i); -- rtx op = expand_normal (arg); -- machine_mode mode = insn_p->operand[i + 1].mode; -- bool match = insn_p->operand[i + 1].predicate (op, mode); -- -- if (i == nargs - nargs_constant) -- { -- if (!match) -- { -- switch (icode) -- { -- case CODE_FOR_avx512f_getmantv8df_mask_round: -- case CODE_FOR_avx512f_getmantv16sf_mask_round: -- case CODE_FOR_avx512f_vgetmantv2df_round: -- case CODE_FOR_avx512f_vgetmantv2df_mask_round: -- case CODE_FOR_avx512f_vgetmantv4sf_round: -- case CODE_FOR_avx512f_vgetmantv4sf_mask_round: -- error ("the immediate argument must be a 4-bit immediate"); -- return const0_rtx; -- case CODE_FOR_avx512f_cmpv8df3_mask_round: -- case CODE_FOR_avx512f_cmpv16sf3_mask_round: -- case CODE_FOR_avx512f_vmcmpv2df3_mask_round: -- case CODE_FOR_avx512f_vmcmpv4sf3_mask_round: -- error ("the immediate argument must be a 5-bit immediate"); -- return const0_rtx; -- default: -- error ("the immediate argument must be an 8-bit immediate"); -- return const0_rtx; -- } -- } -- } -- else if (i == nargs-1) -- { -- if (!insn_p->operand[nargs].predicate (op, SImode)) -- { -- error ("incorrect rounding operand"); -- return const0_rtx; -- } -- -- /* If there is no rounding use normal version of the pattern. */ -- if (INTVAL (op) == NO_ROUND) -- redundant_embed_rnd = 1; -- } -- else -- { -- if (VECTOR_MODE_P (mode)) -- op = safe_vector_operand (op, mode); -- -- op = fixup_modeless_constant (op, mode); -- -- if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode) -- { -- if (optimize || !match) -- op = copy_to_mode_reg (mode, op); -- } -- else -- { -- op = copy_to_reg (op); -- op = lowpart_subreg (mode, op, GET_MODE (op)); -- } -- } -- -- args[i].op = op; -- args[i].mode = mode; -- } -- -- switch (nargs) -- { -- case 1: -- pat = GEN_FCN (icode) (target, args[0].op); -- break; -- case 2: -- pat = GEN_FCN (icode) (target, args[0].op, args[1].op); -- break; -- case 3: -- pat = GEN_FCN (icode) (target, args[0].op, args[1].op, -- args[2].op); -- break; -- case 4: -- pat = GEN_FCN (icode) (target, args[0].op, args[1].op, -- args[2].op, args[3].op); -- break; -- case 5: -- pat = GEN_FCN (icode) (target, args[0].op, args[1].op, -- args[2].op, args[3].op, args[4].op); -- break; -- case 6: -- pat = GEN_FCN (icode) (target, args[0].op, args[1].op, -- args[2].op, args[3].op, args[4].op, -- args[5].op); -- break; -- default: -- gcc_unreachable (); -- } -- -- if (!pat) -- return 0; -- -- if (redundant_embed_rnd) -- pat = ix86_erase_embedded_rounding (pat); -- -- emit_insn (pat); -- return target; --} -- --/* Subroutine of ix86_expand_builtin to take care of special insns -- with variable number of operands. 
*/ -- --static rtx --ix86_expand_special_args_builtin (const struct builtin_description *d, -- tree exp, rtx target) --{ -- tree arg; -- rtx pat, op; -- unsigned int i, nargs, arg_adjust, memory; -- bool aligned_mem = false; -- struct -- { -- rtx op; -- machine_mode mode; -- } args[3]; -- enum insn_code icode = d->icode; -- bool last_arg_constant = false; -- const struct insn_data_d *insn_p = &insn_data[icode]; -- machine_mode tmode = insn_p->operand[0].mode; -- enum { load, store } klass; -- -- switch ((enum ix86_builtin_func_type) d->flag) -- { -- case VOID_FTYPE_VOID: -- emit_insn (GEN_FCN (icode) (target)); -- return 0; -- case VOID_FTYPE_UINT64: -- case VOID_FTYPE_UNSIGNED: -- nargs = 0; -- klass = store; -- memory = 0; -- break; -- -- case INT_FTYPE_VOID: -- case USHORT_FTYPE_VOID: -- case UINT64_FTYPE_VOID: -- case UINT_FTYPE_VOID: -- case UNSIGNED_FTYPE_VOID: -- nargs = 0; -- klass = load; -- memory = 0; -- break; -- case UINT64_FTYPE_PUNSIGNED: -- case V2DI_FTYPE_PV2DI: -- case V4DI_FTYPE_PV4DI: -- case V32QI_FTYPE_PCCHAR: -- case V16QI_FTYPE_PCCHAR: -- case V8SF_FTYPE_PCV4SF: -- case V8SF_FTYPE_PCFLOAT: -- case V4SF_FTYPE_PCFLOAT: -- case V4DF_FTYPE_PCV2DF: -- case V4DF_FTYPE_PCDOUBLE: -- case V2DF_FTYPE_PCDOUBLE: -- case VOID_FTYPE_PVOID: -- case V8DI_FTYPE_PV8DI: -- nargs = 1; -- klass = load; -- memory = 0; -- switch (icode) -- { -- case CODE_FOR_sse4_1_movntdqa: -- case CODE_FOR_avx2_movntdqa: -- case CODE_FOR_avx512f_movntdqa: -- aligned_mem = true; -- break; -- default: -- break; -- } -- break; -- case VOID_FTYPE_PV2SF_V4SF: -- case VOID_FTYPE_PV8DI_V8DI: -- case VOID_FTYPE_PV4DI_V4DI: -- case VOID_FTYPE_PV2DI_V2DI: -- case VOID_FTYPE_PCHAR_V32QI: -- case VOID_FTYPE_PCHAR_V16QI: -- case VOID_FTYPE_PFLOAT_V16SF: -- case VOID_FTYPE_PFLOAT_V8SF: -- case VOID_FTYPE_PFLOAT_V4SF: -- case VOID_FTYPE_PDOUBLE_V8DF: -- case VOID_FTYPE_PDOUBLE_V4DF: -- case VOID_FTYPE_PDOUBLE_V2DF: -- case VOID_FTYPE_PLONGLONG_LONGLONG: -- case VOID_FTYPE_PULONGLONG_ULONGLONG: -- case VOID_FTYPE_PUNSIGNED_UNSIGNED: -- case VOID_FTYPE_PINT_INT: -- nargs = 1; -- klass = store; -- /* Reserve memory operand for target. */ -- memory = ARRAY_SIZE (args); -- switch (icode) -- { -- /* These builtins and instructions require the memory -- to be properly aligned. 
*/ -- case CODE_FOR_avx_movntv4di: -- case CODE_FOR_sse2_movntv2di: -- case CODE_FOR_avx_movntv8sf: -- case CODE_FOR_sse_movntv4sf: -- case CODE_FOR_sse4a_vmmovntv4sf: -- case CODE_FOR_avx_movntv4df: -- case CODE_FOR_sse2_movntv2df: -- case CODE_FOR_sse4a_vmmovntv2df: -- case CODE_FOR_sse2_movntidi: -- case CODE_FOR_sse_movntq: -- case CODE_FOR_sse2_movntisi: -- case CODE_FOR_avx512f_movntv16sf: -- case CODE_FOR_avx512f_movntv8df: -- case CODE_FOR_avx512f_movntv8di: -- aligned_mem = true; -- break; -- default: -- break; -- } -- break; -- case VOID_FTYPE_PVOID_PCVOID: -- nargs = 1; -- klass = store; -- memory = 0; -- -- break; -- case V4SF_FTYPE_V4SF_PCV2SF: -- case V2DF_FTYPE_V2DF_PCDOUBLE: -- nargs = 2; -- klass = load; -- memory = 1; -- break; -- case V8SF_FTYPE_PCV8SF_V8SI: -- case V4DF_FTYPE_PCV4DF_V4DI: -- case V4SF_FTYPE_PCV4SF_V4SI: -- case V2DF_FTYPE_PCV2DF_V2DI: -- case V8SI_FTYPE_PCV8SI_V8SI: -- case V4DI_FTYPE_PCV4DI_V4DI: -- case V4SI_FTYPE_PCV4SI_V4SI: -- case V2DI_FTYPE_PCV2DI_V2DI: -- case VOID_FTYPE_INT_INT64: -- nargs = 2; -- klass = load; -- memory = 0; -- break; -- case VOID_FTYPE_PV8DF_V8DF_UQI: -- case VOID_FTYPE_PV4DF_V4DF_UQI: -- case VOID_FTYPE_PV2DF_V2DF_UQI: -- case VOID_FTYPE_PV16SF_V16SF_UHI: -- case VOID_FTYPE_PV8SF_V8SF_UQI: -- case VOID_FTYPE_PV4SF_V4SF_UQI: -- case VOID_FTYPE_PV8DI_V8DI_UQI: -- case VOID_FTYPE_PV4DI_V4DI_UQI: -- case VOID_FTYPE_PV2DI_V2DI_UQI: -- case VOID_FTYPE_PV16SI_V16SI_UHI: -- case VOID_FTYPE_PV8SI_V8SI_UQI: -- case VOID_FTYPE_PV4SI_V4SI_UQI: -- case VOID_FTYPE_PV64QI_V64QI_UDI: -- case VOID_FTYPE_PV32HI_V32HI_USI: -- case VOID_FTYPE_PV32QI_V32QI_USI: -- case VOID_FTYPE_PV16QI_V16QI_UHI: -- case VOID_FTYPE_PV16HI_V16HI_UHI: -- case VOID_FTYPE_PV8HI_V8HI_UQI: -- switch (icode) -- { -- /* These builtins and instructions require the memory -- to be properly aligned. 
*/ -- case CODE_FOR_avx512f_storev16sf_mask: -- case CODE_FOR_avx512f_storev16si_mask: -- case CODE_FOR_avx512f_storev8df_mask: -- case CODE_FOR_avx512f_storev8di_mask: -- case CODE_FOR_avx512vl_storev8sf_mask: -- case CODE_FOR_avx512vl_storev8si_mask: -- case CODE_FOR_avx512vl_storev4df_mask: -- case CODE_FOR_avx512vl_storev4di_mask: -- case CODE_FOR_avx512vl_storev4sf_mask: -- case CODE_FOR_avx512vl_storev4si_mask: -- case CODE_FOR_avx512vl_storev2df_mask: -- case CODE_FOR_avx512vl_storev2di_mask: -- aligned_mem = true; -- break; -- default: -- break; -- } -- /* FALLTHRU */ -- case VOID_FTYPE_PV8SF_V8SI_V8SF: -- case VOID_FTYPE_PV4DF_V4DI_V4DF: -- case VOID_FTYPE_PV4SF_V4SI_V4SF: -- case VOID_FTYPE_PV2DF_V2DI_V2DF: -- case VOID_FTYPE_PV8SI_V8SI_V8SI: -- case VOID_FTYPE_PV4DI_V4DI_V4DI: -- case VOID_FTYPE_PV4SI_V4SI_V4SI: -- case VOID_FTYPE_PV2DI_V2DI_V2DI: -- case VOID_FTYPE_PV8SI_V8DI_UQI: -- case VOID_FTYPE_PV8HI_V8DI_UQI: -- case VOID_FTYPE_PV16HI_V16SI_UHI: -- case VOID_FTYPE_PV16QI_V8DI_UQI: -- case VOID_FTYPE_PV16QI_V16SI_UHI: -- case VOID_FTYPE_PV4SI_V4DI_UQI: -- case VOID_FTYPE_PV4SI_V2DI_UQI: -- case VOID_FTYPE_PV8HI_V4DI_UQI: -- case VOID_FTYPE_PV8HI_V2DI_UQI: -- case VOID_FTYPE_PV8HI_V8SI_UQI: -- case VOID_FTYPE_PV8HI_V4SI_UQI: -- case VOID_FTYPE_PV16QI_V4DI_UQI: -- case VOID_FTYPE_PV16QI_V2DI_UQI: -- case VOID_FTYPE_PV16QI_V8SI_UQI: -- case VOID_FTYPE_PV16QI_V4SI_UQI: -- case VOID_FTYPE_PCHAR_V64QI_UDI: -- case VOID_FTYPE_PCHAR_V32QI_USI: -- case VOID_FTYPE_PCHAR_V16QI_UHI: -- case VOID_FTYPE_PSHORT_V32HI_USI: -- case VOID_FTYPE_PSHORT_V16HI_UHI: -- case VOID_FTYPE_PSHORT_V8HI_UQI: -- case VOID_FTYPE_PINT_V16SI_UHI: -- case VOID_FTYPE_PINT_V8SI_UQI: -- case VOID_FTYPE_PINT_V4SI_UQI: -- case VOID_FTYPE_PINT64_V8DI_UQI: -- case VOID_FTYPE_PINT64_V4DI_UQI: -- case VOID_FTYPE_PINT64_V2DI_UQI: -- case VOID_FTYPE_PDOUBLE_V8DF_UQI: -- case VOID_FTYPE_PDOUBLE_V4DF_UQI: -- case VOID_FTYPE_PDOUBLE_V2DF_UQI: -- case VOID_FTYPE_PFLOAT_V16SF_UHI: -- case VOID_FTYPE_PFLOAT_V8SF_UQI: -- case VOID_FTYPE_PFLOAT_V4SF_UQI: -- case VOID_FTYPE_PV32QI_V32HI_USI: -- case VOID_FTYPE_PV16QI_V16HI_UHI: -- case VOID_FTYPE_PV8QI_V8HI_UQI: -- nargs = 2; -- klass = store; -- /* Reserve memory operand for target. */ -- memory = ARRAY_SIZE (args); -- break; -- case V4SF_FTYPE_PCV4SF_V4SF_UQI: -- case V8SF_FTYPE_PCV8SF_V8SF_UQI: -- case V16SF_FTYPE_PCV16SF_V16SF_UHI: -- case V4SI_FTYPE_PCV4SI_V4SI_UQI: -- case V8SI_FTYPE_PCV8SI_V8SI_UQI: -- case V16SI_FTYPE_PCV16SI_V16SI_UHI: -- case V2DF_FTYPE_PCV2DF_V2DF_UQI: -- case V4DF_FTYPE_PCV4DF_V4DF_UQI: -- case V8DF_FTYPE_PCV8DF_V8DF_UQI: -- case V2DI_FTYPE_PCV2DI_V2DI_UQI: -- case V4DI_FTYPE_PCV4DI_V4DI_UQI: -- case V8DI_FTYPE_PCV8DI_V8DI_UQI: -- case V64QI_FTYPE_PCV64QI_V64QI_UDI: -- case V32HI_FTYPE_PCV32HI_V32HI_USI: -- case V32QI_FTYPE_PCV32QI_V32QI_USI: -- case V16QI_FTYPE_PCV16QI_V16QI_UHI: -- case V16HI_FTYPE_PCV16HI_V16HI_UHI: -- case V8HI_FTYPE_PCV8HI_V8HI_UQI: -- switch (icode) -- { -- /* These builtins and instructions require the memory -- to be properly aligned. 
*/ -- case CODE_FOR_avx512f_loadv16sf_mask: -- case CODE_FOR_avx512f_loadv16si_mask: -- case CODE_FOR_avx512f_loadv8df_mask: -- case CODE_FOR_avx512f_loadv8di_mask: -- case CODE_FOR_avx512vl_loadv8sf_mask: -- case CODE_FOR_avx512vl_loadv8si_mask: -- case CODE_FOR_avx512vl_loadv4df_mask: -- case CODE_FOR_avx512vl_loadv4di_mask: -- case CODE_FOR_avx512vl_loadv4sf_mask: -- case CODE_FOR_avx512vl_loadv4si_mask: -- case CODE_FOR_avx512vl_loadv2df_mask: -- case CODE_FOR_avx512vl_loadv2di_mask: -- case CODE_FOR_avx512bw_loadv64qi_mask: -- case CODE_FOR_avx512vl_loadv32qi_mask: -- case CODE_FOR_avx512vl_loadv16qi_mask: -- case CODE_FOR_avx512bw_loadv32hi_mask: -- case CODE_FOR_avx512vl_loadv16hi_mask: -- case CODE_FOR_avx512vl_loadv8hi_mask: -- aligned_mem = true; -- break; -- default: -- break; -- } -- /* FALLTHRU */ -- case V64QI_FTYPE_PCCHAR_V64QI_UDI: -- case V32QI_FTYPE_PCCHAR_V32QI_USI: -- case V16QI_FTYPE_PCCHAR_V16QI_UHI: -- case V32HI_FTYPE_PCSHORT_V32HI_USI: -- case V16HI_FTYPE_PCSHORT_V16HI_UHI: -- case V8HI_FTYPE_PCSHORT_V8HI_UQI: -- case V16SI_FTYPE_PCINT_V16SI_UHI: -- case V8SI_FTYPE_PCINT_V8SI_UQI: -- case V4SI_FTYPE_PCINT_V4SI_UQI: -- case V8DI_FTYPE_PCINT64_V8DI_UQI: -- case V4DI_FTYPE_PCINT64_V4DI_UQI: -- case V2DI_FTYPE_PCINT64_V2DI_UQI: -- case V8DF_FTYPE_PCDOUBLE_V8DF_UQI: -- case V4DF_FTYPE_PCDOUBLE_V4DF_UQI: -- case V2DF_FTYPE_PCDOUBLE_V2DF_UQI: -- case V16SF_FTYPE_PCFLOAT_V16SF_UHI: -- case V8SF_FTYPE_PCFLOAT_V8SF_UQI: -- case V4SF_FTYPE_PCFLOAT_V4SF_UQI: -- nargs = 3; -- klass = load; -- memory = 0; -- break; -- case VOID_FTYPE_UINT_UINT_UINT: -- case VOID_FTYPE_UINT64_UINT_UINT: -- case UCHAR_FTYPE_UINT_UINT_UINT: -- case UCHAR_FTYPE_UINT64_UINT_UINT: -- nargs = 3; -- klass = load; -- memory = ARRAY_SIZE (args); -- last_arg_constant = true; -- break; -- default: -- gcc_unreachable (); -- } -- -- gcc_assert (nargs <= ARRAY_SIZE (args)); -- -- if (klass == store) -- { -- arg = CALL_EXPR_ARG (exp, 0); -- op = expand_normal (arg); -- gcc_assert (target == 0); -- if (memory) -- { -- op = ix86_zero_extend_to_Pmode (op); -- target = gen_rtx_MEM (tmode, op); -- /* target at this point has just BITS_PER_UNIT MEM_ALIGN -- on it. Try to improve it using get_pointer_alignment, -- and if the special builtin is one that requires strict -- mode alignment, also from it's GET_MODE_ALIGNMENT. -- Failure to do so could lead to ix86_legitimate_combined_insn -- rejecting all changes to such insns. 
*/ -- unsigned int align = get_pointer_alignment (arg); -- if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode)) -- align = GET_MODE_ALIGNMENT (tmode); -- if (MEM_ALIGN (target) < align) -- set_mem_align (target, align); -- } -- else -- target = force_reg (tmode, op); -- arg_adjust = 1; -- } -- else -- { -- arg_adjust = 0; -- if (optimize -- || target == 0 -- || !register_operand (target, tmode) -- || GET_MODE (target) != tmode) -- target = gen_reg_rtx (tmode); -- } -- -- for (i = 0; i < nargs; i++) -- { -- machine_mode mode = insn_p->operand[i + 1].mode; -- bool match; -- -- arg = CALL_EXPR_ARG (exp, i + arg_adjust); -- op = expand_normal (arg); -- match = insn_p->operand[i + 1].predicate (op, mode); -- -- if (last_arg_constant && (i + 1) == nargs) -- { -- if (!match) -- { -- if (icode == CODE_FOR_lwp_lwpvalsi3 -- || icode == CODE_FOR_lwp_lwpinssi3 -- || icode == CODE_FOR_lwp_lwpvaldi3 -- || icode == CODE_FOR_lwp_lwpinsdi3) -- error ("the last argument must be a 32-bit immediate"); -- else -- error ("the last argument must be an 8-bit immediate"); -- return const0_rtx; -- } -- } -- else -- { -- if (i == memory) -- { -- /* This must be the memory operand. */ -- op = ix86_zero_extend_to_Pmode (op); -- op = gen_rtx_MEM (mode, op); -- /* op at this point has just BITS_PER_UNIT MEM_ALIGN -- on it. Try to improve it using get_pointer_alignment, -- and if the special builtin is one that requires strict -- mode alignment, also from it's GET_MODE_ALIGNMENT. -- Failure to do so could lead to ix86_legitimate_combined_insn -- rejecting all changes to such insns. */ -- unsigned int align = get_pointer_alignment (arg); -- if (aligned_mem && align < GET_MODE_ALIGNMENT (mode)) -- align = GET_MODE_ALIGNMENT (mode); -- if (MEM_ALIGN (op) < align) -- set_mem_align (op, align); -- } -- else -- { -- /* This must be register. */ -- if (VECTOR_MODE_P (mode)) -- op = safe_vector_operand (op, mode); -- -- op = fixup_modeless_constant (op, mode); -- -- if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode) -- op = copy_to_mode_reg (mode, op); -- else -- { -- op = copy_to_reg (op); -- op = lowpart_subreg (mode, op, GET_MODE (op)); -- } -- } -- } -- -- args[i].op = op; -- args[i].mode = mode; -- } -- -- switch (nargs) -- { -- case 0: -- pat = GEN_FCN (icode) (target); -- break; -- case 1: -- pat = GEN_FCN (icode) (target, args[0].op); -- break; -- case 2: -- pat = GEN_FCN (icode) (target, args[0].op, args[1].op); -- break; -- case 3: -- pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op); -- break; -- default: -- gcc_unreachable (); -- } -- -- if (! pat) -- return 0; -- emit_insn (pat); -- return klass == store ? 0 : target; --} -- --/* Return the integer constant in ARG. Constrain it to be in the range -- of the subparts of VEC_TYPE; issue an error if not. */ -- --static int --get_element_number (tree vec_type, tree arg) --{ -- unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1; -- -- if (!tree_fits_uhwi_p (arg) -- || (elt = tree_to_uhwi (arg), elt > max)) -- { -- error ("selector must be an integer constant in the range 0..%wi", max); -- return 0; -- } -- -- return elt; --} -- --/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around -- ix86_expand_vector_init. We DO have language-level syntax for this, in -- the form of (type){ init-list }. Except that since we can't place emms -- instructions from inside the compiler, we can't allow the use of MMX -- registers unless the user explicitly asks for it. 
So we do *not* define -- vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead -- we have builtins invoked by mmintrin.h that gives us license to emit -- these sorts of instructions. */ -- --static rtx --ix86_expand_vec_init_builtin (tree type, tree exp, rtx target) --{ -- machine_mode tmode = TYPE_MODE (type); -- machine_mode inner_mode = GET_MODE_INNER (tmode); -- int i, n_elt = GET_MODE_NUNITS (tmode); -- rtvec v = rtvec_alloc (n_elt); -- -- gcc_assert (VECTOR_MODE_P (tmode)); -- gcc_assert (call_expr_nargs (exp) == n_elt); -- -- for (i = 0; i < n_elt; ++i) -- { -- rtx x = expand_normal (CALL_EXPR_ARG (exp, i)); -- RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x); -- } -- -- if (!target || !register_operand (target, tmode)) -- target = gen_reg_rtx (tmode); -- -- ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v)); -- return target; --} -- --/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around -- ix86_expand_vector_extract. They would be redundant (for non-MMX) if we -- had a language-level syntax for referencing vector elements. */ -- --static rtx --ix86_expand_vec_ext_builtin (tree exp, rtx target) --{ -- machine_mode tmode, mode0; -- tree arg0, arg1; -- int elt; -- rtx op0; -- -- arg0 = CALL_EXPR_ARG (exp, 0); -- arg1 = CALL_EXPR_ARG (exp, 1); -- -- op0 = expand_normal (arg0); -- elt = get_element_number (TREE_TYPE (arg0), arg1); -- -- tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); -- mode0 = TYPE_MODE (TREE_TYPE (arg0)); -- gcc_assert (VECTOR_MODE_P (mode0)); -- -- op0 = force_reg (mode0, op0); -- -- if (optimize || !target || !register_operand (target, tmode)) -- target = gen_reg_rtx (tmode); -- -- ix86_expand_vector_extract (true, target, op0, elt); -- -- return target; --} -- --/* A subroutine of ix86_expand_builtin. These builtins are a wrapper around -- ix86_expand_vector_set. They would be redundant (for non-MMX) if we had -- a language-level syntax for referencing vector elements. */ -- --static rtx --ix86_expand_vec_set_builtin (tree exp) --{ -- machine_mode tmode, mode1; -- tree arg0, arg1, arg2; -- int elt; -- rtx op0, op1, target; -- -- arg0 = CALL_EXPR_ARG (exp, 0); -- arg1 = CALL_EXPR_ARG (exp, 1); -- arg2 = CALL_EXPR_ARG (exp, 2); -- -- tmode = TYPE_MODE (TREE_TYPE (arg0)); -- mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); -- gcc_assert (VECTOR_MODE_P (tmode)); -- -- op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL); -- op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL); -- elt = get_element_number (TREE_TYPE (arg0), arg2); -- -- if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode) -- op1 = convert_modes (mode1, GET_MODE (op1), op1, true); -- -- op0 = force_reg (tmode, op0); -- op1 = force_reg (mode1, op1); -- -- /* OP0 is the source of these builtin functions and shouldn't be -- modified. Create a copy, use it and return it as target. */ -- target = gen_reg_rtx (tmode); -- emit_move_insn (target, op0); -- ix86_expand_vector_set (true, target, op1, elt); -- -- return target; --} -- --/* Expand an expression EXP that calls a built-in function, -- with result going to TARGET if that's convenient -- (and in mode MODE if that's convenient). -- SUBTARGET may be used as the target for computing one of EXP's operands. -- IGNORE is nonzero if the value is to be ignored. 
*/ -- --static rtx --ix86_expand_builtin (tree exp, rtx target, rtx subtarget, -- machine_mode mode, int ignore) --{ -- size_t i; -- enum insn_code icode, icode2; -- tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); -- tree arg0, arg1, arg2, arg3, arg4; -- rtx op0, op1, op2, op3, op4, pat, pat2, insn; -- machine_mode mode0, mode1, mode2, mode3, mode4; -- unsigned int fcode = DECL_FUNCTION_CODE (fndecl); -- -- /* For CPU builtins that can be folded, fold first and expand the fold. */ -- switch (fcode) -- { -- case IX86_BUILTIN_CPU_INIT: -- { -- /* Make it call __cpu_indicator_init in libgcc. */ -- tree call_expr, fndecl, type; -- type = build_function_type_list (integer_type_node, NULL_TREE); -- fndecl = build_fn_decl ("__cpu_indicator_init", type); -- call_expr = build_call_expr (fndecl, 0); -- return expand_expr (call_expr, target, mode, EXPAND_NORMAL); -- } -- case IX86_BUILTIN_CPU_IS: -- case IX86_BUILTIN_CPU_SUPPORTS: -- { -- tree arg0 = CALL_EXPR_ARG (exp, 0); -- tree fold_expr = fold_builtin_cpu (fndecl, &arg0); -- gcc_assert (fold_expr != NULL_TREE); -- return expand_expr (fold_expr, target, mode, EXPAND_NORMAL); -- } -- } -- -- HOST_WIDE_INT isa = ix86_isa_flags; -- HOST_WIDE_INT isa2 = ix86_isa_flags2; -- HOST_WIDE_INT bisa = ix86_builtins_isa[fcode].isa; -- HOST_WIDE_INT bisa2 = ix86_builtins_isa[fcode].isa2; -- /* The general case is we require all the ISAs specified in bisa{,2} -- to be enabled. -- The exceptions are: -- OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A -- OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 -- OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4 -- where for each this pair it is sufficient if either of the ISAs is -- enabled, plus if it is ored with other options also those others. */ -- if (((bisa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A)) -- == (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A)) -- && (isa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A)) != 0) -- isa |= (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A); -- if (((bisa & (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32)) -- == (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32)) -- && (isa & (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32)) != 0) -- isa |= (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32); -- if (((bisa & (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4)) -- == (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4)) -- && (isa & (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4)) != 0) -- isa |= (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4); -- if ((bisa & isa) != bisa || (bisa2 & isa2) != bisa2) -- { -- bool add_abi_p = bisa & OPTION_MASK_ISA_64BIT; -- if (TARGET_ABI_X32) -- bisa |= OPTION_MASK_ABI_X32; -- else -- bisa |= OPTION_MASK_ABI_64; -- char *opts = ix86_target_string (bisa, bisa2, 0, 0, NULL, NULL, -- (enum fpmath_unit) 0, false, add_abi_p); -- if (!opts) -- error ("%qE needs unknown isa option", fndecl); -- else -- { -- gcc_assert (opts != NULL); -- error ("%qE needs isa option %s", fndecl, opts); -- free (opts); -- } -- return expand_call (exp, target, ignore); -- } -- -- switch (fcode) -- { -- case IX86_BUILTIN_MASKMOVQ: -- case IX86_BUILTIN_MASKMOVDQU: -- icode = (fcode == IX86_BUILTIN_MASKMOVQ -- ? CODE_FOR_mmx_maskmovq -- : CODE_FOR_sse2_maskmovdqu); -- /* Note the arg order is different from the operand order. 
*/ -- arg1 = CALL_EXPR_ARG (exp, 0); -- arg2 = CALL_EXPR_ARG (exp, 1); -- arg0 = CALL_EXPR_ARG (exp, 2); -- op0 = expand_normal (arg0); -- op1 = expand_normal (arg1); -- op2 = expand_normal (arg2); -- mode0 = insn_data[icode].operand[0].mode; -- mode1 = insn_data[icode].operand[1].mode; -- mode2 = insn_data[icode].operand[2].mode; -- -- op0 = ix86_zero_extend_to_Pmode (op0); -- op0 = gen_rtx_MEM (mode1, op0); -- -- if (!insn_data[icode].operand[0].predicate (op0, mode0)) -- op0 = copy_to_mode_reg (mode0, op0); -- if (!insn_data[icode].operand[1].predicate (op1, mode1)) -- op1 = copy_to_mode_reg (mode1, op1); -- if (!insn_data[icode].operand[2].predicate (op2, mode2)) -- op2 = copy_to_mode_reg (mode2, op2); -- pat = GEN_FCN (icode) (op0, op1, op2); -- if (! pat) -- return 0; -- emit_insn (pat); -- return 0; -- -- case IX86_BUILTIN_LDMXCSR: -- op0 = expand_normal (CALL_EXPR_ARG (exp, 0)); -- target = assign_386_stack_local (SImode, SLOT_TEMP); -- emit_move_insn (target, op0); -- emit_insn (gen_sse_ldmxcsr (target)); -- return 0; -- -- case IX86_BUILTIN_STMXCSR: -- target = assign_386_stack_local (SImode, SLOT_TEMP); -- emit_insn (gen_sse_stmxcsr (target)); -- return copy_to_mode_reg (SImode, target); -- -- case IX86_BUILTIN_CLFLUSH: -- arg0 = CALL_EXPR_ARG (exp, 0); -- op0 = expand_normal (arg0); -- icode = CODE_FOR_sse2_clflush; -- if (!insn_data[icode].operand[0].predicate (op0, Pmode)) -- op0 = ix86_zero_extend_to_Pmode (op0); -- -- emit_insn (gen_sse2_clflush (op0)); -- return 0; -- -- case IX86_BUILTIN_CLWB: -- arg0 = CALL_EXPR_ARG (exp, 0); -- op0 = expand_normal (arg0); -- icode = CODE_FOR_clwb; -- if (!insn_data[icode].operand[0].predicate (op0, Pmode)) -- op0 = ix86_zero_extend_to_Pmode (op0); -- -- emit_insn (gen_clwb (op0)); -- return 0; -- -- case IX86_BUILTIN_CLFLUSHOPT: -- arg0 = CALL_EXPR_ARG (exp, 0); -- op0 = expand_normal (arg0); -- icode = CODE_FOR_clflushopt; -- if (!insn_data[icode].operand[0].predicate (op0, Pmode)) -- op0 = ix86_zero_extend_to_Pmode (op0); -- -- emit_insn (gen_clflushopt (op0)); -- return 0; -- -- case IX86_BUILTIN_MONITOR: -- case IX86_BUILTIN_MONITORX: -- arg0 = CALL_EXPR_ARG (exp, 0); -- arg1 = CALL_EXPR_ARG (exp, 1); -- arg2 = CALL_EXPR_ARG (exp, 2); -- op0 = expand_normal (arg0); -- op1 = expand_normal (arg1); -- op2 = expand_normal (arg2); -- if (!REG_P (op0)) -- op0 = ix86_zero_extend_to_Pmode (op0); -- if (!REG_P (op1)) -- op1 = copy_to_mode_reg (SImode, op1); -- if (!REG_P (op2)) -- op2 = copy_to_mode_reg (SImode, op2); -- -- emit_insn (fcode == IX86_BUILTIN_MONITOR -- ? 
ix86_gen_monitor (op0, op1, op2) -- : ix86_gen_monitorx (op0, op1, op2)); -- return 0; -- -- case IX86_BUILTIN_MWAIT: -- arg0 = CALL_EXPR_ARG (exp, 0); -- arg1 = CALL_EXPR_ARG (exp, 1); -- op0 = expand_normal (arg0); -- op1 = expand_normal (arg1); -- if (!REG_P (op0)) -- op0 = copy_to_mode_reg (SImode, op0); -- if (!REG_P (op1)) -- op1 = copy_to_mode_reg (SImode, op1); -- emit_insn (gen_sse3_mwait (op0, op1)); -- return 0; -- -- case IX86_BUILTIN_MWAITX: -- arg0 = CALL_EXPR_ARG (exp, 0); -- arg1 = CALL_EXPR_ARG (exp, 1); -- arg2 = CALL_EXPR_ARG (exp, 2); -- op0 = expand_normal (arg0); -- op1 = expand_normal (arg1); -- op2 = expand_normal (arg2); -- if (!REG_P (op0)) -- op0 = copy_to_mode_reg (SImode, op0); -- if (!REG_P (op1)) -- op1 = copy_to_mode_reg (SImode, op1); -- if (!REG_P (op2)) -- op2 = copy_to_mode_reg (SImode, op2); -- emit_insn (gen_mwaitx (op0, op1, op2)); -- return 0; -- -- case IX86_BUILTIN_UMONITOR: -- arg0 = CALL_EXPR_ARG (exp, 0); -- op0 = expand_normal (arg0); -- -- op0 = ix86_zero_extend_to_Pmode (op0); -- -- insn = (TARGET_64BIT -- ? gen_umonitor_di (op0) -- : gen_umonitor_si (op0)); -- -- emit_insn (insn); -- return 0; -- -- case IX86_BUILTIN_UMWAIT: -- case IX86_BUILTIN_TPAUSE: -- arg0 = CALL_EXPR_ARG (exp, 0); -- arg1 = CALL_EXPR_ARG (exp, 1); -- op0 = expand_normal (arg0); -- op1 = expand_normal (arg1); -- -- if (!REG_P (op0)) -- op0 = copy_to_mode_reg (SImode, op0); -- -- op1 = force_reg (DImode, op1); -- -- if (TARGET_64BIT) -- { -- op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32), -- NULL, 1, OPTAB_DIRECT); -- switch (fcode) -- { -- case IX86_BUILTIN_UMWAIT: -- icode = CODE_FOR_umwait_rex64; -- break; -- case IX86_BUILTIN_TPAUSE: -- icode = CODE_FOR_tpause_rex64; -- break; -- default: -- gcc_unreachable (); -- } -- -- op2 = gen_lowpart (SImode, op2); -- op1 = gen_lowpart (SImode, op1); -- pat = GEN_FCN (icode) (op0, op1, op2); -- } -- else -- { -- switch (fcode) -- { -- case IX86_BUILTIN_UMWAIT: -- icode = CODE_FOR_umwait; -- break; -- case IX86_BUILTIN_TPAUSE: -- icode = CODE_FOR_tpause; -- break; -- default: -- gcc_unreachable (); -- } -- pat = GEN_FCN (icode) (op0, op1); -- } -- -- if (!pat) -- return 0; -- -- emit_insn (pat); -- -- if (target == 0 -- || !register_operand (target, QImode)) -- target = gen_reg_rtx (QImode); -- -- pat = gen_rtx_EQ (QImode, gen_rtx_REG (CCCmode, FLAGS_REG), -- const0_rtx); -- emit_insn (gen_rtx_SET (target, pat)); -- -- return target; -- -- case IX86_BUILTIN_CLZERO: -- arg0 = CALL_EXPR_ARG (exp, 0); -- op0 = expand_normal (arg0); -- if (!REG_P (op0)) -- op0 = ix86_zero_extend_to_Pmode (op0); -- emit_insn (ix86_gen_clzero (op0)); -- return 0; -- -- case IX86_BUILTIN_CLDEMOTE: -- arg0 = CALL_EXPR_ARG (exp, 0); -- op0 = expand_normal (arg0); -- icode = CODE_FOR_cldemote; -- if (!insn_data[icode].operand[0].predicate (op0, Pmode)) -- op0 = ix86_zero_extend_to_Pmode (op0); -- -- emit_insn (gen_cldemote (op0)); -- return 0; -- -- case IX86_BUILTIN_VEC_INIT_V2SI: -- case IX86_BUILTIN_VEC_INIT_V4HI: -- case IX86_BUILTIN_VEC_INIT_V8QI: -- return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target); -- -- case IX86_BUILTIN_VEC_EXT_V2DF: -- case IX86_BUILTIN_VEC_EXT_V2DI: -- case IX86_BUILTIN_VEC_EXT_V4SF: -- case IX86_BUILTIN_VEC_EXT_V4SI: -- case IX86_BUILTIN_VEC_EXT_V8HI: -- case IX86_BUILTIN_VEC_EXT_V2SI: -- case IX86_BUILTIN_VEC_EXT_V4HI: -- case IX86_BUILTIN_VEC_EXT_V16QI: -- return ix86_expand_vec_ext_builtin (exp, target); -- -- case IX86_BUILTIN_VEC_SET_V2DI: -- case IX86_BUILTIN_VEC_SET_V4SF: -- case 
IX86_BUILTIN_VEC_SET_V4SI: -- case IX86_BUILTIN_VEC_SET_V8HI: -- case IX86_BUILTIN_VEC_SET_V4HI: -- case IX86_BUILTIN_VEC_SET_V16QI: -- return ix86_expand_vec_set_builtin (exp); -- -- case IX86_BUILTIN_NANQ: -- case IX86_BUILTIN_NANSQ: -- return expand_call (exp, target, ignore); -- -- case IX86_BUILTIN_RDPID: -- -- op0 = gen_reg_rtx (word_mode); -- -- if (TARGET_64BIT) -- { -- insn = gen_rdpid_rex64 (op0); -- op0 = convert_to_mode (SImode, op0, 1); -- } -- else -- insn = gen_rdpid (op0); -- -- emit_insn (insn); -- -- if (target == 0 -- || !register_operand (target, SImode)) -- target = gen_reg_rtx (SImode); -- -- emit_move_insn (target, op0); -- return target; -- -- case IX86_BUILTIN_RDPMC: -- case IX86_BUILTIN_RDTSC: -- case IX86_BUILTIN_RDTSCP: -- case IX86_BUILTIN_XGETBV: -- -- op0 = gen_reg_rtx (DImode); -- op1 = gen_reg_rtx (DImode); -- -- if (fcode == IX86_BUILTIN_RDPMC) -- { -- arg0 = CALL_EXPR_ARG (exp, 0); -- op2 = expand_normal (arg0); -- if (!register_operand (op2, SImode)) -- op2 = copy_to_mode_reg (SImode, op2); -- -- insn = (TARGET_64BIT -- ? gen_rdpmc_rex64 (op0, op1, op2) -- : gen_rdpmc (op0, op2)); -- emit_insn (insn); -- } -- else if (fcode == IX86_BUILTIN_XGETBV) -- { -- arg0 = CALL_EXPR_ARG (exp, 0); -- op2 = expand_normal (arg0); -- if (!register_operand (op2, SImode)) -- op2 = copy_to_mode_reg (SImode, op2); -- -- insn = (TARGET_64BIT -- ? gen_xgetbv_rex64 (op0, op1, op2) -- : gen_xgetbv (op0, op2)); -- emit_insn (insn); -- } -- else if (fcode == IX86_BUILTIN_RDTSC) -- { -- insn = (TARGET_64BIT -- ? gen_rdtsc_rex64 (op0, op1) -- : gen_rdtsc (op0)); -- emit_insn (insn); -- } -- else -- { -- op2 = gen_reg_rtx (SImode); -- -- insn = (TARGET_64BIT -- ? gen_rdtscp_rex64 (op0, op1, op2) -- : gen_rdtscp (op0, op2)); -- emit_insn (insn); -- -- arg0 = CALL_EXPR_ARG (exp, 0); -- op4 = expand_normal (arg0); -- if (!address_operand (op4, VOIDmode)) -- { -- op4 = convert_memory_address (Pmode, op4); -- op4 = copy_addr_to_reg (op4); -- } -- emit_move_insn (gen_rtx_MEM (SImode, op4), op2); -- } -- -- if (target == 0 -- || !register_operand (target, DImode)) -- target = gen_reg_rtx (DImode); -- -- if (TARGET_64BIT) -- { -- op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32), -- op1, 1, OPTAB_DIRECT); -- op0 = expand_simple_binop (DImode, IOR, op0, op1, -- op0, 1, OPTAB_DIRECT); -- } -- -- emit_move_insn (target, op0); -- return target; -- -- case IX86_BUILTIN_MOVDIR64B: -- -- arg0 = CALL_EXPR_ARG (exp, 0); -- arg1 = CALL_EXPR_ARG (exp, 1); -- op0 = expand_normal (arg0); -- op1 = expand_normal (arg1); -- -- op0 = ix86_zero_extend_to_Pmode (op0); -- if (!address_operand (op1, VOIDmode)) -- { -- op1 = convert_memory_address (Pmode, op1); -- op1 = copy_addr_to_reg (op1); -- } -- op1 = gen_rtx_MEM (XImode, op1); -- -- insn = (TARGET_64BIT -- ? 
gen_movdir64b_di (op0, op1) -- : gen_movdir64b_si (op0, op1)); -- emit_insn (insn); -- return 0; -- -- case IX86_BUILTIN_FXSAVE: -- case IX86_BUILTIN_FXRSTOR: -- case IX86_BUILTIN_FXSAVE64: -- case IX86_BUILTIN_FXRSTOR64: -- case IX86_BUILTIN_FNSTENV: -- case IX86_BUILTIN_FLDENV: -- mode0 = BLKmode; -- switch (fcode) -- { -- case IX86_BUILTIN_FXSAVE: -- icode = CODE_FOR_fxsave; -- break; -- case IX86_BUILTIN_FXRSTOR: -- icode = CODE_FOR_fxrstor; -- break; -- case IX86_BUILTIN_FXSAVE64: -- icode = CODE_FOR_fxsave64; -- break; -- case IX86_BUILTIN_FXRSTOR64: -- icode = CODE_FOR_fxrstor64; -- break; -- case IX86_BUILTIN_FNSTENV: -- icode = CODE_FOR_fnstenv; -- break; -- case IX86_BUILTIN_FLDENV: -- icode = CODE_FOR_fldenv; -- break; -- default: -- gcc_unreachable (); -- } -- -- arg0 = CALL_EXPR_ARG (exp, 0); -- op0 = expand_normal (arg0); -- -- if (!address_operand (op0, VOIDmode)) -- { -- op0 = convert_memory_address (Pmode, op0); -- op0 = copy_addr_to_reg (op0); -- } -- op0 = gen_rtx_MEM (mode0, op0); -- -- pat = GEN_FCN (icode) (op0); -- if (pat) -- emit_insn (pat); -- return 0; -- -- case IX86_BUILTIN_XSETBV: -- arg0 = CALL_EXPR_ARG (exp, 0); -- arg1 = CALL_EXPR_ARG (exp, 1); -- op0 = expand_normal (arg0); -- op1 = expand_normal (arg1); -- -- if (!REG_P (op0)) -- op0 = copy_to_mode_reg (SImode, op0); -- -- op1 = force_reg (DImode, op1); -- -- if (TARGET_64BIT) -- { -- op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32), -- NULL, 1, OPTAB_DIRECT); -- -- icode = CODE_FOR_xsetbv_rex64; -- -- op2 = gen_lowpart (SImode, op2); -- op1 = gen_lowpart (SImode, op1); -- pat = GEN_FCN (icode) (op0, op1, op2); -- } -- else -- { -- icode = CODE_FOR_xsetbv; -- -- pat = GEN_FCN (icode) (op0, op1); -- } -- if (pat) -- emit_insn (pat); -- return 0; -- -- case IX86_BUILTIN_XSAVE: -- case IX86_BUILTIN_XRSTOR: -- case IX86_BUILTIN_XSAVE64: -- case IX86_BUILTIN_XRSTOR64: -- case IX86_BUILTIN_XSAVEOPT: -- case IX86_BUILTIN_XSAVEOPT64: -- case IX86_BUILTIN_XSAVES: -- case IX86_BUILTIN_XRSTORS: -- case IX86_BUILTIN_XSAVES64: -- case IX86_BUILTIN_XRSTORS64: -- case IX86_BUILTIN_XSAVEC: -- case IX86_BUILTIN_XSAVEC64: -- arg0 = CALL_EXPR_ARG (exp, 0); -- arg1 = CALL_EXPR_ARG (exp, 1); -- op0 = expand_normal (arg0); -- op1 = expand_normal (arg1); -- -- if (!address_operand (op0, VOIDmode)) -- { -- op0 = convert_memory_address (Pmode, op0); -- op0 = copy_addr_to_reg (op0); -- } -- op0 = gen_rtx_MEM (BLKmode, op0); -- -- op1 = force_reg (DImode, op1); -- -- if (TARGET_64BIT) -- { -- op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32), -- NULL, 1, OPTAB_DIRECT); -- switch (fcode) -- { -- case IX86_BUILTIN_XSAVE: -- icode = CODE_FOR_xsave_rex64; -- break; -- case IX86_BUILTIN_XRSTOR: -- icode = CODE_FOR_xrstor_rex64; -- break; -- case IX86_BUILTIN_XSAVE64: -- icode = CODE_FOR_xsave64; -- break; -- case IX86_BUILTIN_XRSTOR64: -- icode = CODE_FOR_xrstor64; -- break; -- case IX86_BUILTIN_XSAVEOPT: -- icode = CODE_FOR_xsaveopt_rex64; -- break; -- case IX86_BUILTIN_XSAVEOPT64: -- icode = CODE_FOR_xsaveopt64; -- break; -- case IX86_BUILTIN_XSAVES: -- icode = CODE_FOR_xsaves_rex64; -- break; -- case IX86_BUILTIN_XRSTORS: -- icode = CODE_FOR_xrstors_rex64; -- break; -- case IX86_BUILTIN_XSAVES64: -- icode = CODE_FOR_xsaves64; -- break; -- case IX86_BUILTIN_XRSTORS64: -- icode = CODE_FOR_xrstors64; -- break; -- case IX86_BUILTIN_XSAVEC: -- icode = CODE_FOR_xsavec_rex64; -- break; -- case IX86_BUILTIN_XSAVEC64: -- icode = CODE_FOR_xsavec64; -- break; -- default: -- gcc_unreachable (); -- } -- -- op2 = 
gen_lowpart (SImode, op2); -- op1 = gen_lowpart (SImode, op1); -- pat = GEN_FCN (icode) (op0, op1, op2); -- } -- else -- { -- switch (fcode) -- { -- case IX86_BUILTIN_XSAVE: -- icode = CODE_FOR_xsave; -- break; -- case IX86_BUILTIN_XRSTOR: -- icode = CODE_FOR_xrstor; -- break; -- case IX86_BUILTIN_XSAVEOPT: -- icode = CODE_FOR_xsaveopt; -- break; -- case IX86_BUILTIN_XSAVES: -- icode = CODE_FOR_xsaves; -- break; -- case IX86_BUILTIN_XRSTORS: -- icode = CODE_FOR_xrstors; -- break; -- case IX86_BUILTIN_XSAVEC: -- icode = CODE_FOR_xsavec; -- break; -- default: -- gcc_unreachable (); -- } -- pat = GEN_FCN (icode) (op0, op1); -- } -- -- if (pat) -- emit_insn (pat); -- return 0; -- -- case IX86_BUILTIN_LLWPCB: -- arg0 = CALL_EXPR_ARG (exp, 0); -- op0 = expand_normal (arg0); -- icode = CODE_FOR_lwp_llwpcb; -- if (!insn_data[icode].operand[0].predicate (op0, Pmode)) -- op0 = ix86_zero_extend_to_Pmode (op0); -- emit_insn (gen_lwp_llwpcb (op0)); -- return 0; -- -- case IX86_BUILTIN_SLWPCB: -- icode = CODE_FOR_lwp_slwpcb; -- if (!target -- || !insn_data[icode].operand[0].predicate (target, Pmode)) -- target = gen_reg_rtx (Pmode); -- emit_insn (gen_lwp_slwpcb (target)); -- return target; -- -- case IX86_BUILTIN_BEXTRI32: -- case IX86_BUILTIN_BEXTRI64: -- arg0 = CALL_EXPR_ARG (exp, 0); -- arg1 = CALL_EXPR_ARG (exp, 1); -- op0 = expand_normal (arg0); -- op1 = expand_normal (arg1); -- icode = (fcode == IX86_BUILTIN_BEXTRI32 -- ? CODE_FOR_tbm_bextri_si -- : CODE_FOR_tbm_bextri_di); -- if (!CONST_INT_P (op1)) -- { -- error ("last argument must be an immediate"); -- return const0_rtx; -- } -- else -- { -- unsigned char length = (INTVAL (op1) >> 8) & 0xFF; -- unsigned char lsb_index = INTVAL (op1) & 0xFF; -- op1 = GEN_INT (length); -- op2 = GEN_INT (lsb_index); -- -- mode1 = insn_data[icode].operand[1].mode; -- if (!insn_data[icode].operand[1].predicate (op0, mode1)) -- op0 = copy_to_mode_reg (mode1, op0); -- -- mode0 = insn_data[icode].operand[0].mode; -- if (target == 0 -- || !register_operand (target, mode0)) -- target = gen_reg_rtx (mode0); -- -- pat = GEN_FCN (icode) (target, op0, op1, op2); -- if (pat) -- emit_insn (pat); -- return target; -- } -- -- case IX86_BUILTIN_RDRAND16_STEP: -- icode = CODE_FOR_rdrandhi_1; -- mode0 = HImode; -- goto rdrand_step; -- -- case IX86_BUILTIN_RDRAND32_STEP: -- icode = CODE_FOR_rdrandsi_1; -- mode0 = SImode; -- goto rdrand_step; -- -- case IX86_BUILTIN_RDRAND64_STEP: -- icode = CODE_FOR_rdranddi_1; -- mode0 = DImode; -- --rdrand_step: -- arg0 = CALL_EXPR_ARG (exp, 0); -- op1 = expand_normal (arg0); -- if (!address_operand (op1, VOIDmode)) -- { -- op1 = convert_memory_address (Pmode, op1); -- op1 = copy_addr_to_reg (op1); -- } -- -- op0 = gen_reg_rtx (mode0); -- emit_insn (GEN_FCN (icode) (op0)); -- -- emit_move_insn (gen_rtx_MEM (mode0, op1), op0); -- -- op1 = gen_reg_rtx (SImode); -- emit_move_insn (op1, CONST1_RTX (SImode)); -- -- /* Emit SImode conditional move. 
*/ -- if (mode0 == HImode) -- { -- if (TARGET_ZERO_EXTEND_WITH_AND -- && optimize_function_for_speed_p (cfun)) -- { -- op2 = force_reg (SImode, const0_rtx); -- -- emit_insn (gen_movstricthi -- (gen_lowpart (HImode, op2), op0)); -- } -- else -- { -- op2 = gen_reg_rtx (SImode); -- -- emit_insn (gen_zero_extendhisi2 (op2, op0)); -- } -- } -- else if (mode0 == SImode) -- op2 = op0; -- else -- op2 = gen_rtx_SUBREG (SImode, op0, 0); -- -- if (target == 0 -- || !register_operand (target, SImode)) -- target = gen_reg_rtx (SImode); -- -- pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG), -- const0_rtx); -- emit_insn (gen_rtx_SET (target, -- gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1))); -- return target; -- -- case IX86_BUILTIN_RDSEED16_STEP: -- icode = CODE_FOR_rdseedhi_1; -- mode0 = HImode; -- goto rdseed_step; -- -- case IX86_BUILTIN_RDSEED32_STEP: -- icode = CODE_FOR_rdseedsi_1; -- mode0 = SImode; -- goto rdseed_step; -- -- case IX86_BUILTIN_RDSEED64_STEP: -- icode = CODE_FOR_rdseeddi_1; -- mode0 = DImode; -- --rdseed_step: -- arg0 = CALL_EXPR_ARG (exp, 0); -- op1 = expand_normal (arg0); -- if (!address_operand (op1, VOIDmode)) -- { -- op1 = convert_memory_address (Pmode, op1); -- op1 = copy_addr_to_reg (op1); -- } -- -- op0 = gen_reg_rtx (mode0); -- emit_insn (GEN_FCN (icode) (op0)); -- -- emit_move_insn (gen_rtx_MEM (mode0, op1), op0); -- -- op2 = gen_reg_rtx (QImode); -- -- pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG), -- const0_rtx); -- emit_insn (gen_rtx_SET (op2, pat)); -- -- if (target == 0 -- || !register_operand (target, SImode)) -- target = gen_reg_rtx (SImode); -- -- emit_insn (gen_zero_extendqisi2 (target, op2)); -- return target; -- -- case IX86_BUILTIN_SBB32: -- icode = CODE_FOR_subborrowsi; -- icode2 = CODE_FOR_subborrowsi_0; -- mode0 = SImode; -- mode1 = DImode; -- mode2 = CCmode; -- goto handlecarry; -- -- case IX86_BUILTIN_SBB64: -- icode = CODE_FOR_subborrowdi; -- icode2 = CODE_FOR_subborrowdi_0; -- mode0 = DImode; -- mode1 = TImode; -- mode2 = CCmode; -- goto handlecarry; -- -- case IX86_BUILTIN_ADDCARRYX32: -- icode = CODE_FOR_addcarrysi; -- icode2 = CODE_FOR_addcarrysi_0; -- mode0 = SImode; -- mode1 = DImode; -- mode2 = CCCmode; -- goto handlecarry; -- -- case IX86_BUILTIN_ADDCARRYX64: -- icode = CODE_FOR_addcarrydi; -- icode2 = CODE_FOR_addcarrydi_0; -- mode0 = DImode; -- mode1 = TImode; -- mode2 = CCCmode; -- -- handlecarry: -- arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */ -- arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */ -- arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */ -- arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */ -- -- op1 = expand_normal (arg0); -- if (!integer_zerop (arg0)) -- op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1)); -- -- op2 = expand_normal (arg1); -- if (!register_operand (op2, mode0)) -- op2 = copy_to_mode_reg (mode0, op2); -- -- op3 = expand_normal (arg2); -- if (!register_operand (op3, mode0)) -- op3 = copy_to_mode_reg (mode0, op3); -- -- op4 = expand_normal (arg3); -- if (!address_operand (op4, VOIDmode)) -- { -- op4 = convert_memory_address (Pmode, op4); -- op4 = copy_addr_to_reg (op4); -- } -- -- op0 = gen_reg_rtx (mode0); -- if (integer_zerop (arg0)) -- { -- /* If arg0 is 0, optimize right away into add or sub -- instruction that sets CCCmode flags. */ -- op1 = gen_rtx_REG (mode2, FLAGS_REG); -- emit_insn (GEN_FCN (icode2) (op0, op2, op3)); -- } -- else -- { -- /* Generate CF from input operand. 
*/ -- emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx)); -- -- /* Generate instruction that consumes CF. */ -- op1 = gen_rtx_REG (CCCmode, FLAGS_REG); -- pat = gen_rtx_LTU (mode1, op1, const0_rtx); -- pat2 = gen_rtx_LTU (mode0, op1, const0_rtx); -- emit_insn (GEN_FCN (icode) (op0, op2, op3, op1, pat, pat2)); -- } -- -- /* Return current CF value. */ -- if (target == 0) -- target = gen_reg_rtx (QImode); -- -- pat = gen_rtx_LTU (QImode, op1, const0_rtx); -- emit_insn (gen_rtx_SET (target, pat)); -- -- /* Store the result. */ -- emit_move_insn (gen_rtx_MEM (mode0, op4), op0); -- -- return target; -- -- case IX86_BUILTIN_READ_FLAGS: -- emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG))); -- -- if (optimize -- || target == NULL_RTX -- || !nonimmediate_operand (target, word_mode) -- || GET_MODE (target) != word_mode) -- target = gen_reg_rtx (word_mode); -- -- emit_insn (gen_pop (target)); -- return target; -- -- case IX86_BUILTIN_WRITE_FLAGS: -- -- arg0 = CALL_EXPR_ARG (exp, 0); -- op0 = expand_normal (arg0); -- if (!general_no_elim_operand (op0, word_mode)) -- op0 = copy_to_mode_reg (word_mode, op0); -- -- emit_insn (gen_push (op0)); -- emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG))); -- return 0; -- -- case IX86_BUILTIN_KTESTC8: -- icode = CODE_FOR_ktestqi; -- mode3 = CCCmode; -- goto kortest; -- -- case IX86_BUILTIN_KTESTZ8: -- icode = CODE_FOR_ktestqi; -- mode3 = CCZmode; -- goto kortest; -- -- case IX86_BUILTIN_KTESTC16: -- icode = CODE_FOR_ktesthi; -- mode3 = CCCmode; -- goto kortest; -- -- case IX86_BUILTIN_KTESTZ16: -- icode = CODE_FOR_ktesthi; -- mode3 = CCZmode; -- goto kortest; -- -- case IX86_BUILTIN_KTESTC32: -- icode = CODE_FOR_ktestsi; -- mode3 = CCCmode; -- goto kortest; -- -- case IX86_BUILTIN_KTESTZ32: -- icode = CODE_FOR_ktestsi; -- mode3 = CCZmode; -- goto kortest; -- -- case IX86_BUILTIN_KTESTC64: -- icode = CODE_FOR_ktestdi; -- mode3 = CCCmode; -- goto kortest; -- -- case IX86_BUILTIN_KTESTZ64: -- icode = CODE_FOR_ktestdi; -- mode3 = CCZmode; -- goto kortest; -- -- case IX86_BUILTIN_KORTESTC8: -- icode = CODE_FOR_kortestqi; -- mode3 = CCCmode; -- goto kortest; -- -- case IX86_BUILTIN_KORTESTZ8: -- icode = CODE_FOR_kortestqi; -- mode3 = CCZmode; -- goto kortest; -- -- case IX86_BUILTIN_KORTESTC16: -- icode = CODE_FOR_kortesthi; -- mode3 = CCCmode; -- goto kortest; -- -- case IX86_BUILTIN_KORTESTZ16: -- icode = CODE_FOR_kortesthi; -- mode3 = CCZmode; -- goto kortest; -- -- case IX86_BUILTIN_KORTESTC32: -- icode = CODE_FOR_kortestsi; -- mode3 = CCCmode; -- goto kortest; -- -- case IX86_BUILTIN_KORTESTZ32: -- icode = CODE_FOR_kortestsi; -- mode3 = CCZmode; -- goto kortest; -- -- case IX86_BUILTIN_KORTESTC64: -- icode = CODE_FOR_kortestdi; -- mode3 = CCCmode; -- goto kortest; -- -- case IX86_BUILTIN_KORTESTZ64: -- icode = CODE_FOR_kortestdi; -- mode3 = CCZmode; -- -- kortest: -- arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */ -- arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. 
*/ -- op0 = expand_normal (arg0); -- op1 = expand_normal (arg1); -- -- mode0 = insn_data[icode].operand[0].mode; -- mode1 = insn_data[icode].operand[1].mode; -- -- if (GET_MODE (op0) != VOIDmode) -- op0 = force_reg (GET_MODE (op0), op0); -- -- op0 = gen_lowpart (mode0, op0); -- -- if (!insn_data[icode].operand[0].predicate (op0, mode0)) -- op0 = copy_to_mode_reg (mode0, op0); -- -- if (GET_MODE (op1) != VOIDmode) -- op1 = force_reg (GET_MODE (op1), op1); -- -- op1 = gen_lowpart (mode1, op1); -- -- if (!insn_data[icode].operand[1].predicate (op1, mode1)) -- op1 = copy_to_mode_reg (mode1, op1); -- -- target = gen_reg_rtx (QImode); -- -- /* Emit kortest. */ -- emit_insn (GEN_FCN (icode) (op0, op1)); -- /* And use setcc to return result from flags. */ -- ix86_expand_setcc (target, EQ, -- gen_rtx_REG (mode3, FLAGS_REG), const0_rtx); -- return target; -- -- case IX86_BUILTIN_GATHERSIV2DF: -- icode = CODE_FOR_avx2_gathersiv2df; -- goto gather_gen; -- case IX86_BUILTIN_GATHERSIV4DF: -- icode = CODE_FOR_avx2_gathersiv4df; -- goto gather_gen; -- case IX86_BUILTIN_GATHERDIV2DF: -- icode = CODE_FOR_avx2_gatherdiv2df; -- goto gather_gen; -- case IX86_BUILTIN_GATHERDIV4DF: -- icode = CODE_FOR_avx2_gatherdiv4df; -- goto gather_gen; -- case IX86_BUILTIN_GATHERSIV4SF: -- icode = CODE_FOR_avx2_gathersiv4sf; -- goto gather_gen; -- case IX86_BUILTIN_GATHERSIV8SF: -- icode = CODE_FOR_avx2_gathersiv8sf; -- goto gather_gen; -- case IX86_BUILTIN_GATHERDIV4SF: -- icode = CODE_FOR_avx2_gatherdiv4sf; -- goto gather_gen; -- case IX86_BUILTIN_GATHERDIV8SF: -- icode = CODE_FOR_avx2_gatherdiv8sf; -- goto gather_gen; -- case IX86_BUILTIN_GATHERSIV2DI: -- icode = CODE_FOR_avx2_gathersiv2di; -- goto gather_gen; -- case IX86_BUILTIN_GATHERSIV4DI: -- icode = CODE_FOR_avx2_gathersiv4di; -- goto gather_gen; -- case IX86_BUILTIN_GATHERDIV2DI: -- icode = CODE_FOR_avx2_gatherdiv2di; -- goto gather_gen; -- case IX86_BUILTIN_GATHERDIV4DI: -- icode = CODE_FOR_avx2_gatherdiv4di; -- goto gather_gen; -- case IX86_BUILTIN_GATHERSIV4SI: -- icode = CODE_FOR_avx2_gathersiv4si; -- goto gather_gen; -- case IX86_BUILTIN_GATHERSIV8SI: -- icode = CODE_FOR_avx2_gathersiv8si; -- goto gather_gen; -- case IX86_BUILTIN_GATHERDIV4SI: -- icode = CODE_FOR_avx2_gatherdiv4si; -- goto gather_gen; -- case IX86_BUILTIN_GATHERDIV8SI: -- icode = CODE_FOR_avx2_gatherdiv8si; -- goto gather_gen; -- case IX86_BUILTIN_GATHERALTSIV4DF: -- icode = CODE_FOR_avx2_gathersiv4df; -- goto gather_gen; -- case IX86_BUILTIN_GATHERALTDIV8SF: -- icode = CODE_FOR_avx2_gatherdiv8sf; -- goto gather_gen; -- case IX86_BUILTIN_GATHERALTSIV4DI: -- icode = CODE_FOR_avx2_gathersiv4di; -- goto gather_gen; -- case IX86_BUILTIN_GATHERALTDIV8SI: -- icode = CODE_FOR_avx2_gatherdiv8si; -- goto gather_gen; -- case IX86_BUILTIN_GATHER3SIV16SF: -- icode = CODE_FOR_avx512f_gathersiv16sf; -- goto gather_gen; -- case IX86_BUILTIN_GATHER3SIV8DF: -- icode = CODE_FOR_avx512f_gathersiv8df; -- goto gather_gen; -- case IX86_BUILTIN_GATHER3DIV16SF: -- icode = CODE_FOR_avx512f_gatherdiv16sf; -- goto gather_gen; -- case IX86_BUILTIN_GATHER3DIV8DF: -- icode = CODE_FOR_avx512f_gatherdiv8df; -- goto gather_gen; -- case IX86_BUILTIN_GATHER3SIV16SI: -- icode = CODE_FOR_avx512f_gathersiv16si; -- goto gather_gen; -- case IX86_BUILTIN_GATHER3SIV8DI: -- icode = CODE_FOR_avx512f_gathersiv8di; -- goto gather_gen; -- case IX86_BUILTIN_GATHER3DIV16SI: -- icode = CODE_FOR_avx512f_gatherdiv16si; -- goto gather_gen; -- case IX86_BUILTIN_GATHER3DIV8DI: -- icode = CODE_FOR_avx512f_gatherdiv8di; -- goto gather_gen; -- case 
IX86_BUILTIN_GATHER3ALTSIV8DF: -- icode = CODE_FOR_avx512f_gathersiv8df; -- goto gather_gen; -- case IX86_BUILTIN_GATHER3ALTDIV16SF: -- icode = CODE_FOR_avx512f_gatherdiv16sf; -- goto gather_gen; -- case IX86_BUILTIN_GATHER3ALTSIV8DI: -- icode = CODE_FOR_avx512f_gathersiv8di; -- goto gather_gen; -- case IX86_BUILTIN_GATHER3ALTDIV16SI: -- icode = CODE_FOR_avx512f_gatherdiv16si; -- goto gather_gen; -- case IX86_BUILTIN_GATHER3SIV2DF: -- icode = CODE_FOR_avx512vl_gathersiv2df; -- goto gather_gen; -- case IX86_BUILTIN_GATHER3SIV4DF: -- icode = CODE_FOR_avx512vl_gathersiv4df; -- goto gather_gen; -- case IX86_BUILTIN_GATHER3DIV2DF: -- icode = CODE_FOR_avx512vl_gatherdiv2df; -- goto gather_gen; -- case IX86_BUILTIN_GATHER3DIV4DF: -- icode = CODE_FOR_avx512vl_gatherdiv4df; -- goto gather_gen; -- case IX86_BUILTIN_GATHER3SIV4SF: -- icode = CODE_FOR_avx512vl_gathersiv4sf; -- goto gather_gen; -- case IX86_BUILTIN_GATHER3SIV8SF: -- icode = CODE_FOR_avx512vl_gathersiv8sf; -- goto gather_gen; -- case IX86_BUILTIN_GATHER3DIV4SF: -- icode = CODE_FOR_avx512vl_gatherdiv4sf; -- goto gather_gen; -- case IX86_BUILTIN_GATHER3DIV8SF: -- icode = CODE_FOR_avx512vl_gatherdiv8sf; -- goto gather_gen; -- case IX86_BUILTIN_GATHER3SIV2DI: -- icode = CODE_FOR_avx512vl_gathersiv2di; -- goto gather_gen; -- case IX86_BUILTIN_GATHER3SIV4DI: -- icode = CODE_FOR_avx512vl_gathersiv4di; -- goto gather_gen; -- case IX86_BUILTIN_GATHER3DIV2DI: -- icode = CODE_FOR_avx512vl_gatherdiv2di; -- goto gather_gen; -- case IX86_BUILTIN_GATHER3DIV4DI: -- icode = CODE_FOR_avx512vl_gatherdiv4di; -- goto gather_gen; -- case IX86_BUILTIN_GATHER3SIV4SI: -- icode = CODE_FOR_avx512vl_gathersiv4si; -- goto gather_gen; -- case IX86_BUILTIN_GATHER3SIV8SI: -- icode = CODE_FOR_avx512vl_gathersiv8si; -- goto gather_gen; -- case IX86_BUILTIN_GATHER3DIV4SI: -- icode = CODE_FOR_avx512vl_gatherdiv4si; -- goto gather_gen; -- case IX86_BUILTIN_GATHER3DIV8SI: -- icode = CODE_FOR_avx512vl_gatherdiv8si; -- goto gather_gen; -- case IX86_BUILTIN_GATHER3ALTSIV4DF: -- icode = CODE_FOR_avx512vl_gathersiv4df; -- goto gather_gen; -- case IX86_BUILTIN_GATHER3ALTDIV8SF: -- icode = CODE_FOR_avx512vl_gatherdiv8sf; -- goto gather_gen; -- case IX86_BUILTIN_GATHER3ALTSIV4DI: -- icode = CODE_FOR_avx512vl_gathersiv4di; -- goto gather_gen; -- case IX86_BUILTIN_GATHER3ALTDIV8SI: -- icode = CODE_FOR_avx512vl_gatherdiv8si; -- goto gather_gen; -- case IX86_BUILTIN_SCATTERSIV16SF: -- icode = CODE_FOR_avx512f_scattersiv16sf; -- goto scatter_gen; -- case IX86_BUILTIN_SCATTERSIV8DF: -- icode = CODE_FOR_avx512f_scattersiv8df; -- goto scatter_gen; -- case IX86_BUILTIN_SCATTERDIV16SF: -- icode = CODE_FOR_avx512f_scatterdiv16sf; -- goto scatter_gen; -- case IX86_BUILTIN_SCATTERDIV8DF: -- icode = CODE_FOR_avx512f_scatterdiv8df; -- goto scatter_gen; -- case IX86_BUILTIN_SCATTERSIV16SI: -- icode = CODE_FOR_avx512f_scattersiv16si; -- goto scatter_gen; -- case IX86_BUILTIN_SCATTERSIV8DI: -- icode = CODE_FOR_avx512f_scattersiv8di; -- goto scatter_gen; -- case IX86_BUILTIN_SCATTERDIV16SI: -- icode = CODE_FOR_avx512f_scatterdiv16si; -- goto scatter_gen; -- case IX86_BUILTIN_SCATTERDIV8DI: -- icode = CODE_FOR_avx512f_scatterdiv8di; -- goto scatter_gen; -- case IX86_BUILTIN_SCATTERSIV8SF: -- icode = CODE_FOR_avx512vl_scattersiv8sf; -- goto scatter_gen; -- case IX86_BUILTIN_SCATTERSIV4SF: -- icode = CODE_FOR_avx512vl_scattersiv4sf; -- goto scatter_gen; -- case IX86_BUILTIN_SCATTERSIV4DF: -- icode = CODE_FOR_avx512vl_scattersiv4df; -- goto scatter_gen; -- case IX86_BUILTIN_SCATTERSIV2DF: -- icode = 
CODE_FOR_avx512vl_scattersiv2df; -- goto scatter_gen; -- case IX86_BUILTIN_SCATTERDIV8SF: -- icode = CODE_FOR_avx512vl_scatterdiv8sf; -- goto scatter_gen; -- case IX86_BUILTIN_SCATTERDIV4SF: -- icode = CODE_FOR_avx512vl_scatterdiv4sf; -- goto scatter_gen; -- case IX86_BUILTIN_SCATTERDIV4DF: -- icode = CODE_FOR_avx512vl_scatterdiv4df; -- goto scatter_gen; -- case IX86_BUILTIN_SCATTERDIV2DF: -- icode = CODE_FOR_avx512vl_scatterdiv2df; -- goto scatter_gen; -- case IX86_BUILTIN_SCATTERSIV8SI: -- icode = CODE_FOR_avx512vl_scattersiv8si; -- goto scatter_gen; -- case IX86_BUILTIN_SCATTERSIV4SI: -- icode = CODE_FOR_avx512vl_scattersiv4si; -- goto scatter_gen; -- case IX86_BUILTIN_SCATTERSIV4DI: -- icode = CODE_FOR_avx512vl_scattersiv4di; -- goto scatter_gen; -- case IX86_BUILTIN_SCATTERSIV2DI: -- icode = CODE_FOR_avx512vl_scattersiv2di; -- goto scatter_gen; -- case IX86_BUILTIN_SCATTERDIV8SI: -- icode = CODE_FOR_avx512vl_scatterdiv8si; -- goto scatter_gen; -- case IX86_BUILTIN_SCATTERDIV4SI: -- icode = CODE_FOR_avx512vl_scatterdiv4si; -- goto scatter_gen; -- case IX86_BUILTIN_SCATTERDIV4DI: -- icode = CODE_FOR_avx512vl_scatterdiv4di; -- goto scatter_gen; -- case IX86_BUILTIN_SCATTERDIV2DI: -- icode = CODE_FOR_avx512vl_scatterdiv2di; -- goto scatter_gen; -- case IX86_BUILTIN_GATHERPFDPD: -- icode = CODE_FOR_avx512pf_gatherpfv8sidf; -- goto vec_prefetch_gen; -- case IX86_BUILTIN_SCATTERALTSIV8DF: -- icode = CODE_FOR_avx512f_scattersiv8df; -- goto scatter_gen; -- case IX86_BUILTIN_SCATTERALTDIV16SF: -- icode = CODE_FOR_avx512f_scatterdiv16sf; -- goto scatter_gen; -- case IX86_BUILTIN_SCATTERALTSIV8DI: -- icode = CODE_FOR_avx512f_scattersiv8di; -- goto scatter_gen; -- case IX86_BUILTIN_SCATTERALTDIV16SI: -- icode = CODE_FOR_avx512f_scatterdiv16si; -- goto scatter_gen; -- case IX86_BUILTIN_SCATTERALTSIV4DF: -- icode = CODE_FOR_avx512vl_scattersiv4df; -- goto scatter_gen; -- case IX86_BUILTIN_SCATTERALTDIV8SF: -- icode = CODE_FOR_avx512vl_scatterdiv8sf; -- goto scatter_gen; -- case IX86_BUILTIN_SCATTERALTSIV4DI: -- icode = CODE_FOR_avx512vl_scattersiv4di; -- goto scatter_gen; -- case IX86_BUILTIN_SCATTERALTDIV8SI: -- icode = CODE_FOR_avx512vl_scatterdiv8si; -- goto scatter_gen; -- case IX86_BUILTIN_SCATTERALTSIV2DF: -- icode = CODE_FOR_avx512vl_scattersiv2df; -- goto scatter_gen; -- case IX86_BUILTIN_SCATTERALTDIV4SF: -- icode = CODE_FOR_avx512vl_scatterdiv4sf; -- goto scatter_gen; -- case IX86_BUILTIN_SCATTERALTSIV2DI: -- icode = CODE_FOR_avx512vl_scattersiv2di; -- goto scatter_gen; -- case IX86_BUILTIN_SCATTERALTDIV4SI: -- icode = CODE_FOR_avx512vl_scatterdiv4si; -- goto scatter_gen; -- case IX86_BUILTIN_GATHERPFDPS: -- icode = CODE_FOR_avx512pf_gatherpfv16sisf; -- goto vec_prefetch_gen; -- case IX86_BUILTIN_GATHERPFQPD: -- icode = CODE_FOR_avx512pf_gatherpfv8didf; -- goto vec_prefetch_gen; -- case IX86_BUILTIN_GATHERPFQPS: -- icode = CODE_FOR_avx512pf_gatherpfv8disf; -- goto vec_prefetch_gen; -- case IX86_BUILTIN_SCATTERPFDPD: -- icode = CODE_FOR_avx512pf_scatterpfv8sidf; -- goto vec_prefetch_gen; -- case IX86_BUILTIN_SCATTERPFDPS: -- icode = CODE_FOR_avx512pf_scatterpfv16sisf; -- goto vec_prefetch_gen; -- case IX86_BUILTIN_SCATTERPFQPD: -- icode = CODE_FOR_avx512pf_scatterpfv8didf; -- goto vec_prefetch_gen; -- case IX86_BUILTIN_SCATTERPFQPS: -- icode = CODE_FOR_avx512pf_scatterpfv8disf; -- goto vec_prefetch_gen; -- -- gather_gen: -- rtx half; -- rtx (*gen) (rtx, rtx); -- -- arg0 = CALL_EXPR_ARG (exp, 0); -- arg1 = CALL_EXPR_ARG (exp, 1); -- arg2 = CALL_EXPR_ARG (exp, 2); -- arg3 = CALL_EXPR_ARG 
(exp, 3); -- arg4 = CALL_EXPR_ARG (exp, 4); -- op0 = expand_normal (arg0); -- op1 = expand_normal (arg1); -- op2 = expand_normal (arg2); -- op3 = expand_normal (arg3); -- op4 = expand_normal (arg4); -- /* Note the arg order is different from the operand order. */ -- mode0 = insn_data[icode].operand[1].mode; -- mode2 = insn_data[icode].operand[3].mode; -- mode3 = insn_data[icode].operand[4].mode; -- mode4 = insn_data[icode].operand[5].mode; -- -- if (target == NULL_RTX -- || GET_MODE (target) != insn_data[icode].operand[0].mode -- || !insn_data[icode].operand[0].predicate (target, -- GET_MODE (target))) -- subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode); -- else -- subtarget = target; -- -- switch (fcode) -- { -- case IX86_BUILTIN_GATHER3ALTSIV8DF: -- case IX86_BUILTIN_GATHER3ALTSIV8DI: -- half = gen_reg_rtx (V8SImode); -- if (!nonimmediate_operand (op2, V16SImode)) -- op2 = copy_to_mode_reg (V16SImode, op2); -- emit_insn (gen_vec_extract_lo_v16si (half, op2)); -- op2 = half; -- break; -- case IX86_BUILTIN_GATHER3ALTSIV4DF: -- case IX86_BUILTIN_GATHER3ALTSIV4DI: -- case IX86_BUILTIN_GATHERALTSIV4DF: -- case IX86_BUILTIN_GATHERALTSIV4DI: -- half = gen_reg_rtx (V4SImode); -- if (!nonimmediate_operand (op2, V8SImode)) -- op2 = copy_to_mode_reg (V8SImode, op2); -- emit_insn (gen_vec_extract_lo_v8si (half, op2)); -- op2 = half; -- break; -- case IX86_BUILTIN_GATHER3ALTDIV16SF: -- case IX86_BUILTIN_GATHER3ALTDIV16SI: -- half = gen_reg_rtx (mode0); -- if (mode0 == V8SFmode) -- gen = gen_vec_extract_lo_v16sf; -- else -- gen = gen_vec_extract_lo_v16si; -- if (!nonimmediate_operand (op0, GET_MODE (op0))) -- op0 = copy_to_mode_reg (GET_MODE (op0), op0); -- emit_insn (gen (half, op0)); -- op0 = half; -- op3 = lowpart_subreg (QImode, op3, HImode); -- break; -- case IX86_BUILTIN_GATHER3ALTDIV8SF: -- case IX86_BUILTIN_GATHER3ALTDIV8SI: -- case IX86_BUILTIN_GATHERALTDIV8SF: -- case IX86_BUILTIN_GATHERALTDIV8SI: -- half = gen_reg_rtx (mode0); -- if (mode0 == V4SFmode) -- gen = gen_vec_extract_lo_v8sf; -- else -- gen = gen_vec_extract_lo_v8si; -- if (!nonimmediate_operand (op0, GET_MODE (op0))) -- op0 = copy_to_mode_reg (GET_MODE (op0), op0); -- emit_insn (gen (half, op0)); -- op0 = half; -- if (VECTOR_MODE_P (GET_MODE (op3))) -- { -- half = gen_reg_rtx (mode0); -- if (!nonimmediate_operand (op3, GET_MODE (op3))) -- op3 = copy_to_mode_reg (GET_MODE (op3), op3); -- emit_insn (gen (half, op3)); -- op3 = half; -- } -- break; -- default: -- break; -- } -- -- /* Force memory operand only with base register here. But we -- don't want to do it on memory operand for other builtin -- functions. */ -- op1 = ix86_zero_extend_to_Pmode (op1); -- -- if (!insn_data[icode].operand[1].predicate (op0, mode0)) -- op0 = copy_to_mode_reg (mode0, op0); -- if (!insn_data[icode].operand[2].predicate (op1, Pmode)) -- op1 = copy_to_mode_reg (Pmode, op1); -- if (!insn_data[icode].operand[3].predicate (op2, mode2)) -- op2 = copy_to_mode_reg (mode2, op2); -- -- op3 = fixup_modeless_constant (op3, mode3); -- -- if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode) -- { -- if (!insn_data[icode].operand[4].predicate (op3, mode3)) -- op3 = copy_to_mode_reg (mode3, op3); -- } -- else -- { -- op3 = copy_to_reg (op3); -- op3 = lowpart_subreg (mode3, op3, GET_MODE (op3)); -- } -- if (!insn_data[icode].operand[5].predicate (op4, mode4)) -- { -- error ("the last argument must be scale 1, 2, 4, 8"); -- return const0_rtx; -- } -- -- /* Optimize. 
If mask is known to have all high bits set, -- replace op0 with pc_rtx to signal that the instruction -- overwrites the whole destination and doesn't use its -- previous contents. */ -- if (optimize) -- { -- if (TREE_CODE (arg3) == INTEGER_CST) -- { -- if (integer_all_onesp (arg3)) -- op0 = pc_rtx; -- } -- else if (TREE_CODE (arg3) == VECTOR_CST) -- { -- unsigned int negative = 0; -- for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i) -- { -- tree cst = VECTOR_CST_ELT (arg3, i); -- if (TREE_CODE (cst) == INTEGER_CST -- && tree_int_cst_sign_bit (cst)) -- negative++; -- else if (TREE_CODE (cst) == REAL_CST -- && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst))) -- negative++; -- } -- if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3))) -- op0 = pc_rtx; -- } -- else if (TREE_CODE (arg3) == SSA_NAME -- && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE) -- { -- /* Recognize also when mask is like: -- __v2df src = _mm_setzero_pd (); -- __v2df mask = _mm_cmpeq_pd (src, src); -- or -- __v8sf src = _mm256_setzero_ps (); -- __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ); -- as that is a cheaper way to load all ones into -- a register than having to load a constant from -- memory. */ -- gimple *def_stmt = SSA_NAME_DEF_STMT (arg3); -- if (is_gimple_call (def_stmt)) -- { -- tree fndecl = gimple_call_fndecl (def_stmt); -- if (fndecl -- && fndecl_built_in_p (fndecl, BUILT_IN_MD)) -- switch ((unsigned int) DECL_FUNCTION_CODE (fndecl)) -- { -- case IX86_BUILTIN_CMPPD: -- case IX86_BUILTIN_CMPPS: -- case IX86_BUILTIN_CMPPD256: -- case IX86_BUILTIN_CMPPS256: -- if (!integer_zerop (gimple_call_arg (def_stmt, 2))) -- break; -- /* FALLTHRU */ -- case IX86_BUILTIN_CMPEQPD: -- case IX86_BUILTIN_CMPEQPS: -- if (initializer_zerop (gimple_call_arg (def_stmt, 0)) -- && initializer_zerop (gimple_call_arg (def_stmt, -- 1))) -- op0 = pc_rtx; -- break; -- default: -- break; -- } -- } -- } -- } -- -- pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4); -- if (! pat) -- return const0_rtx; -- emit_insn (pat); -- -- switch (fcode) -- { -- case IX86_BUILTIN_GATHER3DIV16SF: -- if (target == NULL_RTX) -- target = gen_reg_rtx (V8SFmode); -- emit_insn (gen_vec_extract_lo_v16sf (target, subtarget)); -- break; -- case IX86_BUILTIN_GATHER3DIV16SI: -- if (target == NULL_RTX) -- target = gen_reg_rtx (V8SImode); -- emit_insn (gen_vec_extract_lo_v16si (target, subtarget)); -- break; -- case IX86_BUILTIN_GATHER3DIV8SF: -- case IX86_BUILTIN_GATHERDIV8SF: -- if (target == NULL_RTX) -- target = gen_reg_rtx (V4SFmode); -- emit_insn (gen_vec_extract_lo_v8sf (target, subtarget)); -- break; -- case IX86_BUILTIN_GATHER3DIV8SI: -- case IX86_BUILTIN_GATHERDIV8SI: -- if (target == NULL_RTX) -- target = gen_reg_rtx (V4SImode); -- emit_insn (gen_vec_extract_lo_v8si (target, subtarget)); -- break; -- default: -- target = subtarget; -- break; -- } -- return target; -- -- scatter_gen: -- arg0 = CALL_EXPR_ARG (exp, 0); -- arg1 = CALL_EXPR_ARG (exp, 1); -- arg2 = CALL_EXPR_ARG (exp, 2); -- arg3 = CALL_EXPR_ARG (exp, 3); -- arg4 = CALL_EXPR_ARG (exp, 4); -- op0 = expand_normal (arg0); -- op1 = expand_normal (arg1); -- op2 = expand_normal (arg2); -- op3 = expand_normal (arg3); -- op4 = expand_normal (arg4); -- mode1 = insn_data[icode].operand[1].mode; -- mode2 = insn_data[icode].operand[2].mode; -- mode3 = insn_data[icode].operand[3].mode; -- mode4 = insn_data[icode].operand[4].mode; -- -- /* Scatter instruction stores operand op3 to memory with -- indices from op2 and scale from op4 under writemask op1. 
-- If index operand op2 has more elements then source operand -- op3 one need to use only its low half. And vice versa. */ -- switch (fcode) -- { -- case IX86_BUILTIN_SCATTERALTSIV8DF: -- case IX86_BUILTIN_SCATTERALTSIV8DI: -- half = gen_reg_rtx (V8SImode); -- if (!nonimmediate_operand (op2, V16SImode)) -- op2 = copy_to_mode_reg (V16SImode, op2); -- emit_insn (gen_vec_extract_lo_v16si (half, op2)); -- op2 = half; -- break; -- case IX86_BUILTIN_SCATTERALTDIV16SF: -- case IX86_BUILTIN_SCATTERALTDIV16SI: -- half = gen_reg_rtx (mode3); -- if (mode3 == V8SFmode) -- gen = gen_vec_extract_lo_v16sf; -- else -- gen = gen_vec_extract_lo_v16si; -- if (!nonimmediate_operand (op3, GET_MODE (op3))) -- op3 = copy_to_mode_reg (GET_MODE (op3), op3); -- emit_insn (gen (half, op3)); -- op3 = half; -- break; -- case IX86_BUILTIN_SCATTERALTSIV4DF: -- case IX86_BUILTIN_SCATTERALTSIV4DI: -- half = gen_reg_rtx (V4SImode); -- if (!nonimmediate_operand (op2, V8SImode)) -- op2 = copy_to_mode_reg (V8SImode, op2); -- emit_insn (gen_vec_extract_lo_v8si (half, op2)); -- op2 = half; -- break; -- case IX86_BUILTIN_SCATTERALTDIV8SF: -- case IX86_BUILTIN_SCATTERALTDIV8SI: -- half = gen_reg_rtx (mode3); -- if (mode3 == V4SFmode) -- gen = gen_vec_extract_lo_v8sf; -- else -- gen = gen_vec_extract_lo_v8si; -- if (!nonimmediate_operand (op3, GET_MODE (op3))) -- op3 = copy_to_mode_reg (GET_MODE (op3), op3); -- emit_insn (gen (half, op3)); -- op3 = half; -- break; -- case IX86_BUILTIN_SCATTERALTSIV2DF: -- case IX86_BUILTIN_SCATTERALTSIV2DI: -- if (!nonimmediate_operand (op2, V4SImode)) -- op2 = copy_to_mode_reg (V4SImode, op2); -- break; -- case IX86_BUILTIN_SCATTERALTDIV4SF: -- case IX86_BUILTIN_SCATTERALTDIV4SI: -- if (!nonimmediate_operand (op3, GET_MODE (op3))) -- op3 = copy_to_mode_reg (GET_MODE (op3), op3); -- break; -- default: -- break; -- } -- -- /* Force memory operand only with base register here. But we -- don't want to do it on memory operand for other builtin -- functions. */ -- op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1)); -- -- if (!insn_data[icode].operand[0].predicate (op0, Pmode)) -- op0 = copy_to_mode_reg (Pmode, op0); -- -- op1 = fixup_modeless_constant (op1, mode1); -- -- if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode) -- { -- if (!insn_data[icode].operand[1].predicate (op1, mode1)) -- op1 = copy_to_mode_reg (mode1, op1); -- } -- else -- { -- op1 = copy_to_reg (op1); -- op1 = lowpart_subreg (mode1, op1, GET_MODE (op1)); -- } -- -- if (!insn_data[icode].operand[2].predicate (op2, mode2)) -- op2 = copy_to_mode_reg (mode2, op2); -- -- if (!insn_data[icode].operand[3].predicate (op3, mode3)) -- op3 = copy_to_mode_reg (mode3, op3); -- -- if (!insn_data[icode].operand[4].predicate (op4, mode4)) -- { -- error ("the last argument must be scale 1, 2, 4, 8"); -- return const0_rtx; -- } -- -- pat = GEN_FCN (icode) (op0, op1, op2, op3, op4); -- if (! 
pat) -- return const0_rtx; -- -- emit_insn (pat); -- return 0; -- -- vec_prefetch_gen: -- arg0 = CALL_EXPR_ARG (exp, 0); -- arg1 = CALL_EXPR_ARG (exp, 1); -- arg2 = CALL_EXPR_ARG (exp, 2); -- arg3 = CALL_EXPR_ARG (exp, 3); -- arg4 = CALL_EXPR_ARG (exp, 4); -- op0 = expand_normal (arg0); -- op1 = expand_normal (arg1); -- op2 = expand_normal (arg2); -- op3 = expand_normal (arg3); -- op4 = expand_normal (arg4); -- mode0 = insn_data[icode].operand[0].mode; -- mode1 = insn_data[icode].operand[1].mode; -- mode3 = insn_data[icode].operand[3].mode; -- mode4 = insn_data[icode].operand[4].mode; -- -- op0 = fixup_modeless_constant (op0, mode0); -- -- if (GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode) -- { -- if (!insn_data[icode].operand[0].predicate (op0, mode0)) -- op0 = copy_to_mode_reg (mode0, op0); -- } -- else -- { -- op0 = copy_to_reg (op0); -- op0 = lowpart_subreg (mode0, op0, GET_MODE (op0)); -- } -- -- if (!insn_data[icode].operand[1].predicate (op1, mode1)) -- op1 = copy_to_mode_reg (mode1, op1); -- -- /* Force memory operand only with base register here. But we -- don't want to do it on memory operand for other builtin -- functions. */ -- op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1)); -- -- if (!insn_data[icode].operand[2].predicate (op2, Pmode)) -- op2 = copy_to_mode_reg (Pmode, op2); -- -- if (!insn_data[icode].operand[3].predicate (op3, mode3)) -- { -- error ("the forth argument must be scale 1, 2, 4, 8"); -- return const0_rtx; -- } -- -- if (!insn_data[icode].operand[4].predicate (op4, mode4)) -- { -- error ("incorrect hint operand"); -- return const0_rtx; -- } -- -- pat = GEN_FCN (icode) (op0, op1, op2, op3, op4); -- if (! pat) -- return const0_rtx; -- -- emit_insn (pat); -- -- return 0; -- -- case IX86_BUILTIN_XABORT: -- icode = CODE_FOR_xabort; -- arg0 = CALL_EXPR_ARG (exp, 0); -- op0 = expand_normal (arg0); -- mode0 = insn_data[icode].operand[0].mode; -- if (!insn_data[icode].operand[0].predicate (op0, mode0)) -- { -- error ("the argument to % intrinsic must " -- "be an 8-bit immediate"); -- return const0_rtx; -- } -- emit_insn (gen_xabort (op0)); -- return 0; -- -- case IX86_BUILTIN_RSTORSSP: -- case IX86_BUILTIN_CLRSSBSY: -- arg0 = CALL_EXPR_ARG (exp, 0); -- op0 = expand_normal (arg0); -- icode = (fcode == IX86_BUILTIN_RSTORSSP -- ? 
CODE_FOR_rstorssp -- : CODE_FOR_clrssbsy); -- if (!address_operand (op0, VOIDmode)) -- { -- op1 = convert_memory_address (Pmode, op0); -- op0 = copy_addr_to_reg (op1); -- } -- emit_insn (GEN_FCN (icode) (gen_rtx_MEM (Pmode, op0))); -- return 0; -- -- case IX86_BUILTIN_WRSSD: -- case IX86_BUILTIN_WRSSQ: -- case IX86_BUILTIN_WRUSSD: -- case IX86_BUILTIN_WRUSSQ: -- arg0 = CALL_EXPR_ARG (exp, 0); -- op0 = expand_normal (arg0); -- arg1 = CALL_EXPR_ARG (exp, 1); -- op1 = expand_normal (arg1); -- switch (fcode) -- { -- case IX86_BUILTIN_WRSSD: -- icode = CODE_FOR_wrsssi; -- mode = SImode; -- break; -- case IX86_BUILTIN_WRSSQ: -- icode = CODE_FOR_wrssdi; -- mode = DImode; -- break; -- case IX86_BUILTIN_WRUSSD: -- icode = CODE_FOR_wrusssi; -- mode = SImode; -- break; -- case IX86_BUILTIN_WRUSSQ: -- icode = CODE_FOR_wrussdi; -- mode = DImode; -- break; -- } -- op0 = force_reg (mode, op0); -- if (!address_operand (op1, VOIDmode)) -- { -- op2 = convert_memory_address (Pmode, op1); -- op1 = copy_addr_to_reg (op2); -- } -- emit_insn (GEN_FCN (icode) (op0, gen_rtx_MEM (mode, op1))); -- return 0; -- -- default: -- break; -- } -- -- if (fcode >= IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST -- && fcode <= IX86_BUILTIN__BDESC_SPECIAL_ARGS_LAST) -- { -- i = fcode - IX86_BUILTIN__BDESC_SPECIAL_ARGS_FIRST; -- return ix86_expand_special_args_builtin (bdesc_special_args + i, exp, -- target); -- } -- -- if (fcode >= IX86_BUILTIN__BDESC_ARGS_FIRST -- && fcode <= IX86_BUILTIN__BDESC_ARGS_LAST) -- { -- i = fcode - IX86_BUILTIN__BDESC_ARGS_FIRST; -- rtx (*fcn) (rtx, rtx, rtx, rtx) = NULL; -- rtx (*fcn_mask) (rtx, rtx, rtx, rtx, rtx); -- rtx (*fcn_maskz) (rtx, rtx, rtx, rtx, rtx, rtx); -- int masked = 1; -- machine_mode mode, wide_mode, nar_mode; -- -- nar_mode = V4SFmode; -- mode = V16SFmode; -- wide_mode = V64SFmode; -- fcn_mask = gen_avx5124fmaddps_4fmaddps_mask; -- fcn_maskz = gen_avx5124fmaddps_4fmaddps_maskz; -- -- switch (fcode) -- { -- case IX86_BUILTIN_4FMAPS: -- fcn = gen_avx5124fmaddps_4fmaddps; -- masked = 0; -- goto v4fma_expand; -- -- case IX86_BUILTIN_4DPWSSD: -- nar_mode = V4SImode; -- mode = V16SImode; -- wide_mode = V64SImode; -- fcn = gen_avx5124vnniw_vp4dpwssd; -- masked = 0; -- goto v4fma_expand; -- -- case IX86_BUILTIN_4DPWSSDS: -- nar_mode = V4SImode; -- mode = V16SImode; -- wide_mode = V64SImode; -- fcn = gen_avx5124vnniw_vp4dpwssds; -- masked = 0; -- goto v4fma_expand; -- -- case IX86_BUILTIN_4FNMAPS: -- fcn = gen_avx5124fmaddps_4fnmaddps; -- masked = 0; -- goto v4fma_expand; -- -- case IX86_BUILTIN_4FNMAPS_MASK: -- fcn_mask = gen_avx5124fmaddps_4fnmaddps_mask; -- fcn_maskz = gen_avx5124fmaddps_4fnmaddps_maskz; -- goto v4fma_expand; -- -- case IX86_BUILTIN_4DPWSSD_MASK: -- nar_mode = V4SImode; -- mode = V16SImode; -- wide_mode = V64SImode; -- fcn_mask = gen_avx5124vnniw_vp4dpwssd_mask; -- fcn_maskz = gen_avx5124vnniw_vp4dpwssd_maskz; -- goto v4fma_expand; -- -- case IX86_BUILTIN_4DPWSSDS_MASK: -- nar_mode = V4SImode; -- mode = V16SImode; -- wide_mode = V64SImode; -- fcn_mask = gen_avx5124vnniw_vp4dpwssds_mask; -- fcn_maskz = gen_avx5124vnniw_vp4dpwssds_maskz; -- goto v4fma_expand; -- -- case IX86_BUILTIN_4FMAPS_MASK: -- { -- tree args[4]; -- rtx ops[4]; -- rtx wide_reg; -- rtx accum; -- rtx addr; -- rtx mem; -- --v4fma_expand: -- wide_reg = gen_reg_rtx (wide_mode); -- for (i = 0; i < 4; i++) -- { -- args[i] = CALL_EXPR_ARG (exp, i); -- ops[i] = expand_normal (args[i]); -- -- emit_move_insn (gen_rtx_SUBREG (mode, wide_reg, i * 64), -- ops[i]); -- } -- -- accum = expand_normal (CALL_EXPR_ARG (exp, 4)); 
-- accum = force_reg (mode, accum); -- -- addr = expand_normal (CALL_EXPR_ARG (exp, 5)); -- addr = force_reg (Pmode, addr); -- -- mem = gen_rtx_MEM (nar_mode, addr); -- -- target = gen_reg_rtx (mode); -- -- emit_move_insn (target, accum); -- -- if (! masked) -- emit_insn (fcn (target, accum, wide_reg, mem)); -- else -- { -- rtx merge, mask; -- merge = expand_normal (CALL_EXPR_ARG (exp, 6)); -- -- mask = expand_normal (CALL_EXPR_ARG (exp, 7)); -- -- if (CONST_INT_P (mask)) -- mask = fixup_modeless_constant (mask, HImode); -- -- mask = force_reg (HImode, mask); -- -- if (GET_MODE (mask) != HImode) -- mask = gen_rtx_SUBREG (HImode, mask, 0); -- -- /* If merge is 0 then we're about to emit z-masked variant. */ -- if (const0_operand (merge, mode)) -- emit_insn (fcn_maskz (target, accum, wide_reg, mem, merge, mask)); -- /* If merge is the same as accum then emit merge-masked variant. */ -- else if (CALL_EXPR_ARG (exp, 6) == CALL_EXPR_ARG (exp, 4)) -- { -- merge = force_reg (mode, merge); -- emit_insn (fcn_mask (target, wide_reg, mem, merge, mask)); -- } -- /* Merge with something unknown might happen if we z-mask w/ -O0. */ -- else -- { -- target = gen_reg_rtx (mode); -- emit_move_insn (target, merge); -- emit_insn (fcn_mask (target, wide_reg, mem, target, mask)); -- } -- } -- return target; -- } -- -- case IX86_BUILTIN_4FNMASS: -- fcn = gen_avx5124fmaddps_4fnmaddss; -- masked = 0; -- goto s4fma_expand; -- -- case IX86_BUILTIN_4FMASS: -- fcn = gen_avx5124fmaddps_4fmaddss; -- masked = 0; -- goto s4fma_expand; -- -- case IX86_BUILTIN_4FNMASS_MASK: -- fcn_mask = gen_avx5124fmaddps_4fnmaddss_mask; -- fcn_maskz = gen_avx5124fmaddps_4fnmaddss_maskz; -- goto s4fma_expand; -- -- case IX86_BUILTIN_4FMASS_MASK: -- { -- tree args[4]; -- rtx ops[4]; -- rtx wide_reg; -- rtx accum; -- rtx addr; -- rtx mem; -- -- fcn_mask = gen_avx5124fmaddps_4fmaddss_mask; -- fcn_maskz = gen_avx5124fmaddps_4fmaddss_maskz; -- --s4fma_expand: -- mode = V4SFmode; -- wide_reg = gen_reg_rtx (V64SFmode); -- for (i = 0; i < 4; i++) -- { -- rtx tmp; -- args[i] = CALL_EXPR_ARG (exp, i); -- ops[i] = expand_normal (args[i]); -- -- tmp = gen_reg_rtx (SFmode); -- emit_move_insn (tmp, gen_rtx_SUBREG (SFmode, ops[i], 0)); -- -- emit_move_insn (gen_rtx_SUBREG (V16SFmode, wide_reg, i * 64), -- gen_rtx_SUBREG (V16SFmode, tmp, 0)); -- } -- -- accum = expand_normal (CALL_EXPR_ARG (exp, 4)); -- accum = force_reg (V4SFmode, accum); -- -- addr = expand_normal (CALL_EXPR_ARG (exp, 5)); -- addr = force_reg (Pmode, addr); -- -- mem = gen_rtx_MEM (V4SFmode, addr); -- -- target = gen_reg_rtx (V4SFmode); -- -- emit_move_insn (target, accum); -- -- if (! masked) -- emit_insn (fcn (target, accum, wide_reg, mem)); -- else -- { -- rtx merge, mask; -- merge = expand_normal (CALL_EXPR_ARG (exp, 6)); -- -- mask = expand_normal (CALL_EXPR_ARG (exp, 7)); -- -- if (CONST_INT_P (mask)) -- mask = fixup_modeless_constant (mask, QImode); -- -- mask = force_reg (QImode, mask); -- -- if (GET_MODE (mask) != QImode) -- mask = gen_rtx_SUBREG (QImode, mask, 0); -- -- /* If merge is 0 then we're about to emit z-masked variant. */ -- if (const0_operand (merge, mode)) -- emit_insn (fcn_maskz (target, accum, wide_reg, mem, merge, mask)); -- /* If merge is the same as accum then emit merge-masked -- variant. */ -- else if (CALL_EXPR_ARG (exp, 6) == CALL_EXPR_ARG (exp, 4)) -- { -- merge = force_reg (mode, merge); -- emit_insn (fcn_mask (target, wide_reg, mem, merge, mask)); -- } -- /* Merge with something unknown might happen if we z-mask -- w/ -O0. 
*/ -- else -- { -- target = gen_reg_rtx (mode); -- emit_move_insn (target, merge); -- emit_insn (fcn_mask (target, wide_reg, mem, target, mask)); -- } -- } -- return target; -- } -- case IX86_BUILTIN_RDPID: -- return ix86_expand_special_args_builtin (bdesc_args + i, exp, -- target); -- case IX86_BUILTIN_FABSQ: -- case IX86_BUILTIN_COPYSIGNQ: -- if (!TARGET_SSE) -- /* Emit a normal call if SSE isn't available. */ -- return expand_call (exp, target, ignore); -- /* FALLTHRU */ -- default: -- return ix86_expand_args_builtin (bdesc_args + i, exp, target); -- } -- } -- -- if (fcode >= IX86_BUILTIN__BDESC_COMI_FIRST -- && fcode <= IX86_BUILTIN__BDESC_COMI_LAST) -- { -- i = fcode - IX86_BUILTIN__BDESC_COMI_FIRST; -- return ix86_expand_sse_comi (bdesc_comi + i, exp, target); -- } -- -- if (fcode >= IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST -- && fcode <= IX86_BUILTIN__BDESC_ROUND_ARGS_LAST) -- { -- i = fcode - IX86_BUILTIN__BDESC_ROUND_ARGS_FIRST; -- return ix86_expand_round_builtin (bdesc_round_args + i, exp, target); -- } -- -- if (fcode >= IX86_BUILTIN__BDESC_PCMPESTR_FIRST -- && fcode <= IX86_BUILTIN__BDESC_PCMPESTR_LAST) -- { -- i = fcode - IX86_BUILTIN__BDESC_PCMPESTR_FIRST; -- return ix86_expand_sse_pcmpestr (bdesc_pcmpestr + i, exp, target); -- } -- -- if (fcode >= IX86_BUILTIN__BDESC_PCMPISTR_FIRST -- && fcode <= IX86_BUILTIN__BDESC_PCMPISTR_LAST) -- { -- i = fcode - IX86_BUILTIN__BDESC_PCMPISTR_FIRST; -- return ix86_expand_sse_pcmpistr (bdesc_pcmpistr + i, exp, target); -- } -- -- if (fcode >= IX86_BUILTIN__BDESC_MULTI_ARG_FIRST -- && fcode <= IX86_BUILTIN__BDESC_MULTI_ARG_LAST) -- { -- i = fcode - IX86_BUILTIN__BDESC_MULTI_ARG_FIRST; -- const struct builtin_description *d = bdesc_multi_arg + i; -- return ix86_expand_multi_arg_builtin (d->icode, exp, target, -- (enum ix86_builtin_func_type) -- d->flag, d->comparison); -- } -- -- if (fcode >= IX86_BUILTIN__BDESC_CET_FIRST -- && fcode <= IX86_BUILTIN__BDESC_CET_LAST) -- { -- i = fcode - IX86_BUILTIN__BDESC_CET_FIRST; -- return ix86_expand_special_args_builtin (bdesc_cet + i, exp, -- target); -- } -- -- if (fcode >= IX86_BUILTIN__BDESC_CET_NORMAL_FIRST -- && fcode <= IX86_BUILTIN__BDESC_CET_NORMAL_LAST) -- { -- i = fcode - IX86_BUILTIN__BDESC_CET_NORMAL_FIRST; -- return ix86_expand_special_args_builtin (bdesc_cet_rdssp + i, exp, -- target); -- } -- -- gcc_unreachable (); --} -- --/* This returns the target-specific builtin with code CODE if -- current_function_decl has visibility on this builtin, which is checked -- using isa flags. Returns NULL_TREE otherwise. */ -- --static tree ix86_get_builtin (enum ix86_builtins code) --{ -- struct cl_target_option *opts; -- tree target_tree = NULL_TREE; -- -- /* Determine the isa flags of current_function_decl. */ -- -- if (current_function_decl) -- target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl); -- -- if (target_tree == NULL) -- target_tree = target_option_default_node; -- -- opts = TREE_TARGET_OPTION (target_tree); -- -- if ((ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags) -- || (ix86_builtins_isa[(int) code].isa2 & opts->x_ix86_isa_flags2)) -- return ix86_builtin_decl (code, true); -- else -- return NULL_TREE; --} -- --/* Returns a function decl for a vectorized version of the combined function -- with combined_fn code FN and the result vector type TYPE, or NULL_TREE -- if it is not available. 
*/ -- --static tree --ix86_builtin_vectorized_function (unsigned int fn, tree type_out, -- tree type_in) --{ -- machine_mode in_mode, out_mode; -- int in_n, out_n; -- -- if (TREE_CODE (type_out) != VECTOR_TYPE -- || TREE_CODE (type_in) != VECTOR_TYPE) -- return NULL_TREE; -- -- out_mode = TYPE_MODE (TREE_TYPE (type_out)); -- out_n = TYPE_VECTOR_SUBPARTS (type_out); -- in_mode = TYPE_MODE (TREE_TYPE (type_in)); -- in_n = TYPE_VECTOR_SUBPARTS (type_in); -- -- switch (fn) -- { -- CASE_CFN_EXP2: -- if (out_mode == SFmode && in_mode == SFmode) -- { -- if (out_n == 16 && in_n == 16) -- return ix86_get_builtin (IX86_BUILTIN_EXP2PS); -- } -- break; -- -- CASE_CFN_IFLOOR: -- CASE_CFN_LFLOOR: -- CASE_CFN_LLFLOOR: -- /* The round insn does not trap on denormals. */ -- if (flag_trapping_math || !TARGET_SSE4_1) -- break; -- -- if (out_mode == SImode && in_mode == DFmode) -- { -- if (out_n == 4 && in_n == 2) -- return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX); -- else if (out_n == 8 && in_n == 4) -- return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256); -- else if (out_n == 16 && in_n == 8) -- return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512); -- } -- if (out_mode == SImode && in_mode == SFmode) -- { -- if (out_n == 4 && in_n == 4) -- return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX); -- else if (out_n == 8 && in_n == 8) -- return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256); -- else if (out_n == 16 && in_n == 16) -- return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX512); -- } -- break; -- -- CASE_CFN_ICEIL: -- CASE_CFN_LCEIL: -- CASE_CFN_LLCEIL: -- /* The round insn does not trap on denormals. */ -- if (flag_trapping_math || !TARGET_SSE4_1) -- break; -- -- if (out_mode == SImode && in_mode == DFmode) -- { -- if (out_n == 4 && in_n == 2) -- return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX); -- else if (out_n == 8 && in_n == 4) -- return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256); -- else if (out_n == 16 && in_n == 8) -- return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512); -- } -- if (out_mode == SImode && in_mode == SFmode) -- { -- if (out_n == 4 && in_n == 4) -- return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX); -- else if (out_n == 8 && in_n == 8) -- return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256); -- else if (out_n == 16 && in_n == 16) -- return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX512); -- } -- break; -- -- CASE_CFN_IRINT: -- CASE_CFN_LRINT: -- CASE_CFN_LLRINT: -- if (out_mode == SImode && in_mode == DFmode) -- { -- if (out_n == 4 && in_n == 2) -- return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX); -- else if (out_n == 8 && in_n == 4) -- return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256); -- else if (out_n == 16 && in_n == 8) -- return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX512); -- } -- if (out_mode == SImode && in_mode == SFmode) -- { -- if (out_n == 4 && in_n == 4) -- return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ); -- else if (out_n == 8 && in_n == 8) -- return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256); -- else if (out_n == 16 && in_n == 16) -- return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ512); -- } -- break; -- -- CASE_CFN_IROUND: -- CASE_CFN_LROUND: -- CASE_CFN_LLROUND: -- /* The round insn does not trap on denormals. 
*/ -- if (flag_trapping_math || !TARGET_SSE4_1) -- break; -- -- if (out_mode == SImode && in_mode == DFmode) -- { -- if (out_n == 4 && in_n == 2) -- return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX); -- else if (out_n == 8 && in_n == 4) -- return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256); -- else if (out_n == 16 && in_n == 8) -- return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512); -- } -- if (out_mode == SImode && in_mode == SFmode) -- { -- if (out_n == 4 && in_n == 4) -- return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX); -- else if (out_n == 8 && in_n == 8) -- return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256); -- else if (out_n == 16 && in_n == 16) -- return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX512); -- } -- break; -- -- CASE_CFN_FLOOR: -- /* The round insn does not trap on denormals. */ -- if (flag_trapping_math || !TARGET_SSE4_1) -- break; -- -- if (out_mode == DFmode && in_mode == DFmode) -- { -- if (out_n == 2 && in_n == 2) -- return ix86_get_builtin (IX86_BUILTIN_FLOORPD); -- else if (out_n == 4 && in_n == 4) -- return ix86_get_builtin (IX86_BUILTIN_FLOORPD256); -- else if (out_n == 8 && in_n == 8) -- return ix86_get_builtin (IX86_BUILTIN_FLOORPD512); -- } -- if (out_mode == SFmode && in_mode == SFmode) -- { -- if (out_n == 4 && in_n == 4) -- return ix86_get_builtin (IX86_BUILTIN_FLOORPS); -- else if (out_n == 8 && in_n == 8) -- return ix86_get_builtin (IX86_BUILTIN_FLOORPS256); -- else if (out_n == 16 && in_n == 16) -- return ix86_get_builtin (IX86_BUILTIN_FLOORPS512); -- } -- break; -- -- CASE_CFN_CEIL: -- /* The round insn does not trap on denormals. */ -- if (flag_trapping_math || !TARGET_SSE4_1) -- break; -- -- if (out_mode == DFmode && in_mode == DFmode) -- { -- if (out_n == 2 && in_n == 2) -- return ix86_get_builtin (IX86_BUILTIN_CEILPD); -- else if (out_n == 4 && in_n == 4) -- return ix86_get_builtin (IX86_BUILTIN_CEILPD256); -- else if (out_n == 8 && in_n == 8) -- return ix86_get_builtin (IX86_BUILTIN_CEILPD512); -- } -- if (out_mode == SFmode && in_mode == SFmode) -- { -- if (out_n == 4 && in_n == 4) -- return ix86_get_builtin (IX86_BUILTIN_CEILPS); -- else if (out_n == 8 && in_n == 8) -- return ix86_get_builtin (IX86_BUILTIN_CEILPS256); -- else if (out_n == 16 && in_n == 16) -- return ix86_get_builtin (IX86_BUILTIN_CEILPS512); -- } -- break; -- -- CASE_CFN_TRUNC: -- /* The round insn does not trap on denormals. */ -- if (flag_trapping_math || !TARGET_SSE4_1) -- break; -- -- if (out_mode == DFmode && in_mode == DFmode) -- { -- if (out_n == 2 && in_n == 2) -- return ix86_get_builtin (IX86_BUILTIN_TRUNCPD); -- else if (out_n == 4 && in_n == 4) -- return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256); -- else if (out_n == 8 && in_n == 8) -- return ix86_get_builtin (IX86_BUILTIN_TRUNCPD512); -- } -- if (out_mode == SFmode && in_mode == SFmode) -- { -- if (out_n == 4 && in_n == 4) -- return ix86_get_builtin (IX86_BUILTIN_TRUNCPS); -- else if (out_n == 8 && in_n == 8) -- return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256); -- else if (out_n == 16 && in_n == 16) -- return ix86_get_builtin (IX86_BUILTIN_TRUNCPS512); -- } -- break; -- -- CASE_CFN_RINT: -- /* The round insn does not trap on denormals. 
*/ -- if (flag_trapping_math || !TARGET_SSE4_1) -- break; -- -- if (out_mode == DFmode && in_mode == DFmode) -- { -- if (out_n == 2 && in_n == 2) -- return ix86_get_builtin (IX86_BUILTIN_RINTPD); -- else if (out_n == 4 && in_n == 4) -- return ix86_get_builtin (IX86_BUILTIN_RINTPD256); -- } -- if (out_mode == SFmode && in_mode == SFmode) -- { -- if (out_n == 4 && in_n == 4) -- return ix86_get_builtin (IX86_BUILTIN_RINTPS); -- else if (out_n == 8 && in_n == 8) -- return ix86_get_builtin (IX86_BUILTIN_RINTPS256); -- } -- break; -- -- CASE_CFN_FMA: -- if (out_mode == DFmode && in_mode == DFmode) -- { -- if (out_n == 2 && in_n == 2) -- return ix86_get_builtin (IX86_BUILTIN_VFMADDPD); -- if (out_n == 4 && in_n == 4) -- return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256); -- } -- if (out_mode == SFmode && in_mode == SFmode) -- { -- if (out_n == 4 && in_n == 4) -- return ix86_get_builtin (IX86_BUILTIN_VFMADDPS); -- if (out_n == 8 && in_n == 8) -- return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256); -- } -- break; -- -- default: -- break; -- } -- -- /* Dispatch to a handler for a vectorization library. */ -- if (ix86_veclib_handler) -- return ix86_veclib_handler (combined_fn (fn), type_out, type_in); -- -- return NULL_TREE; --} -- --/* Handler for an SVML-style interface to -- a library with vectorized intrinsics. */ -- --static tree --ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in) --{ -- char name[20]; -- tree fntype, new_fndecl, args; -- unsigned arity; -- const char *bname; -- machine_mode el_mode, in_mode; -- int n, in_n; -- -- /* The SVML is suitable for unsafe math only. */ -- if (!flag_unsafe_math_optimizations) -- return NULL_TREE; -- -- el_mode = TYPE_MODE (TREE_TYPE (type_out)); -- n = TYPE_VECTOR_SUBPARTS (type_out); -- in_mode = TYPE_MODE (TREE_TYPE (type_in)); -- in_n = TYPE_VECTOR_SUBPARTS (type_in); -- if (el_mode != in_mode -- || n != in_n) -- return NULL_TREE; -- -- switch (fn) -- { -- CASE_CFN_EXP: -- CASE_CFN_LOG: -- CASE_CFN_LOG10: -- CASE_CFN_POW: -- CASE_CFN_TANH: -- CASE_CFN_TAN: -- CASE_CFN_ATAN: -- CASE_CFN_ATAN2: -- CASE_CFN_ATANH: -- CASE_CFN_CBRT: -- CASE_CFN_SINH: -- CASE_CFN_SIN: -- CASE_CFN_ASINH: -- CASE_CFN_ASIN: -- CASE_CFN_COSH: -- CASE_CFN_COS: -- CASE_CFN_ACOSH: -- CASE_CFN_ACOS: -- if ((el_mode != DFmode || n != 2) -- && (el_mode != SFmode || n != 4)) -- return NULL_TREE; -- break; -- -- default: -- return NULL_TREE; -- } -- -- tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn); -- bname = IDENTIFIER_POINTER (DECL_NAME (fndecl)); -- -- if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF) -- strcpy (name, "vmlsLn4"); -- else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG) -- strcpy (name, "vmldLn2"); -- else if (n == 4) -- { -- sprintf (name, "vmls%s", bname+10); -- name[strlen (name)-1] = '4'; -- } -- else -- sprintf (name, "vmld%s2", bname+10); -- -- /* Convert to uppercase. */ -- name[4] &= ~0x20; -- -- arity = 0; -- for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args)) -- arity++; -- -- if (arity == 1) -- fntype = build_function_type_list (type_out, type_in, NULL); -- else -- fntype = build_function_type_list (type_out, type_in, type_in, NULL); -- -- /* Build a function declaration for the vectorized function. 
*/ -- new_fndecl = build_decl (BUILTINS_LOCATION, -- FUNCTION_DECL, get_identifier (name), fntype); -- TREE_PUBLIC (new_fndecl) = 1; -- DECL_EXTERNAL (new_fndecl) = 1; -- DECL_IS_NOVOPS (new_fndecl) = 1; -- TREE_READONLY (new_fndecl) = 1; -- -- return new_fndecl; --} -- --/* Handler for an ACML-style interface to -- a library with vectorized intrinsics. */ -- --static tree --ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in) --{ -- char name[20] = "__vr.._"; -- tree fntype, new_fndecl, args; -- unsigned arity; -- const char *bname; -- machine_mode el_mode, in_mode; -- int n, in_n; -- -- /* The ACML is 64bits only and suitable for unsafe math only as -- it does not correctly support parts of IEEE with the required -- precision such as denormals. */ -- if (!TARGET_64BIT -- || !flag_unsafe_math_optimizations) -- return NULL_TREE; -- -- el_mode = TYPE_MODE (TREE_TYPE (type_out)); -- n = TYPE_VECTOR_SUBPARTS (type_out); -- in_mode = TYPE_MODE (TREE_TYPE (type_in)); -- in_n = TYPE_VECTOR_SUBPARTS (type_in); -- if (el_mode != in_mode -- || n != in_n) -- return NULL_TREE; -- -- switch (fn) -- { -- CASE_CFN_SIN: -- CASE_CFN_COS: -- CASE_CFN_EXP: -- CASE_CFN_LOG: -- CASE_CFN_LOG2: -- CASE_CFN_LOG10: -- if (el_mode == DFmode && n == 2) -- { -- name[4] = 'd'; -- name[5] = '2'; -- } -- else if (el_mode == SFmode && n == 4) -- { -- name[4] = 's'; -- name[5] = '4'; -- } -- else -- return NULL_TREE; -- break; -- -- default: -- return NULL_TREE; -- } -- -- tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn); -- bname = IDENTIFIER_POINTER (DECL_NAME (fndecl)); -- sprintf (name + 7, "%s", bname+10); -- -- arity = 0; -- for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args)) -- arity++; -- -- if (arity == 1) -- fntype = build_function_type_list (type_out, type_in, NULL); -- else -- fntype = build_function_type_list (type_out, type_in, type_in, NULL); -- -- /* Build a function declaration for the vectorized function. */ -- new_fndecl = build_decl (BUILTINS_LOCATION, -- FUNCTION_DECL, get_identifier (name), fntype); -- TREE_PUBLIC (new_fndecl) = 1; -- DECL_EXTERNAL (new_fndecl) = 1; -- DECL_IS_NOVOPS (new_fndecl) = 1; -- TREE_READONLY (new_fndecl) = 1; -- -- return new_fndecl; --} -- --/* Returns a decl of a function that implements gather load with -- memory type MEM_VECTYPE and index type INDEX_VECTYPE and SCALE. -- Return NULL_TREE if it is not available. */ -- --static tree --ix86_vectorize_builtin_gather (const_tree mem_vectype, -- const_tree index_type, int scale) --{ -- bool si; -- enum ix86_builtins code; -- -- if (! TARGET_AVX2 || !TARGET_USE_GATHER) -- return NULL_TREE; -- -- if ((TREE_CODE (index_type) != INTEGER_TYPE -- && !POINTER_TYPE_P (index_type)) -- || (TYPE_MODE (index_type) != SImode -- && TYPE_MODE (index_type) != DImode)) -- return NULL_TREE; -- -- if (TYPE_PRECISION (index_type) > POINTER_SIZE) -- return NULL_TREE; -- -- /* v*gather* insn sign extends index to pointer mode. */ -- if (TYPE_PRECISION (index_type) < POINTER_SIZE -- && TYPE_UNSIGNED (index_type)) -- return NULL_TREE; -- -- if (scale <= 0 -- || scale > 8 -- || (scale & (scale - 1)) != 0) -- return NULL_TREE; -- -- si = TYPE_MODE (index_type) == SImode; -- switch (TYPE_MODE (mem_vectype)) -- { -- case E_V2DFmode: -- if (TARGET_AVX512VL) -- code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF; -- else -- code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF; -- break; -- case E_V4DFmode: -- if (TARGET_AVX512VL) -- code = si ? 
IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF; -- else -- code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF; -- break; -- case E_V2DImode: -- if (TARGET_AVX512VL) -- code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI; -- else -- code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI; -- break; -- case E_V4DImode: -- if (TARGET_AVX512VL) -- code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI; -- else -- code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI; -- break; -- case E_V4SFmode: -- if (TARGET_AVX512VL) -- code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF; -- else -- code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF; -- break; -- case E_V8SFmode: -- if (TARGET_AVX512VL) -- code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF; -- else -- code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF; -- break; -- case E_V4SImode: -- if (TARGET_AVX512VL) -- code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI; -- else -- code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI; -- break; -- case E_V8SImode: -- if (TARGET_AVX512VL) -- code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI; -- else -- code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI; -- break; -- case E_V8DFmode: -- if (TARGET_AVX512F) -- code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF; -- else -- return NULL_TREE; -- break; -- case E_V8DImode: -- if (TARGET_AVX512F) -- code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI; -- else -- return NULL_TREE; -- break; -- case E_V16SFmode: -- if (TARGET_AVX512F) -- code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF; -- else -- return NULL_TREE; -- break; -- case E_V16SImode: -- if (TARGET_AVX512F) -- code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI; -- else -- return NULL_TREE; -- break; -- default: -- return NULL_TREE; -- } -- -- return ix86_get_builtin (code); --} -- --/* Returns a decl of a function that implements scatter store with -- register type VECTYPE and index type INDEX_TYPE and SCALE. -- Return NULL_TREE if it is not available. */ -- --static tree --ix86_vectorize_builtin_scatter (const_tree vectype, -- const_tree index_type, int scale) --{ -- bool si; -- enum ix86_builtins code; -- -- if (!TARGET_AVX512F) -- return NULL_TREE; -- -- if ((TREE_CODE (index_type) != INTEGER_TYPE -- && !POINTER_TYPE_P (index_type)) -- || (TYPE_MODE (index_type) != SImode -- && TYPE_MODE (index_type) != DImode)) -- return NULL_TREE; -- -- if (TYPE_PRECISION (index_type) > POINTER_SIZE) -- return NULL_TREE; -- -- /* v*scatter* insn sign extends index to pointer mode. */ -- if (TYPE_PRECISION (index_type) < POINTER_SIZE -- && TYPE_UNSIGNED (index_type)) -- return NULL_TREE; -- -- /* Scale can be 1, 2, 4 or 8. */ -- if (scale <= 0 -- || scale > 8 -- || (scale & (scale - 1)) != 0) -- return NULL_TREE; -- -- si = TYPE_MODE (index_type) == SImode; -- switch (TYPE_MODE (vectype)) -- { -- case E_V8DFmode: -- code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF; -- break; -- case E_V8DImode: -- code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI; -- break; -- case E_V16SFmode: -- code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF; -- break; -- case E_V16SImode: -- code = si ? 
IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI; -- break; -- case E_V4DFmode: -- if (TARGET_AVX512VL) -- code = si ? IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF; -- else -- return NULL_TREE; -- break; -- case E_V4DImode: -- if (TARGET_AVX512VL) -- code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI; -- else -- return NULL_TREE; -- break; -- case E_V8SFmode: -- if (TARGET_AVX512VL) -- code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF; -- else -- return NULL_TREE; -- break; -- case E_V8SImode: -- if (TARGET_AVX512VL) -- code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI; -- else -- return NULL_TREE; -- break; -- case E_V2DFmode: -- if (TARGET_AVX512VL) -- code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF; -- else -- return NULL_TREE; -- break; -- case E_V2DImode: -- if (TARGET_AVX512VL) -- code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI; -- else -- return NULL_TREE; -- break; -- case E_V4SFmode: -- if (TARGET_AVX512VL) -- code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF; -- else -- return NULL_TREE; -- break; -- case E_V4SImode: -- if (TARGET_AVX512VL) -- code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI; -- else -- return NULL_TREE; -- break; -- default: -- return NULL_TREE; -- } -- -- return ix86_builtins[code]; --} -- --/* Return true if it is safe to use the rsqrt optabs to optimize -- 1.0/sqrt. */ -- --static bool --use_rsqrt_p () --{ -- return (TARGET_SSE && TARGET_SSE_MATH -- && flag_finite_math_only -- && !flag_trapping_math -- && flag_unsafe_math_optimizations); --} -- --/* Returns a code for a target-specific builtin that implements -- reciprocal of the function, or NULL_TREE if not available. */ -- --static tree --ix86_builtin_reciprocal (tree fndecl) --{ -- switch (DECL_FUNCTION_CODE (fndecl)) -- { -- /* Vectorized version of sqrt to rsqrt conversion. */ -- case IX86_BUILTIN_SQRTPS_NR: -- return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR); -- -- case IX86_BUILTIN_SQRTPS_NR256: -- return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256); -- -- default: -- return NULL_TREE; -- } --} -- --/* Helper for avx_vpermilps256_operand et al. This is also used by -- the expansion functions to turn the parallel back into a mask. -- The return value is 0 for no match and the imm8+1 for a match. */ -- --int --avx_vpermilp_parallel (rtx par, machine_mode mode) --{ -- unsigned i, nelt = GET_MODE_NUNITS (mode); -- unsigned mask = 0; -- unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */ -- -- if (XVECLEN (par, 0) != (int) nelt) -- return 0; -- -- /* Validate that all of the elements are constants, and not totally -- out of range. Copy the data into an integral array to make the -- subsequent checks easier. */ -- for (i = 0; i < nelt; ++i) -- { -- rtx er = XVECEXP (par, 0, i); -- unsigned HOST_WIDE_INT ei; -- -- if (!CONST_INT_P (er)) -- return 0; -- ei = INTVAL (er); -- if (ei >= nelt) -- return 0; -- ipar[i] = ei; -- } -- -- switch (mode) -- { -- case E_V8DFmode: -- /* In the 512-bit DFmode case, we can only move elements within -- a 128-bit lane. First fill the second part of the mask, -- then fallthru. 
*/ -- for (i = 4; i < 6; ++i) -- { -- if (ipar[i] < 4 || ipar[i] >= 6) -- return 0; -- mask |= (ipar[i] - 4) << i; -- } -- for (i = 6; i < 8; ++i) -- { -- if (ipar[i] < 6) -- return 0; -- mask |= (ipar[i] - 6) << i; -- } -- /* FALLTHRU */ -- -- case E_V4DFmode: -- /* In the 256-bit DFmode case, we can only move elements within -- a 128-bit lane. */ -- for (i = 0; i < 2; ++i) -- { -- if (ipar[i] >= 2) -- return 0; -- mask |= ipar[i] << i; -- } -- for (i = 2; i < 4; ++i) -- { -- if (ipar[i] < 2) -- return 0; -- mask |= (ipar[i] - 2) << i; -- } -- break; -- -- case E_V16SFmode: -- /* In 512 bit SFmode case, permutation in the upper 256 bits -- must mirror the permutation in the lower 256-bits. */ -- for (i = 0; i < 8; ++i) -- if (ipar[i] + 8 != ipar[i + 8]) -- return 0; -- /* FALLTHRU */ -- -- case E_V8SFmode: -- /* In 256 bit SFmode case, we have full freedom of -- movement within the low 128-bit lane, but the high 128-bit -- lane must mirror the exact same pattern. */ -- for (i = 0; i < 4; ++i) -- if (ipar[i] + 4 != ipar[i + 4]) -- return 0; -- nelt = 4; -- /* FALLTHRU */ -- -- case E_V2DFmode: -- case E_V4SFmode: -- /* In the 128-bit case, we've full freedom in the placement of -- the elements from the source operand. */ -- for (i = 0; i < nelt; ++i) -- mask |= ipar[i] << (i * (nelt / 2)); -- break; -- -- default: -- gcc_unreachable (); -- } -- -- /* Make sure success has a non-zero value by adding one. */ -- return mask + 1; --} -- --/* Helper for avx_vperm2f128_v4df_operand et al. This is also used by -- the expansion functions to turn the parallel back into a mask. -- The return value is 0 for no match and the imm8+1 for a match. */ -- --int --avx_vperm2f128_parallel (rtx par, machine_mode mode) --{ -- unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2; -- unsigned mask = 0; -- unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */ -- -- if (XVECLEN (par, 0) != (int) nelt) -- return 0; -- -- /* Validate that all of the elements are constants, and not totally -- out of range. Copy the data into an integral array to make the -- subsequent checks easier. */ -- for (i = 0; i < nelt; ++i) -- { -- rtx er = XVECEXP (par, 0, i); -- unsigned HOST_WIDE_INT ei; -- -- if (!CONST_INT_P (er)) -- return 0; -- ei = INTVAL (er); -- if (ei >= 2 * nelt) -- return 0; -- ipar[i] = ei; -- } -- -- /* Validate that the halves of the permute are halves. */ -- for (i = 0; i < nelt2 - 1; ++i) -- if (ipar[i] + 1 != ipar[i + 1]) -- return 0; -- for (i = nelt2; i < nelt - 1; ++i) -- if (ipar[i] + 1 != ipar[i + 1]) -- return 0; -- -- /* Reconstruct the mask. */ -- for (i = 0; i < 2; ++i) -- { -- unsigned e = ipar[i * nelt2]; -- if (e % nelt2) -- return 0; -- e /= nelt2; -- mask |= e << (i * 4); -- } -- -- /* Make sure success has a non-zero value by adding one. */ -- return mask + 1; --} -- --/* Return a register priority for hard reg REGNO. */ --static int --ix86_register_priority (int hard_regno) --{ -- /* ebp and r13 as the base always wants a displacement, r12 as the -- base always wants an index. So discourage their usage in an -- address. */ -- if (hard_regno == R12_REG || hard_regno == R13_REG) -- return 0; -- if (hard_regno == BP_REG) -- return 1; -- /* New x86-64 int registers result in bigger code size. Discourage -- them. */ -- if (IN_RANGE (hard_regno, FIRST_REX_INT_REG, LAST_REX_INT_REG)) -- return 2; -- /* New x86-64 SSE registers result in bigger code size. Discourage -- them. 
*/ -- if (IN_RANGE (hard_regno, FIRST_REX_SSE_REG, LAST_REX_SSE_REG)) -- return 2; -- if (IN_RANGE (hard_regno, FIRST_EXT_REX_SSE_REG, LAST_EXT_REX_SSE_REG)) -- return 1; -- /* Usage of AX register results in smaller code. Prefer it. */ -- if (hard_regno == AX_REG) -- return 4; -- return 3; --} -- --/* Implement TARGET_PREFERRED_RELOAD_CLASS. -- -- Put float CONST_DOUBLE in the constant pool instead of fp regs. -- QImode must go into class Q_REGS. -- Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and -- movdf to do mem-to-mem moves through integer regs. */ -- --static reg_class_t --ix86_preferred_reload_class (rtx x, reg_class_t regclass) --{ -- machine_mode mode = GET_MODE (x); -- -- /* We're only allowed to return a subclass of CLASS. Many of the -- following checks fail for NO_REGS, so eliminate that early. */ -- if (regclass == NO_REGS) -- return NO_REGS; -- -- /* All classes can load zeros. */ -- if (x == CONST0_RTX (mode)) -- return regclass; -- -- /* Force constants into memory if we are loading a (nonzero) constant into -- an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK -- instructions to load from a constant. */ -- if (CONSTANT_P (x) -- && (MAYBE_MMX_CLASS_P (regclass) -- || MAYBE_SSE_CLASS_P (regclass) -- || MAYBE_MASK_CLASS_P (regclass))) -- return NO_REGS; -- -- /* Floating-point constants need more complex checks. */ -- if (CONST_DOUBLE_P (x)) -- { -- /* General regs can load everything. */ -- if (INTEGER_CLASS_P (regclass)) -- return regclass; -- -- /* Floats can load 0 and 1 plus some others. Note that we eliminated -- zero above. We only want to wind up preferring 80387 registers if -- we plan on doing computation with them. */ -- if (IS_STACK_MODE (mode) -- && standard_80387_constant_p (x) > 0) -- { -- /* Limit class to FP regs. */ -- if (FLOAT_CLASS_P (regclass)) -- return FLOAT_REGS; -- } -- -- return NO_REGS; -- } -- -- /* Prefer SSE regs only, if we can use them for math. */ -- if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) -- return SSE_CLASS_P (regclass) ? regclass : NO_REGS; -- -- /* Generally when we see PLUS here, it's the function invariant -- (plus soft-fp const_int). Which can only be computed into general -- regs. */ -- if (GET_CODE (x) == PLUS) -- return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS; -- -- /* QImode constants are easy to load, but non-constant QImode data -- must go into Q_REGS. */ -- if (GET_MODE (x) == QImode && !CONSTANT_P (x)) -- { -- if (Q_CLASS_P (regclass)) -- return regclass; -- else if (reg_class_subset_p (Q_REGS, regclass)) -- return Q_REGS; -- else -- return NO_REGS; -- } -- -- return regclass; --} -- --/* Discourage putting floating-point values in SSE registers unless -- SSE math is being used, and likewise for the 387 registers. */ --static reg_class_t --ix86_preferred_output_reload_class (rtx x, reg_class_t regclass) --{ -- /* Restrict the output reload class to the register bank that we are doing -- math on. If we would like not to return a subset of CLASS, reject this -- alternative: if reload cannot do this, it will still use its choice. */ -- machine_mode mode = GET_MODE (x); -- if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) -- return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS; -- -- if (IS_STACK_MODE (mode)) -- return FLOAT_CLASS_P (regclass) ? 
regclass : NO_REGS; -- -- return regclass; --} -- --static reg_class_t --ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass, -- machine_mode mode, secondary_reload_info *sri) --{ -- /* Double-word spills from general registers to non-offsettable memory -- references (zero-extended addresses) require special handling. */ -- if (TARGET_64BIT -- && MEM_P (x) -- && GET_MODE_SIZE (mode) > UNITS_PER_WORD -- && INTEGER_CLASS_P (rclass) -- && !offsettable_memref_p (x)) -- { -- sri->icode = (in_p -- ? CODE_FOR_reload_noff_load -- : CODE_FOR_reload_noff_store); -- /* Add the cost of moving address to a temporary. */ -- sri->extra_cost = 1; -- -- return NO_REGS; -- } -- -- /* QImode spills from non-QI registers require -- intermediate register on 32bit targets. */ -- if (mode == QImode -- && ((!TARGET_64BIT && !in_p -- && INTEGER_CLASS_P (rclass) -- && MAYBE_NON_Q_CLASS_P (rclass)) -- || (!TARGET_AVX512DQ -- && MAYBE_MASK_CLASS_P (rclass)))) -- { -- int regno = true_regnum (x); -- -- /* Return Q_REGS if the operand is in memory. */ -- if (regno == -1) -- return Q_REGS; -- -- return NO_REGS; -- } -- -- /* This condition handles corner case where an expression involving -- pointers gets vectorized. We're trying to use the address of a -- stack slot as a vector initializer. -- -- (set (reg:V2DI 74 [ vect_cst_.2 ]) -- (vec_duplicate:V2DI (reg/f:DI 20 frame))) -- -- Eventually frame gets turned into sp+offset like this: -- -- (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74]) -- (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp) -- (const_int 392 [0x188])))) -- -- That later gets turned into: -- -- (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74]) -- (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp) -- (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64])))) -- -- We'll have the following reload recorded: -- -- Reload 0: reload_in (DI) = -- (plus:DI (reg/f:DI 7 sp) -- (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64])) -- reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74]) -- SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine -- reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188])) -- reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74]) -- reload_reg_rtx: (reg:V2DI 22 xmm1) -- -- Which isn't going to work since SSE instructions can't handle scalar -- additions. Returning GENERAL_REGS forces the addition into integer -- register and reload can handle subsequent reloads without problems. */ -- -- if (in_p && GET_CODE (x) == PLUS -- && SSE_CLASS_P (rclass) -- && SCALAR_INT_MODE_P (mode)) -- return GENERAL_REGS; -- -- return NO_REGS; --} -- --/* Implement TARGET_CLASS_LIKELY_SPILLED_P. */ -- --static bool --ix86_class_likely_spilled_p (reg_class_t rclass) --{ -- switch (rclass) -- { -- case AREG: -- case DREG: -- case CREG: -- case BREG: -- case AD_REGS: -- case SIREG: -- case DIREG: -- case SSE_FIRST_REG: -- case FP_TOP_REG: -- case FP_SECOND_REG: -- return true; -- -- default: -- break; -- } -- -- return false; --} -- --/* If we are copying between registers from different register sets -- (e.g. FP and integer), we may need a memory location. -- -- The function can't work reliably when one of the CLASSES is a class -- containing registers from multiple sets. We avoid this by never combining -- different sets in a single alternative in the machine description. -- Ensure that this constraint holds to avoid unexpected surprises. -- -- When STRICT is false, we are being called from REGISTER_MOVE_COST, -- so do not enforce these sanity checks. 
-- -- To optimize register_move_cost performance, define inline variant. */ -- --static inline bool --inline_secondary_memory_needed (machine_mode mode, reg_class_t class1, -- reg_class_t class2, int strict) --{ -- if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS)) -- return false; -- -- if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1) -- || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2) -- || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1) -- || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2) -- || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1) -- || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2) -- || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1) -- || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2)) -- { -- gcc_assert (!strict || lra_in_progress); -- return true; -- } -- -- if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)) -- return true; -- -- /* Between mask and general, we have moves no larger than word size. */ -- if ((MASK_CLASS_P (class1) != MASK_CLASS_P (class2)) -- && (GET_MODE_SIZE (mode) > UNITS_PER_WORD)) -- return true; -- -- /* ??? This is a lie. We do have moves between mmx/general, and for -- mmx/sse2. But by saying we need secondary memory we discourage the -- register allocator from using the mmx registers unless needed. */ -- if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)) -- return true; -- -- if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)) -- { -- /* SSE1 doesn't have any direct moves from other classes. */ -- if (!TARGET_SSE2) -- return true; -- -- /* If the target says that inter-unit moves are more expensive -- than moving through memory, then don't generate them. */ -- if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC) -- || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC)) -- return true; -- -- /* Between SSE and general, we have moves no larger than word size. */ -- if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) -- return true; -- } -- -- return false; --} -- --/* Implement TARGET_SECONDARY_MEMORY_NEEDED. */ -- --static bool --ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1, -- reg_class_t class2) --{ -- return inline_secondary_memory_needed (mode, class1, class2, true); --} -- --/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. -- -- get_secondary_mem widens integral modes to BITS_PER_WORD. -- There is no need to emit full 64 bit move on 64 bit targets -- for integral modes that can be moved using 32 bit move. */ -- --static machine_mode --ix86_secondary_memory_needed_mode (machine_mode mode) --{ -- if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode)) -- return mode_for_size (32, GET_MODE_CLASS (mode), 0).require (); -- return mode; --} -- --/* Implement the TARGET_CLASS_MAX_NREGS hook. -- -- On the 80386, this is the size of MODE in words, -- except in the FP regs, where a single reg is always enough. */ -- --static unsigned char --ix86_class_max_nregs (reg_class_t rclass, machine_mode mode) --{ -- if (MAYBE_INTEGER_CLASS_P (rclass)) -- { -- if (mode == XFmode) -- return (TARGET_64BIT ? 2 : 3); -- else if (mode == XCmode) -- return (TARGET_64BIT ? 4 : 6); -- else -- return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD); -- } -- else -- { -- if (COMPLEX_MODE_P (mode)) -- return 2; -- else -- return 1; -- } --} -- --/* Implement TARGET_CAN_CHANGE_MODE_CLASS. 
*/ -- --static bool --ix86_can_change_mode_class (machine_mode from, machine_mode to, -- reg_class_t regclass) --{ -- if (from == to) -- return true; -- -- /* x87 registers can't do subreg at all, as all values are reformatted -- to extended precision. */ -- if (MAYBE_FLOAT_CLASS_P (regclass)) -- return false; -- -- if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass)) -- { -- /* Vector registers do not support QI or HImode loads. If we don't -- disallow a change to these modes, reload will assume it's ok to -- drop the subreg from (subreg:SI (reg:HI 100) 0). This affects -- the vec_dupv4hi pattern. */ -- if (GET_MODE_SIZE (from) < 4) -- return false; -- } -- -- return true; --} -- --/* Return index of MODE in the sse load/store tables. */ -- --static inline int --sse_store_index (machine_mode mode) --{ -- switch (GET_MODE_SIZE (mode)) -- { -- case 4: -- return 0; -- case 8: -- return 1; -- case 16: -- return 2; -- case 32: -- return 3; -- case 64: -- return 4; -- default: -- return -1; -- } --} -- --/* Return the cost of moving data of mode M between a -- register and memory. A value of 2 is the default; this cost is -- relative to those in `REGISTER_MOVE_COST'. -- -- This function is used extensively by register_move_cost that is used to -- build tables at startup. Make it inline in this case. -- When IN is 2, return maximum of in and out move cost. -- -- If moving between registers and memory is more expensive than -- between two registers, you should define this macro to express the -- relative cost. -- -- Model also increased moving costs of QImode registers in non -- Q_REGS classes. -- */ --static inline int --inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in) --{ -- int cost; -- if (FLOAT_CLASS_P (regclass)) -- { -- int index; -- switch (mode) -- { -- case E_SFmode: -- index = 0; -- break; -- case E_DFmode: -- index = 1; -- break; -- case E_XFmode: -- index = 2; -- break; -- default: -- return 100; -- } -- if (in == 2) -- return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]); -- return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index]; -- } -- if (SSE_CLASS_P (regclass)) -- { -- int index = sse_store_index (mode); -- if (index == -1) -- return 100; -- if (in == 2) -- return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]); -- return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index]; -- } -- if (MMX_CLASS_P (regclass)) -- { -- int index; -- switch (GET_MODE_SIZE (mode)) -- { -- case 4: -- index = 0; -- break; -- case 8: -- index = 1; -- break; -- default: -- return 100; -- } -- if (in == 2) -- return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]); -- return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index]; -- } -- switch (GET_MODE_SIZE (mode)) -- { -- case 1: -- if (Q_CLASS_P (regclass) || TARGET_64BIT) -- { -- if (!in) -- return ix86_cost->int_store[0]; -- if (TARGET_PARTIAL_REG_DEPENDENCY -- && optimize_function_for_speed_p (cfun)) -- cost = ix86_cost->movzbl_load; -- else -- cost = ix86_cost->int_load[0]; -- if (in == 2) -- return MAX (cost, ix86_cost->int_store[0]); -- return cost; -- } -- else -- { -- if (in == 2) -- return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4); -- if (in) -- return ix86_cost->movzbl_load; -- else -- return ix86_cost->int_store[0] + 4; -- } -- break; -- case 2: -- if (in == 2) -- return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]); -- return in ? 
ix86_cost->int_load[1] : ix86_cost->int_store[1]; -- default: -- if (in == 2) -- cost = MAX (ix86_cost->int_load[2], ix86_cost->int_store[2]); -- else if (in) -- cost = ix86_cost->int_load[2]; -- else -- cost = ix86_cost->int_store[2]; -- /* Multiply with the number of GPR moves needed. */ -- return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD); -- } --} -- --static int --ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in) --{ -- return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0); --} -- -- --/* Return the cost of moving data from a register in class CLASS1 to -- one in class CLASS2. -- -- It is not required that the cost always equal 2 when FROM is the same as TO; -- on some machines it is expensive to move between registers if they are not -- general registers. */ -- --static int --ix86_register_move_cost (machine_mode mode, reg_class_t class1_i, -- reg_class_t class2_i) --{ -- enum reg_class class1 = (enum reg_class) class1_i; -- enum reg_class class2 = (enum reg_class) class2_i; -- -- /* In case we require secondary memory, compute cost of the store followed -- by load. In order to avoid bad register allocation choices, we need -- for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */ -- -- if (inline_secondary_memory_needed (mode, class1, class2, false)) -- { -- int cost = 1; -- -- cost += inline_memory_move_cost (mode, class1, 2); -- cost += inline_memory_move_cost (mode, class2, 2); -- -- /* In case of copying from general_purpose_register we may emit multiple -- stores followed by single load causing memory size mismatch stall. -- Count this as arbitrarily high cost of 20. */ -- if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD -- && TARGET_MEMORY_MISMATCH_STALL -- && targetm.class_max_nregs (class1, mode) -- > targetm.class_max_nregs (class2, mode)) -- cost += 20; -- -- /* In the case of FP/MMX moves, the registers actually overlap, and we -- have to switch modes in order to treat them differently. */ -- if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2)) -- || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1))) -- cost += 20; -- -- return cost; -- } -- -- /* Moves between SSE/MMX and integer unit are expensive. */ -- if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2) -- || SSE_CLASS_P (class1) != SSE_CLASS_P (class2)) -- -- /* ??? By keeping returned value relatively high, we limit the number -- of moves between integer and MMX/SSE registers for all targets. -- Additionally, high value prevents problem with x86_modes_tieable_p(), -- where integer modes in MMX/SSE registers are not tieable -- because of missing QImode and HImode moves to, from or between -- MMX/SSE registers. */ -- return MAX (8, MMX_CLASS_P (class1) || MMX_CLASS_P (class2) -- ? ix86_cost->mmxsse_to_integer : ix86_cost->ssemmx_to_integer); -- -- if (MAYBE_FLOAT_CLASS_P (class1)) -- return ix86_cost->fp_move; -- if (MAYBE_SSE_CLASS_P (class1)) -- { -- if (GET_MODE_BITSIZE (mode) <= 128) -- return ix86_cost->xmm_move; -- if (GET_MODE_BITSIZE (mode) <= 256) -- return ix86_cost->ymm_move; -- return ix86_cost->zmm_move; -- } -- if (MAYBE_MMX_CLASS_P (class1)) -- return ix86_cost->mmx_move; -- return 2; --} -- --/* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in -- words of a value of mode MODE but can be less for certain modes in -- special long registers. -- -- Actually there are no two word move instructions for consecutive -- registers. And only registers 0-3 may have mov byte instructions -- applied to them. 
*/ -- --static unsigned int --ix86_hard_regno_nregs (unsigned int regno, machine_mode mode) --{ -- if (GENERAL_REGNO_P (regno)) -- { -- if (mode == XFmode) -- return TARGET_64BIT ? 2 : 3; -- if (mode == XCmode) -- return TARGET_64BIT ? 4 : 6; -- return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD); -- } -- if (COMPLEX_MODE_P (mode)) -- return 2; -- if (mode == V64SFmode || mode == V64SImode) -- return 4; -- return 1; --} -- --/* Implement TARGET_HARD_REGNO_MODE_OK. */ -- --static bool --ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode) --{ -- /* Flags and only flags can only hold CCmode values. */ -- if (CC_REGNO_P (regno)) -- return GET_MODE_CLASS (mode) == MODE_CC; -- if (GET_MODE_CLASS (mode) == MODE_CC -- || GET_MODE_CLASS (mode) == MODE_RANDOM -- || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT) -- return false; -- if (STACK_REGNO_P (regno)) -- return VALID_FP_MODE_P (mode); -- if (MASK_REGNO_P (regno)) -- return (VALID_MASK_REG_MODE (mode) -- || (TARGET_AVX512BW -- && VALID_MASK_AVX512BW_MODE (mode))); -- if (SSE_REGNO_P (regno)) -- { -- /* We implement the move patterns for all vector modes into and -- out of SSE registers, even when no operation instructions -- are available. */ -- -- /* For AVX-512 we allow, regardless of regno: -- - XI mode -- - any of 512-bit wide vector mode -- - any scalar mode. */ -- if (TARGET_AVX512F -- && (mode == XImode -- || VALID_AVX512F_REG_MODE (mode) -- || VALID_AVX512F_SCALAR_MODE (mode))) -- return true; -- -- /* For AVX-5124FMAPS or AVX-5124VNNIW -- allow V64SF and V64SI modes for special regnos. */ -- if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW) -- && (mode == V64SFmode || mode == V64SImode) -- && MOD4_SSE_REGNO_P (regno)) -- return true; -- -- /* TODO check for QI/HI scalars. */ -- /* AVX512VL allows sse regs16+ for 128/256 bit modes. */ -- if (TARGET_AVX512VL -- && (mode == OImode -- || mode == TImode -- || VALID_AVX256_REG_MODE (mode) -- || VALID_AVX512VL_128_REG_MODE (mode))) -- return true; -- -- /* xmm16-xmm31 are only available for AVX-512. */ -- if (EXT_REX_SSE_REGNO_P (regno)) -- return false; -- -- /* OImode and AVX modes are available only when AVX is enabled. */ -- return ((TARGET_AVX -- && VALID_AVX256_REG_OR_OI_MODE (mode)) -- || VALID_SSE_REG_MODE (mode) -- || VALID_SSE2_REG_MODE (mode) -- || VALID_MMX_REG_MODE (mode) -- || VALID_MMX_REG_MODE_3DNOW (mode)); -- } -- if (MMX_REGNO_P (regno)) -- { -- /* We implement the move patterns for 3DNOW modes even in MMX mode, -- so if the register is available at all, then we can move data of -- the given mode into or out of it. */ -- return (VALID_MMX_REG_MODE (mode) -- || VALID_MMX_REG_MODE_3DNOW (mode)); -- } -- -- if (mode == QImode) -- { -- /* Take care for QImode values - they can be in non-QI regs, -- but then they do cause partial register stalls. */ -- if (ANY_QI_REGNO_P (regno)) -- return true; -- if (!TARGET_PARTIAL_REG_STALL) -- return true; -- /* LRA checks if the hard register is OK for the given mode. -- QImode values can live in non-QI regs, so we allow all -- registers here. */ -- if (lra_in_progress) -- return true; -- return !can_create_pseudo_p (); -- } -- /* We handle both integer and floats in the general purpose registers. */ -- else if (VALID_INT_MODE_P (mode)) -- return true; -- else if (VALID_FP_MODE_P (mode)) -- return true; -- else if (VALID_DFP_MODE_P (mode)) -- return true; -- /* Lots of MMX code casts 8 byte vector modes to DImode. 
If we then go -- on to use that value in smaller contexts, this can easily force a -- pseudo to be allocated to GENERAL_REGS. Since this is no worse than -- supporting DImode, allow it. */ -- else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode)) -- return true; -- -- return false; --} -- --/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that -- saves SSE registers across calls is Win64 (thus no need to check the -- current ABI here), and with AVX enabled Win64 only guarantees that -- the low 16 bytes are saved. */ -- --static bool --ix86_hard_regno_call_part_clobbered (rtx_insn *insn ATTRIBUTE_UNUSED, -- unsigned int regno, machine_mode mode) --{ -- return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16; --} -- --/* A subroutine of ix86_modes_tieable_p. Return true if MODE is a -- tieable integer mode. */ -- --static bool --ix86_tieable_integer_mode_p (machine_mode mode) --{ -- switch (mode) -- { -- case E_HImode: -- case E_SImode: -- return true; -- -- case E_QImode: -- return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL; -- -- case E_DImode: -- return TARGET_64BIT; -- -- default: -- return false; -- } --} -- --/* Implement TARGET_MODES_TIEABLE_P. -- -- Return true if MODE1 is accessible in a register that can hold MODE2 -- without copying. That is, all register classes that can hold MODE2 -- can also hold MODE1. */ -- --static bool --ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2) --{ -- if (mode1 == mode2) -- return true; -- -- if (ix86_tieable_integer_mode_p (mode1) -- && ix86_tieable_integer_mode_p (mode2)) -- return true; -- -- /* MODE2 being XFmode implies fp stack or general regs, which means we -- can tie any smaller floating point modes to it. Note that we do not -- tie this with TFmode. */ -- if (mode2 == XFmode) -- return mode1 == SFmode || mode1 == DFmode; -- -- /* MODE2 being DFmode implies fp stack, general or sse regs, which means -- that we can tie it with SFmode. */ -- if (mode2 == DFmode) -- return mode1 == SFmode; -- -- /* If MODE2 is only appropriate for an SSE register, then tie with -- any other mode acceptable to SSE registers. */ -- if (GET_MODE_SIZE (mode2) == 64 -- && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2)) -- return (GET_MODE_SIZE (mode1) == 64 -- && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1)); -- if (GET_MODE_SIZE (mode2) == 32 -- && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2)) -- return (GET_MODE_SIZE (mode1) == 32 -- && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1)); -- if (GET_MODE_SIZE (mode2) == 16 -- && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2)) -- return (GET_MODE_SIZE (mode1) == 16 -- && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1)); -- -- /* If MODE2 is appropriate for an MMX register, then tie -- with any other mode acceptable to MMX registers. */ -- if (GET_MODE_SIZE (mode2) == 8 -- && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2)) -- return (GET_MODE_SIZE (mode1) == 8 -- && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1)); -- -- return false; --} -- --/* Return the cost of moving between two registers of mode MODE. 
*/ -- --static int --ix86_set_reg_reg_cost (machine_mode mode) --{ -- unsigned int units = UNITS_PER_WORD; -- -- switch (GET_MODE_CLASS (mode)) -- { -- default: -- break; -- -- case MODE_CC: -- units = GET_MODE_SIZE (CCmode); -- break; -- -- case MODE_FLOAT: -- if ((TARGET_SSE && mode == TFmode) -- || (TARGET_80387 && mode == XFmode) -- || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode) -- || ((TARGET_80387 || TARGET_SSE) && mode == SFmode)) -- units = GET_MODE_SIZE (mode); -- break; -- -- case MODE_COMPLEX_FLOAT: -- if ((TARGET_SSE && mode == TCmode) -- || (TARGET_80387 && mode == XCmode) -- || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode) -- || ((TARGET_80387 || TARGET_SSE) && mode == SCmode)) -- units = GET_MODE_SIZE (mode); -- break; -- -- case MODE_VECTOR_INT: -- case MODE_VECTOR_FLOAT: -- if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode)) -- || (TARGET_AVX && VALID_AVX256_REG_MODE (mode)) -- || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode)) -- || (TARGET_SSE && VALID_SSE_REG_MODE (mode)) -- || (TARGET_MMX && VALID_MMX_REG_MODE (mode))) -- units = GET_MODE_SIZE (mode); -- } -- -- /* Return the cost of moving between two registers of mode MODE, -- assuming that the move will be in pieces of at most UNITS bytes. */ -- return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units)); --} -- --/* Return cost of vector operation in MODE given that scalar version has -- COST. */ -- --static int --ix86_vec_cost (machine_mode mode, int cost) --{ -- if (!VECTOR_MODE_P (mode)) -- return cost; -- -- if (GET_MODE_BITSIZE (mode) == 128 -- && TARGET_SSE_SPLIT_REGS) -- return cost * 2; -- if (GET_MODE_BITSIZE (mode) > 128 -- && TARGET_AVX128_OPTIMAL) -- return cost * GET_MODE_BITSIZE (mode) / 128; -- return cost; --} -- --/* Return cost of multiplication in MODE. */ -- --static int --ix86_multiplication_cost (const struct processor_costs *cost, -- enum machine_mode mode) --{ -- machine_mode inner_mode = mode; -- if (VECTOR_MODE_P (mode)) -- inner_mode = GET_MODE_INNER (mode); -- -- if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) -- return inner_mode == DFmode ? cost->mulsd : cost->mulss; -- else if (X87_FLOAT_MODE_P (mode)) -- return cost->fmul; -- else if (FLOAT_MODE_P (mode)) -- return ix86_vec_cost (mode, -- inner_mode == DFmode ? cost->mulsd : cost->mulss); -- else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) -- { -- /* vpmullq is used in this case. No emulation is needed. */ -- if (TARGET_AVX512DQ) -- return ix86_vec_cost (mode, cost->mulss); -- -- /* V*QImode is emulated with 7-13 insns. */ -- if (mode == V16QImode || mode == V32QImode) -- { -- int extra = 11; -- if (TARGET_XOP && mode == V16QImode) -- extra = 5; -- else if (TARGET_SSSE3) -- extra = 6; -- return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * extra); -- } -- /* V*DImode is emulated with 5-8 insns. */ -- else if (mode == V2DImode || mode == V4DImode) -- { -- if (TARGET_XOP && mode == V2DImode) -- return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 3); -- else -- return ix86_vec_cost (mode, cost->mulss * 3 + cost->sse_op * 5); -- } -- /* Without sse4.1, we don't have PMULLD; it's emulated with 7 -- insns, including two PMULUDQ. */ -- else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX)) -- return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5); -- else -- return ix86_vec_cost (mode, cost->mulss); -- } -- else -- return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7); --} -- --/* Return cost of multiplication in MODE. 
*/ -- --static int --ix86_division_cost (const struct processor_costs *cost, -- enum machine_mode mode) --{ -- machine_mode inner_mode = mode; -- if (VECTOR_MODE_P (mode)) -- inner_mode = GET_MODE_INNER (mode); -- -- if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) -- return inner_mode == DFmode ? cost->divsd : cost->divss; -- else if (X87_FLOAT_MODE_P (mode)) -- return cost->fdiv; -- else if (FLOAT_MODE_P (mode)) -- return ix86_vec_cost (mode, -- inner_mode == DFmode ? cost->divsd : cost->divss); -- else -- return cost->divide[MODE_INDEX (mode)]; --} -- --/* Return cost of shift in MODE. -- If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL. -- AND_IN_OP1 specify in op1 is result of and and SHIFT_AND_TRUNCATE -- if op1 is a result of subreg. -- -- SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored. */ -- --static int --ix86_shift_rotate_cost (const struct processor_costs *cost, -- enum machine_mode mode, bool constant_op1, -- HOST_WIDE_INT op1_val, -- bool speed, -- bool and_in_op1, -- bool shift_and_truncate, -- bool *skip_op0, bool *skip_op1) --{ -- if (skip_op0) -- *skip_op0 = *skip_op1 = false; -- if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) -- { -- /* V*QImode is emulated with 1-11 insns. */ -- if (mode == V16QImode || mode == V32QImode) -- { -- int count = 11; -- if (TARGET_XOP && mode == V16QImode) -- { -- /* For XOP we use vpshab, which requires a broadcast of the -- value to the variable shift insn. For constants this -- means a V16Q const in mem; even when we can perform the -- shift with one insn set the cost to prefer paddb. */ -- if (constant_op1) -- { -- if (skip_op1) -- *skip_op1 = true; -- return ix86_vec_cost (mode, -- cost->sse_op -- + (speed -- ? 2 -- : COSTS_N_BYTES -- (GET_MODE_UNIT_SIZE (mode)))); -- } -- count = 3; -- } -- else if (TARGET_SSSE3) -- count = 7; -- return ix86_vec_cost (mode, cost->sse_op * count); -- } -- else -- return ix86_vec_cost (mode, cost->sse_op); -- } -- if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) -- { -- if (constant_op1) -- { -- if (op1_val > 32) -- return cost->shift_const + COSTS_N_INSNS (2); -- else -- return cost->shift_const * 2; -- } -- else -- { -- if (and_in_op1) -- return cost->shift_var * 2; -- else -- return cost->shift_var * 6 + COSTS_N_INSNS (2); -- } -- } -- else -- { -- if (constant_op1) -- return cost->shift_const; -- else if (shift_and_truncate) -- { -- if (skip_op0) -- *skip_op0 = *skip_op1 = true; -- /* Return the cost after shift-and truncation. */ -- return cost->shift_var; -- } -- else -- return cost->shift_var; -- } -- return cost->shift_const; --} -- --/* Compute a (partial) cost for rtx X. Return true if the complete -- cost has been computed, and false if subexpressions should be -- scanned. In either case, *TOTAL contains the cost result. */ -- --static bool --ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, -- int *total, bool speed) --{ -- rtx mask; -- enum rtx_code code = GET_CODE (x); -- enum rtx_code outer_code = (enum rtx_code) outer_code_i; -- const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost; -- int src_cost; -- -- switch (code) -- { -- case SET: -- if (register_operand (SET_DEST (x), VOIDmode) -- && register_operand (SET_SRC (x), VOIDmode)) -- { -- *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x))); -- return true; -- } -- -- if (register_operand (SET_SRC (x), VOIDmode)) -- /* Avoid potentially incorrect high cost from rtx_costs -- for non-tieable SUBREGs. 
*/ -- src_cost = 0; -- else -- { -- src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed); -- -- if (CONSTANT_P (SET_SRC (x))) -- /* Constant costs assume a base value of COSTS_N_INSNS (1) and add -- a small value, possibly zero for cheap constants. */ -- src_cost += COSTS_N_INSNS (1); -- } -- -- *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed); -- return true; -- -- case CONST_INT: -- case CONST: -- case LABEL_REF: -- case SYMBOL_REF: -- if (x86_64_immediate_operand (x, VOIDmode)) -- *total = 0; -- else -- *total = 1; -- return true; -- -- case CONST_DOUBLE: -- if (IS_STACK_MODE (mode)) -- switch (standard_80387_constant_p (x)) -- { -- case -1: -- case 0: -- break; -- case 1: /* 0.0 */ -- *total = 1; -- return true; -- default: /* Other constants */ -- *total = 2; -- return true; -- } -- /* FALLTHRU */ -- -- case CONST_VECTOR: -- switch (standard_sse_constant_p (x, mode)) -- { -- case 0: -- break; -- case 1: /* 0: xor eliminates false dependency */ -- *total = 0; -- return true; -- default: /* -1: cmp contains false dependency */ -- *total = 1; -- return true; -- } -- /* FALLTHRU */ -- -- case CONST_WIDE_INT: -- /* Fall back to (MEM (SYMBOL_REF)), since that's where -- it'll probably end up. Add a penalty for size. */ -- *total = (COSTS_N_INSNS (1) -- + (!TARGET_64BIT && flag_pic) -- + (GET_MODE_SIZE (mode) <= 4 -- ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2)); -- return true; -- -- case ZERO_EXTEND: -- /* The zero extensions is often completely free on x86_64, so make -- it as cheap as possible. */ -- if (TARGET_64BIT && mode == DImode -- && GET_MODE (XEXP (x, 0)) == SImode) -- *total = 1; -- else if (TARGET_ZERO_EXTEND_WITH_AND) -- *total = cost->add; -- else -- *total = cost->movzx; -- return false; -- -- case SIGN_EXTEND: -- *total = cost->movsx; -- return false; -- -- case ASHIFT: -- if (SCALAR_INT_MODE_P (mode) -- && GET_MODE_SIZE (mode) < UNITS_PER_WORD -- && CONST_INT_P (XEXP (x, 1))) -- { -- HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); -- if (value == 1) -- { -- *total = cost->add; -- return false; -- } -- if ((value == 2 || value == 3) -- && cost->lea <= cost->shift_const) -- { -- *total = cost->lea; -- return false; -- } -- } -- /* FALLTHRU */ -- -- case ROTATE: -- case ASHIFTRT: -- case LSHIFTRT: -- case ROTATERT: -- bool skip_op0, skip_op1; -- *total = ix86_shift_rotate_cost (cost, mode, CONSTANT_P (XEXP (x, 1)), -- CONST_INT_P (XEXP (x, 1)) -- ? INTVAL (XEXP (x, 1)) : -1, -- speed, -- GET_CODE (XEXP (x, 1)) == AND, -- SUBREG_P (XEXP (x, 1)) -- && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND, -- &skip_op0, &skip_op1); -- if (skip_op0 || skip_op1) -- { -- if (!skip_op0) -- *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed); -- if (!skip_op1) -- *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed); -- return true; -- } -- return false; -- -- case FMA: -- { -- rtx sub; -- -- gcc_assert (FLOAT_MODE_P (mode)); -- gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F); -- -- *total = ix86_vec_cost (mode, -- GET_MODE_INNER (mode) == SFmode -- ? cost->fmass : cost->fmasd); -- *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed); -- -- /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. 
*/ -- sub = XEXP (x, 0); -- if (GET_CODE (sub) == NEG) -- sub = XEXP (sub, 0); -- *total += rtx_cost (sub, mode, FMA, 0, speed); -- -- sub = XEXP (x, 2); -- if (GET_CODE (sub) == NEG) -- sub = XEXP (sub, 0); -- *total += rtx_cost (sub, mode, FMA, 2, speed); -- return true; -- } -- -- case MULT: -- if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode)) -- { -- rtx op0 = XEXP (x, 0); -- rtx op1 = XEXP (x, 1); -- int nbits; -- if (CONST_INT_P (XEXP (x, 1))) -- { -- unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); -- for (nbits = 0; value != 0; value &= value - 1) -- nbits++; -- } -- else -- /* This is arbitrary. */ -- nbits = 7; -- -- /* Compute costs correctly for widening multiplication. */ -- if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND) -- && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2 -- == GET_MODE_SIZE (mode)) -- { -- int is_mulwiden = 0; -- machine_mode inner_mode = GET_MODE (op0); -- -- if (GET_CODE (op0) == GET_CODE (op1)) -- is_mulwiden = 1, op1 = XEXP (op1, 0); -- else if (CONST_INT_P (op1)) -- { -- if (GET_CODE (op0) == SIGN_EXTEND) -- is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode) -- == INTVAL (op1); -- else -- is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode)); -- } -- -- if (is_mulwiden) -- op0 = XEXP (op0, 0), mode = GET_MODE (op0); -- } -- -- *total = (cost->mult_init[MODE_INDEX (mode)] -- + nbits * cost->mult_bit -- + rtx_cost (op0, mode, outer_code, opno, speed) -- + rtx_cost (op1, mode, outer_code, opno, speed)); -- -- return true; -- } -- *total = ix86_multiplication_cost (cost, mode); -- return false; -- -- case DIV: -- case UDIV: -- case MOD: -- case UMOD: -- *total = ix86_division_cost (cost, mode); -- return false; -- -- case PLUS: -- if (GET_MODE_CLASS (mode) == MODE_INT -- && GET_MODE_SIZE (mode) <= UNITS_PER_WORD) -- { -- if (GET_CODE (XEXP (x, 0)) == PLUS -- && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT -- && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1)) -- && CONSTANT_P (XEXP (x, 1))) -- { -- HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)); -- if (val == 2 || val == 4 || val == 8) -- { -- *total = cost->lea; -- *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode, -- outer_code, opno, speed); -- *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode, -- outer_code, opno, speed); -- *total += rtx_cost (XEXP (x, 1), mode, -- outer_code, opno, speed); -- return true; -- } -- } -- else if (GET_CODE (XEXP (x, 0)) == MULT -- && CONST_INT_P (XEXP (XEXP (x, 0), 1))) -- { -- HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1)); -- if (val == 2 || val == 4 || val == 8) -- { -- *total = cost->lea; -- *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, -- outer_code, opno, speed); -- *total += rtx_cost (XEXP (x, 1), mode, -- outer_code, opno, speed); -- return true; -- } -- } -- else if (GET_CODE (XEXP (x, 0)) == PLUS) -- { -- /* Add with carry, ignore the cost of adding a carry flag. */ -- if (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 0), mode)) -- *total = cost->add; -- else -- { -- *total = cost->lea; -- *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, -- outer_code, opno, speed); -- } -- -- *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode, -- outer_code, opno, speed); -- *total += rtx_cost (XEXP (x, 1), mode, -- outer_code, opno, speed); -- return true; -- } -- } -- /* FALLTHRU */ -- -- case MINUS: -- /* Subtract with borrow, ignore the cost of subtracting a carry flag. 
*/ -- if (GET_MODE_CLASS (mode) == MODE_INT -- && GET_MODE_SIZE (mode) <= UNITS_PER_WORD -- && GET_CODE (XEXP (x, 0)) == MINUS -- && ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode)) -- { -- *total = cost->add; -- *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, -- outer_code, opno, speed); -- *total += rtx_cost (XEXP (x, 1), mode, -- outer_code, opno, speed); -- return true; -- } -- -- if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) -- { -- *total = cost->addss; -- return false; -- } -- else if (X87_FLOAT_MODE_P (mode)) -- { -- *total = cost->fadd; -- return false; -- } -- else if (FLOAT_MODE_P (mode)) -- { -- *total = ix86_vec_cost (mode, cost->addss); -- return false; -- } -- /* FALLTHRU */ -- -- case AND: -- case IOR: -- case XOR: -- if (GET_MODE_CLASS (mode) == MODE_INT -- && GET_MODE_SIZE (mode) > UNITS_PER_WORD) -- { -- *total = (cost->add * 2 -- + (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed) -- << (GET_MODE (XEXP (x, 0)) != DImode)) -- + (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed) -- << (GET_MODE (XEXP (x, 1)) != DImode))); -- return true; -- } -- /* FALLTHRU */ -- -- case NEG: -- if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) -- { -- *total = cost->sse_op; -- return false; -- } -- else if (X87_FLOAT_MODE_P (mode)) -- { -- *total = cost->fchs; -- return false; -- } -- else if (FLOAT_MODE_P (mode)) -- { -- *total = ix86_vec_cost (mode, cost->sse_op); -- return false; -- } -- /* FALLTHRU */ -- -- case NOT: -- if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) -- *total = ix86_vec_cost (mode, cost->sse_op); -- else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) -- *total = cost->add * 2; -- else -- *total = cost->add; -- return false; -- -- case COMPARE: -- if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT -- && XEXP (XEXP (x, 0), 1) == const1_rtx -- && CONST_INT_P (XEXP (XEXP (x, 0), 2)) -- && XEXP (x, 1) == const0_rtx) -- { -- /* This kind of construct is implemented using test[bwl]. -- Treat it as if we had an AND. */ -- mode = GET_MODE (XEXP (XEXP (x, 0), 0)); -- *total = (cost->add -- + rtx_cost (XEXP (XEXP (x, 0), 0), mode, outer_code, -- opno, speed) -- + rtx_cost (const1_rtx, mode, outer_code, opno, speed)); -- return true; -- } -- -- /* The embedded comparison operand is completely free. */ -- if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))) -- && XEXP (x, 1) == const0_rtx) -- *total = 0; -- -- return false; -- -- case FLOAT_EXTEND: -- if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)) -- *total = 0; -- else -- *total = ix86_vec_cost (mode, cost->addss); -- return false; -- -- case FLOAT_TRUNCATE: -- if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)) -- *total = cost->fadd; -- else -- *total = ix86_vec_cost (mode, cost->addss); -- return false; -- -- case ABS: -- /* SSE requires memory load for the constant operand. It may make -- sense to account for this. Of course the constant operand may or -- may not be reused. */ -- if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) -- *total = cost->sse_op; -- else if (X87_FLOAT_MODE_P (mode)) -- *total = cost->fabs; -- else if (FLOAT_MODE_P (mode)) -- *total = ix86_vec_cost (mode, cost->sse_op); -- return false; -- -- case SQRT: -- if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) -- *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd; -- else if (X87_FLOAT_MODE_P (mode)) -- *total = cost->fsqrt; -- else if (FLOAT_MODE_P (mode)) -- *total = ix86_vec_cost (mode, -- mode == SFmode ? 
cost->sqrtss : cost->sqrtsd); -- return false; -- -- case UNSPEC: -- if (XINT (x, 1) == UNSPEC_TP) -- *total = 0; -- return false; -- -- case VEC_SELECT: -- case VEC_CONCAT: -- case VEC_DUPLICATE: -- /* ??? Assume all of these vector manipulation patterns are -- recognizable. In which case they all pretty much have the -- same cost. */ -- *total = cost->sse_op; -- return true; -- case VEC_MERGE: -- mask = XEXP (x, 2); -- /* This is masked instruction, assume the same cost, -- as nonmasked variant. */ -- if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask))) -- *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed); -- else -- *total = cost->sse_op; -- return true; -- -- default: -- return false; -- } --} -- --#if TARGET_MACHO -- --static int current_machopic_label_num; -- --/* Given a symbol name and its associated stub, write out the -- definition of the stub. */ -- --void --machopic_output_stub (FILE *file, const char *symb, const char *stub) --{ -- unsigned int length; -- char *binder_name, *symbol_name, lazy_ptr_name[32]; -- int label = ++current_machopic_label_num; -- -- /* For 64-bit we shouldn't get here. */ -- gcc_assert (!TARGET_64BIT); -- -- /* Lose our funky encoding stuff so it doesn't contaminate the stub. */ -- symb = targetm.strip_name_encoding (symb); -- -- length = strlen (stub); -- binder_name = XALLOCAVEC (char, length + 32); -- GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length); -- -- length = strlen (symb); -- symbol_name = XALLOCAVEC (char, length + 32); -- GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length); -- -- sprintf (lazy_ptr_name, "L%d$lz", label); -- -- if (MACHOPIC_ATT_STUB) -- switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]); -- else if (MACHOPIC_PURE) -- switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]); -- else -- switch_to_section (darwin_sections[machopic_symbol_stub_section]); -- -- fprintf (file, "%s:\n", stub); -- fprintf (file, "\t.indirect_symbol %s\n", symbol_name); -- -- if (MACHOPIC_ATT_STUB) -- { -- fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n"); -- } -- else if (MACHOPIC_PURE) -- { -- /* PIC stub. */ -- /* 25-byte PIC stub using "CALL get_pc_thunk". */ -- rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */); -- output_set_got (tmp, NULL_RTX); /* "CALL ___.get_pc_thunk.cx". */ -- fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n", -- label, lazy_ptr_name, label); -- fprintf (file, "\tjmp\t*%%ecx\n"); -- } -- else -- fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name); -- -- /* The AT&T-style ("self-modifying") stub is not lazily bound, thus -- it needs no stub-binding-helper. */ -- if (MACHOPIC_ATT_STUB) -- return; -- -- fprintf (file, "%s:\n", binder_name); -- -- if (MACHOPIC_PURE) -- { -- fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name); -- fprintf (file, "\tpushl\t%%ecx\n"); -- } -- else -- fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name); -- -- fputs ("\tjmp\tdyld_stub_binding_helper\n", file); -- -- /* N.B. Keep the correspondence of these -- 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the -- old-pic/new-pic/non-pic stubs; altering this will break -- compatibility with existing dylibs. */ -- if (MACHOPIC_PURE) -- { -- /* 25-byte PIC stub using "CALL get_pc_thunk". */ -- switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]); -- } -- else -- /* 16-byte -mdynamic-no-pic stub. 
*/ -- switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]); -- -- fprintf (file, "%s:\n", lazy_ptr_name); -- fprintf (file, "\t.indirect_symbol %s\n", symbol_name); -- fprintf (file, ASM_LONG "%s\n", binder_name); --} --#endif /* TARGET_MACHO */ -- --/* Order the registers for register allocator. */ -- --void --x86_order_regs_for_local_alloc (void) --{ -- int pos = 0; -- int i; -- -- /* First allocate the local general purpose registers. */ -- for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) -- if (GENERAL_REGNO_P (i) && call_used_regs[i]) -- reg_alloc_order [pos++] = i; -- -- /* Global general purpose registers. */ -- for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) -- if (GENERAL_REGNO_P (i) && !call_used_regs[i]) -- reg_alloc_order [pos++] = i; -- -- /* x87 registers come first in case we are doing FP math -- using them. */ -- if (!TARGET_SSE_MATH) -- for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) -- reg_alloc_order [pos++] = i; -- -- /* SSE registers. */ -- for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++) -- reg_alloc_order [pos++] = i; -- for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) -- reg_alloc_order [pos++] = i; -- -- /* Extended REX SSE registers. */ -- for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++) -- reg_alloc_order [pos++] = i; -- -- /* Mask register. */ -- for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++) -- reg_alloc_order [pos++] = i; -- -- /* x87 registers. */ -- if (TARGET_SSE_MATH) -- for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) -- reg_alloc_order [pos++] = i; -- -- for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++) -- reg_alloc_order [pos++] = i; -- -- /* Initialize the rest of array as we do not allocate some registers -- at all. */ -- while (pos < FIRST_PSEUDO_REGISTER) -- reg_alloc_order [pos++] = 0; --} -- --/* Handle a "callee_pop_aggregate_return" attribute; arguments as -- in struct attribute_spec handler. */ --static tree --ix86_handle_callee_pop_aggregate_return (tree *node, tree name, tree args, int, -- bool *no_add_attrs) --{ -- if (TREE_CODE (*node) != FUNCTION_TYPE -- && TREE_CODE (*node) != METHOD_TYPE -- && TREE_CODE (*node) != FIELD_DECL -- && TREE_CODE (*node) != TYPE_DECL) -- { -- warning (OPT_Wattributes, "%qE attribute only applies to functions", -- name); -- *no_add_attrs = true; -- return NULL_TREE; -- } -- if (TARGET_64BIT) -- { -- warning (OPT_Wattributes, "%qE attribute only available for 32-bit", -- name); -- *no_add_attrs = true; -- return NULL_TREE; -- } -- if (is_attribute_p ("callee_pop_aggregate_return", name)) -- { -- tree cst; -- -- cst = TREE_VALUE (args); -- if (TREE_CODE (cst) != INTEGER_CST) -- { -- warning (OPT_Wattributes, -- "%qE attribute requires an integer constant argument", -- name); -- *no_add_attrs = true; -- } -- else if (compare_tree_int (cst, 0) != 0 -- && compare_tree_int (cst, 1) != 0) -- { -- warning (OPT_Wattributes, -- "argument to %qE attribute is neither zero, nor one", -- name); -- *no_add_attrs = true; -- } -- -- return NULL_TREE; -- } -- -- return NULL_TREE; --} -- --/* Handle a "ms_abi" or "sysv" attribute; arguments as in -- struct attribute_spec.handler. 
*/ --static tree --ix86_handle_abi_attribute (tree *node, tree name, tree, int, -- bool *no_add_attrs) --{ -- if (TREE_CODE (*node) != FUNCTION_TYPE -- && TREE_CODE (*node) != METHOD_TYPE -- && TREE_CODE (*node) != FIELD_DECL -- && TREE_CODE (*node) != TYPE_DECL) -- { -- warning (OPT_Wattributes, "%qE attribute only applies to functions", -- name); -- *no_add_attrs = true; -- return NULL_TREE; -- } -- -- /* Can combine regparm with all attributes but fastcall. */ -- if (is_attribute_p ("ms_abi", name)) -- { -- if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node))) -- { -- error ("ms_abi and sysv_abi attributes are not compatible"); -- } -- -- return NULL_TREE; -- } -- else if (is_attribute_p ("sysv_abi", name)) -- { -- if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node))) -- { -- error ("ms_abi and sysv_abi attributes are not compatible"); -- } -- -- return NULL_TREE; -- } -- -- return NULL_TREE; --} -- --/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in -- struct attribute_spec.handler. */ --static tree --ix86_handle_struct_attribute (tree *node, tree name, tree, int, -- bool *no_add_attrs) --{ -- tree *type = NULL; -- if (DECL_P (*node)) -- { -- if (TREE_CODE (*node) == TYPE_DECL) -- type = &TREE_TYPE (*node); -- } -- else -- type = node; -- -- if (!(type && RECORD_OR_UNION_TYPE_P (*type))) -- { -- warning (OPT_Wattributes, "%qE attribute ignored", -- name); -- *no_add_attrs = true; -- } -- -- else if ((is_attribute_p ("ms_struct", name) -- && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type))) -- || ((is_attribute_p ("gcc_struct", name) -- && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type))))) -- { -- warning (OPT_Wattributes, "%qE incompatible attribute ignored", -- name); -- *no_add_attrs = true; -- } -- -- return NULL_TREE; --} -- --static tree --ix86_handle_fndecl_attribute (tree *node, tree name, tree args, int, -- bool *no_add_attrs) --{ -- if (TREE_CODE (*node) != FUNCTION_DECL) -- { -- warning (OPT_Wattributes, "%qE attribute only applies to functions", -- name); -- *no_add_attrs = true; -- } -- -- if (is_attribute_p ("indirect_branch", name)) -- { -- tree cst = TREE_VALUE (args); -- if (TREE_CODE (cst) != STRING_CST) -- { -- warning (OPT_Wattributes, -- "%qE attribute requires a string constant argument", -- name); -- *no_add_attrs = true; -- } -- else if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0 -- && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0 -- && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0 -- && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0) -- { -- warning (OPT_Wattributes, -- "argument to %qE attribute is not " -- "(keep|thunk|thunk-inline|thunk-extern)", name); -- *no_add_attrs = true; -- } -- } -- -- if (is_attribute_p ("function_return", name)) -- { -- tree cst = TREE_VALUE (args); -- if (TREE_CODE (cst) != STRING_CST) -- { -- warning (OPT_Wattributes, -- "%qE attribute requires a string constant argument", -- name); -- *no_add_attrs = true; -- } -- else if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0 -- && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0 -- && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0 -- && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0) -- { -- warning (OPT_Wattributes, -- "argument to %qE attribute is not " -- "(keep|thunk|thunk-inline|thunk-extern)", name); -- *no_add_attrs = true; -- } -- } -- -- return NULL_TREE; --} -- --static tree --ix86_handle_no_caller_saved_registers_attribute (tree *, tree, tree, -- int, bool *) --{ -- return NULL_TREE; --} 
-- --static tree --ix86_handle_interrupt_attribute (tree *node, tree, tree, int, bool *) --{ -- /* DECL_RESULT and DECL_ARGUMENTS do not exist there yet, -- but the function type contains args and return type data. */ -- tree func_type = *node; -- tree return_type = TREE_TYPE (func_type); -- -- int nargs = 0; -- tree current_arg_type = TYPE_ARG_TYPES (func_type); -- while (current_arg_type -- && ! VOID_TYPE_P (TREE_VALUE (current_arg_type))) -- { -- if (nargs == 0) -- { -- if (! POINTER_TYPE_P (TREE_VALUE (current_arg_type))) -- error ("interrupt service routine should have a pointer " -- "as the first argument"); -- } -- else if (nargs == 1) -- { -- if (TREE_CODE (TREE_VALUE (current_arg_type)) != INTEGER_TYPE -- || TYPE_MODE (TREE_VALUE (current_arg_type)) != word_mode) -- error ("interrupt service routine should have %qs " -- "as the second argument", -- TARGET_64BIT -- ? (TARGET_X32 ? "unsigned long long int" -- : "unsigned long int") -- : "unsigned int"); -- } -- nargs++; -- current_arg_type = TREE_CHAIN (current_arg_type); -- } -- if (!nargs || nargs > 2) -- error ("interrupt service routine can only have a pointer argument " -- "and an optional integer argument"); -- if (! VOID_TYPE_P (return_type)) -- error ("interrupt service routine can%'t have non-void return value"); -- -- return NULL_TREE; --} -- --static bool --ix86_ms_bitfield_layout_p (const_tree record_type) --{ -- return ((TARGET_MS_BITFIELD_LAYOUT -- && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type))) -- || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type))); --} -- --/* Returns an expression indicating where the this parameter is -- located on entry to the FUNCTION. */ -- --static rtx --x86_this_parameter (tree function) --{ -- tree type = TREE_TYPE (function); -- bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0; -- int nregs; -- -- if (TARGET_64BIT) -- { -- const int *parm_regs; -- -- if (ix86_function_type_abi (type) == MS_ABI) -- parm_regs = x86_64_ms_abi_int_parameter_registers; -- else -- parm_regs = x86_64_int_parameter_registers; -- return gen_rtx_REG (Pmode, parm_regs[aggr]); -- } -- -- nregs = ix86_function_regparm (type, function); -- -- if (nregs > 0 && !stdarg_p (type)) -- { -- int regno; -- unsigned int ccvt = ix86_get_callcvt (type); -- -- if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) -- regno = aggr ? DX_REG : CX_REG; -- else if ((ccvt & IX86_CALLCVT_THISCALL) != 0) -- { -- regno = CX_REG; -- if (aggr) -- return gen_rtx_MEM (SImode, -- plus_constant (Pmode, stack_pointer_rtx, 4)); -- } -- else -- { -- regno = AX_REG; -- if (aggr) -- { -- regno = DX_REG; -- if (nregs == 1) -- return gen_rtx_MEM (SImode, -- plus_constant (Pmode, -- stack_pointer_rtx, 4)); -- } -- } -- return gen_rtx_REG (SImode, regno); -- } -- -- return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx, -- aggr ? 8 : 4)); --} -- --/* Determine whether x86_output_mi_thunk can succeed. */ -- --static bool --x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset, -- const_tree function) --{ -- /* 64-bit can handle anything. */ -- if (TARGET_64BIT) -- return true; -- -- /* For 32-bit, everything's fine if we have one free register. */ -- if (ix86_function_regparm (TREE_TYPE (function), function) < 3) -- return true; -- -- /* Need a free register for vcall_offset. */ -- if (vcall_offset) -- return false; -- -- /* Need a free register for GOT references. */ -- if (flag_pic && !targetm.binds_local_p (function)) -- return false; -- -- /* Otherwise ok. 
*/ -- return true; --} -- --/* Output the assembler code for a thunk function. THUNK_DECL is the -- declaration for the thunk function itself, FUNCTION is the decl for -- the target function. DELTA is an immediate constant offset to be -- added to THIS. If VCALL_OFFSET is nonzero, the word at -- *(*this + vcall_offset) should be added to THIS. */ -- --static void --x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta, -- HOST_WIDE_INT vcall_offset, tree function) --{ -- rtx this_param = x86_this_parameter (function); -- rtx this_reg, tmp, fnaddr; -- unsigned int tmp_regno; -- rtx_insn *insn; -- -- if (TARGET_64BIT) -- tmp_regno = R10_REG; -- else -- { -- unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function)); -- if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) -- tmp_regno = AX_REG; -- else if ((ccvt & IX86_CALLCVT_THISCALL) != 0) -- tmp_regno = DX_REG; -- else -- tmp_regno = CX_REG; -- } -- -- emit_note (NOTE_INSN_PROLOGUE_END); -- -- /* CET is enabled, insert EB instruction. */ -- if ((flag_cf_protection & CF_BRANCH)) -- emit_insn (gen_nop_endbr ()); -- -- /* If VCALL_OFFSET, we'll need THIS in a register. Might as well -- pull it in now and let DELTA benefit. */ -- if (REG_P (this_param)) -- this_reg = this_param; -- else if (vcall_offset) -- { -- /* Put the this parameter into %eax. */ -- this_reg = gen_rtx_REG (Pmode, AX_REG); -- emit_move_insn (this_reg, this_param); -- } -- else -- this_reg = NULL_RTX; -- -- /* Adjust the this parameter by a fixed constant. */ -- if (delta) -- { -- rtx delta_rtx = GEN_INT (delta); -- rtx delta_dst = this_reg ? this_reg : this_param; -- -- if (TARGET_64BIT) -- { -- if (!x86_64_general_operand (delta_rtx, Pmode)) -- { -- tmp = gen_rtx_REG (Pmode, tmp_regno); -- emit_move_insn (tmp, delta_rtx); -- delta_rtx = tmp; -- } -- } -- -- ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx); -- } -- -- /* Adjust the this parameter by a value stored in the vtable. */ -- if (vcall_offset) -- { -- rtx vcall_addr, vcall_mem, this_mem; -- -- tmp = gen_rtx_REG (Pmode, tmp_regno); -- -- this_mem = gen_rtx_MEM (ptr_mode, this_reg); -- if (Pmode != ptr_mode) -- this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem); -- emit_move_insn (tmp, this_mem); -- -- /* Adjust the this parameter. */ -- vcall_addr = plus_constant (Pmode, tmp, vcall_offset); -- if (TARGET_64BIT -- && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true)) -- { -- rtx tmp2 = gen_rtx_REG (Pmode, R11_REG); -- emit_move_insn (tmp2, GEN_INT (vcall_offset)); -- vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2); -- } -- -- vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr); -- if (Pmode != ptr_mode) -- emit_insn (gen_addsi_1_zext (this_reg, -- gen_rtx_REG (ptr_mode, -- REGNO (this_reg)), -- vcall_mem)); -- else -- ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem); -- } -- -- /* If necessary, drop THIS back to its stack slot. 
*/ -- if (this_reg && this_reg != this_param) -- emit_move_insn (this_param, this_reg); -- -- fnaddr = XEXP (DECL_RTL (function), 0); -- if (TARGET_64BIT) -- { -- if (!flag_pic || targetm.binds_local_p (function) -- || TARGET_PECOFF) -- ; -- else -- { -- tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL); -- tmp = gen_rtx_CONST (Pmode, tmp); -- fnaddr = gen_const_mem (Pmode, tmp); -- } -- } -- else -- { -- if (!flag_pic || targetm.binds_local_p (function)) -- ; --#if TARGET_MACHO -- else if (TARGET_MACHO) -- { -- fnaddr = machopic_indirect_call_target (DECL_RTL (function)); -- fnaddr = XEXP (fnaddr, 0); -- } --#endif /* TARGET_MACHO */ -- else -- { -- tmp = gen_rtx_REG (Pmode, CX_REG); -- output_set_got (tmp, NULL_RTX); -- -- fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT); -- fnaddr = gen_rtx_CONST (Pmode, fnaddr); -- fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr); -- fnaddr = gen_const_mem (Pmode, fnaddr); -- } -- } -- -- /* Our sibling call patterns do not allow memories, because we have no -- predicate that can distinguish between frame and non-frame memory. -- For our purposes here, we can get away with (ab)using a jump pattern, -- because we're going to do no optimization. */ -- if (MEM_P (fnaddr)) -- { -- if (sibcall_insn_operand (fnaddr, word_mode)) -- { -- fnaddr = XEXP (DECL_RTL (function), 0); -- tmp = gen_rtx_MEM (QImode, fnaddr); -- tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx); -- tmp = emit_call_insn (tmp); -- SIBLING_CALL_P (tmp) = 1; -- } -- else -- emit_jump_insn (gen_indirect_jump (fnaddr)); -- } -- else -- { -- if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr)) -- { -- // CM_LARGE_PIC always uses pseudo PIC register which is -- // uninitialized. Since FUNCTION is local and calling it -- // doesn't go through PLT, we use scratch register %r11 as -- // PIC register and initialize it here. -- pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG); -- ix86_init_large_pic_reg (tmp_regno); -- fnaddr = legitimize_pic_address (fnaddr, -- gen_rtx_REG (Pmode, tmp_regno)); -- } -- -- if (!sibcall_insn_operand (fnaddr, word_mode)) -- { -- tmp = gen_rtx_REG (word_mode, tmp_regno); -- if (GET_MODE (fnaddr) != word_mode) -- fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr); -- emit_move_insn (tmp, fnaddr); -- fnaddr = tmp; -- } -- -- tmp = gen_rtx_MEM (QImode, fnaddr); -- tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx); -- tmp = emit_call_insn (tmp); -- SIBLING_CALL_P (tmp) = 1; -- } -- emit_barrier (); -- -- /* Emit just enough of rest_of_compilation to get the insns emitted. -- Note that use_thunk calls assemble_start_function et al. 
*/ -- insn = get_insns (); -- shorten_branches (insn); -- final_start_function (insn, file, 1); -- final (insn, file, 1); -- final_end_function (); --} -- --static void --x86_file_start (void) --{ -- default_file_start (); -- if (TARGET_16BIT) -- fputs ("\t.code16gcc\n", asm_out_file); --#if TARGET_MACHO -- darwin_file_start (); --#endif -- if (X86_FILE_START_VERSION_DIRECTIVE) -- fputs ("\t.version\t\"01.01\"\n", asm_out_file); -- if (X86_FILE_START_FLTUSED) -- fputs ("\t.global\t__fltused\n", asm_out_file); -- if (ix86_asm_dialect == ASM_INTEL) -- fputs ("\t.intel_syntax noprefix\n", asm_out_file); --} -- --int --x86_field_alignment (tree type, int computed) --{ -- machine_mode mode; -- -- if (TARGET_64BIT || TARGET_ALIGN_DOUBLE) -- return computed; -- if (TARGET_IAMCU) -- return iamcu_alignment (type, computed); -- mode = TYPE_MODE (strip_array_types (type)); -- if (mode == DFmode || mode == DCmode -- || GET_MODE_CLASS (mode) == MODE_INT -- || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT) -- return MIN (32, computed); -- return computed; --} -- --/* Print call to TARGET to FILE. */ -- --static void --x86_print_call_or_nop (FILE *file, const char *target) --{ -- if (flag_nop_mcount || !strcmp (target, "nop")) -- /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */ -- fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n"); -- else -- fprintf (file, "1:\tcall\t%s\n", target); --} -- --static bool --current_fentry_name (const char **name) --{ -- tree attr = lookup_attribute ("fentry_name", -- DECL_ATTRIBUTES (current_function_decl)); -- if (!attr) -- return false; -- *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr))); -- return true; --} -- --static bool --current_fentry_section (const char **name) --{ -- tree attr = lookup_attribute ("fentry_section", -- DECL_ATTRIBUTES (current_function_decl)); -- if (!attr) -- return false; -- *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr))); -- return true; --} -- --/* Output assembler code to FILE to increment profiler label # LABELNO -- for profiling a function entry. */ --void --x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) --{ -- if (cfun->machine->endbr_queued_at_entrance) -- fprintf (file, "\t%s\n", TARGET_64BIT ? 
"endbr64" : "endbr32"); -- -- const char *mcount_name = MCOUNT_NAME; -- -- if (current_fentry_name (&mcount_name)) -- ; -- else if (fentry_name) -- mcount_name = fentry_name; -- else if (flag_fentry) -- mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE; -- -- if (TARGET_64BIT) -- { --#ifndef NO_PROFILE_COUNTERS -- fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno); --#endif -- -- if (!TARGET_PECOFF && flag_pic) -- fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name); -- else -- x86_print_call_or_nop (file, mcount_name); -- } -- else if (flag_pic) -- { --#ifndef NO_PROFILE_COUNTERS -- fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n", -- LPREFIX, labelno); --#endif -- fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name); -- } -- else -- { --#ifndef NO_PROFILE_COUNTERS -- fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n", -- LPREFIX, labelno); --#endif -- x86_print_call_or_nop (file, mcount_name); -- } -- -- if (flag_record_mcount -- || lookup_attribute ("fentry_section", -- DECL_ATTRIBUTES (current_function_decl))) -- { -- const char *sname = "__mcount_loc"; -- -- if (current_fentry_section (&sname)) -- ; -- else if (fentry_section) -- sname = fentry_section; -- -- fprintf (file, "\t.section %s, \"a\",@progbits\n", sname); -- fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long"); -- fprintf (file, "\t.previous\n"); -- } --} -- --/* We don't have exact information about the insn sizes, but we may assume -- quite safely that we are informed about all 1 byte insns and memory -- address sizes. This is enough to eliminate unnecessary padding in -- 99% of cases. */ -- --int --ix86_min_insn_size (rtx_insn *insn) --{ -- int l = 0, len; -- -- if (!INSN_P (insn) || !active_insn_p (insn)) -- return 0; -- -- /* Discard alignments we've emit and jump instructions. */ -- if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE -- && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN) -- return 0; -- -- /* Important case - calls are always 5 bytes. -- It is common to have many calls in the row. */ -- if (CALL_P (insn) -- && symbolic_reference_mentioned_p (PATTERN (insn)) -- && !SIBLING_CALL_P (insn)) -- return 5; -- len = get_attr_length (insn); -- if (len <= 1) -- return 1; -- -- /* For normal instructions we rely on get_attr_length being exact, -- with a few exceptions. */ -- if (!JUMP_P (insn)) -- { -- enum attr_type type = get_attr_type (insn); -- -- switch (type) -- { -- case TYPE_MULTI: -- if (GET_CODE (PATTERN (insn)) == ASM_INPUT -- || asm_noperands (PATTERN (insn)) >= 0) -- return 0; -- break; -- case TYPE_OTHER: -- case TYPE_FCMP: -- break; -- default: -- /* Otherwise trust get_attr_length. */ -- return len; -- } -- -- l = get_attr_length_address (insn); -- if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn))) -- l = 4; -- } -- if (l) -- return 1+l; -- else -- return 2; --} -- --#ifdef ASM_OUTPUT_MAX_SKIP_PAD -- --/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte -- window. */ -- --static void --ix86_avoid_jump_mispredicts (void) --{ -- rtx_insn *insn, *start = get_insns (); -- int nbytes = 0, njumps = 0; -- bool isjump = false; -- -- /* Look for all minimal intervals of instructions containing 4 jumps. -- The intervals are bounded by START and INSN. NBYTES is the total -- size of instructions in the interval including INSN and not including -- START. When the NBYTES is smaller than 16 bytes, it is possible -- that the end of START and INSN ends up in the same 16byte page. 
-- -- The smallest offset in the page INSN can start is the case where START -- ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN). -- We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN). -- -- Don't consider asm goto as jump, while it can contain a jump, it doesn't -- have to, control transfer to label(s) can be performed through other -- means, and also we estimate minimum length of all asm stmts as 0. */ -- for (insn = start; insn; insn = NEXT_INSN (insn)) -- { -- int min_size; -- -- if (LABEL_P (insn)) -- { -- align_flags alignment = label_to_alignment (insn); -- int align = alignment.levels[0].log; -- int max_skip = alignment.levels[0].maxskip; -- -- if (max_skip > 15) -- max_skip = 15; -- /* If align > 3, only up to 16 - max_skip - 1 bytes can be -- already in the current 16 byte page, because otherwise -- ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer -- bytes to reach 16 byte boundary. */ -- if (align <= 0 -- || (align <= 3 && max_skip != (1 << align) - 1)) -- max_skip = 0; -- if (dump_file) -- fprintf (dump_file, "Label %i with max_skip %i\n", -- INSN_UID (insn), max_skip); -- if (max_skip) -- { -- while (nbytes + max_skip >= 16) -- { -- start = NEXT_INSN (start); -- if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0) -- || CALL_P (start)) -- njumps--, isjump = true; -- else -- isjump = false; -- nbytes -= ix86_min_insn_size (start); -- } -- } -- continue; -- } -- -- min_size = ix86_min_insn_size (insn); -- nbytes += min_size; -- if (dump_file) -- fprintf (dump_file, "Insn %i estimated to %i bytes\n", -- INSN_UID (insn), min_size); -- if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0) -- || CALL_P (insn)) -- njumps++; -- else -- continue; -- -- while (njumps > 3) -- { -- start = NEXT_INSN (start); -- if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0) -- || CALL_P (start)) -- njumps--, isjump = true; -- else -- isjump = false; -- nbytes -= ix86_min_insn_size (start); -- } -- gcc_assert (njumps >= 0); -- if (dump_file) -- fprintf (dump_file, "Interval %i to %i has %i bytes\n", -- INSN_UID (start), INSN_UID (insn), nbytes); -- -- if (njumps == 3 && isjump && nbytes < 16) -- { -- int padsize = 15 - nbytes + ix86_min_insn_size (insn); -- -- if (dump_file) -- fprintf (dump_file, "Padding insn %i by %i bytes!\n", -- INSN_UID (insn), padsize); -- emit_insn_before (gen_pad (GEN_INT (padsize)), insn); -- } -- } --} --#endif -- --/* AMD Athlon works faster -- when RET is not destination of conditional jump or directly preceded -- by other jump instruction. We avoid the penalty by inserting NOP just -- before the RET instructions in such cases. 
*/ --static void --ix86_pad_returns (void) --{ -- edge e; -- edge_iterator ei; -- -- FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) -- { -- basic_block bb = e->src; -- rtx_insn *ret = BB_END (bb); -- rtx_insn *prev; -- bool replace = false; -- -- if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret)) -- || optimize_bb_for_size_p (bb)) -- continue; -- for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev)) -- if (active_insn_p (prev) || LABEL_P (prev)) -- break; -- if (prev && LABEL_P (prev)) -- { -- edge e; -- edge_iterator ei; -- -- FOR_EACH_EDGE (e, ei, bb->preds) -- if (EDGE_FREQUENCY (e) && e->src->index >= 0 -- && !(e->flags & EDGE_FALLTHRU)) -- { -- replace = true; -- break; -- } -- } -- if (!replace) -- { -- prev = prev_active_insn (ret); -- if (prev -- && ((JUMP_P (prev) && any_condjump_p (prev)) -- || CALL_P (prev))) -- replace = true; -- /* Empty functions get branch mispredict even when -- the jump destination is not visible to us. */ -- if (!prev && !optimize_function_for_size_p (cfun)) -- replace = true; -- } -- if (replace) -- { -- emit_jump_insn_before (gen_simple_return_internal_long (), ret); -- delete_insn (ret); -- } -- } --} -- --/* Count the minimum number of instructions in BB. Return 4 if the -- number of instructions >= 4. */ -- --static int --ix86_count_insn_bb (basic_block bb) --{ -- rtx_insn *insn; -- int insn_count = 0; -- -- /* Count number of instructions in this block. Return 4 if the number -- of instructions >= 4. */ -- FOR_BB_INSNS (bb, insn) -- { -- /* Only happen in exit blocks. */ -- if (JUMP_P (insn) -- && ANY_RETURN_P (PATTERN (insn))) -- break; -- -- if (NONDEBUG_INSN_P (insn) -- && GET_CODE (PATTERN (insn)) != USE -- && GET_CODE (PATTERN (insn)) != CLOBBER) -- { -- insn_count++; -- if (insn_count >= 4) -- return insn_count; -- } -- } -- -- return insn_count; --} -- -- --/* Count the minimum number of instructions in code path in BB. -- Return 4 if the number of instructions >= 4. */ -- --static int --ix86_count_insn (basic_block bb) --{ -- edge e; -- edge_iterator ei; -- int min_prev_count; -- -- /* Only bother counting instructions along paths with no -- more than 2 basic blocks between entry and exit. Given -- that BB has an edge to exit, determine if a predecessor -- of BB has an edge from entry. If so, compute the number -- of instructions in the predecessor block. If there -- happen to be multiple such blocks, compute the minimum. */ -- min_prev_count = 4; -- FOR_EACH_EDGE (e, ei, bb->preds) -- { -- edge prev_e; -- edge_iterator prev_ei; -- -- if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)) -- { -- min_prev_count = 0; -- break; -- } -- FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds) -- { -- if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)) -- { -- int count = ix86_count_insn_bb (e->src); -- if (count < min_prev_count) -- min_prev_count = count; -- break; -- } -- } -- } -- -- if (min_prev_count < 4) -- min_prev_count += ix86_count_insn_bb (bb); -- -- return min_prev_count; --} -- --/* Pad short function to 4 instructions. */ -- --static void --ix86_pad_short_function (void) --{ -- edge e; -- edge_iterator ei; -- -- FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) -- { -- rtx_insn *ret = BB_END (e->src); -- if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret))) -- { -- int insn_count = ix86_count_insn (e->src); -- -- /* Pad short function. */ -- if (insn_count < 4) -- { -- rtx_insn *insn = ret; -- -- /* Find epilogue. 
*/ -- while (insn -- && (!NOTE_P (insn) -- || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG)) -- insn = PREV_INSN (insn); -- -- if (!insn) -- insn = ret; -- -- /* Two NOPs count as one instruction. */ -- insn_count = 2 * (4 - insn_count); -- emit_insn_before (gen_nops (GEN_INT (insn_count)), insn); -- } -- } -- } --} -- --/* Fix up a Windows system unwinder issue. If an EH region falls through into -- the epilogue, the Windows system unwinder will apply epilogue logic and -- produce incorrect offsets. This can be avoided by adding a nop between -- the last insn that can throw and the first insn of the epilogue. */ -- --static void --ix86_seh_fixup_eh_fallthru (void) --{ -- edge e; -- edge_iterator ei; -- -- FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) -- { -- rtx_insn *insn, *next; -- -- /* Find the beginning of the epilogue. */ -- for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn)) -- if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG) -- break; -- if (insn == NULL) -- continue; -- -- /* We only care about preceding insns that can throw. */ -- insn = prev_active_insn (insn); -- if (insn == NULL || !can_throw_internal (insn)) -- continue; -- -- /* Do not separate calls from their debug information. */ -- for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next)) -- if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION) -- insn = next; -- else -- break; -- -- emit_insn_after (gen_nops (const1_rtx), insn); -- } --} -- --/* Implement machine specific optimizations. We implement padding of returns -- for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */ --static void --ix86_reorg (void) --{ -- /* We are freeing block_for_insn in the toplev to keep compatibility -- with old MDEP_REORGS that are not CFG based. Recompute it now. */ -- compute_bb_for_insn (); -- -- if (TARGET_SEH && current_function_has_exception_handlers ()) -- ix86_seh_fixup_eh_fallthru (); -- -- if (optimize && optimize_function_for_speed_p (cfun)) -- { -- if (TARGET_PAD_SHORT_FUNCTION) -- ix86_pad_short_function (); -- else if (TARGET_PAD_RETURNS) -- ix86_pad_returns (); --#ifdef ASM_OUTPUT_MAX_SKIP_PAD -- if (TARGET_FOUR_JUMP_LIMIT) -- ix86_avoid_jump_mispredicts (); --#endif -- } --} -- --/* Return nonzero when QImode register that must be represented via REX prefix -- is used. */ --bool --x86_extended_QIreg_mentioned_p (rtx_insn *insn) --{ -- int i; -- extract_insn_cached (insn); -- for (i = 0; i < recog_data.n_operands; i++) -- if (GENERAL_REG_P (recog_data.operand[i]) -- && !QI_REGNO_P (REGNO (recog_data.operand[i]))) -- return true; -- return false; --} -- --/* Return true when INSN mentions register that must be encoded using REX -- prefix. */ --bool --x86_extended_reg_mentioned_p (rtx insn) --{ -- subrtx_iterator::array_type array; -- FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST) -- { -- const_rtx x = *iter; -- if (REG_P (x) -- && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x)))) -- return true; -- } -- return false; --} -- --/* If profitable, negate (without causing overflow) integer constant -- of mode MODE at location LOC. Return true in this case. */ --bool --x86_maybe_negate_const_int (rtx *loc, machine_mode mode) --{ -- HOST_WIDE_INT val; -- -- if (!CONST_INT_P (*loc)) -- return false; -- -- switch (mode) -- { -- case E_DImode: -- /* DImode x86_64 constants must fit in 32 bits. 
*/ -- gcc_assert (x86_64_immediate_operand (*loc, mode)); -- -- mode = SImode; -- break; -- -- case E_SImode: -- case E_HImode: -- case E_QImode: -- break; -- -- default: -- gcc_unreachable (); -- } -- -- /* Avoid overflows. */ -- if (mode_signbit_p (mode, *loc)) -- return false; -- -- val = INTVAL (*loc); -- -- /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'. -- Exceptions: -128 encodes smaller than 128, so swap sign and op. */ -- if ((val < 0 && val != -128) -- || val == 128) -- { -- *loc = GEN_INT (-val); -- return true; -- } -- -- return false; --} -- --/* Generate an unsigned DImode/SImode to FP conversion. This is the same code -- optabs would emit if we didn't have TFmode patterns. */ -- --void --x86_emit_floatuns (rtx operands[2]) --{ -- rtx_code_label *neglab, *donelab; -- rtx i0, i1, f0, in, out; -- machine_mode mode, inmode; -- -- inmode = GET_MODE (operands[1]); -- gcc_assert (inmode == SImode || inmode == DImode); -- -- out = operands[0]; -- in = force_reg (inmode, operands[1]); -- mode = GET_MODE (out); -- neglab = gen_label_rtx (); -- donelab = gen_label_rtx (); -- f0 = gen_reg_rtx (mode); -- -- emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab); -- -- expand_float (out, in, 0); -- -- emit_jump_insn (gen_jump (donelab)); -- emit_barrier (); -- -- emit_label (neglab); -- -- i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL, -- 1, OPTAB_DIRECT); -- i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL, -- 1, OPTAB_DIRECT); -- i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT); -- -- expand_float (f0, i0, 0); -- -- emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0))); -- -- emit_label (donelab); --} -- --static bool canonicalize_perm (struct expand_vec_perm_d *d); --static bool expand_vec_perm_1 (struct expand_vec_perm_d *d); --static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d); --static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool); -- --/* Get a vector mode of the same size as the original but with elements -- twice as wide. This is only guaranteed to apply to integral vectors. */ -- --static inline machine_mode --get_mode_wider_vector (machine_mode o) --{ -- /* ??? Rely on the ordering that genmodes.c gives to vectors. */ -- machine_mode n = GET_MODE_WIDER_MODE (o).require (); -- gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2); -- gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n)); -- return n; --} -- --/* A subroutine of ix86_expand_vector_init_duplicate. Tries to -- fill target with val via vec_duplicate. */ -- --static bool --ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val) --{ -- bool ok; -- rtx_insn *insn; -- rtx dup; -- -- /* First attempt to recognize VAL as-is. */ -- dup = gen_vec_duplicate (mode, val); -- insn = emit_insn (gen_rtx_SET (target, dup)); -- if (recog_memoized (insn) < 0) -- { -- rtx_insn *seq; -- machine_mode innermode = GET_MODE_INNER (mode); -- rtx reg; -- -- /* If that fails, force VAL into a register. */ -- -- start_sequence (); -- reg = force_reg (innermode, val); -- if (GET_MODE (reg) != innermode) -- reg = gen_lowpart (innermode, reg); -- SET_SRC (PATTERN (insn)) = gen_vec_duplicate (mode, reg); -- seq = get_insns (); -- end_sequence (); -- if (seq) -- emit_insn_before (seq, insn); -- -- ok = recog_memoized (insn) >= 0; -- gcc_assert (ok); -- } -- return true; --} -- --/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector -- with all elements equal to VAR. 
Return true if successful. */ -- --static bool --ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode, -- rtx target, rtx val) --{ -- bool ok; -- -- switch (mode) -- { -- case E_V2SImode: -- case E_V2SFmode: -- if (!mmx_ok) -- return false; -- /* FALLTHRU */ -- -- case E_V4DFmode: -- case E_V4DImode: -- case E_V8SFmode: -- case E_V8SImode: -- case E_V2DFmode: -- case E_V2DImode: -- case E_V4SFmode: -- case E_V4SImode: -- case E_V16SImode: -- case E_V8DImode: -- case E_V16SFmode: -- case E_V8DFmode: -- return ix86_vector_duplicate_value (mode, target, val); -- -- case E_V4HImode: -- if (!mmx_ok) -- return false; -- if (TARGET_SSE || TARGET_3DNOW_A) -- { -- rtx x; -- -- val = gen_lowpart (SImode, val); -- x = gen_rtx_TRUNCATE (HImode, val); -- x = gen_rtx_VEC_DUPLICATE (mode, x); -- emit_insn (gen_rtx_SET (target, x)); -- return true; -- } -- goto widen; -- -- case E_V8QImode: -- if (!mmx_ok) -- return false; -- goto widen; -- -- case E_V8HImode: -- if (TARGET_AVX2) -- return ix86_vector_duplicate_value (mode, target, val); -- -- if (TARGET_SSE2) -- { -- struct expand_vec_perm_d dperm; -- rtx tmp1, tmp2; -- -- permute: -- memset (&dperm, 0, sizeof (dperm)); -- dperm.target = target; -- dperm.vmode = mode; -- dperm.nelt = GET_MODE_NUNITS (mode); -- dperm.op0 = dperm.op1 = gen_reg_rtx (mode); -- dperm.one_operand_p = true; -- -- /* Extend to SImode using a paradoxical SUBREG. */ -- tmp1 = gen_reg_rtx (SImode); -- emit_move_insn (tmp1, gen_lowpart (SImode, val)); -- -- /* Insert the SImode value as low element of a V4SImode vector. */ -- tmp2 = gen_reg_rtx (V4SImode); -- emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1)); -- emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2)); -- -- ok = (expand_vec_perm_1 (&dperm) -- || expand_vec_perm_broadcast_1 (&dperm)); -- gcc_assert (ok); -- return ok; -- } -- goto widen; -- -- case E_V16QImode: -- if (TARGET_AVX2) -- return ix86_vector_duplicate_value (mode, target, val); -- -- if (TARGET_SSE2) -- goto permute; -- goto widen; -- -- widen: -- /* Replicate the value once into the next wider mode and recurse. */ -- { -- machine_mode smode, wsmode, wvmode; -- rtx x; -- -- smode = GET_MODE_INNER (mode); -- wvmode = get_mode_wider_vector (mode); -- wsmode = GET_MODE_INNER (wvmode); -- -- val = convert_modes (wsmode, smode, val, true); -- x = expand_simple_binop (wsmode, ASHIFT, val, -- GEN_INT (GET_MODE_BITSIZE (smode)), -- NULL_RTX, 1, OPTAB_LIB_WIDEN); -- val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN); -- -- x = gen_reg_rtx (wvmode); -- ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val); -- gcc_assert (ok); -- emit_move_insn (target, gen_lowpart (GET_MODE (target), x)); -- return ok; -- } -- -- case E_V16HImode: -- case E_V32QImode: -- if (TARGET_AVX2) -- return ix86_vector_duplicate_value (mode, target, val); -- else -- { -- machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode); -- rtx x = gen_reg_rtx (hvmode); -- -- ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val); -- gcc_assert (ok); -- -- x = gen_rtx_VEC_CONCAT (mode, x, x); -- emit_insn (gen_rtx_SET (target, x)); -- } -- return true; -- -- case E_V64QImode: -- case E_V32HImode: -- if (TARGET_AVX512BW) -- return ix86_vector_duplicate_value (mode, target, val); -- else -- { -- machine_mode hvmode = (mode == V32HImode ? 
V16HImode : V32QImode); -- rtx x = gen_reg_rtx (hvmode); -- -- ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val); -- gcc_assert (ok); -- -- x = gen_rtx_VEC_CONCAT (mode, x, x); -- emit_insn (gen_rtx_SET (target, x)); -- } -- return true; -- -- default: -- return false; -- } --} -- --/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector -- whose ONE_VAR element is VAR, and other elements are zero. Return true -- if successful. */ -- --static bool --ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode, -- rtx target, rtx var, int one_var) --{ -- machine_mode vsimode; -- rtx new_target; -- rtx x, tmp; -- bool use_vector_set = false; -- rtx (*gen_vec_set_0) (rtx, rtx, rtx) = NULL; -- -- switch (mode) -- { -- case E_V2DImode: -- /* For SSE4.1, we normally use vector set. But if the second -- element is zero and inter-unit moves are OK, we use movq -- instead. */ -- use_vector_set = (TARGET_64BIT && TARGET_SSE4_1 -- && !(TARGET_INTER_UNIT_MOVES_TO_VEC -- && one_var == 0)); -- break; -- case E_V16QImode: -- case E_V4SImode: -- case E_V4SFmode: -- use_vector_set = TARGET_SSE4_1; -- break; -- case E_V8HImode: -- use_vector_set = TARGET_SSE2; -- break; -- case E_V4HImode: -- use_vector_set = TARGET_SSE || TARGET_3DNOW_A; -- break; -- case E_V32QImode: -- case E_V16HImode: -- use_vector_set = TARGET_AVX; -- break; -- case E_V8SImode: -- use_vector_set = TARGET_AVX; -- gen_vec_set_0 = gen_vec_setv8si_0; -- break; -- case E_V8SFmode: -- use_vector_set = TARGET_AVX; -- gen_vec_set_0 = gen_vec_setv8sf_0; -- break; -- case E_V4DFmode: -- use_vector_set = TARGET_AVX; -- gen_vec_set_0 = gen_vec_setv4df_0; -- break; -- case E_V4DImode: -- /* Use ix86_expand_vector_set in 64bit mode only. */ -- use_vector_set = TARGET_AVX && TARGET_64BIT; -- gen_vec_set_0 = gen_vec_setv4di_0; -- break; -- case E_V16SImode: -- use_vector_set = TARGET_AVX512F && one_var == 0; -- gen_vec_set_0 = gen_vec_setv16si_0; -- break; -- case E_V16SFmode: -- use_vector_set = TARGET_AVX512F && one_var == 0; -- gen_vec_set_0 = gen_vec_setv16sf_0; -- break; -- case E_V8DFmode: -- use_vector_set = TARGET_AVX512F && one_var == 0; -- gen_vec_set_0 = gen_vec_setv8df_0; -- break; -- case E_V8DImode: -- /* Use ix86_expand_vector_set in 64bit mode only. 
*/ -- use_vector_set = TARGET_AVX512F && TARGET_64BIT && one_var == 0; -- gen_vec_set_0 = gen_vec_setv8di_0; -- break; -- default: -- break; -- } -- -- if (use_vector_set) -- { -- if (gen_vec_set_0 && one_var == 0) -- { -- var = force_reg (GET_MODE_INNER (mode), var); -- emit_insn (gen_vec_set_0 (target, CONST0_RTX (mode), var)); -- return true; -- } -- emit_insn (gen_rtx_SET (target, CONST0_RTX (mode))); -- var = force_reg (GET_MODE_INNER (mode), var); -- ix86_expand_vector_set (mmx_ok, target, var, one_var); -- return true; -- } -- -- switch (mode) -- { -- case E_V2SFmode: -- case E_V2SImode: -- if (!mmx_ok) -- return false; -- /* FALLTHRU */ -- -- case E_V2DFmode: -- case E_V2DImode: -- if (one_var != 0) -- return false; -- var = force_reg (GET_MODE_INNER (mode), var); -- x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode))); -- emit_insn (gen_rtx_SET (target, x)); -- return true; -- -- case E_V4SFmode: -- case E_V4SImode: -- if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER) -- new_target = gen_reg_rtx (mode); -- else -- new_target = target; -- var = force_reg (GET_MODE_INNER (mode), var); -- x = gen_rtx_VEC_DUPLICATE (mode, var); -- x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx); -- emit_insn (gen_rtx_SET (new_target, x)); -- if (one_var != 0) -- { -- /* We need to shuffle the value to the correct position, so -- create a new pseudo to store the intermediate result. */ -- -- /* With SSE2, we can use the integer shuffle insns. */ -- if (mode != V4SFmode && TARGET_SSE2) -- { -- emit_insn (gen_sse2_pshufd_1 (new_target, new_target, -- const1_rtx, -- GEN_INT (one_var == 1 ? 0 : 1), -- GEN_INT (one_var == 2 ? 0 : 1), -- GEN_INT (one_var == 3 ? 0 : 1))); -- if (target != new_target) -- emit_move_insn (target, new_target); -- return true; -- } -- -- /* Otherwise convert the intermediate result to V4SFmode and -- use the SSE1 shuffle instructions. */ -- if (mode != V4SFmode) -- { -- tmp = gen_reg_rtx (V4SFmode); -- emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target)); -- } -- else -- tmp = new_target; -- -- emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp, -- const1_rtx, -- GEN_INT (one_var == 1 ? 0 : 1), -- GEN_INT (one_var == 2 ? 0+4 : 1+4), -- GEN_INT (one_var == 3 ? 0+4 : 1+4))); -- -- if (mode != V4SFmode) -- emit_move_insn (target, gen_lowpart (V4SImode, tmp)); -- else if (tmp != target) -- emit_move_insn (target, tmp); -- } -- else if (target != new_target) -- emit_move_insn (target, new_target); -- return true; -- -- case E_V8HImode: -- case E_V16QImode: -- vsimode = V4SImode; -- goto widen; -- case E_V4HImode: -- case E_V8QImode: -- if (!mmx_ok) -- return false; -- vsimode = V2SImode; -- goto widen; -- widen: -- if (one_var != 0) -- return false; -- -- /* Zero extend the variable element to SImode and recurse. */ -- var = convert_modes (SImode, GET_MODE_INNER (mode), var, true); -- -- x = gen_reg_rtx (vsimode); -- if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x, -- var, one_var)) -- gcc_unreachable (); -- -- emit_move_insn (target, gen_lowpart (mode, x)); -- return true; -- -- default: -- return false; -- } --} -- --/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector -- consisting of the values in VALS. It is known that all elements -- except ONE_VAR are constants. Return true if successful. 
*/ -- --static bool --ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode, -- rtx target, rtx vals, int one_var) --{ -- rtx var = XVECEXP (vals, 0, one_var); -- machine_mode wmode; -- rtx const_vec, x; -- -- const_vec = copy_rtx (vals); -- XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode)); -- const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0)); -- -- switch (mode) -- { -- case E_V2DFmode: -- case E_V2DImode: -- case E_V2SFmode: -- case E_V2SImode: -- /* For the two element vectors, it's just as easy to use -- the general case. */ -- return false; -- -- case E_V4DImode: -- /* Use ix86_expand_vector_set in 64bit mode only. */ -- if (!TARGET_64BIT) -- return false; -- /* FALLTHRU */ -- case E_V4DFmode: -- case E_V8SFmode: -- case E_V8SImode: -- case E_V16HImode: -- case E_V32QImode: -- case E_V4SFmode: -- case E_V4SImode: -- case E_V8HImode: -- case E_V4HImode: -- break; -- -- case E_V16QImode: -- if (TARGET_SSE4_1) -- break; -- wmode = V8HImode; -- goto widen; -- case E_V8QImode: -- wmode = V4HImode; -- goto widen; -- widen: -- /* There's no way to set one QImode entry easily. Combine -- the variable value with its adjacent constant value, and -- promote to an HImode set. */ -- x = XVECEXP (vals, 0, one_var ^ 1); -- if (one_var & 1) -- { -- var = convert_modes (HImode, QImode, var, true); -- var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8), -- NULL_RTX, 1, OPTAB_LIB_WIDEN); -- x = GEN_INT (INTVAL (x) & 0xff); -- } -- else -- { -- var = convert_modes (HImode, QImode, var, true); -- x = gen_int_mode (UINTVAL (x) << 8, HImode); -- } -- if (x != const0_rtx) -- var = expand_simple_binop (HImode, IOR, var, x, var, -- 1, OPTAB_LIB_WIDEN); -- -- x = gen_reg_rtx (wmode); -- emit_move_insn (x, gen_lowpart (wmode, const_vec)); -- ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1); -- -- emit_move_insn (target, gen_lowpart (mode, x)); -- return true; -- -- default: -- return false; -- } -- -- emit_move_insn (target, const_vec); -- ix86_expand_vector_set (mmx_ok, target, var, one_var); -- return true; --} -- --/* A subroutine of ix86_expand_vector_init_general. Use vector -- concatenate to handle the most general case: all values variable, -- and none identical. 
*/ -- --static void --ix86_expand_vector_init_concat (machine_mode mode, -- rtx target, rtx *ops, int n) --{ -- machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode; -- rtx first[16], second[8], third[4]; -- rtvec v; -- int i, j; -- -- switch (n) -- { -- case 2: -- switch (mode) -- { -- case E_V16SImode: -- cmode = V8SImode; -- break; -- case E_V16SFmode: -- cmode = V8SFmode; -- break; -- case E_V8DImode: -- cmode = V4DImode; -- break; -- case E_V8DFmode: -- cmode = V4DFmode; -- break; -- case E_V8SImode: -- cmode = V4SImode; -- break; -- case E_V8SFmode: -- cmode = V4SFmode; -- break; -- case E_V4DImode: -- cmode = V2DImode; -- break; -- case E_V4DFmode: -- cmode = V2DFmode; -- break; -- case E_V4SImode: -- cmode = V2SImode; -- break; -- case E_V4SFmode: -- cmode = V2SFmode; -- break; -- case E_V2DImode: -- cmode = DImode; -- break; -- case E_V2SImode: -- cmode = SImode; -- break; -- case E_V2DFmode: -- cmode = DFmode; -- break; -- case E_V2SFmode: -- cmode = SFmode; -- break; -- default: -- gcc_unreachable (); -- } -- -- if (!register_operand (ops[1], cmode)) -- ops[1] = force_reg (cmode, ops[1]); -- if (!register_operand (ops[0], cmode)) -- ops[0] = force_reg (cmode, ops[0]); -- emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, ops[0], -- ops[1]))); -- break; -- -- case 4: -- switch (mode) -- { -- case E_V4DImode: -- cmode = V2DImode; -- break; -- case E_V4DFmode: -- cmode = V2DFmode; -- break; -- case E_V4SImode: -- cmode = V2SImode; -- break; -- case E_V4SFmode: -- cmode = V2SFmode; -- break; -- default: -- gcc_unreachable (); -- } -- goto half; -- -- case 8: -- switch (mode) -- { -- case E_V8DImode: -- cmode = V2DImode; -- hmode = V4DImode; -- break; -- case E_V8DFmode: -- cmode = V2DFmode; -- hmode = V4DFmode; -- break; -- case E_V8SImode: -- cmode = V2SImode; -- hmode = V4SImode; -- break; -- case E_V8SFmode: -- cmode = V2SFmode; -- hmode = V4SFmode; -- break; -- default: -- gcc_unreachable (); -- } -- goto half; -- -- case 16: -- switch (mode) -- { -- case E_V16SImode: -- cmode = V2SImode; -- hmode = V4SImode; -- gmode = V8SImode; -- break; -- case E_V16SFmode: -- cmode = V2SFmode; -- hmode = V4SFmode; -- gmode = V8SFmode; -- break; -- default: -- gcc_unreachable (); -- } -- goto half; -- --half: -- /* FIXME: We process inputs backward to help RA. PR 36222. */ -- i = n - 1; -- j = (n >> 1) - 1; -- for (; i > 0; i -= 2, j--) -- { -- first[j] = gen_reg_rtx (cmode); -- v = gen_rtvec (2, ops[i - 1], ops[i]); -- ix86_expand_vector_init (false, first[j], -- gen_rtx_PARALLEL (cmode, v)); -- } -- -- n >>= 1; -- if (n > 4) -- { -- gcc_assert (hmode != VOIDmode); -- gcc_assert (gmode != VOIDmode); -- for (i = j = 0; i < n; i += 2, j++) -- { -- second[j] = gen_reg_rtx (hmode); -- ix86_expand_vector_init_concat (hmode, second [j], -- &first [i], 2); -- } -- n >>= 1; -- for (i = j = 0; i < n; i += 2, j++) -- { -- third[j] = gen_reg_rtx (gmode); -- ix86_expand_vector_init_concat (gmode, third[j], -- &second[i], 2); -- } -- n >>= 1; -- ix86_expand_vector_init_concat (mode, target, third, n); -- } -- else if (n > 2) -- { -- gcc_assert (hmode != VOIDmode); -- for (i = j = 0; i < n; i += 2, j++) -- { -- second[j] = gen_reg_rtx (hmode); -- ix86_expand_vector_init_concat (hmode, second [j], -- &first [i], 2); -- } -- n >>= 1; -- ix86_expand_vector_init_concat (mode, target, second, n); -- } -- else -- ix86_expand_vector_init_concat (mode, target, first, n); -- break; -- -- default: -- gcc_unreachable (); -- } --} -- --/* A subroutine of ix86_expand_vector_init_general. 
Use vector -- interleave to handle the most general case: all values variable, -- and none identical. */ -- --static void --ix86_expand_vector_init_interleave (machine_mode mode, -- rtx target, rtx *ops, int n) --{ -- machine_mode first_imode, second_imode, third_imode, inner_mode; -- int i, j; -- rtx op0, op1; -- rtx (*gen_load_even) (rtx, rtx, rtx); -- rtx (*gen_interleave_first_low) (rtx, rtx, rtx); -- rtx (*gen_interleave_second_low) (rtx, rtx, rtx); -- -- switch (mode) -- { -- case E_V8HImode: -- gen_load_even = gen_vec_setv8hi; -- gen_interleave_first_low = gen_vec_interleave_lowv4si; -- gen_interleave_second_low = gen_vec_interleave_lowv2di; -- inner_mode = HImode; -- first_imode = V4SImode; -- second_imode = V2DImode; -- third_imode = VOIDmode; -- break; -- case E_V16QImode: -- gen_load_even = gen_vec_setv16qi; -- gen_interleave_first_low = gen_vec_interleave_lowv8hi; -- gen_interleave_second_low = gen_vec_interleave_lowv4si; -- inner_mode = QImode; -- first_imode = V8HImode; -- second_imode = V4SImode; -- third_imode = V2DImode; -- break; -- default: -- gcc_unreachable (); -- } -- -- for (i = 0; i < n; i++) -- { -- /* Extend the odd elment to SImode using a paradoxical SUBREG. */ -- op0 = gen_reg_rtx (SImode); -- emit_move_insn (op0, gen_lowpart (SImode, ops [i + i])); -- -- /* Insert the SImode value as low element of V4SImode vector. */ -- op1 = gen_reg_rtx (V4SImode); -- op0 = gen_rtx_VEC_MERGE (V4SImode, -- gen_rtx_VEC_DUPLICATE (V4SImode, -- op0), -- CONST0_RTX (V4SImode), -- const1_rtx); -- emit_insn (gen_rtx_SET (op1, op0)); -- -- /* Cast the V4SImode vector back to a vector in orignal mode. */ -- op0 = gen_reg_rtx (mode); -- emit_move_insn (op0, gen_lowpart (mode, op1)); -- -- /* Load even elements into the second position. */ -- emit_insn (gen_load_even (op0, -- force_reg (inner_mode, -- ops [i + i + 1]), -- const1_rtx)); -- -- /* Cast vector to FIRST_IMODE vector. */ -- ops[i] = gen_reg_rtx (first_imode); -- emit_move_insn (ops[i], gen_lowpart (first_imode, op0)); -- } -- -- /* Interleave low FIRST_IMODE vectors. */ -- for (i = j = 0; i < n; i += 2, j++) -- { -- op0 = gen_reg_rtx (first_imode); -- emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1])); -- -- /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */ -- ops[j] = gen_reg_rtx (second_imode); -- emit_move_insn (ops[j], gen_lowpart (second_imode, op0)); -- } -- -- /* Interleave low SECOND_IMODE vectors. */ -- switch (second_imode) -- { -- case E_V4SImode: -- for (i = j = 0; i < n / 2; i += 2, j++) -- { -- op0 = gen_reg_rtx (second_imode); -- emit_insn (gen_interleave_second_low (op0, ops[i], -- ops[i + 1])); -- -- /* Cast the SECOND_IMODE vector to the THIRD_IMODE -- vector. */ -- ops[j] = gen_reg_rtx (third_imode); -- emit_move_insn (ops[j], gen_lowpart (third_imode, op0)); -- } -- second_imode = V2DImode; -- gen_interleave_second_low = gen_vec_interleave_lowv2di; -- /* FALLTHRU */ -- -- case E_V2DImode: -- op0 = gen_reg_rtx (second_imode); -- emit_insn (gen_interleave_second_low (op0, ops[0], -- ops[1])); -- -- /* Cast the SECOND_IMODE vector back to a vector on original -- mode. */ -- emit_insn (gen_rtx_SET (target, gen_lowpart (mode, op0))); -- break; -- -- default: -- gcc_unreachable (); -- } --} -- --/* A subroutine of ix86_expand_vector_init. Handle the most general case: -- all values variable, and none identical. 
*/ -- --static void --ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode, -- rtx target, rtx vals) --{ -- rtx ops[64], op0, op1, op2, op3, op4, op5; -- machine_mode half_mode = VOIDmode; -- machine_mode quarter_mode = VOIDmode; -- int n, i; -- -- switch (mode) -- { -- case E_V2SFmode: -- case E_V2SImode: -- if (!mmx_ok && !TARGET_SSE) -- break; -- /* FALLTHRU */ -- -- case E_V16SImode: -- case E_V16SFmode: -- case E_V8DFmode: -- case E_V8DImode: -- case E_V8SFmode: -- case E_V8SImode: -- case E_V4DFmode: -- case E_V4DImode: -- case E_V4SFmode: -- case E_V4SImode: -- case E_V2DFmode: -- case E_V2DImode: -- n = GET_MODE_NUNITS (mode); -- for (i = 0; i < n; i++) -- ops[i] = XVECEXP (vals, 0, i); -- ix86_expand_vector_init_concat (mode, target, ops, n); -- return; -- -- case E_V2TImode: -- for (i = 0; i < 2; i++) -- ops[i] = gen_lowpart (V2DImode, XVECEXP (vals, 0, i)); -- op0 = gen_reg_rtx (V4DImode); -- ix86_expand_vector_init_concat (V4DImode, op0, ops, 2); -- emit_move_insn (target, gen_lowpart (GET_MODE (target), op0)); -- return; -- -- case E_V4TImode: -- for (i = 0; i < 4; i++) -- ops[i] = gen_lowpart (V2DImode, XVECEXP (vals, 0, i)); -- ops[4] = gen_reg_rtx (V4DImode); -- ix86_expand_vector_init_concat (V4DImode, ops[4], ops, 2); -- ops[5] = gen_reg_rtx (V4DImode); -- ix86_expand_vector_init_concat (V4DImode, ops[5], ops + 2, 2); -- op0 = gen_reg_rtx (V8DImode); -- ix86_expand_vector_init_concat (V8DImode, op0, ops + 4, 2); -- emit_move_insn (target, gen_lowpart (GET_MODE (target), op0)); -- return; -- -- case E_V32QImode: -- half_mode = V16QImode; -- goto half; -- -- case E_V16HImode: -- half_mode = V8HImode; -- goto half; -- --half: -- n = GET_MODE_NUNITS (mode); -- for (i = 0; i < n; i++) -- ops[i] = XVECEXP (vals, 0, i); -- op0 = gen_reg_rtx (half_mode); -- op1 = gen_reg_rtx (half_mode); -- ix86_expand_vector_init_interleave (half_mode, op0, ops, -- n >> 2); -- ix86_expand_vector_init_interleave (half_mode, op1, -- &ops [n >> 1], n >> 2); -- emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op0, op1))); -- return; -- -- case E_V64QImode: -- quarter_mode = V16QImode; -- half_mode = V32QImode; -- goto quarter; -- -- case E_V32HImode: -- quarter_mode = V8HImode; -- half_mode = V16HImode; -- goto quarter; -- --quarter: -- n = GET_MODE_NUNITS (mode); -- for (i = 0; i < n; i++) -- ops[i] = XVECEXP (vals, 0, i); -- op0 = gen_reg_rtx (quarter_mode); -- op1 = gen_reg_rtx (quarter_mode); -- op2 = gen_reg_rtx (quarter_mode); -- op3 = gen_reg_rtx (quarter_mode); -- op4 = gen_reg_rtx (half_mode); -- op5 = gen_reg_rtx (half_mode); -- ix86_expand_vector_init_interleave (quarter_mode, op0, ops, -- n >> 3); -- ix86_expand_vector_init_interleave (quarter_mode, op1, -- &ops [n >> 2], n >> 3); -- ix86_expand_vector_init_interleave (quarter_mode, op2, -- &ops [n >> 1], n >> 3); -- ix86_expand_vector_init_interleave (quarter_mode, op3, -- &ops [(n >> 1) | (n >> 2)], n >> 3); -- emit_insn (gen_rtx_SET (op4, gen_rtx_VEC_CONCAT (half_mode, op0, op1))); -- emit_insn (gen_rtx_SET (op5, gen_rtx_VEC_CONCAT (half_mode, op2, op3))); -- emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (mode, op4, op5))); -- return; -- -- case E_V16QImode: -- if (!TARGET_SSE4_1) -- break; -- /* FALLTHRU */ -- -- case E_V8HImode: -- if (!TARGET_SSE2) -- break; -- -- /* Don't use ix86_expand_vector_init_interleave if we can't -- move from GPR to SSE register directly. 
*/ -- if (!TARGET_INTER_UNIT_MOVES_TO_VEC) -- break; -- -- n = GET_MODE_NUNITS (mode); -- for (i = 0; i < n; i++) -- ops[i] = XVECEXP (vals, 0, i); -- ix86_expand_vector_init_interleave (mode, target, ops, n >> 1); -- return; -- -- case E_V4HImode: -- case E_V8QImode: -- break; -- -- default: -- gcc_unreachable (); -- } -- -- { -- int i, j, n_elts, n_words, n_elt_per_word; -- machine_mode inner_mode; -- rtx words[4], shift; -- -- inner_mode = GET_MODE_INNER (mode); -- n_elts = GET_MODE_NUNITS (mode); -- n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD; -- n_elt_per_word = n_elts / n_words; -- shift = GEN_INT (GET_MODE_BITSIZE (inner_mode)); -- -- for (i = 0; i < n_words; ++i) -- { -- rtx word = NULL_RTX; -- -- for (j = 0; j < n_elt_per_word; ++j) -- { -- rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1); -- elt = convert_modes (word_mode, inner_mode, elt, true); -- -- if (j == 0) -- word = elt; -- else -- { -- word = expand_simple_binop (word_mode, ASHIFT, word, shift, -- word, 1, OPTAB_LIB_WIDEN); -- word = expand_simple_binop (word_mode, IOR, word, elt, -- word, 1, OPTAB_LIB_WIDEN); -- } -- } -- -- words[i] = word; -- } -- -- if (n_words == 1) -- emit_move_insn (target, gen_lowpart (mode, words[0])); -- else if (n_words == 2) -- { -- rtx tmp = gen_reg_rtx (mode); -- emit_clobber (tmp); -- emit_move_insn (gen_lowpart (word_mode, tmp), words[0]); -- emit_move_insn (gen_highpart (word_mode, tmp), words[1]); -- emit_move_insn (target, tmp); -- } -- else if (n_words == 4) -- { -- rtx tmp = gen_reg_rtx (V4SImode); -- gcc_assert (word_mode == SImode); -- vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words)); -- ix86_expand_vector_init_general (false, V4SImode, tmp, vals); -- emit_move_insn (target, gen_lowpart (mode, tmp)); -- } -- else -- gcc_unreachable (); -- } --} -- --/* Initialize vector TARGET via VALS. Suppress the use of MMX -- instructions unless MMX_OK is true. */ -- --void --ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals) --{ -- machine_mode mode = GET_MODE (target); -- machine_mode inner_mode = GET_MODE_INNER (mode); -- int n_elts = GET_MODE_NUNITS (mode); -- int n_var = 0, one_var = -1; -- bool all_same = true, all_const_zero = true; -- int i; -- rtx x; -- -- /* Handle first initialization from vector elts. */ -- if (n_elts != XVECLEN (vals, 0)) -- { -- rtx subtarget = target; -- x = XVECEXP (vals, 0, 0); -- gcc_assert (GET_MODE_INNER (GET_MODE (x)) == inner_mode); -- if (GET_MODE_NUNITS (GET_MODE (x)) * 2 == n_elts) -- { -- rtx ops[2] = { XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1) }; -- if (inner_mode == QImode || inner_mode == HImode) -- { -- unsigned int n_bits = n_elts * GET_MODE_SIZE (inner_mode); -- mode = mode_for_vector (SImode, n_bits / 4).require (); -- inner_mode = mode_for_vector (SImode, n_bits / 8).require (); -- ops[0] = gen_lowpart (inner_mode, ops[0]); -- ops[1] = gen_lowpart (inner_mode, ops[1]); -- subtarget = gen_reg_rtx (mode); -- } -- ix86_expand_vector_init_concat (mode, subtarget, ops, 2); -- if (subtarget != target) -- emit_move_insn (target, gen_lowpart (GET_MODE (target), subtarget)); -- return; -- } -- gcc_unreachable (); -- } -- -- for (i = 0; i < n_elts; ++i) -- { -- x = XVECEXP (vals, 0, i); -- if (!(CONST_SCALAR_INT_P (x) -- || CONST_DOUBLE_P (x) -- || CONST_FIXED_P (x))) -- n_var++, one_var = i; -- else if (x != CONST0_RTX (inner_mode)) -- all_const_zero = false; -- if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) -- all_same = false; -- } -- -- /* Constants are best loaded from the constant pool. 
*/ -- if (n_var == 0) -- { -- emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))); -- return; -- } -- -- /* If all values are identical, broadcast the value. */ -- if (all_same -- && ix86_expand_vector_init_duplicate (mmx_ok, mode, target, -- XVECEXP (vals, 0, 0))) -- return; -- -- /* Values where only one field is non-constant are best loaded from -- the pool and overwritten via move later. */ -- if (n_var == 1) -- { -- if (all_const_zero -- && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target, -- XVECEXP (vals, 0, one_var), -- one_var)) -- return; -- -- if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var)) -- return; -- } -- -- ix86_expand_vector_init_general (mmx_ok, mode, target, vals); --} -- --void --ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt) --{ -- machine_mode mode = GET_MODE (target); -- machine_mode inner_mode = GET_MODE_INNER (mode); -- machine_mode half_mode; -- bool use_vec_merge = false; -- rtx tmp; -- static rtx (*gen_extract[6][2]) (rtx, rtx) -- = { -- { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi }, -- { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi }, -- { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si }, -- { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di }, -- { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf }, -- { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df } -- }; -- static rtx (*gen_insert[6][2]) (rtx, rtx, rtx) -- = { -- { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi }, -- { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi }, -- { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si }, -- { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di }, -- { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf }, -- { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df } -- }; -- int i, j, n; -- machine_mode mmode = VOIDmode; -- rtx (*gen_blendm) (rtx, rtx, rtx, rtx); -- -- switch (mode) -- { -- case E_V2SFmode: -- case E_V2SImode: -- if (mmx_ok) -- { -- tmp = gen_reg_rtx (GET_MODE_INNER (mode)); -- ix86_expand_vector_extract (true, tmp, target, 1 - elt); -- if (elt == 0) -- tmp = gen_rtx_VEC_CONCAT (mode, val, tmp); -- else -- tmp = gen_rtx_VEC_CONCAT (mode, tmp, val); -- emit_insn (gen_rtx_SET (target, tmp)); -- return; -- } -- break; -- -- case E_V2DImode: -- use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT; -- if (use_vec_merge) -- break; -- -- tmp = gen_reg_rtx (GET_MODE_INNER (mode)); -- ix86_expand_vector_extract (false, tmp, target, 1 - elt); -- if (elt == 0) -- tmp = gen_rtx_VEC_CONCAT (mode, val, tmp); -- else -- tmp = gen_rtx_VEC_CONCAT (mode, tmp, val); -- emit_insn (gen_rtx_SET (target, tmp)); -- return; -- -- case E_V2DFmode: -- { -- rtx op0, op1; -- -- /* For the two element vectors, we implement a VEC_CONCAT with -- the extraction of the other element. 
*/ -- -- tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt))); -- tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp); -- -- if (elt == 0) -- op0 = val, op1 = tmp; -- else -- op0 = tmp, op1 = val; -- -- tmp = gen_rtx_VEC_CONCAT (mode, op0, op1); -- emit_insn (gen_rtx_SET (target, tmp)); -- } -- return; -- -- case E_V4SFmode: -- use_vec_merge = TARGET_SSE4_1; -- if (use_vec_merge) -- break; -- -- switch (elt) -- { -- case 0: -- use_vec_merge = true; -- break; -- -- case 1: -- /* tmp = target = A B C D */ -- tmp = copy_to_reg (target); -- /* target = A A B B */ -- emit_insn (gen_vec_interleave_lowv4sf (target, target, target)); -- /* target = X A B B */ -- ix86_expand_vector_set (false, target, val, 0); -- /* target = A X C D */ -- emit_insn (gen_sse_shufps_v4sf (target, target, tmp, -- const1_rtx, const0_rtx, -- GEN_INT (2+4), GEN_INT (3+4))); -- return; -- -- case 2: -- /* tmp = target = A B C D */ -- tmp = copy_to_reg (target); -- /* tmp = X B C D */ -- ix86_expand_vector_set (false, tmp, val, 0); -- /* target = A B X D */ -- emit_insn (gen_sse_shufps_v4sf (target, target, tmp, -- const0_rtx, const1_rtx, -- GEN_INT (0+4), GEN_INT (3+4))); -- return; -- -- case 3: -- /* tmp = target = A B C D */ -- tmp = copy_to_reg (target); -- /* tmp = X B C D */ -- ix86_expand_vector_set (false, tmp, val, 0); -- /* target = A B X D */ -- emit_insn (gen_sse_shufps_v4sf (target, target, tmp, -- const0_rtx, const1_rtx, -- GEN_INT (2+4), GEN_INT (0+4))); -- return; -- -- default: -- gcc_unreachable (); -- } -- break; -- -- case E_V4SImode: -- use_vec_merge = TARGET_SSE4_1; -- if (use_vec_merge) -- break; -- -- /* Element 0 handled by vec_merge below. */ -- if (elt == 0) -- { -- use_vec_merge = true; -- break; -- } -- -- if (TARGET_SSE2) -- { -- /* With SSE2, use integer shuffles to swap element 0 and ELT, -- store into element 0, then shuffle them back. */ -- -- rtx order[4]; -- -- order[0] = GEN_INT (elt); -- order[1] = const1_rtx; -- order[2] = const2_rtx; -- order[3] = GEN_INT (3); -- order[elt] = const0_rtx; -- -- emit_insn (gen_sse2_pshufd_1 (target, target, order[0], -- order[1], order[2], order[3])); -- -- ix86_expand_vector_set (false, target, val, 0); -- -- emit_insn (gen_sse2_pshufd_1 (target, target, order[0], -- order[1], order[2], order[3])); -- } -- else -- { -- /* For SSE1, we have to reuse the V4SF code. */ -- rtx t = gen_reg_rtx (V4SFmode); -- emit_move_insn (t, gen_lowpart (V4SFmode, target)); -- ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt); -- emit_move_insn (target, gen_lowpart (mode, t)); -- } -- return; -- -- case E_V8HImode: -- use_vec_merge = TARGET_SSE2; -- break; -- case E_V4HImode: -- use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A); -- break; -- -- case E_V16QImode: -- use_vec_merge = TARGET_SSE4_1; -- break; -- -- case E_V8QImode: -- break; -- -- case E_V32QImode: -- half_mode = V16QImode; -- j = 0; -- n = 16; -- goto half; -- -- case E_V16HImode: -- half_mode = V8HImode; -- j = 1; -- n = 8; -- goto half; -- -- case E_V8SImode: -- half_mode = V4SImode; -- j = 2; -- n = 4; -- goto half; -- -- case E_V4DImode: -- half_mode = V2DImode; -- j = 3; -- n = 2; -- goto half; -- -- case E_V8SFmode: -- half_mode = V4SFmode; -- j = 4; -- n = 4; -- goto half; -- -- case E_V4DFmode: -- half_mode = V2DFmode; -- j = 5; -- n = 2; -- goto half; -- --half: -- /* Compute offset. */ -- i = elt / n; -- elt %= n; -- -- gcc_assert (i <= 1); -- -- /* Extract the half. 
*/ -- tmp = gen_reg_rtx (half_mode); -- emit_insn (gen_extract[j][i] (tmp, target)); -- -- /* Put val in tmp at elt. */ -- ix86_expand_vector_set (false, tmp, val, elt); -- -- /* Put it back. */ -- emit_insn (gen_insert[j][i] (target, target, tmp)); -- return; -- -- case E_V8DFmode: -- if (TARGET_AVX512F) -- { -- mmode = QImode; -- gen_blendm = gen_avx512f_blendmv8df; -- } -- break; -- -- case E_V8DImode: -- if (TARGET_AVX512F) -- { -- mmode = QImode; -- gen_blendm = gen_avx512f_blendmv8di; -- } -- break; -- -- case E_V16SFmode: -- if (TARGET_AVX512F) -- { -- mmode = HImode; -- gen_blendm = gen_avx512f_blendmv16sf; -- } -- break; -- -- case E_V16SImode: -- if (TARGET_AVX512F) -- { -- mmode = HImode; -- gen_blendm = gen_avx512f_blendmv16si; -- } -- break; -- -- case E_V32HImode: -- if (TARGET_AVX512BW) -- { -- mmode = SImode; -- gen_blendm = gen_avx512bw_blendmv32hi; -- } -- else if (TARGET_AVX512F) -- { -- half_mode = E_V8HImode; -- n = 8; -- goto quarter; -- } -- break; -- -- case E_V64QImode: -- if (TARGET_AVX512BW) -- { -- mmode = DImode; -- gen_blendm = gen_avx512bw_blendmv64qi; -- } -- else if (TARGET_AVX512F) -- { -- half_mode = E_V16QImode; -- n = 16; -- goto quarter; -- } -- break; -- --quarter: -- /* Compute offset. */ -- i = elt / n; -- elt %= n; -- -- gcc_assert (i <= 3); -- -- { -- /* Extract the quarter. */ -- tmp = gen_reg_rtx (V4SImode); -- rtx tmp2 = gen_lowpart (V16SImode, target); -- rtx mask = gen_reg_rtx (QImode); -- -- emit_move_insn (mask, constm1_rtx); -- emit_insn (gen_avx512f_vextracti32x4_mask (tmp, tmp2, GEN_INT (i), -- tmp, mask)); -- -- tmp2 = gen_reg_rtx (half_mode); -- emit_move_insn (tmp2, gen_lowpart (half_mode, tmp)); -- tmp = tmp2; -- -- /* Put val in tmp at elt. */ -- ix86_expand_vector_set (false, tmp, val, elt); -- -- /* Put it back. */ -- tmp2 = gen_reg_rtx (V16SImode); -- rtx tmp3 = gen_lowpart (V16SImode, target); -- mask = gen_reg_rtx (HImode); -- emit_move_insn (mask, constm1_rtx); -- tmp = gen_lowpart (V4SImode, tmp); -- emit_insn (gen_avx512f_vinserti32x4_mask (tmp2, tmp3, tmp, GEN_INT (i), -- tmp3, mask)); -- emit_move_insn (target, gen_lowpart (mode, tmp2)); -- } -- return; -- -- default: -- break; -- } -- -- if (mmode != VOIDmode) -- { -- tmp = gen_reg_rtx (mode); -- emit_insn (gen_rtx_SET (tmp, gen_rtx_VEC_DUPLICATE (mode, val))); -- /* The avx512*_blendm expanders have different operand order -- from VEC_MERGE. In VEC_MERGE, the first input operand is used for -- elements where the mask is set and second input operand otherwise, -- in {sse,avx}*_*blend* the first input operand is used for elements -- where the mask is clear and second input operand otherwise. 
*/ -- emit_insn (gen_blendm (target, target, tmp, -- force_reg (mmode, -- gen_int_mode (HOST_WIDE_INT_1U << elt, -- mmode)))); -- } -- else if (use_vec_merge) -- { -- tmp = gen_rtx_VEC_DUPLICATE (mode, val); -- tmp = gen_rtx_VEC_MERGE (mode, tmp, target, -- GEN_INT (HOST_WIDE_INT_1U << elt)); -- emit_insn (gen_rtx_SET (target, tmp)); -- } -- else -- { -- rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode)); -- -- emit_move_insn (mem, target); -- -- tmp = adjust_address (mem, inner_mode, elt * GET_MODE_SIZE (inner_mode)); -- emit_move_insn (tmp, val); -- -- emit_move_insn (target, mem); -- } --} -- --void --ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt) --{ -- machine_mode mode = GET_MODE (vec); -- machine_mode inner_mode = GET_MODE_INNER (mode); -- bool use_vec_extr = false; -- rtx tmp; -- -- switch (mode) -- { -- case E_V2SImode: -- case E_V2SFmode: -- if (!mmx_ok) -- break; -- /* FALLTHRU */ -- -- case E_V2DFmode: -- case E_V2DImode: -- case E_V2TImode: -- case E_V4TImode: -- use_vec_extr = true; -- break; -- -- case E_V4SFmode: -- use_vec_extr = TARGET_SSE4_1; -- if (use_vec_extr) -- break; -- -- switch (elt) -- { -- case 0: -- tmp = vec; -- break; -- -- case 1: -- case 3: -- tmp = gen_reg_rtx (mode); -- emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec, -- GEN_INT (elt), GEN_INT (elt), -- GEN_INT (elt+4), GEN_INT (elt+4))); -- break; -- -- case 2: -- tmp = gen_reg_rtx (mode); -- emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec)); -- break; -- -- default: -- gcc_unreachable (); -- } -- vec = tmp; -- use_vec_extr = true; -- elt = 0; -- break; -- -- case E_V4SImode: -- use_vec_extr = TARGET_SSE4_1; -- if (use_vec_extr) -- break; -- -- if (TARGET_SSE2) -- { -- switch (elt) -- { -- case 0: -- tmp = vec; -- break; -- -- case 1: -- case 3: -- tmp = gen_reg_rtx (mode); -- emit_insn (gen_sse2_pshufd_1 (tmp, vec, -- GEN_INT (elt), GEN_INT (elt), -- GEN_INT (elt), GEN_INT (elt))); -- break; -- -- case 2: -- tmp = gen_reg_rtx (mode); -- emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec)); -- break; -- -- default: -- gcc_unreachable (); -- } -- vec = tmp; -- use_vec_extr = true; -- elt = 0; -- } -- else -- { -- /* For SSE1, we have to reuse the V4SF code. 
*/ -- ix86_expand_vector_extract (false, gen_lowpart (SFmode, target), -- gen_lowpart (V4SFmode, vec), elt); -- return; -- } -- break; -- -- case E_V8HImode: -- use_vec_extr = TARGET_SSE2; -- break; -- case E_V4HImode: -- use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A); -- break; -- -- case E_V16QImode: -- use_vec_extr = TARGET_SSE4_1; -- break; -- -- case E_V8SFmode: -- if (TARGET_AVX) -- { -- tmp = gen_reg_rtx (V4SFmode); -- if (elt < 4) -- emit_insn (gen_vec_extract_lo_v8sf (tmp, vec)); -- else -- emit_insn (gen_vec_extract_hi_v8sf (tmp, vec)); -- ix86_expand_vector_extract (false, target, tmp, elt & 3); -- return; -- } -- break; -- -- case E_V4DFmode: -- if (TARGET_AVX) -- { -- tmp = gen_reg_rtx (V2DFmode); -- if (elt < 2) -- emit_insn (gen_vec_extract_lo_v4df (tmp, vec)); -- else -- emit_insn (gen_vec_extract_hi_v4df (tmp, vec)); -- ix86_expand_vector_extract (false, target, tmp, elt & 1); -- return; -- } -- break; -- -- case E_V32QImode: -- if (TARGET_AVX) -- { -- tmp = gen_reg_rtx (V16QImode); -- if (elt < 16) -- emit_insn (gen_vec_extract_lo_v32qi (tmp, vec)); -- else -- emit_insn (gen_vec_extract_hi_v32qi (tmp, vec)); -- ix86_expand_vector_extract (false, target, tmp, elt & 15); -- return; -- } -- break; -- -- case E_V16HImode: -- if (TARGET_AVX) -- { -- tmp = gen_reg_rtx (V8HImode); -- if (elt < 8) -- emit_insn (gen_vec_extract_lo_v16hi (tmp, vec)); -- else -- emit_insn (gen_vec_extract_hi_v16hi (tmp, vec)); -- ix86_expand_vector_extract (false, target, tmp, elt & 7); -- return; -- } -- break; -- -- case E_V8SImode: -- if (TARGET_AVX) -- { -- tmp = gen_reg_rtx (V4SImode); -- if (elt < 4) -- emit_insn (gen_vec_extract_lo_v8si (tmp, vec)); -- else -- emit_insn (gen_vec_extract_hi_v8si (tmp, vec)); -- ix86_expand_vector_extract (false, target, tmp, elt & 3); -- return; -- } -- break; -- -- case E_V4DImode: -- if (TARGET_AVX) -- { -- tmp = gen_reg_rtx (V2DImode); -- if (elt < 2) -- emit_insn (gen_vec_extract_lo_v4di (tmp, vec)); -- else -- emit_insn (gen_vec_extract_hi_v4di (tmp, vec)); -- ix86_expand_vector_extract (false, target, tmp, elt & 1); -- return; -- } -- break; -- -- case E_V32HImode: -- if (TARGET_AVX512BW) -- { -- tmp = gen_reg_rtx (V16HImode); -- if (elt < 16) -- emit_insn (gen_vec_extract_lo_v32hi (tmp, vec)); -- else -- emit_insn (gen_vec_extract_hi_v32hi (tmp, vec)); -- ix86_expand_vector_extract (false, target, tmp, elt & 15); -- return; -- } -- break; -- -- case E_V64QImode: -- if (TARGET_AVX512BW) -- { -- tmp = gen_reg_rtx (V32QImode); -- if (elt < 32) -- emit_insn (gen_vec_extract_lo_v64qi (tmp, vec)); -- else -- emit_insn (gen_vec_extract_hi_v64qi (tmp, vec)); -- ix86_expand_vector_extract (false, target, tmp, elt & 31); -- return; -- } -- break; -- -- case E_V16SFmode: -- tmp = gen_reg_rtx (V8SFmode); -- if (elt < 8) -- emit_insn (gen_vec_extract_lo_v16sf (tmp, vec)); -- else -- emit_insn (gen_vec_extract_hi_v16sf (tmp, vec)); -- ix86_expand_vector_extract (false, target, tmp, elt & 7); -- return; -- -- case E_V8DFmode: -- tmp = gen_reg_rtx (V4DFmode); -- if (elt < 4) -- emit_insn (gen_vec_extract_lo_v8df (tmp, vec)); -- else -- emit_insn (gen_vec_extract_hi_v8df (tmp, vec)); -- ix86_expand_vector_extract (false, target, tmp, elt & 3); -- return; -- -- case E_V16SImode: -- tmp = gen_reg_rtx (V8SImode); -- if (elt < 8) -- emit_insn (gen_vec_extract_lo_v16si (tmp, vec)); -- else -- emit_insn (gen_vec_extract_hi_v16si (tmp, vec)); -- ix86_expand_vector_extract (false, target, tmp, elt & 7); -- return; -- -- case E_V8DImode: -- tmp = gen_reg_rtx (V4DImode); 
-- if (elt < 4) -- emit_insn (gen_vec_extract_lo_v8di (tmp, vec)); -- else -- emit_insn (gen_vec_extract_hi_v8di (tmp, vec)); -- ix86_expand_vector_extract (false, target, tmp, elt & 3); -- return; -- -- case E_V8QImode: -- /* ??? Could extract the appropriate HImode element and shift. */ -- default: -- break; -- } -- -- if (use_vec_extr) -- { -- tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt))); -- tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp); -- -- /* Let the rtl optimizers know about the zero extension performed. */ -- if (inner_mode == QImode || inner_mode == HImode) -- { -- tmp = gen_rtx_ZERO_EXTEND (SImode, tmp); -- target = gen_lowpart (SImode, target); -- } -- -- emit_insn (gen_rtx_SET (target, tmp)); -- } -- else -- { -- rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode)); -- -- emit_move_insn (mem, vec); -- -- tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode)); -- emit_move_insn (target, tmp); -- } --} -- --/* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC -- to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode. -- The upper bits of DEST are undefined, though they shouldn't cause -- exceptions (some bits from src or all zeros are ok). */ -- --static void --emit_reduc_half (rtx dest, rtx src, int i) --{ -- rtx tem, d = dest; -- switch (GET_MODE (src)) -- { -- case E_V4SFmode: -- if (i == 128) -- tem = gen_sse_movhlps (dest, src, src); -- else -- tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx, -- GEN_INT (1 + 4), GEN_INT (1 + 4)); -- break; -- case E_V2DFmode: -- tem = gen_vec_interleave_highv2df (dest, src, src); -- break; -- case E_V16QImode: -- case E_V8HImode: -- case E_V4SImode: -- case E_V2DImode: -- d = gen_reg_rtx (V1TImode); -- tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src), -- GEN_INT (i / 2)); -- break; -- case E_V8SFmode: -- if (i == 256) -- tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx); -- else -- tem = gen_avx_shufps256 (dest, src, src, -- GEN_INT (i == 128 ? 2 + (3 << 2) : 1)); -- break; -- case E_V4DFmode: -- if (i == 256) -- tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx); -- else -- tem = gen_avx_shufpd256 (dest, src, src, const1_rtx); -- break; -- case E_V32QImode: -- case E_V16HImode: -- case E_V8SImode: -- case E_V4DImode: -- if (i == 256) -- { -- if (GET_MODE (dest) != V4DImode) -- d = gen_reg_rtx (V4DImode); -- tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src), -- gen_lowpart (V4DImode, src), -- const1_rtx); -- } -- else -- { -- d = gen_reg_rtx (V2TImode); -- tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src), -- GEN_INT (i / 2)); -- } -- break; -- case E_V64QImode: -- case E_V32HImode: -- case E_V16SImode: -- case E_V16SFmode: -- case E_V8DImode: -- case E_V8DFmode: -- if (i > 128) -- tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest), -- gen_lowpart (V16SImode, src), -- gen_lowpart (V16SImode, src), -- GEN_INT (0x4 + (i == 512 ? 4 : 0)), -- GEN_INT (0x5 + (i == 512 ? 4 : 0)), -- GEN_INT (0x6 + (i == 512 ? 4 : 0)), -- GEN_INT (0x7 + (i == 512 ? 4 : 0)), -- GEN_INT (0xC), GEN_INT (0xD), -- GEN_INT (0xE), GEN_INT (0xF), -- GEN_INT (0x10), GEN_INT (0x11), -- GEN_INT (0x12), GEN_INT (0x13), -- GEN_INT (0x14), GEN_INT (0x15), -- GEN_INT (0x16), GEN_INT (0x17)); -- else -- tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest), -- gen_lowpart (V16SImode, src), -- GEN_INT (i == 128 ? 0x2 : 0x1), -- GEN_INT (0x3), -- GEN_INT (0x3), -- GEN_INT (0x3), -- GEN_INT (i == 128 ? 
0x6 : 0x5), -- GEN_INT (0x7), -- GEN_INT (0x7), -- GEN_INT (0x7), -- GEN_INT (i == 128 ? 0xA : 0x9), -- GEN_INT (0xB), -- GEN_INT (0xB), -- GEN_INT (0xB), -- GEN_INT (i == 128 ? 0xE : 0xD), -- GEN_INT (0xF), -- GEN_INT (0xF), -- GEN_INT (0xF)); -- break; -- default: -- gcc_unreachable (); -- } -- emit_insn (tem); -- if (d != dest) -- emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d)); --} -- --/* Expand a vector reduction. FN is the binary pattern to reduce; -- DEST is the destination; IN is the input vector. */ -- --void --ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in) --{ -- rtx half, dst, vec = in; -- machine_mode mode = GET_MODE (in); -- int i; -- -- /* SSE4 has a special instruction for V8HImode UMIN reduction. */ -- if (TARGET_SSE4_1 -- && mode == V8HImode -- && fn == gen_uminv8hi3) -- { -- emit_insn (gen_sse4_1_phminposuw (dest, in)); -- return; -- } -- -- for (i = GET_MODE_BITSIZE (mode); -- i > GET_MODE_UNIT_BITSIZE (mode); -- i >>= 1) -- { -- half = gen_reg_rtx (mode); -- emit_reduc_half (half, vec, i); -- if (i == GET_MODE_UNIT_BITSIZE (mode) * 2) -- dst = dest; -- else -- dst = gen_reg_rtx (mode); -- emit_insn (fn (dst, half, vec)); -- vec = dst; -- } --} -- --/* Target hook for scalar_mode_supported_p. */ --static bool --ix86_scalar_mode_supported_p (scalar_mode mode) --{ -- if (DECIMAL_FLOAT_MODE_P (mode)) -- return default_decimal_float_supported_p (); -- else if (mode == TFmode) -- return true; -- else -- return default_scalar_mode_supported_p (mode); --} -- --/* Implements target hook vector_mode_supported_p. */ --static bool --ix86_vector_mode_supported_p (machine_mode mode) --{ -- if (TARGET_SSE && VALID_SSE_REG_MODE (mode)) -- return true; -- if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode)) -- return true; -- if (TARGET_AVX && VALID_AVX256_REG_MODE (mode)) -- return true; -- if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode)) -- return true; -- if (TARGET_MMX && VALID_MMX_REG_MODE (mode)) -- return true; -- if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode)) -- return true; -- return false; --} -- --/* Target hook for c_mode_for_suffix. */ --static machine_mode --ix86_c_mode_for_suffix (char suffix) --{ -- if (suffix == 'q') -- return TFmode; -- if (suffix == 'w') -- return XFmode; -- -- return VOIDmode; --} -- --/* Worker function for TARGET_MD_ASM_ADJUST. -- -- We implement asm flag outputs, and maintain source compatibility -- with the old cc0-based compiler. 
*/ -- --static rtx_insn * --ix86_md_asm_adjust (vec &outputs, vec &/*inputs*/, -- vec &constraints, -- vec &clobbers, HARD_REG_SET &clobbered_regs) --{ -- bool saw_asm_flag = false; -- -- start_sequence (); -- for (unsigned i = 0, n = outputs.length (); i < n; ++i) -- { -- const char *con = constraints[i]; -- if (strncmp (con, "=@cc", 4) != 0) -- continue; -- con += 4; -- if (strchr (con, ',') != NULL) -- { -- error ("alternatives not allowed in asm flag output"); -- continue; -- } -- -- bool invert = false; -- if (con[0] == 'n') -- invert = true, con++; -- -- machine_mode mode = CCmode; -- rtx_code code = UNKNOWN; -- -- switch (con[0]) -- { -- case 'a': -- if (con[1] == 0) -- mode = CCAmode, code = EQ; -- else if (con[1] == 'e' && con[2] == 0) -- mode = CCCmode, code = NE; -- break; -- case 'b': -- if (con[1] == 0) -- mode = CCCmode, code = EQ; -- else if (con[1] == 'e' && con[2] == 0) -- mode = CCAmode, code = NE; -- break; -- case 'c': -- if (con[1] == 0) -- mode = CCCmode, code = EQ; -- break; -- case 'e': -- if (con[1] == 0) -- mode = CCZmode, code = EQ; -- break; -- case 'g': -- if (con[1] == 0) -- mode = CCGCmode, code = GT; -- else if (con[1] == 'e' && con[2] == 0) -- mode = CCGCmode, code = GE; -- break; -- case 'l': -- if (con[1] == 0) -- mode = CCGCmode, code = LT; -- else if (con[1] == 'e' && con[2] == 0) -- mode = CCGCmode, code = LE; -- break; -- case 'o': -- if (con[1] == 0) -- mode = CCOmode, code = EQ; -- break; -- case 'p': -- if (con[1] == 0) -- mode = CCPmode, code = EQ; -- break; -- case 's': -- if (con[1] == 0) -- mode = CCSmode, code = EQ; -- break; -- case 'z': -- if (con[1] == 0) -- mode = CCZmode, code = EQ; -- break; -- } -- if (code == UNKNOWN) -- { -- error ("unknown asm flag output %qs", constraints[i]); -- continue; -- } -- if (invert) -- code = reverse_condition (code); -- -- rtx dest = outputs[i]; -- if (!saw_asm_flag) -- { -- /* This is the first asm flag output. Here we put the flags -- register in as the real output and adjust the condition to -- allow it. */ -- constraints[i] = "=Bf"; -- outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG); -- saw_asm_flag = true; -- } -- else -- { -- /* We don't need the flags register as output twice. */ -- constraints[i] = "=X"; -- outputs[i] = gen_rtx_SCRATCH (SImode); -- } -- -- rtx x = gen_rtx_REG (mode, FLAGS_REG); -- x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx); -- -- machine_mode dest_mode = GET_MODE (dest); -- if (!SCALAR_INT_MODE_P (dest_mode)) -- { -- error ("invalid type for asm flag output"); -- continue; -- } -- -- if (dest_mode == DImode && !TARGET_64BIT) -- dest_mode = SImode; -- -- if (dest_mode != QImode) -- { -- rtx destqi = gen_reg_rtx (QImode); -- emit_insn (gen_rtx_SET (destqi, x)); -- -- if (TARGET_ZERO_EXTEND_WITH_AND -- && optimize_function_for_speed_p (cfun)) -- { -- x = force_reg (dest_mode, const0_rtx); -- -- emit_insn (gen_movstrictqi (gen_lowpart (QImode, x), destqi)); -- } -- else -- { -- x = gen_rtx_ZERO_EXTEND (dest_mode, destqi); -- if (dest_mode == GET_MODE (dest) -- && !register_operand (dest, GET_MODE (dest))) -- x = force_reg (dest_mode, x); -- } -- } -- -- if (dest_mode != GET_MODE (dest)) -- { -- rtx tmp = gen_reg_rtx (SImode); -- -- emit_insn (gen_rtx_SET (tmp, x)); -- emit_insn (gen_zero_extendsidi2 (dest, tmp)); -- } -- else -- emit_insn (gen_rtx_SET (dest, x)); -- } -- rtx_insn *seq = get_insns (); -- end_sequence (); -- -- if (saw_asm_flag) -- return seq; -- else -- { -- /* If we had no asm flag outputs, clobber the flags. 
*/ -- clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG)); -- SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG); -- return NULL; -- } --} -- --/* Implements target vector targetm.asm.encode_section_info. */ -- --static void ATTRIBUTE_UNUSED --ix86_encode_section_info (tree decl, rtx rtl, int first) --{ -- default_encode_section_info (decl, rtl, first); -- -- if (ix86_in_large_data_p (decl)) -- SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR; --} -- --/* Worker function for REVERSE_CONDITION. */ -- --enum rtx_code --ix86_reverse_condition (enum rtx_code code, machine_mode mode) --{ -- return (mode == CCFPmode -- ? reverse_condition_maybe_unordered (code) -- : reverse_condition (code)); --} -- --/* Output code to perform an x87 FP register move, from OPERANDS[1] -- to OPERANDS[0]. */ -- --const char * --output_387_reg_move (rtx_insn *insn, rtx *operands) --{ -- if (REG_P (operands[0])) -- { -- if (REG_P (operands[1]) -- && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) -- { -- if (REGNO (operands[0]) == FIRST_STACK_REG) -- return output_387_ffreep (operands, 0); -- return "fstp\t%y0"; -- } -- if (STACK_TOP_P (operands[0])) -- return "fld%Z1\t%y1"; -- return "fst\t%y0"; -- } -- else if (MEM_P (operands[0])) -- { -- gcc_assert (REG_P (operands[1])); -- if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) -- return "fstp%Z0\t%y0"; -- else -- { -- /* There is no non-popping store to memory for XFmode. -- So if we need one, follow the store with a load. */ -- if (GET_MODE (operands[0]) == XFmode) -- return "fstp%Z0\t%y0\n\tfld%Z0\t%y0"; -- else -- return "fst%Z0\t%y0"; -- } -- } -- else -- gcc_unreachable(); --} -- --/* Output code to perform a conditional jump to LABEL, if C2 flag in -- FP status register is set. */ -- --void --ix86_emit_fp_unordered_jump (rtx label) --{ -- rtx reg = gen_reg_rtx (HImode); -- rtx_insn *insn; -- rtx temp; -- -- emit_insn (gen_x86_fnstsw_1 (reg)); -- -- if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())) -- { -- emit_insn (gen_x86_sahf_1 (reg)); -- -- temp = gen_rtx_REG (CCmode, FLAGS_REG); -- temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx); -- } -- else -- { -- emit_insn (gen_testqi_ext_1_ccno (reg, GEN_INT (0x04))); -- -- temp = gen_rtx_REG (CCNOmode, FLAGS_REG); -- temp = gen_rtx_NE (VOIDmode, temp, const0_rtx); -- } -- -- temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp, -- gen_rtx_LABEL_REF (VOIDmode, label), -- pc_rtx); -- insn = emit_jump_insn (gen_rtx_SET (pc_rtx, temp)); -- predict_jump (REG_BR_PROB_BASE * 10 / 100); -- JUMP_LABEL (insn) = label; --} -- --/* Output code to perform an sinh XFmode calculation. 
*/ -- --void ix86_emit_i387_sinh (rtx op0, rtx op1) --{ -- rtx e1 = gen_reg_rtx (XFmode); -- rtx e2 = gen_reg_rtx (XFmode); -- rtx scratch = gen_reg_rtx (HImode); -- rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG); -- rtx half = const_double_from_real_value (dconsthalf, XFmode); -- rtx cst1, tmp; -- rtx_code_label *jump_label = gen_label_rtx (); -- rtx_insn *insn; -- -- /* scratch = fxam (op1) */ -- emit_insn (gen_fxamxf2_i387 (scratch, op1)); -- -- /* e1 = expm1 (|op1|) */ -- emit_insn (gen_absxf2 (e2, op1)); -- emit_insn (gen_expm1xf2 (e1, e2)); -- -- /* e2 = e1 / (e1 + 1.0) + e1 */ -- cst1 = force_reg (XFmode, CONST1_RTX (XFmode)); -- emit_insn (gen_addxf3 (e2, e1, cst1)); -- emit_insn (gen_divxf3 (e2, e1, e2)); -- emit_insn (gen_addxf3 (e2, e2, e1)); -- -- /* flags = signbit (op1) */ -- emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02))); -- -- /* if (flags) then e2 = -e2 */ -- tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, -- gen_rtx_EQ (VOIDmode, flags, const0_rtx), -- gen_rtx_LABEL_REF (VOIDmode, jump_label), -- pc_rtx); -- insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); -- predict_jump (REG_BR_PROB_BASE * 50 / 100); -- JUMP_LABEL (insn) = jump_label; -- -- emit_insn (gen_negxf2 (e2, e2)); -- -- emit_label (jump_label); -- LABEL_NUSES (jump_label) = 1; -- -- /* op0 = 0.5 * e2 */ -- half = force_reg (XFmode, half); -- emit_insn (gen_mulxf3 (op0, e2, half)); --} -- --/* Output code to perform an cosh XFmode calculation. */ -- --void ix86_emit_i387_cosh (rtx op0, rtx op1) --{ -- rtx e1 = gen_reg_rtx (XFmode); -- rtx e2 = gen_reg_rtx (XFmode); -- rtx half = const_double_from_real_value (dconsthalf, XFmode); -- rtx cst1; -- -- /* e1 = exp (op1) */ -- emit_insn (gen_expxf2 (e1, op1)); -- -- /* e2 = e1 + 1.0 / e1 */ -- cst1 = force_reg (XFmode, CONST1_RTX (XFmode)); -- emit_insn (gen_divxf3 (e2, cst1, e1)); -- emit_insn (gen_addxf3 (e2, e1, e2)); -- -- /* op0 = 0.5 * e2 */ -- half = force_reg (XFmode, half); -- emit_insn (gen_mulxf3 (op0, e2, half)); --} -- --/* Output code to perform an tanh XFmode calculation. */ -- --void ix86_emit_i387_tanh (rtx op0, rtx op1) --{ -- rtx e1 = gen_reg_rtx (XFmode); -- rtx e2 = gen_reg_rtx (XFmode); -- rtx scratch = gen_reg_rtx (HImode); -- rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG); -- rtx cst2, tmp; -- rtx_code_label *jump_label = gen_label_rtx (); -- rtx_insn *insn; -- -- /* scratch = fxam (op1) */ -- emit_insn (gen_fxamxf2_i387 (scratch, op1)); -- -- /* e1 = expm1 (-|2 * op1|) */ -- emit_insn (gen_addxf3 (e2, op1, op1)); -- emit_insn (gen_absxf2 (e2, e2)); -- emit_insn (gen_negxf2 (e2, e2)); -- emit_insn (gen_expm1xf2 (e1, e2)); -- -- /* e2 = e1 / (e1 + 2.0) */ -- cst2 = force_reg (XFmode, CONST2_RTX (XFmode)); -- emit_insn (gen_addxf3 (e2, e1, cst2)); -- emit_insn (gen_divxf3 (e2, e1, e2)); -- -- /* flags = signbit (op1) */ -- emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02))); -- -- /* if (!flags) then e2 = -e2 */ -- tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, -- gen_rtx_NE (VOIDmode, flags, const0_rtx), -- gen_rtx_LABEL_REF (VOIDmode, jump_label), -- pc_rtx); -- insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); -- predict_jump (REG_BR_PROB_BASE * 50 / 100); -- JUMP_LABEL (insn) = jump_label; -- -- emit_insn (gen_negxf2 (e2, e2)); -- -- emit_label (jump_label); -- LABEL_NUSES (jump_label) = 1; -- -- emit_move_insn (op0, e2); --} -- --/* Output code to perform an asinh XFmode calculation. 
*/ -- --void ix86_emit_i387_asinh (rtx op0, rtx op1) --{ -- rtx e1 = gen_reg_rtx (XFmode); -- rtx e2 = gen_reg_rtx (XFmode); -- rtx scratch = gen_reg_rtx (HImode); -- rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG); -- rtx cst1, tmp; -- rtx_code_label *jump_label = gen_label_rtx (); -- rtx_insn *insn; -- -- /* e2 = sqrt (op1^2 + 1.0) + 1.0 */ -- emit_insn (gen_mulxf3 (e1, op1, op1)); -- cst1 = force_reg (XFmode, CONST1_RTX (XFmode)); -- emit_insn (gen_addxf3 (e2, e1, cst1)); -- emit_insn (gen_sqrtxf2 (e2, e2)); -- emit_insn (gen_addxf3 (e2, e2, cst1)); -- -- /* e1 = e1 / e2 */ -- emit_insn (gen_divxf3 (e1, e1, e2)); -- -- /* scratch = fxam (op1) */ -- emit_insn (gen_fxamxf2_i387 (scratch, op1)); -- -- /* e1 = e1 + |op1| */ -- emit_insn (gen_absxf2 (e2, op1)); -- emit_insn (gen_addxf3 (e1, e1, e2)); -- -- /* e2 = log1p (e1) */ -- ix86_emit_i387_log1p (e2, e1); -- -- /* flags = signbit (op1) */ -- emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02))); -- -- /* if (flags) then e2 = -e2 */ -- tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, -- gen_rtx_EQ (VOIDmode, flags, const0_rtx), -- gen_rtx_LABEL_REF (VOIDmode, jump_label), -- pc_rtx); -- insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); -- predict_jump (REG_BR_PROB_BASE * 50 / 100); -- JUMP_LABEL (insn) = jump_label; -- -- emit_insn (gen_negxf2 (e2, e2)); -- -- emit_label (jump_label); -- LABEL_NUSES (jump_label) = 1; -- -- emit_move_insn (op0, e2); --} -- --/* Output code to perform an acosh XFmode calculation. */ -- --void ix86_emit_i387_acosh (rtx op0, rtx op1) --{ -- rtx e1 = gen_reg_rtx (XFmode); -- rtx e2 = gen_reg_rtx (XFmode); -- rtx cst1 = force_reg (XFmode, CONST1_RTX (XFmode)); -- -- /* e2 = sqrt (op1 + 1.0) */ -- emit_insn (gen_addxf3 (e2, op1, cst1)); -- emit_insn (gen_sqrtxf2 (e2, e2)); -- -- /* e1 = sqrt (op1 - 1.0) */ -- emit_insn (gen_subxf3 (e1, op1, cst1)); -- emit_insn (gen_sqrtxf2 (e1, e1)); -- -- /* e1 = e1 * e2 */ -- emit_insn (gen_mulxf3 (e1, e1, e2)); -- -- /* e1 = e1 + op1 */ -- emit_insn (gen_addxf3 (e1, e1, op1)); -- -- /* op0 = log (e1) */ -- emit_insn (gen_logxf2 (op0, e1)); --} -- --/* Output code to perform an atanh XFmode calculation. 
*/ -- --void ix86_emit_i387_atanh (rtx op0, rtx op1) --{ -- rtx e1 = gen_reg_rtx (XFmode); -- rtx e2 = gen_reg_rtx (XFmode); -- rtx scratch = gen_reg_rtx (HImode); -- rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG); -- rtx half = const_double_from_real_value (dconsthalf, XFmode); -- rtx cst1, tmp; -- rtx_code_label *jump_label = gen_label_rtx (); -- rtx_insn *insn; -- -- /* scratch = fxam (op1) */ -- emit_insn (gen_fxamxf2_i387 (scratch, op1)); -- -- /* e2 = |op1| */ -- emit_insn (gen_absxf2 (e2, op1)); -- -- /* e1 = -(e2 + e2) / (e2 + 1.0) */ -- cst1 = force_reg (XFmode, CONST1_RTX (XFmode)); -- emit_insn (gen_addxf3 (e1, e2, cst1)); -- emit_insn (gen_addxf3 (e2, e2, e2)); -- emit_insn (gen_negxf2 (e2, e2)); -- emit_insn (gen_divxf3 (e1, e2, e1)); -- -- /* e2 = log1p (e1) */ -- ix86_emit_i387_log1p (e2, e1); -- -- /* flags = signbit (op1) */ -- emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02))); -- -- /* if (!flags) then e2 = -e2 */ -- tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, -- gen_rtx_NE (VOIDmode, flags, const0_rtx), -- gen_rtx_LABEL_REF (VOIDmode, jump_label), -- pc_rtx); -- insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); -- predict_jump (REG_BR_PROB_BASE * 50 / 100); -- JUMP_LABEL (insn) = jump_label; -- -- emit_insn (gen_negxf2 (e2, e2)); -- -- emit_label (jump_label); -- LABEL_NUSES (jump_label) = 1; -- -- /* op0 = 0.5 * e2 */ -- half = force_reg (XFmode, half); -- emit_insn (gen_mulxf3 (op0, e2, half)); --} -- --/* Output code to perform a log1p XFmode calculation. */ -- --void ix86_emit_i387_log1p (rtx op0, rtx op1) --{ -- rtx_code_label *label1 = gen_label_rtx (); -- rtx_code_label *label2 = gen_label_rtx (); -- -- rtx tmp = gen_reg_rtx (XFmode); -- rtx res = gen_reg_rtx (XFmode); -- rtx cst, cstln2, cst1; -- rtx_insn *insn; -- -- cst = const_double_from_real_value -- (REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode), XFmode); -- cstln2 = force_reg (XFmode, standard_80387_constant_rtx (4)); /* fldln2 */ -- -- emit_insn (gen_absxf2 (tmp, op1)); -- -- cst = force_reg (XFmode, cst); -- ix86_expand_branch (GE, tmp, cst, label1); -- predict_jump (REG_BR_PROB_BASE * 10 / 100); -- insn = get_last_insn (); -- JUMP_LABEL (insn) = label1; -- -- emit_insn (gen_fyl2xp1xf3_i387 (res, op1, cstln2)); -- emit_jump (label2); -- -- emit_label (label1); -- LABEL_NUSES (label1) = 1; -- -- cst1 = force_reg (XFmode, CONST1_RTX (XFmode)); -- emit_insn (gen_rtx_SET (tmp, gen_rtx_PLUS (XFmode, op1, cst1))); -- emit_insn (gen_fyl2xxf3_i387 (res, tmp, cstln2)); -- -- emit_label (label2); -- LABEL_NUSES (label2) = 1; -- -- emit_move_insn (op0, res); --} -- --/* Emit code for round calculation. 
*/ --void ix86_emit_i387_round (rtx op0, rtx op1) --{ -- machine_mode inmode = GET_MODE (op1); -- machine_mode outmode = GET_MODE (op0); -- rtx e1 = gen_reg_rtx (XFmode); -- rtx e2 = gen_reg_rtx (XFmode); -- rtx scratch = gen_reg_rtx (HImode); -- rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG); -- rtx half = const_double_from_real_value (dconsthalf, XFmode); -- rtx res = gen_reg_rtx (outmode); -- rtx_code_label *jump_label = gen_label_rtx (); -- rtx (*floor_insn) (rtx, rtx); -- rtx (*neg_insn) (rtx, rtx); -- rtx_insn *insn; -- rtx tmp; -- -- switch (inmode) -- { -- case E_SFmode: -- case E_DFmode: -- tmp = gen_reg_rtx (XFmode); -- -- emit_insn (gen_rtx_SET (tmp, gen_rtx_FLOAT_EXTEND (XFmode, op1))); -- op1 = tmp; -- break; -- case E_XFmode: -- break; -- default: -- gcc_unreachable (); -- } -- -- switch (outmode) -- { -- case E_SFmode: -- floor_insn = gen_frndintxf2_floor; -- neg_insn = gen_negsf2; -- break; -- case E_DFmode: -- floor_insn = gen_frndintxf2_floor; -- neg_insn = gen_negdf2; -- break; -- case E_XFmode: -- floor_insn = gen_frndintxf2_floor; -- neg_insn = gen_negxf2; -- break; -- case E_HImode: -- floor_insn = gen_lfloorxfhi2; -- neg_insn = gen_neghi2; -- break; -- case E_SImode: -- floor_insn = gen_lfloorxfsi2; -- neg_insn = gen_negsi2; -- break; -- case E_DImode: -- floor_insn = gen_lfloorxfdi2; -- neg_insn = gen_negdi2; -- break; -- default: -- gcc_unreachable (); -- } -- -- /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */ -- -- /* scratch = fxam(op1) */ -- emit_insn (gen_fxamxf2_i387 (scratch, op1)); -- -- /* e1 = fabs(op1) */ -- emit_insn (gen_absxf2 (e1, op1)); -- -- /* e2 = e1 + 0.5 */ -- half = force_reg (XFmode, half); -- emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (XFmode, e1, half))); -- -- /* res = floor(e2) */ -- switch (outmode) -- { -- case E_SFmode: -- case E_DFmode: -- { -- tmp = gen_reg_rtx (XFmode); -- -- emit_insn (floor_insn (tmp, e2)); -- emit_insn (gen_rtx_SET (res, -- gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp), -- UNSPEC_TRUNC_NOOP))); -- } -- break; -- default: -- emit_insn (floor_insn (res, e2)); -- } -- -- /* flags = signbit(a) */ -- emit_insn (gen_testqi_ext_1_ccno (scratch, GEN_INT (0x02))); -- -- /* if (flags) then res = -res */ -- tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, -- gen_rtx_EQ (VOIDmode, flags, const0_rtx), -- gen_rtx_LABEL_REF (VOIDmode, jump_label), -- pc_rtx); -- insn = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); -- predict_jump (REG_BR_PROB_BASE * 50 / 100); -- JUMP_LABEL (insn) = jump_label; -- -- emit_insn (neg_insn (res, res)); -- -- emit_label (jump_label); -- LABEL_NUSES (jump_label) = 1; -- -- emit_move_insn (op0, res); --} -- --/* Output code to perform a Newton-Rhapson approximation of a single precision -- floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. 
*/ -- --void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode) --{ -- rtx x0, x1, e0, e1; -- -- x0 = gen_reg_rtx (mode); -- e0 = gen_reg_rtx (mode); -- e1 = gen_reg_rtx (mode); -- x1 = gen_reg_rtx (mode); -- -- /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */ -- -- b = force_reg (mode, b); -- -- /* x0 = rcp(b) estimate */ -- if (mode == V16SFmode || mode == V8DFmode) -- { -- if (TARGET_AVX512ER) -- { -- emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b), -- UNSPEC_RCP28))); -- /* res = a * x0 */ -- emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x0))); -- return; -- } -- else -- emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b), -- UNSPEC_RCP14))); -- } -- else -- emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b), -- UNSPEC_RCP))); -- -- /* e0 = x0 * b */ -- emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b))); -- -- /* e0 = x0 * e0 */ -- emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0))); -- -- /* e1 = x0 + x0 */ -- emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0))); -- -- /* x1 = e1 - e0 */ -- emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0))); -- -- /* res = a * x1 */ -- emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1))); --} -- --/* Output code to perform a Newton-Rhapson approximation of a -- single precision floating point [reciprocal] square root. */ -- --void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip) --{ -- rtx x0, e0, e1, e2, e3, mthree, mhalf; -- REAL_VALUE_TYPE r; -- int unspec; -- -- x0 = gen_reg_rtx (mode); -- e0 = gen_reg_rtx (mode); -- e1 = gen_reg_rtx (mode); -- e2 = gen_reg_rtx (mode); -- e3 = gen_reg_rtx (mode); -- -- if (TARGET_AVX512ER && mode == V16SFmode) -- { -- if (recip) -- /* res = rsqrt28(a) estimate */ -- emit_insn (gen_rtx_SET (res, gen_rtx_UNSPEC (mode, gen_rtvec (1, a), -- UNSPEC_RSQRT28))); -- else -- { -- /* x0 = rsqrt28(a) estimate */ -- emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a), -- UNSPEC_RSQRT28))); -- /* res = rcp28(x0) estimate */ -- emit_insn (gen_rtx_SET (res, gen_rtx_UNSPEC (mode, gen_rtvec (1, x0), -- UNSPEC_RCP28))); -- } -- return; -- } -- -- real_from_integer (&r, VOIDmode, -3, SIGNED); -- mthree = const_double_from_real_value (r, SFmode); -- -- real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL); -- mhalf = const_double_from_real_value (r, SFmode); -- unspec = UNSPEC_RSQRT; -- -- if (VECTOR_MODE_P (mode)) -- { -- mthree = ix86_build_const_vector (mode, true, mthree); -- mhalf = ix86_build_const_vector (mode, true, mhalf); -- /* There is no 512-bit rsqrt. There is however rsqrt14. */ -- if (GET_MODE_SIZE (mode) == 64) -- unspec = UNSPEC_RSQRT14; -- } -- -- /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) -- rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */ -- -- a = force_reg (mode, a); -- -- /* x0 = rsqrt(a) estimate */ -- emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a), -- unspec))); -- -- /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */ -- if (!recip) -- { -- rtx zero = force_reg (mode, CONST0_RTX(mode)); -- rtx mask; -- -- /* Handle masked compare. */ -- if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64) -- { -- mask = gen_reg_rtx (HImode); -- /* Imm value 0x4 corresponds to not-equal comparison. 
*/ -- emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4))); -- emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask)); -- } -- else -- { -- mask = gen_reg_rtx (mode); -- emit_insn (gen_rtx_SET (mask, gen_rtx_NE (mode, zero, a))); -- emit_insn (gen_rtx_SET (x0, gen_rtx_AND (mode, x0, mask))); -- } -- } -- -- /* e0 = x0 * a */ -- emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a))); -- /* e1 = e0 * x0 */ -- emit_insn (gen_rtx_SET (e1, gen_rtx_MULT (mode, e0, x0))); -- -- /* e2 = e1 - 3. */ -- mthree = force_reg (mode, mthree); -- emit_insn (gen_rtx_SET (e2, gen_rtx_PLUS (mode, e1, mthree))); -- -- mhalf = force_reg (mode, mhalf); -- if (recip) -- /* e3 = -.5 * x0 */ -- emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, x0, mhalf))); -- else -- /* e3 = -.5 * e0 */ -- emit_insn (gen_rtx_SET (e3, gen_rtx_MULT (mode, e0, mhalf))); -- /* ret = e2 * e3 */ -- emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, e2, e3))); --} -- --#ifdef TARGET_SOLARIS --/* Solaris implementation of TARGET_ASM_NAMED_SECTION. */ -+ case IX86_BUILTIN_PSLLD: -+ case IX86_BUILTIN_PSLLD128: -+ case IX86_BUILTIN_PSLLD128_MASK: -+ case IX86_BUILTIN_PSLLD256: -+ case IX86_BUILTIN_PSLLD256_MASK: -+ case IX86_BUILTIN_PSLLD512: -+ case IX86_BUILTIN_PSLLDI: -+ case IX86_BUILTIN_PSLLDI128: -+ case IX86_BUILTIN_PSLLDI128_MASK: -+ case IX86_BUILTIN_PSLLDI256: -+ case IX86_BUILTIN_PSLLDI256_MASK: -+ case IX86_BUILTIN_PSLLDI512: -+ case IX86_BUILTIN_PSLLQ: -+ case IX86_BUILTIN_PSLLQ128: -+ case IX86_BUILTIN_PSLLQ128_MASK: -+ case IX86_BUILTIN_PSLLQ256: -+ case IX86_BUILTIN_PSLLQ256_MASK: -+ case IX86_BUILTIN_PSLLQ512: -+ case IX86_BUILTIN_PSLLQI: -+ case IX86_BUILTIN_PSLLQI128: -+ case IX86_BUILTIN_PSLLQI128_MASK: -+ case IX86_BUILTIN_PSLLQI256: -+ case IX86_BUILTIN_PSLLQI256_MASK: -+ case IX86_BUILTIN_PSLLQI512: -+ case IX86_BUILTIN_PSLLW: -+ case IX86_BUILTIN_PSLLW128: -+ case IX86_BUILTIN_PSLLW128_MASK: -+ case IX86_BUILTIN_PSLLW256: -+ case IX86_BUILTIN_PSLLW256_MASK: -+ case IX86_BUILTIN_PSLLW512_MASK: -+ case IX86_BUILTIN_PSLLWI: -+ case IX86_BUILTIN_PSLLWI128: -+ case IX86_BUILTIN_PSLLWI128_MASK: -+ case IX86_BUILTIN_PSLLWI256: -+ case IX86_BUILTIN_PSLLWI256_MASK: -+ case IX86_BUILTIN_PSLLWI512_MASK: -+ rcode = ASHIFT; -+ is_vshift = false; -+ goto do_shift; -+ case IX86_BUILTIN_PSRAD: -+ case IX86_BUILTIN_PSRAD128: -+ case IX86_BUILTIN_PSRAD128_MASK: -+ case IX86_BUILTIN_PSRAD256: -+ case IX86_BUILTIN_PSRAD256_MASK: -+ case IX86_BUILTIN_PSRAD512: -+ case IX86_BUILTIN_PSRADI: -+ case IX86_BUILTIN_PSRADI128: -+ case IX86_BUILTIN_PSRADI128_MASK: -+ case IX86_BUILTIN_PSRADI256: -+ case IX86_BUILTIN_PSRADI256_MASK: -+ case IX86_BUILTIN_PSRADI512: -+ case IX86_BUILTIN_PSRAQ128_MASK: -+ case IX86_BUILTIN_PSRAQ256_MASK: -+ case IX86_BUILTIN_PSRAQ512: -+ case IX86_BUILTIN_PSRAQI128_MASK: -+ case IX86_BUILTIN_PSRAQI256_MASK: -+ case IX86_BUILTIN_PSRAQI512: -+ case IX86_BUILTIN_PSRAW: -+ case IX86_BUILTIN_PSRAW128: -+ case IX86_BUILTIN_PSRAW128_MASK: -+ case IX86_BUILTIN_PSRAW256: -+ case IX86_BUILTIN_PSRAW256_MASK: -+ case IX86_BUILTIN_PSRAW512: -+ case IX86_BUILTIN_PSRAWI: -+ case IX86_BUILTIN_PSRAWI128: -+ case IX86_BUILTIN_PSRAWI128_MASK: -+ case IX86_BUILTIN_PSRAWI256: -+ case IX86_BUILTIN_PSRAWI256_MASK: -+ case IX86_BUILTIN_PSRAWI512: -+ rcode = ASHIFTRT; -+ is_vshift = false; -+ goto do_shift; -+ case IX86_BUILTIN_PSRLD: -+ case IX86_BUILTIN_PSRLD128: -+ case IX86_BUILTIN_PSRLD128_MASK: -+ case IX86_BUILTIN_PSRLD256: -+ case IX86_BUILTIN_PSRLD256_MASK: -+ case IX86_BUILTIN_PSRLD512: -+ case IX86_BUILTIN_PSRLDI: -+ 
case IX86_BUILTIN_PSRLDI128: -+ case IX86_BUILTIN_PSRLDI128_MASK: -+ case IX86_BUILTIN_PSRLDI256: -+ case IX86_BUILTIN_PSRLDI256_MASK: -+ case IX86_BUILTIN_PSRLDI512: -+ case IX86_BUILTIN_PSRLQ: -+ case IX86_BUILTIN_PSRLQ128: -+ case IX86_BUILTIN_PSRLQ128_MASK: -+ case IX86_BUILTIN_PSRLQ256: -+ case IX86_BUILTIN_PSRLQ256_MASK: -+ case IX86_BUILTIN_PSRLQ512: -+ case IX86_BUILTIN_PSRLQI: -+ case IX86_BUILTIN_PSRLQI128: -+ case IX86_BUILTIN_PSRLQI128_MASK: -+ case IX86_BUILTIN_PSRLQI256: -+ case IX86_BUILTIN_PSRLQI256_MASK: -+ case IX86_BUILTIN_PSRLQI512: -+ case IX86_BUILTIN_PSRLW: -+ case IX86_BUILTIN_PSRLW128: -+ case IX86_BUILTIN_PSRLW128_MASK: -+ case IX86_BUILTIN_PSRLW256: -+ case IX86_BUILTIN_PSRLW256_MASK: -+ case IX86_BUILTIN_PSRLW512: -+ case IX86_BUILTIN_PSRLWI: -+ case IX86_BUILTIN_PSRLWI128: -+ case IX86_BUILTIN_PSRLWI128_MASK: -+ case IX86_BUILTIN_PSRLWI256: -+ case IX86_BUILTIN_PSRLWI256_MASK: -+ case IX86_BUILTIN_PSRLWI512: -+ rcode = LSHIFTRT; -+ is_vshift = false; -+ goto do_shift; -+ case IX86_BUILTIN_PSLLVV16HI: -+ case IX86_BUILTIN_PSLLVV16SI: -+ case IX86_BUILTIN_PSLLVV2DI: -+ case IX86_BUILTIN_PSLLVV2DI_MASK: -+ case IX86_BUILTIN_PSLLVV32HI: -+ case IX86_BUILTIN_PSLLVV4DI: -+ case IX86_BUILTIN_PSLLVV4DI_MASK: -+ case IX86_BUILTIN_PSLLVV4SI: -+ case IX86_BUILTIN_PSLLVV4SI_MASK: -+ case IX86_BUILTIN_PSLLVV8DI: -+ case IX86_BUILTIN_PSLLVV8HI: -+ case IX86_BUILTIN_PSLLVV8SI: -+ case IX86_BUILTIN_PSLLVV8SI_MASK: -+ rcode = ASHIFT; -+ is_vshift = true; -+ goto do_shift; -+ case IX86_BUILTIN_PSRAVQ128: -+ case IX86_BUILTIN_PSRAVQ256: -+ case IX86_BUILTIN_PSRAVV16HI: -+ case IX86_BUILTIN_PSRAVV16SI: -+ case IX86_BUILTIN_PSRAVV32HI: -+ case IX86_BUILTIN_PSRAVV4SI: -+ case IX86_BUILTIN_PSRAVV4SI_MASK: -+ case IX86_BUILTIN_PSRAVV8DI: -+ case IX86_BUILTIN_PSRAVV8HI: -+ case IX86_BUILTIN_PSRAVV8SI: -+ case IX86_BUILTIN_PSRAVV8SI_MASK: -+ rcode = ASHIFTRT; -+ is_vshift = true; -+ goto do_shift; -+ case IX86_BUILTIN_PSRLVV16HI: -+ case IX86_BUILTIN_PSRLVV16SI: -+ case IX86_BUILTIN_PSRLVV2DI: -+ case IX86_BUILTIN_PSRLVV2DI_MASK: -+ case IX86_BUILTIN_PSRLVV32HI: -+ case IX86_BUILTIN_PSRLVV4DI: -+ case IX86_BUILTIN_PSRLVV4DI_MASK: -+ case IX86_BUILTIN_PSRLVV4SI: -+ case IX86_BUILTIN_PSRLVV4SI_MASK: -+ case IX86_BUILTIN_PSRLVV8DI: -+ case IX86_BUILTIN_PSRLVV8HI: -+ case IX86_BUILTIN_PSRLVV8SI: -+ case IX86_BUILTIN_PSRLVV8SI_MASK: -+ rcode = LSHIFTRT; -+ is_vshift = true; -+ goto do_shift; - --static void --i386_solaris_elf_named_section (const char *name, unsigned int flags, -- tree decl) --{ -- /* With Binutils 2.15, the "@unwind" marker must be specified on -- every occurrence of the ".eh_frame" section, not just the first -- one. */ -- if (TARGET_64BIT -- && strcmp (name, ".eh_frame") == 0) -- { -- fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name, -- flags & SECTION_WRITE ? "aw" : "a"); -- return; -- } -+ do_shift: -+ gcc_assert (n_args >= 2); -+ if (TREE_CODE (args[0]) != VECTOR_CST) -+ break; -+ mask = HOST_WIDE_INT_M1U; -+ if (n_args > 2) -+ { -+ /* This is masked shift. 
*/ -+ if (!tree_fits_uhwi_p (args[n_args - 1]) -+ || TREE_SIDE_EFFECTS (args[n_args - 2])) -+ break; -+ mask = tree_to_uhwi (args[n_args - 1]); -+ unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])); -+ mask |= HOST_WIDE_INT_M1U << elems; -+ if (mask != HOST_WIDE_INT_M1U -+ && TREE_CODE (args[n_args - 2]) != VECTOR_CST) -+ break; -+ if (mask == (HOST_WIDE_INT_M1U << elems)) -+ return args[n_args - 2]; -+ } -+ if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST) -+ break; -+ if (tree tem = (is_vshift ? integer_one_node -+ : ix86_vector_shift_count (args[1]))) -+ { -+ unsigned HOST_WIDE_INT count = tree_to_uhwi (tem); -+ unsigned HOST_WIDE_INT prec -+ = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0]))); -+ if (count == 0 && mask == HOST_WIDE_INT_M1U) -+ return args[0]; -+ if (count >= prec) -+ { -+ if (rcode == ASHIFTRT) -+ count = prec - 1; -+ else if (mask == HOST_WIDE_INT_M1U) -+ return build_zero_cst (TREE_TYPE (args[0])); -+ } -+ tree countt = NULL_TREE; -+ if (!is_vshift) -+ { -+ if (count >= prec) -+ countt = integer_zero_node; -+ else -+ countt = build_int_cst (integer_type_node, count); -+ } -+ tree_vector_builder builder; -+ if (mask != HOST_WIDE_INT_M1U || is_vshift) -+ builder.new_vector (TREE_TYPE (args[0]), -+ TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])), -+ 1); -+ else -+ builder.new_unary_operation (TREE_TYPE (args[0]), args[0], -+ false); -+ unsigned int cnt = builder.encoded_nelts (); -+ for (unsigned int i = 0; i < cnt; ++i) -+ { -+ tree elt = VECTOR_CST_ELT (args[0], i); -+ if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt)) -+ return NULL_TREE; -+ tree type = TREE_TYPE (elt); -+ if (rcode == LSHIFTRT) -+ elt = fold_convert (unsigned_type_for (type), elt); -+ if (is_vshift) -+ { -+ countt = VECTOR_CST_ELT (args[1], i); -+ if (TREE_CODE (countt) != INTEGER_CST -+ || TREE_OVERFLOW (countt)) -+ return NULL_TREE; -+ if (wi::neg_p (wi::to_wide (countt)) -+ || wi::to_widest (countt) >= prec) -+ { -+ if (rcode == ASHIFTRT) -+ countt = build_int_cst (TREE_TYPE (countt), -+ prec - 1); -+ else -+ { -+ elt = build_zero_cst (TREE_TYPE (elt)); -+ countt = build_zero_cst (TREE_TYPE (countt)); -+ } -+ } -+ } -+ else if (count >= prec) -+ elt = build_zero_cst (TREE_TYPE (elt)); -+ elt = const_binop (rcode == ASHIFT -+ ? LSHIFT_EXPR : RSHIFT_EXPR, -+ TREE_TYPE (elt), elt, countt); -+ if (!elt || TREE_CODE (elt) != INTEGER_CST) -+ return NULL_TREE; -+ if (rcode == LSHIFTRT) -+ elt = fold_convert (type, elt); -+ if ((mask & (HOST_WIDE_INT_1U << i)) == 0) -+ { -+ elt = VECTOR_CST_ELT (args[n_args - 2], i); -+ if (TREE_CODE (elt) != INTEGER_CST -+ || TREE_OVERFLOW (elt)) -+ return NULL_TREE; -+ } -+ builder.quick_push (elt); -+ } -+ return builder.build (); -+ } -+ break; - --#ifndef USE_GAS -- if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE) -- { -- solaris_elf_asm_comdat_section (name, flags, decl); -- return; -+ default: -+ break; -+ } - } - -- /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the -- SPARC assembler. One cannot mix single-letter flags and #exclude, so -- only emit the latter here. */ -- if (flags & SECTION_EXCLUDE) -- { -- fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name); -- return; -- } -+#ifdef SUBTARGET_FOLD_BUILTIN -+ return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore); - #endif - -- default_elf_asm_named_section (name, flags, decl); -+ return NULL_TREE; - } --#endif /* TARGET_SOLARIS */ - --/* Return the mangling of TYPE if it is an extended fundamental type. 
*/ -+/* Fold a MD builtin (use ix86_fold_builtin for folding into -+ constant) in GIMPLE. */ - --static const char * --ix86_mangle_type (const_tree type) -+bool -+ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) - { -- type = TYPE_MAIN_VARIANT (type); -- -- if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE -- && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE) -- return NULL; -+ gimple *stmt = gsi_stmt (*gsi); -+ tree fndecl = gimple_call_fndecl (stmt); -+ gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD)); -+ int n_args = gimple_call_num_args (stmt); -+ enum ix86_builtins fn_code -+ = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl); -+ tree decl = NULL_TREE; -+ tree arg0, arg1, arg2; -+ enum rtx_code rcode; -+ unsigned HOST_WIDE_INT count; -+ bool is_vshift; - -- switch (TYPE_MODE (type)) -+ switch (fn_code) - { -- case E_TFmode: -- /* __float128 is "g". */ -- return "g"; -- case E_XFmode: -- /* "long double" or __float80 is "e". */ -- return "e"; -- default: -- return NULL; -- } --} -+ case IX86_BUILTIN_TZCNT32: -+ decl = builtin_decl_implicit (BUILT_IN_CTZ); -+ goto fold_tzcnt_lzcnt; - --static GTY(()) tree ix86_tls_stack_chk_guard_decl; -+ case IX86_BUILTIN_TZCNT64: -+ decl = builtin_decl_implicit (BUILT_IN_CTZLL); -+ goto fold_tzcnt_lzcnt; - --static tree --ix86_stack_protect_guard (void) --{ -- if (TARGET_SSP_TLS_GUARD) -- { -- tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1); -- int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg); -- tree type = build_qualified_type (type_node, qual); -- tree t; -+ case IX86_BUILTIN_LZCNT32: -+ decl = builtin_decl_implicit (BUILT_IN_CLZ); -+ goto fold_tzcnt_lzcnt; -+ -+ case IX86_BUILTIN_LZCNT64: -+ decl = builtin_decl_implicit (BUILT_IN_CLZLL); -+ goto fold_tzcnt_lzcnt; -+ -+ fold_tzcnt_lzcnt: -+ gcc_assert (n_args == 1); -+ arg0 = gimple_call_arg (stmt, 0); -+ if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt)) -+ { -+ int prec = TYPE_PRECISION (TREE_TYPE (arg0)); -+ /* If arg0 is provably non-zero, optimize into generic -+ __builtin_c[tl]z{,ll} function the middle-end handles -+ better. 
*/ -+ if (!expr_not_equal_to (arg0, wi::zero (prec))) -+ return false; -+ -+ location_t loc = gimple_location (stmt); -+ gimple *g = gimple_build_call (decl, 1, arg0); -+ gimple_set_location (g, loc); -+ tree lhs = make_ssa_name (integer_type_node); -+ gimple_call_set_lhs (g, lhs); -+ gsi_insert_before (gsi, g, GSI_SAME_STMT); -+ g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs); -+ gimple_set_location (g, loc); -+ gsi_replace (gsi, g, false); -+ return true; -+ } -+ break; -+ -+ case IX86_BUILTIN_BZHI32: -+ case IX86_BUILTIN_BZHI64: -+ gcc_assert (n_args == 2); -+ arg1 = gimple_call_arg (stmt, 1); -+ if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt)) -+ { -+ unsigned int idx = tree_to_uhwi (arg1) & 0xff; -+ arg0 = gimple_call_arg (stmt, 0); -+ if (idx < TYPE_PRECISION (TREE_TYPE (arg0))) -+ break; -+ location_t loc = gimple_location (stmt); -+ gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0); -+ gimple_set_location (g, loc); -+ gsi_replace (gsi, g, false); -+ return true; -+ } -+ break; -+ -+ case IX86_BUILTIN_PDEP32: -+ case IX86_BUILTIN_PDEP64: -+ case IX86_BUILTIN_PEXT32: -+ case IX86_BUILTIN_PEXT64: -+ gcc_assert (n_args == 2); -+ arg1 = gimple_call_arg (stmt, 1); -+ if (integer_all_onesp (arg1) && gimple_call_lhs (stmt)) -+ { -+ location_t loc = gimple_location (stmt); -+ arg0 = gimple_call_arg (stmt, 0); -+ gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0); -+ gimple_set_location (g, loc); -+ gsi_replace (gsi, g, false); -+ return true; -+ } -+ break; -+ -+ case IX86_BUILTIN_PSLLD: -+ case IX86_BUILTIN_PSLLD128: -+ case IX86_BUILTIN_PSLLD128_MASK: -+ case IX86_BUILTIN_PSLLD256: -+ case IX86_BUILTIN_PSLLD256_MASK: -+ case IX86_BUILTIN_PSLLD512: -+ case IX86_BUILTIN_PSLLDI: -+ case IX86_BUILTIN_PSLLDI128: -+ case IX86_BUILTIN_PSLLDI128_MASK: -+ case IX86_BUILTIN_PSLLDI256: -+ case IX86_BUILTIN_PSLLDI256_MASK: -+ case IX86_BUILTIN_PSLLDI512: -+ case IX86_BUILTIN_PSLLQ: -+ case IX86_BUILTIN_PSLLQ128: -+ case IX86_BUILTIN_PSLLQ128_MASK: -+ case IX86_BUILTIN_PSLLQ256: -+ case IX86_BUILTIN_PSLLQ256_MASK: -+ case IX86_BUILTIN_PSLLQ512: -+ case IX86_BUILTIN_PSLLQI: -+ case IX86_BUILTIN_PSLLQI128: -+ case IX86_BUILTIN_PSLLQI128_MASK: -+ case IX86_BUILTIN_PSLLQI256: -+ case IX86_BUILTIN_PSLLQI256_MASK: -+ case IX86_BUILTIN_PSLLQI512: -+ case IX86_BUILTIN_PSLLW: -+ case IX86_BUILTIN_PSLLW128: -+ case IX86_BUILTIN_PSLLW128_MASK: -+ case IX86_BUILTIN_PSLLW256: -+ case IX86_BUILTIN_PSLLW256_MASK: -+ case IX86_BUILTIN_PSLLW512_MASK: -+ case IX86_BUILTIN_PSLLWI: -+ case IX86_BUILTIN_PSLLWI128: -+ case IX86_BUILTIN_PSLLWI128_MASK: -+ case IX86_BUILTIN_PSLLWI256: -+ case IX86_BUILTIN_PSLLWI256_MASK: -+ case IX86_BUILTIN_PSLLWI512_MASK: -+ rcode = ASHIFT; -+ is_vshift = false; -+ goto do_shift; -+ case IX86_BUILTIN_PSRAD: -+ case IX86_BUILTIN_PSRAD128: -+ case IX86_BUILTIN_PSRAD128_MASK: -+ case IX86_BUILTIN_PSRAD256: -+ case IX86_BUILTIN_PSRAD256_MASK: -+ case IX86_BUILTIN_PSRAD512: -+ case IX86_BUILTIN_PSRADI: -+ case IX86_BUILTIN_PSRADI128: -+ case IX86_BUILTIN_PSRADI128_MASK: -+ case IX86_BUILTIN_PSRADI256: -+ case IX86_BUILTIN_PSRADI256_MASK: -+ case IX86_BUILTIN_PSRADI512: -+ case IX86_BUILTIN_PSRAQ128_MASK: -+ case IX86_BUILTIN_PSRAQ256_MASK: -+ case IX86_BUILTIN_PSRAQ512: -+ case IX86_BUILTIN_PSRAQI128_MASK: -+ case IX86_BUILTIN_PSRAQI256_MASK: -+ case IX86_BUILTIN_PSRAQI512: -+ case IX86_BUILTIN_PSRAW: -+ case IX86_BUILTIN_PSRAW128: -+ case IX86_BUILTIN_PSRAW128_MASK: -+ case IX86_BUILTIN_PSRAW256: -+ case IX86_BUILTIN_PSRAW256_MASK: -+ case 
IX86_BUILTIN_PSRAW512: -+ case IX86_BUILTIN_PSRAWI: -+ case IX86_BUILTIN_PSRAWI128: -+ case IX86_BUILTIN_PSRAWI128_MASK: -+ case IX86_BUILTIN_PSRAWI256: -+ case IX86_BUILTIN_PSRAWI256_MASK: -+ case IX86_BUILTIN_PSRAWI512: -+ rcode = ASHIFTRT; -+ is_vshift = false; -+ goto do_shift; -+ case IX86_BUILTIN_PSRLD: -+ case IX86_BUILTIN_PSRLD128: -+ case IX86_BUILTIN_PSRLD128_MASK: -+ case IX86_BUILTIN_PSRLD256: -+ case IX86_BUILTIN_PSRLD256_MASK: -+ case IX86_BUILTIN_PSRLD512: -+ case IX86_BUILTIN_PSRLDI: -+ case IX86_BUILTIN_PSRLDI128: -+ case IX86_BUILTIN_PSRLDI128_MASK: -+ case IX86_BUILTIN_PSRLDI256: -+ case IX86_BUILTIN_PSRLDI256_MASK: -+ case IX86_BUILTIN_PSRLDI512: -+ case IX86_BUILTIN_PSRLQ: -+ case IX86_BUILTIN_PSRLQ128: -+ case IX86_BUILTIN_PSRLQ128_MASK: -+ case IX86_BUILTIN_PSRLQ256: -+ case IX86_BUILTIN_PSRLQ256_MASK: -+ case IX86_BUILTIN_PSRLQ512: -+ case IX86_BUILTIN_PSRLQI: -+ case IX86_BUILTIN_PSRLQI128: -+ case IX86_BUILTIN_PSRLQI128_MASK: -+ case IX86_BUILTIN_PSRLQI256: -+ case IX86_BUILTIN_PSRLQI256_MASK: -+ case IX86_BUILTIN_PSRLQI512: -+ case IX86_BUILTIN_PSRLW: -+ case IX86_BUILTIN_PSRLW128: -+ case IX86_BUILTIN_PSRLW128_MASK: -+ case IX86_BUILTIN_PSRLW256: -+ case IX86_BUILTIN_PSRLW256_MASK: -+ case IX86_BUILTIN_PSRLW512: -+ case IX86_BUILTIN_PSRLWI: -+ case IX86_BUILTIN_PSRLWI128: -+ case IX86_BUILTIN_PSRLWI128_MASK: -+ case IX86_BUILTIN_PSRLWI256: -+ case IX86_BUILTIN_PSRLWI256_MASK: -+ case IX86_BUILTIN_PSRLWI512: -+ rcode = LSHIFTRT; -+ is_vshift = false; -+ goto do_shift; -+ case IX86_BUILTIN_PSLLVV16HI: -+ case IX86_BUILTIN_PSLLVV16SI: -+ case IX86_BUILTIN_PSLLVV2DI: -+ case IX86_BUILTIN_PSLLVV2DI_MASK: -+ case IX86_BUILTIN_PSLLVV32HI: -+ case IX86_BUILTIN_PSLLVV4DI: -+ case IX86_BUILTIN_PSLLVV4DI_MASK: -+ case IX86_BUILTIN_PSLLVV4SI: -+ case IX86_BUILTIN_PSLLVV4SI_MASK: -+ case IX86_BUILTIN_PSLLVV8DI: -+ case IX86_BUILTIN_PSLLVV8HI: -+ case IX86_BUILTIN_PSLLVV8SI: -+ case IX86_BUILTIN_PSLLVV8SI_MASK: -+ rcode = ASHIFT; -+ is_vshift = true; -+ goto do_shift; -+ case IX86_BUILTIN_PSRAVQ128: -+ case IX86_BUILTIN_PSRAVQ256: -+ case IX86_BUILTIN_PSRAVV16HI: -+ case IX86_BUILTIN_PSRAVV16SI: -+ case IX86_BUILTIN_PSRAVV32HI: -+ case IX86_BUILTIN_PSRAVV4SI: -+ case IX86_BUILTIN_PSRAVV4SI_MASK: -+ case IX86_BUILTIN_PSRAVV8DI: -+ case IX86_BUILTIN_PSRAVV8HI: -+ case IX86_BUILTIN_PSRAVV8SI: -+ case IX86_BUILTIN_PSRAVV8SI_MASK: -+ rcode = ASHIFTRT; -+ is_vshift = true; -+ goto do_shift; -+ case IX86_BUILTIN_PSRLVV16HI: -+ case IX86_BUILTIN_PSRLVV16SI: -+ case IX86_BUILTIN_PSRLVV2DI: -+ case IX86_BUILTIN_PSRLVV2DI_MASK: -+ case IX86_BUILTIN_PSRLVV32HI: -+ case IX86_BUILTIN_PSRLVV4DI: -+ case IX86_BUILTIN_PSRLVV4DI_MASK: -+ case IX86_BUILTIN_PSRLVV4SI: -+ case IX86_BUILTIN_PSRLVV4SI_MASK: -+ case IX86_BUILTIN_PSRLVV8DI: -+ case IX86_BUILTIN_PSRLVV8HI: -+ case IX86_BUILTIN_PSRLVV8SI: -+ case IX86_BUILTIN_PSRLVV8SI_MASK: -+ rcode = LSHIFTRT; -+ is_vshift = true; -+ goto do_shift; - -- if (global_options_set.x_ix86_stack_protector_guard_symbol_str) -+ do_shift: -+ gcc_assert (n_args >= 2); -+ arg0 = gimple_call_arg (stmt, 0); -+ arg1 = gimple_call_arg (stmt, 1); -+ if (n_args > 2) - { -- t = ix86_tls_stack_chk_guard_decl; -- -- if (t == NULL) -- { -- rtx x; -- -- t = build_decl -- (UNKNOWN_LOCATION, VAR_DECL, -- get_identifier (ix86_stack_protector_guard_symbol_str), -- type); -- TREE_STATIC (t) = 1; -- TREE_PUBLIC (t) = 1; -- DECL_EXTERNAL (t) = 1; -- TREE_USED (t) = 1; -- TREE_THIS_VOLATILE (t) = 1; -- DECL_ARTIFICIAL (t) = 1; -- DECL_IGNORED_P (t) = 1; -- -- /* Do not share RTL 
as the declaration is visible outside of -- current function. */ -- x = DECL_RTL (t); -- RTX_FLAG (x, used) = 1; -- -- ix86_tls_stack_chk_guard_decl = t; -- } -+ /* This is masked shift. Only optimize if the mask is all ones. */ -+ tree argl = gimple_call_arg (stmt, n_args - 1); -+ if (!tree_fits_uhwi_p (argl)) -+ break; -+ unsigned HOST_WIDE_INT mask = tree_to_uhwi (argl); -+ unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); -+ if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U) -+ break; - } -- else -+ if (is_vshift) - { -- tree asptrtype = build_pointer_type (type); -- -- t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset); -- t = build2 (MEM_REF, asptrtype, t, -- build_int_cst (asptrtype, 0)); -- TREE_THIS_VOLATILE (t) = 1; -+ if (TREE_CODE (arg1) != VECTOR_CST) -+ break; -+ count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))); -+ if (integer_zerop (arg1)) -+ count = 0; -+ else if (rcode == ASHIFTRT) -+ break; -+ else -+ for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i) -+ { -+ tree elt = VECTOR_CST_ELT (arg1, i); -+ if (!wi::neg_p (wi::to_wide (elt)) -+ && wi::to_widest (elt) < count) -+ return false; -+ } - } -- -- return t; -- } -- -- return default_stack_protect_guard (); --} -- --/* For 32-bit code we can save PIC register setup by using -- __stack_chk_fail_local hidden function instead of calling -- __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC -- register, so it is better to call __stack_chk_fail directly. */ -- --static tree ATTRIBUTE_UNUSED --ix86_stack_protect_fail (void) --{ -- return TARGET_64BIT -- ? default_external_stack_protect_fail () -- : default_hidden_stack_protect_fail (); --} -- --/* Select a format to encode pointers in exception handling data. CODE -- is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is -- true if the symbol may be affected by dynamic relocations. -- -- ??? All x86 object file formats are capable of representing this. -- After all, the relocation needed is the same as for the call insn. -- Whether or not a particular assembler allows us to enter such, I -- guess we'll have to see. */ --int --asm_preferred_eh_data_format (int code, int global) --{ -- if (flag_pic) -- { -- int type = DW_EH_PE_sdata8; -- if (!TARGET_64BIT -- || ix86_cmodel == CM_SMALL_PIC -- || (ix86_cmodel == CM_MEDIUM_PIC && (global || code))) -- type = DW_EH_PE_sdata4; -- return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type; -- } -- if (ix86_cmodel == CM_SMALL -- || (ix86_cmodel == CM_MEDIUM && code)) -- return DW_EH_PE_udata4; -- return DW_EH_PE_absptr; --} -- --/* Expand copysign from SIGN to the positive value ABS_VALUE -- storing in RESULT. If MASK is non-null, it shall be a mask to mask out -- the sign-bit. */ --static void --ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask) --{ -- machine_mode mode = GET_MODE (sign); -- rtx sgn = gen_reg_rtx (mode); -- if (mask == NULL_RTX) -- { -- machine_mode vmode; -- -- if (mode == SFmode) -- vmode = V4SFmode; -- else if (mode == DFmode) -- vmode = V2DFmode; - else -- vmode = mode; -- -- mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false); -- if (!VECTOR_MODE_P (mode)) - { -- /* We need to generate a scalar mode mask in this case. 
*/ -- rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx)); -- tmp = gen_rtx_VEC_SELECT (mode, mask, tmp); -- mask = gen_reg_rtx (mode); -- emit_insn (gen_rtx_SET (mask, tmp)); -+ arg1 = ix86_vector_shift_count (arg1); -+ if (!arg1) -+ break; -+ count = tree_to_uhwi (arg1); - } -- } -- else -- mask = gen_rtx_NOT (mode, mask); -- emit_insn (gen_rtx_SET (sgn, gen_rtx_AND (mode, mask, sign))); -- emit_insn (gen_rtx_SET (result, gen_rtx_IOR (mode, abs_value, sgn))); --} -+ if (count == 0) -+ { -+ /* Just return the first argument for shift by 0. */ -+ location_t loc = gimple_location (stmt); -+ gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0); -+ gimple_set_location (g, loc); -+ gsi_replace (gsi, g, false); -+ return true; -+ } -+ if (rcode != ASHIFTRT -+ && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)))) -+ { -+ /* For shift counts equal or greater than precision, except for -+ arithmetic right shift the result is zero. */ -+ location_t loc = gimple_location (stmt); -+ gimple *g = gimple_build_assign (gimple_call_lhs (stmt), -+ build_zero_cst (TREE_TYPE (arg0))); -+ gimple_set_location (g, loc); -+ gsi_replace (gsi, g, false); -+ return true; -+ } -+ break; - --/* Expand fabs (OP0) and return a new rtx that holds the result. The -- mask for masking out the sign-bit is stored in *SMASK, if that is -- non-null. */ --static rtx --ix86_expand_sse_fabs (rtx op0, rtx *smask) --{ -- machine_mode vmode, mode = GET_MODE (op0); -- rtx xa, mask; -+ case IX86_BUILTIN_SHUFPD: -+ arg2 = gimple_call_arg (stmt, 2); -+ if (TREE_CODE (arg2) == INTEGER_CST) -+ { -+ location_t loc = gimple_location (stmt); -+ unsigned HOST_WIDE_INT imask = TREE_INT_CST_LOW (arg2); -+ arg0 = gimple_call_arg (stmt, 0); -+ arg1 = gimple_call_arg (stmt, 1); -+ tree itype = long_long_integer_type_node; -+ tree vtype = build_vector_type (itype, 2); /* V2DI */ -+ tree_vector_builder elts (vtype, 2, 1); -+ /* Ignore bits other than the lowest 2. */ -+ elts.quick_push (build_int_cst (itype, imask & 1)); -+ imask >>= 1; -+ elts.quick_push (build_int_cst (itype, 2 + (imask & 1))); -+ tree omask = elts.build (); -+ gimple *g = gimple_build_assign (gimple_call_lhs (stmt), -+ VEC_PERM_EXPR, -+ arg0, arg1, omask); -+ gimple_set_location (g, loc); -+ gsi_replace (gsi, g, false); -+ return true; -+ } -+ // Do not error yet, the constant could be propagated later? -+ break; - -- xa = gen_reg_rtx (mode); -- if (mode == SFmode) -- vmode = V4SFmode; -- else if (mode == DFmode) -- vmode = V2DFmode; -- else -- vmode = mode; -- mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true); -- if (!VECTOR_MODE_P (mode)) -- { -- /* We need to generate a scalar mode mask in this case. */ -- rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx)); -- tmp = gen_rtx_VEC_SELECT (mode, mask, tmp); -- mask = gen_reg_rtx (mode); -- emit_insn (gen_rtx_SET (mask, tmp)); -+ default: -+ break; - } -- emit_insn (gen_rtx_SET (xa, gen_rtx_AND (mode, op0, mask))); -- -- if (smask) -- *smask = mask; -- -- return xa; --} -- --/* Expands a comparison of OP0 with OP1 using comparison code CODE, -- swapping the operands if SWAP_OPERANDS is true. The expanded -- code is a forward jump to a newly created label in case the -- comparison is true. The generated label rtx is returned. 
*/ --static rtx_code_label * --ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1, -- bool swap_operands) --{ -- bool unordered_compare = ix86_unordered_fp_compare (code); -- rtx_code_label *label; -- rtx tmp, reg; -- -- if (swap_operands) -- std::swap (op0, op1); -- -- label = gen_label_rtx (); -- tmp = gen_rtx_COMPARE (CCFPmode, op0, op1); -- if (unordered_compare) -- tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP); -- reg = gen_rtx_REG (CCFPmode, FLAGS_REG); -- emit_insn (gen_rtx_SET (reg, tmp)); -- tmp = gen_rtx_fmt_ee (code, VOIDmode, reg, const0_rtx); -- tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, -- gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx); -- tmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); -- JUMP_LABEL (tmp) = label; -- -- return label; --} -- --/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1 -- using comparison code CODE. Operands are swapped for the comparison if -- SWAP_OPERANDS is true. Returns a rtx for the generated mask. */ --static rtx --ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1, -- bool swap_operands) --{ -- rtx (*insn)(rtx, rtx, rtx, rtx); -- machine_mode mode = GET_MODE (op0); -- rtx mask = gen_reg_rtx (mode); -- -- if (swap_operands) -- std::swap (op0, op1); -- -- insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse; -- -- emit_insn (insn (mask, op0, op1, -- gen_rtx_fmt_ee (code, mode, op0, op1))); -- return mask; --} -- --/* Generate and return a rtx of mode MODE for 2**n where n is the number -- of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */ --static rtx --ix86_gen_TWO52 (machine_mode mode) --{ -- REAL_VALUE_TYPE TWO52r; -- rtx TWO52; -- -- real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23); -- TWO52 = const_double_from_real_value (TWO52r, mode); -- TWO52 = force_reg (mode, TWO52); -- -- return TWO52; --} -- --/* Expand SSE sequence for computing lround from OP1 storing -- into OP0. */ --void --ix86_expand_lround (rtx op0, rtx op1) --{ -- /* C code for the stuff we're doing below: -- tmp = op1 + copysign (nextafter (0.5, 0.0), op1) -- return (long)tmp; -- */ -- machine_mode mode = GET_MODE (op1); -- const struct real_format *fmt; -- REAL_VALUE_TYPE pred_half, half_minus_pred_half; -- rtx adj; -- -- /* load nextafter (0.5, 0.0) */ -- fmt = REAL_MODE_FORMAT (mode); -- real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode); -- real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half); - -- /* adj = copysign (0.5, op1) */ -- adj = force_reg (mode, const_double_from_real_value (pred_half, mode)); -- ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX); -- -- /* adj = op1 + adj */ -- adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT); -- -- /* op0 = (imode)adj */ -- expand_fix (op0, adj, 0); --} -- --/* Expand SSE2 sequence for computing lround from OPERAND1 storing -- into OPERAND0. */ --void --ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor) --{ -- /* C code for the stuff we're doing below (for do_floor): -- xi = (long)op1; -- xi -= (double)xi > op1 ? 1 : 0; -- return xi; -- */ -- machine_mode fmode = GET_MODE (op1); -- machine_mode imode = GET_MODE (op0); -- rtx ireg, freg, tmp; -- rtx_code_label *label; -- -- /* reg = (long)op1 */ -- ireg = gen_reg_rtx (imode); -- expand_fix (ireg, op1, 0); -- -- /* freg = (double)reg */ -- freg = gen_reg_rtx (fmode); -- expand_float (freg, ireg, 0); -- -- /* ireg = (freg > op1) ? 
ireg - 1 : ireg */ -- label = ix86_expand_sse_compare_and_jump (UNLE, -- freg, op1, !do_floor); -- tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS, -- ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT); -- emit_move_insn (ireg, tmp); -- -- emit_label (label); -- LABEL_NUSES (label) = 1; -- -- emit_move_insn (op0, ireg); -+ return false; - } - --/* Expand rint rounding OPERAND1 and storing the result in OPERAND0. */ --void --ix86_expand_rint (rtx operand0, rtx operand1) --{ -- /* C code for the stuff we're doing below: -- xa = fabs (operand1); -- if (!isless (xa, 2**52)) -- return operand1; -- two52 = 2**52; -- if (flag_rounding_math) -- { -- two52 = copysign (two52, operand1); -- xa = operand1; -- } -- xa = xa + two52 - two52; -- return copysign (xa, operand1); -- */ -- machine_mode mode = GET_MODE (operand0); -- rtx res, xa, TWO52, two52, mask; -- rtx_code_label *label; -- -- res = gen_reg_rtx (mode); -- emit_move_insn (res, operand1); -- -- /* xa = abs (operand1) */ -- xa = ix86_expand_sse_fabs (res, &mask); -- -- /* if (!isless (xa, TWO52)) goto label; */ -- TWO52 = ix86_gen_TWO52 (mode); -- label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); -- -- two52 = TWO52; -- if (flag_rounding_math) -- { -- two52 = gen_reg_rtx (mode); -- ix86_sse_copysign_to_positive (two52, TWO52, res, mask); -- xa = res; -- } -- -- xa = expand_simple_binop (mode, PLUS, xa, two52, NULL_RTX, 0, OPTAB_DIRECT); -- xa = expand_simple_binop (mode, MINUS, xa, two52, xa, 0, OPTAB_DIRECT); -- -- ix86_sse_copysign_to_positive (res, xa, res, mask); -- -- emit_label (label); -- LABEL_NUSES (label) = 1; -+/* Handler for an SVML-style interface to -+ a library with vectorized intrinsics. */ - -- emit_move_insn (operand0, res); --} -+tree -+ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in) -+{ -+ char name[20]; -+ tree fntype, new_fndecl, args; -+ unsigned arity; -+ const char *bname; -+ machine_mode el_mode, in_mode; -+ int n, in_n; - --/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing -- into OPERAND0. */ --void --ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor) --{ -- /* C code for the stuff we expand below. -- double xa = fabs (x), x2; -- if (!isless (xa, TWO52)) -- return x; -- xa = xa + TWO52 - TWO52; -- x2 = copysign (xa, x); -- Compensate. Floor: -- if (x2 > x) -- x2 -= 1; -- Compensate. Ceil: -- if (x2 < x) -- x2 += 1; -- if (HONOR_SIGNED_ZEROS (mode)) -- x2 = copysign (x2, x); -- return x2; -- */ -- machine_mode mode = GET_MODE (operand0); -- rtx xa, TWO52, tmp, one, res, mask; -- rtx_code_label *label; -+ /* The SVML is suitable for unsafe math only. */ -+ if (!flag_unsafe_math_optimizations) -+ return NULL_TREE; - -- TWO52 = ix86_gen_TWO52 (mode); -+ el_mode = TYPE_MODE (TREE_TYPE (type_out)); -+ n = TYPE_VECTOR_SUBPARTS (type_out); -+ in_mode = TYPE_MODE (TREE_TYPE (type_in)); -+ in_n = TYPE_VECTOR_SUBPARTS (type_in); -+ if (el_mode != in_mode -+ || n != in_n) -+ return NULL_TREE; - -- /* Temporary for holding the result, initialized to the input -- operand to ease control flow. 
*/ -- res = gen_reg_rtx (mode); -- emit_move_insn (res, operand1); -+ switch (fn) -+ { -+ CASE_CFN_EXP: -+ CASE_CFN_LOG: -+ CASE_CFN_LOG10: -+ CASE_CFN_POW: -+ CASE_CFN_TANH: -+ CASE_CFN_TAN: -+ CASE_CFN_ATAN: -+ CASE_CFN_ATAN2: -+ CASE_CFN_ATANH: -+ CASE_CFN_CBRT: -+ CASE_CFN_SINH: -+ CASE_CFN_SIN: -+ CASE_CFN_ASINH: -+ CASE_CFN_ASIN: -+ CASE_CFN_COSH: -+ CASE_CFN_COS: -+ CASE_CFN_ACOSH: -+ CASE_CFN_ACOS: -+ if ((el_mode != DFmode || n != 2) -+ && (el_mode != SFmode || n != 4)) -+ return NULL_TREE; -+ break; - -- /* xa = abs (operand1) */ -- xa = ix86_expand_sse_fabs (res, &mask); -+ default: -+ return NULL_TREE; -+ } - -- /* if (!isless (xa, TWO52)) goto label; */ -- label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); -+ tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn); -+ bname = IDENTIFIER_POINTER (DECL_NAME (fndecl)); - -- /* xa = xa + TWO52 - TWO52; */ -- xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT); -- xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT); -+ if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF) -+ strcpy (name, "vmlsLn4"); -+ else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG) -+ strcpy (name, "vmldLn2"); -+ else if (n == 4) -+ { -+ sprintf (name, "vmls%s", bname+10); -+ name[strlen (name)-1] = '4'; -+ } -+ else -+ sprintf (name, "vmld%s2", bname+10); - -- /* xa = copysign (xa, operand1) */ -- ix86_sse_copysign_to_positive (xa, xa, res, mask); -+ /* Convert to uppercase. */ -+ name[4] &= ~0x20; - -- /* generate 1.0 */ -- one = force_reg (mode, const_double_from_real_value (dconst1, mode)); -+ arity = 0; -+ for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args)) -+ arity++; - -- /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */ -- tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor); -- emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp))); -- tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS, -- xa, tmp, NULL_RTX, 0, OPTAB_DIRECT); -- if (!do_floor && HONOR_SIGNED_ZEROS (mode)) -- ix86_sse_copysign_to_positive (tmp, tmp, res, mask); -- emit_move_insn (res, tmp); -+ if (arity == 1) -+ fntype = build_function_type_list (type_out, type_in, NULL); -+ else -+ fntype = build_function_type_list (type_out, type_in, type_in, NULL); - -- emit_label (label); -- LABEL_NUSES (label) = 1; -+ /* Build a function declaration for the vectorized function. */ -+ new_fndecl = build_decl (BUILTINS_LOCATION, -+ FUNCTION_DECL, get_identifier (name), fntype); -+ TREE_PUBLIC (new_fndecl) = 1; -+ DECL_EXTERNAL (new_fndecl) = 1; -+ DECL_IS_NOVOPS (new_fndecl) = 1; -+ TREE_READONLY (new_fndecl) = 1; - -- emit_move_insn (operand0, res); -+ return new_fndecl; - } - --/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing -- into OPERAND0. */ --void --ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor) --{ -- /* C code for the stuff we expand below. -- double xa = fabs (x), x2; -- if (!isless (xa, TWO52)) -- return x; -- x2 = (double)(long)x; -- Compensate. Floor: -- if (x2 > x) -- x2 -= 1; -- Compensate. Ceil: -- if (x2 < x) -- x2 += 1; -- if (HONOR_SIGNED_ZEROS (mode)) -- return copysign (x2, x); -- return x2; -- */ -- machine_mode mode = GET_MODE (operand0); -- rtx xa, xi, TWO52, tmp, one, res, mask; -- rtx_code_label *label; -+/* Handler for an ACML-style interface to -+ a library with vectorized intrinsics. 
*/ - -- TWO52 = ix86_gen_TWO52 (mode); -+tree -+ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in) -+{ -+ char name[20] = "__vr.._"; -+ tree fntype, new_fndecl, args; -+ unsigned arity; -+ const char *bname; -+ machine_mode el_mode, in_mode; -+ int n, in_n; - -- /* Temporary for holding the result, initialized to the input -- operand to ease control flow. */ -- res = gen_reg_rtx (mode); -- emit_move_insn (res, operand1); -+ /* The ACML is 64bits only and suitable for unsafe math only as -+ it does not correctly support parts of IEEE with the required -+ precision such as denormals. */ -+ if (!TARGET_64BIT -+ || !flag_unsafe_math_optimizations) -+ return NULL_TREE; - -- /* xa = abs (operand1) */ -- xa = ix86_expand_sse_fabs (res, &mask); -+ el_mode = TYPE_MODE (TREE_TYPE (type_out)); -+ n = TYPE_VECTOR_SUBPARTS (type_out); -+ in_mode = TYPE_MODE (TREE_TYPE (type_in)); -+ in_n = TYPE_VECTOR_SUBPARTS (type_in); -+ if (el_mode != in_mode -+ || n != in_n) -+ return NULL_TREE; - -- /* if (!isless (xa, TWO52)) goto label; */ -- label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); -+ switch (fn) -+ { -+ CASE_CFN_SIN: -+ CASE_CFN_COS: -+ CASE_CFN_EXP: -+ CASE_CFN_LOG: -+ CASE_CFN_LOG2: -+ CASE_CFN_LOG10: -+ if (el_mode == DFmode && n == 2) -+ { -+ name[4] = 'd'; -+ name[5] = '2'; -+ } -+ else if (el_mode == SFmode && n == 4) -+ { -+ name[4] = 's'; -+ name[5] = '4'; -+ } -+ else -+ return NULL_TREE; -+ break; - -- /* xa = (double)(long)x */ -- xi = gen_reg_rtx (mode == DFmode ? DImode : SImode); -- expand_fix (xi, res, 0); -- expand_float (xa, xi, 0); -+ default: -+ return NULL_TREE; -+ } - -- /* generate 1.0 */ -- one = force_reg (mode, const_double_from_real_value (dconst1, mode)); -+ tree fndecl = mathfn_built_in (TREE_TYPE (type_in), fn); -+ bname = IDENTIFIER_POINTER (DECL_NAME (fndecl)); -+ sprintf (name + 7, "%s", bname+10); - -- /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */ -- tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor); -- emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp))); -- tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS, -- xa, tmp, NULL_RTX, 0, OPTAB_DIRECT); -- emit_move_insn (res, tmp); -+ arity = 0; -+ for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args)) -+ arity++; - -- if (HONOR_SIGNED_ZEROS (mode)) -- ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask); -+ if (arity == 1) -+ fntype = build_function_type_list (type_out, type_in, NULL); -+ else -+ fntype = build_function_type_list (type_out, type_in, type_in, NULL); - -- emit_label (label); -- LABEL_NUSES (label) = 1; -+ /* Build a function declaration for the vectorized function. */ -+ new_fndecl = build_decl (BUILTINS_LOCATION, -+ FUNCTION_DECL, get_identifier (name), fntype); -+ TREE_PUBLIC (new_fndecl) = 1; -+ DECL_EXTERNAL (new_fndecl) = 1; -+ DECL_IS_NOVOPS (new_fndecl) = 1; -+ TREE_READONLY (new_fndecl) = 1; - -- emit_move_insn (operand0, res); -+ return new_fndecl; - } - --/* Expand SSE sequence for computing round from OPERAND1 storing -- into OPERAND0. Sequence that works without relying on DImode truncation -- via cvttsd2siq that is only available on 64bit targets. */ --void --ix86_expand_rounddf_32 (rtx operand0, rtx operand1) --{ -- /* C code for the stuff we expand below. -- double xa = fabs (x), xa2, x2; -- if (!isless (xa, TWO52)) -- return x; -- Using the absolute value and copying back sign makes -- -0.0 -> -0.0 correct. -- xa2 = xa + TWO52 - TWO52; -- Compensate. 
-- dxa = xa2 - xa; -- if (dxa <= -0.5) -- xa2 += 1; -- else if (dxa > 0.5) -- xa2 -= 1; -- x2 = copysign (xa2, x); -- return x2; -- */ -- machine_mode mode = GET_MODE (operand0); -- rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask; -- rtx_code_label *label; -- -- TWO52 = ix86_gen_TWO52 (mode); -- -- /* Temporary for holding the result, initialized to the input -- operand to ease control flow. */ -- res = gen_reg_rtx (mode); -- emit_move_insn (res, operand1); -- -- /* xa = abs (operand1) */ -- xa = ix86_expand_sse_fabs (res, &mask); -+/* Returns a decl of a function that implements scatter store with -+ register type VECTYPE and index type INDEX_TYPE and SCALE. -+ Return NULL_TREE if it is not available. */ - -- /* if (!isless (xa, TWO52)) goto label; */ -- label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); -+static tree -+ix86_vectorize_builtin_scatter (const_tree vectype, -+ const_tree index_type, int scale) -+{ -+ bool si; -+ enum ix86_builtins code; - -- /* xa2 = xa + TWO52 - TWO52; */ -- xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT); -- xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT); -+ if (!TARGET_AVX512F) -+ return NULL_TREE; - -- /* dxa = xa2 - xa; */ -- dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT); -+ if ((TREE_CODE (index_type) != INTEGER_TYPE -+ && !POINTER_TYPE_P (index_type)) -+ || (TYPE_MODE (index_type) != SImode -+ && TYPE_MODE (index_type) != DImode)) -+ return NULL_TREE; - -- /* generate 0.5, 1.0 and -0.5 */ -- half = force_reg (mode, const_double_from_real_value (dconsthalf, mode)); -- one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT); -- mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX, -- 0, OPTAB_DIRECT); -+ if (TYPE_PRECISION (index_type) > POINTER_SIZE) -+ return NULL_TREE; - -- /* Compensate. */ -- /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */ -- tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false); -- emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, tmp, one))); -- xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT); -- /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */ -- tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false); -- emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, tmp, one))); -- xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT); -+ /* v*scatter* insn sign extends index to pointer mode. */ -+ if (TYPE_PRECISION (index_type) < POINTER_SIZE -+ && TYPE_UNSIGNED (index_type)) -+ return NULL_TREE; - -- /* res = copysign (xa2, operand1) */ -- ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask); -+ /* Scale can be 1, 2, 4 or 8. */ -+ if (scale <= 0 -+ || scale > 8 -+ || (scale & (scale - 1)) != 0) -+ return NULL_TREE; - -- emit_label (label); -- LABEL_NUSES (label) = 1; -+ si = TYPE_MODE (index_type) == SImode; -+ switch (TYPE_MODE (vectype)) -+ { -+ case E_V8DFmode: -+ code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF; -+ break; -+ case E_V8DImode: -+ code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI; -+ break; -+ case E_V16SFmode: -+ code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF; -+ break; -+ case E_V16SImode: -+ code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI; -+ break; -+ case E_V4DFmode: -+ if (TARGET_AVX512VL) -+ code = si ? 
IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF; -+ else -+ return NULL_TREE; -+ break; -+ case E_V4DImode: -+ if (TARGET_AVX512VL) -+ code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI; -+ else -+ return NULL_TREE; -+ break; -+ case E_V8SFmode: -+ if (TARGET_AVX512VL) -+ code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF; -+ else -+ return NULL_TREE; -+ break; -+ case E_V8SImode: -+ if (TARGET_AVX512VL) -+ code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI; -+ else -+ return NULL_TREE; -+ break; -+ case E_V2DFmode: -+ if (TARGET_AVX512VL) -+ code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF; -+ else -+ return NULL_TREE; -+ break; -+ case E_V2DImode: -+ if (TARGET_AVX512VL) -+ code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI; -+ else -+ return NULL_TREE; -+ break; -+ case E_V4SFmode: -+ if (TARGET_AVX512VL) -+ code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF; -+ else -+ return NULL_TREE; -+ break; -+ case E_V4SImode: -+ if (TARGET_AVX512VL) -+ code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI; -+ else -+ return NULL_TREE; -+ break; -+ default: -+ return NULL_TREE; -+ } - -- emit_move_insn (operand0, res); -+ return get_ix86_builtin (code); - } - --/* Expand SSE sequence for computing trunc from OPERAND1 storing -- into OPERAND0. */ --void --ix86_expand_trunc (rtx operand0, rtx operand1) --{ -- /* C code for SSE variant we expand below. -- double xa = fabs (x), x2; -- if (!isless (xa, TWO52)) -- return x; -- x2 = (double)(long)x; -- if (HONOR_SIGNED_ZEROS (mode)) -- return copysign (x2, x); -- return x2; -- */ -- machine_mode mode = GET_MODE (operand0); -- rtx xa, xi, TWO52, res, mask; -- rtx_code_label *label; -+/* Return true if it is safe to use the rsqrt optabs to optimize -+ 1.0/sqrt. */ - -- TWO52 = ix86_gen_TWO52 (mode); -+static bool -+use_rsqrt_p () -+{ -+ return (TARGET_SSE && TARGET_SSE_MATH -+ && flag_finite_math_only -+ && !flag_trapping_math -+ && flag_unsafe_math_optimizations); -+} -+ -+/* Helper for avx_vpermilps256_operand et al. This is also used by -+ the expansion functions to turn the parallel back into a mask. -+ The return value is 0 for no match and the imm8+1 for a match. */ - -- /* Temporary for holding the result, initialized to the input -- operand to ease control flow. */ -- res = gen_reg_rtx (mode); -- emit_move_insn (res, operand1); -+int -+avx_vpermilp_parallel (rtx par, machine_mode mode) -+{ -+ unsigned i, nelt = GET_MODE_NUNITS (mode); -+ unsigned mask = 0; -+ unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */ - -- /* xa = abs (operand1) */ -- xa = ix86_expand_sse_fabs (res, &mask); -+ if (XVECLEN (par, 0) != (int) nelt) -+ return 0; - -- /* if (!isless (xa, TWO52)) goto label; */ -- label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); -+ /* Validate that all of the elements are constants, and not totally -+ out of range. Copy the data into an integral array to make the -+ subsequent checks easier. */ -+ for (i = 0; i < nelt; ++i) -+ { -+ rtx er = XVECEXP (par, 0, i); -+ unsigned HOST_WIDE_INT ei; - -- /* x = (double)(long)x */ -- xi = gen_reg_rtx (mode == DFmode ? 
DImode : SImode); -- expand_fix (xi, res, 0); -- expand_float (res, xi, 0); -+ if (!CONST_INT_P (er)) -+ return 0; -+ ei = INTVAL (er); -+ if (ei >= nelt) -+ return 0; -+ ipar[i] = ei; -+ } - -- if (HONOR_SIGNED_ZEROS (mode)) -- ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask); -+ switch (mode) -+ { -+ case E_V8DFmode: -+ /* In the 512-bit DFmode case, we can only move elements within -+ a 128-bit lane. First fill the second part of the mask, -+ then fallthru. */ -+ for (i = 4; i < 6; ++i) -+ { -+ if (ipar[i] < 4 || ipar[i] >= 6) -+ return 0; -+ mask |= (ipar[i] - 4) << i; -+ } -+ for (i = 6; i < 8; ++i) -+ { -+ if (ipar[i] < 6) -+ return 0; -+ mask |= (ipar[i] - 6) << i; -+ } -+ /* FALLTHRU */ - -- emit_label (label); -- LABEL_NUSES (label) = 1; -+ case E_V4DFmode: -+ /* In the 256-bit DFmode case, we can only move elements within -+ a 128-bit lane. */ -+ for (i = 0; i < 2; ++i) -+ { -+ if (ipar[i] >= 2) -+ return 0; -+ mask |= ipar[i] << i; -+ } -+ for (i = 2; i < 4; ++i) -+ { -+ if (ipar[i] < 2) -+ return 0; -+ mask |= (ipar[i] - 2) << i; -+ } -+ break; - -- emit_move_insn (operand0, res); --} -+ case E_V16SFmode: -+ /* In 512 bit SFmode case, permutation in the upper 256 bits -+ must mirror the permutation in the lower 256-bits. */ -+ for (i = 0; i < 8; ++i) -+ if (ipar[i] + 8 != ipar[i + 8]) -+ return 0; -+ /* FALLTHRU */ - --/* Expand SSE sequence for computing trunc from OPERAND1 storing -- into OPERAND0. */ --void --ix86_expand_truncdf_32 (rtx operand0, rtx operand1) --{ -- machine_mode mode = GET_MODE (operand0); -- rtx xa, mask, TWO52, one, res, smask, tmp; -- rtx_code_label *label; -+ case E_V8SFmode: -+ /* In 256 bit SFmode case, we have full freedom of -+ movement within the low 128-bit lane, but the high 128-bit -+ lane must mirror the exact same pattern. */ -+ for (i = 0; i < 4; ++i) -+ if (ipar[i] + 4 != ipar[i + 4]) -+ return 0; -+ nelt = 4; -+ /* FALLTHRU */ - -- /* C code for SSE variant we expand below. -- double xa = fabs (x), x2; -- if (!isless (xa, TWO52)) -- return x; -- xa2 = xa + TWO52 - TWO52; -- Compensate: -- if (xa2 > xa) -- xa2 -= 1.0; -- x2 = copysign (xa2, x); -- return x2; -- */ -+ case E_V2DFmode: -+ case E_V4SFmode: -+ /* In the 128-bit case, we've full freedom in the placement of -+ the elements from the source operand. */ -+ for (i = 0; i < nelt; ++i) -+ mask |= ipar[i] << (i * (nelt / 2)); -+ break; - -- TWO52 = ix86_gen_TWO52 (mode); -+ default: -+ gcc_unreachable (); -+ } - -- /* Temporary for holding the result, initialized to the input -- operand to ease control flow. */ -- res = gen_reg_rtx (mode); -- emit_move_insn (res, operand1); -+ /* Make sure success has a non-zero value by adding one. */ -+ return mask + 1; -+} - -- /* xa = abs (operand1) */ -- xa = ix86_expand_sse_fabs (res, &smask); -+/* Helper for avx_vperm2f128_v4df_operand et al. This is also used by -+ the expansion functions to turn the parallel back into a mask. -+ The return value is 0 for no match and the imm8+1 for a match. */ - -- /* if (!isless (xa, TWO52)) goto label; */ -- label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); -+int -+avx_vperm2f128_parallel (rtx par, machine_mode mode) -+{ -+ unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2; -+ unsigned mask = 0; -+ unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. 
*/ - -- /* res = xa + TWO52 - TWO52; */ -- tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT); -- tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT); -- emit_move_insn (res, tmp); -+ if (XVECLEN (par, 0) != (int) nelt) -+ return 0; - -- /* generate 1.0 */ -- one = force_reg (mode, const_double_from_real_value (dconst1, mode)); -+ /* Validate that all of the elements are constants, and not totally -+ out of range. Copy the data into an integral array to make the -+ subsequent checks easier. */ -+ for (i = 0; i < nelt; ++i) -+ { -+ rtx er = XVECEXP (par, 0, i); -+ unsigned HOST_WIDE_INT ei; - -- /* Compensate: res = xa2 - (res > xa ? 1 : 0) */ -- mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false); -- emit_insn (gen_rtx_SET (mask, gen_rtx_AND (mode, mask, one))); -- tmp = expand_simple_binop (mode, MINUS, -- res, mask, NULL_RTX, 0, OPTAB_DIRECT); -- emit_move_insn (res, tmp); -+ if (!CONST_INT_P (er)) -+ return 0; -+ ei = INTVAL (er); -+ if (ei >= 2 * nelt) -+ return 0; -+ ipar[i] = ei; -+ } - -- /* res = copysign (res, operand1) */ -- ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask); -+ /* Validate that the halves of the permute are halves. */ -+ for (i = 0; i < nelt2 - 1; ++i) -+ if (ipar[i] + 1 != ipar[i + 1]) -+ return 0; -+ for (i = nelt2; i < nelt - 1; ++i) -+ if (ipar[i] + 1 != ipar[i + 1]) -+ return 0; - -- emit_label (label); -- LABEL_NUSES (label) = 1; -+ /* Reconstruct the mask. */ -+ for (i = 0; i < 2; ++i) -+ { -+ unsigned e = ipar[i * nelt2]; -+ if (e % nelt2) -+ return 0; -+ e /= nelt2; -+ mask |= e << (i * 4); -+ } - -- emit_move_insn (operand0, res); -+ /* Make sure success has a non-zero value by adding one. */ -+ return mask + 1; -+} -+ -+/* Return a register priority for hard reg REGNO. */ -+static int -+ix86_register_priority (int hard_regno) -+{ -+ /* ebp and r13 as the base always wants a displacement, r12 as the -+ base always wants an index. So discourage their usage in an -+ address. */ -+ if (hard_regno == R12_REG || hard_regno == R13_REG) -+ return 0; -+ if (hard_regno == BP_REG) -+ return 1; -+ /* New x86-64 int registers result in bigger code size. Discourage -+ them. */ -+ if (IN_RANGE (hard_regno, FIRST_REX_INT_REG, LAST_REX_INT_REG)) -+ return 2; -+ /* New x86-64 SSE registers result in bigger code size. Discourage -+ them. */ -+ if (IN_RANGE (hard_regno, FIRST_REX_SSE_REG, LAST_REX_SSE_REG)) -+ return 2; -+ if (IN_RANGE (hard_regno, FIRST_EXT_REX_SSE_REG, LAST_EXT_REX_SSE_REG)) -+ return 1; -+ /* Usage of AX register results in smaller code. Prefer it. */ -+ if (hard_regno == AX_REG) -+ return 4; -+ return 3; - } - --/* Expand SSE sequence for computing round from OPERAND1 storing -- into OPERAND0. */ --void --ix86_expand_round (rtx operand0, rtx operand1) --{ -- /* C code for the stuff we're doing below: -- double xa = fabs (x); -- if (!isless (xa, TWO52)) -- return x; -- xa = (double)(long)(xa + nextafter (0.5, 0.0)); -- return copysign (xa, x); -- */ -- machine_mode mode = GET_MODE (operand0); -- rtx res, TWO52, xa, xi, half, mask; -- rtx_code_label *label; -- const struct real_format *fmt; -- REAL_VALUE_TYPE pred_half, half_minus_pred_half; -+/* Implement TARGET_PREFERRED_RELOAD_CLASS. -+ -+ Put float CONST_DOUBLE in the constant pool instead of fp regs. -+ QImode must go into class Q_REGS. -+ Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and -+ movdf to do mem-to-mem moves through integer regs. 
*/ - -- /* Temporary for holding the result, initialized to the input -- operand to ease control flow. */ -- res = gen_reg_rtx (mode); -- emit_move_insn (res, operand1); -+static reg_class_t -+ix86_preferred_reload_class (rtx x, reg_class_t regclass) -+{ -+ machine_mode mode = GET_MODE (x); - -- TWO52 = ix86_gen_TWO52 (mode); -- xa = ix86_expand_sse_fabs (res, &mask); -- label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); -+ /* We're only allowed to return a subclass of CLASS. Many of the -+ following checks fail for NO_REGS, so eliminate that early. */ -+ if (regclass == NO_REGS) -+ return NO_REGS; - -- /* load nextafter (0.5, 0.0) */ -- fmt = REAL_MODE_FORMAT (mode); -- real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode); -- real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half); -+ /* All classes can load zeros. */ -+ if (x == CONST0_RTX (mode)) -+ return regclass; - -- /* xa = xa + 0.5 */ -- half = force_reg (mode, const_double_from_real_value (pred_half, mode)); -- xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT); -+ /* Force constants into memory if we are loading a (nonzero) constant into -+ an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK -+ instructions to load from a constant. */ -+ if (CONSTANT_P (x) -+ && (MAYBE_MMX_CLASS_P (regclass) -+ || MAYBE_SSE_CLASS_P (regclass) -+ || MAYBE_MASK_CLASS_P (regclass))) -+ return NO_REGS; - -- /* xa = (double)(int64_t)xa */ -- xi = gen_reg_rtx (mode == DFmode ? DImode : SImode); -- expand_fix (xi, xa, 0); -- expand_float (xa, xi, 0); -+ /* Floating-point constants need more complex checks. */ -+ if (CONST_DOUBLE_P (x)) -+ { -+ /* General regs can load everything. */ -+ if (INTEGER_CLASS_P (regclass)) -+ return regclass; - -- /* res = copysign (xa, operand1) */ -- ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask); -+ /* Floats can load 0 and 1 plus some others. Note that we eliminated -+ zero above. We only want to wind up preferring 80387 registers if -+ we plan on doing computation with them. */ -+ if (IS_STACK_MODE (mode) -+ && standard_80387_constant_p (x) > 0) -+ { -+ /* Limit class to FP regs. */ -+ if (FLOAT_CLASS_P (regclass)) -+ return FLOAT_REGS; -+ } - -- emit_label (label); -- LABEL_NUSES (label) = 1; -+ return NO_REGS; -+ } - -- emit_move_insn (operand0, res); --} -+ /* Prefer SSE regs only, if we can use them for math. */ -+ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) -+ return SSE_CLASS_P (regclass) ? regclass : NO_REGS; - --/* Expand SSE sequence for computing round -- from OP1 storing into OP0 using sse4 round insn. */ --void --ix86_expand_round_sse4 (rtx op0, rtx op1) --{ -- machine_mode mode = GET_MODE (op0); -- rtx e1, e2, res, half; -- const struct real_format *fmt; -- REAL_VALUE_TYPE pred_half, half_minus_pred_half; -- rtx (*gen_copysign) (rtx, rtx, rtx); -- rtx (*gen_round) (rtx, rtx, rtx); -+ /* Generally when we see PLUS here, it's the function invariant -+ (plus soft-fp const_int). Which can only be computed into general -+ regs. */ -+ if (GET_CODE (x) == PLUS) -+ return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS; - -- switch (mode) -+ /* QImode constants are easy to load, but non-constant QImode data -+ must go into Q_REGS. 
*/ -+ if (GET_MODE (x) == QImode && !CONSTANT_P (x)) - { -- case E_SFmode: -- gen_copysign = gen_copysignsf3; -- gen_round = gen_sse4_1_roundsf2; -- break; -- case E_DFmode: -- gen_copysign = gen_copysigndf3; -- gen_round = gen_sse4_1_rounddf2; -- break; -- default: -- gcc_unreachable (); -+ if (Q_CLASS_P (regclass)) -+ return regclass; -+ else if (reg_class_subset_p (Q_REGS, regclass)) -+ return Q_REGS; -+ else -+ return NO_REGS; - } - -- /* round (a) = trunc (a + copysign (0.5, a)) */ -- -- /* load nextafter (0.5, 0.0) */ -- fmt = REAL_MODE_FORMAT (mode); -- real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode); -- real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half); -- half = const_double_from_real_value (pred_half, mode); -+ return regclass; -+} - -- /* e1 = copysign (0.5, op1) */ -- e1 = gen_reg_rtx (mode); -- emit_insn (gen_copysign (e1, half, op1)); -+/* Discourage putting floating-point values in SSE registers unless -+ SSE math is being used, and likewise for the 387 registers. */ -+static reg_class_t -+ix86_preferred_output_reload_class (rtx x, reg_class_t regclass) -+{ -+ machine_mode mode = GET_MODE (x); - -- /* e2 = op1 + e1 */ -- e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT); -+ /* Restrict the output reload class to the register bank that we are doing -+ math on. If we would like not to return a subset of CLASS, reject this -+ alternative: if reload cannot do this, it will still use its choice. */ -+ mode = GET_MODE (x); -+ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) -+ return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS; - -- /* res = trunc (e2) */ -- res = gen_reg_rtx (mode); -- emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC))); -+ if (IS_STACK_MODE (mode)) -+ return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS; - -- emit_move_insn (op0, res); -+ return regclass; - } - --/* Handle fentry_name / fentry_section attribute. */ -- --static tree --ix86_handle_fentry_name (tree *node, tree name, tree args, -- int, bool *no_add_attrs) -+static reg_class_t -+ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass, -+ machine_mode mode, secondary_reload_info *sri) - { -- if (TREE_CODE (*node) == FUNCTION_DECL -- && TREE_CODE (TREE_VALUE (args)) == STRING_CST) -- /* Do nothing else, just set the attribute. We'll get at -- it later with lookup_attribute. */ -- ; -- else -+ /* Double-word spills from general registers to non-offsettable memory -+ references (zero-extended addresses) require special handling. */ -+ if (TARGET_64BIT -+ && MEM_P (x) -+ && GET_MODE_SIZE (mode) > UNITS_PER_WORD -+ && INTEGER_CLASS_P (rclass) -+ && !offsettable_memref_p (x)) - { -- warning (OPT_Wattributes, "%qE attribute ignored", name); -- *no_add_attrs = true; -- } -- -- return NULL_TREE; --} -- -- --/* Table of valid machine attributes. */ --static const struct attribute_spec ix86_attribute_table[] = --{ -- /* { name, min_len, max_len, decl_req, type_req, fn_type_req, -- affects_type_identity, handler, exclude } */ -- /* Stdcall attribute says callee is responsible for popping arguments -- if they are not variable. */ -- { "stdcall", 0, 0, false, true, true, true, ix86_handle_cconv_attribute, -- NULL }, -- /* Fastcall attribute says callee is responsible for popping arguments -- if they are not variable. */ -- { "fastcall", 0, 0, false, true, true, true, ix86_handle_cconv_attribute, -- NULL }, -- /* Thiscall attribute says callee is responsible for popping arguments -- if they are not variable. 
*/ -- { "thiscall", 0, 0, false, true, true, true, ix86_handle_cconv_attribute, -- NULL }, -- /* Cdecl attribute says the callee is a normal C declaration */ -- { "cdecl", 0, 0, false, true, true, true, ix86_handle_cconv_attribute, -- NULL }, -- /* Regparm attribute specifies how many integer arguments are to be -- passed in registers. */ -- { "regparm", 1, 1, false, true, true, true, ix86_handle_cconv_attribute, -- NULL }, -- /* Sseregparm attribute says we are using x86_64 calling conventions -- for FP arguments. */ -- { "sseregparm", 0, 0, false, true, true, true, ix86_handle_cconv_attribute, -- NULL }, -- /* The transactional memory builtins are implicitly regparm or fastcall -- depending on the ABI. Override the generic do-nothing attribute that -- these builtins were declared with. */ -- { "*tm regparm", 0, 0, false, true, true, true, -- ix86_handle_tm_regparm_attribute, NULL }, -- /* force_align_arg_pointer says this function realigns the stack at entry. */ -- { (const char *)&ix86_force_align_arg_pointer_string, 0, 0, -- false, true, true, false, ix86_handle_force_align_arg_pointer_attribute, -- NULL }, --#if TARGET_DLLIMPORT_DECL_ATTRIBUTES -- { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute, -- NULL }, -- { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute, -- NULL }, -- { "shared", 0, 0, true, false, false, false, -- ix86_handle_shared_attribute, NULL }, --#endif -- { "ms_struct", 0, 0, false, false, false, false, -- ix86_handle_struct_attribute, NULL }, -- { "gcc_struct", 0, 0, false, false, false, false, -- ix86_handle_struct_attribute, NULL }, --#ifdef SUBTARGET_ATTRIBUTE_TABLE -- SUBTARGET_ATTRIBUTE_TABLE, --#endif -- /* ms_abi and sysv_abi calling convention function attributes. */ -- { "ms_abi", 0, 0, false, true, true, true, ix86_handle_abi_attribute, NULL }, -- { "sysv_abi", 0, 0, false, true, true, true, ix86_handle_abi_attribute, -- NULL }, -- { "ms_abi va_list", 0, 0, false, false, false, false, NULL, NULL }, -- { "sysv_abi va_list", 0, 0, false, false, false, false, NULL, NULL }, -- { "ms_hook_prologue", 0, 0, true, false, false, false, -- ix86_handle_fndecl_attribute, NULL }, -- { "callee_pop_aggregate_return", 1, 1, false, true, true, true, -- ix86_handle_callee_pop_aggregate_return, NULL }, -- { "interrupt", 0, 0, false, true, true, false, -- ix86_handle_interrupt_attribute, NULL }, -- { "no_caller_saved_registers", 0, 0, false, true, true, false, -- ix86_handle_no_caller_saved_registers_attribute, NULL }, -- { "naked", 0, 0, true, false, false, false, -- ix86_handle_fndecl_attribute, NULL }, -- { "indirect_branch", 1, 1, true, false, false, false, -- ix86_handle_fndecl_attribute, NULL }, -- { "function_return", 1, 1, true, false, false, false, -- ix86_handle_fndecl_attribute, NULL }, -- { "indirect_return", 0, 0, false, true, true, false, -- NULL, NULL }, -- { "fentry_name", 1, 1, true, false, false, false, -- ix86_handle_fentry_name, NULL }, -- { "fentry_section", 1, 1, true, false, false, false, -- ix86_handle_fentry_name, NULL }, -- { "cf_check", 0, 0, true, false, false, false, -- ix86_handle_fndecl_attribute, NULL }, -- -- /* End element. */ -- { NULL, 0, 0, false, false, false, false, NULL, NULL } --}; -+ sri->icode = (in_p -+ ? CODE_FOR_reload_noff_load -+ : CODE_FOR_reload_noff_store); -+ /* Add the cost of moving address to a temporary. */ -+ sri->extra_cost = 1; - --/* Implement targetm.vectorize.builtin_vectorization_cost. 
*/ --static int --ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, -- tree vectype, int) --{ -- bool fp = false; -- machine_mode mode = TImode; -- int index; -- if (vectype != NULL) -- { -- fp = FLOAT_TYPE_P (vectype); -- mode = TYPE_MODE (vectype); -+ return NO_REGS; - } - -- switch (type_of_cost) -+ /* QImode spills from non-QI registers require -+ intermediate register on 32bit targets. */ -+ if (mode == QImode -+ && ((!TARGET_64BIT && !in_p -+ && INTEGER_CLASS_P (rclass) -+ && MAYBE_NON_Q_CLASS_P (rclass)) -+ || (!TARGET_AVX512DQ -+ && MAYBE_MASK_CLASS_P (rclass)))) - { -- case scalar_stmt: -- return fp ? ix86_cost->addss : COSTS_N_INSNS (1); -+ int regno = true_regnum (x); - -- case scalar_load: -- /* load/store costs are relative to register move which is 2. Recompute -- it to COSTS_N_INSNS so everything have same base. */ -- return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0] -- : ix86_cost->int_load [2]) / 2; -+ /* Return Q_REGS if the operand is in memory. */ -+ if (regno == -1) -+ return Q_REGS; - -- case scalar_store: -- return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0] -- : ix86_cost->int_store [2]) / 2; -+ return NO_REGS; -+ } - -- case vector_stmt: -- return ix86_vec_cost (mode, -- fp ? ix86_cost->addss : ix86_cost->sse_op); -+ /* This condition handles corner case where an expression involving -+ pointers gets vectorized. We're trying to use the address of a -+ stack slot as a vector initializer. - -- case vector_load: -- index = sse_store_index (mode); -- /* See PR82713 - we may end up being called on non-vector type. */ -- if (index < 0) -- index = 2; -- return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2; -+ (set (reg:V2DI 74 [ vect_cst_.2 ]) -+ (vec_duplicate:V2DI (reg/f:DI 20 frame))) - -- case vector_store: -- index = sse_store_index (mode); -- /* See PR82713 - we may end up being called on non-vector type. */ -- if (index < 0) -- index = 2; -- return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2; -+ Eventually frame gets turned into sp+offset like this: - -- case vec_to_scalar: -- case scalar_to_vec: -- return ix86_vec_cost (mode, ix86_cost->sse_op); -+ (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74]) -+ (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp) -+ (const_int 392 [0x188])))) - -- /* We should have separate costs for unaligned loads and gather/scatter. -- Do that incrementally. */ -- case unaligned_load: -- index = sse_store_index (mode); -- /* See PR82713 - we may end up being called on non-vector type. */ -- if (index < 0) -- index = 2; -- return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2; -+ That later gets turned into: - -- case unaligned_store: -- index = sse_store_index (mode); -- /* See PR82713 - we may end up being called on non-vector type. 
*/ -- if (index < 0) -- index = 2; -- return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2; -+ (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74]) -+ (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp) -+ (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64])))) - -- case vector_gather_load: -- return ix86_vec_cost (mode, -- COSTS_N_INSNS -- (ix86_cost->gather_static -- + ix86_cost->gather_per_elt -- * TYPE_VECTOR_SUBPARTS (vectype)) / 2); -+ We'll have the following reload recorded: - -- case vector_scatter_store: -- return ix86_vec_cost (mode, -- COSTS_N_INSNS -- (ix86_cost->scatter_static -- + ix86_cost->scatter_per_elt -- * TYPE_VECTOR_SUBPARTS (vectype)) / 2); -+ Reload 0: reload_in (DI) = -+ (plus:DI (reg/f:DI 7 sp) -+ (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64])) -+ reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74]) -+ SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine -+ reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188])) -+ reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74]) -+ reload_reg_rtx: (reg:V2DI 22 xmm1) - -- case cond_branch_taken: -- return ix86_cost->cond_taken_branch_cost; -+ Which isn't going to work since SSE instructions can't handle scalar -+ additions. Returning GENERAL_REGS forces the addition into integer -+ register and reload can handle subsequent reloads without problems. */ - -- case cond_branch_not_taken: -- return ix86_cost->cond_not_taken_branch_cost; -+ if (in_p && GET_CODE (x) == PLUS -+ && SSE_CLASS_P (rclass) -+ && SCALAR_INT_MODE_P (mode)) -+ return GENERAL_REGS; - -- case vec_perm: -- case vec_promote_demote: -- return ix86_vec_cost (mode, ix86_cost->sse_op); -+ return NO_REGS; -+} - -- case vec_construct: -- { -- /* N element inserts into SSE vectors. */ -- int cost = TYPE_VECTOR_SUBPARTS (vectype) * ix86_cost->sse_op; -- /* One vinserti128 for combining two SSE vectors for AVX256. */ -- if (GET_MODE_BITSIZE (mode) == 256) -- cost += ix86_vec_cost (mode, ix86_cost->addss); -- /* One vinserti64x4 and two vinserti128 for combining SSE -- and AVX256 vectors to AVX512. */ -- else if (GET_MODE_BITSIZE (mode) == 512) -- cost += 3 * ix86_vec_cost (mode, ix86_cost->addss); -- return cost; -- } -+/* Implement TARGET_CLASS_LIKELY_SPILLED_P. */ -+ -+static bool -+ix86_class_likely_spilled_p (reg_class_t rclass) -+{ -+ switch (rclass) -+ { -+ case AREG: -+ case DREG: -+ case CREG: -+ case BREG: -+ case AD_REGS: -+ case SIREG: -+ case DIREG: -+ case SSE_FIRST_REG: -+ case FP_TOP_REG: -+ case FP_SECOND_REG: -+ return true; - - default: -- gcc_unreachable (); -+ break; - } -+ -+ return false; - } - --/* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel []))) -- insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh -- insn every time. */ -+/* If we are copying between registers from different register sets -+ (e.g. FP and integer), we may need a memory location. -+ -+ The function can't work reliably when one of the CLASSES is a class -+ containing registers from multiple sets. We avoid this by never combining -+ different sets in a single alternative in the machine description. -+ Ensure that this constraint holds to avoid unexpected surprises. - --static GTY(()) rtx_insn *vselect_insn; -+ When STRICT is false, we are being called from REGISTER_MOVE_COST, -+ so do not enforce these sanity checks. - --/* Initialize vselect_insn. */ -+ To optimize register_move_cost performance, define inline variant. 
*/ - --static void --init_vselect_insn (void) -+static inline bool -+inline_secondary_memory_needed (machine_mode mode, reg_class_t class1, -+ reg_class_t class2, int strict) - { -- unsigned i; -- rtx x; -+ if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS)) -+ return false; - -- x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN)); -- for (i = 0; i < MAX_VECT_LEN; ++i) -- XVECEXP (x, 0, i) = const0_rtx; -- x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx, -- const0_rtx), x); -- x = gen_rtx_SET (const0_rtx, x); -- start_sequence (); -- vselect_insn = emit_insn (x); -- end_sequence (); --} -+ if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1) -+ || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2) -+ || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1) -+ || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2) -+ || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1) -+ || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2) -+ || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1) -+ || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2)) -+ { -+ gcc_assert (!strict || lra_in_progress); -+ return true; -+ } - --/* Construct (set target (vec_select op0 (parallel perm))) and -- return true if that's a valid instruction in the active ISA. */ -+ if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)) -+ return true; - --static bool --expand_vselect (rtx target, rtx op0, const unsigned char *perm, -- unsigned nelt, bool testing_p) --{ -- unsigned int i; -- rtx x, save_vconcat; -- int icode; -+ /* Between mask and general, we have moves no larger than word size. */ -+ if ((MASK_CLASS_P (class1) != MASK_CLASS_P (class2)) -+ && (GET_MODE_SIZE (mode) > UNITS_PER_WORD)) -+ return true; - -- if (vselect_insn == NULL_RTX) -- init_vselect_insn (); -+ /* ??? This is a lie. We do have moves between mmx/general, and for -+ mmx/sse2. But by saying we need secondary memory we discourage the -+ register allocator from using the mmx registers unless needed. */ -+ if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)) -+ return true; - -- x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1); -- PUT_NUM_ELEM (XVEC (x, 0), nelt); -- for (i = 0; i < nelt; ++i) -- XVECEXP (x, 0, i) = GEN_INT (perm[i]); -- save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0); -- XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0; -- PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target)); -- SET_DEST (PATTERN (vselect_insn)) = target; -- icode = recog_memoized (vselect_insn); -+ if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)) -+ { -+ /* SSE1 doesn't have any direct moves from other classes. */ -+ if (!TARGET_SSE2) -+ return true; - -- if (icode >= 0 && !testing_p) -- emit_insn (copy_rtx (PATTERN (vselect_insn))); -+ /* If the target says that inter-unit moves are more expensive -+ than moving through memory, then don't generate them. */ -+ if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC) -+ || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC)) -+ return true; - -- SET_DEST (PATTERN (vselect_insn)) = const0_rtx; -- XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat; -- INSN_CODE (vselect_insn) = -1; -+ /* Between SSE and general, we have moves no larger than word size. */ -+ if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) -+ return true; -+ } - -- return icode >= 0; -+ return false; - } - --/* Similar, but generate a vec_concat from op0 and op1 as well. */ -+/* Implement TARGET_SECONDARY_MEMORY_NEEDED. 
*/ - - static bool --expand_vselect_vconcat (rtx target, rtx op0, rtx op1, -- const unsigned char *perm, unsigned nelt, -- bool testing_p) -+ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1, -+ reg_class_t class2) - { -- machine_mode v2mode; -- rtx x; -- bool ok; -- -- if (vselect_insn == NULL_RTX) -- init_vselect_insn (); -+ return inline_secondary_memory_needed (mode, class1, class2, true); -+} - -- if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode)) -- return false; -- x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0); -- PUT_MODE (x, v2mode); -- XEXP (x, 0) = op0; -- XEXP (x, 1) = op1; -- ok = expand_vselect (target, x, perm, nelt, testing_p); -- XEXP (x, 0) = const0_rtx; -- XEXP (x, 1) = const0_rtx; -- return ok; --} -- --/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D -- using movss or movsd. */ --static bool --expand_vec_perm_movs (struct expand_vec_perm_d *d) --{ -- machine_mode vmode = d->vmode; -- unsigned i, nelt = d->nelt; -- rtx x; -+/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. - -- if (d->one_operand_p) -- return false; -+ get_secondary_mem widens integral modes to BITS_PER_WORD. -+ There is no need to emit full 64 bit move on 64 bit targets -+ for integral modes that can be moved using 32 bit move. */ - -- if (!(TARGET_SSE && vmode == V4SFmode) -- && !(TARGET_SSE2 && vmode == V2DFmode)) -- return false; -+static machine_mode -+ix86_secondary_memory_needed_mode (machine_mode mode) -+{ -+ if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode)) -+ return mode_for_size (32, GET_MODE_CLASS (mode), 0).require (); -+ return mode; -+} - -- /* Only the first element is changed. */ -- if (d->perm[0] != nelt && d->perm[0] != 0) -- return false; -- for (i = 1; i < nelt; ++i) -- if (d->perm[i] != i + nelt - d->perm[0]) -- return false; -+/* Implement the TARGET_CLASS_MAX_NREGS hook. - -- if (d->testing_p) -- return true; -+ On the 80386, this is the size of MODE in words, -+ except in the FP regs, where a single reg is always enough. */ - -- if (d->perm[0] == nelt) -- x = gen_rtx_VEC_MERGE (vmode, d->op1, d->op0, GEN_INT (1)); -+static unsigned char -+ix86_class_max_nregs (reg_class_t rclass, machine_mode mode) -+{ -+ if (MAYBE_INTEGER_CLASS_P (rclass)) -+ { -+ if (mode == XFmode) -+ return (TARGET_64BIT ? 2 : 3); -+ else if (mode == XCmode) -+ return (TARGET_64BIT ? 4 : 6); -+ else -+ return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD); -+ } - else -- x = gen_rtx_VEC_MERGE (vmode, d->op0, d->op1, GEN_INT (1)); -- -- emit_insn (gen_rtx_SET (d->target, x)); -- -- return true; -+ { -+ if (COMPLEX_MODE_P (mode)) -+ return 2; -+ else -+ return 1; -+ } - } - --/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D -- in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */ -+/* Implement TARGET_CAN_CHANGE_MODE_CLASS. 
*/ - - static bool --expand_vec_perm_blend (struct expand_vec_perm_d *d) -+ix86_can_change_mode_class (machine_mode from, machine_mode to, -+ reg_class_t regclass) - { -- machine_mode mmode, vmode = d->vmode; -- unsigned i, nelt = d->nelt; -- unsigned HOST_WIDE_INT mask; -- rtx target, op0, op1, maskop, x; -- rtx rperm[32], vperm; -+ if (from == to) -+ return true; - -- if (d->one_operand_p) -- return false; -- if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64 -- && (TARGET_AVX512BW -- || GET_MODE_UNIT_SIZE (vmode) >= 4)) -- ; -- else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32) -- ; -- else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode)) -- ; -- else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16) -- ; -- else -+ /* x87 registers can't do subreg at all, as all values are reformatted -+ to extended precision. */ -+ if (MAYBE_FLOAT_CLASS_P (regclass)) - return false; - -- /* This is a blend, not a permute. Elements must stay in their -- respective lanes. */ -- for (i = 0; i < nelt; ++i) -+ if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass)) - { -- unsigned e = d->perm[i]; -- if (!(e == i || e == i + nelt)) -+ /* Vector registers do not support QI or HImode loads. If we don't -+ disallow a change to these modes, reload will assume it's ok to -+ drop the subreg from (subreg:SI (reg:HI 100) 0). This affects -+ the vec_dupv4hi pattern. */ -+ if (GET_MODE_SIZE (from) < 4) - return false; - } - -- if (d->testing_p) -- return true; -- -- /* ??? Without SSE4.1, we could implement this with and/andn/or. This -- decision should be extracted elsewhere, so that we only try that -- sequence once all budget==3 options have been tried. */ -- target = d->target; -- op0 = d->op0; -- op1 = d->op1; -- mask = 0; -- -- switch (vmode) -- { -- case E_V8DFmode: -- case E_V16SFmode: -- case E_V4DFmode: -- case E_V8SFmode: -- case E_V2DFmode: -- case E_V4SFmode: -- case E_V8HImode: -- case E_V8SImode: -- case E_V32HImode: -- case E_V64QImode: -- case E_V16SImode: -- case E_V8DImode: -- for (i = 0; i < nelt; ++i) -- mask |= ((unsigned HOST_WIDE_INT) (d->perm[i] >= nelt)) << i; -- break; -- -- case E_V2DImode: -- for (i = 0; i < 2; ++i) -- mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4); -- vmode = V8HImode; -- goto do_subreg; -- -- case E_V4SImode: -- for (i = 0; i < 4; ++i) -- mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2); -- vmode = V8HImode; -- goto do_subreg; -+ return true; -+} - -- case E_V16QImode: -- /* See if bytes move in pairs so we can use pblendw with -- an immediate argument, rather than pblendvb with a vector -- argument. */ -- for (i = 0; i < 16; i += 2) -- if (d->perm[i] + 1 != d->perm[i + 1]) -- { -- use_pblendvb: -- for (i = 0; i < nelt; ++i) -- rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx); -+/* Return index of MODE in the sse load/store tables. 
*/ - -- finish_pblendvb: -- vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm)); -- vperm = force_reg (vmode, vperm); -+static inline int -+sse_store_index (machine_mode mode) -+{ -+ switch (GET_MODE_SIZE (mode)) -+ { -+ case 4: -+ return 0; -+ case 8: -+ return 1; -+ case 16: -+ return 2; -+ case 32: -+ return 3; -+ case 64: -+ return 4; -+ default: -+ return -1; -+ } -+} - -- if (GET_MODE_SIZE (vmode) == 16) -- emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm)); -- else -- emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm)); -- if (target != d->target) -- emit_move_insn (d->target, gen_lowpart (d->vmode, target)); -- return true; -- } -+/* Return the cost of moving data of mode M between a -+ register and memory. A value of 2 is the default; this cost is -+ relative to those in `REGISTER_MOVE_COST'. - -- for (i = 0; i < 8; ++i) -- mask |= (d->perm[i * 2] >= 16) << i; -- vmode = V8HImode; -- /* FALLTHRU */ -+ This function is used extensively by register_move_cost that is used to -+ build tables at startup. Make it inline in this case. -+ When IN is 2, return maximum of in and out move cost. - -- do_subreg: -- target = gen_reg_rtx (vmode); -- op0 = gen_lowpart (vmode, op0); -- op1 = gen_lowpart (vmode, op1); -- break; -+ If moving between registers and memory is more expensive than -+ between two registers, you should define this macro to express the -+ relative cost. - -- case E_V32QImode: -- /* See if bytes move in pairs. If not, vpblendvb must be used. */ -- for (i = 0; i < 32; i += 2) -- if (d->perm[i] + 1 != d->perm[i + 1]) -- goto use_pblendvb; -- /* See if bytes move in quadruplets. If yes, vpblendd -- with immediate can be used. */ -- for (i = 0; i < 32; i += 4) -- if (d->perm[i] + 2 != d->perm[i + 2]) -- break; -- if (i < 32) -+ Model also increased moving costs of QImode registers in non -+ Q_REGS classes. -+ */ -+static inline int -+inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in) -+{ -+ int cost; -+ if (FLOAT_CLASS_P (regclass)) -+ { -+ int index; -+ switch (mode) - { -- /* See if bytes move the same in both lanes. If yes, -- vpblendw with immediate can be used. */ -- for (i = 0; i < 16; i += 2) -- if (d->perm[i] + 16 != d->perm[i + 16]) -- goto use_pblendvb; -- -- /* Use vpblendw. */ -- for (i = 0; i < 16; ++i) -- mask |= (d->perm[i * 2] >= 32) << i; -- vmode = V16HImode; -- goto do_subreg; -+ case E_SFmode: -+ index = 0; -+ break; -+ case E_DFmode: -+ index = 1; -+ break; -+ case E_XFmode: -+ index = 2; -+ break; -+ default: -+ return 100; - } -- -- /* Use vpblendd. */ -- for (i = 0; i < 8; ++i) -- mask |= (d->perm[i * 4] >= 32) << i; -- vmode = V8SImode; -- goto do_subreg; -- -- case E_V16HImode: -- /* See if words move in pairs. If yes, vpblendd can be used. */ -- for (i = 0; i < 16; i += 2) -- if (d->perm[i] + 1 != d->perm[i + 1]) -- break; -- if (i < 16) -+ if (in == 2) -+ return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]); -+ return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index]; -+ } -+ if (SSE_CLASS_P (regclass)) -+ { -+ int index = sse_store_index (mode); -+ if (index == -1) -+ return 100; -+ if (in == 2) -+ return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]); -+ return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index]; -+ } -+ if (MMX_CLASS_P (regclass)) -+ { -+ int index; -+ switch (GET_MODE_SIZE (mode)) - { -- /* See if words move the same in both lanes. If not, -- vpblendvb must be used. 
*/ -- for (i = 0; i < 8; i++) -- if (d->perm[i] + 8 != d->perm[i + 8]) -- { -- /* Use vpblendvb. */ -- for (i = 0; i < 32; ++i) -- rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx); -- -- vmode = V32QImode; -- nelt = 32; -- target = gen_reg_rtx (vmode); -- op0 = gen_lowpart (vmode, op0); -- op1 = gen_lowpart (vmode, op1); -- goto finish_pblendvb; -- } -- -- /* Use vpblendw. */ -- for (i = 0; i < 16; ++i) -- mask |= (d->perm[i] >= 16) << i; -- break; -+ case 4: -+ index = 0; -+ break; -+ case 8: -+ index = 1; -+ break; -+ default: -+ return 100; - } -- -- /* Use vpblendd. */ -- for (i = 0; i < 8; ++i) -- mask |= (d->perm[i * 2] >= 16) << i; -- vmode = V8SImode; -- goto do_subreg; -- -- case E_V4DImode: -- /* Use vpblendd. */ -- for (i = 0; i < 4; ++i) -- mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2); -- vmode = V8SImode; -- goto do_subreg; -- -- default: -- gcc_unreachable (); -+ if (in == 2) -+ return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]); -+ return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index]; - } -- -- switch (vmode) -+ switch (GET_MODE_SIZE (mode)) - { -- case E_V8DFmode: -- case E_V8DImode: -- mmode = QImode; -- break; -- case E_V16SFmode: -- case E_V16SImode: -- mmode = HImode; -- break; -- case E_V32HImode: -- mmode = SImode; -- break; -- case E_V64QImode: -- mmode = DImode; -- break; -- default: -- mmode = VOIDmode; -+ case 1: -+ if (Q_CLASS_P (regclass) || TARGET_64BIT) -+ { -+ if (!in) -+ return ix86_cost->int_store[0]; -+ if (TARGET_PARTIAL_REG_DEPENDENCY -+ && optimize_function_for_speed_p (cfun)) -+ cost = ix86_cost->movzbl_load; -+ else -+ cost = ix86_cost->int_load[0]; -+ if (in == 2) -+ return MAX (cost, ix86_cost->int_store[0]); -+ return cost; -+ } -+ else -+ { -+ if (in == 2) -+ return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4); -+ if (in) -+ return ix86_cost->movzbl_load; -+ else -+ return ix86_cost->int_store[0] + 4; -+ } -+ break; -+ case 2: -+ if (in == 2) -+ return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]); -+ return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1]; -+ default: -+ if (in == 2) -+ cost = MAX (ix86_cost->int_load[2], ix86_cost->int_store[2]); -+ else if (in) -+ cost = ix86_cost->int_load[2]; -+ else -+ cost = ix86_cost->int_store[2]; -+ /* Multiply with the number of GPR moves needed. */ -+ return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD); - } -+} - -- if (mmode != VOIDmode) -- maskop = force_reg (mmode, gen_int_mode (mask, mmode)); -- else -- maskop = GEN_INT (mask); -- -- /* This matches five different patterns with the different modes. */ -- x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop); -- x = gen_rtx_SET (target, x); -- emit_insn (x); -- if (target != d->target) -- emit_move_insn (d->target, gen_lowpart (d->vmode, target)); -- -- return true; -+static int -+ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in) -+{ -+ return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0); - } - --/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D -- in terms of the variable form of vpermilps. - -- Note that we will have already failed the immediate input vpermilps, -- which requires that the high and low part shuffle be identical; the -- variable form doesn't require that. */ -+/* Return the cost of moving data from a register in class CLASS1 to -+ one in class CLASS2. 
- --static bool --expand_vec_perm_vpermil (struct expand_vec_perm_d *d) -+ It is not required that the cost always equal 2 when FROM is the same as TO; -+ on some machines it is expensive to move between registers if they are not -+ general registers. */ -+ -+static int -+ix86_register_move_cost (machine_mode mode, reg_class_t class1_i, -+ reg_class_t class2_i) - { -- rtx rperm[8], vperm; -- unsigned i; -+ enum reg_class class1 = (enum reg_class) class1_i; -+ enum reg_class class2 = (enum reg_class) class2_i; - -- if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p) -- return false; -+ /* In case we require secondary memory, compute cost of the store followed -+ by load. In order to avoid bad register allocation choices, we need -+ for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */ - -- /* We can only permute within the 128-bit lane. */ -- for (i = 0; i < 8; ++i) -+ if (inline_secondary_memory_needed (mode, class1, class2, false)) - { -- unsigned e = d->perm[i]; -- if (i < 4 ? e >= 4 : e < 4) -- return false; -- } -+ int cost = 1; - -- if (d->testing_p) -- return true; -+ cost += inline_memory_move_cost (mode, class1, 2); -+ cost += inline_memory_move_cost (mode, class2, 2); - -- for (i = 0; i < 8; ++i) -- { -- unsigned e = d->perm[i]; -+ /* In case of copying from general_purpose_register we may emit multiple -+ stores followed by single load causing memory size mismatch stall. -+ Count this as arbitrarily high cost of 20. */ -+ if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD -+ && TARGET_MEMORY_MISMATCH_STALL -+ && targetm.class_max_nregs (class1, mode) -+ > targetm.class_max_nregs (class2, mode)) -+ cost += 20; - -- /* Within each 128-bit lane, the elements of op0 are numbered -- from 0 and the elements of op1 are numbered from 4. */ -- if (e >= 8 + 4) -- e -= 8; -- else if (e >= 4) -- e -= 4; -+ /* In the case of FP/MMX moves, the registers actually overlap, and we -+ have to switch modes in order to treat them differently. */ -+ if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2)) -+ || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1))) -+ cost += 20; - -- rperm[i] = GEN_INT (e); -+ return cost; - } - -- vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm)); -- vperm = force_reg (V8SImode, vperm); -- emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm)); -- -- return true; --} -- --/* Return true if permutation D can be performed as VMODE permutation -- instead. */ -+ /* Moves between SSE/MMX and integer unit are expensive. */ -+ if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2) -+ || SSE_CLASS_P (class1) != SSE_CLASS_P (class2)) - --static bool --valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d) --{ -- unsigned int i, j, chunk; -+ /* ??? By keeping returned value relatively high, we limit the number -+ of moves between integer and MMX/SSE registers for all targets. -+ Additionally, high value prevents problem with x86_modes_tieable_p(), -+ where integer modes in MMX/SSE registers are not tieable -+ because of missing QImode and HImode moves to, from or between -+ MMX/SSE registers. */ -+ return MAX (8, MMX_CLASS_P (class1) || MMX_CLASS_P (class2) -+ ? 
ix86_cost->mmxsse_to_integer : ix86_cost->ssemmx_to_integer); - -- if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT -- || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT -- || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode)) -- return false; -+ if (MAYBE_FLOAT_CLASS_P (class1)) -+ return ix86_cost->fp_move; -+ if (MAYBE_SSE_CLASS_P (class1)) -+ { -+ if (GET_MODE_BITSIZE (mode) <= 128) -+ return ix86_cost->xmm_move; -+ if (GET_MODE_BITSIZE (mode) <= 256) -+ return ix86_cost->ymm_move; -+ return ix86_cost->zmm_move; -+ } -+ if (MAYBE_MMX_CLASS_P (class1)) -+ return ix86_cost->mmx_move; -+ return 2; -+} - -- if (GET_MODE_NUNITS (vmode) >= d->nelt) -- return true; -+/* Implement TARGET_HARD_REGNO_NREGS. This is ordinarily the length in -+ words of a value of mode MODE but can be less for certain modes in -+ special long registers. - -- chunk = d->nelt / GET_MODE_NUNITS (vmode); -- for (i = 0; i < d->nelt; i += chunk) -- if (d->perm[i] & (chunk - 1)) -- return false; -- else -- for (j = 1; j < chunk; ++j) -- if (d->perm[i] + j != d->perm[i + j]) -- return false; -+ Actually there are no two word move instructions for consecutive -+ registers. And only registers 0-3 may have mov byte instructions -+ applied to them. */ - -- return true; -+static unsigned int -+ix86_hard_regno_nregs (unsigned int regno, machine_mode mode) -+{ -+ if (GENERAL_REGNO_P (regno)) -+ { -+ if (mode == XFmode) -+ return TARGET_64BIT ? 2 : 3; -+ if (mode == XCmode) -+ return TARGET_64BIT ? 4 : 6; -+ return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD); -+ } -+ if (COMPLEX_MODE_P (mode)) -+ return 2; -+ if (mode == V64SFmode || mode == V64SImode) -+ return 4; -+ return 1; - } - --/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D -- in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */ -+/* Implement TARGET_HARD_REGNO_MODE_OK. */ - - static bool --expand_vec_perm_pshufb (struct expand_vec_perm_d *d) -+ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode) - { -- unsigned i, nelt, eltsz, mask; -- unsigned char perm[64]; -- machine_mode vmode = V16QImode; -- rtx rperm[64], vperm, target, op0, op1; -- -- nelt = d->nelt; -- -- if (!d->one_operand_p) -- { -- if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16) -- { -- if (TARGET_AVX2 -- && valid_perm_using_mode_p (V2TImode, d)) -- { -- if (d->testing_p) -- return true; -- -- /* Use vperm2i128 insn. The pattern uses -- V4DImode instead of V2TImode. */ -- target = d->target; -- if (d->vmode != V4DImode) -- target = gen_reg_rtx (V4DImode); -- op0 = gen_lowpart (V4DImode, d->op0); -- op1 = gen_lowpart (V4DImode, d->op1); -- rperm[0] -- = GEN_INT ((d->perm[0] / (nelt / 2)) -- | ((d->perm[nelt / 2] / (nelt / 2)) * 16)); -- emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0])); -- if (target != d->target) -- emit_move_insn (d->target, gen_lowpart (d->vmode, target)); -- return true; -- } -- return false; -- } -- } -- else -+ /* Flags and only flags can only hold CCmode values. 
*/ -+ if (CC_REGNO_P (regno)) -+ return GET_MODE_CLASS (mode) == MODE_CC; -+ if (GET_MODE_CLASS (mode) == MODE_CC -+ || GET_MODE_CLASS (mode) == MODE_RANDOM -+ || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT) -+ return false; -+ if (STACK_REGNO_P (regno)) -+ return VALID_FP_MODE_P (mode); -+ if (MASK_REGNO_P (regno)) -+ return (VALID_MASK_REG_MODE (mode) -+ || (TARGET_AVX512BW -+ && VALID_MASK_AVX512BW_MODE (mode))); -+ if (SSE_REGNO_P (regno)) - { -- if (GET_MODE_SIZE (d->vmode) == 16) -- { -- if (!TARGET_SSSE3) -- return false; -- } -- else if (GET_MODE_SIZE (d->vmode) == 32) -- { -- if (!TARGET_AVX2) -- return false; -- -- /* V4DImode should be already handled through -- expand_vselect by vpermq instruction. */ -- gcc_assert (d->vmode != V4DImode); -- -- vmode = V32QImode; -- if (d->vmode == V8SImode -- || d->vmode == V16HImode -- || d->vmode == V32QImode) -- { -- /* First see if vpermq can be used for -- V8SImode/V16HImode/V32QImode. */ -- if (valid_perm_using_mode_p (V4DImode, d)) -- { -- for (i = 0; i < 4; i++) -- perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3; -- if (d->testing_p) -- return true; -- target = gen_reg_rtx (V4DImode); -- if (expand_vselect (target, gen_lowpart (V4DImode, d->op0), -- perm, 4, false)) -- { -- emit_move_insn (d->target, -- gen_lowpart (d->vmode, target)); -- return true; -- } -- return false; -- } -- -- /* Next see if vpermd can be used. */ -- if (valid_perm_using_mode_p (V8SImode, d)) -- vmode = V8SImode; -- } -- /* Or if vpermps can be used. */ -- else if (d->vmode == V8SFmode) -- vmode = V8SImode; -+ /* We implement the move patterns for all vector modes into and -+ out of SSE registers, even when no operation instructions -+ are available. */ - -- if (vmode == V32QImode) -- { -- /* vpshufb only works intra lanes, it is not -- possible to shuffle bytes in between the lanes. */ -- for (i = 0; i < nelt; ++i) -- if ((d->perm[i] ^ i) & (nelt / 2)) -- return false; -- } -- } -- else if (GET_MODE_SIZE (d->vmode) == 64) -- { -- if (!TARGET_AVX512BW) -- return false; -+ /* For AVX-512 we allow, regardless of regno: -+ - XI mode -+ - any of 512-bit wide vector mode -+ - any scalar mode. */ -+ if (TARGET_AVX512F -+ && (mode == XImode -+ || VALID_AVX512F_REG_MODE (mode) -+ || VALID_AVX512F_SCALAR_MODE (mode))) -+ return true; - -- /* If vpermq didn't work, vpshufb won't work either. */ -- if (d->vmode == V8DFmode || d->vmode == V8DImode) -- return false; -+ /* For AVX-5124FMAPS or AVX-5124VNNIW -+ allow V64SF and V64SI modes for special regnos. */ -+ if ((TARGET_AVX5124FMAPS || TARGET_AVX5124VNNIW) -+ && (mode == V64SFmode || mode == V64SImode) -+ && MOD4_SSE_REGNO_P (regno)) -+ return true; - -- vmode = V64QImode; -- if (d->vmode == V16SImode -- || d->vmode == V32HImode -- || d->vmode == V64QImode) -- { -- /* First see if vpermq can be used for -- V16SImode/V32HImode/V64QImode. */ -- if (valid_perm_using_mode_p (V8DImode, d)) -- { -- for (i = 0; i < 8; i++) -- perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7; -- if (d->testing_p) -- return true; -- target = gen_reg_rtx (V8DImode); -- if (expand_vselect (target, gen_lowpart (V8DImode, d->op0), -- perm, 8, false)) -- { -- emit_move_insn (d->target, -- gen_lowpart (d->vmode, target)); -- return true; -- } -- return false; -- } -+ /* TODO check for QI/HI scalars. */ -+ /* AVX512VL allows sse regs16+ for 128/256 bit modes. 
*/ -+ if (TARGET_AVX512VL -+ && (mode == OImode -+ || mode == TImode -+ || VALID_AVX256_REG_MODE (mode) -+ || VALID_AVX512VL_128_REG_MODE (mode))) -+ return true; - -- /* Next see if vpermd can be used. */ -- if (valid_perm_using_mode_p (V16SImode, d)) -- vmode = V16SImode; -- } -- /* Or if vpermps can be used. */ -- else if (d->vmode == V16SFmode) -- vmode = V16SImode; -- if (vmode == V64QImode) -- { -- /* vpshufb only works intra lanes, it is not -- possible to shuffle bytes in between the lanes. */ -- for (i = 0; i < nelt; ++i) -- if ((d->perm[i] ^ i) & (nelt / 4)) -- return false; -- } -- } -- else -+ /* xmm16-xmm31 are only available for AVX-512. */ -+ if (EXT_REX_SSE_REGNO_P (regno)) - return false; -- } -- -- if (d->testing_p) -- return true; - -- if (vmode == V8SImode) -- for (i = 0; i < 8; ++i) -- rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7); -- else if (vmode == V16SImode) -- for (i = 0; i < 16; ++i) -- rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15); -- else -+ /* OImode and AVX modes are available only when AVX is enabled. */ -+ return ((TARGET_AVX -+ && VALID_AVX256_REG_OR_OI_MODE (mode)) -+ || VALID_SSE_REG_MODE (mode) -+ || VALID_SSE2_REG_MODE (mode) -+ || VALID_MMX_REG_MODE (mode) -+ || VALID_MMX_REG_MODE_3DNOW (mode)); -+ } -+ if (MMX_REGNO_P (regno)) - { -- eltsz = GET_MODE_UNIT_SIZE (d->vmode); -- if (!d->one_operand_p) -- mask = 2 * nelt - 1; -- else if (vmode == V16QImode) -- mask = nelt - 1; -- else if (vmode == V64QImode) -- mask = nelt / 4 - 1; -- else -- mask = nelt / 2 - 1; -- -- for (i = 0; i < nelt; ++i) -- { -- unsigned j, e = d->perm[i] & mask; -- for (j = 0; j < eltsz; ++j) -- rperm[i * eltsz + j] = GEN_INT (e * eltsz + j); -- } -- } -- -- vperm = gen_rtx_CONST_VECTOR (vmode, -- gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm)); -- vperm = force_reg (vmode, vperm); -- -- target = d->target; -- if (d->vmode != vmode) -- target = gen_reg_rtx (vmode); -- op0 = gen_lowpart (vmode, d->op0); -- if (d->one_operand_p) -- { -- if (vmode == V16QImode) -- emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm)); -- else if (vmode == V32QImode) -- emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm)); -- else if (vmode == V64QImode) -- emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm)); -- else if (vmode == V8SFmode) -- emit_insn (gen_avx2_permvarv8sf (target, op0, vperm)); -- else if (vmode == V8SImode) -- emit_insn (gen_avx2_permvarv8si (target, op0, vperm)); -- else if (vmode == V16SFmode) -- emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm)); -- else if (vmode == V16SImode) -- emit_insn (gen_avx512f_permvarv16si (target, op0, vperm)); -- else -- gcc_unreachable (); -+ /* We implement the move patterns for 3DNOW modes even in MMX mode, -+ so if the register is available at all, then we can move data of -+ the given mode into or out of it. */ -+ return (VALID_MMX_REG_MODE (mode) -+ || VALID_MMX_REG_MODE_3DNOW (mode)); - } -- else -+ -+ if (mode == QImode) - { -- op1 = gen_lowpart (vmode, d->op1); -- emit_insn (gen_xop_pperm (target, op0, op1, vperm)); -+ /* Take care for QImode values - they can be in non-QI regs, -+ but then they do cause partial register stalls. */ -+ if (ANY_QI_REGNO_P (regno)) -+ return true; -+ if (!TARGET_PARTIAL_REG_STALL) -+ return true; -+ /* LRA checks if the hard register is OK for the given mode. -+ QImode values can live in non-QI regs, so we allow all -+ registers here. 
*/ -+ if (lra_in_progress) -+ return true; -+ return !can_create_pseudo_p (); - } -- if (target != d->target) -- emit_move_insn (d->target, gen_lowpart (d->vmode, target)); -+ /* We handle both integer and floats in the general purpose registers. */ -+ else if (VALID_INT_MODE_P (mode)) -+ return true; -+ else if (VALID_FP_MODE_P (mode)) -+ return true; -+ else if (VALID_DFP_MODE_P (mode)) -+ return true; -+ /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go -+ on to use that value in smaller contexts, this can easily force a -+ pseudo to be allocated to GENERAL_REGS. Since this is no worse than -+ supporting DImode, allow it. */ -+ else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode)) -+ return true; - -- return true; -+ return false; - } - --/* For V*[QHS]Imode permutations, check if the same permutation -- can't be performed in a 2x, 4x or 8x wider inner mode. */ -+/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. The only ABI that -+ saves SSE registers across calls is Win64 (thus no need to check the -+ current ABI here), and with AVX enabled Win64 only guarantees that -+ the low 16 bytes are saved. */ - - static bool --canonicalize_vector_int_perm (const struct expand_vec_perm_d *d, -- struct expand_vec_perm_d *nd) -+ix86_hard_regno_call_part_clobbered (unsigned int, unsigned int regno, -+ machine_mode mode) - { -- int i; -- machine_mode mode = VOIDmode; -- -- switch (d->vmode) -- { -- case E_V16QImode: mode = V8HImode; break; -- case E_V32QImode: mode = V16HImode; break; -- case E_V64QImode: mode = V32HImode; break; -- case E_V8HImode: mode = V4SImode; break; -- case E_V16HImode: mode = V8SImode; break; -- case E_V32HImode: mode = V16SImode; break; -- case E_V4SImode: mode = V2DImode; break; -- case E_V8SImode: mode = V4DImode; break; -- case E_V16SImode: mode = V8DImode; break; -- default: return false; -- } -- for (i = 0; i < d->nelt; i += 2) -- if ((d->perm[i] & 1) || d->perm[i + 1] != d->perm[i] + 1) -- return false; -- nd->vmode = mode; -- nd->nelt = d->nelt / 2; -- for (i = 0; i < nd->nelt; i++) -- nd->perm[i] = d->perm[2 * i] / 2; -- if (GET_MODE_INNER (mode) != DImode) -- canonicalize_vector_int_perm (nd, nd); -- if (nd != d) -- { -- nd->one_operand_p = d->one_operand_p; -- nd->testing_p = d->testing_p; -- if (d->op0 == d->op1) -- nd->op0 = nd->op1 = gen_lowpart (nd->vmode, d->op0); -- else -- { -- nd->op0 = gen_lowpart (nd->vmode, d->op0); -- nd->op1 = gen_lowpart (nd->vmode, d->op1); -- } -- if (d->testing_p) -- nd->target = gen_raw_REG (nd->vmode, LAST_VIRTUAL_REGISTER + 1); -- else -- nd->target = gen_reg_rtx (nd->vmode); -- } -- return true; -+ return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16; - } - --/* Try to expand one-operand permutation with constant mask. */ -+/* A subroutine of ix86_modes_tieable_p. Return true if MODE is a -+ tieable integer mode. 
*/ - - static bool --ix86_expand_vec_one_operand_perm_avx512 (struct expand_vec_perm_d *d) -+ix86_tieable_integer_mode_p (machine_mode mode) - { -- machine_mode mode = GET_MODE (d->op0); -- machine_mode maskmode = mode; -- rtx (*gen) (rtx, rtx, rtx) = NULL; -- rtx target, op0, mask; -- rtx vec[64]; -+ switch (mode) -+ { -+ case E_HImode: -+ case E_SImode: -+ return true; - -- if (!rtx_equal_p (d->op0, d->op1)) -- return false; -+ case E_QImode: -+ return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL; - -- if (!TARGET_AVX512F) -- return false; -+ case E_DImode: -+ return TARGET_64BIT; - -- switch (mode) -- { -- case E_V16SImode: -- gen = gen_avx512f_permvarv16si; -- break; -- case E_V16SFmode: -- gen = gen_avx512f_permvarv16sf; -- maskmode = V16SImode; -- break; -- case E_V8DImode: -- gen = gen_avx512f_permvarv8di; -- break; -- case E_V8DFmode: -- gen = gen_avx512f_permvarv8df; -- maskmode = V8DImode; -- break; - default: - return false; - } -- -- target = d->target; -- op0 = d->op0; -- for (int i = 0; i < d->nelt; ++i) -- vec[i] = GEN_INT (d->perm[i]); -- mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec)); -- emit_insn (gen (target, op0, force_reg (maskmode, mask))); -- return true; - } - --/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D -- in a single instruction. */ -+/* Implement TARGET_MODES_TIEABLE_P. -+ -+ Return true if MODE1 is accessible in a register that can hold MODE2 -+ without copying. That is, all register classes that can hold MODE2 -+ can also hold MODE1. */ - - static bool --expand_vec_perm_1 (struct expand_vec_perm_d *d) -+ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2) - { -- unsigned i, nelt = d->nelt; -- struct expand_vec_perm_d nd; -- -- /* Check plain VEC_SELECT first, because AVX has instructions that could -- match both SEL and SEL+CONCAT, but the plain SEL will allow a memory -- input where SEL+CONCAT may not. */ -- if (d->one_operand_p) -- { -- int mask = nelt - 1; -- bool identity_perm = true; -- bool broadcast_perm = true; -- -- for (i = 0; i < nelt; i++) -- { -- nd.perm[i] = d->perm[i] & mask; -- if (nd.perm[i] != i) -- identity_perm = false; -- if (nd.perm[i]) -- broadcast_perm = false; -- } -+ if (mode1 == mode2) -+ return true; - -- if (identity_perm) -- { -- if (!d->testing_p) -- emit_move_insn (d->target, d->op0); -- return true; -- } -- else if (broadcast_perm && TARGET_AVX2) -- { -- /* Use vpbroadcast{b,w,d}. */ -- rtx (*gen) (rtx, rtx) = NULL; -- switch (d->vmode) -- { -- case E_V64QImode: -- if (TARGET_AVX512BW) -- gen = gen_avx512bw_vec_dupv64qi_1; -- break; -- case E_V32QImode: -- gen = gen_avx2_pbroadcastv32qi_1; -- break; -- case E_V32HImode: -- if (TARGET_AVX512BW) -- gen = gen_avx512bw_vec_dupv32hi_1; -- break; -- case E_V16HImode: -- gen = gen_avx2_pbroadcastv16hi_1; -- break; -- case E_V16SImode: -- if (TARGET_AVX512F) -- gen = gen_avx512f_vec_dupv16si_1; -- break; -- case E_V8SImode: -- gen = gen_avx2_pbroadcastv8si_1; -- break; -- case E_V16QImode: -- gen = gen_avx2_pbroadcastv16qi; -- break; -- case E_V8HImode: -- gen = gen_avx2_pbroadcastv8hi; -- break; -- case E_V16SFmode: -- if (TARGET_AVX512F) -- gen = gen_avx512f_vec_dupv16sf_1; -- break; -- case E_V8SFmode: -- gen = gen_avx2_vec_dupv8sf_1; -- break; -- case E_V8DFmode: -- if (TARGET_AVX512F) -- gen = gen_avx512f_vec_dupv8df_1; -- break; -- case E_V8DImode: -- if (TARGET_AVX512F) -- gen = gen_avx512f_vec_dupv8di_1; -- break; -- /* For other modes prefer other shuffles this function creates. 
*/ -- default: break; -- } -- if (gen != NULL) -- { -- if (!d->testing_p) -- emit_insn (gen (d->target, d->op0)); -- return true; -- } -- } -+ if (ix86_tieable_integer_mode_p (mode1) -+ && ix86_tieable_integer_mode_p (mode2)) -+ return true; - -- if (expand_vselect (d->target, d->op0, nd.perm, nelt, d->testing_p)) -- return true; -+ /* MODE2 being XFmode implies fp stack or general regs, which means we -+ can tie any smaller floating point modes to it. Note that we do not -+ tie this with TFmode. */ -+ if (mode2 == XFmode) -+ return mode1 == SFmode || mode1 == DFmode; - -- /* There are plenty of patterns in sse.md that are written for -- SEL+CONCAT and are not replicated for a single op. Perhaps -- that should be changed, to avoid the nastiness here. */ -+ /* MODE2 being DFmode implies fp stack, general or sse regs, which means -+ that we can tie it with SFmode. */ -+ if (mode2 == DFmode) -+ return mode1 == SFmode; - -- /* Recognize interleave style patterns, which means incrementing -- every other permutation operand. */ -- for (i = 0; i < nelt; i += 2) -- { -- nd.perm[i] = d->perm[i] & mask; -- nd.perm[i + 1] = (d->perm[i + 1] & mask) + nelt; -- } -- if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt, -- d->testing_p)) -- return true; -+ /* If MODE2 is only appropriate for an SSE register, then tie with -+ any other mode acceptable to SSE registers. */ -+ if (GET_MODE_SIZE (mode2) == 64 -+ && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2)) -+ return (GET_MODE_SIZE (mode1) == 64 -+ && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1)); -+ if (GET_MODE_SIZE (mode2) == 32 -+ && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2)) -+ return (GET_MODE_SIZE (mode1) == 32 -+ && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1)); -+ if (GET_MODE_SIZE (mode2) == 16 -+ && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2)) -+ return (GET_MODE_SIZE (mode1) == 16 -+ && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1)); - -- /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */ -- if (nelt >= 4) -- { -- for (i = 0; i < nelt; i += 4) -- { -- nd.perm[i + 0] = d->perm[i + 0] & mask; -- nd.perm[i + 1] = d->perm[i + 1] & mask; -- nd.perm[i + 2] = (d->perm[i + 2] & mask) + nelt; -- nd.perm[i + 3] = (d->perm[i + 3] & mask) + nelt; -- } -+ /* If MODE2 is appropriate for an MMX register, then tie -+ with any other mode acceptable to MMX registers. */ -+ if (GET_MODE_SIZE (mode2) == 8 -+ && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2)) -+ return (GET_MODE_SIZE (mode1) == 8 -+ && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1)); - -- if (expand_vselect_vconcat (d->target, d->op0, d->op0, nd.perm, nelt, -- d->testing_p)) -- return true; -- } -- } -+ return false; -+} - -- /* Try movss/movsd instructions. */ -- if (expand_vec_perm_movs (d)) -- return true; -+/* Return the cost of moving between two registers of mode MODE. */ - -- /* Finally, try the fully general two operand permute. */ -- if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt, -- d->testing_p)) -- return true; -+static int -+ix86_set_reg_reg_cost (machine_mode mode) -+{ -+ unsigned int units = UNITS_PER_WORD; - -- /* Recognize interleave style patterns with reversed operands. 
*/ -- if (!d->one_operand_p) -+ switch (GET_MODE_CLASS (mode)) - { -- for (i = 0; i < nelt; ++i) -- { -- unsigned e = d->perm[i]; -- if (e >= nelt) -- e -= nelt; -- else -- e += nelt; -- nd.perm[i] = e; -- } -+ default: -+ break; - -- if (expand_vselect_vconcat (d->target, d->op1, d->op0, nd.perm, nelt, -- d->testing_p)) -- return true; -- } -+ case MODE_CC: -+ units = GET_MODE_SIZE (CCmode); -+ break; - -- /* Try the SSE4.1 blend variable merge instructions. */ -- if (expand_vec_perm_blend (d)) -- return true; -+ case MODE_FLOAT: -+ if ((TARGET_SSE && mode == TFmode) -+ || (TARGET_80387 && mode == XFmode) -+ || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode) -+ || ((TARGET_80387 || TARGET_SSE) && mode == SFmode)) -+ units = GET_MODE_SIZE (mode); -+ break; - -- /* Try one of the AVX vpermil variable permutations. */ -- if (expand_vec_perm_vpermil (d)) -- return true; -+ case MODE_COMPLEX_FLOAT: -+ if ((TARGET_SSE && mode == TCmode) -+ || (TARGET_80387 && mode == XCmode) -+ || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode) -+ || ((TARGET_80387 || TARGET_SSE) && mode == SCmode)) -+ units = GET_MODE_SIZE (mode); -+ break; - -- /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128, -- vpshufb, vpermd, vpermps or vpermq variable permutation. */ -- if (expand_vec_perm_pshufb (d)) -- return true; -+ case MODE_VECTOR_INT: -+ case MODE_VECTOR_FLOAT: -+ if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode)) -+ || (TARGET_AVX && VALID_AVX256_REG_MODE (mode)) -+ || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode)) -+ || (TARGET_SSE && VALID_SSE_REG_MODE (mode)) -+ || (TARGET_MMX && VALID_MMX_REG_MODE (mode))) -+ units = GET_MODE_SIZE (mode); -+ } - -- /* Try the AVX2 vpalignr instruction. */ -- if (expand_vec_perm_palignr (d, true)) -- return true; -+ /* Return the cost of moving between two registers of mode MODE, -+ assuming that the move will be in pieces of at most UNITS bytes. */ -+ return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units)); -+} - -- /* Try the AVX512F vperm{s,d} instructions. */ -- if (ix86_expand_vec_one_operand_perm_avx512 (d)) -- return true; -+/* Return cost of vector operation in MODE given that scalar version has -+ COST. */ - -- /* Try the AVX512F vpermt2/vpermi2 instructions. */ -- if (ix86_expand_vec_perm_vpermt2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d)) -- return true; -+static int -+ix86_vec_cost (machine_mode mode, int cost) -+{ -+ if (!VECTOR_MODE_P (mode)) -+ return cost; - -- /* See if we can get the same permutation in different vector integer -- mode. */ -- if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd)) -- { -- if (!d->testing_p) -- emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target)); -- return true; -- } -- return false; -+ if (GET_MODE_BITSIZE (mode) == 128 -+ && TARGET_SSE_SPLIT_REGS) -+ return cost * 2; -+ if (GET_MODE_BITSIZE (mode) > 128 -+ && TARGET_AVX128_OPTIMAL) -+ return cost * GET_MODE_BITSIZE (mode) / 128; -+ return cost; - } - --/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D -- in terms of a pair of pshuflw + pshufhw instructions. */ -+/* Return cost of multiplication in MODE. 
*/ - --static bool --expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d) -+static int -+ix86_multiplication_cost (const struct processor_costs *cost, -+ enum machine_mode mode) - { -- unsigned char perm2[MAX_VECT_LEN]; -- unsigned i; -- bool ok; -- -- if (d->vmode != V8HImode || !d->one_operand_p) -- return false; -+ machine_mode inner_mode = mode; -+ if (VECTOR_MODE_P (mode)) -+ inner_mode = GET_MODE_INNER (mode); - -- /* The two permutations only operate in 64-bit lanes. */ -- for (i = 0; i < 4; ++i) -- if (d->perm[i] >= 4) -- return false; -- for (i = 4; i < 8; ++i) -- if (d->perm[i] < 4) -- return false; -+ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) -+ return inner_mode == DFmode ? cost->mulsd : cost->mulss; -+ else if (X87_FLOAT_MODE_P (mode)) -+ return cost->fmul; -+ else if (FLOAT_MODE_P (mode)) -+ return ix86_vec_cost (mode, -+ inner_mode == DFmode ? cost->mulsd : cost->mulss); -+ else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) -+ { -+ /* vpmullq is used in this case. No emulation is needed. */ -+ if (TARGET_AVX512DQ) -+ return ix86_vec_cost (mode, cost->mulss); - -- if (d->testing_p) -- return true; -+ /* V*QImode is emulated with 7-13 insns. */ -+ if (mode == V16QImode || mode == V32QImode) -+ { -+ int extra = 11; -+ if (TARGET_XOP && mode == V16QImode) -+ extra = 5; -+ else if (TARGET_SSSE3) -+ extra = 6; -+ return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * extra); -+ } -+ /* V*DImode is emulated with 5-8 insns. */ -+ else if (mode == V2DImode || mode == V4DImode) -+ { -+ if (TARGET_XOP && mode == V2DImode) -+ return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 3); -+ else -+ return ix86_vec_cost (mode, cost->mulss * 3 + cost->sse_op * 5); -+ } -+ /* Without sse4.1, we don't have PMULLD; it's emulated with 7 -+ insns, including two PMULUDQ. */ -+ else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX)) -+ return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5); -+ else -+ return ix86_vec_cost (mode, cost->mulss); -+ } -+ else -+ return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7); -+} - -- /* Emit the pshuflw. */ -- memcpy (perm2, d->perm, 4); -- for (i = 4; i < 8; ++i) -- perm2[i] = i; -- ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p); -- gcc_assert (ok); -+/* Return cost of multiplication in MODE. */ - -- /* Emit the pshufhw. */ -- memcpy (perm2 + 4, d->perm + 4, 4); -- for (i = 0; i < 4; ++i) -- perm2[i] = i; -- ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p); -- gcc_assert (ok); -+static int -+ix86_division_cost (const struct processor_costs *cost, -+ enum machine_mode mode) -+{ -+ machine_mode inner_mode = mode; -+ if (VECTOR_MODE_P (mode)) -+ inner_mode = GET_MODE_INNER (mode); - -- return true; -+ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) -+ return inner_mode == DFmode ? cost->divsd : cost->divss; -+ else if (X87_FLOAT_MODE_P (mode)) -+ return cost->fdiv; -+ else if (FLOAT_MODE_P (mode)) -+ return ix86_vec_cost (mode, -+ inner_mode == DFmode ? cost->divsd : cost->divss); -+ else -+ return cost->divide[MODE_INDEX (mode)]; - } - --/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify -- the permutation using the SSSE3 palignr instruction. This succeeds -- when all of the elements in PERM fit within one vector and we merely -- need to shift them down so that a single vector permutation has a -- chance to succeed. If SINGLE_INSN_ONLY_P, succeed if only -- the vpalignr instruction itself can perform the requested permutation. 
*/ -- --static bool --expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p) --{ -- unsigned i, nelt = d->nelt; -- unsigned min, max, minswap, maxswap; -- bool in_order, ok, swap = false; -- rtx shift, target; -- struct expand_vec_perm_d dcopy; -- -- /* Even with AVX, palignr only operates on 128-bit vectors, -- in AVX2 palignr operates on both 128-bit lanes. */ -- if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16) -- && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32)) -- return false; -- -- min = 2 * nelt; -- max = 0; -- minswap = 2 * nelt; -- maxswap = 0; -- for (i = 0; i < nelt; ++i) -- { -- unsigned e = d->perm[i]; -- unsigned eswap = d->perm[i] ^ nelt; -- if (GET_MODE_SIZE (d->vmode) == 32) -- { -- e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1); -- eswap = e ^ (nelt / 2); -- } -- if (e < min) -- min = e; -- if (e > max) -- max = e; -- if (eswap < minswap) -- minswap = eswap; -- if (eswap > maxswap) -- maxswap = eswap; -- } -- if (min == 0 -- || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt)) -- { -- if (d->one_operand_p -- || minswap == 0 -- || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32 -- ? nelt / 2 : nelt)) -- return false; -- swap = true; -- min = minswap; -- max = maxswap; -- } -+#define COSTS_N_BYTES(N) ((N) * 2) - -- /* Given that we have SSSE3, we know we'll be able to implement the -- single operand permutation after the palignr with pshufb for -- 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed -- first. */ -- if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p) -- return true; -+/* Return cost of shift in MODE. -+ If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL. -+ AND_IN_OP1 specify in op1 is result of and and SHIFT_AND_TRUNCATE -+ if op1 is a result of subreg. - -- dcopy = *d; -- if (swap) -- { -- dcopy.op0 = d->op1; -- dcopy.op1 = d->op0; -- for (i = 0; i < nelt; ++i) -- dcopy.perm[i] ^= nelt; -- } -+ SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored. */ - -- in_order = true; -- for (i = 0; i < nelt; ++i) -+static int -+ix86_shift_rotate_cost (const struct processor_costs *cost, -+ enum machine_mode mode, bool constant_op1, -+ HOST_WIDE_INT op1_val, -+ bool speed, -+ bool and_in_op1, -+ bool shift_and_truncate, -+ bool *skip_op0, bool *skip_op1) -+{ -+ if (skip_op0) -+ *skip_op0 = *skip_op1 = false; -+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) - { -- unsigned e = dcopy.perm[i]; -- if (GET_MODE_SIZE (d->vmode) == 32 -- && e >= nelt -- && (e & (nelt / 2 - 1)) < min) -- e = e - min - (nelt / 2); -+ /* V*QImode is emulated with 1-11 insns. */ -+ if (mode == V16QImode || mode == V32QImode) -+ { -+ int count = 11; -+ if (TARGET_XOP && mode == V16QImode) -+ { -+ /* For XOP we use vpshab, which requires a broadcast of the -+ value to the variable shift insn. For constants this -+ means a V16Q const in mem; even when we can perform the -+ shift with one insn set the cost to prefer paddb. */ -+ if (constant_op1) -+ { -+ if (skip_op1) -+ *skip_op1 = true; -+ return ix86_vec_cost (mode, -+ cost->sse_op -+ + (speed -+ ? 2 -+ : COSTS_N_BYTES -+ (GET_MODE_UNIT_SIZE (mode)))); -+ } -+ count = 3; -+ } -+ else if (TARGET_SSSE3) -+ count = 7; -+ return ix86_vec_cost (mode, cost->sse_op * count); -+ } - else -- e = e - min; -- if (e != i) -- in_order = false; -- dcopy.perm[i] = e; -- } -- dcopy.one_operand_p = true; -- -- if (single_insn_only_p && !in_order) -- return false; -- -- /* For AVX2, test whether we can permute the result in one instruction. 
*/ -- if (d->testing_p) -- { -- if (in_order) -- return true; -- dcopy.op1 = dcopy.op0; -- return expand_vec_perm_1 (&dcopy); -+ return ix86_vec_cost (mode, cost->sse_op); - } -- -- shift = GEN_INT (min * GET_MODE_UNIT_BITSIZE (d->vmode)); -- if (GET_MODE_SIZE (d->vmode) == 16) -+ if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) - { -- target = gen_reg_rtx (TImode); -- emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1), -- gen_lowpart (TImode, dcopy.op0), shift)); -+ if (constant_op1) -+ { -+ if (op1_val > 32) -+ return cost->shift_const + COSTS_N_INSNS (2); -+ else -+ return cost->shift_const * 2; -+ } -+ else -+ { -+ if (and_in_op1) -+ return cost->shift_var * 2; -+ else -+ return cost->shift_var * 6 + COSTS_N_INSNS (2); -+ } - } - else - { -- target = gen_reg_rtx (V2TImode); -- emit_insn (gen_avx2_palignrv2ti (target, -- gen_lowpart (V2TImode, dcopy.op1), -- gen_lowpart (V2TImode, dcopy.op0), -- shift)); -- } -- -- dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target); -- -- /* Test for the degenerate case where the alignment by itself -- produces the desired permutation. */ -- if (in_order) -- { -- emit_move_insn (d->target, dcopy.op0); -- return true; -+ if (constant_op1) -+ return cost->shift_const; -+ else if (shift_and_truncate) -+ { -+ if (skip_op0) -+ *skip_op0 = *skip_op1 = true; -+ /* Return the cost after shift-and truncation. */ -+ return cost->shift_var; -+ } -+ else -+ return cost->shift_var; - } -- -- ok = expand_vec_perm_1 (&dcopy); -- gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32); -- -- return ok; -+ return cost->shift_const; - } - --/* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify -- the permutation using the SSE4_1 pblendv instruction. Potentially -- reduces permutation from 2 pshufb and or to 1 pshufb and pblendv. */ -+/* Compute a (partial) cost for rtx X. Return true if the complete -+ cost has been computed, and false if subexpressions should be -+ scanned. In either case, *TOTAL contains the cost result. */ - - static bool --expand_vec_perm_pblendv (struct expand_vec_perm_d *d) -+ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, -+ int *total, bool speed) - { -- unsigned i, which, nelt = d->nelt; -- struct expand_vec_perm_d dcopy, dcopy1; -- machine_mode vmode = d->vmode; -- bool ok; -- -- /* Use the same checks as in expand_vec_perm_blend. */ -- if (d->one_operand_p) -- return false; -- if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32) -- ; -- else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode)) -- ; -- else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16) -- ; -- else -- return false; -- -- /* Figure out where permutation elements stay not in their -- respective lanes. */ -- for (i = 0, which = 0; i < nelt; ++i) -- { -- unsigned e = d->perm[i]; -- if (e != i) -- which |= (e < nelt ? 1 : 2); -- } -- /* We can pblend the part where elements stay not in their -- respective lanes only when these elements are all in one -- half of a permutation. -- {0 1 8 3 4 5 9 7} is ok as 8, 9 are at not at their respective -- lanes, but both 8 and 9 >= 8 -- {0 1 8 3 4 5 2 7} is not ok as 2 and 8 are not at their -- respective lanes and 8 >= 8, but 2 not. */ -- if (which != 1 && which != 2) -- return false; -- if (d->testing_p && GET_MODE_SIZE (vmode) == 16) -- return true; -- -- /* First we apply one operand permutation to the part where -- elements stay not in their respective lanes. 
*/ -- dcopy = *d; -- if (which == 2) -- dcopy.op0 = dcopy.op1 = d->op1; -- else -- dcopy.op0 = dcopy.op1 = d->op0; -- if (!d->testing_p) -- dcopy.target = gen_reg_rtx (vmode); -- dcopy.one_operand_p = true; -- -- for (i = 0; i < nelt; ++i) -- dcopy.perm[i] = d->perm[i] & (nelt - 1); -- -- ok = expand_vec_perm_1 (&dcopy); -- if (GET_MODE_SIZE (vmode) != 16 && !ok) -- return false; -- else -- gcc_assert (ok); -- if (d->testing_p) -- return true; -- -- /* Next we put permuted elements into their positions. */ -- dcopy1 = *d; -- if (which == 2) -- dcopy1.op1 = dcopy.target; -- else -- dcopy1.op0 = dcopy.target; -- -- for (i = 0; i < nelt; ++i) -- dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i); -+ rtx mask; -+ enum rtx_code code = GET_CODE (x); -+ enum rtx_code outer_code = (enum rtx_code) outer_code_i; -+ const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost; -+ int src_cost; - -- ok = expand_vec_perm_blend (&dcopy1); -- gcc_assert (ok); -+ switch (code) -+ { -+ case SET: -+ if (register_operand (SET_DEST (x), VOIDmode) -+ && register_operand (SET_SRC (x), VOIDmode)) -+ { -+ *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x))); -+ return true; -+ } - -- return true; --} -+ if (register_operand (SET_SRC (x), VOIDmode)) -+ /* Avoid potentially incorrect high cost from rtx_costs -+ for non-tieable SUBREGs. */ -+ src_cost = 0; -+ else -+ { -+ src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed); - --static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d); -+ if (CONSTANT_P (SET_SRC (x))) -+ /* Constant costs assume a base value of COSTS_N_INSNS (1) and add -+ a small value, possibly zero for cheap constants. */ -+ src_cost += COSTS_N_INSNS (1); -+ } - --/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify -- a two vector permutation into a single vector permutation by using -- an interleave operation to merge the vectors. */ -+ *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed); -+ return true; - --static bool --expand_vec_perm_interleave2 (struct expand_vec_perm_d *d) --{ -- struct expand_vec_perm_d dremap, dfinal; -- unsigned i, nelt = d->nelt, nelt2 = nelt / 2; -- unsigned HOST_WIDE_INT contents; -- unsigned char remap[2 * MAX_VECT_LEN]; -- rtx_insn *seq; -- bool ok, same_halves = false; -+ case CONST_INT: -+ case CONST: -+ case LABEL_REF: -+ case SYMBOL_REF: -+ if (x86_64_immediate_operand (x, VOIDmode)) -+ *total = 0; -+ else -+ *total = 1; -+ return true; - -- if (GET_MODE_SIZE (d->vmode) == 16) -- { -- if (d->one_operand_p) -- return false; -- } -- else if (GET_MODE_SIZE (d->vmode) == 32) -- { -- if (!TARGET_AVX) -- return false; -- /* For 32-byte modes allow even d->one_operand_p. -- The lack of cross-lane shuffling in some instructions -- might prevent a single insn shuffle. */ -- dfinal = *d; -- dfinal.testing_p = true; -- /* If expand_vec_perm_interleave3 can expand this into -- a 3 insn sequence, give up and let it be expanded as -- 3 insn sequence. While that is one insn longer, -- it doesn't need a memory operand and in the common -- case that both interleave low and high permutations -- with the same operands are adjacent needs 4 insns -- for both after CSE. 
*/ -- if (expand_vec_perm_interleave3 (&dfinal)) -- return false; -- } -- else -- return false; -+ case CONST_DOUBLE: -+ if (IS_STACK_MODE (mode)) -+ switch (standard_80387_constant_p (x)) -+ { -+ case -1: -+ case 0: -+ break; -+ case 1: /* 0.0 */ -+ *total = 1; -+ return true; -+ default: /* Other constants */ -+ *total = 2; -+ return true; -+ } -+ /* FALLTHRU */ - -- /* Examine from whence the elements come. */ -- contents = 0; -- for (i = 0; i < nelt; ++i) -- contents |= HOST_WIDE_INT_1U << d->perm[i]; -+ case CONST_VECTOR: -+ switch (standard_sse_constant_p (x, mode)) -+ { -+ case 0: -+ break; -+ case 1: /* 0: xor eliminates false dependency */ -+ *total = 0; -+ return true; -+ default: /* -1: cmp contains false dependency */ -+ *total = 1; -+ return true; -+ } -+ /* FALLTHRU */ - -- memset (remap, 0xff, sizeof (remap)); -- dremap = *d; -+ case CONST_WIDE_INT: -+ /* Fall back to (MEM (SYMBOL_REF)), since that's where -+ it'll probably end up. Add a penalty for size. */ -+ *total = (COSTS_N_INSNS (1) -+ + (!TARGET_64BIT && flag_pic) -+ + (GET_MODE_SIZE (mode) <= 4 -+ ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2)); -+ return true; - -- if (GET_MODE_SIZE (d->vmode) == 16) -- { -- unsigned HOST_WIDE_INT h1, h2, h3, h4; -+ case ZERO_EXTEND: -+ /* The zero extensions is often completely free on x86_64, so make -+ it as cheap as possible. */ -+ if (TARGET_64BIT && mode == DImode -+ && GET_MODE (XEXP (x, 0)) == SImode) -+ *total = 1; -+ else if (TARGET_ZERO_EXTEND_WITH_AND) -+ *total = cost->add; -+ else -+ *total = cost->movzx; -+ return false; - -- /* Split the two input vectors into 4 halves. */ -- h1 = (HOST_WIDE_INT_1U << nelt2) - 1; -- h2 = h1 << nelt2; -- h3 = h2 << nelt2; -- h4 = h3 << nelt2; -+ case SIGN_EXTEND: -+ *total = cost->movsx; -+ return false; - -- /* If the elements from the low halves use interleave low, and similarly -- for interleave high. If the elements are from mis-matched halves, we -- can use shufps for V4SF/V4SI or do a DImode shuffle. 
*/ -- if ((contents & (h1 | h3)) == contents) -- { -- /* punpckl* */ -- for (i = 0; i < nelt2; ++i) -- { -- remap[i] = i * 2; -- remap[i + nelt] = i * 2 + 1; -- dremap.perm[i * 2] = i; -- dremap.perm[i * 2 + 1] = i + nelt; -- } -- if (!TARGET_SSE2 && d->vmode == V4SImode) -- dremap.vmode = V4SFmode; -- } -- else if ((contents & (h2 | h4)) == contents) -- { -- /* punpckh* */ -- for (i = 0; i < nelt2; ++i) -- { -- remap[i + nelt2] = i * 2; -- remap[i + nelt + nelt2] = i * 2 + 1; -- dremap.perm[i * 2] = i + nelt2; -- dremap.perm[i * 2 + 1] = i + nelt + nelt2; -- } -- if (!TARGET_SSE2 && d->vmode == V4SImode) -- dremap.vmode = V4SFmode; -- } -- else if ((contents & (h1 | h4)) == contents) -+ case ASHIFT: -+ if (SCALAR_INT_MODE_P (mode) -+ && GET_MODE_SIZE (mode) < UNITS_PER_WORD -+ && CONST_INT_P (XEXP (x, 1))) - { -- /* shufps */ -- for (i = 0; i < nelt2; ++i) -+ HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); -+ if (value == 1) - { -- remap[i] = i; -- remap[i + nelt + nelt2] = i + nelt2; -- dremap.perm[i] = i; -- dremap.perm[i + nelt2] = i + nelt + nelt2; -+ *total = cost->add; -+ return false; - } -- if (nelt != 4) -+ if ((value == 2 || value == 3) -+ && cost->lea <= cost->shift_const) - { -- /* shufpd */ -- dremap.vmode = V2DImode; -- dremap.nelt = 2; -- dremap.perm[0] = 0; -- dremap.perm[1] = 3; -+ *total = cost->lea; -+ return false; - } - } -- else if ((contents & (h2 | h3)) == contents) -+ /* FALLTHRU */ -+ -+ case ROTATE: -+ case ASHIFTRT: -+ case LSHIFTRT: -+ case ROTATERT: -+ bool skip_op0, skip_op1; -+ *total = ix86_shift_rotate_cost (cost, mode, CONSTANT_P (XEXP (x, 1)), -+ CONST_INT_P (XEXP (x, 1)) -+ ? INTVAL (XEXP (x, 1)) : -1, -+ speed, -+ GET_CODE (XEXP (x, 1)) == AND, -+ SUBREG_P (XEXP (x, 1)) -+ && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND, -+ &skip_op0, &skip_op1); -+ if (skip_op0 || skip_op1) - { -- /* shufps */ -- for (i = 0; i < nelt2; ++i) -- { -- remap[i + nelt2] = i; -- remap[i + nelt] = i + nelt2; -- dremap.perm[i] = i + nelt2; -- dremap.perm[i + nelt2] = i + nelt; -- } -- if (nelt != 4) -- { -- /* shufpd */ -- dremap.vmode = V2DImode; -- dremap.nelt = 2; -- dremap.perm[0] = 1; -- dremap.perm[1] = 2; -- } -+ if (!skip_op0) -+ *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed); -+ if (!skip_op1) -+ *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed); -+ return true; - } -- else -- return false; -- } -- else -- { -- unsigned int nelt4 = nelt / 4, nzcnt = 0; -- unsigned HOST_WIDE_INT q[8]; -- unsigned int nonzero_halves[4]; -+ return false; - -- /* Split the two input vectors into 8 quarters. */ -- q[0] = (HOST_WIDE_INT_1U << nelt4) - 1; -- for (i = 1; i < 8; ++i) -- q[i] = q[0] << (nelt4 * i); -- for (i = 0; i < 4; ++i) -- if (((q[2 * i] | q[2 * i + 1]) & contents) != 0) -- { -- nonzero_halves[nzcnt] = i; -- ++nzcnt; -- } -+ case FMA: -+ { -+ rtx sub; - -- if (nzcnt == 1) -- { -- gcc_assert (d->one_operand_p); -- nonzero_halves[1] = nonzero_halves[0]; -- same_halves = true; -- } -- else if (d->one_operand_p) -- { -- gcc_assert (nonzero_halves[0] == 0); -- gcc_assert (nonzero_halves[1] == 1); -- } -+ gcc_assert (FLOAT_MODE_P (mode)); -+ gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F); -+ -+ *total = ix86_vec_cost (mode, -+ GET_MODE_INNER (mode) == SFmode -+ ? cost->fmass : cost->fmasd); -+ *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed); -+ -+ /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. 
*/ -+ sub = XEXP (x, 0); -+ if (GET_CODE (sub) == NEG) -+ sub = XEXP (sub, 0); -+ *total += rtx_cost (sub, mode, FMA, 0, speed); -+ -+ sub = XEXP (x, 2); -+ if (GET_CODE (sub) == NEG) -+ sub = XEXP (sub, 0); -+ *total += rtx_cost (sub, mode, FMA, 2, speed); -+ return true; -+ } - -- if (nzcnt <= 2) -+ case MULT: -+ if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode)) - { -- if (d->perm[0] / nelt2 == nonzero_halves[1]) -+ rtx op0 = XEXP (x, 0); -+ rtx op1 = XEXP (x, 1); -+ int nbits; -+ if (CONST_INT_P (XEXP (x, 1))) - { -- /* Attempt to increase the likelihood that dfinal -- shuffle will be intra-lane. */ -- std::swap (nonzero_halves[0], nonzero_halves[1]); -+ unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); -+ for (nbits = 0; value != 0; value &= value - 1) -+ nbits++; - } -+ else -+ /* This is arbitrary. */ -+ nbits = 7; - -- /* vperm2f128 or vperm2i128. */ -- for (i = 0; i < nelt2; ++i) -+ /* Compute costs correctly for widening multiplication. */ -+ if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND) -+ && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2 -+ == GET_MODE_SIZE (mode)) - { -- remap[i + nonzero_halves[1] * nelt2] = i + nelt2; -- remap[i + nonzero_halves[0] * nelt2] = i; -- dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2; -- dremap.perm[i] = i + nonzero_halves[0] * nelt2; -+ int is_mulwiden = 0; -+ machine_mode inner_mode = GET_MODE (op0); -+ -+ if (GET_CODE (op0) == GET_CODE (op1)) -+ is_mulwiden = 1, op1 = XEXP (op1, 0); -+ else if (CONST_INT_P (op1)) -+ { -+ if (GET_CODE (op0) == SIGN_EXTEND) -+ is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode) -+ == INTVAL (op1); -+ else -+ is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode)); -+ } -+ -+ if (is_mulwiden) -+ op0 = XEXP (op0, 0), mode = GET_MODE (op0); - } - -- if (d->vmode != V8SFmode -- && d->vmode != V4DFmode -- && d->vmode != V8SImode) -+ *total = (cost->mult_init[MODE_INDEX (mode)] -+ + nbits * cost->mult_bit -+ + rtx_cost (op0, mode, outer_code, opno, speed) -+ + rtx_cost (op1, mode, outer_code, opno, speed)); -+ -+ return true; -+ } -+ *total = ix86_multiplication_cost (cost, mode); -+ return false; -+ -+ case DIV: -+ case UDIV: -+ case MOD: -+ case UMOD: -+ *total = ix86_division_cost (cost, mode); -+ return false; -+ -+ case PLUS: -+ if (GET_MODE_CLASS (mode) == MODE_INT -+ && GET_MODE_SIZE (mode) <= UNITS_PER_WORD) -+ { -+ if (GET_CODE (XEXP (x, 0)) == PLUS -+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT -+ && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1)) -+ && CONSTANT_P (XEXP (x, 1))) - { -- dremap.vmode = V8SImode; -- dremap.nelt = 8; -- for (i = 0; i < 4; ++i) -+ HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)); -+ if (val == 2 || val == 4 || val == 8) - { -- dremap.perm[i] = i + nonzero_halves[0] * 4; -- dremap.perm[i + 4] = i + nonzero_halves[1] * 4; -+ *total = cost->lea; -+ *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode, -+ outer_code, opno, speed); -+ *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode, -+ outer_code, opno, speed); -+ *total += rtx_cost (XEXP (x, 1), mode, -+ outer_code, opno, speed); -+ return true; - } - } -- } -- else if (d->one_operand_p) -- return false; -- else if (TARGET_AVX2 -- && (contents & (q[0] | q[2] | q[4] | q[6])) == contents) -- { -- /* vpunpckl* */ -- for (i = 0; i < nelt4; ++i) -+ else if (GET_CODE (XEXP (x, 0)) == MULT -+ && CONST_INT_P (XEXP (XEXP (x, 0), 1))) - { -- remap[i] = i * 2; -- remap[i + nelt] = i * 2 + 1; -- remap[i + nelt2] = i * 2 + nelt2; -- remap[i + nelt + nelt2] = i * 2 + nelt2 + 1; -- 
dremap.perm[i * 2] = i; -- dremap.perm[i * 2 + 1] = i + nelt; -- dremap.perm[i * 2 + nelt2] = i + nelt2; -- dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2; -+ HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1)); -+ if (val == 2 || val == 4 || val == 8) -+ { -+ *total = cost->lea; -+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, -+ outer_code, opno, speed); -+ *total += rtx_cost (XEXP (x, 1), mode, -+ outer_code, opno, speed); -+ return true; -+ } - } -- } -- else if (TARGET_AVX2 -- && (contents & (q[1] | q[3] | q[5] | q[7])) == contents) -- { -- /* vpunpckh* */ -- for (i = 0; i < nelt4; ++i) -+ else if (GET_CODE (XEXP (x, 0)) == PLUS) - { -- remap[i + nelt4] = i * 2; -- remap[i + nelt + nelt4] = i * 2 + 1; -- remap[i + nelt2 + nelt4] = i * 2 + nelt2; -- remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1; -- dremap.perm[i * 2] = i + nelt4; -- dremap.perm[i * 2 + 1] = i + nelt + nelt4; -- dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4; -- dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4; -+ /* Add with carry, ignore the cost of adding a carry flag. */ -+ if (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 0), mode)) -+ *total = cost->add; -+ else -+ { -+ *total = cost->lea; -+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, -+ outer_code, opno, speed); -+ } -+ -+ *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode, -+ outer_code, opno, speed); -+ *total += rtx_cost (XEXP (x, 1), mode, -+ outer_code, opno, speed); -+ return true; - } - } -- else -- return false; -- } -+ /* FALLTHRU */ - -- /* Use the remapping array set up above to move the elements from their -- swizzled locations into their final destinations. */ -- dfinal = *d; -- for (i = 0; i < nelt; ++i) -- { -- unsigned e = remap[d->perm[i]]; -- gcc_assert (e < nelt); -- /* If same_halves is true, both halves of the remapped vector are the -- same. Avoid cross-lane accesses if possible. */ -- if (same_halves && i >= nelt2) -+ case MINUS: -+ /* Subtract with borrow, ignore the cost of subtracting a carry flag. */ -+ if (GET_MODE_CLASS (mode) == MODE_INT -+ && GET_MODE_SIZE (mode) <= UNITS_PER_WORD -+ && GET_CODE (XEXP (x, 0)) == MINUS -+ && ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode)) - { -- gcc_assert (e < nelt2); -- dfinal.perm[i] = e + nelt2; -+ *total = cost->add; -+ *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, -+ outer_code, opno, speed); -+ *total += rtx_cost (XEXP (x, 1), mode, -+ outer_code, opno, speed); -+ return true; - } -- else -- dfinal.perm[i] = e; -- } -- if (!d->testing_p) -- { -- dremap.target = gen_reg_rtx (dremap.vmode); -- dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target); -- } -- dfinal.op1 = dfinal.op0; -- dfinal.one_operand_p = true; - -- /* Test if the final remap can be done with a single insn. For V4SFmode or -- V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */ -- start_sequence (); -- ok = expand_vec_perm_1 (&dfinal); -- seq = get_insns (); -- end_sequence (); -- -- if (!ok) -- return false; -- -- if (d->testing_p) -- return true; -- -- if (dremap.vmode != dfinal.vmode) -- { -- dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0); -- dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1); -- } -- -- ok = expand_vec_perm_1 (&dremap); -- gcc_assert (ok); -- -- emit_insn (seq); -- return true; --} -- --/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify -- a single vector cross-lane permutation into vpermq followed -- by any of the single insn permutations. 
*/ -- --static bool --expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d) --{ -- struct expand_vec_perm_d dremap, dfinal; -- unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4; -- unsigned contents[2]; -- bool ok; -- -- if (!(TARGET_AVX2 -- && (d->vmode == V32QImode || d->vmode == V16HImode) -- && d->one_operand_p)) -- return false; -- -- contents[0] = 0; -- contents[1] = 0; -- for (i = 0; i < nelt2; ++i) -- { -- contents[0] |= 1u << (d->perm[i] / nelt4); -- contents[1] |= 1u << (d->perm[i + nelt2] / nelt4); -- } -- -- for (i = 0; i < 2; ++i) -- { -- unsigned int cnt = 0; -- for (j = 0; j < 4; ++j) -- if ((contents[i] & (1u << j)) != 0 && ++cnt > 2) -+ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) -+ { -+ *total = cost->addss; - return false; -- } -- -- if (d->testing_p) -- return true; -- -- dremap = *d; -- dremap.vmode = V4DImode; -- dremap.nelt = 4; -- dremap.target = gen_reg_rtx (V4DImode); -- dremap.op0 = gen_lowpart (V4DImode, d->op0); -- dremap.op1 = dremap.op0; -- dremap.one_operand_p = true; -- for (i = 0; i < 2; ++i) -- { -- unsigned int cnt = 0; -- for (j = 0; j < 4; ++j) -- if ((contents[i] & (1u << j)) != 0) -- dremap.perm[2 * i + cnt++] = j; -- for (; cnt < 2; ++cnt) -- dremap.perm[2 * i + cnt] = 0; -- } -- -- dfinal = *d; -- dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target); -- dfinal.op1 = dfinal.op0; -- dfinal.one_operand_p = true; -- for (i = 0, j = 0; i < nelt; ++i) -- { -- if (i == nelt2) -- j = 2; -- dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0); -- if ((d->perm[i] / nelt4) == dremap.perm[j]) -- ; -- else if ((d->perm[i] / nelt4) == dremap.perm[j + 1]) -- dfinal.perm[i] |= nelt4; -- else -- gcc_unreachable (); -- } -- -- ok = expand_vec_perm_1 (&dremap); -- gcc_assert (ok); -- -- ok = expand_vec_perm_1 (&dfinal); -- gcc_assert (ok); -- -- return true; --} -- --/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand -- a vector permutation using two instructions, vperm2f128 resp. -- vperm2i128 followed by any single in-lane permutation. */ -- --static bool --expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d) --{ -- struct expand_vec_perm_d dfirst, dsecond; -- unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm; -- bool ok; -+ } -+ else if (X87_FLOAT_MODE_P (mode)) -+ { -+ *total = cost->fadd; -+ return false; -+ } -+ else if (FLOAT_MODE_P (mode)) -+ { -+ *total = ix86_vec_cost (mode, cost->addss); -+ return false; -+ } -+ /* FALLTHRU */ - -- if (!TARGET_AVX -- || GET_MODE_SIZE (d->vmode) != 32 -- || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2)) -- return false; -+ case AND: -+ case IOR: -+ case XOR: -+ if (GET_MODE_CLASS (mode) == MODE_INT -+ && GET_MODE_SIZE (mode) > UNITS_PER_WORD) -+ { -+ *total = (cost->add * 2 -+ + (rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed) -+ << (GET_MODE (XEXP (x, 0)) != DImode)) -+ + (rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed) -+ << (GET_MODE (XEXP (x, 1)) != DImode))); -+ return true; -+ } -+ /* FALLTHRU */ - -- dsecond = *d; -- dsecond.one_operand_p = false; -- dsecond.testing_p = true; -- -- /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128 -- immediate. For perm < 16 the second permutation uses -- d->op0 as first operand, for perm >= 16 it uses d->op1 -- as first operand. The second operand is the result of -- vperm2[fi]128. */ -- for (perm = 0; perm < 32; perm++) -- { -- /* Ignore permutations which do not move anything cross-lane. */ -- if (perm < 16) -- { -- /* The second shuffle for e.g. V4DFmode has -- 0123 and ABCD operands. 
-- Ignore AB23, as 23 is already in the second lane -- of the first operand. */ -- if ((perm & 0xc) == (1 << 2)) continue; -- /* And 01CD, as 01 is in the first lane of the first -- operand. */ -- if ((perm & 3) == 0) continue; -- /* And 4567, as then the vperm2[fi]128 doesn't change -- anything on the original 4567 second operand. */ -- if ((perm & 0xf) == ((3 << 2) | 2)) continue; -+ case NEG: -+ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) -+ { -+ *total = cost->sse_op; -+ return false; - } -- else -+ else if (X87_FLOAT_MODE_P (mode)) - { -- /* The second shuffle for e.g. V4DFmode has -- 4567 and ABCD operands. -- Ignore AB67, as 67 is already in the second lane -- of the first operand. */ -- if ((perm & 0xc) == (3 << 2)) continue; -- /* And 45CD, as 45 is in the first lane of the first -- operand. */ -- if ((perm & 3) == 2) continue; -- /* And 0123, as then the vperm2[fi]128 doesn't change -- anything on the original 0123 first operand. */ -- if ((perm & 0xf) == (1 << 2)) continue; -- } -- -- for (i = 0; i < nelt; i++) -- { -- j = d->perm[i] / nelt2; -- if (j == ((perm >> (2 * (i >= nelt2))) & 3)) -- dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1)); -- else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16)) -- dsecond.perm[i] = d->perm[i] & (nelt - 1); -- else -- break; -+ *total = cost->fchs; -+ return false; - } -- -- if (i == nelt) -+ else if (FLOAT_MODE_P (mode)) - { -- start_sequence (); -- ok = expand_vec_perm_1 (&dsecond); -- end_sequence (); -+ *total = ix86_vec_cost (mode, cost->sse_op); -+ return false; - } -+ /* FALLTHRU */ -+ -+ case NOT: -+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) -+ *total = ix86_vec_cost (mode, cost->sse_op); -+ else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) -+ *total = cost->add * 2; - else -- ok = false; -+ *total = cost->add; -+ return false; - -- if (ok) -+ case COMPARE: -+ if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT -+ && XEXP (XEXP (x, 0), 1) == const1_rtx -+ && CONST_INT_P (XEXP (XEXP (x, 0), 2)) -+ && XEXP (x, 1) == const0_rtx) - { -- if (d->testing_p) -- return true; -- -- /* Found a usable second shuffle. dfirst will be -- vperm2f128 on d->op0 and d->op1. */ -- dsecond.testing_p = false; -- dfirst = *d; -- dfirst.target = gen_reg_rtx (d->vmode); -- for (i = 0; i < nelt; i++) -- dfirst.perm[i] = (i & (nelt2 - 1)) -- + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2; -- -- canonicalize_perm (&dfirst); -- ok = expand_vec_perm_1 (&dfirst); -- gcc_assert (ok); -- -- /* And dsecond is some single insn shuffle, taking -- d->op0 and result of vperm2f128 (if perm < 16) or -- d->op1 and result of vperm2f128 (otherwise). */ -- if (perm >= 16) -- dsecond.op0 = dsecond.op1; -- dsecond.op1 = dfirst.target; -- -- ok = expand_vec_perm_1 (&dsecond); -- gcc_assert (ok); -- -+ /* This kind of construct is implemented using test[bwl]. -+ Treat it as if we had an AND. */ -+ mode = GET_MODE (XEXP (XEXP (x, 0), 0)); -+ *total = (cost->add -+ + rtx_cost (XEXP (XEXP (x, 0), 0), mode, outer_code, -+ opno, speed) -+ + rtx_cost (const1_rtx, mode, outer_code, opno, speed)); - return true; - } - -- /* For one operand, the only useful vperm2f128 permutation is 0x01 -- aka lanes swap. */ -- if (d->one_operand_p) -- return false; -- } -+ /* The embedded comparison operand is completely free. */ -+ if (!general_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))) -+ && XEXP (x, 1) == const0_rtx) -+ *total = 0; - -- return false; --} -+ return false; - --/* A subroutine of ix86_expand_vec_perm_builtin_1. 
Try to simplify -- a two vector permutation using 2 intra-lane interleave insns -- and cross-lane shuffle for 32-byte vectors. */ -+ case FLOAT_EXTEND: -+ if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)) -+ *total = 0; -+ else -+ *total = ix86_vec_cost (mode, cost->addss); -+ return false; - --static bool --expand_vec_perm_interleave3 (struct expand_vec_perm_d *d) --{ -- unsigned i, nelt; -- rtx (*gen) (rtx, rtx, rtx); -+ case FLOAT_TRUNCATE: -+ if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)) -+ *total = cost->fadd; -+ else -+ *total = ix86_vec_cost (mode, cost->addss); -+ return false; - -- if (d->one_operand_p) -- return false; -- if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32) -- ; -- else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode)) -- ; -- else -- return false; -+ case ABS: -+ /* SSE requires memory load for the constant operand. It may make -+ sense to account for this. Of course the constant operand may or -+ may not be reused. */ -+ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) -+ *total = cost->sse_op; -+ else if (X87_FLOAT_MODE_P (mode)) -+ *total = cost->fabs; -+ else if (FLOAT_MODE_P (mode)) -+ *total = ix86_vec_cost (mode, cost->sse_op); -+ return false; - -- nelt = d->nelt; -- if (d->perm[0] != 0 && d->perm[0] != nelt / 2) -- return false; -- for (i = 0; i < nelt; i += 2) -- if (d->perm[i] != d->perm[0] + i / 2 -- || d->perm[i + 1] != d->perm[0] + i / 2 + nelt) -+ case SQRT: -+ if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) -+ *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd; -+ else if (X87_FLOAT_MODE_P (mode)) -+ *total = cost->fsqrt; -+ else if (FLOAT_MODE_P (mode)) -+ *total = ix86_vec_cost (mode, -+ mode == SFmode ? cost->sqrtss : cost->sqrtsd); - return false; - -- if (d->testing_p) -- return true; -+ case UNSPEC: -+ if (XINT (x, 1) == UNSPEC_TP) -+ *total = 0; -+ return false; - -- switch (d->vmode) -- { -- case E_V32QImode: -- if (d->perm[0]) -- gen = gen_vec_interleave_highv32qi; -- else -- gen = gen_vec_interleave_lowv32qi; -- break; -- case E_V16HImode: -- if (d->perm[0]) -- gen = gen_vec_interleave_highv16hi; -- else -- gen = gen_vec_interleave_lowv16hi; -- break; -- case E_V8SImode: -- if (d->perm[0]) -- gen = gen_vec_interleave_highv8si; -- else -- gen = gen_vec_interleave_lowv8si; -- break; -- case E_V4DImode: -- if (d->perm[0]) -- gen = gen_vec_interleave_highv4di; -- else -- gen = gen_vec_interleave_lowv4di; -- break; -- case E_V8SFmode: -- if (d->perm[0]) -- gen = gen_vec_interleave_highv8sf; -- else -- gen = gen_vec_interleave_lowv8sf; -- break; -- case E_V4DFmode: -- if (d->perm[0]) -- gen = gen_vec_interleave_highv4df; -+ case VEC_SELECT: -+ case VEC_CONCAT: -+ case VEC_DUPLICATE: -+ /* ??? Assume all of these vector manipulation patterns are -+ recognizable. In which case they all pretty much have the -+ same cost. */ -+ *total = cost->sse_op; -+ return true; -+ case VEC_MERGE: -+ mask = XEXP (x, 2); -+ /* This is masked instruction, assume the same cost, -+ as nonmasked variant. */ -+ if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask))) -+ *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed); - else -- gen = gen_vec_interleave_lowv4df; -- break; -+ *total = cost->sse_op; -+ return true; -+ - default: -- gcc_unreachable (); -+ return false; - } -- -- emit_insn (gen (d->target, d->op0, d->op1)); -- return true; - } - --/* A subroutine of ix86_expand_vec_perm_builtin_1. 
Try to implement -- a single vector permutation using a single intra-lane vector -- permutation, vperm2f128 swapping the lanes and vblend* insn blending -- the non-swapped and swapped vectors together. */ -- --static bool --expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d) --{ -- struct expand_vec_perm_d dfirst, dsecond; -- unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2; -- rtx_insn *seq; -- bool ok; -- rtx (*blend) (rtx, rtx, rtx, rtx) = NULL; -- -- if (!TARGET_AVX -- || TARGET_AVX2 -- || (d->vmode != V8SFmode && d->vmode != V4DFmode) -- || !d->one_operand_p) -- return false; -- -- dfirst = *d; -- for (i = 0; i < nelt; i++) -- dfirst.perm[i] = 0xff; -- for (i = 0, msk = 0; i < nelt; i++) -- { -- j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2; -- if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i]) -- return false; -- dfirst.perm[j] = d->perm[i]; -- if (j != i) -- msk |= (1 << i); -- } -- for (i = 0; i < nelt; i++) -- if (dfirst.perm[i] == 0xff) -- dfirst.perm[i] = i; -- -- if (!d->testing_p) -- dfirst.target = gen_reg_rtx (dfirst.vmode); -- -- start_sequence (); -- ok = expand_vec_perm_1 (&dfirst); -- seq = get_insns (); -- end_sequence (); -- -- if (!ok) -- return false; -- -- if (d->testing_p) -- return true; -- -- emit_insn (seq); -- -- dsecond = *d; -- dsecond.op0 = dfirst.target; -- dsecond.op1 = dfirst.target; -- dsecond.one_operand_p = true; -- dsecond.target = gen_reg_rtx (dsecond.vmode); -- for (i = 0; i < nelt; i++) -- dsecond.perm[i] = i ^ nelt2; -- -- ok = expand_vec_perm_1 (&dsecond); -- gcc_assert (ok); -+#if TARGET_MACHO - -- blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256; -- emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk))); -- return true; --} -+static int current_machopic_label_num; - --/* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF -- permutation using two vperm2f128, followed by a vshufpd insn blending -- the two vectors together. */ -+/* Given a symbol name and its associated stub, write out the -+ definition of the stub. */ - --static bool --expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d) -+void -+machopic_output_stub (FILE *file, const char *symb, const char *stub) - { -- struct expand_vec_perm_d dfirst, dsecond, dthird; -- bool ok; -- -- if (!TARGET_AVX || (d->vmode != V4DFmode)) -- return false; -- -- if (d->testing_p) -- return true; -- -- dfirst = *d; -- dsecond = *d; -- dthird = *d; -- -- dfirst.perm[0] = (d->perm[0] & ~1); -- dfirst.perm[1] = (d->perm[0] & ~1) + 1; -- dfirst.perm[2] = (d->perm[2] & ~1); -- dfirst.perm[3] = (d->perm[2] & ~1) + 1; -- dsecond.perm[0] = (d->perm[1] & ~1); -- dsecond.perm[1] = (d->perm[1] & ~1) + 1; -- dsecond.perm[2] = (d->perm[3] & ~1); -- dsecond.perm[3] = (d->perm[3] & ~1) + 1; -- dthird.perm[0] = (d->perm[0] % 2); -- dthird.perm[1] = (d->perm[1] % 2) + 4; -- dthird.perm[2] = (d->perm[2] % 2) + 2; -- dthird.perm[3] = (d->perm[3] % 2) + 6; -- -- dfirst.target = gen_reg_rtx (dfirst.vmode); -- dsecond.target = gen_reg_rtx (dsecond.vmode); -- dthird.op0 = dfirst.target; -- dthird.op1 = dsecond.target; -- dthird.one_operand_p = false; -- -- canonicalize_perm (&dfirst); -- canonicalize_perm (&dsecond); -- -- ok = expand_vec_perm_1 (&dfirst) -- && expand_vec_perm_1 (&dsecond) -- && expand_vec_perm_1 (&dthird); -+ unsigned int length; -+ char *binder_name, *symbol_name, lazy_ptr_name[32]; -+ int label = ++current_machopic_label_num; - -- gcc_assert (ok); -+ /* For 64-bit we shouldn't get here. 
*/ -+ gcc_assert (!TARGET_64BIT); - -- return true; --} -+ /* Lose our funky encoding stuff so it doesn't contaminate the stub. */ -+ symb = targetm.strip_name_encoding (symb); - --/* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word -- permutation with two pshufb insns and an ior. We should have already -- failed all two instruction sequences. */ -+ length = strlen (stub); -+ binder_name = XALLOCAVEC (char, length + 32); -+ GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length); - --static bool --expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d) --{ -- rtx rperm[2][16], vperm, l, h, op, m128; -- unsigned int i, nelt, eltsz; -+ length = strlen (symb); -+ symbol_name = XALLOCAVEC (char, length + 32); -+ GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length); - -- if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16) -- return false; -- gcc_assert (!d->one_operand_p); -+ sprintf (lazy_ptr_name, "L%d$lz", label); - -- if (d->testing_p) -- return true; -+ if (MACHOPIC_ATT_STUB) -+ switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]); -+ else if (MACHOPIC_PURE) -+ switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]); -+ else -+ switch_to_section (darwin_sections[machopic_symbol_stub_section]); - -- nelt = d->nelt; -- eltsz = GET_MODE_UNIT_SIZE (d->vmode); -+ fprintf (file, "%s:\n", stub); -+ fprintf (file, "\t.indirect_symbol %s\n", symbol_name); - -- /* Generate two permutation masks. If the required element is within -- the given vector it is shuffled into the proper lane. If the required -- element is in the other vector, force a zero into the lane by setting -- bit 7 in the permutation mask. */ -- m128 = GEN_INT (-128); -- for (i = 0; i < nelt; ++i) -+ if (MACHOPIC_ATT_STUB) - { -- unsigned j, e = d->perm[i]; -- unsigned which = (e >= nelt); -- if (e >= nelt) -- e -= nelt; -- -- for (j = 0; j < eltsz; ++j) -- { -- rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j); -- rperm[1-which][i*eltsz + j] = m128; -- } -+ fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n"); - } -- -- vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0])); -- vperm = force_reg (V16QImode, vperm); -- -- l = gen_reg_rtx (V16QImode); -- op = gen_lowpart (V16QImode, d->op0); -- emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm)); -- -- vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1])); -- vperm = force_reg (V16QImode, vperm); -- -- h = gen_reg_rtx (V16QImode); -- op = gen_lowpart (V16QImode, d->op1); -- emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm)); -- -- op = d->target; -- if (d->vmode != V16QImode) -- op = gen_reg_rtx (V16QImode); -- emit_insn (gen_iorv16qi3 (op, l, h)); -- if (op != d->target) -- emit_move_insn (d->target, gen_lowpart (d->vmode, op)); -- -- return true; --} -- --/* Implement arbitrary permutation of one V32QImode and V16QImode operand -- with two vpshufb insns, vpermq and vpor. We should have already failed -- all two or three instruction sequences. */ -- --static bool --expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d) --{ -- rtx rperm[2][32], vperm, l, h, hp, op, m128; -- unsigned int i, nelt, eltsz; -- -- if (!TARGET_AVX2 -- || !d->one_operand_p -- || (d->vmode != V32QImode && d->vmode != V16HImode)) -- return false; -- -- if (d->testing_p) -- return true; -- -- nelt = d->nelt; -- eltsz = GET_MODE_UNIT_SIZE (d->vmode); -- -- /* Generate two permutation masks. If the required element is within -- the same lane, it is shuffled in. 
If the required element from the -- other lane, force a zero by setting bit 7 in the permutation mask. -- In the other mask the mask has non-negative elements if element -- is requested from the other lane, but also moved to the other lane, -- so that the result of vpshufb can have the two V2TImode halves -- swapped. */ -- m128 = GEN_INT (-128); -- for (i = 0; i < nelt; ++i) -+ else if (MACHOPIC_PURE) - { -- unsigned j, e = d->perm[i] & (nelt / 2 - 1); -- unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz; -- -- for (j = 0; j < eltsz; ++j) -- { -- rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j); -- rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128; -- } -+ /* PIC stub. */ -+ /* 25-byte PIC stub using "CALL get_pc_thunk". */ -+ rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */); -+ output_set_got (tmp, NULL_RTX); /* "CALL ___.get_pc_thunk.cx". */ -+ fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n", -+ label, lazy_ptr_name, label); -+ fprintf (file, "\tjmp\t*%%ecx\n"); - } -+ else -+ fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name); - -- vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1])); -- vperm = force_reg (V32QImode, vperm); -- -- h = gen_reg_rtx (V32QImode); -- op = gen_lowpart (V32QImode, d->op0); -- emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm)); -+ /* The AT&T-style ("self-modifying") stub is not lazily bound, thus -+ it needs no stub-binding-helper. */ -+ if (MACHOPIC_ATT_STUB) -+ return; - -- /* Swap the 128-byte lanes of h into hp. */ -- hp = gen_reg_rtx (V4DImode); -- op = gen_lowpart (V4DImode, h); -- emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx, -- const1_rtx)); -+ fprintf (file, "%s:\n", binder_name); - -- vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0])); -- vperm = force_reg (V32QImode, vperm); -+ if (MACHOPIC_PURE) -+ { -+ fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name); -+ fprintf (file, "\tpushl\t%%ecx\n"); -+ } -+ else -+ fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name); - -- l = gen_reg_rtx (V32QImode); -- op = gen_lowpart (V32QImode, d->op0); -- emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm)); -+ fputs ("\tjmp\tdyld_stub_binding_helper\n", file); - -- op = d->target; -- if (d->vmode != V32QImode) -- op = gen_reg_rtx (V32QImode); -- emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp))); -- if (op != d->target) -- emit_move_insn (d->target, gen_lowpart (d->vmode, op)); -+ /* N.B. Keep the correspondence of these -+ 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the -+ old-pic/new-pic/non-pic stubs; altering this will break -+ compatibility with existing dylibs. */ -+ if (MACHOPIC_PURE) -+ { -+ /* 25-byte PIC stub using "CALL get_pc_thunk". */ -+ switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]); -+ } -+ else -+ /* 16-byte -mdynamic-no-pic stub. */ -+ switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]); - -- return true; -+ fprintf (file, "%s:\n", lazy_ptr_name); -+ fprintf (file, "\t.indirect_symbol %s\n", symbol_name); -+ fprintf (file, ASM_LONG "%s\n", binder_name); - } -+#endif /* TARGET_MACHO */ - --/* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even -- and extract-odd permutations of two V32QImode and V16QImode operand -- with two vpshufb insns, vpor and vpermq. We should have already -- failed all two or three instruction sequences. */ -+/* Order the registers for register allocator. 
*/ - --static bool --expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d) -+void -+x86_order_regs_for_local_alloc (void) - { -- rtx rperm[2][32], vperm, l, h, ior, op, m128; -- unsigned int i, nelt, eltsz; -- -- if (!TARGET_AVX2 -- || d->one_operand_p -- || (d->vmode != V32QImode && d->vmode != V16HImode)) -- return false; -- -- for (i = 0; i < d->nelt; ++i) -- if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2)) -- return false; -- -- if (d->testing_p) -- return true; -+ int pos = 0; -+ int i; - -- nelt = d->nelt; -- eltsz = GET_MODE_UNIT_SIZE (d->vmode); -- -- /* Generate two permutation masks. In the first permutation mask -- the first quarter will contain indexes for the first half -- of the op0, the second quarter will contain bit 7 set, third quarter -- will contain indexes for the second half of the op0 and the -- last quarter bit 7 set. In the second permutation mask -- the first quarter will contain bit 7 set, the second quarter -- indexes for the first half of the op1, the third quarter bit 7 set -- and last quarter indexes for the second half of the op1. -- I.e. the first mask e.g. for V32QImode extract even will be: -- 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128 -- (all values masked with 0xf except for -128) and second mask -- for extract even will be -- -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */ -- m128 = GEN_INT (-128); -- for (i = 0; i < nelt; ++i) -- { -- unsigned j, e = d->perm[i] & (nelt / 2 - 1); -- unsigned which = d->perm[i] >= nelt; -- unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0; -+ /* First allocate the local general purpose registers. */ -+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) -+ if (GENERAL_REGNO_P (i) && call_used_or_fixed_reg_p (i)) -+ reg_alloc_order [pos++] = i; - -- for (j = 0; j < eltsz; ++j) -- { -- rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j); -- rperm[1 - which][(i * eltsz + j) ^ xorv] = m128; -- } -- } -+ /* Global general purpose registers. */ -+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) -+ if (GENERAL_REGNO_P (i) && !call_used_or_fixed_reg_p (i)) -+ reg_alloc_order [pos++] = i; - -- vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0])); -- vperm = force_reg (V32QImode, vperm); -+ /* x87 registers come first in case we are doing FP math -+ using them. */ -+ if (!TARGET_SSE_MATH) -+ for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) -+ reg_alloc_order [pos++] = i; - -- l = gen_reg_rtx (V32QImode); -- op = gen_lowpart (V32QImode, d->op0); -- emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm)); -+ /* SSE registers. */ -+ for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++) -+ reg_alloc_order [pos++] = i; -+ for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) -+ reg_alloc_order [pos++] = i; - -- vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1])); -- vperm = force_reg (V32QImode, vperm); -+ /* Extended REX SSE registers. */ -+ for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++) -+ reg_alloc_order [pos++] = i; - -- h = gen_reg_rtx (V32QImode); -- op = gen_lowpart (V32QImode, d->op1); -- emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm)); -+ /* Mask register. */ -+ for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++) -+ reg_alloc_order [pos++] = i; - -- ior = gen_reg_rtx (V32QImode); -- emit_insn (gen_iorv32qi3 (ior, l, h)); -+ /* x87 registers. 
*/ -+ if (TARGET_SSE_MATH) -+ for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) -+ reg_alloc_order [pos++] = i; - -- /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */ -- op = gen_reg_rtx (V4DImode); -- ior = gen_lowpart (V4DImode, ior); -- emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx, -- const1_rtx, GEN_INT (3))); -- emit_move_insn (d->target, gen_lowpart (d->vmode, op)); -+ for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++) -+ reg_alloc_order [pos++] = i; - -- return true; -+ /* Initialize the rest of array as we do not allocate some registers -+ at all. */ -+ while (pos < FIRST_PSEUDO_REGISTER) -+ reg_alloc_order [pos++] = 0; - } - --/* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even -- and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands -- with two "and" and "pack" or two "shift" and "pack" insns. We should -- have already failed all two instruction sequences. */ -- - static bool --expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d) -+ix86_ms_bitfield_layout_p (const_tree record_type) - { -- rtx op, dop0, dop1, t; -- unsigned i, odd, c, s, nelt = d->nelt; -- bool end_perm = false; -- machine_mode half_mode; -- rtx (*gen_and) (rtx, rtx, rtx); -- rtx (*gen_pack) (rtx, rtx, rtx); -- rtx (*gen_shift) (rtx, rtx, rtx); -+ return ((TARGET_MS_BITFIELD_LAYOUT -+ && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type))) -+ || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type))); -+} - -- if (d->one_operand_p) -- return false; -+/* Returns an expression indicating where the this parameter is -+ located on entry to the FUNCTION. */ - -- switch (d->vmode) -- { -- case E_V8HImode: -- /* Required for "pack". */ -- if (!TARGET_SSE4_1) -- return false; -- c = 0xffff; -- s = 16; -- half_mode = V4SImode; -- gen_and = gen_andv4si3; -- gen_pack = gen_sse4_1_packusdw; -- gen_shift = gen_lshrv4si3; -- break; -- case E_V16QImode: -- /* No check as all instructions are SSE2. */ -- c = 0xff; -- s = 8; -- half_mode = V8HImode; -- gen_and = gen_andv8hi3; -- gen_pack = gen_sse2_packuswb; -- gen_shift = gen_lshrv8hi3; -- break; -- case E_V16HImode: -- if (!TARGET_AVX2) -- return false; -- c = 0xffff; -- s = 16; -- half_mode = V8SImode; -- gen_and = gen_andv8si3; -- gen_pack = gen_avx2_packusdw; -- gen_shift = gen_lshrv8si3; -- end_perm = true; -- break; -- case E_V32QImode: -- if (!TARGET_AVX2) -- return false; -- c = 0xff; -- s = 8; -- half_mode = V16HImode; -- gen_and = gen_andv16hi3; -- gen_pack = gen_avx2_packuswb; -- gen_shift = gen_lshrv16hi3; -- end_perm = true; -- break; -- default: -- /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than -- general shuffles. */ -- return false; -- } -+static rtx -+x86_this_parameter (tree function) -+{ -+ tree type = TREE_TYPE (function); -+ bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0; -+ int nregs; - -- /* Check that permutation is even or odd. 
*/ -- odd = d->perm[0]; -- if (odd > 1) -- return false; -+ if (TARGET_64BIT) -+ { -+ const int *parm_regs; - -- for (i = 1; i < nelt; ++i) -- if (d->perm[i] != 2 * i + odd) -- return false; -+ if (ix86_function_type_abi (type) == MS_ABI) -+ parm_regs = x86_64_ms_abi_int_parameter_registers; -+ else -+ parm_regs = x86_64_int_parameter_registers; -+ return gen_rtx_REG (Pmode, parm_regs[aggr]); -+ } - -- if (d->testing_p) -- return true; -+ nregs = ix86_function_regparm (type, function); - -- dop0 = gen_reg_rtx (half_mode); -- dop1 = gen_reg_rtx (half_mode); -- if (odd == 0) -- { -- t = gen_const_vec_duplicate (half_mode, GEN_INT (c)); -- t = force_reg (half_mode, t); -- emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0))); -- emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1))); -- } -- else -+ if (nregs > 0 && !stdarg_p (type)) - { -- emit_insn (gen_shift (dop0, -- gen_lowpart (half_mode, d->op0), -- GEN_INT (s))); -- emit_insn (gen_shift (dop1, -- gen_lowpart (half_mode, d->op1), -- GEN_INT (s))); -- } -- /* In AVX2 for 256 bit case we need to permute pack result. */ -- if (TARGET_AVX2 && end_perm) -- { -- op = gen_reg_rtx (d->vmode); -- t = gen_reg_rtx (V4DImode); -- emit_insn (gen_pack (op, dop0, dop1)); -- emit_insn (gen_avx2_permv4di_1 (t, -- gen_lowpart (V4DImode, op), -- const0_rtx, -- const2_rtx, -- const1_rtx, -- GEN_INT (3))); -- emit_move_insn (d->target, gen_lowpart (d->vmode, t)); -+ int regno; -+ unsigned int ccvt = ix86_get_callcvt (type); -+ -+ if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) -+ regno = aggr ? DX_REG : CX_REG; -+ else if ((ccvt & IX86_CALLCVT_THISCALL) != 0) -+ { -+ regno = CX_REG; -+ if (aggr) -+ return gen_rtx_MEM (SImode, -+ plus_constant (Pmode, stack_pointer_rtx, 4)); -+ } -+ else -+ { -+ regno = AX_REG; -+ if (aggr) -+ { -+ regno = DX_REG; -+ if (nregs == 1) -+ return gen_rtx_MEM (SImode, -+ plus_constant (Pmode, -+ stack_pointer_rtx, 4)); -+ } -+ } -+ return gen_rtx_REG (SImode, regno); - } -- else -- emit_insn (gen_pack (d->target, dop0, dop1)); - -- return true; -+ return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx, -+ aggr ? 8 : 4)); - } - --/* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even -- and extract-odd permutations of two V64QI operands -- with two "shifts", two "truncs" and one "concat" insns for "odd" -- and two "truncs" and one concat insn for "even." -- Have already failed all two instruction sequences. */ -+/* Determine whether x86_output_mi_thunk can succeed. */ - - static bool --expand_vec_perm_even_odd_trunc (struct expand_vec_perm_d *d) -+x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset, -+ const_tree function) - { -- rtx t1, t2, t3, t4; -- unsigned i, odd, nelt = d->nelt; -- -- if (!TARGET_AVX512BW -- || d->one_operand_p -- || d->vmode != V64QImode) -- return false; -- -- /* Check that permutation is even or odd. */ -- odd = d->perm[0]; -- if (odd > 1) -- return false; -- -- for (i = 1; i < nelt; ++i) -- if (d->perm[i] != 2 * i + odd) -- return false; -- -- if (d->testing_p) -+ /* 64-bit can handle anything. */ -+ if (TARGET_64BIT) - return true; - -+ /* For 32-bit, everything's fine if we have one free register. 
*/ -+ if (ix86_function_regparm (TREE_TYPE (function), function) < 3) -+ return true; - -- if (odd) -- { -- t1 = gen_reg_rtx (V32HImode); -- t2 = gen_reg_rtx (V32HImode); -- emit_insn (gen_lshrv32hi3 (t1, -- gen_lowpart (V32HImode, d->op0), -- GEN_INT (8))); -- emit_insn (gen_lshrv32hi3 (t2, -- gen_lowpart (V32HImode, d->op1), -- GEN_INT (8))); -- } -- else -- { -- t1 = gen_lowpart (V32HImode, d->op0); -- t2 = gen_lowpart (V32HImode, d->op1); -- } -+ /* Need a free register for vcall_offset. */ -+ if (vcall_offset) -+ return false; - -- t3 = gen_reg_rtx (V32QImode); -- t4 = gen_reg_rtx (V32QImode); -- emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t3, t1)); -- emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t4, t2)); -- emit_insn (gen_avx_vec_concatv64qi (d->target, t3, t4)); -+ /* Need a free register for GOT references. */ -+ if (flag_pic && !targetm.binds_local_p (function)) -+ return false; - -+ /* Otherwise ok. */ - return true; - } - --/* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even -- and extract-odd permutations. */ -+/* Output the assembler code for a thunk function. THUNK_DECL is the -+ declaration for the thunk function itself, FUNCTION is the decl for -+ the target function. DELTA is an immediate constant offset to be -+ added to THIS. If VCALL_OFFSET is nonzero, the word at -+ *(*this + vcall_offset) should be added to THIS. */ - --static bool --expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd) -+static void -+x86_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta, -+ HOST_WIDE_INT vcall_offset, tree function) - { -- rtx t1, t2, t3, t4, t5; -+ const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl)); -+ rtx this_param = x86_this_parameter (function); -+ rtx this_reg, tmp, fnaddr; -+ unsigned int tmp_regno; -+ rtx_insn *insn; - -- switch (d->vmode) -+ if (TARGET_64BIT) -+ tmp_regno = R10_REG; -+ else - { -- case E_V4DFmode: -- if (d->testing_p) -- break; -- t1 = gen_reg_rtx (V4DFmode); -- t2 = gen_reg_rtx (V4DFmode); -- -- /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */ -- emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20))); -- emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31))); -- -- /* Now an unpck[lh]pd will produce the result required. */ -- if (odd) -- t3 = gen_avx_unpckhpd256 (d->target, t1, t2); -+ unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function)); -+ if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) -+ tmp_regno = AX_REG; -+ else if ((ccvt & IX86_CALLCVT_THISCALL) != 0) -+ tmp_regno = DX_REG; - else -- t3 = gen_avx_unpcklpd256 (d->target, t1, t2); -- emit_insn (t3); -- break; -+ tmp_regno = CX_REG; -+ } - -- case E_V8SFmode: -- { -- int mask = odd ? 0xdd : 0x88; -+ emit_note (NOTE_INSN_PROLOGUE_END); - -- if (d->testing_p) -- break; -- t1 = gen_reg_rtx (V8SFmode); -- t2 = gen_reg_rtx (V8SFmode); -- t3 = gen_reg_rtx (V8SFmode); -- -- /* Shuffle within the 128-bit lanes to produce: -- { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */ -- emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1, -- GEN_INT (mask))); -- -- /* Shuffle the lanes around to produce: -- { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */ -- emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1, -- GEN_INT (0x3))); -- -- /* Shuffle within the 128-bit lanes to produce: -- { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */ -- emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44))); -- -- /* Shuffle within the 128-bit lanes to produce: -- { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. 
*/ -- emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee))); -- -- /* Shuffle the lanes around to produce: -- { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */ -- emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2, -- GEN_INT (0x20))); -- } -- break; -+ /* CET is enabled, insert EB instruction. */ -+ if ((flag_cf_protection & CF_BRANCH)) -+ emit_insn (gen_nop_endbr ()); - -- case E_V2DFmode: -- case E_V4SFmode: -- case E_V2DImode: -- case E_V4SImode: -- /* These are always directly implementable by expand_vec_perm_1. */ -- gcc_unreachable (); -+ /* If VCALL_OFFSET, we'll need THIS in a register. Might as well -+ pull it in now and let DELTA benefit. */ -+ if (REG_P (this_param)) -+ this_reg = this_param; -+ else if (vcall_offset) -+ { -+ /* Put the this parameter into %eax. */ -+ this_reg = gen_rtx_REG (Pmode, AX_REG); -+ emit_move_insn (this_reg, this_param); -+ } -+ else -+ this_reg = NULL_RTX; - -- case E_V8HImode: -- if (TARGET_SSE4_1) -- return expand_vec_perm_even_odd_pack (d); -- else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB) -- return expand_vec_perm_pshufb2 (d); -- else -+ /* Adjust the this parameter by a fixed constant. */ -+ if (delta) -+ { -+ rtx delta_rtx = GEN_INT (delta); -+ rtx delta_dst = this_reg ? this_reg : this_param; -+ -+ if (TARGET_64BIT) - { -- if (d->testing_p) -- break; -- /* We need 2*log2(N)-1 operations to achieve odd/even -- with interleave. */ -- t1 = gen_reg_rtx (V8HImode); -- t2 = gen_reg_rtx (V8HImode); -- emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1)); -- emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1)); -- emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1)); -- emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1)); -- if (odd) -- t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2); -- else -- t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2); -- emit_insn (t3); -+ if (!x86_64_general_operand (delta_rtx, Pmode)) -+ { -+ tmp = gen_rtx_REG (Pmode, tmp_regno); -+ emit_move_insn (tmp, delta_rtx); -+ delta_rtx = tmp; -+ } - } -- break; - -- case E_V16QImode: -- return expand_vec_perm_even_odd_pack (d); -+ ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx); -+ } -+ -+ /* Adjust the this parameter by a value stored in the vtable. */ -+ if (vcall_offset) -+ { -+ rtx vcall_addr, vcall_mem, this_mem; - -- case E_V16HImode: -- case E_V32QImode: -- return expand_vec_perm_even_odd_pack (d); -+ tmp = gen_rtx_REG (Pmode, tmp_regno); - -- case E_V64QImode: -- return expand_vec_perm_even_odd_trunc (d); -+ this_mem = gen_rtx_MEM (ptr_mode, this_reg); -+ if (Pmode != ptr_mode) -+ this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem); -+ emit_move_insn (tmp, this_mem); - -- case E_V4DImode: -- if (!TARGET_AVX2) -+ /* Adjust the this parameter. 
*/ -+ vcall_addr = plus_constant (Pmode, tmp, vcall_offset); -+ if (TARGET_64BIT -+ && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true)) - { -- struct expand_vec_perm_d d_copy = *d; -- d_copy.vmode = V4DFmode; -- if (d->testing_p) -- d_copy.target = gen_raw_REG (V4DFmode, LAST_VIRTUAL_REGISTER + 1); -- else -- d_copy.target = gen_reg_rtx (V4DFmode); -- d_copy.op0 = gen_lowpart (V4DFmode, d->op0); -- d_copy.op1 = gen_lowpart (V4DFmode, d->op1); -- if (expand_vec_perm_even_odd_1 (&d_copy, odd)) -- { -- if (!d->testing_p) -- emit_move_insn (d->target, -- gen_lowpart (V4DImode, d_copy.target)); -- return true; -- } -- return false; -+ rtx tmp2 = gen_rtx_REG (Pmode, R11_REG); -+ emit_move_insn (tmp2, GEN_INT (vcall_offset)); -+ vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2); - } - -- if (d->testing_p) -- break; -- -- t1 = gen_reg_rtx (V4DImode); -- t2 = gen_reg_rtx (V4DImode); -+ vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr); -+ if (Pmode != ptr_mode) -+ emit_insn (gen_addsi_1_zext (this_reg, -+ gen_rtx_REG (ptr_mode, -+ REGNO (this_reg)), -+ vcall_mem)); -+ else -+ ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem); -+ } - -- /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */ -- emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20))); -- emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31))); -+ /* If necessary, drop THIS back to its stack slot. */ -+ if (this_reg && this_reg != this_param) -+ emit_move_insn (this_param, this_reg); - -- /* Now an vpunpck[lh]qdq will produce the result required. */ -- if (odd) -- t3 = gen_avx2_interleave_highv4di (d->target, t1, t2); -+ fnaddr = XEXP (DECL_RTL (function), 0); -+ if (TARGET_64BIT) -+ { -+ if (!flag_pic || targetm.binds_local_p (function) -+ || TARGET_PECOFF) -+ ; - else -- t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2); -- emit_insn (t3); -- break; -- -- case E_V8SImode: -- if (!TARGET_AVX2) - { -- struct expand_vec_perm_d d_copy = *d; -- d_copy.vmode = V8SFmode; -- if (d->testing_p) -- d_copy.target = gen_raw_REG (V8SFmode, LAST_VIRTUAL_REGISTER + 1); -- else -- d_copy.target = gen_reg_rtx (V8SFmode); -- d_copy.op0 = gen_lowpart (V8SFmode, d->op0); -- d_copy.op1 = gen_lowpart (V8SFmode, d->op1); -- if (expand_vec_perm_even_odd_1 (&d_copy, odd)) -- { -- if (!d->testing_p) -- emit_move_insn (d->target, -- gen_lowpart (V8SImode, d_copy.target)); -- return true; -- } -- return false; -+ tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL); -+ tmp = gen_rtx_CONST (Pmode, tmp); -+ fnaddr = gen_const_mem (Pmode, tmp); -+ } -+ } -+ else -+ { -+ if (!flag_pic || targetm.binds_local_p (function)) -+ ; -+#if TARGET_MACHO -+ else if (TARGET_MACHO) -+ { -+ fnaddr = machopic_indirect_call_target (DECL_RTL (function)); -+ fnaddr = XEXP (fnaddr, 0); - } -+#endif /* TARGET_MACHO */ -+ else -+ { -+ tmp = gen_rtx_REG (Pmode, CX_REG); -+ output_set_got (tmp, NULL_RTX); - -- if (d->testing_p) -- break; -+ fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT); -+ fnaddr = gen_rtx_CONST (Pmode, fnaddr); -+ fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr); -+ fnaddr = gen_const_mem (Pmode, fnaddr); -+ } -+ } - -- t1 = gen_reg_rtx (V8SImode); -- t2 = gen_reg_rtx (V8SImode); -- t3 = gen_reg_rtx (V4DImode); -- t4 = gen_reg_rtx (V4DImode); -- t5 = gen_reg_rtx (V4DImode); -- -- /* Shuffle the lanes around into -- { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. 
*/ -- emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0), -- gen_lowpart (V4DImode, d->op1), -- GEN_INT (0x20))); -- emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0), -- gen_lowpart (V4DImode, d->op1), -- GEN_INT (0x31))); -- -- /* Swap the 2nd and 3rd position in each lane into -- { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */ -- emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3), -- GEN_INT (2 * 4 + 1 * 16 + 3 * 64))); -- emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4), -- GEN_INT (2 * 4 + 1 * 16 + 3 * 64))); -- -- /* Now an vpunpck[lh]qdq will produce -- { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */ -- if (odd) -- t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1), -- gen_lowpart (V4DImode, t2)); -+ /* Our sibling call patterns do not allow memories, because we have no -+ predicate that can distinguish between frame and non-frame memory. -+ For our purposes here, we can get away with (ab)using a jump pattern, -+ because we're going to do no optimization. */ -+ if (MEM_P (fnaddr)) -+ { -+ if (sibcall_insn_operand (fnaddr, word_mode)) -+ { -+ fnaddr = XEXP (DECL_RTL (function), 0); -+ tmp = gen_rtx_MEM (QImode, fnaddr); -+ tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx); -+ tmp = emit_call_insn (tmp); -+ SIBLING_CALL_P (tmp) = 1; -+ } - else -- t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1), -- gen_lowpart (V4DImode, t2)); -- emit_insn (t3); -- emit_move_insn (d->target, gen_lowpart (V8SImode, t5)); -- break; -+ emit_jump_insn (gen_indirect_jump (fnaddr)); -+ } -+ else -+ { -+ if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr)) -+ { -+ // CM_LARGE_PIC always uses pseudo PIC register which is -+ // uninitialized. Since FUNCTION is local and calling it -+ // doesn't go through PLT, we use scratch register %r11 as -+ // PIC register and initialize it here. -+ pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG); -+ ix86_init_large_pic_reg (tmp_regno); -+ fnaddr = legitimize_pic_address (fnaddr, -+ gen_rtx_REG (Pmode, tmp_regno)); -+ } - -- default: -- gcc_unreachable (); -+ if (!sibcall_insn_operand (fnaddr, word_mode)) -+ { -+ tmp = gen_rtx_REG (word_mode, tmp_regno); -+ if (GET_MODE (fnaddr) != word_mode) -+ fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr); -+ emit_move_insn (tmp, fnaddr); -+ fnaddr = tmp; -+ } -+ -+ tmp = gen_rtx_MEM (QImode, fnaddr); -+ tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx); -+ tmp = emit_call_insn (tmp); -+ SIBLING_CALL_P (tmp) = 1; - } -+ emit_barrier (); - -- return true; -+ /* Emit just enough of rest_of_compilation to get the insns emitted. -+ Note that use_thunk calls assemble_start_function et al. */ -+ insn = get_insns (); -+ shorten_branches (insn); -+ assemble_start_function (thunk_fndecl, fnname); -+ final_start_function (insn, file, 1); -+ final (insn, file, 1); -+ final_end_function (); -+ assemble_end_function (thunk_fndecl, fnname); - } - --/* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match -- extract-even and extract-odd permutations. 
*/ -+static void -+x86_file_start (void) -+{ -+ default_file_start (); -+ if (TARGET_16BIT) -+ fputs ("\t.code16gcc\n", asm_out_file); -+#if TARGET_MACHO -+ darwin_file_start (); -+#endif -+ if (X86_FILE_START_VERSION_DIRECTIVE) -+ fputs ("\t.version\t\"01.01\"\n", asm_out_file); -+ if (X86_FILE_START_FLTUSED) -+ fputs ("\t.global\t__fltused\n", asm_out_file); -+ if (ix86_asm_dialect == ASM_INTEL) -+ fputs ("\t.intel_syntax noprefix\n", asm_out_file); -+} - --static bool --expand_vec_perm_even_odd (struct expand_vec_perm_d *d) -+int -+x86_field_alignment (tree type, int computed) - { -- unsigned i, odd, nelt = d->nelt; -+ machine_mode mode; - -- odd = d->perm[0]; -- if (odd != 0 && odd != 1) -- return false; -+ if (TARGET_64BIT || TARGET_ALIGN_DOUBLE) -+ return computed; -+ if (TARGET_IAMCU) -+ return iamcu_alignment (type, computed); -+ mode = TYPE_MODE (strip_array_types (type)); -+ if (mode == DFmode || mode == DCmode -+ || GET_MODE_CLASS (mode) == MODE_INT -+ || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT) -+ return MIN (32, computed); -+ return computed; -+} - -- for (i = 1; i < nelt; ++i) -- if (d->perm[i] != 2 * i + odd) -- return false; -+/* Print call to TARGET to FILE. */ - -- return expand_vec_perm_even_odd_1 (d, odd); -+static void -+x86_print_call_or_nop (FILE *file, const char *target) -+{ -+ if (flag_nop_mcount || !strcmp (target, "nop")) -+ /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */ -+ fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n"); -+ else -+ fprintf (file, "1:\tcall\t%s\n", target); - } - --/* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast -- permutations. We assume that expand_vec_perm_1 has already failed. */ -- - static bool --expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d) -+current_fentry_name (const char **name) - { -- unsigned elt = d->perm[0], nelt2 = d->nelt / 2; -- machine_mode vmode = d->vmode; -- unsigned char perm2[4]; -- rtx op0 = d->op0, dest; -- bool ok; -- -- switch (vmode) -- { -- case E_V4DFmode: -- case E_V8SFmode: -- /* These are special-cased in sse.md so that we can optionally -- use the vbroadcast instruction. They expand to two insns -- if the input happens to be in a register. */ -- gcc_unreachable (); -- -- case E_V2DFmode: -- case E_V2DImode: -- case E_V4SFmode: -- case E_V4SImode: -- /* These are always implementable using standard shuffle patterns. */ -- gcc_unreachable (); -+ tree attr = lookup_attribute ("fentry_name", -+ DECL_ATTRIBUTES (current_function_decl)); -+ if (!attr) -+ return false; -+ *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr))); -+ return true; -+} - -- case E_V8HImode: -- case E_V16QImode: -- /* These can be implemented via interleave. We save one insn by -- stopping once we have promoted to V4SImode and then use pshufd. */ -- if (d->testing_p) -- return true; -- do -- { -- rtx dest; -- rtx (*gen) (rtx, rtx, rtx) -- = vmode == V16QImode ? gen_vec_interleave_lowv16qi -- : gen_vec_interleave_lowv8hi; -+static bool -+current_fentry_section (const char **name) -+{ -+ tree attr = lookup_attribute ("fentry_section", -+ DECL_ATTRIBUTES (current_function_decl)); -+ if (!attr) -+ return false; -+ *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr))); -+ return true; -+} - -- if (elt >= nelt2) -- { -- gen = vmode == V16QImode ? gen_vec_interleave_highv16qi -- : gen_vec_interleave_highv8hi; -- elt -= nelt2; -- } -- nelt2 /= 2; -+/* Output assembler code to FILE to increment profiler label # LABELNO -+ for profiling a function entry. 
*/ -+void -+x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) -+{ -+ if (cfun->machine->endbr_queued_at_entrance) -+ fprintf (file, "\t%s\n", TARGET_64BIT ? "endbr64" : "endbr32"); - -- dest = gen_reg_rtx (vmode); -- emit_insn (gen (dest, op0, op0)); -- vmode = get_mode_wider_vector (vmode); -- op0 = gen_lowpart (vmode, dest); -- } -- while (vmode != V4SImode); -+ const char *mcount_name = MCOUNT_NAME; - -- memset (perm2, elt, 4); -- dest = gen_reg_rtx (V4SImode); -- ok = expand_vselect (dest, op0, perm2, 4, d->testing_p); -- gcc_assert (ok); -- if (!d->testing_p) -- emit_move_insn (d->target, gen_lowpart (d->vmode, dest)); -- return true; -+ if (current_fentry_name (&mcount_name)) -+ ; -+ else if (fentry_name) -+ mcount_name = fentry_name; -+ else if (flag_fentry) -+ mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE; - -- case E_V64QImode: -- case E_V32QImode: -- case E_V16HImode: -- case E_V8SImode: -- case E_V4DImode: -- /* For AVX2 broadcasts of the first element vpbroadcast* or -- vpermq should be used by expand_vec_perm_1. */ -- gcc_assert (!TARGET_AVX2 || d->perm[0]); -- return false; -+ if (TARGET_64BIT) -+ { -+#ifndef NO_PROFILE_COUNTERS -+ fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno); -+#endif - -- default: -- gcc_unreachable (); -+ if (!TARGET_PECOFF && flag_pic) -+ fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name); -+ else -+ x86_print_call_or_nop (file, mcount_name); -+ } -+ else if (flag_pic) -+ { -+#ifndef NO_PROFILE_COUNTERS -+ fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n", -+ LPREFIX, labelno); -+#endif -+ fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name); -+ } -+ else -+ { -+#ifndef NO_PROFILE_COUNTERS -+ fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n", -+ LPREFIX, labelno); -+#endif -+ x86_print_call_or_nop (file, mcount_name); - } --} -- --/* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match -- broadcast permutations. */ - --static bool --expand_vec_perm_broadcast (struct expand_vec_perm_d *d) --{ -- unsigned i, elt, nelt = d->nelt; -- -- if (!d->one_operand_p) -- return false; -+ if (flag_record_mcount -+ || lookup_attribute ("fentry_section", -+ DECL_ATTRIBUTES (current_function_decl))) -+ { -+ const char *sname = "__mcount_loc"; - -- elt = d->perm[0]; -- for (i = 1; i < nelt; ++i) -- if (d->perm[i] != elt) -- return false; -+ if (current_fentry_section (&sname)) -+ ; -+ else if (fentry_section) -+ sname = fentry_section; - -- return expand_vec_perm_broadcast_1 (d); -+ fprintf (file, "\t.section %s, \"a\",@progbits\n", sname); -+ fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long"); -+ fprintf (file, "\t.previous\n"); -+ } - } - --/* Implement arbitrary permutations of two V64QImode operands -- with 2 vperm[it]2w, 2 vpshufb and one vpor instruction. */ --static bool --expand_vec_perm_vpermt2_vpshub2 (struct expand_vec_perm_d *d) -+/* We don't have exact information about the insn sizes, but we may assume -+ quite safely that we are informed about all 1 byte insns and memory -+ address sizes. This is enough to eliminate unnecessary padding in -+ 99% of cases. 
*/ -+ -+int -+ix86_min_insn_size (rtx_insn *insn) - { -- if (!TARGET_AVX512BW || !(d->vmode == V64QImode)) -- return false; -+ int l = 0, len; - -- if (d->testing_p) -- return true; -+ if (!INSN_P (insn) || !active_insn_p (insn)) -+ return 0; - -- struct expand_vec_perm_d ds[2]; -- rtx rperm[128], vperm, target0, target1; -- unsigned int i, nelt; -- machine_mode vmode; -+ /* Discard alignments we've emit and jump instructions. */ -+ if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE -+ && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN) -+ return 0; - -- nelt = d->nelt; -- vmode = V64QImode; -+ /* Important case - calls are always 5 bytes. -+ It is common to have many calls in the row. */ -+ if (CALL_P (insn) -+ && symbolic_reference_mentioned_p (PATTERN (insn)) -+ && !SIBLING_CALL_P (insn)) -+ return 5; -+ len = get_attr_length (insn); -+ if (len <= 1) -+ return 1; - -- for (i = 0; i < 2; i++) -+ /* For normal instructions we rely on get_attr_length being exact, -+ with a few exceptions. */ -+ if (!JUMP_P (insn)) - { -- ds[i] = *d; -- ds[i].vmode = V32HImode; -- ds[i].nelt = 32; -- ds[i].target = gen_reg_rtx (V32HImode); -- ds[i].op0 = gen_lowpart (V32HImode, d->op0); -- ds[i].op1 = gen_lowpart (V32HImode, d->op1); -- } -- -- /* Prepare permutations such that the first one takes care of -- putting the even bytes into the right positions or one higher -- positions (ds[0]) and the second one takes care of -- putting the odd bytes into the right positions or one below -- (ds[1]). */ -+ enum attr_type type = get_attr_type (insn); - -- for (i = 0; i < nelt; i++) -- { -- ds[i & 1].perm[i / 2] = d->perm[i] / 2; -- if (i & 1) -- { -- rperm[i] = constm1_rtx; -- rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1)); -- } -- else -+ switch (type) - { -- rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1)); -- rperm[i + 64] = constm1_rtx; -+ case TYPE_MULTI: -+ if (GET_CODE (PATTERN (insn)) == ASM_INPUT -+ || asm_noperands (PATTERN (insn)) >= 0) -+ return 0; -+ break; -+ case TYPE_OTHER: -+ case TYPE_FCMP: -+ break; -+ default: -+ /* Otherwise trust get_attr_length. */ -+ return len; - } -- } -- -- bool ok = expand_vec_perm_1 (&ds[0]); -- gcc_assert (ok); -- ds[0].target = gen_lowpart (V64QImode, ds[0].target); -- -- ok = expand_vec_perm_1 (&ds[1]); -- gcc_assert (ok); -- ds[1].target = gen_lowpart (V64QImode, ds[1].target); -- -- vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm)); -- vperm = force_reg (vmode, vperm); -- target0 = gen_reg_rtx (V64QImode); -- emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm)); - -- vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64)); -- vperm = force_reg (vmode, vperm); -- target1 = gen_reg_rtx (V64QImode); -- emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm)); -- -- emit_insn (gen_iorv64qi3 (d->target, target0, target1)); -- return true; -+ l = get_attr_length_address (insn); -+ if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn))) -+ l = 4; -+ } -+ if (l) -+ return 1+l; -+ else -+ return 2; - } - --/* Implement arbitrary permutation of two V32QImode and V16QImode operands -- with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed -- all the shorter instruction sequences. */ -+#ifdef ASM_OUTPUT_MAX_SKIP_PAD - --static bool --expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d) -+/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte -+ window. 
*/ -+ -+static void -+ix86_avoid_jump_mispredicts (void) - { -- rtx rperm[4][32], vperm, l[2], h[2], op, m128; -- unsigned int i, nelt, eltsz; -- bool used[4]; -+ rtx_insn *insn, *start = get_insns (); -+ int nbytes = 0, njumps = 0; -+ bool isjump = false; - -- if (!TARGET_AVX2 -- || d->one_operand_p -- || (d->vmode != V32QImode && d->vmode != V16HImode)) -- return false; -+ /* Look for all minimal intervals of instructions containing 4 jumps. -+ The intervals are bounded by START and INSN. NBYTES is the total -+ size of instructions in the interval including INSN and not including -+ START. When the NBYTES is smaller than 16 bytes, it is possible -+ that the end of START and INSN ends up in the same 16byte page. - -- if (d->testing_p) -- return true; -+ The smallest offset in the page INSN can start is the case where START -+ ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN). -+ We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN). - -- nelt = d->nelt; -- eltsz = GET_MODE_UNIT_SIZE (d->vmode); -- -- /* Generate 4 permutation masks. If the required element is within -- the same lane, it is shuffled in. If the required element from the -- other lane, force a zero by setting bit 7 in the permutation mask. -- In the other mask the mask has non-negative elements if element -- is requested from the other lane, but also moved to the other lane, -- so that the result of vpshufb can have the two V2TImode halves -- swapped. */ -- m128 = GEN_INT (-128); -- for (i = 0; i < 32; ++i) -- { -- rperm[0][i] = m128; -- rperm[1][i] = m128; -- rperm[2][i] = m128; -- rperm[3][i] = m128; -- } -- used[0] = false; -- used[1] = false; -- used[2] = false; -- used[3] = false; -- for (i = 0; i < nelt; ++i) -+ Don't consider asm goto as jump, while it can contain a jump, it doesn't -+ have to, control transfer to label(s) can be performed through other -+ means, and also we estimate minimum length of all asm stmts as 0. */ -+ for (insn = start; insn; insn = NEXT_INSN (insn)) - { -- unsigned j, e = d->perm[i] & (nelt / 2 - 1); -- unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz; -- unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0); -- -- for (j = 0; j < eltsz; ++j) -- rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j); -- used[which] = true; -- } -+ int min_size; - -- for (i = 0; i < 2; ++i) -- { -- if (!used[2 * i + 1]) -+ if (LABEL_P (insn)) - { -- h[i] = NULL_RTX; -+ align_flags alignment = label_to_alignment (insn); -+ int align = alignment.levels[0].log; -+ int max_skip = alignment.levels[0].maxskip; -+ -+ if (max_skip > 15) -+ max_skip = 15; -+ /* If align > 3, only up to 16 - max_skip - 1 bytes can be -+ already in the current 16 byte page, because otherwise -+ ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer -+ bytes to reach 16 byte boundary. */ -+ if (align <= 0 -+ || (align <= 3 && max_skip != (1 << align) - 1)) -+ max_skip = 0; -+ if (dump_file) -+ fprintf (dump_file, "Label %i with max_skip %i\n", -+ INSN_UID (insn), max_skip); -+ if (max_skip) -+ { -+ while (nbytes + max_skip >= 16) -+ { -+ start = NEXT_INSN (start); -+ if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0) -+ || CALL_P (start)) -+ njumps--, isjump = true; -+ else -+ isjump = false; -+ nbytes -= ix86_min_insn_size (start); -+ } -+ } - continue; - } -- vperm = gen_rtx_CONST_VECTOR (V32QImode, -- gen_rtvec_v (32, rperm[2 * i + 1])); -- vperm = force_reg (V32QImode, vperm); -- h[i] = gen_reg_rtx (V32QImode); -- op = gen_lowpart (V32QImode, i ? 
d->op1 : d->op0); -- emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm)); -- } - -- /* Swap the 128-byte lanes of h[X]. */ -- for (i = 0; i < 2; ++i) -- { -- if (h[i] == NULL_RTX) -- continue; -- op = gen_reg_rtx (V4DImode); -- emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]), -- const2_rtx, GEN_INT (3), const0_rtx, -- const1_rtx)); -- h[i] = gen_lowpart (V32QImode, op); -- } -+ min_size = ix86_min_insn_size (insn); -+ nbytes += min_size; -+ if (dump_file) -+ fprintf (dump_file, "Insn %i estimated to %i bytes\n", -+ INSN_UID (insn), min_size); -+ if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0) -+ || CALL_P (insn)) -+ njumps++; -+ else -+ continue; - -- for (i = 0; i < 2; ++i) -- { -- if (!used[2 * i]) -+ while (njumps > 3) - { -- l[i] = NULL_RTX; -- continue; -+ start = NEXT_INSN (start); -+ if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0) -+ || CALL_P (start)) -+ njumps--, isjump = true; -+ else -+ isjump = false; -+ nbytes -= ix86_min_insn_size (start); - } -- vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i])); -- vperm = force_reg (V32QImode, vperm); -- l[i] = gen_reg_rtx (V32QImode); -- op = gen_lowpart (V32QImode, i ? d->op1 : d->op0); -- emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm)); -- } -+ gcc_assert (njumps >= 0); -+ if (dump_file) -+ fprintf (dump_file, "Interval %i to %i has %i bytes\n", -+ INSN_UID (start), INSN_UID (insn), nbytes); - -- for (i = 0; i < 2; ++i) -- { -- if (h[i] && l[i]) -+ if (njumps == 3 && isjump && nbytes < 16) - { -- op = gen_reg_rtx (V32QImode); -- emit_insn (gen_iorv32qi3 (op, l[i], h[i])); -- l[i] = op; -+ int padsize = 15 - nbytes + ix86_min_insn_size (insn); -+ -+ if (dump_file) -+ fprintf (dump_file, "Padding insn %i by %i bytes!\n", -+ INSN_UID (insn), padsize); -+ emit_insn_before (gen_pad (GEN_INT (padsize)), insn); - } -- else if (h[i]) -- l[i] = h[i]; - } -- -- gcc_assert (l[0] && l[1]); -- op = d->target; -- if (d->vmode != V32QImode) -- op = gen_reg_rtx (V32QImode); -- emit_insn (gen_iorv32qi3 (op, l[0], l[1])); -- if (op != d->target) -- emit_move_insn (d->target, gen_lowpart (d->vmode, op)); -- return true; - } -+#endif - --/* The guts of ix86_vectorize_vec_perm_const. With all of the interface bits -- taken care of, perform the expansion in D and return true on success. */ -- --static bool --ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) -+/* AMD Athlon works faster -+ when RET is not destination of conditional jump or directly preceded -+ by other jump instruction. We avoid the penalty by inserting NOP just -+ before the RET instructions in such cases. */ -+static void -+ix86_pad_returns (void) - { -- /* Try a single instruction expansion. */ -- if (expand_vec_perm_1 (d)) -- return true; -- -- /* Try sequences of two instructions. */ -- -- if (expand_vec_perm_pshuflw_pshufhw (d)) -- return true; -- -- if (expand_vec_perm_palignr (d, false)) -- return true; -- -- if (expand_vec_perm_interleave2 (d)) -- return true; -- -- if (expand_vec_perm_broadcast (d)) -- return true; -- -- if (expand_vec_perm_vpermq_perm_1 (d)) -- return true; -- -- if (expand_vec_perm_vperm2f128 (d)) -- return true; -- -- if (expand_vec_perm_pblendv (d)) -- return true; -- -- /* Try sequences of three instructions. 
*/ -- -- if (expand_vec_perm_even_odd_pack (d)) -- return true; -- -- if (expand_vec_perm_2vperm2f128_vshuf (d)) -- return true; -- -- if (expand_vec_perm_pshufb2 (d)) -- return true; -- -- if (expand_vec_perm_interleave3 (d)) -- return true; -- -- if (expand_vec_perm_vperm2f128_vblend (d)) -- return true; -- -- /* Try sequences of four instructions. */ -- -- if (expand_vec_perm_even_odd_trunc (d)) -- return true; -- if (expand_vec_perm_vpshufb2_vpermq (d)) -- return true; -- -- if (expand_vec_perm_vpshufb2_vpermq_even_odd (d)) -- return true; -- -- if (expand_vec_perm_vpermt2_vpshub2 (d)) -- return true; -+ edge e; -+ edge_iterator ei; - -- /* ??? Look for narrow permutations whose element orderings would -- allow the promotion to a wider mode. */ -+ FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) -+ { -+ basic_block bb = e->src; -+ rtx_insn *ret = BB_END (bb); -+ rtx_insn *prev; -+ bool replace = false; - -- /* ??? Look for sequences of interleave or a wider permute that place -- the data into the correct lanes for a half-vector shuffle like -- pshuf[lh]w or vpermilps. */ -+ if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret)) -+ || optimize_bb_for_size_p (bb)) -+ continue; -+ for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev)) -+ if (active_insn_p (prev) || LABEL_P (prev)) -+ break; -+ if (prev && LABEL_P (prev)) -+ { -+ edge e; -+ edge_iterator ei; - -- /* ??? Look for sequences of interleave that produce the desired results. -- The combinatorics of punpck[lh] get pretty ugly... */ -+ FOR_EACH_EDGE (e, ei, bb->preds) -+ if (EDGE_FREQUENCY (e) && e->src->index >= 0 -+ && !(e->flags & EDGE_FALLTHRU)) -+ { -+ replace = true; -+ break; -+ } -+ } -+ if (!replace) -+ { -+ prev = prev_active_insn (ret); -+ if (prev -+ && ((JUMP_P (prev) && any_condjump_p (prev)) -+ || CALL_P (prev))) -+ replace = true; -+ /* Empty functions get branch mispredict even when -+ the jump destination is not visible to us. */ -+ if (!prev && !optimize_function_for_size_p (cfun)) -+ replace = true; -+ } -+ if (replace) -+ { -+ emit_jump_insn_before (gen_simple_return_internal_long (), ret); -+ delete_insn (ret); -+ } -+ } -+} - -- if (expand_vec_perm_even_odd (d)) -- return true; -+/* Count the minimum number of instructions in BB. Return 4 if the -+ number of instructions >= 4. */ - -- /* Even longer sequences. */ -- if (expand_vec_perm_vpshufb4_vpermq2 (d)) -- return true; -+static int -+ix86_count_insn_bb (basic_block bb) -+{ -+ rtx_insn *insn; -+ int insn_count = 0; - -- /* See if we can get the same permutation in different vector integer -- mode. */ -- struct expand_vec_perm_d nd; -- if (canonicalize_vector_int_perm (d, &nd) && expand_vec_perm_1 (&nd)) -+ /* Count number of instructions in this block. Return 4 if the number -+ of instructions >= 4. */ -+ FOR_BB_INSNS (bb, insn) - { -- if (!d->testing_p) -- emit_move_insn (d->target, gen_lowpart (d->vmode, nd.target)); -- return true; -+ /* Only happen in exit blocks. */ -+ if (JUMP_P (insn) -+ && ANY_RETURN_P (PATTERN (insn))) -+ break; -+ -+ if (NONDEBUG_INSN_P (insn) -+ && GET_CODE (PATTERN (insn)) != USE -+ && GET_CODE (PATTERN (insn)) != CLOBBER) -+ { -+ insn_count++; -+ if (insn_count >= 4) -+ return insn_count; -+ } - } - -- return false; -+ return insn_count; - } - --/* If a permutation only uses one operand, make it clear. Returns true -- if the permutation references both operands. 
*/ - --static bool --canonicalize_perm (struct expand_vec_perm_d *d) --{ -- int i, which, nelt = d->nelt; -+/* Count the minimum number of instructions in code path in BB. -+ Return 4 if the number of instructions >= 4. */ - -- for (i = which = 0; i < nelt; ++i) -- which |= (d->perm[i] < nelt ? 1 : 2); -+static int -+ix86_count_insn (basic_block bb) -+{ -+ edge e; -+ edge_iterator ei; -+ int min_prev_count; - -- d->one_operand_p = true; -- switch (which) -+ /* Only bother counting instructions along paths with no -+ more than 2 basic blocks between entry and exit. Given -+ that BB has an edge to exit, determine if a predecessor -+ of BB has an edge from entry. If so, compute the number -+ of instructions in the predecessor block. If there -+ happen to be multiple such blocks, compute the minimum. */ -+ min_prev_count = 4; -+ FOR_EACH_EDGE (e, ei, bb->preds) - { -- default: -- gcc_unreachable(); -+ edge prev_e; -+ edge_iterator prev_ei; - -- case 3: -- if (!rtx_equal_p (d->op0, d->op1)) -- { -- d->one_operand_p = false; -+ if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)) -+ { -+ min_prev_count = 0; - break; -- } -- /* The elements of PERM do not suggest that only the first operand -- is used, but both operands are identical. Allow easier matching -- of the permutation by folding the permutation into the single -- input vector. */ -- /* FALLTHRU */ -- -- case 2: -- for (i = 0; i < nelt; ++i) -- d->perm[i] &= nelt - 1; -- d->op0 = d->op1; -- break; -- -- case 1: -- d->op1 = d->op0; -- break; -+ } -+ FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds) -+ { -+ if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)) -+ { -+ int count = ix86_count_insn_bb (e->src); -+ if (count < min_prev_count) -+ min_prev_count = count; -+ break; -+ } -+ } - } - -- return (which == 3); -+ if (min_prev_count < 4) -+ min_prev_count += ix86_count_insn_bb (bb); -+ -+ return min_prev_count; - } - --/* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */ -+/* Pad short function to 4 instructions. */ - --static bool --ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, -- rtx op1, const vec_perm_indices &sel) -+static void -+ix86_pad_short_function (void) - { -- struct expand_vec_perm_d d; -- unsigned char perm[MAX_VECT_LEN]; -- unsigned int i, nelt, which; -- bool two_args; -+ edge e; -+ edge_iterator ei; - -- d.target = target; -- d.op0 = op0; -- d.op1 = op1; -+ FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) -+ { -+ rtx_insn *ret = BB_END (e->src); -+ if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret))) -+ { -+ int insn_count = ix86_count_insn (e->src); - -- d.vmode = vmode; -- gcc_assert (VECTOR_MODE_P (d.vmode)); -- d.nelt = nelt = GET_MODE_NUNITS (d.vmode); -- d.testing_p = !target; -+ /* Pad short function. */ -+ if (insn_count < 4) -+ { -+ rtx_insn *insn = ret; - -- gcc_assert (sel.length () == nelt); -- gcc_checking_assert (sizeof (d.perm) == sizeof (perm)); -+ /* Find epilogue. */ -+ while (insn -+ && (!NOTE_P (insn) -+ || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG)) -+ insn = PREV_INSN (insn); - -- /* Given sufficient ISA support we can just return true here -- for selected vector modes. */ -- switch (d.vmode) -- { -- case E_V16SFmode: -- case E_V16SImode: -- case E_V8DImode: -- case E_V8DFmode: -- if (!TARGET_AVX512F) -- return false; -- /* All implementable with a single vperm[it]2 insn. */ -- if (d.testing_p) -- return true; -- break; -- case E_V32HImode: -- if (!TARGET_AVX512BW) -- return false; -- if (d.testing_p) -- /* All implementable with a single vperm[it]2 insn. 
*/ -- return true; -- break; -- case E_V64QImode: -- if (!TARGET_AVX512BW) -- return false; -- if (d.testing_p) -- /* Implementable with 2 vperm[it]2, 2 vpshufb and 1 or insn. */ -- return true; -- break; -- case E_V8SImode: -- case E_V8SFmode: -- case E_V4DFmode: -- case E_V4DImode: -- if (!TARGET_AVX) -- return false; -- if (d.testing_p && TARGET_AVX512VL) -- /* All implementable with a single vperm[it]2 insn. */ -- return true; -- break; -- case E_V16HImode: -- if (!TARGET_SSE2) -- return false; -- if (d.testing_p && TARGET_AVX2) -- /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */ -- return true; -- break; -- case E_V32QImode: -- if (!TARGET_SSE2) -- return false; -- if (d.testing_p && TARGET_AVX2) -- /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */ -- return true; -- break; -- case E_V8HImode: -- case E_V16QImode: -- if (!TARGET_SSE2) -- return false; -- /* Fall through. */ -- case E_V4SImode: -- case E_V4SFmode: -- if (!TARGET_SSE) -- return false; -- /* All implementable with a single vpperm insn. */ -- if (d.testing_p && TARGET_XOP) -- return true; -- /* All implementable with 2 pshufb + 1 ior. */ -- if (d.testing_p && TARGET_SSSE3) -- return true; -- break; -- case E_V2DImode: -- case E_V2DFmode: -- if (!TARGET_SSE) -- return false; -- /* All implementable with shufpd or unpck[lh]pd. */ -- if (d.testing_p) -- return true; -- break; -- default: -- return false; -- } -+ if (!insn) -+ insn = ret; - -- for (i = which = 0; i < nelt; ++i) -- { -- unsigned char e = sel[i]; -- gcc_assert (e < 2 * nelt); -- d.perm[i] = e; -- perm[i] = e; -- which |= (e < nelt ? 1 : 2); -+ /* Two NOPs count as one instruction. */ -+ insn_count = 2 * (4 - insn_count); -+ emit_insn_before (gen_nops (GEN_INT (insn_count)), insn); -+ } -+ } - } -+} - -- if (d.testing_p) -- { -- /* For all elements from second vector, fold the elements to first. */ -- if (which == 2) -- for (i = 0; i < nelt; ++i) -- d.perm[i] -= nelt; -- -- /* Check whether the mask can be applied to the vector type. */ -- d.one_operand_p = (which != 3); -- -- /* Implementable with shufps or pshufd. */ -- if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode)) -- return true; -+/* Fix up a Windows system unwinder issue. If an EH region falls through into -+ the epilogue, the Windows system unwinder will apply epilogue logic and -+ produce incorrect offsets. This can be avoided by adding a nop between -+ the last insn that can throw and the first insn of the epilogue. */ - -- /* Otherwise we have to go through the motions and see if we can -- figure out how to generate the requested permutation. */ -- d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1); -- d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2); -- if (!d.one_operand_p) -- d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3); -+static void -+ix86_seh_fixup_eh_fallthru (void) -+{ -+ edge e; -+ edge_iterator ei; - -- start_sequence (); -- bool ret = ix86_expand_vec_perm_const_1 (&d); -- end_sequence (); -+ FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) -+ { -+ rtx_insn *insn, *next; - -- return ret; -- } -+ /* Find the beginning of the epilogue. */ -+ for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn)) -+ if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG) -+ break; -+ if (insn == NULL) -+ continue; - -- two_args = canonicalize_perm (&d); -+ /* We only care about preceding insns that can throw. 
*/ -+ insn = prev_active_insn (insn); -+ if (insn == NULL || !can_throw_internal (insn)) -+ continue; - -- if (ix86_expand_vec_perm_const_1 (&d)) -- return true; -+ /* Do not separate calls from their debug information. */ -+ for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next)) -+ if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION) -+ insn = next; -+ else -+ break; - -- /* If the selector says both arguments are needed, but the operands are the -- same, the above tried to expand with one_operand_p and flattened selector. -- If that didn't work, retry without one_operand_p; we succeeded with that -- during testing. */ -- if (two_args && d.one_operand_p) -- { -- d.one_operand_p = false; -- memcpy (d.perm, perm, sizeof (perm)); -- return ix86_expand_vec_perm_const_1 (&d); -+ emit_insn_after (gen_nops (const1_rtx), insn); - } -- -- return false; - } - --void --ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd) -+/* Implement machine specific optimizations. We implement padding of returns -+ for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */ -+static void -+ix86_reorg (void) - { -- struct expand_vec_perm_d d; -- unsigned i, nelt; -- -- d.target = targ; -- d.op0 = op0; -- d.op1 = op1; -- d.vmode = GET_MODE (targ); -- d.nelt = nelt = GET_MODE_NUNITS (d.vmode); -- d.one_operand_p = false; -- d.testing_p = false; -+ /* We are freeing block_for_insn in the toplev to keep compatibility -+ with old MDEP_REORGS that are not CFG based. Recompute it now. */ -+ compute_bb_for_insn (); - -- for (i = 0; i < nelt; ++i) -- d.perm[i] = i * 2 + odd; -+ if (TARGET_SEH && current_function_has_exception_handlers ()) -+ ix86_seh_fixup_eh_fallthru (); - -- /* We'll either be able to implement the permutation directly... */ -- if (expand_vec_perm_1 (&d)) -- return; -+ if (optimize && optimize_function_for_speed_p (cfun)) -+ { -+ if (TARGET_PAD_SHORT_FUNCTION) -+ ix86_pad_short_function (); -+ else if (TARGET_PAD_RETURNS) -+ ix86_pad_returns (); -+#ifdef ASM_OUTPUT_MAX_SKIP_PAD -+ if (TARGET_FOUR_JUMP_LIMIT) -+ ix86_avoid_jump_mispredicts (); -+#endif -+ } -+} - -- /* ... or we use the special-case patterns. */ -- expand_vec_perm_even_odd_1 (&d, odd); -+/* Return nonzero when QImode register that must be represented via REX prefix -+ is used. */ -+bool -+x86_extended_QIreg_mentioned_p (rtx_insn *insn) -+{ -+ int i; -+ extract_insn_cached (insn); -+ for (i = 0; i < recog_data.n_operands; i++) -+ if (GENERAL_REG_P (recog_data.operand[i]) -+ && !QI_REGNO_P (REGNO (recog_data.operand[i]))) -+ return true; -+ return false; - } - --static void --ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p) -+/* Return true when INSN mentions register that must be encoded using REX -+ prefix. */ -+bool -+x86_extended_reg_mentioned_p (rtx insn) - { -- struct expand_vec_perm_d d; -- unsigned i, nelt, base; -- bool ok; -- -- d.target = targ; -- d.op0 = op0; -- d.op1 = op1; -- d.vmode = GET_MODE (targ); -- d.nelt = nelt = GET_MODE_NUNITS (d.vmode); -- d.one_operand_p = false; -- d.testing_p = false; -- -- base = high_p ? nelt / 2 : 0; -- for (i = 0; i < nelt / 2; ++i) -+ subrtx_iterator::array_type array; -+ FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST) - { -- d.perm[i * 2] = i + base; -- d.perm[i * 2 + 1] = i + base + nelt; -+ const_rtx x = *iter; -+ if (REG_P (x) -+ && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x)))) -+ return true; - } -- -- /* Note that for AVX this isn't one instruction. 
*/ -- ok = ix86_expand_vec_perm_const_1 (&d); -- gcc_assert (ok); -+ return false; - } - -+/* If profitable, negate (without causing overflow) integer constant -+ of mode MODE at location LOC. Return true in this case. */ -+bool -+x86_maybe_negate_const_int (rtx *loc, machine_mode mode) -+{ -+ HOST_WIDE_INT val; - --/* Expand a vector operation CODE for a V*QImode in terms of the -- same operation on V*HImode. */ -- --void --ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2) --{ -- machine_mode qimode = GET_MODE (dest); -- machine_mode himode; -- rtx (*gen_il) (rtx, rtx, rtx); -- rtx (*gen_ih) (rtx, rtx, rtx); -- rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h; -- struct expand_vec_perm_d d; -- bool ok, full_interleave; -- bool uns_p = false; -- int i; -+ if (!CONST_INT_P (*loc)) -+ return false; - -- switch (qimode) -+ switch (mode) - { -- case E_V16QImode: -- himode = V8HImode; -- gen_il = gen_vec_interleave_lowv16qi; -- gen_ih = gen_vec_interleave_highv16qi; -- break; -- case E_V32QImode: -- himode = V16HImode; -- gen_il = gen_avx2_interleave_lowv32qi; -- gen_ih = gen_avx2_interleave_highv32qi; -- break; -- case E_V64QImode: -- himode = V32HImode; -- gen_il = gen_avx512bw_interleave_lowv64qi; -- gen_ih = gen_avx512bw_interleave_highv64qi; -- break; -- default: -- gcc_unreachable (); -- } -+ case E_DImode: -+ /* DImode x86_64 constants must fit in 32 bits. */ -+ gcc_assert (x86_64_immediate_operand (*loc, mode)); - -- op2_l = op2_h = op2; -- switch (code) -- { -- case MULT: -- /* Unpack data such that we've got a source byte in each low byte of -- each word. We don't care what goes into the high byte of each word. -- Rather than trying to get zero in there, most convenient is to let -- it be a copy of the low byte. */ -- op2_l = gen_reg_rtx (qimode); -- op2_h = gen_reg_rtx (qimode); -- emit_insn (gen_il (op2_l, op2, op2)); -- emit_insn (gen_ih (op2_h, op2, op2)); -- -- op1_l = gen_reg_rtx (qimode); -- op1_h = gen_reg_rtx (qimode); -- emit_insn (gen_il (op1_l, op1, op1)); -- emit_insn (gen_ih (op1_h, op1, op1)); -- full_interleave = qimode == V16QImode; -+ mode = SImode; - break; - -- case ASHIFT: -- case LSHIFTRT: -- uns_p = true; -- /* FALLTHRU */ -- case ASHIFTRT: -- op1_l = gen_reg_rtx (himode); -- op1_h = gen_reg_rtx (himode); -- ix86_expand_sse_unpack (op1_l, op1, uns_p, false); -- ix86_expand_sse_unpack (op1_h, op1, uns_p, true); -- full_interleave = true; -+ case E_SImode: -+ case E_HImode: -+ case E_QImode: - break; -+ - default: - gcc_unreachable (); - } - -- /* Perform the operation. */ -- res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX, -- 1, OPTAB_DIRECT); -- res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX, -- 1, OPTAB_DIRECT); -- gcc_assert (res_l && res_h); -+ /* Avoid overflows. */ -+ if (mode_signbit_p (mode, *loc)) -+ return false; - -- /* Merge the data back into the right place. */ -- d.target = dest; -- d.op0 = gen_lowpart (qimode, res_l); -- d.op1 = gen_lowpart (qimode, res_h); -- d.vmode = qimode; -- d.nelt = GET_MODE_NUNITS (qimode); -- d.one_operand_p = false; -- d.testing_p = false; -+ val = INTVAL (*loc); - -- if (full_interleave) -+ /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'. -+ Exceptions: -128 encodes smaller than 128, so swap sign and op. */ -+ if ((val < 0 && val != -128) -+ || val == 128) - { -- /* For SSE2, we used an full interleave, so the desired -- results are in the even elements. 
*/ -- for (i = 0; i < d.nelt; ++i) -- d.perm[i] = i * 2; -+ *loc = GEN_INT (-val); -+ return true; - } -- else -- { -- /* For AVX, the interleave used above was not cross-lane. So the -- extraction is evens but with the second and third quarter swapped. -- Happily, that is even one insn shorter than even extraction. -- For AVX512BW we have 4 lanes. We extract evens from within a lane, -- always first from the first and then from the second source operand, -- the index bits above the low 4 bits remains the same. -- Thus, for d.nelt == 32 we want permutation -- 0,2,4,..14, 32,34,36,..46, 16,18,20,..30, 48,50,52,..62 -- and for d.nelt == 64 we want permutation -- 0,2,4,..14, 64,66,68,..78, 16,18,20,..30, 80,82,84,..94, -- 32,34,36,..46, 96,98,100,..110, 48,50,52,..62, 112,114,116,..126. */ -- for (i = 0; i < d.nelt; ++i) -- d.perm[i] = ((i * 2) & 14) + ((i & 8) ? d.nelt : 0) + (i & ~15); -- } -- -- ok = ix86_expand_vec_perm_const_1 (&d); -- gcc_assert (ok); - -- set_unique_reg_note (get_last_insn (), REG_EQUAL, -- gen_rtx_fmt_ee (code, qimode, op1, op2)); -+ return false; - } - --/* Helper function of ix86_expand_mul_widen_evenodd. Return true -- if op is CONST_VECTOR with all odd elements equal to their -- preceding element. */ -- --static bool --const_vector_equal_evenodd_p (rtx op) --{ -- machine_mode mode = GET_MODE (op); -- int i, nunits = GET_MODE_NUNITS (mode); -- if (GET_CODE (op) != CONST_VECTOR -- || nunits != CONST_VECTOR_NUNITS (op)) -- return false; -- for (i = 0; i < nunits; i += 2) -- if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1)) -- return false; -- return true; --} -+/* Generate an unsigned DImode/SImode to FP conversion. This is the same code -+ optabs would emit if we didn't have TFmode patterns. */ - - void --ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2, -- bool uns_p, bool odd_p) -+x86_emit_floatuns (rtx operands[2]) - { -- machine_mode mode = GET_MODE (op1); -- machine_mode wmode = GET_MODE (dest); -- rtx x; -- rtx orig_op1 = op1, orig_op2 = op2; -- -- if (!nonimmediate_operand (op1, mode)) -- op1 = force_reg (mode, op1); -- if (!nonimmediate_operand (op2, mode)) -- op2 = force_reg (mode, op2); -+ rtx_code_label *neglab, *donelab; -+ rtx i0, i1, f0, in, out; -+ machine_mode mode, inmode; - -- /* We only play even/odd games with vectors of SImode. */ -- gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode); -+ inmode = GET_MODE (operands[1]); -+ gcc_assert (inmode == SImode || inmode == DImode); - -- /* If we're looking for the odd results, shift those members down to -- the even slots. For some cpus this is faster than a PSHUFD. */ -- if (odd_p) -- { -- /* For XOP use vpmacsdqh, but only for smult, as it is only -- signed. 
*/ -- if (TARGET_XOP && mode == V4SImode && !uns_p) -- { -- x = force_reg (wmode, CONST0_RTX (wmode)); -- emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x)); -- return; -- } -+ out = operands[0]; -+ in = force_reg (inmode, operands[1]); -+ mode = GET_MODE (out); -+ neglab = gen_label_rtx (); -+ donelab = gen_label_rtx (); -+ f0 = gen_reg_rtx (mode); - -- x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode)); -- if (!const_vector_equal_evenodd_p (orig_op1)) -- op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1), -- x, NULL, 1, OPTAB_DIRECT); -- if (!const_vector_equal_evenodd_p (orig_op2)) -- op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2), -- x, NULL, 1, OPTAB_DIRECT); -- op1 = gen_lowpart (mode, op1); -- op2 = gen_lowpart (mode, op2); -- } -+ emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab); - -- if (mode == V16SImode) -- { -- if (uns_p) -- x = gen_vec_widen_umult_even_v16si (dest, op1, op2); -- else -- x = gen_vec_widen_smult_even_v16si (dest, op1, op2); -- } -- else if (mode == V8SImode) -- { -- if (uns_p) -- x = gen_vec_widen_umult_even_v8si (dest, op1, op2); -- else -- x = gen_vec_widen_smult_even_v8si (dest, op1, op2); -- } -- else if (uns_p) -- x = gen_vec_widen_umult_even_v4si (dest, op1, op2); -- else if (TARGET_SSE4_1) -- x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2); -- else -- { -- rtx s1, s2, t0, t1, t2; -+ expand_float (out, in, 0); - -- /* The easiest way to implement this without PMULDQ is to go through -- the motions as if we are performing a full 64-bit multiply. With -- the exception that we need to do less shuffling of the elements. */ -+ emit_jump_insn (gen_jump (donelab)); -+ emit_barrier (); - -- /* Compute the sign-extension, aka highparts, of the two operands. */ -- s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode), -- op1, pc_rtx, pc_rtx); -- s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode), -- op2, pc_rtx, pc_rtx); -+ emit_label (neglab); - -- /* Multiply LO(A) * HI(B), and vice-versa. */ -- t1 = gen_reg_rtx (wmode); -- t2 = gen_reg_rtx (wmode); -- emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2)); -- emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1)); -+ i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL, -+ 1, OPTAB_DIRECT); -+ i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL, -+ 1, OPTAB_DIRECT); -+ i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT); - -- /* Multiply LO(A) * LO(B). */ -- t0 = gen_reg_rtx (wmode); -- emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2)); -+ expand_float (f0, i0, 0); - -- /* Combine and shift the highparts into place. */ -- t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT); -- t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1, -- 1, OPTAB_DIRECT); -+ emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0))); - -- /* Combine high and low parts. */ -- force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT); -- return; -- } -- emit_insn (x); -+ emit_label (donelab); - } -- --void --ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2, -- bool uns_p, bool high_p) -+ -+/* Target hook for scalar_mode_supported_p. 
*/ -+static bool -+ix86_scalar_mode_supported_p (scalar_mode mode) - { -- machine_mode wmode = GET_MODE (dest); -- machine_mode mode = GET_MODE (op1); -- rtx t1, t2, t3, t4, mask; -- -- switch (mode) -- { -- case E_V4SImode: -- t1 = gen_reg_rtx (mode); -- t2 = gen_reg_rtx (mode); -- if (TARGET_XOP && !uns_p) -- { -- /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case, -- shuffle the elements once so that all elements are in the right -- place for immediate use: { A C B D }. */ -- emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx, -- const1_rtx, GEN_INT (3))); -- emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx, -- const1_rtx, GEN_INT (3))); -- } -- else -- { -- /* Put the elements into place for the multiply. */ -- ix86_expand_vec_interleave (t1, op1, op1, high_p); -- ix86_expand_vec_interleave (t2, op2, op2, high_p); -- high_p = false; -- } -- ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p); -- break; -- -- case E_V8SImode: -- /* Shuffle the elements between the lanes. After this we -- have { A B E F | C D G H } for each operand. */ -- t1 = gen_reg_rtx (V4DImode); -- t2 = gen_reg_rtx (V4DImode); -- emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1), -- const0_rtx, const2_rtx, -- const1_rtx, GEN_INT (3))); -- emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2), -- const0_rtx, const2_rtx, -- const1_rtx, GEN_INT (3))); -- -- /* Shuffle the elements within the lanes. After this we -- have { A A B B | C C D D } or { E E F F | G G H H }. */ -- t3 = gen_reg_rtx (V8SImode); -- t4 = gen_reg_rtx (V8SImode); -- mask = GEN_INT (high_p -- ? 2 + (2 << 2) + (3 << 4) + (3 << 6) -- : 0 + (0 << 2) + (1 << 4) + (1 << 6)); -- emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask)); -- emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask)); -- -- ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false); -- break; -- -- case E_V8HImode: -- case E_V16HImode: -- t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX, -- uns_p, OPTAB_DIRECT); -- t2 = expand_binop (mode, -- uns_p ? umul_highpart_optab : smul_highpart_optab, -- op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT); -- gcc_assert (t1 && t2); -- -- t3 = gen_reg_rtx (mode); -- ix86_expand_vec_interleave (t3, t1, t2, high_p); -- emit_move_insn (dest, gen_lowpart (wmode, t3)); -- break; -- -- case E_V16QImode: -- case E_V32QImode: -- case E_V32HImode: -- case E_V16SImode: -- case E_V64QImode: -- t1 = gen_reg_rtx (wmode); -- t2 = gen_reg_rtx (wmode); -- ix86_expand_sse_unpack (t1, op1, uns_p, high_p); -- ix86_expand_sse_unpack (t2, op2, uns_p, high_p); -- -- emit_insn (gen_rtx_SET (dest, gen_rtx_MULT (wmode, t1, t2))); -- break; -- -- default: -- gcc_unreachable (); -- } -+ if (DECIMAL_FLOAT_MODE_P (mode)) -+ return default_decimal_float_supported_p (); -+ else if (mode == TFmode) -+ return true; -+ else -+ return default_scalar_mode_supported_p (mode); - } - --void --ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2) -+/* Implements target hook vector_mode_supported_p. */ -+static bool -+ix86_vector_mode_supported_p (machine_mode mode) - { -- rtx res_1, res_2, res_3, res_4; -- -- res_1 = gen_reg_rtx (V4SImode); -- res_2 = gen_reg_rtx (V4SImode); -- res_3 = gen_reg_rtx (V2DImode); -- res_4 = gen_reg_rtx (V2DImode); -- ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false); -- ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true); -- -- /* Move the results in element 2 down to element 1; we don't care -- what goes in elements 2 and 3. 
Then we can merge the parts -- back together with an interleave. -- -- Note that two other sequences were tried: -- (1) Use interleaves at the start instead of psrldq, which allows -- us to use a single shufps to merge things back at the end. -- (2) Use shufps here to combine the two vectors, then pshufd to -- put the elements in the correct order. -- In both cases the cost of the reformatting stall was too high -- and the overall sequence slower. */ -- -- emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3), -- const0_rtx, const2_rtx, -- const0_rtx, const0_rtx)); -- emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4), -- const0_rtx, const2_rtx, -- const0_rtx, const0_rtx)); -- res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2)); -- -- set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2)); -+ if (TARGET_SSE && VALID_SSE_REG_MODE (mode)) -+ return true; -+ if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode)) -+ return true; -+ if (TARGET_AVX && VALID_AVX256_REG_MODE (mode)) -+ return true; -+ if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode)) -+ return true; -+ if (TARGET_MMX && VALID_MMX_REG_MODE (mode)) -+ return true; -+ if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode)) -+ return true; -+ return false; - } - --void --ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2) -+/* Target hook for c_mode_for_suffix. */ -+static machine_mode -+ix86_c_mode_for_suffix (char suffix) - { -- machine_mode mode = GET_MODE (op0); -- rtx t1, t2, t3, t4, t5, t6; -+ if (suffix == 'q') -+ return TFmode; -+ if (suffix == 'w') -+ return XFmode; - -- if (TARGET_AVX512DQ && mode == V8DImode) -- emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2)); -- else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode) -- emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2)); -- else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode) -- emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2)); -- else if (TARGET_XOP && mode == V2DImode) -- { -- /* op1: A,B,C,D, op2: E,F,G,H */ -- op1 = gen_lowpart (V4SImode, op1); -- op2 = gen_lowpart (V4SImode, op2); -+ return VOIDmode; -+} - -- t1 = gen_reg_rtx (V4SImode); -- t2 = gen_reg_rtx (V4SImode); -- t3 = gen_reg_rtx (V2DImode); -- t4 = gen_reg_rtx (V2DImode); -+/* Worker function for TARGET_MD_ASM_ADJUST. - -- /* t1: B,A,D,C */ -- emit_insn (gen_sse2_pshufd_1 (t1, op1, -- GEN_INT (1), -- GEN_INT (0), -- GEN_INT (3), -- GEN_INT (2))); -+ We implement asm flag outputs, and maintain source compatibility -+ with the old cc0-based compiler. 
*/ - -- /* t2: (B*E),(A*F),(D*G),(C*H) */ -- emit_insn (gen_mulv4si3 (t2, t1, op2)); -+static rtx_insn * -+ix86_md_asm_adjust (vec &outputs, vec &/*inputs*/, -+ vec &constraints, -+ vec &clobbers, HARD_REG_SET &clobbered_regs) -+{ -+ bool saw_asm_flag = false; - -- /* t3: (B*E)+(A*F), (D*G)+(C*H) */ -- emit_insn (gen_xop_phadddq (t3, t2)); -+ start_sequence (); -+ for (unsigned i = 0, n = outputs.length (); i < n; ++i) -+ { -+ const char *con = constraints[i]; -+ if (strncmp (con, "=@cc", 4) != 0) -+ continue; -+ con += 4; -+ if (strchr (con, ',') != NULL) -+ { -+ error ("alternatives not allowed in % flag output"); -+ continue; -+ } - -- /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */ -- emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32))); -+ bool invert = false; -+ if (con[0] == 'n') -+ invert = true, con++; - -- /* Multiply lower parts and add all */ -- t5 = gen_reg_rtx (V2DImode); -- emit_insn (gen_vec_widen_umult_even_v4si (t5, -- gen_lowpart (V4SImode, op1), -- gen_lowpart (V4SImode, op2))); -- op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT); -+ machine_mode mode = CCmode; -+ rtx_code code = UNKNOWN; - -- } -- else -- { -- machine_mode nmode; -- rtx (*umul) (rtx, rtx, rtx); -+ switch (con[0]) -+ { -+ case 'a': -+ if (con[1] == 0) -+ mode = CCAmode, code = EQ; -+ else if (con[1] == 'e' && con[2] == 0) -+ mode = CCCmode, code = NE; -+ break; -+ case 'b': -+ if (con[1] == 0) -+ mode = CCCmode, code = EQ; -+ else if (con[1] == 'e' && con[2] == 0) -+ mode = CCAmode, code = NE; -+ break; -+ case 'c': -+ if (con[1] == 0) -+ mode = CCCmode, code = EQ; -+ break; -+ case 'e': -+ if (con[1] == 0) -+ mode = CCZmode, code = EQ; -+ break; -+ case 'g': -+ if (con[1] == 0) -+ mode = CCGCmode, code = GT; -+ else if (con[1] == 'e' && con[2] == 0) -+ mode = CCGCmode, code = GE; -+ break; -+ case 'l': -+ if (con[1] == 0) -+ mode = CCGCmode, code = LT; -+ else if (con[1] == 'e' && con[2] == 0) -+ mode = CCGCmode, code = LE; -+ break; -+ case 'o': -+ if (con[1] == 0) -+ mode = CCOmode, code = EQ; -+ break; -+ case 'p': -+ if (con[1] == 0) -+ mode = CCPmode, code = EQ; -+ break; -+ case 's': -+ if (con[1] == 0) -+ mode = CCSmode, code = EQ; -+ break; -+ case 'z': -+ if (con[1] == 0) -+ mode = CCZmode, code = EQ; -+ break; -+ } -+ if (code == UNKNOWN) -+ { -+ error ("unknown % flag output %qs", constraints[i]); -+ continue; -+ } -+ if (invert) -+ code = reverse_condition (code); - -- if (mode == V2DImode) -+ rtx dest = outputs[i]; -+ if (!saw_asm_flag) - { -- umul = gen_vec_widen_umult_even_v4si; -- nmode = V4SImode; -+ /* This is the first asm flag output. Here we put the flags -+ register in as the real output and adjust the condition to -+ allow it. */ -+ constraints[i] = "=Bf"; -+ outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG); -+ saw_asm_flag = true; - } -- else if (mode == V4DImode) -+ else - { -- umul = gen_vec_widen_umult_even_v8si; -- nmode = V8SImode; -+ /* We don't need the flags register as output twice. */ -+ constraints[i] = "=X"; -+ outputs[i] = gen_rtx_SCRATCH (SImode); - } -- else if (mode == V8DImode) -+ -+ rtx x = gen_rtx_REG (mode, FLAGS_REG); -+ x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx); -+ -+ machine_mode dest_mode = GET_MODE (dest); -+ if (!SCALAR_INT_MODE_P (dest_mode)) - { -- umul = gen_vec_widen_umult_even_v16si; -- nmode = V16SImode; -+ error ("invalid type for % flag output"); -+ continue; - } -- else -- gcc_unreachable (); - -+ if (dest_mode == DImode && !TARGET_64BIT) -+ dest_mode = SImode; - -- /* Multiply low parts. 
*/ -- t1 = gen_reg_rtx (mode); -- emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2))); -+ if (dest_mode != QImode) -+ { -+ rtx destqi = gen_reg_rtx (QImode); -+ emit_insn (gen_rtx_SET (destqi, x)); - -- /* Shift input vectors right 32 bits so we can multiply high parts. */ -- t6 = GEN_INT (32); -- t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT); -- t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT); -+ if (TARGET_ZERO_EXTEND_WITH_AND -+ && optimize_function_for_speed_p (cfun)) -+ { -+ x = force_reg (dest_mode, const0_rtx); - -- /* Multiply high parts by low parts. */ -- t4 = gen_reg_rtx (mode); -- t5 = gen_reg_rtx (mode); -- emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2))); -- emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1))); -+ emit_insn (gen_movstrictqi (gen_lowpart (QImode, x), destqi)); -+ } -+ else -+ { -+ x = gen_rtx_ZERO_EXTEND (dest_mode, destqi); -+ if (dest_mode == GET_MODE (dest) -+ && !register_operand (dest, GET_MODE (dest))) -+ x = force_reg (dest_mode, x); -+ } -+ } - -- /* Combine and shift the highparts back. */ -- t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT); -- t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT); -+ if (dest_mode != GET_MODE (dest)) -+ { -+ rtx tmp = gen_reg_rtx (SImode); - -- /* Combine high and low parts. */ -- force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT); -+ emit_insn (gen_rtx_SET (tmp, x)); -+ emit_insn (gen_zero_extendsidi2 (dest, tmp)); -+ } -+ else -+ emit_insn (gen_rtx_SET (dest, x)); - } -+ rtx_insn *seq = get_insns (); -+ end_sequence (); - -- set_unique_reg_note (get_last_insn (), REG_EQUAL, -- gen_rtx_MULT (mode, op1, op2)); -+ if (saw_asm_flag) -+ return seq; -+ else -+ { -+ /* If we had no asm flag outputs, clobber the flags. */ -+ clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG)); -+ SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG); -+ return NULL; -+ } - } - --/* Return 1 if control tansfer instruction INSN -- should be encoded with notrack prefix. */ -+/* Implements target vector targetm.asm.encode_section_info. */ - --static bool --ix86_notrack_prefixed_insn_p (rtx insn) -+static void ATTRIBUTE_UNUSED -+ix86_encode_section_info (tree decl, rtx rtl, int first) - { -- if (!insn || !((flag_cf_protection & CF_BRANCH))) -- return false; -- -- if (CALL_P (insn)) -- { -- rtx call = get_call_rtx_from (insn); -- gcc_assert (call != NULL_RTX); -- rtx addr = XEXP (call, 0); -+ default_encode_section_info (decl, rtl, first); - -- /* Do not emit 'notrack' if it's not an indirect call. */ -- if (MEM_P (addr) -- && GET_CODE (XEXP (addr, 0)) == SYMBOL_REF) -- return false; -- else -- return find_reg_note (insn, REG_CALL_NOCF_CHECK, 0); -- } -+ if (ix86_in_large_data_p (decl)) -+ SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR; -+} - -- if (JUMP_P (insn) && !flag_cet_switch) -- { -- rtx target = JUMP_LABEL (insn); -- if (target == NULL_RTX || ANY_RETURN_P (target)) -- return false; -+/* Worker function for REVERSE_CONDITION. */ - -- /* Check the jump is a switch table. */ -- rtx_insn *label = as_a (target); -- rtx_insn *table = next_insn (label); -- if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table)) -- return false; -- else -- return true; -- } -- return false; -+enum rtx_code -+ix86_reverse_condition (enum rtx_code code, machine_mode mode) -+{ -+ return (mode == CCFPmode -+ ? 
reverse_condition_maybe_unordered (code) -+ : reverse_condition (code)); - } - --/* Calculate integer abs() using only SSE2 instructions. */ -+/* Output code to perform an x87 FP register move, from OPERANDS[1] -+ to OPERANDS[0]. */ - --void --ix86_expand_sse2_abs (rtx target, rtx input) -+const char * -+output_387_reg_move (rtx_insn *insn, rtx *operands) - { -- machine_mode mode = GET_MODE (target); -- rtx tmp0, tmp1, x; -- -- switch (mode) -+ if (REG_P (operands[0])) - { -- case E_V2DImode: -- case E_V4DImode: -- /* For 64-bit signed integer X, with SSE4.2 use -- pxor t0, t0; pcmpgtq X, t0; pxor t0, X; psubq t0, X. -- Otherwise handle it similarly to V4SImode, except use 64 as W instead of -- 32 and use logical instead of arithmetic right shift (which is -- unimplemented) and subtract. */ -- if (TARGET_SSE4_2) -- { -- tmp0 = gen_reg_rtx (mode); -- tmp1 = gen_reg_rtx (mode); -- emit_move_insn (tmp1, CONST0_RTX (mode)); -- if (mode == E_V2DImode) -- emit_insn (gen_sse4_2_gtv2di3 (tmp0, tmp1, input)); -- else -- emit_insn (gen_avx2_gtv4di3 (tmp0, tmp1, input)); -+ if (REG_P (operands[1]) -+ && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) -+ { -+ if (REGNO (operands[0]) == FIRST_STACK_REG) -+ return output_387_ffreep (operands, 0); -+ return "fstp\t%y0"; - } -+ if (STACK_TOP_P (operands[0])) -+ return "fld%Z1\t%y1"; -+ return "fst\t%y0"; -+ } -+ else if (MEM_P (operands[0])) -+ { -+ gcc_assert (REG_P (operands[1])); -+ if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) -+ return "fstp%Z0\t%y0"; - else - { -- tmp0 = expand_simple_binop (mode, LSHIFTRT, input, -- GEN_INT (GET_MODE_UNIT_BITSIZE (mode) -- - 1), NULL, 0, OPTAB_DIRECT); -- tmp0 = expand_simple_unop (mode, NEG, tmp0, NULL, false); -+ /* There is no non-popping store to memory for XFmode. -+ So if we need one, follow the store with a load. */ -+ if (GET_MODE (operands[0]) == XFmode) -+ return "fstp%Z0\t%y0\n\tfld%Z0\t%y0"; -+ else -+ return "fst%Z0\t%y0"; - } -- -- tmp1 = expand_simple_binop (mode, XOR, tmp0, input, -- NULL, 0, OPTAB_DIRECT); -- x = expand_simple_binop (mode, MINUS, tmp1, tmp0, -- target, 0, OPTAB_DIRECT); -- break; -- -- case E_V4SImode: -- /* For 32-bit signed integer X, the best way to calculate the absolute -- value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */ -- tmp0 = expand_simple_binop (mode, ASHIFTRT, input, -- GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1), -- NULL, 0, OPTAB_DIRECT); -- tmp1 = expand_simple_binop (mode, XOR, tmp0, input, -- NULL, 0, OPTAB_DIRECT); -- x = expand_simple_binop (mode, MINUS, tmp1, tmp0, -- target, 0, OPTAB_DIRECT); -- break; -- -- case E_V8HImode: -- /* For 16-bit signed integer X, the best way to calculate the absolute -- value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */ -- tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0); -- -- x = expand_simple_binop (mode, SMAX, tmp0, input, -- target, 0, OPTAB_DIRECT); -- break; -- -- case E_V16QImode: -- /* For 8-bit signed integer X, the best way to calculate the absolute -- value of X is min ((unsigned char) X, (unsigned char) (-X)), -- as SSE2 provides the PMINUB insn. */ -- tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0); -- -- x = expand_simple_binop (V16QImode, UMIN, tmp0, input, -- target, 0, OPTAB_DIRECT); -- break; -- -- default: -- gcc_unreachable (); - } -- -- if (x != target) -- emit_move_insn (target, x); -+ else -+ gcc_unreachable(); - } -+#ifdef TARGET_SOLARIS -+/* Solaris implementation of TARGET_ASM_NAMED_SECTION. 
*/ - --/* Expand an extract from a vector register through pextr insn. -- Return true if successful. */ -- --bool --ix86_expand_pextr (rtx *operands) -+static void -+i386_solaris_elf_named_section (const char *name, unsigned int flags, -+ tree decl) - { -- rtx dst = operands[0]; -- rtx src = operands[1]; -- -- unsigned int size = INTVAL (operands[2]); -- unsigned int pos = INTVAL (operands[3]); -- -- if (SUBREG_P (dst)) -+ /* With Binutils 2.15, the "@unwind" marker must be specified on -+ every occurrence of the ".eh_frame" section, not just the first -+ one. */ -+ if (TARGET_64BIT -+ && strcmp (name, ".eh_frame") == 0) - { -- /* Reject non-lowpart subregs. */ -- if (SUBREG_BYTE (dst) > 0) -- return false; -- dst = SUBREG_REG (dst); -+ fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name, -+ flags & SECTION_WRITE ? "aw" : "a"); -+ return; - } -- -- if (SUBREG_P (src)) -+ -+#ifndef USE_GAS -+ if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE) - { -- pos += SUBREG_BYTE (src) * BITS_PER_UNIT; -- src = SUBREG_REG (src); -+ solaris_elf_asm_comdat_section (name, flags, decl); -+ return; - } - -- switch (GET_MODE (src)) -+ /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the -+ SPARC assembler. One cannot mix single-letter flags and #exclude, so -+ only emit the latter here. */ -+ if (flags & SECTION_EXCLUDE) - { -- case E_V16QImode: -- case E_V8HImode: -- case E_V4SImode: -- case E_V2DImode: -- case E_V1TImode: -- case E_TImode: -- { -- machine_mode srcmode, dstmode; -- rtx d, pat; -+ fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name); -+ return; -+ } -+#endif - -- if (!int_mode_for_size (size, 0).exists (&dstmode)) -- return false; -+ default_elf_asm_named_section (name, flags, decl); -+} -+#endif /* TARGET_SOLARIS */ - -- switch (dstmode) -- { -- case E_QImode: -- if (!TARGET_SSE4_1) -- return false; -- srcmode = V16QImode; -- break; -+/* Return the mangling of TYPE if it is an extended fundamental type. */ - -- case E_HImode: -- if (!TARGET_SSE2) -- return false; -- srcmode = V8HImode; -- break; -+static const char * -+ix86_mangle_type (const_tree type) -+{ -+ type = TYPE_MAIN_VARIANT (type); - -- case E_SImode: -- if (!TARGET_SSE4_1) -- return false; -- srcmode = V4SImode; -- break; -+ if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE -+ && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE) -+ return NULL; - -- case E_DImode: -- gcc_assert (TARGET_64BIT); -- if (!TARGET_SSE4_1) -- return false; -- srcmode = V2DImode; -- break; -+ switch (TYPE_MODE (type)) -+ { -+ case E_TFmode: -+ /* __float128 is "g". */ -+ return "g"; -+ case E_XFmode: -+ /* "long double" or __float80 is "e". */ -+ return "e"; -+ default: -+ return NULL; -+ } -+} - -- default: -- return false; -- } -+static GTY(()) tree ix86_tls_stack_chk_guard_decl; -+ -+static tree -+ix86_stack_protect_guard (void) -+{ -+ if (TARGET_SSP_TLS_GUARD) -+ { -+ tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1); -+ int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg); -+ tree type = build_qualified_type (type_node, qual); -+ tree t; - -- /* Reject extractions from misaligned positions. */ -- if (pos & (size-1)) -- return false; -+ if (global_options_set.x_ix86_stack_protector_guard_symbol_str) -+ { -+ t = ix86_tls_stack_chk_guard_decl; - -- if (GET_MODE (dst) == dstmode) -- d = dst; -- else -- d = gen_reg_rtx (dstmode); -+ if (t == NULL) -+ { -+ rtx x; - -- /* Construct insn pattern. 
*/ -- pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (pos / size))); -- pat = gen_rtx_VEC_SELECT (dstmode, gen_lowpart (srcmode, src), pat); -+ t = build_decl -+ (UNKNOWN_LOCATION, VAR_DECL, -+ get_identifier (ix86_stack_protector_guard_symbol_str), -+ type); -+ TREE_STATIC (t) = 1; -+ TREE_PUBLIC (t) = 1; -+ DECL_EXTERNAL (t) = 1; -+ TREE_USED (t) = 1; -+ TREE_THIS_VOLATILE (t) = 1; -+ DECL_ARTIFICIAL (t) = 1; -+ DECL_IGNORED_P (t) = 1; - -- /* Let the rtl optimizers know about the zero extension performed. */ -- if (dstmode == QImode || dstmode == HImode) -- { -- pat = gen_rtx_ZERO_EXTEND (SImode, pat); -- d = gen_lowpart (SImode, d); -- } -+ /* Do not share RTL as the declaration is visible outside of -+ current function. */ -+ x = DECL_RTL (t); -+ RTX_FLAG (x, used) = 1; - -- emit_insn (gen_rtx_SET (d, pat)); -+ ix86_tls_stack_chk_guard_decl = t; -+ } -+ } -+ else -+ { -+ tree asptrtype = build_pointer_type (type); - -- if (d != dst) -- emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d)); -- return true; -- } -+ t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset); -+ t = build2 (MEM_REF, asptrtype, t, -+ build_int_cst (asptrtype, 0)); -+ TREE_THIS_VOLATILE (t) = 1; -+ } - -- default: -- return false; -+ return t; - } -+ -+ return default_stack_protect_guard (); - } - --/* Expand an insert into a vector register through pinsr insn. -- Return true if successful. */ -+/* For 32-bit code we can save PIC register setup by using -+ __stack_chk_fail_local hidden function instead of calling -+ __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC -+ register, so it is better to call __stack_chk_fail directly. */ - --bool --ix86_expand_pinsr (rtx *operands) -+static tree ATTRIBUTE_UNUSED -+ix86_stack_protect_fail (void) - { -- rtx dst = operands[0]; -- rtx src = operands[3]; -+ return TARGET_64BIT -+ ? default_external_stack_protect_fail () -+ : default_hidden_stack_protect_fail (); -+} - -- unsigned int size = INTVAL (operands[1]); -- unsigned int pos = INTVAL (operands[2]); -+/* Select a format to encode pointers in exception handling data. CODE -+ is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is -+ true if the symbol may be affected by dynamic relocations. - -- if (SUBREG_P (dst)) -+ ??? All x86 object file formats are capable of representing this. -+ After all, the relocation needed is the same as for the call insn. -+ Whether or not a particular assembler allows us to enter such, I -+ guess we'll have to see. */ -+int -+asm_preferred_eh_data_format (int code, int global) -+{ -+ if (flag_pic) -+ { -+ int type = DW_EH_PE_sdata8; -+ if (!TARGET_64BIT -+ || ix86_cmodel == CM_SMALL_PIC -+ || (ix86_cmodel == CM_MEDIUM_PIC && (global || code))) -+ type = DW_EH_PE_sdata4; -+ return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type; -+ } -+ if (ix86_cmodel == CM_SMALL -+ || (ix86_cmodel == CM_MEDIUM && code)) -+ return DW_EH_PE_udata4; -+ return DW_EH_PE_absptr; -+} -+ -+/* Implement targetm.vectorize.builtin_vectorization_cost. 
*/ -+static int -+ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, -+ tree vectype, int) -+{ -+ bool fp = false; -+ machine_mode mode = TImode; -+ int index; -+ if (vectype != NULL) - { -- pos += SUBREG_BYTE (dst) * BITS_PER_UNIT; -- dst = SUBREG_REG (dst); -+ fp = FLOAT_TYPE_P (vectype); -+ mode = TYPE_MODE (vectype); - } - -- switch (GET_MODE (dst)) -+ switch (type_of_cost) - { -- case E_V16QImode: -- case E_V8HImode: -- case E_V4SImode: -- case E_V2DImode: -- case E_V1TImode: -- case E_TImode: -- { -- machine_mode srcmode, dstmode; -- rtx (*pinsr)(rtx, rtx, rtx, rtx); -- rtx d; -+ case scalar_stmt: -+ return fp ? ix86_cost->addss : COSTS_N_INSNS (1); - -- if (!int_mode_for_size (size, 0).exists (&srcmode)) -- return false; -+ case scalar_load: -+ /* load/store costs are relative to register move which is 2. Recompute -+ it to COSTS_N_INSNS so everything have same base. */ -+ return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0] -+ : ix86_cost->int_load [2]) / 2; - -- switch (srcmode) -- { -- case E_QImode: -- if (!TARGET_SSE4_1) -- return false; -- dstmode = V16QImode; -- pinsr = gen_sse4_1_pinsrb; -- break; -+ case scalar_store: -+ return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0] -+ : ix86_cost->int_store [2]) / 2; - -- case E_HImode: -- if (!TARGET_SSE2) -- return false; -- dstmode = V8HImode; -- pinsr = gen_sse2_pinsrw; -- break; -+ case vector_stmt: -+ return ix86_vec_cost (mode, -+ fp ? ix86_cost->addss : ix86_cost->sse_op); - -- case E_SImode: -- if (!TARGET_SSE4_1) -- return false; -- dstmode = V4SImode; -- pinsr = gen_sse4_1_pinsrd; -- break; -+ case vector_load: -+ index = sse_store_index (mode); -+ /* See PR82713 - we may end up being called on non-vector type. */ -+ if (index < 0) -+ index = 2; -+ return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2; - -- case E_DImode: -- gcc_assert (TARGET_64BIT); -- if (!TARGET_SSE4_1) -- return false; -- dstmode = V2DImode; -- pinsr = gen_sse4_1_pinsrq; -- break; -+ case vector_store: -+ index = sse_store_index (mode); -+ /* See PR82713 - we may end up being called on non-vector type. */ -+ if (index < 0) -+ index = 2; -+ return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2; - -- default: -- return false; -- } -+ case vec_to_scalar: -+ case scalar_to_vec: -+ return ix86_vec_cost (mode, ix86_cost->sse_op); - -- /* Reject insertions to misaligned positions. */ -- if (pos & (size-1)) -- return false; -+ /* We should have separate costs for unaligned loads and gather/scatter. -+ Do that incrementally. */ -+ case unaligned_load: -+ index = sse_store_index (mode); -+ /* See PR82713 - we may end up being called on non-vector type. */ -+ if (index < 0) -+ index = 2; -+ return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2; - -- if (SUBREG_P (src)) -- { -- unsigned int srcpos = SUBREG_BYTE (src); -+ case unaligned_store: -+ index = sse_store_index (mode); -+ /* See PR82713 - we may end up being called on non-vector type. 
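At the user level, the element extract/insert paths handled by the relocated ix86_expand_pextr and ix86_expand_pinsr roughly correspond to the pextr/pinsr intrinsics. A minimal illustration, assuming an SSE4.1 target built with -msse4.1 (not part of the patch):

#include <smmintrin.h>   /* SSE4.1: pinsrd / pextrd */
#include <stdio.h>

int main (void)
{
  /* Elements are whole, naturally aligned lanes, which is why the
     expanders reject positions with (pos & (size - 1)) set.  */
  __m128i v = _mm_set_epi32 (3, 2, 1, 0);      /* lanes 0..3 = 0,1,2,3 */

  v = _mm_insert_epi32 (v, 42, 2);             /* pinsrd: lane 2 = 42  */

  printf ("lane2=%d lane3=%d\n",
          _mm_extract_epi32 (v, 2),            /* pextrd: reads 42 */
          _mm_extract_epi32 (v, 3));           /* pextrd: reads 3  */
  return 0;
}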
*/ -+ if (index < 0) -+ index = 2; -+ return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2; - -- if (srcpos > 0) -- { -- rtx extr_ops[4]; -+ case vector_gather_load: -+ return ix86_vec_cost (mode, -+ COSTS_N_INSNS -+ (ix86_cost->gather_static -+ + ix86_cost->gather_per_elt -+ * TYPE_VECTOR_SUBPARTS (vectype)) / 2); - -- extr_ops[0] = gen_reg_rtx (srcmode); -- extr_ops[1] = gen_lowpart (srcmode, SUBREG_REG (src)); -- extr_ops[2] = GEN_INT (size); -- extr_ops[3] = GEN_INT (srcpos * BITS_PER_UNIT); -+ case vector_scatter_store: -+ return ix86_vec_cost (mode, -+ COSTS_N_INSNS -+ (ix86_cost->scatter_static -+ + ix86_cost->scatter_per_elt -+ * TYPE_VECTOR_SUBPARTS (vectype)) / 2); - -- if (!ix86_expand_pextr (extr_ops)) -- return false; -+ case cond_branch_taken: -+ return ix86_cost->cond_taken_branch_cost; - -- src = extr_ops[0]; -- } -- else -- src = gen_lowpart (srcmode, SUBREG_REG (src)); -- } -+ case cond_branch_not_taken: -+ return ix86_cost->cond_not_taken_branch_cost; - -- if (GET_MODE (dst) == dstmode) -- d = dst; -- else -- d = gen_reg_rtx (dstmode); -+ case vec_perm: -+ case vec_promote_demote: -+ return ix86_vec_cost (mode, ix86_cost->sse_op); - -- emit_insn (pinsr (d, gen_lowpart (dstmode, dst), -- gen_lowpart (srcmode, src), -- GEN_INT (1 << (pos / size)))); -- if (d != dst) -- emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d)); -- return true; -- } -+ case vec_construct: -+ { -+ /* N element inserts into SSE vectors. */ -+ int cost = TYPE_VECTOR_SUBPARTS (vectype) * ix86_cost->sse_op; -+ /* One vinserti128 for combining two SSE vectors for AVX256. */ -+ if (GET_MODE_BITSIZE (mode) == 256) -+ cost += ix86_vec_cost (mode, ix86_cost->addss); -+ /* One vinserti64x4 and two vinserti128 for combining SSE -+ and AVX256 vectors to AVX512. */ -+ else if (GET_MODE_BITSIZE (mode) == 512) -+ cost += 3 * ix86_vec_cost (mode, ix86_cost->addss); -+ return cost; -+ } - -- default: -- return false; -+ default: -+ gcc_unreachable (); - } - } -+ - - /* This function returns the calling abi specific va_list type node. - It returns the FNDECL specific va_list type. */ -@@ -50192,39 +21332,6 @@ ix86_preferred_simd_mode (scalar_mode mode) - } - } - --/* All CPUs prefer to avoid cross-lane operations so perform reductions -- upper against lower halves up to SSE reg size. */ -- --static machine_mode --ix86_split_reduction (machine_mode mode) --{ -- /* Reduce lowpart against highpart until we reach SSE reg width to -- avoid cross-lane operations. */ -- switch (mode) -- { -- case E_V8DImode: -- case E_V4DImode: -- return V2DImode; -- case E_V16SImode: -- case E_V8SImode: -- return V4SImode; -- case E_V32HImode: -- case E_V16HImode: -- return V8HImode; -- case E_V64QImode: -- case E_V32QImode: -- return V16QImode; -- case E_V16SFmode: -- case E_V8SFmode: -- return V4SFmode; -- case E_V8DFmode: -- case E_V4DFmode: -- return V2DFmode; -- default: -- return mode; -- } --} -- - /* If AVX is enabled then try vectorizing with both 256bit and 128bit - vectors. If AVX512F is enabled then try vectorizing with 512bit, - 256bit and 128bit vectors. 
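The load/store cases above all scale a table value with COSTS_N_INSNS (...) / 2 because the i386 cost tables express latencies relative to a register move whose cost is 2. A toy rescaling with made-up table numbers (illustrative only; the real values live in the processor cost structures):

#include <stdio.h>

/* In GCC, COSTS_N_INSNS (N) is defined as N * 4 cost units.  */
#define COSTS_N_INSNS(n) ((n) * 4)

int main (void)
{
  /* Hypothetical table entries, relative to a register move of cost 2.  */
  int int_load = 6;    /* three times a register move */
  int sse_load = 10;   /* five times a register move  */

  /* The hook multiplies by COSTS_N_INSNS and halves, so a plain register
     move (cost 2) lands exactly on COSTS_N_INSNS (1).  */
  printf ("reg move -> %d\n", COSTS_N_INSNS (2) / 2);
  printf ("int load -> %d\n", COSTS_N_INSNS (int_load) / 2);
  printf ("sse load -> %d\n", COSTS_N_INSNS (sse_load) / 2);
  return 0;
}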
*/ -@@ -50596,13 +21703,15 @@ ix86_memmodel_check (unsigned HOST_WIDE_INT val) - if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong)) - { - warning (OPT_Winvalid_memory_model, -- "HLE_ACQUIRE not used with ACQUIRE or stronger memory model"); -+ "% not used with % or stronger " -+ "memory model"); - return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE; - } - if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong)) - { - warning (OPT_Winvalid_memory_model, -- "HLE_RELEASE not used with RELEASE or stronger memory model"); -+ "% not used with % or stronger " -+ "memory model"); - return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE; - } - return val; -@@ -50760,50 +21869,6 @@ ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, - return ret; - } - --/* Add target attribute to SIMD clone NODE if needed. */ -- --static void --ix86_simd_clone_adjust (struct cgraph_node *node) --{ -- const char *str = NULL; -- -- /* Attributes need to be adjusted for definitions, not declarations. */ -- if (!node->definition) -- return; -- -- gcc_assert (node->decl == cfun->decl); -- switch (node->simdclone->vecsize_mangle) -- { -- case 'b': -- if (!TARGET_SSE2) -- str = "sse2"; -- break; -- case 'c': -- if (!TARGET_AVX) -- str = "avx"; -- break; -- case 'd': -- if (!TARGET_AVX2) -- str = "avx2"; -- break; -- case 'e': -- if (!TARGET_AVX512F) -- str = "avx512f"; -- break; -- default: -- gcc_unreachable (); -- } -- if (str == NULL) -- return; -- push_cfun (NULL); -- tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str)); -- bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0); -- gcc_assert (ok); -- pop_cfun (); -- ix86_reset_previous_fndecl (); -- ix86_set_current_function (node->decl); --} -- - /* If SIMD clone NODE can't be used in a vectorized loop - in current function, return -1, otherwise return a badness of using it - (0 if it is most desirable from vecsize_mangle point of view, 1 -@@ -50912,10 +21977,10 @@ ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) - tree fenv_ptr = build_pointer_type (fenv_type); - tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var); - fenv_addr = fold_convert (ptr_type_node, fenv_addr); -- tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV]; -- tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV]; -- tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW]; -- tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX]; -+ tree fnstenv = get_ix86_builtin (IX86_BUILTIN_FNSTENV); -+ tree fldenv = get_ix86_builtin (IX86_BUILTIN_FLDENV); -+ tree fnstsw = get_ix86_builtin (IX86_BUILTIN_FNSTSW); -+ tree fnclex = get_ix86_builtin (IX86_BUILTIN_FNCLEX); - tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr); - tree hold_fnclex = build_call_expr (fnclex, 0); - fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv, -@@ -50939,8 +22004,8 @@ ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) - { - tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node); - tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node); -- tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR]; -- tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR]; -+ tree stmxcsr = get_ix86_builtin (IX86_BUILTIN_STMXCSR); -+ tree ldmxcsr = get_ix86_builtin (IX86_BUILTIN_LDMXCSR); - tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0); - tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node, - mxcsr_orig_var, stmxcsr_hold_call); -@@ -51183,22 +22248,6 @@ ix86_init_libfuncs (void) - #endif - } - --/* Generate call to 
__divmoddi4. */ -- --static void --ix86_expand_divmod_libfunc (rtx libfunc, machine_mode mode, -- rtx op0, rtx op1, -- rtx *quot_p, rtx *rem_p) --{ -- rtx rem = assign_386_stack_local (mode, SLOT_TEMP); -- -- rtx quot = emit_library_call_value (libfunc, NULL_RTX, LCT_NORMAL, -- mode, op0, mode, op1, mode, -- XEXP (rem, 0), Pmode); -- *quot_p = quot; -- *rem_p = rem; --} -- - /* Set the value of FLT_EVAL_METHOD in float.h. When using only the - FPU, assume that the fpcw is set to extended precision; when using - only SSE, rounding is correct; when using both SSE and the FPU, -@@ -51970,9 +23019,6 @@ ix86_run_selftests (void) - #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS - #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true - --#undef TARGET_SETUP_INCOMING_VARARG_BOUNDS --#define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds -- - #undef TARGET_OFFLOAD_OPTIONS - #define TARGET_OFFLOAD_OPTIONS \ - ix86_offload_options -diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h -index 14e5a392f..187e52a5b 100644 ---- a/gcc/config/i386/i386.h -+++ b/gcc/config/i386/i386.h -@@ -1891,7 +1891,7 @@ typedef struct ix86_args { - ? GET_MODE_SIZE (TImode) : UNITS_PER_WORD) - - /* If a memory-to-memory move would take MOVE_RATIO or more simple -- move-instruction pairs, we will do a movmem or libcall instead. -+ move-instruction pairs, we will do a cpymem or libcall instead. - Increasing the value will always make code faster, but eventually - incurs high cost in increased code size. - -@@ -2784,6 +2784,9 @@ struct GTY(()) machine_function { - /* During SEH output, this is non-null. */ - struct seh_frame_state * GTY((skip(""))) seh; - }; -+ -+extern GTY(()) tree sysv_va_list_type_node; -+extern GTY(()) tree ms_va_list_type_node; - #endif - - #define ix86_stack_locals (cfun->machine->stack_locals) -@@ -2881,6 +2884,12 @@ extern void debug_dispatch_window (int); - - #define TARGET_SUPPORTS_WIDE_INT 1 - -+#if !defined(GENERATOR_FILE) && !defined(IN_LIBGCC2) -+extern enum attr_cpu ix86_schedule; -+ -+#define NUM_X86_64_MS_CLOBBERED_REGS 12 -+#endif -+ - /* - Local variables: - version-control: t -diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md -index 698c31a0a..861248899 100644 ---- a/gcc/config/i386/i386.md -+++ b/gcc/config/i386/i386.md -@@ -16731,7 +16731,7 @@ - (set_attr "length_immediate" "0") - (set_attr "modrm" "0")]) - --(define_expand "movmem" -+(define_expand "cpymem" - [(use (match_operand:BLK 0 "memory_operand")) - (use (match_operand:BLK 1 "memory_operand")) - (use (match_operand:SWI48 2 "nonmemory_operand")) -@@ -16743,7 +16743,7 @@ - (use (match_operand:SI 8 ""))] - "" - { -- if (ix86_expand_set_or_movmem (operands[0], operands[1], -+ if (ix86_expand_set_or_cpymem (operands[0], operands[1], - operands[2], NULL, operands[3], - operands[4], operands[5], - operands[6], operands[7], -@@ -16958,7 +16958,7 @@ - (use (match_operand:SI 8 ""))] - "" - { -- if (ix86_expand_set_or_movmem (operands[0], NULL, -+ if (ix86_expand_set_or_cpymem (operands[0], NULL, - operands[1], operands[2], - operands[3], operands[4], - operands[5], operands[6], -diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md -index 865947deb..4135159ac 100644 ---- a/gcc/config/i386/predicates.md -+++ b/gcc/config/i386/predicates.md -@@ -683,7 +683,7 @@ - if (GET_CODE (op) == PLUS && REG_P (XEXP (op, 0))) - { - int regno = REGNO (XEXP (op, 0)); -- if (!HARD_REGISTER_NUM_P (regno) || call_used_regs[regno]) -+ if (!HARD_REGISTER_NUM_P (regno) || 
call_used_or_fixed_reg_p (regno)) - { - op = XEXP (op, 1); - if (GOT32_symbol_operand (op, VOIDmode)) -diff --git a/gcc/config/i386/t-i386 b/gcc/config/i386/t-i386 -index 0dac80fbc..50caf2c69 100644 ---- a/gcc/config/i386/t-i386 -+++ b/gcc/config/i386/t-i386 -@@ -44,6 +44,22 @@ i386-d.o: $(srcdir)/config/i386/i386-d.c - $(COMPILE) $< - $(POSTCOMPILE) - -+i386-options.o: $(srcdir)/config/i386/i386-options.c -+ $(COMPILE) $< -+ $(POSTCOMPILE) -+ -+i386-builtins.o: $(srcdir)/config/i386/i386-builtins.c -+ $(COMPILE) $< -+ $(POSTCOMPILE) -+ -+i386-expand.o: $(srcdir)/config/i386/i386-expand.c -+ $(COMPILE) $< -+ $(POSTCOMPILE) -+ -+i386-features.o: $(srcdir)/config/i386/i386-features.c -+ $(COMPILE) $< -+ $(POSTCOMPILE) -+ - i386.o: i386-builtin-types.inc - - i386-builtin-types.inc: s-i386-bt ; @true -diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c -index e8d905e22..d09e49637 100644 ---- a/gcc/config/ia64/ia64.c -+++ b/gcc/config/ia64/ia64.c -@@ -5147,7 +5147,7 @@ ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, - gimple_seq *post_p) - { - /* Variable sized types are passed by reference. */ -- if (pass_by_reference (NULL, TYPE_MODE (type), type, false)) -+ if (pass_va_arg_by_reference (type)) - { - tree ptrtype = build_pointer_type (type); - tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p); -diff --git a/gcc/config/lm32/lm32.md b/gcc/config/lm32/lm32.md -index c09052c62..91a5fe1e0 100644 ---- a/gcc/config/lm32/lm32.md -+++ b/gcc/config/lm32/lm32.md -@@ -216,7 +216,7 @@ - } - }") - --(define_expand "movmemsi" -+(define_expand "cpymemsi" - [(parallel [(set (match_operand:BLK 0 "general_operand" "") - (match_operand:BLK 1 "general_operand" "")) - (use (match_operand:SI 2 "" "")) -diff --git a/gcc/config/m32c/blkmov.md b/gcc/config/m32c/blkmov.md -index d7da439c2..e5cdc801f 100644 ---- a/gcc/config/m32c/blkmov.md -+++ b/gcc/config/m32c/blkmov.md -@@ -40,14 +40,14 @@ - ;; 1 = source (mem:BLK ...) 
- ;; 2 = count - ;; 3 = alignment --(define_expand "movmemhi" -+(define_expand "cpymemhi" - [(match_operand 0 "ap_operand" "") - (match_operand 1 "ap_operand" "") - (match_operand 2 "m32c_r3_operand" "") - (match_operand 3 "" "") - ] - "" -- "if (m32c_expand_movmemhi(operands)) DONE; FAIL;" -+ "if (m32c_expand_cpymemhi(operands)) DONE; FAIL;" - ) - - ;; We can't use mode iterators for these because M16C uses r1h to extend -@@ -60,7 +60,7 @@ - ;; 3 = dest (in) - ;; 4 = src (in) - ;; 5 = count (in) --(define_insn "movmemhi_bhi_op" -+(define_insn "cpymemhi_bhi_op" - [(set (mem:QI (match_operand:HI 3 "ap_operand" "0")) - (mem:QI (match_operand:HI 4 "ap_operand" "1"))) - (set (match_operand:HI 2 "m32c_r3_operand" "=R3w") -@@ -75,7 +75,7 @@ - "TARGET_A16" - "mov.b:q\t#0,r1h\n\tsmovf.b\t; %0[0..%2-1]=r1h%1[]" - ) --(define_insn "movmemhi_bpsi_op" -+(define_insn "cpymemhi_bpsi_op" - [(set (mem:QI (match_operand:PSI 3 "ap_operand" "0")) - (mem:QI (match_operand:PSI 4 "ap_operand" "1"))) - (set (match_operand:HI 2 "m32c_r3_operand" "=R3w") -@@ -89,7 +89,7 @@ - "TARGET_A24" - "smovf.b\t; %0[0..%2-1]=%1[]" - ) --(define_insn "movmemhi_whi_op" -+(define_insn "cpymemhi_whi_op" - [(set (mem:HI (match_operand:HI 3 "ap_operand" "0")) - (mem:HI (match_operand:HI 4 "ap_operand" "1"))) - (set (match_operand:HI 2 "m32c_r3_operand" "=R3w") -@@ -104,7 +104,7 @@ - "TARGET_A16" - "mov.b:q\t#0,r1h\n\tsmovf.w\t; %0[0..%2-1]=r1h%1[]" - ) --(define_insn "movmemhi_wpsi_op" -+(define_insn "cpymemhi_wpsi_op" - [(set (mem:HI (match_operand:PSI 3 "ap_operand" "0")) - (mem:HI (match_operand:PSI 4 "ap_operand" "1"))) - (set (match_operand:HI 2 "m32c_r3_operand" "=R3w") -diff --git a/gcc/config/m32c/m32c-protos.h b/gcc/config/m32c/m32c-protos.h -index 7d4d478fd..fe926fd50 100644 ---- a/gcc/config/m32c/m32c-protos.h -+++ b/gcc/config/m32c/m32c-protos.h -@@ -43,7 +43,7 @@ void m32c_emit_eh_epilogue (rtx); - int m32c_expand_cmpstr (rtx *); - int m32c_expand_insv (rtx *); - int m32c_expand_movcc (rtx *); --int m32c_expand_movmemhi (rtx *); -+int m32c_expand_cpymemhi (rtx *); - int m32c_expand_movstr (rtx *); - void m32c_expand_neg_mulpsi3 (rtx *); - int m32c_expand_setmemhi (rtx *); -diff --git a/gcc/config/m32c/m32c.c b/gcc/config/m32c/m32c.c -index 1a0d0c681..d0d24bb5f 100644 ---- a/gcc/config/m32c/m32c.c -+++ b/gcc/config/m32c/m32c.c -@@ -3592,7 +3592,7 @@ m32c_expand_setmemhi(rtx *operands) - addresses, not [mem] syntax. $0 is the destination (MEM:BLK), $1 - is the source (MEM:BLK), and $2 the count (HI). 
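The mechanical movmem -> cpymem renaming running through these back ends follows the upstream clarification that these patterns implement a memcpy-style copy whose source and destination must not overlap, keeping the movmem name free for memmove-style semantics. A short reminder of that difference in plain C (illustrative, not from the patch):

#include <stdio.h>
#include <string.h>

int main (void)
{
  char a[] = "abcdef";
  char b[] = "abcdef";

  /* Shift "abcde" one position right within the same buffer.  */
  memmove (a + 1, a, 5);        /* overlap is allowed: "aabcde"          */
  /* memcpy (b + 1, b, 5);         would be undefined: regions overlap   */
  memcpy (b, "zzzzzz", 6);      /* fine: disjoint source and destination */

  printf ("%s %s\n", a, b);     /* prints: aabcde zzzzzz                 */
  return 0;
}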
*/ - int --m32c_expand_movmemhi(rtx *operands) -+m32c_expand_cpymemhi(rtx *operands) - { - rtx desta, srca, count; - rtx desto, srco, counto; -@@ -3620,9 +3620,9 @@ m32c_expand_movmemhi(rtx *operands) - { - count = copy_to_mode_reg (HImode, GEN_INT (INTVAL (count) / 2)); - if (TARGET_A16) -- emit_insn (gen_movmemhi_whi_op (desto, srco, counto, desta, srca, count)); -+ emit_insn (gen_cpymemhi_whi_op (desto, srco, counto, desta, srca, count)); - else -- emit_insn (gen_movmemhi_wpsi_op (desto, srco, counto, desta, srca, count)); -+ emit_insn (gen_cpymemhi_wpsi_op (desto, srco, counto, desta, srca, count)); - return 1; - } - -@@ -3632,9 +3632,9 @@ m32c_expand_movmemhi(rtx *operands) - count = copy_to_mode_reg (HImode, count); - - if (TARGET_A16) -- emit_insn (gen_movmemhi_bhi_op (desto, srco, counto, desta, srca, count)); -+ emit_insn (gen_cpymemhi_bhi_op (desto, srco, counto, desta, srca, count)); - else -- emit_insn (gen_movmemhi_bpsi_op (desto, srco, counto, desta, srca, count)); -+ emit_insn (gen_cpymemhi_bpsi_op (desto, srco, counto, desta, srca, count)); - - return 1; - } -diff --git a/gcc/config/m32r/m32r.c b/gcc/config/m32r/m32r.c -index 6e79b2aec..ac18aa286 100644 ---- a/gcc/config/m32r/m32r.c -+++ b/gcc/config/m32r/m32r.c -@@ -2598,7 +2598,7 @@ m32r_expand_block_move (rtx operands[]) - to the word after the end of the source block, and dst_reg to point - to the last word of the destination block, provided that the block - is MAX_MOVE_BYTES long. */ -- emit_insn (gen_movmemsi_internal (dst_reg, src_reg, at_a_time, -+ emit_insn (gen_cpymemsi_internal (dst_reg, src_reg, at_a_time, - new_dst_reg, new_src_reg)); - emit_move_insn (dst_reg, new_dst_reg); - emit_move_insn (src_reg, new_src_reg); -@@ -2612,7 +2612,7 @@ m32r_expand_block_move (rtx operands[]) - } - - if (leftover) -- emit_insn (gen_movmemsi_internal (dst_reg, src_reg, GEN_INT (leftover), -+ emit_insn (gen_cpymemsi_internal (dst_reg, src_reg, GEN_INT (leftover), - gen_reg_rtx (SImode), - gen_reg_rtx (SImode))); - return 1; -diff --git a/gcc/config/m32r/m32r.md b/gcc/config/m32r/m32r.md -index be5739763..e944363fd 100644 ---- a/gcc/config/m32r/m32r.md -+++ b/gcc/config/m32r/m32r.md -@@ -2195,7 +2195,7 @@ - ;; Argument 2 is the length - ;; Argument 3 is the alignment - --(define_expand "movmemsi" -+(define_expand "cpymemsi" - [(parallel [(set (match_operand:BLK 0 "general_operand" "") - (match_operand:BLK 1 "general_operand" "")) - (use (match_operand:SI 2 "immediate_operand" "")) -@@ -2214,7 +2214,7 @@ - - ;; Insn generated by block moves - --(define_insn "movmemsi_internal" -+(define_insn "cpymemsi_internal" - [(set (mem:BLK (match_operand:SI 0 "register_operand" "r")) ;; destination - (mem:BLK (match_operand:SI 1 "register_operand" "r"))) ;; source - (use (match_operand:SI 2 "m32r_block_immediate_operand" "J"));; # bytes to move -diff --git a/gcc/config/mcore/mcore.md b/gcc/config/mcore/mcore.md -index cc84e342b..c6893518d 100644 ---- a/gcc/config/mcore/mcore.md -+++ b/gcc/config/mcore/mcore.md -@@ -2552,7 +2552,7 @@ - ;; Block move - adapted from m88k.md - ;; ------------------------------------------------------------------------ - --(define_expand "movmemsi" -+(define_expand "cpymemsi" - [(parallel [(set (mem:BLK (match_operand:BLK 0 "" "")) - (mem:BLK (match_operand:BLK 1 "" ""))) - (use (match_operand:SI 2 "general_operand" "")) -diff --git a/gcc/config/microblaze/microblaze.c b/gcc/config/microblaze/microblaze.c -index 55c1becf9..07dd0bc6f 100644 ---- a/gcc/config/microblaze/microblaze.c -+++ 
b/gcc/config/microblaze/microblaze.c -@@ -1250,7 +1250,7 @@ microblaze_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length) - microblaze_block_move_straight (dest, src, leftover); - } - --/* Expand a movmemsi instruction. */ -+/* Expand a cpymemsi instruction. */ - - bool - microblaze_expand_block_move (rtx dest, rtx src, rtx length, rtx align_rtx) -diff --git a/gcc/config/microblaze/microblaze.md b/gcc/config/microblaze/microblaze.md -index 183afff37..1509e4318 100644 ---- a/gcc/config/microblaze/microblaze.md -+++ b/gcc/config/microblaze/microblaze.md -@@ -1144,7 +1144,7 @@ - ;; Argument 2 is the length - ;; Argument 3 is the alignment - --(define_expand "movmemsi" -+(define_expand "cpymemsi" - [(parallel [(set (match_operand:BLK 0 "general_operand") - (match_operand:BLK 1 "general_operand")) - (use (match_operand:SI 2 "")) -diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c -index 100894720..3c95636bf 100644 ---- a/gcc/config/mips/mips.c -+++ b/gcc/config/mips/mips.c -@@ -6780,7 +6780,7 @@ mips_std_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, - unsigned HOST_WIDE_INT align, boundary; - bool indirect; - -- indirect = pass_by_reference (NULL, TYPE_MODE (type), type, false); -+ indirect = pass_va_arg_by_reference (type); - if (indirect) - type = build_pointer_type (type); - -@@ -6867,7 +6867,7 @@ mips_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, - tree addr; - bool indirect_p; - -- indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, 0); -+ indirect_p = pass_va_arg_by_reference (type); - if (indirect_p) - type = build_pointer_type (type); - -@@ -7938,15 +7938,15 @@ mips_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size, - { - if (op == STORE_BY_PIECES) - return mips_store_by_pieces_p (size, align); -- if (op == MOVE_BY_PIECES && HAVE_movmemsi) -+ if (op == MOVE_BY_PIECES && HAVE_cpymemsi) - { -- /* movmemsi is meant to generate code that is at least as good as -- move_by_pieces. However, movmemsi effectively uses a by-pieces -+ /* cpymemsi is meant to generate code that is at least as good as -+ move_by_pieces. However, cpymemsi effectively uses a by-pieces - implementation both for moves smaller than a word and for - word-aligned moves of no more than MIPS_MAX_MOVE_BYTES_STRAIGHT - bytes. We should allow the tree-level optimisers to do such - moves by pieces, as it often exposes other optimization -- opportunities. We might as well continue to use movmemsi at -+ opportunities. We might as well continue to use cpymemsi at - the rtl level though, as it produces better code when - scheduling is disabled (such as at -O). */ - if (currently_expanding_to_rtl) -@@ -8165,7 +8165,7 @@ mips_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length, - emit_insn (gen_nop ()); - } - --/* Expand a movmemsi instruction, which copies LENGTH bytes from -+/* Expand a cpymemsi instruction, which copies LENGTH bytes from - memory reference SRC to memory reference DEST. */ - - bool -diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h -index 953d82e85..a5be7fa39 100644 ---- a/gcc/config/mips/mips.h -+++ b/gcc/config/mips/mips.h -@@ -3099,12 +3099,12 @@ while (0) - #define MIPS_MIN_MOVE_MEM_ALIGN 16 - - /* The maximum number of bytes that can be copied by one iteration of -- a movmemsi loop; see mips_block_move_loop. */ -+ a cpymemsi loop; see mips_block_move_loop. 
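The MIPS and RISC-V MOVE_RATIO comments describe the same trade-off: a block copy is expanded as individual load/store pairs only while the pair count stays below MOVE_RATIO; beyond that, the cpymemsi pattern or a memcpy libcall takes over. A toy model of that decision with invented numbers (illustrative only, not a real target's values):

#include <stdio.h>

int main (void)
{
  const int unit = 8;         /* hypothetical UNITS_PER_WORD     */
  const int move_ratio = 4;   /* hypothetical MOVE_RATIO (speed) */

  for (int size = 8; size <= 64; size *= 2)
    {
      int pairs = (size + unit - 1) / unit;   /* load/store pairs needed */
      printf ("%2d bytes -> %d pairs -> %s\n", size, pairs,
              pairs >= move_ratio ? "cpymemsi / libcall" : "move by pieces");
    }
  return 0;
}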
*/ - #define MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER \ - (UNITS_PER_WORD * 4) - - /* The maximum number of bytes that can be copied by a straight-line -- implementation of movmemsi; see mips_block_move_straight. We want -+ implementation of cpymemsi; see mips_block_move_straight. We want - to make sure that any loop-based implementation will iterate at - least twice. */ - #define MIPS_MAX_MOVE_BYTES_STRAIGHT \ -@@ -3119,11 +3119,11 @@ while (0) - - #define MIPS_CALL_RATIO 8 - --/* Any loop-based implementation of movmemsi will have at least -+/* Any loop-based implementation of cpymemsi will have at least - MIPS_MAX_MOVE_BYTES_STRAIGHT / UNITS_PER_WORD memory-to-memory - moves, so allow individual copies of fewer elements. - -- When movmemsi is not available, use a value approximating -+ When cpymemsi is not available, use a value approximating - the length of a memcpy call sequence, so that move_by_pieces - will generate inline code if it is shorter than a function call. - Since move_by_pieces_ninsns counts memory-to-memory moves, but -@@ -3131,7 +3131,7 @@ while (0) - value of MIPS_CALL_RATIO to take that into account. */ - - #define MOVE_RATIO(speed) \ -- (HAVE_movmemsi \ -+ (HAVE_cpymemsi \ - ? MIPS_MAX_MOVE_BYTES_STRAIGHT / MOVE_MAX \ - : MIPS_CALL_RATIO / 2) - -diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md -index 3cfb1a751..a9abb6fdd 100644 ---- a/gcc/config/mips/mips.md -+++ b/gcc/config/mips/mips.md -@@ -5638,7 +5638,7 @@ - ;; Argument 2 is the length - ;; Argument 3 is the alignment - --(define_expand "movmemsi" -+(define_expand "cpymemsi" - [(parallel [(set (match_operand:BLK 0 "general_operand") - (match_operand:BLK 1 "general_operand")) - (use (match_operand:SI 2 "")) -diff --git a/gcc/config/msp430/msp430.c b/gcc/config/msp430/msp430.c -index 020e980b8..3ce649648 100644 ---- a/gcc/config/msp430/msp430.c -+++ b/gcc/config/msp430/msp430.c -@@ -1457,7 +1457,7 @@ msp430_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, - unsigned HOST_WIDE_INT align, boundary; - bool indirect; - -- indirect = pass_by_reference (NULL, TYPE_MODE (type), type, false); -+ indirect = pass_va_arg_by_reference (type); - if (indirect) - type = build_pointer_type (type); - -diff --git a/gcc/config/nds32/nds32-memory-manipulation.c b/gcc/config/nds32/nds32-memory-manipulation.c -index 71b75dca5..b3f2cd698 100644 ---- a/gcc/config/nds32/nds32-memory-manipulation.c -+++ b/gcc/config/nds32/nds32-memory-manipulation.c -@@ -1,4 +1,4 @@ --/* Auxiliary functions for expand movmem, setmem, cmpmem, load_multiple -+/* Auxiliary functions for expand cpymem, setmem, cmpmem, load_multiple - and store_multiple pattern of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2019 Free Software Foundation, Inc. - Contributed by Andes Technology Corporation. -@@ -120,14 +120,14 @@ nds32_emit_mem_move_block (int base_regno, int count, - - /* ------------------------------------------------------------------------ */ - --/* Auxiliary function for expand movmem pattern. */ -+/* Auxiliary function for expand cpymem pattern. */ - - static bool --nds32_expand_movmemsi_loop_unknown_size (rtx dstmem, rtx srcmem, -+nds32_expand_cpymemsi_loop_unknown_size (rtx dstmem, rtx srcmem, - rtx size, - rtx alignment) - { -- /* Emit loop version of movmem. -+ /* Emit loop version of cpymem. 
- - andi $size_least_3_bit, $size, #~7 - add $dst_end, $dst, $size -@@ -254,7 +254,7 @@ nds32_expand_movmemsi_loop_unknown_size (rtx dstmem, rtx srcmem, - } - - static bool --nds32_expand_movmemsi_loop_known_size (rtx dstmem, rtx srcmem, -+nds32_expand_cpymemsi_loop_known_size (rtx dstmem, rtx srcmem, - rtx size, rtx alignment) - { - rtx dst_base_reg, src_base_reg; -@@ -288,7 +288,7 @@ nds32_expand_movmemsi_loop_known_size (rtx dstmem, rtx srcmem, - - if (total_bytes < 8) - { -- /* Emit total_bytes less than 8 loop version of movmem. -+ /* Emit total_bytes less than 8 loop version of cpymem. - add $dst_end, $dst, $size - move $dst_itr, $dst - .Lbyte_mode_loop: -@@ -321,7 +321,7 @@ nds32_expand_movmemsi_loop_known_size (rtx dstmem, rtx srcmem, - } - else if (total_bytes % 8 == 0) - { -- /* Emit multiple of 8 loop version of movmem. -+ /* Emit multiple of 8 loop version of cpymem. - - add $dst_end, $dst, $size - move $dst_itr, $dst -@@ -370,7 +370,7 @@ nds32_expand_movmemsi_loop_known_size (rtx dstmem, rtx srcmem, - else - { - /* Handle size greater than 8, and not a multiple of 8. */ -- return nds32_expand_movmemsi_loop_unknown_size (dstmem, srcmem, -+ return nds32_expand_cpymemsi_loop_unknown_size (dstmem, srcmem, - size, alignment); - } - -@@ -378,19 +378,19 @@ nds32_expand_movmemsi_loop_known_size (rtx dstmem, rtx srcmem, - } - - static bool --nds32_expand_movmemsi_loop (rtx dstmem, rtx srcmem, -+nds32_expand_cpymemsi_loop (rtx dstmem, rtx srcmem, - rtx size, rtx alignment) - { - if (CONST_INT_P (size)) -- return nds32_expand_movmemsi_loop_known_size (dstmem, srcmem, -+ return nds32_expand_cpymemsi_loop_known_size (dstmem, srcmem, - size, alignment); - else -- return nds32_expand_movmemsi_loop_unknown_size (dstmem, srcmem, -+ return nds32_expand_cpymemsi_loop_unknown_size (dstmem, srcmem, - size, alignment); - } - - static bool --nds32_expand_movmemsi_unroll (rtx dstmem, rtx srcmem, -+nds32_expand_cpymemsi_unroll (rtx dstmem, rtx srcmem, - rtx total_bytes, rtx alignment) - { - rtx dst_base_reg, src_base_reg; -@@ -533,13 +533,13 @@ nds32_expand_movmemsi_unroll (rtx dstmem, rtx srcmem, - This is auxiliary extern function to help create rtx template. - Check nds32-multiple.md file for the patterns. */ - bool --nds32_expand_movmemsi (rtx dstmem, rtx srcmem, rtx total_bytes, rtx alignment) -+nds32_expand_cpymemsi (rtx dstmem, rtx srcmem, rtx total_bytes, rtx alignment) - { -- if (nds32_expand_movmemsi_unroll (dstmem, srcmem, total_bytes, alignment)) -+ if (nds32_expand_cpymemsi_unroll (dstmem, srcmem, total_bytes, alignment)) - return true; - - if (!optimize_size && optimize > 2) -- return nds32_expand_movmemsi_loop (dstmem, srcmem, total_bytes, alignment); -+ return nds32_expand_cpymemsi_loop (dstmem, srcmem, total_bytes, alignment); - - return false; - } -diff --git a/gcc/config/nds32/nds32-multiple.md b/gcc/config/nds32/nds32-multiple.md -index a1e10c055..98d9508c0 100644 ---- a/gcc/config/nds32/nds32-multiple.md -+++ b/gcc/config/nds32/nds32-multiple.md -@@ -3751,14 +3751,14 @@ - ;; operands[3] is the known shared alignment. 
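The loop shape sketched in the nds32 comments above (a double-word loop over the size rounded down to a multiple of 8, followed by a byte-mode tail loop) can be modelled in portable C. This is only an illustration of the emitted loop's structure, not code from the patch:

#include <assert.h>
#include <string.h>

/* Copy 8-byte chunks while at least 8 bytes remain, then finish with a
   byte loop, mirroring the unknown-size cpymem expansion.  */
static void copy_loop (unsigned char *dst, const unsigned char *src,
                       unsigned long size)
{
  unsigned long chunks = size & ~7UL;       /* andi $size, #~7   */
  unsigned long i = 0;

  for (; i < chunks; i += 8)                /* double-word loop  */
    memcpy (dst + i, src + i, 8);
  for (; i < size; i++)                     /* byte-mode tail    */
    dst[i] = src[i];
}

int main (void)
{
  unsigned char a[29], b[29];
  for (int i = 0; i < 29; i++)
    a[i] = (unsigned char) i;

  copy_loop (b, a, 29);
  assert (memcmp (a, b, 29) == 0);
  return 0;
}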
- - --(define_expand "movmemsi" -+(define_expand "cpymemsi" - [(match_operand:BLK 0 "general_operand" "") - (match_operand:BLK 1 "general_operand" "") - (match_operand:SI 2 "nds32_reg_constant_operand" "") - (match_operand:SI 3 "const_int_operand" "")] - "" - { -- if (nds32_expand_movmemsi (operands[0], -+ if (nds32_expand_cpymemsi (operands[0], - operands[1], - operands[2], - operands[3])) -diff --git a/gcc/config/nds32/nds32-protos.h b/gcc/config/nds32/nds32-protos.h -index aaa65d6f0..7ae1954d0 100644 ---- a/gcc/config/nds32/nds32-protos.h -+++ b/gcc/config/nds32/nds32-protos.h -@@ -78,7 +78,7 @@ extern rtx nds32_di_low_part_subreg(rtx); - - extern rtx nds32_expand_load_multiple (int, int, rtx, rtx, bool, rtx *); - extern rtx nds32_expand_store_multiple (int, int, rtx, rtx, bool, rtx *); --extern bool nds32_expand_movmemsi (rtx, rtx, rtx, rtx); -+extern bool nds32_expand_cpymemsi (rtx, rtx, rtx, rtx); - extern bool nds32_expand_setmem (rtx, rtx, rtx, rtx, rtx, rtx); - extern bool nds32_expand_strlen (rtx, rtx, rtx, rtx); - -diff --git a/gcc/config/pa/pa.c b/gcc/config/pa/pa.c -index 84a8cae22..73109c6f9 100644 ---- a/gcc/config/pa/pa.c -+++ b/gcc/config/pa/pa.c -@@ -107,7 +107,7 @@ static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx, - static bool forward_branch_p (rtx_insn *); - static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *); - static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *); --static int compute_movmem_length (rtx_insn *); -+static int compute_cpymem_length (rtx_insn *); - static int compute_clrmem_length (rtx_insn *); - static bool pa_assemble_integer (rtx, unsigned int, int); - static void remove_useless_addtr_insns (int); -@@ -2986,7 +2986,7 @@ pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED) - count insns rather than emit them. */ - - static int --compute_movmem_length (rtx_insn *insn) -+compute_cpymem_length (rtx_insn *insn) - { - rtx pat = PATTERN (insn); - unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0)); -@@ -5061,7 +5061,7 @@ pa_adjust_insn_length (rtx_insn *insn, int length) - && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM - && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode - && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode) -- length += compute_movmem_length (insn) - 4; -+ length += compute_cpymem_length (insn) - 4; - /* Block clear pattern. */ - else if (NONJUMP_INSN_P (insn) - && GET_CODE (pat) == PARALLEL -@@ -6378,7 +6378,7 @@ hppa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, - unsigned int size, ofs; - bool indirect; - -- indirect = pass_by_reference (NULL, TYPE_MODE (type), type, 0); -+ indirect = pass_va_arg_by_reference (type); - if (indirect) - { - type = ptr; -diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md -index 18f8e127d..a37989032 100644 ---- a/gcc/config/pa/pa.md -+++ b/gcc/config/pa/pa.md -@@ -3162,9 +3162,9 @@ - - ;; The definition of this insn does not really explain what it does, - ;; but it should suffice that anything generated as this insn will be --;; recognized as a movmemsi operation, and that it will not successfully -+;; recognized as a cpymemsi operation, and that it will not successfully - ;; combine with anything. --(define_expand "movmemsi" -+(define_expand "cpymemsi" - [(parallel [(set (match_operand:BLK 0 "" "") - (match_operand:BLK 1 "" "")) - (clobber (match_dup 4)) -@@ -3244,7 +3244,7 @@ - ;; operands 0 and 1 are both equivalent to symbolic MEMs. 
Thus, we are - ;; forced to internally copy operands 0 and 1 to operands 7 and 8, - ;; respectively. We then split or peephole optimize after reload. --(define_insn "movmemsi_prereload" -+(define_insn "cpymemsi_prereload" - [(set (mem:BLK (match_operand:SI 0 "register_operand" "r,r")) - (mem:BLK (match_operand:SI 1 "register_operand" "r,r"))) - (clobber (match_operand:SI 2 "register_operand" "=&r,&r")) ;loop cnt/tmp -@@ -3337,7 +3337,7 @@ - } - }") - --(define_insn "movmemsi_postreload" -+(define_insn "cpymemsi_postreload" - [(set (mem:BLK (match_operand:SI 0 "register_operand" "+r,r")) - (mem:BLK (match_operand:SI 1 "register_operand" "+r,r"))) - (clobber (match_operand:SI 2 "register_operand" "=&r,&r")) ;loop cnt/tmp -@@ -3352,7 +3352,7 @@ - "* return pa_output_block_move (operands, !which_alternative);" - [(set_attr "type" "multi,multi")]) - --(define_expand "movmemdi" -+(define_expand "cpymemdi" - [(parallel [(set (match_operand:BLK 0 "" "") - (match_operand:BLK 1 "" "")) - (clobber (match_dup 4)) -@@ -3432,7 +3432,7 @@ - ;; operands 0 and 1 are both equivalent to symbolic MEMs. Thus, we are - ;; forced to internally copy operands 0 and 1 to operands 7 and 8, - ;; respectively. We then split or peephole optimize after reload. --(define_insn "movmemdi_prereload" -+(define_insn "cpymemdi_prereload" - [(set (mem:BLK (match_operand:DI 0 "register_operand" "r,r")) - (mem:BLK (match_operand:DI 1 "register_operand" "r,r"))) - (clobber (match_operand:DI 2 "register_operand" "=&r,&r")) ;loop cnt/tmp -@@ -3525,7 +3525,7 @@ - } - }") - --(define_insn "movmemdi_postreload" -+(define_insn "cpymemdi_postreload" - [(set (mem:BLK (match_operand:DI 0 "register_operand" "+r,r")) - (mem:BLK (match_operand:DI 1 "register_operand" "+r,r"))) - (clobber (match_operand:DI 2 "register_operand" "=&r,&r")) ;loop cnt/tmp -diff --git a/gcc/config/pdp11/pdp11.md b/gcc/config/pdp11/pdp11.md -index ce781db06..be5ddc4c3 100644 ---- a/gcc/config/pdp11/pdp11.md -+++ b/gcc/config/pdp11/pdp11.md -@@ -26,7 +26,7 @@ - UNSPECV_BLOCKAGE - UNSPECV_SETD - UNSPECV_SETI -- UNSPECV_MOVMEM -+ UNSPECV_CPYMEM - ]) - - (define_constants -@@ -664,8 +664,8 @@ - [(set_attr "length" "2,2,4,4,2")]) - - ;; Expand a block move. We turn this into a move loop. --(define_expand "movmemhi" -- [(parallel [(unspec_volatile [(const_int 0)] UNSPECV_MOVMEM) -+(define_expand "cpymemhi" -+ [(parallel [(unspec_volatile [(const_int 0)] UNSPECV_CPYMEM) - (match_operand:BLK 0 "general_operand" "=g") - (match_operand:BLK 1 "general_operand" "g") - (match_operand:HI 2 "immediate_operand" "i") -@@ -694,8 +694,8 @@ - }") - - ;; Expand a block move. We turn this into a move loop. 
--(define_insn_and_split "movmemhi1" -- [(unspec_volatile [(const_int 0)] UNSPECV_MOVMEM) -+(define_insn_and_split "cpymemhi1" -+ [(unspec_volatile [(const_int 0)] UNSPECV_CPYMEM) - (match_operand:HI 0 "register_operand" "+r") - (match_operand:HI 1 "register_operand" "+r") - (match_operand:HI 2 "register_operand" "+r") -@@ -707,7 +707,7 @@ - "" - "#" - "reload_completed" -- [(parallel [(unspec_volatile [(const_int 0)] UNSPECV_MOVMEM) -+ [(parallel [(unspec_volatile [(const_int 0)] UNSPECV_CPYMEM) - (match_dup 0) - (match_dup 1) - (match_dup 2) -@@ -719,8 +719,8 @@ - (clobber (reg:CC CC_REGNUM))])] - "") - --(define_insn "movmemhi_nocc" -- [(unspec_volatile [(const_int 0)] UNSPECV_MOVMEM) -+(define_insn "cpymemhi_nocc" -+ [(unspec_volatile [(const_int 0)] UNSPECV_CPYMEM) - (match_operand:HI 0 "register_operand" "+r") - (match_operand:HI 1 "register_operand" "+r") - (match_operand:HI 2 "register_operand" "+r") -diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c -index b3297a381..49383d857 100644 ---- a/gcc/config/riscv/riscv.c -+++ b/gcc/config/riscv/riscv.c -@@ -3024,7 +3024,7 @@ riscv_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length, - emit_insn(gen_nop ()); - } - --/* Expand a movmemsi instruction, which copies LENGTH bytes from -+/* Expand a cpymemsi instruction, which copies LENGTH bytes from - memory reference SRC to memory reference DEST. */ - - bool -diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h -index 5130dc826..7e3612641 100644 ---- a/gcc/config/riscv/riscv.h -+++ b/gcc/config/riscv/riscv.h -@@ -829,20 +829,20 @@ while (0) - #undef PTRDIFF_TYPE - #define PTRDIFF_TYPE (POINTER_SIZE == 64 ? "long int" : "int") - --/* The maximum number of bytes copied by one iteration of a movmemsi loop. */ -+/* The maximum number of bytes copied by one iteration of a cpymemsi loop. */ - - #define RISCV_MAX_MOVE_BYTES_PER_LOOP_ITER (UNITS_PER_WORD * 4) - - /* The maximum number of bytes that can be copied by a straight-line -- movmemsi implementation. */ -+ cpymemsi implementation. */ - - #define RISCV_MAX_MOVE_BYTES_STRAIGHT (RISCV_MAX_MOVE_BYTES_PER_LOOP_ITER * 3) - - /* If a memory-to-memory move would take MOVE_RATIO or more simple -- move-instruction pairs, we will do a movmem or libcall instead. -+ move-instruction pairs, we will do a cpymem or libcall instead. - Do not use move_by_pieces at all when strict alignment is not - in effect but the target has slow unaligned accesses; in this -- case, movmem or libcall is more efficient. */ -+ case, cpymem or libcall is more efficient. */ - - #define MOVE_RATIO(speed) \ - (!STRICT_ALIGNMENT && riscv_slow_unaligned_access_p ? 1 : \ -diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md -index e40535c9e..cfb5fdd6a 100644 ---- a/gcc/config/riscv/riscv.md -+++ b/gcc/config/riscv/riscv.md -@@ -1503,7 +1503,7 @@ - DONE; - }) - --(define_expand "movmemsi" -+(define_expand "cpymemsi" - [(parallel [(set (match_operand:BLK 0 "general_operand") - (match_operand:BLK 1 "general_operand")) - (use (match_operand:SI 2 "")) -diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c -index 8f046de42..ee07aa9df 100644 ---- a/gcc/config/rs6000/rs6000.c -+++ b/gcc/config/rs6000/rs6000.c -@@ -33472,7 +33472,7 @@ get_prev_label (tree function_name) - return NULL_TREE; - } - --/* Generate PIC and indirect symbol stubs. */ -+/* Generate external symbol indirection stubs (PIC and non-PIC). 
*/ - - void - machopic_output_stub (FILE *file, const char *symb, const char *stub) -@@ -38392,7 +38392,8 @@ rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg, - if ((cookie_val & CALL_LONG) != 0 - && GET_CODE (func_desc) == SYMBOL_REF) - { -- /* FIXME: the longcall opt should not hang off picsymbol stubs. */ -+ /* FIXME: the longcall opt should not hang off this flag, it is most -+ likely incorrect for kernel-mode code-generation. */ - if (darwin_symbol_stubs && TARGET_32BIT) - make_island = true; /* Do nothing yet, retain the CALL_LONG flag. */ - else -diff --git a/gcc/config/rx/rx.md b/gcc/config/rx/rx.md -index 2790882c9..9df73e6ef 100644 ---- a/gcc/config/rx/rx.md -+++ b/gcc/config/rx/rx.md -@@ -46,7 +46,7 @@ - (UNSPEC_CONST 13) - - (UNSPEC_MOVSTR 20) -- (UNSPEC_MOVMEM 21) -+ (UNSPEC_CPYMEM 21) - (UNSPEC_SETMEM 22) - (UNSPEC_STRLEN 23) - (UNSPEC_CMPSTRN 24) -@@ -2449,13 +2449,13 @@ - (set_attr "timings" "1111")] ;; The timing is a guesstimate. - ) - --(define_expand "movmemsi" -+(define_expand "cpymemsi" - [(parallel - [(set (match_operand:BLK 0 "memory_operand") ;; Dest - (match_operand:BLK 1 "memory_operand")) ;; Source - (use (match_operand:SI 2 "register_operand")) ;; Length in bytes - (match_operand 3 "immediate_operand") ;; Align -- (unspec_volatile:BLK [(reg:SI 1) (reg:SI 2) (reg:SI 3)] UNSPEC_MOVMEM)] -+ (unspec_volatile:BLK [(reg:SI 1) (reg:SI 2) (reg:SI 3)] UNSPEC_CPYMEM)] - )] - "rx_allow_string_insns" - { -@@ -2486,16 +2486,16 @@ - emit_move_insn (len, force_operand (operands[2], NULL_RTX)); - operands[0] = replace_equiv_address_nv (operands[0], addr1); - operands[1] = replace_equiv_address_nv (operands[1], addr2); -- emit_insn (gen_rx_movmem ()); -+ emit_insn (gen_rx_cpymem ()); - DONE; - } - ) - --(define_insn "rx_movmem" -+(define_insn "rx_cpymem" - [(set (mem:BLK (reg:SI 1)) - (mem:BLK (reg:SI 2))) - (use (reg:SI 3)) -- (unspec_volatile:BLK [(reg:SI 1) (reg:SI 2) (reg:SI 3)] UNSPEC_MOVMEM) -+ (unspec_volatile:BLK [(reg:SI 1) (reg:SI 2) (reg:SI 3)] UNSPEC_CPYMEM) - (clobber (reg:SI 1)) - (clobber (reg:SI 2)) - (clobber (reg:SI 3))] -diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h -index aa04479ec..b162b26b3 100644 ---- a/gcc/config/s390/s390-protos.h -+++ b/gcc/config/s390/s390-protos.h -@@ -104,7 +104,7 @@ extern void s390_reload_symref_address (rtx , rtx , rtx , bool); - extern void s390_expand_plus_operand (rtx, rtx, rtx); - extern void emit_symbolic_move (rtx *); - extern void s390_load_address (rtx, rtx); --extern bool s390_expand_movmem (rtx, rtx, rtx); -+extern bool s390_expand_cpymem (rtx, rtx, rtx); - extern void s390_expand_setmem (rtx, rtx, rtx); - extern bool s390_expand_cmpmem (rtx, rtx, rtx, rtx); - extern void s390_expand_vec_strlen (rtx, rtx, rtx); -diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c -index c35666dec..2959f6423 100644 ---- a/gcc/config/s390/s390.c -+++ b/gcc/config/s390/s390.c -@@ -5400,7 +5400,7 @@ legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED, - /* Emit code to move LEN bytes from DST to SRC. */ - - bool --s390_expand_movmem (rtx dst, rtx src, rtx len) -+s390_expand_cpymem (rtx dst, rtx src, rtx len) - { - /* When tuning for z10 or higher we rely on the Glibc functions to - do the right thing. 
Only for constant lengths below 64k we will -@@ -5425,14 +5425,14 @@ s390_expand_movmem (rtx dst, rtx src, rtx len) - { - rtx newdst = adjust_address (dst, BLKmode, o); - rtx newsrc = adjust_address (src, BLKmode, o); -- emit_insn (gen_movmem_short (newdst, newsrc, -+ emit_insn (gen_cpymem_short (newdst, newsrc, - GEN_INT (l > 256 ? 255 : l - 1))); - } - } - - else if (TARGET_MVCLE) - { -- emit_insn (gen_movmem_long (dst, src, convert_to_mode (Pmode, len, 1))); -+ emit_insn (gen_cpymem_long (dst, src, convert_to_mode (Pmode, len, 1))); - } - - else -@@ -5494,7 +5494,7 @@ s390_expand_movmem (rtx dst, rtx src, rtx len) - emit_insn (prefetch); - } - -- emit_insn (gen_movmem_short (dst, src, GEN_INT (255))); -+ emit_insn (gen_cpymem_short (dst, src, GEN_INT (255))); - s390_load_address (dst_addr, - gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256))); - s390_load_address (src_addr, -@@ -5511,7 +5511,7 @@ s390_expand_movmem (rtx dst, rtx src, rtx len) - emit_jump (loop_start_label); - emit_label (loop_end_label); - -- emit_insn (gen_movmem_short (dst, src, -+ emit_insn (gen_cpymem_short (dst, src, - convert_to_mode (Pmode, count, 1))); - emit_label (end_label); - } -@@ -5563,7 +5563,7 @@ s390_expand_setmem (rtx dst, rtx len, rtx val) - if (l > 1) - { - rtx newdstp1 = adjust_address (dst, BLKmode, o + 1); -- emit_insn (gen_movmem_short (newdstp1, newdst, -+ emit_insn (gen_cpymem_short (newdstp1, newdst, - GEN_INT (l > 257 ? 255 : l - 2))); - } - } -@@ -5670,7 +5670,7 @@ s390_expand_setmem (rtx dst, rtx len, rtx val) - /* Set the first byte in the block to the value and use an - overlapping mvc for the block. */ - emit_move_insn (adjust_address (dst, QImode, 0), val); -- emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (254))); -+ emit_insn (gen_cpymem_short (dstp1, dst, GEN_INT (254))); - } - s390_load_address (dst_addr, - gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256))); -@@ -5694,7 +5694,7 @@ s390_expand_setmem (rtx dst, rtx len, rtx val) - emit_move_insn (adjust_address (dst, QImode, 0), val); - /* execute only uses the lowest 8 bits of count that's - exactly what we need here. */ -- emit_insn (gen_movmem_short (dstp1, dst, -+ emit_insn (gen_cpymem_short (dstp1, dst, - convert_to_mode (Pmode, count, 1))); - } - -@@ -6336,7 +6336,7 @@ s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src) - - dest = adjust_address (dest, BLKmode, 0); - set_mem_size (dest, size); -- s390_expand_movmem (dest, src_mem, GEN_INT (size)); -+ s390_expand_cpymem (dest, src_mem, GEN_INT (size)); - return true; - } - -@@ -12408,7 +12408,7 @@ s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, - - s390_check_type_for_vector_abi (type, true, false); - -- if (pass_by_reference (NULL, TYPE_MODE (type), type, false)) -+ if (pass_va_arg_by_reference (type)) - { - if (TARGET_DEBUG_ARG) - { -diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md -index 5a3496ac9..8dc3c12df 100644 ---- a/gcc/config/s390/s390.md -+++ b/gcc/config/s390/s390.md -@@ -3196,17 +3196,17 @@ - - - ; --; movmemM instruction pattern(s). -+; cpymemM instruction pattern(s). 
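Among the renamed s390 helpers, s390_expand_setmem keeps the trick noted in its comment: store the fill byte once, then issue an overlapping MVC starting one byte further on, so the value propagates through the block. MVC copies strictly left to right; the toy loop below models that defined behaviour in portable C (illustrative only; memcpy could not be used here because the regions overlap):

#include <stdio.h>

/* A strictly byte-by-byte forward copy, like MVC's left-to-right order.  */
static void mvc_like (unsigned char *to, const unsigned char *from, int len)
{
  for (int i = 0; i < len; i++)
    to[i] = from[i];
}

int main (void)
{
  unsigned char buf[16];

  buf[0] = 0xAA;                 /* store the fill value once      */
  mvc_like (buf + 1, buf, 15);   /* overlapping forward "MVC"      */

  for (int i = 0; i < 16; i++)
    if (buf[i] != 0xAA)
      return 1;
  puts ("block filled");
  return 0;
}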
- ; - --(define_expand "movmem" -+(define_expand "cpymem" - [(set (match_operand:BLK 0 "memory_operand" "") ; destination - (match_operand:BLK 1 "memory_operand" "")) ; source - (use (match_operand:GPR 2 "general_operand" "")) ; count - (match_operand 3 "" "")] - "" - { -- if (s390_expand_movmem (operands[0], operands[1], operands[2])) -+ if (s390_expand_cpymem (operands[0], operands[1], operands[2])) - DONE; - else - FAIL; -@@ -3215,7 +3215,7 @@ - ; Move a block that is up to 256 bytes in length. - ; The block length is taken as (operands[2] % 256) + 1. - --(define_expand "movmem_short" -+(define_expand "cpymem_short" - [(parallel - [(set (match_operand:BLK 0 "memory_operand" "") - (match_operand:BLK 1 "memory_operand" "")) -@@ -3225,7 +3225,7 @@ - "" - "operands[3] = gen_rtx_SCRATCH (Pmode);") - --(define_insn "*movmem_short" -+(define_insn "*cpymem_short" - [(set (match_operand:BLK 0 "memory_operand" "=Q,Q,Q,Q") - (match_operand:BLK 1 "memory_operand" "Q,Q,Q,Q")) - (use (match_operand 2 "nonmemory_operand" "n,a,a,a")) -@@ -3293,7 +3293,7 @@ - - ; Move a block of arbitrary length. - --(define_expand "movmem_long" -+(define_expand "cpymem_long" - [(parallel - [(clobber (match_dup 2)) - (clobber (match_dup 3)) -@@ -3327,7 +3327,7 @@ - operands[3] = reg1; - }) - --(define_insn "*movmem_long" -+(define_insn "*cpymem_long" - [(clobber (match_operand: 0 "register_operand" "=d")) - (clobber (match_operand: 1 "register_operand" "=d")) - (set (mem:BLK (subreg:P (match_operand: 2 "register_operand" "0") 0)) -@@ -3340,7 +3340,7 @@ - [(set_attr "length" "8") - (set_attr "type" "vs")]) - --(define_insn "*movmem_long_31z" -+(define_insn "*cpymem_long_31z" - [(clobber (match_operand:TI 0 "register_operand" "=d")) - (clobber (match_operand:TI 1 "register_operand" "=d")) - (set (mem:BLK (subreg:SI (match_operand:TI 2 "register_operand" "0") 4)) -diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md -index fdb80d5d9..e687cf22a 100644 ---- a/gcc/config/sh/sh.md -+++ b/gcc/config/sh/sh.md -@@ -8906,7 +8906,7 @@ - - ;; String/block move insn. - --(define_expand "movmemsi" -+(define_expand "cpymemsi" - [(parallel [(set (mem:BLK (match_operand:BLK 0)) - (mem:BLK (match_operand:BLK 1))) - (use (match_operand:SI 2 "nonmemory_operand")) -diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c -index a993aab76..02966fd03 100644 ---- a/gcc/config/sparc/sparc.c -+++ b/gcc/config/sparc/sparc.c -@@ -7965,7 +7965,7 @@ sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, - bool indirect; - tree ptrtype = build_pointer_type (type); - -- if (pass_by_reference (NULL, TYPE_MODE (type), type, false)) -+ if (pass_va_arg_by_reference (type)) - { - indirect = true; - size = rsize = UNITS_PER_WORD; -diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h -index 4b09fc86b..8807a56f4 100644 ---- a/gcc/config/sparc/sparc.h -+++ b/gcc/config/sparc/sparc.h -@@ -1419,7 +1419,7 @@ do { \ - #define MOVE_MAX 8 - - /* If a memory-to-memory move would take MOVE_RATIO or more simple -- move-instruction pairs, we will do a movmem or libcall instead. */ -+ move-instruction pairs, we will do a cpymem or libcall instead. */ - - #define MOVE_RATIO(speed) ((speed) ? 8 : 3) - -diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c -index 8d7439e69..ecc767bfa 100644 ---- a/gcc/config/spu/spu.c -+++ b/gcc/config/spu/spu.c -@@ -4053,8 +4053,7 @@ spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p, - - /* if an object is dynamically sized, a pointer to it is passed - instead of the object itself. 
*/ -- pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type, -- false); -+ pass_by_reference_p = pass_va_arg_by_reference (type); - if (pass_by_reference_p) - type = build_pointer_type (type); - size = int_size_in_bytes (type); -diff --git a/gcc/config/tilegx/tilegx.c b/gcc/config/tilegx/tilegx.c -index 82226da3a..d12f1a99d 100644 ---- a/gcc/config/tilegx/tilegx.c -+++ b/gcc/config/tilegx/tilegx.c -@@ -471,8 +471,7 @@ tilegx_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, - - /* If an object is dynamically sized, a pointer to it is passed - instead of the object itself. */ -- pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type, -- false); -+ pass_by_reference_p = pass_va_arg_by_reference (type); - - if (pass_by_reference_p) - type = build_pointer_type (type); -diff --git a/gcc/config/tilepro/tilepro.c b/gcc/config/tilepro/tilepro.c -index c8d69d32f..f1a0df0ad 100644 ---- a/gcc/config/tilepro/tilepro.c -+++ b/gcc/config/tilepro/tilepro.c -@@ -419,8 +419,7 @@ tilepro_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p, - - /* if an object is dynamically sized, a pointer to it is passed - instead of the object itself. */ -- pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type, -- false); -+ pass_by_reference_p = pass_va_arg_by_reference (type); - - if (pass_by_reference_p) - type = build_pointer_type (type); -diff --git a/gcc/config/vax/vax-protos.h b/gcc/config/vax/vax-protos.h -index a76cf0239..a85cf3611 100644 ---- a/gcc/config/vax/vax-protos.h -+++ b/gcc/config/vax/vax-protos.h -@@ -31,7 +31,6 @@ extern void vax_expand_addsub_di_operands (rtx *, enum rtx_code); - extern const char * vax_output_int_move (rtx, rtx *, machine_mode); - extern const char * vax_output_int_add (rtx_insn *, rtx *, machine_mode); - extern const char * vax_output_int_subtract (rtx_insn *, rtx *, machine_mode); --extern const char * vax_output_movmemsi (rtx, rtx *); - #endif /* RTX_CODE */ - - #ifdef REAL_VALUE_TYPE -diff --git a/gcc/config/vax/vax.h b/gcc/config/vax/vax.h -index a6a8227f7..e7137dc09 100644 ---- a/gcc/config/vax/vax.h -+++ b/gcc/config/vax/vax.h -@@ -430,7 +430,7 @@ enum reg_class { NO_REGS, ALL_REGS, LIM_REG_CLASSES }; - #define MOVE_MAX 8 - - /* If a memory-to-memory move would take MOVE_RATIO or more simple -- move-instruction pairs, we will do a movmem or libcall instead. */ -+ move-instruction pairs, we will do a cpymem or libcall instead. */ - #define MOVE_RATIO(speed) ((speed) ? 6 : 3) - #define CLEAR_RATIO(speed) ((speed) ? 6 : 2) - -diff --git a/gcc/config/vax/vax.md b/gcc/config/vax/vax.md -index bfeae7f80..298f3393d 100644 ---- a/gcc/config/vax/vax.md -+++ b/gcc/config/vax/vax.md -@@ -206,8 +206,8 @@ - }") - - ;; This is here to accept 4 arguments and pass the first 3 along --;; to the movmemhi1 pattern that really does the work. --(define_expand "movmemhi" -+;; to the cpymemhi1 pattern that really does the work. -+(define_expand "cpymemhi" - [(set (match_operand:BLK 0 "general_operand" "=g") - (match_operand:BLK 1 "general_operand" "g")) - (use (match_operand:HI 2 "general_operand" "g")) -@@ -215,7 +215,7 @@ - "" - " - { -- emit_insn (gen_movmemhi1 (operands[0], operands[1], operands[2])); -+ emit_insn (gen_cpymemhi1 (operands[0], operands[1], operands[2])); - DONE; - }") - -@@ -224,7 +224,7 @@ - ;; that anything generated as this insn will be recognized as one - ;; and that it won't successfully combine with anything. 
- --(define_insn "movmemhi1" -+(define_insn "cpymemhi1" - [(set (match_operand:BLK 0 "memory_operand" "=o") - (match_operand:BLK 1 "memory_operand" "o")) - (use (match_operand:HI 2 "general_operand" "g")) -diff --git a/gcc/config/visium/visium.c b/gcc/config/visium/visium.c -index 431f64cfc..4ff331362 100644 ---- a/gcc/config/visium/visium.c -+++ b/gcc/config/visium/visium.c -@@ -1637,8 +1637,7 @@ visium_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, - tree f_ovfl, f_gbase, f_fbase, f_gbytes, f_fbytes; - tree ovfl, base, bytes; - HOST_WIDE_INT size, rsize; -- const bool by_reference_p -- = pass_by_reference (NULL, TYPE_MODE (type), type, false); -+ const bool by_reference_p = pass_va_arg_by_reference (type); - const bool float_reg_arg_p - = (TARGET_FPU && !by_reference_p - && ((GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT -diff --git a/gcc/config/visium/visium.h b/gcc/config/visium/visium.h -index 817e7dc70..c9376b28f 100644 ---- a/gcc/config/visium/visium.h -+++ b/gcc/config/visium/visium.h -@@ -1138,8 +1138,8 @@ do \ - always make code faster, but eventually incurs high cost in - increased code size. - -- Since we have a movmemsi pattern, the default MOVE_RATIO is 2, which -- is too low given that movmemsi will invoke a libcall. */ -+ Since we have a cpymemsi pattern, the default MOVE_RATIO is 2, which -+ is too low given that cpymemsi will invoke a libcall. */ - #define MOVE_RATIO(speed) ((speed) ? 9 : 3) - - /* `CLEAR_RATIO (SPEED)` -diff --git a/gcc/config/visium/visium.md b/gcc/config/visium/visium.md -index f53544134..e146b89d1 100644 ---- a/gcc/config/visium/visium.md -+++ b/gcc/config/visium/visium.md -@@ -3006,7 +3006,7 @@ - ;; Argument 2 is the length - ;; Argument 3 is the alignment - --(define_expand "movmemsi" -+(define_expand "cpymemsi" - [(parallel [(set (match_operand:BLK 0 "memory_operand" "") - (match_operand:BLK 1 "memory_operand" "")) - (use (match_operand:SI 2 "general_operand" "")) -diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c -index ee5612441..b275deafa 100644 ---- a/gcc/config/xtensa/xtensa.c -+++ b/gcc/config/xtensa/xtensa.c -@@ -3252,7 +3252,7 @@ xtensa_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, - tree lab_false, lab_over, lab_false2; - bool indirect; - -- indirect = pass_by_reference (NULL, TYPE_MODE (type), type, false); -+ indirect = pass_va_arg_by_reference (type); - if (indirect) - type = build_pointer_type (type); - -diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md -index 362e5ff3c..d1448a02f 100644 ---- a/gcc/config/xtensa/xtensa.md -+++ b/gcc/config/xtensa/xtensa.md -@@ -1026,7 +1026,7 @@ - - ;; Block moves - --(define_expand "movmemsi" -+(define_expand "cpymemsi" - [(parallel [(set (match_operand:BLK 0 "" "") - (match_operand:BLK 1 "" "")) - (use (match_operand:SI 2 "arith_operand" "")) -diff --git a/gcc/coretypes.h b/gcc/coretypes.h -index 2f6b8599d..88fe8a3f9 100644 ---- a/gcc/coretypes.h -+++ b/gcc/coretypes.h -@@ -153,6 +153,14 @@ struct cl_option_handlers; - struct diagnostic_context; - struct pretty_printer; - -+template struct array_traits; -+ -+/* Provides a read-only bitmap view of a single integer bitmask or an -+ array of integer bitmasks, or of a wrapper around such bitmasks. */ -+template, -+ bool has_constant_size = Traits::has_constant_size> -+struct bitmap_view; -+ - /* Address space number for named address space support. 
*/ - typedef unsigned char addr_space_t; - -@@ -332,6 +340,7 @@ namespace gcc { - } - - typedef std::pair tree_pair; -+typedef std::pair string_int_pair; - - /* Define a name->value mapping. */ - template -diff --git a/gcc/coverage.c b/gcc/coverage.c -index 1ffefd5f4..a63cb94e9 100644 ---- a/gcc/coverage.c -+++ b/gcc/coverage.c -@@ -643,7 +643,7 @@ coverage_begin_function (unsigned lineno_checksum, unsigned cfg_checksum) - (DECL_ASSEMBLER_NAME (current_function_decl))); - gcov_write_unsigned (DECL_ARTIFICIAL (current_function_decl) - && !DECL_FUNCTION_VERSIONED (current_function_decl) -- && !DECL_LAMBDA_FUNCTION (current_function_decl)); -+ && !DECL_LAMBDA_FUNCTION_P (current_function_decl)); - gcov_write_filename (xloc.file); - gcov_write_unsigned (xloc.line); - gcov_write_unsigned (xloc.column); -diff --git a/gcc/cp/call.c b/gcc/cp/call.c -index 23a54f3c3..3a821de7a 100644 ---- a/gcc/cp/call.c -+++ b/gcc/cp/call.c -@@ -9166,12 +9166,14 @@ maybe_warn_class_memaccess (location_t loc, tree fndecl, - } - - /* Build and return a call to FN, using NARGS arguments in ARGARRAY. -+ If FN is the result of resolving an overloaded target built-in, -+ ORIG_FNDECL is the original function decl, otherwise it is null. - This function performs no overload resolution, conversion, or other - high-level operations. */ - - tree - build_cxx_call (tree fn, int nargs, tree *argarray, -- tsubst_flags_t complain) -+ tsubst_flags_t complain, tree orig_fndecl) - { - tree fndecl; - -@@ -9181,11 +9183,13 @@ build_cxx_call (tree fn, int nargs, tree *argarray, - SET_EXPR_LOCATION (fn, loc); - - fndecl = get_callee_fndecl (fn); -+ if (!orig_fndecl) -+ orig_fndecl = fndecl; - - /* Check that arguments to builtin functions match the expectations. */ - if (fndecl - && !processing_template_decl -- && fndecl_built_in_p (fndecl, BUILT_IN_NORMAL)) -+ && fndecl_built_in_p (fndecl)) - { - int i; - -@@ -9195,7 +9199,7 @@ build_cxx_call (tree fn, int nargs, tree *argarray, - argarray[i] = maybe_constant_value (argarray[i]); - - if (!check_builtin_function_arguments (EXPR_LOCATION (fn), vNULL, fndecl, -- nargs, argarray)) -+ orig_fndecl, nargs, argarray)) - return error_mark_node; - } - -diff --git a/gcc/cp/cp-objcp-common.h b/gcc/cp/cp-objcp-common.h -index 89a889a7d..e5d34f180 100644 ---- a/gcc/cp/cp-objcp-common.h -+++ b/gcc/cp/cp-objcp-common.h -@@ -35,6 +35,8 @@ extern tree cp_get_global_decls (); - extern tree cp_pushdecl (tree); - extern void cp_register_dumps (gcc::dump_manager *); - extern tree cxx_make_type_hook (tree_code); -+extern tree cxx_simulate_enum_decl (location_t, const char *, -+ vec); - - /* Lang hooks that are shared between C++ and ObjC++ are defined here. 
Hooks - specific to C++ or ObjC++ go in cp/cp-lang.c and objcp/objcp-lang.c, -@@ -100,6 +102,9 @@ extern tree cxx_make_type_hook (tree_code); - #define LANG_HOOKS_BUILTIN_FUNCTION cxx_builtin_function - #undef LANG_HOOKS_BUILTIN_FUNCTION_EXT_SCOPE - #define LANG_HOOKS_BUILTIN_FUNCTION_EXT_SCOPE cxx_builtin_function_ext_scope -+#undef LANG_HOOKS_SIMULATE_BUILTIN_FUNCTION_DECL -+#define LANG_HOOKS_SIMULATE_BUILTIN_FUNCTION_DECL \ -+ cxx_simulate_builtin_function_decl - #undef LANG_HOOKS_TYPE_HASH_EQ - #define LANG_HOOKS_TYPE_HASH_EQ cxx_type_hash_eq - #undef LANG_HOOKS_COPY_LANG_QUALIFIERS -@@ -128,6 +133,8 @@ extern tree cxx_make_type_hook (tree_code); - - #undef LANG_HOOKS_MAKE_TYPE - #define LANG_HOOKS_MAKE_TYPE cxx_make_type_hook -+#undef LANG_HOOKS_SIMULATE_ENUM_DECL -+#define LANG_HOOKS_SIMULATE_ENUM_DECL cxx_simulate_enum_decl - #undef LANG_HOOKS_TYPE_FOR_MODE - #define LANG_HOOKS_TYPE_FOR_MODE c_common_type_for_mode - #undef LANG_HOOKS_TYPE_FOR_SIZE -diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h -index f7c3eea4c..4bba1887f 100644 ---- a/gcc/cp/cp-tree.h -+++ b/gcc/cp/cp-tree.h -@@ -6245,7 +6245,8 @@ extern tree perform_direct_initialization_if_possible (tree, tree, bool, - tsubst_flags_t); - extern tree in_charge_arg_for_name (tree); - extern tree build_cxx_call (tree, int, tree *, -- tsubst_flags_t); -+ tsubst_flags_t, -+ tree = NULL_TREE); - extern bool is_std_init_list (tree); - extern bool is_list_ctor (tree); - extern void validate_conversion_obstack (void); -@@ -6451,6 +6452,7 @@ extern tmpl_spec_kind current_tmpl_spec_kind (int); - extern tree cp_fname_init (const char *, tree *); - extern tree cxx_builtin_function (tree decl); - extern tree cxx_builtin_function_ext_scope (tree decl); -+extern tree cxx_simulate_builtin_function_decl (tree); - extern tree check_elaborated_type_specifier (enum tag_types, tree, bool); - extern void warn_extern_redeclared_static (tree, tree); - extern tree cxx_comdat_group (tree); -@@ -7386,7 +7388,8 @@ extern tree get_member_function_from_ptrfunc (tree *, tree, tsubst_flags_t); - extern tree cp_build_function_call_nary (tree, tsubst_flags_t, ...) - ATTRIBUTE_SENTINEL; - extern tree cp_build_function_call_vec (tree, vec **, -- tsubst_flags_t); -+ tsubst_flags_t, -+ tree = NULL_TREE); - extern tree build_x_binary_op (const op_location_t &, - enum tree_code, tree, - enum tree_code, tree, -diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c -index 5c82c2272..928ac3f21 100644 ---- a/gcc/cp/decl.c -+++ b/gcc/cp/decl.c -@@ -2273,7 +2273,8 @@ next_arg:; - DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (newdecl) - |= DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (olddecl); - DECL_NO_LIMIT_STACK (newdecl) |= DECL_NO_LIMIT_STACK (olddecl); -- DECL_IS_OPERATOR_NEW (newdecl) |= DECL_IS_OPERATOR_NEW (olddecl); -+ if (DECL_IS_OPERATOR_NEW_P (olddecl)) -+ DECL_SET_IS_OPERATOR_NEW (newdecl, true); - DECL_LOOPING_CONST_OR_PURE_P (newdecl) - |= DECL_LOOPING_CONST_OR_PURE_P (olddecl); - -@@ -2520,8 +2521,7 @@ next_arg:; - if (fndecl_built_in_p (olddecl) - && (new_defines_function ? GNU_INLINE_P (newdecl) : types_match)) - { -- DECL_BUILT_IN_CLASS (newdecl) = DECL_BUILT_IN_CLASS (olddecl); -- DECL_FUNCTION_CODE (newdecl) = DECL_FUNCTION_CODE (olddecl); -+ copy_decl_built_in_function (newdecl, olddecl); - /* If we're keeping the built-in definition, keep the rtl, - regardless of declaration matches. 
*/ - COPY_DECL_RTL (olddecl, newdecl); -@@ -4335,10 +4335,10 @@ cxx_init_decl_processing (void) - deltype = build_exception_variant (deltype, empty_except_spec); - tree opnew = push_cp_library_fn (NEW_EXPR, newtype, 0); - DECL_IS_MALLOC (opnew) = 1; -- DECL_IS_OPERATOR_NEW (opnew) = 1; -+ DECL_SET_IS_OPERATOR_NEW (opnew, true); - opnew = push_cp_library_fn (VEC_NEW_EXPR, newtype, 0); - DECL_IS_MALLOC (opnew) = 1; -- DECL_IS_OPERATOR_NEW (opnew) = 1; -+ DECL_SET_IS_OPERATOR_NEW (opnew, true); - push_cp_library_fn (DELETE_EXPR, deltype, ECF_NOTHROW); - push_cp_library_fn (VEC_DELETE_EXPR, deltype, ECF_NOTHROW); - if (flag_sized_deallocation) -@@ -4371,10 +4371,10 @@ cxx_init_decl_processing (void) - newtype = build_exception_variant (newtype, new_eh_spec); - opnew = push_cp_library_fn (NEW_EXPR, newtype, 0); - DECL_IS_MALLOC (opnew) = 1; -- DECL_IS_OPERATOR_NEW (opnew) = 1; -+ DECL_SET_IS_OPERATOR_NEW (opnew, true); - opnew = push_cp_library_fn (VEC_NEW_EXPR, newtype, 0); - DECL_IS_MALLOC (opnew) = 1; -- DECL_IS_OPERATOR_NEW (opnew) = 1; -+ DECL_SET_IS_OPERATOR_NEW (opnew, true); - - /* operator delete (void *, align_val_t); */ - deltype = build_function_type_list (void_type_node, ptr_type_node, -@@ -4614,6 +4614,19 @@ cxx_builtin_function_ext_scope (tree decl) - return builtin_function_1 (decl, NULL_TREE, true); - } - -+/* Implement LANG_HOOKS_SIMULATE_BUILTIN_FUNCTION_DECL. */ -+ -+tree -+cxx_simulate_builtin_function_decl (tree decl) -+{ -+ retrofit_lang_decl (decl); -+ -+ DECL_ARTIFICIAL (decl) = 1; -+ SET_DECL_LANGUAGE (decl, lang_cplusplus); -+ DECL_CONTEXT (decl) = FROB_CONTEXT (current_namespace); -+ return pushdecl (decl); -+} -+ - /* Generate a FUNCTION_DECL with the typical flags for a runtime library - function. Not called directly. */ - -@@ -13570,7 +13583,7 @@ grok_op_properties (tree decl, bool complain) - coerce_delete_type (decl, loc); - else - { -- DECL_IS_OPERATOR_NEW (decl) = 1; -+ DECL_SET_IS_OPERATOR_NEW (decl, true); - TREE_TYPE (decl) = coerce_new_type (TREE_TYPE (decl), loc); - } - -@@ -15119,6 +15132,40 @@ lookup_enumerator (tree enumtype, tree name) - return e? TREE_VALUE (e) : NULL_TREE; - } - -+/* Implement LANG_HOOKS_SIMULATE_ENUM_DECL. */ -+ -+tree -+cxx_simulate_enum_decl (location_t loc, const char *name, -+ vec values) -+{ -+ location_t saved_loc = input_location; -+ input_location = loc; -+ -+ tree enumtype = start_enum (get_identifier (name), NULL_TREE, NULL_TREE, -+ NULL_TREE, false, NULL); -+ if (!OPAQUE_ENUM_P (enumtype)) -+ { -+ error_at (loc, "multiple definition of %q#T", enumtype); -+ inform (DECL_SOURCE_LOCATION (TYPE_MAIN_DECL (enumtype)), -+ "previous definition here"); -+ return enumtype; -+ } -+ SET_OPAQUE_ENUM_P (enumtype, false); -+ DECL_SOURCE_LOCATION (TYPE_NAME (enumtype)) = loc; -+ -+ string_int_pair *value; -+ unsigned int i; -+ FOR_EACH_VEC_ELT (values, i, value) -+ build_enumerator (get_identifier (value->first), -+ build_int_cst (integer_type_node, value->second), -+ enumtype, NULL_TREE, loc); -+ -+ finish_enum_value_list (enumtype); -+ finish_enum (enumtype); -+ -+ input_location = saved_loc; -+ return enumtype; -+} - - /* We're defining DECL. Make sure that its type is OK. */ - -diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c -index 60fe58e03..6fc6ed4e3 100644 ---- a/gcc/cp/parser.c -+++ b/gcc/cp/parser.c -@@ -10977,7 +10977,7 @@ cp_parser_lambda_declarator_opt (cp_parser* parser, tree lambda_expr) - DECL_ARTIFICIAL (fco) = 1; - /* Give the object parameter a different name. 
*/ - DECL_NAME (DECL_ARGUMENTS (fco)) = closure_identifier; -- DECL_LAMBDA_FUNCTION (fco) = 1; -+ DECL_SET_LAMBDA_FUNCTION (fco, true); - } - if (template_param_list) - { -diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c -index ff7921533..bd6df79a4 100644 ---- a/gcc/cp/pt.c -+++ b/gcc/cp/pt.c -@@ -28431,9 +28431,8 @@ declare_integer_pack (void) - NULL_TREE), - NULL_TREE, ECF_CONST); - DECL_DECLARED_CONSTEXPR_P (ipfn) = true; -- DECL_BUILT_IN_CLASS (ipfn) = BUILT_IN_FRONTEND; -- DECL_FUNCTION_CODE (ipfn) -- = (enum built_in_function) (int) CP_BUILT_IN_INTEGER_PACK; -+ set_decl_built_in_function (ipfn, BUILT_IN_FRONTEND, -+ CP_BUILT_IN_INTEGER_PACK); - } - - /* Set up the hash tables for template instantiations. */ -diff --git a/gcc/cp/typeck.c b/gcc/cp/typeck.c -index c42fd731c..82f7bb0bd 100644 ---- a/gcc/cp/typeck.c -+++ b/gcc/cp/typeck.c -@@ -3738,11 +3738,11 @@ build_function_call (location_t /*loc*/, - tree - build_function_call_vec (location_t /*loc*/, vec /*arg_loc*/, - tree function, vec *params, -- vec * /*origtypes*/) -+ vec * /*origtypes*/, tree orig_function) - { - vec *orig_params = params; - tree ret = cp_build_function_call_vec (function, ¶ms, -- tf_warning_or_error); -+ tf_warning_or_error, orig_function); - - /* cp_build_function_call_vec can reallocate PARAMS by adding - default arguments. That should never happen here. Verify -@@ -3787,13 +3787,15 @@ cp_build_function_call_nary (tree function, tsubst_flags_t complain, ...) - return ret; - } - --/* Build a function call using a vector of arguments. PARAMS may be -- NULL if there are no parameters. This changes the contents of -- PARAMS. */ -+/* Build a function call using a vector of arguments. -+ If FUNCTION is the result of resolving an overloaded target built-in, -+ ORIG_FNDECL is the original function decl, otherwise it is null. -+ PARAMS may be NULL if there are no parameters. This changes the -+ contents of PARAMS. */ - - tree - cp_build_function_call_vec (tree function, vec **params, -- tsubst_flags_t complain) -+ tsubst_flags_t complain, tree orig_fndecl) - { - tree fntype, fndecl; - int is_method; -@@ -3918,7 +3920,7 @@ cp_build_function_call_vec (tree function, vec **params, - bool warned_p = check_function_arguments (input_location, fndecl, fntype, - nargs, argarray, NULL); - -- ret = build_cxx_call (function, nargs, argarray, complain); -+ ret = build_cxx_call (function, nargs, argarray, complain, orig_fndecl); - - if (warned_p) - { -diff --git a/gcc/cse.c b/gcc/cse.c -index 6c9cda16a..18eb8dfbb 100644 ---- a/gcc/cse.c -+++ b/gcc/cse.c -@@ -559,7 +559,6 @@ static struct table_elt *insert_with_costs (rtx, struct table_elt *, unsigned, - static struct table_elt *insert (rtx, struct table_elt *, unsigned, - machine_mode); - static void merge_equiv_classes (struct table_elt *, struct table_elt *); --static void invalidate_reg (rtx, bool); - static void invalidate (rtx, machine_mode); - static void remove_invalid_refs (unsigned int); - static void remove_invalid_subreg_refs (unsigned int, poly_uint64, -@@ -1821,12 +1820,10 @@ check_dependence (const_rtx x, rtx exp, machine_mode mode, rtx addr) - } - - /* Remove from the hash table, or mark as invalid, all expressions whose -- values could be altered by storing in register X. -- -- CLOBBER_HIGH is set if X was part of a CLOBBER_HIGH expression. */ -+ values could be altered by storing in register X. 
*/ - - static void --invalidate_reg (rtx x, bool clobber_high) -+invalidate_reg (rtx x) - { - gcc_assert (GET_CODE (x) == REG); - -@@ -1851,10 +1848,7 @@ invalidate_reg (rtx x, bool clobber_high) - SUBREG_TICKED (regno) = -1; - - if (regno >= FIRST_PSEUDO_REGISTER) -- { -- gcc_assert (!clobber_high); -- remove_pseudo_from_table (x, hash); -- } -+ remove_pseudo_from_table (x, hash); - else - { - HOST_WIDE_INT in_table = TEST_HARD_REG_BIT (hard_regs_in_table, regno); -@@ -1882,18 +1876,10 @@ invalidate_reg (rtx x, bool clobber_high) - if (!REG_P (p->exp) || REGNO (p->exp) >= FIRST_PSEUDO_REGISTER) - continue; - -- if (clobber_high) -- { -- if (reg_is_clobbered_by_clobber_high (p->exp, x)) -- remove_from_table (p, hash); -- } -- else -- { -- unsigned int tregno = REGNO (p->exp); -- unsigned int tendregno = END_REGNO (p->exp); -- if (tendregno > regno && tregno < endregno) -- remove_from_table (p, hash); -- } -+ unsigned int tregno = REGNO (p->exp); -+ unsigned int tendregno = END_REGNO (p->exp); -+ if (tendregno > regno && tregno < endregno) -+ remove_from_table (p, hash); - } - } - } -@@ -1920,7 +1906,7 @@ invalidate (rtx x, machine_mode full_mode) - switch (GET_CODE (x)) - { - case REG: -- invalidate_reg (x, false); -+ invalidate_reg (x); - return; - - case SUBREG: -@@ -4420,8 +4406,6 @@ canonicalize_insn (rtx_insn *insn, struct set **psets, int n_sets) - if (MEM_P (XEXP (x, 0))) - canon_reg (XEXP (x, 0), insn); - } -- else if (GET_CODE (x) == CLOBBER_HIGH) -- gcc_assert (REG_P (XEXP (x, 0))); - else if (GET_CODE (x) == USE - && ! (REG_P (XEXP (x, 0)) - && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER)) -@@ -4453,8 +4437,6 @@ canonicalize_insn (rtx_insn *insn, struct set **psets, int n_sets) - if (MEM_P (XEXP (y, 0))) - canon_reg (XEXP (y, 0), insn); - } -- else if (GET_CODE (y) == CLOBBER_HIGH) -- gcc_assert (REG_P (XEXP (y, 0))); - else if (GET_CODE (y) == USE - && ! 
(REG_P (XEXP (y, 0)) - && REGNO (XEXP (y, 0)) < FIRST_PSEUDO_REGISTER)) -@@ -6155,12 +6137,6 @@ invalidate_from_clobbers (rtx_insn *insn) - invalidate (XEXP (ref, 0), GET_MODE (ref)); - } - } -- if (GET_CODE (x) == CLOBBER_HIGH) -- { -- rtx ref = XEXP (x, 0); -- gcc_assert (REG_P (ref)); -- invalidate_reg (ref, true); -- } - else if (GET_CODE (x) == PARALLEL) - { - int i; -@@ -6177,12 +6153,6 @@ invalidate_from_clobbers (rtx_insn *insn) - || GET_CODE (ref) == ZERO_EXTRACT) - invalidate (XEXP (ref, 0), GET_MODE (ref)); - } -- else if (GET_CODE (y) == CLOBBER_HIGH) -- { -- rtx ref = XEXP (y, 0); -- gcc_assert (REG_P (ref)); -- invalidate_reg (ref, true); -- } - } - } - } -@@ -6204,12 +6174,6 @@ invalidate_from_sets_and_clobbers (rtx_insn *insn) - rtx temx = XEXP (tem, 0); - if (GET_CODE (temx) == CLOBBER) - invalidate (SET_DEST (temx), VOIDmode); -- else if (GET_CODE (temx) == CLOBBER_HIGH) -- { -- rtx temref = XEXP (temx, 0); -- gcc_assert (REG_P (temref)); -- invalidate_reg (temref, true); -- } - } - } - -@@ -6237,12 +6201,6 @@ invalidate_from_sets_and_clobbers (rtx_insn *insn) - || GET_CODE (clobbered) == ZERO_EXTRACT) - invalidate (XEXP (clobbered, 0), GET_MODE (clobbered)); - } -- else if (GET_CODE (y) == CLOBBER_HIGH) -- { -- rtx ref = XEXP (y, 0); -- gcc_assert (REG_P (ref)); -- invalidate_reg (ref, true); -- } - else if (GET_CODE (y) == SET && GET_CODE (SET_SRC (y)) == CALL) - invalidate (SET_DEST (y), VOIDmode); - } -@@ -6902,10 +6860,6 @@ count_reg_usage (rtx x, int *counts, rtx dest, int incr) - count_reg_usage (XEXP (XEXP (x, 0), 0), counts, NULL_RTX, incr); - return; - -- case CLOBBER_HIGH: -- gcc_assert (REG_P ((XEXP (x, 0)))); -- return; -- - case SET: - /* Unless we are setting a REG, count everything in SET_DEST. */ - if (!REG_P (SET_DEST (x))) -@@ -6958,8 +6912,7 @@ count_reg_usage (rtx x, int *counts, rtx dest, int incr) - || (REG_NOTE_KIND (x) != REG_NONNEG && GET_CODE (XEXP (x,0)) == USE) - /* FUNCTION_USAGE expression lists may include (CLOBBER (mem /u)), - involving registers in the address. */ -- || GET_CODE (XEXP (x, 0)) == CLOBBER -- || GET_CODE (XEXP (x, 0)) == CLOBBER_HIGH) -+ || GET_CODE (XEXP (x, 0)) == CLOBBER) - count_reg_usage (XEXP (x, 0), counts, NULL_RTX, incr); - - count_reg_usage (XEXP (x, 1), counts, NULL_RTX, incr); -@@ -7043,9 +6996,7 @@ insn_live_p (rtx_insn *insn, int *counts) - if (set_live_p (elt, insn, counts)) - return true; - } -- else if (GET_CODE (elt) != CLOBBER -- && GET_CODE (elt) != CLOBBER_HIGH -- && GET_CODE (elt) != USE) -+ else if (GET_CODE (elt) != CLOBBER && GET_CODE (elt) != USE) - return true; - } - return false; -@@ -7158,7 +7109,7 @@ delete_trivially_dead_insns (rtx_insn *insns, int nreg) - else if (INSN_P (insn)) - { - count_reg_usage (insn, counts, NULL_RTX, 1); -- note_stores (PATTERN (insn), count_stores, counts + nreg * 2); -+ note_stores (insn, count_stores, counts + nreg * 2); - } - /* If there can be debug insns, COUNTS are 3 consecutive arrays. - First one counts how many times each pseudo is used outside -diff --git a/gcc/cselib.c b/gcc/cselib.c -index 108b2588c..e3408bb38 100644 ---- a/gcc/cselib.c -+++ b/gcc/cselib.c -@@ -32,6 +32,7 @@ along with GCC; see the file COPYING3. If not see - #include "dumpfile.h" - #include "cselib.h" - #include "params.h" -+#include "function-abi.h" - - /* A list of cselib_val structures. 
*/ - struct elt_list -@@ -54,8 +55,7 @@ static unsigned int cselib_hash_rtx (rtx, int, machine_mode); - static cselib_val *new_cselib_val (unsigned int, machine_mode, rtx); - static void add_mem_for_addr (cselib_val *, cselib_val *, rtx); - static cselib_val *cselib_lookup_mem (rtx, int); --static void cselib_invalidate_regno (unsigned int, machine_mode, -- const_rtx = NULL); -+static void cselib_invalidate_regno (unsigned int, machine_mode); - static void cselib_invalidate_mem (rtx); - static void cselib_record_set (rtx, cselib_val *, cselib_val *); - static void cselib_record_sets (rtx_insn *); -@@ -1662,7 +1662,6 @@ cselib_expand_value_rtx_1 (rtx orig, struct expand_value_data *evd, - /* SCRATCH must be shared because they represent distinct values. */ - return orig; - case CLOBBER: -- case CLOBBER_HIGH: - if (REG_P (XEXP (orig, 0)) && HARD_REGISTER_NUM_P (REGNO (XEXP (orig, 0)))) - return orig; - break; -@@ -2165,8 +2164,7 @@ cselib_lookup (rtx x, machine_mode mode, - invalidating call clobbered registers across a call. */ - - static void --cselib_invalidate_regno (unsigned int regno, machine_mode mode, -- const_rtx setter) -+cselib_invalidate_regno (unsigned int regno, machine_mode mode) - { - unsigned int endregno; - unsigned int i; -@@ -2189,9 +2187,6 @@ cselib_invalidate_regno (unsigned int regno, machine_mode mode, - i = regno - max_value_regs; - - endregno = end_hard_regno (mode, regno); -- -- if (setter && GET_CODE (setter) == CLOBBER_HIGH) -- gcc_assert (endregno == regno + 1); - } - else - { -@@ -2224,19 +2219,6 @@ cselib_invalidate_regno (unsigned int regno, machine_mode mode, - continue; - } - -- /* Ignore if clobber high and the register isn't clobbered. */ -- if (setter && GET_CODE (setter) == CLOBBER_HIGH) -- { -- gcc_assert (endregno == regno + 1); -- const_rtx x = XEXP (setter, 0); -- if (!reg_is_clobbered_by_clobber_high (i, GET_MODE (v->val_rtx), -- x)) -- { -- l = &(*l)->next; -- continue; -- } -- } -- - /* We have an overlap. */ - if (*l == REG_VALUES (i)) - { -@@ -2371,10 +2353,10 @@ cselib_invalidate_mem (rtx mem_rtx) - *vp = &dummy_val; - } - --/* Invalidate DEST, which is being assigned to or clobbered by SETTER. */ -+/* Invalidate DEST. */ - - void --cselib_invalidate_rtx (rtx dest, const_rtx setter) -+cselib_invalidate_rtx (rtx dest) - { - while (GET_CODE (dest) == SUBREG - || GET_CODE (dest) == ZERO_EXTRACT -@@ -2382,7 +2364,7 @@ cselib_invalidate_rtx (rtx dest, const_rtx setter) - dest = XEXP (dest, 0); - - if (REG_P (dest)) -- cselib_invalidate_regno (REGNO (dest), GET_MODE (dest), setter); -+ cselib_invalidate_regno (REGNO (dest), GET_MODE (dest)); - else if (MEM_P (dest)) - cselib_invalidate_mem (dest); - } -@@ -2390,10 +2372,10 @@ cselib_invalidate_rtx (rtx dest, const_rtx setter) - /* A wrapper for cselib_invalidate_rtx to be called via note_stores. */ - - static void --cselib_invalidate_rtx_note_stores (rtx dest, const_rtx setter, -+cselib_invalidate_rtx_note_stores (rtx dest, const_rtx, - void *data ATTRIBUTE_UNUSED) - { -- cselib_invalidate_rtx (dest, setter); -+ cselib_invalidate_rtx (dest); - } - - /* Record the result of a SET instruction. DEST is being set; the source -@@ -2659,7 +2641,7 @@ cselib_record_sets (rtx_insn *insn) - /* Invalidate all locations written by this insn. Note that the elts we - looked up in the previous loop aren't affected, just some of their - locations may go away. 
*/ -- note_stores (body, cselib_invalidate_rtx_note_stores, NULL); -+ note_pattern_stores (body, cselib_invalidate_rtx_note_stores, NULL); - - for (i = n_sets_before_autoinc; i < n_sets; i++) - cselib_invalidate_rtx (sets[i].dest); -@@ -2765,11 +2747,13 @@ cselib_process_insn (rtx_insn *insn) - memory. */ - if (CALL_P (insn)) - { -+ function_abi callee_abi = insn_callee_abi (insn); - for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) -- if (call_used_regs[i] -+ if (call_used_or_fixed_reg_p (i) - || (REG_VALUES (i) && REG_VALUES (i)->elt - && (targetm.hard_regno_call_part_clobbered -- (insn, i, GET_MODE (REG_VALUES (i)->elt->val_rtx))))) -+ (callee_abi.id (), i, -+ GET_MODE (REG_VALUES (i)->elt->val_rtx))))) - cselib_invalidate_regno (i, reg_raw_mode[i]); - - /* Since it is not clear how cselib is going to be used, be -@@ -2794,11 +2778,9 @@ cselib_process_insn (rtx_insn *insn) - if (CALL_P (insn)) - { - for (x = CALL_INSN_FUNCTION_USAGE (insn); x; x = XEXP (x, 1)) -- { -- gcc_assert (GET_CODE (XEXP (x, 0)) != CLOBBER_HIGH); -- if (GET_CODE (XEXP (x, 0)) == CLOBBER) -- cselib_invalidate_rtx (XEXP (XEXP (x, 0), 0)); -- } -+ if (GET_CODE (XEXP (x, 0)) == CLOBBER) -+ cselib_invalidate_rtx (XEXP (XEXP (x, 0), 0)); -+ - /* Flush everything on setjmp. */ - if (cselib_preserve_constants - && find_reg_note (insn, REG_SETJMP, NULL)) -diff --git a/gcc/cselib.h b/gcc/cselib.h -index 8b8d3e8d5..b5854aedc 100644 ---- a/gcc/cselib.h -+++ b/gcc/cselib.h -@@ -92,7 +92,7 @@ extern bool cselib_dummy_expand_value_rtx_cb (rtx, bitmap, int, - cselib_expand_callback, void *); - extern rtx cselib_subst_to_values (rtx, machine_mode); - extern rtx cselib_subst_to_values_from_insn (rtx, machine_mode, rtx_insn *); --extern void cselib_invalidate_rtx (rtx, const_rtx = NULL); -+extern void cselib_invalidate_rtx (rtx); - - extern void cselib_reset_table (unsigned int); - extern unsigned int cselib_get_next_uid (void); -diff --git a/gcc/d/intrinsics.cc b/gcc/d/intrinsics.cc -index 4bd321b2d..56eab522e 100644 ---- a/gcc/d/intrinsics.cc -+++ b/gcc/d/intrinsics.cc -@@ -134,10 +134,7 @@ maybe_set_intrinsic (FuncDeclaration *decl) - /* If there is no function body, then the implementation is always - provided by the compiler. */ - if (!decl->fbody) -- { -- DECL_BUILT_IN_CLASS (decl->csym) = BUILT_IN_FRONTEND; -- DECL_FUNCTION_CODE (decl->csym) = (built_in_function) code; -- } -+ set_decl_built_in_function (decl->csym, BUILT_IN_FRONTEND, code); - - /* Infer whether the intrinsic can be used for CTFE, let the - front-end know that it can be evaluated at compile-time. */ -diff --git a/gcc/dce.c b/gcc/dce.c -index 68d3713b0..2894fa57b 100644 ---- a/gcc/dce.c -+++ b/gcc/dce.c -@@ -174,7 +174,6 @@ deletable_insn_p (rtx_insn *insn, bool fast, bitmap arg_stores) - return false; - - case CLOBBER: -- case CLOBBER_HIGH: - if (fast) - { - /* A CLOBBER of a dead pseudo register serves no purpose. 
-@@ -244,10 +243,7 @@ static void - mark_nonreg_stores_1 (rtx dest, const_rtx pattern, void *data) - { - if (GET_CODE (pattern) != CLOBBER && !REG_P (dest)) -- { -- gcc_checking_assert (GET_CODE (pattern) != CLOBBER_HIGH); -- mark_insn ((rtx_insn *) data, true); -- } -+ mark_insn ((rtx_insn *) data, true); - } - - -@@ -258,22 +254,19 @@ static void - mark_nonreg_stores_2 (rtx dest, const_rtx pattern, void *data) - { - if (GET_CODE (pattern) != CLOBBER && !REG_P (dest)) -- { -- gcc_checking_assert (GET_CODE (pattern) != CLOBBER_HIGH); -- mark_insn ((rtx_insn *) data, false); -- } -+ mark_insn ((rtx_insn *) data, false); - } - - --/* Mark INSN if BODY stores to a non-register destination. */ -+/* Mark INSN if it stores to a non-register destination. */ - - static void --mark_nonreg_stores (rtx body, rtx_insn *insn, bool fast) -+mark_nonreg_stores (rtx_insn *insn, bool fast) - { - if (fast) -- note_stores (body, mark_nonreg_stores_1, insn); -+ note_stores (insn, mark_nonreg_stores_1, insn); - else -- note_stores (body, mark_nonreg_stores_2, insn); -+ note_stores (insn, mark_nonreg_stores_2, insn); - } - - -@@ -691,7 +684,7 @@ prescan_insns_for_dce (bool fast) - if (arg_stores && bitmap_bit_p (arg_stores, INSN_UID (insn))) - continue; - if (deletable_insn_p (insn, fast, arg_stores)) -- mark_nonreg_stores (PATTERN (insn), insn, fast); -+ mark_nonreg_stores (insn, fast); - else - mark_insn (insn, fast); - } -diff --git a/gcc/ddg.c b/gcc/ddg.c -index 82554ed96..47a50d8ea 100644 ---- a/gcc/ddg.c -+++ b/gcc/ddg.c -@@ -84,7 +84,7 @@ static bool - mem_write_insn_p (rtx_insn *insn) - { - mem_ref_p = false; -- note_stores (PATTERN (insn), mark_mem_store, NULL); -+ note_stores (insn, mark_mem_store, NULL); - return mem_ref_p; - } - -diff --git a/gcc/defaults.h b/gcc/defaults.h -index b75342561..72d4fba11 100644 ---- a/gcc/defaults.h -+++ b/gcc/defaults.h -@@ -1318,10 +1318,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - #endif - - /* If a memory-to-memory move would take MOVE_RATIO or more simple -- move-instruction sequences, we will do a movmem or libcall instead. */ -+ move-instruction sequences, we will do a cpymem or libcall instead. */ - - #ifndef MOVE_RATIO --#if defined (HAVE_movmemqi) || defined (HAVE_movmemhi) || defined (HAVE_movmemsi) || defined (HAVE_movmemdi) || defined (HAVE_movmemti) -+#if defined (HAVE_cpymemqi) || defined (HAVE_cpymemhi) || defined (HAVE_cpymemsi) || defined (HAVE_cpymemdi) || defined (HAVE_cpymemti) - #define MOVE_RATIO(speed) 2 - #else - /* If we are optimizing for space (-Os), cut down the default move ratio. */ -@@ -1342,7 +1342,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - #endif - - /* If a memory set (to value other than zero) operation would take -- SET_RATIO or more simple move-instruction sequences, we will do a movmem -+ SET_RATIO or more simple move-instruction sequences, we will do a setmem - or libcall instead. */ - #ifndef SET_RATIO - #define SET_RATIO(speed) MOVE_RATIO (speed) -@@ -1459,4 +1459,18 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - #define DWARF_GNAT_ENCODINGS_DEFAULT DWARF_GNAT_ENCODINGS_GDB - #endif - -+#ifndef USED_FOR_TARGET -+/* Done this way to keep gengtype happy. */ -+#if BITS_PER_UNIT == 8 -+#define TARGET_UNIT uint8_t -+#elif BITS_PER_UNIT == 16 -+#define TARGET_UNIT uint16_t -+#elif BITS_PER_UNIT == 32 -+#define TARGET_UNIT uint32_t -+#else -+#error Unknown BITS_PER_UNIT -+#endif -+typedef TARGET_UNIT target_unit; -+#endif -+ - #endif /* ! 
GCC_DEFAULTS_H */ -diff --git a/gcc/df-core.c b/gcc/df-core.c -index b19ba289d..2181ff131 100644 ---- a/gcc/df-core.c -+++ b/gcc/df-core.c -@@ -2052,7 +2052,7 @@ debug_regset (regset r) - This is part of making a debugging dump. */ - - void --df_print_regset (FILE *file, bitmap r) -+df_print_regset (FILE *file, const_bitmap r) - { - unsigned int i; - bitmap_iterator bi; -@@ -2077,7 +2077,7 @@ df_print_regset (FILE *file, bitmap r) - debugging dump. */ - - void --df_print_word_regset (FILE *file, bitmap r) -+df_print_word_regset (FILE *file, const_bitmap r) - { - unsigned int max_reg = max_reg_num (); - -diff --git a/gcc/df-problems.c b/gcc/df-problems.c -index a9dfa6203..3c7aeceb2 100644 ---- a/gcc/df-problems.c -+++ b/gcc/df-problems.c -@@ -388,7 +388,6 @@ df_rd_local_compute (bitmap all_blocks) - { - unsigned int bb_index; - bitmap_iterator bi; -- unsigned int regno; - struct df_rd_problem_data *problem_data - = (struct df_rd_problem_data *) df_rd->problem_data; - bitmap sparse_invalidated = &problem_data->sparse_invalidated_by_call; -@@ -405,10 +404,9 @@ df_rd_local_compute (bitmap all_blocks) - } - - /* Set up the knockout bit vectors to be applied across EH_EDGES. */ -- EXECUTE_IF_SET_IN_BITMAP (regs_invalidated_by_call_regset, 0, regno, bi) -- { -- if (! HARD_REGISTER_NUM_P (regno) -- || !(df->changeable_flags & DF_NO_HARD_REGS)) -+ if (!(df->changeable_flags & DF_NO_HARD_REGS)) -+ for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno) -+ if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno)) - { - if (DF_DEFS_COUNT (regno) > DF_SPARSE_THRESHOLD) - bitmap_set_bit (sparse_invalidated, regno); -@@ -417,7 +415,6 @@ df_rd_local_compute (bitmap all_blocks) - DF_DEFS_BEGIN (regno), - DF_DEFS_COUNT (regno)); - } -- } - - bitmap_release (&seen_in_block); - bitmap_release (&seen_in_insn); -@@ -982,7 +979,10 @@ df_lr_confluence_n (edge e) - /* ??? Abnormal call edges ignored for the moment, as this gets - confused by sibling call edges, which crashes reg-stack. */ - if (e->flags & EDGE_EH) -- changed = bitmap_ior_and_compl_into (op1, op2, regs_invalidated_by_call_regset); -+ { -+ bitmap_view eh_kills (regs_invalidated_by_call); -+ changed = bitmap_ior_and_compl_into (op1, op2, eh_kills); -+ } - else - changed = bitmap_ior_into (op1, op2); - -@@ -4093,8 +4093,7 @@ can_move_insns_across (rtx_insn *from, rtx_insn *to, - if (volatile_insn_p (PATTERN (insn))) - return false; - memrefs_in_across |= find_memory (insn); -- note_stores (PATTERN (insn), find_memory_stores, -- &mem_sets_in_across); -+ note_stores (insn, find_memory_stores, &mem_sets_in_across); - /* This is used just to find sets of the stack pointer. */ - memrefs_in_across |= mem_sets_in_across; - trapping_insns_in_across |= may_trap_p (PATTERN (insn)); -@@ -4173,7 +4172,7 @@ can_move_insns_across (rtx_insn *from, rtx_insn *to, - { - int mem_ref_flags = 0; - int mem_set_flags = 0; -- note_stores (PATTERN (insn), find_memory_stores, &mem_set_flags); -+ note_stores (insn, find_memory_stores, &mem_set_flags); - mem_ref_flags = find_memory (insn); - /* Catch sets of the stack pointer. 
*/ - mem_ref_flags |= mem_set_flags; -@@ -4635,8 +4634,10 @@ df_md_confluence_n (edge e) - return false; - - if (e->flags & EDGE_EH) -- return bitmap_ior_and_compl_into (op1, op2, -- regs_invalidated_by_call_regset); -+ { -+ bitmap_view eh_kills (regs_invalidated_by_call); -+ return bitmap_ior_and_compl_into (op1, op2, eh_kills); -+ } - else - return bitmap_ior_into (op1, op2); - } -diff --git a/gcc/df-scan.c b/gcc/df-scan.c -index 84c2e54c8..ea149c6cc 100644 ---- a/gcc/df-scan.c -+++ b/gcc/df-scan.c -@@ -35,7 +35,7 @@ along with GCC; see the file COPYING3. If not see - #include "emit-rtl.h" /* FIXME: Can go away once crtl is moved to rtl.h. */ - #include "dumpfile.h" - #include "calls.h" -- -+#include "function-abi.h" - - /* The set of hard registers in eliminables[i].from. */ - -@@ -312,7 +312,7 @@ df_scan_start_dump (FILE *file ATTRIBUTE_UNUSED) - rtx_insn *insn; - - fprintf (file, ";; invalidated by call \t"); -- df_print_regset (file, regs_invalidated_by_call_regset); -+ df_print_regset (file, bitmap_view (regs_invalidated_by_call)); - fprintf (file, ";; hardware regs used \t"); - df_print_regset (file, &df->hardware_regs_used); - fprintf (file, ";; regular block artificial uses \t"); -@@ -2773,7 +2773,6 @@ df_find_hard_reg_defs (rtx x, HARD_REG_SET *defs) - break; - - case CLOBBER: -- case CLOBBER_HIGH: - df_find_hard_reg_defs_1 (XEXP (x, 0), defs); - break; - -@@ -2833,10 +2832,6 @@ df_uses_record (struct df_collection_rec *collection_rec, - /* If we're clobbering a REG then we have a def so ignore. */ - return; - -- case CLOBBER_HIGH: -- gcc_assert (REG_P (XEXP (x, 0))); -- return; -- - case MEM: - df_uses_record (collection_rec, - &XEXP (x, 0), DF_REF_REG_MEM_LOAD, -@@ -3087,13 +3082,11 @@ df_get_call_refs (struct df_collection_rec *collection_rec, - bool is_sibling_call; - unsigned int i; - HARD_REG_SET defs_generated; -- HARD_REG_SET fn_reg_set_usage; - - CLEAR_HARD_REG_SET (defs_generated); - df_find_hard_reg_defs (PATTERN (insn_info->insn), &defs_generated); - is_sibling_call = SIBLING_CALL_P (insn_info->insn); -- get_call_reg_set_usage (insn_info->insn, &fn_reg_set_usage, -- regs_invalidated_by_call); -+ function_abi callee_abi = insn_callee_abi (insn_info->insn); - - for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) - { -@@ -3117,7 +3110,7 @@ df_get_call_refs (struct df_collection_rec *collection_rec, - NULL, bb, insn_info, DF_REF_REG_DEF, flags); - } - } -- else if (TEST_HARD_REG_BIT (fn_reg_set_usage, i) -+ else if (callee_abi.clobbers_full_reg_p (i) - /* no clobbers for regs that are the result of the call */ - && !TEST_HARD_REG_BIT (defs_generated, i) - && (!is_sibling_call -@@ -3133,7 +3126,6 @@ df_get_call_refs (struct df_collection_rec *collection_rec, - for (note = CALL_INSN_FUNCTION_USAGE (insn_info->insn); note; - note = XEXP (note, 1)) - { -- gcc_assert (GET_CODE (XEXP (note, 0)) != CLOBBER_HIGH); - if (GET_CODE (XEXP (note, 0)) == USE) - df_uses_record (collection_rec, &XEXP (XEXP (note, 0), 0), - DF_REF_REG_USE, bb, insn_info, flags); -@@ -3499,7 +3491,9 @@ df_get_entry_block_def_set (bitmap entry_block_defs) - /* Defs for the callee saved registers are inserted so that the - pushes have some defining location. 
*/ - for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) -- if ((call_used_regs[i] == 0) && (df_regs_ever_live_p (i))) -+ if (!crtl->abi->clobbers_full_reg_p (i) -+ && !fixed_regs[i] -+ && df_regs_ever_live_p (i)) - bitmap_set_bit (entry_block_defs, i); - } - -@@ -3682,8 +3676,9 @@ df_get_exit_block_use_set (bitmap exit_block_uses) - { - /* Mark all call-saved registers that we actually used. */ - for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) -- if (df_regs_ever_live_p (i) && !LOCAL_REGNO (i) -- && !TEST_HARD_REG_BIT (regs_invalidated_by_call, i)) -+ if (df_regs_ever_live_p (i) -+ && !LOCAL_REGNO (i) -+ && !crtl->abi->clobbers_full_reg_p (i)) - bitmap_set_bit (exit_block_uses, i); - } - -diff --git a/gcc/df.h b/gcc/df.h -index d76d31baa..241812235 100644 ---- a/gcc/df.h -+++ b/gcc/df.h -@@ -984,8 +984,8 @@ extern bool df_reg_defined (rtx_insn *, rtx); - extern df_ref df_find_use (rtx_insn *, rtx); - extern bool df_reg_used (rtx_insn *, rtx); - extern void df_worklist_dataflow (struct dataflow *,bitmap, int *, int); --extern void df_print_regset (FILE *file, bitmap r); --extern void df_print_word_regset (FILE *file, bitmap r); -+extern void df_print_regset (FILE *file, const_bitmap r); -+extern void df_print_word_regset (FILE *file, const_bitmap r); - extern void df_dump (FILE *); - extern void df_dump_region (FILE *); - extern void df_dump_start (FILE *); -diff --git a/gcc/diagnostic-color.c b/gcc/diagnostic-color.c -index 69e759ff6..abc919f63 100644 ---- a/gcc/diagnostic-color.c -+++ b/gcc/diagnostic-color.c -@@ -19,6 +19,7 @@ - #include "config.h" - #include "system.h" - #include "diagnostic-color.h" -+#include "diagnostic-url.h" - - #ifdef __MINGW32__ - # include -@@ -236,3 +237,22 @@ colorize_init (diagnostic_color_rule_t rule) - gcc_unreachable (); - } - } -+ -+/* Determine if URLs should be enabled, based on RULE. -+ This reuses the logic for colorization. */ -+ -+bool -+diagnostic_urls_enabled_p (diagnostic_url_rule_t rule) -+{ -+ switch (rule) -+ { -+ case DIAGNOSTICS_URL_NO: -+ return false; -+ case DIAGNOSTICS_URL_YES: -+ return true; -+ case DIAGNOSTICS_URL_AUTO: -+ return should_colorize (); -+ default: -+ gcc_unreachable (); -+ } -+} -diff --git a/gcc/diagnostic-url.h b/gcc/diagnostic-url.h -new file mode 100644 -index 000000000..ce0de459f ---- /dev/null -+++ b/gcc/diagnostic-url.h -@@ -0,0 +1,36 @@ -+/* Copyright (C) 2019 Free Software Foundation, Inc. -+ Contributed by David Malcolm . -+ -+This file is part of GCC. -+ -+GCC is free software; you can redistribute it and/or modify it under -+the terms of the GNU General Public License as published by the Free -+Software Foundation; either version 3, or (at your option) any later -+version. -+ -+GCC is distributed in the hope that it will be useful, but WITHOUT ANY -+WARRANTY; without even the implied warranty of MERCHANTABILITY or -+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+for more details. -+ -+You should have received a copy of the GNU General Public License -+along with GCC; see the file COPYING3. If not see -+. */ -+ -+#ifndef GCC_DIAGNOSTIC_URL_H -+#define GCC_DIAGNOSTIC_URL_H -+ -+/* Whether to add URLs to diagnostics: -+ - DIAGNOSTICS_URL_NO: never -+ - DIAGNOSTICS_URL_YES: always -+ - DIAGNOSTICS_URL_AUTO: depending on the output stream. */ -+typedef enum -+{ -+ DIAGNOSTICS_URL_NO = 0, -+ DIAGNOSTICS_URL_YES = 1, -+ DIAGNOSTICS_URL_AUTO = 2 -+} diagnostic_url_rule_t; -+ -+extern bool diagnostic_urls_enabled_p (diagnostic_url_rule_t); -+ -+#endif /* ! 
GCC_DIAGNOSTIC_URL_H */ -diff --git a/gcc/diagnostic.c b/gcc/diagnostic.c -index be6b65722..a9acda7cc 100644 ---- a/gcc/diagnostic.c -+++ b/gcc/diagnostic.c -@@ -31,6 +31,7 @@ along with GCC; see the file COPYING3. If not see - #include "backtrace.h" - #include "diagnostic.h" - #include "diagnostic-color.h" -+#include "diagnostic-url.h" - #include "edit-context.h" - #include "selftest.h" - #include "selftest-diagnostic.h" -@@ -238,6 +239,18 @@ diagnostic_color_init (diagnostic_context *context, int value /*= -1 */) - = colorize_init ((diagnostic_color_rule_t) value); - } - -+/* Initialize URL support within CONTEXT based on VALUE, handling "auto". */ -+ -+void -+diagnostic_urls_init (diagnostic_context *context, int value /*= -1 */) -+{ -+ if (value < 0) -+ value = DIAGNOSTICS_COLOR_DEFAULT; -+ -+ context->printer->show_urls -+ = diagnostic_urls_enabled_p ((diagnostic_url_rule_t) value); -+} -+ - /* Do any cleaning up required after the last diagnostic is emitted. */ - - void -diff --git a/gcc/diagnostic.h b/gcc/diagnostic.h -index 46c3b50a5..5daf4f288 100644 ---- a/gcc/diagnostic.h -+++ b/gcc/diagnostic.h -@@ -328,6 +328,7 @@ diagnostic_override_option_index (diagnostic_info *info, int optidx) - /* Diagnostic related functions. */ - extern void diagnostic_initialize (diagnostic_context *, int); - extern void diagnostic_color_init (diagnostic_context *, int value = -1); -+extern void diagnostic_urls_init (diagnostic_context *, int value = -1); - extern void diagnostic_finish (diagnostic_context *); - extern void diagnostic_report_current_module (diagnostic_context *, location_t); - extern void diagnostic_show_locus (diagnostic_context *, -diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi -index 9c87792ff..e366ab923 100644 ---- a/gcc/doc/invoke.texi -+++ b/gcc/doc/invoke.texi -@@ -271,6 +271,7 @@ Objective-C and Objective-C++ Dialects}. - @gccoptlist{-fmessage-length=@var{n} @gol - -fdiagnostics-show-location=@r{[}once@r{|}every-line@r{]} @gol - -fdiagnostics-color=@r{[}auto@r{|}never@r{|}always@r{]} @gol -+-fdiagnostics-urls=@r{[}auto@r{|}never@r{|}always@r{]} @gol - -fdiagnostics-format=@r{[}text@r{|}json@r{]} @gol - -fno-diagnostics-show-option -fno-diagnostics-show-caret @gol - -fno-diagnostics-show-labels -fno-diagnostics-show-line-numbers @gol -@@ -403,8 +404,7 @@ Objective-C and Objective-C++ Dialects}. - -fallow-store-data-races @gol - -fassociative-math -fauto-profile -fauto-profile[=@var{path}] @gol - -fauto-inc-dec -fbranch-probabilities @gol ---fbranch-target-load-optimize -fbranch-target-load-optimize2 @gol ---fbtr-bb-exclusive -fcaller-saves @gol -+-fcaller-saves @gol - -fcombine-stack-adjustments -fconserve-stack @gol - -fcompare-elim -fcprop-registers -fcrossjumping @gol - -fcse-follow-jumps -fcse-skip-blocks -fcx-fortran-rules @gol -@@ -636,11 +636,13 @@ Objective-C and Objective-C++ Dialects}. 
- -mlow-precision-recip-sqrt -mlow-precision-sqrt -mlow-precision-div @gol - -mpc-relative-literal-loads @gol - -msign-return-address=@var{scope} @gol ---mbranch-protection=@var{none}|@var{standard}|@var{pac-ret}[+@var{leaf}]|@var{bti} @gol -+-mbranch-protection=@var{none}|@var{standard}|@var{pac-ret}[+@var{leaf} -++@var{b-key}]|@var{bti} @gol - -march=@var{name} -mcpu=@var{name} -mtune=@var{name} @gol - -moverride=@var{string} -mverbose-cost-dump @gol - -mstack-protector-guard=@var{guard} -mstack-protector-guard-reg=@var{sysreg} @gol ---mstack-protector-guard-offset=@var{offset} -mtrack-speculation } -+-mstack-protector-guard-offset=@var{offset} -mtrack-speculation @gol -+-moutline-atomics } - - @emph{Adapteva Epiphany Options} - @gccoptlist{-mhalf-reg-file -mprefer-short-insn-regs @gol -@@ -3885,6 +3887,18 @@ SGR substring for highlighting mismatching types within template - arguments in the C++ frontend. - @end table - -+@item -fdiagnostics-urls[=@var{WHEN}] -+@opindex fdiagnostics-urls -+@cindex urls -+Use escape sequences to embed URLs in diagnostics. For example, when -+@option{-fdiagnostics-show-option} emits text showing the command-line -+option controlling a diagnostic, embed a URL for documentation of that -+option. -+ -+@var{WHEN} is @samp{never}, @samp{always}, or @samp{auto}. -+The default is @samp{auto}, which means to use URL escape sequences only -+when the standard error is a terminal. -+ - @item -fno-diagnostics-show-option - @opindex fno-diagnostics-show-option - @opindex fdiagnostics-show-option -@@ -8295,6 +8309,7 @@ also turns on the following optimization flags: - -ffinite-loops @gol - -fgcse -fgcse-lm @gol - -fhoist-adjacent-loads @gol -+-finline-functions @gol - -finline-small-functions @gol - -findirect-inlining @gol - -fipa-bit-cp -fipa-cp -fipa-icf @gol -@@ -8328,7 +8343,6 @@ by @option{-O2} and also turns on the following optimization flags: - - @c Please keep the following list alphabetized! - @gccoptlist{-fgcse-after-reload @gol ---finline-functions @gol - -fipa-cp-clone - -floop-interchange @gol - -floop-unroll-and-jam @gol -@@ -8386,10 +8400,10 @@ no effect. Otherwise @option{-Og} enables all @option{-O1} - optimization flags except for those that may interfere with debugging: - - @gccoptlist{-fbranch-count-reg -fdelayed-branch @gol ---fif-conversion -fif-conversion2 @gol -+-fdse -fif-conversion -fif-conversion2 @gol - -finline-functions-called-once @gol - -fmove-loop-invariants -fssa-phiopt @gol ---ftree-bit-ccp -ftree-pta -ftree-sra} -+-ftree-bit-ccp -ftree-dse -ftree-pta -ftree-sra} - - @end table - -@@ -8508,7 +8522,7 @@ If all calls to a given function are integrated, and the function is - declared @code{static}, then the function is normally not output as - assembler code in its own right. - --Enabled at levels @option{-O3}, @option{-Os}. Also enabled -+Enabled at levels @option{-O2}, @option{-O3}, @option{-Os}. Also enabled - by @option{-fprofile-use} and @option{-fauto-profile}. - - @item -finline-functions-called-once -@@ -10986,24 +11000,6 @@ locations inside a translation unit since the locations are unknown until - link time. An example of such an optimization is relaxing calls to short call - instructions. - --@item -fbranch-target-load-optimize --@opindex fbranch-target-load-optimize --Perform branch target register load optimization before prologue / epilogue --threading. 
--The use of target registers can typically be exposed only during reload, --thus hoisting loads out of loops and doing inter-block scheduling needs --a separate optimization pass. -- --@item -fbranch-target-load-optimize2 --@opindex fbranch-target-load-optimize2 --Perform branch target register load optimization after prologue / epilogue --threading. -- --@item -fbtr-bb-exclusive --@opindex fbtr-bb-exclusive --When performing branch target register load optimization, don't reuse --branch target registers within any basic block. -- - @item -fstdarg-opt - @opindex fstdarg-opt - Optimize the prologue of variadic argument functions with respect to usage of -@@ -11154,19 +11150,30 @@ when modulo scheduling a loop. Larger values can exponentially increase - compilation time. - - @item max-inline-insns-single --Several parameters control the tree inliner used in GCC@. --This number sets the maximum number of instructions (counted in GCC's --internal representation) in a single function that the tree inliner --considers for inlining. This only affects functions declared --inline and methods implemented in a class declaration (C++). -+@item max-inline-insns-single-O2 -+Several parameters control the tree inliner used in GCC@. This number sets the -+maximum number of instructions (counted in GCC's internal representation) in a -+single function that the tree inliner considers for inlining. This only -+affects functions declared inline and methods implemented in a class -+declaration (C++). -+ -+For functions compiled with optimization levels -+@option{-O3} and @option{-Ofast} parameter @option{max-inline-insns-single} is -+applied. In other cases @option{max-inline-insns-single-O2} is applied. -+ - - @item max-inline-insns-auto -+@item max-inline-insns-auto-O2 - When you use @option{-finline-functions} (included in @option{-O3}), - a lot of functions that would otherwise not be considered for inlining - by the compiler are investigated. To those functions, a different - (more restrictive) limit compared to functions declared inline can - be applied. - -+For functions compiled with optimization levels -+@option{-O3} and @option{-Ofast} parameter @option{max-inline-insns-auto} is -+applied. In other cases @option{max-inline-insns-auto-O2} is applied. -+ - @item max-inline-insns-small - This is bound applied to calls which are considered relevant with - @option{-finline-small-functions}. -@@ -11189,11 +11196,16 @@ Same as @option{--param uninlined-function-insns} and - @option{--param uninlined-function-time} but applied to function thunks - - @item inline-min-speedup -+@item inline-min-speedup-O2 - When estimated performance improvement of caller + callee runtime exceeds this - threshold (in percent), the function can be inlined regardless of the limit on - @option{--param max-inline-insns-single} and @option{--param - max-inline-insns-auto}. - -+For functions compiled with optimization levels -+@option{-O3} and @option{-Ofast} parameter @option{inline-min-speedup} is -+applied. In other cases @option{inline-min-speedup-O2} is applied. -+ - @item large-function-insns - The limit specifying really large functions. For functions larger than this - limit after inlining, inlining is constrained by -@@ -11271,9 +11283,14 @@ via a given call expression. This parameter limits inlining only to call - expressions whose probability exceeds the given threshold (in percents). - - @item early-inlining-insns -+@item early-inlining-insns-O2 - Specify growth that the early inliner can make. 
In effect it increases - the amount of inlining for code having a large abstraction penalty. - -+For functions compiled with optimization levels -+@option{-O3} and @option{-Ofast} parameter @option{early-inlining-insns} is -+applied. In other cases @option{early-inlining-insns-O2} is applied. -+ - @item max-early-inliner-iterations - Limit of iterations of the early inliner. This basically bounds - the number of nested indirect calls the early inliner can resolve. -@@ -15816,31 +15833,38 @@ be used by the compiler when expanding calls to - @code{__builtin_speculation_safe_copy} to permit a more efficient code - sequence to be generated. - -+@item -moutline-atomics -+@itemx -mno-outline-atomics -+Enable or disable calls to out-of-line helpers to implement atomic operations. -+These helpers will, at runtime, determine if the LSE instructions from -+ARMv8.1-A can be used; if not, they will use the load/store-exclusive -+instructions that are present in the base ARMv8.0 ISA. -+ -+This option is only applicable when compiling for the base ARMv8.0 -+instruction set. If using a later revision, e.g. @option{-march=armv8.1-a} -+or @option{-march=armv8-a+lse}, the ARMv8.1-Atomics instructions will be -+used directly. The same applies when using @option{-mcpu=} when the -+selected cpu supports the @samp{lse} feature. -+ - @item -march=@var{name} - @opindex march - Specify the name of the target architecture and, optionally, one or - more feature modifiers. This option has the form - @option{-march=@var{arch}@r{@{}+@r{[}no@r{]}@var{feature}@r{@}*}}. - --The permissible values for @var{arch} are @samp{armv8-a}, --@samp{armv8.1-a}, @samp{armv8.2-a}, @samp{armv8.3-a}, @samp{armv8.4-a}, --@samp{armv8.5-a} or @var{native}. -- --The value @samp{armv8.5-a} implies @samp{armv8.4-a} and enables compiler --support for the ARMv8.5-A architecture extensions. -- --The value @samp{armv8.4-a} implies @samp{armv8.3-a} and enables compiler --support for the ARMv8.4-A architecture extensions. -- --The value @samp{armv8.3-a} implies @samp{armv8.2-a} and enables compiler --support for the ARMv8.3-A architecture extensions. -- --The value @samp{armv8.2-a} implies @samp{armv8.1-a} and enables compiler --support for the ARMv8.2-A architecture extensions. -- --The value @samp{armv8.1-a} implies @samp{armv8-a} and enables compiler --support for the ARMv8.1-A architecture extension. In particular, it --enables the @samp{+crc}, @samp{+lse}, and @samp{+rdma} features. -+The table below summarizes the permissible values for @var{arch} -+and the features that they enable by default: -+ -+@multitable @columnfractions 0.20 0.20 0.60 -+@headitem @var{arch} value @tab Architecture @tab Includes by default -+@item @samp{armv8-a} @tab Armv8-A @tab @samp{+fp}, @samp{+simd} -+@item @samp{armv8.1-a} @tab Armv8.1-A @tab @samp{armv8-a}, @samp{+crc}, @samp{+lse}, @samp{+rdma} -+@item @samp{armv8.2-a} @tab Armv8.2-A @tab @samp{armv8.1-a} -+@item @samp{armv8.3-a} @tab Armv8.3-A @tab @samp{armv8.2-a} -+@item @samp{armv8.4-a} @tab Armv8.4-A @tab @samp{armv8.3-a}, @samp{+fp16fml}, @samp{+dotprod} -+@item @samp{armv8.5-a} @tab Armv8.5-A @tab @samp{armv8.4-a}, @samp{+sb}, @samp{+ssbs}, @samp{+predres} -+@item @samp{armv8.6-a} @tab Armv8.6-A @tab @samp{armv8.5-a}, @samp{+bf16}, @samp{+i8mm} -+@end multitable - - The value @samp{native} is available on native AArch64 GNU/Linux and - causes the compiler to pick the architecture of the host system. 
This -@@ -15864,7 +15888,9 @@ Specify the name of the target processor for which GCC should tune the - performance of the code. Permissible values for this option are: - @samp{generic}, @samp{cortex-a35}, @samp{cortex-a53}, @samp{cortex-a55}, - @samp{cortex-a57}, @samp{cortex-a72}, @samp{cortex-a73}, @samp{cortex-a75}, --@samp{cortex-a76}, @samp{ares}, @samp{exynos-m1}, @samp{emag}, @samp{falkor}, -+@samp{cortex-a76}, @samp{cortex-a76ae}, @samp{cortex-a77}, -+@samp{cortex-a65}, @samp{cortex-a65ae}, @samp{cortex-a34}, -+@samp{ares}, @samp{exynos-m1}, @samp{emag}, @samp{falkor}, - @samp{neoverse-e1},@samp{neoverse-n1},@samp{qdf24xx}, @samp{saphira}, - @samp{phecda}, @samp{xgene1}, @samp{vulcan}, @samp{octeontx}, - @samp{octeontx81}, @samp{octeontx83}, @samp{thunderx}, @samp{thunderxt88}, -@@ -15941,7 +15967,7 @@ functions, and @samp{all}, which enables pointer signing for all functions. The - default value is @samp{none}. This option has been deprecated by - -mbranch-protection. - --@item -mbranch-protection=@var{none}|@var{standard}|@var{pac-ret}[+@var{leaf}]|@var{bti} -+@item -mbranch-protection=@var{none}|@var{standard}|@var{pac-ret}[+@var{leaf}+@var{b-key}]|@var{bti} - @opindex mbranch-protection - Select the branch protection features to use. - @samp{none} is the default and turns off all types of branch protection. -@@ -15952,7 +15978,8 @@ level. - level: signing functions that save the return address to memory (non-leaf - functions will practically always do this) using the a-key. The optional - argument @samp{leaf} can be used to extend the signing to include leaf --functions. -+functions. The optional argument @samp{b-key} can be used to sign the functions -+with the B-key instead of the A-key. - @samp{bti} turns on branch target identification mechanism. - - @item -msve-vector-bits=@var{bits} -@@ -16054,6 +16081,37 @@ Enable the Armv8-a Execution and Data Prediction Restriction instructions. - This option is only to enable the extension at the assembler level and does - not affect code generation. This option is enabled by default for - @option{-march=armv8.5-a}. -+@item sve2 -+Enable the Armv8-a Scalable Vector Extension 2. This also enables SVE -+instructions. -+@item sve2-bitperm -+Enable SVE2 bitperm instructions. This also enables SVE2 instructions. -+@item sve2-sm4 -+Enable SVE2 sm4 instructions. This also enables SVE2 instructions. -+@item sve2-aes -+Enable SVE2 aes instructions. This also enables SVE2 instructions. -+@item sve2-sha3 -+Enable SVE2 sha3 instructions. This also enables SVE2 instructions. -+@item tme -+Enable the Transactional Memory Extension. -+@item i8mm -+Enable 8-bit Integer Matrix Multiply instructions. This also enables -+Advanced SIMD and floating-point instructions. This option is enabled by -+default for @option{-march=armv8.6-a}. Use of this option with architectures -+prior to Armv8.2-A is not supported. -+@item f32mm -+Enable 32-bit Floating point Matrix Multiply instructions. This also enables -+SVE instructions. Use of this option with architectures prior to Armv8.2-A is -+not supported. -+@item f64mm -+Enable 64-bit Floating point Matrix Multiply instructions. This also enables -+SVE instructions. Use of this option with architectures prior to Armv8.2-A is -+not supported. -+@item bf16 -+Enable brain half-precision floating-point instructions. This also enables -+Advanced SIMD and floating-point instructions. This option is enabled by -+default for @option{-march=armv8.6-a}. 
Use of this option with architectures -+prior to Armv8.2-A is not supported. - - @end table - -@@ -28567,8 +28625,9 @@ By default GCC inlines string operations only when the destination is - known to be aligned to least a 4-byte boundary. - This enables more inlining and increases code - size, but may improve performance of code that depends on fast --@code{memcpy}, @code{strlen}, --and @code{memset} for short lengths. -+@code{memcpy} and @code{memset} for short lengths. -+The option enables inline expansion of @code{strlen} for all -+pointer alignments. - - @item -minline-stringops-dynamically - @opindex minline-stringops-dynamically -diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi -index 50e13124b..75482d7a2 100644 ---- a/gcc/doc/md.texi -+++ b/gcc/doc/md.texi -@@ -1748,6 +1748,12 @@ The stack pointer register (@code{SP}) - @item w - Floating point register, Advanced SIMD vector register or SVE vector register - -+@item x -+Like @code{w}, but restricted to registers 0 to 15 inclusive. -+ -+@item y -+Like @code{w}, but restricted to registers 0 to 7 inclusive. -+ - @item Upl - One of the low eight SVE predicate registers (@code{P0} to @code{P7}) - -@@ -5470,6 +5476,11 @@ mode @var{m} and the scalars have the mode appropriate for one - element of @var{m}. The operation is strictly in-order: there is - no reassociation. - -+@cindex @code{mask_fold_left_plus_@var{m}} instruction pattern -+@item @code{mask_fold_left_plus_@var{m}} -+Like @samp{fold_left_plus_@var{m}}, but takes an additional mask operand -+(operand 3) that specifies which elements of the source vector should be added. -+ - @cindex @code{sdot_prod@var{m}} instruction pattern - @item @samp{sdot_prod@var{m}} - @cindex @code{udot_prod@var{m}} instruction pattern -@@ -5499,6 +5510,44 @@ operand 1. Add operand 1 to operand 2 and place the widened result in - operand 0. (This is used express accumulation of elements into an accumulator - of a wider mode.) - -+@cindex @code{smulhs@var{m3}} instruction pattern -+@item @samp{smulhs@var{m3}} -+@cindex @code{umulhs@var{m3}} instruction pattern -+@itemx @samp{umulhs@var{m3}} -+Signed/unsigned multiply high with scale. This is equivalent to the C code: -+@smallexample -+narrow op0, op1, op2; -+@dots{} -+op0 = (narrow) (((wide) op1 * (wide) op2) >> (N / 2 - 1)); -+@end smallexample -+where the sign of @samp{narrow} determines whether this is a signed -+or unsigned operation, and @var{N} is the size of @samp{wide} in bits. -+ -+@cindex @code{smulhrs@var{m3}} instruction pattern -+@item @samp{smulhrs@var{m3}} -+@cindex @code{umulhrs@var{m3}} instruction pattern -+@itemx @samp{umulhrs@var{m3}} -+Signed/unsigned multiply high with round and scale. This is -+equivalent to the C code: -+@smallexample -+narrow op0, op1, op2; -+@dots{} -+op0 = (narrow) (((((wide) op1 * (wide) op2) >> (N / 2 - 2)) + 1) >> 1); -+@end smallexample -+where the sign of @samp{narrow} determines whether this is a signed -+or unsigned operation, and @var{N} is the size of @samp{wide} in bits. -+ -+@cindex @code{sdiv_pow2@var{m3}} instruction pattern -+@item @samp{sdiv_pow2@var{m3}} -+@cindex @code{sdiv_pow2@var{m3}} instruction pattern -+@itemx @samp{sdiv_pow2@var{m3}} -+Signed division by power-of-2 immediate. 
Equivalent to: -+@smallexample -+signed op0, op1; -+@dots{} -+op0 = op1 / (1 << imm); -+@end smallexample -+ - @cindex @code{vec_shl_insert_@var{m}} instruction pattern - @item @samp{vec_shl_insert_@var{m}} - Shift the elements in vector input operand 1 left one element (i.e.@: -@@ -6240,13 +6289,13 @@ This pattern is not allowed to @code{FAIL}. - @item @samp{one_cmpl@var{m}2} - Store the bitwise-complement of operand 1 into operand 0. - --@cindex @code{movmem@var{m}} instruction pattern --@item @samp{movmem@var{m}} --Block move instruction. The destination and source blocks of memory -+@cindex @code{cpymem@var{m}} instruction pattern -+@item @samp{cpymem@var{m}} -+Block copy instruction. The destination and source blocks of memory - are the first two operands, and both are @code{mem:BLK}s with an - address in mode @code{Pmode}. - --The number of bytes to move is the third operand, in mode @var{m}. -+The number of bytes to copy is the third operand, in mode @var{m}. - Usually, you specify @code{Pmode} for @var{m}. However, if you can - generate better code knowing the range of valid lengths is smaller than - those representable in a full Pmode pointer, you should provide -@@ -6266,14 +6315,16 @@ in a way that the blocks are not required to be aligned according to it in - all cases. This expected alignment is also in bytes, just like operand 4. - Expected size, when unknown, is set to @code{(const_int -1)}. - --Descriptions of multiple @code{movmem@var{m}} patterns can only be -+Descriptions of multiple @code{cpymem@var{m}} patterns can only be - beneficial if the patterns for smaller modes have fewer restrictions - on their first, second and fourth operands. Note that the mode @var{m} --in @code{movmem@var{m}} does not impose any restriction on the mode of --individually moved data units in the block. -+in @code{cpymem@var{m}} does not impose any restriction on the mode of -+individually copied data units in the block. - --These patterns need not give special consideration to the possibility --that the source and destination strings might overlap. -+The @code{cpymem@var{m}} patterns need not give special consideration -+to the possibility that the source and destination strings might -+overlap. These patterns are used to do inline expansion of -+@code{__builtin_memcpy}. - - @cindex @code{movstr} instruction pattern - @item @samp{movstr} -@@ -6294,7 +6345,7 @@ given as a @code{mem:BLK} whose address is in mode @code{Pmode}. The - number of bytes to set is the second operand, in mode @var{m}. The value to - initialize the memory with is the third operand. Targets that only support the - clearing of memory should reject any value that is not the constant 0. See --@samp{movmem@var{m}} for a discussion of the choice of mode. -+@samp{cpymem@var{m}} for a discussion of the choice of mode. - - The fourth operand is the known alignment of the destination, in the form - of a @code{const_int} rtx. Thus, if the compiler knows that the -@@ -6312,13 +6363,13 @@ Operand 9 is the probable maximal size (i.e.@: we cannot rely on it for - correctness, but it can be used for choosing proper code sequence for a - given size). - --The use for multiple @code{setmem@var{m}} is as for @code{movmem@var{m}}. -+The use for multiple @code{setmem@var{m}} is as for @code{cpymem@var{m}}. - - @cindex @code{cmpstrn@var{m}} instruction pattern - @item @samp{cmpstrn@var{m}} - String compare instruction, with five operands. Operand 0 is the output; - it has mode @var{m}. 
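A minimal sketch of source code that exercises the @code{cpymem@var{m}} expansion of @code{__builtin_memcpy} described above (the function name is illustrative, not part of the patch); a small fixed-size copy such as this is a candidate for inline expansion rather than a call to the library @code{memcpy}.

#include <string.h>

/* A copy of a small, known length is a candidate for inline expansion
   through the cpymem/by-pieces machinery instead of a library call.  */
void
copy16 (void *dst, const void *src)
{
  memcpy (dst, src, 16);
}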
The remaining four operands are like the operands --of @samp{movmem@var{m}}. The two memory blocks specified are compared -+of @samp{cpymem@var{m}}. The two memory blocks specified are compared - byte by byte in lexicographic order starting at the beginning of each - string. The instruction is not allowed to prefetch more than one byte - at a time since either string may end in the first byte and reading past -@@ -8537,6 +8588,119 @@ functionality as two separate @code{define_insn} and @code{define_split} - patterns. It exists for compactness, and as a maintenance tool to prevent - having to ensure the two patterns' templates match. - -+@findex define_insn_and_rewrite -+It is sometimes useful to have a @code{define_insn_and_split} -+that replaces specific operands of an instruction but leaves the -+rest of the instruction pattern unchanged. You can do this directly -+with a @code{define_insn_and_split}, but it requires a -+@var{new-insn-pattern-1} that repeats most of the original @var{insn-pattern}. -+There is also the complication that an implicit @code{parallel} in -+@var{insn-pattern} must become an explicit @code{parallel} in -+@var{new-insn-pattern-1}, which is easy to overlook. -+A simpler alternative is to use @code{define_insn_and_rewrite}, which -+is a form of @code{define_insn_and_split} that automatically generates -+@var{new-insn-pattern-1} by replacing each @code{match_operand} -+in @var{insn-pattern} with a corresponding @code{match_dup}, and each -+@code{match_operator} in the pattern with a corresponding @code{match_op_dup}. -+The arguments are otherwise identical to @code{define_insn_and_split}: -+ -+@smallexample -+(define_insn_and_rewrite -+ [@var{insn-pattern}] -+ "@var{condition}" -+ "@var{output-template}" -+ "@var{split-condition}" -+ "@var{preparation-statements}" -+ [@var{insn-attributes}]) -+@end smallexample -+ -+The @code{match_dup}s and @code{match_op_dup}s in the new -+instruction pattern use any new operand values that the -+@var{preparation-statements} store in the @code{operands} array, -+as for a normal @code{define_insn_and_split}. @var{preparation-statements} -+can also emit additional instructions before the new instruction. -+They can even emit an entirely different sequence of instructions and -+use @code{DONE} to avoid emitting a new form of the original -+instruction. -+ -+The split in a @code{define_insn_and_rewrite} is only intended -+to apply to existing instructions that match @var{insn-pattern}. -+@var{split-condition} must therefore start with @code{&&}, -+so that the split condition applies on top of @var{condition}. -+ -+Here is an example from the AArch64 SVE port, in which operand 1 is -+known to be equivalent to an all-true constant and isn't used by the -+output template: -+ -+@smallexample -+(define_insn_and_rewrite "*while_ult_cc" -+ [(set (reg:CC CC_REGNUM) -+ (compare:CC -+ (unspec:SI [(match_operand:PRED_ALL 1) -+ (unspec:PRED_ALL -+ [(match_operand:GPI 2 "aarch64_reg_or_zero" "rZ") -+ (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")] -+ UNSPEC_WHILE_LO)] -+ UNSPEC_PTEST_PTRUE) -+ (const_int 0))) -+ (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") -+ (unspec:PRED_ALL [(match_dup 2) -+ (match_dup 3)] -+ UNSPEC_WHILE_LO))] -+ "TARGET_SVE" -+ "whilelo\t%0., %2, %3" -+ ;; Force the compiler to drop the unused predicate operand, so that we -+ ;; don't have an unnecessary PTRUE. 
-+ "&& !CONSTANT_P (operands[1])" -+ @{ -+ operands[1] = CONSTM1_RTX (mode); -+ @} -+) -+@end smallexample -+ -+The splitter in this case simply replaces operand 1 with the constant -+value that it is known to have. The equivalent @code{define_insn_and_split} -+would be: -+ -+@smallexample -+(define_insn_and_split "*while_ult_cc" -+ [(set (reg:CC CC_REGNUM) -+ (compare:CC -+ (unspec:SI [(match_operand:PRED_ALL 1) -+ (unspec:PRED_ALL -+ [(match_operand:GPI 2 "aarch64_reg_or_zero" "rZ") -+ (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")] -+ UNSPEC_WHILE_LO)] -+ UNSPEC_PTEST_PTRUE) -+ (const_int 0))) -+ (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") -+ (unspec:PRED_ALL [(match_dup 2) -+ (match_dup 3)] -+ UNSPEC_WHILE_LO))] -+ "TARGET_SVE" -+ "whilelo\t%0., %2, %3" -+ ;; Force the compiler to drop the unused predicate operand, so that we -+ ;; don't have an unnecessary PTRUE. -+ "&& !CONSTANT_P (operands[1])" -+ [(parallel -+ [(set (reg:CC CC_REGNUM) -+ (compare:CC -+ (unspec:SI [(match_dup 1) -+ (unspec:PRED_ALL [(match_dup 2) -+ (match_dup 3)] -+ UNSPEC_WHILE_LO)] -+ UNSPEC_PTEST_PTRUE) -+ (const_int 0))) -+ (set (match_dup 0) -+ (unspec:PRED_ALL [(match_dup 2) -+ (match_dup 3)] -+ UNSPEC_WHILE_LO))])] -+ @{ -+ operands[1] = CONSTM1_RTX (mode); -+ @} -+) -+@end smallexample -+ - @end ifset - @ifset INTERNALS - @node Including Patterns -@@ -10979,6 +11143,27 @@ Other attributes are defined using: - (define_code_attr @var{name} [(@var{code1} "@var{value1}") @dots{} (@var{coden} "@var{valuen}")]) - @end smallexample - -+Instruction patterns can use code attributes as rtx codes, which can be -+useful if two sets of codes act in tandem. For example, the following -+@code{define_insn} defines two patterns, one calculating a signed absolute -+difference and another calculating an unsigned absolute difference: -+ -+@smallexample -+(define_code_iterator any_max [smax umax]) -+(define_code_attr paired_min [(smax "smin") (umax "umin")]) -+(define_insn @dots{} -+ [(set (match_operand:SI 0 @dots{}) -+ (minus:SI (any_max:SI (match_operand:SI 1 @dots{}) -+ (match_operand:SI 2 @dots{})) -+ (:SI (match_dup 1) (match_dup 2))))] -+ @dots{}) -+@end smallexample -+ -+The signed version of the instruction uses @code{smax} and @code{smin} -+while the unsigned version uses @code{umax} and @code{umin}. There -+are no versions that pair @code{smax} with @code{umin} or @code{umax} -+with @code{smin}. -+ - Here's an example of code iterators in action, taken from the MIPS port: - - @smallexample -@@ -11249,4 +11434,13 @@ name and same types of iterator. For example: - would produce a single set of functions that handles both - @code{INTEGER_MODES} and @code{FLOAT_MODES}. - -+It is also possible for these @samp{@@} patterns to have different -+numbers of operands from each other. For example, patterns with -+a binary rtl code might take three operands (one output and two inputs) -+while patterns with a ternary rtl code might take four operands (one -+output and three inputs). This combination would produce separate -+@samp{maybe_gen_@var{name}} and @samp{gen_@var{name}} functions for -+each operand count, but it would still produce a single -+@samp{maybe_code_for_@var{name}} and a single @samp{code_for_@var{name}}. -+ - @end ifset -diff --git a/gcc/doc/rtl.texi b/gcc/doc/rtl.texi -index f5f2de756..3df798216 100644 ---- a/gcc/doc/rtl.texi -+++ b/gcc/doc/rtl.texi -@@ -3295,18 +3295,6 @@ There is one other known use for clobbering a pseudo register in a - clobbered by the insn. 
In this case, using the same pseudo register in - the clobber and elsewhere in the insn produces the expected results. - --@findex clobber_high --@item (clobber_high @var{x}) --Represents the storing or possible storing of an unpredictable, --undescribed value into the upper parts of @var{x}. The mode of the expression --represents the lower parts of the register which will not be overwritten. --@code{reg} must be a reg expression. -- --One place this is used is when calling into functions where the registers are --preserved, but only up to a given number of bits. For example when using --Aarch64 SVE, calling a TLS descriptor will cause only the lower 128 bits of --each of the vector registers to be preserved. -- - @findex use - @item (use @var{x}) - Represents the use of the value of @var{x}. It indicates that the -@@ -3341,7 +3329,7 @@ that the register is live. You should think twice before adding - instead. The @code{use} RTX is most commonly useful to describe that - a fixed register is implicitly used in an insn. It is also safe to use - in patterns where the compiler knows for other reasons that the result --of the whole pattern is variable, such as @samp{movmem@var{m}} or -+of the whole pattern is variable, such as @samp{cpymem@var{m}} or - @samp{call} patterns. - - During the reload phase, an insn that has a @code{use} as pattern -@@ -3360,8 +3348,7 @@ Represents several side effects performed in parallel. The square - brackets stand for a vector; the operand of @code{parallel} is a - vector of expressions. @var{x0}, @var{x1} and so on are individual - side effect expressions---expressions of code @code{set}, @code{call}, --@code{return}, @code{simple_return}, @code{clobber} @code{use} or --@code{clobber_high}. -+@code{return}, @code{simple_return}, @code{clobber} or @code{use}. - - ``In parallel'' means that first all the values used in the individual - side-effects are computed, and second all the actual side-effects are -diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi -index 546af7f72..62245c2b3 100644 ---- a/gcc/doc/sourcebuild.texi -+++ b/gcc/doc/sourcebuild.texi -@@ -1439,6 +1439,14 @@ vector alignment. - Target supports both signed and unsigned averaging operations on vectors - of bytes. - -+@item vect_mulhrs_hi -+Target supports both signed and unsigned multiply-high-with-round-and-scale -+operations on vectors of half-words. -+ -+@item vect_sdiv_pow2_si -+Target supports signed division by constant power-of-2 operations -+on vectors of 4-byte integers. -+ - @item vect_condition - Target supports vector conditional operations. - -@@ -1854,6 +1862,16 @@ ARM target supports extensions to generate the @code{VFMAL} and @code{VFMLS} - half-precision floating-point instructions available from ARMv8.2-A and - onwards. Some multilibs may be incompatible with these options. - -+@item arm_v8_2a_bf16_neon_ok -+ARM target supports options to generate instructions from ARMv8.2-A with -+the BFloat16 extension (bf16). Some multilibs may be incompatible with these -+options. -+ -+@item arm_v8_2a_i8mm_ok -+ARM target supports options to generate instructions from ARMv8.2-A with -+the 8-Bit Integer Matrix Multiply extension (i8mm). Some multilibs may be -+incompatible with these options. -+ - @item arm_prefer_ldrd_strd - ARM target prefers @code{LDRD} and @code{STRD} instructions over - @code{LDM} and @code{STM} instructions. -@@ -2663,6 +2681,91 @@ assembly output. 
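A hypothetical testsuite skeleton showing how the new @code{vect_mulhrs_hi} effective-target keyword above might be used; the directives and loop body are illustrative only and do not come from the patch.

/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize" } */
/* { dg-require-effective-target vect_mulhrs_hi } */

/* Rounding multiply-high on half-words, the operation that the
   vect_mulhrs_hi keyword advertises support for.  */
void
f (short *restrict d, short *restrict a, short *restrict b, int n)
{
  for (int i = 0; i < n; ++i)
    d[i] = ((int) a[i] * b[i] + (1 << 14)) >> 15;
}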
- @item scan-not-hidden @var{symbol} [@{ target/xfail @var{selector} @}] - Passes if @var{symbol} is not defined as a hidden symbol in the test's - assembly output. -+ -+@item check-function-bodies @var{prefix} @var{terminator} [@var{option} [@{ target/xfail @var{selector} @}]] -+Looks through the source file for comments that give the expected assembly -+output for selected functions. Each line of expected output starts with the -+prefix string @var{prefix} and the expected output for a function as a whole -+is followed by a line that starts with the string @var{terminator}. -+Specifying an empty terminator is equivalent to specifying @samp{"*/"}. -+ -+If @var{option} is specified, the test only applies to command lines -+that contain @var{option}. This can be useful if a source file is compiled -+both with and without optimization, since it is rarely useful to check the -+assembly output for unoptimized code. -+ -+The first line of the expected output for a function @var{fn} has the form: -+ -+@smallexample -+@var{prefix} @var{fn}: [@{ target/xfail @var{selector} @}] -+@end smallexample -+ -+Subsequent lines of the expected output also start with @var{prefix}. -+In both cases, whitespace after @var{prefix} is not significant. -+ -+The test discards assembly directives such as @code{.cfi_startproc} -+and local label definitions such as @code{.LFB0} from the compiler's -+assembly output. It then matches the result against the expected -+output for a function as a single regular expression. This means that -+later lines can use backslashes to refer back to @samp{(@dots{})} -+captures on earlier lines. For example: -+ -+@smallexample -+/* @{ dg-final @{ check-function-bodies "**" "" "-DCHECK_ASM" @} @} */ -+@dots{} -+/* -+** add_w0_s8_m: -+** mov (z[0-9]+\.b), w0 -+** add z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+svint8_t add_w0_s8_m (@dots{}) @{ @dots{} @} -+@dots{} -+/* -+** add_b0_s8_m: -+** mov (z[0-9]+\.b), b0 -+** add z1\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+svint8_t add_b0_s8_m (@dots{}) @{ @dots{} @} -+@end smallexample -+ -+checks whether the implementations of @code{add_w0_s8_m} and -+@code{add_b0_s8_m} match the regular expressions given. The test only -+runs when @samp{-DCHECK_ASM} is passed on the command line. -+ -+It is possible to create non-capturing multi-line regular expression -+groups of the form @samp{(@var{a}|@var{b}|@dots{})} by putting the -+@samp{(}, @samp{|} and @samp{)} on separate lines (each still using -+@var{prefix}). For example: -+ -+@smallexample -+/* -+** cmple_f16_tied: -+** ( -+** fcmge p0\.h, p0/z, z1\.h, z0\.h -+** | -+** fcmle p0\.h, p0/z, z0\.h, z1\.h -+** ) -+** ret -+*/ -+svbool_t cmple_f16_tied (@dots{}) @{ @dots{} @} -+@end smallexample -+ -+checks whether @code{cmple_f16_tied} is implemented by the -+@code{fcmge} instruction followed by @code{ret} or by the -+@code{fcmle} instruction followed by @code{ret}. The test is -+still a single regular rexpression. -+ -+A line containing just: -+ -+@smallexample -+@var{prefix} ... -+@end smallexample -+ -+stands for zero or more unmatched lines; the whitespace after -+@var{prefix} is again not significant. -+ - @end table - - @subsubsection Scan optimization dump files -diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi -index 73db70867..3f22bb1f6 100644 ---- a/gcc/doc/tm.texi -+++ b/gcc/doc/tm.texi -@@ -1878,6 +1878,9 @@ function calls. 
- If a register has 0 in @code{CALL_USED_REGISTERS}, the compiler - automatically saves it on function entry and restores it on function - exit, if the register is used within the function. -+ -+Exactly one of @code{CALL_USED_REGISTERS} and @code{CALL_REALLY_USED_REGISTERS} -+must be defined. Modern ports should define @code{CALL_REALLY_USED_REGISTERS}. - @end defmac - - @defmac CALL_REALLY_USED_REGISTERS -@@ -1887,48 +1890,55 @@ exit, if the register is used within the function. - Like @code{CALL_USED_REGISTERS} except this macro doesn't require - that the entire set of @code{FIXED_REGISTERS} be included. - (@code{CALL_USED_REGISTERS} must be a superset of @code{FIXED_REGISTERS}). --This macro is optional. If not specified, it defaults to the value --of @code{CALL_USED_REGISTERS}. -+ -+Exactly one of @code{CALL_USED_REGISTERS} and @code{CALL_REALLY_USED_REGISTERS} -+must be defined. Modern ports should define @code{CALL_REALLY_USED_REGISTERS}. - @end defmac - - @cindex call-used register - @cindex call-clobbered register - @cindex call-saved register --@deftypefn {Target Hook} bool TARGET_HARD_REGNO_CALL_PART_CLOBBERED (rtx_insn *@var{insn}, unsigned int @var{regno}, machine_mode @var{mode}) --This hook should return true if @var{regno} is partly call-saved and --partly call-clobbered, and if a value of mode @var{mode} would be partly --clobbered by call instruction @var{insn}. If @var{insn} is NULL then it --should return true if any call could partly clobber the register. --For example, if the low 32 bits of @var{regno} are preserved across a call --but higher bits are clobbered, this hook should return true for a 64-bit --mode but false for a 32-bit mode. -- --The default implementation returns false, which is correct --for targets that don't have partly call-clobbered registers. -+@deftypefn {Target Hook} {const predefined_function_abi &} TARGET_FNTYPE_ABI (const_tree @var{type}) -+Return the ABI used by a function with type @var{type}; see the -+definition of @code{predefined_function_abi} for details of the ABI -+descriptor. Targets only need to define this hook if they support -+interoperability between several ABIs in the same translation unit. - @end deftypefn - --@deftypefn {Target Hook} void TARGET_REMOVE_EXTRA_CALL_PRESERVED_REGS (rtx_insn *@var{insn}, HARD_REG_SET *@var{used_regs}) --This hook removes registers from the set of call-clobbered registers -- in @var{used_regs} if, contrary to the default rules, something guarantees -- that @samp{insn} preserves those registers. For example, some targets -- support variant ABIs in which functions preserve more registers than -- normal functions would. Removing those extra registers from @var{used_regs} -- can lead to better register allocation. -- -- The default implementation does nothing, which is always safe. -- Defining the hook is purely an optimization. -+@deftypefn {Target Hook} {const predefined_function_abi &} TARGET_INSN_CALLEE_ABI (const rtx_insn *@var{insn}) -+This hook returns a description of the ABI used by the target of -+call instruction @var{insn}; see the definition of -+@code{predefined_function_abi} for details of the ABI descriptor. -+Only the global function @code{insn_callee_abi} should call this hook -+directly. -+ -+Targets only need to define this hook if they support -+interoperability between several ABIs in the same translation unit. 
- @end deftypefn - --@deftypefn {Target Hook} {rtx_insn *} TARGET_RETURN_CALL_WITH_MAX_CLOBBERS (rtx_insn *@var{call_1}, rtx_insn *@var{call_2}) --This hook returns a pointer to the call that partially clobbers the --most registers. If a platform supports multiple ABIs where the registers --that are partially clobbered may vary, this function compares two --calls and returns a pointer to the one that clobbers the most registers. --If both calls clobber the same registers, @var{call_1} must be returned. -+@cindex call-used register -+@cindex call-clobbered register -+@cindex call-saved register -+@deftypefn {Target Hook} bool TARGET_HARD_REGNO_CALL_PART_CLOBBERED (unsigned int @var{abi_id}, unsigned int @var{regno}, machine_mode @var{mode}) -+ABIs usually specify that calls must preserve the full contents -+of a particular register, or that calls can alter any part of a -+particular register. This information is captured by the target macro -+@code{CALL_REALLY_USED_REGISTERS}. However, some ABIs specify that calls -+must preserve certain bits of a particular register but can alter others. -+This hook should return true if this applies to at least one of the -+registers in @samp{(reg:@var{mode} @var{regno})}, and if as a result the -+call would alter part of the @var{mode} value. For example, if a call -+preserves the low 32 bits of a 64-bit hard register @var{regno} but can -+clobber the upper 32 bits, this hook should return true for a 64-bit mode -+but false for a 32-bit mode. -+ -+The value of @var{abi_id} comes from the @code{predefined_function_abi} -+structure that describes the ABI of the call; see the definition of the -+structure for more details. If (as is usual) the target uses the same ABI -+for all functions in a translation unit, @var{abi_id} is always 0. - --The registers clobbered in different ABIs must be a proper subset or --superset of all other ABIs. @var{call_1} must always be a call insn, --call_2 may be NULL or a call insn. -+The default implementation returns false, which is correct -+for targets that don't have partly call-clobbered registers. - @end deftypefn - - @deftypefn {Target Hook} {const char *} TARGET_GET_MULTILIB_ABI_NAME (void) -@@ -3961,18 +3971,10 @@ This section describes the macros which let you control how various - types of arguments are passed in registers or how they are arranged in - the stack. - --@deftypefn {Target Hook} rtx TARGET_FUNCTION_ARG (cumulative_args_t @var{ca}, machine_mode @var{mode}, const_tree @var{type}, bool @var{named}) --Return an RTX indicating whether a function argument is passed in a --register and if so, which register. -- --The arguments are @var{ca}, which summarizes all the previous --arguments; @var{mode}, the machine mode of the argument; @var{type}, --the data type of the argument as a tree node or 0 if that is not known --(which happens for C support library functions); and @var{named}, --which is @code{true} for an ordinary argument and @code{false} for --nameless arguments that correspond to @samp{@dots{}} in the called --function's prototype. @var{type} can be an incomplete type if a --syntax error has previously occurred. -+@deftypefn {Target Hook} rtx TARGET_FUNCTION_ARG (cumulative_args_t @var{ca}, const function_arg_info @var{&arg}) -+Return an RTX indicating whether function argument @var{arg} is passed -+in a register and if so, which register. Argument @var{ca} summarizes all -+the previous arguments. 
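For the @code{TARGET_HARD_REGNO_CALL_PART_CLOBBERED} hook documented above, a minimal sketch of an implementation for a port whose default ABI preserves only the low 64 bits of its FP/SIMD registers; @code{EXAMPLE_FP_REGNUM_P} stands in for whatever register predicate the port already defines, and the whole function is an assumption rather than code from the patch.

static bool
example_hard_regno_call_part_clobbered (unsigned int abi_id ATTRIBUTE_UNUSED,
                                        unsigned int regno, machine_mode mode)
{
  /* Calls preserve only the low 8 bytes of FP/SIMD registers, so any
     wider mode is partly clobbered by a call.  */
  return (EXAMPLE_FP_REGNUM_P (regno)
          && maybe_gt (GET_MODE_SIZE (mode), 8));
}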
- - The return value is usually either a @code{reg} RTX for the hard - register in which to pass the argument, or zero to pass the argument -@@ -4020,14 +4022,14 @@ defined, the argument will be computed in the stack and then loaded into - a register. - @end deftypefn - --@deftypefn {Target Hook} bool TARGET_MUST_PASS_IN_STACK (machine_mode @var{mode}, const_tree @var{type}) --This target hook should return @code{true} if we should not pass @var{type} -+@deftypefn {Target Hook} bool TARGET_MUST_PASS_IN_STACK (const function_arg_info @var{&arg}) -+This target hook should return @code{true} if we should not pass @var{arg} - solely in registers. The file @file{expr.h} defines a - definition that is usually appropriate, refer to @file{expr.h} for additional - documentation. - @end deftypefn - --@deftypefn {Target Hook} rtx TARGET_FUNCTION_INCOMING_ARG (cumulative_args_t @var{ca}, machine_mode @var{mode}, const_tree @var{type}, bool @var{named}) -+@deftypefn {Target Hook} rtx TARGET_FUNCTION_INCOMING_ARG (cumulative_args_t @var{ca}, const function_arg_info @var{&arg}) - Define this hook if the caller and callee on the target have different - views of where arguments are passed. Also define this hook if there are - functions that are never directly called, but are invoked by the hardware -@@ -4057,7 +4059,7 @@ Perform a target dependent initialization of pic_offset_table_rtx. - This hook is called at the start of register allocation. - @end deftypefn - --@deftypefn {Target Hook} int TARGET_ARG_PARTIAL_BYTES (cumulative_args_t @var{cum}, machine_mode @var{mode}, tree @var{type}, bool @var{named}) -+@deftypefn {Target Hook} int TARGET_ARG_PARTIAL_BYTES (cumulative_args_t @var{cum}, const function_arg_info @var{&arg}) - This target hook returns the number of bytes at the beginning of an - argument that must be put in registers. The value must be zero for - arguments that are passed entirely in registers or that are entirely -@@ -4076,11 +4078,11 @@ register to be used by the caller for this argument; likewise - @code{TARGET_FUNCTION_INCOMING_ARG}, for the called function. - @end deftypefn - --@deftypefn {Target Hook} bool TARGET_PASS_BY_REFERENCE (cumulative_args_t @var{cum}, machine_mode @var{mode}, const_tree @var{type}, bool @var{named}) --This target hook should return @code{true} if an argument at the -+@deftypefn {Target Hook} bool TARGET_PASS_BY_REFERENCE (cumulative_args_t @var{cum}, const function_arg_info @var{&arg}) -+This target hook should return @code{true} if argument @var{arg} at the - position indicated by @var{cum} should be passed by reference. This - predicate is queried after target independent reasons for being --passed by reference, such as @code{TREE_ADDRESSABLE (type)}. -+passed by reference, such as @code{TREE_ADDRESSABLE (@var{arg}.type)}. - - If the hook returns true, a copy of that argument is made in memory and a - pointer to the argument is passed instead of the argument itself. -@@ -4088,7 +4090,7 @@ The pointer is passed in whatever way is appropriate for passing a pointer - to that type. - @end deftypefn - --@deftypefn {Target Hook} bool TARGET_CALLEE_COPIES (cumulative_args_t @var{cum}, machine_mode @var{mode}, const_tree @var{type}, bool @var{named}) -+@deftypefn {Target Hook} bool TARGET_CALLEE_COPIES (cumulative_args_t @var{cum}, const function_arg_info @var{&arg}) - The function argument described by the parameters to this hook is - known to be passed by reference. 
The hook should return true if the - function argument should be copied by the callee instead of copied -@@ -4167,10 +4169,9 @@ argument @var{libname} exists for symmetry with - @c --mew 5feb93 i switched the order of the sentences. --mew 10feb93 - @end defmac - --@deftypefn {Target Hook} void TARGET_FUNCTION_ARG_ADVANCE (cumulative_args_t @var{ca}, machine_mode @var{mode}, const_tree @var{type}, bool @var{named}) -+@deftypefn {Target Hook} void TARGET_FUNCTION_ARG_ADVANCE (cumulative_args_t @var{ca}, const function_arg_info @var{&arg}) - This hook updates the summarizer variable pointed to by @var{ca} to --advance past an argument in the argument list. The values @var{mode}, --@var{type} and @var{named} describe that argument. Once this is done, -+advance past argument @var{arg} in the argument list. Once this is done, - the variable @var{cum} is suitable for analyzing the @emph{following} - argument with @code{TARGET_FUNCTION_ARG}, etc. - -@@ -4331,6 +4332,27 @@ insns involving vector mode @var{mode}. At the very least, it - must have move patterns for this mode. - @end deftypefn - -+@deftypefn {Target Hook} bool TARGET_COMPATIBLE_VECTOR_TYPES_P (const_tree @var{type1}, const_tree @var{type2}) -+Return true if there is no target-specific reason for treating -+vector types @var{type1} and @var{type2} as distinct types. The caller -+has already checked for target-independent reasons, meaning that the -+types are known to have the same mode, to have the same number of elements, -+and to have what the caller considers to be compatible element types. -+ -+The main reason for defining this hook is to reject pairs of types -+that are handled differently by the target's calling convention. -+For example, when a new @var{N}-bit vector architecture is added -+to a target, the target may want to handle normal @var{N}-bit -+@code{VECTOR_TYPE} arguments and return values in the same way as -+before, to maintain backwards compatibility. However, it may also -+provide new, architecture-specific @code{VECTOR_TYPE}s that are passed -+and returned in a more efficient way. It is then important to maintain -+a distinction between the ``normal'' @code{VECTOR_TYPE}s and the new -+architecture-specific ones. -+ -+The default implementation returns true, which is correct for most targets. -+@end deftypefn -+ - @deftypefn {Target Hook} opt_machine_mode TARGET_ARRAY_MODE (machine_mode @var{mode}, unsigned HOST_WIDE_INT @var{nelems}) - Return the mode that GCC should use for an array that has - @var{nelems} elements, with each element having mode @var{mode}. -@@ -5202,7 +5224,7 @@ return value of this function should be an RTX that contains the value - to use as the return of @code{__builtin_saveregs}. - @end deftypefn - --@deftypefn {Target Hook} void TARGET_SETUP_INCOMING_VARARGS (cumulative_args_t @var{args_so_far}, machine_mode @var{mode}, tree @var{type}, int *@var{pretend_args_size}, int @var{second_time}) -+@deftypefn {Target Hook} void TARGET_SETUP_INCOMING_VARARGS (cumulative_args_t @var{args_so_far}, const function_arg_info @var{&arg}, int *@var{pretend_args_size}, int @var{second_time}) - This target hook offers an alternative to using - @code{__builtin_saveregs} and defining the hook - @code{TARGET_EXPAND_BUILTIN_SAVEREGS}. Use it to store the anonymous -@@ -5213,8 +5235,8 @@ pass all their arguments on the stack. - - The argument @var{args_so_far} points to the @code{CUMULATIVE_ARGS} data - structure, containing the values that are obtained after processing the --named arguments. 
The arguments @var{mode} and @var{type} describe the --last named argument---its machine mode and its data type as a tree node. -+named arguments. The argument @var{arg} describes the last of these named -+arguments. - - The target hook should do two things: first, push onto the stack all the - argument registers @emph{not} used for the named arguments, and second, -@@ -5314,12 +5336,6 @@ This hook is used by expand pass to emit insn to store @var{bounds} - returned by function call into @var{slot}. - @end deftypefn - --@deftypefn {Target Hook} void TARGET_SETUP_INCOMING_VARARG_BOUNDS (cumulative_args_t @var{args_so_far}, machine_mode @var{mode}, tree @var{type}, int *@var{pretend_args_size}, int @var{second_time}) --Use it to store bounds for anonymous register arguments stored --into the stack. Arguments meaning is similar to --@code{TARGET_SETUP_INCOMING_VARARGS}. --@end deftypefn -- - @node Trampolines - @section Support for Nested Functions - @cindex support for nested functions -@@ -5967,18 +5983,6 @@ instruction pattern. There is no need for the hook to handle these two - implementation approaches itself. - @end deftypefn - --@deftypefn {Target Hook} tree TARGET_VECTORIZE_BUILTIN_CONVERSION (unsigned @var{code}, tree @var{dest_type}, tree @var{src_type}) --This hook should return the DECL of a function that implements conversion of the --input vector of type @var{src_type} to type @var{dest_type}. --The value of @var{code} is one of the enumerators in @code{enum tree_code} and --specifies how the conversion is to be applied --(truncation, rounding, etc.). -- --If this hook is defined, the autovectorizer will use the --@code{TARGET_VECTORIZE_BUILTIN_CONVERSION} target hook when vectorizing --conversion. Otherwise, it will return @code{NULL_TREE}. --@end deftypefn -- - @deftypefn {Target Hook} tree TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION (unsigned @var{code}, tree @var{vec_type_out}, tree @var{vec_type_in}) - This hook should return the decl of a function that implements the - vectorized variant of the function with the @code{combined_fn} code -@@ -6698,7 +6702,7 @@ two areas of memory, or to set, clear or store to memory, for example - when copying a @code{struct}. The @code{by_pieces} infrastructure - implements such memory operations as a sequence of load, store or move - insns. Alternate strategies are to expand the --@code{movmem} or @code{setmem} optabs, to emit a library call, or to emit -+@code{cpymem} or @code{setmem} optabs, to emit a library call, or to emit - unit-by-unit, loop-based operations. - - This target hook should return true if, for a memory operation with a -@@ -6717,7 +6721,7 @@ optimized for speed rather than size. - - Returning true for higher values of @var{size} can improve code generation - for speed if the target does not provide an implementation of the --@code{movmem} or @code{setmem} standard names, if the @code{movmem} or -+@code{cpymem} or @code{setmem} standard names, if the @code{cpymem} or - @code{setmem} implementation would be more expensive than a sequence of - insns, or if the overhead of a library call would dominate that of - the body of the memory operation. -@@ -11607,6 +11611,21 @@ another @code{CALL_EXPR}. 
- @var{arglist} really has type @samp{VEC(tree,gc)*} - @end deftypefn - -+@deftypefn {Target Hook} bool TARGET_CHECK_BUILTIN_CALL (location_t @var{loc}, vec @var{arg_loc}, tree @var{fndecl}, tree @var{orig_fndecl}, unsigned int @var{nargs}, tree *@var{args}) -+Perform semantic checking on a call to a machine-specific built-in -+function after its arguments have been constrained to the function -+signature. Return true if the call is valid, otherwise report an error -+and return false. -+ -+This hook is called after @code{TARGET_RESOLVE_OVERLOADED_BUILTIN}. -+The call was originally to built-in function @var{orig_fndecl}, -+but after the optional @code{TARGET_RESOLVE_OVERLOADED_BUILTIN} -+step is now to built-in function @var{fndecl}. @var{loc} is the -+location of the call and @var{args} is an array of function arguments, -+of which there are @var{nargs}. @var{arg_loc} specifies the location -+of each argument. -+@end deftypefn -+ - @deftypefn {Target Hook} tree TARGET_FOLD_BUILTIN (tree @var{fndecl}, int @var{n_args}, tree *@var{argp}, bool @var{ignore}) - Fold a call to a machine specific built-in function that was set up by - @samp{TARGET_INIT_BUILTINS}. @var{fndecl} is the declaration of the -@@ -11791,28 +11810,6 @@ cannot_modify_jumps_past_reload_p () - @end smallexample - @end deftypefn - --@deftypefn {Target Hook} reg_class_t TARGET_BRANCH_TARGET_REGISTER_CLASS (void) --This target hook returns a register class for which branch target register --optimizations should be applied. All registers in this class should be --usable interchangeably. After reload, registers in this class will be --re-allocated and loads will be hoisted out of loops and be subjected --to inter-block scheduling. --@end deftypefn -- --@deftypefn {Target Hook} bool TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED (bool @var{after_prologue_epilogue_gen}) --Branch target register optimization will by default exclude callee-saved --registers --that are not already live during the current function; if this target hook --returns true, they will be included. The target code must than make sure --that all target registers in the class returned by --@samp{TARGET_BRANCH_TARGET_REGISTER_CLASS} that might need saving are --saved. @var{after_prologue_epilogue_gen} indicates if prologues and --epilogues have already been generated. Note, even if you only return --true when @var{after_prologue_epilogue_gen} is false, you still are likely --to have to make special provisions in @code{INITIAL_ELIMINATION_OFFSET} --to reserve space for caller-saved target registers. --@end deftypefn -- - @deftypefn {Target Hook} bool TARGET_HAVE_CONDITIONAL_EXECUTION (void) - This target hook returns true if the target supports conditional execution. - This target hook is required only when the target has several different -diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in -index bc362dca0..89cfb5253 100644 ---- a/gcc/doc/tm.texi.in -+++ b/gcc/doc/tm.texi.in -@@ -1689,6 +1689,9 @@ function calls. - If a register has 0 in @code{CALL_USED_REGISTERS}, the compiler - automatically saves it on function entry and restores it on function - exit, if the register is used within the function. -+ -+Exactly one of @code{CALL_USED_REGISTERS} and @code{CALL_REALLY_USED_REGISTERS} -+must be defined. Modern ports should define @code{CALL_REALLY_USED_REGISTERS}. - @end defmac - - @defmac CALL_REALLY_USED_REGISTERS -@@ -1698,18 +1701,22 @@ exit, if the register is used within the function. 
- Like @code{CALL_USED_REGISTERS} except this macro doesn't require - that the entire set of @code{FIXED_REGISTERS} be included. - (@code{CALL_USED_REGISTERS} must be a superset of @code{FIXED_REGISTERS}). --This macro is optional. If not specified, it defaults to the value --of @code{CALL_USED_REGISTERS}. -+ -+Exactly one of @code{CALL_USED_REGISTERS} and @code{CALL_REALLY_USED_REGISTERS} -+must be defined. Modern ports should define @code{CALL_REALLY_USED_REGISTERS}. - @end defmac - - @cindex call-used register - @cindex call-clobbered register - @cindex call-saved register --@hook TARGET_HARD_REGNO_CALL_PART_CLOBBERED -+@hook TARGET_FNTYPE_ABI - --@hook TARGET_REMOVE_EXTRA_CALL_PRESERVED_REGS -+@hook TARGET_INSN_CALLEE_ABI - --@hook TARGET_RETURN_CALL_WITH_MAX_CLOBBERS -+@cindex call-used register -+@cindex call-clobbered register -+@cindex call-saved register -+@hook TARGET_HARD_REGNO_CALL_PART_CLOBBERED - - @hook TARGET_GET_MULTILIB_ABI_NAME - -@@ -3362,6 +3369,8 @@ stack. - - @hook TARGET_VECTOR_MODE_SUPPORTED_P - -+@hook TARGET_COMPATIBLE_VECTOR_TYPES_P -+ - @hook TARGET_ARRAY_MODE - - @hook TARGET_ARRAY_MODE_SUPPORTED_P -@@ -3785,8 +3794,6 @@ These machine description macros help implement varargs: - - @hook TARGET_STORE_RETURNED_BOUNDS - --@hook TARGET_SETUP_INCOMING_VARARG_BOUNDS -- - @node Trampolines - @section Support for Nested Functions - @cindex support for nested functions -@@ -4160,8 +4167,6 @@ address; but often a machine-dependent strategy can generate better code. - - @hook TARGET_VECTORIZE_VEC_PERM_CONST - --@hook TARGET_VECTORIZE_BUILTIN_CONVERSION -- - @hook TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION - - @hook TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION -@@ -7934,6 +7939,8 @@ to by @var{ce_info}. - - @hook TARGET_RESOLVE_OVERLOADED_BUILTIN - -+@hook TARGET_CHECK_BUILTIN_CALL -+ - @hook TARGET_FOLD_BUILTIN - - @hook TARGET_GIMPLE_FOLD_BUILTIN -@@ -7999,10 +8006,6 @@ build_type_attribute_variant (@var{mdecl}, - - @hook TARGET_CANNOT_MODIFY_JUMPS_P - --@hook TARGET_BRANCH_TARGET_REGISTER_CLASS -- --@hook TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED -- - @hook TARGET_HAVE_CONDITIONAL_EXECUTION - - @hook TARGET_GEN_CCMP_FIRST -diff --git a/gcc/dse.c b/gcc/dse.c -index 4becdcf1c..874ff507c 100644 ---- a/gcc/dse.c -+++ b/gcc/dse.c -@@ -50,6 +50,7 @@ along with GCC; see the file COPYING3. If not see - #include "params.h" - #include "rtl-iter.h" - #include "cfgcleanup.h" -+#include "calls.h" - - /* This file contains three techniques for performing Dead Store - Elimination (dse). 
-@@ -819,7 +820,7 @@ emit_inc_dec_insn_before (rtx mem ATTRIBUTE_UNUSED, - for (cur = new_insn; cur; cur = NEXT_INSN (cur)) - { - info.current = cur; -- note_stores (PATTERN (cur), note_add_store, &info); -+ note_stores (cur, note_add_store, &info); - } - - /* If a failure was flagged above, return 1 so that for_each_inc_dec will -@@ -1976,7 +1977,7 @@ replace_read (store_info *store_info, insn_info_t store_insn, - bitmap regs_set = BITMAP_ALLOC (®_obstack); - - for (this_insn = insns; this_insn != NULL_RTX; this_insn = NEXT_INSN (this_insn)) -- note_stores (PATTERN (this_insn), look_for_hardregs, regs_set); -+ note_stores (this_insn, look_for_hardregs, regs_set); - - bitmap_and_into (regs_set, regs_live); - if (!bitmap_empty_p (regs_set)) -@@ -2341,7 +2342,8 @@ get_call_args (rtx call_insn, tree fn, rtx *args, int nargs) - if (!is_int_mode (TYPE_MODE (TREE_VALUE (arg)), &mode)) - return false; - -- reg = targetm.calls.function_arg (args_so_far, mode, NULL_TREE, true); -+ function_arg_info arg (mode, /*named=*/true); -+ reg = targetm.calls.function_arg (args_so_far, arg); - if (!reg || !REG_P (reg) || GET_MODE (reg) != mode) - return false; - -@@ -2373,7 +2375,7 @@ get_call_args (rtx call_insn, tree fn, rtx *args, int nargs) - if (tmp) - args[idx] = tmp; - -- targetm.calls.function_arg_advance (args_so_far, mode, NULL_TREE, true); -+ targetm.calls.function_arg_advance (args_so_far, arg); - } - if (arg != void_list_node || idx != nargs) - return false; -@@ -2388,7 +2390,7 @@ copy_fixed_regs (const_bitmap in) - bitmap ret; - - ret = ALLOC_REG_SET (NULL); -- bitmap_and (ret, in, fixed_reg_set_regset); -+ bitmap_and (ret, in, bitmap_view (fixed_reg_set)); - return ret; - } - -diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c -index 30c4c7007..a219d7fc3 100644 ---- a/gcc/dwarf2out.c -+++ b/gcc/dwarf2out.c -@@ -16428,7 +16428,6 @@ mem_loc_descriptor (rtx rtl, machine_mode mode, - case CONST_FIXED: - case CLRSB: - case CLOBBER: -- case CLOBBER_HIGH: - break; - - case CONST_STRING: -@@ -18566,6 +18565,24 @@ loc_list_from_tree_1 (tree loc, int want_address, - } - break; - -+ case POLY_INT_CST: -+ { -+ if (want_address) -+ { -+ expansion_failed (loc, NULL_RTX, -+ "constant address with a runtime component"); -+ return 0; -+ } -+ poly_int64 value; -+ if (!poly_int_tree_p (loc, &value)) -+ { -+ expansion_failed (loc, NULL_RTX, "constant too big"); -+ return 0; -+ } -+ ret = int_loc_descriptor (value); -+ } -+ break; -+ - case CONSTRUCTOR: - case REAL_CST: - case STRING_CST: -@@ -19682,6 +19699,7 @@ add_const_value_attribute (dw_die_ref die, rtx rtl) - case MINUS: - case SIGN_EXTEND: - case ZERO_EXTEND: -+ case CONST_POLY_INT: - return false; - - case MEM: -diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c -index 78104603c..d6636ccb0 100644 ---- a/gcc/emit-rtl.c -+++ b/gcc/emit-rtl.c -@@ -2865,7 +2865,6 @@ verify_rtx_sharing (rtx orig, rtx insn) - /* SCRATCH must be shared because they represent distinct values. */ - return; - case CLOBBER: -- case CLOBBER_HIGH: - /* Share clobbers of hard registers (like cc0), but do not share pseudo reg - clobbers or clobbers of hard registers that originated as pseudos. - This is needed to allow safe register renaming. */ -@@ -3119,7 +3118,6 @@ repeat: - /* SCRATCH must be shared because they represent distinct values. */ - return; - case CLOBBER: -- case CLOBBER_HIGH: - /* Share clobbers of hard registers (like cc0), but do not share pseudo reg - clobbers or clobbers of hard registers that originated as pseudos. - This is needed to allow safe register renaming. 
*/ -@@ -5693,7 +5691,6 @@ copy_insn_1 (rtx orig) - case SIMPLE_RETURN: - return orig; - case CLOBBER: -- case CLOBBER_HIGH: - /* Share clobbers of hard registers (like cc0), but do not share pseudo reg - clobbers or clobbers of hard registers that originated as pseudos. - This is needed to allow safe register renaming. */ -@@ -6505,21 +6502,6 @@ gen_hard_reg_clobber (machine_mode mode, unsigned int regno) - gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (mode, regno))); - } - --static GTY((deletable)) rtx --hard_reg_clobbers_high[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER]; -- --/* Return a CLOBBER_HIGH expression for register REGNO that clobbers MODE, -- caching into HARD_REG_CLOBBERS_HIGH. */ --rtx --gen_hard_reg_clobber_high (machine_mode mode, unsigned int regno) --{ -- if (hard_reg_clobbers_high[mode][regno]) -- return hard_reg_clobbers_high[mode][regno]; -- else -- return (hard_reg_clobbers_high[mode][regno] -- = gen_rtx_CLOBBER_HIGH (VOIDmode, gen_rtx_REG (mode, regno))); --} -- - location_t prologue_location; - location_t epilogue_location; - -diff --git a/gcc/emit-rtl.h b/gcc/emit-rtl.h -index 7b1cecd3c..573140e84 100644 ---- a/gcc/emit-rtl.h -+++ b/gcc/emit-rtl.h -@@ -22,6 +22,7 @@ along with GCC; see the file COPYING3. If not see - - struct temp_slot; - typedef struct temp_slot *temp_slot_p; -+struct predefined_function_abi; - - /* Information mainlined about RTL representation of incoming arguments. */ - struct GTY(()) incoming_args { -@@ -64,6 +65,14 @@ struct GTY(()) rtl_data { - struct function_subsections subsections; - struct rtl_eh eh; - -+ /* The ABI of the function, i.e. the interface it presents to its callers. -+ This is the ABI that should be queried to see which registers the -+ function needs to save before it uses them. -+ -+ Other functions (including those called by this function) might use -+ different ABIs. */ -+ const predefined_function_abi *GTY((skip)) abi; -+ - /* For function.c */ - - /* # of bytes of outgoing arguments. If ACCUMULATE_OUTGOING_ARGS is -diff --git a/gcc/expr.c b/gcc/expr.c -index 650be8dad..b77f0409e 100644 ---- a/gcc/expr.c -+++ b/gcc/expr.c -@@ -73,7 +73,7 @@ along with GCC; see the file COPYING3. 
If not see - int cse_not_expected; - - static bool block_move_libcall_safe_for_call_parm (void); --static bool emit_block_move_via_movmem (rtx, rtx, rtx, unsigned, unsigned, HOST_WIDE_INT, -+static bool emit_block_move_via_cpymem (rtx, rtx, rtx, unsigned, unsigned, HOST_WIDE_INT, - unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT, - unsigned HOST_WIDE_INT); - static void emit_block_move_via_loop (rtx, rtx, rtx, unsigned); -@@ -1645,7 +1645,7 @@ emit_block_move_hints (rtx x, rtx y, rtx size, enum block_op_methods method, - - if (CONST_INT_P (size) && can_move_by_pieces (INTVAL (size), align)) - move_by_pieces (x, y, INTVAL (size), align, RETURN_BEGIN); -- else if (emit_block_move_via_movmem (x, y, size, align, -+ else if (emit_block_move_via_cpymem (x, y, size, align, - expected_align, expected_size, - min_size, max_size, probable_max_size)) - ; -@@ -1723,31 +1723,28 @@ block_move_libcall_safe_for_call_parm (void) - for ( ; arg != void_list_node ; arg = TREE_CHAIN (arg)) - { - machine_mode mode = TYPE_MODE (TREE_VALUE (arg)); -- rtx tmp = targetm.calls.function_arg (args_so_far, mode, -- NULL_TREE, true); -+ function_arg_info arg_info (mode, /*named=*/true); -+ rtx tmp = targetm.calls.function_arg (args_so_far, arg_info); - if (!tmp || !REG_P (tmp)) - return false; -- if (targetm.calls.arg_partial_bytes (args_so_far, mode, NULL, 1)) -+ if (targetm.calls.arg_partial_bytes (args_so_far, arg_info)) - return false; -- targetm.calls.function_arg_advance (args_so_far, mode, -- NULL_TREE, true); -+ targetm.calls.function_arg_advance (args_so_far, arg_info); - } - } - return true; - } - --/* A subroutine of emit_block_move. Expand a movmem pattern; -+/* A subroutine of emit_block_move. Expand a cpymem pattern; - return true if successful. */ - - static bool --emit_block_move_via_movmem (rtx x, rtx y, rtx size, unsigned int align, -+emit_block_move_via_cpymem (rtx x, rtx y, rtx size, unsigned int align, - unsigned int expected_align, HOST_WIDE_INT expected_size, - unsigned HOST_WIDE_INT min_size, - unsigned HOST_WIDE_INT max_size, - unsigned HOST_WIDE_INT probable_max_size) - { -- int save_volatile_ok = volatile_ok; -- - if (expected_align < align) - expected_align = align; - if (expected_size != -1) -@@ -1759,7 +1756,7 @@ emit_block_move_via_movmem (rtx x, rtx y, rtx size, unsigned int align, - } - - /* Since this is a move insn, we don't care about volatility. 
*/ -- volatile_ok = 1; -+ temporary_volatile_ok v (true); - - /* Try the most limited insn first, because there's no point - including more than one in the machine description unless -@@ -1769,7 +1766,7 @@ emit_block_move_via_movmem (rtx x, rtx y, rtx size, unsigned int align, - FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT) - { - scalar_int_mode mode = mode_iter.require (); -- enum insn_code code = direct_optab_handler (movmem_optab, mode); -+ enum insn_code code = direct_optab_handler (cpymem_optab, mode); - - if (code != CODE_FOR_nothing - /* We don't need MODE to be narrower than BITS_PER_HOST_WIDE_INT -@@ -1823,14 +1820,10 @@ emit_block_move_via_movmem (rtx x, rtx y, rtx size, unsigned int align, - create_fixed_operand (&ops[8], NULL); - } - if (maybe_expand_insn (code, nops, ops)) -- { -- volatile_ok = save_volatile_ok; -- return true; -- } -+ return true; - } - } - -- volatile_ok = save_volatile_ok; - return false; - } - -@@ -5841,7 +5834,8 @@ store_expr (tree exp, rtx target, int call_param_p, - copy_blkmode_from_reg (target, temp, TREE_TYPE (exp)); - else - store_bit_field (target, -- INTVAL (expr_size (exp)) * BITS_PER_UNIT, -+ rtx_to_poly_int64 (expr_size (exp)) -+ * BITS_PER_UNIT, - 0, 0, 0, GET_MODE (temp), temp, reverse); - } - else -diff --git a/gcc/final.c b/gcc/final.c -index fefc4874b..7cf9ef1ef 100644 ---- a/gcc/final.c -+++ b/gcc/final.c -@@ -81,6 +81,7 @@ along with GCC; see the file COPYING3. If not see - #include "asan.h" - #include "rtl-iter.h" - #include "print-rtl.h" -+#include "function-abi.h" - - #ifdef XCOFF_DEBUGGING_INFO - #include "xcoffout.h" /* Needed for external data declarations. */ -@@ -230,7 +231,6 @@ static int alter_cond (rtx); - #endif - static int align_fuzz (rtx, rtx, int, unsigned); - static void collect_fn_hard_reg_usage (void); --static tree get_call_fndecl (rtx_insn *); - - /* Initialize data in final at the beginning of a compilation. */ - -@@ -4994,7 +4994,16 @@ collect_fn_hard_reg_usage (void) - if (!targetm.call_fusage_contains_non_callee_clobbers) - return; - -- CLEAR_HARD_REG_SET (function_used_regs); -+ /* Be conservative - mark fixed and global registers as used. */ -+ function_used_regs = fixed_reg_set; -+ -+#ifdef STACK_REGS -+ /* Handle STACK_REGS conservatively, since the df-framework does not -+ provide accurate information for them. */ -+ -+ for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) -+ SET_HARD_REG_BIT (function_used_regs, i); -+#endif - - for (insn = get_insns (); insn != NULL_RTX; insn = next_insn (insn)) - { -@@ -5005,97 +5014,23 @@ collect_fn_hard_reg_usage (void) - - if (CALL_P (insn) - && !self_recursive_call_p (insn)) -- { -- if (!get_call_reg_set_usage (insn, &insn_used_regs, -- call_used_reg_set)) -- return; -- -- IOR_HARD_REG_SET (function_used_regs, insn_used_regs); -- } -+ function_used_regs -+ |= insn_callee_abi (insn).full_and_partial_reg_clobbers (); - - find_all_hard_reg_sets (insn, &insn_used_regs, false); -- IOR_HARD_REG_SET (function_used_regs, insn_used_regs); -- } -+ function_used_regs |= insn_used_regs; - -- /* Be conservative - mark fixed and global registers as used. */ -- IOR_HARD_REG_SET (function_used_regs, fixed_reg_set); -- --#ifdef STACK_REGS -- /* Handle STACK_REGS conservatively, since the df-framework does not -- provide accurate information for them. 
*/ -- -- for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) -- SET_HARD_REG_BIT (function_used_regs, i); --#endif -+ if (hard_reg_set_subset_p (crtl->abi->full_and_partial_reg_clobbers (), -+ function_used_regs)) -+ return; -+ } - -- /* The information we have gathered is only interesting if it exposes a -- register from the call_used_regs that is not used in this function. */ -- if (hard_reg_set_subset_p (call_used_reg_set, function_used_regs)) -- return; -+ /* Mask out fully-saved registers, so that they don't affect equality -+ comparisons between function_abis. */ -+ function_used_regs &= crtl->abi->full_and_partial_reg_clobbers (); - - node = cgraph_node::rtl_info (current_function_decl); - gcc_assert (node != NULL); - -- COPY_HARD_REG_SET (node->function_used_regs, function_used_regs); -- node->function_used_regs_valid = 1; --} -- --/* Get the declaration of the function called by INSN. */ -- --static tree --get_call_fndecl (rtx_insn *insn) --{ -- rtx note, datum; -- -- note = find_reg_note (insn, REG_CALL_DECL, NULL_RTX); -- if (note == NULL_RTX) -- return NULL_TREE; -- -- datum = XEXP (note, 0); -- if (datum != NULL_RTX) -- return SYMBOL_REF_DECL (datum); -- -- return NULL_TREE; --} -- --/* Return the cgraph_rtl_info of the function called by INSN. Returns NULL for -- call targets that can be overwritten. */ -- --static struct cgraph_rtl_info * --get_call_cgraph_rtl_info (rtx_insn *insn) --{ -- tree fndecl; -- -- if (insn == NULL_RTX) -- return NULL; -- -- fndecl = get_call_fndecl (insn); -- if (fndecl == NULL_TREE -- || !decl_binds_to_current_def_p (fndecl)) -- return NULL; -- -- return cgraph_node::rtl_info (fndecl); --} -- --/* Find hard registers used by function call instruction INSN, and return them -- in REG_SET. Return DEFAULT_SET in REG_SET if not found. */ -- --bool --get_call_reg_set_usage (rtx_insn *insn, HARD_REG_SET *reg_set, -- HARD_REG_SET default_set) --{ -- if (flag_ipa_ra) -- { -- struct cgraph_rtl_info *node = get_call_cgraph_rtl_info (insn); -- if (node != NULL -- && node->function_used_regs_valid) -- { -- COPY_HARD_REG_SET (*reg_set, node->function_used_regs); -- AND_HARD_REG_SET (*reg_set, default_set); -- return true; -- } -- } -- COPY_HARD_REG_SET (*reg_set, default_set); -- targetm.remove_extra_call_preserved_regs (insn, reg_set); -- return false; -+ node->function_used_regs = function_used_regs; - } -diff --git a/gcc/fold-const-call.c b/gcc/fold-const-call.c -index 702c8b405..e21d8e110 100644 ---- a/gcc/fold-const-call.c -+++ b/gcc/fold-const-call.c -@@ -689,6 +689,36 @@ fold_const_vec_convert (tree ret_type, tree arg) - return elts.build (); - } - -+/* Try to evaluate: -+ -+ IFN_WHILE_ULT (ARG0, ARG1, (TYPE) { ... }) -+ -+ Return the value on success and null on failure. 
*/ -+ -+static tree -+fold_while_ult (tree type, poly_uint64 arg0, poly_uint64 arg1) -+{ -+ if (known_ge (arg0, arg1)) -+ return build_zero_cst (type); -+ -+ if (maybe_ge (arg0, arg1)) -+ return NULL_TREE; -+ -+ poly_uint64 diff = arg1 - arg0; -+ poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (type); -+ if (known_ge (diff, nelts)) -+ return build_all_ones_cst (type); -+ -+ unsigned HOST_WIDE_INT const_diff; -+ if (known_le (diff, nelts) && diff.is_constant (&const_diff)) -+ { -+ tree minus_one = build_minus_one_cst (TREE_TYPE (type)); -+ tree zero = build_zero_cst (TREE_TYPE (type)); -+ return build_vector_a_then_b (type, const_diff, minus_one, zero); -+ } -+ return NULL_TREE; -+} -+ - /* Try to evaluate: - - *RESULT = FN (*ARG) -@@ -1782,6 +1812,14 @@ fold_const_call (combined_fn fn, tree type, tree arg0, tree arg1, tree arg2) - } - return NULL_TREE; - -+ case CFN_WHILE_ULT: -+ { -+ poly_uint64 parg0, parg1; -+ if (poly_int_tree_p (arg0, &parg0) && poly_int_tree_p (arg1, &parg1)) -+ return fold_while_ult (type, parg0, parg1); -+ return NULL_TREE; -+ } -+ - default: - return fold_const_call_1 (fn, type, arg0, arg1, arg2); - } -diff --git a/gcc/fold-const.c b/gcc/fold-const.c -index c717f2450..ffc2669a7 100644 ---- a/gcc/fold-const.c -+++ b/gcc/fold-const.c -@@ -3477,7 +3477,8 @@ operand_equal_p (const_tree arg0, const_tree arg1, unsigned int flags) - return (TREE_CODE (arg0) == FUNCTION_DECL - && fndecl_built_in_p (arg0) && fndecl_built_in_p (arg1) - && DECL_BUILT_IN_CLASS (arg0) == DECL_BUILT_IN_CLASS (arg1) -- && DECL_FUNCTION_CODE (arg0) == DECL_FUNCTION_CODE (arg1)); -+ && (DECL_UNCHECKED_FUNCTION_CODE (arg0) -+ == DECL_UNCHECKED_FUNCTION_CODE (arg1))); - - case tcc_exceptional: - if (TREE_CODE (arg0) == CONSTRUCTOR) -@@ -7380,22 +7381,18 @@ native_encode_complex (const_tree expr, unsigned char *ptr, int len, int off) - return rsize + isize; - } - -- --/* Subroutine of native_encode_expr. Encode the VECTOR_CST -- specified by EXPR into the buffer PTR of length LEN bytes. -- Return the number of bytes placed in the buffer, or zero -- upon failure. */ -+/* Like native_encode_vector, but only encode the first COUNT elements. -+ The other arguments are as for native_encode_vector. */ - - static int --native_encode_vector (const_tree expr, unsigned char *ptr, int len, int off) -+native_encode_vector_part (const_tree expr, unsigned char *ptr, int len, -+ int off, unsigned HOST_WIDE_INT count) - { -- unsigned HOST_WIDE_INT i, count; -+ unsigned HOST_WIDE_INT i; - int size, offset; - tree itype, elem; - - offset = 0; -- if (!VECTOR_CST_NELTS (expr).is_constant (&count)) -- return 0; - itype = TREE_TYPE (TREE_TYPE (expr)); - size = GET_MODE_SIZE (SCALAR_TYPE_MODE (itype)); - for (i = 0; i < count; i++) -@@ -7419,6 +7416,20 @@ native_encode_vector (const_tree expr, unsigned char *ptr, int len, int off) - return offset; - } - -+/* Subroutine of native_encode_expr. Encode the VECTOR_CST -+ specified by EXPR into the buffer PTR of length LEN bytes. -+ Return the number of bytes placed in the buffer, or zero -+ upon failure. */ -+ -+static int -+native_encode_vector (const_tree expr, unsigned char *ptr, int len, int off) -+{ -+ unsigned HOST_WIDE_INT count; -+ if (!VECTOR_CST_NELTS (expr).is_constant (&count)) -+ return 0; -+ return native_encode_vector_part (expr, ptr, len, off, count); -+} -+ - - /* Subroutine of native_encode_expr. Encode the STRING_CST - specified by EXPR into the buffer PTR of length LEN bytes. 
-@@ -7714,6 +7725,113 @@ can_native_interpret_type_p (tree type) - } - } - -+/* Read a vector of type TYPE from the target memory image given by BYTES, -+ starting at byte FIRST_BYTE. The vector is known to be encodable using -+ NPATTERNS interleaved patterns with NELTS_PER_PATTERN elements each, -+ and BYTES is known to have enough bytes to supply NPATTERNS * -+ NELTS_PER_PATTERN vector elements. Each element of BYTES contains -+ BITS_PER_UNIT bits and the bytes are in target memory order. -+ -+ Return the vector on success, otherwise return null. */ -+ -+static tree -+native_decode_vector_tree (tree type, vec bytes, -+ unsigned int first_byte, unsigned int npatterns, -+ unsigned int nelts_per_pattern) -+{ -+ tree_vector_builder builder (type, npatterns, nelts_per_pattern); -+ tree elt_type = TREE_TYPE (type); -+ unsigned int elt_bits = tree_to_uhwi (TYPE_SIZE (elt_type)); -+ if (VECTOR_BOOLEAN_TYPE_P (type) && elt_bits <= BITS_PER_UNIT) -+ { -+ /* This is the only case in which elements can be smaller than a byte. -+ Element 0 is always in the lsb of the containing byte. */ -+ elt_bits = TYPE_PRECISION (elt_type); -+ for (unsigned int i = 0; i < builder.encoded_nelts (); ++i) -+ { -+ unsigned int bit_index = first_byte * BITS_PER_UNIT + i * elt_bits; -+ unsigned int byte_index = bit_index / BITS_PER_UNIT; -+ unsigned int lsb = bit_index % BITS_PER_UNIT; -+ builder.quick_push (bytes[byte_index] & (1 << lsb) -+ ? build_all_ones_cst (elt_type) -+ : build_zero_cst (elt_type)); -+ } -+ } -+ else -+ { -+ unsigned int elt_bytes = elt_bits / BITS_PER_UNIT; -+ for (unsigned int i = 0; i < builder.encoded_nelts (); ++i) -+ { -+ tree elt = native_interpret_expr (elt_type, &bytes[first_byte], -+ elt_bytes); -+ if (!elt) -+ return NULL_TREE; -+ builder.quick_push (elt); -+ first_byte += elt_bytes; -+ } -+ } -+ return builder.build (); -+} -+ -+/* Try to view-convert VECTOR_CST EXPR to VECTOR_TYPE TYPE by operating -+ directly on the VECTOR_CST encoding, in a way that works for variable- -+ length vectors. Return the resulting VECTOR_CST on success or null -+ on failure. */ -+ -+static tree -+fold_view_convert_vector_encoding (tree type, tree expr) -+{ -+ tree expr_type = TREE_TYPE (expr); -+ poly_uint64 type_bits, expr_bits; -+ if (!poly_int_tree_p (TYPE_SIZE (type), &type_bits) -+ || !poly_int_tree_p (TYPE_SIZE (expr_type), &expr_bits)) -+ return NULL_TREE; -+ -+ poly_uint64 type_units = TYPE_VECTOR_SUBPARTS (type); -+ poly_uint64 expr_units = TYPE_VECTOR_SUBPARTS (expr_type); -+ unsigned int type_elt_bits = vector_element_size (type_bits, type_units); -+ unsigned int expr_elt_bits = vector_element_size (expr_bits, expr_units); -+ -+ /* We can only preserve the semantics of a stepped pattern if the new -+ vector element is an integer of the same size. */ -+ if (VECTOR_CST_STEPPED_P (expr) -+ && (!INTEGRAL_TYPE_P (type) || type_elt_bits != expr_elt_bits)) -+ return NULL_TREE; -+ -+ /* The number of bits needed to encode one element from every pattern -+ of the original vector. */ -+ unsigned int expr_sequence_bits -+ = VECTOR_CST_NPATTERNS (expr) * expr_elt_bits; -+ -+ /* The number of bits needed to encode one element from every pattern -+ of the result. */ -+ unsigned int type_sequence_bits -+ = least_common_multiple (expr_sequence_bits, type_elt_bits); -+ -+ /* Don't try to read more bytes than are available, which can happen -+ for constant-sized vectors if TYPE has larger elements than EXPR_TYPE. 
-+ The general VIEW_CONVERT handling can cope with that case, so there's -+ no point complicating things here. */ -+ unsigned int nelts_per_pattern = VECTOR_CST_NELTS_PER_PATTERN (expr); -+ unsigned int buffer_bytes = CEIL (nelts_per_pattern * type_sequence_bits, -+ BITS_PER_UNIT); -+ unsigned int buffer_bits = buffer_bytes * BITS_PER_UNIT; -+ if (known_gt (buffer_bits, expr_bits)) -+ return NULL_TREE; -+ -+ /* Get enough bytes of EXPR to form the new encoding. */ -+ auto_vec buffer (buffer_bytes); -+ buffer.quick_grow (buffer_bytes); -+ if (native_encode_vector_part (expr, buffer.address (), buffer_bytes, 0, -+ buffer_bits / expr_elt_bits) -+ != (int) buffer_bytes) -+ return NULL_TREE; -+ -+ /* Reencode the bytes as TYPE. */ -+ unsigned int type_npatterns = type_sequence_bits / type_elt_bits; -+ return native_decode_vector_tree (type, buffer, 0, type_npatterns, -+ nelts_per_pattern); -+} - - /* Fold a VIEW_CONVERT_EXPR of a constant expression EXPR to type - TYPE at compile-time. If we're unable to perform the conversion -@@ -7730,6 +7848,10 @@ fold_view_convert_expr (tree type, tree expr) - if (CHAR_BIT != 8 || BITS_PER_UNIT != 8) - return NULL_TREE; - -+ if (VECTOR_TYPE_P (type) && TREE_CODE (expr) == VECTOR_CST) -+ if (tree res = fold_view_convert_vector_encoding (type, expr)) -+ return res; -+ - len = native_encode_expr (expr, buffer, sizeof (buffer)); - if (len == 0) - return NULL_TREE; -@@ -9030,7 +9152,7 @@ vec_cst_ctor_to_array (tree arg, unsigned int nelts, tree *elts) - selector. Return the folded VECTOR_CST or CONSTRUCTOR if successful, - NULL_TREE otherwise. */ - --static tree -+tree - fold_vec_perm (tree type, tree arg0, tree arg1, const vec_perm_indices &sel) - { - unsigned int i; -@@ -9254,7 +9376,7 @@ tree_expr_nonzero_warnv_p (tree t, bool *strict_overflow_p) - tree fndecl = get_callee_fndecl (t); - if (!fndecl) return false; - if (flag_delete_null_pointer_checks && !flag_check_new -- && DECL_IS_OPERATOR_NEW (fndecl) -+ && DECL_IS_OPERATOR_NEW_P (fndecl) - && !TREE_NOTHROW (fndecl)) - return true; - if (flag_delete_null_pointer_checks -@@ -11778,7 +11900,10 @@ fold_ternary_loc (location_t loc, enum tree_code code, tree type, - return NULL_TREE; - - case VEC_PERM_EXPR: -- if (TREE_CODE (arg2) == VECTOR_CST) -+ /* Perform constant folding of BIT_INSERT_EXPR. */ -+ if (TREE_CODE (arg2) == VECTOR_CST -+ && TREE_CODE (op0) == VECTOR_CST -+ && TREE_CODE (op1) == VECTOR_CST) - { - /* Build a vector of integers from the tree mask. */ - vec_perm_builder builder; -@@ -11789,61 +11914,7 @@ fold_ternary_loc (location_t loc, enum tree_code code, tree type, - poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (type); - bool single_arg = (op0 == op1); - vec_perm_indices sel (builder, single_arg ? 1 : 2, nelts); -- -- /* Check for cases that fold to OP0 or OP1 in their original -- element order. */ -- if (sel.series_p (0, 1, 0, 1)) -- return op0; -- if (sel.series_p (0, 1, nelts, 1)) -- return op1; -- -- if (!single_arg) -- { -- if (sel.all_from_input_p (0)) -- op1 = op0; -- else if (sel.all_from_input_p (1)) -- { -- op0 = op1; -- sel.rotate_inputs (1); -- } -- } -- -- if ((TREE_CODE (op0) == VECTOR_CST -- || TREE_CODE (op0) == CONSTRUCTOR) -- && (TREE_CODE (op1) == VECTOR_CST -- || TREE_CODE (op1) == CONSTRUCTOR)) -- { -- tree t = fold_vec_perm (type, op0, op1, sel); -- if (t != NULL_TREE) -- return t; -- } -- -- bool changed = (op0 == op1 && !single_arg); -- -- /* Generate a canonical form of the selector. 
*/ -- if (arg2 == op2 && sel.encoding () != builder) -- { -- /* Some targets are deficient and fail to expand a single -- argument permutation while still allowing an equivalent -- 2-argument version. */ -- if (sel.ninputs () == 2 -- || can_vec_perm_const_p (TYPE_MODE (type), sel, false)) -- op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel); -- else -- { -- vec_perm_indices sel2 (builder, 2, nelts); -- if (can_vec_perm_const_p (TYPE_MODE (type), sel2, false)) -- op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel2); -- else -- /* Not directly supported with either encoding, -- so use the preferred form. */ -- op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel); -- } -- changed = true; -- } -- -- if (changed) -- return build3_loc (loc, VEC_PERM_EXPR, type, op0, op1, op2); -+ return fold_vec_perm (type, op0, op1, sel); - } - return NULL_TREE; - -diff --git a/gcc/fold-const.h b/gcc/fold-const.h -index e2e662463..1d94e2894 100644 ---- a/gcc/fold-const.h -+++ b/gcc/fold-const.h -@@ -100,6 +100,9 @@ extern tree fold_bit_and_mask (tree, tree, enum tree_code, - tree, enum tree_code, tree, tree, - tree, enum tree_code, tree, tree, tree *); - extern tree fold_read_from_constant_string (tree); -+#if GCC_VEC_PERN_INDICES_H -+extern tree fold_vec_perm (tree, tree, tree, const vec_perm_indices &); -+#endif - extern bool wide_int_binop (wide_int &res, enum tree_code, - const wide_int &arg1, const wide_int &arg2, - signop, wi::overflow_type *); -diff --git a/gcc/function-abi.cc b/gcc/function-abi.cc -new file mode 100644 -index 000000000..b4a183963 ---- /dev/null -+++ b/gcc/function-abi.cc -@@ -0,0 +1,260 @@ -+/* Information about fuunction binary interfaces. -+ Copyright (C) 2019 Free Software Foundation, Inc. -+ -+This file is part of GCC -+ -+GCC is free software; you can redistribute it and/or modify it under -+the terms of the GNU General Public License as published by the Free -+Software Foundation; either version 3, or (at your option) any later -+version. -+ -+GCC is distributed in the hope that it will be useful, but WITHOUT ANY -+WARRANTY; without even the implied warranty of MERCHANTABILITY or -+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+for more details. -+ -+You should have received a copy of the GNU General Public License -+along with GCC; see the file COPYING3. If not see -+. */ -+ -+#include "config.h" -+#include "system.h" -+#include "coretypes.h" -+#include "backend.h" -+#include "target.h" -+#include "rtl.h" -+#include "tree.h" -+#include "regs.h" -+#include "function-abi.h" -+#include "varasm.h" -+#include "cgraph.h" -+ -+target_function_abi_info default_target_function_abi_info; -+#if SWITCHABLE_TARGET -+target_function_abi_info *this_target_function_abi_info -+ = &default_target_function_abi_info; -+#endif -+ -+/* Initialize a predefined function ABI with the given values of -+ ID and FULL_REG_CLOBBERS. */ -+ -+void -+predefined_function_abi::initialize (unsigned int id, -+ const_hard_reg_set full_reg_clobbers) -+{ -+ m_id = id; -+ m_initialized = true; -+ m_full_reg_clobbers = full_reg_clobbers; -+ -+ /* Set up the value of m_full_and_partial_reg_clobbers. -+ -+ If the ABI specifies that part of a hard register R is call-clobbered, -+ we should be able to find a single-register mode M for which -+ targetm.hard_regno_call_part_clobbered (m_id, R, M) is true. -+ In other words, it shouldn't be the case that R can hold all -+ single-register modes across a call, but can't hold part of -+ a multi-register mode. 
-+ -+ If that assumption doesn't hold for a future target, we would need -+ to change the interface of TARGET_HARD_REGNO_CALL_PART_CLOBBERED so -+ that it tells us which registers in a multi-register value are -+ actually clobbered. */ -+ m_full_and_partial_reg_clobbers = full_reg_clobbers; -+ for (unsigned int i = 0; i < NUM_MACHINE_MODES; ++i) -+ { -+ machine_mode mode = (machine_mode) i; -+ for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno) -+ if (targetm.hard_regno_mode_ok (regno, mode) -+ && hard_regno_nregs (regno, mode) == 1 -+ && targetm.hard_regno_call_part_clobbered (m_id, regno, mode)) -+ SET_HARD_REG_BIT (m_full_and_partial_reg_clobbers, regno); -+ } -+ -+ /* For each mode MODE, work out which registers are unable to hold -+ any part of a MODE value across a call, i.e. those for which no -+ overlapping call-preserved (reg:MODE REGNO) exists. -+ -+ We assume that this can be flipped around to say that a call -+ preserves (reg:MODE REGNO) unless the register overlaps this set. -+ The usual reason for this being true is that if (reg:MODE REGNO) -+ contains a part-clobbered register, that register would be -+ part-clobbered regardless of which part of MODE it holds. -+ For example, if (reg:M 2) occupies two registers and if the -+ register 3 portion of it is part-clobbered, (reg:M 3) is usually -+ either invalid or also part-clobbered. */ -+ for (unsigned int i = 0; i < NUM_MACHINE_MODES; ++i) -+ { -+ machine_mode mode = (machine_mode) i; -+ m_mode_clobbers[i] = m_full_and_partial_reg_clobbers; -+ for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno) -+ if (targetm.hard_regno_mode_ok (regno, mode) -+ && !overlaps_hard_reg_set_p (m_full_reg_clobbers, mode, regno) -+ && !targetm.hard_regno_call_part_clobbered (m_id, regno, mode)) -+ remove_from_hard_reg_set (&m_mode_clobbers[i], mode, regno); -+ } -+ -+ /* Check that the assumptions above actually hold, i.e. that testing -+ for single-register modes makes sense, and that overlap tests for -+ mode_clobbers work as expected. */ -+ if (flag_checking) -+ for (unsigned int i = 0; i < NUM_MACHINE_MODES; ++i) -+ { -+ machine_mode mode = (machine_mode) i; -+ const_hard_reg_set all_clobbers = m_full_and_partial_reg_clobbers; -+ for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno) -+ if (targetm.hard_regno_mode_ok (regno, mode) -+ && !overlaps_hard_reg_set_p (m_full_reg_clobbers, mode, regno) -+ && targetm.hard_regno_call_part_clobbered (m_id, regno, mode)) -+ gcc_assert (overlaps_hard_reg_set_p (all_clobbers, mode, regno) -+ && overlaps_hard_reg_set_p (m_mode_clobbers[i], -+ mode, regno)); -+ } -+} -+ -+/* If the ABI has been initialized, add REGNO to the set of registers -+ that can be completely altered by a call. */ -+ -+void -+predefined_function_abi::add_full_reg_clobber (unsigned int regno) -+{ -+ if (!m_initialized) -+ return; -+ -+ SET_HARD_REG_BIT (m_full_reg_clobbers, regno); -+ SET_HARD_REG_BIT (m_full_and_partial_reg_clobbers, regno); -+ for (unsigned int i = 0; i < NUM_MACHINE_MODES; ++i) -+ SET_HARD_REG_BIT (m_mode_clobbers[i], regno); -+} -+ -+/* Return the set of registers that the caller of the recorded functions must -+ save in order to honor the requirements of CALLER_ABI. 
*/ -+ -+HARD_REG_SET -+function_abi_aggregator:: -+caller_save_regs (const function_abi &caller_abi) const -+{ -+ HARD_REG_SET result; -+ CLEAR_HARD_REG_SET (result); -+ for (unsigned int abi_id = 0; abi_id < NUM_ABI_IDS; ++abi_id) -+ { -+ const predefined_function_abi &callee_abi = function_abis[abi_id]; -+ -+ /* Skip cases that clearly aren't problematic. */ -+ if (abi_id == caller_abi.id () -+ || hard_reg_set_empty_p (m_abi_clobbers[abi_id])) -+ continue; -+ -+ /* Collect the set of registers that can be "more clobbered" by -+ CALLEE_ABI than by CALLER_ABI. */ -+ HARD_REG_SET extra_clobbers; -+ CLEAR_HARD_REG_SET (extra_clobbers); -+ for (unsigned int i = 0; i < NUM_MACHINE_MODES; ++i) -+ { -+ machine_mode mode = (machine_mode) i; -+ extra_clobbers |= (callee_abi.mode_clobbers (mode) -+ & ~caller_abi.mode_clobbers (mode)); -+ } -+ -+ /* Restrict it to the set of registers that we actually saw -+ clobbers for (e.g. taking -fipa-ra into account). */ -+ result |= (extra_clobbers & m_abi_clobbers[abi_id]); -+ } -+ return result; -+} -+ -+/* Return the set of registers that cannot be used to hold a value of -+ mode MODE across the calls in a region described by ABIS and MASK, where: -+ -+ * Bit ID of ABIS is set if the region contains a call with -+ function_abi identifier ID. -+ -+ * MASK contains all the registers that are fully or partially -+ clobbered by calls in the region. -+ -+ This is not quite as accurate as testing each individual call, -+ but it's a close and conservatively-correct approximation. -+ It's much better for some targets than just using MASK. */ -+ -+HARD_REG_SET -+call_clobbers_in_region (unsigned int abis, const_hard_reg_set mask, -+ machine_mode mode) -+{ -+ HARD_REG_SET result; -+ CLEAR_HARD_REG_SET (result); -+ for (unsigned int id = 0; abis; abis >>= 1, ++id) -+ if (abis & 1) -+ result |= function_abis[id].mode_clobbers (mode); -+ return result & mask; -+} -+ -+/* Return the predefined ABI used by functions with type TYPE. */ -+ -+const predefined_function_abi & -+fntype_abi (const_tree type) -+{ -+ gcc_assert (FUNC_OR_METHOD_TYPE_P (type)); -+ if (targetm.calls.fntype_abi) -+ return targetm.calls.fntype_abi (type); -+ return default_function_abi; -+} -+ -+/* Return the ABI of function decl FNDECL. */ -+ -+function_abi -+fndecl_abi (const_tree fndecl) -+{ -+ gcc_assert (TREE_CODE (fndecl) == FUNCTION_DECL); -+ const predefined_function_abi &base_abi = fntype_abi (TREE_TYPE (fndecl)); -+ -+ if (flag_ipa_ra && decl_binds_to_current_def_p (fndecl)) -+ if (cgraph_rtl_info *info = cgraph_node::rtl_info (fndecl)) -+ return function_abi (base_abi, info->function_used_regs); -+ -+ return base_abi; -+} -+ -+/* Return the ABI of the function called by INSN. */ -+ -+function_abi -+insn_callee_abi (const rtx_insn *insn) -+{ -+ gcc_assert (insn && CALL_P (insn)); -+ -+ if (flag_ipa_ra) -+ if (tree fndecl = get_call_fndecl (insn)) -+ return fndecl_abi (fndecl); -+ -+ if (targetm.calls.insn_callee_abi) -+ return targetm.calls.insn_callee_abi (insn); -+ -+ return default_function_abi; -+} -+ -+/* Return the ABI of the function called by CALL_EXPR EXP. Return the -+ default ABI for erroneous calls. 
*/ -+ -+function_abi -+expr_callee_abi (const_tree exp) -+{ -+ gcc_assert (TREE_CODE (exp) == CALL_EXPR); -+ -+ if (tree fndecl = get_callee_fndecl (exp)) -+ return fndecl_abi (fndecl); -+ -+ tree callee = CALL_EXPR_FN (exp); -+ if (callee == error_mark_node) -+ return default_function_abi; -+ -+ tree type = TREE_TYPE (callee); -+ if (type == error_mark_node) -+ return default_function_abi; -+ -+ if (POINTER_TYPE_P (type)) -+ { -+ type = TREE_TYPE (type); -+ if (type == error_mark_node) -+ return default_function_abi; -+ } -+ -+ return fntype_abi (type); -+} -diff --git a/gcc/function-abi.h b/gcc/function-abi.h -new file mode 100644 -index 000000000..96a49dfbe ---- /dev/null -+++ b/gcc/function-abi.h -@@ -0,0 +1,320 @@ -+/* Information about fuunction binary interfaces. -+ Copyright (C) 2019 Free Software Foundation, Inc. -+ -+This file is part of GCC -+ -+GCC is free software; you can redistribute it and/or modify it under -+the terms of the GNU General Public License as published by the Free -+Software Foundation; either version 3, or (at your option) any later -+version. -+ -+GCC is distributed in the hope that it will be useful, but WITHOUT ANY -+WARRANTY; without even the implied warranty of MERCHANTABILITY or -+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+for more details. -+ -+You should have received a copy of the GNU General Public License -+along with GCC; see the file COPYING3. If not see -+. */ -+ -+#ifndef GCC_FUNCTION_ABI_H -+#define GCC_FUNCTION_ABI_H -+ -+/* Most targets use the same ABI for all functions in a translation -+ unit, but some targets support interoperability between several ABIs. -+ Each such ABI has a unique 0-based identifier, with 0 always being -+ the default choice of ABI. -+ -+ NUM_ABI_IDS is the maximum number of such ABIs that GCC can handle at once. -+ A bitfield with this number of bits can represent any combinaion of the -+ supported ABIs. */ -+const size_t NUM_ABI_IDS = 8; -+ -+/* Information about one of the target's predefined ABIs. */ -+class predefined_function_abi -+{ -+public: -+ /* A target-specific identifier for this ABI. The value must be in -+ the range [0, NUM_ABI_IDS - 1]. */ -+ unsigned int id () const { return m_id; } -+ -+ /* True if this ABI has been initialized. */ -+ bool initialized_p () const { return m_initialized; } -+ -+ /* Return true if a function call is allowed to alter every bit of -+ register REGNO, so that the register contains an arbitrary value -+ on return. If so, the register cannot hold any part of a value -+ that is live across a call. */ -+ bool -+ clobbers_full_reg_p (unsigned int regno) const -+ { -+ return TEST_HARD_REG_BIT (m_full_reg_clobbers, regno); -+ } -+ -+ /* Return true if a function call is allowed to alter some or all bits -+ of register REGNO. -+ -+ This is true whenever clobbers_full_reg_p (REGNO) is true. It is -+ also true if, for example, the ABI says that a call must preserve the -+ low 32 or 64 bits of REGNO, but can clobber the upper bits of REGNO. -+ In the latter case, it is possible for REGNO to hold values that -+ are live across a call, provided that the value occupies only the -+ call-preserved part of the register. */ -+ bool -+ clobbers_at_least_part_of_reg_p (unsigned int regno) const -+ { -+ return TEST_HARD_REG_BIT (m_full_and_partial_reg_clobbers, regno); -+ } -+ -+ /* Return true if a function call is allowed to clobber at least part -+ of (reg:MODE REGNO). If so, it is not possible for the register -+ as a whole to be live across a call. 
*/ -+ bool -+ clobbers_reg_p (machine_mode mode, unsigned int regno) const -+ { -+ return overlaps_hard_reg_set_p (m_mode_clobbers[mode], mode, regno); -+ } -+ -+ /* Return the set of registers that a function call is allowed to -+ alter completely, so that the registers contain arbitrary values -+ on return. This doesn't include registers that a call can only -+ partly clobber (as per TARGET_HARD_REGNO_CALL_PART_CLOBBERED). -+ -+ These registers cannot hold any part of a value that is live across -+ a call. */ -+ HARD_REG_SET full_reg_clobbers () const { return m_full_reg_clobbers; } -+ -+ /* Return the set of registers that a function call is allowed to alter -+ to some degree. For example, if an ABI says that a call must preserve -+ the low 32 or 64 bits of a register R, but can clobber the upper bits -+ of R, R would be in this set but not in full_reg_clobbers (). -+ -+ This set is a superset of full_reg_clobbers (). It is possible for a -+ register in full_and_partial_reg_clobbers () & ~full_reg_clobbers () -+ to contain values that are live across a call, provided that the live -+ value only occupies the call-preserved part of the register. */ -+ HARD_REG_SET -+ full_and_partial_reg_clobbers () const -+ { -+ return m_full_and_partial_reg_clobbers; -+ } -+ -+ /* Return the set of registers that cannot be used to hold a value of -+ mode MODE across a function call. That is: -+ -+ (reg:REGNO MODE) -+ -+ might be clobbered by a call whenever: -+ -+ overlaps_hard_reg_set (mode_clobbers (MODE), MODE, REGNO) -+ -+ In allocation terms, the registers in the returned set conflict -+ with any value of mode MODE that is live across a call. */ -+ HARD_REG_SET -+ mode_clobbers (machine_mode mode) const -+ { -+ return m_mode_clobbers[mode]; -+ } -+ -+ void initialize (unsigned int, const_hard_reg_set); -+ void add_full_reg_clobber (unsigned int); -+ -+private: -+ unsigned int m_id : NUM_ABI_IDS; -+ unsigned int m_initialized : 1; -+ HARD_REG_SET m_full_reg_clobbers; -+ HARD_REG_SET m_full_and_partial_reg_clobbers; -+ HARD_REG_SET m_mode_clobbers[NUM_MACHINE_MODES]; -+}; -+ -+/* Describes either a predefined ABI or the ABI of a particular function. -+ In the latter case, the ABI might make use of extra function-specific -+ information, such as for -fipa-ra. */ -+class function_abi -+{ -+public: -+ /* Initialize the structure for a general function with the given ABI. */ -+ function_abi (const predefined_function_abi &base_abi) -+ : m_base_abi (&base_abi), -+ m_mask (base_abi.full_and_partial_reg_clobbers ()) {} -+ -+ /* Initialize the structure for a function that has the given ABI and -+ that is known not to clobber registers outside MASK. */ -+ function_abi (const predefined_function_abi &base_abi, -+ const_hard_reg_set mask) -+ : m_base_abi (&base_abi), m_mask (mask) {} -+ -+ /* The predefined ABI from which this ABI is derived. */ -+ const predefined_function_abi &base_abi () const { return *m_base_abi; } -+ -+ /* The target-specific identifier of the predefined ABI. */ -+ unsigned int id () const { return m_base_abi->id (); } -+ -+ /* See the corresponding predefined_function_abi functions for -+ details about the following functions. 
*/ -+ -+ HARD_REG_SET -+ full_reg_clobbers () const -+ { -+ return m_mask & m_base_abi->full_reg_clobbers (); -+ } -+ -+ HARD_REG_SET -+ full_and_partial_reg_clobbers () const -+ { -+ return m_mask & m_base_abi->full_and_partial_reg_clobbers (); -+ } -+ -+ HARD_REG_SET -+ mode_clobbers (machine_mode mode) const -+ { -+ return m_mask & m_base_abi->mode_clobbers (mode); -+ } -+ -+ bool -+ clobbers_full_reg_p (unsigned int regno) const -+ { -+ return (TEST_HARD_REG_BIT (m_mask, regno) -+ & m_base_abi->clobbers_full_reg_p (regno)); -+ } -+ -+ bool -+ clobbers_at_least_part_of_reg_p (unsigned int regno) const -+ { -+ return (TEST_HARD_REG_BIT (m_mask, regno) -+ & m_base_abi->clobbers_at_least_part_of_reg_p (regno)); -+ } -+ -+ bool -+ clobbers_reg_p (machine_mode mode, unsigned int regno) const -+ { -+ return overlaps_hard_reg_set_p (mode_clobbers (mode), mode, regno); -+ } -+ -+ bool -+ operator== (const function_abi &other) const -+ { -+ return m_base_abi == other.m_base_abi && m_mask == other.m_mask; -+ } -+ -+ bool -+ operator!= (const function_abi &other) const -+ { -+ return !operator== (other); -+ } -+ -+protected: -+ const predefined_function_abi *m_base_abi; -+ HARD_REG_SET m_mask; -+}; -+ -+/* This class collects information about the ABIs of functions that are -+ called in a particular region of code. It is mostly intended to be -+ used as a local variable during an IR walk. */ -+class function_abi_aggregator -+{ -+public: -+ function_abi_aggregator () : m_abi_clobbers () {} -+ -+ /* Record that the code region calls a function with the given ABI. */ -+ void -+ note_callee_abi (const function_abi &abi) -+ { -+ m_abi_clobbers[abi.id ()] |= abi.full_and_partial_reg_clobbers (); -+ } -+ -+ HARD_REG_SET caller_save_regs (const function_abi &) const; -+ -+private: -+ HARD_REG_SET m_abi_clobbers[NUM_ABI_IDS]; -+}; -+ -+struct target_function_abi_info -+{ -+ /* An array of all the target ABIs that are available in this -+ translation unit. Not all entries are used for all targets, -+ but the structures are relatively small, and using a fixed-size -+ array avoids extra indirection. -+ -+ There are various ways of getting an ABI descriptor: -+ -+ * fndecl_abi (FNDECL) is the ABI of function FNDECL. -+ -+ * fntype_abi (FNTYPE) is the ABI of a function with type FNTYPE. -+ -+ * crtl->abi is the ABI of the function that we are currently -+ compiling to rtl. -+ -+ * insn_callee_abi (INSN) is the ABI used by the target of call insn INSN. -+ -+ * eh_edge_abi is the "ABI" used when taking an EH edge from an -+ exception-throwing statement to an exception handler. Catching -+ exceptions from calls can be treated as an abnormal return from -+ those calls, and this ABI therefore describes the ABI of functions -+ on such an abnormal return. Statements that throw non-call -+ exceptions can be treated as being implicitly wrapped in a call -+ that has such an abnormal return. -+ -+ At present, no target needs to support more than one EH ABI. -+ -+ * function_abis[N] is the ABI with identifier N. This can be useful -+ when referring back to ABIs that have been collected by number in -+ a bitmask, such as after walking function calls in a particular -+ region of code. -+ -+ * default_function_abi refers specifically to the target's default -+ choice of ABI, regardless of which (if any) functions actually -+ use it. This ABI and data derived from it do *not* provide -+ globally conservatively-correct information, so it is only -+ useful in very specific circumstances. 
*/ -+ predefined_function_abi x_function_abis[NUM_ABI_IDS]; -+}; -+ -+extern target_function_abi_info default_target_function_abi_info; -+#if SWITCHABLE_TARGET -+extern target_function_abi_info *this_target_function_abi_info; -+#else -+#define this_target_function_abi_info (&default_target_function_abi_info) -+#endif -+ -+/* See the comment above x_function_abis for when these macros should be used. -+ At present, eh_edge_abi is always the default ABI, but that could change -+ in future if a target needs it to. */ -+#define function_abis \ -+ (this_target_function_abi_info->x_function_abis) -+#define default_function_abi \ -+ (this_target_function_abi_info->x_function_abis[0]) -+#define eh_edge_abi default_function_abi -+ -+extern HARD_REG_SET call_clobbers_in_region (unsigned int, const_hard_reg_set, -+ machine_mode mode); -+ -+/* Return true if (reg:MODE REGNO) might be clobbered by one of the -+ calls in a region described by ABIS and MASK, where: -+ -+ * Bit ID of ABIS is set if the region contains a call with -+ function_abi identifier ID. -+ -+ * MASK contains all the registers that are fully or partially -+ clobbered by calls in the region. -+ -+ This is not quite as accurate as testing each individual call, -+ but it's a close and conservatively-correct approximation. -+ It's much better for some targets than: -+ -+ overlaps_hard_reg_set_p (MASK, MODE, REGNO). */ -+ -+inline bool -+call_clobbered_in_region_p (unsigned int abis, const_hard_reg_set mask, -+ machine_mode mode, unsigned int regno) -+{ -+ HARD_REG_SET clobbers = call_clobbers_in_region (abis, mask, mode); -+ return overlaps_hard_reg_set_p (clobbers, mode, regno); -+} -+ -+extern const predefined_function_abi &fntype_abi (const_tree); -+extern function_abi fndecl_abi (const_tree); -+extern function_abi insn_callee_abi (const rtx_insn *); -+extern function_abi expr_callee_abi (const_tree); -+ -+#endif -diff --git a/gcc/function.c b/gcc/function.c -index acf9f9e60..6d5574244 100644 ---- a/gcc/function.c -+++ b/gcc/function.c -@@ -79,6 +79,7 @@ along with GCC; see the file COPYING3. If not see - #include "attribs.h" - #include "gimple.h" - #include "options.h" -+#include "function-abi.h" - - /* So we can assign to cfun in this file. */ - #undef cfun -@@ -2121,7 +2122,7 @@ aggregate_value_p (const_tree exp, const_tree fntype) - regno = REGNO (reg); - nregs = hard_regno_nregs (regno, TYPE_MODE (type)); - for (i = 0; i < nregs; i++) -- if (! call_used_regs[regno + i]) -+ if (! call_used_or_fixed_reg_p (regno + i)) - return 1; - - return 0; -@@ -2454,13 +2455,15 @@ assign_parm_find_data_types (struct assign_parm_data_all *all, tree parm, - passed_type = TREE_TYPE (first_field (passed_type)); - - /* See if this arg was passed by invisible reference. */ -- if (pass_by_reference (&all->args_so_far_v, passed_mode, -- passed_type, data->named_arg)) -- { -- passed_type = nominal_type = build_pointer_type (passed_type); -- data->passed_pointer = true; -- passed_mode = nominal_mode = TYPE_MODE (nominal_type); -- } -+ { -+ function_arg_info arg (passed_type, passed_mode, data->named_arg); -+ if (apply_pass_by_reference_rules (&all->args_so_far_v, arg)) -+ { -+ passed_type = nominal_type = arg.type; -+ data->passed_pointer = true; -+ passed_mode = nominal_mode = arg.mode; -+ } -+ } - - /* Find mode as it is passed by the ABI. 
*/ - unsignedp = TYPE_UNSIGNED (passed_type); -@@ -2483,9 +2486,9 @@ assign_parms_setup_varargs (struct assign_parm_data_all *all, - { - int varargs_pretend_bytes = 0; - -- targetm.calls.setup_incoming_varargs (all->args_so_far, -- data->promoted_mode, -- data->passed_type, -+ function_arg_info last_named_arg (data->passed_type, data->promoted_mode, -+ /*named=*/true); -+ targetm.calls.setup_incoming_varargs (all->args_so_far, last_named_arg, - &varargs_pretend_bytes, no_rtl); - - /* If the back-end has requested extra stack space, record how much is -@@ -2515,11 +2518,9 @@ assign_parm_find_entry_rtl (struct assign_parm_data_all *all, - targetm.calls.warn_parameter_passing_abi (all->args_so_far, - data->passed_type); - -- entry_parm = targetm.calls.function_incoming_arg (all->args_so_far, -- data->promoted_mode, -- data->passed_type, -- data->named_arg); -- -+ function_arg_info arg (data->passed_type, data->promoted_mode, -+ data->named_arg); -+ entry_parm = targetm.calls.function_incoming_arg (all->args_so_far, arg); - if (entry_parm == 0) - data->promoted_mode = data->passed_mode; - -@@ -2542,27 +2543,26 @@ assign_parm_find_entry_rtl (struct assign_parm_data_all *all, - if (targetm.calls.pretend_outgoing_varargs_named (all->args_so_far)) - { - rtx tem; -+ function_arg_info named_arg (data->passed_type, data->promoted_mode, -+ /*named=*/true); - tem = targetm.calls.function_incoming_arg (all->args_so_far, -- data->promoted_mode, -- data->passed_type, true); -+ named_arg); - in_regs = tem != NULL; - } - } - - /* If this parameter was passed both in registers and in the stack, use - the copy on the stack. */ -- if (targetm.calls.must_pass_in_stack (data->promoted_mode, -- data->passed_type)) -+ if (targetm.calls.must_pass_in_stack (arg)) - entry_parm = 0; - - if (entry_parm) - { - int partial; - -- partial = targetm.calls.arg_partial_bytes (all->args_so_far, -- data->promoted_mode, -- data->passed_type, -- data->named_arg); -+ function_arg_info arg (data->passed_type, data->promoted_mode, -+ data->named_arg); -+ partial = targetm.calls.arg_partial_bytes (all->args_so_far, arg); - data->partial = partial; - - /* The caller might already have allocated stack space for the -@@ -3226,8 +3226,7 @@ assign_parm_setup_reg (struct assign_parm_data_all *all, tree parm, - for (insn = insns; insn && moved; insn = NEXT_INSN (insn)) - { - if (INSN_P (insn)) -- note_stores (PATTERN (insn), record_hard_reg_sets, -- &hardregs); -+ note_stores (insn, record_hard_reg_sets, &hardregs); - if (!hard_reg_set_empty_p (hardregs)) - moved = false; - } -@@ -3647,8 +3646,9 @@ assign_parms (tree fndecl) - assign_parms_setup_varargs (&all, &data, false); - - /* Update info on where next arg arrives in registers. */ -- targetm.calls.function_arg_advance (all.args_so_far, data.promoted_mode, -- data.passed_type, data.named_arg); -+ function_arg_info arg (data.passed_type, data.promoted_mode, -+ data.named_arg); -+ targetm.calls.function_arg_advance (all.args_so_far, arg); - } - - if (targetm.calls.split_complex_arg) -@@ -3835,8 +3835,9 @@ gimplify_parameters (gimple_seq *cleanup) - continue; - - /* Update info on where next arg arrives in registers. */ -- targetm.calls.function_arg_advance (all.args_so_far, data.promoted_mode, -- data.passed_type, data.named_arg); -+ function_arg_info arg (data.passed_type, data.promoted_mode, -+ data.named_arg); -+ targetm.calls.function_arg_advance (all.args_so_far, arg); - - /* ??? 
Once upon a time variable_size stuffed parameter list - SAVE_EXPRs (amongst others) onto a pending sizes list. This -@@ -3854,8 +3855,8 @@ gimplify_parameters (gimple_seq *cleanup) - if (data.passed_pointer) - { - tree type = TREE_TYPE (data.passed_type); -- if (reference_callee_copied (&all.args_so_far_v, TYPE_MODE (type), -- type, data.named_arg)) -+ function_arg_info orig_arg (type, data.named_arg); -+ if (reference_callee_copied (&all.args_so_far_v, orig_arg)) - { - tree local, t; - -@@ -4823,6 +4824,12 @@ static void - prepare_function_start (void) - { - gcc_assert (!get_last_insn ()); -+ -+ if (in_dummy_function) -+ crtl->abi = &default_function_abi; -+ else -+ crtl->abi = &fndecl_abi (cfun->decl).base_abi (); -+ - init_temp_slots (); - init_emit (); - init_varasm_status (); -diff --git a/gcc/fwprop.c b/gcc/fwprop.c -index f2966fada..e6f375271 100644 ---- a/gcc/fwprop.c -+++ b/gcc/fwprop.c -@@ -740,7 +740,7 @@ propagate_rtx (rtx x, machine_mode mode, rtx old_rtx, rtx new_rtx, - || CONSTANT_P (new_rtx) - || (GET_CODE (new_rtx) == SUBREG - && REG_P (SUBREG_REG (new_rtx)) -- && !paradoxical_subreg_p (mode, GET_MODE (SUBREG_REG (new_rtx))))) -+ && !paradoxical_subreg_p (new_rtx))) - flags |= PR_CAN_APPEAR; - if (!varying_mem_p (new_rtx)) - flags |= PR_HANDLE_MEM; -diff --git a/gcc/gcc.c b/gcc/gcc.c -index 4f57765b0..1a5ad7db3 100644 ---- a/gcc/gcc.c -+++ b/gcc/gcc.c -@@ -4041,6 +4041,10 @@ driver_handle_option (struct gcc_options *opts, - diagnostic_color_init (dc, value); - break; - -+ case OPT_fdiagnostics_urls_: -+ diagnostic_urls_init (dc, value); -+ break; -+ - case OPT_fdiagnostics_format_: - diagnostic_output_format_init (dc, - (enum diagnostics_output_format)value); -@@ -7438,6 +7442,7 @@ driver::global_initializations () - - diagnostic_initialize (global_dc, 0); - diagnostic_color_init (global_dc); -+ diagnostic_urls_init (global_dc); - - #ifdef GCC_DRIVER_HOST_INITIALIZATION - /* Perform host dependent initialization when needed. */ -diff --git a/gcc/gcse-common.c b/gcc/gcse-common.c -index e6e4b642b..55148623f 100644 ---- a/gcc/gcse-common.c -+++ b/gcc/gcse-common.c -@@ -89,7 +89,7 @@ record_last_mem_set_info_common (rtx_insn *insn, - struct gcse_note_stores_info data; - data.insn = insn; - data.canon_mem_list = canon_modify_mem_list; -- note_stores (PATTERN (insn), canon_list_insert, (void*) &data); -+ note_stores (insn, canon_list_insert, (void*) &data); - } - } - -diff --git a/gcc/gcse.c b/gcc/gcse.c -index 7fbdd6750..373ba7a16 100644 ---- a/gcc/gcse.c -+++ b/gcc/gcse.c -@@ -1049,7 +1049,7 @@ load_killed_in_block_p (const_basic_block bb, int uid_limit, const_rtx x, - note_stores to examine each hunk of memory that is modified. */ - mci.mem = x; - mci.conflict = false; -- note_stores (PATTERN (setter), mems_conflict_for_gcse_p, &mci); -+ note_stores (setter, mems_conflict_for_gcse_p, &mci); - if (mci.conflict) - return 1; - } -@@ -1537,7 +1537,7 @@ compute_hash_table_work (struct gcse_hash_table_d *table) - record_last_mem_set_info (insn); - } - -- note_stores (PATTERN (insn), record_last_set_info, insn); -+ note_stores (insn, record_last_set_info, insn); - } - - /* The next pass builds the hash table. */ -@@ -2415,7 +2415,7 @@ single_set_gcse (rtx_insn *insn) - - s.insn = insn; - s.nsets = 0; -- note_stores (pattern, record_set_data, &s); -+ note_pattern_stores (pattern, record_set_data, &s); - - /* Considered invariant insns have exactly one set. 
*/ - gcc_assert (s.nsets == 1); -diff --git a/gcc/genconfig.c b/gcc/genconfig.c -index 194fe950d..6f914b1e4 100644 ---- a/gcc/genconfig.c -+++ b/gcc/genconfig.c -@@ -72,7 +72,6 @@ walk_insn_part (rtx part, int recog_p, int non_pc_set_src) - switch (code) - { - case CLOBBER: -- case CLOBBER_HIGH: - clobbers_seen_this_insn++; - break; - -diff --git a/gcc/genemit.c b/gcc/genemit.c -index 83f86a35c..e03af01f2 100644 ---- a/gcc/genemit.c -+++ b/gcc/genemit.c -@@ -169,15 +169,6 @@ gen_exp (rtx x, enum rtx_code subroutine_type, char *used, md_rtx_info *info) - return; - } - break; -- case CLOBBER_HIGH: -- if (!REG_P (XEXP (x, 0))) -- error ("CLOBBER_HIGH argument is not a register expr, at %s:%d", -- info->loc.filename, info->loc.lineno); -- printf ("gen_hard_reg_clobber_high (%smode, %i)", -- GET_MODE_NAME (GET_MODE (XEXP (x, 0))), -- REGNO (XEXP (x, 0))); -- return; -- break; - case CC0: - printf ("cc0_rtx"); - return; -@@ -343,8 +334,7 @@ gen_insn (md_rtx_info *info) - - for (i = XVECLEN (insn, 1) - 1; i > 0; i--) - { -- if (GET_CODE (XVECEXP (insn, 1, i)) != CLOBBER -- && GET_CODE (XVECEXP (insn, 1, i)) != CLOBBER_HIGH) -+ if (GET_CODE (XVECEXP (insn, 1, i)) != CLOBBER) - break; - - if (REG_P (XEXP (XVECEXP (insn, 1, i), 0))) -@@ -811,42 +801,45 @@ handle_overloaded_code_for (overloaded_name *oname) - static void - handle_overloaded_gen (overloaded_name *oname) - { -+ unsigned HOST_WIDE_INT seen = 0; - /* All patterns must have the same number of operands. */ -- pattern_stats stats; -- get_pattern_stats (&stats, XVEC (oname->first_instance->insn, 1)); - for (overloaded_instance *instance = oname->first_instance->next; - instance; instance = instance->next) - { -- pattern_stats stats2; -- get_pattern_stats (&stats2, XVEC (instance->insn, 1)); -- if (stats.num_generator_args != stats2.num_generator_args) -- fatal_at (get_file_location (instance->insn), -- "inconsistent number of operands for '%s'; " -- "this instance has %d, but previous instances had %d", -- oname->name, stats2.num_generator_args, -- stats.num_generator_args); -+ pattern_stats stats; -+ get_pattern_stats (&stats, XVEC (instance->insn, 1)); -+ unsigned HOST_WIDE_INT mask -+ = HOST_WIDE_INT_1U << stats.num_generator_args; -+ if (seen & mask) -+ continue; -+ -+ seen |= mask; -+ -+ /* Print the function prototype. */ -+ printf ("\nrtx\nmaybe_gen_%s (", oname->name); -+ print_overload_arguments (oname); -+ for (int i = 0; i < stats.num_generator_args; ++i) -+ printf (", rtx x%d", i); -+ printf (")\n{\n"); -+ -+ /* Use maybe_code_for_*, instead of duplicating the selection -+ logic here. */ -+ printf (" insn_code code = maybe_code_for_%s (", oname->name); -+ for (unsigned int i = 0; i < oname->arg_types.length (); ++i) -+ printf ("%sarg%d", i == 0 ? "" : ", ", i); -+ printf (");\n" -+ " if (code != CODE_FOR_nothing)\n" -+ " {\n" -+ " gcc_assert (insn_data[code].n_generator_args == %d);\n" -+ " return GEN_FCN (code) (", stats.num_generator_args); -+ for (int i = 0; i < stats.num_generator_args; ++i) -+ printf ("%sx%d", i == 0 ? "" : ", ", i); -+ printf (");\n" -+ " }\n" -+ " else\n" -+ " return NULL_RTX;\n" -+ "}\n"); - } -- -- /* Print the function prototype. */ -- printf ("\nrtx\nmaybe_gen_%s (", oname->name); -- print_overload_arguments (oname); -- for (int i = 0; i < stats.num_generator_args; ++i) -- printf (", rtx x%d", i); -- printf (")\n{\n"); -- -- /* Use maybe_code_for_*, instead of duplicating the selection logic here. 
*/ -- printf (" insn_code code = maybe_code_for_%s (", oname->name); -- for (unsigned int i = 0; i < oname->arg_types.length (); ++i) -- printf ("%sarg%d", i == 0 ? "" : ", ", i); -- printf (");\n" -- " if (code != CODE_FOR_nothing)\n" -- " return GEN_FCN (code) ("); -- for (int i = 0; i < stats.num_generator_args; ++i) -- printf ("%sx%d", i == 0 ? "" : ", ", i); -- printf (");\n" -- " else\n" -- " return NULL_RTX;\n" -- "}\n"); - } - - int -diff --git a/gcc/generic-match-head.c b/gcc/generic-match-head.c -index 3478cf59f..e9ef343c9 100644 ---- a/gcc/generic-match-head.c -+++ b/gcc/generic-match-head.c -@@ -27,6 +27,7 @@ along with GCC; see the file COPYING3. If not see - #include "gimple.h" - #include "ssa.h" - #include "cgraph.h" -+#include "vec-perm-indices.h" - #include "fold-const.h" - #include "fold-const-call.h" - #include "stor-layout.h" -diff --git a/gcc/genmodes.c b/gcc/genmodes.c -index f33eefa24..95522d6b5 100644 ---- a/gcc/genmodes.c -+++ b/gcc/genmodes.c -@@ -53,6 +53,7 @@ struct mode_data - - const char *name; /* printable mode name -- SI, not SImode */ - enum mode_class cl; /* this mode class */ -+ unsigned int order; /* top-level sorting order */ - unsigned int precision; /* size in bits, equiv to TYPE_PRECISION */ - unsigned int bytesize; /* storage size in addressable units */ - unsigned int ncomponents; /* number of subunits */ -@@ -85,7 +86,7 @@ static struct mode_data *void_mode; - - static const struct mode_data blank_mode = { - 0, "", MAX_MODE_CLASS, -- -1U, -1U, -1U, -1U, -+ 0, -1U, -1U, -1U, -1U, - 0, 0, 0, 0, 0, 0, - "", 0, 0, 0, 0, false, false, 0 - }; -@@ -484,14 +485,15 @@ make_complex_modes (enum mode_class cl, - } - } - --/* For all modes in class CL, construct vector modes of width -- WIDTH, having as many components as necessary. */ --#define VECTOR_MODES_WITH_PREFIX(PREFIX, C, W) \ -- make_vector_modes (MODE_##C, #PREFIX, W, __FILE__, __LINE__) --#define VECTOR_MODES(C, W) VECTOR_MODES_WITH_PREFIX (V, C, W) -+/* For all modes in class CL, construct vector modes of width WIDTH, -+ having as many components as necessary. ORDER is the sorting order -+ of the mode, with smaller numbers indicating a higher priority. 
*/ -+#define VECTOR_MODES_WITH_PREFIX(PREFIX, C, W, ORDER) \ -+ make_vector_modes (MODE_##C, #PREFIX, W, ORDER, __FILE__, __LINE__) -+#define VECTOR_MODES(C, W) VECTOR_MODES_WITH_PREFIX (V, C, W, 0) - static void ATTRIBUTE_UNUSED - make_vector_modes (enum mode_class cl, const char *prefix, unsigned int width, -- const char *file, unsigned int line) -+ unsigned int order, const char *file, unsigned int line) - { - struct mode_data *m; - struct mode_data *v; -@@ -530,6 +532,7 @@ make_vector_modes (enum mode_class cl, const char *prefix, unsigned int width, - } - - v = new_mode (vclass, xstrdup (buf), file, line); -+ v->order = order; - v->component = m; - v->ncomponents = ncomponents; - } -@@ -832,6 +835,11 @@ cmp_modes (const void *a, const void *b) - const struct mode_data *const m = *(const struct mode_data *const*)a; - const struct mode_data *const n = *(const struct mode_data *const*)b; - -+ if (m->order > n->order) -+ return 1; -+ else if (m->order < n->order) -+ return -1; -+ - if (m->bytesize > n->bytesize) - return 1; - else if (m->bytesize < n->bytesize) -diff --git a/gcc/genopinit.c b/gcc/genopinit.c -index ea4c3ce01..1dd1d82d0 100644 ---- a/gcc/genopinit.c -+++ b/gcc/genopinit.c -@@ -134,31 +134,43 @@ handle_overloaded_code_for (FILE *file, overloaded_name *oname) - static void - handle_overloaded_gen (FILE *file, overloaded_name *oname) - { -- pattern_stats stats; -- get_pattern_stats (&stats, XVEC (oname->first_instance->insn, 1)); -- -- fprintf (file, "\nextern rtx maybe_gen_%s (", oname->name); -- for (unsigned int i = 0; i < oname->arg_types.length (); ++i) -- fprintf (file, "%s%s", i == 0 ? "" : ", ", oname->arg_types[i]); -- for (int i = 0; i < stats.num_generator_args; ++i) -- fprintf (file, ", rtx"); -- fprintf (file, ");\n"); -- -- fprintf (file, "inline rtx\ngen_%s (", oname->name); -- for (unsigned int i = 0; i < oname->arg_types.length (); ++i) -- fprintf (file, "%s%s arg%d", i == 0 ? "" : ", ", oname->arg_types[i], i); -- for (int i = 0; i < stats.num_generator_args; ++i) -- fprintf (file, ", rtx x%d", i); -- fprintf (file, ")\n{\n rtx res = maybe_gen_%s (", oname->name); -- for (unsigned int i = 0; i < oname->arg_types.length (); ++i) -- fprintf (file, "%sarg%d", i == 0 ? "" : ", ", i); -- for (int i = 0; i < stats.num_generator_args; ++i) -- fprintf (file, ", x%d", i); -- fprintf (file, -- ");\n" -- " gcc_assert (res);\n" -- " return res;\n" -- "}\n"); -+ unsigned HOST_WIDE_INT seen = 0; -+ for (overloaded_instance *instance = oname->first_instance->next; -+ instance; instance = instance->next) -+ { -+ pattern_stats stats; -+ get_pattern_stats (&stats, XVEC (instance->insn, 1)); -+ unsigned HOST_WIDE_INT mask -+ = HOST_WIDE_INT_1U << stats.num_generator_args; -+ if (seen & mask) -+ continue; -+ -+ seen |= mask; -+ -+ fprintf (file, "\nextern rtx maybe_gen_%s (", oname->name); -+ for (unsigned int i = 0; i < oname->arg_types.length (); ++i) -+ fprintf (file, "%s%s", i == 0 ? "" : ", ", oname->arg_types[i]); -+ for (int i = 0; i < stats.num_generator_args; ++i) -+ fprintf (file, ", rtx"); -+ fprintf (file, ");\n"); -+ -+ fprintf (file, "inline rtx\ngen_%s (", oname->name); -+ for (unsigned int i = 0; i < oname->arg_types.length (); ++i) -+ fprintf (file, "%s%s arg%d", i == 0 ? 
"" : ", ", -+ oname->arg_types[i], i); -+ for (int i = 0; i < stats.num_generator_args; ++i) -+ fprintf (file, ", rtx x%d", i); -+ fprintf (file, ")\n{\n rtx res = maybe_gen_%s (", oname->name); -+ for (unsigned int i = 0; i < oname->arg_types.length (); ++i) -+ fprintf (file, "%sarg%d", i == 0 ? "" : ", ", i); -+ for (int i = 0; i < stats.num_generator_args; ++i) -+ fprintf (file, ", x%d", i); -+ fprintf (file, -+ ");\n" -+ " gcc_assert (res);\n" -+ " return res;\n" -+ "}\n"); -+ } - } - - int -diff --git a/gcc/genrecog.c b/gcc/genrecog.c -index 90e2508fa..ec921702a 100644 ---- a/gcc/genrecog.c -+++ b/gcc/genrecog.c -@@ -718,7 +718,6 @@ validate_pattern (rtx pattern, md_rtx_info *info, rtx set, int set_code) - } - - case CLOBBER: -- case CLOBBER_HIGH: - validate_pattern (SET_DEST (pattern), info, pattern, '='); - return; - -@@ -5295,7 +5294,7 @@ remove_clobbers (acceptance_type *acceptance_ptr, rtx *pattern_ptr) - for (i = XVECLEN (pattern, 0); i > 0; i--) - { - rtx x = XVECEXP (pattern, 0, i - 1); -- if ((GET_CODE (x) != CLOBBER && GET_CODE (x) != CLOBBER_HIGH) -+ if (GET_CODE (x) != CLOBBER - || (!REG_P (XEXP (x, 0)) - && GET_CODE (XEXP (x, 0)) != MATCH_SCRATCH)) - break; -diff --git a/gcc/gensupport.c b/gcc/gensupport.c -index 31a67d5ad..ab6a523dd 100644 ---- a/gcc/gensupport.c -+++ b/gcc/gensupport.c -@@ -70,8 +70,8 @@ struct queue_elem - rtx data; - file_location loc; - struct queue_elem *next; -- /* In a DEFINE_INSN that came from a DEFINE_INSN_AND_SPLIT, SPLIT -- points to the generated DEFINE_SPLIT. */ -+ /* In a DEFINE_INSN that came from a DEFINE_INSN_AND_SPLIT or -+ DEFINE_INSN_AND_REWRITE, SPLIT points to the generated DEFINE_SPLIT. */ - struct queue_elem *split; - }; - -@@ -485,6 +485,65 @@ remove_constraints (rtx part) - } - } - -+/* Recursively replace MATCH_OPERANDs with MATCH_DUPs and MATCH_OPERATORs -+ with MATCH_OP_DUPs in X. */ -+ -+static rtx -+replace_operands_with_dups (rtx x) -+{ -+ if (x == 0) -+ return x; -+ -+ rtx newx; -+ if (GET_CODE (x) == MATCH_OPERAND) -+ { -+ newx = rtx_alloc (MATCH_DUP); -+ XINT (newx, 0) = XINT (x, 0); -+ x = newx; -+ } -+ else if (GET_CODE (x) == MATCH_OPERATOR) -+ { -+ newx = rtx_alloc (MATCH_OP_DUP); -+ XINT (newx, 0) = XINT (x, 0); -+ XVEC (newx, 1) = XVEC (x, 2); -+ x = newx; -+ } -+ else -+ newx = shallow_copy_rtx (x); -+ -+ const char *format_ptr = GET_RTX_FORMAT (GET_CODE (x)); -+ for (int i = 0; i < GET_RTX_LENGTH (GET_CODE (x)); i++) -+ switch (*format_ptr++) -+ { -+ case 'e': -+ case 'u': -+ XEXP (newx, i) = replace_operands_with_dups (XEXP (x, i)); -+ break; -+ case 'E': -+ if (XVEC (x, i) != NULL) -+ { -+ XVEC (newx, i) = rtvec_alloc (XVECLEN (x, i)); -+ for (int j = 0; j < XVECLEN (x, i); j++) -+ XVECEXP (newx, i, j) -+ = replace_operands_with_dups (XVECEXP (x, i, j)); -+ } -+ break; -+ } -+ return newx; -+} -+ -+/* Convert matching pattern VEC from a DEFINE_INSN_AND_REWRITE into -+ a sequence that should be generated by the splitter. */ -+ -+static rtvec -+gen_rewrite_sequence (rtvec vec) -+{ -+ rtvec new_vec = rtvec_alloc (1); -+ rtx x = add_implicit_parallel (vec); -+ RTVEC_ELT (new_vec, 0) = replace_operands_with_dups (x); -+ return new_vec; -+} -+ - /* Process a top level rtx in some way, queuing as appropriate. 
*/ - - static void -@@ -527,6 +586,7 @@ process_rtx (rtx desc, file_location loc) - break; - - case DEFINE_INSN_AND_SPLIT: -+ case DEFINE_INSN_AND_REWRITE: - { - const char *split_cond; - rtx split; -@@ -534,6 +594,7 @@ process_rtx (rtx desc, file_location loc) - int i; - struct queue_elem *insn_elem; - struct queue_elem *split_elem; -+ int split_code = (GET_CODE (desc) == DEFINE_INSN_AND_REWRITE ? 5 : 6); - - /* Create a split with values from the insn_and_split. */ - split = rtx_alloc (DEFINE_SPLIT); -@@ -555,12 +616,17 @@ process_rtx (rtx desc, file_location loc) - split_cond = rtx_reader_ptr->join_c_conditions (XSTR (desc, 2), - split_cond + 2); - } -+ else if (GET_CODE (desc) == DEFINE_INSN_AND_REWRITE) -+ error_at (loc, "the rewrite condition must start with `&&'"); - XSTR (split, 1) = split_cond; -- XVEC (split, 2) = XVEC (desc, 5); -- XSTR (split, 3) = XSTR (desc, 6); -+ if (GET_CODE (desc) == DEFINE_INSN_AND_REWRITE) -+ XVEC (split, 2) = gen_rewrite_sequence (XVEC (desc, 1)); -+ else -+ XVEC (split, 2) = XVEC (desc, 5); -+ XSTR (split, 3) = XSTR (desc, split_code); - - /* Fix up the DEFINE_INSN. */ -- attr = XVEC (desc, 7); -+ attr = XVEC (desc, split_code + 1); - PUT_CODE (desc, DEFINE_INSN); - XVEC (desc, 4) = attr; - -diff --git a/gcc/gimple-expr.c b/gcc/gimple-expr.c -index b0c9f9b67..4ba194ff4 100644 ---- a/gcc/gimple-expr.c -+++ b/gcc/gimple-expr.c -@@ -37,6 +37,7 @@ along with GCC; see the file COPYING3. If not see - #include "tree-pass.h" - #include "stringpool.h" - #include "attribs.h" -+#include "target.h" - - /* ----- Type related ----- */ - -@@ -147,10 +148,12 @@ useless_type_conversion_p (tree outer_type, tree inner_type) - - /* Recurse for vector types with the same number of subparts. */ - else if (TREE_CODE (inner_type) == VECTOR_TYPE -- && TREE_CODE (outer_type) == VECTOR_TYPE -- && TYPE_PRECISION (inner_type) == TYPE_PRECISION (outer_type)) -- return useless_type_conversion_p (TREE_TYPE (outer_type), -- TREE_TYPE (inner_type)); -+ && TREE_CODE (outer_type) == VECTOR_TYPE) -+ return (known_eq (TYPE_VECTOR_SUBPARTS (inner_type), -+ TYPE_VECTOR_SUBPARTS (outer_type)) -+ && useless_type_conversion_p (TREE_TYPE (outer_type), -+ TREE_TYPE (inner_type)) -+ && targetm.compatible_vector_types_p (inner_type, outer_type)); - - else if (TREE_CODE (inner_type) == ARRAY_TYPE - && TREE_CODE (outer_type) == ARRAY_TYPE) -diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c -index d33d93242..bbee8eb46 100644 ---- a/gcc/gimple-fold.c -+++ b/gcc/gimple-fold.c -@@ -631,14 +631,7 @@ replace_call_with_call_and_fold (gimple_stmt_iterator *gsi, gimple *repl) - gimple *stmt = gsi_stmt (*gsi); - gimple_call_set_lhs (repl, gimple_call_lhs (stmt)); - gimple_set_location (repl, gimple_location (stmt)); -- if (gimple_vdef (stmt) -- && TREE_CODE (gimple_vdef (stmt)) == SSA_NAME) -- { -- gimple_set_vdef (repl, gimple_vdef (stmt)); -- SSA_NAME_DEF_STMT (gimple_vdef (repl)) = repl; -- } -- if (gimple_vuse (stmt)) -- gimple_set_vuse (repl, gimple_vuse (stmt)); -+ gimple_move_vops (repl, stmt); - gsi_replace (gsi, repl, false); - fold_stmt (gsi); - } -@@ -822,11 +815,7 @@ gimple_fold_builtin_memory_op (gimple_stmt_iterator *gsi, - = gimple_build_assign (fold_build2 (MEM_REF, desttype, - dest, off0), - srcmem); -- gimple_set_vuse (new_stmt, gimple_vuse (stmt)); -- gimple_set_vdef (new_stmt, gimple_vdef (stmt)); -- if (gimple_vdef (new_stmt) -- && TREE_CODE (gimple_vdef (new_stmt)) == SSA_NAME) -- SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt; -+ gimple_move_vops (new_stmt, stmt); - if (!lhs) - 
{ - gsi_replace (gsi, new_stmt, false); -@@ -1087,11 +1076,7 @@ gimple_fold_builtin_memory_op (gimple_stmt_iterator *gsi, - = gimple_build_assign (fold_build2 (MEM_REF, desttype, dest, off0), - fold_build2 (MEM_REF, srctype, src, off0)); - set_vop_and_replace: -- gimple_set_vuse (new_stmt, gimple_vuse (stmt)); -- gimple_set_vdef (new_stmt, gimple_vdef (stmt)); -- if (gimple_vdef (new_stmt) -- && TREE_CODE (gimple_vdef (new_stmt)) == SSA_NAME) -- SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt; -+ gimple_move_vops (new_stmt, stmt); - if (!lhs) - { - gsi_replace (gsi, new_stmt, false); -@@ -1264,13 +1249,7 @@ gimple_fold_builtin_memset (gimple_stmt_iterator *gsi, tree c, tree len) - - var = fold_build2 (MEM_REF, etype, dest, build_int_cst (ptr_type_node, 0)); - gimple *store = gimple_build_assign (var, build_int_cst_type (etype, cval)); -- gimple_set_vuse (store, gimple_vuse (stmt)); -- tree vdef = gimple_vdef (stmt); -- if (vdef && TREE_CODE (vdef) == SSA_NAME) -- { -- gimple_set_vdef (store, gimple_vdef (stmt)); -- SSA_NAME_DEF_STMT (gimple_vdef (stmt)) = store; -- } -+ gimple_move_vops (store, stmt); - gsi_insert_before (gsi, store, GSI_SAME_STMT); - if (gimple_call_lhs (stmt)) - { -@@ -2979,11 +2958,7 @@ gimple_fold_builtin_stpcpy (gimple_stmt_iterator *gsi) - tem, build_int_cst (size_type_node, 1)); - gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); - gcall *repl = gimple_build_call (fn, 3, dest, src, lenp1); -- gimple_set_vuse (repl, gimple_vuse (stmt)); -- gimple_set_vdef (repl, gimple_vdef (stmt)); -- if (gimple_vdef (repl) -- && TREE_CODE (gimple_vdef (repl)) == SSA_NAME) -- SSA_NAME_DEF_STMT (gimple_vdef (repl)) = repl; -+ gimple_move_vops (repl, stmt); - gsi_insert_before (gsi, repl, GSI_SAME_STMT); - /* Replace the result with dest + len. */ - stmts = NULL; -@@ -4135,9 +4110,7 @@ fold_builtin_atomic_compare_exchange (gimple_stmt_iterator *gsi) - gimple_call_arg (stmt, 5)); - tree lhs = make_ssa_name (ctype); - gimple_call_set_lhs (g, lhs); -- gimple_set_vdef (g, gimple_vdef (stmt)); -- gimple_set_vuse (g, gimple_vuse (stmt)); -- SSA_NAME_DEF_STMT (gimple_vdef (g)) = g; -+ gimple_move_vops (g, stmt); - tree oldlhs = gimple_call_lhs (stmt); - if (stmt_can_throw_internal (cfun, stmt)) - { -@@ -4316,8 +4289,7 @@ gimple_fold_call (gimple_stmt_iterator *gsi, bool inplace) - SSA_NAME_DEF_STMT (lhs) = gimple_build_nop (); - set_ssa_default_def (cfun, var, lhs); - } -- gimple_set_vuse (new_stmt, gimple_vuse (stmt)); -- gimple_set_vdef (new_stmt, gimple_vdef (stmt)); -+ gimple_move_vops (new_stmt, stmt); - gsi_replace (gsi, new_stmt, false); - return true; - } -diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c -index bbbc0f2c2..f83f22561 100644 ---- a/gcc/gimple-match-head.c -+++ b/gcc/gimple-match-head.c -@@ -27,6 +27,7 @@ along with GCC; see the file COPYING3. 
If not see - #include "gimple.h" - #include "ssa.h" - #include "cgraph.h" -+#include "vec-perm-indices.h" - #include "fold-const.h" - #include "fold-const-call.h" - #include "stor-layout.h" -diff --git a/gcc/gimple.c b/gcc/gimple.c -index bf362dbe5..763c8e7e1 100644 ---- a/gcc/gimple.c -+++ b/gcc/gimple.c -@@ -1564,7 +1564,7 @@ gimple_call_nonnull_result_p (gcall *call) - if (!fndecl) - return false; - if (flag_delete_null_pointer_checks && !flag_check_new -- && DECL_IS_OPERATOR_NEW (fndecl) -+ && DECL_IS_OPERATOR_NEW_P (fndecl) - && !TREE_NOTHROW (fndecl)) - return true; - -@@ -2034,6 +2034,18 @@ gimple_copy (gimple *stmt) - return copy; - } - -+/* Move OLD_STMT's vuse and vdef operands to NEW_STMT, on the assumption -+ that OLD_STMT is about to be removed. */ -+ -+void -+gimple_move_vops (gimple *new_stmt, gimple *old_stmt) -+{ -+ tree vdef = gimple_vdef (old_stmt); -+ gimple_set_vuse (new_stmt, gimple_vuse (old_stmt)); -+ gimple_set_vdef (new_stmt, vdef); -+ if (vdef && TREE_CODE (vdef) == SSA_NAME) -+ SSA_NAME_DEF_STMT (vdef) = new_stmt; -+} - - /* Return true if statement S has side-effects. We consider a - statement to have side effects if: -diff --git a/gcc/gimple.h b/gcc/gimple.h -index 8b5c9e219..f91c6db4d 100644 ---- a/gcc/gimple.h -+++ b/gcc/gimple.h -@@ -1509,6 +1509,7 @@ void gimple_assign_set_rhs_with_ops (gimple_stmt_iterator *, enum tree_code, - tree gimple_get_lhs (const gimple *); - void gimple_set_lhs (gimple *, tree); - gimple *gimple_copy (gimple *); -+void gimple_move_vops (gimple *, gimple *); - bool gimple_has_side_effects (const gimple *); - bool gimple_could_trap_p_1 (gimple *, bool, bool); - bool gimple_could_trap_p (gimple *); -diff --git a/gcc/gimplify.c b/gcc/gimplify.c -index bd8bd6d7e..b23680f96 100644 ---- a/gcc/gimplify.c -+++ b/gcc/gimplify.c -@@ -1699,11 +1699,12 @@ gimplify_decl_expr (tree *stmt_p, gimple_seq *seq_p) - tree init = DECL_INITIAL (decl); - bool is_vla = false; - -- if (TREE_CODE (DECL_SIZE_UNIT (decl)) != INTEGER_CST -+ poly_uint64 size; -+ if (!poly_int_tree_p (DECL_SIZE_UNIT (decl), &size) - || (!TREE_STATIC (decl) - && flag_stack_check == GENERIC_STACK_CHECK -- && compare_tree_int (DECL_SIZE_UNIT (decl), -- STACK_CHECK_MAX_VAR_SIZE) > 0)) -+ && maybe_gt (size, -+ (unsigned HOST_WIDE_INT) STACK_CHECK_MAX_VAR_SIZE))) - { - gimplify_vla_decl (decl, seq_p); - is_vla = true; -diff --git a/gcc/haifa-sched.c b/gcc/haifa-sched.c -index 5025aae42..33a77542a 100644 ---- a/gcc/haifa-sched.c -+++ b/gcc/haifa-sched.c -@@ -529,9 +529,6 @@ haifa_classify_rtx (const_rtx x) - /* Test if it is a 'store'. */ - tmp_class = may_trap_exp (XEXP (x, 0), 1); - break; -- case CLOBBER_HIGH: -- gcc_assert (REG_P (XEXP (x, 0))); -- break; - case SET: - /* Test if it is a store. */ - tmp_class = may_trap_exp (SET_DEST (x), 1); -@@ -7207,7 +7204,7 @@ alloc_global_sched_pressure_data (void) - fixed_regs_num[cl] = 0; - - for (int i = 0; i < ira_class_hard_regs_num[cl]; ++i) -- if (!call_used_regs[ira_class_hard_regs[cl][i]]) -+ if (!call_used_or_fixed_reg_p (ira_class_hard_regs[cl][i])) - ++call_saved_regs_num[cl]; - else if (fixed_regs[ira_class_hard_regs[cl][i]]) - ++fixed_regs_num[cl]; -diff --git a/gcc/hard-reg-set.h b/gcc/hard-reg-set.h -index a72819662..51c9e72bb 100644 ---- a/gcc/hard-reg-set.h -+++ b/gcc/hard-reg-set.h -@@ -20,6 +20,8 @@ along with GCC; see the file COPYING3. If not see - #ifndef GCC_HARD_REG_SET_H - #define GCC_HARD_REG_SET_H - -+#include "array-traits.h" -+ - /* Define the type of a set of hard registers. 
*/ - - /* HARD_REG_ELT_TYPE is a typedef of the unsigned integral type which -@@ -42,14 +44,88 @@ typedef unsigned HOST_WIDEST_FAST_INT HARD_REG_ELT_TYPE; - - #if FIRST_PSEUDO_REGISTER <= HOST_BITS_PER_WIDEST_FAST_INT - --#define HARD_REG_SET HARD_REG_ELT_TYPE -+typedef HARD_REG_ELT_TYPE HARD_REG_SET; -+typedef const HARD_REG_SET const_hard_reg_set; - - #else - - #define HARD_REG_SET_LONGS \ - ((FIRST_PSEUDO_REGISTER + HOST_BITS_PER_WIDEST_FAST_INT - 1) \ - / HOST_BITS_PER_WIDEST_FAST_INT) --typedef HARD_REG_ELT_TYPE HARD_REG_SET[HARD_REG_SET_LONGS]; -+ -+struct HARD_REG_SET -+{ -+ HARD_REG_SET -+ operator~ () const -+ { -+ HARD_REG_SET res; -+ for (unsigned int i = 0; i < ARRAY_SIZE (elts); ++i) -+ res.elts[i] = ~elts[i]; -+ return res; -+ } -+ -+ HARD_REG_SET -+ operator& (const HARD_REG_SET &other) const -+ { -+ HARD_REG_SET res; -+ for (unsigned int i = 0; i < ARRAY_SIZE (elts); ++i) -+ res.elts[i] = elts[i] & other.elts[i]; -+ return res; -+ } -+ -+ HARD_REG_SET & -+ operator&= (const HARD_REG_SET &other) -+ { -+ for (unsigned int i = 0; i < ARRAY_SIZE (elts); ++i) -+ elts[i] &= other.elts[i]; -+ return *this; -+ } -+ -+ HARD_REG_SET -+ operator| (const HARD_REG_SET &other) const -+ { -+ HARD_REG_SET res; -+ for (unsigned int i = 0; i < ARRAY_SIZE (elts); ++i) -+ res.elts[i] = elts[i] | other.elts[i]; -+ return res; -+ } -+ -+ HARD_REG_SET & -+ operator|= (const HARD_REG_SET &other) -+ { -+ for (unsigned int i = 0; i < ARRAY_SIZE (elts); ++i) -+ elts[i] |= other.elts[i]; -+ return *this; -+ } -+ -+ bool -+ operator== (const HARD_REG_SET &other) const -+ { -+ HARD_REG_ELT_TYPE bad = 0; -+ for (unsigned int i = 0; i < ARRAY_SIZE (elts); ++i) -+ bad |= (elts[i] ^ other.elts[i]); -+ return bad == 0; -+ } -+ -+ bool -+ operator!= (const HARD_REG_SET &other) const -+ { -+ return !operator== (other); -+ } -+ -+ HARD_REG_ELT_TYPE elts[HARD_REG_SET_LONGS]; -+}; -+typedef const HARD_REG_SET &const_hard_reg_set; -+ -+template<> -+struct array_traits -+{ -+ typedef HARD_REG_ELT_TYPE element_type; -+ static const bool has_constant_size = true; -+ static const size_t constant_size = HARD_REG_SET_LONGS; -+ static const element_type *base (const HARD_REG_SET &x) { return x.elts; } -+ static size_t size (const HARD_REG_SET &) { return HARD_REG_SET_LONGS; } -+}; - - #endif - -@@ -77,28 +153,15 @@ struct hard_reg_set_container - CLEAR_HARD_REG_SET and SET_HARD_REG_SET. - These take just one argument. - -- Also define macros for copying hard reg sets: -- COPY_HARD_REG_SET and COMPL_HARD_REG_SET. -- These take two arguments TO and FROM; they read from FROM -- and store into TO. COMPL_HARD_REG_SET complements each bit. -- -- Also define macros for combining hard reg sets: -- IOR_HARD_REG_SET and AND_HARD_REG_SET. -- These take two arguments TO and FROM; they read from FROM -- and combine bitwise into TO. Define also two variants -- IOR_COMPL_HARD_REG_SET and AND_COMPL_HARD_REG_SET -- which use the complement of the set FROM. -- - Also define: - - hard_reg_set_subset_p (X, Y), which returns true if X is a subset of Y. -- hard_reg_set_equal_p (X, Y), which returns true if X and Y are equal. - hard_reg_set_intersect_p (X, Y), which returns true if X and Y intersect. - hard_reg_set_empty_p (X), which returns true if X is empty. 
*/ - - #define UHOST_BITS_PER_WIDE_INT ((unsigned) HOST_BITS_PER_WIDEST_FAST_INT) - --#ifdef HARD_REG_SET -+#if FIRST_PSEUDO_REGISTER <= HOST_BITS_PER_WIDEST_FAST_INT - - #define SET_HARD_REG_BIT(SET, BIT) \ - ((SET) |= HARD_CONST (1) << (BIT)) -@@ -110,404 +173,87 @@ struct hard_reg_set_container - #define CLEAR_HARD_REG_SET(TO) ((TO) = HARD_CONST (0)) - #define SET_HARD_REG_SET(TO) ((TO) = ~ HARD_CONST (0)) - --#define COPY_HARD_REG_SET(TO, FROM) ((TO) = (FROM)) --#define COMPL_HARD_REG_SET(TO, FROM) ((TO) = ~(FROM)) -- --#define IOR_HARD_REG_SET(TO, FROM) ((TO) |= (FROM)) --#define IOR_COMPL_HARD_REG_SET(TO, FROM) ((TO) |= ~ (FROM)) --#define AND_HARD_REG_SET(TO, FROM) ((TO) &= (FROM)) --#define AND_COMPL_HARD_REG_SET(TO, FROM) ((TO) &= ~ (FROM)) -- - static inline bool --hard_reg_set_subset_p (const HARD_REG_SET x, const HARD_REG_SET y) -+hard_reg_set_subset_p (const_hard_reg_set x, const_hard_reg_set y) - { - return (x & ~y) == HARD_CONST (0); - } - - static inline bool --hard_reg_set_equal_p (const HARD_REG_SET x, const HARD_REG_SET y) --{ -- return x == y; --} -- --static inline bool --hard_reg_set_intersect_p (const HARD_REG_SET x, const HARD_REG_SET y) -+hard_reg_set_intersect_p (const_hard_reg_set x, const_hard_reg_set y) - { - return (x & y) != HARD_CONST (0); - } - - static inline bool --hard_reg_set_empty_p (const HARD_REG_SET x) -+hard_reg_set_empty_p (const_hard_reg_set x) - { - return x == HARD_CONST (0); - } - - #else - --#define SET_HARD_REG_BIT(SET, BIT) \ -- ((SET)[(BIT) / UHOST_BITS_PER_WIDE_INT] \ -- |= HARD_CONST (1) << ((BIT) % UHOST_BITS_PER_WIDE_INT)) -- --#define CLEAR_HARD_REG_BIT(SET, BIT) \ -- ((SET)[(BIT) / UHOST_BITS_PER_WIDE_INT] \ -- &= ~(HARD_CONST (1) << ((BIT) % UHOST_BITS_PER_WIDE_INT))) -- --#define TEST_HARD_REG_BIT(SET, BIT) \ -- (!!((SET)[(BIT) / UHOST_BITS_PER_WIDE_INT] \ -- & (HARD_CONST (1) << ((BIT) % UHOST_BITS_PER_WIDE_INT)))) -- --#if FIRST_PSEUDO_REGISTER <= 2*HOST_BITS_PER_WIDEST_FAST_INT --#define CLEAR_HARD_REG_SET(TO) \ --do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ -- scan_tp_[0] = 0; \ -- scan_tp_[1] = 0; } while (0) -- --#define SET_HARD_REG_SET(TO) \ --do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ -- scan_tp_[0] = -1; \ -- scan_tp_[1] = -1; } while (0) -- --#define COPY_HARD_REG_SET(TO, FROM) \ --do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ -- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ -- scan_tp_[0] = scan_fp_[0]; \ -- scan_tp_[1] = scan_fp_[1]; } while (0) -- --#define COMPL_HARD_REG_SET(TO, FROM) \ --do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ -- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ -- scan_tp_[0] = ~ scan_fp_[0]; \ -- scan_tp_[1] = ~ scan_fp_[1]; } while (0) -- --#define AND_HARD_REG_SET(TO, FROM) \ --do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ -- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ -- scan_tp_[0] &= scan_fp_[0]; \ -- scan_tp_[1] &= scan_fp_[1]; } while (0) -- --#define AND_COMPL_HARD_REG_SET(TO, FROM) \ --do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ -- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ -- scan_tp_[0] &= ~ scan_fp_[0]; \ -- scan_tp_[1] &= ~ scan_fp_[1]; } while (0) -- --#define IOR_HARD_REG_SET(TO, FROM) \ --do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ -- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ -- scan_tp_[0] |= scan_fp_[0]; \ -- scan_tp_[1] |= scan_fp_[1]; } while (0) -- --#define IOR_COMPL_HARD_REG_SET(TO, FROM) \ --do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ -- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ -- scan_tp_[0] |= ~ scan_fp_[0]; \ -- scan_tp_[1] |= ~ scan_fp_[1]; } while (0) -- --static 
inline bool --hard_reg_set_subset_p (const HARD_REG_SET x, const HARD_REG_SET y) --{ -- return (x[0] & ~y[0]) == 0 && (x[1] & ~y[1]) == 0; --} -- --static inline bool --hard_reg_set_equal_p (const HARD_REG_SET x, const HARD_REG_SET y) -+inline void -+SET_HARD_REG_BIT (HARD_REG_SET &set, unsigned int bit) - { -- return x[0] == y[0] && x[1] == y[1]; -+ set.elts[bit / UHOST_BITS_PER_WIDE_INT] -+ |= HARD_CONST (1) << (bit % UHOST_BITS_PER_WIDE_INT); - } - --static inline bool --hard_reg_set_intersect_p (const HARD_REG_SET x, const HARD_REG_SET y) -+inline void -+CLEAR_HARD_REG_BIT (HARD_REG_SET &set, unsigned int bit) - { -- return (x[0] & y[0]) != 0 || (x[1] & y[1]) != 0; -+ set.elts[bit / UHOST_BITS_PER_WIDE_INT] -+ &= ~(HARD_CONST (1) << (bit % UHOST_BITS_PER_WIDE_INT)); - } - --static inline bool --hard_reg_set_empty_p (const HARD_REG_SET x) -+inline bool -+TEST_HARD_REG_BIT (const_hard_reg_set set, unsigned int bit) - { -- return x[0] == 0 && x[1] == 0; -+ return (set.elts[bit / UHOST_BITS_PER_WIDE_INT] -+ & (HARD_CONST (1) << (bit % UHOST_BITS_PER_WIDE_INT))); - } - --#else --#if FIRST_PSEUDO_REGISTER <= 3*HOST_BITS_PER_WIDEST_FAST_INT --#define CLEAR_HARD_REG_SET(TO) \ --do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ -- scan_tp_[0] = 0; \ -- scan_tp_[1] = 0; \ -- scan_tp_[2] = 0; } while (0) -- --#define SET_HARD_REG_SET(TO) \ --do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ -- scan_tp_[0] = -1; \ -- scan_tp_[1] = -1; \ -- scan_tp_[2] = -1; } while (0) -- --#define COPY_HARD_REG_SET(TO, FROM) \ --do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ -- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ -- scan_tp_[0] = scan_fp_[0]; \ -- scan_tp_[1] = scan_fp_[1]; \ -- scan_tp_[2] = scan_fp_[2]; } while (0) -- --#define COMPL_HARD_REG_SET(TO, FROM) \ --do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ -- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ -- scan_tp_[0] = ~ scan_fp_[0]; \ -- scan_tp_[1] = ~ scan_fp_[1]; \ -- scan_tp_[2] = ~ scan_fp_[2]; } while (0) -- --#define AND_HARD_REG_SET(TO, FROM) \ --do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ -- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ -- scan_tp_[0] &= scan_fp_[0]; \ -- scan_tp_[1] &= scan_fp_[1]; \ -- scan_tp_[2] &= scan_fp_[2]; } while (0) -- --#define AND_COMPL_HARD_REG_SET(TO, FROM) \ --do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ -- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ -- scan_tp_[0] &= ~ scan_fp_[0]; \ -- scan_tp_[1] &= ~ scan_fp_[1]; \ -- scan_tp_[2] &= ~ scan_fp_[2]; } while (0) -- --#define IOR_HARD_REG_SET(TO, FROM) \ --do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ -- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ -- scan_tp_[0] |= scan_fp_[0]; \ -- scan_tp_[1] |= scan_fp_[1]; \ -- scan_tp_[2] |= scan_fp_[2]; } while (0) -- --#define IOR_COMPL_HARD_REG_SET(TO, FROM) \ --do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ -- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ -- scan_tp_[0] |= ~ scan_fp_[0]; \ -- scan_tp_[1] |= ~ scan_fp_[1]; \ -- scan_tp_[2] |= ~ scan_fp_[2]; } while (0) -- --static inline bool --hard_reg_set_subset_p (const HARD_REG_SET x, const HARD_REG_SET y) -+inline void -+CLEAR_HARD_REG_SET (HARD_REG_SET &set) - { -- return ((x[0] & ~y[0]) == 0 -- && (x[1] & ~y[1]) == 0 -- && (x[2] & ~y[2]) == 0); -+ for (unsigned int i = 0; i < ARRAY_SIZE (set.elts); ++i) -+ set.elts[i] = 0; - } - --static inline bool --hard_reg_set_equal_p (const HARD_REG_SET x, const HARD_REG_SET y) -+inline void -+SET_HARD_REG_SET (HARD_REG_SET &set) - { -- return x[0] == y[0] && x[1] == y[1] && x[2] == y[2]; -+ for (unsigned int i = 0; i < ARRAY_SIZE (set.elts); ++i) 
-+ set.elts[i] = -1; - } - - static inline bool --hard_reg_set_intersect_p (const HARD_REG_SET x, const HARD_REG_SET y) --{ -- return ((x[0] & y[0]) != 0 -- || (x[1] & y[1]) != 0 -- || (x[2] & y[2]) != 0); --} -- --static inline bool --hard_reg_set_empty_p (const HARD_REG_SET x) --{ -- return x[0] == 0 && x[1] == 0 && x[2] == 0; --} -- --#else --#if FIRST_PSEUDO_REGISTER <= 4*HOST_BITS_PER_WIDEST_FAST_INT --#define CLEAR_HARD_REG_SET(TO) \ --do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ -- scan_tp_[0] = 0; \ -- scan_tp_[1] = 0; \ -- scan_tp_[2] = 0; \ -- scan_tp_[3] = 0; } while (0) -- --#define SET_HARD_REG_SET(TO) \ --do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ -- scan_tp_[0] = -1; \ -- scan_tp_[1] = -1; \ -- scan_tp_[2] = -1; \ -- scan_tp_[3] = -1; } while (0) -- --#define COPY_HARD_REG_SET(TO, FROM) \ --do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ -- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ -- scan_tp_[0] = scan_fp_[0]; \ -- scan_tp_[1] = scan_fp_[1]; \ -- scan_tp_[2] = scan_fp_[2]; \ -- scan_tp_[3] = scan_fp_[3]; } while (0) -- --#define COMPL_HARD_REG_SET(TO, FROM) \ --do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ -- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ -- scan_tp_[0] = ~ scan_fp_[0]; \ -- scan_tp_[1] = ~ scan_fp_[1]; \ -- scan_tp_[2] = ~ scan_fp_[2]; \ -- scan_tp_[3] = ~ scan_fp_[3]; } while (0) -- --#define AND_HARD_REG_SET(TO, FROM) \ --do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ -- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ -- scan_tp_[0] &= scan_fp_[0]; \ -- scan_tp_[1] &= scan_fp_[1]; \ -- scan_tp_[2] &= scan_fp_[2]; \ -- scan_tp_[3] &= scan_fp_[3]; } while (0) -- --#define AND_COMPL_HARD_REG_SET(TO, FROM) \ --do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ -- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ -- scan_tp_[0] &= ~ scan_fp_[0]; \ -- scan_tp_[1] &= ~ scan_fp_[1]; \ -- scan_tp_[2] &= ~ scan_fp_[2]; \ -- scan_tp_[3] &= ~ scan_fp_[3]; } while (0) -- --#define IOR_HARD_REG_SET(TO, FROM) \ --do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ -- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ -- scan_tp_[0] |= scan_fp_[0]; \ -- scan_tp_[1] |= scan_fp_[1]; \ -- scan_tp_[2] |= scan_fp_[2]; \ -- scan_tp_[3] |= scan_fp_[3]; } while (0) -- --#define IOR_COMPL_HARD_REG_SET(TO, FROM) \ --do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ -- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ -- scan_tp_[0] |= ~ scan_fp_[0]; \ -- scan_tp_[1] |= ~ scan_fp_[1]; \ -- scan_tp_[2] |= ~ scan_fp_[2]; \ -- scan_tp_[3] |= ~ scan_fp_[3]; } while (0) -- --static inline bool --hard_reg_set_subset_p (const HARD_REG_SET x, const HARD_REG_SET y) -+hard_reg_set_subset_p (const_hard_reg_set x, const_hard_reg_set y) - { -- return ((x[0] & ~y[0]) == 0 -- && (x[1] & ~y[1]) == 0 -- && (x[2] & ~y[2]) == 0 -- && (x[3] & ~y[3]) == 0); -+ HARD_REG_ELT_TYPE bad = 0; -+ for (unsigned int i = 0; i < ARRAY_SIZE (x.elts); ++i) -+ bad |= (x.elts[i] & ~y.elts[i]); -+ return bad == 0; - } - - static inline bool --hard_reg_set_equal_p (const HARD_REG_SET x, const HARD_REG_SET y) -+hard_reg_set_intersect_p (const_hard_reg_set x, const_hard_reg_set y) - { -- return x[0] == y[0] && x[1] == y[1] && x[2] == y[2] && x[3] == y[3]; -+ HARD_REG_ELT_TYPE good = 0; -+ for (unsigned int i = 0; i < ARRAY_SIZE (x.elts); ++i) -+ good |= (x.elts[i] & y.elts[i]); -+ return good != 0; - } - - static inline bool --hard_reg_set_intersect_p (const HARD_REG_SET x, const HARD_REG_SET y) -+hard_reg_set_empty_p (const_hard_reg_set x) - { -- return ((x[0] & y[0]) != 0 -- || (x[1] & y[1]) != 0 -- || (x[2] & y[2]) != 0 -- || (x[3] & y[3]) != 0); -+ 
HARD_REG_ELT_TYPE bad = 0; -+ for (unsigned int i = 0; i < ARRAY_SIZE (x.elts); ++i) -+ bad |= x.elts[i]; -+ return bad == 0; - } -- --static inline bool --hard_reg_set_empty_p (const HARD_REG_SET x) --{ -- return x[0] == 0 && x[1] == 0 && x[2] == 0 && x[3] == 0; --} -- --#else /* FIRST_PSEUDO_REGISTER > 4*HOST_BITS_PER_WIDEST_FAST_INT */ -- --#define CLEAR_HARD_REG_SET(TO) \ --do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ -- int i; \ -- for (i = 0; i < HARD_REG_SET_LONGS; i++) \ -- *scan_tp_++ = 0; } while (0) -- --#define SET_HARD_REG_SET(TO) \ --do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ -- int i; \ -- for (i = 0; i < HARD_REG_SET_LONGS; i++) \ -- *scan_tp_++ = -1; } while (0) -- --#define COPY_HARD_REG_SET(TO, FROM) \ --do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ -- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ -- int i; \ -- for (i = 0; i < HARD_REG_SET_LONGS; i++) \ -- *scan_tp_++ = *scan_fp_++; } while (0) -- --#define COMPL_HARD_REG_SET(TO, FROM) \ --do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ -- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ -- int i; \ -- for (i = 0; i < HARD_REG_SET_LONGS; i++) \ -- *scan_tp_++ = ~ *scan_fp_++; } while (0) -- --#define AND_HARD_REG_SET(TO, FROM) \ --do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ -- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ -- int i; \ -- for (i = 0; i < HARD_REG_SET_LONGS; i++) \ -- *scan_tp_++ &= *scan_fp_++; } while (0) -- --#define AND_COMPL_HARD_REG_SET(TO, FROM) \ --do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ -- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ -- int i; \ -- for (i = 0; i < HARD_REG_SET_LONGS; i++) \ -- *scan_tp_++ &= ~ *scan_fp_++; } while (0) -- --#define IOR_HARD_REG_SET(TO, FROM) \ --do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ -- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ -- int i; \ -- for (i = 0; i < HARD_REG_SET_LONGS; i++) \ -- *scan_tp_++ |= *scan_fp_++; } while (0) -- --#define IOR_COMPL_HARD_REG_SET(TO, FROM) \ --do { HARD_REG_ELT_TYPE *scan_tp_ = (TO); \ -- const HARD_REG_ELT_TYPE *scan_fp_ = (FROM); \ -- int i; \ -- for (i = 0; i < HARD_REG_SET_LONGS; i++) \ -- *scan_tp_++ |= ~ *scan_fp_++; } while (0) -- --static inline bool --hard_reg_set_subset_p (const HARD_REG_SET x, const HARD_REG_SET y) --{ -- int i; -- -- for (i = 0; i < HARD_REG_SET_LONGS; i++) -- if ((x[i] & ~y[i]) != 0) -- return false; -- return true; --} -- --static inline bool --hard_reg_set_equal_p (const HARD_REG_SET x, const HARD_REG_SET y) --{ -- int i; -- -- for (i = 0; i < HARD_REG_SET_LONGS; i++) -- if (x[i] != y[i]) -- return false; -- return true; --} -- --static inline bool --hard_reg_set_intersect_p (const HARD_REG_SET x, const HARD_REG_SET y) --{ -- int i; -- -- for (i = 0; i < HARD_REG_SET_LONGS; i++) -- if ((x[i] & y[i]) != 0) -- return true; -- return false; --} -- --static inline bool --hard_reg_set_empty_p (const HARD_REG_SET x) --{ -- int i; -- -- for (i = 0; i < HARD_REG_SET_LONGS; i++) -- if (x[i] != 0) -- return false; -- return true; --} -- --#endif --#endif --#endif - #endif - - /* Iterator for hard register sets. */ -@@ -515,7 +261,7 @@ hard_reg_set_empty_p (const HARD_REG_SET x) - struct hard_reg_set_iterator - { - /* Pointer to the current element. */ -- HARD_REG_ELT_TYPE *pelt; -+ const HARD_REG_ELT_TYPE *pelt; - - /* The length of the set. */ - unsigned short length; -@@ -534,11 +280,11 @@ struct hard_reg_set_iterator - /* The implementation of the iterator functions is fully analogous to - the bitmap iterators. 
*/ - static inline void --hard_reg_set_iter_init (hard_reg_set_iterator *iter, HARD_REG_SET set, -+hard_reg_set_iter_init (hard_reg_set_iterator *iter, const_hard_reg_set set, - unsigned min, unsigned *regno) - { - #ifdef HARD_REG_SET_LONGS -- iter->pelt = set; -+ iter->pelt = set.elts; - iter->length = HARD_REG_SET_LONGS; - #else - iter->pelt = &set; -@@ -649,16 +395,15 @@ struct target_hard_regs { - a pseudo reg whose life crosses calls. */ - char x_call_used_regs[FIRST_PSEUDO_REGISTER]; - -- char x_call_really_used_regs[FIRST_PSEUDO_REGISTER]; -- -- /* The same info as a HARD_REG_SET. */ -- HARD_REG_SET x_call_used_reg_set; -+ /* For targets that use reload rather than LRA, this is the set -+ of registers that we are able to save and restore around calls -+ (i.e. those for which we know a suitable mode and set of -+ load/store instructions exist). For LRA targets it contains -+ all registers. - -- /* Contains registers that are fixed use -- i.e. in fixed_reg_set -- or -- a function value return register or TARGET_STRUCT_VALUE_RTX or -- STATIC_CHAIN_REGNUM. These are the registers that cannot hold quantities -- across calls even if we are willing to save and restore them. */ -- HARD_REG_SET x_call_fixed_reg_set; -+ This is legacy information and should be removed if all targets -+ switch to LRA. */ -+ HARD_REG_SET x_savable_regs; - - /* Contains registers that are fixed use -- i.e. in fixed_reg_set -- but - only if they are not merely part of that set because they are global -@@ -674,10 +419,6 @@ struct target_hard_regs { - with the local stack frame are safe, but scant others. */ - HARD_REG_SET x_regs_invalidated_by_call; - -- /* Call used hard registers which cannot be saved because there is no -- insn for this. */ -- HARD_REG_SET x_no_caller_save_reg_set; -- - /* Table of register numbers in the order in which to try to use them. */ - int x_reg_alloc_order[FIRST_PSEUDO_REGISTER]; - -@@ -730,18 +471,16 @@ extern struct target_hard_regs *this_target_hard_regs; - (this_target_hard_regs->x_fixed_reg_set) - #define fixed_nonglobal_reg_set \ - (this_target_hard_regs->x_fixed_nonglobal_reg_set) -+#ifdef IN_TARGET_CODE - #define call_used_regs \ - (this_target_hard_regs->x_call_used_regs) --#define call_really_used_regs \ -- (this_target_hard_regs->x_call_really_used_regs) --#define call_used_reg_set \ -- (this_target_hard_regs->x_call_used_reg_set) --#define call_fixed_reg_set \ -- (this_target_hard_regs->x_call_fixed_reg_set) -+#endif -+#define savable_regs \ -+ (this_target_hard_regs->x_savable_regs) - #define regs_invalidated_by_call \ - (this_target_hard_regs->x_regs_invalidated_by_call) --#define no_caller_save_reg_set \ -- (this_target_hard_regs->x_no_caller_save_reg_set) -+#define call_used_or_fixed_regs \ -+ (regs_invalidated_by_call | fixed_reg_set) - #define reg_alloc_order \ - (this_target_hard_regs->x_reg_alloc_order) - #define inv_reg_alloc_order \ -@@ -770,4 +509,13 @@ extern const char * reg_class_names[]; - #define REG_CAN_CHANGE_MODE_P(REGN, FROM, TO) \ - (targetm.can_change_mode_class (FROM, TO, REGNO_REG_CLASS (REGN))) - -+/* Return true if register REGNO is either fixed or call-used -+ (aka call-clobbered). */ -+ -+inline bool -+call_used_or_fixed_reg_p (unsigned int regno) -+{ -+ return fixed_regs[regno] || this_target_hard_regs->x_call_used_regs[regno]; -+} -+ - #endif /* ! 
GCC_HARD_REG_SET_H */ -diff --git a/gcc/hooks.c b/gcc/hooks.c -index f95659b38..98038860e 100644 ---- a/gcc/hooks.c -+++ b/gcc/hooks.c -@@ -140,9 +140,8 @@ hook_bool_puint64_puint64_true (poly_uint64, poly_uint64) - return true; - } - --/* Generic hook that takes (unsigned int, machine_mode) and returns false. */ - bool --hook_bool_insn_uint_mode_false (rtx_insn *, unsigned int, machine_mode) -+hook_bool_uint_uint_mode_false (unsigned int, unsigned int, machine_mode) - { - return false; - } -@@ -313,6 +312,12 @@ hook_bool_const_tree_false (const_tree) - return false; - } - -+bool -+hook_bool_const_tree_const_tree_true (const_tree, const_tree) -+{ -+ return true; -+} -+ - bool - hook_bool_tree_true (tree) - { -diff --git a/gcc/hooks.h b/gcc/hooks.h -index 0bc8117c2..b398d13ce 100644 ---- a/gcc/hooks.h -+++ b/gcc/hooks.h -@@ -40,11 +40,12 @@ extern bool hook_bool_const_rtx_insn_const_rtx_insn_true (const rtx_insn *, - extern bool hook_bool_mode_uhwi_false (machine_mode, - unsigned HOST_WIDE_INT); - extern bool hook_bool_puint64_puint64_true (poly_uint64, poly_uint64); --extern bool hook_bool_insn_uint_mode_false (rtx_insn *, unsigned int, -+extern bool hook_bool_uint_uint_mode_false (unsigned int, unsigned int, - machine_mode); - extern bool hook_bool_uint_mode_true (unsigned int, machine_mode); - extern bool hook_bool_tree_false (tree); - extern bool hook_bool_const_tree_false (const_tree); -+extern bool hook_bool_const_tree_const_tree_true (const_tree, const_tree); - extern bool hook_bool_tree_true (tree); - extern bool hook_bool_const_tree_true (const_tree); - extern bool hook_bool_gsiptr_false (gimple_stmt_iterator *); -diff --git a/gcc/hw-doloop.c b/gcc/hw-doloop.c -index 2decece62..3ee0b4098 100644 ---- a/gcc/hw-doloop.c -+++ b/gcc/hw-doloop.c -@@ -141,7 +141,7 @@ scan_loop (hwloop_info loop) - CLEAR_HARD_REG_BIT (set_this_insn, REGNO (loop->iter_reg)); - else if (reg_mentioned_p (loop->iter_reg, PATTERN (insn))) - loop->iter_reg_used = true; -- IOR_HARD_REG_SET (loop->regs_set_in_loop, set_this_insn); -+ loop->regs_set_in_loop |= set_this_insn; - } - } - } -@@ -581,7 +581,7 @@ optimize_loop (hwloop_info loop, struct hw_doloop_hooks *hooks) - inner_depth = inner->depth; - /* The set of registers may be changed while optimizing the inner - loop. */ -- IOR_HARD_REG_SET (loop->regs_set_in_loop, inner->regs_set_in_loop); -+ loop->regs_set_in_loop |= inner->regs_set_in_loop; - } - - loop->depth = inner_depth + 1; -diff --git a/gcc/int-vector-builder.h b/gcc/int-vector-builder.h -index adf0904c5..dc9651021 100644 ---- a/gcc/int-vector-builder.h -+++ b/gcc/int-vector-builder.h -@@ -26,10 +26,11 @@ along with GCC; see the file COPYING3. If not see - encoding as tree and rtx constants. See vector_builder for more - details. */ - template --class int_vector_builder : public vector_builder > -+class int_vector_builder : public vector_builder > - { -- typedef vector_builder parent; -- friend class vector_builder; -+ typedef vector_builder parent; -+ friend class vector_builder; - - public: - int_vector_builder () {} -@@ -45,6 +46,8 @@ private: - T apply_step (T, unsigned int, T) const; - bool can_elide_p (T) const { return true; } - void note_representative (T *, T) {} -+ -+ static poly_uint64 shape_nelts (poly_uint64 x) { return x; } - }; - - /* Create a new builder for a vector with FULL_NELTS elements. 
-diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c -index 21ecd5667..9753a12f3 100644 ---- a/gcc/internal-fn.c -+++ b/gcc/internal-fn.c -@@ -117,6 +117,7 @@ init_internal_fns () - #define while_direct { 0, 2, false } - #define fold_extract_direct { 2, 2, false } - #define fold_left_direct { 1, 1, false } -+#define mask_fold_left_direct { 1, 1, false } - - const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1] = { - #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) not_direct, -@@ -3005,6 +3006,9 @@ expand_while_optab_fn (internal_fn, gcall *stmt, convert_optab optab) - #define expand_fold_left_optab_fn(FN, STMT, OPTAB) \ - expand_direct_optab_fn (FN, STMT, OPTAB, 2) - -+#define expand_mask_fold_left_optab_fn(FN, STMT, OPTAB) \ -+ expand_direct_optab_fn (FN, STMT, OPTAB, 3) -+ - /* RETURN_TYPE and ARGS are a return type and argument list that are - in principle compatible with FN (which satisfies direct_internal_fn_p). - Return the types that should be used to determine whether the -@@ -3093,6 +3097,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types, - #define direct_while_optab_supported_p convert_optab_supported_p - #define direct_fold_extract_optab_supported_p direct_optab_supported_p - #define direct_fold_left_optab_supported_p direct_optab_supported_p -+#define direct_mask_fold_left_optab_supported_p direct_optab_supported_p - - /* Return the optab used by internal function FN. */ - -@@ -3210,6 +3215,8 @@ first_commutative_argument (internal_fn fn) - case IFN_FNMS: - case IFN_AVG_FLOOR: - case IFN_AVG_CEIL: -+ case IFN_MULHS: -+ case IFN_MULHRS: - case IFN_FMIN: - case IFN_FMAX: - return 0; -@@ -3286,7 +3293,9 @@ static void (*const internal_fn_expanders[]) (internal_fn, gcall *) = { - T (MAX_EXPR, IFN_COND_MAX) \ - T (BIT_AND_EXPR, IFN_COND_AND) \ - T (BIT_IOR_EXPR, IFN_COND_IOR) \ -- T (BIT_XOR_EXPR, IFN_COND_XOR) -+ T (BIT_XOR_EXPR, IFN_COND_XOR) \ -+ T (LSHIFT_EXPR, IFN_COND_SHL) \ -+ T (RSHIFT_EXPR, IFN_COND_SHR) - - /* Return a function that only performs CODE when a certain condition is met - and that uses a given fallback value otherwise. 
For example, if CODE is -diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def -index e370eaa84..ae32fc7bd 100644 ---- a/gcc/internal-fn.def -+++ b/gcc/internal-fn.def -@@ -140,6 +140,8 @@ DEF_INTERNAL_OPTAB_FN (WHILE_ULT, ECF_CONST | ECF_NOTHROW, while_ult, while) - DEF_INTERNAL_OPTAB_FN (VEC_SHL_INSERT, ECF_CONST | ECF_NOTHROW, - vec_shl_insert, binary) - -+DEF_INTERNAL_OPTAB_FN (DIV_POW2, ECF_CONST | ECF_NOTHROW, sdiv_pow2, binary) -+ - DEF_INTERNAL_OPTAB_FN (FMS, ECF_CONST, fms, ternary) - DEF_INTERNAL_OPTAB_FN (FNMA, ECF_CONST, fnma, ternary) - DEF_INTERNAL_OPTAB_FN (FNMS, ECF_CONST, fnms, ternary) -@@ -149,6 +151,11 @@ DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_FLOOR, ECF_CONST | ECF_NOTHROW, first, - DEF_INTERNAL_SIGNED_OPTAB_FN (AVG_CEIL, ECF_CONST | ECF_NOTHROW, first, - savg_ceil, uavg_ceil, binary) - -+DEF_INTERNAL_SIGNED_OPTAB_FN (MULHS, ECF_CONST | ECF_NOTHROW, first, -+ smulhs, umulhs, binary) -+DEF_INTERNAL_SIGNED_OPTAB_FN (MULHRS, ECF_CONST | ECF_NOTHROW, first, -+ smulhrs, umulhrs, binary) -+ - DEF_INTERNAL_OPTAB_FN (COND_ADD, ECF_CONST, cond_add, cond_binary) - DEF_INTERNAL_OPTAB_FN (COND_SUB, ECF_CONST, cond_sub, cond_binary) - DEF_INTERNAL_OPTAB_FN (COND_MUL, ECF_CONST, cond_smul, cond_binary) -@@ -167,6 +174,10 @@ DEF_INTERNAL_OPTAB_FN (COND_IOR, ECF_CONST | ECF_NOTHROW, - cond_ior, cond_binary) - DEF_INTERNAL_OPTAB_FN (COND_XOR, ECF_CONST | ECF_NOTHROW, - cond_xor, cond_binary) -+DEF_INTERNAL_OPTAB_FN (COND_SHL, ECF_CONST | ECF_NOTHROW, -+ cond_ashl, cond_binary) -+DEF_INTERNAL_SIGNED_OPTAB_FN (COND_SHR, ECF_CONST | ECF_NOTHROW, first, -+ cond_ashr, cond_lshr, cond_binary) - - DEF_INTERNAL_OPTAB_FN (COND_FMA, ECF_CONST, cond_fma, cond_ternary) - DEF_INTERNAL_OPTAB_FN (COND_FMS, ECF_CONST, cond_fms, cond_ternary) -@@ -199,6 +210,9 @@ DEF_INTERNAL_OPTAB_FN (FOLD_EXTRACT_LAST, ECF_CONST | ECF_NOTHROW, - DEF_INTERNAL_OPTAB_FN (FOLD_LEFT_PLUS, ECF_CONST | ECF_NOTHROW, - fold_left_plus, fold_left) - -+DEF_INTERNAL_OPTAB_FN (MASK_FOLD_LEFT_PLUS, ECF_CONST | ECF_NOTHROW, -+ mask_fold_left_plus, mask_fold_left) -+ - /* Unary math functions. 
*/ - DEF_INTERNAL_FLT_FN (ACOS, ECF_CONST, acos, unary) - DEF_INTERNAL_FLT_FN (ACOSH, ECF_CONST, acosh, unary) -@@ -217,6 +231,7 @@ DEF_INTERNAL_FLT_FN (LOG10, ECF_CONST, log10, unary) - DEF_INTERNAL_FLT_FN (LOG1P, ECF_CONST, log1p, unary) - DEF_INTERNAL_FLT_FN (LOG2, ECF_CONST, log2, unary) - DEF_INTERNAL_FLT_FN (LOGB, ECF_CONST, logb, unary) -+DEF_INTERNAL_FLT_FN (SIGNBIT, ECF_CONST, signbit, unary) - DEF_INTERNAL_FLT_FN (SIGNIFICAND, ECF_CONST, significand, unary) - DEF_INTERNAL_FLT_FN (SIN, ECF_CONST, sin, unary) - DEF_INTERNAL_FLT_FN (SINH, ECF_CONST, sinh, unary) -diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c -index 8988a4e49..b9e2ef450 100644 ---- a/gcc/ipa-cp.c -+++ b/gcc/ipa-cp.c -@@ -2862,8 +2862,7 @@ ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie, - if (can_refer) - { - if (!target -- || (TREE_CODE (TREE_TYPE (target)) == FUNCTION_TYPE -- && DECL_FUNCTION_CODE (target) == BUILT_IN_UNREACHABLE) -+ || fndecl_built_in_p (target, BUILT_IN_UNREACHABLE) - || !possible_polymorphic_call_target_p - (ie, cgraph_node::get (target))) - { -diff --git a/gcc/ipa-devirt.c b/gcc/ipa-devirt.c -index 2d8a0b383..df1ea21b4 100644 ---- a/gcc/ipa-devirt.c -+++ b/gcc/ipa-devirt.c -@@ -3576,12 +3576,10 @@ possible_polymorphic_call_target_p (tree otr_type, - { - vec targets; - unsigned int i; -- enum built_in_function fcode; - bool final; - -- if (TREE_CODE (TREE_TYPE (n->decl)) == FUNCTION_TYPE -- && ((fcode = DECL_FUNCTION_CODE (n->decl)) == BUILT_IN_UNREACHABLE -- || fcode == BUILT_IN_TRAP)) -+ if (fndecl_built_in_p (n->decl, BUILT_IN_UNREACHABLE) -+ || fndecl_built_in_p (n->decl, BUILT_IN_TRAP)) - return true; - - if (is_cxa_pure_virtual_p (n->decl)) -diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c -index 568c6a452..8b6961486 100644 ---- a/gcc/ipa-icf.c -+++ b/gcc/ipa-icf.c -@@ -351,8 +351,8 @@ sem_item::compare_referenced_symbol_properties (symtab_node *used_by, - return return_false_with_msg ("inline attributes are different"); - } - -- if (DECL_IS_OPERATOR_NEW (n1->decl) -- != DECL_IS_OPERATOR_NEW (n2->decl)) -+ if (DECL_IS_OPERATOR_NEW_P (n1->decl) -+ != DECL_IS_OPERATOR_NEW_P (n2->decl)) - return return_false_with_msg ("operator new flags are different"); - } - -@@ -416,7 +416,7 @@ sem_item::hash_referenced_symbol_properties (symtab_node *ref, - hstate.add_flag (DECL_DISREGARD_INLINE_LIMITS (ref->decl)); - hstate.add_flag (DECL_DECLARED_INLINE_P (ref->decl)); - } -- hstate.add_flag (DECL_IS_OPERATOR_NEW (ref->decl)); -+ hstate.add_flag (DECL_IS_OPERATOR_NEW_P (ref->decl)); - } - else if (is_a (ref)) - { -diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c -index a2fb20320..7c627eff8 100644 ---- a/gcc/ipa-inline.c -+++ b/gcc/ipa-inline.c -@@ -390,6 +390,28 @@ can_inline_edge_p (struct cgraph_edge *e, bool report, - return inlinable; - } - -+/* Return inlining_insns_single limit for function N */ -+ -+static int -+inline_insns_single (cgraph_node *n) -+{ -+ if (opt_for_fn (n->decl, optimize >= 3)) -+ return PARAM_VALUE (PARAM_MAX_INLINE_INSNS_SINGLE); -+ else -+ return PARAM_VALUE (PARAM_MAX_INLINE_INSNS_SINGLE_O2); -+} -+ -+/* Return inlining_insns_auto limit for function N */ -+ -+static int -+inline_insns_auto (cgraph_node *n) -+{ -+ if (opt_for_fn (n->decl, optimize >= 3)) -+ return PARAM_VALUE (PARAM_MAX_INLINE_INSNS_AUTO); -+ else -+ return PARAM_VALUE (PARAM_MAX_INLINE_INSNS_AUTO_O2); -+} -+ - /* Decide if we can inline the edge and possibly update - inline_failed reason. 
- We check whether inlining is possible at all and whether -@@ -532,8 +554,8 @@ can_inline_edge_by_limits_p (struct cgraph_edge *e, bool report, - int growth = estimate_edge_growth (e); - if (growth > PARAM_VALUE (PARAM_MAX_INLINE_INSNS_SIZE) - && (!DECL_DECLARED_INLINE_P (callee->decl) -- && growth >= MAX (MAX_INLINE_INSNS_SINGLE, -- MAX_INLINE_INSNS_AUTO))) -+ && growth >= MAX (inline_insns_single (caller), -+ inline_insns_auto (caller)))) - { - e->inline_failed = CIF_OPTIMIZATION_MISMATCH; - inlinable = false; -@@ -641,6 +663,10 @@ want_early_inline_function_p (struct cgraph_edge *e) - { - int growth = estimate_edge_growth (e); - int n; -+ int early_inlining_insns = opt_for_fn (e->caller->decl, optimize) >= 3 -+ ? PARAM_VALUE (PARAM_EARLY_INLINING_INSNS) -+ : PARAM_VALUE (PARAM_EARLY_INLINING_INSNS_O2); -+ - - if (growth <= PARAM_VALUE (PARAM_MAX_INLINE_INSNS_SIZE)) - ; -@@ -654,26 +680,28 @@ want_early_inline_function_p (struct cgraph_edge *e) - growth); - want_inline = false; - } -- else if (growth > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS)) -+ else if (growth > early_inlining_insns) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt, - " will not early inline: %C->%C, " -- "growth %i exceeds --param early-inlining-insns\n", -- e->caller, callee, -- growth); -+ "growth %i exceeds --param early-inlining-insns%s\n", -+ e->caller, callee, growth, -+ opt_for_fn (e->caller->decl, optimize) >= 3 -+ ? "" : "-O2"); - want_inline = false; - } - else if ((n = num_calls (callee)) != 0 -- && growth * (n + 1) > PARAM_VALUE (PARAM_EARLY_INLINING_INSNS)) -+ && growth * (n + 1) > early_inlining_insns) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, e->call_stmt, - " will not early inline: %C->%C, " -- "growth %i exceeds --param early-inlining-insns " -+ "growth %i exceeds --param early-inlining-insns%s " - "divided by number of calls\n", -- e->caller, callee, -- growth); -+ e->caller, callee, growth, -+ opt_for_fn (e->caller->decl, optimize) >= 3 -+ ? "" : "-O2"); - want_inline = false; - } - } -@@ -739,9 +767,14 @@ big_speedup_p (struct cgraph_edge *e) - sreal spec_time = estimate_edge_time (e, &unspec_time); - sreal time = compute_uninlined_call_time (e, unspec_time); - sreal inlined_time = compute_inlined_call_time (e, spec_time); -+ cgraph_node *caller = (e->caller->inlined_to -+ ? e->caller->inlined_to -+ : e->caller); -+ int limit = opt_for_fn (caller->decl, optimize) >= 3 -+ ? 
PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP) -+ : PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP_O2); - -- if ((time - inlined_time) * 100 -- > (sreal) (time * PARAM_VALUE (PARAM_INLINE_MIN_SPEEDUP))) -+ if ((time - inlined_time) * 100 > time * limit) - return true; - return false; - } -@@ -775,20 +808,29 @@ want_inline_small_function_p (struct cgraph_edge *e, bool report) - && (!e->count.ipa ().initialized_p () || !e->maybe_hot_p ())) - && ipa_fn_summaries->get (callee)->min_size - - ipa_call_summaries->get (e)->call_stmt_size -- > MAX (MAX_INLINE_INSNS_SINGLE, MAX_INLINE_INSNS_AUTO)) -+ > MAX (inline_insns_single (e->caller), -+ inline_insns_auto (e->caller))) - { -- e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT; -+ if (opt_for_fn (e->caller->decl, optimize) >= 3) -+ e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT; -+ else -+ e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_O2_LIMIT; - want_inline = false; - } - else if ((DECL_DECLARED_INLINE_P (callee->decl) - || e->count.ipa ().nonzero_p ()) - && ipa_fn_summaries->get (callee)->min_size - - ipa_call_summaries->get (e)->call_stmt_size -- > 16 * MAX_INLINE_INSNS_SINGLE) -+ > 16 * inline_insns_single (e->caller)) - { -- e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl) -- ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT -- : CIF_MAX_INLINE_INSNS_AUTO_LIMIT); -+ if (opt_for_fn (e->caller->decl, optimize) >= 3) -+ e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl) -+ ? CIF_MAX_INLINE_INSNS_SINGLE_LIMIT -+ : CIF_MAX_INLINE_INSNS_AUTO_LIMIT); -+ else -+ e->inline_failed = (DECL_DECLARED_INLINE_P (callee->decl) -+ ? CIF_MAX_INLINE_INSNS_SINGLE_O2_LIMIT -+ : CIF_MAX_INLINE_INSNS_AUTO_O2_LIMIT); - want_inline = false; - } - else -@@ -802,15 +844,18 @@ want_inline_small_function_p (struct cgraph_edge *e, bool report) - /* Apply MAX_INLINE_INSNS_SINGLE limit. Do not do so when - hints suggests that inlining given function is very profitable. */ - else if (DECL_DECLARED_INLINE_P (callee->decl) -- && growth >= MAX_INLINE_INSNS_SINGLE -- && (growth >= MAX_INLINE_INSNS_SINGLE * 16 -+ && growth >= inline_insns_single (e->caller) -+ && (growth >= inline_insns_single (e->caller) * 16 - || (!(hints & (INLINE_HINT_indirect_call - | INLINE_HINT_known_hot - | INLINE_HINT_loop_iterations - | INLINE_HINT_loop_stride)) - && !(big_speedup = big_speedup_p (e))))) - { -- e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT; -+ if (opt_for_fn (e->caller->decl, optimize) >= 3) -+ e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_LIMIT; -+ else -+ e->inline_failed = CIF_MAX_INLINE_INSNS_SINGLE_O2_LIMIT; - want_inline = false; - } - else if (!DECL_DECLARED_INLINE_P (callee->decl) -@@ -818,7 +863,7 @@ want_inline_small_function_p (struct cgraph_edge *e, bool report) - && growth >= PARAM_VALUE (PARAM_MAX_INLINE_INSNS_SMALL)) - { - /* growth_likely_positive is expensive, always test it last. */ -- if (growth >= MAX_INLINE_INSNS_SINGLE -+ if (growth >= inline_insns_single (e->caller) - || growth_likely_positive (callee, growth)) - { - e->inline_failed = CIF_NOT_DECLARED_INLINED; -@@ -833,22 +878,25 @@ want_inline_small_function_p (struct cgraph_edge *e, bool report) - && growth >= ((hints & (INLINE_HINT_indirect_call - | INLINE_HINT_loop_iterations - | INLINE_HINT_loop_stride)) -- ? MAX (MAX_INLINE_INSNS_AUTO, -- MAX_INLINE_INSNS_SINGLE) -- : MAX_INLINE_INSNS_AUTO) -+ ? MAX (inline_insns_auto (e->caller), -+ inline_insns_single (e->caller)) -+ : inline_insns_auto (e->caller)) - && !(big_speedup == -1 ? 
big_speedup_p (e) : big_speedup)) - { - /* growth_likely_positive is expensive, always test it last. */ -- if (growth >= MAX_INLINE_INSNS_SINGLE -+ if (growth >= inline_insns_single (e->caller) - || growth_likely_positive (callee, growth)) - { -- e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT; -+ if (opt_for_fn (e->caller->decl, optimize) >= 3) -+ e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_LIMIT; -+ else -+ e->inline_failed = CIF_MAX_INLINE_INSNS_AUTO_O2_LIMIT; - want_inline = false; - } - } - /* If call is cold, do not inline when function body would grow. */ - else if (!e->maybe_hot_p () -- && (growth >= MAX_INLINE_INSNS_SINGLE -+ && (growth >= inline_insns_single (e->caller) - || growth_likely_positive (callee, growth))) - { - e->inline_failed = CIF_UNLIKELY_CALL; -@@ -1157,7 +1205,7 @@ edge_badness (struct cgraph_edge *edge, bool dump) - && caller_info->inlinable - && ipa_size_summaries->get (caller)->size - < (DECL_DECLARED_INLINE_P (caller->decl) -- ? MAX_INLINE_INSNS_SINGLE : MAX_INLINE_INSNS_AUTO)) -+ ? inline_insns_single (caller) : inline_insns_auto (caller))) - { - if (dump) - fprintf (dump_file, -diff --git a/gcc/ipa-param-manipulation.c b/gcc/ipa-param-manipulation.c -index 037253a87..1af6d050c 100644 ---- a/gcc/ipa-param-manipulation.c -+++ b/gcc/ipa-param-manipulation.c -@@ -219,10 +219,7 @@ ipa_modify_formal_parameters (tree fndecl, ipa_parm_adjustment_vec adjustments) - - /* When signature changes, we need to clear builtin info. */ - if (fndecl_built_in_p (fndecl)) -- { -- DECL_BUILT_IN_CLASS (fndecl) = NOT_BUILT_IN; -- DECL_FUNCTION_CODE (fndecl) = (enum built_in_function) 0; -- } -+ set_decl_built_in_function (fndecl, NOT_BUILT_IN, 0); - - TREE_TYPE (fndecl) = new_type; - DECL_VIRTUAL_P (fndecl) = 0; -@@ -452,14 +449,7 @@ ipa_modify_call_arguments (struct cgraph_edge *cs, gcall *stmt, - gimple_call_set_chain (new_stmt, gimple_call_chain (stmt)); - gimple_call_copy_flags (new_stmt, stmt); - if (gimple_in_ssa_p (cfun)) -- { -- gimple_set_vuse (new_stmt, gimple_vuse (stmt)); -- if (gimple_vdef (stmt)) -- { -- gimple_set_vdef (new_stmt, gimple_vdef (stmt)); -- SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt; -- } -- } -+ gimple_move_vops (new_stmt, stmt); - - if (dump_file && (dump_flags & TDF_DETAILS)) - { -diff --git a/gcc/ipa-prop.c b/gcc/ipa-prop.c -index 0439ce0c5..a70319505 100644 ---- a/gcc/ipa-prop.c -+++ b/gcc/ipa-prop.c -@@ -3685,8 +3685,7 @@ try_make_edge_direct_virtual_call (struct cgraph_edge *ie, - if (can_refer) - { - if (!t -- || (TREE_CODE (TREE_TYPE (t)) == FUNCTION_TYPE -- && DECL_FUNCTION_CODE (t) == BUILT_IN_UNREACHABLE) -+ || fndecl_built_in_p (t, BUILT_IN_UNREACHABLE) - || !possible_polymorphic_call_target_p - (ie, cgraph_node::get (t))) - { -diff --git a/gcc/ipa-split.c b/gcc/ipa-split.c -index 5eaf8257f..aef2fa53c 100644 ---- a/gcc/ipa-split.c -+++ b/gcc/ipa-split.c -@@ -1348,10 +1348,7 @@ split_function (basic_block return_bb, struct split_point *split_point, - changes. For partial inlining we however cannot expect the part - of builtin implementation to have same semantic as the whole. */ - if (fndecl_built_in_p (node->decl)) -- { -- DECL_BUILT_IN_CLASS (node->decl) = NOT_BUILT_IN; -- DECL_FUNCTION_CODE (node->decl) = (enum built_in_function) 0; -- } -+ set_decl_built_in_function (node->decl, NOT_BUILT_IN, 0); - - /* If return_bb contains any clobbers that refer to SSA_NAMEs - set in the split part, remove them. 
Also reset debug stmts that -diff --git a/gcc/ira-build.c b/gcc/ira-build.c -index 83caa3a8e..55c552679 100644 ---- a/gcc/ira-build.c -+++ b/gcc/ira-build.c -@@ -456,12 +456,10 @@ ira_create_object (ira_allocno_t a, int subword) - OBJECT_CONFLICT_VEC_P (obj) = false; - OBJECT_CONFLICT_ARRAY (obj) = NULL; - OBJECT_NUM_CONFLICTS (obj) = 0; -- COPY_HARD_REG_SET (OBJECT_CONFLICT_HARD_REGS (obj), ira_no_alloc_regs); -- COPY_HARD_REG_SET (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), ira_no_alloc_regs); -- IOR_COMPL_HARD_REG_SET (OBJECT_CONFLICT_HARD_REGS (obj), -- reg_class_contents[aclass]); -- IOR_COMPL_HARD_REG_SET (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), -- reg_class_contents[aclass]); -+ OBJECT_CONFLICT_HARD_REGS (obj) = ira_no_alloc_regs; -+ OBJECT_TOTAL_CONFLICT_HARD_REGS (obj) = ira_no_alloc_regs; -+ OBJECT_CONFLICT_HARD_REGS (obj) |= ~reg_class_contents[aclass]; -+ OBJECT_TOTAL_CONFLICT_HARD_REGS (obj) |= ~reg_class_contents[aclass]; - OBJECT_MIN (obj) = INT_MAX; - OBJECT_MAX (obj) = -1; - OBJECT_LIVE_RANGES (obj) = NULL; -@@ -549,10 +547,8 @@ ira_set_allocno_class (ira_allocno_t a, enum reg_class aclass) - ALLOCNO_CLASS (a) = aclass; - FOR_EACH_ALLOCNO_OBJECT (a, obj, oi) - { -- IOR_COMPL_HARD_REG_SET (OBJECT_CONFLICT_HARD_REGS (obj), -- reg_class_contents[aclass]); -- IOR_COMPL_HARD_REG_SET (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), -- reg_class_contents[aclass]); -+ OBJECT_CONFLICT_HARD_REGS (obj) |= ~reg_class_contents[aclass]; -+ OBJECT_TOTAL_CONFLICT_HARD_REGS (obj) |= ~reg_class_contents[aclass]; - } - } - -@@ -602,10 +598,10 @@ merge_hard_reg_conflicts (ira_allocno_t from, ira_allocno_t to, - ira_object_t to_obj = ALLOCNO_OBJECT (to, i); - - if (!total_only) -- IOR_HARD_REG_SET (OBJECT_CONFLICT_HARD_REGS (to_obj), -- OBJECT_CONFLICT_HARD_REGS (from_obj)); -- IOR_HARD_REG_SET (OBJECT_TOTAL_CONFLICT_HARD_REGS (to_obj), -- OBJECT_TOTAL_CONFLICT_HARD_REGS (from_obj)); -+ OBJECT_CONFLICT_HARD_REGS (to_obj) -+ |= OBJECT_CONFLICT_HARD_REGS (from_obj); -+ OBJECT_TOTAL_CONFLICT_HARD_REGS (to_obj) -+ |= OBJECT_TOTAL_CONFLICT_HARD_REGS (from_obj); - } - #ifdef STACK_REGS - if (!total_only && ALLOCNO_NO_STACK_REG_P (from)) -@@ -618,15 +614,15 @@ merge_hard_reg_conflicts (ira_allocno_t from, ira_allocno_t to, - /* Update hard register conflict information for all objects associated with - A to include the regs in SET. 
*/ - void --ior_hard_reg_conflicts (ira_allocno_t a, HARD_REG_SET *set) -+ior_hard_reg_conflicts (ira_allocno_t a, const_hard_reg_set set) - { - ira_allocno_object_iterator i; - ira_object_t obj; - - FOR_EACH_ALLOCNO_OBJECT (a, obj, i) - { -- IOR_HARD_REG_SET (OBJECT_CONFLICT_HARD_REGS (obj), *set); -- IOR_HARD_REG_SET (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), *set); -+ OBJECT_CONFLICT_HARD_REGS (obj) |= set; -+ OBJECT_TOTAL_CONFLICT_HARD_REGS (obj) |= set; - } - } - -@@ -907,8 +903,9 @@ create_cap_allocno (ira_allocno_t a) - - ALLOCNO_CALLS_CROSSED_NUM (cap) = ALLOCNO_CALLS_CROSSED_NUM (a); - ALLOCNO_CHEAP_CALLS_CROSSED_NUM (cap) = ALLOCNO_CHEAP_CALLS_CROSSED_NUM (a); -- IOR_HARD_REG_SET (ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (cap), -- ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (a)); -+ ALLOCNO_CROSSED_CALLS_ABIS (cap) = ALLOCNO_CROSSED_CALLS_ABIS (a); -+ ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (cap) -+ = ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (a); - if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL) - { - fprintf (ira_dump_file, " Creating cap "); -@@ -1876,11 +1873,6 @@ create_insn_allocnos (rtx x, rtx outer, bool output_p) - create_insn_allocnos (XEXP (x, 0), NULL, true); - return; - } -- else if (code == CLOBBER_HIGH) -- { -- gcc_assert (REG_P (XEXP (x, 0)) && HARD_REGISTER_P (XEXP (x, 0))); -- return; -- } - else if (code == MEM) - { - create_insn_allocnos (XEXP (x, 0), NULL, false); -@@ -2036,8 +2028,10 @@ propagate_allocno_info (void) - += ALLOCNO_CALLS_CROSSED_NUM (a); - ALLOCNO_CHEAP_CALLS_CROSSED_NUM (parent_a) - += ALLOCNO_CHEAP_CALLS_CROSSED_NUM (a); -- IOR_HARD_REG_SET (ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (parent_a), -- ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (a)); -+ ALLOCNO_CROSSED_CALLS_ABIS (parent_a) -+ |= ALLOCNO_CROSSED_CALLS_ABIS (a); -+ ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (parent_a) -+ |= ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (a); - ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (parent_a) - += ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (a); - aclass = ALLOCNO_CLASS (a); -@@ -2419,8 +2413,9 @@ propagate_some_info_from_allocno (ira_allocno_t a, ira_allocno_t from_a) - ALLOCNO_CALLS_CROSSED_NUM (a) += ALLOCNO_CALLS_CROSSED_NUM (from_a); - ALLOCNO_CHEAP_CALLS_CROSSED_NUM (a) - += ALLOCNO_CHEAP_CALLS_CROSSED_NUM (from_a); -- IOR_HARD_REG_SET (ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (a), -- ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (from_a)); -+ ALLOCNO_CROSSED_CALLS_ABIS (a) |= ALLOCNO_CROSSED_CALLS_ABIS (from_a); -+ ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (a) -+ |= ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (from_a); - - ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (a) - += ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (from_a); -@@ -2569,8 +2564,8 @@ remove_low_level_allocnos (void) - ALLOCNO_NEXT_REGNO_ALLOCNO (a) = NULL; - ALLOCNO_CAP_MEMBER (a) = NULL; - FOR_EACH_ALLOCNO_OBJECT (a, obj, oi) -- COPY_HARD_REG_SET (OBJECT_CONFLICT_HARD_REGS (obj), -- OBJECT_TOTAL_CONFLICT_HARD_REGS (obj)); -+ OBJECT_CONFLICT_HARD_REGS (obj) -+ = OBJECT_TOTAL_CONFLICT_HARD_REGS (obj); - #ifdef STACK_REGS - if (ALLOCNO_TOTAL_NO_STACK_REG_P (a)) - ALLOCNO_NO_STACK_REG_P (a) = true; -@@ -3060,8 +3055,10 @@ copy_info_to_removed_store_destinations (int regno) - += ALLOCNO_CALLS_CROSSED_NUM (a); - ALLOCNO_CHEAP_CALLS_CROSSED_NUM (parent_a) - += ALLOCNO_CHEAP_CALLS_CROSSED_NUM (a); -- IOR_HARD_REG_SET (ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (parent_a), -- ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (a)); -+ ALLOCNO_CROSSED_CALLS_ABIS (parent_a) -+ |= ALLOCNO_CROSSED_CALLS_ABIS (a); -+ ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (parent_a) -+ |= 
ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (a); - ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (parent_a) - += ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (a); - merged_p = true; -@@ -3108,8 +3105,8 @@ ira_flattening (int max_regno_before_emit, int ira_max_point_before_emit) - flattening. */ - continue; - FOR_EACH_ALLOCNO_OBJECT (a, obj, oi) -- COPY_HARD_REG_SET (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), -- OBJECT_CONFLICT_HARD_REGS (obj)); -+ OBJECT_TOTAL_CONFLICT_HARD_REGS (obj) -+ = OBJECT_CONFLICT_HARD_REGS (obj); - #ifdef STACK_REGS - ALLOCNO_TOTAL_NO_STACK_REG_P (a) = ALLOCNO_NO_STACK_REG_P (a); - #endif -@@ -3159,6 +3156,9 @@ ira_flattening (int max_regno_before_emit, int ira_max_point_before_emit) - -= ALLOCNO_CALLS_CROSSED_NUM (a); - ALLOCNO_CHEAP_CALLS_CROSSED_NUM (parent_a) - -= ALLOCNO_CHEAP_CALLS_CROSSED_NUM (a); -+ /* Assume that ALLOCNO_CROSSED_CALLS_ABIS and -+ ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS stay the same. -+ We'd need to rebuild the IR to do better. */ - ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (parent_a) - -= ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (a); - ira_assert (ALLOCNO_CALLS_CROSSED_NUM (parent_a) >= 0 -@@ -3466,7 +3466,7 @@ ira_build (void) - allocno crossing calls. */ - FOR_EACH_ALLOCNO (a, ai) - if (ALLOCNO_CALLS_CROSSED_NUM (a) != 0) -- ior_hard_reg_conflicts (a, &call_used_reg_set); -+ ior_hard_reg_conflicts (a, ira_need_caller_save_regs (a)); - } - if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL) - print_copies (ira_dump_file); -diff --git a/gcc/ira-color.c b/gcc/ira-color.c -index 8a90ae1b4..62499be91 100644 ---- a/gcc/ira-color.c -+++ b/gcc/ira-color.c -@@ -218,7 +218,7 @@ inline bool - allocno_hard_regs_hasher::equal (const allocno_hard_regs *hv1, - const allocno_hard_regs *hv2) - { -- return hard_reg_set_equal_p (hv1->set, hv2->set); -+ return hv1->set == hv2->set; - } - - /* Hash table of unique allocno hard registers. */ -@@ -261,14 +261,14 @@ add_allocno_hard_regs (HARD_REG_SET set, int64_t cost) - allocno_hard_regs_t hv; - - gcc_assert (! 
hard_reg_set_empty_p (set)); -- COPY_HARD_REG_SET (temp.set, set); -+ temp.set = set; - if ((hv = find_hard_regs (&temp)) != NULL) - hv->cost += cost; - else - { - hv = ((struct allocno_hard_regs *) - ira_allocate (sizeof (struct allocno_hard_regs))); -- COPY_HARD_REG_SET (hv->set, set); -+ hv->set = set; - hv->cost = cost; - allocno_hard_regs_vec.safe_push (hv); - insert_hard_regs (hv); -@@ -371,7 +371,7 @@ add_allocno_hard_regs_to_forest (allocno_hard_regs_node_t *roots, - start = hard_regs_node_vec.length (); - for (node = *roots; node != NULL; node = node->next) - { -- if (hard_reg_set_equal_p (hv->set, node->hard_regs->set)) -+ if (hv->set == node->hard_regs->set) - return; - if (hard_reg_set_subset_p (hv->set, node->hard_regs->set)) - { -@@ -382,8 +382,7 @@ add_allocno_hard_regs_to_forest (allocno_hard_regs_node_t *roots, - hard_regs_node_vec.safe_push (node); - else if (hard_reg_set_intersect_p (hv->set, node->hard_regs->set)) - { -- COPY_HARD_REG_SET (temp_set, hv->set); -- AND_HARD_REG_SET (temp_set, node->hard_regs->set); -+ temp_set = hv->set & node->hard_regs->set; - hv2 = add_allocno_hard_regs (temp_set, hv->cost); - add_allocno_hard_regs_to_forest (&node->first, hv2); - } -@@ -398,7 +397,7 @@ add_allocno_hard_regs_to_forest (allocno_hard_regs_node_t *roots, - i++) - { - node = hard_regs_node_vec[i]; -- IOR_HARD_REG_SET (temp_set, node->hard_regs->set); -+ temp_set |= node->hard_regs->set; - } - hv = add_allocno_hard_regs (temp_set, hv->cost); - new_node = create_new_allocno_hard_regs_node (hv); -@@ -717,8 +716,7 @@ form_allocno_hard_regs_nodes_forest (void) - (allocno_data->profitable_hard_regs, - ALLOCNO_MEMORY_COST (a) - ALLOCNO_CLASS_COST (a))); - } -- SET_HARD_REG_SET (temp); -- AND_COMPL_HARD_REG_SET (temp, ira_no_alloc_regs); -+ temp = ~ira_no_alloc_regs; - add_allocno_hard_regs (temp, 0); - qsort (allocno_hard_regs_vec.address () + start, - allocno_hard_regs_vec.length () - start, -@@ -833,10 +831,10 @@ setup_left_conflict_sizes_p (ira_allocno_t a) - nobj = ALLOCNO_NUM_OBJECTS (a); - data = ALLOCNO_COLOR_DATA (a); - subnodes = allocno_hard_regs_subnodes + data->hard_regs_subnodes_start; -- COPY_HARD_REG_SET (profitable_hard_regs, data->profitable_hard_regs); -+ profitable_hard_regs = data->profitable_hard_regs; - node = data->hard_regs_node; - node_preorder_num = node->preorder_num; -- COPY_HARD_REG_SET (node_set, node->hard_regs->set); -+ node_set = node->hard_regs->set; - node_check_tick++; - for (k = 0; k < nobj; k++) - { -@@ -859,7 +857,7 @@ setup_left_conflict_sizes_p (ira_allocno_t a) - ->profitable_hard_regs)) - continue; - conflict_node = conflict_data->hard_regs_node; -- COPY_HARD_REG_SET (conflict_node_set, conflict_node->hard_regs->set); -+ conflict_node_set = conflict_node->hard_regs->set; - if (hard_reg_set_subset_p (node_set, conflict_node_set)) - temp_node = node; - else -@@ -897,8 +895,7 @@ setup_left_conflict_sizes_p (ira_allocno_t a) - int j, n, hard_regno; - enum reg_class aclass; - -- COPY_HARD_REG_SET (temp_set, temp_node->hard_regs->set); -- AND_HARD_REG_SET (temp_set, profitable_hard_regs); -+ temp_set = temp_node->hard_regs->set & profitable_hard_regs; - aclass = ALLOCNO_CLASS (a); - for (n = 0, j = ira_class_hard_regs_num[aclass] - 1; j >= 0; j--) - { -@@ -1042,15 +1039,15 @@ setup_profitable_hard_regs (void) - else - { - mode = ALLOCNO_MODE (a); -- COPY_HARD_REG_SET (data->profitable_hard_regs, -- ira_useful_class_mode_regs[aclass][mode]); -+ data->profitable_hard_regs -+ = ira_useful_class_mode_regs[aclass][mode]; - nobj = ALLOCNO_NUM_OBJECTS 
(a); - for (k = 0; k < nobj; k++) - { - ira_object_t obj = ALLOCNO_OBJECT (a, k); - -- AND_COMPL_HARD_REG_SET (data->profitable_hard_regs, -- OBJECT_TOTAL_CONFLICT_HARD_REGS (obj)); -+ data->profitable_hard_regs -+ &= ~OBJECT_TOTAL_CONFLICT_HARD_REGS (obj); - } - } - } -@@ -1091,9 +1088,8 @@ setup_profitable_hard_regs (void) - hard_regno + num); - } - else -- AND_COMPL_HARD_REG_SET -- (ALLOCNO_COLOR_DATA (conflict_a)->profitable_hard_regs, -- ira_reg_mode_hard_regset[hard_regno][mode]); -+ ALLOCNO_COLOR_DATA (conflict_a)->profitable_hard_regs -+ &= ~ira_reg_mode_hard_regset[hard_regno][mode]; - } - } - } -@@ -1589,20 +1585,15 @@ get_conflict_and_start_profitable_regs (ira_allocno_t a, bool retry_p, - for (i = 0; i < nwords; i++) - { - obj = ALLOCNO_OBJECT (a, i); -- COPY_HARD_REG_SET (conflict_regs[i], -- OBJECT_TOTAL_CONFLICT_HARD_REGS (obj)); -+ conflict_regs[i] = OBJECT_TOTAL_CONFLICT_HARD_REGS (obj); - } - if (retry_p) -- { -- COPY_HARD_REG_SET (*start_profitable_regs, -- reg_class_contents[ALLOCNO_CLASS (a)]); -- AND_COMPL_HARD_REG_SET (*start_profitable_regs, -- ira_prohibited_class_mode_regs -- [ALLOCNO_CLASS (a)][ALLOCNO_MODE (a)]); -- } -+ *start_profitable_regs -+ = (reg_class_contents[ALLOCNO_CLASS (a)] -+ &~ (ira_prohibited_class_mode_regs -+ [ALLOCNO_CLASS (a)][ALLOCNO_MODE (a)])); - else -- COPY_HARD_REG_SET (*start_profitable_regs, -- ALLOCNO_COLOR_DATA (a)->profitable_hard_regs); -+ *start_profitable_regs = ALLOCNO_COLOR_DATA (a)->profitable_hard_regs; - } - - /* Return true if HARD_REGNO is ok for assigning to allocno A with -@@ -1659,7 +1650,7 @@ calculate_saved_nregs (int hard_regno, machine_mode mode) - ira_assert (hard_regno >= 0); - for (i = hard_regno_nregs (hard_regno, mode) - 1; i >= 0; i--) - if (!allocated_hardreg_p[hard_regno + i] -- && !TEST_HARD_REG_BIT (call_used_reg_set, hard_regno + i) -+ && !crtl->abi->clobbers_full_reg_p (hard_regno + i) - && !LOCAL_REGNO (hard_regno + i)) - nregs++; - return nregs; -@@ -1803,9 +1794,8 @@ assign_hard_reg (ira_allocno_t a, bool retry_p) - hard_regno + num); - } - else -- IOR_HARD_REG_SET -- (conflicting_regs[word], -- ira_reg_mode_hard_regset[hard_regno][mode]); -+ conflicting_regs[word] -+ |= ira_reg_mode_hard_regset[hard_regno][mode]; - if (hard_reg_set_subset_p (profitable_hard_regs, - conflicting_regs[word])) - goto fail; -@@ -2698,8 +2688,7 @@ setup_allocno_available_regs_num (ira_allocno_t a) - reg_class_names[aclass], ira_class_hard_regs_num[aclass], n); - print_hard_reg_set (ira_dump_file, data->profitable_hard_regs, false); - fprintf (ira_dump_file, ", %snode: ", -- hard_reg_set_equal_p (data->profitable_hard_regs, -- data->hard_regs_node->hard_regs->set) -+ data->profitable_hard_regs == data->hard_regs_node->hard_regs->set - ? "" : "^"); - print_hard_reg_set (ira_dump_file, - data->hard_regs_node->hard_regs->set, false); -@@ -4387,11 +4376,10 @@ allocno_reload_assign (ira_allocno_t a, HARD_REG_SET forbidden_regs) - for (i = 0; i < n; i++) - { - ira_object_t obj = ALLOCNO_OBJECT (a, i); -- COPY_HARD_REG_SET (saved[i], OBJECT_TOTAL_CONFLICT_HARD_REGS (obj)); -- IOR_HARD_REG_SET (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), forbidden_regs); -+ saved[i] = OBJECT_TOTAL_CONFLICT_HARD_REGS (obj); -+ OBJECT_TOTAL_CONFLICT_HARD_REGS (obj) |= forbidden_regs; - if (! 
flag_caller_saves && ALLOCNO_CALLS_CROSSED_NUM (a) != 0) -- IOR_HARD_REG_SET (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), -- call_used_reg_set); -+ OBJECT_TOTAL_CONFLICT_HARD_REGS (obj) |= ira_need_caller_save_regs (a); - } - ALLOCNO_ASSIGNED_P (a) = false; - aclass = ALLOCNO_CLASS (a); -@@ -4410,9 +4398,7 @@ allocno_reload_assign (ira_allocno_t a, HARD_REG_SET forbidden_regs) - ? ALLOCNO_CLASS_COST (a) - : ALLOCNO_HARD_REG_COSTS (a)[ira_class_hard_reg_index - [aclass][hard_regno]])); -- if (ALLOCNO_CALLS_CROSSED_NUM (a) != 0 -- && ira_hard_reg_set_intersection_p (hard_regno, ALLOCNO_MODE (a), -- call_used_reg_set)) -+ if (ira_need_caller_save_p (a, regno)) - { - ira_assert (flag_caller_saves); - caller_save_needed = 1; -@@ -4434,7 +4420,7 @@ allocno_reload_assign (ira_allocno_t a, HARD_REG_SET forbidden_regs) - for (i = 0; i < n; i++) - { - ira_object_t obj = ALLOCNO_OBJECT (a, i); -- COPY_HARD_REG_SET (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), saved[i]); -+ OBJECT_TOTAL_CONFLICT_HARD_REGS (obj) = saved[i]; - } - return reg_renumber[regno] >= 0; - } -@@ -4519,9 +4505,9 @@ ira_reassign_pseudos (int *spilled_pseudo_regs, int num, - for (i = 0; i < num; i++) - { - regno = spilled_pseudo_regs[i]; -- COPY_HARD_REG_SET (forbidden_regs, bad_spill_regs); -- IOR_HARD_REG_SET (forbidden_regs, pseudo_forbidden_regs[regno]); -- IOR_HARD_REG_SET (forbidden_regs, pseudo_previous_regs[regno]); -+ forbidden_regs = (bad_spill_regs -+ | pseudo_forbidden_regs[regno] -+ | pseudo_previous_regs[regno]); - gcc_assert (reg_renumber[regno] < 0); - a = ira_regno_allocno_map[regno]; - ira_mark_allocation_change (regno); -@@ -4699,16 +4685,16 @@ ira_mark_new_stack_slot (rtx x, int regno, poly_uint64 total_size) - given IN and OUT for INSN. Return also number points (through - EXCESS_PRESSURE_LIVE_LENGTH) where the pseudo-register lives and - the register pressure is high, number of references of the -- pseudo-registers (through NREFS), number of callee-clobbered -- hard-registers occupied by the pseudo-registers (through -- CALL_USED_COUNT), and the first hard regno occupied by the -+ pseudo-registers (through NREFS), the number of psuedo registers -+ whose allocated register wouldn't need saving in the prologue -+ (through CALL_USED_COUNT), and the first hard regno occupied by the - pseudo-registers (through FIRST_HARD_REGNO). */ - static int - calculate_spill_cost (int *regnos, rtx in, rtx out, rtx_insn *insn, - int *excess_pressure_live_length, - int *nrefs, int *call_used_count, int *first_hard_regno) - { -- int i, cost, regno, hard_regno, j, count, saved_cost, nregs; -+ int i, cost, regno, hard_regno, count, saved_cost; - bool in_p, out_p; - int length; - ira_allocno_t a; -@@ -4725,11 +4711,8 @@ calculate_spill_cost (int *regnos, rtx in, rtx out, rtx_insn *insn, - a = ira_regno_allocno_map[regno]; - length += ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (a) / ALLOCNO_NUM_OBJECTS (a); - cost += ALLOCNO_MEMORY_COST (a) - ALLOCNO_CLASS_COST (a); -- nregs = hard_regno_nregs (hard_regno, ALLOCNO_MODE (a)); -- for (j = 0; j < nregs; j++) -- if (! 
TEST_HARD_REG_BIT (call_used_reg_set, hard_regno + j)) -- break; -- if (j == nregs) -+ if (in_hard_reg_set_p (crtl->abi->full_reg_clobbers (), -+ ALLOCNO_MODE (a), hard_regno)) - count++; - in_p = in && REG_P (in) && (int) REGNO (in) == hard_regno; - out_p = out && REG_P (out) && (int) REGNO (out) == hard_regno; -@@ -4886,11 +4869,10 @@ fast_allocation (void) - for (l = 0; l < nr; l++) - { - ira_object_t obj = ALLOCNO_OBJECT (a, l); -- IOR_HARD_REG_SET (conflict_hard_regs, -- OBJECT_CONFLICT_HARD_REGS (obj)); -+ conflict_hard_regs |= OBJECT_CONFLICT_HARD_REGS (obj); - for (r = OBJECT_LIVE_RANGES (obj); r != NULL; r = r->next) - for (j = r->start; j <= r->finish; j++) -- IOR_HARD_REG_SET (conflict_hard_regs, used_hard_regs[j]); -+ conflict_hard_regs |= used_hard_regs[j]; - } - aclass = ALLOCNO_CLASS (a); - ALLOCNO_ASSIGNED_P (a) = true; -@@ -4938,8 +4920,7 @@ fast_allocation (void) - ira_object_t obj = ALLOCNO_OBJECT (a, l); - for (r = OBJECT_LIVE_RANGES (obj); r != NULL; r = r->next) - for (k = r->start; k <= r->finish; k++) -- IOR_HARD_REG_SET (used_hard_regs[k], -- ira_reg_mode_hard_regset[hard_regno][mode]); -+ used_hard_regs[k] |= ira_reg_mode_hard_regset[hard_regno][mode]; - } - } - ira_free (sorted_allocnos); -diff --git a/gcc/ira-conflicts.c b/gcc/ira-conflicts.c -index 9a3e3811d..a0aefaa05 100644 ---- a/gcc/ira-conflicts.c -+++ b/gcc/ira-conflicts.c -@@ -325,12 +325,37 @@ process_regs_for_copy (rtx reg1, rtx reg2, bool constraint_p, - return true; - } - --/* Process all of the output registers of the current insn which are -- not bound (BOUND_P) and the input register REG (its operand number -+/* Return true if output operand OUTPUT and input operand INPUT of -+ INSN can use the same register class for at least one alternative. -+ INSN is already described in recog_data and recog_op_alt. */ -+static bool -+can_use_same_reg_p (rtx_insn *insn, int output, int input) -+{ -+ alternative_mask preferred = get_preferred_alternatives (insn); -+ for (int nalt = 0; nalt < recog_data.n_alternatives; nalt++) -+ { -+ if (!TEST_BIT (preferred, nalt)) -+ continue; -+ -+ const operand_alternative *op_alt -+ = &recog_op_alt[nalt * recog_data.n_operands]; -+ if (op_alt[input].matches == output) -+ return true; -+ -+ if (ira_reg_class_intersect[op_alt[input].cl][op_alt[output].cl] -+ != NO_REGS) -+ return true; -+ } -+ return false; -+} -+ -+/* Process all of the output registers of the current insn (INSN) which -+ are not bound (BOUND_P) and the input register REG (its operand number - OP_NUM) which dies in the insn as if there were a move insn between - them with frequency FREQ. 
*/ - static void --process_reg_shuffles (rtx reg, int op_num, int freq, bool *bound_p) -+process_reg_shuffles (rtx_insn *insn, rtx reg, int op_num, int freq, -+ bool *bound_p) - { - int i; - rtx another_reg; -@@ -342,7 +367,13 @@ process_reg_shuffles (rtx reg, int op_num, int freq, bool *bound_p) - - if (!REG_SUBREG_P (another_reg) || op_num == i - || recog_data.operand_type[i] != OP_OUT -- || bound_p[i]) -+ || bound_p[i] -+ || (!can_use_same_reg_p (insn, i, op_num) -+ && (recog_data.constraints[op_num][0] != '%' -+ || !can_use_same_reg_p (insn, i, op_num + 1)) -+ && (op_num == 0 -+ || recog_data.constraints[op_num - 1][0] != '%' -+ || !can_use_same_reg_p (insn, i, op_num - 1)))) - continue; - - process_regs_for_copy (reg, another_reg, false, NULL, freq); -@@ -358,7 +389,7 @@ add_insn_allocno_copies (rtx_insn *insn) - rtx set, operand, dup; - bool bound_p[MAX_RECOG_OPERANDS]; - int i, n, freq; -- HARD_REG_SET alts; -+ alternative_mask alts; - - freq = REG_FREQ_FROM_BB (BLOCK_FOR_INSN (insn)); - if (freq == 0) -@@ -379,7 +410,7 @@ add_insn_allocno_copies (rtx_insn *insn) - there are no dead registers, there will be no such copies. */ - if (! find_reg_note (insn, REG_DEAD, NULL_RTX)) - return; -- ira_setup_alts (insn, alts); -+ alts = ira_setup_alts (insn); - for (i = 0; i < recog_data.n_operands; i++) - bound_p[i] = false; - for (i = 0; i < recog_data.n_operands; i++) -@@ -412,7 +443,8 @@ add_insn_allocno_copies (rtx_insn *insn) - the corresponding allocno copies. The cost will not - correspond to a real move insn cost, so make the frequency - smaller. */ -- process_reg_shuffles (operand, i, freq < 8 ? 1 : freq / 8, bound_p); -+ process_reg_shuffles (insn, operand, i, freq < 8 ? 1 : freq / 8, -+ bound_p); - } - } - -@@ -660,17 +692,15 @@ print_allocno_conflicts (FILE * file, bool reg_p, ira_allocno_t a) - putc (')', file); - } - } -- COPY_HARD_REG_SET (conflicting_hard_regs, OBJECT_TOTAL_CONFLICT_HARD_REGS (obj)); -- AND_COMPL_HARD_REG_SET (conflicting_hard_regs, ira_no_alloc_regs); -- AND_HARD_REG_SET (conflicting_hard_regs, -- reg_class_contents[ALLOCNO_CLASS (a)]); -+ conflicting_hard_regs = (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj) -+ & ~ira_no_alloc_regs -+ & reg_class_contents[ALLOCNO_CLASS (a)]); - print_hard_reg_set (file, "\n;; total conflict hard regs:", - conflicting_hard_regs); - -- COPY_HARD_REG_SET (conflicting_hard_regs, OBJECT_CONFLICT_HARD_REGS (obj)); -- AND_COMPL_HARD_REG_SET (conflicting_hard_regs, ira_no_alloc_regs); -- AND_HARD_REG_SET (conflicting_hard_regs, -- reg_class_contents[ALLOCNO_CLASS (a)]); -+ conflicting_hard_regs = (OBJECT_CONFLICT_HARD_REGS (obj) -+ & ~ira_no_alloc_regs -+ & reg_class_contents[ALLOCNO_CLASS (a)]); - print_hard_reg_set (file, ";; conflict hard regs:", - conflicting_hard_regs); - putc ('\n', file); -@@ -740,11 +770,7 @@ ira_build_conflicts (void) - if (! targetm.class_likely_spilled_p (base)) - CLEAR_HARD_REG_SET (temp_hard_reg_set); - else -- { -- COPY_HARD_REG_SET (temp_hard_reg_set, reg_class_contents[base]); -- AND_COMPL_HARD_REG_SET (temp_hard_reg_set, ira_no_alloc_regs); -- AND_HARD_REG_SET (temp_hard_reg_set, call_used_reg_set); -- } -+ temp_hard_reg_set = reg_class_contents[base] & ~ira_no_alloc_regs; - FOR_EACH_ALLOCNO (a, ai) - { - int i, n = ALLOCNO_NUM_OBJECTS (a); -@@ -752,33 +778,28 @@ ira_build_conflicts (void) - for (i = 0; i < n; i++) - { - ira_object_t obj = ALLOCNO_OBJECT (a, i); -- machine_mode obj_mode = obj->allocno->mode; - rtx allocno_reg = regno_reg_rtx [ALLOCNO_REGNO (a)]; - -- if ((! 
flag_caller_saves && ALLOCNO_CALLS_CROSSED_NUM (a) != 0) -- /* For debugging purposes don't put user defined variables in -- callee-clobbered registers. However, do allow parameters -- in callee-clobbered registers to improve debugging. This -- is a bit of a fragile hack. */ -- || (optimize == 0 -- && REG_USERVAR_P (allocno_reg) -- && ! reg_is_parm_p (allocno_reg))) -+ /* For debugging purposes don't put user defined variables in -+ callee-clobbered registers. However, do allow parameters -+ in callee-clobbered registers to improve debugging. This -+ is a bit of a fragile hack. */ -+ if (optimize == 0 -+ && REG_USERVAR_P (allocno_reg) -+ && ! reg_is_parm_p (allocno_reg)) - { -- IOR_HARD_REG_SET (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), -- call_used_reg_set); -- IOR_HARD_REG_SET (OBJECT_CONFLICT_HARD_REGS (obj), -- call_used_reg_set); -+ HARD_REG_SET new_conflict_regs = crtl->abi->full_reg_clobbers (); -+ OBJECT_TOTAL_CONFLICT_HARD_REGS (obj) |= new_conflict_regs; -+ OBJECT_CONFLICT_HARD_REGS (obj) |= new_conflict_regs; - } -- else if (ALLOCNO_CALLS_CROSSED_NUM (a) != 0) -+ -+ if (ALLOCNO_CALLS_CROSSED_NUM (a) != 0) - { -- IOR_HARD_REG_SET (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), -- no_caller_save_reg_set); -- IOR_HARD_REG_SET (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), -- temp_hard_reg_set); -- IOR_HARD_REG_SET (OBJECT_CONFLICT_HARD_REGS (obj), -- no_caller_save_reg_set); -- IOR_HARD_REG_SET (OBJECT_CONFLICT_HARD_REGS (obj), -- temp_hard_reg_set); -+ HARD_REG_SET new_conflict_regs = ira_need_caller_save_regs (a); -+ if (flag_caller_saves) -+ new_conflict_regs &= (~savable_regs | temp_hard_reg_set); -+ OBJECT_TOTAL_CONFLICT_HARD_REGS (obj) |= new_conflict_regs; -+ OBJECT_CONFLICT_HARD_REGS (obj) |= new_conflict_regs; - } - - /* Now we deal with paradoxical subreg cases where certain registers -@@ -805,23 +826,6 @@ ira_build_conflicts (void) - } - } - } -- -- if (ALLOCNO_CALLS_CROSSED_NUM (a) != 0) -- { -- int regno; -- -- /* Allocnos bigger than the saved part of call saved -- regs must conflict with them. */ -- for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) -- if (!TEST_HARD_REG_BIT (call_used_reg_set, regno) -- && targetm.hard_regno_call_part_clobbered (NULL, regno, -- obj_mode)) -- { -- SET_HARD_REG_BIT (OBJECT_CONFLICT_HARD_REGS (obj), regno); -- SET_HARD_REG_BIT (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), -- regno); -- } -- } - } - } - if (optimize && ira_conflicts_p -diff --git a/gcc/ira-costs.c b/gcc/ira-costs.c -index c7feaba37..baf7261dd 100644 ---- a/gcc/ira-costs.c -+++ b/gcc/ira-costs.c -@@ -237,7 +237,7 @@ setup_cost_classes (cost_classes_t from) - allocated. */ - static cost_classes_t - restrict_cost_classes (cost_classes_t full, machine_mode mode, -- const HARD_REG_SET ®s) -+ const_hard_reg_set regs) - { - static struct cost_classes narrow; - int map[N_REG_CLASSES]; -@@ -254,12 +254,9 @@ restrict_cost_classes (cost_classes_t full, machine_mode mode, - - /* Calculate the set of registers in CL that belong to REGS and - are valid for MODE. 
*/ -- HARD_REG_SET valid_for_cl; -- COPY_HARD_REG_SET (valid_for_cl, reg_class_contents[cl]); -- AND_HARD_REG_SET (valid_for_cl, regs); -- AND_COMPL_HARD_REG_SET (valid_for_cl, -- ira_prohibited_class_mode_regs[cl][mode]); -- AND_COMPL_HARD_REG_SET (valid_for_cl, ira_no_alloc_regs); -+ HARD_REG_SET valid_for_cl = reg_class_contents[cl] & regs; -+ valid_for_cl &= ~(ira_prohibited_class_mode_regs[cl][mode] -+ | ira_no_alloc_regs); - if (hard_reg_set_empty_p (valid_for_cl)) - continue; - -@@ -343,8 +340,7 @@ setup_regno_cost_classes_by_aclass (int regno, enum reg_class aclass) - - if ((classes_ptr = cost_classes_aclass_cache[aclass]) == NULL) - { -- COPY_HARD_REG_SET (temp, reg_class_contents[aclass]); -- AND_COMPL_HARD_REG_SET (temp, ira_no_alloc_regs); -+ temp = reg_class_contents[aclass] & ~ira_no_alloc_regs; - /* We exclude classes from consideration which are subsets of - ACLASS only if ACLASS is an uniform class. */ - exclude_p = ira_uniform_class_p[aclass]; -@@ -356,8 +352,7 @@ setup_regno_cost_classes_by_aclass (int regno, enum reg_class aclass) - { - /* Exclude non-uniform classes which are subsets of - ACLASS. */ -- COPY_HARD_REG_SET (temp2, reg_class_contents[cl]); -- AND_COMPL_HARD_REG_SET (temp2, ira_no_alloc_regs); -+ temp2 = reg_class_contents[cl] & ~ira_no_alloc_regs; - if (hard_reg_set_subset_p (temp2, temp) && cl != aclass) - continue; - } -@@ -1482,13 +1477,6 @@ scan_one_insn (rtx_insn *insn) - return insn; - } - -- if (pat_code == CLOBBER_HIGH) -- { -- gcc_assert (REG_P (XEXP (PATTERN (insn), 0)) -- && HARD_REGISTER_P (XEXP (PATTERN (insn), 0))); -- return insn; -- } -- - counted_mem = false; - set = single_set (insn); - extract_insn (insn); -@@ -2345,7 +2333,6 @@ ira_tune_allocno_costs (void) - ira_allocno_object_iterator oi; - ira_object_t obj; - bool skip_p; -- HARD_REG_SET *crossed_calls_clobber_regs; - - FOR_EACH_ALLOCNO (a, ai) - { -@@ -2380,14 +2367,7 @@ ira_tune_allocno_costs (void) - continue; - rclass = REGNO_REG_CLASS (regno); - cost = 0; -- crossed_calls_clobber_regs -- = &(ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (a)); -- if (ira_hard_reg_set_intersection_p (regno, mode, -- *crossed_calls_clobber_regs) -- && (ira_hard_reg_set_intersection_p (regno, mode, -- call_used_reg_set) -- || targetm.hard_regno_call_part_clobbered (NULL, regno, -- mode))) -+ if (ira_need_caller_save_p (a, regno)) - cost += (ALLOCNO_CALL_FREQ (a) - * (ira_memory_move_cost[mode][rclass][0] - + ira_memory_move_cost[mode][rclass][1])); -diff --git a/gcc/ira-emit.c b/gcc/ira-emit.c -index 51bf9c8bc..f44a0d199 100644 ---- a/gcc/ira-emit.c -+++ b/gcc/ira-emit.c -@@ -1115,8 +1115,8 @@ add_range_and_copies_from_move_list (move_t list, ira_loop_tree_node_t node, - ira_allocate_object_conflicts (to_obj, n); - } - } -- ior_hard_reg_conflicts (from, &hard_regs_live); -- ior_hard_reg_conflicts (to, &hard_regs_live); -+ ior_hard_reg_conflicts (from, hard_regs_live); -+ ior_hard_reg_conflicts (to, hard_regs_live); - - update_costs (from, true, freq); - update_costs (to, false, freq); -diff --git a/gcc/ira-int.h b/gcc/ira-int.h -index 3c7fe4e64..a2529ff81 100644 ---- a/gcc/ira-int.h -+++ b/gcc/ira-int.h -@@ -22,6 +22,7 @@ along with GCC; see the file COPYING3. If not see - #define GCC_IRA_INT_H - - #include "recog.h" -+#include "function-abi.h" - - /* To provide consistency in naming, all IRA external variables, - functions, common typedefs start with prefix ira_. */ -@@ -287,6 +288,9 @@ struct ira_allocno - /* Register class which should be used for allocation for given - allocno. 
NO_REGS means that we should use memory. */ - ENUM_BITFIELD (reg_class) aclass : 16; -+ /* A bitmask of the ABIs used by calls that occur while the allocno -+ is live. */ -+ unsigned int crossed_calls_abis : NUM_ABI_IDS; - /* During the reload, value TRUE means that we should not reassign a - hard register to the allocno got memory earlier. It is set up - when we removed memory-memory move insn before each iteration of -@@ -423,6 +427,7 @@ struct ira_allocno - #define ALLOCNO_CALL_FREQ(A) ((A)->call_freq) - #define ALLOCNO_CALLS_CROSSED_NUM(A) ((A)->calls_crossed_num) - #define ALLOCNO_CHEAP_CALLS_CROSSED_NUM(A) ((A)->cheap_calls_crossed_num) -+#define ALLOCNO_CROSSED_CALLS_ABIS(A) ((A)->crossed_calls_abis) - #define ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS(A) \ - ((A)->crossed_calls_clobbered_regs) - #define ALLOCNO_MEM_OPTIMIZED_DEST(A) ((A)->mem_optimized_dest) -@@ -963,8 +968,8 @@ extern void ira_print_disposition (FILE *); - extern void ira_debug_disposition (void); - extern void ira_debug_allocno_classes (void); - extern void ira_init_register_move_cost (machine_mode); --extern void ira_setup_alts (rtx_insn *insn, HARD_REG_SET &alts); --extern int ira_get_dup_out_num (int op_num, HARD_REG_SET &alts); -+extern alternative_mask ira_setup_alts (rtx_insn *); -+extern int ira_get_dup_out_num (int, alternative_mask); - - /* ira-build.c */ - -@@ -996,7 +1001,7 @@ extern void ira_set_allocno_class (ira_allocno_t, enum reg_class); - extern bool ira_conflict_vector_profitable_p (ira_object_t, int); - extern void ira_allocate_conflict_vec (ira_object_t, int); - extern void ira_allocate_object_conflicts (ira_object_t, int); --extern void ior_hard_reg_conflicts (ira_allocno_t, HARD_REG_SET *); -+extern void ior_hard_reg_conflicts (ira_allocno_t, const_hard_reg_set); - extern void ira_print_expanded_allocno (ira_allocno_t); - extern void ira_add_live_range_to_object (ira_object_t, int, int); - extern live_range_t ira_create_live_range (ira_object_t, int, int, -@@ -1508,4 +1513,28 @@ ira_allocate_and_set_or_copy_costs (int **vec, enum reg_class aclass, - extern rtx ira_create_new_reg (rtx); - extern int first_moveable_pseudo, last_moveable_pseudo; - -+/* Return the set of registers that would need a caller save if allocno A -+ overlapped them. */ -+ -+inline HARD_REG_SET -+ira_need_caller_save_regs (ira_allocno_t a) -+{ -+ return call_clobbers_in_region (ALLOCNO_CROSSED_CALLS_ABIS (a), -+ ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (a), -+ ALLOCNO_MODE (a)); -+} -+ -+/* Return true if we would need to save allocno A around a call if we -+ assigned hard register REGNO. */ -+ -+inline bool -+ira_need_caller_save_p (ira_allocno_t a, unsigned int regno) -+{ -+ if (ALLOCNO_CALLS_CROSSED_NUM (a) == 0) -+ return false; -+ return call_clobbered_in_region_p (ALLOCNO_CROSSED_CALLS_ABIS (a), -+ ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (a), -+ ALLOCNO_MODE (a), regno); -+} -+ - #endif /* GCC_IRA_INT_H */ -diff --git a/gcc/ira-lives.c b/gcc/ira-lives.c -index faadf08b0..b933dff16 100644 ---- a/gcc/ira-lives.c -+++ b/gcc/ira-lives.c -@@ -33,6 +33,7 @@ along with GCC; see the file COPYING3. If not see - #include "ira.h" - #include "ira-int.h" - #include "sparseset.h" -+#include "function-abi.h" - - /* The code in this file is similar to one in global but the code - works on the allocno basis and creates live ranges instead of -@@ -80,8 +81,9 @@ static int last_call_num; - /* The number of last call at which given allocno was saved. 
*/ - static int *allocno_saved_at_call; - --/* The value of get_preferred_alternatives for the current instruction, -- supplemental to recog_data. */ -+/* The value returned by ira_setup_alts for the current instruction; -+ i.e. the set of alternatives that we should consider to be likely -+ candidates during reloading. */ - static alternative_mask preferred_alternatives; - - /* If non-NULL, the source operand of a register to register copy for which -@@ -187,8 +189,8 @@ make_object_dead (ira_object_t obj) - } - } - -- IOR_HARD_REG_SET (OBJECT_CONFLICT_HARD_REGS (obj), hard_regs_live); -- IOR_HARD_REG_SET (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), hard_regs_live); -+ OBJECT_CONFLICT_HARD_REGS (obj) |= hard_regs_live; -+ OBJECT_TOTAL_CONFLICT_HARD_REGS (obj) |= hard_regs_live; - - /* If IGNORE_REG_FOR_CONFLICTS did not already conflict with OBJ, make - sure it still doesn't. */ -@@ -989,10 +991,8 @@ process_single_reg_class_operands (bool in_p, int freq) - /* We could increase costs of A instead of making it - conflicting with the hard register. But it works worse - because it will be spilled in reload in anyway. */ -- IOR_HARD_REG_SET (OBJECT_CONFLICT_HARD_REGS (obj), -- reg_class_contents[cl]); -- IOR_HARD_REG_SET (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), -- reg_class_contents[cl]); -+ OBJECT_CONFLICT_HARD_REGS (obj) |= reg_class_contents[cl]; -+ OBJECT_TOTAL_CONFLICT_HARD_REGS (obj) |= reg_class_contents[cl]; - } - } - } -@@ -1130,8 +1130,7 @@ process_bb_node_lives (ira_loop_tree_node_t loop_tree_node) - reg_live_out = df_get_live_out (bb); - sparseset_clear (objects_live); - REG_SET_TO_HARD_REG_SET (hard_regs_live, reg_live_out); -- AND_COMPL_HARD_REG_SET (hard_regs_live, eliminable_regset); -- AND_COMPL_HARD_REG_SET (hard_regs_live, ira_no_alloc_regs); -+ hard_regs_live &= ~(eliminable_regset | ira_no_alloc_regs); - for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) - if (TEST_HARD_REG_BIT (hard_regs_live, i)) - { -@@ -1236,9 +1235,7 @@ process_bb_node_lives (ira_loop_tree_node_t loop_tree_node) - } - } - -- extract_insn (insn); -- preferred_alternatives = get_preferred_alternatives (insn); -- preprocess_constraints (insn); -+ preferred_alternatives = ira_setup_alts (insn); - process_single_reg_class_operands (false, freq); - - /* See which defined values die here. */ -@@ -1263,10 +1260,7 @@ process_bb_node_lives (ira_loop_tree_node_t loop_tree_node) - ira_object_t obj = ira_object_id_map[i]; - a = OBJECT_ALLOCNO (obj); - int num = ALLOCNO_NUM (a); -- HARD_REG_SET this_call_used_reg_set; -- -- get_call_reg_set_usage (insn, &this_call_used_reg_set, -- call_used_reg_set); -+ function_abi callee_abi = insn_callee_abi (insn); - - /* Don't allocate allocnos that cross setjmps or any - call, if this function receives a nonlocal -@@ -1281,10 +1275,10 @@ process_bb_node_lives (ira_loop_tree_node_t loop_tree_node) - } - if (can_throw_internal (insn)) - { -- IOR_HARD_REG_SET (OBJECT_CONFLICT_HARD_REGS (obj), -- this_call_used_reg_set); -- IOR_HARD_REG_SET (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), -- this_call_used_reg_set); -+ OBJECT_CONFLICT_HARD_REGS (obj) -+ |= callee_abi.mode_clobbers (ALLOCNO_MODE (a)); -+ OBJECT_TOTAL_CONFLICT_HARD_REGS (obj) -+ |= callee_abi.mode_clobbers (ALLOCNO_MODE (a)); - } - - if (sparseset_bit_p (allocnos_processed, num)) -@@ -1301,8 +1295,9 @@ process_bb_node_lives (ira_loop_tree_node_t loop_tree_node) - /* Mark it as saved at the next call. 
*/ - allocno_saved_at_call[num] = last_call_num + 1; - ALLOCNO_CALLS_CROSSED_NUM (a)++; -- IOR_HARD_REG_SET (ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (a), -- this_call_used_reg_set); -+ ALLOCNO_CROSSED_CALLS_ABIS (a) |= 1 << callee_abi.id (); -+ ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (a) -+ |= callee_abi.full_and_partial_reg_clobbers (); - if (cheap_reg != NULL_RTX - && ALLOCNO_REGNO (a) == (int) REGNO (cheap_reg)) - ALLOCNO_CHEAP_CALLS_CROSSED_NUM (a)++; -@@ -1355,10 +1350,11 @@ process_bb_node_lives (ira_loop_tree_node_t loop_tree_node) - } - - /* Allocnos can't go in stack regs at the start of a basic block -- that is reached by an abnormal edge. Likewise for call -- clobbered regs, because caller-save, fixup_abnormal_edges and -- possibly the table driven EH machinery are not quite ready to -- handle such allocnos live across such edges. */ -+ that is reached by an abnormal edge. Likewise for registers -+ that are at least partly call clobbered, because caller-save, -+ fixup_abnormal_edges and possibly the table driven EH machinery -+ are not quite ready to handle such allocnos live across such -+ edges. */ - if (bb_has_abnormal_pred (bb)) - { - #ifdef STACK_REGS -@@ -1378,7 +1374,7 @@ process_bb_node_lives (ira_loop_tree_node_t loop_tree_node) - if (!cfun->has_nonlocal_label - && has_abnormal_call_or_eh_pred_edge_p (bb)) - for (px = 0; px < FIRST_PSEUDO_REGISTER; px++) -- if (call_used_regs[px] -+ if (eh_edge_abi.clobbers_at_least_part_of_reg_p (px) - #ifdef REAL_PIC_OFFSET_TABLE_REGNUM - /* We should create a conflict of PIC pseudo with - PIC hard reg as PIC hard reg can have a wrong -diff --git a/gcc/ira.c b/gcc/ira.c -index 4262e5cf3..a985dddaf 100644 ---- a/gcc/ira.c -+++ b/gcc/ira.c -@@ -471,8 +471,7 @@ setup_class_hard_regs (void) - ira_assert (SHRT_MAX >= FIRST_PSEUDO_REGISTER); - for (cl = (int) N_REG_CLASSES - 1; cl >= 0; cl--) - { -- COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl]); -- AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs); -+ temp_hard_regset = reg_class_contents[cl] & ~no_unit_alloc_regs; - CLEAR_HARD_REG_SET (processed_hard_reg_set); - for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) - { -@@ -514,7 +513,7 @@ setup_alloc_regs (bool use_hard_frame_p) - #ifdef ADJUST_REG_ALLOC_ORDER - ADJUST_REG_ALLOC_ORDER; - #endif -- COPY_HARD_REG_SET (no_unit_alloc_regs, fixed_nonglobal_reg_set); -+ no_unit_alloc_regs = fixed_nonglobal_reg_set; - if (! use_hard_frame_p) - SET_HARD_REG_BIT (no_unit_alloc_regs, HARD_FRAME_POINTER_REGNUM); - setup_class_hard_regs (); -@@ -541,8 +540,7 @@ setup_reg_subclasses (void) - if (i == (int) NO_REGS) - continue; - -- COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[i]); -- AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs); -+ temp_hard_regset = reg_class_contents[i] & ~no_unit_alloc_regs; - if (hard_reg_set_empty_p (temp_hard_regset)) - continue; - for (j = 0; j < N_REG_CLASSES; j++) -@@ -550,8 +548,7 @@ setup_reg_subclasses (void) - { - enum reg_class *p; - -- COPY_HARD_REG_SET (temp_hard_regset2, reg_class_contents[j]); -- AND_COMPL_HARD_REG_SET (temp_hard_regset2, no_unit_alloc_regs); -+ temp_hard_regset2 = reg_class_contents[j] & ~no_unit_alloc_regs; - if (! 
hard_reg_set_subset_p (temp_hard_regset, - temp_hard_regset2)) - continue; -@@ -605,10 +602,8 @@ setup_class_subset_and_memory_move_costs (void) - for (cl = (int) N_REG_CLASSES - 1; cl >= 0; cl--) - for (cl2 = (int) N_REG_CLASSES - 1; cl2 >= 0; cl2--) - { -- COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl]); -- AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs); -- COPY_HARD_REG_SET (temp_hard_regset2, reg_class_contents[cl2]); -- AND_COMPL_HARD_REG_SET (temp_hard_regset2, no_unit_alloc_regs); -+ temp_hard_regset = reg_class_contents[cl] & ~no_unit_alloc_regs; -+ temp_hard_regset2 = reg_class_contents[cl2] & ~no_unit_alloc_regs; - ira_class_subset_p[cl][cl2] - = hard_reg_set_subset_p (temp_hard_regset, temp_hard_regset2); - if (! hard_reg_set_empty_p (temp_hard_regset2) -@@ -757,8 +752,7 @@ setup_stack_reg_pressure_class (void) - for (i = 0; i < ira_pressure_classes_num; i++) - { - cl = ira_pressure_classes[i]; -- COPY_HARD_REG_SET (temp_hard_regset2, temp_hard_regset); -- AND_HARD_REG_SET (temp_hard_regset2, reg_class_contents[cl]); -+ temp_hard_regset2 = temp_hard_regset & reg_class_contents[cl]; - size = hard_reg_set_size (temp_hard_regset2); - if (best < size) - { -@@ -816,10 +810,10 @@ setup_pressure_classes (void) - register pressure class. */ - for (m = 0; m < NUM_MACHINE_MODES; m++) - { -- COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl]); -- AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs); -- AND_COMPL_HARD_REG_SET (temp_hard_regset, -- ira_prohibited_class_mode_regs[cl][m]); -+ temp_hard_regset -+ = (reg_class_contents[cl] -+ & ~(no_unit_alloc_regs -+ | ira_prohibited_class_mode_regs[cl][m])); - if (hard_reg_set_empty_p (temp_hard_regset)) - continue; - ira_init_register_move_cost_if_necessary ((machine_mode) m); -@@ -833,8 +827,7 @@ setup_pressure_classes (void) - } - curr = 0; - insert_p = true; -- COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl]); -- AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs); -+ temp_hard_regset = reg_class_contents[cl] & ~no_unit_alloc_regs; - /* Remove so far added pressure classes which are subset of the - current candidate class. Prefer GENERAL_REGS as a pressure - register class to another class containing the same -@@ -845,11 +838,10 @@ setup_pressure_classes (void) - for (i = 0; i < n; i++) - { - cl2 = pressure_classes[i]; -- COPY_HARD_REG_SET (temp_hard_regset2, reg_class_contents[cl2]); -- AND_COMPL_HARD_REG_SET (temp_hard_regset2, no_unit_alloc_regs); -+ temp_hard_regset2 = (reg_class_contents[cl2] -+ & ~no_unit_alloc_regs); - if (hard_reg_set_subset_p (temp_hard_regset, temp_hard_regset2) -- && (! hard_reg_set_equal_p (temp_hard_regset, -- temp_hard_regset2) -+ && (temp_hard_regset != temp_hard_regset2 - || cl2 == (int) GENERAL_REGS)) - { - pressure_classes[curr++] = (enum reg_class) cl2; -@@ -857,11 +849,10 @@ setup_pressure_classes (void) - continue; - } - if (hard_reg_set_subset_p (temp_hard_regset2, temp_hard_regset) -- && (! hard_reg_set_equal_p (temp_hard_regset2, -- temp_hard_regset) -+ && (temp_hard_regset2 != temp_hard_regset - || cl == (int) GENERAL_REGS)) - continue; -- if (hard_reg_set_equal_p (temp_hard_regset2, temp_hard_regset)) -+ if (temp_hard_regset2 == temp_hard_regset) - insert_p = false; - pressure_classes[curr++] = (enum reg_class) cl2; - } -@@ -882,7 +873,7 @@ setup_pressure_classes (void) - registers available for the allocation. 
*/ - CLEAR_HARD_REG_SET (temp_hard_regset); - CLEAR_HARD_REG_SET (temp_hard_regset2); -- COPY_HARD_REG_SET (ignore_hard_regs, no_unit_alloc_regs); -+ ignore_hard_regs = no_unit_alloc_regs; - for (cl = 0; cl < LIM_REG_CLASSES; cl++) - { - /* For some targets (like MIPS with MD_REGS), there are some -@@ -893,23 +884,23 @@ setup_pressure_classes (void) - break; - if (m >= NUM_MACHINE_MODES) - { -- IOR_HARD_REG_SET (ignore_hard_regs, reg_class_contents[cl]); -+ ignore_hard_regs |= reg_class_contents[cl]; - continue; - } - for (i = 0; i < n; i++) - if ((int) pressure_classes[i] == cl) - break; -- IOR_HARD_REG_SET (temp_hard_regset2, reg_class_contents[cl]); -+ temp_hard_regset2 |= reg_class_contents[cl]; - if (i < n) -- IOR_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl]); -+ temp_hard_regset |= reg_class_contents[cl]; - } - for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) - /* Some targets (like SPARC with ICC reg) have allocatable regs - for which no reg class is defined. */ - if (REGNO_REG_CLASS (i) == NO_REGS) - SET_HARD_REG_BIT (ignore_hard_regs, i); -- AND_COMPL_HARD_REG_SET (temp_hard_regset, ignore_hard_regs); -- AND_COMPL_HARD_REG_SET (temp_hard_regset2, ignore_hard_regs); -+ temp_hard_regset &= ~ignore_hard_regs; -+ temp_hard_regset2 &= ~ignore_hard_regs; - ira_assert (hard_reg_set_subset_p (temp_hard_regset2, temp_hard_regset)); - } - #endif -@@ -1001,16 +992,12 @@ setup_allocno_and_important_classes (void) - same set of hard registers. */ - for (i = 0; i < LIM_REG_CLASSES; i++) - { -- COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[i]); -- AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs); -+ temp_hard_regset = reg_class_contents[i] & ~no_unit_alloc_regs; - for (j = 0; j < n; j++) - { - cl = classes[j]; -- COPY_HARD_REG_SET (temp_hard_regset2, reg_class_contents[cl]); -- AND_COMPL_HARD_REG_SET (temp_hard_regset2, -- no_unit_alloc_regs); -- if (hard_reg_set_equal_p (temp_hard_regset, -- temp_hard_regset2)) -+ temp_hard_regset2 = reg_class_contents[cl] & ~no_unit_alloc_regs; -+ if (temp_hard_regset == temp_hard_regset2) - break; - } - if (j >= n || targetm.additional_allocno_class_p (i)) -@@ -1037,14 +1024,12 @@ setup_allocno_and_important_classes (void) - for (cl = 0; cl < N_REG_CLASSES; cl++) - if (ira_class_hard_regs_num[cl] > 0) - { -- COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl]); -- AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs); -+ temp_hard_regset = reg_class_contents[cl] & ~no_unit_alloc_regs; - set_p = false; - for (j = 0; j < ira_allocno_classes_num; j++) - { -- COPY_HARD_REG_SET (temp_hard_regset2, -- reg_class_contents[ira_allocno_classes[j]]); -- AND_COMPL_HARD_REG_SET (temp_hard_regset2, no_unit_alloc_regs); -+ temp_hard_regset2 = (reg_class_contents[ira_allocno_classes[j]] -+ & ~no_unit_alloc_regs); - if ((enum reg_class) cl == ira_allocno_classes[j]) - break; - else if (hard_reg_set_subset_p (temp_hard_regset, -@@ -1118,10 +1103,9 @@ setup_class_translate_array (enum reg_class *class_translate, - for (i = 0; i < classes_num; i++) - { - aclass = classes[i]; -- COPY_HARD_REG_SET (temp_hard_regset, -- reg_class_contents[aclass]); -- AND_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl]); -- AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs); -+ temp_hard_regset = (reg_class_contents[aclass] -+ & reg_class_contents[cl] -+ & ~no_unit_alloc_regs); - if (! 
hard_reg_set_empty_p (temp_hard_regset)) - { - min_cost = INT_MAX; -@@ -1223,10 +1207,8 @@ setup_reg_class_relations (void) - ira_reg_classes_intersect_p[cl1][cl2] = false; - ira_reg_class_intersect[cl1][cl2] = NO_REGS; - ira_reg_class_subset[cl1][cl2] = NO_REGS; -- COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl1]); -- AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs); -- COPY_HARD_REG_SET (temp_set2, reg_class_contents[cl2]); -- AND_COMPL_HARD_REG_SET (temp_set2, no_unit_alloc_regs); -+ temp_hard_regset = reg_class_contents[cl1] & ~no_unit_alloc_regs; -+ temp_set2 = reg_class_contents[cl2] & ~no_unit_alloc_regs; - if (hard_reg_set_empty_p (temp_hard_regset) - && hard_reg_set_empty_p (temp_set2)) - { -@@ -1264,16 +1246,14 @@ setup_reg_class_relations (void) - } - ira_reg_class_subunion[cl1][cl2] = NO_REGS; - ira_reg_class_superunion[cl1][cl2] = NO_REGS; -- COPY_HARD_REG_SET (intersection_set, reg_class_contents[cl1]); -- AND_HARD_REG_SET (intersection_set, reg_class_contents[cl2]); -- AND_COMPL_HARD_REG_SET (intersection_set, no_unit_alloc_regs); -- COPY_HARD_REG_SET (union_set, reg_class_contents[cl1]); -- IOR_HARD_REG_SET (union_set, reg_class_contents[cl2]); -- AND_COMPL_HARD_REG_SET (union_set, no_unit_alloc_regs); -+ intersection_set = (reg_class_contents[cl1] -+ & reg_class_contents[cl2] -+ & ~no_unit_alloc_regs); -+ union_set = ((reg_class_contents[cl1] | reg_class_contents[cl2]) -+ & ~no_unit_alloc_regs); - for (cl3 = 0; cl3 < N_REG_CLASSES; cl3++) - { -- COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl3]); -- AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs); -+ temp_hard_regset = reg_class_contents[cl3] & ~no_unit_alloc_regs; - if (hard_reg_set_subset_p (temp_hard_regset, intersection_set)) - { - /* CL3 allocatable hard register set is inside of -@@ -1281,17 +1261,16 @@ setup_reg_class_relations (void) - of CL1 and CL2. */ - if (important_class_p[cl3]) - { -- COPY_HARD_REG_SET -- (temp_set2, -- reg_class_contents -- [(int) ira_reg_class_intersect[cl1][cl2]]); -- AND_COMPL_HARD_REG_SET (temp_set2, no_unit_alloc_regs); -+ temp_set2 -+ = (reg_class_contents -+ [ira_reg_class_intersect[cl1][cl2]]); -+ temp_set2 &= ~no_unit_alloc_regs; - if (! hard_reg_set_subset_p (temp_hard_regset, temp_set2) - /* If the allocatable hard register sets are - the same, prefer GENERAL_REGS or the - smallest class for debugging - purposes. */ -- || (hard_reg_set_equal_p (temp_hard_regset, temp_set2) -+ || (temp_hard_regset == temp_set2 - && (cl3 == GENERAL_REGS - || ((ira_reg_class_intersect[cl1][cl2] - != GENERAL_REGS) -@@ -1302,14 +1281,13 @@ setup_reg_class_relations (void) - ira_reg_class_intersect[cl1][cl2]]))))) - ira_reg_class_intersect[cl1][cl2] = (enum reg_class) cl3; - } -- COPY_HARD_REG_SET -- (temp_set2, -- reg_class_contents[(int) ira_reg_class_subset[cl1][cl2]]); -- AND_COMPL_HARD_REG_SET (temp_set2, no_unit_alloc_regs); -+ temp_set2 -+ = (reg_class_contents[ira_reg_class_subset[cl1][cl2]] -+ & ~no_unit_alloc_regs); - if (! hard_reg_set_subset_p (temp_hard_regset, temp_set2) - /* Ignore unavailable hard registers and prefer - smallest class for debugging purposes. */ -- || (hard_reg_set_equal_p (temp_hard_regset, temp_set2) -+ || (temp_hard_regset == temp_set2 - && hard_reg_set_subset_p - (reg_class_contents[cl3], - reg_class_contents -@@ -1322,15 +1300,13 @@ setup_reg_class_relations (void) - /* CL3 allocatable hard register set is inside of - union of allocatable hard register sets of CL1 - and CL2. 
*/ -- COPY_HARD_REG_SET -- (temp_set2, -- reg_class_contents[(int) ira_reg_class_subunion[cl1][cl2]]); -- AND_COMPL_HARD_REG_SET (temp_set2, no_unit_alloc_regs); -+ temp_set2 -+ = (reg_class_contents[ira_reg_class_subunion[cl1][cl2]] -+ & ~no_unit_alloc_regs); - if (ira_reg_class_subunion[cl1][cl2] == NO_REGS - || (hard_reg_set_subset_p (temp_set2, temp_hard_regset) - -- && (! hard_reg_set_equal_p (temp_set2, -- temp_hard_regset) -+ && (temp_set2 != temp_hard_regset - || cl3 == GENERAL_REGS - /* If the allocatable hard register sets are the - same, prefer GENERAL_REGS or the smallest -@@ -1347,15 +1323,13 @@ setup_reg_class_relations (void) - /* CL3 allocatable hard register set contains union - of allocatable hard register sets of CL1 and - CL2. */ -- COPY_HARD_REG_SET -- (temp_set2, -- reg_class_contents[(int) ira_reg_class_superunion[cl1][cl2]]); -- AND_COMPL_HARD_REG_SET (temp_set2, no_unit_alloc_regs); -+ temp_set2 -+ = (reg_class_contents[ira_reg_class_superunion[cl1][cl2]] -+ & ~no_unit_alloc_regs); - if (ira_reg_class_superunion[cl1][cl2] == NO_REGS - || (hard_reg_set_subset_p (temp_hard_regset, temp_set2) - -- && (! hard_reg_set_equal_p (temp_set2, -- temp_hard_regset) -+ && (temp_set2 != temp_hard_regset - || cl3 == GENERAL_REGS - /* If the allocatable hard register sets are the - same, prefer GENERAL_REGS or the smallest -@@ -1499,8 +1473,7 @@ setup_prohibited_class_mode_regs (void) - - for (cl = (int) N_REG_CLASSES - 1; cl >= 0; cl--) - { -- COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl]); -- AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs); -+ temp_hard_regset = reg_class_contents[cl] & ~no_unit_alloc_regs; - for (j = 0; j < NUM_MACHINE_MODES; j++) - { - count = 0; -@@ -1784,68 +1757,59 @@ setup_prohibited_mode_move_regs (void) - - - --/* Setup possible alternatives in ALTS for INSN. */ --void --ira_setup_alts (rtx_insn *insn, HARD_REG_SET &alts) -+/* Extract INSN and return the set of alternatives that we should consider. -+ This excludes any alternatives whose constraints are obviously impossible -+ to meet (e.g. because the constraint requires a constant and the operand -+ is nonconstant). It also excludes alternatives that are bound to need -+ a spill or reload, as long as we have other alternatives that match -+ exactly. */ -+alternative_mask -+ira_setup_alts (rtx_insn *insn) - { -- /* MAP nalt * nop -> start of constraints for given operand and -- alternative. */ -- static vec insn_constraints; - int nop, nalt; - bool curr_swapped; - const char *p; - int commutative = -1; - - extract_insn (insn); -+ preprocess_constraints (insn); - alternative_mask preferred = get_preferred_alternatives (insn); -- CLEAR_HARD_REG_SET (alts); -- insn_constraints.release (); -- insn_constraints.safe_grow_cleared (recog_data.n_operands -- * recog_data.n_alternatives + 1); -+ alternative_mask alts = 0; -+ alternative_mask exact_alts = 0; - /* Check that the hard reg set is enough for holding all - alternatives. It is hard to imagine the situation when the - assertion is wrong. */ - ira_assert (recog_data.n_alternatives - <= (int) MAX (sizeof (HARD_REG_ELT_TYPE) * CHAR_BIT, - FIRST_PSEUDO_REGISTER)); -+ for (nop = 0; nop < recog_data.n_operands; nop++) -+ if (recog_data.constraints[nop][0] == '%') -+ { -+ commutative = nop; -+ break; -+ } - for (curr_swapped = false;; curr_swapped = true) - { -- /* Calculate some data common for all alternatives to speed up the -- function. 
*/ -- for (nop = 0; nop < recog_data.n_operands; nop++) -- { -- for (nalt = 0, p = recog_data.constraints[nop]; -- nalt < recog_data.n_alternatives; -- nalt++) -- { -- insn_constraints[nop * recog_data.n_alternatives + nalt] = p; -- while (*p && *p != ',') -- { -- /* We only support one commutative marker, the first -- one. We already set commutative above. */ -- if (*p == '%' && commutative < 0) -- commutative = nop; -- p++; -- } -- if (*p) -- p++; -- } -- } - for (nalt = 0; nalt < recog_data.n_alternatives; nalt++) - { -- if (!TEST_BIT (preferred, nalt) -- || TEST_HARD_REG_BIT (alts, nalt)) -+ if (!TEST_BIT (preferred, nalt) || TEST_BIT (exact_alts, nalt)) - continue; - -+ const operand_alternative *op_alt -+ = &recog_op_alt[nalt * recog_data.n_operands]; -+ int this_reject = 0; - for (nop = 0; nop < recog_data.n_operands; nop++) - { - int c, len; - -+ this_reject += op_alt[nop].reject; -+ - rtx op = recog_data.operand[nop]; -- p = insn_constraints[nop * recog_data.n_alternatives + nalt]; -+ p = op_alt[nop].constraint; - if (*p == 0 || *p == ',') - continue; -- -+ -+ bool win_p = false; - do - switch (c = *p, len = CONSTRAINT_LEN (c, p), c) - { -@@ -1863,7 +1827,14 @@ ira_setup_alts (rtx_insn *insn, HARD_REG_SET &alts) - - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': -- goto op_success; -+ { -+ rtx other = recog_data.operand[c - '0']; -+ if (MEM_P (other) -+ ? rtx_equal_p (other, op) -+ : REG_P (op) || SUBREG_P (op)) -+ goto op_success; -+ win_p = true; -+ } - break; - - case 'g': -@@ -1877,7 +1848,11 @@ ira_setup_alts (rtx_insn *insn, HARD_REG_SET &alts) - { - case CT_REGISTER: - if (reg_class_for_constraint (cn) != NO_REGS) -- goto op_success; -+ { -+ if (REG_P (op) || SUBREG_P (op)) -+ goto op_success; -+ win_p = true; -+ } - break; - - case CT_CONST_INT: -@@ -1888,9 +1863,14 @@ ira_setup_alts (rtx_insn *insn, HARD_REG_SET &alts) - break; - - case CT_ADDRESS: -+ goto op_success; -+ - case CT_MEMORY: - case CT_SPECIAL_MEMORY: -- goto op_success; -+ if (MEM_P (op)) -+ goto op_success; -+ win_p = true; -+ break; - - case CT_FIXED_FORM: - if (constraint_satisfied_p (op, cn)) -@@ -1901,12 +1881,22 @@ ira_setup_alts (rtx_insn *insn, HARD_REG_SET &alts) - } - } - while (p += len, c); -- break; -+ if (!win_p) -+ break; -+ /* We can make the alternative match by spilling a register -+ to memory or loading something into a register. Count a -+ cost of one reload (the equivalent of the '?' constraint). */ -+ this_reject += 6; - op_success: - ; - } -+ - if (nop >= recog_data.n_operands) -- SET_HARD_REG_BIT (alts, nalt); -+ { -+ alts |= ALTERNATIVE_BIT (nalt); -+ if (this_reject == 0) -+ exact_alts |= ALTERNATIVE_BIT (nalt); -+ } - } - if (commutative < 0) - break; -@@ -1916,14 +1906,15 @@ ira_setup_alts (rtx_insn *insn, HARD_REG_SET &alts) - if (curr_swapped) - break; - } -+ return exact_alts ? exact_alts : alts; - } - - /* Return the number of the output non-early clobber operand which - should be the same in any case as operand with number OP_NUM (or -- negative value if there is no such operand). The function takes -- only really possible alternatives into consideration. */ -+ negative value if there is no such operand). ALTS is the mask -+ of alternatives that we should consider. 
*/ - int --ira_get_dup_out_num (int op_num, HARD_REG_SET &alts) -+ira_get_dup_out_num (int op_num, alternative_mask alts) - { - int curr_alt, c, original, dup; - bool ignore_p, use_commut_op_p; -@@ -1940,7 +1931,7 @@ ira_get_dup_out_num (int op_num, HARD_REG_SET &alts) - { - rtx op = recog_data.operand[op_num]; - -- for (curr_alt = 0, ignore_p = !TEST_HARD_REG_BIT (alts, curr_alt), -+ for (curr_alt = 0, ignore_p = !TEST_BIT (alts, curr_alt), - original = -1;;) - { - c = *str; -@@ -1951,7 +1942,7 @@ ira_get_dup_out_num (int op_num, HARD_REG_SET &alts) - else if (c == ',') - { - curr_alt++; -- ignore_p = !TEST_HARD_REG_BIT (alts, curr_alt); -+ ignore_p = !TEST_BIT (alts, curr_alt); - } - else if (! ignore_p) - switch (c) -@@ -1981,26 +1972,8 @@ ira_get_dup_out_num (int op_num, HARD_REG_SET &alts) - } - if (original == -1) - goto fail; -- dup = -1; -- for (ignore_p = false, str = recog_data.constraints[original - '0']; -- *str != 0; -- str++) -- if (ignore_p) -- { -- if (*str == ',') -- ignore_p = false; -- } -- else if (*str == '#') -- ignore_p = true; -- else if (! ignore_p) -- { -- if (*str == '=') -- dup = original - '0'; -- /* It is better ignore an alternative with early clobber. */ -- else if (*str == '&') -- goto fail; -- } -- if (dup >= 0) -+ dup = original - '0'; -+ if (recog_data.operand_type[dup] == OP_OUT) - return dup; - fail: - if (use_commut_op_p) -@@ -2305,7 +2278,7 @@ ira_setup_eliminable_regset (void) - if (frame_pointer_needed) - df_set_regs_ever_live (HARD_FRAME_POINTER_REGNUM, true); - -- COPY_HARD_REG_SET (ira_no_alloc_regs, no_unit_alloc_regs); -+ ira_no_alloc_regs = no_unit_alloc_regs; - CLEAR_HARD_REG_SET (eliminable_regset); - - compute_regs_asm_clobbered (); -@@ -2326,7 +2299,7 @@ ira_setup_eliminable_regset (void) - SET_HARD_REG_BIT (ira_no_alloc_regs, eliminables[i].from); - } - else if (cannot_elim) -- error ("%s cannot be used in asm here", -+ error ("%s cannot be used in % here", - reg_names[eliminables[i].from]); - else - df_set_regs_ever_live (eliminables[i].from, true); -@@ -2340,7 +2313,7 @@ ira_setup_eliminable_regset (void) - SET_HARD_REG_BIT (ira_no_alloc_regs, HARD_FRAME_POINTER_REGNUM); - } - else if (frame_pointer_needed) -- error ("%s cannot be used in asm here", -+ error ("%s cannot be used in % here", - reg_names[HARD_FRAME_POINTER_REGNUM]); - else - df_set_regs_ever_live (HARD_FRAME_POINTER_REGNUM, true); -@@ -2392,12 +2365,10 @@ setup_reg_renumber (void) - for (i = 0; i < nwords; i++) - { - obj = ALLOCNO_OBJECT (a, i); -- IOR_COMPL_HARD_REG_SET (OBJECT_TOTAL_CONFLICT_HARD_REGS (obj), -- reg_class_contents[pclass]); -+ OBJECT_TOTAL_CONFLICT_HARD_REGS (obj) -+ |= ~reg_class_contents[pclass]; - } -- if (ALLOCNO_CALLS_CROSSED_NUM (a) != 0 -- && ira_hard_reg_set_intersection_p (hard_regno, ALLOCNO_MODE (a), -- call_used_reg_set)) -+ if (ira_need_caller_save_p (a, hard_regno)) - { - ira_assert (!optimize || flag_caller_saves - || (ALLOCNO_CALLS_CROSSED_NUM (a) -@@ -3004,7 +2975,7 @@ validate_equiv_mem (rtx_insn *start, rtx reg, rtx memref) - return valid_none; - } - -- note_stores (PATTERN (insn), validate_equiv_mem_from_store, &info); -+ note_stores (insn, validate_equiv_mem_from_store, &info); - if (info.equiv_mem_modified) - return valid_none; - -@@ -3092,7 +3063,6 @@ equiv_init_movable_p (rtx x, int regno) - - case CC0: - case CLOBBER: -- case CLOBBER_HIGH: - return 0; - - case PRE_INC: -@@ -3199,7 +3169,6 @@ memref_referenced_p (rtx memref, rtx x, bool read_p) - return memref_referenced_p (memref, SET_SRC (x), true); - - case CLOBBER: -- case 
CLOBBER_HIGH: - if (process_set_for_memref_referenced_p (memref, XEXP (x, 0))) - return true; - -@@ -3391,6 +3360,37 @@ def_dominates_uses (int regno) - return true; - } - -+/* Scan the instructions before update_equiv_regs. Record which registers -+ are referenced as paradoxical subregs. Also check for cases in which -+ the current function needs to save a register that one of its call -+ instructions clobbers. -+ -+ These things are logically unrelated, but it's more efficient to do -+ them together. */ -+ -+static void -+update_equiv_regs_prescan (void) -+{ -+ basic_block bb; -+ rtx_insn *insn; -+ function_abi_aggregator callee_abis; -+ -+ FOR_EACH_BB_FN (bb, cfun) -+ FOR_BB_INSNS (bb, insn) -+ if (NONDEBUG_INSN_P (insn)) -+ { -+ set_paradoxical_subreg (insn); -+ if (CALL_P (insn)) -+ callee_abis.note_callee_abi (insn_callee_abi (insn)); -+ } -+ -+ HARD_REG_SET extra_caller_saves = callee_abis.caller_save_regs (*crtl->abi); -+ if (!hard_reg_set_empty_p (extra_caller_saves)) -+ for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno) -+ if (TEST_HARD_REG_BIT (extra_caller_saves, regno)) -+ df_set_regs_ever_live (regno, true); -+} -+ - /* Find registers that are equivalent to a single value throughout the - compilation (either because they can be referenced in memory or are - set once from a single constant). Lower their priority for a -@@ -3407,15 +3407,6 @@ update_equiv_regs (void) - rtx_insn *insn; - basic_block bb; - -- /* Scan insns and set pdx_subregs if the reg is used in a -- paradoxical subreg. Don't set such reg equivalent to a mem, -- because lra will not substitute such equiv memory in order to -- prevent access beyond allocated memory for paradoxical memory subreg. */ -- FOR_EACH_BB_FN (bb, cfun) -- FOR_BB_INSNS (bb, insn) -- if (NONDEBUG_INSN_P (insn)) -- set_paradoxical_subreg (insn); -- - /* Scan the insns and find which registers have equivalences. Do this - in a separate scan of the insns because (due to -fcse-follow-jumps) - a register can be set below its use. */ -@@ -3447,7 +3438,7 @@ update_equiv_regs (void) - if (set == NULL_RTX - || side_effects_p (SET_SRC (set))) - { -- note_stores (PATTERN (insn), no_equiv, NULL); -+ note_pattern_stores (PATTERN (insn), no_equiv, NULL); - continue; - } - else if (GET_CODE (PATTERN (insn)) == PARALLEL) -@@ -3458,7 +3449,7 @@ update_equiv_regs (void) - { - rtx part = XVECEXP (PATTERN (insn), 0, i); - if (part != set) -- note_stores (part, no_equiv, NULL); -+ note_pattern_stores (part, no_equiv, NULL); - } - } - -@@ -3516,7 +3507,7 @@ update_equiv_regs (void) - { - /* This might be setting a SUBREG of a pseudo, a pseudo that is - also set somewhere else to a constant. */ -- note_stores (set, no_equiv, NULL); -+ note_pattern_stores (set, no_equiv, NULL); - continue; - } - -@@ -3524,7 +3515,7 @@ update_equiv_regs (void) - equivalent to a mem. 
*/ - if (MEM_P (src) && reg_equiv[regno].pdx_subregs) - { -- note_stores (set, no_equiv, NULL); -+ note_pattern_stores (set, no_equiv, NULL); - continue; - } - -@@ -4458,7 +4449,6 @@ rtx_moveable_p (rtx *loc, enum op_type type) - && rtx_moveable_p (&XEXP (x, 2), OP_IN)); - - case CLOBBER: -- case CLOBBER_HIGH: - return rtx_moveable_p (&SET_DEST (x), OP_OUT); - - case UNSPEC_VOLATILE: -@@ -4911,9 +4901,7 @@ interesting_dest_for_shprep (rtx_insn *insn, basic_block call_dom) - for (int i = 0; i < XVECLEN (pat, 0); i++) - { - rtx sub = XVECEXP (pat, 0, i); -- if (GET_CODE (sub) == USE -- || GET_CODE (sub) == CLOBBER -- || GET_CODE (sub) == CLOBBER_HIGH) -+ if (GET_CODE (sub) == USE || GET_CODE (sub) == CLOBBER) - continue; - if (GET_CODE (sub) != SET - || side_effects_p (sub)) -@@ -5305,6 +5293,7 @@ ira (FILE *f) - init_alias_analysis (); - loop_optimizer_init (AVOID_CFG_MODIFICATIONS); - reg_equiv = XCNEWVEC (struct equivalence, max_reg_num ()); -+ update_equiv_regs_prescan (); - update_equiv_regs (); - - /* Don't move insns if live range shrinkage or register -@@ -5616,7 +5605,9 @@ do_reload (void) - poly_int64 size = get_frame_size () + STACK_CHECK_FIXED_FRAME_SIZE; - - for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++) -- if (df_regs_ever_live_p (i) && !fixed_regs[i] && call_used_regs[i]) -+ if (df_regs_ever_live_p (i) -+ && !fixed_regs[i] -+ && !crtl->abi->clobbers_full_reg_p (i)) - size += UNITS_PER_WORD; - - if (constant_lower_bound (size) > STACK_CHECK_MAX_FRAME_SIZE) -diff --git a/gcc/jit/jit-playback.c b/gcc/jit/jit-playback.c -index b74495c58..8b16e81d5 100644 ---- a/gcc/jit/jit-playback.c -+++ b/gcc/jit/jit-playback.c -@@ -399,12 +399,11 @@ new_function (location *loc, - - if (builtin_id) - { -- DECL_FUNCTION_CODE (fndecl) = builtin_id; - gcc_assert (loc == NULL); - DECL_SOURCE_LOCATION (fndecl) = BUILTINS_LOCATION; - -- DECL_BUILT_IN_CLASS (fndecl) = -- builtins_manager::get_class (builtin_id); -+ built_in_class fclass = builtins_manager::get_class (builtin_id); -+ set_decl_built_in_function (fndecl, fclass, builtin_id); - set_builtin_decl (builtin_id, fndecl, - builtins_manager::implicit_p (builtin_id)); - -diff --git a/gcc/jump.c b/gcc/jump.c -index ce5cee523..17642a95b 100644 ---- a/gcc/jump.c -+++ b/gcc/jump.c -@@ -1094,7 +1094,6 @@ mark_jump_label_1 (rtx x, rtx_insn *insn, bool in_mem, bool is_target) - case CC0: - case REG: - case CLOBBER: -- case CLOBBER_HIGH: - case CALL: - return; - -diff --git a/gcc/langhooks-def.h b/gcc/langhooks-def.h -index a059841b3..842f6a502 100644 ---- a/gcc/langhooks-def.h -+++ b/gcc/langhooks-def.h -@@ -122,6 +122,7 @@ extern int lhd_type_dwarf_attribute (const_tree, int); - #define LANG_HOOKS_TYPES_COMPATIBLE_P lhd_types_compatible_p - #define LANG_HOOKS_BUILTIN_FUNCTION lhd_builtin_function - #define LANG_HOOKS_BUILTIN_FUNCTION_EXT_SCOPE LANG_HOOKS_BUILTIN_FUNCTION -+#define LANG_HOOKS_SIMULATE_BUILTIN_FUNCTION_DECL LANG_HOOKS_BUILTIN_FUNCTION - #define LANG_HOOKS_EXPR_TO_DECL lhd_expr_to_decl - #define LANG_HOOKS_TO_TARGET_CHARSET lhd_to_target_charset - #define LANG_HOOKS_INIT_TS lhd_do_nothing -@@ -170,6 +171,7 @@ extern tree lhd_make_node (enum tree_code); - extern tree lhd_unit_size_without_reusable_padding (tree); - - #define LANG_HOOKS_MAKE_TYPE lhd_make_node -+#define LANG_HOOKS_SIMULATE_ENUM_DECL NULL - #define LANG_HOOKS_CLASSIFY_RECORD NULL - #define LANG_HOOKS_TYPE_FOR_SIZE lhd_type_for_size - #define LANG_HOOKS_INCOMPLETE_TYPE_ERROR lhd_incomplete_type_error -@@ -203,6 +205,7 @@ extern tree lhd_unit_size_without_reusable_padding 
(tree); - - #define LANG_HOOKS_FOR_TYPES_INITIALIZER { \ - LANG_HOOKS_MAKE_TYPE, \ -+ LANG_HOOKS_SIMULATE_ENUM_DECL, \ - LANG_HOOKS_CLASSIFY_RECORD, \ - LANG_HOOKS_TYPE_FOR_MODE, \ - LANG_HOOKS_TYPE_FOR_SIZE, \ -@@ -338,6 +341,7 @@ extern void lhd_end_section (void); - LANG_HOOKS_GIMPLIFY_EXPR, \ - LANG_HOOKS_BUILTIN_FUNCTION, \ - LANG_HOOKS_BUILTIN_FUNCTION_EXT_SCOPE, \ -+ LANG_HOOKS_SIMULATE_BUILTIN_FUNCTION_DECL, \ - LANG_HOOKS_INIT_TS, \ - LANG_HOOKS_EXPR_TO_DECL, \ - LANG_HOOKS_EH_PERSONALITY, \ -diff --git a/gcc/langhooks.c b/gcc/langhooks.c -index 2df97f2b6..fd8f43312 100644 ---- a/gcc/langhooks.c -+++ b/gcc/langhooks.c -@@ -599,28 +599,21 @@ lhd_omp_mappable_type (tree type) - return true; - } - --/* Common function for add_builtin_function and -- add_builtin_function_ext_scope. */ -+/* Common function for add_builtin_function, add_builtin_function_ext_scope -+ and simulate_builtin_function_decl. */ -+ - static tree --add_builtin_function_common (const char *name, -- tree type, -- int function_code, -- enum built_in_class cl, -- const char *library_name, -- tree attrs, -- tree (*hook) (tree)) -+build_builtin_function (location_t location, const char *name, tree type, -+ int function_code, enum built_in_class cl, -+ const char *library_name, tree attrs) - { - tree id = get_identifier (name); -- tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, id, type); -+ tree decl = build_decl (location, FUNCTION_DECL, id, type); - - TREE_PUBLIC (decl) = 1; - DECL_EXTERNAL (decl) = 1; -- DECL_BUILT_IN_CLASS (decl) = cl; -- -- DECL_FUNCTION_CODE (decl) = (enum built_in_function) function_code; - -- /* DECL_FUNCTION_CODE is a bitfield; verify that the value fits. */ -- gcc_assert (DECL_FUNCTION_CODE (decl) == function_code); -+ set_decl_built_in_function (decl, cl, function_code); - - if (library_name) - { -@@ -636,8 +629,7 @@ add_builtin_function_common (const char *name, - else - decl_attributes (&decl, NULL_TREE, 0); - -- return hook (decl); -- -+ return decl; - } - - /* Create a builtin function. */ -@@ -650,9 +642,9 @@ add_builtin_function (const char *name, - const char *library_name, - tree attrs) - { -- return add_builtin_function_common (name, type, function_code, cl, -- library_name, attrs, -- lang_hooks.builtin_function); -+ tree decl = build_builtin_function (BUILTINS_LOCATION, name, type, -+ function_code, cl, library_name, attrs); -+ return lang_hooks.builtin_function (decl); - } - - /* Like add_builtin_function, but make sure the scope is the external scope. -@@ -670,9 +662,40 @@ add_builtin_function_ext_scope (const char *name, - const char *library_name, - tree attrs) - { -- return add_builtin_function_common (name, type, function_code, cl, -- library_name, attrs, -- lang_hooks.builtin_function_ext_scope); -+ tree decl = build_builtin_function (BUILTINS_LOCATION, name, type, -+ function_code, cl, library_name, attrs); -+ return lang_hooks.builtin_function_ext_scope (decl); -+} -+ -+/* Simulate a declaration of a target-specific built-in function at -+ location LOCATION, as though it had been declared directly in the -+ source language. NAME is the name of the function, TYPE is its function -+ type, FUNCTION_CODE is the target-specific function code, LIBRARY_NAME -+ is the name of the underlying library function (NULL if none) and -+ ATTRS is a list of function attributes. -+ -+ Return the decl of the declared function. 
*/ -+ -+tree -+simulate_builtin_function_decl (location_t location, const char *name, -+ tree type, int function_code, -+ const char *library_name, tree attrs) -+{ -+ tree decl = build_builtin_function (location, name, type, -+ function_code, BUILT_IN_MD, -+ library_name, attrs); -+ tree new_decl = lang_hooks.simulate_builtin_function_decl (decl); -+ -+ /* Give the front end a chance to create a new decl if necessary, -+ but if the front end discards the decl in favour of a conflicting -+ (erroneous) previous definition, return the decl that we tried but -+ failed to add. This allows the caller to process the returned decl -+ normally, even though the source code won't be able to use it. */ -+ if (TREE_CODE (new_decl) == FUNCTION_DECL -+ && fndecl_built_in_p (new_decl, function_code, BUILT_IN_MD)) -+ return new_decl; -+ -+ return decl; - } - - tree -diff --git a/gcc/langhooks.h b/gcc/langhooks.h -index a45579b33..b8cee93f5 100644 ---- a/gcc/langhooks.h -+++ b/gcc/langhooks.h -@@ -64,6 +64,10 @@ struct lang_hooks_for_types - language-specific processing is required. */ - tree (*make_type) (enum tree_code); - -+ /* Make an enum type with the given name and values, associating -+ them all with the given source location. */ -+ tree (*simulate_enum_decl) (location_t, const char *, vec); -+ - /* Return what kind of RECORD_TYPE this is, mainly for purposes of - debug information. If not defined, record types are assumed to - be structures. */ -@@ -494,6 +498,15 @@ struct lang_hooks - backend must add all of the builtins at program initialization time. */ - tree (*builtin_function_ext_scope) (tree decl); - -+ /* Do language-specific processing for target-specific built-in -+ function DECL, so that it is defined in the global scope (only) -+ and is available without needing to be explicitly declared. -+ -+ This is intended for targets that want to inject declarations of -+ built-in functions into the source language (such as in response -+ to a pragma) rather than providing them in the source language itself. */ -+ tree (*simulate_builtin_function_decl) (tree decl); -+ - /* Used to set up the tree_contains_structure array for a frontend. */ - void (*init_ts) (void); - -@@ -562,6 +575,8 @@ extern tree add_builtin_function_ext_scope (const char *name, tree type, - enum built_in_class cl, - const char *library_name, - tree attrs); -+extern tree simulate_builtin_function_decl (location_t, const char *, tree, -+ int, const char *, tree); - extern tree add_builtin_type (const char *name, tree type); - - /* Language helper functions. */ -diff --git a/gcc/loop-doloop.c b/gcc/loop-doloop.c -index 89714be76..732687dba 100644 ---- a/gcc/loop-doloop.c -+++ b/gcc/loop-doloop.c -@@ -731,7 +731,7 @@ doloop_optimize (struct loop *loop) - bitmap modified = BITMAP_ALLOC (NULL); - - for (rtx_insn *i = doloop_seq; i != NULL; i = NEXT_INSN (i)) -- note_stores (PATTERN (i), record_reg_sets, modified); -+ note_stores (i, record_reg_sets, modified); - - basic_block loop_end = desc->out_edge->src; - bool fail = bitmap_intersect_p (df_get_live_out (loop_end), modified); -diff --git a/gcc/loop-invariant.c b/gcc/loop-invariant.c -index b880ead3d..1af88876c 100644 ---- a/gcc/loop-invariant.c -+++ b/gcc/loop-invariant.c -@@ -2170,7 +2170,7 @@ calculate_loop_reg_pressure (void) - - mark_ref_regs (PATTERN (insn)); - n_regs_set = 0; -- note_stores (PATTERN (insn), mark_reg_clobber, NULL); -+ note_stores (insn, mark_reg_clobber, NULL); - - /* Mark any registers dead after INSN as dead now. 
*/ - -@@ -2183,7 +2183,7 @@ calculate_loop_reg_pressure (void) - Clobbers are processed again, so they conflict with - the registers that are set. */ - -- note_stores (PATTERN (insn), mark_reg_store, NULL); -+ note_stores (insn, mark_reg_store, NULL); - - if (AUTO_INC_DEC) - for (link = REG_NOTES (insn); link; link = XEXP (link, 1)) -diff --git a/gcc/loop-iv.c b/gcc/loop-iv.c -index 340045ce8..1dc3bc74d 100644 ---- a/gcc/loop-iv.c -+++ b/gcc/loop-iv.c -@@ -1967,16 +1967,10 @@ simplify_using_initial_values (struct loop *loop, enum rtx_code op, rtx *expr) - continue; - - CLEAR_REG_SET (this_altered); -- note_stores (PATTERN (insn), mark_altered, this_altered); -+ note_stores (insn, mark_altered, this_altered); - if (CALL_P (insn)) -- { -- /* Kill all call clobbered registers. */ -- unsigned int i; -- hard_reg_set_iterator hrsi; -- EXECUTE_IF_SET_IN_HARD_REG_SET (regs_invalidated_by_call, -- 0, i, hrsi) -- SET_REGNO_REG_SET (this_altered, i); -- } -+ /* Kill all call clobbered registers. */ -+ IOR_REG_SET_HRS (this_altered, regs_invalidated_by_call); - - if (suitable_set_for_replacement (insn, &dest, &src)) - { -diff --git a/gcc/lra-assigns.c b/gcc/lra-assigns.c -index 5c5c73293..a35fc41ac 100644 ---- a/gcc/lra-assigns.c -+++ b/gcc/lra-assigns.c -@@ -94,6 +94,7 @@ along with GCC; see the file COPYING3. If not see - #include "params.h" - #include "lra.h" - #include "lra-int.h" -+#include "function-abi.h" - - /* Current iteration number of the pass and current iteration number - of the pass after the latest spill pass when any former reload -@@ -493,18 +494,15 @@ find_hard_regno_for_1 (int regno, int *cost, int try_only_hard_regno, - HARD_REG_SET impossible_start_hard_regs, available_regs; - - if (hard_reg_set_empty_p (regno_set)) -- COPY_HARD_REG_SET (conflict_set, lra_no_alloc_regs); -+ conflict_set = lra_no_alloc_regs; - else -- { -- COMPL_HARD_REG_SET (conflict_set, regno_set); -- IOR_HARD_REG_SET (conflict_set, lra_no_alloc_regs); -- } -+ conflict_set = ~regno_set | lra_no_alloc_regs; - rclass = regno_allocno_class_array[regno]; - rclass_intersect_p = ira_reg_classes_intersect_p[rclass]; - curr_hard_regno_costs_check++; - sparseset_clear (conflict_reload_and_inheritance_pseudos); - sparseset_clear (live_range_hard_reg_pseudos); -- IOR_HARD_REG_SET (conflict_set, lra_reg_info[regno].conflict_hard_regs); -+ conflict_set |= lra_reg_info[regno].conflict_hard_regs; - biggest_mode = lra_reg_info[regno].biggest_mode; - for (r = lra_reg_info[regno].live_ranges; r != NULL; r = r->next) - { -@@ -614,7 +612,7 @@ find_hard_regno_for_1 (int regno, int *cost, int try_only_hard_regno, - } - /* Make sure that all registers in a multi-word pseudo belong to the - required class. 
*/ -- IOR_COMPL_HARD_REG_SET (conflict_set, reg_class_contents[rclass]); -+ conflict_set |= ~reg_class_contents[rclass]; - lra_assert (rclass != NO_REGS); - rclass_size = ira_class_hard_regs_num[rclass]; - best_hard_regno = -1; -@@ -622,8 +620,7 @@ find_hard_regno_for_1 (int regno, int *cost, int try_only_hard_regno, - biggest_nregs = hard_regno_nregs (hard_regno, biggest_mode); - nregs_diff = (biggest_nregs - - hard_regno_nregs (hard_regno, PSEUDO_REGNO_MODE (regno))); -- COPY_HARD_REG_SET (available_regs, reg_class_contents[rclass]); -- AND_COMPL_HARD_REG_SET (available_regs, lra_no_alloc_regs); -+ available_regs = reg_class_contents[rclass] & ~lra_no_alloc_regs; - for (i = 0; i < rclass_size; i++) - { - if (try_only_hard_regno >= 0) -@@ -658,7 +655,7 @@ find_hard_regno_for_1 (int regno, int *cost, int try_only_hard_regno, - for (j = 0; - j < hard_regno_nregs (hard_regno, PSEUDO_REGNO_MODE (regno)); - j++) -- if (! TEST_HARD_REG_BIT (call_used_reg_set, hard_regno + j) -+ if (! crtl->abi->clobbers_full_reg_p (hard_regno + j) - && ! df_regs_ever_live_p (hard_regno + j)) - /* It needs save restore. */ - hard_regno_costs[hard_regno] -@@ -1219,8 +1216,8 @@ setup_live_pseudos_and_spill_after_risky_transforms (bitmap - sparseset_set_bit (live_range_hard_reg_pseudos, r2->regno); - } - } -- COPY_HARD_REG_SET (conflict_set, lra_no_alloc_regs); -- IOR_HARD_REG_SET (conflict_set, lra_reg_info[regno].conflict_hard_regs); -+ conflict_set = lra_no_alloc_regs; -+ conflict_set |= lra_reg_info[regno].conflict_hard_regs; - val = lra_reg_info[regno].val; - offset = lra_reg_info[regno].offset; - EXECUTE_IF_SET_IN_SPARSESET (live_range_hard_reg_pseudos, conflict_regno) -@@ -1640,14 +1637,14 @@ lra_assign (bool &fails_p) - bitmap_initialize (&all_spilled_pseudos, ®_obstack); - create_live_range_start_chains (); - setup_live_pseudos_and_spill_after_risky_transforms (&all_spilled_pseudos); -- if (! lra_asm_error_p && flag_checking && !flag_ipa_ra) -+ if (! lra_asm_error_p && flag_checking) - /* Check correctness of allocation for call-crossed pseudos but - only when there are no asm errors as in the case of errors the - asm is removed and it can result in incorrect allocation. */ - for (i = FIRST_PSEUDO_REGISTER; i < max_regno; i++) -- if (lra_reg_info[i].nrefs != 0 && reg_renumber[i] >= 0 -- && lra_reg_info[i].call_insn -- && overlaps_hard_reg_set_p (call_used_reg_set, -+ if (lra_reg_info[i].nrefs != 0 -+ && reg_renumber[i] >= 0 -+ && overlaps_hard_reg_set_p (lra_reg_info[i].conflict_hard_regs, - PSEUDO_REGNO_MODE (i), reg_renumber[i])) - gcc_unreachable (); - /* Setup insns to process on the next constraint pass. */ -diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c -index f0a2f0491..b34aec227 100644 ---- a/gcc/lra-constraints.c -+++ b/gcc/lra-constraints.c -@@ -131,6 +131,7 @@ - #include "lra.h" - #include "lra-int.h" - #include "print-rtl.h" -+#include "function-abi.h" - - /* Value of LRA_CURR_RELOAD_NUM at the beginning of BB of the current - insn. Remember that LRA_CURR_RELOAD_NUM is the number of emitted -@@ -394,11 +395,24 @@ address_eliminator::~address_eliminator () - *m_index_loc = m_index_reg; - } - --/* Return true if the eliminated form of AD is a legitimate target address. */ -+/* Return true if the eliminated form of AD is a legitimate target address. -+ If OP is a MEM, AD is the address within OP, otherwise OP should be -+ ignored. CONSTRAINT is one constraint that the operand may need -+ to meet. 
*/ - static bool --valid_address_p (struct address_info *ad) -+valid_address_p (rtx op, struct address_info *ad, -+ enum constraint_num constraint) - { - address_eliminator eliminator (ad); -+ -+ /* Allow a memory OP if it matches CONSTRAINT, even if CONSTRAINT is more -+ forgiving than "m". */ -+ if (MEM_P (op) -+ && (insn_extra_memory_constraint (constraint) -+ || insn_extra_special_memory_constraint (constraint)) -+ && constraint_satisfied_p (op, constraint)) -+ return true; -+ - return valid_address_p (ad->mode, *ad->outer, ad->as); - } - -@@ -1888,8 +1902,7 @@ prohibited_class_reg_set_mode_p (enum reg_class rclass, - HARD_REG_SET temp; - - lra_assert (hard_reg_set_subset_p (reg_class_contents[rclass], set)); -- COPY_HARD_REG_SET (temp, set); -- AND_COMPL_HARD_REG_SET (temp, lra_no_alloc_regs); -+ temp = set & ~lra_no_alloc_regs; - return (hard_reg_set_subset_p - (temp, ira_prohibited_class_mode_regs[rclass][mode])); - } -@@ -1900,11 +1913,12 @@ prohibited_class_reg_set_mode_p (enum reg_class rclass, - alternative. */ - static unsigned int curr_small_class_check = 0; - --/* Update number of used inputs of class OP_CLASS for operand NOP. -- Return true if we have more such class operands than the number of -- available regs. */ -+/* Update number of used inputs of class OP_CLASS for operand NOP -+ of alternative NALT. Return true if we have more such class operands -+ than the number of available regs. */ - static bool --update_and_check_small_class_inputs (int nop, enum reg_class op_class) -+update_and_check_small_class_inputs (int nop, int nalt, -+ enum reg_class op_class) - { - static unsigned int small_class_check[LIM_REG_CLASSES]; - static int small_class_input_nums[LIM_REG_CLASSES]; -@@ -1915,7 +1929,7 @@ update_and_check_small_class_inputs (int nop, enum reg_class op_class) - && hard_reg_set_intersect_p (reg_class_contents[op_class], - ira_no_alloc_regs) - && (curr_static_id->operand[nop].type != OP_OUT -- || curr_static_id->operand[nop].early_clobber)) -+ || TEST_BIT (curr_static_id->operand[nop].early_clobber_alts, nalt))) - { - if (small_class_check[op_class] == curr_small_class_check) - small_class_input_nums[op_class]++; -@@ -2184,7 +2198,8 @@ process_alt_operands (int only_alternative) - /* We should reject matching of an early - clobber operand if the matching operand is - not dying in the insn. */ -- if (! curr_static_id->operand[m].early_clobber -+ if (!TEST_BIT (curr_static_id->operand[m] -+ .early_clobber_alts, nalt) - || operand_reg[nop] == NULL_RTX - || (find_regno_note (curr_insn, REG_DEAD, - REGNO (op)) -@@ -2251,7 +2266,8 @@ process_alt_operands (int only_alternative) - it results in less hard regs required for - the insn than a non-matching earlyclobber - alternative. */ -- if (curr_static_id->operand[m].early_clobber) -+ if (TEST_BIT (curr_static_id->operand[m] -+ .early_clobber_alts, nalt)) - { - if (lra_dump_file != NULL) - fprintf -@@ -2302,7 +2318,7 @@ process_alt_operands (int only_alternative) - reloads. 
*/ - badop = false; - this_alternative = curr_alt[m]; -- COPY_HARD_REG_SET (this_alternative_set, curr_alt_set[m]); -+ this_alternative_set = curr_alt_set[m]; - winreg = this_alternative != NO_REGS; - break; - } -@@ -2387,14 +2403,12 @@ process_alt_operands (int only_alternative) - if (mode == BLKmode) - break; - this_alternative = reg_class_subunion[this_alternative][cl]; -- IOR_HARD_REG_SET (this_alternative_set, -- reg_class_contents[cl]); -+ this_alternative_set |= reg_class_contents[cl]; - if (costly_p) - { - this_costly_alternative - = reg_class_subunion[this_costly_alternative][cl]; -- IOR_HARD_REG_SET (this_costly_alternative_set, -- reg_class_contents[cl]); -+ this_costly_alternative_set |= reg_class_contents[cl]; - } - winreg = true; - if (REG_P (op)) -@@ -2529,14 +2543,11 @@ process_alt_operands (int only_alternative) - - if (this_alternative != NO_REGS) - { -- HARD_REG_SET available_regs; -- -- COPY_HARD_REG_SET (available_regs, -- reg_class_contents[this_alternative]); -- AND_COMPL_HARD_REG_SET -- (available_regs, -- ira_prohibited_class_mode_regs[this_alternative][mode]); -- AND_COMPL_HARD_REG_SET (available_regs, lra_no_alloc_regs); -+ HARD_REG_SET available_regs -+ = (reg_class_contents[this_alternative] -+ & ~((ira_prohibited_class_mode_regs -+ [this_alternative][mode]) -+ | lra_no_alloc_regs)); - if (hard_reg_set_empty_p (available_regs)) - { - /* There are no hard regs holding a value of given -@@ -2892,7 +2903,8 @@ process_alt_operands (int only_alternative) - goto fail; - } - -- if (update_and_check_small_class_inputs (nop, this_alternative)) -+ if (update_and_check_small_class_inputs (nop, nalt, -+ this_alternative)) - { - if (lra_dump_file != NULL) - fprintf (lra_dump_file, -@@ -2901,7 +2913,7 @@ process_alt_operands (int only_alternative) - goto fail; - } - curr_alt[nop] = this_alternative; -- COPY_HARD_REG_SET (curr_alt_set[nop], this_alternative_set); -+ curr_alt_set[nop] = this_alternative_set; - curr_alt_win[nop] = this_alternative_win; - curr_alt_match_win[nop] = this_alternative_match_win; - curr_alt_offmemok[nop] = this_alternative_offmemok; -@@ -3416,7 +3428,7 @@ process_address_1 (int nop, bool check_only_p, - - All these cases involve a non-autoinc address, so there is no - point revalidating other types. */ -- if (ad.autoinc_p || valid_address_p (&ad)) -+ if (ad.autoinc_p || valid_address_p (op, &ad, cn)) - return change_p; - - /* Any index existed before LRA started, so we can assume that the -@@ -3445,7 +3457,7 @@ process_address_1 (int nop, bool check_only_p, - if (code >= 0) - { - *ad.inner = gen_rtx_LO_SUM (Pmode, new_reg, addr); -- if (! valid_address_p (ad.mode, *ad.outer, ad.as)) -+ if (!valid_address_p (op, &ad, cn)) - { - /* Try to put lo_sum into register. */ - insn = emit_insn (gen_rtx_SET -@@ -3455,7 +3467,7 @@ process_address_1 (int nop, bool check_only_p, - if (code >= 0) - { - *ad.inner = new_reg; -- if (! 
valid_address_p (ad.mode, *ad.outer, ad.as)) -+ if (!valid_address_p (op, &ad, cn)) - { - *ad.inner = addr; - code = -1; -@@ -3550,7 +3562,7 @@ process_address_1 (int nop, bool check_only_p, - && CONSTANT_P (XEXP (SET_SRC (set), 1))) - { - *ad.inner = SET_SRC (set); -- if (valid_address_p (ad.mode, *ad.outer, ad.as)) -+ if (valid_address_p (op, &ad, cn)) - { - *ad.base_term = XEXP (SET_SRC (set), 0); - *ad.disp_term = XEXP (SET_SRC (set), 1); -@@ -4573,7 +4585,7 @@ contains_reg_p (rtx x, bool hard_reg_p, bool spilled_p) - regno = lra_get_regno_hard_regno (regno); - if (regno < 0) - return false; -- COMPL_HARD_REG_SET (alloc_regs, lra_no_alloc_regs); -+ alloc_regs = ~lra_no_alloc_regs; - return overlaps_hard_reg_set_p (alloc_regs, GET_MODE (x), regno); - } - else -@@ -5165,6 +5177,14 @@ static int reloads_num; - /* Number of calls passed so far in current EBB. */ - static int calls_num; - -+/* Index ID is the CALLS_NUM associated the last call we saw with -+ ABI identifier ID. */ -+static int last_call_for_abi[NUM_ABI_IDS]; -+ -+/* Which registers have been fully or partially clobbered by a call -+ since they were last used. */ -+static HARD_REG_SET full_and_partial_call_clobbers; -+ - /* Current reload pseudo check for validity of elements in - USAGE_INSNS. */ - static int curr_usage_insns_check; -@@ -5208,6 +5228,10 @@ setup_next_usage_insn (int regno, rtx insn, int reloads_num, bool after_p) - usage_insns[regno].reloads_num = reloads_num; - usage_insns[regno].calls_num = calls_num; - usage_insns[regno].after_p = after_p; -+ if (regno >= FIRST_PSEUDO_REGISTER && reg_renumber[regno] >= 0) -+ remove_from_hard_reg_set (&full_and_partial_call_clobbers, -+ PSEUDO_REGNO_MODE (regno), -+ reg_renumber[regno]); - } - - /* The function is used to form list REGNO usages which consists of -@@ -5453,16 +5477,19 @@ static inline bool - need_for_call_save_p (int regno) - { - lra_assert (regno >= FIRST_PSEUDO_REGISTER && reg_renumber[regno] >= 0); -- return (usage_insns[regno].calls_num < calls_num -- && (overlaps_hard_reg_set_p -- ((flag_ipa_ra && -- ! hard_reg_set_empty_p (lra_reg_info[regno].actual_call_used_reg_set)) -- ? lra_reg_info[regno].actual_call_used_reg_set -- : call_used_reg_set, -- PSEUDO_REGNO_MODE (regno), reg_renumber[regno]) -- || (targetm.hard_regno_call_part_clobbered -- (lra_reg_info[regno].call_insn, -- reg_renumber[regno], PSEUDO_REGNO_MODE (regno))))); -+ if (usage_insns[regno].calls_num < calls_num) -+ { -+ unsigned int abis = 0; -+ for (unsigned int i = 0; i < NUM_ABI_IDS; ++i) -+ if (last_call_for_abi[i] > usage_insns[regno].calls_num) -+ abis |= 1 << i; -+ gcc_assert (abis); -+ if (call_clobbered_in_region_p (abis, full_and_partial_call_clobbers, -+ PSEUDO_REGNO_MODE (regno), -+ reg_renumber[regno])) -+ return true; -+ } -+ return false; - } - - /* Global registers occurring in the current EBB. */ -@@ -5502,8 +5529,7 @@ need_for_split_p (HARD_REG_SET potential_reload_hard_regs, int regno) - true) the assign pass assumes that all pseudos living - through calls are assigned to call saved hard regs. */ - && (regno >= FIRST_PSEUDO_REGISTER -- || ! TEST_HARD_REG_BIT (call_used_reg_set, regno) -- || usage_insns[regno].calls_num == calls_num) -+ || !TEST_HARD_REG_BIT (full_and_partial_call_clobbers, regno)) - /* We need at least 2 reloads to make pseudo splitting - profitable. 
We should provide hard regno splitting in - any case to solve 1st insn scheduling problem when -@@ -6255,12 +6281,14 @@ inherit_in_ebb (rtx_insn *head, rtx_insn *tail) - curr_usage_insns_check++; - clear_invariants (); - reloads_num = calls_num = 0; -+ for (unsigned int i = 0; i < NUM_ABI_IDS; ++i) -+ last_call_for_abi[i] = 0; -+ CLEAR_HARD_REG_SET (full_and_partial_call_clobbers); - bitmap_clear (&check_only_regs); - bitmap_clear (&invalid_invariant_regs); - last_processed_bb = NULL; - CLEAR_HARD_REG_SET (potential_reload_hard_regs); -- COPY_HARD_REG_SET (live_hard_regs, eliminable_regset); -- IOR_HARD_REG_SET (live_hard_regs, lra_no_alloc_regs); -+ live_hard_regs = eliminable_regset | lra_no_alloc_regs; - /* We don't process new insns generated in the loop. */ - for (curr_insn = tail; curr_insn != PREV_INSN (head); curr_insn = prev_insn) - { -@@ -6330,8 +6358,7 @@ inherit_in_ebb (rtx_insn *head, rtx_insn *tail) - else - setup_next_usage_insn (src_regno, curr_insn, reloads_num, false); - if (hard_reg_set_subset_p (reg_class_contents[cl], live_hard_regs)) -- IOR_HARD_REG_SET (potential_reload_hard_regs, -- reg_class_contents[cl]); -+ potential_reload_hard_regs |= reg_class_contents[cl]; - } - else if (src_regno < 0 - && dst_regno >= lra_constraint_new_regno_start -@@ -6348,8 +6375,7 @@ inherit_in_ebb (rtx_insn *head, rtx_insn *tail) - if (process_invariant_for_inheritance (SET_DEST (curr_set), SET_SRC (curr_set))) - change_p = true; - if (hard_reg_set_subset_p (reg_class_contents[cl], live_hard_regs)) -- IOR_HARD_REG_SET (potential_reload_hard_regs, -- reg_class_contents[cl]); -+ potential_reload_hard_regs |= reg_class_contents[cl]; - } - else if (src_regno >= lra_constraint_new_regno_start - && dst_regno < lra_constraint_new_regno_start -@@ -6371,8 +6397,7 @@ inherit_in_ebb (rtx_insn *head, rtx_insn *tail) - /* Invalidate. */ - usage_insns[dst_regno].check = 0; - if (hard_reg_set_subset_p (reg_class_contents[cl], live_hard_regs)) -- IOR_HARD_REG_SET (potential_reload_hard_regs, -- reg_class_contents[cl]); -+ potential_reload_hard_regs |= reg_class_contents[cl]; - } - else if (INSN_P (curr_insn)) - { -@@ -6427,8 +6452,8 @@ inherit_in_ebb (rtx_insn *head, rtx_insn *tail) - else - add_to_hard_reg_set (&s, PSEUDO_REGNO_MODE (dst_regno), - reg_renumber[dst_regno]); -- AND_COMPL_HARD_REG_SET (live_hard_regs, s); -- AND_COMPL_HARD_REG_SET (potential_reload_hard_regs, s); -+ live_hard_regs &= ~s; -+ potential_reload_hard_regs &= ~s; - } - /* We should invalidate potential inheritance or - splitting for the current insn usages to the next -@@ -6472,6 +6497,10 @@ inherit_in_ebb (rtx_insn *head, rtx_insn *tail) - int regno, hard_regno; - - calls_num++; -+ function_abi callee_abi = insn_callee_abi (curr_insn); -+ last_call_for_abi[callee_abi.id ()] = calls_num; -+ full_and_partial_call_clobbers -+ |= callee_abi.full_and_partial_reg_clobbers (); - if ((cheap = find_reg_note (curr_insn, - REG_RETURNED, NULL_RTX)) != NULL_RTX - && ((cheap = XEXP (cheap, 0)), true) -@@ -6481,7 +6510,7 @@ inherit_in_ebb (rtx_insn *head, rtx_insn *tail) - /* If there are pending saves/restores, the - optimization is not worth. 
*/ - && usage_insns[regno].calls_num == calls_num - 1 -- && TEST_HARD_REG_BIT (call_used_reg_set, hard_regno)) -+ && callee_abi.clobbers_reg_p (GET_MODE (cheap), hard_regno)) - { - /* Restore the pseudo from the call result as - REG_RETURNED note says that the pseudo value is -@@ -6504,6 +6533,9 @@ inherit_in_ebb (rtx_insn *head, rtx_insn *tail) - /* We don't need to save/restore of the pseudo from - this call. */ - usage_insns[regno].calls_num = calls_num; -+ remove_from_hard_reg_set -+ (&full_and_partial_call_clobbers, -+ GET_MODE (cheap), hard_regno); - bitmap_set_bit (&check_only_regs, regno); - } - } -@@ -6607,8 +6639,7 @@ inherit_in_ebb (rtx_insn *head, rtx_insn *tail) - if (ira_class_hard_regs_num[cl] <= max_small_class_regs_num) - reloads_num++; - if (hard_reg_set_subset_p (reg_class_contents[cl], live_hard_regs)) -- IOR_HARD_REG_SET (potential_reload_hard_regs, -- reg_class_contents[cl]); -+ potential_reload_hard_regs |= reg_class_contents[cl]; - } - } - if (NONDEBUG_INSN_P (curr_insn)) -diff --git a/gcc/lra-eliminations.c b/gcc/lra-eliminations.c -index 7a345a52a..9568c13cb 100644 ---- a/gcc/lra-eliminations.c -+++ b/gcc/lra-eliminations.c -@@ -654,7 +654,6 @@ lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode mem_mode, - return x; - - case CLOBBER: -- case CLOBBER_HIGH: - case SET: - gcc_unreachable (); - -@@ -807,16 +806,6 @@ mark_not_eliminable (rtx x, machine_mode mem_mode) - setup_can_eliminate (ep, false); - return; - -- case CLOBBER_HIGH: -- gcc_assert (REG_P (XEXP (x, 0))); -- gcc_assert (REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER); -- for (ep = reg_eliminate; -- ep < ®_eliminate[NUM_ELIMINABLE_REGS]; -- ep++) -- if (reg_is_clobbered_by_clobber_high (ep->to_rtx, XEXP (x, 0))) -- setup_can_eliminate (ep, false); -- return; -- - case SET: - if (SET_DEST (x) == stack_pointer_rtx - && GET_CODE (SET_SRC (x)) == PLUS -@@ -1180,7 +1169,7 @@ spill_pseudos (HARD_REG_SET set) - reg_renumber[i] = -1; - bitmap_ior_into (&to_process, &lra_reg_info[i].insn_bitmap); - } -- IOR_HARD_REG_SET (lra_no_alloc_regs, set); -+ lra_no_alloc_regs |= set; - for (insn = get_insns (); insn != NULL_RTX; insn = NEXT_INSN (insn)) - if (bitmap_bit_p (&to_process, INSN_UID (insn))) - { -@@ -1293,8 +1282,8 @@ update_reg_eliminate (bitmap insns_with_changed_offsets) - result = true; - } - } -- IOR_HARD_REG_SET (lra_no_alloc_regs, temp_hard_reg_set); -- AND_COMPL_HARD_REG_SET (eliminable_regset, temp_hard_reg_set); -+ lra_no_alloc_regs |= temp_hard_reg_set; -+ eliminable_regset &= ~temp_hard_reg_set; - spill_pseudos (temp_hard_reg_set); - return result; - } -diff --git a/gcc/lra-int.h b/gcc/lra-int.h -index 253ae1e6c..5671e2e65 100644 ---- a/gcc/lra-int.h -+++ b/gcc/lra-int.h -@@ -72,10 +72,6 @@ struct lra_reg - /* The following fields are defined only for pseudos. */ - /* Hard registers with which the pseudo conflicts. */ - HARD_REG_SET conflict_hard_regs; -- /* Call used registers with which the pseudo conflicts, taking into account -- the registers used by functions called from calls which cross the -- pseudo. */ -- HARD_REG_SET actual_call_used_reg_set; - /* We assign hard registers to reload pseudos which can occur in few - places. So two hard register preferences are enough for them. - The following fields define the preferred hard registers. If -@@ -103,8 +99,6 @@ struct lra_reg - int val; - /* Offset from relative eliminate register to pesudo reg. */ - poly_int64 offset; -- /* Call instruction, if any, that may affect this psuedo reg. 
*/ -- rtx_insn *call_insn; - /* These members are set up in lra-lives.c and updated in - lra-coalesce.c. */ - /* The biggest size mode in which each pseudo reg is referred in -@@ -141,10 +135,6 @@ struct lra_operand_data - unsigned int strict_low : 1; - /* True if the operand is an operator. */ - unsigned int is_operator : 1; -- /* True if there is an early clobber alternative for this operand. -- This field is set up every time when corresponding -- operand_alternative in lra_static_insn_data is set up. */ -- unsigned int early_clobber : 1; - /* True if the operand is an address. */ - unsigned int is_address : 1; - }; -@@ -163,11 +153,6 @@ struct lra_insn_reg - /* True if the reg is accessed through a subreg and the subreg is - just a part of the register. */ - unsigned int subreg_p : 1; -- /* True if there is an early clobber alternative for this -- operand. */ -- unsigned int early_clobber : 1; -- /* True if the reg is clobber highed by the operand. */ -- unsigned int clobber_high : 1; - /* The corresponding regno of the register. */ - int regno; - /* Next reg info of the same insn. */ -diff --git a/gcc/lra-lives.c b/gcc/lra-lives.c -index 55b2adc2a..bce123d73 100644 ---- a/gcc/lra-lives.c -+++ b/gcc/lra-lives.c -@@ -43,6 +43,7 @@ along with GCC; see the file COPYING3. If not see - #include "sparseset.h" - #include "lra-int.h" - #include "target.h" -+#include "function-abi.h" - - /* Program points are enumerated by numbers from range - 0..LRA_LIVE_MAX_POINT-1. There are approximately two times more -@@ -327,7 +328,7 @@ static void - mark_pseudo_dead (int regno) - { - lra_assert (!HARD_REGISTER_NUM_P (regno)); -- IOR_HARD_REG_SET (lra_reg_info[regno].conflict_hard_regs, hard_regs_live); -+ lra_reg_info[regno].conflict_hard_regs |= hard_regs_live; - if (!sparseset_bit_p (pseudos_live, regno)) - return; - -@@ -574,41 +575,21 @@ lra_setup_reload_pseudo_preferenced_hard_reg (int regno, - } - } - --/* Check that REGNO living through calls and setjumps, set up conflict -- regs using LAST_CALL_USED_REG_SET, and clear corresponding bits in -- PSEUDOS_LIVE_THROUGH_CALLS and PSEUDOS_LIVE_THROUGH_SETJUMPS. -- CALL_INSN is a call that is representative of all calls in the region -- described by the PSEUDOS_LIVE_THROUGH_* sets, in terms of the registers -- that it preserves and clobbers. */ -+/* Check whether REGNO lives through calls and setjmps and clear -+ the corresponding bits in PSEUDOS_LIVE_THROUGH_CALLS and -+ PSEUDOS_LIVE_THROUGH_SETJUMPS. All calls in the region described -+ by PSEUDOS_LIVE_THROUGH_CALLS have the given ABI. */ - - static inline void --check_pseudos_live_through_calls (int regno, -- HARD_REG_SET last_call_used_reg_set, -- rtx_insn *call_insn) -+check_pseudos_live_through_calls (int regno, const function_abi &abi) - { -- int hr; -- rtx_insn *old_call_insn; -- - if (! 
sparseset_bit_p (pseudos_live_through_calls, regno)) - return; - -- gcc_assert (call_insn && CALL_P (call_insn)); -- old_call_insn = lra_reg_info[regno].call_insn; -- if (!old_call_insn -- || (targetm.return_call_with_max_clobbers -- && targetm.return_call_with_max_clobbers (old_call_insn, call_insn) -- == call_insn)) -- lra_reg_info[regno].call_insn = call_insn; -+ machine_mode mode = PSEUDO_REGNO_MODE (regno); - - sparseset_clear_bit (pseudos_live_through_calls, regno); -- IOR_HARD_REG_SET (lra_reg_info[regno].conflict_hard_regs, -- last_call_used_reg_set); -- -- for (hr = 0; HARD_REGISTER_NUM_P (hr); hr++) -- if (targetm.hard_regno_call_part_clobbered (call_insn, hr, -- PSEUDO_REGNO_MODE (regno))) -- add_to_hard_reg_set (&lra_reg_info[regno].conflict_hard_regs, -- PSEUDO_REGNO_MODE (regno), hr); -+ lra_reg_info[regno].conflict_hard_regs |= abi.mode_clobbers (mode); - if (! sparseset_bit_p (pseudos_live_through_setjumps, regno)) - return; - sparseset_clear_bit (pseudos_live_through_setjumps, regno); -@@ -623,23 +604,10 @@ check_pseudos_live_through_calls (int regno, - static inline bool - reg_early_clobber_p (const struct lra_insn_reg *reg, int n_alt) - { -- return (reg->early_clobber -- && (n_alt == LRA_UNKNOWN_ALT -- || (n_alt != LRA_NON_CLOBBERED_ALT -- && TEST_BIT (reg->early_clobber_alts, n_alt)))); --} -- --/* Return true if call instructions CALL1 and CALL2 use ABIs that -- preserve the same set of registers. */ -- --static bool --calls_have_same_clobbers_p (rtx_insn *call1, rtx_insn *call2) --{ -- if (!targetm.return_call_with_max_clobbers) -- return false; -- -- return (targetm.return_call_with_max_clobbers (call1, call2) == call1 -- && targetm.return_call_with_max_clobbers (call2, call1) == call2); -+ return (n_alt == LRA_UNKNOWN_ALT -+ ? reg->early_clobber_alts != 0 -+ : (n_alt != LRA_NON_CLOBBERED_ALT -+ && TEST_BIT (reg->early_clobber_alts, n_alt))); - } - - /* Process insns of the basic block BB to update pseudo live ranges, -@@ -661,17 +629,15 @@ process_bb_lives (basic_block bb, int &curr_point, bool dead_insn_p) - rtx_insn *next; - rtx link, *link_loc; - bool need_curr_point_incr; -- HARD_REG_SET last_call_used_reg_set; -- rtx_insn *call_insn = NULL; -- rtx_insn *last_call_insn = NULL; -+ /* Only has a meaningful value once we've seen a call. 
*/ -+ function_abi last_call_abi = default_function_abi; - - reg_live_out = df_get_live_out (bb); - sparseset_clear (pseudos_live); - sparseset_clear (pseudos_live_through_calls); - sparseset_clear (pseudos_live_through_setjumps); -- CLEAR_HARD_REG_SET (last_call_used_reg_set); - REG_SET_TO_HARD_REG_SET (hard_regs_live, reg_live_out); -- AND_COMPL_HARD_REG_SET (hard_regs_live, eliminable_regset); -+ hard_regs_live &= ~eliminable_regset; - EXECUTE_IF_SET_IN_BITMAP (reg_live_out, FIRST_PSEUDO_REGISTER, j, bi) - { - update_pseudo_point (j, curr_point, USE_POINT); -@@ -701,7 +667,7 @@ process_bb_lives (basic_block bb, int &curr_point, bool dead_insn_p) - bool call_p; - int n_alt, dst_regno, src_regno; - rtx set; -- struct lra_insn_reg *reg, *hr; -+ struct lra_insn_reg *reg; - - if (!NONDEBUG_INSN_P (curr_insn)) - continue; -@@ -733,7 +699,7 @@ process_bb_lives (basic_block bb, int &curr_point, bool dead_insn_p) - break; - } - for (reg = curr_static_id->hard_regs; reg != NULL; reg = reg->next) -- if (reg->type != OP_IN && !reg->clobber_high) -+ if (reg->type != OP_IN) - { - remove_p = false; - break; -@@ -870,24 +836,13 @@ process_bb_lives (basic_block bb, int &curr_point, bool dead_insn_p) - unused values because they still conflict with quantities - that are live at the time of the definition. */ - for (reg = curr_id->regs; reg != NULL; reg = reg->next) -- { -- if (reg->type != OP_IN) -- { -- update_pseudo_point (reg->regno, curr_point, USE_POINT); -- mark_regno_live (reg->regno, reg->biggest_mode); -- check_pseudos_live_through_calls (reg->regno, -- last_call_used_reg_set, -- call_insn); -- } -- -- if (!HARD_REGISTER_NUM_P (reg->regno)) -- for (hr = curr_static_id->hard_regs; hr != NULL; hr = hr->next) -- if (hr->clobber_high -- && maybe_gt (GET_MODE_SIZE (PSEUDO_REGNO_MODE (reg->regno)), -- GET_MODE_SIZE (hr->biggest_mode))) -- SET_HARD_REG_BIT (lra_reg_info[reg->regno].conflict_hard_regs, -- hr->regno); -- } -+ if (reg->type != OP_IN) -+ { -+ update_pseudo_point (reg->regno, curr_point, USE_POINT); -+ mark_regno_live (reg->regno, reg->biggest_mode); -+ /* ??? Should be a no-op for unused registers. */ -+ check_pseudos_live_through_calls (reg->regno, last_call_abi); -+ } - - for (reg = curr_static_id->hard_regs; reg != NULL; reg = reg->next) - if (reg->type != OP_IN) -@@ -926,35 +881,13 @@ process_bb_lives (basic_block bb, int &curr_point, bool dead_insn_p) - - if (call_p) - { -- call_insn = curr_insn; -- if (! flag_ipa_ra && ! targetm.return_call_with_max_clobbers) -- COPY_HARD_REG_SET(last_call_used_reg_set, call_used_reg_set); -- else -- { -- HARD_REG_SET this_call_used_reg_set; -- get_call_reg_set_usage (curr_insn, &this_call_used_reg_set, -- call_used_reg_set); -- -- bool flush = (! hard_reg_set_empty_p (last_call_used_reg_set) -- && ( ! hard_reg_set_equal_p (last_call_used_reg_set, -- this_call_used_reg_set))) -- || (last_call_insn && ! 
calls_have_same_clobbers_p -- (call_insn, -- last_call_insn)); -- -- EXECUTE_IF_SET_IN_SPARSESET (pseudos_live, j) -- { -- IOR_HARD_REG_SET (lra_reg_info[j].actual_call_used_reg_set, -- this_call_used_reg_set); -+ function_abi call_abi = insn_callee_abi (curr_insn); - -- if (flush) -- check_pseudos_live_through_calls (j, -- last_call_used_reg_set, -- last_call_insn); -- } -- COPY_HARD_REG_SET(last_call_used_reg_set, this_call_used_reg_set); -- last_call_insn = call_insn; -- } -+ if (last_call_abi != call_abi) -+ EXECUTE_IF_SET_IN_SPARSESET (pseudos_live, j) -+ check_pseudos_live_through_calls (j, last_call_abi); -+ -+ last_call_abi = call_abi; - - sparseset_ior (pseudos_live_through_calls, - pseudos_live_through_calls, pseudos_live); -@@ -992,9 +925,7 @@ process_bb_lives (basic_block bb, int &curr_point, bool dead_insn_p) - if (reg->type == OP_IN) - update_pseudo_point (reg->regno, curr_point, USE_POINT); - mark_regno_live (reg->regno, reg->biggest_mode); -- check_pseudos_live_through_calls (reg->regno, -- last_call_used_reg_set, -- call_insn); -+ check_pseudos_live_through_calls (reg->regno, last_call_abi); - } - - for (reg = curr_static_id->hard_regs; reg != NULL; reg = reg->next) -@@ -1088,10 +1019,10 @@ process_bb_lives (basic_block bb, int &curr_point, bool dead_insn_p) - } - - /* Pseudos can't go in stack regs at the start of a basic block that -- is reached by an abnormal edge. Likewise for call clobbered regs, -- because caller-save, fixup_abnormal_edges and possibly the table -- driven EH machinery are not quite ready to handle such pseudos -- live across such edges. */ -+ is reached by an abnormal edge. Likewise for registers that are at -+ least partly call clobbered, because caller-save, fixup_abnormal_edges -+ and possibly the table driven EH machinery are not quite ready to -+ handle such pseudos live across such edges. */ - if (bb_has_abnormal_pred (bb)) - { - #ifdef STACK_REGS -@@ -1106,7 +1037,7 @@ process_bb_lives (basic_block bb, int &curr_point, bool dead_insn_p) - if (!cfun->has_nonlocal_label - && has_abnormal_call_or_eh_pred_edge_p (bb)) - for (px = 0; HARD_REGISTER_NUM_P (px); px++) -- if (call_used_regs[px] -+ if (eh_edge_abi.clobbers_at_least_part_of_reg_p (px) - #ifdef REAL_PIC_OFFSET_TABLE_REGNUM - /* We should create a conflict of PIC pseudo with PIC - hard reg as PIC hard reg can have a wrong value after -@@ -1163,7 +1094,7 @@ process_bb_lives (basic_block bb, int &curr_point, bool dead_insn_p) - if (sparseset_cardinality (pseudos_live_through_calls) == 0) - break; - if (sparseset_bit_p (pseudos_live_through_calls, j)) -- check_pseudos_live_through_calls (j, last_call_used_reg_set, call_insn); -+ check_pseudos_live_through_calls (j, last_call_abi); - } - - for (i = 0; HARD_REGISTER_NUM_P (i); ++i) -@@ -1397,7 +1328,6 @@ lra_create_live_ranges_1 (bool all_p, bool dead_insn_p) - lra_reg_info[i].biggest_mode = GET_MODE (regno_reg_rtx[i]); - else - lra_reg_info[i].biggest_mode = VOIDmode; -- lra_reg_info[i].call_insn = NULL; - if (!HARD_REGISTER_NUM_P (i) - && lra_reg_info[i].nrefs != 0) - { -diff --git a/gcc/lra-remat.c b/gcc/lra-remat.c -index 69209b2a1..914f5e2ce 100644 ---- a/gcc/lra-remat.c -+++ b/gcc/lra-remat.c -@@ -65,16 +65,11 @@ along with GCC; see the file COPYING3. If not see - #include "recog.h" - #include "lra.h" - #include "lra-int.h" -+#include "function-abi.h" - - /* Number of candidates for rematerialization. 
*/ - static unsigned int cands_num; - --/* The following is used for representation of call_used_reg_set in -- form array whose elements are hard register numbers with nonzero bit -- in CALL_USED_REG_SET. */ --static int call_used_regs_arr_len; --static int call_used_regs_arr[FIRST_PSEUDO_REGISTER]; -- - /* Bitmap used for different calculations. */ - static bitmap_head temp_bitmap; - -@@ -632,9 +627,12 @@ set_bb_regs (basic_block bb, rtx_insn *insn) - bitmap_set_bit (&subreg_regs, regno); - } - if (CALL_P (insn)) -- for (int i = 0; i < call_used_regs_arr_len; i++) -- bitmap_set_bit (&get_remat_bb_data (bb)->dead_regs, -- call_used_regs_arr[i]); -+ { -+ /* Partially-clobbered registers might still be live. */ -+ HARD_REG_SET clobbers = insn_callee_abi (insn).full_reg_clobbers (); -+ bitmap_ior_into (&get_remat_bb_data (bb)->dead_regs, -+ bitmap_view (clobbers)); -+ } - } - - /* Calculate changed_regs and dead_regs for each BB. */ -@@ -697,7 +695,7 @@ reg_overlap_for_remat_p (lra_insn_reg *reg, rtx_insn *insn) - - /* Return true if a call used register is an input operand of INSN. */ - static bool --call_used_input_regno_present_p (rtx_insn *insn) -+call_used_input_regno_present_p (const function_abi &abi, rtx_insn *insn) - { - int iter; - lra_insn_recog_data_t id = lra_get_insn_recog_data (insn); -@@ -708,8 +706,9 @@ call_used_input_regno_present_p (rtx_insn *insn) - for (reg = (iter == 0 ? id->regs : static_id->hard_regs); - reg != NULL; - reg = reg->next) -- if (reg->type == OP_IN && reg->regno < FIRST_PSEUDO_REGISTER -- && TEST_HARD_REG_BIT (call_used_reg_set, reg->regno)) -+ if (reg->type == OP_IN -+ && reg->regno < FIRST_PSEUDO_REGISTER -+ && abi.clobbers_reg_p (reg->biggest_mode, reg->regno)) - return true; - return false; - } -@@ -798,18 +797,21 @@ calculate_gen_cands (void) - } - - if (CALL_P (insn)) -- EXECUTE_IF_SET_IN_BITMAP (gen_insns, 0, uid, bi) -- { -- rtx_insn *insn2 = lra_insn_recog_data[uid]->insn; -+ { -+ function_abi callee_abi = insn_callee_abi (insn); -+ EXECUTE_IF_SET_IN_BITMAP (gen_insns, 0, uid, bi) -+ { -+ rtx_insn *insn2 = lra_insn_recog_data[uid]->insn; - -- cand = insn_to_cand[INSN_UID (insn2)]; -- gcc_assert (cand != NULL); -- if (call_used_input_regno_present_p (insn2)) -- { -- bitmap_clear_bit (gen_cands, cand->index); -- bitmap_set_bit (&temp_bitmap, uid); -- } -- } -+ cand = insn_to_cand[INSN_UID (insn2)]; -+ gcc_assert (cand != NULL); -+ if (call_used_input_regno_present_p (callee_abi, insn2)) -+ { -+ bitmap_clear_bit (gen_cands, cand->index); -+ bitmap_set_bit (&temp_bitmap, uid); -+ } -+ } -+ } - bitmap_and_compl_into (gen_insns, &temp_bitmap); - - cand = insn_to_cand[INSN_UID (insn)]; -@@ -1204,13 +1206,16 @@ do_remat (void) - } - - if (CALL_P (insn)) -- EXECUTE_IF_SET_IN_BITMAP (avail_cands, 0, cid, bi) -- { -- cand = all_cands[cid]; -+ { -+ function_abi callee_abi = insn_callee_abi (insn); -+ EXECUTE_IF_SET_IN_BITMAP (avail_cands, 0, cid, bi) -+ { -+ cand = all_cands[cid]; - -- if (call_used_input_regno_present_p (cand->insn)) -- bitmap_set_bit (&temp_bitmap, cand->index); -- } -+ if (call_used_input_regno_present_p (callee_abi, cand->insn)) -+ bitmap_set_bit (&temp_bitmap, cand->index); -+ } -+ } - - bitmap_and_compl_into (avail_cands, &temp_bitmap); - -@@ -1306,10 +1311,6 @@ lra_remat (void) - insn_to_cand_activation = XCNEWVEC (cand_t, get_max_uid ()); - regno_cands = XCNEWVEC (cand_t, max_regno); - all_cands.create (8000); -- call_used_regs_arr_len = 0; -- for (int i = 0; i < FIRST_PSEUDO_REGISTER; i++) -- if (call_used_regs[i]) -- 
call_used_regs_arr[call_used_regs_arr_len++] = i; - initiate_cand_table (); - create_remat_bb_data (); - bitmap_initialize (&temp_bitmap, ®_obstack); -diff --git a/gcc/lra-spills.c b/gcc/lra-spills.c -index c0f61c119..d4163eb75 100644 ---- a/gcc/lra-spills.c -+++ b/gcc/lra-spills.c -@@ -242,7 +242,7 @@ assign_spill_hard_regs (int *pseudo_regnos, int n) - /* Set up reserved hard regs for every program point. */ - reserved_hard_regs = XNEWVEC (HARD_REG_SET, lra_live_max_point); - for (p = 0; p < lra_live_max_point; p++) -- COPY_HARD_REG_SET (reserved_hard_regs[p], lra_no_alloc_regs); -+ reserved_hard_regs[p] = lra_no_alloc_regs; - for (i = FIRST_PSEUDO_REGISTER; i < regs_num; i++) - if (lra_reg_info[i].nrefs != 0 - && (hard_regno = lra_get_regno_hard_regno (i)) >= 0) -@@ -273,11 +273,10 @@ assign_spill_hard_regs (int *pseudo_regnos, int n) - continue; - } - lra_assert (spill_class != NO_REGS); -- COPY_HARD_REG_SET (conflict_hard_regs, -- lra_reg_info[regno].conflict_hard_regs); -+ conflict_hard_regs = lra_reg_info[regno].conflict_hard_regs; - for (r = lra_reg_info[regno].live_ranges; r != NULL; r = r->next) - for (p = r->start; p <= r->finish; p++) -- IOR_HARD_REG_SET (conflict_hard_regs, reserved_hard_regs[p]); -+ conflict_hard_regs |= reserved_hard_regs[p]; - spill_class_size = ira_class_hard_regs_num[spill_class]; - mode = lra_reg_info[regno].biggest_mode; - for (k = 0; k < spill_class_size; k++) -diff --git a/gcc/lra.c b/gcc/lra.c -index 10b85340f..db2f82fb1 100644 ---- a/gcc/lra.c -+++ b/gcc/lra.c -@@ -121,6 +121,7 @@ along with GCC; see the file COPYING3. If not see - #include "lra.h" - #include "lra-int.h" - #include "print-rtl.h" -+#include "function-abi.h" - - /* Dump bitmap SET with TITLE and BB INDEX. */ - void -@@ -536,18 +537,15 @@ object_allocator lra_insn_reg_pool ("insn regs"); - - /* Create LRA insn related info about a reference to REGNO in INSN - with TYPE (in/out/inout), biggest reference mode MODE, flag that it -- is reference through subreg (SUBREG_P), flag that is early -- clobbered in the insn (EARLY_CLOBBER), and reference to the next -+ is reference through subreg (SUBREG_P), and reference to the next - insn reg info (NEXT). If REGNO can be early clobbered, - alternatives in which it can be early clobbered are given by -- EARLY_CLOBBER_ALTS. CLOBBER_HIGH marks if reference is a clobber -- high. */ -+ EARLY_CLOBBER_ALTS. */ - static struct lra_insn_reg * - new_insn_reg (rtx_insn *insn, int regno, enum op_type type, -- machine_mode mode, -- bool subreg_p, bool early_clobber, -+ machine_mode mode, bool subreg_p, - alternative_mask early_clobber_alts, -- struct lra_insn_reg *next, bool clobber_high) -+ struct lra_insn_reg *next) - { - lra_insn_reg *ir = lra_insn_reg_pool.allocate (); - ir->type = type; -@@ -556,9 +554,7 @@ new_insn_reg (rtx_insn *insn, int regno, enum op_type type, - && partial_subreg_p (lra_reg_info[regno].biggest_mode, mode)) - lra_reg_info[regno].biggest_mode = mode; - ir->subreg_p = subreg_p; -- ir->early_clobber = early_clobber; - ir->early_clobber_alts = early_clobber_alts; -- ir->clobber_high = clobber_high; - ir->regno = regno; - ir->next = next; - return ir; -@@ -605,7 +601,7 @@ static struct lra_operand_data debug_operand_data = - 0, /* early_clobber_alts */ - E_VOIDmode, /* We are not interesting in the operand mode. 
*/ - OP_IN, -- 0, 0, 0, 0 -+ 0, 0, 0 - }; - - /* The following data are used as static insn data for all debug -@@ -801,7 +797,6 @@ setup_operand_alternative (lra_insn_recog_data_t data, - for (i = 0; i < nop; i++) - { - static_data->operand[i].early_clobber_alts = 0; -- static_data->operand[i].early_clobber = false; - static_data->operand[i].is_address = false; - if (static_data->operand[i].constraint[0] == '%') - { -@@ -817,7 +812,6 @@ setup_operand_alternative (lra_insn_recog_data_t data, - for (j = 0; j < nalt; j++) - for (i = 0; i < nop; i++, op_alt++) - { -- static_data->operand[i].early_clobber |= op_alt->earlyclobber; - if (op_alt->earlyclobber) - static_data->operand[i].early_clobber_alts |= (alternative_mask) 1 << j; - static_data->operand[i].is_address |= op_alt->is_address; -@@ -828,13 +822,12 @@ setup_operand_alternative (lra_insn_recog_data_t data, - not the insn operands, in X with TYPE (in/out/inout) and flag that - it is early clobbered in the insn (EARLY_CLOBBER) and add the info - to LIST. X is a part of insn given by DATA. Return the result -- list. CLOBBER_HIGH marks if X is a clobber high. */ -+ list. */ - static struct lra_insn_reg * - collect_non_operand_hard_regs (rtx_insn *insn, rtx *x, - lra_insn_recog_data_t data, - struct lra_insn_reg *list, -- enum op_type type, bool early_clobber, -- bool clobber_high) -+ enum op_type type, bool early_clobber) - { - int i, j, regno, last; - bool subreg_p; -@@ -878,10 +871,7 @@ collect_non_operand_hard_regs (rtx_insn *insn, rtx *x, - if (curr->type != type) - curr->type = OP_INOUT; - if (early_clobber) -- { -- curr->early_clobber = true; -- curr->early_clobber_alts = ALL_ALTERNATIVES; -- } -+ curr->early_clobber_alts = ALL_ALTERNATIVES; - break; - } - if (curr == NULL) -@@ -897,9 +887,7 @@ collect_non_operand_hard_regs (rtx_insn *insn, rtx *x, - && regno <= LAST_STACK_REG)); - #endif - list = new_insn_reg (data->insn, regno, type, mode, subreg_p, -- early_clobber, -- early_clobber ? ALL_ALTERNATIVES : 0, list, -- clobber_high); -+ early_clobber ? ALL_ALTERNATIVES : 0, list); - } - } - return list; -@@ -908,31 +896,24 @@ collect_non_operand_hard_regs (rtx_insn *insn, rtx *x, - { - case SET: - list = collect_non_operand_hard_regs (insn, &SET_DEST (op), data, -- list, OP_OUT, false, false); -+ list, OP_OUT, false); - list = collect_non_operand_hard_regs (insn, &SET_SRC (op), data, -- list, OP_IN, false, false); -+ list, OP_IN, false); - break; - case CLOBBER: - /* We treat clobber of non-operand hard registers as early clobber. */ - list = collect_non_operand_hard_regs (insn, &XEXP (op, 0), data, -- list, OP_OUT, true, false); -- break; -- case CLOBBER_HIGH: -- /* Clobber high should always span exactly one register. */ -- gcc_assert (REG_NREGS (XEXP (op, 0)) == 1); -- /* We treat clobber of non-operand hard registers as early clobber. 
*/ -- list = collect_non_operand_hard_regs (insn, &XEXP (op, 0), data, -- list, OP_OUT, true, true); -+ list, OP_OUT, true); - break; - case PRE_INC: case PRE_DEC: case POST_INC: case POST_DEC: - list = collect_non_operand_hard_regs (insn, &XEXP (op, 0), data, -- list, OP_INOUT, false, false); -+ list, OP_INOUT, false); - break; - case PRE_MODIFY: case POST_MODIFY: - list = collect_non_operand_hard_regs (insn, &XEXP (op, 0), data, -- list, OP_INOUT, false, false); -+ list, OP_INOUT, false); - list = collect_non_operand_hard_regs (insn, &XEXP (op, 1), data, -- list, OP_IN, false, false); -+ list, OP_IN, false); - break; - default: - fmt = GET_RTX_FORMAT (code); -@@ -940,12 +921,11 @@ collect_non_operand_hard_regs (rtx_insn *insn, rtx *x, - { - if (fmt[i] == 'e') - list = collect_non_operand_hard_regs (insn, &XEXP (op, i), data, -- list, OP_IN, false, false); -+ list, OP_IN, false); - else if (fmt[i] == 'E') - for (j = XVECLEN (op, i) - 1; j >= 0; j--) - list = collect_non_operand_hard_regs (insn, &XVECEXP (op, i, j), -- data, list, OP_IN, false, -- false); -+ data, list, OP_IN, false); - } - } - return list; -@@ -1094,7 +1074,7 @@ lra_set_insn_recog_data (rtx_insn *insn) - else - insn_static_data->hard_regs - = collect_non_operand_hard_regs (insn, &PATTERN (insn), data, -- NULL, OP_IN, false, false); -+ NULL, OP_IN, false); - data->arg_hard_regs = NULL; - if (CALL_P (insn)) - { -@@ -1120,10 +1100,6 @@ lra_set_insn_recog_data (rtx_insn *insn) - arg_hard_regs[n_hard_regs++] - = regno + i + (use_p ? 0 : FIRST_PSEUDO_REGISTER); - } -- else if (GET_CODE (XEXP (link, 0)) == CLOBBER_HIGH) -- /* We could support CLOBBER_HIGH and treat it in the same way as -- HARD_REGNO_CALL_PART_CLOBBERED, but no port needs that yet. */ -- gcc_unreachable (); - - if (n_hard_regs != 0) - { -@@ -1332,7 +1308,6 @@ initialize_lra_reg_info_element (int i) - lra_reg_info[i].no_stack_p = false; - #endif - CLEAR_HARD_REG_SET (lra_reg_info[i].conflict_hard_regs); -- CLEAR_HARD_REG_SET (lra_reg_info[i].actual_call_used_reg_set); - lra_reg_info[i].preferred_hard_regno1 = -1; - lra_reg_info[i].preferred_hard_regno2 = -1; - lra_reg_info[i].preferred_hard_regno_profit1 = 0; -@@ -1345,7 +1320,6 @@ initialize_lra_reg_info_element (int i) - lra_reg_info[i].val = get_new_reg_value (); - lra_reg_info[i].offset = 0; - lra_reg_info[i].copies = NULL; -- lra_reg_info[i].call_insn = NULL; - } - - /* Initialize common reg info and copies. */ -@@ -1449,15 +1423,13 @@ lra_get_copy (int n) - /* This page contains code dealing with info about registers in - insns. */ - --/* Process X of INSN recursively and add info (operand type is -- given by TYPE, flag of that it is early clobber is EARLY_CLOBBER) -- about registers in X to the insn DATA. If X can be early clobbered, -- alternatives in which it can be early clobbered are given by -- EARLY_CLOBBER_ALTS. */ -+/* Process X of INSN recursively and add info (operand type is given -+ by TYPE) about registers in X to the insn DATA. If X can be early -+ clobbered, alternatives in which it can be early clobbered are given -+ by EARLY_CLOBBER_ALTS. 
*/ - static void - add_regs_to_insn_regno_info (lra_insn_recog_data_t data, rtx x, -- rtx_insn *insn, -- enum op_type type, bool early_clobber, -+ rtx_insn *insn, enum op_type type, - alternative_mask early_clobber_alts) - { - int i, j, regno; -@@ -1487,8 +1459,7 @@ add_regs_to_insn_regno_info (lra_insn_recog_data_t data, rtx x, - if (bitmap_set_bit (&lra_reg_info[regno].insn_bitmap, INSN_UID (insn))) - { - data->regs = new_insn_reg (data->insn, regno, type, mode, subreg_p, -- early_clobber, early_clobber_alts, -- data->regs, false); -+ early_clobber_alts, data->regs); - return; - } - else -@@ -1500,15 +1471,12 @@ add_regs_to_insn_regno_info (lra_insn_recog_data_t data, rtx x, - /* The info cannot be integrated into the found - structure. */ - data->regs = new_insn_reg (data->insn, regno, type, mode, -- subreg_p, early_clobber, -- early_clobber_alts, data->regs, -- false); -+ subreg_p, early_clobber_alts, -+ data->regs); - else - { - if (curr->type != type) - curr->type = OP_INOUT; -- if (curr->early_clobber != early_clobber) -- curr->early_clobber = true; - curr->early_clobber_alts |= early_clobber_alts; - } - return; -@@ -1520,23 +1488,21 @@ add_regs_to_insn_regno_info (lra_insn_recog_data_t data, rtx x, - switch (code) - { - case SET: -- add_regs_to_insn_regno_info (data, SET_DEST (x), insn, OP_OUT, false, 0); -- add_regs_to_insn_regno_info (data, SET_SRC (x), insn, OP_IN, false, 0); -+ add_regs_to_insn_regno_info (data, SET_DEST (x), insn, OP_OUT, 0); -+ add_regs_to_insn_regno_info (data, SET_SRC (x), insn, OP_IN, 0); - break; - case CLOBBER: - /* We treat clobber of non-operand hard registers as early - clobber. */ - add_regs_to_insn_regno_info (data, XEXP (x, 0), insn, OP_OUT, -- true, ALL_ALTERNATIVES); -+ ALL_ALTERNATIVES); - break; -- case CLOBBER_HIGH: -- gcc_unreachable (); - case PRE_INC: case PRE_DEC: case POST_INC: case POST_DEC: -- add_regs_to_insn_regno_info (data, XEXP (x, 0), insn, OP_INOUT, false, 0); -+ add_regs_to_insn_regno_info (data, XEXP (x, 0), insn, OP_INOUT, 0); - break; - case PRE_MODIFY: case POST_MODIFY: -- add_regs_to_insn_regno_info (data, XEXP (x, 0), insn, OP_INOUT, false, 0); -- add_regs_to_insn_regno_info (data, XEXP (x, 1), insn, OP_IN, false, 0); -+ add_regs_to_insn_regno_info (data, XEXP (x, 0), insn, OP_INOUT, 0); -+ add_regs_to_insn_regno_info (data, XEXP (x, 1), insn, OP_IN, 0); - break; - default: - if ((code != PARALLEL && code != EXPR_LIST) || type != OP_OUT) -@@ -1557,12 +1523,12 @@ add_regs_to_insn_regno_info (lra_insn_recog_data_t data, rtx x, - for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) - { - if (fmt[i] == 'e') -- add_regs_to_insn_regno_info (data, XEXP (x, i), insn, type, false, 0); -+ add_regs_to_insn_regno_info (data, XEXP (x, i), insn, type, 0); - else if (fmt[i] == 'E') - { - for (j = XVECLEN (x, i) - 1; j >= 0; j--) - add_regs_to_insn_regno_info (data, XVECEXP (x, i, j), insn, -- type, false, 0); -+ type, 0); - } - } - } -@@ -1652,11 +1618,10 @@ lra_update_insn_regno_info (rtx_insn *insn) - for (i = static_data->n_operands - 1; i >= 0; i--) - add_regs_to_insn_regno_info (data, *data->operand_loc[i], insn, - static_data->operand[i].type, -- static_data->operand[i].early_clobber, - static_data->operand[i].early_clobber_alts); - if ((code = GET_CODE (PATTERN (insn))) == CLOBBER || code == USE) - add_regs_to_insn_regno_info (data, XEXP (PATTERN (insn), 0), insn, -- code == USE ? OP_IN : OP_OUT, false, 0); -+ code == USE ? 
OP_IN : OP_OUT, 0); - if (CALL_P (insn)) - /* On some targets call insns can refer to pseudos in memory in - CALL_INSN_FUNCTION_USAGE list. Process them in order to -@@ -1667,13 +1632,10 @@ lra_update_insn_regno_info (rtx_insn *insn) - link = XEXP (link, 1)) - { - code = GET_CODE (XEXP (link, 0)); -- /* We could support CLOBBER_HIGH and treat it in the same way as -- HARD_REGNO_CALL_PART_CLOBBERED, but no port needs that yet. */ -- gcc_assert (code != CLOBBER_HIGH); - if ((code == USE || code == CLOBBER) - && MEM_P (XEXP (XEXP (link, 0), 0))) - add_regs_to_insn_regno_info (data, XEXP (XEXP (link, 0), 0), insn, -- code == USE ? OP_IN : OP_OUT, false, 0); -+ code == USE ? OP_IN : OP_OUT, 0); - } - if (NONDEBUG_INSN_P (insn)) - setup_insn_reg_info (data, freq); -@@ -2400,7 +2362,7 @@ lra (FILE *f) - need it. */ - emit_note (NOTE_INSN_DELETED); - -- COPY_HARD_REG_SET (lra_no_alloc_regs, ira_no_alloc_regs); -+ lra_no_alloc_regs = ira_no_alloc_regs; - - init_reg_info (); - expand_reg_info (); -@@ -2436,7 +2398,9 @@ lra (FILE *f) - - if (crtl->saves_all_registers) - for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) -- if (! call_used_regs[i] && ! fixed_regs[i] && ! LOCAL_REGNO (i)) -+ if (!crtl->abi->clobbers_full_reg_p (i) -+ && !fixed_regs[i] -+ && !LOCAL_REGNO (i)) - df_set_regs_ever_live (i, true); - - /* We don't DF from now and avoid its using because it is to -@@ -2494,19 +2458,7 @@ lra (FILE *f) - } - /* Do inheritance only for regular algorithms. */ - if (! lra_simple_p) -- { -- if (flag_ipa_ra) -- { -- if (live_p) -- lra_clear_live_ranges (); -- /* As a side-effect of lra_create_live_ranges, we calculate -- actual_call_used_reg_set, which is needed during -- lra_inheritance. */ -- lra_create_live_ranges (true, true); -- live_p = true; -- } -- lra_inheritance (); -- } -+ lra_inheritance (); - if (live_p) - lra_clear_live_ranges (); - bool fails_p; -diff --git a/gcc/lto-streamer-out.c b/gcc/lto-streamer-out.c -index c1b160237..f47ac5b76 100644 ---- a/gcc/lto-streamer-out.c -+++ b/gcc/lto-streamer-out.c -@@ -1122,12 +1122,12 @@ hash_tree (struct streamer_tree_cache_d *cache, hash_map *map, - hstate.add_int (DECL_BUILT_IN_CLASS (t)); - hstate.add_flag (DECL_STATIC_CONSTRUCTOR (t)); - hstate.add_flag (DECL_STATIC_DESTRUCTOR (t)); -+ hstate.add_flag (FUNCTION_DECL_DECL_TYPE (t)); - hstate.add_flag (DECL_UNINLINABLE (t)); - hstate.add_flag (DECL_POSSIBLY_INLINED (t)); - hstate.add_flag (DECL_IS_NOVOPS (t)); - hstate.add_flag (DECL_IS_RETURNS_TWICE (t)); - hstate.add_flag (DECL_IS_MALLOC (t)); -- hstate.add_flag (DECL_IS_OPERATOR_NEW (t)); - hstate.add_flag (DECL_DECLARED_INLINE_P (t)); - hstate.add_flag (DECL_STATIC_CHAIN (t)); - hstate.add_flag (DECL_NO_INLINE_WARNING_P (t)); -@@ -1138,7 +1138,7 @@ hash_tree (struct streamer_tree_cache_d *cache, hash_map *map, - hstate.add_flag (DECL_LOOPING_CONST_OR_PURE_P (t)); - hstate.commit_flag (); - if (DECL_BUILT_IN_CLASS (t) != NOT_BUILT_IN) -- hstate.add_int (DECL_FUNCTION_CODE (t)); -+ hstate.add_int (DECL_UNCHECKED_FUNCTION_CODE (t)); - } - - if (CODE_CONTAINS_STRUCT (code, TS_TYPE_COMMON)) -diff --git a/gcc/lto/Make-lang.in b/gcc/lto/Make-lang.in -index 1b856d6d4..b7ed96eac 100644 ---- a/gcc/lto/Make-lang.in -+++ b/gcc/lto/Make-lang.in -@@ -22,7 +22,7 @@ - # The name of the LTO compiler. - LTO_EXE = lto1$(exeext) - # The LTO-specific object files inclued in $(LTO_EXE). 
--LTO_OBJS = lto/lto-lang.o lto/lto.o lto/lto-object.o attribs.o lto/lto-partition.o lto/lto-symtab.o -+LTO_OBJS = lto/lto-lang.o lto/lto.o lto/lto-object.o attribs.o lto/lto-partition.o lto/lto-symtab.o lto/lto-common.o - lto_OBJS = $(LTO_OBJS) - - # this is only useful in a LTO bootstrap, but this does not work right -diff --git a/gcc/lto/config-lang.in b/gcc/lto/config-lang.in -index de9712504..07214365f 100644 ---- a/gcc/lto/config-lang.in -+++ b/gcc/lto/config-lang.in -@@ -20,7 +20,7 @@ - language="lto" - compilers="lto1\$(exeext)" - --gtfiles="\$(srcdir)/lto/lto-tree.h \$(srcdir)/lto/lto-lang.c \$(srcdir)/lto/lto.c \$(srcdir)/lto/lto.h" -+gtfiles="\$(srcdir)/lto/lto-tree.h \$(srcdir)/lto/lto-lang.c \$(srcdir)/lto/lto.c \$(srcdir)/lto/lto.h \$(srcdir)/lto/lto-common.h \$(srcdir)/lto/lto-common.c" - - # LTO is a special front end. From a user's perspective it is not - # really a language, but a middle end feature. However, the GIMPLE -diff --git a/gcc/lto/lto-common.c b/gcc/lto/lto-common.c -new file mode 100644 -index 000000000..daf7f7b47 ---- /dev/null -+++ b/gcc/lto/lto-common.c -@@ -0,0 +1,2837 @@ -+/* Top-level LTO routines. -+ Copyright (C) 2009-2018 Free Software Foundation, Inc. -+ Contributed by CodeSourcery, Inc. -+ -+This file is part of GCC. -+ -+GCC is free software; you can redistribute it and/or modify it under -+the terms of the GNU General Public License as published by the Free -+Software Foundation; either version 3, or (at your option) any later -+version. -+ -+GCC is distributed in the hope that it will be useful, but WITHOUT ANY -+WARRANTY; without even the implied warranty of MERCHANTABILITY or -+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+for more details. -+ -+You should have received a copy of the GNU General Public License -+along with GCC; see the file COPYING3. If not see -+. */ -+ -+#include "config.h" -+#include "system.h" -+#include "coretypes.h" -+#include "tm.h" -+#include "function.h" -+#include "bitmap.h" -+#include "basic-block.h" -+#include "tree.h" -+#include "gimple.h" -+#include "cfghooks.h" -+#include "alloc-pool.h" -+#include "tree-pass.h" -+#include "tree-streamer.h" -+#include "cgraph.h" -+#include "opts.h" -+#include "toplev.h" -+#include "stor-layout.h" -+#include "symbol-summary.h" -+#include "tree-vrp.h" -+#include "ipa-prop.h" -+#include "common.h" -+#include "debug.h" -+#include "lto.h" -+#include "lto-section-names.h" -+#include "splay-tree.h" -+#include "lto-partition.h" -+#include "context.h" -+#include "pass_manager.h" -+#include "ipa-fnsummary.h" -+#include "params.h" -+#include "ipa-utils.h" -+#include "gomp-constants.h" -+#include "lto-symtab.h" -+#include "stringpool.h" -+#include "fold-const.h" -+#include "attribs.h" -+#include "builtins.h" -+#include "lto-common.h" -+ -+GTY(()) tree first_personality_decl; -+ -+GTY(()) const unsigned char *lto_mode_identity_table; -+ -+/* Returns a hash code for P. */ -+ -+static hashval_t -+hash_name (const void *p) -+{ -+ const struct lto_section_slot *ds = (const struct lto_section_slot *) p; -+ return (hashval_t) htab_hash_string (ds->name); -+} -+ -+ -+/* Returns nonzero if P1 and P2 are equal. 
*/ -+ -+static int -+eq_name (const void *p1, const void *p2) -+{ -+ const struct lto_section_slot *s1 = -+ (const struct lto_section_slot *) p1; -+ const struct lto_section_slot *s2 = -+ (const struct lto_section_slot *) p2; -+ -+ return strcmp (s1->name, s2->name) == 0; -+} -+ -+/* Free lto_section_slot */ -+ -+static void -+free_with_string (void *arg) -+{ -+ struct lto_section_slot *s = (struct lto_section_slot *)arg; -+ -+ free (CONST_CAST (char *, s->name)); -+ free (arg); -+} -+ -+/* Create section hash table */ -+ -+htab_t -+lto_obj_create_section_hash_table (void) -+{ -+ return htab_create (37, hash_name, eq_name, free_with_string); -+} -+ -+/* Delete an allocated integer KEY in the splay tree. */ -+ -+static void -+lto_splay_tree_delete_id (splay_tree_key key) -+{ -+ free ((void *) key); -+} -+ -+/* Compare splay tree node ids A and B. */ -+ -+static int -+lto_splay_tree_compare_ids (splay_tree_key a, splay_tree_key b) -+{ -+ unsigned HOST_WIDE_INT ai; -+ unsigned HOST_WIDE_INT bi; -+ -+ ai = *(unsigned HOST_WIDE_INT *) a; -+ bi = *(unsigned HOST_WIDE_INT *) b; -+ -+ if (ai < bi) -+ return -1; -+ else if (ai > bi) -+ return 1; -+ return 0; -+} -+ -+/* Look up splay tree node by ID in splay tree T. */ -+ -+static splay_tree_node -+lto_splay_tree_lookup (splay_tree t, unsigned HOST_WIDE_INT id) -+{ -+ return splay_tree_lookup (t, (splay_tree_key) &id); -+} -+ -+/* Check if KEY has ID. */ -+ -+static bool -+lto_splay_tree_id_equal_p (splay_tree_key key, unsigned HOST_WIDE_INT id) -+{ -+ return *(unsigned HOST_WIDE_INT *) key == id; -+} -+ -+/* Insert a splay tree node into tree T with ID as key and FILE_DATA as value. -+ The ID is allocated separately because we need HOST_WIDE_INTs which may -+ be wider than a splay_tree_key. */ -+ -+static void -+lto_splay_tree_insert (splay_tree t, unsigned HOST_WIDE_INT id, -+ struct lto_file_decl_data *file_data) -+{ -+ unsigned HOST_WIDE_INT *idp = XCNEW (unsigned HOST_WIDE_INT); -+ *idp = id; -+ splay_tree_insert (t, (splay_tree_key) idp, (splay_tree_value) file_data); -+} -+ -+/* Create a splay tree. */ -+ -+static splay_tree -+lto_splay_tree_new (void) -+{ -+ return splay_tree_new (lto_splay_tree_compare_ids, -+ lto_splay_tree_delete_id, -+ NULL); -+} -+ -+/* Decode the content of memory pointed to by DATA in the in decl -+ state object STATE. DATA_IN points to a data_in structure for -+ decoding. Return the address after the decoded object in the -+ input. */ -+ -+static const uint32_t * -+lto_read_in_decl_state (struct data_in *data_in, const uint32_t *data, -+ struct lto_in_decl_state *state) -+{ -+ uint32_t ix; -+ tree decl; -+ uint32_t i, j; -+ -+ ix = *data++; -+ state->compressed = ix & 1; -+ ix /= 2; -+ decl = streamer_tree_cache_get_tree (data_in->reader_cache, ix); -+ if (!VAR_OR_FUNCTION_DECL_P (decl)) -+ { -+ gcc_assert (decl == void_type_node); -+ decl = NULL_TREE; -+ } -+ state->fn_decl = decl; -+ -+ for (i = 0; i < LTO_N_DECL_STREAMS; i++) -+ { -+ uint32_t size = *data++; -+ vec *decls = NULL; -+ vec_alloc (decls, size); -+ -+ for (j = 0; j < size; j++) -+ vec_safe_push (decls, -+ streamer_tree_cache_get_tree (data_in->reader_cache, -+ data[j])); -+ -+ state->streams[i] = decls; -+ data += size; -+ } -+ -+ return data; -+} -+ -+ -+/* Global canonical type table. 
*/ -+static htab_t gimple_canonical_types; -+static hash_map *canonical_type_hash_cache; -+static unsigned long num_canonical_type_hash_entries; -+static unsigned long num_canonical_type_hash_queries; -+ -+static void iterative_hash_canonical_type (tree type, inchash::hash &hstate); -+static hashval_t gimple_canonical_type_hash (const void *p); -+static void gimple_register_canonical_type_1 (tree t, hashval_t hash); -+ -+/* Returning a hash value for gimple type TYPE. -+ -+ The hash value returned is equal for types considered compatible -+ by gimple_canonical_types_compatible_p. */ -+ -+static hashval_t -+hash_canonical_type (tree type) -+{ -+ inchash::hash hstate; -+ enum tree_code code; -+ -+ /* We compute alias sets only for types that needs them. -+ Be sure we do not recurse to something else as we cannot hash incomplete -+ types in a way they would have same hash value as compatible complete -+ types. */ -+ gcc_checking_assert (type_with_alias_set_p (type)); -+ -+ /* Combine a few common features of types so that types are grouped into -+ smaller sets; when searching for existing matching types to merge, -+ only existing types having the same features as the new type will be -+ checked. */ -+ code = tree_code_for_canonical_type_merging (TREE_CODE (type)); -+ hstate.add_int (code); -+ hstate.add_int (TYPE_MODE (type)); -+ -+ /* Incorporate common features of numerical types. */ -+ if (INTEGRAL_TYPE_P (type) -+ || SCALAR_FLOAT_TYPE_P (type) -+ || FIXED_POINT_TYPE_P (type) -+ || TREE_CODE (type) == OFFSET_TYPE -+ || POINTER_TYPE_P (type)) -+ { -+ hstate.add_int (TYPE_PRECISION (type)); -+ if (!type_with_interoperable_signedness (type)) -+ hstate.add_int (TYPE_UNSIGNED (type)); -+ } -+ -+ if (VECTOR_TYPE_P (type)) -+ { -+ hstate.add_poly_int (TYPE_VECTOR_SUBPARTS (type)); -+ hstate.add_int (TYPE_UNSIGNED (type)); -+ } -+ -+ if (TREE_CODE (type) == COMPLEX_TYPE) -+ hstate.add_int (TYPE_UNSIGNED (type)); -+ -+ /* Fortran's C_SIGNED_CHAR is !TYPE_STRING_FLAG but needs to be -+ interoperable with "signed char". Unless all frontends are revisited to -+ agree on these types, we must ignore the flag completely. */ -+ -+ /* Fortran standard define C_PTR type that is compatible with every -+ C pointer. For this reason we need to glob all pointers into one. -+ Still pointers in different address spaces are not compatible. */ -+ if (POINTER_TYPE_P (type)) -+ hstate.add_int (TYPE_ADDR_SPACE (TREE_TYPE (type))); -+ -+ /* For array types hash the domain bounds and the string flag. */ -+ if (TREE_CODE (type) == ARRAY_TYPE && TYPE_DOMAIN (type)) -+ { -+ hstate.add_int (TYPE_STRING_FLAG (type)); -+ /* OMP lowering can introduce error_mark_node in place of -+ random local decls in types. */ -+ if (TYPE_MIN_VALUE (TYPE_DOMAIN (type)) != error_mark_node) -+ inchash::add_expr (TYPE_MIN_VALUE (TYPE_DOMAIN (type)), hstate); -+ if (TYPE_MAX_VALUE (TYPE_DOMAIN (type)) != error_mark_node) -+ inchash::add_expr (TYPE_MAX_VALUE (TYPE_DOMAIN (type)), hstate); -+ } -+ -+ /* Recurse for aggregates with a single element type. */ -+ if (TREE_CODE (type) == ARRAY_TYPE -+ || TREE_CODE (type) == COMPLEX_TYPE -+ || TREE_CODE (type) == VECTOR_TYPE) -+ iterative_hash_canonical_type (TREE_TYPE (type), hstate); -+ -+ /* Incorporate function return and argument types. 
*/ -+ if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE) -+ { -+ unsigned na; -+ tree p; -+ -+ iterative_hash_canonical_type (TREE_TYPE (type), hstate); -+ -+ for (p = TYPE_ARG_TYPES (type), na = 0; p; p = TREE_CHAIN (p)) -+ { -+ iterative_hash_canonical_type (TREE_VALUE (p), hstate); -+ na++; -+ } -+ -+ hstate.add_int (na); -+ } -+ -+ if (RECORD_OR_UNION_TYPE_P (type)) -+ { -+ unsigned nf; -+ tree f; -+ -+ for (f = TYPE_FIELDS (type), nf = 0; f; f = TREE_CHAIN (f)) -+ if (TREE_CODE (f) == FIELD_DECL -+ && (! DECL_SIZE (f) -+ || ! integer_zerop (DECL_SIZE (f)))) -+ { -+ iterative_hash_canonical_type (TREE_TYPE (f), hstate); -+ nf++; -+ } -+ -+ hstate.add_int (nf); -+ } -+ -+ return hstate.end(); -+} -+ -+/* Returning a hash value for gimple type TYPE combined with VAL. */ -+ -+static void -+iterative_hash_canonical_type (tree type, inchash::hash &hstate) -+{ -+ hashval_t v; -+ -+ /* All type variants have same TYPE_CANONICAL. */ -+ type = TYPE_MAIN_VARIANT (type); -+ -+ if (!canonical_type_used_p (type)) -+ v = hash_canonical_type (type); -+ /* An already processed type. */ -+ else if (TYPE_CANONICAL (type)) -+ { -+ type = TYPE_CANONICAL (type); -+ v = gimple_canonical_type_hash (type); -+ } -+ else -+ { -+ /* Canonical types should not be able to form SCCs by design, this -+ recursion is just because we do not register canonical types in -+ optimal order. To avoid quadratic behavior also register the -+ type here. */ -+ v = hash_canonical_type (type); -+ gimple_register_canonical_type_1 (type, v); -+ } -+ hstate.add_int (v); -+} -+ -+/* Returns the hash for a canonical type P. */ -+ -+static hashval_t -+gimple_canonical_type_hash (const void *p) -+{ -+ num_canonical_type_hash_queries++; -+ hashval_t *slot = canonical_type_hash_cache->get ((const_tree) p); -+ gcc_assert (slot != NULL); -+ return *slot; -+} -+ -+ -+ -+/* Returns nonzero if P1 and P2 are equal. */ -+ -+static int -+gimple_canonical_type_eq (const void *p1, const void *p2) -+{ -+ const_tree t1 = (const_tree) p1; -+ const_tree t2 = (const_tree) p2; -+ return gimple_canonical_types_compatible_p (CONST_CAST_TREE (t1), -+ CONST_CAST_TREE (t2)); -+} -+ -+/* Main worker for gimple_register_canonical_type. */ -+ -+static void -+gimple_register_canonical_type_1 (tree t, hashval_t hash) -+{ -+ void **slot; -+ -+ gcc_checking_assert (TYPE_P (t) && !TYPE_CANONICAL (t) -+ && type_with_alias_set_p (t) -+ && canonical_type_used_p (t)); -+ -+ slot = htab_find_slot_with_hash (gimple_canonical_types, t, hash, INSERT); -+ if (*slot) -+ { -+ tree new_type = (tree)(*slot); -+ gcc_checking_assert (new_type != t); -+ TYPE_CANONICAL (t) = new_type; -+ } -+ else -+ { -+ TYPE_CANONICAL (t) = t; -+ *slot = (void *) t; -+ /* Cache the just computed hash value. */ -+ num_canonical_type_hash_entries++; -+ bool existed_p = canonical_type_hash_cache->put (t, hash); -+ gcc_assert (!existed_p); -+ } -+} -+ -+/* Register type T in the global type table gimple_types and set -+ TYPE_CANONICAL of T accordingly. -+ This is used by LTO to merge structurally equivalent types for -+ type-based aliasing purposes across different TUs and languages. -+ -+ ??? This merging does not exactly match how the tree.c middle-end -+ functions will assign TYPE_CANONICAL when new types are created -+ during optimization (which at least happens for pointer and array -+ types). 
*/ -+ -+static void -+gimple_register_canonical_type (tree t) -+{ -+ if (TYPE_CANONICAL (t) || !type_with_alias_set_p (t) -+ || !canonical_type_used_p (t)) -+ return; -+ -+ /* Canonical types are same among all complete variants. */ -+ if (TYPE_CANONICAL (TYPE_MAIN_VARIANT (t))) -+ TYPE_CANONICAL (t) = TYPE_CANONICAL (TYPE_MAIN_VARIANT (t)); -+ else -+ { -+ gimple_register_canonical_type_1 (TYPE_MAIN_VARIANT (t), -+ hash_canonical_type (TYPE_MAIN_VARIANT (t))); -+ TYPE_CANONICAL (t) = TYPE_CANONICAL (TYPE_MAIN_VARIANT (t)); -+ } -+} -+ -+/* Re-compute TYPE_CANONICAL for NODE and related types. */ -+ -+static void -+lto_register_canonical_types (tree node, bool first_p) -+{ -+ if (!node -+ || !TYPE_P (node)) -+ return; -+ -+ if (first_p) -+ TYPE_CANONICAL (node) = NULL_TREE; -+ -+ if (POINTER_TYPE_P (node) -+ || TREE_CODE (node) == COMPLEX_TYPE -+ || TREE_CODE (node) == ARRAY_TYPE) -+ lto_register_canonical_types (TREE_TYPE (node), first_p); -+ -+ if (!first_p) -+ gimple_register_canonical_type (node); -+} -+ -+ -+/* Remember trees that contains references to declarations. */ -+vec *tree_with_vars; -+ -+#define CHECK_VAR(tt) \ -+ do \ -+ { \ -+ if ((tt) && VAR_OR_FUNCTION_DECL_P (tt) \ -+ && (TREE_PUBLIC (tt) || DECL_EXTERNAL (tt))) \ -+ return true; \ -+ } while (0) -+ -+#define CHECK_NO_VAR(tt) \ -+ gcc_checking_assert (!(tt) || !VAR_OR_FUNCTION_DECL_P (tt)) -+ -+/* Check presence of pointers to decls in fields of a tree_typed T. */ -+ -+static inline bool -+mentions_vars_p_typed (tree t) -+{ -+ CHECK_NO_VAR (TREE_TYPE (t)); -+ return false; -+} -+ -+/* Check presence of pointers to decls in fields of a tree_common T. */ -+ -+static inline bool -+mentions_vars_p_common (tree t) -+{ -+ if (mentions_vars_p_typed (t)) -+ return true; -+ CHECK_NO_VAR (TREE_CHAIN (t)); -+ return false; -+} -+ -+/* Check presence of pointers to decls in fields of a decl_minimal T. */ -+ -+static inline bool -+mentions_vars_p_decl_minimal (tree t) -+{ -+ if (mentions_vars_p_common (t)) -+ return true; -+ CHECK_NO_VAR (DECL_NAME (t)); -+ CHECK_VAR (DECL_CONTEXT (t)); -+ return false; -+} -+ -+/* Check presence of pointers to decls in fields of a decl_common T. */ -+ -+static inline bool -+mentions_vars_p_decl_common (tree t) -+{ -+ if (mentions_vars_p_decl_minimal (t)) -+ return true; -+ CHECK_VAR (DECL_SIZE (t)); -+ CHECK_VAR (DECL_SIZE_UNIT (t)); -+ CHECK_VAR (DECL_INITIAL (t)); -+ CHECK_NO_VAR (DECL_ATTRIBUTES (t)); -+ CHECK_VAR (DECL_ABSTRACT_ORIGIN (t)); -+ return false; -+} -+ -+/* Check presence of pointers to decls in fields of a decl_with_vis T. */ -+ -+static inline bool -+mentions_vars_p_decl_with_vis (tree t) -+{ -+ if (mentions_vars_p_decl_common (t)) -+ return true; -+ -+ /* Accessor macro has side-effects, use field-name here. */ -+ CHECK_NO_VAR (DECL_ASSEMBLER_NAME_RAW (t)); -+ return false; -+} -+ -+/* Check presence of pointers to decls in fields of a decl_non_common T. */ -+ -+static inline bool -+mentions_vars_p_decl_non_common (tree t) -+{ -+ if (mentions_vars_p_decl_with_vis (t)) -+ return true; -+ CHECK_NO_VAR (DECL_RESULT_FLD (t)); -+ return false; -+} -+ -+/* Check presence of pointers to decls in fields of a decl_non_common T. */ -+ -+static bool -+mentions_vars_p_function (tree t) -+{ -+ if (mentions_vars_p_decl_non_common (t)) -+ return true; -+ CHECK_NO_VAR (DECL_ARGUMENTS (t)); -+ CHECK_NO_VAR (DECL_VINDEX (t)); -+ CHECK_VAR (DECL_FUNCTION_PERSONALITY (t)); -+ return false; -+} -+ -+/* Check presence of pointers to decls in fields of a field_decl T. 
*/ -+ -+static bool -+mentions_vars_p_field_decl (tree t) -+{ -+ if (mentions_vars_p_decl_common (t)) -+ return true; -+ CHECK_VAR (DECL_FIELD_OFFSET (t)); -+ CHECK_NO_VAR (DECL_BIT_FIELD_TYPE (t)); -+ CHECK_NO_VAR (DECL_QUALIFIER (t)); -+ CHECK_NO_VAR (DECL_FIELD_BIT_OFFSET (t)); -+ CHECK_NO_VAR (DECL_FCONTEXT (t)); -+ return false; -+} -+ -+/* Check presence of pointers to decls in fields of a type T. */ -+ -+static bool -+mentions_vars_p_type (tree t) -+{ -+ if (mentions_vars_p_common (t)) -+ return true; -+ CHECK_NO_VAR (TYPE_CACHED_VALUES (t)); -+ CHECK_VAR (TYPE_SIZE (t)); -+ CHECK_VAR (TYPE_SIZE_UNIT (t)); -+ CHECK_NO_VAR (TYPE_ATTRIBUTES (t)); -+ CHECK_NO_VAR (TYPE_NAME (t)); -+ -+ CHECK_VAR (TYPE_MIN_VALUE_RAW (t)); -+ CHECK_VAR (TYPE_MAX_VALUE_RAW (t)); -+ -+ /* Accessor is for derived node types only. */ -+ CHECK_NO_VAR (TYPE_LANG_SLOT_1 (t)); -+ -+ CHECK_VAR (TYPE_CONTEXT (t)); -+ CHECK_NO_VAR (TYPE_CANONICAL (t)); -+ CHECK_NO_VAR (TYPE_MAIN_VARIANT (t)); -+ CHECK_NO_VAR (TYPE_NEXT_VARIANT (t)); -+ return false; -+} -+ -+/* Check presence of pointers to decls in fields of a BINFO T. */ -+ -+static bool -+mentions_vars_p_binfo (tree t) -+{ -+ unsigned HOST_WIDE_INT i, n; -+ -+ if (mentions_vars_p_common (t)) -+ return true; -+ CHECK_VAR (BINFO_VTABLE (t)); -+ CHECK_NO_VAR (BINFO_OFFSET (t)); -+ CHECK_NO_VAR (BINFO_VIRTUALS (t)); -+ CHECK_NO_VAR (BINFO_VPTR_FIELD (t)); -+ n = vec_safe_length (BINFO_BASE_ACCESSES (t)); -+ for (i = 0; i < n; i++) -+ CHECK_NO_VAR (BINFO_BASE_ACCESS (t, i)); -+ /* Do not walk BINFO_INHERITANCE_CHAIN, BINFO_SUBVTT_INDEX -+ and BINFO_VPTR_INDEX; these are used by C++ FE only. */ -+ n = BINFO_N_BASE_BINFOS (t); -+ for (i = 0; i < n; i++) -+ CHECK_NO_VAR (BINFO_BASE_BINFO (t, i)); -+ return false; -+} -+ -+/* Check presence of pointers to decls in fields of a CONSTRUCTOR T. */ -+ -+static bool -+mentions_vars_p_constructor (tree t) -+{ -+ unsigned HOST_WIDE_INT idx; -+ constructor_elt *ce; -+ -+ if (mentions_vars_p_typed (t)) -+ return true; -+ -+ for (idx = 0; vec_safe_iterate (CONSTRUCTOR_ELTS (t), idx, &ce); idx++) -+ { -+ CHECK_NO_VAR (ce->index); -+ CHECK_VAR (ce->value); -+ } -+ return false; -+} -+ -+/* Check presence of pointers to decls in fields of an expression tree T. */ -+ -+static bool -+mentions_vars_p_expr (tree t) -+{ -+ int i; -+ if (mentions_vars_p_typed (t)) -+ return true; -+ for (i = TREE_OPERAND_LENGTH (t) - 1; i >= 0; --i) -+ CHECK_VAR (TREE_OPERAND (t, i)); -+ return false; -+} -+ -+/* Check presence of pointers to decls in fields of an OMP_CLAUSE T. */ -+ -+static bool -+mentions_vars_p_omp_clause (tree t) -+{ -+ int i; -+ if (mentions_vars_p_common (t)) -+ return true; -+ for (i = omp_clause_num_ops[OMP_CLAUSE_CODE (t)] - 1; i >= 0; --i) -+ CHECK_VAR (OMP_CLAUSE_OPERAND (t, i)); -+ return false; -+} -+ -+/* Check presence of pointers to decls that needs later fixup in T. 
*/ -+ -+static bool -+mentions_vars_p (tree t) -+{ -+ switch (TREE_CODE (t)) -+ { -+ case IDENTIFIER_NODE: -+ break; -+ -+ case TREE_LIST: -+ CHECK_VAR (TREE_VALUE (t)); -+ CHECK_VAR (TREE_PURPOSE (t)); -+ CHECK_NO_VAR (TREE_CHAIN (t)); -+ break; -+ -+ case FIELD_DECL: -+ return mentions_vars_p_field_decl (t); -+ -+ case LABEL_DECL: -+ case CONST_DECL: -+ case PARM_DECL: -+ case RESULT_DECL: -+ case IMPORTED_DECL: -+ case NAMESPACE_DECL: -+ case NAMELIST_DECL: -+ return mentions_vars_p_decl_common (t); -+ -+ case VAR_DECL: -+ return mentions_vars_p_decl_with_vis (t); -+ -+ case TYPE_DECL: -+ return mentions_vars_p_decl_non_common (t); -+ -+ case FUNCTION_DECL: -+ return mentions_vars_p_function (t); -+ -+ case TREE_BINFO: -+ return mentions_vars_p_binfo (t); -+ -+ case PLACEHOLDER_EXPR: -+ return mentions_vars_p_common (t); -+ -+ case BLOCK: -+ case TRANSLATION_UNIT_DECL: -+ case OPTIMIZATION_NODE: -+ case TARGET_OPTION_NODE: -+ break; -+ -+ case CONSTRUCTOR: -+ return mentions_vars_p_constructor (t); -+ -+ case OMP_CLAUSE: -+ return mentions_vars_p_omp_clause (t); -+ -+ default: -+ if (TYPE_P (t)) -+ { -+ if (mentions_vars_p_type (t)) -+ return true; -+ } -+ else if (EXPR_P (t)) -+ { -+ if (mentions_vars_p_expr (t)) -+ return true; -+ } -+ else if (CONSTANT_CLASS_P (t)) -+ CHECK_NO_VAR (TREE_TYPE (t)); -+ else -+ gcc_unreachable (); -+ } -+ return false; -+} -+ -+ -+/* Return the resolution for the decl with index INDEX from DATA_IN. */ -+ -+static enum ld_plugin_symbol_resolution -+get_resolution (struct data_in *data_in, unsigned index) -+{ -+ if (data_in->globals_resolution.exists ()) -+ { -+ ld_plugin_symbol_resolution_t ret; -+ /* We can have references to not emitted functions in -+ DECL_FUNCTION_PERSONALITY at least. So we can and have -+ to indeed return LDPR_UNKNOWN in some cases. */ -+ if (data_in->globals_resolution.length () <= index) -+ return LDPR_UNKNOWN; -+ ret = data_in->globals_resolution[index]; -+ return ret; -+ } -+ else -+ /* Delay resolution finding until decl merging. */ -+ return LDPR_UNKNOWN; -+} -+ -+/* We need to record resolutions until symbol table is read. */ -+static void -+register_resolution (struct lto_file_decl_data *file_data, tree decl, -+ enum ld_plugin_symbol_resolution resolution) -+{ -+ bool existed; -+ if (resolution == LDPR_UNKNOWN) -+ return; -+ if (!file_data->resolution_map) -+ file_data->resolution_map -+ = new hash_map; -+ ld_plugin_symbol_resolution_t &res -+ = file_data->resolution_map->get_or_insert (decl, &existed); -+ if (!existed -+ || resolution == LDPR_PREVAILING_DEF_IRONLY -+ || resolution == LDPR_PREVAILING_DEF -+ || resolution == LDPR_PREVAILING_DEF_IRONLY_EXP) -+ res = resolution; -+} -+ -+/* Register DECL with the global symbol table and change its -+ name if necessary to avoid name clashes for static globals across -+ different files. */ -+ -+static void -+lto_register_var_decl_in_symtab (struct data_in *data_in, tree decl, -+ unsigned ix) -+{ -+ tree context; -+ -+ /* Variable has file scope, not local. */ -+ if (!TREE_PUBLIC (decl) -+ && !((context = decl_function_context (decl)) -+ && auto_var_in_fn_p (decl, context))) -+ rest_of_decl_compilation (decl, 1, 0); -+ -+ /* If this variable has already been declared, queue the -+ declaration for merging. */ -+ if (TREE_PUBLIC (decl)) -+ register_resolution (data_in->file_data, -+ decl, get_resolution (data_in, ix)); -+} -+ -+ -+/* Register DECL with the global symbol table and change its -+ name if necessary to avoid name clashes for static globals across -+ different files. 
DATA_IN contains descriptors and tables for the -+ file being read. */ -+ -+static void -+lto_register_function_decl_in_symtab (struct data_in *data_in, tree decl, -+ unsigned ix) -+{ -+ /* If this variable has already been declared, queue the -+ declaration for merging. */ -+ if (TREE_PUBLIC (decl) && !DECL_ABSTRACT_P (decl)) -+ register_resolution (data_in->file_data, -+ decl, get_resolution (data_in, ix)); -+} -+ -+/* Check if T is a decl and needs register its resolution info. */ -+ -+static void -+lto_maybe_register_decl (struct data_in *data_in, tree t, unsigned ix) -+{ -+ if (TREE_CODE (t) == VAR_DECL) -+ lto_register_var_decl_in_symtab (data_in, t, ix); -+ else if (TREE_CODE (t) == FUNCTION_DECL -+ && !fndecl_built_in_p (t)) -+ lto_register_function_decl_in_symtab (data_in, t, ix); -+} -+ -+ -+/* For the type T re-materialize it in the type variant list and -+ the pointer/reference-to chains. */ -+ -+static void -+lto_fixup_prevailing_type (tree t) -+{ -+ /* The following re-creates proper variant lists while fixing up -+ the variant leaders. We do not stream TYPE_NEXT_VARIANT so the -+ variant list state before fixup is broken. */ -+ -+ /* If we are not our own variant leader link us into our new leaders -+ variant list. */ -+ if (TYPE_MAIN_VARIANT (t) != t) -+ { -+ tree mv = TYPE_MAIN_VARIANT (t); -+ TYPE_NEXT_VARIANT (t) = TYPE_NEXT_VARIANT (mv); -+ TYPE_NEXT_VARIANT (mv) = t; -+ } -+ -+ /* The following reconstructs the pointer chains -+ of the new pointed-to type if we are a main variant. We do -+ not stream those so they are broken before fixup. */ -+ if (TREE_CODE (t) == POINTER_TYPE -+ && TYPE_MAIN_VARIANT (t) == t) -+ { -+ TYPE_NEXT_PTR_TO (t) = TYPE_POINTER_TO (TREE_TYPE (t)); -+ TYPE_POINTER_TO (TREE_TYPE (t)) = t; -+ } -+ else if (TREE_CODE (t) == REFERENCE_TYPE -+ && TYPE_MAIN_VARIANT (t) == t) -+ { -+ TYPE_NEXT_REF_TO (t) = TYPE_REFERENCE_TO (TREE_TYPE (t)); -+ TYPE_REFERENCE_TO (TREE_TYPE (t)) = t; -+ } -+} -+ -+ -+/* We keep prevailing tree SCCs in a hashtable with manual collision -+ handling (in case all hashes compare the same) and keep the colliding -+ entries in the tree_scc->next chain. */ -+ -+struct tree_scc -+{ -+ tree_scc *next; -+ /* Hash of the whole SCC. */ -+ hashval_t hash; -+ /* Number of trees in the SCC. */ -+ unsigned len; -+ /* Number of possible entries into the SCC (tree nodes [0..entry_len-1] -+ which share the same individual tree hash). */ -+ unsigned entry_len; -+ /* The members of the SCC. -+ We only need to remember the first entry node candidate for prevailing -+ SCCs (but of course have access to all entries for SCCs we are -+ processing). -+ ??? For prevailing SCCs we really only need hash and the first -+ entry candidate, but that's too awkward to implement. 
*/ -+ tree entries[1]; -+}; -+ -+struct tree_scc_hasher : nofree_ptr_hash -+{ -+ static inline hashval_t hash (const tree_scc *); -+ static inline bool equal (const tree_scc *, const tree_scc *); -+}; -+ -+hashval_t -+tree_scc_hasher::hash (const tree_scc *scc) -+{ -+ return scc->hash; -+} -+ -+bool -+tree_scc_hasher::equal (const tree_scc *scc1, const tree_scc *scc2) -+{ -+ if (scc1->hash != scc2->hash -+ || scc1->len != scc2->len -+ || scc1->entry_len != scc2->entry_len) -+ return false; -+ return true; -+} -+ -+static hash_table *tree_scc_hash; -+static struct obstack tree_scc_hash_obstack; -+ -+static unsigned long num_merged_types; -+static unsigned long num_prevailing_types; -+static unsigned long num_type_scc_trees; -+static unsigned long total_scc_size; -+static unsigned long num_sccs_read; -+static unsigned long total_scc_size_merged; -+static unsigned long num_sccs_merged; -+static unsigned long num_scc_compares; -+static unsigned long num_scc_compare_collisions; -+ -+ -+/* Compare the two entries T1 and T2 of two SCCs that are possibly equal, -+ recursing through in-SCC tree edges. Returns true if the SCCs entered -+ through T1 and T2 are equal and fills in *MAP with the pairs of -+ SCC entries we visited, starting with (*MAP)[0] = T1 and (*MAP)[1] = T2. */ -+ -+static bool -+compare_tree_sccs_1 (tree t1, tree t2, tree **map) -+{ -+ enum tree_code code; -+ -+ /* Mark already visited nodes. */ -+ TREE_ASM_WRITTEN (t2) = 1; -+ -+ /* Push the pair onto map. */ -+ (*map)[0] = t1; -+ (*map)[1] = t2; -+ *map = *map + 2; -+ -+ /* Compare value-fields. */ -+#define compare_values(X) \ -+ do { \ -+ if (X(t1) != X(t2)) \ -+ return false; \ -+ } while (0) -+ -+ compare_values (TREE_CODE); -+ code = TREE_CODE (t1); -+ -+ if (!TYPE_P (t1)) -+ { -+ compare_values (TREE_SIDE_EFFECTS); -+ compare_values (TREE_CONSTANT); -+ compare_values (TREE_READONLY); -+ compare_values (TREE_PUBLIC); -+ } -+ compare_values (TREE_ADDRESSABLE); -+ compare_values (TREE_THIS_VOLATILE); -+ if (DECL_P (t1)) -+ compare_values (DECL_UNSIGNED); -+ else if (TYPE_P (t1)) -+ compare_values (TYPE_UNSIGNED); -+ if (TYPE_P (t1)) -+ compare_values (TYPE_ARTIFICIAL); -+ else -+ compare_values (TREE_NO_WARNING); -+ compare_values (TREE_NOTHROW); -+ compare_values (TREE_STATIC); -+ if (code != TREE_BINFO) -+ compare_values (TREE_PRIVATE); -+ compare_values (TREE_PROTECTED); -+ compare_values (TREE_DEPRECATED); -+ if (TYPE_P (t1)) -+ { -+ if (AGGREGATE_TYPE_P (t1)) -+ compare_values (TYPE_REVERSE_STORAGE_ORDER); -+ else -+ compare_values (TYPE_SATURATING); -+ compare_values (TYPE_ADDR_SPACE); -+ } -+ else if (code == SSA_NAME) -+ compare_values (SSA_NAME_IS_DEFAULT_DEF); -+ -+ if (CODE_CONTAINS_STRUCT (code, TS_INT_CST)) -+ { -+ if (wi::to_wide (t1) != wi::to_wide (t2)) -+ return false; -+ } -+ -+ if (CODE_CONTAINS_STRUCT (code, TS_REAL_CST)) -+ { -+ /* ??? No suitable compare routine available. 
*/ -+ REAL_VALUE_TYPE r1 = TREE_REAL_CST (t1); -+ REAL_VALUE_TYPE r2 = TREE_REAL_CST (t2); -+ if (r1.cl != r2.cl -+ || r1.decimal != r2.decimal -+ || r1.sign != r2.sign -+ || r1.signalling != r2.signalling -+ || r1.canonical != r2.canonical -+ || r1.uexp != r2.uexp) -+ return false; -+ for (unsigned i = 0; i < SIGSZ; ++i) -+ if (r1.sig[i] != r2.sig[i]) -+ return false; -+ } -+ -+ if (CODE_CONTAINS_STRUCT (code, TS_FIXED_CST)) -+ if (!fixed_compare (EQ_EXPR, -+ TREE_FIXED_CST_PTR (t1), TREE_FIXED_CST_PTR (t2))) -+ return false; -+ -+ if (CODE_CONTAINS_STRUCT (code, TS_VECTOR)) -+ { -+ compare_values (VECTOR_CST_LOG2_NPATTERNS); -+ compare_values (VECTOR_CST_NELTS_PER_PATTERN); -+ } -+ -+ if (CODE_CONTAINS_STRUCT (code, TS_DECL_COMMON)) -+ { -+ compare_values (DECL_MODE); -+ compare_values (DECL_NONLOCAL); -+ compare_values (DECL_VIRTUAL_P); -+ compare_values (DECL_IGNORED_P); -+ compare_values (DECL_ABSTRACT_P); -+ compare_values (DECL_ARTIFICIAL); -+ compare_values (DECL_USER_ALIGN); -+ compare_values (DECL_PRESERVE_P); -+ compare_values (DECL_EXTERNAL); -+ compare_values (DECL_GIMPLE_REG_P); -+ compare_values (DECL_ALIGN); -+ if (code == LABEL_DECL) -+ { -+ compare_values (EH_LANDING_PAD_NR); -+ compare_values (LABEL_DECL_UID); -+ } -+ else if (code == FIELD_DECL) -+ { -+ compare_values (DECL_PACKED); -+ compare_values (DECL_NONADDRESSABLE_P); -+ compare_values (DECL_PADDING_P); -+ compare_values (DECL_OFFSET_ALIGN); -+ } -+ else if (code == VAR_DECL) -+ { -+ compare_values (DECL_HAS_DEBUG_EXPR_P); -+ compare_values (DECL_NONLOCAL_FRAME); -+ } -+ if (code == RESULT_DECL -+ || code == PARM_DECL -+ || code == VAR_DECL) -+ { -+ compare_values (DECL_BY_REFERENCE); -+ if (code == VAR_DECL -+ || code == PARM_DECL) -+ compare_values (DECL_HAS_VALUE_EXPR_P); -+ } -+ } -+ -+ if (CODE_CONTAINS_STRUCT (code, TS_DECL_WRTL)) -+ compare_values (DECL_REGISTER); -+ -+ if (CODE_CONTAINS_STRUCT (code, TS_DECL_WITH_VIS)) -+ { -+ compare_values (DECL_COMMON); -+ compare_values (DECL_DLLIMPORT_P); -+ compare_values (DECL_WEAK); -+ compare_values (DECL_SEEN_IN_BIND_EXPR_P); -+ compare_values (DECL_COMDAT); -+ compare_values (DECL_VISIBILITY); -+ compare_values (DECL_VISIBILITY_SPECIFIED); -+ if (code == VAR_DECL) -+ { -+ compare_values (DECL_HARD_REGISTER); -+ /* DECL_IN_TEXT_SECTION is set during final asm output only. 
*/ -+ compare_values (DECL_IN_CONSTANT_POOL); -+ } -+ } -+ -+ if (CODE_CONTAINS_STRUCT (code, TS_FUNCTION_DECL)) -+ { -+ compare_values (DECL_BUILT_IN_CLASS); -+ compare_values (DECL_STATIC_CONSTRUCTOR); -+ compare_values (DECL_STATIC_DESTRUCTOR); -+ compare_values (DECL_UNINLINABLE); -+ compare_values (DECL_POSSIBLY_INLINED); -+ compare_values (DECL_IS_NOVOPS); -+ compare_values (DECL_IS_RETURNS_TWICE); -+ compare_values (DECL_IS_MALLOC); -+ compare_values (DECL_IS_OPERATOR_NEW_P); -+ compare_values (DECL_DECLARED_INLINE_P); -+ compare_values (DECL_STATIC_CHAIN); -+ compare_values (DECL_NO_INLINE_WARNING_P); -+ compare_values (DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT); -+ compare_values (DECL_NO_LIMIT_STACK); -+ compare_values (DECL_DISREGARD_INLINE_LIMITS); -+ compare_values (DECL_PURE_P); -+ compare_values (DECL_LOOPING_CONST_OR_PURE_P); -+ compare_values (DECL_FINAL_P); -+ compare_values (DECL_CXX_CONSTRUCTOR_P); -+ compare_values (DECL_CXX_DESTRUCTOR_P); -+ if (DECL_BUILT_IN_CLASS (t1) != NOT_BUILT_IN) -+ compare_values (DECL_UNCHECKED_FUNCTION_CODE); -+ } -+ -+ if (CODE_CONTAINS_STRUCT (code, TS_TYPE_COMMON)) -+ { -+ compare_values (TYPE_MODE); -+ compare_values (TYPE_STRING_FLAG); -+ compare_values (TYPE_NEEDS_CONSTRUCTING); -+ if (RECORD_OR_UNION_TYPE_P (t1)) -+ { -+ compare_values (TYPE_TRANSPARENT_AGGR); -+ compare_values (TYPE_FINAL_P); -+ } -+ else if (code == ARRAY_TYPE) -+ compare_values (TYPE_NONALIASED_COMPONENT); -+ if (AGGREGATE_TYPE_P (t1)) -+ compare_values (TYPE_TYPELESS_STORAGE); -+ compare_values (TYPE_EMPTY_P); -+ compare_values (TYPE_PACKED); -+ compare_values (TYPE_RESTRICT); -+ compare_values (TYPE_USER_ALIGN); -+ compare_values (TYPE_READONLY); -+ compare_values (TYPE_PRECISION); -+ compare_values (TYPE_ALIGN); -+ /* Do not compare TYPE_ALIAS_SET. Doing so introduce ordering issues -+ with calls to get_alias_set which may initialize it for streamed -+ in types. */ -+ } -+ -+ /* We don't want to compare locations, so there is nothing do compare -+ for TS_EXP. */ -+ -+ /* BLOCKs are function local and we don't merge anything there, so -+ simply refuse to merge. 
*/ -+ if (CODE_CONTAINS_STRUCT (code, TS_BLOCK)) -+ return false; -+ -+ if (CODE_CONTAINS_STRUCT (code, TS_TRANSLATION_UNIT_DECL)) -+ if (strcmp (TRANSLATION_UNIT_LANGUAGE (t1), -+ TRANSLATION_UNIT_LANGUAGE (t2)) != 0) -+ return false; -+ -+ if (CODE_CONTAINS_STRUCT (code, TS_TARGET_OPTION)) -+ if (!cl_target_option_eq (TREE_TARGET_OPTION (t1), TREE_TARGET_OPTION (t2))) -+ return false; -+ -+ if (CODE_CONTAINS_STRUCT (code, TS_OPTIMIZATION)) -+ if (!cl_optimization_option_eq (TREE_OPTIMIZATION (t1), -+ TREE_OPTIMIZATION (t2))) -+ return false; -+ -+ if (CODE_CONTAINS_STRUCT (code, TS_BINFO)) -+ if (vec_safe_length (BINFO_BASE_ACCESSES (t1)) -+ != vec_safe_length (BINFO_BASE_ACCESSES (t2))) -+ return false; -+ -+ if (CODE_CONTAINS_STRUCT (code, TS_CONSTRUCTOR)) -+ compare_values (CONSTRUCTOR_NELTS); -+ -+ if (CODE_CONTAINS_STRUCT (code, TS_IDENTIFIER)) -+ if (IDENTIFIER_LENGTH (t1) != IDENTIFIER_LENGTH (t2) -+ || memcmp (IDENTIFIER_POINTER (t1), IDENTIFIER_POINTER (t2), -+ IDENTIFIER_LENGTH (t1)) != 0) -+ return false; -+ -+ if (CODE_CONTAINS_STRUCT (code, TS_STRING)) -+ if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2) -+ || memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2), -+ TREE_STRING_LENGTH (t1)) != 0) -+ return false; -+ -+ if (code == OMP_CLAUSE) -+ { -+ compare_values (OMP_CLAUSE_CODE); -+ switch (OMP_CLAUSE_CODE (t1)) -+ { -+ case OMP_CLAUSE_DEFAULT: -+ compare_values (OMP_CLAUSE_DEFAULT_KIND); -+ break; -+ case OMP_CLAUSE_SCHEDULE: -+ compare_values (OMP_CLAUSE_SCHEDULE_KIND); -+ break; -+ case OMP_CLAUSE_DEPEND: -+ compare_values (OMP_CLAUSE_DEPEND_KIND); -+ break; -+ case OMP_CLAUSE_MAP: -+ compare_values (OMP_CLAUSE_MAP_KIND); -+ break; -+ case OMP_CLAUSE_PROC_BIND: -+ compare_values (OMP_CLAUSE_PROC_BIND_KIND); -+ break; -+ case OMP_CLAUSE_REDUCTION: -+ compare_values (OMP_CLAUSE_REDUCTION_CODE); -+ compare_values (OMP_CLAUSE_REDUCTION_GIMPLE_INIT); -+ compare_values (OMP_CLAUSE_REDUCTION_GIMPLE_MERGE); -+ break; -+ default: -+ break; -+ } -+ } -+ -+#undef compare_values -+ -+ -+ /* Compare pointer fields. */ -+ -+ /* Recurse. Search & Replaced from DFS_write_tree_body. -+ Folding the early checks into the compare_tree_edges recursion -+ macro makes debugging way quicker as you are able to break on -+ compare_tree_sccs_1 and simply finish until a call returns false -+ to spot the SCC members with the difference. */ -+#define compare_tree_edges(E1, E2) \ -+ do { \ -+ tree t1_ = (E1), t2_ = (E2); \ -+ if (t1_ != t2_ \ -+ && (!t1_ || !t2_ \ -+ || !TREE_VISITED (t2_) \ -+ || (!TREE_ASM_WRITTEN (t2_) \ -+ && !compare_tree_sccs_1 (t1_, t2_, map)))) \ -+ return false; \ -+ /* Only non-NULL trees outside of the SCC may compare equal. */ \ -+ gcc_checking_assert (t1_ != t2_ || (!t2_ || !TREE_VISITED (t2_))); \ -+ } while (0) -+ -+ if (CODE_CONTAINS_STRUCT (code, TS_TYPED)) -+ { -+ if (code != IDENTIFIER_NODE) -+ compare_tree_edges (TREE_TYPE (t1), TREE_TYPE (t2)); -+ } -+ -+ if (CODE_CONTAINS_STRUCT (code, TS_VECTOR)) -+ { -+ /* Note that the number of elements for EXPR has already been emitted -+ in EXPR's header (see streamer_write_tree_header). 
*/ -+ unsigned int count = vector_cst_encoded_nelts (t1); -+ for (unsigned int i = 0; i < count; ++i) -+ compare_tree_edges (VECTOR_CST_ENCODED_ELT (t1, i), -+ VECTOR_CST_ENCODED_ELT (t2, i)); -+ } -+ -+ if (CODE_CONTAINS_STRUCT (code, TS_COMPLEX)) -+ { -+ compare_tree_edges (TREE_REALPART (t1), TREE_REALPART (t2)); -+ compare_tree_edges (TREE_IMAGPART (t1), TREE_IMAGPART (t2)); -+ } -+ -+ if (CODE_CONTAINS_STRUCT (code, TS_DECL_MINIMAL)) -+ { -+ compare_tree_edges (DECL_NAME (t1), DECL_NAME (t2)); -+ /* ??? Global decls from different TUs have non-matching -+ TRANSLATION_UNIT_DECLs. Only consider a small set of -+ decls equivalent, we should not end up merging others. */ -+ if ((code == TYPE_DECL -+ || code == NAMESPACE_DECL -+ || code == IMPORTED_DECL -+ || code == CONST_DECL -+ || (VAR_OR_FUNCTION_DECL_P (t1) -+ && (TREE_PUBLIC (t1) || DECL_EXTERNAL (t1)))) -+ && DECL_FILE_SCOPE_P (t1) && DECL_FILE_SCOPE_P (t2)) -+ ; -+ else -+ compare_tree_edges (DECL_CONTEXT (t1), DECL_CONTEXT (t2)); -+ } -+ -+ if (CODE_CONTAINS_STRUCT (code, TS_DECL_COMMON)) -+ { -+ compare_tree_edges (DECL_SIZE (t1), DECL_SIZE (t2)); -+ compare_tree_edges (DECL_SIZE_UNIT (t1), DECL_SIZE_UNIT (t2)); -+ compare_tree_edges (DECL_ATTRIBUTES (t1), DECL_ATTRIBUTES (t2)); -+ compare_tree_edges (DECL_ABSTRACT_ORIGIN (t1), DECL_ABSTRACT_ORIGIN (t2)); -+ if ((code == VAR_DECL -+ || code == PARM_DECL) -+ && DECL_HAS_VALUE_EXPR_P (t1)) -+ compare_tree_edges (DECL_VALUE_EXPR (t1), DECL_VALUE_EXPR (t2)); -+ if (code == VAR_DECL -+ && DECL_HAS_DEBUG_EXPR_P (t1)) -+ compare_tree_edges (DECL_DEBUG_EXPR (t1), DECL_DEBUG_EXPR (t2)); -+ /* LTO specific edges. */ -+ if (code != FUNCTION_DECL -+ && code != TRANSLATION_UNIT_DECL) -+ compare_tree_edges (DECL_INITIAL (t1), DECL_INITIAL (t2)); -+ } -+ -+ if (CODE_CONTAINS_STRUCT (code, TS_DECL_NON_COMMON)) -+ { -+ if (code == FUNCTION_DECL) -+ { -+ tree a1, a2; -+ for (a1 = DECL_ARGUMENTS (t1), a2 = DECL_ARGUMENTS (t2); -+ a1 || a2; -+ a1 = TREE_CHAIN (a1), a2 = TREE_CHAIN (a2)) -+ compare_tree_edges (a1, a2); -+ compare_tree_edges (DECL_RESULT (t1), DECL_RESULT (t2)); -+ } -+ else if (code == TYPE_DECL) -+ compare_tree_edges (DECL_ORIGINAL_TYPE (t1), DECL_ORIGINAL_TYPE (t2)); -+ } -+ -+ if (CODE_CONTAINS_STRUCT (code, TS_DECL_WITH_VIS)) -+ { -+ /* Make sure we don't inadvertently set the assembler name. 
*/ -+ if (DECL_ASSEMBLER_NAME_SET_P (t1)) -+ compare_tree_edges (DECL_ASSEMBLER_NAME (t1), -+ DECL_ASSEMBLER_NAME (t2)); -+ } -+ -+ if (CODE_CONTAINS_STRUCT (code, TS_FIELD_DECL)) -+ { -+ compare_tree_edges (DECL_FIELD_OFFSET (t1), DECL_FIELD_OFFSET (t2)); -+ compare_tree_edges (DECL_BIT_FIELD_TYPE (t1), DECL_BIT_FIELD_TYPE (t2)); -+ compare_tree_edges (DECL_BIT_FIELD_REPRESENTATIVE (t1), -+ DECL_BIT_FIELD_REPRESENTATIVE (t2)); -+ compare_tree_edges (DECL_FIELD_BIT_OFFSET (t1), -+ DECL_FIELD_BIT_OFFSET (t2)); -+ compare_tree_edges (DECL_FCONTEXT (t1), DECL_FCONTEXT (t2)); -+ } -+ -+ if (CODE_CONTAINS_STRUCT (code, TS_FUNCTION_DECL)) -+ { -+ compare_tree_edges (DECL_FUNCTION_PERSONALITY (t1), -+ DECL_FUNCTION_PERSONALITY (t2)); -+ compare_tree_edges (DECL_VINDEX (t1), DECL_VINDEX (t2)); -+ compare_tree_edges (DECL_FUNCTION_SPECIFIC_TARGET (t1), -+ DECL_FUNCTION_SPECIFIC_TARGET (t2)); -+ compare_tree_edges (DECL_FUNCTION_SPECIFIC_OPTIMIZATION (t1), -+ DECL_FUNCTION_SPECIFIC_OPTIMIZATION (t2)); -+ } -+ -+ if (CODE_CONTAINS_STRUCT (code, TS_TYPE_COMMON)) -+ { -+ compare_tree_edges (TYPE_SIZE (t1), TYPE_SIZE (t2)); -+ compare_tree_edges (TYPE_SIZE_UNIT (t1), TYPE_SIZE_UNIT (t2)); -+ compare_tree_edges (TYPE_ATTRIBUTES (t1), TYPE_ATTRIBUTES (t2)); -+ compare_tree_edges (TYPE_NAME (t1), TYPE_NAME (t2)); -+ /* Do not compare TYPE_POINTER_TO or TYPE_REFERENCE_TO. They will be -+ reconstructed during fixup. */ -+ /* Do not compare TYPE_NEXT_VARIANT, we reconstruct the variant lists -+ during fixup. */ -+ compare_tree_edges (TYPE_MAIN_VARIANT (t1), TYPE_MAIN_VARIANT (t2)); -+ /* ??? Global types from different TUs have non-matching -+ TRANSLATION_UNIT_DECLs. Still merge them if they are otherwise -+ equal. */ -+ if (TYPE_FILE_SCOPE_P (t1) && TYPE_FILE_SCOPE_P (t2)) -+ ; -+ else -+ compare_tree_edges (TYPE_CONTEXT (t1), TYPE_CONTEXT (t2)); -+ /* TYPE_CANONICAL is re-computed during type merging, so do not -+ compare it here. */ -+ compare_tree_edges (TYPE_STUB_DECL (t1), TYPE_STUB_DECL (t2)); -+ } -+ -+ if (CODE_CONTAINS_STRUCT (code, TS_TYPE_NON_COMMON)) -+ { -+ if (code == ENUMERAL_TYPE) -+ compare_tree_edges (TYPE_VALUES (t1), TYPE_VALUES (t2)); -+ else if (code == ARRAY_TYPE) -+ compare_tree_edges (TYPE_DOMAIN (t1), TYPE_DOMAIN (t2)); -+ else if (RECORD_OR_UNION_TYPE_P (t1)) -+ { -+ tree f1, f2; -+ for (f1 = TYPE_FIELDS (t1), f2 = TYPE_FIELDS (t2); -+ f1 || f2; -+ f1 = TREE_CHAIN (f1), f2 = TREE_CHAIN (f2)) -+ compare_tree_edges (f1, f2); -+ } -+ else if (code == FUNCTION_TYPE -+ || code == METHOD_TYPE) -+ compare_tree_edges (TYPE_ARG_TYPES (t1), TYPE_ARG_TYPES (t2)); -+ -+ if (!POINTER_TYPE_P (t1)) -+ compare_tree_edges (TYPE_MIN_VALUE_RAW (t1), TYPE_MIN_VALUE_RAW (t2)); -+ compare_tree_edges (TYPE_MAX_VALUE_RAW (t1), TYPE_MAX_VALUE_RAW (t2)); -+ } -+ -+ if (CODE_CONTAINS_STRUCT (code, TS_LIST)) -+ { -+ compare_tree_edges (TREE_PURPOSE (t1), TREE_PURPOSE (t2)); -+ compare_tree_edges (TREE_VALUE (t1), TREE_VALUE (t2)); -+ compare_tree_edges (TREE_CHAIN (t1), TREE_CHAIN (t2)); -+ } -+ -+ if (CODE_CONTAINS_STRUCT (code, TS_VEC)) -+ for (int i = 0; i < TREE_VEC_LENGTH (t1); i++) -+ compare_tree_edges (TREE_VEC_ELT (t1, i), TREE_VEC_ELT (t2, i)); -+ -+ if (CODE_CONTAINS_STRUCT (code, TS_EXP)) -+ { -+ for (int i = 0; i < TREE_OPERAND_LENGTH (t1); i++) -+ compare_tree_edges (TREE_OPERAND (t1, i), -+ TREE_OPERAND (t2, i)); -+ -+ /* BLOCKs are function local and we don't merge anything there. 
*/ -+ if (TREE_BLOCK (t1) || TREE_BLOCK (t2)) -+ return false; -+ } -+ -+ if (CODE_CONTAINS_STRUCT (code, TS_BINFO)) -+ { -+ unsigned i; -+ tree t; -+ /* Lengths have already been compared above. */ -+ FOR_EACH_VEC_ELT (*BINFO_BASE_BINFOS (t1), i, t) -+ compare_tree_edges (t, BINFO_BASE_BINFO (t2, i)); -+ FOR_EACH_VEC_SAFE_ELT (BINFO_BASE_ACCESSES (t1), i, t) -+ compare_tree_edges (t, BINFO_BASE_ACCESS (t2, i)); -+ compare_tree_edges (BINFO_OFFSET (t1), BINFO_OFFSET (t2)); -+ compare_tree_edges (BINFO_VTABLE (t1), BINFO_VTABLE (t2)); -+ compare_tree_edges (BINFO_VPTR_FIELD (t1), BINFO_VPTR_FIELD (t2)); -+ /* Do not walk BINFO_INHERITANCE_CHAIN, BINFO_SUBVTT_INDEX -+ and BINFO_VPTR_INDEX; these are used by C++ FE only. */ -+ } -+ -+ if (CODE_CONTAINS_STRUCT (code, TS_CONSTRUCTOR)) -+ { -+ unsigned i; -+ tree index, value; -+ /* Lengths have already been compared above. */ -+ FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (t1), i, index, value) -+ { -+ compare_tree_edges (index, CONSTRUCTOR_ELT (t2, i)->index); -+ compare_tree_edges (value, CONSTRUCTOR_ELT (t2, i)->value); -+ } -+ } -+ -+ if (code == OMP_CLAUSE) -+ { -+ int i; -+ -+ for (i = 0; i < omp_clause_num_ops[OMP_CLAUSE_CODE (t1)]; i++) -+ compare_tree_edges (OMP_CLAUSE_OPERAND (t1, i), -+ OMP_CLAUSE_OPERAND (t2, i)); -+ compare_tree_edges (OMP_CLAUSE_CHAIN (t1), OMP_CLAUSE_CHAIN (t2)); -+ } -+ -+#undef compare_tree_edges -+ -+ return true; -+} -+ -+/* Compare the tree scc SCC to the prevailing candidate PSCC, filling -+ out MAP if they are equal. */ -+ -+static bool -+compare_tree_sccs (tree_scc *pscc, tree_scc *scc, -+ tree *map) -+{ -+ /* Assume SCC entry hashes are sorted after their cardinality. Which -+ means we can simply take the first n-tuple of equal hashes -+ (which is recorded as entry_len) and do n SCC entry candidate -+ comparisons. */ -+ for (unsigned i = 0; i < pscc->entry_len; ++i) -+ { -+ tree *mapp = map; -+ num_scc_compare_collisions++; -+ if (compare_tree_sccs_1 (pscc->entries[0], scc->entries[i], &mapp)) -+ { -+ /* Equal - no need to reset TREE_VISITED or TREE_ASM_WRITTEN -+ on the scc as all trees will be freed. */ -+ return true; -+ } -+ /* Reset TREE_ASM_WRITTEN on scc for the next compare or in case -+ the SCC prevails. */ -+ for (unsigned j = 0; j < scc->len; ++j) -+ TREE_ASM_WRITTEN (scc->entries[j]) = 0; -+ } -+ -+ return false; -+} -+ -+/* QSort sort function to sort a map of two pointers after the 2nd -+ pointer. */ -+ -+static int -+cmp_tree (const void *p1_, const void *p2_) -+{ -+ tree *p1 = (tree *)(const_cast(p1_)); -+ tree *p2 = (tree *)(const_cast(p2_)); -+ if (p1[1] == p2[1]) -+ return 0; -+ return ((uintptr_t)p1[1] < (uintptr_t)p2[1]) ? -1 : 1; -+} -+ -+/* Try to unify the SCC with nodes FROM to FROM + LEN in CACHE and -+ hash value SCC_HASH with an already recorded SCC. Return true if -+ that was successful, otherwise return false. */ -+ -+static bool -+unify_scc (struct data_in *data_in, unsigned from, -+ unsigned len, unsigned scc_entry_len, hashval_t scc_hash) -+{ -+ bool unified_p = false; -+ struct streamer_tree_cache_d *cache = data_in->reader_cache; -+ tree_scc *scc -+ = (tree_scc *) alloca (sizeof (tree_scc) + (len - 1) * sizeof (tree)); -+ scc->next = NULL; -+ scc->hash = scc_hash; -+ scc->len = len; -+ scc->entry_len = scc_entry_len; -+ for (unsigned i = 0; i < len; ++i) -+ { -+ tree t = streamer_tree_cache_get_tree (cache, from + i); -+ scc->entries[i] = t; -+ /* Do not merge SCCs with local entities inside them. Also do -+ not merge TRANSLATION_UNIT_DECLs. 
*/ -+ if (TREE_CODE (t) == TRANSLATION_UNIT_DECL -+ || (VAR_OR_FUNCTION_DECL_P (t) -+ && !(TREE_PUBLIC (t) || DECL_EXTERNAL (t))) -+ || TREE_CODE (t) == LABEL_DECL) -+ { -+ /* Avoid doing any work for these cases and do not worry to -+ record the SCCs for further merging. */ -+ return false; -+ } -+ } -+ -+ /* Look for the list of candidate SCCs to compare against. */ -+ tree_scc **slot; -+ slot = tree_scc_hash->find_slot_with_hash (scc, scc_hash, INSERT); -+ if (*slot) -+ { -+ /* Try unifying against each candidate. */ -+ num_scc_compares++; -+ -+ /* Set TREE_VISITED on the scc so we can easily identify tree nodes -+ outside of the scc when following tree edges. Make sure -+ that TREE_ASM_WRITTEN is unset so we can use it as 2nd bit -+ to track whether we visited the SCC member during the compare. -+ We cannot use TREE_VISITED on the pscc members as the extended -+ scc and pscc can overlap. */ -+ for (unsigned i = 0; i < scc->len; ++i) -+ { -+ TREE_VISITED (scc->entries[i]) = 1; -+ gcc_checking_assert (!TREE_ASM_WRITTEN (scc->entries[i])); -+ } -+ -+ tree *map = XALLOCAVEC (tree, 2 * len); -+ for (tree_scc *pscc = *slot; pscc; pscc = pscc->next) -+ { -+ if (!compare_tree_sccs (pscc, scc, map)) -+ continue; -+ -+ /* Found an equal SCC. */ -+ unified_p = true; -+ num_scc_compare_collisions--; -+ num_sccs_merged++; -+ total_scc_size_merged += len; -+ -+ if (flag_checking) -+ for (unsigned i = 0; i < len; ++i) -+ { -+ tree t = map[2*i+1]; -+ enum tree_code code = TREE_CODE (t); -+ /* IDENTIFIER_NODEs should be singletons and are merged by the -+ streamer. The others should be singletons, too, and we -+ should not merge them in any way. */ -+ gcc_assert (code != TRANSLATION_UNIT_DECL -+ && code != IDENTIFIER_NODE); -+ } -+ -+ /* Fixup the streamer cache with the prevailing nodes according -+ to the tree node mapping computed by compare_tree_sccs. */ -+ if (len == 1) -+ { -+ /* If we got a debug reference queued, see if the prevailing -+ tree has a debug reference and if not, register the one -+ for the tree we are about to throw away. */ -+ if (dref_queue.length () == 1) -+ { -+ dref_entry e = dref_queue.pop (); -+ gcc_assert (e.decl -+ == streamer_tree_cache_get_tree (cache, from)); -+ const char *sym; -+ unsigned HOST_WIDE_INT off; -+ if (!debug_hooks->die_ref_for_decl (pscc->entries[0], &sym, -+ &off)) -+ debug_hooks->register_external_die (pscc->entries[0], -+ e.sym, e.off); -+ } -+ lto_maybe_register_decl (data_in, pscc->entries[0], from); -+ streamer_tree_cache_replace_tree (cache, pscc->entries[0], from); -+ } -+ else -+ { -+ tree *map2 = XALLOCAVEC (tree, 2 * len); -+ for (unsigned i = 0; i < len; ++i) -+ { -+ map2[i*2] = (tree)(uintptr_t)(from + i); -+ map2[i*2+1] = scc->entries[i]; -+ } -+ qsort (map2, len, 2 * sizeof (tree), cmp_tree); -+ qsort (map, len, 2 * sizeof (tree), cmp_tree); -+ for (unsigned i = 0; i < len; ++i) -+ { -+ lto_maybe_register_decl (data_in, map[2*i], -+ (uintptr_t)map2[2*i]); -+ streamer_tree_cache_replace_tree (cache, map[2*i], -+ (uintptr_t)map2[2*i]); -+ } -+ } -+ -+ /* Free the tree nodes from the read SCC. */ -+ data_in->location_cache.revert_location_cache (); -+ for (unsigned i = 0; i < len; ++i) -+ { -+ if (TYPE_P (scc->entries[i])) -+ num_merged_types++; -+ free_node (scc->entries[i]); -+ } -+ -+ /* Drop DIE references. -+ ??? Do as in the size-one SCC case which involves sorting -+ the queue. */ -+ dref_queue.truncate (0); -+ -+ break; -+ } -+ -+ /* Reset TREE_VISITED if we didn't unify the SCC with another. 
*/ -+ if (!unified_p) -+ for (unsigned i = 0; i < scc->len; ++i) -+ TREE_VISITED (scc->entries[i]) = 0; -+ } -+ -+ /* If we didn't unify it to any candidate duplicate the relevant -+ pieces to permanent storage and link it into the chain. */ -+ if (!unified_p) -+ { -+ tree_scc *pscc -+ = XOBNEWVAR (&tree_scc_hash_obstack, tree_scc, sizeof (tree_scc)); -+ memcpy (pscc, scc, sizeof (tree_scc)); -+ pscc->next = (*slot); -+ *slot = pscc; -+ } -+ return unified_p; -+} -+ -+ -+/* Read all the symbols from buffer DATA, using descriptors in DECL_DATA. -+ RESOLUTIONS is the set of symbols picked by the linker (read from the -+ resolution file when the linker plugin is being used). */ -+ -+static void -+lto_read_decls (struct lto_file_decl_data *decl_data, const void *data, -+ vec resolutions) -+{ -+ const struct lto_decl_header *header = (const struct lto_decl_header *) data; -+ const int decl_offset = sizeof (struct lto_decl_header); -+ const int main_offset = decl_offset + header->decl_state_size; -+ const int string_offset = main_offset + header->main_size; -+ struct data_in *data_in; -+ unsigned int i; -+ const uint32_t *data_ptr, *data_end; -+ uint32_t num_decl_states; -+ -+ lto_input_block ib_main ((const char *) data + main_offset, -+ header->main_size, decl_data->mode_table); -+ -+ data_in = lto_data_in_create (decl_data, (const char *) data + string_offset, -+ header->string_size, resolutions); -+ -+ /* We do not uniquify the pre-loaded cache entries, those are middle-end -+ internal types that should not be merged. */ -+ -+ /* Read the global declarations and types. */ -+ while (ib_main.p < ib_main.len) -+ { -+ tree t; -+ unsigned from = data_in->reader_cache->nodes.length (); -+ /* Read and uniquify SCCs as in the input stream. */ -+ enum LTO_tags tag = streamer_read_record_start (&ib_main); -+ if (tag == LTO_tree_scc) -+ { -+ unsigned len_; -+ unsigned scc_entry_len; -+ hashval_t scc_hash = lto_input_scc (&ib_main, data_in, &len_, -+ &scc_entry_len); -+ unsigned len = data_in->reader_cache->nodes.length () - from; -+ gcc_assert (len == len_); -+ -+ total_scc_size += len; -+ num_sccs_read++; -+ -+ /* We have the special case of size-1 SCCs that are pre-merged -+ by means of identifier and string sharing for example. -+ ??? Maybe we should avoid streaming those as SCCs. */ -+ tree first = streamer_tree_cache_get_tree (data_in->reader_cache, -+ from); -+ if (len == 1 -+ && (TREE_CODE (first) == IDENTIFIER_NODE -+ || (TREE_CODE (first) == INTEGER_CST -+ && !TREE_OVERFLOW (first)))) -+ continue; -+ -+ /* Try to unify the SCC with already existing ones. */ -+ if (!flag_ltrans -+ && unify_scc (data_in, from, -+ len, scc_entry_len, scc_hash)) -+ continue; -+ -+ /* Tree merging failed, mark entries in location cache as -+ permanent. */ -+ data_in->location_cache.accept_location_cache (); -+ -+ bool seen_type = false; -+ for (unsigned i = 0; i < len; ++i) -+ { -+ tree t = streamer_tree_cache_get_tree (data_in->reader_cache, -+ from + i); -+ /* Reconstruct the type variant and pointer-to/reference-to -+ chains. */ -+ if (TYPE_P (t)) -+ { -+ seen_type = true; -+ num_prevailing_types++; -+ lto_fixup_prevailing_type (t); -+ -+ /* Compute the canonical type of all types. -+ Because SCC components are streamed in random (hash) order -+ we may have encountered the type before while registering -+ type canonical of a derived type in the same SCC. 
*/ -+ if (!TYPE_CANONICAL (t)) -+ gimple_register_canonical_type (t); -+ if (TYPE_MAIN_VARIANT (t) == t && odr_type_p (t)) -+ register_odr_type (t); -+ } -+ /* Link shared INTEGER_CSTs into TYPE_CACHED_VALUEs of its -+ type which is also member of this SCC. */ -+ if (TREE_CODE (t) == INTEGER_CST -+ && !TREE_OVERFLOW (t)) -+ cache_integer_cst (t); -+ if (!flag_ltrans) -+ { -+ lto_maybe_register_decl (data_in, t, from + i); -+ /* Scan the tree for references to global functions or -+ variables and record those for later fixup. */ -+ if (mentions_vars_p (t)) -+ vec_safe_push (tree_with_vars, t); -+ } -+ } -+ -+ /* Register DECLs with the debuginfo machinery. */ -+ while (!dref_queue.is_empty ()) -+ { -+ dref_entry e = dref_queue.pop (); -+ debug_hooks->register_external_die (e.decl, e.sym, e.off); -+ } -+ -+ if (seen_type) -+ num_type_scc_trees += len; -+ } -+ else -+ { -+ /* Pickle stray references. */ -+ t = lto_input_tree_1 (&ib_main, data_in, tag, 0); -+ gcc_assert (t && data_in->reader_cache->nodes.length () == from); -+ } -+ } -+ data_in->location_cache.apply_location_cache (); -+ -+ /* Read in lto_in_decl_state objects. */ -+ data_ptr = (const uint32_t *) ((const char*) data + decl_offset); -+ data_end = -+ (const uint32_t *) ((const char*) data_ptr + header->decl_state_size); -+ num_decl_states = *data_ptr++; -+ -+ gcc_assert (num_decl_states > 0); -+ decl_data->global_decl_state = lto_new_in_decl_state (); -+ data_ptr = lto_read_in_decl_state (data_in, data_ptr, -+ decl_data->global_decl_state); -+ -+ /* Read in per-function decl states and enter them in hash table. */ -+ decl_data->function_decl_states = -+ hash_table::create_ggc (37); -+ -+ for (i = 1; i < num_decl_states; i++) -+ { -+ struct lto_in_decl_state *state = lto_new_in_decl_state (); -+ -+ data_ptr = lto_read_in_decl_state (data_in, data_ptr, state); -+ lto_in_decl_state **slot -+ = decl_data->function_decl_states->find_slot (state, INSERT); -+ gcc_assert (*slot == NULL); -+ *slot = state; -+ } -+ -+ if (data_ptr != data_end) -+ internal_error ("bytecode stream: garbage at the end of symbols section"); -+ -+ /* Set the current decl state to be the global state. */ -+ decl_data->current_decl_state = decl_data->global_decl_state; -+ -+ lto_data_in_delete (data_in); -+} -+ -+/* Custom version of strtoll, which is not portable. */ -+ -+static int64_t -+lto_parse_hex (const char *p) -+{ -+ int64_t ret = 0; -+ -+ for (; *p != '\0'; ++p) -+ { -+ char c = *p; -+ unsigned char part; -+ ret <<= 4; -+ if (c >= '0' && c <= '9') -+ part = c - '0'; -+ else if (c >= 'a' && c <= 'f') -+ part = c - 'a' + 10; -+ else if (c >= 'A' && c <= 'F') -+ part = c - 'A' + 10; -+ else -+ internal_error ("could not parse hex number"); -+ ret |= part; -+ } -+ -+ return ret; -+} -+ -+/* Read resolution for file named FILE_NAME. The resolution is read from -+ RESOLUTION. */ -+ -+static void -+lto_resolution_read (splay_tree file_ids, FILE *resolution, lto_file *file) -+{ -+ /* We require that objects in the resolution file are in the same -+ order as the lto1 command line. */ -+ unsigned int name_len; -+ char *obj_name; -+ unsigned int num_symbols; -+ unsigned int i; -+ struct lto_file_decl_data *file_data; -+ splay_tree_node nd = NULL; -+ -+ if (!resolution) -+ return; -+ -+ name_len = strlen (file->filename); -+ obj_name = XNEWVEC (char, name_len + 1); -+ fscanf (resolution, " "); /* Read white space. 
*/ -+ -+ fread (obj_name, sizeof (char), name_len, resolution); -+ obj_name[name_len] = '\0'; -+ if (filename_cmp (obj_name, file->filename) != 0) -+ internal_error ("unexpected file name %s in linker resolution file. " -+ "Expected %s", obj_name, file->filename); -+ if (file->offset != 0) -+ { -+ int t; -+ char offset_p[17]; -+ int64_t offset; -+ t = fscanf (resolution, "@0x%16s", offset_p); -+ if (t != 1) -+ internal_error ("could not parse file offset"); -+ offset = lto_parse_hex (offset_p); -+ if (offset != file->offset) -+ internal_error ("unexpected offset"); -+ } -+ -+ free (obj_name); -+ -+ fscanf (resolution, "%u", &num_symbols); -+ -+ for (i = 0; i < num_symbols; i++) -+ { -+ int t; -+ unsigned index; -+ unsigned HOST_WIDE_INT id; -+ char r_str[27]; -+ enum ld_plugin_symbol_resolution r = (enum ld_plugin_symbol_resolution) 0; -+ unsigned int j; -+ unsigned int lto_resolution_str_len = -+ sizeof (lto_resolution_str) / sizeof (char *); -+ res_pair rp; -+ -+ t = fscanf (resolution, "%u " HOST_WIDE_INT_PRINT_HEX_PURE " %26s %*[^\n]\n", -+ &index, &id, r_str); -+ if (t != 3) -+ internal_error ("invalid line in the resolution file"); -+ -+ for (j = 0; j < lto_resolution_str_len; j++) -+ { -+ if (strcmp (lto_resolution_str[j], r_str) == 0) -+ { -+ r = (enum ld_plugin_symbol_resolution) j; -+ break; -+ } -+ } -+ if (j == lto_resolution_str_len) -+ internal_error ("invalid resolution in the resolution file"); -+ -+ if (!(nd && lto_splay_tree_id_equal_p (nd->key, id))) -+ { -+ nd = lto_splay_tree_lookup (file_ids, id); -+ if (nd == NULL) -+ internal_error ("resolution sub id %wx not in object file", id); -+ } -+ -+ file_data = (struct lto_file_decl_data *)nd->value; -+ /* The indexes are very sparse. To save memory save them in a compact -+ format that is only unpacked later when the subfile is processed. */ -+ rp.res = r; -+ rp.index = index; -+ file_data->respairs.safe_push (rp); -+ if (file_data->max_index < index) -+ file_data->max_index = index; -+ } -+} -+ -+/* List of file_decl_datas */ -+struct file_data_list -+ { -+ struct lto_file_decl_data *first, *last; -+ }; -+ -+/* Is the name for a id'ed LTO section? */ -+ -+static int -+lto_section_with_id (const char *name, unsigned HOST_WIDE_INT *id) -+{ -+ const char *s; -+ -+ if (strncmp (name, section_name_prefix, strlen (section_name_prefix))) -+ return 0; -+ s = strrchr (name, '.'); -+ if (!s) -+ return 0; -+ /* If the section is not suffixed with an ID return. */ -+ if ((size_t)(s - name) == strlen (section_name_prefix)) -+ return 0; -+ return sscanf (s, "." 
HOST_WIDE_INT_PRINT_HEX_PURE, id) == 1; -+} -+ -+/* Create file_data of each sub file id */ -+ -+static int -+create_subid_section_table (struct lto_section_slot *ls, splay_tree file_ids, -+ struct file_data_list *list) -+{ -+ struct lto_section_slot s_slot, *new_slot; -+ unsigned HOST_WIDE_INT id; -+ splay_tree_node nd; -+ void **hash_slot; -+ char *new_name; -+ struct lto_file_decl_data *file_data; -+ -+ if (!lto_section_with_id (ls->name, &id)) -+ return 1; -+ -+ /* Find hash table of sub module id */ -+ nd = lto_splay_tree_lookup (file_ids, id); -+ if (nd != NULL) -+ { -+ file_data = (struct lto_file_decl_data *)nd->value; -+ } -+ else -+ { -+ file_data = ggc_alloc (); -+ memset(file_data, 0, sizeof (struct lto_file_decl_data)); -+ file_data->id = id; -+ file_data->section_hash_table = lto_obj_create_section_hash_table (); -+ lto_splay_tree_insert (file_ids, id, file_data); -+ -+ /* Maintain list in linker order */ -+ if (!list->first) -+ list->first = file_data; -+ if (list->last) -+ list->last->next = file_data; -+ list->last = file_data; -+ } -+ -+ /* Copy section into sub module hash table */ -+ new_name = XDUPVEC (char, ls->name, strlen (ls->name) + 1); -+ s_slot.name = new_name; -+ hash_slot = htab_find_slot (file_data->section_hash_table, &s_slot, INSERT); -+ gcc_assert (*hash_slot == NULL); -+ -+ new_slot = XDUP (struct lto_section_slot, ls); -+ new_slot->name = new_name; -+ *hash_slot = new_slot; -+ return 1; -+} -+ -+/* Read declarations and other initializations for a FILE_DATA. */ -+ -+static void -+lto_file_finalize (struct lto_file_decl_data *file_data, lto_file *file) -+{ -+ const char *data; -+ size_t len; -+ vec -+ resolutions = vNULL; -+ int i; -+ res_pair *rp; -+ -+ /* Create vector for fast access of resolution. We do this lazily -+ to save memory. */ -+ resolutions.safe_grow_cleared (file_data->max_index + 1); -+ for (i = 0; file_data->respairs.iterate (i, &rp); i++) -+ resolutions[rp->index] = rp->res; -+ file_data->respairs.release (); -+ -+ file_data->renaming_hash_table = lto_create_renaming_table (); -+ file_data->file_name = file->filename; -+#ifdef ACCEL_COMPILER -+ lto_input_mode_table (file_data); -+#else -+ file_data->mode_table = lto_mode_identity_table; -+#endif -+ data = lto_get_section_data (file_data, LTO_section_decls, NULL, &len); -+ if (data == NULL) -+ { -+ internal_error ("cannot read LTO decls from %s", file_data->file_name); -+ return; -+ } -+ /* Frees resolutions */ -+ lto_read_decls (file_data, data, resolutions); -+ lto_free_section_data (file_data, LTO_section_decls, NULL, data, len); -+} -+ -+/* Finalize FILE_DATA in FILE and increase COUNT. */ -+ -+static int -+lto_create_files_from_ids (lto_file *file, struct lto_file_decl_data *file_data, -+ int *count) -+{ -+ lto_file_finalize (file_data, file); -+ if (symtab->dump_file) -+ fprintf (symtab->dump_file, -+ "Creating file %s with sub id " HOST_WIDE_INT_PRINT_HEX "\n", -+ file_data->file_name, file_data->id); -+ (*count)++; -+ return 0; -+} -+ -+/* Generate a TREE representation for all types and external decls -+ entities in FILE. -+ -+ Read all of the globals out of the file. Then read the cgraph -+ and process the .o index into the cgraph nodes so that it can open -+ the .o file to load the functions and ipa information. 
*/ -+ -+static struct lto_file_decl_data * -+lto_file_read (lto_file *file, FILE *resolution_file, int *count) -+{ -+ struct lto_file_decl_data *file_data = NULL; -+ splay_tree file_ids; -+ htab_t section_hash_table; -+ struct lto_section_slot *section; -+ struct file_data_list file_list; -+ struct lto_section_list section_list; -+ -+ memset (§ion_list, 0, sizeof (struct lto_section_list)); -+ section_hash_table = lto_obj_build_section_table (file, §ion_list); -+ -+ /* Find all sub modules in the object and put their sections into new hash -+ tables in a splay tree. */ -+ file_ids = lto_splay_tree_new (); -+ memset (&file_list, 0, sizeof (struct file_data_list)); -+ for (section = section_list.first; section != NULL; section = section->next) -+ create_subid_section_table (section, file_ids, &file_list); -+ -+ /* Add resolutions to file ids */ -+ lto_resolution_read (file_ids, resolution_file, file); -+ -+ /* Finalize each lto file for each submodule in the merged object */ -+ for (file_data = file_list.first; file_data != NULL; file_data = file_data->next) -+ lto_create_files_from_ids (file, file_data, count); -+ -+ splay_tree_delete (file_ids); -+ htab_delete (section_hash_table); -+ -+ return file_list.first; -+} -+ -+#if HAVE_MMAP_FILE && HAVE_SYSCONF && defined _SC_PAGE_SIZE -+#define LTO_MMAP_IO 1 -+#endif -+ -+#if LTO_MMAP_IO -+/* Page size of machine is used for mmap and munmap calls. */ -+static size_t page_mask; -+#endif -+ -+/* Get the section data of length LEN from FILENAME starting at -+ OFFSET. The data segment must be freed by the caller when the -+ caller is finished. Returns NULL if all was not well. */ -+ -+static char * -+lto_read_section_data (struct lto_file_decl_data *file_data, -+ intptr_t offset, size_t len) -+{ -+ char *result; -+ static int fd = -1; -+ static char *fd_name; -+#if LTO_MMAP_IO -+ intptr_t computed_len; -+ intptr_t computed_offset; -+ intptr_t diff; -+#endif -+ -+ /* Keep a single-entry file-descriptor cache. The last file we -+ touched will get closed at exit. -+ ??? Eventually we want to add a more sophisticated larger cache -+ or rather fix function body streaming to not stream them in -+ practically random order. */ -+ if (fd != -1 -+ && filename_cmp (fd_name, file_data->file_name) != 0) -+ { -+ free (fd_name); -+ close (fd); -+ fd = -1; -+ } -+ if (fd == -1) -+ { -+ fd = open (file_data->file_name, O_RDONLY|O_BINARY); -+ if (fd == -1) -+ { -+ fatal_error (input_location, "Cannot open %s", file_data->file_name); -+ return NULL; -+ } -+ fd_name = xstrdup (file_data->file_name); -+ } -+ -+#if LTO_MMAP_IO -+ if (!page_mask) -+ { -+ size_t page_size = sysconf (_SC_PAGE_SIZE); -+ page_mask = ~(page_size - 1); -+ } -+ -+ computed_offset = offset & page_mask; -+ diff = offset - computed_offset; -+ computed_len = len + diff; -+ -+ result = (char *) mmap (NULL, computed_len, PROT_READ, MAP_PRIVATE, -+ fd, computed_offset); -+ if (result == MAP_FAILED) -+ { -+ fatal_error (input_location, "Cannot map %s", file_data->file_name); -+ return NULL; -+ } -+ -+ return result + diff; -+#else -+ result = (char *) xmalloc (len); -+ if (lseek (fd, offset, SEEK_SET) != offset -+ || read (fd, result, len) != (ssize_t) len) -+ { -+ free (result); -+ fatal_error (input_location, "Cannot read %s", file_data->file_name); -+ result = NULL; -+ } -+#ifdef __MINGW32__ -+ /* Native windows doesn't supports delayed unlink on opened file. So -+ we close file here again. This produces higher I/O load, but at least -+ it prevents to have dangling file handles preventing unlink. 
*/ -+ free (fd_name); -+ fd_name = NULL; -+ close (fd); -+ fd = -1; -+#endif -+ return result; -+#endif -+} -+ -+ -+/* Get the section data from FILE_DATA of SECTION_TYPE with NAME. -+ NAME will be NULL unless the section type is for a function -+ body. */ -+ -+static const char * -+get_section_data (struct lto_file_decl_data *file_data, -+ enum lto_section_type section_type, -+ const char *name, -+ size_t *len) -+{ -+ htab_t section_hash_table = file_data->section_hash_table; -+ struct lto_section_slot *f_slot; -+ struct lto_section_slot s_slot; -+ const char *section_name = lto_get_section_name (section_type, name, file_data); -+ char *data = NULL; -+ -+ *len = 0; -+ s_slot.name = section_name; -+ f_slot = (struct lto_section_slot *) htab_find (section_hash_table, &s_slot); -+ if (f_slot) -+ { -+ data = lto_read_section_data (file_data, f_slot->start, f_slot->len); -+ *len = f_slot->len; -+ } -+ -+ free (CONST_CAST (char *, section_name)); -+ return data; -+} -+ -+ -+/* Free the section data from FILE_DATA of SECTION_TYPE with NAME that -+ starts at OFFSET and has LEN bytes. */ -+ -+static void -+free_section_data (struct lto_file_decl_data *file_data ATTRIBUTE_UNUSED, -+ enum lto_section_type section_type ATTRIBUTE_UNUSED, -+ const char *name ATTRIBUTE_UNUSED, -+ const char *offset, size_t len ATTRIBUTE_UNUSED) -+{ -+#if LTO_MMAP_IO -+ intptr_t computed_len; -+ intptr_t computed_offset; -+ intptr_t diff; -+#endif -+ -+#if LTO_MMAP_IO -+ computed_offset = ((intptr_t) offset) & page_mask; -+ diff = (intptr_t) offset - computed_offset; -+ computed_len = len + diff; -+ -+ munmap ((caddr_t) computed_offset, computed_len); -+#else -+ free (CONST_CAST(char *, offset)); -+#endif -+} -+ -+static lto_file *current_lto_file; -+ -+/* If TT is a variable or function decl replace it with its -+ prevailing variant. */ -+#define LTO_SET_PREVAIL(tt) \ -+ do {\ -+ if ((tt) && VAR_OR_FUNCTION_DECL_P (tt) \ -+ && (TREE_PUBLIC (tt) || DECL_EXTERNAL (tt))) \ -+ { \ -+ tt = lto_symtab_prevailing_decl (tt); \ -+ fixed = true; \ -+ } \ -+ } while (0) -+ -+/* Ensure that TT isn't a replacable var of function decl. */ -+#define LTO_NO_PREVAIL(tt) \ -+ gcc_checking_assert (!(tt) || !VAR_OR_FUNCTION_DECL_P (tt)) -+ -+/* Given a tree T replace all fields referring to variables or functions -+ with their prevailing variant. */ -+static void -+lto_fixup_prevailing_decls (tree t) -+{ -+ enum tree_code code = TREE_CODE (t); -+ bool fixed = false; -+ -+ gcc_checking_assert (code != TREE_BINFO); -+ LTO_NO_PREVAIL (TREE_TYPE (t)); -+ if (CODE_CONTAINS_STRUCT (code, TS_COMMON) -+ /* lto_symtab_prevail_decl use TREE_CHAIN to link to the prevailing decl. -+ in the case T is a prevailed declaration we would ICE here. 
*/ -+ && !VAR_OR_FUNCTION_DECL_P (t)) -+ LTO_NO_PREVAIL (TREE_CHAIN (t)); -+ if (DECL_P (t)) -+ { -+ LTO_NO_PREVAIL (DECL_NAME (t)); -+ LTO_SET_PREVAIL (DECL_CONTEXT (t)); -+ if (CODE_CONTAINS_STRUCT (code, TS_DECL_COMMON)) -+ { -+ LTO_SET_PREVAIL (DECL_SIZE (t)); -+ LTO_SET_PREVAIL (DECL_SIZE_UNIT (t)); -+ LTO_SET_PREVAIL (DECL_INITIAL (t)); -+ LTO_NO_PREVAIL (DECL_ATTRIBUTES (t)); -+ LTO_SET_PREVAIL (DECL_ABSTRACT_ORIGIN (t)); -+ } -+ if (CODE_CONTAINS_STRUCT (code, TS_DECL_WITH_VIS)) -+ { -+ LTO_NO_PREVAIL (DECL_ASSEMBLER_NAME_RAW (t)); -+ } -+ if (CODE_CONTAINS_STRUCT (code, TS_DECL_NON_COMMON)) -+ { -+ LTO_NO_PREVAIL (DECL_RESULT_FLD (t)); -+ } -+ if (CODE_CONTAINS_STRUCT (code, TS_FUNCTION_DECL)) -+ { -+ LTO_NO_PREVAIL (DECL_ARGUMENTS (t)); -+ LTO_SET_PREVAIL (DECL_FUNCTION_PERSONALITY (t)); -+ LTO_NO_PREVAIL (DECL_VINDEX (t)); -+ } -+ if (CODE_CONTAINS_STRUCT (code, TS_FIELD_DECL)) -+ { -+ LTO_SET_PREVAIL (DECL_FIELD_OFFSET (t)); -+ LTO_NO_PREVAIL (DECL_BIT_FIELD_TYPE (t)); -+ LTO_NO_PREVAIL (DECL_QUALIFIER (t)); -+ LTO_NO_PREVAIL (DECL_FIELD_BIT_OFFSET (t)); -+ LTO_NO_PREVAIL (DECL_FCONTEXT (t)); -+ } -+ } -+ else if (TYPE_P (t)) -+ { -+ LTO_NO_PREVAIL (TYPE_CACHED_VALUES (t)); -+ LTO_SET_PREVAIL (TYPE_SIZE (t)); -+ LTO_SET_PREVAIL (TYPE_SIZE_UNIT (t)); -+ LTO_NO_PREVAIL (TYPE_ATTRIBUTES (t)); -+ LTO_NO_PREVAIL (TYPE_NAME (t)); -+ -+ LTO_SET_PREVAIL (TYPE_MIN_VALUE_RAW (t)); -+ LTO_SET_PREVAIL (TYPE_MAX_VALUE_RAW (t)); -+ LTO_NO_PREVAIL (TYPE_LANG_SLOT_1 (t)); -+ -+ LTO_SET_PREVAIL (TYPE_CONTEXT (t)); -+ -+ LTO_NO_PREVAIL (TYPE_CANONICAL (t)); -+ LTO_NO_PREVAIL (TYPE_MAIN_VARIANT (t)); -+ LTO_NO_PREVAIL (TYPE_NEXT_VARIANT (t)); -+ } -+ else if (EXPR_P (t)) -+ { -+ int i; -+ for (i = TREE_OPERAND_LENGTH (t) - 1; i >= 0; --i) -+ LTO_SET_PREVAIL (TREE_OPERAND (t, i)); -+ } -+ else if (TREE_CODE (t) == CONSTRUCTOR) -+ { -+ unsigned i; -+ tree val; -+ FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (t), i, val) -+ LTO_SET_PREVAIL (val); -+ } -+ else -+ { -+ switch (code) -+ { -+ case TREE_LIST: -+ LTO_SET_PREVAIL (TREE_VALUE (t)); -+ LTO_SET_PREVAIL (TREE_PURPOSE (t)); -+ LTO_NO_PREVAIL (TREE_PURPOSE (t)); -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ } -+ /* If we fixed nothing, then we missed something seen by -+ mentions_vars_p. */ -+ gcc_checking_assert (fixed); -+} -+#undef LTO_SET_PREVAIL -+#undef LTO_NO_PREVAIL -+ -+/* Helper function of lto_fixup_decls. Walks the var and fn streams in STATE, -+ replaces var and function decls with the corresponding prevailing def. */ -+ -+static void -+lto_fixup_state (struct lto_in_decl_state *state) -+{ -+ unsigned i, si; -+ -+ /* Although we only want to replace FUNCTION_DECLs and VAR_DECLs, -+ we still need to walk from all DECLs to find the reachable -+ FUNCTION_DECLs and VAR_DECLs. */ -+ for (si = 0; si < LTO_N_DECL_STREAMS; si++) -+ { -+ vec *trees = state->streams[si]; -+ for (i = 0; i < vec_safe_length (trees); i++) -+ { -+ tree t = (*trees)[i]; -+ if (flag_checking && TYPE_P (t)) -+ verify_type (t); -+ if (VAR_OR_FUNCTION_DECL_P (t) -+ && (TREE_PUBLIC (t) || DECL_EXTERNAL (t))) -+ (*trees)[i] = lto_symtab_prevailing_decl (t); -+ } -+ } -+} -+ -+/* Fix the decls from all FILES. Replaces each decl with the corresponding -+ prevailing one. 
*/ -+ -+static void -+lto_fixup_decls (struct lto_file_decl_data **files) -+{ -+ unsigned int i; -+ tree t; -+ -+ if (tree_with_vars) -+ FOR_EACH_VEC_ELT ((*tree_with_vars), i, t) -+ lto_fixup_prevailing_decls (t); -+ -+ for (i = 0; files[i]; i++) -+ { -+ struct lto_file_decl_data *file = files[i]; -+ struct lto_in_decl_state *state = file->global_decl_state; -+ lto_fixup_state (state); -+ -+ hash_table::iterator iter; -+ lto_in_decl_state *elt; -+ FOR_EACH_HASH_TABLE_ELEMENT (*file->function_decl_states, elt, -+ lto_in_decl_state *, iter) -+ lto_fixup_state (elt); -+ } -+} -+ -+static GTY((length ("lto_stats.num_input_files + 1"))) struct lto_file_decl_data **all_file_decl_data; -+ -+/* Turn file datas for sub files into a single array, so that they look -+ like separate files for further passes. */ -+ -+static void -+lto_flatten_files (struct lto_file_decl_data **orig, int count, int last_file_ix) -+{ -+ struct lto_file_decl_data *n, *next; -+ int i, k; -+ -+ lto_stats.num_input_files = count; -+ all_file_decl_data -+ = ggc_cleared_vec_alloc (count + 1); -+ /* Set the hooks so that all of the ipa passes can read in their data. */ -+ lto_set_in_hooks (all_file_decl_data, get_section_data, free_section_data); -+ for (i = 0, k = 0; i < last_file_ix; i++) -+ { -+ for (n = orig[i]; n != NULL; n = next) -+ { -+ all_file_decl_data[k++] = n; -+ next = n->next; -+ n->next = NULL; -+ } -+ } -+ all_file_decl_data[k] = NULL; -+ gcc_assert (k == count); -+} -+ -+/* Input file data before flattening (i.e. splitting them to subfiles to support -+ incremental linking. */ -+static int real_file_count; -+static GTY((length ("real_file_count + 1"))) struct lto_file_decl_data **real_file_decl_data; -+ -+/* Read all the symbols from the input files FNAMES. NFILES is the -+ number of files requested in the command line. Instantiate a -+ global call graph by aggregating all the sub-graphs found in each -+ file. */ -+ -+void -+read_cgraph_and_symbols (unsigned nfiles, const char **fnames) -+{ -+ unsigned int i, last_file_ix; -+ FILE *resolution; -+ int count = 0; -+ struct lto_file_decl_data **decl_data; -+ symtab_node *snode; -+ -+ symtab->initialize (); -+ -+ timevar_push (TV_IPA_LTO_DECL_IN); -+ -+#ifdef ACCEL_COMPILER -+ section_name_prefix = OFFLOAD_SECTION_NAME_PREFIX; -+ lto_stream_offload_p = true; -+#endif -+ -+ real_file_decl_data -+ = decl_data = ggc_cleared_vec_alloc (nfiles + 1); -+ real_file_count = nfiles; -+ -+ /* Read the resolution file. */ -+ resolution = NULL; -+ if (resolution_file_name) -+ { -+ int t; -+ unsigned num_objects; -+ -+ resolution = fopen (resolution_file_name, "r"); -+ if (resolution == NULL) -+ fatal_error (input_location, -+ "could not open symbol resolution file: %m"); -+ -+ t = fscanf (resolution, "%u", &num_objects); -+ gcc_assert (t == 1); -+ -+ /* True, since the plugin splits the archives. */ -+ gcc_assert (num_objects == nfiles); -+ } -+ symtab->state = LTO_STREAMING; -+ -+ canonical_type_hash_cache = new hash_map (251); -+ gimple_canonical_types = htab_create (16381, gimple_canonical_type_hash, -+ gimple_canonical_type_eq, NULL); -+ gcc_obstack_init (&tree_scc_hash_obstack); -+ tree_scc_hash = new hash_table (4096); -+ -+ /* Register the common node types with the canonical type machinery so -+ we properly share alias-sets across languages and TUs. Do not -+ expose the common nodes as type merge target - those that should be -+ are already exposed so by pre-loading the LTO streamer caches. -+ Do two passes - first clear TYPE_CANONICAL and then re-compute it. 
*/ -+ for (i = 0; i < itk_none; ++i) -+ lto_register_canonical_types (integer_types[i], true); -+ for (i = 0; i < stk_type_kind_last; ++i) -+ lto_register_canonical_types (sizetype_tab[i], true); -+ for (i = 0; i < TI_MAX; ++i) -+ lto_register_canonical_types (global_trees[i], true); -+ for (i = 0; i < itk_none; ++i) -+ lto_register_canonical_types (integer_types[i], false); -+ for (i = 0; i < stk_type_kind_last; ++i) -+ lto_register_canonical_types (sizetype_tab[i], false); -+ for (i = 0; i < TI_MAX; ++i) -+ lto_register_canonical_types (global_trees[i], false); -+ -+ if (!quiet_flag) -+ fprintf (stderr, "Reading object files:"); -+ -+ /* Read all of the object files specified on the command line. */ -+ for (i = 0, last_file_ix = 0; i < nfiles; ++i) -+ { -+ struct lto_file_decl_data *file_data = NULL; -+ if (!quiet_flag) -+ { -+ fprintf (stderr, " %s", fnames[i]); -+ fflush (stderr); -+ } -+ -+ current_lto_file = lto_obj_file_open (fnames[i], false); -+ if (!current_lto_file) -+ break; -+ -+ file_data = lto_file_read (current_lto_file, resolution, &count); -+ if (!file_data) -+ { -+ lto_obj_file_close (current_lto_file); -+ free (current_lto_file); -+ current_lto_file = NULL; -+ break; -+ } -+ -+ decl_data[last_file_ix++] = file_data; -+ -+ lto_obj_file_close (current_lto_file); -+ free (current_lto_file); -+ current_lto_file = NULL; -+ } -+ -+ lto_flatten_files (decl_data, count, last_file_ix); -+ lto_stats.num_input_files = count; -+ ggc_free(decl_data); -+ real_file_decl_data = NULL; -+ -+ if (resolution_file_name) -+ fclose (resolution); -+ -+ /* Show the LTO report before launching LTRANS. */ -+ if (flag_lto_report || (flag_wpa && flag_lto_report_wpa)) -+ print_lto_report_1 (); -+ -+ /* Free gimple type merging datastructures. */ -+ delete tree_scc_hash; -+ tree_scc_hash = NULL; -+ obstack_free (&tree_scc_hash_obstack, NULL); -+ htab_delete (gimple_canonical_types); -+ gimple_canonical_types = NULL; -+ delete canonical_type_hash_cache; -+ canonical_type_hash_cache = NULL; -+ -+ /* At this stage we know that majority of GGC memory is reachable. -+ Growing the limits prevents unnecesary invocation of GGC. */ -+ ggc_grow (); -+ ggc_collect (); -+ -+ /* Set the hooks so that all of the ipa passes can read in their data. */ -+ lto_set_in_hooks (all_file_decl_data, get_section_data, free_section_data); -+ -+ timevar_pop (TV_IPA_LTO_DECL_IN); -+ -+ if (!quiet_flag) -+ fprintf (stderr, "\nReading the callgraph\n"); -+ -+ timevar_push (TV_IPA_LTO_CGRAPH_IO); -+ /* Read the symtab. */ -+ input_symtab (); -+ -+ input_offload_tables (!flag_ltrans); -+ -+ /* Store resolutions into the symbol table. 
*/ -+ -+ FOR_EACH_SYMBOL (snode) -+ if (snode->externally_visible && snode->real_symbol_p () -+ && snode->lto_file_data && snode->lto_file_data->resolution_map -+ && !(TREE_CODE (snode->decl) == FUNCTION_DECL -+ && fndecl_built_in_p (snode->decl)) -+ && !(VAR_P (snode->decl) && DECL_HARD_REGISTER (snode->decl))) -+ { -+ ld_plugin_symbol_resolution_t *res; -+ -+ res = snode->lto_file_data->resolution_map->get (snode->decl); -+ if (!res || *res == LDPR_UNKNOWN) -+ { -+ if (snode->output_to_lto_symbol_table_p ()) -+ fatal_error (input_location, "missing resolution data for %s", -+ IDENTIFIER_POINTER -+ (DECL_ASSEMBLER_NAME (snode->decl))); -+ } -+ else -+ snode->resolution = *res; -+ } -+ for (i = 0; all_file_decl_data[i]; i++) -+ if (all_file_decl_data[i]->resolution_map) -+ { -+ delete all_file_decl_data[i]->resolution_map; -+ all_file_decl_data[i]->resolution_map = NULL; -+ } -+ -+ timevar_pop (TV_IPA_LTO_CGRAPH_IO); -+ -+ if (!quiet_flag) -+ fprintf (stderr, "Merging declarations\n"); -+ -+ timevar_push (TV_IPA_LTO_DECL_MERGE); -+ /* Merge global decls. In ltrans mode we read merged cgraph, we do not -+ need to care about resolving symbols again, we only need to replace -+ duplicated declarations read from the callgraph and from function -+ sections. */ -+ if (!flag_ltrans) -+ { -+ lto_symtab_merge_decls (); -+ -+ /* If there were errors during symbol merging bail out, we have no -+ good way to recover here. */ -+ if (seen_error ()) -+ fatal_error (input_location, -+ "errors during merging of translation units"); -+ -+ /* Fixup all decls. */ -+ lto_fixup_decls (all_file_decl_data); -+ } -+ if (tree_with_vars) -+ ggc_free (tree_with_vars); -+ tree_with_vars = NULL; -+ ggc_collect (); -+ -+ timevar_pop (TV_IPA_LTO_DECL_MERGE); -+ /* Each pass will set the appropriate timer. */ -+ -+ if (!quiet_flag) -+ fprintf (stderr, "Reading summaries\n"); -+ -+ /* Read the IPA summary data. */ -+ if (flag_ltrans) -+ ipa_read_optimization_summaries (); -+ else -+ ipa_read_summaries (); -+ -+ for (i = 0; all_file_decl_data[i]; i++) -+ { -+ gcc_assert (all_file_decl_data[i]->symtab_node_encoder); -+ lto_symtab_encoder_delete (all_file_decl_data[i]->symtab_node_encoder); -+ all_file_decl_data[i]->symtab_node_encoder = NULL; -+ lto_free_function_in_decl_state (all_file_decl_data[i]->global_decl_state); -+ all_file_decl_data[i]->global_decl_state = NULL; -+ all_file_decl_data[i]->current_decl_state = NULL; -+ } -+ -+ if (!flag_ltrans) -+ { -+ /* Finally merge the cgraph according to the decl merging decisions. */ -+ timevar_push (TV_IPA_LTO_CGRAPH_MERGE); -+ -+ gcc_assert (!dump_file); -+ dump_file = dump_begin (lto_link_dump_id, NULL); -+ -+ if (dump_file) -+ { -+ fprintf (dump_file, "Before merging:\n"); -+ symtab->dump (dump_file); -+ } -+ lto_symtab_merge_symbols (); -+ /* Removal of unreachable symbols is needed to make verify_symtab to pass; -+ we are still having duplicated comdat groups containing local statics. -+ We could also just remove them while merging. */ -+ symtab->remove_unreachable_nodes (dump_file); -+ ggc_collect (); -+ -+ if (dump_file) -+ dump_end (lto_link_dump_id, dump_file); -+ dump_file = NULL; -+ timevar_pop (TV_IPA_LTO_CGRAPH_MERGE); -+ } -+ symtab->state = IPA_SSA; -+ /* All node removals happening here are useless, because -+ WPA should not stream them. 
Still always perform remove_unreachable_nodes -+ because we may reshape clone tree, get rid of dead masters of inline -+ clones and remove symbol entries for read-only variables we keep around -+ only to be able to constant fold them. */ -+ if (flag_ltrans) -+ { -+ if (symtab->dump_file) -+ symtab->dump (symtab->dump_file); -+ symtab->remove_unreachable_nodes (symtab->dump_file); -+ } -+ -+ /* Indicate that the cgraph is built and ready. */ -+ symtab->function_flags_ready = true; -+ -+ ggc_free (all_file_decl_data); -+ all_file_decl_data = NULL; -+} -+ -+ -+ -+/* Show various memory usage statistics related to LTO. */ -+void -+print_lto_report_1 (void) -+{ -+ const char *pfx = (flag_lto) ? "LTO" : (flag_wpa) ? "WPA" : "LTRANS"; -+ fprintf (stderr, "%s statistics\n", pfx); -+ -+ fprintf (stderr, "[%s] read %lu SCCs of average size %f\n", -+ pfx, num_sccs_read, total_scc_size / (double)num_sccs_read); -+ fprintf (stderr, "[%s] %lu tree bodies read in total\n", pfx, total_scc_size); -+ if (flag_wpa && tree_scc_hash) -+ { -+ fprintf (stderr, "[%s] tree SCC table: size %ld, %ld elements, " -+ "collision ratio: %f\n", pfx, -+ (long) tree_scc_hash->size (), -+ (long) tree_scc_hash->elements (), -+ tree_scc_hash->collisions ()); -+ hash_table::iterator hiter; -+ tree_scc *scc, *max_scc = NULL; -+ unsigned max_length = 0; -+ FOR_EACH_HASH_TABLE_ELEMENT (*tree_scc_hash, scc, x, hiter) -+ { -+ unsigned length = 0; -+ tree_scc *s = scc; -+ for (; s; s = s->next) -+ length++; -+ if (length > max_length) -+ { -+ max_length = length; -+ max_scc = scc; -+ } -+ } -+ fprintf (stderr, "[%s] tree SCC max chain length %u (size %u)\n", -+ pfx, max_length, max_scc->len); -+ fprintf (stderr, "[%s] Compared %lu SCCs, %lu collisions (%f)\n", pfx, -+ num_scc_compares, num_scc_compare_collisions, -+ num_scc_compare_collisions / (double) num_scc_compares); -+ fprintf (stderr, "[%s] Merged %lu SCCs\n", pfx, num_sccs_merged); -+ fprintf (stderr, "[%s] Merged %lu tree bodies\n", pfx, -+ total_scc_size_merged); -+ fprintf (stderr, "[%s] Merged %lu types\n", pfx, num_merged_types); -+ fprintf (stderr, "[%s] %lu types prevailed (%lu associated trees)\n", -+ pfx, num_prevailing_types, num_type_scc_trees); -+ fprintf (stderr, "[%s] GIMPLE canonical type table: size %ld, " -+ "%ld elements, %ld searches, %ld collisions (ratio: %f)\n", pfx, -+ (long) htab_size (gimple_canonical_types), -+ (long) htab_elements (gimple_canonical_types), -+ (long) gimple_canonical_types->searches, -+ (long) gimple_canonical_types->collisions, -+ htab_collisions (gimple_canonical_types)); -+ fprintf (stderr, "[%s] GIMPLE canonical type pointer-map: " -+ "%lu elements, %ld searches\n", pfx, -+ num_canonical_type_hash_entries, -+ num_canonical_type_hash_queries); -+ } -+ -+ print_lto_report (pfx); -+} -+ -+GTY(()) tree lto_eh_personality_decl; -+ -+/* Return the LTO personality function decl. */ -+ -+tree -+lto_eh_personality (void) -+{ -+ if (!lto_eh_personality_decl) -+ { -+ /* Use the first personality DECL for our personality if we don't -+ support multiple ones. This ensures that we don't artificially -+ create the need for them in a single-language program. */ -+ if (first_personality_decl && !dwarf2out_do_cfi_asm ()) -+ lto_eh_personality_decl = first_personality_decl; -+ else -+ lto_eh_personality_decl = lhd_gcc_personality (); -+ } -+ -+ return lto_eh_personality_decl; -+} -+ -+/* Set the process name based on the LTO mode. 
*/ -+ -+static void -+lto_process_name (void) -+{ -+ if (flag_lto) -+ setproctitle (flag_incremental_link == INCREMENTAL_LINK_LTO -+ ? "lto1-inclink" : "lto1-lto"); -+ if (flag_wpa) -+ setproctitle ("lto1-wpa"); -+ if (flag_ltrans) -+ setproctitle ("lto1-ltrans"); -+} -+ -+ -+/* Initialize the LTO front end. */ -+ -+void -+lto_fe_init (void) -+{ -+ lto_process_name (); -+ lto_streamer_hooks_init (); -+ lto_reader_init (); -+ lto_set_in_hooks (NULL, get_section_data, free_section_data); -+ memset (<o_stats, 0, sizeof (lto_stats)); -+ bitmap_obstack_initialize (NULL); -+ gimple_register_cfg_hooks (); -+#ifndef ACCEL_COMPILER -+ unsigned char *table -+ = ggc_vec_alloc (MAX_MACHINE_MODE); -+ for (int m = 0; m < MAX_MACHINE_MODE; m++) -+ table[m] = m; -+ lto_mode_identity_table = table; -+#endif -+} -+ -+#include "gt-lto-lto-common.h" -diff --git a/gcc/lto/lto-common.h b/gcc/lto/lto-common.h -new file mode 100644 -index 000000000..b1209a3a3 ---- /dev/null -+++ b/gcc/lto/lto-common.h -@@ -0,0 +1,33 @@ -+/* LTO common functions between lto.c and lto-dump.c header file. -+ Copyright (C) 2018 Free Software Foundation, Inc. -+ -+This file is part of GCC. -+ -+GCC is free software; you can redistribute it and/or modify it under -+the terms of the GNU General Public License as published by the Free -+Software Foundation; either version 3, or (at your option) any later -+version. -+ -+GCC is distributed in the hope that it will be useful, but WITHOUT ANY -+WARRANTY; without even the implied warranty of MERCHANTABILITY or -+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+for more details. -+ -+You should have received a copy of the GNU General Public License -+along with GCC; see the file COPYING3. If not see -+. */ -+ -+#ifndef LTO_COMMON_H -+#define LTO_COMMON_H -+ -+void lto_fe_init (void); -+void read_cgraph_and_symbols (unsigned, const char **); -+void print_lto_report_1 (void); -+ -+extern tree lto_eh_personality_decl; -+extern GTY(()) vec *tree_with_vars; -+extern const unsigned char *lto_mode_identity_table; -+extern tree first_personality_decl; -+ -+#endif -+ -diff --git a/gcc/lto/lto-lang.c b/gcc/lto/lto-lang.c -index 4ef228fcb..1d35db11e 100644 ---- a/gcc/lto/lto-lang.c -+++ b/gcc/lto/lto-lang.c -@@ -34,6 +34,7 @@ along with GCC; see the file COPYING3. If not see - #include "debug.h" - #include "lto-tree.h" - #include "lto.h" -+#include "lto-common.h" - #include "stringpool.h" - #include "attribs.h" - -diff --git a/gcc/lto/lto-symtab.c b/gcc/lto/lto-symtab.c -index 63a633302..2fd5b1e8f 100644 ---- a/gcc/lto/lto-symtab.c -+++ b/gcc/lto/lto-symtab.c -@@ -556,7 +556,8 @@ lto_symtab_merge_p (tree prevailing, tree decl) - } - if (fndecl_built_in_p (prevailing) - && (DECL_BUILT_IN_CLASS (prevailing) != DECL_BUILT_IN_CLASS (decl) -- || DECL_FUNCTION_CODE (prevailing) != DECL_FUNCTION_CODE (decl))) -+ || (DECL_UNCHECKED_FUNCTION_CODE (prevailing) -+ != DECL_UNCHECKED_FUNCTION_CODE (decl)))) - { - if (dump_file) - fprintf (dump_file, "Not merging decls; " -diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c -index 4db156fdf..c44e034a2 100644 ---- a/gcc/lto/lto.c -+++ b/gcc/lto/lto.c -@@ -38,7 +38,6 @@ along with GCC; see the file COPYING3. If not see - #include "symbol-summary.h" - #include "tree-vrp.h" - #include "ipa-prop.h" --#include "common.h" - #include "debug.h" - #include "lto.h" - #include "lto-section-names.h" -@@ -55,122 +54,12 @@ along with GCC; see the file COPYING3. 
If not see - #include "fold-const.h" - #include "attribs.h" - #include "builtins.h" -+#include "lto-common.h" - - - /* Number of parallel tasks to run, -1 if we want to use GNU Make jobserver. */ - static int lto_parallelism; - --static GTY(()) tree first_personality_decl; -- --static GTY(()) const unsigned char *lto_mode_identity_table; -- --/* Returns a hash code for P. */ -- --static hashval_t --hash_name (const void *p) --{ -- const struct lto_section_slot *ds = (const struct lto_section_slot *) p; -- return (hashval_t) htab_hash_string (ds->name); --} -- -- --/* Returns nonzero if P1 and P2 are equal. */ -- --static int --eq_name (const void *p1, const void *p2) --{ -- const struct lto_section_slot *s1 = -- (const struct lto_section_slot *) p1; -- const struct lto_section_slot *s2 = -- (const struct lto_section_slot *) p2; -- -- return strcmp (s1->name, s2->name) == 0; --} -- --/* Free lto_section_slot */ -- --static void --free_with_string (void *arg) --{ -- struct lto_section_slot *s = (struct lto_section_slot *)arg; -- -- free (CONST_CAST (char *, s->name)); -- free (arg); --} -- --/* Create section hash table */ -- --htab_t --lto_obj_create_section_hash_table (void) --{ -- return htab_create (37, hash_name, eq_name, free_with_string); --} -- --/* Delete an allocated integer KEY in the splay tree. */ -- --static void --lto_splay_tree_delete_id (splay_tree_key key) --{ -- free ((void *) key); --} -- --/* Compare splay tree node ids A and B. */ -- --static int --lto_splay_tree_compare_ids (splay_tree_key a, splay_tree_key b) --{ -- unsigned HOST_WIDE_INT ai; -- unsigned HOST_WIDE_INT bi; -- -- ai = *(unsigned HOST_WIDE_INT *) a; -- bi = *(unsigned HOST_WIDE_INT *) b; -- -- if (ai < bi) -- return -1; -- else if (ai > bi) -- return 1; -- return 0; --} -- --/* Look up splay tree node by ID in splay tree T. */ -- --static splay_tree_node --lto_splay_tree_lookup (splay_tree t, unsigned HOST_WIDE_INT id) --{ -- return splay_tree_lookup (t, (splay_tree_key) &id); --} -- --/* Check if KEY has ID. */ -- --static bool --lto_splay_tree_id_equal_p (splay_tree_key key, unsigned HOST_WIDE_INT id) --{ -- return *(unsigned HOST_WIDE_INT *) key == id; --} -- --/* Insert a splay tree node into tree T with ID as key and FILE_DATA as value. -- The ID is allocated separately because we need HOST_WIDE_INTs which may -- be wider than a splay_tree_key. */ -- --static void --lto_splay_tree_insert (splay_tree t, unsigned HOST_WIDE_INT id, -- struct lto_file_decl_data *file_data) --{ -- unsigned HOST_WIDE_INT *idp = XCNEW (unsigned HOST_WIDE_INT); -- *idp = id; -- splay_tree_insert (t, (splay_tree_key) idp, (splay_tree_value) file_data); --} -- --/* Create a splay tree. */ -- --static splay_tree --lto_splay_tree_new (void) --{ -- return splay_tree_new (lto_splay_tree_compare_ids, -- lto_splay_tree_delete_id, -- NULL); --} -- - /* Return true when NODE has a clone that is analyzed (i.e. we need - to load its body even if the node itself is not needed). */ - -@@ -224,2083 +113,45 @@ lto_materialize_function (struct cgraph_node *node) - rest_of_decl_compilation (decl, 1, 0); - } - -- --/* Decode the content of memory pointed to by DATA in the in decl -- state object STATE. DATA_IN points to a data_in structure for -- decoding. Return the address after the decoded object in the -- input. 
*/ -- --static const uint32_t * --lto_read_in_decl_state (struct data_in *data_in, const uint32_t *data, -- struct lto_in_decl_state *state) --{ -- uint32_t ix; -- tree decl; -- uint32_t i, j; -- -- ix = *data++; -- state->compressed = ix & 1; -- ix /= 2; -- decl = streamer_tree_cache_get_tree (data_in->reader_cache, ix); -- if (!VAR_OR_FUNCTION_DECL_P (decl)) -- { -- gcc_assert (decl == void_type_node); -- decl = NULL_TREE; -- } -- state->fn_decl = decl; -- -- for (i = 0; i < LTO_N_DECL_STREAMS; i++) -- { -- uint32_t size = *data++; -- vec *decls = NULL; -- vec_alloc (decls, size); -- -- for (j = 0; j < size; j++) -- vec_safe_push (decls, -- streamer_tree_cache_get_tree (data_in->reader_cache, -- data[j])); -- -- state->streams[i] = decls; -- data += size; -- } -- -- return data; --} -- -- --/* Global canonical type table. */ --static htab_t gimple_canonical_types; --static hash_map *canonical_type_hash_cache; --static unsigned long num_canonical_type_hash_entries; --static unsigned long num_canonical_type_hash_queries; -- --static void iterative_hash_canonical_type (tree type, inchash::hash &hstate); --static hashval_t gimple_canonical_type_hash (const void *p); --static void gimple_register_canonical_type_1 (tree t, hashval_t hash); -- --/* Returning a hash value for gimple type TYPE. -- -- The hash value returned is equal for types considered compatible -- by gimple_canonical_types_compatible_p. */ -- --static hashval_t --hash_canonical_type (tree type) --{ -- inchash::hash hstate; -- enum tree_code code; -- -- /* We compute alias sets only for types that needs them. -- Be sure we do not recurse to something else as we cannot hash incomplete -- types in a way they would have same hash value as compatible complete -- types. */ -- gcc_checking_assert (type_with_alias_set_p (type)); -- -- /* Combine a few common features of types so that types are grouped into -- smaller sets; when searching for existing matching types to merge, -- only existing types having the same features as the new type will be -- checked. */ -- code = tree_code_for_canonical_type_merging (TREE_CODE (type)); -- hstate.add_int (code); -- hstate.add_int (TYPE_MODE (type)); -- -- /* Incorporate common features of numerical types. */ -- if (INTEGRAL_TYPE_P (type) -- || SCALAR_FLOAT_TYPE_P (type) -- || FIXED_POINT_TYPE_P (type) -- || TREE_CODE (type) == OFFSET_TYPE -- || POINTER_TYPE_P (type)) -- { -- hstate.add_int (TYPE_PRECISION (type)); -- if (!type_with_interoperable_signedness (type)) -- hstate.add_int (TYPE_UNSIGNED (type)); -- } -- -- if (VECTOR_TYPE_P (type)) -- { -- hstate.add_poly_int (TYPE_VECTOR_SUBPARTS (type)); -- hstate.add_int (TYPE_UNSIGNED (type)); -- } -- -- if (TREE_CODE (type) == COMPLEX_TYPE) -- hstate.add_int (TYPE_UNSIGNED (type)); -- -- /* Fortran's C_SIGNED_CHAR is !TYPE_STRING_FLAG but needs to be -- interoperable with "signed char". Unless all frontends are revisited to -- agree on these types, we must ignore the flag completely. */ -- -- /* Fortran standard define C_PTR type that is compatible with every -- C pointer. For this reason we need to glob all pointers into one. -- Still pointers in different address spaces are not compatible. */ -- if (POINTER_TYPE_P (type)) -- hstate.add_int (TYPE_ADDR_SPACE (TREE_TYPE (type))); -- -- /* For array types hash the domain bounds and the string flag. 
*/ -- if (TREE_CODE (type) == ARRAY_TYPE && TYPE_DOMAIN (type)) -- { -- hstate.add_int (TYPE_STRING_FLAG (type)); -- /* OMP lowering can introduce error_mark_node in place of -- random local decls in types. */ -- if (TYPE_MIN_VALUE (TYPE_DOMAIN (type)) != error_mark_node) -- inchash::add_expr (TYPE_MIN_VALUE (TYPE_DOMAIN (type)), hstate); -- if (TYPE_MAX_VALUE (TYPE_DOMAIN (type)) != error_mark_node) -- inchash::add_expr (TYPE_MAX_VALUE (TYPE_DOMAIN (type)), hstate); -- } -- -- /* Recurse for aggregates with a single element type. */ -- if (TREE_CODE (type) == ARRAY_TYPE -- || TREE_CODE (type) == COMPLEX_TYPE -- || TREE_CODE (type) == VECTOR_TYPE) -- iterative_hash_canonical_type (TREE_TYPE (type), hstate); -- -- /* Incorporate function return and argument types. */ -- if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE) -- { -- unsigned na; -- tree p; -- -- iterative_hash_canonical_type (TREE_TYPE (type), hstate); -- -- for (p = TYPE_ARG_TYPES (type), na = 0; p; p = TREE_CHAIN (p)) -- { -- iterative_hash_canonical_type (TREE_VALUE (p), hstate); -- na++; -- } -- -- hstate.add_int (na); -- } -- -- if (RECORD_OR_UNION_TYPE_P (type)) -- { -- unsigned nf; -- tree f; -- -- for (f = TYPE_FIELDS (type), nf = 0; f; f = TREE_CHAIN (f)) -- if (TREE_CODE (f) == FIELD_DECL -- && (! DECL_SIZE (f) -- || ! integer_zerop (DECL_SIZE (f)))) -- { -- iterative_hash_canonical_type (TREE_TYPE (f), hstate); -- nf++; -- } -- -- hstate.add_int (nf); -- } -- -- return hstate.end(); --} -- --/* Returning a hash value for gimple type TYPE combined with VAL. */ -- --static void --iterative_hash_canonical_type (tree type, inchash::hash &hstate) --{ -- hashval_t v; -- -- /* All type variants have same TYPE_CANONICAL. */ -- type = TYPE_MAIN_VARIANT (type); -- -- if (!canonical_type_used_p (type)) -- v = hash_canonical_type (type); -- /* An already processed type. */ -- else if (TYPE_CANONICAL (type)) -- { -- type = TYPE_CANONICAL (type); -- v = gimple_canonical_type_hash (type); -- } -- else -- { -- /* Canonical types should not be able to form SCCs by design, this -- recursion is just because we do not register canonical types in -- optimal order. To avoid quadratic behavior also register the -- type here. */ -- v = hash_canonical_type (type); -- gimple_register_canonical_type_1 (type, v); -- } -- hstate.add_int (v); --} -- --/* Returns the hash for a canonical type P. */ -- --static hashval_t --gimple_canonical_type_hash (const void *p) --{ -- num_canonical_type_hash_queries++; -- hashval_t *slot = canonical_type_hash_cache->get ((const_tree) p); -- gcc_assert (slot != NULL); -- return *slot; --} -- -- -- --/* Returns nonzero if P1 and P2 are equal. */ -- --static int --gimple_canonical_type_eq (const void *p1, const void *p2) --{ -- const_tree t1 = (const_tree) p1; -- const_tree t2 = (const_tree) p2; -- return gimple_canonical_types_compatible_p (CONST_CAST_TREE (t1), -- CONST_CAST_TREE (t2)); --} -- --/* Main worker for gimple_register_canonical_type. */ -- --static void --gimple_register_canonical_type_1 (tree t, hashval_t hash) --{ -- void **slot; -- -- gcc_checking_assert (TYPE_P (t) && !TYPE_CANONICAL (t) -- && type_with_alias_set_p (t) -- && canonical_type_used_p (t)); -- -- slot = htab_find_slot_with_hash (gimple_canonical_types, t, hash, INSERT); -- if (*slot) -- { -- tree new_type = (tree)(*slot); -- gcc_checking_assert (new_type != t); -- TYPE_CANONICAL (t) = new_type; -- } -- else -- { -- TYPE_CANONICAL (t) = t; -- *slot = (void *) t; -- /* Cache the just computed hash value. 
*/ -- num_canonical_type_hash_entries++; -- bool existed_p = canonical_type_hash_cache->put (t, hash); -- gcc_assert (!existed_p); -- } --} -- --/* Register type T in the global type table gimple_types and set -- TYPE_CANONICAL of T accordingly. -- This is used by LTO to merge structurally equivalent types for -- type-based aliasing purposes across different TUs and languages. -- -- ??? This merging does not exactly match how the tree.c middle-end -- functions will assign TYPE_CANONICAL when new types are created -- during optimization (which at least happens for pointer and array -- types). */ -- --static void --gimple_register_canonical_type (tree t) --{ -- if (TYPE_CANONICAL (t) || !type_with_alias_set_p (t) -- || !canonical_type_used_p (t)) -- return; -- -- /* Canonical types are same among all complete variants. */ -- if (TYPE_CANONICAL (TYPE_MAIN_VARIANT (t))) -- TYPE_CANONICAL (t) = TYPE_CANONICAL (TYPE_MAIN_VARIANT (t)); -- else -- { -- gimple_register_canonical_type_1 (TYPE_MAIN_VARIANT (t), -- hash_canonical_type (TYPE_MAIN_VARIANT (t))); -- TYPE_CANONICAL (t) = TYPE_CANONICAL (TYPE_MAIN_VARIANT (t)); -- } --} -- --/* Re-compute TYPE_CANONICAL for NODE and related types. */ -+/* Materialize all the bodies for all the nodes in the callgraph. */ - - static void --lto_register_canonical_types (tree node, bool first_p) --{ -- if (!node -- || !TYPE_P (node)) -- return; -- -- if (first_p) -- TYPE_CANONICAL (node) = NULL_TREE; -- -- if (POINTER_TYPE_P (node) -- || TREE_CODE (node) == COMPLEX_TYPE -- || TREE_CODE (node) == ARRAY_TYPE) -- lto_register_canonical_types (TREE_TYPE (node), first_p); -- -- if (!first_p) -- gimple_register_canonical_type (node); --} -- -- --/* Remember trees that contains references to declarations. */ --static GTY(()) vec *tree_with_vars; -- --#define CHECK_VAR(tt) \ -- do \ -- { \ -- if ((tt) && VAR_OR_FUNCTION_DECL_P (tt) \ -- && (TREE_PUBLIC (tt) || DECL_EXTERNAL (tt))) \ -- return true; \ -- } while (0) -- --#define CHECK_NO_VAR(tt) \ -- gcc_checking_assert (!(tt) || !VAR_OR_FUNCTION_DECL_P (tt)) -- --/* Check presence of pointers to decls in fields of a tree_typed T. */ -- --static inline bool --mentions_vars_p_typed (tree t) --{ -- CHECK_NO_VAR (TREE_TYPE (t)); -- return false; --} -- --/* Check presence of pointers to decls in fields of a tree_common T. */ -- --static inline bool --mentions_vars_p_common (tree t) --{ -- if (mentions_vars_p_typed (t)) -- return true; -- CHECK_NO_VAR (TREE_CHAIN (t)); -- return false; --} -- --/* Check presence of pointers to decls in fields of a decl_minimal T. */ -- --static inline bool --mentions_vars_p_decl_minimal (tree t) --{ -- if (mentions_vars_p_common (t)) -- return true; -- CHECK_NO_VAR (DECL_NAME (t)); -- CHECK_VAR (DECL_CONTEXT (t)); -- return false; --} -- --/* Check presence of pointers to decls in fields of a decl_common T. */ -- --static inline bool --mentions_vars_p_decl_common (tree t) --{ -- if (mentions_vars_p_decl_minimal (t)) -- return true; -- CHECK_VAR (DECL_SIZE (t)); -- CHECK_VAR (DECL_SIZE_UNIT (t)); -- CHECK_VAR (DECL_INITIAL (t)); -- CHECK_NO_VAR (DECL_ATTRIBUTES (t)); -- CHECK_VAR (DECL_ABSTRACT_ORIGIN (t)); -- return false; --} -- --/* Check presence of pointers to decls in fields of a decl_with_vis T. */ -- --static inline bool --mentions_vars_p_decl_with_vis (tree t) --{ -- if (mentions_vars_p_decl_common (t)) -- return true; -- -- /* Accessor macro has side-effects, use field-name here. 
*/ -- CHECK_NO_VAR (DECL_ASSEMBLER_NAME_RAW (t)); -- return false; --} -- --/* Check presence of pointers to decls in fields of a decl_non_common T. */ -- --static inline bool --mentions_vars_p_decl_non_common (tree t) --{ -- if (mentions_vars_p_decl_with_vis (t)) -- return true; -- CHECK_NO_VAR (DECL_RESULT_FLD (t)); -- return false; --} -- --/* Check presence of pointers to decls in fields of a decl_non_common T. */ -- --static bool --mentions_vars_p_function (tree t) --{ -- if (mentions_vars_p_decl_non_common (t)) -- return true; -- CHECK_NO_VAR (DECL_ARGUMENTS (t)); -- CHECK_NO_VAR (DECL_VINDEX (t)); -- CHECK_VAR (DECL_FUNCTION_PERSONALITY (t)); -- return false; --} -- --/* Check presence of pointers to decls in fields of a field_decl T. */ -- --static bool --mentions_vars_p_field_decl (tree t) --{ -- if (mentions_vars_p_decl_common (t)) -- return true; -- CHECK_VAR (DECL_FIELD_OFFSET (t)); -- CHECK_NO_VAR (DECL_BIT_FIELD_TYPE (t)); -- CHECK_NO_VAR (DECL_QUALIFIER (t)); -- CHECK_NO_VAR (DECL_FIELD_BIT_OFFSET (t)); -- CHECK_NO_VAR (DECL_FCONTEXT (t)); -- return false; --} -- --/* Check presence of pointers to decls in fields of a type T. */ -- --static bool --mentions_vars_p_type (tree t) --{ -- if (mentions_vars_p_common (t)) -- return true; -- CHECK_NO_VAR (TYPE_CACHED_VALUES (t)); -- CHECK_VAR (TYPE_SIZE (t)); -- CHECK_VAR (TYPE_SIZE_UNIT (t)); -- CHECK_NO_VAR (TYPE_ATTRIBUTES (t)); -- CHECK_NO_VAR (TYPE_NAME (t)); -- -- CHECK_VAR (TYPE_MIN_VALUE_RAW (t)); -- CHECK_VAR (TYPE_MAX_VALUE_RAW (t)); -- -- /* Accessor is for derived node types only. */ -- CHECK_NO_VAR (TYPE_LANG_SLOT_1 (t)); -- -- CHECK_VAR (TYPE_CONTEXT (t)); -- CHECK_NO_VAR (TYPE_CANONICAL (t)); -- CHECK_NO_VAR (TYPE_MAIN_VARIANT (t)); -- CHECK_NO_VAR (TYPE_NEXT_VARIANT (t)); -- return false; --} -- --/* Check presence of pointers to decls in fields of a BINFO T. */ -- --static bool --mentions_vars_p_binfo (tree t) --{ -- unsigned HOST_WIDE_INT i, n; -- -- if (mentions_vars_p_common (t)) -- return true; -- CHECK_VAR (BINFO_VTABLE (t)); -- CHECK_NO_VAR (BINFO_OFFSET (t)); -- CHECK_NO_VAR (BINFO_VIRTUALS (t)); -- CHECK_NO_VAR (BINFO_VPTR_FIELD (t)); -- n = vec_safe_length (BINFO_BASE_ACCESSES (t)); -- for (i = 0; i < n; i++) -- CHECK_NO_VAR (BINFO_BASE_ACCESS (t, i)); -- /* Do not walk BINFO_INHERITANCE_CHAIN, BINFO_SUBVTT_INDEX -- and BINFO_VPTR_INDEX; these are used by C++ FE only. */ -- n = BINFO_N_BASE_BINFOS (t); -- for (i = 0; i < n; i++) -- CHECK_NO_VAR (BINFO_BASE_BINFO (t, i)); -- return false; --} -- --/* Check presence of pointers to decls in fields of a CONSTRUCTOR T. */ -- --static bool --mentions_vars_p_constructor (tree t) --{ -- unsigned HOST_WIDE_INT idx; -- constructor_elt *ce; -- -- if (mentions_vars_p_typed (t)) -- return true; -- -- for (idx = 0; vec_safe_iterate (CONSTRUCTOR_ELTS (t), idx, &ce); idx++) -- { -- CHECK_NO_VAR (ce->index); -- CHECK_VAR (ce->value); -- } -- return false; --} -- --/* Check presence of pointers to decls in fields of an expression tree T. */ -- --static bool --mentions_vars_p_expr (tree t) --{ -- int i; -- if (mentions_vars_p_typed (t)) -- return true; -- for (i = TREE_OPERAND_LENGTH (t) - 1; i >= 0; --i) -- CHECK_VAR (TREE_OPERAND (t, i)); -- return false; --} -- --/* Check presence of pointers to decls in fields of an OMP_CLAUSE T. 
*/ -- --static bool --mentions_vars_p_omp_clause (tree t) --{ -- int i; -- if (mentions_vars_p_common (t)) -- return true; -- for (i = omp_clause_num_ops[OMP_CLAUSE_CODE (t)] - 1; i >= 0; --i) -- CHECK_VAR (OMP_CLAUSE_OPERAND (t, i)); -- return false; --} -- --/* Check presence of pointers to decls that needs later fixup in T. */ -- --static bool --mentions_vars_p (tree t) -+materialize_cgraph (void) - { -- switch (TREE_CODE (t)) -- { -- case IDENTIFIER_NODE: -- break; -- -- case TREE_LIST: -- CHECK_VAR (TREE_VALUE (t)); -- CHECK_VAR (TREE_PURPOSE (t)); -- CHECK_NO_VAR (TREE_CHAIN (t)); -- break; -- -- case FIELD_DECL: -- return mentions_vars_p_field_decl (t); -- -- case LABEL_DECL: -- case CONST_DECL: -- case PARM_DECL: -- case RESULT_DECL: -- case IMPORTED_DECL: -- case NAMESPACE_DECL: -- case NAMELIST_DECL: -- return mentions_vars_p_decl_common (t); -- -- case VAR_DECL: -- return mentions_vars_p_decl_with_vis (t); -- -- case TYPE_DECL: -- return mentions_vars_p_decl_non_common (t); -- -- case FUNCTION_DECL: -- return mentions_vars_p_function (t); -- -- case TREE_BINFO: -- return mentions_vars_p_binfo (t); -- -- case PLACEHOLDER_EXPR: -- return mentions_vars_p_common (t); -- -- case BLOCK: -- case TRANSLATION_UNIT_DECL: -- case OPTIMIZATION_NODE: -- case TARGET_OPTION_NODE: -- break; -- -- case CONSTRUCTOR: -- return mentions_vars_p_constructor (t); -- -- case OMP_CLAUSE: -- return mentions_vars_p_omp_clause (t); -- -- default: -- if (TYPE_P (t)) -- { -- if (mentions_vars_p_type (t)) -- return true; -- } -- else if (EXPR_P (t)) -- { -- if (mentions_vars_p_expr (t)) -- return true; -- } -- else if (CONSTANT_CLASS_P (t)) -- CHECK_NO_VAR (TREE_TYPE (t)); -- else -- gcc_unreachable (); -- } -- return false; --} -- -- --/* Return the resolution for the decl with index INDEX from DATA_IN. */ -- --static enum ld_plugin_symbol_resolution --get_resolution (struct data_in *data_in, unsigned index) --{ -- if (data_in->globals_resolution.exists ()) -- { -- ld_plugin_symbol_resolution_t ret; -- /* We can have references to not emitted functions in -- DECL_FUNCTION_PERSONALITY at least. So we can and have -- to indeed return LDPR_UNKNOWN in some cases. */ -- if (data_in->globals_resolution.length () <= index) -- return LDPR_UNKNOWN; -- ret = data_in->globals_resolution[index]; -- return ret; -- } -- else -- /* Delay resolution finding until decl merging. */ -- return LDPR_UNKNOWN; --} -- --/* We need to record resolutions until symbol table is read. */ --static void --register_resolution (struct lto_file_decl_data *file_data, tree decl, -- enum ld_plugin_symbol_resolution resolution) --{ -- bool existed; -- if (resolution == LDPR_UNKNOWN) -- return; -- if (!file_data->resolution_map) -- file_data->resolution_map -- = new hash_map; -- ld_plugin_symbol_resolution_t &res -- = file_data->resolution_map->get_or_insert (decl, &existed); -- if (!existed -- || resolution == LDPR_PREVAILING_DEF_IRONLY -- || resolution == LDPR_PREVAILING_DEF -- || resolution == LDPR_PREVAILING_DEF_IRONLY_EXP) -- res = resolution; --} -- --/* Register DECL with the global symbol table and change its -- name if necessary to avoid name clashes for static globals across -- different files. */ -- --static void --lto_register_var_decl_in_symtab (struct data_in *data_in, tree decl, -- unsigned ix) --{ -- tree context; -- -- /* Variable has file scope, not local. 
*/ -- if (!TREE_PUBLIC (decl) -- && !((context = decl_function_context (decl)) -- && auto_var_in_fn_p (decl, context))) -- rest_of_decl_compilation (decl, 1, 0); -- -- /* If this variable has already been declared, queue the -- declaration for merging. */ -- if (TREE_PUBLIC (decl)) -- register_resolution (data_in->file_data, -- decl, get_resolution (data_in, ix)); --} -- -- --/* Register DECL with the global symbol table and change its -- name if necessary to avoid name clashes for static globals across -- different files. DATA_IN contains descriptors and tables for the -- file being read. */ -- --static void --lto_register_function_decl_in_symtab (struct data_in *data_in, tree decl, -- unsigned ix) --{ -- /* If this variable has already been declared, queue the -- declaration for merging. */ -- if (TREE_PUBLIC (decl) && !DECL_ABSTRACT_P (decl)) -- register_resolution (data_in->file_data, -- decl, get_resolution (data_in, ix)); --} -- --/* Check if T is a decl and needs register its resolution info. */ -- --static void --lto_maybe_register_decl (struct data_in *data_in, tree t, unsigned ix) --{ -- if (TREE_CODE (t) == VAR_DECL) -- lto_register_var_decl_in_symtab (data_in, t, ix); -- else if (TREE_CODE (t) == FUNCTION_DECL -- && !fndecl_built_in_p (t)) -- lto_register_function_decl_in_symtab (data_in, t, ix); --} -- -- --/* For the type T re-materialize it in the type variant list and -- the pointer/reference-to chains. */ -- --static void --lto_fixup_prevailing_type (tree t) --{ -- /* The following re-creates proper variant lists while fixing up -- the variant leaders. We do not stream TYPE_NEXT_VARIANT so the -- variant list state before fixup is broken. */ -- -- /* If we are not our own variant leader link us into our new leaders -- variant list. */ -- if (TYPE_MAIN_VARIANT (t) != t) -- { -- tree mv = TYPE_MAIN_VARIANT (t); -- TYPE_NEXT_VARIANT (t) = TYPE_NEXT_VARIANT (mv); -- TYPE_NEXT_VARIANT (mv) = t; -- } -- -- /* The following reconstructs the pointer chains -- of the new pointed-to type if we are a main variant. We do -- not stream those so they are broken before fixup. */ -- if (TREE_CODE (t) == POINTER_TYPE -- && TYPE_MAIN_VARIANT (t) == t) -- { -- TYPE_NEXT_PTR_TO (t) = TYPE_POINTER_TO (TREE_TYPE (t)); -- TYPE_POINTER_TO (TREE_TYPE (t)) = t; -- } -- else if (TREE_CODE (t) == REFERENCE_TYPE -- && TYPE_MAIN_VARIANT (t) == t) -- { -- TYPE_NEXT_REF_TO (t) = TYPE_REFERENCE_TO (TREE_TYPE (t)); -- TYPE_REFERENCE_TO (TREE_TYPE (t)) = t; -- } --} -- -- --/* We keep prevailing tree SCCs in a hashtable with manual collision -- handling (in case all hashes compare the same) and keep the colliding -- entries in the tree_scc->next chain. */ -- --struct tree_scc --{ -- tree_scc *next; -- /* Hash of the whole SCC. */ -- hashval_t hash; -- /* Number of trees in the SCC. */ -- unsigned len; -- /* Number of possible entries into the SCC (tree nodes [0..entry_len-1] -- which share the same individual tree hash). */ -- unsigned entry_len; -- /* The members of the SCC. -- We only need to remember the first entry node candidate for prevailing -- SCCs (but of course have access to all entries for SCCs we are -- processing). -- ??? For prevailing SCCs we really only need hash and the first -- entry candidate, but that's too awkward to implement. 
*/ -- tree entries[1]; --}; -- --struct tree_scc_hasher : nofree_ptr_hash --{ -- static inline hashval_t hash (const tree_scc *); -- static inline bool equal (const tree_scc *, const tree_scc *); --}; -- --hashval_t --tree_scc_hasher::hash (const tree_scc *scc) --{ -- return scc->hash; --} -- --bool --tree_scc_hasher::equal (const tree_scc *scc1, const tree_scc *scc2) --{ -- if (scc1->hash != scc2->hash -- || scc1->len != scc2->len -- || scc1->entry_len != scc2->entry_len) -- return false; -- return true; --} -- --static hash_table *tree_scc_hash; --static struct obstack tree_scc_hash_obstack; -- --static unsigned long num_merged_types; --static unsigned long num_prevailing_types; --static unsigned long num_type_scc_trees; --static unsigned long total_scc_size; --static unsigned long num_sccs_read; --static unsigned long total_scc_size_merged; --static unsigned long num_sccs_merged; --static unsigned long num_scc_compares; --static unsigned long num_scc_compare_collisions; -- -- --/* Compare the two entries T1 and T2 of two SCCs that are possibly equal, -- recursing through in-SCC tree edges. Returns true if the SCCs entered -- through T1 and T2 are equal and fills in *MAP with the pairs of -- SCC entries we visited, starting with (*MAP)[0] = T1 and (*MAP)[1] = T2. */ -- --static bool --compare_tree_sccs_1 (tree t1, tree t2, tree **map) --{ -- enum tree_code code; -- -- /* Mark already visited nodes. */ -- TREE_ASM_WRITTEN (t2) = 1; -- -- /* Push the pair onto map. */ -- (*map)[0] = t1; -- (*map)[1] = t2; -- *map = *map + 2; -- -- /* Compare value-fields. */ --#define compare_values(X) \ -- do { \ -- if (X(t1) != X(t2)) \ -- return false; \ -- } while (0) -- -- compare_values (TREE_CODE); -- code = TREE_CODE (t1); -- -- if (!TYPE_P (t1)) -- { -- compare_values (TREE_SIDE_EFFECTS); -- compare_values (TREE_CONSTANT); -- compare_values (TREE_READONLY); -- compare_values (TREE_PUBLIC); -- } -- compare_values (TREE_ADDRESSABLE); -- compare_values (TREE_THIS_VOLATILE); -- if (DECL_P (t1)) -- compare_values (DECL_UNSIGNED); -- else if (TYPE_P (t1)) -- compare_values (TYPE_UNSIGNED); -- if (TYPE_P (t1)) -- compare_values (TYPE_ARTIFICIAL); -- else -- compare_values (TREE_NO_WARNING); -- compare_values (TREE_NOTHROW); -- compare_values (TREE_STATIC); -- if (code != TREE_BINFO) -- compare_values (TREE_PRIVATE); -- compare_values (TREE_PROTECTED); -- compare_values (TREE_DEPRECATED); -- if (TYPE_P (t1)) -- { -- if (AGGREGATE_TYPE_P (t1)) -- compare_values (TYPE_REVERSE_STORAGE_ORDER); -- else -- compare_values (TYPE_SATURATING); -- compare_values (TYPE_ADDR_SPACE); -- } -- else if (code == SSA_NAME) -- compare_values (SSA_NAME_IS_DEFAULT_DEF); -- -- if (CODE_CONTAINS_STRUCT (code, TS_INT_CST)) -- { -- if (wi::to_wide (t1) != wi::to_wide (t2)) -- return false; -- } -- -- if (CODE_CONTAINS_STRUCT (code, TS_REAL_CST)) -- { -- /* ??? No suitable compare routine available. 
*/ -- REAL_VALUE_TYPE r1 = TREE_REAL_CST (t1); -- REAL_VALUE_TYPE r2 = TREE_REAL_CST (t2); -- if (r1.cl != r2.cl -- || r1.decimal != r2.decimal -- || r1.sign != r2.sign -- || r1.signalling != r2.signalling -- || r1.canonical != r2.canonical -- || r1.uexp != r2.uexp) -- return false; -- for (unsigned i = 0; i < SIGSZ; ++i) -- if (r1.sig[i] != r2.sig[i]) -- return false; -- } -- -- if (CODE_CONTAINS_STRUCT (code, TS_FIXED_CST)) -- if (!fixed_compare (EQ_EXPR, -- TREE_FIXED_CST_PTR (t1), TREE_FIXED_CST_PTR (t2))) -- return false; -- -- if (CODE_CONTAINS_STRUCT (code, TS_VECTOR)) -- { -- compare_values (VECTOR_CST_LOG2_NPATTERNS); -- compare_values (VECTOR_CST_NELTS_PER_PATTERN); -- } -- -- if (CODE_CONTAINS_STRUCT (code, TS_DECL_COMMON)) -- { -- compare_values (DECL_MODE); -- compare_values (DECL_NONLOCAL); -- compare_values (DECL_VIRTUAL_P); -- compare_values (DECL_IGNORED_P); -- compare_values (DECL_ABSTRACT_P); -- compare_values (DECL_ARTIFICIAL); -- compare_values (DECL_USER_ALIGN); -- compare_values (DECL_PRESERVE_P); -- compare_values (DECL_EXTERNAL); -- compare_values (DECL_GIMPLE_REG_P); -- compare_values (DECL_ALIGN); -- if (code == LABEL_DECL) -- { -- compare_values (EH_LANDING_PAD_NR); -- compare_values (LABEL_DECL_UID); -- } -- else if (code == FIELD_DECL) -- { -- compare_values (DECL_PACKED); -- compare_values (DECL_NONADDRESSABLE_P); -- compare_values (DECL_PADDING_P); -- compare_values (DECL_OFFSET_ALIGN); -- } -- else if (code == VAR_DECL) -- { -- compare_values (DECL_HAS_DEBUG_EXPR_P); -- compare_values (DECL_NONLOCAL_FRAME); -- } -- if (code == RESULT_DECL -- || code == PARM_DECL -- || code == VAR_DECL) -- { -- compare_values (DECL_BY_REFERENCE); -- if (code == VAR_DECL -- || code == PARM_DECL) -- compare_values (DECL_HAS_VALUE_EXPR_P); -- } -- } -- -- if (CODE_CONTAINS_STRUCT (code, TS_DECL_WRTL)) -- compare_values (DECL_REGISTER); -- -- if (CODE_CONTAINS_STRUCT (code, TS_DECL_WITH_VIS)) -- { -- compare_values (DECL_COMMON); -- compare_values (DECL_DLLIMPORT_P); -- compare_values (DECL_WEAK); -- compare_values (DECL_SEEN_IN_BIND_EXPR_P); -- compare_values (DECL_COMDAT); -- compare_values (DECL_VISIBILITY); -- compare_values (DECL_VISIBILITY_SPECIFIED); -- if (code == VAR_DECL) -- { -- compare_values (DECL_HARD_REGISTER); -- /* DECL_IN_TEXT_SECTION is set during final asm output only. 
*/ -- compare_values (DECL_IN_CONSTANT_POOL); -- } -- } -- -- if (CODE_CONTAINS_STRUCT (code, TS_FUNCTION_DECL)) -- { -- compare_values (DECL_BUILT_IN_CLASS); -- compare_values (DECL_STATIC_CONSTRUCTOR); -- compare_values (DECL_STATIC_DESTRUCTOR); -- compare_values (DECL_UNINLINABLE); -- compare_values (DECL_POSSIBLY_INLINED); -- compare_values (DECL_IS_NOVOPS); -- compare_values (DECL_IS_RETURNS_TWICE); -- compare_values (DECL_IS_MALLOC); -- compare_values (DECL_IS_OPERATOR_NEW); -- compare_values (DECL_DECLARED_INLINE_P); -- compare_values (DECL_STATIC_CHAIN); -- compare_values (DECL_NO_INLINE_WARNING_P); -- compare_values (DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT); -- compare_values (DECL_NO_LIMIT_STACK); -- compare_values (DECL_DISREGARD_INLINE_LIMITS); -- compare_values (DECL_PURE_P); -- compare_values (DECL_LOOPING_CONST_OR_PURE_P); -- compare_values (DECL_FINAL_P); -- compare_values (DECL_CXX_CONSTRUCTOR_P); -- compare_values (DECL_CXX_DESTRUCTOR_P); -- if (DECL_BUILT_IN_CLASS (t1) != NOT_BUILT_IN) -- compare_values (DECL_FUNCTION_CODE); -- } -- -- if (CODE_CONTAINS_STRUCT (code, TS_TYPE_COMMON)) -- { -- compare_values (TYPE_MODE); -- compare_values (TYPE_STRING_FLAG); -- compare_values (TYPE_NEEDS_CONSTRUCTING); -- if (RECORD_OR_UNION_TYPE_P (t1)) -- { -- compare_values (TYPE_TRANSPARENT_AGGR); -- compare_values (TYPE_FINAL_P); -- } -- else if (code == ARRAY_TYPE) -- compare_values (TYPE_NONALIASED_COMPONENT); -- if (AGGREGATE_TYPE_P (t1)) -- compare_values (TYPE_TYPELESS_STORAGE); -- compare_values (TYPE_EMPTY_P); -- compare_values (TYPE_PACKED); -- compare_values (TYPE_RESTRICT); -- compare_values (TYPE_USER_ALIGN); -- compare_values (TYPE_READONLY); -- compare_values (TYPE_PRECISION); -- compare_values (TYPE_ALIGN); -- /* Do not compare TYPE_ALIAS_SET. Doing so introduce ordering issues -- with calls to get_alias_set which may initialize it for streamed -- in types. */ -- } -- -- /* We don't want to compare locations, so there is nothing do compare -- for TS_EXP. */ -- -- /* BLOCKs are function local and we don't merge anything there, so -- simply refuse to merge. 
*/ -- if (CODE_CONTAINS_STRUCT (code, TS_BLOCK)) -- return false; -- -- if (CODE_CONTAINS_STRUCT (code, TS_TRANSLATION_UNIT_DECL)) -- if (strcmp (TRANSLATION_UNIT_LANGUAGE (t1), -- TRANSLATION_UNIT_LANGUAGE (t2)) != 0) -- return false; -- -- if (CODE_CONTAINS_STRUCT (code, TS_TARGET_OPTION)) -- if (!cl_target_option_eq (TREE_TARGET_OPTION (t1), TREE_TARGET_OPTION (t2))) -- return false; -- -- if (CODE_CONTAINS_STRUCT (code, TS_OPTIMIZATION)) -- if (!cl_optimization_option_eq (TREE_OPTIMIZATION (t1), -- TREE_OPTIMIZATION (t2))) -- return false; -- -- if (CODE_CONTAINS_STRUCT (code, TS_BINFO)) -- if (vec_safe_length (BINFO_BASE_ACCESSES (t1)) -- != vec_safe_length (BINFO_BASE_ACCESSES (t2))) -- return false; -- -- if (CODE_CONTAINS_STRUCT (code, TS_CONSTRUCTOR)) -- compare_values (CONSTRUCTOR_NELTS); -- -- if (CODE_CONTAINS_STRUCT (code, TS_IDENTIFIER)) -- if (IDENTIFIER_LENGTH (t1) != IDENTIFIER_LENGTH (t2) -- || memcmp (IDENTIFIER_POINTER (t1), IDENTIFIER_POINTER (t2), -- IDENTIFIER_LENGTH (t1)) != 0) -- return false; -- -- if (CODE_CONTAINS_STRUCT (code, TS_STRING)) -- if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2) -- || memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2), -- TREE_STRING_LENGTH (t1)) != 0) -- return false; -- -- if (code == OMP_CLAUSE) -- { -- compare_values (OMP_CLAUSE_CODE); -- switch (OMP_CLAUSE_CODE (t1)) -- { -- case OMP_CLAUSE_DEFAULT: -- compare_values (OMP_CLAUSE_DEFAULT_KIND); -- break; -- case OMP_CLAUSE_SCHEDULE: -- compare_values (OMP_CLAUSE_SCHEDULE_KIND); -- break; -- case OMP_CLAUSE_DEPEND: -- compare_values (OMP_CLAUSE_DEPEND_KIND); -- break; -- case OMP_CLAUSE_MAP: -- compare_values (OMP_CLAUSE_MAP_KIND); -- break; -- case OMP_CLAUSE_PROC_BIND: -- compare_values (OMP_CLAUSE_PROC_BIND_KIND); -- break; -- case OMP_CLAUSE_REDUCTION: -- compare_values (OMP_CLAUSE_REDUCTION_CODE); -- compare_values (OMP_CLAUSE_REDUCTION_GIMPLE_INIT); -- compare_values (OMP_CLAUSE_REDUCTION_GIMPLE_MERGE); -- break; -- default: -- break; -- } -- } -- --#undef compare_values -- -- -- /* Compare pointer fields. */ -- -- /* Recurse. Search & Replaced from DFS_write_tree_body. -- Folding the early checks into the compare_tree_edges recursion -- macro makes debugging way quicker as you are able to break on -- compare_tree_sccs_1 and simply finish until a call returns false -- to spot the SCC members with the difference. */ --#define compare_tree_edges(E1, E2) \ -- do { \ -- tree t1_ = (E1), t2_ = (E2); \ -- if (t1_ != t2_ \ -- && (!t1_ || !t2_ \ -- || !TREE_VISITED (t2_) \ -- || (!TREE_ASM_WRITTEN (t2_) \ -- && !compare_tree_sccs_1 (t1_, t2_, map)))) \ -- return false; \ -- /* Only non-NULL trees outside of the SCC may compare equal. */ \ -- gcc_checking_assert (t1_ != t2_ || (!t2_ || !TREE_VISITED (t2_))); \ -- } while (0) -- -- if (CODE_CONTAINS_STRUCT (code, TS_TYPED)) -- { -- if (code != IDENTIFIER_NODE) -- compare_tree_edges (TREE_TYPE (t1), TREE_TYPE (t2)); -- } -- -- if (CODE_CONTAINS_STRUCT (code, TS_VECTOR)) -- { -- /* Note that the number of elements for EXPR has already been emitted -- in EXPR's header (see streamer_write_tree_header). 
*/ -- unsigned int count = vector_cst_encoded_nelts (t1); -- for (unsigned int i = 0; i < count; ++i) -- compare_tree_edges (VECTOR_CST_ENCODED_ELT (t1, i), -- VECTOR_CST_ENCODED_ELT (t2, i)); -- } -- -- if (CODE_CONTAINS_STRUCT (code, TS_COMPLEX)) -- { -- compare_tree_edges (TREE_REALPART (t1), TREE_REALPART (t2)); -- compare_tree_edges (TREE_IMAGPART (t1), TREE_IMAGPART (t2)); -- } -- -- if (CODE_CONTAINS_STRUCT (code, TS_DECL_MINIMAL)) -- { -- compare_tree_edges (DECL_NAME (t1), DECL_NAME (t2)); -- /* ??? Global decls from different TUs have non-matching -- TRANSLATION_UNIT_DECLs. Only consider a small set of -- decls equivalent, we should not end up merging others. */ -- if ((code == TYPE_DECL -- || code == NAMESPACE_DECL -- || code == IMPORTED_DECL -- || code == CONST_DECL -- || (VAR_OR_FUNCTION_DECL_P (t1) -- && (TREE_PUBLIC (t1) || DECL_EXTERNAL (t1)))) -- && DECL_FILE_SCOPE_P (t1) && DECL_FILE_SCOPE_P (t2)) -- ; -- else -- compare_tree_edges (DECL_CONTEXT (t1), DECL_CONTEXT (t2)); -- } -- -- if (CODE_CONTAINS_STRUCT (code, TS_DECL_COMMON)) -- { -- compare_tree_edges (DECL_SIZE (t1), DECL_SIZE (t2)); -- compare_tree_edges (DECL_SIZE_UNIT (t1), DECL_SIZE_UNIT (t2)); -- compare_tree_edges (DECL_ATTRIBUTES (t1), DECL_ATTRIBUTES (t2)); -- compare_tree_edges (DECL_ABSTRACT_ORIGIN (t1), DECL_ABSTRACT_ORIGIN (t2)); -- if ((code == VAR_DECL -- || code == PARM_DECL) -- && DECL_HAS_VALUE_EXPR_P (t1)) -- compare_tree_edges (DECL_VALUE_EXPR (t1), DECL_VALUE_EXPR (t2)); -- if (code == VAR_DECL -- && DECL_HAS_DEBUG_EXPR_P (t1)) -- compare_tree_edges (DECL_DEBUG_EXPR (t1), DECL_DEBUG_EXPR (t2)); -- /* LTO specific edges. */ -- if (code != FUNCTION_DECL -- && code != TRANSLATION_UNIT_DECL) -- compare_tree_edges (DECL_INITIAL (t1), DECL_INITIAL (t2)); -- } -- -- if (CODE_CONTAINS_STRUCT (code, TS_DECL_NON_COMMON)) -- { -- if (code == FUNCTION_DECL) -- { -- tree a1, a2; -- for (a1 = DECL_ARGUMENTS (t1), a2 = DECL_ARGUMENTS (t2); -- a1 || a2; -- a1 = TREE_CHAIN (a1), a2 = TREE_CHAIN (a2)) -- compare_tree_edges (a1, a2); -- compare_tree_edges (DECL_RESULT (t1), DECL_RESULT (t2)); -- } -- else if (code == TYPE_DECL) -- compare_tree_edges (DECL_ORIGINAL_TYPE (t1), DECL_ORIGINAL_TYPE (t2)); -- } -- -- if (CODE_CONTAINS_STRUCT (code, TS_DECL_WITH_VIS)) -- { -- /* Make sure we don't inadvertently set the assembler name. 
*/ -- if (DECL_ASSEMBLER_NAME_SET_P (t1)) -- compare_tree_edges (DECL_ASSEMBLER_NAME (t1), -- DECL_ASSEMBLER_NAME (t2)); -- } -- -- if (CODE_CONTAINS_STRUCT (code, TS_FIELD_DECL)) -- { -- compare_tree_edges (DECL_FIELD_OFFSET (t1), DECL_FIELD_OFFSET (t2)); -- compare_tree_edges (DECL_BIT_FIELD_TYPE (t1), DECL_BIT_FIELD_TYPE (t2)); -- compare_tree_edges (DECL_BIT_FIELD_REPRESENTATIVE (t1), -- DECL_BIT_FIELD_REPRESENTATIVE (t2)); -- compare_tree_edges (DECL_FIELD_BIT_OFFSET (t1), -- DECL_FIELD_BIT_OFFSET (t2)); -- compare_tree_edges (DECL_FCONTEXT (t1), DECL_FCONTEXT (t2)); -- } -- -- if (CODE_CONTAINS_STRUCT (code, TS_FUNCTION_DECL)) -- { -- compare_tree_edges (DECL_FUNCTION_PERSONALITY (t1), -- DECL_FUNCTION_PERSONALITY (t2)); -- compare_tree_edges (DECL_VINDEX (t1), DECL_VINDEX (t2)); -- compare_tree_edges (DECL_FUNCTION_SPECIFIC_TARGET (t1), -- DECL_FUNCTION_SPECIFIC_TARGET (t2)); -- compare_tree_edges (DECL_FUNCTION_SPECIFIC_OPTIMIZATION (t1), -- DECL_FUNCTION_SPECIFIC_OPTIMIZATION (t2)); -- } -- -- if (CODE_CONTAINS_STRUCT (code, TS_TYPE_COMMON)) -- { -- compare_tree_edges (TYPE_SIZE (t1), TYPE_SIZE (t2)); -- compare_tree_edges (TYPE_SIZE_UNIT (t1), TYPE_SIZE_UNIT (t2)); -- compare_tree_edges (TYPE_ATTRIBUTES (t1), TYPE_ATTRIBUTES (t2)); -- compare_tree_edges (TYPE_NAME (t1), TYPE_NAME (t2)); -- /* Do not compare TYPE_POINTER_TO or TYPE_REFERENCE_TO. They will be -- reconstructed during fixup. */ -- /* Do not compare TYPE_NEXT_VARIANT, we reconstruct the variant lists -- during fixup. */ -- compare_tree_edges (TYPE_MAIN_VARIANT (t1), TYPE_MAIN_VARIANT (t2)); -- /* ??? Global types from different TUs have non-matching -- TRANSLATION_UNIT_DECLs. Still merge them if they are otherwise -- equal. */ -- if (TYPE_FILE_SCOPE_P (t1) && TYPE_FILE_SCOPE_P (t2)) -- ; -- else -- compare_tree_edges (TYPE_CONTEXT (t1), TYPE_CONTEXT (t2)); -- /* TYPE_CANONICAL is re-computed during type merging, so do not -- compare it here. */ -- compare_tree_edges (TYPE_STUB_DECL (t1), TYPE_STUB_DECL (t2)); -- } -- -- if (CODE_CONTAINS_STRUCT (code, TS_TYPE_NON_COMMON)) -- { -- if (code == ENUMERAL_TYPE) -- compare_tree_edges (TYPE_VALUES (t1), TYPE_VALUES (t2)); -- else if (code == ARRAY_TYPE) -- compare_tree_edges (TYPE_DOMAIN (t1), TYPE_DOMAIN (t2)); -- else if (RECORD_OR_UNION_TYPE_P (t1)) -- { -- tree f1, f2; -- for (f1 = TYPE_FIELDS (t1), f2 = TYPE_FIELDS (t2); -- f1 || f2; -- f1 = TREE_CHAIN (f1), f2 = TREE_CHAIN (f2)) -- compare_tree_edges (f1, f2); -- } -- else if (code == FUNCTION_TYPE -- || code == METHOD_TYPE) -- compare_tree_edges (TYPE_ARG_TYPES (t1), TYPE_ARG_TYPES (t2)); -- -- if (!POINTER_TYPE_P (t1)) -- compare_tree_edges (TYPE_MIN_VALUE_RAW (t1), TYPE_MIN_VALUE_RAW (t2)); -- compare_tree_edges (TYPE_MAX_VALUE_RAW (t1), TYPE_MAX_VALUE_RAW (t2)); -- } -- -- if (CODE_CONTAINS_STRUCT (code, TS_LIST)) -- { -- compare_tree_edges (TREE_PURPOSE (t1), TREE_PURPOSE (t2)); -- compare_tree_edges (TREE_VALUE (t1), TREE_VALUE (t2)); -- compare_tree_edges (TREE_CHAIN (t1), TREE_CHAIN (t2)); -- } -- -- if (CODE_CONTAINS_STRUCT (code, TS_VEC)) -- for (int i = 0; i < TREE_VEC_LENGTH (t1); i++) -- compare_tree_edges (TREE_VEC_ELT (t1, i), TREE_VEC_ELT (t2, i)); -- -- if (CODE_CONTAINS_STRUCT (code, TS_EXP)) -- { -- for (int i = 0; i < TREE_OPERAND_LENGTH (t1); i++) -- compare_tree_edges (TREE_OPERAND (t1, i), -- TREE_OPERAND (t2, i)); -- -- /* BLOCKs are function local and we don't merge anything there. 
*/ -- if (TREE_BLOCK (t1) || TREE_BLOCK (t2)) -- return false; -- } -- -- if (CODE_CONTAINS_STRUCT (code, TS_BINFO)) -- { -- unsigned i; -- tree t; -- /* Lengths have already been compared above. */ -- FOR_EACH_VEC_ELT (*BINFO_BASE_BINFOS (t1), i, t) -- compare_tree_edges (t, BINFO_BASE_BINFO (t2, i)); -- FOR_EACH_VEC_SAFE_ELT (BINFO_BASE_ACCESSES (t1), i, t) -- compare_tree_edges (t, BINFO_BASE_ACCESS (t2, i)); -- compare_tree_edges (BINFO_OFFSET (t1), BINFO_OFFSET (t2)); -- compare_tree_edges (BINFO_VTABLE (t1), BINFO_VTABLE (t2)); -- compare_tree_edges (BINFO_VPTR_FIELD (t1), BINFO_VPTR_FIELD (t2)); -- /* Do not walk BINFO_INHERITANCE_CHAIN, BINFO_SUBVTT_INDEX -- and BINFO_VPTR_INDEX; these are used by C++ FE only. */ -- } -- -- if (CODE_CONTAINS_STRUCT (code, TS_CONSTRUCTOR)) -- { -- unsigned i; -- tree index, value; -- /* Lengths have already been compared above. */ -- FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (t1), i, index, value) -- { -- compare_tree_edges (index, CONSTRUCTOR_ELT (t2, i)->index); -- compare_tree_edges (value, CONSTRUCTOR_ELT (t2, i)->value); -- } -- } -- -- if (code == OMP_CLAUSE) -- { -- int i; -- -- for (i = 0; i < omp_clause_num_ops[OMP_CLAUSE_CODE (t1)]; i++) -- compare_tree_edges (OMP_CLAUSE_OPERAND (t1, i), -- OMP_CLAUSE_OPERAND (t2, i)); -- compare_tree_edges (OMP_CLAUSE_CHAIN (t1), OMP_CLAUSE_CHAIN (t2)); -- } -- --#undef compare_tree_edges -- -- return true; --} -- --/* Compare the tree scc SCC to the prevailing candidate PSCC, filling -- out MAP if they are equal. */ -- --static bool --compare_tree_sccs (tree_scc *pscc, tree_scc *scc, -- tree *map) --{ -- /* Assume SCC entry hashes are sorted after their cardinality. Which -- means we can simply take the first n-tuple of equal hashes -- (which is recorded as entry_len) and do n SCC entry candidate -- comparisons. */ -- for (unsigned i = 0; i < pscc->entry_len; ++i) -- { -- tree *mapp = map; -- num_scc_compare_collisions++; -- if (compare_tree_sccs_1 (pscc->entries[0], scc->entries[i], &mapp)) -- { -- /* Equal - no need to reset TREE_VISITED or TREE_ASM_WRITTEN -- on the scc as all trees will be freed. */ -- return true; -- } -- /* Reset TREE_ASM_WRITTEN on scc for the next compare or in case -- the SCC prevails. */ -- for (unsigned j = 0; j < scc->len; ++j) -- TREE_ASM_WRITTEN (scc->entries[j]) = 0; -- } -- -- return false; --} -- --/* QSort sort function to sort a map of two pointers after the 2nd -- pointer. */ -- --static int --cmp_tree (const void *p1_, const void *p2_) --{ -- tree *p1 = (tree *)(const_cast(p1_)); -- tree *p2 = (tree *)(const_cast(p2_)); -- if (p1[1] == p2[1]) -- return 0; -- return ((uintptr_t)p1[1] < (uintptr_t)p2[1]) ? -1 : 1; --} -- --/* Try to unify the SCC with nodes FROM to FROM + LEN in CACHE and -- hash value SCC_HASH with an already recorded SCC. Return true if -- that was successful, otherwise return false. */ -- --static bool --unify_scc (struct data_in *data_in, unsigned from, -- unsigned len, unsigned scc_entry_len, hashval_t scc_hash) --{ -- bool unified_p = false; -- struct streamer_tree_cache_d *cache = data_in->reader_cache; -- tree_scc *scc -- = (tree_scc *) alloca (sizeof (tree_scc) + (len - 1) * sizeof (tree)); -- scc->next = NULL; -- scc->hash = scc_hash; -- scc->len = len; -- scc->entry_len = scc_entry_len; -- for (unsigned i = 0; i < len; ++i) -- { -- tree t = streamer_tree_cache_get_tree (cache, from + i); -- scc->entries[i] = t; -- /* Do not merge SCCs with local entities inside them. Also do -- not merge TRANSLATION_UNIT_DECLs. 
*/ -- if (TREE_CODE (t) == TRANSLATION_UNIT_DECL -- || (VAR_OR_FUNCTION_DECL_P (t) -- && !(TREE_PUBLIC (t) || DECL_EXTERNAL (t))) -- || TREE_CODE (t) == LABEL_DECL) -- { -- /* Avoid doing any work for these cases and do not worry to -- record the SCCs for further merging. */ -- return false; -- } -- } -- -- /* Look for the list of candidate SCCs to compare against. */ -- tree_scc **slot; -- slot = tree_scc_hash->find_slot_with_hash (scc, scc_hash, INSERT); -- if (*slot) -- { -- /* Try unifying against each candidate. */ -- num_scc_compares++; -- -- /* Set TREE_VISITED on the scc so we can easily identify tree nodes -- outside of the scc when following tree edges. Make sure -- that TREE_ASM_WRITTEN is unset so we can use it as 2nd bit -- to track whether we visited the SCC member during the compare. -- We cannot use TREE_VISITED on the pscc members as the extended -- scc and pscc can overlap. */ -- for (unsigned i = 0; i < scc->len; ++i) -- { -- TREE_VISITED (scc->entries[i]) = 1; -- gcc_checking_assert (!TREE_ASM_WRITTEN (scc->entries[i])); -- } -- -- tree *map = XALLOCAVEC (tree, 2 * len); -- for (tree_scc *pscc = *slot; pscc; pscc = pscc->next) -- { -- if (!compare_tree_sccs (pscc, scc, map)) -- continue; -- -- /* Found an equal SCC. */ -- unified_p = true; -- num_scc_compare_collisions--; -- num_sccs_merged++; -- total_scc_size_merged += len; -- -- if (flag_checking) -- for (unsigned i = 0; i < len; ++i) -- { -- tree t = map[2*i+1]; -- enum tree_code code = TREE_CODE (t); -- /* IDENTIFIER_NODEs should be singletons and are merged by the -- streamer. The others should be singletons, too, and we -- should not merge them in any way. */ -- gcc_assert (code != TRANSLATION_UNIT_DECL -- && code != IDENTIFIER_NODE); -- } -- -- /* Fixup the streamer cache with the prevailing nodes according -- to the tree node mapping computed by compare_tree_sccs. */ -- if (len == 1) -- { -- /* If we got a debug reference queued, see if the prevailing -- tree has a debug reference and if not, register the one -- for the tree we are about to throw away. */ -- if (dref_queue.length () == 1) -- { -- dref_entry e = dref_queue.pop (); -- gcc_assert (e.decl -- == streamer_tree_cache_get_tree (cache, from)); -- const char *sym; -- unsigned HOST_WIDE_INT off; -- if (!debug_hooks->die_ref_for_decl (pscc->entries[0], &sym, -- &off)) -- debug_hooks->register_external_die (pscc->entries[0], -- e.sym, e.off); -- } -- lto_maybe_register_decl (data_in, pscc->entries[0], from); -- streamer_tree_cache_replace_tree (cache, pscc->entries[0], from); -- } -- else -- { -- tree *map2 = XALLOCAVEC (tree, 2 * len); -- for (unsigned i = 0; i < len; ++i) -- { -- map2[i*2] = (tree)(uintptr_t)(from + i); -- map2[i*2+1] = scc->entries[i]; -- } -- qsort (map2, len, 2 * sizeof (tree), cmp_tree); -- qsort (map, len, 2 * sizeof (tree), cmp_tree); -- for (unsigned i = 0; i < len; ++i) -- { -- lto_maybe_register_decl (data_in, map[2*i], -- (uintptr_t)map2[2*i]); -- streamer_tree_cache_replace_tree (cache, map[2*i], -- (uintptr_t)map2[2*i]); -- } -- } -- -- /* Free the tree nodes from the read SCC. */ -- data_in->location_cache.revert_location_cache (); -- for (unsigned i = 0; i < len; ++i) -- { -- if (TYPE_P (scc->entries[i])) -- num_merged_types++; -- free_node (scc->entries[i]); -- } -- -- /* Drop DIE references. -- ??? Do as in the size-one SCC case which involves sorting -- the queue. */ -- dref_queue.truncate (0); -- -- break; -- } -- -- /* Reset TREE_VISITED if we didn't unify the SCC with another. 
*/ -- if (!unified_p) -- for (unsigned i = 0; i < scc->len; ++i) -- TREE_VISITED (scc->entries[i]) = 0; -- } -- -- /* If we didn't unify it to any candidate duplicate the relevant -- pieces to permanent storage and link it into the chain. */ -- if (!unified_p) -- { -- tree_scc *pscc -- = XOBNEWVAR (&tree_scc_hash_obstack, tree_scc, sizeof (tree_scc)); -- memcpy (pscc, scc, sizeof (tree_scc)); -- pscc->next = (*slot); -- *slot = pscc; -- } -- return unified_p; --} -- -- --/* Read all the symbols from buffer DATA, using descriptors in DECL_DATA. -- RESOLUTIONS is the set of symbols picked by the linker (read from the -- resolution file when the linker plugin is being used). */ -- --static void --lto_read_decls (struct lto_file_decl_data *decl_data, const void *data, -- vec resolutions) --{ -- const struct lto_decl_header *header = (const struct lto_decl_header *) data; -- const int decl_offset = sizeof (struct lto_decl_header); -- const int main_offset = decl_offset + header->decl_state_size; -- const int string_offset = main_offset + header->main_size; -- struct data_in *data_in; -- unsigned int i; -- const uint32_t *data_ptr, *data_end; -- uint32_t num_decl_states; -- -- lto_input_block ib_main ((const char *) data + main_offset, -- header->main_size, decl_data->mode_table); -- -- data_in = lto_data_in_create (decl_data, (const char *) data + string_offset, -- header->string_size, resolutions); -- -- /* We do not uniquify the pre-loaded cache entries, those are middle-end -- internal types that should not be merged. */ -- -- /* Read the global declarations and types. */ -- while (ib_main.p < ib_main.len) -- { -- tree t; -- unsigned from = data_in->reader_cache->nodes.length (); -- /* Read and uniquify SCCs as in the input stream. */ -- enum LTO_tags tag = streamer_read_record_start (&ib_main); -- if (tag == LTO_tree_scc) -- { -- unsigned len_; -- unsigned scc_entry_len; -- hashval_t scc_hash = lto_input_scc (&ib_main, data_in, &len_, -- &scc_entry_len); -- unsigned len = data_in->reader_cache->nodes.length () - from; -- gcc_assert (len == len_); -- -- total_scc_size += len; -- num_sccs_read++; -- -- /* We have the special case of size-1 SCCs that are pre-merged -- by means of identifier and string sharing for example. -- ??? Maybe we should avoid streaming those as SCCs. */ -- tree first = streamer_tree_cache_get_tree (data_in->reader_cache, -- from); -- if (len == 1 -- && (TREE_CODE (first) == IDENTIFIER_NODE -- || (TREE_CODE (first) == INTEGER_CST -- && !TREE_OVERFLOW (first)))) -- continue; -- -- /* Try to unify the SCC with already existing ones. */ -- if (!flag_ltrans -- && unify_scc (data_in, from, -- len, scc_entry_len, scc_hash)) -- continue; -- -- /* Tree merging failed, mark entries in location cache as -- permanent. */ -- data_in->location_cache.accept_location_cache (); -- -- bool seen_type = false; -- for (unsigned i = 0; i < len; ++i) -- { -- tree t = streamer_tree_cache_get_tree (data_in->reader_cache, -- from + i); -- /* Reconstruct the type variant and pointer-to/reference-to -- chains. */ -- if (TYPE_P (t)) -- { -- seen_type = true; -- num_prevailing_types++; -- lto_fixup_prevailing_type (t); -- -- /* Compute the canonical type of all types. -- Because SCC components are streamed in random (hash) order -- we may have encountered the type before while registering -- type canonical of a derived type in the same SCC. 
*/ -- if (!TYPE_CANONICAL (t)) -- gimple_register_canonical_type (t); -- if (TYPE_MAIN_VARIANT (t) == t && odr_type_p (t)) -- register_odr_type (t); -- } -- /* Link shared INTEGER_CSTs into TYPE_CACHED_VALUEs of its -- type which is also member of this SCC. */ -- if (TREE_CODE (t) == INTEGER_CST -- && !TREE_OVERFLOW (t)) -- cache_integer_cst (t); -- if (!flag_ltrans) -- { -- lto_maybe_register_decl (data_in, t, from + i); -- /* Scan the tree for references to global functions or -- variables and record those for later fixup. */ -- if (mentions_vars_p (t)) -- vec_safe_push (tree_with_vars, t); -- } -- } -- -- /* Register DECLs with the debuginfo machinery. */ -- while (!dref_queue.is_empty ()) -- { -- dref_entry e = dref_queue.pop (); -- debug_hooks->register_external_die (e.decl, e.sym, e.off); -- } -- -- if (seen_type) -- num_type_scc_trees += len; -- } -- else -- { -- /* Pickle stray references. */ -- t = lto_input_tree_1 (&ib_main, data_in, tag, 0); -- gcc_assert (t && data_in->reader_cache->nodes.length () == from); -- } -- } -- data_in->location_cache.apply_location_cache (); -- -- /* Read in lto_in_decl_state objects. */ -- data_ptr = (const uint32_t *) ((const char*) data + decl_offset); -- data_end = -- (const uint32_t *) ((const char*) data_ptr + header->decl_state_size); -- num_decl_states = *data_ptr++; -- -- gcc_assert (num_decl_states > 0); -- decl_data->global_decl_state = lto_new_in_decl_state (); -- data_ptr = lto_read_in_decl_state (data_in, data_ptr, -- decl_data->global_decl_state); -- -- /* Read in per-function decl states and enter them in hash table. */ -- decl_data->function_decl_states = -- hash_table::create_ggc (37); -- -- for (i = 1; i < num_decl_states; i++) -- { -- struct lto_in_decl_state *state = lto_new_in_decl_state (); -- -- data_ptr = lto_read_in_decl_state (data_in, data_ptr, state); -- lto_in_decl_state **slot -- = decl_data->function_decl_states->find_slot (state, INSERT); -- gcc_assert (*slot == NULL); -- *slot = state; -- } -- -- if (data_ptr != data_end) -- internal_error ("bytecode stream: garbage at the end of symbols section"); -- -- /* Set the current decl state to be the global state. */ -- decl_data->current_decl_state = decl_data->global_decl_state; -- -- lto_data_in_delete (data_in); --} -- --/* Custom version of strtoll, which is not portable. */ -- --static int64_t --lto_parse_hex (const char *p) --{ -- int64_t ret = 0; -- -- for (; *p != '\0'; ++p) -- { -- char c = *p; -- unsigned char part; -- ret <<= 4; -- if (c >= '0' && c <= '9') -- part = c - '0'; -- else if (c >= 'a' && c <= 'f') -- part = c - 'a' + 10; -- else if (c >= 'A' && c <= 'F') -- part = c - 'A' + 10; -- else -- internal_error ("could not parse hex number"); -- ret |= part; -- } -- -- return ret; --} -- --/* Read resolution for file named FILE_NAME. The resolution is read from -- RESOLUTION. */ -- --static void --lto_resolution_read (splay_tree file_ids, FILE *resolution, lto_file *file) --{ -- /* We require that objects in the resolution file are in the same -- order as the lto1 command line. */ -- unsigned int name_len; -- char *obj_name; -- unsigned int num_symbols; -- unsigned int i; -- struct lto_file_decl_data *file_data; -- splay_tree_node nd = NULL; -- -- if (!resolution) -- return; -- -- name_len = strlen (file->filename); -- obj_name = XNEWVEC (char, name_len + 1); -- fscanf (resolution, " "); /* Read white space. 
*/ -- -- fread (obj_name, sizeof (char), name_len, resolution); -- obj_name[name_len] = '\0'; -- if (filename_cmp (obj_name, file->filename) != 0) -- internal_error ("unexpected file name %s in linker resolution file. " -- "Expected %s", obj_name, file->filename); -- if (file->offset != 0) -- { -- int t; -- char offset_p[17]; -- int64_t offset; -- t = fscanf (resolution, "@0x%16s", offset_p); -- if (t != 1) -- internal_error ("could not parse file offset"); -- offset = lto_parse_hex (offset_p); -- if (offset != file->offset) -- internal_error ("unexpected offset"); -- } -- -- free (obj_name); -- -- fscanf (resolution, "%u", &num_symbols); -- -- for (i = 0; i < num_symbols; i++) -- { -- int t; -- unsigned index; -- unsigned HOST_WIDE_INT id; -- char r_str[27]; -- enum ld_plugin_symbol_resolution r = (enum ld_plugin_symbol_resolution) 0; -- unsigned int j; -- unsigned int lto_resolution_str_len = -- sizeof (lto_resolution_str) / sizeof (char *); -- res_pair rp; -- -- t = fscanf (resolution, "%u " HOST_WIDE_INT_PRINT_HEX_PURE " %26s %*[^\n]\n", -- &index, &id, r_str); -- if (t != 3) -- internal_error ("invalid line in the resolution file"); -- -- for (j = 0; j < lto_resolution_str_len; j++) -- { -- if (strcmp (lto_resolution_str[j], r_str) == 0) -- { -- r = (enum ld_plugin_symbol_resolution) j; -- break; -- } -- } -- if (j == lto_resolution_str_len) -- internal_error ("invalid resolution in the resolution file"); -- -- if (!(nd && lto_splay_tree_id_equal_p (nd->key, id))) -- { -- nd = lto_splay_tree_lookup (file_ids, id); -- if (nd == NULL) -- internal_error ("resolution sub id %wx not in object file", id); -- } -- -- file_data = (struct lto_file_decl_data *)nd->value; -- /* The indexes are very sparse. To save memory save them in a compact -- format that is only unpacked later when the subfile is processed. */ -- rp.res = r; -- rp.index = index; -- file_data->respairs.safe_push (rp); -- if (file_data->max_index < index) -- file_data->max_index = index; -- } --} -- --/* List of file_decl_datas */ --struct file_data_list -- { -- struct lto_file_decl_data *first, *last; -- }; -- --/* Is the name for a id'ed LTO section? */ -- --static int --lto_section_with_id (const char *name, unsigned HOST_WIDE_INT *id) --{ -- const char *s; -- -- if (strncmp (name, section_name_prefix, strlen (section_name_prefix))) -- return 0; -- s = strrchr (name, '.'); -- if (!s) -- return 0; -- /* If the section is not suffixed with an ID return. */ -- if ((size_t)(s - name) == strlen (section_name_prefix)) -- return 0; -- return sscanf (s, "." 
HOST_WIDE_INT_PRINT_HEX_PURE, id) == 1; --} -- --/* Create file_data of each sub file id */ -- --static int --create_subid_section_table (struct lto_section_slot *ls, splay_tree file_ids, -- struct file_data_list *list) --{ -- struct lto_section_slot s_slot, *new_slot; -- unsigned HOST_WIDE_INT id; -- splay_tree_node nd; -- void **hash_slot; -- char *new_name; -- struct lto_file_decl_data *file_data; -- -- if (!lto_section_with_id (ls->name, &id)) -- return 1; -- -- /* Find hash table of sub module id */ -- nd = lto_splay_tree_lookup (file_ids, id); -- if (nd != NULL) -- { -- file_data = (struct lto_file_decl_data *)nd->value; -- } -- else -- { -- file_data = ggc_alloc (); -- memset(file_data, 0, sizeof (struct lto_file_decl_data)); -- file_data->id = id; -- file_data->section_hash_table = lto_obj_create_section_hash_table (); -- lto_splay_tree_insert (file_ids, id, file_data); -- -- /* Maintain list in linker order */ -- if (!list->first) -- list->first = file_data; -- if (list->last) -- list->last->next = file_data; -- list->last = file_data; -- } -- -- /* Copy section into sub module hash table */ -- new_name = XDUPVEC (char, ls->name, strlen (ls->name) + 1); -- s_slot.name = new_name; -- hash_slot = htab_find_slot (file_data->section_hash_table, &s_slot, INSERT); -- gcc_assert (*hash_slot == NULL); -- -- new_slot = XDUP (struct lto_section_slot, ls); -- new_slot->name = new_name; -- *hash_slot = new_slot; -- return 1; --} -- --/* Read declarations and other initializations for a FILE_DATA. */ -- --static void --lto_file_finalize (struct lto_file_decl_data *file_data, lto_file *file) --{ -- const char *data; -- size_t len; -- vec -- resolutions = vNULL; -- int i; -- res_pair *rp; -- -- /* Create vector for fast access of resolution. We do this lazily -- to save memory. */ -- resolutions.safe_grow_cleared (file_data->max_index + 1); -- for (i = 0; file_data->respairs.iterate (i, &rp); i++) -- resolutions[rp->index] = rp->res; -- file_data->respairs.release (); -- -- file_data->renaming_hash_table = lto_create_renaming_table (); -- file_data->file_name = file->filename; --#ifdef ACCEL_COMPILER -- lto_input_mode_table (file_data); --#else -- file_data->mode_table = lto_mode_identity_table; --#endif -- data = lto_get_section_data (file_data, LTO_section_decls, NULL, &len); -- if (data == NULL) -- { -- internal_error ("cannot read LTO decls from %s", file_data->file_name); -- return; -- } -- /* Frees resolutions */ -- lto_read_decls (file_data, data, resolutions); -- lto_free_section_data (file_data, LTO_section_decls, NULL, data, len); --} -- --/* Finalize FILE_DATA in FILE and increase COUNT. */ -- --static int --lto_create_files_from_ids (lto_file *file, struct lto_file_decl_data *file_data, -- int *count) --{ -- lto_file_finalize (file_data, file); -- if (symtab->dump_file) -- fprintf (symtab->dump_file, -- "Creating file %s with sub id " HOST_WIDE_INT_PRINT_HEX "\n", -- file_data->file_name, file_data->id); -- (*count)++; -- return 0; --} -- --/* Generate a TREE representation for all types and external decls -- entities in FILE. -- -- Read all of the globals out of the file. Then read the cgraph -- and process the .o index into the cgraph nodes so that it can open -- the .o file to load the functions and ipa information. 
*/ -- --static struct lto_file_decl_data * --lto_file_read (lto_file *file, FILE *resolution_file, int *count) --{ -- struct lto_file_decl_data *file_data = NULL; -- splay_tree file_ids; -- htab_t section_hash_table; -- struct lto_section_slot *section; -- struct file_data_list file_list; -- struct lto_section_list section_list; -- -- memset (§ion_list, 0, sizeof (struct lto_section_list)); -- section_hash_table = lto_obj_build_section_table (file, §ion_list); -- -- /* Find all sub modules in the object and put their sections into new hash -- tables in a splay tree. */ -- file_ids = lto_splay_tree_new (); -- memset (&file_list, 0, sizeof (struct file_data_list)); -- for (section = section_list.first; section != NULL; section = section->next) -- create_subid_section_table (section, file_ids, &file_list); -- -- /* Add resolutions to file ids */ -- lto_resolution_read (file_ids, resolution_file, file); -- -- /* Finalize each lto file for each submodule in the merged object */ -- for (file_data = file_list.first; file_data != NULL; file_data = file_data->next) -- lto_create_files_from_ids (file, file_data, count); -- -- splay_tree_delete (file_ids); -- htab_delete (section_hash_table); -- -- return file_list.first; --} -- --#if HAVE_MMAP_FILE && HAVE_SYSCONF && defined _SC_PAGE_SIZE --#define LTO_MMAP_IO 1 --#endif -- --#if LTO_MMAP_IO --/* Page size of machine is used for mmap and munmap calls. */ --static size_t page_mask; --#endif -- --/* Get the section data of length LEN from FILENAME starting at -- OFFSET. The data segment must be freed by the caller when the -- caller is finished. Returns NULL if all was not well. */ -- --static char * --lto_read_section_data (struct lto_file_decl_data *file_data, -- intptr_t offset, size_t len) --{ -- char *result; -- static int fd = -1; -- static char *fd_name; --#if LTO_MMAP_IO -- intptr_t computed_len; -- intptr_t computed_offset; -- intptr_t diff; --#endif -- -- /* Keep a single-entry file-descriptor cache. The last file we -- touched will get closed at exit. -- ??? Eventually we want to add a more sophisticated larger cache -- or rather fix function body streaming to not stream them in -- practically random order. */ -- if (fd != -1 -- && filename_cmp (fd_name, file_data->file_name) != 0) -- { -- free (fd_name); -- close (fd); -- fd = -1; -- } -- if (fd == -1) -- { -- fd = open (file_data->file_name, O_RDONLY|O_BINARY); -- if (fd == -1) -- { -- fatal_error (input_location, "Cannot open %s", file_data->file_name); -- return NULL; -- } -- fd_name = xstrdup (file_data->file_name); -- } -- --#if LTO_MMAP_IO -- if (!page_mask) -- { -- size_t page_size = sysconf (_SC_PAGE_SIZE); -- page_mask = ~(page_size - 1); -- } -- -- computed_offset = offset & page_mask; -- diff = offset - computed_offset; -- computed_len = len + diff; -- -- result = (char *) mmap (NULL, computed_len, PROT_READ, MAP_PRIVATE, -- fd, computed_offset); -- if (result == MAP_FAILED) -- { -- fatal_error (input_location, "Cannot map %s", file_data->file_name); -- return NULL; -- } -- -- return result + diff; --#else -- result = (char *) xmalloc (len); -- if (lseek (fd, offset, SEEK_SET) != offset -- || read (fd, result, len) != (ssize_t) len) -- { -- free (result); -- fatal_error (input_location, "Cannot read %s", file_data->file_name); -- result = NULL; -- } --#ifdef __MINGW32__ -- /* Native windows doesn't supports delayed unlink on opened file. So -- we close file here again. This produces higher I/O load, but at least -- it prevents to have dangling file handles preventing unlink. 
*/ -- free (fd_name); -- fd_name = NULL; -- close (fd); -- fd = -1; --#endif -- return result; --#endif --} -+ struct cgraph_node *node; -+ timevar_id_t lto_timer; - -+ if (!quiet_flag) -+ fprintf (stderr, -+ flag_wpa ? "Materializing decls:" : "Reading function bodies:"); - --/* Get the section data from FILE_DATA of SECTION_TYPE with NAME. -- NAME will be NULL unless the section type is for a function -- body. */ - --static const char * --get_section_data (struct lto_file_decl_data *file_data, -- enum lto_section_type section_type, -- const char *name, -- size_t *len) --{ -- htab_t section_hash_table = file_data->section_hash_table; -- struct lto_section_slot *f_slot; -- struct lto_section_slot s_slot; -- const char *section_name = lto_get_section_name (section_type, name, file_data); -- char *data = NULL; -- -- *len = 0; -- s_slot.name = section_name; -- f_slot = (struct lto_section_slot *) htab_find (section_hash_table, &s_slot); -- if (f_slot) -+ FOR_EACH_FUNCTION (node) - { -- data = lto_read_section_data (file_data, f_slot->start, f_slot->len); -- *len = f_slot->len; -+ if (node->lto_file_data) -+ { -+ lto_materialize_function (node); -+ lto_stats.num_input_cgraph_nodes++; -+ } - } - -- free (CONST_CAST (char *, section_name)); -- return data; --} -- - --/* Free the section data from FILE_DATA of SECTION_TYPE with NAME that -- starts at OFFSET and has LEN bytes. */ -+ /* Start the appropriate timer depending on the mode that we are -+ operating in. */ -+ lto_timer = (flag_wpa) ? TV_WHOPR_WPA -+ : (flag_ltrans) ? TV_WHOPR_LTRANS -+ : TV_LTO; -+ timevar_push (lto_timer); - --static void --free_section_data (struct lto_file_decl_data *file_data ATTRIBUTE_UNUSED, -- enum lto_section_type section_type ATTRIBUTE_UNUSED, -- const char *name ATTRIBUTE_UNUSED, -- const char *offset, size_t len ATTRIBUTE_UNUSED) --{ --#if LTO_MMAP_IO -- intptr_t computed_len; -- intptr_t computed_offset; -- intptr_t diff; --#endif -+ current_function_decl = NULL; -+ set_cfun (NULL); - --#if LTO_MMAP_IO -- computed_offset = ((intptr_t) offset) & page_mask; -- diff = (intptr_t) offset - computed_offset; -- computed_len = len + diff; -+ if (!quiet_flag) -+ fprintf (stderr, "\n"); - -- munmap ((caddr_t) computed_offset, computed_len); --#else -- free (CONST_CAST(char *, offset)); --#endif -+ timevar_pop (lto_timer); - } - --static lto_file *current_lto_file; -- - /* Actually stream out ENCODER into TEMP_FILENAME. */ - - static void -@@ -2560,581 +411,6 @@ lto_wpa_write_files (void) - timevar_pop (TV_WHOPR_WPA_IO); - } - -- --/* If TT is a variable or function decl replace it with its -- prevailing variant. */ --#define LTO_SET_PREVAIL(tt) \ -- do {\ -- if ((tt) && VAR_OR_FUNCTION_DECL_P (tt) \ -- && (TREE_PUBLIC (tt) || DECL_EXTERNAL (tt))) \ -- { \ -- tt = lto_symtab_prevailing_decl (tt); \ -- fixed = true; \ -- } \ -- } while (0) -- --/* Ensure that TT isn't a replacable var of function decl. */ --#define LTO_NO_PREVAIL(tt) \ -- gcc_checking_assert (!(tt) || !VAR_OR_FUNCTION_DECL_P (tt)) -- --/* Given a tree T replace all fields referring to variables or functions -- with their prevailing variant. */ --static void --lto_fixup_prevailing_decls (tree t) --{ -- enum tree_code code = TREE_CODE (t); -- bool fixed = false; -- -- gcc_checking_assert (code != TREE_BINFO); -- LTO_NO_PREVAIL (TREE_TYPE (t)); -- if (CODE_CONTAINS_STRUCT (code, TS_COMMON) -- /* lto_symtab_prevail_decl use TREE_CHAIN to link to the prevailing decl. -- in the case T is a prevailed declaration we would ICE here. 
*/ -- && !VAR_OR_FUNCTION_DECL_P (t)) -- LTO_NO_PREVAIL (TREE_CHAIN (t)); -- if (DECL_P (t)) -- { -- LTO_NO_PREVAIL (DECL_NAME (t)); -- LTO_SET_PREVAIL (DECL_CONTEXT (t)); -- if (CODE_CONTAINS_STRUCT (code, TS_DECL_COMMON)) -- { -- LTO_SET_PREVAIL (DECL_SIZE (t)); -- LTO_SET_PREVAIL (DECL_SIZE_UNIT (t)); -- LTO_SET_PREVAIL (DECL_INITIAL (t)); -- LTO_NO_PREVAIL (DECL_ATTRIBUTES (t)); -- LTO_SET_PREVAIL (DECL_ABSTRACT_ORIGIN (t)); -- } -- if (CODE_CONTAINS_STRUCT (code, TS_DECL_WITH_VIS)) -- { -- LTO_NO_PREVAIL (DECL_ASSEMBLER_NAME_RAW (t)); -- } -- if (CODE_CONTAINS_STRUCT (code, TS_DECL_NON_COMMON)) -- { -- LTO_NO_PREVAIL (DECL_RESULT_FLD (t)); -- } -- if (CODE_CONTAINS_STRUCT (code, TS_FUNCTION_DECL)) -- { -- LTO_NO_PREVAIL (DECL_ARGUMENTS (t)); -- LTO_SET_PREVAIL (DECL_FUNCTION_PERSONALITY (t)); -- LTO_NO_PREVAIL (DECL_VINDEX (t)); -- } -- if (CODE_CONTAINS_STRUCT (code, TS_FIELD_DECL)) -- { -- LTO_SET_PREVAIL (DECL_FIELD_OFFSET (t)); -- LTO_NO_PREVAIL (DECL_BIT_FIELD_TYPE (t)); -- LTO_NO_PREVAIL (DECL_QUALIFIER (t)); -- LTO_NO_PREVAIL (DECL_FIELD_BIT_OFFSET (t)); -- LTO_NO_PREVAIL (DECL_FCONTEXT (t)); -- } -- } -- else if (TYPE_P (t)) -- { -- LTO_NO_PREVAIL (TYPE_CACHED_VALUES (t)); -- LTO_SET_PREVAIL (TYPE_SIZE (t)); -- LTO_SET_PREVAIL (TYPE_SIZE_UNIT (t)); -- LTO_NO_PREVAIL (TYPE_ATTRIBUTES (t)); -- LTO_NO_PREVAIL (TYPE_NAME (t)); -- -- LTO_SET_PREVAIL (TYPE_MIN_VALUE_RAW (t)); -- LTO_SET_PREVAIL (TYPE_MAX_VALUE_RAW (t)); -- LTO_NO_PREVAIL (TYPE_LANG_SLOT_1 (t)); -- -- LTO_SET_PREVAIL (TYPE_CONTEXT (t)); -- -- LTO_NO_PREVAIL (TYPE_CANONICAL (t)); -- LTO_NO_PREVAIL (TYPE_MAIN_VARIANT (t)); -- LTO_NO_PREVAIL (TYPE_NEXT_VARIANT (t)); -- } -- else if (EXPR_P (t)) -- { -- int i; -- for (i = TREE_OPERAND_LENGTH (t) - 1; i >= 0; --i) -- LTO_SET_PREVAIL (TREE_OPERAND (t, i)); -- } -- else if (TREE_CODE (t) == CONSTRUCTOR) -- { -- unsigned i; -- tree val; -- FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (t), i, val) -- LTO_SET_PREVAIL (val); -- } -- else -- { -- switch (code) -- { -- case TREE_LIST: -- LTO_SET_PREVAIL (TREE_VALUE (t)); -- LTO_SET_PREVAIL (TREE_PURPOSE (t)); -- LTO_NO_PREVAIL (TREE_PURPOSE (t)); -- break; -- default: -- gcc_unreachable (); -- } -- } -- /* If we fixed nothing, then we missed something seen by -- mentions_vars_p. */ -- gcc_checking_assert (fixed); --} --#undef LTO_SET_PREVAIL --#undef LTO_NO_PREVAIL -- --/* Helper function of lto_fixup_decls. Walks the var and fn streams in STATE, -- replaces var and function decls with the corresponding prevailing def. */ -- --static void --lto_fixup_state (struct lto_in_decl_state *state) --{ -- unsigned i, si; -- -- /* Although we only want to replace FUNCTION_DECLs and VAR_DECLs, -- we still need to walk from all DECLs to find the reachable -- FUNCTION_DECLs and VAR_DECLs. */ -- for (si = 0; si < LTO_N_DECL_STREAMS; si++) -- { -- vec *trees = state->streams[si]; -- for (i = 0; i < vec_safe_length (trees); i++) -- { -- tree t = (*trees)[i]; -- if (flag_checking && TYPE_P (t)) -- verify_type (t); -- if (VAR_OR_FUNCTION_DECL_P (t) -- && (TREE_PUBLIC (t) || DECL_EXTERNAL (t))) -- (*trees)[i] = lto_symtab_prevailing_decl (t); -- } -- } --} -- --/* Fix the decls from all FILES. Replaces each decl with the corresponding -- prevailing one. 
*/ -- --static void --lto_fixup_decls (struct lto_file_decl_data **files) --{ -- unsigned int i; -- tree t; -- -- if (tree_with_vars) -- FOR_EACH_VEC_ELT ((*tree_with_vars), i, t) -- lto_fixup_prevailing_decls (t); -- -- for (i = 0; files[i]; i++) -- { -- struct lto_file_decl_data *file = files[i]; -- struct lto_in_decl_state *state = file->global_decl_state; -- lto_fixup_state (state); -- -- hash_table::iterator iter; -- lto_in_decl_state *elt; -- FOR_EACH_HASH_TABLE_ELEMENT (*file->function_decl_states, elt, -- lto_in_decl_state *, iter) -- lto_fixup_state (elt); -- } --} -- --static GTY((length ("lto_stats.num_input_files + 1"))) struct lto_file_decl_data **all_file_decl_data; -- --/* Turn file datas for sub files into a single array, so that they look -- like separate files for further passes. */ -- --static void --lto_flatten_files (struct lto_file_decl_data **orig, int count, int last_file_ix) --{ -- struct lto_file_decl_data *n, *next; -- int i, k; -- -- lto_stats.num_input_files = count; -- all_file_decl_data -- = ggc_cleared_vec_alloc (count + 1); -- /* Set the hooks so that all of the ipa passes can read in their data. */ -- lto_set_in_hooks (all_file_decl_data, get_section_data, free_section_data); -- for (i = 0, k = 0; i < last_file_ix; i++) -- { -- for (n = orig[i]; n != NULL; n = next) -- { -- all_file_decl_data[k++] = n; -- next = n->next; -- n->next = NULL; -- } -- } -- all_file_decl_data[k] = NULL; -- gcc_assert (k == count); --} -- --/* Input file data before flattening (i.e. splitting them to subfiles to support -- incremental linking. */ --static int real_file_count; --static GTY((length ("real_file_count + 1"))) struct lto_file_decl_data **real_file_decl_data; -- --static void print_lto_report_1 (void); -- --/* Read all the symbols from the input files FNAMES. NFILES is the -- number of files requested in the command line. Instantiate a -- global call graph by aggregating all the sub-graphs found in each -- file. */ -- --static void --read_cgraph_and_symbols (unsigned nfiles, const char **fnames) --{ -- unsigned int i, last_file_ix; -- FILE *resolution; -- int count = 0; -- struct lto_file_decl_data **decl_data; -- symtab_node *snode; -- -- symtab->initialize (); -- -- timevar_push (TV_IPA_LTO_DECL_IN); -- --#ifdef ACCEL_COMPILER -- section_name_prefix = OFFLOAD_SECTION_NAME_PREFIX; -- lto_stream_offload_p = true; --#endif -- -- real_file_decl_data -- = decl_data = ggc_cleared_vec_alloc (nfiles + 1); -- real_file_count = nfiles; -- -- /* Read the resolution file. */ -- resolution = NULL; -- if (resolution_file_name) -- { -- int t; -- unsigned num_objects; -- -- resolution = fopen (resolution_file_name, "r"); -- if (resolution == NULL) -- fatal_error (input_location, -- "could not open symbol resolution file: %m"); -- -- t = fscanf (resolution, "%u", &num_objects); -- gcc_assert (t == 1); -- -- /* True, since the plugin splits the archives. */ -- gcc_assert (num_objects == nfiles); -- } -- symtab->state = LTO_STREAMING; -- -- canonical_type_hash_cache = new hash_map (251); -- gimple_canonical_types = htab_create (16381, gimple_canonical_type_hash, -- gimple_canonical_type_eq, NULL); -- gcc_obstack_init (&tree_scc_hash_obstack); -- tree_scc_hash = new hash_table (4096); -- -- /* Register the common node types with the canonical type machinery so -- we properly share alias-sets across languages and TUs. Do not -- expose the common nodes as type merge target - those that should be -- are already exposed so by pre-loading the LTO streamer caches. 
-- Do two passes - first clear TYPE_CANONICAL and then re-compute it. */ -- for (i = 0; i < itk_none; ++i) -- lto_register_canonical_types (integer_types[i], true); -- for (i = 0; i < stk_type_kind_last; ++i) -- lto_register_canonical_types (sizetype_tab[i], true); -- for (i = 0; i < TI_MAX; ++i) -- lto_register_canonical_types (global_trees[i], true); -- for (i = 0; i < itk_none; ++i) -- lto_register_canonical_types (integer_types[i], false); -- for (i = 0; i < stk_type_kind_last; ++i) -- lto_register_canonical_types (sizetype_tab[i], false); -- for (i = 0; i < TI_MAX; ++i) -- lto_register_canonical_types (global_trees[i], false); -- -- if (!quiet_flag) -- fprintf (stderr, "Reading object files:"); -- -- /* Read all of the object files specified on the command line. */ -- for (i = 0, last_file_ix = 0; i < nfiles; ++i) -- { -- struct lto_file_decl_data *file_data = NULL; -- if (!quiet_flag) -- { -- fprintf (stderr, " %s", fnames[i]); -- fflush (stderr); -- } -- -- current_lto_file = lto_obj_file_open (fnames[i], false); -- if (!current_lto_file) -- break; -- -- file_data = lto_file_read (current_lto_file, resolution, &count); -- if (!file_data) -- { -- lto_obj_file_close (current_lto_file); -- free (current_lto_file); -- current_lto_file = NULL; -- break; -- } -- -- decl_data[last_file_ix++] = file_data; -- -- lto_obj_file_close (current_lto_file); -- free (current_lto_file); -- current_lto_file = NULL; -- } -- -- lto_flatten_files (decl_data, count, last_file_ix); -- lto_stats.num_input_files = count; -- ggc_free(decl_data); -- real_file_decl_data = NULL; -- -- if (resolution_file_name) -- fclose (resolution); -- -- /* Show the LTO report before launching LTRANS. */ -- if (flag_lto_report || (flag_wpa && flag_lto_report_wpa)) -- print_lto_report_1 (); -- -- /* Free gimple type merging datastructures. */ -- delete tree_scc_hash; -- tree_scc_hash = NULL; -- obstack_free (&tree_scc_hash_obstack, NULL); -- htab_delete (gimple_canonical_types); -- gimple_canonical_types = NULL; -- delete canonical_type_hash_cache; -- canonical_type_hash_cache = NULL; -- -- /* At this stage we know that majority of GGC memory is reachable. -- Growing the limits prevents unnecesary invocation of GGC. */ -- ggc_grow (); -- ggc_collect (); -- -- /* Set the hooks so that all of the ipa passes can read in their data. */ -- lto_set_in_hooks (all_file_decl_data, get_section_data, free_section_data); -- -- timevar_pop (TV_IPA_LTO_DECL_IN); -- -- if (!quiet_flag) -- fprintf (stderr, "\nReading the callgraph\n"); -- -- timevar_push (TV_IPA_LTO_CGRAPH_IO); -- /* Read the symtab. */ -- input_symtab (); -- -- input_offload_tables (!flag_ltrans); -- -- /* Store resolutions into the symbol table. 
*/ -- -- FOR_EACH_SYMBOL (snode) -- if (snode->externally_visible && snode->real_symbol_p () -- && snode->lto_file_data && snode->lto_file_data->resolution_map -- && !(TREE_CODE (snode->decl) == FUNCTION_DECL -- && fndecl_built_in_p (snode->decl)) -- && !(VAR_P (snode->decl) && DECL_HARD_REGISTER (snode->decl))) -- { -- ld_plugin_symbol_resolution_t *res; -- -- res = snode->lto_file_data->resolution_map->get (snode->decl); -- if (!res || *res == LDPR_UNKNOWN) -- { -- if (snode->output_to_lto_symbol_table_p ()) -- fatal_error (input_location, "missing resolution data for %s", -- IDENTIFIER_POINTER -- (DECL_ASSEMBLER_NAME (snode->decl))); -- } -- else -- snode->resolution = *res; -- } -- for (i = 0; all_file_decl_data[i]; i++) -- if (all_file_decl_data[i]->resolution_map) -- { -- delete all_file_decl_data[i]->resolution_map; -- all_file_decl_data[i]->resolution_map = NULL; -- } -- -- timevar_pop (TV_IPA_LTO_CGRAPH_IO); -- -- if (!quiet_flag) -- fprintf (stderr, "Merging declarations\n"); -- -- timevar_push (TV_IPA_LTO_DECL_MERGE); -- /* Merge global decls. In ltrans mode we read merged cgraph, we do not -- need to care about resolving symbols again, we only need to replace -- duplicated declarations read from the callgraph and from function -- sections. */ -- if (!flag_ltrans) -- { -- lto_symtab_merge_decls (); -- -- /* If there were errors during symbol merging bail out, we have no -- good way to recover here. */ -- if (seen_error ()) -- fatal_error (input_location, -- "errors during merging of translation units"); -- -- /* Fixup all decls. */ -- lto_fixup_decls (all_file_decl_data); -- } -- if (tree_with_vars) -- ggc_free (tree_with_vars); -- tree_with_vars = NULL; -- ggc_collect (); -- -- timevar_pop (TV_IPA_LTO_DECL_MERGE); -- /* Each pass will set the appropriate timer. */ -- -- if (!quiet_flag) -- fprintf (stderr, "Reading summaries\n"); -- -- /* Read the IPA summary data. */ -- if (flag_ltrans) -- ipa_read_optimization_summaries (); -- else -- ipa_read_summaries (); -- -- for (i = 0; all_file_decl_data[i]; i++) -- { -- gcc_assert (all_file_decl_data[i]->symtab_node_encoder); -- lto_symtab_encoder_delete (all_file_decl_data[i]->symtab_node_encoder); -- all_file_decl_data[i]->symtab_node_encoder = NULL; -- lto_free_function_in_decl_state (all_file_decl_data[i]->global_decl_state); -- all_file_decl_data[i]->global_decl_state = NULL; -- all_file_decl_data[i]->current_decl_state = NULL; -- } -- -- if (!flag_ltrans) -- { -- /* Finally merge the cgraph according to the decl merging decisions. */ -- timevar_push (TV_IPA_LTO_CGRAPH_MERGE); -- -- gcc_assert (!dump_file); -- dump_file = dump_begin (lto_link_dump_id, NULL); -- -- if (dump_file) -- { -- fprintf (dump_file, "Before merging:\n"); -- symtab->dump (dump_file); -- } -- lto_symtab_merge_symbols (); -- /* Removal of unreachable symbols is needed to make verify_symtab to pass; -- we are still having duplicated comdat groups containing local statics. -- We could also just remove them while merging. */ -- symtab->remove_unreachable_nodes (dump_file); -- ggc_collect (); -- -- if (dump_file) -- dump_end (lto_link_dump_id, dump_file); -- dump_file = NULL; -- timevar_pop (TV_IPA_LTO_CGRAPH_MERGE); -- } -- symtab->state = IPA_SSA; -- /* All node removals happening here are useless, because -- WPA should not stream them. 
Still always perform remove_unreachable_nodes -- because we may reshape clone tree, get rid of dead masters of inline -- clones and remove symbol entries for read-only variables we keep around -- only to be able to constant fold them. */ -- if (flag_ltrans) -- { -- if (symtab->dump_file) -- symtab->dump (symtab->dump_file); -- symtab->remove_unreachable_nodes (symtab->dump_file); -- } -- -- /* Indicate that the cgraph is built and ready. */ -- symtab->function_flags_ready = true; -- -- ggc_free (all_file_decl_data); -- all_file_decl_data = NULL; --} -- -- --/* Materialize all the bodies for all the nodes in the callgraph. */ -- --static void --materialize_cgraph (void) --{ -- struct cgraph_node *node; -- timevar_id_t lto_timer; -- -- if (!quiet_flag) -- fprintf (stderr, -- flag_wpa ? "Materializing decls:" : "Reading function bodies:"); -- -- -- FOR_EACH_FUNCTION (node) -- { -- if (node->lto_file_data) -- { -- lto_materialize_function (node); -- lto_stats.num_input_cgraph_nodes++; -- } -- } -- -- -- /* Start the appropriate timer depending on the mode that we are -- operating in. */ -- lto_timer = (flag_wpa) ? TV_WHOPR_WPA -- : (flag_ltrans) ? TV_WHOPR_LTRANS -- : TV_LTO; -- timevar_push (lto_timer); -- -- current_function_decl = NULL; -- set_cfun (NULL); -- -- if (!quiet_flag) -- fprintf (stderr, "\n"); -- -- timevar_pop (lto_timer); --} -- -- --/* Show various memory usage statistics related to LTO. */ --static void --print_lto_report_1 (void) --{ -- const char *pfx = (flag_lto) ? "LTO" : (flag_wpa) ? "WPA" : "LTRANS"; -- fprintf (stderr, "%s statistics\n", pfx); -- -- fprintf (stderr, "[%s] read %lu SCCs of average size %f\n", -- pfx, num_sccs_read, total_scc_size / (double)num_sccs_read); -- fprintf (stderr, "[%s] %lu tree bodies read in total\n", pfx, total_scc_size); -- if (flag_wpa && tree_scc_hash) -- { -- fprintf (stderr, "[%s] tree SCC table: size %ld, %ld elements, " -- "collision ratio: %f\n", pfx, -- (long) tree_scc_hash->size (), -- (long) tree_scc_hash->elements (), -- tree_scc_hash->collisions ()); -- hash_table::iterator hiter; -- tree_scc *scc, *max_scc = NULL; -- unsigned max_length = 0; -- FOR_EACH_HASH_TABLE_ELEMENT (*tree_scc_hash, scc, x, hiter) -- { -- unsigned length = 0; -- tree_scc *s = scc; -- for (; s; s = s->next) -- length++; -- if (length > max_length) -- { -- max_length = length; -- max_scc = scc; -- } -- } -- fprintf (stderr, "[%s] tree SCC max chain length %u (size %u)\n", -- pfx, max_length, max_scc->len); -- fprintf (stderr, "[%s] Compared %lu SCCs, %lu collisions (%f)\n", pfx, -- num_scc_compares, num_scc_compare_collisions, -- num_scc_compare_collisions / (double) num_scc_compares); -- fprintf (stderr, "[%s] Merged %lu SCCs\n", pfx, num_sccs_merged); -- fprintf (stderr, "[%s] Merged %lu tree bodies\n", pfx, -- total_scc_size_merged); -- fprintf (stderr, "[%s] Merged %lu types\n", pfx, num_merged_types); -- fprintf (stderr, "[%s] %lu types prevailed (%lu associated trees)\n", -- pfx, num_prevailing_types, num_type_scc_trees); -- fprintf (stderr, "[%s] GIMPLE canonical type table: size %ld, " -- "%ld elements, %ld searches, %ld collisions (ratio: %f)\n", pfx, -- (long) htab_size (gimple_canonical_types), -- (long) htab_elements (gimple_canonical_types), -- (long) gimple_canonical_types->searches, -- (long) gimple_canonical_types->collisions, -- htab_collisions (gimple_canonical_types)); -- fprintf (stderr, "[%s] GIMPLE canonical type pointer-map: " -- "%lu elements, %ld searches\n", pfx, -- num_canonical_type_hash_entries, -- 
num_canonical_type_hash_queries); -- } -- -- print_lto_report (pfx); --} -- - /* Perform whole program analysis (WPA) on the callgraph and write out the - optimization plan. */ - -@@ -3262,64 +538,6 @@ do_whole_program_analysis (void) - dump_memory_report (true); - } - -- --static GTY(()) tree lto_eh_personality_decl; -- --/* Return the LTO personality function decl. */ -- --tree --lto_eh_personality (void) --{ -- if (!lto_eh_personality_decl) -- { -- /* Use the first personality DECL for our personality if we don't -- support multiple ones. This ensures that we don't artificially -- create the need for them in a single-language program. */ -- if (first_personality_decl && !dwarf2out_do_cfi_asm ()) -- lto_eh_personality_decl = first_personality_decl; -- else -- lto_eh_personality_decl = lhd_gcc_personality (); -- } -- -- return lto_eh_personality_decl; --} -- --/* Set the process name based on the LTO mode. */ -- --static void --lto_process_name (void) --{ -- if (flag_lto) -- setproctitle (flag_incremental_link == INCREMENTAL_LINK_LTO -- ? "lto1-inclink" : "lto1-lto"); -- if (flag_wpa) -- setproctitle ("lto1-wpa"); -- if (flag_ltrans) -- setproctitle ("lto1-ltrans"); --} -- -- --/* Initialize the LTO front end. */ -- --static void --lto_init (void) --{ -- lto_process_name (); -- lto_streamer_hooks_init (); -- lto_reader_init (); -- lto_set_in_hooks (NULL, get_section_data, free_section_data); -- memset (<o_stats, 0, sizeof (lto_stats)); -- bitmap_obstack_initialize (NULL); -- gimple_register_cfg_hooks (); --#ifndef ACCEL_COMPILER -- unsigned char *table -- = ggc_vec_alloc (MAX_MACHINE_MODE); -- for (int m = 0; m < MAX_MACHINE_MODE; m++) -- table[m] = m; -- lto_mode_identity_table = table; --#endif --} -- - /* Create artificial pointers for "omp declare target link" vars. */ - - static void -@@ -3351,7 +569,6 @@ offload_handle_link_vars (void) - #endif - } - -- - /* Main entry point for the GIMPLE front end. This front end has - three main personalities: - -@@ -3386,7 +603,7 @@ lto_main (void) - timevar_start (TV_PHASE_SETUP); - - /* Initialize the LTO front end. */ -- lto_init (); -+ lto_fe_init (); - - timevar_stop (TV_PHASE_SETUP); - timevar_start (TV_PHASE_STREAM_IN); -@@ -3439,5 +656,3 @@ lto_main (void) - timevar_start (TV_PHASE_PARSING); - timevar_push (TV_PARSE_GLOBAL); - } -- --#include "gt-lto-lto.h" -diff --git a/gcc/machmode.h b/gcc/machmode.h -index d564f9c64..a507ed66c 100644 ---- a/gcc/machmode.h -+++ b/gcc/machmode.h -@@ -244,14 +244,15 @@ class opt_mode - public: - enum from_int { dummy = MAX_MACHINE_MODE }; - -- ALWAYS_INLINE opt_mode () : m_mode (E_VOIDmode) {} -- ALWAYS_INLINE opt_mode (const T &m) : m_mode (m) {} -+ ALWAYS_INLINE CONSTEXPR opt_mode () : m_mode (E_VOIDmode) {} -+ ALWAYS_INLINE CONSTEXPR opt_mode (const T &m) : m_mode (m) {} - template -- ALWAYS_INLINE opt_mode (const U &m) : m_mode (T (m)) {} -- ALWAYS_INLINE opt_mode (from_int m) : m_mode (machine_mode (m)) {} -+ ALWAYS_INLINE CONSTEXPR opt_mode (const U &m) : m_mode (T (m)) {} -+ ALWAYS_INLINE CONSTEXPR opt_mode (from_int m) : m_mode (machine_mode (m)) {} - - machine_mode else_void () const; -- machine_mode else_blk () const; -+ machine_mode else_blk () const { return else_mode (BLKmode); } -+ machine_mode else_mode (machine_mode) const; - T require () const; - - bool exists () const; -@@ -274,13 +275,13 @@ opt_mode::else_void () const - return m_mode; - } - --/* If the T exists, return its enum value, otherwise return E_BLKmode. 
*/ -+/* If the T exists, return its enum value, otherwise return FALLBACK. */ - - template - inline machine_mode --opt_mode::else_blk () const -+opt_mode::else_mode (machine_mode fallback) const - { -- return m_mode == E_VOIDmode ? E_BLKmode : m_mode; -+ return m_mode == E_VOIDmode ? fallback : m_mode; - } - - /* Assert that the object contains a T and return it. */ -@@ -326,8 +327,12 @@ struct pod_mode - typedef typename T::measurement_type measurement_type; - - machine_mode m_mode; -- ALWAYS_INLINE operator machine_mode () const { return m_mode; } -- ALWAYS_INLINE operator T () const { return from_int (m_mode); } -+ ALWAYS_INLINE CONSTEXPR -+ operator machine_mode () const { return m_mode; } -+ -+ ALWAYS_INLINE CONSTEXPR -+ operator T () const { return from_int (m_mode); } -+ - ALWAYS_INLINE pod_mode &operator = (const T &m) { m_mode = m; return *this; } - }; - -@@ -405,8 +410,11 @@ public: - typedef unsigned short measurement_type; - - ALWAYS_INLINE scalar_int_mode () {} -- ALWAYS_INLINE scalar_int_mode (from_int m) : m_mode (machine_mode (m)) {} -- ALWAYS_INLINE operator machine_mode () const { return m_mode; } -+ -+ ALWAYS_INLINE CONSTEXPR -+ scalar_int_mode (from_int m) : m_mode (machine_mode (m)) {} -+ -+ ALWAYS_INLINE CONSTEXPR operator machine_mode () const { return m_mode; } - - static bool includes_p (machine_mode); - -@@ -430,8 +438,11 @@ public: - typedef unsigned short measurement_type; - - ALWAYS_INLINE scalar_float_mode () {} -- ALWAYS_INLINE scalar_float_mode (from_int m) : m_mode (machine_mode (m)) {} -- ALWAYS_INLINE operator machine_mode () const { return m_mode; } -+ -+ ALWAYS_INLINE CONSTEXPR -+ scalar_float_mode (from_int m) : m_mode (machine_mode (m)) {} -+ -+ ALWAYS_INLINE CONSTEXPR operator machine_mode () const { return m_mode; } - - static bool includes_p (machine_mode); - -@@ -455,11 +466,20 @@ public: - typedef unsigned short measurement_type; - - ALWAYS_INLINE scalar_mode () {} -- ALWAYS_INLINE scalar_mode (from_int m) : m_mode (machine_mode (m)) {} -- ALWAYS_INLINE scalar_mode (const scalar_int_mode &m) : m_mode (m) {} -- ALWAYS_INLINE scalar_mode (const scalar_float_mode &m) : m_mode (m) {} -- ALWAYS_INLINE scalar_mode (const scalar_int_mode_pod &m) : m_mode (m) {} -- ALWAYS_INLINE operator machine_mode () const { return m_mode; } -+ -+ ALWAYS_INLINE CONSTEXPR -+ scalar_mode (from_int m) : m_mode (machine_mode (m)) {} -+ -+ ALWAYS_INLINE CONSTEXPR -+ scalar_mode (const scalar_int_mode &m) : m_mode (m) {} -+ -+ ALWAYS_INLINE CONSTEXPR -+ scalar_mode (const scalar_float_mode &m) : m_mode (m) {} -+ -+ ALWAYS_INLINE CONSTEXPR -+ scalar_mode (const scalar_int_mode_pod &m) : m_mode (m) {} -+ -+ ALWAYS_INLINE CONSTEXPR operator machine_mode () const { return m_mode; } - - static bool includes_p (machine_mode); - -@@ -496,8 +516,11 @@ public: - typedef unsigned short measurement_type; - - ALWAYS_INLINE complex_mode () {} -- ALWAYS_INLINE complex_mode (from_int m) : m_mode (machine_mode (m)) {} -- ALWAYS_INLINE operator machine_mode () const { return m_mode; } -+ -+ ALWAYS_INLINE CONSTEXPR -+ complex_mode (from_int m) : m_mode (machine_mode (m)) {} -+ -+ ALWAYS_INLINE CONSTEXPR operator machine_mode () const { return m_mode; } - - static bool includes_p (machine_mode); - -@@ -766,14 +789,29 @@ public: - typedef unsigned short measurement_type; - - ALWAYS_INLINE fixed_size_mode () {} -- ALWAYS_INLINE fixed_size_mode (from_int m) : m_mode (machine_mode (m)) {} -- ALWAYS_INLINE fixed_size_mode (const scalar_mode &m) : m_mode (m) {} -- ALWAYS_INLINE fixed_size_mode 
(const scalar_int_mode &m) : m_mode (m) {} -- ALWAYS_INLINE fixed_size_mode (const scalar_float_mode &m) : m_mode (m) {} -- ALWAYS_INLINE fixed_size_mode (const scalar_mode_pod &m) : m_mode (m) {} -- ALWAYS_INLINE fixed_size_mode (const scalar_int_mode_pod &m) : m_mode (m) {} -- ALWAYS_INLINE fixed_size_mode (const complex_mode &m) : m_mode (m) {} -- ALWAYS_INLINE operator machine_mode () const { return m_mode; } -+ -+ ALWAYS_INLINE CONSTEXPR -+ fixed_size_mode (from_int m) : m_mode (machine_mode (m)) {} -+ -+ ALWAYS_INLINE CONSTEXPR -+ fixed_size_mode (const scalar_mode &m) : m_mode (m) {} -+ -+ ALWAYS_INLINE CONSTEXPR -+ fixed_size_mode (const scalar_int_mode &m) : m_mode (m) {} -+ -+ ALWAYS_INLINE CONSTEXPR -+ fixed_size_mode (const scalar_float_mode &m) : m_mode (m) {} -+ -+ ALWAYS_INLINE CONSTEXPR -+ fixed_size_mode (const scalar_mode_pod &m) : m_mode (m) {} -+ -+ ALWAYS_INLINE CONSTEXPR -+ fixed_size_mode (const scalar_int_mode_pod &m) : m_mode (m) {} -+ -+ ALWAYS_INLINE CONSTEXPR -+ fixed_size_mode (const complex_mode &m) : m_mode (m) {} -+ -+ ALWAYS_INLINE CONSTEXPR operator machine_mode () const { return m_mode; } - - static bool includes_p (machine_mode); - -diff --git a/gcc/match.pd b/gcc/match.pd -index f7e192d9b..facc43387 100644 ---- a/gcc/match.pd -+++ b/gcc/match.pd -@@ -82,12 +82,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) - plus minus - mult trunc_div trunc_mod rdiv - min max -- bit_and bit_ior bit_xor) -+ bit_and bit_ior bit_xor -+ lshift rshift) - (define_operator_list COND_BINARY - IFN_COND_ADD IFN_COND_SUB - IFN_COND_MUL IFN_COND_DIV IFN_COND_MOD IFN_COND_RDIV - IFN_COND_MIN IFN_COND_MAX -- IFN_COND_AND IFN_COND_IOR IFN_COND_XOR) -+ IFN_COND_AND IFN_COND_IOR IFN_COND_XOR -+ IFN_COND_SHL IFN_COND_SHR) - - /* Same for ternary operations. */ - (define_operator_list UNCOND_TERNARY -@@ -5378,3 +5380,86 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) - (bit_and:elt_type - (BIT_FIELD_REF:elt_type @0 { size; } { pos; }) - { elt; }))))))) -+ -+(simplify -+ (vec_perm @0 @1 VECTOR_CST@2) -+ (with -+ { -+ tree op0 = @0, op1 = @1, op2 = @2; -+ -+ /* Build a vector of integers from the tree mask. */ -+ vec_perm_builder builder; -+ if (!tree_to_vec_perm_builder (&builder, op2)) -+ return NULL_TREE; -+ -+ /* Create a vec_perm_indices for the integer vector. */ -+ poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (type); -+ bool single_arg = (op0 == op1); -+ vec_perm_indices sel (builder, single_arg ? 1 : 2, nelts); -+ } -+ (if (sel.series_p (0, 1, 0, 1)) -+ { op0; } -+ (if (sel.series_p (0, 1, nelts, 1)) -+ { op1; } -+ (with -+ { -+ if (!single_arg) -+ { -+ if (sel.all_from_input_p (0)) -+ op1 = op0; -+ else if (sel.all_from_input_p (1)) -+ { -+ op0 = op1; -+ sel.rotate_inputs (1); -+ } -+ } -+ gassign *def; -+ tree cop0 = op0, cop1 = op1; -+ if (TREE_CODE (op0) == SSA_NAME -+ && (def = dyn_cast (SSA_NAME_DEF_STMT (op0))) -+ && gimple_assign_rhs_code (def) == CONSTRUCTOR) -+ cop0 = gimple_assign_rhs1 (def); -+ if (TREE_CODE (op1) == SSA_NAME -+ && (def = dyn_cast (SSA_NAME_DEF_STMT (op1))) -+ && gimple_assign_rhs_code (def) == CONSTRUCTOR) -+ cop1 = gimple_assign_rhs1 (def); -+ -+ tree t; -+ } -+ (if ((TREE_CODE (cop0) == VECTOR_CST -+ || TREE_CODE (cop0) == CONSTRUCTOR) -+ && (TREE_CODE (cop1) == VECTOR_CST -+ || TREE_CODE (cop1) == CONSTRUCTOR) -+ && (t = fold_vec_perm (type, cop0, cop1, sel))) -+ { t; } -+ (with -+ { -+ bool changed = (op0 == op1 && !single_arg); -+ -+ /* Generate a canonical form of the selector. 
*/ -+ if (sel.encoding () != builder) -+ { -+ /* Some targets are deficient and fail to expand a single -+ argument permutation while still allowing an equivalent -+ 2-argument version. */ -+ tree oldop2 = op2; -+ if (sel.ninputs () == 2 -+ || can_vec_perm_const_p (TYPE_MODE (type), sel, false)) -+ op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel); -+ else -+ { -+ vec_perm_indices sel2 (builder, 2, nelts); -+ if (can_vec_perm_const_p (TYPE_MODE (type), sel2, false)) -+ op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel2); -+ else -+ /* Not directly supported with either encoding, -+ so use the preferred form. */ -+ op2 = vec_perm_indices_to_tree (TREE_TYPE (op2), sel); -+ } -+ /* Differences in the encoder do not necessarily mean -+ differences in the resulting vector. */ -+ changed = !operand_equal_p (op2, oldop2, 0); -+ } -+ } -+ (if (changed) -+ (vec_perm { op0; } { op1; } { op2; }))))))))) -diff --git a/gcc/mode-switching.c b/gcc/mode-switching.c -index 2ff21a400..4a34d4a2b 100644 ---- a/gcc/mode-switching.c -+++ b/gcc/mode-switching.c -@@ -165,7 +165,7 @@ new_seginfo (int mode, rtx_insn *insn, int bb, HARD_REG_SET regs_live) - ptr->insn_ptr = insn; - ptr->bbnum = bb; - ptr->next = NULL; -- COPY_HARD_REG_SET (ptr->regs_live, regs_live); -+ ptr->regs_live = regs_live; - return ptr; - } - -@@ -637,7 +637,7 @@ optimize_mode_switching (void) - if (REG_NOTE_KIND (link) == REG_DEAD) - reg_dies (XEXP (link, 0), &live_now); - -- note_stores (PATTERN (insn), reg_becomes_live, &live_now); -+ note_stores (insn, reg_becomes_live, &live_now); - for (link = REG_NOTES (insn); link; link = XEXP (link, 1)) - if (REG_NOTE_KIND (link) == REG_UNUSED) - reg_dies (XEXP (link, 0), &live_now); -diff --git a/gcc/omp-simd-clone.c b/gcc/omp-simd-clone.c -index 10490f34f..d884514cc 100644 ---- a/gcc/omp-simd-clone.c -+++ b/gcc/omp-simd-clone.c -@@ -461,8 +461,7 @@ simd_clone_create (struct cgraph_node *old_node) - if (new_node == NULL) - return new_node; - -- DECL_BUILT_IN_CLASS (new_node->decl) = NOT_BUILT_IN; -- DECL_FUNCTION_CODE (new_node->decl) = (enum built_in_function) 0; -+ set_decl_built_in_function (new_node->decl, NOT_BUILT_IN, 0); - TREE_PUBLIC (new_node->decl) = TREE_PUBLIC (old_node->decl); - DECL_COMDAT (new_node->decl) = DECL_COMDAT (old_node->decl); - DECL_WEAK (new_node->decl) = DECL_WEAK (old_node->decl); -diff --git a/gcc/opt-suggestions.c b/gcc/opt-suggestions.c -index a820c78ff..1ec94203c 100644 ---- a/gcc/opt-suggestions.c -+++ b/gcc/opt-suggestions.c -@@ -307,7 +307,6 @@ test_completion_valid_options (option_proposer &proposer) - "-Wassign-intercept", - "-Wno-format-security", - "-fno-sched-stalled-insns", -- "-fbtr-bb-exclusive", - "-fno-tree-tail-merge", - "-Wlong-long", - "-Wno-unused-but-set-parameter", -diff --git a/gcc/optabs-tree.c b/gcc/optabs-tree.c -index 341e02bd5..7bad9c87b 100644 ---- a/gcc/optabs-tree.c -+++ b/gcc/optabs-tree.c -@@ -267,20 +267,16 @@ optab_for_tree_code (enum tree_code code, const_tree type, - - Convert operations we currently support directly are FIX_TRUNC and FLOAT. - This function checks if these operations are supported -- by the target platform either directly (via vector tree-codes), or via -- target builtins. -+ by the target platform directly (via vector tree-codes). - - Output: - - CODE1 is code of vector operation to be used when -- vectorizing the operation, if available. -- - DECL is decl of target builtin functions to be used -- when vectorizing the operation, if available. In this case, -- CODE1 is CALL_EXPR. 
*/ -+ vectorizing the operation, if available. */ - - bool - supportable_convert_operation (enum tree_code code, - tree vectype_out, tree vectype_in, -- tree *decl, enum tree_code *code1) -+ enum tree_code *code1) - { - machine_mode m1,m2; - bool truncp; -@@ -314,15 +310,6 @@ supportable_convert_operation (enum tree_code code, - return true; - } - -- /* Now check for builtin. */ -- if (targetm.vectorize.builtin_conversion -- && targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in)) -- { -- *code1 = CALL_EXPR; -- *decl = targetm.vectorize.builtin_conversion (code, vectype_out, -- vectype_in); -- return true; -- } - return false; - } - -diff --git a/gcc/optabs-tree.h b/gcc/optabs-tree.h -index 5e4848997..dac350142 100644 ---- a/gcc/optabs-tree.h -+++ b/gcc/optabs-tree.h -@@ -36,7 +36,7 @@ enum optab_subtype - the second argument. The third argument distinguishes between the types of - vector shifts and rotates. */ - optab optab_for_tree_code (enum tree_code, const_tree, enum optab_subtype); --bool supportable_convert_operation (enum tree_code, tree, tree, tree *, -+bool supportable_convert_operation (enum tree_code, tree, tree, - enum tree_code *); - bool expand_vec_cmp_expr_p (tree, tree, enum tree_code); - bool expand_vec_cond_expr_p (tree, tree, enum tree_code); -diff --git a/gcc/optabs.c b/gcc/optabs.c -index c2c1274eb..d9788d248 100644 ---- a/gcc/optabs.c -+++ b/gcc/optabs.c -@@ -3727,7 +3727,7 @@ emit_libcall_block_1 (rtx_insn *insns, rtx target, rtx result, rtx equiv, - data.first = insns; - data.insn = insn; - data.must_stay = 0; -- note_stores (PATTERN (insn), no_conflict_move_test, &data); -+ note_stores (insn, no_conflict_move_test, &data); - if (! data.must_stay) - { - if (PREV_INSN (insn)) -@@ -6428,7 +6428,7 @@ expand_atomic_compare_and_swap (rtx *ptarget_bool, rtx *ptarget_oval, - /* Otherwise, work out if the compare-and-swap succeeded. */ - cc_reg = NULL_RTX; - if (have_insn_for (COMPARE, CCmode)) -- note_stores (PATTERN (get_last_insn ()), find_cc_set, &cc_reg); -+ note_stores (get_last_insn (), find_cc_set, &cc_reg); - if (cc_reg) - { - target_bool = emit_store_flag_force (target_bool, EQ, cc_reg, -@@ -7181,18 +7181,16 @@ static bool - maybe_legitimize_operand (enum insn_code icode, unsigned int opno, - struct expand_operand *op) - { -- machine_mode mode, imode; -- bool old_volatile_ok, result; -+ machine_mode mode, imode, tmode; - - mode = op->mode; - switch (op->type) - { - case EXPAND_FIXED: -- old_volatile_ok = volatile_ok; -- volatile_ok = true; -- result = maybe_legitimize_operand_same_code (icode, opno, op); -- volatile_ok = old_volatile_ok; -- return result; -+ { -+ temporary_volatile_ok v (true); -+ return maybe_legitimize_operand_same_code (icode, opno, op); -+ } - - case EXPAND_OUTPUT: - gcc_assert (mode != VOIDmode); -@@ -7230,9 +7228,17 @@ maybe_legitimize_operand (enum insn_code icode, unsigned int opno, - gcc_assert (mode != VOIDmode); - - imode = insn_data[(int) icode].operand[opno].mode; -+ tmode = (VECTOR_MODE_P (imode) && !VECTOR_MODE_P (mode) -+ ? 
GET_MODE_INNER (imode) : imode); -+ if (tmode != VOIDmode && tmode != mode) -+ { -+ op->value = convert_modes (tmode, mode, op->value, op->unsigned_p); -+ mode = tmode; -+ } - if (imode != VOIDmode && imode != mode) - { -- op->value = convert_modes (imode, mode, op->value, op->unsigned_p); -+ gcc_assert (VECTOR_MODE_P (imode) && !VECTOR_MODE_P (mode)); -+ op->value = expand_vector_broadcast (imode, op->value); - mode = imode; - } - goto input; -diff --git a/gcc/optabs.def b/gcc/optabs.def -index 8af3a2f43..912766656 100644 ---- a/gcc/optabs.def -+++ b/gcc/optabs.def -@@ -230,6 +230,9 @@ OPTAB_D (cond_umod_optab, "cond_umod$a") - OPTAB_D (cond_and_optab, "cond_and$a") - OPTAB_D (cond_ior_optab, "cond_ior$a") - OPTAB_D (cond_xor_optab, "cond_xor$a") -+OPTAB_D (cond_ashl_optab, "cond_ashl$a") -+OPTAB_D (cond_ashr_optab, "cond_ashr$a") -+OPTAB_D (cond_lshr_optab, "cond_lshr$a") - OPTAB_D (cond_smin_optab, "cond_smin$a") - OPTAB_D (cond_smax_optab, "cond_smax$a") - OPTAB_D (cond_umin_optab, "cond_umin$a") -@@ -256,7 +259,7 @@ OPTAB_D (umul_highpart_optab, "umul$a3_highpart") - OPTAB_D (cmpmem_optab, "cmpmem$a") - OPTAB_D (cmpstr_optab, "cmpstr$a") - OPTAB_D (cmpstrn_optab, "cmpstrn$a") --OPTAB_D (movmem_optab, "movmem$a") -+OPTAB_D (cpymem_optab, "cpymem$a") - OPTAB_D (setmem_optab, "setmem$a") - OPTAB_D (strlen_optab, "strlen$a") - -@@ -323,6 +326,7 @@ OPTAB_D (reduc_and_scal_optab, "reduc_and_scal_$a") - OPTAB_D (reduc_ior_scal_optab, "reduc_ior_scal_$a") - OPTAB_D (reduc_xor_scal_optab, "reduc_xor_scal_$a") - OPTAB_D (fold_left_plus_optab, "fold_left_plus_$a") -+OPTAB_D (mask_fold_left_plus_optab, "mask_fold_left_plus_$a") - - OPTAB_D (extract_last_optab, "extract_last_$a") - OPTAB_D (fold_extract_last_optab, "fold_extract_last_$a") -@@ -337,6 +341,11 @@ OPTAB_D (udot_prod_optab, "udot_prod$I$a") - OPTAB_D (usum_widen_optab, "widen_usum$I$a3") - OPTAB_D (usad_optab, "usad$I$a") - OPTAB_D (ssad_optab, "ssad$I$a") -+OPTAB_D (smulhs_optab, "smulhs$a3") -+OPTAB_D (smulhrs_optab, "smulhrs$a3") -+OPTAB_D (umulhs_optab, "umulhs$a3") -+OPTAB_D (umulhrs_optab, "umulhrs$a3") -+OPTAB_D (sdiv_pow2_optab, "sdiv_pow2$a3") - OPTAB_D (vec_pack_sfix_trunc_optab, "vec_pack_sfix_trunc_$a") - OPTAB_D (vec_pack_ssat_optab, "vec_pack_ssat_$a") - OPTAB_D (vec_pack_trunc_optab, "vec_pack_trunc_$a") -diff --git a/gcc/optabs.h b/gcc/optabs.h -index 17b5dfb67..18dec50f5 100644 ---- a/gcc/optabs.h -+++ b/gcc/optabs.h -@@ -128,7 +128,11 @@ create_convert_operand_to (struct expand_operand *op, rtx value, - /* Make OP describe an input operand that should have the same value - as VALUE, after any mode conversion that the backend might request. - If VALUE is a CONST_INT, it should be treated as having mode MODE. -- UNSIGNED_P says whether VALUE is unsigned. */ -+ UNSIGNED_P says whether VALUE is unsigned. -+ -+ The conversion of VALUE can include a combination of numerical -+ conversion (as for convert_modes) and duplicating a scalar to fill -+ a vector (if VALUE is a scalar but the operand is a vector). */ - - static inline void - create_convert_operand_from (struct expand_operand *op, rtx value, -diff --git a/gcc/opts-global.c b/gcc/opts-global.c -index 4f8aac7e9..6e4f2d528 100644 ---- a/gcc/opts-global.c -+++ b/gcc/opts-global.c -@@ -255,6 +255,7 @@ init_options_once (void) - construct their pretty-printers means that all previous settings - are overriden. 
*/ - diagnostic_color_init (global_dc); -+ diagnostic_urls_init (global_dc); - } - - /* Decode command-line options to an array, like -diff --git a/gcc/opts.c b/gcc/opts.c -index 494be7a9f..a8db491b5 100644 ---- a/gcc/opts.c -+++ b/gcc/opts.c -@@ -465,7 +465,6 @@ static const struct default_options default_options_table[] = - { OPT_LEVELS_1_PLUS, OPT_ftree_copy_prop, NULL, 1 }, - { OPT_LEVELS_1_PLUS, OPT_ftree_dce, NULL, 1 }, - { OPT_LEVELS_1_PLUS, OPT_ftree_dominator_opts, NULL, 1 }, -- { OPT_LEVELS_1_PLUS, OPT_ftree_dse, NULL, 1 }, - { OPT_LEVELS_1_PLUS, OPT_ftree_fre, NULL, 1 }, - { OPT_LEVELS_1_PLUS, OPT_ftree_sink, NULL, 1 }, - { OPT_LEVELS_1_PLUS, OPT_ftree_slsr, NULL, 1 }, -@@ -476,14 +475,16 @@ static const struct default_options default_options_table[] = - #if DELAY_SLOTS - { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fdelayed_branch, NULL, 1 }, - #endif -+ { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fdse, NULL, 1 }, - { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fif_conversion, NULL, 1 }, - { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fif_conversion2, NULL, 1 }, - { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_finline_functions_called_once, NULL, 1 }, - { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fmove_loop_invariants, NULL, 1 }, - { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_fssa_phiopt, NULL, 1 }, - { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_ftree_bit_ccp, NULL, 1 }, -- { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_ftree_sra, NULL, 1 }, -+ { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_ftree_dse, NULL, 1 }, - { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_ftree_pta, NULL, 1 }, -+ { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_ftree_sra, NULL, 1 }, - - /* -O2 and -Os optimizations. */ - { OPT_LEVELS_2_PLUS, OPT_fcaller_saves, NULL, 1 }, -@@ -521,6 +522,7 @@ static const struct default_options default_options_table[] = - { OPT_LEVELS_2_PLUS, OPT_ftree_tail_merge, NULL, 1 }, - { OPT_LEVELS_2_PLUS, OPT_ftree_vrp, NULL, 1 }, - { OPT_LEVELS_2_PLUS, OPT_fvect_cost_model_, NULL, VECT_COST_MODEL_CHEAP }, -+ { OPT_LEVELS_2_PLUS, OPT_finline_functions, NULL, 1 }, - - /* -O2 and -Os optimizations. */ - { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_falign_functions, NULL, 1 }, -@@ -536,9 +538,6 @@ static const struct default_options default_options_table[] = - #endif - - /* -O3 and -Os optimizations. */ -- /* Inlining of functions reducing size is a good idea with -Os -- regardless of them being declared inline. */ -- { OPT_LEVELS_3_PLUS_AND_SIZE, OPT_finline_functions, NULL, 1 }, - - /* -O3 optimizations. */ - { OPT_LEVELS_3_PLUS, OPT_fgcse_after_reload, NULL, 1 }, -@@ -2400,6 +2399,10 @@ common_handle_option (struct gcc_options *opts, - diagnostic_color_init (dc, value); - break; - -+ case OPT_fdiagnostics_urls_: -+ diagnostic_urls_init (dc, value); -+ break; -+ - case OPT_fdiagnostics_format_: - diagnostic_output_format_init (dc, - (enum diagnostics_output_format)value); -diff --git a/gcc/params.def b/gcc/params.def -index 08c709636..0ef092214 100644 ---- a/gcc/params.def -+++ b/gcc/params.def -@@ -61,8 +61,13 @@ DEFPARAM (PARAM_PREDICTABLE_BRANCH_OUTCOME, - - DEFPARAM (PARAM_INLINE_MIN_SPEEDUP, - "inline-min-speedup", -+ "The minimal estimated speedup allowing inliner to ignore inline-insns-single and inline-insns-auto with -O3 and -Ofast.", -+ 15, 0, 100) -+ -+DEFPARAM (PARAM_INLINE_MIN_SPEEDUP_O2, -+ "inline-min-speedup-O2", - "The minimal estimated speedup allowing inliner to ignore inline-insns-single and inline-insns-auto.", -- 15, 0, 0) -+ 30, 0, 100) - - /* The single function inlining limit. 
This is the maximum size - of a function counted in internal gcc instructions (not in -@@ -77,9 +82,14 @@ DEFPARAM (PARAM_INLINE_MIN_SPEEDUP, - gets decreased. */ - DEFPARAM (PARAM_MAX_INLINE_INSNS_SINGLE, - "max-inline-insns-single", -- "The maximum number of instructions in a single function eligible for inlining.", -+ "The maximum number of instructions in a single function eligible for inlining with -O3 and -Ofast.", - 200, 0, 0) - -+DEFPARAM (PARAM_MAX_INLINE_INSNS_SINGLE_O2, -+ "max-inline-insns-single-O2", -+ "The maximum number of instructions in a single function eligible for inlining.", -+ 30, 0, 0) -+ - /* The single function inlining limit for functions that are - inlined by virtue of -finline-functions (-O3). - This limit should be chosen to be below or equal to the limit -@@ -89,9 +99,14 @@ DEFPARAM (PARAM_MAX_INLINE_INSNS_SINGLE, - The default value is 30. */ - DEFPARAM (PARAM_MAX_INLINE_INSNS_AUTO, - "max-inline-insns-auto", -- "The maximum number of instructions when automatically inlining.", -+ "The maximum number of instructions when automatically inlining with -O3 and -Ofast.", - 30, 0, 0) - -+DEFPARAM (PARAM_MAX_INLINE_INSNS_AUTO_O2, -+ "max-inline-insns-auto-O2", -+ "The maximum number of instructions when automatically inlining.", -+ 15, 0, 0) -+ - DEFPARAM (PARAM_MAX_INLINE_INSNS_SMALL, - "max-inline-insns-small", - "The maximum number of instructions when automatically inlining small functions.", -@@ -243,8 +258,12 @@ DEFPARAM(PARAM_IPCP_UNIT_GROWTH, - 10, 0, 0) - DEFPARAM(PARAM_EARLY_INLINING_INSNS, - "early-inlining-insns", -- "Maximal estimated growth of function body caused by early inlining of single call.", -+ "Maximal estimated growth of function body caused by early inlining of single call with -O3 and -Ofast.", - 14, 0, 0) -+DEFPARAM(PARAM_EARLY_INLINING_INSNS_O2, -+ "early-inlining-insns-O2", -+ "Maximal estimated growth of function body caused by early inlining of single call with -O1 and -O2.", -+ 6, 0, 0) - DEFPARAM(PARAM_LARGE_STACK_FRAME, - "large-stack-frame", - "The size of stack frame to be considered large.", -diff --git a/gcc/passes.def b/gcc/passes.def -index 901dbef93..a03685500 100644 ---- a/gcc/passes.def -+++ b/gcc/passes.def -@@ -459,7 +459,6 @@ along with GCC; see the file COPYING3. If not see - NEXT_PASS (pass_split_after_reload); - NEXT_PASS (pass_ree); - NEXT_PASS (pass_compare_elim_after_reload); -- NEXT_PASS (pass_branch_target_load_optimize1); - NEXT_PASS (pass_thread_prologue_and_epilogue); - NEXT_PASS (pass_rtl_dse2); - NEXT_PASS (pass_stack_adjustments); -@@ -472,7 +471,6 @@ along with GCC; see the file COPYING3. If not see - NEXT_PASS (pass_cprop_hardreg); - NEXT_PASS (pass_fast_rtl_dce); - NEXT_PASS (pass_reorder_blocks); -- NEXT_PASS (pass_branch_target_load_optimize2); - NEXT_PASS (pass_leaf_regs); - NEXT_PASS (pass_split_before_sched2); - NEXT_PASS (pass_sched2); -diff --git a/gcc/postreload-gcse.c b/gcc/postreload-gcse.c -index a165351ca..bc2e8fc91 100644 ---- a/gcc/postreload-gcse.c -+++ b/gcc/postreload-gcse.c -@@ -672,7 +672,7 @@ load_killed_in_block_p (int uid_limit, rtx x, bool after_insn) - It will set mems_conflict_p to nonzero if there may be a - conflict between X and SETTER. */ - mems_conflict_p = 0; -- note_stores (PATTERN (setter), find_mem_conflicts, x); -+ note_stores (setter, find_mem_conflicts, x); - if (mems_conflict_p) - return 1; - -@@ -774,7 +774,7 @@ record_opr_changes (rtx_insn *insn) - rtx note; - - /* Find all stores and record them. 
*/ -- note_stores (PATTERN (insn), record_last_set_info, insn); -+ note_stores (insn, record_last_set_info, insn); - - /* Also record autoincremented REGs for this insn as changed. */ - for (note = REG_NOTES (insn); note; note = XEXP (note, 1)) -@@ -785,25 +785,10 @@ record_opr_changes (rtx_insn *insn) - if (CALL_P (insn)) - { - unsigned int regno; -- rtx link, x; - hard_reg_set_iterator hrsi; - EXECUTE_IF_SET_IN_HARD_REG_SET (regs_invalidated_by_call, 0, regno, hrsi) - record_last_reg_set_info_regno (insn, regno); - -- for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1)) -- { -- gcc_assert (GET_CODE (XEXP (link, 0)) != CLOBBER_HIGH); -- if (GET_CODE (XEXP (link, 0)) == CLOBBER) -- { -- x = XEXP (XEXP (link, 0), 0); -- if (REG_P (x)) -- { -- gcc_assert (HARD_REGISTER_P (x)); -- record_last_reg_set_info (insn, x); -- } -- } -- } -- - if (! RTL_CONST_OR_PURE_CALL_P (insn)) - record_last_mem_set_info (insn); - } -diff --git a/gcc/postreload.c b/gcc/postreload.c -index b76c7b0b7..ee0dc6ae8 100644 ---- a/gcc/postreload.c -+++ b/gcc/postreload.c -@@ -40,6 +40,7 @@ along with GCC; see the file COPYING3. If not see - #include "cselib.h" - #include "tree-pass.h" - #include "dbgcnt.h" -+#include "function-abi.h" - - static int reload_cse_noop_set_p (rtx); - static bool reload_cse_simplify (rtx_insn *, rtx); -@@ -133,8 +134,6 @@ reload_cse_simplify (rtx_insn *insn, rtx testreg) - for (i = XVECLEN (body, 0) - 1; i >= 0; --i) - { - rtx part = XVECEXP (body, 0, i); -- /* asms can only have full clobbers, not clobber_highs. */ -- gcc_assert (GET_CODE (part) != CLOBBER_HIGH); - if (GET_CODE (part) == CLOBBER && REG_P (XEXP (part, 0))) - cselib_invalidate_rtx (XEXP (part, 0)); - } -@@ -157,9 +156,7 @@ reload_cse_simplify (rtx_insn *insn, rtx testreg) - value = SET_DEST (part); - } - } -- else if (GET_CODE (part) != CLOBBER -- && GET_CODE (part) != CLOBBER_HIGH -- && GET_CODE (part) != USE) -+ else if (GET_CODE (part) != CLOBBER && GET_CODE (part) != USE) - break; - } - -@@ -1139,7 +1136,7 @@ reload_combine_recognize_pattern (rtx_insn *insn) - if (TEST_HARD_REG_BIT (reg_class_contents[INDEX_REG_CLASS], i) - && reg_state[i].use_index == RELOAD_COMBINE_MAX_USES - && reg_state[i].store_ruid <= reg_state[regno].use_ruid -- && (call_used_regs[i] || df_regs_ever_live_p (i)) -+ && (call_used_or_fixed_reg_p (i) || df_regs_ever_live_p (i)) - && (!frame_pointer_needed || i != HARD_FRAME_POINTER_REGNUM) - && !fixed_regs[i] && !global_regs[i] - && hard_regno_nregs (i, GET_MODE (reg)) == 1 -@@ -1271,8 +1268,8 @@ reload_combine (void) - - REG_SET_TO_HARD_REG_SET (live, live_in); - compute_use_by_pseudos (&live, live_in); -- COPY_HARD_REG_SET (LABEL_LIVE (insn), live); -- IOR_HARD_REG_SET (ever_live_at_start, live); -+ LABEL_LIVE (insn) = live; -+ ever_live_at_start |= live; - } - } - -@@ -1329,14 +1326,15 @@ reload_combine (void) - || reload_combine_recognize_pattern (insn)) - continue; - -- note_stores (PATTERN (insn), reload_combine_note_store, NULL); -+ note_stores (insn, reload_combine_note_store, NULL); - - if (CALL_P (insn)) - { - rtx link; -- HARD_REG_SET used_regs; -- -- get_call_reg_set_usage (insn, &used_regs, call_used_reg_set); -+ HARD_REG_SET used_regs = insn_callee_abi (insn).full_reg_clobbers (); -+ /* ??? This preserves traditional behavior; it might not be -+ needed. 
*/ -+ used_regs |= fixed_reg_set; - - for (r = 0; r < FIRST_PSEUDO_REGISTER; r++) - if (TEST_HARD_REG_BIT (used_regs, r)) -@@ -1350,22 +1348,12 @@ reload_combine (void) - { - rtx setuse = XEXP (link, 0); - rtx usage_rtx = XEXP (setuse, 0); -- /* We could support CLOBBER_HIGH and treat it in the same way as -- HARD_REGNO_CALL_PART_CLOBBERED, but no port needs that yet. */ -- gcc_assert (GET_CODE (setuse) != CLOBBER_HIGH); - -- if ((GET_CODE (setuse) == USE || GET_CODE (setuse) == CLOBBER) -- && REG_P (usage_rtx)) -+ if (GET_CODE (setuse) == USE && REG_P (usage_rtx)) - { - unsigned int end_regno = END_REGNO (usage_rtx); - for (unsigned int i = REGNO (usage_rtx); i < end_regno; ++i) -- if (GET_CODE (XEXP (link, 0)) == CLOBBER) -- { -- reg_state[i].use_index = RELOAD_COMBINE_MAX_USES; -- reg_state[i].store_ruid = reload_combine_ruid; -- } -- else -- reg_state[i].use_index = -1; -+ reg_state[i].use_index = -1; - } - } - } -@@ -1529,10 +1517,6 @@ reload_combine_note_use (rtx *xp, rtx_insn *insn, int ruid, rtx containing_mem) - } - break; - -- case CLOBBER_HIGH: -- gcc_assert (REG_P (SET_DEST (x))); -- return; -- - case PLUS: - /* We are interested in (plus (reg) (const_int)) . */ - if (!REG_P (XEXP (x, 0)) -@@ -2108,7 +2092,7 @@ reload_cse_move2add (rtx_insn *first) - } - } - } -- note_stores (PATTERN (insn), move2add_note_store, insn); -+ note_stores (insn, move2add_note_store, insn); - - /* If INSN is a conditional branch, we try to extract an - implicit set out of it. */ -@@ -2138,32 +2122,12 @@ reload_cse_move2add (rtx_insn *first) - unknown values. */ - if (CALL_P (insn)) - { -- rtx link; -- - for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--) - { -- if (call_used_regs[i]) -+ if (call_used_or_fixed_reg_p (i)) - /* Reset the information about this register. */ - reg_mode[i] = VOIDmode; - } -- -- for (link = CALL_INSN_FUNCTION_USAGE (insn); link; -- link = XEXP (link, 1)) -- { -- rtx setuse = XEXP (link, 0); -- rtx usage_rtx = XEXP (setuse, 0); -- /* CALL_INSN_FUNCTION_USAGEs can only have full clobbers, not -- clobber_highs. */ -- gcc_assert (GET_CODE (setuse) != CLOBBER_HIGH); -- if (GET_CODE (setuse) == CLOBBER -- && REG_P (usage_rtx)) -- { -- unsigned int end_regno = END_REGNO (usage_rtx); -- for (unsigned int r = REGNO (usage_rtx); r < end_regno; ++r) -- /* Reset the information about this register. */ -- reg_mode[r] = VOIDmode; -- } -- } - } - } - return changed; -@@ -2317,13 +2281,6 @@ move2add_note_store (rtx dst, const_rtx set, void *data) - - move2add_record_mode (dst); - } -- else if (GET_CODE (set) == CLOBBER_HIGH) -- { -- /* Only invalidate if actually clobbered. 
*/ -- if (reg_mode[regno] == BLKmode -- || reg_is_clobbered_by_clobber_high (regno, reg_mode[regno], dst)) -- goto invalidate; -- } - else - { - invalidate: -diff --git a/gcc/predict.c b/gcc/predict.c -index eaab47f99..03dd4ddfa 100644 ---- a/gcc/predict.c -+++ b/gcc/predict.c -@@ -2450,7 +2450,7 @@ expr_expected_value_1 (tree type, tree op0, enum tree_code code, - return NULL; - } - -- if (DECL_IS_MALLOC (decl) || DECL_IS_OPERATOR_NEW (decl)) -+ if (DECL_IS_MALLOC (decl) || DECL_IS_OPERATOR_NEW_P (decl)) - { - if (predictor) - *predictor = PRED_MALLOC_NONNULL; -diff --git a/gcc/pretty-print.c b/gcc/pretty-print.c -index 6948971ce..5af7ca764 100644 ---- a/gcc/pretty-print.c -+++ b/gcc/pretty-print.c -@@ -1579,7 +1579,8 @@ pretty_printer::pretty_printer (int maximum_length) - emitted_prefix (), - need_newline (), - translate_identifiers (true), -- show_color () -+ show_color (), -+ show_urls (false) - { - pp_line_cutoff (this) = maximum_length; - /* By default, we emit prefixes once per message. */ -@@ -2028,6 +2029,41 @@ identifier_to_locale (const char *ident) - } - } - -+/* Support for encoding URLs. -+ See egmontkob/Hyperlinks_in_Terminal_Emulators.md -+ ( https://gist.github.com/egmontkob/eb114294efbcd5adb1944c9f3cb5feda ). -+ -+ > A hyperlink is opened upon encountering an OSC 8 escape sequence with -+ > the target URI. The syntax is -+ > -+ > OSC 8 ; params ; URI ST -+ > -+ > A hyperlink is closed with the same escape sequence, omitting the -+ > parameters and the URI but keeping the separators: -+ > -+ > OSC 8 ; ; ST -+ > -+ > OSC (operating system command) is typically ESC ]. */ -+ -+/* If URL-printing is enabled, write an "open URL" escape sequence to PP -+ for the given URL. */ -+ -+void -+pp_begin_url (pretty_printer *pp, const char *url) -+{ -+ if (pp->show_urls) -+ pp_printf (pp, "\33]8;;%s\33\\", url); -+} -+ -+/* If URL-printing is enabled, write a "close URL" escape sequence to PP. */ -+ -+void -+pp_end_url (pretty_printer *pp) -+{ -+ if (pp->show_urls) -+ pp_string (pp, "\33]8;;\33\\"); -+} -+ - #if CHECKING_P - - namespace selftest { -@@ -2312,6 +2348,32 @@ test_prefixes_and_wrapping () - - } - -+/* Verify that URL-printing works as expected. */ -+ -+void -+test_urls () -+{ -+ { -+ pretty_printer pp; -+ pp.show_urls = false; -+ pp_begin_url (&pp, "http://example.com"); -+ pp_string (&pp, "This is a link"); -+ pp_end_url (&pp); -+ ASSERT_STREQ ("This is a link", -+ pp_formatted_text (&pp)); -+ } -+ -+ { -+ pretty_printer pp; -+ pp.show_urls = true; -+ pp_begin_url (&pp, "http://example.com"); -+ pp_string (&pp, "This is a link"); -+ pp_end_url (&pp); -+ ASSERT_STREQ ("\33]8;;http://example.com\33\\This is a link\33]8;;\33\\", -+ pp_formatted_text (&pp)); -+ } -+} -+ - /* Run all of the selftests within this file. */ - - void -@@ -2320,6 +2382,7 @@ pretty_print_c_tests () - test_basic_printing (); - test_pp_format (); - test_prefixes_and_wrapping (); -+ test_urls (); - } - - } // namespace selftest -diff --git a/gcc/pretty-print.h b/gcc/pretty-print.h -index e4df65907..07cd39176 100644 ---- a/gcc/pretty-print.h -+++ b/gcc/pretty-print.h -@@ -271,6 +271,9 @@ struct pretty_printer - - /* Nonzero means that text should be colorized. */ - bool show_color; -+ -+ /* Nonzero means that URLs should be emitted. 
*/ -+ bool show_urls; - }; - - static inline const char * -@@ -391,6 +394,9 @@ extern void pp_maybe_space (pretty_printer *); - extern void pp_begin_quote (pretty_printer *, bool); - extern void pp_end_quote (pretty_printer *, bool); - -+extern void pp_begin_url (pretty_printer *pp, const char *url); -+extern void pp_end_url (pretty_printer *pp); -+ - /* Switch into verbatim mode and return the old mode. */ - static inline pp_wrapping_mode_t - pp_set_verbatim_wrapping_ (pretty_printer *pp) -diff --git a/gcc/print-rtl.c b/gcc/print-rtl.c -index fbb108568..01f281604 100644 ---- a/gcc/print-rtl.c -+++ b/gcc/print-rtl.c -@@ -1756,7 +1756,6 @@ print_pattern (pretty_printer *pp, const_rtx x, int verbose) - print_exp (pp, x, verbose); - break; - case CLOBBER: -- case CLOBBER_HIGH: - case USE: - pp_printf (pp, "%s ", GET_RTX_NAME (GET_CODE (x))); - print_value (pp, XEXP (x, 0), verbose); -diff --git a/gcc/print-tree.c b/gcc/print-tree.c -index 81b66a189..7c0d05548 100644 ---- a/gcc/print-tree.c -+++ b/gcc/print-tree.c -@@ -517,7 +517,11 @@ print_node (FILE *file, const char *prefix, tree node, int indent, - if (code == FUNCTION_DECL && fndecl_built_in_p (node)) - { - if (DECL_BUILT_IN_CLASS (node) == BUILT_IN_MD) -- fprintf (file, " built-in: BUILT_IN_MD:%d", DECL_FUNCTION_CODE (node)); -+ fprintf (file, " built-in: BUILT_IN_MD:%d", -+ DECL_MD_FUNCTION_CODE (node)); -+ else if (DECL_BUILT_IN_CLASS (node) == BUILT_IN_FRONTEND) -+ fprintf (file, " built-in: BUILT_IN_FRONTEND:%d", -+ DECL_FE_FUNCTION_CODE (node)); - else - fprintf (file, " built-in: %s:%s", - built_in_class_names[(int) DECL_BUILT_IN_CLASS (node)], -diff --git a/gcc/read-md.h b/gcc/read-md.h -index 18426f71d..327f378ea 100644 ---- a/gcc/read-md.h -+++ b/gcc/read-md.h -@@ -337,6 +337,7 @@ class rtx_reader : public md_reader - ~rtx_reader (); - - bool read_rtx (const char *rtx_name, vec *rtxen); -+ rtx rtx_alloc_for_name (const char *); - rtx read_rtx_code (const char *code_name); - virtual rtx read_rtx_operand (rtx return_rtx, int idx); - rtx read_nested_rtx (); -diff --git a/gcc/read-rtl-function.c b/gcc/read-rtl-function.c -index 53f7a94c1..ded407737 100644 ---- a/gcc/read-rtl-function.c -+++ b/gcc/read-rtl-function.c -@@ -41,6 +41,8 @@ along with GCC; see the file COPYING3. If not see - #include "read-rtl-function.h" - #include "selftest.h" - #include "selftest-rtl.h" -+#include "regs.h" -+#include "function-abi.h" - - /* Forward decls. */ - class function_reader; -@@ -1610,6 +1612,7 @@ bool - read_rtl_function_body (const char *path) - { - initialize_rtl (); -+ crtl->abi = &default_function_abi; - init_emit (); - init_varasm_status (); - -@@ -1643,6 +1646,7 @@ read_rtl_function_body_from_file_range (location_t start_loc, - } - - initialize_rtl (); -+ crtl->abi = &fndecl_abi (cfun->decl).base_abi (); - init_emit (); - init_varasm_status (); - -diff --git a/gcc/read-rtl.c b/gcc/read-rtl.c -index 1af51f686..6b1b811cb 100644 ---- a/gcc/read-rtl.c -+++ b/gcc/read-rtl.c -@@ -194,22 +194,31 @@ static const compact_insn_name compact_insn_names[] = { - { NOTE, "cnote" } - }; - --/* Implementations of the iterator_group callbacks for codes. */ -+/* Return the rtx code for NAME, or UNKNOWN if NAME isn't a valid rtx code. 
*/ - --static int --find_code (const char *name) -+static rtx_code -+maybe_find_code (const char *name) - { -- int i; -- -- for (i = 0; i < NUM_RTX_CODE; i++) -+ for (int i = 0; i < NUM_RTX_CODE; i++) - if (strcmp (GET_RTX_NAME (i), name) == 0) -- return i; -+ return (rtx_code) i; - -- for (i = 0; i < (signed)ARRAY_SIZE (compact_insn_names); i++) -+ for (int i = 0; i < (signed)ARRAY_SIZE (compact_insn_names); i++) - if (strcmp (compact_insn_names[i].name, name) == 0) - return compact_insn_names[i].code; - -- fatal_with_file_and_line ("unknown rtx code `%s'", name); -+ return UNKNOWN; -+} -+ -+/* Implementations of the iterator_group callbacks for codes. */ -+ -+static int -+find_code (const char *name) -+{ -+ rtx_code code = maybe_find_code (name); -+ if (code == UNKNOWN) -+ fatal_with_file_and_line ("unknown rtx code `%s'", name); -+ return code; - } - - static void -@@ -277,9 +286,11 @@ apply_subst_iterator (rtx rt, unsigned int, int value) - return; - gcc_assert (GET_CODE (rt) == DEFINE_INSN - || GET_CODE (rt) == DEFINE_INSN_AND_SPLIT -+ || GET_CODE (rt) == DEFINE_INSN_AND_REWRITE - || GET_CODE (rt) == DEFINE_EXPAND); - -- int attrs = GET_CODE (rt) == DEFINE_INSN_AND_SPLIT ? 7 : 4; -+ int attrs = (GET_CODE (rt) == DEFINE_INSN_AND_SPLIT ? 7 -+ : GET_CODE (rt) == DEFINE_INSN_AND_REWRITE ? 6 : 4); - attrs_vec = XVEC (rt, attrs); - - /* If we've already added attribute 'current_iterator_name', then we -@@ -540,6 +551,7 @@ add_condition_to_rtx (rtx x, const char *extra) - break; - - case DEFINE_INSN_AND_SPLIT: -+ case DEFINE_INSN_AND_REWRITE: - XSTR (x, 2) = add_condition_to_string (XSTR (x, 2), extra); - XSTR (x, 4) = add_condition_to_string (XSTR (x, 4), extra); - break; -@@ -623,6 +635,7 @@ named_rtx_p (rtx x) - case DEFINE_EXPAND: - case DEFINE_INSN: - case DEFINE_INSN_AND_SPLIT: -+ case DEFINE_INSN_AND_REWRITE: - return true; - - default: -@@ -1306,7 +1319,37 @@ check_code_iterator (struct mapping *iterator) - for (v = iterator->values->next; v != 0; v = v->next) - if (strcmp (GET_RTX_FORMAT (bellwether), GET_RTX_FORMAT (v->number)) != 0) - fatal_with_file_and_line ("code iterator `%s' combines " -- "different rtx formats", iterator->name); -+ "`%s' and `%s', which have different " -+ "rtx formats", iterator->name, -+ GET_RTX_NAME (bellwether), -+ GET_RTX_NAME (v->number)); -+} -+ -+/* Check that all values of attribute ATTR are rtx codes that have a -+ consistent format. Return a representative code. */ -+ -+static rtx_code -+check_code_attribute (mapping *attr) -+{ -+ rtx_code bellwether = UNKNOWN; -+ for (map_value *v = attr->values; v != 0; v = v->next) -+ { -+ rtx_code code = maybe_find_code (v->string); -+ if (code == UNKNOWN) -+ fatal_with_file_and_line ("code attribute `%s' contains " -+ "unrecognized rtx code `%s'", -+ attr->name, v->string); -+ if (bellwether == UNKNOWN) -+ bellwether = code; -+ else if (strcmp (GET_RTX_FORMAT (bellwether), -+ GET_RTX_FORMAT (code)) != 0) -+ fatal_with_file_and_line ("code attribute `%s' combines " -+ "`%s' and `%s', which have different " -+ "rtx formats", attr->name, -+ GET_RTX_NAME (bellwether), -+ GET_RTX_NAME (code)); -+ } -+ return bellwether; - } - - /* Read an rtx-related declaration from the MD file, given that it -@@ -1467,6 +1510,54 @@ parse_reg_note_name (const char *string) - fatal_with_file_and_line ("unrecognized REG_NOTE name: `%s'", string); - } - -+/* Allocate an rtx for code NAME. If NAME is a code iterator or code -+ attribute, record its use for later and use one of its possible -+ values as an interim rtx code. 
*/ -+ -+rtx -+rtx_reader::rtx_alloc_for_name (const char *name) -+{ -+#ifdef GENERATOR_FILE -+ size_t len = strlen (name); -+ if (name[0] == '<' && name[len - 1] == '>') -+ { -+ /* Copy the attribute string into permanent storage, without the -+ angle brackets around it. */ -+ obstack *strings = get_string_obstack (); -+ obstack_grow0 (strings, name + 1, len - 2); -+ char *deferred_name = XOBFINISH (strings, char *); -+ -+ /* Find the name of the attribute. */ -+ const char *attr = strchr (deferred_name, ':'); -+ if (!attr) -+ attr = deferred_name; -+ -+ /* Find the attribute itself. */ -+ mapping *m = (mapping *) htab_find (codes.attrs, &attr); -+ if (!m) -+ fatal_with_file_and_line ("unknown code attribute `%s'", attr); -+ -+ /* Pick the first possible code for now, and record the attribute -+ use for later. */ -+ rtx x = rtx_alloc (check_code_attribute (m)); -+ record_attribute_use (&codes, x, 0, deferred_name); -+ return x; -+ } -+ -+ mapping *iterator = (mapping *) htab_find (codes.iterators, &name); -+ if (iterator != 0) -+ { -+ /* Pick the first possible code for now, and record the iterator -+ use for later. */ -+ rtx x = rtx_alloc (rtx_code (iterator->values->number)); -+ record_iterator_use (iterator, x, 0); -+ return x; -+ } -+#endif -+ -+ return rtx_alloc (rtx_code (codes.find_builtin (name))); -+} -+ - /* Subroutine of read_rtx and read_nested_rtx. CODE_NAME is the name of - either an rtx code or a code iterator. Parse the rest of the rtx and - return it. */ -@@ -1475,7 +1566,6 @@ rtx - rtx_reader::read_rtx_code (const char *code_name) - { - RTX_CODE code; -- struct mapping *iterator = NULL; - const char *format_ptr; - struct md_name name; - rtx return_rtx; -@@ -1509,20 +1599,9 @@ rtx_reader::read_rtx_code (const char *code_name) - return return_rtx; - } - -- /* If this code is an iterator, build the rtx using the iterator's -- first value. */ --#ifdef GENERATOR_FILE -- iterator = (struct mapping *) htab_find (codes.iterators, &code_name); -- if (iterator != 0) -- code = (enum rtx_code) iterator->values->number; -- else -- code = (enum rtx_code) codes.find_builtin (code_name); --#else -- code = (enum rtx_code) codes.find_builtin (code_name); --#endif -- - /* If we end up with an insn expression then we free this space below. */ -- return_rtx = rtx_alloc (code); -+ return_rtx = rtx_alloc_for_name (code_name); -+ code = GET_CODE (return_rtx); - format_ptr = GET_RTX_FORMAT (code); - memset (return_rtx, 0, RTX_CODE_SIZE (code)); - PUT_CODE (return_rtx, code); -@@ -1534,9 +1613,6 @@ rtx_reader::read_rtx_code (const char *code_name) - m_reuse_rtx_by_id[reuse_id] = return_rtx; - } - -- if (iterator) -- record_iterator_use (iterator, return_rtx, 0); -- - /* Check for flags. */ - read_flags (return_rtx); - -@@ -1765,8 +1841,8 @@ rtx_reader::read_rtx_operand (rtx return_rtx, int idx) - break; - } - -- /* The output template slot of a DEFINE_INSN, -- DEFINE_INSN_AND_SPLIT, or DEFINE_PEEPHOLE automatically -+ /* The output template slot of a DEFINE_INSN, DEFINE_INSN_AND_SPLIT, -+ DEFINE_INSN_AND_REWRITE or DEFINE_PEEPHOLE automatically - gets a star inserted as its first character, if it is - written with a brace block instead of a string constant. 
*/ - star_if_braced = (format_ptr[idx] == 'T'); -@@ -1783,7 +1859,8 @@ rtx_reader::read_rtx_operand (rtx return_rtx, int idx) - if (*stringbuf == '\0' - && idx == 0 - && (GET_CODE (return_rtx) == DEFINE_INSN -- || GET_CODE (return_rtx) == DEFINE_INSN_AND_SPLIT)) -+ || GET_CODE (return_rtx) == DEFINE_INSN_AND_SPLIT -+ || GET_CODE (return_rtx) == DEFINE_INSN_AND_REWRITE)) - { - struct obstack *string_obstack = get_string_obstack (); - char line_name[20]; -diff --git a/gcc/real.c b/gcc/real.c -index 0164f097a..a2bd37a9e 100644 ---- a/gcc/real.c -+++ b/gcc/real.c -@@ -4799,6 +4799,116 @@ decode_ieee_half (const struct real_format *fmt, REAL_VALUE_TYPE *r, - } - } - -+/* Encode arm_bfloat types. */ -+static void -+encode_arm_bfloat_half (const struct real_format *fmt, long *buf, -+ const REAL_VALUE_TYPE *r) -+{ -+ unsigned long image, sig, exp; -+ unsigned long sign = r->sign; -+ bool denormal = (r->sig[SIGSZ-1] & SIG_MSB) == 0; -+ -+ image = sign << 15; -+ sig = (r->sig[SIGSZ-1] >> (HOST_BITS_PER_LONG - 8)) & 0x7f; -+ -+ switch (r->cl) -+ { -+ case rvc_zero: -+ break; -+ -+ case rvc_inf: -+ if (fmt->has_inf) -+ image |= 255 << 7; -+ else -+ image |= 0x7fff; -+ break; -+ -+ case rvc_nan: -+ if (fmt->has_nans) -+ { -+ if (r->canonical) -+ sig = (fmt->canonical_nan_lsbs_set ? (1 << 6) - 1 : 0); -+ if (r->signalling == fmt->qnan_msb_set) -+ sig &= ~(1 << 6); -+ else -+ sig |= 1 << 6; -+ if (sig == 0) -+ sig = 1 << 5; -+ -+ image |= 255 << 7; -+ image |= sig; -+ } -+ else -+ image |= 0x7fff; -+ break; -+ -+ case rvc_normal: -+ if (denormal) -+ exp = 0; -+ else -+ exp = REAL_EXP (r) + 127 - 1; -+ image |= exp << 7; -+ image |= sig; -+ break; -+ -+ default: -+ gcc_unreachable (); -+ } -+ -+ buf[0] = image; -+} -+ -+/* Decode arm_bfloat types. */ -+static void -+decode_arm_bfloat_half (const struct real_format *fmt, REAL_VALUE_TYPE *r, -+ const long *buf) -+{ -+ unsigned long image = buf[0] & 0xffff; -+ bool sign = (image >> 15) & 1; -+ int exp = (image >> 7) & 0xff; -+ -+ memset (r, 0, sizeof (*r)); -+ image <<= HOST_BITS_PER_LONG - 8; -+ image &= ~SIG_MSB; -+ -+ if (exp == 0) -+ { -+ if (image && fmt->has_denorm) -+ { -+ r->cl = rvc_normal; -+ r->sign = sign; -+ SET_REAL_EXP (r, -126); -+ r->sig[SIGSZ-1] = image << 1; -+ normalize (r); -+ } -+ else if (fmt->has_signed_zero) -+ r->sign = sign; -+ } -+ else if (exp == 255 && (fmt->has_nans || fmt->has_inf)) -+ { -+ if (image) -+ { -+ r->cl = rvc_nan; -+ r->sign = sign; -+ r->signalling = (((image >> (HOST_BITS_PER_LONG - 2)) & 1) -+ ^ fmt->qnan_msb_set); -+ r->sig[SIGSZ-1] = image; -+ } -+ else -+ { -+ r->cl = rvc_inf; -+ r->sign = sign; -+ } -+ } -+ else -+ { -+ r->cl = rvc_normal; -+ r->sign = sign; -+ SET_REAL_EXP (r, exp - 127 + 1); -+ r->sig[SIGSZ-1] = image | SIG_MSB; -+ } -+} -+ - /* Half-precision format, as specified in IEEE 754R. */ - const struct real_format ieee_half_format = - { -@@ -4848,6 +4958,33 @@ const struct real_format arm_half_format = - false, - "arm_half" - }; -+ -+/* ARM Bfloat half-precision format. This format resembles a truncated -+ (16-bit) version of the 32-bit IEEE 754 single-precision floating-point -+ format. */ -+const struct real_format arm_bfloat_half_format = -+ { -+ encode_arm_bfloat_half, -+ decode_arm_bfloat_half, -+ 2, -+ 8, -+ 8, -+ -125, -+ 128, -+ 15, -+ 15, -+ 0, -+ false, -+ true, -+ true, -+ true, -+ true, -+ true, -+ true, -+ false, -+ "arm_bfloat_half" -+ }; -+ - - /* A synthetic "format" for internal arithmetic. It's the size of the - internal significand minus the two bits needed for proper rounding. 
-diff --git a/gcc/real.h b/gcc/real.h -index 95b9db83d..d1b79f804 100644 ---- a/gcc/real.h -+++ b/gcc/real.h -@@ -361,6 +361,7 @@ extern const struct real_format decimal_double_format; - extern const struct real_format decimal_quad_format; - extern const struct real_format ieee_half_format; - extern const struct real_format arm_half_format; -+extern const struct real_format arm_bfloat_half_format; - - - /* ====================================================================== */ -diff --git a/gcc/recog.c b/gcc/recog.c -index a9f584bc0..b12eba33a 100644 ---- a/gcc/recog.c -+++ b/gcc/recog.c -@@ -3227,7 +3227,8 @@ peep2_find_free_register (int from, int to, const char *class_str, - break; - } - /* And that we don't create an extra save/restore. */ -- if (! call_used_regs[regno + j] && ! df_regs_ever_live_p (regno + j)) -+ if (! call_used_or_fixed_reg_p (regno + j) -+ && ! df_regs_ever_live_p (regno + j)) - { - success = 0; - break; -@@ -3724,8 +3725,7 @@ store_data_bypass_p_1 (rtx_insn *out_insn, rtx in_set) - { - rtx out_exp = XVECEXP (out_pat, 0, i); - -- if (GET_CODE (out_exp) == CLOBBER || GET_CODE (out_exp) == USE -- || GET_CODE (out_exp) == CLOBBER_HIGH) -+ if (GET_CODE (out_exp) == CLOBBER || GET_CODE (out_exp) == USE) - continue; - - gcc_assert (GET_CODE (out_exp) == SET); -@@ -3756,8 +3756,7 @@ store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn) - { - rtx in_exp = XVECEXP (in_pat, 0, i); - -- if (GET_CODE (in_exp) == CLOBBER || GET_CODE (in_exp) == USE -- || GET_CODE (in_exp) == CLOBBER_HIGH) -+ if (GET_CODE (in_exp) == CLOBBER || GET_CODE (in_exp) == USE) - continue; - - gcc_assert (GET_CODE (in_exp) == SET); -@@ -3809,7 +3808,7 @@ if_test_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn) - { - rtx exp = XVECEXP (out_pat, 0, i); - -- if (GET_CODE (exp) == CLOBBER || GET_CODE (exp) == CLOBBER_HIGH) -+ if (GET_CODE (exp) == CLOBBER) - continue; - - gcc_assert (GET_CODE (exp) == SET); -diff --git a/gcc/recog.h b/gcc/recog.h -index 75cbbdc10..71d88e3e3 100644 ---- a/gcc/recog.h -+++ b/gcc/recog.h -@@ -142,7 +142,7 @@ extern void preprocess_constraints (rtx_insn *); - extern rtx_insn *peep2_next_insn (int); - extern int peep2_regno_dead_p (int, int); - extern int peep2_reg_dead_p (int, rtx); --#ifdef CLEAR_HARD_REG_SET -+#ifdef HARD_CONST - extern rtx peep2_find_free_register (int, int, const char *, - machine_mode, HARD_REG_SET *); - #endif -@@ -186,6 +186,23 @@ skip_alternative (const char *p) - /* Nonzero means volatile operands are recognized. */ - extern int volatile_ok; - -+/* RAII class for temporarily setting volatile_ok. */ -+ -+class temporary_volatile_ok -+{ -+public: -+ temporary_volatile_ok (int value) : save_volatile_ok (volatile_ok) -+ { -+ volatile_ok = value; -+ } -+ -+ ~temporary_volatile_ok () { volatile_ok = save_volatile_ok; } -+ -+private: -+ temporary_volatile_ok (const temporary_volatile_ok &); -+ int save_volatile_ok; -+}; -+ - /* Set by constrain_operands to the number of the alternative that - matched. 
*/ - extern int which_alternative; -diff --git a/gcc/reg-stack.c b/gcc/reg-stack.c -index 033c978a1..b464f493f 100644 ---- a/gcc/reg-stack.c -+++ b/gcc/reg-stack.c -@@ -368,7 +368,7 @@ straighten_stack (rtx_insn *insn, stack_ptr regstack) - if (regstack->top <= 0) - return; - -- COPY_HARD_REG_SET (temp_stack.reg_set, regstack->reg_set); -+ temp_stack.reg_set = regstack->reg_set; - - for (top = temp_stack.top = regstack->top; top >= 0; top--) - temp_stack.reg[top] = FIRST_STACK_REG + temp_stack.top - top; -@@ -568,7 +568,7 @@ check_asm_stack_operands (rtx_insn *insn) - - if (i != LAST_STACK_REG + 1) - { -- error_for_asm (insn, "output regs must be grouped at top of stack"); -+ error_for_asm (insn, "output registers must be grouped at top of stack"); - malformed_asm = 1; - } - -@@ -625,7 +625,8 @@ check_asm_stack_operands (rtx_insn *insn) - if (i != LAST_STACK_REG + 1) - { - error_for_asm (insn, -- "explicitly used regs must be grouped at top of stack"); -+ "explicitly used registers must be grouped " -+ "at top of stack"); - malformed_asm = 1; - } - -@@ -2640,7 +2641,7 @@ change_stack (rtx_insn *insn, stack_ptr old, stack_ptr new_stack, - /* By now, the only difference should be the order of the stack, - not their depth or liveliness. */ - -- gcc_assert (hard_reg_set_equal_p (old->reg_set, new_stack->reg_set)); -+ gcc_assert (old->reg_set == new_stack->reg_set); - gcc_assert (old->top == new_stack->top); - - /* If the stack is not empty (new_stack->top != -1), loop here emitting -@@ -3154,8 +3155,7 @@ convert_regs_1 (basic_block block) - asms, we zapped the instruction itself, but that didn't produce the - same pattern of register kills as before. */ - -- gcc_assert (hard_reg_set_equal_p (regstack.reg_set, bi->out_reg_set) -- || any_malformed_asm); -+ gcc_assert (regstack.reg_set == bi->out_reg_set || any_malformed_asm); - bi->stack_out = regstack; - bi->done = true; - -diff --git a/gcc/regcprop.c b/gcc/regcprop.c -index 4842ce922..675111db8 100644 ---- a/gcc/regcprop.c -+++ b/gcc/regcprop.c -@@ -35,6 +35,7 @@ - #include "rtl-iter.h" - #include "cfgrtl.h" - #include "target.h" -+#include "function-abi.h" - - /* The following code does forward propagation of hard register copies. 
- The object is to eliminate as many dependencies as possible, so that -@@ -237,11 +238,8 @@ static void - kill_clobbered_value (rtx x, const_rtx set, void *data) - { - struct value_data *const vd = (struct value_data *) data; -- gcc_assert (GET_CODE (set) != CLOBBER_HIGH || REG_P (x)); - -- if (GET_CODE (set) == CLOBBER -- || (GET_CODE (set) == CLOBBER_HIGH -- && reg_is_clobbered_by_clobber_high (x, XEXP (set, 0)))) -+ if (GET_CODE (set) == CLOBBER) - kill_value (x, vd); - } - -@@ -262,8 +260,7 @@ kill_set_value (rtx x, const_rtx set, void *data) - if (rtx_equal_p (x, ksvd->ignore_set_reg)) - return; - -- gcc_assert (GET_CODE (set) != CLOBBER_HIGH || REG_P (x)); -- if (GET_CODE (set) != CLOBBER && GET_CODE (set) != CLOBBER_HIGH) -+ if (GET_CODE (set) != CLOBBER) - { - kill_value (x, ksvd->vd); - if (REG_P (x)) -@@ -728,19 +725,7 @@ cprop_find_used_regs (rtx *loc, void *data) - static void - kill_clobbered_values (rtx_insn *insn, struct value_data *vd) - { -- note_stores (PATTERN (insn), kill_clobbered_value, vd); -- -- if (CALL_P (insn)) -- { -- rtx exp; -- -- for (exp = CALL_INSN_FUNCTION_USAGE (insn); exp; exp = XEXP (exp, 1)) -- { -- rtx x = XEXP (exp, 0); -- if (GET_CODE (x) == CLOBBER) -- kill_value (SET_DEST (x), vd); -- } -- } -+ note_stores (insn, kill_clobbered_value, vd); - } - - /* Perform the forward copy propagation on basic block BB. */ -@@ -1047,7 +1032,6 @@ copyprop_hardreg_forward_1 (basic_block bb, struct value_data *vd) - unsigned int set_nregs = 0; - unsigned int regno; - rtx exp; -- HARD_REG_SET regs_invalidated_by_this_call; - - for (exp = CALL_INSN_FUNCTION_USAGE (insn); exp; exp = XEXP (exp, 1)) - { -@@ -1065,13 +1049,11 @@ copyprop_hardreg_forward_1 (basic_block bb, struct value_data *vd) - } - } - -- get_call_reg_set_usage (insn, -- ®s_invalidated_by_this_call, -- regs_invalidated_by_call); -+ function_abi callee_abi = insn_callee_abi (insn); - for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) -- if ((TEST_HARD_REG_BIT (regs_invalidated_by_this_call, regno) -+ if ((callee_abi.clobbers_full_reg_p (regno) - || (targetm.hard_regno_call_part_clobbered -- (insn, regno, vd->e[regno].mode))) -+ (callee_abi.id (), regno, vd->e[regno].mode))) - && (regno < set_regno || regno >= set_regno + set_nregs)) - kill_value_regno (regno, 1, vd); - -@@ -1109,7 +1091,7 @@ copyprop_hardreg_forward_1 (basic_block bb, struct value_data *vd) - if (!noop_p) - { - /* Notice stores. */ -- note_stores (PATTERN (insn), kill_set_value, &ksvd); -+ note_stores (insn, kill_set_value, &ksvd); - - /* Notice copies. */ - if (copy_p) -diff --git a/gcc/reginfo.c b/gcc/reginfo.c -index 315c5ecab..4f07e968e 100644 ---- a/gcc/reginfo.c -+++ b/gcc/reginfo.c -@@ -43,6 +43,7 @@ along with GCC; see the file COPYING3. If not see - #include "reload.h" - #include "output.h" - #include "tree-pass.h" -+#include "function-abi.h" - - /* Maximum register number used in this function, plus one. */ - -@@ -65,21 +66,20 @@ struct target_hard_regs *this_target_hard_regs = &default_target_hard_regs; - struct target_regs *this_target_regs = &default_target_regs; - #endif - -+#define call_used_regs \ -+ (this_target_hard_regs->x_call_used_regs) -+ - /* Data for initializing fixed_regs. */ - static const char initial_fixed_regs[] = FIXED_REGISTERS; - - /* Data for initializing call_used_regs. */ --static const char initial_call_used_regs[] = CALL_USED_REGISTERS; -- - #ifdef CALL_REALLY_USED_REGISTERS --/* Data for initializing call_really_used_regs. 
*/ --static const char initial_call_really_used_regs[] = CALL_REALLY_USED_REGISTERS; -+#ifdef CALL_USED_REGISTERS -+#error CALL_USED_REGISTERS and CALL_REALLY_USED_REGISTERS are both defined - #endif -- --#ifdef CALL_REALLY_USED_REGISTERS --#define CALL_REALLY_USED_REGNO_P(X) call_really_used_regs[X] -+static const char initial_call_used_regs[] = CALL_REALLY_USED_REGISTERS; - #else --#define CALL_REALLY_USED_REGNO_P(X) call_used_regs[X] -+static const char initial_call_used_regs[] = CALL_USED_REGISTERS; - #endif - - /* Indexed by hard register number, contains 1 for registers -@@ -91,17 +91,6 @@ char global_regs[FIRST_PSEUDO_REGISTER]; - /* Declaration for the global register. */ - tree global_regs_decl[FIRST_PSEUDO_REGISTER]; - --/* Same information as REGS_INVALIDATED_BY_CALL but in regset form to be used -- in dataflow more conveniently. */ --regset regs_invalidated_by_call_regset; -- --/* Same information as FIXED_REG_SET but in regset form. */ --regset fixed_reg_set_regset; -- --/* The bitmap_obstack is used to hold some static variables that -- should not be reset after each function is compiled. */ --static bitmap_obstack persistent_obstack; -- - /* Used to initialize reg_alloc_order. */ - #ifdef REG_ALLOC_ORDER - static int initial_reg_alloc_order[FIRST_PSEUDO_REGISTER] = REG_ALLOC_ORDER; -@@ -171,10 +160,6 @@ init_reg_sets (void) - CALL_USED_REGISTERS had the right number of initializers. */ - gcc_assert (sizeof fixed_regs == sizeof initial_fixed_regs); - gcc_assert (sizeof call_used_regs == sizeof initial_call_used_regs); --#ifdef CALL_REALLY_USED_REGISTERS -- gcc_assert (sizeof call_really_used_regs -- == sizeof initial_call_really_used_regs); --#endif - #ifdef REG_ALLOC_ORDER - gcc_assert (sizeof reg_alloc_order == sizeof initial_reg_alloc_order); - #endif -@@ -182,10 +167,6 @@ init_reg_sets (void) - - memcpy (fixed_regs, initial_fixed_regs, sizeof fixed_regs); - memcpy (call_used_regs, initial_call_used_regs, sizeof call_used_regs); --#ifdef CALL_REALLY_USED_REGISTERS -- memcpy (call_really_used_regs, initial_call_really_used_regs, -- sizeof call_really_used_regs); --#endif - #ifdef REG_ALLOC_ORDER - memcpy (reg_alloc_order, initial_reg_alloc_order, sizeof reg_alloc_order); - #endif -@@ -200,9 +181,6 @@ init_reg_sets (void) - subsequent back-end reinitialization. */ - static char saved_fixed_regs[FIRST_PSEUDO_REGISTER]; - static char saved_call_used_regs[FIRST_PSEUDO_REGISTER]; --#ifdef CALL_REALLY_USED_REGISTERS --static char saved_call_really_used_regs[FIRST_PSEUDO_REGISTER]; --#endif - static const char *saved_reg_names[FIRST_PSEUDO_REGISTER]; - static HARD_REG_SET saved_accessible_reg_set; - static HARD_REG_SET saved_operand_reg_set; -@@ -218,19 +196,11 @@ save_register_info (void) - memcpy (saved_fixed_regs, fixed_regs, sizeof fixed_regs); - memcpy (saved_call_used_regs, call_used_regs, sizeof call_used_regs); - -- /* Likewise for call_really_used_regs. */ --#ifdef CALL_REALLY_USED_REGISTERS -- gcc_assert (sizeof call_really_used_regs -- == sizeof saved_call_really_used_regs); -- memcpy (saved_call_really_used_regs, call_really_used_regs, -- sizeof call_really_used_regs); --#endif -- - /* And similarly for reg_names. 
*/ - gcc_assert (sizeof reg_names == sizeof saved_reg_names); - memcpy (saved_reg_names, reg_names, sizeof reg_names); -- COPY_HARD_REG_SET (saved_accessible_reg_set, accessible_reg_set); -- COPY_HARD_REG_SET (saved_operand_reg_set, operand_reg_set); -+ saved_accessible_reg_set = accessible_reg_set; -+ saved_operand_reg_set = operand_reg_set; - } - - /* Restore the register information. */ -@@ -240,14 +210,9 @@ restore_register_info (void) - memcpy (fixed_regs, saved_fixed_regs, sizeof fixed_regs); - memcpy (call_used_regs, saved_call_used_regs, sizeof call_used_regs); - --#ifdef CALL_REALLY_USED_REGISTERS -- memcpy (call_really_used_regs, saved_call_really_used_regs, -- sizeof call_really_used_regs); --#endif -- - memcpy (reg_names, saved_reg_names, sizeof reg_names); -- COPY_HARD_REG_SET (accessible_reg_set, saved_accessible_reg_set); -- COPY_HARD_REG_SET (operand_reg_set, saved_operand_reg_set); -+ accessible_reg_set = saved_accessible_reg_set; -+ operand_reg_set = saved_operand_reg_set; - } - - /* After switches have been processed, which perhaps alter -@@ -297,8 +262,7 @@ init_reg_sets_1 (void) - HARD_REG_SET c; - int k; - -- COPY_HARD_REG_SET (c, reg_class_contents[i]); -- IOR_HARD_REG_SET (c, reg_class_contents[j]); -+ c = reg_class_contents[i] | reg_class_contents[j]; - for (k = 0; k < N_REG_CLASSES; k++) - if (hard_reg_set_subset_p (reg_class_contents[k], c) - && !hard_reg_set_subset_p (reg_class_contents[k], -@@ -320,8 +284,7 @@ init_reg_sets_1 (void) - HARD_REG_SET c; - int k; - -- COPY_HARD_REG_SET (c, reg_class_contents[i]); -- IOR_HARD_REG_SET (c, reg_class_contents[j]); -+ c = reg_class_contents[i] | reg_class_contents[j]; - for (k = 0; k < N_REG_CLASSES; k++) - if (hard_reg_set_subset_p (c, reg_class_contents[k])) - break; -@@ -362,22 +325,9 @@ init_reg_sets_1 (void) - /* Initialize "constant" tables. */ - - CLEAR_HARD_REG_SET (fixed_reg_set); -- CLEAR_HARD_REG_SET (call_used_reg_set); -- CLEAR_HARD_REG_SET (call_fixed_reg_set); - CLEAR_HARD_REG_SET (regs_invalidated_by_call); -- if (!regs_invalidated_by_call_regset) -- { -- bitmap_obstack_initialize (&persistent_obstack); -- regs_invalidated_by_call_regset = ALLOC_REG_SET (&persistent_obstack); -- } -- else -- CLEAR_REG_SET (regs_invalidated_by_call_regset); -- if (!fixed_reg_set_regset) -- fixed_reg_set_regset = ALLOC_REG_SET (&persistent_obstack); -- else -- CLEAR_REG_SET (fixed_reg_set_regset); - -- AND_HARD_REG_SET (operand_reg_set, accessible_reg_set); -+ operand_reg_set &= accessible_reg_set; - for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) - { - /* As a special exception, registers whose class is NO_REGS are -@@ -393,26 +343,10 @@ init_reg_sets_1 (void) - /* If a register is too limited to be treated as a register operand, - then it should never be allocated to a pseudo. */ - if (!TEST_HARD_REG_BIT (operand_reg_set, i)) -- { -- fixed_regs[i] = 1; -- call_used_regs[i] = 1; -- } -- -- /* call_used_regs must include fixed_regs. */ -- gcc_assert (!fixed_regs[i] || call_used_regs[i]); --#ifdef CALL_REALLY_USED_REGISTERS -- /* call_used_regs must include call_really_used_regs. 
*/ -- gcc_assert (!call_really_used_regs[i] || call_used_regs[i]); --#endif -+ fixed_regs[i] = 1; - - if (fixed_regs[i]) -- { -- SET_HARD_REG_BIT (fixed_reg_set, i); -- SET_REGNO_REG_SET (fixed_reg_set_regset, i); -- } -- -- if (call_used_regs[i]) -- SET_HARD_REG_BIT (call_used_reg_set, i); -+ SET_HARD_REG_BIT (fixed_reg_set, i); - - /* There are a couple of fixed registers that we know are safe to - exclude from being clobbered by calls: -@@ -427,10 +361,7 @@ init_reg_sets_1 (void) - if (i == STACK_POINTER_REGNUM) - ; - else if (global_regs[i]) -- { -- SET_HARD_REG_BIT (regs_invalidated_by_call, i); -- SET_REGNO_REG_SET (regs_invalidated_by_call_regset, i); -- } -+ SET_HARD_REG_BIT (regs_invalidated_by_call, i); - else if (i == FRAME_POINTER_REGNUM) - ; - else if (!HARD_FRAME_POINTER_IS_FRAME_POINTER -@@ -442,15 +373,12 @@ init_reg_sets_1 (void) - else if (!PIC_OFFSET_TABLE_REG_CALL_CLOBBERED - && i == (unsigned) PIC_OFFSET_TABLE_REGNUM && fixed_regs[i]) - ; -- else if (CALL_REALLY_USED_REGNO_P (i)) -- { -- SET_HARD_REG_BIT (regs_invalidated_by_call, i); -- SET_REGNO_REG_SET (regs_invalidated_by_call_regset, i); -- } -+ else if (call_used_regs[i]) -+ SET_HARD_REG_BIT (regs_invalidated_by_call, i); - } - -- COPY_HARD_REG_SET (call_fixed_reg_set, fixed_reg_set); -- COPY_HARD_REG_SET (fixed_nonglobal_reg_set, fixed_reg_set); -+ SET_HARD_REG_SET (savable_regs); -+ fixed_nonglobal_reg_set = fixed_reg_set; - - /* Preserve global registers if called more than once. */ - for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) -@@ -459,8 +387,6 @@ init_reg_sets_1 (void) - { - fixed_regs[i] = call_used_regs[i] = 1; - SET_HARD_REG_BIT (fixed_reg_set, i); -- SET_HARD_REG_BIT (call_used_reg_set, i); -- SET_HARD_REG_BIT (call_fixed_reg_set, i); - } - } - -@@ -493,6 +419,8 @@ init_reg_sets_1 (void) - } - } - } -+ -+ default_function_abi.initialize (0, regs_invalidated_by_call); - } - - /* Compute the table of register modes. 
-@@ -639,7 +567,7 @@ choose_hard_reg_mode (unsigned int regno ATTRIBUTE_UNUSED, - if (hard_regno_nregs (regno, mode) == nregs - && targetm.hard_regno_mode_ok (regno, mode) - && (!call_saved -- || !targetm.hard_regno_call_part_clobbered (NULL, regno, mode)) -+ || !targetm.hard_regno_call_part_clobbered (0, regno, mode)) - && maybe_gt (GET_MODE_SIZE (mode), GET_MODE_SIZE (found_mode))) - found_mode = mode; - -@@ -647,7 +575,7 @@ choose_hard_reg_mode (unsigned int regno ATTRIBUTE_UNUSED, - if (hard_regno_nregs (regno, mode) == nregs - && targetm.hard_regno_mode_ok (regno, mode) - && (!call_saved -- || !targetm.hard_regno_call_part_clobbered (NULL, regno, mode)) -+ || !targetm.hard_regno_call_part_clobbered (0, regno, mode)) - && maybe_gt (GET_MODE_SIZE (mode), GET_MODE_SIZE (found_mode))) - found_mode = mode; - -@@ -655,7 +583,7 @@ choose_hard_reg_mode (unsigned int regno ATTRIBUTE_UNUSED, - if (hard_regno_nregs (regno, mode) == nregs - && targetm.hard_regno_mode_ok (regno, mode) - && (!call_saved -- || !targetm.hard_regno_call_part_clobbered (NULL, regno, mode)) -+ || !targetm.hard_regno_call_part_clobbered (0, regno, mode)) - && maybe_gt (GET_MODE_SIZE (mode), GET_MODE_SIZE (found_mode))) - found_mode = mode; - -@@ -663,7 +591,7 @@ choose_hard_reg_mode (unsigned int regno ATTRIBUTE_UNUSED, - if (hard_regno_nregs (regno, mode) == nregs - && targetm.hard_regno_mode_ok (regno, mode) - && (!call_saved -- || !targetm.hard_regno_call_part_clobbered (NULL, regno, mode)) -+ || !targetm.hard_regno_call_part_clobbered (0, regno, mode)) - && maybe_gt (GET_MODE_SIZE (mode), GET_MODE_SIZE (found_mode))) - found_mode = mode; - -@@ -677,7 +605,7 @@ choose_hard_reg_mode (unsigned int regno ATTRIBUTE_UNUSED, - if (hard_regno_nregs (regno, mode) == nregs - && targetm.hard_regno_mode_ok (regno, mode) - && (!call_saved -- || !targetm.hard_regno_call_part_clobbered (NULL, regno, mode))) -+ || !targetm.hard_regno_call_part_clobbered (0, regno, mode))) - return mode; - } - -@@ -749,10 +677,11 @@ fix_register (const char *name, int fixed, int call_used) - else - { - fixed_regs[i] = fixed; -- call_used_regs[i] = call_used; - #ifdef CALL_REALLY_USED_REGISTERS - if (fixed == 0) -- call_really_used_regs[i] = call_used; -+ call_used_regs[i] = call_used; -+#else -+ call_used_regs[i] = call_used; - #endif - } - } -@@ -803,7 +732,8 @@ globalize_reg (tree decl, int i) - if (i != STACK_POINTER_REGNUM) - { - SET_HARD_REG_BIT (regs_invalidated_by_call, i); -- SET_REGNO_REG_SET (regs_invalidated_by_call_regset, i); -+ for (unsigned int i = 0; i < NUM_ABI_IDS; ++i) -+ function_abis[i].add_full_reg_clobber (i); - } - - /* If already fixed, nothing else to do. */ -@@ -811,13 +741,8 @@ globalize_reg (tree decl, int i) - return; - - fixed_regs[i] = call_used_regs[i] = 1; --#ifdef CALL_REALLY_USED_REGISTERS -- call_really_used_regs[i] = 1; --#endif - - SET_HARD_REG_BIT (fixed_reg_set, i); -- SET_HARD_REG_BIT (call_used_reg_set, i); -- SET_HARD_REG_BIT (call_fixed_reg_set, i); - - reinit_regs (); - } -@@ -1101,10 +1026,6 @@ reg_scan_mark_refs (rtx x, rtx_insn *insn) - reg_scan_mark_refs (XEXP (XEXP (x, 0), 0), insn); - break; - -- case CLOBBER_HIGH: -- gcc_assert (!(MEM_P (XEXP (x, 0)))); -- break; -- - case SET: - /* Count a set of the destination if it is a register. 
*/ - for (dest = SET_DEST (x); -@@ -1316,14 +1237,12 @@ record_subregs_of_mode (rtx subreg, bool partial_def) - } - - if (valid_mode_changes[regno]) -- AND_HARD_REG_SET (*valid_mode_changes[regno], -- simplifiable_subregs (shape)); -+ *valid_mode_changes[regno] &= simplifiable_subregs (shape); - else - { - valid_mode_changes[regno] - = XOBNEW (&valid_mode_changes_obstack, HARD_REG_SET); -- COPY_HARD_REG_SET (*valid_mode_changes[regno], -- simplifiable_subregs (shape)); -+ *valid_mode_changes[regno] = simplifiable_subregs (shape); - } - } - -diff --git a/gcc/regrename.c b/gcc/regrename.c -index 5259d565e..6f7fe0a6d 100644 ---- a/gcc/regrename.c -+++ b/gcc/regrename.c -@@ -33,6 +33,7 @@ - #include "addresses.h" - #include "cfganal.h" - #include "tree-pass.h" -+#include "function-abi.h" - #include "regrename.h" - - /* This file implements the RTL register renaming pass of the compiler. It is -@@ -253,7 +254,7 @@ create_new_chain (unsigned this_regno, unsigned this_nregs, rtx *loc, - CLEAR_HARD_REG_BIT (live_hard_regs, head->regno + nregs); - } - -- COPY_HARD_REG_SET (head->hard_conflicts, live_hard_regs); -+ head->hard_conflicts = live_hard_regs; - bitmap_set_bit (&open_chains_set, head->id); - - open_chains = head; -@@ -292,7 +293,7 @@ merge_overlapping_regs (HARD_REG_SET *pset, struct du_head *head) - { - bitmap_iterator bi; - unsigned i; -- IOR_HARD_REG_SET (*pset, head->hard_conflicts); -+ *pset |= head->hard_conflicts; - EXECUTE_IF_SET_IN_BITMAP (&head->conflicts, 0, i, bi) - { - du_head_p other = regrename_chain_from_id (i); -@@ -303,6 +304,18 @@ merge_overlapping_regs (HARD_REG_SET *pset, struct du_head *head) - } - } - -+/* Return true if (reg:MODE REGNO) would be clobbered by a call covered -+ by THIS_HEAD. */ -+ -+static bool -+call_clobbered_in_chain_p (du_head *this_head, machine_mode mode, -+ unsigned int regno) -+{ -+ return call_clobbered_in_region_p (this_head->call_abis, -+ this_head->call_clobber_mask, -+ mode, regno); -+} -+ - /* Check if NEW_REG can be the candidate register to rename for - REG in THIS_HEAD chain. THIS_UNAVAILABLE is a set of unavailable hard - registers. */ -@@ -322,7 +335,7 @@ check_new_reg_p (int reg ATTRIBUTE_UNUSED, int new_reg, - || global_regs[new_reg + i] - /* Can't use regs which aren't saved by the prologue. */ - || (! df_regs_ever_live_p (new_reg + i) -- && ! call_used_regs[new_reg + i]) -+ && ! crtl->abi->clobbers_full_reg_p (new_reg + i)) - #ifdef LEAF_REGISTERS - /* We can't use a non-leaf register if we're in a - leaf function. */ -@@ -337,11 +350,8 @@ check_new_reg_p (int reg ATTRIBUTE_UNUSED, int new_reg, - for (tmp = this_head->first; tmp; tmp = tmp->next_use) - if ((!targetm.hard_regno_mode_ok (new_reg, GET_MODE (*tmp->loc)) - && ! DEBUG_INSN_P (tmp->insn)) -- || (this_head->need_caller_save_reg -- && ! (targetm.hard_regno_call_part_clobbered -- (NULL, reg, GET_MODE (*tmp->loc))) -- && (targetm.hard_regno_call_part_clobbered -- (NULL, new_reg, GET_MODE (*tmp->loc))))) -+ || call_clobbered_in_chain_p (this_head, GET_MODE (*tmp->loc), -+ new_reg)) - return false; - - return true; -@@ -363,12 +373,6 @@ find_rename_reg (du_head_p this_head, enum reg_class super_class, - int pass; - int best_new_reg = old_reg; - -- /* Further narrow the set of registers we can use for renaming. -- If the chain needs a call-saved register, mark the call-used -- registers as unavailable. */ -- if (this_head->need_caller_save_reg) -- IOR_HARD_REG_SET (*unavailable, call_used_reg_set); -- - /* Mark registers that overlap this chain's lifetime as unavailable. 
*/ - merge_overlapping_regs (unavailable, this_head); - -@@ -441,8 +445,7 @@ regrename_find_superclass (du_head_p head, int *pn_uses, - if (DEBUG_INSN_P (tmp->insn)) - continue; - n_uses++; -- IOR_COMPL_HARD_REG_SET (*punavailable, -- reg_class_contents[tmp->cl]); -+ *punavailable |= ~reg_class_contents[tmp->cl]; - super_class - = reg_class_superunion[(int) super_class][(int) tmp->cl]; - } -@@ -486,7 +489,7 @@ rename_chains (void) - && reg == FRAME_POINTER_REGNUM)) - continue; - -- COPY_HARD_REG_SET (this_unavailable, unavailable); -+ this_unavailable = unavailable; - - reg_class super_class = regrename_find_superclass (this_head, &n_uses, - &this_unavailable); -@@ -500,7 +503,7 @@ rename_chains (void) - { - fprintf (dump_file, "Register %s in insn %d", - reg_names[reg], INSN_UID (this_head->first->insn)); -- if (this_head->need_caller_save_reg) -+ if (this_head->call_abis) - fprintf (dump_file, " crosses a call"); - } - -@@ -677,10 +680,11 @@ merge_chains (du_head_p c1, du_head_p c2) - c2->first = c2->last = NULL; - c2->id = c1->id; - -- IOR_HARD_REG_SET (c1->hard_conflicts, c2->hard_conflicts); -+ c1->hard_conflicts |= c2->hard_conflicts; - bitmap_ior_into (&c1->conflicts, &c2->conflicts); - -- c1->need_caller_save_reg |= c2->need_caller_save_reg; -+ c1->call_clobber_mask |= c2->call_clobber_mask; -+ c1->call_abis |= c2->call_abis; - c1->cannot_rename |= c2->cannot_rename; - } - -@@ -1740,7 +1744,7 @@ build_def_use (basic_block bb) - outside an operand, as live. */ - hide_operands (n_ops, old_operands, old_dups, untracked_operands, - false); -- note_stores (PATTERN (insn), note_sets_clobbers, &clobber_code); -+ note_stores (insn, note_sets_clobbers, &clobber_code); - restore_operands (insn, n_ops, old_operands, old_dups); - - /* Step 1b: Begin new chains for earlyclobbered writes inside -@@ -1834,9 +1838,15 @@ build_def_use (basic_block bb) - requires a caller-saved reg. */ - if (CALL_P (insn)) - { -+ function_abi callee_abi = insn_callee_abi (insn); - struct du_head *p; - for (p = open_chains; p; p = p->next_chain) -- p->need_caller_save_reg = 1; -+ { -+ p->call_abis |= (1 << callee_abi.id ()); -+ p->call_clobber_mask -+ |= callee_abi.full_and_partial_reg_clobbers (); -+ p->hard_conflicts |= callee_abi.full_reg_clobbers (); -+ } - } - - /* Step 5: Close open chains that overlap writes. Similar to -@@ -1856,7 +1866,7 @@ build_def_use (basic_block bb) - outside an operand, as live. */ - hide_operands (n_ops, old_operands, old_dups, untracked_operands, - false); -- note_stores (PATTERN (insn), note_sets_clobbers, &set_code); -+ note_stores (insn, note_sets_clobbers, &set_code); - restore_operands (insn, n_ops, old_operands, old_dups); - - /* Step 6b: Begin new chains for writes inside operands. */ -diff --git a/gcc/regrename.h b/gcc/regrename.h -index 37f5e398d..1bbf78fda 100644 ---- a/gcc/regrename.h -+++ b/gcc/regrename.h -@@ -40,9 +40,12 @@ struct du_head - bitmap_head conflicts; - /* Conflicts with untracked hard registers. */ - HARD_REG_SET hard_conflicts; -+ /* Which registers are fully or partially clobbered by the calls that -+ the chain crosses. */ -+ HARD_REG_SET call_clobber_mask; - -- /* Nonzero if the chain crosses a call. */ -- unsigned int need_caller_save_reg:1; -+ /* A bitmask of ABIs used by the calls that the chain crosses. */ -+ unsigned int call_abis : NUM_ABI_IDS; - /* Nonzero if the register is used in a way that prevents renaming, - such as the SET_DEST of a CALL_INSN or an asm operand that used - to be a hard register. 
*/ -diff --git a/gcc/regs.h b/gcc/regs.h -index 48b2e7081..821979ec6 100644 ---- a/gcc/regs.h -+++ b/gcc/regs.h -@@ -298,7 +298,7 @@ remove_from_hard_reg_set (HARD_REG_SET *regs, machine_mode mode, - /* Return true if REGS contains the whole of (reg:MODE REGNO). */ - - static inline bool --in_hard_reg_set_p (const HARD_REG_SET regs, machine_mode mode, -+in_hard_reg_set_p (const_hard_reg_set regs, machine_mode mode, - unsigned int regno) - { - unsigned int end_regno; -@@ -323,7 +323,7 @@ in_hard_reg_set_p (const HARD_REG_SET regs, machine_mode mode, - /* Return true if (reg:MODE REGNO) includes an element of REGS. */ - - static inline bool --overlaps_hard_reg_set_p (const HARD_REG_SET regs, machine_mode mode, -+overlaps_hard_reg_set_p (const_hard_reg_set regs, machine_mode mode, - unsigned int regno) - { - unsigned int end_regno; -@@ -363,7 +363,7 @@ remove_range_from_hard_reg_set (HARD_REG_SET *regs, unsigned int regno, - /* Like overlaps_hard_reg_set_p, but use a REGNO/NREGS range instead of - REGNO and MODE. */ - static inline bool --range_overlaps_hard_reg_set_p (const HARD_REG_SET set, unsigned regno, -+range_overlaps_hard_reg_set_p (const_hard_reg_set set, unsigned regno, - int nregs) - { - while (nregs-- > 0) -@@ -375,7 +375,7 @@ range_overlaps_hard_reg_set_p (const HARD_REG_SET set, unsigned regno, - /* Like in_hard_reg_set_p, but use a REGNO/NREGS range instead of - REGNO and MODE. */ - static inline bool --range_in_hard_reg_set_p (const HARD_REG_SET set, unsigned regno, int nregs) -+range_in_hard_reg_set_p (const_hard_reg_set set, unsigned regno, int nregs) - { - while (nregs-- > 0) - if (!TEST_HARD_REG_BIT (set, regno + nregs)) -@@ -383,8 +383,4 @@ range_in_hard_reg_set_p (const HARD_REG_SET set, unsigned regno, int nregs) - return true; - } - --/* Get registers used by given function call instruction. */ --extern bool get_call_reg_set_usage (rtx_insn *insn, HARD_REG_SET *reg_set, -- HARD_REG_SET default_set); -- - #endif /* GCC_REGS_H */ -diff --git a/gcc/regset.h b/gcc/regset.h -index 34a9eb457..72ff45891 100644 ---- a/gcc/regset.h -+++ b/gcc/regset.h -@@ -64,6 +64,10 @@ typedef bitmap regset; - /* Inclusive or a register set with a second register set. */ - #define IOR_REG_SET(TO, FROM) bitmap_ior_into (TO, FROM) - -+/* Same, but with FROM being a HARD_REG_SET. */ -+#define IOR_REG_SET_HRS(TO, FROM) \ -+ bitmap_ior_into (TO, bitmap_view (FROM)) -+ - /* Exclusive or a register set with a second register set. */ - #define XOR_REG_SET(TO, FROM) bitmap_xor_into (TO, FROM) - -@@ -107,14 +111,6 @@ typedef bitmap_iterator reg_set_iterator; - #define EXECUTE_IF_AND_IN_REG_SET(REGSET1, REGSET2, MIN, REGNUM, RSI) \ - EXECUTE_IF_AND_IN_BITMAP (REGSET1, REGSET2, MIN, REGNUM, RSI) \ - --/* Same information as REGS_INVALIDATED_BY_CALL but in regset form to be used -- in dataflow more conveniently. */ -- --extern regset regs_invalidated_by_call_regset; -- --/* Same information as FIXED_REG_SET but in regset form. */ --extern regset fixed_reg_set_regset; -- - /* An obstack for regsets. 
*/ - extern bitmap_obstack reg_obstack; - -diff --git a/gcc/reload.c b/gcc/reload.c -index 72cc38a0e..b7601307f 100644 ---- a/gcc/reload.c -+++ b/gcc/reload.c -@@ -6911,15 +6911,15 @@ find_equiv_reg (rtx goal, rtx_insn *insn, enum reg_class rclass, int other, - - if (regno >= 0 && regno < FIRST_PSEUDO_REGISTER) - for (i = 0; i < nregs; ++i) -- if (call_used_regs[regno + i] -- || targetm.hard_regno_call_part_clobbered (NULL, regno + i, -+ if (call_used_or_fixed_reg_p (regno + i) -+ || targetm.hard_regno_call_part_clobbered (0, regno + i, - mode)) - return 0; - - if (valueno >= 0 && valueno < FIRST_PSEUDO_REGISTER) - for (i = 0; i < valuenregs; ++i) -- if (call_used_regs[valueno + i] -- || targetm.hard_regno_call_part_clobbered (NULL, valueno + i, -+ if (call_used_or_fixed_reg_p (valueno + i) -+ || targetm.hard_regno_call_part_clobbered (0, valueno + i, - mode)) - return 0; - } -diff --git a/gcc/reload.h b/gcc/reload.h -index 813075b6f..fef6aa9da 100644 ---- a/gcc/reload.h -+++ b/gcc/reload.h -@@ -274,7 +274,7 @@ extern int reload_first_uid; - - extern int num_not_at_initial_offset; - --#if defined SET_HARD_REG_BIT && defined CLEAR_REG_SET -+#if defined HARD_CONST && defined CLEAR_REG_SET - /* This structure describes instructions which are relevant for reload. - Apart from all regular insns, this also includes CODE_LABELs, since they - must be examined for register elimination. */ -@@ -325,7 +325,7 @@ extern struct insn_chain *reload_insn_chain; - extern struct insn_chain *new_insn_chain (void); - #endif - --#if defined SET_HARD_REG_BIT -+#if defined HARD_CONST - extern void compute_use_by_pseudos (HARD_REG_SET *, bitmap); - #endif - -diff --git a/gcc/reload1.c b/gcc/reload1.c -index bb112d817..d36ebec60 100644 ---- a/gcc/reload1.c -+++ b/gcc/reload1.c -@@ -795,7 +795,9 @@ reload (rtx_insn *first, int global) - - if (crtl->saves_all_registers) - for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) -- if (! call_used_regs[i] && ! fixed_regs[i] && ! LOCAL_REGNO (i)) -+ if (! call_used_or_fixed_reg_p (i) -+ && ! fixed_regs[i] -+ && ! LOCAL_REGNO (i)) - df_set_regs_ever_live (i, true); - - /* Find all the pseudo registers that didn't get hard regs -@@ -843,7 +845,7 @@ reload (rtx_insn *first, int global) - cannot be done. */ - for (insn = first; insn && num_eliminable; insn = NEXT_INSN (insn)) - if (INSN_P (insn)) -- note_stores (PATTERN (insn), mark_not_eliminable, NULL); -+ note_pattern_stores (PATTERN (insn), mark_not_eliminable, NULL); - - maybe_fix_stack_asms (); - -@@ -1339,8 +1341,6 @@ maybe_fix_stack_asms (void) - rtx t = XVECEXP (pat, 0, i); - if (GET_CODE (t) == CLOBBER && STACK_REG_P (XEXP (t, 0))) - SET_HARD_REG_BIT (clobbered, REGNO (XEXP (t, 0))); -- /* CLOBBER_HIGH is only supported for LRA. */ -- gcc_assert (GET_CODE (t) != CLOBBER_HIGH); - } - - /* Get the operand values and constraints out of the insn. */ -@@ -1364,7 +1364,7 @@ maybe_fix_stack_asms (void) - { - /* End of one alternative - mark the regs in the current - class, and reset the class. */ -- IOR_HARD_REG_SET (allowed, reg_class_contents[cls]); -+ allowed |= reg_class_contents[cls]; - cls = NO_REGS; - p++; - if (c == '#') -@@ -1399,7 +1399,7 @@ maybe_fix_stack_asms (void) - /* Those of the registers which are clobbered, but allowed by the - constraints, must be usable as reload registers. So clear them - out of the life information. 
*/ -- AND_HARD_REG_SET (allowed, clobbered); -+ allowed &= clobbered; - for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) - if (TEST_HARD_REG_BIT (allowed, i)) - { -@@ -1732,7 +1732,7 @@ order_regs_for_reload (struct insn_chain *chain) - HARD_REG_SET used_by_pseudos2; - reg_set_iterator rsi; - -- COPY_HARD_REG_SET (bad_spill_regs, fixed_reg_set); -+ bad_spill_regs = fixed_reg_set; - - memset (spill_cost, 0, sizeof spill_cost); - memset (spill_add_cost, 0, sizeof spill_add_cost); -@@ -1745,8 +1745,8 @@ order_regs_for_reload (struct insn_chain *chain) - - REG_SET_TO_HARD_REG_SET (used_by_pseudos, &chain->live_throughout); - REG_SET_TO_HARD_REG_SET (used_by_pseudos2, &chain->dead_or_set); -- IOR_HARD_REG_SET (bad_spill_regs, used_by_pseudos); -- IOR_HARD_REG_SET (bad_spill_regs, used_by_pseudos2); -+ bad_spill_regs |= used_by_pseudos; -+ bad_spill_regs |= used_by_pseudos2; - - /* Now find out which pseudos are allocated to it, and update - hard_reg_n_uses. */ -@@ -1823,9 +1823,9 @@ find_reg (struct insn_chain *chain, int order) - static int regno_pseudo_regs[FIRST_PSEUDO_REGISTER]; - static int best_regno_pseudo_regs[FIRST_PSEUDO_REGISTER]; - -- COPY_HARD_REG_SET (not_usable, bad_spill_regs); -- IOR_HARD_REG_SET (not_usable, bad_spill_regs_global); -- IOR_COMPL_HARD_REG_SET (not_usable, reg_class_contents[rl->rclass]); -+ not_usable = (bad_spill_regs -+ | bad_spill_regs_global -+ | ~reg_class_contents[rl->rclass]); - - CLEAR_HARD_REG_SET (used_by_other_reload); - for (k = 0; k < order; k++) -@@ -1906,8 +1906,8 @@ find_reg (struct insn_chain *chain, int order) - && (inv_reg_alloc_order[regno] - < inv_reg_alloc_order[best_reg]) - #else -- && call_used_regs[regno] -- && ! call_used_regs[best_reg] -+ && call_used_or_fixed_reg_p (regno) -+ && ! call_used_or_fixed_reg_p (best_reg) - #endif - )) - { -@@ -2007,8 +2007,8 @@ find_reload_regs (struct insn_chain *chain) - } - } - -- COPY_HARD_REG_SET (chain->used_spill_regs, used_spill_regs_local); -- IOR_HARD_REG_SET (used_spill_regs, used_spill_regs_local); -+ chain->used_spill_regs = used_spill_regs_local; -+ used_spill_regs |= used_spill_regs_local; - - memcpy (chain->rld, rld, n_reloads * sizeof (struct reload)); - } -@@ -2881,7 +2881,6 @@ eliminate_regs_1 (rtx x, machine_mode mem_mode, rtx insn, - return x; - - case CLOBBER: -- case CLOBBER_HIGH: - case ASM_OPERANDS: - gcc_assert (insn && DEBUG_INSN_P (insn)); - break; -@@ -3092,10 +3091,6 @@ elimination_effects (rtx x, machine_mode mem_mode) - elimination_effects (XEXP (x, 0), mem_mode); - return; - -- case CLOBBER_HIGH: -- /* CLOBBER_HIGH is only supported for LRA. */ -- return; -- - case SET: - /* Check for setting a register that we know about. */ - if (REG_P (SET_DEST (x))) -@@ -3817,9 +3812,6 @@ mark_not_eliminable (rtx dest, const_rtx x, void *data ATTRIBUTE_UNUSED) - if (dest == hard_frame_pointer_rtx) - return; - -- /* CLOBBER_HIGH is only supported for LRA. 
*/ -- gcc_assert (GET_CODE (x) != CLOBBER_HIGH); -- - for (i = 0; i < NUM_ELIMINABLE_REGS; i++) - if (reg_eliminate[i].can_eliminate && dest == reg_eliminate[i].to_rtx - && (GET_CODE (x) != SET -@@ -4020,7 +4012,7 @@ update_eliminables_and_spill (void) - HARD_REG_SET to_spill; - CLEAR_HARD_REG_SET (to_spill); - update_eliminables (&to_spill); -- AND_COMPL_HARD_REG_SET (used_spill_regs, to_spill); -+ used_spill_regs &= ~to_spill; - - for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) - if (TEST_HARD_REG_BIT (to_spill, i)) -@@ -4346,14 +4338,12 @@ finish_spills (int global) - EXECUTE_IF_SET_IN_REG_SET - (&chain->live_throughout, FIRST_PSEUDO_REGISTER, i, rsi) - { -- IOR_HARD_REG_SET (pseudo_forbidden_regs[i], -- chain->used_spill_regs); -+ pseudo_forbidden_regs[i] |= chain->used_spill_regs; - } - EXECUTE_IF_SET_IN_REG_SET - (&chain->dead_or_set, FIRST_PSEUDO_REGISTER, i, rsi) - { -- IOR_HARD_REG_SET (pseudo_forbidden_regs[i], -- chain->used_spill_regs); -+ pseudo_forbidden_regs[i] |= chain->used_spill_regs; - } - } - -@@ -4397,7 +4387,7 @@ finish_spills (int global) - { - REG_SET_TO_HARD_REG_SET (used_by_pseudos, &chain->live_throughout); - REG_SET_TO_HARD_REG_SET (used_by_pseudos2, &chain->dead_or_set); -- IOR_HARD_REG_SET (used_by_pseudos, used_by_pseudos2); -+ used_by_pseudos |= used_by_pseudos2; - - compute_use_by_pseudos (&used_by_pseudos, &chain->live_throughout); - compute_use_by_pseudos (&used_by_pseudos, &chain->dead_or_set); -@@ -4405,8 +4395,7 @@ finish_spills (int global) - may be not included in the value calculated here because - of possible removing caller-saves insns (see function - delete_caller_save_insns. */ -- COMPL_HARD_REG_SET (chain->used_spill_regs, used_by_pseudos); -- AND_HARD_REG_SET (chain->used_spill_regs, used_spill_regs); -+ chain->used_spill_regs = ~used_by_pseudos & used_spill_regs; - } - } - -@@ -4455,7 +4444,6 @@ scan_paradoxical_subregs (rtx x) - case PC: - case USE: - case CLOBBER: -- case CLOBBER_HIGH: - return; - - case SUBREG: -@@ -4589,7 +4577,7 @@ reload_as_needed (int live_known) - { - regset_head regs_to_forget; - INIT_REG_SET (®s_to_forget); -- note_stores (PATTERN (insn), forget_old_reloads_1, ®s_to_forget); -+ note_stores (insn, forget_old_reloads_1, ®s_to_forget); - - /* If this is a USE and CLOBBER of a MEM, ensure that any - references to eliminable registers have been removed. */ -@@ -4716,7 +4704,7 @@ reload_as_needed (int live_known) - between INSN and NEXT and use them to forget old reloads. */ - for (rtx_insn *x = NEXT_INSN (insn); x != old_next; x = NEXT_INSN (x)) - if (NONJUMP_INSN_P (x) && GET_CODE (PATTERN (x)) == CLOBBER) -- note_stores (PATTERN (x), forget_old_reloads_1, NULL); -+ note_stores (x, forget_old_reloads_1, NULL); - - #if AUTO_INC_DEC - /* Likewise for regs altered by auto-increment in this insn. -@@ -4882,8 +4870,8 @@ reload_as_needed (int live_known) - be partially clobbered by the call. */ - else if (CALL_P (insn)) - { -- AND_COMPL_HARD_REG_SET (reg_reloaded_valid, call_used_reg_set); -- AND_COMPL_HARD_REG_SET (reg_reloaded_valid, reg_reloaded_call_part_clobbered); -+ reg_reloaded_valid &= ~(call_used_or_fixed_regs -+ | reg_reloaded_call_part_clobbered); - - /* If this is a call to a setjmp-type function, we must not - reuse any reload reg contents across the call; that will -@@ -4910,8 +4898,7 @@ reload_as_needed (int live_known) - to be forgotten later. 
*/ - - static void --forget_old_reloads_1 (rtx x, const_rtx setter, -- void *data) -+forget_old_reloads_1 (rtx x, const_rtx, void *data) - { - unsigned int regno; - unsigned int nr; -@@ -4930,9 +4917,6 @@ forget_old_reloads_1 (rtx x, const_rtx setter, - if (!REG_P (x)) - return; - -- /* CLOBBER_HIGH is only supported for LRA. */ -- gcc_assert (setter == NULL_RTX || GET_CODE (setter) != CLOBBER_HIGH); -- - regno = REGNO (x); - - if (regno >= FIRST_PSEUDO_REGISTER) -@@ -6335,9 +6319,9 @@ choose_reload_regs_init (struct insn_chain *chain, rtx *save_reload_reg_rtx) - { - HARD_REG_SET tmp; - REG_SET_TO_HARD_REG_SET (tmp, &chain->live_throughout); -- IOR_HARD_REG_SET (reg_used_in_insn, tmp); -+ reg_used_in_insn |= tmp; - REG_SET_TO_HARD_REG_SET (tmp, &chain->dead_or_set); -- IOR_HARD_REG_SET (reg_used_in_insn, tmp); -+ reg_used_in_insn |= tmp; - compute_use_by_pseudos (®_used_in_insn, &chain->live_throughout); - compute_use_by_pseudos (®_used_in_insn, &chain->dead_or_set); - } -@@ -6352,7 +6336,7 @@ choose_reload_regs_init (struct insn_chain *chain, rtx *save_reload_reg_rtx) - CLEAR_HARD_REG_SET (reload_reg_used_in_outaddr_addr[i]); - } - -- COMPL_HARD_REG_SET (reload_reg_unavailable, chain->used_spill_regs); -+ reload_reg_unavailable = ~chain->used_spill_regs; - - CLEAR_HARD_REG_SET (reload_reg_used_for_inherit); - -@@ -7797,7 +7781,7 @@ emit_output_reload_insns (struct insn_chain *chain, struct reload *rl, - clear any memory of reloaded copies of the pseudo reg. - If this output reload comes from a spill reg, - reg_has_output_reload will make this do nothing. */ -- note_stores (pat, forget_old_reloads_1, NULL); -+ note_stores (p, forget_old_reloads_1, NULL); - - if (reg_mentioned_p (rl_reg_rtx, pat)) - { -@@ -8289,8 +8273,7 @@ emit_reload_insns (struct insn_chain *chain) - : out_regno + k); - reg_reloaded_insn[regno + k] = insn; - SET_HARD_REG_BIT (reg_reloaded_valid, regno + k); -- if (targetm.hard_regno_call_part_clobbered (NULL, -- regno + k, -+ if (targetm.hard_regno_call_part_clobbered (0, regno + k, - mode)) - SET_HARD_REG_BIT (reg_reloaded_call_part_clobbered, - regno + k); -@@ -8370,8 +8353,7 @@ emit_reload_insns (struct insn_chain *chain) - : in_regno + k); - reg_reloaded_insn[regno + k] = insn; - SET_HARD_REG_BIT (reg_reloaded_valid, regno + k); -- if (targetm.hard_regno_call_part_clobbered (NULL, -- regno + k, -+ if (targetm.hard_regno_call_part_clobbered (0, regno + k, - mode)) - SET_HARD_REG_BIT (reg_reloaded_call_part_clobbered, - regno + k); -@@ -8487,7 +8469,7 @@ emit_reload_insns (struct insn_chain *chain) - CLEAR_HARD_REG_BIT (reg_reloaded_dead, src_regno + k); - SET_HARD_REG_BIT (reg_reloaded_valid, src_regno + k); - if (targetm.hard_regno_call_part_clobbered -- (NULL, src_regno + k, mode)) -+ (0, src_regno + k, mode)) - SET_HARD_REG_BIT (reg_reloaded_call_part_clobbered, - src_regno + k); - else -@@ -8516,7 +8498,7 @@ emit_reload_insns (struct insn_chain *chain) - } - } - } -- IOR_HARD_REG_SET (reg_reloaded_dead, reg_reloaded_died); -+ reg_reloaded_dead |= reg_reloaded_died; - } - - /* Go through the motions to emit INSN and test if it is strictly valid. 
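Aside (not part of the patch): throughout the reload1.c hunks above, and in the rest of this series, the COPY_HARD_REG_SET / IOR_HARD_REG_SET / AND_COMPL_HARD_REG_SET macros are replaced by ordinary C++ operators on HARD_REG_SET (for example bad_spill_regs |= used_by_pseudos and a &= ~b). The toy program below uses std::bitset purely as a stand-in for HARD_REG_SET -- it is not GCC's type -- to show the old and new spellings side by side:

#include <bitset>
#include <iostream>

/* Toy stand-in only: GCC's HARD_REG_SET is not std::bitset, but after this
   series it supports the same |, & and ~ operators used in the hunks above.  */
const unsigned N_TOY_REGS = 16;
typedef std::bitset<N_TOY_REGS> toy_reg_set;

int
main ()
{
  toy_reg_set fixed_reg_set;     /* registers that must not be used for spills */
  toy_reg_set used_by_pseudos;   /* registers already claimed by pseudos */
  fixed_reg_set.set (0);
  fixed_reg_set.set (1);
  used_by_pseudos.set (3);

  /* Old spelling (removed above):
       COPY_HARD_REG_SET (bad_spill_regs, fixed_reg_set);
       IOR_HARD_REG_SET (bad_spill_regs, used_by_pseudos);  */
  toy_reg_set bad_spill_regs = fixed_reg_set;
  bad_spill_regs |= used_by_pseudos;

  /* Old spelling: AND_COMPL_HARD_REG_SET (candidates, bad_spill_regs);  */
  toy_reg_set candidates;
  candidates.set ();             /* start from "every register" */
  candidates &= ~bad_spill_regs;

  std::cout << "bad spill regs: " << bad_spill_regs << '\n'
            << "candidates:     " << candidates << '\n';
  return 0;
}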
-diff --git a/gcc/reorg.c b/gcc/reorg.c -index bdfcf8851..cba183e9c 100644 ---- a/gcc/reorg.c -+++ b/gcc/reorg.c -@@ -410,8 +410,7 @@ find_end_label (rtx kind) - while (NOTE_P (insn) - || (NONJUMP_INSN_P (insn) - && (GET_CODE (PATTERN (insn)) == USE -- || GET_CODE (PATTERN (insn)) == CLOBBER -- || GET_CODE (PATTERN (insn)) == CLOBBER_HIGH))) -+ || GET_CODE (PATTERN (insn)) == CLOBBER))) - insn = PREV_INSN (insn); - - /* When a target threads its epilogue we might already have a -@@ -1311,8 +1310,7 @@ try_merge_delay_insns (rtx_insn *insn, rtx_insn *thread) - - /* TRIAL must be a CALL_INSN or INSN. Skip USE and CLOBBER. */ - if (NONJUMP_INSN_P (trial) -- && (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER -- || GET_CODE (pat) == CLOBBER_HIGH)) -+ && (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)) - continue; - - if (GET_CODE (next_to_match) == GET_CODE (trial) -@@ -1506,8 +1504,7 @@ redundant_insn (rtx insn, rtx_insn *target, const vec &delay_list) - --insns_to_search; - - pat = PATTERN (trial); -- if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER -- || GET_CODE (pat) == CLOBBER_HIGH) -+ if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER) - continue; - - if (GET_CODE (trial) == DEBUG_INSN) -@@ -1575,7 +1572,7 @@ redundant_insn (rtx insn, rtx_insn *target, const vec &delay_list) - /* Insns we pass may not set either NEEDED or SET, so merge them for - simpler tests. */ - needed.memory |= set.memory; -- IOR_HARD_REG_SET (needed.regs, set.regs); -+ needed.regs |= set.regs; - - /* This insn isn't redundant if it conflicts with an insn that either is - or will be in a delay slot of TARGET. */ -@@ -1605,8 +1602,7 @@ redundant_insn (rtx insn, rtx_insn *target, const vec &delay_list) - --insns_to_search; - - pat = PATTERN (trial); -- if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER -- || GET_CODE (pat) == CLOBBER_HIGH) -+ if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER) - continue; - - if (GET_CODE (trial) == DEBUG_INSN) -@@ -1718,8 +1714,7 @@ own_thread_p (rtx thread, rtx label, int allow_fallthrough) - || LABEL_P (insn) - || (NONJUMP_INSN_P (insn) - && GET_CODE (PATTERN (insn)) != USE -- && GET_CODE (PATTERN (insn)) != CLOBBER -- && GET_CODE (PATTERN (insn)) != CLOBBER_HIGH)) -+ && GET_CODE (PATTERN (insn)) != CLOBBER)) - return 0; - - return 1; -@@ -2042,8 +2037,7 @@ fill_simple_delay_slots (int non_jumps_p) - pat = PATTERN (trial); - - /* Stand-alone USE and CLOBBER are just for flow. */ -- if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER -- || GET_CODE (pat) == CLOBBER_HIGH) -+ if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER) - continue; - - /* And DEBUG_INSNs never go into delay slots. */ -@@ -2169,8 +2163,7 @@ fill_simple_delay_slots (int non_jumps_p) - pat = PATTERN (trial); - - /* Stand-alone USE and CLOBBER are just for flow. */ -- if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER -- || GET_CODE (pat) == CLOBBER_HIGH) -+ if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER) - continue; - - /* And DEBUG_INSNs do not go in delay slots. */ -@@ -2438,8 +2431,7 @@ fill_slots_from_thread (rtx_jump_insn *insn, rtx condition, - } - - pat = PATTERN (trial); -- if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER -- || GET_CODE (pat) == CLOBBER_HIGH) -+ if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER) - continue; - - if (GET_CODE (trial) == DEBUG_INSN) -@@ -3833,8 +3825,7 @@ dbr_schedule (rtx_insn *first) - if (! 
insn->deleted () - && NONJUMP_INSN_P (insn) - && GET_CODE (PATTERN (insn)) != USE -- && GET_CODE (PATTERN (insn)) != CLOBBER -- && GET_CODE (PATTERN (insn)) != CLOBBER_HIGH) -+ && GET_CODE (PATTERN (insn)) != CLOBBER) - { - if (GET_CODE (PATTERN (insn)) == SEQUENCE) - { -diff --git a/gcc/resource.c b/gcc/resource.c -index c4bcfd7dc..bf2d6beaf 100644 ---- a/gcc/resource.c -+++ b/gcc/resource.c -@@ -30,6 +30,7 @@ along with GCC; see the file COPYING3. If not see - #include "resource.h" - #include "insn-attr.h" - #include "params.h" -+#include "function-abi.h" - - /* This structure is used to record liveness information at the targets or - fallthrough insns of branches. We will most likely need the information -@@ -108,11 +109,6 @@ update_live_status (rtx dest, const_rtx x, void *data ATTRIBUTE_UNUSED) - if (GET_CODE (x) == CLOBBER) - for (i = first_regno; i < last_regno; i++) - CLEAR_HARD_REG_BIT (current_live_regs, i); -- else if (GET_CODE (x) == CLOBBER_HIGH) -- /* No current target supports both branch delay slots and CLOBBER_HIGH. -- We'd need more elaborate liveness tracking to handle that -- combination. */ -- gcc_unreachable (); - else - for (i = first_regno; i < last_regno; i++) - { -@@ -298,7 +294,6 @@ mark_referenced_resources (rtx x, struct resources *res, - return; - - case CLOBBER: -- case CLOBBER_HIGH: - return; - - case CALL_INSN: -@@ -450,8 +445,8 @@ find_dead_or_set_registers (rtx_insn *target, struct resources *res, - case CODE_LABEL: - /* After a label, any pending dead registers that weren't yet - used can be made dead. */ -- AND_COMPL_HARD_REG_SET (pending_dead_regs, needed.regs); -- AND_COMPL_HARD_REG_SET (res->regs, pending_dead_regs); -+ pending_dead_regs &= ~needed.regs; -+ res->regs &= ~pending_dead_regs; - CLEAR_HARD_REG_SET (pending_dead_regs); - - continue; -@@ -565,14 +560,12 @@ find_dead_or_set_registers (rtx_insn *target, struct resources *res, - } - - target_res = *res; -- COPY_HARD_REG_SET (scratch, target_set.regs); -- AND_COMPL_HARD_REG_SET (scratch, needed.regs); -- AND_COMPL_HARD_REG_SET (target_res.regs, scratch); -+ scratch = target_set.regs & ~needed.regs; -+ target_res.regs &= ~scratch; - - fallthrough_res = *res; -- COPY_HARD_REG_SET (scratch, set.regs); -- AND_COMPL_HARD_REG_SET (scratch, needed.regs); -- AND_COMPL_HARD_REG_SET (fallthrough_res.regs, scratch); -+ scratch = set.regs & ~needed.regs; -+ fallthrough_res.regs &= ~scratch; - - if (!ANY_RETURN_P (this_jump_insn->jump_label ())) - find_dead_or_set_registers -@@ -581,8 +574,8 @@ find_dead_or_set_registers (rtx_insn *target, struct resources *res, - find_dead_or_set_registers (next_insn, - &fallthrough_res, 0, jump_count, - set, needed); -- IOR_HARD_REG_SET (fallthrough_res.regs, target_res.regs); -- AND_HARD_REG_SET (res->regs, fallthrough_res.regs); -+ fallthrough_res.regs |= target_res.regs; -+ res->regs &= fallthrough_res.regs; - break; - } - else -@@ -601,9 +594,8 @@ find_dead_or_set_registers (rtx_insn *target, struct resources *res, - mark_referenced_resources (insn, &needed, true); - mark_set_resources (insn, &set, 0, MARK_SRC_DEST_CALL); - -- COPY_HARD_REG_SET (scratch, set.regs); -- AND_COMPL_HARD_REG_SET (scratch, needed.regs); -- AND_COMPL_HARD_REG_SET (res->regs, scratch); -+ scratch = set.regs & ~needed.regs; -+ res->regs &= ~scratch; - } - - return jump_insn; -@@ -665,24 +657,16 @@ mark_set_resources (rtx x, struct resources *res, int in_dest, - { - rtx_call_insn *call_insn = as_a (x); - rtx link; -- HARD_REG_SET regs; - - res->cc = res->memory = 1; - -- get_call_reg_set_usage 
(call_insn, ®s, regs_invalidated_by_call); -- IOR_HARD_REG_SET (res->regs, regs); -+ res->regs |= insn_callee_abi (call_insn).full_reg_clobbers (); - - for (link = CALL_INSN_FUNCTION_USAGE (call_insn); - link; link = XEXP (link, 1)) -- { -- /* We could support CLOBBER_HIGH and treat it in the same way as -- HARD_REGNO_CALL_PART_CLOBBERED, but no port needs that -- yet. */ -- gcc_assert (GET_CODE (XEXP (link, 0)) != CLOBBER_HIGH); -- if (GET_CODE (XEXP (link, 0)) == CLOBBER) -- mark_set_resources (SET_DEST (XEXP (link, 0)), res, 1, -- MARK_SRC_DEST); -- } -+ if (GET_CODE (XEXP (link, 0)) == CLOBBER) -+ mark_set_resources (SET_DEST (XEXP (link, 0)), res, 1, -+ MARK_SRC_DEST); - - /* Check for a REG_SETJMP. If it exists, then we must - assume that this call can clobber any register. */ -@@ -725,12 +709,6 @@ mark_set_resources (rtx x, struct resources *res, int in_dest, - mark_set_resources (XEXP (x, 0), res, 1, MARK_SRC_DEST); - return; - -- case CLOBBER_HIGH: -- /* No current target supports both branch delay slots and CLOBBER_HIGH. -- We'd need more elaborate liveness tracking to handle that -- combination. */ -- gcc_unreachable (); -- - case SEQUENCE: - { - rtx_sequence *seq = as_a (x); -@@ -960,7 +938,7 @@ mark_target_live_regs (rtx_insn *insns, rtx target_maybe_return, struct resource - update it below. */ - if (b == tinfo->block && b != -1 && tinfo->bb_tick == bb_ticks[b]) - { -- COPY_HARD_REG_SET (res->regs, tinfo->live_regs); -+ res->regs = tinfo->live_regs; - return; - } - } -@@ -1041,15 +1019,12 @@ mark_target_live_regs (rtx_insn *insns, rtx target_maybe_return, struct resource - predicated instruction, or if the CALL is NORETURN. */ - if (GET_CODE (PATTERN (real_insn)) != COND_EXEC) - { -- HARD_REG_SET regs_invalidated_by_this_call; -- get_call_reg_set_usage (real_insn, -- ®s_invalidated_by_this_call, -- regs_invalidated_by_call); -+ HARD_REG_SET regs_invalidated_by_this_call -+ = insn_callee_abi (real_insn).full_reg_clobbers (); - /* CALL clobbers all call-used regs that aren't fixed except - sp, ap, and fp. Do this before setting the result of the - call live. */ -- AND_COMPL_HARD_REG_SET (current_live_regs, -- regs_invalidated_by_this_call); -+ current_live_regs &= ~regs_invalidated_by_this_call; - } - - /* A CALL_INSN sets any global register live, since it may -@@ -1078,7 +1053,7 @@ mark_target_live_regs (rtx_insn *insns, rtx target_maybe_return, struct resource - GET_MODE (XEXP (link, 0)), - REGNO (XEXP (link, 0))); - -- note_stores (PATTERN (real_insn), update_live_status, NULL); -+ note_stores (real_insn, update_live_status, NULL); - - /* If any registers were unused after this insn, kill them. - These notes will always be accurate. */ -@@ -1097,7 +1072,7 @@ mark_target_live_regs (rtx_insn *insns, rtx target_maybe_return, struct resource - - /* A label clobbers the pending dead registers since neither - reload nor jump will propagate a value across a label. 
*/ -- AND_COMPL_HARD_REG_SET (current_live_regs, pending_dead_regs); -+ current_live_regs &= ~pending_dead_regs; - CLEAR_HARD_REG_SET (pending_dead_regs); - - /* We must conservatively assume that all registers that used -@@ -1109,7 +1084,7 @@ mark_target_live_regs (rtx_insn *insns, rtx target_maybe_return, struct resource - HARD_REG_SET extra_live; - - REG_SET_TO_HARD_REG_SET (extra_live, DF_LR_IN (bb)); -- IOR_HARD_REG_SET (current_live_regs, extra_live); -+ current_live_regs |= extra_live; - } - } - -@@ -1118,10 +1093,10 @@ mark_target_live_regs (rtx_insn *insns, rtx target_maybe_return, struct resource - are implicitly required at that point. */ - else if (NOTE_P (real_insn) - && NOTE_KIND (real_insn) == NOTE_INSN_EPILOGUE_BEG) -- IOR_HARD_REG_SET (current_live_regs, start_of_epilogue_needs.regs); -+ current_live_regs |= start_of_epilogue_needs.regs; - } - -- COPY_HARD_REG_SET (res->regs, current_live_regs); -+ res->regs = current_live_regs; - if (tinfo != NULL) - { - tinfo->block = b; -@@ -1160,20 +1135,17 @@ mark_target_live_regs (rtx_insn *insns, rtx target_maybe_return, struct resource - { - mark_referenced_resources (insn, &needed, true); - -- COPY_HARD_REG_SET (scratch, needed.regs); -- AND_COMPL_HARD_REG_SET (scratch, set.regs); -- IOR_HARD_REG_SET (new_resources.regs, scratch); -+ scratch = needed.regs & ~set.regs; -+ new_resources.regs |= scratch; - - mark_set_resources (insn, &set, 0, MARK_SRC_DEST_CALL); - } - -- IOR_HARD_REG_SET (res->regs, new_resources.regs); -+ res->regs |= new_resources.regs; - } - - if (tinfo != NULL) -- { -- COPY_HARD_REG_SET (tinfo->live_regs, res->regs); -- } -+ tinfo->live_regs = res->regs; - } - - /* Initialize the resources required by mark_target_live_regs (). -diff --git a/gcc/rtl.c b/gcc/rtl.c -index d7b8e9877..ec65fbb37 100644 ---- a/gcc/rtl.c -+++ b/gcc/rtl.c -@@ -315,10 +315,6 @@ copy_rtx (rtx orig) - return orig; - break; - -- case CLOBBER_HIGH: -- gcc_assert (REG_P (XEXP (orig, 0))); -- return orig; -- - case CONST: - if (shared_const_p (orig)) - return orig; -diff --git a/gcc/rtl.def b/gcc/rtl.def -index f4c9d946c..edb34c5ac 100644 ---- a/gcc/rtl.def -+++ b/gcc/rtl.def -@@ -312,16 +312,6 @@ DEF_RTL_EXPR(USE, "use", "e", RTX_EXTRA) - is considered undeletable before reload. */ - DEF_RTL_EXPR(CLOBBER, "clobber", "e", RTX_EXTRA) - --/* Indicate that the upper parts of something are clobbered in a way that we -- don't want to explain. The MODE references the lower bits that will be -- preserved. Anything above that size will be clobbered. -- -- CLOBBER_HIGH only occurs as the operand of a PARALLEL rtx. It cannot appear -- in other contexts, and unlike CLOBBER, it cannot appear on its own. -- CLOBBER_HIGH can only be used with fixed register rtxes. */ -- --DEF_RTL_EXPR(CLOBBER_HIGH, "clobber_high", "e", RTX_EXTRA) -- - /* Call a subroutine. - Operand 1 is the address to call. - Operand 2 is the number of arguments. */ -@@ -936,6 +926,12 @@ DEF_RTL_EXPR(DEFINE_SPLIT, "define_split", "EsES", RTX_EXTRA) - 7: optionally, a vector of attributes for this insn. */ - DEF_RTL_EXPR(DEFINE_INSN_AND_SPLIT, "define_insn_and_split", "sEsTsESV", RTX_EXTRA) - -+/* A form of define_insn_and_split in which the split insn pattern (operand 5) -+ is determined automatically by replacing match_operands with match_dups -+ and match_operators with match_op_dups. The operands are the same as -+ define_insn_and_split but with operand 5 removed. 
*/ -+DEF_RTL_EXPR(DEFINE_INSN_AND_REWRITE, "define_insn_and_rewrite", "sEsTsSV", RTX_EXTRA) -+ - /* Definition of an RTL peephole operation. - Follows the same arguments as define_split. */ - DEF_RTL_EXPR(DEFINE_PEEPHOLE2, "define_peephole2", "EsES", RTX_EXTRA) -diff --git a/gcc/rtl.h b/gcc/rtl.h -index b4a906f91..6093d42c0 100644 ---- a/gcc/rtl.h -+++ b/gcc/rtl.h -@@ -1623,11 +1623,17 @@ extern const char * const reg_note_name[]; - #define GET_REG_NOTE_NAME(MODE) (reg_note_name[(int) (MODE)]) - - /* This field is only present on CALL_INSNs. It holds a chain of EXPR_LIST of -- USE and CLOBBER expressions. -+ USE, CLOBBER and SET expressions. - USE expressions list the registers filled with arguments that - are passed to the function. - CLOBBER expressions document the registers explicitly clobbered - by this CALL_INSN. -+ SET expressions say that the return value of the call (the SET_DEST) -+ is equivalent to a value available before the call (the SET_SRC). -+ This kind of SET is used when the return value is predictable in -+ advance. It is purely an optimisation hint; unlike USEs and CLOBBERs, -+ it does not affect register liveness. -+ - Pseudo registers cannot be mentioned in this list. */ - #define CALL_INSN_FUNCTION_USAGE(INSN) XEXP(INSN, 7) - -@@ -2392,12 +2398,30 @@ extern int rtx_cost (rtx, machine_mode, enum rtx_code, int, bool); - extern int address_cost (rtx, machine_mode, addr_space_t, bool); - extern void get_full_rtx_cost (rtx, machine_mode, enum rtx_code, int, - struct full_rtx_costs *); -+extern bool native_encode_rtx (machine_mode, rtx, vec &, -+ unsigned int, unsigned int); -+extern rtx native_decode_rtx (machine_mode, vec, -+ unsigned int); -+extern rtx native_decode_vector_rtx (machine_mode, vec, -+ unsigned int, unsigned int, unsigned int); - extern poly_uint64 subreg_lsb (const_rtx); --extern poly_uint64 subreg_lsb_1 (machine_mode, machine_mode, poly_uint64); -+extern poly_uint64 subreg_size_lsb (poly_uint64, poly_uint64, poly_uint64); - extern poly_uint64 subreg_size_offset_from_lsb (poly_uint64, poly_uint64, - poly_uint64); - extern bool read_modify_subreg_p (const_rtx); - -+/* Given a subreg's OUTER_MODE, INNER_MODE, and SUBREG_BYTE, return the -+ bit offset at which the subreg begins (counting from the least significant -+ bit of the operand). */ -+ -+inline poly_uint64 -+subreg_lsb_1 (machine_mode outer_mode, machine_mode inner_mode, -+ poly_uint64 subreg_byte) -+{ -+ return subreg_size_lsb (GET_MODE_SIZE (outer_mode), -+ GET_MODE_SIZE (inner_mode), subreg_byte); -+} -+ - /* Return the subreg byte offset for a subreg whose outer mode is - OUTER_MODE, whose inner mode is INNER_MODE, and where there are - LSB_SHIFT *bits* between the lsb of the outer value and the lsb of -@@ -2645,7 +2669,7 @@ do { \ - - /* For a SET rtx, SET_DEST is the place that is set - and SET_SRC is the value it is set to. 
*/ --#define SET_DEST(RTX) XC3EXP (RTX, 0, SET, CLOBBER, CLOBBER_HIGH) -+#define SET_DEST(RTX) XC2EXP (RTX, 0, SET, CLOBBER) - #define SET_SRC(RTX) XCEXP (RTX, 1, SET) - #define SET_IS_RETURN_P(RTX) \ - (RTL_FLAG_CHECK1 ("SET_IS_RETURN_P", (RTX), SET)->jump) -@@ -3369,8 +3393,7 @@ extern bool val_signbit_known_clear_p (machine_mode, - unsigned HOST_WIDE_INT); - - /* In reginfo.c */ --extern machine_mode choose_hard_reg_mode (unsigned int, unsigned int, -- bool); -+extern machine_mode choose_hard_reg_mode (unsigned int, unsigned int, bool); - extern const HARD_REG_SET &simplifiable_subregs (const subreg_shape &); - - /* In emit-rtl.c */ -@@ -3407,6 +3430,7 @@ extern int rtx_unstable_p (const_rtx); - extern bool rtx_varies_p (const_rtx, bool); - extern bool rtx_addr_varies_p (const_rtx, bool); - extern rtx get_call_rtx_from (rtx); -+extern tree get_call_fndecl (const rtx_insn *); - extern HOST_WIDE_INT get_integer_term (const_rtx); - extern rtx get_related_value (const_rtx); - extern bool offset_within_block_p (const_rtx, HOST_WIDE_INT); -@@ -3435,7 +3459,10 @@ extern void record_hard_reg_sets (rtx, const_rtx, void *); - extern void record_hard_reg_uses (rtx *, void *); - extern void find_all_hard_regs (const_rtx, HARD_REG_SET *); - extern void find_all_hard_reg_sets (const rtx_insn *, HARD_REG_SET *, bool); --extern void note_stores (const_rtx, void (*) (rtx, const_rtx, void *), void *); -+extern void note_pattern_stores (const_rtx, -+ void (*) (rtx, const_rtx, void *), void *); -+extern void note_stores (const rtx_insn *, -+ void (*) (rtx, const_rtx, void *), void *); - extern void note_uses (rtx *, void (*) (rtx *, void *), void *); - extern int dead_or_set_p (const rtx_insn *, const_rtx); - extern int dead_or_set_regno_p (const rtx_insn *, unsigned int); -@@ -3476,16 +3503,6 @@ extern bool tablejump_p (const rtx_insn *, rtx_insn **, rtx_jump_table_data **); - extern int computed_jump_p (const rtx_insn *); - extern bool tls_referenced_p (const_rtx); - extern bool contains_mem_rtx_p (rtx x); --extern bool reg_is_clobbered_by_clobber_high (unsigned int, machine_mode, -- const_rtx); -- --/* Convenient wrapper for reg_is_clobbered_by_clobber_high. */ --inline bool --reg_is_clobbered_by_clobber_high (const_rtx x, const_rtx clobber_high_op) --{ -- return reg_is_clobbered_by_clobber_high (REGNO (x), GET_MODE (x), -- clobber_high_op); --} - - /* Overload for refers_to_regno_p for checking a single register. */ - inline bool -@@ -4279,7 +4296,6 @@ extern void vt_equate_reg_base_value (const_rtx, const_rtx); - extern bool memory_modified_in_insn_p (const_rtx, const_rtx); - extern bool may_be_sp_based_p (rtx); - extern rtx gen_hard_reg_clobber (machine_mode, unsigned int); --extern rtx gen_hard_reg_clobber_high (machine_mode, unsigned int); - extern rtx get_reg_known_value (unsigned int); - extern bool get_reg_known_equiv_p (unsigned int); - extern rtx get_reg_base_value (unsigned int); -@@ -4353,14 +4369,11 @@ extern tree GTY(()) global_regs_decl[FIRST_PSEUDO_REGISTER]; - Available only for functions that has been already assembled. */ - - struct GTY(()) cgraph_rtl_info { -- unsigned int preferred_incoming_stack_boundary; -+ unsigned int preferred_incoming_stack_boundary; - -- /* Call unsaved hard registers really used by the corresponding -- function (including ones used by functions called by the -- function). */ -+ /* Which registers the function clobbers, either directly or by -+ calling another function. */ - HARD_REG_SET function_used_regs; -- /* Set if function_used_regs is valid. 
*/ -- unsigned function_used_regs_valid: 1; - }; - - /* If loads from memories of mode MODE always sign or zero extend, -diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c -index 01af063a2..553d71c1c 100644 ---- a/gcc/rtlanal.c -+++ b/gcc/rtlanal.c -@@ -823,6 +823,24 @@ get_call_rtx_from (rtx x) - return x; - return NULL_RTX; - } -+ -+/* Get the declaration of the function called by INSN. */ -+ -+tree -+get_call_fndecl (const rtx_insn *insn) -+{ -+ rtx note, datum; -+ -+ note = find_reg_note (insn, REG_CALL_DECL, NULL_RTX); -+ if (note == NULL_RTX) -+ return NULL_TREE; -+ -+ datum = XEXP (note, 0); -+ if (datum != NULL_RTX) -+ return SYMBOL_REF_DECL (datum); -+ -+ return NULL_TREE; -+} - - /* Return the value of the integer term in X, if one is apparent; - otherwise return 0. -@@ -1198,10 +1216,6 @@ reg_referenced_p (const_rtx x, const_rtx body) - return 1; - return 0; - -- case CLOBBER_HIGH: -- gcc_assert (REG_P (XEXP (body, 0))); -- return 0; -- - case COND_EXEC: - if (reg_overlap_mentioned_p (x, COND_EXEC_TEST (body))) - return 1; -@@ -1424,11 +1438,7 @@ set_of_1 (rtx x, const_rtx pat, void *data1) - { - struct set_of_data *const data = (struct set_of_data *) (data1); - if (rtx_equal_p (x, data->pat) -- || (GET_CODE (pat) == CLOBBER_HIGH -- && REGNO(data->pat) == REGNO(XEXP (pat, 0)) -- && reg_is_clobbered_by_clobber_high (data->pat, XEXP (pat, 0))) -- || (GET_CODE (pat) != CLOBBER_HIGH && !MEM_P (x) -- && reg_overlap_mentioned_p (data->pat, x))) -+ || (!MEM_P (x) && reg_overlap_mentioned_p (data->pat, x))) - data->found = pat; - } - -@@ -1440,7 +1450,7 @@ set_of (const_rtx pat, const_rtx insn) - struct set_of_data data; - data.found = NULL_RTX; - data.pat = pat; -- note_stores (INSN_P (insn) ? PATTERN (insn) : insn, set_of_1, &data); -+ note_pattern_stores (INSN_P (insn) ? PATTERN (insn) : insn, set_of_1, &data); - return data.found; - } - -@@ -1476,15 +1486,9 @@ find_all_hard_reg_sets (const rtx_insn *insn, HARD_REG_SET *pset, bool implicit) - rtx link; - - CLEAR_HARD_REG_SET (*pset); -- note_stores (PATTERN (insn), record_hard_reg_sets, pset); -- if (CALL_P (insn)) -- { -- if (implicit) -- IOR_HARD_REG_SET (*pset, call_used_reg_set); -- -- for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1)) -- record_hard_reg_sets (XEXP (link, 0), NULL, pset); -- } -+ note_stores (insn, record_hard_reg_sets, pset); -+ if (CALL_P (insn) && implicit) -+ *pset |= call_used_or_fixed_regs; - for (link = REG_NOTES (insn); link; link = XEXP (link, 1)) - if (REG_NOTE_KIND (link) == REG_INC) - record_hard_reg_sets (XEXP (link, 0), NULL, pset); -@@ -1517,7 +1521,6 @@ single_set_2 (const rtx_insn *insn, const_rtx pat) - { - case USE: - case CLOBBER: -- case CLOBBER_HIGH: - break; - - case SET: -@@ -1671,9 +1674,7 @@ noop_move_p (const rtx_insn *insn) - { - rtx tem = XVECEXP (pat, 0, i); - -- if (GET_CODE (tem) == USE -- || GET_CODE (tem) == CLOBBER -- || GET_CODE (tem) == CLOBBER_HIGH) -+ if (GET_CODE (tem) == USE || GET_CODE (tem) == CLOBBER) - continue; - - if (GET_CODE (tem) != SET || ! set_noop_p (tem)) -@@ -1899,16 +1900,15 @@ reg_overlap_mentioned_p (const_rtx x, const_rtx in) - the SUBREG will be passed. 
*/ - - void --note_stores (const_rtx x, void (*fun) (rtx, const_rtx, void *), void *data) -+note_pattern_stores (const_rtx x, -+ void (*fun) (rtx, const_rtx, void *), void *data) - { - int i; - - if (GET_CODE (x) == COND_EXEC) - x = COND_EXEC_CODE (x); - -- if (GET_CODE (x) == SET -- || GET_CODE (x) == CLOBBER -- || GET_CODE (x) == CLOBBER_HIGH) -+ if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER) - { - rtx dest = SET_DEST (x); - -@@ -1933,7 +1933,22 @@ note_stores (const_rtx x, void (*fun) (rtx, const_rtx, void *), void *data) - - else if (GET_CODE (x) == PARALLEL) - for (i = XVECLEN (x, 0) - 1; i >= 0; i--) -- note_stores (XVECEXP (x, 0, i), fun, data); -+ note_pattern_stores (XVECEXP (x, 0, i), fun, data); -+} -+ -+/* Same, but for an instruction. If the instruction is a call, include -+ any CLOBBERs in its CALL_INSN_FUNCTION_USAGE. */ -+ -+void -+note_stores (const rtx_insn *insn, -+ void (*fun) (rtx, const_rtx, void *), void *data) -+{ -+ if (CALL_P (insn)) -+ for (rtx link = CALL_INSN_FUNCTION_USAGE (insn); -+ link; link = XEXP (link, 1)) -+ if (GET_CODE (XEXP (link, 0)) == CLOBBER) -+ note_pattern_stores (XEXP (link, 0), fun, data); -+ note_pattern_stores (PATTERN (insn), fun, data); - } - - /* Like notes_stores, but call FUN for each expression that is being -@@ -3611,23 +3626,31 @@ loc_mentioned_in_p (rtx *loc, const_rtx in) - return 0; - } - --/* Helper function for subreg_lsb. Given a subreg's OUTER_MODE, INNER_MODE, -- and SUBREG_BYTE, return the bit offset where the subreg begins -- (counting from the least significant bit of the operand). */ -+/* Reinterpret a subreg as a bit extraction from an integer and return -+ the position of the least significant bit of the extracted value. -+ In other words, if the extraction were performed as a shift right -+ and mask, return the number of bits to shift right. -+ -+ The outer value of the subreg has OUTER_BYTES bytes and starts at -+ byte offset SUBREG_BYTE within an inner value of INNER_BYTES bytes. */ - - poly_uint64 --subreg_lsb_1 (machine_mode outer_mode, -- machine_mode inner_mode, -- poly_uint64 subreg_byte) -+subreg_size_lsb (poly_uint64 outer_bytes, -+ poly_uint64 inner_bytes, -+ poly_uint64 subreg_byte) - { - poly_uint64 subreg_end, trailing_bytes, byte_pos; - - /* A paradoxical subreg begins at bit position 0. */ -- if (paradoxical_subreg_p (outer_mode, inner_mode)) -- return 0; -+ gcc_checking_assert (ordered_p (outer_bytes, inner_bytes)); -+ if (maybe_gt (outer_bytes, inner_bytes)) -+ { -+ gcc_checking_assert (known_eq (subreg_byte, 0U)); -+ return 0; -+ } - -- subreg_end = subreg_byte + GET_MODE_SIZE (outer_mode); -- trailing_bytes = GET_MODE_SIZE (inner_mode) - subreg_end; -+ subreg_end = subreg_byte + outer_bytes; -+ trailing_bytes = inner_bytes - subreg_end; - if (WORDS_BIG_ENDIAN && BYTES_BIG_ENDIAN) - byte_pos = trailing_bytes; - else if (!WORDS_BIG_ENDIAN && !BYTES_BIG_ENDIAN) -@@ -4123,7 +4146,7 @@ find_first_parameter_load (rtx_insn *call_insn, rtx_insn *boundary) - if (INSN_P (before)) - { - int nregs_old = parm.nregs; -- note_stores (PATTERN (before), parms_set, &parm); -+ note_stores (before, parms_set, &parm); - /* If we found something that did not set a parameter reg, - we're done. Do not keep going, as that might result - in hoisting an insn before the setting of a pseudo -@@ -6601,32 +6624,3 @@ tls_referenced_p (const_rtx x) - return true; - return false; - } -- --/* Return true if reg REGNO with mode REG_MODE would be clobbered by the -- clobber_high operand in CLOBBER_HIGH_OP. 
*/ -- --bool --reg_is_clobbered_by_clobber_high (unsigned int regno, machine_mode reg_mode, -- const_rtx clobber_high_op) --{ -- unsigned int clobber_regno = REGNO (clobber_high_op); -- machine_mode clobber_mode = GET_MODE (clobber_high_op); -- unsigned char regno_nregs = hard_regno_nregs (regno, reg_mode); -- -- /* Clobber high should always span exactly one register. */ -- gcc_assert (REG_NREGS (clobber_high_op) == 1); -- -- /* Clobber high needs to match with one of the registers in X. */ -- if (clobber_regno < regno || clobber_regno >= regno + regno_nregs) -- return false; -- -- gcc_assert (reg_mode != BLKmode && clobber_mode != BLKmode); -- -- if (reg_mode == VOIDmode) -- return clobber_mode != VOIDmode; -- -- /* Clobber high will clobber if its size might be greater than the size of -- register regno. */ -- return maybe_gt (exact_div (GET_MODE_SIZE (reg_mode), regno_nregs), -- GET_MODE_SIZE (clobber_mode)); --} -diff --git a/gcc/rtx-vector-builder.h b/gcc/rtx-vector-builder.h -index d5950e2b8..08b55dd36 100644 ---- a/gcc/rtx-vector-builder.h -+++ b/gcc/rtx-vector-builder.h -@@ -24,10 +24,11 @@ along with GCC; see the file COPYING3. If not see - - /* This class is used to build VECTOR_CSTs from a sequence of elements. - See vector_builder for more details. */ --class rtx_vector_builder : public vector_builder -+class rtx_vector_builder : public vector_builder - { -- typedef vector_builder parent; -- friend class vector_builder; -+ typedef vector_builder parent; -+ friend class vector_builder; - - public: - rtx_vector_builder () : m_mode (VOIDmode) {} -@@ -48,6 +49,15 @@ private: - bool can_elide_p (rtx) const { return true; } - void note_representative (rtx *, rtx) {} - -+ static poly_uint64 shape_nelts (machine_mode mode) -+ { return GET_MODE_NUNITS (mode); } -+ static poly_uint64 nelts_of (const_rtx x) -+ { return CONST_VECTOR_NUNITS (x); } -+ static unsigned int npatterns_of (const_rtx x) -+ { return CONST_VECTOR_NPATTERNS (x); } -+ static unsigned int nelts_per_pattern_of (const_rtx x) -+ { return CONST_VECTOR_NELTS_PER_PATTERN (x); } -+ - rtx find_cached_value (); - - machine_mode m_mode; -diff --git a/gcc/sched-deps.c b/gcc/sched-deps.c -index 28b9d38ab..fe447d16a 100644 ---- a/gcc/sched-deps.c -+++ b/gcc/sched-deps.c -@@ -38,6 +38,7 @@ along with GCC; see the file COPYING3. If not see - #include "sched-int.h" - #include "params.h" - #include "cselib.h" -+#include "function-abi.h" - - #ifdef INSN_SCHEDULING - -@@ -2203,9 +2204,9 @@ init_insn_reg_pressure_info (rtx_insn *insn) - reg_pressure_info[cl].change = 0; - } - -- note_stores (PATTERN (insn), mark_insn_reg_clobber, insn); -+ note_stores (insn, mark_insn_reg_clobber, insn); - -- note_stores (PATTERN (insn), mark_insn_reg_store, insn); -+ note_stores (insn, mark_insn_reg_store, insn); - - if (AUTO_INC_DEC) - for (link = REG_NOTES (insn); link; link = XEXP (link, 1)) -@@ -2319,13 +2320,6 @@ sched_analyze_reg (struct deps_desc *deps, int regno, machine_mode mode, - while (--i >= 0) - note_reg_use (regno + i); - } -- else if (ref == CLOBBER_HIGH) -- { -- gcc_assert (i == 1); -- /* We don't know the current state of the register, so have to treat -- the clobber high as a full clobber. */ -- note_reg_clobber (regno); -- } - else - { - while (--i >= 0) -@@ -2349,8 +2343,6 @@ sched_analyze_reg (struct deps_desc *deps, int regno, machine_mode mode, - else if (ref == USE) - note_reg_use (regno); - else -- /* For CLOBBER_HIGH, we don't know the current state of the register, -- so have to treat it as a full clobber. 
*/ - note_reg_clobber (regno); - - /* Pseudos that are REG_EQUIV to something may be replaced -@@ -2885,7 +2877,7 @@ get_implicit_reg_pending_clobbers (HARD_REG_SET *temp, rtx_insn *insn) - preprocess_constraints (insn); - alternative_mask preferred = get_preferred_alternatives (insn); - ira_implicitly_set_insn_hard_regs (temp, preferred); -- AND_COMPL_HARD_REG_SET (*temp, ira_no_alloc_regs); -+ *temp &= ~ira_no_alloc_regs; - } - - /* Analyze an INSN with pattern X to find all dependencies. */ -@@ -2901,7 +2893,7 @@ sched_analyze_insn (struct deps_desc *deps, rtx x, rtx_insn *insn) - { - HARD_REG_SET temp; - get_implicit_reg_pending_clobbers (&temp, insn); -- IOR_HARD_REG_SET (implicit_reg_pending_clobbers, temp); -+ implicit_reg_pending_clobbers |= temp; - } - - can_start_lhs_rhs_p = (NONJUMP_INSN_P (insn) -@@ -2973,7 +2965,7 @@ sched_analyze_insn (struct deps_desc *deps, rtx x, rtx_insn *insn) - sub = COND_EXEC_CODE (sub); - code = GET_CODE (sub); - } -- else if (code == SET || code == CLOBBER || code == CLOBBER_HIGH) -+ else if (code == SET || code == CLOBBER) - sched_analyze_1 (deps, sub, insn); - else - sched_analyze_2 (deps, sub, insn); -@@ -2989,10 +2981,6 @@ sched_analyze_insn (struct deps_desc *deps, rtx x, rtx_insn *insn) - { - if (GET_CODE (XEXP (link, 0)) == CLOBBER) - sched_analyze_1 (deps, XEXP (link, 0), insn); -- else if (GET_CODE (XEXP (link, 0)) == CLOBBER_HIGH) -- /* We could support CLOBBER_HIGH and treat it in the same way as -- HARD_REGNO_CALL_PART_CLOBBERED, but no port needs that yet. */ -- gcc_unreachable (); - else if (GET_CODE (XEXP (link, 0)) != SET) - sched_analyze_2 (deps, XEXP (link, 0), insn); - } -@@ -3332,10 +3320,9 @@ sched_analyze_insn (struct deps_desc *deps, rtx x, rtx_insn *insn) - IOR_REG_SET (&deps->reg_last_in_use, reg_pending_uses); - IOR_REG_SET (&deps->reg_last_in_use, reg_pending_clobbers); - IOR_REG_SET (&deps->reg_last_in_use, reg_pending_sets); -- for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) -- if (TEST_HARD_REG_BIT (implicit_reg_pending_uses, i) -- || TEST_HARD_REG_BIT (implicit_reg_pending_clobbers, i)) -- SET_REGNO_REG_SET (&deps->reg_last_in_use, i); -+ IOR_REG_SET_HRS (&deps->reg_last_in_use, -+ implicit_reg_pending_uses -+ | implicit_reg_pending_clobbers); - - /* Set up the pending barrier found. */ - deps->last_reg_pending_barrier = reg_pending_barrier; -@@ -3724,6 +3711,7 @@ deps_analyze_insn (struct deps_desc *deps, rtx_insn *insn) - } - else - { -+ function_abi callee_abi = insn_callee_abi (insn); - for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) - /* A call may read and modify global register variables. */ - if (global_regs[i]) -@@ -3735,8 +3723,8 @@ deps_analyze_insn (struct deps_desc *deps, rtx_insn *insn) - Since we only have a choice between 'might be clobbered' - and 'definitely not clobbered', we must include all - partly call-clobbered registers here. 
*/ -- else if (targetm.hard_regno_call_part_clobbered (insn, i, -- reg_raw_mode[i]) -+ else if (targetm.hard_regno_call_part_clobbered -+ (callee_abi.id (), i, reg_raw_mode[i]) - || TEST_HARD_REG_BIT (regs_invalidated_by_call, i)) - SET_REGNO_REG_SET (reg_pending_clobbers, i); - /* We don't know what set of fixed registers might be used -diff --git a/gcc/sched-rgn.c b/gcc/sched-rgn.c -index 83688b3c9..c5ee33bf5 100644 ---- a/gcc/sched-rgn.c -+++ b/gcc/sched-rgn.c -@@ -2409,7 +2409,7 @@ static bool - sets_likely_spilled (rtx pat) - { - bool ret = false; -- note_stores (pat, sets_likely_spilled_1, &ret); -+ note_pattern_stores (pat, sets_likely_spilled_1, &ret); - return ret; - } - -diff --git a/gcc/sel-sched-ir.c b/gcc/sel-sched-ir.c -index 6dec1beaa..f8f1d8238 100644 ---- a/gcc/sel-sched-ir.c -+++ b/gcc/sel-sched-ir.c -@@ -2661,12 +2661,9 @@ setup_id_implicit_regs (idata_t id, insn_t insn) - return; - - HARD_REG_SET temp; -- unsigned regno; -- hard_reg_set_iterator hrsi; - - get_implicit_reg_pending_clobbers (&temp, insn); -- EXECUTE_IF_SET_IN_HARD_REG_SET (temp, 0, regno, hrsi) -- SET_REGNO_REG_SET (IDATA_REG_SETS (id), regno); -+ IOR_REG_SET_HRS (IDATA_REG_SETS (id), temp); - } - - /* Setup register sets describing INSN in ID. */ -diff --git a/gcc/sel-sched.c b/gcc/sel-sched.c -index f127ff745..bf370b5a5 100644 ---- a/gcc/sel-sched.c -+++ b/gcc/sel-sched.c -@@ -1102,7 +1102,7 @@ init_regs_for_mode (machine_mode mode) - if (i >= 0) - continue; - -- if (targetm.hard_regno_call_part_clobbered (NULL, cur_reg, mode)) -+ if (targetm.hard_regno_call_part_clobbered (0, cur_reg, mode)) - SET_HARD_REG_BIT (sel_hrd.regs_for_call_clobbered[mode], - cur_reg); - -@@ -1123,7 +1123,7 @@ init_hard_regs_data (void) - - CLEAR_HARD_REG_SET (sel_hrd.regs_ever_used); - for (cur_reg = 0; cur_reg < FIRST_PSEUDO_REGISTER; cur_reg++) -- if (df_regs_ever_live_p (cur_reg) || call_used_regs[cur_reg]) -+ if (df_regs_ever_live_p (cur_reg) || call_used_or_fixed_reg_p (cur_reg)) - SET_HARD_REG_BIT (sel_hrd.regs_ever_used, cur_reg); - - /* Initialize registers that are valid based on mode when this is -@@ -1221,15 +1221,13 @@ mark_unavailable_hard_regs (def_t def, struct reg_rename *reg_rename_p, - The HARD_REGNO_RENAME_OK covers other cases in condition below. */ - if (IN_RANGE (REGNO (orig_dest), FIRST_STACK_REG, LAST_STACK_REG) - && REGNO_REG_SET_P (used_regs, FIRST_STACK_REG)) -- IOR_HARD_REG_SET (reg_rename_p->unavailable_hard_regs, -- sel_hrd.stack_regs); -+ reg_rename_p->unavailable_hard_regs |= sel_hrd.stack_regs; - #endif - -- /* If there's a call on this path, make regs from call_used_reg_set -+ /* If there's a call on this path, make regs from call_used_or_fixed_regs - unavailable. */ - if (def->crosses_call) -- IOR_HARD_REG_SET (reg_rename_p->unavailable_hard_regs, -- call_used_reg_set); -+ reg_rename_p->unavailable_hard_regs |= call_used_or_fixed_regs; - - /* Stop here before reload: we need FRAME_REGS, STACK_REGS, and crosses_call, - but not register classes. */ -@@ -1238,22 +1236,20 @@ mark_unavailable_hard_regs (def_t def, struct reg_rename *reg_rename_p, - - /* Leave regs as 'available' only from the current - register class. */ -- COPY_HARD_REG_SET (reg_rename_p->available_for_renaming, -- reg_class_contents[cl]); -+ reg_rename_p->available_for_renaming = reg_class_contents[cl]; - - mode = GET_MODE (orig_dest); - - /* Leave only registers available for this mode. 
*/ - if (!sel_hrd.regs_for_mode_ok[mode]) - init_regs_for_mode (mode); -- AND_HARD_REG_SET (reg_rename_p->available_for_renaming, -- sel_hrd.regs_for_mode[mode]); -+ reg_rename_p->available_for_renaming &= sel_hrd.regs_for_mode[mode]; - - /* Exclude registers that are partially call clobbered. */ - if (def->crosses_call -- && !targetm.hard_regno_call_part_clobbered (NULL, regno, mode)) -- AND_COMPL_HARD_REG_SET (reg_rename_p->available_for_renaming, -- sel_hrd.regs_for_call_clobbered[mode]); -+ && !targetm.hard_regno_call_part_clobbered (0, regno, mode)) -+ reg_rename_p->available_for_renaming -+ &= ~sel_hrd.regs_for_call_clobbered[mode]; - - /* Leave only those that are ok to rename. */ - EXECUTE_IF_SET_IN_HARD_REG_SET (reg_rename_p->available_for_renaming, -@@ -1274,8 +1270,7 @@ mark_unavailable_hard_regs (def_t def, struct reg_rename *reg_rename_p, - cur_reg); - } - -- AND_COMPL_HARD_REG_SET (reg_rename_p->available_for_renaming, -- reg_rename_p->unavailable_hard_regs); -+ reg_rename_p->available_for_renaming &= ~reg_rename_p->unavailable_hard_regs; - - /* Regno is always ok from the renaming part of view, but it really - could be in *unavailable_hard_regs already, so set it here instead -@@ -1686,8 +1681,7 @@ find_best_reg_for_expr (expr_t expr, blist_t bnds, bool *is_orig_reg_p) - - /* Join hard registers unavailable due to register class - restrictions and live range intersection. */ -- IOR_HARD_REG_SET (hard_regs_used, -- reg_rename_data.unavailable_hard_regs); -+ hard_regs_used |= reg_rename_data.unavailable_hard_regs; - - best_reg = choose_best_reg (hard_regs_used, ®_rename_data, - original_insns, is_orig_reg_p); -@@ -2110,7 +2104,7 @@ implicit_clobber_conflict_p (insn_t through_insn, expr_t expr) - preprocess_constraints (insn); - alternative_mask prefrred = get_preferred_alternatives (insn); - ira_implicitly_set_insn_hard_regs (&temp, prefrred); -- AND_COMPL_HARD_REG_SET (temp, ira_no_alloc_regs); -+ temp &= ~ira_no_alloc_regs; - - /* If any implicit clobber registers intersect with regular ones in - through_insn, we have a dependency and thus bail out. 
*/ -diff --git a/gcc/shrink-wrap.c b/gcc/shrink-wrap.c -index 57124db92..018696637 100644 ---- a/gcc/shrink-wrap.c -+++ b/gcc/shrink-wrap.c -@@ -76,7 +76,7 @@ requires_stack_frame_p (rtx_insn *insn, HARD_REG_SET prologue_used, - } - if (hard_reg_set_intersect_p (hardregs, prologue_used)) - return true; -- AND_COMPL_HARD_REG_SET (hardregs, call_used_reg_set); -+ hardregs &= ~call_used_or_fixed_regs; - for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) - if (TEST_HARD_REG_BIT (hardregs, regno) - && df_regs_ever_live_p (regno)) -@@ -151,8 +151,8 @@ live_edge_for_reg (basic_block bb, int regno, int end_regno) - - static bool - move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn, -- const HARD_REG_SET uses, -- const HARD_REG_SET defs, -+ const_hard_reg_set uses, -+ const_hard_reg_set defs, - bool *split_p, - struct dead_debug_local *debug) - { -@@ -687,9 +687,9 @@ try_shrink_wrapping (edge *entry_edge, rtx_insn *prologue_seq) - HARD_REG_SET this_used; - CLEAR_HARD_REG_SET (this_used); - note_uses (&PATTERN (insn), record_hard_reg_uses, &this_used); -- AND_COMPL_HARD_REG_SET (this_used, prologue_clobbered); -- IOR_HARD_REG_SET (prologue_used, this_used); -- note_stores (PATTERN (insn), record_hard_reg_sets, &prologue_clobbered); -+ this_used &= ~prologue_clobbered; -+ prologue_used |= this_used; -+ note_stores (insn, record_hard_reg_sets, &prologue_clobbered); - } - CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM); - if (frame_pointer_needed) -diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c -index bdbd1b98e..612d21b72 100644 ---- a/gcc/simplify-rtx.c -+++ b/gcc/simplify-rtx.c -@@ -35,6 +35,7 @@ along with GCC; see the file COPYING3. If not see - #include "flags.h" - #include "selftest.h" - #include "selftest-rtl.h" -+#include "rtx-vector-builder.h" - - /* Simplification and canonicalization of RTL. */ - -@@ -45,7 +46,6 @@ along with GCC; see the file COPYING3. If not see - #define HWI_SIGN_EXTEND(low) \ - ((((HOST_WIDE_INT) low) < 0) ? HOST_WIDE_INT_M1 : HOST_WIDE_INT_0) - --static rtx neg_const_int (machine_mode, const_rtx); - static bool plus_minus_operand_p (const_rtx); - static rtx simplify_plus_minus (enum rtx_code, machine_mode, rtx, rtx); - static rtx simplify_associative_operation (enum rtx_code, machine_mode, -@@ -56,17 +56,12 @@ static rtx simplify_unary_operation_1 (enum rtx_code, machine_mode, rtx); - static rtx simplify_binary_operation_1 (enum rtx_code, machine_mode, - rtx, rtx, rtx, rtx); - --/* Negate a CONST_INT rtx. */ -+/* Negate I, which satisfies poly_int_rtx_p. MODE is the mode of I. 
*/ -+ - static rtx --neg_const_int (machine_mode mode, const_rtx i) -+neg_poly_int_rtx (machine_mode mode, const_rtx i) - { -- unsigned HOST_WIDE_INT val = -UINTVAL (i); -- -- if (!HWI_COMPUTABLE_MODE_P (mode) -- && val == UINTVAL (i)) -- return simplify_const_unary_operation (NEG, mode, CONST_CAST_RTX (i), -- mode); -- return gen_int_mode (val, mode); -+ return immed_wide_int_const (-wi::to_poly_wide (i, mode), mode); - } - - /* Test whether expression, X, is an immediate constant that represents -@@ -1504,12 +1499,12 @@ simplify_unary_operation_1 (enum rtx_code code, machine_mode mode, rtx op) - && CONST_INT_P (XEXP (op, 1)) - && XEXP (XEXP (op, 0), 1) == XEXP (op, 1) - && (op_mode = as_a (GET_MODE (op)), -- GET_MODE_BITSIZE (op_mode) > INTVAL (XEXP (op, 1)))) -+ GET_MODE_PRECISION (op_mode) > INTVAL (XEXP (op, 1)))) - { - scalar_int_mode tmode; -- gcc_assert (GET_MODE_BITSIZE (int_mode) -- > GET_MODE_BITSIZE (op_mode)); -- if (int_mode_for_size (GET_MODE_BITSIZE (op_mode) -+ gcc_assert (GET_MODE_PRECISION (int_mode) -+ > GET_MODE_PRECISION (op_mode)); -+ if (int_mode_for_size (GET_MODE_PRECISION (op_mode) - - INTVAL (XEXP (op, 1)), 1).exists (&tmode)) - { - rtx inner = -@@ -1735,45 +1730,42 @@ simplify_const_unary_operation (enum rtx_code code, machine_mode mode, - } - if (CONST_SCALAR_INT_P (op) || CONST_DOUBLE_AS_FLOAT_P (op)) - return gen_const_vec_duplicate (mode, op); -- unsigned int n_elts; - if (GET_CODE (op) == CONST_VECTOR -- && GET_MODE_NUNITS (mode).is_constant (&n_elts)) -- { -- /* This must be constant if we're duplicating it to a constant -- number of elements. */ -- unsigned int in_n_elts = CONST_VECTOR_NUNITS (op).to_constant (); -- gcc_assert (in_n_elts < n_elts); -- gcc_assert ((n_elts % in_n_elts) == 0); -- rtvec v = rtvec_alloc (n_elts); -- for (unsigned i = 0; i < n_elts; i++) -- RTVEC_ELT (v, i) = CONST_VECTOR_ELT (op, i % in_n_elts); -- return gen_rtx_CONST_VECTOR (mode, v); -+ && (CONST_VECTOR_DUPLICATE_P (op) -+ || CONST_VECTOR_NUNITS (op).is_constant ())) -+ { -+ unsigned int npatterns = (CONST_VECTOR_DUPLICATE_P (op) -+ ? 
CONST_VECTOR_NPATTERNS (op) -+ : CONST_VECTOR_NUNITS (op).to_constant ()); -+ gcc_assert (multiple_p (GET_MODE_NUNITS (mode), npatterns)); -+ rtx_vector_builder builder (mode, npatterns, 1); -+ for (unsigned i = 0; i < npatterns; i++) -+ builder.quick_push (CONST_VECTOR_ELT (op, i)); -+ return builder.build (); - } - } - -- if (VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR) -+ if (VECTOR_MODE_P (mode) -+ && GET_CODE (op) == CONST_VECTOR -+ && known_eq (GET_MODE_NUNITS (mode), CONST_VECTOR_NUNITS (op))) - { -- unsigned int n_elts; -- if (!CONST_VECTOR_NUNITS (op).is_constant (&n_elts)) -- return NULL_RTX; -+ gcc_assert (GET_MODE (op) == op_mode); - -- machine_mode opmode = GET_MODE (op); -- gcc_assert (known_eq (GET_MODE_NUNITS (mode), n_elts)); -- gcc_assert (known_eq (GET_MODE_NUNITS (opmode), n_elts)); -- -- rtvec v = rtvec_alloc (n_elts); -- unsigned int i; -+ rtx_vector_builder builder; -+ if (!builder.new_unary_operation (mode, op, false)) -+ return 0; - -- for (i = 0; i < n_elts; i++) -+ unsigned int count = builder.encoded_nelts (); -+ for (unsigned int i = 0; i < count; i++) - { - rtx x = simplify_unary_operation (code, GET_MODE_INNER (mode), - CONST_VECTOR_ELT (op, i), -- GET_MODE_INNER (opmode)); -+ GET_MODE_INNER (op_mode)); - if (!x || !valid_for_const_vector_p (mode, x)) - return 0; -- RTVEC_ELT (v, i) = x; -+ builder.quick_push (x); - } -- return gen_rtx_CONST_VECTOR (mode, v); -+ return builder.build (); - } - - /* The order of these tests is critical so that, for example, we don't -@@ -2549,10 +2541,10 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode, - return plus_constant (mode, op0, trunc_int_for_mode (-offset, mode)); - - /* Don't let a relocatable value get a negative coeff. */ -- if (CONST_INT_P (op1) && GET_MODE (op0) != VOIDmode) -+ if (poly_int_rtx_p (op1) && GET_MODE (op0) != VOIDmode) - return simplify_gen_binary (PLUS, mode, - op0, -- neg_const_int (mode, op1)); -+ neg_poly_int_rtx (mode, op1)); - - /* (x - (x & y)) -> (x & ~y) */ - if (INTEGRAL_MODE_P (mode) && GET_CODE (op1) == AND) -@@ -4071,6 +4063,27 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode, - return 0; - } - -+/* Return true if binary operation OP distributes over addition in operand -+ OPNO, with the other operand being held constant. OPNO counts from 1. */ -+ -+static bool -+distributes_over_addition_p (rtx_code op, int opno) -+{ -+ switch (op) -+ { -+ case PLUS: -+ case MINUS: -+ case MULT: -+ return true; -+ -+ case ASHIFT: -+ return opno == 1; -+ -+ default: -+ return false; -+ } -+} -+ - rtx - simplify_const_binary_operation (enum rtx_code code, machine_mode mode, - rtx op0, rtx op1) -@@ -4080,26 +4093,45 @@ simplify_const_binary_operation (enum rtx_code code, machine_mode mode, - && GET_CODE (op0) == CONST_VECTOR - && GET_CODE (op1) == CONST_VECTOR) - { -- unsigned int n_elts; -- if (!CONST_VECTOR_NUNITS (op0).is_constant (&n_elts)) -- return NULL_RTX; -- -- gcc_assert (known_eq (n_elts, CONST_VECTOR_NUNITS (op1))); -- gcc_assert (known_eq (n_elts, GET_MODE_NUNITS (mode))); -- rtvec v = rtvec_alloc (n_elts); -- unsigned int i; -+ bool step_ok_p; -+ if (CONST_VECTOR_STEPPED_P (op0) -+ && CONST_VECTOR_STEPPED_P (op1)) -+ /* We can operate directly on the encoding if: -+ -+ a3 - a2 == a2 - a1 && b3 - b2 == b2 - b1 -+ implies -+ (a3 op b3) - (a2 op b2) == (a2 op b2) - (a1 op b1) -+ -+ Addition and subtraction are the supported operators -+ for which this is true. 
*/ -+ step_ok_p = (code == PLUS || code == MINUS); -+ else if (CONST_VECTOR_STEPPED_P (op0)) -+ /* We can operate directly on stepped encodings if: -+ -+ a3 - a2 == a2 - a1 -+ implies: -+ (a3 op c) - (a2 op c) == (a2 op c) - (a1 op c) -+ -+ which is true if (x -> x op c) distributes over addition. */ -+ step_ok_p = distributes_over_addition_p (code, 1); -+ else -+ /* Similarly in reverse. */ -+ step_ok_p = distributes_over_addition_p (code, 2); -+ rtx_vector_builder builder; -+ if (!builder.new_binary_operation (mode, op0, op1, step_ok_p)) -+ return 0; - -- for (i = 0; i < n_elts; i++) -+ unsigned int count = builder.encoded_nelts (); -+ for (unsigned int i = 0; i < count; i++) - { - rtx x = simplify_binary_operation (code, GET_MODE_INNER (mode), - CONST_VECTOR_ELT (op0, i), - CONST_VECTOR_ELT (op1, i)); - if (!x || !valid_for_const_vector_p (mode, x)) - return 0; -- RTVEC_ELT (v, i) = x; -+ builder.quick_push (x); - } -- -- return gen_rtx_CONST_VECTOR (mode, v); -+ return builder.build (); - } - - if (VECTOR_MODE_P (mode) -@@ -4593,11 +4625,12 @@ simplify_plus_minus (enum rtx_code code, machine_mode mode, rtx op0, - } - break; - -- case CONST_INT: -+ CASE_CONST_SCALAR_INT: -+ case CONST_POLY_INT: - n_constants++; - if (this_neg) - { -- ops[i].op = neg_const_int (mode, this_op); -+ ops[i].op = neg_poly_int_rtx (mode, this_op); - ops[i].neg = 0; - changed = 1; - canonicalized = 1; -@@ -4722,8 +4755,8 @@ simplify_plus_minus (enum rtx_code code, machine_mode mode, rtx op0, - lneg &= rneg; - if (GET_CODE (tem) == NEG) - tem = XEXP (tem, 0), lneg = !lneg; -- if (CONST_INT_P (tem) && lneg) -- tem = neg_const_int (mode, tem), lneg = 0; -+ if (poly_int_rtx_p (tem) && lneg) -+ tem = neg_poly_int_rtx (mode, tem), lneg = 0; - - ops[i].op = tem; - ops[i].neg = lneg; -@@ -4782,12 +4815,12 @@ simplify_plus_minus (enum rtx_code code, machine_mode mode, rtx op0, - in the array and that any other constant will be next-to-last. */ - - if (n_ops > 1 -- && CONST_INT_P (ops[n_ops - 1].op) -+ && poly_int_rtx_p (ops[n_ops - 1].op) - && CONSTANT_P (ops[n_ops - 2].op)) - { - rtx value = ops[n_ops - 1].op; - if (ops[n_ops - 1].neg ^ ops[n_ops - 2].neg) -- value = neg_const_int (mode, value); -+ value = neg_poly_int_rtx (mode, value); - if (CONST_INT_P (value)) - { - ops[n_ops - 2].op = plus_constant (mode, ops[n_ops - 2].op, -@@ -6104,342 +6137,466 @@ simplify_ternary_operation (enum rtx_code code, machine_mode mode, - return 0; - } - --/* Evaluate a SUBREG of a CONST_INT or CONST_WIDE_INT or CONST_DOUBLE -- or CONST_FIXED or CONST_VECTOR, returning another CONST_INT or -- CONST_WIDE_INT or CONST_DOUBLE or CONST_FIXED or CONST_VECTOR. -+/* Try to calculate NUM_BYTES bytes of the target memory image of X, -+ starting at byte FIRST_BYTE. Return true on success and add the -+ bytes to BYTES, such that each byte has BITS_PER_UNIT bits and such -+ that the bytes follow target memory order. Leave BYTES unmodified -+ on failure. - -- Works by unpacking INNER_BYTES bytes of OP into a collection of 8-bit values -- represented as a little-endian array of 'unsigned char', selecting by BYTE, -- and then repacking them again for OUTERMODE. If OP is a CONST_VECTOR, -- FIRST_ELEM is the number of the first element to extract, otherwise -- FIRST_ELEM is ignored. */ -+ MODE is the mode of X. The caller must reserve NUM_BYTES bytes in -+ BYTES before calling this function. 
*/ - --static rtx --simplify_immed_subreg (fixed_size_mode outermode, rtx op, -- machine_mode innermode, unsigned int byte, -- unsigned int first_elem, unsigned int inner_bytes) -+bool -+native_encode_rtx (machine_mode mode, rtx x, vec &bytes, -+ unsigned int first_byte, unsigned int num_bytes) - { -- enum { -- value_bit = 8, -- value_mask = (1 << value_bit) - 1 -- }; -- unsigned char value[MAX_BITSIZE_MODE_ANY_MODE / value_bit]; -- int value_start; -- int i; -- int elem; -- -- int num_elem; -- rtx * elems; -- int elem_bitsize; -- rtx result_s = NULL; -- rtvec result_v = NULL; -- enum mode_class outer_class; -- scalar_mode outer_submode; -- int max_bitsize; -+ /* Check the mode is sensible. */ -+ gcc_assert (GET_MODE (x) == VOIDmode -+ ? is_a (mode) -+ : mode == GET_MODE (x)); - -- /* Some ports misuse CCmode. */ -- if (GET_MODE_CLASS (outermode) == MODE_CC && CONST_INT_P (op)) -- return op; -+ if (GET_CODE (x) == CONST_VECTOR) -+ { -+ /* CONST_VECTOR_ELT follows target memory order, so no shuffling -+ is necessary. The only complication is that MODE_VECTOR_BOOL -+ vectors can have several elements per byte. */ -+ unsigned int elt_bits = vector_element_size (GET_MODE_BITSIZE (mode), -+ GET_MODE_NUNITS (mode)); -+ unsigned int elt = first_byte * BITS_PER_UNIT / elt_bits; -+ if (elt_bits < BITS_PER_UNIT) -+ { -+ /* This is the only case in which elements can be smaller than -+ a byte. */ -+ gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL); -+ for (unsigned int i = 0; i < num_bytes; ++i) -+ { -+ target_unit value = 0; -+ for (unsigned int j = 0; j < BITS_PER_UNIT; j += elt_bits) -+ { -+ value |= (INTVAL (CONST_VECTOR_ELT (x, elt)) & 1) << j; -+ elt += 1; -+ } -+ bytes.quick_push (value); -+ } -+ return true; -+ } - -- /* We have no way to represent a complex constant at the rtl level. */ -- if (COMPLEX_MODE_P (outermode)) -- return NULL_RTX; -+ unsigned int start = bytes.length (); -+ unsigned int elt_bytes = GET_MODE_UNIT_SIZE (mode); -+ /* Make FIRST_BYTE relative to ELT. */ -+ first_byte %= elt_bytes; -+ while (num_bytes > 0) -+ { -+ /* Work out how many bytes we want from element ELT. */ -+ unsigned int chunk_bytes = MIN (num_bytes, elt_bytes - first_byte); -+ if (!native_encode_rtx (GET_MODE_INNER (mode), -+ CONST_VECTOR_ELT (x, elt), bytes, -+ first_byte, chunk_bytes)) -+ { -+ bytes.truncate (start); -+ return false; -+ } -+ elt += 1; -+ first_byte = 0; -+ num_bytes -= chunk_bytes; -+ } -+ return true; -+ } - -- /* We support any size mode. */ -- max_bitsize = MAX (GET_MODE_BITSIZE (outermode), -- inner_bytes * BITS_PER_UNIT); -+ /* All subsequent cases are limited to scalars. */ -+ scalar_mode smode; -+ if (!is_a (mode, &smode)) -+ return false; - -- /* Unpack the value. */ -+ /* Make sure that the region is in range. */ -+ unsigned int end_byte = first_byte + num_bytes; -+ unsigned int mode_bytes = GET_MODE_SIZE (smode); -+ gcc_assert (end_byte <= mode_bytes); - -- if (GET_CODE (op) == CONST_VECTOR) -+ if (CONST_SCALAR_INT_P (x)) - { -- num_elem = CEIL (inner_bytes, GET_MODE_UNIT_SIZE (innermode)); -- elem_bitsize = GET_MODE_UNIT_BITSIZE (innermode); -+ /* The target memory layout is affected by both BYTES_BIG_ENDIAN -+ and WORDS_BIG_ENDIAN. Use the subreg machinery to get the lsb -+ position of each byte. */ -+ rtx_mode_t value (x, smode); -+ wide_int_ref value_wi (value); -+ for (unsigned int byte = first_byte; byte < end_byte; ++byte) -+ { -+ /* Always constant because the inputs are. 
*/ -+ unsigned int lsb -+ = subreg_size_lsb (1, mode_bytes, byte).to_constant (); -+ /* Operate directly on the encoding rather than using -+ wi::extract_uhwi, so that we preserve the sign or zero -+ extension for modes that are not a whole number of bits in -+ size. (Zero extension is only used for the combination of -+ innermode == BImode && STORE_FLAG_VALUE == 1). */ -+ unsigned int elt = lsb / HOST_BITS_PER_WIDE_INT; -+ unsigned int shift = lsb % HOST_BITS_PER_WIDE_INT; -+ unsigned HOST_WIDE_INT uhwi = value_wi.elt (elt); -+ bytes.quick_push (uhwi >> shift); -+ } -+ return true; - } -- else -+ -+ if (CONST_DOUBLE_P (x)) - { -- num_elem = 1; -- elem_bitsize = max_bitsize; -+ /* real_to_target produces an array of integers in target memory order. -+ All integers before the last one have 32 bits; the last one may -+ have 32 bits or fewer, depending on whether the mode bitsize -+ is divisible by 32. Each of these integers is then laid out -+ in target memory as any other integer would be. */ -+ long el32[MAX_BITSIZE_MODE_ANY_MODE / 32]; -+ real_to_target (el32, CONST_DOUBLE_REAL_VALUE (x), smode); -+ -+ /* The (maximum) number of target bytes per element of el32. */ -+ unsigned int bytes_per_el32 = 32 / BITS_PER_UNIT; -+ gcc_assert (bytes_per_el32 != 0); -+ -+ /* Build up the integers in a similar way to the CONST_SCALAR_INT_P -+ handling above. */ -+ for (unsigned int byte = first_byte; byte < end_byte; ++byte) -+ { -+ unsigned int index = byte / bytes_per_el32; -+ unsigned int subbyte = byte % bytes_per_el32; -+ unsigned int int_bytes = MIN (bytes_per_el32, -+ mode_bytes - index * bytes_per_el32); -+ /* Always constant because the inputs are. */ -+ unsigned int lsb -+ = subreg_size_lsb (1, int_bytes, subbyte).to_constant (); -+ bytes.quick_push ((unsigned long) el32[index] >> lsb); -+ } -+ return true; - } -- /* If this asserts, it is too complicated; reducing value_bit may help. */ -- gcc_assert (BITS_PER_UNIT % value_bit == 0); -- /* I don't know how to handle endianness of sub-units. */ -- gcc_assert (elem_bitsize % BITS_PER_UNIT == 0); - -- for (elem = 0; elem < num_elem; elem++) -+ if (GET_CODE (x) == CONST_FIXED) - { -- unsigned char * vp; -- rtx el = (GET_CODE (op) == CONST_VECTOR -- ? CONST_VECTOR_ELT (op, first_elem + elem) -- : op); -+ for (unsigned int byte = first_byte; byte < end_byte; ++byte) -+ { -+ /* Always constant because the inputs are. */ -+ unsigned int lsb -+ = subreg_size_lsb (1, mode_bytes, byte).to_constant (); -+ unsigned HOST_WIDE_INT piece = CONST_FIXED_VALUE_LOW (x); -+ if (lsb >= HOST_BITS_PER_WIDE_INT) -+ { -+ lsb -= HOST_BITS_PER_WIDE_INT; -+ piece = CONST_FIXED_VALUE_HIGH (x); -+ } -+ bytes.quick_push (piece >> lsb); -+ } -+ return true; -+ } - -- /* Vectors are kept in target memory order. (This is probably -- a mistake.) */ -- { -- unsigned byte = (elem * elem_bitsize) / BITS_PER_UNIT; -- unsigned ibyte = (((num_elem - 1 - elem) * elem_bitsize) -- / BITS_PER_UNIT); -- unsigned word_byte = WORDS_BIG_ENDIAN ? ibyte : byte; -- unsigned subword_byte = BYTES_BIG_ENDIAN ? ibyte : byte; -- unsigned bytele = (subword_byte % UNITS_PER_WORD -- + (word_byte / UNITS_PER_WORD) * UNITS_PER_WORD); -- vp = value + (bytele * BITS_PER_UNIT) / value_bit; -- } -+ return false; -+} - -- switch (GET_CODE (el)) -- { -- case CONST_INT: -- for (i = 0; -- i < HOST_BITS_PER_WIDE_INT && i < elem_bitsize; -- i += value_bit) -- *vp++ = INTVAL (el) >> i; -- /* CONST_INTs are always logically sign-extended. */ -- for (; i < elem_bitsize; i += value_bit) -- *vp++ = INTVAL (el) < 0 ? 
-1 : 0; -- break; -+/* Read a vector of mode MODE from the target memory image given by BYTES, -+ starting at byte FIRST_BYTE. The vector is known to be encodable using -+ NPATTERNS interleaved patterns with NELTS_PER_PATTERN elements each, -+ and BYTES is known to have enough bytes to supply NPATTERNS * -+ NELTS_PER_PATTERN vector elements. Each element of BYTES contains -+ BITS_PER_UNIT bits and the bytes are in target memory order. - -- case CONST_WIDE_INT: -- { -- rtx_mode_t val = rtx_mode_t (el, GET_MODE_INNER (innermode)); -- unsigned char extend = wi::sign_mask (val); -- int prec = wi::get_precision (val); -- -- for (i = 0; i < prec && i < elem_bitsize; i += value_bit) -- *vp++ = wi::extract_uhwi (val, i, value_bit); -- for (; i < elem_bitsize; i += value_bit) -- *vp++ = extend; -- } -- break; -+ Return the vector on success, otherwise return NULL_RTX. */ - -- case CONST_DOUBLE: -- if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (el) == VOIDmode) -- { -- unsigned char extend = 0; -- /* If this triggers, someone should have generated a -- CONST_INT instead. */ -- gcc_assert (elem_bitsize > HOST_BITS_PER_WIDE_INT); -- -- for (i = 0; i < HOST_BITS_PER_WIDE_INT; i += value_bit) -- *vp++ = CONST_DOUBLE_LOW (el) >> i; -- while (i < HOST_BITS_PER_DOUBLE_INT && i < elem_bitsize) -- { -- *vp++ -- = CONST_DOUBLE_HIGH (el) >> (i - HOST_BITS_PER_WIDE_INT); -- i += value_bit; -- } -+rtx -+native_decode_vector_rtx (machine_mode mode, vec bytes, -+ unsigned int first_byte, unsigned int npatterns, -+ unsigned int nelts_per_pattern) -+{ -+ rtx_vector_builder builder (mode, npatterns, nelts_per_pattern); - -- if (CONST_DOUBLE_HIGH (el) >> (HOST_BITS_PER_WIDE_INT - 1)) -- extend = -1; -- for (; i < elem_bitsize; i += value_bit) -- *vp++ = extend; -- } -- else -- { -- /* This is big enough for anything on the platform. */ -- long tmp[MAX_BITSIZE_MODE_ANY_MODE / 32]; -- scalar_float_mode el_mode; -+ unsigned int elt_bits = vector_element_size (GET_MODE_BITSIZE (mode), -+ GET_MODE_NUNITS (mode)); -+ if (elt_bits < BITS_PER_UNIT) -+ { -+ /* This is the only case in which elements can be smaller than a byte. -+ Element 0 is always in the lsb of the containing byte. */ -+ gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL); -+ for (unsigned int i = 0; i < builder.encoded_nelts (); ++i) -+ { -+ unsigned int bit_index = first_byte * BITS_PER_UNIT + i * elt_bits; -+ unsigned int byte_index = bit_index / BITS_PER_UNIT; -+ unsigned int lsb = bit_index % BITS_PER_UNIT; -+ builder.quick_push (bytes[byte_index] & (1 << lsb) -+ ? CONST1_RTX (BImode) -+ : CONST0_RTX (BImode)); -+ } -+ } -+ else -+ { -+ for (unsigned int i = 0; i < builder.encoded_nelts (); ++i) -+ { -+ rtx x = native_decode_rtx (GET_MODE_INNER (mode), bytes, first_byte); -+ if (!x) -+ return NULL_RTX; -+ builder.quick_push (x); -+ first_byte += elt_bits / BITS_PER_UNIT; -+ } -+ } -+ return builder.build (); -+} - -- el_mode = as_a (GET_MODE (el)); -- int bitsize = GET_MODE_BITSIZE (el_mode); -+/* Read an rtx of mode MODE from the target memory image given by BYTES, -+ starting at byte FIRST_BYTE. Each element of BYTES contains BITS_PER_UNIT -+ bits and the bytes are in target memory order. The image has enough -+ values to specify all bytes of MODE. - -- gcc_assert (bitsize <= elem_bitsize); -- gcc_assert (bitsize % value_bit == 0); -+ Return the rtx on success, otherwise return NULL_RTX. 
*/ - -- real_to_target (tmp, CONST_DOUBLE_REAL_VALUE (el), -- GET_MODE (el)); -+rtx -+native_decode_rtx (machine_mode mode, vec bytes, -+ unsigned int first_byte) -+{ -+ if (VECTOR_MODE_P (mode)) -+ { -+ /* If we know at compile time how many elements there are, -+ pull each element directly from BYTES. */ -+ unsigned int nelts; -+ if (GET_MODE_NUNITS (mode).is_constant (&nelts)) -+ return native_decode_vector_rtx (mode, bytes, first_byte, nelts, 1); -+ return NULL_RTX; -+ } - -- /* real_to_target produces its result in words affected by -- FLOAT_WORDS_BIG_ENDIAN. However, we ignore this, -- and use WORDS_BIG_ENDIAN instead; see the documentation -- of SUBREG in rtl.texi. */ -- for (i = 0; i < bitsize; i += value_bit) -- { -- int ibase; -- if (WORDS_BIG_ENDIAN) -- ibase = bitsize - 1 - i; -- else -- ibase = i; -- *vp++ = tmp[ibase / 32] >> i % 32; -- } -+ scalar_int_mode imode; -+ if (is_a (mode, &imode) -+ && GET_MODE_PRECISION (imode) <= MAX_BITSIZE_MODE_ANY_INT) -+ { -+ /* Pull the bytes msb first, so that we can use simple -+ shift-and-insert wide_int operations. */ -+ unsigned int size = GET_MODE_SIZE (imode); -+ wide_int result (wi::zero (GET_MODE_PRECISION (imode))); -+ for (unsigned int i = 0; i < size; ++i) -+ { -+ unsigned int lsb = (size - i - 1) * BITS_PER_UNIT; -+ /* Always constant because the inputs are. */ -+ unsigned int subbyte -+ = subreg_size_offset_from_lsb (1, size, lsb).to_constant (); -+ result <<= BITS_PER_UNIT; -+ result |= bytes[first_byte + subbyte]; -+ } -+ return immed_wide_int_const (result, imode); -+ } - -- /* It shouldn't matter what's done here, so fill it with -- zero. */ -- for (; i < elem_bitsize; i += value_bit) -- *vp++ = 0; -- } -- break; -+ scalar_float_mode fmode; -+ if (is_a (mode, &fmode)) -+ { -+ /* We need to build an array of integers in target memory order. -+ All integers before the last one have 32 bits; the last one may -+ have 32 bits or fewer, depending on whether the mode bitsize -+ is divisible by 32. */ -+ long el32[MAX_BITSIZE_MODE_ANY_MODE / 32]; -+ unsigned int num_el32 = CEIL (GET_MODE_BITSIZE (fmode), 32); -+ memset (el32, 0, num_el32 * sizeof (long)); -+ -+ /* The (maximum) number of target bytes per element of el32. */ -+ unsigned int bytes_per_el32 = 32 / BITS_PER_UNIT; -+ gcc_assert (bytes_per_el32 != 0); -+ -+ unsigned int mode_bytes = GET_MODE_SIZE (fmode); -+ for (unsigned int byte = 0; byte < mode_bytes; ++byte) -+ { -+ unsigned int index = byte / bytes_per_el32; -+ unsigned int subbyte = byte % bytes_per_el32; -+ unsigned int int_bytes = MIN (bytes_per_el32, -+ mode_bytes - index * bytes_per_el32); -+ /* Always constant because the inputs are. */ -+ unsigned int lsb -+ = subreg_size_lsb (1, int_bytes, subbyte).to_constant (); -+ el32[index] |= (unsigned long) bytes[first_byte + byte] << lsb; -+ } -+ REAL_VALUE_TYPE r; -+ real_from_target (&r, el32, fmode); -+ return const_double_from_real_value (r, fmode); -+ } - -- case CONST_FIXED: -- if (elem_bitsize <= HOST_BITS_PER_WIDE_INT) -- { -- for (i = 0; i < elem_bitsize; i += value_bit) -- *vp++ = CONST_FIXED_VALUE_LOW (el) >> i; -- } -+ if (ALL_SCALAR_FIXED_POINT_MODE_P (mode)) -+ { -+ scalar_mode smode = as_a (mode); -+ FIXED_VALUE_TYPE f; -+ f.data.low = 0; -+ f.data.high = 0; -+ f.mode = smode; -+ -+ unsigned int mode_bytes = GET_MODE_SIZE (smode); -+ for (unsigned int byte = 0; byte < mode_bytes; ++byte) -+ { -+ /* Always constant because the inputs are. 
*/ -+ unsigned int lsb -+ = subreg_size_lsb (1, mode_bytes, byte).to_constant (); -+ unsigned HOST_WIDE_INT unit = bytes[first_byte + byte]; -+ if (lsb >= HOST_BITS_PER_WIDE_INT) -+ f.data.high |= unit << (lsb - HOST_BITS_PER_WIDE_INT); - else -- { -- for (i = 0; i < HOST_BITS_PER_WIDE_INT; i += value_bit) -- *vp++ = CONST_FIXED_VALUE_LOW (el) >> i; -- for (; i < HOST_BITS_PER_DOUBLE_INT && i < elem_bitsize; -- i += value_bit) -- *vp++ = CONST_FIXED_VALUE_HIGH (el) -- >> (i - HOST_BITS_PER_WIDE_INT); -- for (; i < elem_bitsize; i += value_bit) -- *vp++ = 0; -- } -- break; -- -- default: -- gcc_unreachable (); -+ f.data.low |= unit << lsb; - } -+ return CONST_FIXED_FROM_FIXED_VALUE (f, mode); - } - -- /* Now, pick the right byte to start with. */ -- /* Renumber BYTE so that the least-significant byte is byte 0. A special -- case is paradoxical SUBREGs, which shouldn't be adjusted since they -- will already have offset 0. */ -- if (inner_bytes >= GET_MODE_SIZE (outermode)) -+ return NULL_RTX; -+} -+ -+/* Simplify a byte offset BYTE into CONST_VECTOR X. The main purpose -+ is to convert a runtime BYTE value into a constant one. */ -+ -+static poly_uint64 -+simplify_const_vector_byte_offset (rtx x, poly_uint64 byte) -+{ -+ /* Cope with MODE_VECTOR_BOOL by operating on bits rather than bytes. */ -+ machine_mode mode = GET_MODE (x); -+ unsigned int elt_bits = vector_element_size (GET_MODE_BITSIZE (mode), -+ GET_MODE_NUNITS (mode)); -+ /* The number of bits needed to encode one element from each pattern. */ -+ unsigned int sequence_bits = CONST_VECTOR_NPATTERNS (x) * elt_bits; -+ -+ /* Identify the start point in terms of a sequence number and a byte offset -+ within that sequence. */ -+ poly_uint64 first_sequence; -+ unsigned HOST_WIDE_INT subbit; -+ if (can_div_trunc_p (byte * BITS_PER_UNIT, sequence_bits, -+ &first_sequence, &subbit)) - { -- unsigned ibyte = inner_bytes - GET_MODE_SIZE (outermode) - byte; -- unsigned word_byte = WORDS_BIG_ENDIAN ? ibyte : byte; -- unsigned subword_byte = BYTES_BIG_ENDIAN ? ibyte : byte; -- byte = (subword_byte % UNITS_PER_WORD -- + (word_byte / UNITS_PER_WORD) * UNITS_PER_WORD); -+ unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x); -+ if (nelts_per_pattern == 1) -+ /* This is a duplicated vector, so the value of FIRST_SEQUENCE -+ doesn't matter. */ -+ byte = subbit / BITS_PER_UNIT; -+ else if (nelts_per_pattern == 2 && known_gt (first_sequence, 0U)) -+ { -+ /* The subreg drops the first element from each pattern and -+ only uses the second element. Find the first sequence -+ that starts on a byte boundary. */ -+ subbit += least_common_multiple (sequence_bits, BITS_PER_UNIT); -+ byte = subbit / BITS_PER_UNIT; -+ } - } -+ return byte; -+} -+ -+/* Subroutine of simplify_subreg in which: - -- /* BYTE should still be inside OP. (Note that BYTE is unsigned, -- so if it's become negative it will instead be very large.) */ -- gcc_assert (byte < inner_bytes); -+ - X is known to be a CONST_VECTOR -+ - OUTERMODE is known to be a vector mode - -- /* Convert from bytes to chunks of size value_bit. */ -- value_start = byte * (BITS_PER_UNIT / value_bit); -+ Try to handle the subreg by operating on the CONST_VECTOR encoding -+ rather than on each individual element of the CONST_VECTOR. - -- /* Re-pack the value. */ -- num_elem = GET_MODE_NUNITS (outermode); -+ Return the simplified subreg on success, otherwise return NULL_RTX. 
*/ - -- if (VECTOR_MODE_P (outermode)) -+static rtx -+simplify_const_vector_subreg (machine_mode outermode, rtx x, -+ machine_mode innermode, unsigned int first_byte) -+{ -+ /* Paradoxical subregs of vectors have dubious semantics. */ -+ if (paradoxical_subreg_p (outermode, innermode)) -+ return NULL_RTX; -+ -+ /* We can only preserve the semantics of a stepped pattern if the new -+ vector element is the same as the original one. */ -+ if (CONST_VECTOR_STEPPED_P (x) -+ && GET_MODE_INNER (outermode) != GET_MODE_INNER (innermode)) -+ return NULL_RTX; -+ -+ /* Cope with MODE_VECTOR_BOOL by operating on bits rather than bytes. */ -+ unsigned int x_elt_bits -+ = vector_element_size (GET_MODE_BITSIZE (innermode), -+ GET_MODE_NUNITS (innermode)); -+ unsigned int out_elt_bits -+ = vector_element_size (GET_MODE_BITSIZE (outermode), -+ GET_MODE_NUNITS (outermode)); -+ -+ /* The number of bits needed to encode one element from every pattern -+ of the original vector. */ -+ unsigned int x_sequence_bits = CONST_VECTOR_NPATTERNS (x) * x_elt_bits; -+ -+ /* The number of bits needed to encode one element from every pattern -+ of the result. */ -+ unsigned int out_sequence_bits -+ = least_common_multiple (x_sequence_bits, out_elt_bits); -+ -+ /* Work out the number of interleaved patterns in the output vector -+ and the number of encoded elements per pattern. */ -+ unsigned int out_npatterns = out_sequence_bits / out_elt_bits; -+ unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x); -+ -+ /* The encoding scheme requires the number of elements to be a multiple -+ of the number of patterns, so that each pattern appears at least once -+ and so that the same number of elements appear from each pattern. */ -+ bool ok_p = multiple_p (GET_MODE_NUNITS (outermode), out_npatterns); -+ unsigned int const_nunits; -+ if (GET_MODE_NUNITS (outermode).is_constant (&const_nunits) -+ && (!ok_p || out_npatterns * nelts_per_pattern > const_nunits)) - { -- result_v = rtvec_alloc (num_elem); -- elems = &RTVEC_ELT (result_v, 0); -+ /* Either the encoding is invalid, or applying it would give us -+ more elements than we need. Just encode each element directly. */ -+ out_npatterns = const_nunits; -+ nelts_per_pattern = 1; - } -- else -- elems = &result_s; -+ else if (!ok_p) -+ return NULL_RTX; - -- outer_submode = GET_MODE_INNER (outermode); -- outer_class = GET_MODE_CLASS (outer_submode); -- elem_bitsize = GET_MODE_BITSIZE (outer_submode); -+ /* Get enough bytes of X to form the new encoding. */ -+ unsigned int buffer_bits = out_npatterns * nelts_per_pattern * out_elt_bits; -+ unsigned int buffer_bytes = CEIL (buffer_bits, BITS_PER_UNIT); -+ auto_vec buffer (buffer_bytes); -+ if (!native_encode_rtx (innermode, x, buffer, first_byte, buffer_bytes)) -+ return NULL_RTX; - -- gcc_assert (elem_bitsize % value_bit == 0); -- gcc_assert (elem_bitsize + value_start * value_bit <= max_bitsize); -+ /* Reencode the bytes as OUTERMODE. */ -+ return native_decode_vector_rtx (outermode, buffer, 0, out_npatterns, -+ nelts_per_pattern); -+} - -- for (elem = 0; elem < num_elem; elem++) -- { -- unsigned char *vp; -+/* Try to simplify a subreg of a constant by encoding the subreg region -+ as a sequence of target bytes and reading them back in the new mode. -+ Return the new value on success, otherwise return null. - -- /* Vectors are stored in target memory order. (This is probably -- a mistake.) 
*/ -- { -- unsigned byte = (elem * elem_bitsize) / BITS_PER_UNIT; -- unsigned ibyte = (((num_elem - 1 - elem) * elem_bitsize) -- / BITS_PER_UNIT); -- unsigned word_byte = WORDS_BIG_ENDIAN ? ibyte : byte; -- unsigned subword_byte = BYTES_BIG_ENDIAN ? ibyte : byte; -- unsigned bytele = (subword_byte % UNITS_PER_WORD -- + (word_byte / UNITS_PER_WORD) * UNITS_PER_WORD); -- vp = value + value_start + (bytele * BITS_PER_UNIT) / value_bit; -- } -+ The subreg has outer mode OUTERMODE, inner mode INNERMODE, inner value X -+ and byte offset FIRST_BYTE. */ - -- switch (outer_class) -- { -- case MODE_INT: -- case MODE_PARTIAL_INT: -- { -- int u; -- int base = 0; -- int units -- = (GET_MODE_BITSIZE (outer_submode) + HOST_BITS_PER_WIDE_INT - 1) -- / HOST_BITS_PER_WIDE_INT; -- HOST_WIDE_INT tmp[MAX_BITSIZE_MODE_ANY_INT / HOST_BITS_PER_WIDE_INT]; -- wide_int r; -- -- if (GET_MODE_PRECISION (outer_submode) > MAX_BITSIZE_MODE_ANY_INT) -- return NULL_RTX; -- for (u = 0; u < units; u++) -- { -- unsigned HOST_WIDE_INT buf = 0; -- for (i = 0; -- i < HOST_BITS_PER_WIDE_INT && base + i < elem_bitsize; -- i += value_bit) -- buf |= (unsigned HOST_WIDE_INT)(*vp++ & value_mask) << i; -- -- tmp[u] = buf; -- base += HOST_BITS_PER_WIDE_INT; -- } -- r = wide_int::from_array (tmp, units, -- GET_MODE_PRECISION (outer_submode)); --#if TARGET_SUPPORTS_WIDE_INT == 0 -- /* Make sure r will fit into CONST_INT or CONST_DOUBLE. */ -- if (wi::min_precision (r, SIGNED) > HOST_BITS_PER_DOUBLE_INT) -- return NULL_RTX; --#endif -- elems[elem] = immed_wide_int_const (r, outer_submode); -- } -- break; -+static rtx -+simplify_immed_subreg (fixed_size_mode outermode, rtx x, -+ machine_mode innermode, unsigned int first_byte) -+{ -+ unsigned int buffer_bytes = GET_MODE_SIZE (outermode); -+ auto_vec buffer (buffer_bytes); - -- case MODE_FLOAT: -- case MODE_DECIMAL_FLOAT: -- { -- REAL_VALUE_TYPE r; -- long tmp[MAX_BITSIZE_MODE_ANY_MODE / 32] = { 0 }; -- -- /* real_from_target wants its input in words affected by -- FLOAT_WORDS_BIG_ENDIAN. However, we ignore this, -- and use WORDS_BIG_ENDIAN instead; see the documentation -- of SUBREG in rtl.texi. */ -- for (i = 0; i < elem_bitsize; i += value_bit) -- { -- int ibase; -- if (WORDS_BIG_ENDIAN) -- ibase = elem_bitsize - 1 - i; -- else -- ibase = i; -- tmp[ibase / 32] |= (*vp++ & value_mask) << i % 32; -- } -+ /* Some ports misuse CCmode. */ -+ if (GET_MODE_CLASS (outermode) == MODE_CC && CONST_INT_P (x)) -+ return x; - -- real_from_target (&r, tmp, outer_submode); -- elems[elem] = const_double_from_real_value (r, outer_submode); -- } -- break; -+ /* Paradoxical subregs read undefined values for bytes outside of the -+ inner value. However, we have traditionally always sign-extended -+ integer constants and zero-extended others. 
*/ -+ unsigned int inner_bytes = buffer_bytes; -+ if (paradoxical_subreg_p (outermode, innermode)) -+ { -+ if (!GET_MODE_SIZE (innermode).is_constant (&inner_bytes)) -+ return NULL_RTX; - -- case MODE_FRACT: -- case MODE_UFRACT: -- case MODE_ACCUM: -- case MODE_UACCUM: -- { -- FIXED_VALUE_TYPE f; -- f.data.low = 0; -- f.data.high = 0; -- f.mode = outer_submode; -- -- for (i = 0; -- i < HOST_BITS_PER_WIDE_INT && i < elem_bitsize; -- i += value_bit) -- f.data.low |= (unsigned HOST_WIDE_INT)(*vp++ & value_mask) << i; -- for (; i < elem_bitsize; i += value_bit) -- f.data.high |= ((unsigned HOST_WIDE_INT)(*vp++ & value_mask) -- << (i - HOST_BITS_PER_WIDE_INT)); -- -- elems[elem] = CONST_FIXED_FROM_FIXED_VALUE (f, outer_submode); -- } -- break; -+ target_unit filler = 0; -+ if (CONST_SCALAR_INT_P (x) && wi::neg_p (rtx_mode_t (x, innermode))) -+ filler = -1; - -- default: -- gcc_unreachable (); -- } -+ /* Add any leading bytes due to big-endian layout. The number of -+ bytes must be constant because both modes have constant size. */ -+ unsigned int leading_bytes -+ = -byte_lowpart_offset (outermode, innermode).to_constant (); -+ for (unsigned int i = 0; i < leading_bytes; ++i) -+ buffer.quick_push (filler); -+ -+ if (!native_encode_rtx (innermode, x, buffer, first_byte, inner_bytes)) -+ return NULL_RTX; -+ -+ /* Add any trailing bytes due to little-endian layout. */ -+ while (buffer.length () < buffer_bytes) -+ buffer.quick_push (filler); - } -- if (VECTOR_MODE_P (outermode)) -- return gen_rtx_CONST_VECTOR (outermode, result_v); - else -- return result_s; -+ { -+ if (!native_encode_rtx (innermode, x, buffer, first_byte, inner_bytes)) -+ return NULL_RTX; -+ } -+ return native_decode_rtx (outermode, buffer, 0); - } - - /* Simplify SUBREG:OUTERMODE(OP:INNERMODE, BYTE) -@@ -6468,6 +6625,9 @@ simplify_subreg (machine_mode outermode, rtx op, - if (outermode == innermode && known_eq (byte, 0U)) - return op; - -+ if (GET_CODE (op) == CONST_VECTOR) -+ byte = simplify_const_vector_byte_offset (op, byte); -+ - if (multiple_p (byte, GET_MODE_UNIT_SIZE (innermode))) - { - rtx elt; -@@ -6487,30 +6647,21 @@ simplify_subreg (machine_mode outermode, rtx op, - || CONST_FIXED_P (op) - || GET_CODE (op) == CONST_VECTOR) - { -- /* simplify_immed_subreg deconstructs OP into bytes and constructs -- the result from bytes, so it only works if the sizes of the modes -- and the value of the offset are known at compile time. Cases that -- that apply to general modes and offsets should be handled here -- before calling simplify_immed_subreg. */ -- fixed_size_mode fs_outermode, fs_innermode; - unsigned HOST_WIDE_INT cbyte; -- if (is_a (outermode, &fs_outermode) -- && is_a (innermode, &fs_innermode) -- && byte.is_constant (&cbyte)) -- return simplify_immed_subreg (fs_outermode, op, fs_innermode, cbyte, -- 0, GET_MODE_SIZE (fs_innermode)); -- -- /* Handle constant-sized outer modes and variable-sized inner modes. 
*/ -- unsigned HOST_WIDE_INT first_elem; -- if (GET_CODE (op) == CONST_VECTOR -- && is_a (outermode, &fs_outermode) -- && constant_multiple_p (byte, GET_MODE_UNIT_SIZE (innermode), -- &first_elem)) -- return simplify_immed_subreg (fs_outermode, op, innermode, 0, -- first_elem, -- GET_MODE_SIZE (fs_outermode)); -+ if (byte.is_constant (&cbyte)) -+ { -+ if (GET_CODE (op) == CONST_VECTOR && VECTOR_MODE_P (outermode)) -+ { -+ rtx tmp = simplify_const_vector_subreg (outermode, op, -+ innermode, cbyte); -+ if (tmp) -+ return tmp; -+ } - -- return NULL_RTX; -+ fixed_size_mode fs_outermode; -+ if (is_a (outermode, &fs_outermode)) -+ return simplify_immed_subreg (fs_outermode, op, innermode, cbyte); -+ } - } - - /* Changing mode twice with SUBREG => just change it once, -@@ -6952,6 +7103,18 @@ test_vector_ops_duplicate (machine_mode mode, rtx scalar_reg) - && mode_for_vector (inner_mode, 2).exists (&narrower_mode) - && VECTOR_MODE_P (narrower_mode)) - { -+ /* Test VEC_DUPLICATE of a vector. */ -+ rtx_vector_builder nbuilder (narrower_mode, 2, 1); -+ nbuilder.quick_push (const0_rtx); -+ nbuilder.quick_push (const1_rtx); -+ rtx_vector_builder builder (mode, 2, 1); -+ builder.quick_push (const0_rtx); -+ builder.quick_push (const1_rtx); -+ ASSERT_RTX_EQ (builder.build (), -+ simplify_unary_operation (VEC_DUPLICATE, mode, -+ nbuilder.build (), -+ narrower_mode)); -+ - /* Test VEC_SELECT of a vector. */ - rtx vec_par - = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, const1_rtx, const0_rtx)); -@@ -7024,6 +7187,58 @@ test_vector_ops_series (machine_mode mode, rtx scalar_reg) - ASSERT_RTX_EQ (series_0_m1, - simplify_binary_operation (VEC_SERIES, mode, const0_rtx, - constm1_rtx)); -+ -+ /* Test NEG on constant vector series. */ -+ ASSERT_RTX_EQ (series_0_m1, -+ simplify_unary_operation (NEG, mode, series_0_1, mode)); -+ ASSERT_RTX_EQ (series_0_1, -+ simplify_unary_operation (NEG, mode, series_0_m1, mode)); -+ -+ /* Test PLUS and MINUS on constant vector series. */ -+ rtx scalar2 = gen_int_mode (2, inner_mode); -+ rtx scalar3 = gen_int_mode (3, inner_mode); -+ rtx series_1_1 = gen_const_vec_series (mode, const1_rtx, const1_rtx); -+ rtx series_0_2 = gen_const_vec_series (mode, const0_rtx, scalar2); -+ rtx series_1_3 = gen_const_vec_series (mode, const1_rtx, scalar3); -+ ASSERT_RTX_EQ (series_1_1, -+ simplify_binary_operation (PLUS, mode, series_0_1, -+ CONST1_RTX (mode))); -+ ASSERT_RTX_EQ (series_0_m1, -+ simplify_binary_operation (PLUS, mode, CONST0_RTX (mode), -+ series_0_m1)); -+ ASSERT_RTX_EQ (series_1_3, -+ simplify_binary_operation (PLUS, mode, series_1_1, -+ series_0_2)); -+ ASSERT_RTX_EQ (series_0_1, -+ simplify_binary_operation (MINUS, mode, series_1_1, -+ CONST1_RTX (mode))); -+ ASSERT_RTX_EQ (series_1_1, -+ simplify_binary_operation (MINUS, mode, CONST1_RTX (mode), -+ series_0_m1)); -+ ASSERT_RTX_EQ (series_1_1, -+ simplify_binary_operation (MINUS, mode, series_1_3, -+ series_0_2)); -+ -+ /* Test MULT between constant vectors. 
*/ -+ rtx vec2 = gen_const_vec_duplicate (mode, scalar2); -+ rtx vec3 = gen_const_vec_duplicate (mode, scalar3); -+ rtx scalar9 = gen_int_mode (9, inner_mode); -+ rtx series_3_9 = gen_const_vec_series (mode, scalar3, scalar9); -+ ASSERT_RTX_EQ (series_0_2, -+ simplify_binary_operation (MULT, mode, series_0_1, vec2)); -+ ASSERT_RTX_EQ (series_3_9, -+ simplify_binary_operation (MULT, mode, vec3, series_1_3)); -+ if (!GET_MODE_NUNITS (mode).is_constant ()) -+ ASSERT_FALSE (simplify_binary_operation (MULT, mode, series_0_1, -+ series_0_1)); -+ -+ /* Test ASHIFT between constant vectors. */ -+ ASSERT_RTX_EQ (series_0_2, -+ simplify_binary_operation (ASHIFT, mode, series_0_1, -+ CONST1_RTX (mode))); -+ if (!GET_MODE_NUNITS (mode).is_constant ()) -+ ASSERT_FALSE (simplify_binary_operation (ASHIFT, mode, CONST1_RTX (mode), -+ series_0_1)); - } - - /* Verify simplify_merge_mask works correctly. */ -@@ -7089,6 +7304,165 @@ test_vec_merge (machine_mode mode) - simplify_rtx (nvm)); - } - -+/* Test subregs of integer vector constant X, trying elements in -+ the range [ELT_BIAS, ELT_BIAS + constant_lower_bound (NELTS)), -+ where NELTS is the number of elements in X. Subregs involving -+ elements [ELT_BIAS, ELT_BIAS + FIRST_VALID) are expected to fail. */ -+ -+static void -+test_vector_subregs_modes (rtx x, poly_uint64 elt_bias = 0, -+ unsigned int first_valid = 0) -+{ -+ machine_mode inner_mode = GET_MODE (x); -+ scalar_mode int_mode = GET_MODE_INNER (inner_mode); -+ -+ for (unsigned int modei = 0; modei < NUM_MACHINE_MODES; ++modei) -+ { -+ machine_mode outer_mode = (machine_mode) modei; -+ if (!VECTOR_MODE_P (outer_mode)) -+ continue; -+ -+ unsigned int outer_nunits; -+ if (GET_MODE_INNER (outer_mode) == int_mode -+ && GET_MODE_NUNITS (outer_mode).is_constant (&outer_nunits) -+ && multiple_p (GET_MODE_NUNITS (inner_mode), outer_nunits)) -+ { -+ /* Test subregs in which the outer mode is a smaller, -+ constant-sized vector of the same element type. */ -+ unsigned int limit -+ = constant_lower_bound (GET_MODE_NUNITS (inner_mode)); -+ for (unsigned int elt = 0; elt < limit; elt += outer_nunits) -+ { -+ rtx expected = NULL_RTX; -+ if (elt >= first_valid) -+ { -+ rtx_vector_builder builder (outer_mode, outer_nunits, 1); -+ for (unsigned int i = 0; i < outer_nunits; ++i) -+ builder.quick_push (CONST_VECTOR_ELT (x, elt + i)); -+ expected = builder.build (); -+ } -+ poly_uint64 byte = (elt_bias + elt) * GET_MODE_SIZE (int_mode); -+ ASSERT_RTX_EQ (expected, -+ simplify_subreg (outer_mode, x, -+ inner_mode, byte)); -+ } -+ } -+ else if (known_eq (GET_MODE_SIZE (outer_mode), -+ GET_MODE_SIZE (inner_mode)) -+ && known_eq (elt_bias, 0U) -+ && (GET_MODE_CLASS (outer_mode) != MODE_VECTOR_BOOL -+ || known_eq (GET_MODE_BITSIZE (outer_mode), -+ GET_MODE_NUNITS (outer_mode))) -+ && (!FLOAT_MODE_P (outer_mode) -+ || (FLOAT_MODE_FORMAT (outer_mode)->ieee_bits -+ == GET_MODE_UNIT_PRECISION (outer_mode))) -+ && (GET_MODE_SIZE (inner_mode).is_constant () -+ || !CONST_VECTOR_STEPPED_P (x))) -+ { -+ /* Try converting to OUTER_MODE and back. */ -+ rtx outer_x = simplify_subreg (outer_mode, x, inner_mode, 0); -+ ASSERT_TRUE (outer_x != NULL_RTX); -+ ASSERT_RTX_EQ (x, simplify_subreg (inner_mode, outer_x, -+ outer_mode, 0)); -+ } -+ } -+ -+ if (BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN) -+ { -+ /* Test each byte in the element range. 
*/ -+ unsigned int limit -+ = constant_lower_bound (GET_MODE_SIZE (inner_mode)); -+ for (unsigned int i = 0; i < limit; ++i) -+ { -+ unsigned int elt = i / GET_MODE_SIZE (int_mode); -+ rtx expected = NULL_RTX; -+ if (elt >= first_valid) -+ { -+ unsigned int byte_shift = i % GET_MODE_SIZE (int_mode); -+ if (BYTES_BIG_ENDIAN) -+ byte_shift = GET_MODE_SIZE (int_mode) - byte_shift - 1; -+ rtx_mode_t vec_elt (CONST_VECTOR_ELT (x, elt), int_mode); -+ wide_int shifted_elt -+ = wi::lrshift (vec_elt, byte_shift * BITS_PER_UNIT); -+ expected = immed_wide_int_const (shifted_elt, QImode); -+ } -+ poly_uint64 byte = elt_bias * GET_MODE_SIZE (int_mode) + i; -+ ASSERT_RTX_EQ (expected, -+ simplify_subreg (QImode, x, inner_mode, byte)); -+ } -+ } -+} -+ -+/* Test constant subregs of integer vector mode INNER_MODE, using 1 -+ element per pattern. */ -+ -+static void -+test_vector_subregs_repeating (machine_mode inner_mode) -+{ -+ poly_uint64 nunits = GET_MODE_NUNITS (inner_mode); -+ unsigned int min_nunits = constant_lower_bound (nunits); -+ scalar_mode int_mode = GET_MODE_INNER (inner_mode); -+ unsigned int count = gcd (min_nunits, 8); -+ -+ rtx_vector_builder builder (inner_mode, count, 1); -+ for (unsigned int i = 0; i < count; ++i) -+ builder.quick_push (gen_int_mode (8 - i, int_mode)); -+ rtx x = builder.build (); -+ -+ test_vector_subregs_modes (x); -+ if (!nunits.is_constant ()) -+ test_vector_subregs_modes (x, nunits - min_nunits); -+} -+ -+/* Test constant subregs of integer vector mode INNER_MODE, using 2 -+ elements per pattern. */ -+ -+static void -+test_vector_subregs_fore_back (machine_mode inner_mode) -+{ -+ poly_uint64 nunits = GET_MODE_NUNITS (inner_mode); -+ unsigned int min_nunits = constant_lower_bound (nunits); -+ scalar_mode int_mode = GET_MODE_INNER (inner_mode); -+ unsigned int count = gcd (min_nunits, 4); -+ -+ rtx_vector_builder builder (inner_mode, count, 2); -+ for (unsigned int i = 0; i < count; ++i) -+ builder.quick_push (gen_int_mode (i, int_mode)); -+ for (unsigned int i = 0; i < count; ++i) -+ builder.quick_push (gen_int_mode (-(int) i, int_mode)); -+ rtx x = builder.build (); -+ -+ test_vector_subregs_modes (x); -+ if (!nunits.is_constant ()) -+ test_vector_subregs_modes (x, nunits - min_nunits, count); -+} -+ -+/* Test constant subregs of integer vector mode INNER_MODE, using 3 -+ elements per pattern. */ -+ -+static void -+test_vector_subregs_stepped (machine_mode inner_mode) -+{ -+ /* Build { 0, 1, 2, 3, ... }. */ -+ scalar_mode int_mode = GET_MODE_INNER (inner_mode); -+ rtx_vector_builder builder (inner_mode, 1, 3); -+ for (unsigned int i = 0; i < 3; ++i) -+ builder.quick_push (gen_int_mode (i, int_mode)); -+ rtx x = builder.build (); -+ -+ test_vector_subregs_modes (x); -+} -+ -+/* Test constant subregs of integer vector mode INNER_MODE. */ -+ -+static void -+test_vector_subregs (machine_mode inner_mode) -+{ -+ test_vector_subregs_repeating (inner_mode); -+ test_vector_subregs_fore_back (inner_mode); -+ test_vector_subregs_stepped (inner_mode); -+} -+ - /* Verify some simplifications involving vectors. 
*/ - - static void -@@ -7103,7 +7477,10 @@ test_vector_ops () - test_vector_ops_duplicate (mode, scalar_reg); - if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT - && maybe_gt (GET_MODE_NUNITS (mode), 2)) -- test_vector_ops_series (mode, scalar_reg); -+ { -+ test_vector_ops_series (mode, scalar_reg); -+ test_vector_subregs (mode); -+ } - test_vec_merge (mode); - } - } -diff --git a/gcc/stack-ptr-mod.c b/gcc/stack-ptr-mod.c -index a10d59b61..5cb95e712 100644 ---- a/gcc/stack-ptr-mod.c -+++ b/gcc/stack-ptr-mod.c -@@ -91,9 +91,7 @@ pass_stack_ptr_mod::execute (function *fun) - if (INSN_P (insn)) - { - /* Check if insn modifies the stack pointer. */ -- note_stores (PATTERN (insn), -- notice_stack_pointer_modification_1, -- NULL); -+ note_stores (insn, notice_stack_pointer_modification_1, NULL); - if (! crtl->sp_is_unchanging) - return 0; - } -diff --git a/gcc/stor-layout.c b/gcc/stor-layout.c -index a054b7887..7d1917f82 100644 ---- a/gcc/stor-layout.c -+++ b/gcc/stor-layout.c -@@ -42,6 +42,7 @@ along with GCC; see the file COPYING3. If not see - #include "gimplify.h" - #include "attribs.h" - #include "debug.h" -+#include "calls.h" - - /* Data type for the expressions representing sizes of data types. - It is the first integer type laid out. */ -@@ -1835,7 +1836,8 @@ compute_record_mode (tree type) - line. */ - SET_TYPE_MODE (type, BLKmode); - -- if (! tree_fits_uhwi_p (TYPE_SIZE (type))) -+ poly_uint64 type_size; -+ if (!poly_int_tree_p (TYPE_SIZE (type), &type_size)) - return; - - /* A record which has any BLKmode members must itself be -@@ -1846,20 +1848,21 @@ compute_record_mode (tree type) - if (TREE_CODE (field) != FIELD_DECL) - continue; - -+ poly_uint64 field_size; - if (TREE_CODE (TREE_TYPE (field)) == ERROR_MARK - || (TYPE_MODE (TREE_TYPE (field)) == BLKmode - && ! TYPE_NO_FORCE_BLK (TREE_TYPE (field)) - && !(TYPE_SIZE (TREE_TYPE (field)) != 0 - && integer_zerop (TYPE_SIZE (TREE_TYPE (field))))) -- || ! tree_fits_uhwi_p (bit_position (field)) -+ || !tree_fits_poly_uint64_p (bit_position (field)) - || DECL_SIZE (field) == 0 -- || ! tree_fits_uhwi_p (DECL_SIZE (field))) -+ || !poly_int_tree_p (DECL_SIZE (field), &field_size)) - return; - - /* If this field is the whole struct, remember its mode so - that, say, we can put a double in a class into a DF - register instead of forcing it to live in the stack. */ -- if (simple_cst_equal (TYPE_SIZE (type), DECL_SIZE (field)) -+ if (known_eq (field_size, type_size) - /* Partial int types (e.g. __int20) may have TYPE_SIZE equal to - wider types (e.g. int32), despite precision being less. Ensure - that the TYPE_MODE of the struct does not get set to the partial -@@ -1879,15 +1882,14 @@ compute_record_mode (tree type) - For UNION_TYPE, if the widest field is MODE_INT then use that mode. - If the widest field is MODE_PARTIAL_INT, and the union will be passed - by reference, then use that mode. 
*/ -- poly_uint64 type_size; - if ((TREE_CODE (type) == RECORD_TYPE - || (TREE_CODE (type) == UNION_TYPE - && (GET_MODE_CLASS (mode) == MODE_INT - || (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT -- && targetm.calls.pass_by_reference (pack_cumulative_args (0), -- mode, type, 0))))) -+ && (targetm.calls.pass_by_reference -+ (pack_cumulative_args (0), -+ function_arg_info (type, mode, /*named=*/false))))))) - && mode != VOIDmode -- && poly_int_tree_p (TYPE_SIZE (type), &type_size) - && known_eq (GET_MODE_BITSIZE (mode), type_size)) - ; - else -diff --git a/gcc/target-globals.c b/gcc/target-globals.c -index 94a465c91..00bbda69c 100644 ---- a/gcc/target-globals.c -+++ b/gcc/target-globals.c -@@ -40,6 +40,7 @@ along with GCC; see the file COPYING3. If not see - #include "gcse.h" - #include "bb-reorder.h" - #include "lower-subreg.h" -+#include "function-abi.h" - - #if SWITCHABLE_TARGET - struct target_globals default_target_globals = { -@@ -48,6 +49,7 @@ struct target_globals default_target_globals = { - &default_target_rtl, - &default_target_recog, - &default_target_hard_regs, -+ &default_target_function_abi_info, - &default_target_reload, - &default_target_expmed, - &default_target_optabs, -@@ -70,6 +72,7 @@ save_target_globals (void) - g->rtl = ggc_cleared_alloc (); - g->recog = XCNEW (struct target_recog); - g->hard_regs = XCNEW (struct target_hard_regs); -+ g->function_abi_info = XCNEW (struct target_function_abi_info); - g->reload = XCNEW (struct target_reload); - g->expmed = XCNEW (struct target_expmed); - g->optabs = XCNEW (struct target_optabs); -@@ -127,6 +130,7 @@ target_globals::~target_globals () - XDELETE (regs); - XDELETE (recog); - XDELETE (hard_regs); -+ XDELETE (function_abi_info); - XDELETE (reload); - XDELETE (expmed); - XDELETE (optabs); -diff --git a/gcc/target-globals.h b/gcc/target-globals.h -index 5af846c9f..f21580be6 100644 ---- a/gcc/target-globals.h -+++ b/gcc/target-globals.h -@@ -26,6 +26,7 @@ extern struct target_regs *this_target_regs; - extern struct target_rtl *this_target_rtl; - extern struct target_recog *this_target_recog; - extern struct target_hard_regs *this_target_hard_regs; -+extern struct target_function_abi_info *this_target_function_abi_info; - extern struct target_reload *this_target_reload; - extern struct target_expmed *this_target_expmed; - extern struct target_optabs *this_target_optabs; -@@ -47,6 +48,7 @@ struct GTY(()) target_globals { - struct target_rtl *rtl; - struct target_recog *GTY((skip)) recog; - struct target_hard_regs *GTY((skip)) hard_regs; -+ struct target_function_abi_info *GTY((skip)) function_abi_info; - struct target_reload *GTY((skip)) reload; - struct target_expmed *GTY((skip)) expmed; - struct target_optabs *GTY((skip)) optabs; -@@ -74,6 +76,7 @@ restore_target_globals (struct target_globals *g) - this_target_rtl = g->rtl; - this_target_recog = g->recog; - this_target_hard_regs = g->hard_regs; -+ this_target_function_abi_info = g->function_abi_info; - this_target_reload = g->reload; - this_target_expmed = g->expmed; - this_target_optabs = g->optabs; -diff --git a/gcc/target.def b/gcc/target.def -index f998470ff..05389cdd1 100644 ---- a/gcc/target.def -+++ b/gcc/target.def -@@ -1782,22 +1782,6 @@ return type of the vectorized function shall be of vector type\n\ - tree, (tree fndecl, tree vec_type_out, tree vec_type_in), - default_builtin_md_vectorized_function) - --/* Returns a function declaration for a builtin that realizes the -- vector conversion, or NULL_TREE if not available. 
*/ --DEFHOOK --(builtin_conversion, -- "This hook should return the DECL of a function that implements conversion of the\n\ --input vector of type @var{src_type} to type @var{dest_type}.\n\ --The value of @var{code} is one of the enumerators in @code{enum tree_code} and\n\ --specifies how the conversion is to be applied\n\ --(truncation, rounding, etc.).\n\ --\n\ --If this hook is defined, the autovectorizer will use the\n\ --@code{TARGET_VECTORIZE_BUILTIN_CONVERSION} target hook when vectorizing\n\ --conversion. Otherwise, it will return @code{NULL_TREE}.", -- tree, (unsigned code, tree dest_type, tree src_type), -- default_builtin_vectorized_conversion) -- - /* Cost of different vector/scalar statements in vectorization cost - model. In case of misaligned vector loads and stores the cost depends - on the data type and misalignment value. */ -@@ -2431,6 +2415,24 @@ another @code{CALL_EXPR}.\n\ - @var{arglist} really has type @samp{VEC(tree,gc)*}", - tree, (unsigned int /*location_t*/ loc, tree fndecl, void *arglist), NULL) - -+DEFHOOK -+(check_builtin_call, -+ "Perform semantic checking on a call to a machine-specific built-in\n\ -+function after its arguments have been constrained to the function\n\ -+signature. Return true if the call is valid, otherwise report an error\n\ -+and return false.\n\ -+\n\ -+This hook is called after @code{TARGET_RESOLVE_OVERLOADED_BUILTIN}.\n\ -+The call was originally to built-in function @var{orig_fndecl},\n\ -+but after the optional @code{TARGET_RESOLVE_OVERLOADED_BUILTIN}\n\ -+step is now to built-in function @var{fndecl}. @var{loc} is the\n\ -+location of the call and @var{args} is an array of function arguments,\n\ -+of which there are @var{nargs}. @var{arg_loc} specifies the location\n\ -+of each argument.", -+ bool, (location_t loc, vec arg_loc, tree fndecl, -+ tree orig_fndecl, unsigned int nargs, tree *args), -+ NULL) -+ - /* Fold a target-specific builtin to a tree valid for both GIMPLE - and GENERIC. */ - DEFHOOK -@@ -2624,38 +2626,6 @@ DEFHOOK - bool, (const rtx_insn *follower, const rtx_insn *followee), - hook_bool_const_rtx_insn_const_rtx_insn_true) - --/* Return a register class for which branch target register -- optimizations should be applied. */ --DEFHOOK --(branch_target_register_class, -- "This target hook returns a register class for which branch target register\n\ --optimizations should be applied. All registers in this class should be\n\ --usable interchangeably. After reload, registers in this class will be\n\ --re-allocated and loads will be hoisted out of loops and be subjected\n\ --to inter-block scheduling.", -- reg_class_t, (void), -- default_branch_target_register_class) -- --/* Return true if branch target register optimizations should include -- callee-saved registers that are not already live during the current -- function. AFTER_PE_GEN is true if prologues and epilogues have -- already been generated. */ --DEFHOOK --(branch_target_register_callee_saved, -- "Branch target register optimization will by default exclude callee-saved\n\ --registers\n\ --that are not already live during the current function; if this target hook\n\ --returns true, they will be included. The target code must than make sure\n\ --that all target registers in the class returned by\n\ --@samp{TARGET_BRANCH_TARGET_REGISTER_CLASS} that might need saving are\n\ --saved. @var{after_prologue_epilogue_gen} indicates if prologues and\n\ --epilogues have already been generated. 
Note, even if you only return\n\ --true when @var{after_prologue_epilogue_gen} is false, you still are likely\n\ --to have to make special provisions in @code{INITIAL_ELIMINATION_OFFSET}\n\ --to reserve space for caller-saved target registers.", -- bool, (bool after_prologue_epilogue_gen), -- hook_bool_bool_false) -- - /* Return true if the target supports conditional execution. */ - DEFHOOK - (have_conditional_execution, -@@ -3407,6 +3377,29 @@ must have move patterns for this mode.", - bool, (machine_mode mode), - hook_bool_mode_false) - -+DEFHOOK -+(compatible_vector_types_p, -+ "Return true if there is no target-specific reason for treating\n\ -+vector types @var{type1} and @var{type2} as distinct types. The caller\n\ -+has already checked for target-independent reasons, meaning that the\n\ -+types are known to have the same mode, to have the same number of elements,\n\ -+and to have what the caller considers to be compatible element types.\n\ -+\n\ -+The main reason for defining this hook is to reject pairs of types\n\ -+that are handled differently by the target's calling convention.\n\ -+For example, when a new @var{N}-bit vector architecture is added\n\ -+to a target, the target may want to handle normal @var{N}-bit\n\ -+@code{VECTOR_TYPE} arguments and return values in the same way as\n\ -+before, to maintain backwards compatibility. However, it may also\n\ -+provide new, architecture-specific @code{VECTOR_TYPE}s that are passed\n\ -+and returned in a more efficient way. It is then important to maintain\n\ -+a distinction between the ``normal'' @code{VECTOR_TYPE}s and the new\n\ -+architecture-specific ones.\n\ -+\n\ -+The default implementation returns true, which is correct for most targets.", -+ bool, (const_tree type1, const_tree type2), -+ hook_bool_const_tree_const_tree_true) -+ - DEFHOOK - (vector_alignment, - "This hook can be used to define the alignment for a vector of type\n\ -@@ -3569,7 +3562,7 @@ two areas of memory, or to set, clear or store to memory, for example\n\ - when copying a @code{struct}. The @code{by_pieces} infrastructure\n\ - implements such memory operations as a sequence of load, store or move\n\ - insns. Alternate strategies are to expand the\n\ --@code{movmem} or @code{setmem} optabs, to emit a library call, or to emit\n\ -+@code{cpymem} or @code{setmem} optabs, to emit a library call, or to emit\n\ - unit-by-unit, loop-based operations.\n\ - \n\ - This target hook should return true if, for a memory operation with a\n\ -@@ -3588,7 +3581,7 @@ optimized for speed rather than size.\n\ - \n\ - Returning true for higher values of @var{size} can improve code generation\n\ - for speed if the target does not provide an implementation of the\n\ --@code{movmem} or @code{setmem} standard names, if the @code{movmem} or\n\ -+@code{cpymem} or @code{setmem} standard names, if the @code{cpymem} or\n\ - @code{setmem} implementation would be more expensive than a sequence of\n\ - insns, or if the overhead of a library call would dominate that of\n\ - the body of the memory operation.\n\ -@@ -4479,18 +4472,18 @@ or 3-byte structure is returned at the most significant end of a\n\ - from __builtin_va_arg. */ - DEFHOOK - (pass_by_reference, -- "This target hook should return @code{true} if an argument at the\n\ -+ "This target hook should return @code{true} if argument @var{arg} at the\n\ - position indicated by @var{cum} should be passed by reference. 
This\n\ - predicate is queried after target independent reasons for being\n\ --passed by reference, such as @code{TREE_ADDRESSABLE (type)}.\n\ -+passed by reference, such as @code{TREE_ADDRESSABLE (@var{arg}.type)}.\n\ - \n\ - If the hook returns true, a copy of that argument is made in memory and a\n\ - pointer to the argument is passed instead of the argument itself.\n\ - The pointer is passed in whatever way is appropriate for passing a pointer\n\ - to that type.", - bool, -- (cumulative_args_t cum, machine_mode mode, const_tree type, bool named), -- hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false) -+ (cumulative_args_t cum, const function_arg_info &arg), -+ hook_bool_CUMULATIVE_ARGS_arg_info_false) - - DEFHOOK - (expand_builtin_saveregs, -@@ -4515,8 +4508,8 @@ pass all their arguments on the stack.\n\ - \n\ - The argument @var{args_so_far} points to the @code{CUMULATIVE_ARGS} data\n\ - structure, containing the values that are obtained after processing the\n\ --named arguments. The arguments @var{mode} and @var{type} describe the\n\ --last named argument---its machine mode and its data type as a tree node.\n\ -+named arguments. The argument @var{arg} describes the last of these named\n\ -+arguments.\n\ - \n\ - The target hook should do two things: first, push onto the stack all the\n\ - argument registers @emph{not} used for the named arguments, and second,\n\ -@@ -4536,7 +4529,7 @@ arguments of the function are being analyzed for the second time. This\n\ - happens for an inline function, which is not actually compiled until the\n\ - end of the source file. The hook @code{TARGET_SETUP_INCOMING_VARARGS} should\n\ - not generate any instructions in this case.", -- void, (cumulative_args_t args_so_far, machine_mode mode, tree type, -+ void, (cumulative_args_t args_so_far, const function_arg_info &arg, - int *pretend_args_size, int second_time), - default_setup_incoming_varargs) - -@@ -4579,15 +4572,6 @@ returned by function call into @var{slot}.", - void, (rtx slot, rtx bounds), - default_store_returned_bounds) - --DEFHOOK --(setup_incoming_vararg_bounds, -- "Use it to store bounds for anonymous register arguments stored\n\ --into the stack. Arguments meaning is similar to\n\ --@code{TARGET_SETUP_INCOMING_VARARGS}.", -- void, (cumulative_args_t args_so_far, machine_mode mode, tree type, -- int *pretend_args_size, int second_time), -- default_setup_incoming_vararg_bounds) -- - DEFHOOK - (call_args, - "While generating RTL for a function call, this target hook is invoked once\n\ -@@ -4668,11 +4652,11 @@ false.", - Need audit to verify that this is the case. */ - DEFHOOK - (must_pass_in_stack, -- "This target hook should return @code{true} if we should not pass @var{type}\n\ -+ "This target hook should return @code{true} if we should not pass @var{arg}\n\ - solely in registers. 
The file @file{expr.h} defines a\n\ - definition that is usually appropriate, refer to @file{expr.h} for additional\n\ - documentation.", -- bool, (machine_mode mode, const_tree type), -+ bool, (const function_arg_info &arg), - must_pass_in_stack_var_size_or_pad) - - /* Return true if type TYPE, mode MODE, which is passed by reference, -@@ -4691,8 +4675,8 @@ not be generated.\n\ - \n\ - The default version of this hook always returns false.", - bool, -- (cumulative_args_t cum, machine_mode mode, const_tree type, bool named), -- hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false) -+ (cumulative_args_t cum, const function_arg_info &arg), -+ hook_bool_CUMULATIVE_ARGS_arg_info_false) - - /* Return zero for arguments passed entirely on the stack or entirely - in registers. If passed in both, return the number of bytes passed -@@ -4715,8 +4699,8 @@ compiler when this occurs, and how many bytes should go in registers.\n\ - @code{TARGET_FUNCTION_ARG} for these arguments should return the first\n\ - register to be used by the caller for this argument; likewise\n\ - @code{TARGET_FUNCTION_INCOMING_ARG}, for the called function.", -- int, (cumulative_args_t cum, machine_mode mode, tree type, bool named), -- hook_int_CUMULATIVE_ARGS_mode_tree_bool_0) -+ int, (cumulative_args_t cum, const function_arg_info &arg), -+ hook_int_CUMULATIVE_ARGS_arg_info_0) - - /* Update the state in CA to advance past an argument in the - argument list. The values MODE, TYPE, and NAMED describe that -@@ -4724,8 +4708,7 @@ register to be used by the caller for this argument; likewise\n\ - DEFHOOK - (function_arg_advance, - "This hook updates the summarizer variable pointed to by @var{ca} to\n\ --advance past an argument in the argument list. The values @var{mode},\n\ --@var{type} and @var{named} describe that argument. Once this is done,\n\ -+advance past argument @var{arg} in the argument list. Once this is done,\n\ - the variable @var{cum} is suitable for analyzing the @emph{following}\n\ - argument with @code{TARGET_FUNCTION_ARG}, etc.\n\ - \n\ -@@ -4733,7 +4716,7 @@ This hook need not do anything if the argument in question was passed\n\ - on the stack. The compiler knows how to track the amount of stack space\n\ - used for arguments without any special help.", - void, -- (cumulative_args_t ca, machine_mode mode, const_tree type, bool named), -+ (cumulative_args_t ca, const function_arg_info &arg), - default_function_arg_advance) - - DEFHOOK -@@ -4770,17 +4753,9 @@ constant size shorter than an @code{int}, and upward otherwise.", - argument. */ - DEFHOOK - (function_arg, -- "Return an RTX indicating whether a function argument is passed in a\n\ --register and if so, which register.\n\ --\n\ --The arguments are @var{ca}, which summarizes all the previous\n\ --arguments; @var{mode}, the machine mode of the argument; @var{type},\n\ --the data type of the argument as a tree node or 0 if that is not known\n\ --(which happens for C support library functions); and @var{named},\n\ --which is @code{true} for an ordinary argument and @code{false} for\n\ --nameless arguments that correspond to @samp{@dots{}} in the called\n\ --function's prototype. @var{type} can be an incomplete type if a\n\ --syntax error has previously occurred.\n\ -+ "Return an RTX indicating whether function argument @var{arg} is passed\n\ -+in a register and if so, which register. 
Argument @var{ca} summarizes all\n\ -+the previous arguments.\n\ - \n\ - The return value is usually either a @code{reg} RTX for the hard\n\ - register in which to pass the argument, or zero to pass the argument\n\ -@@ -4826,8 +4801,7 @@ is not defined and @code{TARGET_FUNCTION_ARG} returns nonzero for such an\n\ - argument, the compiler will abort. If @code{REG_PARM_STACK_SPACE} is\n\ - defined, the argument will be computed in the stack and then loaded into\n\ - a register.", -- rtx, (cumulative_args_t ca, machine_mode mode, const_tree type, -- bool named), -+ rtx, (cumulative_args_t ca, const function_arg_info &arg), - default_function_arg) - - DEFHOOK -@@ -4849,8 +4823,7 @@ so that it can be used to pass special arguments.\n\ - \n\ - If @code{TARGET_FUNCTION_INCOMING_ARG} is not defined,\n\ - @code{TARGET_FUNCTION_ARG} serves both purposes.", -- rtx, (cumulative_args_t ca, machine_mode mode, const_tree type, -- bool named), -+ rtx, (cumulative_args_t ca, const function_arg_info &arg), - default_function_incoming_arg) - - DEFHOOK -@@ -4962,6 +4935,28 @@ If this hook is not defined, then FUNCTION_VALUE_REGNO_P will be used.", - bool, (const unsigned int regno), - default_function_value_regno_p) - -+DEFHOOK -+(fntype_abi, -+ "Return the ABI used by a function with type @var{type}; see the\n\ -+definition of @code{predefined_function_abi} for details of the ABI\n\ -+descriptor. Targets only need to define this hook if they support\n\ -+interoperability between several ABIs in the same translation unit.", -+ const predefined_function_abi &, (const_tree type), -+ NULL) -+ -+DEFHOOK -+(insn_callee_abi, -+ "This hook returns a description of the ABI used by the target of\n\ -+call instruction @var{insn}; see the definition of\n\ -+@code{predefined_function_abi} for details of the ABI descriptor.\n\ -+Only the global function @code{insn_callee_abi} should call this hook\n\ -+directly.\n\ -+\n\ -+Targets only need to define this hook if they support\n\ -+interoperability between several ABIs in the same translation unit.", -+ const predefined_function_abi &, (const rtx_insn *insn), -+ NULL) -+ - /* ??? Documenting this hook requires a GFDL license grant. */ - DEFHOOK_UNDOC - (internal_arg_pointer, -@@ -5811,32 +5806,27 @@ The default version of this hook always returns @code{true}.", - - DEFHOOK - (hard_regno_call_part_clobbered, -- "This hook should return true if @var{regno} is partly call-saved and\n\ --partly call-clobbered, and if a value of mode @var{mode} would be partly\n\ --clobbered by call instruction @var{insn}. If @var{insn} is NULL then it\n\ --should return true if any call could partly clobber the register.\n\ --For example, if the low 32 bits of @var{regno} are preserved across a call\n\ --but higher bits are clobbered, this hook should return true for a 64-bit\n\ --mode but false for a 32-bit mode.\n\ -+ "ABIs usually specify that calls must preserve the full contents\n\ -+of a particular register, or that calls can alter any part of a\n\ -+particular register. This information is captured by the target macro\n\ -+@code{CALL_REALLY_USED_REGISTERS}. However, some ABIs specify that calls\n\ -+must preserve certain bits of a particular register but can alter others.\n\ -+This hook should return true if this applies to at least one of the\n\ -+registers in @samp{(reg:@var{mode} @var{regno})}, and if as a result the\n\ -+call would alter part of the @var{mode} value. 
For example, if a call\n\ -+preserves the low 32 bits of a 64-bit hard register @var{regno} but can\n\ -+clobber the upper 32 bits, this hook should return true for a 64-bit mode\n\ -+but false for a 32-bit mode.\n\ -+\n\ -+The value of @var{abi_id} comes from the @code{predefined_function_abi}\n\ -+structure that describes the ABI of the call; see the definition of the\n\ -+structure for more details. If (as is usual) the target uses the same ABI\n\ -+for all functions in a translation unit, @var{abi_id} is always 0.\n\ - \n\ - The default implementation returns false, which is correct\n\ - for targets that don't have partly call-clobbered registers.", -- bool, (rtx_insn *insn, unsigned int regno, machine_mode mode), -- hook_bool_insn_uint_mode_false) -- --DEFHOOK --(return_call_with_max_clobbers, -- "This hook returns a pointer to the call that partially clobbers the\n\ --most registers. If a platform supports multiple ABIs where the registers\n\ --that are partially clobbered may vary, this function compares two\n\ --calls and returns a pointer to the one that clobbers the most registers.\n\ --If both calls clobber the same registers, @var{call_1} must be returned.\n\ --\n\ --The registers clobbered in different ABIs must be a proper subset or\n\ --superset of all other ABIs. @var{call_1} must always be a call insn,\n\ --call_2 may be NULL or a call insn.", -- rtx_insn *, (rtx_insn *call_1, rtx_insn *call_2), -- NULL) -+ bool, (unsigned int abi_id, unsigned int regno, machine_mode mode), -+ hook_bool_uint_uint_mode_false) - - DEFHOOK - (get_multilib_abi_name, -@@ -5844,20 +5834,6 @@ DEFHOOK - const char *, (void), - hook_constcharptr_void_null) - --DEFHOOK --(remove_extra_call_preserved_regs, -- "This hook removes registers from the set of call-clobbered registers\n\ -- in @var{used_regs} if, contrary to the default rules, something guarantees\n\ -- that @samp{insn} preserves those registers. For example, some targets\n\ -- support variant ABIs in which functions preserve more registers than\n\ -- normal functions would. Removing those extra registers from @var{used_regs}\n\ -- can lead to better register allocation.\n\ -- \n\ -- The default implementation does nothing, which is always safe.\n\ -- Defining the hook is purely an optimization.", -- void, (rtx_insn *insn, HARD_REG_SET *used_regs), -- default_remove_extra_call_preserved_regs) -- - /* Return the smallest number of different values for which it is best to - use a jump-table instead of a tree of conditional branches. */ - DEFHOOK -diff --git a/gcc/target.h b/gcc/target.h -index 057e6ae87..964629669 100644 ---- a/gcc/target.h -+++ b/gcc/target.h -@@ -149,6 +149,12 @@ struct ao_ref; - /* This is defined in tree-vectorizer.h. */ - struct _stmt_vec_info; - -+/* This is defined in calls.h. */ -+struct function_arg_info; -+ -+/* This is defined in function-abi.h. */ -+struct predefined_function_abi; -+ - /* These are defined in tree-vect-stmts.c. 
*/ - extern tree stmt_vectype (struct _stmt_vec_info *); - extern bool stmt_in_inner_loop_p (struct _stmt_vec_info *); -diff --git a/gcc/targhooks.c b/gcc/targhooks.c -index 6396f6f4b..6f54de0d5 100644 ---- a/gcc/targhooks.c -+++ b/gcc/targhooks.c -@@ -193,11 +193,8 @@ default_expand_builtin_saveregs (void) - } - - void --default_setup_incoming_varargs (cumulative_args_t ca ATTRIBUTE_UNUSED, -- machine_mode mode ATTRIBUTE_UNUSED, -- tree type ATTRIBUTE_UNUSED, -- int *pretend_arg_size ATTRIBUTE_UNUSED, -- int second_time ATTRIBUTE_UNUSED) -+default_setup_incoming_varargs (cumulative_args_t, -+ const function_arg_info &, int *, int) - { - } - -@@ -323,22 +320,19 @@ default_cxx_get_cookie_size (tree type) - of the TARGET_PASS_BY_REFERENCE hook uses just MUST_PASS_IN_STACK. */ - - bool --hook_pass_by_reference_must_pass_in_stack (cumulative_args_t c ATTRIBUTE_UNUSED, -- machine_mode mode ATTRIBUTE_UNUSED, const_tree type ATTRIBUTE_UNUSED, -- bool named_arg ATTRIBUTE_UNUSED) -+hook_pass_by_reference_must_pass_in_stack (cumulative_args_t, -+ const function_arg_info &arg) - { -- return targetm.calls.must_pass_in_stack (mode, type); -+ return targetm.calls.must_pass_in_stack (arg); - } - - /* Return true if a parameter follows callee copies conventions. This - version of the hook is true for all named arguments. */ - - bool --hook_callee_copies_named (cumulative_args_t ca ATTRIBUTE_UNUSED, -- machine_mode mode ATTRIBUTE_UNUSED, -- const_tree type ATTRIBUTE_UNUSED, bool named) -+hook_callee_copies_named (cumulative_args_t, const function_arg_info &arg) - { -- return named; -+ return arg.named; - } - - /* Emit to STREAM the assembler syntax for insn operand X. */ -@@ -681,16 +675,6 @@ default_builtin_md_vectorized_function (tree, tree, tree) - return NULL_TREE; - } - --/* Vectorized conversion. */ -- --tree --default_builtin_vectorized_conversion (unsigned int code ATTRIBUTE_UNUSED, -- tree dest_type ATTRIBUTE_UNUSED, -- tree src_type ATTRIBUTE_UNUSED) --{ -- return NULL_TREE; --} -- - /* Default vectorizer cost model values. 
*/ - - int -@@ -737,28 +721,22 @@ default_builtin_reciprocal (tree) - } - - bool --hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false ( -- cumulative_args_t ca ATTRIBUTE_UNUSED, -- machine_mode mode ATTRIBUTE_UNUSED, -- const_tree type ATTRIBUTE_UNUSED, bool named ATTRIBUTE_UNUSED) -+hook_bool_CUMULATIVE_ARGS_arg_info_false (cumulative_args_t, -+ const function_arg_info &) - { - return false; - } - - bool --hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true ( -- cumulative_args_t ca ATTRIBUTE_UNUSED, -- machine_mode mode ATTRIBUTE_UNUSED, -- const_tree type ATTRIBUTE_UNUSED, bool named ATTRIBUTE_UNUSED) -+hook_bool_CUMULATIVE_ARGS_arg_info_true (cumulative_args_t, -+ const function_arg_info &) - { - return true; - } - - int --hook_int_CUMULATIVE_ARGS_mode_tree_bool_0 ( -- cumulative_args_t ca ATTRIBUTE_UNUSED, -- machine_mode mode ATTRIBUTE_UNUSED, -- tree type ATTRIBUTE_UNUSED, bool named ATTRIBUTE_UNUSED) -+hook_int_CUMULATIVE_ARGS_arg_info_0 (cumulative_args_t, -+ const function_arg_info &) - { - return 0; - } -@@ -770,10 +748,7 @@ hook_void_CUMULATIVE_ARGS_tree (cumulative_args_t ca ATTRIBUTE_UNUSED, - } - - void --default_function_arg_advance (cumulative_args_t ca ATTRIBUTE_UNUSED, -- machine_mode mode ATTRIBUTE_UNUSED, -- const_tree type ATTRIBUTE_UNUSED, -- bool named ATTRIBUTE_UNUSED) -+default_function_arg_advance (cumulative_args_t, const function_arg_info &) - { - gcc_unreachable (); - } -@@ -814,19 +789,13 @@ default_function_arg_padding (machine_mode mode, const_tree type) - } - - rtx --default_function_arg (cumulative_args_t ca ATTRIBUTE_UNUSED, -- machine_mode mode ATTRIBUTE_UNUSED, -- const_tree type ATTRIBUTE_UNUSED, -- bool named ATTRIBUTE_UNUSED) -+default_function_arg (cumulative_args_t, const function_arg_info &) - { - gcc_unreachable (); - } - - rtx --default_function_incoming_arg (cumulative_args_t ca ATTRIBUTE_UNUSED, -- machine_mode mode ATTRIBUTE_UNUSED, -- const_tree type ATTRIBUTE_UNUSED, -- bool named ATTRIBUTE_UNUSED) -+default_function_incoming_arg (cumulative_args_t, const function_arg_info &) - { - gcc_unreachable (); - } -@@ -1061,12 +1030,6 @@ default_return_pops_args (tree, tree, poly_int64) - return 0; - } - --reg_class_t --default_branch_target_register_class (void) --{ -- return NO_REGS; --} -- - reg_class_t - default_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED, - reg_class_t cl, -@@ -1732,9 +1695,9 @@ get_move_ratio (bool speed_p ATTRIBUTE_UNUSED) - #ifdef MOVE_RATIO - move_ratio = (unsigned int) MOVE_RATIO (speed_p); - #else --#if defined (HAVE_movmemqi) || defined (HAVE_movmemhi) || defined (HAVE_movmemsi) || defined (HAVE_movmemdi) || defined (HAVE_movmemti) -+#if defined (HAVE_cpymemqi) || defined (HAVE_cpymemhi) || defined (HAVE_cpymemsi) || defined (HAVE_cpymemdi) || defined (HAVE_cpymemti) - move_ratio = 2; --#else /* No movmem patterns, pick a default. */ -+#else /* No cpymem patterns, pick a default. */ - move_ratio = ((speed_p) ? 15 : 3); - #endif - #endif -@@ -1742,7 +1705,7 @@ get_move_ratio (bool speed_p ATTRIBUTE_UNUSED) - } - - /* Return TRUE if the move_by_pieces/set_by_pieces infrastructure should be -- used; return FALSE if the movmem/setmem optab should be expanded, or -+ used; return FALSE if the cpymem/setmem optab should be expanded, or - a call to memcpy emitted. 
*/ - - bool -@@ -1941,7 +1904,7 @@ default_dwarf_frame_reg_mode (int regno) - { - machine_mode save_mode = reg_raw_mode[regno]; - -- if (targetm.hard_regno_call_part_clobbered (NULL, regno, save_mode)) -+ if (targetm.hard_regno_call_part_clobbered (0, regno, save_mode)) - save_mode = choose_hard_reg_mode (regno, 1, true); - return save_mode; - } -@@ -2163,7 +2126,7 @@ std_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, - if (ARGS_GROW_DOWNWARD) - gcc_unreachable (); - -- indirect = pass_by_reference (NULL, TYPE_MODE (type), type, false); -+ indirect = pass_va_arg_by_reference (type); - if (indirect) - type = build_pointer_type (type); - -@@ -2260,15 +2223,6 @@ std_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, - return build_va_arg_indirect_ref (addr); - } - --void --default_setup_incoming_vararg_bounds (cumulative_args_t ca ATTRIBUTE_UNUSED, -- machine_mode mode ATTRIBUTE_UNUSED, -- tree type ATTRIBUTE_UNUSED, -- int *pretend_arg_size ATTRIBUTE_UNUSED, -- int second_time ATTRIBUTE_UNUSED) --{ --} -- - /* An implementation of TARGET_CAN_USE_DOLOOP_P for targets that do - not support nested low-overhead loops. */ - -@@ -2385,9 +2339,4 @@ default_speculation_safe_value (machine_mode mode ATTRIBUTE_UNUSED, - return result; - } - --void --default_remove_extra_call_preserved_regs (rtx_insn *, HARD_REG_SET *) --{ --} -- - #include "gt-targhooks.h" -diff --git a/gcc/targhooks.h b/gcc/targhooks.h -index 2d5991908..e5e803c33 100644 ---- a/gcc/targhooks.h -+++ b/gcc/targhooks.h -@@ -40,7 +40,9 @@ extern machine_mode default_cc_modes_compatible (machine_mode, - extern bool default_return_in_memory (const_tree, const_tree); - - extern rtx default_expand_builtin_saveregs (void); --extern void default_setup_incoming_varargs (cumulative_args_t, machine_mode, tree, int *, int); -+extern void default_setup_incoming_varargs (cumulative_args_t, -+ const function_arg_info &, -+ int *, int); - extern rtx default_builtin_setjmp_frame_value (void); - extern bool default_pretend_outgoing_varargs_named (cumulative_args_t); - -@@ -63,9 +65,9 @@ extern tree default_cxx_guard_type (void); - extern tree default_cxx_get_cookie_size (tree); - - extern bool hook_pass_by_reference_must_pass_in_stack -- (cumulative_args_t, machine_mode mode, const_tree, bool); -+ (cumulative_args_t, const function_arg_info &); - extern bool hook_callee_copies_named -- (cumulative_args_t ca, machine_mode, const_tree, bool); -+ (cumulative_args_t ca, const function_arg_info &); - - extern void default_print_operand (FILE *, rtx, int); - extern void default_print_operand_address (FILE *, machine_mode, rtx); -@@ -90,8 +92,6 @@ extern const char * default_invalid_within_doloop (const rtx_insn *); - extern tree default_builtin_vectorized_function (unsigned int, tree, tree); - extern tree default_builtin_md_vectorized_function (tree, tree, tree); - --extern tree default_builtin_vectorized_conversion (unsigned int, tree, tree); -- - extern int default_builtin_vectorization_cost (enum vect_cost_for_stmt, tree, int); - - extern tree default_builtin_reciprocal (tree); -@@ -135,24 +135,23 @@ extern void default_goacc_reduction (gcall *); - extern bool hook_bool_CUMULATIVE_ARGS_false (cumulative_args_t); - extern bool hook_bool_CUMULATIVE_ARGS_true (cumulative_args_t); - --extern bool hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false -- (cumulative_args_t, machine_mode, const_tree, bool); --extern bool hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true -- (cumulative_args_t, machine_mode, const_tree, bool); --extern int 
hook_int_CUMULATIVE_ARGS_mode_tree_bool_0 -- (cumulative_args_t, machine_mode, tree, bool); -+extern bool hook_bool_CUMULATIVE_ARGS_arg_info_false -+ (cumulative_args_t, const function_arg_info &); -+extern bool hook_bool_CUMULATIVE_ARGS_arg_info_true -+ (cumulative_args_t, const function_arg_info &); -+extern int hook_int_CUMULATIVE_ARGS_arg_info_0 -+ (cumulative_args_t, const function_arg_info &); - extern void hook_void_CUMULATIVE_ARGS_tree - (cumulative_args_t, tree); - extern const char *hook_invalid_arg_for_unprototyped_fn - (const_tree, const_tree, const_tree); - extern void default_function_arg_advance -- (cumulative_args_t, machine_mode, const_tree, bool); -+ (cumulative_args_t, const function_arg_info &); - extern HOST_WIDE_INT default_function_arg_offset (machine_mode, const_tree); - extern pad_direction default_function_arg_padding (machine_mode, const_tree); --extern rtx default_function_arg -- (cumulative_args_t, machine_mode, const_tree, bool); --extern rtx default_function_incoming_arg -- (cumulative_args_t, machine_mode, const_tree, bool); -+extern rtx default_function_arg (cumulative_args_t, const function_arg_info &); -+extern rtx default_function_incoming_arg (cumulative_args_t, -+ const function_arg_info &); - extern unsigned int default_function_arg_boundary (machine_mode, - const_tree); - extern unsigned int default_function_arg_round_boundary (machine_mode, -@@ -165,7 +164,6 @@ extern rtx default_internal_arg_pointer (void); - extern rtx default_static_chain (const_tree, bool); - extern void default_trampoline_init (rtx, tree, rtx); - extern poly_int64 default_return_pops_args (tree, tree, poly_int64); --extern reg_class_t default_branch_target_register_class (void); - extern reg_class_t default_ira_change_pseudo_allocno_class (int, reg_class_t, - reg_class_t); - extern bool default_lra_p (void); -@@ -266,11 +264,6 @@ extern rtx default_load_bounds_for_arg (rtx, rtx, rtx); - extern void default_store_bounds_for_arg (rtx, rtx, rtx, rtx); - extern rtx default_load_returned_bounds (rtx); - extern void default_store_returned_bounds (rtx,rtx); --extern void default_setup_incoming_vararg_bounds (cumulative_args_t ca ATTRIBUTE_UNUSED, -- machine_mode mode ATTRIBUTE_UNUSED, -- tree type ATTRIBUTE_UNUSED, -- int *pretend_arg_size ATTRIBUTE_UNUSED, -- int second_time ATTRIBUTE_UNUSED); - extern bool default_optab_supported_p (int, machine_mode, machine_mode, - optimization_type); - extern unsigned int default_max_noce_ifcvt_seq_cost (edge); -@@ -287,7 +280,5 @@ extern tree default_preferred_else_value (unsigned, tree, unsigned, tree *); - extern bool default_have_speculation_safe_value (bool); - extern bool speculation_safe_value_not_needed (bool); - extern rtx default_speculation_safe_value (machine_mode, rtx, rtx, rtx); --extern void default_remove_extra_call_preserved_regs (rtx_insn *, -- HARD_REG_SET *); - - #endif /* GCC_TARGHOOKS_H */ -diff --git a/gcc/testsuite/c-c++-common/guality/Og-dce-1.c b/gcc/testsuite/c-c++-common/guality/Og-dce-1.c -new file mode 100644 -index 000000000..a859e3252 ---- /dev/null -+++ b/gcc/testsuite/c-c++-common/guality/Og-dce-1.c -@@ -0,0 +1,14 @@ -+/* { dg-do run } */ -+/* { dg-options "-g" } */ -+ -+int *__attribute__((noipa)) consume (int *ptr) { return ptr; } -+ -+int -+main (void) -+{ -+ int x; -+ int *volatile ptr = consume (&x); -+ x = 0; -+ x = 1; /* { dg-final { gdb-test . "*ptr" "0" } } */ -+ return 0; /* { dg-final { gdb-test . 
"*ptr" "1" } } */ -+} -diff --git a/gcc/testsuite/c-c++-common/guality/Og-dce-2.c b/gcc/testsuite/c-c++-common/guality/Og-dce-2.c -new file mode 100644 -index 000000000..3df2c7921 ---- /dev/null -+++ b/gcc/testsuite/c-c++-common/guality/Og-dce-2.c -@@ -0,0 +1,19 @@ -+/* { dg-do run } */ -+/* { dg-options "-g" } */ -+ -+struct s { int a, b, c, d; }; -+ -+struct s gs1 = { 1, 2, 3, 4 }; -+struct s gs2 = { 5, 6, 7, 8 }; -+ -+struct s *__attribute__((noipa)) consume (struct s *ptr) { return ptr; } -+ -+int -+main (void) -+{ -+ struct s x; -+ struct s *volatile ptr = consume (&x); -+ x = gs1; -+ x = gs2; /* { dg-final { gdb-test . "ptr->a" "1" } } */ -+ return 0; /* { dg-final { gdb-test . "ptr->a" "5" } } */ -+} -diff --git a/gcc/testsuite/c-c++-common/guality/Og-dce-3.c b/gcc/testsuite/c-c++-common/guality/Og-dce-3.c -new file mode 100644 -index 000000000..fa6186a73 ---- /dev/null -+++ b/gcc/testsuite/c-c++-common/guality/Og-dce-3.c -@@ -0,0 +1,29 @@ -+/* { dg-do run } */ -+/* { dg-options "-g" } */ -+ -+volatile int amount = 10; -+ -+void __attribute__((noipa)) -+do_something (int *ptr) -+{ -+ *ptr += 10; -+} -+ -+int __attribute__((noipa)) -+foo (int count) -+{ -+ int x = 1; -+ for (int i = 0; i < count; ++i) -+ do_something (&x); /* { dg-final { gdb-test . "x" "1" } } */ -+ int res = x; /* { dg-final { gdb-test . "x" "101" } } */ -+ x = res + 1; -+ return res; /* { dg-final { gdb-test . "x" "102" } } */ -+ -+} -+ -+int -+main (void) -+{ -+ foo (10); -+ return 0; -+} -diff --git a/gcc/testsuite/c-c++-common/guality/Og-global-dse-1.c b/gcc/testsuite/c-c++-common/guality/Og-global-dse-1.c -new file mode 100644 -index 000000000..3d4b4e60e ---- /dev/null -+++ b/gcc/testsuite/c-c++-common/guality/Og-global-dse-1.c -@@ -0,0 +1,17 @@ -+/* { dg-do run } */ -+/* { dg-options "-g" } */ -+ -+struct s { int i, j; }; -+struct s gs1, gs2 = { 3, 4 }; -+ -+void __attribute__((noipa)) consume (void) {}; -+ -+int -+main (void) -+{ -+ gs1.i = 1; -+ gs1.j = 2; /* { dg-final { gdb-test . "gs1.i" "1" } } */ -+ gs1 = gs2; /* { dg-final { gdb-test . "gs1.j" "2" } } */ -+ consume (); /* { dg-final { gdb-test . "gs1.i" "3" } } */ -+ return 0; /* { dg-final { gdb-test . "gs1.j" "4" } } */ -+} -diff --git a/gcc/testsuite/c-c++-common/guality/Og-static-wo-1.c b/gcc/testsuite/c-c++-common/guality/Og-static-wo-1.c -new file mode 100644 -index 000000000..a4c7f3067 ---- /dev/null -+++ b/gcc/testsuite/c-c++-common/guality/Og-static-wo-1.c -@@ -0,0 +1,15 @@ -+/* { dg-do run } */ -+/* { dg-options "-g" } */ -+ -+#include "../../gcc.dg/nop.h" -+ -+static int x = 0; -+ -+int -+main (void) -+{ -+ asm volatile (NOP); /* { dg-final { gdb-test . "x" "0" } } */ -+ x = 1; -+ asm volatile (NOP); /* { dg-final { gdb-test . 
"x" "1" } } */ -+ return 0; -+} -diff --git a/gcc/testsuite/g++.dg/abi/mangle-neon-aarch64.C b/gcc/testsuite/g++.dg/abi/mangle-neon-aarch64.C -index 5740c0281..50c1452ed 100644 ---- a/gcc/testsuite/g++.dg/abi/mangle-neon-aarch64.C -+++ b/gcc/testsuite/g++.dg/abi/mangle-neon-aarch64.C -@@ -14,6 +14,7 @@ void f4 (uint16x4_t a) {} - void f5 (uint32x2_t a) {} - void f23 (uint64x1_t a) {} - void f61 (float16x4_t a) {} -+void f62 (bfloat16x4_t a) {} - void f6 (float32x2_t a) {} - void f7 (poly8x8_t a) {} - void f8 (poly16x4_t a) {} -@@ -27,6 +28,7 @@ void f14 (uint16x8_t a) {} - void f15 (uint32x4_t a) {} - void f16 (uint64x2_t a) {} - void f171 (float16x8_t a) {} -+void f172 (bfloat16x8_t a) {} - void f17 (float32x4_t a) {} - void f18 (float64x2_t a) {} - void f19 (poly8x16_t a) {} -@@ -45,6 +47,7 @@ void g1 (int8x16_t, int8x16_t) {} - // { dg-final { scan-assembler "_Z2f512__Uint32x2_t:" } } - // { dg-final { scan-assembler "_Z3f2312__Uint64x1_t:" } } - // { dg-final { scan-assembler "_Z3f6113__Float16x4_t:" } } -+// { dg-final { scan-assembler "_Z3f6214__Bfloat16x4_t:" } } - // { dg-final { scan-assembler "_Z2f613__Float32x2_t:" } } - // { dg-final { scan-assembler "_Z2f711__Poly8x8_t:" } } - // { dg-final { scan-assembler "_Z2f812__Poly16x4_t:" } } -@@ -57,6 +60,7 @@ void g1 (int8x16_t, int8x16_t) {} - // { dg-final { scan-assembler "_Z3f1512__Uint32x4_t:" } } - // { dg-final { scan-assembler "_Z3f1612__Uint64x2_t:" } } - // { dg-final { scan-assembler "_Z4f17113__Float16x8_t:" } } -+// { dg-final { scan-assembler "_Z4f17214__Bfloat16x8_t:" } } - // { dg-final { scan-assembler "_Z3f1713__Float32x4_t:" } } - // { dg-final { scan-assembler "_Z3f1813__Float64x2_t:" } } - // { dg-final { scan-assembler "_Z3f1912__Poly8x16_t:" } } -diff --git a/gcc/testsuite/g++.dg/diagnostic/aka4.C b/gcc/testsuite/g++.dg/diagnostic/aka4.C -new file mode 100644 -index 000000000..da8c57964 ---- /dev/null -+++ b/gcc/testsuite/g++.dg/diagnostic/aka4.C -@@ -0,0 +1,9 @@ -+typedef unsigned int myvec __attribute__((vector_size (16))); -+ -+void f (float x) -+{ -+ myvec y = x; // { dg-error {cannot convert 'float' to 'myvec' {aka '__vector\([48]\) unsigned int'} in initialization} } -+ myvec *ptr = &x; // { dg-error {cannot convert 'float\*' to 'myvec\*' {aka '__vector\([48]\) unsigned int\*'} in initialization} } -+ const myvec *const_ptr = &x; // { dg-error {cannot convert 'float\*' to 'const myvec\*' {aka 'const __vector\([48]\) unsigned int\*'} in initialization} } -+ volatile myvec *volatile_ptr = &x; // { dg-error {cannot convert 'float\*' to 'volatile myvec\*' {aka 'volatile __vector\([48]\) unsigned int\*'} in initialization} } -+} -diff --git a/gcc/testsuite/g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C b/gcc/testsuite/g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -new file mode 100644 -index 000000000..5426a1814 ---- /dev/null -+++ b/gcc/testsuite/g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C -@@ -0,0 +1,13 @@ -+/* { dg-do compile { target aarch64*-*-* } } */ -+ -+/* Test mangling */ -+ -+/* { dg-final { scan-assembler "\t.global\t_Z1fPu6__bf16" } } */ -+void f (__bf16 *x) { } -+ -+/* { dg-final { scan-assembler "\t.global\t_Z1gPu6__bf16S_" } } */ -+void g (__bf16 *x, __bf16 *y) { } -+ -+/* { dg-final { scan-assembler "\t.global\t_ZN1SIu6__bf16u6__bf16E1iE" } } */ -+template struct S { static int i; }; -+template <> int S<__bf16, __bf16>::i = 3; -diff --git a/gcc/testsuite/g++.dg/guality/guality.exp b/gcc/testsuite/g++.dg/guality/guality.exp -index 757b20b61..33571f1f2 100644 ---- 
a/gcc/testsuite/g++.dg/guality/guality.exp -+++ b/gcc/testsuite/g++.dg/guality/guality.exp -@@ -65,8 +65,22 @@ if {[check_guality " - return 0; - } - "]} { -- gcc-dg-runtest [lsort [glob $srcdir/$subdir/*.C]] "" "" -- gcc-dg-runtest [lsort [glob $srcdir/c-c++-common/guality/*.c]] "" "" -+ set general [list] -+ set Og [list] -+ foreach file [lsort [glob $srcdir/c-c++-common/guality/*.c]] { -+ switch -glob -- [file tail $file] { -+ Og-* { lappend Og $file } -+ * { lappend general $file } -+ } -+ } -+ -+ gcc-dg-runtest [lsort [glob $srcdir/$subdir/*.C]] "" "" -+ gcc-dg-runtest $general "" "" -+ set-torture-options \ -+ [list "-O0" "-Og"] \ -+ [list {}] \ -+ [list "-Og -flto"] -+ gcc-dg-runtest $Og "" "" - } - - if [info exists guality_gdb_name] { -diff --git a/gcc/testsuite/g++.dg/ipa/pr93763.C b/gcc/testsuite/g++.dg/ipa/pr93763.C -index 61117108e..13ab2d57f 100644 ---- a/gcc/testsuite/g++.dg/ipa/pr93763.C -+++ b/gcc/testsuite/g++.dg/ipa/pr93763.C -@@ -1,4 +1,4 @@ --/* { dg-do compile } */ -+/* { dg-do compile { target c++11 } } */ - /* { dg-options "-O3" } */ - - struct search_param { -diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr53844.C b/gcc/testsuite/g++.dg/tree-ssa/pr53844.C -index 954cc71b4..ab9879f6a 100644 ---- a/gcc/testsuite/g++.dg/tree-ssa/pr53844.C -+++ b/gcc/testsuite/g++.dg/tree-ssa/pr53844.C -@@ -1,5 +1,5 @@ - // { dg-do compile } --// { dg-options "-O2 -fdump-tree-optimized-vops" } -+// { dg-options "-O2 -fdump-tree-optimized-vops -fno-inline-functions --param max-inline-insns-single-O2=200" } - - struct VBase; - -diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr61034.C b/gcc/testsuite/g++.dg/tree-ssa/pr61034.C -index 870b23721..2e3dfecac 100644 ---- a/gcc/testsuite/g++.dg/tree-ssa/pr61034.C -+++ b/gcc/testsuite/g++.dg/tree-ssa/pr61034.C -@@ -1,5 +1,5 @@ - // { dg-do compile } --// { dg-options "-O2 -fdump-tree-fre3 -fdump-tree-optimized -fdelete-null-pointer-checks" } -+// { dg-options "-O2 -fdump-tree-fre3 -fdump-tree-optimized -fdelete-null-pointer-checks --param early-inlining-insns-O2=14" } - - #define assume(x) if(!(x))__builtin_unreachable() - -diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr8781.C b/gcc/testsuite/g++.dg/tree-ssa/pr8781.C -index 1f115b2b2..5bc1ef035 100644 ---- a/gcc/testsuite/g++.dg/tree-ssa/pr8781.C -+++ b/gcc/testsuite/g++.dg/tree-ssa/pr8781.C -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O -fno-tree-sra -fdump-tree-fre1" } */ -+/* { dg-options "-O -fno-tree-sra -fdump-tree-fre1 --param early-inlining-insns-O2=14" } */ - - int f(); - -diff --git a/gcc/testsuite/g++.dg/warn/Wstringop-truncation-1.C b/gcc/testsuite/g++.dg/warn/Wstringop-truncation-1.C -index 830660197..49dde0a65 100644 ---- a/gcc/testsuite/g++.dg/warn/Wstringop-truncation-1.C -+++ b/gcc/testsuite/g++.dg/warn/Wstringop-truncation-1.C -@@ -1,7 +1,7 @@ - /* PR/tree-optimization/84480 - bogus -Wstringop-truncation despite - assignment with an inlined string literal - { dg-do compile } -- { dg-options "-O2 -Wstringop-truncation" } */ -+ { dg-options "-O2 -Wstringop-truncation --param early-inlining-insns-O2=14" } */ - - #include - -diff --git a/gcc/testsuite/g++.target/aarch64/bfloat_cpp_typecheck.C b/gcc/testsuite/g++.target/aarch64/bfloat_cpp_typecheck.C -new file mode 100644 -index 000000000..9203d91f8 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/bfloat_cpp_typecheck.C -@@ -0,0 +1,14 @@ -+/* { dg-do assemble { target { aarch64*-*-* } } } */ -+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ -+/* { dg-add-options arm_v8_2a_bf16_neon } */ -+/* { 
dg-additional-options "-O3 --save-temps" } */ -+ -+#include -+ -+void foo (void) -+{ -+ bfloat16_t (); /* { dg-bogus {invalid conversion to type 'bfloat16_t'} "" { xfail *-*-* } } */ -+ bfloat16_t a = bfloat16_t(); /* { dg-bogus {invalid conversion to type 'bfloat16_t'} "" { xfail *-*-* } } */ -+ bfloat16_t (0x1234); /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ bfloat16_t (0.1); /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+} -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/aarch64-sve-acle-asm.exp b/gcc/testsuite/g++.target/aarch64/sve/acle/aarch64-sve-acle-asm.exp -new file mode 100644 -index 000000000..e9d624ff8 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/aarch64-sve-acle-asm.exp -@@ -0,0 +1,83 @@ -+# Assembly-based regression-test driver for the SVE ACLE -+# Copyright (C) 2009-2019 Free Software Foundation, Inc. -+# -+# This file is part of GCC. -+# -+# GCC is free software; you can redistribute it and/or modify it -+# under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 3, or (at your option) -+# any later version. -+# -+# GCC is distributed in the hope that it will be useful, but -+# WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+# General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with GCC; see the file COPYING3. If not see -+# . */ -+ -+# GCC testsuite that uses the `dg.exp' driver. -+ -+# Exit immediately if this isn't an AArch64 target. -+if { ![istarget aarch64*-*-*] } { -+ return -+} -+ -+# Load support procs. -+load_lib g++-dg.exp -+ -+# Initialize `dg'. -+dg-init -+ -+# Force SVE if we're not testing it already. -+if { [check_effective_target_aarch64_sve] } { -+ set sve_flags "" -+} else { -+ set sve_flags "-march=armv8.2-a+sve" -+} -+ -+global gcc_runtest_parallelize_limit_minor -+if { [info exists gcc_runtest_parallelize_limit_minor] } { -+ set old_limit_minor $gcc_runtest_parallelize_limit_minor -+ set gcc_runtest_parallelize_limit_minor 1 -+} -+ -+torture-init -+set-torture-options { -+ "-std=c++98 -O0 -g" -+ "-std=c++98 -O1 -g" -+ "-std=c++11 -O2 -g" -+ "-std=c++14 -O3 -g" -+ "-std=c++17 -Og -g" -+ "-std=c++2a -Os -g" -+ "-std=gnu++98 -O2 -fno-schedule-insns -DCHECK_ASM --save-temps" -+ "-std=gnu++11 -Ofast -g" -+ "-std=gnu++17 -O3 -g" -+ "-std=gnu++2a -O0 -g" -+} { -+ "-DTEST_FULL" -+ "-DTEST_OVERLOADS" -+} -+ -+# Main loop. -+set gcc_subdir [string replace $subdir 0 2 gcc] -+set files [glob -nocomplain $srcdir/$gcc_subdir/asm/*.c] -+set save-dg-do-what-default ${dg-do-what-default} -+if { [check_effective_target_aarch64_asm_sve_ok] -+ && [check_effective_target_aarch64_variant_pcs] } { -+ set dg-do-what-default assemble -+} else { -+ set dg-do-what-default compile -+} -+gcc-dg-runtest [lsort $files] "" "$sve_flags -fno-ipa-icf" -+set dg-do-what-default ${save-dg-do-what-default} -+ -+torture-finish -+ -+if { [info exists gcc_runtest_parallelize_limit_minor] } { -+ set gcc_runtest_parallelize_limit_minor $old_limit_minor -+} -+ -+# All done. -+dg-finish -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/aarch64-sve-acle.exp b/gcc/testsuite/g++.target/aarch64/sve/acle/aarch64-sve-acle.exp -new file mode 100644 -index 000000000..54c43a3ac ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/aarch64-sve-acle.exp -@@ -0,0 +1,55 @@ -+# Specific regression driver for AArch64 SVE. 
-+# Copyright (C) 2009-2019 Free Software Foundation, Inc. -+# Contributed by ARM Ltd. -+# -+# This file is part of GCC. -+# -+# GCC is free software; you can redistribute it and/or modify it -+# under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 3, or (at your option) -+# any later version. -+# -+# GCC is distributed in the hope that it will be useful, but -+# WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+# General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with GCC; see the file COPYING3. If not see -+# . */ -+ -+# GCC testsuite that uses the `dg.exp' driver. -+ -+# Exit immediately if this isn't an AArch64 target. -+if {![istarget aarch64*-*-*] } { -+ return -+} -+ -+# Load support procs. -+load_lib g++-dg.exp -+ -+# If a testcase doesn't have special options, use these. -+global DEFAULT_CXXFLAGS -+if ![info exists DEFAULT_CXXFLAGS] then { -+ set DEFAULT_CXXFLAGS " -pedantic-errors -Wno-long-long" -+} -+ -+# Initialize `dg'. -+dg-init -+ -+# Force SVE if we're not testing it already. -+if { [check_effective_target_aarch64_sve] } { -+ set sve_flags "" -+} else { -+ set sve_flags "-march=armv8.2-a+sve" -+} -+ -+# Main loop. -+set gcc_subdir [string replace $subdir 0 2 gcc] -+set files [glob -nocomplain \ -+ "$srcdir/$gcc_subdir/general/*.c" \ -+ "$srcdir/$subdir/general-c++/*.\[cC\]"] -+dg-runtest [lsort $files] "$sve_flags" $DEFAULT_CXXFLAGS -+ -+# All done. -+dg-finish -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/add_1.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/add_1.C -new file mode 100644 -index 000000000..44aa10e20 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/add_1.C -@@ -0,0 +1,9 @@ -+/* { dg-do compile } */ -+ -+#include "add_1.h" -+ -+svuint8_t -+f1 (svbool_t pg, svuint8_t x, svint8_t y) -+{ -+ return svadd_u8_x (pg, x, y); /* { dg-error "cannot convert 'svint8_t' to 'svuint8_t'" } */ -+} -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/add_1.h b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/add_1.h -new file mode 100644 -index 000000000..d441328a3 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/add_1.h -@@ -0,0 +1,2 @@ -+#pragma GCC system_header -+#pragma GCC aarch64 "arm_sve.h" /* { dg-message "initializing argument 3" } */ -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/add_2.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/add_2.C -new file mode 100644 -index 000000000..fcfb0f489 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/add_2.C -@@ -0,0 +1,14 @@ -+/* { dg-do compile } */ -+ -+#include "add_2.h" -+ -+void -+f1 (svbool_t pg, svuint8_t x, svint8_t y) -+{ -+ svadd_x (pg, x); /* { dg-error {no matching function for call to 'svadd_x\(svbool_t&, svuint8_t&\)'} } */ -+ svadd_x (pg, x, x, x); /* { dg-error {no matching function for call to 'svadd_x\(svbool_t&, svuint8_t&, svuint8_t&, svuint8_t&\)'} } */ -+ svadd_x (x, x, x); /* { dg-error {no matching function for call to 'svadd_x\(svuint8_t&, svuint8_t&, svuint8_t&\)'} } */ -+ svadd_x (pg, pg, pg); /* { dg-error {no matching function for call to 'svadd_x\(svbool_t&, svbool_t&, svbool_t&\)'} } */ -+ svadd_x (pg, 1, x); /* { dg-error {no matching function for call to 'svadd_x\(svbool_t&, int, svuint8_t&\)'} } */ -+ svadd_x (pg, x, 
y); /* { dg-error {no matching function for call to 'svadd_x\(svbool_t&, svuint8_t&, svint8_t&\)'} } */ -+} -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/add_2.h b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/add_2.h -new file mode 100644 -index 000000000..2b3a520d3 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/add_2.h -@@ -0,0 +1,9 @@ -+#pragma GCC system_header -+#pragma GCC aarch64 "arm_sve.h" -+/* { dg-message {note: candidate: 'svfloat16_t svadd_x\(svbool_t, svfloat16_t, svfloat16_t\)'} "" { target *-*-* } 3 } */ -+/* { dg-message {note: *candidate expects 3 arguments, 2 provided} "" { target *-*-* } 3 } */ -+/* { dg-message {note: *candidate expects 3 arguments, 4 provided} "" { target *-*-* } 3 } */ -+/* { dg-message {note: *no known conversion for argument 1 from 'svuint8_t' to 'svbool_t'} "" { target *-*-* } 3 } */ -+/* { dg-message {note: *no known conversion for argument 2 from 'svbool_t' to 'svfloat16_t'} "" { target *-*-* } 3 } */ -+/* { dg-message {note: *no known conversion for argument 2 from 'int' to 'svfloat16_t'} "" { target *-*-* } 3 } */ -+/* { dg-message {note: *no known conversion for argument 2 from 'svuint8_t' to 'svfloat16_t'} "" { target *-*-* } 3 } */ -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/add_3.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/add_3.C -new file mode 100644 -index 000000000..1d811fc76 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/add_3.C -@@ -0,0 +1,20 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fdump-tree-optimized -fnon-call-exceptions" } */ -+ -+#include -+ -+svint8_t -+foo (svbool_t pg, svint8_t a, svint8_t b) -+{ -+ try -+ { -+ a = svadd_m (pg, a, b); -+ } -+ catch (...) 
-+ { -+ a = b; -+ } -+ return a; -+} -+ -+/* { dg-final { scan-tree-dump-not {__cxa_begin_catch} "optimized" } } */ -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/asrd_1.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/asrd_1.C -new file mode 100644 -index 000000000..a73934f56 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/asrd_1.C -@@ -0,0 +1,39 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-std=c++11 -Wall -Wextra" } */ -+ -+#include -+ -+constexpr uint64_t const_add (uint64_t a, uint64_t b) { return a + b; } -+uint64_t add (uint64_t a, uint64_t b) { return a + b; } -+ -+void -+f1 (svbool_t pg, svuint8_t u8, svint8_t s8, svint16_t s16, -+ svint32_t s32, svint64_t s64, int x) -+{ -+ const int one = 1; -+ u8 = svasrd_x (pg, u8, 1); /* { dg-error {no matching function for call to 'svasrd_x\(svbool_t&, svuint8_t&, [^)]*\)'} } */ -+ s8 = svasrd_x (pg, s8, x); /* { dg-error "argument 3 of 'svasrd_x' must be an integer constant expression" } */ -+ s8 = svasrd_x (pg, s8, one); -+ s8 = svasrd_x (pg, s8, 0.4); /* { dg-error {passing 0 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 8\]} } */ -+ s8 = svasrd_x (pg, s8, 1.0); -+ s8 = svasrd_x (pg, s8, 0); /* { dg-error {passing 0 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 8\]} } */ -+ s8 = svasrd_x (pg, s8, 1); -+ s8 = svasrd_x (pg, s8, 1 + 1); -+ s8 = svasrd_x (pg, s8, const_add (1, 1)); -+ s8 = svasrd_x (pg, s8, add (1, 1)); /* { dg-error "argument 3 of 'svasrd_x' must be an integer constant expression" } */ -+ s8 = svasrd_x (pg, s8, 8); -+ s8 = svasrd_x (pg, s8, 9); /* { dg-error {passing 9 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 8\]} } */ -+ s8 = svasrd_x (pg, s8, (uint64_t (1) << 62) + 1); /* { dg-error {passing [^ ]* to argument 3 of 'svasrd_x', which expects a value in the range \[1, 8\]} } */ -+ s16 = svasrd_x (pg, s16, 0); /* { dg-error {passing 0 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 16\]} } */ -+ s16 = svasrd_x (pg, s16, 1); -+ s16 = svasrd_x (pg, s16, 16); -+ s16 = svasrd_x (pg, s16, 17); /* { dg-error {passing 17 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 16\]} } */ -+ s32 = svasrd_x (pg, s32, 0); /* { dg-error {passing 0 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 32\]} } */ -+ s32 = svasrd_x (pg, s32, 1); -+ s32 = svasrd_x (pg, s32, 32); -+ s32 = svasrd_x (pg, s32, 33); /* { dg-error {passing 33 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 32\]} } */ -+ s64 = svasrd_x (pg, s64, 0); /* { dg-error {passing 0 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 64\]} } */ -+ s64 = svasrd_x (pg, s64, 1); -+ s64 = svasrd_x (pg, s64, 64); -+ s64 = svasrd_x (pg, s64, 65); /* { dg-error {passing 65 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 64\]} } */ -+} -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/asrd_2.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/asrd_2.C -new file mode 100644 -index 000000000..bbe7ba72b ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/asrd_2.C -@@ -0,0 +1,38 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-std=c++11 -Wall -Wextra" } */ -+ -+#include -+ -+constexpr uint64_t const_add (uint64_t a, uint64_t b) { return a + b; } -+uint64_t add (uint64_t a, uint64_t b) { return a + b; } -+ -+void -+f1 (svbool_t pg, svint8_t s8, svint16_t s16, svint32_t s32, 
svint64_t s64, -+ int x) -+{ -+ const int one = 1; -+ s8 = svasrd_n_s8_x (pg, s8, x); /* { dg-error "argument 3 of 'svasrd_n_s8_x' must be an integer constant expression" } */ -+ s8 = svasrd_n_s8_x (pg, s8, one); -+ s8 = svasrd_n_s8_x (pg, s8, 0.4); /* { dg-error {passing 0 to argument 3 of 'svasrd_n_s8_x', which expects a value in the range \[1, 8\]} } */ -+ s8 = svasrd_n_s8_x (pg, s8, 1.0); -+ s8 = svasrd_n_s8_x (pg, s8, 0); /* { dg-error {passing 0 to argument 3 of 'svasrd_n_s8_x', which expects a value in the range \[1, 8\]} } */ -+ s8 = svasrd_n_s8_x (pg, s8, 1); -+ s8 = svasrd_n_s8_x (pg, s8, 1 + 1); -+ s8 = svasrd_n_s8_x (pg, s8, const_add (1, 1)); -+ s8 = svasrd_n_s8_x (pg, s8, add (1, 1)); /* { dg-error "argument 3 of 'svasrd_n_s8_x' must be an integer constant expression" } */ -+ s8 = svasrd_n_s8_x (pg, s8, 8); -+ s8 = svasrd_n_s8_x (pg, s8, 9); /* { dg-error {passing 9 to argument 3 of 'svasrd_n_s8_x', which expects a value in the range \[1, 8\]} } */ -+ s8 = svasrd_n_s8_x (pg, s8, (uint64_t (1) << 62) + 1); /* { dg-error {passing [^ ]* to argument 3 of 'svasrd_n_s8_x', which expects a value in the range \[1, 8\]} } */ -+ s16 = svasrd_n_s16_x (pg, s16, 0); /* { dg-error {passing 0 to argument 3 of 'svasrd_n_s16_x', which expects a value in the range \[1, 16\]} } */ -+ s16 = svasrd_n_s16_x (pg, s16, 1); -+ s16 = svasrd_n_s16_x (pg, s16, 16); -+ s16 = svasrd_n_s16_x (pg, s16, 17); /* { dg-error {passing 17 to argument 3 of 'svasrd_n_s16_x', which expects a value in the range \[1, 16\]} } */ -+ s32 = svasrd_n_s32_x (pg, s32, 0); /* { dg-error {passing 0 to argument 3 of 'svasrd_n_s32_x', which expects a value in the range \[1, 32\]} } */ -+ s32 = svasrd_n_s32_x (pg, s32, 1); -+ s32 = svasrd_n_s32_x (pg, s32, 32); -+ s32 = svasrd_n_s32_x (pg, s32, 33); /* { dg-error {passing 33 to argument 3 of 'svasrd_n_s32_x', which expects a value in the range \[1, 32\]} } */ -+ s64 = svasrd_n_s64_x (pg, s64, 0); /* { dg-error {passing 0 to argument 3 of 'svasrd_n_s64_x', which expects a value in the range \[1, 64\]} } */ -+ s64 = svasrd_n_s64_x (pg, s64, 1); -+ s64 = svasrd_n_s64_x (pg, s64, 64); -+ s64 = svasrd_n_s64_x (pg, s64, 65); /* { dg-error {passing 65 to argument 3 of 'svasrd_n_s64_x', which expects a value in the range \[1, 64\]} } */ -+} -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/asrd_3.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/asrd_3.C -new file mode 100644 -index 000000000..5ebd770b2 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/asrd_3.C -@@ -0,0 +1,51 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-std=c++11 -Wall -Wextra" } */ -+ -+#include -+ -+constexpr uint64_t const_add (uint64_t a, uint64_t b) { return a + b; } -+uint64_t add (uint64_t a, uint64_t b) { return a + b; } -+ -+template -+T shift (svbool_t pg, T v) { return svasrd_x (pg, v, N); } -+/* { dg-error {no matching function for call to 'svasrd_x\(svbool_t&,} "" { target *-*-* } .-1 } */ -+/* { dg-error {passing 0 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 8\]} "" { target *-*-* } .-2 } */ -+/* { dg-error {passing 9 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 8\]} "" { target *-*-* } .-3 } */ -+/* { dg-error {passing 0 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 16\]} "" { target *-*-* } .-4 } */ -+/* { dg-error {passing 17 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 16\]} "" { target *-*-* } .-5 } */ -+/* { dg-error {passing 0 to argument 3 
of 'svasrd_x', which expects a value in the range \[1, 32\]} "" { target *-*-* } .-6 } */ -+/* { dg-error {passing 33 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 32\]} "" { target *-*-* } .-7 } */ -+/* { dg-error {passing 0 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 64\]} "" { target *-*-* } .-8 } */ -+/* { dg-error {passing 65 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 64\]} "" { target *-*-* } .-9 } */ -+ -+template -+T shift1 (svbool_t pg, T v, uint64_t n) { return svasrd_x (pg, v, n); } -+ -+template -+T shift2 (svbool_t pg, T v, uint64_t n) { return svasrd_x (pg, v, n); } -+/* { dg-error {argument 3 of 'svasrd_x' must be an integer constant expression} "" { target *-*-* } .-1 } */ -+ -+void -+f1 (svbool_t pg, svuint8_t u8, svint8_t s8, svint16_t s16, -+ svint32_t s32, svint64_t s64) -+{ -+ u8 = shift <1> (pg, u8); -+ s8 = shift <0> (pg, s8); -+ s8 = shift <1> (pg, s8); -+ s8 = shift <8> (pg, s8); -+ s8 = shift <9> (pg, s8); -+ s16 = shift <0> (pg, s16); -+ s16 = shift <1> (pg, s16); -+ s16 = shift <16> (pg, s16); -+ s16 = shift <17> (pg, s16); -+ s32 = shift <0> (pg, s32); -+ s32 = shift <1> (pg, s32); -+ s32 = shift <32> (pg, s32); -+ s32 = shift <33> (pg, s32); -+ s64 = shift <0> (pg, s64); -+ s64 = shift <1> (pg, s64); -+ s64 = shift <64> (pg, s64); -+ s64 = shift <65> (pg, s64); -+ -+ s8 = shift2 (pg, s8, 1); -+} -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/cntb_pat.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/cntb_pat.c -new file mode 100644 -index 000000000..bbc9f9010 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/cntb_pat.c -@@ -0,0 +1,45 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+void -+test (svpattern pat, int i) -+{ -+ svcntb_pat (pat); /* { dg-error "argument 1 of 'svcntb_pat' must be an integer constant expression" } */ -+ svcntb_pat (i); /* { dg-error "invalid conversion from 'int' to 'svpattern'" } */ -+ /* { dg-error "argument 1 of 'svcntb_pat' must be an integer constant expression" "" { target *-*-* } .-1 } */ -+ svcntb_pat ((svpattern) -1); /* { dg-error "passing 4294967295 to argument 1 of 'svcntb_pat', which expects a valid 'svpattern' value" } */ -+ svcntb_pat ((svpattern) 0); -+ svcntb_pat ((svpattern) 1); -+ svcntb_pat ((svpattern) 2); -+ svcntb_pat ((svpattern) 3); -+ svcntb_pat ((svpattern) 4); -+ svcntb_pat ((svpattern) 5); -+ svcntb_pat ((svpattern) 6); -+ svcntb_pat ((svpattern) 7); -+ svcntb_pat ((svpattern) 8); -+ svcntb_pat ((svpattern) 9); -+ svcntb_pat ((svpattern) 10); -+ svcntb_pat ((svpattern) 11); -+ svcntb_pat ((svpattern) 12); -+ svcntb_pat ((svpattern) 13); -+ svcntb_pat ((svpattern) 14); /* { dg-error "passing 14 to argument 1 of 'svcntb_pat', which expects a valid 'svpattern' value" } */ -+ svcntb_pat ((svpattern) 15); /* { dg-error "passing 15 to argument 1 of 'svcntb_pat', which expects a valid 'svpattern' value" } */ -+ svcntb_pat ((svpattern) 16); /* { dg-error "passing 16 to argument 1 of 'svcntb_pat', which expects a valid 'svpattern' value" } */ -+ svcntb_pat ((svpattern) 17); /* { dg-error "passing 17 to argument 1 of 'svcntb_pat', which expects a valid 'svpattern' value" } */ -+ svcntb_pat ((svpattern) 18); /* { dg-error "passing 18 to argument 1 of 'svcntb_pat', which expects a valid 'svpattern' value" } */ -+ svcntb_pat ((svpattern) 19); /* { dg-error "passing 19 to argument 1 of 'svcntb_pat', which expects a valid 'svpattern' value" } */ -+ svcntb_pat ((svpattern) 20); /* { dg-error 
"passing 20 to argument 1 of 'svcntb_pat', which expects a valid 'svpattern' value" } */ -+ svcntb_pat ((svpattern) 21); /* { dg-error "passing 21 to argument 1 of 'svcntb_pat', which expects a valid 'svpattern' value" } */ -+ svcntb_pat ((svpattern) 22); /* { dg-error "passing 22 to argument 1 of 'svcntb_pat', which expects a valid 'svpattern' value" } */ -+ svcntb_pat ((svpattern) 23); /* { dg-error "passing 23 to argument 1 of 'svcntb_pat', which expects a valid 'svpattern' value" } */ -+ svcntb_pat ((svpattern) 24); /* { dg-error "passing 24 to argument 1 of 'svcntb_pat', which expects a valid 'svpattern' value" } */ -+ svcntb_pat ((svpattern) 25); /* { dg-error "passing 25 to argument 1 of 'svcntb_pat', which expects a valid 'svpattern' value" } */ -+ svcntb_pat ((svpattern) 26); /* { dg-error "passing 26 to argument 1 of 'svcntb_pat', which expects a valid 'svpattern' value" } */ -+ svcntb_pat ((svpattern) 27); /* { dg-error "passing 27 to argument 1 of 'svcntb_pat', which expects a valid 'svpattern' value" } */ -+ svcntb_pat ((svpattern) 28); /* { dg-error "passing 28 to argument 1 of 'svcntb_pat', which expects a valid 'svpattern' value" } */ -+ svcntb_pat ((svpattern) 29); -+ svcntb_pat ((svpattern) 30); -+ svcntb_pat ((svpattern) 31); -+ svcntb_pat ((svpattern) 32); /* { dg-error "passing 32 to argument 1 of 'svcntb_pat', which expects a valid 'svpattern' value" } */ -+} -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/conversion_1.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/conversion_1.C -new file mode 100644 -index 000000000..1b939cdf7 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/conversion_1.C -@@ -0,0 +1,20 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+template -+struct S -+{ -+ S(T); -+ operator T() const; -+ void *base; -+}; -+ -+void f(svbool_t pg, const S &u8a, const S &u8b, -+ const S &s8a) -+{ -+ svadd_x(pg, u8a, u8b); -+ svadd_x(pg, u8a, 1); -+ svadd_x(pg, s8a, u8b); // { dg-error "no matching function for call" } -+ svadd_x(pg, s8a, 1); -+} -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/create2_1.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/create2_1.C -new file mode 100644 -index 000000000..247fd85ec ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/create2_1.C -@@ -0,0 +1,17 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-Wall -Wextra" } */ -+ -+#include -+ -+void -+f1 (svuint8x2_t *ptr, svbool_t pg, svuint8_t u8, svfloat64_t f64, -+ svuint8x2_t u8x2) -+{ -+ *ptr = svcreate2 (u8); /* { dg-error {no matching function for call to 'svcreate2\(svuint8_t\&\)'} } */ -+ *ptr = svcreate2 (u8, u8, u8); /* { dg-error {no matching function for call to 'svcreate2\(svuint8_t\&, svuint8_t\&, svuint8_t\&\)'} } */ -+ *ptr = svcreate2 (u8x2, u8x2); /* { dg-error {no matching function for call to 'svcreate2\(svuint8x2_t\&, svuint8x2_t\&\)'} } */ -+ *ptr = svcreate2 (u8, f64); /* { dg-error {no matching function for call to 'svcreate2\(svuint8_t\&, svfloat64_t\&\)'} } */ -+ *ptr = svcreate2 (u8, pg); /* { dg-error {no matching function for call to 'svcreate2\(svuint8_t\&, svbool_t\&\)'} } */ -+ *ptr = svcreate2 (u8, u8); -+ *ptr = svcreate2 (f64, f64); /* { dg-error {cannot convert 'svfloat64x2_t' to 'svuint8x2_t' in assignment} } */ -+} -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/create2_2.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/create2_2.C -new file mode 100644 -index 000000000..10f3231fa ---- /dev/null 
-+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/create2_2.C -@@ -0,0 +1,17 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-Wall -Wextra" } */ -+ -+#include -+ -+void -+f1 (svuint8x2_t *ptr, svbool_t pg, svuint8_t u8, svfloat64_t f64, -+ svuint8x2_t u8x2) -+{ -+ *ptr = svcreate2_u8 (u8); /* { dg-error {too few arguments to function '[^']*'} } */ -+ *ptr = svcreate2_u8 (u8, u8, u8); /* { dg-error {too many arguments to function '[^']*'} } */ -+ *ptr = svcreate2_u8 (u8x2, u8x2); /* { dg-error {cannot convert 'svuint8x2_t' to 'svuint8_t'} } */ -+ *ptr = svcreate2_u8 (u8, f64); /* { dg-error {cannot convert 'svfloat64_t' to 'svuint8_t'} } */ -+ *ptr = svcreate2_u8 (pg, u8); /* { dg-error {cannot convert 'svbool_t' to 'svuint8_t'} } */ -+ *ptr = svcreate2_u8 (u8, u8); -+ *ptr = svcreate2_f64 (f64, f64); /* { dg-error {cannot convert 'svfloat64x2_t' to 'svuint8x2_t' in assignment} } */ -+} -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/create3_1.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/create3_1.C -new file mode 100644 -index 000000000..ff013634d ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/create3_1.C -@@ -0,0 +1,18 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-Wall -Wextra" } */ -+ -+#include -+ -+void -+f1 (svfloat16x3_t *ptr, svbool_t pg, svfloat16_t f16, svfloat64_t f64, -+ svfloat16x3_t f16x3) -+{ -+ *ptr = svcreate3 (f16); /* { dg-error {no matching function for call to 'svcreate3\(svfloat16_t\&\)'} } */ -+ *ptr = svcreate3 (f16, f16); /* { dg-error {no matching function for call to 'svcreate3\(svfloat16_t\&, svfloat16_t\&\)'} } */ -+ *ptr = svcreate3 (f16, f16, f16, f16); /* { dg-error {no matching function for call to 'svcreate3\(svfloat16_t\&, svfloat16_t\&, svfloat16_t\&, svfloat16_t\&\)'} } */ -+ *ptr = svcreate3 (f16x3, f16x3, f16x3); /* { dg-error {no matching function for call to 'svcreate3\(svfloat16x3_t\&, svfloat16x3_t\&, svfloat16x3_t\&\)'} } */ -+ *ptr = svcreate3 (f16, f16, f64); /* { dg-error {no matching function for call to 'svcreate3\(svfloat16_t\&, svfloat16_t\&, svfloat64_t\&\)'} } */ -+ *ptr = svcreate3 (f16, pg, f16); /* { dg-error {no matching function for call to 'svcreate3\(svfloat16_t\&, svbool_t\&, svfloat16_t\&\)'} } */ -+ *ptr = svcreate3 (f16, f16, f16); -+ *ptr = svcreate3 (f64, f64, f64); /* { dg-error {cannot convert 'svfloat64x3_t' to 'svfloat16x3_t' in assignment} } */ -+} -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/create3_2.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/create3_2.C -new file mode 100644 -index 000000000..07a72b1e2 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/create3_2.C -@@ -0,0 +1,18 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-Wall -Wextra" } */ -+ -+#include -+ -+void -+f1 (svfloat16x3_t *ptr, svbool_t pg, svfloat16_t f16, svfloat64_t f64, -+ svfloat16x3_t f16x3) -+{ -+ *ptr = svcreate3_f16 (f16); /* { dg-error {too few arguments to function '[^']*'} } */ -+ *ptr = svcreate3_f16 (f16, f16); /* { dg-error {too few arguments to function '[^']*'} } */ -+ *ptr = svcreate3_f16 (f16, f16, f16, f16); /* { dg-error {too many arguments to function '[^']*'} } */ -+ *ptr = svcreate3_f16 (f16x3, f16x3, f16x3); /* { dg-error {cannot convert 'svfloat16x3_t' to 'svfloat16_t'} } */ -+ *ptr = svcreate3_f16 (f16, f16, f64); /* { dg-error {cannot convert 'svfloat64_t' to 'svfloat16_t'} } */ -+ *ptr = svcreate3_f16 (f16, pg, f16); /* { dg-error {cannot convert 
'svbool_t' to 'svfloat16_t'} } */ -+ *ptr = svcreate3_f16 (f16, f16, f16); -+ *ptr = svcreate3_f64 (f64, f64, f64); /* { dg-error {cannot convert 'svfloat64x3_t' to 'svfloat16x3_t' in assignment} } */ -+} -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/create4_1.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/create4_1.C -new file mode 100644 -index 000000000..2785d9011 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/create4_1.C -@@ -0,0 +1,19 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-Wall -Wextra" } */ -+ -+#include -+ -+void -+f1 (svint32x4_t *ptr, svbool_t pg, svint32_t s32, svfloat64_t f64, -+ svint32x4_t s32x4) -+{ -+ *ptr = svcreate4 (s32); /* { dg-error {no matching function for call to 'svcreate4\(svint32_t\&\)'} } */ -+ *ptr = svcreate4 (s32, s32); /* { dg-error {no matching function for call to 'svcreate4\(svint32_t\&, svint32_t\&\)'} } */ -+ *ptr = svcreate4 (s32, s32, s32); /* { dg-error {no matching function for call to 'svcreate4\(svint32_t\&, svint32_t\&, svint32_t\&\)'} } */ -+ *ptr = svcreate4 (s32, s32, s32, s32, s32); /* { dg-error {no matching function for call to 'svcreate4\(svint32_t\&, svint32_t\&, svint32_t\&, svint32_t\&, svint32_t\&\)'} } */ -+ *ptr = svcreate4 (s32x4, s32x4, s32x4, s32x4); /* { dg-error {no matching function for call to 'svcreate4\(svint32x4_t\&, svint32x4_t\&, svint32x4_t\&, svint32x4_t\&\)'} } */ -+ *ptr = svcreate4 (s32, s32, s32, f64); /* { dg-error {no matching function for call to 'svcreate4\(svint32_t\&, svint32_t\&, svint32_t\&, svfloat64_t\&\)'} } */ -+ *ptr = svcreate4 (s32, pg, s32, s32); /* { dg-error {no matching function for call to 'svcreate4\(svint32_t\&, svbool_t\&, svint32_t\&, svint32_t\&\)'} } */ -+ *ptr = svcreate4 (s32, s32, s32, s32); -+ *ptr = svcreate4 (f64, f64, f64, f64); /* { dg-error {cannot convert 'svfloat64x4_t' to 'svint32x4_t' in assignment} } */ -+} -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/create4_2.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/create4_2.C -new file mode 100644 -index 000000000..68f21a1d4 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/create4_2.C -@@ -0,0 +1,19 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-Wall -Wextra" } */ -+ -+#include -+ -+void -+f1 (svint32x4_t *ptr, svbool_t pg, svint32_t s32, svfloat64_t f64, -+ svint32x4_t s32x4) -+{ -+ *ptr = svcreate4_s32 (s32); /* { dg-error {too few arguments to function '[^']*'} } */ -+ *ptr = svcreate4_s32 (s32, s32); /* { dg-error {too few arguments to function '[^']*'} } */ -+ *ptr = svcreate4_s32 (s32, s32, s32); /* { dg-error {too few arguments to function '[^']*'} } */ -+ *ptr = svcreate4_s32 (s32, s32, s32, s32, s32); /* { dg-error {too many arguments to function '[^']*'} } */ -+ *ptr = svcreate4_s32 (s32x4, s32x4, s32x4, s32x4); /* { dg-error {cannot convert 'svint32x4_t' to 'svint32_t'} } */ -+ *ptr = svcreate4_s32 (s32, s32, s32, f64); /* { dg-error {cannot convert 'svfloat64_t' to 'svint32_t'} } */ -+ *ptr = svcreate4_s32 (s32, pg, s32, s32); /* { dg-error {cannot convert 'svbool_t' to 'svint32_t'} } */ -+ *ptr = svcreate4_s32 (s32, s32, s32, s32); -+ *ptr = svcreate4_f64 (f64, f64, f64, f64); /* { dg-error {cannot convert 'svfloat64x4_t' to 'svint32x4_t' in assignment} } */ -+} -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/dot_1.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/dot_1.C -new file mode 100644 -index 000000000..93397c82f ---- /dev/null 
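The dot_1.C and dot_2.C tests that follow exercise svdot's operand-type checking. For contrast, a well-formed call (an illustrative sketch, assuming arm_sve.h) looks like:

#include <arm_sve.h>

/* svdot accumulates 4-way dot products of 8-bit lanes into 32-bit
   accumulators; both multiplicand vectors must have the same signedness
   as each other and match the accumulator's signedness.  */
svuint32_t
dot_ok (svuint32_t acc, svuint8_t x, svuint8_t y)
{
  return svdot (acc, x, y);    /* equivalent to svdot_u32 (acc, x, y) */
}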
-+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/dot_1.C -@@ -0,0 +1,9 @@ -+/* { dg-do compile } */ -+ -+#include "dot_1.h" -+ -+svuint32_t -+f1 (svuint32_t x, svint8_t y, svuint8_t z) -+{ -+ return svdot_u32 (x, y, z); /* { dg-error "cannot convert 'svint8_t' to 'svuint8_t'" } */ -+} -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/dot_1.h b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/dot_1.h -new file mode 100644 -index 000000000..aef02f20b ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/dot_1.h -@@ -0,0 +1,2 @@ -+#pragma GCC system_header -+#pragma GCC aarch64 "arm_sve.h" /* { dg-message "initializing argument 2" } */ -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/dot_2.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/dot_2.C -new file mode 100644 -index 000000000..2084ed828 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/dot_2.C -@@ -0,0 +1,12 @@ -+/* { dg-do compile } */ -+ -+#include "dot_2.h" -+ -+void -+f1 (svuint32_t x, svint8_t y, svuint8_t z) -+{ -+ svdot (x, y); /* { dg-error {no matching function for call to 'svdot\(svuint32_t&, svint8_t&\)'} } */ -+ svdot (x, x, x); /* { dg-error {no matching function for call to 'svdot\(svuint32_t&, svuint32_t&, svuint32_t&\)'} } */ -+ svdot (1, z, z); /* { dg-error {no matching function for call to 'svdot\(int, svuint8_t&, svuint8_t&\)'} } */ -+ svdot (x, y, z); /* { dg-error {no matching function for call to 'svdot\(svuint32_t&, svint8_t&, svuint8_t&\)'} } */ -+} -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/dot_2.h b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/dot_2.h -new file mode 100644 -index 000000000..3e4a9c794 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/dot_2.h -@@ -0,0 +1,7 @@ -+#pragma GCC system_header -+#pragma GCC aarch64 "arm_sve.h" -+/* { dg-message {note: candidate: 'svuint32_t svdot\(svuint32_t, svuint8_t, svuint8_t\)'} "" { target *-*-* } 3 } */ -+/* { dg-message {note: *candidate expects 3 arguments, 2 provided} "" { target *-*-* } 3 } */ -+/* { dg-message {note: *no known conversion for argument 2 from 'svuint32_t' to 'svuint8_t'} "" { target *-*-* } 3 } */ -+/* { dg-message {note: *no known conversion for argument 1 from 'int' to 'svuint32_t'} "" { target *-*-* } 3 } */ -+/* { dg-message {note: *no known conversion for argument 2 from 'svint8_t' to 'svuint8_t'} "" { target *-*-* } 3 } */ -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_1.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_1.c -new file mode 100644 -index 000000000..8f18810c0 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_1.c -@@ -0,0 +1,5 @@ -+/* { dg-do compile } */ -+ -+int svadd_n_u8_x; /* { dg-message "note: previous declaration 'int svadd_n_u8_x'" } */ -+ -+#pragma GCC aarch64 "arm_sve.h" /* { dg-error {'svuint8_t svadd_n_u8_x\(svbool_t, svuint8_t, [^)\n]*\)' redeclared as different kind of entity} } */ -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_2.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_2.c -new file mode 100644 -index 000000000..a67f9f756 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_2.c -@@ -0,0 +1,5 @@ -+/* { dg-do compile } */ -+ -+int svadd_n_u8_x = 1; /* { dg-message "note: previous declaration 'int svadd_n_u8_x'" } */ -+ -+#pragma GCC aarch64 "arm_sve.h" /* { 
dg-error {'svuint8_t svadd_n_u8_x\(svbool_t, svuint8_t, [^)\n]*\)' redeclared as different kind of entity} } */ -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_3.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_3.c -new file mode 100644 -index 000000000..74b820fe6 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_3.c -@@ -0,0 +1,7 @@ -+/* { dg-do compile } */ -+ -+/* Although not supported, there's nothing to stop the user overloading -+ the sv* functions. */ -+extern __SVInt8_t svadd_u8_x (__SVBool_t, __SVInt8_t, __SVInt8_t); -+ -+#pragma GCC aarch64 "arm_sve.h" -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_4.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_4.c -new file mode 100644 -index 000000000..9591e3d01 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_4.c -@@ -0,0 +1,9 @@ -+/* { dg-do compile } */ -+ -+/* Although somewhat suspect, this isn't actively wrong, and doesn't need -+ to be diagnosed. Any attempt to call the function before including -+ arm_sve.h will lead to a link failure. (Same for taking its address, -+ etc.) */ -+extern __SVUint8_t svadd_u8_x (__SVBool_t, __SVUint8_t, __SVUint8_t); -+ -+#pragma GCC aarch64 "arm_sve.h" -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_5.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_5.c -new file mode 100644 -index 000000000..f87201984 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_5.c -@@ -0,0 +1,15 @@ -+/* { dg-do compile } */ -+ -+__SVUint8_t -+svadd_u8_x (__SVBool_t pg, __SVUint8_t x, __SVUint8_t y) -+{ -+ return x; -+} -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+svuint8_t -+f (svbool_t pg, svuint8_t x, svuint8_t y) -+{ -+ return svadd_u8_x (pg, x, y); -+} -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_6.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_6.c -new file mode 100644 -index 000000000..a65e0d65c ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_6.c -@@ -0,0 +1,5 @@ -+/* { dg-do compile } */ -+ -+typedef int svadd_u8_x; /* { dg-message "note: previous declaration 'typedef int svadd_u8_x'" } */ -+ -+#pragma GCC aarch64 "arm_sve.h" /* { dg-error {'svuint8_t svadd_u8_x\(svbool_t, svuint8_t, svuint8_t\)' redeclared as different kind of entity} } */ -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_7.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_7.c -new file mode 100644 -index 000000000..1f2e4bf66 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/func_redef_7.c -@@ -0,0 +1,15 @@ -+/* { dg-do compile } */ -+ -+__SVUint8_t -+svadd_x (__SVBool_t pg, __SVUint8_t x, __SVUint8_t y) -+{ -+ return x; -+} -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+svuint8_t -+f (svbool_t pg, svuint8_t x, svuint8_t y) -+{ -+ return svadd_x (pg, x, y); -+} -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/get2_1.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/get2_1.C -new file mode 100644 -index 000000000..8d6bb2307 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/get2_1.C -@@ -0,0 +1,39 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-std=c++11 -Wall -Wextra" } */ -+ -+#include -+ -+constexpr uint64_t const_sub (uint64_t a, uint64_t b) { return a - b; } 
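The constexpr helper above (and the plain add helper defined next) is what lets get2_1.C separate genuine integer constant expressions from runtime values. A standalone sketch of that rule (illustrative only; assumes arm_sve.h and C++11) is:

#include <arm_sve.h>

constexpr uint64_t idx () { return 1; }     /* folds to a constant */
uint64_t runtime_idx () { return 1; }       /* not a constant expression */

svuint8_t
pick (svuint8x2_t t)
{
  /* Accepted: idx () is an integer constant expression with value 1,
     which is inside svget2's [0, 1] index range.  A call such as
     svget2 (t, runtime_idx ()) would be rejected, since argument 2
     must be an integer constant expression.  */
  return svget2 (t, idx ());
}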
-+uint64_t add (uint64_t a, uint64_t b) { return a + b; } -+ -+svfloat64_t -+f1 (svbool_t pg, svuint8_t u8, svuint8x2_t u8x2, svuint8x3_t u8x3, int x) -+{ -+ const int one = 1; -+ svfloat64_t f64; -+ -+ u8 = svget2 (u8x2); /* { dg-error {no matching function for call to 'svget2\(svuint8x2_t\&\)'} } */ -+ u8 = svget2 (u8x2, 1, 2); /* { dg-error {no matching function for call to 'svget2\(svuint8x2_t\&, int, int\)'} } */ -+ u8 = svget2 (u8, 0); /* { dg-error {no matching function for call to 'svget2\(svuint8_t\&, int\)'} } */ -+ u8 = svget2 (u8x3, 0); /* { dg-error {no matching function for call to 'svget2\(svuint8x3_t\&, int\)'} } */ -+ u8 = svget2 (pg, 0); /* { dg-error {no matching function for call to 'svget2\(svbool_t\&, int\)'} } */ -+ u8 = svget2 (u8x2, x); /* { dg-error "argument 2 of 'svget2' must be an integer constant expression" } */ -+ u8 = svget2 (u8x2, 0); -+ f64 = svget2 (u8x2, 0); /* { dg-error "cannot convert 'svuint8_t' to 'svfloat64_t' in assignment" } */ -+ u8 = svget2 (u8x2, 1); -+ u8 = svget2 (u8x2, 2); /* { dg-error {passing 2 to argument 2 of 'svget2', which expects a value in the range \[0, 1\]} } */ -+ u8 = svget2 (u8x2, 3); /* { dg-error {passing 3 to argument 2 of 'svget2', which expects a value in the range \[0, 1\]} } */ -+ u8 = svget2 (u8x2, 4); /* { dg-error {passing 4 to argument 2 of 'svget2', which expects a value in the range \[0, 1\]} } */ -+ u8 = svget2 (u8x2, 5); /* { dg-error {passing 5 to argument 2 of 'svget2', which expects a value in the range \[0, 1\]} } */ -+ u8 = svget2 (u8x2, ~0U); /* { dg-error {passing [^ ]* to argument 2 of 'svget2', which expects a value in the range \[0, 1\]} } */ -+ u8 = svget2 (u8x2, one); -+ u8 = svget2 (u8x2, 3 - 2); -+ u8 = svget2 (u8x2, 1.0); -+ u8 = svget2 (u8x2, const_sub (5, 4)); -+ u8 = svget2 (u8x2, const_sub (6, 4)); /* { dg-error {passing 2 to argument 2 of 'svget2', which expects a value in the range \[0, 1\]} } */ -+ u8 = svget2 (u8x2, const_sub (7, 4)); /* { dg-error {passing 3 to argument 2 of 'svget2', which expects a value in the range \[0, 1\]} } */ -+ u8 = svget2 (u8x2, const_sub (8, 4)); /* { dg-error {passing 4 to argument 2 of 'svget2', which expects a value in the range \[0, 1\]} } */ -+ u8 = svget2 (u8x2, add (0, 0)); /* { dg-error "argument 2 of 'svget2' must be an integer constant expression" } */ -+ -+ return f64; -+} -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/get2_2.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/get2_2.C -new file mode 100644 -index 000000000..9c7674be1 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/get2_2.C -@@ -0,0 +1,39 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-std=c++11 -Wall -Wextra" } */ -+ -+#include -+ -+constexpr uint64_t const_sub (uint64_t a, uint64_t b) { return a - b; } -+uint64_t add (uint64_t a, uint64_t b) { return a + b; } -+ -+svfloat64_t -+f1 (svbool_t pg, svuint8_t u8, svuint8x2_t u8x2, svuint8x3_t u8x3, int x) -+{ -+ const int one = 1; -+ svfloat64_t f64; -+ -+ u8 = svget2_u8 (u8x2); /* { dg-error {too few arguments to function '[^']*'} } */ -+ u8 = svget2_u8 (u8x2, 1, 2); /* { dg-error {too many arguments to function '[^']*'} } */ -+ u8 = svget2_u8 (u8, 0); /* { dg-error {cannot convert 'svuint8_t' to 'svuint8x2_t'} } */ -+ u8 = svget2_u8 (u8x3, 0); /* { dg-error {cannot convert 'svuint8x3_t' to 'svuint8x2_t'} } */ -+ u8 = svget2_u8 (pg, 0); /* { dg-error {cannot convert 'svbool_t' to 'svuint8x2_t'} } */ -+ u8 = svget2_u8 (u8x2, x); /* { dg-error "argument 2 of 'svget2_u8' 
must be an integer constant expression" } */ -+ u8 = svget2_u8 (u8x2, 0); -+ f64 = svget2_u8 (u8x2, 0); /* { dg-error "cannot convert 'svuint8_t' to 'svfloat64_t' in assignment" } */ -+ u8 = svget2_u8 (u8x2, 1); -+ u8 = svget2_u8 (u8x2, 2); /* { dg-error {passing 2 to argument 2 of 'svget2_u8', which expects a value in the range \[0, 1\]} } */ -+ u8 = svget2_u8 (u8x2, 3); /* { dg-error {passing 3 to argument 2 of 'svget2_u8', which expects a value in the range \[0, 1\]} } */ -+ u8 = svget2_u8 (u8x2, 4); /* { dg-error {passing 4 to argument 2 of 'svget2_u8', which expects a value in the range \[0, 1\]} } */ -+ u8 = svget2_u8 (u8x2, 5); /* { dg-error {passing 5 to argument 2 of 'svget2_u8', which expects a value in the range \[0, 1\]} } */ -+ u8 = svget2_u8 (u8x2, ~0U); /* { dg-error {passing [^ ]* to argument 2 of 'svget2_u8', which expects a value in the range \[0, 1\]} } */ -+ u8 = svget2_u8 (u8x2, one); -+ u8 = svget2_u8 (u8x2, 3 - 2); -+ u8 = svget2_u8 (u8x2, 1.0); -+ u8 = svget2_u8 (u8x2, const_sub (5, 4)); -+ u8 = svget2_u8 (u8x2, const_sub (6, 4)); /* { dg-error {passing 2 to argument 2 of 'svget2_u8', which expects a value in the range \[0, 1\]} } */ -+ u8 = svget2_u8 (u8x2, const_sub (7, 4)); /* { dg-error {passing 3 to argument 2 of 'svget2_u8', which expects a value in the range \[0, 1\]} } */ -+ u8 = svget2_u8 (u8x2, const_sub (8, 4)); /* { dg-error {passing 4 to argument 2 of 'svget2_u8', which expects a value in the range \[0, 1\]} } */ -+ u8 = svget2_u8 (u8x2, add (0, 0)); /* { dg-error "argument 2 of 'svget2_u8' must be an integer constant expression" } */ -+ -+ return f64; -+} -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/get3_1.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/get3_1.C -new file mode 100644 -index 000000000..bd8808a8b ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/get3_1.C -@@ -0,0 +1,40 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-std=c++11 -Wall -Wextra" } */ -+ -+#include -+ -+constexpr uint64_t const_sub (uint64_t a, uint64_t b) { return a - b; } -+uint64_t add (uint64_t a, uint64_t b) { return a + b; } -+ -+svfloat64_t -+f1 (svbool_t pg, svfloat16_t f16, svfloat16x3_t f16x3, svfloat16x4_t f16x4, -+ int x) -+{ -+ const int one = 1; -+ svfloat64_t f64; -+ -+ f16 = svget3 (f16x3); /* { dg-error {no matching function for call to 'svget3\(svfloat16x3_t\&\)'} } */ -+ f16 = svget3 (f16x3, 1, 2); /* { dg-error {no matching function for call to 'svget3\(svfloat16x3_t\&, int, int\)'} } */ -+ f16 = svget3 (f16, 0); /* { dg-error {no matching function for call to 'svget3\(svfloat16_t\&, int\)'} } */ -+ f16 = svget3 (f16x4, 0); /* { dg-error {no matching function for call to 'svget3\(svfloat16x4_t\&, int\)'} } */ -+ f16 = svget3 (pg, 0); /* { dg-error {no matching function for call to 'svget3\(svbool_t\&, int\)'} } */ -+ f16 = svget3 (f16x3, x); /* { dg-error "argument 2 of 'svget3' must be an integer constant expression" } */ -+ f16 = svget3 (f16x3, 0); -+ f64 = svget3 (f16x3, 0); /* { dg-error "cannot convert 'svfloat16_t' to 'svfloat64_t' in assignment" } */ -+ f16 = svget3 (f16x3, 1); -+ f16 = svget3 (f16x3, 2); -+ f16 = svget3 (f16x3, 3); /* { dg-error {passing 3 to argument 2 of 'svget3', which expects a value in the range \[0, 2\]} } */ -+ f16 = svget3 (f16x3, 4); /* { dg-error {passing 4 to argument 2 of 'svget3', which expects a value in the range \[0, 2\]} } */ -+ f16 = svget3 (f16x3, 5); /* { dg-error {passing 5 to argument 2 of 'svget3', which expects a value in the range \[0, 
2\]} } */ -+ f16 = svget3 (f16x3, ~0U); /* { dg-error {passing [^ ]* to argument 2 of 'svget3', which expects a value in the range \[0, 2\]} } */ -+ f16 = svget3 (f16x3, one); -+ f16 = svget3 (f16x3, 3 - 2); -+ f16 = svget3 (f16x3, 1.0); -+ f16 = svget3 (f16x3, const_sub (5, 4)); -+ f16 = svget3 (f16x3, const_sub (6, 4)); -+ f16 = svget3 (f16x3, const_sub (7, 4)); /* { dg-error {passing 3 to argument 2 of 'svget3', which expects a value in the range \[0, 2\]} } */ -+ f16 = svget3 (f16x3, const_sub (8, 4)); /* { dg-error {passing 4 to argument 2 of 'svget3', which expects a value in the range \[0, 2\]} } */ -+ f16 = svget3 (f16x3, add (0, 0)); /* { dg-error "argument 2 of 'svget3' must be an integer constant expression" } */ -+ -+ return f64; -+} -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/get3_2.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/get3_2.C -new file mode 100644 -index 000000000..d526947d1 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/get3_2.C -@@ -0,0 +1,40 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-std=c++11 -Wall -Wextra" } */ -+ -+#include -+ -+constexpr uint64_t const_sub (uint64_t a, uint64_t b) { return a - b; } -+uint64_t add (uint64_t a, uint64_t b) { return a + b; } -+ -+svfloat64_t -+f1 (svbool_t pg, svfloat16_t f16, svfloat16x3_t f16x3, svfloat16x4_t f16x4, -+ int x) -+{ -+ const int one = 1; -+ svfloat64_t f64; -+ -+ f16 = svget3_f16 (f16x3); /* { dg-error {too few arguments to function '[^']*'} } */ -+ f16 = svget3_f16 (f16x3, 1, 2); /* { dg-error {too many arguments to function '[^']*'} } */ -+ f16 = svget3_f16 (f16, 0); /* { dg-error {cannot convert 'svfloat16_t' to 'svfloat16x3_t'} } */ -+ f16 = svget3_f16 (f16x4, 0); /* { dg-error {cannot convert 'svfloat16x4_t' to 'svfloat16x3_t'} } */ -+ f16 = svget3_f16 (pg, 0); /* { dg-error {cannot convert 'svbool_t' to 'svfloat16x3_t'} } */ -+ f16 = svget3_f16 (f16x3, x); /* { dg-error "argument 2 of 'svget3_f16' must be an integer constant expression" } */ -+ f16 = svget3_f16 (f16x3, 0); -+ f64 = svget3_f16 (f16x3, 0); /* { dg-error "cannot convert 'svfloat16_t' to 'svfloat64_t' in assignment" } */ -+ f16 = svget3_f16 (f16x3, 1); -+ f16 = svget3_f16 (f16x3, 2); -+ f16 = svget3_f16 (f16x3, 3); /* { dg-error {passing 3 to argument 2 of 'svget3_f16', which expects a value in the range \[0, 2\]} } */ -+ f16 = svget3_f16 (f16x3, 4); /* { dg-error {passing 4 to argument 2 of 'svget3_f16', which expects a value in the range \[0, 2\]} } */ -+ f16 = svget3_f16 (f16x3, 5); /* { dg-error {passing 5 to argument 2 of 'svget3_f16', which expects a value in the range \[0, 2\]} } */ -+ f16 = svget3_f16 (f16x3, ~0U); /* { dg-error {passing [^ ]* to argument 2 of 'svget3_f16', which expects a value in the range \[0, 2\]} } */ -+ f16 = svget3_f16 (f16x3, one); -+ f16 = svget3_f16 (f16x3, 3 - 2); -+ f16 = svget3_f16 (f16x3, 1.0); -+ f16 = svget3_f16 (f16x3, const_sub (5, 4)); -+ f16 = svget3_f16 (f16x3, const_sub (6, 4)); -+ f16 = svget3_f16 (f16x3, const_sub (7, 4)); /* { dg-error {passing 3 to argument 2 of 'svget3_f16', which expects a value in the range \[0, 2\]} } */ -+ f16 = svget3_f16 (f16x3, const_sub (8, 4)); /* { dg-error {passing 4 to argument 2 of 'svget3_f16', which expects a value in the range \[0, 2\]} } */ -+ f16 = svget3_f16 (f16x3, add (0, 0)); /* { dg-error "argument 2 of 'svget3_f16' must be an integer constant expression" } */ -+ -+ return f64; -+} -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/get4_1.C 
b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/get4_1.C -new file mode 100644 -index 000000000..19853dece ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/get4_1.C -@@ -0,0 +1,39 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-std=c++11 -Wall -Wextra" } */ -+ -+#include -+ -+constexpr uint64_t const_sub (uint64_t a, uint64_t b) { return a - b; } -+uint64_t add (uint64_t a, uint64_t b) { return a + b; } -+ -+svfloat64_t -+f1 (svbool_t pg, svint32_t s32, svint32x4_t s32x4, svint32x2_t s32x2, int x) -+{ -+ const int one = 1; -+ svfloat64_t f64; -+ -+ s32 = svget4 (s32x4); /* { dg-error {no matching function for call to 'svget4\(svint32x4_t\&\)'} } */ -+ s32 = svget4 (s32x4, 1, 2); /* { dg-error {no matching function for call to 'svget4\(svint32x4_t\&, int, int\)'} } */ -+ s32 = svget4 (s32, 0); /* { dg-error {no matching function for call to 'svget4\(svint32_t\&, int\)'} } */ -+ s32 = svget4 (s32x2, 0); /* { dg-error {no matching function for call to 'svget4\(svint32x2_t\&, int\)'} } */ -+ s32 = svget4 (pg, 0); /* { dg-error {no matching function for call to 'svget4\(svbool_t\&, int\)'} } */ -+ s32 = svget4 (s32x4, x); /* { dg-error "argument 2 of 'svget4' must be an integer constant expression" } */ -+ s32 = svget4 (s32x4, 0); -+ f64 = svget4 (s32x4, 0); /* { dg-error "cannot convert 'svint32_t' to 'svfloat64_t' in assignment" } */ -+ s32 = svget4 (s32x4, 1); -+ s32 = svget4 (s32x4, 2); -+ s32 = svget4 (s32x4, 3); -+ s32 = svget4 (s32x4, 4); /* { dg-error {passing 4 to argument 2 of 'svget4', which expects a value in the range \[0, 3\]} } */ -+ s32 = svget4 (s32x4, 5); /* { dg-error {passing 5 to argument 2 of 'svget4', which expects a value in the range \[0, 3\]} } */ -+ s32 = svget4 (s32x4, ~0U); /* { dg-error {passing [^ ]* to argument 2 of 'svget4', which expects a value in the range \[0, 3\]} } */ -+ s32 = svget4 (s32x4, one); -+ s32 = svget4 (s32x4, 3 - 2); -+ s32 = svget4 (s32x4, 1.0); -+ s32 = svget4 (s32x4, const_sub (5, 4)); -+ s32 = svget4 (s32x4, const_sub (6, 4)); -+ s32 = svget4 (s32x4, const_sub (7, 4)); -+ s32 = svget4 (s32x4, const_sub (8, 4)); /* { dg-error {passing 4 to argument 2 of 'svget4', which expects a value in the range \[0, 3\]} } */ -+ s32 = svget4 (s32x4, add (0, 0)); /* { dg-error "argument 2 of 'svget4' must be an integer constant expression" } */ -+ -+ return f64; -+} -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/get4_2.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/get4_2.C -new file mode 100644 -index 000000000..7a0979225 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/get4_2.C -@@ -0,0 +1,39 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-std=c++11 -Wall -Wextra" } */ -+ -+#include -+ -+constexpr uint64_t const_sub (uint64_t a, uint64_t b) { return a - b; } -+uint64_t add (uint64_t a, uint64_t b) { return a + b; } -+ -+svfloat64_t -+f1 (svbool_t pg, svint32_t s32, svint32x4_t s32x4, svint32x2_t s32x2, int x) -+{ -+ const int one = 1; -+ svfloat64_t f64; -+ -+ s32 = svget4_s32 (s32x4); /* { dg-error {too few arguments to function '[^']*'} } */ -+ s32 = svget4_s32 (s32x4, 1, 2); /* { dg-error {too many arguments to function '[^']*'} } */ -+ s32 = svget4_s32 (s32, 0); /* { dg-error {cannot convert 'svint32_t' to 'svint32x4_t'} } */ -+ s32 = svget4_s32 (s32x2, 0); /* { dg-error {cannot convert 'svint32x2_t' to 'svint32x4_t'} } */ -+ s32 = svget4_s32 (pg, 0); /* { dg-error {cannot convert 'svbool_t' to 'svint32x4_t'} } */ -+ s32 = svget4_s32 
(s32x4, x); /* { dg-error "argument 2 of 'svget4_s32' must be an integer constant expression" } */ -+ s32 = svget4_s32 (s32x4, 0); -+ f64 = svget4_s32 (s32x4, 0); /* { dg-error "cannot convert 'svint32_t' to 'svfloat64_t' in assignment" } */ -+ s32 = svget4_s32 (s32x4, 1); -+ s32 = svget4_s32 (s32x4, 2); -+ s32 = svget4_s32 (s32x4, 3); -+ s32 = svget4_s32 (s32x4, 4); /* { dg-error {passing 4 to argument 2 of 'svget4_s32', which expects a value in the range \[0, 3\]} } */ -+ s32 = svget4_s32 (s32x4, 5); /* { dg-error {passing 5 to argument 2 of 'svget4_s32', which expects a value in the range \[0, 3\]} } */ -+ s32 = svget4_s32 (s32x4, ~0U); /* { dg-error {passing [^ ]* to argument 2 of 'svget4_s32', which expects a value in the range \[0, 3\]} } */ -+ s32 = svget4_s32 (s32x4, one); -+ s32 = svget4_s32 (s32x4, 3 - 2); -+ s32 = svget4_s32 (s32x4, 1.0); -+ s32 = svget4_s32 (s32x4, const_sub (5, 4)); -+ s32 = svget4_s32 (s32x4, const_sub (6, 4)); -+ s32 = svget4_s32 (s32x4, const_sub (7, 4)); -+ s32 = svget4_s32 (s32x4, const_sub (8, 4)); /* { dg-error {passing 4 to argument 2 of 'svget4_s32', which expects a value in the range \[0, 3\]} } */ -+ s32 = svget4_s32 (s32x4, add (0, 0)); /* { dg-error "argument 2 of 'svget4_s32' must be an integer constant expression" } */ -+ -+ return f64; -+} -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/lsl_wide_1.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/lsl_wide_1.C -new file mode 100644 -index 000000000..fb31e947d ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/lsl_wide_1.C -@@ -0,0 +1,12 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+svuint8_t -+f1 (svbool_t pg, svuint8_t x, svint8_t w, svuint64_t y) -+{ -+ svlsl_wide_u8_x (pg, x, x); /* { dg-error "cannot convert 'svuint8_t' to 'svuint64_t'" } */ -+ svlsl_wide_u8_x (pg, x); /* { dg-error {too few arguments to function 'svuint8_t svlsl_wide_u8_x\(svbool_t, svuint8_t, svuint64_t\)'} } */ -+ svlsl_wide_u8_x (pg, x, y, x); /* { dg-error {too many arguments to function 'svuint8_t svlsl_wide_u8_x\(svbool_t, svuint8_t, svuint64_t\)'} } */ -+ return svlsl_wide_s8_x (pg, w, y); /* { dg-error {cannot convert 'svint8_t' to 'svuint8_t' in return} } */ -+} -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/lsl_wide_2.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/lsl_wide_2.C -new file mode 100644 -index 000000000..95d341dc5 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/lsl_wide_2.C -@@ -0,0 +1,14 @@ -+/* { dg-do compile } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+void -+f1 (svbool_t pg, svuint8_t x, svuint64_t y) -+{ -+ svlsl_wide_x (pg, x); /* { dg-error {no matching function for call to 'svlsl_wide_x\(svbool_t&, svuint8_t&\)'} } */ -+ svlsl_wide_x (pg, x, x, x, x); /* { dg-error {no matching function for call to 'svlsl_wide_x\(svbool_t&, svuint8_t&, svuint8_t&, svuint8_t&, svuint8_t&\)'} } */ -+ svlsl_wide_x (x, x, y); /* { dg-error {no matching function for call to 'svlsl_wide_x\(svuint8_t&, svuint8_t&, svuint64_t&\)'} } */ -+ svlsl_wide_x (pg, 1, y); /* { dg-error {no matching function for call to 'svlsl_wide_x\(svbool_t&, int, svuint64_t&\)'} } */ -+ svlsl_wide_x (pg, x, x); /* { dg-error {no matching function for call to 'svlsl_wide_x\(svbool_t&, svuint8_t&, svuint8_t&\)'} } */ -+ svlsl_wide_x (pg, y, y); /* { dg-error {no matching function for call to 'svlsl_wide_x\(svbool_t&, svuint64_t&, svuint64_t&\)'} } */ -+} -diff --git 
a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/mangle_1.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/mangle_1.C -new file mode 100644 -index 000000000..1a1712485 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/mangle_1.C -@@ -0,0 +1,31 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+void f1(svbool_t) {} -+void f2(svint8_t) {} -+void f3(svint16_t) {} -+void f4(svint32_t) {} -+void f5(svint64_t) {} -+void f6(svuint8_t) {} -+void f7(svuint16_t) {} -+void f8(svuint32_t) {} -+void f9(svuint64_t) {} -+void f10(svfloat16_t) {} -+void f11(svfloat32_t) {} -+void f12(svfloat64_t) {} -+void f13(svbfloat16_t) {} -+ -+/* { dg-final { scan-assembler "_Z2f110__SVBool_t:" } } */ -+/* { dg-final { scan-assembler "_Z2f210__SVInt8_t:" } } */ -+/* { dg-final { scan-assembler "_Z2f311__SVInt16_t:" } } */ -+/* { dg-final { scan-assembler "_Z2f411__SVInt32_t:" } } */ -+/* { dg-final { scan-assembler "_Z2f511__SVInt64_t:" } } */ -+/* { dg-final { scan-assembler "_Z2f611__SVUint8_t:" } } */ -+/* { dg-final { scan-assembler "_Z2f712__SVUint16_t:" } } */ -+/* { dg-final { scan-assembler "_Z2f812__SVUint32_t:" } } */ -+/* { dg-final { scan-assembler "_Z2f912__SVUint64_t:" } } */ -+/* { dg-final { scan-assembler "_Z3f1013__SVFloat16_t:" } } */ -+/* { dg-final { scan-assembler "_Z3f1113__SVFloat32_t:" } } */ -+/* { dg-final { scan-assembler "_Z3f1213__SVFloat64_t:" } } */ -+/* { dg-final { scan-assembler "_Z3f1314__SVBfloat16_t:" } } */ -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/mangle_2.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/mangle_2.C -new file mode 100644 -index 000000000..6792b8a31 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/mangle_2.C -@@ -0,0 +1,29 @@ -+/* { dg-do compile } */ -+ -+void f1(__SVBool_t) {} -+void f2(__SVInt8_t) {} -+void f3(__SVInt16_t) {} -+void f4(__SVInt32_t) {} -+void f5(__SVInt64_t) {} -+void f6(__SVUint8_t) {} -+void f7(__SVUint16_t) {} -+void f8(__SVUint32_t) {} -+void f9(__SVUint64_t) {} -+void f10(__SVFloat16_t) {} -+void f11(__SVFloat32_t) {} -+void f12(__SVFloat64_t) {} -+void f13(__SVBfloat16_t) {} -+ -+/* { dg-final { scan-assembler "_Z2f110__SVBool_t:" } } */ -+/* { dg-final { scan-assembler "_Z2f210__SVInt8_t:" } } */ -+/* { dg-final { scan-assembler "_Z2f311__SVInt16_t:" } } */ -+/* { dg-final { scan-assembler "_Z2f411__SVInt32_t:" } } */ -+/* { dg-final { scan-assembler "_Z2f511__SVInt64_t:" } } */ -+/* { dg-final { scan-assembler "_Z2f611__SVUint8_t:" } } */ -+/* { dg-final { scan-assembler "_Z2f712__SVUint16_t:" } } */ -+/* { dg-final { scan-assembler "_Z2f812__SVUint32_t:" } } */ -+/* { dg-final { scan-assembler "_Z2f912__SVUint64_t:" } } */ -+/* { dg-final { scan-assembler "_Z3f1013__SVFloat16_t:" } } */ -+/* { dg-final { scan-assembler "_Z3f1113__SVFloat32_t:" } } */ -+/* { dg-final { scan-assembler "_Z3f1213__SVFloat64_t:" } } */ -+/* { dg-final { scan-assembler "_Z3f1314__SVBfloat16_t:" } } */ -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/mangle_3.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/mangle_3.C -new file mode 100644 -index 000000000..8f64f7c2e ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/mangle_3.C -@@ -0,0 +1,18 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-msve-vector-bits=256" } */ -+ -+#include -+ -+typedef __SVInt8_t t1; -+typedef svint8_t t2; -+/* Distinct from svint8_t, but compatible with it. 
*/ -+typedef int8_t t3 __attribute__((vector_size(32))); -+ -+void f1(t1) {} -+void f2(t2) {} -+void f3(t3) {} -+void f4(t1 &a, t2 &b, t3 &c) { a = b = c; } -+ -+/* { dg-final { scan-assembler "_Z2f110__SVInt8_t:" } } */ -+/* { dg-final { scan-assembler "_Z2f210__SVInt8_t:" } } */ -+/* { dg-final { scan-assembler "_Z2f3Dv32_a:" } } */ -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/mangle_4.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/mangle_4.C -new file mode 100644 -index 000000000..7cdc6cb0c ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/mangle_4.C -@@ -0,0 +1,75 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+void f1(svint8x2_t) {} -+void f2(svint16x2_t) {} -+void f3(svint32x2_t) {} -+void f4(svint64x2_t) {} -+void f5(svuint8x2_t) {} -+void f6(svuint16x2_t) {} -+void f7(svuint32x2_t) {} -+void f8(svuint64x2_t) {} -+void f9(svfloat16x2_t) {} -+void f10(svfloat32x2_t) {} -+void f11(svfloat64x2_t) {} -+ -+void g1(svint8x3_t) {} -+void g2(svint16x3_t) {} -+void g3(svint32x3_t) {} -+void g4(svint64x3_t) {} -+void g5(svuint8x3_t) {} -+void g6(svuint16x3_t) {} -+void g7(svuint32x3_t) {} -+void g8(svuint64x3_t) {} -+void g9(svfloat16x3_t) {} -+void g10(svfloat32x3_t) {} -+void g11(svfloat64x3_t) {} -+ -+void h1(svint8x4_t) {} -+void h2(svint16x4_t) {} -+void h3(svint32x4_t) {} -+void h4(svint64x4_t) {} -+void h5(svuint8x4_t) {} -+void h6(svuint16x4_t) {} -+void h7(svuint32x4_t) {} -+void h8(svuint64x4_t) {} -+void h9(svfloat16x4_t) {} -+void h10(svfloat32x4_t) {} -+void h11(svfloat64x4_t) {} -+ -+/* { dg-final { scan-assembler "_Z2f110svint8x2_t:" } } */ -+/* { dg-final { scan-assembler "_Z2f211svint16x2_t:" } } */ -+/* { dg-final { scan-assembler "_Z2f311svint32x2_t:" } } */ -+/* { dg-final { scan-assembler "_Z2f411svint64x2_t:" } } */ -+/* { dg-final { scan-assembler "_Z2f511svuint8x2_t:" } } */ -+/* { dg-final { scan-assembler "_Z2f612svuint16x2_t:" } } */ -+/* { dg-final { scan-assembler "_Z2f712svuint32x2_t:" } } */ -+/* { dg-final { scan-assembler "_Z2f812svuint64x2_t:" } } */ -+/* { dg-final { scan-assembler "_Z2f913svfloat16x2_t:" } } */ -+/* { dg-final { scan-assembler "_Z3f1013svfloat32x2_t:" } } */ -+/* { dg-final { scan-assembler "_Z3f1113svfloat64x2_t:" } } */ -+ -+/* { dg-final { scan-assembler "_Z2g110svint8x3_t:" } } */ -+/* { dg-final { scan-assembler "_Z2g211svint16x3_t:" } } */ -+/* { dg-final { scan-assembler "_Z2g311svint32x3_t:" } } */ -+/* { dg-final { scan-assembler "_Z2g411svint64x3_t:" } } */ -+/* { dg-final { scan-assembler "_Z2g511svuint8x3_t:" } } */ -+/* { dg-final { scan-assembler "_Z2g612svuint16x3_t:" } } */ -+/* { dg-final { scan-assembler "_Z2g712svuint32x3_t:" } } */ -+/* { dg-final { scan-assembler "_Z2g812svuint64x3_t:" } } */ -+/* { dg-final { scan-assembler "_Z2g913svfloat16x3_t:" } } */ -+/* { dg-final { scan-assembler "_Z3g1013svfloat32x3_t:" } } */ -+/* { dg-final { scan-assembler "_Z3g1113svfloat64x3_t:" } } */ -+ -+/* { dg-final { scan-assembler "_Z2h110svint8x4_t:" } } */ -+/* { dg-final { scan-assembler "_Z2h211svint16x4_t:" } } */ -+/* { dg-final { scan-assembler "_Z2h311svint32x4_t:" } } */ -+/* { dg-final { scan-assembler "_Z2h411svint64x4_t:" } } */ -+/* { dg-final { scan-assembler "_Z2h511svuint8x4_t:" } } */ -+/* { dg-final { scan-assembler "_Z2h612svuint16x4_t:" } } */ -+/* { dg-final { scan-assembler "_Z2h712svuint32x4_t:" } } */ -+/* { dg-final { scan-assembler "_Z2h812svuint64x4_t:" } } */ -+/* { dg-final { scan-assembler "_Z2h913svfloat16x4_t:" } } */ -+/* { dg-final { 
scan-assembler "_Z3h1013svfloat32x4_t:" } } */ -+/* { dg-final { scan-assembler "_Z3h1113svfloat64x4_t:" } } */ -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/set2_1.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/set2_1.C -new file mode 100644 -index 000000000..80c3ad74f ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/set2_1.C -@@ -0,0 +1,45 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-std=c++11 -Wall -Wextra" } */ -+ -+#include -+ -+constexpr uint64_t const_sub (uint64_t a, uint64_t b) { return a - b; } -+uint64_t add (uint64_t a, uint64_t b) { return a + b; } -+ -+svfloat64_t -+f1 (svbool_t pg, svuint8_t u8, svuint8x2_t u8x2, svint8x2_t s8x2, -+ svuint8x3_t u8x3, int x) -+{ -+ const int one = 1; -+ svfloat64_t f64; -+ -+ u8x2 = svset2 (u8x2); /* { dg-error {no matching function for call to 'svset2\(svuint8x2_t\&\)'} } */ -+ u8x2 = svset2 (u8x2, 1); /* { dg-error {no matching function for call to 'svset2\(svuint8x2_t\&, int\)'} } */ -+ u8x2 = svset2 (u8x2, 1, u8, 2); /* { dg-error {no matching function for call to 'svset2\(svuint8x2_t\&, int, svuint8_t\&, int\)'} } */ -+ u8x2 = svset2 (u8, 0, u8); /* { dg-error {no matching function for call to 'svset2\(svuint8_t\&, int, svuint8_t\&\)'} } */ -+ u8x2 = svset2 (s8x2, 0, u8); /* { dg-error {no matching function for call to 'svset2\(svint8x2_t\&, int, svuint8_t\&\)'} } */ -+ u8x2 = svset2 (u8x3, 0, u8); /* { dg-error {no matching function for call to 'svset2\(svuint8x3_t\&, int, svuint8_t\&\)'} } */ -+ u8x2 = svset2 (pg, 0, u8); /* { dg-error {no matching function for call to 'svset2\(svbool_t\&, int, svuint8_t\&\)'} } */ -+ u8x2 = svset2 (u8x2, 0, f64); /* { dg-error {no matching function for call to 'svset2\(svuint8x2_t\&, int, svfloat64_t\&\)'} } */ -+ u8x2 = svset2 (u8x2, 0, u8x2); /* { dg-error {no matching function for call to 'svset2\(svuint8x2_t\&, int, svuint8x2_t\&\)'} } */ -+ u8x2 = svset2 (u8x2, 0, pg); /* { dg-error {no matching function for call to 'svset2\(svuint8x2_t\&, int, svbool_t\&\)'} } */ -+ u8x2 = svset2 (u8x2, x, u8); /* { dg-error "argument 2 of 'svset2' must be an integer constant expression" } */ -+ u8x2 = svset2 (u8x2, 0, u8); -+ s8x2 = svset2 (u8x2, 0, u8); /* { dg-error {cannot convert 'svuint8x2_t' to 'svint8x2_t' in assignment} } */ -+ u8x2 = svset2 (u8x2, 1, u8); -+ u8x2 = svset2 (u8x2, 2, u8); /* { dg-error {passing 2 to argument 2 of 'svset2', which expects a value in the range \[0, 1\]} } */ -+ u8x2 = svset2 (u8x2, 3, u8); /* { dg-error {passing 3 to argument 2 of 'svset2', which expects a value in the range \[0, 1\]} } */ -+ u8x2 = svset2 (u8x2, 4, u8); /* { dg-error {passing 4 to argument 2 of 'svset2', which expects a value in the range \[0, 1\]} } */ -+ u8x2 = svset2 (u8x2, 5, u8); /* { dg-error {passing 5 to argument 2 of 'svset2', which expects a value in the range \[0, 1\]} } */ -+ u8x2 = svset2 (u8x2, ~0U, u8); /* { dg-error {passing [^ ]* to argument 2 of 'svset2', which expects a value in the range \[0, 1\]} } */ -+ u8x2 = svset2 (u8x2, one, u8); -+ u8x2 = svset2 (u8x2, 3 - 2, u8); -+ u8x2 = svset2 (u8x2, 1.0, u8); -+ u8x2 = svset2 (u8x2, const_sub (5, 4), u8); -+ u8x2 = svset2 (u8x2, const_sub (6, 4), u8); /* { dg-error {passing 2 to argument 2 of 'svset2', which expects a value in the range \[0, 1\]} } */ -+ u8x2 = svset2 (u8x2, const_sub (7, 4), u8); /* { dg-error {passing 3 to argument 2 of 'svset2', which expects a value in the range \[0, 1\]} } */ -+ u8x2 = svset2 (u8x2, const_sub (8, 4), u8); /* { dg-error {passing 4 
to argument 2 of 'svset2', which expects a value in the range \[0, 1\]} } */ -+ u8x2 = svset2 (u8x2, add (0, 0), u8); /* { dg-error "argument 2 of 'svset2' must be an integer constant expression" } */ -+ -+ return f64; -+} -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/set2_2.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/set2_2.C -new file mode 100644 -index 000000000..1433b78ba ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/set2_2.C -@@ -0,0 +1,45 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-std=c++11 -Wall -Wextra" } */ -+ -+#include -+ -+constexpr uint64_t const_sub (uint64_t a, uint64_t b) { return a - b; } -+uint64_t add (uint64_t a, uint64_t b) { return a + b; } -+ -+svfloat64_t -+f1 (svbool_t pg, svuint8_t u8, svuint8x2_t u8x2, svint8x2_t s8x2, -+ svuint8x3_t u8x3, int x) -+{ -+ const int one = 1; -+ svfloat64_t f64; -+ -+ u8x2 = svset2_u8 (u8x2); /* { dg-error {too few arguments to function '[^']*'} } */ -+ u8x2 = svset2_u8 (u8x2, 1); /* { dg-error {too few arguments to function '[^']*'} } */ -+ u8x2 = svset2_u8 (u8x2, 1, u8, 2); /* { dg-error {too many arguments to function '[^']*'} } */ -+ u8x2 = svset2_u8 (u8, 0, u8); /* { dg-error {cannot convert 'svuint8_t' to 'svuint8x2_t'} } */ -+ u8x2 = svset2_u8 (s8x2, 0, u8); /* { dg-error {cannot convert 'svint8x2_t' to 'svuint8x2_t'} } */ -+ u8x2 = svset2_u8 (u8x3, 0, u8); /* { dg-error {cannot convert 'svuint8x3_t' to 'svuint8x2_t'} } */ -+ u8x2 = svset2_u8 (pg, 0, u8); /* { dg-error {cannot convert 'svbool_t' to 'svuint8x2_t'} } */ -+ u8x2 = svset2_u8 (u8x2, 0, f64); /* { dg-error {cannot convert 'svfloat64_t' to 'svuint8_t'} } */ -+ u8x2 = svset2_u8 (u8x2, 0, u8x2); /* { dg-error {cannot convert 'svuint8x2_t' to 'svuint8_t'} } */ -+ u8x2 = svset2_u8 (u8x2, 0, pg); /* { dg-error {cannot convert 'svbool_t' to 'svuint8_t'} } */ -+ u8x2 = svset2_u8 (u8x2, x, u8); /* { dg-error "argument 2 of 'svset2_u8' must be an integer constant expression" } */ -+ u8x2 = svset2_u8 (u8x2, 0, u8); -+ s8x2 = svset2_u8 (u8x2, 0, u8); /* { dg-error {cannot convert 'svuint8x2_t' to 'svint8x2_t' in assignment} } */ -+ u8x2 = svset2_u8 (u8x2, 1, u8); -+ u8x2 = svset2_u8 (u8x2, 2, u8); /* { dg-error {passing 2 to argument 2 of 'svset2_u8', which expects a value in the range \[0, 1\]} } */ -+ u8x2 = svset2_u8 (u8x2, 3, u8); /* { dg-error {passing 3 to argument 2 of 'svset2_u8', which expects a value in the range \[0, 1\]} } */ -+ u8x2 = svset2_u8 (u8x2, 4, u8); /* { dg-error {passing 4 to argument 2 of 'svset2_u8', which expects a value in the range \[0, 1\]} } */ -+ u8x2 = svset2_u8 (u8x2, 5, u8); /* { dg-error {passing 5 to argument 2 of 'svset2_u8', which expects a value in the range \[0, 1\]} } */ -+ u8x2 = svset2_u8 (u8x2, ~0U, u8); /* { dg-error {passing [^ ]* to argument 2 of 'svset2_u8', which expects a value in the range \[0, 1\]} } */ -+ u8x2 = svset2_u8 (u8x2, one, u8); -+ u8x2 = svset2_u8 (u8x2, 3 - 2, u8); -+ u8x2 = svset2_u8 (u8x2, 1.0, u8); -+ u8x2 = svset2_u8 (u8x2, const_sub (5, 4), u8); -+ u8x2 = svset2_u8 (u8x2, const_sub (6, 4), u8); /* { dg-error {passing 2 to argument 2 of 'svset2_u8', which expects a value in the range \[0, 1\]} } */ -+ u8x2 = svset2_u8 (u8x2, const_sub (7, 4), u8); /* { dg-error {passing 3 to argument 2 of 'svset2_u8', which expects a value in the range \[0, 1\]} } */ -+ u8x2 = svset2_u8 (u8x2, const_sub (8, 4), u8); /* { dg-error {passing 4 to argument 2 of 'svset2_u8', which expects a value in the range \[0, 1\]} } */ -+ u8x2 = svset2_u8 
(u8x2, add (0, 0), u8); /* { dg-error "argument 2 of 'svset2_u8' must be an integer constant expression" } */ -+ -+ return f64; -+} -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/set3_1.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/set3_1.C -new file mode 100644 -index 000000000..9bb4f7a04 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/set3_1.C -@@ -0,0 +1,45 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-std=c++11 -Wall -Wextra" } */ -+ -+#include -+ -+constexpr uint64_t const_sub (uint64_t a, uint64_t b) { return a - b; } -+uint64_t add (uint64_t a, uint64_t b) { return a + b; } -+ -+svfloat64_t -+f1 (svbool_t pg, svfloat16_t f16, svfloat16x3_t f16x3, svuint16x3_t u16x3, -+ svfloat16x4_t f16x4, int x) -+{ -+ const int one = 1; -+ svfloat64_t f64; -+ -+ f16x3 = svset3 (f16x3); /* { dg-error {no matching function for call to 'svset3\(svfloat16x3_t\&\)'} } */ -+ f16x3 = svset3 (f16x3, 1); /* { dg-error {no matching function for call to 'svset3\(svfloat16x3_t\&, int\)'} } */ -+ f16x3 = svset3 (f16x3, 1, f16, 2); /* { dg-error {no matching function for call to 'svset3\(svfloat16x3_t\&, int, svfloat16_t\&, int\)'} } */ -+ f16x3 = svset3 (f16, 0, f16); /* { dg-error {no matching function for call to 'svset3\(svfloat16_t\&, int, svfloat16_t\&\)'} } */ -+ f16x3 = svset3 (u16x3, 0, f16); /* { dg-error {no matching function for call to 'svset3\(svuint16x3_t\&, int, svfloat16_t\&\)'} } */ -+ f16x3 = svset3 (f16x4, 0, f16); /* { dg-error {no matching function for call to 'svset3\(svfloat16x4_t\&, int, svfloat16_t\&\)'} } */ -+ f16x3 = svset3 (pg, 0, f16); /* { dg-error {no matching function for call to 'svset3\(svbool_t\&, int, svfloat16_t\&\)'} } */ -+ f16x3 = svset3 (f16x3, 0, f64); /* { dg-error {no matching function for call to 'svset3\(svfloat16x3_t\&, int, svfloat64_t\&\)'} } */ -+ f16x3 = svset3 (f16x3, 0, f16x3); /* { dg-error {no matching function for call to 'svset3\(svfloat16x3_t\&, int, svfloat16x3_t\&\)'} } */ -+ f16x3 = svset3 (f16x3, 0, pg); /* { dg-error {no matching function for call to 'svset3\(svfloat16x3_t\&, int, svbool_t\&\)'} } */ -+ f16x3 = svset3 (f16x3, x, f16); /* { dg-error "argument 2 of 'svset3' must be an integer constant expression" } */ -+ f16x3 = svset3 (f16x3, 0, f16); -+ u16x3 = svset3 (f16x3, 0, f16); /* { dg-error {cannot convert 'svfloat16x3_t' to 'svuint16x3_t' in assignment} } */ -+ f16x3 = svset3 (f16x3, 1, f16); -+ f16x3 = svset3 (f16x3, 2, f16); -+ f16x3 = svset3 (f16x3, 3, f16); /* { dg-error {passing 3 to argument 2 of 'svset3', which expects a value in the range \[0, 2\]} } */ -+ f16x3 = svset3 (f16x3, 4, f16); /* { dg-error {passing 4 to argument 2 of 'svset3', which expects a value in the range \[0, 2\]} } */ -+ f16x3 = svset3 (f16x3, 5, f16); /* { dg-error {passing 5 to argument 2 of 'svset3', which expects a value in the range \[0, 2\]} } */ -+ f16x3 = svset3 (f16x3, ~0U, f16); /* { dg-error {passing [^ ]* to argument 2 of 'svset3', which expects a value in the range \[0, 2\]} } */ -+ f16x3 = svset3 (f16x3, one, f16); -+ f16x3 = svset3 (f16x3, 3 - 2, f16); -+ f16x3 = svset3 (f16x3, 1.0, f16); -+ f16x3 = svset3 (f16x3, const_sub (5, 4), f16); -+ f16x3 = svset3 (f16x3, const_sub (6, 4), f16); -+ f16x3 = svset3 (f16x3, const_sub (7, 4), f16); /* { dg-error {passing 3 to argument 2 of 'svset3', which expects a value in the range \[0, 2\]} } */ -+ f16x3 = svset3 (f16x3, const_sub (8, 4), f16); /* { dg-error {passing 4 to argument 2 of 'svset3', which expects a value in the range 
\[0, 2\]} } */ -+ f16x3 = svset3 (f16x3, add (0, 0), f16); /* { dg-error "argument 2 of 'svset3' must be an integer constant expression" } */ -+ -+ return f64; -+} -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/set3_2.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/set3_2.C -new file mode 100644 -index 000000000..0bb604924 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/set3_2.C -@@ -0,0 +1,45 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-std=c++11 -Wall -Wextra" } */ -+ -+#include -+ -+constexpr uint64_t const_sub (uint64_t a, uint64_t b) { return a - b; } -+uint64_t add (uint64_t a, uint64_t b) { return a + b; } -+ -+svfloat64_t -+f1 (svbool_t pg, svfloat16_t f16, svfloat16x3_t f16x3, svuint16x3_t u16x3, -+ svfloat16x4_t f16x4, int x) -+{ -+ const int one = 1; -+ svfloat64_t f64; -+ -+ f16x3 = svset3_f16 (f16x3); /* { dg-error {too few arguments to function '[^']*'} } */ -+ f16x3 = svset3_f16 (f16x3, 1); /* { dg-error {too few arguments to function '[^']*'} } */ -+ f16x3 = svset3_f16 (f16x3, 1, f16, 2); /* { dg-error {too many arguments to function '[^']*'} } */ -+ f16x3 = svset3_f16 (f16, 0, f16); /* { dg-error {cannot convert 'svfloat16_t' to 'svfloat16x3_t'} } */ -+ f16x3 = svset3_f16 (u16x3, 0, f16); /* { dg-error {cannot convert 'svuint16x3_t' to 'svfloat16x3_t'} } */ -+ f16x3 = svset3_f16 (f16x4, 0, f16); /* { dg-error {cannot convert 'svfloat16x4_t' to 'svfloat16x3_t'} } */ -+ f16x3 = svset3_f16 (pg, 0, f16); /* { dg-error {cannot convert 'svbool_t' to 'svfloat16x3_t'} } */ -+ f16x3 = svset3_f16 (f16x3, 0, f64); /* { dg-error {cannot convert 'svfloat64_t' to 'svfloat16_t'} } */ -+ f16x3 = svset3_f16 (f16x3, 0, f16x3); /* { dg-error {cannot convert 'svfloat16x3_t' to 'svfloat16_t'} } */ -+ f16x3 = svset3_f16 (f16x3, 0, pg); /* { dg-error {cannot convert 'svbool_t' to 'svfloat16_t'} } */ -+ f16x3 = svset3_f16 (f16x3, x, f16); /* { dg-error "argument 2 of 'svset3_f16' must be an integer constant expression" } */ -+ f16x3 = svset3_f16 (f16x3, 0, f16); -+ u16x3 = svset3_f16 (f16x3, 0, f16); /* { dg-error {cannot convert 'svfloat16x3_t' to 'svuint16x3_t' in assignment} } */ -+ f16x3 = svset3_f16 (f16x3, 1, f16); -+ f16x3 = svset3_f16 (f16x3, 2, f16); -+ f16x3 = svset3_f16 (f16x3, 3, f16); /* { dg-error {passing 3 to argument 2 of 'svset3_f16', which expects a value in the range \[0, 2\]} } */ -+ f16x3 = svset3_f16 (f16x3, 4, f16); /* { dg-error {passing 4 to argument 2 of 'svset3_f16', which expects a value in the range \[0, 2\]} } */ -+ f16x3 = svset3_f16 (f16x3, 5, f16); /* { dg-error {passing 5 to argument 2 of 'svset3_f16', which expects a value in the range \[0, 2\]} } */ -+ f16x3 = svset3_f16 (f16x3, ~0U, f16); /* { dg-error {passing [^ ]* to argument 2 of 'svset3_f16', which expects a value in the range \[0, 2\]} } */ -+ f16x3 = svset3_f16 (f16x3, one, f16); -+ f16x3 = svset3_f16 (f16x3, 3 - 2, f16); -+ f16x3 = svset3_f16 (f16x3, 1.0, f16); -+ f16x3 = svset3_f16 (f16x3, const_sub (5, 4), f16); -+ f16x3 = svset3_f16 (f16x3, const_sub (6, 4), f16); -+ f16x3 = svset3_f16 (f16x3, const_sub (7, 4), f16); /* { dg-error {passing 3 to argument 2 of 'svset3_f16', which expects a value in the range \[0, 2\]} } */ -+ f16x3 = svset3_f16 (f16x3, const_sub (8, 4), f16); /* { dg-error {passing 4 to argument 2 of 'svset3_f16', which expects a value in the range \[0, 2\]} } */ -+ f16x3 = svset3_f16 (f16x3, add (0, 0), f16); /* { dg-error "argument 2 of 'svset3_f16' must be an integer constant expression" } */ -+ -+ return 
f64; -+} -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/set4_1.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/set4_1.C -new file mode 100644 -index 000000000..dc5dae872 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/set4_1.C -@@ -0,0 +1,45 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-std=c++11 -Wall -Wextra" } */ -+ -+#include -+ -+constexpr uint64_t const_sub (uint64_t a, uint64_t b) { return a - b; } -+uint64_t add (uint64_t a, uint64_t b) { return a + b; } -+ -+svfloat64_t -+f1 (svbool_t pg, svint32_t s32, svint32x4_t s32x4, svfloat32x4_t f32x4, -+ svint32x2_t s32x2, int x) -+{ -+ const int one = 1; -+ svfloat64_t f64; -+ -+ s32x4 = svset4 (s32x4); /* { dg-error {no matching function for call to 'svset4\(svint32x4_t\&\)'} } */ -+ s32x4 = svset4 (s32x4, 1); /* { dg-error {no matching function for call to 'svset4\(svint32x4_t\&, int\)'} } */ -+ s32x4 = svset4 (s32x4, 1, s32, 2); /* { dg-error {no matching function for call to 'svset4\(svint32x4_t\&, int, svint32_t\&, int\)'} } */ -+ s32x4 = svset4 (s32, 0, s32); /* { dg-error {no matching function for call to 'svset4\(svint32_t\&, int, svint32_t\&\)'} } */ -+ s32x4 = svset4 (f32x4, 0, s32); /* { dg-error {no matching function for call to 'svset4\(svfloat32x4_t\&, int, svint32_t\&\)'} } */ -+ s32x4 = svset4 (s32x2, 0, s32); /* { dg-error {no matching function for call to 'svset4\(svint32x2_t\&, int, svint32_t\&\)'} } */ -+ s32x4 = svset4 (pg, 0, s32); /* { dg-error {no matching function for call to 'svset4\(svbool_t\&, int, svint32_t\&\)'} } */ -+ s32x4 = svset4 (s32x4, 0, f64); /* { dg-error {no matching function for call to 'svset4\(svint32x4_t\&, int, svfloat64_t\&\)'} } */ -+ s32x4 = svset4 (s32x4, 0, s32x4); /* { dg-error {no matching function for call to 'svset4\(svint32x4_t\&, int, svint32x4_t\&\)'} } */ -+ s32x4 = svset4 (s32x4, 0, pg); /* { dg-error {no matching function for call to 'svset4\(svint32x4_t\&, int, svbool_t\&\)'} } */ -+ s32x4 = svset4 (s32x4, x, s32); /* { dg-error "argument 2 of 'svset4' must be an integer constant expression" } */ -+ s32x4 = svset4 (s32x4, 0, s32); -+ f32x4 = svset4 (s32x4, 0, s32); /* { dg-error {cannot convert 'svint32x4_t' to 'svfloat32x4_t' in assignment} } */ -+ s32x4 = svset4 (s32x4, 1, s32); -+ s32x4 = svset4 (s32x4, 2, s32); -+ s32x4 = svset4 (s32x4, 3, s32); -+ s32x4 = svset4 (s32x4, 4, s32); /* { dg-error {passing 4 to argument 2 of 'svset4', which expects a value in the range \[0, 3\]} } */ -+ s32x4 = svset4 (s32x4, 5, s32); /* { dg-error {passing 5 to argument 2 of 'svset4', which expects a value in the range \[0, 3\]} } */ -+ s32x4 = svset4 (s32x4, ~0U, s32); /* { dg-error {passing [^ ]* to argument 2 of 'svset4', which expects a value in the range \[0, 3\]} } */ -+ s32x4 = svset4 (s32x4, one, s32); -+ s32x4 = svset4 (s32x4, 3 - 2, s32); -+ s32x4 = svset4 (s32x4, 1.0, s32); -+ s32x4 = svset4 (s32x4, const_sub (5, 4), s32); -+ s32x4 = svset4 (s32x4, const_sub (6, 4), s32); -+ s32x4 = svset4 (s32x4, const_sub (7, 4), s32); -+ s32x4 = svset4 (s32x4, const_sub (8, 4), s32); /* { dg-error {passing 4 to argument 2 of 'svset4', which expects a value in the range \[0, 3\]} } */ -+ s32x4 = svset4 (s32x4, add (0, 0), s32); /* { dg-error "argument 2 of 'svset4' must be an integer constant expression" } */ -+ -+ return f64; -+} -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/set4_2.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/set4_2.C -new file mode 100644 -index 000000000..762a6db74 ---- 
/dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/set4_2.C -@@ -0,0 +1,45 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-std=c++11 -Wall -Wextra" } */ -+ -+#include -+ -+constexpr uint64_t const_sub (uint64_t a, uint64_t b) { return a - b; } -+uint64_t add (uint64_t a, uint64_t b) { return a + b; } -+ -+svfloat64_t -+f1 (svbool_t pg, svint32_t s32, svint32x4_t s32x4, svfloat32x4_t f32x4, -+ svint32x2_t s32x2, int x) -+{ -+ const int one = 1; -+ svfloat64_t f64; -+ -+ s32x4 = svset4_s32 (s32x4); /* { dg-error {too few arguments to function '[^']*'} } */ -+ s32x4 = svset4_s32 (s32x4, 1); /* { dg-error {too few arguments to function '[^']*'} } */ -+ s32x4 = svset4_s32 (s32x4, 1, s32, 2); /* { dg-error {too many arguments to function '[^']*'} } */ -+ s32x4 = svset4_s32 (s32, 0, s32); /* { dg-error {cannot convert 'svint32_t' to 'svint32x4_t'} } */ -+ s32x4 = svset4_s32 (f32x4, 0, s32); /* { dg-error {cannot convert 'svfloat32x4_t' to 'svint32x4_t'} } */ -+ s32x4 = svset4_s32 (s32x2, 0, s32); /* { dg-error {cannot convert 'svint32x2_t' to 'svint32x4_t'} } */ -+ s32x4 = svset4_s32 (pg, 0, s32); /* { dg-error {cannot convert 'svbool_t' to 'svint32x4_t'} } */ -+ s32x4 = svset4_s32 (s32x4, 0, f64); /* { dg-error {cannot convert 'svfloat64_t' to 'svint32_t'} } */ -+ s32x4 = svset4_s32 (s32x4, 0, s32x4); /* { dg-error {cannot convert 'svint32x4_t' to 'svint32_t'} } */ -+ s32x4 = svset4_s32 (s32x4, 0, pg); /* { dg-error {cannot convert 'svbool_t' to 'svint32_t'} } */ -+ s32x4 = svset4_s32 (s32x4, x, s32); /* { dg-error "argument 2 of 'svset4_s32' must be an integer constant expression" } */ -+ s32x4 = svset4_s32 (s32x4, 0, s32); -+ f32x4 = svset4_s32 (s32x4, 0, s32); /* { dg-error {cannot convert 'svint32x4_t' to 'svfloat32x4_t' in assignment} } */ -+ s32x4 = svset4_s32 (s32x4, 1, s32); -+ s32x4 = svset4_s32 (s32x4, 2, s32); -+ s32x4 = svset4_s32 (s32x4, 3, s32); -+ s32x4 = svset4_s32 (s32x4, 4, s32); /* { dg-error {passing 4 to argument 2 of 'svset4_s32', which expects a value in the range \[0, 3\]} } */ -+ s32x4 = svset4_s32 (s32x4, 5, s32); /* { dg-error {passing 5 to argument 2 of 'svset4_s32', which expects a value in the range \[0, 3\]} } */ -+ s32x4 = svset4_s32 (s32x4, ~0U, s32); /* { dg-error {passing [^ ]* to argument 2 of 'svset4_s32', which expects a value in the range \[0, 3\]} } */ -+ s32x4 = svset4_s32 (s32x4, one, s32); -+ s32x4 = svset4_s32 (s32x4, 3 - 2, s32); -+ s32x4 = svset4_s32 (s32x4, 1.0, s32); -+ s32x4 = svset4_s32 (s32x4, const_sub (5, 4), s32); -+ s32x4 = svset4_s32 (s32x4, const_sub (6, 4), s32); -+ s32x4 = svset4_s32 (s32x4, const_sub (7, 4), s32); -+ s32x4 = svset4_s32 (s32x4, const_sub (8, 4), s32); /* { dg-error {passing 4 to argument 2 of 'svset4_s32', which expects a value in the range \[0, 3\]} } */ -+ s32x4 = svset4_s32 (s32x4, add (0, 0), s32); /* { dg-error "argument 2 of 'svset4_s32' must be an integer constant expression" } */ -+ -+ return f64; -+} -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_1.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_1.c -new file mode 100644 -index 000000000..ff2590032 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_1.c -@@ -0,0 +1,5 @@ -+/* { dg-do compile } */ -+ -+int svbool_t; /* { dg-message "note: previous declaration 'int svbool_t'" } */ -+ -+#pragma GCC aarch64 "arm_sve.h" /* { dg-error {'typedef [^'\n]* svbool_t' redeclared as different kind of entity} } */ -diff --git 
a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_10.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_10.c -new file mode 100644 -index 000000000..86d87fa37 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_10.c -@@ -0,0 +1,5 @@ -+/* { dg-do compile } */ -+ -+typedef int svint8x2_t; -+ -+#pragma GCC aarch64 "arm_sve.h" /* { dg-error {conflicting declaration 'typedef struct svint8x2_t svint8x2_t'} } */ -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_11.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_11.c -new file mode 100644 -index 000000000..741d10eaf ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_11.c -@@ -0,0 +1,7 @@ -+/* { dg-do compile } */ -+ -+struct svint8x2_t; -+ -+#pragma GCC aarch64 "arm_sve.h" /* { dg-error {conflicting declaration 'typedef struct svint8x2_t svint8x2_t'} } */ -+ -+svint8_t f (svint8x2_t x) { return x.__val[0]; } /* { dg-error {'x' has incomplete type} } */ -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_12.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_12.c -new file mode 100644 -index 000000000..fc6a07ac6 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_12.c -@@ -0,0 +1,7 @@ -+/* { dg-do compile } */ -+ -+typedef struct svint8x2_t svint8x2_t; -+ -+#pragma GCC aarch64 "arm_sve.h" /* { dg-error {conflicting declaration 'typedef struct svint8x2_t svint8x2_t'} } */ -+ -+svint8_t f (svint8x2_t x) { return x.__val[0]; } /* { dg-error {'x' has incomplete type} } */ -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_13.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_13.c -new file mode 100644 -index 000000000..161aacb7b ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_13.c -@@ -0,0 +1,5 @@ -+/* { dg-do compile } */ -+ -+struct svint8x2_t {}; -+ -+#pragma GCC aarch64 "arm_sve.h" /* { dg-error {conflicting declaration 'typedef struct svint8x2_t svint8x2_t'} } */ -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_14.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_14.c -new file mode 100644 -index 000000000..83191118f ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_14.c -@@ -0,0 +1,5 @@ -+/* { dg-do compile } */ -+ -+enum svpattern { FOO }; /* { dg-message "note: previous definition here" } */ -+ -+#pragma GCC aarch64 "arm_sve.h" /* { dg-error "multiple definition of 'enum svpattern'" } */ -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_15.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_15.c -new file mode 100644 -index 000000000..71e35a4eb ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_15.c -@@ -0,0 +1,8 @@ -+/* { dg-do compile } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+enum svpattern { FOO }; /* { dg-error "multiple definition of 'enum svpattern'" } */ -+enum foo { SV_ALL }; /* { dg-error "'SV_ALL' conflicts with a previous declaration" } */ -+typedef int SV_POW2; /* { dg-error "'typedef int SV_POW2' redeclared as different kind of entity" } */ -+int SV_VL3; /* { dg-error "'int SV_VL3' redeclared as different kind of entity" } */ -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_16.c 
b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_16.c -new file mode 100644 -index 000000000..277064d31 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_16.c -@@ -0,0 +1,5 @@ -+/* { dg-do compile } */ -+ -+struct svpattern { int x; }; -+ -+#pragma GCC aarch64 "arm_sve.h" /* { dg-error "'svpattern' referred to as enum" } */ -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_17.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_17.c -new file mode 100644 -index 000000000..e4bcda6fb ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_17.c -@@ -0,0 +1,5 @@ -+/* { dg-do compile } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+struct svpattern { int x; }; /* { dg-error "'svpattern' referred to as 'struct'" } */ -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_18.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_18.c -new file mode 100644 -index 000000000..b6706150b ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_18.c -@@ -0,0 +1,5 @@ -+/* { dg-do compile } */ -+ -+int svpattern; /* OK in C. */ -+ -+#pragma GCC aarch64 "arm_sve.h" -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_19.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_19.c -new file mode 100644 -index 000000000..c6379f762 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_19.c -@@ -0,0 +1,5 @@ -+/* { dg-do compile } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+int svpattern; /* OK in C. */ -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_2.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_2.c -new file mode 100644 -index 000000000..5baf59932 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_2.c -@@ -0,0 +1,5 @@ -+/* { dg-do compile } */ -+ -+int svint8_t; /* { dg-message "note: previous declaration 'int svint8_t" } */ -+ -+#pragma GCC aarch64 "arm_sve.h" /* { dg-error {'typedef [^'\n]* svint8_t' redeclared as different kind of entity} } */ -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_20.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_20.c -new file mode 100644 -index 000000000..3ba19f596 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_20.c -@@ -0,0 +1,9 @@ -+/* { dg-do compile } */ -+ -+enum foo { SV_VL4 }; -+typedef int SV_POW2; -+int SV_ALL; -+ -+#pragma GCC aarch64 "arm_sve.h" /* { dg-error "'SV_VL4' conflicts with a previous declaration" } */ -+/* { dg-error "'SV_POW2' redeclared as different kind of entity" "" { target *-*-* } .-1 } */ -+/* { dg-error "'SV_ALL' redeclared as different kind of entity" "" { target *-*-* } .-2 } */ -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_3.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_3.c -new file mode 100644 -index 000000000..a8d7bdcc7 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_3.c -@@ -0,0 +1,5 @@ -+/* { dg-do compile } */ -+ -+int svuint16_t; /* { dg-message "note: previous declaration 'int svuint16_t'" } */ -+ -+#pragma GCC aarch64 "arm_sve.h" /* { dg-error {'typedef [^'\n]* svuint16_t' redeclared as different kind of entity} } */ -diff --git 
a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_4.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_4.c -new file mode 100644 -index 000000000..c0563d0ee ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_4.c -@@ -0,0 +1,5 @@ -+/* { dg-do compile } */ -+ -+int svfloat32_t; /* { dg-message "note: previous declaration 'int svfloat32_t'" } */ -+ -+#pragma GCC aarch64 "arm_sve.h" /* { dg-error {'typedef [^'\n]* svfloat32_t' redeclared as different kind of entity} } */ -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_5.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_5.c -new file mode 100644 -index 000000000..ee28e9527 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_5.c -@@ -0,0 +1,5 @@ -+/* { dg-do compile } */ -+ -+typedef int svbool_t; /* { dg-message "note: previous declaration as 'typedef int svbool_t'" } */ -+ -+#pragma GCC aarch64 "arm_sve.h" /* { dg-error {conflicting declaration '[^'\n]* svbool_t'} } */ -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_6.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_6.c -new file mode 100644 -index 000000000..85c17eab6 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_6.c -@@ -0,0 +1,6 @@ -+/* { dg-do compile } */ -+ -+typedef __SVBool_t svbool_t; -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_7.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_7.c -new file mode 100644 -index 000000000..3a0dfb1c0 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_7.c -@@ -0,0 +1,8 @@ -+/* { dg-do compile } */ -+ -+int svint8x2_t; -+ -+#pragma GCC aarch64 "arm_sve.h" /* { dg-error {'typedef struct svint8x2_t svint8x2_t' redeclared as different kind of entity} } */ -+ -+void f (struct svint8x2_t) {} /* { dg-error {incomplete type} } */ -+void g () { int &x = svint8x2_t; } -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_8.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_8.c -new file mode 100644 -index 000000000..9b0df9137 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_8.c -@@ -0,0 +1,7 @@ -+/* { dg-do compile } */ -+ -+struct svint8x2_t; -+ -+#pragma GCC aarch64 "arm_sve.h" /* { dg-error {conflicting declaration 'typedef struct svint8x2_t svint8x2_t'} } */ -+ -+void f (svint8x2_t) {} /* { dg-error {incomplete type} } */ -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_9.c b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_9.c -new file mode 100644 -index 000000000..43068da78 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/type_redef_9.c -@@ -0,0 +1,8 @@ -+/* { dg-do compile } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+int svint8x2_t; /* { dg-error {'int svint8x2_t' redeclared as different kind of entity} } */ -+ -+void f (struct svint8x2_t) {} /* { dg-error {using typedef-name 'svint8x2_t' after 'struct'} } */ -+void g () { int &x = svint8x2_t; } /* { dg-error {expected primary-expression before ';' token} } */ -diff --git a/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/whilele_1.C b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/whilele_1.C -new file mode 100644 -index 000000000..9571e668b ---- /dev/null -+++ 
b/gcc/testsuite/g++.target/aarch64/sve/acle/general-c++/whilele_1.C -@@ -0,0 +1,81 @@ -+// { dg-do compile } -+ -+#include -+ -+enum foo { A, B }; -+ -+void -+test (int8_t s8, int16_t s16, int32_t s32, int64_t s64, -+ uint8_t u8, uint16_t u16, uint32_t u32, uint64_t u64, -+ bool b, foo e, int *ptr, float f32, svbool_t pg, -+ svint32_t vec) -+{ -+ svwhilele_b8 (s32); // { dg-error {no matching function for call to 'svwhilele_b8\(int32_t&\)'} } -+ svwhilele_b8 (s32, s32, s32); // { dg-error {no matching function for call to 'svwhilele_b8\(int32_t&, int32_t&, int32_t&\)'} } -+ -+ svwhilele_b8 (b, b); -+ svwhilele_b8 (e, e); -+ svwhilele_b8 (s8, s8); -+ svwhilele_b8 (u8, u8); -+ svwhilele_b8 (s16, s16); -+ svwhilele_b8 (u16, u16); -+ svwhilele_b8 (ptr, ptr); // { dg-error {no matching function for call to 'svwhilele_b8\(int\*&, int\*&\)'} } -+ // { dg-error {invalid conversion from 'int\*' to '[^']*'} "" { target *-*-* } .-1 } -+ svwhilele_b8 (f32, f32); // { dg-error {call of overloaded 'svwhilele_b8\(float&, float&\)' is ambiguous} } -+ svwhilele_b8 (pg, pg); // { dg-error {no matching function for call to 'svwhilele_b8\(svbool_t&, svbool_t&\)'} } -+ svwhilele_b8 (vec, vec); // { dg-error {no matching function for call to 'svwhilele_b8\(svint32_t&, svint32_t&\)'} } -+ -+ svwhilele_b8 (s32, b); -+ svwhilele_b8 (s32, e); -+ svwhilele_b8 (s32, s8); -+ svwhilele_b8 (s32, u8); -+ svwhilele_b8 (s32, s16); -+ svwhilele_b8 (s32, u16); -+ -+ svwhilele_b8 (u32, b); // { dg-error {call of overloaded 'svwhilele_b8\(uint32_t&, bool&\)' is ambiguous} } -+ svwhilele_b8 (u32, e); // { dg-error {call of overloaded 'svwhilele_b8\(uint32_t&, foo&\)' is ambiguous} } -+ svwhilele_b8 (u32, s8); // { dg-error {call of overloaded 'svwhilele_b8\(uint32_t&, int8_t&\)' is ambiguous} } -+ svwhilele_b8 (u32, u8); // { dg-error {call of overloaded 'svwhilele_b8\(uint32_t&, uint8_t&\)' is ambiguous} } -+ svwhilele_b8 (u32, s16); // { dg-error {call of overloaded 'svwhilele_b8\(uint32_t&, int16_t&\)' is ambiguous} } -+ svwhilele_b8 (u32, u16); // { dg-error {call of overloaded 'svwhilele_b8\(uint32_t&, uint16_t&\)' is ambiguous} } -+ -+ svwhilele_b8 (s32, s32); -+ svwhilele_b8 (s32, u32); // { dg-error {call of overloaded 'svwhilele_b8\(int32_t&, uint32_t&\)' is ambiguous} } -+ svwhilele_b8 (s32, s64); // { dg-error {call of overloaded 'svwhilele_b8\(int32_t&, int64_t&\)' is ambiguous} } -+ svwhilele_b8 (s32, u64); // { dg-error {call of overloaded 'svwhilele_b8\(int32_t&, uint64_t&\)' is ambiguous} } -+ -+ svwhilele_b8 (u32, s32); // { dg-error {call of overloaded 'svwhilele_b8\(uint32_t&, int32_t&\)' is ambiguous} } -+ svwhilele_b8 (u32, u32); -+ svwhilele_b8 (u32, s64); // { dg-error {call of overloaded 'svwhilele_b8\(uint32_t&, int64_t&\)' is ambiguous} } -+ svwhilele_b8 (u32, u64); // { dg-error {call of overloaded 'svwhilele_b8\(uint32_t&, uint64_t&\)' is ambiguous} } -+ -+ svwhilele_b8 (s64, s32); // { dg-error {call of overloaded 'svwhilele_b8\(int64_t&, int32_t&\)' is ambiguous} } -+ svwhilele_b8 (s64, u32); // { dg-error {call of overloaded 'svwhilele_b8\(int64_t&, uint32_t&\)' is ambiguous} } -+ svwhilele_b8 (s64, s64); -+ svwhilele_b8 (s64, u64); // { dg-error {call of overloaded 'svwhilele_b8\(int64_t&, uint64_t&\)' is ambiguous} } -+ -+ svwhilele_b8 (u64, s32); // { dg-error {call of overloaded 'svwhilele_b8\(uint64_t&, int32_t&\)' is ambiguous} } -+ svwhilele_b8 (u64, u32); // { dg-error {call of overloaded 'svwhilele_b8\(uint64_t&, uint32_t&\)' is ambiguous} } -+ svwhilele_b8 (u64, s64); // { dg-error {call 
of overloaded 'svwhilele_b8\(uint64_t&, int64_t&\)' is ambiguous} } -+ svwhilele_b8 (u64, u64); -+ -+ svwhilele_b8 (0, s32); -+ svwhilele_b8 (0, u32); // { dg-error {call of overloaded 'svwhilele_b8\(int, uint32_t&\)' is ambiguous} } -+ svwhilele_b8 (0, s64); // { dg-error {call of overloaded 'svwhilele_b8\(int, int64_t&\)' is ambiguous} } -+ svwhilele_b8 (0, u64); // { dg-error {call of overloaded 'svwhilele_b8\(int, uint64_t&\)' is ambiguous} } -+ -+ svwhilele_b8 (s32, 0); -+ svwhilele_b8 (u32, 0); // { dg-error {call of overloaded 'svwhilele_b8\(uint32_t&, int\)' is ambiguous} } -+ svwhilele_b8 (s64, 0); // { dg-error {call of overloaded 'svwhilele_b8\(int64_t&, int\)' is ambiguous} } -+ svwhilele_b8 (u64, 0); // { dg-error {call of overloaded 'svwhilele_b8\(uint64_t&, int\)' is ambiguous} } -+ -+ svwhilele_b8 (0U, s32); // { dg-error {call of overloaded 'svwhilele_b8\(unsigned int, int32_t&\)' is ambiguous} } -+ svwhilele_b8 (0U, u32); -+ svwhilele_b8 (0U, s64); // { dg-error {call of overloaded 'svwhilele_b8\(unsigned int, int64_t&\)' is ambiguous} } -+ svwhilele_b8 (0U, u64); // { dg-error {call of overloaded 'svwhilele_b8\(unsigned int, uint64_t&\)' is ambiguous} } -+ -+ svwhilele_b8 (s32, 0U); // { dg-error {call of overloaded 'svwhilele_b8\(int32_t&, unsigned int\)' is ambiguous} } -+ svwhilele_b8 (u32, 0U); -+ svwhilele_b8 (s64, 0U); // { dg-error {call of overloaded 'svwhilele_b8\(int64_t&, unsigned int\)' is ambiguous} } -+ svwhilele_b8 (u64, 0U); // { dg-error {call of overloaded 'svwhilele_b8\(uint64_t&, unsigned int\)' is ambiguous} } -+} -diff --git a/gcc/testsuite/g++.target/aarch64/sve/catch_7.C b/gcc/testsuite/g++.target/aarch64/sve/catch_7.C -new file mode 100644 -index 000000000..ac10b6984 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/catch_7.C -@@ -0,0 +1,38 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O" } */ -+ -+#include -+ -+void __attribute__ ((noipa)) -+f1 (void) -+{ -+ throw 1; -+} -+ -+void __attribute__ ((noipa)) -+f2 (svbool_t) -+{ -+ register svint8_t z8 asm ("z8") = svindex_s8 (11, 1); -+ asm volatile ("" :: "w" (z8)); -+ f1 (); -+} -+ -+void __attribute__ ((noipa)) -+f3 (int n) -+{ -+ register double d8 asm ("v8") = 42.0; -+ for (int i = 0; i < n; ++i) -+ { -+ asm volatile ("" : "=w" (d8) : "w" (d8)); -+ try { f2 (svptrue_b8 ()); } catch (int) { break; } -+ } -+ if (d8 != 42.0) -+ __builtin_abort (); -+} -+ -+int -+main (void) -+{ -+ f3 (100); -+ return 0; -+} -diff --git a/gcc/testsuite/g++.target/aarch64/sve/dup_sel_1.C b/gcc/testsuite/g++.target/aarch64/sve/dup_sel_1.C -new file mode 100644 -index 000000000..a59862cf9 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/dup_sel_1.C -@@ -0,0 +1,21 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -msve-vector-bits=256" } */ -+ -+#include -+ -+typedef int32_t vnx4si __attribute__((vector_size(32))); -+ -+void -+foo (int32_t val) -+{ -+ register vnx4si x asm ("z0"); -+ register vnx4si y asm ("z0"); -+ asm volatile ("" : "=w" (y)); -+ val += 1; -+ vnx4si z = { val, val, val, val, val, val, val, val }; -+ x = (vnx4si) { -1, 0, 0, -1, 0, -1, 0, -1 } ? 
z : y; -+ asm volatile ("" :: "w" (x)); -+} -+ -+/* { dg-final { scan-assembler {\tmov\tz0\.s, p[0-7]/m, w[0-9]+\n} } } */ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -diff --git a/gcc/testsuite/g++.target/aarch64/sve/dup_sel_2.C b/gcc/testsuite/g++.target/aarch64/sve/dup_sel_2.C -new file mode 100644 -index 000000000..47aad2d58 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/dup_sel_2.C -@@ -0,0 +1,20 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -msve-vector-bits=256" } */ -+ -+#include <stdint.h> -+ -+typedef int32_t vnx4si __attribute__((vector_size(32))); -+ -+void -+foo (int32_t val) -+{ -+ register vnx4si x asm ("z0"); -+ register vnx4si y asm ("z1"); -+ asm volatile ("" : "=w" (y)); -+ val += 1; -+ vnx4si z = { val, val, val, val, val, val, val, val }; -+ x = (vnx4si) { -1, 0, 0, -1, 0, -1, 0, -1 } ? z : y; -+ asm volatile ("" :: "w" (x)); -+} -+ -+/* { dg-final { scan-assembler {\tmovprfx\tz0, z1\n\tmov\tz0\.s, p[0-7]/m, w[0-9]+\n} } } */ -diff --git a/gcc/testsuite/g++.target/aarch64/sve/dup_sel_3.C b/gcc/testsuite/g++.target/aarch64/sve/dup_sel_3.C -new file mode 100644 -index 000000000..e8ec6f8b4 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/dup_sel_3.C -@@ -0,0 +1,21 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -msve-vector-bits=256" } */ -+ -+#include <stdint.h> -+ -+typedef int32_t vnx4si __attribute__((vector_size(32))); -+typedef float vnx4sf __attribute__((vector_size(32))); -+ -+void -+foo (float val) -+{ -+ register vnx4sf x asm ("z0"); -+ register vnx4sf y asm ("z0"); -+ asm volatile ("" : "=w" (y)); -+ vnx4sf z = { val, val, val, val, val, val, val, val }; -+ x = (vnx4si) { -1, 0, 0, -1, 0, -1, 0, -1 } ? z : y; -+ asm volatile ("" :: "w" (x)); -+} -+ -+/* { dg-final { scan-assembler {\tmov\tz0\.s, p[0-7]/m, s[0-9]+\n} } } */ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -diff --git a/gcc/testsuite/g++.target/aarch64/sve/dup_sel_4.C b/gcc/testsuite/g++.target/aarch64/sve/dup_sel_4.C -new file mode 100644 -index 000000000..32ca59439 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/dup_sel_4.C -@@ -0,0 +1,20 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -msve-vector-bits=256" } */ -+ -+#include <stdint.h> -+ -+typedef int32_t vnx4si __attribute__((vector_size(32))); -+typedef float vnx4sf __attribute__((vector_size(32))); -+ -+void -+foo (float val) -+{ -+ register vnx4sf x asm ("z0"); -+ register vnx4sf y asm ("z1"); -+ asm volatile ("" : "=w" (y)); -+ vnx4sf z = { val, val, val, val, val, val, val, val }; -+ x = (vnx4si) { -1, 0, 0, -1, 0, -1, 0, -1 } ? z : y; -+ asm volatile ("" :: "w" (x)); -+} -+ -+/* { dg-final { scan-assembler {\tmovprfx\tz0, z1\n\tmov\tz0\.s, p[0-7]/m, s[0-9]+\n} } } */ -diff --git a/gcc/testsuite/g++.target/aarch64/sve/dup_sel_5.C b/gcc/testsuite/g++.target/aarch64/sve/dup_sel_5.C -new file mode 100644 -index 000000000..2fb903a91 ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/dup_sel_5.C -@@ -0,0 +1,18 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -msve-vector-bits=256" } */ -+ -+#include <stdint.h> -+ -+typedef int32_t vnx4si __attribute__((vector_size(32))); -+ -+void -+foo (int32_t val) -+{ -+ register vnx4si x asm ("z0"); -+ val += 1; -+ vnx4si y = { val, val, val, val, val, val, val, val }; -+ x = (vnx4si) { -1, 0, 0, -1, 0, -1, 0, -1 } ? 
y : (vnx4si) { 0 }; -+ asm volatile ("" :: "w" (x)); -+} -+ -+/* { dg-final { scan-assembler {\tmovprfx\tz0\.s, p[0-7]/z, z0\.s\n\tmov\tz0\.s, p[0-7]/m, w[0-9]+\n} } } */ -diff --git a/gcc/testsuite/g++.target/aarch64/sve/dup_sel_6.C b/gcc/testsuite/g++.target/aarch64/sve/dup_sel_6.C -new file mode 100644 -index 000000000..f2b0181bb ---- /dev/null -+++ b/gcc/testsuite/g++.target/aarch64/sve/dup_sel_6.C -@@ -0,0 +1,18 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -msve-vector-bits=256" } */ -+ -+#include -+ -+typedef int32_t vnx4si __attribute__((vector_size(32))); -+typedef float vnx4sf __attribute__((vector_size(32))); -+ -+void -+foo (float val) -+{ -+ register vnx4sf x asm ("z0"); -+ vnx4sf y = { val, val, val, val, val, val, val, val }; -+ x = (vnx4si) { -1, 0, 0, -1, 0, -1, 0, -1 } ? y : (vnx4sf) { 0 }; -+ asm volatile ("" :: "w" (x)); -+} -+ -+/* { dg-final { scan-assembler {\tmovprfx\tz0\.s, p[0-7]/z, z0\.s\n\tmov\tz0\.s, p[0-7]/m, s[0-9]+\n} } } */ -diff --git a/gcc/testsuite/gcc.c-torture/execute/builtins/builtins.exp b/gcc/testsuite/gcc.c-torture/execute/builtins/builtins.exp -index acb9eacb4..3560a1ff2 100644 ---- a/gcc/testsuite/gcc.c-torture/execute/builtins/builtins.exp -+++ b/gcc/testsuite/gcc.c-torture/execute/builtins/builtins.exp -@@ -37,7 +37,7 @@ load_lib c-torture.exp - torture-init - set-torture-options $C_TORTURE_OPTIONS {{}} $LTO_TORTURE_OPTIONS - --set additional_flags "-fno-tree-loop-distribute-patterns -fno-tracer -fno-ipa-ra" -+set additional_flags "-fno-tree-loop-distribute-patterns -fno-tracer -fno-ipa-ra -fno-inline-functions" - if [istarget "powerpc-*-darwin*"] { - lappend additional_flags "-Wl,-multiply_defined,suppress" - } -diff --git a/gcc/testsuite/gcc.dg/diag-aka-3.c b/gcc/testsuite/gcc.dg/diag-aka-3.c -new file mode 100644 -index 000000000..a3778ed7d ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/diag-aka-3.c -@@ -0,0 +1,9 @@ -+typedef unsigned int myvec __attribute__((vector_size (16))); -+ -+void f (float x) -+{ -+ myvec y = x; /* { dg-error {incompatible types when initializing type 'myvec' {aka '__vector\([48]\) unsigned int'} using type 'float'} } */ -+ myvec *ptr = &x; /* { dg-error {initialization of 'myvec \*' {aka '__vector\([48]\) unsigned int \*'} from incompatible pointer type 'float \*'} } */ -+ const myvec *const_ptr = &x; /* { dg-error {initialization of 'const myvec \*' {aka 'const __vector\([48]\) unsigned int \*'} from incompatible pointer type 'float \*'} } */ -+ volatile myvec *volatile_ptr = &x; /* { dg-error {initialization of 'volatile myvec \*' {aka 'volatile __vector\([48]\) unsigned int \*'} from incompatible pointer type 'float \*'} } */ -+} -diff --git a/gcc/testsuite/gcc.dg/enum-redef-1.c b/gcc/testsuite/gcc.dg/enum-redef-1.c -new file mode 100644 -index 000000000..b3fa6cbf8 ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/enum-redef-1.c -@@ -0,0 +1,29 @@ -+enum a { A }; -+enum a { B }; /* { dg-bogus "nested redefinition" } */ -+/* { dg-error "redeclaration of 'enum a'" "" { target *-*-* } .-1 } */ -+ -+enum empty {}; /* { dg-error "empty enum is invalid" } */ -+enum empty {}; /* { dg-bogus "nested redefinition" } */ -+/* { dg-error "empty enum is invalid" "" { target *-*-* } .-1 } */ -+ -+enum nested_first { -+ C1 = sizeof(enum nested_first { C1a }), /* { dg-error "nested redefinition of 'enum nested_first" } */ -+ C2 = sizeof(enum nested_first { C2a }) /* { dg-error "redeclaration of 'enum nested_first'" "" } */ -+}; -+ -+enum nested_second { -+ D1, -+ D2 = sizeof(enum nested_second { D2a }), /* { dg-error "nested 
redefinition of 'enum nested_second" } */ -+ D3 = sizeof(enum nested_second { D3a }) /* { dg-error "redeclaration of 'enum nested_second'" "" } */ -+}; -+ -+enum nested_repeat { E }; -+enum nested_repeat { /* { dg-error "redeclaration of 'enum nested_repeat'" "" } */ -+ F = sizeof(enum nested_repeat { Fa }) /* { dg-error "nested redefinition of 'enum nested_repeat" } */ -+}; -+ -+enum nested_empty { -+ G1 = sizeof(enum nested_empty {}), /* { dg-error "nested redefinition of 'enum nested_empty" } */ -+ /* { dg-error "empty enum is invalid" "" { target *-*-* } .-1 } */ -+ G2 = sizeof(enum nested_empty { G2a }) -+}; -diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-1.c b/gcc/testsuite/gcc.dg/graphite/interchange-1.c -index b65d4861e..65a569e71 100644 ---- a/gcc/testsuite/gcc.dg/graphite/interchange-1.c -+++ b/gcc/testsuite/gcc.dg/graphite/interchange-1.c -@@ -48,10 +48,3 @@ main (void) - - return 0; - } -- --/*FIXME: Between isl 0.12 and isl 0.15 the schedule optimizer needs to print --something canonical so that it can be checked in the test. The final code --generated by both are same in this case but the messaged printed are --not consistent. */ -- --/* { dg-final { scan-tree-dump "tiled" "graphite" } } */ -diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-10.c b/gcc/testsuite/gcc.dg/graphite/interchange-10.c -index a955644de..45c248db8 100644 ---- a/gcc/testsuite/gcc.dg/graphite/interchange-10.c -+++ b/gcc/testsuite/gcc.dg/graphite/interchange-10.c -@@ -45,5 +45,3 @@ main (void) - - return 0; - } -- --/* { dg-final { scan-tree-dump "tiled" "graphite" } } */ -diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-11.c b/gcc/testsuite/gcc.dg/graphite/interchange-11.c -index 61028225f..6ba6907a5 100644 ---- a/gcc/testsuite/gcc.dg/graphite/interchange-11.c -+++ b/gcc/testsuite/gcc.dg/graphite/interchange-11.c -@@ -45,5 +45,3 @@ main (void) - - return 0; - } -- --/* { dg-final { scan-tree-dump "tiled" "graphite" } } */ -diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-3.c b/gcc/testsuite/gcc.dg/graphite/interchange-3.c -index 4aec82418..e8539e2d3 100644 ---- a/gcc/testsuite/gcc.dg/graphite/interchange-3.c -+++ b/gcc/testsuite/gcc.dg/graphite/interchange-3.c -@@ -46,5 +46,3 @@ main (void) - - return 0; - } -- --/* { dg-final { scan-tree-dump "tiled" "graphite" } } */ -diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-4.c b/gcc/testsuite/gcc.dg/graphite/interchange-4.c -index 463ecb5a6..1370d5f9d 100644 ---- a/gcc/testsuite/gcc.dg/graphite/interchange-4.c -+++ b/gcc/testsuite/gcc.dg/graphite/interchange-4.c -@@ -45,5 +45,3 @@ main (void) - - return 0; - } -- --/* { dg-final { scan-tree-dump "tiled" "graphite" } } */ -diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-7.c b/gcc/testsuite/gcc.dg/graphite/interchange-7.c -index 50f7dd7f8..b2696dbec 100644 ---- a/gcc/testsuite/gcc.dg/graphite/interchange-7.c -+++ b/gcc/testsuite/gcc.dg/graphite/interchange-7.c -@@ -46,5 +46,3 @@ main (void) - - return 0; - } -- --/* { dg-final { scan-tree-dump "tiled" "graphite" } } */ -diff --git a/gcc/testsuite/gcc.dg/graphite/interchange-9.c b/gcc/testsuite/gcc.dg/graphite/interchange-9.c -index 88a357893..506b5001f 100644 ---- a/gcc/testsuite/gcc.dg/graphite/interchange-9.c -+++ b/gcc/testsuite/gcc.dg/graphite/interchange-9.c -@@ -43,5 +43,3 @@ main (void) - - return 0; - } -- --/* { dg-final { scan-tree-dump "tiled" "graphite" } } */ -diff --git a/gcc/testsuite/gcc.dg/graphite/uns-interchange-9.c b/gcc/testsuite/gcc.dg/graphite/uns-interchange-9.c -index cc108c2bb..a89578032 100644 
---- a/gcc/testsuite/gcc.dg/graphite/uns-interchange-9.c -+++ b/gcc/testsuite/gcc.dg/graphite/uns-interchange-9.c -@@ -44,5 +44,3 @@ main (void) - - return 0; - } -- --/* { dg-final { scan-tree-dump "tiled" "graphite" } } */ -diff --git a/gcc/testsuite/gcc.dg/guality/guality.exp b/gcc/testsuite/gcc.dg/guality/guality.exp -index ca77a446f..89cd896d0 100644 ---- a/gcc/testsuite/gcc.dg/guality/guality.exp -+++ b/gcc/testsuite/gcc.dg/guality/guality.exp -@@ -80,8 +80,22 @@ if {[check_guality " - return 0; - } - "]} { -- gcc-dg-runtest [lsort [glob $srcdir/$subdir/*.c]] "" "" -- gcc-dg-runtest [lsort [glob $srcdir/c-c++-common/guality/*.c]] "" "-Wc++-compat" -+ set general [list] -+ set Og [list] -+ foreach file [lsort [glob $srcdir/c-c++-common/guality/*.c]] { -+ switch -glob -- [file tail $file] { -+ Og-* { lappend Og $file } -+ * { lappend general $file } -+ } -+ } -+ -+ gcc-dg-runtest [lsort [glob $srcdir/$subdir/*.c]] "" "" -+ gcc-dg-runtest $general "" "-Wc++-compat" -+ set-torture-options \ -+ [list "-O0" "-Og"] \ -+ [list {}] \ -+ [list "-Og -flto"] -+ gcc-dg-runtest $Og "" "-Wc++-compat" - } - - if [info exists guality_gdb_name] { -diff --git a/gcc/testsuite/gcc.dg/guality/pr59776.c b/gcc/testsuite/gcc.dg/guality/pr59776.c -index 382abb622..6c1c8165b 100644 ---- a/gcc/testsuite/gcc.dg/guality/pr59776.c -+++ b/gcc/testsuite/gcc.dg/guality/pr59776.c -@@ -12,11 +12,11 @@ foo (struct S *p) - struct S s1, s2; /* { dg-final { gdb-test pr59776.c:17 "s1.f" "5.0" } } */ - s1 = *p; /* { dg-final { gdb-test pr59776.c:17 "s1.g" "6.0" } } */ - s2 = s1; /* { dg-final { gdb-test pr59776.c:17 "s2.f" "0.0" } } */ -- *(int *) &s2.f = 0; /* { dg-final { gdb-test pr59776.c:17 "s2.g" "6.0" } } */ -+ *(int *) &s2.f = 0; /* { dg-final { gdb-test pr59776.c:17 "s2.g" "6.0" { xfail *-*-* } } } */ - asm volatile (NOP : : : "memory"); /* { dg-final { gdb-test pr59776.c:20 "s1.f" "5.0" } } */ - asm volatile (NOP : : : "memory"); /* { dg-final { gdb-test pr59776.c:20 "s1.g" "6.0" } } */ - s2 = s1; /* { dg-final { gdb-test pr59776.c:20 "s2.f" "5.0" } } */ -- asm volatile (NOP : : : "memory"); /* { dg-final { gdb-test pr59776.c:20 "s2.g" "6.0" } } */ -+ asm volatile (NOP : : : "memory"); /* { dg-final { gdb-test pr59776.c:20 "s2.g" "6.0" { xfail *-*-* } } } */ - asm volatile (NOP : : : "memory"); - } - -diff --git a/gcc/testsuite/gcc.dg/ipa/inline-7.c b/gcc/testsuite/gcc.dg/ipa/inline-7.c -index 7dabb14f6..7c6491141 100644 ---- a/gcc/testsuite/gcc.dg/ipa/inline-7.c -+++ b/gcc/testsuite/gcc.dg/ipa/inline-7.c -@@ -1,6 +1,6 @@ - /* Check that early inliner works out that a is empty of parameter 0. 
*/ - /* { dg-do compile } */ --/* { dg-options "-O2 -fdump-tree-einline-optimized -fopt-info-inline -fno-partial-inlining" } */ -+/* { dg-options "-O2 -fdump-tree-einline-optimized -fopt-info-inline -fno-partial-inlining -fno-inline-functions" } */ - void t(void); - int a (int b) - { -diff --git a/gcc/testsuite/gcc.dg/ipa/pr63416.c b/gcc/testsuite/gcc.dg/ipa/pr63416.c -index b5374c51f..5873954fb 100644 ---- a/gcc/testsuite/gcc.dg/ipa/pr63416.c -+++ b/gcc/testsuite/gcc.dg/ipa/pr63416.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -fdump-tree-optimized" } */ -+/* { dg-options "-O2 -fdump-tree-optimized --param early-inlining-insns-O2=14" } */ - #define _UNUSED_ __attribute__((__unused__)) - - typedef int TEST_F30 (int *v); -diff --git a/gcc/testsuite/gcc.dg/ipa/pr93763.c b/gcc/testsuite/gcc.dg/ipa/pr93763.c -index d11705932..aa2e60c5f 100644 ---- a/gcc/testsuite/gcc.dg/ipa/pr93763.c -+++ b/gcc/testsuite/gcc.dg/ipa/pr93763.c -@@ -3,44 +3,48 @@ - - typedef struct a a; - struct a { -- a *b -+ a *b; - } d; --e, k, ah, al; --f(aa) { -+int e, k, ah, al; -+void h(void); -+void -+f(aa) int aa; { - if (aa & 1) - goto g; - f(aa | 2); - g: - h(); - } -+void i(); -+void - l() { -- { - f(072); - i(e, d, 92); -- } - } -+void - ag() { -- { i(e, d, 36); } -+ i(e, d, 36); - } -+void j(); -+void - ai(a *m, a *n, unsigned aa) { - f(aa); - j(k, l, ah, 1); - } -+void - j(int c, a m, int aj, int aa) { - int ak = aa; -- { i(e, d, ak); } -+ i(e, d, ak); - } -+void - i(int c, a *m, unsigned aa) { -- { -- { i(c, (*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*( -+ i(c, (*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*( - *(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*( - *(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*m).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b) - .b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b) - .b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b) - .b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b) - .b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b, 0); -- } -- } - int am = aa; -- ai(ag, al, am); -+ ai((a *) (void *) ag, (a *) (__INTPTR_TYPE__) al, am); - } -diff --git a/gcc/testsuite/gcc.dg/optimize-bswapsi-5.c b/gcc/testsuite/gcc.dg/optimize-bswapsi-5.c -index 5819fd719..b4d8b9a8d 100644 ---- a/gcc/testsuite/gcc.dg/optimize-bswapsi-5.c -+++ b/gcc/testsuite/gcc.dg/optimize-bswapsi-5.c -@@ -1,6 +1,6 @@ - /* { dg-do compile } */ - /* { dg-require-effective-target bswap } */ --/* { dg-options "-O2 -fdump-tree-bswap" } */ -+/* { dg-options "-O2 -fdump-tree-bswap 
-fno-inline-functions" } */ - /* { dg-additional-options "-march=z900" { target s390-*-* } } */ - - struct L { unsigned int l[2]; }; -diff --git a/gcc/testsuite/gcc.dg/pr79983.c b/gcc/testsuite/gcc.dg/pr79983.c -index 84aae6913..1e292d421 100644 ---- a/gcc/testsuite/gcc.dg/pr79983.c -+++ b/gcc/testsuite/gcc.dg/pr79983.c -@@ -8,7 +8,7 @@ struct S { int i, j; }; /* { dg-error "redefinition of 'struct S'" } */ - - enum E; - enum E { A, B, C }; /* { dg-message "originally defined here" } */ --enum E { D, F }; /* { dg-error "nested redefinition of 'enum E'|redeclaration of 'enum E'" } */ -+enum E { D, F }; /* { dg-error "redeclaration of 'enum E'" } */ - - union U; - union U { int i; }; /* { dg-message "originally defined here" } */ -diff --git a/gcc/testsuite/gcc.dg/struct-ret-1.c b/gcc/testsuite/gcc.dg/struct-ret-1.c -index 23c9e9813..330c76ab8 100644 ---- a/gcc/testsuite/gcc.dg/struct-ret-1.c -+++ b/gcc/testsuite/gcc.dg/struct-ret-1.c -@@ -1,5 +1,5 @@ --/* { dg-do run { target hppa*-*-* } } */ --/* { dg-options { -O2 } { target hppa*-*-* } } */ -+/* { dg-do run } */ -+/* { dg-options { -O2 } } */ - extern void abort (void); - extern void exit (int); - typedef struct { -diff --git a/gcc/testsuite/gcc.dg/torture/pr90395.c b/gcc/testsuite/gcc.dg/torture/pr90395.c -new file mode 100644 -index 000000000..eba8750ef ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/torture/pr90395.c -@@ -0,0 +1,12 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-fexceptions -fnon-call-exceptions" } */ -+ -+typedef int v16si __attribute__ ((__vector_size__ (64))); -+ -+void -+rl (int uq) -+{ -+ v16si qw[1]; -+ -+ qw[uq] = (v16si) { uq }; -+} -diff --git a/gcc/testsuite/gcc.dg/torture/pr92690.c b/gcc/testsuite/gcc.dg/torture/pr92690.c -new file mode 100644 -index 000000000..b49f184fc ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/torture/pr92690.c -@@ -0,0 +1,38 @@ -+/* { dg-do run { target *-*-*gnu* } } */ -+/* { dg-additional-options "-D_GNU_SOURCE" } */ -+/* { dg-require-effective-target fenv_exceptions } */ -+ -+#include -+ -+typedef int v4si __attribute__((vector_size(16))); -+typedef float v4sf __attribute__((vector_size(16))); -+ -+void __attribute__((noipa)) -+foo (v4si *dstp, v4sf *srcp) -+{ -+ v4sf src = *srcp; -+ *dstp = (v4si) { src[0], src[1], 3, 4 }; -+} -+ -+void __attribute__((noipa)) -+bar (v4sf *dstp, v4si *srcp) -+{ -+ v4si src = *srcp; -+ *dstp = (v4sf) { src[0], src[1], 3.5, 4.5 }; -+} -+ -+int -+main() -+{ -+ feenableexcept (FE_INVALID|FE_INEXACT); -+ v4sf x = (v4sf) { 1, 2, __builtin_nanf (""), 3.5 }; -+ v4si y; -+ foo (&y, &x); -+ if (y[0] != 1 || y[1] != 2 || y[2] != 3 || y[3] != 4) -+ __builtin_abort (); -+ y = (v4si) { 0, 1, __INT_MAX__, -__INT_MAX__ }; -+ bar (&x, &y); -+ if (x[0] != 0 || x[1] != 1 || x[2] != 3.5 || x[3] != 4.5) -+ __builtin_abort (); -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.dg/torture/pr92715.c b/gcc/testsuite/gcc.dg/torture/pr92715.c -new file mode 100644 -index 000000000..170179c20 ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/torture/pr92715.c -@@ -0,0 +1,17 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-mavx2" { target x86_64-*-* i?86-*-* } } */ -+ -+typedef double v4si __attribute__((vector_size(32))); -+typedef double v2si __attribute__((vector_size(16))); -+ -+void foo (v4si *dstp, v2si *srcp) -+{ -+ v2si src = *srcp; -+ *dstp = (v4si) { src[0], src[1], src[0], src[1] }; -+} -+ -+void bar (v4si *dstp, v2si *srcp) -+{ -+ v2si src = *srcp; -+ *dstp = (v4si) { src[0], src[0], src[0], src[0] }; -+} -diff --git 
a/gcc/testsuite/gcc.dg/tree-ssa/alias-access-path-1.c b/gcc/testsuite/gcc.dg/tree-ssa/alias-access-path-1.c -new file mode 100644 -index 000000000..ba90b56fe ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/tree-ssa/alias-access-path-1.c -@@ -0,0 +1,21 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fdump-tree-fre3" } */ -+struct foo -+{ -+ int val; -+} *fooptr; -+struct bar -+{ -+ struct foo foo; -+ int val2; -+} *barptr; -+int -+test () -+{ -+ struct foo foo = { 0 }; -+ barptr->val2 = 123; -+ *fooptr = foo; -+ return barptr->val2; -+} -+ -+/* { dg-final { scan-tree-dump-times "return 123" 1 "fre3"} } */ -diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-35.c b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-35.c -index d55197bce..24e633869 100644 ---- a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-35.c -+++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-35.c -@@ -16,4 +16,5 @@ v4sf vec_cast_perm(v4si f) - } - - /* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "cddce1" { target { i?86-*-* x86_64-*-* } } } } */ --/* { dg-final { scan-tree-dump-times "\\\(v4sf\\\) " 2 "cddce1" { target { i?86-*-* x86_64-*-* } } } } */ -+/* Catch (v4sf) and (vector(4) float). */ -+/* { dg-final { scan-tree-dump-times " = \\\(v" 2 "cddce1" { target { i?86-*-* x86_64-*-* } } } } */ -diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr92706-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr92706-2.c -new file mode 100644 -index 000000000..37ab9765d ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr92706-2.c -@@ -0,0 +1,19 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fdump-tree-esra" } */ -+ -+typedef __UINT64_TYPE__ uint64_t; -+typedef __UINT32_TYPE__ uint32_t; -+struct S { uint32_t i[2]; } __attribute__((aligned(__alignof__(uint64_t)))); -+typedef uint64_t my_int64 __attribute__((may_alias)); -+uint64_t load (void *p) -+{ -+ struct S u, v, w; -+ uint64_t tem; -+ tem = *(my_int64 *)p; -+ *(my_int64 *)&v = tem; -+ u = v; -+ w = u; -+ return *(my_int64 *)&w; -+} -+ -+/* { dg-final { scan-tree-dump "Created a replacement for v" "esra" } } */ -diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c -index 32d63899b..836a8092a 100644 ---- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c -+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-26.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -fdump-tree-dse1-details -fno-short-enums" } */ -+/* { dg-options "-O2 -fdump-tree-dse1-details -fno-short-enums -fno-tree-fre" } */ - /* { dg-skip-if "temporary variable for constraint_expr is never used" { msp430-*-* } } */ - - enum constraint_expr_type -diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-31.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-31.c -index 6402c81e6..3d429ab15 100644 ---- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-31.c -+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-31.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O -fdump-tree-fre1-details" } */ -+/* { dg-options "-O -fdump-tree-fre1-details -fno-tree-forwprop" } */ - /* { dg-additional-options "-fno-common" { target hppa*-*-hpux* } } */ - - typedef double d128 __attribute__((vector_size(16))); -diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-12.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-12.c -index 67526762f..fff731e8c 100644 ---- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-12.c -+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-12.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -fdump-tree-thread2-details -fdump-tree-thread3-details -fdump-tree-thread4-details -fno-finite-loops" } */ -+/* 
{ dg-options "-O2 -fdump-tree-thread2-details -fdump-tree-thread3-details -fdump-tree-thread4-details -fno-finite-loops --param early-inlining-insns-O2=14 -fno-inline-functions" } */ - /* { dg-final { scan-tree-dump "FSM" "thread2" } } */ - /* { dg-final { scan-tree-dump "FSM" "thread3" } } */ - /* { dg-final { scan-tree-dump "FSM" "thread4" { xfail *-*-* } } } */ -diff --git a/gcc/testsuite/gcc.dg/vect/pr66142.c b/gcc/testsuite/gcc.dg/vect/pr66142.c -index 8c79f2907..a0316f1f0 100644 ---- a/gcc/testsuite/gcc.dg/vect/pr66142.c -+++ b/gcc/testsuite/gcc.dg/vect/pr66142.c -@@ -1,6 +1,6 @@ - /* PR middle-end/66142 */ - /* { dg-do compile } */ --/* { dg-additional-options "-ffast-math -fopenmp-simd" } */ -+/* { dg-additional-options "-ffast-math -fopenmp-simd --param early-inlining-insns-O2=14" } */ - /* { dg-additional-options "-mavx" { target avx_runtime } } */ - - struct A { float x, y; }; -diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-arith-7.c b/gcc/testsuite/gcc.dg/vect/vect-cond-arith-7.c -new file mode 100644 -index 000000000..739b98f59 ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/vect/vect-cond-arith-7.c -@@ -0,0 +1,60 @@ -+/* { dg-require-effective-target scalar_all_fma } */ -+/* { dg-additional-options "-fdump-tree-optimized -ffp-contract=fast" } */ -+ -+#include "tree-vect.h" -+ -+#define N (VECTOR_BITS * 11 / 64 + 3) -+ -+#define DEF(INV) \ -+ void __attribute__ ((noipa)) \ -+ f_##INV (double *restrict a, double *restrict b, \ -+ double *restrict c, double *restrict d) \ -+ { \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ double mb = (INV & 1 ? -b[i] : b[i]); \ -+ double mc = c[i]; \ -+ double md = (INV & 2 ? -d[i] : d[i]); \ -+ a[i] = b[i] < 10 ? mb * mc + md : 10.0; \ -+ } \ -+ } -+ -+#define TEST(INV) \ -+ { \ -+ f_##INV (a, b, c, d); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ double mb = (INV & 1 ? -b[i] : b[i]); \ -+ double mc = c[i]; \ -+ double md = (INV & 2 ? -d[i] : d[i]); \ -+ double fma = __builtin_fma (mb, mc, md); \ -+ if (a[i] != (i % 17 < 10 ? 
fma : 10.0)) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+#define FOR_EACH_INV(T) \ -+ T (0) T (1) T (2) T (3) -+ -+FOR_EACH_INV (DEF) -+ -+int -+main (void) -+{ -+ double a[N], b[N], c[N], d[N]; -+ for (int i = 0; i < N; ++i) -+ { -+ b[i] = i % 17; -+ c[i] = i % 9 + 11; -+ d[i] = i % 13 + 14; -+ asm volatile ("" ::: "memory"); -+ } -+ FOR_EACH_INV (TEST) -+ return 0; -+} -+ -+/* { dg-final { scan-tree-dump-times { = \.COND_FMA } 1 "optimized" { target vect_double_cond_arith } } } */ -+/* { dg-final { scan-tree-dump-times { = \.COND_FMS } 1 "optimized" { target vect_double_cond_arith } } } */ -+/* { dg-final { scan-tree-dump-times { = \.COND_FNMA } 1 "optimized" { target vect_double_cond_arith } } } */ -+/* { dg-final { scan-tree-dump-times { = \.COND_FNMS } 1 "optimized" { target vect_double_cond_arith } } } */ -diff --git a/gcc/testsuite/gcc.dg/vect/vect-mulhrs-1.c b/gcc/testsuite/gcc.dg/vect/vect-mulhrs-1.c -new file mode 100644 -index 000000000..8e46ff6b0 ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/vect/vect-mulhrs-1.c -@@ -0,0 +1,49 @@ -+/* { dg-require-effective-target vect_int } */ -+ -+#include "tree-vect.h" -+#ifndef SIGNEDNESS -+#define SIGNEDNESS signed -+#endif -+#ifndef BIAS -+#define BIAS 0 -+#endif -+ -+#define HRS(x) ((((x) >> (15 - BIAS)) + BIAS) >> BIAS) -+ -+void __attribute__ ((noipa)) -+f (SIGNEDNESS short *restrict a, SIGNEDNESS short *restrict b, -+ SIGNEDNESS short *restrict c, __INTPTR_TYPE__ n) -+{ -+ for (__INTPTR_TYPE__ i = 0; i < n; ++i) -+ a[i] = HRS((SIGNEDNESS int) b[i] * (SIGNEDNESS int) c[i]); -+} -+ -+#define N 50 -+#define BASE1 ((SIGNEDNESS int) -1 < 0 ? -126 : 4) -+#define BASE2 ((SIGNEDNESS int) -1 < 0 ? -101 : 26) -+#define CONST1 0x01AB -+#define CONST2 0x01CD -+ -+int -+main (void) -+{ -+ check_vect (); -+ -+ SIGNEDNESS short a[N], b[N], c[N]; -+ for (int i = 0; i < N; ++i) -+ { -+ b[i] = BASE1 + i * CONST1; -+ c[i] = BASE2 + i * CONST2; -+ asm volatile ("" ::: "memory"); -+ } -+ f (a, b, c, N); -+ for (int i = 0; i < N; ++i) -+ if (a[i] != HRS(BASE1 * BASE2 + i * i * (CONST1 * CONST2) -+ + i * (BASE1 * CONST2 + BASE2 * CONST1))) -+ __builtin_abort (); -+ return 0; -+} -+ -+/* { dg-final { scan-tree-dump "vect_recog_mulhs_pattern: detected" "vect" } } */ -+/* { dg-final { scan-tree-dump {\.MULHS} "vect" { target vect_mulhrs_hi } } } */ -+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_mulhrs_hi } } } */ -diff --git a/gcc/testsuite/gcc.dg/vect/vect-mulhrs-2.c b/gcc/testsuite/gcc.dg/vect/vect-mulhrs-2.c -new file mode 100644 -index 000000000..a16e71c6a ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/vect/vect-mulhrs-2.c -@@ -0,0 +1,9 @@ -+/* { dg-require-effective-target vect_int } */ -+ -+#define SIGNEDNESS unsigned -+ -+#include "vect-mulhrs-1.c" -+ -+/* { dg-final { scan-tree-dump "vect_recog_mulhs_pattern: detected" "vect" } } */ -+/* { dg-final { scan-tree-dump {\.MULHS} "vect" { target vect_mulhrs_hi } } } */ -+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_mulhrs_hi } } } */ -diff --git a/gcc/testsuite/gcc.dg/vect/vect-mulhrs-3.c b/gcc/testsuite/gcc.dg/vect/vect-mulhrs-3.c -new file mode 100644 -index 000000000..e7d44d75d ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/vect/vect-mulhrs-3.c -@@ -0,0 +1,9 @@ -+/* { dg-require-effective-target vect_int } */ -+ -+#define BIAS 1 -+ -+#include "vect-mulhrs-1.c" -+ -+/* { dg-final { scan-tree-dump "vect_recog_mulhs_pattern: detected" "vect" } } */ -+/* { dg-final { scan-tree-dump {\.MULHRS} "vect" { target 
vect_mulhrs_hi } } } */ -+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_mulhrs_hi } } } */ -diff --git a/gcc/testsuite/gcc.dg/vect/vect-mulhrs-4.c b/gcc/testsuite/gcc.dg/vect/vect-mulhrs-4.c -new file mode 100644 -index 000000000..e12176335 ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/vect/vect-mulhrs-4.c -@@ -0,0 +1,10 @@ -+/* { dg-require-effective-target vect_int } */ -+ -+#define SIGNEDNESS unsigned -+#define BIAS 1 -+ -+#include "vect-mulhrs-1.c" -+ -+/* { dg-final { scan-tree-dump "vect_recog_mulhs_pattern: detected" "vect" } } */ -+/* { dg-final { scan-tree-dump {\.MULHRS} "vect" { target vect_mulhrs_hi } } } */ -+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" { target vect_mulhrs_hi } } } */ -diff --git a/gcc/testsuite/gcc.dg/vect/vect-sdiv-pow2-1.c b/gcc/testsuite/gcc.dg/vect/vect-sdiv-pow2-1.c -new file mode 100644 -index 000000000..be70bc6c4 ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/vect/vect-sdiv-pow2-1.c -@@ -0,0 +1,79 @@ -+/* { dg-require-effective-target vect_int } */ -+ -+#include "tree-vect.h" -+ -+#define DIV(x,y) ((x)/(y)) -+#define MOD(x,y) ((x)%(y)) -+ -+#define TEMPLATE(PO2,OP) \ -+void __attribute__ ((noipa)) \ -+f_##PO2##_##OP (int *restrict a, int *restrict b, __INTPTR_TYPE__ n) \ -+{ \ -+ for (__INTPTR_TYPE__ i = 0; i < n; ++i) \ -+ a[i] = OP (b[i], (1 << PO2)); \ -+} -+#define TEMPLATES(PO2) \ -+TEMPLATE (PO2,DIV); \ -+TEMPLATE (PO2,MOD); -+ -+TEMPLATES (1); -+TEMPLATES (2); -+TEMPLATES (3); -+TEMPLATES (7); -+TEMPLATES (8); -+TEMPLATES (10); -+TEMPLATES (15); -+TEMPLATES (16); -+TEMPLATES (20); -+ -+typedef void (*func_t) (int *, int *, __INTPTR_TYPE__); -+typedef struct { -+ int po2; -+ func_t div; -+ func_t mod; -+} fn_t; -+const fn_t fns[] = { -+#define FN_PAIR(PO2) { PO2, f_##PO2##_DIV, f_##PO2##_MOD } -+ FN_PAIR (1), -+ FN_PAIR (2), -+ FN_PAIR (3), -+ FN_PAIR (7), -+ FN_PAIR (8), -+ FN_PAIR (10), -+ FN_PAIR (15), -+ FN_PAIR (16), -+ FN_PAIR (20), -+}; -+ -+int __attribute__ ((noipa, noinline)) -+power2 (int x) -+{ -+ return 1 << x; -+} -+ -+#define N 50 -+ -+int -+main (void) -+{ -+ int a[N], b[N], c[N]; -+ -+ for (int i = 0; i < (sizeof(fns)/sizeof(fns[0])); i++) -+ { -+ int p = power2 (fns[i].po2); -+ for (int j = 0; j < N; j++) -+ a[j] = ((p << 4) * j) / (N - 1) - (p << 5); -+ -+ fns[i].div (b, a, N); -+ fns[i].mod (c, a, N); -+ -+ for (int j = 0; j < N; j++) -+ if (a[j] != (b[j] * p + c[j])) -+ __builtin_abort (); -+ } -+ -+ return 0; -+} -+ -+/* { dg-final { scan-tree-dump {\.DIV_POW2} "vect" { target vect_sdiv_pow2_si } } } */ -+/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 18 "vect" { target vect_sdiv_pow2_si } } } */ -diff --git a/gcc/testsuite/gcc.dg/winline-3.c b/gcc/testsuite/gcc.dg/winline-3.c -index 7b7c8c5b9..7043a2760 100644 ---- a/gcc/testsuite/gcc.dg/winline-3.c -+++ b/gcc/testsuite/gcc.dg/winline-3.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-Winline -O2 --param max-inline-insns-single=1 --param inline-min-speedup=100 -fgnu89-inline" } */ -+/* { dg-options "-Winline -O2 --param max-inline-insns-single-O2=1 --param inline-min-speedup-O2=100 -fgnu89-inline" } */ - - void big (void); - inline int q(void) /* { dg-warning "max-inline-insns-single" } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/acle/jcvt_1.c b/gcc/testsuite/gcc.target/aarch64/acle/jcvt_1.c -new file mode 100644 -index 000000000..0c900b1b5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/acle/jcvt_1.c -@@ -0,0 +1,15 @@ -+/* Test the __jcvt ACLE intrinsic. 
*/ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -march=armv8.3-a" } */ -+ -+#include <arm_acle.h> -+ -+#ifdef __ARM_FEATURE_JCVT -+int32_t -+test_jcvt (double a) -+{ -+ return __jcvt (a); -+} -+#endif -+ -+/* { dg-final { scan-assembler-times "fjcvtzs\tw\[0-9\]+, d\[0-9\]+\n" 1 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/acle/rintnzx_1.c b/gcc/testsuite/gcc.target/aarch64/acle/rintnzx_1.c -new file mode 100644 -index 000000000..125720848 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/acle/rintnzx_1.c -@@ -0,0 +1,73 @@ -+/* Test the __rint[32,64][z,x] intrinsics. */ -+ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -march=armv8.5-a" } */ -+ -+#include <arm_acle.h> -+ -+#ifdef __ARM_FEATURE_FRINT -+float -+foo_32z_f32_scal (float a) -+{ -+ return __rint32zf (a); -+} -+ -+/* { dg-final { scan-assembler-times "frint32z\ts\[0-9\]+, s\[0-9\]+\n" 1 } } */ -+ -+double -+foo_32z_f64_scal (double a) -+{ -+ return __rint32z (a); -+} -+ -+/* { dg-final { scan-assembler-times "frint32z\td\[0-9\]+, d\[0-9\]+\n" 1 } } */ -+ -+float -+foo_32x_f32_scal (float a) -+{ -+ return __rint32xf (a); -+} -+ -+/* { dg-final { scan-assembler-times "frint32x\ts\[0-9\]+, s\[0-9\]+\n" 1 } } */ -+ -+double -+foo_32x_f64_scal (double a) -+{ -+ return __rint32x (a); -+} -+ -+/* { dg-final { scan-assembler-times "frint32x\td\[0-9\]+, d\[0-9\]+\n" 1 } } */ -+ -+float -+foo_64z_f32_scal (float a) -+{ -+ return __rint64zf (a); -+} -+ -+/* { dg-final { scan-assembler-times "frint64z\ts\[0-9\]+, s\[0-9\]+\n" 1 } } */ -+ -+double -+foo_64z_f64_scal (double a) -+{ -+ return __rint64z (a); -+} -+ -+/* { dg-final { scan-assembler-times "frint64z\td\[0-9\]+, d\[0-9\]+\n" 1 } } */ -+ -+float -+foo_64x_f32_scal (float a) -+{ -+ return __rint64xf (a); -+} -+ -+/* { dg-final { scan-assembler-times "frint64x\ts\[0-9\]+, s\[0-9\]+\n" 1 } } */ -+ -+double -+foo_64x_f64_scal (double a) -+{ -+ return __rint64x (a); -+} -+ -+/* { dg-final { scan-assembler-times "frint64x\td\[0-9\]+, d\[0-9\]+\n" 1 } } */ -+ -+#endif -diff --git a/gcc/testsuite/gcc.target/aarch64/acle/rng_1.c b/gcc/testsuite/gcc.target/aarch64/acle/rng_1.c -new file mode 100644 -index 000000000..1fbdb6276 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/acle/rng_1.c -@@ -0,0 +1,53 @@ -+/* Test the __rndr ACLE intrinsic. */ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -march=armv8.5-a+rng" } */ -+ -+#include <arm_acle.h> -+ -+#ifdef __ARM_FEATURE_RNG -+/* Check that instruction is generated when status result is unused. */ -+uint64_t -+test_rndr_no_stat (void) -+{ -+ uint64_t res; -+ __rndr (&res); -+ return res; -+} -+ -+/* Check that instruction is generated when random number result -+ is unused. */ -+int -+test_rndr_error_check (void) -+{ -+ uint64_t res; -+ int fail = __rndr (&res); -+ if (fail) -+ return 0; -+ return -1; -+} -+ -+/* { dg-final { scan-assembler-times "mrs\tx..?, RNDR\n" 2 } } */ -+ -+/* Check that instruction is generated when status result is unused. */ -+uint64_t -+test_rndrrs_no_stat (void) -+{ -+ uint64_t res; -+ __rndrrs (&res); -+ return res; -+} -+ -+/* Check that instruction is generated when random number result -+ is unused. 
*/ -+int -+test_rndrrs_error_check (void) -+{ -+ uint64_t res; -+ int fail = __rndrrs (&res); -+ if (fail) -+ return 0; -+ return -1; -+} -+ -+/* { dg-final { scan-assembler-times "mrs\tx..?, RNDRRS\n" 2 } } */ -+#endif -diff --git a/gcc/testsuite/gcc.target/aarch64/acle/tme.c b/gcc/testsuite/gcc.target/aarch64/acle/tme.c -new file mode 100644 -index 000000000..5df93b1dc ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/acle/tme.c -@@ -0,0 +1,34 @@ -+/* Test the TME intrinsics. */ -+ -+/* { dg-do compile } */ -+/* { dg-options "-save-temps -O2 -march=armv8-a+tme" } */ -+ -+#include "arm_acle.h" -+ -+#define tcancel_reason 0x234 -+ -+unsigned -+check_tme (void) -+{ -+ unsigned status = __tstart (); -+ if (status == 0) -+ { -+ if (__ttest () == 2) -+ { -+ __tcancel (tcancel_reason & _TMFAILURE_REASON); -+ return tcancel_reason; -+ } -+ -+ __tcommit (); -+ return 0; -+ } -+ else if (status & _TMFAILURE_NEST) -+ return _TMFAILURE_NEST; -+ else if (status & _TMFAILURE_TRIVIAL) -+ return _TMFAILURE_TRIVIAL; -+} -+ -+/* { dg-final { scan-assembler "tstart\tx..?\n" } } */ -+/* { dg-final { scan-assembler "tcancel\t#564\n" } } */ -+/* { dg-final { scan-assembler "ttest\tx..?\n" } } */ -+/* { dg-final { scan-assembler "tcommit\n" } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_dup.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_dup.c -new file mode 100644 -index 000000000..c42c7acbb ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_dup.c -@@ -0,0 +1,85 @@ -+/* { dg-do assemble { target { aarch64*-*-* } } } */ -+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ -+/* { dg-options "-O2" } */ -+/* { dg-add-options arm_v8_2a_bf16_neon } */ -+/* { dg-additional-options "-save-temps" } */ -+ -+#include -+ -+float32x2_t test_vcreate (float32x2_t r, uint64_t a, uint64_t b) -+{ -+ bfloat16x4_t _a = vcreate_bf16(a); -+ bfloat16x4_t _b = vcreate_bf16(b); -+ -+ return vbfdot_f32 (r, _a, _b); -+} -+/* { dg-final { scan-assembler {bfdot\tv[0-9]+.2s, v[0-9]+.4h, v[0-9]+.4h} } } */ -+ -+bfloat16x4_t test_vset_lane_bf16 (bfloat16_t a, bfloat16x4_t b) -+{ -+ return vset_lane_bf16 (a, b, 3); -+} -+ -+bfloat16x8_t test_vsetq_lane_bf16 (bfloat16_t a, bfloat16x8_t b) -+{ -+ return vsetq_lane_bf16 (a, b, 7); -+} -+/* { dg-final { scan-assembler-times "ins\\t" 2 } } */ -+ -+bfloat16x4_t vdup_test (bfloat16_t a) -+{ -+ return vdup_n_bf16 (a); -+} -+/* { dg-final { scan-assembler "dup\\tv\[0-9\]+\.4h, v\[0-9\]+.h\\\[0\\\]" } } */ -+ -+bfloat16x8_t vdupq_test (bfloat16_t a) -+{ -+ return vdupq_n_bf16 (a); -+} -+ -+bfloat16x8_t test_vdupq_lane_bf16 (bfloat16x4_t a) -+{ -+ return vdupq_lane_bf16 (a, 1); -+} -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+.h\\\[0\\\]" 2 } } */ -+ -+bfloat16_t test_vget_lane_bf16 (bfloat16x4_t a) -+{ -+ return vget_lane_bf16 (a, 1); -+} -+/* { dg-final { scan-assembler-times "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[1\\\]" 2 } } */ -+ -+bfloat16x4_t test_vdup_lane_bf16 (bfloat16x4_t a) -+{ -+ return vdup_lane_bf16 (a, 1); -+} -+/* { dg-final { scan-assembler "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[1\\\]" } } */ -+ -+bfloat16x4_t test_vdup_laneq_bf16 (bfloat16x8_t a) -+{ -+ return vdup_laneq_bf16 (a, 7); -+} -+/* { dg-final { scan-assembler "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[7\\\]" } } */ -+ -+bfloat16x8_t test_vdupq_laneq_bf16 (bfloat16x8_t a) -+{ -+ return vdupq_laneq_bf16 (a, 5); -+} -+/* { dg-final { scan-assembler "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[5\\\]" } } */ -+ -+bfloat16_t 
test_vduph_lane_bf16 (bfloat16x4_t a) -+{ -+ return vduph_lane_bf16 (a, 3); -+} -+/* { dg-final { scan-assembler "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[3\\\]" } } */ -+ -+bfloat16_t test_vgetq_lane_bf16 (bfloat16x8_t a) -+{ -+ return vgetq_lane_bf16 (a, 7); -+} -+ -+bfloat16_t test_vduph_laneq_bf16 (bfloat16x8_t a) -+{ -+ return vduph_laneq_bf16 (a, 7); -+} -+/* { dg-final { scan-assembler-times "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[7\\\]" 2 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_reinterpret.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_reinterpret.c -new file mode 100644 -index 000000000..f5adf40c6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_reinterpret.c -@@ -0,0 +1,466 @@ -+/* { dg-do assemble { target { aarch64*-*-* } } } */ -+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ -+/* { dg-add-options arm_v8_2a_bf16_neon } */ -+/* { dg-additional-options "-save-temps" } */ -+ -+#include -+ -+float32x2_t -+test_vbfdot_f32_s8 (float32x2_t r, int8x8_t a, int8x8_t b) -+{ -+ bfloat16x4_t _a = vreinterpret_bf16_s8(a); -+ bfloat16x4_t _b = vreinterpret_bf16_s8(b); -+ -+ return vbfdot_f32 (r, _a, _b); -+} -+ -+float32x2_t -+test_vbfdot_f32_s16 (float32x2_t r, int16x4_t a, int16x4_t b) -+{ -+ bfloat16x4_t _a = vreinterpret_bf16_s16(a); -+ bfloat16x4_t _b = vreinterpret_bf16_s16(b); -+ -+ return vbfdot_f32 (r, _a, _b); -+} -+ -+float32x2_t -+test_vbfdot_f32_s32 (float32x2_t r, int32x2_t a, int32x2_t b) -+{ -+ bfloat16x4_t _a = vreinterpret_bf16_s32(a); -+ bfloat16x4_t _b = vreinterpret_bf16_s32(b); -+ -+ return vbfdot_f32 (r, _a, _b); -+} -+ -+float32x2_t -+test_vbfdot_f32_s64 (float32x2_t r, int64x1_t a, int64x1_t b) -+{ -+ bfloat16x4_t _a = vreinterpret_bf16_s64(a); -+ bfloat16x4_t _b = vreinterpret_bf16_s64(b); -+ -+ return vbfdot_f32 (r, _a, _b); -+} -+ -+float32x2_t -+test_vbfdot_f32_u8 (float32x2_t r, uint8x8_t a, uint8x8_t b) -+{ -+ bfloat16x4_t _a = vreinterpret_bf16_u8(a); -+ bfloat16x4_t _b = vreinterpret_bf16_u8(b); -+ -+ return vbfdot_f32 (r, _a, _b); -+} -+ -+float32x2_t -+test_vbfdot_f32_u16 (float32x2_t r, uint16x4_t a, uint16x4_t b) -+{ -+ bfloat16x4_t _a = vreinterpret_bf16_u16(a); -+ bfloat16x4_t _b = vreinterpret_bf16_u16(b); -+ -+ return vbfdot_f32 (r, _a, _b); -+} -+ -+float32x2_t -+test_vbfdot_f32_u32 (float32x2_t r, uint32x2_t a, uint32x2_t b) -+{ -+ bfloat16x4_t _a = vreinterpret_bf16_u32(a); -+ bfloat16x4_t _b = vreinterpret_bf16_u32(b); -+ -+ return vbfdot_f32 (r, _a, _b); -+} -+ -+float32x2_t -+test_vbfdot_f32_u64 (float32x2_t r, uint64x1_t a, uint64x1_t b) -+{ -+ bfloat16x4_t _a = vreinterpret_bf16_u64(a); -+ bfloat16x4_t _b = vreinterpret_bf16_u64(b); -+ -+ return vbfdot_f32 (r, _a, _b); -+} -+ -+float32x2_t -+test_vbfdot_f32_p8 (float32x2_t r, poly8x8_t a, poly8x8_t b) -+{ -+ bfloat16x4_t _a = vreinterpret_bf16_p8(a); -+ bfloat16x4_t _b = vreinterpret_bf16_p8(b); -+ -+ return vbfdot_f32 (r, _a, _b); -+} -+ -+float32x2_t -+test_vbfdot_f32_p16 (float32x2_t r, poly16x4_t a, poly16x4_t b) -+{ -+ bfloat16x4_t _a = vreinterpret_bf16_p16(a); -+ bfloat16x4_t _b = vreinterpret_bf16_p16(b); -+ -+ return vbfdot_f32 (r, _a, _b); -+} -+ -+float32x2_t -+test_vbfdot_f32_p64 (float32x2_t r, poly64x1_t a, poly64x1_t b) -+{ -+ bfloat16x4_t _a = vreinterpret_bf16_p64(a); -+ bfloat16x4_t _b = vreinterpret_bf16_p64(b); -+ -+ return vbfdot_f32 (r, _a, _b); -+} -+ -+float32x2_t -+test_vbfdot_f32_f16 (float32x2_t r, float16x4_t a, float16x4_t b) -+{ -+ bfloat16x4_t _a = vreinterpret_bf16_f16(a); -+ 
bfloat16x4_t _b = vreinterpret_bf16_f16(b); -+ -+ return vbfdot_f32 (r, _a, _b); -+} -+ -+float32x2_t -+test_vbfdot_f32_f32 (float32x2_t r, float32x2_t a, float32x2_t b) -+{ -+ bfloat16x4_t _a = vreinterpret_bf16_f32(a); -+ bfloat16x4_t _b = vreinterpret_bf16_f32(b); -+ -+ return vbfdot_f32 (r, _a, _b); -+} -+ -+float32x2_t -+test_vbfdot_f32_f64 (float32x2_t r, float64x1_t a, float64x1_t b) -+{ -+ bfloat16x4_t _a = vreinterpret_bf16_f64(a); -+ bfloat16x4_t _b = vreinterpret_bf16_f64(b); -+ -+ return vbfdot_f32 (r, _a, _b); -+} -+ -+float32x4_t -+test_vbfdotq_f32_s8 (float32x4_t r, int8x16_t a, int8x16_t b) -+{ -+ bfloat16x8_t _a = vreinterpretq_bf16_s8(a); -+ bfloat16x8_t _b = vreinterpretq_bf16_s8(b); -+ -+ return vbfdotq_f32 (r, _a, _b); -+} -+ -+float32x4_t -+test_vbfdotq_f32_s16 (float32x4_t r, int16x8_t a, int16x8_t b) -+{ -+ bfloat16x8_t _a = vreinterpretq_bf16_s16(a); -+ bfloat16x8_t _b = vreinterpretq_bf16_s16(b); -+ -+ return vbfdotq_f32 (r, _a, _b); -+} -+ -+float32x4_t -+test_vbfdotq_f32_s32 (float32x4_t r, int32x4_t a, int32x4_t b) -+{ -+ bfloat16x8_t _a = vreinterpretq_bf16_s32(a); -+ bfloat16x8_t _b = vreinterpretq_bf16_s32(b); -+ -+ return vbfdotq_f32 (r, _a, _b); -+} -+ -+float32x4_t -+test_vbfdotq_f32_s64 (float32x4_t r, int64x2_t a, int64x2_t b) -+{ -+ bfloat16x8_t _a = vreinterpretq_bf16_s64(a); -+ bfloat16x8_t _b = vreinterpretq_bf16_s64(b); -+ -+ return vbfdotq_f32 (r, _a, _b); -+} -+ -+float32x4_t -+test_vbfdotq_f32_u8 (float32x4_t r, uint8x16_t a, uint8x16_t b) -+{ -+ bfloat16x8_t _a = vreinterpretq_bf16_u8(a); -+ bfloat16x8_t _b = vreinterpretq_bf16_u8(b); -+ -+ return vbfdotq_f32 (r, _a, _b); -+} -+ -+float32x4_t -+test_vbfdotq_f32_u16 (float32x4_t r, uint16x8_t a, uint16x8_t b) -+{ -+ bfloat16x8_t _a = vreinterpretq_bf16_u16(a); -+ bfloat16x8_t _b = vreinterpretq_bf16_u16(b); -+ -+ return vbfdotq_f32 (r, _a, _b); -+} -+ -+float32x4_t -+test_vbfdotq_f32_u32 (float32x4_t r, uint32x4_t a, uint32x4_t b) -+{ -+ bfloat16x8_t _a = vreinterpretq_bf16_u32(a); -+ bfloat16x8_t _b = vreinterpretq_bf16_u32(b); -+ -+ return vbfdotq_f32 (r, _a, _b); -+} -+ -+float32x4_t -+test_vbfdotq_f32_u64 (float32x4_t r, uint64x2_t a, uint64x2_t b) -+{ -+ bfloat16x8_t _a = vreinterpretq_bf16_u64(a); -+ bfloat16x8_t _b = vreinterpretq_bf16_u64(b); -+ -+ return vbfdotq_f32 (r, _a, _b); -+} -+ -+float32x4_t -+test_vbfdotq_f32_p8 (float32x4_t r, poly8x16_t a, poly8x16_t b) -+{ -+ bfloat16x8_t _a = vreinterpretq_bf16_p8(a); -+ bfloat16x8_t _b = vreinterpretq_bf16_p8(b); -+ -+ return vbfdotq_f32 (r, _a, _b); -+} -+ -+float32x4_t -+test_vbfdotq_f32_p16 (float32x4_t r, poly16x8_t a, poly16x8_t b) -+{ -+ bfloat16x8_t _a = vreinterpretq_bf16_p16(a); -+ bfloat16x8_t _b = vreinterpretq_bf16_p16(b); -+ -+ return vbfdotq_f32 (r, _a, _b); -+} -+ -+float32x4_t -+test_vbfdotq_f32_p64 (float32x4_t r, poly64x2_t a, poly64x2_t b) -+{ -+ bfloat16x8_t _a = vreinterpretq_bf16_p64(a); -+ bfloat16x8_t _b = vreinterpretq_bf16_p64(b); -+ -+ return vbfdotq_f32 (r, _a, _b); -+} -+ -+float32x4_t -+test_vbfdotq_f32_p128 (float32x4_t r, poly128_t a, poly128_t b) -+{ -+ bfloat16x8_t _a = vreinterpretq_bf16_p128(a); -+ bfloat16x8_t _b = vreinterpretq_bf16_p128(b); -+ -+ return vbfdotq_f32 (r, _a, _b); -+} -+ -+float32x4_t -+test_vbfdotq_f32_f16 (float32x4_t r, float16x8_t a, float16x8_t b) -+{ -+ bfloat16x8_t _a = vreinterpretq_bf16_f16(a); -+ bfloat16x8_t _b = vreinterpretq_bf16_f16(b); -+ -+ return vbfdotq_f32 (r, _a, _b); -+} -+ -+float32x4_t -+test_vbfdotq_f32_f32 (float32x4_t r, float32x4_t a, float32x4_t b) -+{ -+ 
bfloat16x8_t _a = vreinterpretq_bf16_f32(a); -+ bfloat16x8_t _b = vreinterpretq_bf16_f32(b); -+ -+ return vbfdotq_f32 (r, _a, _b); -+} -+ -+float32x4_t -+test_vbfdotq_f32_f64 (float32x4_t r, float64x2_t a, float64x2_t b) -+{ -+ bfloat16x8_t _a = vreinterpretq_bf16_f64(a); -+ bfloat16x8_t _b = vreinterpretq_bf16_f64(b); -+ -+ return vbfdotq_f32 (r, _a, _b); -+} -+ -+/* { dg-final { scan-assembler-times {bfdot\tv[0-9]+.2s, v[0-9]+.4h, v[0-9]+.4h} 14 } } */ -+/* { dg-final { scan-assembler-times {bfdot\tv[0-9]+.4s, v[0-9]+.8h, v[0-9]+.8h} 15 } } */ -+ -+int8x8_t test_vreinterpret_s8_bf16 (bfloat16x4_t a, int8x8_t b) -+{ -+ int8x8_t _a = vreinterpret_s8_bf16 (a); -+ return vadd_s8 (_a, b); -+} -+ -+int16x4_t test_vreinterpret_s16_bf16 (bfloat16x4_t a, int16x4_t b) -+{ -+ int16x4_t _a = vreinterpret_s16_bf16 (a); -+ return vadd_s16 (_a, b); -+} -+ -+int32x2_t test_vreinterpret_s32_bf16 (bfloat16x4_t a, int32x2_t b) -+{ -+ int32x2_t _a = vreinterpret_s32_bf16 (a); -+ return vadd_s32 (_a, b); -+} -+ -+int64x1_t test_vreinterpret_s64_bf16 (bfloat16x4_t a, int64x1_t b) -+{ -+ int64x1_t _a = vreinterpret_s64_bf16 (a); -+ return vrshl_s64 (_a, b); -+} -+ -+uint8x8_t test_vreinterpret_u8_bf16 (bfloat16x4_t a, uint8x8_t b) -+{ -+ uint8x8_t _a = vreinterpret_u8_bf16 (a); -+ return vadd_u8 (_a, b); -+} -+ -+uint16x4_t test_vreinterpret_u16_bf16 (bfloat16x4_t a, uint16x4_t b) -+{ -+ uint16x4_t _a = vreinterpret_u16_bf16 (a); -+ return vadd_u16 (_a, b); -+} -+ -+uint32x2_t test_vreinterpret_u32_bf16 (bfloat16x4_t a, uint32x2_t b) -+{ -+ uint32x2_t _a = vreinterpret_u32_bf16 (a); -+ return vadd_u32 (_a, b); -+} -+ -+uint64x1_t test_vreinterpret_u64_bf16 (bfloat16x4_t a, int64x1_t b) -+{ -+ uint64x1_t _a = vreinterpret_u64_bf16 (a); -+ return vrshl_u64 (_a, b); -+} -+ -+poly8x8_t test_vreinterpret_p8_bf16 (bfloat16x4_t a, poly8x8_t b) -+{ -+ poly8x8_t _a = vreinterpret_p8_bf16 (a); -+ return vzip1_p8 (_a, b); -+} -+ -+poly16x4_t test_vreinterpret_p16_bf16 (bfloat16x4_t a, poly16x4_t b) -+{ -+ poly16x4_t _a = vreinterpret_p16_bf16 (a); -+ return vzip1_p16 (_a, b); -+} -+ -+poly64x1_t test_vreinterpret_p64_bf16 (bfloat16x4_t a, poly64x1_t b) -+{ -+ poly64x1_t _a = vreinterpret_p64_bf16 (a); -+ return vsli_n_p64 (_a, b, 3); -+} -+ -+float32x2_t test_vreinterpret_f32_bf16 (bfloat16x4_t a, float32x2_t b) -+{ -+ float32x2_t _a = vreinterpret_f32_bf16 (a); -+ return vsub_f32 (_a, b); -+} -+ -+float64x1_t test_vreinterpret_f64_bf16 (bfloat16x4_t a, float64x1_t b) -+{ -+ float64x1_t _a = vreinterpret_f64_bf16 (a); -+ return vsub_f64 (_a, b); -+} -+ -+int8x16_t test_vreinterpretq_s8_bf16 (bfloat16x8_t a, int8x16_t b) -+{ -+ int8x16_t _a = vreinterpretq_s8_bf16 (a); -+ return vaddq_s8 (_a, b); -+} -+ -+int16x8_t test_vreinterpretq_s16_bf16 (bfloat16x8_t a, int16x8_t b) -+{ -+ int16x8_t _a = vreinterpretq_s16_bf16 (a); -+ return vaddq_s16 (_a, b); -+} -+ -+int32x4_t test_vreinterpretq_s32_bf16 (bfloat16x8_t a, int32x4_t b) -+{ -+ int32x4_t _a = vreinterpretq_s32_bf16 (a); -+ return vaddq_s32 (_a, b); -+} -+ -+int64x2_t test_vreinterpretq_s64_bf16 (bfloat16x8_t a, int64x2_t b) -+{ -+ int64x2_t _a = vreinterpretq_s64_bf16 (a); -+ return vaddq_s64 (_a, b); -+} -+ -+uint8x16_t test_vreinterpretq_u8_bf16 (bfloat16x8_t a, uint8x16_t b) -+{ -+ uint8x16_t _a = vreinterpretq_u8_bf16 (a); -+ return vaddq_u8 (_a, b); -+} -+ -+uint16x8_t test_vreinterpretq_u16_bf16 (bfloat16x8_t a, uint16x8_t b) -+{ -+ uint16x8_t _a = vreinterpretq_u16_bf16 (a); -+ return vaddq_u16 (_a, b); -+} -+ -+uint32x4_t test_vreinterpretq_u32_bf16 
(bfloat16x8_t a, uint32x4_t b) -+{ -+ uint32x4_t _a = vreinterpretq_u32_bf16 (a); -+ return vaddq_u32 (_a, b); -+} -+ -+uint64x2_t test_vreinterpretq_u64_bf16 (bfloat16x8_t a, uint64x2_t b) -+{ -+ uint64x2_t _a = vreinterpretq_u64_bf16 (a); -+ return vaddq_u64 (_a, b); -+} -+ -+poly8x16_t test_vreinterpretq_p8_bf16 (bfloat16x8_t a, poly8x16_t b) -+{ -+ poly8x16_t _a = vreinterpretq_p8_bf16 (a); -+ return vzip1q_p8 (_a, b); -+} -+ -+poly16x8_t test_vreinterpretq_p16_bf16 (bfloat16x8_t a, poly16x8_t b) -+{ -+ poly16x8_t _a = vreinterpretq_p16_bf16 (a); -+ return vzip1q_p16 (_a, b); -+} -+ -+poly64x2_t test_vreinterpretq_p64_bf16 (bfloat16x8_t a, poly64x2_t b) -+{ -+ poly64x2_t _a = vreinterpretq_p64_bf16 (a); -+ return vsliq_n_p64 (_a, b, 3); -+} -+ -+poly128_t test_vreinterpretq_p128_bf16 (bfloat16x8_t a, poly16x8_t b) -+{ -+ poly128_t _a = vreinterpretq_p128_bf16 (a); -+ return _a; -+} -+ -+float32x4_t test_vreinterpretq_f32_bf16 (bfloat16x8_t a, float32x4_t b) -+{ -+ float32x4_t _a = vreinterpretq_f32_bf16 (a); -+ return vsubq_f32 (_a, b); -+} -+ -+float64x2_t test_vreinterpretq_f64_bf16 (bfloat16x8_t a, float64x2_t b) -+{ -+ float64x2_t _a = vreinterpretq_f64_bf16 (a); -+ return vsubq_f64 (_a, b); -+} -+ -+float16x4_t test_vreinterpret_f16_bf16 (bfloat16x4_t a) -+{ -+ return vreinterpret_f16_bf16 (a); -+} -+ -+float16x8_t test_vreinterpretq_f16_bf16 (bfloat16x8_t a) -+{ -+ return vreinterpretq_f16_bf16 (a); -+} -+ -+/* { dg-final { scan-assembler-times {add\tv[0-9]+.2s, v[0-9]+.2s, v[0-9]+.2s} 2 } } */ -+/* { dg-final { scan-assembler-times {add\tv[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h} 2 } } */ -+/* { dg-final { scan-assembler-times {add\tv[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {add\tv[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s} 2 } } */ -+/* { dg-final { scan-assembler-times {add\tv[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h} 2 } } */ -+/* { dg-final { scan-assembler-times {add\tv[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b} 2 } } */ -+ -+/* { dg-final { scan-assembler {fsub\tv[0-9]+.2s, v[0-9]+.2s, v[0-9]+.2s} } } */ -+/* { dg-final { scan-assembler {fsub\tv[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s} } } */ -+/* { dg-final { scan-assembler {fsub\tv[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d} } } */ -+/* { dg-final { scan-assembler {fsub\td[0-9]+, d[0-9]+, d[0-9]+} } } */ -+ -+/* { dg-final { scan-assembler {zip1\tv[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b} } } */ -+/* { dg-final { scan-assembler {zip1\tv[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b} } } */ -+/* { dg-final { scan-assembler {zip1\tv[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h} } } */ -+/* { dg-final { scan-assembler {zip1\tv[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h} } } */ -+ -+/* { dg-final { scan-assembler {sli\tv[0-9]+.2d, v[0-9]+.2d, 3} } } */ -+/* { dg-final { scan-assembler {sli\td[0-9]+, d[0-9]+, 3} } } */ -+ -+/* { dg-final { scan-assembler {urshl\td[0-9]+, d[0-9]+, d[0-9]+} } } */ -+/* { dg-final { scan-assembler {srshl\td[0-9]+, d[0-9]+, d[0-9]+} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_vldn.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_vldn.c -new file mode 100644 -index 000000000..cf245091a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_vldn.c -@@ -0,0 +1,150 @@ -+/* { dg-do assemble { target { aarch64*-*-* } } } */ -+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ -+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ -+/* { dg-add-options arm_v8_2a_bf16_neon } */ -+ -+#include -+ -+bfloat16x4_t -+test_vld1_dup_bf16 (bfloat16_t * ptr) -+{ -+ 
return vld1_dup_bf16 (ptr); -+} -+ -+bfloat16x8_t -+test_vld1q_dup_bf16 (bfloat16_t * ptr) -+{ -+ return vld1q_dup_bf16 (ptr); -+} -+ -+bfloat16x4_t -+test_vld1_lane_bf16 (bfloat16_t * ptr, bfloat16x4_t src) -+{ -+ return vld1_lane_bf16 (ptr, src, 3); -+} -+ -+bfloat16x8_t -+test_vld1q_lane_bf16 (bfloat16_t * ptr, bfloat16x8_t src) -+{ -+ return vld1q_lane_bf16 (ptr, src, 7); -+} -+ -+bfloat16x4_t -+test_vld1_bf16 (bfloat16_t * ptr) -+{ -+ return vld1_bf16 (ptr); -+} -+ -+bfloat16x8_t -+test_vld1q_bf16 (bfloat16_t * ptr) -+{ -+ return vld1q_bf16 (ptr); -+} -+ -+bfloat16x4x2_t -+test_vld1_bf16_x2 (bfloat16_t * ptr) -+{ -+ return vld1_bf16_x2 (ptr); -+} -+ -+bfloat16x8x2_t -+test_vld1q_bf16_x2 (bfloat16_t * ptr) -+{ -+ return vld1q_bf16_x2 (ptr); -+} -+ -+bfloat16x4x3_t -+test_vld1_bf16_x3 (bfloat16_t * ptr) -+{ -+ return vld1_bf16_x3 (ptr); -+} -+ -+bfloat16x8x3_t -+test_vld1q_bf16_x3 (bfloat16_t * ptr) -+{ -+ return vld1q_bf16_x3 (ptr); -+} -+ -+bfloat16x4x4_t -+test_vld1_bf16_x4 (bfloat16_t * ptr) -+{ -+ return vld1_bf16_x4 (ptr); -+} -+ -+bfloat16x8x4_t -+test_vld1q_bf16_x4 (bfloat16_t * ptr) -+{ -+ return vld1q_bf16_x4 (ptr); -+} -+ -+bfloat16x4x2_t -+test_vld2_bf16 (bfloat16_t * ptr) -+{ -+ return vld2_bf16 (ptr); -+} -+ -+bfloat16x8x2_t -+test_vld2q_bf16 (bfloat16_t * ptr) -+{ -+ return vld2q_bf16 (ptr); -+} -+ -+bfloat16x4x2_t -+test_vld2_dup_bf16 (bfloat16_t * ptr) -+{ -+ return vld2_dup_bf16 (ptr); -+} -+ -+bfloat16x8x2_t -+test_vld2q_dup_bf16 (bfloat16_t * ptr) -+{ -+ return vld2q_dup_bf16 (ptr); -+} -+ -+bfloat16x4x3_t -+test_vld3_bf16 (bfloat16_t * ptr) -+{ -+ return vld3_bf16 (ptr); -+} -+ -+bfloat16x8x3_t -+test_vld3q_bf16 (bfloat16_t * ptr) -+{ -+ return vld3q_bf16 (ptr); -+} -+ -+bfloat16x4x3_t -+test_vld3_dup_bf16 (bfloat16_t * ptr) -+{ -+ return vld3_dup_bf16 (ptr); -+} -+ -+bfloat16x8x3_t -+test_vld3q_dup_bf16 (bfloat16_t * ptr) -+{ -+ return vld3q_dup_bf16 (ptr); -+} -+ -+bfloat16x4x4_t -+test_vld4_bf16 (bfloat16_t * ptr) -+{ -+ return vld4_bf16 (ptr); -+} -+ -+bfloat16x8x4_t -+test_vld4q_bf16 (bfloat16_t * ptr) -+{ -+ return vld4q_bf16 (ptr); -+} -+ -+bfloat16x4x4_t -+test_vld4_dup_bf16 (bfloat16_t * ptr) -+{ -+ return vld4_dup_bf16 (ptr); -+} -+ -+bfloat16x8x4_t -+test_vld4q_dup_bf16 (bfloat16_t * ptr) -+{ -+ return vld4q_dup_bf16 (ptr); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_vstn.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_vstn.c -new file mode 100644 -index 000000000..162b3ee36 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_vstn.c -@@ -0,0 +1,107 @@ -+/* { dg-do assemble { target { aarch64*-*-* } } } */ -+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ -+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ -+/* { dg-add-options arm_v8_2a_bf16_neon } */ -+ -+#include -+ -+void -+test_vst1_bf16_x2 (bfloat16_t *ptr, bfloat16x4x2_t val) -+{ -+ vst1_bf16_x2 (ptr, val); -+} -+ -+void -+test_vst1q_bf16_x2 (bfloat16_t *ptr, bfloat16x8x2_t val) -+{ -+ vst1q_bf16_x2 (ptr, val); -+} -+ -+void -+test_vst1_bf16_x3 (bfloat16_t *ptr, bfloat16x4x3_t val) -+{ -+ vst1_bf16_x3 (ptr, val); -+} -+ -+void -+test_vst1q_bf16_x3 (bfloat16_t *ptr, bfloat16x8x3_t val) -+{ -+ vst1q_bf16_x3 (ptr, val); -+} -+ -+void -+test_vst1_bf16_x4 (bfloat16_t *ptr, bfloat16x4x4_t val) -+{ -+ vst1_bf16_x4 (ptr, val); -+} -+ -+void -+test_vst1q_bf16_x4 (bfloat16_t *ptr, bfloat16x8x4_t val) -+{ -+ vst1q_bf16_x4 (ptr, val); -+} -+ -+void -+test_vst1_lane_bf16 (bfloat16_t *ptr, bfloat16x4_t val) -+{ -+ 
vst1_lane_bf16 (ptr, val, 3); -+} -+ -+void -+test_vst1q_lane_bf16 (bfloat16_t *ptr, bfloat16x8_t val) -+{ -+ vst1q_lane_bf16 (ptr, val, 7); -+} -+ -+void -+test_vst1_bf16 (bfloat16_t *ptr, bfloat16x4_t val) -+{ -+ vst1_bf16 (ptr, val); -+} -+ -+void -+test_vst1q_bf16 (bfloat16_t *ptr, bfloat16x8_t val) -+{ -+ vst1q_bf16 (ptr, val); -+} -+ -+void -+test_vst2_bf16 (bfloat16_t *ptr, bfloat16x4x2_t val) -+{ -+ vst2_bf16 (ptr, val); -+} -+ -+void -+test_vst2q_bf16 (bfloat16_t *ptr, bfloat16x8x2_t val) -+{ -+ vst2q_bf16 (ptr, val); -+} -+ -+void -+test_vst3_bf16 (bfloat16_t *ptr, bfloat16x4x3_t val) -+{ -+ vst3_bf16 (ptr, val); -+} -+ -+void -+test_vst3q_bf16 (bfloat16_t *ptr, bfloat16x8x3_t val) -+{ -+ vst3q_bf16 (ptr, val); -+} -+ -+void -+test_vst4_bf16 (bfloat16_t *ptr, bfloat16x4x4_t val) -+{ -+ vst4_bf16 (ptr, val); -+} -+ -+void -+test_vst4q_bf16 (bfloat16_t *ptr, bfloat16x8x4_t val) -+{ -+ vst4q_bf16 (ptr, val); -+} -+ -+int main() -+{ -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c -new file mode 100644 -index 000000000..bbea630b1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-compile.c -@@ -0,0 +1,48 @@ -+/* { dg-do assemble { target { aarch64*-*-* } } } */ -+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ -+/* { dg-add-options arm_v8_2a_bf16_neon } */ -+/* { dg-additional-options "-save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */ -+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ -+ -+#include -+ -+/* -+**test_bfcvtn: -+** bfcvtn v0.4h, v0.4s -+** ret -+*/ -+bfloat16x4_t test_bfcvtn (float32x4_t a) -+{ -+ return vcvt_bf16_f32 (a); -+} -+ -+/* -+**test_bfcvtnq: -+** bfcvtn v0.4h, v0.4s -+** ret -+*/ -+bfloat16x8_t test_bfcvtnq (float32x4_t a) -+{ -+ return vcvtq_low_bf16_f32 (a); -+} -+ -+/* -+**test_bfcvtnq2: -+** bfcvtn2 v0.8h, v1.4s -+** ret -+*/ -+bfloat16x8_t test_bfcvtnq2 (bfloat16x8_t inactive, float32x4_t a) -+{ -+ return vcvtq_high_bf16_f32 (inactive, a); -+} -+ -+/* -+**test_bfcvt: -+** bfcvt h0, s0 -+** ret -+*/ -+bfloat16_t test_bfcvt (float32_t a) -+{ -+ return vcvth_bf16_f32 (a); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nobf16.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nobf16.c -new file mode 100644 -index 000000000..9904d65f9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nobf16.c -@@ -0,0 +1,10 @@ -+/* { dg-do assemble { target { aarch64*-*-* } } } */ -+/* { dg-additional-options "-march=armv8.2-a+nobf16" } */ -+ -+#include -+ -+bfloat16_t test_bfcvt (float32_t a) -+{ -+ /* { dg-error "inlining failed .* 'vcvth_bf16_f32" "" { target *-*-* } 0 } */ -+ return vcvth_bf16_f32 (a); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c -new file mode 100644 -index 000000000..a91468093 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvt-nosimd.c -@@ -0,0 +1,17 @@ -+/* { dg-do assemble { target { aarch64*-*-* } } } */ -+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ -+/* { dg-require-effective-target aarch64_asm_bf16_ok } */ -+/* { dg-additional-options "-save-temps -march=armv8.2-a+bf16+nosimd" } */ -+/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */ -+ -+#include -+ -+/* -+**test_bfcvt: -+** bfcvt h0, s0 -+** ret -+*/ -+bfloat16_t 
test_bfcvt (float32_t a) -+{ -+ return vcvth_bf16_f32 (a); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtn-nobf16.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtn-nobf16.c -new file mode 100644 -index 000000000..b3b6db123 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtn-nobf16.c -@@ -0,0 +1,10 @@ -+/* { dg-do assemble { target { aarch64*-*-* } } } */ -+/* { dg-additional-options "-march=armv8.2-a+nobf16" } */ -+ -+#include -+ -+bfloat16x4_t test_bfcvtn (float32x4_t a) -+{ -+ /* { dg-error "inlining failed .* 'vcvt_bf16_f32" "" { target *-*-* } 0 } */ -+ return vcvt_bf16_f32 (a); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c -new file mode 100644 -index 000000000..4b730e39d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfcvtnq2-untied.c -@@ -0,0 +1,20 @@ -+/* { dg-do assemble { target { aarch64*-*-* } } } */ -+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ -+/* { dg-add-options arm_v8_2a_bf16_neon } */ -+/* { dg-additional-options "-save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */ -+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ -+ -+#include -+ -+/* -+**test_bfcvtnq2_untied: -+** mov v0.16b, v1.16b -+** bfcvtn2 v0.8h, v2.4s -+** ret -+*/ -+bfloat16x8_t test_bfcvtnq2_untied (bfloat16x8_t unused, bfloat16x8_t inactive, -+ float32x4_t a) -+{ -+ return vcvtq_high_bf16_f32 (inactive, a); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfdot-1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfdot-1.c -new file mode 100755 -index 000000000..ad5150773 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfdot-1.c -@@ -0,0 +1,91 @@ -+/* { dg-do assemble { target { aarch64*-*-* } } } */ -+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ -+/* { dg-add-options arm_v8_2a_bf16_neon } */ -+/* { dg-additional-options "-save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */ -+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ -+ -+#include -+ -+/* -+**ufoo: -+** bfdot v0.2s, (v1.4h, v2.4h|v2.4h, v1.4h) -+** ret -+*/ -+float32x2_t ufoo(float32x2_t r, bfloat16x4_t x, bfloat16x4_t y) -+{ -+ return vbfdot_f32 (r, x, y); -+} -+ -+/* -+**ufooq: -+** bfdot v0.4s, (v1.8h, v2.8h|v2.8h, v1.8h) -+** ret -+*/ -+float32x4_t ufooq(float32x4_t r, bfloat16x8_t x, bfloat16x8_t y) -+{ -+ return vbfdotq_f32 (r, x, y); -+} -+ -+/* -+**ufoo_lane: -+** bfdot v0.2s, v1.4h, v2.2h\[0\] -+** ret -+*/ -+float32x2_t ufoo_lane(float32x2_t r, bfloat16x4_t x, bfloat16x4_t y) -+{ -+ return vbfdot_lane_f32 (r, x, y, 0); -+} -+ -+/* -+**ufooq_laneq: -+** bfdot v0.4s, v1.8h, v2.2h\[2\] -+** ret -+*/ -+float32x4_t ufooq_laneq(float32x4_t r, bfloat16x8_t x, bfloat16x8_t y) -+{ -+ return vbfdotq_laneq_f32 (r, x, y, 2); -+} -+ -+/* -+**ufoo_laneq: -+** bfdot v0.2s, v1.4h, v2.2h\[3\] -+** ret -+*/ -+float32x2_t ufoo_laneq(float32x2_t r, bfloat16x4_t x, bfloat16x8_t y) -+{ -+ return vbfdot_laneq_f32 (r, x, y, 3); -+} -+ -+/* -+**ufooq_lane: -+** bfdot v0.4s, v1.8h, v2.2h\[1\] -+** ret -+*/ -+float32x4_t ufooq_lane(float32x4_t r, bfloat16x8_t x, bfloat16x4_t y) -+{ -+ return vbfdotq_lane_f32 (r, x, y, 1); -+} -+ -+/* -+**ufoo_untied: -+** mov v0.8b, v1.8b -+** bfdot v0.2s, (v2.4h, v3.4h|v3.4h, v2.4h) -+** ret -+*/ -+float32x2_t ufoo_untied(float32x4_t unused, float32x2_t r, 
bfloat16x4_t x, bfloat16x4_t y) -+{ -+ return vbfdot_f32 (r, x, y); -+} -+ -+/* -+**ufooq_lane_untied: -+** mov v0.16b, v1.16b -+** bfdot v0.4s, v2.8h, v3.2h\[1\] -+** ret -+*/ -+float32x4_t ufooq_lane_untied(float32x4_t unused, float32x4_t r, bfloat16x8_t x, bfloat16x4_t y) -+{ -+ return vbfdotq_lane_f32 (r, x, y, 1); -+} -+ -diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c -new file mode 100755 -index 000000000..58bdee5ac ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfdot-2.c -@@ -0,0 +1,91 @@ -+/* { dg-do assemble { target { aarch64*-*-* } } } */ -+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ -+/* { dg-add-options arm_v8_2a_bf16_neon } */ -+/* { dg-additional-options "-mbig-endian --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */ -+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ -+ -+#include -+ -+/* -+**ufoo: -+** bfdot v0.2s, (v1.4h, v2.4h|v2.4h, v1.4h) -+** ret -+*/ -+float32x2_t ufoo(float32x2_t r, bfloat16x4_t x, bfloat16x4_t y) -+{ -+ return vbfdot_f32 (r, x, y); -+} -+ -+/* -+**ufooq: -+** bfdot v0.4s, (v1.8h, v2.8h|v2.8h, v1.8h) -+** ret -+*/ -+float32x4_t ufooq(float32x4_t r, bfloat16x8_t x, bfloat16x8_t y) -+{ -+ return vbfdotq_f32 (r, x, y); -+} -+ -+/* -+**ufoo_lane: -+** bfdot v0.2s, v1.4h, v2.2h\[0\] -+** ret -+*/ -+float32x2_t ufoo_lane(float32x2_t r, bfloat16x4_t x, bfloat16x4_t y) -+{ -+ return vbfdot_lane_f32 (r, x, y, 0); -+} -+ -+/* -+**ufooq_laneq: -+** bfdot v0.4s, v1.8h, v2.2h\[2\] -+** ret -+*/ -+float32x4_t ufooq_laneq(float32x4_t r, bfloat16x8_t x, bfloat16x8_t y) -+{ -+ return vbfdotq_laneq_f32 (r, x, y, 2); -+} -+ -+/* -+**ufoo_laneq: -+** bfdot v0.2s, v1.4h, v2.2h\[3\] -+** ret -+*/ -+float32x2_t ufoo_laneq(float32x2_t r, bfloat16x4_t x, bfloat16x8_t y) -+{ -+ return vbfdot_laneq_f32 (r, x, y, 3); -+} -+ -+/* -+**ufooq_lane: -+** bfdot v0.4s, v1.8h, v2.2h\[1\] -+** ret -+*/ -+float32x4_t ufooq_lane(float32x4_t r, bfloat16x8_t x, bfloat16x4_t y) -+{ -+ return vbfdotq_lane_f32 (r, x, y, 1); -+} -+ -+/* -+**ufoo_untied: -+** mov v0.8b, v1.8b -+** bfdot v0.2s, (v2.4h, v3.4h|v3.4h, v2.4h) -+** ret -+*/ -+float32x2_t ufoo_untied(float32x4_t unused, float32x2_t r, bfloat16x4_t x, bfloat16x4_t y) -+{ -+ return vbfdot_f32 (r, x, y); -+} -+ -+/* -+**ufooq_lane_untied: -+** mov v0.16b, v1.16b -+** bfdot v0.4s, v2.8h, v3.2h\[1\] -+** ret -+*/ -+float32x4_t ufooq_lane_untied(float32x4_t unused, float32x4_t r, bfloat16x8_t x, bfloat16x4_t y) -+{ -+ return vbfdotq_lane_f32 (r, x, y, 1); -+} -+ -diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfdot-3.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfdot-3.c -new file mode 100755 -index 000000000..607126203 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfdot-3.c -@@ -0,0 +1,28 @@ -+/* { dg-do assemble { target { aarch64*-*-* } } } */ -+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ -+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ -+/* { dg-add-options arm_v8_2a_bf16_neon } */ -+/* { dg-additional-options "--save-temps" } */ -+ -+#include -+ -+float32x2_t ufoo_lane(float32x2_t r, bfloat16x4_t x, bfloat16x4_t y) -+{ -+ return vbfdot_lane_f32 (r, x, y, -1); /* { dg-error {lane -1 out of range 0 - 1} "" { target *-*-* } 0 } */ -+} -+ -+float32x4_t ufooq_laneq(float32x4_t r, bfloat16x8_t x, bfloat16x8_t y) -+{ -+ return vbfdotq_laneq_f32 (r, x, y, -1); /* { dg-error {lane 
-1 out of range 0 - 3} "" { target *-*-* } 0 } */ -+} -+ -+float32x2_t ufoo_laneq(float32x2_t r, bfloat16x4_t x, bfloat16x8_t y) -+{ -+ return vbfdot_laneq_f32 (r, x, y, 4); /* { dg-error {lane 4 out of range 0 - 3} "" { target *-*-* } 0 } */ -+} -+ -+float32x4_t ufooq_lane(float32x4_t r, bfloat16x8_t x, bfloat16x4_t y) -+{ -+ return vbfdotq_lane_f32 (r, x, y, 2); /* { dg-error {lane 2 out of range 0 - 1} "" { target *-*-* } 0 } */ -+} -+ -diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfmlalbt-compile.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfmlalbt-compile.c -new file mode 100644 -index 000000000..9810e4ba3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfmlalbt-compile.c -@@ -0,0 +1,67 @@ -+/* { dg-do assemble { target { aarch64*-*-* } } } */ -+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ -+/* { dg-add-options arm_v8_2a_bf16_neon } */ -+/* { dg-additional-options "-save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include -+ -+/* -+**test_bfmlalb: -+** bfmlalb v0.4s, v1.8h, v2.8h -+** ret -+*/ -+float32x4_t test_bfmlalb (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) -+{ -+ return vbfmlalbq_f32 (r, a, b); -+} -+ -+/* -+**test_bfmlalt: -+** bfmlalt v0.4s, v1.8h, v2.8h -+** ret -+*/ -+float32x4_t test_bfmlalt (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) -+{ -+ return vbfmlaltq_f32 (r, a, b); -+} -+ -+/* -+**test_bfmlalb_lane: -+** bfmlalb v0.4s, v1.8h, v2.h[0] -+** ret -+*/ -+float32x4_t test_bfmlalb_lane (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b) -+{ -+ return vbfmlalbq_lane_f32 (r, a, b, 0); -+} -+ -+/* -+**test_bfmlalt_lane: -+** bfmlalt v0.4s, v1.8h, v2.h[2] -+** ret -+*/ -+float32x4_t test_bfmlalt_lane (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b) -+{ -+ return vbfmlaltq_lane_f32 (r, a, b, 2); -+} -+ -+/* -+**test_bfmlalb_laneq: -+** bfmlalb v0.4s, v1.8h, v2.h[4] -+** ret -+*/ -+float32x4_t test_bfmlalb_laneq (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) -+{ -+ return vbfmlalbq_laneq_f32 (r, a, b, 4); -+} -+ -+/* -+**test_bfmlalt_laneq: -+** bfmlalt v0.4s, v1.8h, v2.h[7] -+** ret -+*/ -+float32x4_t test_bfmlalt_laneq (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) -+{ -+ return vbfmlaltq_laneq_f32 (r, a, b, 7); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfmmla-compile.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfmmla-compile.c -new file mode 100644 -index 000000000..0aaa69f00 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bfmmla-compile.c -@@ -0,0 +1,18 @@ -+/* { dg-do assemble { target { aarch64*-*-* } } } */ -+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ -+/* { dg-add-options arm_v8_2a_bf16_neon } */ -+/* { dg-additional-options "-save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include -+ -+ -+/* -+**test_bfmmla: -+** bfmmla v0.4s, v1.8h, v2.8h -+** ret -+*/ -+float32x4_t test_bfmmla (float32x4_t r, bfloat16x8_t x, bfloat16x8_t y) -+{ -+ return vbfmmlaq_f32 (r, x, y); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbfmlalbt_lane_f32_indices_1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbfmlalbt_lane_f32_indices_1.c -new file mode 100644 -index 000000000..4d50ba3a3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbfmlalbt_lane_f32_indices_1.c -@@ -0,0 +1,46 @@ -+/* { dg-do compile { target { aarch64*-*-* } } } */ -+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" 
} } */ -+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ -+/* { dg-add-options arm_v8_2a_bf16_neon } */ -+ -+#include -+ -+void -+f_vbfmlaltq_lane_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b) -+{ -+ /* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */ -+ vbfmlaltq_lane_f32 (r, a, b, -1); -+ /* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */ -+ vbfmlaltq_lane_f32 (r, a, b, 4); -+ return; -+} -+ -+void -+f_vbfmlaltq_laneq_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) -+{ -+ /* { dg-error "lane -1 out of range 0 - 7" "" { target *-*-* } 0 } */ -+ vbfmlaltq_laneq_f32 (r, a, b, -1); -+ /* { dg-error "lane 8 out of range 0 - 7" "" { target *-*-* } 0 } */ -+ vbfmlaltq_laneq_f32 (r, a, b, 8); -+ return; -+} -+ -+void -+f_vbfmlalbq_lane_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x4_t b) -+{ -+ /* { dg-error "lane -2 out of range 0 - 3" "" { target *-*-* } 0 } */ -+ vbfmlalbq_lane_f32 (r, a, b, -2); -+ /* { dg-error "lane 5 out of range 0 - 3" "" { target *-*-* } 0 } */ -+ vbfmlalbq_lane_f32 (r, a, b, 5); -+ return; -+} -+ -+void -+f_vbfmlalbq_laneq_f32 (float32x4_t r, bfloat16x8_t a, bfloat16x8_t b) -+{ -+ /* { dg-error "lane -2 out of range 0 - 7" "" { target *-*-* } 0 } */ -+ vbfmlalbq_laneq_f32 (r, a, b, -2); -+ /* { dg-error "lane 9 out of range 0 - 7" "" { target *-*-* } 0 } */ -+ vbfmlalbq_laneq_f32 (r, a, b, 9); -+ return; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-1.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-1.c -new file mode 100755 -index 000000000..ac4f821e7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-1.c -@@ -0,0 +1,136 @@ -+/* { dg-do assemble { target { aarch64*-*-* } } } */ -+/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */ -+/* { dg-add-options arm_v8_2a_i8mm } */ -+/* { dg-additional-options "-save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */ -+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ -+ -+#include -+ -+/* Unsigned-Signed Dot Product instructions. */ -+ -+/* -+**ufoo: -+** usdot v0\.2s, v1\.8b, v2\.8b -+** ret -+*/ -+int32x2_t ufoo (int32x2_t r, uint8x8_t x, int8x8_t y) -+{ -+ return vusdot_s32 (r, x, y); -+} -+ -+/* -+**ufooq: -+** usdot v0\.4s, v1\.16b, v2\.16b -+** ret -+*/ -+int32x4_t ufooq (int32x4_t r, uint8x16_t x, int8x16_t y) -+{ -+ return vusdotq_s32 (r, x, y); -+} -+ -+/* -+**ufoo_lane: -+** usdot v0\.2s, v1\.8b, v2\.4b\[0\] -+** ret -+*/ -+int32x2_t ufoo_lane (int32x2_t r, uint8x8_t x, int8x8_t y) -+{ -+ return vusdot_lane_s32 (r, x, y, 0); -+} -+ -+/* -+**ufoo_laneq: -+** usdot v0\.2s, v1\.8b, v2\.4b\[2\] -+** ret -+*/ -+int32x2_t ufoo_laneq (int32x2_t r, uint8x8_t x, int8x16_t y) -+{ -+ return vusdot_laneq_s32 (r, x, y, 2); -+} -+ -+/* -+**ufooq_lane: -+** usdot v0\.4s, v1\.16b, v2\.4b\[1\] -+** ret -+*/ -+int32x4_t ufooq_lane (int32x4_t r, uint8x16_t x, int8x8_t y) -+{ -+ return vusdotq_lane_s32 (r, x, y, 1); -+} -+ -+/* -+**ufooq_laneq: -+** usdot v0\.4s, v1\.16b, v2\.4b\[3\] -+** ret -+*/ -+int32x4_t ufooq_laneq (int32x4_t r, uint8x16_t x, int8x16_t y) -+{ -+ return vusdotq_laneq_s32 (r, x, y, 3); -+} -+ -+ -+/* Signed-Unsigned Dot Product instructions. 
*/ -+ -+/* -+**sfoo_lane: -+** sudot v0\.2s, v1\.8b, v2\.4b\[0\] -+** ret -+*/ -+int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, uint8x8_t y) -+{ -+ return vsudot_lane_s32 (r, x, y, 0); -+} -+ -+/* -+**sfoo_laneq: -+** sudot v0\.2s, v1\.8b, v2\.4b\[2\] -+** ret -+*/ -+int32x2_t sfoo_laneq (int32x2_t r, int8x8_t x, uint8x16_t y) -+{ -+ return vsudot_laneq_s32 (r, x, y, 2); -+} -+ -+/* -+**sfooq_lane: -+** sudot v0\.4s, v1\.16b, v2\.4b\[1\] -+** ret -+*/ -+int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, uint8x8_t y) -+{ -+ return vsudotq_lane_s32 (r, x, y, 1); -+} -+ -+/* -+**sfooq_laneq: -+** sudot v0\.4s, v1\.16b, v2\.4b\[3\] -+** ret -+*/ -+int32x4_t sfooq_laneq (int32x4_t r, int8x16_t x, uint8x16_t y) -+{ -+ return vsudotq_laneq_s32 (r, x, y, 3); -+} -+ -+/* -+**ufoo_untied: -+** mov v0\.8b, v1\.8b -+** usdot v0\.2s, v2\.8b, v3\.8b -+** ret -+*/ -+int32x2_t ufoo_untied (int32x2_t unused, int32x2_t r, uint8x8_t x, int8x8_t y) -+{ -+ return vusdot_s32 (r, x, y); -+} -+ -+/* -+**ufooq_laneq_untied: -+** mov v0\.16b, v1\.16b -+** usdot v0\.4s, v2\.16b, v3\.4b\[3\] -+** ret -+*/ -+int32x4_t ufooq_laneq_untied (int32x2_t unused, int32x4_t r, uint8x16_t x, int8x16_t y) -+{ -+ return vusdotq_laneq_s32 (r, x, y, 3); -+} -+ -diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c -new file mode 100755 -index 000000000..96bca2356 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-2.c -@@ -0,0 +1,137 @@ -+/* { dg-do assemble { target { aarch64*-*-* } } } */ -+/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */ -+/* { dg-add-options arm_v8_2a_i8mm } */ -+/* { dg-additional-options "-mbig-endian -save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */ -+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ -+ -+#include -+ -+/* Unsigned-Signed Dot Product instructions. */ -+ -+/* -+**ufoo: -+** usdot v0\.2s, v1\.8b, v2\.8b -+** ret -+*/ -+int32x2_t ufoo (int32x2_t r, uint8x8_t x, int8x8_t y) -+{ -+ return vusdot_s32 (r, x, y); -+} -+ -+/* -+**ufooq: -+** usdot v0\.4s, v1\.16b, v2\.16b -+** ret -+*/ -+int32x4_t ufooq (int32x4_t r, uint8x16_t x, int8x16_t y) -+{ -+ return vusdotq_s32 (r, x, y); -+} -+ -+/* -+**ufoo_lane: -+** usdot v0\.2s, v1\.8b, v2\.4b\[0\] -+** ret -+*/ -+int32x2_t ufoo_lane (int32x2_t r, uint8x8_t x, int8x8_t y) -+{ -+ return vusdot_lane_s32 (r, x, y, 0); -+} -+ -+/* -+**ufoo_laneq: -+** usdot v0\.2s, v1\.8b, v2\.4b\[2\] -+** ret -+*/ -+int32x2_t ufoo_laneq (int32x2_t r, uint8x8_t x, int8x16_t y) -+{ -+ return vusdot_laneq_s32 (r, x, y, 2); -+} -+ -+/* -+**ufooq_lane: -+** usdot v0\.4s, v1\.16b, v2\.4b\[1\] -+** ret -+*/ -+int32x4_t ufooq_lane (int32x4_t r, uint8x16_t x, int8x8_t y) -+{ -+ return vusdotq_lane_s32 (r, x, y, 1); -+} -+ -+/* -+**ufooq_laneq: -+** usdot v0\.4s, v1\.16b, v2\.4b\[3\] -+** ret -+*/ -+int32x4_t ufooq_laneq (int32x4_t r, uint8x16_t x, int8x16_t y) -+{ -+ return vusdotq_laneq_s32 (r, x, y, 3); -+} -+ -+ -+/* Signed-Unsigned Dot Product instructions. 
*/ -+ -+/* -+**sfoo_lane: -+** sudot v0\.2s, v1\.8b, v2\.4b\[0\] -+** ret -+*/ -+int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, uint8x8_t y) -+{ -+ return vsudot_lane_s32 (r, x, y, 0); -+} -+ -+/* -+**sfoo_laneq: -+** sudot v0\.2s, v1\.8b, v2\.4b\[2\] -+** ret -+*/ -+int32x2_t sfoo_laneq (int32x2_t r, int8x8_t x, uint8x16_t y) -+{ -+ return vsudot_laneq_s32 (r, x, y, 2); -+} -+ -+/* -+**sfooq_lane: -+** sudot v0\.4s, v1\.16b, v2\.4b\[1\] -+** ret -+*/ -+int32x4_t sfooq_lane (int32x4_t r, int8x16_t x, uint8x8_t y) -+{ -+ return vsudotq_lane_s32 (r, x, y, 1); -+} -+ -+/* -+**sfooq_laneq: -+** sudot v0\.4s, v1\.16b, v2\.4b\[3\] -+** ret -+*/ -+int32x4_t sfooq_laneq (int32x4_t r, int8x16_t x, uint8x16_t y) -+{ -+ return vsudotq_laneq_s32 (r, x, y, 3); -+} -+ -+/* -+**ufoo_untied: -+** mov v0\.8b, v1\.8b -+** usdot v0\.2s, v2\.8b, v3\.8b -+** ret -+*/ -+int32x2_t ufoo_untied (int32x2_t unused, int32x2_t r, uint8x8_t x, int8x8_t y) -+{ -+ return vusdot_s32 (r, x, y); -+} -+ -+/* -+**ufooq_laneq_untied: -+** mov v0\.16b, v1\.16b -+** usdot v0\.4s, v2\.16b, v3\.4b\[3\] -+** ret -+*/ -+int32x4_t ufooq_laneq_untied (int32x2_t unused, int32x4_t r, uint8x16_t x, int8x16_t y) -+{ -+ return vusdotq_laneq_s32 (r, x, y, 3); -+} -+ -+ -diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-3.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-3.c -new file mode 100755 -index 000000000..18ecabef8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-3.c -@@ -0,0 +1,31 @@ -+/* { dg-do assemble { target { aarch64*-*-* } } } */ -+/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */ -+/* { dg-add-options arm_v8_2a_i8mm } */ -+/* { dg-additional-options "--save-temps" } */ -+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ -+ -+#include -+ -+int32x2_t ufoo_lane (int32x2_t r, uint8x8_t x, int8x8_t y) -+{ -+ /* { dg-error "lane -1 out of range 0 - 1" "" { target *-*-* } 0 } */ -+ return vusdot_lane_s32 (r, x, y, -1); -+} -+ -+int32x2_t ufoo_laneq (int32x2_t r, uint8x8_t x, int8x16_t y) -+{ -+ /* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */ -+ return vusdot_laneq_s32 (r, x, y, -1); -+} -+ -+int32x4_t ufooq_lane (int32x4_t r, uint8x16_t x, int8x8_t y) -+{ -+ /* { dg-error "lane 2 out of range 0 - 1" "" { target *-*-* } 0 } */ -+ return vusdotq_lane_s32 (r, x, y, 2); -+} -+ -+int32x4_t ufooq_laneq (int32x4_t r, uint8x16_t x, int8x16_t y) -+{ -+ /* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */ -+ return vusdotq_laneq_s32 (r, x, y, 4); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-4.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-4.c -new file mode 100755 -index 000000000..66c87d486 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdot-3-4.c -@@ -0,0 +1,31 @@ -+/* { dg-do assemble { target { aarch64*-*-* } } } */ -+/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */ -+/* { dg-add-options arm_v8_2a_i8mm } */ -+/* { dg-additional-options "--save-temps" } */ -+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ -+ -+#include -+ -+int32x2_t sfoo_lane (int32x2_t r, int8x8_t x, uint8x8_t y) -+{ -+ /* { dg-error "lane -1 out of range 0 - 1" "" { target *-*-* } 0 } */ -+ return vsudot_lane_s32 (r, x, y, -1); -+} -+ -+int32x2_t sfoo_laneq (int32x2_t r, int8x8_t x, uint8x16_t y) -+{ -+ /* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */ -+ return vsudot_laneq_s32 (r, x, y, -1); -+} -+ -+int32x4_t sfooq_lane (int32x4_t 
r, int8x16_t x, uint8x8_t y)
-+{
-+ /* { dg-error "lane 2 out of range 0 - 1" "" { target *-*-* } 0 } */
-+ return vsudotq_lane_s32 (r, x, y, 2);
-+}
-+
-+int32x4_t sfooq_laneq (int32x4_t r, int8x16_t x, uint8x16_t y)
-+{
-+ /* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */
-+ return vsudotq_laneq_s32 (r, x, y, 4);
-+}
-diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x4.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x4.c
-new file mode 100644
-index 000000000..451a0afc6
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld1x4.c
-@@ -0,0 +1,83 @@
-+/* We haven't implemented these intrinsics for arm yet. */
-+/* { dg-xfail-if "" { arm*-*-* } } */
-+/* { dg-do run } */
-+/* { dg-options "-O3" } */
-+
-+#include <arm_neon.h>
-+#include "arm-neon-ref.h"
-+
-+extern void abort (void);
-+
-+#define TESTMETH(BASE, ELTS, SUFFIX) \
-+int __attribute__ ((noinline)) \
-+test_vld1##SUFFIX##_x4 () \
-+{ \
-+ BASE##_t data[ELTS * 4]; \
-+ BASE##_t temp[ELTS * 4]; \
-+ BASE##x##ELTS##x##4##_t vectors; \
-+ int i,j; \
-+ for (i = 0; i < ELTS * 4; i++) \
-+ data [i] = (BASE##_t) 4*i; \
-+ asm volatile ("" : : : "memory"); \
-+ vectors = vld1##SUFFIX##_x4 (data); \
-+ vst1##SUFFIX (temp, vectors.val[0]); \
-+ vst1##SUFFIX (&temp[ELTS], vectors.val[1]); \
-+ vst1##SUFFIX (&temp[ELTS * 2], vectors.val[2]); \
-+ vst1##SUFFIX (&temp[ELTS * 3], vectors.val[3]); \
-+ asm volatile ("" : : : "memory"); \
-+ for (j = 0; j < ELTS * 4; j++) \
-+ if (temp[j] != data[j]) \
-+ return 1; \
-+ return 0; \
-+}
-+
-+#define VARIANTS_1(VARIANT) \
-+VARIANT (uint8, 8, _u8) \
-+VARIANT (uint16, 4, _u16) \
-+VARIANT (uint32, 2, _u32) \
-+VARIANT (uint64, 1, _u64) \
-+VARIANT (int8, 8, _s8) \
-+VARIANT (int16, 4, _s16) \
-+VARIANT (int32, 2, _s32) \
-+VARIANT (int64, 1, _s64) \
-+VARIANT (poly8, 8, _p8) \
-+VARIANT (poly16, 4, _p16) \
-+VARIANT (poly64, 1, _p64) \
-+VARIANT (float16, 4, _f16) \
-+VARIANT (float32, 2, _f32) \
-+VARIANT (uint8, 16, q_u8) \
-+VARIANT (uint16, 8, q_u16) \
-+VARIANT (uint32, 4, q_u32) \
-+VARIANT (uint64, 2, q_u64) \
-+VARIANT (int8, 16, q_s8) \
-+VARIANT (int16, 8, q_s16) \
-+VARIANT (int32, 4, q_s32) \
-+VARIANT (int64, 2, q_s64) \
-+VARIANT (poly8, 16, q_p8) \
-+VARIANT (poly16, 8, q_p16) \
-+VARIANT (poly64, 2, q_p64) \
-+VARIANT (float16, 8, q_f16) \
-+VARIANT (float32, 4, q_f32)
-+
-+#ifdef __aarch64__
-+#define VARIANTS(VARIANT) VARIANTS_1(VARIANT) \
-+VARIANT (float64, 1, _f64) \
-+VARIANT (float64, 2, q_f64)
-+#else
-+#define VARIANTS(VARIANT) VARIANTS_1(VARIANT)
-+#endif
-+
-+/* Tests of vld1_x4 and vld1q_x4. */
-+VARIANTS (TESTMETH)
-+
-+#define CHECKS(BASE, ELTS, SUFFIX) \
-+ if (test_vld1##SUFFIX##_x4 () != 0) \
-+ fprintf (stderr, "test_vld1##SUFFIX##_x4");
-+
-+int
-+main (int argc, char **argv)
-+{
-+ VARIANTS (CHECKS)
-+
-+ return 0;
-+}
-diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1x4.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1x4.c
-new file mode 100644
-index 000000000..1f17b5342
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst1x4.c
-@@ -0,0 +1,83 @@
-+/* We haven't implemented these intrinsics for arm yet. */
-+/* { dg-xfail-if "" { arm*-*-* } } */
-+/* { dg-do run } */
-+/* { dg-options "-O3" } */
-+
-+#include <arm_neon.h>
-+#include "arm-neon-ref.h"
-+
-+extern void abort (void);
-+
-+#define TESTMETH(BASE, ELTS, SUFFIX) \
-+int __attribute__ ((noinline)) \
-+test_vst1##SUFFIX##_x4 () \
-+{ \
-+ BASE##_t data[ELTS * 4]; \
-+ BASE##_t temp[ELTS * 4]; \
-+ BASE##x##ELTS##x##4##_t vectors; \
-+ int i,j; \
-+ for (i = 0; i < ELTS * 4; i++) \
-+ data [i] = (BASE##_t) 4*i; \
-+ asm volatile ("" : : : "memory"); \
-+ vectors.val[0] = vld1##SUFFIX (data); \
-+ vectors.val[1] = vld1##SUFFIX (&data[ELTS]); \
-+ vectors.val[2] = vld1##SUFFIX (&data[ELTS * 2]); \
-+ vectors.val[3] = vld1##SUFFIX (&data[ELTS * 3]); \
-+ vst1##SUFFIX##_x4 (temp, vectors); \
-+ asm volatile ("" : : : "memory"); \
-+ for (j = 0; j < ELTS * 4; j++) \
-+ if (temp[j] != data[j]) \
-+ return 1; \
-+ return 0; \
-+}
-+
-+#define VARIANTS_1(VARIANT) \
-+VARIANT (uint8, 8, _u8) \
-+VARIANT (uint16, 4, _u16) \
-+VARIANT (uint32, 2, _u32) \
-+VARIANT (uint64, 1, _u64) \
-+VARIANT (int8, 8, _s8) \
-+VARIANT (int16, 4, _s16) \
-+VARIANT (int32, 2, _s32) \
-+VARIANT (int64, 1, _s64) \
-+VARIANT (poly8, 8, _p8) \
-+VARIANT (poly16, 4, _p16) \
-+VARIANT (poly64, 1, _p64) \
-+VARIANT (float16, 4, _f16) \
-+VARIANT (float32, 2, _f32) \
-+VARIANT (uint8, 16, q_u8) \
-+VARIANT (uint16, 8, q_u16) \
-+VARIANT (uint32, 4, q_u32) \
-+VARIANT (uint64, 2, q_u64) \
-+VARIANT (int8, 16, q_s8) \
-+VARIANT (int16, 8, q_s16) \
-+VARIANT (int32, 4, q_s32) \
-+VARIANT (int64, 2, q_s64) \
-+VARIANT (poly8, 16, q_p8) \
-+VARIANT (poly16, 8, q_p16) \
-+VARIANT (poly64, 2, q_p64) \
-+VARIANT (float16, 8, q_f16) \
-+VARIANT (float32, 4, q_f32)
-+
-+#ifdef __aarch64__
-+#define VARIANTS(VARIANT) VARIANTS_1(VARIANT) \
-+VARIANT (float64, 1, _f64) \
-+VARIANT (float64, 2, q_f64)
-+#else
-+#define VARIANTS(VARIANT) VARIANTS_1(VARIANT)
-+#endif
-+
-+/* Tests of vst1_x4 and vst1q_x4.
*/ -+VARIANTS (TESTMETH) -+ -+#define CHECKS(BASE, ELTS, SUFFIX) \ -+ if (test_vst1##SUFFIX##_x4 () != 0) \ -+ fprintf (stderr, "test_vst1##SUFFIX##_x4"); -+ -+int -+main (int argc, char **argv) -+{ -+ VARIANTS (CHECKS) -+ -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/crypto-fuse-1.c b/gcc/testsuite/gcc.target/aarch64/aes-fuse-1.c -similarity index 51% -rename from gcc/testsuite/gcc.target/aarch64/crypto-fuse-1.c -rename to gcc/testsuite/gcc.target/aarch64/aes-fuse-1.c -index d8adc8946..d7b4f8991 100644 ---- a/gcc/testsuite/gcc.target/aarch64/crypto-fuse-1.c -+++ b/gcc/testsuite/gcc.target/aarch64/aes-fuse-1.c -@@ -1,45 +1,66 @@ - /* { dg-do compile } */ - /* { dg-options "-O3 -mcpu=cortex-a72+crypto -dp" } */ -+/* { dg-additional-options "-march=armv8-a+crypto" { target { aarch64*-*-* } } }*/ - - #include - - #define AESE(r, v, key) (r = vaeseq_u8 ((v), (key))); - #define AESMC(r, i) (r = vaesmcq_u8 (i)) - -+const uint8x16_t zero = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; -+ - uint8x16_t dummy; - uint8x16_t a; - uint8x16_t b; - uint8x16_t c; - uint8x16_t d; --uint8x16_t e; -+uint8x16_t x; -+uint8x16_t y; -+uint8x16_t k; -+ -+void foo (void) - --void --foo (void) - { -- AESE (a, a, e); -+ AESE (a, a, k); - dummy = vaddq_u8 (dummy, dummy); - dummy = vaddq_u8 (dummy, dummy); -- AESE (b, b, e); -+ AESE (b, b, k); - dummy = vaddq_u8 (dummy, dummy); - dummy = vaddq_u8 (dummy, dummy); -- AESE (c, c, e); -+ AESE (c, c, k); - dummy = vaddq_u8 (dummy, dummy); - dummy = vaddq_u8 (dummy, dummy); -- AESE (d, d, e); -+ AESE (d, d, k); - dummy = vaddq_u8 (dummy, dummy); - dummy = vaddq_u8 (dummy, dummy); - -- AESMC (a, a); -+ x = x ^ k; -+ AESE (x, x, zero); - dummy = vaddq_u8 (dummy, dummy); - dummy = vaddq_u8 (dummy, dummy); -- AESMC (b, b); -+ y = y ^ k; -+ AESE (y, y, zero); -+ dummy = vaddq_u8 (dummy, dummy); -+ dummy = vaddq_u8 (dummy, dummy); -+ -+ AESMC (d, d); - dummy = vaddq_u8 (dummy, dummy); - dummy = vaddq_u8 (dummy, dummy); - AESMC (c, c); - dummy = vaddq_u8 (dummy, dummy); - dummy = vaddq_u8 (dummy, dummy); -- AESMC (d, d); --} -+ AESMC (b, b); -+ dummy = vaddq_u8 (dummy, dummy); -+ dummy = vaddq_u8 (dummy, dummy); -+ AESMC (a, a); -+ dummy = vaddq_u8 (dummy, dummy); -+ dummy = vaddq_u8 (dummy, dummy); - --/* { dg-final { scan-assembler-times "crypto_aese_fused" 4 } } */ -+ AESMC (y, y); -+ dummy = vaddq_u8 (dummy, dummy); -+ dummy = vaddq_u8 (dummy, dummy); -+ AESMC (x, x); -+} - -+/* { dg-final { scan-assembler-times "crypto_aese_fused" 6 } } */ -+/* { dg-final { scan-assembler-not "veor" } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/aes-fuse-2.c b/gcc/testsuite/gcc.target/aarch64/aes-fuse-2.c -new file mode 100644 -index 000000000..dfe01b03a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/aes-fuse-2.c -@@ -0,0 +1,65 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O3 -mcpu=cortex-a72+crypto -dp" } */ -+/* { dg-additional-options "-march=armv8-a+crypto" { target { aarch64*-*-* } } }*/ -+ -+#include -+ -+#define AESD(r, v, key) (r = vaesdq_u8 ((v), (key))); -+#define AESIMC(r, i) (r = vaesimcq_u8 (i)) -+ -+const uint8x16_t zero = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; -+ -+uint8x16_t dummy; -+uint8x16_t a; -+uint8x16_t b; -+uint8x16_t c; -+uint8x16_t d; -+uint8x16_t x; -+uint8x16_t y; -+uint8x16_t k; -+ -+void foo (void) -+{ -+ AESD (a, a, k); -+ dummy = vaddq_u8 (dummy, dummy); -+ dummy = vaddq_u8 (dummy, dummy); -+ AESD (b, b, k); -+ dummy = vaddq_u8 (dummy, dummy); -+ dummy = vaddq_u8 (dummy, dummy); -+ AESD (c, c, k); -+ dummy = 
vaddq_u8 (dummy, dummy); -+ dummy = vaddq_u8 (dummy, dummy); -+ AESD (d, d, k); -+ dummy = vaddq_u8 (dummy, dummy); -+ dummy = vaddq_u8 (dummy, dummy); -+ -+ x = x ^ k; -+ AESD (x, x, zero); -+ dummy = vaddq_u8 (dummy, dummy); -+ dummy = vaddq_u8 (dummy, dummy); -+ y = y ^ k; -+ AESD (y, y, zero); -+ dummy = vaddq_u8 (dummy, dummy); -+ dummy = vaddq_u8 (dummy, dummy); -+ -+ AESIMC (d, d); -+ dummy = vaddq_u8 (dummy, dummy); -+ dummy = vaddq_u8 (dummy, dummy); -+ AESIMC (c, c); -+ dummy = vaddq_u8 (dummy, dummy); -+ dummy = vaddq_u8 (dummy, dummy); -+ AESIMC (b, b); -+ dummy = vaddq_u8 (dummy, dummy); -+ dummy = vaddq_u8 (dummy, dummy); -+ AESIMC (a, a); -+ dummy = vaddq_u8 (dummy, dummy); -+ dummy = vaddq_u8 (dummy, dummy); -+ -+ AESIMC (y, y); -+ dummy = vaddq_u8 (dummy, dummy); -+ dummy = vaddq_u8 (dummy, dummy); -+ AESIMC (x, x); -+} -+ -+/* { dg-final { scan-assembler-times "crypto_aesd_fused" 6 } } */ -+/* { dg-final { scan-assembler-not "veor" } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/asm-x-constraint-1.c b/gcc/testsuite/gcc.target/aarch64/asm-x-constraint-1.c -new file mode 100644 -index 000000000..a71043be5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/asm-x-constraint-1.c -@@ -0,0 +1,34 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O" } */ -+ -+void -+f (void) -+{ -+ register float s0 asm ("s0"); -+ register float s7 asm ("s7"); -+ register float s8 asm ("s8"); -+ register float s15 asm ("s15"); -+ register float s16 asm ("s16"); -+ register float s31 asm ("s31"); -+ asm volatile ("// s0 out: %s0" : "=w" (s0)); -+ asm volatile ("// s0 in: %s0" :: "x" (s0)); -+ asm volatile ("// s7 out: %s0" : "=w" (s7)); -+ asm volatile ("// s7 in: %s0" :: "x" (s7)); -+ asm volatile ("// s8 out: %s0" : "=w" (s8)); -+ asm volatile ("// s8 in: %s0" :: "x" (s8)); -+ asm volatile ("// s15 out: %s0" : "=w" (s15)); -+ asm volatile ("// s15 in: %s0" :: "x" (s15)); -+ asm volatile ("// s16 out: %s0" : "=w" (s16)); -+ asm volatile ("// s16 in: %s0" :: "x" (s16)); -+ asm volatile ("// s31 out: %s0" : "=w" (s31)); -+ asm volatile ("// s31 in: %s0" :: "x" (s31)); -+} -+ -+/* { dg-final { scan-assembler {\t// s0 out: s0\n.*[/]/ s0 in: s0\n} } } */ -+/* { dg-final { scan-assembler {\t// s7 out: s7\n.*[/]/ s7 in: s7\n} } } */ -+/* { dg-final { scan-assembler {\t// s8 out: s8\n.*[/]/ s8 in: s8\n} } } */ -+/* { dg-final { scan-assembler {\t// s15 out: s15\n.*[/]/ s15 in: s15\n} } } */ -+/* { dg-final { scan-assembler {\t// s16 out: s16\n.*\tfmov\t(s[0-7]), s16\n.*[/]/ s16 in: \1\n} } } */ -+/* { dg-final { scan-assembler {\t// s31 out: s31\n.*\tfmov\t(s[0-7]), s31\n.*[/]/ s31 in: \1\n} } } */ -+/* { dg-final { scan-assembler-not {\t// s16 in: s16\n} } } */ -+/* { dg-final { scan-assembler-not {\t// s31 in: s31\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/asm-y-constraint-1.c b/gcc/testsuite/gcc.target/aarch64/asm-y-constraint-1.c -new file mode 100644 -index 000000000..4a3fcac56 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/asm-y-constraint-1.c -@@ -0,0 +1,36 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O" } */ -+ -+void -+f (void) -+{ -+ register float s0 asm ("s0"); -+ register float s7 asm ("s7"); -+ register float s8 asm ("s8"); -+ register float s15 asm ("s15"); -+ register float s16 asm ("s16"); -+ register float s31 asm ("s31"); -+ asm volatile ("// s0 out: %s0" : "=w" (s0)); -+ asm volatile ("// s0 in: %s0" :: "y" (s0)); -+ asm volatile ("// s7 out: %s0" : "=w" (s7)); -+ asm volatile ("// s7 in: %s0" :: "y" (s7)); -+ asm volatile ("// s8 out: %s0" : "=w" 
(s8)); -+ asm volatile ("// s8 in: %s0" :: "y" (s8)); -+ asm volatile ("// s15 out: %s0" : "=w" (s15)); -+ asm volatile ("// s15 in: %s0" :: "y" (s15)); -+ asm volatile ("// s16 out: %s0" : "=w" (s16)); -+ asm volatile ("// s16 in: %s0" :: "y" (s16)); -+ asm volatile ("// s31 out: %s0" : "=w" (s31)); -+ asm volatile ("// s31 in: %s0" :: "y" (s31)); -+} -+ -+/* { dg-final { scan-assembler {\t// s0 out: s0\n.*[/]/ s0 in: s0\n} } } */ -+/* { dg-final { scan-assembler {\t// s7 out: s7\n.*[/]/ s7 in: s7\n} } } */ -+/* { dg-final { scan-assembler {\t// s8 out: s8\n.*\tfmov\t(s[0-7]), s8\n.*[/]/ s8 in: \1\n} } } */ -+/* { dg-final { scan-assembler {\t// s15 out: s15\n.*\tfmov\t(s[0-7]), s15\n.*[/]/ s15 in: \1\n} } } */ -+/* { dg-final { scan-assembler {\t// s16 out: s16\n.*\tfmov\t(s[0-7]), s16\n.*[/]/ s16 in: \1\n} } } */ -+/* { dg-final { scan-assembler {\t// s31 out: s31\n.*\tfmov\t(s[0-7]), s31\n.*[/]/ s31 in: \1\n} } } */ -+/* { dg-final { scan-assembler-not {\t// s8 in: s8\n} } } */ -+/* { dg-final { scan-assembler-not {\t// s15 in: s15\n} } } */ -+/* { dg-final { scan-assembler-not {\t// s16 in: s16\n} } } */ -+/* { dg-final { scan-assembler-not {\t// s31 in: s31\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c b/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c -index 49ca5d0d0..a828a72aa 100644 ---- a/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c -+++ b/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf" } */ -+/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf -mno-outline-atomics" } */ - - #include "atomic-comp-swap-release-acquire.x" - -diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c -index 74f26348e..6823ce381 100644 ---- a/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c -+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-march=armv8-a+nolse -O2" } */ -+/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */ - - #include "atomic-op-acq_rel.x" - -diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c -index 66c1b1efe..87937de37 100644 ---- a/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c -+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-march=armv8-a+nolse -O2" } */ -+/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */ - - #include "atomic-op-acquire.x" - -diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c -index c09d0434e..60955e57d 100644 ---- a/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c -+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-march=armv8-a+nolse -O2" } */ -+/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */ - - #include "atomic-op-char.x" - -diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c -index 5783ab84f..16cb11aee 100644 ---- a/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c -+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-march=armv8-a+nolse -O2" 
} */ -+/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */ - - #include "atomic-op-consume.x" - -diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-imm.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-imm.c -index 18b8f0b04..bcab4e481 100644 ---- a/gcc/testsuite/gcc.target/aarch64/atomic-op-imm.c -+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-imm.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-march=armv8-a+nolse -O2" } */ -+/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */ - - int v = 0; - -diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c -index 8520f0839..040e4a8d1 100644 ---- a/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c -+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-march=armv8-a+nolse -O2" } */ -+/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */ - - #include "atomic-op-int.x" - -diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-long.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-long.c -index d011f8c5c..fc88b92cd 100644 ---- a/gcc/testsuite/gcc.target/aarch64/atomic-op-long.c -+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-long.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-march=armv8-a+nolse -O2" } */ -+/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */ - - long v = 0; - -diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c -index ed96bfdb9..503d62b02 100644 ---- a/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c -+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-march=armv8-a+nolse -O2" } */ -+/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */ - - #include "atomic-op-relaxed.x" - -diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c -index fc4be17de..efe14aea7 100644 ---- a/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c -+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-march=armv8-a+nolse -O2" } */ -+/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */ - - #include "atomic-op-release.x" - -diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c -index 613000fe4..09973bf82 100644 ---- a/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c -+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-march=armv8-a+nolse -O2" } */ -+/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */ - - #include "atomic-op-seq_cst.x" - -diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c -index e82c8118e..e1dcebb0f 100644 ---- a/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c -+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-march=armv8-a+nolse -O2" } */ -+/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */ - - #include "atomic-op-short.x" - -diff --git a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c -index f2a21ddf2..29246979b 100644 ---- 
a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -march=armv8-a+nolse" } */ -+/* { dg-options "-O2 -march=armv8-a+nolse -mno-outline-atomics" } */ - /* { dg-skip-if "" { *-*-* } { "-mcpu=*" } { "" } } */ - - int -diff --git a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c -index 8d2ae67df..6daf9b08f 100644 ---- a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -march=armv8-a+nolse" } */ -+/* { dg-options "-O2 -march=armv8-a+nolse -mno-outline-atomics" } */ - /* { dg-skip-if "" { *-*-* } { "-mcpu=*" } { "" } } */ - - int -diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_1.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_1.c -new file mode 100644 -index 000000000..ef4376649 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_1.c -@@ -0,0 +1,102 @@ -+/* { dg-do assemble { target { aarch64*-*-* } } } */ -+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ -+/* { dg-add-options arm_v8_2a_bf16_neon } */ -+/* { dg-additional-options "-O3 --save-temps -std=gnu90" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#include -+ -+/* -+**stacktest1: -+** sub sp, sp, #16 -+** str h0, \[sp, 14\] -+** ldr h0, \[sp, 14\] -+** add sp, sp, 16 -+** ret -+*/ -+bfloat16_t stacktest1 (bfloat16_t __a) -+{ -+ volatile bfloat16_t b = __a; -+ return b; -+} -+ -+/* -+**bfloat_mov_ww: -+** mov v1.h\[0\], v2.h\[0\] -+** ret -+*/ -+void bfloat_mov_ww (void) -+{ -+ register bfloat16_t x asm ("h2"); -+ register bfloat16_t y asm ("h1"); -+ asm volatile ("" : "=w" (x)); -+ y = x; -+ asm volatile ("" :: "w" (y)); -+} -+ -+/* -+**bfloat_mov_rw: -+** dup v1.4h, w1 -+** ret -+*/ -+void bfloat_mov_rw (void) -+{ -+ register bfloat16_t x asm ("w1"); -+ register bfloat16_t y asm ("h1"); -+ asm volatile ("" : "=r" (x)); -+ y = x; -+ asm volatile ("" :: "w" (y)); -+} -+ -+/* -+**bfloat_mov_wr: -+** umov w1, v1.h\[0\] -+** ret -+*/ -+void bfloat_mov_wr (void) -+{ -+ register bfloat16_t x asm ("h1"); -+ register bfloat16_t y asm ("w1"); -+ asm volatile ("" : "=w" (x)); -+ y = x; -+ asm volatile ("" :: "r" (y)); -+} -+ -+/* -+**bfloat_mov_rr: -+** mov w1, w2 -+** ret -+*/ -+void bfloat_mov_rr (void) -+{ -+ register bfloat16_t x asm ("w2"); -+ register bfloat16_t y asm ("w1"); -+ asm volatile ("" : "=r" (x)); -+ y = x; -+ asm volatile ("" :: "r" (y)); -+} -+ -+/* -+**bfloat_mov_rm: -+** strh w2, \[x0\] -+** ret -+*/ -+void bfloat_mov_rm (bfloat16_t *ptr) -+{ -+ register bfloat16_t x asm ("w2"); -+ asm volatile ("" : "=r" (x)); -+ *ptr = x; -+} -+ -+/* -+**bfloat_mov_mr: -+** ldrh w2, \[x0\] -+** ret -+*/ -+void bfloat_mov_mr (bfloat16_t *ptr) -+{ -+ register bfloat16_t y asm ("w2"); -+ y = *ptr; -+ asm volatile ("" :: "r" (y)); -+} -+ -diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_2.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_2.c -new file mode 100644 -index 000000000..df8e7518c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_2.c -@@ -0,0 +1,106 @@ -+/* { dg-do assemble { target { aarch64*-*-* } } } */ -+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ -+/* { dg-additional-options "-march=armv8.2-a -O3 
--save-temps -std=gnu90" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#include -+ -+#pragma GCC push_options -+#pragma GCC target ("+bf16") -+ -+/* -+**stacktest1: -+** sub sp, sp, #16 -+** str h0, \[sp, 14\] -+** ldr h0, \[sp, 14\] -+** add sp, sp, 16 -+** ret -+*/ -+bfloat16_t stacktest1 (bfloat16_t __a) -+{ -+ volatile bfloat16_t b = __a; -+ return b; -+} -+ -+/* -+**bfloat_mov_ww: -+** mov v1.h\[0\], v2.h\[0\] -+** ret -+*/ -+void bfloat_mov_ww (void) -+{ -+ register bfloat16_t x asm ("h2"); -+ register bfloat16_t y asm ("h1"); -+ asm volatile ("" : "=w" (x)); -+ y = x; -+ asm volatile ("" :: "w" (y)); -+} -+ -+/* -+**bfloat_mov_rw: -+** dup v1.4h, w1 -+** ret -+*/ -+void bfloat_mov_rw (void) -+{ -+ register bfloat16_t x asm ("w1"); -+ register bfloat16_t y asm ("h1"); -+ asm volatile ("" : "=r" (x)); -+ y = x; -+ asm volatile ("" :: "w" (y)); -+} -+ -+/* -+**bfloat_mov_wr: -+** umov w1, v1.h\[0\] -+** ret -+*/ -+void bfloat_mov_wr (void) -+{ -+ register bfloat16_t x asm ("h1"); -+ register bfloat16_t y asm ("w1"); -+ asm volatile ("" : "=w" (x)); -+ y = x; -+ asm volatile ("" :: "r" (y)); -+} -+ -+/* -+**bfloat_mov_rr: -+** mov w1, w2 -+** ret -+*/ -+void bfloat_mov_rr (void) -+{ -+ register bfloat16_t x asm ("w2"); -+ register bfloat16_t y asm ("w1"); -+ asm volatile ("" : "=r" (x)); -+ y = x; -+ asm volatile ("" :: "r" (y)); -+} -+ -+/* -+**bfloat_mov_rm: -+** strh w2, \[x0\] -+** ret -+*/ -+void bfloat_mov_rm (bfloat16_t *ptr) -+{ -+ register bfloat16_t x asm ("w2"); -+ asm volatile ("" : "=r" (x)); -+ *ptr = x; -+} -+ -+/* -+**bfloat_mov_mr: -+** ldrh w2, \[x0\] -+** ret -+*/ -+void bfloat_mov_mr (bfloat16_t *ptr) -+{ -+ register bfloat16_t y asm ("w2"); -+ y = *ptr; -+ asm volatile ("" :: "r" (y)); -+} -+ -+#pragma GCC pop_options -+ -diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_3.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_3.c -new file mode 100644 -index 000000000..5d7a4317c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_3.c -@@ -0,0 +1,101 @@ -+/* { dg-do assemble { target { aarch64*-*-* } } } */ -+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ -+/* { dg-additional-options "-march=armv8.2-a -O3 --save-temps -std=gnu90" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#include -+ -+/* -+**stacktest1: -+** sub sp, sp, #16 -+** str h0, \[sp, 14\] -+** ldr h0, \[sp, 14\] -+** add sp, sp, 16 -+** ret -+*/ -+bfloat16_t stacktest1 (bfloat16_t __a) -+{ -+ volatile bfloat16_t b = __a; -+ return b; -+} -+ -+/* -+**bfloat_mov_ww: -+** mov v1.h\[0\], v2.h\[0\] -+** ret -+*/ -+void bfloat_mov_ww (void) -+{ -+ register bfloat16_t x asm ("h2"); -+ register bfloat16_t y asm ("h1"); -+ asm volatile ("" : "=w" (x)); -+ y = x; -+ asm volatile ("" :: "w" (y)); -+} -+ -+/* -+**bfloat_mov_rw: -+** dup v1.4h, w1 -+** ret -+*/ -+void bfloat_mov_rw (void) -+{ -+ register bfloat16_t x asm ("w1"); -+ register bfloat16_t y asm ("h1"); -+ asm volatile ("" : "=r" (x)); -+ y = x; -+ asm volatile ("" :: "w" (y)); -+} -+ -+/* -+**bfloat_mov_wr: -+** umov w1, v1.h\[0\] -+** ret -+*/ -+void bfloat_mov_wr (void) -+{ -+ register bfloat16_t x asm ("h1"); -+ register bfloat16_t y asm ("w1"); -+ asm volatile ("" : "=w" (x)); -+ y = x; -+ asm volatile ("" :: "r" (y)); -+} -+ -+/* -+**bfloat_mov_rr: -+** mov w1, w2 -+** ret -+*/ -+void bfloat_mov_rr (void) -+{ -+ register bfloat16_t x asm ("w2"); -+ register bfloat16_t y asm ("w1"); -+ asm volatile ("" : "=r" (x)); -+ y = x; -+ asm volatile ("" :: "r" (y)); -+} -+ -+/* 
-+**bfloat_mov_rm: -+** strh w2, \[x0\] -+** ret -+*/ -+void bfloat_mov_rm (bfloat16_t *ptr) -+{ -+ register bfloat16_t x asm ("w2"); -+ asm volatile ("" : "=r" (x)); -+ *ptr = x; -+} -+ -+/* -+**bfloat_mov_mr: -+** ldrh w2, \[x0\] -+** ret -+*/ -+void bfloat_mov_mr (bfloat16_t *ptr) -+{ -+ register bfloat16_t y asm ("w2"); -+ y = *ptr; -+ asm volatile ("" :: "r" (y)); -+} -+ -diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_4.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_4.c -new file mode 100644 -index 000000000..b812011c2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_4.c -@@ -0,0 +1,16 @@ -+/* { dg-do assemble { target { aarch64*-*-* } } } */ -+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ -+/* { dg-add-options arm_v8_2a_bf16_neon } */ -+/* { dg-additional-options "-std=c99 -pedantic-errors -O3 --save-temps" } */ -+ -+#include -+ -+_Complex bfloat16_t stacktest1 (_Complex bfloat16_t __a) -+{ -+ volatile _Complex bfloat16_t b = __a; -+ return b; -+} -+ -+/* { dg-error {ISO C does not support plain 'complex' meaning 'double complex'} "" { target *-*-* } 8 } */ -+/* { dg-error {expected '=', ',', ';', 'asm' or '__attribute__' before 'stacktest1'} "" { target *-*-* } 8 } */ -+ -diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_typecheck.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_typecheck.c -new file mode 100644 -index 000000000..7c9188cf2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_typecheck.c -@@ -0,0 +1,219 @@ -+/* { dg-do assemble { target { aarch64*-*-* } } } */ -+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ -+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ -+/* { dg-add-options arm_v8_2a_bf16_neon } */ -+/* { dg-additional-options "-Wno-pedantic -O3 --save-temps" } */ -+ -+#include -+ -+bfloat16_t glob_bfloat; -+ -+int is_an_int; -+short is_a_short_int; -+float is_a_float; -+float is_a_float16; -+double is_a_double; -+ -+float *float_ptr; -+ -+bfloat16_t foo1 (void) { return (bfloat16_t) 0x1234; } /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+bfloat16_t foo2 (void) { return (bfloat16_t) (short) 0x1234; } /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ -+bfloat16_t footest (bfloat16_t scalar0) -+{ -+ -+ /* Initialisation */ -+ -+ bfloat16_t scalar1_1; -+ bfloat16_t scalar1_2 = glob_bfloat; -+ bfloat16_t scalar1_3 = 0; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ bfloat16_t scalar1_4 = 0.1; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ bfloat16_t scalar1_5 = is_a_float; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ bfloat16_t scalar1_6 = is_an_int; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ bfloat16_t scalar1_7 = is_a_float16; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ bfloat16_t scalar1_8 = is_a_double; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ bfloat16_t scalar1_9 = is_a_short_int; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ -+ int initi_1_1 = glob_bfloat; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ float initi_1_2 = glob_bfloat; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ float16_t initi_1_3 = glob_bfloat; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ short initi_1_4 = glob_bfloat; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ double initi_1_5 = glob_bfloat; /* { dg-error {invalid conversion from 
type 'bfloat16_t'} } */ -+ -+ bfloat16_t scalar2_1 = {}; /* { dg-error {empty scalar initializer} } */ -+ bfloat16_t scalar2_2 = { glob_bfloat }; -+ bfloat16_t scalar2_3 = { 0 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ bfloat16_t scalar2_4 = { 0.1 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ bfloat16_t scalar2_5 = { is_a_float }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ bfloat16_t scalar2_6 = { is_an_int }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ bfloat16_t scalar2_7 = { is_a_float16 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ bfloat16_t scalar2_8 = { is_a_double }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ bfloat16_t scalar2_9 = { is_a_short_int }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ -+ int initi_2_1 = { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ float initi_2_2 = { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ float16_t initi_2_3 = { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ short initi_2_4 = { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ double initi_2_5 = { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ -+ /* Assignments. */ -+ -+ glob_bfloat = glob_bfloat; -+ glob_bfloat = 0; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ glob_bfloat = 0.1; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ glob_bfloat = is_a_float; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ glob_bfloat = is_an_int; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ glob_bfloat = is_a_float16; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ glob_bfloat = is_a_double; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ glob_bfloat = is_a_short_int; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ -+ is_an_int = glob_bfloat; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ is_a_float = glob_bfloat; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ is_a_float16 = glob_bfloat; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ is_a_double = glob_bfloat; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ is_a_short_int = glob_bfloat; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ -+ /* Casting. */ -+ -+ (void) glob_bfloat; -+ (bfloat16_t) glob_bfloat; -+ -+ (int) glob_bfloat; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ (float) glob_bfloat; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ (float16_t) glob_bfloat; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ (double) glob_bfloat; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ (short) glob_bfloat; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ -+ (bfloat16_t) is_an_int; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ (bfloat16_t) is_a_float; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ (bfloat16_t) is_a_float16; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ (bfloat16_t) is_a_double; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ (bfloat16_t) is_a_short_int; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ -+ /* Compound literals. 
*/ -+ -+ (bfloat16_t) {}; /* { dg-error {empty scalar initializer} } */ -+ (bfloat16_t) { glob_bfloat }; -+ (bfloat16_t) { 0 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ (bfloat16_t) { 0.1 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ (bfloat16_t) { is_a_float }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ (bfloat16_t) { is_an_int }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ (bfloat16_t) { is_a_float16 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ (bfloat16_t) { is_a_double }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ (bfloat16_t) { is_a_short_int }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ -+ (int) { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ (float) { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ (float16_t) { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ (double) { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ (short) { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ -+ /* Arrays and Structs. */ -+ -+ typedef bfloat16_t array_type[2]; -+ extern bfloat16_t extern_array[]; -+ -+ bfloat16_t array[2]; -+ bfloat16_t zero_length_array[0]; -+ bfloat16_t empty_init_array[] = {}; -+ typedef bfloat16_t some_other_type[is_an_int]; -+ -+ struct struct1 { -+ bfloat16_t a; -+ }; -+ -+ union union1 { -+ bfloat16_t a; -+ }; -+ -+ /* Addressing and dereferencing. */ -+ -+ bfloat16_t *bfloat_ptr = &scalar0; -+ scalar0 = *bfloat_ptr; -+ -+ /* Pointer assignment. */ -+ -+ bfloat16_t *bfloat_ptr2 = bfloat_ptr; -+ bfloat16_t *bfloat_ptr3 = array; -+ -+ /* Pointer arithmetic. */ -+ -+ ++bfloat_ptr; -+ --bfloat_ptr; -+ bfloat_ptr++; -+ bfloat_ptr--; -+ bfloat_ptr += 1; -+ bfloat_ptr -= 1; -+ bfloat_ptr - bfloat_ptr2; -+ bfloat_ptr = &bfloat_ptr3[0]; -+ bfloat_ptr = &bfloat_ptr3[1]; -+ -+ /* Simple comparison. */ -+ scalar0 > glob_bfloat; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ glob_bfloat == scalar0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ scalar0 > is_a_float; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ is_a_float == scalar0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ scalar0 > 0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ 0 == scalar0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ scalar0 > 0.1; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ 0.1 == scalar0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ scalar0 > is_an_int; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ is_an_int == scalar0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ -+ /* Pointer comparison. */ -+ -+ bfloat_ptr == &scalar0; -+ bfloat_ptr != &scalar0; -+ bfloat_ptr < &scalar0; -+ bfloat_ptr <= &scalar0; -+ bfloat_ptr > &scalar0; -+ bfloat_ptr >= &scalar0; -+ bfloat_ptr == bfloat_ptr2; -+ bfloat_ptr != bfloat_ptr2; -+ bfloat_ptr < bfloat_ptr2; -+ bfloat_ptr <= bfloat_ptr2; -+ bfloat_ptr > bfloat_ptr2; -+ bfloat_ptr >= bfloat_ptr2; -+ -+ /* Conditional expressions. */ -+ -+ 0 ? scalar0 : scalar0; -+ 0 ? scalar0 : is_a_float; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ 0 ? 
is_a_float : scalar0; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ 0 ? scalar0 : 0; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ 0 ? 0 : scalar0; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ 0 ? 0.1 : scalar0; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ 0 ? scalar0 : 0.1; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ 0 ? bfloat_ptr : bfloat_ptr2; -+ 0 ? bfloat_ptr : float_ptr; /* { dg-error {pointer type mismatch in conditional expression} } */ -+ 0 ? float_ptr : bfloat_ptr; /* { dg-error {pointer type mismatch in conditional expression} } */ -+ -+ scalar0 ? scalar0 : scalar0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ scalar0 ? is_a_float : scalar0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ scalar0 ? scalar0 : is_a_float; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ scalar0 ? is_a_float : is_a_float; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ -+ /* Unary operators. */ -+ -+ +scalar0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ -scalar0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ ~scalar0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ !scalar0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ *scalar0; /* { dg-error {invalid type argument of unary '\*'} } */ -+ __real scalar0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ __imag scalar0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ ++scalar0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ --scalar0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ scalar0++; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ scalar0--; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ -+ /* Binary arithmetic operations. 
*/ -+ -+ scalar0 = glob_bfloat + *bfloat_ptr; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ scalar0 = glob_bfloat + 0.1; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ scalar0 = glob_bfloat + 0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ scalar0 = glob_bfloat + is_a_float; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ -+ return scalar0; -+} -+ -diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_simd_1.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_simd_1.c -new file mode 100644 -index 000000000..6cad557eb ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_simd_1.c -@@ -0,0 +1,93 @@ -+/* { dg-do assemble { target { aarch64*-*-* } } } */ -+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ -+/* { dg-add-options arm_v8_2a_bf16_neon } */ -+/* { dg-additional-options "-O3 --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#include -+ -+/* -+**stacktest1: -+** sub sp, sp, #16 -+** str h0, \[sp, 14\] -+** ldr h0, \[sp, 14\] -+** add sp, sp, 16 -+** ret -+*/ -+bfloat16_t stacktest1 (bfloat16_t __a) -+{ -+ volatile bfloat16_t b = __a; -+ return b; -+} -+ -+/* -+**stacktest2: -+** sub sp, sp, #16 -+** str d0, \[sp, 8\] -+** ldr d0, \[sp, 8\] -+** add sp, sp, 16 -+** ret -+*/ -+bfloat16x4_t stacktest2 (bfloat16x4_t __a) -+{ -+ volatile bfloat16x4_t b = __a; -+ return b; -+} -+ -+/* -+**stacktest3: -+** sub sp, sp, #16 -+** str q0, \[sp\] -+** ldr q0, \[sp\] -+** add sp, sp, 16 -+** ret -+*/ -+bfloat16x8_t stacktest3 (bfloat16x8_t __a) -+{ -+ volatile bfloat16x8_t b = __a; -+ return b; -+} -+ -+/* Test compilation of __attribute__ vectors of 8, 16, 32, etc. BFloats. */ -+typedef bfloat16_t v8bf __attribute__((vector_size(16))); -+typedef bfloat16_t v16bf __attribute__((vector_size(32))); -+typedef bfloat16_t v32bf __attribute__((vector_size(64))); -+typedef bfloat16_t v64bf __attribute__((vector_size(128))); -+typedef bfloat16_t v128bf __attribute__((vector_size(256))); -+ -+v8bf stacktest4 (v8bf __a) -+{ -+ volatile v8bf b = __a; -+ return b; -+} -+ -+v16bf stacktest5 (v16bf __a) -+{ -+ volatile v16bf b = __a; -+ return b; -+} -+ -+v32bf stacktest6 (v32bf __a) -+{ -+ volatile v32bf b = __a; -+ return b; -+} -+ -+v64bf stacktest7 (v64bf __a) -+{ -+ volatile v64bf b = __a; -+ return b; -+} -+ -+v128bf stacktest8 (v128bf __a) -+{ -+ volatile v128bf b = __a; -+ return b; -+} -+ -+/* Test use of constant values to assign values to vectors. 
*/ -+ -+typedef bfloat16_t v2bf __attribute__((vector_size(4))); -+v2bf c2 (void) { return (v2bf) 0x12345678; } -+ -+bfloat16x4_t c3 (void) { return (bfloat16x4_t) 0x1234567812345678; } -diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_simd_2.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_simd_2.c -new file mode 100644 -index 000000000..3891dcfc9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_simd_2.c -@@ -0,0 +1,97 @@ -+/* { dg-do assemble { target { aarch64*-*-* } } } */ -+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ -+/* { dg-additional-options "-march=armv8.2-a -O3 --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#include -+ -+#pragma GCC push_options -+#pragma GCC target ("+bf16") -+ -+/* -+**stacktest1: -+** sub sp, sp, #16 -+** str h0, \[sp, 14\] -+** ldr h0, \[sp, 14\] -+** add sp, sp, 16 -+** ret -+*/ -+bfloat16_t stacktest1 (bfloat16_t __a) -+{ -+ volatile bfloat16_t b = __a; -+ return b; -+} -+ -+/* -+**stacktest2: -+** sub sp, sp, #16 -+** str d0, \[sp, 8\] -+** ldr d0, \[sp, 8\] -+** add sp, sp, 16 -+** ret -+*/ -+bfloat16x4_t stacktest2 (bfloat16x4_t __a) -+{ -+ volatile bfloat16x4_t b = __a; -+ return b; -+} -+ -+/* -+**stacktest3: -+** sub sp, sp, #16 -+** str q0, \[sp\] -+** ldr q0, \[sp\] -+** add sp, sp, 16 -+** ret -+*/ -+bfloat16x8_t stacktest3 (bfloat16x8_t __a) -+{ -+ volatile bfloat16x8_t b = __a; -+ return b; -+} -+ -+/* Test compilation of __attribute__ vectors of 8, 16, 32, etc. BFloats. */ -+typedef bfloat16_t v8bf __attribute__((vector_size(16))); -+typedef bfloat16_t v16bf __attribute__((vector_size(32))); -+typedef bfloat16_t v32bf __attribute__((vector_size(64))); -+typedef bfloat16_t v64bf __attribute__((vector_size(128))); -+typedef bfloat16_t v128bf __attribute__((vector_size(256))); -+ -+v8bf stacktest4 (v8bf __a) -+{ -+ volatile v8bf b = __a; -+ return b; -+} -+ -+v16bf stacktest5 (v16bf __a) -+{ -+ volatile v16bf b = __a; -+ return b; -+} -+ -+v32bf stacktest6 (v32bf __a) -+{ -+ volatile v32bf b = __a; -+ return b; -+} -+ -+v64bf stacktest7 (v64bf __a) -+{ -+ volatile v64bf b = __a; -+ return b; -+} -+ -+v128bf stacktest8 (v128bf __a) -+{ -+ volatile v128bf b = __a; -+ return b; -+} -+ -+/* Test use of constant values to assign values to vectors. 
*/ -+ -+typedef bfloat16_t v2bf __attribute__((vector_size(4))); -+v2bf c2 (void) { return (v2bf) 0x12345678; } -+ -+bfloat16x4_t c3 (void) { return (bfloat16x4_t) 0x1234567812345678; } -+ -+#pragma GCC pop_options -diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_simd_3.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_simd_3.c -new file mode 100644 -index 000000000..b35f5e527 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_simd_3.c -@@ -0,0 +1,92 @@ -+/* { dg-do assemble { target { aarch64*-*-* } } } */ -+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ -+/* { dg-additional-options "-march=armv8.2-a -O3 --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#include -+ -+/* -+**stacktest1: -+** sub sp, sp, #16 -+** str h0, \[sp, 14\] -+** ldr h0, \[sp, 14\] -+** add sp, sp, 16 -+** ret -+*/ -+bfloat16_t stacktest1 (bfloat16_t __a) -+{ -+ volatile bfloat16_t b = __a; -+ return b; -+} -+ -+/* -+**stacktest2: -+** sub sp, sp, #16 -+** str d0, \[sp, 8\] -+** ldr d0, \[sp, 8\] -+** add sp, sp, 16 -+** ret -+*/ -+bfloat16x4_t stacktest2 (bfloat16x4_t __a) -+{ -+ volatile bfloat16x4_t b = __a; -+ return b; -+} -+ -+/* -+**stacktest3: -+** sub sp, sp, #16 -+** str q0, \[sp\] -+** ldr q0, \[sp\] -+** add sp, sp, 16 -+** ret -+*/ -+bfloat16x8_t stacktest3 (bfloat16x8_t __a) -+{ -+ volatile bfloat16x8_t b = __a; -+ return b; -+} -+ -+/* Test compilation of __attribute__ vectors of 8, 16, 32, etc. BFloats. */ -+typedef bfloat16_t v8bf __attribute__((vector_size(16))); -+typedef bfloat16_t v16bf __attribute__((vector_size(32))); -+typedef bfloat16_t v32bf __attribute__((vector_size(64))); -+typedef bfloat16_t v64bf __attribute__((vector_size(128))); -+typedef bfloat16_t v128bf __attribute__((vector_size(256))); -+ -+v8bf stacktest4 (v8bf __a) -+{ -+ volatile v8bf b = __a; -+ return b; -+} -+ -+v16bf stacktest5 (v16bf __a) -+{ -+ volatile v16bf b = __a; -+ return b; -+} -+ -+v32bf stacktest6 (v32bf __a) -+{ -+ volatile v32bf b = __a; -+ return b; -+} -+ -+v64bf stacktest7 (v64bf __a) -+{ -+ volatile v64bf b = __a; -+ return b; -+} -+ -+v128bf stacktest8 (v128bf __a) -+{ -+ volatile v128bf b = __a; -+ return b; -+} -+ -+/* Test use of constant values to assign values to vectors. */ -+ -+typedef bfloat16_t v2bf __attribute__((vector_size(4))); -+v2bf c2 (void) { return (v2bf) 0x12345678; } -+ -+bfloat16x4_t c3 (void) { return (bfloat16x4_t) 0x1234567812345678; } -diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_vector_typecheck_1.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_vector_typecheck_1.c -new file mode 100644 -index 000000000..4af3d295f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_vector_typecheck_1.c -@@ -0,0 +1,262 @@ -+/* { dg-do assemble { target { aarch64*-*-* } } } */ -+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ -+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ -+/* { dg-add-options arm_v8_2a_bf16_neon } */ -+/* { dg-additional-options "-O3 --save-temps -Wno-pedantic" } */ -+#include -+ -+bfloat16_t glob_bfloat; -+bfloat16x4_t glob_bfloat_vec; -+ -+float32x4_t is_a_float_vec; -+float32x2_t is_a_float_pair; -+ -+float16x4_t *float_ptr; -+float16x4_t is_a_float16_vec; -+ -+int32x4_t is_an_int_vec; -+int32x2_t is_an_int_pair; -+int16x4_t is_a_short_vec; -+ -+int is_an_int; -+short is_a_short_int; -+float is_a_float; -+float is_a_float16; -+double is_a_double; -+ -+/* Create a vector of 2 bfloat16_t. 
*/ -+typedef bfloat16_t v2bf __attribute__((vector_size(4))); -+v2bf foo1 (void) { return (v2bf) 0x12345678; } -+bfloat16x4_t foo2 (void) { return (bfloat16x4_t) 0x1234567812345678; } -+ -+bfloat16x4_t footest (bfloat16x4_t vector0) -+{ -+ /* Initialisation */ -+ -+ bfloat16x4_t vector1_1; -+ bfloat16x4_t vector1_2 = glob_bfloat_vec; -+ bfloat16x4_t vector1_3 = is_a_float_vec; /* { dg-error {incompatible types when initializing type 'bfloat16x4_t' using type 'float32x4_t'} } */ -+ bfloat16x4_t vector1_4 = is_an_int_vec; /* { dg-error {incompatible types when initializing type 'bfloat16x4_t' using type 'int32x4_t'} } */ -+ bfloat16x4_t vector1_5 = is_a_float16_vec; /* { dg-error {incompatible types when initializing type 'bfloat16x4_t' using type 'float16x4_t'} } */ -+ bfloat16x4_t vector1_6 = is_a_float_pair; /* { dg-error {incompatible types when initializing type 'bfloat16x4_t' using type 'float32x2_t'} } */ -+ bfloat16x4_t vector1_7 = is_an_int_pair; /* { dg-error {incompatible types when initializing type 'bfloat16x4_t' using type 'int32x2_t'} } */ -+ bfloat16x4_t vector1_8 = is_a_short_vec; /* { dg-error {incompatible types when initializing type 'bfloat16x4_t' using type 'int16x4_t'} } */ -+ -+ int32x4_t initi_1_1 = glob_bfloat_vec; /* { dg-error {incompatible types when initializing type 'int32x4_t' using type 'bfloat16x4_t'} } */ -+ float32x4_t initi_1_2 = glob_bfloat_vec; /* { dg-error {incompatible types when initializing type 'float32x4_t' using type 'bfloat16x4_t'} } */ -+ float16x4_t initi_1_3 = glob_bfloat_vec; /* { dg-error {incompatible types when initializing type 'float16x4_t' using type 'bfloat16x4_t'} } */ -+ float32x2_t initi_1_4 = glob_bfloat_vec; /* { dg-error {incompatible types when initializing type 'float32x2_t' using type 'bfloat16x4_t'} } */ -+ int32x2_t initi_1_5 = glob_bfloat_vec; /* { dg-error {incompatible types when initializing type 'int32x2_t' using type 'bfloat16x4_t'} } */ -+ int16x4_t initi_1_6 = glob_bfloat_vec; /* { dg-error {incompatible types when initializing type 'int16x4_t' using type 'bfloat16x4_t'} } */ -+ -+ bfloat16x4_t vector2_1 = {}; -+ bfloat16x4_t vector2_2 = { glob_bfloat }; -+ bfloat16x4_t vector2_3 = { glob_bfloat, glob_bfloat, glob_bfloat, glob_bfloat }; -+ bfloat16x4_t vector2_4 = { 0 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ bfloat16x4_t vector2_5 = { 0.1 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ bfloat16x4_t vector2_6 = { is_a_float16 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ bfloat16x4_t vector2_7 = { is_a_float }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ bfloat16x4_t vector2_8 = { is_an_int }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ bfloat16x4_t vector2_9 = { is_a_short_int }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ bfloat16x4_t vector2_10 = { 0.0, 0, is_a_short_int, is_a_float }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ -+ int32x4_t initi_2_1 = { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ float32x4_t initi_2_2 = { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ float16x4_t initi_2_3 = { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ float32x2_t initi_2_4 = { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ int32x2_t initi_2_5 = { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ int16x4_t 
initi_2_6 = { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ -+ /* Assignments to/from vectors. */ -+ -+ glob_bfloat_vec = glob_bfloat_vec; -+ glob_bfloat_vec = 0; /* { dg-error {incompatible types when assigning to type 'bfloat16x4_t' from type 'int'} } */ -+ glob_bfloat_vec = 0.1; /* { dg-error {incompatible types when assigning to type 'bfloat16x4_t' from type 'double'} } */ -+ glob_bfloat_vec = is_a_float_vec; /* { dg-error {incompatible types when assigning to type 'bfloat16x4_t' from type 'float32x4_t'} } */ -+ glob_bfloat_vec = is_an_int_vec; /* { dg-error {incompatible types when assigning to type 'bfloat16x4_t' from type 'int32x4_t'} } */ -+ glob_bfloat_vec = is_a_float16_vec; /* { dg-error {incompatible types when assigning to type 'bfloat16x4_t' from type 'float16x4_t'} } */ -+ glob_bfloat_vec = is_a_float_pair; /* { dg-error {incompatible types when assigning to type 'bfloat16x4_t' from type 'float32x2_t'} } */ -+ glob_bfloat_vec = is_an_int_pair; /* { dg-error {incompatible types when assigning to type 'bfloat16x4_t' from type 'int32x2_t'} } */ -+ glob_bfloat_vec = is_a_short_vec; /* { dg-error {incompatible types when assigning to type 'bfloat16x4_t' from type 'int16x4_t'} } */ -+ -+ is_an_int_vec = glob_bfloat_vec; /* { dg-error {incompatible types when assigning to type 'int32x4_t' from type 'bfloat16x4_t'} } */ -+ is_a_float_vec = glob_bfloat_vec; /* { dg-error {incompatible types when assigning to type 'float32x4_t' from type 'bfloat16x4_t'} } */ -+ is_a_float16_vec = glob_bfloat_vec; /* { dg-error {incompatible types when assigning to type 'float16x4_t' from type 'bfloat16x4_t'} } */ -+ is_a_float_pair = glob_bfloat_vec; /* { dg-error {incompatible types when assigning to type 'float32x2_t' from type 'bfloat16x4_t'} } */ -+ is_an_int_pair = glob_bfloat_vec; /* { dg-error {incompatible types when assigning to type 'int32x2_t' from type 'bfloat16x4_t'} } */ -+ is_a_short_vec = glob_bfloat_vec;/* { dg-error {incompatible types when assigning to type 'int16x4_t' from type 'bfloat16x4_t'} } */ -+ -+ /* Assignments to/from elements. */ -+ -+ vector2_3[0] = glob_bfloat; -+ vector2_3[0] = is_an_int; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ vector2_3[0] = is_a_short_int; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ vector2_3[0] = is_a_float; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ vector2_3[0] = is_a_float16; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ vector2_3[0] = 0; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ vector2_3[0] = 0.1; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ -+ glob_bfloat = vector2_3[0]; -+ is_an_int = vector2_3[0]; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ is_a_short_int = vector2_3[0]; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ is_a_float = vector2_3[0]; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ is_a_float16 = vector2_3[0]; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ -+ /* Compound literals. 
*/ -+ -+ (bfloat16x4_t) {}; -+ -+ (bfloat16x4_t) { 0 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ (bfloat16x4_t) { 0.1 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ (bfloat16x4_t) { is_a_float_vec }; /* { dg-error {incompatible types when initializing type '__bf16' using type 'float32x4_t'} } */ -+ (bfloat16x4_t) { is_an_int_vec }; /* { dg-error {incompatible types when initializing type '__bf16' using type 'int32x4_t'} } */ -+ (bfloat16x4_t) { is_a_float_pair }; /* { dg-error {incompatible types when initializing type '__bf16' using type 'float32x2_t'} } */ -+ (bfloat16x4_t) { is_an_int_pair }; /* { dg-error {incompatible types when initializing type '__bf16' using type 'int32x2_t'} } */ -+ (bfloat16x4_t) { is_a_float16_vec }; /* { dg-error {incompatible types when initializing type '__bf16' using type 'float16x4_t'} } */ -+ (bfloat16x4_t) { is_a_short_vec }; /* { dg-error {incompatible types when initializing type '__bf16' using type 'int16x4_t'} } */ -+ -+ (bfloat16x4_t) { glob_bfloat_vec }; /* { dg-error {incompatible types when initializing type '__bf16' using type 'bfloat16x4_t'} } */ -+ (int32x4_t) { glob_bfloat_vec }; /* { dg-error {incompatible types when initializing type 'int' using type 'bfloat16x4_t'} } */ -+ (float32x4_t) { glob_bfloat_vec }; /* { dg-error {incompatible types when initializing type 'float' using type 'bfloat16x4_t'} } */ -+ (int32x2_t) { glob_bfloat_vec }; /* { dg-error {incompatible types when initializing type 'int' using type 'bfloat16x4_t'} } */ -+ (float16x4_t) { glob_bfloat_vec }; /* { dg-error {incompatible types when initializing type '__fp16' using type 'bfloat16x4_t'} } */ -+ (int16x4_t) { glob_bfloat_vec }; /* { dg-error {incompatible types when initializing type 'short int' using type 'bfloat16x4_t'} } */ -+ -+ /* Casting. 
*/ -+ -+ (void) glob_bfloat_vec; -+ (bfloat16x4_t) glob_bfloat_vec; -+ -+ (bfloat16_t) glob_bfloat_vec; /* { dg-error {aggregate value used where a float was expected} } */ -+ (short) glob_bfloat_vec; /* { dg-error {can't convert a vector of type 'bfloat16x4_t' to type 'short int' which has different size} } */ -+ (int) glob_bfloat_vec; /* { dg-error {can't convert a vector of type 'bfloat16x4_t' to type 'int' which has different size} } */ -+ (float16_t) glob_bfloat_vec; /* { dg-error {aggregate value used where a float was expected} } */ -+ (float) glob_bfloat_vec; /* { dg-error {aggregate value used where a float was expected} } */ -+ (double) glob_bfloat_vec; /* { dg-error {aggregate value used where a float was expected} } */ -+ -+ (int32x4_t) glob_bfloat_vec; /* { dg-error {can't convert a value of type 'bfloat16x4_t' to vector type '__Int32x4_t' which has different size} } */ -+ (float32x4_t) glob_bfloat_vec; /* { dg-error {can't convert a value of type 'bfloat16x4_t' to vector type '__Float32x4_t' which has different size} } */ -+ (float16x4_t) glob_bfloat_vec; -+ (int32x2_t) glob_bfloat_vec; -+ (float32x2_t) glob_bfloat_vec; -+ (int16x4_t) glob_bfloat_vec; -+ -+ (bfloat16x4_t) is_an_int_vec; /* { dg-error {can't convert a value of type 'int32x4_t' to vector type '__Bfloat16x4_t' which has different size} } */ -+ (bfloat16x4_t) is_a_float_vec; /* { dg-error {can't convert a value of type 'float32x4_t' to vector type '__Bfloat16x4_t' which has different size} } */ -+ (bfloat16x4_t) is_a_float16_vec; -+ (bfloat16x4_t) is_an_int_pair; -+ (bfloat16x4_t) is_a_float_pair; -+ (bfloat16x4_t) is_a_short_vec; -+ (bfloat16x4_t) is_a_double; /* { dg-error {can't convert value to a vector} } */ -+ -+ /* Arrays and Structs. */ -+ -+ typedef bfloat16x4_t array_type[2]; -+ extern bfloat16x4_t extern_array[]; -+ -+ bfloat16x4_t array[2]; -+ bfloat16x4_t zero_length_array[0]; -+ bfloat16x4_t empty_init_array[] = {}; -+ typedef bfloat16x4_t some_other_type[is_an_int]; -+ -+ struct struct1 { -+ bfloat16x4_t a; -+ }; -+ -+ union union1 { -+ bfloat16x4_t a; -+ }; -+ -+ /* Addressing and dereferencing. */ -+ -+ bfloat16x4_t *bfloat_ptr = &vector0; -+ vector0 = *bfloat_ptr; -+ -+ /* Pointer assignment. */ -+ -+ bfloat16x4_t *bfloat_ptr2 = bfloat_ptr; -+ bfloat16x4_t *bfloat_ptr3 = array; -+ -+ /* Pointer arithmetic. */ -+ -+ ++bfloat_ptr; -+ --bfloat_ptr; -+ bfloat_ptr++; -+ bfloat_ptr--; -+ bfloat_ptr += 1; -+ bfloat_ptr -= 1; -+ bfloat_ptr - bfloat_ptr2; -+ bfloat_ptr = &bfloat_ptr3[0]; -+ bfloat_ptr = &bfloat_ptr3[1]; -+ -+ /* Simple comparison. */ -+ vector0 > glob_bfloat_vec; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ glob_bfloat_vec == vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ vector0 > is_a_float_vec; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ is_a_float_vec == vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ vector0 > 0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ 0 == vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ vector0 > 0.1; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ 0.1 == vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ vector0 > is_an_int_vec; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ is_an_int_vec == vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ -+ /* Pointer comparison. 
*/ -+ -+ bfloat_ptr == &vector0; -+ bfloat_ptr != &vector0; -+ bfloat_ptr < &vector0; -+ bfloat_ptr <= &vector0; -+ bfloat_ptr > &vector0; -+ bfloat_ptr >= &vector0; -+ bfloat_ptr == bfloat_ptr2; -+ bfloat_ptr != bfloat_ptr2; -+ bfloat_ptr < bfloat_ptr2; -+ bfloat_ptr <= bfloat_ptr2; -+ bfloat_ptr > bfloat_ptr2; -+ bfloat_ptr >= bfloat_ptr2; -+ -+ /* Conditional expressions. */ -+ -+ 0 ? vector0 : vector0; -+ 0 ? vector0 : is_a_float_vec; /* { dg-error {type mismatch in conditional expression} } */ -+ 0 ? is_a_float_vec : vector0; /* { dg-error {type mismatch in conditional expression} } */ -+ 0 ? vector0 : is_a_float16_vec; /* { dg-error {type mismatch in conditional expression} } */ -+ 0 ? is_a_float16_vec : vector0; /* { dg-error {type mismatch in conditional expression} } */ -+ 0 ? vector0 : 0; /* { dg-error {type mismatch in conditional expression} } */ -+ 0 ? 0 : vector0; /* { dg-error {type mismatch in conditional expression} } */ -+ 0 ? 0.1 : vector0; /* { dg-error {type mismatch in conditional expression} } */ -+ 0 ? vector0 : 0.1; /* { dg-error {type mismatch in conditional expression} } */ -+ 0 ? bfloat_ptr : bfloat_ptr2; -+ 0 ? bfloat_ptr : float_ptr; /* { dg-error {pointer type mismatch in conditional expression} } */ -+ 0 ? float_ptr : bfloat_ptr; /* { dg-error {pointer type mismatch in conditional expression} } */ -+ -+ vector0 ? vector0 : vector0; /* { dg-error {used vector type where scalar is required} } */ -+ vector0 ? is_a_float16_vec : vector0; /* { dg-error {used vector type where scalar is required} } */ -+ vector0 ? vector0 : is_a_float16_vec; /* { dg-error {used vector type where scalar is required} } */ -+ vector0 ? is_a_float16_vec : is_a_float16_vec; /* { dg-error {used vector type where scalar is required} } */ -+ -+ /* Unary operators. */ -+ -+ +vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ -vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ ~vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ !vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ *vector0; /* { dg-error {invalid type argument of unary '\*'} } */ -+ __real vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ __imag vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ ++vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ --vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ vector0++; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ vector0--; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ -+ /* Binary arithmetic operations. 
*/ -+ -+ vector0 = glob_bfloat_vec + *bfloat_ptr; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ vector0 = glob_bfloat_vec + 0.1; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ vector0 = glob_bfloat_vec + 0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ vector0 = glob_bfloat_vec + is_a_float_vec; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ -+ return vector0; -+} -+ -diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_vector_typecheck_2.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_vector_typecheck_2.c -new file mode 100644 -index 000000000..99c499ce8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_vector_typecheck_2.c -@@ -0,0 +1,260 @@ -+/* { dg-do assemble { target { aarch64*-*-* } } } */ -+/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ -+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ -+/* { dg-add-options arm_v8_2a_bf16_neon } */ -+/* { dg-additional-options "-O3 --save-temps -Wno-pedantic" } */ -+#include -+ -+bfloat16_t glob_bfloat; -+bfloat16x8_t glob_bfloat_vec; -+ -+float32x4_t is_a_float_vec; -+float64x2_t is_a_double_pair; -+ -+float16x8_t *float_ptr; -+float16x8_t is_a_float16_vec; -+ -+int32x4_t is_an_int_vec; -+int64x2_t is_a_long_int_pair; -+int16x8_t is_a_short_vec; -+ -+int is_an_int; -+short is_a_short_int; -+float is_a_float; -+float is_a_float16; -+double is_a_double; -+ -+bfloat16x8_t foo3 (void) { return (bfloat16x8_t) 0x12345678123456781234567812345678; } -+ /* { dg-error {integer constant is too large for its type} "" {target *-*-*} 27 } */ -+ /* { dg-error {can't convert a value of type 'long int' to vector type '__Bfloat16x8_t' which has different size} "" {target *-*-*} 27 } */ -+ -+bfloat16x8_t footest (bfloat16x8_t vector0) -+{ -+ /* Initialisation */ -+ -+ bfloat16x8_t vector1_1; -+ bfloat16x8_t vector1_2 = glob_bfloat_vec; -+ bfloat16x8_t vector1_3 = is_a_float_vec; /* { dg-error {incompatible types when initializing type 'bfloat16x8_t' using type 'float32x4_t'} } */ -+ bfloat16x8_t vector1_4 = is_an_int_vec; /* { dg-error {incompatible types when initializing type 'bfloat16x8_t' using type 'int32x4_t'} } */ -+ bfloat16x8_t vector1_5 = is_a_float16_vec; /* { dg-error {incompatible types when initializing type 'bfloat16x8_t' using type 'float16x8_t'} } */ -+ bfloat16x8_t vector1_6 = is_a_double_pair; /* { dg-error {incompatible types when initializing type 'bfloat16x8_t' using type 'float64x2_t'} } */ -+ bfloat16x8_t vector1_7 = is_a_long_int_pair; /* { dg-error {incompatible types when initializing type 'bfloat16x8_t' using type 'int64x2_t'} } */ -+ bfloat16x8_t vector1_8 = is_a_short_vec; /* { dg-error {incompatible types when initializing type 'bfloat16x8_t' using type 'int16x8_t'} } */ -+ -+ int32x4_t initi_1_1 = glob_bfloat_vec; /* { dg-error {incompatible types when initializing type 'int32x4_t' using type 'bfloat16x8_t'} } */ -+ float32x4_t initi_1_2 = glob_bfloat_vec; /* { dg-error {incompatible types when initializing type 'float32x4_t' using type 'bfloat16x8_t'} } */ -+ float16x8_t initi_1_3 = glob_bfloat_vec; /* { dg-error {incompatible types when initializing type 'float16x8_t' using type 'bfloat16x8_t'} } */ -+ float64x2_t initi_1_4 = glob_bfloat_vec; /* { dg-error {incompatible types when initializing type 'float64x2_t' using type 'bfloat16x8_t'} } */ -+ int64x2_t initi_1_5 = glob_bfloat_vec; /* { dg-error {incompatible types when initializing type 'int64x2_t' using type 'bfloat16x8_t'} } */ -+ int16x8_t 
initi_1_6 = glob_bfloat_vec; /* { dg-error {incompatible types when initializing type 'int16x8_t' using type 'bfloat16x8_t'} } */ -+ -+ bfloat16x8_t vector2_1 = {}; -+ bfloat16x8_t vector2_2 = { glob_bfloat }; -+ bfloat16x8_t vector2_3 = { glob_bfloat, glob_bfloat, glob_bfloat, glob_bfloat }; -+ bfloat16x8_t vector2_4 = { 0 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ bfloat16x8_t vector2_5 = { 0.1 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ bfloat16x8_t vector2_6 = { is_a_float16 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ bfloat16x8_t vector2_7 = { is_a_float }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ bfloat16x8_t vector2_8 = { is_an_int }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ bfloat16x8_t vector2_9 = { is_a_short_int }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ bfloat16x8_t vector2_10 = { 0.0, 0, is_a_short_int, is_a_float }; /* { dg-error "invalid conversion to type 'bfloat16_t'" } */ -+ -+ int32x4_t initi_2_1 = { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ float32x4_t initi_2_2 = { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ float16x8_t initi_2_3 = { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ float64x2_t initi_2_4 = { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ int64x2_t initi_2_5 = { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ int16x8_t initi_2_6 = { glob_bfloat }; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ -+ /* Assignments to/from vectors. */ -+ -+ glob_bfloat_vec = glob_bfloat_vec; -+ glob_bfloat_vec = 0; /* { dg-error {incompatible types when assigning to type 'bfloat16x8_t' from type 'int'} } */ -+ glob_bfloat_vec = 0.1; /* { dg-error {incompatible types when assigning to type 'bfloat16x8_t' from type 'double'} } */ -+ glob_bfloat_vec = is_a_float_vec; /* { dg-error {incompatible types when assigning to type 'bfloat16x8_t' from type 'float32x4_t'} } */ -+ glob_bfloat_vec = is_an_int_vec; /* { dg-error {incompatible types when assigning to type 'bfloat16x8_t' from type 'int32x4_t'} } */ -+ glob_bfloat_vec = is_a_float16_vec; /* { dg-error {incompatible types when assigning to type 'bfloat16x8_t' from type 'float16x8_t'} } */ -+ glob_bfloat_vec = is_a_double_pair; /* { dg-error {incompatible types when assigning to type 'bfloat16x8_t' from type 'float64x2_t'} } */ -+ glob_bfloat_vec = is_a_long_int_pair; /* { dg-error {incompatible types when assigning to type 'bfloat16x8_t' from type 'int64x2_t'} } */ -+ glob_bfloat_vec = is_a_short_vec; /* { dg-error {incompatible types when assigning to type 'bfloat16x8_t' from type 'int16x8_t'} } */ -+ -+ is_an_int_vec = glob_bfloat_vec; /* { dg-error {incompatible types when assigning to type 'int32x4_t' from type 'bfloat16x8_t'} } */ -+ is_a_float_vec = glob_bfloat_vec; /* { dg-error {incompatible types when assigning to type 'float32x4_t' from type 'bfloat16x8_t'} } */ -+ is_a_float16_vec = glob_bfloat_vec; /* { dg-error {incompatible types when assigning to type 'float16x8_t' from type 'bfloat16x8_t'} } */ -+ is_a_double_pair = glob_bfloat_vec; /* { dg-error {incompatible types when assigning to type 'float64x2_t' from type 'bfloat16x8_t'} } */ -+ is_a_long_int_pair = glob_bfloat_vec; /* { dg-error {incompatible types when assigning to type 'int64x2_t' from type 'bfloat16x8_t'} } */ -+ 
is_a_short_vec = glob_bfloat_vec;/* { dg-error {incompatible types when assigning to type 'int16x8_t' from type 'bfloat16x8_t'} } */ -+ -+ /* Assignments to/from elements. */ -+ -+ vector2_3[0] = glob_bfloat; -+ vector2_3[0] = is_an_int; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ vector2_3[0] = is_a_short_int; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ vector2_3[0] = is_a_float; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ vector2_3[0] = is_a_float16; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ vector2_3[0] = 0; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ vector2_3[0] = 0.1; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ -+ glob_bfloat = vector2_3[0]; -+ is_an_int = vector2_3[0]; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ is_a_short_int = vector2_3[0]; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ is_a_float = vector2_3[0]; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ is_a_float16 = vector2_3[0]; /* { dg-error {invalid conversion from type 'bfloat16_t'} } */ -+ -+ /* Compound literals. */ -+ -+ (bfloat16x8_t) {}; -+ -+ (bfloat16x8_t) { 0 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ (bfloat16x8_t) { 0.1 }; /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ (bfloat16x8_t) { is_a_float_vec }; /* { dg-error {incompatible types when initializing type '__bf16' using type 'float32x4_t'} } */ -+ (bfloat16x8_t) { is_an_int_vec }; /* { dg-error {incompatible types when initializing type '__bf16' using type 'int32x4_t'} } */ -+ (bfloat16x8_t) { is_a_double_pair }; /* { dg-error {incompatible types when initializing type '__bf16' using type 'float64x2_t'} } */ -+ (bfloat16x8_t) { is_a_long_int_pair }; /* { dg-error {incompatible types when initializing type '__bf16' using type 'int64x2_t'} } */ -+ (bfloat16x8_t) { is_a_float16_vec }; /* { dg-error {incompatible types when initializing type '__bf16' using type 'float16x8_t'} } */ -+ (bfloat16x8_t) { is_a_short_vec }; /* { dg-error {incompatible types when initializing type '__bf16' using type 'int16x8_t'} } */ -+ -+ (bfloat16x8_t) { glob_bfloat_vec }; /* { dg-error {incompatible types when initializing type '__bf16' using type 'bfloat16x8_t'} } */ -+ (int32x4_t) { glob_bfloat_vec }; /* { dg-error {incompatible types when initializing type 'int' using type 'bfloat16x8_t'} } */ -+ (float32x4_t) { glob_bfloat_vec }; /* { dg-error {incompatible types when initializing type 'float' using type 'bfloat16x8_t'} } */ -+ (int64x2_t) { glob_bfloat_vec }; /* { dg-error {incompatible types when initializing type 'long int' using type 'bfloat16x8_t'} } */ -+ (float16x8_t) { glob_bfloat_vec }; /* { dg-error {incompatible types when initializing type '__fp16' using type 'bfloat16x8_t'} } */ -+ (int16x8_t) { glob_bfloat_vec }; /* { dg-error {incompatible types when initializing type 'short int' using type 'bfloat16x8_t'} } */ -+ -+ /* Casting. 
*/ -+ -+ (void) glob_bfloat_vec; -+ (bfloat16x8_t) glob_bfloat_vec; -+ -+ (bfloat16_t) glob_bfloat_vec; /* { dg-error {aggregate value used where a float was expected} } */ -+ (short) glob_bfloat_vec; /* { dg-error {can't convert a vector of type 'bfloat16x8_t' to type 'short int' which has different size} } */ -+ (int) glob_bfloat_vec; /* { dg-error {can't convert a vector of type 'bfloat16x8_t' to type 'int' which has different size} } */ -+ (float16_t) glob_bfloat_vec; /* { dg-error {aggregate value used where a float was expected} } */ -+ (float) glob_bfloat_vec; /* { dg-error {aggregate value used where a float was expected} } */ -+ (double) glob_bfloat_vec; /* { dg-error {aggregate value used where a float was expected} } */ -+ -+ (int32x4_t) glob_bfloat_vec; -+ (float32x4_t) glob_bfloat_vec; -+ (float16x8_t) glob_bfloat_vec; -+ (int64x2_t) glob_bfloat_vec; -+ (float64x2_t) glob_bfloat_vec; -+ (int16x8_t) glob_bfloat_vec; -+ -+ (bfloat16x8_t) is_an_int_vec; -+ (bfloat16x8_t) is_a_float_vec; -+ (bfloat16x8_t) is_a_float16_vec; -+ (bfloat16x8_t) is_a_long_int_pair; -+ (bfloat16x8_t) is_a_double_pair; -+ (bfloat16x8_t) is_a_short_vec; -+ -+ /* Arrays and Structs. */ -+ -+ typedef bfloat16x8_t array_type[2]; -+ extern bfloat16x8_t extern_array[]; -+ -+ bfloat16x8_t array[2]; -+ bfloat16x8_t zero_length_array[0]; -+ bfloat16x8_t empty_init_array[] = {}; -+ typedef bfloat16x8_t some_other_type[is_an_int]; -+ -+ struct struct1 { -+ bfloat16x8_t a; -+ }; -+ -+ union union1 { -+ bfloat16x8_t a; -+ }; -+ -+ /* Addressing and dereferencing. */ -+ -+ bfloat16x8_t *bfloat_ptr = &vector0; -+ vector0 = *bfloat_ptr; -+ -+ /* Pointer assignment. */ -+ -+ bfloat16x8_t *bfloat_ptr2 = bfloat_ptr; -+ bfloat16x8_t *bfloat_ptr3 = array; -+ -+ /* Pointer arithmetic. */ -+ -+ ++bfloat_ptr; -+ --bfloat_ptr; -+ bfloat_ptr++; -+ bfloat_ptr--; -+ bfloat_ptr += 1; -+ bfloat_ptr -= 1; -+ bfloat_ptr - bfloat_ptr2; -+ bfloat_ptr = &bfloat_ptr3[0]; -+ bfloat_ptr = &bfloat_ptr3[1]; -+ -+ /* Simple comparison. */ -+ vector0 > glob_bfloat_vec; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ glob_bfloat_vec == vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ vector0 > is_a_float_vec; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ is_a_float_vec == vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ vector0 > 0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ 0 == vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ vector0 > 0.1; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ 0.1 == vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ vector0 > is_an_int_vec; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ is_an_int_vec == vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ -+ /* Pointer comparison. */ -+ -+ bfloat_ptr == &vector0; -+ bfloat_ptr != &vector0; -+ bfloat_ptr < &vector0; -+ bfloat_ptr <= &vector0; -+ bfloat_ptr > &vector0; -+ bfloat_ptr >= &vector0; -+ bfloat_ptr == bfloat_ptr2; -+ bfloat_ptr != bfloat_ptr2; -+ bfloat_ptr < bfloat_ptr2; -+ bfloat_ptr <= bfloat_ptr2; -+ bfloat_ptr > bfloat_ptr2; -+ bfloat_ptr >= bfloat_ptr2; -+ -+ /* Conditional expressions. */ -+ -+ 0 ? vector0 : vector0; -+ 0 ? vector0 : is_a_float_vec; /* { dg-error {type mismatch in conditional expression} } */ -+ 0 ? 
is_a_float_vec : vector0; /* { dg-error {type mismatch in conditional expression} } */ -+ 0 ? vector0 : is_a_float16_vec; /* { dg-error {type mismatch in conditional expression} } */ -+ 0 ? is_a_float16_vec : vector0; /* { dg-error {type mismatch in conditional expression} } */ -+ 0 ? vector0 : 0; /* { dg-error {type mismatch in conditional expression} } */ -+ 0 ? 0 : vector0; /* { dg-error {type mismatch in conditional expression} } */ -+ 0 ? 0.1 : vector0; /* { dg-error {type mismatch in conditional expression} } */ -+ 0 ? vector0 : 0.1; /* { dg-error {type mismatch in conditional expression} } */ -+ 0 ? bfloat_ptr : bfloat_ptr2; -+ 0 ? bfloat_ptr : float_ptr; /* { dg-error {pointer type mismatch in conditional expression} } */ -+ 0 ? float_ptr : bfloat_ptr; /* { dg-error {pointer type mismatch in conditional expression} } */ -+ -+ vector0 ? vector0 : vector0; /* { dg-error {used vector type where scalar is required} } */ -+ vector0 ? is_a_float16_vec : vector0; /* { dg-error {used vector type where scalar is required} } */ -+ vector0 ? vector0 : is_a_float16_vec; /* { dg-error {used vector type where scalar is required} } */ -+ vector0 ? is_a_float16_vec : is_a_float16_vec; /* { dg-error {used vector type where scalar is required} } */ -+ -+ /* Unary operators. */ -+ -+ +vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ -vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ ~vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ !vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ *vector0; /* { dg-error {invalid type argument of unary '\*'} } */ -+ __real vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ __imag vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ ++vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ --vector0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ vector0++; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ vector0--; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ -+ /* Binary arithmetic operations. 
*/ -+ -+ vector0 = glob_bfloat_vec + *bfloat_ptr; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ vector0 = glob_bfloat_vec + 0.1; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ vector0 = glob_bfloat_vec + 0; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ vector0 = glob_bfloat_vec + is_a_float_vec; /* { dg-error {operation not permitted on type 'bfloat16_t'} } */ -+ -+ return vector0; -+} -+ -diff --git a/gcc/testsuite/gcc.target/aarch64/crypto-fuse-2.c b/gcc/testsuite/gcc.target/aarch64/crypto-fuse-2.c -deleted file mode 100644 -index b12df2d3e..000000000 ---- a/gcc/testsuite/gcc.target/aarch64/crypto-fuse-2.c -+++ /dev/null -@@ -1,45 +0,0 @@ --/* { dg-do compile } */ --/* { dg-options "-O3 -mcpu=cortex-a72+crypto -dp" } */ -- --#include -- --#define AESE(r, v, key) (r = vaesdq_u8 ((v), (key))); --#define AESMC(r, i) (r = vaesimcq_u8 (i)) -- --uint8x16_t dummy; --uint8x16_t a; --uint8x16_t b; --uint8x16_t c; --uint8x16_t d; --uint8x16_t e; -- --void --foo (void) --{ -- AESE (a, a, e); -- dummy = vaddq_u8 (dummy, dummy); -- dummy = vaddq_u8 (dummy, dummy); -- AESE (b, b, e); -- dummy = vaddq_u8 (dummy, dummy); -- dummy = vaddq_u8 (dummy, dummy); -- AESE (c, c, e); -- dummy = vaddq_u8 (dummy, dummy); -- dummy = vaddq_u8 (dummy, dummy); -- AESE (d, d, e); -- dummy = vaddq_u8 (dummy, dummy); -- dummy = vaddq_u8 (dummy, dummy); -- -- AESMC (a, a); -- dummy = vaddq_u8 (dummy, dummy); -- dummy = vaddq_u8 (dummy, dummy); -- AESMC (b, b); -- dummy = vaddq_u8 (dummy, dummy); -- dummy = vaddq_u8 (dummy, dummy); -- AESMC (c, c); -- dummy = vaddq_u8 (dummy, dummy); -- dummy = vaddq_u8 (dummy, dummy); -- AESMC (d, d); --} -- --/* { dg-final { scan-assembler-times "crypto_aesd_fused" 4 } } */ -- -diff --git a/gcc/testsuite/gcc.target/aarch64/diag_aka_1.c b/gcc/testsuite/gcc.target/aarch64/diag_aka_1.c -new file mode 100644 -index 000000000..59e24f48b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/diag_aka_1.c -@@ -0,0 +1,14 @@ -+#include -+ -+typedef int16x4_t myvec; -+ -+void f (float x) -+{ -+ __Int8x8_t y1 = x; /* { dg-error {incompatible types when initializing type '__Int8x8_t' using type 'float'} } */ -+ __Int8x8_t *ptr1 = &x; /* { dg-error {initialization of '__Int8x8_t \*' from incompatible pointer type 'float \*'} } */ -+ int8x8_t y2 = x; /* { dg-error {incompatible types when initializing type 'int8x8_t' using type 'float'} } */ -+ int8x8_t *ptr2 = &x; /* { dg-error {initialization of 'int8x8_t \*' from incompatible pointer type 'float \*'} } */ -+ /* ??? For these it would be better to print an aka for 'int16x4_t'. 
*/ -+ myvec y3 = x; /* { dg-error {incompatible types when initializing type 'myvec' using type 'float'} } */ -+ myvec *ptr3 = &x; /* { dg-error {initialization of 'myvec \*' from incompatible pointer type 'float \*'} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/fmul_scvtf_1.c b/gcc/testsuite/gcc.target/aarch64/fmul_scvtf_1.c -new file mode 100644 -index 000000000..8bfe06ac3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/fmul_scvtf_1.c -@@ -0,0 +1,140 @@ -+/* { dg-do run } */ -+/* { dg-options "-save-temps -O2 -fno-inline" } */ -+ -+#define FUNC_DEFS(__a) \ -+float \ -+fsfoo##__a (int x) \ -+{ \ -+ return ((float) x)/(1lu << __a); \ -+} \ -+float \ -+fusfoo##__a (unsigned int x) \ -+{ \ -+ return ((float) x)/(1lu << __a); \ -+} \ -+float \ -+fslfoo##__a (long long x) \ -+{ \ -+ return ((float) x)/(1lu << __a); \ -+} \ -+float \ -+fulfoo##__a (unsigned long long x) \ -+{ \ -+ return ((float) x)/(1lu << __a); \ -+} \ -+ -+#define FUNC_DEFD(__a) \ -+double \ -+dsfoo##__a (int x) \ -+{ \ -+ return ((double) x)/(1lu << __a); \ -+} \ -+double \ -+dusfoo##__a (unsigned int x) \ -+{ \ -+ return ((double) x)/(1lu << __a); \ -+} \ -+double \ -+dslfoo##__a (long long x) \ -+{ \ -+ return ((double) x)/(1lu << __a); \ -+} \ -+double \ -+dulfoo##__a (unsigned long long x) \ -+{ \ -+ return ((double) x)/(1lu << __a); \ -+} -+ -+FUNC_DEFS (4) -+ /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#4" 1 } } */ -+ /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#4" 1 } } */ -+ /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#4" 1 } } */ -+ /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#4" 1 } } */ -+ -+FUNC_DEFD (4) -+ /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#4" 1 } } */ -+ /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#4" 1 } } */ -+ /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#4" 1 } } */ -+ /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#4" 1 } } */ -+ -+FUNC_DEFS (8) -+ /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#8" 1 } } */ -+ /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#8" 1 } } */ -+ /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#8" 1 } } */ -+ /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#8" 1 } } */ -+ -+FUNC_DEFD (8) -+ /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#8" 1 } } */ -+ /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#8" 1 } } */ -+ /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#8" 1 } } */ -+ /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#8" 1 } } */ -+ -+FUNC_DEFS (16) -+ /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#16" 1 } } */ -+ /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#16" 1 } } */ -+ /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#16" 1 } } */ -+ /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#16" 1 } } */ -+ -+FUNC_DEFD (16) -+ /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#16" 1 } } */ -+ /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#16" 1 } } */ -+ /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#16" 1 } } */ -+ /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#16" 1 } } */ -+ -+FUNC_DEFS (32) -+ /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], w\[0-9\]*.*#32" 1 } } */ -+ /* { 
dg-final { scan-assembler-times "ucvtf\ts\[0-9\], w\[0-9\]*.*#32" 1 } } */ -+ /* { dg-final { scan-assembler-times "scvtf\ts\[0-9\], x\[0-9\]*.*#32" 1 } } */ -+ /* { dg-final { scan-assembler-times "ucvtf\ts\[0-9\], x\[0-9\]*.*#32" 1 } } */ -+ -+FUNC_DEFD (32) -+ /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], w\[0-9\]*.*#32" 1 } } */ -+ /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], w\[0-9\]*.*#32" 1 } } */ -+ /* { dg-final { scan-assembler-times "scvtf\td\[0-9\], x\[0-9\]*.*#32" 1 } } */ -+ /* { dg-final { scan-assembler-times "ucvtf\td\[0-9\], x\[0-9\]*.*#32" 1 } } */ -+ -+#define FUNC_TESTS(__a, __b) \ -+do \ -+{ \ -+ if (fsfoo##__a (__b) != ((int) i) * (1.0f/(1lu << __a)) ) \ -+ __builtin_abort (); \ -+ if (fusfoo##__a (__b) != ((int) i) * (1.0f/(1lu << __a)) ) \ -+ __builtin_abort (); \ -+ if (fslfoo##__a (__b) != ((int) i) * (1.0f/(1lu << __a)) ) \ -+ __builtin_abort (); \ -+ if (fulfoo##__a (__b) != ((int) i) * (1.0f/(1lu << __a)) ) \ -+ __builtin_abort (); \ -+} while (0) -+ -+#define FUNC_TESTD(__a, __b) \ -+do \ -+{ \ -+ if (dsfoo##__a (__b) != ((int) i) * (1.0d/(1lu << __a)) ) \ -+ __builtin_abort (); \ -+ if (dusfoo##__a (__b) != ((int) i) * (1.0d/(1lu << __a)) ) \ -+ __builtin_abort (); \ -+ if (dslfoo##__a (__b) != ((int) i) * (1.0d/(1lu << __a)) ) \ -+ __builtin_abort (); \ -+ if (dulfoo##__a (__b) != ((int) i) * (1.0d/(1lu << __a)) ) \ -+ __builtin_abort (); \ -+} while (0) -+ -+int -+main (void) -+{ -+ int i; -+ -+ for (i = 0; i < 32; i ++) -+ { -+ FUNC_TESTS (4, i); -+ FUNC_TESTS (8, i); -+ FUNC_TESTS (16, i); -+ FUNC_TESTS (32, i); -+ -+ FUNC_TESTD (4, i); -+ FUNC_TESTD (8, i); -+ FUNC_TESTD (16, i); -+ FUNC_TESTD (32, i); -+ } -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/pr88834.c b/gcc/testsuite/gcc.target/aarch64/pr88834.c -new file mode 100644 -index 000000000..ea00967ef ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/pr88834.c -@@ -0,0 +1,15 @@ -+/* { dg-do compile } */ -+/* { dg-options "-S -O3 -march=armv8.2-a+sve" } */ -+ -+void -+f (int *restrict x, int *restrict y, int *restrict z, int n) -+{ -+ for (int i = 0; i < n; i += 2) -+ { -+ x[i] = y[i] + z[i]; -+ x[i + 1] = y[i + 1] - z[i + 1]; -+ } -+} -+ -+/* { dg-final { scan-assembler-times {\tld2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tst2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+, x[0-9]+, lsl 2\]\n} 1 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_2.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_2.c -new file mode 100644 -index 000000000..fa2267598 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_2.c -@@ -0,0 +1,215 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#pragma GCC push_options -+#pragma GCC target ("arch=armv8-a") -+ -+#pragma GCC push_options -+#pragma GCC target ("arch=armv8-a+tme") -+#ifndef __ARM_FEATURE_TME -+#error "__ARM_FEATURE_TME is not defined but should be!" -+#endif -+ -+#pragma GCC pop_options -+ -+#ifdef __ARM_FEATURE_TME -+#error "__ARM_FEATURE_TME is defined but should not be!" -+#endif -+ -+/* Test Armv8.6-A features. */ -+ -+#ifdef __ARM_FEATURE_MATMUL_INT8 -+#error "__ARM_FEATURE_MATMUL_INT8 is defined but should not be!" -+#endif -+#ifdef __ARM_FEATURE_SVE_MATMUL_INT8 -+#error "__ARM_FEATURE_SVE_MATMUL_INT8 is defined but should not be!" -+#endif -+#ifdef __ARM_FEATURE_SVE_MATMUL_FP32 -+#error "__ARM_FEATURE_SVE_MATMUL_FP32 is defined but should not be!" 
-+#endif -+#ifdef __ARM_FEATURE_SVE_MATMUL_FP64 -+#error "__ARM_FEATURE_SVE_MATMUL_FP64 is defined but should not be!" -+#endif -+ -+#pragma GCC push_options -+#pragma GCC target ("arch=armv8.6-a") -+#ifndef __ARM_FEATURE_MATMUL_INT8 -+#error "__ARM_FEATURE_MATMUL_INT8 is not defined but should be!" -+#endif -+#ifdef __ARM_FEATURE_SVE -+#error "__ARM_FEATURE_SVE is defined but should not be!" -+#endif -+#ifdef __ARM_FEATURE_SVE_MATMUL_INT8 -+#error "__ARM_FEATURE_SVE_MATMUL_INT8 is defined but should not be!" -+#endif -+#ifdef __ARM_FEATURE_SVE_MATMUL_FP32 -+#error "__ARM_FEATURE_SVE_MATMUL_FP32 is defined but should not be!" -+#endif -+#ifdef __ARM_FEATURE_SVE_MATMUL_FP64 -+#error "__ARM_FEATURE_SVE_MATMUL_FP64 is defined but should not be!" -+#endif -+#pragma GCC pop_options -+ -+#pragma GCC push_options -+#pragma GCC target ("arch=armv8.6-a+sve") -+#ifndef __ARM_FEATURE_MATMUL_INT8 -+#error "__ARM_FEATURE_MATMUL_INT8 is not defined but should be!" -+#endif -+#ifndef __ARM_FEATURE_SVE -+#error "__ARM_FEATURE_SVE is not defined but should be!" -+#endif -+#ifndef __ARM_FEATURE_SVE_MATMUL_INT8 -+#error "__ARM_FEATURE_SVE_MATMUL_INT8 is not defined but should be!" -+#endif -+#ifdef __ARM_FEATURE_SVE_MATMUL_FP32 -+#error "__ARM_FEATURE_SVE_MATMUL_FP32 is defined but should not be!" -+#endif -+#ifdef __ARM_FEATURE_SVE_MATMUL_FP64 -+#error "__ARM_FEATURE_SVE_MATMUL_FP64 is defined but should not be!" -+#endif -+#pragma GCC pop_pragma -+ -+#pragma GCC push_options -+#pragma GCC target ("arch=armv8.2-a+i8mm") -+#ifndef __ARM_FEATURE_MATMUL_INT8 -+#error "__ARM_FEATURE_MATMUL_INT8 is not defined but should be!" -+#endif -+#ifdef __ARM_FEATURE_SVE -+#error "__ARM_FEATURE_SVE is defined but should not be!" -+#endif -+#ifdef __ARM_FEATURE_SVE_MATMUL_INT8 -+#error "__ARM_FEATURE_SVE_MATMUL_INT8 is defined but should not be!" -+#endif -+#pragma GCC pop_options -+ -+#pragma GCC push_options -+#pragma GCC target ("arch=armv8.2-a+i8mm+sve") -+#ifndef __ARM_FEATURE_MATMUL_INT8 -+#error "__ARM_FEATURE_MATMUL_INT8 is not defined but should be!" -+#endif -+#ifndef __ARM_FEATURE_SVE -+#error "__ARM_FEATURE_SVE is not defined but should be!" -+#endif -+#ifndef __ARM_FEATURE_SVE_MATMUL_INT8 -+#error "__ARM_FEATURE_SVE_MATMUL_INT8 is not defined but should be!" -+#endif -+#ifdef __ARM_FEATURE_SVE_MATMUL_FP32 -+#error "__ARM_FEATURE_SVE_MATMUL_FP32 is defined but should not be!" -+#endif -+#ifdef __ARM_FEATURE_SVE_MATMUL_FP64 -+#error "__ARM_FEATURE_SVE_MATMUL_FP64 is defined but should not be!" -+#endif -+#pragma GCC pop_options -+ -+#pragma GCC push_options -+#pragma GCC target ("arch=armv8.2-a+f32mm") -+#ifndef __ARM_FEATURE_SVE -+#error "__ARM_FEATURE_SVE is not defined but should be!" -+#endif -+#ifdef __ARM_FEATURE_SVE_MATMUL_INT8 -+#error "__ARM_FEATURE_SVE_MATMUL_INT8 is defined but should not be!" -+#endif -+#ifndef __ARM_FEATURE_SVE_MATMUL_FP32 -+#error "__ARM_FEATURE_SVE_MATMUL_FP32 is not defined but should be!" -+#endif -+#ifdef __ARM_FEATURE_SVE_MATMUL_FP64 -+#error "__ARM_FEATURE_SVE_MATMUL_FP64 is defined but should not be!" -+#endif -+#pragma GCC pop_pragma -+ -+#pragma GCC push_options -+#pragma GCC target ("arch=armv8.2-a+f64mm") -+#ifndef __ARM_FEATURE_SVE -+#error "__ARM_FEATURE_SVE is not defined but should be!" -+#endif -+#ifdef __ARM_FEATURE_SVE_MATMUL_INT8 -+#error "__ARM_FEATURE_SVE_MATMUL_INT8 is defined but should not be!" -+#endif -+#ifdef __ARM_FEATURE_SVE_MATMUL_FP32 -+#error "__ARM_FEATURE_SVE_MATMUL_FP32 is defined but should not be!" 
-+#endif -+#ifndef __ARM_FEATURE_SVE_MATMUL_FP64 -+#error "__ARM_FEATURE_SVE_MATMUL_FP64 is not defined but should be!" -+#endif -+#pragma GCC pop_options -+ -+#pragma GCC push_options -+#pragma GCC target ("arch=armv8.6-a+nosimd") -+#ifdef __ARM_FEATURE_MATMUL_INT8 -+#error "__ARM_FEATURE_MATMUL_INT8 is defined but should not be!" -+#endif -+#ifdef __ARM_FEATURE_SVE_MATMUL_FP32 -+#error "__ARM_FEATURE_SVE_MATMUL_FP32 is defined but should not be!" -+#endif -+#ifdef __ARM_FEATURE_SVE_MATMUL_FP64 -+#error "__ARM_FEATURE_SVE_MATMUL_FP64 is defined but should not be!" -+#endif -+#pragma GCC pop_options -+ -+#pragma GCC push_options -+#pragma GCC target ("arch=armv8.6-a+nofp") -+#ifdef __ARM_FEATURE_MATMUL_INT8 -+#error "__ARM_FEATURE_MATMUL_INT8 is defined but should not be!" -+#endif -+#ifdef __ARM_FEATURE_SVE_MATMUL_FP32 -+#error "__ARM_FEATURE_SVE_MATMUL_FP32 is defined but should not be!" -+#endif -+#ifdef __ARM_FEATURE_SVE_MATMUL_FP64 -+#error "__ARM_FEATURE_SVE_MATMUL_FP64 is defined but should not be!" -+#endif -+#pragma GCC pop_options -+ -+#ifdef __ARM_FEATURE_BF16_SCALAR_ARITHMETIC -+#error "__ARM_FEATURE_BF16_SCALAR_ARITHMETIC is defined but should not be!" -+#endif -+#ifdef __ARM_FEATURE_BF16_VECTOR_ARITHMETIC -+#error "__ARM_FEATURE_BF16_VECTOR_ARITHMETIC is defined but should not be!" -+#endif -+ -+#pragma GCC push_options -+#pragma GCC target ("arch=armv8.6-a") -+#ifndef __ARM_FEATURE_BF16_SCALAR_ARITHMETIC -+#error "__ARM_FEATURE_BF16_SCALAR_ARITHMETIC is not defined but should be!" -+#endif -+#ifndef __ARM_FEATURE_BF16_VECTOR_ARITHMETIC -+#error "__ARM_FEATURE_BF16_VECTOR_ARITHMETIC is not defined but should be!" -+#endif -+#pragma GCC pop_options -+ -+#pragma GCC push_options -+#pragma GCC target ("arch=armv8.2-a+bf16") -+#ifndef __ARM_FEATURE_BF16_SCALAR_ARITHMETIC -+#error "__ARM_FEATURE_BF16_SCALAR_ARITHMETIC is not defined but should be!" -+#endif -+#ifndef __ARM_FEATURE_BF16_VECTOR_ARITHMETIC -+#error "__ARM_FEATURE_BF16_VECTOR_ARITHMETIC is not defined but should be!" -+#endif -+#pragma GCC pop_options -+ -+#pragma GCC push_options -+#pragma GCC target ("arch=armv8.2-a+bf16+nosimd") -+#ifndef __ARM_FEATURE_BF16_SCALAR_ARITHMETIC -+#error "__ARM_FEATURE_BF16_SCALAR_ARITHMETIC is not defined but should be!" -+#endif -+#ifdef __ARM_FEATURE_BF16_VECTOR_ARITHMETIC -+#error "__ARM_FEATURE_BF16_VECTOR_ARITHMETIC is defined but should not be!" -+#endif -+#pragma GCC pop_options -+ -+#pragma GCC push_options -+#pragma GCC target ("arch=armv8.6-a+nofp") -+#ifdef __ARM_FEATURE_BF16_SCALAR_ARITHMETIC -+#error "__ARM_FEATURE_BF16_SCALAR_ARITHMETIC is defined but should not be!" -+#endif -+#ifdef __ARM_FEATURE_BF16_VECTOR_ARITHMETIC -+#error "__ARM_FEATURE_BF16_VECTOR_ARITHMETIC is defined but should not be!" 
-+#endif -+#pragma GCC pop_options -+ -+#pragma GCC pop_options -+ -+int -+foo (int a) -+{ -+ return a; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/signbitv2sf.c b/gcc/testsuite/gcc.target/aarch64/signbitv2sf.c -new file mode 100644 -index 000000000..2587bfedd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/signbitv2sf.c -@@ -0,0 +1,40 @@ -+/* { dg-do run } */ -+/* { dg-additional-options "-O3 --save-temps" } */ -+ -+extern void abort (); -+ -+#define N 8 -+float in[N] = {1.0, -1.0, -2.0, 3.0, -5.0, -8.0, 13.0, 21.0}; -+int out[N]; -+ -+void -+foo (int *i, float *f) -+{ -+ i[0] = __builtin_signbit (f[0]); -+ i[1] = __builtin_signbit (f[1]); -+} -+ -+/* { dg-final { scan-assembler-not {-2147483648} } } */ -+/* { dg-final { scan-assembler {\tushr\tv[0-9]+.2s, v[0-9]+.2s, 31} } } */ -+ -+int -+main () -+{ -+ int i; -+ -+ foo (out, in); -+ foo (out + 2, in + 2); -+ foo (out + 4, in + 4); -+ foo (out + 6, in + 6); -+ -+ for (i = 0; i < N; i++) -+ { -+ if (in[i] >= 0.0 && out[i]) -+ abort (); -+ if (in[i] < 0.0 && !out[i]) -+ abort (); -+ } -+ -+ return 0; -+} -+ -diff --git a/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c b/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c -new file mode 100644 -index 000000000..18cffdc7d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c -@@ -0,0 +1,38 @@ -+/* { dg-do run } */ -+/* { dg-additional-options "-O3 --save-temps" } */ -+ -+extern void abort (); -+ -+#define N 1024 -+float in[N] = {1.0, -1.0, -2.0, 3.0, -5.0, -8.0, 13.0, 21.0}; -+int out[N]; -+ -+void -+foo () -+{ -+ int i; -+ for (i = 0; i < N; i++) -+ out[i] = __builtin_signbit (in[i]); -+} -+ -+/* { dg-final { scan-assembler-not {-2147483648} } } */ -+/* { dg-final { scan-assembler {\tushr\tv[0-9]+.4s, v[0-9]+.4s, 31} } } */ -+ -+int -+main () -+{ -+ int i; -+ -+ foo (); -+ -+ for (i = 0; i < N; i++) -+ { -+ if (in[i] >= 0.0 && out[i]) -+ abort (); -+ if (in[i] < 0.0 && !out[i]) -+ abort (); -+ } -+ -+ return 0; -+} -+ -diff --git a/gcc/testsuite/gcc.target/aarch64/simd/ssra.c b/gcc/testsuite/gcc.target/aarch64/simd/ssra.c -new file mode 100644 -index 000000000..e9c2e04c0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/simd/ssra.c -@@ -0,0 +1,36 @@ -+/* { dg-do compile { target aarch64*-*-* } } */ -+/* { dg-options "-O3" } */ -+/* { dg-skip-if "" { *-*-* } {"*sve*"} {""} } */ -+ -+#include -+ -+#define SSRA(func, vtype, n) \ -+ void func () \ -+ { \ -+ int i; \ -+ for (i = 0; i < n; i++) \ -+ { \ -+ s1##vtype[i] += s2##vtype[i] >> 2; \ -+ } \ -+ } -+ -+#define TEST_VDQ_I_MODES(FUNC) \ -+ FUNC (test_v8qi_v16qi, _char, 16) \ -+ FUNC (test_v4hi_v8h1, _short, 8) \ -+ FUNC (test_v2si_v4si, _int, 4) \ -+ FUNC (test_v2di, _ll, 2) \ -+ -+int8_t s1_char[16], s2_char[16]; -+int16_t s1_short[8], s2_short[8]; -+int32_t s1_int[4], s2_int[4]; -+int64_t s1_ll[2], s2_ll[2]; -+ -+TEST_VDQ_I_MODES(SSRA) -+ -+/* { dg-final { scan-assembler "ssra" } } */ -+/* { dg-final { scan-assembler-not "sshr" } } */ -+ -+/* { dg-final { scan-assembler-times {ssra\tv[0-9]+\.16b, v[0-9]+\.16b, [0-9]+} 1 } } */ -+/* { dg-final { scan-assembler-times {ssra\tv[0-9]+\.8h, v[0-9]+\.8h, [0-9]+} 1 } } */ -+/* { dg-final { scan-assembler-times {ssra\tv[0-9]+\.4s, v[0-9]+\.4s, [0-9]+} 1 } } */ -+/* { dg-final { scan-assembler-times {ssra\tv[0-9]+\.2d, v[0-9]+\.2d, [0-9]+} 1 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/simd/usra.c b/gcc/testsuite/gcc.target/aarch64/simd/usra.c -new file mode 100644 -index 000000000..4e7446dfa ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/simd/usra.c -@@ 
-0,0 +1,36 @@ -+/* { dg-do compile { target aarch64*-*-* } } */ -+/* { dg-options "-O3" } */ -+/* { dg-skip-if "" { *-*-* } {"*sve*"} {""} } */ -+ -+#include -+ -+#define USRA(func, vtype, n) \ -+ void func () \ -+ { \ -+ int i; \ -+ for (i = 0; i < n; i++) \ -+ { \ -+ u1##vtype[i] += u2##vtype[i] >> 2; \ -+ } \ -+ } -+ -+#define TEST_VDQ_I_MODES(FUNC) \ -+ FUNC (test_v8qi_v16qi, _char, 16) \ -+ FUNC (test_v4hi_v8h1, _short, 8) \ -+ FUNC (test_v2si_v4si, _int, 4) \ -+ FUNC (test_v2di, _ll, 2) \ -+ -+uint8_t u1_char[16], u2_char[16]; -+uint16_t u1_short[8], u2_short[8]; -+uint32_t u1_int[4], u2_int[4]; -+uint64_t u1_ll[2], u2_ll[2]; -+ -+TEST_VDQ_I_MODES(USRA) -+ -+/* { dg-final { scan-assembler "usra" } } */ -+/* { dg-final { scan-assembler-not "ushr" } } */ -+ -+/* { dg-final { scan-assembler-times {usra\tv[0-9]+\.16b, v[0-9]+\.16b, [0-9]+} 1 } } */ -+/* { dg-final { scan-assembler-times {usra\tv[0-9]+\.8h, v[0-9]+\.8h, [0-9]+} 1 } } */ -+/* { dg-final { scan-assembler-times {usra\tv[0-9]+\.4s, v[0-9]+\.4s, [0-9]+} 1 } } */ -+/* { dg-final { scan-assembler-times {usra\tv[0-9]+\.2d, v[0-9]+\.2d, [0-9]+} 1 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmmla.c b/gcc/testsuite/gcc.target/aarch64/simd/vmmla.c -new file mode 100644 -index 000000000..5eec2b5cf ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/simd/vmmla.c -@@ -0,0 +1,27 @@ -+/* { dg-do assemble} */ -+/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+i8mm" } */ -+ -+#include "arm_neon.h" -+ -+int32x4_t -+test_vmmlaq_s32 (int32x4_t r, int8x16_t a, int8x16_t b) -+{ -+ return vmmlaq_s32 (r, a, b); -+} -+ -+uint32x4_t -+test_vmmlaq_u32 (uint32x4_t r, uint8x16_t a, uint8x16_t b) -+{ -+ return vmmlaq_u32 (r, a, b); -+} -+ -+int32x4_t -+test_vusmmlaq_s32 (int32x4_t r, uint8x16_t a, int8x16_t b) -+{ -+ return vusmmlaq_s32 (r, a, b); -+} -+ -+/* { dg-final { scan-assembler-times {\tsmmla\tv[0-9]+.4s, v[0-9]+.16b, v[0-9]+.16b} 1 } } */ -+/* { dg-final { scan-assembler-times {\tummla\tv[0-9]+.4s, v[0-9]+.16b, v[0-9]+.16b} 1 } } */ -+/* { dg-final { scan-assembler-times {\tusmmla\tv[0-9]+.4s, v[0-9]+.16b, v[0-9]+.16b} 1 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vrndnzx_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vrndnzx_1.c -new file mode 100644 -index 000000000..0399b838d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/simd/vrndnzx_1.c -@@ -0,0 +1,137 @@ -+/* Test the vrnd[32,64][z,x] intrinsics. 
*/ -+ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -march=armv8.5-a" } */ -+ -+#include "arm_neon.h" -+ -+#ifdef __ARM_FEATURE_FRINT -+ -+float32x2_t -+foo_32z (float32x2_t a) -+{ -+ return vrnd32z_f32 (a); -+} -+ -+/* { dg-final { scan-assembler-times "frint32z\tv\[0-9\]+\.2s, v\[0-9\]+\.2s\n" 1 } } */ -+ -+float32x4_t -+foo_32z_q (float32x4_t a) -+{ -+ return vrnd32zq_f32 (a); -+} -+ -+/* { dg-final { scan-assembler-times "frint32z\tv\[0-9\]+\.4s, v\[0-9\]+\.4s\n" 1 } } */ -+ -+float64x1_t -+foo_32z_f64 (float64x1_t a) -+{ -+ return vrnd32z_f64 (a); -+} -+ -+/* { dg-final { scan-assembler-times "frint32z\td\[0-9\]+, d\[0-9\]+\n" 1 } } */ -+ -+float64x2_t -+foo_32z_q_f64 (float64x2_t a) -+{ -+ return vrnd32zq_f64 (a); -+} -+ -+/* { dg-final { scan-assembler-times "frint32z\tv\[0-9\]+\.2d, v\[0-9\]+\.2d\n" 1 } } */ -+ -+float32x2_t -+foo_32x (float32x2_t a) -+{ -+ return vrnd32x_f32 (a); -+} -+ -+/* { dg-final { scan-assembler-times "frint32x\tv\[0-9\]+\.2s, v\[0-9\]+\.2s\n" 1 } } */ -+ -+float32x4_t -+foo_32x_q (float32x4_t a) -+{ -+ return vrnd32xq_f32 (a); -+} -+ -+/* { dg-final { scan-assembler-times "frint32x\tv\[0-9\]+\.4s, v\[0-9\]+\.4s\n" 1 } } */ -+ -+float64x1_t -+foo_32x_f64 (float64x1_t a) -+{ -+ return vrnd32x_f64 (a); -+} -+ -+/* { dg-final { scan-assembler-times "frint32x\td\[0-9\]+, d\[0-9\]+\n" 1 } } */ -+ -+float64x2_t -+foo_32x_q_f64 (float64x2_t a) -+{ -+ return vrnd32xq_f64 (a); -+} -+ -+/* { dg-final { scan-assembler-times "frint32x\tv\[0-9\]+\.2d, v\[0-9\]+\.2d\n" 1 } } */ -+ -+float32x2_t -+foo_64z (float32x2_t a) -+{ -+ return vrnd64z_f32 (a); -+} -+ -+/* { dg-final { scan-assembler-times "frint64z\tv\[0-9\]+\.2s, v\[0-9\]+\.2s\n" 1 } } */ -+ -+float32x4_t -+foo_64z_q (float32x4_t a) -+{ -+ return vrnd64zq_f32 (a); -+} -+ -+/* { dg-final { scan-assembler-times "frint64z\tv\[0-9\]+\.4s, v\[0-9\]+\.4s\n" 1 } } */ -+ -+float64x1_t -+foo_64z_f64 (float64x1_t a) -+{ -+ return vrnd64z_f64 (a); -+} -+ -+/* { dg-final { scan-assembler-times "frint64z\td\[0-9\]+, d\[0-9\]+\n" 1 } } */ -+ -+float64x2_t -+foo_64z_q_f64 (float64x2_t a) -+{ -+ return vrnd64zq_f64 (a); -+} -+ -+/* { dg-final { scan-assembler-times "frint64z\tv\[0-9\]+\.2d, v\[0-9\]+\.2d\n" 1 } } */ -+ -+float32x2_t -+foo_64x (float32x2_t a) -+{ -+ return vrnd64x_f32 (a); -+} -+ -+/* { dg-final { scan-assembler-times "frint64x\tv\[0-9\]+\.2s, v\[0-9\]+\.2s\n" 1 } } */ -+ -+float32x4_t -+foo_64x_q (float32x4_t a) -+{ -+ return vrnd64xq_f32 (a); -+} -+ -+/* { dg-final { scan-assembler-times "frint64x\tv\[0-9\]+\.4s, v\[0-9\]+\.4s\n" 1 } } */ -+ -+float64x1_t -+foo_64x_f64 (float64x1_t a) -+{ -+ return vrnd64x_f64 (a); -+} -+ -+/* { dg-final { scan-assembler-times "frint64x\td\[0-9\]+, d\[0-9\]+\n" 1 } } */ -+ -+float64x2_t -+foo_64x_q_f64 (float64x2_t a) -+{ -+ return vrnd64xq_f64 (a); -+} -+ -+/* { dg-final { scan-assembler-times "frint64x\tv\[0-9\]+\.2d, v\[0-9\]+\.2d\n" 1 } } */ -+#endif -diff --git a/gcc/testsuite/gcc.target/aarch64/ssadv16qi-dotprod.c b/gcc/testsuite/gcc.target/aarch64/ssadv16qi-dotprod.c -new file mode 100644 -index 000000000..08b6831cf ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/ssadv16qi-dotprod.c -@@ -0,0 +1,31 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_v8_2a_dotprod_neon_ok } */ -+/* { dg-add-options arm_v8_2a_dotprod_neon } */ -+/* { dg-additional-options "-O3" } */ -+ -+#pragma GCC target "+nosve" -+ -+#define N 1024 -+ -+signed char pix1[N], pix2[N]; -+ -+int foo (void) -+{ -+ int i_sum = 0; -+ int i; -+ -+ for (i = 0; i < N; i++) -+ i_sum += 
__builtin_abs (pix1[i] - pix2[i]); -+ -+ return i_sum; -+} -+ -+/* { dg-final { scan-assembler-not {\tsshll\t} } } */ -+/* { dg-final { scan-assembler-not {\tsshll2\t} } } */ -+/* { dg-final { scan-assembler-not {\tssubl\t} } } */ -+/* { dg-final { scan-assembler-not {\tssubl2\t} } } */ -+/* { dg-final { scan-assembler-not {\tabs\t} } } */ -+ -+/* { dg-final { scan-assembler {\tsabd\t} } } */ -+/* { dg-final { scan-assembler {\tudot\t} } } */ -+ -diff --git a/gcc/testsuite/gcc.target/aarch64/ssadv16qi.c b/gcc/testsuite/gcc.target/aarch64/ssadv16qi.c -index 40b288436..85a867a11 100644 ---- a/gcc/testsuite/gcc.target/aarch64/ssadv16qi.c -+++ b/gcc/testsuite/gcc.target/aarch64/ssadv16qi.c -@@ -1,7 +1,7 @@ - /* { dg-do compile } */ - /* { dg-options "-O3" } */ - --#pragma GCC target "+nosve" -+#pragma GCC target "+nosve+nodotprod" - - #define N 1024 - -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/aarch64-sve-acle-asm.exp b/gcc/testsuite/gcc.target/aarch64/sve/acle/aarch64-sve-acle-asm.exp -new file mode 100644 -index 000000000..7ce85a414 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/aarch64-sve-acle-asm.exp -@@ -0,0 +1,79 @@ -+# Assembly-based regression-test driver for the SVE ACLE -+# Copyright (C) 2009-2019 Free Software Foundation, Inc. -+# -+# This file is part of GCC. -+# -+# GCC is free software; you can redistribute it and/or modify it -+# under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 3, or (at your option) -+# any later version. -+# -+# GCC is distributed in the hope that it will be useful, but -+# WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+# General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with GCC; see the file COPYING3. If not see -+# . */ -+ -+# GCC testsuite that uses the `dg.exp' driver. -+ -+# Exit immediately if this isn't an AArch64 target. -+if {![istarget aarch64*-*-*] } { -+ return -+} -+ -+# Load support procs. -+load_lib gcc-dg.exp -+ -+# Initialize `dg'. -+dg-init -+ -+# Force SVE if we're not testing it already. -+if { [check_effective_target_aarch64_sve] } { -+ set sve_flags "" -+} else { -+ set sve_flags "-march=armv8.2-a+sve" -+} -+ -+global gcc_runtest_parallelize_limit_minor -+if { [info exists gcc_runtest_parallelize_limit_minor] } { -+ set old_limit_minor $gcc_runtest_parallelize_limit_minor -+ set gcc_runtest_parallelize_limit_minor 1 -+} -+ -+torture-init -+set-torture-options { -+ "-std=c90 -O0 -g" -+ "-std=c90 -O1 -g" -+ "-std=c99 -O2 -g" -+ "-std=c11 -O3 -g" -+ "-std=gnu90 -O2 -fno-schedule-insns -DCHECK_ASM --save-temps" -+ "-std=gnu99 -Ofast -g" -+ "-std=gnu11 -Os -g" -+} { -+ "-DTEST_FULL" -+ "-DTEST_OVERLOADS" -+} -+ -+# Main loop. -+set files [glob -nocomplain $srcdir/$subdir/asm/*.c] -+set save-dg-do-what-default ${dg-do-what-default} -+if { [check_effective_target_aarch64_asm_sve_ok] -+ && [check_effective_target_aarch64_variant_pcs] } { -+ set dg-do-what-default assemble -+} else { -+ set dg-do-what-default compile -+} -+gcc-dg-runtest [lsort $files] "" "$sve_flags -fno-ipa-icf" -+set dg-do-what-default ${save-dg-do-what-default} -+ -+torture-finish -+ -+if { [info exists gcc_runtest_parallelize_limit_minor] } { -+ set gcc_runtest_parallelize_limit_minor $old_limit_minor -+} -+ -+# All done. 
-+dg-finish -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/aarch64-sve-acle.exp b/gcc/testsuite/gcc.target/aarch64/sve/acle/aarch64-sve-acle.exp -new file mode 100644 -index 000000000..34d9dfd43 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/aarch64-sve-acle.exp -@@ -0,0 +1,54 @@ -+# Specific regression driver for AArch64 SVE. -+# Copyright (C) 2009-2019 Free Software Foundation, Inc. -+# Contributed by ARM Ltd. -+# -+# This file is part of GCC. -+# -+# GCC is free software; you can redistribute it and/or modify it -+# under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 3, or (at your option) -+# any later version. -+# -+# GCC is distributed in the hope that it will be useful, but -+# WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+# General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with GCC; see the file COPYING3. If not see -+# . */ -+ -+# GCC testsuite that uses the `dg.exp' driver. -+ -+# Exit immediately if this isn't an AArch64 target. -+if {![istarget aarch64*-*-*] } { -+ return -+} -+ -+# Load support procs. -+load_lib gcc-dg.exp -+ -+# If a testcase doesn't have special options, use these. -+global DEFAULT_CFLAGS -+if ![info exists DEFAULT_CFLAGS] then { -+ set DEFAULT_CFLAGS " -ansi -pedantic-errors" -+} -+ -+# Initialize `dg'. -+dg-init -+ -+# Force SVE if we're not testing it already. -+if { [check_effective_target_aarch64_sve] } { -+ set sve_flags "" -+} else { -+ set sve_flags "-march=armv8.2-a+sve" -+} -+ -+# Main loop. -+# FIXME: This should include general/*.c too, but leave that until the -+# C frontend allows initialization of SVE vectors. -+set files [glob -nocomplain $srcdir/$subdir/general-c/*.c] -+dg-runtest [lsort $files] "$sve_flags" $DEFAULT_CFLAGS -+ -+# All done. 
-+dg-finish -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_f16.c -new file mode 100644 -index 000000000..c019f248d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_f16.c -@@ -0,0 +1,552 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** abd_f16_m_tied1: -+** fabd z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abd_f16_m_tied1, svfloat16_t, -+ z0 = svabd_f16_m (p0, z0, z1), -+ z0 = svabd_m (p0, z0, z1)) -+ -+/* -+** abd_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fabd z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abd_f16_m_tied2, svfloat16_t, -+ z0 = svabd_f16_m (p0, z1, z0), -+ z0 = svabd_m (p0, z1, z0)) -+ -+/* -+** abd_f16_m_untied: -+** movprfx z0, z1 -+** fabd z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abd_f16_m_untied, svfloat16_t, -+ z0 = svabd_f16_m (p0, z1, z2), -+ z0 = svabd_m (p0, z1, z2)) -+ -+/* -+** abd_h4_f16_m_tied1: -+** mov (z[0-9]+\.h), h4 -+** fabd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (abd_h4_f16_m_tied1, svfloat16_t, __fp16, -+ z0 = svabd_n_f16_m (p0, z0, d4), -+ z0 = svabd_m (p0, z0, d4)) -+ -+/* -+** abd_h4_f16_m_untied: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0, z1 -+** fabd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (abd_h4_f16_m_untied, svfloat16_t, __fp16, -+ z0 = svabd_n_f16_m (p0, z1, d4), -+ z0 = svabd_m (p0, z1, d4)) -+ -+/* -+** abd_1_f16_m_tied1: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? -+** fabd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_f16_m_tied1, svfloat16_t, -+ z0 = svabd_n_f16_m (p0, z0, 1), -+ z0 = svabd_m (p0, z0, 1)) -+ -+/* -+** abd_1_f16_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? -+** movprfx z0, z1 -+** fabd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_f16_m_untied, svfloat16_t, -+ z0 = svabd_n_f16_m (p0, z1, 1), -+ z0 = svabd_m (p0, z1, 1)) -+ -+/* -+** abd_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fabd z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abd_f16_z_tied1, svfloat16_t, -+ z0 = svabd_f16_z (p0, z0, z1), -+ z0 = svabd_z (p0, z0, z1)) -+ -+/* -+** abd_f16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** fabd z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abd_f16_z_tied2, svfloat16_t, -+ z0 = svabd_f16_z (p0, z1, z0), -+ z0 = svabd_z (p0, z1, z0)) -+ -+/* -+** abd_f16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fabd z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fabd z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_f16_z_untied, svfloat16_t, -+ z0 = svabd_f16_z (p0, z1, z2), -+ z0 = svabd_z (p0, z1, z2)) -+ -+/* -+** abd_h4_f16_z_tied1: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0\.h, p0/z, z0\.h -+** fabd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (abd_h4_f16_z_tied1, svfloat16_t, __fp16, -+ z0 = svabd_n_f16_z (p0, z0, d4), -+ z0 = svabd_z (p0, z0, d4)) -+ -+/* -+** abd_h4_f16_z_untied: -+** mov (z[0-9]+\.h), h4 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fabd z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** fabd z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (abd_h4_f16_z_untied, svfloat16_t, __fp16, -+ z0 = svabd_n_f16_z (p0, z1, d4), -+ z0 = svabd_z (p0, z1, d4)) -+ -+/* -+** abd_1_f16_z_tied1: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? 
-+** movprfx z0\.h, p0/z, z0\.h -+** fabd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_f16_z_tied1, svfloat16_t, -+ z0 = svabd_n_f16_z (p0, z0, 1), -+ z0 = svabd_z (p0, z0, 1)) -+ -+/* -+** abd_1_f16_z_untied: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fabd z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** fabd z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_f16_z_untied, svfloat16_t, -+ z0 = svabd_n_f16_z (p0, z1, 1), -+ z0 = svabd_z (p0, z1, 1)) -+ -+/* -+** abd_0p5_f16_z_tied1: -+** fmov (z[0-9]+\.h), #(?:0\.5|5\.0e-1) -+** movprfx z0\.h, p0/z, z0\.h -+** fabd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_0p5_f16_z_tied1, svfloat16_t, -+ z0 = svabd_n_f16_z (p0, z0, 0.5), -+ z0 = svabd_z (p0, z0, 0.5)) -+ -+/* -+** abd_0p5_f16_z_untied: -+** fmov (z[0-9]+\.h), #(?:0\.5|5\.0e-1) -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fabd z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** fabd z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_0p5_f16_z_untied, svfloat16_t, -+ z0 = svabd_n_f16_z (p0, z1, 0.5), -+ z0 = svabd_z (p0, z1, 0.5)) -+ -+/* -+** abd_m1_f16_z_tied1: -+** fmov (z[0-9]+\.h), #-1\.0(?:e\+0)? -+** movprfx z0\.h, p0/z, z0\.h -+** fabd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_m1_f16_z_tied1, svfloat16_t, -+ z0 = svabd_n_f16_z (p0, z0, -1), -+ z0 = svabd_z (p0, z0, -1)) -+ -+/* -+** abd_m1_f16_z_untied: -+** fmov (z[0-9]+\.h), #-1\.0(?:e\+0)? -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fabd z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** fabd z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_m1_f16_z_untied, svfloat16_t, -+ z0 = svabd_n_f16_z (p0, z1, -1), -+ z0 = svabd_z (p0, z1, -1)) -+ -+/* -+** abd_m0p5_f16_z_tied1: -+** fmov (z[0-9]+\.h), #-(?:0\.5|5\.0e-1) -+** movprfx z0\.h, p0/z, z0\.h -+** fabd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_m0p5_f16_z_tied1, svfloat16_t, -+ z0 = svabd_n_f16_z (p0, z0, -0.5), -+ z0 = svabd_z (p0, z0, -0.5)) -+ -+/* -+** abd_m0p5_f16_z_untied: -+** fmov (z[0-9]+\.h), #-(?:0\.5|5\.0e-1) -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fabd z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** fabd z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_m0p5_f16_z_untied, svfloat16_t, -+ z0 = svabd_n_f16_z (p0, z1, -0.5), -+ z0 = svabd_z (p0, z1, -0.5)) -+ -+/* -+** abd_m2_f16_z: -+** fmov (z[0-9]+\.h), #-2\.0(?:e\+0)? 
-+** movprfx z0\.h, p0/z, z0\.h -+** fabd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_m2_f16_z, svfloat16_t, -+ z0 = svabd_n_f16_z (p0, z0, -2), -+ z0 = svabd_z (p0, z0, -2)) -+ -+/* -+** abd_f16_x_tied1: -+** fabd z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abd_f16_x_tied1, svfloat16_t, -+ z0 = svabd_f16_x (p0, z0, z1), -+ z0 = svabd_x (p0, z0, z1)) -+ -+/* -+** abd_f16_x_tied2: -+** fabd z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abd_f16_x_tied2, svfloat16_t, -+ z0 = svabd_f16_x (p0, z1, z0), -+ z0 = svabd_x (p0, z1, z0)) -+ -+/* -+** abd_f16_x_untied: -+** ( -+** movprfx z0, z1 -+** fabd z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0, z2 -+** fabd z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_f16_x_untied, svfloat16_t, -+ z0 = svabd_f16_x (p0, z1, z2), -+ z0 = svabd_x (p0, z1, z2)) -+ -+/* -+** abd_h4_f16_x_tied1: -+** mov (z[0-9]+\.h), h4 -+** fabd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (abd_h4_f16_x_tied1, svfloat16_t, __fp16, -+ z0 = svabd_n_f16_x (p0, z0, d4), -+ z0 = svabd_x (p0, z0, d4)) -+ -+/* -+** abd_h4_f16_x_untied: -+** mov z0\.h, h4 -+** fabd z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZD (abd_h4_f16_x_untied, svfloat16_t, __fp16, -+ z0 = svabd_n_f16_x (p0, z1, d4), -+ z0 = svabd_x (p0, z1, d4)) -+ -+/* -+** abd_1_f16_x_tied1: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? -+** fabd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_f16_x_tied1, svfloat16_t, -+ z0 = svabd_n_f16_x (p0, z0, 1), -+ z0 = svabd_x (p0, z0, 1)) -+ -+/* -+** abd_1_f16_x_untied: -+** fmov z0\.h, #1\.0(?:e\+0)? -+** fabd z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_f16_x_untied, svfloat16_t, -+ z0 = svabd_n_f16_x (p0, z1, 1), -+ z0 = svabd_x (p0, z1, 1)) -+ -+/* -+** abd_0p5_f16_x_tied1: -+** fmov (z[0-9]+\.h), #(?:0\.5|5\.0e-1) -+** fabd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_0p5_f16_x_tied1, svfloat16_t, -+ z0 = svabd_n_f16_x (p0, z0, 0.5), -+ z0 = svabd_x (p0, z0, 0.5)) -+ -+/* -+** abd_0p5_f16_x_untied: -+** fmov z0\.h, #(?:0\.5|5\.0e-1) -+** fabd z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abd_0p5_f16_x_untied, svfloat16_t, -+ z0 = svabd_n_f16_x (p0, z1, 0.5), -+ z0 = svabd_x (p0, z1, 0.5)) -+ -+/* -+** abd_m1_f16_x_tied1: -+** fmov (z[0-9]+\.h), #-1\.0(?:e\+0)? -+** fabd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_m1_f16_x_tied1, svfloat16_t, -+ z0 = svabd_n_f16_x (p0, z0, -1), -+ z0 = svabd_x (p0, z0, -1)) -+ -+/* -+** abd_m1_f16_x_untied: -+** fmov z0\.h, #-1\.0(?:e\+0)? -+** fabd z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abd_m1_f16_x_untied, svfloat16_t, -+ z0 = svabd_n_f16_x (p0, z1, -1), -+ z0 = svabd_x (p0, z1, -1)) -+ -+/* -+** abd_m0p5_f16_x_tied1: -+** fmov (z[0-9]+\.h), #-(?:0\.5|5\.0e-1) -+** fabd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_m0p5_f16_x_tied1, svfloat16_t, -+ z0 = svabd_n_f16_x (p0, z0, -0.5), -+ z0 = svabd_x (p0, z0, -0.5)) -+ -+/* -+** abd_m0p5_f16_x_untied: -+** fmov z0\.h, #-(?:0\.5|5\.0e-1) -+** fabd z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abd_m0p5_f16_x_untied, svfloat16_t, -+ z0 = svabd_n_f16_x (p0, z1, -0.5), -+ z0 = svabd_x (p0, z1, -0.5)) -+ -+/* -+** abd_2_f16_x_tied1: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fabd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_2_f16_x_tied1, svfloat16_t, -+ z0 = svabd_n_f16_x (p0, z0, 2), -+ z0 = svabd_x (p0, z0, 2)) -+ -+/* -+** abd_2_f16_x_untied: -+** fmov z0\.h, #2\.0(?:e\+0)? 
-+** fabd z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abd_2_f16_x_untied, svfloat16_t, -+ z0 = svabd_n_f16_x (p0, z1, 2), -+ z0 = svabd_x (p0, z1, 2)) -+ -+/* -+** ptrue_abd_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_f16_x_tied1, svfloat16_t, -+ z0 = svabd_f16_x (svptrue_b16 (), z0, z1), -+ z0 = svabd_x (svptrue_b16 (), z0, z1)) -+ -+/* -+** ptrue_abd_f16_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_f16_x_tied2, svfloat16_t, -+ z0 = svabd_f16_x (svptrue_b16 (), z1, z0), -+ z0 = svabd_x (svptrue_b16 (), z1, z0)) -+ -+/* -+** ptrue_abd_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_f16_x_untied, svfloat16_t, -+ z0 = svabd_f16_x (svptrue_b16 (), z1, z2), -+ z0 = svabd_x (svptrue_b16 (), z1, z2)) -+ -+/* -+** ptrue_abd_1_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_1_f16_x_tied1, svfloat16_t, -+ z0 = svabd_n_f16_x (svptrue_b16 (), z0, 1), -+ z0 = svabd_x (svptrue_b16 (), z0, 1)) -+ -+/* -+** ptrue_abd_1_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_1_f16_x_untied, svfloat16_t, -+ z0 = svabd_n_f16_x (svptrue_b16 (), z1, 1), -+ z0 = svabd_x (svptrue_b16 (), z1, 1)) -+ -+/* -+** ptrue_abd_0p5_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_0p5_f16_x_tied1, svfloat16_t, -+ z0 = svabd_n_f16_x (svptrue_b16 (), z0, 0.5), -+ z0 = svabd_x (svptrue_b16 (), z0, 0.5)) -+ -+/* -+** ptrue_abd_0p5_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_0p5_f16_x_untied, svfloat16_t, -+ z0 = svabd_n_f16_x (svptrue_b16 (), z1, 0.5), -+ z0 = svabd_x (svptrue_b16 (), z1, 0.5)) -+ -+/* -+** ptrue_abd_m1_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_m1_f16_x_tied1, svfloat16_t, -+ z0 = svabd_n_f16_x (svptrue_b16 (), z0, -1), -+ z0 = svabd_x (svptrue_b16 (), z0, -1)) -+ -+/* -+** ptrue_abd_m1_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_m1_f16_x_untied, svfloat16_t, -+ z0 = svabd_n_f16_x (svptrue_b16 (), z1, -1), -+ z0 = svabd_x (svptrue_b16 (), z1, -1)) -+ -+/* -+** ptrue_abd_m0p5_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_m0p5_f16_x_tied1, svfloat16_t, -+ z0 = svabd_n_f16_x (svptrue_b16 (), z0, -0.5), -+ z0 = svabd_x (svptrue_b16 (), z0, -0.5)) -+ -+/* -+** ptrue_abd_m0p5_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_m0p5_f16_x_untied, svfloat16_t, -+ z0 = svabd_n_f16_x (svptrue_b16 (), z1, -0.5), -+ z0 = svabd_x (svptrue_b16 (), z1, -0.5)) -+ -+/* -+** ptrue_abd_2_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_2_f16_x_tied1, svfloat16_t, -+ z0 = svabd_n_f16_x (svptrue_b16 (), z0, 2), -+ z0 = svabd_x (svptrue_b16 (), z0, 2)) -+ -+/* -+** ptrue_abd_2_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_2_f16_x_untied, svfloat16_t, -+ z0 = svabd_n_f16_x (svptrue_b16 (), z1, 2), -+ z0 = svabd_x (svptrue_b16 (), z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_f32.c -new file mode 100644 -index 000000000..bff37580c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_f32.c -@@ -0,0 +1,552 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** abd_f32_m_tied1: -+** fabd z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abd_f32_m_tied1, svfloat32_t, -+ z0 = svabd_f32_m (p0, z0, z1), -+ z0 = svabd_m (p0, z0, z1)) -+ -+/* -+** abd_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fabd z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abd_f32_m_tied2, svfloat32_t, -+ z0 = svabd_f32_m (p0, z1, z0), -+ z0 = svabd_m (p0, z1, z0)) -+ -+/* -+** abd_f32_m_untied: -+** movprfx z0, z1 -+** fabd z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abd_f32_m_untied, svfloat32_t, -+ z0 = svabd_f32_m (p0, z1, z2), -+ z0 = svabd_m (p0, z1, z2)) -+ -+/* -+** abd_s4_f32_m_tied1: -+** mov (z[0-9]+\.s), s4 -+** fabd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (abd_s4_f32_m_tied1, svfloat32_t, float, -+ z0 = svabd_n_f32_m (p0, z0, d4), -+ z0 = svabd_m (p0, z0, d4)) -+ -+/* -+** abd_s4_f32_m_untied: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0, z1 -+** fabd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (abd_s4_f32_m_untied, svfloat32_t, float, -+ z0 = svabd_n_f32_m (p0, z1, d4), -+ z0 = svabd_m (p0, z1, d4)) -+ -+/* -+** abd_1_f32_m_tied1: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? -+** fabd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_f32_m_tied1, svfloat32_t, -+ z0 = svabd_n_f32_m (p0, z0, 1), -+ z0 = svabd_m (p0, z0, 1)) -+ -+/* -+** abd_1_f32_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fabd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_f32_m_untied, svfloat32_t, -+ z0 = svabd_n_f32_m (p0, z1, 1), -+ z0 = svabd_m (p0, z1, 1)) -+ -+/* -+** abd_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fabd z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abd_f32_z_tied1, svfloat32_t, -+ z0 = svabd_f32_z (p0, z0, z1), -+ z0 = svabd_z (p0, z0, z1)) -+ -+/* -+** abd_f32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** fabd z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abd_f32_z_tied2, svfloat32_t, -+ z0 = svabd_f32_z (p0, z1, z0), -+ z0 = svabd_z (p0, z1, z0)) -+ -+/* -+** abd_f32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fabd z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fabd z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_f32_z_untied, svfloat32_t, -+ z0 = svabd_f32_z (p0, z1, z2), -+ z0 = svabd_z (p0, z1, z2)) -+ -+/* -+** abd_s4_f32_z_tied1: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0\.s, p0/z, z0\.s -+** fabd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (abd_s4_f32_z_tied1, svfloat32_t, float, -+ z0 = svabd_n_f32_z (p0, z0, d4), -+ z0 = svabd_z (p0, z0, d4)) -+ -+/* -+** abd_s4_f32_z_untied: -+** mov (z[0-9]+\.s), s4 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fabd z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** fabd z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (abd_s4_f32_z_untied, svfloat32_t, float, -+ z0 = svabd_n_f32_z (p0, z1, d4), -+ z0 = svabd_z (p0, z1, d4)) -+ -+/* -+** abd_1_f32_z_tied1: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? -+** movprfx z0\.s, p0/z, z0\.s -+** fabd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_f32_z_tied1, svfloat32_t, -+ z0 = svabd_n_f32_z (p0, z0, 1), -+ z0 = svabd_z (p0, z0, 1)) -+ -+/* -+** abd_1_f32_z_untied: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fabd z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** fabd z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_f32_z_untied, svfloat32_t, -+ z0 = svabd_n_f32_z (p0, z1, 1), -+ z0 = svabd_z (p0, z1, 1)) -+ -+/* -+** abd_0p5_f32_z_tied1: -+** fmov (z[0-9]+\.s), #(?:0\.5|5\.0e-1) -+** movprfx z0\.s, p0/z, z0\.s -+** fabd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_0p5_f32_z_tied1, svfloat32_t, -+ z0 = svabd_n_f32_z (p0, z0, 0.5), -+ z0 = svabd_z (p0, z0, 0.5)) -+ -+/* -+** abd_0p5_f32_z_untied: -+** fmov (z[0-9]+\.s), #(?:0\.5|5\.0e-1) -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fabd z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** fabd z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_0p5_f32_z_untied, svfloat32_t, -+ z0 = svabd_n_f32_z (p0, z1, 0.5), -+ z0 = svabd_z (p0, z1, 0.5)) -+ -+/* -+** abd_m1_f32_z_tied1: -+** fmov (z[0-9]+\.s), #-1\.0(?:e\+0)? -+** movprfx z0\.s, p0/z, z0\.s -+** fabd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_m1_f32_z_tied1, svfloat32_t, -+ z0 = svabd_n_f32_z (p0, z0, -1), -+ z0 = svabd_z (p0, z0, -1)) -+ -+/* -+** abd_m1_f32_z_untied: -+** fmov (z[0-9]+\.s), #-1\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fabd z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** fabd z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_m1_f32_z_untied, svfloat32_t, -+ z0 = svabd_n_f32_z (p0, z1, -1), -+ z0 = svabd_z (p0, z1, -1)) -+ -+/* -+** abd_m0p5_f32_z_tied1: -+** fmov (z[0-9]+\.s), #-(?:0\.5|5\.0e-1) -+** movprfx z0\.s, p0/z, z0\.s -+** fabd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_m0p5_f32_z_tied1, svfloat32_t, -+ z0 = svabd_n_f32_z (p0, z0, -0.5), -+ z0 = svabd_z (p0, z0, -0.5)) -+ -+/* -+** abd_m0p5_f32_z_untied: -+** fmov (z[0-9]+\.s), #-(?:0\.5|5\.0e-1) -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fabd z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** fabd z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_m0p5_f32_z_untied, svfloat32_t, -+ z0 = svabd_n_f32_z (p0, z1, -0.5), -+ z0 = svabd_z (p0, z1, -0.5)) -+ -+/* -+** abd_m2_f32_z: -+** fmov (z[0-9]+\.s), #-2\.0(?:e\+0)? -+** movprfx z0\.s, p0/z, z0\.s -+** fabd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_m2_f32_z, svfloat32_t, -+ z0 = svabd_n_f32_z (p0, z0, -2), -+ z0 = svabd_z (p0, z0, -2)) -+ -+/* -+** abd_f32_x_tied1: -+** fabd z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abd_f32_x_tied1, svfloat32_t, -+ z0 = svabd_f32_x (p0, z0, z1), -+ z0 = svabd_x (p0, z0, z1)) -+ -+/* -+** abd_f32_x_tied2: -+** fabd z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abd_f32_x_tied2, svfloat32_t, -+ z0 = svabd_f32_x (p0, z1, z0), -+ z0 = svabd_x (p0, z1, z0)) -+ -+/* -+** abd_f32_x_untied: -+** ( -+** movprfx z0, z1 -+** fabd z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0, z2 -+** fabd z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_f32_x_untied, svfloat32_t, -+ z0 = svabd_f32_x (p0, z1, z2), -+ z0 = svabd_x (p0, z1, z2)) -+ -+/* -+** abd_s4_f32_x_tied1: -+** mov (z[0-9]+\.s), s4 -+** fabd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (abd_s4_f32_x_tied1, svfloat32_t, float, -+ z0 = svabd_n_f32_x (p0, z0, d4), -+ z0 = svabd_x (p0, z0, d4)) -+ -+/* -+** abd_s4_f32_x_untied: -+** mov z0\.s, s4 -+** fabd z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZD (abd_s4_f32_x_untied, svfloat32_t, float, -+ z0 = svabd_n_f32_x (p0, z1, d4), -+ z0 = svabd_x (p0, z1, d4)) -+ -+/* -+** abd_1_f32_x_tied1: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? -+** fabd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_f32_x_tied1, svfloat32_t, -+ z0 = svabd_n_f32_x (p0, z0, 1), -+ z0 = svabd_x (p0, z0, 1)) -+ -+/* -+** abd_1_f32_x_untied: -+** fmov z0\.s, #1\.0(?:e\+0)? -+** fabd z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_f32_x_untied, svfloat32_t, -+ z0 = svabd_n_f32_x (p0, z1, 1), -+ z0 = svabd_x (p0, z1, 1)) -+ -+/* -+** abd_0p5_f32_x_tied1: -+** fmov (z[0-9]+\.s), #(?:0\.5|5\.0e-1) -+** fabd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_0p5_f32_x_tied1, svfloat32_t, -+ z0 = svabd_n_f32_x (p0, z0, 0.5), -+ z0 = svabd_x (p0, z0, 0.5)) -+ -+/* -+** abd_0p5_f32_x_untied: -+** fmov z0\.s, #(?:0\.5|5\.0e-1) -+** fabd z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abd_0p5_f32_x_untied, svfloat32_t, -+ z0 = svabd_n_f32_x (p0, z1, 0.5), -+ z0 = svabd_x (p0, z1, 0.5)) -+ -+/* -+** abd_m1_f32_x_tied1: -+** fmov (z[0-9]+\.s), #-1\.0(?:e\+0)? 
-+** fabd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_m1_f32_x_tied1, svfloat32_t, -+ z0 = svabd_n_f32_x (p0, z0, -1), -+ z0 = svabd_x (p0, z0, -1)) -+ -+/* -+** abd_m1_f32_x_untied: -+** fmov z0\.s, #-1\.0(?:e\+0)? -+** fabd z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abd_m1_f32_x_untied, svfloat32_t, -+ z0 = svabd_n_f32_x (p0, z1, -1), -+ z0 = svabd_x (p0, z1, -1)) -+ -+/* -+** abd_m0p5_f32_x_tied1: -+** fmov (z[0-9]+\.s), #-(?:0\.5|5\.0e-1) -+** fabd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_m0p5_f32_x_tied1, svfloat32_t, -+ z0 = svabd_n_f32_x (p0, z0, -0.5), -+ z0 = svabd_x (p0, z0, -0.5)) -+ -+/* -+** abd_m0p5_f32_x_untied: -+** fmov z0\.s, #-(?:0\.5|5\.0e-1) -+** fabd z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abd_m0p5_f32_x_untied, svfloat32_t, -+ z0 = svabd_n_f32_x (p0, z1, -0.5), -+ z0 = svabd_x (p0, z1, -0.5)) -+ -+/* -+** abd_2_f32_x_tied1: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fabd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_2_f32_x_tied1, svfloat32_t, -+ z0 = svabd_n_f32_x (p0, z0, 2), -+ z0 = svabd_x (p0, z0, 2)) -+ -+/* -+** abd_2_f32_x_untied: -+** fmov z0\.s, #2\.0(?:e\+0)? -+** fabd z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abd_2_f32_x_untied, svfloat32_t, -+ z0 = svabd_n_f32_x (p0, z1, 2), -+ z0 = svabd_x (p0, z1, 2)) -+ -+/* -+** ptrue_abd_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_f32_x_tied1, svfloat32_t, -+ z0 = svabd_f32_x (svptrue_b32 (), z0, z1), -+ z0 = svabd_x (svptrue_b32 (), z0, z1)) -+ -+/* -+** ptrue_abd_f32_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_f32_x_tied2, svfloat32_t, -+ z0 = svabd_f32_x (svptrue_b32 (), z1, z0), -+ z0 = svabd_x (svptrue_b32 (), z1, z0)) -+ -+/* -+** ptrue_abd_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_f32_x_untied, svfloat32_t, -+ z0 = svabd_f32_x (svptrue_b32 (), z1, z2), -+ z0 = svabd_x (svptrue_b32 (), z1, z2)) -+ -+/* -+** ptrue_abd_1_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_1_f32_x_tied1, svfloat32_t, -+ z0 = svabd_n_f32_x (svptrue_b32 (), z0, 1), -+ z0 = svabd_x (svptrue_b32 (), z0, 1)) -+ -+/* -+** ptrue_abd_1_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_1_f32_x_untied, svfloat32_t, -+ z0 = svabd_n_f32_x (svptrue_b32 (), z1, 1), -+ z0 = svabd_x (svptrue_b32 (), z1, 1)) -+ -+/* -+** ptrue_abd_0p5_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_0p5_f32_x_tied1, svfloat32_t, -+ z0 = svabd_n_f32_x (svptrue_b32 (), z0, 0.5), -+ z0 = svabd_x (svptrue_b32 (), z0, 0.5)) -+ -+/* -+** ptrue_abd_0p5_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_0p5_f32_x_untied, svfloat32_t, -+ z0 = svabd_n_f32_x (svptrue_b32 (), z1, 0.5), -+ z0 = svabd_x (svptrue_b32 (), z1, 0.5)) -+ -+/* -+** ptrue_abd_m1_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_m1_f32_x_tied1, svfloat32_t, -+ z0 = svabd_n_f32_x (svptrue_b32 (), z0, -1), -+ z0 = svabd_x (svptrue_b32 (), z0, -1)) -+ -+/* -+** ptrue_abd_m1_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_m1_f32_x_untied, svfloat32_t, -+ z0 = svabd_n_f32_x (svptrue_b32 (), z1, -1), -+ z0 = svabd_x (svptrue_b32 (), z1, -1)) -+ -+/* -+** ptrue_abd_m0p5_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_m0p5_f32_x_tied1, svfloat32_t, -+ z0 = svabd_n_f32_x (svptrue_b32 (), z0, -0.5), -+ z0 = svabd_x (svptrue_b32 (), z0, -0.5)) -+ -+/* -+** ptrue_abd_m0p5_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_m0p5_f32_x_untied, svfloat32_t, -+ z0 = svabd_n_f32_x (svptrue_b32 (), z1, -0.5), -+ z0 = svabd_x (svptrue_b32 (), z1, -0.5)) -+ -+/* -+** ptrue_abd_2_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_2_f32_x_tied1, svfloat32_t, -+ z0 = svabd_n_f32_x (svptrue_b32 (), z0, 2), -+ z0 = svabd_x (svptrue_b32 (), z0, 2)) -+ -+/* -+** ptrue_abd_2_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_2_f32_x_untied, svfloat32_t, -+ z0 = svabd_n_f32_x (svptrue_b32 (), z1, 2), -+ z0 = svabd_x (svptrue_b32 (), z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_f64.c -new file mode 100644 -index 000000000..c1e5f14e6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_f64.c -@@ -0,0 +1,552 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** abd_f64_m_tied1: -+** fabd z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (abd_f64_m_tied1, svfloat64_t, -+ z0 = svabd_f64_m (p0, z0, z1), -+ z0 = svabd_m (p0, z0, z1)) -+ -+/* -+** abd_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fabd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_f64_m_tied2, svfloat64_t, -+ z0 = svabd_f64_m (p0, z1, z0), -+ z0 = svabd_m (p0, z1, z0)) -+ -+/* -+** abd_f64_m_untied: -+** movprfx z0, z1 -+** fabd z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (abd_f64_m_untied, svfloat64_t, -+ z0 = svabd_f64_m (p0, z1, z2), -+ z0 = svabd_m (p0, z1, z2)) -+ -+/* -+** abd_d4_f64_m_tied1: -+** mov (z[0-9]+\.d), d4 -+** fabd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (abd_d4_f64_m_tied1, svfloat64_t, double, -+ z0 = svabd_n_f64_m (p0, z0, d4), -+ z0 = svabd_m (p0, z0, d4)) -+ -+/* -+** abd_d4_f64_m_untied: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0, z1 -+** fabd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (abd_d4_f64_m_untied, svfloat64_t, double, -+ z0 = svabd_n_f64_m (p0, z1, d4), -+ z0 = svabd_m (p0, z1, d4)) -+ -+/* -+** abd_1_f64_m_tied1: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? -+** fabd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_f64_m_tied1, svfloat64_t, -+ z0 = svabd_n_f64_m (p0, z0, 1), -+ z0 = svabd_m (p0, z0, 1)) -+ -+/* -+** abd_1_f64_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fabd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_f64_m_untied, svfloat64_t, -+ z0 = svabd_n_f64_m (p0, z1, 1), -+ z0 = svabd_m (p0, z1, 1)) -+ -+/* -+** abd_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fabd z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (abd_f64_z_tied1, svfloat64_t, -+ z0 = svabd_f64_z (p0, z0, z1), -+ z0 = svabd_z (p0, z0, z1)) -+ -+/* -+** abd_f64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** fabd z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (abd_f64_z_tied2, svfloat64_t, -+ z0 = svabd_f64_z (p0, z1, z0), -+ z0 = svabd_z (p0, z1, z0)) -+ -+/* -+** abd_f64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fabd z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fabd z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_f64_z_untied, svfloat64_t, -+ z0 = svabd_f64_z (p0, z1, z2), -+ z0 = svabd_z (p0, z1, z2)) -+ -+/* -+** abd_d4_f64_z_tied1: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0\.d, p0/z, z0\.d -+** fabd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (abd_d4_f64_z_tied1, svfloat64_t, double, -+ z0 = svabd_n_f64_z (p0, z0, d4), -+ z0 = svabd_z (p0, z0, d4)) -+ -+/* -+** abd_d4_f64_z_untied: -+** mov (z[0-9]+\.d), d4 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fabd z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** fabd z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (abd_d4_f64_z_untied, svfloat64_t, double, -+ z0 = svabd_n_f64_z (p0, z1, d4), -+ z0 = svabd_z (p0, z1, d4)) -+ -+/* -+** abd_1_f64_z_tied1: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? -+** movprfx z0\.d, p0/z, z0\.d -+** fabd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_f64_z_tied1, svfloat64_t, -+ z0 = svabd_n_f64_z (p0, z0, 1), -+ z0 = svabd_z (p0, z0, 1)) -+ -+/* -+** abd_1_f64_z_untied: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fabd z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** fabd z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_f64_z_untied, svfloat64_t, -+ z0 = svabd_n_f64_z (p0, z1, 1), -+ z0 = svabd_z (p0, z1, 1)) -+ -+/* -+** abd_0p5_f64_z_tied1: -+** fmov (z[0-9]+\.d), #(?:0\.5|5\.0e-1) -+** movprfx z0\.d, p0/z, z0\.d -+** fabd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_0p5_f64_z_tied1, svfloat64_t, -+ z0 = svabd_n_f64_z (p0, z0, 0.5), -+ z0 = svabd_z (p0, z0, 0.5)) -+ -+/* -+** abd_0p5_f64_z_untied: -+** fmov (z[0-9]+\.d), #(?:0\.5|5\.0e-1) -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fabd z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** fabd z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_0p5_f64_z_untied, svfloat64_t, -+ z0 = svabd_n_f64_z (p0, z1, 0.5), -+ z0 = svabd_z (p0, z1, 0.5)) -+ -+/* -+** abd_m1_f64_z_tied1: -+** fmov (z[0-9]+\.d), #-1\.0(?:e\+0)? -+** movprfx z0\.d, p0/z, z0\.d -+** fabd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_m1_f64_z_tied1, svfloat64_t, -+ z0 = svabd_n_f64_z (p0, z0, -1), -+ z0 = svabd_z (p0, z0, -1)) -+ -+/* -+** abd_m1_f64_z_untied: -+** fmov (z[0-9]+\.d), #-1\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fabd z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** fabd z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_m1_f64_z_untied, svfloat64_t, -+ z0 = svabd_n_f64_z (p0, z1, -1), -+ z0 = svabd_z (p0, z1, -1)) -+ -+/* -+** abd_m0p5_f64_z_tied1: -+** fmov (z[0-9]+\.d), #-(?:0\.5|5\.0e-1) -+** movprfx z0\.d, p0/z, z0\.d -+** fabd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_m0p5_f64_z_tied1, svfloat64_t, -+ z0 = svabd_n_f64_z (p0, z0, -0.5), -+ z0 = svabd_z (p0, z0, -0.5)) -+ -+/* -+** abd_m0p5_f64_z_untied: -+** fmov (z[0-9]+\.d), #-(?:0\.5|5\.0e-1) -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fabd z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** fabd z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_m0p5_f64_z_untied, svfloat64_t, -+ z0 = svabd_n_f64_z (p0, z1, -0.5), -+ z0 = svabd_z (p0, z1, -0.5)) -+ -+/* -+** abd_m2_f64_z: -+** fmov (z[0-9]+\.d), #-2\.0(?:e\+0)? -+** movprfx z0\.d, p0/z, z0\.d -+** fabd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_m2_f64_z, svfloat64_t, -+ z0 = svabd_n_f64_z (p0, z0, -2), -+ z0 = svabd_z (p0, z0, -2)) -+ -+/* -+** abd_f64_x_tied1: -+** fabd z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (abd_f64_x_tied1, svfloat64_t, -+ z0 = svabd_f64_x (p0, z0, z1), -+ z0 = svabd_x (p0, z0, z1)) -+ -+/* -+** abd_f64_x_tied2: -+** fabd z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (abd_f64_x_tied2, svfloat64_t, -+ z0 = svabd_f64_x (p0, z1, z0), -+ z0 = svabd_x (p0, z1, z0)) -+ -+/* -+** abd_f64_x_untied: -+** ( -+** movprfx z0, z1 -+** fabd z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0, z2 -+** fabd z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_f64_x_untied, svfloat64_t, -+ z0 = svabd_f64_x (p0, z1, z2), -+ z0 = svabd_x (p0, z1, z2)) -+ -+/* -+** abd_d4_f64_x_tied1: -+** mov (z[0-9]+\.d), d4 -+** fabd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (abd_d4_f64_x_tied1, svfloat64_t, double, -+ z0 = svabd_n_f64_x (p0, z0, d4), -+ z0 = svabd_x (p0, z0, d4)) -+ -+/* -+** abd_d4_f64_x_untied: -+** mov z0\.d, d4 -+** fabd z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZD (abd_d4_f64_x_untied, svfloat64_t, double, -+ z0 = svabd_n_f64_x (p0, z1, d4), -+ z0 = svabd_x (p0, z1, d4)) -+ -+/* -+** abd_1_f64_x_tied1: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? -+** fabd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_f64_x_tied1, svfloat64_t, -+ z0 = svabd_n_f64_x (p0, z0, 1), -+ z0 = svabd_x (p0, z0, 1)) -+ -+/* -+** abd_1_f64_x_untied: -+** fmov z0\.d, #1\.0(?:e\+0)? -+** fabd z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_f64_x_untied, svfloat64_t, -+ z0 = svabd_n_f64_x (p0, z1, 1), -+ z0 = svabd_x (p0, z1, 1)) -+ -+/* -+** abd_0p5_f64_x_tied1: -+** fmov (z[0-9]+\.d), #(?:0\.5|5\.0e-1) -+** fabd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_0p5_f64_x_tied1, svfloat64_t, -+ z0 = svabd_n_f64_x (p0, z0, 0.5), -+ z0 = svabd_x (p0, z0, 0.5)) -+ -+/* -+** abd_0p5_f64_x_untied: -+** fmov z0\.d, #(?:0\.5|5\.0e-1) -+** fabd z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (abd_0p5_f64_x_untied, svfloat64_t, -+ z0 = svabd_n_f64_x (p0, z1, 0.5), -+ z0 = svabd_x (p0, z1, 0.5)) -+ -+/* -+** abd_m1_f64_x_tied1: -+** fmov (z[0-9]+\.d), #-1\.0(?:e\+0)? 
-+** fabd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_m1_f64_x_tied1, svfloat64_t, -+ z0 = svabd_n_f64_x (p0, z0, -1), -+ z0 = svabd_x (p0, z0, -1)) -+ -+/* -+** abd_m1_f64_x_untied: -+** fmov z0\.d, #-1\.0(?:e\+0)? -+** fabd z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (abd_m1_f64_x_untied, svfloat64_t, -+ z0 = svabd_n_f64_x (p0, z1, -1), -+ z0 = svabd_x (p0, z1, -1)) -+ -+/* -+** abd_m0p5_f64_x_tied1: -+** fmov (z[0-9]+\.d), #-(?:0\.5|5\.0e-1) -+** fabd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_m0p5_f64_x_tied1, svfloat64_t, -+ z0 = svabd_n_f64_x (p0, z0, -0.5), -+ z0 = svabd_x (p0, z0, -0.5)) -+ -+/* -+** abd_m0p5_f64_x_untied: -+** fmov z0\.d, #-(?:0\.5|5\.0e-1) -+** fabd z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (abd_m0p5_f64_x_untied, svfloat64_t, -+ z0 = svabd_n_f64_x (p0, z1, -0.5), -+ z0 = svabd_x (p0, z1, -0.5)) -+ -+/* -+** abd_2_f64_x_tied1: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fabd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_2_f64_x_tied1, svfloat64_t, -+ z0 = svabd_n_f64_x (p0, z0, 2), -+ z0 = svabd_x (p0, z0, 2)) -+ -+/* -+** abd_2_f64_x_untied: -+** fmov z0\.d, #2\.0(?:e\+0)? -+** fabd z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (abd_2_f64_x_untied, svfloat64_t, -+ z0 = svabd_n_f64_x (p0, z1, 2), -+ z0 = svabd_x (p0, z1, 2)) -+ -+/* -+** ptrue_abd_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_f64_x_tied1, svfloat64_t, -+ z0 = svabd_f64_x (svptrue_b64 (), z0, z1), -+ z0 = svabd_x (svptrue_b64 (), z0, z1)) -+ -+/* -+** ptrue_abd_f64_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_f64_x_tied2, svfloat64_t, -+ z0 = svabd_f64_x (svptrue_b64 (), z1, z0), -+ z0 = svabd_x (svptrue_b64 (), z1, z0)) -+ -+/* -+** ptrue_abd_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_f64_x_untied, svfloat64_t, -+ z0 = svabd_f64_x (svptrue_b64 (), z1, z2), -+ z0 = svabd_x (svptrue_b64 (), z1, z2)) -+ -+/* -+** ptrue_abd_1_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_1_f64_x_tied1, svfloat64_t, -+ z0 = svabd_n_f64_x (svptrue_b64 (), z0, 1), -+ z0 = svabd_x (svptrue_b64 (), z0, 1)) -+ -+/* -+** ptrue_abd_1_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_1_f64_x_untied, svfloat64_t, -+ z0 = svabd_n_f64_x (svptrue_b64 (), z1, 1), -+ z0 = svabd_x (svptrue_b64 (), z1, 1)) -+ -+/* -+** ptrue_abd_0p5_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_0p5_f64_x_tied1, svfloat64_t, -+ z0 = svabd_n_f64_x (svptrue_b64 (), z0, 0.5), -+ z0 = svabd_x (svptrue_b64 (), z0, 0.5)) -+ -+/* -+** ptrue_abd_0p5_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_0p5_f64_x_untied, svfloat64_t, -+ z0 = svabd_n_f64_x (svptrue_b64 (), z1, 0.5), -+ z0 = svabd_x (svptrue_b64 (), z1, 0.5)) -+ -+/* -+** ptrue_abd_m1_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_m1_f64_x_tied1, svfloat64_t, -+ z0 = svabd_n_f64_x (svptrue_b64 (), z0, -1), -+ z0 = svabd_x (svptrue_b64 (), z0, -1)) -+ -+/* -+** ptrue_abd_m1_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_m1_f64_x_untied, svfloat64_t, -+ z0 = svabd_n_f64_x (svptrue_b64 (), z1, -1), -+ z0 = svabd_x (svptrue_b64 (), z1, -1)) -+ -+/* -+** ptrue_abd_m0p5_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_m0p5_f64_x_tied1, svfloat64_t, -+ z0 = svabd_n_f64_x (svptrue_b64 (), z0, -0.5), -+ z0 = svabd_x (svptrue_b64 (), z0, -0.5)) -+ -+/* -+** ptrue_abd_m0p5_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_m0p5_f64_x_untied, svfloat64_t, -+ z0 = svabd_n_f64_x (svptrue_b64 (), z1, -0.5), -+ z0 = svabd_x (svptrue_b64 (), z1, -0.5)) -+ -+/* -+** ptrue_abd_2_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_2_f64_x_tied1, svfloat64_t, -+ z0 = svabd_n_f64_x (svptrue_b64 (), z0, 2), -+ z0 = svabd_x (svptrue_b64 (), z0, 2)) -+ -+/* -+** ptrue_abd_2_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abd_2_f64_x_untied, svfloat64_t, -+ z0 = svabd_n_f64_x (svptrue_b64 (), z1, 2), -+ z0 = svabd_x (svptrue_b64 (), z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_s16.c -new file mode 100644 -index 000000000..e2d0c0fb7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_s16.c -@@ -0,0 +1,237 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** abd_s16_m_tied1: -+** sabd z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abd_s16_m_tied1, svint16_t, -+ z0 = svabd_s16_m (p0, z0, z1), -+ z0 = svabd_m (p0, z0, z1)) -+ -+/* -+** abd_s16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** sabd z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abd_s16_m_tied2, svint16_t, -+ z0 = svabd_s16_m (p0, z1, z0), -+ z0 = svabd_m (p0, z1, z0)) -+ -+/* -+** abd_s16_m_untied: -+** movprfx z0, z1 -+** sabd z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abd_s16_m_untied, svint16_t, -+ z0 = svabd_s16_m (p0, z1, z2), -+ z0 = svabd_m (p0, z1, z2)) -+ -+/* -+** abd_w0_s16_m_tied1: -+** mov (z[0-9]+\.h), w0 -+** sabd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_w0_s16_m_tied1, svint16_t, int16_t, -+ z0 = svabd_n_s16_m (p0, z0, x0), -+ z0 = svabd_m (p0, z0, x0)) -+ -+/* -+** abd_w0_s16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** sabd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_w0_s16_m_untied, svint16_t, int16_t, -+ z0 = svabd_n_s16_m (p0, z1, x0), -+ z0 = svabd_m (p0, z1, x0)) -+ -+/* -+** abd_1_s16_m_tied1: -+** mov (z[0-9]+\.h), #1 -+** sabd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_s16_m_tied1, svint16_t, -+ z0 = svabd_n_s16_m (p0, z0, 1), -+ z0 = svabd_m (p0, z0, 1)) -+ -+/* -+** abd_1_s16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), #1 -+** movprfx z0, z1 -+** sabd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_s16_m_untied, svint16_t, -+ z0 = svabd_n_s16_m (p0, z1, 1), -+ z0 = svabd_m (p0, z1, 1)) -+ -+/* -+** abd_s16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** sabd z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abd_s16_z_tied1, svint16_t, -+ z0 = svabd_s16_z (p0, z0, z1), -+ z0 = svabd_z (p0, z0, z1)) -+ -+/* -+** abd_s16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** sabd z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abd_s16_z_tied2, svint16_t, -+ z0 = svabd_s16_z (p0, 
z1, z0), -+ z0 = svabd_z (p0, z1, z0)) -+ -+/* -+** abd_s16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** sabd z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** sabd z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_s16_z_untied, svint16_t, -+ z0 = svabd_s16_z (p0, z1, z2), -+ z0 = svabd_z (p0, z1, z2)) -+ -+/* -+** abd_w0_s16_z_tied1: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** sabd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_w0_s16_z_tied1, svint16_t, int16_t, -+ z0 = svabd_n_s16_z (p0, z0, x0), -+ z0 = svabd_z (p0, z0, x0)) -+ -+/* -+** abd_w0_s16_z_untied: -+** mov (z[0-9]+\.h), w0 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** sabd z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** sabd z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_w0_s16_z_untied, svint16_t, int16_t, -+ z0 = svabd_n_s16_z (p0, z1, x0), -+ z0 = svabd_z (p0, z1, x0)) -+ -+/* -+** abd_1_s16_z_tied1: -+** mov (z[0-9]+\.h), #1 -+** movprfx z0\.h, p0/z, z0\.h -+** sabd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_s16_z_tied1, svint16_t, -+ z0 = svabd_n_s16_z (p0, z0, 1), -+ z0 = svabd_z (p0, z0, 1)) -+ -+/* -+** abd_1_s16_z_untied: -+** mov (z[0-9]+\.h), #1 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** sabd z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** sabd z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_s16_z_untied, svint16_t, -+ z0 = svabd_n_s16_z (p0, z1, 1), -+ z0 = svabd_z (p0, z1, 1)) -+ -+/* -+** abd_s16_x_tied1: -+** sabd z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abd_s16_x_tied1, svint16_t, -+ z0 = svabd_s16_x (p0, z0, z1), -+ z0 = svabd_x (p0, z0, z1)) -+ -+/* -+** abd_s16_x_tied2: -+** sabd z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abd_s16_x_tied2, svint16_t, -+ z0 = svabd_s16_x (p0, z1, z0), -+ z0 = svabd_x (p0, z1, z0)) -+ -+/* -+** abd_s16_x_untied: -+** ( -+** movprfx z0, z1 -+** sabd z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0, z2 -+** sabd z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_s16_x_untied, svint16_t, -+ z0 = svabd_s16_x (p0, z1, z2), -+ z0 = svabd_x (p0, z1, z2)) -+ -+/* -+** abd_w0_s16_x_tied1: -+** mov (z[0-9]+\.h), w0 -+** sabd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_w0_s16_x_tied1, svint16_t, int16_t, -+ z0 = svabd_n_s16_x (p0, z0, x0), -+ z0 = svabd_x (p0, z0, x0)) -+ -+/* -+** abd_w0_s16_x_untied: -+** mov z0\.h, w0 -+** sabd z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_w0_s16_x_untied, svint16_t, int16_t, -+ z0 = svabd_n_s16_x (p0, z1, x0), -+ z0 = svabd_x (p0, z1, x0)) -+ -+/* -+** abd_1_s16_x_tied1: -+** mov (z[0-9]+\.h), #1 -+** sabd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_s16_x_tied1, svint16_t, -+ z0 = svabd_n_s16_x (p0, z0, 1), -+ z0 = svabd_x (p0, z0, 1)) -+ -+/* -+** abd_1_s16_x_untied: -+** mov z0\.h, #1 -+** sabd z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_s16_x_untied, svint16_t, -+ z0 = svabd_n_s16_x (p0, z1, 1), -+ z0 = svabd_x (p0, z1, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_s32.c -new file mode 100644 -index 000000000..5c95ec04d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_s32.c -@@ -0,0 +1,237 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** abd_s32_m_tied1: -+** sabd z0\.s, p0/m, z0\.s, z1\.s 
-+** ret -+*/ -+TEST_UNIFORM_Z (abd_s32_m_tied1, svint32_t, -+ z0 = svabd_s32_m (p0, z0, z1), -+ z0 = svabd_m (p0, z0, z1)) -+ -+/* -+** abd_s32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** sabd z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abd_s32_m_tied2, svint32_t, -+ z0 = svabd_s32_m (p0, z1, z0), -+ z0 = svabd_m (p0, z1, z0)) -+ -+/* -+** abd_s32_m_untied: -+** movprfx z0, z1 -+** sabd z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abd_s32_m_untied, svint32_t, -+ z0 = svabd_s32_m (p0, z1, z2), -+ z0 = svabd_m (p0, z1, z2)) -+ -+/* -+** abd_w0_s32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** sabd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_w0_s32_m_tied1, svint32_t, int32_t, -+ z0 = svabd_n_s32_m (p0, z0, x0), -+ z0 = svabd_m (p0, z0, x0)) -+ -+/* -+** abd_w0_s32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** sabd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_w0_s32_m_untied, svint32_t, int32_t, -+ z0 = svabd_n_s32_m (p0, z1, x0), -+ z0 = svabd_m (p0, z1, x0)) -+ -+/* -+** abd_1_s32_m_tied1: -+** mov (z[0-9]+\.s), #1 -+** sabd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_s32_m_tied1, svint32_t, -+ z0 = svabd_n_s32_m (p0, z0, 1), -+ z0 = svabd_m (p0, z0, 1)) -+ -+/* -+** abd_1_s32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #1 -+** movprfx z0, z1 -+** sabd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_s32_m_untied, svint32_t, -+ z0 = svabd_n_s32_m (p0, z1, 1), -+ z0 = svabd_m (p0, z1, 1)) -+ -+/* -+** abd_s32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** sabd z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abd_s32_z_tied1, svint32_t, -+ z0 = svabd_s32_z (p0, z0, z1), -+ z0 = svabd_z (p0, z0, z1)) -+ -+/* -+** abd_s32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** sabd z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abd_s32_z_tied2, svint32_t, -+ z0 = svabd_s32_z (p0, z1, z0), -+ z0 = svabd_z (p0, z1, z0)) -+ -+/* -+** abd_s32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** sabd z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** sabd z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_s32_z_untied, svint32_t, -+ z0 = svabd_s32_z (p0, z1, z2), -+ z0 = svabd_z (p0, z1, z2)) -+ -+/* -+** abd_w0_s32_z_tied1: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** sabd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_w0_s32_z_tied1, svint32_t, int32_t, -+ z0 = svabd_n_s32_z (p0, z0, x0), -+ z0 = svabd_z (p0, z0, x0)) -+ -+/* -+** abd_w0_s32_z_untied: -+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** sabd z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** sabd z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_w0_s32_z_untied, svint32_t, int32_t, -+ z0 = svabd_n_s32_z (p0, z1, x0), -+ z0 = svabd_z (p0, z1, x0)) -+ -+/* -+** abd_1_s32_z_tied1: -+** mov (z[0-9]+\.s), #1 -+** movprfx z0\.s, p0/z, z0\.s -+** sabd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_s32_z_tied1, svint32_t, -+ z0 = svabd_n_s32_z (p0, z0, 1), -+ z0 = svabd_z (p0, z0, 1)) -+ -+/* -+** abd_1_s32_z_untied: -+** mov (z[0-9]+\.s), #1 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** sabd z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** sabd z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_s32_z_untied, svint32_t, -+ z0 = svabd_n_s32_z (p0, z1, 1), -+ z0 = svabd_z (p0, z1, 1)) -+ -+/* -+** abd_s32_x_tied1: -+** sabd z0\.s, p0/m, z0\.s, z1\.s 
-+** ret -+*/ -+TEST_UNIFORM_Z (abd_s32_x_tied1, svint32_t, -+ z0 = svabd_s32_x (p0, z0, z1), -+ z0 = svabd_x (p0, z0, z1)) -+ -+/* -+** abd_s32_x_tied2: -+** sabd z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abd_s32_x_tied2, svint32_t, -+ z0 = svabd_s32_x (p0, z1, z0), -+ z0 = svabd_x (p0, z1, z0)) -+ -+/* -+** abd_s32_x_untied: -+** ( -+** movprfx z0, z1 -+** sabd z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0, z2 -+** sabd z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_s32_x_untied, svint32_t, -+ z0 = svabd_s32_x (p0, z1, z2), -+ z0 = svabd_x (p0, z1, z2)) -+ -+/* -+** abd_w0_s32_x_tied1: -+** mov (z[0-9]+\.s), w0 -+** sabd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_w0_s32_x_tied1, svint32_t, int32_t, -+ z0 = svabd_n_s32_x (p0, z0, x0), -+ z0 = svabd_x (p0, z0, x0)) -+ -+/* -+** abd_w0_s32_x_untied: -+** mov z0\.s, w0 -+** sabd z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_w0_s32_x_untied, svint32_t, int32_t, -+ z0 = svabd_n_s32_x (p0, z1, x0), -+ z0 = svabd_x (p0, z1, x0)) -+ -+/* -+** abd_1_s32_x_tied1: -+** mov (z[0-9]+\.s), #1 -+** sabd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_s32_x_tied1, svint32_t, -+ z0 = svabd_n_s32_x (p0, z0, 1), -+ z0 = svabd_x (p0, z0, 1)) -+ -+/* -+** abd_1_s32_x_untied: -+** mov z0\.s, #1 -+** sabd z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_s32_x_untied, svint32_t, -+ z0 = svabd_n_s32_x (p0, z1, 1), -+ z0 = svabd_x (p0, z1, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_s64.c -new file mode 100644 -index 000000000..2402ecf29 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_s64.c -@@ -0,0 +1,237 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** abd_s64_m_tied1: -+** sabd z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (abd_s64_m_tied1, svint64_t, -+ z0 = svabd_s64_m (p0, z0, z1), -+ z0 = svabd_m (p0, z0, z1)) -+ -+/* -+** abd_s64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** sabd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_s64_m_tied2, svint64_t, -+ z0 = svabd_s64_m (p0, z1, z0), -+ z0 = svabd_m (p0, z1, z0)) -+ -+/* -+** abd_s64_m_untied: -+** movprfx z0, z1 -+** sabd z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (abd_s64_m_untied, svint64_t, -+ z0 = svabd_s64_m (p0, z1, z2), -+ z0 = svabd_m (p0, z1, z2)) -+ -+/* -+** abd_x0_s64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** sabd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_x0_s64_m_tied1, svint64_t, int64_t, -+ z0 = svabd_n_s64_m (p0, z0, x0), -+ z0 = svabd_m (p0, z0, x0)) -+ -+/* -+** abd_x0_s64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** sabd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_x0_s64_m_untied, svint64_t, int64_t, -+ z0 = svabd_n_s64_m (p0, z1, x0), -+ z0 = svabd_m (p0, z1, x0)) -+ -+/* -+** abd_1_s64_m_tied1: -+** mov (z[0-9]+\.d), #1 -+** sabd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_s64_m_tied1, svint64_t, -+ z0 = svabd_n_s64_m (p0, z0, 1), -+ z0 = svabd_m (p0, z0, 1)) -+ -+/* -+** abd_1_s64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #1 -+** movprfx z0, z1 -+** sabd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_s64_m_untied, svint64_t, -+ z0 = svabd_n_s64_m (p0, z1, 1), -+ z0 = svabd_m (p0, z1, 1)) -+ -+/* -+** abd_s64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** sabd z0\.d, p0/m, 
z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (abd_s64_z_tied1, svint64_t, -+ z0 = svabd_s64_z (p0, z0, z1), -+ z0 = svabd_z (p0, z0, z1)) -+ -+/* -+** abd_s64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** sabd z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (abd_s64_z_tied2, svint64_t, -+ z0 = svabd_s64_z (p0, z1, z0), -+ z0 = svabd_z (p0, z1, z0)) -+ -+/* -+** abd_s64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** sabd z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** sabd z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_s64_z_untied, svint64_t, -+ z0 = svabd_s64_z (p0, z1, z2), -+ z0 = svabd_z (p0, z1, z2)) -+ -+/* -+** abd_x0_s64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** sabd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_x0_s64_z_tied1, svint64_t, int64_t, -+ z0 = svabd_n_s64_z (p0, z0, x0), -+ z0 = svabd_z (p0, z0, x0)) -+ -+/* -+** abd_x0_s64_z_untied: -+** mov (z[0-9]+\.d), x0 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** sabd z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** sabd z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_x0_s64_z_untied, svint64_t, int64_t, -+ z0 = svabd_n_s64_z (p0, z1, x0), -+ z0 = svabd_z (p0, z1, x0)) -+ -+/* -+** abd_1_s64_z_tied1: -+** mov (z[0-9]+\.d), #1 -+** movprfx z0\.d, p0/z, z0\.d -+** sabd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_s64_z_tied1, svint64_t, -+ z0 = svabd_n_s64_z (p0, z0, 1), -+ z0 = svabd_z (p0, z0, 1)) -+ -+/* -+** abd_1_s64_z_untied: -+** mov (z[0-9]+\.d), #1 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** sabd z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** sabd z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_s64_z_untied, svint64_t, -+ z0 = svabd_n_s64_z (p0, z1, 1), -+ z0 = svabd_z (p0, z1, 1)) -+ -+/* -+** abd_s64_x_tied1: -+** sabd z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (abd_s64_x_tied1, svint64_t, -+ z0 = svabd_s64_x (p0, z0, z1), -+ z0 = svabd_x (p0, z0, z1)) -+ -+/* -+** abd_s64_x_tied2: -+** sabd z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (abd_s64_x_tied2, svint64_t, -+ z0 = svabd_s64_x (p0, z1, z0), -+ z0 = svabd_x (p0, z1, z0)) -+ -+/* -+** abd_s64_x_untied: -+** ( -+** movprfx z0, z1 -+** sabd z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0, z2 -+** sabd z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_s64_x_untied, svint64_t, -+ z0 = svabd_s64_x (p0, z1, z2), -+ z0 = svabd_x (p0, z1, z2)) -+ -+/* -+** abd_x0_s64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** sabd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_x0_s64_x_tied1, svint64_t, int64_t, -+ z0 = svabd_n_s64_x (p0, z0, x0), -+ z0 = svabd_x (p0, z0, x0)) -+ -+/* -+** abd_x0_s64_x_untied: -+** mov z0\.d, x0 -+** sabd z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_x0_s64_x_untied, svint64_t, int64_t, -+ z0 = svabd_n_s64_x (p0, z1, x0), -+ z0 = svabd_x (p0, z1, x0)) -+ -+/* -+** abd_1_s64_x_tied1: -+** mov (z[0-9]+\.d), #1 -+** sabd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_s64_x_tied1, svint64_t, -+ z0 = svabd_n_s64_x (p0, z0, 1), -+ z0 = svabd_x (p0, z0, 1)) -+ -+/* -+** abd_1_s64_x_untied: -+** mov z0\.d, #1 -+** sabd z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_s64_x_untied, svint64_t, -+ z0 = svabd_n_s64_x (p0, z1, 1), -+ z0 = svabd_x (p0, z1, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_s8.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_s8.c -new file mode 100644 -index 000000000..49a2cc388 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_s8.c -@@ -0,0 +1,237 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** abd_s8_m_tied1: -+** sabd z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (abd_s8_m_tied1, svint8_t, -+ z0 = svabd_s8_m (p0, z0, z1), -+ z0 = svabd_m (p0, z0, z1)) -+ -+/* -+** abd_s8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** sabd z0\.b, p0/m, z0\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (abd_s8_m_tied2, svint8_t, -+ z0 = svabd_s8_m (p0, z1, z0), -+ z0 = svabd_m (p0, z1, z0)) -+ -+/* -+** abd_s8_m_untied: -+** movprfx z0, z1 -+** sabd z0\.b, p0/m, z0\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (abd_s8_m_untied, svint8_t, -+ z0 = svabd_s8_m (p0, z1, z2), -+ z0 = svabd_m (p0, z1, z2)) -+ -+/* -+** abd_w0_s8_m_tied1: -+** mov (z[0-9]+\.b), w0 -+** sabd z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_w0_s8_m_tied1, svint8_t, int8_t, -+ z0 = svabd_n_s8_m (p0, z0, x0), -+ z0 = svabd_m (p0, z0, x0)) -+ -+/* -+** abd_w0_s8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** sabd z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_w0_s8_m_untied, svint8_t, int8_t, -+ z0 = svabd_n_s8_m (p0, z1, x0), -+ z0 = svabd_m (p0, z1, x0)) -+ -+/* -+** abd_1_s8_m_tied1: -+** mov (z[0-9]+\.b), #1 -+** sabd z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_s8_m_tied1, svint8_t, -+ z0 = svabd_n_s8_m (p0, z0, 1), -+ z0 = svabd_m (p0, z0, 1)) -+ -+/* -+** abd_1_s8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), #1 -+** movprfx z0, z1 -+** sabd z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_s8_m_untied, svint8_t, -+ z0 = svabd_n_s8_m (p0, z1, 1), -+ z0 = svabd_m (p0, z1, 1)) -+ -+/* -+** abd_s8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** sabd z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (abd_s8_z_tied1, svint8_t, -+ z0 = svabd_s8_z (p0, z0, z1), -+ z0 = svabd_z (p0, z0, z1)) -+ -+/* -+** abd_s8_z_tied2: -+** movprfx z0\.b, p0/z, z0\.b -+** sabd z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (abd_s8_z_tied2, svint8_t, -+ z0 = svabd_s8_z (p0, z1, z0), -+ z0 = svabd_z (p0, z1, z0)) -+ -+/* -+** abd_s8_z_untied: -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** sabd z0\.b, p0/m, z0\.b, z2\.b -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** sabd z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_s8_z_untied, svint8_t, -+ z0 = svabd_s8_z (p0, z1, z2), -+ z0 = svabd_z (p0, z1, z2)) -+ -+/* -+** abd_w0_s8_z_tied1: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** sabd z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_w0_s8_z_tied1, svint8_t, int8_t, -+ z0 = svabd_n_s8_z (p0, z0, x0), -+ z0 = svabd_z (p0, z0, x0)) -+ -+/* -+** abd_w0_s8_z_untied: -+** mov (z[0-9]+\.b), w0 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** sabd z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** sabd z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_w0_s8_z_untied, svint8_t, int8_t, -+ z0 = svabd_n_s8_z (p0, z1, x0), -+ z0 = svabd_z (p0, z1, x0)) -+ -+/* -+** abd_1_s8_z_tied1: -+** mov (z[0-9]+\.b), #1 -+** movprfx z0\.b, p0/z, z0\.b -+** sabd z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_s8_z_tied1, svint8_t, -+ z0 = svabd_n_s8_z (p0, z0, 1), -+ z0 = svabd_z (p0, z0, 1)) -+ -+/* -+** abd_1_s8_z_untied: -+** mov (z[0-9]+\.b), #1 -+** ( -+** 
movprfx z0\.b, p0/z, z1\.b -+** sabd z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** sabd z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_s8_z_untied, svint8_t, -+ z0 = svabd_n_s8_z (p0, z1, 1), -+ z0 = svabd_z (p0, z1, 1)) -+ -+/* -+** abd_s8_x_tied1: -+** sabd z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (abd_s8_x_tied1, svint8_t, -+ z0 = svabd_s8_x (p0, z0, z1), -+ z0 = svabd_x (p0, z0, z1)) -+ -+/* -+** abd_s8_x_tied2: -+** sabd z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (abd_s8_x_tied2, svint8_t, -+ z0 = svabd_s8_x (p0, z1, z0), -+ z0 = svabd_x (p0, z1, z0)) -+ -+/* -+** abd_s8_x_untied: -+** ( -+** movprfx z0, z1 -+** sabd z0\.b, p0/m, z0\.b, z2\.b -+** | -+** movprfx z0, z2 -+** sabd z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_s8_x_untied, svint8_t, -+ z0 = svabd_s8_x (p0, z1, z2), -+ z0 = svabd_x (p0, z1, z2)) -+ -+/* -+** abd_w0_s8_x_tied1: -+** mov (z[0-9]+\.b), w0 -+** sabd z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_w0_s8_x_tied1, svint8_t, int8_t, -+ z0 = svabd_n_s8_x (p0, z0, x0), -+ z0 = svabd_x (p0, z0, x0)) -+ -+/* -+** abd_w0_s8_x_untied: -+** mov z0\.b, w0 -+** sabd z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_w0_s8_x_untied, svint8_t, int8_t, -+ z0 = svabd_n_s8_x (p0, z1, x0), -+ z0 = svabd_x (p0, z1, x0)) -+ -+/* -+** abd_1_s8_x_tied1: -+** mov (z[0-9]+\.b), #1 -+** sabd z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_s8_x_tied1, svint8_t, -+ z0 = svabd_n_s8_x (p0, z0, 1), -+ z0 = svabd_x (p0, z0, 1)) -+ -+/* -+** abd_1_s8_x_untied: -+** mov z0\.b, #1 -+** sabd z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_s8_x_untied, svint8_t, -+ z0 = svabd_n_s8_x (p0, z1, 1), -+ z0 = svabd_x (p0, z1, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_u16.c -new file mode 100644 -index 000000000..60aa9429e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_u16.c -@@ -0,0 +1,237 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** abd_u16_m_tied1: -+** uabd z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abd_u16_m_tied1, svuint16_t, -+ z0 = svabd_u16_m (p0, z0, z1), -+ z0 = svabd_m (p0, z0, z1)) -+ -+/* -+** abd_u16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** uabd z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abd_u16_m_tied2, svuint16_t, -+ z0 = svabd_u16_m (p0, z1, z0), -+ z0 = svabd_m (p0, z1, z0)) -+ -+/* -+** abd_u16_m_untied: -+** movprfx z0, z1 -+** uabd z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abd_u16_m_untied, svuint16_t, -+ z0 = svabd_u16_m (p0, z1, z2), -+ z0 = svabd_m (p0, z1, z2)) -+ -+/* -+** abd_w0_u16_m_tied1: -+** mov (z[0-9]+\.h), w0 -+** uabd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_w0_u16_m_tied1, svuint16_t, uint16_t, -+ z0 = svabd_n_u16_m (p0, z0, x0), -+ z0 = svabd_m (p0, z0, x0)) -+ -+/* -+** abd_w0_u16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** uabd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_w0_u16_m_untied, svuint16_t, uint16_t, -+ z0 = svabd_n_u16_m (p0, z1, x0), -+ z0 = svabd_m (p0, z1, x0)) -+ -+/* -+** abd_1_u16_m_tied1: -+** mov (z[0-9]+\.h), #1 -+** uabd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_u16_m_tied1, svuint16_t, -+ z0 = svabd_n_u16_m (p0, z0, 1), -+ z0 = svabd_m (p0, z0, 1)) -+ -+/* -+** 
abd_1_u16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), #1 -+** movprfx z0, z1 -+** uabd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_u16_m_untied, svuint16_t, -+ z0 = svabd_n_u16_m (p0, z1, 1), -+ z0 = svabd_m (p0, z1, 1)) -+ -+/* -+** abd_u16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** uabd z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abd_u16_z_tied1, svuint16_t, -+ z0 = svabd_u16_z (p0, z0, z1), -+ z0 = svabd_z (p0, z0, z1)) -+ -+/* -+** abd_u16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** uabd z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abd_u16_z_tied2, svuint16_t, -+ z0 = svabd_u16_z (p0, z1, z0), -+ z0 = svabd_z (p0, z1, z0)) -+ -+/* -+** abd_u16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** uabd z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** uabd z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_u16_z_untied, svuint16_t, -+ z0 = svabd_u16_z (p0, z1, z2), -+ z0 = svabd_z (p0, z1, z2)) -+ -+/* -+** abd_w0_u16_z_tied1: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** uabd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_w0_u16_z_tied1, svuint16_t, uint16_t, -+ z0 = svabd_n_u16_z (p0, z0, x0), -+ z0 = svabd_z (p0, z0, x0)) -+ -+/* -+** abd_w0_u16_z_untied: -+** mov (z[0-9]+\.h), w0 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** uabd z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** uabd z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_w0_u16_z_untied, svuint16_t, uint16_t, -+ z0 = svabd_n_u16_z (p0, z1, x0), -+ z0 = svabd_z (p0, z1, x0)) -+ -+/* -+** abd_1_u16_z_tied1: -+** mov (z[0-9]+\.h), #1 -+** movprfx z0\.h, p0/z, z0\.h -+** uabd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_u16_z_tied1, svuint16_t, -+ z0 = svabd_n_u16_z (p0, z0, 1), -+ z0 = svabd_z (p0, z0, 1)) -+ -+/* -+** abd_1_u16_z_untied: -+** mov (z[0-9]+\.h), #1 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** uabd z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** uabd z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_u16_z_untied, svuint16_t, -+ z0 = svabd_n_u16_z (p0, z1, 1), -+ z0 = svabd_z (p0, z1, 1)) -+ -+/* -+** abd_u16_x_tied1: -+** uabd z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abd_u16_x_tied1, svuint16_t, -+ z0 = svabd_u16_x (p0, z0, z1), -+ z0 = svabd_x (p0, z0, z1)) -+ -+/* -+** abd_u16_x_tied2: -+** uabd z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abd_u16_x_tied2, svuint16_t, -+ z0 = svabd_u16_x (p0, z1, z0), -+ z0 = svabd_x (p0, z1, z0)) -+ -+/* -+** abd_u16_x_untied: -+** ( -+** movprfx z0, z1 -+** uabd z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0, z2 -+** uabd z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_u16_x_untied, svuint16_t, -+ z0 = svabd_u16_x (p0, z1, z2), -+ z0 = svabd_x (p0, z1, z2)) -+ -+/* -+** abd_w0_u16_x_tied1: -+** mov (z[0-9]+\.h), w0 -+** uabd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_w0_u16_x_tied1, svuint16_t, uint16_t, -+ z0 = svabd_n_u16_x (p0, z0, x0), -+ z0 = svabd_x (p0, z0, x0)) -+ -+/* -+** abd_w0_u16_x_untied: -+** mov z0\.h, w0 -+** uabd z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_w0_u16_x_untied, svuint16_t, uint16_t, -+ z0 = svabd_n_u16_x (p0, z1, x0), -+ z0 = svabd_x (p0, z1, x0)) -+ -+/* -+** abd_1_u16_x_tied1: -+** mov (z[0-9]+\.h), #1 -+** uabd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_u16_x_tied1, svuint16_t, -+ z0 = svabd_n_u16_x (p0, z0, 1), -+ z0 = svabd_x (p0, 
z0, 1)) -+ -+/* -+** abd_1_u16_x_untied: -+** mov z0\.h, #1 -+** uabd z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_u16_x_untied, svuint16_t, -+ z0 = svabd_n_u16_x (p0, z1, 1), -+ z0 = svabd_x (p0, z1, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_u32.c -new file mode 100644 -index 000000000..bc2410783 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_u32.c -@@ -0,0 +1,237 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** abd_u32_m_tied1: -+** uabd z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abd_u32_m_tied1, svuint32_t, -+ z0 = svabd_u32_m (p0, z0, z1), -+ z0 = svabd_m (p0, z0, z1)) -+ -+/* -+** abd_u32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** uabd z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abd_u32_m_tied2, svuint32_t, -+ z0 = svabd_u32_m (p0, z1, z0), -+ z0 = svabd_m (p0, z1, z0)) -+ -+/* -+** abd_u32_m_untied: -+** movprfx z0, z1 -+** uabd z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abd_u32_m_untied, svuint32_t, -+ z0 = svabd_u32_m (p0, z1, z2), -+ z0 = svabd_m (p0, z1, z2)) -+ -+/* -+** abd_w0_u32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** uabd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_w0_u32_m_tied1, svuint32_t, uint32_t, -+ z0 = svabd_n_u32_m (p0, z0, x0), -+ z0 = svabd_m (p0, z0, x0)) -+ -+/* -+** abd_w0_u32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** uabd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_w0_u32_m_untied, svuint32_t, uint32_t, -+ z0 = svabd_n_u32_m (p0, z1, x0), -+ z0 = svabd_m (p0, z1, x0)) -+ -+/* -+** abd_1_u32_m_tied1: -+** mov (z[0-9]+\.s), #1 -+** uabd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_u32_m_tied1, svuint32_t, -+ z0 = svabd_n_u32_m (p0, z0, 1), -+ z0 = svabd_m (p0, z0, 1)) -+ -+/* -+** abd_1_u32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #1 -+** movprfx z0, z1 -+** uabd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_u32_m_untied, svuint32_t, -+ z0 = svabd_n_u32_m (p0, z1, 1), -+ z0 = svabd_m (p0, z1, 1)) -+ -+/* -+** abd_u32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** uabd z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abd_u32_z_tied1, svuint32_t, -+ z0 = svabd_u32_z (p0, z0, z1), -+ z0 = svabd_z (p0, z0, z1)) -+ -+/* -+** abd_u32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** uabd z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abd_u32_z_tied2, svuint32_t, -+ z0 = svabd_u32_z (p0, z1, z0), -+ z0 = svabd_z (p0, z1, z0)) -+ -+/* -+** abd_u32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** uabd z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** uabd z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_u32_z_untied, svuint32_t, -+ z0 = svabd_u32_z (p0, z1, z2), -+ z0 = svabd_z (p0, z1, z2)) -+ -+/* -+** abd_w0_u32_z_tied1: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** uabd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_w0_u32_z_tied1, svuint32_t, uint32_t, -+ z0 = svabd_n_u32_z (p0, z0, x0), -+ z0 = svabd_z (p0, z0, x0)) -+ -+/* -+** abd_w0_u32_z_untied: -+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** uabd z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** uabd z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_w0_u32_z_untied, svuint32_t, uint32_t, -+ z0 = svabd_n_u32_z (p0, z1, x0), 
-+ z0 = svabd_z (p0, z1, x0)) -+ -+/* -+** abd_1_u32_z_tied1: -+** mov (z[0-9]+\.s), #1 -+** movprfx z0\.s, p0/z, z0\.s -+** uabd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_u32_z_tied1, svuint32_t, -+ z0 = svabd_n_u32_z (p0, z0, 1), -+ z0 = svabd_z (p0, z0, 1)) -+ -+/* -+** abd_1_u32_z_untied: -+** mov (z[0-9]+\.s), #1 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** uabd z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** uabd z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_u32_z_untied, svuint32_t, -+ z0 = svabd_n_u32_z (p0, z1, 1), -+ z0 = svabd_z (p0, z1, 1)) -+ -+/* -+** abd_u32_x_tied1: -+** uabd z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abd_u32_x_tied1, svuint32_t, -+ z0 = svabd_u32_x (p0, z0, z1), -+ z0 = svabd_x (p0, z0, z1)) -+ -+/* -+** abd_u32_x_tied2: -+** uabd z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abd_u32_x_tied2, svuint32_t, -+ z0 = svabd_u32_x (p0, z1, z0), -+ z0 = svabd_x (p0, z1, z0)) -+ -+/* -+** abd_u32_x_untied: -+** ( -+** movprfx z0, z1 -+** uabd z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0, z2 -+** uabd z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_u32_x_untied, svuint32_t, -+ z0 = svabd_u32_x (p0, z1, z2), -+ z0 = svabd_x (p0, z1, z2)) -+ -+/* -+** abd_w0_u32_x_tied1: -+** mov (z[0-9]+\.s), w0 -+** uabd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_w0_u32_x_tied1, svuint32_t, uint32_t, -+ z0 = svabd_n_u32_x (p0, z0, x0), -+ z0 = svabd_x (p0, z0, x0)) -+ -+/* -+** abd_w0_u32_x_untied: -+** mov z0\.s, w0 -+** uabd z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_w0_u32_x_untied, svuint32_t, uint32_t, -+ z0 = svabd_n_u32_x (p0, z1, x0), -+ z0 = svabd_x (p0, z1, x0)) -+ -+/* -+** abd_1_u32_x_tied1: -+** mov (z[0-9]+\.s), #1 -+** uabd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_u32_x_tied1, svuint32_t, -+ z0 = svabd_n_u32_x (p0, z0, 1), -+ z0 = svabd_x (p0, z0, 1)) -+ -+/* -+** abd_1_u32_x_untied: -+** mov z0\.s, #1 -+** uabd z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_u32_x_untied, svuint32_t, -+ z0 = svabd_n_u32_x (p0, z1, 1), -+ z0 = svabd_x (p0, z1, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_u64.c -new file mode 100644 -index 000000000..d2cdaa06a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_u64.c -@@ -0,0 +1,237 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** abd_u64_m_tied1: -+** uabd z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (abd_u64_m_tied1, svuint64_t, -+ z0 = svabd_u64_m (p0, z0, z1), -+ z0 = svabd_m (p0, z0, z1)) -+ -+/* -+** abd_u64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** uabd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_u64_m_tied2, svuint64_t, -+ z0 = svabd_u64_m (p0, z1, z0), -+ z0 = svabd_m (p0, z1, z0)) -+ -+/* -+** abd_u64_m_untied: -+** movprfx z0, z1 -+** uabd z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (abd_u64_m_untied, svuint64_t, -+ z0 = svabd_u64_m (p0, z1, z2), -+ z0 = svabd_m (p0, z1, z2)) -+ -+/* -+** abd_x0_u64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** uabd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_x0_u64_m_tied1, svuint64_t, uint64_t, -+ z0 = svabd_n_u64_m (p0, z0, x0), -+ z0 = svabd_m (p0, z0, x0)) -+ -+/* -+** abd_x0_u64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** uabd z0\.d, p0/m, z0\.d, \1 
-+** ret -+*/ -+TEST_UNIFORM_ZX (abd_x0_u64_m_untied, svuint64_t, uint64_t, -+ z0 = svabd_n_u64_m (p0, z1, x0), -+ z0 = svabd_m (p0, z1, x0)) -+ -+/* -+** abd_1_u64_m_tied1: -+** mov (z[0-9]+\.d), #1 -+** uabd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_u64_m_tied1, svuint64_t, -+ z0 = svabd_n_u64_m (p0, z0, 1), -+ z0 = svabd_m (p0, z0, 1)) -+ -+/* -+** abd_1_u64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #1 -+** movprfx z0, z1 -+** uabd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_u64_m_untied, svuint64_t, -+ z0 = svabd_n_u64_m (p0, z1, 1), -+ z0 = svabd_m (p0, z1, 1)) -+ -+/* -+** abd_u64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** uabd z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (abd_u64_z_tied1, svuint64_t, -+ z0 = svabd_u64_z (p0, z0, z1), -+ z0 = svabd_z (p0, z0, z1)) -+ -+/* -+** abd_u64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** uabd z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (abd_u64_z_tied2, svuint64_t, -+ z0 = svabd_u64_z (p0, z1, z0), -+ z0 = svabd_z (p0, z1, z0)) -+ -+/* -+** abd_u64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** uabd z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** uabd z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_u64_z_untied, svuint64_t, -+ z0 = svabd_u64_z (p0, z1, z2), -+ z0 = svabd_z (p0, z1, z2)) -+ -+/* -+** abd_x0_u64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** uabd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_x0_u64_z_tied1, svuint64_t, uint64_t, -+ z0 = svabd_n_u64_z (p0, z0, x0), -+ z0 = svabd_z (p0, z0, x0)) -+ -+/* -+** abd_x0_u64_z_untied: -+** mov (z[0-9]+\.d), x0 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** uabd z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** uabd z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_x0_u64_z_untied, svuint64_t, uint64_t, -+ z0 = svabd_n_u64_z (p0, z1, x0), -+ z0 = svabd_z (p0, z1, x0)) -+ -+/* -+** abd_1_u64_z_tied1: -+** mov (z[0-9]+\.d), #1 -+** movprfx z0\.d, p0/z, z0\.d -+** uabd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_u64_z_tied1, svuint64_t, -+ z0 = svabd_n_u64_z (p0, z0, 1), -+ z0 = svabd_z (p0, z0, 1)) -+ -+/* -+** abd_1_u64_z_untied: -+** mov (z[0-9]+\.d), #1 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** uabd z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** uabd z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_u64_z_untied, svuint64_t, -+ z0 = svabd_n_u64_z (p0, z1, 1), -+ z0 = svabd_z (p0, z1, 1)) -+ -+/* -+** abd_u64_x_tied1: -+** uabd z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (abd_u64_x_tied1, svuint64_t, -+ z0 = svabd_u64_x (p0, z0, z1), -+ z0 = svabd_x (p0, z0, z1)) -+ -+/* -+** abd_u64_x_tied2: -+** uabd z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (abd_u64_x_tied2, svuint64_t, -+ z0 = svabd_u64_x (p0, z1, z0), -+ z0 = svabd_x (p0, z1, z0)) -+ -+/* -+** abd_u64_x_untied: -+** ( -+** movprfx z0, z1 -+** uabd z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0, z2 -+** uabd z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_u64_x_untied, svuint64_t, -+ z0 = svabd_u64_x (p0, z1, z2), -+ z0 = svabd_x (p0, z1, z2)) -+ -+/* -+** abd_x0_u64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** uabd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_x0_u64_x_tied1, svuint64_t, uint64_t, -+ z0 = svabd_n_u64_x (p0, z0, x0), -+ z0 = svabd_x (p0, z0, x0)) -+ -+/* -+** abd_x0_u64_x_untied: -+** mov z0\.d, x0 -+** uabd 
z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_x0_u64_x_untied, svuint64_t, uint64_t, -+ z0 = svabd_n_u64_x (p0, z1, x0), -+ z0 = svabd_x (p0, z1, x0)) -+ -+/* -+** abd_1_u64_x_tied1: -+** mov (z[0-9]+\.d), #1 -+** uabd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_u64_x_tied1, svuint64_t, -+ z0 = svabd_n_u64_x (p0, z0, 1), -+ z0 = svabd_x (p0, z0, 1)) -+ -+/* -+** abd_1_u64_x_untied: -+** mov z0\.d, #1 -+** uabd z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_u64_x_untied, svuint64_t, -+ z0 = svabd_n_u64_x (p0, z1, 1), -+ z0 = svabd_x (p0, z1, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_u8.c -new file mode 100644 -index 000000000..454ef153c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abd_u8.c -@@ -0,0 +1,237 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** abd_u8_m_tied1: -+** uabd z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (abd_u8_m_tied1, svuint8_t, -+ z0 = svabd_u8_m (p0, z0, z1), -+ z0 = svabd_m (p0, z0, z1)) -+ -+/* -+** abd_u8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** uabd z0\.b, p0/m, z0\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (abd_u8_m_tied2, svuint8_t, -+ z0 = svabd_u8_m (p0, z1, z0), -+ z0 = svabd_m (p0, z1, z0)) -+ -+/* -+** abd_u8_m_untied: -+** movprfx z0, z1 -+** uabd z0\.b, p0/m, z0\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (abd_u8_m_untied, svuint8_t, -+ z0 = svabd_u8_m (p0, z1, z2), -+ z0 = svabd_m (p0, z1, z2)) -+ -+/* -+** abd_w0_u8_m_tied1: -+** mov (z[0-9]+\.b), w0 -+** uabd z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_w0_u8_m_tied1, svuint8_t, uint8_t, -+ z0 = svabd_n_u8_m (p0, z0, x0), -+ z0 = svabd_m (p0, z0, x0)) -+ -+/* -+** abd_w0_u8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** uabd z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_w0_u8_m_untied, svuint8_t, uint8_t, -+ z0 = svabd_n_u8_m (p0, z1, x0), -+ z0 = svabd_m (p0, z1, x0)) -+ -+/* -+** abd_1_u8_m_tied1: -+** mov (z[0-9]+\.b), #1 -+** uabd z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_u8_m_tied1, svuint8_t, -+ z0 = svabd_n_u8_m (p0, z0, 1), -+ z0 = svabd_m (p0, z0, 1)) -+ -+/* -+** abd_1_u8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), #1 -+** movprfx z0, z1 -+** uabd z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_u8_m_untied, svuint8_t, -+ z0 = svabd_n_u8_m (p0, z1, 1), -+ z0 = svabd_m (p0, z1, 1)) -+ -+/* -+** abd_u8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** uabd z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (abd_u8_z_tied1, svuint8_t, -+ z0 = svabd_u8_z (p0, z0, z1), -+ z0 = svabd_z (p0, z0, z1)) -+ -+/* -+** abd_u8_z_tied2: -+** movprfx z0\.b, p0/z, z0\.b -+** uabd z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (abd_u8_z_tied2, svuint8_t, -+ z0 = svabd_u8_z (p0, z1, z0), -+ z0 = svabd_z (p0, z1, z0)) -+ -+/* -+** abd_u8_z_untied: -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** uabd z0\.b, p0/m, z0\.b, z2\.b -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** uabd z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_u8_z_untied, svuint8_t, -+ z0 = svabd_u8_z (p0, z1, z2), -+ z0 = svabd_z (p0, z1, z2)) -+ -+/* -+** abd_w0_u8_z_tied1: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** uabd z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_w0_u8_z_tied1, svuint8_t, uint8_t, -+ z0 = svabd_n_u8_z (p0, z0, x0), -+ z0 = 
svabd_z (p0, z0, x0)) -+ -+/* -+** abd_w0_u8_z_untied: -+** mov (z[0-9]+\.b), w0 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** uabd z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** uabd z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_w0_u8_z_untied, svuint8_t, uint8_t, -+ z0 = svabd_n_u8_z (p0, z1, x0), -+ z0 = svabd_z (p0, z1, x0)) -+ -+/* -+** abd_1_u8_z_tied1: -+** mov (z[0-9]+\.b), #1 -+** movprfx z0\.b, p0/z, z0\.b -+** uabd z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_u8_z_tied1, svuint8_t, -+ z0 = svabd_n_u8_z (p0, z0, 1), -+ z0 = svabd_z (p0, z0, 1)) -+ -+/* -+** abd_1_u8_z_untied: -+** mov (z[0-9]+\.b), #1 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** uabd z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** uabd z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_u8_z_untied, svuint8_t, -+ z0 = svabd_n_u8_z (p0, z1, 1), -+ z0 = svabd_z (p0, z1, 1)) -+ -+/* -+** abd_u8_x_tied1: -+** uabd z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (abd_u8_x_tied1, svuint8_t, -+ z0 = svabd_u8_x (p0, z0, z1), -+ z0 = svabd_x (p0, z0, z1)) -+ -+/* -+** abd_u8_x_tied2: -+** uabd z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (abd_u8_x_tied2, svuint8_t, -+ z0 = svabd_u8_x (p0, z1, z0), -+ z0 = svabd_x (p0, z1, z0)) -+ -+/* -+** abd_u8_x_untied: -+** ( -+** movprfx z0, z1 -+** uabd z0\.b, p0/m, z0\.b, z2\.b -+** | -+** movprfx z0, z2 -+** uabd z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (abd_u8_x_untied, svuint8_t, -+ z0 = svabd_u8_x (p0, z1, z2), -+ z0 = svabd_x (p0, z1, z2)) -+ -+/* -+** abd_w0_u8_x_tied1: -+** mov (z[0-9]+\.b), w0 -+** uabd z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_w0_u8_x_tied1, svuint8_t, uint8_t, -+ z0 = svabd_n_u8_x (p0, z0, x0), -+ z0 = svabd_x (p0, z0, x0)) -+ -+/* -+** abd_w0_u8_x_untied: -+** mov z0\.b, w0 -+** uabd z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_ZX (abd_w0_u8_x_untied, svuint8_t, uint8_t, -+ z0 = svabd_n_u8_x (p0, z1, x0), -+ z0 = svabd_x (p0, z1, x0)) -+ -+/* -+** abd_1_u8_x_tied1: -+** mov (z[0-9]+\.b), #1 -+** uabd z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_u8_x_tied1, svuint8_t, -+ z0 = svabd_n_u8_x (p0, z0, 1), -+ z0 = svabd_x (p0, z0, 1)) -+ -+/* -+** abd_1_u8_x_untied: -+** mov z0\.b, #1 -+** uabd z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (abd_1_u8_x_untied, svuint8_t, -+ z0 = svabd_n_u8_x (p0, z1, 1), -+ z0 = svabd_x (p0, z1, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_f16.c -new file mode 100644 -index 000000000..2aa8736e6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_f16.c -@@ -0,0 +1,103 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** abs_f16_m_tied12: -+** fabs z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abs_f16_m_tied12, svfloat16_t, -+ z0 = svabs_f16_m (z0, p0, z0), -+ z0 = svabs_m (z0, p0, z0)) -+ -+/* -+** abs_f16_m_tied1: -+** fabs z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abs_f16_m_tied1, svfloat16_t, -+ z0 = svabs_f16_m (z0, p0, z1), -+ z0 = svabs_m (z0, p0, z1)) -+ -+/* -+** abs_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fabs z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abs_f16_m_tied2, svfloat16_t, -+ z0 = svabs_f16_m (z1, p0, z0), -+ z0 = svabs_m (z1, p0, z0)) -+ -+/* -+** abs_f16_m_untied: -+** movprfx z0, z2 -+** fabs z0\.h, p0/m, z1\.h 
-+** ret -+*/ -+TEST_UNIFORM_Z (abs_f16_m_untied, svfloat16_t, -+ z0 = svabs_f16_m (z2, p0, z1), -+ z0 = svabs_m (z2, p0, z1)) -+ -+/* -+** abs_f16_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, \1\.h -+** fabs z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abs_f16_z_tied1, svfloat16_t, -+ z0 = svabs_f16_z (p0, z0), -+ z0 = svabs_z (p0, z0)) -+ -+/* -+** abs_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fabs z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abs_f16_z_untied, svfloat16_t, -+ z0 = svabs_f16_z (p0, z1), -+ z0 = svabs_z (p0, z1)) -+ -+/* -+** abs_f16_x_tied1: -+** fabs z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abs_f16_x_tied1, svfloat16_t, -+ z0 = svabs_f16_x (p0, z0), -+ z0 = svabs_x (p0, z0)) -+ -+/* -+** abs_f16_x_untied: -+** fabs z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abs_f16_x_untied, svfloat16_t, -+ z0 = svabs_f16_x (p0, z1), -+ z0 = svabs_x (p0, z1)) -+ -+/* -+** ptrue_abs_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abs_f16_x_tied1, svfloat16_t, -+ z0 = svabs_f16_x (svptrue_b16 (), z0), -+ z0 = svabs_x (svptrue_b16 (), z0)) -+ -+/* -+** ptrue_abs_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abs_f16_x_untied, svfloat16_t, -+ z0 = svabs_f16_x (svptrue_b16 (), z1), -+ z0 = svabs_x (svptrue_b16 (), z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_f32.c -new file mode 100644 -index 000000000..30286afc7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_f32.c -@@ -0,0 +1,103 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** abs_f32_m_tied12: -+** fabs z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abs_f32_m_tied12, svfloat32_t, -+ z0 = svabs_f32_m (z0, p0, z0), -+ z0 = svabs_m (z0, p0, z0)) -+ -+/* -+** abs_f32_m_tied1: -+** fabs z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abs_f32_m_tied1, svfloat32_t, -+ z0 = svabs_f32_m (z0, p0, z1), -+ z0 = svabs_m (z0, p0, z1)) -+ -+/* -+** abs_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fabs z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abs_f32_m_tied2, svfloat32_t, -+ z0 = svabs_f32_m (z1, p0, z0), -+ z0 = svabs_m (z1, p0, z0)) -+ -+/* -+** abs_f32_m_untied: -+** movprfx z0, z2 -+** fabs z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abs_f32_m_untied, svfloat32_t, -+ z0 = svabs_f32_m (z2, p0, z1), -+ z0 = svabs_m (z2, p0, z1)) -+ -+/* -+** abs_f32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** fabs z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abs_f32_z_tied1, svfloat32_t, -+ z0 = svabs_f32_z (p0, z0), -+ z0 = svabs_z (p0, z0)) -+ -+/* -+** abs_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fabs z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abs_f32_z_untied, svfloat32_t, -+ z0 = svabs_f32_z (p0, z1), -+ z0 = svabs_z (p0, z1)) -+ -+/* -+** abs_f32_x_tied1: -+** fabs z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abs_f32_x_tied1, svfloat32_t, -+ z0 = svabs_f32_x (p0, z0), -+ z0 = svabs_x (p0, z0)) -+ -+/* -+** abs_f32_x_untied: -+** fabs z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abs_f32_x_untied, svfloat32_t, -+ z0 = svabs_f32_x (p0, z1), -+ z0 = svabs_x (p0, z1)) -+ -+/* -+** ptrue_abs_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abs_f32_x_tied1, svfloat32_t, -+ z0 = svabs_f32_x (svptrue_b32 (), z0), -+ z0 = svabs_x (svptrue_b32 (), z0)) -+ -+/* -+** ptrue_abs_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abs_f32_x_untied, svfloat32_t, -+ z0 = svabs_f32_x (svptrue_b32 (), z1), -+ z0 = svabs_x (svptrue_b32 (), z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_f64.c -new file mode 100644 -index 000000000..28ef9fbba ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_f64.c -@@ -0,0 +1,103 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** abs_f64_m_tied12: -+** fabs z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (abs_f64_m_tied12, svfloat64_t, -+ z0 = svabs_f64_m (z0, p0, z0), -+ z0 = svabs_m (z0, p0, z0)) -+ -+/* -+** abs_f64_m_tied1: -+** fabs z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (abs_f64_m_tied1, svfloat64_t, -+ z0 = svabs_f64_m (z0, p0, z1), -+ z0 = svabs_m (z0, p0, z1)) -+ -+/* -+** abs_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fabs z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abs_f64_m_tied2, svfloat64_t, -+ z0 = svabs_f64_m (z1, p0, z0), -+ z0 = svabs_m (z1, p0, z0)) -+ -+/* -+** abs_f64_m_untied: -+** movprfx z0, z2 -+** fabs z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (abs_f64_m_untied, svfloat64_t, -+ z0 = svabs_f64_m (z2, p0, z1), -+ z0 = svabs_m (z2, p0, z1)) -+ -+/* -+** abs_f64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** fabs z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abs_f64_z_tied1, svfloat64_t, -+ z0 = svabs_f64_z (p0, z0), -+ z0 = svabs_z (p0, z0)) -+ -+/* -+** abs_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fabs z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (abs_f64_z_untied, svfloat64_t, -+ z0 = svabs_f64_z (p0, z1), -+ z0 = svabs_z (p0, z1)) -+ -+/* -+** abs_f64_x_tied1: -+** fabs z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (abs_f64_x_tied1, svfloat64_t, -+ z0 = svabs_f64_x (p0, z0), -+ z0 = svabs_x (p0, z0)) -+ -+/* -+** abs_f64_x_untied: -+** fabs z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (abs_f64_x_untied, svfloat64_t, -+ z0 = svabs_f64_x (p0, z1), -+ z0 = svabs_x (p0, z1)) -+ -+/* -+** ptrue_abs_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abs_f64_x_tied1, svfloat64_t, -+ z0 = svabs_f64_x (svptrue_b64 (), z0), -+ z0 = svabs_x (svptrue_b64 (), z0)) -+ -+/* -+** ptrue_abs_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_abs_f64_x_untied, svfloat64_t, -+ z0 = svabs_f64_x (svptrue_b64 (), z1), -+ z0 = svabs_x (svptrue_b64 (), z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_s16.c -new file mode 100644 -index 000000000..3b16a9c4f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_s16.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** abs_s16_m_tied12: -+** abs z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abs_s16_m_tied12, svint16_t, -+ z0 = svabs_s16_m (z0, p0, z0), -+ z0 = svabs_m (z0, p0, z0)) -+ -+/* -+** abs_s16_m_tied1: -+** abs z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abs_s16_m_tied1, svint16_t, -+ z0 = svabs_s16_m (z0, p0, z1), -+ z0 = svabs_m (z0, p0, z1)) -+ -+/* -+** abs_s16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** abs z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abs_s16_m_tied2, svint16_t, -+ z0 = svabs_s16_m (z1, p0, z0), -+ z0 = svabs_m (z1, p0, z0)) -+ -+/* -+** abs_s16_m_untied: -+** movprfx z0, z2 -+** abs z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abs_s16_m_untied, svint16_t, -+ z0 = svabs_s16_m (z2, p0, z1), -+ z0 = svabs_m (z2, p0, z1)) -+ -+/* -+** abs_s16_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, \1\.h -+** abs z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abs_s16_z_tied1, svint16_t, -+ z0 = svabs_s16_z (p0, z0), -+ z0 = svabs_z (p0, z0)) -+ -+/* -+** abs_s16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** abs z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abs_s16_z_untied, svint16_t, -+ z0 = svabs_s16_z (p0, z1), -+ z0 = svabs_z (p0, z1)) -+ -+/* -+** abs_s16_x_tied1: -+** abs z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abs_s16_x_tied1, svint16_t, -+ z0 = svabs_s16_x (p0, z0), -+ z0 = svabs_x (p0, z0)) -+ -+/* -+** abs_s16_x_untied: -+** abs z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (abs_s16_x_untied, svint16_t, -+ z0 = svabs_s16_x (p0, z1), -+ z0 = svabs_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_s32.c -new file mode 100644 -index 000000000..14bcbd50c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_s32.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** abs_s32_m_tied12: -+** abs z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abs_s32_m_tied12, svint32_t, -+ z0 = svabs_s32_m (z0, p0, z0), -+ z0 = svabs_m (z0, p0, z0)) -+ -+/* -+** abs_s32_m_tied1: -+** abs z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abs_s32_m_tied1, svint32_t, -+ z0 = svabs_s32_m (z0, p0, z1), -+ z0 = svabs_m (z0, p0, z1)) -+ -+/* -+** abs_s32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** abs z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abs_s32_m_tied2, svint32_t, -+ z0 = svabs_s32_m (z1, p0, z0), -+ z0 = svabs_m (z1, p0, z0)) -+ -+/* -+** abs_s32_m_untied: -+** movprfx z0, z2 -+** abs z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abs_s32_m_untied, svint32_t, -+ z0 = svabs_s32_m (z2, p0, z1), -+ z0 = svabs_m (z2, p0, z1)) -+ -+/* -+** abs_s32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** abs z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abs_s32_z_tied1, svint32_t, -+ z0 = svabs_s32_z (p0, z0), -+ z0 = svabs_z (p0, z0)) -+ -+/* -+** 
abs_s32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** abs z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abs_s32_z_untied, svint32_t, -+ z0 = svabs_s32_z (p0, z1), -+ z0 = svabs_z (p0, z1)) -+ -+/* -+** abs_s32_x_tied1: -+** abs z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abs_s32_x_tied1, svint32_t, -+ z0 = svabs_s32_x (p0, z0), -+ z0 = svabs_x (p0, z0)) -+ -+/* -+** abs_s32_x_untied: -+** abs z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (abs_s32_x_untied, svint32_t, -+ z0 = svabs_s32_x (p0, z1), -+ z0 = svabs_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_s64.c -new file mode 100644 -index 000000000..c7b60ff48 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_s64.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** abs_s64_m_tied12: -+** abs z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (abs_s64_m_tied12, svint64_t, -+ z0 = svabs_s64_m (z0, p0, z0), -+ z0 = svabs_m (z0, p0, z0)) -+ -+/* -+** abs_s64_m_tied1: -+** abs z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (abs_s64_m_tied1, svint64_t, -+ z0 = svabs_s64_m (z0, p0, z1), -+ z0 = svabs_m (z0, p0, z1)) -+ -+/* -+** abs_s64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** abs z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abs_s64_m_tied2, svint64_t, -+ z0 = svabs_s64_m (z1, p0, z0), -+ z0 = svabs_m (z1, p0, z0)) -+ -+/* -+** abs_s64_m_untied: -+** movprfx z0, z2 -+** abs z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (abs_s64_m_untied, svint64_t, -+ z0 = svabs_s64_m (z2, p0, z1), -+ z0 = svabs_m (z2, p0, z1)) -+ -+/* -+** abs_s64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** abs z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (abs_s64_z_tied1, svint64_t, -+ z0 = svabs_s64_z (p0, z0), -+ z0 = svabs_z (p0, z0)) -+ -+/* -+** abs_s64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** abs z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (abs_s64_z_untied, svint64_t, -+ z0 = svabs_s64_z (p0, z1), -+ z0 = svabs_z (p0, z1)) -+ -+/* -+** abs_s64_x_tied1: -+** abs z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (abs_s64_x_tied1, svint64_t, -+ z0 = svabs_s64_x (p0, z0), -+ z0 = svabs_x (p0, z0)) -+ -+/* -+** abs_s64_x_untied: -+** abs z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (abs_s64_x_untied, svint64_t, -+ z0 = svabs_s64_x (p0, z1), -+ z0 = svabs_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_s8.c -new file mode 100644 -index 000000000..0bc64c078 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/abs_s8.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** abs_s8_m_tied12: -+** abs z0\.b, p0/m, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (abs_s8_m_tied12, svint8_t, -+ z0 = svabs_s8_m (z0, p0, z0), -+ z0 = svabs_m (z0, p0, z0)) -+ -+/* -+** abs_s8_m_tied1: -+** abs z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (abs_s8_m_tied1, svint8_t, -+ z0 = svabs_s8_m (z0, p0, z1), -+ z0 = svabs_m (z0, p0, z1)) -+ -+/* -+** abs_s8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** abs z0\.b, p0/m, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (abs_s8_m_tied2, svint8_t, -+ z0 = svabs_s8_m (z1, p0, z0), -+ z0 = svabs_m (z1, p0, z0)) -+ -+/* -+** abs_s8_m_untied: -+** movprfx z0, z2 -+** abs z0\.b, p0/m, z1\.b -+** ret -+*/ 
-+TEST_UNIFORM_Z (abs_s8_m_untied, svint8_t, -+ z0 = svabs_s8_m (z2, p0, z1), -+ z0 = svabs_m (z2, p0, z1)) -+ -+/* -+** abs_s8_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.b, p0/z, \1\.b -+** abs z0\.b, p0/m, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (abs_s8_z_tied1, svint8_t, -+ z0 = svabs_s8_z (p0, z0), -+ z0 = svabs_z (p0, z0)) -+ -+/* -+** abs_s8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** abs z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (abs_s8_z_untied, svint8_t, -+ z0 = svabs_s8_z (p0, z1), -+ z0 = svabs_z (p0, z1)) -+ -+/* -+** abs_s8_x_tied1: -+** abs z0\.b, p0/m, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (abs_s8_x_tied1, svint8_t, -+ z0 = svabs_s8_x (p0, z0), -+ z0 = svabs_x (p0, z0)) -+ -+/* -+** abs_s8_x_untied: -+** abs z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (abs_s8_x_untied, svint8_t, -+ z0 = svabs_s8_x (p0, z1), -+ z0 = svabs_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acge_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acge_f16.c -new file mode 100644 -index 000000000..acef17309 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acge_f16.c -@@ -0,0 +1,71 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** acge_f16_tied: -+** ( -+** facge p0\.h, p0/z, z0\.h, z1\.h -+** | -+** facle p0\.h, p0/z, z1\.h, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (acge_f16_tied, svfloat16_t, -+ p0 = svacge_f16 (p0, z0, z1), -+ p0 = svacge (p0, z0, z1)) -+ -+/* -+** acge_f16_untied: -+** ( -+** facge p0\.h, p1/z, z0\.h, z1\.h -+** | -+** facle p0\.h, p1/z, z1\.h, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (acge_f16_untied, svfloat16_t, -+ p0 = svacge_f16 (p1, z0, z1), -+ p0 = svacge (p1, z0, z1)) -+ -+/* -+** acge_h4_f16: -+** mov (z[0-9]+\.h), h4 -+** ( -+** facge p0\.h, p1/z, z0\.h, \1 -+** | -+** facle p0\.h, p1/z, \1, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_ZD (acge_h4_f16, svfloat16_t, float16_t, -+ p0 = svacge_n_f16 (p1, z0, d4), -+ p0 = svacge (p1, z0, d4)) -+ -+/* -+** acge_0_f16: -+** mov (z[0-9]+\.h), #0 -+** ( -+** facge p0\.h, p1/z, z0\.h, \1 -+** | -+** facle p0\.h, p1/z, \1, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (acge_0_f16, svfloat16_t, -+ p0 = svacge_n_f16 (p1, z0, 0), -+ p0 = svacge (p1, z0, 0)) -+ -+/* -+** acge_1_f16: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? 
-+** ( -+** facge p0\.h, p1/z, z0\.h, \1 -+** | -+** facle p0\.h, p1/z, \1, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (acge_1_f16, svfloat16_t, -+ p0 = svacge_n_f16 (p1, z0, 1), -+ p0 = svacge (p1, z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acge_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acge_f32.c -new file mode 100644 -index 000000000..c3d195ab8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acge_f32.c -@@ -0,0 +1,71 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** acge_f32_tied: -+** ( -+** facge p0\.s, p0/z, z0\.s, z1\.s -+** | -+** facle p0\.s, p0/z, z1\.s, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (acge_f32_tied, svfloat32_t, -+ p0 = svacge_f32 (p0, z0, z1), -+ p0 = svacge (p0, z0, z1)) -+ -+/* -+** acge_f32_untied: -+** ( -+** facge p0\.s, p1/z, z0\.s, z1\.s -+** | -+** facle p0\.s, p1/z, z1\.s, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (acge_f32_untied, svfloat32_t, -+ p0 = svacge_f32 (p1, z0, z1), -+ p0 = svacge (p1, z0, z1)) -+ -+/* -+** acge_s4_f32: -+** mov (z[0-9]+\.s), s4 -+** ( -+** facge p0\.s, p1/z, z0\.s, \1 -+** | -+** facle p0\.s, p1/z, \1, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_ZD (acge_s4_f32, svfloat32_t, float32_t, -+ p0 = svacge_n_f32 (p1, z0, d4), -+ p0 = svacge (p1, z0, d4)) -+ -+/* -+** acge_0_f32: -+** mov (z[0-9]+\.s), #0 -+** ( -+** facge p0\.s, p1/z, z0\.s, \1 -+** | -+** facle p0\.s, p1/z, \1, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (acge_0_f32, svfloat32_t, -+ p0 = svacge_n_f32 (p1, z0, 0), -+ p0 = svacge (p1, z0, 0)) -+ -+/* -+** acge_1_f32: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? -+** ( -+** facge p0\.s, p1/z, z0\.s, \1 -+** | -+** facle p0\.s, p1/z, \1, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (acge_1_f32, svfloat32_t, -+ p0 = svacge_n_f32 (p1, z0, 1), -+ p0 = svacge (p1, z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acge_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acge_f64.c -new file mode 100644 -index 000000000..207ce93a2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acge_f64.c -@@ -0,0 +1,71 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** acge_f64_tied: -+** ( -+** facge p0\.d, p0/z, z0\.d, z1\.d -+** | -+** facle p0\.d, p0/z, z1\.d, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (acge_f64_tied, svfloat64_t, -+ p0 = svacge_f64 (p0, z0, z1), -+ p0 = svacge (p0, z0, z1)) -+ -+/* -+** acge_f64_untied: -+** ( -+** facge p0\.d, p1/z, z0\.d, z1\.d -+** | -+** facle p0\.d, p1/z, z1\.d, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (acge_f64_untied, svfloat64_t, -+ p0 = svacge_f64 (p1, z0, z1), -+ p0 = svacge (p1, z0, z1)) -+ -+/* -+** acge_d4_f64: -+** mov (z[0-9]+\.d), d4 -+** ( -+** facge p0\.d, p1/z, z0\.d, \1 -+** | -+** facle p0\.d, p1/z, \1, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_ZD (acge_d4_f64, svfloat64_t, float64_t, -+ p0 = svacge_n_f64 (p1, z0, d4), -+ p0 = svacge (p1, z0, d4)) -+ -+/* -+** acge_0_f64: -+** mov (z[0-9]+\.d), #0 -+** ( -+** facge p0\.d, p1/z, z0\.d, \1 -+** | -+** facle p0\.d, p1/z, \1, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (acge_0_f64, svfloat64_t, -+ p0 = svacge_n_f64 (p1, z0, 0), -+ p0 = svacge (p1, z0, 0)) -+ -+/* -+** acge_1_f64: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? 
-+** ( -+** facge p0\.d, p1/z, z0\.d, \1 -+** | -+** facle p0\.d, p1/z, \1, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (acge_1_f64, svfloat64_t, -+ p0 = svacge_n_f64 (p1, z0, 1), -+ p0 = svacge (p1, z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acgt_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acgt_f16.c -new file mode 100644 -index 000000000..53c63351c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acgt_f16.c -@@ -0,0 +1,71 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** acgt_f16_tied: -+** ( -+** facgt p0\.h, p0/z, z0\.h, z1\.h -+** | -+** faclt p0\.h, p0/z, z1\.h, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (acgt_f16_tied, svfloat16_t, -+ p0 = svacgt_f16 (p0, z0, z1), -+ p0 = svacgt (p0, z0, z1)) -+ -+/* -+** acgt_f16_untied: -+** ( -+** facgt p0\.h, p1/z, z0\.h, z1\.h -+** | -+** faclt p0\.h, p1/z, z1\.h, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (acgt_f16_untied, svfloat16_t, -+ p0 = svacgt_f16 (p1, z0, z1), -+ p0 = svacgt (p1, z0, z1)) -+ -+/* -+** acgt_h4_f16: -+** mov (z[0-9]+\.h), h4 -+** ( -+** facgt p0\.h, p1/z, z0\.h, \1 -+** | -+** faclt p0\.h, p1/z, \1, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_ZD (acgt_h4_f16, svfloat16_t, float16_t, -+ p0 = svacgt_n_f16 (p1, z0, d4), -+ p0 = svacgt (p1, z0, d4)) -+ -+/* -+** acgt_0_f16: -+** mov (z[0-9]+\.h), #0 -+** ( -+** facgt p0\.h, p1/z, z0\.h, \1 -+** | -+** faclt p0\.h, p1/z, \1, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (acgt_0_f16, svfloat16_t, -+ p0 = svacgt_n_f16 (p1, z0, 0), -+ p0 = svacgt (p1, z0, 0)) -+ -+/* -+** acgt_1_f16: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? -+** ( -+** facgt p0\.h, p1/z, z0\.h, \1 -+** | -+** faclt p0\.h, p1/z, \1, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (acgt_1_f16, svfloat16_t, -+ p0 = svacgt_n_f16 (p1, z0, 1), -+ p0 = svacgt (p1, z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acgt_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acgt_f32.c -new file mode 100644 -index 000000000..d71c84ea6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acgt_f32.c -@@ -0,0 +1,71 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** acgt_f32_tied: -+** ( -+** facgt p0\.s, p0/z, z0\.s, z1\.s -+** | -+** faclt p0\.s, p0/z, z1\.s, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (acgt_f32_tied, svfloat32_t, -+ p0 = svacgt_f32 (p0, z0, z1), -+ p0 = svacgt (p0, z0, z1)) -+ -+/* -+** acgt_f32_untied: -+** ( -+** facgt p0\.s, p1/z, z0\.s, z1\.s -+** | -+** faclt p0\.s, p1/z, z1\.s, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (acgt_f32_untied, svfloat32_t, -+ p0 = svacgt_f32 (p1, z0, z1), -+ p0 = svacgt (p1, z0, z1)) -+ -+/* -+** acgt_s4_f32: -+** mov (z[0-9]+\.s), s4 -+** ( -+** facgt p0\.s, p1/z, z0\.s, \1 -+** | -+** faclt p0\.s, p1/z, \1, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_ZD (acgt_s4_f32, svfloat32_t, float32_t, -+ p0 = svacgt_n_f32 (p1, z0, d4), -+ p0 = svacgt (p1, z0, d4)) -+ -+/* -+** acgt_0_f32: -+** mov (z[0-9]+\.s), #0 -+** ( -+** facgt p0\.s, p1/z, z0\.s, \1 -+** | -+** faclt p0\.s, p1/z, \1, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (acgt_0_f32, svfloat32_t, -+ p0 = svacgt_n_f32 (p1, z0, 0), -+ p0 = svacgt (p1, z0, 0)) -+ -+/* -+** acgt_1_f32: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? 
-+** ( -+** facgt p0\.s, p1/z, z0\.s, \1 -+** | -+** faclt p0\.s, p1/z, \1, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (acgt_1_f32, svfloat32_t, -+ p0 = svacgt_n_f32 (p1, z0, 1), -+ p0 = svacgt (p1, z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acgt_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acgt_f64.c -new file mode 100644 -index 000000000..15d549e18 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acgt_f64.c -@@ -0,0 +1,71 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** acgt_f64_tied: -+** ( -+** facgt p0\.d, p0/z, z0\.d, z1\.d -+** | -+** faclt p0\.d, p0/z, z1\.d, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (acgt_f64_tied, svfloat64_t, -+ p0 = svacgt_f64 (p0, z0, z1), -+ p0 = svacgt (p0, z0, z1)) -+ -+/* -+** acgt_f64_untied: -+** ( -+** facgt p0\.d, p1/z, z0\.d, z1\.d -+** | -+** faclt p0\.d, p1/z, z1\.d, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (acgt_f64_untied, svfloat64_t, -+ p0 = svacgt_f64 (p1, z0, z1), -+ p0 = svacgt (p1, z0, z1)) -+ -+/* -+** acgt_d4_f64: -+** mov (z[0-9]+\.d), d4 -+** ( -+** facgt p0\.d, p1/z, z0\.d, \1 -+** | -+** faclt p0\.d, p1/z, \1, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_ZD (acgt_d4_f64, svfloat64_t, float64_t, -+ p0 = svacgt_n_f64 (p1, z0, d4), -+ p0 = svacgt (p1, z0, d4)) -+ -+/* -+** acgt_0_f64: -+** mov (z[0-9]+\.d), #0 -+** ( -+** facgt p0\.d, p1/z, z0\.d, \1 -+** | -+** faclt p0\.d, p1/z, \1, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (acgt_0_f64, svfloat64_t, -+ p0 = svacgt_n_f64 (p1, z0, 0), -+ p0 = svacgt (p1, z0, 0)) -+ -+/* -+** acgt_1_f64: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? -+** ( -+** facgt p0\.d, p1/z, z0\.d, \1 -+** | -+** faclt p0\.d, p1/z, \1, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (acgt_1_f64, svfloat64_t, -+ p0 = svacgt_n_f64 (p1, z0, 1), -+ p0 = svacgt (p1, z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acle_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acle_f16.c -new file mode 100644 -index 000000000..ed6721d57 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acle_f16.c -@@ -0,0 +1,71 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** acle_f16_tied: -+** ( -+** facge p0\.h, p0/z, z1\.h, z0\.h -+** | -+** facle p0\.h, p0/z, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (acle_f16_tied, svfloat16_t, -+ p0 = svacle_f16 (p0, z0, z1), -+ p0 = svacle (p0, z0, z1)) -+ -+/* -+** acle_f16_untied: -+** ( -+** facge p0\.h, p1/z, z1\.h, z0\.h -+** | -+** facle p0\.h, p1/z, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (acle_f16_untied, svfloat16_t, -+ p0 = svacle_f16 (p1, z0, z1), -+ p0 = svacle (p1, z0, z1)) -+ -+/* -+** acle_h4_f16: -+** mov (z[0-9]+\.h), h4 -+** ( -+** facge p0\.h, p1/z, \1, z0\.h -+** | -+** facle p0\.h, p1/z, z0\.h, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_ZD (acle_h4_f16, svfloat16_t, float16_t, -+ p0 = svacle_n_f16 (p1, z0, d4), -+ p0 = svacle (p1, z0, d4)) -+ -+/* -+** acle_0_f16: -+** mov (z[0-9]+\.h), #0 -+** ( -+** facge p0\.h, p1/z, \1, z0\.h -+** | -+** facle p0\.h, p1/z, z0\.h, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (acle_0_f16, svfloat16_t, -+ p0 = svacle_n_f16 (p1, z0, 0), -+ p0 = svacle (p1, z0, 0)) -+ -+/* -+** acle_1_f16: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? 
-+** ( -+** facge p0\.h, p1/z, \1, z0\.h -+** | -+** facle p0\.h, p1/z, z0\.h, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (acle_1_f16, svfloat16_t, -+ p0 = svacle_n_f16 (p1, z0, 1), -+ p0 = svacle (p1, z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acle_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acle_f32.c -new file mode 100644 -index 000000000..7fc9da701 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acle_f32.c -@@ -0,0 +1,71 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** acle_f32_tied: -+** ( -+** facge p0\.s, p0/z, z1\.s, z0\.s -+** | -+** facle p0\.s, p0/z, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (acle_f32_tied, svfloat32_t, -+ p0 = svacle_f32 (p0, z0, z1), -+ p0 = svacle (p0, z0, z1)) -+ -+/* -+** acle_f32_untied: -+** ( -+** facge p0\.s, p1/z, z1\.s, z0\.s -+** | -+** facle p0\.s, p1/z, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (acle_f32_untied, svfloat32_t, -+ p0 = svacle_f32 (p1, z0, z1), -+ p0 = svacle (p1, z0, z1)) -+ -+/* -+** acle_s4_f32: -+** mov (z[0-9]+\.s), s4 -+** ( -+** facge p0\.s, p1/z, \1, z0\.s -+** | -+** facle p0\.s, p1/z, z0\.s, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_ZD (acle_s4_f32, svfloat32_t, float32_t, -+ p0 = svacle_n_f32 (p1, z0, d4), -+ p0 = svacle (p1, z0, d4)) -+ -+/* -+** acle_0_f32: -+** mov (z[0-9]+\.s), #0 -+** ( -+** facge p0\.s, p1/z, \1, z0\.s -+** | -+** facle p0\.s, p1/z, z0\.s, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (acle_0_f32, svfloat32_t, -+ p0 = svacle_n_f32 (p1, z0, 0), -+ p0 = svacle (p1, z0, 0)) -+ -+/* -+** acle_1_f32: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? -+** ( -+** facge p0\.s, p1/z, \1, z0\.s -+** | -+** facle p0\.s, p1/z, z0\.s, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (acle_1_f32, svfloat32_t, -+ p0 = svacle_n_f32 (p1, z0, 1), -+ p0 = svacle (p1, z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acle_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acle_f64.c -new file mode 100644 -index 000000000..ecbb8e500 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/acle_f64.c -@@ -0,0 +1,71 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** acle_f64_tied: -+** ( -+** facge p0\.d, p0/z, z1\.d, z0\.d -+** | -+** facle p0\.d, p0/z, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (acle_f64_tied, svfloat64_t, -+ p0 = svacle_f64 (p0, z0, z1), -+ p0 = svacle (p0, z0, z1)) -+ -+/* -+** acle_f64_untied: -+** ( -+** facge p0\.d, p1/z, z1\.d, z0\.d -+** | -+** facle p0\.d, p1/z, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (acle_f64_untied, svfloat64_t, -+ p0 = svacle_f64 (p1, z0, z1), -+ p0 = svacle (p1, z0, z1)) -+ -+/* -+** acle_d4_f64: -+** mov (z[0-9]+\.d), d4 -+** ( -+** facge p0\.d, p1/z, \1, z0\.d -+** | -+** facle p0\.d, p1/z, z0\.d, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_ZD (acle_d4_f64, svfloat64_t, float64_t, -+ p0 = svacle_n_f64 (p1, z0, d4), -+ p0 = svacle (p1, z0, d4)) -+ -+/* -+** acle_0_f64: -+** mov (z[0-9]+\.d), #0 -+** ( -+** facge p0\.d, p1/z, \1, z0\.d -+** | -+** facle p0\.d, p1/z, z0\.d, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (acle_0_f64, svfloat64_t, -+ p0 = svacle_n_f64 (p1, z0, 0), -+ p0 = svacle (p1, z0, 0)) -+ -+/* -+** acle_1_f64: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? 
-+** ( -+** facge p0\.d, p1/z, \1, z0\.d -+** | -+** facle p0\.d, p1/z, z0\.d, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (acle_1_f64, svfloat64_t, -+ p0 = svacle_n_f64 (p1, z0, 1), -+ p0 = svacle (p1, z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/aclt_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/aclt_f16.c -new file mode 100644 -index 000000000..e5f5040c7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/aclt_f16.c -@@ -0,0 +1,71 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** aclt_f16_tied: -+** ( -+** facgt p0\.h, p0/z, z1\.h, z0\.h -+** | -+** faclt p0\.h, p0/z, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (aclt_f16_tied, svfloat16_t, -+ p0 = svaclt_f16 (p0, z0, z1), -+ p0 = svaclt (p0, z0, z1)) -+ -+/* -+** aclt_f16_untied: -+** ( -+** facgt p0\.h, p1/z, z1\.h, z0\.h -+** | -+** faclt p0\.h, p1/z, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (aclt_f16_untied, svfloat16_t, -+ p0 = svaclt_f16 (p1, z0, z1), -+ p0 = svaclt (p1, z0, z1)) -+ -+/* -+** aclt_h4_f16: -+** mov (z[0-9]+\.h), h4 -+** ( -+** facgt p0\.h, p1/z, \1, z0\.h -+** | -+** faclt p0\.h, p1/z, z0\.h, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_ZD (aclt_h4_f16, svfloat16_t, float16_t, -+ p0 = svaclt_n_f16 (p1, z0, d4), -+ p0 = svaclt (p1, z0, d4)) -+ -+/* -+** aclt_0_f16: -+** mov (z[0-9]+\.h), #0 -+** ( -+** facgt p0\.h, p1/z, \1, z0\.h -+** | -+** faclt p0\.h, p1/z, z0\.h, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (aclt_0_f16, svfloat16_t, -+ p0 = svaclt_n_f16 (p1, z0, 0), -+ p0 = svaclt (p1, z0, 0)) -+ -+/* -+** aclt_1_f16: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? -+** ( -+** facgt p0\.h, p1/z, \1, z0\.h -+** | -+** faclt p0\.h, p1/z, z0\.h, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (aclt_1_f16, svfloat16_t, -+ p0 = svaclt_n_f16 (p1, z0, 1), -+ p0 = svaclt (p1, z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/aclt_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/aclt_f32.c -new file mode 100644 -index 000000000..f40826445 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/aclt_f32.c -@@ -0,0 +1,71 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** aclt_f32_tied: -+** ( -+** facgt p0\.s, p0/z, z1\.s, z0\.s -+** | -+** faclt p0\.s, p0/z, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (aclt_f32_tied, svfloat32_t, -+ p0 = svaclt_f32 (p0, z0, z1), -+ p0 = svaclt (p0, z0, z1)) -+ -+/* -+** aclt_f32_untied: -+** ( -+** facgt p0\.s, p1/z, z1\.s, z0\.s -+** | -+** faclt p0\.s, p1/z, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (aclt_f32_untied, svfloat32_t, -+ p0 = svaclt_f32 (p1, z0, z1), -+ p0 = svaclt (p1, z0, z1)) -+ -+/* -+** aclt_s4_f32: -+** mov (z[0-9]+\.s), s4 -+** ( -+** facgt p0\.s, p1/z, \1, z0\.s -+** | -+** faclt p0\.s, p1/z, z0\.s, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_ZD (aclt_s4_f32, svfloat32_t, float32_t, -+ p0 = svaclt_n_f32 (p1, z0, d4), -+ p0 = svaclt (p1, z0, d4)) -+ -+/* -+** aclt_0_f32: -+** mov (z[0-9]+\.s), #0 -+** ( -+** facgt p0\.s, p1/z, \1, z0\.s -+** | -+** faclt p0\.s, p1/z, z0\.s, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (aclt_0_f32, svfloat32_t, -+ p0 = svaclt_n_f32 (p1, z0, 0), -+ p0 = svaclt (p1, z0, 0)) -+ -+/* -+** aclt_1_f32: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? 
-+** ( -+** facgt p0\.s, p1/z, \1, z0\.s -+** | -+** faclt p0\.s, p1/z, z0\.s, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (aclt_1_f32, svfloat32_t, -+ p0 = svaclt_n_f32 (p1, z0, 1), -+ p0 = svaclt (p1, z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/aclt_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/aclt_f64.c -new file mode 100644 -index 000000000..0170b3307 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/aclt_f64.c -@@ -0,0 +1,71 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** aclt_f64_tied: -+** ( -+** facgt p0\.d, p0/z, z1\.d, z0\.d -+** | -+** faclt p0\.d, p0/z, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (aclt_f64_tied, svfloat64_t, -+ p0 = svaclt_f64 (p0, z0, z1), -+ p0 = svaclt (p0, z0, z1)) -+ -+/* -+** aclt_f64_untied: -+** ( -+** facgt p0\.d, p1/z, z1\.d, z0\.d -+** | -+** faclt p0\.d, p1/z, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (aclt_f64_untied, svfloat64_t, -+ p0 = svaclt_f64 (p1, z0, z1), -+ p0 = svaclt (p1, z0, z1)) -+ -+/* -+** aclt_d4_f64: -+** mov (z[0-9]+\.d), d4 -+** ( -+** facgt p0\.d, p1/z, \1, z0\.d -+** | -+** faclt p0\.d, p1/z, z0\.d, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_ZD (aclt_d4_f64, svfloat64_t, float64_t, -+ p0 = svaclt_n_f64 (p1, z0, d4), -+ p0 = svaclt (p1, z0, d4)) -+ -+/* -+** aclt_0_f64: -+** mov (z[0-9]+\.d), #0 -+** ( -+** facgt p0\.d, p1/z, \1, z0\.d -+** | -+** faclt p0\.d, p1/z, z0\.d, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (aclt_0_f64, svfloat64_t, -+ p0 = svaclt_n_f64 (p1, z0, 0), -+ p0 = svaclt (p1, z0, 0)) -+ -+/* -+** aclt_1_f64: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? -+** ( -+** facgt p0\.d, p1/z, \1, z0\.d -+** | -+** faclt p0\.d, p1/z, z0\.d, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (aclt_1_f64, svfloat64_t, -+ p0 = svaclt_n_f64 (p1, z0, 1), -+ p0 = svaclt (p1, z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_f16.c -new file mode 100644 -index 000000000..7228e5dd5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_f16.c -@@ -0,0 +1,577 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** add_f16_m_tied1: -+** fadd z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (add_f16_m_tied1, svfloat16_t, -+ z0 = svadd_f16_m (p0, z0, z1), -+ z0 = svadd_m (p0, z0, z1)) -+ -+/* -+** add_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fadd z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (add_f16_m_tied2, svfloat16_t, -+ z0 = svadd_f16_m (p0, z1, z0), -+ z0 = svadd_m (p0, z1, z0)) -+ -+/* -+** add_f16_m_untied: -+** movprfx z0, z1 -+** fadd z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (add_f16_m_untied, svfloat16_t, -+ z0 = svadd_f16_m (p0, z1, z2), -+ z0 = svadd_m (p0, z1, z2)) -+ -+/* -+** add_h4_f16_m_tied1: -+** mov (z[0-9]+\.h), h4 -+** fadd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (add_h4_f16_m_tied1, svfloat16_t, __fp16, -+ z0 = svadd_n_f16_m (p0, z0, d4), -+ z0 = svadd_m (p0, z0, d4)) -+ -+/* -+** add_h4_f16_m_untied: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0, z1 -+** fadd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (add_h4_f16_m_untied, svfloat16_t, __fp16, -+ z0 = svadd_n_f16_m (p0, z1, d4), -+ z0 = svadd_m (p0, z1, d4)) -+ -+/* -+** add_1_f16_m_tied1: -+** fadd z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_f16_m_tied1, svfloat16_t, -+ z0 = svadd_n_f16_m (p0, 
z0, 1), -+ z0 = svadd_m (p0, z0, 1)) -+ -+/* -+** add_1_f16_m_untied: -+** movprfx z0, z1 -+** fadd z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_f16_m_untied, svfloat16_t, -+ z0 = svadd_n_f16_m (p0, z1, 1), -+ z0 = svadd_m (p0, z1, 1)) -+ -+/* -+** add_0p5_f16_m_tied1: -+** fadd z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_0p5_f16_m_tied1, svfloat16_t, -+ z0 = svadd_n_f16_m (p0, z0, 0.5), -+ z0 = svadd_m (p0, z0, 0.5)) -+ -+/* -+** add_0p5_f16_m_untied: -+** movprfx z0, z1 -+** fadd z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_0p5_f16_m_untied, svfloat16_t, -+ z0 = svadd_n_f16_m (p0, z1, 0.5), -+ z0 = svadd_m (p0, z1, 0.5)) -+ -+/* -+** add_m1_f16_m_tied1: -+** fsub z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_f16_m_tied1, svfloat16_t, -+ z0 = svadd_n_f16_m (p0, z0, -1), -+ z0 = svadd_m (p0, z0, -1)) -+ -+/* -+** add_m1_f16_m_untied: -+** movprfx z0, z1 -+** fsub z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_f16_m_untied, svfloat16_t, -+ z0 = svadd_n_f16_m (p0, z1, -1), -+ z0 = svadd_m (p0, z1, -1)) -+ -+/* -+** add_m0p5_f16_m_tied1: -+** fsub z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m0p5_f16_m_tied1, svfloat16_t, -+ z0 = svadd_n_f16_m (p0, z0, -0.5), -+ z0 = svadd_m (p0, z0, -0.5)) -+ -+/* -+** add_m0p5_f16_m_untied: -+** movprfx z0, z1 -+** fsub z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m0p5_f16_m_untied, svfloat16_t, -+ z0 = svadd_n_f16_m (p0, z1, -0.5), -+ z0 = svadd_m (p0, z1, -0.5)) -+ -+/* -+** add_m2_f16_m: -+** fmov (z[0-9]+\.h), #-2\.0(?:e\+0)? -+** fadd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m2_f16_m, svfloat16_t, -+ z0 = svadd_n_f16_m (p0, z0, -2), -+ z0 = svadd_m (p0, z0, -2)) -+ -+/* -+** add_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fadd z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (add_f16_z_tied1, svfloat16_t, -+ z0 = svadd_f16_z (p0, z0, z1), -+ z0 = svadd_z (p0, z0, z1)) -+ -+/* -+** add_f16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** fadd z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (add_f16_z_tied2, svfloat16_t, -+ z0 = svadd_f16_z (p0, z1, z0), -+ z0 = svadd_z (p0, z1, z0)) -+ -+/* -+** add_f16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fadd z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fadd z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (add_f16_z_untied, svfloat16_t, -+ z0 = svadd_f16_z (p0, z1, z2), -+ z0 = svadd_z (p0, z1, z2)) -+ -+/* -+** add_h4_f16_z_tied1: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0\.h, p0/z, z0\.h -+** fadd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (add_h4_f16_z_tied1, svfloat16_t, __fp16, -+ z0 = svadd_n_f16_z (p0, z0, d4), -+ z0 = svadd_z (p0, z0, d4)) -+ -+/* -+** add_h4_f16_z_untied: -+** mov (z[0-9]+\.h), h4 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fadd z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** fadd z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (add_h4_f16_z_untied, svfloat16_t, __fp16, -+ z0 = svadd_n_f16_z (p0, z1, d4), -+ z0 = svadd_z (p0, z1, d4)) -+ -+/* -+** add_1_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fadd z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_f16_z_tied1, svfloat16_t, -+ z0 = svadd_n_f16_z (p0, z0, 1), -+ z0 = svadd_z (p0, z0, 1)) -+ -+/* -+** add_1_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fadd z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_f16_z_untied, svfloat16_t, -+ z0 = 
svadd_n_f16_z (p0, z1, 1), -+ z0 = svadd_z (p0, z1, 1)) -+ -+/* -+** add_0p5_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fadd z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_0p5_f16_z_tied1, svfloat16_t, -+ z0 = svadd_n_f16_z (p0, z0, 0.5), -+ z0 = svadd_z (p0, z0, 0.5)) -+ -+/* -+** add_0p5_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fadd z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_0p5_f16_z_untied, svfloat16_t, -+ z0 = svadd_n_f16_z (p0, z1, 0.5), -+ z0 = svadd_z (p0, z1, 0.5)) -+ -+/* -+** add_m1_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fsub z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_f16_z_tied1, svfloat16_t, -+ z0 = svadd_n_f16_z (p0, z0, -1), -+ z0 = svadd_z (p0, z0, -1)) -+ -+/* -+** add_m1_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fsub z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_f16_z_untied, svfloat16_t, -+ z0 = svadd_n_f16_z (p0, z1, -1), -+ z0 = svadd_z (p0, z1, -1)) -+ -+/* -+** add_m0p5_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fsub z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m0p5_f16_z_tied1, svfloat16_t, -+ z0 = svadd_n_f16_z (p0, z0, -0.5), -+ z0 = svadd_z (p0, z0, -0.5)) -+ -+/* -+** add_m0p5_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fsub z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m0p5_f16_z_untied, svfloat16_t, -+ z0 = svadd_n_f16_z (p0, z1, -0.5), -+ z0 = svadd_z (p0, z1, -0.5)) -+ -+/* -+** add_m2_f16_z: -+** fmov (z[0-9]+\.h), #-2\.0(?:e\+0)? -+** movprfx z0\.h, p0/z, z0\.h -+** fadd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m2_f16_z, svfloat16_t, -+ z0 = svadd_n_f16_z (p0, z0, -2), -+ z0 = svadd_z (p0, z0, -2)) -+ -+/* -+** add_f16_x_tied1: -+** fadd z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (add_f16_x_tied1, svfloat16_t, -+ z0 = svadd_f16_x (p0, z0, z1), -+ z0 = svadd_x (p0, z0, z1)) -+ -+/* -+** add_f16_x_tied2: -+** fadd z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (add_f16_x_tied2, svfloat16_t, -+ z0 = svadd_f16_x (p0, z1, z0), -+ z0 = svadd_x (p0, z1, z0)) -+ -+/* -+** add_f16_x_untied: -+** ( -+** movprfx z0, z1 -+** fadd z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0, z2 -+** fadd z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (add_f16_x_untied, svfloat16_t, -+ z0 = svadd_f16_x (p0, z1, z2), -+ z0 = svadd_x (p0, z1, z2)) -+ -+/* -+** add_h4_f16_x_tied1: -+** mov (z[0-9]+\.h), h4 -+** fadd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (add_h4_f16_x_tied1, svfloat16_t, __fp16, -+ z0 = svadd_n_f16_x (p0, z0, d4), -+ z0 = svadd_x (p0, z0, d4)) -+ -+/* -+** add_h4_f16_x_untied: -+** mov z0\.h, h4 -+** fadd z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZD (add_h4_f16_x_untied, svfloat16_t, __fp16, -+ z0 = svadd_n_f16_x (p0, z1, d4), -+ z0 = svadd_x (p0, z1, d4)) -+ -+/* -+** add_1_f16_x_tied1: -+** fadd z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_f16_x_tied1, svfloat16_t, -+ z0 = svadd_n_f16_x (p0, z0, 1), -+ z0 = svadd_x (p0, z0, 1)) -+ -+/* -+** add_1_f16_x_untied: -+** movprfx z0, z1 -+** fadd z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_f16_x_untied, svfloat16_t, -+ z0 = svadd_n_f16_x (p0, z1, 1), -+ z0 = svadd_x (p0, z1, 1)) -+ -+/* -+** add_0p5_f16_x_tied1: -+** fadd z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_0p5_f16_x_tied1, svfloat16_t, -+ z0 = svadd_n_f16_x (p0, z0, 0.5), -+ z0 = svadd_x (p0, z0, 0.5)) -+ -+/* -+** add_0p5_f16_x_untied: -+** movprfx z0, z1 -+** fadd 
z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_0p5_f16_x_untied, svfloat16_t, -+ z0 = svadd_n_f16_x (p0, z1, 0.5), -+ z0 = svadd_x (p0, z1, 0.5)) -+ -+/* -+** add_m1_f16_x_tied1: -+** fsub z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_f16_x_tied1, svfloat16_t, -+ z0 = svadd_n_f16_x (p0, z0, -1), -+ z0 = svadd_x (p0, z0, -1)) -+ -+/* -+** add_m1_f16_x_untied: -+** movprfx z0, z1 -+** fsub z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_f16_x_untied, svfloat16_t, -+ z0 = svadd_n_f16_x (p0, z1, -1), -+ z0 = svadd_x (p0, z1, -1)) -+ -+/* -+** add_m0p5_f16_x_tied1: -+** fsub z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m0p5_f16_x_tied1, svfloat16_t, -+ z0 = svadd_n_f16_x (p0, z0, -0.5), -+ z0 = svadd_x (p0, z0, -0.5)) -+ -+/* -+** add_m0p5_f16_x_untied: -+** movprfx z0, z1 -+** fsub z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m0p5_f16_x_untied, svfloat16_t, -+ z0 = svadd_n_f16_x (p0, z1, -0.5), -+ z0 = svadd_x (p0, z1, -0.5)) -+ -+/* -+** add_2_f16_x_tied1: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fadd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_2_f16_x_tied1, svfloat16_t, -+ z0 = svadd_n_f16_x (p0, z0, 2), -+ z0 = svadd_x (p0, z0, 2)) -+ -+/* -+** add_2_f16_x_untied: -+** fmov z0\.h, #2\.0(?:e\+0)? -+** fadd z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (add_2_f16_x_untied, svfloat16_t, -+ z0 = svadd_n_f16_x (p0, z1, 2), -+ z0 = svadd_x (p0, z1, 2)) -+ -+/* -+** ptrue_add_f16_x_tied1: -+** fadd z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_f16_x_tied1, svfloat16_t, -+ z0 = svadd_f16_x (svptrue_b16 (), z0, z1), -+ z0 = svadd_x (svptrue_b16 (), z0, z1)) -+ -+/* -+** ptrue_add_f16_x_tied2: -+** fadd z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_f16_x_tied2, svfloat16_t, -+ z0 = svadd_f16_x (svptrue_b16 (), z1, z0), -+ z0 = svadd_x (svptrue_b16 (), z1, z0)) -+ -+/* -+** ptrue_add_f16_x_untied: -+** fadd z0\.h, (z1\.h, z2\.h|z2\.h, z1\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_f16_x_untied, svfloat16_t, -+ z0 = svadd_f16_x (svptrue_b16 (), z1, z2), -+ z0 = svadd_x (svptrue_b16 (), z1, z2)) -+ -+/* -+** ptrue_add_1_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_1_f16_x_tied1, svfloat16_t, -+ z0 = svadd_n_f16_x (svptrue_b16 (), z0, 1), -+ z0 = svadd_x (svptrue_b16 (), z0, 1)) -+ -+/* -+** ptrue_add_1_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_1_f16_x_untied, svfloat16_t, -+ z0 = svadd_n_f16_x (svptrue_b16 (), z1, 1), -+ z0 = svadd_x (svptrue_b16 (), z1, 1)) -+ -+/* -+** ptrue_add_0p5_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_0p5_f16_x_tied1, svfloat16_t, -+ z0 = svadd_n_f16_x (svptrue_b16 (), z0, 0.5), -+ z0 = svadd_x (svptrue_b16 (), z0, 0.5)) -+ -+/* -+** ptrue_add_0p5_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_0p5_f16_x_untied, svfloat16_t, -+ z0 = svadd_n_f16_x (svptrue_b16 (), z1, 0.5), -+ z0 = svadd_x (svptrue_b16 (), z1, 0.5)) -+ -+/* -+** ptrue_add_m1_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_m1_f16_x_tied1, svfloat16_t, -+ z0 = svadd_n_f16_x (svptrue_b16 (), z0, -1), -+ z0 = svadd_x (svptrue_b16 (), z0, -1)) -+ -+/* -+** ptrue_add_m1_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_m1_f16_x_untied, svfloat16_t, -+ z0 = svadd_n_f16_x (svptrue_b16 (), z1, -1), -+ z0 = svadd_x (svptrue_b16 (), z1, -1)) -+ -+/* -+** ptrue_add_m0p5_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_m0p5_f16_x_tied1, svfloat16_t, -+ z0 = svadd_n_f16_x (svptrue_b16 (), z0, -0.5), -+ z0 = svadd_x (svptrue_b16 (), z0, -0.5)) -+ -+/* -+** ptrue_add_m0p5_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_m0p5_f16_x_untied, svfloat16_t, -+ z0 = svadd_n_f16_x (svptrue_b16 (), z1, -0.5), -+ z0 = svadd_x (svptrue_b16 (), z1, -0.5)) -+ -+/* -+** ptrue_add_2_f16_x_tied1: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fadd z0\.h, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_2_f16_x_tied1, svfloat16_t, -+ z0 = svadd_n_f16_x (svptrue_b16 (), z0, 2), -+ z0 = svadd_x (svptrue_b16 (), z0, 2)) -+ -+/* -+** ptrue_add_2_f16_x_untied: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fadd z0\.h, (z1\.h, \1|\1, z1\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_2_f16_x_untied, svfloat16_t, -+ z0 = svadd_n_f16_x (svptrue_b16 (), z1, 2), -+ z0 = svadd_x (svptrue_b16 (), z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_f16_notrap.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_f16_notrap.c -new file mode 100644 -index 000000000..f6330acee ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_f16_notrap.c -@@ -0,0 +1,572 @@ -+/* { dg-additional-options "-fno-trapping-math" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** add_f16_m_tied1: -+** fadd z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (add_f16_m_tied1, svfloat16_t, -+ z0 = svadd_f16_m (p0, z0, z1), -+ z0 = svadd_m (p0, z0, z1)) -+ -+/* -+** add_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fadd z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (add_f16_m_tied2, svfloat16_t, -+ z0 = svadd_f16_m (p0, z1, z0), -+ z0 = svadd_m (p0, z1, z0)) -+ -+/* -+** add_f16_m_untied: -+** movprfx z0, z1 -+** fadd z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (add_f16_m_untied, svfloat16_t, -+ z0 = svadd_f16_m (p0, z1, z2), -+ z0 = svadd_m (p0, z1, z2)) -+ -+/* -+** add_h4_f16_m_tied1: -+** mov (z[0-9]+\.h), h4 -+** fadd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (add_h4_f16_m_tied1, svfloat16_t, __fp16, -+ z0 = svadd_n_f16_m (p0, z0, d4), -+ z0 = svadd_m (p0, z0, d4)) -+ -+/* -+** add_h4_f16_m_untied: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0, z1 -+** fadd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (add_h4_f16_m_untied, svfloat16_t, __fp16, -+ z0 = svadd_n_f16_m (p0, z1, d4), -+ z0 = svadd_m (p0, z1, d4)) -+ -+/* -+** add_1_f16_m_tied1: -+** fadd z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_f16_m_tied1, svfloat16_t, -+ z0 = svadd_n_f16_m (p0, z0, 1), -+ z0 = svadd_m (p0, z0, 1)) -+ -+/* -+** add_1_f16_m_untied: -+** movprfx z0, z1 -+** fadd z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_f16_m_untied, svfloat16_t, -+ z0 = svadd_n_f16_m (p0, z1, 1), -+ z0 = svadd_m (p0, z1, 1)) -+ -+/* -+** add_0p5_f16_m_tied1: -+** fadd z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_0p5_f16_m_tied1, svfloat16_t, -+ z0 = svadd_n_f16_m (p0, z0, 0.5), -+ z0 = svadd_m (p0, z0, 0.5)) -+ -+/* -+** add_0p5_f16_m_untied: -+** movprfx z0, z1 -+** fadd z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z 
(add_0p5_f16_m_untied, svfloat16_t, -+ z0 = svadd_n_f16_m (p0, z1, 0.5), -+ z0 = svadd_m (p0, z1, 0.5)) -+ -+/* -+** add_m1_f16_m_tied1: -+** fsub z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_f16_m_tied1, svfloat16_t, -+ z0 = svadd_n_f16_m (p0, z0, -1), -+ z0 = svadd_m (p0, z0, -1)) -+ -+/* -+** add_m1_f16_m_untied: -+** movprfx z0, z1 -+** fsub z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_f16_m_untied, svfloat16_t, -+ z0 = svadd_n_f16_m (p0, z1, -1), -+ z0 = svadd_m (p0, z1, -1)) -+ -+/* -+** add_m0p5_f16_m_tied1: -+** fsub z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m0p5_f16_m_tied1, svfloat16_t, -+ z0 = svadd_n_f16_m (p0, z0, -0.5), -+ z0 = svadd_m (p0, z0, -0.5)) -+ -+/* -+** add_m0p5_f16_m_untied: -+** movprfx z0, z1 -+** fsub z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m0p5_f16_m_untied, svfloat16_t, -+ z0 = svadd_n_f16_m (p0, z1, -0.5), -+ z0 = svadd_m (p0, z1, -0.5)) -+ -+/* -+** add_m2_f16_m: -+** fmov (z[0-9]+\.h), #-2\.0(?:e\+0)? -+** fadd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m2_f16_m, svfloat16_t, -+ z0 = svadd_n_f16_m (p0, z0, -2), -+ z0 = svadd_m (p0, z0, -2)) -+ -+/* -+** add_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fadd z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (add_f16_z_tied1, svfloat16_t, -+ z0 = svadd_f16_z (p0, z0, z1), -+ z0 = svadd_z (p0, z0, z1)) -+ -+/* -+** add_f16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** fadd z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (add_f16_z_tied2, svfloat16_t, -+ z0 = svadd_f16_z (p0, z1, z0), -+ z0 = svadd_z (p0, z1, z0)) -+ -+/* -+** add_f16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fadd z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fadd z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (add_f16_z_untied, svfloat16_t, -+ z0 = svadd_f16_z (p0, z1, z2), -+ z0 = svadd_z (p0, z1, z2)) -+ -+/* -+** add_h4_f16_z_tied1: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0\.h, p0/z, z0\.h -+** fadd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (add_h4_f16_z_tied1, svfloat16_t, __fp16, -+ z0 = svadd_n_f16_z (p0, z0, d4), -+ z0 = svadd_z (p0, z0, d4)) -+ -+/* -+** add_h4_f16_z_untied: -+** mov (z[0-9]+\.h), h4 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fadd z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** fadd z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (add_h4_f16_z_untied, svfloat16_t, __fp16, -+ z0 = svadd_n_f16_z (p0, z1, d4), -+ z0 = svadd_z (p0, z1, d4)) -+ -+/* -+** add_1_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fadd z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_f16_z_tied1, svfloat16_t, -+ z0 = svadd_n_f16_z (p0, z0, 1), -+ z0 = svadd_z (p0, z0, 1)) -+ -+/* -+** add_1_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fadd z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_f16_z_untied, svfloat16_t, -+ z0 = svadd_n_f16_z (p0, z1, 1), -+ z0 = svadd_z (p0, z1, 1)) -+ -+/* -+** add_0p5_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fadd z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_0p5_f16_z_tied1, svfloat16_t, -+ z0 = svadd_n_f16_z (p0, z0, 0.5), -+ z0 = svadd_z (p0, z0, 0.5)) -+ -+/* -+** add_0p5_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fadd z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_0p5_f16_z_untied, svfloat16_t, -+ z0 = svadd_n_f16_z (p0, z1, 0.5), -+ z0 = svadd_z (p0, z1, 0.5)) -+ -+/* -+** add_m1_f16_z_tied1: -+** movprfx z0\.h, 
p0/z, z0\.h -+** fsub z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_f16_z_tied1, svfloat16_t, -+ z0 = svadd_n_f16_z (p0, z0, -1), -+ z0 = svadd_z (p0, z0, -1)) -+ -+/* -+** add_m1_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fsub z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_f16_z_untied, svfloat16_t, -+ z0 = svadd_n_f16_z (p0, z1, -1), -+ z0 = svadd_z (p0, z1, -1)) -+ -+/* -+** add_m0p5_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fsub z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m0p5_f16_z_tied1, svfloat16_t, -+ z0 = svadd_n_f16_z (p0, z0, -0.5), -+ z0 = svadd_z (p0, z0, -0.5)) -+ -+/* -+** add_m0p5_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fsub z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m0p5_f16_z_untied, svfloat16_t, -+ z0 = svadd_n_f16_z (p0, z1, -0.5), -+ z0 = svadd_z (p0, z1, -0.5)) -+ -+/* -+** add_m2_f16_z: -+** fmov (z[0-9]+\.h), #-2\.0(?:e\+0)? -+** movprfx z0\.h, p0/z, z0\.h -+** fadd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m2_f16_z, svfloat16_t, -+ z0 = svadd_n_f16_z (p0, z0, -2), -+ z0 = svadd_z (p0, z0, -2)) -+ -+/* -+** add_f16_x_tied1: -+** fadd z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (add_f16_x_tied1, svfloat16_t, -+ z0 = svadd_f16_x (p0, z0, z1), -+ z0 = svadd_x (p0, z0, z1)) -+ -+/* -+** add_f16_x_tied2: -+** fadd z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (add_f16_x_tied2, svfloat16_t, -+ z0 = svadd_f16_x (p0, z1, z0), -+ z0 = svadd_x (p0, z1, z0)) -+ -+/* -+** add_f16_x_untied: -+** fadd z0\.h, (z1\.h, z2\.h|z2\.h, z1\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (add_f16_x_untied, svfloat16_t, -+ z0 = svadd_f16_x (p0, z1, z2), -+ z0 = svadd_x (p0, z1, z2)) -+ -+/* -+** add_h4_f16_x_tied1: -+** mov (z[0-9]+\.h), h4 -+** fadd z0\.h, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_ZD (add_h4_f16_x_tied1, svfloat16_t, __fp16, -+ z0 = svadd_n_f16_x (p0, z0, d4), -+ z0 = svadd_x (p0, z0, d4)) -+ -+/* -+** add_h4_f16_x_untied: -+** mov (z[0-9]+\.h), h4 -+** fadd z0\.h, (z1\.h, \1|\1, z1\.h) -+** ret -+*/ -+TEST_UNIFORM_ZD (add_h4_f16_x_untied, svfloat16_t, __fp16, -+ z0 = svadd_n_f16_x (p0, z1, d4), -+ z0 = svadd_x (p0, z1, d4)) -+ -+/* -+** add_1_f16_x_tied1: -+** fadd z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_f16_x_tied1, svfloat16_t, -+ z0 = svadd_n_f16_x (p0, z0, 1), -+ z0 = svadd_x (p0, z0, 1)) -+ -+/* -+** add_1_f16_x_untied: -+** movprfx z0, z1 -+** fadd z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_f16_x_untied, svfloat16_t, -+ z0 = svadd_n_f16_x (p0, z1, 1), -+ z0 = svadd_x (p0, z1, 1)) -+ -+/* -+** add_0p5_f16_x_tied1: -+** fadd z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_0p5_f16_x_tied1, svfloat16_t, -+ z0 = svadd_n_f16_x (p0, z0, 0.5), -+ z0 = svadd_x (p0, z0, 0.5)) -+ -+/* -+** add_0p5_f16_x_untied: -+** movprfx z0, z1 -+** fadd z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_0p5_f16_x_untied, svfloat16_t, -+ z0 = svadd_n_f16_x (p0, z1, 0.5), -+ z0 = svadd_x (p0, z1, 0.5)) -+ -+/* -+** add_m1_f16_x_tied1: -+** fsub z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_f16_x_tied1, svfloat16_t, -+ z0 = svadd_n_f16_x (p0, z0, -1), -+ z0 = svadd_x (p0, z0, -1)) -+ -+/* -+** add_m1_f16_x_untied: -+** movprfx z0, z1 -+** fsub z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_f16_x_untied, svfloat16_t, -+ z0 = svadd_n_f16_x (p0, z1, -1), -+ z0 = svadd_x (p0, z1, -1)) -+ -+/* -+** add_m0p5_f16_x_tied1: -+** fsub z0\.h, 
p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m0p5_f16_x_tied1, svfloat16_t, -+ z0 = svadd_n_f16_x (p0, z0, -0.5), -+ z0 = svadd_x (p0, z0, -0.5)) -+ -+/* -+** add_m0p5_f16_x_untied: -+** movprfx z0, z1 -+** fsub z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m0p5_f16_x_untied, svfloat16_t, -+ z0 = svadd_n_f16_x (p0, z1, -0.5), -+ z0 = svadd_x (p0, z1, -0.5)) -+ -+/* -+** add_2_f16_x_tied1: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fadd z0\.h, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (add_2_f16_x_tied1, svfloat16_t, -+ z0 = svadd_n_f16_x (p0, z0, 2), -+ z0 = svadd_x (p0, z0, 2)) -+ -+/* -+** add_2_f16_x_untied: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fadd z0\.h, (z1\.h, \1|\1, z1\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (add_2_f16_x_untied, svfloat16_t, -+ z0 = svadd_n_f16_x (p0, z1, 2), -+ z0 = svadd_x (p0, z1, 2)) -+ -+/* -+** ptrue_add_f16_x_tied1: -+** fadd z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_f16_x_tied1, svfloat16_t, -+ z0 = svadd_f16_x (svptrue_b16 (), z0, z1), -+ z0 = svadd_x (svptrue_b16 (), z0, z1)) -+ -+/* -+** ptrue_add_f16_x_tied2: -+** fadd z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_f16_x_tied2, svfloat16_t, -+ z0 = svadd_f16_x (svptrue_b16 (), z1, z0), -+ z0 = svadd_x (svptrue_b16 (), z1, z0)) -+ -+/* -+** ptrue_add_f16_x_untied: -+** fadd z0\.h, (z1\.h, z2\.h|z2\.h, z1\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_f16_x_untied, svfloat16_t, -+ z0 = svadd_f16_x (svptrue_b16 (), z1, z2), -+ z0 = svadd_x (svptrue_b16 (), z1, z2)) -+ -+/* -+** ptrue_add_1_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_1_f16_x_tied1, svfloat16_t, -+ z0 = svadd_n_f16_x (svptrue_b16 (), z0, 1), -+ z0 = svadd_x (svptrue_b16 (), z0, 1)) -+ -+/* -+** ptrue_add_1_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_1_f16_x_untied, svfloat16_t, -+ z0 = svadd_n_f16_x (svptrue_b16 (), z1, 1), -+ z0 = svadd_x (svptrue_b16 (), z1, 1)) -+ -+/* -+** ptrue_add_0p5_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_0p5_f16_x_tied1, svfloat16_t, -+ z0 = svadd_n_f16_x (svptrue_b16 (), z0, 0.5), -+ z0 = svadd_x (svptrue_b16 (), z0, 0.5)) -+ -+/* -+** ptrue_add_0p5_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_0p5_f16_x_untied, svfloat16_t, -+ z0 = svadd_n_f16_x (svptrue_b16 (), z1, 0.5), -+ z0 = svadd_x (svptrue_b16 (), z1, 0.5)) -+ -+/* -+** ptrue_add_m1_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_m1_f16_x_tied1, svfloat16_t, -+ z0 = svadd_n_f16_x (svptrue_b16 (), z0, -1), -+ z0 = svadd_x (svptrue_b16 (), z0, -1)) -+ -+/* -+** ptrue_add_m1_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_m1_f16_x_untied, svfloat16_t, -+ z0 = svadd_n_f16_x (svptrue_b16 (), z1, -1), -+ z0 = svadd_x (svptrue_b16 (), z1, -1)) -+ -+/* -+** ptrue_add_m0p5_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_m0p5_f16_x_tied1, svfloat16_t, -+ z0 = svadd_n_f16_x (svptrue_b16 (), z0, -0.5), -+ z0 = svadd_x (svptrue_b16 (), z0, -0.5)) -+ -+/* -+** ptrue_add_m0p5_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_m0p5_f16_x_untied, svfloat16_t, -+ z0 = svadd_n_f16_x (svptrue_b16 (), z1, -0.5), -+ z0 = svadd_x (svptrue_b16 (), z1, -0.5)) -+ -+/* -+** ptrue_add_2_f16_x_tied1: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fadd z0\.h, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_2_f16_x_tied1, svfloat16_t, -+ z0 = svadd_n_f16_x (svptrue_b16 (), z0, 2), -+ z0 = svadd_x (svptrue_b16 (), z0, 2)) -+ -+/* -+** ptrue_add_2_f16_x_untied: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fadd z0\.h, (z1\.h, \1|\1, z1\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_2_f16_x_untied, svfloat16_t, -+ z0 = svadd_n_f16_x (svptrue_b16 (), z1, 2), -+ z0 = svadd_x (svptrue_b16 (), z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_f32.c -new file mode 100644 -index 000000000..b5f4e9623 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_f32.c -@@ -0,0 +1,577 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** add_f32_m_tied1: -+** fadd z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (add_f32_m_tied1, svfloat32_t, -+ z0 = svadd_f32_m (p0, z0, z1), -+ z0 = svadd_m (p0, z0, z1)) -+ -+/* -+** add_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fadd z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (add_f32_m_tied2, svfloat32_t, -+ z0 = svadd_f32_m (p0, z1, z0), -+ z0 = svadd_m (p0, z1, z0)) -+ -+/* -+** add_f32_m_untied: -+** movprfx z0, z1 -+** fadd z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (add_f32_m_untied, svfloat32_t, -+ z0 = svadd_f32_m (p0, z1, z2), -+ z0 = svadd_m (p0, z1, z2)) -+ -+/* -+** add_s4_f32_m_tied1: -+** mov (z[0-9]+\.s), s4 -+** fadd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (add_s4_f32_m_tied1, svfloat32_t, float, -+ z0 = svadd_n_f32_m (p0, z0, d4), -+ z0 = svadd_m (p0, z0, d4)) -+ -+/* -+** add_s4_f32_m_untied: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0, z1 -+** fadd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (add_s4_f32_m_untied, svfloat32_t, float, -+ z0 = svadd_n_f32_m (p0, z1, d4), -+ z0 = svadd_m (p0, z1, d4)) -+ -+/* -+** add_1_f32_m_tied1: -+** fadd z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_f32_m_tied1, svfloat32_t, -+ z0 = svadd_n_f32_m (p0, z0, 1), -+ z0 = svadd_m (p0, z0, 1)) -+ -+/* -+** add_1_f32_m_untied: -+** movprfx z0, z1 -+** fadd z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_f32_m_untied, svfloat32_t, -+ z0 = svadd_n_f32_m (p0, z1, 1), -+ z0 = svadd_m (p0, z1, 1)) -+ -+/* -+** add_0p5_f32_m_tied1: -+** fadd z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_0p5_f32_m_tied1, svfloat32_t, -+ z0 = svadd_n_f32_m (p0, z0, 0.5), -+ z0 = svadd_m (p0, z0, 0.5)) -+ -+/* -+** add_0p5_f32_m_untied: -+** movprfx z0, z1 -+** fadd z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_0p5_f32_m_untied, svfloat32_t, -+ z0 = svadd_n_f32_m (p0, z1, 0.5), -+ z0 = svadd_m (p0, z1, 0.5)) -+ -+/* -+** add_m1_f32_m_tied1: -+** fsub z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_f32_m_tied1, svfloat32_t, -+ z0 = svadd_n_f32_m (p0, z0, -1), -+ z0 = svadd_m (p0, z0, -1)) -+ -+/* -+** add_m1_f32_m_untied: -+** movprfx z0, z1 -+** fsub z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_f32_m_untied, svfloat32_t, -+ z0 = svadd_n_f32_m (p0, z1, -1), -+ z0 = svadd_m (p0, z1, -1)) -+ -+/* -+** add_m0p5_f32_m_tied1: -+** fsub z0\.s, p0/m, 
z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m0p5_f32_m_tied1, svfloat32_t, -+ z0 = svadd_n_f32_m (p0, z0, -0.5), -+ z0 = svadd_m (p0, z0, -0.5)) -+ -+/* -+** add_m0p5_f32_m_untied: -+** movprfx z0, z1 -+** fsub z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m0p5_f32_m_untied, svfloat32_t, -+ z0 = svadd_n_f32_m (p0, z1, -0.5), -+ z0 = svadd_m (p0, z1, -0.5)) -+ -+/* -+** add_m2_f32_m: -+** fmov (z[0-9]+\.s), #-2\.0(?:e\+0)? -+** fadd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m2_f32_m, svfloat32_t, -+ z0 = svadd_n_f32_m (p0, z0, -2), -+ z0 = svadd_m (p0, z0, -2)) -+ -+/* -+** add_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fadd z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (add_f32_z_tied1, svfloat32_t, -+ z0 = svadd_f32_z (p0, z0, z1), -+ z0 = svadd_z (p0, z0, z1)) -+ -+/* -+** add_f32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** fadd z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (add_f32_z_tied2, svfloat32_t, -+ z0 = svadd_f32_z (p0, z1, z0), -+ z0 = svadd_z (p0, z1, z0)) -+ -+/* -+** add_f32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fadd z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fadd z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (add_f32_z_untied, svfloat32_t, -+ z0 = svadd_f32_z (p0, z1, z2), -+ z0 = svadd_z (p0, z1, z2)) -+ -+/* -+** add_s4_f32_z_tied1: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0\.s, p0/z, z0\.s -+** fadd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (add_s4_f32_z_tied1, svfloat32_t, float, -+ z0 = svadd_n_f32_z (p0, z0, d4), -+ z0 = svadd_z (p0, z0, d4)) -+ -+/* -+** add_s4_f32_z_untied: -+** mov (z[0-9]+\.s), s4 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fadd z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** fadd z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (add_s4_f32_z_untied, svfloat32_t, float, -+ z0 = svadd_n_f32_z (p0, z1, d4), -+ z0 = svadd_z (p0, z1, d4)) -+ -+/* -+** add_1_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fadd z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_f32_z_tied1, svfloat32_t, -+ z0 = svadd_n_f32_z (p0, z0, 1), -+ z0 = svadd_z (p0, z0, 1)) -+ -+/* -+** add_1_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fadd z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_f32_z_untied, svfloat32_t, -+ z0 = svadd_n_f32_z (p0, z1, 1), -+ z0 = svadd_z (p0, z1, 1)) -+ -+/* -+** add_0p5_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fadd z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_0p5_f32_z_tied1, svfloat32_t, -+ z0 = svadd_n_f32_z (p0, z0, 0.5), -+ z0 = svadd_z (p0, z0, 0.5)) -+ -+/* -+** add_0p5_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fadd z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_0p5_f32_z_untied, svfloat32_t, -+ z0 = svadd_n_f32_z (p0, z1, 0.5), -+ z0 = svadd_z (p0, z1, 0.5)) -+ -+/* -+** add_m1_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fsub z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_f32_z_tied1, svfloat32_t, -+ z0 = svadd_n_f32_z (p0, z0, -1), -+ z0 = svadd_z (p0, z0, -1)) -+ -+/* -+** add_m1_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fsub z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_f32_z_untied, svfloat32_t, -+ z0 = svadd_n_f32_z (p0, z1, -1), -+ z0 = svadd_z (p0, z1, -1)) -+ -+/* -+** add_m0p5_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fsub z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m0p5_f32_z_tied1, svfloat32_t, -+ 
z0 = svadd_n_f32_z (p0, z0, -0.5), -+ z0 = svadd_z (p0, z0, -0.5)) -+ -+/* -+** add_m0p5_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fsub z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m0p5_f32_z_untied, svfloat32_t, -+ z0 = svadd_n_f32_z (p0, z1, -0.5), -+ z0 = svadd_z (p0, z1, -0.5)) -+ -+/* -+** add_m2_f32_z: -+** fmov (z[0-9]+\.s), #-2\.0(?:e\+0)? -+** movprfx z0\.s, p0/z, z0\.s -+** fadd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m2_f32_z, svfloat32_t, -+ z0 = svadd_n_f32_z (p0, z0, -2), -+ z0 = svadd_z (p0, z0, -2)) -+ -+/* -+** add_f32_x_tied1: -+** fadd z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (add_f32_x_tied1, svfloat32_t, -+ z0 = svadd_f32_x (p0, z0, z1), -+ z0 = svadd_x (p0, z0, z1)) -+ -+/* -+** add_f32_x_tied2: -+** fadd z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (add_f32_x_tied2, svfloat32_t, -+ z0 = svadd_f32_x (p0, z1, z0), -+ z0 = svadd_x (p0, z1, z0)) -+ -+/* -+** add_f32_x_untied: -+** ( -+** movprfx z0, z1 -+** fadd z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0, z2 -+** fadd z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (add_f32_x_untied, svfloat32_t, -+ z0 = svadd_f32_x (p0, z1, z2), -+ z0 = svadd_x (p0, z1, z2)) -+ -+/* -+** add_s4_f32_x_tied1: -+** mov (z[0-9]+\.s), s4 -+** fadd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (add_s4_f32_x_tied1, svfloat32_t, float, -+ z0 = svadd_n_f32_x (p0, z0, d4), -+ z0 = svadd_x (p0, z0, d4)) -+ -+/* -+** add_s4_f32_x_untied: -+** mov z0\.s, s4 -+** fadd z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZD (add_s4_f32_x_untied, svfloat32_t, float, -+ z0 = svadd_n_f32_x (p0, z1, d4), -+ z0 = svadd_x (p0, z1, d4)) -+ -+/* -+** add_1_f32_x_tied1: -+** fadd z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_f32_x_tied1, svfloat32_t, -+ z0 = svadd_n_f32_x (p0, z0, 1), -+ z0 = svadd_x (p0, z0, 1)) -+ -+/* -+** add_1_f32_x_untied: -+** movprfx z0, z1 -+** fadd z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_f32_x_untied, svfloat32_t, -+ z0 = svadd_n_f32_x (p0, z1, 1), -+ z0 = svadd_x (p0, z1, 1)) -+ -+/* -+** add_0p5_f32_x_tied1: -+** fadd z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_0p5_f32_x_tied1, svfloat32_t, -+ z0 = svadd_n_f32_x (p0, z0, 0.5), -+ z0 = svadd_x (p0, z0, 0.5)) -+ -+/* -+** add_0p5_f32_x_untied: -+** movprfx z0, z1 -+** fadd z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_0p5_f32_x_untied, svfloat32_t, -+ z0 = svadd_n_f32_x (p0, z1, 0.5), -+ z0 = svadd_x (p0, z1, 0.5)) -+ -+/* -+** add_m1_f32_x_tied1: -+** fsub z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_f32_x_tied1, svfloat32_t, -+ z0 = svadd_n_f32_x (p0, z0, -1), -+ z0 = svadd_x (p0, z0, -1)) -+ -+/* -+** add_m1_f32_x_untied: -+** movprfx z0, z1 -+** fsub z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_f32_x_untied, svfloat32_t, -+ z0 = svadd_n_f32_x (p0, z1, -1), -+ z0 = svadd_x (p0, z1, -1)) -+ -+/* -+** add_m0p5_f32_x_tied1: -+** fsub z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m0p5_f32_x_tied1, svfloat32_t, -+ z0 = svadd_n_f32_x (p0, z0, -0.5), -+ z0 = svadd_x (p0, z0, -0.5)) -+ -+/* -+** add_m0p5_f32_x_untied: -+** movprfx z0, z1 -+** fsub z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m0p5_f32_x_untied, svfloat32_t, -+ z0 = svadd_n_f32_x (p0, z1, -0.5), -+ z0 = svadd_x (p0, z1, -0.5)) -+ -+/* -+** add_2_f32_x_tied1: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
-+** fadd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_2_f32_x_tied1, svfloat32_t, -+ z0 = svadd_n_f32_x (p0, z0, 2), -+ z0 = svadd_x (p0, z0, 2)) -+ -+/* -+** add_2_f32_x_untied: -+** fmov z0\.s, #2\.0(?:e\+0)? -+** fadd z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (add_2_f32_x_untied, svfloat32_t, -+ z0 = svadd_n_f32_x (p0, z1, 2), -+ z0 = svadd_x (p0, z1, 2)) -+ -+/* -+** ptrue_add_f32_x_tied1: -+** fadd z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_f32_x_tied1, svfloat32_t, -+ z0 = svadd_f32_x (svptrue_b32 (), z0, z1), -+ z0 = svadd_x (svptrue_b32 (), z0, z1)) -+ -+/* -+** ptrue_add_f32_x_tied2: -+** fadd z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_f32_x_tied2, svfloat32_t, -+ z0 = svadd_f32_x (svptrue_b32 (), z1, z0), -+ z0 = svadd_x (svptrue_b32 (), z1, z0)) -+ -+/* -+** ptrue_add_f32_x_untied: -+** fadd z0\.s, (z1\.s, z2\.s|z2\.s, z1\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_f32_x_untied, svfloat32_t, -+ z0 = svadd_f32_x (svptrue_b32 (), z1, z2), -+ z0 = svadd_x (svptrue_b32 (), z1, z2)) -+ -+/* -+** ptrue_add_1_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_1_f32_x_tied1, svfloat32_t, -+ z0 = svadd_n_f32_x (svptrue_b32 (), z0, 1), -+ z0 = svadd_x (svptrue_b32 (), z0, 1)) -+ -+/* -+** ptrue_add_1_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_1_f32_x_untied, svfloat32_t, -+ z0 = svadd_n_f32_x (svptrue_b32 (), z1, 1), -+ z0 = svadd_x (svptrue_b32 (), z1, 1)) -+ -+/* -+** ptrue_add_0p5_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_0p5_f32_x_tied1, svfloat32_t, -+ z0 = svadd_n_f32_x (svptrue_b32 (), z0, 0.5), -+ z0 = svadd_x (svptrue_b32 (), z0, 0.5)) -+ -+/* -+** ptrue_add_0p5_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_0p5_f32_x_untied, svfloat32_t, -+ z0 = svadd_n_f32_x (svptrue_b32 (), z1, 0.5), -+ z0 = svadd_x (svptrue_b32 (), z1, 0.5)) -+ -+/* -+** ptrue_add_m1_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_m1_f32_x_tied1, svfloat32_t, -+ z0 = svadd_n_f32_x (svptrue_b32 (), z0, -1), -+ z0 = svadd_x (svptrue_b32 (), z0, -1)) -+ -+/* -+** ptrue_add_m1_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_m1_f32_x_untied, svfloat32_t, -+ z0 = svadd_n_f32_x (svptrue_b32 (), z1, -1), -+ z0 = svadd_x (svptrue_b32 (), z1, -1)) -+ -+/* -+** ptrue_add_m0p5_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_m0p5_f32_x_tied1, svfloat32_t, -+ z0 = svadd_n_f32_x (svptrue_b32 (), z0, -0.5), -+ z0 = svadd_x (svptrue_b32 (), z0, -0.5)) -+ -+/* -+** ptrue_add_m0p5_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_m0p5_f32_x_untied, svfloat32_t, -+ z0 = svadd_n_f32_x (svptrue_b32 (), z1, -0.5), -+ z0 = svadd_x (svptrue_b32 (), z1, -0.5)) -+ -+/* -+** ptrue_add_2_f32_x_tied1: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fadd z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_2_f32_x_tied1, svfloat32_t, -+ z0 = svadd_n_f32_x (svptrue_b32 (), z0, 2), -+ z0 = svadd_x (svptrue_b32 (), z0, 2)) -+ -+/* -+** ptrue_add_2_f32_x_untied: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
-+** fadd z0\.s, (z1\.s, \1|\1, z1\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_2_f32_x_untied, svfloat32_t, -+ z0 = svadd_n_f32_x (svptrue_b32 (), z1, 2), -+ z0 = svadd_x (svptrue_b32 (), z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_f32_notrap.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_f32_notrap.c -new file mode 100644 -index 000000000..062e5fd67 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_f32_notrap.c -@@ -0,0 +1,572 @@ -+/* { dg-additional-options "-fno-trapping-math" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** add_f32_m_tied1: -+** fadd z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (add_f32_m_tied1, svfloat32_t, -+ z0 = svadd_f32_m (p0, z0, z1), -+ z0 = svadd_m (p0, z0, z1)) -+ -+/* -+** add_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fadd z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (add_f32_m_tied2, svfloat32_t, -+ z0 = svadd_f32_m (p0, z1, z0), -+ z0 = svadd_m (p0, z1, z0)) -+ -+/* -+** add_f32_m_untied: -+** movprfx z0, z1 -+** fadd z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (add_f32_m_untied, svfloat32_t, -+ z0 = svadd_f32_m (p0, z1, z2), -+ z0 = svadd_m (p0, z1, z2)) -+ -+/* -+** add_s4_f32_m_tied1: -+** mov (z[0-9]+\.s), s4 -+** fadd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (add_s4_f32_m_tied1, svfloat32_t, float, -+ z0 = svadd_n_f32_m (p0, z0, d4), -+ z0 = svadd_m (p0, z0, d4)) -+ -+/* -+** add_s4_f32_m_untied: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0, z1 -+** fadd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (add_s4_f32_m_untied, svfloat32_t, float, -+ z0 = svadd_n_f32_m (p0, z1, d4), -+ z0 = svadd_m (p0, z1, d4)) -+ -+/* -+** add_1_f32_m_tied1: -+** fadd z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_f32_m_tied1, svfloat32_t, -+ z0 = svadd_n_f32_m (p0, z0, 1), -+ z0 = svadd_m (p0, z0, 1)) -+ -+/* -+** add_1_f32_m_untied: -+** movprfx z0, z1 -+** fadd z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_f32_m_untied, svfloat32_t, -+ z0 = svadd_n_f32_m (p0, z1, 1), -+ z0 = svadd_m (p0, z1, 1)) -+ -+/* -+** add_0p5_f32_m_tied1: -+** fadd z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_0p5_f32_m_tied1, svfloat32_t, -+ z0 = svadd_n_f32_m (p0, z0, 0.5), -+ z0 = svadd_m (p0, z0, 0.5)) -+ -+/* -+** add_0p5_f32_m_untied: -+** movprfx z0, z1 -+** fadd z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_0p5_f32_m_untied, svfloat32_t, -+ z0 = svadd_n_f32_m (p0, z1, 0.5), -+ z0 = svadd_m (p0, z1, 0.5)) -+ -+/* -+** add_m1_f32_m_tied1: -+** fsub z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_f32_m_tied1, svfloat32_t, -+ z0 = svadd_n_f32_m (p0, z0, -1), -+ z0 = svadd_m (p0, z0, -1)) -+ -+/* -+** add_m1_f32_m_untied: -+** movprfx z0, z1 -+** fsub z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_f32_m_untied, svfloat32_t, -+ z0 = svadd_n_f32_m (p0, z1, -1), -+ z0 = svadd_m (p0, z1, -1)) -+ -+/* -+** add_m0p5_f32_m_tied1: -+** fsub z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m0p5_f32_m_tied1, svfloat32_t, -+ z0 = svadd_n_f32_m (p0, z0, -0.5), -+ z0 = svadd_m (p0, z0, -0.5)) -+ -+/* -+** add_m0p5_f32_m_untied: -+** movprfx z0, z1 -+** fsub z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m0p5_f32_m_untied, svfloat32_t, -+ z0 = svadd_n_f32_m (p0, z1, -0.5), -+ z0 = svadd_m (p0, z1, -0.5)) -+ -+/* -+** add_m2_f32_m: -+** fmov (z[0-9]+\.s), #-2\.0(?:e\+0)? 
-+** fadd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m2_f32_m, svfloat32_t, -+ z0 = svadd_n_f32_m (p0, z0, -2), -+ z0 = svadd_m (p0, z0, -2)) -+ -+/* -+** add_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fadd z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (add_f32_z_tied1, svfloat32_t, -+ z0 = svadd_f32_z (p0, z0, z1), -+ z0 = svadd_z (p0, z0, z1)) -+ -+/* -+** add_f32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** fadd z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (add_f32_z_tied2, svfloat32_t, -+ z0 = svadd_f32_z (p0, z1, z0), -+ z0 = svadd_z (p0, z1, z0)) -+ -+/* -+** add_f32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fadd z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fadd z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (add_f32_z_untied, svfloat32_t, -+ z0 = svadd_f32_z (p0, z1, z2), -+ z0 = svadd_z (p0, z1, z2)) -+ -+/* -+** add_s4_f32_z_tied1: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0\.s, p0/z, z0\.s -+** fadd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (add_s4_f32_z_tied1, svfloat32_t, float, -+ z0 = svadd_n_f32_z (p0, z0, d4), -+ z0 = svadd_z (p0, z0, d4)) -+ -+/* -+** add_s4_f32_z_untied: -+** mov (z[0-9]+\.s), s4 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fadd z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** fadd z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (add_s4_f32_z_untied, svfloat32_t, float, -+ z0 = svadd_n_f32_z (p0, z1, d4), -+ z0 = svadd_z (p0, z1, d4)) -+ -+/* -+** add_1_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fadd z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_f32_z_tied1, svfloat32_t, -+ z0 = svadd_n_f32_z (p0, z0, 1), -+ z0 = svadd_z (p0, z0, 1)) -+ -+/* -+** add_1_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fadd z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_f32_z_untied, svfloat32_t, -+ z0 = svadd_n_f32_z (p0, z1, 1), -+ z0 = svadd_z (p0, z1, 1)) -+ -+/* -+** add_0p5_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fadd z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_0p5_f32_z_tied1, svfloat32_t, -+ z0 = svadd_n_f32_z (p0, z0, 0.5), -+ z0 = svadd_z (p0, z0, 0.5)) -+ -+/* -+** add_0p5_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fadd z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_0p5_f32_z_untied, svfloat32_t, -+ z0 = svadd_n_f32_z (p0, z1, 0.5), -+ z0 = svadd_z (p0, z1, 0.5)) -+ -+/* -+** add_m1_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fsub z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_f32_z_tied1, svfloat32_t, -+ z0 = svadd_n_f32_z (p0, z0, -1), -+ z0 = svadd_z (p0, z0, -1)) -+ -+/* -+** add_m1_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fsub z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_f32_z_untied, svfloat32_t, -+ z0 = svadd_n_f32_z (p0, z1, -1), -+ z0 = svadd_z (p0, z1, -1)) -+ -+/* -+** add_m0p5_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fsub z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m0p5_f32_z_tied1, svfloat32_t, -+ z0 = svadd_n_f32_z (p0, z0, -0.5), -+ z0 = svadd_z (p0, z0, -0.5)) -+ -+/* -+** add_m0p5_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fsub z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m0p5_f32_z_untied, svfloat32_t, -+ z0 = svadd_n_f32_z (p0, z1, -0.5), -+ z0 = svadd_z (p0, z1, -0.5)) -+ -+/* -+** add_m2_f32_z: -+** fmov (z[0-9]+\.s), #-2\.0(?:e\+0)? 
-+** movprfx z0\.s, p0/z, z0\.s -+** fadd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m2_f32_z, svfloat32_t, -+ z0 = svadd_n_f32_z (p0, z0, -2), -+ z0 = svadd_z (p0, z0, -2)) -+ -+/* -+** add_f32_x_tied1: -+** fadd z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (add_f32_x_tied1, svfloat32_t, -+ z0 = svadd_f32_x (p0, z0, z1), -+ z0 = svadd_x (p0, z0, z1)) -+ -+/* -+** add_f32_x_tied2: -+** fadd z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (add_f32_x_tied2, svfloat32_t, -+ z0 = svadd_f32_x (p0, z1, z0), -+ z0 = svadd_x (p0, z1, z0)) -+ -+/* -+** add_f32_x_untied: -+** fadd z0\.s, (z1\.s, z2\.s|z2\.s, z1\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (add_f32_x_untied, svfloat32_t, -+ z0 = svadd_f32_x (p0, z1, z2), -+ z0 = svadd_x (p0, z1, z2)) -+ -+/* -+** add_s4_f32_x_tied1: -+** mov (z[0-9]+\.s), s4 -+** fadd z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_ZD (add_s4_f32_x_tied1, svfloat32_t, float, -+ z0 = svadd_n_f32_x (p0, z0, d4), -+ z0 = svadd_x (p0, z0, d4)) -+ -+/* -+** add_s4_f32_x_untied: -+** mov (z[0-9]+\.s), s4 -+** fadd z0\.s, (z1\.s, \1|\1, z1\.s) -+** ret -+*/ -+TEST_UNIFORM_ZD (add_s4_f32_x_untied, svfloat32_t, float, -+ z0 = svadd_n_f32_x (p0, z1, d4), -+ z0 = svadd_x (p0, z1, d4)) -+ -+/* -+** add_1_f32_x_tied1: -+** fadd z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_f32_x_tied1, svfloat32_t, -+ z0 = svadd_n_f32_x (p0, z0, 1), -+ z0 = svadd_x (p0, z0, 1)) -+ -+/* -+** add_1_f32_x_untied: -+** movprfx z0, z1 -+** fadd z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_f32_x_untied, svfloat32_t, -+ z0 = svadd_n_f32_x (p0, z1, 1), -+ z0 = svadd_x (p0, z1, 1)) -+ -+/* -+** add_0p5_f32_x_tied1: -+** fadd z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_0p5_f32_x_tied1, svfloat32_t, -+ z0 = svadd_n_f32_x (p0, z0, 0.5), -+ z0 = svadd_x (p0, z0, 0.5)) -+ -+/* -+** add_0p5_f32_x_untied: -+** movprfx z0, z1 -+** fadd z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_0p5_f32_x_untied, svfloat32_t, -+ z0 = svadd_n_f32_x (p0, z1, 0.5), -+ z0 = svadd_x (p0, z1, 0.5)) -+ -+/* -+** add_m1_f32_x_tied1: -+** fsub z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_f32_x_tied1, svfloat32_t, -+ z0 = svadd_n_f32_x (p0, z0, -1), -+ z0 = svadd_x (p0, z0, -1)) -+ -+/* -+** add_m1_f32_x_untied: -+** movprfx z0, z1 -+** fsub z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_f32_x_untied, svfloat32_t, -+ z0 = svadd_n_f32_x (p0, z1, -1), -+ z0 = svadd_x (p0, z1, -1)) -+ -+/* -+** add_m0p5_f32_x_tied1: -+** fsub z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m0p5_f32_x_tied1, svfloat32_t, -+ z0 = svadd_n_f32_x (p0, z0, -0.5), -+ z0 = svadd_x (p0, z0, -0.5)) -+ -+/* -+** add_m0p5_f32_x_untied: -+** movprfx z0, z1 -+** fsub z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m0p5_f32_x_untied, svfloat32_t, -+ z0 = svadd_n_f32_x (p0, z1, -0.5), -+ z0 = svadd_x (p0, z1, -0.5)) -+ -+/* -+** add_2_f32_x_tied1: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fadd z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (add_2_f32_x_tied1, svfloat32_t, -+ z0 = svadd_n_f32_x (p0, z0, 2), -+ z0 = svadd_x (p0, z0, 2)) -+ -+/* -+** add_2_f32_x_untied: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
-+** fadd z0\.s, (z1\.s, \1|\1, z1\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (add_2_f32_x_untied, svfloat32_t, -+ z0 = svadd_n_f32_x (p0, z1, 2), -+ z0 = svadd_x (p0, z1, 2)) -+ -+/* -+** ptrue_add_f32_x_tied1: -+** fadd z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_f32_x_tied1, svfloat32_t, -+ z0 = svadd_f32_x (svptrue_b32 (), z0, z1), -+ z0 = svadd_x (svptrue_b32 (), z0, z1)) -+ -+/* -+** ptrue_add_f32_x_tied2: -+** fadd z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_f32_x_tied2, svfloat32_t, -+ z0 = svadd_f32_x (svptrue_b32 (), z1, z0), -+ z0 = svadd_x (svptrue_b32 (), z1, z0)) -+ -+/* -+** ptrue_add_f32_x_untied: -+** fadd z0\.s, (z1\.s, z2\.s|z2\.s, z1\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_f32_x_untied, svfloat32_t, -+ z0 = svadd_f32_x (svptrue_b32 (), z1, z2), -+ z0 = svadd_x (svptrue_b32 (), z1, z2)) -+ -+/* -+** ptrue_add_1_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_1_f32_x_tied1, svfloat32_t, -+ z0 = svadd_n_f32_x (svptrue_b32 (), z0, 1), -+ z0 = svadd_x (svptrue_b32 (), z0, 1)) -+ -+/* -+** ptrue_add_1_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_1_f32_x_untied, svfloat32_t, -+ z0 = svadd_n_f32_x (svptrue_b32 (), z1, 1), -+ z0 = svadd_x (svptrue_b32 (), z1, 1)) -+ -+/* -+** ptrue_add_0p5_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_0p5_f32_x_tied1, svfloat32_t, -+ z0 = svadd_n_f32_x (svptrue_b32 (), z0, 0.5), -+ z0 = svadd_x (svptrue_b32 (), z0, 0.5)) -+ -+/* -+** ptrue_add_0p5_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_0p5_f32_x_untied, svfloat32_t, -+ z0 = svadd_n_f32_x (svptrue_b32 (), z1, 0.5), -+ z0 = svadd_x (svptrue_b32 (), z1, 0.5)) -+ -+/* -+** ptrue_add_m1_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_m1_f32_x_tied1, svfloat32_t, -+ z0 = svadd_n_f32_x (svptrue_b32 (), z0, -1), -+ z0 = svadd_x (svptrue_b32 (), z0, -1)) -+ -+/* -+** ptrue_add_m1_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_m1_f32_x_untied, svfloat32_t, -+ z0 = svadd_n_f32_x (svptrue_b32 (), z1, -1), -+ z0 = svadd_x (svptrue_b32 (), z1, -1)) -+ -+/* -+** ptrue_add_m0p5_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_m0p5_f32_x_tied1, svfloat32_t, -+ z0 = svadd_n_f32_x (svptrue_b32 (), z0, -0.5), -+ z0 = svadd_x (svptrue_b32 (), z0, -0.5)) -+ -+/* -+** ptrue_add_m0p5_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_m0p5_f32_x_untied, svfloat32_t, -+ z0 = svadd_n_f32_x (svptrue_b32 (), z1, -0.5), -+ z0 = svadd_x (svptrue_b32 (), z1, -0.5)) -+ -+/* -+** ptrue_add_2_f32_x_tied1: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fadd z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_2_f32_x_tied1, svfloat32_t, -+ z0 = svadd_n_f32_x (svptrue_b32 (), z0, 2), -+ z0 = svadd_x (svptrue_b32 (), z0, 2)) -+ -+/* -+** ptrue_add_2_f32_x_untied: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
-+** fadd z0\.s, (z1\.s, \1|\1, z1\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_2_f32_x_untied, svfloat32_t, -+ z0 = svadd_n_f32_x (svptrue_b32 (), z1, 2), -+ z0 = svadd_x (svptrue_b32 (), z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_f64.c -new file mode 100644 -index 000000000..7185f3acf ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_f64.c -@@ -0,0 +1,577 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** add_f64_m_tied1: -+** fadd z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (add_f64_m_tied1, svfloat64_t, -+ z0 = svadd_f64_m (p0, z0, z1), -+ z0 = svadd_m (p0, z0, z1)) -+ -+/* -+** add_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fadd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_f64_m_tied2, svfloat64_t, -+ z0 = svadd_f64_m (p0, z1, z0), -+ z0 = svadd_m (p0, z1, z0)) -+ -+/* -+** add_f64_m_untied: -+** movprfx z0, z1 -+** fadd z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (add_f64_m_untied, svfloat64_t, -+ z0 = svadd_f64_m (p0, z1, z2), -+ z0 = svadd_m (p0, z1, z2)) -+ -+/* -+** add_d4_f64_m_tied1: -+** mov (z[0-9]+\.d), d4 -+** fadd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (add_d4_f64_m_tied1, svfloat64_t, double, -+ z0 = svadd_n_f64_m (p0, z0, d4), -+ z0 = svadd_m (p0, z0, d4)) -+ -+/* -+** add_d4_f64_m_untied: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0, z1 -+** fadd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (add_d4_f64_m_untied, svfloat64_t, double, -+ z0 = svadd_n_f64_m (p0, z1, d4), -+ z0 = svadd_m (p0, z1, d4)) -+ -+/* -+** add_1_f64_m_tied1: -+** fadd z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_f64_m_tied1, svfloat64_t, -+ z0 = svadd_n_f64_m (p0, z0, 1), -+ z0 = svadd_m (p0, z0, 1)) -+ -+/* -+** add_1_f64_m_untied: -+** movprfx z0, z1 -+** fadd z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_f64_m_untied, svfloat64_t, -+ z0 = svadd_n_f64_m (p0, z1, 1), -+ z0 = svadd_m (p0, z1, 1)) -+ -+/* -+** add_0p5_f64_m_tied1: -+** fadd z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_0p5_f64_m_tied1, svfloat64_t, -+ z0 = svadd_n_f64_m (p0, z0, 0.5), -+ z0 = svadd_m (p0, z0, 0.5)) -+ -+/* -+** add_0p5_f64_m_untied: -+** movprfx z0, z1 -+** fadd z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_0p5_f64_m_untied, svfloat64_t, -+ z0 = svadd_n_f64_m (p0, z1, 0.5), -+ z0 = svadd_m (p0, z1, 0.5)) -+ -+/* -+** add_m1_f64_m_tied1: -+** fsub z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_f64_m_tied1, svfloat64_t, -+ z0 = svadd_n_f64_m (p0, z0, -1), -+ z0 = svadd_m (p0, z0, -1)) -+ -+/* -+** add_m1_f64_m_untied: -+** movprfx z0, z1 -+** fsub z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_f64_m_untied, svfloat64_t, -+ z0 = svadd_n_f64_m (p0, z1, -1), -+ z0 = svadd_m (p0, z1, -1)) -+ -+/* -+** add_m0p5_f64_m_tied1: -+** fsub z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m0p5_f64_m_tied1, svfloat64_t, -+ z0 = svadd_n_f64_m (p0, z0, -0.5), -+ z0 = svadd_m (p0, z0, -0.5)) -+ -+/* -+** add_m0p5_f64_m_untied: -+** movprfx z0, z1 -+** fsub z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m0p5_f64_m_untied, svfloat64_t, -+ z0 = svadd_n_f64_m (p0, z1, -0.5), -+ z0 = svadd_m (p0, z1, -0.5)) -+ -+/* -+** add_m2_f64_m: -+** fmov (z[0-9]+\.d), #-2\.0(?:e\+0)? 
-+** fadd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m2_f64_m, svfloat64_t, -+ z0 = svadd_n_f64_m (p0, z0, -2), -+ z0 = svadd_m (p0, z0, -2)) -+ -+/* -+** add_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fadd z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (add_f64_z_tied1, svfloat64_t, -+ z0 = svadd_f64_z (p0, z0, z1), -+ z0 = svadd_z (p0, z0, z1)) -+ -+/* -+** add_f64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** fadd z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (add_f64_z_tied2, svfloat64_t, -+ z0 = svadd_f64_z (p0, z1, z0), -+ z0 = svadd_z (p0, z1, z0)) -+ -+/* -+** add_f64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fadd z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fadd z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (add_f64_z_untied, svfloat64_t, -+ z0 = svadd_f64_z (p0, z1, z2), -+ z0 = svadd_z (p0, z1, z2)) -+ -+/* -+** add_d4_f64_z_tied1: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0\.d, p0/z, z0\.d -+** fadd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (add_d4_f64_z_tied1, svfloat64_t, double, -+ z0 = svadd_n_f64_z (p0, z0, d4), -+ z0 = svadd_z (p0, z0, d4)) -+ -+/* -+** add_d4_f64_z_untied: -+** mov (z[0-9]+\.d), d4 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fadd z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** fadd z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (add_d4_f64_z_untied, svfloat64_t, double, -+ z0 = svadd_n_f64_z (p0, z1, d4), -+ z0 = svadd_z (p0, z1, d4)) -+ -+/* -+** add_1_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fadd z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_f64_z_tied1, svfloat64_t, -+ z0 = svadd_n_f64_z (p0, z0, 1), -+ z0 = svadd_z (p0, z0, 1)) -+ -+/* -+** add_1_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fadd z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_f64_z_untied, svfloat64_t, -+ z0 = svadd_n_f64_z (p0, z1, 1), -+ z0 = svadd_z (p0, z1, 1)) -+ -+/* -+** add_0p5_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fadd z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_0p5_f64_z_tied1, svfloat64_t, -+ z0 = svadd_n_f64_z (p0, z0, 0.5), -+ z0 = svadd_z (p0, z0, 0.5)) -+ -+/* -+** add_0p5_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fadd z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_0p5_f64_z_untied, svfloat64_t, -+ z0 = svadd_n_f64_z (p0, z1, 0.5), -+ z0 = svadd_z (p0, z1, 0.5)) -+ -+/* -+** add_m1_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fsub z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_f64_z_tied1, svfloat64_t, -+ z0 = svadd_n_f64_z (p0, z0, -1), -+ z0 = svadd_z (p0, z0, -1)) -+ -+/* -+** add_m1_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fsub z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_f64_z_untied, svfloat64_t, -+ z0 = svadd_n_f64_z (p0, z1, -1), -+ z0 = svadd_z (p0, z1, -1)) -+ -+/* -+** add_m0p5_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fsub z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m0p5_f64_z_tied1, svfloat64_t, -+ z0 = svadd_n_f64_z (p0, z0, -0.5), -+ z0 = svadd_z (p0, z0, -0.5)) -+ -+/* -+** add_m0p5_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fsub z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m0p5_f64_z_untied, svfloat64_t, -+ z0 = svadd_n_f64_z (p0, z1, -0.5), -+ z0 = svadd_z (p0, z1, -0.5)) -+ -+/* -+** add_m2_f64_z: -+** fmov (z[0-9]+\.d), #-2\.0(?:e\+0)? 
-+** movprfx z0\.d, p0/z, z0\.d -+** fadd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m2_f64_z, svfloat64_t, -+ z0 = svadd_n_f64_z (p0, z0, -2), -+ z0 = svadd_z (p0, z0, -2)) -+ -+/* -+** add_f64_x_tied1: -+** fadd z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (add_f64_x_tied1, svfloat64_t, -+ z0 = svadd_f64_x (p0, z0, z1), -+ z0 = svadd_x (p0, z0, z1)) -+ -+/* -+** add_f64_x_tied2: -+** fadd z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (add_f64_x_tied2, svfloat64_t, -+ z0 = svadd_f64_x (p0, z1, z0), -+ z0 = svadd_x (p0, z1, z0)) -+ -+/* -+** add_f64_x_untied: -+** ( -+** movprfx z0, z1 -+** fadd z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0, z2 -+** fadd z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (add_f64_x_untied, svfloat64_t, -+ z0 = svadd_f64_x (p0, z1, z2), -+ z0 = svadd_x (p0, z1, z2)) -+ -+/* -+** add_d4_f64_x_tied1: -+** mov (z[0-9]+\.d), d4 -+** fadd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (add_d4_f64_x_tied1, svfloat64_t, double, -+ z0 = svadd_n_f64_x (p0, z0, d4), -+ z0 = svadd_x (p0, z0, d4)) -+ -+/* -+** add_d4_f64_x_untied: -+** mov z0\.d, d4 -+** fadd z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZD (add_d4_f64_x_untied, svfloat64_t, double, -+ z0 = svadd_n_f64_x (p0, z1, d4), -+ z0 = svadd_x (p0, z1, d4)) -+ -+/* -+** add_1_f64_x_tied1: -+** fadd z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_f64_x_tied1, svfloat64_t, -+ z0 = svadd_n_f64_x (p0, z0, 1), -+ z0 = svadd_x (p0, z0, 1)) -+ -+/* -+** add_1_f64_x_untied: -+** movprfx z0, z1 -+** fadd z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_f64_x_untied, svfloat64_t, -+ z0 = svadd_n_f64_x (p0, z1, 1), -+ z0 = svadd_x (p0, z1, 1)) -+ -+/* -+** add_0p5_f64_x_tied1: -+** fadd z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_0p5_f64_x_tied1, svfloat64_t, -+ z0 = svadd_n_f64_x (p0, z0, 0.5), -+ z0 = svadd_x (p0, z0, 0.5)) -+ -+/* -+** add_0p5_f64_x_untied: -+** movprfx z0, z1 -+** fadd z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_0p5_f64_x_untied, svfloat64_t, -+ z0 = svadd_n_f64_x (p0, z1, 0.5), -+ z0 = svadd_x (p0, z1, 0.5)) -+ -+/* -+** add_m1_f64_x_tied1: -+** fsub z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_f64_x_tied1, svfloat64_t, -+ z0 = svadd_n_f64_x (p0, z0, -1), -+ z0 = svadd_x (p0, z0, -1)) -+ -+/* -+** add_m1_f64_x_untied: -+** movprfx z0, z1 -+** fsub z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_f64_x_untied, svfloat64_t, -+ z0 = svadd_n_f64_x (p0, z1, -1), -+ z0 = svadd_x (p0, z1, -1)) -+ -+/* -+** add_m0p5_f64_x_tied1: -+** fsub z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m0p5_f64_x_tied1, svfloat64_t, -+ z0 = svadd_n_f64_x (p0, z0, -0.5), -+ z0 = svadd_x (p0, z0, -0.5)) -+ -+/* -+** add_m0p5_f64_x_untied: -+** movprfx z0, z1 -+** fsub z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m0p5_f64_x_untied, svfloat64_t, -+ z0 = svadd_n_f64_x (p0, z1, -0.5), -+ z0 = svadd_x (p0, z1, -0.5)) -+ -+/* -+** add_2_f64_x_tied1: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fadd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_2_f64_x_tied1, svfloat64_t, -+ z0 = svadd_n_f64_x (p0, z0, 2), -+ z0 = svadd_x (p0, z0, 2)) -+ -+/* -+** add_2_f64_x_untied: -+** fmov z0\.d, #2\.0(?:e\+0)? 
-+** fadd z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (add_2_f64_x_untied, svfloat64_t, -+ z0 = svadd_n_f64_x (p0, z1, 2), -+ z0 = svadd_x (p0, z1, 2)) -+ -+/* -+** ptrue_add_f64_x_tied1: -+** fadd z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_f64_x_tied1, svfloat64_t, -+ z0 = svadd_f64_x (svptrue_b64 (), z0, z1), -+ z0 = svadd_x (svptrue_b64 (), z0, z1)) -+ -+/* -+** ptrue_add_f64_x_tied2: -+** fadd z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_f64_x_tied2, svfloat64_t, -+ z0 = svadd_f64_x (svptrue_b64 (), z1, z0), -+ z0 = svadd_x (svptrue_b64 (), z1, z0)) -+ -+/* -+** ptrue_add_f64_x_untied: -+** fadd z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_f64_x_untied, svfloat64_t, -+ z0 = svadd_f64_x (svptrue_b64 (), z1, z2), -+ z0 = svadd_x (svptrue_b64 (), z1, z2)) -+ -+/* -+** ptrue_add_1_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_1_f64_x_tied1, svfloat64_t, -+ z0 = svadd_n_f64_x (svptrue_b64 (), z0, 1), -+ z0 = svadd_x (svptrue_b64 (), z0, 1)) -+ -+/* -+** ptrue_add_1_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_1_f64_x_untied, svfloat64_t, -+ z0 = svadd_n_f64_x (svptrue_b64 (), z1, 1), -+ z0 = svadd_x (svptrue_b64 (), z1, 1)) -+ -+/* -+** ptrue_add_0p5_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_0p5_f64_x_tied1, svfloat64_t, -+ z0 = svadd_n_f64_x (svptrue_b64 (), z0, 0.5), -+ z0 = svadd_x (svptrue_b64 (), z0, 0.5)) -+ -+/* -+** ptrue_add_0p5_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_0p5_f64_x_untied, svfloat64_t, -+ z0 = svadd_n_f64_x (svptrue_b64 (), z1, 0.5), -+ z0 = svadd_x (svptrue_b64 (), z1, 0.5)) -+ -+/* -+** ptrue_add_m1_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_m1_f64_x_tied1, svfloat64_t, -+ z0 = svadd_n_f64_x (svptrue_b64 (), z0, -1), -+ z0 = svadd_x (svptrue_b64 (), z0, -1)) -+ -+/* -+** ptrue_add_m1_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_m1_f64_x_untied, svfloat64_t, -+ z0 = svadd_n_f64_x (svptrue_b64 (), z1, -1), -+ z0 = svadd_x (svptrue_b64 (), z1, -1)) -+ -+/* -+** ptrue_add_m0p5_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_m0p5_f64_x_tied1, svfloat64_t, -+ z0 = svadd_n_f64_x (svptrue_b64 (), z0, -0.5), -+ z0 = svadd_x (svptrue_b64 (), z0, -0.5)) -+ -+/* -+** ptrue_add_m0p5_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_m0p5_f64_x_untied, svfloat64_t, -+ z0 = svadd_n_f64_x (svptrue_b64 (), z1, -0.5), -+ z0 = svadd_x (svptrue_b64 (), z1, -0.5)) -+ -+/* -+** ptrue_add_2_f64_x_tied1: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fadd z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_2_f64_x_tied1, svfloat64_t, -+ z0 = svadd_n_f64_x (svptrue_b64 (), z0, 2), -+ z0 = svadd_x (svptrue_b64 (), z0, 2)) -+ -+/* -+** ptrue_add_2_f64_x_untied: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
-+** fadd z0\.d, (z1\.d, \1|\1, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_2_f64_x_untied, svfloat64_t, -+ z0 = svadd_n_f64_x (svptrue_b64 (), z1, 2), -+ z0 = svadd_x (svptrue_b64 (), z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_f64_notrap.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_f64_notrap.c -new file mode 100644 -index 000000000..6d095b507 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_f64_notrap.c -@@ -0,0 +1,572 @@ -+/* { dg-additional-options "-fno-trapping-math" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** add_f64_m_tied1: -+** fadd z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (add_f64_m_tied1, svfloat64_t, -+ z0 = svadd_f64_m (p0, z0, z1), -+ z0 = svadd_m (p0, z0, z1)) -+ -+/* -+** add_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fadd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_f64_m_tied2, svfloat64_t, -+ z0 = svadd_f64_m (p0, z1, z0), -+ z0 = svadd_m (p0, z1, z0)) -+ -+/* -+** add_f64_m_untied: -+** movprfx z0, z1 -+** fadd z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (add_f64_m_untied, svfloat64_t, -+ z0 = svadd_f64_m (p0, z1, z2), -+ z0 = svadd_m (p0, z1, z2)) -+ -+/* -+** add_d4_f64_m_tied1: -+** mov (z[0-9]+\.d), d4 -+** fadd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (add_d4_f64_m_tied1, svfloat64_t, double, -+ z0 = svadd_n_f64_m (p0, z0, d4), -+ z0 = svadd_m (p0, z0, d4)) -+ -+/* -+** add_d4_f64_m_untied: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0, z1 -+** fadd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (add_d4_f64_m_untied, svfloat64_t, double, -+ z0 = svadd_n_f64_m (p0, z1, d4), -+ z0 = svadd_m (p0, z1, d4)) -+ -+/* -+** add_1_f64_m_tied1: -+** fadd z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_f64_m_tied1, svfloat64_t, -+ z0 = svadd_n_f64_m (p0, z0, 1), -+ z0 = svadd_m (p0, z0, 1)) -+ -+/* -+** add_1_f64_m_untied: -+** movprfx z0, z1 -+** fadd z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_f64_m_untied, svfloat64_t, -+ z0 = svadd_n_f64_m (p0, z1, 1), -+ z0 = svadd_m (p0, z1, 1)) -+ -+/* -+** add_0p5_f64_m_tied1: -+** fadd z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_0p5_f64_m_tied1, svfloat64_t, -+ z0 = svadd_n_f64_m (p0, z0, 0.5), -+ z0 = svadd_m (p0, z0, 0.5)) -+ -+/* -+** add_0p5_f64_m_untied: -+** movprfx z0, z1 -+** fadd z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_0p5_f64_m_untied, svfloat64_t, -+ z0 = svadd_n_f64_m (p0, z1, 0.5), -+ z0 = svadd_m (p0, z1, 0.5)) -+ -+/* -+** add_m1_f64_m_tied1: -+** fsub z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_f64_m_tied1, svfloat64_t, -+ z0 = svadd_n_f64_m (p0, z0, -1), -+ z0 = svadd_m (p0, z0, -1)) -+ -+/* -+** add_m1_f64_m_untied: -+** movprfx z0, z1 -+** fsub z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_f64_m_untied, svfloat64_t, -+ z0 = svadd_n_f64_m (p0, z1, -1), -+ z0 = svadd_m (p0, z1, -1)) -+ -+/* -+** add_m0p5_f64_m_tied1: -+** fsub z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m0p5_f64_m_tied1, svfloat64_t, -+ z0 = svadd_n_f64_m (p0, z0, -0.5), -+ z0 = svadd_m (p0, z0, -0.5)) -+ -+/* -+** add_m0p5_f64_m_untied: -+** movprfx z0, z1 -+** fsub z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m0p5_f64_m_untied, svfloat64_t, -+ z0 = svadd_n_f64_m (p0, z1, -0.5), -+ z0 = svadd_m (p0, z1, -0.5)) -+ -+/* -+** add_m2_f64_m: -+** fmov (z[0-9]+\.d), #-2\.0(?:e\+0)? 
-+** fadd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m2_f64_m, svfloat64_t, -+ z0 = svadd_n_f64_m (p0, z0, -2), -+ z0 = svadd_m (p0, z0, -2)) -+ -+/* -+** add_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fadd z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (add_f64_z_tied1, svfloat64_t, -+ z0 = svadd_f64_z (p0, z0, z1), -+ z0 = svadd_z (p0, z0, z1)) -+ -+/* -+** add_f64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** fadd z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (add_f64_z_tied2, svfloat64_t, -+ z0 = svadd_f64_z (p0, z1, z0), -+ z0 = svadd_z (p0, z1, z0)) -+ -+/* -+** add_f64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fadd z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fadd z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (add_f64_z_untied, svfloat64_t, -+ z0 = svadd_f64_z (p0, z1, z2), -+ z0 = svadd_z (p0, z1, z2)) -+ -+/* -+** add_d4_f64_z_tied1: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0\.d, p0/z, z0\.d -+** fadd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (add_d4_f64_z_tied1, svfloat64_t, double, -+ z0 = svadd_n_f64_z (p0, z0, d4), -+ z0 = svadd_z (p0, z0, d4)) -+ -+/* -+** add_d4_f64_z_untied: -+** mov (z[0-9]+\.d), d4 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fadd z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** fadd z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (add_d4_f64_z_untied, svfloat64_t, double, -+ z0 = svadd_n_f64_z (p0, z1, d4), -+ z0 = svadd_z (p0, z1, d4)) -+ -+/* -+** add_1_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fadd z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_f64_z_tied1, svfloat64_t, -+ z0 = svadd_n_f64_z (p0, z0, 1), -+ z0 = svadd_z (p0, z0, 1)) -+ -+/* -+** add_1_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fadd z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_f64_z_untied, svfloat64_t, -+ z0 = svadd_n_f64_z (p0, z1, 1), -+ z0 = svadd_z (p0, z1, 1)) -+ -+/* -+** add_0p5_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fadd z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_0p5_f64_z_tied1, svfloat64_t, -+ z0 = svadd_n_f64_z (p0, z0, 0.5), -+ z0 = svadd_z (p0, z0, 0.5)) -+ -+/* -+** add_0p5_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fadd z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_0p5_f64_z_untied, svfloat64_t, -+ z0 = svadd_n_f64_z (p0, z1, 0.5), -+ z0 = svadd_z (p0, z1, 0.5)) -+ -+/* -+** add_m1_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fsub z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_f64_z_tied1, svfloat64_t, -+ z0 = svadd_n_f64_z (p0, z0, -1), -+ z0 = svadd_z (p0, z0, -1)) -+ -+/* -+** add_m1_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fsub z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_f64_z_untied, svfloat64_t, -+ z0 = svadd_n_f64_z (p0, z1, -1), -+ z0 = svadd_z (p0, z1, -1)) -+ -+/* -+** add_m0p5_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fsub z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m0p5_f64_z_tied1, svfloat64_t, -+ z0 = svadd_n_f64_z (p0, z0, -0.5), -+ z0 = svadd_z (p0, z0, -0.5)) -+ -+/* -+** add_m0p5_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fsub z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m0p5_f64_z_untied, svfloat64_t, -+ z0 = svadd_n_f64_z (p0, z1, -0.5), -+ z0 = svadd_z (p0, z1, -0.5)) -+ -+/* -+** add_m2_f64_z: -+** fmov (z[0-9]+\.d), #-2\.0(?:e\+0)? 
-+** movprfx z0\.d, p0/z, z0\.d -+** fadd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m2_f64_z, svfloat64_t, -+ z0 = svadd_n_f64_z (p0, z0, -2), -+ z0 = svadd_z (p0, z0, -2)) -+ -+/* -+** add_f64_x_tied1: -+** fadd z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (add_f64_x_tied1, svfloat64_t, -+ z0 = svadd_f64_x (p0, z0, z1), -+ z0 = svadd_x (p0, z0, z1)) -+ -+/* -+** add_f64_x_tied2: -+** fadd z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (add_f64_x_tied2, svfloat64_t, -+ z0 = svadd_f64_x (p0, z1, z0), -+ z0 = svadd_x (p0, z1, z0)) -+ -+/* -+** add_f64_x_untied: -+** fadd z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (add_f64_x_untied, svfloat64_t, -+ z0 = svadd_f64_x (p0, z1, z2), -+ z0 = svadd_x (p0, z1, z2)) -+ -+/* -+** add_d4_f64_x_tied1: -+** mov (z[0-9]+\.d), d4 -+** fadd z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_ZD (add_d4_f64_x_tied1, svfloat64_t, double, -+ z0 = svadd_n_f64_x (p0, z0, d4), -+ z0 = svadd_x (p0, z0, d4)) -+ -+/* -+** add_d4_f64_x_untied: -+** mov (z[0-9]+\.d), d4 -+** fadd z0\.d, (z1\.d, \1|\1, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_ZD (add_d4_f64_x_untied, svfloat64_t, double, -+ z0 = svadd_n_f64_x (p0, z1, d4), -+ z0 = svadd_x (p0, z1, d4)) -+ -+/* -+** add_1_f64_x_tied1: -+** fadd z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_f64_x_tied1, svfloat64_t, -+ z0 = svadd_n_f64_x (p0, z0, 1), -+ z0 = svadd_x (p0, z0, 1)) -+ -+/* -+** add_1_f64_x_untied: -+** movprfx z0, z1 -+** fadd z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_f64_x_untied, svfloat64_t, -+ z0 = svadd_n_f64_x (p0, z1, 1), -+ z0 = svadd_x (p0, z1, 1)) -+ -+/* -+** add_0p5_f64_x_tied1: -+** fadd z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_0p5_f64_x_tied1, svfloat64_t, -+ z0 = svadd_n_f64_x (p0, z0, 0.5), -+ z0 = svadd_x (p0, z0, 0.5)) -+ -+/* -+** add_0p5_f64_x_untied: -+** movprfx z0, z1 -+** fadd z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_0p5_f64_x_untied, svfloat64_t, -+ z0 = svadd_n_f64_x (p0, z1, 0.5), -+ z0 = svadd_x (p0, z1, 0.5)) -+ -+/* -+** add_m1_f64_x_tied1: -+** fsub z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_f64_x_tied1, svfloat64_t, -+ z0 = svadd_n_f64_x (p0, z0, -1), -+ z0 = svadd_x (p0, z0, -1)) -+ -+/* -+** add_m1_f64_x_untied: -+** movprfx z0, z1 -+** fsub z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_f64_x_untied, svfloat64_t, -+ z0 = svadd_n_f64_x (p0, z1, -1), -+ z0 = svadd_x (p0, z1, -1)) -+ -+/* -+** add_m0p5_f64_x_tied1: -+** fsub z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m0p5_f64_x_tied1, svfloat64_t, -+ z0 = svadd_n_f64_x (p0, z0, -0.5), -+ z0 = svadd_x (p0, z0, -0.5)) -+ -+/* -+** add_m0p5_f64_x_untied: -+** movprfx z0, z1 -+** fsub z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m0p5_f64_x_untied, svfloat64_t, -+ z0 = svadd_n_f64_x (p0, z1, -0.5), -+ z0 = svadd_x (p0, z1, -0.5)) -+ -+/* -+** add_2_f64_x_tied1: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fadd z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (add_2_f64_x_tied1, svfloat64_t, -+ z0 = svadd_n_f64_x (p0, z0, 2), -+ z0 = svadd_x (p0, z0, 2)) -+ -+/* -+** add_2_f64_x_untied: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
-+** fadd z0\.d, (z1\.d, \1|\1, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (add_2_f64_x_untied, svfloat64_t, -+ z0 = svadd_n_f64_x (p0, z1, 2), -+ z0 = svadd_x (p0, z1, 2)) -+ -+/* -+** ptrue_add_f64_x_tied1: -+** fadd z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_f64_x_tied1, svfloat64_t, -+ z0 = svadd_f64_x (svptrue_b64 (), z0, z1), -+ z0 = svadd_x (svptrue_b64 (), z0, z1)) -+ -+/* -+** ptrue_add_f64_x_tied2: -+** fadd z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_f64_x_tied2, svfloat64_t, -+ z0 = svadd_f64_x (svptrue_b64 (), z1, z0), -+ z0 = svadd_x (svptrue_b64 (), z1, z0)) -+ -+/* -+** ptrue_add_f64_x_untied: -+** fadd z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_f64_x_untied, svfloat64_t, -+ z0 = svadd_f64_x (svptrue_b64 (), z1, z2), -+ z0 = svadd_x (svptrue_b64 (), z1, z2)) -+ -+/* -+** ptrue_add_1_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_1_f64_x_tied1, svfloat64_t, -+ z0 = svadd_n_f64_x (svptrue_b64 (), z0, 1), -+ z0 = svadd_x (svptrue_b64 (), z0, 1)) -+ -+/* -+** ptrue_add_1_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_1_f64_x_untied, svfloat64_t, -+ z0 = svadd_n_f64_x (svptrue_b64 (), z1, 1), -+ z0 = svadd_x (svptrue_b64 (), z1, 1)) -+ -+/* -+** ptrue_add_0p5_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_0p5_f64_x_tied1, svfloat64_t, -+ z0 = svadd_n_f64_x (svptrue_b64 (), z0, 0.5), -+ z0 = svadd_x (svptrue_b64 (), z0, 0.5)) -+ -+/* -+** ptrue_add_0p5_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_0p5_f64_x_untied, svfloat64_t, -+ z0 = svadd_n_f64_x (svptrue_b64 (), z1, 0.5), -+ z0 = svadd_x (svptrue_b64 (), z1, 0.5)) -+ -+/* -+** ptrue_add_m1_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_m1_f64_x_tied1, svfloat64_t, -+ z0 = svadd_n_f64_x (svptrue_b64 (), z0, -1), -+ z0 = svadd_x (svptrue_b64 (), z0, -1)) -+ -+/* -+** ptrue_add_m1_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_m1_f64_x_untied, svfloat64_t, -+ z0 = svadd_n_f64_x (svptrue_b64 (), z1, -1), -+ z0 = svadd_x (svptrue_b64 (), z1, -1)) -+ -+/* -+** ptrue_add_m0p5_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_m0p5_f64_x_tied1, svfloat64_t, -+ z0 = svadd_n_f64_x (svptrue_b64 (), z0, -0.5), -+ z0 = svadd_x (svptrue_b64 (), z0, -0.5)) -+ -+/* -+** ptrue_add_m0p5_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_m0p5_f64_x_untied, svfloat64_t, -+ z0 = svadd_n_f64_x (svptrue_b64 (), z1, -0.5), -+ z0 = svadd_x (svptrue_b64 (), z1, -0.5)) -+ -+/* -+** ptrue_add_2_f64_x_tied1: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fadd z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_2_f64_x_tied1, svfloat64_t, -+ z0 = svadd_n_f64_x (svptrue_b64 (), z0, 2), -+ z0 = svadd_x (svptrue_b64 (), z0, 2)) -+ -+/* -+** ptrue_add_2_f64_x_untied: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
-+** fadd z0\.d, (z1\.d, \1|\1, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_add_2_f64_x_untied, svfloat64_t, -+ z0 = svadd_n_f64_x (svptrue_b64 (), z1, 2), -+ z0 = svadd_x (svptrue_b64 (), z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_s16.c -new file mode 100644 -index 000000000..c0883edf9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_s16.c -@@ -0,0 +1,377 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** add_s16_m_tied1: -+** add z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (add_s16_m_tied1, svint16_t, -+ z0 = svadd_s16_m (p0, z0, z1), -+ z0 = svadd_m (p0, z0, z1)) -+ -+/* -+** add_s16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** add z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (add_s16_m_tied2, svint16_t, -+ z0 = svadd_s16_m (p0, z1, z0), -+ z0 = svadd_m (p0, z1, z0)) -+ -+/* -+** add_s16_m_untied: -+** movprfx z0, z1 -+** add z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (add_s16_m_untied, svint16_t, -+ z0 = svadd_s16_m (p0, z1, z2), -+ z0 = svadd_m (p0, z1, z2)) -+ -+/* -+** add_w0_s16_m_tied1: -+** mov (z[0-9]+\.h), w0 -+** add z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (add_w0_s16_m_tied1, svint16_t, int16_t, -+ z0 = svadd_n_s16_m (p0, z0, x0), -+ z0 = svadd_m (p0, z0, x0)) -+ -+/* -+** add_w0_s16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** add z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (add_w0_s16_m_untied, svint16_t, int16_t, -+ z0 = svadd_n_s16_m (p0, z1, x0), -+ z0 = svadd_m (p0, z1, x0)) -+ -+/* -+** add_1_s16_m_tied1: -+** mov (z[0-9]+\.h), #1 -+** add z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_s16_m_tied1, svint16_t, -+ z0 = svadd_n_s16_m (p0, z0, 1), -+ z0 = svadd_m (p0, z0, 1)) -+ -+/* -+** add_1_s16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), #1 -+** movprfx z0, z1 -+** add z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_s16_m_untied, svint16_t, -+ z0 = svadd_n_s16_m (p0, z1, 1), -+ z0 = svadd_m (p0, z1, 1)) -+ -+/* -+** add_m2_s16_m: -+** mov (z[0-9]+\.h), #-2 -+** add z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m2_s16_m, svint16_t, -+ z0 = svadd_n_s16_m (p0, z0, -2), -+ z0 = svadd_m (p0, z0, -2)) -+ -+/* -+** add_s16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** add z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (add_s16_z_tied1, svint16_t, -+ z0 = svadd_s16_z (p0, z0, z1), -+ z0 = svadd_z (p0, z0, z1)) -+ -+/* -+** add_s16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** add z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (add_s16_z_tied2, svint16_t, -+ z0 = svadd_s16_z (p0, z1, z0), -+ z0 = svadd_z (p0, z1, z0)) -+ -+/* -+** add_s16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** add z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** add z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (add_s16_z_untied, svint16_t, -+ z0 = svadd_s16_z (p0, z1, z2), -+ z0 = svadd_z (p0, z1, z2)) -+ -+/* -+** add_w0_s16_z_tied1: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** add z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (add_w0_s16_z_tied1, svint16_t, int16_t, -+ z0 = svadd_n_s16_z (p0, z0, x0), -+ z0 = svadd_z (p0, z0, x0)) -+ -+/* -+** add_w0_s16_z_untied: -+** mov (z[0-9]+\.h), w0 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** add z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx 
z0\.h, p0/z, \1 -+** add z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (add_w0_s16_z_untied, svint16_t, int16_t, -+ z0 = svadd_n_s16_z (p0, z1, x0), -+ z0 = svadd_z (p0, z1, x0)) -+ -+/* -+** add_1_s16_z_tied1: -+** mov (z[0-9]+\.h), #1 -+** movprfx z0\.h, p0/z, z0\.h -+** add z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_s16_z_tied1, svint16_t, -+ z0 = svadd_n_s16_z (p0, z0, 1), -+ z0 = svadd_z (p0, z0, 1)) -+ -+/* -+** add_1_s16_z_untied: -+** mov (z[0-9]+\.h), #1 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** add z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** add z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_s16_z_untied, svint16_t, -+ z0 = svadd_n_s16_z (p0, z1, 1), -+ z0 = svadd_z (p0, z1, 1)) -+ -+/* -+** add_s16_x_tied1: -+** add z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (add_s16_x_tied1, svint16_t, -+ z0 = svadd_s16_x (p0, z0, z1), -+ z0 = svadd_x (p0, z0, z1)) -+ -+/* -+** add_s16_x_tied2: -+** add z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (add_s16_x_tied2, svint16_t, -+ z0 = svadd_s16_x (p0, z1, z0), -+ z0 = svadd_x (p0, z1, z0)) -+ -+/* -+** add_s16_x_untied: -+** add z0\.h, (z1\.h, z2\.h|z2\.h, z1\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (add_s16_x_untied, svint16_t, -+ z0 = svadd_s16_x (p0, z1, z2), -+ z0 = svadd_x (p0, z1, z2)) -+ -+/* -+** add_w0_s16_x_tied1: -+** mov (z[0-9]+\.h), w0 -+** add z0\.h, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_ZX (add_w0_s16_x_tied1, svint16_t, int16_t, -+ z0 = svadd_n_s16_x (p0, z0, x0), -+ z0 = svadd_x (p0, z0, x0)) -+ -+/* -+** add_w0_s16_x_untied: -+** mov (z[0-9]+\.h), w0 -+** add z0\.h, (z1\.h, \1|\1, z1\.h) -+** ret -+*/ -+TEST_UNIFORM_ZX (add_w0_s16_x_untied, svint16_t, int16_t, -+ z0 = svadd_n_s16_x (p0, z1, x0), -+ z0 = svadd_x (p0, z1, x0)) -+ -+/* -+** add_1_s16_x_tied1: -+** add z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_s16_x_tied1, svint16_t, -+ z0 = svadd_n_s16_x (p0, z0, 1), -+ z0 = svadd_x (p0, z0, 1)) -+ -+/* -+** add_1_s16_x_untied: -+** movprfx z0, z1 -+** add z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_s16_x_untied, svint16_t, -+ z0 = svadd_n_s16_x (p0, z1, 1), -+ z0 = svadd_x (p0, z1, 1)) -+ -+/* -+** add_127_s16_x: -+** add z0\.h, z0\.h, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (add_127_s16_x, svint16_t, -+ z0 = svadd_n_s16_x (p0, z0, 127), -+ z0 = svadd_x (p0, z0, 127)) -+ -+/* -+** add_128_s16_x: -+** add z0\.h, z0\.h, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (add_128_s16_x, svint16_t, -+ z0 = svadd_n_s16_x (p0, z0, 128), -+ z0 = svadd_x (p0, z0, 128)) -+ -+/* -+** add_255_s16_x: -+** add z0\.h, z0\.h, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (add_255_s16_x, svint16_t, -+ z0 = svadd_n_s16_x (p0, z0, 255), -+ z0 = svadd_x (p0, z0, 255)) -+ -+/* -+** add_256_s16_x: -+** add z0\.h, z0\.h, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (add_256_s16_x, svint16_t, -+ z0 = svadd_n_s16_x (p0, z0, 256), -+ z0 = svadd_x (p0, z0, 256)) -+ -+/* -+** add_257_s16_x: -+** mov (z[0-9]+)\.b, #1 -+** add z0\.h, (z0\.h, \1\.h|\1\.h, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (add_257_s16_x, svint16_t, -+ z0 = svadd_n_s16_x (p0, z0, 257), -+ z0 = svadd_x (p0, z0, 257)) -+ -+/* -+** add_512_s16_x: -+** add z0\.h, z0\.h, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (add_512_s16_x, svint16_t, -+ z0 = svadd_n_s16_x (p0, z0, 512), -+ z0 = svadd_x (p0, z0, 512)) -+ -+/* -+** add_65280_s16_x: -+** add z0\.h, z0\.h, #65280 -+** ret -+*/ -+TEST_UNIFORM_Z (add_65280_s16_x, svint16_t, -+ z0 = svadd_n_s16_x (p0, z0, 0xff00), -+ z0 = svadd_x 
(p0, z0, 0xff00)) -+ -+/* -+** add_m1_s16_x: -+** sub z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_s16_x, svint16_t, -+ z0 = svadd_n_s16_x (p0, z0, -1), -+ z0 = svadd_x (p0, z0, -1)) -+ -+/* -+** add_m127_s16_x: -+** sub z0\.h, z0\.h, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m127_s16_x, svint16_t, -+ z0 = svadd_n_s16_x (p0, z0, -127), -+ z0 = svadd_x (p0, z0, -127)) -+ -+/* -+** add_m128_s16_x: -+** sub z0\.h, z0\.h, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m128_s16_x, svint16_t, -+ z0 = svadd_n_s16_x (p0, z0, -128), -+ z0 = svadd_x (p0, z0, -128)) -+ -+/* -+** add_m255_s16_x: -+** sub z0\.h, z0\.h, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m255_s16_x, svint16_t, -+ z0 = svadd_n_s16_x (p0, z0, -255), -+ z0 = svadd_x (p0, z0, -255)) -+ -+/* -+** add_m256_s16_x: -+** add z0\.h, z0\.h, #65280 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m256_s16_x, svint16_t, -+ z0 = svadd_n_s16_x (p0, z0, -256), -+ z0 = svadd_x (p0, z0, -256)) -+ -+/* -+** add_m257_s16_x: -+** mov (z[0-9]+\.h), #-257 -+** add z0\.h, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (add_m257_s16_x, svint16_t, -+ z0 = svadd_n_s16_x (p0, z0, -257), -+ z0 = svadd_x (p0, z0, -257)) -+ -+/* -+** add_m512_s16_x: -+** add z0\.h, z0\.h, #65024 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m512_s16_x, svint16_t, -+ z0 = svadd_n_s16_x (p0, z0, -512), -+ z0 = svadd_x (p0, z0, -512)) -+ -+/* -+** add_m32768_s16_x: -+** add z0\.h, z0\.h, #32768 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m32768_s16_x, svint16_t, -+ z0 = svadd_n_s16_x (p0, z0, -0x8000), -+ z0 = svadd_x (p0, z0, -0x8000)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_s32.c -new file mode 100644 -index 000000000..887038ba3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_s32.c -@@ -0,0 +1,426 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** add_s32_m_tied1: -+** add z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (add_s32_m_tied1, svint32_t, -+ z0 = svadd_s32_m (p0, z0, z1), -+ z0 = svadd_m (p0, z0, z1)) -+ -+/* -+** add_s32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** add z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (add_s32_m_tied2, svint32_t, -+ z0 = svadd_s32_m (p0, z1, z0), -+ z0 = svadd_m (p0, z1, z0)) -+ -+/* -+** add_s32_m_untied: -+** movprfx z0, z1 -+** add z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (add_s32_m_untied, svint32_t, -+ z0 = svadd_s32_m (p0, z1, z2), -+ z0 = svadd_m (p0, z1, z2)) -+ -+/* -+** add_w0_s32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** add z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (add_w0_s32_m_tied1, svint32_t, int32_t, -+ z0 = svadd_n_s32_m (p0, z0, x0), -+ z0 = svadd_m (p0, z0, x0)) -+ -+/* -+** add_w0_s32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** add z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (add_w0_s32_m_untied, svint32_t, int32_t, -+ z0 = svadd_n_s32_m (p0, z1, x0), -+ z0 = svadd_m (p0, z1, x0)) -+ -+/* -+** add_1_s32_m_tied1: -+** mov (z[0-9]+\.s), #1 -+** add z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_s32_m_tied1, svint32_t, -+ z0 = svadd_n_s32_m (p0, z0, 1), -+ z0 = svadd_m (p0, z0, 1)) -+ -+/* -+** add_1_s32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #1 -+** movprfx z0, z1 -+** add z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_s32_m_untied, svint32_t, -+ z0 = svadd_n_s32_m (p0, z1, 1), -+ z0 = svadd_m (p0, z1, 1)) -+ -+/* -+** add_m2_s32_m: -+** mov 
(z[0-9]+\.s), #-2 -+** add z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m2_s32_m, svint32_t, -+ z0 = svadd_n_s32_m (p0, z0, -2), -+ z0 = svadd_m (p0, z0, -2)) -+ -+/* -+** add_s32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** add z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (add_s32_z_tied1, svint32_t, -+ z0 = svadd_s32_z (p0, z0, z1), -+ z0 = svadd_z (p0, z0, z1)) -+ -+/* -+** add_s32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** add z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (add_s32_z_tied2, svint32_t, -+ z0 = svadd_s32_z (p0, z1, z0), -+ z0 = svadd_z (p0, z1, z0)) -+ -+/* -+** add_s32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** add z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** add z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (add_s32_z_untied, svint32_t, -+ z0 = svadd_s32_z (p0, z1, z2), -+ z0 = svadd_z (p0, z1, z2)) -+ -+/* -+** add_w0_s32_z_tied1: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** add z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (add_w0_s32_z_tied1, svint32_t, int32_t, -+ z0 = svadd_n_s32_z (p0, z0, x0), -+ z0 = svadd_z (p0, z0, x0)) -+ -+/* -+** add_w0_s32_z_untied: -+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** add z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** add z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (add_w0_s32_z_untied, svint32_t, int32_t, -+ z0 = svadd_n_s32_z (p0, z1, x0), -+ z0 = svadd_z (p0, z1, x0)) -+ -+/* -+** add_1_s32_z_tied1: -+** mov (z[0-9]+\.s), #1 -+** movprfx z0\.s, p0/z, z0\.s -+** add z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_s32_z_tied1, svint32_t, -+ z0 = svadd_n_s32_z (p0, z0, 1), -+ z0 = svadd_z (p0, z0, 1)) -+ -+/* -+** add_1_s32_z_untied: -+** mov (z[0-9]+\.s), #1 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** add z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** add z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_s32_z_untied, svint32_t, -+ z0 = svadd_n_s32_z (p0, z1, 1), -+ z0 = svadd_z (p0, z1, 1)) -+ -+/* -+** add_s32_x_tied1: -+** add z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (add_s32_x_tied1, svint32_t, -+ z0 = svadd_s32_x (p0, z0, z1), -+ z0 = svadd_x (p0, z0, z1)) -+ -+/* -+** add_s32_x_tied2: -+** add z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (add_s32_x_tied2, svint32_t, -+ z0 = svadd_s32_x (p0, z1, z0), -+ z0 = svadd_x (p0, z1, z0)) -+ -+/* -+** add_s32_x_untied: -+** add z0\.s, (z1\.s, z2\.s|z2\.s, z1\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (add_s32_x_untied, svint32_t, -+ z0 = svadd_s32_x (p0, z1, z2), -+ z0 = svadd_x (p0, z1, z2)) -+ -+/* -+** add_w0_s32_x_tied1: -+** mov (z[0-9]+\.s), w0 -+** add z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_ZX (add_w0_s32_x_tied1, svint32_t, int32_t, -+ z0 = svadd_n_s32_x (p0, z0, x0), -+ z0 = svadd_x (p0, z0, x0)) -+ -+/* -+** add_w0_s32_x_untied: -+** mov (z[0-9]+\.s), w0 -+** add z0\.s, (z1\.s, \1|\1, z1\.s) -+** ret -+*/ -+TEST_UNIFORM_ZX (add_w0_s32_x_untied, svint32_t, int32_t, -+ z0 = svadd_n_s32_x (p0, z1, x0), -+ z0 = svadd_x (p0, z1, x0)) -+ -+/* -+** add_1_s32_x_tied1: -+** add z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_s32_x_tied1, svint32_t, -+ z0 = svadd_n_s32_x (p0, z0, 1), -+ z0 = svadd_x (p0, z0, 1)) -+ -+/* -+** add_1_s32_x_untied: -+** movprfx z0, z1 -+** add z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_s32_x_untied, svint32_t, -+ z0 = svadd_n_s32_x (p0, z1, 1), -+ 
z0 = svadd_x (p0, z1, 1)) -+ -+/* -+** add_127_s32_x: -+** add z0\.s, z0\.s, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (add_127_s32_x, svint32_t, -+ z0 = svadd_n_s32_x (p0, z0, 127), -+ z0 = svadd_x (p0, z0, 127)) -+ -+/* -+** add_128_s32_x: -+** add z0\.s, z0\.s, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (add_128_s32_x, svint32_t, -+ z0 = svadd_n_s32_x (p0, z0, 128), -+ z0 = svadd_x (p0, z0, 128)) -+ -+/* -+** add_255_s32_x: -+** add z0\.s, z0\.s, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (add_255_s32_x, svint32_t, -+ z0 = svadd_n_s32_x (p0, z0, 255), -+ z0 = svadd_x (p0, z0, 255)) -+ -+/* -+** add_256_s32_x: -+** add z0\.s, z0\.s, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (add_256_s32_x, svint32_t, -+ z0 = svadd_n_s32_x (p0, z0, 256), -+ z0 = svadd_x (p0, z0, 256)) -+ -+/* -+** add_511_s32_x: -+** mov (z[0-9]+\.s), #511 -+** add z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (add_511_s32_x, svint32_t, -+ z0 = svadd_n_s32_x (p0, z0, 511), -+ z0 = svadd_x (p0, z0, 511)) -+ -+/* -+** add_512_s32_x: -+** add z0\.s, z0\.s, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (add_512_s32_x, svint32_t, -+ z0 = svadd_n_s32_x (p0, z0, 512), -+ z0 = svadd_x (p0, z0, 512)) -+ -+/* -+** add_65280_s32_x: -+** add z0\.s, z0\.s, #65280 -+** ret -+*/ -+TEST_UNIFORM_Z (add_65280_s32_x, svint32_t, -+ z0 = svadd_n_s32_x (p0, z0, 0xff00), -+ z0 = svadd_x (p0, z0, 0xff00)) -+ -+/* -+** add_65535_s32_x: -+** mov (z[0-9]+\.s), #65535 -+** add z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (add_65535_s32_x, svint32_t, -+ z0 = svadd_n_s32_x (p0, z0, 65535), -+ z0 = svadd_x (p0, z0, 65535)) -+ -+/* -+** add_65536_s32_x: -+** mov (z[0-9]+\.s), #65536 -+** add z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (add_65536_s32_x, svint32_t, -+ z0 = svadd_n_s32_x (p0, z0, 65536), -+ z0 = svadd_x (p0, z0, 65536)) -+ -+/* -+** add_m1_s32_x: -+** sub z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_s32_x, svint32_t, -+ z0 = svadd_n_s32_x (p0, z0, -1), -+ z0 = svadd_x (p0, z0, -1)) -+ -+/* -+** add_m127_s32_x: -+** sub z0\.s, z0\.s, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m127_s32_x, svint32_t, -+ z0 = svadd_n_s32_x (p0, z0, -127), -+ z0 = svadd_x (p0, z0, -127)) -+ -+/* -+** add_m128_s32_x: -+** sub z0\.s, z0\.s, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m128_s32_x, svint32_t, -+ z0 = svadd_n_s32_x (p0, z0, -128), -+ z0 = svadd_x (p0, z0, -128)) -+ -+/* -+** add_m255_s32_x: -+** sub z0\.s, z0\.s, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m255_s32_x, svint32_t, -+ z0 = svadd_n_s32_x (p0, z0, -255), -+ z0 = svadd_x (p0, z0, -255)) -+ -+/* -+** add_m256_s32_x: -+** sub z0\.s, z0\.s, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m256_s32_x, svint32_t, -+ z0 = svadd_n_s32_x (p0, z0, -256), -+ z0 = svadd_x (p0, z0, -256)) -+ -+/* -+** add_m511_s32_x: -+** mov (z[0-9]+\.s), #-511 -+** add z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (add_m511_s32_x, svint32_t, -+ z0 = svadd_n_s32_x (p0, z0, -511), -+ z0 = svadd_x (p0, z0, -511)) -+ -+/* -+** add_m512_s32_x: -+** sub z0\.s, z0\.s, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m512_s32_x, svint32_t, -+ z0 = svadd_n_s32_x (p0, z0, -512), -+ z0 = svadd_x (p0, z0, -512)) -+ -+/* -+** add_m32768_s32_x: -+** sub z0\.s, z0\.s, #32768 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m32768_s32_x, svint32_t, -+ z0 = svadd_n_s32_x (p0, z0, -0x8000), -+ z0 = svadd_x (p0, z0, -0x8000)) -+ -+/* -+** add_m65280_s32_x: -+** sub z0\.s, z0\.s, #65280 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m65280_s32_x, svint32_t, -+ z0 = svadd_n_s32_x (p0, z0, -0xff00), -+ z0 = svadd_x (p0, z0, -0xff00)) -+ -+/* -+** 
add_m65535_s32_x: -+** mov (z[0-9]+\.s), #-65535 -+** add z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (add_m65535_s32_x, svint32_t, -+ z0 = svadd_n_s32_x (p0, z0, -65535), -+ z0 = svadd_x (p0, z0, -65535)) -+ -+/* -+** add_m65536_s32_x: -+** mov (z[0-9]+\.s), #-65536 -+** add z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (add_m65536_s32_x, svint32_t, -+ z0 = svadd_n_s32_x (p0, z0, -65536), -+ z0 = svadd_x (p0, z0, -65536)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_s64.c -new file mode 100644 -index 000000000..aab63ef62 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_s64.c -@@ -0,0 +1,426 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** add_s64_m_tied1: -+** add z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (add_s64_m_tied1, svint64_t, -+ z0 = svadd_s64_m (p0, z0, z1), -+ z0 = svadd_m (p0, z0, z1)) -+ -+/* -+** add_s64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** add z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_s64_m_tied2, svint64_t, -+ z0 = svadd_s64_m (p0, z1, z0), -+ z0 = svadd_m (p0, z1, z0)) -+ -+/* -+** add_s64_m_untied: -+** movprfx z0, z1 -+** add z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (add_s64_m_untied, svint64_t, -+ z0 = svadd_s64_m (p0, z1, z2), -+ z0 = svadd_m (p0, z1, z2)) -+ -+/* -+** add_x0_s64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** add z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (add_x0_s64_m_tied1, svint64_t, int64_t, -+ z0 = svadd_n_s64_m (p0, z0, x0), -+ z0 = svadd_m (p0, z0, x0)) -+ -+/* -+** add_x0_s64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** add z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (add_x0_s64_m_untied, svint64_t, int64_t, -+ z0 = svadd_n_s64_m (p0, z1, x0), -+ z0 = svadd_m (p0, z1, x0)) -+ -+/* -+** add_1_s64_m_tied1: -+** mov (z[0-9]+\.d), #1 -+** add z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_s64_m_tied1, svint64_t, -+ z0 = svadd_n_s64_m (p0, z0, 1), -+ z0 = svadd_m (p0, z0, 1)) -+ -+/* -+** add_1_s64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #1 -+** movprfx z0, z1 -+** add z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_s64_m_untied, svint64_t, -+ z0 = svadd_n_s64_m (p0, z1, 1), -+ z0 = svadd_m (p0, z1, 1)) -+ -+/* -+** add_m2_s64_m: -+** mov (z[0-9]+\.d), #-2 -+** add z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m2_s64_m, svint64_t, -+ z0 = svadd_n_s64_m (p0, z0, -2), -+ z0 = svadd_m (p0, z0, -2)) -+ -+/* -+** add_s64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** add z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (add_s64_z_tied1, svint64_t, -+ z0 = svadd_s64_z (p0, z0, z1), -+ z0 = svadd_z (p0, z0, z1)) -+ -+/* -+** add_s64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** add z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (add_s64_z_tied2, svint64_t, -+ z0 = svadd_s64_z (p0, z1, z0), -+ z0 = svadd_z (p0, z1, z0)) -+ -+/* -+** add_s64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** add z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** add z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (add_s64_z_untied, svint64_t, -+ z0 = svadd_s64_z (p0, z1, z2), -+ z0 = svadd_z (p0, z1, z2)) -+ -+/* -+** add_x0_s64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** add z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX 
(add_x0_s64_z_tied1, svint64_t, int64_t, -+ z0 = svadd_n_s64_z (p0, z0, x0), -+ z0 = svadd_z (p0, z0, x0)) -+ -+/* -+** add_x0_s64_z_untied: -+** mov (z[0-9]+\.d), x0 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** add z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** add z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (add_x0_s64_z_untied, svint64_t, int64_t, -+ z0 = svadd_n_s64_z (p0, z1, x0), -+ z0 = svadd_z (p0, z1, x0)) -+ -+/* -+** add_1_s64_z_tied1: -+** mov (z[0-9]+\.d), #1 -+** movprfx z0\.d, p0/z, z0\.d -+** add z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_s64_z_tied1, svint64_t, -+ z0 = svadd_n_s64_z (p0, z0, 1), -+ z0 = svadd_z (p0, z0, 1)) -+ -+/* -+** add_1_s64_z_untied: -+** mov (z[0-9]+\.d), #1 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** add z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** add z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_s64_z_untied, svint64_t, -+ z0 = svadd_n_s64_z (p0, z1, 1), -+ z0 = svadd_z (p0, z1, 1)) -+ -+/* -+** add_s64_x_tied1: -+** add z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (add_s64_x_tied1, svint64_t, -+ z0 = svadd_s64_x (p0, z0, z1), -+ z0 = svadd_x (p0, z0, z1)) -+ -+/* -+** add_s64_x_tied2: -+** add z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (add_s64_x_tied2, svint64_t, -+ z0 = svadd_s64_x (p0, z1, z0), -+ z0 = svadd_x (p0, z1, z0)) -+ -+/* -+** add_s64_x_untied: -+** add z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (add_s64_x_untied, svint64_t, -+ z0 = svadd_s64_x (p0, z1, z2), -+ z0 = svadd_x (p0, z1, z2)) -+ -+/* -+** add_x0_s64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** add z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (add_x0_s64_x_tied1, svint64_t, int64_t, -+ z0 = svadd_n_s64_x (p0, z0, x0), -+ z0 = svadd_x (p0, z0, x0)) -+ -+/* -+** add_x0_s64_x_untied: -+** mov (z[0-9]+\.d), x0 -+** add z0\.d, (z1\.d, \1|\1, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (add_x0_s64_x_untied, svint64_t, int64_t, -+ z0 = svadd_n_s64_x (p0, z1, x0), -+ z0 = svadd_x (p0, z1, x0)) -+ -+/* -+** add_1_s64_x_tied1: -+** add z0\.d, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_s64_x_tied1, svint64_t, -+ z0 = svadd_n_s64_x (p0, z0, 1), -+ z0 = svadd_x (p0, z0, 1)) -+ -+/* -+** add_1_s64_x_untied: -+** movprfx z0, z1 -+** add z0\.d, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_s64_x_untied, svint64_t, -+ z0 = svadd_n_s64_x (p0, z1, 1), -+ z0 = svadd_x (p0, z1, 1)) -+ -+/* -+** add_127_s64_x: -+** add z0\.d, z0\.d, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (add_127_s64_x, svint64_t, -+ z0 = svadd_n_s64_x (p0, z0, 127), -+ z0 = svadd_x (p0, z0, 127)) -+ -+/* -+** add_128_s64_x: -+** add z0\.d, z0\.d, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (add_128_s64_x, svint64_t, -+ z0 = svadd_n_s64_x (p0, z0, 128), -+ z0 = svadd_x (p0, z0, 128)) -+ -+/* -+** add_255_s64_x: -+** add z0\.d, z0\.d, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (add_255_s64_x, svint64_t, -+ z0 = svadd_n_s64_x (p0, z0, 255), -+ z0 = svadd_x (p0, z0, 255)) -+ -+/* -+** add_256_s64_x: -+** add z0\.d, z0\.d, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (add_256_s64_x, svint64_t, -+ z0 = svadd_n_s64_x (p0, z0, 256), -+ z0 = svadd_x (p0, z0, 256)) -+ -+/* -+** add_511_s64_x: -+** mov (z[0-9]+\.d), #511 -+** add z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (add_511_s64_x, svint64_t, -+ z0 = svadd_n_s64_x (p0, z0, 511), -+ z0 = svadd_x (p0, z0, 511)) -+ -+/* -+** add_512_s64_x: -+** add z0\.d, z0\.d, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (add_512_s64_x, 
svint64_t, -+ z0 = svadd_n_s64_x (p0, z0, 512), -+ z0 = svadd_x (p0, z0, 512)) -+ -+/* -+** add_65280_s64_x: -+** add z0\.d, z0\.d, #65280 -+** ret -+*/ -+TEST_UNIFORM_Z (add_65280_s64_x, svint64_t, -+ z0 = svadd_n_s64_x (p0, z0, 0xff00), -+ z0 = svadd_x (p0, z0, 0xff00)) -+ -+/* -+** add_65535_s64_x: -+** mov (z[0-9]+\.d), #65535 -+** add z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (add_65535_s64_x, svint64_t, -+ z0 = svadd_n_s64_x (p0, z0, 65535), -+ z0 = svadd_x (p0, z0, 65535)) -+ -+/* -+** add_65536_s64_x: -+** mov (z[0-9]+\.d), #65536 -+** add z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (add_65536_s64_x, svint64_t, -+ z0 = svadd_n_s64_x (p0, z0, 65536), -+ z0 = svadd_x (p0, z0, 65536)) -+ -+/* -+** add_m1_s64_x: -+** sub z0\.d, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_s64_x, svint64_t, -+ z0 = svadd_n_s64_x (p0, z0, -1), -+ z0 = svadd_x (p0, z0, -1)) -+ -+/* -+** add_m127_s64_x: -+** sub z0\.d, z0\.d, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m127_s64_x, svint64_t, -+ z0 = svadd_n_s64_x (p0, z0, -127), -+ z0 = svadd_x (p0, z0, -127)) -+ -+/* -+** add_m128_s64_x: -+** sub z0\.d, z0\.d, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m128_s64_x, svint64_t, -+ z0 = svadd_n_s64_x (p0, z0, -128), -+ z0 = svadd_x (p0, z0, -128)) -+ -+/* -+** add_m255_s64_x: -+** sub z0\.d, z0\.d, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m255_s64_x, svint64_t, -+ z0 = svadd_n_s64_x (p0, z0, -255), -+ z0 = svadd_x (p0, z0, -255)) -+ -+/* -+** add_m256_s64_x: -+** sub z0\.d, z0\.d, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m256_s64_x, svint64_t, -+ z0 = svadd_n_s64_x (p0, z0, -256), -+ z0 = svadd_x (p0, z0, -256)) -+ -+/* -+** add_m511_s64_x: -+** mov (z[0-9]+\.d), #-511 -+** add z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (add_m511_s64_x, svint64_t, -+ z0 = svadd_n_s64_x (p0, z0, -511), -+ z0 = svadd_x (p0, z0, -511)) -+ -+/* -+** add_m512_s64_x: -+** sub z0\.d, z0\.d, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m512_s64_x, svint64_t, -+ z0 = svadd_n_s64_x (p0, z0, -512), -+ z0 = svadd_x (p0, z0, -512)) -+ -+/* -+** add_m32768_s64_x: -+** sub z0\.d, z0\.d, #32768 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m32768_s64_x, svint64_t, -+ z0 = svadd_n_s64_x (p0, z0, -0x8000), -+ z0 = svadd_x (p0, z0, -0x8000)) -+ -+/* -+** add_m65280_s64_x: -+** sub z0\.d, z0\.d, #65280 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m65280_s64_x, svint64_t, -+ z0 = svadd_n_s64_x (p0, z0, -0xff00), -+ z0 = svadd_x (p0, z0, -0xff00)) -+ -+/* -+** add_m65535_s64_x: -+** mov (z[0-9]+\.d), #-65535 -+** add z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (add_m65535_s64_x, svint64_t, -+ z0 = svadd_n_s64_x (p0, z0, -65535), -+ z0 = svadd_x (p0, z0, -65535)) -+ -+/* -+** add_m65536_s64_x: -+** mov (z[0-9]+\.d), #-65536 -+** add z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (add_m65536_s64_x, svint64_t, -+ z0 = svadd_n_s64_x (p0, z0, -65536), -+ z0 = svadd_x (p0, z0, -65536)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_s8.c -new file mode 100644 -index 000000000..0889c189d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_s8.c -@@ -0,0 +1,294 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** add_s8_m_tied1: -+** add z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (add_s8_m_tied1, svint8_t, -+ z0 = svadd_s8_m (p0, z0, z1), -+ z0 = svadd_m (p0, z0, z1)) -+ -+/* -+** add_s8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** 
movprfx z0, z1 -+** add z0\.b, p0/m, z0\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (add_s8_m_tied2, svint8_t, -+ z0 = svadd_s8_m (p0, z1, z0), -+ z0 = svadd_m (p0, z1, z0)) -+ -+/* -+** add_s8_m_untied: -+** movprfx z0, z1 -+** add z0\.b, p0/m, z0\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (add_s8_m_untied, svint8_t, -+ z0 = svadd_s8_m (p0, z1, z2), -+ z0 = svadd_m (p0, z1, z2)) -+ -+/* -+** add_w0_s8_m_tied1: -+** mov (z[0-9]+\.b), w0 -+** add z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (add_w0_s8_m_tied1, svint8_t, int8_t, -+ z0 = svadd_n_s8_m (p0, z0, x0), -+ z0 = svadd_m (p0, z0, x0)) -+ -+/* -+** add_w0_s8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** add z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (add_w0_s8_m_untied, svint8_t, int8_t, -+ z0 = svadd_n_s8_m (p0, z1, x0), -+ z0 = svadd_m (p0, z1, x0)) -+ -+/* -+** add_1_s8_m_tied1: -+** mov (z[0-9]+\.b), #1 -+** add z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_s8_m_tied1, svint8_t, -+ z0 = svadd_n_s8_m (p0, z0, 1), -+ z0 = svadd_m (p0, z0, 1)) -+ -+/* -+** add_1_s8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), #1 -+** movprfx z0, z1 -+** add z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_s8_m_untied, svint8_t, -+ z0 = svadd_n_s8_m (p0, z1, 1), -+ z0 = svadd_m (p0, z1, 1)) -+ -+/* -+** add_m1_s8_m: -+** mov (z[0-9]+\.b), #-1 -+** add z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_s8_m, svint8_t, -+ z0 = svadd_n_s8_m (p0, z0, -1), -+ z0 = svadd_m (p0, z0, -1)) -+ -+/* -+** add_s8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** add z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (add_s8_z_tied1, svint8_t, -+ z0 = svadd_s8_z (p0, z0, z1), -+ z0 = svadd_z (p0, z0, z1)) -+ -+/* -+** add_s8_z_tied2: -+** movprfx z0\.b, p0/z, z0\.b -+** add z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (add_s8_z_tied2, svint8_t, -+ z0 = svadd_s8_z (p0, z1, z0), -+ z0 = svadd_z (p0, z1, z0)) -+ -+/* -+** add_s8_z_untied: -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** add z0\.b, p0/m, z0\.b, z2\.b -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** add z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (add_s8_z_untied, svint8_t, -+ z0 = svadd_s8_z (p0, z1, z2), -+ z0 = svadd_z (p0, z1, z2)) -+ -+/* -+** add_w0_s8_z_tied1: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** add z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (add_w0_s8_z_tied1, svint8_t, int8_t, -+ z0 = svadd_n_s8_z (p0, z0, x0), -+ z0 = svadd_z (p0, z0, x0)) -+ -+/* -+** add_w0_s8_z_untied: -+** mov (z[0-9]+\.b), w0 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** add z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** add z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (add_w0_s8_z_untied, svint8_t, int8_t, -+ z0 = svadd_n_s8_z (p0, z1, x0), -+ z0 = svadd_z (p0, z1, x0)) -+ -+/* -+** add_1_s8_z_tied1: -+** mov (z[0-9]+\.b), #1 -+** movprfx z0\.b, p0/z, z0\.b -+** add z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_s8_z_tied1, svint8_t, -+ z0 = svadd_n_s8_z (p0, z0, 1), -+ z0 = svadd_z (p0, z0, 1)) -+ -+/* -+** add_1_s8_z_untied: -+** mov (z[0-9]+\.b), #1 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** add z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** add z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_s8_z_untied, svint8_t, -+ z0 = svadd_n_s8_z (p0, z1, 1), -+ z0 = svadd_z (p0, z1, 1)) -+ -+/* -+** add_s8_x_tied1: -+** add z0\.b, (z0\.b, z1\.b|z1\.b, z0\.b) -+** ret -+*/ -+TEST_UNIFORM_Z 
(add_s8_x_tied1, svint8_t, -+ z0 = svadd_s8_x (p0, z0, z1), -+ z0 = svadd_x (p0, z0, z1)) -+ -+/* -+** add_s8_x_tied2: -+** add z0\.b, (z0\.b, z1\.b|z1\.b, z0\.b) -+** ret -+*/ -+TEST_UNIFORM_Z (add_s8_x_tied2, svint8_t, -+ z0 = svadd_s8_x (p0, z1, z0), -+ z0 = svadd_x (p0, z1, z0)) -+ -+/* -+** add_s8_x_untied: -+** add z0\.b, (z1\.b, z2\.b|z2\.b, z1\.b) -+** ret -+*/ -+TEST_UNIFORM_Z (add_s8_x_untied, svint8_t, -+ z0 = svadd_s8_x (p0, z1, z2), -+ z0 = svadd_x (p0, z1, z2)) -+ -+/* -+** add_w0_s8_x_tied1: -+** mov (z[0-9]+\.b), w0 -+** add z0\.b, (z0\.b, \1|\1, z0\.b) -+** ret -+*/ -+TEST_UNIFORM_ZX (add_w0_s8_x_tied1, svint8_t, int8_t, -+ z0 = svadd_n_s8_x (p0, z0, x0), -+ z0 = svadd_x (p0, z0, x0)) -+ -+/* -+** add_w0_s8_x_untied: -+** mov (z[0-9]+\.b), w0 -+** add z0\.b, (z1\.b, \1|\1, z1\.b) -+** ret -+*/ -+TEST_UNIFORM_ZX (add_w0_s8_x_untied, svint8_t, int8_t, -+ z0 = svadd_n_s8_x (p0, z1, x0), -+ z0 = svadd_x (p0, z1, x0)) -+ -+/* -+** add_1_s8_x_tied1: -+** add z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_s8_x_tied1, svint8_t, -+ z0 = svadd_n_s8_x (p0, z0, 1), -+ z0 = svadd_x (p0, z0, 1)) -+ -+/* -+** add_1_s8_x_untied: -+** movprfx z0, z1 -+** add z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_s8_x_untied, svint8_t, -+ z0 = svadd_n_s8_x (p0, z1, 1), -+ z0 = svadd_x (p0, z1, 1)) -+ -+/* -+** add_127_s8_x: -+** add z0\.b, z0\.b, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (add_127_s8_x, svint8_t, -+ z0 = svadd_n_s8_x (p0, z0, 127), -+ z0 = svadd_x (p0, z0, 127)) -+ -+/* -+** add_128_s8_x: -+** add z0\.b, z0\.b, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (add_128_s8_x, svint8_t, -+ z0 = svadd_n_s8_x (p0, z0, 128), -+ z0 = svadd_x (p0, z0, 128)) -+ -+/* -+** add_255_s8_x: -+** add z0\.b, z0\.b, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (add_255_s8_x, svint8_t, -+ z0 = svadd_n_s8_x (p0, z0, 255), -+ z0 = svadd_x (p0, z0, 255)) -+ -+/* -+** add_m1_s8_x: -+** add z0\.b, z0\.b, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_s8_x, svint8_t, -+ z0 = svadd_n_s8_x (p0, z0, -1), -+ z0 = svadd_x (p0, z0, -1)) -+ -+/* -+** add_m127_s8_x: -+** add z0\.b, z0\.b, #129 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m127_s8_x, svint8_t, -+ z0 = svadd_n_s8_x (p0, z0, -127), -+ z0 = svadd_x (p0, z0, -127)) -+ -+/* -+** add_m128_s8_x: -+** add z0\.b, z0\.b, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m128_s8_x, svint8_t, -+ z0 = svadd_n_s8_x (p0, z0, -128), -+ z0 = svadd_x (p0, z0, -128)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_u16.c -new file mode 100644 -index 000000000..25cb90353 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_u16.c -@@ -0,0 +1,377 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** add_u16_m_tied1: -+** add z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (add_u16_m_tied1, svuint16_t, -+ z0 = svadd_u16_m (p0, z0, z1), -+ z0 = svadd_m (p0, z0, z1)) -+ -+/* -+** add_u16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** add z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (add_u16_m_tied2, svuint16_t, -+ z0 = svadd_u16_m (p0, z1, z0), -+ z0 = svadd_m (p0, z1, z0)) -+ -+/* -+** add_u16_m_untied: -+** movprfx z0, z1 -+** add z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (add_u16_m_untied, svuint16_t, -+ z0 = svadd_u16_m (p0, z1, z2), -+ z0 = svadd_m (p0, z1, z2)) -+ -+/* -+** add_w0_u16_m_tied1: -+** mov (z[0-9]+\.h), w0 -+** add z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX 
(add_w0_u16_m_tied1, svuint16_t, uint16_t, -+ z0 = svadd_n_u16_m (p0, z0, x0), -+ z0 = svadd_m (p0, z0, x0)) -+ -+/* -+** add_w0_u16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** add z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (add_w0_u16_m_untied, svuint16_t, uint16_t, -+ z0 = svadd_n_u16_m (p0, z1, x0), -+ z0 = svadd_m (p0, z1, x0)) -+ -+/* -+** add_1_u16_m_tied1: -+** mov (z[0-9]+\.h), #1 -+** add z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_u16_m_tied1, svuint16_t, -+ z0 = svadd_n_u16_m (p0, z0, 1), -+ z0 = svadd_m (p0, z0, 1)) -+ -+/* -+** add_1_u16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), #1 -+** movprfx z0, z1 -+** add z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_u16_m_untied, svuint16_t, -+ z0 = svadd_n_u16_m (p0, z1, 1), -+ z0 = svadd_m (p0, z1, 1)) -+ -+/* -+** add_m2_u16_m: -+** mov (z[0-9]+\.h), #-2 -+** add z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m2_u16_m, svuint16_t, -+ z0 = svadd_n_u16_m (p0, z0, -2), -+ z0 = svadd_m (p0, z0, -2)) -+ -+/* -+** add_u16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** add z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (add_u16_z_tied1, svuint16_t, -+ z0 = svadd_u16_z (p0, z0, z1), -+ z0 = svadd_z (p0, z0, z1)) -+ -+/* -+** add_u16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** add z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (add_u16_z_tied2, svuint16_t, -+ z0 = svadd_u16_z (p0, z1, z0), -+ z0 = svadd_z (p0, z1, z0)) -+ -+/* -+** add_u16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** add z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** add z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (add_u16_z_untied, svuint16_t, -+ z0 = svadd_u16_z (p0, z1, z2), -+ z0 = svadd_z (p0, z1, z2)) -+ -+/* -+** add_w0_u16_z_tied1: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** add z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (add_w0_u16_z_tied1, svuint16_t, uint16_t, -+ z0 = svadd_n_u16_z (p0, z0, x0), -+ z0 = svadd_z (p0, z0, x0)) -+ -+/* -+** add_w0_u16_z_untied: -+** mov (z[0-9]+\.h), w0 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** add z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** add z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (add_w0_u16_z_untied, svuint16_t, uint16_t, -+ z0 = svadd_n_u16_z (p0, z1, x0), -+ z0 = svadd_z (p0, z1, x0)) -+ -+/* -+** add_1_u16_z_tied1: -+** mov (z[0-9]+\.h), #1 -+** movprfx z0\.h, p0/z, z0\.h -+** add z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_u16_z_tied1, svuint16_t, -+ z0 = svadd_n_u16_z (p0, z0, 1), -+ z0 = svadd_z (p0, z0, 1)) -+ -+/* -+** add_1_u16_z_untied: -+** mov (z[0-9]+\.h), #1 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** add z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** add z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_u16_z_untied, svuint16_t, -+ z0 = svadd_n_u16_z (p0, z1, 1), -+ z0 = svadd_z (p0, z1, 1)) -+ -+/* -+** add_u16_x_tied1: -+** add z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (add_u16_x_tied1, svuint16_t, -+ z0 = svadd_u16_x (p0, z0, z1), -+ z0 = svadd_x (p0, z0, z1)) -+ -+/* -+** add_u16_x_tied2: -+** add z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (add_u16_x_tied2, svuint16_t, -+ z0 = svadd_u16_x (p0, z1, z0), -+ z0 = svadd_x (p0, z1, z0)) -+ -+/* -+** add_u16_x_untied: -+** add z0\.h, (z1\.h, z2\.h|z2\.h, z1\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (add_u16_x_untied, svuint16_t, -+ 
z0 = svadd_u16_x (p0, z1, z2), -+ z0 = svadd_x (p0, z1, z2)) -+ -+/* -+** add_w0_u16_x_tied1: -+** mov (z[0-9]+\.h), w0 -+** add z0\.h, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_ZX (add_w0_u16_x_tied1, svuint16_t, uint16_t, -+ z0 = svadd_n_u16_x (p0, z0, x0), -+ z0 = svadd_x (p0, z0, x0)) -+ -+/* -+** add_w0_u16_x_untied: -+** mov (z[0-9]+\.h), w0 -+** add z0\.h, (z1\.h, \1|\1, z1\.h) -+** ret -+*/ -+TEST_UNIFORM_ZX (add_w0_u16_x_untied, svuint16_t, uint16_t, -+ z0 = svadd_n_u16_x (p0, z1, x0), -+ z0 = svadd_x (p0, z1, x0)) -+ -+/* -+** add_1_u16_x_tied1: -+** add z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_u16_x_tied1, svuint16_t, -+ z0 = svadd_n_u16_x (p0, z0, 1), -+ z0 = svadd_x (p0, z0, 1)) -+ -+/* -+** add_1_u16_x_untied: -+** movprfx z0, z1 -+** add z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_u16_x_untied, svuint16_t, -+ z0 = svadd_n_u16_x (p0, z1, 1), -+ z0 = svadd_x (p0, z1, 1)) -+ -+/* -+** add_127_u16_x: -+** add z0\.h, z0\.h, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (add_127_u16_x, svuint16_t, -+ z0 = svadd_n_u16_x (p0, z0, 127), -+ z0 = svadd_x (p0, z0, 127)) -+ -+/* -+** add_128_u16_x: -+** add z0\.h, z0\.h, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (add_128_u16_x, svuint16_t, -+ z0 = svadd_n_u16_x (p0, z0, 128), -+ z0 = svadd_x (p0, z0, 128)) -+ -+/* -+** add_255_u16_x: -+** add z0\.h, z0\.h, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (add_255_u16_x, svuint16_t, -+ z0 = svadd_n_u16_x (p0, z0, 255), -+ z0 = svadd_x (p0, z0, 255)) -+ -+/* -+** add_256_u16_x: -+** add z0\.h, z0\.h, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (add_256_u16_x, svuint16_t, -+ z0 = svadd_n_u16_x (p0, z0, 256), -+ z0 = svadd_x (p0, z0, 256)) -+ -+/* -+** add_257_u16_x: -+** mov (z[0-9]+)\.b, #1 -+** add z0\.h, (z0\.h, \1\.h|\1\.h, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (add_257_u16_x, svuint16_t, -+ z0 = svadd_n_u16_x (p0, z0, 257), -+ z0 = svadd_x (p0, z0, 257)) -+ -+/* -+** add_512_u16_x: -+** add z0\.h, z0\.h, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (add_512_u16_x, svuint16_t, -+ z0 = svadd_n_u16_x (p0, z0, 512), -+ z0 = svadd_x (p0, z0, 512)) -+ -+/* -+** add_65280_u16_x: -+** add z0\.h, z0\.h, #65280 -+** ret -+*/ -+TEST_UNIFORM_Z (add_65280_u16_x, svuint16_t, -+ z0 = svadd_n_u16_x (p0, z0, 0xff00), -+ z0 = svadd_x (p0, z0, 0xff00)) -+ -+/* -+** add_m1_u16_x: -+** sub z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_u16_x, svuint16_t, -+ z0 = svadd_n_u16_x (p0, z0, -1), -+ z0 = svadd_x (p0, z0, -1)) -+ -+/* -+** add_m127_u16_x: -+** sub z0\.h, z0\.h, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m127_u16_x, svuint16_t, -+ z0 = svadd_n_u16_x (p0, z0, -127), -+ z0 = svadd_x (p0, z0, -127)) -+ -+/* -+** add_m128_u16_x: -+** sub z0\.h, z0\.h, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m128_u16_x, svuint16_t, -+ z0 = svadd_n_u16_x (p0, z0, -128), -+ z0 = svadd_x (p0, z0, -128)) -+ -+/* -+** add_m255_u16_x: -+** sub z0\.h, z0\.h, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m255_u16_x, svuint16_t, -+ z0 = svadd_n_u16_x (p0, z0, -255), -+ z0 = svadd_x (p0, z0, -255)) -+ -+/* -+** add_m256_u16_x: -+** add z0\.h, z0\.h, #65280 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m256_u16_x, svuint16_t, -+ z0 = svadd_n_u16_x (p0, z0, -256), -+ z0 = svadd_x (p0, z0, -256)) -+ -+/* -+** add_m257_u16_x: -+** mov (z[0-9]+\.h), #-257 -+** add z0\.h, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (add_m257_u16_x, svuint16_t, -+ z0 = svadd_n_u16_x (p0, z0, -257), -+ z0 = svadd_x (p0, z0, -257)) -+ -+/* -+** add_m512_u16_x: -+** add z0\.h, z0\.h, #65024 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m512_u16_x, svuint16_t, -+ z0 = 
svadd_n_u16_x (p0, z0, -512), -+ z0 = svadd_x (p0, z0, -512)) -+ -+/* -+** add_m32768_u16_x: -+** add z0\.h, z0\.h, #32768 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m32768_u16_x, svuint16_t, -+ z0 = svadd_n_u16_x (p0, z0, -0x8000), -+ z0 = svadd_x (p0, z0, -0x8000)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_u32.c -new file mode 100644 -index 000000000..ee979489b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_u32.c -@@ -0,0 +1,426 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** add_u32_m_tied1: -+** add z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (add_u32_m_tied1, svuint32_t, -+ z0 = svadd_u32_m (p0, z0, z1), -+ z0 = svadd_m (p0, z0, z1)) -+ -+/* -+** add_u32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** add z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (add_u32_m_tied2, svuint32_t, -+ z0 = svadd_u32_m (p0, z1, z0), -+ z0 = svadd_m (p0, z1, z0)) -+ -+/* -+** add_u32_m_untied: -+** movprfx z0, z1 -+** add z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (add_u32_m_untied, svuint32_t, -+ z0 = svadd_u32_m (p0, z1, z2), -+ z0 = svadd_m (p0, z1, z2)) -+ -+/* -+** add_w0_u32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** add z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (add_w0_u32_m_tied1, svuint32_t, uint32_t, -+ z0 = svadd_n_u32_m (p0, z0, x0), -+ z0 = svadd_m (p0, z0, x0)) -+ -+/* -+** add_w0_u32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** add z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (add_w0_u32_m_untied, svuint32_t, uint32_t, -+ z0 = svadd_n_u32_m (p0, z1, x0), -+ z0 = svadd_m (p0, z1, x0)) -+ -+/* -+** add_1_u32_m_tied1: -+** mov (z[0-9]+\.s), #1 -+** add z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_u32_m_tied1, svuint32_t, -+ z0 = svadd_n_u32_m (p0, z0, 1), -+ z0 = svadd_m (p0, z0, 1)) -+ -+/* -+** add_1_u32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #1 -+** movprfx z0, z1 -+** add z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_u32_m_untied, svuint32_t, -+ z0 = svadd_n_u32_m (p0, z1, 1), -+ z0 = svadd_m (p0, z1, 1)) -+ -+/* -+** add_m2_u32_m: -+** mov (z[0-9]+\.s), #-2 -+** add z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m2_u32_m, svuint32_t, -+ z0 = svadd_n_u32_m (p0, z0, -2), -+ z0 = svadd_m (p0, z0, -2)) -+ -+/* -+** add_u32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** add z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (add_u32_z_tied1, svuint32_t, -+ z0 = svadd_u32_z (p0, z0, z1), -+ z0 = svadd_z (p0, z0, z1)) -+ -+/* -+** add_u32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** add z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (add_u32_z_tied2, svuint32_t, -+ z0 = svadd_u32_z (p0, z1, z0), -+ z0 = svadd_z (p0, z1, z0)) -+ -+/* -+** add_u32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** add z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** add z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (add_u32_z_untied, svuint32_t, -+ z0 = svadd_u32_z (p0, z1, z2), -+ z0 = svadd_z (p0, z1, z2)) -+ -+/* -+** add_w0_u32_z_tied1: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** add z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (add_w0_u32_z_tied1, svuint32_t, uint32_t, -+ z0 = svadd_n_u32_z (p0, z0, x0), -+ z0 = svadd_z (p0, z0, x0)) -+ -+/* -+** add_w0_u32_z_untied: -+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, 
p0/z, z1\.s -+** add z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** add z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (add_w0_u32_z_untied, svuint32_t, uint32_t, -+ z0 = svadd_n_u32_z (p0, z1, x0), -+ z0 = svadd_z (p0, z1, x0)) -+ -+/* -+** add_1_u32_z_tied1: -+** mov (z[0-9]+\.s), #1 -+** movprfx z0\.s, p0/z, z0\.s -+** add z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_u32_z_tied1, svuint32_t, -+ z0 = svadd_n_u32_z (p0, z0, 1), -+ z0 = svadd_z (p0, z0, 1)) -+ -+/* -+** add_1_u32_z_untied: -+** mov (z[0-9]+\.s), #1 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** add z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** add z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_u32_z_untied, svuint32_t, -+ z0 = svadd_n_u32_z (p0, z1, 1), -+ z0 = svadd_z (p0, z1, 1)) -+ -+/* -+** add_u32_x_tied1: -+** add z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (add_u32_x_tied1, svuint32_t, -+ z0 = svadd_u32_x (p0, z0, z1), -+ z0 = svadd_x (p0, z0, z1)) -+ -+/* -+** add_u32_x_tied2: -+** add z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (add_u32_x_tied2, svuint32_t, -+ z0 = svadd_u32_x (p0, z1, z0), -+ z0 = svadd_x (p0, z1, z0)) -+ -+/* -+** add_u32_x_untied: -+** add z0\.s, (z1\.s, z2\.s|z2\.s, z1\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (add_u32_x_untied, svuint32_t, -+ z0 = svadd_u32_x (p0, z1, z2), -+ z0 = svadd_x (p0, z1, z2)) -+ -+/* -+** add_w0_u32_x_tied1: -+** mov (z[0-9]+\.s), w0 -+** add z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_ZX (add_w0_u32_x_tied1, svuint32_t, uint32_t, -+ z0 = svadd_n_u32_x (p0, z0, x0), -+ z0 = svadd_x (p0, z0, x0)) -+ -+/* -+** add_w0_u32_x_untied: -+** mov (z[0-9]+\.s), w0 -+** add z0\.s, (z1\.s, \1|\1, z1\.s) -+** ret -+*/ -+TEST_UNIFORM_ZX (add_w0_u32_x_untied, svuint32_t, uint32_t, -+ z0 = svadd_n_u32_x (p0, z1, x0), -+ z0 = svadd_x (p0, z1, x0)) -+ -+/* -+** add_1_u32_x_tied1: -+** add z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_u32_x_tied1, svuint32_t, -+ z0 = svadd_n_u32_x (p0, z0, 1), -+ z0 = svadd_x (p0, z0, 1)) -+ -+/* -+** add_1_u32_x_untied: -+** movprfx z0, z1 -+** add z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_u32_x_untied, svuint32_t, -+ z0 = svadd_n_u32_x (p0, z1, 1), -+ z0 = svadd_x (p0, z1, 1)) -+ -+/* -+** add_127_u32_x: -+** add z0\.s, z0\.s, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (add_127_u32_x, svuint32_t, -+ z0 = svadd_n_u32_x (p0, z0, 127), -+ z0 = svadd_x (p0, z0, 127)) -+ -+/* -+** add_128_u32_x: -+** add z0\.s, z0\.s, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (add_128_u32_x, svuint32_t, -+ z0 = svadd_n_u32_x (p0, z0, 128), -+ z0 = svadd_x (p0, z0, 128)) -+ -+/* -+** add_255_u32_x: -+** add z0\.s, z0\.s, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (add_255_u32_x, svuint32_t, -+ z0 = svadd_n_u32_x (p0, z0, 255), -+ z0 = svadd_x (p0, z0, 255)) -+ -+/* -+** add_256_u32_x: -+** add z0\.s, z0\.s, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (add_256_u32_x, svuint32_t, -+ z0 = svadd_n_u32_x (p0, z0, 256), -+ z0 = svadd_x (p0, z0, 256)) -+ -+/* -+** add_511_u32_x: -+** mov (z[0-9]+\.s), #511 -+** add z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (add_511_u32_x, svuint32_t, -+ z0 = svadd_n_u32_x (p0, z0, 511), -+ z0 = svadd_x (p0, z0, 511)) -+ -+/* -+** add_512_u32_x: -+** add z0\.s, z0\.s, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (add_512_u32_x, svuint32_t, -+ z0 = svadd_n_u32_x (p0, z0, 512), -+ z0 = svadd_x (p0, z0, 512)) -+ -+/* -+** add_65280_u32_x: -+** add z0\.s, z0\.s, #65280 -+** ret -+*/ -+TEST_UNIFORM_Z 
(add_65280_u32_x, svuint32_t, -+ z0 = svadd_n_u32_x (p0, z0, 0xff00), -+ z0 = svadd_x (p0, z0, 0xff00)) -+ -+/* -+** add_65535_u32_x: -+** mov (z[0-9]+\.s), #65535 -+** add z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (add_65535_u32_x, svuint32_t, -+ z0 = svadd_n_u32_x (p0, z0, 65535), -+ z0 = svadd_x (p0, z0, 65535)) -+ -+/* -+** add_65536_u32_x: -+** mov (z[0-9]+\.s), #65536 -+** add z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (add_65536_u32_x, svuint32_t, -+ z0 = svadd_n_u32_x (p0, z0, 65536), -+ z0 = svadd_x (p0, z0, 65536)) -+ -+/* -+** add_m1_u32_x: -+** sub z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_u32_x, svuint32_t, -+ z0 = svadd_n_u32_x (p0, z0, -1), -+ z0 = svadd_x (p0, z0, -1)) -+ -+/* -+** add_m127_u32_x: -+** sub z0\.s, z0\.s, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m127_u32_x, svuint32_t, -+ z0 = svadd_n_u32_x (p0, z0, -127), -+ z0 = svadd_x (p0, z0, -127)) -+ -+/* -+** add_m128_u32_x: -+** sub z0\.s, z0\.s, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m128_u32_x, svuint32_t, -+ z0 = svadd_n_u32_x (p0, z0, -128), -+ z0 = svadd_x (p0, z0, -128)) -+ -+/* -+** add_m255_u32_x: -+** sub z0\.s, z0\.s, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m255_u32_x, svuint32_t, -+ z0 = svadd_n_u32_x (p0, z0, -255), -+ z0 = svadd_x (p0, z0, -255)) -+ -+/* -+** add_m256_u32_x: -+** sub z0\.s, z0\.s, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m256_u32_x, svuint32_t, -+ z0 = svadd_n_u32_x (p0, z0, -256), -+ z0 = svadd_x (p0, z0, -256)) -+ -+/* -+** add_m511_u32_x: -+** mov (z[0-9]+\.s), #-511 -+** add z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (add_m511_u32_x, svuint32_t, -+ z0 = svadd_n_u32_x (p0, z0, -511), -+ z0 = svadd_x (p0, z0, -511)) -+ -+/* -+** add_m512_u32_x: -+** sub z0\.s, z0\.s, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m512_u32_x, svuint32_t, -+ z0 = svadd_n_u32_x (p0, z0, -512), -+ z0 = svadd_x (p0, z0, -512)) -+ -+/* -+** add_m32768_u32_x: -+** sub z0\.s, z0\.s, #32768 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m32768_u32_x, svuint32_t, -+ z0 = svadd_n_u32_x (p0, z0, -0x8000), -+ z0 = svadd_x (p0, z0, -0x8000)) -+ -+/* -+** add_m65280_u32_x: -+** sub z0\.s, z0\.s, #65280 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m65280_u32_x, svuint32_t, -+ z0 = svadd_n_u32_x (p0, z0, -0xff00), -+ z0 = svadd_x (p0, z0, -0xff00)) -+ -+/* -+** add_m65535_u32_x: -+** mov (z[0-9]+\.s), #-65535 -+** add z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (add_m65535_u32_x, svuint32_t, -+ z0 = svadd_n_u32_x (p0, z0, -65535), -+ z0 = svadd_x (p0, z0, -65535)) -+ -+/* -+** add_m65536_u32_x: -+** mov (z[0-9]+\.s), #-65536 -+** add z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (add_m65536_u32_x, svuint32_t, -+ z0 = svadd_n_u32_x (p0, z0, -65536), -+ z0 = svadd_x (p0, z0, -65536)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_u64.c -new file mode 100644 -index 000000000..25d2972a6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_u64.c -@@ -0,0 +1,426 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** add_u64_m_tied1: -+** add z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (add_u64_m_tied1, svuint64_t, -+ z0 = svadd_u64_m (p0, z0, z1), -+ z0 = svadd_m (p0, z0, z1)) -+ -+/* -+** add_u64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** add z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_u64_m_tied2, svuint64_t, -+ z0 = svadd_u64_m (p0, z1, z0), -+ z0 = 
svadd_m (p0, z1, z0)) -+ -+/* -+** add_u64_m_untied: -+** movprfx z0, z1 -+** add z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (add_u64_m_untied, svuint64_t, -+ z0 = svadd_u64_m (p0, z1, z2), -+ z0 = svadd_m (p0, z1, z2)) -+ -+/* -+** add_x0_u64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** add z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (add_x0_u64_m_tied1, svuint64_t, uint64_t, -+ z0 = svadd_n_u64_m (p0, z0, x0), -+ z0 = svadd_m (p0, z0, x0)) -+ -+/* -+** add_x0_u64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** add z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (add_x0_u64_m_untied, svuint64_t, uint64_t, -+ z0 = svadd_n_u64_m (p0, z1, x0), -+ z0 = svadd_m (p0, z1, x0)) -+ -+/* -+** add_1_u64_m_tied1: -+** mov (z[0-9]+\.d), #1 -+** add z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_u64_m_tied1, svuint64_t, -+ z0 = svadd_n_u64_m (p0, z0, 1), -+ z0 = svadd_m (p0, z0, 1)) -+ -+/* -+** add_1_u64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #1 -+** movprfx z0, z1 -+** add z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_u64_m_untied, svuint64_t, -+ z0 = svadd_n_u64_m (p0, z1, 1), -+ z0 = svadd_m (p0, z1, 1)) -+ -+/* -+** add_m2_u64_m: -+** mov (z[0-9]+\.d), #-2 -+** add z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m2_u64_m, svuint64_t, -+ z0 = svadd_n_u64_m (p0, z0, -2), -+ z0 = svadd_m (p0, z0, -2)) -+ -+/* -+** add_u64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** add z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (add_u64_z_tied1, svuint64_t, -+ z0 = svadd_u64_z (p0, z0, z1), -+ z0 = svadd_z (p0, z0, z1)) -+ -+/* -+** add_u64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** add z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (add_u64_z_tied2, svuint64_t, -+ z0 = svadd_u64_z (p0, z1, z0), -+ z0 = svadd_z (p0, z1, z0)) -+ -+/* -+** add_u64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** add z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** add z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (add_u64_z_untied, svuint64_t, -+ z0 = svadd_u64_z (p0, z1, z2), -+ z0 = svadd_z (p0, z1, z2)) -+ -+/* -+** add_x0_u64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** add z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (add_x0_u64_z_tied1, svuint64_t, uint64_t, -+ z0 = svadd_n_u64_z (p0, z0, x0), -+ z0 = svadd_z (p0, z0, x0)) -+ -+/* -+** add_x0_u64_z_untied: -+** mov (z[0-9]+\.d), x0 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** add z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** add z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (add_x0_u64_z_untied, svuint64_t, uint64_t, -+ z0 = svadd_n_u64_z (p0, z1, x0), -+ z0 = svadd_z (p0, z1, x0)) -+ -+/* -+** add_1_u64_z_tied1: -+** mov (z[0-9]+\.d), #1 -+** movprfx z0\.d, p0/z, z0\.d -+** add z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_u64_z_tied1, svuint64_t, -+ z0 = svadd_n_u64_z (p0, z0, 1), -+ z0 = svadd_z (p0, z0, 1)) -+ -+/* -+** add_1_u64_z_untied: -+** mov (z[0-9]+\.d), #1 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** add z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** add z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_u64_z_untied, svuint64_t, -+ z0 = svadd_n_u64_z (p0, z1, 1), -+ z0 = svadd_z (p0, z1, 1)) -+ -+/* -+** add_u64_x_tied1: -+** add z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (add_u64_x_tied1, svuint64_t, -+ z0 = svadd_u64_x (p0, z0, z1), -+ z0 = svadd_x (p0, z0, 
z1)) -+ -+/* -+** add_u64_x_tied2: -+** add z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (add_u64_x_tied2, svuint64_t, -+ z0 = svadd_u64_x (p0, z1, z0), -+ z0 = svadd_x (p0, z1, z0)) -+ -+/* -+** add_u64_x_untied: -+** add z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (add_u64_x_untied, svuint64_t, -+ z0 = svadd_u64_x (p0, z1, z2), -+ z0 = svadd_x (p0, z1, z2)) -+ -+/* -+** add_x0_u64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** add z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (add_x0_u64_x_tied1, svuint64_t, uint64_t, -+ z0 = svadd_n_u64_x (p0, z0, x0), -+ z0 = svadd_x (p0, z0, x0)) -+ -+/* -+** add_x0_u64_x_untied: -+** mov (z[0-9]+\.d), x0 -+** add z0\.d, (z1\.d, \1|\1, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (add_x0_u64_x_untied, svuint64_t, uint64_t, -+ z0 = svadd_n_u64_x (p0, z1, x0), -+ z0 = svadd_x (p0, z1, x0)) -+ -+/* -+** add_1_u64_x_tied1: -+** add z0\.d, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_u64_x_tied1, svuint64_t, -+ z0 = svadd_n_u64_x (p0, z0, 1), -+ z0 = svadd_x (p0, z0, 1)) -+ -+/* -+** add_1_u64_x_untied: -+** movprfx z0, z1 -+** add z0\.d, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_u64_x_untied, svuint64_t, -+ z0 = svadd_n_u64_x (p0, z1, 1), -+ z0 = svadd_x (p0, z1, 1)) -+ -+/* -+** add_127_u64_x: -+** add z0\.d, z0\.d, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (add_127_u64_x, svuint64_t, -+ z0 = svadd_n_u64_x (p0, z0, 127), -+ z0 = svadd_x (p0, z0, 127)) -+ -+/* -+** add_128_u64_x: -+** add z0\.d, z0\.d, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (add_128_u64_x, svuint64_t, -+ z0 = svadd_n_u64_x (p0, z0, 128), -+ z0 = svadd_x (p0, z0, 128)) -+ -+/* -+** add_255_u64_x: -+** add z0\.d, z0\.d, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (add_255_u64_x, svuint64_t, -+ z0 = svadd_n_u64_x (p0, z0, 255), -+ z0 = svadd_x (p0, z0, 255)) -+ -+/* -+** add_256_u64_x: -+** add z0\.d, z0\.d, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (add_256_u64_x, svuint64_t, -+ z0 = svadd_n_u64_x (p0, z0, 256), -+ z0 = svadd_x (p0, z0, 256)) -+ -+/* -+** add_511_u64_x: -+** mov (z[0-9]+\.d), #511 -+** add z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (add_511_u64_x, svuint64_t, -+ z0 = svadd_n_u64_x (p0, z0, 511), -+ z0 = svadd_x (p0, z0, 511)) -+ -+/* -+** add_512_u64_x: -+** add z0\.d, z0\.d, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (add_512_u64_x, svuint64_t, -+ z0 = svadd_n_u64_x (p0, z0, 512), -+ z0 = svadd_x (p0, z0, 512)) -+ -+/* -+** add_65280_u64_x: -+** add z0\.d, z0\.d, #65280 -+** ret -+*/ -+TEST_UNIFORM_Z (add_65280_u64_x, svuint64_t, -+ z0 = svadd_n_u64_x (p0, z0, 0xff00), -+ z0 = svadd_x (p0, z0, 0xff00)) -+ -+/* -+** add_65535_u64_x: -+** mov (z[0-9]+\.d), #65535 -+** add z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (add_65535_u64_x, svuint64_t, -+ z0 = svadd_n_u64_x (p0, z0, 65535), -+ z0 = svadd_x (p0, z0, 65535)) -+ -+/* -+** add_65536_u64_x: -+** mov (z[0-9]+\.d), #65536 -+** add z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (add_65536_u64_x, svuint64_t, -+ z0 = svadd_n_u64_x (p0, z0, 65536), -+ z0 = svadd_x (p0, z0, 65536)) -+ -+/* -+** add_m1_u64_x: -+** sub z0\.d, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_u64_x, svuint64_t, -+ z0 = svadd_n_u64_x (p0, z0, -1), -+ z0 = svadd_x (p0, z0, -1)) -+ -+/* -+** add_m127_u64_x: -+** sub z0\.d, z0\.d, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m127_u64_x, svuint64_t, -+ z0 = svadd_n_u64_x (p0, z0, -127), -+ z0 = svadd_x (p0, z0, -127)) -+ -+/* -+** add_m128_u64_x: -+** sub z0\.d, z0\.d, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m128_u64_x, svuint64_t, -+ z0 
= svadd_n_u64_x (p0, z0, -128), -+ z0 = svadd_x (p0, z0, -128)) -+ -+/* -+** add_m255_u64_x: -+** sub z0\.d, z0\.d, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m255_u64_x, svuint64_t, -+ z0 = svadd_n_u64_x (p0, z0, -255), -+ z0 = svadd_x (p0, z0, -255)) -+ -+/* -+** add_m256_u64_x: -+** sub z0\.d, z0\.d, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m256_u64_x, svuint64_t, -+ z0 = svadd_n_u64_x (p0, z0, -256), -+ z0 = svadd_x (p0, z0, -256)) -+ -+/* -+** add_m511_u64_x: -+** mov (z[0-9]+\.d), #-511 -+** add z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (add_m511_u64_x, svuint64_t, -+ z0 = svadd_n_u64_x (p0, z0, -511), -+ z0 = svadd_x (p0, z0, -511)) -+ -+/* -+** add_m512_u64_x: -+** sub z0\.d, z0\.d, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m512_u64_x, svuint64_t, -+ z0 = svadd_n_u64_x (p0, z0, -512), -+ z0 = svadd_x (p0, z0, -512)) -+ -+/* -+** add_m32768_u64_x: -+** sub z0\.d, z0\.d, #32768 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m32768_u64_x, svuint64_t, -+ z0 = svadd_n_u64_x (p0, z0, -0x8000), -+ z0 = svadd_x (p0, z0, -0x8000)) -+ -+/* -+** add_m65280_u64_x: -+** sub z0\.d, z0\.d, #65280 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m65280_u64_x, svuint64_t, -+ z0 = svadd_n_u64_x (p0, z0, -0xff00), -+ z0 = svadd_x (p0, z0, -0xff00)) -+ -+/* -+** add_m65535_u64_x: -+** mov (z[0-9]+\.d), #-65535 -+** add z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (add_m65535_u64_x, svuint64_t, -+ z0 = svadd_n_u64_x (p0, z0, -65535), -+ z0 = svadd_x (p0, z0, -65535)) -+ -+/* -+** add_m65536_u64_x: -+** mov (z[0-9]+\.d), #-65536 -+** add z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (add_m65536_u64_x, svuint64_t, -+ z0 = svadd_n_u64_x (p0, z0, -65536), -+ z0 = svadd_x (p0, z0, -65536)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_u8.c -new file mode 100644 -index 000000000..06b68c97c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/add_u8.c -@@ -0,0 +1,294 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** add_u8_m_tied1: -+** add z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (add_u8_m_tied1, svuint8_t, -+ z0 = svadd_u8_m (p0, z0, z1), -+ z0 = svadd_m (p0, z0, z1)) -+ -+/* -+** add_u8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** add z0\.b, p0/m, z0\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (add_u8_m_tied2, svuint8_t, -+ z0 = svadd_u8_m (p0, z1, z0), -+ z0 = svadd_m (p0, z1, z0)) -+ -+/* -+** add_u8_m_untied: -+** movprfx z0, z1 -+** add z0\.b, p0/m, z0\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (add_u8_m_untied, svuint8_t, -+ z0 = svadd_u8_m (p0, z1, z2), -+ z0 = svadd_m (p0, z1, z2)) -+ -+/* -+** add_w0_u8_m_tied1: -+** mov (z[0-9]+\.b), w0 -+** add z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (add_w0_u8_m_tied1, svuint8_t, uint8_t, -+ z0 = svadd_n_u8_m (p0, z0, x0), -+ z0 = svadd_m (p0, z0, x0)) -+ -+/* -+** add_w0_u8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** add z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (add_w0_u8_m_untied, svuint8_t, uint8_t, -+ z0 = svadd_n_u8_m (p0, z1, x0), -+ z0 = svadd_m (p0, z1, x0)) -+ -+/* -+** add_1_u8_m_tied1: -+** mov (z[0-9]+\.b), #1 -+** add z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_u8_m_tied1, svuint8_t, -+ z0 = svadd_n_u8_m (p0, z0, 1), -+ z0 = svadd_m (p0, z0, 1)) -+ -+/* -+** add_1_u8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), #1 -+** movprfx z0, z1 -+** add z0\.b, p0/m, z0\.b, \1 
-+** ret -+*/ -+TEST_UNIFORM_Z (add_1_u8_m_untied, svuint8_t, -+ z0 = svadd_n_u8_m (p0, z1, 1), -+ z0 = svadd_m (p0, z1, 1)) -+ -+/* -+** add_m1_u8_m: -+** mov (z[0-9]+\.b), #-1 -+** add z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_u8_m, svuint8_t, -+ z0 = svadd_n_u8_m (p0, z0, -1), -+ z0 = svadd_m (p0, z0, -1)) -+ -+/* -+** add_u8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** add z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (add_u8_z_tied1, svuint8_t, -+ z0 = svadd_u8_z (p0, z0, z1), -+ z0 = svadd_z (p0, z0, z1)) -+ -+/* -+** add_u8_z_tied2: -+** movprfx z0\.b, p0/z, z0\.b -+** add z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (add_u8_z_tied2, svuint8_t, -+ z0 = svadd_u8_z (p0, z1, z0), -+ z0 = svadd_z (p0, z1, z0)) -+ -+/* -+** add_u8_z_untied: -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** add z0\.b, p0/m, z0\.b, z2\.b -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** add z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (add_u8_z_untied, svuint8_t, -+ z0 = svadd_u8_z (p0, z1, z2), -+ z0 = svadd_z (p0, z1, z2)) -+ -+/* -+** add_w0_u8_z_tied1: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** add z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (add_w0_u8_z_tied1, svuint8_t, uint8_t, -+ z0 = svadd_n_u8_z (p0, z0, x0), -+ z0 = svadd_z (p0, z0, x0)) -+ -+/* -+** add_w0_u8_z_untied: -+** mov (z[0-9]+\.b), w0 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** add z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** add z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (add_w0_u8_z_untied, svuint8_t, uint8_t, -+ z0 = svadd_n_u8_z (p0, z1, x0), -+ z0 = svadd_z (p0, z1, x0)) -+ -+/* -+** add_1_u8_z_tied1: -+** mov (z[0-9]+\.b), #1 -+** movprfx z0\.b, p0/z, z0\.b -+** add z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_u8_z_tied1, svuint8_t, -+ z0 = svadd_n_u8_z (p0, z0, 1), -+ z0 = svadd_z (p0, z0, 1)) -+ -+/* -+** add_1_u8_z_untied: -+** mov (z[0-9]+\.b), #1 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** add z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** add z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_u8_z_untied, svuint8_t, -+ z0 = svadd_n_u8_z (p0, z1, 1), -+ z0 = svadd_z (p0, z1, 1)) -+ -+/* -+** add_u8_x_tied1: -+** add z0\.b, (z0\.b, z1\.b|z1\.b, z0\.b) -+** ret -+*/ -+TEST_UNIFORM_Z (add_u8_x_tied1, svuint8_t, -+ z0 = svadd_u8_x (p0, z0, z1), -+ z0 = svadd_x (p0, z0, z1)) -+ -+/* -+** add_u8_x_tied2: -+** add z0\.b, (z0\.b, z1\.b|z1\.b, z0\.b) -+** ret -+*/ -+TEST_UNIFORM_Z (add_u8_x_tied2, svuint8_t, -+ z0 = svadd_u8_x (p0, z1, z0), -+ z0 = svadd_x (p0, z1, z0)) -+ -+/* -+** add_u8_x_untied: -+** add z0\.b, (z1\.b, z2\.b|z2\.b, z1\.b) -+** ret -+*/ -+TEST_UNIFORM_Z (add_u8_x_untied, svuint8_t, -+ z0 = svadd_u8_x (p0, z1, z2), -+ z0 = svadd_x (p0, z1, z2)) -+ -+/* -+** add_w0_u8_x_tied1: -+** mov (z[0-9]+\.b), w0 -+** add z0\.b, (z0\.b, \1|\1, z0\.b) -+** ret -+*/ -+TEST_UNIFORM_ZX (add_w0_u8_x_tied1, svuint8_t, uint8_t, -+ z0 = svadd_n_u8_x (p0, z0, x0), -+ z0 = svadd_x (p0, z0, x0)) -+ -+/* -+** add_w0_u8_x_untied: -+** mov (z[0-9]+\.b), w0 -+** add z0\.b, (z1\.b, \1|\1, z1\.b) -+** ret -+*/ -+TEST_UNIFORM_ZX (add_w0_u8_x_untied, svuint8_t, uint8_t, -+ z0 = svadd_n_u8_x (p0, z1, x0), -+ z0 = svadd_x (p0, z1, x0)) -+ -+/* -+** add_1_u8_x_tied1: -+** add z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_u8_x_tied1, svuint8_t, -+ z0 = svadd_n_u8_x (p0, z0, 1), -+ z0 = svadd_x (p0, z0, 1)) -+ -+/* -+** add_1_u8_x_untied: -+** movprfx z0, z1 -+** add 
z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (add_1_u8_x_untied, svuint8_t, -+ z0 = svadd_n_u8_x (p0, z1, 1), -+ z0 = svadd_x (p0, z1, 1)) -+ -+/* -+** add_127_u8_x: -+** add z0\.b, z0\.b, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (add_127_u8_x, svuint8_t, -+ z0 = svadd_n_u8_x (p0, z0, 127), -+ z0 = svadd_x (p0, z0, 127)) -+ -+/* -+** add_128_u8_x: -+** add z0\.b, z0\.b, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (add_128_u8_x, svuint8_t, -+ z0 = svadd_n_u8_x (p0, z0, 128), -+ z0 = svadd_x (p0, z0, 128)) -+ -+/* -+** add_255_u8_x: -+** add z0\.b, z0\.b, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (add_255_u8_x, svuint8_t, -+ z0 = svadd_n_u8_x (p0, z0, 255), -+ z0 = svadd_x (p0, z0, 255)) -+ -+/* -+** add_m1_u8_x: -+** add z0\.b, z0\.b, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m1_u8_x, svuint8_t, -+ z0 = svadd_n_u8_x (p0, z0, -1), -+ z0 = svadd_x (p0, z0, -1)) -+ -+/* -+** add_m127_u8_x: -+** add z0\.b, z0\.b, #129 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m127_u8_x, svuint8_t, -+ z0 = svadd_n_u8_x (p0, z0, -127), -+ z0 = svadd_x (p0, z0, -127)) -+ -+/* -+** add_m128_u8_x: -+** add z0\.b, z0\.b, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (add_m128_u8_x, svuint8_t, -+ z0 = svadd_n_u8_x (p0, z0, -128), -+ z0 = svadd_x (p0, z0, -128)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f16.c -new file mode 100644 -index 000000000..6c6bfa1c2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f16.c -@@ -0,0 +1,22 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** adda_d0_f16: -+** fadda h0, p0, h0, z2\.h -+** ret -+*/ -+TEST_FOLD_LEFT_D (adda_d0_f16, float16_t, svfloat16_t, -+ d0 = svadda_f16 (p0, d0, z2), -+ d0 = svadda (p0, d0, z2)) -+ -+/* -+** adda_d1_f16: -+** mov v0\.h\[0\], v1\.h\[0\] -+** fadda h0, p0, h0, z2\.h -+** ret -+*/ -+TEST_FOLD_LEFT_D (adda_d1_f16, float16_t, svfloat16_t, -+ d0 = svadda_f16 (p0, d1, z2), -+ d0 = svadda (p0, d1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f32.c -new file mode 100644 -index 000000000..8b2a1dd1c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f32.c -@@ -0,0 +1,22 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** adda_d0_f32: -+** fadda s0, p0, s0, z2\.s -+** ret -+*/ -+TEST_FOLD_LEFT_D (adda_d0_f32, float32_t, svfloat32_t, -+ d0 = svadda_f32 (p0, d0, z2), -+ d0 = svadda (p0, d0, z2)) -+ -+/* -+** adda_d1_f32: -+** fmov s0, s1 -+** fadda s0, p0, s0, z2\.s -+** ret -+*/ -+TEST_FOLD_LEFT_D (adda_d1_f32, float32_t, svfloat32_t, -+ d0 = svadda_f32 (p0, d1, z2), -+ d0 = svadda (p0, d1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f64.c -new file mode 100644 -index 000000000..90a56420a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adda_f64.c -@@ -0,0 +1,22 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** adda_d0_f64: -+** fadda d0, p0, d0, z2\.d -+** ret -+*/ -+TEST_FOLD_LEFT_D (adda_d0_f64, float64_t, svfloat64_t, -+ d0 = svadda_f64 (p0, d0, z2), -+ d0 = svadda (p0, d0, z2)) -+ -+/* -+** adda_d1_f64: -+** fmov d0, d1 -+** fadda d0, p0, d0, z2\.d -+** ret -+*/ -+TEST_FOLD_LEFT_D (adda_d1_f64, float64_t, svfloat64_t, -+ d0 = svadda_f64 (p0, d1, z2), -+ d0 = svadda (p0, d1, z2)) -diff 
--git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_f16.c -new file mode 100644 -index 000000000..7bb0c1de4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_f16.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** addv_d0_f16_tied: -+** faddv h0, p0, z0\.h -+** ret -+*/ -+TEST_REDUCTION_D (addv_d0_f16_tied, float16_t, svfloat16_t, -+ d0 = svaddv_f16 (p0, z0), -+ d0 = svaddv (p0, z0)) -+ -+/* -+** addv_d0_f16_untied: -+** faddv h0, p0, z1\.h -+** ret -+*/ -+TEST_REDUCTION_D (addv_d0_f16_untied, float16_t, svfloat16_t, -+ d0 = svaddv_f16 (p0, z1), -+ d0 = svaddv (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_f32.c -new file mode 100644 -index 000000000..51c621910 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_f32.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** addv_d0_f32_tied: -+** faddv s0, p0, z0\.s -+** ret -+*/ -+TEST_REDUCTION_D (addv_d0_f32_tied, float32_t, svfloat32_t, -+ d0 = svaddv_f32 (p0, z0), -+ d0 = svaddv (p0, z0)) -+ -+/* -+** addv_d0_f32_untied: -+** faddv s0, p0, z1\.s -+** ret -+*/ -+TEST_REDUCTION_D (addv_d0_f32_untied, float32_t, svfloat32_t, -+ d0 = svaddv_f32 (p0, z1), -+ d0 = svaddv (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_f64.c -new file mode 100644 -index 000000000..882866210 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_f64.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** addv_d0_f64_tied: -+** faddv d0, p0, z0\.d -+** ret -+*/ -+TEST_REDUCTION_D (addv_d0_f64_tied, float64_t, svfloat64_t, -+ d0 = svaddv_f64 (p0, z0), -+ d0 = svaddv (p0, z0)) -+ -+/* -+** addv_d0_f64_untied: -+** faddv d0, p0, z1\.d -+** ret -+*/ -+TEST_REDUCTION_D (addv_d0_f64_untied, float64_t, svfloat64_t, -+ d0 = svaddv_f64 (p0, z1), -+ d0 = svaddv (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_s16.c -new file mode 100644 -index 000000000..05429a47e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_s16.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** addv_x0_s16: -+** saddv (d[0-9]+), p0, z0\.h -+** fmov x0, \1 -+** ret -+*/ -+TEST_REDUCTION_X (addv_x0_s16, int64_t, svint16_t, -+ x0 = svaddv_s16 (p0, z0), -+ x0 = svaddv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_s32.c -new file mode 100644 -index 000000000..5f7789a9a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_s32.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** addv_x0_s32: -+** saddv (d[0-9]+), p0, z0\.s -+** fmov x0, \1 -+** ret -+*/ -+TEST_REDUCTION_X (addv_x0_s32, int64_t, svint32_t, -+ x0 = svaddv_s32 (p0, z0), -+ x0 = svaddv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_s64.c -new file mode 
100644 -index 000000000..76c480091 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_s64.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** addv_x0_s64: -+** uaddv (d[0-9]+), p0, z0\.d -+** fmov x0, \1 -+** ret -+*/ -+TEST_REDUCTION_X (addv_x0_s64, int64_t, svint64_t, -+ x0 = svaddv_s64 (p0, z0), -+ x0 = svaddv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_s8.c -new file mode 100644 -index 000000000..8ccb2bf4f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_s8.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** addv_x0_s8: -+** saddv (d[0-9]+), p0, z0\.b -+** fmov x0, \1 -+** ret -+*/ -+TEST_REDUCTION_X (addv_x0_s8, int64_t, svint8_t, -+ x0 = svaddv_s8 (p0, z0), -+ x0 = svaddv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_u16.c -new file mode 100644 -index 000000000..6371921fe ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_u16.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** addv_x0_u16: -+** uaddv (d[0-9]+), p0, z0\.h -+** fmov x0, \1 -+** ret -+*/ -+TEST_REDUCTION_X (addv_x0_u16, uint64_t, svuint16_t, -+ x0 = svaddv_u16 (p0, z0), -+ x0 = svaddv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_u32.c -new file mode 100644 -index 000000000..bdd0ed1f9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_u32.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** addv_x0_u32: -+** uaddv (d[0-9]+), p0, z0\.s -+** fmov x0, \1 -+** ret -+*/ -+TEST_REDUCTION_X (addv_x0_u32, uint64_t, svuint32_t, -+ x0 = svaddv_u32 (p0, z0), -+ x0 = svaddv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_u64.c -new file mode 100644 -index 000000000..7b1995d3f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_u64.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** addv_x0_u64: -+** uaddv (d[0-9]+), p0, z0\.d -+** fmov x0, \1 -+** ret -+*/ -+TEST_REDUCTION_X (addv_x0_u64, uint64_t, svuint64_t, -+ x0 = svaddv_u64 (p0, z0), -+ x0 = svaddv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_u8.c -new file mode 100644 -index 000000000..0e972f093 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/addv_u8.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** addv_x0_u8: -+** uaddv (d[0-9]+), p0, z0\.b -+** fmov x0, \1 -+** ret -+*/ -+TEST_REDUCTION_X (addv_x0_u8, uint64_t, svuint8_t, -+ x0 = svaddv_u8 (p0, z0), -+ x0 = svaddv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrb.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrb.c -new file mode 100644 -index 000000000..a61eec971 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrb.c -@@ -0,0 +1,57 
@@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** adrb_u32base_s32offset: -+** adr z0\.s, \[z0\.s, z1\.s\] -+** ret -+*/ -+TEST_ADR (adrb_u32base_s32offset, svuint32_t, svint32_t, -+ z0 = svadrb_u32base_s32offset (z0, z1), -+ z0 = svadrb_offset (z0, z1)) -+ -+/* -+** adrb_u32base_u32offset: -+** adr z0\.s, \[z0\.s, z1\.s\] -+** ret -+*/ -+TEST_ADR (adrb_u32base_u32offset, svuint32_t, svuint32_t, -+ z0 = svadrb_u32base_u32offset (z0, z1), -+ z0 = svadrb_offset (z0, z1)) -+ -+/* -+** adrb_u64base_s64offset: -+** adr z0\.d, \[z0\.d, z1\.d\] -+** ret -+*/ -+TEST_ADR (adrb_u64base_s64offset, svuint64_t, svint64_t, -+ z0 = svadrb_u64base_s64offset (z0, z1), -+ z0 = svadrb_offset (z0, z1)) -+ -+/* -+** adrb_ext_u64base_s64offset: -+** adr z0\.d, \[z0\.d, z1\.d, sxtw\] -+** ret -+*/ -+TEST_ADR (adrb_ext_u64base_s64offset, svuint64_t, svint64_t, -+ z0 = svadrb_u64base_s64offset (z0, svextw_s64_x (svptrue_b64 (), z1)), -+ z0 = svadrb_offset (z0, svextw_x (svptrue_b64 (), z1))) -+ -+/* -+** adrb_u64base_u64offset: -+** adr z0\.d, \[z0\.d, z1\.d\] -+** ret -+*/ -+TEST_ADR (adrb_u64base_u64offset, svuint64_t, svuint64_t, -+ z0 = svadrb_u64base_u64offset (z0, z1), -+ z0 = svadrb_offset (z0, z1)) -+ -+/* -+** adrb_ext_u64base_u64offset: -+** adr z0\.d, \[z0\.d, z1\.d, uxtw\] -+** ret -+*/ -+TEST_ADR (adrb_ext_u64base_u64offset, svuint64_t, svuint64_t, -+ z0 = svadrb_u64base_u64offset (z0, svextw_u64_x (svptrue_b64 (), z1)), -+ z0 = svadrb_offset (z0, svextw_x (svptrue_b64 (), z1))) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrd.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrd.c -new file mode 100644 -index 000000000..970485bd6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrd.c -@@ -0,0 +1,57 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** adrd_u32base_s32index: -+** adr z0\.s, \[z0\.s, z1\.s, lsl 3\] -+** ret -+*/ -+TEST_ADR (adrd_u32base_s32index, svuint32_t, svint32_t, -+ z0 = svadrd_u32base_s32index (z0, z1), -+ z0 = svadrd_index (z0, z1)) -+ -+/* -+** adrd_u32base_u32index: -+** adr z0\.s, \[z0\.s, z1\.s, lsl 3\] -+** ret -+*/ -+TEST_ADR (adrd_u32base_u32index, svuint32_t, svuint32_t, -+ z0 = svadrd_u32base_u32index (z0, z1), -+ z0 = svadrd_index (z0, z1)) -+ -+/* -+** adrd_u64base_s64index: -+** adr z0\.d, \[z0\.d, z1\.d, lsl 3\] -+** ret -+*/ -+TEST_ADR (adrd_u64base_s64index, svuint64_t, svint64_t, -+ z0 = svadrd_u64base_s64index (z0, z1), -+ z0 = svadrd_index (z0, z1)) -+ -+/* -+** adrd_ext_u64base_s64index: -+** adr z0\.d, \[z0\.d, z1\.d, sxtw 3\] -+** ret -+*/ -+TEST_ADR (adrd_ext_u64base_s64index, svuint64_t, svint64_t, -+ z0 = svadrd_u64base_s64index (z0, svextw_s64_x (svptrue_b64 (), z1)), -+ z0 = svadrd_index (z0, svextw_x (svptrue_b64 (), z1))) -+ -+/* -+** adrd_u64base_u64index: -+** adr z0\.d, \[z0\.d, z1\.d, lsl 3\] -+** ret -+*/ -+TEST_ADR (adrd_u64base_u64index, svuint64_t, svuint64_t, -+ z0 = svadrd_u64base_u64index (z0, z1), -+ z0 = svadrd_index (z0, z1)) -+ -+/* -+** adrd_ext_u64base_u64index: -+** adr z0\.d, \[z0\.d, z1\.d, uxtw 3\] -+** ret -+*/ -+TEST_ADR (adrd_ext_u64base_u64index, svuint64_t, svuint64_t, -+ z0 = svadrd_u64base_u64index (z0, svextw_u64_x (svptrue_b64 (), z1)), -+ z0 = svadrd_index (z0, svextw_x (svptrue_b64 (), z1))) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrh.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrh.c -new file mode 100644 -index 
000000000..d06f51fe3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrh.c -@@ -0,0 +1,57 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** adrh_u32base_s32index: -+** adr z0\.s, \[z0\.s, z1\.s, lsl 1\] -+** ret -+*/ -+TEST_ADR (adrh_u32base_s32index, svuint32_t, svint32_t, -+ z0 = svadrh_u32base_s32index (z0, z1), -+ z0 = svadrh_index (z0, z1)) -+ -+/* -+** adrh_u32base_u32index: -+** adr z0\.s, \[z0\.s, z1\.s, lsl 1\] -+** ret -+*/ -+TEST_ADR (adrh_u32base_u32index, svuint32_t, svuint32_t, -+ z0 = svadrh_u32base_u32index (z0, z1), -+ z0 = svadrh_index (z0, z1)) -+ -+/* -+** adrh_u64base_s64index: -+** adr z0\.d, \[z0\.d, z1\.d, lsl 1\] -+** ret -+*/ -+TEST_ADR (adrh_u64base_s64index, svuint64_t, svint64_t, -+ z0 = svadrh_u64base_s64index (z0, z1), -+ z0 = svadrh_index (z0, z1)) -+ -+/* -+** adrh_ext_u64base_s64index: -+** adr z0\.d, \[z0\.d, z1\.d, sxtw 1\] -+** ret -+*/ -+TEST_ADR (adrh_ext_u64base_s64index, svuint64_t, svint64_t, -+ z0 = svadrh_u64base_s64index (z0, svextw_s64_x (svptrue_b64 (), z1)), -+ z0 = svadrh_index (z0, svextw_x (svptrue_b64 (), z1))) -+ -+/* -+** adrh_u64base_u64index: -+** adr z0\.d, \[z0\.d, z1\.d, lsl 1\] -+** ret -+*/ -+TEST_ADR (adrh_u64base_u64index, svuint64_t, svuint64_t, -+ z0 = svadrh_u64base_u64index (z0, z1), -+ z0 = svadrh_index (z0, z1)) -+ -+/* -+** adrh_ext_u64base_u64index: -+** adr z0\.d, \[z0\.d, z1\.d, uxtw 1\] -+** ret -+*/ -+TEST_ADR (adrh_ext_u64base_u64index, svuint64_t, svuint64_t, -+ z0 = svadrh_u64base_u64index (z0, svextw_u64_x (svptrue_b64 (), z1)), -+ z0 = svadrh_index (z0, svextw_x (svptrue_b64 (), z1))) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrw.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrw.c -new file mode 100644 -index 000000000..b23f25a11 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/adrw.c -@@ -0,0 +1,57 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** adrw_u32base_s32index: -+** adr z0\.s, \[z0\.s, z1\.s, lsl 2\] -+** ret -+*/ -+TEST_ADR (adrw_u32base_s32index, svuint32_t, svint32_t, -+ z0 = svadrw_u32base_s32index (z0, z1), -+ z0 = svadrw_index (z0, z1)) -+ -+/* -+** adrw_u32base_u32index: -+** adr z0\.s, \[z0\.s, z1\.s, lsl 2\] -+** ret -+*/ -+TEST_ADR (adrw_u32base_u32index, svuint32_t, svuint32_t, -+ z0 = svadrw_u32base_u32index (z0, z1), -+ z0 = svadrw_index (z0, z1)) -+ -+/* -+** adrw_u64base_s64index: -+** adr z0\.d, \[z0\.d, z1\.d, lsl 2\] -+** ret -+*/ -+TEST_ADR (adrw_u64base_s64index, svuint64_t, svint64_t, -+ z0 = svadrw_u64base_s64index (z0, z1), -+ z0 = svadrw_index (z0, z1)) -+ -+/* -+** adrw_ext_u64base_s64index: -+** adr z0\.d, \[z0\.d, z1\.d, sxtw 2\] -+** ret -+*/ -+TEST_ADR (adrw_ext_u64base_s64index, svuint64_t, svint64_t, -+ z0 = svadrw_u64base_s64index (z0, svextw_s64_x (svptrue_b64 (), z1)), -+ z0 = svadrw_index (z0, svextw_x (svptrue_b64 (), z1))) -+ -+/* -+** adrw_u64base_u64index: -+** adr z0\.d, \[z0\.d, z1\.d, lsl 2\] -+** ret -+*/ -+TEST_ADR (adrw_u64base_u64index, svuint64_t, svuint64_t, -+ z0 = svadrw_u64base_u64index (z0, z1), -+ z0 = svadrw_index (z0, z1)) -+ -+/* -+** adrw_ext_u64base_u64index: -+** adr z0\.d, \[z0\.d, z1\.d, uxtw 2\] -+** ret -+*/ -+TEST_ADR (adrw_ext_u64base_u64index, svuint64_t, svuint64_t, -+ z0 = svadrw_u64base_u64index (z0, svextw_u64_x (svptrue_b64 (), z1)), -+ z0 = svadrw_index (z0, svextw_x (svptrue_b64 (), z1))) -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_b.c -new file mode 100644 -index 000000000..f0c4ff1b1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_b.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** and_b_z_tied1: -+** and p0\.b, p3/z, (p0\.b, p1\.b|p1\.b, p0\.b) -+** ret -+*/ -+TEST_UNIFORM_P (and_b_z_tied1, -+ p0 = svand_b_z (p3, p0, p1), -+ p0 = svand_z (p3, p0, p1)) -+ -+/* -+** and_b_z_tied2: -+** and p0\.b, p3/z, (p0\.b, p1\.b|p1\.b, p0\.b) -+** ret -+*/ -+TEST_UNIFORM_P (and_b_z_tied2, -+ p0 = svand_b_z (p3, p1, p0), -+ p0 = svand_z (p3, p1, p0)) -+ -+/* -+** and_b_z_untied: -+** and p0\.b, p3/z, (p1\.b, p2\.b|p2\.b, p1\.b) -+** ret -+*/ -+TEST_UNIFORM_P (and_b_z_untied, -+ p0 = svand_b_z (p3, p1, p2), -+ p0 = svand_z (p3, p1, p2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_s16.c -new file mode 100644 -index 000000000..d54613e91 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_s16.c -@@ -0,0 +1,422 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** and_s16_m_tied1: -+** and z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (and_s16_m_tied1, svint16_t, -+ z0 = svand_s16_m (p0, z0, z1), -+ z0 = svand_m (p0, z0, z1)) -+ -+/* -+** and_s16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** and z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (and_s16_m_tied2, svint16_t, -+ z0 = svand_s16_m (p0, z1, z0), -+ z0 = svand_m (p0, z1, z0)) -+ -+/* -+** and_s16_m_untied: -+** movprfx z0, z1 -+** and z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (and_s16_m_untied, svint16_t, -+ z0 = svand_s16_m (p0, z1, z2), -+ z0 = svand_m (p0, z1, z2)) -+ -+/* -+** and_w0_s16_m_tied1: -+** mov (z[0-9]+\.h), w0 -+** and z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (and_w0_s16_m_tied1, svint16_t, int16_t, -+ z0 = svand_n_s16_m (p0, z0, x0), -+ z0 = svand_m (p0, z0, x0)) -+ -+/* -+** and_w0_s16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** and z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (and_w0_s16_m_untied, svint16_t, int16_t, -+ z0 = svand_n_s16_m (p0, z1, x0), -+ z0 = svand_m (p0, z1, x0)) -+ -+/* -+** and_1_s16_m_tied1: -+** mov (z[0-9]+\.h), #1 -+** and z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_s16_m_tied1, svint16_t, -+ z0 = svand_n_s16_m (p0, z0, 1), -+ z0 = svand_m (p0, z0, 1)) -+ -+/* -+** and_1_s16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), #1 -+** movprfx z0, z1 -+** and z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_s16_m_untied, svint16_t, -+ z0 = svand_n_s16_m (p0, z1, 1), -+ z0 = svand_m (p0, z1, 1)) -+ -+/* -+** and_m2_s16_m: -+** mov (z[0-9]+\.h), #-2 -+** and z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m2_s16_m, svint16_t, -+ z0 = svand_n_s16_m (p0, z0, -2), -+ z0 = svand_m (p0, z0, -2)) -+ -+/* -+** and_255_s16_m_tied1: -+** uxtb z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (and_255_s16_m_tied1, svint16_t, -+ z0 = svand_n_s16_m (p0, z0, 255), -+ z0 = svand_m (p0, z0, 255)) -+ -+/* -+** and_255_s16_m_untied: -+** movprfx z0, z1 -+** uxtb z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (and_255_s16_m_untied, svint16_t, -+ z0 = svand_n_s16_m (p0, z1, 255), -+ z0 = svand_m (p0, z1, 255)) -+ -+/* -+** and_s16_z_tied1: 
-+** movprfx z0\.h, p0/z, z0\.h -+** and z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (and_s16_z_tied1, svint16_t, -+ z0 = svand_s16_z (p0, z0, z1), -+ z0 = svand_z (p0, z0, z1)) -+ -+/* -+** and_s16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** and z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (and_s16_z_tied2, svint16_t, -+ z0 = svand_s16_z (p0, z1, z0), -+ z0 = svand_z (p0, z1, z0)) -+ -+/* -+** and_s16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** and z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** and z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (and_s16_z_untied, svint16_t, -+ z0 = svand_s16_z (p0, z1, z2), -+ z0 = svand_z (p0, z1, z2)) -+ -+/* -+** and_w0_s16_z_tied1: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** and z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (and_w0_s16_z_tied1, svint16_t, int16_t, -+ z0 = svand_n_s16_z (p0, z0, x0), -+ z0 = svand_z (p0, z0, x0)) -+ -+/* -+** and_w0_s16_z_untied: -+** mov (z[0-9]+\.h), w0 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** and z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** and z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (and_w0_s16_z_untied, svint16_t, int16_t, -+ z0 = svand_n_s16_z (p0, z1, x0), -+ z0 = svand_z (p0, z1, x0)) -+ -+/* -+** and_1_s16_z_tied1: -+** mov (z[0-9]+\.h), #1 -+** movprfx z0\.h, p0/z, z0\.h -+** and z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_s16_z_tied1, svint16_t, -+ z0 = svand_n_s16_z (p0, z0, 1), -+ z0 = svand_z (p0, z0, 1)) -+ -+/* -+** and_1_s16_z_untied: -+** mov (z[0-9]+\.h), #1 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** and z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** and z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_s16_z_untied, svint16_t, -+ z0 = svand_n_s16_z (p0, z1, 1), -+ z0 = svand_z (p0, z1, 1)) -+ -+/* -+** and_255_s16_z_tied1: -+** ( -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, \1\.h -+** uxtb z0\.h, p0/m, \1\.h -+** | -+** mov (z[0-9]+\.h), #255 -+** movprfx z0\.h, p0/z, z0\.h -+** and z0\.h, p0/m, z0\.h, \1 -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (and_255_s16_z_tied1, svint16_t, -+ z0 = svand_n_s16_z (p0, z0, 255), -+ z0 = svand_z (p0, z0, 255)) -+ -+/* -+** and_255_s16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** uxtb z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (and_255_s16_z_untied, svint16_t, -+ z0 = svand_n_s16_z (p0, z1, 255), -+ z0 = svand_z (p0, z1, 255)) -+ -+/* -+** and_s16_x_tied1: -+** and z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (and_s16_x_tied1, svint16_t, -+ z0 = svand_s16_x (p0, z0, z1), -+ z0 = svand_x (p0, z0, z1)) -+ -+/* -+** and_s16_x_tied2: -+** and z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (and_s16_x_tied2, svint16_t, -+ z0 = svand_s16_x (p0, z1, z0), -+ z0 = svand_x (p0, z1, z0)) -+ -+/* -+** and_s16_x_untied: -+** and z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (and_s16_x_untied, svint16_t, -+ z0 = svand_s16_x (p0, z1, z2), -+ z0 = svand_x (p0, z1, z2)) -+ -+/* -+** and_w0_s16_x_tied1: -+** mov (z[0-9]+)\.h, w0 -+** and z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (and_w0_s16_x_tied1, svint16_t, int16_t, -+ z0 = svand_n_s16_x (p0, z0, x0), -+ z0 = svand_x (p0, z0, x0)) -+ -+/* -+** and_w0_s16_x_untied: -+** mov (z[0-9]+)\.h, w0 -+** and z0\.d, (z1\.d, \1\.d|\1\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (and_w0_s16_x_untied, svint16_t, int16_t, -+ z0 = svand_n_s16_x 
(p0, z1, x0), -+ z0 = svand_x (p0, z1, x0)) -+ -+/* -+** and_1_s16_x_tied1: -+** and z0\.h, z0\.h, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_s16_x_tied1, svint16_t, -+ z0 = svand_n_s16_x (p0, z0, 1), -+ z0 = svand_x (p0, z0, 1)) -+ -+/* -+** and_1_s16_x_untied: -+** movprfx z0, z1 -+** and z0\.h, z0\.h, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_s16_x_untied, svint16_t, -+ z0 = svand_n_s16_x (p0, z1, 1), -+ z0 = svand_x (p0, z1, 1)) -+ -+/* -+** and_127_s16_x: -+** and z0\.h, z0\.h, #0x7f -+** ret -+*/ -+TEST_UNIFORM_Z (and_127_s16_x, svint16_t, -+ z0 = svand_n_s16_x (p0, z0, 127), -+ z0 = svand_x (p0, z0, 127)) -+ -+/* -+** and_128_s16_x: -+** and z0\.h, z0\.h, #0x80 -+** ret -+*/ -+TEST_UNIFORM_Z (and_128_s16_x, svint16_t, -+ z0 = svand_n_s16_x (p0, z0, 128), -+ z0 = svand_x (p0, z0, 128)) -+ -+/* -+** and_255_s16_x: -+** and z0\.h, z0\.h, #0xff -+** ret -+*/ -+TEST_UNIFORM_Z (and_255_s16_x, svint16_t, -+ z0 = svand_n_s16_x (p0, z0, 255), -+ z0 = svand_x (p0, z0, 255)) -+ -+/* -+** and_256_s16_x: -+** and z0\.h, z0\.h, #0x100 -+** ret -+*/ -+TEST_UNIFORM_Z (and_256_s16_x, svint16_t, -+ z0 = svand_n_s16_x (p0, z0, 256), -+ z0 = svand_x (p0, z0, 256)) -+ -+/* -+** and_257_s16_x: -+** and z0\.h, z0\.h, #0x101 -+** ret -+*/ -+TEST_UNIFORM_Z (and_257_s16_x, svint16_t, -+ z0 = svand_n_s16_x (p0, z0, 257), -+ z0 = svand_x (p0, z0, 257)) -+ -+/* -+** and_512_s16_x: -+** and z0\.h, z0\.h, #0x200 -+** ret -+*/ -+TEST_UNIFORM_Z (and_512_s16_x, svint16_t, -+ z0 = svand_n_s16_x (p0, z0, 512), -+ z0 = svand_x (p0, z0, 512)) -+ -+/* -+** and_65280_s16_x: -+** and z0\.h, z0\.h, #0xff00 -+** ret -+*/ -+TEST_UNIFORM_Z (and_65280_s16_x, svint16_t, -+ z0 = svand_n_s16_x (p0, z0, 0xff00), -+ z0 = svand_x (p0, z0, 0xff00)) -+ -+/* -+** and_m127_s16_x: -+** and z0\.h, z0\.h, #0xff81 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m127_s16_x, svint16_t, -+ z0 = svand_n_s16_x (p0, z0, -127), -+ z0 = svand_x (p0, z0, -127)) -+ -+/* -+** and_m128_s16_x: -+** and z0\.h, z0\.h, #0xff80 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m128_s16_x, svint16_t, -+ z0 = svand_n_s16_x (p0, z0, -128), -+ z0 = svand_x (p0, z0, -128)) -+ -+/* -+** and_m255_s16_x: -+** and z0\.h, z0\.h, #0xff01 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m255_s16_x, svint16_t, -+ z0 = svand_n_s16_x (p0, z0, -255), -+ z0 = svand_x (p0, z0, -255)) -+ -+/* -+** and_m256_s16_x: -+** and z0\.h, z0\.h, #0xff00 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m256_s16_x, svint16_t, -+ z0 = svand_n_s16_x (p0, z0, -256), -+ z0 = svand_x (p0, z0, -256)) -+ -+/* -+** and_m257_s16_x: -+** and z0\.h, z0\.h, #0xfeff -+** ret -+*/ -+TEST_UNIFORM_Z (and_m257_s16_x, svint16_t, -+ z0 = svand_n_s16_x (p0, z0, -257), -+ z0 = svand_x (p0, z0, -257)) -+ -+/* -+** and_m512_s16_x: -+** and z0\.h, z0\.h, #0xfe00 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m512_s16_x, svint16_t, -+ z0 = svand_n_s16_x (p0, z0, -512), -+ z0 = svand_x (p0, z0, -512)) -+ -+/* -+** and_m32768_s16_x: -+** and z0\.h, z0\.h, #0x8000 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m32768_s16_x, svint16_t, -+ z0 = svand_n_s16_x (p0, z0, -0x8000), -+ z0 = svand_x (p0, z0, -0x8000)) -+ -+/* -+** and_5_s16_x: -+** mov (z[0-9]+)\.h, #5 -+** and z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (and_5_s16_x, svint16_t, -+ z0 = svand_n_s16_x (p0, z0, 5), -+ z0 = svand_x (p0, z0, 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_s32.c -new file mode 100644 -index 000000000..7f4082b32 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_s32.c -@@ -0,0 +1,464 
@@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** and_s32_m_tied1: -+** and z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (and_s32_m_tied1, svint32_t, -+ z0 = svand_s32_m (p0, z0, z1), -+ z0 = svand_m (p0, z0, z1)) -+ -+/* -+** and_s32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** and z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (and_s32_m_tied2, svint32_t, -+ z0 = svand_s32_m (p0, z1, z0), -+ z0 = svand_m (p0, z1, z0)) -+ -+/* -+** and_s32_m_untied: -+** movprfx z0, z1 -+** and z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (and_s32_m_untied, svint32_t, -+ z0 = svand_s32_m (p0, z1, z2), -+ z0 = svand_m (p0, z1, z2)) -+ -+/* -+** and_w0_s32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** and z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (and_w0_s32_m_tied1, svint32_t, int32_t, -+ z0 = svand_n_s32_m (p0, z0, x0), -+ z0 = svand_m (p0, z0, x0)) -+ -+/* -+** and_w0_s32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** and z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (and_w0_s32_m_untied, svint32_t, int32_t, -+ z0 = svand_n_s32_m (p0, z1, x0), -+ z0 = svand_m (p0, z1, x0)) -+ -+/* -+** and_1_s32_m_tied1: -+** mov (z[0-9]+\.s), #1 -+** and z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_s32_m_tied1, svint32_t, -+ z0 = svand_n_s32_m (p0, z0, 1), -+ z0 = svand_m (p0, z0, 1)) -+ -+/* -+** and_1_s32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #1 -+** movprfx z0, z1 -+** and z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_s32_m_untied, svint32_t, -+ z0 = svand_n_s32_m (p0, z1, 1), -+ z0 = svand_m (p0, z1, 1)) -+ -+/* -+** and_m2_s32_m: -+** mov (z[0-9]+\.s), #-2 -+** and z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m2_s32_m, svint32_t, -+ z0 = svand_n_s32_m (p0, z0, -2), -+ z0 = svand_m (p0, z0, -2)) -+ -+/* -+** and_255_s32_m_tied1: -+** uxtb z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (and_255_s32_m_tied1, svint32_t, -+ z0 = svand_n_s32_m (p0, z0, 255), -+ z0 = svand_m (p0, z0, 255)) -+ -+/* -+** and_255_s32_m_untied: -+** movprfx z0, z1 -+** uxtb z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (and_255_s32_m_untied, svint32_t, -+ z0 = svand_n_s32_m (p0, z1, 255), -+ z0 = svand_m (p0, z1, 255)) -+ -+/* -+** and_65535_s32_m_tied1: -+** uxth z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (and_65535_s32_m_tied1, svint32_t, -+ z0 = svand_n_s32_m (p0, z0, 65535), -+ z0 = svand_m (p0, z0, 65535)) -+ -+/* -+** and_65535_s32_m_untied: -+** movprfx z0, z1 -+** uxth z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (and_65535_s32_m_untied, svint32_t, -+ z0 = svand_n_s32_m (p0, z1, 65535), -+ z0 = svand_m (p0, z1, 65535)) -+ -+/* -+** and_s32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** and z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (and_s32_z_tied1, svint32_t, -+ z0 = svand_s32_z (p0, z0, z1), -+ z0 = svand_z (p0, z0, z1)) -+ -+/* -+** and_s32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** and z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (and_s32_z_tied2, svint32_t, -+ z0 = svand_s32_z (p0, z1, z0), -+ z0 = svand_z (p0, z1, z0)) -+ -+/* -+** and_s32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** and z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** and z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (and_s32_z_untied, svint32_t, -+ z0 = svand_s32_z (p0, z1, z2), -+ z0 = svand_z (p0, z1, z2)) -+ -+/* -+** and_w0_s32_z_tied1: -+** mov (z[0-9]+\.s), w0 
-+** movprfx z0\.s, p0/z, z0\.s -+** and z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (and_w0_s32_z_tied1, svint32_t, int32_t, -+ z0 = svand_n_s32_z (p0, z0, x0), -+ z0 = svand_z (p0, z0, x0)) -+ -+/* -+** and_w0_s32_z_untied: -+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** and z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** and z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (and_w0_s32_z_untied, svint32_t, int32_t, -+ z0 = svand_n_s32_z (p0, z1, x0), -+ z0 = svand_z (p0, z1, x0)) -+ -+/* -+** and_1_s32_z_tied1: -+** mov (z[0-9]+\.s), #1 -+** movprfx z0\.s, p0/z, z0\.s -+** and z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_s32_z_tied1, svint32_t, -+ z0 = svand_n_s32_z (p0, z0, 1), -+ z0 = svand_z (p0, z0, 1)) -+ -+/* -+** and_1_s32_z_untied: -+** mov (z[0-9]+\.s), #1 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** and z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** and z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_s32_z_untied, svint32_t, -+ z0 = svand_n_s32_z (p0, z1, 1), -+ z0 = svand_z (p0, z1, 1)) -+ -+/* -+** and_255_s32_z_tied1: -+** ( -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** uxtb z0\.s, p0/m, \1\.s -+** | -+** mov (z[0-9]+\.s), #255 -+** movprfx z0\.s, p0/z, z0\.s -+** and z0\.s, p0/m, z0\.s, \1 -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (and_255_s32_z_tied1, svint32_t, -+ z0 = svand_n_s32_z (p0, z0, 255), -+ z0 = svand_z (p0, z0, 255)) -+ -+/* -+** and_255_s32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** uxtb z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (and_255_s32_z_untied, svint32_t, -+ z0 = svand_n_s32_z (p0, z1, 255), -+ z0 = svand_z (p0, z1, 255)) -+ -+/* -+** and_65535_s32_z_tied1: -+** ( -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** uxth z0\.s, p0/m, \1\.s -+** | -+** mov (z[0-9]+\.s), #65535 -+** movprfx z0\.s, p0/z, z0\.s -+** and z0\.s, p0/m, z0\.s, \1 -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (and_65535_s32_z_tied1, svint32_t, -+ z0 = svand_n_s32_z (p0, z0, 65535), -+ z0 = svand_z (p0, z0, 65535)) -+ -+/* -+** and_65535_s32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** uxth z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (and_65535_s32_z_untied, svint32_t, -+ z0 = svand_n_s32_z (p0, z1, 65535), -+ z0 = svand_z (p0, z1, 65535)) -+ -+/* -+** and_s32_x_tied1: -+** and z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (and_s32_x_tied1, svint32_t, -+ z0 = svand_s32_x (p0, z0, z1), -+ z0 = svand_x (p0, z0, z1)) -+ -+/* -+** and_s32_x_tied2: -+** and z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (and_s32_x_tied2, svint32_t, -+ z0 = svand_s32_x (p0, z1, z0), -+ z0 = svand_x (p0, z1, z0)) -+ -+/* -+** and_s32_x_untied: -+** and z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (and_s32_x_untied, svint32_t, -+ z0 = svand_s32_x (p0, z1, z2), -+ z0 = svand_x (p0, z1, z2)) -+ -+/* -+** and_w0_s32_x_tied1: -+** mov (z[0-9]+)\.s, w0 -+** and z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (and_w0_s32_x_tied1, svint32_t, int32_t, -+ z0 = svand_n_s32_x (p0, z0, x0), -+ z0 = svand_x (p0, z0, x0)) -+ -+/* -+** and_w0_s32_x_untied: -+** mov (z[0-9]+)\.s, w0 -+** and z0\.d, (z1\.d, \1\.d|\1\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (and_w0_s32_x_untied, svint32_t, int32_t, -+ z0 = svand_n_s32_x (p0, z1, x0), -+ z0 = svand_x (p0, z1, x0)) -+ -+/* -+** and_1_s32_x_tied1: -+** and z0\.s, z0\.s, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_s32_x_tied1, svint32_t, 
-+ z0 = svand_n_s32_x (p0, z0, 1), -+ z0 = svand_x (p0, z0, 1)) -+ -+/* -+** and_1_s32_x_untied: -+** movprfx z0, z1 -+** and z0\.s, z0\.s, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_s32_x_untied, svint32_t, -+ z0 = svand_n_s32_x (p0, z1, 1), -+ z0 = svand_x (p0, z1, 1)) -+ -+/* -+** and_127_s32_x: -+** and z0\.s, z0\.s, #0x7f -+** ret -+*/ -+TEST_UNIFORM_Z (and_127_s32_x, svint32_t, -+ z0 = svand_n_s32_x (p0, z0, 127), -+ z0 = svand_x (p0, z0, 127)) -+ -+/* -+** and_128_s32_x: -+** and z0\.s, z0\.s, #0x80 -+** ret -+*/ -+TEST_UNIFORM_Z (and_128_s32_x, svint32_t, -+ z0 = svand_n_s32_x (p0, z0, 128), -+ z0 = svand_x (p0, z0, 128)) -+ -+/* -+** and_255_s32_x: -+** and z0\.s, z0\.s, #0xff -+** ret -+*/ -+TEST_UNIFORM_Z (and_255_s32_x, svint32_t, -+ z0 = svand_n_s32_x (p0, z0, 255), -+ z0 = svand_x (p0, z0, 255)) -+ -+/* -+** and_256_s32_x: -+** and z0\.s, z0\.s, #0x100 -+** ret -+*/ -+TEST_UNIFORM_Z (and_256_s32_x, svint32_t, -+ z0 = svand_n_s32_x (p0, z0, 256), -+ z0 = svand_x (p0, z0, 256)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (and_257_s32_x, svint32_t, -+ z0 = svand_n_s32_x (p0, z0, 257), -+ z0 = svand_x (p0, z0, 257)) -+ -+/* -+** and_512_s32_x: -+** and z0\.s, z0\.s, #0x200 -+** ret -+*/ -+TEST_UNIFORM_Z (and_512_s32_x, svint32_t, -+ z0 = svand_n_s32_x (p0, z0, 512), -+ z0 = svand_x (p0, z0, 512)) -+ -+/* -+** and_65280_s32_x: -+** and z0\.s, z0\.s, #0xff00 -+** ret -+*/ -+TEST_UNIFORM_Z (and_65280_s32_x, svint32_t, -+ z0 = svand_n_s32_x (p0, z0, 0xff00), -+ z0 = svand_x (p0, z0, 0xff00)) -+ -+/* -+** and_m127_s32_x: -+** and z0\.s, z0\.s, #0xffffff81 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m127_s32_x, svint32_t, -+ z0 = svand_n_s32_x (p0, z0, -127), -+ z0 = svand_x (p0, z0, -127)) -+ -+/* -+** and_m128_s32_x: -+** and z0\.s, z0\.s, #0xffffff80 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m128_s32_x, svint32_t, -+ z0 = svand_n_s32_x (p0, z0, -128), -+ z0 = svand_x (p0, z0, -128)) -+ -+/* -+** and_m255_s32_x: -+** and z0\.s, z0\.s, #0xffffff01 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m255_s32_x, svint32_t, -+ z0 = svand_n_s32_x (p0, z0, -255), -+ z0 = svand_x (p0, z0, -255)) -+ -+/* -+** and_m256_s32_x: -+** and z0\.s, z0\.s, #0xffffff00 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m256_s32_x, svint32_t, -+ z0 = svand_n_s32_x (p0, z0, -256), -+ z0 = svand_x (p0, z0, -256)) -+ -+/* -+** and_m257_s32_x: -+** and z0\.s, z0\.s, #0xfffffeff -+** ret -+*/ -+TEST_UNIFORM_Z (and_m257_s32_x, svint32_t, -+ z0 = svand_n_s32_x (p0, z0, -257), -+ z0 = svand_x (p0, z0, -257)) -+ -+/* -+** and_m512_s32_x: -+** and z0\.s, z0\.s, #0xfffffe00 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m512_s32_x, svint32_t, -+ z0 = svand_n_s32_x (p0, z0, -512), -+ z0 = svand_x (p0, z0, -512)) -+ -+/* -+** and_m32768_s32_x: -+** and z0\.s, z0\.s, #0xffff8000 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m32768_s32_x, svint32_t, -+ z0 = svand_n_s32_x (p0, z0, -0x8000), -+ z0 = svand_x (p0, z0, -0x8000)) -+ -+/* -+** and_5_s32_x: -+** mov (z[0-9]+)\.s, #5 -+** and z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (and_5_s32_x, svint32_t, -+ z0 = svand_n_s32_x (p0, z0, 5), -+ z0 = svand_x (p0, z0, 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_s64.c -new file mode 100644 -index 000000000..8868258dc ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_s64.c -@@ -0,0 +1,510 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** and_s64_m_tied1: -+** and z0\.d, p0/m, z0\.d, 
z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (and_s64_m_tied1, svint64_t, -+ z0 = svand_s64_m (p0, z0, z1), -+ z0 = svand_m (p0, z0, z1)) -+ -+/* -+** and_s64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** and z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_s64_m_tied2, svint64_t, -+ z0 = svand_s64_m (p0, z1, z0), -+ z0 = svand_m (p0, z1, z0)) -+ -+/* -+** and_s64_m_untied: -+** movprfx z0, z1 -+** and z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (and_s64_m_untied, svint64_t, -+ z0 = svand_s64_m (p0, z1, z2), -+ z0 = svand_m (p0, z1, z2)) -+ -+/* -+** and_x0_s64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** and z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (and_x0_s64_m_tied1, svint64_t, int64_t, -+ z0 = svand_n_s64_m (p0, z0, x0), -+ z0 = svand_m (p0, z0, x0)) -+ -+/* -+** and_x0_s64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** and z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (and_x0_s64_m_untied, svint64_t, int64_t, -+ z0 = svand_n_s64_m (p0, z1, x0), -+ z0 = svand_m (p0, z1, x0)) -+ -+/* -+** and_1_s64_m_tied1: -+** mov (z[0-9]+\.d), #1 -+** and z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_s64_m_tied1, svint64_t, -+ z0 = svand_n_s64_m (p0, z0, 1), -+ z0 = svand_m (p0, z0, 1)) -+ -+/* -+** and_1_s64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #1 -+** movprfx z0, z1 -+** and z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_s64_m_untied, svint64_t, -+ z0 = svand_n_s64_m (p0, z1, 1), -+ z0 = svand_m (p0, z1, 1)) -+ -+/* -+** and_m2_s64_m: -+** mov (z[0-9]+\.d), #-2 -+** and z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m2_s64_m, svint64_t, -+ z0 = svand_n_s64_m (p0, z0, -2), -+ z0 = svand_m (p0, z0, -2)) -+ -+/* -+** and_255_s64_m_tied1: -+** uxtb z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (and_255_s64_m_tied1, svint64_t, -+ z0 = svand_n_s64_m (p0, z0, 255), -+ z0 = svand_m (p0, z0, 255)) -+ -+/* -+** and_255_s64_m_untied: -+** movprfx z0, z1 -+** uxtb z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (and_255_s64_m_untied, svint64_t, -+ z0 = svand_n_s64_m (p0, z1, 255), -+ z0 = svand_m (p0, z1, 255)) -+ -+/* -+** and_65535_s64_m_tied1: -+** uxth z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (and_65535_s64_m_tied1, svint64_t, -+ z0 = svand_n_s64_m (p0, z0, 65535), -+ z0 = svand_m (p0, z0, 65535)) -+ -+/* -+** and_65535_s64_m_untied: -+** movprfx z0, z1 -+** uxth z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (and_65535_s64_m_untied, svint64_t, -+ z0 = svand_n_s64_m (p0, z1, 65535), -+ z0 = svand_m (p0, z1, 65535)) -+ -+/* -+** and_0xffffffff_s64_m_tied1: -+** uxtw z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (and_0xffffffff_s64_m_tied1, svint64_t, -+ z0 = svand_n_s64_m (p0, z0, 0xffffffff), -+ z0 = svand_m (p0, z0, 0xffffffff)) -+ -+/* -+** and_0xffffffff_s64_m_untied: -+** movprfx z0, z1 -+** uxtw z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (and_0xffffffff_s64_m_untied, svint64_t, -+ z0 = svand_n_s64_m (p0, z1, 0xffffffff), -+ z0 = svand_m (p0, z1, 0xffffffff)) -+ -+/* -+** and_s64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** and z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (and_s64_z_tied1, svint64_t, -+ z0 = svand_s64_z (p0, z0, z1), -+ z0 = svand_z (p0, z0, z1)) -+ -+/* -+** and_s64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** and z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (and_s64_z_tied2, svint64_t, -+ z0 = svand_s64_z (p0, z1, z0), -+ z0 = svand_z (p0, z1, z0)) -+ -+/* -+** and_s64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, 
z1\.d -+** and z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** and z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (and_s64_z_untied, svint64_t, -+ z0 = svand_s64_z (p0, z1, z2), -+ z0 = svand_z (p0, z1, z2)) -+ -+/* -+** and_x0_s64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** and z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (and_x0_s64_z_tied1, svint64_t, int64_t, -+ z0 = svand_n_s64_z (p0, z0, x0), -+ z0 = svand_z (p0, z0, x0)) -+ -+/* -+** and_x0_s64_z_untied: -+** mov (z[0-9]+\.d), x0 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** and z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** and z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (and_x0_s64_z_untied, svint64_t, int64_t, -+ z0 = svand_n_s64_z (p0, z1, x0), -+ z0 = svand_z (p0, z1, x0)) -+ -+/* -+** and_1_s64_z_tied1: -+** mov (z[0-9]+\.d), #1 -+** movprfx z0\.d, p0/z, z0\.d -+** and z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_s64_z_tied1, svint64_t, -+ z0 = svand_n_s64_z (p0, z0, 1), -+ z0 = svand_z (p0, z0, 1)) -+ -+/* -+** and_1_s64_z_untied: -+** mov (z[0-9]+\.d), #1 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** and z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** and z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_s64_z_untied, svint64_t, -+ z0 = svand_n_s64_z (p0, z1, 1), -+ z0 = svand_z (p0, z1, 1)) -+ -+/* -+** and_255_s64_z_tied1: -+** ( -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** uxtb z0\.d, p0/m, \1 -+** | -+** mov (z[0-9]+\.d), #255 -+** movprfx z0\.d, p0/z, z0\.d -+** and z0\.d, p0/m, z0\.d, \1 -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (and_255_s64_z_tied1, svint64_t, -+ z0 = svand_n_s64_z (p0, z0, 255), -+ z0 = svand_z (p0, z0, 255)) -+ -+/* -+** and_255_s64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** uxtb z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (and_255_s64_z_untied, svint64_t, -+ z0 = svand_n_s64_z (p0, z1, 255), -+ z0 = svand_z (p0, z1, 255)) -+ -+/* -+** and_65535_s64_z_tied1: -+** ( -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** uxth z0\.d, p0/m, \1 -+** | -+** mov (z[0-9]+\.d), #65535 -+** movprfx z0\.d, p0/z, z0\.d -+** and z0\.d, p0/m, z0\.d, \1 -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (and_65535_s64_z_tied1, svint64_t, -+ z0 = svand_n_s64_z (p0, z0, 65535), -+ z0 = svand_z (p0, z0, 65535)) -+ -+/* -+** and_65535_s64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** uxth z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (and_65535_s64_z_untied, svint64_t, -+ z0 = svand_n_s64_z (p0, z1, 65535), -+ z0 = svand_z (p0, z1, 65535)) -+ -+/* -+** and_0xffffffff_s64_z_tied1: -+** ( -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** uxtw z0\.d, p0/m, \1 -+** | -+** mov (z[0-9]+\.d), #4294967295 -+** movprfx z0\.d, p0/z, z0\.d -+** and z0\.d, p0/m, z0\.d, \1 -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (and_0xffffffff_s64_z_tied1, svint64_t, -+ z0 = svand_n_s64_z (p0, z0, 0xffffffff), -+ z0 = svand_z (p0, z0, 0xffffffff)) -+ -+/* -+** and_0xffffffff_s64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** uxtw z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (and_0xffffffff_s64_z_untied, svint64_t, -+ z0 = svand_n_s64_z (p0, z1, 0xffffffff), -+ z0 = svand_z (p0, z1, 0xffffffff)) -+ -+/* -+** and_s64_x_tied1: -+** and z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (and_s64_x_tied1, svint64_t, -+ z0 = svand_s64_x (p0, z0, z1), -+ z0 = svand_x (p0, z0, z1)) -+ -+/* -+** and_s64_x_tied2: -+** and z0\.d, (z0\.d, 
z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (and_s64_x_tied2, svint64_t, -+ z0 = svand_s64_x (p0, z1, z0), -+ z0 = svand_x (p0, z1, z0)) -+ -+/* -+** and_s64_x_untied: -+** and z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (and_s64_x_untied, svint64_t, -+ z0 = svand_s64_x (p0, z1, z2), -+ z0 = svand_x (p0, z1, z2)) -+ -+/* -+** and_x0_s64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** and z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (and_x0_s64_x_tied1, svint64_t, int64_t, -+ z0 = svand_n_s64_x (p0, z0, x0), -+ z0 = svand_x (p0, z0, x0)) -+ -+/* -+** and_x0_s64_x_untied: -+** mov (z[0-9]+\.d), x0 -+** and z0\.d, (z1\.d, \1|\1, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (and_x0_s64_x_untied, svint64_t, int64_t, -+ z0 = svand_n_s64_x (p0, z1, x0), -+ z0 = svand_x (p0, z1, x0)) -+ -+/* -+** and_1_s64_x_tied1: -+** and z0\.d, z0\.d, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_s64_x_tied1, svint64_t, -+ z0 = svand_n_s64_x (p0, z0, 1), -+ z0 = svand_x (p0, z0, 1)) -+ -+/* -+** and_1_s64_x_untied: -+** movprfx z0, z1 -+** and z0\.d, z0\.d, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_s64_x_untied, svint64_t, -+ z0 = svand_n_s64_x (p0, z1, 1), -+ z0 = svand_x (p0, z1, 1)) -+ -+/* -+** and_127_s64_x: -+** and z0\.d, z0\.d, #0x7f -+** ret -+*/ -+TEST_UNIFORM_Z (and_127_s64_x, svint64_t, -+ z0 = svand_n_s64_x (p0, z0, 127), -+ z0 = svand_x (p0, z0, 127)) -+ -+/* -+** and_128_s64_x: -+** and z0\.d, z0\.d, #0x80 -+** ret -+*/ -+TEST_UNIFORM_Z (and_128_s64_x, svint64_t, -+ z0 = svand_n_s64_x (p0, z0, 128), -+ z0 = svand_x (p0, z0, 128)) -+ -+/* -+** and_255_s64_x: -+** and z0\.d, z0\.d, #0xff -+** ret -+*/ -+TEST_UNIFORM_Z (and_255_s64_x, svint64_t, -+ z0 = svand_n_s64_x (p0, z0, 255), -+ z0 = svand_x (p0, z0, 255)) -+ -+/* -+** and_256_s64_x: -+** and z0\.d, z0\.d, #0x100 -+** ret -+*/ -+TEST_UNIFORM_Z (and_256_s64_x, svint64_t, -+ z0 = svand_n_s64_x (p0, z0, 256), -+ z0 = svand_x (p0, z0, 256)) -+ -+/* TODO: Bad code and needs fixing. 
*/ -+TEST_UNIFORM_Z (and_257_s64_x, svint64_t, -+ z0 = svand_n_s64_x (p0, z0, 257), -+ z0 = svand_x (p0, z0, 257)) -+ -+/* -+** and_512_s64_x: -+** and z0\.d, z0\.d, #0x200 -+** ret -+*/ -+TEST_UNIFORM_Z (and_512_s64_x, svint64_t, -+ z0 = svand_n_s64_x (p0, z0, 512), -+ z0 = svand_x (p0, z0, 512)) -+ -+/* -+** and_65280_s64_x: -+** and z0\.d, z0\.d, #0xff00 -+** ret -+*/ -+TEST_UNIFORM_Z (and_65280_s64_x, svint64_t, -+ z0 = svand_n_s64_x (p0, z0, 0xff00), -+ z0 = svand_x (p0, z0, 0xff00)) -+ -+/* -+** and_m127_s64_x: -+** and z0\.d, z0\.d, #0xffffffffffffff81 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m127_s64_x, svint64_t, -+ z0 = svand_n_s64_x (p0, z0, -127), -+ z0 = svand_x (p0, z0, -127)) -+ -+/* -+** and_m128_s64_x: -+** and z0\.d, z0\.d, #0xffffffffffffff80 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m128_s64_x, svint64_t, -+ z0 = svand_n_s64_x (p0, z0, -128), -+ z0 = svand_x (p0, z0, -128)) -+ -+/* -+** and_m255_s64_x: -+** and z0\.d, z0\.d, #0xffffffffffffff01 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m255_s64_x, svint64_t, -+ z0 = svand_n_s64_x (p0, z0, -255), -+ z0 = svand_x (p0, z0, -255)) -+ -+/* -+** and_m256_s64_x: -+** and z0\.d, z0\.d, #0xffffffffffffff00 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m256_s64_x, svint64_t, -+ z0 = svand_n_s64_x (p0, z0, -256), -+ z0 = svand_x (p0, z0, -256)) -+ -+/* -+** and_m257_s64_x: -+** and z0\.d, z0\.d, #0xfffffffffffffeff -+** ret -+*/ -+TEST_UNIFORM_Z (and_m257_s64_x, svint64_t, -+ z0 = svand_n_s64_x (p0, z0, -257), -+ z0 = svand_x (p0, z0, -257)) -+ -+/* -+** and_m512_s64_x: -+** and z0\.d, z0\.d, #0xfffffffffffffe00 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m512_s64_x, svint64_t, -+ z0 = svand_n_s64_x (p0, z0, -512), -+ z0 = svand_x (p0, z0, -512)) -+ -+/* -+** and_m32768_s64_x: -+** and z0\.d, z0\.d, #0xffffffffffff8000 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m32768_s64_x, svint64_t, -+ z0 = svand_n_s64_x (p0, z0, -0x8000), -+ z0 = svand_x (p0, z0, -0x8000)) -+ -+/* -+** and_5_s64_x: -+** mov (z[0-9]+\.d), #5 -+** and z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (and_5_s64_x, svint64_t, -+ z0 = svand_n_s64_x (p0, z0, 5), -+ z0 = svand_x (p0, z0, 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_s8.c -new file mode 100644 -index 000000000..61d168d3f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_s8.c -@@ -0,0 +1,294 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** and_s8_m_tied1: -+** and z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (and_s8_m_tied1, svint8_t, -+ z0 = svand_s8_m (p0, z0, z1), -+ z0 = svand_m (p0, z0, z1)) -+ -+/* -+** and_s8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** and z0\.b, p0/m, z0\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (and_s8_m_tied2, svint8_t, -+ z0 = svand_s8_m (p0, z1, z0), -+ z0 = svand_m (p0, z1, z0)) -+ -+/* -+** and_s8_m_untied: -+** movprfx z0, z1 -+** and z0\.b, p0/m, z0\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (and_s8_m_untied, svint8_t, -+ z0 = svand_s8_m (p0, z1, z2), -+ z0 = svand_m (p0, z1, z2)) -+ -+/* -+** and_w0_s8_m_tied1: -+** mov (z[0-9]+\.b), w0 -+** and z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (and_w0_s8_m_tied1, svint8_t, int8_t, -+ z0 = svand_n_s8_m (p0, z0, x0), -+ z0 = svand_m (p0, z0, x0)) -+ -+/* -+** and_w0_s8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** and z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (and_w0_s8_m_untied, svint8_t, int8_t, -+ z0 = 
svand_n_s8_m (p0, z1, x0), -+ z0 = svand_m (p0, z1, x0)) -+ -+/* -+** and_1_s8_m_tied1: -+** mov (z[0-9]+\.b), #1 -+** and z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_s8_m_tied1, svint8_t, -+ z0 = svand_n_s8_m (p0, z0, 1), -+ z0 = svand_m (p0, z0, 1)) -+ -+/* -+** and_1_s8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), #1 -+** movprfx z0, z1 -+** and z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_s8_m_untied, svint8_t, -+ z0 = svand_n_s8_m (p0, z1, 1), -+ z0 = svand_m (p0, z1, 1)) -+ -+/* -+** and_m2_s8_m: -+** mov (z[0-9]+\.b), #-2 -+** and z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m2_s8_m, svint8_t, -+ z0 = svand_n_s8_m (p0, z0, -2), -+ z0 = svand_m (p0, z0, -2)) -+ -+/* -+** and_s8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** and z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (and_s8_z_tied1, svint8_t, -+ z0 = svand_s8_z (p0, z0, z1), -+ z0 = svand_z (p0, z0, z1)) -+ -+/* -+** and_s8_z_tied2: -+** movprfx z0\.b, p0/z, z0\.b -+** and z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (and_s8_z_tied2, svint8_t, -+ z0 = svand_s8_z (p0, z1, z0), -+ z0 = svand_z (p0, z1, z0)) -+ -+/* -+** and_s8_z_untied: -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** and z0\.b, p0/m, z0\.b, z2\.b -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** and z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (and_s8_z_untied, svint8_t, -+ z0 = svand_s8_z (p0, z1, z2), -+ z0 = svand_z (p0, z1, z2)) -+ -+/* -+** and_w0_s8_z_tied1: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** and z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (and_w0_s8_z_tied1, svint8_t, int8_t, -+ z0 = svand_n_s8_z (p0, z0, x0), -+ z0 = svand_z (p0, z0, x0)) -+ -+/* -+** and_w0_s8_z_untied: -+** mov (z[0-9]+\.b), w0 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** and z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** and z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (and_w0_s8_z_untied, svint8_t, int8_t, -+ z0 = svand_n_s8_z (p0, z1, x0), -+ z0 = svand_z (p0, z1, x0)) -+ -+/* -+** and_1_s8_z_tied1: -+** mov (z[0-9]+\.b), #1 -+** movprfx z0\.b, p0/z, z0\.b -+** and z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_s8_z_tied1, svint8_t, -+ z0 = svand_n_s8_z (p0, z0, 1), -+ z0 = svand_z (p0, z0, 1)) -+ -+/* -+** and_1_s8_z_untied: -+** mov (z[0-9]+\.b), #1 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** and z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** and z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_s8_z_untied, svint8_t, -+ z0 = svand_n_s8_z (p0, z1, 1), -+ z0 = svand_z (p0, z1, 1)) -+ -+/* -+** and_s8_x_tied1: -+** and z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (and_s8_x_tied1, svint8_t, -+ z0 = svand_s8_x (p0, z0, z1), -+ z0 = svand_x (p0, z0, z1)) -+ -+/* -+** and_s8_x_tied2: -+** and z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (and_s8_x_tied2, svint8_t, -+ z0 = svand_s8_x (p0, z1, z0), -+ z0 = svand_x (p0, z1, z0)) -+ -+/* -+** and_s8_x_untied: -+** and z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (and_s8_x_untied, svint8_t, -+ z0 = svand_s8_x (p0, z1, z2), -+ z0 = svand_x (p0, z1, z2)) -+ -+/* -+** and_w0_s8_x_tied1: -+** mov (z[0-9]+)\.b, w0 -+** and z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (and_w0_s8_x_tied1, svint8_t, int8_t, -+ z0 = svand_n_s8_x (p0, z0, x0), -+ z0 = svand_x (p0, z0, x0)) -+ -+/* -+** and_w0_s8_x_untied: -+** mov (z[0-9]+)\.b, w0 -+** and z0\.d, (z1\.d, 
\1\.d|\1\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (and_w0_s8_x_untied, svint8_t, int8_t, -+ z0 = svand_n_s8_x (p0, z1, x0), -+ z0 = svand_x (p0, z1, x0)) -+ -+/* -+** and_1_s8_x_tied1: -+** and z0\.b, z0\.b, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_s8_x_tied1, svint8_t, -+ z0 = svand_n_s8_x (p0, z0, 1), -+ z0 = svand_x (p0, z0, 1)) -+ -+/* -+** and_1_s8_x_untied: -+** movprfx z0, z1 -+** and z0\.b, z0\.b, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_s8_x_untied, svint8_t, -+ z0 = svand_n_s8_x (p0, z1, 1), -+ z0 = svand_x (p0, z1, 1)) -+ -+/* -+** and_127_s8_x: -+** and z0\.b, z0\.b, #0x7f -+** ret -+*/ -+TEST_UNIFORM_Z (and_127_s8_x, svint8_t, -+ z0 = svand_n_s8_x (p0, z0, 127), -+ z0 = svand_x (p0, z0, 127)) -+ -+/* -+** and_128_s8_x: -+** and z0\.b, z0\.b, #0x80 -+** ret -+*/ -+TEST_UNIFORM_Z (and_128_s8_x, svint8_t, -+ z0 = svand_n_s8_x (p0, z0, 128), -+ z0 = svand_x (p0, z0, 128)) -+ -+/* -+** and_255_s8_x: -+** ret -+*/ -+TEST_UNIFORM_Z (and_255_s8_x, svint8_t, -+ z0 = svand_n_s8_x (p0, z0, 255), -+ z0 = svand_x (p0, z0, 255)) -+ -+/* -+** and_m127_s8_x: -+** and z0\.b, z0\.b, #0x81 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m127_s8_x, svint8_t, -+ z0 = svand_n_s8_x (p0, z0, -127), -+ z0 = svand_x (p0, z0, -127)) -+ -+/* -+** and_m128_s8_x: -+** and z0\.b, z0\.b, #0x80 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m128_s8_x, svint8_t, -+ z0 = svand_n_s8_x (p0, z0, -128), -+ z0 = svand_x (p0, z0, -128)) -+ -+/* -+** and_5_s8_x: -+** mov (z[0-9]+)\.b, #5 -+** and z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (and_5_s8_x, svint8_t, -+ z0 = svand_n_s8_x (p0, z0, 5), -+ z0 = svand_x (p0, z0, 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_u16.c -new file mode 100644 -index 000000000..875a08d71 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_u16.c -@@ -0,0 +1,422 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** and_u16_m_tied1: -+** and z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (and_u16_m_tied1, svuint16_t, -+ z0 = svand_u16_m (p0, z0, z1), -+ z0 = svand_m (p0, z0, z1)) -+ -+/* -+** and_u16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** and z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (and_u16_m_tied2, svuint16_t, -+ z0 = svand_u16_m (p0, z1, z0), -+ z0 = svand_m (p0, z1, z0)) -+ -+/* -+** and_u16_m_untied: -+** movprfx z0, z1 -+** and z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (and_u16_m_untied, svuint16_t, -+ z0 = svand_u16_m (p0, z1, z2), -+ z0 = svand_m (p0, z1, z2)) -+ -+/* -+** and_w0_u16_m_tied1: -+** mov (z[0-9]+\.h), w0 -+** and z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (and_w0_u16_m_tied1, svuint16_t, uint16_t, -+ z0 = svand_n_u16_m (p0, z0, x0), -+ z0 = svand_m (p0, z0, x0)) -+ -+/* -+** and_w0_u16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** and z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (and_w0_u16_m_untied, svuint16_t, uint16_t, -+ z0 = svand_n_u16_m (p0, z1, x0), -+ z0 = svand_m (p0, z1, x0)) -+ -+/* -+** and_1_u16_m_tied1: -+** mov (z[0-9]+\.h), #1 -+** and z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_u16_m_tied1, svuint16_t, -+ z0 = svand_n_u16_m (p0, z0, 1), -+ z0 = svand_m (p0, z0, 1)) -+ -+/* -+** and_1_u16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), #1 -+** movprfx z0, z1 -+** and z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_u16_m_untied, svuint16_t, 
-+ z0 = svand_n_u16_m (p0, z1, 1), -+ z0 = svand_m (p0, z1, 1)) -+ -+/* -+** and_m2_u16_m: -+** mov (z[0-9]+\.h), #-2 -+** and z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m2_u16_m, svuint16_t, -+ z0 = svand_n_u16_m (p0, z0, -2), -+ z0 = svand_m (p0, z0, -2)) -+ -+/* -+** and_255_u16_m_tied1: -+** uxtb z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (and_255_u16_m_tied1, svuint16_t, -+ z0 = svand_n_u16_m (p0, z0, 255), -+ z0 = svand_m (p0, z0, 255)) -+ -+/* -+** and_255_u16_m_untied: -+** movprfx z0, z1 -+** uxtb z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (and_255_u16_m_untied, svuint16_t, -+ z0 = svand_n_u16_m (p0, z1, 255), -+ z0 = svand_m (p0, z1, 255)) -+ -+/* -+** and_u16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** and z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (and_u16_z_tied1, svuint16_t, -+ z0 = svand_u16_z (p0, z0, z1), -+ z0 = svand_z (p0, z0, z1)) -+ -+/* -+** and_u16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** and z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (and_u16_z_tied2, svuint16_t, -+ z0 = svand_u16_z (p0, z1, z0), -+ z0 = svand_z (p0, z1, z0)) -+ -+/* -+** and_u16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** and z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** and z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (and_u16_z_untied, svuint16_t, -+ z0 = svand_u16_z (p0, z1, z2), -+ z0 = svand_z (p0, z1, z2)) -+ -+/* -+** and_w0_u16_z_tied1: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** and z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (and_w0_u16_z_tied1, svuint16_t, uint16_t, -+ z0 = svand_n_u16_z (p0, z0, x0), -+ z0 = svand_z (p0, z0, x0)) -+ -+/* -+** and_w0_u16_z_untied: -+** mov (z[0-9]+\.h), w0 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** and z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** and z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (and_w0_u16_z_untied, svuint16_t, uint16_t, -+ z0 = svand_n_u16_z (p0, z1, x0), -+ z0 = svand_z (p0, z1, x0)) -+ -+/* -+** and_1_u16_z_tied1: -+** mov (z[0-9]+\.h), #1 -+** movprfx z0\.h, p0/z, z0\.h -+** and z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_u16_z_tied1, svuint16_t, -+ z0 = svand_n_u16_z (p0, z0, 1), -+ z0 = svand_z (p0, z0, 1)) -+ -+/* -+** and_1_u16_z_untied: -+** mov (z[0-9]+\.h), #1 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** and z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** and z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_u16_z_untied, svuint16_t, -+ z0 = svand_n_u16_z (p0, z1, 1), -+ z0 = svand_z (p0, z1, 1)) -+ -+/* -+** and_255_u16_z_tied1: -+** ( -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, \1\.h -+** uxtb z0\.h, p0/m, \1\.h -+** | -+** mov (z[0-9]+\.h), #255 -+** movprfx z0\.h, p0/z, z0\.h -+** and z0\.h, p0/m, z0\.h, \1 -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (and_255_u16_z_tied1, svuint16_t, -+ z0 = svand_n_u16_z (p0, z0, 255), -+ z0 = svand_z (p0, z0, 255)) -+ -+/* -+** and_255_u16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** uxtb z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (and_255_u16_z_untied, svuint16_t, -+ z0 = svand_n_u16_z (p0, z1, 255), -+ z0 = svand_z (p0, z1, 255)) -+ -+/* -+** and_u16_x_tied1: -+** and z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (and_u16_x_tied1, svuint16_t, -+ z0 = svand_u16_x (p0, z0, z1), -+ z0 = svand_x (p0, z0, z1)) -+ -+/* -+** and_u16_x_tied2: -+** and z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z 
(and_u16_x_tied2, svuint16_t, -+ z0 = svand_u16_x (p0, z1, z0), -+ z0 = svand_x (p0, z1, z0)) -+ -+/* -+** and_u16_x_untied: -+** and z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (and_u16_x_untied, svuint16_t, -+ z0 = svand_u16_x (p0, z1, z2), -+ z0 = svand_x (p0, z1, z2)) -+ -+/* -+** and_w0_u16_x_tied1: -+** mov (z[0-9]+)\.h, w0 -+** and z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (and_w0_u16_x_tied1, svuint16_t, uint16_t, -+ z0 = svand_n_u16_x (p0, z0, x0), -+ z0 = svand_x (p0, z0, x0)) -+ -+/* -+** and_w0_u16_x_untied: -+** mov (z[0-9]+)\.h, w0 -+** and z0\.d, (z1\.d, \1\.d|\1\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (and_w0_u16_x_untied, svuint16_t, uint16_t, -+ z0 = svand_n_u16_x (p0, z1, x0), -+ z0 = svand_x (p0, z1, x0)) -+ -+/* -+** and_1_u16_x_tied1: -+** and z0\.h, z0\.h, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_u16_x_tied1, svuint16_t, -+ z0 = svand_n_u16_x (p0, z0, 1), -+ z0 = svand_x (p0, z0, 1)) -+ -+/* -+** and_1_u16_x_untied: -+** movprfx z0, z1 -+** and z0\.h, z0\.h, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_u16_x_untied, svuint16_t, -+ z0 = svand_n_u16_x (p0, z1, 1), -+ z0 = svand_x (p0, z1, 1)) -+ -+/* -+** and_127_u16_x: -+** and z0\.h, z0\.h, #0x7f -+** ret -+*/ -+TEST_UNIFORM_Z (and_127_u16_x, svuint16_t, -+ z0 = svand_n_u16_x (p0, z0, 127), -+ z0 = svand_x (p0, z0, 127)) -+ -+/* -+** and_128_u16_x: -+** and z0\.h, z0\.h, #0x80 -+** ret -+*/ -+TEST_UNIFORM_Z (and_128_u16_x, svuint16_t, -+ z0 = svand_n_u16_x (p0, z0, 128), -+ z0 = svand_x (p0, z0, 128)) -+ -+/* -+** and_255_u16_x: -+** and z0\.h, z0\.h, #0xff -+** ret -+*/ -+TEST_UNIFORM_Z (and_255_u16_x, svuint16_t, -+ z0 = svand_n_u16_x (p0, z0, 255), -+ z0 = svand_x (p0, z0, 255)) -+ -+/* -+** and_256_u16_x: -+** and z0\.h, z0\.h, #0x100 -+** ret -+*/ -+TEST_UNIFORM_Z (and_256_u16_x, svuint16_t, -+ z0 = svand_n_u16_x (p0, z0, 256), -+ z0 = svand_x (p0, z0, 256)) -+ -+/* -+** and_257_u16_x: -+** and z0\.h, z0\.h, #0x101 -+** ret -+*/ -+TEST_UNIFORM_Z (and_257_u16_x, svuint16_t, -+ z0 = svand_n_u16_x (p0, z0, 257), -+ z0 = svand_x (p0, z0, 257)) -+ -+/* -+** and_512_u16_x: -+** and z0\.h, z0\.h, #0x200 -+** ret -+*/ -+TEST_UNIFORM_Z (and_512_u16_x, svuint16_t, -+ z0 = svand_n_u16_x (p0, z0, 512), -+ z0 = svand_x (p0, z0, 512)) -+ -+/* -+** and_65280_u16_x: -+** and z0\.h, z0\.h, #0xff00 -+** ret -+*/ -+TEST_UNIFORM_Z (and_65280_u16_x, svuint16_t, -+ z0 = svand_n_u16_x (p0, z0, 0xff00), -+ z0 = svand_x (p0, z0, 0xff00)) -+ -+/* -+** and_m127_u16_x: -+** and z0\.h, z0\.h, #0xff81 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m127_u16_x, svuint16_t, -+ z0 = svand_n_u16_x (p0, z0, -127), -+ z0 = svand_x (p0, z0, -127)) -+ -+/* -+** and_m128_u16_x: -+** and z0\.h, z0\.h, #0xff80 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m128_u16_x, svuint16_t, -+ z0 = svand_n_u16_x (p0, z0, -128), -+ z0 = svand_x (p0, z0, -128)) -+ -+/* -+** and_m255_u16_x: -+** and z0\.h, z0\.h, #0xff01 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m255_u16_x, svuint16_t, -+ z0 = svand_n_u16_x (p0, z0, -255), -+ z0 = svand_x (p0, z0, -255)) -+ -+/* -+** and_m256_u16_x: -+** and z0\.h, z0\.h, #0xff00 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m256_u16_x, svuint16_t, -+ z0 = svand_n_u16_x (p0, z0, -256), -+ z0 = svand_x (p0, z0, -256)) -+ -+/* -+** and_m257_u16_x: -+** and z0\.h, z0\.h, #0xfeff -+** ret -+*/ -+TEST_UNIFORM_Z (and_m257_u16_x, svuint16_t, -+ z0 = svand_n_u16_x (p0, z0, -257), -+ z0 = svand_x (p0, z0, -257)) -+ -+/* -+** and_m512_u16_x: -+** and z0\.h, z0\.h, #0xfe00 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m512_u16_x, svuint16_t, 
-+ z0 = svand_n_u16_x (p0, z0, -512), -+ z0 = svand_x (p0, z0, -512)) -+ -+/* -+** and_m32768_u16_x: -+** and z0\.h, z0\.h, #0x8000 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m32768_u16_x, svuint16_t, -+ z0 = svand_n_u16_x (p0, z0, -0x8000), -+ z0 = svand_x (p0, z0, -0x8000)) -+ -+/* -+** and_5_u16_x: -+** mov (z[0-9]+)\.h, #5 -+** and z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (and_5_u16_x, svuint16_t, -+ z0 = svand_n_u16_x (p0, z0, 5), -+ z0 = svand_x (p0, z0, 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_u32.c -new file mode 100644 -index 000000000..80ff50396 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_u32.c -@@ -0,0 +1,464 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** and_u32_m_tied1: -+** and z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (and_u32_m_tied1, svuint32_t, -+ z0 = svand_u32_m (p0, z0, z1), -+ z0 = svand_m (p0, z0, z1)) -+ -+/* -+** and_u32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** and z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (and_u32_m_tied2, svuint32_t, -+ z0 = svand_u32_m (p0, z1, z0), -+ z0 = svand_m (p0, z1, z0)) -+ -+/* -+** and_u32_m_untied: -+** movprfx z0, z1 -+** and z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (and_u32_m_untied, svuint32_t, -+ z0 = svand_u32_m (p0, z1, z2), -+ z0 = svand_m (p0, z1, z2)) -+ -+/* -+** and_w0_u32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** and z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (and_w0_u32_m_tied1, svuint32_t, uint32_t, -+ z0 = svand_n_u32_m (p0, z0, x0), -+ z0 = svand_m (p0, z0, x0)) -+ -+/* -+** and_w0_u32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** and z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (and_w0_u32_m_untied, svuint32_t, uint32_t, -+ z0 = svand_n_u32_m (p0, z1, x0), -+ z0 = svand_m (p0, z1, x0)) -+ -+/* -+** and_1_u32_m_tied1: -+** mov (z[0-9]+\.s), #1 -+** and z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_u32_m_tied1, svuint32_t, -+ z0 = svand_n_u32_m (p0, z0, 1), -+ z0 = svand_m (p0, z0, 1)) -+ -+/* -+** and_1_u32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #1 -+** movprfx z0, z1 -+** and z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_u32_m_untied, svuint32_t, -+ z0 = svand_n_u32_m (p0, z1, 1), -+ z0 = svand_m (p0, z1, 1)) -+ -+/* -+** and_m2_u32_m: -+** mov (z[0-9]+\.s), #-2 -+** and z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m2_u32_m, svuint32_t, -+ z0 = svand_n_u32_m (p0, z0, -2), -+ z0 = svand_m (p0, z0, -2)) -+ -+/* -+** and_255_u32_m_tied1: -+** uxtb z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (and_255_u32_m_tied1, svuint32_t, -+ z0 = svand_n_u32_m (p0, z0, 255), -+ z0 = svand_m (p0, z0, 255)) -+ -+/* -+** and_255_u32_m_untied: -+** movprfx z0, z1 -+** uxtb z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (and_255_u32_m_untied, svuint32_t, -+ z0 = svand_n_u32_m (p0, z1, 255), -+ z0 = svand_m (p0, z1, 255)) -+ -+/* -+** and_65535_u32_m_tied1: -+** uxth z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (and_65535_u32_m_tied1, svuint32_t, -+ z0 = svand_n_u32_m (p0, z0, 65535), -+ z0 = svand_m (p0, z0, 65535)) -+ -+/* -+** and_65535_u32_m_untied: -+** movprfx z0, z1 -+** uxth z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (and_65535_u32_m_untied, svuint32_t, -+ z0 = svand_n_u32_m (p0, z1, 65535), -+ z0 = svand_m (p0, z1, 65535)) -+ -+/* -+** and_u32_z_tied1: -+** 
movprfx z0\.s, p0/z, z0\.s -+** and z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (and_u32_z_tied1, svuint32_t, -+ z0 = svand_u32_z (p0, z0, z1), -+ z0 = svand_z (p0, z0, z1)) -+ -+/* -+** and_u32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** and z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (and_u32_z_tied2, svuint32_t, -+ z0 = svand_u32_z (p0, z1, z0), -+ z0 = svand_z (p0, z1, z0)) -+ -+/* -+** and_u32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** and z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** and z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (and_u32_z_untied, svuint32_t, -+ z0 = svand_u32_z (p0, z1, z2), -+ z0 = svand_z (p0, z1, z2)) -+ -+/* -+** and_w0_u32_z_tied1: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** and z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (and_w0_u32_z_tied1, svuint32_t, uint32_t, -+ z0 = svand_n_u32_z (p0, z0, x0), -+ z0 = svand_z (p0, z0, x0)) -+ -+/* -+** and_w0_u32_z_untied: -+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** and z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** and z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (and_w0_u32_z_untied, svuint32_t, uint32_t, -+ z0 = svand_n_u32_z (p0, z1, x0), -+ z0 = svand_z (p0, z1, x0)) -+ -+/* -+** and_1_u32_z_tied1: -+** mov (z[0-9]+\.s), #1 -+** movprfx z0\.s, p0/z, z0\.s -+** and z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_u32_z_tied1, svuint32_t, -+ z0 = svand_n_u32_z (p0, z0, 1), -+ z0 = svand_z (p0, z0, 1)) -+ -+/* -+** and_1_u32_z_untied: -+** mov (z[0-9]+\.s), #1 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** and z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** and z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_u32_z_untied, svuint32_t, -+ z0 = svand_n_u32_z (p0, z1, 1), -+ z0 = svand_z (p0, z1, 1)) -+ -+/* -+** and_255_u32_z_tied1: -+** ( -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** uxtb z0\.s, p0/m, \1\.s -+** | -+** mov (z[0-9]+\.s), #255 -+** movprfx z0\.s, p0/z, z0\.s -+** and z0\.s, p0/m, z0\.s, \1 -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (and_255_u32_z_tied1, svuint32_t, -+ z0 = svand_n_u32_z (p0, z0, 255), -+ z0 = svand_z (p0, z0, 255)) -+ -+/* -+** and_255_u32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** uxtb z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (and_255_u32_z_untied, svuint32_t, -+ z0 = svand_n_u32_z (p0, z1, 255), -+ z0 = svand_z (p0, z1, 255)) -+ -+/* -+** and_65535_u32_z_tied1: -+** ( -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** uxth z0\.s, p0/m, \1\.s -+** | -+** mov (z[0-9]+\.s), #65535 -+** movprfx z0\.s, p0/z, z0\.s -+** and z0\.s, p0/m, z0\.s, \1 -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (and_65535_u32_z_tied1, svuint32_t, -+ z0 = svand_n_u32_z (p0, z0, 65535), -+ z0 = svand_z (p0, z0, 65535)) -+ -+/* -+** and_65535_u32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** uxth z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (and_65535_u32_z_untied, svuint32_t, -+ z0 = svand_n_u32_z (p0, z1, 65535), -+ z0 = svand_z (p0, z1, 65535)) -+ -+/* -+** and_u32_x_tied1: -+** and z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (and_u32_x_tied1, svuint32_t, -+ z0 = svand_u32_x (p0, z0, z1), -+ z0 = svand_x (p0, z0, z1)) -+ -+/* -+** and_u32_x_tied2: -+** and z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (and_u32_x_tied2, svuint32_t, -+ z0 = svand_u32_x (p0, z1, z0), -+ z0 = svand_x (p0, z1, z0)) -+ -+/* -+** 
and_u32_x_untied: -+** and z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (and_u32_x_untied, svuint32_t, -+ z0 = svand_u32_x (p0, z1, z2), -+ z0 = svand_x (p0, z1, z2)) -+ -+/* -+** and_w0_u32_x_tied1: -+** mov (z[0-9]+)\.s, w0 -+** and z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (and_w0_u32_x_tied1, svuint32_t, uint32_t, -+ z0 = svand_n_u32_x (p0, z0, x0), -+ z0 = svand_x (p0, z0, x0)) -+ -+/* -+** and_w0_u32_x_untied: -+** mov (z[0-9]+)\.s, w0 -+** and z0\.d, (z1\.d, \1\.d|\1\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (and_w0_u32_x_untied, svuint32_t, uint32_t, -+ z0 = svand_n_u32_x (p0, z1, x0), -+ z0 = svand_x (p0, z1, x0)) -+ -+/* -+** and_1_u32_x_tied1: -+** and z0\.s, z0\.s, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_u32_x_tied1, svuint32_t, -+ z0 = svand_n_u32_x (p0, z0, 1), -+ z0 = svand_x (p0, z0, 1)) -+ -+/* -+** and_1_u32_x_untied: -+** movprfx z0, z1 -+** and z0\.s, z0\.s, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_u32_x_untied, svuint32_t, -+ z0 = svand_n_u32_x (p0, z1, 1), -+ z0 = svand_x (p0, z1, 1)) -+ -+/* -+** and_127_u32_x: -+** and z0\.s, z0\.s, #0x7f -+** ret -+*/ -+TEST_UNIFORM_Z (and_127_u32_x, svuint32_t, -+ z0 = svand_n_u32_x (p0, z0, 127), -+ z0 = svand_x (p0, z0, 127)) -+ -+/* -+** and_128_u32_x: -+** and z0\.s, z0\.s, #0x80 -+** ret -+*/ -+TEST_UNIFORM_Z (and_128_u32_x, svuint32_t, -+ z0 = svand_n_u32_x (p0, z0, 128), -+ z0 = svand_x (p0, z0, 128)) -+ -+/* -+** and_255_u32_x: -+** and z0\.s, z0\.s, #0xff -+** ret -+*/ -+TEST_UNIFORM_Z (and_255_u32_x, svuint32_t, -+ z0 = svand_n_u32_x (p0, z0, 255), -+ z0 = svand_x (p0, z0, 255)) -+ -+/* -+** and_256_u32_x: -+** and z0\.s, z0\.s, #0x100 -+** ret -+*/ -+TEST_UNIFORM_Z (and_256_u32_x, svuint32_t, -+ z0 = svand_n_u32_x (p0, z0, 256), -+ z0 = svand_x (p0, z0, 256)) -+ -+/* TODO: Bad code and needs fixing. 
*/ -+TEST_UNIFORM_Z (and_257_u32_x, svuint32_t, -+ z0 = svand_n_u32_x (p0, z0, 257), -+ z0 = svand_x (p0, z0, 257)) -+ -+/* -+** and_512_u32_x: -+** and z0\.s, z0\.s, #0x200 -+** ret -+*/ -+TEST_UNIFORM_Z (and_512_u32_x, svuint32_t, -+ z0 = svand_n_u32_x (p0, z0, 512), -+ z0 = svand_x (p0, z0, 512)) -+ -+/* -+** and_65280_u32_x: -+** and z0\.s, z0\.s, #0xff00 -+** ret -+*/ -+TEST_UNIFORM_Z (and_65280_u32_x, svuint32_t, -+ z0 = svand_n_u32_x (p0, z0, 0xff00), -+ z0 = svand_x (p0, z0, 0xff00)) -+ -+/* -+** and_m127_u32_x: -+** and z0\.s, z0\.s, #0xffffff81 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m127_u32_x, svuint32_t, -+ z0 = svand_n_u32_x (p0, z0, -127), -+ z0 = svand_x (p0, z0, -127)) -+ -+/* -+** and_m128_u32_x: -+** and z0\.s, z0\.s, #0xffffff80 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m128_u32_x, svuint32_t, -+ z0 = svand_n_u32_x (p0, z0, -128), -+ z0 = svand_x (p0, z0, -128)) -+ -+/* -+** and_m255_u32_x: -+** and z0\.s, z0\.s, #0xffffff01 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m255_u32_x, svuint32_t, -+ z0 = svand_n_u32_x (p0, z0, -255), -+ z0 = svand_x (p0, z0, -255)) -+ -+/* -+** and_m256_u32_x: -+** and z0\.s, z0\.s, #0xffffff00 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m256_u32_x, svuint32_t, -+ z0 = svand_n_u32_x (p0, z0, -256), -+ z0 = svand_x (p0, z0, -256)) -+ -+/* -+** and_m257_u32_x: -+** and z0\.s, z0\.s, #0xfffffeff -+** ret -+*/ -+TEST_UNIFORM_Z (and_m257_u32_x, svuint32_t, -+ z0 = svand_n_u32_x (p0, z0, -257), -+ z0 = svand_x (p0, z0, -257)) -+ -+/* -+** and_m512_u32_x: -+** and z0\.s, z0\.s, #0xfffffe00 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m512_u32_x, svuint32_t, -+ z0 = svand_n_u32_x (p0, z0, -512), -+ z0 = svand_x (p0, z0, -512)) -+ -+/* -+** and_m32768_u32_x: -+** and z0\.s, z0\.s, #0xffff8000 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m32768_u32_x, svuint32_t, -+ z0 = svand_n_u32_x (p0, z0, -0x8000), -+ z0 = svand_x (p0, z0, -0x8000)) -+ -+/* -+** and_5_u32_x: -+** mov (z[0-9]+)\.s, #5 -+** and z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (and_5_u32_x, svuint32_t, -+ z0 = svand_n_u32_x (p0, z0, 5), -+ z0 = svand_x (p0, z0, 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_u64.c -new file mode 100644 -index 000000000..906b19c37 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_u64.c -@@ -0,0 +1,510 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** and_u64_m_tied1: -+** and z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (and_u64_m_tied1, svuint64_t, -+ z0 = svand_u64_m (p0, z0, z1), -+ z0 = svand_m (p0, z0, z1)) -+ -+/* -+** and_u64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** and z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_u64_m_tied2, svuint64_t, -+ z0 = svand_u64_m (p0, z1, z0), -+ z0 = svand_m (p0, z1, z0)) -+ -+/* -+** and_u64_m_untied: -+** movprfx z0, z1 -+** and z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (and_u64_m_untied, svuint64_t, -+ z0 = svand_u64_m (p0, z1, z2), -+ z0 = svand_m (p0, z1, z2)) -+ -+/* -+** and_x0_u64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** and z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (and_x0_u64_m_tied1, svuint64_t, uint64_t, -+ z0 = svand_n_u64_m (p0, z0, x0), -+ z0 = svand_m (p0, z0, x0)) -+ -+/* -+** and_x0_u64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** and z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (and_x0_u64_m_untied, svuint64_t, uint64_t, -+ z0 = svand_n_u64_m (p0, z1, x0), -+ 
z0 = svand_m (p0, z1, x0)) -+ -+/* -+** and_1_u64_m_tied1: -+** mov (z[0-9]+\.d), #1 -+** and z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_u64_m_tied1, svuint64_t, -+ z0 = svand_n_u64_m (p0, z0, 1), -+ z0 = svand_m (p0, z0, 1)) -+ -+/* -+** and_1_u64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #1 -+** movprfx z0, z1 -+** and z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_u64_m_untied, svuint64_t, -+ z0 = svand_n_u64_m (p0, z1, 1), -+ z0 = svand_m (p0, z1, 1)) -+ -+/* -+** and_m2_u64_m: -+** mov (z[0-9]+\.d), #-2 -+** and z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m2_u64_m, svuint64_t, -+ z0 = svand_n_u64_m (p0, z0, -2), -+ z0 = svand_m (p0, z0, -2)) -+ -+/* -+** and_255_u64_m_tied1: -+** uxtb z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (and_255_u64_m_tied1, svuint64_t, -+ z0 = svand_n_u64_m (p0, z0, 255), -+ z0 = svand_m (p0, z0, 255)) -+ -+/* -+** and_255_u64_m_untied: -+** movprfx z0, z1 -+** uxtb z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (and_255_u64_m_untied, svuint64_t, -+ z0 = svand_n_u64_m (p0, z1, 255), -+ z0 = svand_m (p0, z1, 255)) -+ -+/* -+** and_65535_u64_m_tied1: -+** uxth z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (and_65535_u64_m_tied1, svuint64_t, -+ z0 = svand_n_u64_m (p0, z0, 65535), -+ z0 = svand_m (p0, z0, 65535)) -+ -+/* -+** and_65535_u64_m_untied: -+** movprfx z0, z1 -+** uxth z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (and_65535_u64_m_untied, svuint64_t, -+ z0 = svand_n_u64_m (p0, z1, 65535), -+ z0 = svand_m (p0, z1, 65535)) -+ -+/* -+** and_0xffffffff_u64_m_tied1: -+** uxtw z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (and_0xffffffff_u64_m_tied1, svuint64_t, -+ z0 = svand_n_u64_m (p0, z0, 0xffffffff), -+ z0 = svand_m (p0, z0, 0xffffffff)) -+ -+/* -+** and_0xffffffff_u64_m_untied: -+** movprfx z0, z1 -+** uxtw z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (and_0xffffffff_u64_m_untied, svuint64_t, -+ z0 = svand_n_u64_m (p0, z1, 0xffffffff), -+ z0 = svand_m (p0, z1, 0xffffffff)) -+ -+/* -+** and_u64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** and z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (and_u64_z_tied1, svuint64_t, -+ z0 = svand_u64_z (p0, z0, z1), -+ z0 = svand_z (p0, z0, z1)) -+ -+/* -+** and_u64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** and z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (and_u64_z_tied2, svuint64_t, -+ z0 = svand_u64_z (p0, z1, z0), -+ z0 = svand_z (p0, z1, z0)) -+ -+/* -+** and_u64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** and z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** and z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (and_u64_z_untied, svuint64_t, -+ z0 = svand_u64_z (p0, z1, z2), -+ z0 = svand_z (p0, z1, z2)) -+ -+/* -+** and_x0_u64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** and z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (and_x0_u64_z_tied1, svuint64_t, uint64_t, -+ z0 = svand_n_u64_z (p0, z0, x0), -+ z0 = svand_z (p0, z0, x0)) -+ -+/* -+** and_x0_u64_z_untied: -+** mov (z[0-9]+\.d), x0 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** and z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** and z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (and_x0_u64_z_untied, svuint64_t, uint64_t, -+ z0 = svand_n_u64_z (p0, z1, x0), -+ z0 = svand_z (p0, z1, x0)) -+ -+/* -+** and_1_u64_z_tied1: -+** mov (z[0-9]+\.d), #1 -+** movprfx z0\.d, p0/z, z0\.d -+** and z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z 
(and_1_u64_z_tied1, svuint64_t, -+ z0 = svand_n_u64_z (p0, z0, 1), -+ z0 = svand_z (p0, z0, 1)) -+ -+/* -+** and_1_u64_z_untied: -+** mov (z[0-9]+\.d), #1 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** and z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** and z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_u64_z_untied, svuint64_t, -+ z0 = svand_n_u64_z (p0, z1, 1), -+ z0 = svand_z (p0, z1, 1)) -+ -+/* -+** and_255_u64_z_tied1: -+** ( -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** uxtb z0\.d, p0/m, \1 -+** | -+** mov (z[0-9]+\.d), #255 -+** movprfx z0\.d, p0/z, z0\.d -+** and z0\.d, p0/m, z0\.d, \1 -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (and_255_u64_z_tied1, svuint64_t, -+ z0 = svand_n_u64_z (p0, z0, 255), -+ z0 = svand_z (p0, z0, 255)) -+ -+/* -+** and_255_u64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** uxtb z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (and_255_u64_z_untied, svuint64_t, -+ z0 = svand_n_u64_z (p0, z1, 255), -+ z0 = svand_z (p0, z1, 255)) -+ -+/* -+** and_65535_u64_z_tied1: -+** ( -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** uxth z0\.d, p0/m, \1 -+** | -+** mov (z[0-9]+\.d), #65535 -+** movprfx z0\.d, p0/z, z0\.d -+** and z0\.d, p0/m, z0\.d, \1 -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (and_65535_u64_z_tied1, svuint64_t, -+ z0 = svand_n_u64_z (p0, z0, 65535), -+ z0 = svand_z (p0, z0, 65535)) -+ -+/* -+** and_65535_u64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** uxth z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (and_65535_u64_z_untied, svuint64_t, -+ z0 = svand_n_u64_z (p0, z1, 65535), -+ z0 = svand_z (p0, z1, 65535)) -+ -+/* -+** and_0xffffffff_u64_z_tied1: -+** ( -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** uxtw z0\.d, p0/m, \1 -+** | -+** mov (z[0-9]+\.d), #4294967295 -+** movprfx z0\.d, p0/z, z0\.d -+** and z0\.d, p0/m, z0\.d, \1 -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (and_0xffffffff_u64_z_tied1, svuint64_t, -+ z0 = svand_n_u64_z (p0, z0, 0xffffffff), -+ z0 = svand_z (p0, z0, 0xffffffff)) -+ -+/* -+** and_0xffffffff_u64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** uxtw z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (and_0xffffffff_u64_z_untied, svuint64_t, -+ z0 = svand_n_u64_z (p0, z1, 0xffffffff), -+ z0 = svand_z (p0, z1, 0xffffffff)) -+ -+/* -+** and_u64_x_tied1: -+** and z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (and_u64_x_tied1, svuint64_t, -+ z0 = svand_u64_x (p0, z0, z1), -+ z0 = svand_x (p0, z0, z1)) -+ -+/* -+** and_u64_x_tied2: -+** and z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (and_u64_x_tied2, svuint64_t, -+ z0 = svand_u64_x (p0, z1, z0), -+ z0 = svand_x (p0, z1, z0)) -+ -+/* -+** and_u64_x_untied: -+** and z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (and_u64_x_untied, svuint64_t, -+ z0 = svand_u64_x (p0, z1, z2), -+ z0 = svand_x (p0, z1, z2)) -+ -+/* -+** and_x0_u64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** and z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (and_x0_u64_x_tied1, svuint64_t, uint64_t, -+ z0 = svand_n_u64_x (p0, z0, x0), -+ z0 = svand_x (p0, z0, x0)) -+ -+/* -+** and_x0_u64_x_untied: -+** mov (z[0-9]+\.d), x0 -+** and z0\.d, (z1\.d, \1|\1, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (and_x0_u64_x_untied, svuint64_t, uint64_t, -+ z0 = svand_n_u64_x (p0, z1, x0), -+ z0 = svand_x (p0, z1, x0)) -+ -+/* -+** and_1_u64_x_tied1: -+** and z0\.d, z0\.d, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_u64_x_tied1, svuint64_t, -+ z0 = svand_n_u64_x (p0, z0, 1), -+ z0 = svand_x (p0, z0, 
1)) -+ -+/* -+** and_1_u64_x_untied: -+** movprfx z0, z1 -+** and z0\.d, z0\.d, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_u64_x_untied, svuint64_t, -+ z0 = svand_n_u64_x (p0, z1, 1), -+ z0 = svand_x (p0, z1, 1)) -+ -+/* -+** and_127_u64_x: -+** and z0\.d, z0\.d, #0x7f -+** ret -+*/ -+TEST_UNIFORM_Z (and_127_u64_x, svuint64_t, -+ z0 = svand_n_u64_x (p0, z0, 127), -+ z0 = svand_x (p0, z0, 127)) -+ -+/* -+** and_128_u64_x: -+** and z0\.d, z0\.d, #0x80 -+** ret -+*/ -+TEST_UNIFORM_Z (and_128_u64_x, svuint64_t, -+ z0 = svand_n_u64_x (p0, z0, 128), -+ z0 = svand_x (p0, z0, 128)) -+ -+/* -+** and_255_u64_x: -+** and z0\.d, z0\.d, #0xff -+** ret -+*/ -+TEST_UNIFORM_Z (and_255_u64_x, svuint64_t, -+ z0 = svand_n_u64_x (p0, z0, 255), -+ z0 = svand_x (p0, z0, 255)) -+ -+/* -+** and_256_u64_x: -+** and z0\.d, z0\.d, #0x100 -+** ret -+*/ -+TEST_UNIFORM_Z (and_256_u64_x, svuint64_t, -+ z0 = svand_n_u64_x (p0, z0, 256), -+ z0 = svand_x (p0, z0, 256)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (and_257_u64_x, svuint64_t, -+ z0 = svand_n_u64_x (p0, z0, 257), -+ z0 = svand_x (p0, z0, 257)) -+ -+/* -+** and_512_u64_x: -+** and z0\.d, z0\.d, #0x200 -+** ret -+*/ -+TEST_UNIFORM_Z (and_512_u64_x, svuint64_t, -+ z0 = svand_n_u64_x (p0, z0, 512), -+ z0 = svand_x (p0, z0, 512)) -+ -+/* -+** and_65280_u64_x: -+** and z0\.d, z0\.d, #0xff00 -+** ret -+*/ -+TEST_UNIFORM_Z (and_65280_u64_x, svuint64_t, -+ z0 = svand_n_u64_x (p0, z0, 0xff00), -+ z0 = svand_x (p0, z0, 0xff00)) -+ -+/* -+** and_m127_u64_x: -+** and z0\.d, z0\.d, #0xffffffffffffff81 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m127_u64_x, svuint64_t, -+ z0 = svand_n_u64_x (p0, z0, -127), -+ z0 = svand_x (p0, z0, -127)) -+ -+/* -+** and_m128_u64_x: -+** and z0\.d, z0\.d, #0xffffffffffffff80 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m128_u64_x, svuint64_t, -+ z0 = svand_n_u64_x (p0, z0, -128), -+ z0 = svand_x (p0, z0, -128)) -+ -+/* -+** and_m255_u64_x: -+** and z0\.d, z0\.d, #0xffffffffffffff01 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m255_u64_x, svuint64_t, -+ z0 = svand_n_u64_x (p0, z0, -255), -+ z0 = svand_x (p0, z0, -255)) -+ -+/* -+** and_m256_u64_x: -+** and z0\.d, z0\.d, #0xffffffffffffff00 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m256_u64_x, svuint64_t, -+ z0 = svand_n_u64_x (p0, z0, -256), -+ z0 = svand_x (p0, z0, -256)) -+ -+/* -+** and_m257_u64_x: -+** and z0\.d, z0\.d, #0xfffffffffffffeff -+** ret -+*/ -+TEST_UNIFORM_Z (and_m257_u64_x, svuint64_t, -+ z0 = svand_n_u64_x (p0, z0, -257), -+ z0 = svand_x (p0, z0, -257)) -+ -+/* -+** and_m512_u64_x: -+** and z0\.d, z0\.d, #0xfffffffffffffe00 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m512_u64_x, svuint64_t, -+ z0 = svand_n_u64_x (p0, z0, -512), -+ z0 = svand_x (p0, z0, -512)) -+ -+/* -+** and_m32768_u64_x: -+** and z0\.d, z0\.d, #0xffffffffffff8000 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m32768_u64_x, svuint64_t, -+ z0 = svand_n_u64_x (p0, z0, -0x8000), -+ z0 = svand_x (p0, z0, -0x8000)) -+ -+/* -+** and_5_u64_x: -+** mov (z[0-9]+\.d), #5 -+** and z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (and_5_u64_x, svuint64_t, -+ z0 = svand_n_u64_x (p0, z0, 5), -+ z0 = svand_x (p0, z0, 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_u8.c -new file mode 100644 -index 000000000..b0f1c9529 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/and_u8.c -@@ -0,0 +1,294 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** and_u8_m_tied1: -+** and z0\.b, p0/m, z0\.b, 
z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (and_u8_m_tied1, svuint8_t, -+ z0 = svand_u8_m (p0, z0, z1), -+ z0 = svand_m (p0, z0, z1)) -+ -+/* -+** and_u8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** and z0\.b, p0/m, z0\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (and_u8_m_tied2, svuint8_t, -+ z0 = svand_u8_m (p0, z1, z0), -+ z0 = svand_m (p0, z1, z0)) -+ -+/* -+** and_u8_m_untied: -+** movprfx z0, z1 -+** and z0\.b, p0/m, z0\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (and_u8_m_untied, svuint8_t, -+ z0 = svand_u8_m (p0, z1, z2), -+ z0 = svand_m (p0, z1, z2)) -+ -+/* -+** and_w0_u8_m_tied1: -+** mov (z[0-9]+\.b), w0 -+** and z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (and_w0_u8_m_tied1, svuint8_t, uint8_t, -+ z0 = svand_n_u8_m (p0, z0, x0), -+ z0 = svand_m (p0, z0, x0)) -+ -+/* -+** and_w0_u8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** and z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (and_w0_u8_m_untied, svuint8_t, uint8_t, -+ z0 = svand_n_u8_m (p0, z1, x0), -+ z0 = svand_m (p0, z1, x0)) -+ -+/* -+** and_1_u8_m_tied1: -+** mov (z[0-9]+\.b), #1 -+** and z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_u8_m_tied1, svuint8_t, -+ z0 = svand_n_u8_m (p0, z0, 1), -+ z0 = svand_m (p0, z0, 1)) -+ -+/* -+** and_1_u8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), #1 -+** movprfx z0, z1 -+** and z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_u8_m_untied, svuint8_t, -+ z0 = svand_n_u8_m (p0, z1, 1), -+ z0 = svand_m (p0, z1, 1)) -+ -+/* -+** and_m2_u8_m: -+** mov (z[0-9]+\.b), #-2 -+** and z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m2_u8_m, svuint8_t, -+ z0 = svand_n_u8_m (p0, z0, -2), -+ z0 = svand_m (p0, z0, -2)) -+ -+/* -+** and_u8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** and z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (and_u8_z_tied1, svuint8_t, -+ z0 = svand_u8_z (p0, z0, z1), -+ z0 = svand_z (p0, z0, z1)) -+ -+/* -+** and_u8_z_tied2: -+** movprfx z0\.b, p0/z, z0\.b -+** and z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (and_u8_z_tied2, svuint8_t, -+ z0 = svand_u8_z (p0, z1, z0), -+ z0 = svand_z (p0, z1, z0)) -+ -+/* -+** and_u8_z_untied: -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** and z0\.b, p0/m, z0\.b, z2\.b -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** and z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (and_u8_z_untied, svuint8_t, -+ z0 = svand_u8_z (p0, z1, z2), -+ z0 = svand_z (p0, z1, z2)) -+ -+/* -+** and_w0_u8_z_tied1: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** and z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (and_w0_u8_z_tied1, svuint8_t, uint8_t, -+ z0 = svand_n_u8_z (p0, z0, x0), -+ z0 = svand_z (p0, z0, x0)) -+ -+/* -+** and_w0_u8_z_untied: -+** mov (z[0-9]+\.b), w0 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** and z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** and z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (and_w0_u8_z_untied, svuint8_t, uint8_t, -+ z0 = svand_n_u8_z (p0, z1, x0), -+ z0 = svand_z (p0, z1, x0)) -+ -+/* -+** and_1_u8_z_tied1: -+** mov (z[0-9]+\.b), #1 -+** movprfx z0\.b, p0/z, z0\.b -+** and z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_u8_z_tied1, svuint8_t, -+ z0 = svand_n_u8_z (p0, z0, 1), -+ z0 = svand_z (p0, z0, 1)) -+ -+/* -+** and_1_u8_z_untied: -+** mov (z[0-9]+\.b), #1 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** and z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** and z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ 
-+TEST_UNIFORM_Z (and_1_u8_z_untied, svuint8_t, -+ z0 = svand_n_u8_z (p0, z1, 1), -+ z0 = svand_z (p0, z1, 1)) -+ -+/* -+** and_u8_x_tied1: -+** and z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (and_u8_x_tied1, svuint8_t, -+ z0 = svand_u8_x (p0, z0, z1), -+ z0 = svand_x (p0, z0, z1)) -+ -+/* -+** and_u8_x_tied2: -+** and z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (and_u8_x_tied2, svuint8_t, -+ z0 = svand_u8_x (p0, z1, z0), -+ z0 = svand_x (p0, z1, z0)) -+ -+/* -+** and_u8_x_untied: -+** and z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (and_u8_x_untied, svuint8_t, -+ z0 = svand_u8_x (p0, z1, z2), -+ z0 = svand_x (p0, z1, z2)) -+ -+/* -+** and_w0_u8_x_tied1: -+** mov (z[0-9]+)\.b, w0 -+** and z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (and_w0_u8_x_tied1, svuint8_t, uint8_t, -+ z0 = svand_n_u8_x (p0, z0, x0), -+ z0 = svand_x (p0, z0, x0)) -+ -+/* -+** and_w0_u8_x_untied: -+** mov (z[0-9]+)\.b, w0 -+** and z0\.d, (z1\.d, \1\.d|\1\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (and_w0_u8_x_untied, svuint8_t, uint8_t, -+ z0 = svand_n_u8_x (p0, z1, x0), -+ z0 = svand_x (p0, z1, x0)) -+ -+/* -+** and_1_u8_x_tied1: -+** and z0\.b, z0\.b, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_u8_x_tied1, svuint8_t, -+ z0 = svand_n_u8_x (p0, z0, 1), -+ z0 = svand_x (p0, z0, 1)) -+ -+/* -+** and_1_u8_x_untied: -+** movprfx z0, z1 -+** and z0\.b, z0\.b, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (and_1_u8_x_untied, svuint8_t, -+ z0 = svand_n_u8_x (p0, z1, 1), -+ z0 = svand_x (p0, z1, 1)) -+ -+/* -+** and_127_u8_x: -+** and z0\.b, z0\.b, #0x7f -+** ret -+*/ -+TEST_UNIFORM_Z (and_127_u8_x, svuint8_t, -+ z0 = svand_n_u8_x (p0, z0, 127), -+ z0 = svand_x (p0, z0, 127)) -+ -+/* -+** and_128_u8_x: -+** and z0\.b, z0\.b, #0x80 -+** ret -+*/ -+TEST_UNIFORM_Z (and_128_u8_x, svuint8_t, -+ z0 = svand_n_u8_x (p0, z0, 128), -+ z0 = svand_x (p0, z0, 128)) -+ -+/* -+** and_255_u8_x: -+** ret -+*/ -+TEST_UNIFORM_Z (and_255_u8_x, svuint8_t, -+ z0 = svand_n_u8_x (p0, z0, 255), -+ z0 = svand_x (p0, z0, 255)) -+ -+/* -+** and_m127_u8_x: -+** and z0\.b, z0\.b, #0x81 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m127_u8_x, svuint8_t, -+ z0 = svand_n_u8_x (p0, z0, -127), -+ z0 = svand_x (p0, z0, -127)) -+ -+/* -+** and_m128_u8_x: -+** and z0\.b, z0\.b, #0x80 -+** ret -+*/ -+TEST_UNIFORM_Z (and_m128_u8_x, svuint8_t, -+ z0 = svand_n_u8_x (p0, z0, -128), -+ z0 = svand_x (p0, z0, -128)) -+ -+/* -+** and_5_u8_x: -+** mov (z[0-9]+)\.b, #5 -+** and z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (and_5_u8_x, svuint8_t, -+ z0 = svand_n_u8_x (p0, z0, 5), -+ z0 = svand_x (p0, z0, 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_s16.c -new file mode 100644 -index 000000000..16761b823 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_s16.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** andv_x0_s16: -+** andv h([0-9]+), p0, z0\.h -+** umov w0, v\1\.h\[0\] -+** ret -+*/ -+TEST_REDUCTION_X (andv_x0_s16, int16_t, svint16_t, -+ x0 = svandv_s16 (p0, z0), -+ x0 = svandv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_s32.c -new file mode 100644 -index 000000000..bccc91e21 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_s32.c -@@ -0,0 +1,13 @@ -+/* { dg-final { 
check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** andv_x0_s32: -+** andv (s[0-9]+), p0, z0\.s -+** fmov w0, \1 -+** ret -+*/ -+TEST_REDUCTION_X (andv_x0_s32, int32_t, svint32_t, -+ x0 = svandv_s32 (p0, z0), -+ x0 = svandv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_s64.c -new file mode 100644 -index 000000000..53488b6e3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_s64.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** andv_x0_s64: -+** andv (d[0-9]+), p0, z0\.d -+** fmov x0, \1 -+** ret -+*/ -+TEST_REDUCTION_X (andv_x0_s64, int64_t, svint64_t, -+ x0 = svandv_s64 (p0, z0), -+ x0 = svandv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_s8.c -new file mode 100644 -index 000000000..052f74c7f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_s8.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** andv_x0_s8: -+** andv b([0-9]+), p0, z0\.b -+** umov w0, v\1\.b\[0\] -+** ret -+*/ -+TEST_REDUCTION_X (andv_x0_s8, int8_t, svint8_t, -+ x0 = svandv_s8 (p0, z0), -+ x0 = svandv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_u16.c -new file mode 100644 -index 000000000..03328022d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_u16.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** andv_x0_u16: -+** andv h([0-9]+), p0, z0\.h -+** umov w0, v\1\.h\[0\] -+** ret -+*/ -+TEST_REDUCTION_X (andv_x0_u16, uint16_t, svuint16_t, -+ x0 = svandv_u16 (p0, z0), -+ x0 = svandv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_u32.c -new file mode 100644 -index 000000000..a1677e703 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_u32.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** andv_x0_u32: -+** andv (s[0-9]+), p0, z0\.s -+** fmov w0, \1 -+** ret -+*/ -+TEST_REDUCTION_X (andv_x0_u32, uint32_t, svuint32_t, -+ x0 = svandv_u32 (p0, z0), -+ x0 = svandv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_u64.c -new file mode 100644 -index 000000000..d45422693 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_u64.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** andv_x0_u64: -+** andv (d[0-9]+), p0, z0\.d -+** fmov x0, \1 -+** ret -+*/ -+TEST_REDUCTION_X (andv_x0_u64, uint64_t, svuint64_t, -+ x0 = svandv_u64 (p0, z0), -+ x0 = svandv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_u8.c -new file mode 100644 -index 000000000..b07f6b6e6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/andv_u8.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** 
andv_x0_u8: -+** andv b([0-9]+), p0, z0\.b -+** umov w0, v\1\.b\[0\] -+** ret -+*/ -+TEST_REDUCTION_X (andv_x0_u8, uint8_t, svuint8_t, -+ x0 = svandv_u8 (p0, z0), -+ x0 = svandv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_s16.c -new file mode 100644 -index 000000000..877bf1068 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_s16.c -@@ -0,0 +1,340 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** asr_s16_m_tied1: -+** asr z0\.h, p0/m, z0\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (asr_s16_m_tied1, svint16_t, svuint16_t, -+ z0 = svasr_s16_m (p0, z0, z4), -+ z0 = svasr_m (p0, z0, z4)) -+ -+/* -+** asr_s16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** asr z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (asr_s16_m_tied2, svint16_t, svuint16_t, -+ z0_res = svasr_s16_m (p0, z4, z0), -+ z0_res = svasr_m (p0, z4, z0)) -+ -+/* -+** asr_s16_m_untied: -+** movprfx z0, z1 -+** asr z0\.h, p0/m, z0\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (asr_s16_m_untied, svint16_t, svuint16_t, -+ z0 = svasr_s16_m (p0, z1, z4), -+ z0 = svasr_m (p0, z1, z4)) -+ -+/* -+** asr_w0_s16_m_tied1: -+** mov (z[0-9]+\.h), w0 -+** asr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_w0_s16_m_tied1, svint16_t, uint16_t, -+ z0 = svasr_n_s16_m (p0, z0, x0), -+ z0 = svasr_m (p0, z0, x0)) -+ -+/* -+** asr_w0_s16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** asr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_w0_s16_m_untied, svint16_t, uint16_t, -+ z0 = svasr_n_s16_m (p0, z1, x0), -+ z0 = svasr_m (p0, z1, x0)) -+ -+/* -+** asr_1_s16_m_tied1: -+** asr z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_1_s16_m_tied1, svint16_t, -+ z0 = svasr_n_s16_m (p0, z0, 1), -+ z0 = svasr_m (p0, z0, 1)) -+ -+/* -+** asr_1_s16_m_untied: -+** movprfx z0, z1 -+** asr z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_1_s16_m_untied, svint16_t, -+ z0 = svasr_n_s16_m (p0, z1, 1), -+ z0 = svasr_m (p0, z1, 1)) -+ -+/* -+** asr_15_s16_m_tied1: -+** asr z0\.h, p0/m, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_15_s16_m_tied1, svint16_t, -+ z0 = svasr_n_s16_m (p0, z0, 15), -+ z0 = svasr_m (p0, z0, 15)) -+ -+/* -+** asr_15_s16_m_untied: -+** movprfx z0, z1 -+** asr z0\.h, p0/m, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_15_s16_m_untied, svint16_t, -+ z0 = svasr_n_s16_m (p0, z1, 15), -+ z0 = svasr_m (p0, z1, 15)) -+ -+/* -+** asr_16_s16_m_tied1: -+** asr z0\.h, p0/m, z0\.h, #16 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_16_s16_m_tied1, svint16_t, -+ z0 = svasr_n_s16_m (p0, z0, 16), -+ z0 = svasr_m (p0, z0, 16)) -+ -+/* -+** asr_16_s16_m_untied: -+** movprfx z0, z1 -+** asr z0\.h, p0/m, z0\.h, #16 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_16_s16_m_untied, svint16_t, -+ z0 = svasr_n_s16_m (p0, z1, 16), -+ z0 = svasr_m (p0, z1, 16)) -+ -+/* -+** asr_s16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** asr z0\.h, p0/m, z0\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (asr_s16_z_tied1, svint16_t, svuint16_t, -+ z0 = svasr_s16_z (p0, z0, z4), -+ z0 = svasr_z (p0, z0, z4)) -+ -+/* -+** asr_s16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** asrr z0\.h, p0/m, z0\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (asr_s16_z_tied2, svint16_t, svuint16_t, -+ z0_res = svasr_s16_z (p0, z4, z0), -+ z0_res = svasr_z (p0, z4, z0)) -+ -+/* -+** asr_s16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** asr z0\.h, p0/m, z0\.h, z4\.h -+** | 
-+** movprfx z0\.h, p0/z, z4\.h -+** asrr z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_DUAL_Z (asr_s16_z_untied, svint16_t, svuint16_t, -+ z0 = svasr_s16_z (p0, z1, z4), -+ z0 = svasr_z (p0, z1, z4)) -+ -+/* -+** asr_w0_s16_z_tied1: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** asr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_w0_s16_z_tied1, svint16_t, uint16_t, -+ z0 = svasr_n_s16_z (p0, z0, x0), -+ z0 = svasr_z (p0, z0, x0)) -+ -+/* -+** asr_w0_s16_z_untied: -+** mov (z[0-9]+\.h), w0 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** asr z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** asrr z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_w0_s16_z_untied, svint16_t, uint16_t, -+ z0 = svasr_n_s16_z (p0, z1, x0), -+ z0 = svasr_z (p0, z1, x0)) -+ -+/* -+** asr_1_s16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** asr z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_1_s16_z_tied1, svint16_t, -+ z0 = svasr_n_s16_z (p0, z0, 1), -+ z0 = svasr_z (p0, z0, 1)) -+ -+/* -+** asr_1_s16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** asr z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_1_s16_z_untied, svint16_t, -+ z0 = svasr_n_s16_z (p0, z1, 1), -+ z0 = svasr_z (p0, z1, 1)) -+ -+/* -+** asr_15_s16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** asr z0\.h, p0/m, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_15_s16_z_tied1, svint16_t, -+ z0 = svasr_n_s16_z (p0, z0, 15), -+ z0 = svasr_z (p0, z0, 15)) -+ -+/* -+** asr_15_s16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** asr z0\.h, p0/m, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_15_s16_z_untied, svint16_t, -+ z0 = svasr_n_s16_z (p0, z1, 15), -+ z0 = svasr_z (p0, z1, 15)) -+ -+/* -+** asr_16_s16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** asr z0\.h, p0/m, z0\.h, #16 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_16_s16_z_tied1, svint16_t, -+ z0 = svasr_n_s16_z (p0, z0, 16), -+ z0 = svasr_z (p0, z0, 16)) -+ -+/* -+** asr_16_s16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** asr z0\.h, p0/m, z0\.h, #16 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_16_s16_z_untied, svint16_t, -+ z0 = svasr_n_s16_z (p0, z1, 16), -+ z0 = svasr_z (p0, z1, 16)) -+ -+/* -+** asr_s16_x_tied1: -+** asr z0\.h, p0/m, z0\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (asr_s16_x_tied1, svint16_t, svuint16_t, -+ z0 = svasr_s16_x (p0, z0, z4), -+ z0 = svasr_x (p0, z0, z4)) -+ -+/* -+** asr_s16_x_tied2: -+** asrr z0\.h, p0/m, z0\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (asr_s16_x_tied2, svint16_t, svuint16_t, -+ z0_res = svasr_s16_x (p0, z4, z0), -+ z0_res = svasr_x (p0, z4, z0)) -+ -+/* -+** asr_s16_x_untied: -+** ( -+** movprfx z0, z1 -+** asr z0\.h, p0/m, z0\.h, z4\.h -+** | -+** movprfx z0, z4 -+** asrr z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_DUAL_Z (asr_s16_x_untied, svint16_t, svuint16_t, -+ z0 = svasr_s16_x (p0, z1, z4), -+ z0 = svasr_x (p0, z1, z4)) -+ -+/* -+** asr_w0_s16_x_tied1: -+** mov (z[0-9]+\.h), w0 -+** asr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_w0_s16_x_tied1, svint16_t, uint16_t, -+ z0 = svasr_n_s16_x (p0, z0, x0), -+ z0 = svasr_x (p0, z0, x0)) -+ -+/* -+** asr_w0_s16_x_untied: -+** mov z0\.h, w0 -+** asrr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_w0_s16_x_untied, svint16_t, uint16_t, -+ z0 = svasr_n_s16_x (p0, z1, x0), -+ z0 = svasr_x (p0, z1, x0)) -+ -+/* -+** asr_1_s16_x_tied1: -+** asr z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_1_s16_x_tied1, svint16_t, -+ z0 = svasr_n_s16_x (p0, z0, 1), -+ z0 = svasr_x (p0, z0, 1)) -+ -+/* -+** 
asr_1_s16_x_untied: -+** asr z0\.h, z1\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_1_s16_x_untied, svint16_t, -+ z0 = svasr_n_s16_x (p0, z1, 1), -+ z0 = svasr_x (p0, z1, 1)) -+ -+/* -+** asr_15_s16_x_tied1: -+** asr z0\.h, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_15_s16_x_tied1, svint16_t, -+ z0 = svasr_n_s16_x (p0, z0, 15), -+ z0 = svasr_x (p0, z0, 15)) -+ -+/* -+** asr_15_s16_x_untied: -+** asr z0\.h, z1\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_15_s16_x_untied, svint16_t, -+ z0 = svasr_n_s16_x (p0, z1, 15), -+ z0 = svasr_x (p0, z1, 15)) -+ -+/* -+** asr_16_s16_x_tied1: -+** asr z0\.h, z0\.h, #16 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_16_s16_x_tied1, svint16_t, -+ z0 = svasr_n_s16_x (p0, z0, 16), -+ z0 = svasr_x (p0, z0, 16)) -+ -+/* -+** asr_16_s16_x_untied: -+** asr z0\.h, z1\.h, #16 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_16_s16_x_untied, svint16_t, -+ z0 = svasr_n_s16_x (p0, z1, 16), -+ z0 = svasr_x (p0, z1, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_s32.c -new file mode 100644 -index 000000000..0f5a37372 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_s32.c -@@ -0,0 +1,340 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** asr_s32_m_tied1: -+** asr z0\.s, p0/m, z0\.s, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (asr_s32_m_tied1, svint32_t, svuint32_t, -+ z0 = svasr_s32_m (p0, z0, z4), -+ z0 = svasr_m (p0, z0, z4)) -+ -+/* -+** asr_s32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** asr z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (asr_s32_m_tied2, svint32_t, svuint32_t, -+ z0_res = svasr_s32_m (p0, z4, z0), -+ z0_res = svasr_m (p0, z4, z0)) -+ -+/* -+** asr_s32_m_untied: -+** movprfx z0, z1 -+** asr z0\.s, p0/m, z0\.s, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (asr_s32_m_untied, svint32_t, svuint32_t, -+ z0 = svasr_s32_m (p0, z1, z4), -+ z0 = svasr_m (p0, z1, z4)) -+ -+/* -+** asr_w0_s32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** asr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_w0_s32_m_tied1, svint32_t, uint32_t, -+ z0 = svasr_n_s32_m (p0, z0, x0), -+ z0 = svasr_m (p0, z0, x0)) -+ -+/* -+** asr_w0_s32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** asr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_w0_s32_m_untied, svint32_t, uint32_t, -+ z0 = svasr_n_s32_m (p0, z1, x0), -+ z0 = svasr_m (p0, z1, x0)) -+ -+/* -+** asr_1_s32_m_tied1: -+** asr z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_1_s32_m_tied1, svint32_t, -+ z0 = svasr_n_s32_m (p0, z0, 1), -+ z0 = svasr_m (p0, z0, 1)) -+ -+/* -+** asr_1_s32_m_untied: -+** movprfx z0, z1 -+** asr z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_1_s32_m_untied, svint32_t, -+ z0 = svasr_n_s32_m (p0, z1, 1), -+ z0 = svasr_m (p0, z1, 1)) -+ -+/* -+** asr_31_s32_m_tied1: -+** asr z0\.s, p0/m, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_31_s32_m_tied1, svint32_t, -+ z0 = svasr_n_s32_m (p0, z0, 31), -+ z0 = svasr_m (p0, z0, 31)) -+ -+/* -+** asr_31_s32_m_untied: -+** movprfx z0, z1 -+** asr z0\.s, p0/m, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_31_s32_m_untied, svint32_t, -+ z0 = svasr_n_s32_m (p0, z1, 31), -+ z0 = svasr_m (p0, z1, 31)) -+ -+/* -+** asr_32_s32_m_tied1: -+** asr z0\.s, p0/m, z0\.s, #32 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_32_s32_m_tied1, svint32_t, -+ z0 = svasr_n_s32_m (p0, z0, 32), -+ z0 = svasr_m (p0, z0, 32)) -+ -+/* -+** asr_32_s32_m_untied: -+** movprfx z0, z1 -+** asr z0\.s, p0/m, 
z0\.s, #32 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_32_s32_m_untied, svint32_t, -+ z0 = svasr_n_s32_m (p0, z1, 32), -+ z0 = svasr_m (p0, z1, 32)) -+ -+/* -+** asr_s32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** asr z0\.s, p0/m, z0\.s, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (asr_s32_z_tied1, svint32_t, svuint32_t, -+ z0 = svasr_s32_z (p0, z0, z4), -+ z0 = svasr_z (p0, z0, z4)) -+ -+/* -+** asr_s32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** asrr z0\.s, p0/m, z0\.s, z4\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (asr_s32_z_tied2, svint32_t, svuint32_t, -+ z0_res = svasr_s32_z (p0, z4, z0), -+ z0_res = svasr_z (p0, z4, z0)) -+ -+/* -+** asr_s32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** asr z0\.s, p0/m, z0\.s, z4\.s -+** | -+** movprfx z0\.s, p0/z, z4\.s -+** asrr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_DUAL_Z (asr_s32_z_untied, svint32_t, svuint32_t, -+ z0 = svasr_s32_z (p0, z1, z4), -+ z0 = svasr_z (p0, z1, z4)) -+ -+/* -+** asr_w0_s32_z_tied1: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** asr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_w0_s32_z_tied1, svint32_t, uint32_t, -+ z0 = svasr_n_s32_z (p0, z0, x0), -+ z0 = svasr_z (p0, z0, x0)) -+ -+/* -+** asr_w0_s32_z_untied: -+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** asr z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** asrr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_w0_s32_z_untied, svint32_t, uint32_t, -+ z0 = svasr_n_s32_z (p0, z1, x0), -+ z0 = svasr_z (p0, z1, x0)) -+ -+/* -+** asr_1_s32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** asr z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_1_s32_z_tied1, svint32_t, -+ z0 = svasr_n_s32_z (p0, z0, 1), -+ z0 = svasr_z (p0, z0, 1)) -+ -+/* -+** asr_1_s32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** asr z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_1_s32_z_untied, svint32_t, -+ z0 = svasr_n_s32_z (p0, z1, 1), -+ z0 = svasr_z (p0, z1, 1)) -+ -+/* -+** asr_31_s32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** asr z0\.s, p0/m, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_31_s32_z_tied1, svint32_t, -+ z0 = svasr_n_s32_z (p0, z0, 31), -+ z0 = svasr_z (p0, z0, 31)) -+ -+/* -+** asr_31_s32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** asr z0\.s, p0/m, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_31_s32_z_untied, svint32_t, -+ z0 = svasr_n_s32_z (p0, z1, 31), -+ z0 = svasr_z (p0, z1, 31)) -+ -+/* -+** asr_32_s32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** asr z0\.s, p0/m, z0\.s, #32 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_32_s32_z_tied1, svint32_t, -+ z0 = svasr_n_s32_z (p0, z0, 32), -+ z0 = svasr_z (p0, z0, 32)) -+ -+/* -+** asr_32_s32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** asr z0\.s, p0/m, z0\.s, #32 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_32_s32_z_untied, svint32_t, -+ z0 = svasr_n_s32_z (p0, z1, 32), -+ z0 = svasr_z (p0, z1, 32)) -+ -+/* -+** asr_s32_x_tied1: -+** asr z0\.s, p0/m, z0\.s, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (asr_s32_x_tied1, svint32_t, svuint32_t, -+ z0 = svasr_s32_x (p0, z0, z4), -+ z0 = svasr_x (p0, z0, z4)) -+ -+/* -+** asr_s32_x_tied2: -+** asrr z0\.s, p0/m, z0\.s, z4\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (asr_s32_x_tied2, svint32_t, svuint32_t, -+ z0_res = svasr_s32_x (p0, z4, z0), -+ z0_res = svasr_x (p0, z4, z0)) -+ -+/* -+** asr_s32_x_untied: -+** ( -+** movprfx z0, z1 -+** asr z0\.s, p0/m, z0\.s, z4\.s -+** | -+** movprfx z0, z4 -+** asrr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_DUAL_Z (asr_s32_x_untied, svint32_t, svuint32_t, -+ z0 
= svasr_s32_x (p0, z1, z4), -+ z0 = svasr_x (p0, z1, z4)) -+ -+/* -+** asr_w0_s32_x_tied1: -+** mov (z[0-9]+\.s), w0 -+** asr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_w0_s32_x_tied1, svint32_t, uint32_t, -+ z0 = svasr_n_s32_x (p0, z0, x0), -+ z0 = svasr_x (p0, z0, x0)) -+ -+/* -+** asr_w0_s32_x_untied: -+** mov z0\.s, w0 -+** asrr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_w0_s32_x_untied, svint32_t, uint32_t, -+ z0 = svasr_n_s32_x (p0, z1, x0), -+ z0 = svasr_x (p0, z1, x0)) -+ -+/* -+** asr_1_s32_x_tied1: -+** asr z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_1_s32_x_tied1, svint32_t, -+ z0 = svasr_n_s32_x (p0, z0, 1), -+ z0 = svasr_x (p0, z0, 1)) -+ -+/* -+** asr_1_s32_x_untied: -+** asr z0\.s, z1\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_1_s32_x_untied, svint32_t, -+ z0 = svasr_n_s32_x (p0, z1, 1), -+ z0 = svasr_x (p0, z1, 1)) -+ -+/* -+** asr_31_s32_x_tied1: -+** asr z0\.s, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_31_s32_x_tied1, svint32_t, -+ z0 = svasr_n_s32_x (p0, z0, 31), -+ z0 = svasr_x (p0, z0, 31)) -+ -+/* -+** asr_31_s32_x_untied: -+** asr z0\.s, z1\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_31_s32_x_untied, svint32_t, -+ z0 = svasr_n_s32_x (p0, z1, 31), -+ z0 = svasr_x (p0, z1, 31)) -+ -+/* -+** asr_32_s32_x_tied1: -+** asr z0\.s, z0\.s, #32 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_32_s32_x_tied1, svint32_t, -+ z0 = svasr_n_s32_x (p0, z0, 32), -+ z0 = svasr_x (p0, z0, 32)) -+ -+/* -+** asr_32_s32_x_untied: -+** asr z0\.s, z1\.s, #32 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_32_s32_x_untied, svint32_t, -+ z0 = svasr_n_s32_x (p0, z1, 32), -+ z0 = svasr_x (p0, z1, 32)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_s64.c -new file mode 100644 -index 000000000..80cae07c3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_s64.c -@@ -0,0 +1,340 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** asr_s64_m_tied1: -+** asr z0\.d, p0/m, z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (asr_s64_m_tied1, svint64_t, svuint64_t, -+ z0 = svasr_s64_m (p0, z0, z4), -+ z0 = svasr_m (p0, z0, z4)) -+ -+/* -+** asr_s64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z4 -+** asr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (asr_s64_m_tied2, svint64_t, svuint64_t, -+ z0_res = svasr_s64_m (p0, z4, z0), -+ z0_res = svasr_m (p0, z4, z0)) -+ -+/* -+** asr_s64_m_untied: -+** movprfx z0, z1 -+** asr z0\.d, p0/m, z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (asr_s64_m_untied, svint64_t, svuint64_t, -+ z0 = svasr_s64_m (p0, z1, z4), -+ z0 = svasr_m (p0, z1, z4)) -+ -+/* -+** asr_x0_s64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** asr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_x0_s64_m_tied1, svint64_t, uint64_t, -+ z0 = svasr_n_s64_m (p0, z0, x0), -+ z0 = svasr_m (p0, z0, x0)) -+ -+/* -+** asr_x0_s64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** asr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_x0_s64_m_untied, svint64_t, uint64_t, -+ z0 = svasr_n_s64_m (p0, z1, x0), -+ z0 = svasr_m (p0, z1, x0)) -+ -+/* -+** asr_1_s64_m_tied1: -+** asr z0\.d, p0/m, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_1_s64_m_tied1, svint64_t, -+ z0 = svasr_n_s64_m (p0, z0, 1), -+ z0 = svasr_m (p0, z0, 1)) -+ -+/* -+** asr_1_s64_m_untied: -+** movprfx z0, z1 -+** asr z0\.d, p0/m, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_1_s64_m_untied, svint64_t, -+ z0 = svasr_n_s64_m (p0, z1, 
1), -+ z0 = svasr_m (p0, z1, 1)) -+ -+/* -+** asr_63_s64_m_tied1: -+** asr z0\.d, p0/m, z0\.d, #63 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_63_s64_m_tied1, svint64_t, -+ z0 = svasr_n_s64_m (p0, z0, 63), -+ z0 = svasr_m (p0, z0, 63)) -+ -+/* -+** asr_63_s64_m_untied: -+** movprfx z0, z1 -+** asr z0\.d, p0/m, z0\.d, #63 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_63_s64_m_untied, svint64_t, -+ z0 = svasr_n_s64_m (p0, z1, 63), -+ z0 = svasr_m (p0, z1, 63)) -+ -+/* -+** asr_64_s64_m_tied1: -+** asr z0\.d, p0/m, z0\.d, #64 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_64_s64_m_tied1, svint64_t, -+ z0 = svasr_n_s64_m (p0, z0, 64), -+ z0 = svasr_m (p0, z0, 64)) -+ -+/* -+** asr_64_s64_m_untied: -+** movprfx z0, z1 -+** asr z0\.d, p0/m, z0\.d, #64 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_64_s64_m_untied, svint64_t, -+ z0 = svasr_n_s64_m (p0, z1, 64), -+ z0 = svasr_m (p0, z1, 64)) -+ -+/* -+** asr_s64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** asr z0\.d, p0/m, z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (asr_s64_z_tied1, svint64_t, svuint64_t, -+ z0 = svasr_s64_z (p0, z0, z4), -+ z0 = svasr_z (p0, z0, z4)) -+ -+/* -+** asr_s64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** asrr z0\.d, p0/m, z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z_REV (asr_s64_z_tied2, svint64_t, svuint64_t, -+ z0_res = svasr_s64_z (p0, z4, z0), -+ z0_res = svasr_z (p0, z4, z0)) -+ -+/* -+** asr_s64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** asr z0\.d, p0/m, z0\.d, z4\.d -+** | -+** movprfx z0\.d, p0/z, z4\.d -+** asrr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_DUAL_Z (asr_s64_z_untied, svint64_t, svuint64_t, -+ z0 = svasr_s64_z (p0, z1, z4), -+ z0 = svasr_z (p0, z1, z4)) -+ -+/* -+** asr_x0_s64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** asr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_x0_s64_z_tied1, svint64_t, uint64_t, -+ z0 = svasr_n_s64_z (p0, z0, x0), -+ z0 = svasr_z (p0, z0, x0)) -+ -+/* -+** asr_x0_s64_z_untied: -+** mov (z[0-9]+\.d), x0 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** asr z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** asrr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_x0_s64_z_untied, svint64_t, uint64_t, -+ z0 = svasr_n_s64_z (p0, z1, x0), -+ z0 = svasr_z (p0, z1, x0)) -+ -+/* -+** asr_1_s64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** asr z0\.d, p0/m, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_1_s64_z_tied1, svint64_t, -+ z0 = svasr_n_s64_z (p0, z0, 1), -+ z0 = svasr_z (p0, z0, 1)) -+ -+/* -+** asr_1_s64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** asr z0\.d, p0/m, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_1_s64_z_untied, svint64_t, -+ z0 = svasr_n_s64_z (p0, z1, 1), -+ z0 = svasr_z (p0, z1, 1)) -+ -+/* -+** asr_63_s64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** asr z0\.d, p0/m, z0\.d, #63 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_63_s64_z_tied1, svint64_t, -+ z0 = svasr_n_s64_z (p0, z0, 63), -+ z0 = svasr_z (p0, z0, 63)) -+ -+/* -+** asr_63_s64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** asr z0\.d, p0/m, z0\.d, #63 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_63_s64_z_untied, svint64_t, -+ z0 = svasr_n_s64_z (p0, z1, 63), -+ z0 = svasr_z (p0, z1, 63)) -+ -+/* -+** asr_64_s64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** asr z0\.d, p0/m, z0\.d, #64 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_64_s64_z_tied1, svint64_t, -+ z0 = svasr_n_s64_z (p0, z0, 64), -+ z0 = svasr_z (p0, z0, 64)) -+ -+/* -+** asr_64_s64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** asr z0\.d, p0/m, z0\.d, #64 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_64_s64_z_untied, 
svint64_t, -+ z0 = svasr_n_s64_z (p0, z1, 64), -+ z0 = svasr_z (p0, z1, 64)) -+ -+/* -+** asr_s64_x_tied1: -+** asr z0\.d, p0/m, z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (asr_s64_x_tied1, svint64_t, svuint64_t, -+ z0 = svasr_s64_x (p0, z0, z4), -+ z0 = svasr_x (p0, z0, z4)) -+ -+/* -+** asr_s64_x_tied2: -+** asrr z0\.d, p0/m, z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z_REV (asr_s64_x_tied2, svint64_t, svuint64_t, -+ z0_res = svasr_s64_x (p0, z4, z0), -+ z0_res = svasr_x (p0, z4, z0)) -+ -+/* -+** asr_s64_x_untied: -+** ( -+** movprfx z0, z1 -+** asr z0\.d, p0/m, z0\.d, z4\.d -+** | -+** movprfx z0, z4 -+** asrr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_DUAL_Z (asr_s64_x_untied, svint64_t, svuint64_t, -+ z0 = svasr_s64_x (p0, z1, z4), -+ z0 = svasr_x (p0, z1, z4)) -+ -+/* -+** asr_x0_s64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** asr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_x0_s64_x_tied1, svint64_t, uint64_t, -+ z0 = svasr_n_s64_x (p0, z0, x0), -+ z0 = svasr_x (p0, z0, x0)) -+ -+/* -+** asr_x0_s64_x_untied: -+** mov z0\.d, x0 -+** asrr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_x0_s64_x_untied, svint64_t, uint64_t, -+ z0 = svasr_n_s64_x (p0, z1, x0), -+ z0 = svasr_x (p0, z1, x0)) -+ -+/* -+** asr_1_s64_x_tied1: -+** asr z0\.d, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_1_s64_x_tied1, svint64_t, -+ z0 = svasr_n_s64_x (p0, z0, 1), -+ z0 = svasr_x (p0, z0, 1)) -+ -+/* -+** asr_1_s64_x_untied: -+** asr z0\.d, z1\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_1_s64_x_untied, svint64_t, -+ z0 = svasr_n_s64_x (p0, z1, 1), -+ z0 = svasr_x (p0, z1, 1)) -+ -+/* -+** asr_63_s64_x_tied1: -+** asr z0\.d, z0\.d, #63 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_63_s64_x_tied1, svint64_t, -+ z0 = svasr_n_s64_x (p0, z0, 63), -+ z0 = svasr_x (p0, z0, 63)) -+ -+/* -+** asr_63_s64_x_untied: -+** asr z0\.d, z1\.d, #63 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_63_s64_x_untied, svint64_t, -+ z0 = svasr_n_s64_x (p0, z1, 63), -+ z0 = svasr_x (p0, z1, 63)) -+ -+/* -+** asr_64_s64_x_tied1: -+** asr z0\.d, z0\.d, #64 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_64_s64_x_tied1, svint64_t, -+ z0 = svasr_n_s64_x (p0, z0, 64), -+ z0 = svasr_x (p0, z0, 64)) -+ -+/* -+** asr_64_s64_x_untied: -+** asr z0\.d, z1\.d, #64 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_64_s64_x_untied, svint64_t, -+ z0 = svasr_n_s64_x (p0, z1, 64), -+ z0 = svasr_x (p0, z1, 64)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_s8.c -new file mode 100644 -index 000000000..992e93fde ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_s8.c -@@ -0,0 +1,340 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** asr_s8_m_tied1: -+** asr z0\.b, p0/m, z0\.b, z4\.b -+** ret -+*/ -+TEST_DUAL_Z (asr_s8_m_tied1, svint8_t, svuint8_t, -+ z0 = svasr_s8_m (p0, z0, z4), -+ z0 = svasr_m (p0, z0, z4)) -+ -+/* -+** asr_s8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** asr z0\.b, p0/m, z0\.b, \1\.b -+** ret -+*/ -+TEST_DUAL_Z_REV (asr_s8_m_tied2, svint8_t, svuint8_t, -+ z0_res = svasr_s8_m (p0, z4, z0), -+ z0_res = svasr_m (p0, z4, z0)) -+ -+/* -+** asr_s8_m_untied: -+** movprfx z0, z1 -+** asr z0\.b, p0/m, z0\.b, z4\.b -+** ret -+*/ -+TEST_DUAL_Z (asr_s8_m_untied, svint8_t, svuint8_t, -+ z0 = svasr_s8_m (p0, z1, z4), -+ z0 = svasr_m (p0, z1, z4)) -+ -+/* -+** asr_w0_s8_m_tied1: -+** mov (z[0-9]+\.b), w0 -+** asr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_w0_s8_m_tied1, 
svint8_t, uint8_t, -+ z0 = svasr_n_s8_m (p0, z0, x0), -+ z0 = svasr_m (p0, z0, x0)) -+ -+/* -+** asr_w0_s8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** asr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_w0_s8_m_untied, svint8_t, uint8_t, -+ z0 = svasr_n_s8_m (p0, z1, x0), -+ z0 = svasr_m (p0, z1, x0)) -+ -+/* -+** asr_1_s8_m_tied1: -+** asr z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_1_s8_m_tied1, svint8_t, -+ z0 = svasr_n_s8_m (p0, z0, 1), -+ z0 = svasr_m (p0, z0, 1)) -+ -+/* -+** asr_1_s8_m_untied: -+** movprfx z0, z1 -+** asr z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_1_s8_m_untied, svint8_t, -+ z0 = svasr_n_s8_m (p0, z1, 1), -+ z0 = svasr_m (p0, z1, 1)) -+ -+/* -+** asr_7_s8_m_tied1: -+** asr z0\.b, p0/m, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_7_s8_m_tied1, svint8_t, -+ z0 = svasr_n_s8_m (p0, z0, 7), -+ z0 = svasr_m (p0, z0, 7)) -+ -+/* -+** asr_7_s8_m_untied: -+** movprfx z0, z1 -+** asr z0\.b, p0/m, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_7_s8_m_untied, svint8_t, -+ z0 = svasr_n_s8_m (p0, z1, 7), -+ z0 = svasr_m (p0, z1, 7)) -+ -+/* -+** asr_8_s8_m_tied1: -+** asr z0\.b, p0/m, z0\.b, #8 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_8_s8_m_tied1, svint8_t, -+ z0 = svasr_n_s8_m (p0, z0, 8), -+ z0 = svasr_m (p0, z0, 8)) -+ -+/* -+** asr_8_s8_m_untied: -+** movprfx z0, z1 -+** asr z0\.b, p0/m, z0\.b, #8 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_8_s8_m_untied, svint8_t, -+ z0 = svasr_n_s8_m (p0, z1, 8), -+ z0 = svasr_m (p0, z1, 8)) -+ -+/* -+** asr_s8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** asr z0\.b, p0/m, z0\.b, z4\.b -+** ret -+*/ -+TEST_DUAL_Z (asr_s8_z_tied1, svint8_t, svuint8_t, -+ z0 = svasr_s8_z (p0, z0, z4), -+ z0 = svasr_z (p0, z0, z4)) -+ -+/* -+** asr_s8_z_tied2: -+** movprfx z0\.b, p0/z, z0\.b -+** asrr z0\.b, p0/m, z0\.b, z4\.b -+** ret -+*/ -+TEST_DUAL_Z_REV (asr_s8_z_tied2, svint8_t, svuint8_t, -+ z0_res = svasr_s8_z (p0, z4, z0), -+ z0_res = svasr_z (p0, z4, z0)) -+ -+/* -+** asr_s8_z_untied: -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** asr z0\.b, p0/m, z0\.b, z4\.b -+** | -+** movprfx z0\.b, p0/z, z4\.b -+** asrr z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_DUAL_Z (asr_s8_z_untied, svint8_t, svuint8_t, -+ z0 = svasr_s8_z (p0, z1, z4), -+ z0 = svasr_z (p0, z1, z4)) -+ -+/* -+** asr_w0_s8_z_tied1: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** asr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_w0_s8_z_tied1, svint8_t, uint8_t, -+ z0 = svasr_n_s8_z (p0, z0, x0), -+ z0 = svasr_z (p0, z0, x0)) -+ -+/* -+** asr_w0_s8_z_untied: -+** mov (z[0-9]+\.b), w0 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** asr z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** asrr z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_w0_s8_z_untied, svint8_t, uint8_t, -+ z0 = svasr_n_s8_z (p0, z1, x0), -+ z0 = svasr_z (p0, z1, x0)) -+ -+/* -+** asr_1_s8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** asr z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_1_s8_z_tied1, svint8_t, -+ z0 = svasr_n_s8_z (p0, z0, 1), -+ z0 = svasr_z (p0, z0, 1)) -+ -+/* -+** asr_1_s8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** asr z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_1_s8_z_untied, svint8_t, -+ z0 = svasr_n_s8_z (p0, z1, 1), -+ z0 = svasr_z (p0, z1, 1)) -+ -+/* -+** asr_7_s8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** asr z0\.b, p0/m, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_7_s8_z_tied1, svint8_t, -+ z0 = svasr_n_s8_z (p0, z0, 7), -+ z0 = 
svasr_z (p0, z0, 7)) -+ -+/* -+** asr_7_s8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** asr z0\.b, p0/m, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_7_s8_z_untied, svint8_t, -+ z0 = svasr_n_s8_z (p0, z1, 7), -+ z0 = svasr_z (p0, z1, 7)) -+ -+/* -+** asr_8_s8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** asr z0\.b, p0/m, z0\.b, #8 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_8_s8_z_tied1, svint8_t, -+ z0 = svasr_n_s8_z (p0, z0, 8), -+ z0 = svasr_z (p0, z0, 8)) -+ -+/* -+** asr_8_s8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** asr z0\.b, p0/m, z0\.b, #8 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_8_s8_z_untied, svint8_t, -+ z0 = svasr_n_s8_z (p0, z1, 8), -+ z0 = svasr_z (p0, z1, 8)) -+ -+/* -+** asr_s8_x_tied1: -+** asr z0\.b, p0/m, z0\.b, z4\.b -+** ret -+*/ -+TEST_DUAL_Z (asr_s8_x_tied1, svint8_t, svuint8_t, -+ z0 = svasr_s8_x (p0, z0, z4), -+ z0 = svasr_x (p0, z0, z4)) -+ -+/* -+** asr_s8_x_tied2: -+** asrr z0\.b, p0/m, z0\.b, z4\.b -+** ret -+*/ -+TEST_DUAL_Z_REV (asr_s8_x_tied2, svint8_t, svuint8_t, -+ z0_res = svasr_s8_x (p0, z4, z0), -+ z0_res = svasr_x (p0, z4, z0)) -+ -+/* -+** asr_s8_x_untied: -+** ( -+** movprfx z0, z1 -+** asr z0\.b, p0/m, z0\.b, z4\.b -+** | -+** movprfx z0, z4 -+** asrr z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_DUAL_Z (asr_s8_x_untied, svint8_t, svuint8_t, -+ z0 = svasr_s8_x (p0, z1, z4), -+ z0 = svasr_x (p0, z1, z4)) -+ -+/* -+** asr_w0_s8_x_tied1: -+** mov (z[0-9]+\.b), w0 -+** asr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_w0_s8_x_tied1, svint8_t, uint8_t, -+ z0 = svasr_n_s8_x (p0, z0, x0), -+ z0 = svasr_x (p0, z0, x0)) -+ -+/* -+** asr_w0_s8_x_untied: -+** mov z0\.b, w0 -+** asrr z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_w0_s8_x_untied, svint8_t, uint8_t, -+ z0 = svasr_n_s8_x (p0, z1, x0), -+ z0 = svasr_x (p0, z1, x0)) -+ -+/* -+** asr_1_s8_x_tied1: -+** asr z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_1_s8_x_tied1, svint8_t, -+ z0 = svasr_n_s8_x (p0, z0, 1), -+ z0 = svasr_x (p0, z0, 1)) -+ -+/* -+** asr_1_s8_x_untied: -+** asr z0\.b, z1\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_1_s8_x_untied, svint8_t, -+ z0 = svasr_n_s8_x (p0, z1, 1), -+ z0 = svasr_x (p0, z1, 1)) -+ -+/* -+** asr_7_s8_x_tied1: -+** asr z0\.b, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_7_s8_x_tied1, svint8_t, -+ z0 = svasr_n_s8_x (p0, z0, 7), -+ z0 = svasr_x (p0, z0, 7)) -+ -+/* -+** asr_7_s8_x_untied: -+** asr z0\.b, z1\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_7_s8_x_untied, svint8_t, -+ z0 = svasr_n_s8_x (p0, z1, 7), -+ z0 = svasr_x (p0, z1, 7)) -+ -+/* -+** asr_8_s8_x_tied1: -+** asr z0\.b, z0\.b, #8 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_8_s8_x_tied1, svint8_t, -+ z0 = svasr_n_s8_x (p0, z0, 8), -+ z0 = svasr_x (p0, z0, 8)) -+ -+/* -+** asr_8_s8_x_untied: -+** asr z0\.b, z1\.b, #8 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_8_s8_x_untied, svint8_t, -+ z0 = svasr_n_s8_x (p0, z1, 8), -+ z0 = svasr_x (p0, z1, 8)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_wide_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_wide_s16.c -new file mode 100644 -index 000000000..b74ae33e1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_wide_s16.c -@@ -0,0 +1,325 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** asr_wide_s16_m_tied1: -+** asr z0\.h, p0/m, z0\.h, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (asr_wide_s16_m_tied1, svint16_t, svuint64_t, -+ z0 = svasr_wide_s16_m (p0, z0, z4), -+ z0 = svasr_wide_m (p0, z0, z4)) -+ -+/* -+** asr_wide_s16_m_tied2: 
-+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z4 -+** asr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (asr_wide_s16_m_tied2, svint16_t, svuint64_t, -+ z0_res = svasr_wide_s16_m (p0, z4, z0), -+ z0_res = svasr_wide_m (p0, z4, z0)) -+ -+/* -+** asr_wide_s16_m_untied: -+** movprfx z0, z1 -+** asr z0\.h, p0/m, z0\.h, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (asr_wide_s16_m_untied, svint16_t, svuint64_t, -+ z0 = svasr_wide_s16_m (p0, z1, z4), -+ z0 = svasr_wide_m (p0, z1, z4)) -+ -+/* -+** asr_wide_x0_s16_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** asr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_wide_x0_s16_m_tied1, svint16_t, uint64_t, -+ z0 = svasr_wide_n_s16_m (p0, z0, x0), -+ z0 = svasr_wide_m (p0, z0, x0)) -+ -+/* -+** asr_wide_x0_s16_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** asr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_wide_x0_s16_m_untied, svint16_t, uint64_t, -+ z0 = svasr_wide_n_s16_m (p0, z1, x0), -+ z0 = svasr_wide_m (p0, z1, x0)) -+ -+/* -+** asr_wide_1_s16_m_tied1: -+** asr z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_1_s16_m_tied1, svint16_t, -+ z0 = svasr_wide_n_s16_m (p0, z0, 1), -+ z0 = svasr_wide_m (p0, z0, 1)) -+ -+/* -+** asr_wide_1_s16_m_untied: -+** movprfx z0, z1 -+** asr z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_1_s16_m_untied, svint16_t, -+ z0 = svasr_wide_n_s16_m (p0, z1, 1), -+ z0 = svasr_wide_m (p0, z1, 1)) -+ -+/* -+** asr_wide_15_s16_m_tied1: -+** asr z0\.h, p0/m, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_15_s16_m_tied1, svint16_t, -+ z0 = svasr_wide_n_s16_m (p0, z0, 15), -+ z0 = svasr_wide_m (p0, z0, 15)) -+ -+/* -+** asr_wide_15_s16_m_untied: -+** movprfx z0, z1 -+** asr z0\.h, p0/m, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_15_s16_m_untied, svint16_t, -+ z0 = svasr_wide_n_s16_m (p0, z1, 15), -+ z0 = svasr_wide_m (p0, z1, 15)) -+ -+/* -+** asr_wide_16_s16_m_tied1: -+** asr z0\.h, p0/m, z0\.h, #16 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_16_s16_m_tied1, svint16_t, -+ z0 = svasr_wide_n_s16_m (p0, z0, 16), -+ z0 = svasr_wide_m (p0, z0, 16)) -+ -+/* -+** asr_wide_16_s16_m_untied: -+** movprfx z0, z1 -+** asr z0\.h, p0/m, z0\.h, #16 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_16_s16_m_untied, svint16_t, -+ z0 = svasr_wide_n_s16_m (p0, z1, 16), -+ z0 = svasr_wide_m (p0, z1, 16)) -+ -+/* -+** asr_wide_s16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** asr z0\.h, p0/m, z0\.h, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (asr_wide_s16_z_tied1, svint16_t, svuint64_t, -+ z0 = svasr_wide_s16_z (p0, z0, z4), -+ z0 = svasr_wide_z (p0, z0, z4)) -+ -+/* -+** asr_wide_s16_z_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.h, p0/z, z4\.h -+** asr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (asr_wide_s16_z_tied2, svint16_t, svuint64_t, -+ z0_res = svasr_wide_s16_z (p0, z4, z0), -+ z0_res = svasr_wide_z (p0, z4, z0)) -+ -+/* -+** asr_wide_s16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** asr z0\.h, p0/m, z0\.h, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (asr_wide_s16_z_untied, svint16_t, svuint64_t, -+ z0 = svasr_wide_s16_z (p0, z1, z4), -+ z0 = svasr_wide_z (p0, z1, z4)) -+ -+/* -+** asr_wide_x0_s16_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.h, p0/z, z0\.h -+** asr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_wide_x0_s16_z_tied1, svint16_t, uint64_t, -+ z0 = svasr_wide_n_s16_z (p0, z0, x0), -+ z0 = svasr_wide_z (p0, z0, x0)) -+ -+/* -+** asr_wide_x0_s16_z_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.h, p0/z, z1\.h -+** asr z0\.h, 
p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_wide_x0_s16_z_untied, svint16_t, uint64_t, -+ z0 = svasr_wide_n_s16_z (p0, z1, x0), -+ z0 = svasr_wide_z (p0, z1, x0)) -+ -+/* -+** asr_wide_1_s16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** asr z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_1_s16_z_tied1, svint16_t, -+ z0 = svasr_wide_n_s16_z (p0, z0, 1), -+ z0 = svasr_wide_z (p0, z0, 1)) -+ -+/* -+** asr_wide_1_s16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** asr z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_1_s16_z_untied, svint16_t, -+ z0 = svasr_wide_n_s16_z (p0, z1, 1), -+ z0 = svasr_wide_z (p0, z1, 1)) -+ -+/* -+** asr_wide_15_s16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** asr z0\.h, p0/m, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_15_s16_z_tied1, svint16_t, -+ z0 = svasr_wide_n_s16_z (p0, z0, 15), -+ z0 = svasr_wide_z (p0, z0, 15)) -+ -+/* -+** asr_wide_15_s16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** asr z0\.h, p0/m, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_15_s16_z_untied, svint16_t, -+ z0 = svasr_wide_n_s16_z (p0, z1, 15), -+ z0 = svasr_wide_z (p0, z1, 15)) -+ -+/* -+** asr_wide_16_s16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** asr z0\.h, p0/m, z0\.h, #16 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_16_s16_z_tied1, svint16_t, -+ z0 = svasr_wide_n_s16_z (p0, z0, 16), -+ z0 = svasr_wide_z (p0, z0, 16)) -+ -+/* -+** asr_wide_16_s16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** asr z0\.h, p0/m, z0\.h, #16 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_16_s16_z_untied, svint16_t, -+ z0 = svasr_wide_n_s16_z (p0, z1, 16), -+ z0 = svasr_wide_z (p0, z1, 16)) -+ -+/* -+** asr_wide_s16_x_tied1: -+** asr z0\.h, z0\.h, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (asr_wide_s16_x_tied1, svint16_t, svuint64_t, -+ z0 = svasr_wide_s16_x (p0, z0, z4), -+ z0 = svasr_wide_x (p0, z0, z4)) -+ -+/* -+** asr_wide_s16_x_tied2: -+** asr z0\.h, z4\.h, z0\.d -+** ret -+*/ -+TEST_DUAL_Z_REV (asr_wide_s16_x_tied2, svint16_t, svuint64_t, -+ z0_res = svasr_wide_s16_x (p0, z4, z0), -+ z0_res = svasr_wide_x (p0, z4, z0)) -+ -+/* -+** asr_wide_s16_x_untied: -+** asr z0\.h, z1\.h, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (asr_wide_s16_x_untied, svint16_t, svuint64_t, -+ z0 = svasr_wide_s16_x (p0, z1, z4), -+ z0 = svasr_wide_x (p0, z1, z4)) -+ -+/* -+** asr_wide_x0_s16_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** asr z0\.h, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_wide_x0_s16_x_tied1, svint16_t, uint64_t, -+ z0 = svasr_wide_n_s16_x (p0, z0, x0), -+ z0 = svasr_wide_x (p0, z0, x0)) -+ -+/* -+** asr_wide_x0_s16_x_untied: -+** mov (z[0-9]+\.d), x0 -+** asr z0\.h, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_wide_x0_s16_x_untied, svint16_t, uint64_t, -+ z0 = svasr_wide_n_s16_x (p0, z1, x0), -+ z0 = svasr_wide_x (p0, z1, x0)) -+ -+/* -+** asr_wide_1_s16_x_tied1: -+** asr z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_1_s16_x_tied1, svint16_t, -+ z0 = svasr_wide_n_s16_x (p0, z0, 1), -+ z0 = svasr_wide_x (p0, z0, 1)) -+ -+/* -+** asr_wide_1_s16_x_untied: -+** asr z0\.h, z1\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_1_s16_x_untied, svint16_t, -+ z0 = svasr_wide_n_s16_x (p0, z1, 1), -+ z0 = svasr_wide_x (p0, z1, 1)) -+ -+/* -+** asr_wide_15_s16_x_tied1: -+** asr z0\.h, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_15_s16_x_tied1, svint16_t, -+ z0 = svasr_wide_n_s16_x (p0, z0, 15), -+ z0 = svasr_wide_x (p0, z0, 15)) -+ -+/* -+** asr_wide_15_s16_x_untied: -+** asr z0\.h, z1\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_15_s16_x_untied, svint16_t, -+ z0 = 
svasr_wide_n_s16_x (p0, z1, 15), -+ z0 = svasr_wide_x (p0, z1, 15)) -+ -+/* -+** asr_wide_16_s16_x_tied1: -+** asr z0\.h, z0\.h, #16 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_16_s16_x_tied1, svint16_t, -+ z0 = svasr_wide_n_s16_x (p0, z0, 16), -+ z0 = svasr_wide_x (p0, z0, 16)) -+ -+/* -+** asr_wide_16_s16_x_untied: -+** asr z0\.h, z1\.h, #16 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_16_s16_x_untied, svint16_t, -+ z0 = svasr_wide_n_s16_x (p0, z1, 16), -+ z0 = svasr_wide_x (p0, z1, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_wide_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_wide_s32.c -new file mode 100644 -index 000000000..8698aef26 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_wide_s32.c -@@ -0,0 +1,325 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** asr_wide_s32_m_tied1: -+** asr z0\.s, p0/m, z0\.s, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (asr_wide_s32_m_tied1, svint32_t, svuint64_t, -+ z0 = svasr_wide_s32_m (p0, z0, z4), -+ z0 = svasr_wide_m (p0, z0, z4)) -+ -+/* -+** asr_wide_s32_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z4 -+** asr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (asr_wide_s32_m_tied2, svint32_t, svuint64_t, -+ z0_res = svasr_wide_s32_m (p0, z4, z0), -+ z0_res = svasr_wide_m (p0, z4, z0)) -+ -+/* -+** asr_wide_s32_m_untied: -+** movprfx z0, z1 -+** asr z0\.s, p0/m, z0\.s, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (asr_wide_s32_m_untied, svint32_t, svuint64_t, -+ z0 = svasr_wide_s32_m (p0, z1, z4), -+ z0 = svasr_wide_m (p0, z1, z4)) -+ -+/* -+** asr_wide_x0_s32_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** asr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_wide_x0_s32_m_tied1, svint32_t, uint64_t, -+ z0 = svasr_wide_n_s32_m (p0, z0, x0), -+ z0 = svasr_wide_m (p0, z0, x0)) -+ -+/* -+** asr_wide_x0_s32_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** asr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_wide_x0_s32_m_untied, svint32_t, uint64_t, -+ z0 = svasr_wide_n_s32_m (p0, z1, x0), -+ z0 = svasr_wide_m (p0, z1, x0)) -+ -+/* -+** asr_wide_1_s32_m_tied1: -+** asr z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_1_s32_m_tied1, svint32_t, -+ z0 = svasr_wide_n_s32_m (p0, z0, 1), -+ z0 = svasr_wide_m (p0, z0, 1)) -+ -+/* -+** asr_wide_1_s32_m_untied: -+** movprfx z0, z1 -+** asr z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_1_s32_m_untied, svint32_t, -+ z0 = svasr_wide_n_s32_m (p0, z1, 1), -+ z0 = svasr_wide_m (p0, z1, 1)) -+ -+/* -+** asr_wide_31_s32_m_tied1: -+** asr z0\.s, p0/m, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_31_s32_m_tied1, svint32_t, -+ z0 = svasr_wide_n_s32_m (p0, z0, 31), -+ z0 = svasr_wide_m (p0, z0, 31)) -+ -+/* -+** asr_wide_31_s32_m_untied: -+** movprfx z0, z1 -+** asr z0\.s, p0/m, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_31_s32_m_untied, svint32_t, -+ z0 = svasr_wide_n_s32_m (p0, z1, 31), -+ z0 = svasr_wide_m (p0, z1, 31)) -+ -+/* -+** asr_wide_32_s32_m_tied1: -+** asr z0\.s, p0/m, z0\.s, #32 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_32_s32_m_tied1, svint32_t, -+ z0 = svasr_wide_n_s32_m (p0, z0, 32), -+ z0 = svasr_wide_m (p0, z0, 32)) -+ -+/* -+** asr_wide_32_s32_m_untied: -+** movprfx z0, z1 -+** asr z0\.s, p0/m, z0\.s, #32 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_32_s32_m_untied, svint32_t, -+ z0 = svasr_wide_n_s32_m (p0, z1, 32), -+ z0 = svasr_wide_m (p0, z1, 32)) -+ -+/* -+** asr_wide_s32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** 
asr z0\.s, p0/m, z0\.s, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (asr_wide_s32_z_tied1, svint32_t, svuint64_t, -+ z0 = svasr_wide_s32_z (p0, z0, z4), -+ z0 = svasr_wide_z (p0, z0, z4)) -+ -+/* -+** asr_wide_s32_z_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.s, p0/z, z4\.s -+** asr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (asr_wide_s32_z_tied2, svint32_t, svuint64_t, -+ z0_res = svasr_wide_s32_z (p0, z4, z0), -+ z0_res = svasr_wide_z (p0, z4, z0)) -+ -+/* -+** asr_wide_s32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** asr z0\.s, p0/m, z0\.s, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (asr_wide_s32_z_untied, svint32_t, svuint64_t, -+ z0 = svasr_wide_s32_z (p0, z1, z4), -+ z0 = svasr_wide_z (p0, z1, z4)) -+ -+/* -+** asr_wide_x0_s32_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.s, p0/z, z0\.s -+** asr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_wide_x0_s32_z_tied1, svint32_t, uint64_t, -+ z0 = svasr_wide_n_s32_z (p0, z0, x0), -+ z0 = svasr_wide_z (p0, z0, x0)) -+ -+/* -+** asr_wide_x0_s32_z_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.s, p0/z, z1\.s -+** asr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_wide_x0_s32_z_untied, svint32_t, uint64_t, -+ z0 = svasr_wide_n_s32_z (p0, z1, x0), -+ z0 = svasr_wide_z (p0, z1, x0)) -+ -+/* -+** asr_wide_1_s32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** asr z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_1_s32_z_tied1, svint32_t, -+ z0 = svasr_wide_n_s32_z (p0, z0, 1), -+ z0 = svasr_wide_z (p0, z0, 1)) -+ -+/* -+** asr_wide_1_s32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** asr z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_1_s32_z_untied, svint32_t, -+ z0 = svasr_wide_n_s32_z (p0, z1, 1), -+ z0 = svasr_wide_z (p0, z1, 1)) -+ -+/* -+** asr_wide_31_s32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** asr z0\.s, p0/m, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_31_s32_z_tied1, svint32_t, -+ z0 = svasr_wide_n_s32_z (p0, z0, 31), -+ z0 = svasr_wide_z (p0, z0, 31)) -+ -+/* -+** asr_wide_31_s32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** asr z0\.s, p0/m, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_31_s32_z_untied, svint32_t, -+ z0 = svasr_wide_n_s32_z (p0, z1, 31), -+ z0 = svasr_wide_z (p0, z1, 31)) -+ -+/* -+** asr_wide_32_s32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** asr z0\.s, p0/m, z0\.s, #32 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_32_s32_z_tied1, svint32_t, -+ z0 = svasr_wide_n_s32_z (p0, z0, 32), -+ z0 = svasr_wide_z (p0, z0, 32)) -+ -+/* -+** asr_wide_32_s32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** asr z0\.s, p0/m, z0\.s, #32 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_32_s32_z_untied, svint32_t, -+ z0 = svasr_wide_n_s32_z (p0, z1, 32), -+ z0 = svasr_wide_z (p0, z1, 32)) -+ -+/* -+** asr_wide_s32_x_tied1: -+** asr z0\.s, z0\.s, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (asr_wide_s32_x_tied1, svint32_t, svuint64_t, -+ z0 = svasr_wide_s32_x (p0, z0, z4), -+ z0 = svasr_wide_x (p0, z0, z4)) -+ -+/* -+** asr_wide_s32_x_tied2: -+** asr z0\.s, z4\.s, z0\.d -+** ret -+*/ -+TEST_DUAL_Z_REV (asr_wide_s32_x_tied2, svint32_t, svuint64_t, -+ z0_res = svasr_wide_s32_x (p0, z4, z0), -+ z0_res = svasr_wide_x (p0, z4, z0)) -+ -+/* -+** asr_wide_s32_x_untied: -+** asr z0\.s, z1\.s, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (asr_wide_s32_x_untied, svint32_t, svuint64_t, -+ z0 = svasr_wide_s32_x (p0, z1, z4), -+ z0 = svasr_wide_x (p0, z1, z4)) -+ -+/* -+** asr_wide_x0_s32_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** asr z0\.s, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX 
(asr_wide_x0_s32_x_tied1, svint32_t, uint64_t, -+ z0 = svasr_wide_n_s32_x (p0, z0, x0), -+ z0 = svasr_wide_x (p0, z0, x0)) -+ -+/* -+** asr_wide_x0_s32_x_untied: -+** mov (z[0-9]+\.d), x0 -+** asr z0\.s, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_wide_x0_s32_x_untied, svint32_t, uint64_t, -+ z0 = svasr_wide_n_s32_x (p0, z1, x0), -+ z0 = svasr_wide_x (p0, z1, x0)) -+ -+/* -+** asr_wide_1_s32_x_tied1: -+** asr z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_1_s32_x_tied1, svint32_t, -+ z0 = svasr_wide_n_s32_x (p0, z0, 1), -+ z0 = svasr_wide_x (p0, z0, 1)) -+ -+/* -+** asr_wide_1_s32_x_untied: -+** asr z0\.s, z1\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_1_s32_x_untied, svint32_t, -+ z0 = svasr_wide_n_s32_x (p0, z1, 1), -+ z0 = svasr_wide_x (p0, z1, 1)) -+ -+/* -+** asr_wide_31_s32_x_tied1: -+** asr z0\.s, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_31_s32_x_tied1, svint32_t, -+ z0 = svasr_wide_n_s32_x (p0, z0, 31), -+ z0 = svasr_wide_x (p0, z0, 31)) -+ -+/* -+** asr_wide_31_s32_x_untied: -+** asr z0\.s, z1\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_31_s32_x_untied, svint32_t, -+ z0 = svasr_wide_n_s32_x (p0, z1, 31), -+ z0 = svasr_wide_x (p0, z1, 31)) -+ -+/* -+** asr_wide_32_s32_x_tied1: -+** asr z0\.s, z0\.s, #32 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_32_s32_x_tied1, svint32_t, -+ z0 = svasr_wide_n_s32_x (p0, z0, 32), -+ z0 = svasr_wide_x (p0, z0, 32)) -+ -+/* -+** asr_wide_32_s32_x_untied: -+** asr z0\.s, z1\.s, #32 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_32_s32_x_untied, svint32_t, -+ z0 = svasr_wide_n_s32_x (p0, z1, 32), -+ z0 = svasr_wide_x (p0, z1, 32)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_wide_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_wide_s8.c -new file mode 100644 -index 000000000..77b166939 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asr_wide_s8.c -@@ -0,0 +1,325 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** asr_wide_s8_m_tied1: -+** asr z0\.b, p0/m, z0\.b, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (asr_wide_s8_m_tied1, svint8_t, svuint64_t, -+ z0 = svasr_wide_s8_m (p0, z0, z4), -+ z0 = svasr_wide_m (p0, z0, z4)) -+ -+/* -+** asr_wide_s8_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z4 -+** asr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (asr_wide_s8_m_tied2, svint8_t, svuint64_t, -+ z0_res = svasr_wide_s8_m (p0, z4, z0), -+ z0_res = svasr_wide_m (p0, z4, z0)) -+ -+/* -+** asr_wide_s8_m_untied: -+** movprfx z0, z1 -+** asr z0\.b, p0/m, z0\.b, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (asr_wide_s8_m_untied, svint8_t, svuint64_t, -+ z0 = svasr_wide_s8_m (p0, z1, z4), -+ z0 = svasr_wide_m (p0, z1, z4)) -+ -+/* -+** asr_wide_x0_s8_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** asr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_wide_x0_s8_m_tied1, svint8_t, uint64_t, -+ z0 = svasr_wide_n_s8_m (p0, z0, x0), -+ z0 = svasr_wide_m (p0, z0, x0)) -+ -+/* -+** asr_wide_x0_s8_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** asr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_wide_x0_s8_m_untied, svint8_t, uint64_t, -+ z0 = svasr_wide_n_s8_m (p0, z1, x0), -+ z0 = svasr_wide_m (p0, z1, x0)) -+ -+/* -+** asr_wide_1_s8_m_tied1: -+** asr z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_1_s8_m_tied1, svint8_t, -+ z0 = svasr_wide_n_s8_m (p0, z0, 1), -+ z0 = svasr_wide_m (p0, z0, 1)) -+ -+/* -+** asr_wide_1_s8_m_untied: -+** movprfx z0, z1 -+** asr z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ 
-+TEST_UNIFORM_Z (asr_wide_1_s8_m_untied, svint8_t, -+ z0 = svasr_wide_n_s8_m (p0, z1, 1), -+ z0 = svasr_wide_m (p0, z1, 1)) -+ -+/* -+** asr_wide_7_s8_m_tied1: -+** asr z0\.b, p0/m, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_7_s8_m_tied1, svint8_t, -+ z0 = svasr_wide_n_s8_m (p0, z0, 7), -+ z0 = svasr_wide_m (p0, z0, 7)) -+ -+/* -+** asr_wide_7_s8_m_untied: -+** movprfx z0, z1 -+** asr z0\.b, p0/m, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_7_s8_m_untied, svint8_t, -+ z0 = svasr_wide_n_s8_m (p0, z1, 7), -+ z0 = svasr_wide_m (p0, z1, 7)) -+ -+/* -+** asr_wide_8_s8_m_tied1: -+** asr z0\.b, p0/m, z0\.b, #8 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_8_s8_m_tied1, svint8_t, -+ z0 = svasr_wide_n_s8_m (p0, z0, 8), -+ z0 = svasr_wide_m (p0, z0, 8)) -+ -+/* -+** asr_wide_8_s8_m_untied: -+** movprfx z0, z1 -+** asr z0\.b, p0/m, z0\.b, #8 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_8_s8_m_untied, svint8_t, -+ z0 = svasr_wide_n_s8_m (p0, z1, 8), -+ z0 = svasr_wide_m (p0, z1, 8)) -+ -+/* -+** asr_wide_s8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** asr z0\.b, p0/m, z0\.b, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (asr_wide_s8_z_tied1, svint8_t, svuint64_t, -+ z0 = svasr_wide_s8_z (p0, z0, z4), -+ z0 = svasr_wide_z (p0, z0, z4)) -+ -+/* -+** asr_wide_s8_z_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.b, p0/z, z4\.b -+** asr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (asr_wide_s8_z_tied2, svint8_t, svuint64_t, -+ z0_res = svasr_wide_s8_z (p0, z4, z0), -+ z0_res = svasr_wide_z (p0, z4, z0)) -+ -+/* -+** asr_wide_s8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** asr z0\.b, p0/m, z0\.b, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (asr_wide_s8_z_untied, svint8_t, svuint64_t, -+ z0 = svasr_wide_s8_z (p0, z1, z4), -+ z0 = svasr_wide_z (p0, z1, z4)) -+ -+/* -+** asr_wide_x0_s8_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.b, p0/z, z0\.b -+** asr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_wide_x0_s8_z_tied1, svint8_t, uint64_t, -+ z0 = svasr_wide_n_s8_z (p0, z0, x0), -+ z0 = svasr_wide_z (p0, z0, x0)) -+ -+/* -+** asr_wide_x0_s8_z_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.b, p0/z, z1\.b -+** asr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_wide_x0_s8_z_untied, svint8_t, uint64_t, -+ z0 = svasr_wide_n_s8_z (p0, z1, x0), -+ z0 = svasr_wide_z (p0, z1, x0)) -+ -+/* -+** asr_wide_1_s8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** asr z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_1_s8_z_tied1, svint8_t, -+ z0 = svasr_wide_n_s8_z (p0, z0, 1), -+ z0 = svasr_wide_z (p0, z0, 1)) -+ -+/* -+** asr_wide_1_s8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** asr z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_1_s8_z_untied, svint8_t, -+ z0 = svasr_wide_n_s8_z (p0, z1, 1), -+ z0 = svasr_wide_z (p0, z1, 1)) -+ -+/* -+** asr_wide_7_s8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** asr z0\.b, p0/m, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_7_s8_z_tied1, svint8_t, -+ z0 = svasr_wide_n_s8_z (p0, z0, 7), -+ z0 = svasr_wide_z (p0, z0, 7)) -+ -+/* -+** asr_wide_7_s8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** asr z0\.b, p0/m, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_7_s8_z_untied, svint8_t, -+ z0 = svasr_wide_n_s8_z (p0, z1, 7), -+ z0 = svasr_wide_z (p0, z1, 7)) -+ -+/* -+** asr_wide_8_s8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** asr z0\.b, p0/m, z0\.b, #8 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_8_s8_z_tied1, svint8_t, -+ z0 = svasr_wide_n_s8_z (p0, z0, 8), -+ z0 = svasr_wide_z (p0, z0, 8)) -+ -+/* -+** 
asr_wide_8_s8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** asr z0\.b, p0/m, z0\.b, #8 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_8_s8_z_untied, svint8_t, -+ z0 = svasr_wide_n_s8_z (p0, z1, 8), -+ z0 = svasr_wide_z (p0, z1, 8)) -+ -+/* -+** asr_wide_s8_x_tied1: -+** asr z0\.b, z0\.b, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (asr_wide_s8_x_tied1, svint8_t, svuint64_t, -+ z0 = svasr_wide_s8_x (p0, z0, z4), -+ z0 = svasr_wide_x (p0, z0, z4)) -+ -+/* -+** asr_wide_s8_x_tied2: -+** asr z0\.b, z4\.b, z0\.d -+** ret -+*/ -+TEST_DUAL_Z_REV (asr_wide_s8_x_tied2, svint8_t, svuint64_t, -+ z0_res = svasr_wide_s8_x (p0, z4, z0), -+ z0_res = svasr_wide_x (p0, z4, z0)) -+ -+/* -+** asr_wide_s8_x_untied: -+** asr z0\.b, z1\.b, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (asr_wide_s8_x_untied, svint8_t, svuint64_t, -+ z0 = svasr_wide_s8_x (p0, z1, z4), -+ z0 = svasr_wide_x (p0, z1, z4)) -+ -+/* -+** asr_wide_x0_s8_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** asr z0\.b, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_wide_x0_s8_x_tied1, svint8_t, uint64_t, -+ z0 = svasr_wide_n_s8_x (p0, z0, x0), -+ z0 = svasr_wide_x (p0, z0, x0)) -+ -+/* -+** asr_wide_x0_s8_x_untied: -+** mov (z[0-9]+\.d), x0 -+** asr z0\.b, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (asr_wide_x0_s8_x_untied, svint8_t, uint64_t, -+ z0 = svasr_wide_n_s8_x (p0, z1, x0), -+ z0 = svasr_wide_x (p0, z1, x0)) -+ -+/* -+** asr_wide_1_s8_x_tied1: -+** asr z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_1_s8_x_tied1, svint8_t, -+ z0 = svasr_wide_n_s8_x (p0, z0, 1), -+ z0 = svasr_wide_x (p0, z0, 1)) -+ -+/* -+** asr_wide_1_s8_x_untied: -+** asr z0\.b, z1\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_1_s8_x_untied, svint8_t, -+ z0 = svasr_wide_n_s8_x (p0, z1, 1), -+ z0 = svasr_wide_x (p0, z1, 1)) -+ -+/* -+** asr_wide_7_s8_x_tied1: -+** asr z0\.b, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_7_s8_x_tied1, svint8_t, -+ z0 = svasr_wide_n_s8_x (p0, z0, 7), -+ z0 = svasr_wide_x (p0, z0, 7)) -+ -+/* -+** asr_wide_7_s8_x_untied: -+** asr z0\.b, z1\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_7_s8_x_untied, svint8_t, -+ z0 = svasr_wide_n_s8_x (p0, z1, 7), -+ z0 = svasr_wide_x (p0, z1, 7)) -+ -+/* -+** asr_wide_8_s8_x_tied1: -+** asr z0\.b, z0\.b, #8 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_8_s8_x_tied1, svint8_t, -+ z0 = svasr_wide_n_s8_x (p0, z0, 8), -+ z0 = svasr_wide_x (p0, z0, 8)) -+ -+/* -+** asr_wide_8_s8_x_untied: -+** asr z0\.b, z1\.b, #8 -+** ret -+*/ -+TEST_UNIFORM_Z (asr_wide_8_s8_x_untied, svint8_t, -+ z0 = svasr_wide_n_s8_x (p0, z1, 8), -+ z0 = svasr_wide_x (p0, z1, 8)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asrd_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asrd_s16.c -new file mode 100644 -index 000000000..40bbce042 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asrd_s16.c -@@ -0,0 +1,177 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** asrd_1_s16_m_tied1: -+** asrd z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_1_s16_m_tied1, svint16_t, -+ z0 = svasrd_n_s16_m (p0, z0, 1), -+ z0 = svasrd_m (p0, z0, 1)) -+ -+/* -+** asrd_1_s16_m_untied: -+** movprfx z0, z1 -+** asrd z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_1_s16_m_untied, svint16_t, -+ z0 = svasrd_n_s16_m (p0, z1, 1), -+ z0 = svasrd_m (p0, z1, 1)) -+ -+/* -+** asrd_2_s16_m_tied1: -+** asrd z0\.h, p0/m, z0\.h, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_2_s16_m_tied1, svint16_t, -+ z0 = svasrd_n_s16_m (p0, z0, 2), -+ z0 = svasrd_m (p0, z0, 2)) -+ -+/* -+** 
asrd_2_s16_m_untied: -+** movprfx z0, z1 -+** asrd z0\.h, p0/m, z0\.h, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_2_s16_m_untied, svint16_t, -+ z0 = svasrd_n_s16_m (p0, z1, 2), -+ z0 = svasrd_m (p0, z1, 2)) -+ -+/* -+** asrd_16_s16_m_tied1: -+** asrd z0\.h, p0/m, z0\.h, #16 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_16_s16_m_tied1, svint16_t, -+ z0 = svasrd_n_s16_m (p0, z0, 16), -+ z0 = svasrd_m (p0, z0, 16)) -+ -+/* -+** asrd_16_s16_m_untied: -+** movprfx z0, z1 -+** asrd z0\.h, p0/m, z0\.h, #16 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_16_s16_m_untied, svint16_t, -+ z0 = svasrd_n_s16_m (p0, z1, 16), -+ z0 = svasrd_m (p0, z1, 16)) -+ -+/* -+** asrd_1_s16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** asrd z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_1_s16_z_tied1, svint16_t, -+ z0 = svasrd_n_s16_z (p0, z0, 1), -+ z0 = svasrd_z (p0, z0, 1)) -+ -+/* -+** asrd_1_s16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** asrd z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_1_s16_z_untied, svint16_t, -+ z0 = svasrd_n_s16_z (p0, z1, 1), -+ z0 = svasrd_z (p0, z1, 1)) -+ -+/* -+** asrd_2_s16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** asrd z0\.h, p0/m, z0\.h, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_2_s16_z_tied1, svint16_t, -+ z0 = svasrd_n_s16_z (p0, z0, 2), -+ z0 = svasrd_z (p0, z0, 2)) -+ -+/* -+** asrd_2_s16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** asrd z0\.h, p0/m, z0\.h, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_2_s16_z_untied, svint16_t, -+ z0 = svasrd_n_s16_z (p0, z1, 2), -+ z0 = svasrd_z (p0, z1, 2)) -+ -+/* -+** asrd_16_s16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** asrd z0\.h, p0/m, z0\.h, #16 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_16_s16_z_tied1, svint16_t, -+ z0 = svasrd_n_s16_z (p0, z0, 16), -+ z0 = svasrd_z (p0, z0, 16)) -+ -+/* -+** asrd_16_s16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** asrd z0\.h, p0/m, z0\.h, #16 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_16_s16_z_untied, svint16_t, -+ z0 = svasrd_n_s16_z (p0, z1, 16), -+ z0 = svasrd_z (p0, z1, 16)) -+ -+/* -+** asrd_1_s16_x_tied1: -+** asrd z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_1_s16_x_tied1, svint16_t, -+ z0 = svasrd_n_s16_x (p0, z0, 1), -+ z0 = svasrd_x (p0, z0, 1)) -+ -+/* -+** asrd_1_s16_x_untied: -+** movprfx z0, z1 -+** asrd z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_1_s16_x_untied, svint16_t, -+ z0 = svasrd_n_s16_x (p0, z1, 1), -+ z0 = svasrd_x (p0, z1, 1)) -+ -+/* -+** asrd_2_s16_x_tied1: -+** asrd z0\.h, p0/m, z0\.h, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_2_s16_x_tied1, svint16_t, -+ z0 = svasrd_n_s16_x (p0, z0, 2), -+ z0 = svasrd_x (p0, z0, 2)) -+ -+/* -+** asrd_2_s16_x_untied: -+** movprfx z0, z1 -+** asrd z0\.h, p0/m, z0\.h, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_2_s16_x_untied, svint16_t, -+ z0 = svasrd_n_s16_x (p0, z1, 2), -+ z0 = svasrd_x (p0, z1, 2)) -+ -+/* -+** asrd_16_s16_x_tied1: -+** asrd z0\.h, p0/m, z0\.h, #16 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_16_s16_x_tied1, svint16_t, -+ z0 = svasrd_n_s16_x (p0, z0, 16), -+ z0 = svasrd_x (p0, z0, 16)) -+ -+/* -+** asrd_16_s16_x_untied: -+** movprfx z0, z1 -+** asrd z0\.h, p0/m, z0\.h, #16 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_16_s16_x_untied, svint16_t, -+ z0 = svasrd_n_s16_x (p0, z1, 16), -+ z0 = svasrd_x (p0, z1, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asrd_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asrd_s32.c -new file mode 100644 -index 000000000..0760b03de ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asrd_s32.c -@@ -0,0 +1,177 @@ -+/* { dg-final { 
check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** asrd_1_s32_m_tied1: -+** asrd z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_1_s32_m_tied1, svint32_t, -+ z0 = svasrd_n_s32_m (p0, z0, 1), -+ z0 = svasrd_m (p0, z0, 1)) -+ -+/* -+** asrd_1_s32_m_untied: -+** movprfx z0, z1 -+** asrd z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_1_s32_m_untied, svint32_t, -+ z0 = svasrd_n_s32_m (p0, z1, 1), -+ z0 = svasrd_m (p0, z1, 1)) -+ -+/* -+** asrd_2_s32_m_tied1: -+** asrd z0\.s, p0/m, z0\.s, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_2_s32_m_tied1, svint32_t, -+ z0 = svasrd_n_s32_m (p0, z0, 2), -+ z0 = svasrd_m (p0, z0, 2)) -+ -+/* -+** asrd_2_s32_m_untied: -+** movprfx z0, z1 -+** asrd z0\.s, p0/m, z0\.s, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_2_s32_m_untied, svint32_t, -+ z0 = svasrd_n_s32_m (p0, z1, 2), -+ z0 = svasrd_m (p0, z1, 2)) -+ -+/* -+** asrd_32_s32_m_tied1: -+** asrd z0\.s, p0/m, z0\.s, #32 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_32_s32_m_tied1, svint32_t, -+ z0 = svasrd_n_s32_m (p0, z0, 32), -+ z0 = svasrd_m (p0, z0, 32)) -+ -+/* -+** asrd_32_s32_m_untied: -+** movprfx z0, z1 -+** asrd z0\.s, p0/m, z0\.s, #32 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_32_s32_m_untied, svint32_t, -+ z0 = svasrd_n_s32_m (p0, z1, 32), -+ z0 = svasrd_m (p0, z1, 32)) -+ -+/* -+** asrd_1_s32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** asrd z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_1_s32_z_tied1, svint32_t, -+ z0 = svasrd_n_s32_z (p0, z0, 1), -+ z0 = svasrd_z (p0, z0, 1)) -+ -+/* -+** asrd_1_s32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** asrd z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_1_s32_z_untied, svint32_t, -+ z0 = svasrd_n_s32_z (p0, z1, 1), -+ z0 = svasrd_z (p0, z1, 1)) -+ -+/* -+** asrd_2_s32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** asrd z0\.s, p0/m, z0\.s, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_2_s32_z_tied1, svint32_t, -+ z0 = svasrd_n_s32_z (p0, z0, 2), -+ z0 = svasrd_z (p0, z0, 2)) -+ -+/* -+** asrd_2_s32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** asrd z0\.s, p0/m, z0\.s, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_2_s32_z_untied, svint32_t, -+ z0 = svasrd_n_s32_z (p0, z1, 2), -+ z0 = svasrd_z (p0, z1, 2)) -+ -+/* -+** asrd_32_s32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** asrd z0\.s, p0/m, z0\.s, #32 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_32_s32_z_tied1, svint32_t, -+ z0 = svasrd_n_s32_z (p0, z0, 32), -+ z0 = svasrd_z (p0, z0, 32)) -+ -+/* -+** asrd_32_s32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** asrd z0\.s, p0/m, z0\.s, #32 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_32_s32_z_untied, svint32_t, -+ z0 = svasrd_n_s32_z (p0, z1, 32), -+ z0 = svasrd_z (p0, z1, 32)) -+ -+/* -+** asrd_1_s32_x_tied1: -+** asrd z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_1_s32_x_tied1, svint32_t, -+ z0 = svasrd_n_s32_x (p0, z0, 1), -+ z0 = svasrd_x (p0, z0, 1)) -+ -+/* -+** asrd_1_s32_x_untied: -+** movprfx z0, z1 -+** asrd z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_1_s32_x_untied, svint32_t, -+ z0 = svasrd_n_s32_x (p0, z1, 1), -+ z0 = svasrd_x (p0, z1, 1)) -+ -+/* -+** asrd_2_s32_x_tied1: -+** asrd z0\.s, p0/m, z0\.s, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_2_s32_x_tied1, svint32_t, -+ z0 = svasrd_n_s32_x (p0, z0, 2), -+ z0 = svasrd_x (p0, z0, 2)) -+ -+/* -+** asrd_2_s32_x_untied: -+** movprfx z0, z1 -+** asrd z0\.s, p0/m, z0\.s, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_2_s32_x_untied, svint32_t, -+ z0 = svasrd_n_s32_x (p0, z1, 2), -+ z0 = svasrd_x (p0, z1, 2)) -+ -+/* -+** 
asrd_32_s32_x_tied1: -+** asrd z0\.s, p0/m, z0\.s, #32 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_32_s32_x_tied1, svint32_t, -+ z0 = svasrd_n_s32_x (p0, z0, 32), -+ z0 = svasrd_x (p0, z0, 32)) -+ -+/* -+** asrd_32_s32_x_untied: -+** movprfx z0, z1 -+** asrd z0\.s, p0/m, z0\.s, #32 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_32_s32_x_untied, svint32_t, -+ z0 = svasrd_n_s32_x (p0, z1, 32), -+ z0 = svasrd_x (p0, z1, 32)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asrd_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asrd_s64.c -new file mode 100644 -index 000000000..0ef26c9fe ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asrd_s64.c -@@ -0,0 +1,177 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** asrd_1_s64_m_tied1: -+** asrd z0\.d, p0/m, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_1_s64_m_tied1, svint64_t, -+ z0 = svasrd_n_s64_m (p0, z0, 1), -+ z0 = svasrd_m (p0, z0, 1)) -+ -+/* -+** asrd_1_s64_m_untied: -+** movprfx z0, z1 -+** asrd z0\.d, p0/m, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_1_s64_m_untied, svint64_t, -+ z0 = svasrd_n_s64_m (p0, z1, 1), -+ z0 = svasrd_m (p0, z1, 1)) -+ -+/* -+** asrd_2_s64_m_tied1: -+** asrd z0\.d, p0/m, z0\.d, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_2_s64_m_tied1, svint64_t, -+ z0 = svasrd_n_s64_m (p0, z0, 2), -+ z0 = svasrd_m (p0, z0, 2)) -+ -+/* -+** asrd_2_s64_m_untied: -+** movprfx z0, z1 -+** asrd z0\.d, p0/m, z0\.d, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_2_s64_m_untied, svint64_t, -+ z0 = svasrd_n_s64_m (p0, z1, 2), -+ z0 = svasrd_m (p0, z1, 2)) -+ -+/* -+** asrd_64_s64_m_tied1: -+** asrd z0\.d, p0/m, z0\.d, #64 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_64_s64_m_tied1, svint64_t, -+ z0 = svasrd_n_s64_m (p0, z0, 64), -+ z0 = svasrd_m (p0, z0, 64)) -+ -+/* -+** asrd_64_s64_m_untied: -+** movprfx z0, z1 -+** asrd z0\.d, p0/m, z0\.d, #64 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_64_s64_m_untied, svint64_t, -+ z0 = svasrd_n_s64_m (p0, z1, 64), -+ z0 = svasrd_m (p0, z1, 64)) -+ -+/* -+** asrd_1_s64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** asrd z0\.d, p0/m, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_1_s64_z_tied1, svint64_t, -+ z0 = svasrd_n_s64_z (p0, z0, 1), -+ z0 = svasrd_z (p0, z0, 1)) -+ -+/* -+** asrd_1_s64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** asrd z0\.d, p0/m, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_1_s64_z_untied, svint64_t, -+ z0 = svasrd_n_s64_z (p0, z1, 1), -+ z0 = svasrd_z (p0, z1, 1)) -+ -+/* -+** asrd_2_s64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** asrd z0\.d, p0/m, z0\.d, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_2_s64_z_tied1, svint64_t, -+ z0 = svasrd_n_s64_z (p0, z0, 2), -+ z0 = svasrd_z (p0, z0, 2)) -+ -+/* -+** asrd_2_s64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** asrd z0\.d, p0/m, z0\.d, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_2_s64_z_untied, svint64_t, -+ z0 = svasrd_n_s64_z (p0, z1, 2), -+ z0 = svasrd_z (p0, z1, 2)) -+ -+/* -+** asrd_64_s64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** asrd z0\.d, p0/m, z0\.d, #64 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_64_s64_z_tied1, svint64_t, -+ z0 = svasrd_n_s64_z (p0, z0, 64), -+ z0 = svasrd_z (p0, z0, 64)) -+ -+/* -+** asrd_64_s64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** asrd z0\.d, p0/m, z0\.d, #64 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_64_s64_z_untied, svint64_t, -+ z0 = svasrd_n_s64_z (p0, z1, 64), -+ z0 = svasrd_z (p0, z1, 64)) -+ -+/* -+** asrd_1_s64_x_tied1: -+** asrd z0\.d, p0/m, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_1_s64_x_tied1, svint64_t, -+ z0 = 
svasrd_n_s64_x (p0, z0, 1), -+ z0 = svasrd_x (p0, z0, 1)) -+ -+/* -+** asrd_1_s64_x_untied: -+** movprfx z0, z1 -+** asrd z0\.d, p0/m, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_1_s64_x_untied, svint64_t, -+ z0 = svasrd_n_s64_x (p0, z1, 1), -+ z0 = svasrd_x (p0, z1, 1)) -+ -+/* -+** asrd_2_s64_x_tied1: -+** asrd z0\.d, p0/m, z0\.d, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_2_s64_x_tied1, svint64_t, -+ z0 = svasrd_n_s64_x (p0, z0, 2), -+ z0 = svasrd_x (p0, z0, 2)) -+ -+/* -+** asrd_2_s64_x_untied: -+** movprfx z0, z1 -+** asrd z0\.d, p0/m, z0\.d, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_2_s64_x_untied, svint64_t, -+ z0 = svasrd_n_s64_x (p0, z1, 2), -+ z0 = svasrd_x (p0, z1, 2)) -+ -+/* -+** asrd_64_s64_x_tied1: -+** asrd z0\.d, p0/m, z0\.d, #64 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_64_s64_x_tied1, svint64_t, -+ z0 = svasrd_n_s64_x (p0, z0, 64), -+ z0 = svasrd_x (p0, z0, 64)) -+ -+/* -+** asrd_64_s64_x_untied: -+** movprfx z0, z1 -+** asrd z0\.d, p0/m, z0\.d, #64 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_64_s64_x_untied, svint64_t, -+ z0 = svasrd_n_s64_x (p0, z1, 64), -+ z0 = svasrd_x (p0, z1, 64)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asrd_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asrd_s8.c -new file mode 100644 -index 000000000..9249ffbcb ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/asrd_s8.c -@@ -0,0 +1,177 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** asrd_1_s8_m_tied1: -+** asrd z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_1_s8_m_tied1, svint8_t, -+ z0 = svasrd_n_s8_m (p0, z0, 1), -+ z0 = svasrd_m (p0, z0, 1)) -+ -+/* -+** asrd_1_s8_m_untied: -+** movprfx z0, z1 -+** asrd z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_1_s8_m_untied, svint8_t, -+ z0 = svasrd_n_s8_m (p0, z1, 1), -+ z0 = svasrd_m (p0, z1, 1)) -+ -+/* -+** asrd_2_s8_m_tied1: -+** asrd z0\.b, p0/m, z0\.b, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_2_s8_m_tied1, svint8_t, -+ z0 = svasrd_n_s8_m (p0, z0, 2), -+ z0 = svasrd_m (p0, z0, 2)) -+ -+/* -+** asrd_2_s8_m_untied: -+** movprfx z0, z1 -+** asrd z0\.b, p0/m, z0\.b, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_2_s8_m_untied, svint8_t, -+ z0 = svasrd_n_s8_m (p0, z1, 2), -+ z0 = svasrd_m (p0, z1, 2)) -+ -+/* -+** asrd_8_s8_m_tied1: -+** asrd z0\.b, p0/m, z0\.b, #8 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_8_s8_m_tied1, svint8_t, -+ z0 = svasrd_n_s8_m (p0, z0, 8), -+ z0 = svasrd_m (p0, z0, 8)) -+ -+/* -+** asrd_8_s8_m_untied: -+** movprfx z0, z1 -+** asrd z0\.b, p0/m, z0\.b, #8 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_8_s8_m_untied, svint8_t, -+ z0 = svasrd_n_s8_m (p0, z1, 8), -+ z0 = svasrd_m (p0, z1, 8)) -+ -+/* -+** asrd_1_s8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** asrd z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_1_s8_z_tied1, svint8_t, -+ z0 = svasrd_n_s8_z (p0, z0, 1), -+ z0 = svasrd_z (p0, z0, 1)) -+ -+/* -+** asrd_1_s8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** asrd z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_1_s8_z_untied, svint8_t, -+ z0 = svasrd_n_s8_z (p0, z1, 1), -+ z0 = svasrd_z (p0, z1, 1)) -+ -+/* -+** asrd_2_s8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** asrd z0\.b, p0/m, z0\.b, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_2_s8_z_tied1, svint8_t, -+ z0 = svasrd_n_s8_z (p0, z0, 2), -+ z0 = svasrd_z (p0, z0, 2)) -+ -+/* -+** asrd_2_s8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** asrd z0\.b, p0/m, z0\.b, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_2_s8_z_untied, svint8_t, -+ z0 = svasrd_n_s8_z (p0, 
z1, 2), -+ z0 = svasrd_z (p0, z1, 2)) -+ -+/* -+** asrd_8_s8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** asrd z0\.b, p0/m, z0\.b, #8 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_8_s8_z_tied1, svint8_t, -+ z0 = svasrd_n_s8_z (p0, z0, 8), -+ z0 = svasrd_z (p0, z0, 8)) -+ -+/* -+** asrd_8_s8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** asrd z0\.b, p0/m, z0\.b, #8 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_8_s8_z_untied, svint8_t, -+ z0 = svasrd_n_s8_z (p0, z1, 8), -+ z0 = svasrd_z (p0, z1, 8)) -+ -+/* -+** asrd_1_s8_x_tied1: -+** asrd z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_1_s8_x_tied1, svint8_t, -+ z0 = svasrd_n_s8_x (p0, z0, 1), -+ z0 = svasrd_x (p0, z0, 1)) -+ -+/* -+** asrd_1_s8_x_untied: -+** movprfx z0, z1 -+** asrd z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_1_s8_x_untied, svint8_t, -+ z0 = svasrd_n_s8_x (p0, z1, 1), -+ z0 = svasrd_x (p0, z1, 1)) -+ -+/* -+** asrd_2_s8_x_tied1: -+** asrd z0\.b, p0/m, z0\.b, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_2_s8_x_tied1, svint8_t, -+ z0 = svasrd_n_s8_x (p0, z0, 2), -+ z0 = svasrd_x (p0, z0, 2)) -+ -+/* -+** asrd_2_s8_x_untied: -+** movprfx z0, z1 -+** asrd z0\.b, p0/m, z0\.b, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_2_s8_x_untied, svint8_t, -+ z0 = svasrd_n_s8_x (p0, z1, 2), -+ z0 = svasrd_x (p0, z1, 2)) -+ -+/* -+** asrd_8_s8_x_tied1: -+** asrd z0\.b, p0/m, z0\.b, #8 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_8_s8_x_tied1, svint8_t, -+ z0 = svasrd_n_s8_x (p0, z0, 8), -+ z0 = svasrd_x (p0, z0, 8)) -+ -+/* -+** asrd_8_s8_x_untied: -+** movprfx z0, z1 -+** asrd z0\.b, p0/m, z0\.b, #8 -+** ret -+*/ -+TEST_UNIFORM_Z (asrd_8_s8_x_untied, svint8_t, -+ z0 = svasrd_n_s8_x (p0, z1, 8), -+ z0 = svasrd_x (p0, z1, 8)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_f32.c -new file mode 100644 -index 000000000..376622da0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_f32.c -@@ -0,0 +1,67 @@ -+/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */ -+/* { dg-require-effective-target aarch64_asm_bf16_ok } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** bfdot_f32_tied1: -+** bfdot z0\.s, z4\.h, z5\.h -+** ret -+*/ -+TEST_DUAL_Z (bfdot_f32_tied1, svfloat32_t, svbfloat16_t, -+ z0 = svbfdot_f32 (z0, z4, z5), -+ z0 = svbfdot (z0, z4, z5)) -+ -+/* -+** bfdot_f32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** bfdot z0\.s, \1\.h, z1\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (bfdot_f32_tied2, svfloat32_t, svbfloat16_t, -+ z0_res = svbfdot_f32 (z4, z0, z1), -+ z0_res = svbfdot (z4, z0, z1)) -+ -+/* -+** bfdot_f32_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** bfdot z0\.s, z1\.h, \1\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (bfdot_f32_tied3, svfloat32_t, svbfloat16_t, -+ z0_res = svbfdot_f32 (z4, z1, z0), -+ z0_res = svbfdot (z4, z1, z0)) -+ -+/* -+** bfdot_f32_untied: -+** movprfx z0, z1 -+** bfdot z0\.s, z4\.h, z5\.h -+** ret -+*/ -+TEST_DUAL_Z (bfdot_f32_untied, svfloat32_t, svbfloat16_t, -+ z0 = svbfdot_f32 (z1, z4, z5), -+ z0 = svbfdot (z1, z4, z5)) -+ -+/* -+** bfdot_h7_f32_tied1: -+** mov (z[0-9]+\.h), h7 -+** bfdot z0\.s, z4\.h, \1 -+** ret -+*/ -+TEST_DUAL_ZD (bfdot_h7_f32_tied1, svfloat32_t, svbfloat16_t, bfloat16_t, -+ z0 = svbfdot_n_f32 (z0, z4, d7), -+ z0 = svbfdot (z0, z4, d7)) -+ -+/* -+** bfdot_h7_f32_untied: -+** mov (z[0-9]+\.h), h7 -+** movprfx z0, z1 -+** bfdot z0\.s, z4\.h, \1 -+** ret -+*/ -+TEST_DUAL_ZD (bfdot_h7_f32_untied, svfloat32_t, 
svbfloat16_t, bfloat16_t, -+ z0 = svbfdot_n_f32 (z1, z4, d7), -+ z0 = svbfdot (z1, z4, d7)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_lane_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_lane_f32.c -new file mode 100644 -index 000000000..0f624fe9f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfdot_lane_f32.c -@@ -0,0 +1,86 @@ -+/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */ -+/* { dg-require-effective-target aarch64_asm_bf16_ok } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** bfdot_lane_0_f32_tied1: -+** bfdot z0\.s, z4\.h, z5\.h\[0\] -+** ret -+*/ -+TEST_DUAL_Z (bfdot_lane_0_f32_tied1, svfloat32_t, svbfloat16_t, -+ z0 = svbfdot_lane_f32 (z0, z4, z5, 0), -+ z0 = svbfdot_lane (z0, z4, z5, 0)) -+ -+/* -+** bfdot_lane_0_f32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** bfdot z0\.s, \1\.h, z1\.h\[0\] -+** ret -+*/ -+TEST_DUAL_Z_REV (bfdot_lane_0_f32_tied2, svfloat32_t, svbfloat16_t, -+ z0_res = svbfdot_lane_f32 (z4, z0, z1, 0), -+ z0_res = svbfdot_lane (z4, z0, z1, 0)) -+ -+/* -+** bfdot_lane_0_f32_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** bfdot z0\.s, z1\.h, \1\.h\[0\] -+** ret -+*/ -+TEST_DUAL_Z_REV (bfdot_lane_0_f32_tied3, svfloat32_t, svbfloat16_t, -+ z0_res = svbfdot_lane_f32 (z4, z1, z0, 0), -+ z0_res = svbfdot_lane (z4, z1, z0, 0)) -+ -+/* -+** bfdot_lane_0_f32_untied: -+** movprfx z0, z1 -+** bfdot z0\.s, z4\.h, z5\.h\[0\] -+** ret -+*/ -+TEST_DUAL_Z (bfdot_lane_0_f32_untied, svfloat32_t, svbfloat16_t, -+ z0 = svbfdot_lane_f32 (z1, z4, z5, 0), -+ z0 = svbfdot_lane (z1, z4, z5, 0)) -+ -+/* -+** bfdot_lane_1_f32: -+** bfdot z0\.s, z4\.h, z5\.h\[1\] -+** ret -+*/ -+TEST_DUAL_Z (bfdot_lane_1_f32, svfloat32_t, svbfloat16_t, -+ z0 = svbfdot_lane_f32 (z0, z4, z5, 1), -+ z0 = svbfdot_lane (z0, z4, z5, 1)) -+ -+/* -+** bfdot_lane_3_f32: -+** bfdot z0\.s, z4\.h, z5\.h\[3\] -+** ret -+*/ -+TEST_DUAL_Z (bfdot_lane_3_f32, svfloat32_t, svbfloat16_t, -+ z0 = svbfdot_lane_f32 (z0, z4, z5, 3), -+ z0 = svbfdot_lane (z0, z4, z5, 3)) -+ -+/* -+** bfdot_lane_z8_f32: -+** str d8, \[sp, -16\]! 
-+** mov (z[0-7])\.d, z8\.d -+** bfdot z0\.s, z1\.h, \1\.h\[1\] -+** ldr d8, \[sp\], 16 -+** ret -+*/ -+TEST_DUAL_LANE_REG (bfdot_lane_z8_f32, svfloat32_t, svbfloat16_t, z8, -+ z0 = svbfdot_lane_f32 (z0, z1, z8, 1), -+ z0 = svbfdot_lane (z0, z1, z8, 1)) -+ -+/* -+** bfdot_lane_z16_f32: -+** mov (z[0-7])\.d, z16\.d -+** bfdot z0\.s, z1\.h, \1\.h\[1\] -+** ret -+*/ -+TEST_DUAL_LANE_REG (bfdot_lane_z16_f32, svfloat32_t, svbfloat16_t, z16, -+ z0 = svbfdot_lane_f32 (z0, z1, z16, 1), -+ z0 = svbfdot_lane (z0, z1, z16, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_f32.c -new file mode 100644 -index 000000000..0f810116c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_f32.c -@@ -0,0 +1,67 @@ -+/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */ -+/* { dg-require-effective-target aarch64_asm_bf16_ok } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** bfmlalb_f32_tied1: -+** bfmlalb z0\.s, z4\.h, z5\.h -+** ret -+*/ -+TEST_DUAL_Z (bfmlalb_f32_tied1, svfloat32_t, svbfloat16_t, -+ z0 = svbfmlalb_f32 (z0, z4, z5), -+ z0 = svbfmlalb (z0, z4, z5)) -+ -+/* -+** bfmlalb_f32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** bfmlalb z0\.s, \1\.h, z1\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (bfmlalb_f32_tied2, svfloat32_t, svbfloat16_t, -+ z0_res = svbfmlalb_f32 (z4, z0, z1), -+ z0_res = svbfmlalb (z4, z0, z1)) -+ -+/* -+** bfmlalb_f32_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** bfmlalb z0\.s, z1\.h, \1\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (bfmlalb_f32_tied3, svfloat32_t, svbfloat16_t, -+ z0_res = svbfmlalb_f32 (z4, z1, z0), -+ z0_res = svbfmlalb (z4, z1, z0)) -+ -+/* -+** bfmlalb_f32_untied: -+** movprfx z0, z1 -+** bfmlalb z0\.s, z4\.h, z5\.h -+** ret -+*/ -+TEST_DUAL_Z (bfmlalb_f32_untied, svfloat32_t, svbfloat16_t, -+ z0 = svbfmlalb_f32 (z1, z4, z5), -+ z0 = svbfmlalb (z1, z4, z5)) -+ -+/* -+** bfmlalb_h7_f32_tied1: -+** mov (z[0-9]+\.h), h7 -+** bfmlalb z0\.s, z4\.h, \1 -+** ret -+*/ -+TEST_DUAL_ZD (bfmlalb_h7_f32_tied1, svfloat32_t, svbfloat16_t, bfloat16_t, -+ z0 = svbfmlalb_n_f32 (z0, z4, d7), -+ z0 = svbfmlalb (z0, z4, d7)) -+ -+/* -+** bfmlalb_h7_f32_untied: -+** mov (z[0-9]+\.h), h7 -+** movprfx z0, z1 -+** bfmlalb z0\.s, z4\.h, \1 -+** ret -+*/ -+TEST_DUAL_ZD (bfmlalb_h7_f32_untied, svfloat32_t, svbfloat16_t, bfloat16_t, -+ z0 = svbfmlalb_n_f32 (z1, z4, d7), -+ z0 = svbfmlalb (z1, z4, d7)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_lane_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_lane_f32.c -new file mode 100644 -index 000000000..b0ec0881d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalb_lane_f32.c -@@ -0,0 +1,86 @@ -+/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */ -+/* { dg-require-effective-target aarch64_asm_bf16_ok } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** bfmlalb_lane_0_f32_tied1: -+** bfmlalb z0\.s, z4\.h, z5\.h\[0\] -+** ret -+*/ -+TEST_DUAL_Z (bfmlalb_lane_0_f32_tied1, svfloat32_t, svbfloat16_t, -+ z0 = svbfmlalb_lane_f32 (z0, z4, z5, 0), -+ z0 = svbfmlalb_lane (z0, z4, z5, 0)) -+ -+/* -+** bfmlalb_lane_0_f32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** bfmlalb z0\.s, \1\.h, z1\.h\[0\] -+** ret -+*/ -+TEST_DUAL_Z_REV (bfmlalb_lane_0_f32_tied2, svfloat32_t, svbfloat16_t, -+ z0_res = svbfmlalb_lane_f32 (z4, z0, 
z1, 0), -+ z0_res = svbfmlalb_lane (z4, z0, z1, 0)) -+ -+/* -+** bfmlalb_lane_0_f32_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** bfmlalb z0\.s, z1\.h, \1\.h\[0\] -+** ret -+*/ -+TEST_DUAL_Z_REV (bfmlalb_lane_0_f32_tied3, svfloat32_t, svbfloat16_t, -+ z0_res = svbfmlalb_lane_f32 (z4, z1, z0, 0), -+ z0_res = svbfmlalb_lane (z4, z1, z0, 0)) -+ -+/* -+** bfmlalb_lane_0_f32_untied: -+** movprfx z0, z1 -+** bfmlalb z0\.s, z4\.h, z5\.h\[0\] -+** ret -+*/ -+TEST_DUAL_Z (bfmlalb_lane_0_f32_untied, svfloat32_t, svbfloat16_t, -+ z0 = svbfmlalb_lane_f32 (z1, z4, z5, 0), -+ z0 = svbfmlalb_lane (z1, z4, z5, 0)) -+ -+/* -+** bfmlalb_lane_1_f32: -+** bfmlalb z0\.s, z4\.h, z5\.h\[1\] -+** ret -+*/ -+TEST_DUAL_Z (bfmlalb_lane_1_f32, svfloat32_t, svbfloat16_t, -+ z0 = svbfmlalb_lane_f32 (z0, z4, z5, 1), -+ z0 = svbfmlalb_lane (z0, z4, z5, 1)) -+ -+/* -+** bfmlalb_lane_7_f32: -+** bfmlalb z0\.s, z4\.h, z5\.h\[7\] -+** ret -+*/ -+TEST_DUAL_Z (bfmlalb_lane_7_f32, svfloat32_t, svbfloat16_t, -+ z0 = svbfmlalb_lane_f32 (z0, z4, z5, 7), -+ z0 = svbfmlalb_lane (z0, z4, z5, 7)) -+ -+/* -+** bfmlalb_lane_z8_f32: -+** str d8, \[sp, -16\]! -+** mov (z[0-7])\.d, z8\.d -+** bfmlalb z0\.s, z1\.h, \1\.h\[1\] -+** ldr d8, \[sp\], 16 -+** ret -+*/ -+TEST_DUAL_LANE_REG (bfmlalb_lane_z8_f32, svfloat32_t, svbfloat16_t, z8, -+ z0 = svbfmlalb_lane_f32 (z0, z1, z8, 1), -+ z0 = svbfmlalb_lane (z0, z1, z8, 1)) -+ -+/* -+** bfmlalb_lane_z16_f32: -+** mov (z[0-7])\.d, z16\.d -+** bfmlalb z0\.s, z1\.h, \1\.h\[1\] -+** ret -+*/ -+TEST_DUAL_LANE_REG (bfmlalb_lane_z16_f32, svfloat32_t, svbfloat16_t, z16, -+ z0 = svbfmlalb_lane_f32 (z0, z1, z16, 1), -+ z0 = svbfmlalb_lane (z0, z1, z16, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_f32.c -new file mode 100644 -index 000000000..2a583fa4a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_f32.c -@@ -0,0 +1,67 @@ -+/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */ -+/* { dg-require-effective-target aarch64_asm_bf16_ok } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** bfmlalt_f32_tied1: -+** bfmlalt z0\.s, z4\.h, z5\.h -+** ret -+*/ -+TEST_DUAL_Z (bfmlalt_f32_tied1, svfloat32_t, svbfloat16_t, -+ z0 = svbfmlalt_f32 (z0, z4, z5), -+ z0 = svbfmlalt (z0, z4, z5)) -+ -+/* -+** bfmlalt_f32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** bfmlalt z0\.s, \1\.h, z1\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (bfmlalt_f32_tied2, svfloat32_t, svbfloat16_t, -+ z0_res = svbfmlalt_f32 (z4, z0, z1), -+ z0_res = svbfmlalt (z4, z0, z1)) -+ -+/* -+** bfmlalt_f32_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** bfmlalt z0\.s, z1\.h, \1\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (bfmlalt_f32_tied3, svfloat32_t, svbfloat16_t, -+ z0_res = svbfmlalt_f32 (z4, z1, z0), -+ z0_res = svbfmlalt (z4, z1, z0)) -+ -+/* -+** bfmlalt_f32_untied: -+** movprfx z0, z1 -+** bfmlalt z0\.s, z4\.h, z5\.h -+** ret -+*/ -+TEST_DUAL_Z (bfmlalt_f32_untied, svfloat32_t, svbfloat16_t, -+ z0 = svbfmlalt_f32 (z1, z4, z5), -+ z0 = svbfmlalt (z1, z4, z5)) -+ -+/* -+** bfmlalt_h7_f32_tied1: -+** mov (z[0-9]+\.h), h7 -+** bfmlalt z0\.s, z4\.h, \1 -+** ret -+*/ -+TEST_DUAL_ZD (bfmlalt_h7_f32_tied1, svfloat32_t, svbfloat16_t, bfloat16_t, -+ z0 = svbfmlalt_n_f32 (z0, z4, d7), -+ z0 = svbfmlalt (z0, z4, d7)) -+ -+/* -+** bfmlalt_h7_f32_untied: -+** mov (z[0-9]+\.h), h7 -+** movprfx z0, z1 -+** bfmlalt z0\.s, z4\.h, \1 -+** ret -+*/ 
-+TEST_DUAL_ZD (bfmlalt_h7_f32_untied, svfloat32_t, svbfloat16_t, bfloat16_t, -+ z0 = svbfmlalt_n_f32 (z1, z4, d7), -+ z0 = svbfmlalt (z1, z4, d7)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_lane_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_lane_f32.c -new file mode 100644 -index 000000000..3af3997e9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmlalt_lane_f32.c -@@ -0,0 +1,86 @@ -+/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */ -+/* { dg-require-effective-target aarch64_asm_bf16_ok } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** bfmlalt_lane_0_f32_tied1: -+** bfmlalt z0\.s, z4\.h, z5\.h\[0\] -+** ret -+*/ -+TEST_DUAL_Z (bfmlalt_lane_0_f32_tied1, svfloat32_t, svbfloat16_t, -+ z0 = svbfmlalt_lane_f32 (z0, z4, z5, 0), -+ z0 = svbfmlalt_lane (z0, z4, z5, 0)) -+ -+/* -+** bfmlalt_lane_0_f32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** bfmlalt z0\.s, \1\.h, z1\.h\[0\] -+** ret -+*/ -+TEST_DUAL_Z_REV (bfmlalt_lane_0_f32_tied2, svfloat32_t, svbfloat16_t, -+ z0_res = svbfmlalt_lane_f32 (z4, z0, z1, 0), -+ z0_res = svbfmlalt_lane (z4, z0, z1, 0)) -+ -+/* -+** bfmlalt_lane_0_f32_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** bfmlalt z0\.s, z1\.h, \1\.h\[0\] -+** ret -+*/ -+TEST_DUAL_Z_REV (bfmlalt_lane_0_f32_tied3, svfloat32_t, svbfloat16_t, -+ z0_res = svbfmlalt_lane_f32 (z4, z1, z0, 0), -+ z0_res = svbfmlalt_lane (z4, z1, z0, 0)) -+ -+/* -+** bfmlalt_lane_0_f32_untied: -+** movprfx z0, z1 -+** bfmlalt z0\.s, z4\.h, z5\.h\[0\] -+** ret -+*/ -+TEST_DUAL_Z (bfmlalt_lane_0_f32_untied, svfloat32_t, svbfloat16_t, -+ z0 = svbfmlalt_lane_f32 (z1, z4, z5, 0), -+ z0 = svbfmlalt_lane (z1, z4, z5, 0)) -+ -+/* -+** bfmlalt_lane_1_f32: -+** bfmlalt z0\.s, z4\.h, z5\.h\[1\] -+** ret -+*/ -+TEST_DUAL_Z (bfmlalt_lane_1_f32, svfloat32_t, svbfloat16_t, -+ z0 = svbfmlalt_lane_f32 (z0, z4, z5, 1), -+ z0 = svbfmlalt_lane (z0, z4, z5, 1)) -+ -+/* -+** bfmlalt_lane_7_f32: -+** bfmlalt z0\.s, z4\.h, z5\.h\[7\] -+** ret -+*/ -+TEST_DUAL_Z (bfmlalt_lane_7_f32, svfloat32_t, svbfloat16_t, -+ z0 = svbfmlalt_lane_f32 (z0, z4, z5, 7), -+ z0 = svbfmlalt_lane (z0, z4, z5, 7)) -+ -+/* -+** bfmlalt_lane_z8_f32: -+** str d8, \[sp, -16\]! 
-+** mov (z[0-7])\.d, z8\.d -+** bfmlalt z0\.s, z1\.h, \1\.h\[1\] -+** ldr d8, \[sp\], 16 -+** ret -+*/ -+TEST_DUAL_LANE_REG (bfmlalt_lane_z8_f32, svfloat32_t, svbfloat16_t, z8, -+ z0 = svbfmlalt_lane_f32 (z0, z1, z8, 1), -+ z0 = svbfmlalt_lane (z0, z1, z8, 1)) -+ -+/* -+** bfmlalt_lane_z16_f32: -+** mov (z[0-7])\.d, z16\.d -+** bfmlalt z0\.s, z1\.h, \1\.h\[1\] -+** ret -+*/ -+TEST_DUAL_LANE_REG (bfmlalt_lane_z16_f32, svfloat32_t, svbfloat16_t, z16, -+ z0 = svbfmlalt_lane_f32 (z0, z1, z16, 1), -+ z0 = svbfmlalt_lane (z0, z1, z16, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmmla_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmmla_f32.c -new file mode 100644 -index 000000000..b1d98fbf5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bfmmla_f32.c -@@ -0,0 +1,46 @@ -+/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */ -+/* { dg-require-effective-target aarch64_asm_bf16_ok } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** bfmmla_f32_tied1: -+** bfmmla z0\.s, z4\.h, z5\.h -+** ret -+*/ -+TEST_DUAL_Z (bfmmla_f32_tied1, svfloat32_t, svbfloat16_t, -+ z0 = svbfmmla_f32 (z0, z4, z5), -+ z0 = svbfmmla (z0, z4, z5)) -+ -+/* -+** bfmmla_f32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** bfmmla z0\.s, \1\.h, z1\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (bfmmla_f32_tied2, svfloat32_t, svbfloat16_t, -+ z0_res = svbfmmla_f32 (z4, z0, z1), -+ z0_res = svbfmmla (z4, z0, z1)) -+ -+/* -+** bfmmla_f32_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** bfmmla z0\.s, z1\.h, \1\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (bfmmla_f32_tied3, svfloat32_t, svbfloat16_t, -+ z0_res = svbfmmla_f32 (z4, z1, z0), -+ z0_res = svbfmmla (z4, z1, z0)) -+ -+/* -+** bfmmla_f32_untied: -+** movprfx z0, z1 -+** bfmmla z0\.s, z4\.h, z5\.h -+** ret -+*/ -+TEST_DUAL_Z (bfmmla_f32_untied, svfloat32_t, svbfloat16_t, -+ z0 = svbfmmla_f32 (z1, z4, z5), -+ z0 = svbfmmla (z1, z4, z5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_b.c -new file mode 100644 -index 000000000..9d41aeaa2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_b.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** bic_b_z_tied1: -+** bic p0\.b, p3/z, p0\.b, p1\.b -+** ret -+*/ -+TEST_UNIFORM_P (bic_b_z_tied1, -+ p0 = svbic_b_z (p3, p0, p1), -+ p0 = svbic_z (p3, p0, p1)) -+ -+/* -+** bic_b_z_tied2: -+** bic p0\.b, p3/z, p1\.b, p0\.b -+** ret -+*/ -+TEST_UNIFORM_P (bic_b_z_tied2, -+ p0 = svbic_b_z (p3, p1, p0), -+ p0 = svbic_z (p3, p1, p0)) -+ -+/* -+** bic_b_z_untied: -+** bic p0\.b, p3/z, p1\.b, p2\.b -+** ret -+*/ -+TEST_UNIFORM_P (bic_b_z_untied, -+ p0 = svbic_b_z (p3, p1, p2), -+ p0 = svbic_z (p3, p1, p2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_s16.c -new file mode 100644 -index 000000000..c80f5697f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_s16.c -@@ -0,0 +1,367 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** bic_s16_m_tied1: -+** bic z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (bic_s16_m_tied1, svint16_t, -+ z0 = svbic_s16_m (p0, z0, z1), -+ z0 = svbic_m (p0, z0, z1)) -+ -+/* -+** bic_s16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** bic z0\.h, p0/m, 
z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (bic_s16_m_tied2, svint16_t, -+ z0 = svbic_s16_m (p0, z1, z0), -+ z0 = svbic_m (p0, z1, z0)) -+ -+/* -+** bic_s16_m_untied: -+** movprfx z0, z1 -+** bic z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (bic_s16_m_untied, svint16_t, -+ z0 = svbic_s16_m (p0, z1, z2), -+ z0 = svbic_m (p0, z1, z2)) -+ -+/* -+** bic_w0_s16_m_tied1: -+** mov (z[0-9]+\.h), w0 -+** bic z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_w0_s16_m_tied1, svint16_t, int16_t, -+ z0 = svbic_n_s16_m (p0, z0, x0), -+ z0 = svbic_m (p0, z0, x0)) -+ -+/* -+** bic_w0_s16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** bic z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_w0_s16_m_untied, svint16_t, int16_t, -+ z0 = svbic_n_s16_m (p0, z1, x0), -+ z0 = svbic_m (p0, z1, x0)) -+ -+/* -+** bic_1_s16_m_tied1: -+** mov (z[0-9]+\.h), #-2 -+** and z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_s16_m_tied1, svint16_t, -+ z0 = svbic_n_s16_m (p0, z0, 1), -+ z0 = svbic_m (p0, z0, 1)) -+ -+/* -+** bic_1_s16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), #-2 -+** movprfx z0, z1 -+** and z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_s16_m_untied, svint16_t, -+ z0 = svbic_n_s16_m (p0, z1, 1), -+ z0 = svbic_m (p0, z1, 1)) -+ -+/* -+** bic_m2_s16_m: -+** mov (z[0-9]+\.h), #1 -+** and z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m2_s16_m, svint16_t, -+ z0 = svbic_n_s16_m (p0, z0, -2), -+ z0 = svbic_m (p0, z0, -2)) -+ -+/* -+** bic_s16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** bic z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (bic_s16_z_tied1, svint16_t, -+ z0 = svbic_s16_z (p0, z0, z1), -+ z0 = svbic_z (p0, z0, z1)) -+ -+/* -+** bic_s16_z_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, z1\.h -+** bic z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (bic_s16_z_tied2, svint16_t, -+ z0 = svbic_s16_z (p0, z1, z0), -+ z0 = svbic_z (p0, z1, z0)) -+ -+/* -+** bic_s16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** bic z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (bic_s16_z_untied, svint16_t, -+ z0 = svbic_s16_z (p0, z1, z2), -+ z0 = svbic_z (p0, z1, z2)) -+ -+/* -+** bic_w0_s16_z_tied1: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** bic z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_w0_s16_z_tied1, svint16_t, int16_t, -+ z0 = svbic_n_s16_z (p0, z0, x0), -+ z0 = svbic_z (p0, z0, x0)) -+ -+/* -+** bic_w0_s16_z_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z1\.h -+** bic z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_w0_s16_z_untied, svint16_t, int16_t, -+ z0 = svbic_n_s16_z (p0, z1, x0), -+ z0 = svbic_z (p0, z1, x0)) -+ -+/* -+** bic_1_s16_z_tied1: -+** mov (z[0-9]+\.h), #-2 -+** movprfx z0\.h, p0/z, z0\.h -+** and z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_s16_z_tied1, svint16_t, -+ z0 = svbic_n_s16_z (p0, z0, 1), -+ z0 = svbic_z (p0, z0, 1)) -+ -+/* -+** bic_1_s16_z_untied: -+** mov (z[0-9]+\.h), #-2 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** and z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** and z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_s16_z_untied, svint16_t, -+ z0 = svbic_n_s16_z (p0, z1, 1), -+ z0 = svbic_z (p0, z1, 1)) -+ -+/* -+** bic_s16_x_tied1: -+** bic z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (bic_s16_x_tied1, svint16_t, -+ z0 = svbic_s16_x (p0, z0, z1), -+ z0 = svbic_x (p0, z0, z1)) -+ -+/* -+** 
bic_s16_x_tied2: -+** bic z0\.d, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (bic_s16_x_tied2, svint16_t, -+ z0 = svbic_s16_x (p0, z1, z0), -+ z0 = svbic_x (p0, z1, z0)) -+ -+/* -+** bic_s16_x_untied: -+** bic z0\.d, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (bic_s16_x_untied, svint16_t, -+ z0 = svbic_s16_x (p0, z1, z2), -+ z0 = svbic_x (p0, z1, z2)) -+ -+/* -+** bic_w0_s16_x_tied1: -+** mov (z[0-9]+)\.h, w0 -+** bic z0\.d, z0\.d, \1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_w0_s16_x_tied1, svint16_t, int16_t, -+ z0 = svbic_n_s16_x (p0, z0, x0), -+ z0 = svbic_x (p0, z0, x0)) -+ -+/* -+** bic_w0_s16_x_untied: -+** mov (z[0-9]+)\.h, w0 -+** bic z0\.d, z1\.d, \1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_w0_s16_x_untied, svint16_t, int16_t, -+ z0 = svbic_n_s16_x (p0, z1, x0), -+ z0 = svbic_x (p0, z1, x0)) -+ -+/* -+** bic_1_s16_x_tied1: -+** and z0\.h, z0\.h, #0xfffe -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_s16_x_tied1, svint16_t, -+ z0 = svbic_n_s16_x (p0, z0, 1), -+ z0 = svbic_x (p0, z0, 1)) -+ -+/* -+** bic_1_s16_x_untied: -+** movprfx z0, z1 -+** and z0\.h, z0\.h, #0xfffe -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_s16_x_untied, svint16_t, -+ z0 = svbic_n_s16_x (p0, z1, 1), -+ z0 = svbic_x (p0, z1, 1)) -+ -+/* -+** bic_127_s16_x: -+** and z0\.h, z0\.h, #0xff80 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_127_s16_x, svint16_t, -+ z0 = svbic_n_s16_x (p0, z0, 127), -+ z0 = svbic_x (p0, z0, 127)) -+ -+/* -+** bic_128_s16_x: -+** and z0\.h, z0\.h, #0xff7f -+** ret -+*/ -+TEST_UNIFORM_Z (bic_128_s16_x, svint16_t, -+ z0 = svbic_n_s16_x (p0, z0, 128), -+ z0 = svbic_x (p0, z0, 128)) -+ -+/* -+** bic_255_s16_x: -+** and z0\.h, z0\.h, #0xff00 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_255_s16_x, svint16_t, -+ z0 = svbic_n_s16_x (p0, z0, 255), -+ z0 = svbic_x (p0, z0, 255)) -+ -+/* -+** bic_256_s16_x: -+** and z0\.h, z0\.h, #0xfeff -+** ret -+*/ -+TEST_UNIFORM_Z (bic_256_s16_x, svint16_t, -+ z0 = svbic_n_s16_x (p0, z0, 256), -+ z0 = svbic_x (p0, z0, 256)) -+ -+/* -+** bic_257_s16_x: -+** and z0\.h, z0\.h, #0xfefe -+** ret -+*/ -+TEST_UNIFORM_Z (bic_257_s16_x, svint16_t, -+ z0 = svbic_n_s16_x (p0, z0, 257), -+ z0 = svbic_x (p0, z0, 257)) -+ -+/* -+** bic_512_s16_x: -+** and z0\.h, z0\.h, #0xfdff -+** ret -+*/ -+TEST_UNIFORM_Z (bic_512_s16_x, svint16_t, -+ z0 = svbic_n_s16_x (p0, z0, 512), -+ z0 = svbic_x (p0, z0, 512)) -+ -+/* -+** bic_65280_s16_x: -+** and z0\.h, z0\.h, #0xff -+** ret -+*/ -+TEST_UNIFORM_Z (bic_65280_s16_x, svint16_t, -+ z0 = svbic_n_s16_x (p0, z0, 0xff00), -+ z0 = svbic_x (p0, z0, 0xff00)) -+ -+/* -+** bic_m127_s16_x: -+** and z0\.h, z0\.h, #0x7e -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m127_s16_x, svint16_t, -+ z0 = svbic_n_s16_x (p0, z0, -127), -+ z0 = svbic_x (p0, z0, -127)) -+ -+/* -+** bic_m128_s16_x: -+** and z0\.h, z0\.h, #0x7f -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m128_s16_x, svint16_t, -+ z0 = svbic_n_s16_x (p0, z0, -128), -+ z0 = svbic_x (p0, z0, -128)) -+ -+/* -+** bic_m255_s16_x: -+** and z0\.h, z0\.h, #0xfe -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m255_s16_x, svint16_t, -+ z0 = svbic_n_s16_x (p0, z0, -255), -+ z0 = svbic_x (p0, z0, -255)) -+ -+/* -+** bic_m256_s16_x: -+** and z0\.h, z0\.h, #0xff -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m256_s16_x, svint16_t, -+ z0 = svbic_n_s16_x (p0, z0, -256), -+ z0 = svbic_x (p0, z0, -256)) -+ -+/* -+** bic_m257_s16_x: -+** and z0\.h, z0\.h, #0x100 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m257_s16_x, svint16_t, -+ z0 = svbic_n_s16_x (p0, z0, -257), -+ z0 = svbic_x (p0, z0, -257)) -+ -+/* -+** bic_m512_s16_x: -+** and z0\.h, z0\.h, #0x1ff -+** ret -+*/ -+TEST_UNIFORM_Z 
(bic_m512_s16_x, svint16_t, -+ z0 = svbic_n_s16_x (p0, z0, -512), -+ z0 = svbic_x (p0, z0, -512)) -+ -+/* -+** bic_m32768_s16_x: -+** and z0\.h, z0\.h, #0x7fff -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m32768_s16_x, svint16_t, -+ z0 = svbic_n_s16_x (p0, z0, -0x8000), -+ z0 = svbic_x (p0, z0, -0x8000)) -+ -+/* -+** bic_5_s16_x: -+** mov (z[0-9]+)\.h, #-6 -+** and z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (bic_5_s16_x, svint16_t, -+ z0 = svbic_n_s16_x (p0, z0, 5), -+ z0 = svbic_x (p0, z0, 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_s32.c -new file mode 100644 -index 000000000..9e388e499 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_s32.c -@@ -0,0 +1,363 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** bic_s32_m_tied1: -+** bic z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (bic_s32_m_tied1, svint32_t, -+ z0 = svbic_s32_m (p0, z0, z1), -+ z0 = svbic_m (p0, z0, z1)) -+ -+/* -+** bic_s32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** bic z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (bic_s32_m_tied2, svint32_t, -+ z0 = svbic_s32_m (p0, z1, z0), -+ z0 = svbic_m (p0, z1, z0)) -+ -+/* -+** bic_s32_m_untied: -+** movprfx z0, z1 -+** bic z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (bic_s32_m_untied, svint32_t, -+ z0 = svbic_s32_m (p0, z1, z2), -+ z0 = svbic_m (p0, z1, z2)) -+ -+/* -+** bic_w0_s32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** bic z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_w0_s32_m_tied1, svint32_t, int32_t, -+ z0 = svbic_n_s32_m (p0, z0, x0), -+ z0 = svbic_m (p0, z0, x0)) -+ -+/* -+** bic_w0_s32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** bic z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_w0_s32_m_untied, svint32_t, int32_t, -+ z0 = svbic_n_s32_m (p0, z1, x0), -+ z0 = svbic_m (p0, z1, x0)) -+ -+/* -+** bic_1_s32_m_tied1: -+** mov (z[0-9]+\.s), #-2 -+** and z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_s32_m_tied1, svint32_t, -+ z0 = svbic_n_s32_m (p0, z0, 1), -+ z0 = svbic_m (p0, z0, 1)) -+ -+/* -+** bic_1_s32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #-2 -+** movprfx z0, z1 -+** and z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_s32_m_untied, svint32_t, -+ z0 = svbic_n_s32_m (p0, z1, 1), -+ z0 = svbic_m (p0, z1, 1)) -+ -+/* -+** bic_m2_s32_m: -+** mov (z[0-9]+\.s), #1 -+** and z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m2_s32_m, svint32_t, -+ z0 = svbic_n_s32_m (p0, z0, -2), -+ z0 = svbic_m (p0, z0, -2)) -+ -+/* -+** bic_s32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** bic z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (bic_s32_z_tied1, svint32_t, -+ z0 = svbic_s32_z (p0, z0, z1), -+ z0 = svbic_z (p0, z0, z1)) -+ -+/* -+** bic_s32_z_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, z1\.s -+** bic z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (bic_s32_z_tied2, svint32_t, -+ z0 = svbic_s32_z (p0, z1, z0), -+ z0 = svbic_z (p0, z1, z0)) -+ -+/* -+** bic_s32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** bic z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (bic_s32_z_untied, svint32_t, -+ z0 = svbic_s32_z (p0, z1, z2), -+ z0 = svbic_z (p0, z1, z2)) -+ -+/* -+** bic_w0_s32_z_tied1: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** bic z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX 
(bic_w0_s32_z_tied1, svint32_t, int32_t, -+ z0 = svbic_n_s32_z (p0, z0, x0), -+ z0 = svbic_z (p0, z0, x0)) -+ -+/* -+** bic_w0_s32_z_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z1\.s -+** bic z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_w0_s32_z_untied, svint32_t, int32_t, -+ z0 = svbic_n_s32_z (p0, z1, x0), -+ z0 = svbic_z (p0, z1, x0)) -+ -+/* -+** bic_1_s32_z_tied1: -+** mov (z[0-9]+\.s), #-2 -+** movprfx z0\.s, p0/z, z0\.s -+** and z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_s32_z_tied1, svint32_t, -+ z0 = svbic_n_s32_z (p0, z0, 1), -+ z0 = svbic_z (p0, z0, 1)) -+ -+/* -+** bic_1_s32_z_untied: -+** mov (z[0-9]+\.s), #-2 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** and z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** and z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_s32_z_untied, svint32_t, -+ z0 = svbic_n_s32_z (p0, z1, 1), -+ z0 = svbic_z (p0, z1, 1)) -+ -+/* -+** bic_s32_x_tied1: -+** bic z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (bic_s32_x_tied1, svint32_t, -+ z0 = svbic_s32_x (p0, z0, z1), -+ z0 = svbic_x (p0, z0, z1)) -+ -+/* -+** bic_s32_x_tied2: -+** bic z0\.d, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (bic_s32_x_tied2, svint32_t, -+ z0 = svbic_s32_x (p0, z1, z0), -+ z0 = svbic_x (p0, z1, z0)) -+ -+/* -+** bic_s32_x_untied: -+** bic z0\.d, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (bic_s32_x_untied, svint32_t, -+ z0 = svbic_s32_x (p0, z1, z2), -+ z0 = svbic_x (p0, z1, z2)) -+ -+/* -+** bic_w0_s32_x_tied1: -+** mov (z[0-9]+)\.s, w0 -+** bic z0\.d, z0\.d, \1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_w0_s32_x_tied1, svint32_t, int32_t, -+ z0 = svbic_n_s32_x (p0, z0, x0), -+ z0 = svbic_x (p0, z0, x0)) -+ -+/* -+** bic_w0_s32_x_untied: -+** mov (z[0-9]+)\.s, w0 -+** bic z0\.d, z1\.d, \1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_w0_s32_x_untied, svint32_t, int32_t, -+ z0 = svbic_n_s32_x (p0, z1, x0), -+ z0 = svbic_x (p0, z1, x0)) -+ -+/* -+** bic_1_s32_x_tied1: -+** and z0\.s, z0\.s, #0xfffffffe -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_s32_x_tied1, svint32_t, -+ z0 = svbic_n_s32_x (p0, z0, 1), -+ z0 = svbic_x (p0, z0, 1)) -+ -+/* -+** bic_1_s32_x_untied: -+** movprfx z0, z1 -+** and z0\.s, z0\.s, #0xfffffffe -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_s32_x_untied, svint32_t, -+ z0 = svbic_n_s32_x (p0, z1, 1), -+ z0 = svbic_x (p0, z1, 1)) -+ -+/* -+** bic_127_s32_x: -+** and z0\.s, z0\.s, #0xffffff80 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_127_s32_x, svint32_t, -+ z0 = svbic_n_s32_x (p0, z0, 127), -+ z0 = svbic_x (p0, z0, 127)) -+ -+/* -+** bic_128_s32_x: -+** and z0\.s, z0\.s, #0xffffff7f -+** ret -+*/ -+TEST_UNIFORM_Z (bic_128_s32_x, svint32_t, -+ z0 = svbic_n_s32_x (p0, z0, 128), -+ z0 = svbic_x (p0, z0, 128)) -+ -+/* -+** bic_255_s32_x: -+** and z0\.s, z0\.s, #0xffffff00 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_255_s32_x, svint32_t, -+ z0 = svbic_n_s32_x (p0, z0, 255), -+ z0 = svbic_x (p0, z0, 255)) -+ -+/* -+** bic_256_s32_x: -+** and z0\.s, z0\.s, #0xfffffeff -+** ret -+*/ -+TEST_UNIFORM_Z (bic_256_s32_x, svint32_t, -+ z0 = svbic_n_s32_x (p0, z0, 256), -+ z0 = svbic_x (p0, z0, 256)) -+ -+/* TODO: Bad code and needs fixing. 
*/ -+TEST_UNIFORM_Z (bic_257_s32_x, svint32_t, -+ z0 = svbic_n_s32_x (p0, z0, 257), -+ z0 = svbic_x (p0, z0, 257)) -+ -+/* -+** bic_512_s32_x: -+** and z0\.s, z0\.s, #0xfffffdff -+** ret -+*/ -+TEST_UNIFORM_Z (bic_512_s32_x, svint32_t, -+ z0 = svbic_n_s32_x (p0, z0, 512), -+ z0 = svbic_x (p0, z0, 512)) -+ -+/* -+** bic_65280_s32_x: -+** and z0\.s, z0\.s, #0xffff00ff -+** ret -+*/ -+TEST_UNIFORM_Z (bic_65280_s32_x, svint32_t, -+ z0 = svbic_n_s32_x (p0, z0, 0xff00), -+ z0 = svbic_x (p0, z0, 0xff00)) -+ -+/* -+** bic_m127_s32_x: -+** and z0\.s, z0\.s, #0x7e -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m127_s32_x, svint32_t, -+ z0 = svbic_n_s32_x (p0, z0, -127), -+ z0 = svbic_x (p0, z0, -127)) -+ -+/* -+** bic_m128_s32_x: -+** and z0\.s, z0\.s, #0x7f -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m128_s32_x, svint32_t, -+ z0 = svbic_n_s32_x (p0, z0, -128), -+ z0 = svbic_x (p0, z0, -128)) -+ -+/* -+** bic_m255_s32_x: -+** and z0\.s, z0\.s, #0xfe -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m255_s32_x, svint32_t, -+ z0 = svbic_n_s32_x (p0, z0, -255), -+ z0 = svbic_x (p0, z0, -255)) -+ -+/* -+** bic_m256_s32_x: -+** and z0\.s, z0\.s, #0xff -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m256_s32_x, svint32_t, -+ z0 = svbic_n_s32_x (p0, z0, -256), -+ z0 = svbic_x (p0, z0, -256)) -+ -+/* -+** bic_m257_s32_x: -+** and z0\.s, z0\.s, #0x100 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m257_s32_x, svint32_t, -+ z0 = svbic_n_s32_x (p0, z0, -257), -+ z0 = svbic_x (p0, z0, -257)) -+ -+/* -+** bic_m512_s32_x: -+** and z0\.s, z0\.s, #0x1ff -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m512_s32_x, svint32_t, -+ z0 = svbic_n_s32_x (p0, z0, -512), -+ z0 = svbic_x (p0, z0, -512)) -+ -+/* -+** bic_m32768_s32_x: -+** and z0\.s, z0\.s, #0x7fff -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m32768_s32_x, svint32_t, -+ z0 = svbic_n_s32_x (p0, z0, -0x8000), -+ z0 = svbic_x (p0, z0, -0x8000)) -+ -+/* -+** bic_5_s32_x: -+** mov (z[0-9]+)\.s, #-6 -+** and z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (bic_5_s32_x, svint32_t, -+ z0 = svbic_n_s32_x (p0, z0, 5), -+ z0 = svbic_x (p0, z0, 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_s64.c -new file mode 100644 -index 000000000..bf9536815 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_s64.c -@@ -0,0 +1,363 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** bic_s64_m_tied1: -+** bic z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (bic_s64_m_tied1, svint64_t, -+ z0 = svbic_s64_m (p0, z0, z1), -+ z0 = svbic_m (p0, z0, z1)) -+ -+/* -+** bic_s64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** bic z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_s64_m_tied2, svint64_t, -+ z0 = svbic_s64_m (p0, z1, z0), -+ z0 = svbic_m (p0, z1, z0)) -+ -+/* -+** bic_s64_m_untied: -+** movprfx z0, z1 -+** bic z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (bic_s64_m_untied, svint64_t, -+ z0 = svbic_s64_m (p0, z1, z2), -+ z0 = svbic_m (p0, z1, z2)) -+ -+/* -+** bic_x0_s64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** bic z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_x0_s64_m_tied1, svint64_t, int64_t, -+ z0 = svbic_n_s64_m (p0, z0, x0), -+ z0 = svbic_m (p0, z0, x0)) -+ -+/* -+** bic_x0_s64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** bic z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_x0_s64_m_untied, svint64_t, int64_t, -+ z0 = svbic_n_s64_m (p0, z1, x0), -+ z0 = svbic_m (p0, z1, x0)) -+ -+/* -+** 
bic_1_s64_m_tied1: -+** mov (z[0-9]+\.d), #-2 -+** and z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_s64_m_tied1, svint64_t, -+ z0 = svbic_n_s64_m (p0, z0, 1), -+ z0 = svbic_m (p0, z0, 1)) -+ -+/* -+** bic_1_s64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #-2 -+** movprfx z0, z1 -+** and z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_s64_m_untied, svint64_t, -+ z0 = svbic_n_s64_m (p0, z1, 1), -+ z0 = svbic_m (p0, z1, 1)) -+ -+/* -+** bic_m2_s64_m: -+** mov (z[0-9]+\.d), #1 -+** and z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m2_s64_m, svint64_t, -+ z0 = svbic_n_s64_m (p0, z0, -2), -+ z0 = svbic_m (p0, z0, -2)) -+ -+/* -+** bic_s64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** bic z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (bic_s64_z_tied1, svint64_t, -+ z0 = svbic_s64_z (p0, z0, z1), -+ z0 = svbic_z (p0, z0, z1)) -+ -+/* -+** bic_s64_z_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, z1\.d -+** bic z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_s64_z_tied2, svint64_t, -+ z0 = svbic_s64_z (p0, z1, z0), -+ z0 = svbic_z (p0, z1, z0)) -+ -+/* -+** bic_s64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** bic z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (bic_s64_z_untied, svint64_t, -+ z0 = svbic_s64_z (p0, z1, z2), -+ z0 = svbic_z (p0, z1, z2)) -+ -+/* -+** bic_x0_s64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** bic z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_x0_s64_z_tied1, svint64_t, int64_t, -+ z0 = svbic_n_s64_z (p0, z0, x0), -+ z0 = svbic_z (p0, z0, x0)) -+ -+/* -+** bic_x0_s64_z_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z1\.d -+** bic z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_x0_s64_z_untied, svint64_t, int64_t, -+ z0 = svbic_n_s64_z (p0, z1, x0), -+ z0 = svbic_z (p0, z1, x0)) -+ -+/* -+** bic_1_s64_z_tied1: -+** mov (z[0-9]+\.d), #-2 -+** movprfx z0\.d, p0/z, z0\.d -+** and z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_s64_z_tied1, svint64_t, -+ z0 = svbic_n_s64_z (p0, z0, 1), -+ z0 = svbic_z (p0, z0, 1)) -+ -+/* -+** bic_1_s64_z_untied: -+** mov (z[0-9]+\.d), #-2 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** and z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** and z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_s64_z_untied, svint64_t, -+ z0 = svbic_n_s64_z (p0, z1, 1), -+ z0 = svbic_z (p0, z1, 1)) -+ -+/* -+** bic_s64_x_tied1: -+** bic z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (bic_s64_x_tied1, svint64_t, -+ z0 = svbic_s64_x (p0, z0, z1), -+ z0 = svbic_x (p0, z0, z1)) -+ -+/* -+** bic_s64_x_tied2: -+** bic z0\.d, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (bic_s64_x_tied2, svint64_t, -+ z0 = svbic_s64_x (p0, z1, z0), -+ z0 = svbic_x (p0, z1, z0)) -+ -+/* -+** bic_s64_x_untied: -+** bic z0\.d, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (bic_s64_x_untied, svint64_t, -+ z0 = svbic_s64_x (p0, z1, z2), -+ z0 = svbic_x (p0, z1, z2)) -+ -+/* -+** bic_x0_s64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** bic z0\.d, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_x0_s64_x_tied1, svint64_t, int64_t, -+ z0 = svbic_n_s64_x (p0, z0, x0), -+ z0 = svbic_x (p0, z0, x0)) -+ -+/* -+** bic_x0_s64_x_untied: -+** mov (z[0-9]+\.d), x0 -+** bic z0\.d, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_x0_s64_x_untied, svint64_t, int64_t, -+ z0 = svbic_n_s64_x (p0, z1, x0), -+ z0 = svbic_x (p0, z1, x0)) -+ -+/* -+** bic_1_s64_x_tied1: -+** and z0\.d, z0\.d, 
#0xfffffffffffffffe -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_s64_x_tied1, svint64_t, -+ z0 = svbic_n_s64_x (p0, z0, 1), -+ z0 = svbic_x (p0, z0, 1)) -+ -+/* -+** bic_1_s64_x_untied: -+** movprfx z0, z1 -+** and z0\.d, z0\.d, #0xfffffffffffffffe -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_s64_x_untied, svint64_t, -+ z0 = svbic_n_s64_x (p0, z1, 1), -+ z0 = svbic_x (p0, z1, 1)) -+ -+/* -+** bic_127_s64_x: -+** and z0\.d, z0\.d, #0xffffffffffffff80 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_127_s64_x, svint64_t, -+ z0 = svbic_n_s64_x (p0, z0, 127), -+ z0 = svbic_x (p0, z0, 127)) -+ -+/* -+** bic_128_s64_x: -+** and z0\.d, z0\.d, #0xffffffffffffff7f -+** ret -+*/ -+TEST_UNIFORM_Z (bic_128_s64_x, svint64_t, -+ z0 = svbic_n_s64_x (p0, z0, 128), -+ z0 = svbic_x (p0, z0, 128)) -+ -+/* -+** bic_255_s64_x: -+** and z0\.d, z0\.d, #0xffffffffffffff00 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_255_s64_x, svint64_t, -+ z0 = svbic_n_s64_x (p0, z0, 255), -+ z0 = svbic_x (p0, z0, 255)) -+ -+/* -+** bic_256_s64_x: -+** and z0\.d, z0\.d, #0xfffffffffffffeff -+** ret -+*/ -+TEST_UNIFORM_Z (bic_256_s64_x, svint64_t, -+ z0 = svbic_n_s64_x (p0, z0, 256), -+ z0 = svbic_x (p0, z0, 256)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (bic_257_s64_x, svint64_t, -+ z0 = svbic_n_s64_x (p0, z0, 257), -+ z0 = svbic_x (p0, z0, 257)) -+ -+/* -+** bic_512_s64_x: -+** and z0\.d, z0\.d, #0xfffffffffffffdff -+** ret -+*/ -+TEST_UNIFORM_Z (bic_512_s64_x, svint64_t, -+ z0 = svbic_n_s64_x (p0, z0, 512), -+ z0 = svbic_x (p0, z0, 512)) -+ -+/* -+** bic_65280_s64_x: -+** and z0\.d, z0\.d, #0xffffffffffff00ff -+** ret -+*/ -+TEST_UNIFORM_Z (bic_65280_s64_x, svint64_t, -+ z0 = svbic_n_s64_x (p0, z0, 0xff00), -+ z0 = svbic_x (p0, z0, 0xff00)) -+ -+/* -+** bic_m127_s64_x: -+** and z0\.d, z0\.d, #0x7e -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m127_s64_x, svint64_t, -+ z0 = svbic_n_s64_x (p0, z0, -127), -+ z0 = svbic_x (p0, z0, -127)) -+ -+/* -+** bic_m128_s64_x: -+** and z0\.d, z0\.d, #0x7f -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m128_s64_x, svint64_t, -+ z0 = svbic_n_s64_x (p0, z0, -128), -+ z0 = svbic_x (p0, z0, -128)) -+ -+/* -+** bic_m255_s64_x: -+** and z0\.d, z0\.d, #0xfe -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m255_s64_x, svint64_t, -+ z0 = svbic_n_s64_x (p0, z0, -255), -+ z0 = svbic_x (p0, z0, -255)) -+ -+/* -+** bic_m256_s64_x: -+** and z0\.d, z0\.d, #0xff -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m256_s64_x, svint64_t, -+ z0 = svbic_n_s64_x (p0, z0, -256), -+ z0 = svbic_x (p0, z0, -256)) -+ -+/* -+** bic_m257_s64_x: -+** and z0\.d, z0\.d, #0x100 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m257_s64_x, svint64_t, -+ z0 = svbic_n_s64_x (p0, z0, -257), -+ z0 = svbic_x (p0, z0, -257)) -+ -+/* -+** bic_m512_s64_x: -+** and z0\.d, z0\.d, #0x1ff -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m512_s64_x, svint64_t, -+ z0 = svbic_n_s64_x (p0, z0, -512), -+ z0 = svbic_x (p0, z0, -512)) -+ -+/* -+** bic_m32768_s64_x: -+** and z0\.d, z0\.d, #0x7fff -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m32768_s64_x, svint64_t, -+ z0 = svbic_n_s64_x (p0, z0, -0x8000), -+ z0 = svbic_x (p0, z0, -0x8000)) -+ -+/* -+** bic_5_s64_x: -+** mov (z[0-9]+\.d), #-6 -+** and z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (bic_5_s64_x, svint64_t, -+ z0 = svbic_n_s64_x (p0, z0, 5), -+ z0 = svbic_x (p0, z0, 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_s8.c -new file mode 100644 -index 000000000..0958a3403 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_s8.c -@@ -0,0 +1,286 @@ -+/* { dg-final { 
check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** bic_s8_m_tied1: -+** bic z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (bic_s8_m_tied1, svint8_t, -+ z0 = svbic_s8_m (p0, z0, z1), -+ z0 = svbic_m (p0, z0, z1)) -+ -+/* -+** bic_s8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** bic z0\.b, p0/m, z0\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (bic_s8_m_tied2, svint8_t, -+ z0 = svbic_s8_m (p0, z1, z0), -+ z0 = svbic_m (p0, z1, z0)) -+ -+/* -+** bic_s8_m_untied: -+** movprfx z0, z1 -+** bic z0\.b, p0/m, z0\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (bic_s8_m_untied, svint8_t, -+ z0 = svbic_s8_m (p0, z1, z2), -+ z0 = svbic_m (p0, z1, z2)) -+ -+/* -+** bic_w0_s8_m_tied1: -+** mov (z[0-9]+\.b), w0 -+** bic z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_w0_s8_m_tied1, svint8_t, int8_t, -+ z0 = svbic_n_s8_m (p0, z0, x0), -+ z0 = svbic_m (p0, z0, x0)) -+ -+/* -+** bic_w0_s8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** bic z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_w0_s8_m_untied, svint8_t, int8_t, -+ z0 = svbic_n_s8_m (p0, z1, x0), -+ z0 = svbic_m (p0, z1, x0)) -+ -+/* -+** bic_1_s8_m_tied1: -+** mov (z[0-9]+\.b), #-2 -+** and z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_s8_m_tied1, svint8_t, -+ z0 = svbic_n_s8_m (p0, z0, 1), -+ z0 = svbic_m (p0, z0, 1)) -+ -+/* -+** bic_1_s8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), #-2 -+** movprfx z0, z1 -+** and z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_s8_m_untied, svint8_t, -+ z0 = svbic_n_s8_m (p0, z1, 1), -+ z0 = svbic_m (p0, z1, 1)) -+ -+/* -+** bic_m2_s8_m: -+** mov (z[0-9]+\.b), #1 -+** and z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m2_s8_m, svint8_t, -+ z0 = svbic_n_s8_m (p0, z0, -2), -+ z0 = svbic_m (p0, z0, -2)) -+ -+/* -+** bic_s8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** bic z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (bic_s8_z_tied1, svint8_t, -+ z0 = svbic_s8_z (p0, z0, z1), -+ z0 = svbic_z (p0, z0, z1)) -+ -+/* -+** bic_s8_z_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.b, p0/z, z1\.b -+** bic z0\.b, p0/m, z0\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (bic_s8_z_tied2, svint8_t, -+ z0 = svbic_s8_z (p0, z1, z0), -+ z0 = svbic_z (p0, z1, z0)) -+ -+/* -+** bic_s8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** bic z0\.b, p0/m, z0\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (bic_s8_z_untied, svint8_t, -+ z0 = svbic_s8_z (p0, z1, z2), -+ z0 = svbic_z (p0, z1, z2)) -+ -+/* -+** bic_w0_s8_z_tied1: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** bic z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_w0_s8_z_tied1, svint8_t, int8_t, -+ z0 = svbic_n_s8_z (p0, z0, x0), -+ z0 = svbic_z (p0, z0, x0)) -+ -+/* -+** bic_w0_s8_z_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z1\.b -+** bic z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_w0_s8_z_untied, svint8_t, int8_t, -+ z0 = svbic_n_s8_z (p0, z1, x0), -+ z0 = svbic_z (p0, z1, x0)) -+ -+/* -+** bic_1_s8_z_tied1: -+** mov (z[0-9]+\.b), #-2 -+** movprfx z0\.b, p0/z, z0\.b -+** and z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_s8_z_tied1, svint8_t, -+ z0 = svbic_n_s8_z (p0, z0, 1), -+ z0 = svbic_z (p0, z0, 1)) -+ -+/* -+** bic_1_s8_z_untied: -+** mov (z[0-9]+\.b), #-2 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** and z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** and z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ 
-+TEST_UNIFORM_Z (bic_1_s8_z_untied, svint8_t, -+ z0 = svbic_n_s8_z (p0, z1, 1), -+ z0 = svbic_z (p0, z1, 1)) -+ -+/* -+** bic_s8_x_tied1: -+** bic z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (bic_s8_x_tied1, svint8_t, -+ z0 = svbic_s8_x (p0, z0, z1), -+ z0 = svbic_x (p0, z0, z1)) -+ -+/* -+** bic_s8_x_tied2: -+** bic z0\.d, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (bic_s8_x_tied2, svint8_t, -+ z0 = svbic_s8_x (p0, z1, z0), -+ z0 = svbic_x (p0, z1, z0)) -+ -+/* -+** bic_s8_x_untied: -+** bic z0\.d, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (bic_s8_x_untied, svint8_t, -+ z0 = svbic_s8_x (p0, z1, z2), -+ z0 = svbic_x (p0, z1, z2)) -+ -+/* -+** bic_w0_s8_x_tied1: -+** mov (z[0-9]+)\.b, w0 -+** bic z0\.d, z0\.d, \1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_w0_s8_x_tied1, svint8_t, int8_t, -+ z0 = svbic_n_s8_x (p0, z0, x0), -+ z0 = svbic_x (p0, z0, x0)) -+ -+/* -+** bic_w0_s8_x_untied: -+** mov (z[0-9]+)\.b, w0 -+** bic z0\.d, z1\.d, \1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_w0_s8_x_untied, svint8_t, int8_t, -+ z0 = svbic_n_s8_x (p0, z1, x0), -+ z0 = svbic_x (p0, z1, x0)) -+ -+/* -+** bic_1_s8_x_tied1: -+** and z0\.b, z0\.b, #0xfe -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_s8_x_tied1, svint8_t, -+ z0 = svbic_n_s8_x (p0, z0, 1), -+ z0 = svbic_x (p0, z0, 1)) -+ -+/* -+** bic_1_s8_x_untied: -+** movprfx z0, z1 -+** and z0\.b, z0\.b, #0xfe -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_s8_x_untied, svint8_t, -+ z0 = svbic_n_s8_x (p0, z1, 1), -+ z0 = svbic_x (p0, z1, 1)) -+ -+/* -+** bic_127_s8_x: -+** and z0\.b, z0\.b, #0x80 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_127_s8_x, svint8_t, -+ z0 = svbic_n_s8_x (p0, z0, 127), -+ z0 = svbic_x (p0, z0, 127)) -+ -+/* -+** bic_128_s8_x: -+** and z0\.b, z0\.b, #0x7f -+** ret -+*/ -+TEST_UNIFORM_Z (bic_128_s8_x, svint8_t, -+ z0 = svbic_n_s8_x (p0, z0, 128), -+ z0 = svbic_x (p0, z0, 128)) -+ -+/* -+** bic_255_s8_x: -+** mov z0\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_255_s8_x, svint8_t, -+ z0 = svbic_n_s8_x (p0, z0, 255), -+ z0 = svbic_x (p0, z0, 255)) -+ -+/* -+** bic_m127_s8_x: -+** and z0\.b, z0\.b, #0x7e -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m127_s8_x, svint8_t, -+ z0 = svbic_n_s8_x (p0, z0, -127), -+ z0 = svbic_x (p0, z0, -127)) -+ -+/* -+** bic_m128_s8_x: -+** and z0\.b, z0\.b, #0x7f -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m128_s8_x, svint8_t, -+ z0 = svbic_n_s8_x (p0, z0, -128), -+ z0 = svbic_x (p0, z0, -128)) -+ -+/* -+** bic_5_s8_x: -+** mov (z[0-9]+)\.b, #-6 -+** and z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (bic_5_s8_x, svint8_t, -+ z0 = svbic_n_s8_x (p0, z0, 5), -+ z0 = svbic_x (p0, z0, 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_u16.c -new file mode 100644 -index 000000000..30209ffb4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_u16.c -@@ -0,0 +1,367 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** bic_u16_m_tied1: -+** bic z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (bic_u16_m_tied1, svuint16_t, -+ z0 = svbic_u16_m (p0, z0, z1), -+ z0 = svbic_m (p0, z0, z1)) -+ -+/* -+** bic_u16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** bic z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (bic_u16_m_tied2, svuint16_t, -+ z0 = svbic_u16_m (p0, z1, z0), -+ z0 = svbic_m (p0, z1, z0)) -+ -+/* -+** bic_u16_m_untied: -+** movprfx z0, z1 -+** bic z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (bic_u16_m_untied, svuint16_t, -+ z0 = 
svbic_u16_m (p0, z1, z2), -+ z0 = svbic_m (p0, z1, z2)) -+ -+/* -+** bic_w0_u16_m_tied1: -+** mov (z[0-9]+\.h), w0 -+** bic z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_w0_u16_m_tied1, svuint16_t, uint16_t, -+ z0 = svbic_n_u16_m (p0, z0, x0), -+ z0 = svbic_m (p0, z0, x0)) -+ -+/* -+** bic_w0_u16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** bic z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_w0_u16_m_untied, svuint16_t, uint16_t, -+ z0 = svbic_n_u16_m (p0, z1, x0), -+ z0 = svbic_m (p0, z1, x0)) -+ -+/* -+** bic_1_u16_m_tied1: -+** mov (z[0-9]+\.h), #-2 -+** and z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_u16_m_tied1, svuint16_t, -+ z0 = svbic_n_u16_m (p0, z0, 1), -+ z0 = svbic_m (p0, z0, 1)) -+ -+/* -+** bic_1_u16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), #-2 -+** movprfx z0, z1 -+** and z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_u16_m_untied, svuint16_t, -+ z0 = svbic_n_u16_m (p0, z1, 1), -+ z0 = svbic_m (p0, z1, 1)) -+ -+/* -+** bic_m2_u16_m: -+** mov (z[0-9]+\.h), #1 -+** and z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m2_u16_m, svuint16_t, -+ z0 = svbic_n_u16_m (p0, z0, -2), -+ z0 = svbic_m (p0, z0, -2)) -+ -+/* -+** bic_u16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** bic z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (bic_u16_z_tied1, svuint16_t, -+ z0 = svbic_u16_z (p0, z0, z1), -+ z0 = svbic_z (p0, z0, z1)) -+ -+/* -+** bic_u16_z_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, z1\.h -+** bic z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (bic_u16_z_tied2, svuint16_t, -+ z0 = svbic_u16_z (p0, z1, z0), -+ z0 = svbic_z (p0, z1, z0)) -+ -+/* -+** bic_u16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** bic z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (bic_u16_z_untied, svuint16_t, -+ z0 = svbic_u16_z (p0, z1, z2), -+ z0 = svbic_z (p0, z1, z2)) -+ -+/* -+** bic_w0_u16_z_tied1: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** bic z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_w0_u16_z_tied1, svuint16_t, uint16_t, -+ z0 = svbic_n_u16_z (p0, z0, x0), -+ z0 = svbic_z (p0, z0, x0)) -+ -+/* -+** bic_w0_u16_z_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z1\.h -+** bic z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_w0_u16_z_untied, svuint16_t, uint16_t, -+ z0 = svbic_n_u16_z (p0, z1, x0), -+ z0 = svbic_z (p0, z1, x0)) -+ -+/* -+** bic_1_u16_z_tied1: -+** mov (z[0-9]+\.h), #-2 -+** movprfx z0\.h, p0/z, z0\.h -+** and z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_u16_z_tied1, svuint16_t, -+ z0 = svbic_n_u16_z (p0, z0, 1), -+ z0 = svbic_z (p0, z0, 1)) -+ -+/* -+** bic_1_u16_z_untied: -+** mov (z[0-9]+\.h), #-2 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** and z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** and z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_u16_z_untied, svuint16_t, -+ z0 = svbic_n_u16_z (p0, z1, 1), -+ z0 = svbic_z (p0, z1, 1)) -+ -+/* -+** bic_u16_x_tied1: -+** bic z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (bic_u16_x_tied1, svuint16_t, -+ z0 = svbic_u16_x (p0, z0, z1), -+ z0 = svbic_x (p0, z0, z1)) -+ -+/* -+** bic_u16_x_tied2: -+** bic z0\.d, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (bic_u16_x_tied2, svuint16_t, -+ z0 = svbic_u16_x (p0, z1, z0), -+ z0 = svbic_x (p0, z1, z0)) -+ -+/* -+** bic_u16_x_untied: -+** bic z0\.d, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (bic_u16_x_untied, 
svuint16_t, -+ z0 = svbic_u16_x (p0, z1, z2), -+ z0 = svbic_x (p0, z1, z2)) -+ -+/* -+** bic_w0_u16_x_tied1: -+** mov (z[0-9]+)\.h, w0 -+** bic z0\.d, z0\.d, \1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_w0_u16_x_tied1, svuint16_t, uint16_t, -+ z0 = svbic_n_u16_x (p0, z0, x0), -+ z0 = svbic_x (p0, z0, x0)) -+ -+/* -+** bic_w0_u16_x_untied: -+** mov (z[0-9]+)\.h, w0 -+** bic z0\.d, z1\.d, \1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_w0_u16_x_untied, svuint16_t, uint16_t, -+ z0 = svbic_n_u16_x (p0, z1, x0), -+ z0 = svbic_x (p0, z1, x0)) -+ -+/* -+** bic_1_u16_x_tied1: -+** and z0\.h, z0\.h, #0xfffe -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_u16_x_tied1, svuint16_t, -+ z0 = svbic_n_u16_x (p0, z0, 1), -+ z0 = svbic_x (p0, z0, 1)) -+ -+/* -+** bic_1_u16_x_untied: -+** movprfx z0, z1 -+** and z0\.h, z0\.h, #0xfffe -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_u16_x_untied, svuint16_t, -+ z0 = svbic_n_u16_x (p0, z1, 1), -+ z0 = svbic_x (p0, z1, 1)) -+ -+/* -+** bic_127_u16_x: -+** and z0\.h, z0\.h, #0xff80 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_127_u16_x, svuint16_t, -+ z0 = svbic_n_u16_x (p0, z0, 127), -+ z0 = svbic_x (p0, z0, 127)) -+ -+/* -+** bic_128_u16_x: -+** and z0\.h, z0\.h, #0xff7f -+** ret -+*/ -+TEST_UNIFORM_Z (bic_128_u16_x, svuint16_t, -+ z0 = svbic_n_u16_x (p0, z0, 128), -+ z0 = svbic_x (p0, z0, 128)) -+ -+/* -+** bic_255_u16_x: -+** and z0\.h, z0\.h, #0xff00 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_255_u16_x, svuint16_t, -+ z0 = svbic_n_u16_x (p0, z0, 255), -+ z0 = svbic_x (p0, z0, 255)) -+ -+/* -+** bic_256_u16_x: -+** and z0\.h, z0\.h, #0xfeff -+** ret -+*/ -+TEST_UNIFORM_Z (bic_256_u16_x, svuint16_t, -+ z0 = svbic_n_u16_x (p0, z0, 256), -+ z0 = svbic_x (p0, z0, 256)) -+ -+/* -+** bic_257_u16_x: -+** and z0\.h, z0\.h, #0xfefe -+** ret -+*/ -+TEST_UNIFORM_Z (bic_257_u16_x, svuint16_t, -+ z0 = svbic_n_u16_x (p0, z0, 257), -+ z0 = svbic_x (p0, z0, 257)) -+ -+/* -+** bic_512_u16_x: -+** and z0\.h, z0\.h, #0xfdff -+** ret -+*/ -+TEST_UNIFORM_Z (bic_512_u16_x, svuint16_t, -+ z0 = svbic_n_u16_x (p0, z0, 512), -+ z0 = svbic_x (p0, z0, 512)) -+ -+/* -+** bic_65280_u16_x: -+** and z0\.h, z0\.h, #0xff -+** ret -+*/ -+TEST_UNIFORM_Z (bic_65280_u16_x, svuint16_t, -+ z0 = svbic_n_u16_x (p0, z0, 0xff00), -+ z0 = svbic_x (p0, z0, 0xff00)) -+ -+/* -+** bic_m127_u16_x: -+** and z0\.h, z0\.h, #0x7e -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m127_u16_x, svuint16_t, -+ z0 = svbic_n_u16_x (p0, z0, -127), -+ z0 = svbic_x (p0, z0, -127)) -+ -+/* -+** bic_m128_u16_x: -+** and z0\.h, z0\.h, #0x7f -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m128_u16_x, svuint16_t, -+ z0 = svbic_n_u16_x (p0, z0, -128), -+ z0 = svbic_x (p0, z0, -128)) -+ -+/* -+** bic_m255_u16_x: -+** and z0\.h, z0\.h, #0xfe -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m255_u16_x, svuint16_t, -+ z0 = svbic_n_u16_x (p0, z0, -255), -+ z0 = svbic_x (p0, z0, -255)) -+ -+/* -+** bic_m256_u16_x: -+** and z0\.h, z0\.h, #0xff -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m256_u16_x, svuint16_t, -+ z0 = svbic_n_u16_x (p0, z0, -256), -+ z0 = svbic_x (p0, z0, -256)) -+ -+/* -+** bic_m257_u16_x: -+** and z0\.h, z0\.h, #0x100 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m257_u16_x, svuint16_t, -+ z0 = svbic_n_u16_x (p0, z0, -257), -+ z0 = svbic_x (p0, z0, -257)) -+ -+/* -+** bic_m512_u16_x: -+** and z0\.h, z0\.h, #0x1ff -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m512_u16_x, svuint16_t, -+ z0 = svbic_n_u16_x (p0, z0, -512), -+ z0 = svbic_x (p0, z0, -512)) -+ -+/* -+** bic_m32768_u16_x: -+** and z0\.h, z0\.h, #0x7fff -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m32768_u16_x, svuint16_t, -+ z0 = svbic_n_u16_x (p0, z0, -0x8000), -+ z0 = 
svbic_x (p0, z0, -0x8000)) -+ -+/* -+** bic_5_u16_x: -+** mov (z[0-9]+)\.h, #-6 -+** and z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (bic_5_u16_x, svuint16_t, -+ z0 = svbic_n_u16_x (p0, z0, 5), -+ z0 = svbic_x (p0, z0, 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_u32.c -new file mode 100644 -index 000000000..b308b599b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_u32.c -@@ -0,0 +1,363 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** bic_u32_m_tied1: -+** bic z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (bic_u32_m_tied1, svuint32_t, -+ z0 = svbic_u32_m (p0, z0, z1), -+ z0 = svbic_m (p0, z0, z1)) -+ -+/* -+** bic_u32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** bic z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (bic_u32_m_tied2, svuint32_t, -+ z0 = svbic_u32_m (p0, z1, z0), -+ z0 = svbic_m (p0, z1, z0)) -+ -+/* -+** bic_u32_m_untied: -+** movprfx z0, z1 -+** bic z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (bic_u32_m_untied, svuint32_t, -+ z0 = svbic_u32_m (p0, z1, z2), -+ z0 = svbic_m (p0, z1, z2)) -+ -+/* -+** bic_w0_u32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** bic z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_w0_u32_m_tied1, svuint32_t, uint32_t, -+ z0 = svbic_n_u32_m (p0, z0, x0), -+ z0 = svbic_m (p0, z0, x0)) -+ -+/* -+** bic_w0_u32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** bic z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_w0_u32_m_untied, svuint32_t, uint32_t, -+ z0 = svbic_n_u32_m (p0, z1, x0), -+ z0 = svbic_m (p0, z1, x0)) -+ -+/* -+** bic_1_u32_m_tied1: -+** mov (z[0-9]+\.s), #-2 -+** and z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_u32_m_tied1, svuint32_t, -+ z0 = svbic_n_u32_m (p0, z0, 1), -+ z0 = svbic_m (p0, z0, 1)) -+ -+/* -+** bic_1_u32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #-2 -+** movprfx z0, z1 -+** and z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_u32_m_untied, svuint32_t, -+ z0 = svbic_n_u32_m (p0, z1, 1), -+ z0 = svbic_m (p0, z1, 1)) -+ -+/* -+** bic_m2_u32_m: -+** mov (z[0-9]+\.s), #1 -+** and z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m2_u32_m, svuint32_t, -+ z0 = svbic_n_u32_m (p0, z0, -2), -+ z0 = svbic_m (p0, z0, -2)) -+ -+/* -+** bic_u32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** bic z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (bic_u32_z_tied1, svuint32_t, -+ z0 = svbic_u32_z (p0, z0, z1), -+ z0 = svbic_z (p0, z0, z1)) -+ -+/* -+** bic_u32_z_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, z1\.s -+** bic z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (bic_u32_z_tied2, svuint32_t, -+ z0 = svbic_u32_z (p0, z1, z0), -+ z0 = svbic_z (p0, z1, z0)) -+ -+/* -+** bic_u32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** bic z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (bic_u32_z_untied, svuint32_t, -+ z0 = svbic_u32_z (p0, z1, z2), -+ z0 = svbic_z (p0, z1, z2)) -+ -+/* -+** bic_w0_u32_z_tied1: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** bic z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_w0_u32_z_tied1, svuint32_t, uint32_t, -+ z0 = svbic_n_u32_z (p0, z0, x0), -+ z0 = svbic_z (p0, z0, x0)) -+ -+/* -+** bic_w0_u32_z_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z1\.s -+** bic z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ 
-+TEST_UNIFORM_ZX (bic_w0_u32_z_untied, svuint32_t, uint32_t, -+ z0 = svbic_n_u32_z (p0, z1, x0), -+ z0 = svbic_z (p0, z1, x0)) -+ -+/* -+** bic_1_u32_z_tied1: -+** mov (z[0-9]+\.s), #-2 -+** movprfx z0\.s, p0/z, z0\.s -+** and z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_u32_z_tied1, svuint32_t, -+ z0 = svbic_n_u32_z (p0, z0, 1), -+ z0 = svbic_z (p0, z0, 1)) -+ -+/* -+** bic_1_u32_z_untied: -+** mov (z[0-9]+\.s), #-2 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** and z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** and z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_u32_z_untied, svuint32_t, -+ z0 = svbic_n_u32_z (p0, z1, 1), -+ z0 = svbic_z (p0, z1, 1)) -+ -+/* -+** bic_u32_x_tied1: -+** bic z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (bic_u32_x_tied1, svuint32_t, -+ z0 = svbic_u32_x (p0, z0, z1), -+ z0 = svbic_x (p0, z0, z1)) -+ -+/* -+** bic_u32_x_tied2: -+** bic z0\.d, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (bic_u32_x_tied2, svuint32_t, -+ z0 = svbic_u32_x (p0, z1, z0), -+ z0 = svbic_x (p0, z1, z0)) -+ -+/* -+** bic_u32_x_untied: -+** bic z0\.d, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (bic_u32_x_untied, svuint32_t, -+ z0 = svbic_u32_x (p0, z1, z2), -+ z0 = svbic_x (p0, z1, z2)) -+ -+/* -+** bic_w0_u32_x_tied1: -+** mov (z[0-9]+)\.s, w0 -+** bic z0\.d, z0\.d, \1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_w0_u32_x_tied1, svuint32_t, uint32_t, -+ z0 = svbic_n_u32_x (p0, z0, x0), -+ z0 = svbic_x (p0, z0, x0)) -+ -+/* -+** bic_w0_u32_x_untied: -+** mov (z[0-9]+)\.s, w0 -+** bic z0\.d, z1\.d, \1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_w0_u32_x_untied, svuint32_t, uint32_t, -+ z0 = svbic_n_u32_x (p0, z1, x0), -+ z0 = svbic_x (p0, z1, x0)) -+ -+/* -+** bic_1_u32_x_tied1: -+** and z0\.s, z0\.s, #0xfffffffe -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_u32_x_tied1, svuint32_t, -+ z0 = svbic_n_u32_x (p0, z0, 1), -+ z0 = svbic_x (p0, z0, 1)) -+ -+/* -+** bic_1_u32_x_untied: -+** movprfx z0, z1 -+** and z0\.s, z0\.s, #0xfffffffe -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_u32_x_untied, svuint32_t, -+ z0 = svbic_n_u32_x (p0, z1, 1), -+ z0 = svbic_x (p0, z1, 1)) -+ -+/* -+** bic_127_u32_x: -+** and z0\.s, z0\.s, #0xffffff80 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_127_u32_x, svuint32_t, -+ z0 = svbic_n_u32_x (p0, z0, 127), -+ z0 = svbic_x (p0, z0, 127)) -+ -+/* -+** bic_128_u32_x: -+** and z0\.s, z0\.s, #0xffffff7f -+** ret -+*/ -+TEST_UNIFORM_Z (bic_128_u32_x, svuint32_t, -+ z0 = svbic_n_u32_x (p0, z0, 128), -+ z0 = svbic_x (p0, z0, 128)) -+ -+/* -+** bic_255_u32_x: -+** and z0\.s, z0\.s, #0xffffff00 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_255_u32_x, svuint32_t, -+ z0 = svbic_n_u32_x (p0, z0, 255), -+ z0 = svbic_x (p0, z0, 255)) -+ -+/* -+** bic_256_u32_x: -+** and z0\.s, z0\.s, #0xfffffeff -+** ret -+*/ -+TEST_UNIFORM_Z (bic_256_u32_x, svuint32_t, -+ z0 = svbic_n_u32_x (p0, z0, 256), -+ z0 = svbic_x (p0, z0, 256)) -+ -+/* TODO: Bad code and needs fixing. 
*/ -+TEST_UNIFORM_Z (bic_257_u32_x, svuint32_t, -+ z0 = svbic_n_u32_x (p0, z0, 257), -+ z0 = svbic_x (p0, z0, 257)) -+ -+/* -+** bic_512_u32_x: -+** and z0\.s, z0\.s, #0xfffffdff -+** ret -+*/ -+TEST_UNIFORM_Z (bic_512_u32_x, svuint32_t, -+ z0 = svbic_n_u32_x (p0, z0, 512), -+ z0 = svbic_x (p0, z0, 512)) -+ -+/* -+** bic_65280_u32_x: -+** and z0\.s, z0\.s, #0xffff00ff -+** ret -+*/ -+TEST_UNIFORM_Z (bic_65280_u32_x, svuint32_t, -+ z0 = svbic_n_u32_x (p0, z0, 0xff00), -+ z0 = svbic_x (p0, z0, 0xff00)) -+ -+/* -+** bic_m127_u32_x: -+** and z0\.s, z0\.s, #0x7e -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m127_u32_x, svuint32_t, -+ z0 = svbic_n_u32_x (p0, z0, -127), -+ z0 = svbic_x (p0, z0, -127)) -+ -+/* -+** bic_m128_u32_x: -+** and z0\.s, z0\.s, #0x7f -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m128_u32_x, svuint32_t, -+ z0 = svbic_n_u32_x (p0, z0, -128), -+ z0 = svbic_x (p0, z0, -128)) -+ -+/* -+** bic_m255_u32_x: -+** and z0\.s, z0\.s, #0xfe -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m255_u32_x, svuint32_t, -+ z0 = svbic_n_u32_x (p0, z0, -255), -+ z0 = svbic_x (p0, z0, -255)) -+ -+/* -+** bic_m256_u32_x: -+** and z0\.s, z0\.s, #0xff -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m256_u32_x, svuint32_t, -+ z0 = svbic_n_u32_x (p0, z0, -256), -+ z0 = svbic_x (p0, z0, -256)) -+ -+/* -+** bic_m257_u32_x: -+** and z0\.s, z0\.s, #0x100 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m257_u32_x, svuint32_t, -+ z0 = svbic_n_u32_x (p0, z0, -257), -+ z0 = svbic_x (p0, z0, -257)) -+ -+/* -+** bic_m512_u32_x: -+** and z0\.s, z0\.s, #0x1ff -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m512_u32_x, svuint32_t, -+ z0 = svbic_n_u32_x (p0, z0, -512), -+ z0 = svbic_x (p0, z0, -512)) -+ -+/* -+** bic_m32768_u32_x: -+** and z0\.s, z0\.s, #0x7fff -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m32768_u32_x, svuint32_t, -+ z0 = svbic_n_u32_x (p0, z0, -0x8000), -+ z0 = svbic_x (p0, z0, -0x8000)) -+ -+/* -+** bic_5_u32_x: -+** mov (z[0-9]+)\.s, #-6 -+** and z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (bic_5_u32_x, svuint32_t, -+ z0 = svbic_n_u32_x (p0, z0, 5), -+ z0 = svbic_x (p0, z0, 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_u64.c -new file mode 100644 -index 000000000..e82db1e94 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_u64.c -@@ -0,0 +1,363 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** bic_u64_m_tied1: -+** bic z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (bic_u64_m_tied1, svuint64_t, -+ z0 = svbic_u64_m (p0, z0, z1), -+ z0 = svbic_m (p0, z0, z1)) -+ -+/* -+** bic_u64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** bic z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_u64_m_tied2, svuint64_t, -+ z0 = svbic_u64_m (p0, z1, z0), -+ z0 = svbic_m (p0, z1, z0)) -+ -+/* -+** bic_u64_m_untied: -+** movprfx z0, z1 -+** bic z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (bic_u64_m_untied, svuint64_t, -+ z0 = svbic_u64_m (p0, z1, z2), -+ z0 = svbic_m (p0, z1, z2)) -+ -+/* -+** bic_x0_u64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** bic z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_x0_u64_m_tied1, svuint64_t, uint64_t, -+ z0 = svbic_n_u64_m (p0, z0, x0), -+ z0 = svbic_m (p0, z0, x0)) -+ -+/* -+** bic_x0_u64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** bic z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_x0_u64_m_untied, svuint64_t, uint64_t, -+ z0 = svbic_n_u64_m (p0, z1, x0), -+ z0 = svbic_m (p0, z1, x0)) 
-+ -+/* -+** bic_1_u64_m_tied1: -+** mov (z[0-9]+\.d), #-2 -+** and z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_u64_m_tied1, svuint64_t, -+ z0 = svbic_n_u64_m (p0, z0, 1), -+ z0 = svbic_m (p0, z0, 1)) -+ -+/* -+** bic_1_u64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #-2 -+** movprfx z0, z1 -+** and z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_u64_m_untied, svuint64_t, -+ z0 = svbic_n_u64_m (p0, z1, 1), -+ z0 = svbic_m (p0, z1, 1)) -+ -+/* -+** bic_m2_u64_m: -+** mov (z[0-9]+\.d), #1 -+** and z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m2_u64_m, svuint64_t, -+ z0 = svbic_n_u64_m (p0, z0, -2), -+ z0 = svbic_m (p0, z0, -2)) -+ -+/* -+** bic_u64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** bic z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (bic_u64_z_tied1, svuint64_t, -+ z0 = svbic_u64_z (p0, z0, z1), -+ z0 = svbic_z (p0, z0, z1)) -+ -+/* -+** bic_u64_z_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, z1\.d -+** bic z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_u64_z_tied2, svuint64_t, -+ z0 = svbic_u64_z (p0, z1, z0), -+ z0 = svbic_z (p0, z1, z0)) -+ -+/* -+** bic_u64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** bic z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (bic_u64_z_untied, svuint64_t, -+ z0 = svbic_u64_z (p0, z1, z2), -+ z0 = svbic_z (p0, z1, z2)) -+ -+/* -+** bic_x0_u64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** bic z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_x0_u64_z_tied1, svuint64_t, uint64_t, -+ z0 = svbic_n_u64_z (p0, z0, x0), -+ z0 = svbic_z (p0, z0, x0)) -+ -+/* -+** bic_x0_u64_z_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z1\.d -+** bic z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_x0_u64_z_untied, svuint64_t, uint64_t, -+ z0 = svbic_n_u64_z (p0, z1, x0), -+ z0 = svbic_z (p0, z1, x0)) -+ -+/* -+** bic_1_u64_z_tied1: -+** mov (z[0-9]+\.d), #-2 -+** movprfx z0\.d, p0/z, z0\.d -+** and z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_u64_z_tied1, svuint64_t, -+ z0 = svbic_n_u64_z (p0, z0, 1), -+ z0 = svbic_z (p0, z0, 1)) -+ -+/* -+** bic_1_u64_z_untied: -+** mov (z[0-9]+\.d), #-2 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** and z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** and z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_u64_z_untied, svuint64_t, -+ z0 = svbic_n_u64_z (p0, z1, 1), -+ z0 = svbic_z (p0, z1, 1)) -+ -+/* -+** bic_u64_x_tied1: -+** bic z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (bic_u64_x_tied1, svuint64_t, -+ z0 = svbic_u64_x (p0, z0, z1), -+ z0 = svbic_x (p0, z0, z1)) -+ -+/* -+** bic_u64_x_tied2: -+** bic z0\.d, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (bic_u64_x_tied2, svuint64_t, -+ z0 = svbic_u64_x (p0, z1, z0), -+ z0 = svbic_x (p0, z1, z0)) -+ -+/* -+** bic_u64_x_untied: -+** bic z0\.d, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (bic_u64_x_untied, svuint64_t, -+ z0 = svbic_u64_x (p0, z1, z2), -+ z0 = svbic_x (p0, z1, z2)) -+ -+/* -+** bic_x0_u64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** bic z0\.d, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_x0_u64_x_tied1, svuint64_t, uint64_t, -+ z0 = svbic_n_u64_x (p0, z0, x0), -+ z0 = svbic_x (p0, z0, x0)) -+ -+/* -+** bic_x0_u64_x_untied: -+** mov (z[0-9]+\.d), x0 -+** bic z0\.d, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_x0_u64_x_untied, svuint64_t, uint64_t, -+ z0 = svbic_n_u64_x (p0, z1, x0), -+ z0 = svbic_x (p0, z1, x0)) -+ -+/* -+** bic_1_u64_x_tied1: -+** 
and z0\.d, z0\.d, #0xfffffffffffffffe -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_u64_x_tied1, svuint64_t, -+ z0 = svbic_n_u64_x (p0, z0, 1), -+ z0 = svbic_x (p0, z0, 1)) -+ -+/* -+** bic_1_u64_x_untied: -+** movprfx z0, z1 -+** and z0\.d, z0\.d, #0xfffffffffffffffe -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_u64_x_untied, svuint64_t, -+ z0 = svbic_n_u64_x (p0, z1, 1), -+ z0 = svbic_x (p0, z1, 1)) -+ -+/* -+** bic_127_u64_x: -+** and z0\.d, z0\.d, #0xffffffffffffff80 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_127_u64_x, svuint64_t, -+ z0 = svbic_n_u64_x (p0, z0, 127), -+ z0 = svbic_x (p0, z0, 127)) -+ -+/* -+** bic_128_u64_x: -+** and z0\.d, z0\.d, #0xffffffffffffff7f -+** ret -+*/ -+TEST_UNIFORM_Z (bic_128_u64_x, svuint64_t, -+ z0 = svbic_n_u64_x (p0, z0, 128), -+ z0 = svbic_x (p0, z0, 128)) -+ -+/* -+** bic_255_u64_x: -+** and z0\.d, z0\.d, #0xffffffffffffff00 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_255_u64_x, svuint64_t, -+ z0 = svbic_n_u64_x (p0, z0, 255), -+ z0 = svbic_x (p0, z0, 255)) -+ -+/* -+** bic_256_u64_x: -+** and z0\.d, z0\.d, #0xfffffffffffffeff -+** ret -+*/ -+TEST_UNIFORM_Z (bic_256_u64_x, svuint64_t, -+ z0 = svbic_n_u64_x (p0, z0, 256), -+ z0 = svbic_x (p0, z0, 256)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (bic_257_u64_x, svuint64_t, -+ z0 = svbic_n_u64_x (p0, z0, 257), -+ z0 = svbic_x (p0, z0, 257)) -+ -+/* -+** bic_512_u64_x: -+** and z0\.d, z0\.d, #0xfffffffffffffdff -+** ret -+*/ -+TEST_UNIFORM_Z (bic_512_u64_x, svuint64_t, -+ z0 = svbic_n_u64_x (p0, z0, 512), -+ z0 = svbic_x (p0, z0, 512)) -+ -+/* -+** bic_65280_u64_x: -+** and z0\.d, z0\.d, #0xffffffffffff00ff -+** ret -+*/ -+TEST_UNIFORM_Z (bic_65280_u64_x, svuint64_t, -+ z0 = svbic_n_u64_x (p0, z0, 0xff00), -+ z0 = svbic_x (p0, z0, 0xff00)) -+ -+/* -+** bic_m127_u64_x: -+** and z0\.d, z0\.d, #0x7e -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m127_u64_x, svuint64_t, -+ z0 = svbic_n_u64_x (p0, z0, -127), -+ z0 = svbic_x (p0, z0, -127)) -+ -+/* -+** bic_m128_u64_x: -+** and z0\.d, z0\.d, #0x7f -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m128_u64_x, svuint64_t, -+ z0 = svbic_n_u64_x (p0, z0, -128), -+ z0 = svbic_x (p0, z0, -128)) -+ -+/* -+** bic_m255_u64_x: -+** and z0\.d, z0\.d, #0xfe -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m255_u64_x, svuint64_t, -+ z0 = svbic_n_u64_x (p0, z0, -255), -+ z0 = svbic_x (p0, z0, -255)) -+ -+/* -+** bic_m256_u64_x: -+** and z0\.d, z0\.d, #0xff -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m256_u64_x, svuint64_t, -+ z0 = svbic_n_u64_x (p0, z0, -256), -+ z0 = svbic_x (p0, z0, -256)) -+ -+/* -+** bic_m257_u64_x: -+** and z0\.d, z0\.d, #0x100 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m257_u64_x, svuint64_t, -+ z0 = svbic_n_u64_x (p0, z0, -257), -+ z0 = svbic_x (p0, z0, -257)) -+ -+/* -+** bic_m512_u64_x: -+** and z0\.d, z0\.d, #0x1ff -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m512_u64_x, svuint64_t, -+ z0 = svbic_n_u64_x (p0, z0, -512), -+ z0 = svbic_x (p0, z0, -512)) -+ -+/* -+** bic_m32768_u64_x: -+** and z0\.d, z0\.d, #0x7fff -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m32768_u64_x, svuint64_t, -+ z0 = svbic_n_u64_x (p0, z0, -0x8000), -+ z0 = svbic_x (p0, z0, -0x8000)) -+ -+/* -+** bic_5_u64_x: -+** mov (z[0-9]+\.d), #-6 -+** and z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (bic_5_u64_x, svuint64_t, -+ z0 = svbic_n_u64_x (p0, z0, 5), -+ z0 = svbic_x (p0, z0, 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_u8.c -new file mode 100644 -index 000000000..80c489b9c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/bic_u8.c -@@ -0,0 +1,286 
@@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** bic_u8_m_tied1: -+** bic z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (bic_u8_m_tied1, svuint8_t, -+ z0 = svbic_u8_m (p0, z0, z1), -+ z0 = svbic_m (p0, z0, z1)) -+ -+/* -+** bic_u8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** bic z0\.b, p0/m, z0\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (bic_u8_m_tied2, svuint8_t, -+ z0 = svbic_u8_m (p0, z1, z0), -+ z0 = svbic_m (p0, z1, z0)) -+ -+/* -+** bic_u8_m_untied: -+** movprfx z0, z1 -+** bic z0\.b, p0/m, z0\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (bic_u8_m_untied, svuint8_t, -+ z0 = svbic_u8_m (p0, z1, z2), -+ z0 = svbic_m (p0, z1, z2)) -+ -+/* -+** bic_w0_u8_m_tied1: -+** mov (z[0-9]+\.b), w0 -+** bic z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_w0_u8_m_tied1, svuint8_t, uint8_t, -+ z0 = svbic_n_u8_m (p0, z0, x0), -+ z0 = svbic_m (p0, z0, x0)) -+ -+/* -+** bic_w0_u8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** bic z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_w0_u8_m_untied, svuint8_t, uint8_t, -+ z0 = svbic_n_u8_m (p0, z1, x0), -+ z0 = svbic_m (p0, z1, x0)) -+ -+/* -+** bic_1_u8_m_tied1: -+** mov (z[0-9]+\.b), #-2 -+** and z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_u8_m_tied1, svuint8_t, -+ z0 = svbic_n_u8_m (p0, z0, 1), -+ z0 = svbic_m (p0, z0, 1)) -+ -+/* -+** bic_1_u8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), #-2 -+** movprfx z0, z1 -+** and z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_u8_m_untied, svuint8_t, -+ z0 = svbic_n_u8_m (p0, z1, 1), -+ z0 = svbic_m (p0, z1, 1)) -+ -+/* -+** bic_m2_u8_m: -+** mov (z[0-9]+\.b), #1 -+** and z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m2_u8_m, svuint8_t, -+ z0 = svbic_n_u8_m (p0, z0, -2), -+ z0 = svbic_m (p0, z0, -2)) -+ -+/* -+** bic_u8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** bic z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (bic_u8_z_tied1, svuint8_t, -+ z0 = svbic_u8_z (p0, z0, z1), -+ z0 = svbic_z (p0, z0, z1)) -+ -+/* -+** bic_u8_z_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.b, p0/z, z1\.b -+** bic z0\.b, p0/m, z0\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (bic_u8_z_tied2, svuint8_t, -+ z0 = svbic_u8_z (p0, z1, z0), -+ z0 = svbic_z (p0, z1, z0)) -+ -+/* -+** bic_u8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** bic z0\.b, p0/m, z0\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (bic_u8_z_untied, svuint8_t, -+ z0 = svbic_u8_z (p0, z1, z2), -+ z0 = svbic_z (p0, z1, z2)) -+ -+/* -+** bic_w0_u8_z_tied1: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** bic z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_w0_u8_z_tied1, svuint8_t, uint8_t, -+ z0 = svbic_n_u8_z (p0, z0, x0), -+ z0 = svbic_z (p0, z0, x0)) -+ -+/* -+** bic_w0_u8_z_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z1\.b -+** bic z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_w0_u8_z_untied, svuint8_t, uint8_t, -+ z0 = svbic_n_u8_z (p0, z1, x0), -+ z0 = svbic_z (p0, z1, x0)) -+ -+/* -+** bic_1_u8_z_tied1: -+** mov (z[0-9]+\.b), #-2 -+** movprfx z0\.b, p0/z, z0\.b -+** and z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_u8_z_tied1, svuint8_t, -+ z0 = svbic_n_u8_z (p0, z0, 1), -+ z0 = svbic_z (p0, z0, 1)) -+ -+/* -+** bic_1_u8_z_untied: -+** mov (z[0-9]+\.b), #-2 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** and z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** and z0\.b, p0/m, 
z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_u8_z_untied, svuint8_t, -+ z0 = svbic_n_u8_z (p0, z1, 1), -+ z0 = svbic_z (p0, z1, 1)) -+ -+/* -+** bic_u8_x_tied1: -+** bic z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (bic_u8_x_tied1, svuint8_t, -+ z0 = svbic_u8_x (p0, z0, z1), -+ z0 = svbic_x (p0, z0, z1)) -+ -+/* -+** bic_u8_x_tied2: -+** bic z0\.d, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (bic_u8_x_tied2, svuint8_t, -+ z0 = svbic_u8_x (p0, z1, z0), -+ z0 = svbic_x (p0, z1, z0)) -+ -+/* -+** bic_u8_x_untied: -+** bic z0\.d, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (bic_u8_x_untied, svuint8_t, -+ z0 = svbic_u8_x (p0, z1, z2), -+ z0 = svbic_x (p0, z1, z2)) -+ -+/* -+** bic_w0_u8_x_tied1: -+** mov (z[0-9]+)\.b, w0 -+** bic z0\.d, z0\.d, \1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_w0_u8_x_tied1, svuint8_t, uint8_t, -+ z0 = svbic_n_u8_x (p0, z0, x0), -+ z0 = svbic_x (p0, z0, x0)) -+ -+/* -+** bic_w0_u8_x_untied: -+** mov (z[0-9]+)\.b, w0 -+** bic z0\.d, z1\.d, \1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (bic_w0_u8_x_untied, svuint8_t, uint8_t, -+ z0 = svbic_n_u8_x (p0, z1, x0), -+ z0 = svbic_x (p0, z1, x0)) -+ -+/* -+** bic_1_u8_x_tied1: -+** and z0\.b, z0\.b, #0xfe -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_u8_x_tied1, svuint8_t, -+ z0 = svbic_n_u8_x (p0, z0, 1), -+ z0 = svbic_x (p0, z0, 1)) -+ -+/* -+** bic_1_u8_x_untied: -+** movprfx z0, z1 -+** and z0\.b, z0\.b, #0xfe -+** ret -+*/ -+TEST_UNIFORM_Z (bic_1_u8_x_untied, svuint8_t, -+ z0 = svbic_n_u8_x (p0, z1, 1), -+ z0 = svbic_x (p0, z1, 1)) -+ -+/* -+** bic_127_u8_x: -+** and z0\.b, z0\.b, #0x80 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_127_u8_x, svuint8_t, -+ z0 = svbic_n_u8_x (p0, z0, 127), -+ z0 = svbic_x (p0, z0, 127)) -+ -+/* -+** bic_128_u8_x: -+** and z0\.b, z0\.b, #0x7f -+** ret -+*/ -+TEST_UNIFORM_Z (bic_128_u8_x, svuint8_t, -+ z0 = svbic_n_u8_x (p0, z0, 128), -+ z0 = svbic_x (p0, z0, 128)) -+ -+/* -+** bic_255_u8_x: -+** mov z0\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (bic_255_u8_x, svuint8_t, -+ z0 = svbic_n_u8_x (p0, z0, 255), -+ z0 = svbic_x (p0, z0, 255)) -+ -+/* -+** bic_m127_u8_x: -+** and z0\.b, z0\.b, #0x7e -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m127_u8_x, svuint8_t, -+ z0 = svbic_n_u8_x (p0, z0, -127), -+ z0 = svbic_x (p0, z0, -127)) -+ -+/* -+** bic_m128_u8_x: -+** and z0\.b, z0\.b, #0x7f -+** ret -+*/ -+TEST_UNIFORM_Z (bic_m128_u8_x, svuint8_t, -+ z0 = svbic_n_u8_x (p0, z0, -128), -+ z0 = svbic_x (p0, z0, -128)) -+ -+/* -+** bic_5_u8_x: -+** mov (z[0-9]+)\.b, #-6 -+** and z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (bic_5_u8_x, svuint8_t, -+ z0 = svbic_n_u8_x (p0, z0, 5), -+ z0 = svbic_x (p0, z0, 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brka_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brka_b.c -new file mode 100644 -index 000000000..63426cf94 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brka_b.c -@@ -0,0 +1,54 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** brka_b_m_tied12: -+** brka p0\.b, p3/m, p0\.b -+** ret -+*/ -+TEST_UNIFORM_P (brka_b_m_tied12, -+ p0 = svbrka_b_m (p0, p3, p0), -+ p0 = svbrka_m (p0, p3, p0)) -+ -+/* -+** brka_b_m_tied1: -+** brka p0\.b, p3/m, p1\.b -+** ret -+*/ -+TEST_UNIFORM_P (brka_b_m_tied1, -+ p0 = svbrka_b_m (p0, p3, p1), -+ p0 = svbrka_m (p0, p3, p1)) -+ -+/* Bad RA choice: no preferred output sequence. 
*/ -+TEST_UNIFORM_P (brka_b_m_tied2, -+ p0 = svbrka_b_m (p1, p3, p0), -+ p0 = svbrka_m (p1, p3, p0)) -+ -+/* -+** brka_b_m_untied: -+** mov p0\.b, p2\.b -+** brka p0\.b, p3/m, p1\.b -+** ret -+*/ -+TEST_UNIFORM_P (brka_b_m_untied, -+ p0 = svbrka_b_m (p2, p3, p1), -+ p0 = svbrka_m (p2, p3, p1)) -+ -+/* -+** brka_b_z_tied1: -+** brka p0\.b, p3/z, p0\.b -+** ret -+*/ -+TEST_UNIFORM_P (brka_b_z_tied1, -+ p0 = svbrka_b_z (p3, p0), -+ p0 = svbrka_z (p3, p0)) -+ -+/* -+** brka_b_z_untied: -+** brka p0\.b, p3/z, p1\.b -+** ret -+*/ -+TEST_UNIFORM_P (brka_b_z_untied, -+ p0 = svbrka_b_z (p3, p1), -+ p0 = svbrka_z (p3, p1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkb_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkb_b.c -new file mode 100644 -index 000000000..4f9a2c2d7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkb_b.c -@@ -0,0 +1,54 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** brkb_b_m_tied12: -+** brkb p0\.b, p3/m, p0\.b -+** ret -+*/ -+TEST_UNIFORM_P (brkb_b_m_tied12, -+ p0 = svbrkb_b_m (p0, p3, p0), -+ p0 = svbrkb_m (p0, p3, p0)) -+ -+/* -+** brkb_b_m_tied1: -+** brkb p0\.b, p3/m, p1\.b -+** ret -+*/ -+TEST_UNIFORM_P (brkb_b_m_tied1, -+ p0 = svbrkb_b_m (p0, p3, p1), -+ p0 = svbrkb_m (p0, p3, p1)) -+ -+/* Bad RA choice: no preferred output sequence. */ -+TEST_UNIFORM_P (brkb_b_m_tied2, -+ p0 = svbrkb_b_m (p1, p3, p0), -+ p0 = svbrkb_m (p1, p3, p0)) -+ -+/* -+** brkb_b_m_untied: -+** mov p0\.b, p2\.b -+** brkb p0\.b, p3/m, p1\.b -+** ret -+*/ -+TEST_UNIFORM_P (brkb_b_m_untied, -+ p0 = svbrkb_b_m (p2, p3, p1), -+ p0 = svbrkb_m (p2, p3, p1)) -+ -+/* -+** brkb_b_z_tied1: -+** brkb p0\.b, p3/z, p0\.b -+** ret -+*/ -+TEST_UNIFORM_P (brkb_b_z_tied1, -+ p0 = svbrkb_b_z (p3, p0), -+ p0 = svbrkb_z (p3, p0)) -+ -+/* -+** brkb_b_z_untied: -+** brkb p0\.b, p3/z, p1\.b -+** ret -+*/ -+TEST_UNIFORM_P (brkb_b_z_untied, -+ p0 = svbrkb_b_z (p3, p1), -+ p0 = svbrkb_z (p3, p1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkn_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkn_b.c -new file mode 100644 -index 000000000..229a5fff9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkn_b.c -@@ -0,0 +1,27 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* Bad RA choice: no preferred output sequence. 
*/ -+TEST_UNIFORM_P (brkn_b_z_tied1, -+ p0 = svbrkn_b_z (p3, p0, p1), -+ p0 = svbrkn_z (p3, p0, p1)) -+ -+/* -+** brkn_b_z_tied2: -+** brkn p0\.b, p3/z, p1\.b, p0\.b -+** ret -+*/ -+TEST_UNIFORM_P (brkn_b_z_tied2, -+ p0 = svbrkn_b_z (p3, p1, p0), -+ p0 = svbrkn_z (p3, p1, p0)) -+ -+/* -+** brkn_b_z_untied: -+** mov p0\.b, p2\.b -+** brkn p0\.b, p3/z, p1\.b, p0\.b -+** ret -+*/ -+TEST_UNIFORM_P (brkn_b_z_untied, -+ p0 = svbrkn_b_z (p3, p1, p2), -+ p0 = svbrkn_z (p3, p1, p2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkpa_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkpa_b.c -new file mode 100644 -index 000000000..2c074e389 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkpa_b.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** brkpa_b_z_tied1: -+** brkpa p0\.b, p3/z, p0\.b, p1\.b -+** ret -+*/ -+TEST_UNIFORM_P (brkpa_b_z_tied1, -+ p0 = svbrkpa_b_z (p3, p0, p1), -+ p0 = svbrkpa_z (p3, p0, p1)) -+ -+/* -+** brkpa_b_z_tied2: -+** brkpa p0\.b, p3/z, p1\.b, p0\.b -+** ret -+*/ -+TEST_UNIFORM_P (brkpa_b_z_tied2, -+ p0 = svbrkpa_b_z (p3, p1, p0), -+ p0 = svbrkpa_z (p3, p1, p0)) -+ -+/* -+** brkpa_b_z_untied: -+** brkpa p0\.b, p3/z, p1\.b, p2\.b -+** ret -+*/ -+TEST_UNIFORM_P (brkpa_b_z_untied, -+ p0 = svbrkpa_b_z (p3, p1, p2), -+ p0 = svbrkpa_z (p3, p1, p2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkpb_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkpb_b.c -new file mode 100644 -index 000000000..b41797ee1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/brkpb_b.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** brkpb_b_z_tied1: -+** brkpb p0\.b, p3/z, p0\.b, p1\.b -+** ret -+*/ -+TEST_UNIFORM_P (brkpb_b_z_tied1, -+ p0 = svbrkpb_b_z (p3, p0, p1), -+ p0 = svbrkpb_z (p3, p0, p1)) -+ -+/* -+** brkpb_b_z_tied2: -+** brkpb p0\.b, p3/z, p1\.b, p0\.b -+** ret -+*/ -+TEST_UNIFORM_P (brkpb_b_z_tied2, -+ p0 = svbrkpb_b_z (p3, p1, p0), -+ p0 = svbrkpb_z (p3, p1, p0)) -+ -+/* -+** brkpb_b_z_untied: -+** brkpb p0\.b, p3/z, p1\.b, p2\.b -+** ret -+*/ -+TEST_UNIFORM_P (brkpb_b_z_untied, -+ p0 = svbrkpb_b_z (p3, p1, p2), -+ p0 = svbrkpb_z (p3, p1, p2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cadd_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cadd_f16.c -new file mode 100644 -index 000000000..e89c78455 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cadd_f16.c -@@ -0,0 +1,251 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cadd_90_f16_m_tied1: -+** fcadd z0\.h, p0/m, z0\.h, z1\.h, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_90_f16_m_tied1, svfloat16_t, -+ z0 = svcadd_f16_m (p0, z0, z1, 90), -+ z0 = svcadd_m (p0, z0, z1, 90)) -+ -+/* -+** cadd_90_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcadd z0\.h, p0/m, z0\.h, \1\.h, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_90_f16_m_tied2, svfloat16_t, -+ z0 = svcadd_f16_m (p0, z1, z0, 90), -+ z0 = svcadd_m (p0, z1, z0, 90)) -+ -+/* -+** cadd_90_f16_m_untied: -+** movprfx z0, z1 -+** fcadd z0\.h, p0/m, z0\.h, z2\.h, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_90_f16_m_untied, svfloat16_t, -+ z0 = svcadd_f16_m (p0, z1, z2, 90), -+ z0 = svcadd_m (p0, z1, z2, 90)) -+ -+/* -+** cadd_270_f16_m_tied1: -+** fcadd z0\.h, p0/m, z0\.h, z1\.h, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_270_f16_m_tied1, 
svfloat16_t, -+ z0 = svcadd_f16_m (p0, z0, z1, 270), -+ z0 = svcadd_m (p0, z0, z1, 270)) -+ -+/* -+** cadd_270_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcadd z0\.h, p0/m, z0\.h, \1\.h, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_270_f16_m_tied2, svfloat16_t, -+ z0 = svcadd_f16_m (p0, z1, z0, 270), -+ z0 = svcadd_m (p0, z1, z0, 270)) -+ -+/* -+** cadd_270_f16_m_untied: -+** movprfx z0, z1 -+** fcadd z0\.h, p0/m, z0\.h, z2\.h, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_270_f16_m_untied, svfloat16_t, -+ z0 = svcadd_f16_m (p0, z1, z2, 270), -+ z0 = svcadd_m (p0, z1, z2, 270)) -+ -+/* -+** cadd_90_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fcadd z0\.h, p0/m, z0\.h, z1\.h, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_90_f16_z_tied1, svfloat16_t, -+ z0 = svcadd_f16_z (p0, z0, z1, 90), -+ z0 = svcadd_z (p0, z0, z1, 90)) -+ -+/* -+** cadd_90_f16_z_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, z1\.h -+** fcadd z0\.h, p0/m, z0\.h, \1\.h, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_90_f16_z_tied2, svfloat16_t, -+ z0 = svcadd_f16_z (p0, z1, z0, 90), -+ z0 = svcadd_z (p0, z1, z0, 90)) -+ -+/* -+** cadd_90_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fcadd z0\.h, p0/m, z0\.h, z2\.h, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_90_f16_z_untied, svfloat16_t, -+ z0 = svcadd_f16_z (p0, z1, z2, 90), -+ z0 = svcadd_z (p0, z1, z2, 90)) -+ -+/* -+** cadd_270_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fcadd z0\.h, p0/m, z0\.h, z1\.h, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_270_f16_z_tied1, svfloat16_t, -+ z0 = svcadd_f16_z (p0, z0, z1, 270), -+ z0 = svcadd_z (p0, z0, z1, 270)) -+ -+/* -+** cadd_270_f16_z_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, z1\.h -+** fcadd z0\.h, p0/m, z0\.h, \1\.h, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_270_f16_z_tied2, svfloat16_t, -+ z0 = svcadd_f16_z (p0, z1, z0, 270), -+ z0 = svcadd_z (p0, z1, z0, 270)) -+ -+/* -+** cadd_270_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fcadd z0\.h, p0/m, z0\.h, z2\.h, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_270_f16_z_untied, svfloat16_t, -+ z0 = svcadd_f16_z (p0, z1, z2, 270), -+ z0 = svcadd_z (p0, z1, z2, 270)) -+ -+/* -+** cadd_90_f16_x_tied1: -+** fcadd z0\.h, p0/m, z0\.h, z1\.h, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_90_f16_x_tied1, svfloat16_t, -+ z0 = svcadd_f16_x (p0, z0, z1, 90), -+ z0 = svcadd_x (p0, z0, z1, 90)) -+ -+/* -+** cadd_90_f16_x_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcadd z0\.h, p0/m, z0\.h, \1\.h, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_90_f16_x_tied2, svfloat16_t, -+ z0 = svcadd_f16_x (p0, z1, z0, 90), -+ z0 = svcadd_x (p0, z1, z0, 90)) -+ -+/* -+** cadd_90_f16_x_untied: -+** movprfx z0, z1 -+** fcadd z0\.h, p0/m, z0\.h, z2\.h, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_90_f16_x_untied, svfloat16_t, -+ z0 = svcadd_f16_x (p0, z1, z2, 90), -+ z0 = svcadd_x (p0, z1, z2, 90)) -+ -+/* -+** cadd_270_f16_x_tied1: -+** fcadd z0\.h, p0/m, z0\.h, z1\.h, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_270_f16_x_tied1, svfloat16_t, -+ z0 = svcadd_f16_x (p0, z0, z1, 270), -+ z0 = svcadd_x (p0, z0, z1, 270)) -+ -+/* -+** cadd_270_f16_x_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcadd z0\.h, p0/m, z0\.h, \1\.h, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_270_f16_x_tied2, svfloat16_t, -+ z0 = svcadd_f16_x (p0, z1, z0, 270), -+ z0 = svcadd_x (p0, z1, z0, 270)) -+ -+/* -+** cadd_270_f16_x_untied: -+** movprfx z0, z1 -+** fcadd z0\.h, p0/m, z0\.h, z2\.h, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_270_f16_x_untied, svfloat16_t, -+ z0 = 
svcadd_f16_x (p0, z1, z2, 270), -+ z0 = svcadd_x (p0, z1, z2, 270)) -+ -+/* -+** ptrue_cadd_90_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cadd_90_f16_x_tied1, svfloat16_t, -+ z0 = svcadd_f16_x (svptrue_b16 (), z0, z1, 90), -+ z0 = svcadd_x (svptrue_b16 (), z0, z1, 90)) -+ -+/* -+** ptrue_cadd_90_f16_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cadd_90_f16_x_tied2, svfloat16_t, -+ z0 = svcadd_f16_x (svptrue_b16 (), z1, z0, 90), -+ z0 = svcadd_x (svptrue_b16 (), z1, z0, 90)) -+ -+/* -+** ptrue_cadd_90_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cadd_90_f16_x_untied, svfloat16_t, -+ z0 = svcadd_f16_x (svptrue_b16 (), z1, z2, 90), -+ z0 = svcadd_x (svptrue_b16 (), z1, z2, 90)) -+ -+/* -+** ptrue_cadd_270_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cadd_270_f16_x_tied1, svfloat16_t, -+ z0 = svcadd_f16_x (svptrue_b16 (), z0, z1, 270), -+ z0 = svcadd_x (svptrue_b16 (), z0, z1, 270)) -+ -+/* -+** ptrue_cadd_270_f16_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cadd_270_f16_x_tied2, svfloat16_t, -+ z0 = svcadd_f16_x (svptrue_b16 (), z1, z0, 270), -+ z0 = svcadd_x (svptrue_b16 (), z1, z0, 270)) -+ -+/* -+** ptrue_cadd_270_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cadd_270_f16_x_untied, svfloat16_t, -+ z0 = svcadd_f16_x (svptrue_b16 (), z1, z2, 270), -+ z0 = svcadd_x (svptrue_b16 (), z1, z2, 270)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cadd_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cadd_f32.c -new file mode 100644 -index 000000000..ed5c16ff3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cadd_f32.c -@@ -0,0 +1,251 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cadd_90_f32_m_tied1: -+** fcadd z0\.s, p0/m, z0\.s, z1\.s, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_90_f32_m_tied1, svfloat32_t, -+ z0 = svcadd_f32_m (p0, z0, z1, 90), -+ z0 = svcadd_m (p0, z0, z1, 90)) -+ -+/* -+** cadd_90_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcadd z0\.s, p0/m, z0\.s, \1\.s, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_90_f32_m_tied2, svfloat32_t, -+ z0 = svcadd_f32_m (p0, z1, z0, 90), -+ z0 = svcadd_m (p0, z1, z0, 90)) -+ -+/* -+** cadd_90_f32_m_untied: -+** movprfx z0, z1 -+** fcadd z0\.s, p0/m, z0\.s, z2\.s, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_90_f32_m_untied, svfloat32_t, -+ z0 = svcadd_f32_m (p0, z1, z2, 90), -+ z0 = svcadd_m (p0, z1, z2, 90)) -+ -+/* -+** cadd_270_f32_m_tied1: -+** fcadd z0\.s, p0/m, z0\.s, z1\.s, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_270_f32_m_tied1, svfloat32_t, -+ z0 = svcadd_f32_m (p0, z0, z1, 270), -+ z0 = svcadd_m (p0, z0, z1, 270)) -+ -+/* -+** cadd_270_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcadd z0\.s, p0/m, z0\.s, \1\.s, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_270_f32_m_tied2, svfloat32_t, -+ z0 = svcadd_f32_m (p0, z1, z0, 270), -+ z0 = svcadd_m (p0, z1, z0, 270)) -+ -+/* -+** cadd_270_f32_m_untied: -+** movprfx z0, z1 -+** fcadd z0\.s, p0/m, z0\.s, z2\.s, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_270_f32_m_untied, svfloat32_t, -+ z0 = svcadd_f32_m (p0, z1, z2, 270), -+ z0 = svcadd_m (p0, z1, z2, 270)) -+ -+/* -+** cadd_90_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fcadd z0\.s, p0/m, z0\.s, 
z1\.s, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_90_f32_z_tied1, svfloat32_t, -+ z0 = svcadd_f32_z (p0, z0, z1, 90), -+ z0 = svcadd_z (p0, z0, z1, 90)) -+ -+/* -+** cadd_90_f32_z_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, z1\.s -+** fcadd z0\.s, p0/m, z0\.s, \1\.s, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_90_f32_z_tied2, svfloat32_t, -+ z0 = svcadd_f32_z (p0, z1, z0, 90), -+ z0 = svcadd_z (p0, z1, z0, 90)) -+ -+/* -+** cadd_90_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fcadd z0\.s, p0/m, z0\.s, z2\.s, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_90_f32_z_untied, svfloat32_t, -+ z0 = svcadd_f32_z (p0, z1, z2, 90), -+ z0 = svcadd_z (p0, z1, z2, 90)) -+ -+/* -+** cadd_270_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fcadd z0\.s, p0/m, z0\.s, z1\.s, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_270_f32_z_tied1, svfloat32_t, -+ z0 = svcadd_f32_z (p0, z0, z1, 270), -+ z0 = svcadd_z (p0, z0, z1, 270)) -+ -+/* -+** cadd_270_f32_z_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, z1\.s -+** fcadd z0\.s, p0/m, z0\.s, \1\.s, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_270_f32_z_tied2, svfloat32_t, -+ z0 = svcadd_f32_z (p0, z1, z0, 270), -+ z0 = svcadd_z (p0, z1, z0, 270)) -+ -+/* -+** cadd_270_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fcadd z0\.s, p0/m, z0\.s, z2\.s, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_270_f32_z_untied, svfloat32_t, -+ z0 = svcadd_f32_z (p0, z1, z2, 270), -+ z0 = svcadd_z (p0, z1, z2, 270)) -+ -+/* -+** cadd_90_f32_x_tied1: -+** fcadd z0\.s, p0/m, z0\.s, z1\.s, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_90_f32_x_tied1, svfloat32_t, -+ z0 = svcadd_f32_x (p0, z0, z1, 90), -+ z0 = svcadd_x (p0, z0, z1, 90)) -+ -+/* -+** cadd_90_f32_x_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcadd z0\.s, p0/m, z0\.s, \1\.s, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_90_f32_x_tied2, svfloat32_t, -+ z0 = svcadd_f32_x (p0, z1, z0, 90), -+ z0 = svcadd_x (p0, z1, z0, 90)) -+ -+/* -+** cadd_90_f32_x_untied: -+** movprfx z0, z1 -+** fcadd z0\.s, p0/m, z0\.s, z2\.s, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_90_f32_x_untied, svfloat32_t, -+ z0 = svcadd_f32_x (p0, z1, z2, 90), -+ z0 = svcadd_x (p0, z1, z2, 90)) -+ -+/* -+** cadd_270_f32_x_tied1: -+** fcadd z0\.s, p0/m, z0\.s, z1\.s, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_270_f32_x_tied1, svfloat32_t, -+ z0 = svcadd_f32_x (p0, z0, z1, 270), -+ z0 = svcadd_x (p0, z0, z1, 270)) -+ -+/* -+** cadd_270_f32_x_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcadd z0\.s, p0/m, z0\.s, \1\.s, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_270_f32_x_tied2, svfloat32_t, -+ z0 = svcadd_f32_x (p0, z1, z0, 270), -+ z0 = svcadd_x (p0, z1, z0, 270)) -+ -+/* -+** cadd_270_f32_x_untied: -+** movprfx z0, z1 -+** fcadd z0\.s, p0/m, z0\.s, z2\.s, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_270_f32_x_untied, svfloat32_t, -+ z0 = svcadd_f32_x (p0, z1, z2, 270), -+ z0 = svcadd_x (p0, z1, z2, 270)) -+ -+/* -+** ptrue_cadd_90_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cadd_90_f32_x_tied1, svfloat32_t, -+ z0 = svcadd_f32_x (svptrue_b32 (), z0, z1, 90), -+ z0 = svcadd_x (svptrue_b32 (), z0, z1, 90)) -+ -+/* -+** ptrue_cadd_90_f32_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cadd_90_f32_x_tied2, svfloat32_t, -+ z0 = svcadd_f32_x (svptrue_b32 (), z1, z0, 90), -+ z0 = svcadd_x (svptrue_b32 (), z1, z0, 90)) -+ -+/* -+** ptrue_cadd_90_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cadd_90_f32_x_untied, svfloat32_t, -+ z0 = svcadd_f32_x (svptrue_b32 (), z1, z2, 90), -+ z0 = svcadd_x (svptrue_b32 (), z1, z2, 90)) -+ -+/* -+** ptrue_cadd_270_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cadd_270_f32_x_tied1, svfloat32_t, -+ z0 = svcadd_f32_x (svptrue_b32 (), z0, z1, 270), -+ z0 = svcadd_x (svptrue_b32 (), z0, z1, 270)) -+ -+/* -+** ptrue_cadd_270_f32_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cadd_270_f32_x_tied2, svfloat32_t, -+ z0 = svcadd_f32_x (svptrue_b32 (), z1, z0, 270), -+ z0 = svcadd_x (svptrue_b32 (), z1, z0, 270)) -+ -+/* -+** ptrue_cadd_270_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cadd_270_f32_x_untied, svfloat32_t, -+ z0 = svcadd_f32_x (svptrue_b32 (), z1, z2, 270), -+ z0 = svcadd_x (svptrue_b32 (), z1, z2, 270)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cadd_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cadd_f64.c -new file mode 100644 -index 000000000..0ada881c5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cadd_f64.c -@@ -0,0 +1,251 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cadd_90_f64_m_tied1: -+** fcadd z0\.d, p0/m, z0\.d, z1\.d, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_90_f64_m_tied1, svfloat64_t, -+ z0 = svcadd_f64_m (p0, z0, z1, 90), -+ z0 = svcadd_m (p0, z0, z1, 90)) -+ -+/* -+** cadd_90_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fcadd z0\.d, p0/m, z0\.d, \1, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_90_f64_m_tied2, svfloat64_t, -+ z0 = svcadd_f64_m (p0, z1, z0, 90), -+ z0 = svcadd_m (p0, z1, z0, 90)) -+ -+/* -+** cadd_90_f64_m_untied: -+** movprfx z0, z1 -+** fcadd z0\.d, p0/m, z0\.d, z2\.d, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_90_f64_m_untied, svfloat64_t, -+ z0 = svcadd_f64_m (p0, z1, z2, 90), -+ z0 = svcadd_m (p0, z1, z2, 90)) -+ -+/* -+** cadd_270_f64_m_tied1: -+** fcadd z0\.d, p0/m, z0\.d, z1\.d, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_270_f64_m_tied1, svfloat64_t, -+ z0 = svcadd_f64_m (p0, z0, z1, 270), -+ z0 = svcadd_m (p0, z0, z1, 270)) -+ -+/* -+** cadd_270_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fcadd z0\.d, p0/m, z0\.d, \1, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_270_f64_m_tied2, svfloat64_t, -+ z0 = svcadd_f64_m (p0, z1, z0, 270), -+ z0 = svcadd_m (p0, z1, z0, 270)) -+ -+/* -+** cadd_270_f64_m_untied: -+** movprfx z0, z1 -+** fcadd z0\.d, p0/m, z0\.d, z2\.d, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_270_f64_m_untied, svfloat64_t, -+ z0 = svcadd_f64_m (p0, z1, z2, 270), -+ z0 = svcadd_m (p0, z1, z2, 270)) -+ -+/* -+** cadd_90_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fcadd z0\.d, p0/m, z0\.d, z1\.d, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_90_f64_z_tied1, svfloat64_t, -+ z0 = svcadd_f64_z (p0, z0, z1, 90), -+ z0 = svcadd_z (p0, z0, z1, 90)) -+ -+/* -+** cadd_90_f64_z_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, z1\.d -+** fcadd z0\.d, p0/m, z0\.d, \1, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_90_f64_z_tied2, svfloat64_t, -+ z0 = svcadd_f64_z (p0, z1, z0, 90), -+ z0 = svcadd_z (p0, z1, z0, 90)) -+ -+/* -+** cadd_90_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fcadd z0\.d, p0/m, z0\.d, z2\.d, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_90_f64_z_untied, svfloat64_t, -+ z0 = svcadd_f64_z (p0, z1, z2, 90), -+ z0 = svcadd_z (p0, z1, z2, 90)) -+ 
-+/* -+** cadd_270_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fcadd z0\.d, p0/m, z0\.d, z1\.d, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_270_f64_z_tied1, svfloat64_t, -+ z0 = svcadd_f64_z (p0, z0, z1, 270), -+ z0 = svcadd_z (p0, z0, z1, 270)) -+ -+/* -+** cadd_270_f64_z_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, z1\.d -+** fcadd z0\.d, p0/m, z0\.d, \1, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_270_f64_z_tied2, svfloat64_t, -+ z0 = svcadd_f64_z (p0, z1, z0, 270), -+ z0 = svcadd_z (p0, z1, z0, 270)) -+ -+/* -+** cadd_270_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fcadd z0\.d, p0/m, z0\.d, z2\.d, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_270_f64_z_untied, svfloat64_t, -+ z0 = svcadd_f64_z (p0, z1, z2, 270), -+ z0 = svcadd_z (p0, z1, z2, 270)) -+ -+/* -+** cadd_90_f64_x_tied1: -+** fcadd z0\.d, p0/m, z0\.d, z1\.d, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_90_f64_x_tied1, svfloat64_t, -+ z0 = svcadd_f64_x (p0, z0, z1, 90), -+ z0 = svcadd_x (p0, z0, z1, 90)) -+ -+/* -+** cadd_90_f64_x_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fcadd z0\.d, p0/m, z0\.d, \1, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_90_f64_x_tied2, svfloat64_t, -+ z0 = svcadd_f64_x (p0, z1, z0, 90), -+ z0 = svcadd_x (p0, z1, z0, 90)) -+ -+/* -+** cadd_90_f64_x_untied: -+** movprfx z0, z1 -+** fcadd z0\.d, p0/m, z0\.d, z2\.d, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_90_f64_x_untied, svfloat64_t, -+ z0 = svcadd_f64_x (p0, z1, z2, 90), -+ z0 = svcadd_x (p0, z1, z2, 90)) -+ -+/* -+** cadd_270_f64_x_tied1: -+** fcadd z0\.d, p0/m, z0\.d, z1\.d, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_270_f64_x_tied1, svfloat64_t, -+ z0 = svcadd_f64_x (p0, z0, z1, 270), -+ z0 = svcadd_x (p0, z0, z1, 270)) -+ -+/* -+** cadd_270_f64_x_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fcadd z0\.d, p0/m, z0\.d, \1, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_270_f64_x_tied2, svfloat64_t, -+ z0 = svcadd_f64_x (p0, z1, z0, 270), -+ z0 = svcadd_x (p0, z1, z0, 270)) -+ -+/* -+** cadd_270_f64_x_untied: -+** movprfx z0, z1 -+** fcadd z0\.d, p0/m, z0\.d, z2\.d, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cadd_270_f64_x_untied, svfloat64_t, -+ z0 = svcadd_f64_x (p0, z1, z2, 270), -+ z0 = svcadd_x (p0, z1, z2, 270)) -+ -+/* -+** ptrue_cadd_90_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cadd_90_f64_x_tied1, svfloat64_t, -+ z0 = svcadd_f64_x (svptrue_b64 (), z0, z1, 90), -+ z0 = svcadd_x (svptrue_b64 (), z0, z1, 90)) -+ -+/* -+** ptrue_cadd_90_f64_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cadd_90_f64_x_tied2, svfloat64_t, -+ z0 = svcadd_f64_x (svptrue_b64 (), z1, z0, 90), -+ z0 = svcadd_x (svptrue_b64 (), z1, z0, 90)) -+ -+/* -+** ptrue_cadd_90_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cadd_90_f64_x_untied, svfloat64_t, -+ z0 = svcadd_f64_x (svptrue_b64 (), z1, z2, 90), -+ z0 = svcadd_x (svptrue_b64 (), z1, z2, 90)) -+ -+/* -+** ptrue_cadd_270_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cadd_270_f64_x_tied1, svfloat64_t, -+ z0 = svcadd_f64_x (svptrue_b64 (), z0, z1, 270), -+ z0 = svcadd_x (svptrue_b64 (), z0, z1, 270)) -+ -+/* -+** ptrue_cadd_270_f64_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cadd_270_f64_x_tied2, svfloat64_t, -+ z0 = svcadd_f64_x (svptrue_b64 (), z1, z0, 270), -+ z0 = svcadd_x (svptrue_b64 (), z1, z0, 270)) -+ -+/* -+** ptrue_cadd_270_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cadd_270_f64_x_untied, svfloat64_t, -+ z0 = svcadd_f64_x (svptrue_b64 (), z1, z2, 270), -+ z0 = svcadd_x (svptrue_b64 (), z1, z2, 270)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_bf16.c -new file mode 100644 -index 000000000..a15e34400 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_bf16.c -@@ -0,0 +1,52 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** clasta_bf16_tied1: -+** clasta z0\.h, p0, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (clasta_bf16_tied1, svbfloat16_t, -+ z0 = svclasta_bf16 (p0, z0, z1), -+ z0 = svclasta (p0, z0, z1)) -+ -+/* -+** clasta_bf16_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** clasta z0\.h, p0, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (clasta_bf16_tied2, svbfloat16_t, -+ z0 = svclasta_bf16 (p0, z1, z0), -+ z0 = svclasta (p0, z1, z0)) -+ -+/* -+** clasta_bf16_untied: -+** movprfx z0, z1 -+** clasta z0\.h, p0, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (clasta_bf16_untied, svbfloat16_t, -+ z0 = svclasta_bf16 (p0, z1, z2), -+ z0 = svclasta (p0, z1, z2)) -+ -+/* -+** clasta_d0_bf16: -+** clasta h0, p0, h0, z2\.h -+** ret -+*/ -+TEST_FOLD_LEFT_D (clasta_d0_bf16, bfloat16_t, svbfloat16_t, -+ d0 = svclasta_n_bf16 (p0, d0, z2), -+ d0 = svclasta (p0, d0, z2)) -+ -+/* -+** clasta_d1_bf16: -+** mov v0\.h\[0\], v1\.h\[0\] -+** clasta h0, p0, h0, z2\.h -+** ret -+*/ -+TEST_FOLD_LEFT_D (clasta_d1_bf16, bfloat16_t, svbfloat16_t, -+ d0 = svclasta_n_bf16 (p0, d1, z2), -+ d0 = svclasta (p0, d1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f16.c -new file mode 100644 -index 000000000..d9a980f60 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f16.c -@@ -0,0 +1,52 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** clasta_f16_tied1: -+** clasta z0\.h, p0, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (clasta_f16_tied1, svfloat16_t, -+ z0 = svclasta_f16 (p0, z0, z1), -+ z0 = svclasta (p0, z0, z1)) -+ -+/* -+** clasta_f16_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** clasta z0\.h, p0, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (clasta_f16_tied2, svfloat16_t, -+ z0 = svclasta_f16 (p0, z1, z0), -+ z0 = svclasta (p0, z1, z0)) -+ -+/* -+** clasta_f16_untied: -+** movprfx z0, z1 -+** clasta z0\.h, p0, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (clasta_f16_untied, svfloat16_t, -+ z0 = svclasta_f16 (p0, z1, z2), -+ z0 = svclasta (p0, z1, z2)) -+ -+/* -+** clasta_d0_f16: -+** clasta h0, p0, h0, z2\.h -+** ret -+*/ -+TEST_FOLD_LEFT_D (clasta_d0_f16, float16_t, svfloat16_t, -+ d0 = svclasta_n_f16 (p0, d0, z2), -+ d0 = svclasta (p0, d0, z2)) -+ -+/* -+** clasta_d1_f16: -+** mov v0\.h\[0\], v1\.h\[0\] -+** clasta h0, p0, h0, z2\.h -+** ret -+*/ -+TEST_FOLD_LEFT_D (clasta_d1_f16, float16_t, svfloat16_t, -+ d0 = svclasta_n_f16 (p0, d1, z2), -+ d0 = svclasta (p0, d1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f32.c -new file mode 
100644 -index 000000000..cac01fa6d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f32.c -@@ -0,0 +1,52 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** clasta_f32_tied1: -+** clasta z0\.s, p0, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (clasta_f32_tied1, svfloat32_t, -+ z0 = svclasta_f32 (p0, z0, z1), -+ z0 = svclasta (p0, z0, z1)) -+ -+/* -+** clasta_f32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** clasta z0\.s, p0, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (clasta_f32_tied2, svfloat32_t, -+ z0 = svclasta_f32 (p0, z1, z0), -+ z0 = svclasta (p0, z1, z0)) -+ -+/* -+** clasta_f32_untied: -+** movprfx z0, z1 -+** clasta z0\.s, p0, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (clasta_f32_untied, svfloat32_t, -+ z0 = svclasta_f32 (p0, z1, z2), -+ z0 = svclasta (p0, z1, z2)) -+ -+/* -+** clasta_d0_f32: -+** clasta s0, p0, s0, z2\.s -+** ret -+*/ -+TEST_FOLD_LEFT_D (clasta_d0_f32, float32_t, svfloat32_t, -+ d0 = svclasta_n_f32 (p0, d0, z2), -+ d0 = svclasta (p0, d0, z2)) -+ -+/* -+** clasta_d1_f32: -+** fmov s0, s1 -+** clasta s0, p0, s0, z2\.s -+** ret -+*/ -+TEST_FOLD_LEFT_D (clasta_d1_f32, float32_t, svfloat32_t, -+ d0 = svclasta_n_f32 (p0, d1, z2), -+ d0 = svclasta (p0, d1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f64.c -new file mode 100644 -index 000000000..43b93553b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_f64.c -@@ -0,0 +1,52 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** clasta_f64_tied1: -+** clasta z0\.d, p0, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (clasta_f64_tied1, svfloat64_t, -+ z0 = svclasta_f64 (p0, z0, z1), -+ z0 = svclasta (p0, z0, z1)) -+ -+/* -+** clasta_f64_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** clasta z0\.d, p0, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (clasta_f64_tied2, svfloat64_t, -+ z0 = svclasta_f64 (p0, z1, z0), -+ z0 = svclasta (p0, z1, z0)) -+ -+/* -+** clasta_f64_untied: -+** movprfx z0, z1 -+** clasta z0\.d, p0, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (clasta_f64_untied, svfloat64_t, -+ z0 = svclasta_f64 (p0, z1, z2), -+ z0 = svclasta (p0, z1, z2)) -+ -+/* -+** clasta_d0_f64: -+** clasta d0, p0, d0, z2\.d -+** ret -+*/ -+TEST_FOLD_LEFT_D (clasta_d0_f64, float64_t, svfloat64_t, -+ d0 = svclasta_n_f64 (p0, d0, z2), -+ d0 = svclasta (p0, d0, z2)) -+ -+/* -+** clasta_d1_f64: -+** fmov d0, d1 -+** clasta d0, p0, d0, z2\.d -+** ret -+*/ -+TEST_FOLD_LEFT_D (clasta_d1_f64, float64_t, svfloat64_t, -+ d0 = svclasta_n_f64 (p0, d1, z2), -+ d0 = svclasta (p0, d1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_s16.c -new file mode 100644 -index 000000000..f5e4f85ce ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_s16.c -@@ -0,0 +1,52 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** clasta_s16_tied1: -+** clasta z0\.h, p0, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (clasta_s16_tied1, svint16_t, -+ z0 = svclasta_s16 (p0, z0, z1), -+ z0 = svclasta (p0, z0, z1)) -+ -+/* -+** clasta_s16_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** clasta z0\.h, p0, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (clasta_s16_tied2, svint16_t, -+ z0 = svclasta_s16 (p0, z1, z0), 
-+ z0 = svclasta (p0, z1, z0)) -+ -+/* -+** clasta_s16_untied: -+** movprfx z0, z1 -+** clasta z0\.h, p0, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (clasta_s16_untied, svint16_t, -+ z0 = svclasta_s16 (p0, z1, z2), -+ z0 = svclasta (p0, z1, z2)) -+ -+/* -+** clasta_x0_s16: -+** clasta w0, p0, w0, z0\.h -+** ret -+*/ -+TEST_FOLD_LEFT_X (clasta_x0_s16, int16_t, svint16_t, -+ x0 = svclasta_n_s16 (p0, x0, z0), -+ x0 = svclasta (p0, x0, z0)) -+ -+/* -+** clasta_x1_s16: -+** mov w0, w1 -+** clasta w0, p0, w0, z0\.h -+** ret -+*/ -+TEST_FOLD_LEFT_X (clasta_x1_s16, int16_t, svint16_t, -+ x0 = svclasta_n_s16 (p0, x1, z0), -+ x0 = svclasta (p0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_s32.c -new file mode 100644 -index 000000000..fbd82e778 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_s32.c -@@ -0,0 +1,52 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** clasta_s32_tied1: -+** clasta z0\.s, p0, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (clasta_s32_tied1, svint32_t, -+ z0 = svclasta_s32 (p0, z0, z1), -+ z0 = svclasta (p0, z0, z1)) -+ -+/* -+** clasta_s32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** clasta z0\.s, p0, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (clasta_s32_tied2, svint32_t, -+ z0 = svclasta_s32 (p0, z1, z0), -+ z0 = svclasta (p0, z1, z0)) -+ -+/* -+** clasta_s32_untied: -+** movprfx z0, z1 -+** clasta z0\.s, p0, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (clasta_s32_untied, svint32_t, -+ z0 = svclasta_s32 (p0, z1, z2), -+ z0 = svclasta (p0, z1, z2)) -+ -+/* -+** clasta_x0_s32: -+** clasta w0, p0, w0, z0\.s -+** ret -+*/ -+TEST_FOLD_LEFT_X (clasta_x0_s32, int32_t, svint32_t, -+ x0 = svclasta_n_s32 (p0, x0, z0), -+ x0 = svclasta (p0, x0, z0)) -+ -+/* -+** clasta_x1_s32: -+** mov w0, w1 -+** clasta w0, p0, w0, z0\.s -+** ret -+*/ -+TEST_FOLD_LEFT_X (clasta_x1_s32, int32_t, svint32_t, -+ x0 = svclasta_n_s32 (p0, x1, z0), -+ x0 = svclasta (p0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_s64.c -new file mode 100644 -index 000000000..08edf157b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_s64.c -@@ -0,0 +1,52 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** clasta_s64_tied1: -+** clasta z0\.d, p0, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (clasta_s64_tied1, svint64_t, -+ z0 = svclasta_s64 (p0, z0, z1), -+ z0 = svclasta (p0, z0, z1)) -+ -+/* -+** clasta_s64_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** clasta z0\.d, p0, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (clasta_s64_tied2, svint64_t, -+ z0 = svclasta_s64 (p0, z1, z0), -+ z0 = svclasta (p0, z1, z0)) -+ -+/* -+** clasta_s64_untied: -+** movprfx z0, z1 -+** clasta z0\.d, p0, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (clasta_s64_untied, svint64_t, -+ z0 = svclasta_s64 (p0, z1, z2), -+ z0 = svclasta (p0, z1, z2)) -+ -+/* -+** clasta_x0_s64: -+** clasta x0, p0, x0, z0\.d -+** ret -+*/ -+TEST_FOLD_LEFT_X (clasta_x0_s64, int64_t, svint64_t, -+ x0 = svclasta_n_s64 (p0, x0, z0), -+ x0 = svclasta (p0, x0, z0)) -+ -+/* -+** clasta_x1_s64: -+** mov x0, x1 -+** clasta x0, p0, x0, z0\.d -+** ret -+*/ -+TEST_FOLD_LEFT_X (clasta_x1_s64, int64_t, svint64_t, -+ x0 = svclasta_n_s64 (p0, x1, z0), -+ x0 = svclasta (p0, x1, z0)) -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_s8.c -new file mode 100644 -index 000000000..286f16a9d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_s8.c -@@ -0,0 +1,52 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** clasta_s8_tied1: -+** clasta z0\.b, p0, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (clasta_s8_tied1, svint8_t, -+ z0 = svclasta_s8 (p0, z0, z1), -+ z0 = svclasta (p0, z0, z1)) -+ -+/* -+** clasta_s8_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** clasta z0\.b, p0, z0\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (clasta_s8_tied2, svint8_t, -+ z0 = svclasta_s8 (p0, z1, z0), -+ z0 = svclasta (p0, z1, z0)) -+ -+/* -+** clasta_s8_untied: -+** movprfx z0, z1 -+** clasta z0\.b, p0, z0\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (clasta_s8_untied, svint8_t, -+ z0 = svclasta_s8 (p0, z1, z2), -+ z0 = svclasta (p0, z1, z2)) -+ -+/* -+** clasta_x0_s8: -+** clasta w0, p0, w0, z0\.b -+** ret -+*/ -+TEST_FOLD_LEFT_X (clasta_x0_s8, int8_t, svint8_t, -+ x0 = svclasta_n_s8 (p0, x0, z0), -+ x0 = svclasta (p0, x0, z0)) -+ -+/* -+** clasta_x1_s8: -+** mov w0, w1 -+** clasta w0, p0, w0, z0\.b -+** ret -+*/ -+TEST_FOLD_LEFT_X (clasta_x1_s8, int8_t, svint8_t, -+ x0 = svclasta_n_s8 (p0, x1, z0), -+ x0 = svclasta (p0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_u16.c -new file mode 100644 -index 000000000..40c6dca90 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_u16.c -@@ -0,0 +1,52 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** clasta_u16_tied1: -+** clasta z0\.h, p0, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (clasta_u16_tied1, svuint16_t, -+ z0 = svclasta_u16 (p0, z0, z1), -+ z0 = svclasta (p0, z0, z1)) -+ -+/* -+** clasta_u16_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** clasta z0\.h, p0, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (clasta_u16_tied2, svuint16_t, -+ z0 = svclasta_u16 (p0, z1, z0), -+ z0 = svclasta (p0, z1, z0)) -+ -+/* -+** clasta_u16_untied: -+** movprfx z0, z1 -+** clasta z0\.h, p0, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (clasta_u16_untied, svuint16_t, -+ z0 = svclasta_u16 (p0, z1, z2), -+ z0 = svclasta (p0, z1, z2)) -+ -+/* -+** clasta_x0_u16: -+** clasta w0, p0, w0, z0\.h -+** ret -+*/ -+TEST_FOLD_LEFT_X (clasta_x0_u16, uint16_t, svuint16_t, -+ x0 = svclasta_n_u16 (p0, x0, z0), -+ x0 = svclasta (p0, x0, z0)) -+ -+/* -+** clasta_x1_u16: -+** mov w0, w1 -+** clasta w0, p0, w0, z0\.h -+** ret -+*/ -+TEST_FOLD_LEFT_X (clasta_x1_u16, uint16_t, svuint16_t, -+ x0 = svclasta_n_u16 (p0, x1, z0), -+ x0 = svclasta (p0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_u32.c -new file mode 100644 -index 000000000..6c46e13cf ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_u32.c -@@ -0,0 +1,52 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** clasta_u32_tied1: -+** clasta z0\.s, p0, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (clasta_u32_tied1, svuint32_t, -+ z0 = svclasta_u32 (p0, z0, z1), -+ z0 = svclasta (p0, z0, z1)) -+ -+/* -+** clasta_u32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** clasta z0\.s, p0, z0\.s, \1\.s 
-+** ret -+*/ -+TEST_UNIFORM_Z (clasta_u32_tied2, svuint32_t, -+ z0 = svclasta_u32 (p0, z1, z0), -+ z0 = svclasta (p0, z1, z0)) -+ -+/* -+** clasta_u32_untied: -+** movprfx z0, z1 -+** clasta z0\.s, p0, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (clasta_u32_untied, svuint32_t, -+ z0 = svclasta_u32 (p0, z1, z2), -+ z0 = svclasta (p0, z1, z2)) -+ -+/* -+** clasta_x0_u32: -+** clasta w0, p0, w0, z0\.s -+** ret -+*/ -+TEST_FOLD_LEFT_X (clasta_x0_u32, uint32_t, svuint32_t, -+ x0 = svclasta_n_u32 (p0, x0, z0), -+ x0 = svclasta (p0, x0, z0)) -+ -+/* -+** clasta_x1_u32: -+** mov w0, w1 -+** clasta w0, p0, w0, z0\.s -+** ret -+*/ -+TEST_FOLD_LEFT_X (clasta_x1_u32, uint32_t, svuint32_t, -+ x0 = svclasta_n_u32 (p0, x1, z0), -+ x0 = svclasta (p0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_u64.c -new file mode 100644 -index 000000000..99ad41e50 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_u64.c -@@ -0,0 +1,52 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** clasta_u64_tied1: -+** clasta z0\.d, p0, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (clasta_u64_tied1, svuint64_t, -+ z0 = svclasta_u64 (p0, z0, z1), -+ z0 = svclasta (p0, z0, z1)) -+ -+/* -+** clasta_u64_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** clasta z0\.d, p0, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (clasta_u64_tied2, svuint64_t, -+ z0 = svclasta_u64 (p0, z1, z0), -+ z0 = svclasta (p0, z1, z0)) -+ -+/* -+** clasta_u64_untied: -+** movprfx z0, z1 -+** clasta z0\.d, p0, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (clasta_u64_untied, svuint64_t, -+ z0 = svclasta_u64 (p0, z1, z2), -+ z0 = svclasta (p0, z1, z2)) -+ -+/* -+** clasta_x0_u64: -+** clasta x0, p0, x0, z0\.d -+** ret -+*/ -+TEST_FOLD_LEFT_X (clasta_x0_u64, uint64_t, svuint64_t, -+ x0 = svclasta_n_u64 (p0, x0, z0), -+ x0 = svclasta (p0, x0, z0)) -+ -+/* -+** clasta_x1_u64: -+** mov x0, x1 -+** clasta x0, p0, x0, z0\.d -+** ret -+*/ -+TEST_FOLD_LEFT_X (clasta_x1_u64, uint64_t, svuint64_t, -+ x0 = svclasta_n_u64 (p0, x1, z0), -+ x0 = svclasta (p0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_u8.c -new file mode 100644 -index 000000000..eb438f4ea ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clasta_u8.c -@@ -0,0 +1,52 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** clasta_u8_tied1: -+** clasta z0\.b, p0, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (clasta_u8_tied1, svuint8_t, -+ z0 = svclasta_u8 (p0, z0, z1), -+ z0 = svclasta (p0, z0, z1)) -+ -+/* -+** clasta_u8_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** clasta z0\.b, p0, z0\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (clasta_u8_tied2, svuint8_t, -+ z0 = svclasta_u8 (p0, z1, z0), -+ z0 = svclasta (p0, z1, z0)) -+ -+/* -+** clasta_u8_untied: -+** movprfx z0, z1 -+** clasta z0\.b, p0, z0\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (clasta_u8_untied, svuint8_t, -+ z0 = svclasta_u8 (p0, z1, z2), -+ z0 = svclasta (p0, z1, z2)) -+ -+/* -+** clasta_x0_u8: -+** clasta w0, p0, w0, z0\.b -+** ret -+*/ -+TEST_FOLD_LEFT_X (clasta_x0_u8, uint8_t, svuint8_t, -+ x0 = svclasta_n_u8 (p0, x0, z0), -+ x0 = svclasta (p0, x0, z0)) -+ -+/* -+** clasta_x1_u8: -+** mov w0, w1 -+** clasta w0, p0, w0, z0\.b -+** ret -+*/ -+TEST_FOLD_LEFT_X (clasta_x1_u8, uint8_t, 
svuint8_t, -+ x0 = svclasta_n_u8 (p0, x1, z0), -+ x0 = svclasta (p0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_bf16.c -new file mode 100644 -index 000000000..235fd1b4e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_bf16.c -@@ -0,0 +1,52 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** clastb_bf16_tied1: -+** clastb z0\.h, p0, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (clastb_bf16_tied1, svbfloat16_t, -+ z0 = svclastb_bf16 (p0, z0, z1), -+ z0 = svclastb (p0, z0, z1)) -+ -+/* -+** clastb_bf16_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** clastb z0\.h, p0, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (clastb_bf16_tied2, svbfloat16_t, -+ z0 = svclastb_bf16 (p0, z1, z0), -+ z0 = svclastb (p0, z1, z0)) -+ -+/* -+** clastb_bf16_untied: -+** movprfx z0, z1 -+** clastb z0\.h, p0, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (clastb_bf16_untied, svbfloat16_t, -+ z0 = svclastb_bf16 (p0, z1, z2), -+ z0 = svclastb (p0, z1, z2)) -+ -+/* -+** clastb_d0_bf16: -+** clastb h0, p0, h0, z2\.h -+** ret -+*/ -+TEST_FOLD_LEFT_D (clastb_d0_bf16, bfloat16_t, svbfloat16_t, -+ d0 = svclastb_n_bf16 (p0, d0, z2), -+ d0 = svclastb (p0, d0, z2)) -+ -+/* -+** clastb_d1_bf16: -+** mov v0\.h\[0\], v1\.h\[0\] -+** clastb h0, p0, h0, z2\.h -+** ret -+*/ -+TEST_FOLD_LEFT_D (clastb_d1_bf16, bfloat16_t, svbfloat16_t, -+ d0 = svclastb_n_bf16 (p0, d1, z2), -+ d0 = svclastb (p0, d1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f16.c -new file mode 100644 -index 000000000..e56d7688a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f16.c -@@ -0,0 +1,52 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** clastb_f16_tied1: -+** clastb z0\.h, p0, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (clastb_f16_tied1, svfloat16_t, -+ z0 = svclastb_f16 (p0, z0, z1), -+ z0 = svclastb (p0, z0, z1)) -+ -+/* -+** clastb_f16_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** clastb z0\.h, p0, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (clastb_f16_tied2, svfloat16_t, -+ z0 = svclastb_f16 (p0, z1, z0), -+ z0 = svclastb (p0, z1, z0)) -+ -+/* -+** clastb_f16_untied: -+** movprfx z0, z1 -+** clastb z0\.h, p0, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (clastb_f16_untied, svfloat16_t, -+ z0 = svclastb_f16 (p0, z1, z2), -+ z0 = svclastb (p0, z1, z2)) -+ -+/* -+** clastb_d0_f16: -+** clastb h0, p0, h0, z2\.h -+** ret -+*/ -+TEST_FOLD_LEFT_D (clastb_d0_f16, float16_t, svfloat16_t, -+ d0 = svclastb_n_f16 (p0, d0, z2), -+ d0 = svclastb (p0, d0, z2)) -+ -+/* -+** clastb_d1_f16: -+** mov v0\.h\[0\], v1\.h\[0\] -+** clastb h0, p0, h0, z2\.h -+** ret -+*/ -+TEST_FOLD_LEFT_D (clastb_d1_f16, float16_t, svfloat16_t, -+ d0 = svclastb_n_f16 (p0, d1, z2), -+ d0 = svclastb (p0, d1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f32.c -new file mode 100644 -index 000000000..c580d1306 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f32.c -@@ -0,0 +1,52 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** clastb_f32_tied1: -+** clastb z0\.s, p0, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (clastb_f32_tied1, 
svfloat32_t, -+ z0 = svclastb_f32 (p0, z0, z1), -+ z0 = svclastb (p0, z0, z1)) -+ -+/* -+** clastb_f32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** clastb z0\.s, p0, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (clastb_f32_tied2, svfloat32_t, -+ z0 = svclastb_f32 (p0, z1, z0), -+ z0 = svclastb (p0, z1, z0)) -+ -+/* -+** clastb_f32_untied: -+** movprfx z0, z1 -+** clastb z0\.s, p0, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (clastb_f32_untied, svfloat32_t, -+ z0 = svclastb_f32 (p0, z1, z2), -+ z0 = svclastb (p0, z1, z2)) -+ -+/* -+** clastb_d0_f32: -+** clastb s0, p0, s0, z2\.s -+** ret -+*/ -+TEST_FOLD_LEFT_D (clastb_d0_f32, float32_t, svfloat32_t, -+ d0 = svclastb_n_f32 (p0, d0, z2), -+ d0 = svclastb (p0, d0, z2)) -+ -+/* -+** clastb_d1_f32: -+** fmov s0, s1 -+** clastb s0, p0, s0, z2\.s -+** ret -+*/ -+TEST_FOLD_LEFT_D (clastb_d1_f32, float32_t, svfloat32_t, -+ d0 = svclastb_n_f32 (p0, d1, z2), -+ d0 = svclastb (p0, d1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f64.c -new file mode 100644 -index 000000000..217a76f51 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_f64.c -@@ -0,0 +1,52 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** clastb_f64_tied1: -+** clastb z0\.d, p0, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (clastb_f64_tied1, svfloat64_t, -+ z0 = svclastb_f64 (p0, z0, z1), -+ z0 = svclastb (p0, z0, z1)) -+ -+/* -+** clastb_f64_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** clastb z0\.d, p0, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (clastb_f64_tied2, svfloat64_t, -+ z0 = svclastb_f64 (p0, z1, z0), -+ z0 = svclastb (p0, z1, z0)) -+ -+/* -+** clastb_f64_untied: -+** movprfx z0, z1 -+** clastb z0\.d, p0, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (clastb_f64_untied, svfloat64_t, -+ z0 = svclastb_f64 (p0, z1, z2), -+ z0 = svclastb (p0, z1, z2)) -+ -+/* -+** clastb_d0_f64: -+** clastb d0, p0, d0, z2\.d -+** ret -+*/ -+TEST_FOLD_LEFT_D (clastb_d0_f64, float64_t, svfloat64_t, -+ d0 = svclastb_n_f64 (p0, d0, z2), -+ d0 = svclastb (p0, d0, z2)) -+ -+/* -+** clastb_d1_f64: -+** fmov d0, d1 -+** clastb d0, p0, d0, z2\.d -+** ret -+*/ -+TEST_FOLD_LEFT_D (clastb_d1_f64, float64_t, svfloat64_t, -+ d0 = svclastb_n_f64 (p0, d1, z2), -+ d0 = svclastb (p0, d1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_s16.c -new file mode 100644 -index 000000000..37be28040 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_s16.c -@@ -0,0 +1,52 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** clastb_s16_tied1: -+** clastb z0\.h, p0, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (clastb_s16_tied1, svint16_t, -+ z0 = svclastb_s16 (p0, z0, z1), -+ z0 = svclastb (p0, z0, z1)) -+ -+/* -+** clastb_s16_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** clastb z0\.h, p0, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (clastb_s16_tied2, svint16_t, -+ z0 = svclastb_s16 (p0, z1, z0), -+ z0 = svclastb (p0, z1, z0)) -+ -+/* -+** clastb_s16_untied: -+** movprfx z0, z1 -+** clastb z0\.h, p0, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (clastb_s16_untied, svint16_t, -+ z0 = svclastb_s16 (p0, z1, z2), -+ z0 = svclastb (p0, z1, z2)) -+ -+/* -+** clastb_x0_s16: -+** clastb w0, p0, w0, z0\.h -+** ret -+*/ -+TEST_FOLD_LEFT_X 
(clastb_x0_s16, int16_t, svint16_t, -+ x0 = svclastb_n_s16 (p0, x0, z0), -+ x0 = svclastb (p0, x0, z0)) -+ -+/* -+** clastb_x1_s16: -+** mov w0, w1 -+** clastb w0, p0, w0, z0\.h -+** ret -+*/ -+TEST_FOLD_LEFT_X (clastb_x1_s16, int16_t, svint16_t, -+ x0 = svclastb_n_s16 (p0, x1, z0), -+ x0 = svclastb (p0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_s32.c -new file mode 100644 -index 000000000..2e56c5a8f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_s32.c -@@ -0,0 +1,52 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** clastb_s32_tied1: -+** clastb z0\.s, p0, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (clastb_s32_tied1, svint32_t, -+ z0 = svclastb_s32 (p0, z0, z1), -+ z0 = svclastb (p0, z0, z1)) -+ -+/* -+** clastb_s32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** clastb z0\.s, p0, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (clastb_s32_tied2, svint32_t, -+ z0 = svclastb_s32 (p0, z1, z0), -+ z0 = svclastb (p0, z1, z0)) -+ -+/* -+** clastb_s32_untied: -+** movprfx z0, z1 -+** clastb z0\.s, p0, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (clastb_s32_untied, svint32_t, -+ z0 = svclastb_s32 (p0, z1, z2), -+ z0 = svclastb (p0, z1, z2)) -+ -+/* -+** clastb_x0_s32: -+** clastb w0, p0, w0, z0\.s -+** ret -+*/ -+TEST_FOLD_LEFT_X (clastb_x0_s32, int32_t, svint32_t, -+ x0 = svclastb_n_s32 (p0, x0, z0), -+ x0 = svclastb (p0, x0, z0)) -+ -+/* -+** clastb_x1_s32: -+** mov w0, w1 -+** clastb w0, p0, w0, z0\.s -+** ret -+*/ -+TEST_FOLD_LEFT_X (clastb_x1_s32, int32_t, svint32_t, -+ x0 = svclastb_n_s32 (p0, x1, z0), -+ x0 = svclastb (p0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_s64.c -new file mode 100644 -index 000000000..9ce210aae ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_s64.c -@@ -0,0 +1,52 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** clastb_s64_tied1: -+** clastb z0\.d, p0, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (clastb_s64_tied1, svint64_t, -+ z0 = svclastb_s64 (p0, z0, z1), -+ z0 = svclastb (p0, z0, z1)) -+ -+/* -+** clastb_s64_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** clastb z0\.d, p0, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (clastb_s64_tied2, svint64_t, -+ z0 = svclastb_s64 (p0, z1, z0), -+ z0 = svclastb (p0, z1, z0)) -+ -+/* -+** clastb_s64_untied: -+** movprfx z0, z1 -+** clastb z0\.d, p0, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (clastb_s64_untied, svint64_t, -+ z0 = svclastb_s64 (p0, z1, z2), -+ z0 = svclastb (p0, z1, z2)) -+ -+/* -+** clastb_x0_s64: -+** clastb x0, p0, x0, z0\.d -+** ret -+*/ -+TEST_FOLD_LEFT_X (clastb_x0_s64, int64_t, svint64_t, -+ x0 = svclastb_n_s64 (p0, x0, z0), -+ x0 = svclastb (p0, x0, z0)) -+ -+/* -+** clastb_x1_s64: -+** mov x0, x1 -+** clastb x0, p0, x0, z0\.d -+** ret -+*/ -+TEST_FOLD_LEFT_X (clastb_x1_s64, int64_t, svint64_t, -+ x0 = svclastb_n_s64 (p0, x1, z0), -+ x0 = svclastb (p0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_s8.c -new file mode 100644 -index 000000000..eb76c22cd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_s8.c -@@ -0,0 +1,52 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ 
-+#include "test_sve_acle.h" -+ -+/* -+** clastb_s8_tied1: -+** clastb z0\.b, p0, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (clastb_s8_tied1, svint8_t, -+ z0 = svclastb_s8 (p0, z0, z1), -+ z0 = svclastb (p0, z0, z1)) -+ -+/* -+** clastb_s8_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** clastb z0\.b, p0, z0\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (clastb_s8_tied2, svint8_t, -+ z0 = svclastb_s8 (p0, z1, z0), -+ z0 = svclastb (p0, z1, z0)) -+ -+/* -+** clastb_s8_untied: -+** movprfx z0, z1 -+** clastb z0\.b, p0, z0\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (clastb_s8_untied, svint8_t, -+ z0 = svclastb_s8 (p0, z1, z2), -+ z0 = svclastb (p0, z1, z2)) -+ -+/* -+** clastb_x0_s8: -+** clastb w0, p0, w0, z0\.b -+** ret -+*/ -+TEST_FOLD_LEFT_X (clastb_x0_s8, int8_t, svint8_t, -+ x0 = svclastb_n_s8 (p0, x0, z0), -+ x0 = svclastb (p0, x0, z0)) -+ -+/* -+** clastb_x1_s8: -+** mov w0, w1 -+** clastb w0, p0, w0, z0\.b -+** ret -+*/ -+TEST_FOLD_LEFT_X (clastb_x1_s8, int8_t, svint8_t, -+ x0 = svclastb_n_s8 (p0, x1, z0), -+ x0 = svclastb (p0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_u16.c -new file mode 100644 -index 000000000..5aea9c7bd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_u16.c -@@ -0,0 +1,52 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** clastb_u16_tied1: -+** clastb z0\.h, p0, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (clastb_u16_tied1, svuint16_t, -+ z0 = svclastb_u16 (p0, z0, z1), -+ z0 = svclastb (p0, z0, z1)) -+ -+/* -+** clastb_u16_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** clastb z0\.h, p0, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (clastb_u16_tied2, svuint16_t, -+ z0 = svclastb_u16 (p0, z1, z0), -+ z0 = svclastb (p0, z1, z0)) -+ -+/* -+** clastb_u16_untied: -+** movprfx z0, z1 -+** clastb z0\.h, p0, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (clastb_u16_untied, svuint16_t, -+ z0 = svclastb_u16 (p0, z1, z2), -+ z0 = svclastb (p0, z1, z2)) -+ -+/* -+** clastb_x0_u16: -+** clastb w0, p0, w0, z0\.h -+** ret -+*/ -+TEST_FOLD_LEFT_X (clastb_x0_u16, uint16_t, svuint16_t, -+ x0 = svclastb_n_u16 (p0, x0, z0), -+ x0 = svclastb (p0, x0, z0)) -+ -+/* -+** clastb_x1_u16: -+** mov w0, w1 -+** clastb w0, p0, w0, z0\.h -+** ret -+*/ -+TEST_FOLD_LEFT_X (clastb_x1_u16, uint16_t, svuint16_t, -+ x0 = svclastb_n_u16 (p0, x1, z0), -+ x0 = svclastb (p0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_u32.c -new file mode 100644 -index 000000000..47fcf4f27 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_u32.c -@@ -0,0 +1,52 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** clastb_u32_tied1: -+** clastb z0\.s, p0, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (clastb_u32_tied1, svuint32_t, -+ z0 = svclastb_u32 (p0, z0, z1), -+ z0 = svclastb (p0, z0, z1)) -+ -+/* -+** clastb_u32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** clastb z0\.s, p0, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (clastb_u32_tied2, svuint32_t, -+ z0 = svclastb_u32 (p0, z1, z0), -+ z0 = svclastb (p0, z1, z0)) -+ -+/* -+** clastb_u32_untied: -+** movprfx z0, z1 -+** clastb z0\.s, p0, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (clastb_u32_untied, svuint32_t, -+ z0 = svclastb_u32 (p0, z1, z2), -+ z0 = svclastb (p0, z1, 
z2)) -+ -+/* -+** clastb_x0_u32: -+** clastb w0, p0, w0, z0\.s -+** ret -+*/ -+TEST_FOLD_LEFT_X (clastb_x0_u32, uint32_t, svuint32_t, -+ x0 = svclastb_n_u32 (p0, x0, z0), -+ x0 = svclastb (p0, x0, z0)) -+ -+/* -+** clastb_x1_u32: -+** mov w0, w1 -+** clastb w0, p0, w0, z0\.s -+** ret -+*/ -+TEST_FOLD_LEFT_X (clastb_x1_u32, uint32_t, svuint32_t, -+ x0 = svclastb_n_u32 (p0, x1, z0), -+ x0 = svclastb (p0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_u64.c -new file mode 100644 -index 000000000..fb57afe85 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_u64.c -@@ -0,0 +1,52 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** clastb_u64_tied1: -+** clastb z0\.d, p0, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (clastb_u64_tied1, svuint64_t, -+ z0 = svclastb_u64 (p0, z0, z1), -+ z0 = svclastb (p0, z0, z1)) -+ -+/* -+** clastb_u64_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** clastb z0\.d, p0, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (clastb_u64_tied2, svuint64_t, -+ z0 = svclastb_u64 (p0, z1, z0), -+ z0 = svclastb (p0, z1, z0)) -+ -+/* -+** clastb_u64_untied: -+** movprfx z0, z1 -+** clastb z0\.d, p0, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (clastb_u64_untied, svuint64_t, -+ z0 = svclastb_u64 (p0, z1, z2), -+ z0 = svclastb (p0, z1, z2)) -+ -+/* -+** clastb_x0_u64: -+** clastb x0, p0, x0, z0\.d -+** ret -+*/ -+TEST_FOLD_LEFT_X (clastb_x0_u64, uint64_t, svuint64_t, -+ x0 = svclastb_n_u64 (p0, x0, z0), -+ x0 = svclastb (p0, x0, z0)) -+ -+/* -+** clastb_x1_u64: -+** mov x0, x1 -+** clastb x0, p0, x0, z0\.d -+** ret -+*/ -+TEST_FOLD_LEFT_X (clastb_x1_u64, uint64_t, svuint64_t, -+ x0 = svclastb_n_u64 (p0, x1, z0), -+ x0 = svclastb (p0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_u8.c -new file mode 100644 -index 000000000..f3ca84920 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clastb_u8.c -@@ -0,0 +1,52 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** clastb_u8_tied1: -+** clastb z0\.b, p0, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (clastb_u8_tied1, svuint8_t, -+ z0 = svclastb_u8 (p0, z0, z1), -+ z0 = svclastb (p0, z0, z1)) -+ -+/* -+** clastb_u8_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** clastb z0\.b, p0, z0\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (clastb_u8_tied2, svuint8_t, -+ z0 = svclastb_u8 (p0, z1, z0), -+ z0 = svclastb (p0, z1, z0)) -+ -+/* -+** clastb_u8_untied: -+** movprfx z0, z1 -+** clastb z0\.b, p0, z0\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (clastb_u8_untied, svuint8_t, -+ z0 = svclastb_u8 (p0, z1, z2), -+ z0 = svclastb (p0, z1, z2)) -+ -+/* -+** clastb_x0_u8: -+** clastb w0, p0, w0, z0\.b -+** ret -+*/ -+TEST_FOLD_LEFT_X (clastb_x0_u8, uint8_t, svuint8_t, -+ x0 = svclastb_n_u8 (p0, x0, z0), -+ x0 = svclastb (p0, x0, z0)) -+ -+/* -+** clastb_x1_u8: -+** mov w0, w1 -+** clastb w0, p0, w0, z0\.b -+** ret -+*/ -+TEST_FOLD_LEFT_X (clastb_x1_u8, uint8_t, svuint8_t, -+ x0 = svclastb_n_u8 (p0, x1, z0), -+ x0 = svclastb (p0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cls_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cls_s16.c -new file mode 100644 -index 000000000..7af312397 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cls_s16.c -@@ -0,0 
+1,41 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cls_s16_m_tied1: -+** cls z0\.h, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cls_s16_m_tied1, svuint16_t, svint16_t, -+ z0 = svcls_s16_m (z0, p0, z4), -+ z0 = svcls_m (z0, p0, z4)) -+ -+/* -+** cls_s16_m_untied: -+** movprfx z0, z1 -+** cls z0\.h, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cls_s16_m_untied, svuint16_t, svint16_t, -+ z0 = svcls_s16_m (z1, p0, z4), -+ z0 = svcls_m (z1, p0, z4)) -+ -+/* -+** cls_s16_z: -+** movprfx z0\.h, p0/z, z4\.h -+** cls z0\.h, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cls_s16_z, svuint16_t, svint16_t, -+ z0 = svcls_s16_z (p0, z4), -+ z0 = svcls_z (p0, z4)) -+ -+/* -+** cls_s16_x: -+** cls z0\.h, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cls_s16_x, svuint16_t, svint16_t, -+ z0 = svcls_s16_x (p0, z4), -+ z0 = svcls_x (p0, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cls_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cls_s32.c -new file mode 100644 -index 000000000..813876f68 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cls_s32.c -@@ -0,0 +1,41 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cls_s32_m_tied1: -+** cls z0\.s, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cls_s32_m_tied1, svuint32_t, svint32_t, -+ z0 = svcls_s32_m (z0, p0, z4), -+ z0 = svcls_m (z0, p0, z4)) -+ -+/* -+** cls_s32_m_untied: -+** movprfx z0, z1 -+** cls z0\.s, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cls_s32_m_untied, svuint32_t, svint32_t, -+ z0 = svcls_s32_m (z1, p0, z4), -+ z0 = svcls_m (z1, p0, z4)) -+ -+/* -+** cls_s32_z: -+** movprfx z0\.s, p0/z, z4\.s -+** cls z0\.s, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cls_s32_z, svuint32_t, svint32_t, -+ z0 = svcls_s32_z (p0, z4), -+ z0 = svcls_z (p0, z4)) -+ -+/* -+** cls_s32_x: -+** cls z0\.s, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cls_s32_x, svuint32_t, svint32_t, -+ z0 = svcls_s32_x (p0, z4), -+ z0 = svcls_x (p0, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cls_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cls_s64.c -new file mode 100644 -index 000000000..660a20556 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cls_s64.c -@@ -0,0 +1,41 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cls_s64_m_tied1: -+** cls z0\.d, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cls_s64_m_tied1, svuint64_t, svint64_t, -+ z0 = svcls_s64_m (z0, p0, z4), -+ z0 = svcls_m (z0, p0, z4)) -+ -+/* -+** cls_s64_m_untied: -+** movprfx z0, z1 -+** cls z0\.d, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cls_s64_m_untied, svuint64_t, svint64_t, -+ z0 = svcls_s64_m (z1, p0, z4), -+ z0 = svcls_m (z1, p0, z4)) -+ -+/* -+** cls_s64_z: -+** movprfx z0\.d, p0/z, z4\.d -+** cls z0\.d, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cls_s64_z, svuint64_t, svint64_t, -+ z0 = svcls_s64_z (p0, z4), -+ z0 = svcls_z (p0, z4)) -+ -+/* -+** cls_s64_x: -+** cls z0\.d, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cls_s64_x, svuint64_t, svint64_t, -+ z0 = svcls_s64_x (p0, z4), -+ z0 = svcls_x (p0, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cls_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cls_s8.c -new file mode 100644 -index 000000000..56f5c2608 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cls_s8.c -@@ -0,0 +1,41 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include 
"test_sve_acle.h" -+ -+/* -+** cls_s8_m_tied1: -+** cls z0\.b, p0/m, z4\.b -+** ret -+*/ -+TEST_DUAL_Z (cls_s8_m_tied1, svuint8_t, svint8_t, -+ z0 = svcls_s8_m (z0, p0, z4), -+ z0 = svcls_m (z0, p0, z4)) -+ -+/* -+** cls_s8_m_untied: -+** movprfx z0, z1 -+** cls z0\.b, p0/m, z4\.b -+** ret -+*/ -+TEST_DUAL_Z (cls_s8_m_untied, svuint8_t, svint8_t, -+ z0 = svcls_s8_m (z1, p0, z4), -+ z0 = svcls_m (z1, p0, z4)) -+ -+/* -+** cls_s8_z: -+** movprfx z0\.b, p0/z, z4\.b -+** cls z0\.b, p0/m, z4\.b -+** ret -+*/ -+TEST_DUAL_Z (cls_s8_z, svuint8_t, svint8_t, -+ z0 = svcls_s8_z (p0, z4), -+ z0 = svcls_z (p0, z4)) -+ -+/* -+** cls_s8_x: -+** cls z0\.b, p0/m, z4\.b -+** ret -+*/ -+TEST_DUAL_Z (cls_s8_x, svuint8_t, svint8_t, -+ z0 = svcls_s8_x (p0, z4), -+ z0 = svcls_x (p0, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_s16.c -new file mode 100644 -index 000000000..58f89005c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_s16.c -@@ -0,0 +1,41 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** clz_s16_m_tied1: -+** clz z0\.h, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (clz_s16_m_tied1, svuint16_t, svint16_t, -+ z0 = svclz_s16_m (z0, p0, z4), -+ z0 = svclz_m (z0, p0, z4)) -+ -+/* -+** clz_s16_m_untied: -+** movprfx z0, z1 -+** clz z0\.h, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (clz_s16_m_untied, svuint16_t, svint16_t, -+ z0 = svclz_s16_m (z1, p0, z4), -+ z0 = svclz_m (z1, p0, z4)) -+ -+/* -+** clz_s16_z: -+** movprfx z0\.h, p0/z, z4\.h -+** clz z0\.h, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (clz_s16_z, svuint16_t, svint16_t, -+ z0 = svclz_s16_z (p0, z4), -+ z0 = svclz_z (p0, z4)) -+ -+/* -+** clz_s16_x: -+** clz z0\.h, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (clz_s16_x, svuint16_t, svint16_t, -+ z0 = svclz_s16_x (p0, z4), -+ z0 = svclz_x (p0, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_s32.c -new file mode 100644 -index 000000000..a9198070b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_s32.c -@@ -0,0 +1,41 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** clz_s32_m_tied1: -+** clz z0\.s, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (clz_s32_m_tied1, svuint32_t, svint32_t, -+ z0 = svclz_s32_m (z0, p0, z4), -+ z0 = svclz_m (z0, p0, z4)) -+ -+/* -+** clz_s32_m_untied: -+** movprfx z0, z1 -+** clz z0\.s, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (clz_s32_m_untied, svuint32_t, svint32_t, -+ z0 = svclz_s32_m (z1, p0, z4), -+ z0 = svclz_m (z1, p0, z4)) -+ -+/* -+** clz_s32_z: -+** movprfx z0\.s, p0/z, z4\.s -+** clz z0\.s, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (clz_s32_z, svuint32_t, svint32_t, -+ z0 = svclz_s32_z (p0, z4), -+ z0 = svclz_z (p0, z4)) -+ -+/* -+** clz_s32_x: -+** clz z0\.s, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (clz_s32_x, svuint32_t, svint32_t, -+ z0 = svclz_s32_x (p0, z4), -+ z0 = svclz_x (p0, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_s64.c -new file mode 100644 -index 000000000..02c0c993e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_s64.c -@@ -0,0 +1,41 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** clz_s64_m_tied1: -+** clz z0\.d, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z 
(clz_s64_m_tied1, svuint64_t, svint64_t, -+ z0 = svclz_s64_m (z0, p0, z4), -+ z0 = svclz_m (z0, p0, z4)) -+ -+/* -+** clz_s64_m_untied: -+** movprfx z0, z1 -+** clz z0\.d, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (clz_s64_m_untied, svuint64_t, svint64_t, -+ z0 = svclz_s64_m (z1, p0, z4), -+ z0 = svclz_m (z1, p0, z4)) -+ -+/* -+** clz_s64_z: -+** movprfx z0\.d, p0/z, z4\.d -+** clz z0\.d, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (clz_s64_z, svuint64_t, svint64_t, -+ z0 = svclz_s64_z (p0, z4), -+ z0 = svclz_z (p0, z4)) -+ -+/* -+** clz_s64_x: -+** clz z0\.d, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (clz_s64_x, svuint64_t, svint64_t, -+ z0 = svclz_s64_x (p0, z4), -+ z0 = svclz_x (p0, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_s8.c -new file mode 100644 -index 000000000..642d298c8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_s8.c -@@ -0,0 +1,41 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** clz_s8_m_tied1: -+** clz z0\.b, p0/m, z4\.b -+** ret -+*/ -+TEST_DUAL_Z (clz_s8_m_tied1, svuint8_t, svint8_t, -+ z0 = svclz_s8_m (z0, p0, z4), -+ z0 = svclz_m (z0, p0, z4)) -+ -+/* -+** clz_s8_m_untied: -+** movprfx z0, z1 -+** clz z0\.b, p0/m, z4\.b -+** ret -+*/ -+TEST_DUAL_Z (clz_s8_m_untied, svuint8_t, svint8_t, -+ z0 = svclz_s8_m (z1, p0, z4), -+ z0 = svclz_m (z1, p0, z4)) -+ -+/* -+** clz_s8_z: -+** movprfx z0\.b, p0/z, z4\.b -+** clz z0\.b, p0/m, z4\.b -+** ret -+*/ -+TEST_DUAL_Z (clz_s8_z, svuint8_t, svint8_t, -+ z0 = svclz_s8_z (p0, z4), -+ z0 = svclz_z (p0, z4)) -+ -+/* -+** clz_s8_x: -+** clz z0\.b, p0/m, z4\.b -+** ret -+*/ -+TEST_DUAL_Z (clz_s8_x, svuint8_t, svint8_t, -+ z0 = svclz_s8_x (p0, z4), -+ z0 = svclz_x (p0, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_u16.c -new file mode 100644 -index 000000000..f08723017 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_u16.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** clz_u16_m_tied12: -+** clz z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (clz_u16_m_tied12, svuint16_t, -+ z0 = svclz_u16_m (z0, p0, z0), -+ z0 = svclz_m (z0, p0, z0)) -+ -+/* -+** clz_u16_m_tied1: -+** clz z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (clz_u16_m_tied1, svuint16_t, -+ z0 = svclz_u16_m (z0, p0, z1), -+ z0 = svclz_m (z0, p0, z1)) -+ -+/* -+** clz_u16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** clz z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (clz_u16_m_tied2, svuint16_t, -+ z0 = svclz_u16_m (z1, p0, z0), -+ z0 = svclz_m (z1, p0, z0)) -+ -+/* -+** clz_u16_m_untied: -+** movprfx z0, z2 -+** clz z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (clz_u16_m_untied, svuint16_t, -+ z0 = svclz_u16_m (z2, p0, z1), -+ z0 = svclz_m (z2, p0, z1)) -+ -+/* -+** clz_u16_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, \1\.h -+** clz z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (clz_u16_z_tied1, svuint16_t, -+ z0 = svclz_u16_z (p0, z0), -+ z0 = svclz_z (p0, z0)) -+ -+/* -+** clz_u16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** clz z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (clz_u16_z_untied, svuint16_t, -+ z0 = svclz_u16_z (p0, z1), -+ z0 = svclz_z (p0, z1)) -+ -+/* -+** clz_u16_x_tied1: -+** clz z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (clz_u16_x_tied1, 
svuint16_t, -+ z0 = svclz_u16_x (p0, z0), -+ z0 = svclz_x (p0, z0)) -+ -+/* -+** clz_u16_x_untied: -+** clz z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (clz_u16_x_untied, svuint16_t, -+ z0 = svclz_u16_x (p0, z1), -+ z0 = svclz_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_u32.c -new file mode 100644 -index 000000000..e00424131 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_u32.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** clz_u32_m_tied12: -+** clz z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (clz_u32_m_tied12, svuint32_t, -+ z0 = svclz_u32_m (z0, p0, z0), -+ z0 = svclz_m (z0, p0, z0)) -+ -+/* -+** clz_u32_m_tied1: -+** clz z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (clz_u32_m_tied1, svuint32_t, -+ z0 = svclz_u32_m (z0, p0, z1), -+ z0 = svclz_m (z0, p0, z1)) -+ -+/* -+** clz_u32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** clz z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (clz_u32_m_tied2, svuint32_t, -+ z0 = svclz_u32_m (z1, p0, z0), -+ z0 = svclz_m (z1, p0, z0)) -+ -+/* -+** clz_u32_m_untied: -+** movprfx z0, z2 -+** clz z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (clz_u32_m_untied, svuint32_t, -+ z0 = svclz_u32_m (z2, p0, z1), -+ z0 = svclz_m (z2, p0, z1)) -+ -+/* -+** clz_u32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** clz z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (clz_u32_z_tied1, svuint32_t, -+ z0 = svclz_u32_z (p0, z0), -+ z0 = svclz_z (p0, z0)) -+ -+/* -+** clz_u32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** clz z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (clz_u32_z_untied, svuint32_t, -+ z0 = svclz_u32_z (p0, z1), -+ z0 = svclz_z (p0, z1)) -+ -+/* -+** clz_u32_x_tied1: -+** clz z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (clz_u32_x_tied1, svuint32_t, -+ z0 = svclz_u32_x (p0, z0), -+ z0 = svclz_x (p0, z0)) -+ -+/* -+** clz_u32_x_untied: -+** clz z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (clz_u32_x_untied, svuint32_t, -+ z0 = svclz_u32_x (p0, z1), -+ z0 = svclz_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_u64.c -new file mode 100644 -index 000000000..e879e1b9a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_u64.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** clz_u64_m_tied12: -+** clz z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (clz_u64_m_tied12, svuint64_t, -+ z0 = svclz_u64_m (z0, p0, z0), -+ z0 = svclz_m (z0, p0, z0)) -+ -+/* -+** clz_u64_m_tied1: -+** clz z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (clz_u64_m_tied1, svuint64_t, -+ z0 = svclz_u64_m (z0, p0, z1), -+ z0 = svclz_m (z0, p0, z1)) -+ -+/* -+** clz_u64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** clz z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (clz_u64_m_tied2, svuint64_t, -+ z0 = svclz_u64_m (z1, p0, z0), -+ z0 = svclz_m (z1, p0, z0)) -+ -+/* -+** clz_u64_m_untied: -+** movprfx z0, z2 -+** clz z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (clz_u64_m_untied, svuint64_t, -+ z0 = svclz_u64_m (z2, p0, z1), -+ z0 = svclz_m (z2, p0, z1)) -+ -+/* -+** clz_u64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** clz z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z 
(clz_u64_z_tied1, svuint64_t, -+ z0 = svclz_u64_z (p0, z0), -+ z0 = svclz_z (p0, z0)) -+ -+/* -+** clz_u64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** clz z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (clz_u64_z_untied, svuint64_t, -+ z0 = svclz_u64_z (p0, z1), -+ z0 = svclz_z (p0, z1)) -+ -+/* -+** clz_u64_x_tied1: -+** clz z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (clz_u64_x_tied1, svuint64_t, -+ z0 = svclz_u64_x (p0, z0), -+ z0 = svclz_x (p0, z0)) -+ -+/* -+** clz_u64_x_untied: -+** clz z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (clz_u64_x_untied, svuint64_t, -+ z0 = svclz_u64_x (p0, z1), -+ z0 = svclz_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_u8.c -new file mode 100644 -index 000000000..ce6cb8f45 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/clz_u8.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** clz_u8_m_tied12: -+** clz z0\.b, p0/m, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (clz_u8_m_tied12, svuint8_t, -+ z0 = svclz_u8_m (z0, p0, z0), -+ z0 = svclz_m (z0, p0, z0)) -+ -+/* -+** clz_u8_m_tied1: -+** clz z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (clz_u8_m_tied1, svuint8_t, -+ z0 = svclz_u8_m (z0, p0, z1), -+ z0 = svclz_m (z0, p0, z1)) -+ -+/* -+** clz_u8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** clz z0\.b, p0/m, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (clz_u8_m_tied2, svuint8_t, -+ z0 = svclz_u8_m (z1, p0, z0), -+ z0 = svclz_m (z1, p0, z0)) -+ -+/* -+** clz_u8_m_untied: -+** movprfx z0, z2 -+** clz z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (clz_u8_m_untied, svuint8_t, -+ z0 = svclz_u8_m (z2, p0, z1), -+ z0 = svclz_m (z2, p0, z1)) -+ -+/* -+** clz_u8_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.b, p0/z, \1\.b -+** clz z0\.b, p0/m, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (clz_u8_z_tied1, svuint8_t, -+ z0 = svclz_u8_z (p0, z0), -+ z0 = svclz_z (p0, z0)) -+ -+/* -+** clz_u8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** clz z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (clz_u8_z_untied, svuint8_t, -+ z0 = svclz_u8_z (p0, z1), -+ z0 = svclz_z (p0, z1)) -+ -+/* -+** clz_u8_x_tied1: -+** clz z0\.b, p0/m, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (clz_u8_x_tied1, svuint8_t, -+ z0 = svclz_u8_x (p0, z0), -+ z0 = svclz_x (p0, z0)) -+ -+/* -+** clz_u8_x_untied: -+** clz z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (clz_u8_x_untied, svuint8_t, -+ z0 = svclz_u8_x (p0, z1), -+ z0 = svclz_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmla_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmla_f16.c -new file mode 100644 -index 000000000..3bf44a59f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmla_f16.c -@@ -0,0 +1,675 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmla_0_f16_m_tied1: -+** fcmla z0\.h, p0/m, z1\.h, z2\.h, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_0_f16_m_tied1, svfloat16_t, -+ z0 = svcmla_f16_m (p0, z0, z1, z2, 0), -+ z0 = svcmla_m (p0, z0, z1, z2, 0)) -+ -+/* -+** cmla_0_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.h, p0/m, \1\.h, z2\.h, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_0_f16_m_tied2, svfloat16_t, -+ z0 = svcmla_f16_m (p0, z1, z0, z2, 0), -+ z0 = svcmla_m (p0, z1, z0, z2, 0)) -+ -+/* -+** cmla_0_f16_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.h, 
p0/m, z2\.h, \1\.h, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_0_f16_m_tied3, svfloat16_t, -+ z0 = svcmla_f16_m (p0, z1, z2, z0, 0), -+ z0 = svcmla_m (p0, z1, z2, z0, 0)) -+ -+/* -+** cmla_0_f16_m_untied: -+** movprfx z0, z1 -+** fcmla z0\.h, p0/m, z2\.h, z3\.h, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_0_f16_m_untied, svfloat16_t, -+ z0 = svcmla_f16_m (p0, z1, z2, z3, 0), -+ z0 = svcmla_m (p0, z1, z2, z3, 0)) -+ -+/* -+** cmla_90_f16_m_tied1: -+** fcmla z0\.h, p0/m, z1\.h, z2\.h, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_90_f16_m_tied1, svfloat16_t, -+ z0 = svcmla_f16_m (p0, z0, z1, z2, 90), -+ z0 = svcmla_m (p0, z0, z1, z2, 90)) -+ -+/* -+** cmla_90_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.h, p0/m, \1\.h, z2\.h, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_90_f16_m_tied2, svfloat16_t, -+ z0 = svcmla_f16_m (p0, z1, z0, z2, 90), -+ z0 = svcmla_m (p0, z1, z0, z2, 90)) -+ -+/* -+** cmla_90_f16_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.h, p0/m, z2\.h, \1\.h, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_90_f16_m_tied3, svfloat16_t, -+ z0 = svcmla_f16_m (p0, z1, z2, z0, 90), -+ z0 = svcmla_m (p0, z1, z2, z0, 90)) -+ -+/* -+** cmla_90_f16_m_untied: -+** movprfx z0, z1 -+** fcmla z0\.h, p0/m, z2\.h, z3\.h, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_90_f16_m_untied, svfloat16_t, -+ z0 = svcmla_f16_m (p0, z1, z2, z3, 90), -+ z0 = svcmla_m (p0, z1, z2, z3, 90)) -+ -+/* -+** cmla_180_f16_m_tied1: -+** fcmla z0\.h, p0/m, z1\.h, z2\.h, #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_180_f16_m_tied1, svfloat16_t, -+ z0 = svcmla_f16_m (p0, z0, z1, z2, 180), -+ z0 = svcmla_m (p0, z0, z1, z2, 180)) -+ -+/* -+** cmla_180_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.h, p0/m, \1\.h, z2\.h, #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_180_f16_m_tied2, svfloat16_t, -+ z0 = svcmla_f16_m (p0, z1, z0, z2, 180), -+ z0 = svcmla_m (p0, z1, z0, z2, 180)) -+ -+/* -+** cmla_180_f16_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.h, p0/m, z2\.h, \1\.h, #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_180_f16_m_tied3, svfloat16_t, -+ z0 = svcmla_f16_m (p0, z1, z2, z0, 180), -+ z0 = svcmla_m (p0, z1, z2, z0, 180)) -+ -+/* -+** cmla_180_f16_m_untied: -+** movprfx z0, z1 -+** fcmla z0\.h, p0/m, z2\.h, z3\.h, #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_180_f16_m_untied, svfloat16_t, -+ z0 = svcmla_f16_m (p0, z1, z2, z3, 180), -+ z0 = svcmla_m (p0, z1, z2, z3, 180)) -+ -+/* -+** cmla_270_f16_m_tied1: -+** fcmla z0\.h, p0/m, z1\.h, z2\.h, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_270_f16_m_tied1, svfloat16_t, -+ z0 = svcmla_f16_m (p0, z0, z1, z2, 270), -+ z0 = svcmla_m (p0, z0, z1, z2, 270)) -+ -+/* -+** cmla_270_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.h, p0/m, \1\.h, z2\.h, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_270_f16_m_tied2, svfloat16_t, -+ z0 = svcmla_f16_m (p0, z1, z0, z2, 270), -+ z0 = svcmla_m (p0, z1, z0, z2, 270)) -+ -+/* -+** cmla_270_f16_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.h, p0/m, z2\.h, \1\.h, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_270_f16_m_tied3, svfloat16_t, -+ z0 = svcmla_f16_m (p0, z1, z2, z0, 270), -+ z0 = svcmla_m (p0, z1, z2, z0, 270)) -+ -+/* -+** cmla_270_f16_m_untied: -+** movprfx z0, z1 -+** fcmla z0\.h, p0/m, z2\.h, z3\.h, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_270_f16_m_untied, svfloat16_t, -+ z0 = svcmla_f16_m (p0, z1, z2, z3, 270), -+ z0 = svcmla_m (p0, z1, z2, z3, 270)) -+ -+/* -+** cmla_0_f16_z_tied1: -+** movprfx z0\.h, 
p0/z, z0\.h -+** fcmla z0\.h, p0/m, z1\.h, z2\.h, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_0_f16_z_tied1, svfloat16_t, -+ z0 = svcmla_f16_z (p0, z0, z1, z2, 0), -+ z0 = svcmla_z (p0, z0, z1, z2, 0)) -+ -+/* -+** cmla_0_f16_z_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, z1\.h -+** fcmla z0\.h, p0/m, \1\.h, z2\.h, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_0_f16_z_tied2, svfloat16_t, -+ z0 = svcmla_f16_z (p0, z1, z0, z2, 0), -+ z0 = svcmla_z (p0, z1, z0, z2, 0)) -+ -+/* -+** cmla_0_f16_z_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, z1\.h -+** fcmla z0\.h, p0/m, z2\.h, \1\.h, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_0_f16_z_tied3, svfloat16_t, -+ z0 = svcmla_f16_z (p0, z1, z2, z0, 0), -+ z0 = svcmla_z (p0, z1, z2, z0, 0)) -+ -+/* -+** cmla_0_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fcmla z0\.h, p0/m, z2\.h, z3\.h, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_0_f16_z_untied, svfloat16_t, -+ z0 = svcmla_f16_z (p0, z1, z2, z3, 0), -+ z0 = svcmla_z (p0, z1, z2, z3, 0)) -+ -+/* -+** cmla_90_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fcmla z0\.h, p0/m, z1\.h, z2\.h, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_90_f16_z_tied1, svfloat16_t, -+ z0 = svcmla_f16_z (p0, z0, z1, z2, 90), -+ z0 = svcmla_z (p0, z0, z1, z2, 90)) -+ -+/* -+** cmla_90_f16_z_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, z1\.h -+** fcmla z0\.h, p0/m, \1\.h, z2\.h, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_90_f16_z_tied2, svfloat16_t, -+ z0 = svcmla_f16_z (p0, z1, z0, z2, 90), -+ z0 = svcmla_z (p0, z1, z0, z2, 90)) -+ -+/* -+** cmla_90_f16_z_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, z1\.h -+** fcmla z0\.h, p0/m, z2\.h, \1\.h, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_90_f16_z_tied3, svfloat16_t, -+ z0 = svcmla_f16_z (p0, z1, z2, z0, 90), -+ z0 = svcmla_z (p0, z1, z2, z0, 90)) -+ -+/* -+** cmla_90_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fcmla z0\.h, p0/m, z2\.h, z3\.h, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_90_f16_z_untied, svfloat16_t, -+ z0 = svcmla_f16_z (p0, z1, z2, z3, 90), -+ z0 = svcmla_z (p0, z1, z2, z3, 90)) -+ -+/* -+** cmla_180_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fcmla z0\.h, p0/m, z1\.h, z2\.h, #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_180_f16_z_tied1, svfloat16_t, -+ z0 = svcmla_f16_z (p0, z0, z1, z2, 180), -+ z0 = svcmla_z (p0, z0, z1, z2, 180)) -+ -+/* -+** cmla_180_f16_z_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, z1\.h -+** fcmla z0\.h, p0/m, \1\.h, z2\.h, #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_180_f16_z_tied2, svfloat16_t, -+ z0 = svcmla_f16_z (p0, z1, z0, z2, 180), -+ z0 = svcmla_z (p0, z1, z0, z2, 180)) -+ -+/* -+** cmla_180_f16_z_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, z1\.h -+** fcmla z0\.h, p0/m, z2\.h, \1\.h, #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_180_f16_z_tied3, svfloat16_t, -+ z0 = svcmla_f16_z (p0, z1, z2, z0, 180), -+ z0 = svcmla_z (p0, z1, z2, z0, 180)) -+ -+/* -+** cmla_180_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fcmla z0\.h, p0/m, z2\.h, z3\.h, #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_180_f16_z_untied, svfloat16_t, -+ z0 = svcmla_f16_z (p0, z1, z2, z3, 180), -+ z0 = svcmla_z (p0, z1, z2, z3, 180)) -+ -+/* -+** cmla_270_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fcmla z0\.h, p0/m, z1\.h, z2\.h, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_270_f16_z_tied1, svfloat16_t, -+ z0 = svcmla_f16_z (p0, z0, z1, z2, 270), -+ z0 = svcmla_z (p0, z0, z1, z2, 270)) -+ -+/* -+** cmla_270_f16_z_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, 
z1\.h -+** fcmla z0\.h, p0/m, \1\.h, z2\.h, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_270_f16_z_tied2, svfloat16_t, -+ z0 = svcmla_f16_z (p0, z1, z0, z2, 270), -+ z0 = svcmla_z (p0, z1, z0, z2, 270)) -+ -+/* -+** cmla_270_f16_z_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, z1\.h -+** fcmla z0\.h, p0/m, z2\.h, \1\.h, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_270_f16_z_tied3, svfloat16_t, -+ z0 = svcmla_f16_z (p0, z1, z2, z0, 270), -+ z0 = svcmla_z (p0, z1, z2, z0, 270)) -+ -+/* -+** cmla_270_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fcmla z0\.h, p0/m, z2\.h, z3\.h, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_270_f16_z_untied, svfloat16_t, -+ z0 = svcmla_f16_z (p0, z1, z2, z3, 270), -+ z0 = svcmla_z (p0, z1, z2, z3, 270)) -+ -+/* -+** cmla_0_f16_x_tied1: -+** fcmla z0\.h, p0/m, z1\.h, z2\.h, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_0_f16_x_tied1, svfloat16_t, -+ z0 = svcmla_f16_x (p0, z0, z1, z2, 0), -+ z0 = svcmla_x (p0, z0, z1, z2, 0)) -+ -+/* -+** cmla_0_f16_x_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.h, p0/m, \1\.h, z2\.h, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_0_f16_x_tied2, svfloat16_t, -+ z0 = svcmla_f16_x (p0, z1, z0, z2, 0), -+ z0 = svcmla_x (p0, z1, z0, z2, 0)) -+ -+/* -+** cmla_0_f16_x_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.h, p0/m, z2\.h, \1\.h, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_0_f16_x_tied3, svfloat16_t, -+ z0 = svcmla_f16_x (p0, z1, z2, z0, 0), -+ z0 = svcmla_x (p0, z1, z2, z0, 0)) -+ -+/* -+** cmla_0_f16_x_untied: -+** movprfx z0, z1 -+** fcmla z0\.h, p0/m, z2\.h, z3\.h, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_0_f16_x_untied, svfloat16_t, -+ z0 = svcmla_f16_x (p0, z1, z2, z3, 0), -+ z0 = svcmla_x (p0, z1, z2, z3, 0)) -+ -+/* -+** cmla_90_f16_x_tied1: -+** fcmla z0\.h, p0/m, z1\.h, z2\.h, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_90_f16_x_tied1, svfloat16_t, -+ z0 = svcmla_f16_x (p0, z0, z1, z2, 90), -+ z0 = svcmla_x (p0, z0, z1, z2, 90)) -+ -+/* -+** cmla_90_f16_x_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.h, p0/m, \1\.h, z2\.h, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_90_f16_x_tied2, svfloat16_t, -+ z0 = svcmla_f16_x (p0, z1, z0, z2, 90), -+ z0 = svcmla_x (p0, z1, z0, z2, 90)) -+ -+/* -+** cmla_90_f16_x_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.h, p0/m, z2\.h, \1\.h, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_90_f16_x_tied3, svfloat16_t, -+ z0 = svcmla_f16_x (p0, z1, z2, z0, 90), -+ z0 = svcmla_x (p0, z1, z2, z0, 90)) -+ -+/* -+** cmla_90_f16_x_untied: -+** movprfx z0, z1 -+** fcmla z0\.h, p0/m, z2\.h, z3\.h, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_90_f16_x_untied, svfloat16_t, -+ z0 = svcmla_f16_x (p0, z1, z2, z3, 90), -+ z0 = svcmla_x (p0, z1, z2, z3, 90)) -+ -+/* -+** cmla_180_f16_x_tied1: -+** fcmla z0\.h, p0/m, z1\.h, z2\.h, #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_180_f16_x_tied1, svfloat16_t, -+ z0 = svcmla_f16_x (p0, z0, z1, z2, 180), -+ z0 = svcmla_x (p0, z0, z1, z2, 180)) -+ -+/* -+** cmla_180_f16_x_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.h, p0/m, \1\.h, z2\.h, #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_180_f16_x_tied2, svfloat16_t, -+ z0 = svcmla_f16_x (p0, z1, z0, z2, 180), -+ z0 = svcmla_x (p0, z1, z0, z2, 180)) -+ -+/* -+** cmla_180_f16_x_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.h, p0/m, z2\.h, \1\.h, #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_180_f16_x_tied3, svfloat16_t, -+ z0 = svcmla_f16_x (p0, z1, z2, z0, 180), -+ z0 = svcmla_x (p0, z1, z2, z0, 180)) -+ 
-+/* -+** cmla_180_f16_x_untied: -+** movprfx z0, z1 -+** fcmla z0\.h, p0/m, z2\.h, z3\.h, #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_180_f16_x_untied, svfloat16_t, -+ z0 = svcmla_f16_x (p0, z1, z2, z3, 180), -+ z0 = svcmla_x (p0, z1, z2, z3, 180)) -+ -+/* -+** cmla_270_f16_x_tied1: -+** fcmla z0\.h, p0/m, z1\.h, z2\.h, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_270_f16_x_tied1, svfloat16_t, -+ z0 = svcmla_f16_x (p0, z0, z1, z2, 270), -+ z0 = svcmla_x (p0, z0, z1, z2, 270)) -+ -+/* -+** cmla_270_f16_x_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.h, p0/m, \1\.h, z2\.h, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_270_f16_x_tied2, svfloat16_t, -+ z0 = svcmla_f16_x (p0, z1, z0, z2, 270), -+ z0 = svcmla_x (p0, z1, z0, z2, 270)) -+ -+/* -+** cmla_270_f16_x_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.h, p0/m, z2\.h, \1\.h, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_270_f16_x_tied3, svfloat16_t, -+ z0 = svcmla_f16_x (p0, z1, z2, z0, 270), -+ z0 = svcmla_x (p0, z1, z2, z0, 270)) -+ -+/* -+** cmla_270_f16_x_untied: -+** movprfx z0, z1 -+** fcmla z0\.h, p0/m, z2\.h, z3\.h, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_270_f16_x_untied, svfloat16_t, -+ z0 = svcmla_f16_x (p0, z1, z2, z3, 270), -+ z0 = svcmla_x (p0, z1, z2, z3, 270)) -+ -+/* -+** ptrue_cmla_0_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_0_f16_x_tied1, svfloat16_t, -+ z0 = svcmla_f16_x (svptrue_b16 (), z0, z1, z2, 0), -+ z0 = svcmla_x (svptrue_b16 (), z0, z1, z2, 0)) -+ -+/* -+** ptrue_cmla_0_f16_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_0_f16_x_tied2, svfloat16_t, -+ z0 = svcmla_f16_x (svptrue_b16 (), z1, z0, z2, 0), -+ z0 = svcmla_x (svptrue_b16 (), z1, z0, z2, 0)) -+ -+/* -+** ptrue_cmla_0_f16_x_tied3: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_0_f16_x_tied3, svfloat16_t, -+ z0 = svcmla_f16_x (svptrue_b16 (), z1, z2, z0, 0), -+ z0 = svcmla_x (svptrue_b16 (), z1, z2, z0, 0)) -+ -+/* -+** ptrue_cmla_0_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_0_f16_x_untied, svfloat16_t, -+ z0 = svcmla_f16_x (svptrue_b16 (), z1, z2, z3, 0), -+ z0 = svcmla_x (svptrue_b16 (), z1, z2, z3, 0)) -+ -+/* -+** ptrue_cmla_90_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_90_f16_x_tied1, svfloat16_t, -+ z0 = svcmla_f16_x (svptrue_b16 (), z0, z1, z2, 90), -+ z0 = svcmla_x (svptrue_b16 (), z0, z1, z2, 90)) -+ -+/* -+** ptrue_cmla_90_f16_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_90_f16_x_tied2, svfloat16_t, -+ z0 = svcmla_f16_x (svptrue_b16 (), z1, z0, z2, 90), -+ z0 = svcmla_x (svptrue_b16 (), z1, z0, z2, 90)) -+ -+/* -+** ptrue_cmla_90_f16_x_tied3: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_90_f16_x_tied3, svfloat16_t, -+ z0 = svcmla_f16_x (svptrue_b16 (), z1, z2, z0, 90), -+ z0 = svcmla_x (svptrue_b16 (), z1, z2, z0, 90)) -+ -+/* -+** ptrue_cmla_90_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_90_f16_x_untied, svfloat16_t, -+ z0 = svcmla_f16_x (svptrue_b16 (), z1, z2, z3, 90), -+ z0 = svcmla_x (svptrue_b16 (), z1, z2, z3, 90)) -+ -+/* -+** ptrue_cmla_180_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_180_f16_x_tied1, svfloat16_t, -+ z0 = svcmla_f16_x (svptrue_b16 (), z0, z1, z2, 180), -+ z0 = svcmla_x (svptrue_b16 (), z0, z1, z2, 180)) -+ -+/* -+** ptrue_cmla_180_f16_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_180_f16_x_tied2, svfloat16_t, -+ z0 = svcmla_f16_x (svptrue_b16 (), z1, z0, z2, 180), -+ z0 = svcmla_x (svptrue_b16 (), z1, z0, z2, 180)) -+ -+/* -+** ptrue_cmla_180_f16_x_tied3: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_180_f16_x_tied3, svfloat16_t, -+ z0 = svcmla_f16_x (svptrue_b16 (), z1, z2, z0, 180), -+ z0 = svcmla_x (svptrue_b16 (), z1, z2, z0, 180)) -+ -+/* -+** ptrue_cmla_180_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_180_f16_x_untied, svfloat16_t, -+ z0 = svcmla_f16_x (svptrue_b16 (), z1, z2, z3, 180), -+ z0 = svcmla_x (svptrue_b16 (), z1, z2, z3, 180)) -+ -+/* -+** ptrue_cmla_270_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_270_f16_x_tied1, svfloat16_t, -+ z0 = svcmla_f16_x (svptrue_b16 (), z0, z1, z2, 270), -+ z0 = svcmla_x (svptrue_b16 (), z0, z1, z2, 270)) -+ -+/* -+** ptrue_cmla_270_f16_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_270_f16_x_tied2, svfloat16_t, -+ z0 = svcmla_f16_x (svptrue_b16 (), z1, z0, z2, 270), -+ z0 = svcmla_x (svptrue_b16 (), z1, z0, z2, 270)) -+ -+/* -+** ptrue_cmla_270_f16_x_tied3: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_270_f16_x_tied3, svfloat16_t, -+ z0 = svcmla_f16_x (svptrue_b16 (), z1, z2, z0, 270), -+ z0 = svcmla_x (svptrue_b16 (), z1, z2, z0, 270)) -+ -+/* -+** ptrue_cmla_270_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_270_f16_x_untied, svfloat16_t, -+ z0 = svcmla_f16_x (svptrue_b16 (), z1, z2, z3, 270), -+ z0 = svcmla_x (svptrue_b16 (), z1, z2, z3, 270)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmla_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmla_f32.c -new file mode 100644 -index 000000000..b266738b2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmla_f32.c -@@ -0,0 +1,675 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmla_0_f32_m_tied1: -+** fcmla z0\.s, p0/m, z1\.s, z2\.s, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_0_f32_m_tied1, svfloat32_t, -+ z0 = svcmla_f32_m (p0, z0, z1, z2, 0), -+ z0 = svcmla_m (p0, z0, z1, z2, 0)) -+ -+/* -+** cmla_0_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.s, p0/m, \1\.s, z2\.s, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_0_f32_m_tied2, svfloat32_t, -+ z0 = svcmla_f32_m (p0, z1, z0, z2, 0), -+ z0 = svcmla_m (p0, z1, z0, z2, 0)) -+ -+/* -+** cmla_0_f32_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.s, p0/m, z2\.s, \1\.s, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_0_f32_m_tied3, svfloat32_t, -+ z0 = svcmla_f32_m (p0, z1, z2, z0, 0), -+ z0 = svcmla_m (p0, z1, z2, z0, 0)) -+ -+/* -+** cmla_0_f32_m_untied: -+** movprfx z0, z1 -+** fcmla z0\.s, p0/m, z2\.s, z3\.s, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_0_f32_m_untied, svfloat32_t, -+ z0 = svcmla_f32_m (p0, z1, z2, z3, 0), -+ z0 = svcmla_m (p0, z1, z2, z3, 0)) -+ -+/* -+** cmla_90_f32_m_tied1: -+** fcmla z0\.s, p0/m, z1\.s, z2\.s, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_90_f32_m_tied1, svfloat32_t, -+ z0 = svcmla_f32_m (p0, z0, z1, z2, 90), -+ z0 = svcmla_m (p0, z0, z1, z2, 90)) -+ -+/* -+** cmla_90_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.s, p0/m, \1\.s, z2\.s, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_90_f32_m_tied2, svfloat32_t, -+ z0 = svcmla_f32_m (p0, z1, z0, z2, 90), -+ z0 = svcmla_m (p0, z1, z0, z2, 90)) -+ -+/* -+** cmla_90_f32_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.s, p0/m, z2\.s, \1\.s, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_90_f32_m_tied3, svfloat32_t, -+ z0 = svcmla_f32_m (p0, z1, z2, z0, 90), -+ z0 = svcmla_m (p0, z1, z2, z0, 90)) -+ -+/* -+** cmla_90_f32_m_untied: -+** movprfx z0, z1 -+** fcmla z0\.s, p0/m, z2\.s, z3\.s, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_90_f32_m_untied, svfloat32_t, -+ z0 = svcmla_f32_m (p0, z1, z2, z3, 90), -+ z0 = svcmla_m (p0, z1, z2, z3, 90)) -+ -+/* -+** cmla_180_f32_m_tied1: -+** fcmla z0\.s, p0/m, z1\.s, z2\.s, #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_180_f32_m_tied1, svfloat32_t, -+ z0 = svcmla_f32_m (p0, z0, z1, z2, 180), -+ z0 = svcmla_m (p0, z0, z1, z2, 180)) -+ -+/* -+** cmla_180_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.s, p0/m, \1\.s, z2\.s, #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_180_f32_m_tied2, svfloat32_t, -+ z0 = svcmla_f32_m (p0, z1, z0, z2, 180), -+ z0 = svcmla_m (p0, z1, z0, z2, 180)) -+ -+/* -+** cmla_180_f32_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.s, p0/m, z2\.s, \1\.s, #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_180_f32_m_tied3, svfloat32_t, -+ z0 = svcmla_f32_m (p0, z1, z2, z0, 180), -+ z0 = svcmla_m (p0, z1, z2, z0, 180)) -+ -+/* -+** cmla_180_f32_m_untied: -+** movprfx z0, z1 -+** fcmla z0\.s, p0/m, z2\.s, z3\.s, #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_180_f32_m_untied, svfloat32_t, -+ z0 = svcmla_f32_m (p0, z1, 
z2, z3, 180), -+ z0 = svcmla_m (p0, z1, z2, z3, 180)) -+ -+/* -+** cmla_270_f32_m_tied1: -+** fcmla z0\.s, p0/m, z1\.s, z2\.s, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_270_f32_m_tied1, svfloat32_t, -+ z0 = svcmla_f32_m (p0, z0, z1, z2, 270), -+ z0 = svcmla_m (p0, z0, z1, z2, 270)) -+ -+/* -+** cmla_270_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.s, p0/m, \1\.s, z2\.s, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_270_f32_m_tied2, svfloat32_t, -+ z0 = svcmla_f32_m (p0, z1, z0, z2, 270), -+ z0 = svcmla_m (p0, z1, z0, z2, 270)) -+ -+/* -+** cmla_270_f32_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.s, p0/m, z2\.s, \1\.s, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_270_f32_m_tied3, svfloat32_t, -+ z0 = svcmla_f32_m (p0, z1, z2, z0, 270), -+ z0 = svcmla_m (p0, z1, z2, z0, 270)) -+ -+/* -+** cmla_270_f32_m_untied: -+** movprfx z0, z1 -+** fcmla z0\.s, p0/m, z2\.s, z3\.s, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_270_f32_m_untied, svfloat32_t, -+ z0 = svcmla_f32_m (p0, z1, z2, z3, 270), -+ z0 = svcmla_m (p0, z1, z2, z3, 270)) -+ -+/* -+** cmla_0_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fcmla z0\.s, p0/m, z1\.s, z2\.s, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_0_f32_z_tied1, svfloat32_t, -+ z0 = svcmla_f32_z (p0, z0, z1, z2, 0), -+ z0 = svcmla_z (p0, z0, z1, z2, 0)) -+ -+/* -+** cmla_0_f32_z_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, z1\.s -+** fcmla z0\.s, p0/m, \1\.s, z2\.s, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_0_f32_z_tied2, svfloat32_t, -+ z0 = svcmla_f32_z (p0, z1, z0, z2, 0), -+ z0 = svcmla_z (p0, z1, z0, z2, 0)) -+ -+/* -+** cmla_0_f32_z_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, z1\.s -+** fcmla z0\.s, p0/m, z2\.s, \1\.s, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_0_f32_z_tied3, svfloat32_t, -+ z0 = svcmla_f32_z (p0, z1, z2, z0, 0), -+ z0 = svcmla_z (p0, z1, z2, z0, 0)) -+ -+/* -+** cmla_0_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fcmla z0\.s, p0/m, z2\.s, z3\.s, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_0_f32_z_untied, svfloat32_t, -+ z0 = svcmla_f32_z (p0, z1, z2, z3, 0), -+ z0 = svcmla_z (p0, z1, z2, z3, 0)) -+ -+/* -+** cmla_90_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fcmla z0\.s, p0/m, z1\.s, z2\.s, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_90_f32_z_tied1, svfloat32_t, -+ z0 = svcmla_f32_z (p0, z0, z1, z2, 90), -+ z0 = svcmla_z (p0, z0, z1, z2, 90)) -+ -+/* -+** cmla_90_f32_z_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, z1\.s -+** fcmla z0\.s, p0/m, \1\.s, z2\.s, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_90_f32_z_tied2, svfloat32_t, -+ z0 = svcmla_f32_z (p0, z1, z0, z2, 90), -+ z0 = svcmla_z (p0, z1, z0, z2, 90)) -+ -+/* -+** cmla_90_f32_z_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, z1\.s -+** fcmla z0\.s, p0/m, z2\.s, \1\.s, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_90_f32_z_tied3, svfloat32_t, -+ z0 = svcmla_f32_z (p0, z1, z2, z0, 90), -+ z0 = svcmla_z (p0, z1, z2, z0, 90)) -+ -+/* -+** cmla_90_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fcmla z0\.s, p0/m, z2\.s, z3\.s, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_90_f32_z_untied, svfloat32_t, -+ z0 = svcmla_f32_z (p0, z1, z2, z3, 90), -+ z0 = svcmla_z (p0, z1, z2, z3, 90)) -+ -+/* -+** cmla_180_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fcmla z0\.s, p0/m, z1\.s, z2\.s, #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_180_f32_z_tied1, svfloat32_t, -+ z0 = svcmla_f32_z (p0, z0, z1, z2, 180), -+ z0 = svcmla_z (p0, z0, z1, z2, 180)) -+ -+/* -+** cmla_180_f32_z_tied2: -+** mov 
(z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, z1\.s -+** fcmla z0\.s, p0/m, \1\.s, z2\.s, #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_180_f32_z_tied2, svfloat32_t, -+ z0 = svcmla_f32_z (p0, z1, z0, z2, 180), -+ z0 = svcmla_z (p0, z1, z0, z2, 180)) -+ -+/* -+** cmla_180_f32_z_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, z1\.s -+** fcmla z0\.s, p0/m, z2\.s, \1\.s, #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_180_f32_z_tied3, svfloat32_t, -+ z0 = svcmla_f32_z (p0, z1, z2, z0, 180), -+ z0 = svcmla_z (p0, z1, z2, z0, 180)) -+ -+/* -+** cmla_180_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fcmla z0\.s, p0/m, z2\.s, z3\.s, #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_180_f32_z_untied, svfloat32_t, -+ z0 = svcmla_f32_z (p0, z1, z2, z3, 180), -+ z0 = svcmla_z (p0, z1, z2, z3, 180)) -+ -+/* -+** cmla_270_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fcmla z0\.s, p0/m, z1\.s, z2\.s, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_270_f32_z_tied1, svfloat32_t, -+ z0 = svcmla_f32_z (p0, z0, z1, z2, 270), -+ z0 = svcmla_z (p0, z0, z1, z2, 270)) -+ -+/* -+** cmla_270_f32_z_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, z1\.s -+** fcmla z0\.s, p0/m, \1\.s, z2\.s, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_270_f32_z_tied2, svfloat32_t, -+ z0 = svcmla_f32_z (p0, z1, z0, z2, 270), -+ z0 = svcmla_z (p0, z1, z0, z2, 270)) -+ -+/* -+** cmla_270_f32_z_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, z1\.s -+** fcmla z0\.s, p0/m, z2\.s, \1\.s, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_270_f32_z_tied3, svfloat32_t, -+ z0 = svcmla_f32_z (p0, z1, z2, z0, 270), -+ z0 = svcmla_z (p0, z1, z2, z0, 270)) -+ -+/* -+** cmla_270_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fcmla z0\.s, p0/m, z2\.s, z3\.s, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_270_f32_z_untied, svfloat32_t, -+ z0 = svcmla_f32_z (p0, z1, z2, z3, 270), -+ z0 = svcmla_z (p0, z1, z2, z3, 270)) -+ -+/* -+** cmla_0_f32_x_tied1: -+** fcmla z0\.s, p0/m, z1\.s, z2\.s, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_0_f32_x_tied1, svfloat32_t, -+ z0 = svcmla_f32_x (p0, z0, z1, z2, 0), -+ z0 = svcmla_x (p0, z0, z1, z2, 0)) -+ -+/* -+** cmla_0_f32_x_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.s, p0/m, \1\.s, z2\.s, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_0_f32_x_tied2, svfloat32_t, -+ z0 = svcmla_f32_x (p0, z1, z0, z2, 0), -+ z0 = svcmla_x (p0, z1, z0, z2, 0)) -+ -+/* -+** cmla_0_f32_x_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.s, p0/m, z2\.s, \1\.s, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_0_f32_x_tied3, svfloat32_t, -+ z0 = svcmla_f32_x (p0, z1, z2, z0, 0), -+ z0 = svcmla_x (p0, z1, z2, z0, 0)) -+ -+/* -+** cmla_0_f32_x_untied: -+** movprfx z0, z1 -+** fcmla z0\.s, p0/m, z2\.s, z3\.s, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_0_f32_x_untied, svfloat32_t, -+ z0 = svcmla_f32_x (p0, z1, z2, z3, 0), -+ z0 = svcmla_x (p0, z1, z2, z3, 0)) -+ -+/* -+** cmla_90_f32_x_tied1: -+** fcmla z0\.s, p0/m, z1\.s, z2\.s, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_90_f32_x_tied1, svfloat32_t, -+ z0 = svcmla_f32_x (p0, z0, z1, z2, 90), -+ z0 = svcmla_x (p0, z0, z1, z2, 90)) -+ -+/* -+** cmla_90_f32_x_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.s, p0/m, \1\.s, z2\.s, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_90_f32_x_tied2, svfloat32_t, -+ z0 = svcmla_f32_x (p0, z1, z0, z2, 90), -+ z0 = svcmla_x (p0, z1, z0, z2, 90)) -+ -+/* -+** cmla_90_f32_x_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.s, p0/m, z2\.s, \1\.s, #90 -+** ret -+*/ -+TEST_UNIFORM_Z 
(cmla_90_f32_x_tied3, svfloat32_t, -+ z0 = svcmla_f32_x (p0, z1, z2, z0, 90), -+ z0 = svcmla_x (p0, z1, z2, z0, 90)) -+ -+/* -+** cmla_90_f32_x_untied: -+** movprfx z0, z1 -+** fcmla z0\.s, p0/m, z2\.s, z3\.s, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_90_f32_x_untied, svfloat32_t, -+ z0 = svcmla_f32_x (p0, z1, z2, z3, 90), -+ z0 = svcmla_x (p0, z1, z2, z3, 90)) -+ -+/* -+** cmla_180_f32_x_tied1: -+** fcmla z0\.s, p0/m, z1\.s, z2\.s, #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_180_f32_x_tied1, svfloat32_t, -+ z0 = svcmla_f32_x (p0, z0, z1, z2, 180), -+ z0 = svcmla_x (p0, z0, z1, z2, 180)) -+ -+/* -+** cmla_180_f32_x_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.s, p0/m, \1\.s, z2\.s, #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_180_f32_x_tied2, svfloat32_t, -+ z0 = svcmla_f32_x (p0, z1, z0, z2, 180), -+ z0 = svcmla_x (p0, z1, z0, z2, 180)) -+ -+/* -+** cmla_180_f32_x_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.s, p0/m, z2\.s, \1\.s, #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_180_f32_x_tied3, svfloat32_t, -+ z0 = svcmla_f32_x (p0, z1, z2, z0, 180), -+ z0 = svcmla_x (p0, z1, z2, z0, 180)) -+ -+/* -+** cmla_180_f32_x_untied: -+** movprfx z0, z1 -+** fcmla z0\.s, p0/m, z2\.s, z3\.s, #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_180_f32_x_untied, svfloat32_t, -+ z0 = svcmla_f32_x (p0, z1, z2, z3, 180), -+ z0 = svcmla_x (p0, z1, z2, z3, 180)) -+ -+/* -+** cmla_270_f32_x_tied1: -+** fcmla z0\.s, p0/m, z1\.s, z2\.s, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_270_f32_x_tied1, svfloat32_t, -+ z0 = svcmla_f32_x (p0, z0, z1, z2, 270), -+ z0 = svcmla_x (p0, z0, z1, z2, 270)) -+ -+/* -+** cmla_270_f32_x_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.s, p0/m, \1\.s, z2\.s, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_270_f32_x_tied2, svfloat32_t, -+ z0 = svcmla_f32_x (p0, z1, z0, z2, 270), -+ z0 = svcmla_x (p0, z1, z0, z2, 270)) -+ -+/* -+** cmla_270_f32_x_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.s, p0/m, z2\.s, \1\.s, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_270_f32_x_tied3, svfloat32_t, -+ z0 = svcmla_f32_x (p0, z1, z2, z0, 270), -+ z0 = svcmla_x (p0, z1, z2, z0, 270)) -+ -+/* -+** cmla_270_f32_x_untied: -+** movprfx z0, z1 -+** fcmla z0\.s, p0/m, z2\.s, z3\.s, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_270_f32_x_untied, svfloat32_t, -+ z0 = svcmla_f32_x (p0, z1, z2, z3, 270), -+ z0 = svcmla_x (p0, z1, z2, z3, 270)) -+ -+/* -+** ptrue_cmla_0_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_0_f32_x_tied1, svfloat32_t, -+ z0 = svcmla_f32_x (svptrue_b32 (), z0, z1, z2, 0), -+ z0 = svcmla_x (svptrue_b32 (), z0, z1, z2, 0)) -+ -+/* -+** ptrue_cmla_0_f32_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_0_f32_x_tied2, svfloat32_t, -+ z0 = svcmla_f32_x (svptrue_b32 (), z1, z0, z2, 0), -+ z0 = svcmla_x (svptrue_b32 (), z1, z0, z2, 0)) -+ -+/* -+** ptrue_cmla_0_f32_x_tied3: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_0_f32_x_tied3, svfloat32_t, -+ z0 = svcmla_f32_x (svptrue_b32 (), z1, z2, z0, 0), -+ z0 = svcmla_x (svptrue_b32 (), z1, z2, z0, 0)) -+ -+/* -+** ptrue_cmla_0_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_0_f32_x_untied, svfloat32_t, -+ z0 = svcmla_f32_x (svptrue_b32 (), z1, z2, z3, 0), -+ z0 = svcmla_x (svptrue_b32 (), z1, z2, z3, 0)) -+ -+/* -+** ptrue_cmla_90_f32_x_tied1: -+** ... 
-+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_90_f32_x_tied1, svfloat32_t, -+ z0 = svcmla_f32_x (svptrue_b32 (), z0, z1, z2, 90), -+ z0 = svcmla_x (svptrue_b32 (), z0, z1, z2, 90)) -+ -+/* -+** ptrue_cmla_90_f32_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_90_f32_x_tied2, svfloat32_t, -+ z0 = svcmla_f32_x (svptrue_b32 (), z1, z0, z2, 90), -+ z0 = svcmla_x (svptrue_b32 (), z1, z0, z2, 90)) -+ -+/* -+** ptrue_cmla_90_f32_x_tied3: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_90_f32_x_tied3, svfloat32_t, -+ z0 = svcmla_f32_x (svptrue_b32 (), z1, z2, z0, 90), -+ z0 = svcmla_x (svptrue_b32 (), z1, z2, z0, 90)) -+ -+/* -+** ptrue_cmla_90_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_90_f32_x_untied, svfloat32_t, -+ z0 = svcmla_f32_x (svptrue_b32 (), z1, z2, z3, 90), -+ z0 = svcmla_x (svptrue_b32 (), z1, z2, z3, 90)) -+ -+/* -+** ptrue_cmla_180_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_180_f32_x_tied1, svfloat32_t, -+ z0 = svcmla_f32_x (svptrue_b32 (), z0, z1, z2, 180), -+ z0 = svcmla_x (svptrue_b32 (), z0, z1, z2, 180)) -+ -+/* -+** ptrue_cmla_180_f32_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_180_f32_x_tied2, svfloat32_t, -+ z0 = svcmla_f32_x (svptrue_b32 (), z1, z0, z2, 180), -+ z0 = svcmla_x (svptrue_b32 (), z1, z0, z2, 180)) -+ -+/* -+** ptrue_cmla_180_f32_x_tied3: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_180_f32_x_tied3, svfloat32_t, -+ z0 = svcmla_f32_x (svptrue_b32 (), z1, z2, z0, 180), -+ z0 = svcmla_x (svptrue_b32 (), z1, z2, z0, 180)) -+ -+/* -+** ptrue_cmla_180_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_180_f32_x_untied, svfloat32_t, -+ z0 = svcmla_f32_x (svptrue_b32 (), z1, z2, z3, 180), -+ z0 = svcmla_x (svptrue_b32 (), z1, z2, z3, 180)) -+ -+/* -+** ptrue_cmla_270_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_270_f32_x_tied1, svfloat32_t, -+ z0 = svcmla_f32_x (svptrue_b32 (), z0, z1, z2, 270), -+ z0 = svcmla_x (svptrue_b32 (), z0, z1, z2, 270)) -+ -+/* -+** ptrue_cmla_270_f32_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_270_f32_x_tied2, svfloat32_t, -+ z0 = svcmla_f32_x (svptrue_b32 (), z1, z0, z2, 270), -+ z0 = svcmla_x (svptrue_b32 (), z1, z0, z2, 270)) -+ -+/* -+** ptrue_cmla_270_f32_x_tied3: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_270_f32_x_tied3, svfloat32_t, -+ z0 = svcmla_f32_x (svptrue_b32 (), z1, z2, z0, 270), -+ z0 = svcmla_x (svptrue_b32 (), z1, z2, z0, 270)) -+ -+/* -+** ptrue_cmla_270_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_270_f32_x_untied, svfloat32_t, -+ z0 = svcmla_f32_x (svptrue_b32 (), z1, z2, z3, 270), -+ z0 = svcmla_x (svptrue_b32 (), z1, z2, z3, 270)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmla_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmla_f64.c -new file mode 100644 -index 000000000..024ae5ce3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmla_f64.c -@@ -0,0 +1,675 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmla_0_f64_m_tied1: -+** fcmla z0\.d, p0/m, z1\.d, z2\.d, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_0_f64_m_tied1, svfloat64_t, -+ z0 = svcmla_f64_m (p0, z0, z1, z2, 0), -+ z0 = svcmla_m (p0, z0, z1, z2, 0)) -+ -+/* -+** cmla_0_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fcmla z0\.d, p0/m, \1, z2\.d, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_0_f64_m_tied2, svfloat64_t, -+ z0 = svcmla_f64_m (p0, z1, z0, z2, 0), -+ z0 = svcmla_m (p0, z1, z0, z2, 0)) -+ -+/* -+** cmla_0_f64_m_tied3: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fcmla z0\.d, p0/m, z2\.d, \1, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_0_f64_m_tied3, svfloat64_t, -+ z0 = svcmla_f64_m (p0, z1, z2, z0, 0), -+ z0 = svcmla_m (p0, z1, z2, z0, 0)) -+ -+/* -+** cmla_0_f64_m_untied: -+** movprfx z0, z1 -+** fcmla z0\.d, p0/m, z2\.d, z3\.d, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_0_f64_m_untied, svfloat64_t, -+ z0 = svcmla_f64_m (p0, z1, z2, z3, 0), -+ z0 = svcmla_m (p0, z1, z2, z3, 0)) -+ -+/* -+** cmla_90_f64_m_tied1: -+** fcmla z0\.d, p0/m, z1\.d, z2\.d, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_90_f64_m_tied1, svfloat64_t, -+ z0 = svcmla_f64_m (p0, z0, z1, z2, 90), -+ z0 = svcmla_m (p0, z0, z1, z2, 90)) -+ -+/* -+** cmla_90_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fcmla z0\.d, p0/m, \1, z2\.d, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_90_f64_m_tied2, svfloat64_t, -+ z0 = svcmla_f64_m (p0, z1, z0, z2, 90), -+ z0 = svcmla_m (p0, z1, z0, z2, 90)) -+ -+/* -+** cmla_90_f64_m_tied3: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fcmla z0\.d, p0/m, z2\.d, \1, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_90_f64_m_tied3, svfloat64_t, -+ z0 = svcmla_f64_m (p0, z1, z2, z0, 90), -+ z0 = svcmla_m (p0, z1, z2, z0, 90)) -+ -+/* -+** cmla_90_f64_m_untied: -+** movprfx z0, z1 -+** fcmla z0\.d, p0/m, z2\.d, z3\.d, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_90_f64_m_untied, svfloat64_t, -+ z0 = svcmla_f64_m (p0, z1, z2, z3, 90), -+ z0 = svcmla_m (p0, z1, z2, z3, 90)) -+ -+/* -+** cmla_180_f64_m_tied1: -+** fcmla z0\.d, p0/m, z1\.d, z2\.d, #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_180_f64_m_tied1, svfloat64_t, -+ z0 = svcmla_f64_m (p0, z0, z1, z2, 180), -+ z0 = svcmla_m (p0, z0, z1, z2, 180)) -+ -+/* -+** cmla_180_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fcmla z0\.d, p0/m, \1, z2\.d, #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_180_f64_m_tied2, svfloat64_t, -+ z0 = svcmla_f64_m (p0, z1, z0, z2, 180), -+ z0 = svcmla_m (p0, z1, z0, z2, 180)) -+ -+/* -+** cmla_180_f64_m_tied3: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fcmla z0\.d, p0/m, z2\.d, \1, #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_180_f64_m_tied3, svfloat64_t, -+ z0 = svcmla_f64_m (p0, z1, z2, z0, 180), -+ z0 = svcmla_m (p0, z1, z2, z0, 180)) -+ -+/* -+** cmla_180_f64_m_untied: -+** movprfx z0, z1 -+** fcmla z0\.d, p0/m, z2\.d, z3\.d, #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_180_f64_m_untied, svfloat64_t, -+ z0 = svcmla_f64_m (p0, z1, z2, z3, 180), -+ 
z0 = svcmla_m (p0, z1, z2, z3, 180)) -+ -+/* -+** cmla_270_f64_m_tied1: -+** fcmla z0\.d, p0/m, z1\.d, z2\.d, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_270_f64_m_tied1, svfloat64_t, -+ z0 = svcmla_f64_m (p0, z0, z1, z2, 270), -+ z0 = svcmla_m (p0, z0, z1, z2, 270)) -+ -+/* -+** cmla_270_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fcmla z0\.d, p0/m, \1, z2\.d, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_270_f64_m_tied2, svfloat64_t, -+ z0 = svcmla_f64_m (p0, z1, z0, z2, 270), -+ z0 = svcmla_m (p0, z1, z0, z2, 270)) -+ -+/* -+** cmla_270_f64_m_tied3: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fcmla z0\.d, p0/m, z2\.d, \1, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_270_f64_m_tied3, svfloat64_t, -+ z0 = svcmla_f64_m (p0, z1, z2, z0, 270), -+ z0 = svcmla_m (p0, z1, z2, z0, 270)) -+ -+/* -+** cmla_270_f64_m_untied: -+** movprfx z0, z1 -+** fcmla z0\.d, p0/m, z2\.d, z3\.d, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_270_f64_m_untied, svfloat64_t, -+ z0 = svcmla_f64_m (p0, z1, z2, z3, 270), -+ z0 = svcmla_m (p0, z1, z2, z3, 270)) -+ -+/* -+** cmla_0_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fcmla z0\.d, p0/m, z1\.d, z2\.d, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_0_f64_z_tied1, svfloat64_t, -+ z0 = svcmla_f64_z (p0, z0, z1, z2, 0), -+ z0 = svcmla_z (p0, z0, z1, z2, 0)) -+ -+/* -+** cmla_0_f64_z_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, z1\.d -+** fcmla z0\.d, p0/m, \1, z2\.d, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_0_f64_z_tied2, svfloat64_t, -+ z0 = svcmla_f64_z (p0, z1, z0, z2, 0), -+ z0 = svcmla_z (p0, z1, z0, z2, 0)) -+ -+/* -+** cmla_0_f64_z_tied3: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, z1\.d -+** fcmla z0\.d, p0/m, z2\.d, \1, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_0_f64_z_tied3, svfloat64_t, -+ z0 = svcmla_f64_z (p0, z1, z2, z0, 0), -+ z0 = svcmla_z (p0, z1, z2, z0, 0)) -+ -+/* -+** cmla_0_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fcmla z0\.d, p0/m, z2\.d, z3\.d, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_0_f64_z_untied, svfloat64_t, -+ z0 = svcmla_f64_z (p0, z1, z2, z3, 0), -+ z0 = svcmla_z (p0, z1, z2, z3, 0)) -+ -+/* -+** cmla_90_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fcmla z0\.d, p0/m, z1\.d, z2\.d, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_90_f64_z_tied1, svfloat64_t, -+ z0 = svcmla_f64_z (p0, z0, z1, z2, 90), -+ z0 = svcmla_z (p0, z0, z1, z2, 90)) -+ -+/* -+** cmla_90_f64_z_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, z1\.d -+** fcmla z0\.d, p0/m, \1, z2\.d, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_90_f64_z_tied2, svfloat64_t, -+ z0 = svcmla_f64_z (p0, z1, z0, z2, 90), -+ z0 = svcmla_z (p0, z1, z0, z2, 90)) -+ -+/* -+** cmla_90_f64_z_tied3: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, z1\.d -+** fcmla z0\.d, p0/m, z2\.d, \1, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_90_f64_z_tied3, svfloat64_t, -+ z0 = svcmla_f64_z (p0, z1, z2, z0, 90), -+ z0 = svcmla_z (p0, z1, z2, z0, 90)) -+ -+/* -+** cmla_90_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fcmla z0\.d, p0/m, z2\.d, z3\.d, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_90_f64_z_untied, svfloat64_t, -+ z0 = svcmla_f64_z (p0, z1, z2, z3, 90), -+ z0 = svcmla_z (p0, z1, z2, z3, 90)) -+ -+/* -+** cmla_180_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fcmla z0\.d, p0/m, z1\.d, z2\.d, #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_180_f64_z_tied1, svfloat64_t, -+ z0 = svcmla_f64_z (p0, z0, z1, z2, 180), -+ z0 = svcmla_z (p0, z0, z1, z2, 180)) -+ -+/* -+** cmla_180_f64_z_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, 
z1\.d -+** fcmla z0\.d, p0/m, \1, z2\.d, #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_180_f64_z_tied2, svfloat64_t, -+ z0 = svcmla_f64_z (p0, z1, z0, z2, 180), -+ z0 = svcmla_z (p0, z1, z0, z2, 180)) -+ -+/* -+** cmla_180_f64_z_tied3: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, z1\.d -+** fcmla z0\.d, p0/m, z2\.d, \1, #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_180_f64_z_tied3, svfloat64_t, -+ z0 = svcmla_f64_z (p0, z1, z2, z0, 180), -+ z0 = svcmla_z (p0, z1, z2, z0, 180)) -+ -+/* -+** cmla_180_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fcmla z0\.d, p0/m, z2\.d, z3\.d, #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_180_f64_z_untied, svfloat64_t, -+ z0 = svcmla_f64_z (p0, z1, z2, z3, 180), -+ z0 = svcmla_z (p0, z1, z2, z3, 180)) -+ -+/* -+** cmla_270_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fcmla z0\.d, p0/m, z1\.d, z2\.d, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_270_f64_z_tied1, svfloat64_t, -+ z0 = svcmla_f64_z (p0, z0, z1, z2, 270), -+ z0 = svcmla_z (p0, z0, z1, z2, 270)) -+ -+/* -+** cmla_270_f64_z_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, z1\.d -+** fcmla z0\.d, p0/m, \1, z2\.d, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_270_f64_z_tied2, svfloat64_t, -+ z0 = svcmla_f64_z (p0, z1, z0, z2, 270), -+ z0 = svcmla_z (p0, z1, z0, z2, 270)) -+ -+/* -+** cmla_270_f64_z_tied3: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, z1\.d -+** fcmla z0\.d, p0/m, z2\.d, \1, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_270_f64_z_tied3, svfloat64_t, -+ z0 = svcmla_f64_z (p0, z1, z2, z0, 270), -+ z0 = svcmla_z (p0, z1, z2, z0, 270)) -+ -+/* -+** cmla_270_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fcmla z0\.d, p0/m, z2\.d, z3\.d, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_270_f64_z_untied, svfloat64_t, -+ z0 = svcmla_f64_z (p0, z1, z2, z3, 270), -+ z0 = svcmla_z (p0, z1, z2, z3, 270)) -+ -+/* -+** cmla_0_f64_x_tied1: -+** fcmla z0\.d, p0/m, z1\.d, z2\.d, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_0_f64_x_tied1, svfloat64_t, -+ z0 = svcmla_f64_x (p0, z0, z1, z2, 0), -+ z0 = svcmla_x (p0, z0, z1, z2, 0)) -+ -+/* -+** cmla_0_f64_x_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fcmla z0\.d, p0/m, \1, z2\.d, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_0_f64_x_tied2, svfloat64_t, -+ z0 = svcmla_f64_x (p0, z1, z0, z2, 0), -+ z0 = svcmla_x (p0, z1, z0, z2, 0)) -+ -+/* -+** cmla_0_f64_x_tied3: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fcmla z0\.d, p0/m, z2\.d, \1, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_0_f64_x_tied3, svfloat64_t, -+ z0 = svcmla_f64_x (p0, z1, z2, z0, 0), -+ z0 = svcmla_x (p0, z1, z2, z0, 0)) -+ -+/* -+** cmla_0_f64_x_untied: -+** movprfx z0, z1 -+** fcmla z0\.d, p0/m, z2\.d, z3\.d, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_0_f64_x_untied, svfloat64_t, -+ z0 = svcmla_f64_x (p0, z1, z2, z3, 0), -+ z0 = svcmla_x (p0, z1, z2, z3, 0)) -+ -+/* -+** cmla_90_f64_x_tied1: -+** fcmla z0\.d, p0/m, z1\.d, z2\.d, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_90_f64_x_tied1, svfloat64_t, -+ z0 = svcmla_f64_x (p0, z0, z1, z2, 90), -+ z0 = svcmla_x (p0, z0, z1, z2, 90)) -+ -+/* -+** cmla_90_f64_x_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fcmla z0\.d, p0/m, \1, z2\.d, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_90_f64_x_tied2, svfloat64_t, -+ z0 = svcmla_f64_x (p0, z1, z0, z2, 90), -+ z0 = svcmla_x (p0, z1, z0, z2, 90)) -+ -+/* -+** cmla_90_f64_x_tied3: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fcmla z0\.d, p0/m, z2\.d, \1, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_90_f64_x_tied3, svfloat64_t, -+ z0 = svcmla_f64_x (p0, z1, z2, z0, 
90), -+ z0 = svcmla_x (p0, z1, z2, z0, 90)) -+ -+/* -+** cmla_90_f64_x_untied: -+** movprfx z0, z1 -+** fcmla z0\.d, p0/m, z2\.d, z3\.d, #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_90_f64_x_untied, svfloat64_t, -+ z0 = svcmla_f64_x (p0, z1, z2, z3, 90), -+ z0 = svcmla_x (p0, z1, z2, z3, 90)) -+ -+/* -+** cmla_180_f64_x_tied1: -+** fcmla z0\.d, p0/m, z1\.d, z2\.d, #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_180_f64_x_tied1, svfloat64_t, -+ z0 = svcmla_f64_x (p0, z0, z1, z2, 180), -+ z0 = svcmla_x (p0, z0, z1, z2, 180)) -+ -+/* -+** cmla_180_f64_x_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fcmla z0\.d, p0/m, \1, z2\.d, #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_180_f64_x_tied2, svfloat64_t, -+ z0 = svcmla_f64_x (p0, z1, z0, z2, 180), -+ z0 = svcmla_x (p0, z1, z0, z2, 180)) -+ -+/* -+** cmla_180_f64_x_tied3: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fcmla z0\.d, p0/m, z2\.d, \1, #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_180_f64_x_tied3, svfloat64_t, -+ z0 = svcmla_f64_x (p0, z1, z2, z0, 180), -+ z0 = svcmla_x (p0, z1, z2, z0, 180)) -+ -+/* -+** cmla_180_f64_x_untied: -+** movprfx z0, z1 -+** fcmla z0\.d, p0/m, z2\.d, z3\.d, #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_180_f64_x_untied, svfloat64_t, -+ z0 = svcmla_f64_x (p0, z1, z2, z3, 180), -+ z0 = svcmla_x (p0, z1, z2, z3, 180)) -+ -+/* -+** cmla_270_f64_x_tied1: -+** fcmla z0\.d, p0/m, z1\.d, z2\.d, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_270_f64_x_tied1, svfloat64_t, -+ z0 = svcmla_f64_x (p0, z0, z1, z2, 270), -+ z0 = svcmla_x (p0, z0, z1, z2, 270)) -+ -+/* -+** cmla_270_f64_x_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fcmla z0\.d, p0/m, \1, z2\.d, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_270_f64_x_tied2, svfloat64_t, -+ z0 = svcmla_f64_x (p0, z1, z0, z2, 270), -+ z0 = svcmla_x (p0, z1, z0, z2, 270)) -+ -+/* -+** cmla_270_f64_x_tied3: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fcmla z0\.d, p0/m, z2\.d, \1, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_270_f64_x_tied3, svfloat64_t, -+ z0 = svcmla_f64_x (p0, z1, z2, z0, 270), -+ z0 = svcmla_x (p0, z1, z2, z0, 270)) -+ -+/* -+** cmla_270_f64_x_untied: -+** movprfx z0, z1 -+** fcmla z0\.d, p0/m, z2\.d, z3\.d, #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_270_f64_x_untied, svfloat64_t, -+ z0 = svcmla_f64_x (p0, z1, z2, z3, 270), -+ z0 = svcmla_x (p0, z1, z2, z3, 270)) -+ -+/* -+** ptrue_cmla_0_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_0_f64_x_tied1, svfloat64_t, -+ z0 = svcmla_f64_x (svptrue_b64 (), z0, z1, z2, 0), -+ z0 = svcmla_x (svptrue_b64 (), z0, z1, z2, 0)) -+ -+/* -+** ptrue_cmla_0_f64_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_0_f64_x_tied2, svfloat64_t, -+ z0 = svcmla_f64_x (svptrue_b64 (), z1, z0, z2, 0), -+ z0 = svcmla_x (svptrue_b64 (), z1, z0, z2, 0)) -+ -+/* -+** ptrue_cmla_0_f64_x_tied3: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_0_f64_x_tied3, svfloat64_t, -+ z0 = svcmla_f64_x (svptrue_b64 (), z1, z2, z0, 0), -+ z0 = svcmla_x (svptrue_b64 (), z1, z2, z0, 0)) -+ -+/* -+** ptrue_cmla_0_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_0_f64_x_untied, svfloat64_t, -+ z0 = svcmla_f64_x (svptrue_b64 (), z1, z2, z3, 0), -+ z0 = svcmla_x (svptrue_b64 (), z1, z2, z3, 0)) -+ -+/* -+** ptrue_cmla_90_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_90_f64_x_tied1, svfloat64_t, -+ z0 = svcmla_f64_x (svptrue_b64 (), z0, z1, z2, 90), -+ z0 = svcmla_x (svptrue_b64 (), z0, z1, z2, 90)) -+ -+/* -+** ptrue_cmla_90_f64_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_90_f64_x_tied2, svfloat64_t, -+ z0 = svcmla_f64_x (svptrue_b64 (), z1, z0, z2, 90), -+ z0 = svcmla_x (svptrue_b64 (), z1, z0, z2, 90)) -+ -+/* -+** ptrue_cmla_90_f64_x_tied3: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_90_f64_x_tied3, svfloat64_t, -+ z0 = svcmla_f64_x (svptrue_b64 (), z1, z2, z0, 90), -+ z0 = svcmla_x (svptrue_b64 (), z1, z2, z0, 90)) -+ -+/* -+** ptrue_cmla_90_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_90_f64_x_untied, svfloat64_t, -+ z0 = svcmla_f64_x (svptrue_b64 (), z1, z2, z3, 90), -+ z0 = svcmla_x (svptrue_b64 (), z1, z2, z3, 90)) -+ -+/* -+** ptrue_cmla_180_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_180_f64_x_tied1, svfloat64_t, -+ z0 = svcmla_f64_x (svptrue_b64 (), z0, z1, z2, 180), -+ z0 = svcmla_x (svptrue_b64 (), z0, z1, z2, 180)) -+ -+/* -+** ptrue_cmla_180_f64_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_180_f64_x_tied2, svfloat64_t, -+ z0 = svcmla_f64_x (svptrue_b64 (), z1, z0, z2, 180), -+ z0 = svcmla_x (svptrue_b64 (), z1, z0, z2, 180)) -+ -+/* -+** ptrue_cmla_180_f64_x_tied3: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_180_f64_x_tied3, svfloat64_t, -+ z0 = svcmla_f64_x (svptrue_b64 (), z1, z2, z0, 180), -+ z0 = svcmla_x (svptrue_b64 (), z1, z2, z0, 180)) -+ -+/* -+** ptrue_cmla_180_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_180_f64_x_untied, svfloat64_t, -+ z0 = svcmla_f64_x (svptrue_b64 (), z1, z2, z3, 180), -+ z0 = svcmla_x (svptrue_b64 (), z1, z2, z3, 180)) -+ -+/* -+** ptrue_cmla_270_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_270_f64_x_tied1, svfloat64_t, -+ z0 = svcmla_f64_x (svptrue_b64 (), z0, z1, z2, 270), -+ z0 = svcmla_x (svptrue_b64 (), z0, z1, z2, 270)) -+ -+/* -+** ptrue_cmla_270_f64_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_270_f64_x_tied2, svfloat64_t, -+ z0 = svcmla_f64_x (svptrue_b64 (), z1, z0, z2, 270), -+ z0 = svcmla_x (svptrue_b64 (), z1, z0, z2, 270)) -+ -+/* -+** ptrue_cmla_270_f64_x_tied3: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_270_f64_x_tied3, svfloat64_t, -+ z0 = svcmla_f64_x (svptrue_b64 (), z1, z2, z0, 270), -+ z0 = svcmla_x (svptrue_b64 (), z1, z2, z0, 270)) -+ -+/* -+** ptrue_cmla_270_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_cmla_270_f64_x_untied, svfloat64_t, -+ z0 = svcmla_f64_x (svptrue_b64 (), z1, z2, z3, 270), -+ z0 = svcmla_x (svptrue_b64 (), z1, z2, z3, 270)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmla_lane_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmla_lane_f16.c -new file mode 100644 -index 000000000..16f1b77ad ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmla_lane_f16.c -@@ -0,0 +1,194 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmla_lane_0_0_f16_tied1: -+** fcmla z0\.h, z1\.h, z2\.h\[0\], #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_lane_0_0_f16_tied1, svfloat16_t, -+ z0 = svcmla_lane_f16 (z0, z1, z2, 0, 0), -+ z0 = svcmla_lane (z0, z1, z2, 0, 0)) -+ -+/* -+** cmla_lane_0_0_f16_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.h, \1\.h, z2\.h\[0\], #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_lane_0_0_f16_tied2, svfloat16_t, -+ z0 = svcmla_lane_f16 (z1, z0, z2, 0, 0), -+ z0 = svcmla_lane (z1, z0, z2, 0, 0)) -+ -+/* -+** cmla_lane_0_0_f16_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.h, z2\.h, \1\.h\[0\], #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_lane_0_0_f16_tied3, svfloat16_t, -+ z0 = svcmla_lane_f16 (z1, z2, z0, 0, 0), -+ z0 = svcmla_lane (z1, z2, z0, 0, 0)) -+ -+/* -+** cmla_lane_0_0_f16_untied: -+** movprfx z0, z1 -+** fcmla z0\.h, z2\.h, z3\.h\[0\], #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_lane_0_0_f16_untied, svfloat16_t, -+ z0 = svcmla_lane_f16 (z1, z2, z3, 0, 0), -+ z0 = svcmla_lane (z1, z2, z3, 0, 0)) -+ -+/* -+** cmla_lane_0_90_f16_tied1: -+** fcmla z0\.h, z1\.h, z2\.h\[0\], #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_lane_0_90_f16_tied1, svfloat16_t, -+ z0 = svcmla_lane_f16 (z0, z1, z2, 0, 90), -+ z0 = svcmla_lane (z0, z1, z2, 0, 90)) -+ -+/* -+** cmla_lane_0_90_f16_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.h, \1\.h, z2\.h\[0\], #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_lane_0_90_f16_tied2, svfloat16_t, -+ z0 = svcmla_lane_f16 (z1, z0, z2, 0, 90), -+ z0 = svcmla_lane (z1, z0, z2, 0, 90)) -+ -+/* -+** cmla_lane_0_90_f16_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.h, z2\.h, \1\.h\[0\], #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_lane_0_90_f16_tied3, svfloat16_t, -+ z0 = svcmla_lane_f16 (z1, z2, z0, 0, 90), -+ z0 = svcmla_lane (z1, z2, z0, 0, 90)) -+ -+/* -+** cmla_lane_0_90_f16_untied: -+** movprfx z0, z1 -+** fcmla z0\.h, z2\.h, z3\.h\[0\], #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_lane_0_90_f16_untied, svfloat16_t, -+ z0 = svcmla_lane_f16 (z1, z2, z3, 0, 90), -+ z0 = svcmla_lane (z1, z2, z3, 0, 90)) -+ -+/* -+** cmla_lane_0_180_f16_tied1: -+** fcmla z0\.h, z1\.h, z2\.h\[0\], #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_lane_0_180_f16_tied1, svfloat16_t, -+ z0 = svcmla_lane_f16 (z0, z1, z2, 0, 180), -+ z0 = svcmla_lane (z0, z1, z2, 0, 180)) -+ -+/* -+** cmla_lane_0_180_f16_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.h, \1\.h, z2\.h\[0\], #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_lane_0_180_f16_tied2, svfloat16_t, -+ z0 = svcmla_lane_f16 (z1, z0, z2, 0, 180), -+ z0 = svcmla_lane (z1, z0, z2, 0, 180)) -+ -+/* -+** cmla_lane_0_180_f16_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.h, z2\.h, \1\.h\[0\], #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_lane_0_180_f16_tied3, svfloat16_t, -+ z0 = svcmla_lane_f16 (z1, z2, z0, 0, 180), -+ z0 = svcmla_lane (z1, z2, z0, 0, 180)) -+ -+/* -+** 
cmla_lane_0_180_f16_untied: -+** movprfx z0, z1 -+** fcmla z0\.h, z2\.h, z3\.h\[0\], #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_lane_0_180_f16_untied, svfloat16_t, -+ z0 = svcmla_lane_f16 (z1, z2, z3, 0, 180), -+ z0 = svcmla_lane (z1, z2, z3, 0, 180)) -+ -+/* -+** cmla_lane_0_270_f16_tied1: -+** fcmla z0\.h, z1\.h, z2\.h\[0\], #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_lane_0_270_f16_tied1, svfloat16_t, -+ z0 = svcmla_lane_f16 (z0, z1, z2, 0, 270), -+ z0 = svcmla_lane (z0, z1, z2, 0, 270)) -+ -+/* -+** cmla_lane_0_270_f16_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.h, \1\.h, z2\.h\[0\], #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_lane_0_270_f16_tied2, svfloat16_t, -+ z0 = svcmla_lane_f16 (z1, z0, z2, 0, 270), -+ z0 = svcmla_lane (z1, z0, z2, 0, 270)) -+ -+/* -+** cmla_lane_0_270_f16_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.h, z2\.h, \1\.h\[0\], #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_lane_0_270_f16_tied3, svfloat16_t, -+ z0 = svcmla_lane_f16 (z1, z2, z0, 0, 270), -+ z0 = svcmla_lane (z1, z2, z0, 0, 270)) -+ -+/* -+** cmla_lane_0_270_f16_untied: -+** movprfx z0, z1 -+** fcmla z0\.h, z2\.h, z3\.h\[0\], #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_lane_0_270_f16_untied, svfloat16_t, -+ z0 = svcmla_lane_f16 (z1, z2, z3, 0, 270), -+ z0 = svcmla_lane (z1, z2, z3, 0, 270)) -+ -+/* -+** cmla_lane_1_f16: -+** fcmla z0\.h, z1\.h, z2\.h\[1\], #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_lane_1_f16, svfloat16_t, -+ z0 = svcmla_lane_f16 (z0, z1, z2, 1, 0), -+ z0 = svcmla_lane (z0, z1, z2, 1, 0)) -+ -+/* -+** cmla_lane_2_f16: -+** fcmla z0\.h, z1\.h, z2\.h\[2\], #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_lane_2_f16, svfloat16_t, -+ z0 = svcmla_lane_f16 (z0, z1, z2, 2, 0), -+ z0 = svcmla_lane (z0, z1, z2, 2, 0)) -+ -+/* -+** cmla_lane_3_f16: -+** fcmla z0\.h, z1\.h, z2\.h\[3\], #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_lane_3_f16, svfloat16_t, -+ z0 = svcmla_lane_f16 (z0, z1, z2, 3, 0), -+ z0 = svcmla_lane (z0, z1, z2, 3, 0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmla_lane_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmla_lane_f32.c -new file mode 100644 -index 000000000..85bff68fd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmla_lane_f32.c -@@ -0,0 +1,176 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmla_lane_0_0_f32_tied1: -+** fcmla z0\.s, z1\.s, z2\.s\[0\], #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_lane_0_0_f32_tied1, svfloat32_t, -+ z0 = svcmla_lane_f32 (z0, z1, z2, 0, 0), -+ z0 = svcmla_lane (z0, z1, z2, 0, 0)) -+ -+/* -+** cmla_lane_0_0_f32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.s, \1\.s, z2\.s\[0\], #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_lane_0_0_f32_tied2, svfloat32_t, -+ z0 = svcmla_lane_f32 (z1, z0, z2, 0, 0), -+ z0 = svcmla_lane (z1, z0, z2, 0, 0)) -+ -+/* -+** cmla_lane_0_0_f32_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.s, z2\.s, \1\.s\[0\], #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_lane_0_0_f32_tied3, svfloat32_t, -+ z0 = svcmla_lane_f32 (z1, z2, z0, 0, 0), -+ z0 = svcmla_lane (z1, z2, z0, 0, 0)) -+ -+/* -+** cmla_lane_0_0_f32_untied: -+** movprfx z0, z1 -+** fcmla z0\.s, z2\.s, z3\.s\[0\], #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_lane_0_0_f32_untied, svfloat32_t, -+ z0 = svcmla_lane_f32 (z1, z2, z3, 0, 0), -+ z0 = svcmla_lane (z1, z2, z3, 0, 0)) -+ -+/* -+** cmla_lane_0_90_f32_tied1: -+** fcmla z0\.s, z1\.s, z2\.s\[0\], #90 -+** ret -+*/ -+TEST_UNIFORM_Z 
(cmla_lane_0_90_f32_tied1, svfloat32_t, -+ z0 = svcmla_lane_f32 (z0, z1, z2, 0, 90), -+ z0 = svcmla_lane (z0, z1, z2, 0, 90)) -+ -+/* -+** cmla_lane_0_90_f32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.s, \1\.s, z2\.s\[0\], #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_lane_0_90_f32_tied2, svfloat32_t, -+ z0 = svcmla_lane_f32 (z1, z0, z2, 0, 90), -+ z0 = svcmla_lane (z1, z0, z2, 0, 90)) -+ -+/* -+** cmla_lane_0_90_f32_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.s, z2\.s, \1\.s\[0\], #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_lane_0_90_f32_tied3, svfloat32_t, -+ z0 = svcmla_lane_f32 (z1, z2, z0, 0, 90), -+ z0 = svcmla_lane (z1, z2, z0, 0, 90)) -+ -+/* -+** cmla_lane_0_90_f32_untied: -+** movprfx z0, z1 -+** fcmla z0\.s, z2\.s, z3\.s\[0\], #90 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_lane_0_90_f32_untied, svfloat32_t, -+ z0 = svcmla_lane_f32 (z1, z2, z3, 0, 90), -+ z0 = svcmla_lane (z1, z2, z3, 0, 90)) -+ -+/* -+** cmla_lane_0_180_f32_tied1: -+** fcmla z0\.s, z1\.s, z2\.s\[0\], #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_lane_0_180_f32_tied1, svfloat32_t, -+ z0 = svcmla_lane_f32 (z0, z1, z2, 0, 180), -+ z0 = svcmla_lane (z0, z1, z2, 0, 180)) -+ -+/* -+** cmla_lane_0_180_f32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.s, \1\.s, z2\.s\[0\], #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_lane_0_180_f32_tied2, svfloat32_t, -+ z0 = svcmla_lane_f32 (z1, z0, z2, 0, 180), -+ z0 = svcmla_lane (z1, z0, z2, 0, 180)) -+ -+/* -+** cmla_lane_0_180_f32_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.s, z2\.s, \1\.s\[0\], #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_lane_0_180_f32_tied3, svfloat32_t, -+ z0 = svcmla_lane_f32 (z1, z2, z0, 0, 180), -+ z0 = svcmla_lane (z1, z2, z0, 0, 180)) -+ -+/* -+** cmla_lane_0_180_f32_untied: -+** movprfx z0, z1 -+** fcmla z0\.s, z2\.s, z3\.s\[0\], #180 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_lane_0_180_f32_untied, svfloat32_t, -+ z0 = svcmla_lane_f32 (z1, z2, z3, 0, 180), -+ z0 = svcmla_lane (z1, z2, z3, 0, 180)) -+ -+/* -+** cmla_lane_0_270_f32_tied1: -+** fcmla z0\.s, z1\.s, z2\.s\[0\], #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_lane_0_270_f32_tied1, svfloat32_t, -+ z0 = svcmla_lane_f32 (z0, z1, z2, 0, 270), -+ z0 = svcmla_lane (z0, z1, z2, 0, 270)) -+ -+/* -+** cmla_lane_0_270_f32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.s, \1\.s, z2\.s\[0\], #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_lane_0_270_f32_tied2, svfloat32_t, -+ z0 = svcmla_lane_f32 (z1, z0, z2, 0, 270), -+ z0 = svcmla_lane (z1, z0, z2, 0, 270)) -+ -+/* -+** cmla_lane_0_270_f32_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fcmla z0\.s, z2\.s, \1\.s\[0\], #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_lane_0_270_f32_tied3, svfloat32_t, -+ z0 = svcmla_lane_f32 (z1, z2, z0, 0, 270), -+ z0 = svcmla_lane (z1, z2, z0, 0, 270)) -+ -+/* -+** cmla_lane_0_270_f32_untied: -+** movprfx z0, z1 -+** fcmla z0\.s, z2\.s, z3\.s\[0\], #270 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_lane_0_270_f32_untied, svfloat32_t, -+ z0 = svcmla_lane_f32 (z1, z2, z3, 0, 270), -+ z0 = svcmla_lane (z1, z2, z3, 0, 270)) -+ -+/* -+** cmla_lane_1_f32: -+** fcmla z0\.s, z1\.s, z2\.s\[1\], #0 -+** ret -+*/ -+TEST_UNIFORM_Z (cmla_lane_1_f32, svfloat32_t, -+ z0 = svcmla_lane_f32 (z0, z1, z2, 1, 0), -+ z0 = svcmla_lane (z0, z1, z2, 1, 0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_f16.c -new file mode 100644 -index 000000000..7149ad300 ---- /dev/null -+++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_f16.c -@@ -0,0 +1,50 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpeq_f16_tied: -+** fcmeq p0\.h, p0/z, (z0\.h, z1\.h|z1\.h, z0\.h) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_f16_tied, svfloat16_t, -+ p0 = svcmpeq_f16 (p0, z0, z1), -+ p0 = svcmpeq (p0, z0, z1)) -+ -+/* -+** cmpeq_f16_untied: -+** fcmeq p0\.h, p1/z, (z0\.h, z1\.h|z1\.h, z0\.h) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_f16_untied, svfloat16_t, -+ p0 = svcmpeq_f16 (p1, z0, z1), -+ p0 = svcmpeq (p1, z0, z1)) -+ -+/* -+** cmpeq_h4_f16: -+** mov (z[0-9]+\.h), h4 -+** fcmeq p0\.h, p1/z, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_COMPARE_ZD (cmpeq_h4_f16, svfloat16_t, float16_t, -+ p0 = svcmpeq_n_f16 (p1, z0, d4), -+ p0 = svcmpeq (p1, z0, d4)) -+ -+/* -+** cmpeq_0_f16: -+** fcmeq p0\.h, p1/z, z0\.h, #0\.0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_0_f16, svfloat16_t, -+ p0 = svcmpeq_n_f16 (p1, z0, 0), -+ p0 = svcmpeq (p1, z0, 0)) -+ -+/* -+** cmpeq_1_f16: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? -+** fcmeq p0\.h, p1/z, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_1_f16, svfloat16_t, -+ p0 = svcmpeq_n_f16 (p1, z0, 1), -+ p0 = svcmpeq (p1, z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_f32.c -new file mode 100644 -index 000000000..05910bc50 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_f32.c -@@ -0,0 +1,50 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpeq_f32_tied: -+** fcmeq p0\.s, p0/z, (z0\.s, z1\.s|z1\.s, z0\.s) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_f32_tied, svfloat32_t, -+ p0 = svcmpeq_f32 (p0, z0, z1), -+ p0 = svcmpeq (p0, z0, z1)) -+ -+/* -+** cmpeq_f32_untied: -+** fcmeq p0\.s, p1/z, (z0\.s, z1\.s|z1\.s, z0\.s) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_f32_untied, svfloat32_t, -+ p0 = svcmpeq_f32 (p1, z0, z1), -+ p0 = svcmpeq (p1, z0, z1)) -+ -+/* -+** cmpeq_s4_f32: -+** mov (z[0-9]+\.s), s4 -+** fcmeq p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_COMPARE_ZD (cmpeq_s4_f32, svfloat32_t, float32_t, -+ p0 = svcmpeq_n_f32 (p1, z0, d4), -+ p0 = svcmpeq (p1, z0, d4)) -+ -+/* -+** cmpeq_0_f32: -+** fcmeq p0\.s, p1/z, z0\.s, #0\.0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_0_f32, svfloat32_t, -+ p0 = svcmpeq_n_f32 (p1, z0, 0), -+ p0 = svcmpeq (p1, z0, 0)) -+ -+/* -+** cmpeq_1_f32: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? 
-+** fcmeq p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_1_f32, svfloat32_t, -+ p0 = svcmpeq_n_f32 (p1, z0, 1), -+ p0 = svcmpeq (p1, z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_f64.c -new file mode 100644 -index 000000000..f94bdfe27 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_f64.c -@@ -0,0 +1,50 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpeq_f64_tied: -+** fcmeq p0\.d, p0/z, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_f64_tied, svfloat64_t, -+ p0 = svcmpeq_f64 (p0, z0, z1), -+ p0 = svcmpeq (p0, z0, z1)) -+ -+/* -+** cmpeq_f64_untied: -+** fcmeq p0\.d, p1/z, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_f64_untied, svfloat64_t, -+ p0 = svcmpeq_f64 (p1, z0, z1), -+ p0 = svcmpeq (p1, z0, z1)) -+ -+/* -+** cmpeq_d4_f64: -+** mov (z[0-9]+\.d), d4 -+** fcmeq p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_COMPARE_ZD (cmpeq_d4_f64, svfloat64_t, float64_t, -+ p0 = svcmpeq_n_f64 (p1, z0, d4), -+ p0 = svcmpeq (p1, z0, d4)) -+ -+/* -+** cmpeq_0_f64: -+** fcmeq p0\.d, p1/z, z0\.d, #0\.0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_0_f64, svfloat64_t, -+ p0 = svcmpeq_n_f64 (p1, z0, 0), -+ p0 = svcmpeq (p1, z0, 0)) -+ -+/* -+** cmpeq_1_f64: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? -+** fcmeq p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_1_f64, svfloat64_t, -+ p0 = svcmpeq_n_f64 (p1, z0, 1), -+ p0 = svcmpeq (p1, z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_s16.c -new file mode 100644 -index 000000000..b0befcb77 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_s16.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpeq_s16_tied: -+** cmpeq p0\.h, p0/z, (z0\.h, z1\.h|z1\.h, z0\.h) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_s16_tied, svint16_t, -+ p0 = svcmpeq_s16 (p0, z0, z1), -+ p0 = svcmpeq (p0, z0, z1)) -+ -+/* -+** cmpeq_s16_untied: -+** cmpeq p0\.h, p1/z, (z0\.h, z1\.h|z1\.h, z0\.h) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_s16_untied, svint16_t, -+ p0 = svcmpeq_s16 (p1, z0, z1), -+ p0 = svcmpeq (p1, z0, z1)) -+ -+/* -+** cmpeq_w0_s16: -+** mov (z[0-9]+\.h), w0 -+** cmpeq p0\.h, p1/z, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_COMPARE_ZX (cmpeq_w0_s16, svint16_t, int16_t, -+ p0 = svcmpeq_n_s16 (p1, z0, x0), -+ p0 = svcmpeq (p1, z0, x0)) -+ -+/* -+** cmpeq_0_s16: -+** cmpeq p0\.h, p1/z, z0\.h, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_0_s16, svint16_t, -+ p0 = svcmpeq_n_s16 (p1, z0, 0), -+ p0 = svcmpeq (p1, z0, 0)) -+ -+/* -+** cmpeq_1_s16: -+** cmpeq p0\.h, p1/z, z0\.h, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_1_s16, svint16_t, -+ p0 = svcmpeq_n_s16 (p1, z0, 1), -+ p0 = svcmpeq (p1, z0, 1)) -+ -+/* -+** cmpeq_15_s16: -+** cmpeq p0\.h, p1/z, z0\.h, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_15_s16, svint16_t, -+ p0 = svcmpeq_n_s16 (p1, z0, 15), -+ p0 = svcmpeq (p1, z0, 15)) -+ -+/* -+** cmpeq_16_s16: -+** mov (z[0-9]+\.h), #16 -+** cmpeq p0\.h, p1/z, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_16_s16, svint16_t, -+ p0 = svcmpeq_n_s16 (p1, z0, 16), -+ p0 = svcmpeq (p1, z0, 16)) -+ -+/* -+** cmpeq_m1_s16: -+** cmpeq p0\.h, p1/z, z0\.h, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_m1_s16, svint16_t, -+ p0 = svcmpeq_n_s16 
(p1, z0, -1), -+ p0 = svcmpeq (p1, z0, -1)) -+ -+/* -+** cmpeq_m16_s16: -+** cmpeq p0\.h, p1/z, z0\.h, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_m16_s16, svint16_t, -+ p0 = svcmpeq_n_s16 (p1, z0, -16), -+ p0 = svcmpeq (p1, z0, -16)) -+ -+/* -+** cmpeq_m17_s16: -+** mov (z[0-9]+\.h), #-17 -+** cmpeq p0\.h, p1/z, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_m17_s16, svint16_t, -+ p0 = svcmpeq_n_s16 (p1, z0, -17), -+ p0 = svcmpeq (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_s32.c -new file mode 100644 -index 000000000..de48a2c38 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_s32.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpeq_s32_tied: -+** cmpeq p0\.s, p0/z, (z0\.s, z1\.s|z1\.s, z0\.s) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_s32_tied, svint32_t, -+ p0 = svcmpeq_s32 (p0, z0, z1), -+ p0 = svcmpeq (p0, z0, z1)) -+ -+/* -+** cmpeq_s32_untied: -+** cmpeq p0\.s, p1/z, (z0\.s, z1\.s|z1\.s, z0\.s) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_s32_untied, svint32_t, -+ p0 = svcmpeq_s32 (p1, z0, z1), -+ p0 = svcmpeq (p1, z0, z1)) -+ -+/* -+** cmpeq_w0_s32: -+** mov (z[0-9]+\.s), w0 -+** cmpeq p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_COMPARE_ZX (cmpeq_w0_s32, svint32_t, int32_t, -+ p0 = svcmpeq_n_s32 (p1, z0, x0), -+ p0 = svcmpeq (p1, z0, x0)) -+ -+/* -+** cmpeq_0_s32: -+** cmpeq p0\.s, p1/z, z0\.s, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_0_s32, svint32_t, -+ p0 = svcmpeq_n_s32 (p1, z0, 0), -+ p0 = svcmpeq (p1, z0, 0)) -+ -+/* -+** cmpeq_1_s32: -+** cmpeq p0\.s, p1/z, z0\.s, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_1_s32, svint32_t, -+ p0 = svcmpeq_n_s32 (p1, z0, 1), -+ p0 = svcmpeq (p1, z0, 1)) -+ -+/* -+** cmpeq_15_s32: -+** cmpeq p0\.s, p1/z, z0\.s, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_15_s32, svint32_t, -+ p0 = svcmpeq_n_s32 (p1, z0, 15), -+ p0 = svcmpeq (p1, z0, 15)) -+ -+/* -+** cmpeq_16_s32: -+** mov (z[0-9]+\.s), #16 -+** cmpeq p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_16_s32, svint32_t, -+ p0 = svcmpeq_n_s32 (p1, z0, 16), -+ p0 = svcmpeq (p1, z0, 16)) -+ -+/* -+** cmpeq_m1_s32: -+** cmpeq p0\.s, p1/z, z0\.s, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_m1_s32, svint32_t, -+ p0 = svcmpeq_n_s32 (p1, z0, -1), -+ p0 = svcmpeq (p1, z0, -1)) -+ -+/* -+** cmpeq_m16_s32: -+** cmpeq p0\.s, p1/z, z0\.s, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_m16_s32, svint32_t, -+ p0 = svcmpeq_n_s32 (p1, z0, -16), -+ p0 = svcmpeq (p1, z0, -16)) -+ -+/* -+** cmpeq_m17_s32: -+** mov (z[0-9]+\.s), #-17 -+** cmpeq p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_m17_s32, svint32_t, -+ p0 = svcmpeq_n_s32 (p1, z0, -17), -+ p0 = svcmpeq (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_s64.c -new file mode 100644 -index 000000000..ff976712a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_s64.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpeq_s64_tied: -+** cmpeq p0\.d, p0/z, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_s64_tied, svint64_t, -+ p0 = svcmpeq_s64 (p0, z0, z1), -+ p0 = svcmpeq (p0, z0, z1)) -+ -+/* -+** cmpeq_s64_untied: -+** cmpeq p0\.d, p1/z, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ 
-+TEST_COMPARE_Z (cmpeq_s64_untied, svint64_t, -+ p0 = svcmpeq_s64 (p1, z0, z1), -+ p0 = svcmpeq (p1, z0, z1)) -+ -+/* -+** cmpeq_x0_s64: -+** mov (z[0-9]+\.d), x0 -+** cmpeq p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_COMPARE_ZX (cmpeq_x0_s64, svint64_t, int64_t, -+ p0 = svcmpeq_n_s64 (p1, z0, x0), -+ p0 = svcmpeq (p1, z0, x0)) -+ -+/* -+** cmpeq_0_s64: -+** cmpeq p0\.d, p1/z, z0\.d, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_0_s64, svint64_t, -+ p0 = svcmpeq_n_s64 (p1, z0, 0), -+ p0 = svcmpeq (p1, z0, 0)) -+ -+/* -+** cmpeq_1_s64: -+** cmpeq p0\.d, p1/z, z0\.d, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_1_s64, svint64_t, -+ p0 = svcmpeq_n_s64 (p1, z0, 1), -+ p0 = svcmpeq (p1, z0, 1)) -+ -+/* -+** cmpeq_15_s64: -+** cmpeq p0\.d, p1/z, z0\.d, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_15_s64, svint64_t, -+ p0 = svcmpeq_n_s64 (p1, z0, 15), -+ p0 = svcmpeq (p1, z0, 15)) -+ -+/* -+** cmpeq_16_s64: -+** mov (z[0-9]+\.d), #16 -+** cmpeq p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_16_s64, svint64_t, -+ p0 = svcmpeq_n_s64 (p1, z0, 16), -+ p0 = svcmpeq (p1, z0, 16)) -+ -+/* -+** cmpeq_m1_s64: -+** cmpeq p0\.d, p1/z, z0\.d, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_m1_s64, svint64_t, -+ p0 = svcmpeq_n_s64 (p1, z0, -1), -+ p0 = svcmpeq (p1, z0, -1)) -+ -+/* -+** cmpeq_m16_s64: -+** cmpeq p0\.d, p1/z, z0\.d, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_m16_s64, svint64_t, -+ p0 = svcmpeq_n_s64 (p1, z0, -16), -+ p0 = svcmpeq (p1, z0, -16)) -+ -+/* -+** cmpeq_m17_s64: -+** mov (z[0-9]+\.d), #-17 -+** cmpeq p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_m17_s64, svint64_t, -+ p0 = svcmpeq_n_s64 (p1, z0, -17), -+ p0 = svcmpeq (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_s8.c -new file mode 100644 -index 000000000..1325755a8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_s8.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpeq_s8_tied: -+** cmpeq p0\.b, p0/z, (z0\.b, z1\.b|z1\.b, z0\.b) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_s8_tied, svint8_t, -+ p0 = svcmpeq_s8 (p0, z0, z1), -+ p0 = svcmpeq (p0, z0, z1)) -+ -+/* -+** cmpeq_s8_untied: -+** cmpeq p0\.b, p1/z, (z0\.b, z1\.b|z1\.b, z0\.b) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_s8_untied, svint8_t, -+ p0 = svcmpeq_s8 (p1, z0, z1), -+ p0 = svcmpeq (p1, z0, z1)) -+ -+/* -+** cmpeq_w0_s8: -+** mov (z[0-9]+\.b), w0 -+** cmpeq p0\.b, p1/z, (z0\.b, \1|\1, z0\.b) -+** ret -+*/ -+TEST_COMPARE_ZX (cmpeq_w0_s8, svint8_t, int8_t, -+ p0 = svcmpeq_n_s8 (p1, z0, x0), -+ p0 = svcmpeq (p1, z0, x0)) -+ -+/* -+** cmpeq_0_s8: -+** cmpeq p0\.b, p1/z, z0\.b, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_0_s8, svint8_t, -+ p0 = svcmpeq_n_s8 (p1, z0, 0), -+ p0 = svcmpeq (p1, z0, 0)) -+ -+/* -+** cmpeq_1_s8: -+** cmpeq p0\.b, p1/z, z0\.b, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_1_s8, svint8_t, -+ p0 = svcmpeq_n_s8 (p1, z0, 1), -+ p0 = svcmpeq (p1, z0, 1)) -+ -+/* -+** cmpeq_15_s8: -+** cmpeq p0\.b, p1/z, z0\.b, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_15_s8, svint8_t, -+ p0 = svcmpeq_n_s8 (p1, z0, 15), -+ p0 = svcmpeq (p1, z0, 15)) -+ -+/* -+** cmpeq_16_s8: -+** mov (z[0-9]+\.b), #16 -+** cmpeq p0\.b, p1/z, (z0\.b, \1|\1, z0\.b) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_16_s8, svint8_t, -+ p0 = svcmpeq_n_s8 (p1, z0, 16), -+ p0 = svcmpeq (p1, z0, 16)) -+ -+/* -+** cmpeq_m1_s8: -+** cmpeq p0\.b, p1/z, z0\.b, #-1 -+** ret -+*/ 
-+TEST_COMPARE_Z (cmpeq_m1_s8, svint8_t, -+ p0 = svcmpeq_n_s8 (p1, z0, -1), -+ p0 = svcmpeq (p1, z0, -1)) -+ -+/* -+** cmpeq_m16_s8: -+** cmpeq p0\.b, p1/z, z0\.b, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_m16_s8, svint8_t, -+ p0 = svcmpeq_n_s8 (p1, z0, -16), -+ p0 = svcmpeq (p1, z0, -16)) -+ -+/* -+** cmpeq_m17_s8: -+** mov (z[0-9]+\.b), #-17 -+** cmpeq p0\.b, p1/z, (z0\.b, \1|\1, z0\.b) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_m17_s8, svint8_t, -+ p0 = svcmpeq_n_s8 (p1, z0, -17), -+ p0 = svcmpeq (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_u16.c -new file mode 100644 -index 000000000..91004692c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_u16.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpeq_u16_tied: -+** cmpeq p0\.h, p0/z, (z0\.h, z1\.h|z1\.h, z0\.h) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_u16_tied, svuint16_t, -+ p0 = svcmpeq_u16 (p0, z0, z1), -+ p0 = svcmpeq (p0, z0, z1)) -+ -+/* -+** cmpeq_u16_untied: -+** cmpeq p0\.h, p1/z, (z0\.h, z1\.h|z1\.h, z0\.h) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_u16_untied, svuint16_t, -+ p0 = svcmpeq_u16 (p1, z0, z1), -+ p0 = svcmpeq (p1, z0, z1)) -+ -+/* -+** cmpeq_w0_u16: -+** mov (z[0-9]+\.h), w0 -+** cmpeq p0\.h, p1/z, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_COMPARE_ZX (cmpeq_w0_u16, svuint16_t, uint16_t, -+ p0 = svcmpeq_n_u16 (p1, z0, x0), -+ p0 = svcmpeq (p1, z0, x0)) -+ -+/* -+** cmpeq_0_u16: -+** cmpeq p0\.h, p1/z, z0\.h, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_0_u16, svuint16_t, -+ p0 = svcmpeq_n_u16 (p1, z0, 0), -+ p0 = svcmpeq (p1, z0, 0)) -+ -+/* -+** cmpeq_1_u16: -+** cmpeq p0\.h, p1/z, z0\.h, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_1_u16, svuint16_t, -+ p0 = svcmpeq_n_u16 (p1, z0, 1), -+ p0 = svcmpeq (p1, z0, 1)) -+ -+/* -+** cmpeq_15_u16: -+** cmpeq p0\.h, p1/z, z0\.h, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_15_u16, svuint16_t, -+ p0 = svcmpeq_n_u16 (p1, z0, 15), -+ p0 = svcmpeq (p1, z0, 15)) -+ -+/* -+** cmpeq_16_u16: -+** mov (z[0-9]+\.h), #16 -+** cmpeq p0\.h, p1/z, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_16_u16, svuint16_t, -+ p0 = svcmpeq_n_u16 (p1, z0, 16), -+ p0 = svcmpeq (p1, z0, 16)) -+ -+/* -+** cmpeq_m1_u16: -+** cmpeq p0\.h, p1/z, z0\.h, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_m1_u16, svuint16_t, -+ p0 = svcmpeq_n_u16 (p1, z0, -1), -+ p0 = svcmpeq (p1, z0, -1)) -+ -+/* -+** cmpeq_m16_u16: -+** cmpeq p0\.h, p1/z, z0\.h, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_m16_u16, svuint16_t, -+ p0 = svcmpeq_n_u16 (p1, z0, -16), -+ p0 = svcmpeq (p1, z0, -16)) -+ -+/* -+** cmpeq_m17_u16: -+** mov (z[0-9]+\.h), #-17 -+** cmpeq p0\.h, p1/z, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_m17_u16, svuint16_t, -+ p0 = svcmpeq_n_u16 (p1, z0, -17), -+ p0 = svcmpeq (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_u32.c -new file mode 100644 -index 000000000..2cff56eb6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_u32.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpeq_u32_tied: -+** cmpeq p0\.s, p0/z, (z0\.s, z1\.s|z1\.s, z0\.s) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_u32_tied, svuint32_t, -+ p0 = svcmpeq_u32 (p0, z0, z1), -+ p0 = svcmpeq (p0, z0, z1)) -+ -+/* -+** cmpeq_u32_untied: -+** cmpeq 
p0\.s, p1/z, (z0\.s, z1\.s|z1\.s, z0\.s) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_u32_untied, svuint32_t, -+ p0 = svcmpeq_u32 (p1, z0, z1), -+ p0 = svcmpeq (p1, z0, z1)) -+ -+/* -+** cmpeq_w0_u32: -+** mov (z[0-9]+\.s), w0 -+** cmpeq p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_COMPARE_ZX (cmpeq_w0_u32, svuint32_t, uint32_t, -+ p0 = svcmpeq_n_u32 (p1, z0, x0), -+ p0 = svcmpeq (p1, z0, x0)) -+ -+/* -+** cmpeq_0_u32: -+** cmpeq p0\.s, p1/z, z0\.s, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_0_u32, svuint32_t, -+ p0 = svcmpeq_n_u32 (p1, z0, 0), -+ p0 = svcmpeq (p1, z0, 0)) -+ -+/* -+** cmpeq_1_u32: -+** cmpeq p0\.s, p1/z, z0\.s, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_1_u32, svuint32_t, -+ p0 = svcmpeq_n_u32 (p1, z0, 1), -+ p0 = svcmpeq (p1, z0, 1)) -+ -+/* -+** cmpeq_15_u32: -+** cmpeq p0\.s, p1/z, z0\.s, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_15_u32, svuint32_t, -+ p0 = svcmpeq_n_u32 (p1, z0, 15), -+ p0 = svcmpeq (p1, z0, 15)) -+ -+/* -+** cmpeq_16_u32: -+** mov (z[0-9]+\.s), #16 -+** cmpeq p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_16_u32, svuint32_t, -+ p0 = svcmpeq_n_u32 (p1, z0, 16), -+ p0 = svcmpeq (p1, z0, 16)) -+ -+/* -+** cmpeq_m1_u32: -+** cmpeq p0\.s, p1/z, z0\.s, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_m1_u32, svuint32_t, -+ p0 = svcmpeq_n_u32 (p1, z0, -1), -+ p0 = svcmpeq (p1, z0, -1)) -+ -+/* -+** cmpeq_m16_u32: -+** cmpeq p0\.s, p1/z, z0\.s, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_m16_u32, svuint32_t, -+ p0 = svcmpeq_n_u32 (p1, z0, -16), -+ p0 = svcmpeq (p1, z0, -16)) -+ -+/* -+** cmpeq_m17_u32: -+** mov (z[0-9]+\.s), #-17 -+** cmpeq p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_m17_u32, svuint32_t, -+ p0 = svcmpeq_n_u32 (p1, z0, -17), -+ p0 = svcmpeq (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_u64.c -new file mode 100644 -index 000000000..0f02c9988 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_u64.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpeq_u64_tied: -+** cmpeq p0\.d, p0/z, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_u64_tied, svuint64_t, -+ p0 = svcmpeq_u64 (p0, z0, z1), -+ p0 = svcmpeq (p0, z0, z1)) -+ -+/* -+** cmpeq_u64_untied: -+** cmpeq p0\.d, p1/z, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_u64_untied, svuint64_t, -+ p0 = svcmpeq_u64 (p1, z0, z1), -+ p0 = svcmpeq (p1, z0, z1)) -+ -+/* -+** cmpeq_x0_u64: -+** mov (z[0-9]+\.d), x0 -+** cmpeq p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_COMPARE_ZX (cmpeq_x0_u64, svuint64_t, uint64_t, -+ p0 = svcmpeq_n_u64 (p1, z0, x0), -+ p0 = svcmpeq (p1, z0, x0)) -+ -+/* -+** cmpeq_0_u64: -+** cmpeq p0\.d, p1/z, z0\.d, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_0_u64, svuint64_t, -+ p0 = svcmpeq_n_u64 (p1, z0, 0), -+ p0 = svcmpeq (p1, z0, 0)) -+ -+/* -+** cmpeq_1_u64: -+** cmpeq p0\.d, p1/z, z0\.d, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_1_u64, svuint64_t, -+ p0 = svcmpeq_n_u64 (p1, z0, 1), -+ p0 = svcmpeq (p1, z0, 1)) -+ -+/* -+** cmpeq_15_u64: -+** cmpeq p0\.d, p1/z, z0\.d, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_15_u64, svuint64_t, -+ p0 = svcmpeq_n_u64 (p1, z0, 15), -+ p0 = svcmpeq (p1, z0, 15)) -+ -+/* -+** cmpeq_16_u64: -+** mov (z[0-9]+\.d), #16 -+** cmpeq p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_16_u64, svuint64_t, -+ p0 = svcmpeq_n_u64 (p1, z0, 16), -+ 
p0 = svcmpeq (p1, z0, 16)) -+ -+/* -+** cmpeq_m1_u64: -+** cmpeq p0\.d, p1/z, z0\.d, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_m1_u64, svuint64_t, -+ p0 = svcmpeq_n_u64 (p1, z0, -1), -+ p0 = svcmpeq (p1, z0, -1)) -+ -+/* -+** cmpeq_m16_u64: -+** cmpeq p0\.d, p1/z, z0\.d, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_m16_u64, svuint64_t, -+ p0 = svcmpeq_n_u64 (p1, z0, -16), -+ p0 = svcmpeq (p1, z0, -16)) -+ -+/* -+** cmpeq_m17_u64: -+** mov (z[0-9]+\.d), #-17 -+** cmpeq p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_m17_u64, svuint64_t, -+ p0 = svcmpeq_n_u64 (p1, z0, -17), -+ p0 = svcmpeq (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_u8.c -new file mode 100644 -index 000000000..ccd9a61c6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_u8.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpeq_u8_tied: -+** cmpeq p0\.b, p0/z, (z0\.b, z1\.b|z1\.b, z0\.b) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_u8_tied, svuint8_t, -+ p0 = svcmpeq_u8 (p0, z0, z1), -+ p0 = svcmpeq (p0, z0, z1)) -+ -+/* -+** cmpeq_u8_untied: -+** cmpeq p0\.b, p1/z, (z0\.b, z1\.b|z1\.b, z0\.b) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_u8_untied, svuint8_t, -+ p0 = svcmpeq_u8 (p1, z0, z1), -+ p0 = svcmpeq (p1, z0, z1)) -+ -+/* -+** cmpeq_w0_u8: -+** mov (z[0-9]+\.b), w0 -+** cmpeq p0\.b, p1/z, (z0\.b, \1|\1, z0\.b) -+** ret -+*/ -+TEST_COMPARE_ZX (cmpeq_w0_u8, svuint8_t, uint8_t, -+ p0 = svcmpeq_n_u8 (p1, z0, x0), -+ p0 = svcmpeq (p1, z0, x0)) -+ -+/* -+** cmpeq_0_u8: -+** cmpeq p0\.b, p1/z, z0\.b, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_0_u8, svuint8_t, -+ p0 = svcmpeq_n_u8 (p1, z0, 0), -+ p0 = svcmpeq (p1, z0, 0)) -+ -+/* -+** cmpeq_1_u8: -+** cmpeq p0\.b, p1/z, z0\.b, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_1_u8, svuint8_t, -+ p0 = svcmpeq_n_u8 (p1, z0, 1), -+ p0 = svcmpeq (p1, z0, 1)) -+ -+/* -+** cmpeq_15_u8: -+** cmpeq p0\.b, p1/z, z0\.b, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_15_u8, svuint8_t, -+ p0 = svcmpeq_n_u8 (p1, z0, 15), -+ p0 = svcmpeq (p1, z0, 15)) -+ -+/* -+** cmpeq_16_u8: -+** mov (z[0-9]+\.b), #16 -+** cmpeq p0\.b, p1/z, (z0\.b, \1|\1, z0\.b) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_16_u8, svuint8_t, -+ p0 = svcmpeq_n_u8 (p1, z0, 16), -+ p0 = svcmpeq (p1, z0, 16)) -+ -+/* -+** cmpeq_m1_u8: -+** cmpeq p0\.b, p1/z, z0\.b, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_m1_u8, svuint8_t, -+ p0 = svcmpeq_n_u8 (p1, z0, -1), -+ p0 = svcmpeq (p1, z0, -1)) -+ -+/* -+** cmpeq_m16_u8: -+** cmpeq p0\.b, p1/z, z0\.b, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_m16_u8, svuint8_t, -+ p0 = svcmpeq_n_u8 (p1, z0, -16), -+ p0 = svcmpeq (p1, z0, -16)) -+ -+/* -+** cmpeq_m17_u8: -+** mov (z[0-9]+\.b), #-17 -+** cmpeq p0\.b, p1/z, (z0\.b, \1|\1, z0\.b) -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_m17_u8, svuint8_t, -+ p0 = svcmpeq_n_u8 (p1, z0, -17), -+ p0 = svcmpeq (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_wide_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_wide_s16.c -new file mode 100644 -index 000000000..c9712b3b4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_wide_s16.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpeq_wide_s16_tied: -+** cmpeq p0\.h, p0/z, z0\.h, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmpeq_wide_s16_tied, svint16_t, svint64_t, -+ p0 = 
svcmpeq_wide_s16 (p0, z0, z1), -+ p0 = svcmpeq_wide (p0, z0, z1)) -+ -+/* -+** cmpeq_wide_s16_untied: -+** cmpeq p0\.h, p1/z, z0\.h, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmpeq_wide_s16_untied, svint16_t, svint64_t, -+ p0 = svcmpeq_wide_s16 (p1, z0, z1), -+ p0 = svcmpeq_wide (p1, z0, z1)) -+ -+/* -+** cmpeq_wide_x0_s16: -+** mov (z[0-9]+\.d), x0 -+** cmpeq p0\.h, p1/z, z0\.h, \1 -+** ret -+*/ -+TEST_COMPARE_ZX (cmpeq_wide_x0_s16, svint16_t, int64_t, -+ p0 = svcmpeq_wide_n_s16 (p1, z0, x0), -+ p0 = svcmpeq_wide (p1, z0, x0)) -+ -+/* -+** cmpeq_wide_0_s16: -+** cmpeq p0\.h, p1/z, z0\.h, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_wide_0_s16, svint16_t, -+ p0 = svcmpeq_wide_n_s16 (p1, z0, 0), -+ p0 = svcmpeq_wide (p1, z0, 0)) -+ -+/* -+** cmpeq_wide_1_s16: -+** cmpeq p0\.h, p1/z, z0\.h, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_wide_1_s16, svint16_t, -+ p0 = svcmpeq_wide_n_s16 (p1, z0, 1), -+ p0 = svcmpeq_wide (p1, z0, 1)) -+ -+/* -+** cmpeq_wide_15_s16: -+** cmpeq p0\.h, p1/z, z0\.h, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_wide_15_s16, svint16_t, -+ p0 = svcmpeq_wide_n_s16 (p1, z0, 15), -+ p0 = svcmpeq_wide (p1, z0, 15)) -+ -+/* -+** cmpeq_wide_16_s16: -+** mov (z[0-9]+\.d), #16 -+** cmpeq p0\.h, p1/z, z0\.h, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_wide_16_s16, svint16_t, -+ p0 = svcmpeq_wide_n_s16 (p1, z0, 16), -+ p0 = svcmpeq_wide (p1, z0, 16)) -+ -+/* -+** cmpeq_wide_m1_s16: -+** cmpeq p0\.h, p1/z, z0\.h, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_wide_m1_s16, svint16_t, -+ p0 = svcmpeq_wide_n_s16 (p1, z0, -1), -+ p0 = svcmpeq_wide (p1, z0, -1)) -+ -+/* -+** cmpeq_wide_m16_s16: -+** cmpeq p0\.h, p1/z, z0\.h, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_wide_m16_s16, svint16_t, -+ p0 = svcmpeq_wide_n_s16 (p1, z0, -16), -+ p0 = svcmpeq_wide (p1, z0, -16)) -+ -+/* -+** cmpeq_wide_m17_s16: -+** mov (z[0-9]+\.d), #-17 -+** cmpeq p0\.h, p1/z, z0\.h, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_wide_m17_s16, svint16_t, -+ p0 = svcmpeq_wide_n_s16 (p1, z0, -17), -+ p0 = svcmpeq_wide (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_wide_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_wide_s32.c -new file mode 100644 -index 000000000..22bd99f57 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_wide_s32.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpeq_wide_s32_tied: -+** cmpeq p0\.s, p0/z, z0\.s, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmpeq_wide_s32_tied, svint32_t, svint64_t, -+ p0 = svcmpeq_wide_s32 (p0, z0, z1), -+ p0 = svcmpeq_wide (p0, z0, z1)) -+ -+/* -+** cmpeq_wide_s32_untied: -+** cmpeq p0\.s, p1/z, z0\.s, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmpeq_wide_s32_untied, svint32_t, svint64_t, -+ p0 = svcmpeq_wide_s32 (p1, z0, z1), -+ p0 = svcmpeq_wide (p1, z0, z1)) -+ -+/* -+** cmpeq_wide_x0_s32: -+** mov (z[0-9]+\.d), x0 -+** cmpeq p0\.s, p1/z, z0\.s, \1 -+** ret -+*/ -+TEST_COMPARE_ZX (cmpeq_wide_x0_s32, svint32_t, int64_t, -+ p0 = svcmpeq_wide_n_s32 (p1, z0, x0), -+ p0 = svcmpeq_wide (p1, z0, x0)) -+ -+/* -+** cmpeq_wide_0_s32: -+** cmpeq p0\.s, p1/z, z0\.s, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_wide_0_s32, svint32_t, -+ p0 = svcmpeq_wide_n_s32 (p1, z0, 0), -+ p0 = svcmpeq_wide (p1, z0, 0)) -+ -+/* -+** cmpeq_wide_1_s32: -+** cmpeq p0\.s, p1/z, z0\.s, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_wide_1_s32, svint32_t, -+ p0 = svcmpeq_wide_n_s32 (p1, z0, 1), -+ p0 = svcmpeq_wide (p1, z0, 1)) -+ -+/* -+** cmpeq_wide_15_s32: -+** cmpeq 
p0\.s, p1/z, z0\.s, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_wide_15_s32, svint32_t, -+ p0 = svcmpeq_wide_n_s32 (p1, z0, 15), -+ p0 = svcmpeq_wide (p1, z0, 15)) -+ -+/* -+** cmpeq_wide_16_s32: -+** mov (z[0-9]+\.d), #16 -+** cmpeq p0\.s, p1/z, z0\.s, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_wide_16_s32, svint32_t, -+ p0 = svcmpeq_wide_n_s32 (p1, z0, 16), -+ p0 = svcmpeq_wide (p1, z0, 16)) -+ -+/* -+** cmpeq_wide_m1_s32: -+** cmpeq p0\.s, p1/z, z0\.s, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_wide_m1_s32, svint32_t, -+ p0 = svcmpeq_wide_n_s32 (p1, z0, -1), -+ p0 = svcmpeq_wide (p1, z0, -1)) -+ -+/* -+** cmpeq_wide_m16_s32: -+** cmpeq p0\.s, p1/z, z0\.s, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_wide_m16_s32, svint32_t, -+ p0 = svcmpeq_wide_n_s32 (p1, z0, -16), -+ p0 = svcmpeq_wide (p1, z0, -16)) -+ -+/* -+** cmpeq_wide_m17_s32: -+** mov (z[0-9]+\.d), #-17 -+** cmpeq p0\.s, p1/z, z0\.s, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_wide_m17_s32, svint32_t, -+ p0 = svcmpeq_wide_n_s32 (p1, z0, -17), -+ p0 = svcmpeq_wide (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_wide_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_wide_s8.c -new file mode 100644 -index 000000000..a9e9a0bf5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpeq_wide_s8.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpeq_wide_s8_tied: -+** cmpeq p0\.b, p0/z, z0\.b, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmpeq_wide_s8_tied, svint8_t, svint64_t, -+ p0 = svcmpeq_wide_s8 (p0, z0, z1), -+ p0 = svcmpeq_wide (p0, z0, z1)) -+ -+/* -+** cmpeq_wide_s8_untied: -+** cmpeq p0\.b, p1/z, z0\.b, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmpeq_wide_s8_untied, svint8_t, svint64_t, -+ p0 = svcmpeq_wide_s8 (p1, z0, z1), -+ p0 = svcmpeq_wide (p1, z0, z1)) -+ -+/* -+** cmpeq_wide_x0_s8: -+** mov (z[0-9]+\.d), x0 -+** cmpeq p0\.b, p1/z, z0\.b, \1 -+** ret -+*/ -+TEST_COMPARE_ZX (cmpeq_wide_x0_s8, svint8_t, int64_t, -+ p0 = svcmpeq_wide_n_s8 (p1, z0, x0), -+ p0 = svcmpeq_wide (p1, z0, x0)) -+ -+/* -+** cmpeq_wide_0_s8: -+** cmpeq p0\.b, p1/z, z0\.b, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_wide_0_s8, svint8_t, -+ p0 = svcmpeq_wide_n_s8 (p1, z0, 0), -+ p0 = svcmpeq_wide (p1, z0, 0)) -+ -+/* -+** cmpeq_wide_1_s8: -+** cmpeq p0\.b, p1/z, z0\.b, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_wide_1_s8, svint8_t, -+ p0 = svcmpeq_wide_n_s8 (p1, z0, 1), -+ p0 = svcmpeq_wide (p1, z0, 1)) -+ -+/* -+** cmpeq_wide_15_s8: -+** cmpeq p0\.b, p1/z, z0\.b, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_wide_15_s8, svint8_t, -+ p0 = svcmpeq_wide_n_s8 (p1, z0, 15), -+ p0 = svcmpeq_wide (p1, z0, 15)) -+ -+/* -+** cmpeq_wide_16_s8: -+** mov (z[0-9]+\.d), #16 -+** cmpeq p0\.b, p1/z, z0\.b, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_wide_16_s8, svint8_t, -+ p0 = svcmpeq_wide_n_s8 (p1, z0, 16), -+ p0 = svcmpeq_wide (p1, z0, 16)) -+ -+/* -+** cmpeq_wide_m1_s8: -+** cmpeq p0\.b, p1/z, z0\.b, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_wide_m1_s8, svint8_t, -+ p0 = svcmpeq_wide_n_s8 (p1, z0, -1), -+ p0 = svcmpeq_wide (p1, z0, -1)) -+ -+/* -+** cmpeq_wide_m16_s8: -+** cmpeq p0\.b, p1/z, z0\.b, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_wide_m16_s8, svint8_t, -+ p0 = svcmpeq_wide_n_s8 (p1, z0, -16), -+ p0 = svcmpeq_wide (p1, z0, -16)) -+ -+/* -+** cmpeq_wide_m17_s8: -+** mov (z[0-9]+\.d), #-17 -+** cmpeq p0\.b, p1/z, z0\.b, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpeq_wide_m17_s8, svint8_t, -+ p0 = svcmpeq_wide_n_s8 (p1, z0, -17), -+ p0 = 
svcmpeq_wide (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_f16.c -new file mode 100644 -index 000000000..a6db8c16a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_f16.c -@@ -0,0 +1,66 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpge_f16_tied: -+** ( -+** fcmge p0\.h, p0/z, z0\.h, z1\.h -+** | -+** fcmle p0\.h, p0/z, z1\.h, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_f16_tied, svfloat16_t, -+ p0 = svcmpge_f16 (p0, z0, z1), -+ p0 = svcmpge (p0, z0, z1)) -+ -+/* -+** cmpge_f16_untied: -+** ( -+** fcmge p0\.h, p1/z, z0\.h, z1\.h -+** | -+** fcmle p0\.h, p1/z, z1\.h, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_f16_untied, svfloat16_t, -+ p0 = svcmpge_f16 (p1, z0, z1), -+ p0 = svcmpge (p1, z0, z1)) -+ -+/* -+** cmpge_h4_f16: -+** mov (z[0-9]+\.h), h4 -+** ( -+** fcmge p0\.h, p1/z, z0\.h, \1 -+** | -+** fcmle p0\.h, p1/z, \1, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_ZD (cmpge_h4_f16, svfloat16_t, float16_t, -+ p0 = svcmpge_n_f16 (p1, z0, d4), -+ p0 = svcmpge (p1, z0, d4)) -+ -+/* -+** cmpge_0_f16: -+** fcmge p0\.h, p1/z, z0\.h, #0\.0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_0_f16, svfloat16_t, -+ p0 = svcmpge_n_f16 (p1, z0, 0), -+ p0 = svcmpge (p1, z0, 0)) -+ -+/* -+** cmpge_1_f16: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? -+** ( -+** fcmge p0\.h, p1/z, z0\.h, \1 -+** | -+** fcmle p0\.h, p1/z, \1, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_1_f16, svfloat16_t, -+ p0 = svcmpge_n_f16 (p1, z0, 1), -+ p0 = svcmpge (p1, z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_f32.c -new file mode 100644 -index 000000000..ee2976e58 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_f32.c -@@ -0,0 +1,66 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpge_f32_tied: -+** ( -+** fcmge p0\.s, p0/z, z0\.s, z1\.s -+** | -+** fcmle p0\.s, p0/z, z1\.s, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_f32_tied, svfloat32_t, -+ p0 = svcmpge_f32 (p0, z0, z1), -+ p0 = svcmpge (p0, z0, z1)) -+ -+/* -+** cmpge_f32_untied: -+** ( -+** fcmge p0\.s, p1/z, z0\.s, z1\.s -+** | -+** fcmle p0\.s, p1/z, z1\.s, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_f32_untied, svfloat32_t, -+ p0 = svcmpge_f32 (p1, z0, z1), -+ p0 = svcmpge (p1, z0, z1)) -+ -+/* -+** cmpge_s4_f32: -+** mov (z[0-9]+\.s), s4 -+** ( -+** fcmge p0\.s, p1/z, z0\.s, \1 -+** | -+** fcmle p0\.s, p1/z, \1, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_ZD (cmpge_s4_f32, svfloat32_t, float32_t, -+ p0 = svcmpge_n_f32 (p1, z0, d4), -+ p0 = svcmpge (p1, z0, d4)) -+ -+/* -+** cmpge_0_f32: -+** fcmge p0\.s, p1/z, z0\.s, #0\.0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_0_f32, svfloat32_t, -+ p0 = svcmpge_n_f32 (p1, z0, 0), -+ p0 = svcmpge (p1, z0, 0)) -+ -+/* -+** cmpge_1_f32: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? 
-+** ( -+** fcmge p0\.s, p1/z, z0\.s, \1 -+** | -+** fcmle p0\.s, p1/z, \1, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_1_f32, svfloat32_t, -+ p0 = svcmpge_n_f32 (p1, z0, 1), -+ p0 = svcmpge (p1, z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_f64.c -new file mode 100644 -index 000000000..ceea0afe3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_f64.c -@@ -0,0 +1,66 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpge_f64_tied: -+** ( -+** fcmge p0\.d, p0/z, z0\.d, z1\.d -+** | -+** fcmle p0\.d, p0/z, z1\.d, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_f64_tied, svfloat64_t, -+ p0 = svcmpge_f64 (p0, z0, z1), -+ p0 = svcmpge (p0, z0, z1)) -+ -+/* -+** cmpge_f64_untied: -+** ( -+** fcmge p0\.d, p1/z, z0\.d, z1\.d -+** | -+** fcmle p0\.d, p1/z, z1\.d, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_f64_untied, svfloat64_t, -+ p0 = svcmpge_f64 (p1, z0, z1), -+ p0 = svcmpge (p1, z0, z1)) -+ -+/* -+** cmpge_d4_f64: -+** mov (z[0-9]+\.d), d4 -+** ( -+** fcmge p0\.d, p1/z, z0\.d, \1 -+** | -+** fcmle p0\.d, p1/z, \1, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_ZD (cmpge_d4_f64, svfloat64_t, float64_t, -+ p0 = svcmpge_n_f64 (p1, z0, d4), -+ p0 = svcmpge (p1, z0, d4)) -+ -+/* -+** cmpge_0_f64: -+** fcmge p0\.d, p1/z, z0\.d, #0\.0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_0_f64, svfloat64_t, -+ p0 = svcmpge_n_f64 (p1, z0, 0), -+ p0 = svcmpge (p1, z0, 0)) -+ -+/* -+** cmpge_1_f64: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? -+** ( -+** fcmge p0\.d, p1/z, z0\.d, \1 -+** | -+** fcmle p0\.d, p1/z, \1, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_1_f64, svfloat64_t, -+ p0 = svcmpge_n_f64 (p1, z0, 1), -+ p0 = svcmpge (p1, z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_s16.c -new file mode 100644 -index 000000000..de9180b84 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_s16.c -@@ -0,0 +1,116 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpge_s16_tied: -+** ( -+** cmpge p0\.h, p0/z, z0\.h, z1\.h -+** | -+** cmple p0\.h, p0/z, z1\.h, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_s16_tied, svint16_t, -+ p0 = svcmpge_s16 (p0, z0, z1), -+ p0 = svcmpge (p0, z0, z1)) -+ -+/* -+** cmpge_s16_untied: -+** ( -+** cmpge p0\.h, p1/z, z0\.h, z1\.h -+** | -+** cmple p0\.h, p1/z, z1\.h, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_s16_untied, svint16_t, -+ p0 = svcmpge_s16 (p1, z0, z1), -+ p0 = svcmpge (p1, z0, z1)) -+ -+/* -+** cmpge_w0_s16: -+** mov (z[0-9]+\.h), w0 -+** ( -+** cmpge p0\.h, p1/z, z0\.h, \1 -+** | -+** cmple p0\.h, p1/z, \1, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_ZX (cmpge_w0_s16, svint16_t, int16_t, -+ p0 = svcmpge_n_s16 (p1, z0, x0), -+ p0 = svcmpge (p1, z0, x0)) -+ -+/* -+** cmpge_0_s16: -+** cmpge p0\.h, p1/z, z0\.h, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_0_s16, svint16_t, -+ p0 = svcmpge_n_s16 (p1, z0, 0), -+ p0 = svcmpge (p1, z0, 0)) -+ -+/* -+** cmpge_1_s16: -+** cmpge p0\.h, p1/z, z0\.h, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_1_s16, svint16_t, -+ p0 = svcmpge_n_s16 (p1, z0, 1), -+ p0 = svcmpge (p1, z0, 1)) -+ -+/* -+** cmpge_15_s16: -+** cmpge p0\.h, p1/z, z0\.h, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_15_s16, svint16_t, -+ p0 = svcmpge_n_s16 (p1, z0, 15), -+ p0 = svcmpge (p1, z0, 15)) -+ -+/* -+** 
cmpge_16_s16: -+** mov (z[0-9]+\.h), #16 -+** ( -+** cmpge p0\.h, p1/z, z0\.h, \1 -+** | -+** cmple p0\.h, p1/z, \1, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_16_s16, svint16_t, -+ p0 = svcmpge_n_s16 (p1, z0, 16), -+ p0 = svcmpge (p1, z0, 16)) -+ -+/* -+** cmpge_m1_s16: -+** cmpge p0\.h, p1/z, z0\.h, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_m1_s16, svint16_t, -+ p0 = svcmpge_n_s16 (p1, z0, -1), -+ p0 = svcmpge (p1, z0, -1)) -+ -+/* -+** cmpge_m16_s16: -+** cmpge p0\.h, p1/z, z0\.h, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_m16_s16, svint16_t, -+ p0 = svcmpge_n_s16 (p1, z0, -16), -+ p0 = svcmpge (p1, z0, -16)) -+ -+/* -+** cmpge_m17_s16: -+** mov (z[0-9]+\.h), #-17 -+** ( -+** cmpge p0\.h, p1/z, z0\.h, \1 -+** | -+** cmple p0\.h, p1/z, \1, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_m17_s16, svint16_t, -+ p0 = svcmpge_n_s16 (p1, z0, -17), -+ p0 = svcmpge (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_s32.c -new file mode 100644 -index 000000000..67286b1fe ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_s32.c -@@ -0,0 +1,116 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpge_s32_tied: -+** ( -+** cmpge p0\.s, p0/z, z0\.s, z1\.s -+** | -+** cmple p0\.s, p0/z, z1\.s, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_s32_tied, svint32_t, -+ p0 = svcmpge_s32 (p0, z0, z1), -+ p0 = svcmpge (p0, z0, z1)) -+ -+/* -+** cmpge_s32_untied: -+** ( -+** cmpge p0\.s, p1/z, z0\.s, z1\.s -+** | -+** cmple p0\.s, p1/z, z1\.s, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_s32_untied, svint32_t, -+ p0 = svcmpge_s32 (p1, z0, z1), -+ p0 = svcmpge (p1, z0, z1)) -+ -+/* -+** cmpge_w0_s32: -+** mov (z[0-9]+\.s), w0 -+** ( -+** cmpge p0\.s, p1/z, z0\.s, \1 -+** | -+** cmple p0\.s, p1/z, \1, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_ZX (cmpge_w0_s32, svint32_t, int32_t, -+ p0 = svcmpge_n_s32 (p1, z0, x0), -+ p0 = svcmpge (p1, z0, x0)) -+ -+/* -+** cmpge_0_s32: -+** cmpge p0\.s, p1/z, z0\.s, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_0_s32, svint32_t, -+ p0 = svcmpge_n_s32 (p1, z0, 0), -+ p0 = svcmpge (p1, z0, 0)) -+ -+/* -+** cmpge_1_s32: -+** cmpge p0\.s, p1/z, z0\.s, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_1_s32, svint32_t, -+ p0 = svcmpge_n_s32 (p1, z0, 1), -+ p0 = svcmpge (p1, z0, 1)) -+ -+/* -+** cmpge_15_s32: -+** cmpge p0\.s, p1/z, z0\.s, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_15_s32, svint32_t, -+ p0 = svcmpge_n_s32 (p1, z0, 15), -+ p0 = svcmpge (p1, z0, 15)) -+ -+/* -+** cmpge_16_s32: -+** mov (z[0-9]+\.s), #16 -+** ( -+** cmpge p0\.s, p1/z, z0\.s, \1 -+** | -+** cmple p0\.s, p1/z, \1, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_16_s32, svint32_t, -+ p0 = svcmpge_n_s32 (p1, z0, 16), -+ p0 = svcmpge (p1, z0, 16)) -+ -+/* -+** cmpge_m1_s32: -+** cmpge p0\.s, p1/z, z0\.s, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_m1_s32, svint32_t, -+ p0 = svcmpge_n_s32 (p1, z0, -1), -+ p0 = svcmpge (p1, z0, -1)) -+ -+/* -+** cmpge_m16_s32: -+** cmpge p0\.s, p1/z, z0\.s, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_m16_s32, svint32_t, -+ p0 = svcmpge_n_s32 (p1, z0, -16), -+ p0 = svcmpge (p1, z0, -16)) -+ -+/* -+** cmpge_m17_s32: -+** mov (z[0-9]+\.s), #-17 -+** ( -+** cmpge p0\.s, p1/z, z0\.s, \1 -+** | -+** cmple p0\.s, p1/z, \1, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_m17_s32, svint32_t, -+ p0 = svcmpge_n_s32 (p1, z0, -17), -+ p0 = svcmpge (p1, z0, -17)) -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_s64.c -new file mode 100644 -index 000000000..02e3ac07a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_s64.c -@@ -0,0 +1,116 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpge_s64_tied: -+** ( -+** cmpge p0\.d, p0/z, z0\.d, z1\.d -+** | -+** cmple p0\.d, p0/z, z1\.d, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_s64_tied, svint64_t, -+ p0 = svcmpge_s64 (p0, z0, z1), -+ p0 = svcmpge (p0, z0, z1)) -+ -+/* -+** cmpge_s64_untied: -+** ( -+** cmpge p0\.d, p1/z, z0\.d, z1\.d -+** | -+** cmple p0\.d, p1/z, z1\.d, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_s64_untied, svint64_t, -+ p0 = svcmpge_s64 (p1, z0, z1), -+ p0 = svcmpge (p1, z0, z1)) -+ -+/* -+** cmpge_x0_s64: -+** mov (z[0-9]+\.d), x0 -+** ( -+** cmpge p0\.d, p1/z, z0\.d, \1 -+** | -+** cmple p0\.d, p1/z, \1, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_ZX (cmpge_x0_s64, svint64_t, int64_t, -+ p0 = svcmpge_n_s64 (p1, z0, x0), -+ p0 = svcmpge (p1, z0, x0)) -+ -+/* -+** cmpge_0_s64: -+** cmpge p0\.d, p1/z, z0\.d, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_0_s64, svint64_t, -+ p0 = svcmpge_n_s64 (p1, z0, 0), -+ p0 = svcmpge (p1, z0, 0)) -+ -+/* -+** cmpge_1_s64: -+** cmpge p0\.d, p1/z, z0\.d, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_1_s64, svint64_t, -+ p0 = svcmpge_n_s64 (p1, z0, 1), -+ p0 = svcmpge (p1, z0, 1)) -+ -+/* -+** cmpge_15_s64: -+** cmpge p0\.d, p1/z, z0\.d, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_15_s64, svint64_t, -+ p0 = svcmpge_n_s64 (p1, z0, 15), -+ p0 = svcmpge (p1, z0, 15)) -+ -+/* -+** cmpge_16_s64: -+** mov (z[0-9]+\.d), #16 -+** ( -+** cmpge p0\.d, p1/z, z0\.d, \1 -+** | -+** cmple p0\.d, p1/z, \1, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_16_s64, svint64_t, -+ p0 = svcmpge_n_s64 (p1, z0, 16), -+ p0 = svcmpge (p1, z0, 16)) -+ -+/* -+** cmpge_m1_s64: -+** cmpge p0\.d, p1/z, z0\.d, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_m1_s64, svint64_t, -+ p0 = svcmpge_n_s64 (p1, z0, -1), -+ p0 = svcmpge (p1, z0, -1)) -+ -+/* -+** cmpge_m16_s64: -+** cmpge p0\.d, p1/z, z0\.d, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_m16_s64, svint64_t, -+ p0 = svcmpge_n_s64 (p1, z0, -16), -+ p0 = svcmpge (p1, z0, -16)) -+ -+/* -+** cmpge_m17_s64: -+** mov (z[0-9]+\.d), #-17 -+** ( -+** cmpge p0\.d, p1/z, z0\.d, \1 -+** | -+** cmple p0\.d, p1/z, \1, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_m17_s64, svint64_t, -+ p0 = svcmpge_n_s64 (p1, z0, -17), -+ p0 = svcmpge (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_s8.c -new file mode 100644 -index 000000000..45c9c5f10 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_s8.c -@@ -0,0 +1,116 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpge_s8_tied: -+** ( -+** cmpge p0\.b, p0/z, z0\.b, z1\.b -+** | -+** cmple p0\.b, p0/z, z1\.b, z0\.b -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_s8_tied, svint8_t, -+ p0 = svcmpge_s8 (p0, z0, z1), -+ p0 = svcmpge (p0, z0, z1)) -+ -+/* -+** cmpge_s8_untied: -+** ( -+** cmpge p0\.b, p1/z, z0\.b, z1\.b -+** | -+** cmple p0\.b, p1/z, z1\.b, z0\.b -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_s8_untied, svint8_t, -+ p0 = svcmpge_s8 (p1, z0, z1), -+ p0 = svcmpge (p1, z0, z1)) -+ -+/* -+** cmpge_w0_s8: -+** mov (z[0-9]+\.b), w0 -+** ( -+** cmpge p0\.b, p1/z, 
z0\.b, \1 -+** | -+** cmple p0\.b, p1/z, \1, z0\.b -+** ) -+** ret -+*/ -+TEST_COMPARE_ZX (cmpge_w0_s8, svint8_t, int8_t, -+ p0 = svcmpge_n_s8 (p1, z0, x0), -+ p0 = svcmpge (p1, z0, x0)) -+ -+/* -+** cmpge_0_s8: -+** cmpge p0\.b, p1/z, z0\.b, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_0_s8, svint8_t, -+ p0 = svcmpge_n_s8 (p1, z0, 0), -+ p0 = svcmpge (p1, z0, 0)) -+ -+/* -+** cmpge_1_s8: -+** cmpge p0\.b, p1/z, z0\.b, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_1_s8, svint8_t, -+ p0 = svcmpge_n_s8 (p1, z0, 1), -+ p0 = svcmpge (p1, z0, 1)) -+ -+/* -+** cmpge_15_s8: -+** cmpge p0\.b, p1/z, z0\.b, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_15_s8, svint8_t, -+ p0 = svcmpge_n_s8 (p1, z0, 15), -+ p0 = svcmpge (p1, z0, 15)) -+ -+/* -+** cmpge_16_s8: -+** mov (z[0-9]+\.b), #16 -+** ( -+** cmpge p0\.b, p1/z, z0\.b, \1 -+** | -+** cmple p0\.b, p1/z, \1, z0\.b -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_16_s8, svint8_t, -+ p0 = svcmpge_n_s8 (p1, z0, 16), -+ p0 = svcmpge (p1, z0, 16)) -+ -+/* -+** cmpge_m1_s8: -+** cmpge p0\.b, p1/z, z0\.b, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_m1_s8, svint8_t, -+ p0 = svcmpge_n_s8 (p1, z0, -1), -+ p0 = svcmpge (p1, z0, -1)) -+ -+/* -+** cmpge_m16_s8: -+** cmpge p0\.b, p1/z, z0\.b, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_m16_s8, svint8_t, -+ p0 = svcmpge_n_s8 (p1, z0, -16), -+ p0 = svcmpge (p1, z0, -16)) -+ -+/* -+** cmpge_m17_s8: -+** mov (z[0-9]+\.b), #-17 -+** ( -+** cmpge p0\.b, p1/z, z0\.b, \1 -+** | -+** cmple p0\.b, p1/z, \1, z0\.b -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_m17_s8, svint8_t, -+ p0 = svcmpge_n_s8 (p1, z0, -17), -+ p0 = svcmpge (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_u16.c -new file mode 100644 -index 000000000..7c7d2b307 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_u16.c -@@ -0,0 +1,116 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpge_u16_tied: -+** ( -+** cmphs p0\.h, p0/z, z0\.h, z1\.h -+** | -+** cmpls p0\.h, p0/z, z1\.h, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_u16_tied, svuint16_t, -+ p0 = svcmpge_u16 (p0, z0, z1), -+ p0 = svcmpge (p0, z0, z1)) -+ -+/* -+** cmpge_u16_untied: -+** ( -+** cmphs p0\.h, p1/z, z0\.h, z1\.h -+** | -+** cmpls p0\.h, p1/z, z1\.h, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_u16_untied, svuint16_t, -+ p0 = svcmpge_u16 (p1, z0, z1), -+ p0 = svcmpge (p1, z0, z1)) -+ -+/* -+** cmpge_w0_u16: -+** mov (z[0-9]+\.h), w0 -+** ( -+** cmphs p0\.h, p1/z, z0\.h, \1 -+** | -+** cmpls p0\.h, p1/z, \1, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_ZX (cmpge_w0_u16, svuint16_t, uint16_t, -+ p0 = svcmpge_n_u16 (p1, z0, x0), -+ p0 = svcmpge (p1, z0, x0)) -+ -+/* -+** cmpge_0_u16: -+** cmphs p0\.h, p1/z, z0\.h, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_0_u16, svuint16_t, -+ p0 = svcmpge_n_u16 (p1, z0, 0), -+ p0 = svcmpge (p1, z0, 0)) -+ -+/* -+** cmpge_1_u16: -+** cmphs p0\.h, p1/z, z0\.h, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_1_u16, svuint16_t, -+ p0 = svcmpge_n_u16 (p1, z0, 1), -+ p0 = svcmpge (p1, z0, 1)) -+ -+/* -+** cmpge_15_u16: -+** cmphs p0\.h, p1/z, z0\.h, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_15_u16, svuint16_t, -+ p0 = svcmpge_n_u16 (p1, z0, 15), -+ p0 = svcmpge (p1, z0, 15)) -+ -+/* -+** cmpge_16_u16: -+** cmphs p0\.h, p1/z, z0\.h, #16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_16_u16, svuint16_t, -+ p0 = svcmpge_n_u16 (p1, z0, 16), -+ p0 = svcmpge (p1, z0, 16)) -+ -+/* -+** cmpge_127_u16: -+** cmphs p0\.h, 
p1/z, z0\.h, #127 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_127_u16, svuint16_t, -+ p0 = svcmpge_n_u16 (p1, z0, 127), -+ p0 = svcmpge (p1, z0, 127)) -+ -+/* -+** cmpge_128_u16: -+** mov (z[0-9]+\.h), #128 -+** ( -+** cmphs p0\.h, p1/z, z0\.h, \1 -+** | -+** cmpls p0\.h, p1/z, \1, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_128_u16, svuint16_t, -+ p0 = svcmpge_n_u16 (p1, z0, 128), -+ p0 = svcmpge (p1, z0, 128)) -+ -+/* -+** cmpge_m1_u16: -+** mov (z[0-9]+)\.b, #-1 -+** ( -+** cmphs p0\.h, p1/z, z0\.h, \1\.h -+** | -+** cmpls p0\.h, p1/z, \1\.h, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_m1_u16, svuint16_t, -+ p0 = svcmpge_n_u16 (p1, z0, -1), -+ p0 = svcmpge (p1, z0, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_u32.c -new file mode 100644 -index 000000000..a2021ef50 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_u32.c -@@ -0,0 +1,116 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpge_u32_tied: -+** ( -+** cmphs p0\.s, p0/z, z0\.s, z1\.s -+** | -+** cmpls p0\.s, p0/z, z1\.s, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_u32_tied, svuint32_t, -+ p0 = svcmpge_u32 (p0, z0, z1), -+ p0 = svcmpge (p0, z0, z1)) -+ -+/* -+** cmpge_u32_untied: -+** ( -+** cmphs p0\.s, p1/z, z0\.s, z1\.s -+** | -+** cmpls p0\.s, p1/z, z1\.s, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_u32_untied, svuint32_t, -+ p0 = svcmpge_u32 (p1, z0, z1), -+ p0 = svcmpge (p1, z0, z1)) -+ -+/* -+** cmpge_w0_u32: -+** mov (z[0-9]+\.s), w0 -+** ( -+** cmphs p0\.s, p1/z, z0\.s, \1 -+** | -+** cmpls p0\.s, p1/z, \1, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_ZX (cmpge_w0_u32, svuint32_t, uint32_t, -+ p0 = svcmpge_n_u32 (p1, z0, x0), -+ p0 = svcmpge (p1, z0, x0)) -+ -+/* -+** cmpge_0_u32: -+** cmphs p0\.s, p1/z, z0\.s, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_0_u32, svuint32_t, -+ p0 = svcmpge_n_u32 (p1, z0, 0), -+ p0 = svcmpge (p1, z0, 0)) -+ -+/* -+** cmpge_1_u32: -+** cmphs p0\.s, p1/z, z0\.s, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_1_u32, svuint32_t, -+ p0 = svcmpge_n_u32 (p1, z0, 1), -+ p0 = svcmpge (p1, z0, 1)) -+ -+/* -+** cmpge_15_u32: -+** cmphs p0\.s, p1/z, z0\.s, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_15_u32, svuint32_t, -+ p0 = svcmpge_n_u32 (p1, z0, 15), -+ p0 = svcmpge (p1, z0, 15)) -+ -+/* -+** cmpge_16_u32: -+** cmphs p0\.s, p1/z, z0\.s, #16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_16_u32, svuint32_t, -+ p0 = svcmpge_n_u32 (p1, z0, 16), -+ p0 = svcmpge (p1, z0, 16)) -+ -+/* -+** cmpge_127_u32: -+** cmphs p0\.s, p1/z, z0\.s, #127 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_127_u32, svuint32_t, -+ p0 = svcmpge_n_u32 (p1, z0, 127), -+ p0 = svcmpge (p1, z0, 127)) -+ -+/* -+** cmpge_128_u32: -+** mov (z[0-9]+\.s), #128 -+** ( -+** cmphs p0\.s, p1/z, z0\.s, \1 -+** | -+** cmpls p0\.s, p1/z, \1, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_128_u32, svuint32_t, -+ p0 = svcmpge_n_u32 (p1, z0, 128), -+ p0 = svcmpge (p1, z0, 128)) -+ -+/* -+** cmpge_m1_u32: -+** mov (z[0-9]+)\.b, #-1 -+** ( -+** cmphs p0\.s, p1/z, z0\.s, \1\.s -+** | -+** cmpls p0\.s, p1/z, \1\.s, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_m1_u32, svuint32_t, -+ p0 = svcmpge_n_u32 (p1, z0, -1), -+ p0 = svcmpge (p1, z0, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_u64.c -new file mode 100644 -index 000000000..0f9159590 ---- /dev/null -+++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_u64.c -@@ -0,0 +1,116 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpge_u64_tied: -+** ( -+** cmphs p0\.d, p0/z, z0\.d, z1\.d -+** | -+** cmpls p0\.d, p0/z, z1\.d, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_u64_tied, svuint64_t, -+ p0 = svcmpge_u64 (p0, z0, z1), -+ p0 = svcmpge (p0, z0, z1)) -+ -+/* -+** cmpge_u64_untied: -+** ( -+** cmphs p0\.d, p1/z, z0\.d, z1\.d -+** | -+** cmpls p0\.d, p1/z, z1\.d, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_u64_untied, svuint64_t, -+ p0 = svcmpge_u64 (p1, z0, z1), -+ p0 = svcmpge (p1, z0, z1)) -+ -+/* -+** cmpge_x0_u64: -+** mov (z[0-9]+\.d), x0 -+** ( -+** cmphs p0\.d, p1/z, z0\.d, \1 -+** | -+** cmpls p0\.d, p1/z, \1, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_ZX (cmpge_x0_u64, svuint64_t, uint64_t, -+ p0 = svcmpge_n_u64 (p1, z0, x0), -+ p0 = svcmpge (p1, z0, x0)) -+ -+/* -+** cmpge_0_u64: -+** cmphs p0\.d, p1/z, z0\.d, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_0_u64, svuint64_t, -+ p0 = svcmpge_n_u64 (p1, z0, 0), -+ p0 = svcmpge (p1, z0, 0)) -+ -+/* -+** cmpge_1_u64: -+** cmphs p0\.d, p1/z, z0\.d, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_1_u64, svuint64_t, -+ p0 = svcmpge_n_u64 (p1, z0, 1), -+ p0 = svcmpge (p1, z0, 1)) -+ -+/* -+** cmpge_15_u64: -+** cmphs p0\.d, p1/z, z0\.d, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_15_u64, svuint64_t, -+ p0 = svcmpge_n_u64 (p1, z0, 15), -+ p0 = svcmpge (p1, z0, 15)) -+ -+/* -+** cmpge_16_u64: -+** cmphs p0\.d, p1/z, z0\.d, #16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_16_u64, svuint64_t, -+ p0 = svcmpge_n_u64 (p1, z0, 16), -+ p0 = svcmpge (p1, z0, 16)) -+ -+/* -+** cmpge_127_u64: -+** cmphs p0\.d, p1/z, z0\.d, #127 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_127_u64, svuint64_t, -+ p0 = svcmpge_n_u64 (p1, z0, 127), -+ p0 = svcmpge (p1, z0, 127)) -+ -+/* -+** cmpge_128_u64: -+** mov (z[0-9]+\.d), #128 -+** ( -+** cmphs p0\.d, p1/z, z0\.d, \1 -+** | -+** cmpls p0\.d, p1/z, \1, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_128_u64, svuint64_t, -+ p0 = svcmpge_n_u64 (p1, z0, 128), -+ p0 = svcmpge (p1, z0, 128)) -+ -+/* -+** cmpge_m1_u64: -+** mov (z[0-9]+)\.b, #-1 -+** ( -+** cmphs p0\.d, p1/z, z0\.d, \1\.d -+** | -+** cmpls p0\.d, p1/z, \1\.d, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_m1_u64, svuint64_t, -+ p0 = svcmpge_n_u64 (p1, z0, -1), -+ p0 = svcmpge (p1, z0, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_u8.c -new file mode 100644 -index 000000000..39f988d01 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_u8.c -@@ -0,0 +1,116 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpge_u8_tied: -+** ( -+** cmphs p0\.b, p0/z, z0\.b, z1\.b -+** | -+** cmpls p0\.b, p0/z, z1\.b, z0\.b -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_u8_tied, svuint8_t, -+ p0 = svcmpge_u8 (p0, z0, z1), -+ p0 = svcmpge (p0, z0, z1)) -+ -+/* -+** cmpge_u8_untied: -+** ( -+** cmphs p0\.b, p1/z, z0\.b, z1\.b -+** | -+** cmpls p0\.b, p1/z, z1\.b, z0\.b -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_u8_untied, svuint8_t, -+ p0 = svcmpge_u8 (p1, z0, z1), -+ p0 = svcmpge (p1, z0, z1)) -+ -+/* -+** cmpge_w0_u8: -+** mov (z[0-9]+\.b), w0 -+** ( -+** cmphs p0\.b, p1/z, z0\.b, \1 -+** | -+** cmpls p0\.b, p1/z, \1, z0\.b -+** ) -+** ret -+*/ -+TEST_COMPARE_ZX (cmpge_w0_u8, svuint8_t, uint8_t, -+ p0 = svcmpge_n_u8 (p1, z0, x0), -+ p0 = 
svcmpge (p1, z0, x0)) -+ -+/* -+** cmpge_0_u8: -+** cmphs p0\.b, p1/z, z0\.b, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_0_u8, svuint8_t, -+ p0 = svcmpge_n_u8 (p1, z0, 0), -+ p0 = svcmpge (p1, z0, 0)) -+ -+/* -+** cmpge_1_u8: -+** cmphs p0\.b, p1/z, z0\.b, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_1_u8, svuint8_t, -+ p0 = svcmpge_n_u8 (p1, z0, 1), -+ p0 = svcmpge (p1, z0, 1)) -+ -+/* -+** cmpge_15_u8: -+** cmphs p0\.b, p1/z, z0\.b, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_15_u8, svuint8_t, -+ p0 = svcmpge_n_u8 (p1, z0, 15), -+ p0 = svcmpge (p1, z0, 15)) -+ -+/* -+** cmpge_16_u8: -+** cmphs p0\.b, p1/z, z0\.b, #16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_16_u8, svuint8_t, -+ p0 = svcmpge_n_u8 (p1, z0, 16), -+ p0 = svcmpge (p1, z0, 16)) -+ -+/* -+** cmpge_127_u8: -+** cmphs p0\.b, p1/z, z0\.b, #127 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_127_u8, svuint8_t, -+ p0 = svcmpge_n_u8 (p1, z0, 127), -+ p0 = svcmpge (p1, z0, 127)) -+ -+/* -+** cmpge_128_u8: -+** mov (z[0-9]+\.b), #-128 -+** ( -+** cmphs p0\.b, p1/z, z0\.b, \1 -+** | -+** cmpls p0\.b, p1/z, \1, z0\.b -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_128_u8, svuint8_t, -+ p0 = svcmpge_n_u8 (p1, z0, 128), -+ p0 = svcmpge (p1, z0, 128)) -+ -+/* -+** cmpge_m1_u8: -+** mov (z[0-9]+\.b), #-1 -+** ( -+** cmphs p0\.b, p1/z, z0\.b, \1 -+** | -+** cmpls p0\.b, p1/z, \1, z0\.b -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_m1_u8, svuint8_t, -+ p0 = svcmpge_n_u8 (p1, z0, -1), -+ p0 = svcmpge (p1, z0, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_wide_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_wide_s16.c -new file mode 100644 -index 000000000..0400d7871 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_wide_s16.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpge_wide_s16_tied: -+** cmpge p0\.h, p0/z, z0\.h, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmpge_wide_s16_tied, svint16_t, svint64_t, -+ p0 = svcmpge_wide_s16 (p0, z0, z1), -+ p0 = svcmpge_wide (p0, z0, z1)) -+ -+/* -+** cmpge_wide_s16_untied: -+** cmpge p0\.h, p1/z, z0\.h, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmpge_wide_s16_untied, svint16_t, svint64_t, -+ p0 = svcmpge_wide_s16 (p1, z0, z1), -+ p0 = svcmpge_wide (p1, z0, z1)) -+ -+/* -+** cmpge_wide_x0_s16: -+** mov (z[0-9]+\.d), x0 -+** cmpge p0\.h, p1/z, z0\.h, \1 -+** ret -+*/ -+TEST_COMPARE_ZX (cmpge_wide_x0_s16, svint16_t, int64_t, -+ p0 = svcmpge_wide_n_s16 (p1, z0, x0), -+ p0 = svcmpge_wide (p1, z0, x0)) -+ -+/* -+** cmpge_wide_0_s16: -+** cmpge p0\.h, p1/z, z0\.h, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_0_s16, svint16_t, -+ p0 = svcmpge_wide_n_s16 (p1, z0, 0), -+ p0 = svcmpge_wide (p1, z0, 0)) -+ -+/* -+** cmpge_wide_1_s16: -+** cmpge p0\.h, p1/z, z0\.h, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_1_s16, svint16_t, -+ p0 = svcmpge_wide_n_s16 (p1, z0, 1), -+ p0 = svcmpge_wide (p1, z0, 1)) -+ -+/* -+** cmpge_wide_15_s16: -+** cmpge p0\.h, p1/z, z0\.h, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_15_s16, svint16_t, -+ p0 = svcmpge_wide_n_s16 (p1, z0, 15), -+ p0 = svcmpge_wide (p1, z0, 15)) -+ -+/* -+** cmpge_wide_16_s16: -+** mov (z[0-9]+\.d), #16 -+** cmpge p0\.h, p1/z, z0\.h, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_16_s16, svint16_t, -+ p0 = svcmpge_wide_n_s16 (p1, z0, 16), -+ p0 = svcmpge_wide (p1, z0, 16)) -+ -+/* -+** cmpge_wide_m1_s16: -+** cmpge p0\.h, p1/z, z0\.h, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_m1_s16, svint16_t, -+ p0 = svcmpge_wide_n_s16 (p1, z0, -1), 
-+ p0 = svcmpge_wide (p1, z0, -1)) -+ -+/* -+** cmpge_wide_m16_s16: -+** cmpge p0\.h, p1/z, z0\.h, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_m16_s16, svint16_t, -+ p0 = svcmpge_wide_n_s16 (p1, z0, -16), -+ p0 = svcmpge_wide (p1, z0, -16)) -+ -+/* -+** cmpge_wide_m17_s16: -+** mov (z[0-9]+\.d), #-17 -+** cmpge p0\.h, p1/z, z0\.h, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_m17_s16, svint16_t, -+ p0 = svcmpge_wide_n_s16 (p1, z0, -17), -+ p0 = svcmpge_wide (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_wide_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_wide_s32.c -new file mode 100644 -index 000000000..ad7b9c55b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_wide_s32.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpge_wide_s32_tied: -+** cmpge p0\.s, p0/z, z0\.s, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmpge_wide_s32_tied, svint32_t, svint64_t, -+ p0 = svcmpge_wide_s32 (p0, z0, z1), -+ p0 = svcmpge_wide (p0, z0, z1)) -+ -+/* -+** cmpge_wide_s32_untied: -+** cmpge p0\.s, p1/z, z0\.s, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmpge_wide_s32_untied, svint32_t, svint64_t, -+ p0 = svcmpge_wide_s32 (p1, z0, z1), -+ p0 = svcmpge_wide (p1, z0, z1)) -+ -+/* -+** cmpge_wide_x0_s32: -+** mov (z[0-9]+\.d), x0 -+** cmpge p0\.s, p1/z, z0\.s, \1 -+** ret -+*/ -+TEST_COMPARE_ZX (cmpge_wide_x0_s32, svint32_t, int64_t, -+ p0 = svcmpge_wide_n_s32 (p1, z0, x0), -+ p0 = svcmpge_wide (p1, z0, x0)) -+ -+/* -+** cmpge_wide_0_s32: -+** cmpge p0\.s, p1/z, z0\.s, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_0_s32, svint32_t, -+ p0 = svcmpge_wide_n_s32 (p1, z0, 0), -+ p0 = svcmpge_wide (p1, z0, 0)) -+ -+/* -+** cmpge_wide_1_s32: -+** cmpge p0\.s, p1/z, z0\.s, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_1_s32, svint32_t, -+ p0 = svcmpge_wide_n_s32 (p1, z0, 1), -+ p0 = svcmpge_wide (p1, z0, 1)) -+ -+/* -+** cmpge_wide_15_s32: -+** cmpge p0\.s, p1/z, z0\.s, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_15_s32, svint32_t, -+ p0 = svcmpge_wide_n_s32 (p1, z0, 15), -+ p0 = svcmpge_wide (p1, z0, 15)) -+ -+/* -+** cmpge_wide_16_s32: -+** mov (z[0-9]+\.d), #16 -+** cmpge p0\.s, p1/z, z0\.s, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_16_s32, svint32_t, -+ p0 = svcmpge_wide_n_s32 (p1, z0, 16), -+ p0 = svcmpge_wide (p1, z0, 16)) -+ -+/* -+** cmpge_wide_m1_s32: -+** cmpge p0\.s, p1/z, z0\.s, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_m1_s32, svint32_t, -+ p0 = svcmpge_wide_n_s32 (p1, z0, -1), -+ p0 = svcmpge_wide (p1, z0, -1)) -+ -+/* -+** cmpge_wide_m16_s32: -+** cmpge p0\.s, p1/z, z0\.s, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_m16_s32, svint32_t, -+ p0 = svcmpge_wide_n_s32 (p1, z0, -16), -+ p0 = svcmpge_wide (p1, z0, -16)) -+ -+/* -+** cmpge_wide_m17_s32: -+** mov (z[0-9]+\.d), #-17 -+** cmpge p0\.s, p1/z, z0\.s, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_m17_s32, svint32_t, -+ p0 = svcmpge_wide_n_s32 (p1, z0, -17), -+ p0 = svcmpge_wide (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_wide_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_wide_s8.c -new file mode 100644 -index 000000000..b03a42488 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_wide_s8.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpge_wide_s8_tied: -+** cmpge p0\.b, p0/z, z0\.b, z1\.d -+** ret -+*/ 
-+TEST_COMPARE_DUAL_Z (cmpge_wide_s8_tied, svint8_t, svint64_t, -+ p0 = svcmpge_wide_s8 (p0, z0, z1), -+ p0 = svcmpge_wide (p0, z0, z1)) -+ -+/* -+** cmpge_wide_s8_untied: -+** cmpge p0\.b, p1/z, z0\.b, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmpge_wide_s8_untied, svint8_t, svint64_t, -+ p0 = svcmpge_wide_s8 (p1, z0, z1), -+ p0 = svcmpge_wide (p1, z0, z1)) -+ -+/* -+** cmpge_wide_x0_s8: -+** mov (z[0-9]+\.d), x0 -+** cmpge p0\.b, p1/z, z0\.b, \1 -+** ret -+*/ -+TEST_COMPARE_ZX (cmpge_wide_x0_s8, svint8_t, int64_t, -+ p0 = svcmpge_wide_n_s8 (p1, z0, x0), -+ p0 = svcmpge_wide (p1, z0, x0)) -+ -+/* -+** cmpge_wide_0_s8: -+** cmpge p0\.b, p1/z, z0\.b, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_0_s8, svint8_t, -+ p0 = svcmpge_wide_n_s8 (p1, z0, 0), -+ p0 = svcmpge_wide (p1, z0, 0)) -+ -+/* -+** cmpge_wide_1_s8: -+** cmpge p0\.b, p1/z, z0\.b, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_1_s8, svint8_t, -+ p0 = svcmpge_wide_n_s8 (p1, z0, 1), -+ p0 = svcmpge_wide (p1, z0, 1)) -+ -+/* -+** cmpge_wide_15_s8: -+** cmpge p0\.b, p1/z, z0\.b, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_15_s8, svint8_t, -+ p0 = svcmpge_wide_n_s8 (p1, z0, 15), -+ p0 = svcmpge_wide (p1, z0, 15)) -+ -+/* -+** cmpge_wide_16_s8: -+** mov (z[0-9]+\.d), #16 -+** cmpge p0\.b, p1/z, z0\.b, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_16_s8, svint8_t, -+ p0 = svcmpge_wide_n_s8 (p1, z0, 16), -+ p0 = svcmpge_wide (p1, z0, 16)) -+ -+/* -+** cmpge_wide_m1_s8: -+** cmpge p0\.b, p1/z, z0\.b, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_m1_s8, svint8_t, -+ p0 = svcmpge_wide_n_s8 (p1, z0, -1), -+ p0 = svcmpge_wide (p1, z0, -1)) -+ -+/* -+** cmpge_wide_m16_s8: -+** cmpge p0\.b, p1/z, z0\.b, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_m16_s8, svint8_t, -+ p0 = svcmpge_wide_n_s8 (p1, z0, -16), -+ p0 = svcmpge_wide (p1, z0, -16)) -+ -+/* -+** cmpge_wide_m17_s8: -+** mov (z[0-9]+\.d), #-17 -+** cmpge p0\.b, p1/z, z0\.b, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_m17_s8, svint8_t, -+ p0 = svcmpge_wide_n_s8 (p1, z0, -17), -+ p0 = svcmpge_wide (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_wide_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_wide_u16.c -new file mode 100644 -index 000000000..966b1e554 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_wide_u16.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpge_wide_u16_tied: -+** cmphs p0\.h, p0/z, z0\.h, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmpge_wide_u16_tied, svuint16_t, svuint64_t, -+ p0 = svcmpge_wide_u16 (p0, z0, z1), -+ p0 = svcmpge_wide (p0, z0, z1)) -+ -+/* -+** cmpge_wide_u16_untied: -+** cmphs p0\.h, p1/z, z0\.h, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmpge_wide_u16_untied, svuint16_t, svuint64_t, -+ p0 = svcmpge_wide_u16 (p1, z0, z1), -+ p0 = svcmpge_wide (p1, z0, z1)) -+ -+/* -+** cmpge_wide_x0_u16: -+** mov (z[0-9]+\.d), x0 -+** cmphs p0\.h, p1/z, z0\.h, \1 -+** ret -+*/ -+TEST_COMPARE_ZX (cmpge_wide_x0_u16, svuint16_t, uint64_t, -+ p0 = svcmpge_wide_n_u16 (p1, z0, x0), -+ p0 = svcmpge_wide (p1, z0, x0)) -+ -+/* -+** cmpge_wide_0_u16: -+** cmphs p0\.h, p1/z, z0\.h, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_0_u16, svuint16_t, -+ p0 = svcmpge_wide_n_u16 (p1, z0, 0), -+ p0 = svcmpge_wide (p1, z0, 0)) -+ -+/* -+** cmpge_wide_1_u16: -+** cmphs p0\.h, p1/z, z0\.h, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_1_u16, svuint16_t, -+ p0 = svcmpge_wide_n_u16 (p1, z0, 1), -+ p0 = svcmpge_wide (p1, z0, 1)) 
-+ -+/* -+** cmpge_wide_15_u16: -+** cmphs p0\.h, p1/z, z0\.h, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_15_u16, svuint16_t, -+ p0 = svcmpge_wide_n_u16 (p1, z0, 15), -+ p0 = svcmpge_wide (p1, z0, 15)) -+ -+/* -+** cmpge_wide_16_u16: -+** cmphs p0\.h, p1/z, z0\.h, #16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_16_u16, svuint16_t, -+ p0 = svcmpge_wide_n_u16 (p1, z0, 16), -+ p0 = svcmpge_wide (p1, z0, 16)) -+ -+/* -+** cmpge_wide_127_u16: -+** cmphs p0\.h, p1/z, z0\.h, #127 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_127_u16, svuint16_t, -+ p0 = svcmpge_wide_n_u16 (p1, z0, 127), -+ p0 = svcmpge_wide (p1, z0, 127)) -+ -+/* -+** cmpge_wide_128_u16: -+** mov (z[0-9]+\.d), #128 -+** cmphs p0\.h, p1/z, z0\.h, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_128_u16, svuint16_t, -+ p0 = svcmpge_wide_n_u16 (p1, z0, 128), -+ p0 = svcmpge_wide (p1, z0, 128)) -+ -+/* -+** cmpge_wide_m1_u16: -+** mov (z[0-9]+)\.b, #-1 -+** cmphs p0\.h, p1/z, z0\.h, \1\.d -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_m1_u16, svuint16_t, -+ p0 = svcmpge_wide_n_u16 (p1, z0, -1), -+ p0 = svcmpge_wide (p1, z0, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_wide_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_wide_u32.c -new file mode 100644 -index 000000000..fdeb53a46 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_wide_u32.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpge_wide_u32_tied: -+** cmphs p0\.s, p0/z, z0\.s, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmpge_wide_u32_tied, svuint32_t, svuint64_t, -+ p0 = svcmpge_wide_u32 (p0, z0, z1), -+ p0 = svcmpge_wide (p0, z0, z1)) -+ -+/* -+** cmpge_wide_u32_untied: -+** cmphs p0\.s, p1/z, z0\.s, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmpge_wide_u32_untied, svuint32_t, svuint64_t, -+ p0 = svcmpge_wide_u32 (p1, z0, z1), -+ p0 = svcmpge_wide (p1, z0, z1)) -+ -+/* -+** cmpge_wide_x0_u32: -+** mov (z[0-9]+\.d), x0 -+** cmphs p0\.s, p1/z, z0\.s, \1 -+** ret -+*/ -+TEST_COMPARE_ZX (cmpge_wide_x0_u32, svuint32_t, uint64_t, -+ p0 = svcmpge_wide_n_u32 (p1, z0, x0), -+ p0 = svcmpge_wide (p1, z0, x0)) -+ -+/* -+** cmpge_wide_0_u32: -+** cmphs p0\.s, p1/z, z0\.s, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_0_u32, svuint32_t, -+ p0 = svcmpge_wide_n_u32 (p1, z0, 0), -+ p0 = svcmpge_wide (p1, z0, 0)) -+ -+/* -+** cmpge_wide_1_u32: -+** cmphs p0\.s, p1/z, z0\.s, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_1_u32, svuint32_t, -+ p0 = svcmpge_wide_n_u32 (p1, z0, 1), -+ p0 = svcmpge_wide (p1, z0, 1)) -+ -+/* -+** cmpge_wide_15_u32: -+** cmphs p0\.s, p1/z, z0\.s, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_15_u32, svuint32_t, -+ p0 = svcmpge_wide_n_u32 (p1, z0, 15), -+ p0 = svcmpge_wide (p1, z0, 15)) -+ -+/* -+** cmpge_wide_16_u32: -+** cmphs p0\.s, p1/z, z0\.s, #16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_16_u32, svuint32_t, -+ p0 = svcmpge_wide_n_u32 (p1, z0, 16), -+ p0 = svcmpge_wide (p1, z0, 16)) -+ -+/* -+** cmpge_wide_127_u32: -+** cmphs p0\.s, p1/z, z0\.s, #127 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_127_u32, svuint32_t, -+ p0 = svcmpge_wide_n_u32 (p1, z0, 127), -+ p0 = svcmpge_wide (p1, z0, 127)) -+ -+/* -+** cmpge_wide_128_u32: -+** mov (z[0-9]+\.d), #128 -+** cmphs p0\.s, p1/z, z0\.s, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_128_u32, svuint32_t, -+ p0 = svcmpge_wide_n_u32 (p1, z0, 128), -+ p0 = svcmpge_wide (p1, z0, 128)) -+ -+/* -+** cmpge_wide_m1_u32: -+** mov (z[0-9]+)\.b, #-1 -+** cmphs p0\.s, p1/z, z0\.s, \1\.d -+** 
ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_m1_u32, svuint32_t, -+ p0 = svcmpge_wide_n_u32 (p1, z0, -1), -+ p0 = svcmpge_wide (p1, z0, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_wide_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_wide_u8.c -new file mode 100644 -index 000000000..565093120 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpge_wide_u8.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpge_wide_u8_tied: -+** cmphs p0\.b, p0/z, z0\.b, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmpge_wide_u8_tied, svuint8_t, svuint64_t, -+ p0 = svcmpge_wide_u8 (p0, z0, z1), -+ p0 = svcmpge_wide (p0, z0, z1)) -+ -+/* -+** cmpge_wide_u8_untied: -+** cmphs p0\.b, p1/z, z0\.b, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmpge_wide_u8_untied, svuint8_t, svuint64_t, -+ p0 = svcmpge_wide_u8 (p1, z0, z1), -+ p0 = svcmpge_wide (p1, z0, z1)) -+ -+/* -+** cmpge_wide_x0_u8: -+** mov (z[0-9]+\.d), x0 -+** cmphs p0\.b, p1/z, z0\.b, \1 -+** ret -+*/ -+TEST_COMPARE_ZX (cmpge_wide_x0_u8, svuint8_t, uint64_t, -+ p0 = svcmpge_wide_n_u8 (p1, z0, x0), -+ p0 = svcmpge_wide (p1, z0, x0)) -+ -+/* -+** cmpge_wide_0_u8: -+** cmphs p0\.b, p1/z, z0\.b, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_0_u8, svuint8_t, -+ p0 = svcmpge_wide_n_u8 (p1, z0, 0), -+ p0 = svcmpge_wide (p1, z0, 0)) -+ -+/* -+** cmpge_wide_1_u8: -+** cmphs p0\.b, p1/z, z0\.b, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_1_u8, svuint8_t, -+ p0 = svcmpge_wide_n_u8 (p1, z0, 1), -+ p0 = svcmpge_wide (p1, z0, 1)) -+ -+/* -+** cmpge_wide_15_u8: -+** cmphs p0\.b, p1/z, z0\.b, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_15_u8, svuint8_t, -+ p0 = svcmpge_wide_n_u8 (p1, z0, 15), -+ p0 = svcmpge_wide (p1, z0, 15)) -+ -+/* -+** cmpge_wide_16_u8: -+** cmphs p0\.b, p1/z, z0\.b, #16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_16_u8, svuint8_t, -+ p0 = svcmpge_wide_n_u8 (p1, z0, 16), -+ p0 = svcmpge_wide (p1, z0, 16)) -+ -+/* -+** cmpge_wide_127_u8: -+** cmphs p0\.b, p1/z, z0\.b, #127 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_127_u8, svuint8_t, -+ p0 = svcmpge_wide_n_u8 (p1, z0, 127), -+ p0 = svcmpge_wide (p1, z0, 127)) -+ -+/* -+** cmpge_wide_128_u8: -+** mov (z[0-9]+\.d), #128 -+** cmphs p0\.b, p1/z, z0\.b, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_128_u8, svuint8_t, -+ p0 = svcmpge_wide_n_u8 (p1, z0, 128), -+ p0 = svcmpge_wide (p1, z0, 128)) -+ -+/* -+** cmpge_wide_m1_u8: -+** mov (z[0-9]+)\.b, #-1 -+** cmphs p0\.b, p1/z, z0\.b, \1\.d -+** ret -+*/ -+TEST_COMPARE_Z (cmpge_wide_m1_u8, svuint8_t, -+ p0 = svcmpge_wide_n_u8 (p1, z0, -1), -+ p0 = svcmpge_wide (p1, z0, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_f16.c -new file mode 100644 -index 000000000..69b015794 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_f16.c -@@ -0,0 +1,66 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpgt_f16_tied: -+** ( -+** fcmgt p0\.h, p0/z, z0\.h, z1\.h -+** | -+** fcmlt p0\.h, p0/z, z1\.h, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_f16_tied, svfloat16_t, -+ p0 = svcmpgt_f16 (p0, z0, z1), -+ p0 = svcmpgt (p0, z0, z1)) -+ -+/* -+** cmpgt_f16_untied: -+** ( -+** fcmgt p0\.h, p1/z, z0\.h, z1\.h -+** | -+** fcmlt p0\.h, p1/z, z1\.h, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_f16_untied, svfloat16_t, -+ p0 = svcmpgt_f16 (p1, z0, z1), -+ p0 = svcmpgt 
(p1, z0, z1)) -+ -+/* -+** cmpgt_h4_f16: -+** mov (z[0-9]+\.h), h4 -+** ( -+** fcmgt p0\.h, p1/z, z0\.h, \1 -+** | -+** fcmlt p0\.h, p1/z, \1, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_ZD (cmpgt_h4_f16, svfloat16_t, float16_t, -+ p0 = svcmpgt_n_f16 (p1, z0, d4), -+ p0 = svcmpgt (p1, z0, d4)) -+ -+/* -+** cmpgt_0_f16: -+** fcmgt p0\.h, p1/z, z0\.h, #0\.0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_0_f16, svfloat16_t, -+ p0 = svcmpgt_n_f16 (p1, z0, 0), -+ p0 = svcmpgt (p1, z0, 0)) -+ -+/* -+** cmpgt_1_f16: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? -+** ( -+** fcmgt p0\.h, p1/z, z0\.h, \1 -+** | -+** fcmlt p0\.h, p1/z, \1, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_1_f16, svfloat16_t, -+ p0 = svcmpgt_n_f16 (p1, z0, 1), -+ p0 = svcmpgt (p1, z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_f32.c -new file mode 100644 -index 000000000..7d66b67c3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_f32.c -@@ -0,0 +1,66 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpgt_f32_tied: -+** ( -+** fcmgt p0\.s, p0/z, z0\.s, z1\.s -+** | -+** fcmlt p0\.s, p0/z, z1\.s, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_f32_tied, svfloat32_t, -+ p0 = svcmpgt_f32 (p0, z0, z1), -+ p0 = svcmpgt (p0, z0, z1)) -+ -+/* -+** cmpgt_f32_untied: -+** ( -+** fcmgt p0\.s, p1/z, z0\.s, z1\.s -+** | -+** fcmlt p0\.s, p1/z, z1\.s, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_f32_untied, svfloat32_t, -+ p0 = svcmpgt_f32 (p1, z0, z1), -+ p0 = svcmpgt (p1, z0, z1)) -+ -+/* -+** cmpgt_s4_f32: -+** mov (z[0-9]+\.s), s4 -+** ( -+** fcmgt p0\.s, p1/z, z0\.s, \1 -+** | -+** fcmlt p0\.s, p1/z, \1, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_ZD (cmpgt_s4_f32, svfloat32_t, float32_t, -+ p0 = svcmpgt_n_f32 (p1, z0, d4), -+ p0 = svcmpgt (p1, z0, d4)) -+ -+/* -+** cmpgt_0_f32: -+** fcmgt p0\.s, p1/z, z0\.s, #0\.0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_0_f32, svfloat32_t, -+ p0 = svcmpgt_n_f32 (p1, z0, 0), -+ p0 = svcmpgt (p1, z0, 0)) -+ -+/* -+** cmpgt_1_f32: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? 
-+** ( -+** fcmgt p0\.s, p1/z, z0\.s, \1 -+** | -+** fcmlt p0\.s, p1/z, \1, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_1_f32, svfloat32_t, -+ p0 = svcmpgt_n_f32 (p1, z0, 1), -+ p0 = svcmpgt (p1, z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_f64.c -new file mode 100644 -index 000000000..f3a155476 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_f64.c -@@ -0,0 +1,66 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpgt_f64_tied: -+** ( -+** fcmgt p0\.d, p0/z, z0\.d, z1\.d -+** | -+** fcmlt p0\.d, p0/z, z1\.d, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_f64_tied, svfloat64_t, -+ p0 = svcmpgt_f64 (p0, z0, z1), -+ p0 = svcmpgt (p0, z0, z1)) -+ -+/* -+** cmpgt_f64_untied: -+** ( -+** fcmgt p0\.d, p1/z, z0\.d, z1\.d -+** | -+** fcmlt p0\.d, p1/z, z1\.d, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_f64_untied, svfloat64_t, -+ p0 = svcmpgt_f64 (p1, z0, z1), -+ p0 = svcmpgt (p1, z0, z1)) -+ -+/* -+** cmpgt_d4_f64: -+** mov (z[0-9]+\.d), d4 -+** ( -+** fcmgt p0\.d, p1/z, z0\.d, \1 -+** | -+** fcmlt p0\.d, p1/z, \1, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_ZD (cmpgt_d4_f64, svfloat64_t, float64_t, -+ p0 = svcmpgt_n_f64 (p1, z0, d4), -+ p0 = svcmpgt (p1, z0, d4)) -+ -+/* -+** cmpgt_0_f64: -+** fcmgt p0\.d, p1/z, z0\.d, #0\.0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_0_f64, svfloat64_t, -+ p0 = svcmpgt_n_f64 (p1, z0, 0), -+ p0 = svcmpgt (p1, z0, 0)) -+ -+/* -+** cmpgt_1_f64: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? -+** ( -+** fcmgt p0\.d, p1/z, z0\.d, \1 -+** | -+** fcmlt p0\.d, p1/z, \1, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_1_f64, svfloat64_t, -+ p0 = svcmpgt_n_f64 (p1, z0, 1), -+ p0 = svcmpgt (p1, z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_s16.c -new file mode 100644 -index 000000000..cc86c0c00 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_s16.c -@@ -0,0 +1,116 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpgt_s16_tied: -+** ( -+** cmpgt p0\.h, p0/z, z0\.h, z1\.h -+** | -+** cmplt p0\.h, p0/z, z1\.h, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_s16_tied, svint16_t, -+ p0 = svcmpgt_s16 (p0, z0, z1), -+ p0 = svcmpgt (p0, z0, z1)) -+ -+/* -+** cmpgt_s16_untied: -+** ( -+** cmpgt p0\.h, p1/z, z0\.h, z1\.h -+** | -+** cmplt p0\.h, p1/z, z1\.h, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_s16_untied, svint16_t, -+ p0 = svcmpgt_s16 (p1, z0, z1), -+ p0 = svcmpgt (p1, z0, z1)) -+ -+/* -+** cmpgt_w0_s16: -+** mov (z[0-9]+\.h), w0 -+** ( -+** cmpgt p0\.h, p1/z, z0\.h, \1 -+** | -+** cmplt p0\.h, p1/z, \1, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_ZX (cmpgt_w0_s16, svint16_t, int16_t, -+ p0 = svcmpgt_n_s16 (p1, z0, x0), -+ p0 = svcmpgt (p1, z0, x0)) -+ -+/* -+** cmpgt_0_s16: -+** cmpgt p0\.h, p1/z, z0\.h, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_0_s16, svint16_t, -+ p0 = svcmpgt_n_s16 (p1, z0, 0), -+ p0 = svcmpgt (p1, z0, 0)) -+ -+/* -+** cmpgt_1_s16: -+** cmpgt p0\.h, p1/z, z0\.h, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_1_s16, svint16_t, -+ p0 = svcmpgt_n_s16 (p1, z0, 1), -+ p0 = svcmpgt (p1, z0, 1)) -+ -+/* -+** cmpgt_15_s16: -+** cmpgt p0\.h, p1/z, z0\.h, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_15_s16, svint16_t, -+ p0 = svcmpgt_n_s16 (p1, z0, 15), -+ p0 = svcmpgt (p1, z0, 15)) -+ -+/* -+** 
cmpgt_16_s16: -+** mov (z[0-9]+\.h), #16 -+** ( -+** cmpgt p0\.h, p1/z, z0\.h, \1 -+** | -+** cmplt p0\.h, p1/z, \1, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_16_s16, svint16_t, -+ p0 = svcmpgt_n_s16 (p1, z0, 16), -+ p0 = svcmpgt (p1, z0, 16)) -+ -+/* -+** cmpgt_m1_s16: -+** cmpgt p0\.h, p1/z, z0\.h, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_m1_s16, svint16_t, -+ p0 = svcmpgt_n_s16 (p1, z0, -1), -+ p0 = svcmpgt (p1, z0, -1)) -+ -+/* -+** cmpgt_m16_s16: -+** cmpgt p0\.h, p1/z, z0\.h, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_m16_s16, svint16_t, -+ p0 = svcmpgt_n_s16 (p1, z0, -16), -+ p0 = svcmpgt (p1, z0, -16)) -+ -+/* -+** cmpgt_m17_s16: -+** mov (z[0-9]+\.h), #-17 -+** ( -+** cmpgt p0\.h, p1/z, z0\.h, \1 -+** | -+** cmplt p0\.h, p1/z, \1, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_m17_s16, svint16_t, -+ p0 = svcmpgt_n_s16 (p1, z0, -17), -+ p0 = svcmpgt (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_s32.c -new file mode 100644 -index 000000000..75f0cc737 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_s32.c -@@ -0,0 +1,116 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpgt_s32_tied: -+** ( -+** cmpgt p0\.s, p0/z, z0\.s, z1\.s -+** | -+** cmplt p0\.s, p0/z, z1\.s, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_s32_tied, svint32_t, -+ p0 = svcmpgt_s32 (p0, z0, z1), -+ p0 = svcmpgt (p0, z0, z1)) -+ -+/* -+** cmpgt_s32_untied: -+** ( -+** cmpgt p0\.s, p1/z, z0\.s, z1\.s -+** | -+** cmplt p0\.s, p1/z, z1\.s, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_s32_untied, svint32_t, -+ p0 = svcmpgt_s32 (p1, z0, z1), -+ p0 = svcmpgt (p1, z0, z1)) -+ -+/* -+** cmpgt_w0_s32: -+** mov (z[0-9]+\.s), w0 -+** ( -+** cmpgt p0\.s, p1/z, z0\.s, \1 -+** | -+** cmplt p0\.s, p1/z, \1, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_ZX (cmpgt_w0_s32, svint32_t, int32_t, -+ p0 = svcmpgt_n_s32 (p1, z0, x0), -+ p0 = svcmpgt (p1, z0, x0)) -+ -+/* -+** cmpgt_0_s32: -+** cmpgt p0\.s, p1/z, z0\.s, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_0_s32, svint32_t, -+ p0 = svcmpgt_n_s32 (p1, z0, 0), -+ p0 = svcmpgt (p1, z0, 0)) -+ -+/* -+** cmpgt_1_s32: -+** cmpgt p0\.s, p1/z, z0\.s, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_1_s32, svint32_t, -+ p0 = svcmpgt_n_s32 (p1, z0, 1), -+ p0 = svcmpgt (p1, z0, 1)) -+ -+/* -+** cmpgt_15_s32: -+** cmpgt p0\.s, p1/z, z0\.s, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_15_s32, svint32_t, -+ p0 = svcmpgt_n_s32 (p1, z0, 15), -+ p0 = svcmpgt (p1, z0, 15)) -+ -+/* -+** cmpgt_16_s32: -+** mov (z[0-9]+\.s), #16 -+** ( -+** cmpgt p0\.s, p1/z, z0\.s, \1 -+** | -+** cmplt p0\.s, p1/z, \1, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_16_s32, svint32_t, -+ p0 = svcmpgt_n_s32 (p1, z0, 16), -+ p0 = svcmpgt (p1, z0, 16)) -+ -+/* -+** cmpgt_m1_s32: -+** cmpgt p0\.s, p1/z, z0\.s, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_m1_s32, svint32_t, -+ p0 = svcmpgt_n_s32 (p1, z0, -1), -+ p0 = svcmpgt (p1, z0, -1)) -+ -+/* -+** cmpgt_m16_s32: -+** cmpgt p0\.s, p1/z, z0\.s, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_m16_s32, svint32_t, -+ p0 = svcmpgt_n_s32 (p1, z0, -16), -+ p0 = svcmpgt (p1, z0, -16)) -+ -+/* -+** cmpgt_m17_s32: -+** mov (z[0-9]+\.s), #-17 -+** ( -+** cmpgt p0\.s, p1/z, z0\.s, \1 -+** | -+** cmplt p0\.s, p1/z, \1, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_m17_s32, svint32_t, -+ p0 = svcmpgt_n_s32 (p1, z0, -17), -+ p0 = svcmpgt (p1, z0, -17)) -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_s64.c -new file mode 100644 -index 000000000..dbfd55e6f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_s64.c -@@ -0,0 +1,116 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpgt_s64_tied: -+** ( -+** cmpgt p0\.d, p0/z, z0\.d, z1\.d -+** | -+** cmplt p0\.d, p0/z, z1\.d, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_s64_tied, svint64_t, -+ p0 = svcmpgt_s64 (p0, z0, z1), -+ p0 = svcmpgt (p0, z0, z1)) -+ -+/* -+** cmpgt_s64_untied: -+** ( -+** cmpgt p0\.d, p1/z, z0\.d, z1\.d -+** | -+** cmplt p0\.d, p1/z, z1\.d, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_s64_untied, svint64_t, -+ p0 = svcmpgt_s64 (p1, z0, z1), -+ p0 = svcmpgt (p1, z0, z1)) -+ -+/* -+** cmpgt_x0_s64: -+** mov (z[0-9]+\.d), x0 -+** ( -+** cmpgt p0\.d, p1/z, z0\.d, \1 -+** | -+** cmplt p0\.d, p1/z, \1, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_ZX (cmpgt_x0_s64, svint64_t, int64_t, -+ p0 = svcmpgt_n_s64 (p1, z0, x0), -+ p0 = svcmpgt (p1, z0, x0)) -+ -+/* -+** cmpgt_0_s64: -+** cmpgt p0\.d, p1/z, z0\.d, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_0_s64, svint64_t, -+ p0 = svcmpgt_n_s64 (p1, z0, 0), -+ p0 = svcmpgt (p1, z0, 0)) -+ -+/* -+** cmpgt_1_s64: -+** cmpgt p0\.d, p1/z, z0\.d, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_1_s64, svint64_t, -+ p0 = svcmpgt_n_s64 (p1, z0, 1), -+ p0 = svcmpgt (p1, z0, 1)) -+ -+/* -+** cmpgt_15_s64: -+** cmpgt p0\.d, p1/z, z0\.d, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_15_s64, svint64_t, -+ p0 = svcmpgt_n_s64 (p1, z0, 15), -+ p0 = svcmpgt (p1, z0, 15)) -+ -+/* -+** cmpgt_16_s64: -+** mov (z[0-9]+\.d), #16 -+** ( -+** cmpgt p0\.d, p1/z, z0\.d, \1 -+** | -+** cmplt p0\.d, p1/z, \1, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_16_s64, svint64_t, -+ p0 = svcmpgt_n_s64 (p1, z0, 16), -+ p0 = svcmpgt (p1, z0, 16)) -+ -+/* -+** cmpgt_m1_s64: -+** cmpgt p0\.d, p1/z, z0\.d, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_m1_s64, svint64_t, -+ p0 = svcmpgt_n_s64 (p1, z0, -1), -+ p0 = svcmpgt (p1, z0, -1)) -+ -+/* -+** cmpgt_m16_s64: -+** cmpgt p0\.d, p1/z, z0\.d, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_m16_s64, svint64_t, -+ p0 = svcmpgt_n_s64 (p1, z0, -16), -+ p0 = svcmpgt (p1, z0, -16)) -+ -+/* -+** cmpgt_m17_s64: -+** mov (z[0-9]+\.d), #-17 -+** ( -+** cmpgt p0\.d, p1/z, z0\.d, \1 -+** | -+** cmplt p0\.d, p1/z, \1, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_m17_s64, svint64_t, -+ p0 = svcmpgt_n_s64 (p1, z0, -17), -+ p0 = svcmpgt (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_s8.c -new file mode 100644 -index 000000000..710c2e602 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_s8.c -@@ -0,0 +1,116 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpgt_s8_tied: -+** ( -+** cmpgt p0\.b, p0/z, z0\.b, z1\.b -+** | -+** cmplt p0\.b, p0/z, z1\.b, z0\.b -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_s8_tied, svint8_t, -+ p0 = svcmpgt_s8 (p0, z0, z1), -+ p0 = svcmpgt (p0, z0, z1)) -+ -+/* -+** cmpgt_s8_untied: -+** ( -+** cmpgt p0\.b, p1/z, z0\.b, z1\.b -+** | -+** cmplt p0\.b, p1/z, z1\.b, z0\.b -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_s8_untied, svint8_t, -+ p0 = svcmpgt_s8 (p1, z0, z1), -+ p0 = svcmpgt (p1, z0, z1)) -+ -+/* -+** cmpgt_w0_s8: -+** mov (z[0-9]+\.b), w0 -+** ( -+** cmpgt p0\.b, p1/z, 
z0\.b, \1 -+** | -+** cmplt p0\.b, p1/z, \1, z0\.b -+** ) -+** ret -+*/ -+TEST_COMPARE_ZX (cmpgt_w0_s8, svint8_t, int8_t, -+ p0 = svcmpgt_n_s8 (p1, z0, x0), -+ p0 = svcmpgt (p1, z0, x0)) -+ -+/* -+** cmpgt_0_s8: -+** cmpgt p0\.b, p1/z, z0\.b, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_0_s8, svint8_t, -+ p0 = svcmpgt_n_s8 (p1, z0, 0), -+ p0 = svcmpgt (p1, z0, 0)) -+ -+/* -+** cmpgt_1_s8: -+** cmpgt p0\.b, p1/z, z0\.b, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_1_s8, svint8_t, -+ p0 = svcmpgt_n_s8 (p1, z0, 1), -+ p0 = svcmpgt (p1, z0, 1)) -+ -+/* -+** cmpgt_15_s8: -+** cmpgt p0\.b, p1/z, z0\.b, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_15_s8, svint8_t, -+ p0 = svcmpgt_n_s8 (p1, z0, 15), -+ p0 = svcmpgt (p1, z0, 15)) -+ -+/* -+** cmpgt_16_s8: -+** mov (z[0-9]+\.b), #16 -+** ( -+** cmpgt p0\.b, p1/z, z0\.b, \1 -+** | -+** cmplt p0\.b, p1/z, \1, z0\.b -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_16_s8, svint8_t, -+ p0 = svcmpgt_n_s8 (p1, z0, 16), -+ p0 = svcmpgt (p1, z0, 16)) -+ -+/* -+** cmpgt_m1_s8: -+** cmpgt p0\.b, p1/z, z0\.b, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_m1_s8, svint8_t, -+ p0 = svcmpgt_n_s8 (p1, z0, -1), -+ p0 = svcmpgt (p1, z0, -1)) -+ -+/* -+** cmpgt_m16_s8: -+** cmpgt p0\.b, p1/z, z0\.b, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_m16_s8, svint8_t, -+ p0 = svcmpgt_n_s8 (p1, z0, -16), -+ p0 = svcmpgt (p1, z0, -16)) -+ -+/* -+** cmpgt_m17_s8: -+** mov (z[0-9]+\.b), #-17 -+** ( -+** cmpgt p0\.b, p1/z, z0\.b, \1 -+** | -+** cmplt p0\.b, p1/z, \1, z0\.b -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_m17_s8, svint8_t, -+ p0 = svcmpgt_n_s8 (p1, z0, -17), -+ p0 = svcmpgt (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_u16.c -new file mode 100644 -index 000000000..48e99c72c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_u16.c -@@ -0,0 +1,116 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpgt_u16_tied: -+** ( -+** cmphi p0\.h, p0/z, z0\.h, z1\.h -+** | -+** cmplo p0\.h, p0/z, z1\.h, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_u16_tied, svuint16_t, -+ p0 = svcmpgt_u16 (p0, z0, z1), -+ p0 = svcmpgt (p0, z0, z1)) -+ -+/* -+** cmpgt_u16_untied: -+** ( -+** cmphi p0\.h, p1/z, z0\.h, z1\.h -+** | -+** cmplo p0\.h, p1/z, z1\.h, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_u16_untied, svuint16_t, -+ p0 = svcmpgt_u16 (p1, z0, z1), -+ p0 = svcmpgt (p1, z0, z1)) -+ -+/* -+** cmpgt_w0_u16: -+** mov (z[0-9]+\.h), w0 -+** ( -+** cmphi p0\.h, p1/z, z0\.h, \1 -+** | -+** cmplo p0\.h, p1/z, \1, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_ZX (cmpgt_w0_u16, svuint16_t, uint16_t, -+ p0 = svcmpgt_n_u16 (p1, z0, x0), -+ p0 = svcmpgt (p1, z0, x0)) -+ -+/* -+** cmpgt_0_u16: -+** cmphi p0\.h, p1/z, z0\.h, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_0_u16, svuint16_t, -+ p0 = svcmpgt_n_u16 (p1, z0, 0), -+ p0 = svcmpgt (p1, z0, 0)) -+ -+/* -+** cmpgt_1_u16: -+** cmphi p0\.h, p1/z, z0\.h, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_1_u16, svuint16_t, -+ p0 = svcmpgt_n_u16 (p1, z0, 1), -+ p0 = svcmpgt (p1, z0, 1)) -+ -+/* -+** cmpgt_15_u16: -+** cmphi p0\.h, p1/z, z0\.h, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_15_u16, svuint16_t, -+ p0 = svcmpgt_n_u16 (p1, z0, 15), -+ p0 = svcmpgt (p1, z0, 15)) -+ -+/* -+** cmpgt_16_u16: -+** cmphi p0\.h, p1/z, z0\.h, #16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_16_u16, svuint16_t, -+ p0 = svcmpgt_n_u16 (p1, z0, 16), -+ p0 = svcmpgt (p1, z0, 16)) -+ -+/* -+** cmpgt_127_u16: -+** cmphi p0\.h, 
p1/z, z0\.h, #127 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_127_u16, svuint16_t, -+ p0 = svcmpgt_n_u16 (p1, z0, 127), -+ p0 = svcmpgt (p1, z0, 127)) -+ -+/* -+** cmpgt_128_u16: -+** mov (z[0-9]+\.h), #128 -+** ( -+** cmphi p0\.h, p1/z, z0\.h, \1 -+** | -+** cmplo p0\.h, p1/z, \1, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_128_u16, svuint16_t, -+ p0 = svcmpgt_n_u16 (p1, z0, 128), -+ p0 = svcmpgt (p1, z0, 128)) -+ -+/* -+** cmpgt_m1_u16: -+** mov (z[0-9]+)\.b, #-1 -+** ( -+** cmphi p0\.h, p1/z, z0\.h, \1\.h -+** | -+** cmplo p0\.h, p1/z, \1\.h, z0\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_m1_u16, svuint16_t, -+ p0 = svcmpgt_n_u16 (p1, z0, -1), -+ p0 = svcmpgt (p1, z0, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_u32.c -new file mode 100644 -index 000000000..408037d72 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_u32.c -@@ -0,0 +1,116 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpgt_u32_tied: -+** ( -+** cmphi p0\.s, p0/z, z0\.s, z1\.s -+** | -+** cmplo p0\.s, p0/z, z1\.s, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_u32_tied, svuint32_t, -+ p0 = svcmpgt_u32 (p0, z0, z1), -+ p0 = svcmpgt (p0, z0, z1)) -+ -+/* -+** cmpgt_u32_untied: -+** ( -+** cmphi p0\.s, p1/z, z0\.s, z1\.s -+** | -+** cmplo p0\.s, p1/z, z1\.s, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_u32_untied, svuint32_t, -+ p0 = svcmpgt_u32 (p1, z0, z1), -+ p0 = svcmpgt (p1, z0, z1)) -+ -+/* -+** cmpgt_w0_u32: -+** mov (z[0-9]+\.s), w0 -+** ( -+** cmphi p0\.s, p1/z, z0\.s, \1 -+** | -+** cmplo p0\.s, p1/z, \1, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_ZX (cmpgt_w0_u32, svuint32_t, uint32_t, -+ p0 = svcmpgt_n_u32 (p1, z0, x0), -+ p0 = svcmpgt (p1, z0, x0)) -+ -+/* -+** cmpgt_0_u32: -+** cmphi p0\.s, p1/z, z0\.s, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_0_u32, svuint32_t, -+ p0 = svcmpgt_n_u32 (p1, z0, 0), -+ p0 = svcmpgt (p1, z0, 0)) -+ -+/* -+** cmpgt_1_u32: -+** cmphi p0\.s, p1/z, z0\.s, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_1_u32, svuint32_t, -+ p0 = svcmpgt_n_u32 (p1, z0, 1), -+ p0 = svcmpgt (p1, z0, 1)) -+ -+/* -+** cmpgt_15_u32: -+** cmphi p0\.s, p1/z, z0\.s, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_15_u32, svuint32_t, -+ p0 = svcmpgt_n_u32 (p1, z0, 15), -+ p0 = svcmpgt (p1, z0, 15)) -+ -+/* -+** cmpgt_16_u32: -+** cmphi p0\.s, p1/z, z0\.s, #16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_16_u32, svuint32_t, -+ p0 = svcmpgt_n_u32 (p1, z0, 16), -+ p0 = svcmpgt (p1, z0, 16)) -+ -+/* -+** cmpgt_127_u32: -+** cmphi p0\.s, p1/z, z0\.s, #127 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_127_u32, svuint32_t, -+ p0 = svcmpgt_n_u32 (p1, z0, 127), -+ p0 = svcmpgt (p1, z0, 127)) -+ -+/* -+** cmpgt_128_u32: -+** mov (z[0-9]+\.s), #128 -+** ( -+** cmphi p0\.s, p1/z, z0\.s, \1 -+** | -+** cmplo p0\.s, p1/z, \1, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_128_u32, svuint32_t, -+ p0 = svcmpgt_n_u32 (p1, z0, 128), -+ p0 = svcmpgt (p1, z0, 128)) -+ -+/* -+** cmpgt_m1_u32: -+** mov (z[0-9]+)\.b, #-1 -+** ( -+** cmphi p0\.s, p1/z, z0\.s, \1\.s -+** | -+** cmplo p0\.s, p1/z, \1\.s, z0\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_m1_u32, svuint32_t, -+ p0 = svcmpgt_n_u32 (p1, z0, -1), -+ p0 = svcmpgt (p1, z0, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_u64.c -new file mode 100644 -index 000000000..f76a23e49 ---- /dev/null -+++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_u64.c -@@ -0,0 +1,116 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpgt_u64_tied: -+** ( -+** cmphi p0\.d, p0/z, z0\.d, z1\.d -+** | -+** cmplo p0\.d, p0/z, z1\.d, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_u64_tied, svuint64_t, -+ p0 = svcmpgt_u64 (p0, z0, z1), -+ p0 = svcmpgt (p0, z0, z1)) -+ -+/* -+** cmpgt_u64_untied: -+** ( -+** cmphi p0\.d, p1/z, z0\.d, z1\.d -+** | -+** cmplo p0\.d, p1/z, z1\.d, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_u64_untied, svuint64_t, -+ p0 = svcmpgt_u64 (p1, z0, z1), -+ p0 = svcmpgt (p1, z0, z1)) -+ -+/* -+** cmpgt_x0_u64: -+** mov (z[0-9]+\.d), x0 -+** ( -+** cmphi p0\.d, p1/z, z0\.d, \1 -+** | -+** cmplo p0\.d, p1/z, \1, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_ZX (cmpgt_x0_u64, svuint64_t, uint64_t, -+ p0 = svcmpgt_n_u64 (p1, z0, x0), -+ p0 = svcmpgt (p1, z0, x0)) -+ -+/* -+** cmpgt_0_u64: -+** cmphi p0\.d, p1/z, z0\.d, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_0_u64, svuint64_t, -+ p0 = svcmpgt_n_u64 (p1, z0, 0), -+ p0 = svcmpgt (p1, z0, 0)) -+ -+/* -+** cmpgt_1_u64: -+** cmphi p0\.d, p1/z, z0\.d, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_1_u64, svuint64_t, -+ p0 = svcmpgt_n_u64 (p1, z0, 1), -+ p0 = svcmpgt (p1, z0, 1)) -+ -+/* -+** cmpgt_15_u64: -+** cmphi p0\.d, p1/z, z0\.d, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_15_u64, svuint64_t, -+ p0 = svcmpgt_n_u64 (p1, z0, 15), -+ p0 = svcmpgt (p1, z0, 15)) -+ -+/* -+** cmpgt_16_u64: -+** cmphi p0\.d, p1/z, z0\.d, #16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_16_u64, svuint64_t, -+ p0 = svcmpgt_n_u64 (p1, z0, 16), -+ p0 = svcmpgt (p1, z0, 16)) -+ -+/* -+** cmpgt_127_u64: -+** cmphi p0\.d, p1/z, z0\.d, #127 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_127_u64, svuint64_t, -+ p0 = svcmpgt_n_u64 (p1, z0, 127), -+ p0 = svcmpgt (p1, z0, 127)) -+ -+/* -+** cmpgt_128_u64: -+** mov (z[0-9]+\.d), #128 -+** ( -+** cmphi p0\.d, p1/z, z0\.d, \1 -+** | -+** cmplo p0\.d, p1/z, \1, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_128_u64, svuint64_t, -+ p0 = svcmpgt_n_u64 (p1, z0, 128), -+ p0 = svcmpgt (p1, z0, 128)) -+ -+/* -+** cmpgt_m1_u64: -+** mov (z[0-9]+)\.b, #-1 -+** ( -+** cmphi p0\.d, p1/z, z0\.d, \1\.d -+** | -+** cmplo p0\.d, p1/z, \1\.d, z0\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_m1_u64, svuint64_t, -+ p0 = svcmpgt_n_u64 (p1, z0, -1), -+ p0 = svcmpgt (p1, z0, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_u8.c -new file mode 100644 -index 000000000..4f28331f9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_u8.c -@@ -0,0 +1,116 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpgt_u8_tied: -+** ( -+** cmphi p0\.b, p0/z, z0\.b, z1\.b -+** | -+** cmplo p0\.b, p0/z, z1\.b, z0\.b -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_u8_tied, svuint8_t, -+ p0 = svcmpgt_u8 (p0, z0, z1), -+ p0 = svcmpgt (p0, z0, z1)) -+ -+/* -+** cmpgt_u8_untied: -+** ( -+** cmphi p0\.b, p1/z, z0\.b, z1\.b -+** | -+** cmplo p0\.b, p1/z, z1\.b, z0\.b -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_u8_untied, svuint8_t, -+ p0 = svcmpgt_u8 (p1, z0, z1), -+ p0 = svcmpgt (p1, z0, z1)) -+ -+/* -+** cmpgt_w0_u8: -+** mov (z[0-9]+\.b), w0 -+** ( -+** cmphi p0\.b, p1/z, z0\.b, \1 -+** | -+** cmplo p0\.b, p1/z, \1, z0\.b -+** ) -+** ret -+*/ -+TEST_COMPARE_ZX (cmpgt_w0_u8, svuint8_t, uint8_t, -+ p0 = svcmpgt_n_u8 (p1, z0, x0), -+ p0 = 
svcmpgt (p1, z0, x0)) -+ -+/* -+** cmpgt_0_u8: -+** cmphi p0\.b, p1/z, z0\.b, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_0_u8, svuint8_t, -+ p0 = svcmpgt_n_u8 (p1, z0, 0), -+ p0 = svcmpgt (p1, z0, 0)) -+ -+/* -+** cmpgt_1_u8: -+** cmphi p0\.b, p1/z, z0\.b, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_1_u8, svuint8_t, -+ p0 = svcmpgt_n_u8 (p1, z0, 1), -+ p0 = svcmpgt (p1, z0, 1)) -+ -+/* -+** cmpgt_15_u8: -+** cmphi p0\.b, p1/z, z0\.b, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_15_u8, svuint8_t, -+ p0 = svcmpgt_n_u8 (p1, z0, 15), -+ p0 = svcmpgt (p1, z0, 15)) -+ -+/* -+** cmpgt_16_u8: -+** cmphi p0\.b, p1/z, z0\.b, #16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_16_u8, svuint8_t, -+ p0 = svcmpgt_n_u8 (p1, z0, 16), -+ p0 = svcmpgt (p1, z0, 16)) -+ -+/* -+** cmpgt_127_u8: -+** cmphi p0\.b, p1/z, z0\.b, #127 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_127_u8, svuint8_t, -+ p0 = svcmpgt_n_u8 (p1, z0, 127), -+ p0 = svcmpgt (p1, z0, 127)) -+ -+/* -+** cmpgt_128_u8: -+** mov (z[0-9]+\.b), #-128 -+** ( -+** cmphi p0\.b, p1/z, z0\.b, \1 -+** | -+** cmplo p0\.b, p1/z, \1, z0\.b -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_128_u8, svuint8_t, -+ p0 = svcmpgt_n_u8 (p1, z0, 128), -+ p0 = svcmpgt (p1, z0, 128)) -+ -+/* -+** cmpgt_m1_u8: -+** mov (z[0-9]+\.b), #-1 -+** ( -+** cmphi p0\.b, p1/z, z0\.b, \1 -+** | -+** cmplo p0\.b, p1/z, \1, z0\.b -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_m1_u8, svuint8_t, -+ p0 = svcmpgt_n_u8 (p1, z0, -1), -+ p0 = svcmpgt (p1, z0, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_wide_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_wide_s16.c -new file mode 100644 -index 000000000..07d3bbbd9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_wide_s16.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpgt_wide_s16_tied: -+** cmpgt p0\.h, p0/z, z0\.h, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmpgt_wide_s16_tied, svint16_t, svint64_t, -+ p0 = svcmpgt_wide_s16 (p0, z0, z1), -+ p0 = svcmpgt_wide (p0, z0, z1)) -+ -+/* -+** cmpgt_wide_s16_untied: -+** cmpgt p0\.h, p1/z, z0\.h, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmpgt_wide_s16_untied, svint16_t, svint64_t, -+ p0 = svcmpgt_wide_s16 (p1, z0, z1), -+ p0 = svcmpgt_wide (p1, z0, z1)) -+ -+/* -+** cmpgt_wide_x0_s16: -+** mov (z[0-9]+\.d), x0 -+** cmpgt p0\.h, p1/z, z0\.h, \1 -+** ret -+*/ -+TEST_COMPARE_ZX (cmpgt_wide_x0_s16, svint16_t, int64_t, -+ p0 = svcmpgt_wide_n_s16 (p1, z0, x0), -+ p0 = svcmpgt_wide (p1, z0, x0)) -+ -+/* -+** cmpgt_wide_0_s16: -+** cmpgt p0\.h, p1/z, z0\.h, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_0_s16, svint16_t, -+ p0 = svcmpgt_wide_n_s16 (p1, z0, 0), -+ p0 = svcmpgt_wide (p1, z0, 0)) -+ -+/* -+** cmpgt_wide_1_s16: -+** cmpgt p0\.h, p1/z, z0\.h, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_1_s16, svint16_t, -+ p0 = svcmpgt_wide_n_s16 (p1, z0, 1), -+ p0 = svcmpgt_wide (p1, z0, 1)) -+ -+/* -+** cmpgt_wide_15_s16: -+** cmpgt p0\.h, p1/z, z0\.h, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_15_s16, svint16_t, -+ p0 = svcmpgt_wide_n_s16 (p1, z0, 15), -+ p0 = svcmpgt_wide (p1, z0, 15)) -+ -+/* -+** cmpgt_wide_16_s16: -+** mov (z[0-9]+\.d), #16 -+** cmpgt p0\.h, p1/z, z0\.h, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_16_s16, svint16_t, -+ p0 = svcmpgt_wide_n_s16 (p1, z0, 16), -+ p0 = svcmpgt_wide (p1, z0, 16)) -+ -+/* -+** cmpgt_wide_m1_s16: -+** cmpgt p0\.h, p1/z, z0\.h, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_m1_s16, svint16_t, -+ p0 = svcmpgt_wide_n_s16 (p1, z0, -1), 
-+ p0 = svcmpgt_wide (p1, z0, -1)) -+ -+/* -+** cmpgt_wide_m16_s16: -+** cmpgt p0\.h, p1/z, z0\.h, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_m16_s16, svint16_t, -+ p0 = svcmpgt_wide_n_s16 (p1, z0, -16), -+ p0 = svcmpgt_wide (p1, z0, -16)) -+ -+/* -+** cmpgt_wide_m17_s16: -+** mov (z[0-9]+\.d), #-17 -+** cmpgt p0\.h, p1/z, z0\.h, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_m17_s16, svint16_t, -+ p0 = svcmpgt_wide_n_s16 (p1, z0, -17), -+ p0 = svcmpgt_wide (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_wide_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_wide_s32.c -new file mode 100644 -index 000000000..f984362e6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_wide_s32.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpgt_wide_s32_tied: -+** cmpgt p0\.s, p0/z, z0\.s, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmpgt_wide_s32_tied, svint32_t, svint64_t, -+ p0 = svcmpgt_wide_s32 (p0, z0, z1), -+ p0 = svcmpgt_wide (p0, z0, z1)) -+ -+/* -+** cmpgt_wide_s32_untied: -+** cmpgt p0\.s, p1/z, z0\.s, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmpgt_wide_s32_untied, svint32_t, svint64_t, -+ p0 = svcmpgt_wide_s32 (p1, z0, z1), -+ p0 = svcmpgt_wide (p1, z0, z1)) -+ -+/* -+** cmpgt_wide_x0_s32: -+** mov (z[0-9]+\.d), x0 -+** cmpgt p0\.s, p1/z, z0\.s, \1 -+** ret -+*/ -+TEST_COMPARE_ZX (cmpgt_wide_x0_s32, svint32_t, int64_t, -+ p0 = svcmpgt_wide_n_s32 (p1, z0, x0), -+ p0 = svcmpgt_wide (p1, z0, x0)) -+ -+/* -+** cmpgt_wide_0_s32: -+** cmpgt p0\.s, p1/z, z0\.s, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_0_s32, svint32_t, -+ p0 = svcmpgt_wide_n_s32 (p1, z0, 0), -+ p0 = svcmpgt_wide (p1, z0, 0)) -+ -+/* -+** cmpgt_wide_1_s32: -+** cmpgt p0\.s, p1/z, z0\.s, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_1_s32, svint32_t, -+ p0 = svcmpgt_wide_n_s32 (p1, z0, 1), -+ p0 = svcmpgt_wide (p1, z0, 1)) -+ -+/* -+** cmpgt_wide_15_s32: -+** cmpgt p0\.s, p1/z, z0\.s, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_15_s32, svint32_t, -+ p0 = svcmpgt_wide_n_s32 (p1, z0, 15), -+ p0 = svcmpgt_wide (p1, z0, 15)) -+ -+/* -+** cmpgt_wide_16_s32: -+** mov (z[0-9]+\.d), #16 -+** cmpgt p0\.s, p1/z, z0\.s, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_16_s32, svint32_t, -+ p0 = svcmpgt_wide_n_s32 (p1, z0, 16), -+ p0 = svcmpgt_wide (p1, z0, 16)) -+ -+/* -+** cmpgt_wide_m1_s32: -+** cmpgt p0\.s, p1/z, z0\.s, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_m1_s32, svint32_t, -+ p0 = svcmpgt_wide_n_s32 (p1, z0, -1), -+ p0 = svcmpgt_wide (p1, z0, -1)) -+ -+/* -+** cmpgt_wide_m16_s32: -+** cmpgt p0\.s, p1/z, z0\.s, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_m16_s32, svint32_t, -+ p0 = svcmpgt_wide_n_s32 (p1, z0, -16), -+ p0 = svcmpgt_wide (p1, z0, -16)) -+ -+/* -+** cmpgt_wide_m17_s32: -+** mov (z[0-9]+\.d), #-17 -+** cmpgt p0\.s, p1/z, z0\.s, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_m17_s32, svint32_t, -+ p0 = svcmpgt_wide_n_s32 (p1, z0, -17), -+ p0 = svcmpgt_wide (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_wide_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_wide_s8.c -new file mode 100644 -index 000000000..07047a315 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_wide_s8.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpgt_wide_s8_tied: -+** cmpgt p0\.b, p0/z, z0\.b, z1\.d -+** ret -+*/ 
-+TEST_COMPARE_DUAL_Z (cmpgt_wide_s8_tied, svint8_t, svint64_t, -+ p0 = svcmpgt_wide_s8 (p0, z0, z1), -+ p0 = svcmpgt_wide (p0, z0, z1)) -+ -+/* -+** cmpgt_wide_s8_untied: -+** cmpgt p0\.b, p1/z, z0\.b, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmpgt_wide_s8_untied, svint8_t, svint64_t, -+ p0 = svcmpgt_wide_s8 (p1, z0, z1), -+ p0 = svcmpgt_wide (p1, z0, z1)) -+ -+/* -+** cmpgt_wide_x0_s8: -+** mov (z[0-9]+\.d), x0 -+** cmpgt p0\.b, p1/z, z0\.b, \1 -+** ret -+*/ -+TEST_COMPARE_ZX (cmpgt_wide_x0_s8, svint8_t, int64_t, -+ p0 = svcmpgt_wide_n_s8 (p1, z0, x0), -+ p0 = svcmpgt_wide (p1, z0, x0)) -+ -+/* -+** cmpgt_wide_0_s8: -+** cmpgt p0\.b, p1/z, z0\.b, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_0_s8, svint8_t, -+ p0 = svcmpgt_wide_n_s8 (p1, z0, 0), -+ p0 = svcmpgt_wide (p1, z0, 0)) -+ -+/* -+** cmpgt_wide_1_s8: -+** cmpgt p0\.b, p1/z, z0\.b, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_1_s8, svint8_t, -+ p0 = svcmpgt_wide_n_s8 (p1, z0, 1), -+ p0 = svcmpgt_wide (p1, z0, 1)) -+ -+/* -+** cmpgt_wide_15_s8: -+** cmpgt p0\.b, p1/z, z0\.b, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_15_s8, svint8_t, -+ p0 = svcmpgt_wide_n_s8 (p1, z0, 15), -+ p0 = svcmpgt_wide (p1, z0, 15)) -+ -+/* -+** cmpgt_wide_16_s8: -+** mov (z[0-9]+\.d), #16 -+** cmpgt p0\.b, p1/z, z0\.b, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_16_s8, svint8_t, -+ p0 = svcmpgt_wide_n_s8 (p1, z0, 16), -+ p0 = svcmpgt_wide (p1, z0, 16)) -+ -+/* -+** cmpgt_wide_m1_s8: -+** cmpgt p0\.b, p1/z, z0\.b, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_m1_s8, svint8_t, -+ p0 = svcmpgt_wide_n_s8 (p1, z0, -1), -+ p0 = svcmpgt_wide (p1, z0, -1)) -+ -+/* -+** cmpgt_wide_m16_s8: -+** cmpgt p0\.b, p1/z, z0\.b, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_m16_s8, svint8_t, -+ p0 = svcmpgt_wide_n_s8 (p1, z0, -16), -+ p0 = svcmpgt_wide (p1, z0, -16)) -+ -+/* -+** cmpgt_wide_m17_s8: -+** mov (z[0-9]+\.d), #-17 -+** cmpgt p0\.b, p1/z, z0\.b, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_m17_s8, svint8_t, -+ p0 = svcmpgt_wide_n_s8 (p1, z0, -17), -+ p0 = svcmpgt_wide (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_wide_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_wide_u16.c -new file mode 100644 -index 000000000..bcffb88c0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_wide_u16.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpgt_wide_u16_tied: -+** cmphi p0\.h, p0/z, z0\.h, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmpgt_wide_u16_tied, svuint16_t, svuint64_t, -+ p0 = svcmpgt_wide_u16 (p0, z0, z1), -+ p0 = svcmpgt_wide (p0, z0, z1)) -+ -+/* -+** cmpgt_wide_u16_untied: -+** cmphi p0\.h, p1/z, z0\.h, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmpgt_wide_u16_untied, svuint16_t, svuint64_t, -+ p0 = svcmpgt_wide_u16 (p1, z0, z1), -+ p0 = svcmpgt_wide (p1, z0, z1)) -+ -+/* -+** cmpgt_wide_x0_u16: -+** mov (z[0-9]+\.d), x0 -+** cmphi p0\.h, p1/z, z0\.h, \1 -+** ret -+*/ -+TEST_COMPARE_ZX (cmpgt_wide_x0_u16, svuint16_t, uint64_t, -+ p0 = svcmpgt_wide_n_u16 (p1, z0, x0), -+ p0 = svcmpgt_wide (p1, z0, x0)) -+ -+/* -+** cmpgt_wide_0_u16: -+** cmphi p0\.h, p1/z, z0\.h, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_0_u16, svuint16_t, -+ p0 = svcmpgt_wide_n_u16 (p1, z0, 0), -+ p0 = svcmpgt_wide (p1, z0, 0)) -+ -+/* -+** cmpgt_wide_1_u16: -+** cmphi p0\.h, p1/z, z0\.h, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_1_u16, svuint16_t, -+ p0 = svcmpgt_wide_n_u16 (p1, z0, 1), -+ p0 = svcmpgt_wide (p1, z0, 1)) 
-+ -+/* -+** cmpgt_wide_15_u16: -+** cmphi p0\.h, p1/z, z0\.h, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_15_u16, svuint16_t, -+ p0 = svcmpgt_wide_n_u16 (p1, z0, 15), -+ p0 = svcmpgt_wide (p1, z0, 15)) -+ -+/* -+** cmpgt_wide_16_u16: -+** cmphi p0\.h, p1/z, z0\.h, #16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_16_u16, svuint16_t, -+ p0 = svcmpgt_wide_n_u16 (p1, z0, 16), -+ p0 = svcmpgt_wide (p1, z0, 16)) -+ -+/* -+** cmpgt_wide_127_u16: -+** cmphi p0\.h, p1/z, z0\.h, #127 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_127_u16, svuint16_t, -+ p0 = svcmpgt_wide_n_u16 (p1, z0, 127), -+ p0 = svcmpgt_wide (p1, z0, 127)) -+ -+/* -+** cmpgt_wide_128_u16: -+** mov (z[0-9]+\.d), #128 -+** cmphi p0\.h, p1/z, z0\.h, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_128_u16, svuint16_t, -+ p0 = svcmpgt_wide_n_u16 (p1, z0, 128), -+ p0 = svcmpgt_wide (p1, z0, 128)) -+ -+/* -+** cmpgt_wide_m1_u16: -+** mov (z[0-9]+)\.b, #-1 -+** cmphi p0\.h, p1/z, z0\.h, \1\.d -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_m1_u16, svuint16_t, -+ p0 = svcmpgt_wide_n_u16 (p1, z0, -1), -+ p0 = svcmpgt_wide (p1, z0, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_wide_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_wide_u32.c -new file mode 100644 -index 000000000..65c0231e5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_wide_u32.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpgt_wide_u32_tied: -+** cmphi p0\.s, p0/z, z0\.s, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmpgt_wide_u32_tied, svuint32_t, svuint64_t, -+ p0 = svcmpgt_wide_u32 (p0, z0, z1), -+ p0 = svcmpgt_wide (p0, z0, z1)) -+ -+/* -+** cmpgt_wide_u32_untied: -+** cmphi p0\.s, p1/z, z0\.s, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmpgt_wide_u32_untied, svuint32_t, svuint64_t, -+ p0 = svcmpgt_wide_u32 (p1, z0, z1), -+ p0 = svcmpgt_wide (p1, z0, z1)) -+ -+/* -+** cmpgt_wide_x0_u32: -+** mov (z[0-9]+\.d), x0 -+** cmphi p0\.s, p1/z, z0\.s, \1 -+** ret -+*/ -+TEST_COMPARE_ZX (cmpgt_wide_x0_u32, svuint32_t, uint64_t, -+ p0 = svcmpgt_wide_n_u32 (p1, z0, x0), -+ p0 = svcmpgt_wide (p1, z0, x0)) -+ -+/* -+** cmpgt_wide_0_u32: -+** cmphi p0\.s, p1/z, z0\.s, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_0_u32, svuint32_t, -+ p0 = svcmpgt_wide_n_u32 (p1, z0, 0), -+ p0 = svcmpgt_wide (p1, z0, 0)) -+ -+/* -+** cmpgt_wide_1_u32: -+** cmphi p0\.s, p1/z, z0\.s, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_1_u32, svuint32_t, -+ p0 = svcmpgt_wide_n_u32 (p1, z0, 1), -+ p0 = svcmpgt_wide (p1, z0, 1)) -+ -+/* -+** cmpgt_wide_15_u32: -+** cmphi p0\.s, p1/z, z0\.s, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_15_u32, svuint32_t, -+ p0 = svcmpgt_wide_n_u32 (p1, z0, 15), -+ p0 = svcmpgt_wide (p1, z0, 15)) -+ -+/* -+** cmpgt_wide_16_u32: -+** cmphi p0\.s, p1/z, z0\.s, #16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_16_u32, svuint32_t, -+ p0 = svcmpgt_wide_n_u32 (p1, z0, 16), -+ p0 = svcmpgt_wide (p1, z0, 16)) -+ -+/* -+** cmpgt_wide_127_u32: -+** cmphi p0\.s, p1/z, z0\.s, #127 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_127_u32, svuint32_t, -+ p0 = svcmpgt_wide_n_u32 (p1, z0, 127), -+ p0 = svcmpgt_wide (p1, z0, 127)) -+ -+/* -+** cmpgt_wide_128_u32: -+** mov (z[0-9]+\.d), #128 -+** cmphi p0\.s, p1/z, z0\.s, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_128_u32, svuint32_t, -+ p0 = svcmpgt_wide_n_u32 (p1, z0, 128), -+ p0 = svcmpgt_wide (p1, z0, 128)) -+ -+/* -+** cmpgt_wide_m1_u32: -+** mov (z[0-9]+)\.b, #-1 -+** cmphi p0\.s, p1/z, z0\.s, \1\.d -+** 
ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_m1_u32, svuint32_t, -+ p0 = svcmpgt_wide_n_u32 (p1, z0, -1), -+ p0 = svcmpgt_wide (p1, z0, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_wide_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_wide_u8.c -new file mode 100644 -index 000000000..0d1142f27 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpgt_wide_u8.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpgt_wide_u8_tied: -+** cmphi p0\.b, p0/z, z0\.b, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmpgt_wide_u8_tied, svuint8_t, svuint64_t, -+ p0 = svcmpgt_wide_u8 (p0, z0, z1), -+ p0 = svcmpgt_wide (p0, z0, z1)) -+ -+/* -+** cmpgt_wide_u8_untied: -+** cmphi p0\.b, p1/z, z0\.b, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmpgt_wide_u8_untied, svuint8_t, svuint64_t, -+ p0 = svcmpgt_wide_u8 (p1, z0, z1), -+ p0 = svcmpgt_wide (p1, z0, z1)) -+ -+/* -+** cmpgt_wide_x0_u8: -+** mov (z[0-9]+\.d), x0 -+** cmphi p0\.b, p1/z, z0\.b, \1 -+** ret -+*/ -+TEST_COMPARE_ZX (cmpgt_wide_x0_u8, svuint8_t, uint64_t, -+ p0 = svcmpgt_wide_n_u8 (p1, z0, x0), -+ p0 = svcmpgt_wide (p1, z0, x0)) -+ -+/* -+** cmpgt_wide_0_u8: -+** cmphi p0\.b, p1/z, z0\.b, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_0_u8, svuint8_t, -+ p0 = svcmpgt_wide_n_u8 (p1, z0, 0), -+ p0 = svcmpgt_wide (p1, z0, 0)) -+ -+/* -+** cmpgt_wide_1_u8: -+** cmphi p0\.b, p1/z, z0\.b, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_1_u8, svuint8_t, -+ p0 = svcmpgt_wide_n_u8 (p1, z0, 1), -+ p0 = svcmpgt_wide (p1, z0, 1)) -+ -+/* -+** cmpgt_wide_15_u8: -+** cmphi p0\.b, p1/z, z0\.b, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_15_u8, svuint8_t, -+ p0 = svcmpgt_wide_n_u8 (p1, z0, 15), -+ p0 = svcmpgt_wide (p1, z0, 15)) -+ -+/* -+** cmpgt_wide_16_u8: -+** cmphi p0\.b, p1/z, z0\.b, #16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_16_u8, svuint8_t, -+ p0 = svcmpgt_wide_n_u8 (p1, z0, 16), -+ p0 = svcmpgt_wide (p1, z0, 16)) -+ -+/* -+** cmpgt_wide_127_u8: -+** cmphi p0\.b, p1/z, z0\.b, #127 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_127_u8, svuint8_t, -+ p0 = svcmpgt_wide_n_u8 (p1, z0, 127), -+ p0 = svcmpgt_wide (p1, z0, 127)) -+ -+/* -+** cmpgt_wide_128_u8: -+** mov (z[0-9]+\.d), #128 -+** cmphi p0\.b, p1/z, z0\.b, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_128_u8, svuint8_t, -+ p0 = svcmpgt_wide_n_u8 (p1, z0, 128), -+ p0 = svcmpgt_wide (p1, z0, 128)) -+ -+/* -+** cmpgt_wide_m1_u8: -+** mov (z[0-9]+)\.b, #-1 -+** cmphi p0\.b, p1/z, z0\.b, \1\.d -+** ret -+*/ -+TEST_COMPARE_Z (cmpgt_wide_m1_u8, svuint8_t, -+ p0 = svcmpgt_wide_n_u8 (p1, z0, -1), -+ p0 = svcmpgt_wide (p1, z0, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_f16.c -new file mode 100644 -index 000000000..7d500590f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_f16.c -@@ -0,0 +1,66 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmple_f16_tied: -+** ( -+** fcmge p0\.h, p0/z, z1\.h, z0\.h -+** | -+** fcmle p0\.h, p0/z, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_f16_tied, svfloat16_t, -+ p0 = svcmple_f16 (p0, z0, z1), -+ p0 = svcmple (p0, z0, z1)) -+ -+/* -+** cmple_f16_untied: -+** ( -+** fcmge p0\.h, p1/z, z1\.h, z0\.h -+** | -+** fcmle p0\.h, p1/z, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_f16_untied, svfloat16_t, -+ p0 = svcmple_f16 (p1, z0, z1), -+ p0 = svcmple 
(p1, z0, z1)) -+ -+/* -+** cmple_h4_f16: -+** mov (z[0-9]+\.h), h4 -+** ( -+** fcmge p0\.h, p1/z, \1, z0\.h -+** | -+** fcmle p0\.h, p1/z, z0\.h, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_ZD (cmple_h4_f16, svfloat16_t, float16_t, -+ p0 = svcmple_n_f16 (p1, z0, d4), -+ p0 = svcmple (p1, z0, d4)) -+ -+/* -+** cmple_0_f16: -+** fcmle p0\.h, p1/z, z0\.h, #0\.0 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_0_f16, svfloat16_t, -+ p0 = svcmple_n_f16 (p1, z0, 0), -+ p0 = svcmple (p1, z0, 0)) -+ -+/* -+** cmple_1_f16: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? -+** ( -+** fcmge p0\.h, p1/z, \1, z0\.h -+** | -+** fcmle p0\.h, p1/z, z0\.h, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_1_f16, svfloat16_t, -+ p0 = svcmple_n_f16 (p1, z0, 1), -+ p0 = svcmple (p1, z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_f32.c -new file mode 100644 -index 000000000..3df63fef7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_f32.c -@@ -0,0 +1,66 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmple_f32_tied: -+** ( -+** fcmge p0\.s, p0/z, z1\.s, z0\.s -+** | -+** fcmle p0\.s, p0/z, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_f32_tied, svfloat32_t, -+ p0 = svcmple_f32 (p0, z0, z1), -+ p0 = svcmple (p0, z0, z1)) -+ -+/* -+** cmple_f32_untied: -+** ( -+** fcmge p0\.s, p1/z, z1\.s, z0\.s -+** | -+** fcmle p0\.s, p1/z, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_f32_untied, svfloat32_t, -+ p0 = svcmple_f32 (p1, z0, z1), -+ p0 = svcmple (p1, z0, z1)) -+ -+/* -+** cmple_s4_f32: -+** mov (z[0-9]+\.s), s4 -+** ( -+** fcmge p0\.s, p1/z, \1, z0\.s -+** | -+** fcmle p0\.s, p1/z, z0\.s, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_ZD (cmple_s4_f32, svfloat32_t, float32_t, -+ p0 = svcmple_n_f32 (p1, z0, d4), -+ p0 = svcmple (p1, z0, d4)) -+ -+/* -+** cmple_0_f32: -+** fcmle p0\.s, p1/z, z0\.s, #0\.0 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_0_f32, svfloat32_t, -+ p0 = svcmple_n_f32 (p1, z0, 0), -+ p0 = svcmple (p1, z0, 0)) -+ -+/* -+** cmple_1_f32: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? 
-+** ( -+** fcmge p0\.s, p1/z, \1, z0\.s -+** | -+** fcmle p0\.s, p1/z, z0\.s, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_1_f32, svfloat32_t, -+ p0 = svcmple_n_f32 (p1, z0, 1), -+ p0 = svcmple (p1, z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_f64.c -new file mode 100644 -index 000000000..5946a1b3a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_f64.c -@@ -0,0 +1,66 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmple_f64_tied: -+** ( -+** fcmge p0\.d, p0/z, z1\.d, z0\.d -+** | -+** fcmle p0\.d, p0/z, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_f64_tied, svfloat64_t, -+ p0 = svcmple_f64 (p0, z0, z1), -+ p0 = svcmple (p0, z0, z1)) -+ -+/* -+** cmple_f64_untied: -+** ( -+** fcmge p0\.d, p1/z, z1\.d, z0\.d -+** | -+** fcmle p0\.d, p1/z, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_f64_untied, svfloat64_t, -+ p0 = svcmple_f64 (p1, z0, z1), -+ p0 = svcmple (p1, z0, z1)) -+ -+/* -+** cmple_d4_f64: -+** mov (z[0-9]+\.d), d4 -+** ( -+** fcmge p0\.d, p1/z, \1, z0\.d -+** | -+** fcmle p0\.d, p1/z, z0\.d, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_ZD (cmple_d4_f64, svfloat64_t, float64_t, -+ p0 = svcmple_n_f64 (p1, z0, d4), -+ p0 = svcmple (p1, z0, d4)) -+ -+/* -+** cmple_0_f64: -+** fcmle p0\.d, p1/z, z0\.d, #0\.0 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_0_f64, svfloat64_t, -+ p0 = svcmple_n_f64 (p1, z0, 0), -+ p0 = svcmple (p1, z0, 0)) -+ -+/* -+** cmple_1_f64: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? -+** ( -+** fcmge p0\.d, p1/z, \1, z0\.d -+** | -+** fcmle p0\.d, p1/z, z0\.d, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_1_f64, svfloat64_t, -+ p0 = svcmple_n_f64 (p1, z0, 1), -+ p0 = svcmple (p1, z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_s16.c -new file mode 100644 -index 000000000..9b221bb4c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_s16.c -@@ -0,0 +1,116 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmple_s16_tied: -+** ( -+** cmpge p0\.h, p0/z, z1\.h, z0\.h -+** | -+** cmple p0\.h, p0/z, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_s16_tied, svint16_t, -+ p0 = svcmple_s16 (p0, z0, z1), -+ p0 = svcmple (p0, z0, z1)) -+ -+/* -+** cmple_s16_untied: -+** ( -+** cmpge p0\.h, p1/z, z1\.h, z0\.h -+** | -+** cmple p0\.h, p1/z, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_s16_untied, svint16_t, -+ p0 = svcmple_s16 (p1, z0, z1), -+ p0 = svcmple (p1, z0, z1)) -+ -+/* -+** cmple_w0_s16: -+** mov (z[0-9]+\.h), w0 -+** ( -+** cmpge p0\.h, p1/z, \1, z0\.h -+** | -+** cmple p0\.h, p1/z, z0\.h, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_ZX (cmple_w0_s16, svint16_t, int16_t, -+ p0 = svcmple_n_s16 (p1, z0, x0), -+ p0 = svcmple (p1, z0, x0)) -+ -+/* -+** cmple_0_s16: -+** cmple p0\.h, p1/z, z0\.h, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_0_s16, svint16_t, -+ p0 = svcmple_n_s16 (p1, z0, 0), -+ p0 = svcmple (p1, z0, 0)) -+ -+/* -+** cmple_1_s16: -+** cmple p0\.h, p1/z, z0\.h, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_1_s16, svint16_t, -+ p0 = svcmple_n_s16 (p1, z0, 1), -+ p0 = svcmple (p1, z0, 1)) -+ -+/* -+** cmple_15_s16: -+** cmple p0\.h, p1/z, z0\.h, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_15_s16, svint16_t, -+ p0 = svcmple_n_s16 (p1, z0, 15), -+ p0 = svcmple (p1, z0, 15)) -+ -+/* -+** 
cmple_16_s16: -+** mov (z[0-9]+\.h), #16 -+** ( -+** cmpge p0\.h, p1/z, \1, z0\.h -+** | -+** cmple p0\.h, p1/z, z0\.h, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_16_s16, svint16_t, -+ p0 = svcmple_n_s16 (p1, z0, 16), -+ p0 = svcmple (p1, z0, 16)) -+ -+/* -+** cmple_m1_s16: -+** cmple p0\.h, p1/z, z0\.h, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_m1_s16, svint16_t, -+ p0 = svcmple_n_s16 (p1, z0, -1), -+ p0 = svcmple (p1, z0, -1)) -+ -+/* -+** cmple_m16_s16: -+** cmple p0\.h, p1/z, z0\.h, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_m16_s16, svint16_t, -+ p0 = svcmple_n_s16 (p1, z0, -16), -+ p0 = svcmple (p1, z0, -16)) -+ -+/* -+** cmple_m17_s16: -+** mov (z[0-9]+\.h), #-17 -+** ( -+** cmpge p0\.h, p1/z, \1, z0\.h -+** | -+** cmple p0\.h, p1/z, z0\.h, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_m17_s16, svint16_t, -+ p0 = svcmple_n_s16 (p1, z0, -17), -+ p0 = svcmple (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_s32.c -new file mode 100644 -index 000000000..b0c8367e2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_s32.c -@@ -0,0 +1,116 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmple_s32_tied: -+** ( -+** cmpge p0\.s, p0/z, z1\.s, z0\.s -+** | -+** cmple p0\.s, p0/z, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_s32_tied, svint32_t, -+ p0 = svcmple_s32 (p0, z0, z1), -+ p0 = svcmple (p0, z0, z1)) -+ -+/* -+** cmple_s32_untied: -+** ( -+** cmpge p0\.s, p1/z, z1\.s, z0\.s -+** | -+** cmple p0\.s, p1/z, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_s32_untied, svint32_t, -+ p0 = svcmple_s32 (p1, z0, z1), -+ p0 = svcmple (p1, z0, z1)) -+ -+/* -+** cmple_w0_s32: -+** mov (z[0-9]+\.s), w0 -+** ( -+** cmpge p0\.s, p1/z, \1, z0\.s -+** | -+** cmple p0\.s, p1/z, z0\.s, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_ZX (cmple_w0_s32, svint32_t, int32_t, -+ p0 = svcmple_n_s32 (p1, z0, x0), -+ p0 = svcmple (p1, z0, x0)) -+ -+/* -+** cmple_0_s32: -+** cmple p0\.s, p1/z, z0\.s, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_0_s32, svint32_t, -+ p0 = svcmple_n_s32 (p1, z0, 0), -+ p0 = svcmple (p1, z0, 0)) -+ -+/* -+** cmple_1_s32: -+** cmple p0\.s, p1/z, z0\.s, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_1_s32, svint32_t, -+ p0 = svcmple_n_s32 (p1, z0, 1), -+ p0 = svcmple (p1, z0, 1)) -+ -+/* -+** cmple_15_s32: -+** cmple p0\.s, p1/z, z0\.s, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_15_s32, svint32_t, -+ p0 = svcmple_n_s32 (p1, z0, 15), -+ p0 = svcmple (p1, z0, 15)) -+ -+/* -+** cmple_16_s32: -+** mov (z[0-9]+\.s), #16 -+** ( -+** cmpge p0\.s, p1/z, \1, z0\.s -+** | -+** cmple p0\.s, p1/z, z0\.s, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_16_s32, svint32_t, -+ p0 = svcmple_n_s32 (p1, z0, 16), -+ p0 = svcmple (p1, z0, 16)) -+ -+/* -+** cmple_m1_s32: -+** cmple p0\.s, p1/z, z0\.s, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_m1_s32, svint32_t, -+ p0 = svcmple_n_s32 (p1, z0, -1), -+ p0 = svcmple (p1, z0, -1)) -+ -+/* -+** cmple_m16_s32: -+** cmple p0\.s, p1/z, z0\.s, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_m16_s32, svint32_t, -+ p0 = svcmple_n_s32 (p1, z0, -16), -+ p0 = svcmple (p1, z0, -16)) -+ -+/* -+** cmple_m17_s32: -+** mov (z[0-9]+\.s), #-17 -+** ( -+** cmpge p0\.s, p1/z, \1, z0\.s -+** | -+** cmple p0\.s, p1/z, z0\.s, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_m17_s32, svint32_t, -+ p0 = svcmple_n_s32 (p1, z0, -17), -+ p0 = svcmple (p1, z0, -17)) -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_s64.c -new file mode 100644 -index 000000000..faaa87614 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_s64.c -@@ -0,0 +1,116 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmple_s64_tied: -+** ( -+** cmpge p0\.d, p0/z, z1\.d, z0\.d -+** | -+** cmple p0\.d, p0/z, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_s64_tied, svint64_t, -+ p0 = svcmple_s64 (p0, z0, z1), -+ p0 = svcmple (p0, z0, z1)) -+ -+/* -+** cmple_s64_untied: -+** ( -+** cmpge p0\.d, p1/z, z1\.d, z0\.d -+** | -+** cmple p0\.d, p1/z, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_s64_untied, svint64_t, -+ p0 = svcmple_s64 (p1, z0, z1), -+ p0 = svcmple (p1, z0, z1)) -+ -+/* -+** cmple_x0_s64: -+** mov (z[0-9]+\.d), x0 -+** ( -+** cmpge p0\.d, p1/z, \1, z0\.d -+** | -+** cmple p0\.d, p1/z, z0\.d, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_ZX (cmple_x0_s64, svint64_t, int64_t, -+ p0 = svcmple_n_s64 (p1, z0, x0), -+ p0 = svcmple (p1, z0, x0)) -+ -+/* -+** cmple_0_s64: -+** cmple p0\.d, p1/z, z0\.d, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_0_s64, svint64_t, -+ p0 = svcmple_n_s64 (p1, z0, 0), -+ p0 = svcmple (p1, z0, 0)) -+ -+/* -+** cmple_1_s64: -+** cmple p0\.d, p1/z, z0\.d, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_1_s64, svint64_t, -+ p0 = svcmple_n_s64 (p1, z0, 1), -+ p0 = svcmple (p1, z0, 1)) -+ -+/* -+** cmple_15_s64: -+** cmple p0\.d, p1/z, z0\.d, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_15_s64, svint64_t, -+ p0 = svcmple_n_s64 (p1, z0, 15), -+ p0 = svcmple (p1, z0, 15)) -+ -+/* -+** cmple_16_s64: -+** mov (z[0-9]+\.d), #16 -+** ( -+** cmpge p0\.d, p1/z, \1, z0\.d -+** | -+** cmple p0\.d, p1/z, z0\.d, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_16_s64, svint64_t, -+ p0 = svcmple_n_s64 (p1, z0, 16), -+ p0 = svcmple (p1, z0, 16)) -+ -+/* -+** cmple_m1_s64: -+** cmple p0\.d, p1/z, z0\.d, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_m1_s64, svint64_t, -+ p0 = svcmple_n_s64 (p1, z0, -1), -+ p0 = svcmple (p1, z0, -1)) -+ -+/* -+** cmple_m16_s64: -+** cmple p0\.d, p1/z, z0\.d, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_m16_s64, svint64_t, -+ p0 = svcmple_n_s64 (p1, z0, -16), -+ p0 = svcmple (p1, z0, -16)) -+ -+/* -+** cmple_m17_s64: -+** mov (z[0-9]+\.d), #-17 -+** ( -+** cmpge p0\.d, p1/z, \1, z0\.d -+** | -+** cmple p0\.d, p1/z, z0\.d, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_m17_s64, svint64_t, -+ p0 = svcmple_n_s64 (p1, z0, -17), -+ p0 = svcmple (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_s8.c -new file mode 100644 -index 000000000..222487d75 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_s8.c -@@ -0,0 +1,116 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmple_s8_tied: -+** ( -+** cmpge p0\.b, p0/z, z1\.b, z0\.b -+** | -+** cmple p0\.b, p0/z, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_s8_tied, svint8_t, -+ p0 = svcmple_s8 (p0, z0, z1), -+ p0 = svcmple (p0, z0, z1)) -+ -+/* -+** cmple_s8_untied: -+** ( -+** cmpge p0\.b, p1/z, z1\.b, z0\.b -+** | -+** cmple p0\.b, p1/z, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_s8_untied, svint8_t, -+ p0 = svcmple_s8 (p1, z0, z1), -+ p0 = svcmple (p1, z0, z1)) -+ -+/* -+** cmple_w0_s8: -+** mov (z[0-9]+\.b), w0 -+** ( -+** cmpge p0\.b, p1/z, 
\1, z0\.b -+** | -+** cmple p0\.b, p1/z, z0\.b, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_ZX (cmple_w0_s8, svint8_t, int8_t, -+ p0 = svcmple_n_s8 (p1, z0, x0), -+ p0 = svcmple (p1, z0, x0)) -+ -+/* -+** cmple_0_s8: -+** cmple p0\.b, p1/z, z0\.b, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_0_s8, svint8_t, -+ p0 = svcmple_n_s8 (p1, z0, 0), -+ p0 = svcmple (p1, z0, 0)) -+ -+/* -+** cmple_1_s8: -+** cmple p0\.b, p1/z, z0\.b, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_1_s8, svint8_t, -+ p0 = svcmple_n_s8 (p1, z0, 1), -+ p0 = svcmple (p1, z0, 1)) -+ -+/* -+** cmple_15_s8: -+** cmple p0\.b, p1/z, z0\.b, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_15_s8, svint8_t, -+ p0 = svcmple_n_s8 (p1, z0, 15), -+ p0 = svcmple (p1, z0, 15)) -+ -+/* -+** cmple_16_s8: -+** mov (z[0-9]+\.b), #16 -+** ( -+** cmpge p0\.b, p1/z, \1, z0\.b -+** | -+** cmple p0\.b, p1/z, z0\.b, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_16_s8, svint8_t, -+ p0 = svcmple_n_s8 (p1, z0, 16), -+ p0 = svcmple (p1, z0, 16)) -+ -+/* -+** cmple_m1_s8: -+** cmple p0\.b, p1/z, z0\.b, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_m1_s8, svint8_t, -+ p0 = svcmple_n_s8 (p1, z0, -1), -+ p0 = svcmple (p1, z0, -1)) -+ -+/* -+** cmple_m16_s8: -+** cmple p0\.b, p1/z, z0\.b, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_m16_s8, svint8_t, -+ p0 = svcmple_n_s8 (p1, z0, -16), -+ p0 = svcmple (p1, z0, -16)) -+ -+/* -+** cmple_m17_s8: -+** mov (z[0-9]+\.b), #-17 -+** ( -+** cmpge p0\.b, p1/z, \1, z0\.b -+** | -+** cmple p0\.b, p1/z, z0\.b, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_m17_s8, svint8_t, -+ p0 = svcmple_n_s8 (p1, z0, -17), -+ p0 = svcmple (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_u16.c -new file mode 100644 -index 000000000..26af06e52 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_u16.c -@@ -0,0 +1,116 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmple_u16_tied: -+** ( -+** cmphs p0\.h, p0/z, z1\.h, z0\.h -+** | -+** cmpls p0\.h, p0/z, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_u16_tied, svuint16_t, -+ p0 = svcmple_u16 (p0, z0, z1), -+ p0 = svcmple (p0, z0, z1)) -+ -+/* -+** cmple_u16_untied: -+** ( -+** cmphs p0\.h, p1/z, z1\.h, z0\.h -+** | -+** cmpls p0\.h, p1/z, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_u16_untied, svuint16_t, -+ p0 = svcmple_u16 (p1, z0, z1), -+ p0 = svcmple (p1, z0, z1)) -+ -+/* -+** cmple_w0_u16: -+** mov (z[0-9]+\.h), w0 -+** ( -+** cmphs p0\.h, p1/z, \1, z0\.h -+** | -+** cmpls p0\.h, p1/z, z0\.h, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_ZX (cmple_w0_u16, svuint16_t, uint16_t, -+ p0 = svcmple_n_u16 (p1, z0, x0), -+ p0 = svcmple (p1, z0, x0)) -+ -+/* -+** cmple_0_u16: -+** cmpls p0\.h, p1/z, z0\.h, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_0_u16, svuint16_t, -+ p0 = svcmple_n_u16 (p1, z0, 0), -+ p0 = svcmple (p1, z0, 0)) -+ -+/* -+** cmple_1_u16: -+** cmpls p0\.h, p1/z, z0\.h, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_1_u16, svuint16_t, -+ p0 = svcmple_n_u16 (p1, z0, 1), -+ p0 = svcmple (p1, z0, 1)) -+ -+/* -+** cmple_15_u16: -+** cmpls p0\.h, p1/z, z0\.h, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_15_u16, svuint16_t, -+ p0 = svcmple_n_u16 (p1, z0, 15), -+ p0 = svcmple (p1, z0, 15)) -+ -+/* -+** cmple_16_u16: -+** cmpls p0\.h, p1/z, z0\.h, #16 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_16_u16, svuint16_t, -+ p0 = svcmple_n_u16 (p1, z0, 16), -+ p0 = svcmple (p1, z0, 16)) -+ -+/* -+** cmple_127_u16: -+** cmpls p0\.h, 
p1/z, z0\.h, #127 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_127_u16, svuint16_t, -+ p0 = svcmple_n_u16 (p1, z0, 127), -+ p0 = svcmple (p1, z0, 127)) -+ -+/* -+** cmple_128_u16: -+** mov (z[0-9]+\.h), #128 -+** ( -+** cmphs p0\.h, p1/z, \1, z0\.h -+** | -+** cmpls p0\.h, p1/z, z0\.h, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_128_u16, svuint16_t, -+ p0 = svcmple_n_u16 (p1, z0, 128), -+ p0 = svcmple (p1, z0, 128)) -+ -+/* -+** cmple_m1_u16: -+** mov (z[0-9]+)\.b, #-1 -+** ( -+** cmphs p0\.h, p1/z, \1\.h, z0\.h -+** | -+** cmpls p0\.h, p1/z, z0\.h, \1\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_m1_u16, svuint16_t, -+ p0 = svcmple_n_u16 (p1, z0, -1), -+ p0 = svcmple (p1, z0, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_u32.c -new file mode 100644 -index 000000000..cee2d14c8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_u32.c -@@ -0,0 +1,116 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmple_u32_tied: -+** ( -+** cmphs p0\.s, p0/z, z1\.s, z0\.s -+** | -+** cmpls p0\.s, p0/z, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_u32_tied, svuint32_t, -+ p0 = svcmple_u32 (p0, z0, z1), -+ p0 = svcmple (p0, z0, z1)) -+ -+/* -+** cmple_u32_untied: -+** ( -+** cmphs p0\.s, p1/z, z1\.s, z0\.s -+** | -+** cmpls p0\.s, p1/z, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_u32_untied, svuint32_t, -+ p0 = svcmple_u32 (p1, z0, z1), -+ p0 = svcmple (p1, z0, z1)) -+ -+/* -+** cmple_w0_u32: -+** mov (z[0-9]+\.s), w0 -+** ( -+** cmphs p0\.s, p1/z, \1, z0\.s -+** | -+** cmpls p0\.s, p1/z, z0\.s, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_ZX (cmple_w0_u32, svuint32_t, uint32_t, -+ p0 = svcmple_n_u32 (p1, z0, x0), -+ p0 = svcmple (p1, z0, x0)) -+ -+/* -+** cmple_0_u32: -+** cmpls p0\.s, p1/z, z0\.s, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_0_u32, svuint32_t, -+ p0 = svcmple_n_u32 (p1, z0, 0), -+ p0 = svcmple (p1, z0, 0)) -+ -+/* -+** cmple_1_u32: -+** cmpls p0\.s, p1/z, z0\.s, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_1_u32, svuint32_t, -+ p0 = svcmple_n_u32 (p1, z0, 1), -+ p0 = svcmple (p1, z0, 1)) -+ -+/* -+** cmple_15_u32: -+** cmpls p0\.s, p1/z, z0\.s, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_15_u32, svuint32_t, -+ p0 = svcmple_n_u32 (p1, z0, 15), -+ p0 = svcmple (p1, z0, 15)) -+ -+/* -+** cmple_16_u32: -+** cmpls p0\.s, p1/z, z0\.s, #16 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_16_u32, svuint32_t, -+ p0 = svcmple_n_u32 (p1, z0, 16), -+ p0 = svcmple (p1, z0, 16)) -+ -+/* -+** cmple_127_u32: -+** cmpls p0\.s, p1/z, z0\.s, #127 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_127_u32, svuint32_t, -+ p0 = svcmple_n_u32 (p1, z0, 127), -+ p0 = svcmple (p1, z0, 127)) -+ -+/* -+** cmple_128_u32: -+** mov (z[0-9]+\.s), #128 -+** ( -+** cmphs p0\.s, p1/z, \1, z0\.s -+** | -+** cmpls p0\.s, p1/z, z0\.s, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_128_u32, svuint32_t, -+ p0 = svcmple_n_u32 (p1, z0, 128), -+ p0 = svcmple (p1, z0, 128)) -+ -+/* -+** cmple_m1_u32: -+** mov (z[0-9]+)\.b, #-1 -+** ( -+** cmphs p0\.s, p1/z, \1\.s, z0\.s -+** | -+** cmpls p0\.s, p1/z, z0\.s, \1\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_m1_u32, svuint32_t, -+ p0 = svcmple_n_u32 (p1, z0, -1), -+ p0 = svcmple (p1, z0, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_u64.c -new file mode 100644 -index 000000000..b8388bca8 ---- /dev/null -+++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_u64.c -@@ -0,0 +1,116 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmple_u64_tied: -+** ( -+** cmphs p0\.d, p0/z, z1\.d, z0\.d -+** | -+** cmpls p0\.d, p0/z, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_u64_tied, svuint64_t, -+ p0 = svcmple_u64 (p0, z0, z1), -+ p0 = svcmple (p0, z0, z1)) -+ -+/* -+** cmple_u64_untied: -+** ( -+** cmphs p0\.d, p1/z, z1\.d, z0\.d -+** | -+** cmpls p0\.d, p1/z, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_u64_untied, svuint64_t, -+ p0 = svcmple_u64 (p1, z0, z1), -+ p0 = svcmple (p1, z0, z1)) -+ -+/* -+** cmple_x0_u64: -+** mov (z[0-9]+\.d), x0 -+** ( -+** cmphs p0\.d, p1/z, \1, z0\.d -+** | -+** cmpls p0\.d, p1/z, z0\.d, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_ZX (cmple_x0_u64, svuint64_t, uint64_t, -+ p0 = svcmple_n_u64 (p1, z0, x0), -+ p0 = svcmple (p1, z0, x0)) -+ -+/* -+** cmple_0_u64: -+** cmpls p0\.d, p1/z, z0\.d, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_0_u64, svuint64_t, -+ p0 = svcmple_n_u64 (p1, z0, 0), -+ p0 = svcmple (p1, z0, 0)) -+ -+/* -+** cmple_1_u64: -+** cmpls p0\.d, p1/z, z0\.d, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_1_u64, svuint64_t, -+ p0 = svcmple_n_u64 (p1, z0, 1), -+ p0 = svcmple (p1, z0, 1)) -+ -+/* -+** cmple_15_u64: -+** cmpls p0\.d, p1/z, z0\.d, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_15_u64, svuint64_t, -+ p0 = svcmple_n_u64 (p1, z0, 15), -+ p0 = svcmple (p1, z0, 15)) -+ -+/* -+** cmple_16_u64: -+** cmpls p0\.d, p1/z, z0\.d, #16 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_16_u64, svuint64_t, -+ p0 = svcmple_n_u64 (p1, z0, 16), -+ p0 = svcmple (p1, z0, 16)) -+ -+/* -+** cmple_127_u64: -+** cmpls p0\.d, p1/z, z0\.d, #127 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_127_u64, svuint64_t, -+ p0 = svcmple_n_u64 (p1, z0, 127), -+ p0 = svcmple (p1, z0, 127)) -+ -+/* -+** cmple_128_u64: -+** mov (z[0-9]+\.d), #128 -+** ( -+** cmphs p0\.d, p1/z, \1, z0\.d -+** | -+** cmpls p0\.d, p1/z, z0\.d, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_128_u64, svuint64_t, -+ p0 = svcmple_n_u64 (p1, z0, 128), -+ p0 = svcmple (p1, z0, 128)) -+ -+/* -+** cmple_m1_u64: -+** mov (z[0-9]+)\.b, #-1 -+** ( -+** cmphs p0\.d, p1/z, \1\.d, z0\.d -+** | -+** cmpls p0\.d, p1/z, z0\.d, \1\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_m1_u64, svuint64_t, -+ p0 = svcmple_n_u64 (p1, z0, -1), -+ p0 = svcmple (p1, z0, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_u8.c -new file mode 100644 -index 000000000..55a8d4f40 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_u8.c -@@ -0,0 +1,116 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmple_u8_tied: -+** ( -+** cmphs p0\.b, p0/z, z1\.b, z0\.b -+** | -+** cmpls p0\.b, p0/z, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_u8_tied, svuint8_t, -+ p0 = svcmple_u8 (p0, z0, z1), -+ p0 = svcmple (p0, z0, z1)) -+ -+/* -+** cmple_u8_untied: -+** ( -+** cmphs p0\.b, p1/z, z1\.b, z0\.b -+** | -+** cmpls p0\.b, p1/z, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_u8_untied, svuint8_t, -+ p0 = svcmple_u8 (p1, z0, z1), -+ p0 = svcmple (p1, z0, z1)) -+ -+/* -+** cmple_w0_u8: -+** mov (z[0-9]+\.b), w0 -+** ( -+** cmphs p0\.b, p1/z, \1, z0\.b -+** | -+** cmpls p0\.b, p1/z, z0\.b, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_ZX (cmple_w0_u8, svuint8_t, uint8_t, -+ p0 = svcmple_n_u8 (p1, z0, x0), -+ p0 = 
svcmple (p1, z0, x0)) -+ -+/* -+** cmple_0_u8: -+** cmpls p0\.b, p1/z, z0\.b, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_0_u8, svuint8_t, -+ p0 = svcmple_n_u8 (p1, z0, 0), -+ p0 = svcmple (p1, z0, 0)) -+ -+/* -+** cmple_1_u8: -+** cmpls p0\.b, p1/z, z0\.b, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_1_u8, svuint8_t, -+ p0 = svcmple_n_u8 (p1, z0, 1), -+ p0 = svcmple (p1, z0, 1)) -+ -+/* -+** cmple_15_u8: -+** cmpls p0\.b, p1/z, z0\.b, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_15_u8, svuint8_t, -+ p0 = svcmple_n_u8 (p1, z0, 15), -+ p0 = svcmple (p1, z0, 15)) -+ -+/* -+** cmple_16_u8: -+** cmpls p0\.b, p1/z, z0\.b, #16 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_16_u8, svuint8_t, -+ p0 = svcmple_n_u8 (p1, z0, 16), -+ p0 = svcmple (p1, z0, 16)) -+ -+/* -+** cmple_127_u8: -+** cmpls p0\.b, p1/z, z0\.b, #127 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_127_u8, svuint8_t, -+ p0 = svcmple_n_u8 (p1, z0, 127), -+ p0 = svcmple (p1, z0, 127)) -+ -+/* -+** cmple_128_u8: -+** mov (z[0-9]+\.b), #-128 -+** ( -+** cmphs p0\.b, p1/z, \1, z0\.b -+** | -+** cmpls p0\.b, p1/z, z0\.b, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_128_u8, svuint8_t, -+ p0 = svcmple_n_u8 (p1, z0, 128), -+ p0 = svcmple (p1, z0, 128)) -+ -+/* -+** cmple_m1_u8: -+** mov (z[0-9]+\.b), #-1 -+** ( -+** cmphs p0\.b, p1/z, \1, z0\.b -+** | -+** cmpls p0\.b, p1/z, z0\.b, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmple_m1_u8, svuint8_t, -+ p0 = svcmple_n_u8 (p1, z0, -1), -+ p0 = svcmple (p1, z0, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_wide_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_wide_s16.c -new file mode 100644 -index 000000000..f1f0b2ed6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_wide_s16.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmple_wide_s16_tied: -+** cmple p0\.h, p0/z, z0\.h, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmple_wide_s16_tied, svint16_t, svint64_t, -+ p0 = svcmple_wide_s16 (p0, z0, z1), -+ p0 = svcmple_wide (p0, z0, z1)) -+ -+/* -+** cmple_wide_s16_untied: -+** cmple p0\.h, p1/z, z0\.h, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmple_wide_s16_untied, svint16_t, svint64_t, -+ p0 = svcmple_wide_s16 (p1, z0, z1), -+ p0 = svcmple_wide (p1, z0, z1)) -+ -+/* -+** cmple_wide_x0_s16: -+** mov (z[0-9]+\.d), x0 -+** cmple p0\.h, p1/z, z0\.h, \1 -+** ret -+*/ -+TEST_COMPARE_ZX (cmple_wide_x0_s16, svint16_t, int64_t, -+ p0 = svcmple_wide_n_s16 (p1, z0, x0), -+ p0 = svcmple_wide (p1, z0, x0)) -+ -+/* -+** cmple_wide_0_s16: -+** cmple p0\.h, p1/z, z0\.h, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_0_s16, svint16_t, -+ p0 = svcmple_wide_n_s16 (p1, z0, 0), -+ p0 = svcmple_wide (p1, z0, 0)) -+ -+/* -+** cmple_wide_1_s16: -+** cmple p0\.h, p1/z, z0\.h, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_1_s16, svint16_t, -+ p0 = svcmple_wide_n_s16 (p1, z0, 1), -+ p0 = svcmple_wide (p1, z0, 1)) -+ -+/* -+** cmple_wide_15_s16: -+** cmple p0\.h, p1/z, z0\.h, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_15_s16, svint16_t, -+ p0 = svcmple_wide_n_s16 (p1, z0, 15), -+ p0 = svcmple_wide (p1, z0, 15)) -+ -+/* -+** cmple_wide_16_s16: -+** mov (z[0-9]+\.d), #16 -+** cmple p0\.h, p1/z, z0\.h, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_16_s16, svint16_t, -+ p0 = svcmple_wide_n_s16 (p1, z0, 16), -+ p0 = svcmple_wide (p1, z0, 16)) -+ -+/* -+** cmple_wide_m1_s16: -+** cmple p0\.h, p1/z, z0\.h, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_m1_s16, svint16_t, -+ p0 = svcmple_wide_n_s16 (p1, z0, -1), 
-+ p0 = svcmple_wide (p1, z0, -1)) -+ -+/* -+** cmple_wide_m16_s16: -+** cmple p0\.h, p1/z, z0\.h, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_m16_s16, svint16_t, -+ p0 = svcmple_wide_n_s16 (p1, z0, -16), -+ p0 = svcmple_wide (p1, z0, -16)) -+ -+/* -+** cmple_wide_m17_s16: -+** mov (z[0-9]+\.d), #-17 -+** cmple p0\.h, p1/z, z0\.h, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_m17_s16, svint16_t, -+ p0 = svcmple_wide_n_s16 (p1, z0, -17), -+ p0 = svcmple_wide (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_wide_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_wide_s32.c -new file mode 100644 -index 000000000..edc5513b6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_wide_s32.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmple_wide_s32_tied: -+** cmple p0\.s, p0/z, z0\.s, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmple_wide_s32_tied, svint32_t, svint64_t, -+ p0 = svcmple_wide_s32 (p0, z0, z1), -+ p0 = svcmple_wide (p0, z0, z1)) -+ -+/* -+** cmple_wide_s32_untied: -+** cmple p0\.s, p1/z, z0\.s, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmple_wide_s32_untied, svint32_t, svint64_t, -+ p0 = svcmple_wide_s32 (p1, z0, z1), -+ p0 = svcmple_wide (p1, z0, z1)) -+ -+/* -+** cmple_wide_x0_s32: -+** mov (z[0-9]+\.d), x0 -+** cmple p0\.s, p1/z, z0\.s, \1 -+** ret -+*/ -+TEST_COMPARE_ZX (cmple_wide_x0_s32, svint32_t, int64_t, -+ p0 = svcmple_wide_n_s32 (p1, z0, x0), -+ p0 = svcmple_wide (p1, z0, x0)) -+ -+/* -+** cmple_wide_0_s32: -+** cmple p0\.s, p1/z, z0\.s, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_0_s32, svint32_t, -+ p0 = svcmple_wide_n_s32 (p1, z0, 0), -+ p0 = svcmple_wide (p1, z0, 0)) -+ -+/* -+** cmple_wide_1_s32: -+** cmple p0\.s, p1/z, z0\.s, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_1_s32, svint32_t, -+ p0 = svcmple_wide_n_s32 (p1, z0, 1), -+ p0 = svcmple_wide (p1, z0, 1)) -+ -+/* -+** cmple_wide_15_s32: -+** cmple p0\.s, p1/z, z0\.s, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_15_s32, svint32_t, -+ p0 = svcmple_wide_n_s32 (p1, z0, 15), -+ p0 = svcmple_wide (p1, z0, 15)) -+ -+/* -+** cmple_wide_16_s32: -+** mov (z[0-9]+\.d), #16 -+** cmple p0\.s, p1/z, z0\.s, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_16_s32, svint32_t, -+ p0 = svcmple_wide_n_s32 (p1, z0, 16), -+ p0 = svcmple_wide (p1, z0, 16)) -+ -+/* -+** cmple_wide_m1_s32: -+** cmple p0\.s, p1/z, z0\.s, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_m1_s32, svint32_t, -+ p0 = svcmple_wide_n_s32 (p1, z0, -1), -+ p0 = svcmple_wide (p1, z0, -1)) -+ -+/* -+** cmple_wide_m16_s32: -+** cmple p0\.s, p1/z, z0\.s, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_m16_s32, svint32_t, -+ p0 = svcmple_wide_n_s32 (p1, z0, -16), -+ p0 = svcmple_wide (p1, z0, -16)) -+ -+/* -+** cmple_wide_m17_s32: -+** mov (z[0-9]+\.d), #-17 -+** cmple p0\.s, p1/z, z0\.s, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_m17_s32, svint32_t, -+ p0 = svcmple_wide_n_s32 (p1, z0, -17), -+ p0 = svcmple_wide (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_wide_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_wide_s8.c -new file mode 100644 -index 000000000..984044460 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_wide_s8.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmple_wide_s8_tied: -+** cmple p0\.b, p0/z, z0\.b, z1\.d -+** ret -+*/ 
-+TEST_COMPARE_DUAL_Z (cmple_wide_s8_tied, svint8_t, svint64_t, -+ p0 = svcmple_wide_s8 (p0, z0, z1), -+ p0 = svcmple_wide (p0, z0, z1)) -+ -+/* -+** cmple_wide_s8_untied: -+** cmple p0\.b, p1/z, z0\.b, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmple_wide_s8_untied, svint8_t, svint64_t, -+ p0 = svcmple_wide_s8 (p1, z0, z1), -+ p0 = svcmple_wide (p1, z0, z1)) -+ -+/* -+** cmple_wide_x0_s8: -+** mov (z[0-9]+\.d), x0 -+** cmple p0\.b, p1/z, z0\.b, \1 -+** ret -+*/ -+TEST_COMPARE_ZX (cmple_wide_x0_s8, svint8_t, int64_t, -+ p0 = svcmple_wide_n_s8 (p1, z0, x0), -+ p0 = svcmple_wide (p1, z0, x0)) -+ -+/* -+** cmple_wide_0_s8: -+** cmple p0\.b, p1/z, z0\.b, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_0_s8, svint8_t, -+ p0 = svcmple_wide_n_s8 (p1, z0, 0), -+ p0 = svcmple_wide (p1, z0, 0)) -+ -+/* -+** cmple_wide_1_s8: -+** cmple p0\.b, p1/z, z0\.b, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_1_s8, svint8_t, -+ p0 = svcmple_wide_n_s8 (p1, z0, 1), -+ p0 = svcmple_wide (p1, z0, 1)) -+ -+/* -+** cmple_wide_15_s8: -+** cmple p0\.b, p1/z, z0\.b, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_15_s8, svint8_t, -+ p0 = svcmple_wide_n_s8 (p1, z0, 15), -+ p0 = svcmple_wide (p1, z0, 15)) -+ -+/* -+** cmple_wide_16_s8: -+** mov (z[0-9]+\.d), #16 -+** cmple p0\.b, p1/z, z0\.b, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_16_s8, svint8_t, -+ p0 = svcmple_wide_n_s8 (p1, z0, 16), -+ p0 = svcmple_wide (p1, z0, 16)) -+ -+/* -+** cmple_wide_m1_s8: -+** cmple p0\.b, p1/z, z0\.b, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_m1_s8, svint8_t, -+ p0 = svcmple_wide_n_s8 (p1, z0, -1), -+ p0 = svcmple_wide (p1, z0, -1)) -+ -+/* -+** cmple_wide_m16_s8: -+** cmple p0\.b, p1/z, z0\.b, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_m16_s8, svint8_t, -+ p0 = svcmple_wide_n_s8 (p1, z0, -16), -+ p0 = svcmple_wide (p1, z0, -16)) -+ -+/* -+** cmple_wide_m17_s8: -+** mov (z[0-9]+\.d), #-17 -+** cmple p0\.b, p1/z, z0\.b, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_m17_s8, svint8_t, -+ p0 = svcmple_wide_n_s8 (p1, z0, -17), -+ p0 = svcmple_wide (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_wide_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_wide_u16.c -new file mode 100644 -index 000000000..a39a1aad5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_wide_u16.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmple_wide_u16_tied: -+** cmpls p0\.h, p0/z, z0\.h, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmple_wide_u16_tied, svuint16_t, svuint64_t, -+ p0 = svcmple_wide_u16 (p0, z0, z1), -+ p0 = svcmple_wide (p0, z0, z1)) -+ -+/* -+** cmple_wide_u16_untied: -+** cmpls p0\.h, p1/z, z0\.h, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmple_wide_u16_untied, svuint16_t, svuint64_t, -+ p0 = svcmple_wide_u16 (p1, z0, z1), -+ p0 = svcmple_wide (p1, z0, z1)) -+ -+/* -+** cmple_wide_x0_u16: -+** mov (z[0-9]+\.d), x0 -+** cmpls p0\.h, p1/z, z0\.h, \1 -+** ret -+*/ -+TEST_COMPARE_ZX (cmple_wide_x0_u16, svuint16_t, uint64_t, -+ p0 = svcmple_wide_n_u16 (p1, z0, x0), -+ p0 = svcmple_wide (p1, z0, x0)) -+ -+/* -+** cmple_wide_0_u16: -+** cmpls p0\.h, p1/z, z0\.h, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_0_u16, svuint16_t, -+ p0 = svcmple_wide_n_u16 (p1, z0, 0), -+ p0 = svcmple_wide (p1, z0, 0)) -+ -+/* -+** cmple_wide_1_u16: -+** cmpls p0\.h, p1/z, z0\.h, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_1_u16, svuint16_t, -+ p0 = svcmple_wide_n_u16 (p1, z0, 1), -+ p0 = svcmple_wide (p1, z0, 1)) 
-+ -+/* -+** cmple_wide_15_u16: -+** cmpls p0\.h, p1/z, z0\.h, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_15_u16, svuint16_t, -+ p0 = svcmple_wide_n_u16 (p1, z0, 15), -+ p0 = svcmple_wide (p1, z0, 15)) -+ -+/* -+** cmple_wide_16_u16: -+** cmpls p0\.h, p1/z, z0\.h, #16 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_16_u16, svuint16_t, -+ p0 = svcmple_wide_n_u16 (p1, z0, 16), -+ p0 = svcmple_wide (p1, z0, 16)) -+ -+/* -+** cmple_wide_127_u16: -+** cmpls p0\.h, p1/z, z0\.h, #127 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_127_u16, svuint16_t, -+ p0 = svcmple_wide_n_u16 (p1, z0, 127), -+ p0 = svcmple_wide (p1, z0, 127)) -+ -+/* -+** cmple_wide_128_u16: -+** mov (z[0-9]+\.d), #128 -+** cmpls p0\.h, p1/z, z0\.h, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_128_u16, svuint16_t, -+ p0 = svcmple_wide_n_u16 (p1, z0, 128), -+ p0 = svcmple_wide (p1, z0, 128)) -+ -+/* -+** cmple_wide_m1_u16: -+** mov (z[0-9]+)\.b, #-1 -+** cmpls p0\.h, p1/z, z0\.h, \1\.d -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_m1_u16, svuint16_t, -+ p0 = svcmple_wide_n_u16 (p1, z0, -1), -+ p0 = svcmple_wide (p1, z0, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_wide_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_wide_u32.c -new file mode 100644 -index 000000000..fe682c9e8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_wide_u32.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmple_wide_u32_tied: -+** cmpls p0\.s, p0/z, z0\.s, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmple_wide_u32_tied, svuint32_t, svuint64_t, -+ p0 = svcmple_wide_u32 (p0, z0, z1), -+ p0 = svcmple_wide (p0, z0, z1)) -+ -+/* -+** cmple_wide_u32_untied: -+** cmpls p0\.s, p1/z, z0\.s, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmple_wide_u32_untied, svuint32_t, svuint64_t, -+ p0 = svcmple_wide_u32 (p1, z0, z1), -+ p0 = svcmple_wide (p1, z0, z1)) -+ -+/* -+** cmple_wide_x0_u32: -+** mov (z[0-9]+\.d), x0 -+** cmpls p0\.s, p1/z, z0\.s, \1 -+** ret -+*/ -+TEST_COMPARE_ZX (cmple_wide_x0_u32, svuint32_t, uint64_t, -+ p0 = svcmple_wide_n_u32 (p1, z0, x0), -+ p0 = svcmple_wide (p1, z0, x0)) -+ -+/* -+** cmple_wide_0_u32: -+** cmpls p0\.s, p1/z, z0\.s, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_0_u32, svuint32_t, -+ p0 = svcmple_wide_n_u32 (p1, z0, 0), -+ p0 = svcmple_wide (p1, z0, 0)) -+ -+/* -+** cmple_wide_1_u32: -+** cmpls p0\.s, p1/z, z0\.s, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_1_u32, svuint32_t, -+ p0 = svcmple_wide_n_u32 (p1, z0, 1), -+ p0 = svcmple_wide (p1, z0, 1)) -+ -+/* -+** cmple_wide_15_u32: -+** cmpls p0\.s, p1/z, z0\.s, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_15_u32, svuint32_t, -+ p0 = svcmple_wide_n_u32 (p1, z0, 15), -+ p0 = svcmple_wide (p1, z0, 15)) -+ -+/* -+** cmple_wide_16_u32: -+** cmpls p0\.s, p1/z, z0\.s, #16 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_16_u32, svuint32_t, -+ p0 = svcmple_wide_n_u32 (p1, z0, 16), -+ p0 = svcmple_wide (p1, z0, 16)) -+ -+/* -+** cmple_wide_127_u32: -+** cmpls p0\.s, p1/z, z0\.s, #127 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_127_u32, svuint32_t, -+ p0 = svcmple_wide_n_u32 (p1, z0, 127), -+ p0 = svcmple_wide (p1, z0, 127)) -+ -+/* -+** cmple_wide_128_u32: -+** mov (z[0-9]+\.d), #128 -+** cmpls p0\.s, p1/z, z0\.s, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_128_u32, svuint32_t, -+ p0 = svcmple_wide_n_u32 (p1, z0, 128), -+ p0 = svcmple_wide (p1, z0, 128)) -+ -+/* -+** cmple_wide_m1_u32: -+** mov (z[0-9]+)\.b, #-1 -+** cmpls p0\.s, p1/z, z0\.s, \1\.d -+** 
ret -+*/ -+TEST_COMPARE_Z (cmple_wide_m1_u32, svuint32_t, -+ p0 = svcmple_wide_n_u32 (p1, z0, -1), -+ p0 = svcmple_wide (p1, z0, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_wide_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_wide_u8.c -new file mode 100644 -index 000000000..893dfa627 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmple_wide_u8.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmple_wide_u8_tied: -+** cmpls p0\.b, p0/z, z0\.b, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmple_wide_u8_tied, svuint8_t, svuint64_t, -+ p0 = svcmple_wide_u8 (p0, z0, z1), -+ p0 = svcmple_wide (p0, z0, z1)) -+ -+/* -+** cmple_wide_u8_untied: -+** cmpls p0\.b, p1/z, z0\.b, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmple_wide_u8_untied, svuint8_t, svuint64_t, -+ p0 = svcmple_wide_u8 (p1, z0, z1), -+ p0 = svcmple_wide (p1, z0, z1)) -+ -+/* -+** cmple_wide_x0_u8: -+** mov (z[0-9]+\.d), x0 -+** cmpls p0\.b, p1/z, z0\.b, \1 -+** ret -+*/ -+TEST_COMPARE_ZX (cmple_wide_x0_u8, svuint8_t, uint64_t, -+ p0 = svcmple_wide_n_u8 (p1, z0, x0), -+ p0 = svcmple_wide (p1, z0, x0)) -+ -+/* -+** cmple_wide_0_u8: -+** cmpls p0\.b, p1/z, z0\.b, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_0_u8, svuint8_t, -+ p0 = svcmple_wide_n_u8 (p1, z0, 0), -+ p0 = svcmple_wide (p1, z0, 0)) -+ -+/* -+** cmple_wide_1_u8: -+** cmpls p0\.b, p1/z, z0\.b, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_1_u8, svuint8_t, -+ p0 = svcmple_wide_n_u8 (p1, z0, 1), -+ p0 = svcmple_wide (p1, z0, 1)) -+ -+/* -+** cmple_wide_15_u8: -+** cmpls p0\.b, p1/z, z0\.b, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_15_u8, svuint8_t, -+ p0 = svcmple_wide_n_u8 (p1, z0, 15), -+ p0 = svcmple_wide (p1, z0, 15)) -+ -+/* -+** cmple_wide_16_u8: -+** cmpls p0\.b, p1/z, z0\.b, #16 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_16_u8, svuint8_t, -+ p0 = svcmple_wide_n_u8 (p1, z0, 16), -+ p0 = svcmple_wide (p1, z0, 16)) -+ -+/* -+** cmple_wide_127_u8: -+** cmpls p0\.b, p1/z, z0\.b, #127 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_127_u8, svuint8_t, -+ p0 = svcmple_wide_n_u8 (p1, z0, 127), -+ p0 = svcmple_wide (p1, z0, 127)) -+ -+/* -+** cmple_wide_128_u8: -+** mov (z[0-9]+\.d), #128 -+** cmpls p0\.b, p1/z, z0\.b, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_128_u8, svuint8_t, -+ p0 = svcmple_wide_n_u8 (p1, z0, 128), -+ p0 = svcmple_wide (p1, z0, 128)) -+ -+/* -+** cmple_wide_m1_u8: -+** mov (z[0-9]+)\.b, #-1 -+** cmpls p0\.b, p1/z, z0\.b, \1\.d -+** ret -+*/ -+TEST_COMPARE_Z (cmple_wide_m1_u8, svuint8_t, -+ p0 = svcmple_wide_n_u8 (p1, z0, -1), -+ p0 = svcmple_wide (p1, z0, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_f16.c -new file mode 100644 -index 000000000..598f673a8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_f16.c -@@ -0,0 +1,66 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmplt_f16_tied: -+** ( -+** fcmgt p0\.h, p0/z, z1\.h, z0\.h -+** | -+** fcmlt p0\.h, p0/z, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_f16_tied, svfloat16_t, -+ p0 = svcmplt_f16 (p0, z0, z1), -+ p0 = svcmplt (p0, z0, z1)) -+ -+/* -+** cmplt_f16_untied: -+** ( -+** fcmgt p0\.h, p1/z, z1\.h, z0\.h -+** | -+** fcmlt p0\.h, p1/z, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_f16_untied, svfloat16_t, -+ p0 = svcmplt_f16 (p1, z0, z1), -+ p0 = svcmplt 
(p1, z0, z1)) -+ -+/* -+** cmplt_h4_f16: -+** mov (z[0-9]+\.h), h4 -+** ( -+** fcmgt p0\.h, p1/z, \1, z0\.h -+** | -+** fcmlt p0\.h, p1/z, z0\.h, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_ZD (cmplt_h4_f16, svfloat16_t, float16_t, -+ p0 = svcmplt_n_f16 (p1, z0, d4), -+ p0 = svcmplt (p1, z0, d4)) -+ -+/* -+** cmplt_0_f16: -+** fcmlt p0\.h, p1/z, z0\.h, #0\.0 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_0_f16, svfloat16_t, -+ p0 = svcmplt_n_f16 (p1, z0, 0), -+ p0 = svcmplt (p1, z0, 0)) -+ -+/* -+** cmplt_1_f16: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? -+** ( -+** fcmgt p0\.h, p1/z, \1, z0\.h -+** | -+** fcmlt p0\.h, p1/z, z0\.h, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_1_f16, svfloat16_t, -+ p0 = svcmplt_n_f16 (p1, z0, 1), -+ p0 = svcmplt (p1, z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_f32.c -new file mode 100644 -index 000000000..f9dea3665 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_f32.c -@@ -0,0 +1,66 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmplt_f32_tied: -+** ( -+** fcmgt p0\.s, p0/z, z1\.s, z0\.s -+** | -+** fcmlt p0\.s, p0/z, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_f32_tied, svfloat32_t, -+ p0 = svcmplt_f32 (p0, z0, z1), -+ p0 = svcmplt (p0, z0, z1)) -+ -+/* -+** cmplt_f32_untied: -+** ( -+** fcmgt p0\.s, p1/z, z1\.s, z0\.s -+** | -+** fcmlt p0\.s, p1/z, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_f32_untied, svfloat32_t, -+ p0 = svcmplt_f32 (p1, z0, z1), -+ p0 = svcmplt (p1, z0, z1)) -+ -+/* -+** cmplt_s4_f32: -+** mov (z[0-9]+\.s), s4 -+** ( -+** fcmgt p0\.s, p1/z, \1, z0\.s -+** | -+** fcmlt p0\.s, p1/z, z0\.s, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_ZD (cmplt_s4_f32, svfloat32_t, float32_t, -+ p0 = svcmplt_n_f32 (p1, z0, d4), -+ p0 = svcmplt (p1, z0, d4)) -+ -+/* -+** cmplt_0_f32: -+** fcmlt p0\.s, p1/z, z0\.s, #0\.0 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_0_f32, svfloat32_t, -+ p0 = svcmplt_n_f32 (p1, z0, 0), -+ p0 = svcmplt (p1, z0, 0)) -+ -+/* -+** cmplt_1_f32: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? 
-+** ( -+** fcmgt p0\.s, p1/z, \1, z0\.s -+** | -+** fcmlt p0\.s, p1/z, z0\.s, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_1_f32, svfloat32_t, -+ p0 = svcmplt_n_f32 (p1, z0, 1), -+ p0 = svcmplt (p1, z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_f64.c -new file mode 100644 -index 000000000..6f251db4f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_f64.c -@@ -0,0 +1,66 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmplt_f64_tied: -+** ( -+** fcmgt p0\.d, p0/z, z1\.d, z0\.d -+** | -+** fcmlt p0\.d, p0/z, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_f64_tied, svfloat64_t, -+ p0 = svcmplt_f64 (p0, z0, z1), -+ p0 = svcmplt (p0, z0, z1)) -+ -+/* -+** cmplt_f64_untied: -+** ( -+** fcmgt p0\.d, p1/z, z1\.d, z0\.d -+** | -+** fcmlt p0\.d, p1/z, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_f64_untied, svfloat64_t, -+ p0 = svcmplt_f64 (p1, z0, z1), -+ p0 = svcmplt (p1, z0, z1)) -+ -+/* -+** cmplt_d4_f64: -+** mov (z[0-9]+\.d), d4 -+** ( -+** fcmgt p0\.d, p1/z, \1, z0\.d -+** | -+** fcmlt p0\.d, p1/z, z0\.d, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_ZD (cmplt_d4_f64, svfloat64_t, float64_t, -+ p0 = svcmplt_n_f64 (p1, z0, d4), -+ p0 = svcmplt (p1, z0, d4)) -+ -+/* -+** cmplt_0_f64: -+** fcmlt p0\.d, p1/z, z0\.d, #0\.0 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_0_f64, svfloat64_t, -+ p0 = svcmplt_n_f64 (p1, z0, 0), -+ p0 = svcmplt (p1, z0, 0)) -+ -+/* -+** cmplt_1_f64: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? -+** ( -+** fcmgt p0\.d, p1/z, \1, z0\.d -+** | -+** fcmlt p0\.d, p1/z, z0\.d, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_1_f64, svfloat64_t, -+ p0 = svcmplt_n_f64 (p1, z0, 1), -+ p0 = svcmplt (p1, z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_s16.c -new file mode 100644 -index 000000000..1e2bf9dde ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_s16.c -@@ -0,0 +1,116 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmplt_s16_tied: -+** ( -+** cmpgt p0\.h, p0/z, z1\.h, z0\.h -+** | -+** cmplt p0\.h, p0/z, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_s16_tied, svint16_t, -+ p0 = svcmplt_s16 (p0, z0, z1), -+ p0 = svcmplt (p0, z0, z1)) -+ -+/* -+** cmplt_s16_untied: -+** ( -+** cmpgt p0\.h, p1/z, z1\.h, z0\.h -+** | -+** cmplt p0\.h, p1/z, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_s16_untied, svint16_t, -+ p0 = svcmplt_s16 (p1, z0, z1), -+ p0 = svcmplt (p1, z0, z1)) -+ -+/* -+** cmplt_w0_s16: -+** mov (z[0-9]+\.h), w0 -+** ( -+** cmpgt p0\.h, p1/z, \1, z0\.h -+** | -+** cmplt p0\.h, p1/z, z0\.h, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_ZX (cmplt_w0_s16, svint16_t, int16_t, -+ p0 = svcmplt_n_s16 (p1, z0, x0), -+ p0 = svcmplt (p1, z0, x0)) -+ -+/* -+** cmplt_0_s16: -+** cmplt p0\.h, p1/z, z0\.h, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_0_s16, svint16_t, -+ p0 = svcmplt_n_s16 (p1, z0, 0), -+ p0 = svcmplt (p1, z0, 0)) -+ -+/* -+** cmplt_1_s16: -+** cmplt p0\.h, p1/z, z0\.h, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_1_s16, svint16_t, -+ p0 = svcmplt_n_s16 (p1, z0, 1), -+ p0 = svcmplt (p1, z0, 1)) -+ -+/* -+** cmplt_15_s16: -+** cmplt p0\.h, p1/z, z0\.h, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_15_s16, svint16_t, -+ p0 = svcmplt_n_s16 (p1, z0, 15), -+ p0 = svcmplt (p1, z0, 15)) -+ -+/* -+** 
cmplt_16_s16: -+** mov (z[0-9]+\.h), #16 -+** ( -+** cmpgt p0\.h, p1/z, \1, z0\.h -+** | -+** cmplt p0\.h, p1/z, z0\.h, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_16_s16, svint16_t, -+ p0 = svcmplt_n_s16 (p1, z0, 16), -+ p0 = svcmplt (p1, z0, 16)) -+ -+/* -+** cmplt_m1_s16: -+** cmplt p0\.h, p1/z, z0\.h, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_m1_s16, svint16_t, -+ p0 = svcmplt_n_s16 (p1, z0, -1), -+ p0 = svcmplt (p1, z0, -1)) -+ -+/* -+** cmplt_m16_s16: -+** cmplt p0\.h, p1/z, z0\.h, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_m16_s16, svint16_t, -+ p0 = svcmplt_n_s16 (p1, z0, -16), -+ p0 = svcmplt (p1, z0, -16)) -+ -+/* -+** cmplt_m17_s16: -+** mov (z[0-9]+\.h), #-17 -+** ( -+** cmpgt p0\.h, p1/z, \1, z0\.h -+** | -+** cmplt p0\.h, p1/z, z0\.h, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_m17_s16, svint16_t, -+ p0 = svcmplt_n_s16 (p1, z0, -17), -+ p0 = svcmplt (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_s32.c -new file mode 100644 -index 000000000..8e2c02c4d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_s32.c -@@ -0,0 +1,116 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmplt_s32_tied: -+** ( -+** cmpgt p0\.s, p0/z, z1\.s, z0\.s -+** | -+** cmplt p0\.s, p0/z, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_s32_tied, svint32_t, -+ p0 = svcmplt_s32 (p0, z0, z1), -+ p0 = svcmplt (p0, z0, z1)) -+ -+/* -+** cmplt_s32_untied: -+** ( -+** cmpgt p0\.s, p1/z, z1\.s, z0\.s -+** | -+** cmplt p0\.s, p1/z, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_s32_untied, svint32_t, -+ p0 = svcmplt_s32 (p1, z0, z1), -+ p0 = svcmplt (p1, z0, z1)) -+ -+/* -+** cmplt_w0_s32: -+** mov (z[0-9]+\.s), w0 -+** ( -+** cmpgt p0\.s, p1/z, \1, z0\.s -+** | -+** cmplt p0\.s, p1/z, z0\.s, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_ZX (cmplt_w0_s32, svint32_t, int32_t, -+ p0 = svcmplt_n_s32 (p1, z0, x0), -+ p0 = svcmplt (p1, z0, x0)) -+ -+/* -+** cmplt_0_s32: -+** cmplt p0\.s, p1/z, z0\.s, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_0_s32, svint32_t, -+ p0 = svcmplt_n_s32 (p1, z0, 0), -+ p0 = svcmplt (p1, z0, 0)) -+ -+/* -+** cmplt_1_s32: -+** cmplt p0\.s, p1/z, z0\.s, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_1_s32, svint32_t, -+ p0 = svcmplt_n_s32 (p1, z0, 1), -+ p0 = svcmplt (p1, z0, 1)) -+ -+/* -+** cmplt_15_s32: -+** cmplt p0\.s, p1/z, z0\.s, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_15_s32, svint32_t, -+ p0 = svcmplt_n_s32 (p1, z0, 15), -+ p0 = svcmplt (p1, z0, 15)) -+ -+/* -+** cmplt_16_s32: -+** mov (z[0-9]+\.s), #16 -+** ( -+** cmpgt p0\.s, p1/z, \1, z0\.s -+** | -+** cmplt p0\.s, p1/z, z0\.s, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_16_s32, svint32_t, -+ p0 = svcmplt_n_s32 (p1, z0, 16), -+ p0 = svcmplt (p1, z0, 16)) -+ -+/* -+** cmplt_m1_s32: -+** cmplt p0\.s, p1/z, z0\.s, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_m1_s32, svint32_t, -+ p0 = svcmplt_n_s32 (p1, z0, -1), -+ p0 = svcmplt (p1, z0, -1)) -+ -+/* -+** cmplt_m16_s32: -+** cmplt p0\.s, p1/z, z0\.s, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_m16_s32, svint32_t, -+ p0 = svcmplt_n_s32 (p1, z0, -16), -+ p0 = svcmplt (p1, z0, -16)) -+ -+/* -+** cmplt_m17_s32: -+** mov (z[0-9]+\.s), #-17 -+** ( -+** cmpgt p0\.s, p1/z, \1, z0\.s -+** | -+** cmplt p0\.s, p1/z, z0\.s, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_m17_s32, svint32_t, -+ p0 = svcmplt_n_s32 (p1, z0, -17), -+ p0 = svcmplt (p1, z0, -17)) -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_s64.c -new file mode 100644 -index 000000000..818c9fba9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_s64.c -@@ -0,0 +1,116 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmplt_s64_tied: -+** ( -+** cmpgt p0\.d, p0/z, z1\.d, z0\.d -+** | -+** cmplt p0\.d, p0/z, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_s64_tied, svint64_t, -+ p0 = svcmplt_s64 (p0, z0, z1), -+ p0 = svcmplt (p0, z0, z1)) -+ -+/* -+** cmplt_s64_untied: -+** ( -+** cmpgt p0\.d, p1/z, z1\.d, z0\.d -+** | -+** cmplt p0\.d, p1/z, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_s64_untied, svint64_t, -+ p0 = svcmplt_s64 (p1, z0, z1), -+ p0 = svcmplt (p1, z0, z1)) -+ -+/* -+** cmplt_x0_s64: -+** mov (z[0-9]+\.d), x0 -+** ( -+** cmpgt p0\.d, p1/z, \1, z0\.d -+** | -+** cmplt p0\.d, p1/z, z0\.d, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_ZX (cmplt_x0_s64, svint64_t, int64_t, -+ p0 = svcmplt_n_s64 (p1, z0, x0), -+ p0 = svcmplt (p1, z0, x0)) -+ -+/* -+** cmplt_0_s64: -+** cmplt p0\.d, p1/z, z0\.d, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_0_s64, svint64_t, -+ p0 = svcmplt_n_s64 (p1, z0, 0), -+ p0 = svcmplt (p1, z0, 0)) -+ -+/* -+** cmplt_1_s64: -+** cmplt p0\.d, p1/z, z0\.d, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_1_s64, svint64_t, -+ p0 = svcmplt_n_s64 (p1, z0, 1), -+ p0 = svcmplt (p1, z0, 1)) -+ -+/* -+** cmplt_15_s64: -+** cmplt p0\.d, p1/z, z0\.d, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_15_s64, svint64_t, -+ p0 = svcmplt_n_s64 (p1, z0, 15), -+ p0 = svcmplt (p1, z0, 15)) -+ -+/* -+** cmplt_16_s64: -+** mov (z[0-9]+\.d), #16 -+** ( -+** cmpgt p0\.d, p1/z, \1, z0\.d -+** | -+** cmplt p0\.d, p1/z, z0\.d, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_16_s64, svint64_t, -+ p0 = svcmplt_n_s64 (p1, z0, 16), -+ p0 = svcmplt (p1, z0, 16)) -+ -+/* -+** cmplt_m1_s64: -+** cmplt p0\.d, p1/z, z0\.d, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_m1_s64, svint64_t, -+ p0 = svcmplt_n_s64 (p1, z0, -1), -+ p0 = svcmplt (p1, z0, -1)) -+ -+/* -+** cmplt_m16_s64: -+** cmplt p0\.d, p1/z, z0\.d, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_m16_s64, svint64_t, -+ p0 = svcmplt_n_s64 (p1, z0, -16), -+ p0 = svcmplt (p1, z0, -16)) -+ -+/* -+** cmplt_m17_s64: -+** mov (z[0-9]+\.d), #-17 -+** ( -+** cmpgt p0\.d, p1/z, \1, z0\.d -+** | -+** cmplt p0\.d, p1/z, z0\.d, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_m17_s64, svint64_t, -+ p0 = svcmplt_n_s64 (p1, z0, -17), -+ p0 = svcmplt (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_s8.c -new file mode 100644 -index 000000000..54b8dc408 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_s8.c -@@ -0,0 +1,116 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmplt_s8_tied: -+** ( -+** cmpgt p0\.b, p0/z, z1\.b, z0\.b -+** | -+** cmplt p0\.b, p0/z, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_s8_tied, svint8_t, -+ p0 = svcmplt_s8 (p0, z0, z1), -+ p0 = svcmplt (p0, z0, z1)) -+ -+/* -+** cmplt_s8_untied: -+** ( -+** cmpgt p0\.b, p1/z, z1\.b, z0\.b -+** | -+** cmplt p0\.b, p1/z, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_s8_untied, svint8_t, -+ p0 = svcmplt_s8 (p1, z0, z1), -+ p0 = svcmplt (p1, z0, z1)) -+ -+/* -+** cmplt_w0_s8: -+** mov (z[0-9]+\.b), w0 -+** ( -+** cmpgt p0\.b, p1/z, 
\1, z0\.b -+** | -+** cmplt p0\.b, p1/z, z0\.b, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_ZX (cmplt_w0_s8, svint8_t, int8_t, -+ p0 = svcmplt_n_s8 (p1, z0, x0), -+ p0 = svcmplt (p1, z0, x0)) -+ -+/* -+** cmplt_0_s8: -+** cmplt p0\.b, p1/z, z0\.b, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_0_s8, svint8_t, -+ p0 = svcmplt_n_s8 (p1, z0, 0), -+ p0 = svcmplt (p1, z0, 0)) -+ -+/* -+** cmplt_1_s8: -+** cmplt p0\.b, p1/z, z0\.b, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_1_s8, svint8_t, -+ p0 = svcmplt_n_s8 (p1, z0, 1), -+ p0 = svcmplt (p1, z0, 1)) -+ -+/* -+** cmplt_15_s8: -+** cmplt p0\.b, p1/z, z0\.b, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_15_s8, svint8_t, -+ p0 = svcmplt_n_s8 (p1, z0, 15), -+ p0 = svcmplt (p1, z0, 15)) -+ -+/* -+** cmplt_16_s8: -+** mov (z[0-9]+\.b), #16 -+** ( -+** cmpgt p0\.b, p1/z, \1, z0\.b -+** | -+** cmplt p0\.b, p1/z, z0\.b, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_16_s8, svint8_t, -+ p0 = svcmplt_n_s8 (p1, z0, 16), -+ p0 = svcmplt (p1, z0, 16)) -+ -+/* -+** cmplt_m1_s8: -+** cmplt p0\.b, p1/z, z0\.b, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_m1_s8, svint8_t, -+ p0 = svcmplt_n_s8 (p1, z0, -1), -+ p0 = svcmplt (p1, z0, -1)) -+ -+/* -+** cmplt_m16_s8: -+** cmplt p0\.b, p1/z, z0\.b, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_m16_s8, svint8_t, -+ p0 = svcmplt_n_s8 (p1, z0, -16), -+ p0 = svcmplt (p1, z0, -16)) -+ -+/* -+** cmplt_m17_s8: -+** mov (z[0-9]+\.b), #-17 -+** ( -+** cmpgt p0\.b, p1/z, \1, z0\.b -+** | -+** cmplt p0\.b, p1/z, z0\.b, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_m17_s8, svint8_t, -+ p0 = svcmplt_n_s8 (p1, z0, -17), -+ p0 = svcmplt (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_u16.c -new file mode 100644 -index 000000000..c0f2a0550 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_u16.c -@@ -0,0 +1,116 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmplt_u16_tied: -+** ( -+** cmphi p0\.h, p0/z, z1\.h, z0\.h -+** | -+** cmplo p0\.h, p0/z, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_u16_tied, svuint16_t, -+ p0 = svcmplt_u16 (p0, z0, z1), -+ p0 = svcmplt (p0, z0, z1)) -+ -+/* -+** cmplt_u16_untied: -+** ( -+** cmphi p0\.h, p1/z, z1\.h, z0\.h -+** | -+** cmplo p0\.h, p1/z, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_u16_untied, svuint16_t, -+ p0 = svcmplt_u16 (p1, z0, z1), -+ p0 = svcmplt (p1, z0, z1)) -+ -+/* -+** cmplt_w0_u16: -+** mov (z[0-9]+\.h), w0 -+** ( -+** cmphi p0\.h, p1/z, \1, z0\.h -+** | -+** cmplo p0\.h, p1/z, z0\.h, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_ZX (cmplt_w0_u16, svuint16_t, uint16_t, -+ p0 = svcmplt_n_u16 (p1, z0, x0), -+ p0 = svcmplt (p1, z0, x0)) -+ -+/* -+** cmplt_0_u16: -+** cmplo p0\.h, p1/z, z0\.h, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_0_u16, svuint16_t, -+ p0 = svcmplt_n_u16 (p1, z0, 0), -+ p0 = svcmplt (p1, z0, 0)) -+ -+/* -+** cmplt_1_u16: -+** cmplo p0\.h, p1/z, z0\.h, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_1_u16, svuint16_t, -+ p0 = svcmplt_n_u16 (p1, z0, 1), -+ p0 = svcmplt (p1, z0, 1)) -+ -+/* -+** cmplt_15_u16: -+** cmplo p0\.h, p1/z, z0\.h, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_15_u16, svuint16_t, -+ p0 = svcmplt_n_u16 (p1, z0, 15), -+ p0 = svcmplt (p1, z0, 15)) -+ -+/* -+** cmplt_16_u16: -+** cmplo p0\.h, p1/z, z0\.h, #16 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_16_u16, svuint16_t, -+ p0 = svcmplt_n_u16 (p1, z0, 16), -+ p0 = svcmplt (p1, z0, 16)) -+ -+/* -+** cmplt_127_u16: -+** cmplo p0\.h, 
p1/z, z0\.h, #127 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_127_u16, svuint16_t, -+ p0 = svcmplt_n_u16 (p1, z0, 127), -+ p0 = svcmplt (p1, z0, 127)) -+ -+/* -+** cmplt_128_u16: -+** mov (z[0-9]+\.h), #128 -+** ( -+** cmphi p0\.h, p1/z, \1, z0\.h -+** | -+** cmplo p0\.h, p1/z, z0\.h, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_128_u16, svuint16_t, -+ p0 = svcmplt_n_u16 (p1, z0, 128), -+ p0 = svcmplt (p1, z0, 128)) -+ -+/* -+** cmplt_m1_u16: -+** mov (z[0-9]+)\.b, #-1 -+** ( -+** cmphi p0\.h, p1/z, \1\.h, z0\.h -+** | -+** cmplo p0\.h, p1/z, z0\.h, \1\.h -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_m1_u16, svuint16_t, -+ p0 = svcmplt_n_u16 (p1, z0, -1), -+ p0 = svcmplt (p1, z0, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_u32.c -new file mode 100644 -index 000000000..3bb0b1464 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_u32.c -@@ -0,0 +1,116 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmplt_u32_tied: -+** ( -+** cmphi p0\.s, p0/z, z1\.s, z0\.s -+** | -+** cmplo p0\.s, p0/z, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_u32_tied, svuint32_t, -+ p0 = svcmplt_u32 (p0, z0, z1), -+ p0 = svcmplt (p0, z0, z1)) -+ -+/* -+** cmplt_u32_untied: -+** ( -+** cmphi p0\.s, p1/z, z1\.s, z0\.s -+** | -+** cmplo p0\.s, p1/z, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_u32_untied, svuint32_t, -+ p0 = svcmplt_u32 (p1, z0, z1), -+ p0 = svcmplt (p1, z0, z1)) -+ -+/* -+** cmplt_w0_u32: -+** mov (z[0-9]+\.s), w0 -+** ( -+** cmphi p0\.s, p1/z, \1, z0\.s -+** | -+** cmplo p0\.s, p1/z, z0\.s, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_ZX (cmplt_w0_u32, svuint32_t, uint32_t, -+ p0 = svcmplt_n_u32 (p1, z0, x0), -+ p0 = svcmplt (p1, z0, x0)) -+ -+/* -+** cmplt_0_u32: -+** cmplo p0\.s, p1/z, z0\.s, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_0_u32, svuint32_t, -+ p0 = svcmplt_n_u32 (p1, z0, 0), -+ p0 = svcmplt (p1, z0, 0)) -+ -+/* -+** cmplt_1_u32: -+** cmplo p0\.s, p1/z, z0\.s, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_1_u32, svuint32_t, -+ p0 = svcmplt_n_u32 (p1, z0, 1), -+ p0 = svcmplt (p1, z0, 1)) -+ -+/* -+** cmplt_15_u32: -+** cmplo p0\.s, p1/z, z0\.s, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_15_u32, svuint32_t, -+ p0 = svcmplt_n_u32 (p1, z0, 15), -+ p0 = svcmplt (p1, z0, 15)) -+ -+/* -+** cmplt_16_u32: -+** cmplo p0\.s, p1/z, z0\.s, #16 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_16_u32, svuint32_t, -+ p0 = svcmplt_n_u32 (p1, z0, 16), -+ p0 = svcmplt (p1, z0, 16)) -+ -+/* -+** cmplt_127_u32: -+** cmplo p0\.s, p1/z, z0\.s, #127 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_127_u32, svuint32_t, -+ p0 = svcmplt_n_u32 (p1, z0, 127), -+ p0 = svcmplt (p1, z0, 127)) -+ -+/* -+** cmplt_128_u32: -+** mov (z[0-9]+\.s), #128 -+** ( -+** cmphi p0\.s, p1/z, \1, z0\.s -+** | -+** cmplo p0\.s, p1/z, z0\.s, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_128_u32, svuint32_t, -+ p0 = svcmplt_n_u32 (p1, z0, 128), -+ p0 = svcmplt (p1, z0, 128)) -+ -+/* -+** cmplt_m1_u32: -+** mov (z[0-9]+)\.b, #-1 -+** ( -+** cmphi p0\.s, p1/z, \1\.s, z0\.s -+** | -+** cmplo p0\.s, p1/z, z0\.s, \1\.s -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_m1_u32, svuint32_t, -+ p0 = svcmplt_n_u32 (p1, z0, -1), -+ p0 = svcmplt (p1, z0, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_u64.c -new file mode 100644 -index 000000000..d9de5add2 ---- /dev/null -+++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_u64.c -@@ -0,0 +1,116 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmplt_u64_tied: -+** ( -+** cmphi p0\.d, p0/z, z1\.d, z0\.d -+** | -+** cmplo p0\.d, p0/z, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_u64_tied, svuint64_t, -+ p0 = svcmplt_u64 (p0, z0, z1), -+ p0 = svcmplt (p0, z0, z1)) -+ -+/* -+** cmplt_u64_untied: -+** ( -+** cmphi p0\.d, p1/z, z1\.d, z0\.d -+** | -+** cmplo p0\.d, p1/z, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_u64_untied, svuint64_t, -+ p0 = svcmplt_u64 (p1, z0, z1), -+ p0 = svcmplt (p1, z0, z1)) -+ -+/* -+** cmplt_x0_u64: -+** mov (z[0-9]+\.d), x0 -+** ( -+** cmphi p0\.d, p1/z, \1, z0\.d -+** | -+** cmplo p0\.d, p1/z, z0\.d, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_ZX (cmplt_x0_u64, svuint64_t, uint64_t, -+ p0 = svcmplt_n_u64 (p1, z0, x0), -+ p0 = svcmplt (p1, z0, x0)) -+ -+/* -+** cmplt_0_u64: -+** cmplo p0\.d, p1/z, z0\.d, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_0_u64, svuint64_t, -+ p0 = svcmplt_n_u64 (p1, z0, 0), -+ p0 = svcmplt (p1, z0, 0)) -+ -+/* -+** cmplt_1_u64: -+** cmplo p0\.d, p1/z, z0\.d, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_1_u64, svuint64_t, -+ p0 = svcmplt_n_u64 (p1, z0, 1), -+ p0 = svcmplt (p1, z0, 1)) -+ -+/* -+** cmplt_15_u64: -+** cmplo p0\.d, p1/z, z0\.d, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_15_u64, svuint64_t, -+ p0 = svcmplt_n_u64 (p1, z0, 15), -+ p0 = svcmplt (p1, z0, 15)) -+ -+/* -+** cmplt_16_u64: -+** cmplo p0\.d, p1/z, z0\.d, #16 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_16_u64, svuint64_t, -+ p0 = svcmplt_n_u64 (p1, z0, 16), -+ p0 = svcmplt (p1, z0, 16)) -+ -+/* -+** cmplt_127_u64: -+** cmplo p0\.d, p1/z, z0\.d, #127 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_127_u64, svuint64_t, -+ p0 = svcmplt_n_u64 (p1, z0, 127), -+ p0 = svcmplt (p1, z0, 127)) -+ -+/* -+** cmplt_128_u64: -+** mov (z[0-9]+\.d), #128 -+** ( -+** cmphi p0\.d, p1/z, \1, z0\.d -+** | -+** cmplo p0\.d, p1/z, z0\.d, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_128_u64, svuint64_t, -+ p0 = svcmplt_n_u64 (p1, z0, 128), -+ p0 = svcmplt (p1, z0, 128)) -+ -+/* -+** cmplt_m1_u64: -+** mov (z[0-9]+)\.b, #-1 -+** ( -+** cmphi p0\.d, p1/z, \1\.d, z0\.d -+** | -+** cmplo p0\.d, p1/z, z0\.d, \1\.d -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_m1_u64, svuint64_t, -+ p0 = svcmplt_n_u64 (p1, z0, -1), -+ p0 = svcmplt (p1, z0, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_u8.c -new file mode 100644 -index 000000000..42d5ad868 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_u8.c -@@ -0,0 +1,116 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmplt_u8_tied: -+** ( -+** cmphi p0\.b, p0/z, z1\.b, z0\.b -+** | -+** cmplo p0\.b, p0/z, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_u8_tied, svuint8_t, -+ p0 = svcmplt_u8 (p0, z0, z1), -+ p0 = svcmplt (p0, z0, z1)) -+ -+/* -+** cmplt_u8_untied: -+** ( -+** cmphi p0\.b, p1/z, z1\.b, z0\.b -+** | -+** cmplo p0\.b, p1/z, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_u8_untied, svuint8_t, -+ p0 = svcmplt_u8 (p1, z0, z1), -+ p0 = svcmplt (p1, z0, z1)) -+ -+/* -+** cmplt_w0_u8: -+** mov (z[0-9]+\.b), w0 -+** ( -+** cmphi p0\.b, p1/z, \1, z0\.b -+** | -+** cmplo p0\.b, p1/z, z0\.b, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_ZX (cmplt_w0_u8, svuint8_t, uint8_t, -+ p0 = svcmplt_n_u8 (p1, z0, x0), -+ p0 = 
svcmplt (p1, z0, x0)) -+ -+/* -+** cmplt_0_u8: -+** cmplo p0\.b, p1/z, z0\.b, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_0_u8, svuint8_t, -+ p0 = svcmplt_n_u8 (p1, z0, 0), -+ p0 = svcmplt (p1, z0, 0)) -+ -+/* -+** cmplt_1_u8: -+** cmplo p0\.b, p1/z, z0\.b, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_1_u8, svuint8_t, -+ p0 = svcmplt_n_u8 (p1, z0, 1), -+ p0 = svcmplt (p1, z0, 1)) -+ -+/* -+** cmplt_15_u8: -+** cmplo p0\.b, p1/z, z0\.b, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_15_u8, svuint8_t, -+ p0 = svcmplt_n_u8 (p1, z0, 15), -+ p0 = svcmplt (p1, z0, 15)) -+ -+/* -+** cmplt_16_u8: -+** cmplo p0\.b, p1/z, z0\.b, #16 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_16_u8, svuint8_t, -+ p0 = svcmplt_n_u8 (p1, z0, 16), -+ p0 = svcmplt (p1, z0, 16)) -+ -+/* -+** cmplt_127_u8: -+** cmplo p0\.b, p1/z, z0\.b, #127 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_127_u8, svuint8_t, -+ p0 = svcmplt_n_u8 (p1, z0, 127), -+ p0 = svcmplt (p1, z0, 127)) -+ -+/* -+** cmplt_128_u8: -+** mov (z[0-9]+\.b), #-128 -+** ( -+** cmphi p0\.b, p1/z, \1, z0\.b -+** | -+** cmplo p0\.b, p1/z, z0\.b, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_128_u8, svuint8_t, -+ p0 = svcmplt_n_u8 (p1, z0, 128), -+ p0 = svcmplt (p1, z0, 128)) -+ -+/* -+** cmplt_m1_u8: -+** mov (z[0-9]+\.b), #-1 -+** ( -+** cmphi p0\.b, p1/z, \1, z0\.b -+** | -+** cmplo p0\.b, p1/z, z0\.b, \1 -+** ) -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_m1_u8, svuint8_t, -+ p0 = svcmplt_n_u8 (p1, z0, -1), -+ p0 = svcmplt (p1, z0, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_wide_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_wide_s16.c -new file mode 100644 -index 000000000..a3c8942ba ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_wide_s16.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmplt_wide_s16_tied: -+** cmplt p0\.h, p0/z, z0\.h, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmplt_wide_s16_tied, svint16_t, svint64_t, -+ p0 = svcmplt_wide_s16 (p0, z0, z1), -+ p0 = svcmplt_wide (p0, z0, z1)) -+ -+/* -+** cmplt_wide_s16_untied: -+** cmplt p0\.h, p1/z, z0\.h, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmplt_wide_s16_untied, svint16_t, svint64_t, -+ p0 = svcmplt_wide_s16 (p1, z0, z1), -+ p0 = svcmplt_wide (p1, z0, z1)) -+ -+/* -+** cmplt_wide_x0_s16: -+** mov (z[0-9]+\.d), x0 -+** cmplt p0\.h, p1/z, z0\.h, \1 -+** ret -+*/ -+TEST_COMPARE_ZX (cmplt_wide_x0_s16, svint16_t, int64_t, -+ p0 = svcmplt_wide_n_s16 (p1, z0, x0), -+ p0 = svcmplt_wide (p1, z0, x0)) -+ -+/* -+** cmplt_wide_0_s16: -+** cmplt p0\.h, p1/z, z0\.h, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_0_s16, svint16_t, -+ p0 = svcmplt_wide_n_s16 (p1, z0, 0), -+ p0 = svcmplt_wide (p1, z0, 0)) -+ -+/* -+** cmplt_wide_1_s16: -+** cmplt p0\.h, p1/z, z0\.h, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_1_s16, svint16_t, -+ p0 = svcmplt_wide_n_s16 (p1, z0, 1), -+ p0 = svcmplt_wide (p1, z0, 1)) -+ -+/* -+** cmplt_wide_15_s16: -+** cmplt p0\.h, p1/z, z0\.h, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_15_s16, svint16_t, -+ p0 = svcmplt_wide_n_s16 (p1, z0, 15), -+ p0 = svcmplt_wide (p1, z0, 15)) -+ -+/* -+** cmplt_wide_16_s16: -+** mov (z[0-9]+\.d), #16 -+** cmplt p0\.h, p1/z, z0\.h, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_16_s16, svint16_t, -+ p0 = svcmplt_wide_n_s16 (p1, z0, 16), -+ p0 = svcmplt_wide (p1, z0, 16)) -+ -+/* -+** cmplt_wide_m1_s16: -+** cmplt p0\.h, p1/z, z0\.h, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_m1_s16, svint16_t, -+ p0 = svcmplt_wide_n_s16 (p1, z0, -1), 
-+ p0 = svcmplt_wide (p1, z0, -1)) -+ -+/* -+** cmplt_wide_m16_s16: -+** cmplt p0\.h, p1/z, z0\.h, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_m16_s16, svint16_t, -+ p0 = svcmplt_wide_n_s16 (p1, z0, -16), -+ p0 = svcmplt_wide (p1, z0, -16)) -+ -+/* -+** cmplt_wide_m17_s16: -+** mov (z[0-9]+\.d), #-17 -+** cmplt p0\.h, p1/z, z0\.h, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_m17_s16, svint16_t, -+ p0 = svcmplt_wide_n_s16 (p1, z0, -17), -+ p0 = svcmplt_wide (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_wide_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_wide_s32.c -new file mode 100644 -index 000000000..b2cad6773 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_wide_s32.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmplt_wide_s32_tied: -+** cmplt p0\.s, p0/z, z0\.s, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmplt_wide_s32_tied, svint32_t, svint64_t, -+ p0 = svcmplt_wide_s32 (p0, z0, z1), -+ p0 = svcmplt_wide (p0, z0, z1)) -+ -+/* -+** cmplt_wide_s32_untied: -+** cmplt p0\.s, p1/z, z0\.s, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmplt_wide_s32_untied, svint32_t, svint64_t, -+ p0 = svcmplt_wide_s32 (p1, z0, z1), -+ p0 = svcmplt_wide (p1, z0, z1)) -+ -+/* -+** cmplt_wide_x0_s32: -+** mov (z[0-9]+\.d), x0 -+** cmplt p0\.s, p1/z, z0\.s, \1 -+** ret -+*/ -+TEST_COMPARE_ZX (cmplt_wide_x0_s32, svint32_t, int64_t, -+ p0 = svcmplt_wide_n_s32 (p1, z0, x0), -+ p0 = svcmplt_wide (p1, z0, x0)) -+ -+/* -+** cmplt_wide_0_s32: -+** cmplt p0\.s, p1/z, z0\.s, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_0_s32, svint32_t, -+ p0 = svcmplt_wide_n_s32 (p1, z0, 0), -+ p0 = svcmplt_wide (p1, z0, 0)) -+ -+/* -+** cmplt_wide_1_s32: -+** cmplt p0\.s, p1/z, z0\.s, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_1_s32, svint32_t, -+ p0 = svcmplt_wide_n_s32 (p1, z0, 1), -+ p0 = svcmplt_wide (p1, z0, 1)) -+ -+/* -+** cmplt_wide_15_s32: -+** cmplt p0\.s, p1/z, z0\.s, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_15_s32, svint32_t, -+ p0 = svcmplt_wide_n_s32 (p1, z0, 15), -+ p0 = svcmplt_wide (p1, z0, 15)) -+ -+/* -+** cmplt_wide_16_s32: -+** mov (z[0-9]+\.d), #16 -+** cmplt p0\.s, p1/z, z0\.s, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_16_s32, svint32_t, -+ p0 = svcmplt_wide_n_s32 (p1, z0, 16), -+ p0 = svcmplt_wide (p1, z0, 16)) -+ -+/* -+** cmplt_wide_m1_s32: -+** cmplt p0\.s, p1/z, z0\.s, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_m1_s32, svint32_t, -+ p0 = svcmplt_wide_n_s32 (p1, z0, -1), -+ p0 = svcmplt_wide (p1, z0, -1)) -+ -+/* -+** cmplt_wide_m16_s32: -+** cmplt p0\.s, p1/z, z0\.s, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_m16_s32, svint32_t, -+ p0 = svcmplt_wide_n_s32 (p1, z0, -16), -+ p0 = svcmplt_wide (p1, z0, -16)) -+ -+/* -+** cmplt_wide_m17_s32: -+** mov (z[0-9]+\.d), #-17 -+** cmplt p0\.s, p1/z, z0\.s, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_m17_s32, svint32_t, -+ p0 = svcmplt_wide_n_s32 (p1, z0, -17), -+ p0 = svcmplt_wide (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_wide_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_wide_s8.c -new file mode 100644 -index 000000000..1015fe309 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_wide_s8.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmplt_wide_s8_tied: -+** cmplt p0\.b, p0/z, z0\.b, z1\.d -+** ret -+*/ 
-+TEST_COMPARE_DUAL_Z (cmplt_wide_s8_tied, svint8_t, svint64_t, -+ p0 = svcmplt_wide_s8 (p0, z0, z1), -+ p0 = svcmplt_wide (p0, z0, z1)) -+ -+/* -+** cmplt_wide_s8_untied: -+** cmplt p0\.b, p1/z, z0\.b, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmplt_wide_s8_untied, svint8_t, svint64_t, -+ p0 = svcmplt_wide_s8 (p1, z0, z1), -+ p0 = svcmplt_wide (p1, z0, z1)) -+ -+/* -+** cmplt_wide_x0_s8: -+** mov (z[0-9]+\.d), x0 -+** cmplt p0\.b, p1/z, z0\.b, \1 -+** ret -+*/ -+TEST_COMPARE_ZX (cmplt_wide_x0_s8, svint8_t, int64_t, -+ p0 = svcmplt_wide_n_s8 (p1, z0, x0), -+ p0 = svcmplt_wide (p1, z0, x0)) -+ -+/* -+** cmplt_wide_0_s8: -+** cmplt p0\.b, p1/z, z0\.b, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_0_s8, svint8_t, -+ p0 = svcmplt_wide_n_s8 (p1, z0, 0), -+ p0 = svcmplt_wide (p1, z0, 0)) -+ -+/* -+** cmplt_wide_1_s8: -+** cmplt p0\.b, p1/z, z0\.b, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_1_s8, svint8_t, -+ p0 = svcmplt_wide_n_s8 (p1, z0, 1), -+ p0 = svcmplt_wide (p1, z0, 1)) -+ -+/* -+** cmplt_wide_15_s8: -+** cmplt p0\.b, p1/z, z0\.b, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_15_s8, svint8_t, -+ p0 = svcmplt_wide_n_s8 (p1, z0, 15), -+ p0 = svcmplt_wide (p1, z0, 15)) -+ -+/* -+** cmplt_wide_16_s8: -+** mov (z[0-9]+\.d), #16 -+** cmplt p0\.b, p1/z, z0\.b, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_16_s8, svint8_t, -+ p0 = svcmplt_wide_n_s8 (p1, z0, 16), -+ p0 = svcmplt_wide (p1, z0, 16)) -+ -+/* -+** cmplt_wide_m1_s8: -+** cmplt p0\.b, p1/z, z0\.b, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_m1_s8, svint8_t, -+ p0 = svcmplt_wide_n_s8 (p1, z0, -1), -+ p0 = svcmplt_wide (p1, z0, -1)) -+ -+/* -+** cmplt_wide_m16_s8: -+** cmplt p0\.b, p1/z, z0\.b, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_m16_s8, svint8_t, -+ p0 = svcmplt_wide_n_s8 (p1, z0, -16), -+ p0 = svcmplt_wide (p1, z0, -16)) -+ -+/* -+** cmplt_wide_m17_s8: -+** mov (z[0-9]+\.d), #-17 -+** cmplt p0\.b, p1/z, z0\.b, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_m17_s8, svint8_t, -+ p0 = svcmplt_wide_n_s8 (p1, z0, -17), -+ p0 = svcmplt_wide (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_wide_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_wide_u16.c -new file mode 100644 -index 000000000..851400d36 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_wide_u16.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmplt_wide_u16_tied: -+** cmplo p0\.h, p0/z, z0\.h, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmplt_wide_u16_tied, svuint16_t, svuint64_t, -+ p0 = svcmplt_wide_u16 (p0, z0, z1), -+ p0 = svcmplt_wide (p0, z0, z1)) -+ -+/* -+** cmplt_wide_u16_untied: -+** cmplo p0\.h, p1/z, z0\.h, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmplt_wide_u16_untied, svuint16_t, svuint64_t, -+ p0 = svcmplt_wide_u16 (p1, z0, z1), -+ p0 = svcmplt_wide (p1, z0, z1)) -+ -+/* -+** cmplt_wide_x0_u16: -+** mov (z[0-9]+\.d), x0 -+** cmplo p0\.h, p1/z, z0\.h, \1 -+** ret -+*/ -+TEST_COMPARE_ZX (cmplt_wide_x0_u16, svuint16_t, uint64_t, -+ p0 = svcmplt_wide_n_u16 (p1, z0, x0), -+ p0 = svcmplt_wide (p1, z0, x0)) -+ -+/* -+** cmplt_wide_0_u16: -+** cmplo p0\.h, p1/z, z0\.h, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_0_u16, svuint16_t, -+ p0 = svcmplt_wide_n_u16 (p1, z0, 0), -+ p0 = svcmplt_wide (p1, z0, 0)) -+ -+/* -+** cmplt_wide_1_u16: -+** cmplo p0\.h, p1/z, z0\.h, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_1_u16, svuint16_t, -+ p0 = svcmplt_wide_n_u16 (p1, z0, 1), -+ p0 = svcmplt_wide (p1, z0, 1)) 
-+ -+/* -+** cmplt_wide_15_u16: -+** cmplo p0\.h, p1/z, z0\.h, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_15_u16, svuint16_t, -+ p0 = svcmplt_wide_n_u16 (p1, z0, 15), -+ p0 = svcmplt_wide (p1, z0, 15)) -+ -+/* -+** cmplt_wide_16_u16: -+** cmplo p0\.h, p1/z, z0\.h, #16 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_16_u16, svuint16_t, -+ p0 = svcmplt_wide_n_u16 (p1, z0, 16), -+ p0 = svcmplt_wide (p1, z0, 16)) -+ -+/* -+** cmplt_wide_127_u16: -+** cmplo p0\.h, p1/z, z0\.h, #127 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_127_u16, svuint16_t, -+ p0 = svcmplt_wide_n_u16 (p1, z0, 127), -+ p0 = svcmplt_wide (p1, z0, 127)) -+ -+/* -+** cmplt_wide_128_u16: -+** mov (z[0-9]+\.d), #128 -+** cmplo p0\.h, p1/z, z0\.h, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_128_u16, svuint16_t, -+ p0 = svcmplt_wide_n_u16 (p1, z0, 128), -+ p0 = svcmplt_wide (p1, z0, 128)) -+ -+/* -+** cmplt_wide_m1_u16: -+** mov (z[0-9]+)\.b, #-1 -+** cmplo p0\.h, p1/z, z0\.h, \1\.d -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_m1_u16, svuint16_t, -+ p0 = svcmplt_wide_n_u16 (p1, z0, -1), -+ p0 = svcmplt_wide (p1, z0, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_wide_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_wide_u32.c -new file mode 100644 -index 000000000..1f9652def ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_wide_u32.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmplt_wide_u32_tied: -+** cmplo p0\.s, p0/z, z0\.s, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmplt_wide_u32_tied, svuint32_t, svuint64_t, -+ p0 = svcmplt_wide_u32 (p0, z0, z1), -+ p0 = svcmplt_wide (p0, z0, z1)) -+ -+/* -+** cmplt_wide_u32_untied: -+** cmplo p0\.s, p1/z, z0\.s, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmplt_wide_u32_untied, svuint32_t, svuint64_t, -+ p0 = svcmplt_wide_u32 (p1, z0, z1), -+ p0 = svcmplt_wide (p1, z0, z1)) -+ -+/* -+** cmplt_wide_x0_u32: -+** mov (z[0-9]+\.d), x0 -+** cmplo p0\.s, p1/z, z0\.s, \1 -+** ret -+*/ -+TEST_COMPARE_ZX (cmplt_wide_x0_u32, svuint32_t, uint64_t, -+ p0 = svcmplt_wide_n_u32 (p1, z0, x0), -+ p0 = svcmplt_wide (p1, z0, x0)) -+ -+/* -+** cmplt_wide_0_u32: -+** cmplo p0\.s, p1/z, z0\.s, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_0_u32, svuint32_t, -+ p0 = svcmplt_wide_n_u32 (p1, z0, 0), -+ p0 = svcmplt_wide (p1, z0, 0)) -+ -+/* -+** cmplt_wide_1_u32: -+** cmplo p0\.s, p1/z, z0\.s, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_1_u32, svuint32_t, -+ p0 = svcmplt_wide_n_u32 (p1, z0, 1), -+ p0 = svcmplt_wide (p1, z0, 1)) -+ -+/* -+** cmplt_wide_15_u32: -+** cmplo p0\.s, p1/z, z0\.s, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_15_u32, svuint32_t, -+ p0 = svcmplt_wide_n_u32 (p1, z0, 15), -+ p0 = svcmplt_wide (p1, z0, 15)) -+ -+/* -+** cmplt_wide_16_u32: -+** cmplo p0\.s, p1/z, z0\.s, #16 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_16_u32, svuint32_t, -+ p0 = svcmplt_wide_n_u32 (p1, z0, 16), -+ p0 = svcmplt_wide (p1, z0, 16)) -+ -+/* -+** cmplt_wide_127_u32: -+** cmplo p0\.s, p1/z, z0\.s, #127 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_127_u32, svuint32_t, -+ p0 = svcmplt_wide_n_u32 (p1, z0, 127), -+ p0 = svcmplt_wide (p1, z0, 127)) -+ -+/* -+** cmplt_wide_128_u32: -+** mov (z[0-9]+\.d), #128 -+** cmplo p0\.s, p1/z, z0\.s, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_128_u32, svuint32_t, -+ p0 = svcmplt_wide_n_u32 (p1, z0, 128), -+ p0 = svcmplt_wide (p1, z0, 128)) -+ -+/* -+** cmplt_wide_m1_u32: -+** mov (z[0-9]+)\.b, #-1 -+** cmplo p0\.s, p1/z, z0\.s, \1\.d -+** 
ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_m1_u32, svuint32_t, -+ p0 = svcmplt_wide_n_u32 (p1, z0, -1), -+ p0 = svcmplt_wide (p1, z0, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_wide_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_wide_u8.c -new file mode 100644 -index 000000000..95ef3cf16 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmplt_wide_u8.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmplt_wide_u8_tied: -+** cmplo p0\.b, p0/z, z0\.b, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmplt_wide_u8_tied, svuint8_t, svuint64_t, -+ p0 = svcmplt_wide_u8 (p0, z0, z1), -+ p0 = svcmplt_wide (p0, z0, z1)) -+ -+/* -+** cmplt_wide_u8_untied: -+** cmplo p0\.b, p1/z, z0\.b, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmplt_wide_u8_untied, svuint8_t, svuint64_t, -+ p0 = svcmplt_wide_u8 (p1, z0, z1), -+ p0 = svcmplt_wide (p1, z0, z1)) -+ -+/* -+** cmplt_wide_x0_u8: -+** mov (z[0-9]+\.d), x0 -+** cmplo p0\.b, p1/z, z0\.b, \1 -+** ret -+*/ -+TEST_COMPARE_ZX (cmplt_wide_x0_u8, svuint8_t, uint64_t, -+ p0 = svcmplt_wide_n_u8 (p1, z0, x0), -+ p0 = svcmplt_wide (p1, z0, x0)) -+ -+/* -+** cmplt_wide_0_u8: -+** cmplo p0\.b, p1/z, z0\.b, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_0_u8, svuint8_t, -+ p0 = svcmplt_wide_n_u8 (p1, z0, 0), -+ p0 = svcmplt_wide (p1, z0, 0)) -+ -+/* -+** cmplt_wide_1_u8: -+** cmplo p0\.b, p1/z, z0\.b, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_1_u8, svuint8_t, -+ p0 = svcmplt_wide_n_u8 (p1, z0, 1), -+ p0 = svcmplt_wide (p1, z0, 1)) -+ -+/* -+** cmplt_wide_15_u8: -+** cmplo p0\.b, p1/z, z0\.b, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_15_u8, svuint8_t, -+ p0 = svcmplt_wide_n_u8 (p1, z0, 15), -+ p0 = svcmplt_wide (p1, z0, 15)) -+ -+/* -+** cmplt_wide_16_u8: -+** cmplo p0\.b, p1/z, z0\.b, #16 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_16_u8, svuint8_t, -+ p0 = svcmplt_wide_n_u8 (p1, z0, 16), -+ p0 = svcmplt_wide (p1, z0, 16)) -+ -+/* -+** cmplt_wide_127_u8: -+** cmplo p0\.b, p1/z, z0\.b, #127 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_127_u8, svuint8_t, -+ p0 = svcmplt_wide_n_u8 (p1, z0, 127), -+ p0 = svcmplt_wide (p1, z0, 127)) -+ -+/* -+** cmplt_wide_128_u8: -+** mov (z[0-9]+\.d), #128 -+** cmplo p0\.b, p1/z, z0\.b, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_128_u8, svuint8_t, -+ p0 = svcmplt_wide_n_u8 (p1, z0, 128), -+ p0 = svcmplt_wide (p1, z0, 128)) -+ -+/* -+** cmplt_wide_m1_u8: -+** mov (z[0-9]+)\.b, #-1 -+** cmplo p0\.b, p1/z, z0\.b, \1\.d -+** ret -+*/ -+TEST_COMPARE_Z (cmplt_wide_m1_u8, svuint8_t, -+ p0 = svcmplt_wide_n_u8 (p1, z0, -1), -+ p0 = svcmplt_wide (p1, z0, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_f16.c -new file mode 100644 -index 000000000..63e203b09 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_f16.c -@@ -0,0 +1,50 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpne_f16_tied: -+** fcmne p0\.h, p0/z, (z0\.h, z1\.h|z1\.h, z0\.h) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_f16_tied, svfloat16_t, -+ p0 = svcmpne_f16 (p0, z0, z1), -+ p0 = svcmpne (p0, z0, z1)) -+ -+/* -+** cmpne_f16_untied: -+** fcmne p0\.h, p1/z, (z0\.h, z1\.h|z1\.h, z0\.h) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_f16_untied, svfloat16_t, -+ p0 = svcmpne_f16 (p1, z0, z1), -+ p0 = svcmpne (p1, z0, z1)) -+ -+/* -+** cmpne_h4_f16: -+** mov (z[0-9]+\.h), h4 -+** fcmne p0\.h, 
p1/z, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_COMPARE_ZD (cmpne_h4_f16, svfloat16_t, float16_t, -+ p0 = svcmpne_n_f16 (p1, z0, d4), -+ p0 = svcmpne (p1, z0, d4)) -+ -+/* -+** cmpne_0_f16: -+** fcmne p0\.h, p1/z, z0\.h, #0\.0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_0_f16, svfloat16_t, -+ p0 = svcmpne_n_f16 (p1, z0, 0), -+ p0 = svcmpne (p1, z0, 0)) -+ -+/* -+** cmpne_1_f16: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? -+** fcmne p0\.h, p1/z, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_1_f16, svfloat16_t, -+ p0 = svcmpne_n_f16 (p1, z0, 1), -+ p0 = svcmpne (p1, z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_f32.c -new file mode 100644 -index 000000000..f81e2da51 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_f32.c -@@ -0,0 +1,50 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpne_f32_tied: -+** fcmne p0\.s, p0/z, (z0\.s, z1\.s|z1\.s, z0\.s) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_f32_tied, svfloat32_t, -+ p0 = svcmpne_f32 (p0, z0, z1), -+ p0 = svcmpne (p0, z0, z1)) -+ -+/* -+** cmpne_f32_untied: -+** fcmne p0\.s, p1/z, (z0\.s, z1\.s|z1\.s, z0\.s) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_f32_untied, svfloat32_t, -+ p0 = svcmpne_f32 (p1, z0, z1), -+ p0 = svcmpne (p1, z0, z1)) -+ -+/* -+** cmpne_s4_f32: -+** mov (z[0-9]+\.s), s4 -+** fcmne p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_COMPARE_ZD (cmpne_s4_f32, svfloat32_t, float32_t, -+ p0 = svcmpne_n_f32 (p1, z0, d4), -+ p0 = svcmpne (p1, z0, d4)) -+ -+/* -+** cmpne_0_f32: -+** fcmne p0\.s, p1/z, z0\.s, #0\.0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_0_f32, svfloat32_t, -+ p0 = svcmpne_n_f32 (p1, z0, 0), -+ p0 = svcmpne (p1, z0, 0)) -+ -+/* -+** cmpne_1_f32: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? -+** fcmne p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_1_f32, svfloat32_t, -+ p0 = svcmpne_n_f32 (p1, z0, 1), -+ p0 = svcmpne (p1, z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_f64.c -new file mode 100644 -index 000000000..22e4eeef4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_f64.c -@@ -0,0 +1,50 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpne_f64_tied: -+** fcmne p0\.d, p0/z, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_f64_tied, svfloat64_t, -+ p0 = svcmpne_f64 (p0, z0, z1), -+ p0 = svcmpne (p0, z0, z1)) -+ -+/* -+** cmpne_f64_untied: -+** fcmne p0\.d, p1/z, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_f64_untied, svfloat64_t, -+ p0 = svcmpne_f64 (p1, z0, z1), -+ p0 = svcmpne (p1, z0, z1)) -+ -+/* -+** cmpne_d4_f64: -+** mov (z[0-9]+\.d), d4 -+** fcmne p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_COMPARE_ZD (cmpne_d4_f64, svfloat64_t, float64_t, -+ p0 = svcmpne_n_f64 (p1, z0, d4), -+ p0 = svcmpne (p1, z0, d4)) -+ -+/* -+** cmpne_0_f64: -+** fcmne p0\.d, p1/z, z0\.d, #0\.0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_0_f64, svfloat64_t, -+ p0 = svcmpne_n_f64 (p1, z0, 0), -+ p0 = svcmpne (p1, z0, 0)) -+ -+/* -+** cmpne_1_f64: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? 
-+** fcmne p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_1_f64, svfloat64_t, -+ p0 = svcmpne_n_f64 (p1, z0, 1), -+ p0 = svcmpne (p1, z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_s16.c -new file mode 100644 -index 000000000..d8c743f8b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_s16.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpne_s16_tied: -+** cmpne p0\.h, p0/z, (z0\.h, z1\.h|z1\.h, z0\.h) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_s16_tied, svint16_t, -+ p0 = svcmpne_s16 (p0, z0, z1), -+ p0 = svcmpne (p0, z0, z1)) -+ -+/* -+** cmpne_s16_untied: -+** cmpne p0\.h, p1/z, (z0\.h, z1\.h|z1\.h, z0\.h) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_s16_untied, svint16_t, -+ p0 = svcmpne_s16 (p1, z0, z1), -+ p0 = svcmpne (p1, z0, z1)) -+ -+/* -+** cmpne_w0_s16: -+** mov (z[0-9]+\.h), w0 -+** cmpne p0\.h, p1/z, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_COMPARE_ZX (cmpne_w0_s16, svint16_t, int16_t, -+ p0 = svcmpne_n_s16 (p1, z0, x0), -+ p0 = svcmpne (p1, z0, x0)) -+ -+/* -+** cmpne_0_s16: -+** cmpne p0\.h, p1/z, z0\.h, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_0_s16, svint16_t, -+ p0 = svcmpne_n_s16 (p1, z0, 0), -+ p0 = svcmpne (p1, z0, 0)) -+ -+/* -+** cmpne_1_s16: -+** cmpne p0\.h, p1/z, z0\.h, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_1_s16, svint16_t, -+ p0 = svcmpne_n_s16 (p1, z0, 1), -+ p0 = svcmpne (p1, z0, 1)) -+ -+/* -+** cmpne_15_s16: -+** cmpne p0\.h, p1/z, z0\.h, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_15_s16, svint16_t, -+ p0 = svcmpne_n_s16 (p1, z0, 15), -+ p0 = svcmpne (p1, z0, 15)) -+ -+/* -+** cmpne_16_s16: -+** mov (z[0-9]+\.h), #16 -+** cmpne p0\.h, p1/z, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_16_s16, svint16_t, -+ p0 = svcmpne_n_s16 (p1, z0, 16), -+ p0 = svcmpne (p1, z0, 16)) -+ -+/* -+** cmpne_m1_s16: -+** cmpne p0\.h, p1/z, z0\.h, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_m1_s16, svint16_t, -+ p0 = svcmpne_n_s16 (p1, z0, -1), -+ p0 = svcmpne (p1, z0, -1)) -+ -+/* -+** cmpne_m16_s16: -+** cmpne p0\.h, p1/z, z0\.h, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_m16_s16, svint16_t, -+ p0 = svcmpne_n_s16 (p1, z0, -16), -+ p0 = svcmpne (p1, z0, -16)) -+ -+/* -+** cmpne_m17_s16: -+** mov (z[0-9]+\.h), #-17 -+** cmpne p0\.h, p1/z, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_m17_s16, svint16_t, -+ p0 = svcmpne_n_s16 (p1, z0, -17), -+ p0 = svcmpne (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_s32.c -new file mode 100644 -index 000000000..0d3c35111 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_s32.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpne_s32_tied: -+** cmpne p0\.s, p0/z, (z0\.s, z1\.s|z1\.s, z0\.s) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_s32_tied, svint32_t, -+ p0 = svcmpne_s32 (p0, z0, z1), -+ p0 = svcmpne (p0, z0, z1)) -+ -+/* -+** cmpne_s32_untied: -+** cmpne p0\.s, p1/z, (z0\.s, z1\.s|z1\.s, z0\.s) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_s32_untied, svint32_t, -+ p0 = svcmpne_s32 (p1, z0, z1), -+ p0 = svcmpne (p1, z0, z1)) -+ -+/* -+** cmpne_w0_s32: -+** mov (z[0-9]+\.s), w0 -+** cmpne p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_COMPARE_ZX (cmpne_w0_s32, svint32_t, int32_t, -+ p0 = svcmpne_n_s32 (p1, z0, 
x0), -+ p0 = svcmpne (p1, z0, x0)) -+ -+/* -+** cmpne_0_s32: -+** cmpne p0\.s, p1/z, z0\.s, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_0_s32, svint32_t, -+ p0 = svcmpne_n_s32 (p1, z0, 0), -+ p0 = svcmpne (p1, z0, 0)) -+ -+/* -+** cmpne_1_s32: -+** cmpne p0\.s, p1/z, z0\.s, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_1_s32, svint32_t, -+ p0 = svcmpne_n_s32 (p1, z0, 1), -+ p0 = svcmpne (p1, z0, 1)) -+ -+/* -+** cmpne_15_s32: -+** cmpne p0\.s, p1/z, z0\.s, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_15_s32, svint32_t, -+ p0 = svcmpne_n_s32 (p1, z0, 15), -+ p0 = svcmpne (p1, z0, 15)) -+ -+/* -+** cmpne_16_s32: -+** mov (z[0-9]+\.s), #16 -+** cmpne p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_16_s32, svint32_t, -+ p0 = svcmpne_n_s32 (p1, z0, 16), -+ p0 = svcmpne (p1, z0, 16)) -+ -+/* -+** cmpne_m1_s32: -+** cmpne p0\.s, p1/z, z0\.s, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_m1_s32, svint32_t, -+ p0 = svcmpne_n_s32 (p1, z0, -1), -+ p0 = svcmpne (p1, z0, -1)) -+ -+/* -+** cmpne_m16_s32: -+** cmpne p0\.s, p1/z, z0\.s, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_m16_s32, svint32_t, -+ p0 = svcmpne_n_s32 (p1, z0, -16), -+ p0 = svcmpne (p1, z0, -16)) -+ -+/* -+** cmpne_m17_s32: -+** mov (z[0-9]+\.s), #-17 -+** cmpne p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_m17_s32, svint32_t, -+ p0 = svcmpne_n_s32 (p1, z0, -17), -+ p0 = svcmpne (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_s64.c -new file mode 100644 -index 000000000..4cf78f2dd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_s64.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpne_s64_tied: -+** cmpne p0\.d, p0/z, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_s64_tied, svint64_t, -+ p0 = svcmpne_s64 (p0, z0, z1), -+ p0 = svcmpne (p0, z0, z1)) -+ -+/* -+** cmpne_s64_untied: -+** cmpne p0\.d, p1/z, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_s64_untied, svint64_t, -+ p0 = svcmpne_s64 (p1, z0, z1), -+ p0 = svcmpne (p1, z0, z1)) -+ -+/* -+** cmpne_x0_s64: -+** mov (z[0-9]+\.d), x0 -+** cmpne p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_COMPARE_ZX (cmpne_x0_s64, svint64_t, int64_t, -+ p0 = svcmpne_n_s64 (p1, z0, x0), -+ p0 = svcmpne (p1, z0, x0)) -+ -+/* -+** cmpne_0_s64: -+** cmpne p0\.d, p1/z, z0\.d, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_0_s64, svint64_t, -+ p0 = svcmpne_n_s64 (p1, z0, 0), -+ p0 = svcmpne (p1, z0, 0)) -+ -+/* -+** cmpne_1_s64: -+** cmpne p0\.d, p1/z, z0\.d, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_1_s64, svint64_t, -+ p0 = svcmpne_n_s64 (p1, z0, 1), -+ p0 = svcmpne (p1, z0, 1)) -+ -+/* -+** cmpne_15_s64: -+** cmpne p0\.d, p1/z, z0\.d, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_15_s64, svint64_t, -+ p0 = svcmpne_n_s64 (p1, z0, 15), -+ p0 = svcmpne (p1, z0, 15)) -+ -+/* -+** cmpne_16_s64: -+** mov (z[0-9]+\.d), #16 -+** cmpne p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_16_s64, svint64_t, -+ p0 = svcmpne_n_s64 (p1, z0, 16), -+ p0 = svcmpne (p1, z0, 16)) -+ -+/* -+** cmpne_m1_s64: -+** cmpne p0\.d, p1/z, z0\.d, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_m1_s64, svint64_t, -+ p0 = svcmpne_n_s64 (p1, z0, -1), -+ p0 = svcmpne (p1, z0, -1)) -+ -+/* -+** cmpne_m16_s64: -+** cmpne p0\.d, p1/z, z0\.d, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_m16_s64, svint64_t, -+ p0 = svcmpne_n_s64 (p1, z0, -16), -+ p0 = 
svcmpne (p1, z0, -16)) -+ -+/* -+** cmpne_m17_s64: -+** mov (z[0-9]+\.d), #-17 -+** cmpne p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_m17_s64, svint64_t, -+ p0 = svcmpne_n_s64 (p1, z0, -17), -+ p0 = svcmpne (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_s8.c -new file mode 100644 -index 000000000..6409ecdd4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_s8.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpne_s8_tied: -+** cmpne p0\.b, p0/z, (z0\.b, z1\.b|z1\.b, z0\.b) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_s8_tied, svint8_t, -+ p0 = svcmpne_s8 (p0, z0, z1), -+ p0 = svcmpne (p0, z0, z1)) -+ -+/* -+** cmpne_s8_untied: -+** cmpne p0\.b, p1/z, (z0\.b, z1\.b|z1\.b, z0\.b) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_s8_untied, svint8_t, -+ p0 = svcmpne_s8 (p1, z0, z1), -+ p0 = svcmpne (p1, z0, z1)) -+ -+/* -+** cmpne_w0_s8: -+** mov (z[0-9]+\.b), w0 -+** cmpne p0\.b, p1/z, (z0\.b, \1|\1, z0\.b) -+** ret -+*/ -+TEST_COMPARE_ZX (cmpne_w0_s8, svint8_t, int8_t, -+ p0 = svcmpne_n_s8 (p1, z0, x0), -+ p0 = svcmpne (p1, z0, x0)) -+ -+/* -+** cmpne_0_s8: -+** cmpne p0\.b, p1/z, z0\.b, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_0_s8, svint8_t, -+ p0 = svcmpne_n_s8 (p1, z0, 0), -+ p0 = svcmpne (p1, z0, 0)) -+ -+/* -+** cmpne_1_s8: -+** cmpne p0\.b, p1/z, z0\.b, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_1_s8, svint8_t, -+ p0 = svcmpne_n_s8 (p1, z0, 1), -+ p0 = svcmpne (p1, z0, 1)) -+ -+/* -+** cmpne_15_s8: -+** cmpne p0\.b, p1/z, z0\.b, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_15_s8, svint8_t, -+ p0 = svcmpne_n_s8 (p1, z0, 15), -+ p0 = svcmpne (p1, z0, 15)) -+ -+/* -+** cmpne_16_s8: -+** mov (z[0-9]+\.b), #16 -+** cmpne p0\.b, p1/z, (z0\.b, \1|\1, z0\.b) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_16_s8, svint8_t, -+ p0 = svcmpne_n_s8 (p1, z0, 16), -+ p0 = svcmpne (p1, z0, 16)) -+ -+/* -+** cmpne_m1_s8: -+** cmpne p0\.b, p1/z, z0\.b, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_m1_s8, svint8_t, -+ p0 = svcmpne_n_s8 (p1, z0, -1), -+ p0 = svcmpne (p1, z0, -1)) -+ -+/* -+** cmpne_m16_s8: -+** cmpne p0\.b, p1/z, z0\.b, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_m16_s8, svint8_t, -+ p0 = svcmpne_n_s8 (p1, z0, -16), -+ p0 = svcmpne (p1, z0, -16)) -+ -+/* -+** cmpne_m17_s8: -+** mov (z[0-9]+\.b), #-17 -+** cmpne p0\.b, p1/z, (z0\.b, \1|\1, z0\.b) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_m17_s8, svint8_t, -+ p0 = svcmpne_n_s8 (p1, z0, -17), -+ p0 = svcmpne (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_u16.c -new file mode 100644 -index 000000000..4d22bc7d3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_u16.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpne_u16_tied: -+** cmpne p0\.h, p0/z, (z0\.h, z1\.h|z1\.h, z0\.h) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_u16_tied, svuint16_t, -+ p0 = svcmpne_u16 (p0, z0, z1), -+ p0 = svcmpne (p0, z0, z1)) -+ -+/* -+** cmpne_u16_untied: -+** cmpne p0\.h, p1/z, (z0\.h, z1\.h|z1\.h, z0\.h) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_u16_untied, svuint16_t, -+ p0 = svcmpne_u16 (p1, z0, z1), -+ p0 = svcmpne (p1, z0, z1)) -+ -+/* -+** cmpne_w0_u16: -+** mov (z[0-9]+\.h), w0 -+** cmpne p0\.h, p1/z, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_COMPARE_ZX (cmpne_w0_u16, 
svuint16_t, uint16_t, -+ p0 = svcmpne_n_u16 (p1, z0, x0), -+ p0 = svcmpne (p1, z0, x0)) -+ -+/* -+** cmpne_0_u16: -+** cmpne p0\.h, p1/z, z0\.h, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_0_u16, svuint16_t, -+ p0 = svcmpne_n_u16 (p1, z0, 0), -+ p0 = svcmpne (p1, z0, 0)) -+ -+/* -+** cmpne_1_u16: -+** cmpne p0\.h, p1/z, z0\.h, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_1_u16, svuint16_t, -+ p0 = svcmpne_n_u16 (p1, z0, 1), -+ p0 = svcmpne (p1, z0, 1)) -+ -+/* -+** cmpne_15_u16: -+** cmpne p0\.h, p1/z, z0\.h, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_15_u16, svuint16_t, -+ p0 = svcmpne_n_u16 (p1, z0, 15), -+ p0 = svcmpne (p1, z0, 15)) -+ -+/* -+** cmpne_16_u16: -+** mov (z[0-9]+\.h), #16 -+** cmpne p0\.h, p1/z, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_16_u16, svuint16_t, -+ p0 = svcmpne_n_u16 (p1, z0, 16), -+ p0 = svcmpne (p1, z0, 16)) -+ -+/* -+** cmpne_m1_u16: -+** cmpne p0\.h, p1/z, z0\.h, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_m1_u16, svuint16_t, -+ p0 = svcmpne_n_u16 (p1, z0, -1), -+ p0 = svcmpne (p1, z0, -1)) -+ -+/* -+** cmpne_m16_u16: -+** cmpne p0\.h, p1/z, z0\.h, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_m16_u16, svuint16_t, -+ p0 = svcmpne_n_u16 (p1, z0, -16), -+ p0 = svcmpne (p1, z0, -16)) -+ -+/* -+** cmpne_m17_u16: -+** mov (z[0-9]+\.h), #-17 -+** cmpne p0\.h, p1/z, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_m17_u16, svuint16_t, -+ p0 = svcmpne_n_u16 (p1, z0, -17), -+ p0 = svcmpne (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_u32.c -new file mode 100644 -index 000000000..b7ca94a69 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_u32.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpne_u32_tied: -+** cmpne p0\.s, p0/z, (z0\.s, z1\.s|z1\.s, z0\.s) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_u32_tied, svuint32_t, -+ p0 = svcmpne_u32 (p0, z0, z1), -+ p0 = svcmpne (p0, z0, z1)) -+ -+/* -+** cmpne_u32_untied: -+** cmpne p0\.s, p1/z, (z0\.s, z1\.s|z1\.s, z0\.s) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_u32_untied, svuint32_t, -+ p0 = svcmpne_u32 (p1, z0, z1), -+ p0 = svcmpne (p1, z0, z1)) -+ -+/* -+** cmpne_w0_u32: -+** mov (z[0-9]+\.s), w0 -+** cmpne p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_COMPARE_ZX (cmpne_w0_u32, svuint32_t, uint32_t, -+ p0 = svcmpne_n_u32 (p1, z0, x0), -+ p0 = svcmpne (p1, z0, x0)) -+ -+/* -+** cmpne_0_u32: -+** cmpne p0\.s, p1/z, z0\.s, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_0_u32, svuint32_t, -+ p0 = svcmpne_n_u32 (p1, z0, 0), -+ p0 = svcmpne (p1, z0, 0)) -+ -+/* -+** cmpne_1_u32: -+** cmpne p0\.s, p1/z, z0\.s, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_1_u32, svuint32_t, -+ p0 = svcmpne_n_u32 (p1, z0, 1), -+ p0 = svcmpne (p1, z0, 1)) -+ -+/* -+** cmpne_15_u32: -+** cmpne p0\.s, p1/z, z0\.s, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_15_u32, svuint32_t, -+ p0 = svcmpne_n_u32 (p1, z0, 15), -+ p0 = svcmpne (p1, z0, 15)) -+ -+/* -+** cmpne_16_u32: -+** mov (z[0-9]+\.s), #16 -+** cmpne p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_16_u32, svuint32_t, -+ p0 = svcmpne_n_u32 (p1, z0, 16), -+ p0 = svcmpne (p1, z0, 16)) -+ -+/* -+** cmpne_m1_u32: -+** cmpne p0\.s, p1/z, z0\.s, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_m1_u32, svuint32_t, -+ p0 = svcmpne_n_u32 (p1, z0, -1), -+ p0 = svcmpne (p1, z0, -1)) -+ -+/* -+** cmpne_m16_u32: -+** cmpne p0\.s, p1/z, z0\.s, #-16 -+** ret -+*/ -+TEST_COMPARE_Z 
(cmpne_m16_u32, svuint32_t, -+ p0 = svcmpne_n_u32 (p1, z0, -16), -+ p0 = svcmpne (p1, z0, -16)) -+ -+/* -+** cmpne_m17_u32: -+** mov (z[0-9]+\.s), #-17 -+** cmpne p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_m17_u32, svuint32_t, -+ p0 = svcmpne_n_u32 (p1, z0, -17), -+ p0 = svcmpne (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_u64.c -new file mode 100644 -index 000000000..960ac85b1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_u64.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpne_u64_tied: -+** cmpne p0\.d, p0/z, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_u64_tied, svuint64_t, -+ p0 = svcmpne_u64 (p0, z0, z1), -+ p0 = svcmpne (p0, z0, z1)) -+ -+/* -+** cmpne_u64_untied: -+** cmpne p0\.d, p1/z, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_u64_untied, svuint64_t, -+ p0 = svcmpne_u64 (p1, z0, z1), -+ p0 = svcmpne (p1, z0, z1)) -+ -+/* -+** cmpne_x0_u64: -+** mov (z[0-9]+\.d), x0 -+** cmpne p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_COMPARE_ZX (cmpne_x0_u64, svuint64_t, uint64_t, -+ p0 = svcmpne_n_u64 (p1, z0, x0), -+ p0 = svcmpne (p1, z0, x0)) -+ -+/* -+** cmpne_0_u64: -+** cmpne p0\.d, p1/z, z0\.d, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_0_u64, svuint64_t, -+ p0 = svcmpne_n_u64 (p1, z0, 0), -+ p0 = svcmpne (p1, z0, 0)) -+ -+/* -+** cmpne_1_u64: -+** cmpne p0\.d, p1/z, z0\.d, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_1_u64, svuint64_t, -+ p0 = svcmpne_n_u64 (p1, z0, 1), -+ p0 = svcmpne (p1, z0, 1)) -+ -+/* -+** cmpne_15_u64: -+** cmpne p0\.d, p1/z, z0\.d, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_15_u64, svuint64_t, -+ p0 = svcmpne_n_u64 (p1, z0, 15), -+ p0 = svcmpne (p1, z0, 15)) -+ -+/* -+** cmpne_16_u64: -+** mov (z[0-9]+\.d), #16 -+** cmpne p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_16_u64, svuint64_t, -+ p0 = svcmpne_n_u64 (p1, z0, 16), -+ p0 = svcmpne (p1, z0, 16)) -+ -+/* -+** cmpne_m1_u64: -+** cmpne p0\.d, p1/z, z0\.d, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_m1_u64, svuint64_t, -+ p0 = svcmpne_n_u64 (p1, z0, -1), -+ p0 = svcmpne (p1, z0, -1)) -+ -+/* -+** cmpne_m16_u64: -+** cmpne p0\.d, p1/z, z0\.d, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_m16_u64, svuint64_t, -+ p0 = svcmpne_n_u64 (p1, z0, -16), -+ p0 = svcmpne (p1, z0, -16)) -+ -+/* -+** cmpne_m17_u64: -+** mov (z[0-9]+\.d), #-17 -+** cmpne p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_m17_u64, svuint64_t, -+ p0 = svcmpne_n_u64 (p1, z0, -17), -+ p0 = svcmpne (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_u8.c -new file mode 100644 -index 000000000..cb8496eab ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_u8.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpne_u8_tied: -+** cmpne p0\.b, p0/z, (z0\.b, z1\.b|z1\.b, z0\.b) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_u8_tied, svuint8_t, -+ p0 = svcmpne_u8 (p0, z0, z1), -+ p0 = svcmpne (p0, z0, z1)) -+ -+/* -+** cmpne_u8_untied: -+** cmpne p0\.b, p1/z, (z0\.b, z1\.b|z1\.b, z0\.b) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_u8_untied, svuint8_t, -+ p0 = svcmpne_u8 (p1, z0, z1), -+ p0 = svcmpne (p1, z0, z1)) -+ -+/* -+** cmpne_w0_u8: -+** mov 
(z[0-9]+\.b), w0 -+** cmpne p0\.b, p1/z, (z0\.b, \1|\1, z0\.b) -+** ret -+*/ -+TEST_COMPARE_ZX (cmpne_w0_u8, svuint8_t, uint8_t, -+ p0 = svcmpne_n_u8 (p1, z0, x0), -+ p0 = svcmpne (p1, z0, x0)) -+ -+/* -+** cmpne_0_u8: -+** cmpne p0\.b, p1/z, z0\.b, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_0_u8, svuint8_t, -+ p0 = svcmpne_n_u8 (p1, z0, 0), -+ p0 = svcmpne (p1, z0, 0)) -+ -+/* -+** cmpne_1_u8: -+** cmpne p0\.b, p1/z, z0\.b, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_1_u8, svuint8_t, -+ p0 = svcmpne_n_u8 (p1, z0, 1), -+ p0 = svcmpne (p1, z0, 1)) -+ -+/* -+** cmpne_15_u8: -+** cmpne p0\.b, p1/z, z0\.b, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_15_u8, svuint8_t, -+ p0 = svcmpne_n_u8 (p1, z0, 15), -+ p0 = svcmpne (p1, z0, 15)) -+ -+/* -+** cmpne_16_u8: -+** mov (z[0-9]+\.b), #16 -+** cmpne p0\.b, p1/z, (z0\.b, \1|\1, z0\.b) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_16_u8, svuint8_t, -+ p0 = svcmpne_n_u8 (p1, z0, 16), -+ p0 = svcmpne (p1, z0, 16)) -+ -+/* -+** cmpne_m1_u8: -+** cmpne p0\.b, p1/z, z0\.b, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_m1_u8, svuint8_t, -+ p0 = svcmpne_n_u8 (p1, z0, -1), -+ p0 = svcmpne (p1, z0, -1)) -+ -+/* -+** cmpne_m16_u8: -+** cmpne p0\.b, p1/z, z0\.b, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_m16_u8, svuint8_t, -+ p0 = svcmpne_n_u8 (p1, z0, -16), -+ p0 = svcmpne (p1, z0, -16)) -+ -+/* -+** cmpne_m17_u8: -+** mov (z[0-9]+\.b), #-17 -+** cmpne p0\.b, p1/z, (z0\.b, \1|\1, z0\.b) -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_m17_u8, svuint8_t, -+ p0 = svcmpne_n_u8 (p1, z0, -17), -+ p0 = svcmpne (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_wide_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_wide_s16.c -new file mode 100644 -index 000000000..4cb7586c9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_wide_s16.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpne_wide_s16_tied: -+** cmpne p0\.h, p0/z, z0\.h, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmpne_wide_s16_tied, svint16_t, svint64_t, -+ p0 = svcmpne_wide_s16 (p0, z0, z1), -+ p0 = svcmpne_wide (p0, z0, z1)) -+ -+/* -+** cmpne_wide_s16_untied: -+** cmpne p0\.h, p1/z, z0\.h, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmpne_wide_s16_untied, svint16_t, svint64_t, -+ p0 = svcmpne_wide_s16 (p1, z0, z1), -+ p0 = svcmpne_wide (p1, z0, z1)) -+ -+/* -+** cmpne_wide_x0_s16: -+** mov (z[0-9]+\.d), x0 -+** cmpne p0\.h, p1/z, z0\.h, \1 -+** ret -+*/ -+TEST_COMPARE_ZX (cmpne_wide_x0_s16, svint16_t, int64_t, -+ p0 = svcmpne_wide_n_s16 (p1, z0, x0), -+ p0 = svcmpne_wide (p1, z0, x0)) -+ -+/* -+** cmpne_wide_0_s16: -+** cmpne p0\.h, p1/z, z0\.h, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_wide_0_s16, svint16_t, -+ p0 = svcmpne_wide_n_s16 (p1, z0, 0), -+ p0 = svcmpne_wide (p1, z0, 0)) -+ -+/* -+** cmpne_wide_1_s16: -+** cmpne p0\.h, p1/z, z0\.h, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_wide_1_s16, svint16_t, -+ p0 = svcmpne_wide_n_s16 (p1, z0, 1), -+ p0 = svcmpne_wide (p1, z0, 1)) -+ -+/* -+** cmpne_wide_15_s16: -+** cmpne p0\.h, p1/z, z0\.h, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_wide_15_s16, svint16_t, -+ p0 = svcmpne_wide_n_s16 (p1, z0, 15), -+ p0 = svcmpne_wide (p1, z0, 15)) -+ -+/* -+** cmpne_wide_16_s16: -+** mov (z[0-9]+\.d), #16 -+** cmpne p0\.h, p1/z, z0\.h, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_wide_16_s16, svint16_t, -+ p0 = svcmpne_wide_n_s16 (p1, z0, 16), -+ p0 = svcmpne_wide (p1, z0, 16)) -+ -+/* -+** cmpne_wide_m1_s16: -+** cmpne p0\.h, p1/z, z0\.h, #-1 -+** ret -+*/ 
-+TEST_COMPARE_Z (cmpne_wide_m1_s16, svint16_t, -+ p0 = svcmpne_wide_n_s16 (p1, z0, -1), -+ p0 = svcmpne_wide (p1, z0, -1)) -+ -+/* -+** cmpne_wide_m16_s16: -+** cmpne p0\.h, p1/z, z0\.h, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_wide_m16_s16, svint16_t, -+ p0 = svcmpne_wide_n_s16 (p1, z0, -16), -+ p0 = svcmpne_wide (p1, z0, -16)) -+ -+/* -+** cmpne_wide_m17_s16: -+** mov (z[0-9]+\.d), #-17 -+** cmpne p0\.h, p1/z, z0\.h, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_wide_m17_s16, svint16_t, -+ p0 = svcmpne_wide_n_s16 (p1, z0, -17), -+ p0 = svcmpne_wide (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_wide_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_wide_s32.c -new file mode 100644 -index 000000000..633994ed3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_wide_s32.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpne_wide_s32_tied: -+** cmpne p0\.s, p0/z, z0\.s, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmpne_wide_s32_tied, svint32_t, svint64_t, -+ p0 = svcmpne_wide_s32 (p0, z0, z1), -+ p0 = svcmpne_wide (p0, z0, z1)) -+ -+/* -+** cmpne_wide_s32_untied: -+** cmpne p0\.s, p1/z, z0\.s, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmpne_wide_s32_untied, svint32_t, svint64_t, -+ p0 = svcmpne_wide_s32 (p1, z0, z1), -+ p0 = svcmpne_wide (p1, z0, z1)) -+ -+/* -+** cmpne_wide_x0_s32: -+** mov (z[0-9]+\.d), x0 -+** cmpne p0\.s, p1/z, z0\.s, \1 -+** ret -+*/ -+TEST_COMPARE_ZX (cmpne_wide_x0_s32, svint32_t, int64_t, -+ p0 = svcmpne_wide_n_s32 (p1, z0, x0), -+ p0 = svcmpne_wide (p1, z0, x0)) -+ -+/* -+** cmpne_wide_0_s32: -+** cmpne p0\.s, p1/z, z0\.s, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_wide_0_s32, svint32_t, -+ p0 = svcmpne_wide_n_s32 (p1, z0, 0), -+ p0 = svcmpne_wide (p1, z0, 0)) -+ -+/* -+** cmpne_wide_1_s32: -+** cmpne p0\.s, p1/z, z0\.s, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_wide_1_s32, svint32_t, -+ p0 = svcmpne_wide_n_s32 (p1, z0, 1), -+ p0 = svcmpne_wide (p1, z0, 1)) -+ -+/* -+** cmpne_wide_15_s32: -+** cmpne p0\.s, p1/z, z0\.s, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_wide_15_s32, svint32_t, -+ p0 = svcmpne_wide_n_s32 (p1, z0, 15), -+ p0 = svcmpne_wide (p1, z0, 15)) -+ -+/* -+** cmpne_wide_16_s32: -+** mov (z[0-9]+\.d), #16 -+** cmpne p0\.s, p1/z, z0\.s, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_wide_16_s32, svint32_t, -+ p0 = svcmpne_wide_n_s32 (p1, z0, 16), -+ p0 = svcmpne_wide (p1, z0, 16)) -+ -+/* -+** cmpne_wide_m1_s32: -+** cmpne p0\.s, p1/z, z0\.s, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_wide_m1_s32, svint32_t, -+ p0 = svcmpne_wide_n_s32 (p1, z0, -1), -+ p0 = svcmpne_wide (p1, z0, -1)) -+ -+/* -+** cmpne_wide_m16_s32: -+** cmpne p0\.s, p1/z, z0\.s, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_wide_m16_s32, svint32_t, -+ p0 = svcmpne_wide_n_s32 (p1, z0, -16), -+ p0 = svcmpne_wide (p1, z0, -16)) -+ -+/* -+** cmpne_wide_m17_s32: -+** mov (z[0-9]+\.d), #-17 -+** cmpne p0\.s, p1/z, z0\.s, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_wide_m17_s32, svint32_t, -+ p0 = svcmpne_wide_n_s32 (p1, z0, -17), -+ p0 = svcmpne_wide (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_wide_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_wide_s8.c -new file mode 100644 -index 000000000..de343f4cc ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpne_wide_s8.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** 
cmpne_wide_s8_tied: -+** cmpne p0\.b, p0/z, z0\.b, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmpne_wide_s8_tied, svint8_t, svint64_t, -+ p0 = svcmpne_wide_s8 (p0, z0, z1), -+ p0 = svcmpne_wide (p0, z0, z1)) -+ -+/* -+** cmpne_wide_s8_untied: -+** cmpne p0\.b, p1/z, z0\.b, z1\.d -+** ret -+*/ -+TEST_COMPARE_DUAL_Z (cmpne_wide_s8_untied, svint8_t, svint64_t, -+ p0 = svcmpne_wide_s8 (p1, z0, z1), -+ p0 = svcmpne_wide (p1, z0, z1)) -+ -+/* -+** cmpne_wide_x0_s8: -+** mov (z[0-9]+\.d), x0 -+** cmpne p0\.b, p1/z, z0\.b, \1 -+** ret -+*/ -+TEST_COMPARE_ZX (cmpne_wide_x0_s8, svint8_t, int64_t, -+ p0 = svcmpne_wide_n_s8 (p1, z0, x0), -+ p0 = svcmpne_wide (p1, z0, x0)) -+ -+/* -+** cmpne_wide_0_s8: -+** cmpne p0\.b, p1/z, z0\.b, #0 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_wide_0_s8, svint8_t, -+ p0 = svcmpne_wide_n_s8 (p1, z0, 0), -+ p0 = svcmpne_wide (p1, z0, 0)) -+ -+/* -+** cmpne_wide_1_s8: -+** cmpne p0\.b, p1/z, z0\.b, #1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_wide_1_s8, svint8_t, -+ p0 = svcmpne_wide_n_s8 (p1, z0, 1), -+ p0 = svcmpne_wide (p1, z0, 1)) -+ -+/* -+** cmpne_wide_15_s8: -+** cmpne p0\.b, p1/z, z0\.b, #15 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_wide_15_s8, svint8_t, -+ p0 = svcmpne_wide_n_s8 (p1, z0, 15), -+ p0 = svcmpne_wide (p1, z0, 15)) -+ -+/* -+** cmpne_wide_16_s8: -+** mov (z[0-9]+\.d), #16 -+** cmpne p0\.b, p1/z, z0\.b, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_wide_16_s8, svint8_t, -+ p0 = svcmpne_wide_n_s8 (p1, z0, 16), -+ p0 = svcmpne_wide (p1, z0, 16)) -+ -+/* -+** cmpne_wide_m1_s8: -+** cmpne p0\.b, p1/z, z0\.b, #-1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_wide_m1_s8, svint8_t, -+ p0 = svcmpne_wide_n_s8 (p1, z0, -1), -+ p0 = svcmpne_wide (p1, z0, -1)) -+ -+/* -+** cmpne_wide_m16_s8: -+** cmpne p0\.b, p1/z, z0\.b, #-16 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_wide_m16_s8, svint8_t, -+ p0 = svcmpne_wide_n_s8 (p1, z0, -16), -+ p0 = svcmpne_wide (p1, z0, -16)) -+ -+/* -+** cmpne_wide_m17_s8: -+** mov (z[0-9]+\.d), #-17 -+** cmpne p0\.b, p1/z, z0\.b, \1 -+** ret -+*/ -+TEST_COMPARE_Z (cmpne_wide_m17_s8, svint8_t, -+ p0 = svcmpne_wide_n_s8 (p1, z0, -17), -+ p0 = svcmpne_wide (p1, z0, -17)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpuo_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpuo_f16.c -new file mode 100644 -index 000000000..8f702cdde ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpuo_f16.c -@@ -0,0 +1,51 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpuo_f16_tied: -+** fcmuo p0\.h, p0/z, (z0\.h, z1\.h|z1\.h, z0\.h) -+** ret -+*/ -+TEST_COMPARE_Z (cmpuo_f16_tied, svfloat16_t, -+ p0 = svcmpuo_f16 (p0, z0, z1), -+ p0 = svcmpuo (p0, z0, z1)) -+ -+/* -+** cmpuo_f16_untied: -+** fcmuo p0\.h, p1/z, (z0\.h, z1\.h|z1\.h, z0\.h) -+** ret -+*/ -+TEST_COMPARE_Z (cmpuo_f16_untied, svfloat16_t, -+ p0 = svcmpuo_f16 (p1, z0, z1), -+ p0 = svcmpuo (p1, z0, z1)) -+ -+/* -+** cmpuo_h4_f16: -+** mov (z[0-9]+\.h), h4 -+** fcmuo p0\.h, p1/z, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_COMPARE_ZD (cmpuo_h4_f16, svfloat16_t, float16_t, -+ p0 = svcmpuo_n_f16 (p1, z0, d4), -+ p0 = svcmpuo (p1, z0, d4)) -+ -+/* -+** cmpuo_0_f16: -+** mov (z[0-9]+\.h), #0 -+** fcmuo p0\.h, p1/z, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_COMPARE_Z (cmpuo_0_f16, svfloat16_t, -+ p0 = svcmpuo_n_f16 (p1, z0, 0), -+ p0 = svcmpuo (p1, z0, 0)) -+ -+/* -+** cmpuo_1_f16: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? 
-+** fcmuo p0\.h, p1/z, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_COMPARE_Z (cmpuo_1_f16, svfloat16_t, -+ p0 = svcmpuo_n_f16 (p1, z0, 1), -+ p0 = svcmpuo (p1, z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpuo_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpuo_f32.c -new file mode 100644 -index 000000000..8827604aa ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpuo_f32.c -@@ -0,0 +1,51 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpuo_f32_tied: -+** fcmuo p0\.s, p0/z, (z0\.s, z1\.s|z1\.s, z0\.s) -+** ret -+*/ -+TEST_COMPARE_Z (cmpuo_f32_tied, svfloat32_t, -+ p0 = svcmpuo_f32 (p0, z0, z1), -+ p0 = svcmpuo (p0, z0, z1)) -+ -+/* -+** cmpuo_f32_untied: -+** fcmuo p0\.s, p1/z, (z0\.s, z1\.s|z1\.s, z0\.s) -+** ret -+*/ -+TEST_COMPARE_Z (cmpuo_f32_untied, svfloat32_t, -+ p0 = svcmpuo_f32 (p1, z0, z1), -+ p0 = svcmpuo (p1, z0, z1)) -+ -+/* -+** cmpuo_s4_f32: -+** mov (z[0-9]+\.s), s4 -+** fcmuo p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_COMPARE_ZD (cmpuo_s4_f32, svfloat32_t, float32_t, -+ p0 = svcmpuo_n_f32 (p1, z0, d4), -+ p0 = svcmpuo (p1, z0, d4)) -+ -+/* -+** cmpuo_0_f32: -+** mov (z[0-9]+\.s), #0 -+** fcmuo p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_COMPARE_Z (cmpuo_0_f32, svfloat32_t, -+ p0 = svcmpuo_n_f32 (p1, z0, 0), -+ p0 = svcmpuo (p1, z0, 0)) -+ -+/* -+** cmpuo_1_f32: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? -+** fcmuo p0\.s, p1/z, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_COMPARE_Z (cmpuo_1_f32, svfloat32_t, -+ p0 = svcmpuo_n_f32 (p1, z0, 1), -+ p0 = svcmpuo (p1, z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpuo_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpuo_f64.c -new file mode 100644 -index 000000000..d7a71eca4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cmpuo_f64.c -@@ -0,0 +1,51 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cmpuo_f64_tied: -+** fcmuo p0\.d, p0/z, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_COMPARE_Z (cmpuo_f64_tied, svfloat64_t, -+ p0 = svcmpuo_f64 (p0, z0, z1), -+ p0 = svcmpuo (p0, z0, z1)) -+ -+/* -+** cmpuo_f64_untied: -+** fcmuo p0\.d, p1/z, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_COMPARE_Z (cmpuo_f64_untied, svfloat64_t, -+ p0 = svcmpuo_f64 (p1, z0, z1), -+ p0 = svcmpuo (p1, z0, z1)) -+ -+/* -+** cmpuo_d4_f64: -+** mov (z[0-9]+\.d), d4 -+** fcmuo p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_COMPARE_ZD (cmpuo_d4_f64, svfloat64_t, float64_t, -+ p0 = svcmpuo_n_f64 (p1, z0, d4), -+ p0 = svcmpuo (p1, z0, d4)) -+ -+/* -+** cmpuo_0_f64: -+** mov (z[0-9]+\.d), #0 -+** fcmuo p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_COMPARE_Z (cmpuo_0_f64, svfloat64_t, -+ p0 = svcmpuo_n_f64 (p1, z0, 0), -+ p0 = svcmpuo (p1, z0, 0)) -+ -+/* -+** cmpuo_1_f64: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? 
-+** fcmuo p0\.d, p1/z, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_COMPARE_Z (cmpuo_1_f64, svfloat64_t, -+ p0 = svcmpuo_n_f64 (p1, z0, 1), -+ p0 = svcmpuo (p1, z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_s16.c -new file mode 100644 -index 000000000..19d46be68 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_s16.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cnot_s16_m_tied12: -+** cnot z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_s16_m_tied12, svint16_t, -+ z0 = svcnot_s16_m (z0, p0, z0), -+ z0 = svcnot_m (z0, p0, z0)) -+ -+/* -+** cnot_s16_m_tied1: -+** cnot z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_s16_m_tied1, svint16_t, -+ z0 = svcnot_s16_m (z0, p0, z1), -+ z0 = svcnot_m (z0, p0, z1)) -+ -+/* -+** cnot_s16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** cnot z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_s16_m_tied2, svint16_t, -+ z0 = svcnot_s16_m (z1, p0, z0), -+ z0 = svcnot_m (z1, p0, z0)) -+ -+/* -+** cnot_s16_m_untied: -+** movprfx z0, z2 -+** cnot z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_s16_m_untied, svint16_t, -+ z0 = svcnot_s16_m (z2, p0, z1), -+ z0 = svcnot_m (z2, p0, z1)) -+ -+/* -+** cnot_s16_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, \1\.h -+** cnot z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_s16_z_tied1, svint16_t, -+ z0 = svcnot_s16_z (p0, z0), -+ z0 = svcnot_z (p0, z0)) -+ -+/* -+** cnot_s16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** cnot z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_s16_z_untied, svint16_t, -+ z0 = svcnot_s16_z (p0, z1), -+ z0 = svcnot_z (p0, z1)) -+ -+/* -+** cnot_s16_x_tied1: -+** cnot z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_s16_x_tied1, svint16_t, -+ z0 = svcnot_s16_x (p0, z0), -+ z0 = svcnot_x (p0, z0)) -+ -+/* -+** cnot_s16_x_untied: -+** cnot z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_s16_x_untied, svint16_t, -+ z0 = svcnot_s16_x (p0, z1), -+ z0 = svcnot_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_s32.c -new file mode 100644 -index 000000000..041b59a04 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_s32.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cnot_s32_m_tied12: -+** cnot z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_s32_m_tied12, svint32_t, -+ z0 = svcnot_s32_m (z0, p0, z0), -+ z0 = svcnot_m (z0, p0, z0)) -+ -+/* -+** cnot_s32_m_tied1: -+** cnot z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_s32_m_tied1, svint32_t, -+ z0 = svcnot_s32_m (z0, p0, z1), -+ z0 = svcnot_m (z0, p0, z1)) -+ -+/* -+** cnot_s32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** cnot z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_s32_m_tied2, svint32_t, -+ z0 = svcnot_s32_m (z1, p0, z0), -+ z0 = svcnot_m (z1, p0, z0)) -+ -+/* -+** cnot_s32_m_untied: -+** movprfx z0, z2 -+** cnot z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_s32_m_untied, svint32_t, -+ z0 = svcnot_s32_m (z2, p0, z1), -+ z0 = svcnot_m (z2, p0, z1)) -+ -+/* -+** cnot_s32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** cnot z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_s32_z_tied1, 
svint32_t, -+ z0 = svcnot_s32_z (p0, z0), -+ z0 = svcnot_z (p0, z0)) -+ -+/* -+** cnot_s32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** cnot z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_s32_z_untied, svint32_t, -+ z0 = svcnot_s32_z (p0, z1), -+ z0 = svcnot_z (p0, z1)) -+ -+/* -+** cnot_s32_x_tied1: -+** cnot z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_s32_x_tied1, svint32_t, -+ z0 = svcnot_s32_x (p0, z0), -+ z0 = svcnot_x (p0, z0)) -+ -+/* -+** cnot_s32_x_untied: -+** cnot z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_s32_x_untied, svint32_t, -+ z0 = svcnot_s32_x (p0, z1), -+ z0 = svcnot_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_s64.c -new file mode 100644 -index 000000000..c7135cb95 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_s64.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cnot_s64_m_tied12: -+** cnot z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_s64_m_tied12, svint64_t, -+ z0 = svcnot_s64_m (z0, p0, z0), -+ z0 = svcnot_m (z0, p0, z0)) -+ -+/* -+** cnot_s64_m_tied1: -+** cnot z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_s64_m_tied1, svint64_t, -+ z0 = svcnot_s64_m (z0, p0, z1), -+ z0 = svcnot_m (z0, p0, z1)) -+ -+/* -+** cnot_s64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** cnot z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_s64_m_tied2, svint64_t, -+ z0 = svcnot_s64_m (z1, p0, z0), -+ z0 = svcnot_m (z1, p0, z0)) -+ -+/* -+** cnot_s64_m_untied: -+** movprfx z0, z2 -+** cnot z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_s64_m_untied, svint64_t, -+ z0 = svcnot_s64_m (z2, p0, z1), -+ z0 = svcnot_m (z2, p0, z1)) -+ -+/* -+** cnot_s64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** cnot z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_s64_z_tied1, svint64_t, -+ z0 = svcnot_s64_z (p0, z0), -+ z0 = svcnot_z (p0, z0)) -+ -+/* -+** cnot_s64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** cnot z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_s64_z_untied, svint64_t, -+ z0 = svcnot_s64_z (p0, z1), -+ z0 = svcnot_z (p0, z1)) -+ -+/* -+** cnot_s64_x_tied1: -+** cnot z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_s64_x_tied1, svint64_t, -+ z0 = svcnot_s64_x (p0, z0), -+ z0 = svcnot_x (p0, z0)) -+ -+/* -+** cnot_s64_x_untied: -+** cnot z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_s64_x_untied, svint64_t, -+ z0 = svcnot_s64_x (p0, z1), -+ z0 = svcnot_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_s8.c -new file mode 100644 -index 000000000..0560f9751 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_s8.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cnot_s8_m_tied12: -+** cnot z0\.b, p0/m, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_s8_m_tied12, svint8_t, -+ z0 = svcnot_s8_m (z0, p0, z0), -+ z0 = svcnot_m (z0, p0, z0)) -+ -+/* -+** cnot_s8_m_tied1: -+** cnot z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_s8_m_tied1, svint8_t, -+ z0 = svcnot_s8_m (z0, p0, z1), -+ z0 = svcnot_m (z0, p0, z1)) -+ -+/* -+** cnot_s8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** cnot z0\.b, p0/m, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_s8_m_tied2, svint8_t, 
-+ z0 = svcnot_s8_m (z1, p0, z0), -+ z0 = svcnot_m (z1, p0, z0)) -+ -+/* -+** cnot_s8_m_untied: -+** movprfx z0, z2 -+** cnot z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_s8_m_untied, svint8_t, -+ z0 = svcnot_s8_m (z2, p0, z1), -+ z0 = svcnot_m (z2, p0, z1)) -+ -+/* -+** cnot_s8_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.b, p0/z, \1\.b -+** cnot z0\.b, p0/m, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_s8_z_tied1, svint8_t, -+ z0 = svcnot_s8_z (p0, z0), -+ z0 = svcnot_z (p0, z0)) -+ -+/* -+** cnot_s8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** cnot z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_s8_z_untied, svint8_t, -+ z0 = svcnot_s8_z (p0, z1), -+ z0 = svcnot_z (p0, z1)) -+ -+/* -+** cnot_s8_x_tied1: -+** cnot z0\.b, p0/m, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_s8_x_tied1, svint8_t, -+ z0 = svcnot_s8_x (p0, z0), -+ z0 = svcnot_x (p0, z0)) -+ -+/* -+** cnot_s8_x_untied: -+** cnot z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_s8_x_untied, svint8_t, -+ z0 = svcnot_s8_x (p0, z1), -+ z0 = svcnot_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_u16.c -new file mode 100644 -index 000000000..7ea9ff71d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_u16.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cnot_u16_m_tied12: -+** cnot z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_u16_m_tied12, svuint16_t, -+ z0 = svcnot_u16_m (z0, p0, z0), -+ z0 = svcnot_m (z0, p0, z0)) -+ -+/* -+** cnot_u16_m_tied1: -+** cnot z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_u16_m_tied1, svuint16_t, -+ z0 = svcnot_u16_m (z0, p0, z1), -+ z0 = svcnot_m (z0, p0, z1)) -+ -+/* -+** cnot_u16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** cnot z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_u16_m_tied2, svuint16_t, -+ z0 = svcnot_u16_m (z1, p0, z0), -+ z0 = svcnot_m (z1, p0, z0)) -+ -+/* -+** cnot_u16_m_untied: -+** movprfx z0, z2 -+** cnot z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_u16_m_untied, svuint16_t, -+ z0 = svcnot_u16_m (z2, p0, z1), -+ z0 = svcnot_m (z2, p0, z1)) -+ -+/* -+** cnot_u16_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, \1\.h -+** cnot z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_u16_z_tied1, svuint16_t, -+ z0 = svcnot_u16_z (p0, z0), -+ z0 = svcnot_z (p0, z0)) -+ -+/* -+** cnot_u16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** cnot z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_u16_z_untied, svuint16_t, -+ z0 = svcnot_u16_z (p0, z1), -+ z0 = svcnot_z (p0, z1)) -+ -+/* -+** cnot_u16_x_tied1: -+** cnot z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_u16_x_tied1, svuint16_t, -+ z0 = svcnot_u16_x (p0, z0), -+ z0 = svcnot_x (p0, z0)) -+ -+/* -+** cnot_u16_x_untied: -+** cnot z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_u16_x_untied, svuint16_t, -+ z0 = svcnot_u16_x (p0, z1), -+ z0 = svcnot_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_u32.c -new file mode 100644 -index 000000000..972c7751e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_u32.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cnot_u32_m_tied12: -+** cnot z0\.s, p0/m, z0\.s -+** ret -+*/ 
-+TEST_UNIFORM_Z (cnot_u32_m_tied12, svuint32_t, -+ z0 = svcnot_u32_m (z0, p0, z0), -+ z0 = svcnot_m (z0, p0, z0)) -+ -+/* -+** cnot_u32_m_tied1: -+** cnot z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_u32_m_tied1, svuint32_t, -+ z0 = svcnot_u32_m (z0, p0, z1), -+ z0 = svcnot_m (z0, p0, z1)) -+ -+/* -+** cnot_u32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** cnot z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_u32_m_tied2, svuint32_t, -+ z0 = svcnot_u32_m (z1, p0, z0), -+ z0 = svcnot_m (z1, p0, z0)) -+ -+/* -+** cnot_u32_m_untied: -+** movprfx z0, z2 -+** cnot z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_u32_m_untied, svuint32_t, -+ z0 = svcnot_u32_m (z2, p0, z1), -+ z0 = svcnot_m (z2, p0, z1)) -+ -+/* -+** cnot_u32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** cnot z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_u32_z_tied1, svuint32_t, -+ z0 = svcnot_u32_z (p0, z0), -+ z0 = svcnot_z (p0, z0)) -+ -+/* -+** cnot_u32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** cnot z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_u32_z_untied, svuint32_t, -+ z0 = svcnot_u32_z (p0, z1), -+ z0 = svcnot_z (p0, z1)) -+ -+/* -+** cnot_u32_x_tied1: -+** cnot z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_u32_x_tied1, svuint32_t, -+ z0 = svcnot_u32_x (p0, z0), -+ z0 = svcnot_x (p0, z0)) -+ -+/* -+** cnot_u32_x_untied: -+** cnot z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_u32_x_untied, svuint32_t, -+ z0 = svcnot_u32_x (p0, z1), -+ z0 = svcnot_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_u64.c -new file mode 100644 -index 000000000..f25e001c5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_u64.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cnot_u64_m_tied12: -+** cnot z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_u64_m_tied12, svuint64_t, -+ z0 = svcnot_u64_m (z0, p0, z0), -+ z0 = svcnot_m (z0, p0, z0)) -+ -+/* -+** cnot_u64_m_tied1: -+** cnot z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_u64_m_tied1, svuint64_t, -+ z0 = svcnot_u64_m (z0, p0, z1), -+ z0 = svcnot_m (z0, p0, z1)) -+ -+/* -+** cnot_u64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** cnot z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_u64_m_tied2, svuint64_t, -+ z0 = svcnot_u64_m (z1, p0, z0), -+ z0 = svcnot_m (z1, p0, z0)) -+ -+/* -+** cnot_u64_m_untied: -+** movprfx z0, z2 -+** cnot z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_u64_m_untied, svuint64_t, -+ z0 = svcnot_u64_m (z2, p0, z1), -+ z0 = svcnot_m (z2, p0, z1)) -+ -+/* -+** cnot_u64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** cnot z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_u64_z_tied1, svuint64_t, -+ z0 = svcnot_u64_z (p0, z0), -+ z0 = svcnot_z (p0, z0)) -+ -+/* -+** cnot_u64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** cnot z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_u64_z_untied, svuint64_t, -+ z0 = svcnot_u64_z (p0, z1), -+ z0 = svcnot_z (p0, z1)) -+ -+/* -+** cnot_u64_x_tied1: -+** cnot z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_u64_x_tied1, svuint64_t, -+ z0 = svcnot_u64_x (p0, z0), -+ z0 = svcnot_x (p0, z0)) -+ -+/* -+** cnot_u64_x_untied: -+** cnot z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_u64_x_untied, svuint64_t, -+ z0 = svcnot_u64_x (p0, z1), -+ 
z0 = svcnot_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_u8.c -new file mode 100644 -index 000000000..e135a7295 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnot_u8.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cnot_u8_m_tied12: -+** cnot z0\.b, p0/m, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_u8_m_tied12, svuint8_t, -+ z0 = svcnot_u8_m (z0, p0, z0), -+ z0 = svcnot_m (z0, p0, z0)) -+ -+/* -+** cnot_u8_m_tied1: -+** cnot z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_u8_m_tied1, svuint8_t, -+ z0 = svcnot_u8_m (z0, p0, z1), -+ z0 = svcnot_m (z0, p0, z1)) -+ -+/* -+** cnot_u8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** cnot z0\.b, p0/m, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_u8_m_tied2, svuint8_t, -+ z0 = svcnot_u8_m (z1, p0, z0), -+ z0 = svcnot_m (z1, p0, z0)) -+ -+/* -+** cnot_u8_m_untied: -+** movprfx z0, z2 -+** cnot z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_u8_m_untied, svuint8_t, -+ z0 = svcnot_u8_m (z2, p0, z1), -+ z0 = svcnot_m (z2, p0, z1)) -+ -+/* -+** cnot_u8_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.b, p0/z, \1\.b -+** cnot z0\.b, p0/m, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_u8_z_tied1, svuint8_t, -+ z0 = svcnot_u8_z (p0, z0), -+ z0 = svcnot_z (p0, z0)) -+ -+/* -+** cnot_u8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** cnot z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_u8_z_untied, svuint8_t, -+ z0 = svcnot_u8_z (p0, z1), -+ z0 = svcnot_z (p0, z1)) -+ -+/* -+** cnot_u8_x_tied1: -+** cnot z0\.b, p0/m, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_u8_x_tied1, svuint8_t, -+ z0 = svcnot_u8_x (p0, z0), -+ z0 = svcnot_x (p0, z0)) -+ -+/* -+** cnot_u8_x_untied: -+** cnot z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (cnot_u8_x_untied, svuint8_t, -+ z0 = svcnot_u8_x (p0, z1), -+ z0 = svcnot_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_bf16.c -new file mode 100644 -index 000000000..d92fbc157 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_bf16.c -@@ -0,0 +1,52 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cnt_bf16_m_tied1: -+** cnt z0\.h, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cnt_bf16_m_tied1, svuint16_t, svbfloat16_t, -+ z0 = svcnt_bf16_m (z0, p0, z4), -+ z0 = svcnt_m (z0, p0, z4)) -+ -+/* -+** cnt_bf16_m_untied: -+** movprfx z0, z1 -+** cnt z0\.h, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cnt_bf16_m_untied, svuint16_t, svbfloat16_t, -+ z0 = svcnt_bf16_m (z1, p0, z4), -+ z0 = svcnt_m (z1, p0, z4)) -+ -+/* -+** cnt_bf16_z: -+** movprfx z0\.h, p0/z, z4\.h -+** cnt z0\.h, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cnt_bf16_z, svuint16_t, svbfloat16_t, -+ z0 = svcnt_bf16_z (p0, z4), -+ z0 = svcnt_z (p0, z4)) -+ -+/* -+** cnt_bf16_x: -+** cnt z0\.h, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cnt_bf16_x, svuint16_t, svbfloat16_t, -+ z0 = svcnt_bf16_x (p0, z4), -+ z0 = svcnt_x (p0, z4)) -+ -+/* -+** ptrue_cnt_bf16_x: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_DUAL_Z (ptrue_cnt_bf16_x, svuint16_t, svbfloat16_t, -+ z0 = svcnt_bf16_x (svptrue_b16 (), z4), -+ z0 = svcnt_x (svptrue_b16 (), z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_f16.c -new file mode 100644 -index 000000000..b8061bb80 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_f16.c -@@ -0,0 +1,52 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cnt_f16_m_tied1: -+** cnt z0\.h, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cnt_f16_m_tied1, svuint16_t, svfloat16_t, -+ z0 = svcnt_f16_m (z0, p0, z4), -+ z0 = svcnt_m (z0, p0, z4)) -+ -+/* -+** cnt_f16_m_untied: -+** movprfx z0, z1 -+** cnt z0\.h, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cnt_f16_m_untied, svuint16_t, svfloat16_t, -+ z0 = svcnt_f16_m (z1, p0, z4), -+ z0 = svcnt_m (z1, p0, z4)) -+ -+/* -+** cnt_f16_z: -+** movprfx z0\.h, p0/z, z4\.h -+** cnt z0\.h, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cnt_f16_z, svuint16_t, svfloat16_t, -+ z0 = svcnt_f16_z (p0, z4), -+ z0 = svcnt_z (p0, z4)) -+ -+/* -+** cnt_f16_x: -+** cnt z0\.h, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cnt_f16_x, svuint16_t, svfloat16_t, -+ z0 = svcnt_f16_x (p0, z4), -+ z0 = svcnt_x (p0, z4)) -+ -+/* -+** ptrue_cnt_f16_x: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z (ptrue_cnt_f16_x, svuint16_t, svfloat16_t, -+ z0 = svcnt_f16_x (svptrue_b16 (), z4), -+ z0 = svcnt_x (svptrue_b16 (), z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_f32.c -new file mode 100644 -index 000000000..b9292c977 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_f32.c -@@ -0,0 +1,52 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cnt_f32_m_tied1: -+** cnt z0\.s, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cnt_f32_m_tied1, svuint32_t, svfloat32_t, -+ z0 = svcnt_f32_m (z0, p0, z4), -+ z0 = svcnt_m (z0, p0, z4)) -+ -+/* -+** cnt_f32_m_untied: -+** movprfx z0, z1 -+** cnt z0\.s, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cnt_f32_m_untied, svuint32_t, svfloat32_t, -+ z0 = svcnt_f32_m (z1, p0, z4), -+ z0 = svcnt_m (z1, p0, z4)) -+ -+/* -+** cnt_f32_z: -+** movprfx z0\.s, p0/z, z4\.s -+** cnt z0\.s, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cnt_f32_z, svuint32_t, svfloat32_t, -+ z0 = svcnt_f32_z (p0, z4), -+ z0 = svcnt_z (p0, z4)) -+ -+/* -+** cnt_f32_x: -+** cnt z0\.s, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cnt_f32_x, svuint32_t, svfloat32_t, -+ z0 = svcnt_f32_x (p0, z4), -+ z0 = svcnt_x (p0, z4)) -+ -+/* -+** ptrue_cnt_f32_x: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_DUAL_Z (ptrue_cnt_f32_x, svuint32_t, svfloat32_t, -+ z0 = svcnt_f32_x (svptrue_b32 (), z4), -+ z0 = svcnt_x (svptrue_b32 (), z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_f64.c -new file mode 100644 -index 000000000..4976ee467 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_f64.c -@@ -0,0 +1,52 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cnt_f64_m_tied1: -+** cnt z0\.d, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cnt_f64_m_tied1, svuint64_t, svfloat64_t, -+ z0 = svcnt_f64_m (z0, p0, z4), -+ z0 = svcnt_m (z0, p0, z4)) -+ -+/* -+** cnt_f64_m_untied: -+** movprfx z0, z1 -+** cnt z0\.d, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cnt_f64_m_untied, svuint64_t, svfloat64_t, -+ z0 = svcnt_f64_m (z1, p0, z4), -+ z0 = svcnt_m (z1, p0, z4)) -+ -+/* -+** cnt_f64_z: -+** movprfx z0\.d, p0/z, z4\.d -+** cnt z0\.d, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cnt_f64_z, svuint64_t, svfloat64_t, -+ z0 = svcnt_f64_z (p0, z4), -+ z0 = svcnt_z (p0, z4)) -+ -+/* -+** cnt_f64_x: -+** cnt z0\.d, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cnt_f64_x, svuint64_t, svfloat64_t, -+ z0 = svcnt_f64_x (p0, z4), -+ z0 = svcnt_x (p0, z4)) -+ -+/* -+** ptrue_cnt_f64_x: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z (ptrue_cnt_f64_x, svuint64_t, svfloat64_t, -+ z0 = svcnt_f64_x (svptrue_b64 (), z4), -+ z0 = svcnt_x (svptrue_b64 (), z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_s16.c -new file mode 100644 -index 000000000..a8ff8f3d2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_s16.c -@@ -0,0 +1,41 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cnt_s16_m_tied1: -+** cnt z0\.h, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cnt_s16_m_tied1, svuint16_t, svint16_t, -+ z0 = svcnt_s16_m (z0, p0, z4), -+ z0 = svcnt_m (z0, p0, z4)) -+ -+/* -+** cnt_s16_m_untied: -+** movprfx z0, z1 -+** cnt z0\.h, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cnt_s16_m_untied, svuint16_t, svint16_t, -+ z0 = svcnt_s16_m (z1, p0, z4), -+ z0 = svcnt_m (z1, p0, z4)) -+ -+/* -+** cnt_s16_z: -+** movprfx z0\.h, p0/z, z4\.h -+** cnt z0\.h, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cnt_s16_z, svuint16_t, svint16_t, -+ z0 = svcnt_s16_z (p0, z4), -+ z0 = svcnt_z (p0, z4)) -+ -+/* -+** cnt_s16_x: -+** cnt z0\.h, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cnt_s16_x, svuint16_t, svint16_t, -+ z0 = svcnt_s16_x (p0, z4), -+ z0 = svcnt_x (p0, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_s32.c -new file mode 100644 -index 000000000..3d16041f2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_s32.c -@@ -0,0 +1,41 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cnt_s32_m_tied1: -+** cnt z0\.s, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cnt_s32_m_tied1, svuint32_t, svint32_t, -+ z0 = svcnt_s32_m (z0, p0, z4), -+ z0 = svcnt_m (z0, p0, z4)) -+ -+/* -+** cnt_s32_m_untied: -+** movprfx z0, z1 -+** cnt z0\.s, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cnt_s32_m_untied, svuint32_t, svint32_t, -+ z0 = svcnt_s32_m (z1, p0, z4), -+ z0 = svcnt_m (z1, p0, z4)) -+ -+/* -+** cnt_s32_z: -+** movprfx z0\.s, p0/z, z4\.s -+** cnt z0\.s, p0/m, z4\.s 
-+** ret -+*/ -+TEST_DUAL_Z (cnt_s32_z, svuint32_t, svint32_t, -+ z0 = svcnt_s32_z (p0, z4), -+ z0 = svcnt_z (p0, z4)) -+ -+/* -+** cnt_s32_x: -+** cnt z0\.s, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cnt_s32_x, svuint32_t, svint32_t, -+ z0 = svcnt_s32_x (p0, z4), -+ z0 = svcnt_x (p0, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_s64.c -new file mode 100644 -index 000000000..8c8871ba5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_s64.c -@@ -0,0 +1,41 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cnt_s64_m_tied1: -+** cnt z0\.d, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cnt_s64_m_tied1, svuint64_t, svint64_t, -+ z0 = svcnt_s64_m (z0, p0, z4), -+ z0 = svcnt_m (z0, p0, z4)) -+ -+/* -+** cnt_s64_m_untied: -+** movprfx z0, z1 -+** cnt z0\.d, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cnt_s64_m_untied, svuint64_t, svint64_t, -+ z0 = svcnt_s64_m (z1, p0, z4), -+ z0 = svcnt_m (z1, p0, z4)) -+ -+/* -+** cnt_s64_z: -+** movprfx z0\.d, p0/z, z4\.d -+** cnt z0\.d, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cnt_s64_z, svuint64_t, svint64_t, -+ z0 = svcnt_s64_z (p0, z4), -+ z0 = svcnt_z (p0, z4)) -+ -+/* -+** cnt_s64_x: -+** cnt z0\.d, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cnt_s64_x, svuint64_t, svint64_t, -+ z0 = svcnt_s64_x (p0, z4), -+ z0 = svcnt_x (p0, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_s8.c -new file mode 100644 -index 000000000..8d85c8e51 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_s8.c -@@ -0,0 +1,41 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cnt_s8_m_tied1: -+** cnt z0\.b, p0/m, z4\.b -+** ret -+*/ -+TEST_DUAL_Z (cnt_s8_m_tied1, svuint8_t, svint8_t, -+ z0 = svcnt_s8_m (z0, p0, z4), -+ z0 = svcnt_m (z0, p0, z4)) -+ -+/* -+** cnt_s8_m_untied: -+** movprfx z0, z1 -+** cnt z0\.b, p0/m, z4\.b -+** ret -+*/ -+TEST_DUAL_Z (cnt_s8_m_untied, svuint8_t, svint8_t, -+ z0 = svcnt_s8_m (z1, p0, z4), -+ z0 = svcnt_m (z1, p0, z4)) -+ -+/* -+** cnt_s8_z: -+** movprfx z0\.b, p0/z, z4\.b -+** cnt z0\.b, p0/m, z4\.b -+** ret -+*/ -+TEST_DUAL_Z (cnt_s8_z, svuint8_t, svint8_t, -+ z0 = svcnt_s8_z (p0, z4), -+ z0 = svcnt_z (p0, z4)) -+ -+/* -+** cnt_s8_x: -+** cnt z0\.b, p0/m, z4\.b -+** ret -+*/ -+TEST_DUAL_Z (cnt_s8_x, svuint8_t, svint8_t, -+ z0 = svcnt_s8_x (p0, z4), -+ z0 = svcnt_x (p0, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_u16.c -new file mode 100644 -index 000000000..f173d3108 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_u16.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cnt_u16_m_tied12: -+** cnt z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (cnt_u16_m_tied12, svuint16_t, -+ z0 = svcnt_u16_m (z0, p0, z0), -+ z0 = svcnt_m (z0, p0, z0)) -+ -+/* -+** cnt_u16_m_tied1: -+** cnt z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (cnt_u16_m_tied1, svuint16_t, -+ z0 = svcnt_u16_m (z0, p0, z1), -+ z0 = svcnt_m (z0, p0, z1)) -+ -+/* -+** cnt_u16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** cnt z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (cnt_u16_m_tied2, svuint16_t, -+ z0 = svcnt_u16_m (z1, p0, z0), -+ z0 = svcnt_m (z1, p0, z0)) -+ 
-+/* -+** cnt_u16_m_untied: -+** movprfx z0, z2 -+** cnt z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (cnt_u16_m_untied, svuint16_t, -+ z0 = svcnt_u16_m (z2, p0, z1), -+ z0 = svcnt_m (z2, p0, z1)) -+ -+/* -+** cnt_u16_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, \1\.h -+** cnt z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (cnt_u16_z_tied1, svuint16_t, -+ z0 = svcnt_u16_z (p0, z0), -+ z0 = svcnt_z (p0, z0)) -+ -+/* -+** cnt_u16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** cnt z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (cnt_u16_z_untied, svuint16_t, -+ z0 = svcnt_u16_z (p0, z1), -+ z0 = svcnt_z (p0, z1)) -+ -+/* -+** cnt_u16_x_tied1: -+** cnt z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (cnt_u16_x_tied1, svuint16_t, -+ z0 = svcnt_u16_x (p0, z0), -+ z0 = svcnt_x (p0, z0)) -+ -+/* -+** cnt_u16_x_untied: -+** cnt z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (cnt_u16_x_untied, svuint16_t, -+ z0 = svcnt_u16_x (p0, z1), -+ z0 = svcnt_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_u32.c -new file mode 100644 -index 000000000..11969a6b6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_u32.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cnt_u32_m_tied12: -+** cnt z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (cnt_u32_m_tied12, svuint32_t, -+ z0 = svcnt_u32_m (z0, p0, z0), -+ z0 = svcnt_m (z0, p0, z0)) -+ -+/* -+** cnt_u32_m_tied1: -+** cnt z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (cnt_u32_m_tied1, svuint32_t, -+ z0 = svcnt_u32_m (z0, p0, z1), -+ z0 = svcnt_m (z0, p0, z1)) -+ -+/* -+** cnt_u32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** cnt z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (cnt_u32_m_tied2, svuint32_t, -+ z0 = svcnt_u32_m (z1, p0, z0), -+ z0 = svcnt_m (z1, p0, z0)) -+ -+/* -+** cnt_u32_m_untied: -+** movprfx z0, z2 -+** cnt z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (cnt_u32_m_untied, svuint32_t, -+ z0 = svcnt_u32_m (z2, p0, z1), -+ z0 = svcnt_m (z2, p0, z1)) -+ -+/* -+** cnt_u32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** cnt z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (cnt_u32_z_tied1, svuint32_t, -+ z0 = svcnt_u32_z (p0, z0), -+ z0 = svcnt_z (p0, z0)) -+ -+/* -+** cnt_u32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** cnt z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (cnt_u32_z_untied, svuint32_t, -+ z0 = svcnt_u32_z (p0, z1), -+ z0 = svcnt_z (p0, z1)) -+ -+/* -+** cnt_u32_x_tied1: -+** cnt z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (cnt_u32_x_tied1, svuint32_t, -+ z0 = svcnt_u32_x (p0, z0), -+ z0 = svcnt_x (p0, z0)) -+ -+/* -+** cnt_u32_x_untied: -+** cnt z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (cnt_u32_x_untied, svuint32_t, -+ z0 = svcnt_u32_x (p0, z1), -+ z0 = svcnt_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_u64.c -new file mode 100644 -index 000000000..4eb69ea84 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_u64.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cnt_u64_m_tied12: -+** cnt z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (cnt_u64_m_tied12, svuint64_t, -+ z0 = svcnt_u64_m (z0, p0, z0), -+ z0 = svcnt_m (z0, p0, z0)) -+ -+/* -+** 
cnt_u64_m_tied1: -+** cnt z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (cnt_u64_m_tied1, svuint64_t, -+ z0 = svcnt_u64_m (z0, p0, z1), -+ z0 = svcnt_m (z0, p0, z1)) -+ -+/* -+** cnt_u64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** cnt z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (cnt_u64_m_tied2, svuint64_t, -+ z0 = svcnt_u64_m (z1, p0, z0), -+ z0 = svcnt_m (z1, p0, z0)) -+ -+/* -+** cnt_u64_m_untied: -+** movprfx z0, z2 -+** cnt z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (cnt_u64_m_untied, svuint64_t, -+ z0 = svcnt_u64_m (z2, p0, z1), -+ z0 = svcnt_m (z2, p0, z1)) -+ -+/* -+** cnt_u64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** cnt z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (cnt_u64_z_tied1, svuint64_t, -+ z0 = svcnt_u64_z (p0, z0), -+ z0 = svcnt_z (p0, z0)) -+ -+/* -+** cnt_u64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** cnt z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (cnt_u64_z_untied, svuint64_t, -+ z0 = svcnt_u64_z (p0, z1), -+ z0 = svcnt_z (p0, z1)) -+ -+/* -+** cnt_u64_x_tied1: -+** cnt z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (cnt_u64_x_tied1, svuint64_t, -+ z0 = svcnt_u64_x (p0, z0), -+ z0 = svcnt_x (p0, z0)) -+ -+/* -+** cnt_u64_x_untied: -+** cnt z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (cnt_u64_x_untied, svuint64_t, -+ z0 = svcnt_u64_x (p0, z1), -+ z0 = svcnt_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_u8.c -new file mode 100644 -index 000000000..30e798302 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnt_u8.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cnt_u8_m_tied12: -+** cnt z0\.b, p0/m, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (cnt_u8_m_tied12, svuint8_t, -+ z0 = svcnt_u8_m (z0, p0, z0), -+ z0 = svcnt_m (z0, p0, z0)) -+ -+/* -+** cnt_u8_m_tied1: -+** cnt z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (cnt_u8_m_tied1, svuint8_t, -+ z0 = svcnt_u8_m (z0, p0, z1), -+ z0 = svcnt_m (z0, p0, z1)) -+ -+/* -+** cnt_u8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** cnt z0\.b, p0/m, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (cnt_u8_m_tied2, svuint8_t, -+ z0 = svcnt_u8_m (z1, p0, z0), -+ z0 = svcnt_m (z1, p0, z0)) -+ -+/* -+** cnt_u8_m_untied: -+** movprfx z0, z2 -+** cnt z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (cnt_u8_m_untied, svuint8_t, -+ z0 = svcnt_u8_m (z2, p0, z1), -+ z0 = svcnt_m (z2, p0, z1)) -+ -+/* -+** cnt_u8_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.b, p0/z, \1\.b -+** cnt z0\.b, p0/m, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (cnt_u8_z_tied1, svuint8_t, -+ z0 = svcnt_u8_z (p0, z0), -+ z0 = svcnt_z (p0, z0)) -+ -+/* -+** cnt_u8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** cnt z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (cnt_u8_z_untied, svuint8_t, -+ z0 = svcnt_u8_z (p0, z1), -+ z0 = svcnt_z (p0, z1)) -+ -+/* -+** cnt_u8_x_tied1: -+** cnt z0\.b, p0/m, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (cnt_u8_x_tied1, svuint8_t, -+ z0 = svcnt_u8_x (p0, z0), -+ z0 = svcnt_x (p0, z0)) -+ -+/* -+** cnt_u8_x_untied: -+** cnt z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (cnt_u8_x_untied, svuint8_t, -+ z0 = svcnt_u8_x (p0, z1), -+ z0 = svcnt_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntb.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntb.c -new file mode 100644 -index 000000000..8b8fe8e4f ---- /dev/null -+++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntb.c -@@ -0,0 +1,280 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cntb_1: -+** cntb x0 -+** ret -+*/ -+PROTO (cntb_1, uint64_t, ()) { return svcntb (); } -+ -+/* -+** cntb_2: -+** cntb x0, all, mul #2 -+** ret -+*/ -+PROTO (cntb_2, uint64_t, ()) { return svcntb () * 2; } -+ -+/* -+** cntb_3: -+** cntb x0, all, mul #3 -+** ret -+*/ -+PROTO (cntb_3, uint64_t, ()) { return svcntb () * 3; } -+ -+/* -+** cntb_4: -+** cntb x0, all, mul #4 -+** ret -+*/ -+PROTO (cntb_4, uint64_t, ()) { return svcntb () * 4; } -+ -+/* -+** cntb_8: -+** cntb x0, all, mul #8 -+** ret -+*/ -+PROTO (cntb_8, uint64_t, ()) { return svcntb () * 8; } -+ -+/* -+** cntb_15: -+** cntb x0, all, mul #15 -+** ret -+*/ -+PROTO (cntb_15, uint64_t, ()) { return svcntb () * 15; } -+ -+/* -+** cntb_16: -+** cntb x0, all, mul #16 -+** ret -+*/ -+PROTO (cntb_16, uint64_t, ()) { return svcntb () * 16; } -+ -+/* Other sequences would be OK. */ -+/* -+** cntb_17: -+** cntb x0, all, mul #16 -+** incb x0 -+** ret -+*/ -+PROTO (cntb_17, uint64_t, ()) { return svcntb () * 17; } -+ -+/* -+** cntb_32: -+** cntd (x[0-9]+) -+** lsl x0, \1, 8 -+** ret -+*/ -+PROTO (cntb_32, uint64_t, ()) { return svcntb () * 32; } -+ -+/* Other sequences would be OK. */ -+/* -+** cntb_33: -+** cntb (x[0-9]+) -+** lsl x0, \1, 5 -+** incb x0 -+** ret -+*/ -+PROTO (cntb_33, uint64_t, ()) { return svcntb () * 33; } -+ -+/* -+** cntb_64: -+** cntd (x[0-9]+) -+** lsl x0, \1, 9 -+** ret -+*/ -+PROTO (cntb_64, uint64_t, ()) { return svcntb () * 64; } -+ -+/* -+** cntb_128: -+** cntd (x[0-9]+) -+** lsl x0, \1, 10 -+** ret -+*/ -+PROTO (cntb_128, uint64_t, ()) { return svcntb () * 128; } -+ -+/* Other sequences would be OK. */ -+/* -+** cntb_129: -+** cntb (x[0-9]+) -+** lsl x0, \1, 7 -+** incb x0 -+** ret -+*/ -+PROTO (cntb_129, uint64_t, ()) { return svcntb () * 129; } -+ -+/* -+** cntb_m1: -+** cntb (x[0-9]+) -+** neg x0, \1 -+** ret -+*/ -+PROTO (cntb_m1, uint64_t, ()) { return -svcntb (); } -+ -+/* -+** cntb_m13: -+** cntb (x[0-9]+), all, mul #13 -+** neg x0, \1 -+** ret -+*/ -+PROTO (cntb_m13, uint64_t, ()) { return -svcntb () * 13; } -+ -+/* -+** cntb_m15: -+** cntb (x[0-9]+), all, mul #15 -+** neg x0, \1 -+** ret -+*/ -+PROTO (cntb_m15, uint64_t, ()) { return -svcntb () * 15; } -+ -+/* -+** cntb_m16: -+** cntb (x[0-9]+), all, mul #16 -+** neg x0, \1 -+** ret -+*/ -+PROTO (cntb_m16, uint64_t, ()) { return -svcntb () * 16; } -+ -+/* Other sequences would be OK. 
*/ -+/* -+** cntb_m17: -+** cntb x0, all, mul #16 -+** incb x0 -+** neg x0, x0 -+** ret -+*/ -+PROTO (cntb_m17, uint64_t, ()) { return -svcntb () * 17; } -+ -+/* -+** incb_1: -+** incb x0 -+** ret -+*/ -+PROTO (incb_1, uint64_t, (uint64_t x0)) { return x0 + svcntb (); } -+ -+/* -+** incb_2: -+** incb x0, all, mul #2 -+** ret -+*/ -+PROTO (incb_2, uint64_t, (uint64_t x0)) { return x0 + svcntb () * 2; } -+ -+/* -+** incb_3: -+** incb x0, all, mul #3 -+** ret -+*/ -+PROTO (incb_3, uint64_t, (uint64_t x0)) { return x0 + svcntb () * 3; } -+ -+/* -+** incb_4: -+** incb x0, all, mul #4 -+** ret -+*/ -+PROTO (incb_4, uint64_t, (uint64_t x0)) { return x0 + svcntb () * 4; } -+ -+/* -+** incb_8: -+** incb x0, all, mul #8 -+** ret -+*/ -+PROTO (incb_8, uint64_t, (uint64_t x0)) { return x0 + svcntb () * 8; } -+ -+/* -+** incb_15: -+** incb x0, all, mul #15 -+** ret -+*/ -+PROTO (incb_15, uint64_t, (uint64_t x0)) { return x0 + svcntb () * 15; } -+ -+/* -+** incb_16: -+** incb x0, all, mul #16 -+** ret -+*/ -+PROTO (incb_16, uint64_t, (uint64_t x0)) { return x0 + svcntb () * 16; } -+ -+/* -+** incb_17: -+** addvl x0, x0, #17 -+** ret -+*/ -+PROTO (incb_17, uint64_t, (uint64_t x0)) { return x0 + svcntb () * 17; } -+ -+/* -+** incb_31: -+** addvl x0, x0, #31 -+** ret -+*/ -+PROTO (incb_31, uint64_t, (uint64_t x0)) { return x0 + svcntb () * 31; } -+ -+/* -+** decb_1: -+** decb x0 -+** ret -+*/ -+PROTO (decb_1, uint64_t, (uint64_t x0)) { return x0 - svcntb (); } -+ -+/* -+** decb_2: -+** decb x0, all, mul #2 -+** ret -+*/ -+PROTO (decb_2, uint64_t, (uint64_t x0)) { return x0 - svcntb () * 2; } -+ -+/* -+** decb_3: -+** decb x0, all, mul #3 -+** ret -+*/ -+PROTO (decb_3, uint64_t, (uint64_t x0)) { return x0 - svcntb () * 3; } -+ -+/* -+** decb_4: -+** decb x0, all, mul #4 -+** ret -+*/ -+PROTO (decb_4, uint64_t, (uint64_t x0)) { return x0 - svcntb () * 4; } -+ -+/* -+** decb_8: -+** decb x0, all, mul #8 -+** ret -+*/ -+PROTO (decb_8, uint64_t, (uint64_t x0)) { return x0 - svcntb () * 8; } -+ -+/* -+** decb_15: -+** decb x0, all, mul #15 -+** ret -+*/ -+PROTO (decb_15, uint64_t, (uint64_t x0)) { return x0 - svcntb () * 15; } -+ -+/* -+** decb_16: -+** decb x0, all, mul #16 -+** ret -+*/ -+PROTO (decb_16, uint64_t, (uint64_t x0)) { return x0 - svcntb () * 16; } -+ -+/* -+** decb_17: -+** addvl x0, x0, #-17 -+** ret -+*/ -+PROTO (decb_17, uint64_t, (uint64_t x0)) { return x0 - svcntb () * 17; } -+ -+/* -+** decb_31: -+** addvl x0, x0, #-31 -+** ret -+*/ -+PROTO (decb_31, uint64_t, (uint64_t x0)) { return x0 - svcntb () * 31; } -+ -+/* -+** decb_32: -+** addvl x0, x0, #-32 -+** ret -+*/ -+PROTO (decb_32, uint64_t, (uint64_t x0)) { return x0 - svcntb () * 32; } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntb_pat.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntb_pat.c -new file mode 100644 -index 000000000..effc5668d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntb_pat.c -@@ -0,0 +1,432 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cntb_pow2: -+** cntb x0, pow2 -+** ret -+*/ -+PROTO (cntb_pow2, uint64_t, ()) { return svcntb_pat (SV_POW2); } -+ -+/* -+** cntb_vl1: -+** mov x0, #?1 -+** ret -+*/ -+PROTO (cntb_vl1, uint64_t, ()) { return svcntb_pat (SV_VL1); } -+ -+/* -+** cntb_vl2: -+** mov x0, #?2 -+** ret -+*/ -+PROTO (cntb_vl2, uint64_t, ()) { return svcntb_pat (SV_VL2); } -+ -+/* -+** cntb_vl3: -+** mov x0, #?3 -+** ret -+*/ -+PROTO (cntb_vl3, uint64_t, ()) { return svcntb_pat (SV_VL3); } -+ -+/* 
-+** cntb_vl4: -+** mov x0, #?4 -+** ret -+*/ -+PROTO (cntb_vl4, uint64_t, ()) { return svcntb_pat (SV_VL4); } -+ -+/* -+** cntb_vl5: -+** mov x0, #?5 -+** ret -+*/ -+PROTO (cntb_vl5, uint64_t, ()) { return svcntb_pat (SV_VL5); } -+ -+/* -+** cntb_vl6: -+** mov x0, #?6 -+** ret -+*/ -+PROTO (cntb_vl6, uint64_t, ()) { return svcntb_pat (SV_VL6); } -+ -+/* -+** cntb_vl7: -+** mov x0, #?7 -+** ret -+*/ -+PROTO (cntb_vl7, uint64_t, ()) { return svcntb_pat (SV_VL7); } -+ -+/* -+** cntb_vl8: -+** mov x0, #?8 -+** ret -+*/ -+PROTO (cntb_vl8, uint64_t, ()) { return svcntb_pat (SV_VL8); } -+ -+/* -+** cntb_vl16: -+** mov x0, #?16 -+** ret -+*/ -+PROTO (cntb_vl16, uint64_t, ()) { return svcntb_pat (SV_VL16); } -+ -+/* -+** cntb_vl32: -+** cntb x0, vl32 -+** ret -+*/ -+PROTO (cntb_vl32, uint64_t, ()) { return svcntb_pat (SV_VL32); } -+ -+/* -+** cntb_vl64: -+** cntb x0, vl64 -+** ret -+*/ -+PROTO (cntb_vl64, uint64_t, ()) { return svcntb_pat (SV_VL64); } -+ -+/* -+** cntb_vl128: -+** cntb x0, vl128 -+** ret -+*/ -+PROTO (cntb_vl128, uint64_t, ()) { return svcntb_pat (SV_VL128); } -+ -+/* -+** cntb_vl256: -+** cntb x0, vl256 -+** ret -+*/ -+PROTO (cntb_vl256, uint64_t, ()) { return svcntb_pat (SV_VL256); } -+ -+/* -+** cntb_mul3: -+** cntb x0, mul3 -+** ret -+*/ -+PROTO (cntb_mul3, uint64_t, ()) { return svcntb_pat (SV_MUL3); } -+ -+/* -+** cntb_mul4: -+** cntb x0, mul4 -+** ret -+*/ -+PROTO (cntb_mul4, uint64_t, ()) { return svcntb_pat (SV_MUL4); } -+ -+/* -+** cntb_all: -+** cntb x0 -+** ret -+*/ -+PROTO (cntb_all, uint64_t, ()) { return svcntb_pat (SV_ALL); } -+ -+/* -+** incb_32_pow2: -+** incb x0, pow2 -+** ret -+*/ -+PROTO (incb_32_pow2, uint32_t, (uint32_t w0)) { return w0 + svcntb_pat (SV_POW2); } -+ -+/* -+** incb_32_vl1: -+** add w0, w0, #?1 -+** ret -+*/ -+PROTO (incb_32_vl1, uint32_t, (uint32_t w0)) { return w0 + svcntb_pat (SV_VL1); } -+ -+/* -+** incb_32_vl2: -+** add w0, w0, #?2 -+** ret -+*/ -+PROTO (incb_32_vl2, uint32_t, (uint32_t w0)) { return w0 + svcntb_pat (SV_VL2); } -+ -+/* -+** incb_32_vl3: -+** add w0, w0, #?3 -+** ret -+*/ -+PROTO (incb_32_vl3, uint32_t, (uint32_t w0)) { return w0 + svcntb_pat (SV_VL3); } -+ -+/* -+** incb_32_vl4: -+** add w0, w0, #?4 -+** ret -+*/ -+PROTO (incb_32_vl4, uint32_t, (uint32_t w0)) { return w0 + svcntb_pat (SV_VL4); } -+ -+/* -+** incb_32_vl5: -+** add w0, w0, #?5 -+** ret -+*/ -+PROTO (incb_32_vl5, uint32_t, (uint32_t w0)) { return w0 + svcntb_pat (SV_VL5); } -+ -+/* -+** incb_32_vl6: -+** add w0, w0, #?6 -+** ret -+*/ -+PROTO (incb_32_vl6, uint32_t, (uint32_t w0)) { return w0 + svcntb_pat (SV_VL6); } -+ -+/* -+** incb_32_vl7: -+** add w0, w0, #?7 -+** ret -+*/ -+PROTO (incb_32_vl7, uint32_t, (uint32_t w0)) { return w0 + svcntb_pat (SV_VL7); } -+ -+/* -+** incb_32_vl8: -+** add w0, w0, #?8 -+** ret -+*/ -+PROTO (incb_32_vl8, uint32_t, (uint32_t w0)) { return w0 + svcntb_pat (SV_VL8); } -+ -+/* -+** incb_32_vl16: -+** add w0, w0, #?16 -+** ret -+*/ -+PROTO (incb_32_vl16, uint32_t, (uint32_t w0)) { return w0 + svcntb_pat (SV_VL16); } -+ -+/* -+** incb_32_vl32: -+** incb x0, vl32 -+** ret -+*/ -+PROTO (incb_32_vl32, uint32_t, (uint32_t w0)) { return w0 + svcntb_pat (SV_VL32); } -+ -+/* -+** incb_32_vl64: -+** incb x0, vl64 -+** ret -+*/ -+PROTO (incb_32_vl64, uint32_t, (uint32_t w0)) { return w0 + svcntb_pat (SV_VL64); } -+ -+/* -+** incb_32_vl128: -+** incb x0, vl128 -+** ret -+*/ -+PROTO (incb_32_vl128, uint32_t, (uint32_t w0)) { return w0 + svcntb_pat (SV_VL128); } -+ -+/* -+** incb_32_vl256: -+** incb x0, vl256 -+** ret -+*/ -+PROTO 
(incb_32_vl256, uint32_t, (uint32_t w0)) { return w0 + svcntb_pat (SV_VL256); } -+ -+/* -+** incb_32_mul3: -+** incb x0, mul3 -+** ret -+*/ -+PROTO (incb_32_mul3, uint32_t, (uint32_t w0)) { return w0 + svcntb_pat (SV_MUL3); } -+ -+/* -+** incb_32_mul4: -+** incb x0, mul4 -+** ret -+*/ -+PROTO (incb_32_mul4, uint32_t, (uint32_t w0)) { return w0 + svcntb_pat (SV_MUL4); } -+ -+/* -+** incb_32_all: -+** incb x0 -+** ret -+*/ -+PROTO (incb_32_all, uint32_t, (uint32_t w0)) { return w0 + svcntb_pat (SV_ALL); } -+ -+/* -+** incb_64_pow2: -+** incb x0, pow2 -+** ret -+*/ -+PROTO (incb_64_pow2, uint64_t, (uint64_t x0)) { return x0 + svcntb_pat (SV_POW2); } -+ -+/* -+** incb_64_all: -+** incb x0 -+** ret -+*/ -+PROTO (incb_64_all, uint64_t, (uint64_t x0)) { return x0 + svcntb_pat (SV_ALL); } -+ -+/* -+** decb_32_pow2: -+** decb x0, pow2 -+** ret -+*/ -+PROTO (decb_32_pow2, uint32_t, (uint32_t w0)) { return w0 - svcntb_pat (SV_POW2); } -+ -+/* -+** decb_32_vl1: -+** sub w0, w0, #?1 -+** ret -+*/ -+PROTO (decb_32_vl1, uint32_t, (uint32_t w0)) { return w0 - svcntb_pat (SV_VL1); } -+ -+/* -+** decb_32_vl2: -+** sub w0, w0, #?2 -+** ret -+*/ -+PROTO (decb_32_vl2, uint32_t, (uint32_t w0)) { return w0 - svcntb_pat (SV_VL2); } -+ -+/* -+** decb_32_vl3: -+** sub w0, w0, #?3 -+** ret -+*/ -+PROTO (decb_32_vl3, uint32_t, (uint32_t w0)) { return w0 - svcntb_pat (SV_VL3); } -+ -+/* -+** decb_32_vl4: -+** sub w0, w0, #?4 -+** ret -+*/ -+PROTO (decb_32_vl4, uint32_t, (uint32_t w0)) { return w0 - svcntb_pat (SV_VL4); } -+ -+/* -+** decb_32_vl5: -+** sub w0, w0, #?5 -+** ret -+*/ -+PROTO (decb_32_vl5, uint32_t, (uint32_t w0)) { return w0 - svcntb_pat (SV_VL5); } -+ -+/* -+** decb_32_vl6: -+** sub w0, w0, #?6 -+** ret -+*/ -+PROTO (decb_32_vl6, uint32_t, (uint32_t w0)) { return w0 - svcntb_pat (SV_VL6); } -+ -+/* -+** decb_32_vl7: -+** sub w0, w0, #?7 -+** ret -+*/ -+PROTO (decb_32_vl7, uint32_t, (uint32_t w0)) { return w0 - svcntb_pat (SV_VL7); } -+ -+/* -+** decb_32_vl8: -+** sub w0, w0, #?8 -+** ret -+*/ -+PROTO (decb_32_vl8, uint32_t, (uint32_t w0)) { return w0 - svcntb_pat (SV_VL8); } -+ -+/* -+** decb_32_vl16: -+** sub w0, w0, #?16 -+** ret -+*/ -+PROTO (decb_32_vl16, uint32_t, (uint32_t w0)) { return w0 - svcntb_pat (SV_VL16); } -+ -+/* -+** decb_32_vl32: -+** decb x0, vl32 -+** ret -+*/ -+PROTO (decb_32_vl32, uint32_t, (uint32_t w0)) { return w0 - svcntb_pat (SV_VL32); } -+ -+/* -+** decb_32_vl64: -+** decb x0, vl64 -+** ret -+*/ -+PROTO (decb_32_vl64, uint32_t, (uint32_t w0)) { return w0 - svcntb_pat (SV_VL64); } -+ -+/* -+** decb_32_vl128: -+** decb x0, vl128 -+** ret -+*/ -+PROTO (decb_32_vl128, uint32_t, (uint32_t w0)) { return w0 - svcntb_pat (SV_VL128); } -+ -+/* -+** decb_32_vl256: -+** decb x0, vl256 -+** ret -+*/ -+PROTO (decb_32_vl256, uint32_t, (uint32_t w0)) { return w0 - svcntb_pat (SV_VL256); } -+ -+/* -+** decb_32_mul3: -+** decb x0, mul3 -+** ret -+*/ -+PROTO (decb_32_mul3, uint32_t, (uint32_t w0)) { return w0 - svcntb_pat (SV_MUL3); } -+ -+/* -+** decb_32_mul4: -+** decb x0, mul4 -+** ret -+*/ -+PROTO (decb_32_mul4, uint32_t, (uint32_t w0)) { return w0 - svcntb_pat (SV_MUL4); } -+ -+/* -+** decb_32_all: -+** decb x0 -+** ret -+*/ -+PROTO (decb_32_all, uint32_t, (uint32_t w0)) { return w0 - svcntb_pat (SV_ALL); } -+ -+/* -+** decb_64_pow2: -+** decb x0, pow2 -+** ret -+*/ -+PROTO (decb_64_pow2, uint64_t, (uint64_t x0)) { return x0 - svcntb_pat (SV_POW2); } -+ -+/* -+** decb_64_all: -+** decb x0 -+** ret -+*/ -+PROTO (decb_64_all, uint64_t, (uint64_t x0)) { return x0 - svcntb_pat (SV_ALL); } -+ 
-+/* -+** incb_s8_pow2_z0: -+** cntb x([0-9]+), pow2 -+** mov (z[0-9]+\.b), w\1 -+** add z0\.b, (z0\.b, \2|\2, z0\.b) -+** ret -+*/ -+TEST_UNIFORM_Z (incb_s8_pow2_z0, svint8_t, -+ z0 = svadd_n_s8_x (svptrue_b8 (), z0, svcntb_pat (SV_POW2)), -+ z0 = svadd_x (svptrue_b8 (), z0, svcntb_pat (SV_POW2))); -+ -+/* -+** incb_s8_pow2_z1: -+** cntb x([0-9]+), pow2 -+** mov (z[0-9]+\.b), w\1 -+** add z0\.b, (z1\.b, \2|\2, z1\.b) -+** ret -+*/ -+TEST_UNIFORM_Z (incb_s8_pow2_z1, svint8_t, -+ z0 = svadd_n_s8_x (svptrue_b8 (), z1, svcntb_pat (SV_POW2)), -+ z0 = svadd_x (svptrue_b8 (), z1, svcntb_pat (SV_POW2))); -+ -+/* -+** decb_s8_pow2_z0: -+** cntb x([0-9]+), pow2 -+** mov (z[0-9]+\.b), w\1 -+** sub z0\.b, z0\.b, \2 -+** ret -+*/ -+TEST_UNIFORM_Z (decb_s8_pow2_z0, svint8_t, -+ z0 = svsub_n_s8_x (svptrue_b8 (), z0, svcntb_pat (SV_POW2)), -+ z0 = svsub_x (svptrue_b8 (), z0, svcntb_pat (SV_POW2))); -+ -+/* -+** decb_s8_pow2_z1: -+** cntb x([0-9]+), pow2 -+** mov (z[0-9]+\.b), w\1 -+** sub z0\.b, z1\.b, \2 -+** ret -+*/ -+TEST_UNIFORM_Z (decb_s8_pow2_z1, svint8_t, -+ z0 = svsub_n_s8_x (svptrue_b8 (), z1, svcntb_pat (SV_POW2)), -+ z0 = svsub_x (svptrue_b8 (), z1, svcntb_pat (SV_POW2))); -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntd.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntd.c -new file mode 100644 -index 000000000..0d0ed4849 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntd.c -@@ -0,0 +1,278 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cntd_1: -+** cntd x0 -+** ret -+*/ -+PROTO (cntd_1, uint64_t, ()) { return svcntd (); } -+ -+/* -+** cntd_2: -+** cntw x0 -+** ret -+*/ -+PROTO (cntd_2, uint64_t, ()) { return svcntd () * 2; } -+ -+/* -+** cntd_3: -+** cntd x0, all, mul #3 -+** ret -+*/ -+PROTO (cntd_3, uint64_t, ()) { return svcntd () * 3; } -+ -+/* -+** cntd_4: -+** cnth x0 -+** ret -+*/ -+PROTO (cntd_4, uint64_t, ()) { return svcntd () * 4; } -+ -+/* -+** cntd_8: -+** cntb x0 -+** ret -+*/ -+PROTO (cntd_8, uint64_t, ()) { return svcntd () * 8; } -+ -+/* -+** cntd_15: -+** cntd x0, all, mul #15 -+** ret -+*/ -+PROTO (cntd_15, uint64_t, ()) { return svcntd () * 15; } -+ -+/* -+** cntd_16: -+** cntb x0, all, mul #2 -+** ret -+*/ -+PROTO (cntd_16, uint64_t, ()) { return svcntd () * 16; } -+ -+/* Other sequences would be OK. */ -+/* -+** cntd_17: -+** cntb x0, all, mul #2 -+** incd x0 -+** ret -+*/ -+PROTO (cntd_17, uint64_t, ()) { return svcntd () * 17; } -+ -+/* -+** cntd_32: -+** cntb x0, all, mul #4 -+** ret -+*/ -+PROTO (cntd_32, uint64_t, ()) { return svcntd () * 32; } -+ -+/* -+** cntd_64: -+** cntb x0, all, mul #8 -+** ret -+*/ -+PROTO (cntd_64, uint64_t, ()) { return svcntd () * 64; } -+ -+/* -+** cntd_128: -+** cntb x0, all, mul #16 -+** ret -+*/ -+PROTO (cntd_128, uint64_t, ()) { return svcntd () * 128; } -+ -+/* -+** cntd_m1: -+** cntd (x[0-9]+) -+** neg x0, \1 -+** ret -+*/ -+PROTO (cntd_m1, uint64_t, ()) { return -svcntd (); } -+ -+/* -+** cntd_m13: -+** cntd (x[0-9]+), all, mul #13 -+** neg x0, \1 -+** ret -+*/ -+PROTO (cntd_m13, uint64_t, ()) { return -svcntd () * 13; } -+ -+/* -+** cntd_m15: -+** cntd (x[0-9]+), all, mul #15 -+** neg x0, \1 -+** ret -+*/ -+PROTO (cntd_m15, uint64_t, ()) { return -svcntd () * 15; } -+ -+/* -+** cntd_m16: -+** cntb (x[0-9]+), all, mul #2 -+** neg x0, \1 -+** ret -+*/ -+PROTO (cntd_m16, uint64_t, ()) { return -svcntd () * 16; } -+ -+/* Other sequences would be OK. 
*/ -+/* -+** cntd_m17: -+** cntb x0, all, mul #2 -+** incd x0 -+** neg x0, x0 -+** ret -+*/ -+PROTO (cntd_m17, uint64_t, ()) { return -svcntd () * 17; } -+ -+/* -+** incd_1: -+** incd x0 -+** ret -+*/ -+PROTO (incd_1, uint64_t, (uint64_t x0)) { return x0 + svcntd (); } -+ -+/* -+** incd_2: -+** incw x0 -+** ret -+*/ -+PROTO (incd_2, uint64_t, (uint64_t x0)) { return x0 + svcntd () * 2; } -+ -+/* -+** incd_3: -+** incd x0, all, mul #3 -+** ret -+*/ -+PROTO (incd_3, uint64_t, (uint64_t x0)) { return x0 + svcntd () * 3; } -+ -+/* -+** incd_4: -+** inch x0 -+** ret -+*/ -+PROTO (incd_4, uint64_t, (uint64_t x0)) { return x0 + svcntd () * 4; } -+ -+/* -+** incd_7: -+** incd x0, all, mul #7 -+** ret -+*/ -+PROTO (incd_7, uint64_t, (uint64_t x0)) { return x0 + svcntd () * 7; } -+ -+/* -+** incd_8: -+** incb x0 -+** ret -+*/ -+PROTO (incd_8, uint64_t, (uint64_t x0)) { return x0 + svcntd () * 8; } -+ -+/* -+** incd_9: -+** incd x0, all, mul #9 -+** ret -+*/ -+PROTO (incd_9, uint64_t, (uint64_t x0)) { return x0 + svcntd () * 9; } -+ -+/* -+** incd_15: -+** incd x0, all, mul #15 -+** ret -+*/ -+PROTO (incd_15, uint64_t, (uint64_t x0)) { return x0 + svcntd () * 15; } -+ -+/* -+** incd_16: -+** incb x0, all, mul #2 -+** ret -+*/ -+PROTO (incd_16, uint64_t, (uint64_t x0)) { return x0 + svcntd () * 16; } -+ -+/* -+** incd_18: -+** incw x0, all, mul #9 -+** ret -+*/ -+PROTO (incd_18, uint64_t, (uint64_t x0)) { return x0 + svcntd () * 18; } -+ -+/* -+** incd_30: -+** incw x0, all, mul #15 -+** ret -+*/ -+PROTO (incd_30, uint64_t, (uint64_t x0)) { return x0 + svcntd () * 30; } -+ -+/* -+** decd_1: -+** decd x0 -+** ret -+*/ -+PROTO (decd_1, uint64_t, (uint64_t x0)) { return x0 - svcntd (); } -+ -+/* -+** decd_2: -+** decw x0 -+** ret -+*/ -+PROTO (decd_2, uint64_t, (uint64_t x0)) { return x0 - svcntd () * 2; } -+ -+/* -+** decd_3: -+** decd x0, all, mul #3 -+** ret -+*/ -+PROTO (decd_3, uint64_t, (uint64_t x0)) { return x0 - svcntd () * 3; } -+ -+/* -+** decd_4: -+** dech x0 -+** ret -+*/ -+PROTO (decd_4, uint64_t, (uint64_t x0)) { return x0 - svcntd () * 4; } -+ -+/* -+** decd_7: -+** decd x0, all, mul #7 -+** ret -+*/ -+PROTO (decd_7, uint64_t, (uint64_t x0)) { return x0 - svcntd () * 7; } -+ -+/* -+** decd_8: -+** decb x0 -+** ret -+*/ -+PROTO (decd_8, uint64_t, (uint64_t x0)) { return x0 - svcntd () * 8; } -+ -+/* -+** decd_9: -+** decd x0, all, mul #9 -+** ret -+*/ -+PROTO (decd_9, uint64_t, (uint64_t x0)) { return x0 - svcntd () * 9; } -+ -+/* -+** decd_15: -+** decd x0, all, mul #15 -+** ret -+*/ -+PROTO (decd_15, uint64_t, (uint64_t x0)) { return x0 - svcntd () * 15; } -+ -+/* -+** decd_16: -+** decb x0, all, mul #2 -+** ret -+*/ -+PROTO (decd_16, uint64_t, (uint64_t x0)) { return x0 - svcntd () * 16; } -+ -+/* -+** decd_18: -+** decw x0, all, mul #9 -+** ret -+*/ -+PROTO (decd_18, uint64_t, (uint64_t x0)) { return x0 - svcntd () * 18; } -+ -+/* -+** decd_30: -+** decw x0, all, mul #15 -+** ret -+*/ -+PROTO (decd_30, uint64_t, (uint64_t x0)) { return x0 - svcntd () * 30; } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntd_pat.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntd_pat.c -new file mode 100644 -index 000000000..31ecde7ae ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntd_pat.c -@@ -0,0 +1,426 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cntd_pow2: -+** cntd x0, pow2 -+** ret -+*/ -+PROTO (cntd_pow2, uint64_t, ()) { return svcntd_pat (SV_POW2); } -+ -+/* -+** cntd_vl1: -+** mov 
x0, #?1 -+** ret -+*/ -+PROTO (cntd_vl1, uint64_t, ()) { return svcntd_pat (SV_VL1); } -+ -+/* -+** cntd_vl2: -+** mov x0, #?2 -+** ret -+*/ -+PROTO (cntd_vl2, uint64_t, ()) { return svcntd_pat (SV_VL2); } -+ -+/* -+** cntd_vl3: -+** cntd x0, vl3 -+** ret -+*/ -+PROTO (cntd_vl3, uint64_t, ()) { return svcntd_pat (SV_VL3); } -+ -+/* -+** cntd_vl4: -+** cntd x0, vl4 -+** ret -+*/ -+PROTO (cntd_vl4, uint64_t, ()) { return svcntd_pat (SV_VL4); } -+ -+/* -+** cntd_vl5: -+** cntd x0, vl5 -+** ret -+*/ -+PROTO (cntd_vl5, uint64_t, ()) { return svcntd_pat (SV_VL5); } -+ -+/* -+** cntd_vl6: -+** cntd x0, vl6 -+** ret -+*/ -+PROTO (cntd_vl6, uint64_t, ()) { return svcntd_pat (SV_VL6); } -+ -+/* -+** cntd_vl7: -+** cntd x0, vl7 -+** ret -+*/ -+PROTO (cntd_vl7, uint64_t, ()) { return svcntd_pat (SV_VL7); } -+ -+/* -+** cntd_vl8: -+** cntd x0, vl8 -+** ret -+*/ -+PROTO (cntd_vl8, uint64_t, ()) { return svcntd_pat (SV_VL8); } -+ -+/* -+** cntd_vl16: -+** cntd x0, vl16 -+** ret -+*/ -+PROTO (cntd_vl16, uint64_t, ()) { return svcntd_pat (SV_VL16); } -+ -+/* -+** cntd_vl32: -+** cntd x0, vl32 -+** ret -+*/ -+PROTO (cntd_vl32, uint64_t, ()) { return svcntd_pat (SV_VL32); } -+ -+/* -+** cntd_vl64: -+** cntd x0, vl64 -+** ret -+*/ -+PROTO (cntd_vl64, uint64_t, ()) { return svcntd_pat (SV_VL64); } -+ -+/* -+** cntd_vl128: -+** cntd x0, vl128 -+** ret -+*/ -+PROTO (cntd_vl128, uint64_t, ()) { return svcntd_pat (SV_VL128); } -+ -+/* -+** cntd_vl256: -+** cntd x0, vl256 -+** ret -+*/ -+PROTO (cntd_vl256, uint64_t, ()) { return svcntd_pat (SV_VL256); } -+ -+/* -+** cntd_mul3: -+** cntd x0, mul3 -+** ret -+*/ -+PROTO (cntd_mul3, uint64_t, ()) { return svcntd_pat (SV_MUL3); } -+ -+/* -+** cntd_mul4: -+** cntd x0, mul4 -+** ret -+*/ -+PROTO (cntd_mul4, uint64_t, ()) { return svcntd_pat (SV_MUL4); } -+ -+/* -+** cntd_all: -+** cntd x0 -+** ret -+*/ -+PROTO (cntd_all, uint64_t, ()) { return svcntd_pat (SV_ALL); } -+ -+/* -+** incd_32_pow2: -+** incd x0, pow2 -+** ret -+*/ -+PROTO (incd_32_pow2, uint32_t, (uint32_t w0)) { return w0 + svcntd_pat (SV_POW2); } -+ -+/* -+** incd_32_vl1: -+** add w0, w0, #?1 -+** ret -+*/ -+PROTO (incd_32_vl1, uint32_t, (uint32_t w0)) { return w0 + svcntd_pat (SV_VL1); } -+ -+/* -+** incd_32_vl2: -+** add w0, w0, #?2 -+** ret -+*/ -+PROTO (incd_32_vl2, uint32_t, (uint32_t w0)) { return w0 + svcntd_pat (SV_VL2); } -+ -+/* -+** incd_32_vl3: -+** incd x0, vl3 -+** ret -+*/ -+PROTO (incd_32_vl3, uint32_t, (uint32_t w0)) { return w0 + svcntd_pat (SV_VL3); } -+ -+/* -+** incd_32_vl4: -+** incd x0, vl4 -+** ret -+*/ -+PROTO (incd_32_vl4, uint32_t, (uint32_t w0)) { return w0 + svcntd_pat (SV_VL4); } -+ -+/* -+** incd_32_vl5: -+** incd x0, vl5 -+** ret -+*/ -+PROTO (incd_32_vl5, uint32_t, (uint32_t w0)) { return w0 + svcntd_pat (SV_VL5); } -+ -+/* -+** incd_32_vl6: -+** incd x0, vl6 -+** ret -+*/ -+PROTO (incd_32_vl6, uint32_t, (uint32_t w0)) { return w0 + svcntd_pat (SV_VL6); } -+ -+/* -+** incd_32_vl7: -+** incd x0, vl7 -+** ret -+*/ -+PROTO (incd_32_vl7, uint32_t, (uint32_t w0)) { return w0 + svcntd_pat (SV_VL7); } -+ -+/* -+** incd_32_vl8: -+** incd x0, vl8 -+** ret -+*/ -+PROTO (incd_32_vl8, uint32_t, (uint32_t w0)) { return w0 + svcntd_pat (SV_VL8); } -+ -+/* -+** incd_32_vl16: -+** incd x0, vl16 -+** ret -+*/ -+PROTO (incd_32_vl16, uint32_t, (uint32_t w0)) { return w0 + svcntd_pat (SV_VL16); } -+ -+/* -+** incd_32_vl32: -+** incd x0, vl32 -+** ret -+*/ -+PROTO (incd_32_vl32, uint32_t, (uint32_t w0)) { return w0 + svcntd_pat (SV_VL32); } -+ -+/* -+** incd_32_vl64: -+** incd x0, vl64 -+** ret 
-+*/ -+PROTO (incd_32_vl64, uint32_t, (uint32_t w0)) { return w0 + svcntd_pat (SV_VL64); } -+ -+/* -+** incd_32_vl128: -+** incd x0, vl128 -+** ret -+*/ -+PROTO (incd_32_vl128, uint32_t, (uint32_t w0)) { return w0 + svcntd_pat (SV_VL128); } -+ -+/* -+** incd_32_vl256: -+** incd x0, vl256 -+** ret -+*/ -+PROTO (incd_32_vl256, uint32_t, (uint32_t w0)) { return w0 + svcntd_pat (SV_VL256); } -+ -+/* -+** incd_32_mul3: -+** incd x0, mul3 -+** ret -+*/ -+PROTO (incd_32_mul3, uint32_t, (uint32_t w0)) { return w0 + svcntd_pat (SV_MUL3); } -+ -+/* -+** incd_32_mul4: -+** incd x0, mul4 -+** ret -+*/ -+PROTO (incd_32_mul4, uint32_t, (uint32_t w0)) { return w0 + svcntd_pat (SV_MUL4); } -+ -+/* -+** incd_32_all: -+** incd x0 -+** ret -+*/ -+PROTO (incd_32_all, uint32_t, (uint32_t w0)) { return w0 + svcntd_pat (SV_ALL); } -+ -+/* -+** incd_64_pow2: -+** incd x0, pow2 -+** ret -+*/ -+PROTO (incd_64_pow2, uint64_t, (uint64_t x0)) { return x0 + svcntd_pat (SV_POW2); } -+ -+/* -+** incd_64_all: -+** incd x0 -+** ret -+*/ -+PROTO (incd_64_all, uint64_t, (uint64_t x0)) { return x0 + svcntd_pat (SV_ALL); } -+ -+/* -+** decd_32_pow2: -+** decd x0, pow2 -+** ret -+*/ -+PROTO (decd_32_pow2, uint32_t, (uint32_t w0)) { return w0 - svcntd_pat (SV_POW2); } -+ -+/* -+** decd_32_vl1: -+** sub w0, w0, #?1 -+** ret -+*/ -+PROTO (decd_32_vl1, uint32_t, (uint32_t w0)) { return w0 - svcntd_pat (SV_VL1); } -+ -+/* -+** decd_32_vl2: -+** sub w0, w0, #?2 -+** ret -+*/ -+PROTO (decd_32_vl2, uint32_t, (uint32_t w0)) { return w0 - svcntd_pat (SV_VL2); } -+ -+/* -+** decd_32_vl3: -+** decd x0, vl3 -+** ret -+*/ -+PROTO (decd_32_vl3, uint32_t, (uint32_t w0)) { return w0 - svcntd_pat (SV_VL3); } -+ -+/* -+** decd_32_vl4: -+** decd x0, vl4 -+** ret -+*/ -+PROTO (decd_32_vl4, uint32_t, (uint32_t w0)) { return w0 - svcntd_pat (SV_VL4); } -+ -+/* -+** decd_32_vl5: -+** decd x0, vl5 -+** ret -+*/ -+PROTO (decd_32_vl5, uint32_t, (uint32_t w0)) { return w0 - svcntd_pat (SV_VL5); } -+ -+/* -+** decd_32_vl6: -+** decd x0, vl6 -+** ret -+*/ -+PROTO (decd_32_vl6, uint32_t, (uint32_t w0)) { return w0 - svcntd_pat (SV_VL6); } -+ -+/* -+** decd_32_vl7: -+** decd x0, vl7 -+** ret -+*/ -+PROTO (decd_32_vl7, uint32_t, (uint32_t w0)) { return w0 - svcntd_pat (SV_VL7); } -+ -+/* -+** decd_32_vl8: -+** decd x0, vl8 -+** ret -+*/ -+PROTO (decd_32_vl8, uint32_t, (uint32_t w0)) { return w0 - svcntd_pat (SV_VL8); } -+ -+/* -+** decd_32_vl16: -+** decd x0, vl16 -+** ret -+*/ -+PROTO (decd_32_vl16, uint32_t, (uint32_t w0)) { return w0 - svcntd_pat (SV_VL16); } -+ -+/* -+** decd_32_vl32: -+** decd x0, vl32 -+** ret -+*/ -+PROTO (decd_32_vl32, uint32_t, (uint32_t w0)) { return w0 - svcntd_pat (SV_VL32); } -+ -+/* -+** decd_32_vl64: -+** decd x0, vl64 -+** ret -+*/ -+PROTO (decd_32_vl64, uint32_t, (uint32_t w0)) { return w0 - svcntd_pat (SV_VL64); } -+ -+/* -+** decd_32_vl128: -+** decd x0, vl128 -+** ret -+*/ -+PROTO (decd_32_vl128, uint32_t, (uint32_t w0)) { return w0 - svcntd_pat (SV_VL128); } -+ -+/* -+** decd_32_vl256: -+** decd x0, vl256 -+** ret -+*/ -+PROTO (decd_32_vl256, uint32_t, (uint32_t w0)) { return w0 - svcntd_pat (SV_VL256); } -+ -+/* -+** decd_32_mul3: -+** decd x0, mul3 -+** ret -+*/ -+PROTO (decd_32_mul3, uint32_t, (uint32_t w0)) { return w0 - svcntd_pat (SV_MUL3); } -+ -+/* -+** decd_32_mul4: -+** decd x0, mul4 -+** ret -+*/ -+PROTO (decd_32_mul4, uint32_t, (uint32_t w0)) { return w0 - svcntd_pat (SV_MUL4); } -+ -+/* -+** decd_32_all: -+** decd x0 -+** ret -+*/ -+PROTO (decd_32_all, uint32_t, (uint32_t w0)) { return w0 - svcntd_pat 
(SV_ALL); } -+ -+/* -+** decd_64_pow2: -+** decd x0, pow2 -+** ret -+*/ -+PROTO (decd_64_pow2, uint64_t, (uint64_t x0)) { return x0 - svcntd_pat (SV_POW2); } -+ -+/* -+** decd_64_all: -+** decd x0 -+** ret -+*/ -+PROTO (decd_64_all, uint64_t, (uint64_t x0)) { return x0 - svcntd_pat (SV_ALL); } -+ -+/* -+** incd_s64_pow2_z0: -+** incd z0\.d, pow2 -+** ret -+*/ -+TEST_UNIFORM_Z (incd_s64_pow2_z0, svint64_t, -+ z0 = svadd_n_s64_x (svptrue_b64 (), z0, svcntd_pat (SV_POW2)), -+ z0 = svadd_x (svptrue_b64 (), z0, svcntd_pat (SV_POW2))); -+ -+/* -+** incd_s64_pow2_z1: -+** movprfx z0, z1 -+** incd z0\.d, pow2 -+** ret -+*/ -+TEST_UNIFORM_Z (incd_s64_pow2_z1, svint64_t, -+ z0 = svadd_n_s64_x (svptrue_b64 (), z1, svcntd_pat (SV_POW2)), -+ z0 = svadd_x (svptrue_b64 (), z1, svcntd_pat (SV_POW2))); -+ -+/* -+** decd_s64_pow2_z0: -+** decd z0\.d, pow2 -+** ret -+*/ -+TEST_UNIFORM_Z (decd_s64_pow2_z0, svint64_t, -+ z0 = svsub_n_s64_x (svptrue_b64 (), z0, svcntd_pat (SV_POW2)), -+ z0 = svsub_x (svptrue_b64 (), z0, svcntd_pat (SV_POW2))); -+ -+/* -+** decd_s64_pow2_z1: -+** movprfx z0, z1 -+** decd z0\.d, pow2 -+** ret -+*/ -+TEST_UNIFORM_Z (decd_s64_pow2_z1, svint64_t, -+ z0 = svsub_n_s64_x (svptrue_b64 (), z1, svcntd_pat (SV_POW2)), -+ z0 = svsub_x (svptrue_b64 (), z1, svcntd_pat (SV_POW2))); -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnth.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnth.c -new file mode 100644 -index 000000000..c29930f15 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnth.c -@@ -0,0 +1,280 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cnth_1: -+** cnth x0 -+** ret -+*/ -+PROTO (cnth_1, uint64_t, ()) { return svcnth (); } -+ -+/* -+** cnth_2: -+** cntb x0 -+** ret -+*/ -+PROTO (cnth_2, uint64_t, ()) { return svcnth () * 2; } -+ -+/* -+** cnth_3: -+** cnth x0, all, mul #3 -+** ret -+*/ -+PROTO (cnth_3, uint64_t, ()) { return svcnth () * 3; } -+ -+/* -+** cnth_4: -+** cntb x0, all, mul #2 -+** ret -+*/ -+PROTO (cnth_4, uint64_t, ()) { return svcnth () * 4; } -+ -+/* -+** cnth_8: -+** cntb x0, all, mul #4 -+** ret -+*/ -+PROTO (cnth_8, uint64_t, ()) { return svcnth () * 8; } -+ -+/* -+** cnth_15: -+** cnth x0, all, mul #15 -+** ret -+*/ -+PROTO (cnth_15, uint64_t, ()) { return svcnth () * 15; } -+ -+/* -+** cnth_16: -+** cntb x0, all, mul #8 -+** ret -+*/ -+PROTO (cnth_16, uint64_t, ()) { return svcnth () * 16; } -+ -+/* Other sequences would be OK. 
*/ -+/* -+** cnth_17: -+** cntb x0, all, mul #8 -+** inch x0 -+** ret -+*/ -+PROTO (cnth_17, uint64_t, ()) { return svcnth () * 17; } -+ -+/* -+** cnth_32: -+** cntb x0, all, mul #16 -+** ret -+*/ -+PROTO (cnth_32, uint64_t, ()) { return svcnth () * 32; } -+ -+/* -+** cnth_64: -+** cntd (x[0-9]+) -+** lsl x0, \1, 8 -+** ret -+*/ -+PROTO (cnth_64, uint64_t, ()) { return svcnth () * 64; } -+ -+/* -+** cnth_128: -+** cntd (x[0-9]+) -+** lsl x0, \1, 9 -+** ret -+*/ -+PROTO (cnth_128, uint64_t, ()) { return svcnth () * 128; } -+ -+/* -+** cnth_m1: -+** cnth (x[0-9]+) -+** neg x0, \1 -+** ret -+*/ -+PROTO (cnth_m1, uint64_t, ()) { return -svcnth (); } -+ -+/* -+** cnth_m13: -+** cnth (x[0-9]+), all, mul #13 -+** neg x0, \1 -+** ret -+*/ -+PROTO (cnth_m13, uint64_t, ()) { return -svcnth () * 13; } -+ -+/* -+** cnth_m15: -+** cnth (x[0-9]+), all, mul #15 -+** neg x0, \1 -+** ret -+*/ -+PROTO (cnth_m15, uint64_t, ()) { return -svcnth () * 15; } -+ -+/* -+** cnth_m16: -+** cntb (x[0-9]+), all, mul #8 -+** neg x0, \1 -+** ret -+*/ -+PROTO (cnth_m16, uint64_t, ()) { return -svcnth () * 16; } -+ -+/* Other sequences would be OK. */ -+/* -+** cnth_m17: -+** cntb x0, all, mul #8 -+** inch x0 -+** neg x0, x0 -+** ret -+*/ -+PROTO (cnth_m17, uint64_t, ()) { return -svcnth () * 17; } -+ -+/* -+** inch_1: -+** inch x0 -+** ret -+*/ -+PROTO (inch_1, uint64_t, (uint64_t x0)) { return x0 + svcnth (); } -+ -+/* -+** inch_2: -+** incb x0 -+** ret -+*/ -+PROTO (inch_2, uint64_t, (uint64_t x0)) { return x0 + svcnth () * 2; } -+ -+/* -+** inch_3: -+** inch x0, all, mul #3 -+** ret -+*/ -+PROTO (inch_3, uint64_t, (uint64_t x0)) { return x0 + svcnth () * 3; } -+ -+/* -+** inch_4: -+** incb x0, all, mul #2 -+** ret -+*/ -+PROTO (inch_4, uint64_t, (uint64_t x0)) { return x0 + svcnth () * 4; } -+ -+/* -+** inch_7: -+** inch x0, all, mul #7 -+** ret -+*/ -+PROTO (inch_7, uint64_t, (uint64_t x0)) { return x0 + svcnth () * 7; } -+ -+/* -+** inch_8: -+** incb x0, all, mul #4 -+** ret -+*/ -+PROTO (inch_8, uint64_t, (uint64_t x0)) { return x0 + svcnth () * 8; } -+ -+/* -+** inch_9: -+** inch x0, all, mul #9 -+** ret -+*/ -+PROTO (inch_9, uint64_t, (uint64_t x0)) { return x0 + svcnth () * 9; } -+ -+/* -+** inch_15: -+** inch x0, all, mul #15 -+** ret -+*/ -+PROTO (inch_15, uint64_t, (uint64_t x0)) { return x0 + svcnth () * 15; } -+ -+/* -+** inch_16: -+** incb x0, all, mul #8 -+** ret -+*/ -+PROTO (inch_16, uint64_t, (uint64_t x0)) { return x0 + svcnth () * 16; } -+ -+/* -+** inch_18: -+** incb x0, all, mul #9 -+** ret -+*/ -+PROTO (inch_18, uint64_t, (uint64_t x0)) { return x0 + svcnth () * 18; } -+ -+/* -+** inch_30: -+** incb x0, all, mul #15 -+** ret -+*/ -+PROTO (inch_30, uint64_t, (uint64_t x0)) { return x0 + svcnth () * 30; } -+ -+/* -+** dech_1: -+** dech x0 -+** ret -+*/ -+PROTO (dech_1, uint64_t, (uint64_t x0)) { return x0 - svcnth (); } -+ -+/* -+** dech_2: -+** decb x0 -+** ret -+*/ -+PROTO (dech_2, uint64_t, (uint64_t x0)) { return x0 - svcnth () * 2; } -+ -+/* -+** dech_3: -+** dech x0, all, mul #3 -+** ret -+*/ -+PROTO (dech_3, uint64_t, (uint64_t x0)) { return x0 - svcnth () * 3; } -+ -+/* -+** dech_4: -+** decb x0, all, mul #2 -+** ret -+*/ -+PROTO (dech_4, uint64_t, (uint64_t x0)) { return x0 - svcnth () * 4; } -+ -+/* -+** dech_7: -+** dech x0, all, mul #7 -+** ret -+*/ -+PROTO (dech_7, uint64_t, (uint64_t x0)) { return x0 - svcnth () * 7; } -+ -+/* -+** dech_8: -+** decb x0, all, mul #4 -+** ret -+*/ -+PROTO (dech_8, uint64_t, (uint64_t x0)) { return x0 - svcnth () * 8; } -+ -+/* -+** dech_9: -+** dech x0, 
all, mul #9 -+** ret -+*/ -+PROTO (dech_9, uint64_t, (uint64_t x0)) { return x0 - svcnth () * 9; } -+ -+/* -+** dech_15: -+** dech x0, all, mul #15 -+** ret -+*/ -+PROTO (dech_15, uint64_t, (uint64_t x0)) { return x0 - svcnth () * 15; } -+ -+/* -+** dech_16: -+** decb x0, all, mul #8 -+** ret -+*/ -+PROTO (dech_16, uint64_t, (uint64_t x0)) { return x0 - svcnth () * 16; } -+ -+/* -+** dech_18: -+** decb x0, all, mul #9 -+** ret -+*/ -+PROTO (dech_18, uint64_t, (uint64_t x0)) { return x0 - svcnth () * 18; } -+ -+/* -+** dech_30: -+** decb x0, all, mul #15 -+** ret -+*/ -+PROTO (dech_30, uint64_t, (uint64_t x0)) { return x0 - svcnth () * 30; } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnth_pat.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnth_pat.c -new file mode 100644 -index 000000000..7a42e7ad9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnth_pat.c -@@ -0,0 +1,426 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cnth_pow2: -+** cnth x0, pow2 -+** ret -+*/ -+PROTO (cnth_pow2, uint64_t, ()) { return svcnth_pat (SV_POW2); } -+ -+/* -+** cnth_vl1: -+** mov x0, #?1 -+** ret -+*/ -+PROTO (cnth_vl1, uint64_t, ()) { return svcnth_pat (SV_VL1); } -+ -+/* -+** cnth_vl2: -+** mov x0, #?2 -+** ret -+*/ -+PROTO (cnth_vl2, uint64_t, ()) { return svcnth_pat (SV_VL2); } -+ -+/* -+** cnth_vl3: -+** mov x0, #?3 -+** ret -+*/ -+PROTO (cnth_vl3, uint64_t, ()) { return svcnth_pat (SV_VL3); } -+ -+/* -+** cnth_vl4: -+** mov x0, #?4 -+** ret -+*/ -+PROTO (cnth_vl4, uint64_t, ()) { return svcnth_pat (SV_VL4); } -+ -+/* -+** cnth_vl5: -+** mov x0, #?5 -+** ret -+*/ -+PROTO (cnth_vl5, uint64_t, ()) { return svcnth_pat (SV_VL5); } -+ -+/* -+** cnth_vl6: -+** mov x0, #?6 -+** ret -+*/ -+PROTO (cnth_vl6, uint64_t, ()) { return svcnth_pat (SV_VL6); } -+ -+/* -+** cnth_vl7: -+** mov x0, #?7 -+** ret -+*/ -+PROTO (cnth_vl7, uint64_t, ()) { return svcnth_pat (SV_VL7); } -+ -+/* -+** cnth_vl8: -+** mov x0, #?8 -+** ret -+*/ -+PROTO (cnth_vl8, uint64_t, ()) { return svcnth_pat (SV_VL8); } -+ -+/* -+** cnth_vl16: -+** cnth x0, vl16 -+** ret -+*/ -+PROTO (cnth_vl16, uint64_t, ()) { return svcnth_pat (SV_VL16); } -+ -+/* -+** cnth_vl32: -+** cnth x0, vl32 -+** ret -+*/ -+PROTO (cnth_vl32, uint64_t, ()) { return svcnth_pat (SV_VL32); } -+ -+/* -+** cnth_vl64: -+** cnth x0, vl64 -+** ret -+*/ -+PROTO (cnth_vl64, uint64_t, ()) { return svcnth_pat (SV_VL64); } -+ -+/* -+** cnth_vl128: -+** cnth x0, vl128 -+** ret -+*/ -+PROTO (cnth_vl128, uint64_t, ()) { return svcnth_pat (SV_VL128); } -+ -+/* -+** cnth_vl256: -+** cnth x0, vl256 -+** ret -+*/ -+PROTO (cnth_vl256, uint64_t, ()) { return svcnth_pat (SV_VL256); } -+ -+/* -+** cnth_mul3: -+** cnth x0, mul3 -+** ret -+*/ -+PROTO (cnth_mul3, uint64_t, ()) { return svcnth_pat (SV_MUL3); } -+ -+/* -+** cnth_mul4: -+** cnth x0, mul4 -+** ret -+*/ -+PROTO (cnth_mul4, uint64_t, ()) { return svcnth_pat (SV_MUL4); } -+ -+/* -+** cnth_all: -+** cnth x0 -+** ret -+*/ -+PROTO (cnth_all, uint64_t, ()) { return svcnth_pat (SV_ALL); } -+ -+/* -+** inch_32_pow2: -+** inch x0, pow2 -+** ret -+*/ -+PROTO (inch_32_pow2, uint32_t, (uint32_t w0)) { return w0 + svcnth_pat (SV_POW2); } -+ -+/* -+** inch_32_vl1: -+** add w0, w0, #?1 -+** ret -+*/ -+PROTO (inch_32_vl1, uint32_t, (uint32_t w0)) { return w0 + svcnth_pat (SV_VL1); } -+ -+/* -+** inch_32_vl2: -+** add w0, w0, #?2 -+** ret -+*/ -+PROTO (inch_32_vl2, uint32_t, (uint32_t w0)) { return w0 + svcnth_pat (SV_VL2); } -+ -+/* -+** 
inch_32_vl3: -+** add w0, w0, #?3 -+** ret -+*/ -+PROTO (inch_32_vl3, uint32_t, (uint32_t w0)) { return w0 + svcnth_pat (SV_VL3); } -+ -+/* -+** inch_32_vl4: -+** add w0, w0, #?4 -+** ret -+*/ -+PROTO (inch_32_vl4, uint32_t, (uint32_t w0)) { return w0 + svcnth_pat (SV_VL4); } -+ -+/* -+** inch_32_vl5: -+** add w0, w0, #?5 -+** ret -+*/ -+PROTO (inch_32_vl5, uint32_t, (uint32_t w0)) { return w0 + svcnth_pat (SV_VL5); } -+ -+/* -+** inch_32_vl6: -+** add w0, w0, #?6 -+** ret -+*/ -+PROTO (inch_32_vl6, uint32_t, (uint32_t w0)) { return w0 + svcnth_pat (SV_VL6); } -+ -+/* -+** inch_32_vl7: -+** add w0, w0, #?7 -+** ret -+*/ -+PROTO (inch_32_vl7, uint32_t, (uint32_t w0)) { return w0 + svcnth_pat (SV_VL7); } -+ -+/* -+** inch_32_vl8: -+** add w0, w0, #?8 -+** ret -+*/ -+PROTO (inch_32_vl8, uint32_t, (uint32_t w0)) { return w0 + svcnth_pat (SV_VL8); } -+ -+/* -+** inch_32_vl16: -+** inch x0, vl16 -+** ret -+*/ -+PROTO (inch_32_vl16, uint32_t, (uint32_t w0)) { return w0 + svcnth_pat (SV_VL16); } -+ -+/* -+** inch_32_vl32: -+** inch x0, vl32 -+** ret -+*/ -+PROTO (inch_32_vl32, uint32_t, (uint32_t w0)) { return w0 + svcnth_pat (SV_VL32); } -+ -+/* -+** inch_32_vl64: -+** inch x0, vl64 -+** ret -+*/ -+PROTO (inch_32_vl64, uint32_t, (uint32_t w0)) { return w0 + svcnth_pat (SV_VL64); } -+ -+/* -+** inch_32_vl128: -+** inch x0, vl128 -+** ret -+*/ -+PROTO (inch_32_vl128, uint32_t, (uint32_t w0)) { return w0 + svcnth_pat (SV_VL128); } -+ -+/* -+** inch_32_vl256: -+** inch x0, vl256 -+** ret -+*/ -+PROTO (inch_32_vl256, uint32_t, (uint32_t w0)) { return w0 + svcnth_pat (SV_VL256); } -+ -+/* -+** inch_32_mul3: -+** inch x0, mul3 -+** ret -+*/ -+PROTO (inch_32_mul3, uint32_t, (uint32_t w0)) { return w0 + svcnth_pat (SV_MUL3); } -+ -+/* -+** inch_32_mul4: -+** inch x0, mul4 -+** ret -+*/ -+PROTO (inch_32_mul4, uint32_t, (uint32_t w0)) { return w0 + svcnth_pat (SV_MUL4); } -+ -+/* -+** inch_32_all: -+** inch x0 -+** ret -+*/ -+PROTO (inch_32_all, uint32_t, (uint32_t w0)) { return w0 + svcnth_pat (SV_ALL); } -+ -+/* -+** inch_64_pow2: -+** inch x0, pow2 -+** ret -+*/ -+PROTO (inch_64_pow2, uint64_t, (uint64_t x0)) { return x0 + svcnth_pat (SV_POW2); } -+ -+/* -+** inch_64_all: -+** inch x0 -+** ret -+*/ -+PROTO (inch_64_all, uint64_t, (uint64_t x0)) { return x0 + svcnth_pat (SV_ALL); } -+ -+/* -+** dech_32_pow2: -+** dech x0, pow2 -+** ret -+*/ -+PROTO (dech_32_pow2, uint32_t, (uint32_t w0)) { return w0 - svcnth_pat (SV_POW2); } -+ -+/* -+** dech_32_vl1: -+** sub w0, w0, #?1 -+** ret -+*/ -+PROTO (dech_32_vl1, uint32_t, (uint32_t w0)) { return w0 - svcnth_pat (SV_VL1); } -+ -+/* -+** dech_32_vl2: -+** sub w0, w0, #?2 -+** ret -+*/ -+PROTO (dech_32_vl2, uint32_t, (uint32_t w0)) { return w0 - svcnth_pat (SV_VL2); } -+ -+/* -+** dech_32_vl3: -+** sub w0, w0, #?3 -+** ret -+*/ -+PROTO (dech_32_vl3, uint32_t, (uint32_t w0)) { return w0 - svcnth_pat (SV_VL3); } -+ -+/* -+** dech_32_vl4: -+** sub w0, w0, #?4 -+** ret -+*/ -+PROTO (dech_32_vl4, uint32_t, (uint32_t w0)) { return w0 - svcnth_pat (SV_VL4); } -+ -+/* -+** dech_32_vl5: -+** sub w0, w0, #?5 -+** ret -+*/ -+PROTO (dech_32_vl5, uint32_t, (uint32_t w0)) { return w0 - svcnth_pat (SV_VL5); } -+ -+/* -+** dech_32_vl6: -+** sub w0, w0, #?6 -+** ret -+*/ -+PROTO (dech_32_vl6, uint32_t, (uint32_t w0)) { return w0 - svcnth_pat (SV_VL6); } -+ -+/* -+** dech_32_vl7: -+** sub w0, w0, #?7 -+** ret -+*/ -+PROTO (dech_32_vl7, uint32_t, (uint32_t w0)) { return w0 - svcnth_pat (SV_VL7); } -+ -+/* -+** dech_32_vl8: -+** sub w0, w0, #?8 -+** ret -+*/ -+PROTO (dech_32_vl8, 
uint32_t, (uint32_t w0)) { return w0 - svcnth_pat (SV_VL8); } -+ -+/* -+** dech_32_vl16: -+** dech x0, vl16 -+** ret -+*/ -+PROTO (dech_32_vl16, uint32_t, (uint32_t w0)) { return w0 - svcnth_pat (SV_VL16); } -+ -+/* -+** dech_32_vl32: -+** dech x0, vl32 -+** ret -+*/ -+PROTO (dech_32_vl32, uint32_t, (uint32_t w0)) { return w0 - svcnth_pat (SV_VL32); } -+ -+/* -+** dech_32_vl64: -+** dech x0, vl64 -+** ret -+*/ -+PROTO (dech_32_vl64, uint32_t, (uint32_t w0)) { return w0 - svcnth_pat (SV_VL64); } -+ -+/* -+** dech_32_vl128: -+** dech x0, vl128 -+** ret -+*/ -+PROTO (dech_32_vl128, uint32_t, (uint32_t w0)) { return w0 - svcnth_pat (SV_VL128); } -+ -+/* -+** dech_32_vl256: -+** dech x0, vl256 -+** ret -+*/ -+PROTO (dech_32_vl256, uint32_t, (uint32_t w0)) { return w0 - svcnth_pat (SV_VL256); } -+ -+/* -+** dech_32_mul3: -+** dech x0, mul3 -+** ret -+*/ -+PROTO (dech_32_mul3, uint32_t, (uint32_t w0)) { return w0 - svcnth_pat (SV_MUL3); } -+ -+/* -+** dech_32_mul4: -+** dech x0, mul4 -+** ret -+*/ -+PROTO (dech_32_mul4, uint32_t, (uint32_t w0)) { return w0 - svcnth_pat (SV_MUL4); } -+ -+/* -+** dech_32_all: -+** dech x0 -+** ret -+*/ -+PROTO (dech_32_all, uint32_t, (uint32_t w0)) { return w0 - svcnth_pat (SV_ALL); } -+ -+/* -+** dech_64_pow2: -+** dech x0, pow2 -+** ret -+*/ -+PROTO (dech_64_pow2, uint64_t, (uint64_t x0)) { return x0 - svcnth_pat (SV_POW2); } -+ -+/* -+** dech_64_all: -+** dech x0 -+** ret -+*/ -+PROTO (dech_64_all, uint64_t, (uint64_t x0)) { return x0 - svcnth_pat (SV_ALL); } -+ -+/* -+** inch_s16_pow2_z0: -+** inch z0\.h, pow2 -+** ret -+*/ -+TEST_UNIFORM_Z (inch_s16_pow2_z0, svint16_t, -+ z0 = svadd_n_s16_x (svptrue_b16 (), z0, svcnth_pat (SV_POW2)), -+ z0 = svadd_x (svptrue_b16 (), z0, svcnth_pat (SV_POW2))); -+ -+/* -+** inch_s16_pow2_z1: -+** movprfx z0, z1 -+** inch z0\.h, pow2 -+** ret -+*/ -+TEST_UNIFORM_Z (inch_s16_pow2_z1, svint16_t, -+ z0 = svadd_n_s16_x (svptrue_b16 (), z1, svcnth_pat (SV_POW2)), -+ z0 = svadd_x (svptrue_b16 (), z1, svcnth_pat (SV_POW2))); -+ -+/* -+** dech_s16_pow2_z0: -+** dech z0\.h, pow2 -+** ret -+*/ -+TEST_UNIFORM_Z (dech_s16_pow2_z0, svint16_t, -+ z0 = svsub_n_s16_x (svptrue_b16 (), z0, svcnth_pat (SV_POW2)), -+ z0 = svsub_x (svptrue_b16 (), z0, svcnth_pat (SV_POW2))); -+ -+/* -+** dech_s16_pow2_z1: -+** movprfx z0, z1 -+** dech z0\.h, pow2 -+** ret -+*/ -+TEST_UNIFORM_Z (dech_s16_pow2_z1, svint16_t, -+ z0 = svsub_n_s16_x (svptrue_b16 (), z1, svcnth_pat (SV_POW2)), -+ z0 = svsub_x (svptrue_b16 (), z1, svcnth_pat (SV_POW2))); -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntp_b16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntp_b16.c -new file mode 100644 -index 000000000..d88b9e5f3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntp_b16.c -@@ -0,0 +1,243 @@ -+/* { dg-additional-options "-msve-vector-bits=scalable" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+#include -+ -+/* -+** cnt_b16_32: -+** cntp x0, p0, p1\.h -+** ret -+*/ -+TEST_PTEST (cnt_b16_32, uint32_t, -+ x0 = svcntp_b16 (p0, p1)); -+ -+/* -+** cnt_b16_64: -+** cntp x0, p0, p1\.h -+** ret -+*/ -+TEST_PTEST (cnt_b16_64, uint64_t, -+ x0 = svcntp_b16 (p0, p1)); -+ -+/* -+** inc_b16_32_general_x0: -+** cntp x([0-9]+), p0, p1\.h -+** add w0, (w0, w\1|w\1, w0) -+** ret -+*/ -+TEST_PTEST (inc_b16_32_general_x0, uint32_t, -+ x0 += svcntp_b16 (p0, p1)); -+ -+/* -+** inc_b16_32_general_x1: -+** cntp x([0-9]+), p0, p1\.h -+** add w0, (w1, w\1|w\1, w1) -+** ret -+*/ -+TEST_PTEST 
(inc_b16_32_general_x1, uint32_t, -+ x0 = x1 + svcntp_b16 (p0, p1)); -+ -+/* -+** inc_b16_32_ptrue_x0: -+** incp x0, p1\.h -+** ret -+*/ -+TEST_PTEST (inc_b16_32_ptrue_x0, uint32_t, -+ x0 += svcntp_b16 (svptrue_b16 (), p1)); -+ -+/* -+** inc_b16_32_ptrue_x1: -+** mov w0, w1 -+** incp x0, p1\.h -+** ret -+*/ -+TEST_PTEST (inc_b16_32_ptrue_x1, uint32_t, -+ x0 = x1 + svcntp_b16 (svptrue_b16 (), p1)); -+ -+/* -+** inc_b16_64_general_x0: -+** cntp (x[0-9]+), p0, p1\.h -+** add x0, (x0, \1|\1, x0) -+** ret -+*/ -+TEST_PTEST (inc_b16_64_general_x0, uint64_t, -+ x0 += svcntp_b16 (p0, p1)); -+ -+/* -+** inc_b16_64_general_x1: -+** cntp (x[0-9]+), p0, p1\.h -+** add x0, (x1, \1|\1, x1) -+** ret -+*/ -+TEST_PTEST (inc_b16_64_general_x1, uint64_t, -+ x0 = x1 + svcntp_b16 (p0, p1)); -+ -+/* -+** inc_b16_64_ptrue_x0: -+** incp x0, p1\.h -+** ret -+*/ -+TEST_PTEST (inc_b16_64_ptrue_x0, uint64_t, -+ x0 += svcntp_b16 (svptrue_b16 (), p1)); -+ -+/* -+** inc_b16_64_ptrue_x1: -+** mov x0, x1 -+** incp x0, p1\.h -+** ret -+*/ -+TEST_PTEST (inc_b16_64_ptrue_x1, uint64_t, -+ x0 = x1 + svcntp_b16 (svptrue_b16 (), p1)); -+ -+/* -+** dec_b16_32_general_x0: -+** cntp x([0-9]+), p0, p1\.h -+** sub w0, w0, w\1 -+** ret -+*/ -+TEST_PTEST (dec_b16_32_general_x0, uint32_t, -+ x0 -= svcntp_b16 (p0, p1)); -+ -+/* -+** dec_b16_32_general_x1: -+** cntp x([0-9]+), p0, p1\.h -+** sub w0, w1, w\1 -+** ret -+*/ -+TEST_PTEST (dec_b16_32_general_x1, uint32_t, -+ x0 = x1 - svcntp_b16 (p0, p1)); -+ -+/* -+** dec_b16_32_ptrue_x0: -+** decp x0, p1\.h -+** ret -+*/ -+TEST_PTEST (dec_b16_32_ptrue_x0, uint32_t, -+ x0 -= svcntp_b16 (svptrue_b16 (), p1)); -+ -+/* -+** dec_b16_32_ptrue_x1: -+** mov w0, w1 -+** decp x0, p1\.h -+** ret -+*/ -+TEST_PTEST (dec_b16_32_ptrue_x1, uint32_t, -+ x0 = x1 - svcntp_b16 (svptrue_b16 (), p1)); -+ -+/* -+** dec_b16_64_general_x0: -+** cntp (x[0-9]+), p0, p1\.h -+** sub x0, x0, \1 -+** ret -+*/ -+TEST_PTEST (dec_b16_64_general_x0, uint64_t, -+ x0 -= svcntp_b16 (p0, p1)); -+ -+/* -+** dec_b16_64_general_x1: -+** cntp (x[0-9]+), p0, p1\.h -+** sub x0, x1, \1 -+** ret -+*/ -+TEST_PTEST (dec_b16_64_general_x1, uint64_t, -+ x0 = x1 - svcntp_b16 (p0, p1)); -+ -+/* -+** dec_b16_64_ptrue_x0: -+** decp x0, p1\.h -+** ret -+*/ -+TEST_PTEST (dec_b16_64_ptrue_x0, uint64_t, -+ x0 -= svcntp_b16 (svptrue_b16 (), p1)); -+ -+/* -+** dec_b16_64_ptrue_x1: -+** mov x0, x1 -+** decp x0, p1\.h -+** ret -+*/ -+TEST_PTEST (dec_b16_64_ptrue_x1, uint64_t, -+ x0 = x1 - svcntp_b16 (svptrue_b16 (), p1)); -+ -+/* -+** inc_b16_u16_general_z0: -+** cntp x([0-9]+), p0, p1\.h -+** mov (z[0-9]+\.h), w\1 -+** add z0\.h, (z0\.h, \2|\2, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (inc_b16_u16_general_z0, svuint16_t, -+ z0 = svadd_n_u16_x (svptrue_b16 (), z0, svcntp_b16 (p0, p1)), -+ z0 = svadd_x (svptrue_b16 (), z0, svcntp_b16 (p0, p1))); -+ -+/* -+** inc_b16_u16_general_z1: -+** cntp x([0-9]+), p0, p1\.h -+** mov (z[0-9]+\.h), w\1 -+** add z0\.h, (z1\.h, \2|\2, z1\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (inc_b16_u16_general_z1, svuint16_t, -+ z0 = svadd_n_u16_x (svptrue_b16 (), z1, svcntp_b16 (p0, p1)), -+ z0 = svadd_x (svptrue_b16 (), z1, svcntp_b16 (p0, p1))); -+ -+/* -+** inc_b16_u16_ptrue_z0: -+** incp z0\.h, p0 -+** ret -+*/ -+TEST_UNIFORM_Z (inc_b16_u16_ptrue_z0, svuint16_t, -+ z0 = svadd_n_u16_x (svptrue_b16 (), z0, svcntp_b16 (svptrue_b16 (), p0)), -+ z0 = svadd_x (svptrue_b16 (), z0, svcntp_b16 (svptrue_b16 (), p0))); -+ -+/* -+** inc_b16_u16_ptrue_z1: -+** movprfx z0, z1 -+** incp z0\.h, p0 -+** ret -+*/ -+TEST_UNIFORM_Z (inc_b16_u16_ptrue_z1, 
svuint16_t, -+ z0 = svadd_n_u16_x (svptrue_b16 (), z1, svcntp_b16 (svptrue_b16 (), p0)), -+ z0 = svadd_x (svptrue_b16 (), z1, svcntp_b16 (svptrue_b16 (), p0))); -+ -+/* -+** dec_b16_u16_general_z0: -+** cntp x([0-9]+), p0, p1\.h -+** mov (z[0-9]+\.h), w\1 -+** sub z0\.h, z0\.h, \2 -+** ret -+*/ -+TEST_UNIFORM_Z (dec_b16_u16_general_z0, svuint16_t, -+ z0 = svsub_n_u16_x (svptrue_b16 (), z0, svcntp_b16 (p0, p1)), -+ z0 = svsub_x (svptrue_b16 (), z0, svcntp_b16 (p0, p1))); -+ -+/* -+** dec_b16_u16_general_z1: -+** cntp x([0-9]+), p0, p1\.h -+** mov (z[0-9]+\.h), w\1 -+** sub z0\.h, z1\.h, \2 -+** ret -+*/ -+TEST_UNIFORM_Z (dec_b16_u16_general_z1, svuint16_t, -+ z0 = svsub_n_u16_x (svptrue_b16 (), z1, svcntp_b16 (p0, p1)), -+ z0 = svsub_x (svptrue_b16 (), z1, svcntp_b16 (p0, p1))); -+ -+/* -+** dec_b16_u16_ptrue_z0: -+** decp z0\.h, p0 -+** ret -+*/ -+TEST_UNIFORM_Z (dec_b16_u16_ptrue_z0, svuint16_t, -+ z0 = svsub_n_u16_x (svptrue_b16 (), z0, svcntp_b16 (svptrue_b16 (), p0)), -+ z0 = svsub_x (svptrue_b16 (), z0, svcntp_b16 (svptrue_b16 (), p0))); -+ -+/* -+** dec_b16_u16_ptrue_z1: -+** movprfx z0, z1 -+** decp z0\.h, p0 -+** ret -+*/ -+TEST_UNIFORM_Z (dec_b16_u16_ptrue_z1, svuint16_t, -+ z0 = svsub_n_u16_x (svptrue_b16 (), z1, svcntp_b16 (svptrue_b16 (), p0)), -+ z0 = svsub_x (svptrue_b16 (), z1, svcntp_b16 (svptrue_b16 (), p0))); -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntp_b32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntp_b32.c -new file mode 100644 -index 000000000..0da818895 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntp_b32.c -@@ -0,0 +1,243 @@ -+/* { dg-additional-options "-msve-vector-bits=scalable" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+#include -+ -+/* -+** cnt_b32_32: -+** cntp x0, p0, p1\.s -+** ret -+*/ -+TEST_PTEST (cnt_b32_32, uint32_t, -+ x0 = svcntp_b32 (p0, p1)); -+ -+/* -+** cnt_b32_64: -+** cntp x0, p0, p1\.s -+** ret -+*/ -+TEST_PTEST (cnt_b32_64, uint64_t, -+ x0 = svcntp_b32 (p0, p1)); -+ -+/* -+** inc_b32_32_general_x0: -+** cntp x([0-9]+), p0, p1\.s -+** add w0, (w0, w\1|w\1, w0) -+** ret -+*/ -+TEST_PTEST (inc_b32_32_general_x0, uint32_t, -+ x0 += svcntp_b32 (p0, p1)); -+ -+/* -+** inc_b32_32_general_x1: -+** cntp x([0-9]+), p0, p1\.s -+** add w0, (w1, w\1|w\1, w1) -+** ret -+*/ -+TEST_PTEST (inc_b32_32_general_x1, uint32_t, -+ x0 = x1 + svcntp_b32 (p0, p1)); -+ -+/* -+** inc_b32_32_ptrue_x0: -+** incp x0, p1\.s -+** ret -+*/ -+TEST_PTEST (inc_b32_32_ptrue_x0, uint32_t, -+ x0 += svcntp_b32 (svptrue_b32 (), p1)); -+ -+/* -+** inc_b32_32_ptrue_x1: -+** mov w0, w1 -+** incp x0, p1\.s -+** ret -+*/ -+TEST_PTEST (inc_b32_32_ptrue_x1, uint32_t, -+ x0 = x1 + svcntp_b32 (svptrue_b32 (), p1)); -+ -+/* -+** inc_b32_64_general_x0: -+** cntp (x[0-9]+), p0, p1\.s -+** add x0, (x0, \1|\1, x0) -+** ret -+*/ -+TEST_PTEST (inc_b32_64_general_x0, uint64_t, -+ x0 += svcntp_b32 (p0, p1)); -+ -+/* -+** inc_b32_64_general_x1: -+** cntp (x[0-9]+), p0, p1\.s -+** add x0, (x1, \1|\1, x1) -+** ret -+*/ -+TEST_PTEST (inc_b32_64_general_x1, uint64_t, -+ x0 = x1 + svcntp_b32 (p0, p1)); -+ -+/* -+** inc_b32_64_ptrue_x0: -+** incp x0, p1\.s -+** ret -+*/ -+TEST_PTEST (inc_b32_64_ptrue_x0, uint64_t, -+ x0 += svcntp_b32 (svptrue_b32 (), p1)); -+ -+/* -+** inc_b32_64_ptrue_x1: -+** mov x0, x1 -+** incp x0, p1\.s -+** ret -+*/ -+TEST_PTEST (inc_b32_64_ptrue_x1, uint64_t, -+ x0 = x1 + svcntp_b32 (svptrue_b32 (), p1)); -+ -+/* -+** dec_b32_32_general_x0: -+** cntp x([0-9]+), p0, p1\.s 
-+** sub w0, w0, w\1 -+** ret -+*/ -+TEST_PTEST (dec_b32_32_general_x0, uint32_t, -+ x0 -= svcntp_b32 (p0, p1)); -+ -+/* -+** dec_b32_32_general_x1: -+** cntp x([0-9]+), p0, p1\.s -+** sub w0, w1, w\1 -+** ret -+*/ -+TEST_PTEST (dec_b32_32_general_x1, uint32_t, -+ x0 = x1 - svcntp_b32 (p0, p1)); -+ -+/* -+** dec_b32_32_ptrue_x0: -+** decp x0, p1\.s -+** ret -+*/ -+TEST_PTEST (dec_b32_32_ptrue_x0, uint32_t, -+ x0 -= svcntp_b32 (svptrue_b32 (), p1)); -+ -+/* -+** dec_b32_32_ptrue_x1: -+** mov w0, w1 -+** decp x0, p1\.s -+** ret -+*/ -+TEST_PTEST (dec_b32_32_ptrue_x1, uint32_t, -+ x0 = x1 - svcntp_b32 (svptrue_b32 (), p1)); -+ -+/* -+** dec_b32_64_general_x0: -+** cntp (x[0-9]+), p0, p1\.s -+** sub x0, x0, \1 -+** ret -+*/ -+TEST_PTEST (dec_b32_64_general_x0, uint64_t, -+ x0 -= svcntp_b32 (p0, p1)); -+ -+/* -+** dec_b32_64_general_x1: -+** cntp (x[0-9]+), p0, p1\.s -+** sub x0, x1, \1 -+** ret -+*/ -+TEST_PTEST (dec_b32_64_general_x1, uint64_t, -+ x0 = x1 - svcntp_b32 (p0, p1)); -+ -+/* -+** dec_b32_64_ptrue_x0: -+** decp x0, p1\.s -+** ret -+*/ -+TEST_PTEST (dec_b32_64_ptrue_x0, uint64_t, -+ x0 -= svcntp_b32 (svptrue_b32 (), p1)); -+ -+/* -+** dec_b32_64_ptrue_x1: -+** mov x0, x1 -+** decp x0, p1\.s -+** ret -+*/ -+TEST_PTEST (dec_b32_64_ptrue_x1, uint64_t, -+ x0 = x1 - svcntp_b32 (svptrue_b32 (), p1)); -+ -+/* -+** inc_b32_s32_general_z0: -+** cntp x([0-9]+), p0, p1\.s -+** mov (z[0-9]+\.s), w\1 -+** add z0\.s, (z0\.s, \2|\2, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (inc_b32_s32_general_z0, svint32_t, -+ z0 = svadd_n_s32_x (svptrue_b32 (), z0, svcntp_b32 (p0, p1)), -+ z0 = svadd_x (svptrue_b32 (), z0, svcntp_b32 (p0, p1))); -+ -+/* -+** inc_b32_s32_general_z1: -+** cntp x([0-9]+), p0, p1\.s -+** mov (z[0-9]+\.s), w\1 -+** add z0\.s, (z1\.s, \2|\2, z1\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (inc_b32_s32_general_z1, svint32_t, -+ z0 = svadd_n_s32_x (svptrue_b32 (), z1, svcntp_b32 (p0, p1)), -+ z0 = svadd_x (svptrue_b32 (), z1, svcntp_b32 (p0, p1))); -+ -+/* -+** inc_b32_s32_ptrue_z0: -+** incp z0\.s, p0 -+** ret -+*/ -+TEST_UNIFORM_Z (inc_b32_s32_ptrue_z0, svint32_t, -+ z0 = svadd_n_s32_x (svptrue_b32 (), z0, svcntp_b32 (svptrue_b32 (), p0)), -+ z0 = svadd_x (svptrue_b32 (), z0, svcntp_b32 (svptrue_b32 (), p0))); -+ -+/* -+** inc_b32_s32_ptrue_z1: -+** movprfx z0, z1 -+** incp z0\.s, p0 -+** ret -+*/ -+TEST_UNIFORM_Z (inc_b32_s32_ptrue_z1, svint32_t, -+ z0 = svadd_n_s32_x (svptrue_b32 (), z1, svcntp_b32 (svptrue_b32 (), p0)), -+ z0 = svadd_x (svptrue_b32 (), z1, svcntp_b32 (svptrue_b32 (), p0))); -+ -+/* -+** dec_b32_s32_general_z0: -+** cntp x([0-9]+), p0, p1\.s -+** mov (z[0-9]+\.s), w\1 -+** sub z0\.s, z0\.s, \2 -+** ret -+*/ -+TEST_UNIFORM_Z (dec_b32_s32_general_z0, svint32_t, -+ z0 = svsub_n_s32_x (svptrue_b32 (), z0, svcntp_b32 (p0, p1)), -+ z0 = svsub_x (svptrue_b32 (), z0, svcntp_b32 (p0, p1))); -+ -+/* -+** dec_b32_s32_general_z1: -+** cntp x([0-9]+), p0, p1\.s -+** mov (z[0-9]+\.s), w\1 -+** sub z0\.s, z1\.s, \2 -+** ret -+*/ -+TEST_UNIFORM_Z (dec_b32_s32_general_z1, svint32_t, -+ z0 = svsub_n_s32_x (svptrue_b32 (), z1, svcntp_b32 (p0, p1)), -+ z0 = svsub_x (svptrue_b32 (), z1, svcntp_b32 (p0, p1))); -+ -+/* -+** dec_b32_s32_ptrue_z0: -+** decp z0\.s, p0 -+** ret -+*/ -+TEST_UNIFORM_Z (dec_b32_s32_ptrue_z0, svint32_t, -+ z0 = svsub_n_s32_x (svptrue_b32 (), z0, svcntp_b32 (svptrue_b32 (), p0)), -+ z0 = svsub_x (svptrue_b32 (), z0, svcntp_b32 (svptrue_b32 (), p0))); -+ -+/* -+** dec_b32_s32_ptrue_z1: -+** movprfx z0, z1 -+** decp z0\.s, p0 -+** ret -+*/ -+TEST_UNIFORM_Z (dec_b32_s32_ptrue_z1, 
svint32_t, -+ z0 = svsub_n_s32_x (svptrue_b32 (), z1, svcntp_b32 (svptrue_b32 (), p0)), -+ z0 = svsub_x (svptrue_b32 (), z1, svcntp_b32 (svptrue_b32 (), p0))); -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntp_b64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntp_b64.c -new file mode 100644 -index 000000000..6ddbaef5a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntp_b64.c -@@ -0,0 +1,243 @@ -+/* { dg-additional-options "-msve-vector-bits=scalable" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+#include -+ -+/* -+** cnt_b64_32: -+** cntp x0, p0, p1\.d -+** ret -+*/ -+TEST_PTEST (cnt_b64_32, uint32_t, -+ x0 = svcntp_b64 (p0, p1)); -+ -+/* -+** cnt_b64_64: -+** cntp x0, p0, p1\.d -+** ret -+*/ -+TEST_PTEST (cnt_b64_64, uint64_t, -+ x0 = svcntp_b64 (p0, p1)); -+ -+/* -+** inc_b64_32_general_x0: -+** cntp x([0-9]+), p0, p1\.d -+** add w0, (w0, w\1|w\1, w0) -+** ret -+*/ -+TEST_PTEST (inc_b64_32_general_x0, uint32_t, -+ x0 += svcntp_b64 (p0, p1)); -+ -+/* -+** inc_b64_32_general_x1: -+** cntp x([0-9]+), p0, p1\.d -+** add w0, (w1, w\1|w\1, w1) -+** ret -+*/ -+TEST_PTEST (inc_b64_32_general_x1, uint32_t, -+ x0 = x1 + svcntp_b64 (p0, p1)); -+ -+/* -+** inc_b64_32_ptrue_x0: -+** incp x0, p1\.d -+** ret -+*/ -+TEST_PTEST (inc_b64_32_ptrue_x0, uint32_t, -+ x0 += svcntp_b64 (svptrue_b64 (), p1)); -+ -+/* -+** inc_b64_32_ptrue_x1: -+** mov w0, w1 -+** incp x0, p1\.d -+** ret -+*/ -+TEST_PTEST (inc_b64_32_ptrue_x1, uint32_t, -+ x0 = x1 + svcntp_b64 (svptrue_b64 (), p1)); -+ -+/* -+** inc_b64_64_general_x0: -+** cntp (x[0-9]+), p0, p1\.d -+** add x0, (x0, \1|\1, x0) -+** ret -+*/ -+TEST_PTEST (inc_b64_64_general_x0, uint64_t, -+ x0 += svcntp_b64 (p0, p1)); -+ -+/* -+** inc_b64_64_general_x1: -+** cntp (x[0-9]+), p0, p1\.d -+** add x0, (x1, \1|\1, x1) -+** ret -+*/ -+TEST_PTEST (inc_b64_64_general_x1, uint64_t, -+ x0 = x1 + svcntp_b64 (p0, p1)); -+ -+/* -+** inc_b64_64_ptrue_x0: -+** incp x0, p1\.d -+** ret -+*/ -+TEST_PTEST (inc_b64_64_ptrue_x0, uint64_t, -+ x0 += svcntp_b64 (svptrue_b64 (), p1)); -+ -+/* -+** inc_b64_64_ptrue_x1: -+** mov x0, x1 -+** incp x0, p1\.d -+** ret -+*/ -+TEST_PTEST (inc_b64_64_ptrue_x1, uint64_t, -+ x0 = x1 + svcntp_b64 (svptrue_b64 (), p1)); -+ -+/* -+** dec_b64_32_general_x0: -+** cntp x([0-9]+), p0, p1\.d -+** sub w0, w0, w\1 -+** ret -+*/ -+TEST_PTEST (dec_b64_32_general_x0, uint32_t, -+ x0 -= svcntp_b64 (p0, p1)); -+ -+/* -+** dec_b64_32_general_x1: -+** cntp x([0-9]+), p0, p1\.d -+** sub w0, w1, w\1 -+** ret -+*/ -+TEST_PTEST (dec_b64_32_general_x1, uint32_t, -+ x0 = x1 - svcntp_b64 (p0, p1)); -+ -+/* -+** dec_b64_32_ptrue_x0: -+** decp x0, p1\.d -+** ret -+*/ -+TEST_PTEST (dec_b64_32_ptrue_x0, uint32_t, -+ x0 -= svcntp_b64 (svptrue_b64 (), p1)); -+ -+/* -+** dec_b64_32_ptrue_x1: -+** mov w0, w1 -+** decp x0, p1\.d -+** ret -+*/ -+TEST_PTEST (dec_b64_32_ptrue_x1, uint32_t, -+ x0 = x1 - svcntp_b64 (svptrue_b64 (), p1)); -+ -+/* -+** dec_b64_64_general_x0: -+** cntp (x[0-9]+), p0, p1\.d -+** sub x0, x0, \1 -+** ret -+*/ -+TEST_PTEST (dec_b64_64_general_x0, uint64_t, -+ x0 -= svcntp_b64 (p0, p1)); -+ -+/* -+** dec_b64_64_general_x1: -+** cntp (x[0-9]+), p0, p1\.d -+** sub x0, x1, \1 -+** ret -+*/ -+TEST_PTEST (dec_b64_64_general_x1, uint64_t, -+ x0 = x1 - svcntp_b64 (p0, p1)); -+ -+/* -+** dec_b64_64_ptrue_x0: -+** decp x0, p1\.d -+** ret -+*/ -+TEST_PTEST (dec_b64_64_ptrue_x0, uint64_t, -+ x0 -= svcntp_b64 (svptrue_b64 (), p1)); -+ -+/* -+** dec_b64_64_ptrue_x1: -+** 
mov x0, x1 -+** decp x0, p1\.d -+** ret -+*/ -+TEST_PTEST (dec_b64_64_ptrue_x1, uint64_t, -+ x0 = x1 - svcntp_b64 (svptrue_b64 (), p1)); -+ -+/* -+** inc_b64_u64_general_z0: -+** cntp (x[0-9]+), p0, p1\.d -+** mov (z[0-9]+\.d), \1 -+** add z0\.d, (z0\.d, \2|\2, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (inc_b64_u64_general_z0, svuint64_t, -+ z0 = svadd_n_u64_x (svptrue_b64 (), z0, svcntp_b64 (p0, p1)), -+ z0 = svadd_x (svptrue_b64 (), z0, svcntp_b64 (p0, p1))); -+ -+/* -+** inc_b64_u64_general_z1: -+** cntp (x[0-9]+), p0, p1\.d -+** mov (z[0-9]+\.d), \1 -+** add z0\.d, (z1\.d, \2|\2, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (inc_b64_u64_general_z1, svuint64_t, -+ z0 = svadd_n_u64_x (svptrue_b64 (), z1, svcntp_b64 (p0, p1)), -+ z0 = svadd_x (svptrue_b64 (), z1, svcntp_b64 (p0, p1))); -+ -+/* -+** inc_b64_u64_ptrue_z0: -+** incp z0\.d, p0 -+** ret -+*/ -+TEST_UNIFORM_Z (inc_b64_u64_ptrue_z0, svuint64_t, -+ z0 = svadd_n_u64_x (svptrue_b64 (), z0, svcntp_b64 (svptrue_b64 (), p0)), -+ z0 = svadd_x (svptrue_b64 (), z0, svcntp_b64 (svptrue_b64 (), p0))); -+ -+/* -+** inc_b64_u64_ptrue_z1: -+** movprfx z0, z1 -+** incp z0\.d, p0 -+** ret -+*/ -+TEST_UNIFORM_Z (inc_b64_u64_ptrue_z1, svuint64_t, -+ z0 = svadd_n_u64_x (svptrue_b64 (), z1, svcntp_b64 (svptrue_b64 (), p0)), -+ z0 = svadd_x (svptrue_b64 (), z1, svcntp_b64 (svptrue_b64 (), p0))); -+ -+/* -+** dec_b64_u64_general_z0: -+** cntp (x[0-9]+), p0, p1\.d -+** mov (z[0-9]+\.d), \1 -+** sub z0\.d, z0\.d, \2 -+** ret -+*/ -+TEST_UNIFORM_Z (dec_b64_u64_general_z0, svuint64_t, -+ z0 = svsub_n_u64_x (svptrue_b64 (), z0, svcntp_b64 (p0, p1)), -+ z0 = svsub_x (svptrue_b64 (), z0, svcntp_b64 (p0, p1))); -+ -+/* -+** dec_b64_u64_general_z1: -+** cntp (x[0-9]+), p0, p1\.d -+** mov (z[0-9]+\.d), \1 -+** sub z0\.d, z1\.d, \2 -+** ret -+*/ -+TEST_UNIFORM_Z (dec_b64_u64_general_z1, svuint64_t, -+ z0 = svsub_n_u64_x (svptrue_b64 (), z1, svcntp_b64 (p0, p1)), -+ z0 = svsub_x (svptrue_b64 (), z1, svcntp_b64 (p0, p1))); -+ -+/* -+** dec_b64_u64_ptrue_z0: -+** decp z0\.d, p0 -+** ret -+*/ -+TEST_UNIFORM_Z (dec_b64_u64_ptrue_z0, svuint64_t, -+ z0 = svsub_n_u64_x (svptrue_b64 (), z0, svcntp_b64 (svptrue_b64 (), p0)), -+ z0 = svsub_x (svptrue_b64 (), z0, svcntp_b64 (svptrue_b64 (), p0))); -+ -+/* -+** dec_b64_u64_ptrue_z1: -+** movprfx z0, z1 -+** decp z0\.d, p0 -+** ret -+*/ -+TEST_UNIFORM_Z (dec_b64_u64_ptrue_z1, svuint64_t, -+ z0 = svsub_n_u64_x (svptrue_b64 (), z1, svcntp_b64 (svptrue_b64 (), p0)), -+ z0 = svsub_x (svptrue_b64 (), z1, svcntp_b64 (svptrue_b64 (), p0))); -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntp_b8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntp_b8.c -new file mode 100644 -index 000000000..e02c02cd6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntp_b8.c -@@ -0,0 +1,253 @@ -+/* { dg-additional-options "-msve-vector-bits=scalable" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+#include -+ -+/* -+** cnt_b8_32: -+** cntp x0, p0, p1\.b -+** ret -+*/ -+TEST_PTEST (cnt_b8_32, uint32_t, -+ x0 = svcntp_b8 (p0, p1)); -+ -+/* -+** cnt_b8_64: -+** cntp x0, p0, p1\.b -+** ret -+*/ -+TEST_PTEST (cnt_b8_64, uint64_t, -+ x0 = svcntp_b8 (p0, p1)); -+ -+/* -+** inc_b8_32_general_x0: -+** cntp x([0-9]+), p0, p1\.b -+** add w0, (w0, w\1|w\1, w0) -+** ret -+*/ -+TEST_PTEST (inc_b8_32_general_x0, uint32_t, -+ x0 += svcntp_b8 (p0, p1)); -+ -+/* -+** inc_b8_32_general_x1: -+** cntp x([0-9]+), p0, p1\.b -+** add w0, (w1, w\1|w\1, w1) -+** ret -+*/ -+TEST_PTEST 
(inc_b8_32_general_x1, uint32_t, -+ x0 = x1 + svcntp_b8 (p0, p1)); -+ -+/* -+** inc_b8_32_ptrue_x0: -+** incp x0, p1\.b -+** ret -+*/ -+TEST_PTEST (inc_b8_32_ptrue_x0, uint32_t, -+ x0 += svcntp_b8 (svptrue_b8 (), p1)); -+ -+/* -+** inc_b8_32_ptrue_x1: -+** mov w0, w1 -+** incp x0, p1\.b -+** ret -+*/ -+TEST_PTEST (inc_b8_32_ptrue_x1, uint32_t, -+ x0 = x1 + svcntp_b8 (svptrue_b8 (), p1)); -+ -+/* -+** inc_b8_64_general_x0: -+** cntp (x[0-9]+), p0, p1\.b -+** add x0, (x0, \1|\1, x0) -+** ret -+*/ -+TEST_PTEST (inc_b8_64_general_x0, uint64_t, -+ x0 += svcntp_b8 (p0, p1)); -+ -+/* -+** inc_b8_64_general_x1: -+** cntp (x[0-9]+), p0, p1\.b -+** add x0, (x1, \1|\1, x1) -+** ret -+*/ -+TEST_PTEST (inc_b8_64_general_x1, uint64_t, -+ x0 = x1 + svcntp_b8 (p0, p1)); -+ -+/* -+** inc_b8_64_ptrue_x0: -+** incp x0, p1\.b -+** ret -+*/ -+TEST_PTEST (inc_b8_64_ptrue_x0, uint64_t, -+ x0 += svcntp_b8 (svptrue_b8 (), p1)); -+ -+/* -+** inc_b8_64_ptrue_x1: -+** mov x0, x1 -+** incp x0, p1\.b -+** ret -+*/ -+TEST_PTEST (inc_b8_64_ptrue_x1, uint64_t, -+ x0 = x1 + svcntp_b8 (svptrue_b8 (), p1)); -+ -+/* -+** dec_b8_32_general_x0: -+** cntp x([0-9]+), p0, p1\.b -+** sub w0, w0, w\1 -+** ret -+*/ -+TEST_PTEST (dec_b8_32_general_x0, uint32_t, -+ x0 -= svcntp_b8 (p0, p1)); -+ -+/* -+** dec_b8_32_general_x1: -+** cntp x([0-9]+), p0, p1\.b -+** sub w0, w1, w\1 -+** ret -+*/ -+TEST_PTEST (dec_b8_32_general_x1, uint32_t, -+ x0 = x1 - svcntp_b8 (p0, p1)); -+ -+/* -+** dec_b8_32_ptrue_x0: -+** decp x0, p1\.b -+** ret -+*/ -+TEST_PTEST (dec_b8_32_ptrue_x0, uint32_t, -+ x0 -= svcntp_b8 (svptrue_b8 (), p1)); -+ -+/* -+** dec_b8_32_ptrue_x1: -+** mov w0, w1 -+** decp x0, p1\.b -+** ret -+*/ -+TEST_PTEST (dec_b8_32_ptrue_x1, uint32_t, -+ x0 = x1 - svcntp_b8 (svptrue_b8 (), p1)); -+ -+/* -+** dec_b8_64_general_x0: -+** cntp (x[0-9]+), p0, p1\.b -+** sub x0, x0, \1 -+** ret -+*/ -+TEST_PTEST (dec_b8_64_general_x0, uint64_t, -+ x0 -= svcntp_b8 (p0, p1)); -+ -+/* -+** dec_b8_64_general_x1: -+** cntp (x[0-9]+), p0, p1\.b -+** sub x0, x1, \1 -+** ret -+*/ -+TEST_PTEST (dec_b8_64_general_x1, uint64_t, -+ x0 = x1 - svcntp_b8 (p0, p1)); -+ -+/* -+** dec_b8_64_ptrue_x0: -+** decp x0, p1\.b -+** ret -+*/ -+TEST_PTEST (dec_b8_64_ptrue_x0, uint64_t, -+ x0 -= svcntp_b8 (svptrue_b8 (), p1)); -+ -+/* -+** dec_b8_64_ptrue_x1: -+** mov x0, x1 -+** decp x0, p1\.b -+** ret -+*/ -+TEST_PTEST (dec_b8_64_ptrue_x1, uint64_t, -+ x0 = x1 - svcntp_b8 (svptrue_b8 (), p1)); -+ -+/* -+** inc_b8_s8_general_z0: -+** cntp x([0-9]+), p0, p1\.b -+** mov (z[0-9]+\.b), w\1 -+** add z0\.b, (z0\.b, \2|\2, z0\.b) -+** ret -+*/ -+TEST_UNIFORM_Z (inc_b8_s8_general_z0, svint8_t, -+ z0 = svadd_n_s8_x (svptrue_b8 (), z0, svcntp_b8 (p0, p1)), -+ z0 = svadd_x (svptrue_b8 (), z0, svcntp_b8 (p0, p1))); -+ -+/* -+** inc_b8_s8_general_z1: -+** cntp x([0-9]+), p0, p1\.b -+** mov (z[0-9]+\.b), w\1 -+** add z0\.b, (z1\.b, \2|\2, z1\.b) -+** ret -+*/ -+TEST_UNIFORM_Z (inc_b8_s8_general_z1, svint8_t, -+ z0 = svadd_n_s8_x (svptrue_b8 (), z1, svcntp_b8 (p0, p1)), -+ z0 = svadd_x (svptrue_b8 (), z1, svcntp_b8 (p0, p1))); -+ -+/* -+** inc_b8_s8_ptrue_z0: -+** ptrue (p[0-7])\.b, all -+** cntp x([0-9]+), \1, p0\.b -+** mov (z[0-9]+\.b), w\2 -+** add z0\.b, (z0\.b, \3|\3, z0\.b) -+** ret -+*/ -+TEST_UNIFORM_Z (inc_b8_s8_ptrue_z0, svint8_t, -+ z0 = svadd_n_s8_x (svptrue_b8 (), z0, svcntp_b8 (svptrue_b8 (), p0)), -+ z0 = svadd_x (svptrue_b8 (), z0, svcntp_b8 (svptrue_b8 (), p0))); -+ -+/* -+** inc_b8_s8_ptrue_z1: -+** ptrue (p[0-7])\.b, all -+** cntp x([0-9]+), \1, p0\.b -+** mov 
(z[0-9]+\.b), w\2 -+** add z0\.b, (z1\.b, \3|\3, z1\.b) -+** ret -+*/ -+TEST_UNIFORM_Z (inc_b8_s8_ptrue_z1, svint8_t, -+ z0 = svadd_n_s8_x (svptrue_b8 (), z1, svcntp_b8 (svptrue_b8 (), p0)), -+ z0 = svadd_x (svptrue_b8 (), z1, svcntp_b8 (svptrue_b8 (), p0))); -+ -+/* -+** dec_b8_s8_general_z0: -+** cntp x([0-9]+), p0, p1\.b -+** mov (z[0-9]+\.b), w\1 -+** sub z0\.b, z0\.b, \2 -+** ret -+*/ -+TEST_UNIFORM_Z (dec_b8_s8_general_z0, svint8_t, -+ z0 = svsub_n_s8_x (svptrue_b8 (), z0, svcntp_b8 (p0, p1)), -+ z0 = svsub_x (svptrue_b8 (), z0, svcntp_b8 (p0, p1))); -+ -+/* -+** dec_b8_s8_general_z1: -+** cntp x([0-9]+), p0, p1\.b -+** mov (z[0-9]+\.b), w\1 -+** sub z0\.b, z1\.b, \2 -+** ret -+*/ -+TEST_UNIFORM_Z (dec_b8_s8_general_z1, svint8_t, -+ z0 = svsub_n_s8_x (svptrue_b8 (), z1, svcntp_b8 (p0, p1)), -+ z0 = svsub_x (svptrue_b8 (), z1, svcntp_b8 (p0, p1))); -+ -+/* -+** dec_b8_s8_ptrue_z0: -+** ptrue (p[0-7])\.b, all -+** cntp x([0-9]+), \1, p0\.b -+** mov (z[0-9]+\.b), w\2 -+** sub z0\.b, z0\.b, \3 -+** ret -+*/ -+TEST_UNIFORM_Z (dec_b8_s8_ptrue_z0, svint8_t, -+ z0 = svsub_n_s8_x (svptrue_b8 (), z0, svcntp_b8 (svptrue_b8 (), p0)), -+ z0 = svsub_x (svptrue_b8 (), z0, svcntp_b8 (svptrue_b8 (), p0))); -+ -+/* -+** dec_b8_s8_ptrue_z1: -+** ptrue (p[0-7])\.b, all -+** cntp x([0-9]+), \1, p0\.b -+** mov (z[0-9]+\.b), w\2 -+** sub z0\.b, z1\.b, \3 -+** ret -+*/ -+TEST_UNIFORM_Z (dec_b8_s8_ptrue_z1, svint8_t, -+ z0 = svsub_n_s8_x (svptrue_b8 (), z1, svcntp_b8 (svptrue_b8 (), p0)), -+ z0 = svsub_x (svptrue_b8 (), z1, svcntp_b8 (svptrue_b8 (), p0))); -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntw.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntw.c -new file mode 100644 -index 000000000..e26cc67a4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntw.c -@@ -0,0 +1,279 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cntw_1: -+** cntw x0 -+** ret -+*/ -+PROTO (cntw_1, uint64_t, ()) { return svcntw (); } -+ -+/* -+** cntw_2: -+** cnth x0 -+** ret -+*/ -+PROTO (cntw_2, uint64_t, ()) { return svcntw () * 2; } -+ -+/* -+** cntw_3: -+** cntw x0, all, mul #3 -+** ret -+*/ -+PROTO (cntw_3, uint64_t, ()) { return svcntw () * 3; } -+ -+/* -+** cntw_4: -+** cntb x0 -+** ret -+*/ -+PROTO (cntw_4, uint64_t, ()) { return svcntw () * 4; } -+ -+/* -+** cntw_8: -+** cntb x0, all, mul #2 -+** ret -+*/ -+PROTO (cntw_8, uint64_t, ()) { return svcntw () * 8; } -+ -+/* -+** cntw_15: -+** cntw x0, all, mul #15 -+** ret -+*/ -+PROTO (cntw_15, uint64_t, ()) { return svcntw () * 15; } -+ -+/* -+** cntw_16: -+** cntb x0, all, mul #4 -+** ret -+*/ -+PROTO (cntw_16, uint64_t, ()) { return svcntw () * 16; } -+ -+/* Other sequences would be OK. 
*/ -+/* -+** cntw_17: -+** cntb x0, all, mul #4 -+** incw x0 -+** ret -+*/ -+PROTO (cntw_17, uint64_t, ()) { return svcntw () * 17; } -+ -+/* -+** cntw_32: -+** cntb x0, all, mul #8 -+** ret -+*/ -+PROTO (cntw_32, uint64_t, ()) { return svcntw () * 32; } -+ -+/* -+** cntw_64: -+** cntb x0, all, mul #16 -+** ret -+*/ -+PROTO (cntw_64, uint64_t, ()) { return svcntw () * 64; } -+ -+/* -+** cntw_128: -+** cntd (x[0-9]+) -+** lsl x0, \1, 8 -+** ret -+*/ -+PROTO (cntw_128, uint64_t, ()) { return svcntw () * 128; } -+ -+/* -+** cntw_m1: -+** cntw (x[0-9]+) -+** neg x0, \1 -+** ret -+*/ -+PROTO (cntw_m1, uint64_t, ()) { return -svcntw (); } -+ -+/* -+** cntw_m13: -+** cntw (x[0-9]+), all, mul #13 -+** neg x0, \1 -+** ret -+*/ -+PROTO (cntw_m13, uint64_t, ()) { return -svcntw () * 13; } -+ -+/* -+** cntw_m15: -+** cntw (x[0-9]+), all, mul #15 -+** neg x0, \1 -+** ret -+*/ -+PROTO (cntw_m15, uint64_t, ()) { return -svcntw () * 15; } -+ -+/* -+** cntw_m16: -+** cntb (x[0-9]+), all, mul #4 -+** neg x0, \1 -+** ret -+*/ -+PROTO (cntw_m16, uint64_t, ()) { return -svcntw () * 16; } -+ -+/* Other sequences would be OK. */ -+/* -+** cntw_m17: -+** cntb x0, all, mul #4 -+** incw x0 -+** neg x0, x0 -+** ret -+*/ -+PROTO (cntw_m17, uint64_t, ()) { return -svcntw () * 17; } -+ -+/* -+** incw_1: -+** incw x0 -+** ret -+*/ -+PROTO (incw_1, uint64_t, (uint64_t x0)) { return x0 + svcntw (); } -+ -+/* -+** incw_2: -+** inch x0 -+** ret -+*/ -+PROTO (incw_2, uint64_t, (uint64_t x0)) { return x0 + svcntw () * 2; } -+ -+/* -+** incw_3: -+** incw x0, all, mul #3 -+** ret -+*/ -+PROTO (incw_3, uint64_t, (uint64_t x0)) { return x0 + svcntw () * 3; } -+ -+/* -+** incw_4: -+** incb x0 -+** ret -+*/ -+PROTO (incw_4, uint64_t, (uint64_t x0)) { return x0 + svcntw () * 4; } -+ -+/* -+** incw_7: -+** incw x0, all, mul #7 -+** ret -+*/ -+PROTO (incw_7, uint64_t, (uint64_t x0)) { return x0 + svcntw () * 7; } -+ -+/* -+** incw_8: -+** incb x0, all, mul #2 -+** ret -+*/ -+PROTO (incw_8, uint64_t, (uint64_t x0)) { return x0 + svcntw () * 8; } -+ -+/* -+** incw_9: -+** incw x0, all, mul #9 -+** ret -+*/ -+PROTO (incw_9, uint64_t, (uint64_t x0)) { return x0 + svcntw () * 9; } -+ -+/* -+** incw_15: -+** incw x0, all, mul #15 -+** ret -+*/ -+PROTO (incw_15, uint64_t, (uint64_t x0)) { return x0 + svcntw () * 15; } -+ -+/* -+** incw_16: -+** incb x0, all, mul #4 -+** ret -+*/ -+PROTO (incw_16, uint64_t, (uint64_t x0)) { return x0 + svcntw () * 16; } -+ -+/* -+** incw_18: -+** inch x0, all, mul #9 -+** ret -+*/ -+PROTO (incw_18, uint64_t, (uint64_t x0)) { return x0 + svcntw () * 18; } -+ -+/* -+** incw_30: -+** inch x0, all, mul #15 -+** ret -+*/ -+PROTO (incw_30, uint64_t, (uint64_t x0)) { return x0 + svcntw () * 30; } -+ -+/* -+** decw_1: -+** decw x0 -+** ret -+*/ -+PROTO (decw_1, uint64_t, (uint64_t x0)) { return x0 - svcntw (); } -+ -+/* -+** decw_2: -+** dech x0 -+** ret -+*/ -+PROTO (decw_2, uint64_t, (uint64_t x0)) { return x0 - svcntw () * 2; } -+ -+/* -+** decw_3: -+** decw x0, all, mul #3 -+** ret -+*/ -+PROTO (decw_3, uint64_t, (uint64_t x0)) { return x0 - svcntw () * 3; } -+ -+/* -+** decw_4: -+** decb x0 -+** ret -+*/ -+PROTO (decw_4, uint64_t, (uint64_t x0)) { return x0 - svcntw () * 4; } -+ -+/* -+** decw_7: -+** decw x0, all, mul #7 -+** ret -+*/ -+PROTO (decw_7, uint64_t, (uint64_t x0)) { return x0 - svcntw () * 7; } -+ -+/* -+** decw_8: -+** decb x0, all, mul #2 -+** ret -+*/ -+PROTO (decw_8, uint64_t, (uint64_t x0)) { return x0 - svcntw () * 8; } -+ -+/* -+** decw_9: -+** decw x0, all, mul #9 -+** ret -+*/ -+PROTO 
(decw_9, uint64_t, (uint64_t x0)) { return x0 - svcntw () * 9; } -+ -+/* -+** decw_15: -+** decw x0, all, mul #15 -+** ret -+*/ -+PROTO (decw_15, uint64_t, (uint64_t x0)) { return x0 - svcntw () * 15; } -+ -+/* -+** decw_16: -+** decb x0, all, mul #4 -+** ret -+*/ -+PROTO (decw_16, uint64_t, (uint64_t x0)) { return x0 - svcntw () * 16; } -+ -+/* -+** decw_18: -+** dech x0, all, mul #9 -+** ret -+*/ -+PROTO (decw_18, uint64_t, (uint64_t x0)) { return x0 - svcntw () * 18; } -+ -+/* -+** decw_30: -+** dech x0, all, mul #15 -+** ret -+*/ -+PROTO (decw_30, uint64_t, (uint64_t x0)) { return x0 - svcntw () * 30; } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntw_pat.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntw_pat.c -new file mode 100644 -index 000000000..ff6b7d882 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntw_pat.c -@@ -0,0 +1,426 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cntw_pow2: -+** cntw x0, pow2 -+** ret -+*/ -+PROTO (cntw_pow2, uint64_t, ()) { return svcntw_pat (SV_POW2); } -+ -+/* -+** cntw_vl1: -+** mov x0, #?1 -+** ret -+*/ -+PROTO (cntw_vl1, uint64_t, ()) { return svcntw_pat (SV_VL1); } -+ -+/* -+** cntw_vl2: -+** mov x0, #?2 -+** ret -+*/ -+PROTO (cntw_vl2, uint64_t, ()) { return svcntw_pat (SV_VL2); } -+ -+/* -+** cntw_vl3: -+** mov x0, #?3 -+** ret -+*/ -+PROTO (cntw_vl3, uint64_t, ()) { return svcntw_pat (SV_VL3); } -+ -+/* -+** cntw_vl4: -+** mov x0, #?4 -+** ret -+*/ -+PROTO (cntw_vl4, uint64_t, ()) { return svcntw_pat (SV_VL4); } -+ -+/* -+** cntw_vl5: -+** cntw x0, vl5 -+** ret -+*/ -+PROTO (cntw_vl5, uint64_t, ()) { return svcntw_pat (SV_VL5); } -+ -+/* -+** cntw_vl6: -+** cntw x0, vl6 -+** ret -+*/ -+PROTO (cntw_vl6, uint64_t, ()) { return svcntw_pat (SV_VL6); } -+ -+/* -+** cntw_vl7: -+** cntw x0, vl7 -+** ret -+*/ -+PROTO (cntw_vl7, uint64_t, ()) { return svcntw_pat (SV_VL7); } -+ -+/* -+** cntw_vl8: -+** cntw x0, vl8 -+** ret -+*/ -+PROTO (cntw_vl8, uint64_t, ()) { return svcntw_pat (SV_VL8); } -+ -+/* -+** cntw_vl16: -+** cntw x0, vl16 -+** ret -+*/ -+PROTO (cntw_vl16, uint64_t, ()) { return svcntw_pat (SV_VL16); } -+ -+/* -+** cntw_vl32: -+** cntw x0, vl32 -+** ret -+*/ -+PROTO (cntw_vl32, uint64_t, ()) { return svcntw_pat (SV_VL32); } -+ -+/* -+** cntw_vl64: -+** cntw x0, vl64 -+** ret -+*/ -+PROTO (cntw_vl64, uint64_t, ()) { return svcntw_pat (SV_VL64); } -+ -+/* -+** cntw_vl128: -+** cntw x0, vl128 -+** ret -+*/ -+PROTO (cntw_vl128, uint64_t, ()) { return svcntw_pat (SV_VL128); } -+ -+/* -+** cntw_vl256: -+** cntw x0, vl256 -+** ret -+*/ -+PROTO (cntw_vl256, uint64_t, ()) { return svcntw_pat (SV_VL256); } -+ -+/* -+** cntw_mul3: -+** cntw x0, mul3 -+** ret -+*/ -+PROTO (cntw_mul3, uint64_t, ()) { return svcntw_pat (SV_MUL3); } -+ -+/* -+** cntw_mul4: -+** cntw x0, mul4 -+** ret -+*/ -+PROTO (cntw_mul4, uint64_t, ()) { return svcntw_pat (SV_MUL4); } -+ -+/* -+** cntw_all: -+** cntw x0 -+** ret -+*/ -+PROTO (cntw_all, uint64_t, ()) { return svcntw_pat (SV_ALL); } -+ -+/* -+** incw_32_pow2: -+** incw x0, pow2 -+** ret -+*/ -+PROTO (incw_32_pow2, uint32_t, (uint32_t w0)) { return w0 + svcntw_pat (SV_POW2); } -+ -+/* -+** incw_32_vl1: -+** add w0, w0, #?1 -+** ret -+*/ -+PROTO (incw_32_vl1, uint32_t, (uint32_t w0)) { return w0 + svcntw_pat (SV_VL1); } -+ -+/* -+** incw_32_vl2: -+** add w0, w0, #?2 -+** ret -+*/ -+PROTO (incw_32_vl2, uint32_t, (uint32_t w0)) { return w0 + svcntw_pat (SV_VL2); } -+ -+/* -+** incw_32_vl3: -+** add w0, w0, #?3 -+** 
ret -+*/ -+PROTO (incw_32_vl3, uint32_t, (uint32_t w0)) { return w0 + svcntw_pat (SV_VL3); } -+ -+/* -+** incw_32_vl4: -+** add w0, w0, #?4 -+** ret -+*/ -+PROTO (incw_32_vl4, uint32_t, (uint32_t w0)) { return w0 + svcntw_pat (SV_VL4); } -+ -+/* -+** incw_32_vl5: -+** incw x0, vl5 -+** ret -+*/ -+PROTO (incw_32_vl5, uint32_t, (uint32_t w0)) { return w0 + svcntw_pat (SV_VL5); } -+ -+/* -+** incw_32_vl6: -+** incw x0, vl6 -+** ret -+*/ -+PROTO (incw_32_vl6, uint32_t, (uint32_t w0)) { return w0 + svcntw_pat (SV_VL6); } -+ -+/* -+** incw_32_vl7: -+** incw x0, vl7 -+** ret -+*/ -+PROTO (incw_32_vl7, uint32_t, (uint32_t w0)) { return w0 + svcntw_pat (SV_VL7); } -+ -+/* -+** incw_32_vl8: -+** incw x0, vl8 -+** ret -+*/ -+PROTO (incw_32_vl8, uint32_t, (uint32_t w0)) { return w0 + svcntw_pat (SV_VL8); } -+ -+/* -+** incw_32_vl16: -+** incw x0, vl16 -+** ret -+*/ -+PROTO (incw_32_vl16, uint32_t, (uint32_t w0)) { return w0 + svcntw_pat (SV_VL16); } -+ -+/* -+** incw_32_vl32: -+** incw x0, vl32 -+** ret -+*/ -+PROTO (incw_32_vl32, uint32_t, (uint32_t w0)) { return w0 + svcntw_pat (SV_VL32); } -+ -+/* -+** incw_32_vl64: -+** incw x0, vl64 -+** ret -+*/ -+PROTO (incw_32_vl64, uint32_t, (uint32_t w0)) { return w0 + svcntw_pat (SV_VL64); } -+ -+/* -+** incw_32_vl128: -+** incw x0, vl128 -+** ret -+*/ -+PROTO (incw_32_vl128, uint32_t, (uint32_t w0)) { return w0 + svcntw_pat (SV_VL128); } -+ -+/* -+** incw_32_vl256: -+** incw x0, vl256 -+** ret -+*/ -+PROTO (incw_32_vl256, uint32_t, (uint32_t w0)) { return w0 + svcntw_pat (SV_VL256); } -+ -+/* -+** incw_32_mul3: -+** incw x0, mul3 -+** ret -+*/ -+PROTO (incw_32_mul3, uint32_t, (uint32_t w0)) { return w0 + svcntw_pat (SV_MUL3); } -+ -+/* -+** incw_32_mul4: -+** incw x0, mul4 -+** ret -+*/ -+PROTO (incw_32_mul4, uint32_t, (uint32_t w0)) { return w0 + svcntw_pat (SV_MUL4); } -+ -+/* -+** incw_32_all: -+** incw x0 -+** ret -+*/ -+PROTO (incw_32_all, uint32_t, (uint32_t w0)) { return w0 + svcntw_pat (SV_ALL); } -+ -+/* -+** incw_64_pow2: -+** incw x0, pow2 -+** ret -+*/ -+PROTO (incw_64_pow2, uint64_t, (uint64_t x0)) { return x0 + svcntw_pat (SV_POW2); } -+ -+/* -+** incw_64_all: -+** incw x0 -+** ret -+*/ -+PROTO (incw_64_all, uint64_t, (uint64_t x0)) { return x0 + svcntw_pat (SV_ALL); } -+ -+/* -+** decw_32_pow2: -+** decw x0, pow2 -+** ret -+*/ -+PROTO (decw_32_pow2, uint32_t, (uint32_t w0)) { return w0 - svcntw_pat (SV_POW2); } -+ -+/* -+** decw_32_vl1: -+** sub w0, w0, #?1 -+** ret -+*/ -+PROTO (decw_32_vl1, uint32_t, (uint32_t w0)) { return w0 - svcntw_pat (SV_VL1); } -+ -+/* -+** decw_32_vl2: -+** sub w0, w0, #?2 -+** ret -+*/ -+PROTO (decw_32_vl2, uint32_t, (uint32_t w0)) { return w0 - svcntw_pat (SV_VL2); } -+ -+/* -+** decw_32_vl3: -+** sub w0, w0, #?3 -+** ret -+*/ -+PROTO (decw_32_vl3, uint32_t, (uint32_t w0)) { return w0 - svcntw_pat (SV_VL3); } -+ -+/* -+** decw_32_vl4: -+** sub w0, w0, #?4 -+** ret -+*/ -+PROTO (decw_32_vl4, uint32_t, (uint32_t w0)) { return w0 - svcntw_pat (SV_VL4); } -+ -+/* -+** decw_32_vl5: -+** decw x0, vl5 -+** ret -+*/ -+PROTO (decw_32_vl5, uint32_t, (uint32_t w0)) { return w0 - svcntw_pat (SV_VL5); } -+ -+/* -+** decw_32_vl6: -+** decw x0, vl6 -+** ret -+*/ -+PROTO (decw_32_vl6, uint32_t, (uint32_t w0)) { return w0 - svcntw_pat (SV_VL6); } -+ -+/* -+** decw_32_vl7: -+** decw x0, vl7 -+** ret -+*/ -+PROTO (decw_32_vl7, uint32_t, (uint32_t w0)) { return w0 - svcntw_pat (SV_VL7); } -+ -+/* -+** decw_32_vl8: -+** decw x0, vl8 -+** ret -+*/ -+PROTO (decw_32_vl8, uint32_t, (uint32_t w0)) { return w0 - svcntw_pat (SV_VL8); } -+ 
-+/* -+** decw_32_vl16: -+** decw x0, vl16 -+** ret -+*/ -+PROTO (decw_32_vl16, uint32_t, (uint32_t w0)) { return w0 - svcntw_pat (SV_VL16); } -+ -+/* -+** decw_32_vl32: -+** decw x0, vl32 -+** ret -+*/ -+PROTO (decw_32_vl32, uint32_t, (uint32_t w0)) { return w0 - svcntw_pat (SV_VL32); } -+ -+/* -+** decw_32_vl64: -+** decw x0, vl64 -+** ret -+*/ -+PROTO (decw_32_vl64, uint32_t, (uint32_t w0)) { return w0 - svcntw_pat (SV_VL64); } -+ -+/* -+** decw_32_vl128: -+** decw x0, vl128 -+** ret -+*/ -+PROTO (decw_32_vl128, uint32_t, (uint32_t w0)) { return w0 - svcntw_pat (SV_VL128); } -+ -+/* -+** decw_32_vl256: -+** decw x0, vl256 -+** ret -+*/ -+PROTO (decw_32_vl256, uint32_t, (uint32_t w0)) { return w0 - svcntw_pat (SV_VL256); } -+ -+/* -+** decw_32_mul3: -+** decw x0, mul3 -+** ret -+*/ -+PROTO (decw_32_mul3, uint32_t, (uint32_t w0)) { return w0 - svcntw_pat (SV_MUL3); } -+ -+/* -+** decw_32_mul4: -+** decw x0, mul4 -+** ret -+*/ -+PROTO (decw_32_mul4, uint32_t, (uint32_t w0)) { return w0 - svcntw_pat (SV_MUL4); } -+ -+/* -+** decw_32_all: -+** decw x0 -+** ret -+*/ -+PROTO (decw_32_all, uint32_t, (uint32_t w0)) { return w0 - svcntw_pat (SV_ALL); } -+ -+/* -+** decw_64_pow2: -+** decw x0, pow2 -+** ret -+*/ -+PROTO (decw_64_pow2, uint64_t, (uint64_t x0)) { return x0 - svcntw_pat (SV_POW2); } -+ -+/* -+** decw_64_all: -+** decw x0 -+** ret -+*/ -+PROTO (decw_64_all, uint64_t, (uint64_t x0)) { return x0 - svcntw_pat (SV_ALL); } -+ -+/* -+** incw_s32_pow2_z0: -+** incw z0\.s, pow2 -+** ret -+*/ -+TEST_UNIFORM_Z (incw_s32_pow2_z0, svint32_t, -+ z0 = svadd_n_s32_x (svptrue_b32 (), z0, svcntw_pat (SV_POW2)), -+ z0 = svadd_x (svptrue_b32 (), z0, svcntw_pat (SV_POW2))); -+ -+/* -+** incw_s32_pow2_z1: -+** movprfx z0, z1 -+** incw z0\.s, pow2 -+** ret -+*/ -+TEST_UNIFORM_Z (incw_s32_pow2_z1, svint32_t, -+ z0 = svadd_n_s32_x (svptrue_b32 (), z1, svcntw_pat (SV_POW2)), -+ z0 = svadd_x (svptrue_b32 (), z1, svcntw_pat (SV_POW2))); -+ -+/* -+** decw_s32_pow2_z0: -+** decw z0\.s, pow2 -+** ret -+*/ -+TEST_UNIFORM_Z (decw_s32_pow2_z0, svint32_t, -+ z0 = svsub_n_s32_x (svptrue_b32 (), z0, svcntw_pat (SV_POW2)), -+ z0 = svsub_x (svptrue_b32 (), z0, svcntw_pat (SV_POW2))); -+ -+/* -+** decw_s32_pow2_z1: -+** movprfx z0, z1 -+** decw z0\.s, pow2 -+** ret -+*/ -+TEST_UNIFORM_Z (decw_s32_pow2_z1, svint32_t, -+ z0 = svsub_n_s32_x (svptrue_b32 (), z1, svcntw_pat (SV_POW2)), -+ z0 = svsub_x (svptrue_b32 (), z1, svcntw_pat (SV_POW2))); -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_f32.c -new file mode 100644 -index 000000000..2e80d6830 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_f32.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** compact_f32_tied1: -+** compact z0\.s, p0, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (compact_f32_tied1, svfloat32_t, -+ z0 = svcompact_f32 (p0, z0), -+ z0 = svcompact (p0, z0)) -+ -+/* -+** compact_f32_untied: -+** compact z0\.s, p0, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (compact_f32_untied, svfloat32_t, -+ z0 = svcompact_f32 (p0, z1), -+ z0 = svcompact (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_f64.c -new file mode 100644 -index 000000000..e0bc33efe ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_f64.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" 
"-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** compact_f64_tied1: -+** compact z0\.d, p0, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (compact_f64_tied1, svfloat64_t, -+ z0 = svcompact_f64 (p0, z0), -+ z0 = svcompact (p0, z0)) -+ -+/* -+** compact_f64_untied: -+** compact z0\.d, p0, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (compact_f64_untied, svfloat64_t, -+ z0 = svcompact_f64 (p0, z1), -+ z0 = svcompact (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_s32.c -new file mode 100644 -index 000000000..e4634982b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_s32.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** compact_s32_tied1: -+** compact z0\.s, p0, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (compact_s32_tied1, svint32_t, -+ z0 = svcompact_s32 (p0, z0), -+ z0 = svcompact (p0, z0)) -+ -+/* -+** compact_s32_untied: -+** compact z0\.s, p0, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (compact_s32_untied, svint32_t, -+ z0 = svcompact_s32 (p0, z1), -+ z0 = svcompact (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_s64.c -new file mode 100644 -index 000000000..71cb97b8a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_s64.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** compact_s64_tied1: -+** compact z0\.d, p0, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (compact_s64_tied1, svint64_t, -+ z0 = svcompact_s64 (p0, z0), -+ z0 = svcompact (p0, z0)) -+ -+/* -+** compact_s64_untied: -+** compact z0\.d, p0, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (compact_s64_untied, svint64_t, -+ z0 = svcompact_s64 (p0, z1), -+ z0 = svcompact (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_u32.c -new file mode 100644 -index 000000000..954329a0b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_u32.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** compact_u32_tied1: -+** compact z0\.s, p0, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (compact_u32_tied1, svuint32_t, -+ z0 = svcompact_u32 (p0, z0), -+ z0 = svcompact (p0, z0)) -+ -+/* -+** compact_u32_untied: -+** compact z0\.s, p0, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (compact_u32_untied, svuint32_t, -+ z0 = svcompact_u32 (p0, z1), -+ z0 = svcompact (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_u64.c -new file mode 100644 -index 000000000..ec664845f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/compact_u64.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** compact_u64_tied1: -+** compact z0\.d, p0, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (compact_u64_tied1, svuint64_t, -+ z0 = svcompact_u64 (p0, z0), -+ z0 = svcompact (p0, z0)) -+ -+/* -+** compact_u64_untied: -+** compact z0\.d, p0, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (compact_u64_untied, svuint64_t, -+ z0 = svcompact_u64 (p0, z1), -+ z0 = svcompact (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create2_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create2_1.c -new file mode 100644 -index 000000000..e9158ed8a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create2_1.c -@@ -0,0 +1,123 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** create2_s8: -+** mov z0\.d, z6\.d -+** mov z1\.d, z4\.d -+** ret -+*/ -+TEST_CREATE (create2_s8, svint8x2_t, svint8_t, -+ z0 = svcreate2_s8 (z6, z4), -+ z0 = svcreate2 (z6, z4)) -+ -+/* -+** create2_u8: -+** mov z0\.d, z4\.d -+** mov z1\.d, z6\.d -+** ret -+*/ -+TEST_CREATE (create2_u8, svuint8x2_t, svuint8_t, -+ z0 = svcreate2_u8 (z4, z6), -+ z0 = svcreate2 (z4, z6)) -+ -+/* -+** create2_s16: -+** mov z0\.d, z6\.d -+** mov z1\.d, z4\.d -+** ret -+*/ -+TEST_CREATE (create2_s16, svint16x2_t, svint16_t, -+ z0 = svcreate2_s16 (z6, z4), -+ z0 = svcreate2 (z6, z4)) -+ -+/* -+** create2_u16: -+** mov z0\.d, z6\.d -+** mov z1\.d, z5\.d -+** ret -+*/ -+TEST_CREATE (create2_u16, svuint16x2_t, svuint16_t, -+ z0 = svcreate2_u16 (z6, z5), -+ z0 = svcreate2 (z6, z5)) -+ -+/* -+** create2_bf16: -+** mov z0\.d, z4\.d -+** mov z1\.d, z5\.d -+** ret -+*/ -+TEST_CREATE (create2_bf16, svbfloat16x2_t, svbfloat16_t, -+ z0 = svcreate2_bf16 (z4, z5), -+ z0 = svcreate2 (z4, z5)) -+ -+/* -+** create2_f16: -+** mov z0\.d, z4\.d -+** mov z1\.d, z5\.d -+** ret -+*/ -+TEST_CREATE (create2_f16, svfloat16x2_t, svfloat16_t, -+ z0 = svcreate2_f16 (z4, z5), -+ z0 = svcreate2 (z4, z5)) -+ -+/* -+** create2_s32: -+** mov z0\.d, z6\.d -+** mov z1\.d, z7\.d -+** ret -+*/ -+TEST_CREATE (create2_s32, svint32x2_t, svint32_t, -+ z0 = svcreate2_s32 (z6, z7), -+ z0 = svcreate2 (z6, z7)) -+ -+/* -+** create2_u32: -+** mov z0\.d, z7\.d -+** mov z1\.d, z5\.d -+** ret -+*/ -+TEST_CREATE (create2_u32, svuint32x2_t, svuint32_t, -+ z0 = svcreate2_u32 (z7, z5), -+ z0 = svcreate2 (z7, z5)) -+ -+/* -+** create2_f32: -+** mov z0\.d, z7\.d -+** mov z1\.d, z4\.d -+** ret -+*/ -+TEST_CREATE (create2_f32, svfloat32x2_t, svfloat32_t, -+ z0 = svcreate2_f32 (z7, z4), -+ z0 = svcreate2 (z7, z4)) -+ -+/* -+** create2_s64: -+** mov z0\.d, z5\.d -+** mov z1\.d, z7\.d -+** ret -+*/ -+TEST_CREATE (create2_s64, svint64x2_t, svint64_t, -+ z0 = svcreate2_s64 (z5, z7), -+ z0 = svcreate2 (z5, z7)) -+ -+/* -+** create2_u64: -+** mov z0\.d, z7\.d -+** mov z1\.d, z6\.d -+** ret -+*/ -+TEST_CREATE (create2_u64, svuint64x2_t, svuint64_t, -+ z0 = svcreate2_u64 (z7, z6), -+ z0 = svcreate2 (z7, z6)) -+ -+/* -+** create2_f64: -+** mov z0\.d, z5\.d -+** mov z1\.d, z4\.d -+** ret -+*/ -+TEST_CREATE (create2_f64, svfloat64x2_t, svfloat64_t, -+ z0 = svcreate2_f64 (z5, z4), -+ z0 = svcreate2 (z5, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create3_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create3_1.c -new file mode 100644 -index 000000000..6f1afb772 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create3_1.c -@@ -0,0 +1,135 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** create3_s8: -+** mov z0\.d, z6\.d -+** mov z1\.d, z4\.d -+** mov z2\.d, z7\.d -+** ret -+*/ -+TEST_CREATE (create3_s8, svint8x3_t, svint8_t, -+ z0 = svcreate3_s8 (z6, z4, z7), -+ z0 = svcreate3 (z6, z4, z7)) -+ -+/* -+** create3_u8: -+** mov z0\.d, z4\.d -+** mov z1\.d, z6\.d -+** mov z2\.d, z5\.d -+** ret -+*/ -+TEST_CREATE (create3_u8, svuint8x3_t, svuint8_t, -+ z0 = svcreate3_u8 (z4, z6, z5), -+ z0 = svcreate3 (z4, z6, z5)) -+ -+/* -+** create3_s16: -+** mov z0\.d, z6\.d 
-+** mov z1\.d, z4\.d -+** mov z2\.d, z5\.d -+** ret -+*/ -+TEST_CREATE (create3_s16, svint16x3_t, svint16_t, -+ z0 = svcreate3_s16 (z6, z4, z5), -+ z0 = svcreate3 (z6, z4, z5)) -+ -+/* -+** create3_u16: -+** mov z0\.d, z6\.d -+** mov z1\.d, z5\.d -+** mov z2\.d, z4\.d -+** ret -+*/ -+TEST_CREATE (create3_u16, svuint16x3_t, svuint16_t, -+ z0 = svcreate3_u16 (z6, z5, z4), -+ z0 = svcreate3 (z6, z5, z4)) -+ -+/* -+** create3_bf16: -+** mov z0\.d, z4\.d -+** mov z1\.d, z5\.d -+** mov z2\.d, z6\.d -+** ret -+*/ -+TEST_CREATE (create3_bf16, svbfloat16x3_t, svbfloat16_t, -+ z0 = svcreate3_bf16 (z4, z5, z6), -+ z0 = svcreate3 (z4, z5, z6)) -+ -+/* -+** create3_f16: -+** mov z0\.d, z4\.d -+** mov z1\.d, z5\.d -+** mov z2\.d, z6\.d -+** ret -+*/ -+TEST_CREATE (create3_f16, svfloat16x3_t, svfloat16_t, -+ z0 = svcreate3_f16 (z4, z5, z6), -+ z0 = svcreate3 (z4, z5, z6)) -+ -+/* -+** create3_s32: -+** mov z0\.d, z6\.d -+** mov z1\.d, z7\.d -+** mov z2\.d, z4\.d -+** ret -+*/ -+TEST_CREATE (create3_s32, svint32x3_t, svint32_t, -+ z0 = svcreate3_s32 (z6, z7, z4), -+ z0 = svcreate3 (z6, z7, z4)) -+ -+/* -+** create3_u32: -+** mov z0\.d, z7\.d -+** mov z1\.d, z5\.d -+** mov z2\.d, z6\.d -+** ret -+*/ -+TEST_CREATE (create3_u32, svuint32x3_t, svuint32_t, -+ z0 = svcreate3_u32 (z7, z5, z6), -+ z0 = svcreate3 (z7, z5, z6)) -+ -+/* -+** create3_f32: -+** mov z0\.d, z7\.d -+** mov z1\.d, z4\.d -+** mov z2\.d, z6\.d -+** ret -+*/ -+TEST_CREATE (create3_f32, svfloat32x3_t, svfloat32_t, -+ z0 = svcreate3_f32 (z7, z4, z6), -+ z0 = svcreate3 (z7, z4, z6)) -+ -+/* -+** create3_s64: -+** mov z0\.d, z5\.d -+** mov z1\.d, z7\.d -+** mov z2\.d, z6\.d -+** ret -+*/ -+TEST_CREATE (create3_s64, svint64x3_t, svint64_t, -+ z0 = svcreate3_s64 (z5, z7, z6), -+ z0 = svcreate3 (z5, z7, z6)) -+ -+/* -+** create3_u64: -+** mov z0\.d, z7\.d -+** mov z1\.d, z6\.d -+** mov z2\.d, z4\.d -+** ret -+*/ -+TEST_CREATE (create3_u64, svuint64x3_t, svuint64_t, -+ z0 = svcreate3_u64 (z7, z6, z4), -+ z0 = svcreate3 (z7, z6, z4)) -+ -+/* -+** create3_f64: -+** mov z0\.d, z5\.d -+** mov z1\.d, z4\.d -+** mov z2\.d, z7\.d -+** ret -+*/ -+TEST_CREATE (create3_f64, svfloat64x3_t, svfloat64_t, -+ z0 = svcreate3_f64 (z5, z4, z7), -+ z0 = svcreate3 (z5, z4, z7)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create4_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create4_1.c -new file mode 100644 -index 000000000..a3866286e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create4_1.c -@@ -0,0 +1,147 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** create4_s8: -+** mov z0\.d, z6\.d -+** mov z1\.d, z4\.d -+** mov z2\.d, z7\.d -+** mov z3\.d, z5\.d -+** ret -+*/ -+TEST_CREATE (create4_s8, svint8x4_t, svint8_t, -+ z0 = svcreate4_s8 (z6, z4, z7, z5), -+ z0 = svcreate4 (z6, z4, z7, z5)) -+ -+/* -+** create4_u8: -+** mov z0\.d, z4\.d -+** mov z1\.d, z6\.d -+** mov z2\.d, z5\.d -+** mov z3\.d, z7\.d -+** ret -+*/ -+TEST_CREATE (create4_u8, svuint8x4_t, svuint8_t, -+ z0 = svcreate4_u8 (z4, z6, z5, z7), -+ z0 = svcreate4 (z4, z6, z5, z7)) -+ -+/* -+** create4_s16: -+** mov z0\.d, z6\.d -+** mov z1\.d, z4\.d -+** mov z2\.d, z5\.d -+** mov z3\.d, z7\.d -+** ret -+*/ -+TEST_CREATE (create4_s16, svint16x4_t, svint16_t, -+ z0 = svcreate4_s16 (z6, z4, z5, z7), -+ z0 = svcreate4 (z6, z4, z5, z7)) -+ -+/* -+** create4_u16: -+** mov z0\.d, z6\.d -+** mov z1\.d, z5\.d -+** mov z2\.d, z4\.d -+** mov z3\.d, z7\.d -+** ret -+*/ -+TEST_CREATE (create4_u16, svuint16x4_t, 
svuint16_t, -+ z0 = svcreate4_u16 (z6, z5, z4, z7), -+ z0 = svcreate4 (z6, z5, z4, z7)) -+ -+/* -+** create4_bf16: -+** mov z0\.d, z4\.d -+** mov z1\.d, z5\.d -+** mov z2\.d, z6\.d -+** mov z3\.d, z7\.d -+** ret -+*/ -+TEST_CREATE (create4_bf16, svbfloat16x4_t, svbfloat16_t, -+ z0 = svcreate4_bf16 (z4, z5, z6, z7), -+ z0 = svcreate4 (z4, z5, z6, z7)) -+ -+/* -+** create4_f16: -+** mov z0\.d, z4\.d -+** mov z1\.d, z5\.d -+** mov z2\.d, z6\.d -+** mov z3\.d, z7\.d -+** ret -+*/ -+TEST_CREATE (create4_f16, svfloat16x4_t, svfloat16_t, -+ z0 = svcreate4_f16 (z4, z5, z6, z7), -+ z0 = svcreate4 (z4, z5, z6, z7)) -+ -+/* -+** create4_s32: -+** mov z0\.d, z6\.d -+** mov z1\.d, z7\.d -+** mov z2\.d, z4\.d -+** mov z3\.d, z5\.d -+** ret -+*/ -+TEST_CREATE (create4_s32, svint32x4_t, svint32_t, -+ z0 = svcreate4_s32 (z6, z7, z4, z5), -+ z0 = svcreate4 (z6, z7, z4, z5)) -+ -+/* -+** create4_u32: -+** mov z0\.d, z7\.d -+** mov z1\.d, z5\.d -+** mov z2\.d, z6\.d -+** mov z3\.d, z7\.d -+** ret -+*/ -+TEST_CREATE (create4_u32, svuint32x4_t, svuint32_t, -+ z0 = svcreate4_u32 (z7, z5, z6, z7), -+ z0 = svcreate4 (z7, z5, z6, z7)) -+ -+/* -+** create4_f32: -+** mov z0\.d, z7\.d -+** mov z1\.d, z4\.d -+** mov z2\.d, z6\.d -+** mov z3\.d, z4\.d -+** ret -+*/ -+TEST_CREATE (create4_f32, svfloat32x4_t, svfloat32_t, -+ z0 = svcreate4_f32 (z7, z4, z6, z4), -+ z0 = svcreate4 (z7, z4, z6, z4)) -+ -+/* -+** create4_s64: -+** mov z0\.d, z5\.d -+** mov z1\.d, z7\.d -+** mov z2\.d, z6\.d -+** mov z3\.d, z6\.d -+** ret -+*/ -+TEST_CREATE (create4_s64, svint64x4_t, svint64_t, -+ z0 = svcreate4_s64 (z5, z7, z6, z6), -+ z0 = svcreate4 (z5, z7, z6, z6)) -+ -+/* -+** create4_u64: -+** mov z0\.d, z7\.d -+** mov z1\.d, z6\.d -+** mov z2\.d, z4\.d -+** mov z3\.d, z5\.d -+** ret -+*/ -+TEST_CREATE (create4_u64, svuint64x4_t, svuint64_t, -+ z0 = svcreate4_u64 (z7, z6, z4, z5), -+ z0 = svcreate4 (z7, z6, z4, z5)) -+ -+/* -+** create4_f64: -+** mov z0\.d, z5\.d -+** mov z1\.d, z4\.d -+** mov z2\.d, z7\.d -+** mov z3\.d, z6\.d -+** ret -+*/ -+TEST_CREATE (create4_f64, svfloat64x4_t, svfloat64_t, -+ z0 = svcreate4_f64 (z5, z4, z7, z6), -+ z0 = svcreate4 (z5, z4, z7, z6)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_bf16.c -new file mode 100644 -index 000000000..52baa1f58 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_bf16.c -@@ -0,0 +1,96 @@ -+/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */ -+/* { dg-require-effective-target aarch64_asm_bf16_ok } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cvt_bf16_f32_m_tied1: -+** bfcvt z0\.h, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_bf16_f32_m_tied1, svbfloat16_t, svfloat32_t, -+ z0 = svcvt_bf16_f32_m (z0, p0, z4), -+ z0 = svcvt_bf16_m (z0, p0, z4)) -+ -+/* -+** cvt_bf16_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** bfcvt z0\.h, p0/m, \1\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_bf16_f32_m_tied2, svbfloat16_t, svfloat32_t, -+ z0_res = svcvt_bf16_f32_m (z4, p0, z0), -+ z0_res = svcvt_bf16_m (z4, p0, z0)) -+ -+/* -+** cvt_bf16_f32_m_untied: -+** movprfx z0, z1 -+** bfcvt z0\.h, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_bf16_f32_m_untied, svbfloat16_t, svfloat32_t, -+ z0 = svcvt_bf16_f32_m (z1, p0, z4), -+ z0 = svcvt_bf16_m (z1, p0, z4)) -+ -+/* -+** cvt_bf16_f32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** bfcvt z0\.h, p0/m, \1\.s -+** ret -+*/ -+TEST_DUAL_Z_REV 
(cvt_bf16_f32_z_tied1, svbfloat16_t, svfloat32_t, -+ z0_res = svcvt_bf16_f32_z (p0, z0), -+ z0_res = svcvt_bf16_z (p0, z0)) -+ -+/* -+** cvt_bf16_f32_z_untied: -+** movprfx z0\.s, p0/z, z4\.s -+** bfcvt z0\.h, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_bf16_f32_z_untied, svbfloat16_t, svfloat32_t, -+ z0 = svcvt_bf16_f32_z (p0, z4), -+ z0 = svcvt_bf16_z (p0, z4)) -+ -+/* -+** cvt_bf16_f32_x_tied1: -+** bfcvt z0\.h, p0/m, z0\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_bf16_f32_x_tied1, svbfloat16_t, svfloat32_t, -+ z0_res = svcvt_bf16_f32_x (p0, z0), -+ z0_res = svcvt_bf16_x (p0, z0)) -+ -+/* -+** cvt_bf16_f32_x_untied: -+** bfcvt z0\.h, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_bf16_f32_x_untied, svbfloat16_t, svfloat32_t, -+ z0 = svcvt_bf16_f32_x (p0, z4), -+ z0 = svcvt_bf16_x (p0, z4)) -+ -+/* -+** ptrue_cvt_bf16_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z_REV (ptrue_cvt_bf16_f32_x_tied1, svbfloat16_t, svfloat32_t, -+ z0_res = svcvt_bf16_f32_x (svptrue_b32 (), z0), -+ z0_res = svcvt_bf16_x (svptrue_b32 (), z0)) -+ -+/* -+** ptrue_cvt_bf16_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z (ptrue_cvt_bf16_f32_x_untied, svbfloat16_t, svfloat32_t, -+ z0 = svcvt_bf16_f32_x (svptrue_b32 (), z4), -+ z0 = svcvt_bf16_x (svptrue_b32 (), z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_f16.c -new file mode 100644 -index 000000000..5dcd48046 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_f16.c -@@ -0,0 +1,731 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cvt_f16_f32_m_tied1: -+** fcvt z0\.h, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_f16_f32_m_tied1, svfloat16_t, svfloat32_t, -+ z0 = svcvt_f16_f32_m (z0, p0, z4), -+ z0 = svcvt_f16_m (z0, p0, z4)) -+ -+/* -+** cvt_f16_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** fcvt z0\.h, p0/m, \1\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f16_f32_m_tied2, svfloat16_t, svfloat32_t, -+ z0_res = svcvt_f16_f32_m (z4, p0, z0), -+ z0_res = svcvt_f16_m (z4, p0, z0)) -+ -+/* -+** cvt_f16_f32_m_untied: -+** movprfx z0, z1 -+** fcvt z0\.h, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_f16_f32_m_untied, svfloat16_t, svfloat32_t, -+ z0 = svcvt_f16_f32_m (z1, p0, z4), -+ z0 = svcvt_f16_m (z1, p0, z4)) -+ -+/* -+** cvt_f16_f64_m_tied1: -+** fcvt z0\.h, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_f16_f64_m_tied1, svfloat16_t, svfloat64_t, -+ z0 = svcvt_f16_f64_m (z0, p0, z4), -+ z0 = svcvt_f16_m (z0, p0, z4)) -+ -+/* -+** cvt_f16_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z4 -+** fcvt z0\.h, p0/m, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f16_f64_m_tied2, svfloat16_t, svfloat64_t, -+ z0_res = svcvt_f16_f64_m (z4, p0, z0), -+ z0_res = svcvt_f16_m (z4, p0, z0)) -+ -+/* -+** cvt_f16_f64_m_untied: -+** movprfx z0, z1 -+** fcvt z0\.h, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_f16_f64_m_untied, svfloat16_t, svfloat64_t, -+ z0 = svcvt_f16_f64_m (z1, p0, z4), -+ z0 = svcvt_f16_m (z1, p0, z4)) -+ -+/* -+** cvt_f16_s16_m_tied1: -+** scvtf z0\.h, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_f16_s16_m_tied1, svfloat16_t, svint16_t, -+ z0 = svcvt_f16_s16_m (z0, p0, z4), -+ z0 = svcvt_f16_m (z0, p0, z4)) -+ -+/* -+** cvt_f16_s16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** scvtf z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f16_s16_m_tied2, svfloat16_t, svint16_t, -+ z0_res = 
svcvt_f16_s16_m (z4, p0, z0), -+ z0_res = svcvt_f16_m (z4, p0, z0)) -+ -+/* -+** cvt_f16_s16_m_untied: -+** movprfx z0, z1 -+** scvtf z0\.h, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_f16_s16_m_untied, svfloat16_t, svint16_t, -+ z0 = svcvt_f16_s16_m (z1, p0, z4), -+ z0 = svcvt_f16_m (z1, p0, z4)) -+ -+/* -+** cvt_f16_s32_m_tied1: -+** scvtf z0\.h, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_f16_s32_m_tied1, svfloat16_t, svint32_t, -+ z0 = svcvt_f16_s32_m (z0, p0, z4), -+ z0 = svcvt_f16_m (z0, p0, z4)) -+ -+/* -+** cvt_f16_s32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** scvtf z0\.h, p0/m, \1\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f16_s32_m_tied2, svfloat16_t, svint32_t, -+ z0_res = svcvt_f16_s32_m (z4, p0, z0), -+ z0_res = svcvt_f16_m (z4, p0, z0)) -+ -+/* -+** cvt_f16_s32_m_untied: -+** movprfx z0, z1 -+** scvtf z0\.h, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_f16_s32_m_untied, svfloat16_t, svint32_t, -+ z0 = svcvt_f16_s32_m (z1, p0, z4), -+ z0 = svcvt_f16_m (z1, p0, z4)) -+ -+/* -+** cvt_f16_s64_m_tied1: -+** scvtf z0\.h, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_f16_s64_m_tied1, svfloat16_t, svint64_t, -+ z0 = svcvt_f16_s64_m (z0, p0, z4), -+ z0 = svcvt_f16_m (z0, p0, z4)) -+ -+/* -+** cvt_f16_s64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z4 -+** scvtf z0\.h, p0/m, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f16_s64_m_tied2, svfloat16_t, svint64_t, -+ z0_res = svcvt_f16_s64_m (z4, p0, z0), -+ z0_res = svcvt_f16_m (z4, p0, z0)) -+ -+/* -+** cvt_f16_s64_m_untied: -+** movprfx z0, z1 -+** scvtf z0\.h, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_f16_s64_m_untied, svfloat16_t, svint64_t, -+ z0 = svcvt_f16_s64_m (z1, p0, z4), -+ z0 = svcvt_f16_m (z1, p0, z4)) -+ -+/* -+** cvt_f16_u16_m_tied1: -+** ucvtf z0\.h, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_f16_u16_m_tied1, svfloat16_t, svuint16_t, -+ z0 = svcvt_f16_u16_m (z0, p0, z4), -+ z0 = svcvt_f16_m (z0, p0, z4)) -+ -+/* -+** cvt_f16_u16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** ucvtf z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f16_u16_m_tied2, svfloat16_t, svuint16_t, -+ z0_res = svcvt_f16_u16_m (z4, p0, z0), -+ z0_res = svcvt_f16_m (z4, p0, z0)) -+ -+/* -+** cvt_f16_u16_m_untied: -+** movprfx z0, z1 -+** ucvtf z0\.h, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_f16_u16_m_untied, svfloat16_t, svuint16_t, -+ z0 = svcvt_f16_u16_m (z1, p0, z4), -+ z0 = svcvt_f16_m (z1, p0, z4)) -+ -+/* -+** cvt_f16_u32_m_tied1: -+** ucvtf z0\.h, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_f16_u32_m_tied1, svfloat16_t, svuint32_t, -+ z0 = svcvt_f16_u32_m (z0, p0, z4), -+ z0 = svcvt_f16_m (z0, p0, z4)) -+ -+/* -+** cvt_f16_u32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** ucvtf z0\.h, p0/m, \1\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f16_u32_m_tied2, svfloat16_t, svuint32_t, -+ z0_res = svcvt_f16_u32_m (z4, p0, z0), -+ z0_res = svcvt_f16_m (z4, p0, z0)) -+ -+/* -+** cvt_f16_u32_m_untied: -+** movprfx z0, z1 -+** ucvtf z0\.h, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_f16_u32_m_untied, svfloat16_t, svuint32_t, -+ z0 = svcvt_f16_u32_m (z1, p0, z4), -+ z0 = svcvt_f16_m (z1, p0, z4)) -+ -+/* -+** cvt_f16_u64_m_tied1: -+** ucvtf z0\.h, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_f16_u64_m_tied1, svfloat16_t, svuint64_t, -+ z0 = svcvt_f16_u64_m (z0, p0, z4), -+ z0 = svcvt_f16_m (z0, p0, z4)) -+ -+/* -+** cvt_f16_u64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z4 -+** ucvtf z0\.h, p0/m, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f16_u64_m_tied2, svfloat16_t, svuint64_t, 
-+ z0_res = svcvt_f16_u64_m (z4, p0, z0), -+ z0_res = svcvt_f16_m (z4, p0, z0)) -+ -+/* -+** cvt_f16_u64_m_untied: -+** movprfx z0, z1 -+** ucvtf z0\.h, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_f16_u64_m_untied, svfloat16_t, svuint64_t, -+ z0 = svcvt_f16_u64_m (z1, p0, z4), -+ z0 = svcvt_f16_m (z1, p0, z4)) -+ -+/* -+** cvt_f16_f32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** fcvt z0\.h, p0/m, \1\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f16_f32_z_tied1, svfloat16_t, svfloat32_t, -+ z0_res = svcvt_f16_f32_z (p0, z0), -+ z0_res = svcvt_f16_z (p0, z0)) -+ -+/* -+** cvt_f16_f32_z_untied: -+** movprfx z0\.s, p0/z, z4\.s -+** fcvt z0\.h, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_f16_f32_z_untied, svfloat16_t, svfloat32_t, -+ z0 = svcvt_f16_f32_z (p0, z4), -+ z0 = svcvt_f16_z (p0, z4)) -+ -+/* -+** cvt_f16_f64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** fcvt z0\.h, p0/m, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f16_f64_z_tied1, svfloat16_t, svfloat64_t, -+ z0_res = svcvt_f16_f64_z (p0, z0), -+ z0_res = svcvt_f16_z (p0, z0)) -+ -+/* -+** cvt_f16_f64_z_untied: -+** movprfx z0\.d, p0/z, z4\.d -+** fcvt z0\.h, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_f16_f64_z_untied, svfloat16_t, svfloat64_t, -+ z0 = svcvt_f16_f64_z (p0, z4), -+ z0 = svcvt_f16_z (p0, z4)) -+ -+/* -+** cvt_f16_s16_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, \1\.h -+** scvtf z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f16_s16_z_tied1, svfloat16_t, svint16_t, -+ z0_res = svcvt_f16_s16_z (p0, z0), -+ z0_res = svcvt_f16_z (p0, z0)) -+ -+/* -+** cvt_f16_s16_z_untied: -+** movprfx z0\.h, p0/z, z4\.h -+** scvtf z0\.h, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_f16_s16_z_untied, svfloat16_t, svint16_t, -+ z0 = svcvt_f16_s16_z (p0, z4), -+ z0 = svcvt_f16_z (p0, z4)) -+ -+/* -+** cvt_f16_s32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** scvtf z0\.h, p0/m, \1\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f16_s32_z_tied1, svfloat16_t, svint32_t, -+ z0_res = svcvt_f16_s32_z (p0, z0), -+ z0_res = svcvt_f16_z (p0, z0)) -+ -+/* -+** cvt_f16_s32_z_untied: -+** movprfx z0\.s, p0/z, z4\.s -+** scvtf z0\.h, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_f16_s32_z_untied, svfloat16_t, svint32_t, -+ z0 = svcvt_f16_s32_z (p0, z4), -+ z0 = svcvt_f16_z (p0, z4)) -+ -+/* -+** cvt_f16_s64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** scvtf z0\.h, p0/m, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f16_s64_z_tied1, svfloat16_t, svint64_t, -+ z0_res = svcvt_f16_s64_z (p0, z0), -+ z0_res = svcvt_f16_z (p0, z0)) -+ -+/* -+** cvt_f16_s64_z_untied: -+** movprfx z0\.d, p0/z, z4\.d -+** scvtf z0\.h, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_f16_s64_z_untied, svfloat16_t, svint64_t, -+ z0 = svcvt_f16_s64_z (p0, z4), -+ z0 = svcvt_f16_z (p0, z4)) -+ -+/* -+** cvt_f16_u16_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, \1\.h -+** ucvtf z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f16_u16_z_tied1, svfloat16_t, svuint16_t, -+ z0_res = svcvt_f16_u16_z (p0, z0), -+ z0_res = svcvt_f16_z (p0, z0)) -+ -+/* -+** cvt_f16_u16_z_untied: -+** movprfx z0\.h, p0/z, z4\.h -+** ucvtf z0\.h, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_f16_u16_z_untied, svfloat16_t, svuint16_t, -+ z0 = svcvt_f16_u16_z (p0, z4), -+ z0 = svcvt_f16_z (p0, z4)) -+ -+/* -+** cvt_f16_u32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** ucvtf z0\.h, p0/m, \1\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f16_u32_z_tied1, 
svfloat16_t, svuint32_t, -+ z0_res = svcvt_f16_u32_z (p0, z0), -+ z0_res = svcvt_f16_z (p0, z0)) -+ -+/* -+** cvt_f16_u32_z_untied: -+** movprfx z0\.s, p0/z, z4\.s -+** ucvtf z0\.h, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_f16_u32_z_untied, svfloat16_t, svuint32_t, -+ z0 = svcvt_f16_u32_z (p0, z4), -+ z0 = svcvt_f16_z (p0, z4)) -+ -+/* -+** cvt_f16_u64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** ucvtf z0\.h, p0/m, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f16_u64_z_tied1, svfloat16_t, svuint64_t, -+ z0_res = svcvt_f16_u64_z (p0, z0), -+ z0_res = svcvt_f16_z (p0, z0)) -+ -+/* -+** cvt_f16_u64_z_untied: -+** movprfx z0\.d, p0/z, z4\.d -+** ucvtf z0\.h, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_f16_u64_z_untied, svfloat16_t, svuint64_t, -+ z0 = svcvt_f16_u64_z (p0, z4), -+ z0 = svcvt_f16_z (p0, z4)) -+ -+/* -+** cvt_f16_f32_x_tied1: -+** fcvt z0\.h, p0/m, z0\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f16_f32_x_tied1, svfloat16_t, svfloat32_t, -+ z0_res = svcvt_f16_f32_x (p0, z0), -+ z0_res = svcvt_f16_x (p0, z0)) -+ -+/* -+** cvt_f16_f32_x_untied: -+** fcvt z0\.h, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_f16_f32_x_untied, svfloat16_t, svfloat32_t, -+ z0 = svcvt_f16_f32_x (p0, z4), -+ z0 = svcvt_f16_x (p0, z4)) -+ -+/* -+** cvt_f16_f64_x_tied1: -+** fcvt z0\.h, p0/m, z0\.d -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f16_f64_x_tied1, svfloat16_t, svfloat64_t, -+ z0_res = svcvt_f16_f64_x (p0, z0), -+ z0_res = svcvt_f16_x (p0, z0)) -+ -+/* -+** cvt_f16_f64_x_untied: -+** fcvt z0\.h, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_f16_f64_x_untied, svfloat16_t, svfloat64_t, -+ z0 = svcvt_f16_f64_x (p0, z4), -+ z0 = svcvt_f16_x (p0, z4)) -+ -+/* -+** cvt_f16_s16_x_tied1: -+** scvtf z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f16_s16_x_tied1, svfloat16_t, svint16_t, -+ z0_res = svcvt_f16_s16_x (p0, z0), -+ z0_res = svcvt_f16_x (p0, z0)) -+ -+/* -+** cvt_f16_s16_x_untied: -+** scvtf z0\.h, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_f16_s16_x_untied, svfloat16_t, svint16_t, -+ z0 = svcvt_f16_s16_x (p0, z4), -+ z0 = svcvt_f16_x (p0, z4)) -+ -+/* -+** cvt_f16_s32_x_tied1: -+** scvtf z0\.h, p0/m, z0\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f16_s32_x_tied1, svfloat16_t, svint32_t, -+ z0_res = svcvt_f16_s32_x (p0, z0), -+ z0_res = svcvt_f16_x (p0, z0)) -+ -+/* -+** cvt_f16_s32_x_untied: -+** scvtf z0\.h, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_f16_s32_x_untied, svfloat16_t, svint32_t, -+ z0 = svcvt_f16_s32_x (p0, z4), -+ z0 = svcvt_f16_x (p0, z4)) -+ -+/* -+** cvt_f16_s64_x_tied1: -+** scvtf z0\.h, p0/m, z0\.d -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f16_s64_x_tied1, svfloat16_t, svint64_t, -+ z0_res = svcvt_f16_s64_x (p0, z0), -+ z0_res = svcvt_f16_x (p0, z0)) -+ -+/* -+** cvt_f16_s64_x_untied: -+** scvtf z0\.h, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_f16_s64_x_untied, svfloat16_t, svint64_t, -+ z0 = svcvt_f16_s64_x (p0, z4), -+ z0 = svcvt_f16_x (p0, z4)) -+ -+/* -+** cvt_f16_u16_x_tied1: -+** ucvtf z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f16_u16_x_tied1, svfloat16_t, svuint16_t, -+ z0_res = svcvt_f16_u16_x (p0, z0), -+ z0_res = svcvt_f16_x (p0, z0)) -+ -+/* -+** cvt_f16_u16_x_untied: -+** ucvtf z0\.h, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_f16_u16_x_untied, svfloat16_t, svuint16_t, -+ z0 = svcvt_f16_u16_x (p0, z4), -+ z0 = svcvt_f16_x (p0, z4)) -+ -+/* -+** cvt_f16_u32_x_tied1: -+** ucvtf z0\.h, p0/m, z0\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f16_u32_x_tied1, svfloat16_t, svuint32_t, -+ z0_res = svcvt_f16_u32_x (p0, z0), -+ z0_res = 
svcvt_f16_x (p0, z0)) -+ -+/* -+** cvt_f16_u32_x_untied: -+** ucvtf z0\.h, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_f16_u32_x_untied, svfloat16_t, svuint32_t, -+ z0 = svcvt_f16_u32_x (p0, z4), -+ z0 = svcvt_f16_x (p0, z4)) -+ -+/* -+** cvt_f16_u64_x_tied1: -+** ucvtf z0\.h, p0/m, z0\.d -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f16_u64_x_tied1, svfloat16_t, svuint64_t, -+ z0_res = svcvt_f16_u64_x (p0, z0), -+ z0_res = svcvt_f16_x (p0, z0)) -+ -+/* -+** cvt_f16_u64_x_untied: -+** ucvtf z0\.h, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_f16_u64_x_untied, svfloat16_t, svuint64_t, -+ z0 = svcvt_f16_u64_x (p0, z4), -+ z0 = svcvt_f16_x (p0, z4)) -+ -+/* -+** ptrue_cvt_f16_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z_REV (ptrue_cvt_f16_f32_x_tied1, svfloat16_t, svfloat32_t, -+ z0_res = svcvt_f16_f32_x (svptrue_b32 (), z0), -+ z0_res = svcvt_f16_x (svptrue_b32 (), z0)) -+ -+/* -+** ptrue_cvt_f16_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z (ptrue_cvt_f16_f32_x_untied, svfloat16_t, svfloat32_t, -+ z0 = svcvt_f16_f32_x (svptrue_b32 (), z4), -+ z0 = svcvt_f16_x (svptrue_b32 (), z4)) -+ -+/* -+** ptrue_cvt_f16_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z_REV (ptrue_cvt_f16_f64_x_tied1, svfloat16_t, svfloat64_t, -+ z0_res = svcvt_f16_f64_x (svptrue_b64 (), z0), -+ z0_res = svcvt_f16_x (svptrue_b64 (), z0)) -+ -+/* -+** ptrue_cvt_f16_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z (ptrue_cvt_f16_f64_x_untied, svfloat16_t, svfloat64_t, -+ z0 = svcvt_f16_f64_x (svptrue_b64 (), z4), -+ z0 = svcvt_f16_x (svptrue_b64 (), z4)) -+ -+/* -+** ptrue_cvt_f16_s16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z_REV (ptrue_cvt_f16_s16_x_tied1, svfloat16_t, svint16_t, -+ z0_res = svcvt_f16_s16_x (svptrue_b16 (), z0), -+ z0_res = svcvt_f16_x (svptrue_b16 (), z0)) -+ -+/* -+** ptrue_cvt_f16_s16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z (ptrue_cvt_f16_s16_x_untied, svfloat16_t, svint16_t, -+ z0 = svcvt_f16_s16_x (svptrue_b16 (), z4), -+ z0 = svcvt_f16_x (svptrue_b16 (), z4)) -+ -+/* -+** ptrue_cvt_f16_s32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z_REV (ptrue_cvt_f16_s32_x_tied1, svfloat16_t, svint32_t, -+ z0_res = svcvt_f16_s32_x (svptrue_b32 (), z0), -+ z0_res = svcvt_f16_x (svptrue_b32 (), z0)) -+ -+/* -+** ptrue_cvt_f16_s32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z (ptrue_cvt_f16_s32_x_untied, svfloat16_t, svint32_t, -+ z0 = svcvt_f16_s32_x (svptrue_b32 (), z4), -+ z0 = svcvt_f16_x (svptrue_b32 (), z4)) -+ -+/* -+** ptrue_cvt_f16_s64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z_REV (ptrue_cvt_f16_s64_x_tied1, svfloat16_t, svint64_t, -+ z0_res = svcvt_f16_s64_x (svptrue_b64 (), z0), -+ z0_res = svcvt_f16_x (svptrue_b64 (), z0)) -+ -+/* -+** ptrue_cvt_f16_s64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z (ptrue_cvt_f16_s64_x_untied, svfloat16_t, svint64_t, -+ z0 = svcvt_f16_s64_x (svptrue_b64 (), z4), -+ z0 = svcvt_f16_x (svptrue_b64 (), z4)) -+ -+/* -+** ptrue_cvt_f16_u16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_DUAL_Z_REV (ptrue_cvt_f16_u16_x_tied1, svfloat16_t, svuint16_t, -+ z0_res = svcvt_f16_u16_x (svptrue_b16 (), z0), -+ z0_res = svcvt_f16_x (svptrue_b16 (), z0)) -+ -+/* -+** ptrue_cvt_f16_u16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z (ptrue_cvt_f16_u16_x_untied, svfloat16_t, svuint16_t, -+ z0 = svcvt_f16_u16_x (svptrue_b16 (), z4), -+ z0 = svcvt_f16_x (svptrue_b16 (), z4)) -+ -+/* -+** ptrue_cvt_f16_u32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z_REV (ptrue_cvt_f16_u32_x_tied1, svfloat16_t, svuint32_t, -+ z0_res = svcvt_f16_u32_x (svptrue_b32 (), z0), -+ z0_res = svcvt_f16_x (svptrue_b32 (), z0)) -+ -+/* -+** ptrue_cvt_f16_u32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z (ptrue_cvt_f16_u32_x_untied, svfloat16_t, svuint32_t, -+ z0 = svcvt_f16_u32_x (svptrue_b32 (), z4), -+ z0 = svcvt_f16_x (svptrue_b32 (), z4)) -+ -+/* -+** ptrue_cvt_f16_u64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z_REV (ptrue_cvt_f16_u64_x_tied1, svfloat16_t, svuint64_t, -+ z0_res = svcvt_f16_u64_x (svptrue_b64 (), z0), -+ z0_res = svcvt_f16_x (svptrue_b64 (), z0)) -+ -+/* -+** ptrue_cvt_f16_u64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z (ptrue_cvt_f16_u64_x_untied, svfloat16_t, svuint64_t, -+ z0 = svcvt_f16_u64_x (svptrue_b64 (), z4), -+ z0 = svcvt_f16_x (svptrue_b64 (), z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_f32.c -new file mode 100644 -index 000000000..c16469939 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_f32.c -@@ -0,0 +1,549 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cvt_f32_f16_m_tied1: -+** fcvt z0\.s, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_f32_f16_m_tied1, svfloat32_t, svfloat16_t, -+ z0 = svcvt_f32_f16_m (z0, p0, z4), -+ z0 = svcvt_f32_m (z0, p0, z4)) -+ -+/* -+** cvt_f32_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** fcvt z0\.s, p0/m, \1\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f32_f16_m_tied2, svfloat32_t, svfloat16_t, -+ z0_res = svcvt_f32_f16_m (z4, p0, z0), -+ z0_res = svcvt_f32_m (z4, p0, z0)) -+ -+/* -+** cvt_f32_f16_m_untied: -+** movprfx z0, z1 -+** fcvt z0\.s, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_f32_f16_m_untied, svfloat32_t, svfloat16_t, -+ z0 = svcvt_f32_f16_m (z1, p0, z4), -+ z0 = svcvt_f32_m (z1, p0, z4)) -+ -+/* -+** cvt_f32_f64_m_tied1: -+** fcvt z0\.s, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_f32_f64_m_tied1, svfloat32_t, svfloat64_t, -+ z0 = svcvt_f32_f64_m (z0, p0, z4), -+ z0 = svcvt_f32_m (z0, p0, z4)) -+ -+/* -+** cvt_f32_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z4 -+** fcvt z0\.s, p0/m, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f32_f64_m_tied2, svfloat32_t, svfloat64_t, -+ z0_res = svcvt_f32_f64_m (z4, p0, z0), -+ z0_res = svcvt_f32_m (z4, p0, z0)) -+ -+/* -+** cvt_f32_f64_m_untied: -+** movprfx z0, z1 -+** fcvt z0\.s, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_f32_f64_m_untied, svfloat32_t, svfloat64_t, -+ z0 = svcvt_f32_f64_m (z1, p0, z4), -+ z0 = svcvt_f32_m (z1, p0, z4)) -+ -+/* -+** cvt_f32_s32_m_tied1: -+** scvtf z0\.s, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_f32_s32_m_tied1, svfloat32_t, svint32_t, -+ z0 = svcvt_f32_s32_m (z0, p0, z4), -+ z0 = svcvt_f32_m (z0, p0, z4)) -+ -+/* -+** cvt_f32_s32_m_tied2: -+** mov 
(z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** scvtf z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f32_s32_m_tied2, svfloat32_t, svint32_t, -+ z0_res = svcvt_f32_s32_m (z4, p0, z0), -+ z0_res = svcvt_f32_m (z4, p0, z0)) -+ -+/* -+** cvt_f32_s32_m_untied: -+** movprfx z0, z1 -+** scvtf z0\.s, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_f32_s32_m_untied, svfloat32_t, svint32_t, -+ z0 = svcvt_f32_s32_m (z1, p0, z4), -+ z0 = svcvt_f32_m (z1, p0, z4)) -+ -+/* -+** cvt_f32_s64_m_tied1: -+** scvtf z0\.s, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_f32_s64_m_tied1, svfloat32_t, svint64_t, -+ z0 = svcvt_f32_s64_m (z0, p0, z4), -+ z0 = svcvt_f32_m (z0, p0, z4)) -+ -+/* -+** cvt_f32_s64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z4 -+** scvtf z0\.s, p0/m, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f32_s64_m_tied2, svfloat32_t, svint64_t, -+ z0_res = svcvt_f32_s64_m (z4, p0, z0), -+ z0_res = svcvt_f32_m (z4, p0, z0)) -+ -+/* -+** cvt_f32_s64_m_untied: -+** movprfx z0, z1 -+** scvtf z0\.s, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_f32_s64_m_untied, svfloat32_t, svint64_t, -+ z0 = svcvt_f32_s64_m (z1, p0, z4), -+ z0 = svcvt_f32_m (z1, p0, z4)) -+ -+/* -+** cvt_f32_u32_m_tied1: -+** ucvtf z0\.s, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_f32_u32_m_tied1, svfloat32_t, svuint32_t, -+ z0 = svcvt_f32_u32_m (z0, p0, z4), -+ z0 = svcvt_f32_m (z0, p0, z4)) -+ -+/* -+** cvt_f32_u32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** ucvtf z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f32_u32_m_tied2, svfloat32_t, svuint32_t, -+ z0_res = svcvt_f32_u32_m (z4, p0, z0), -+ z0_res = svcvt_f32_m (z4, p0, z0)) -+ -+/* -+** cvt_f32_u32_m_untied: -+** movprfx z0, z1 -+** ucvtf z0\.s, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_f32_u32_m_untied, svfloat32_t, svuint32_t, -+ z0 = svcvt_f32_u32_m (z1, p0, z4), -+ z0 = svcvt_f32_m (z1, p0, z4)) -+ -+/* -+** cvt_f32_u64_m_tied1: -+** ucvtf z0\.s, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_f32_u64_m_tied1, svfloat32_t, svuint64_t, -+ z0 = svcvt_f32_u64_m (z0, p0, z4), -+ z0 = svcvt_f32_m (z0, p0, z4)) -+ -+/* -+** cvt_f32_u64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z4 -+** ucvtf z0\.s, p0/m, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f32_u64_m_tied2, svfloat32_t, svuint64_t, -+ z0_res = svcvt_f32_u64_m (z4, p0, z0), -+ z0_res = svcvt_f32_m (z4, p0, z0)) -+ -+/* -+** cvt_f32_u64_m_untied: -+** movprfx z0, z1 -+** ucvtf z0\.s, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_f32_u64_m_untied, svfloat32_t, svuint64_t, -+ z0 = svcvt_f32_u64_m (z1, p0, z4), -+ z0 = svcvt_f32_m (z1, p0, z4)) -+ -+/* -+** cvt_f32_f16_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** fcvt z0\.s, p0/m, \1\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f32_f16_z_tied1, svfloat32_t, svfloat16_t, -+ z0_res = svcvt_f32_f16_z (p0, z0), -+ z0_res = svcvt_f32_z (p0, z0)) -+ -+/* -+** cvt_f32_f16_z_untied: -+** movprfx z0\.s, p0/z, z4\.s -+** fcvt z0\.s, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_f32_f16_z_untied, svfloat32_t, svfloat16_t, -+ z0 = svcvt_f32_f16_z (p0, z4), -+ z0 = svcvt_f32_z (p0, z4)) -+ -+/* -+** cvt_f32_f64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** fcvt z0\.s, p0/m, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f32_f64_z_tied1, svfloat32_t, svfloat64_t, -+ z0_res = svcvt_f32_f64_z (p0, z0), -+ z0_res = svcvt_f32_z (p0, z0)) -+ -+/* -+** cvt_f32_f64_z_untied: -+** movprfx z0\.d, p0/z, z4\.d -+** fcvt z0\.s, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_f32_f64_z_untied, svfloat32_t, svfloat64_t, 
-+ z0 = svcvt_f32_f64_z (p0, z4), -+ z0 = svcvt_f32_z (p0, z4)) -+ -+/* -+** cvt_f32_s32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** scvtf z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f32_s32_z_tied1, svfloat32_t, svint32_t, -+ z0_res = svcvt_f32_s32_z (p0, z0), -+ z0_res = svcvt_f32_z (p0, z0)) -+ -+/* -+** cvt_f32_s32_z_untied: -+** movprfx z0\.s, p0/z, z4\.s -+** scvtf z0\.s, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_f32_s32_z_untied, svfloat32_t, svint32_t, -+ z0 = svcvt_f32_s32_z (p0, z4), -+ z0 = svcvt_f32_z (p0, z4)) -+ -+/* -+** cvt_f32_s64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** scvtf z0\.s, p0/m, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f32_s64_z_tied1, svfloat32_t, svint64_t, -+ z0_res = svcvt_f32_s64_z (p0, z0), -+ z0_res = svcvt_f32_z (p0, z0)) -+ -+/* -+** cvt_f32_s64_z_untied: -+** movprfx z0\.d, p0/z, z4\.d -+** scvtf z0\.s, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_f32_s64_z_untied, svfloat32_t, svint64_t, -+ z0 = svcvt_f32_s64_z (p0, z4), -+ z0 = svcvt_f32_z (p0, z4)) -+ -+/* -+** cvt_f32_u32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** ucvtf z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f32_u32_z_tied1, svfloat32_t, svuint32_t, -+ z0_res = svcvt_f32_u32_z (p0, z0), -+ z0_res = svcvt_f32_z (p0, z0)) -+ -+/* -+** cvt_f32_u32_z_untied: -+** movprfx z0\.s, p0/z, z4\.s -+** ucvtf z0\.s, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_f32_u32_z_untied, svfloat32_t, svuint32_t, -+ z0 = svcvt_f32_u32_z (p0, z4), -+ z0 = svcvt_f32_z (p0, z4)) -+ -+/* -+** cvt_f32_u64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** ucvtf z0\.s, p0/m, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f32_u64_z_tied1, svfloat32_t, svuint64_t, -+ z0_res = svcvt_f32_u64_z (p0, z0), -+ z0_res = svcvt_f32_z (p0, z0)) -+ -+/* -+** cvt_f32_u64_z_untied: -+** movprfx z0\.d, p0/z, z4\.d -+** ucvtf z0\.s, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_f32_u64_z_untied, svfloat32_t, svuint64_t, -+ z0 = svcvt_f32_u64_z (p0, z4), -+ z0 = svcvt_f32_z (p0, z4)) -+ -+/* -+** cvt_f32_f16_x_tied1: -+** fcvt z0\.s, p0/m, z0\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f32_f16_x_tied1, svfloat32_t, svfloat16_t, -+ z0_res = svcvt_f32_f16_x (p0, z0), -+ z0_res = svcvt_f32_x (p0, z0)) -+ -+/* -+** cvt_f32_f16_x_untied: -+** fcvt z0\.s, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_f32_f16_x_untied, svfloat32_t, svfloat16_t, -+ z0 = svcvt_f32_f16_x (p0, z4), -+ z0 = svcvt_f32_x (p0, z4)) -+ -+/* -+** cvt_f32_f64_x_tied1: -+** fcvt z0\.s, p0/m, z0\.d -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f32_f64_x_tied1, svfloat32_t, svfloat64_t, -+ z0_res = svcvt_f32_f64_x (p0, z0), -+ z0_res = svcvt_f32_x (p0, z0)) -+ -+/* -+** cvt_f32_f64_x_untied: -+** fcvt z0\.s, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_f32_f64_x_untied, svfloat32_t, svfloat64_t, -+ z0 = svcvt_f32_f64_x (p0, z4), -+ z0 = svcvt_f32_x (p0, z4)) -+ -+/* -+** cvt_f32_s32_x_tied1: -+** scvtf z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f32_s32_x_tied1, svfloat32_t, svint32_t, -+ z0_res = svcvt_f32_s32_x (p0, z0), -+ z0_res = svcvt_f32_x (p0, z0)) -+ -+/* -+** cvt_f32_s32_x_untied: -+** scvtf z0\.s, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_f32_s32_x_untied, svfloat32_t, svint32_t, -+ z0 = svcvt_f32_s32_x (p0, z4), -+ z0 = svcvt_f32_x (p0, z4)) -+ -+/* -+** cvt_f32_s64_x_tied1: -+** scvtf z0\.s, p0/m, z0\.d -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f32_s64_x_tied1, svfloat32_t, svint64_t, -+ z0_res = svcvt_f32_s64_x (p0, z0), -+ z0_res = 
svcvt_f32_x (p0, z0)) -+ -+/* -+** cvt_f32_s64_x_untied: -+** scvtf z0\.s, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_f32_s64_x_untied, svfloat32_t, svint64_t, -+ z0 = svcvt_f32_s64_x (p0, z4), -+ z0 = svcvt_f32_x (p0, z4)) -+ -+/* -+** cvt_f32_u32_x_tied1: -+** ucvtf z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f32_u32_x_tied1, svfloat32_t, svuint32_t, -+ z0_res = svcvt_f32_u32_x (p0, z0), -+ z0_res = svcvt_f32_x (p0, z0)) -+ -+/* -+** cvt_f32_u32_x_untied: -+** ucvtf z0\.s, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_f32_u32_x_untied, svfloat32_t, svuint32_t, -+ z0 = svcvt_f32_u32_x (p0, z4), -+ z0 = svcvt_f32_x (p0, z4)) -+ -+/* -+** cvt_f32_u64_x_tied1: -+** ucvtf z0\.s, p0/m, z0\.d -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f32_u64_x_tied1, svfloat32_t, svuint64_t, -+ z0_res = svcvt_f32_u64_x (p0, z0), -+ z0_res = svcvt_f32_x (p0, z0)) -+ -+/* -+** cvt_f32_u64_x_untied: -+** ucvtf z0\.s, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_f32_u64_x_untied, svfloat32_t, svuint64_t, -+ z0 = svcvt_f32_u64_x (p0, z4), -+ z0 = svcvt_f32_x (p0, z4)) -+ -+/* -+** ptrue_cvt_f32_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z_REV (ptrue_cvt_f32_f16_x_tied1, svfloat32_t, svfloat16_t, -+ z0_res = svcvt_f32_f16_x (svptrue_b32 (), z0), -+ z0_res = svcvt_f32_x (svptrue_b32 (), z0)) -+ -+/* -+** ptrue_cvt_f32_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z (ptrue_cvt_f32_f16_x_untied, svfloat32_t, svfloat16_t, -+ z0 = svcvt_f32_f16_x (svptrue_b32 (), z4), -+ z0 = svcvt_f32_x (svptrue_b32 (), z4)) -+ -+/* -+** ptrue_cvt_f32_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z_REV (ptrue_cvt_f32_f64_x_tied1, svfloat32_t, svfloat64_t, -+ z0_res = svcvt_f32_f64_x (svptrue_b64 (), z0), -+ z0_res = svcvt_f32_x (svptrue_b64 (), z0)) -+ -+/* -+** ptrue_cvt_f32_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z (ptrue_cvt_f32_f64_x_untied, svfloat32_t, svfloat64_t, -+ z0 = svcvt_f32_f64_x (svptrue_b64 (), z4), -+ z0 = svcvt_f32_x (svptrue_b64 (), z4)) -+ -+/* -+** ptrue_cvt_f32_s32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z_REV (ptrue_cvt_f32_s32_x_tied1, svfloat32_t, svint32_t, -+ z0_res = svcvt_f32_s32_x (svptrue_b32 (), z0), -+ z0_res = svcvt_f32_x (svptrue_b32 (), z0)) -+ -+/* -+** ptrue_cvt_f32_s32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z (ptrue_cvt_f32_s32_x_untied, svfloat32_t, svint32_t, -+ z0 = svcvt_f32_s32_x (svptrue_b32 (), z4), -+ z0 = svcvt_f32_x (svptrue_b32 (), z4)) -+ -+/* -+** ptrue_cvt_f32_s64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z_REV (ptrue_cvt_f32_s64_x_tied1, svfloat32_t, svint64_t, -+ z0_res = svcvt_f32_s64_x (svptrue_b64 (), z0), -+ z0_res = svcvt_f32_x (svptrue_b64 (), z0)) -+ -+/* -+** ptrue_cvt_f32_s64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z (ptrue_cvt_f32_s64_x_untied, svfloat32_t, svint64_t, -+ z0 = svcvt_f32_s64_x (svptrue_b64 (), z4), -+ z0 = svcvt_f32_x (svptrue_b64 (), z4)) -+ -+/* -+** ptrue_cvt_f32_u32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z_REV (ptrue_cvt_f32_u32_x_tied1, svfloat32_t, svuint32_t, -+ z0_res = svcvt_f32_u32_x (svptrue_b32 (), z0), -+ z0_res = svcvt_f32_x (svptrue_b32 (), z0)) -+ -+/* -+** ptrue_cvt_f32_u32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_DUAL_Z (ptrue_cvt_f32_u32_x_untied, svfloat32_t, svuint32_t, -+ z0 = svcvt_f32_u32_x (svptrue_b32 (), z4), -+ z0 = svcvt_f32_x (svptrue_b32 (), z4)) -+ -+/* -+** ptrue_cvt_f32_u64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z_REV (ptrue_cvt_f32_u64_x_tied1, svfloat32_t, svuint64_t, -+ z0_res = svcvt_f32_u64_x (svptrue_b64 (), z0), -+ z0_res = svcvt_f32_x (svptrue_b64 (), z0)) -+ -+/* -+** ptrue_cvt_f32_u64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z (ptrue_cvt_f32_u64_x_untied, svfloat32_t, svuint64_t, -+ z0 = svcvt_f32_u64_x (svptrue_b64 (), z4), -+ z0 = svcvt_f32_x (svptrue_b64 (), z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_f64.c -new file mode 100644 -index 000000000..1d08e6ec5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_f64.c -@@ -0,0 +1,549 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cvt_f64_f16_m_tied1: -+** fcvt z0\.d, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_f64_f16_m_tied1, svfloat64_t, svfloat16_t, -+ z0 = svcvt_f64_f16_m (z0, p0, z4), -+ z0 = svcvt_f64_m (z0, p0, z4)) -+ -+/* -+** cvt_f64_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** fcvt z0\.d, p0/m, \1\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f64_f16_m_tied2, svfloat64_t, svfloat16_t, -+ z0_res = svcvt_f64_f16_m (z4, p0, z0), -+ z0_res = svcvt_f64_m (z4, p0, z0)) -+ -+/* -+** cvt_f64_f16_m_untied: -+** movprfx z0, z1 -+** fcvt z0\.d, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_f64_f16_m_untied, svfloat64_t, svfloat16_t, -+ z0 = svcvt_f64_f16_m (z1, p0, z4), -+ z0 = svcvt_f64_m (z1, p0, z4)) -+ -+/* -+** cvt_f64_f32_m_tied1: -+** fcvt z0\.d, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_f64_f32_m_tied1, svfloat64_t, svfloat32_t, -+ z0 = svcvt_f64_f32_m (z0, p0, z4), -+ z0 = svcvt_f64_m (z0, p0, z4)) -+ -+/* -+** cvt_f64_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** fcvt z0\.d, p0/m, \1\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f64_f32_m_tied2, svfloat64_t, svfloat32_t, -+ z0_res = svcvt_f64_f32_m (z4, p0, z0), -+ z0_res = svcvt_f64_m (z4, p0, z0)) -+ -+/* -+** cvt_f64_f32_m_untied: -+** movprfx z0, z1 -+** fcvt z0\.d, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_f64_f32_m_untied, svfloat64_t, svfloat32_t, -+ z0 = svcvt_f64_f32_m (z1, p0, z4), -+ z0 = svcvt_f64_m (z1, p0, z4)) -+ -+/* -+** cvt_f64_s32_m_tied1: -+** scvtf z0\.d, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_f64_s32_m_tied1, svfloat64_t, svint32_t, -+ z0 = svcvt_f64_s32_m (z0, p0, z4), -+ z0 = svcvt_f64_m (z0, p0, z4)) -+ -+/* -+** cvt_f64_s32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** scvtf z0\.d, p0/m, \1\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f64_s32_m_tied2, svfloat64_t, svint32_t, -+ z0_res = svcvt_f64_s32_m (z4, p0, z0), -+ z0_res = svcvt_f64_m (z4, p0, z0)) -+ -+/* -+** cvt_f64_s32_m_untied: -+** movprfx z0, z1 -+** scvtf z0\.d, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_f64_s32_m_untied, svfloat64_t, svint32_t, -+ z0 = svcvt_f64_s32_m (z1, p0, z4), -+ z0 = svcvt_f64_m (z1, p0, z4)) -+ -+/* -+** cvt_f64_s64_m_tied1: -+** scvtf z0\.d, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_f64_s64_m_tied1, svfloat64_t, svint64_t, -+ z0 = svcvt_f64_s64_m (z0, p0, z4), -+ z0 = svcvt_f64_m (z0, p0, z4)) -+ -+/* -+** cvt_f64_s64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z4 -+** scvtf z0\.d, p0/m, \1 -+** ret -+*/ 
-+TEST_DUAL_Z_REV (cvt_f64_s64_m_tied2, svfloat64_t, svint64_t, -+ z0_res = svcvt_f64_s64_m (z4, p0, z0), -+ z0_res = svcvt_f64_m (z4, p0, z0)) -+ -+/* -+** cvt_f64_s64_m_untied: -+** movprfx z0, z1 -+** scvtf z0\.d, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_f64_s64_m_untied, svfloat64_t, svint64_t, -+ z0 = svcvt_f64_s64_m (z1, p0, z4), -+ z0 = svcvt_f64_m (z1, p0, z4)) -+ -+/* -+** cvt_f64_u32_m_tied1: -+** ucvtf z0\.d, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_f64_u32_m_tied1, svfloat64_t, svuint32_t, -+ z0 = svcvt_f64_u32_m (z0, p0, z4), -+ z0 = svcvt_f64_m (z0, p0, z4)) -+ -+/* -+** cvt_f64_u32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** ucvtf z0\.d, p0/m, \1\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f64_u32_m_tied2, svfloat64_t, svuint32_t, -+ z0_res = svcvt_f64_u32_m (z4, p0, z0), -+ z0_res = svcvt_f64_m (z4, p0, z0)) -+ -+/* -+** cvt_f64_u32_m_untied: -+** movprfx z0, z1 -+** ucvtf z0\.d, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_f64_u32_m_untied, svfloat64_t, svuint32_t, -+ z0 = svcvt_f64_u32_m (z1, p0, z4), -+ z0 = svcvt_f64_m (z1, p0, z4)) -+ -+/* -+** cvt_f64_u64_m_tied1: -+** ucvtf z0\.d, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_f64_u64_m_tied1, svfloat64_t, svuint64_t, -+ z0 = svcvt_f64_u64_m (z0, p0, z4), -+ z0 = svcvt_f64_m (z0, p0, z4)) -+ -+/* -+** cvt_f64_u64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z4 -+** ucvtf z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f64_u64_m_tied2, svfloat64_t, svuint64_t, -+ z0_res = svcvt_f64_u64_m (z4, p0, z0), -+ z0_res = svcvt_f64_m (z4, p0, z0)) -+ -+/* -+** cvt_f64_u64_m_untied: -+** movprfx z0, z1 -+** ucvtf z0\.d, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_f64_u64_m_untied, svfloat64_t, svuint64_t, -+ z0 = svcvt_f64_u64_m (z1, p0, z4), -+ z0 = svcvt_f64_m (z1, p0, z4)) -+ -+/* -+** cvt_f64_f16_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.d, p0/z, \1\.d -+** fcvt z0\.d, p0/m, \1\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f64_f16_z_tied1, svfloat64_t, svfloat16_t, -+ z0_res = svcvt_f64_f16_z (p0, z0), -+ z0_res = svcvt_f64_z (p0, z0)) -+ -+/* -+** cvt_f64_f16_z_untied: -+** movprfx z0\.d, p0/z, z4\.d -+** fcvt z0\.d, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_f64_f16_z_untied, svfloat64_t, svfloat16_t, -+ z0 = svcvt_f64_f16_z (p0, z4), -+ z0 = svcvt_f64_z (p0, z4)) -+ -+/* -+** cvt_f64_f32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.d, p0/z, \1\.d -+** fcvt z0\.d, p0/m, \1\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f64_f32_z_tied1, svfloat64_t, svfloat32_t, -+ z0_res = svcvt_f64_f32_z (p0, z0), -+ z0_res = svcvt_f64_z (p0, z0)) -+ -+/* -+** cvt_f64_f32_z_untied: -+** movprfx z0\.d, p0/z, z4\.d -+** fcvt z0\.d, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_f64_f32_z_untied, svfloat64_t, svfloat32_t, -+ z0 = svcvt_f64_f32_z (p0, z4), -+ z0 = svcvt_f64_z (p0, z4)) -+ -+/* -+** cvt_f64_s32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.d, p0/z, \1\.d -+** scvtf z0\.d, p0/m, \1\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f64_s32_z_tied1, svfloat64_t, svint32_t, -+ z0_res = svcvt_f64_s32_z (p0, z0), -+ z0_res = svcvt_f64_z (p0, z0)) -+ -+/* -+** cvt_f64_s32_z_untied: -+** movprfx z0\.d, p0/z, z4\.d -+** scvtf z0\.d, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_f64_s32_z_untied, svfloat64_t, svint32_t, -+ z0 = svcvt_f64_s32_z (p0, z4), -+ z0 = svcvt_f64_z (p0, z4)) -+ -+/* -+** cvt_f64_s64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** scvtf z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f64_s64_z_tied1, svfloat64_t, svint64_t, -+ z0_res = 
svcvt_f64_s64_z (p0, z0), -+ z0_res = svcvt_f64_z (p0, z0)) -+ -+/* -+** cvt_f64_s64_z_untied: -+** movprfx z0\.d, p0/z, z4\.d -+** scvtf z0\.d, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_f64_s64_z_untied, svfloat64_t, svint64_t, -+ z0 = svcvt_f64_s64_z (p0, z4), -+ z0 = svcvt_f64_z (p0, z4)) -+ -+/* -+** cvt_f64_u32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.d, p0/z, \1\.d -+** ucvtf z0\.d, p0/m, \1\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f64_u32_z_tied1, svfloat64_t, svuint32_t, -+ z0_res = svcvt_f64_u32_z (p0, z0), -+ z0_res = svcvt_f64_z (p0, z0)) -+ -+/* -+** cvt_f64_u32_z_untied: -+** movprfx z0\.d, p0/z, z4\.d -+** ucvtf z0\.d, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_f64_u32_z_untied, svfloat64_t, svuint32_t, -+ z0 = svcvt_f64_u32_z (p0, z4), -+ z0 = svcvt_f64_z (p0, z4)) -+ -+/* -+** cvt_f64_u64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** ucvtf z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f64_u64_z_tied1, svfloat64_t, svuint64_t, -+ z0_res = svcvt_f64_u64_z (p0, z0), -+ z0_res = svcvt_f64_z (p0, z0)) -+ -+/* -+** cvt_f64_u64_z_untied: -+** movprfx z0\.d, p0/z, z4\.d -+** ucvtf z0\.d, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_f64_u64_z_untied, svfloat64_t, svuint64_t, -+ z0 = svcvt_f64_u64_z (p0, z4), -+ z0 = svcvt_f64_z (p0, z4)) -+ -+/* -+** cvt_f64_f16_x_tied1: -+** fcvt z0\.d, p0/m, z0\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f64_f16_x_tied1, svfloat64_t, svfloat16_t, -+ z0_res = svcvt_f64_f16_x (p0, z0), -+ z0_res = svcvt_f64_x (p0, z0)) -+ -+/* -+** cvt_f64_f16_x_untied: -+** fcvt z0\.d, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_f64_f16_x_untied, svfloat64_t, svfloat16_t, -+ z0 = svcvt_f64_f16_x (p0, z4), -+ z0 = svcvt_f64_x (p0, z4)) -+ -+/* -+** cvt_f64_f32_x_tied1: -+** fcvt z0\.d, p0/m, z0\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f64_f32_x_tied1, svfloat64_t, svfloat32_t, -+ z0_res = svcvt_f64_f32_x (p0, z0), -+ z0_res = svcvt_f64_x (p0, z0)) -+ -+/* -+** cvt_f64_f32_x_untied: -+** fcvt z0\.d, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_f64_f32_x_untied, svfloat64_t, svfloat32_t, -+ z0 = svcvt_f64_f32_x (p0, z4), -+ z0 = svcvt_f64_x (p0, z4)) -+ -+/* -+** cvt_f64_s32_x_tied1: -+** scvtf z0\.d, p0/m, z0\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f64_s32_x_tied1, svfloat64_t, svint32_t, -+ z0_res = svcvt_f64_s32_x (p0, z0), -+ z0_res = svcvt_f64_x (p0, z0)) -+ -+/* -+** cvt_f64_s32_x_untied: -+** scvtf z0\.d, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_f64_s32_x_untied, svfloat64_t, svint32_t, -+ z0 = svcvt_f64_s32_x (p0, z4), -+ z0 = svcvt_f64_x (p0, z4)) -+ -+/* -+** cvt_f64_s64_x_tied1: -+** scvtf z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f64_s64_x_tied1, svfloat64_t, svint64_t, -+ z0_res = svcvt_f64_s64_x (p0, z0), -+ z0_res = svcvt_f64_x (p0, z0)) -+ -+/* -+** cvt_f64_s64_x_untied: -+** scvtf z0\.d, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_f64_s64_x_untied, svfloat64_t, svint64_t, -+ z0 = svcvt_f64_s64_x (p0, z4), -+ z0 = svcvt_f64_x (p0, z4)) -+ -+/* -+** cvt_f64_u32_x_tied1: -+** ucvtf z0\.d, p0/m, z0\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f64_u32_x_tied1, svfloat64_t, svuint32_t, -+ z0_res = svcvt_f64_u32_x (p0, z0), -+ z0_res = svcvt_f64_x (p0, z0)) -+ -+/* -+** cvt_f64_u32_x_untied: -+** ucvtf z0\.d, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_f64_u32_x_untied, svfloat64_t, svuint32_t, -+ z0 = svcvt_f64_u32_x (p0, z4), -+ z0 = svcvt_f64_x (p0, z4)) -+ -+/* -+** cvt_f64_u64_x_tied1: -+** ucvtf z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_f64_u64_x_tied1, svfloat64_t, svuint64_t, 
-+ z0_res = svcvt_f64_u64_x (p0, z0), -+ z0_res = svcvt_f64_x (p0, z0)) -+ -+/* -+** cvt_f64_u64_x_untied: -+** ucvtf z0\.d, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_f64_u64_x_untied, svfloat64_t, svuint64_t, -+ z0 = svcvt_f64_u64_x (p0, z4), -+ z0 = svcvt_f64_x (p0, z4)) -+ -+/* -+** ptrue_cvt_f64_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z_REV (ptrue_cvt_f64_f16_x_tied1, svfloat64_t, svfloat16_t, -+ z0_res = svcvt_f64_f16_x (svptrue_b64 (), z0), -+ z0_res = svcvt_f64_x (svptrue_b64 (), z0)) -+ -+/* -+** ptrue_cvt_f64_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z (ptrue_cvt_f64_f16_x_untied, svfloat64_t, svfloat16_t, -+ z0 = svcvt_f64_f16_x (svptrue_b64 (), z4), -+ z0 = svcvt_f64_x (svptrue_b64 (), z4)) -+ -+/* -+** ptrue_cvt_f64_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z_REV (ptrue_cvt_f64_f32_x_tied1, svfloat64_t, svfloat32_t, -+ z0_res = svcvt_f64_f32_x (svptrue_b64 (), z0), -+ z0_res = svcvt_f64_x (svptrue_b64 (), z0)) -+ -+/* -+** ptrue_cvt_f64_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z (ptrue_cvt_f64_f32_x_untied, svfloat64_t, svfloat32_t, -+ z0 = svcvt_f64_f32_x (svptrue_b64 (), z4), -+ z0 = svcvt_f64_x (svptrue_b64 (), z4)) -+ -+/* -+** ptrue_cvt_f64_s32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z_REV (ptrue_cvt_f64_s32_x_tied1, svfloat64_t, svint32_t, -+ z0_res = svcvt_f64_s32_x (svptrue_b64 (), z0), -+ z0_res = svcvt_f64_x (svptrue_b64 (), z0)) -+ -+/* -+** ptrue_cvt_f64_s32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z (ptrue_cvt_f64_s32_x_untied, svfloat64_t, svint32_t, -+ z0 = svcvt_f64_s32_x (svptrue_b64 (), z4), -+ z0 = svcvt_f64_x (svptrue_b64 (), z4)) -+ -+/* -+** ptrue_cvt_f64_s64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z_REV (ptrue_cvt_f64_s64_x_tied1, svfloat64_t, svint64_t, -+ z0_res = svcvt_f64_s64_x (svptrue_b64 (), z0), -+ z0_res = svcvt_f64_x (svptrue_b64 (), z0)) -+ -+/* -+** ptrue_cvt_f64_s64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z (ptrue_cvt_f64_s64_x_untied, svfloat64_t, svint64_t, -+ z0 = svcvt_f64_s64_x (svptrue_b64 (), z4), -+ z0 = svcvt_f64_x (svptrue_b64 (), z4)) -+ -+/* -+** ptrue_cvt_f64_u32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z_REV (ptrue_cvt_f64_u32_x_tied1, svfloat64_t, svuint32_t, -+ z0_res = svcvt_f64_u32_x (svptrue_b64 (), z0), -+ z0_res = svcvt_f64_x (svptrue_b64 (), z0)) -+ -+/* -+** ptrue_cvt_f64_u32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z (ptrue_cvt_f64_u32_x_untied, svfloat64_t, svuint32_t, -+ z0 = svcvt_f64_u32_x (svptrue_b64 (), z4), -+ z0 = svcvt_f64_x (svptrue_b64 (), z4)) -+ -+/* -+** ptrue_cvt_f64_u64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z_REV (ptrue_cvt_f64_u64_x_tied1, svfloat64_t, svuint64_t, -+ z0_res = svcvt_f64_u64_x (svptrue_b64 (), z0), -+ z0_res = svcvt_f64_x (svptrue_b64 (), z0)) -+ -+/* -+** ptrue_cvt_f64_u64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_DUAL_Z (ptrue_cvt_f64_u64_x_untied, svfloat64_t, svuint64_t, -+ z0 = svcvt_f64_u64_x (svptrue_b64 (), z4), -+ z0 = svcvt_f64_x (svptrue_b64 (), z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_s16.c -new file mode 100644 -index 000000000..81761ab09 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_s16.c -@@ -0,0 +1,72 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cvt_s16_f16_m_tied1: -+** fcvtzs z0\.h, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_s16_f16_m_tied1, svint16_t, svfloat16_t, -+ z0 = svcvt_s16_f16_m (z0, p0, z4), -+ z0 = svcvt_s16_m (z0, p0, z4)) -+ -+/* -+** cvt_s16_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** fcvtzs z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_s16_f16_m_tied2, svint16_t, svfloat16_t, -+ z0_res = svcvt_s16_f16_m (z4, p0, z0), -+ z0_res = svcvt_s16_m (z4, p0, z0)) -+ -+/* -+** cvt_s16_f16_m_untied: -+** movprfx z0, z1 -+** fcvtzs z0\.h, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_s16_f16_m_untied, svint16_t, svfloat16_t, -+ z0 = svcvt_s16_f16_m (z1, p0, z4), -+ z0 = svcvt_s16_m (z1, p0, z4)) -+ -+/* -+** cvt_s16_f16_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, \1\.h -+** fcvtzs z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_s16_f16_z_tied1, svint16_t, svfloat16_t, -+ z0_res = svcvt_s16_f16_z (p0, z0), -+ z0_res = svcvt_s16_z (p0, z0)) -+ -+/* -+** cvt_s16_f16_z_untied: -+** movprfx z0\.h, p0/z, z4\.h -+** fcvtzs z0\.h, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_s16_f16_z_untied, svint16_t, svfloat16_t, -+ z0 = svcvt_s16_f16_z (p0, z4), -+ z0 = svcvt_s16_z (p0, z4)) -+ -+/* -+** cvt_s16_f16_x_tied1: -+** fcvtzs z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_s16_f16_x_tied1, svint16_t, svfloat16_t, -+ z0_res = svcvt_s16_f16_x (p0, z0), -+ z0_res = svcvt_s16_x (p0, z0)) -+ -+/* -+** cvt_s16_f16_x_untied: -+** fcvtzs z0\.h, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_s16_f16_x_untied, svint16_t, svfloat16_t, -+ z0 = svcvt_s16_f16_x (p0, z4), -+ z0 = svcvt_s16_x (p0, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_s32.c -new file mode 100644 -index 000000000..d30da5cc5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_s32.c -@@ -0,0 +1,210 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cvt_s32_f16_m_tied1: -+** fcvtzs z0\.s, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_s32_f16_m_tied1, svint32_t, svfloat16_t, -+ z0 = svcvt_s32_f16_m (z0, p0, z4), -+ z0 = svcvt_s32_m (z0, p0, z4)) -+ -+/* -+** cvt_s32_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** fcvtzs z0\.s, p0/m, \1\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_s32_f16_m_tied2, svint32_t, svfloat16_t, -+ z0_res = svcvt_s32_f16_m (z4, p0, z0), -+ z0_res = svcvt_s32_m (z4, p0, z0)) -+ -+/* -+** cvt_s32_f16_m_untied: -+** movprfx z0, z1 -+** fcvtzs z0\.s, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_s32_f16_m_untied, svint32_t, svfloat16_t, -+ z0 = svcvt_s32_f16_m (z1, p0, z4), -+ z0 = svcvt_s32_m (z1, p0, z4)) -+ -+/* -+** cvt_s32_f32_m_tied1: -+** fcvtzs z0\.s, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_s32_f32_m_tied1, svint32_t, svfloat32_t, -+ z0 = svcvt_s32_f32_m (z0, p0, z4), -+ z0 = svcvt_s32_m (z0, p0, z4)) -+ -+/* -+** cvt_s32_f32_m_tied2: -+** mov (z[0-9]+)\.d, 
z0\.d -+** movprfx z0, z4 -+** fcvtzs z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_s32_f32_m_tied2, svint32_t, svfloat32_t, -+ z0_res = svcvt_s32_f32_m (z4, p0, z0), -+ z0_res = svcvt_s32_m (z4, p0, z0)) -+ -+/* -+** cvt_s32_f32_m_untied: -+** movprfx z0, z1 -+** fcvtzs z0\.s, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_s32_f32_m_untied, svint32_t, svfloat32_t, -+ z0 = svcvt_s32_f32_m (z1, p0, z4), -+ z0 = svcvt_s32_m (z1, p0, z4)) -+ -+/* -+** cvt_s32_f64_m_tied1: -+** fcvtzs z0\.s, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_s32_f64_m_tied1, svint32_t, svfloat64_t, -+ z0 = svcvt_s32_f64_m (z0, p0, z4), -+ z0 = svcvt_s32_m (z0, p0, z4)) -+ -+/* -+** cvt_s32_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z4 -+** fcvtzs z0\.s, p0/m, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_s32_f64_m_tied2, svint32_t, svfloat64_t, -+ z0_res = svcvt_s32_f64_m (z4, p0, z0), -+ z0_res = svcvt_s32_m (z4, p0, z0)) -+ -+/* -+** cvt_s32_f64_m_untied: -+** movprfx z0, z1 -+** fcvtzs z0\.s, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_s32_f64_m_untied, svint32_t, svfloat64_t, -+ z0 = svcvt_s32_f64_m (z1, p0, z4), -+ z0 = svcvt_s32_m (z1, p0, z4)) -+ -+/* -+** cvt_s32_f16_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** fcvtzs z0\.s, p0/m, \1\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_s32_f16_z_tied1, svint32_t, svfloat16_t, -+ z0_res = svcvt_s32_f16_z (p0, z0), -+ z0_res = svcvt_s32_z (p0, z0)) -+ -+/* -+** cvt_s32_f16_z_untied: -+** movprfx z0\.s, p0/z, z4\.s -+** fcvtzs z0\.s, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_s32_f16_z_untied, svint32_t, svfloat16_t, -+ z0 = svcvt_s32_f16_z (p0, z4), -+ z0 = svcvt_s32_z (p0, z4)) -+ -+/* -+** cvt_s32_f32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** fcvtzs z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_s32_f32_z_tied1, svint32_t, svfloat32_t, -+ z0_res = svcvt_s32_f32_z (p0, z0), -+ z0_res = svcvt_s32_z (p0, z0)) -+ -+/* -+** cvt_s32_f32_z_untied: -+** movprfx z0\.s, p0/z, z4\.s -+** fcvtzs z0\.s, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_s32_f32_z_untied, svint32_t, svfloat32_t, -+ z0 = svcvt_s32_f32_z (p0, z4), -+ z0 = svcvt_s32_z (p0, z4)) -+ -+/* -+** cvt_s32_f64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** fcvtzs z0\.s, p0/m, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_s32_f64_z_tied1, svint32_t, svfloat64_t, -+ z0_res = svcvt_s32_f64_z (p0, z0), -+ z0_res = svcvt_s32_z (p0, z0)) -+ -+/* -+** cvt_s32_f64_z_untied: -+** movprfx z0\.d, p0/z, z4\.d -+** fcvtzs z0\.s, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_s32_f64_z_untied, svint32_t, svfloat64_t, -+ z0 = svcvt_s32_f64_z (p0, z4), -+ z0 = svcvt_s32_z (p0, z4)) -+ -+/* -+** cvt_s32_f16_x_tied1: -+** fcvtzs z0\.s, p0/m, z0\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_s32_f16_x_tied1, svint32_t, svfloat16_t, -+ z0_res = svcvt_s32_f16_x (p0, z0), -+ z0_res = svcvt_s32_x (p0, z0)) -+ -+/* -+** cvt_s32_f16_x_untied: -+** fcvtzs z0\.s, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_s32_f16_x_untied, svint32_t, svfloat16_t, -+ z0 = svcvt_s32_f16_x (p0, z4), -+ z0 = svcvt_s32_x (p0, z4)) -+ -+/* -+** cvt_s32_f32_x_tied1: -+** fcvtzs z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_s32_f32_x_tied1, svint32_t, svfloat32_t, -+ z0_res = svcvt_s32_f32_x (p0, z0), -+ z0_res = svcvt_s32_x (p0, z0)) -+ -+/* -+** cvt_s32_f32_x_untied: -+** fcvtzs z0\.s, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_s32_f32_x_untied, svint32_t, svfloat32_t, -+ z0 = svcvt_s32_f32_x (p0, z4), -+ z0 = svcvt_s32_x (p0, z4)) -+ -+/* -+** 
cvt_s32_f64_x_tied1: -+** fcvtzs z0\.s, p0/m, z0\.d -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_s32_f64_x_tied1, svint32_t, svfloat64_t, -+ z0_res = svcvt_s32_f64_x (p0, z0), -+ z0_res = svcvt_s32_x (p0, z0)) -+ -+/* -+** cvt_s32_f64_x_untied: -+** fcvtzs z0\.s, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_s32_f64_x_untied, svint32_t, svfloat64_t, -+ z0 = svcvt_s32_f64_x (p0, z4), -+ z0 = svcvt_s32_x (p0, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_s64.c -new file mode 100644 -index 000000000..68cd80784 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_s64.c -@@ -0,0 +1,210 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cvt_s64_f16_m_tied1: -+** fcvtzs z0\.d, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_s64_f16_m_tied1, svint64_t, svfloat16_t, -+ z0 = svcvt_s64_f16_m (z0, p0, z4), -+ z0 = svcvt_s64_m (z0, p0, z4)) -+ -+/* -+** cvt_s64_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** fcvtzs z0\.d, p0/m, \1\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_s64_f16_m_tied2, svint64_t, svfloat16_t, -+ z0_res = svcvt_s64_f16_m (z4, p0, z0), -+ z0_res = svcvt_s64_m (z4, p0, z0)) -+ -+/* -+** cvt_s64_f16_m_untied: -+** movprfx z0, z1 -+** fcvtzs z0\.d, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_s64_f16_m_untied, svint64_t, svfloat16_t, -+ z0 = svcvt_s64_f16_m (z1, p0, z4), -+ z0 = svcvt_s64_m (z1, p0, z4)) -+ -+/* -+** cvt_s64_f32_m_tied1: -+** fcvtzs z0\.d, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_s64_f32_m_tied1, svint64_t, svfloat32_t, -+ z0 = svcvt_s64_f32_m (z0, p0, z4), -+ z0 = svcvt_s64_m (z0, p0, z4)) -+ -+/* -+** cvt_s64_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** fcvtzs z0\.d, p0/m, \1\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_s64_f32_m_tied2, svint64_t, svfloat32_t, -+ z0_res = svcvt_s64_f32_m (z4, p0, z0), -+ z0_res = svcvt_s64_m (z4, p0, z0)) -+ -+/* -+** cvt_s64_f32_m_untied: -+** movprfx z0, z1 -+** fcvtzs z0\.d, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_s64_f32_m_untied, svint64_t, svfloat32_t, -+ z0 = svcvt_s64_f32_m (z1, p0, z4), -+ z0 = svcvt_s64_m (z1, p0, z4)) -+ -+/* -+** cvt_s64_f64_m_tied1: -+** fcvtzs z0\.d, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_s64_f64_m_tied1, svint64_t, svfloat64_t, -+ z0 = svcvt_s64_f64_m (z0, p0, z4), -+ z0 = svcvt_s64_m (z0, p0, z4)) -+ -+/* -+** cvt_s64_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z4 -+** fcvtzs z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_s64_f64_m_tied2, svint64_t, svfloat64_t, -+ z0_res = svcvt_s64_f64_m (z4, p0, z0), -+ z0_res = svcvt_s64_m (z4, p0, z0)) -+ -+/* -+** cvt_s64_f64_m_untied: -+** movprfx z0, z1 -+** fcvtzs z0\.d, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_s64_f64_m_untied, svint64_t, svfloat64_t, -+ z0 = svcvt_s64_f64_m (z1, p0, z4), -+ z0 = svcvt_s64_m (z1, p0, z4)) -+ -+/* -+** cvt_s64_f16_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.d, p0/z, \1\.d -+** fcvtzs z0\.d, p0/m, \1\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_s64_f16_z_tied1, svint64_t, svfloat16_t, -+ z0_res = svcvt_s64_f16_z (p0, z0), -+ z0_res = svcvt_s64_z (p0, z0)) -+ -+/* -+** cvt_s64_f16_z_untied: -+** movprfx z0\.d, p0/z, z4\.d -+** fcvtzs z0\.d, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_s64_f16_z_untied, svint64_t, svfloat16_t, -+ z0 = svcvt_s64_f16_z (p0, z4), -+ z0 = svcvt_s64_z (p0, z4)) -+ -+/* -+** cvt_s64_f32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.d, p0/z, \1\.d -+** fcvtzs z0\.d, 
p0/m, \1\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_s64_f32_z_tied1, svint64_t, svfloat32_t, -+ z0_res = svcvt_s64_f32_z (p0, z0), -+ z0_res = svcvt_s64_z (p0, z0)) -+ -+/* -+** cvt_s64_f32_z_untied: -+** movprfx z0\.d, p0/z, z4\.d -+** fcvtzs z0\.d, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_s64_f32_z_untied, svint64_t, svfloat32_t, -+ z0 = svcvt_s64_f32_z (p0, z4), -+ z0 = svcvt_s64_z (p0, z4)) -+ -+/* -+** cvt_s64_f64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** fcvtzs z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_s64_f64_z_tied1, svint64_t, svfloat64_t, -+ z0_res = svcvt_s64_f64_z (p0, z0), -+ z0_res = svcvt_s64_z (p0, z0)) -+ -+/* -+** cvt_s64_f64_z_untied: -+** movprfx z0\.d, p0/z, z4\.d -+** fcvtzs z0\.d, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_s64_f64_z_untied, svint64_t, svfloat64_t, -+ z0 = svcvt_s64_f64_z (p0, z4), -+ z0 = svcvt_s64_z (p0, z4)) -+ -+/* -+** cvt_s64_f16_x_tied1: -+** fcvtzs z0\.d, p0/m, z0\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_s64_f16_x_tied1, svint64_t, svfloat16_t, -+ z0_res = svcvt_s64_f16_x (p0, z0), -+ z0_res = svcvt_s64_x (p0, z0)) -+ -+/* -+** cvt_s64_f16_x_untied: -+** fcvtzs z0\.d, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_s64_f16_x_untied, svint64_t, svfloat16_t, -+ z0 = svcvt_s64_f16_x (p0, z4), -+ z0 = svcvt_s64_x (p0, z4)) -+ -+/* -+** cvt_s64_f32_x_tied1: -+** fcvtzs z0\.d, p0/m, z0\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_s64_f32_x_tied1, svint64_t, svfloat32_t, -+ z0_res = svcvt_s64_f32_x (p0, z0), -+ z0_res = svcvt_s64_x (p0, z0)) -+ -+/* -+** cvt_s64_f32_x_untied: -+** fcvtzs z0\.d, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_s64_f32_x_untied, svint64_t, svfloat32_t, -+ z0 = svcvt_s64_f32_x (p0, z4), -+ z0 = svcvt_s64_x (p0, z4)) -+ -+/* -+** cvt_s64_f64_x_tied1: -+** fcvtzs z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_s64_f64_x_tied1, svint64_t, svfloat64_t, -+ z0_res = svcvt_s64_f64_x (p0, z0), -+ z0_res = svcvt_s64_x (p0, z0)) -+ -+/* -+** cvt_s64_f64_x_untied: -+** fcvtzs z0\.d, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_s64_f64_x_untied, svint64_t, svfloat64_t, -+ z0 = svcvt_s64_f64_x (p0, z4), -+ z0 = svcvt_s64_x (p0, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_u16.c -new file mode 100644 -index 000000000..4db0dffdd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_u16.c -@@ -0,0 +1,72 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cvt_u16_f16_m_tied1: -+** fcvtzu z0\.h, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_u16_f16_m_tied1, svuint16_t, svfloat16_t, -+ z0 = svcvt_u16_f16_m (z0, p0, z4), -+ z0 = svcvt_u16_m (z0, p0, z4)) -+ -+/* -+** cvt_u16_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** fcvtzu z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_u16_f16_m_tied2, svuint16_t, svfloat16_t, -+ z0_res = svcvt_u16_f16_m (z4, p0, z0), -+ z0_res = svcvt_u16_m (z4, p0, z0)) -+ -+/* -+** cvt_u16_f16_m_untied: -+** movprfx z0, z1 -+** fcvtzu z0\.h, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_u16_f16_m_untied, svuint16_t, svfloat16_t, -+ z0 = svcvt_u16_f16_m (z1, p0, z4), -+ z0 = svcvt_u16_m (z1, p0, z4)) -+ -+/* -+** cvt_u16_f16_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, \1\.h -+** fcvtzu z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_u16_f16_z_tied1, svuint16_t, svfloat16_t, -+ z0_res = svcvt_u16_f16_z (p0, z0), -+ z0_res = svcvt_u16_z (p0, z0)) -+ -+/* -+** 
cvt_u16_f16_z_untied: -+** movprfx z0\.h, p0/z, z4\.h -+** fcvtzu z0\.h, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_u16_f16_z_untied, svuint16_t, svfloat16_t, -+ z0 = svcvt_u16_f16_z (p0, z4), -+ z0 = svcvt_u16_z (p0, z4)) -+ -+/* -+** cvt_u16_f16_x_tied1: -+** fcvtzu z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_u16_f16_x_tied1, svuint16_t, svfloat16_t, -+ z0_res = svcvt_u16_f16_x (p0, z0), -+ z0_res = svcvt_u16_x (p0, z0)) -+ -+/* -+** cvt_u16_f16_x_untied: -+** fcvtzu z0\.h, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_u16_f16_x_untied, svuint16_t, svfloat16_t, -+ z0 = svcvt_u16_f16_x (p0, z4), -+ z0 = svcvt_u16_x (p0, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_u32.c -new file mode 100644 -index 000000000..52ef49fcf ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_u32.c -@@ -0,0 +1,210 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cvt_u32_f16_m_tied1: -+** fcvtzu z0\.s, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_u32_f16_m_tied1, svuint32_t, svfloat16_t, -+ z0 = svcvt_u32_f16_m (z0, p0, z4), -+ z0 = svcvt_u32_m (z0, p0, z4)) -+ -+/* -+** cvt_u32_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** fcvtzu z0\.s, p0/m, \1\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_u32_f16_m_tied2, svuint32_t, svfloat16_t, -+ z0_res = svcvt_u32_f16_m (z4, p0, z0), -+ z0_res = svcvt_u32_m (z4, p0, z0)) -+ -+/* -+** cvt_u32_f16_m_untied: -+** movprfx z0, z1 -+** fcvtzu z0\.s, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_u32_f16_m_untied, svuint32_t, svfloat16_t, -+ z0 = svcvt_u32_f16_m (z1, p0, z4), -+ z0 = svcvt_u32_m (z1, p0, z4)) -+ -+/* -+** cvt_u32_f32_m_tied1: -+** fcvtzu z0\.s, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_u32_f32_m_tied1, svuint32_t, svfloat32_t, -+ z0 = svcvt_u32_f32_m (z0, p0, z4), -+ z0 = svcvt_u32_m (z0, p0, z4)) -+ -+/* -+** cvt_u32_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** fcvtzu z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_u32_f32_m_tied2, svuint32_t, svfloat32_t, -+ z0_res = svcvt_u32_f32_m (z4, p0, z0), -+ z0_res = svcvt_u32_m (z4, p0, z0)) -+ -+/* -+** cvt_u32_f32_m_untied: -+** movprfx z0, z1 -+** fcvtzu z0\.s, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_u32_f32_m_untied, svuint32_t, svfloat32_t, -+ z0 = svcvt_u32_f32_m (z1, p0, z4), -+ z0 = svcvt_u32_m (z1, p0, z4)) -+ -+/* -+** cvt_u32_f64_m_tied1: -+** fcvtzu z0\.s, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_u32_f64_m_tied1, svuint32_t, svfloat64_t, -+ z0 = svcvt_u32_f64_m (z0, p0, z4), -+ z0 = svcvt_u32_m (z0, p0, z4)) -+ -+/* -+** cvt_u32_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z4 -+** fcvtzu z0\.s, p0/m, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_u32_f64_m_tied2, svuint32_t, svfloat64_t, -+ z0_res = svcvt_u32_f64_m (z4, p0, z0), -+ z0_res = svcvt_u32_m (z4, p0, z0)) -+ -+/* -+** cvt_u32_f64_m_untied: -+** movprfx z0, z1 -+** fcvtzu z0\.s, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_u32_f64_m_untied, svuint32_t, svfloat64_t, -+ z0 = svcvt_u32_f64_m (z1, p0, z4), -+ z0 = svcvt_u32_m (z1, p0, z4)) -+ -+/* -+** cvt_u32_f16_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** fcvtzu z0\.s, p0/m, \1\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_u32_f16_z_tied1, svuint32_t, svfloat16_t, -+ z0_res = svcvt_u32_f16_z (p0, z0), -+ z0_res = svcvt_u32_z (p0, z0)) -+ -+/* -+** cvt_u32_f16_z_untied: -+** movprfx z0\.s, p0/z, z4\.s -+** fcvtzu z0\.s, p0/m, z4\.h 
-+** ret -+*/ -+TEST_DUAL_Z (cvt_u32_f16_z_untied, svuint32_t, svfloat16_t, -+ z0 = svcvt_u32_f16_z (p0, z4), -+ z0 = svcvt_u32_z (p0, z4)) -+ -+/* -+** cvt_u32_f32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** fcvtzu z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_u32_f32_z_tied1, svuint32_t, svfloat32_t, -+ z0_res = svcvt_u32_f32_z (p0, z0), -+ z0_res = svcvt_u32_z (p0, z0)) -+ -+/* -+** cvt_u32_f32_z_untied: -+** movprfx z0\.s, p0/z, z4\.s -+** fcvtzu z0\.s, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_u32_f32_z_untied, svuint32_t, svfloat32_t, -+ z0 = svcvt_u32_f32_z (p0, z4), -+ z0 = svcvt_u32_z (p0, z4)) -+ -+/* -+** cvt_u32_f64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** fcvtzu z0\.s, p0/m, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_u32_f64_z_tied1, svuint32_t, svfloat64_t, -+ z0_res = svcvt_u32_f64_z (p0, z0), -+ z0_res = svcvt_u32_z (p0, z0)) -+ -+/* -+** cvt_u32_f64_z_untied: -+** movprfx z0\.d, p0/z, z4\.d -+** fcvtzu z0\.s, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_u32_f64_z_untied, svuint32_t, svfloat64_t, -+ z0 = svcvt_u32_f64_z (p0, z4), -+ z0 = svcvt_u32_z (p0, z4)) -+ -+/* -+** cvt_u32_f16_x_tied1: -+** fcvtzu z0\.s, p0/m, z0\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_u32_f16_x_tied1, svuint32_t, svfloat16_t, -+ z0_res = svcvt_u32_f16_x (p0, z0), -+ z0_res = svcvt_u32_x (p0, z0)) -+ -+/* -+** cvt_u32_f16_x_untied: -+** fcvtzu z0\.s, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_u32_f16_x_untied, svuint32_t, svfloat16_t, -+ z0 = svcvt_u32_f16_x (p0, z4), -+ z0 = svcvt_u32_x (p0, z4)) -+ -+/* -+** cvt_u32_f32_x_tied1: -+** fcvtzu z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_u32_f32_x_tied1, svuint32_t, svfloat32_t, -+ z0_res = svcvt_u32_f32_x (p0, z0), -+ z0_res = svcvt_u32_x (p0, z0)) -+ -+/* -+** cvt_u32_f32_x_untied: -+** fcvtzu z0\.s, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_u32_f32_x_untied, svuint32_t, svfloat32_t, -+ z0 = svcvt_u32_f32_x (p0, z4), -+ z0 = svcvt_u32_x (p0, z4)) -+ -+/* -+** cvt_u32_f64_x_tied1: -+** fcvtzu z0\.s, p0/m, z0\.d -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_u32_f64_x_tied1, svuint32_t, svfloat64_t, -+ z0_res = svcvt_u32_f64_x (p0, z0), -+ z0_res = svcvt_u32_x (p0, z0)) -+ -+/* -+** cvt_u32_f64_x_untied: -+** fcvtzu z0\.s, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_u32_f64_x_untied, svuint32_t, svfloat64_t, -+ z0 = svcvt_u32_f64_x (p0, z4), -+ z0 = svcvt_u32_x (p0, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_u64.c -new file mode 100644 -index 000000000..0c43758ae ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvt_u64.c -@@ -0,0 +1,210 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cvt_u64_f16_m_tied1: -+** fcvtzu z0\.d, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_u64_f16_m_tied1, svuint64_t, svfloat16_t, -+ z0 = svcvt_u64_f16_m (z0, p0, z4), -+ z0 = svcvt_u64_m (z0, p0, z4)) -+ -+/* -+** cvt_u64_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** fcvtzu z0\.d, p0/m, \1\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_u64_f16_m_tied2, svuint64_t, svfloat16_t, -+ z0_res = svcvt_u64_f16_m (z4, p0, z0), -+ z0_res = svcvt_u64_m (z4, p0, z0)) -+ -+/* -+** cvt_u64_f16_m_untied: -+** movprfx z0, z1 -+** fcvtzu z0\.d, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_u64_f16_m_untied, svuint64_t, svfloat16_t, -+ z0 = svcvt_u64_f16_m (z1, p0, z4), -+ z0 = svcvt_u64_m (z1, p0, z4)) -+ -+/* -+** 
cvt_u64_f32_m_tied1: -+** fcvtzu z0\.d, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_u64_f32_m_tied1, svuint64_t, svfloat32_t, -+ z0 = svcvt_u64_f32_m (z0, p0, z4), -+ z0 = svcvt_u64_m (z0, p0, z4)) -+ -+/* -+** cvt_u64_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** fcvtzu z0\.d, p0/m, \1\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_u64_f32_m_tied2, svuint64_t, svfloat32_t, -+ z0_res = svcvt_u64_f32_m (z4, p0, z0), -+ z0_res = svcvt_u64_m (z4, p0, z0)) -+ -+/* -+** cvt_u64_f32_m_untied: -+** movprfx z0, z1 -+** fcvtzu z0\.d, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_u64_f32_m_untied, svuint64_t, svfloat32_t, -+ z0 = svcvt_u64_f32_m (z1, p0, z4), -+ z0 = svcvt_u64_m (z1, p0, z4)) -+ -+/* -+** cvt_u64_f64_m_tied1: -+** fcvtzu z0\.d, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_u64_f64_m_tied1, svuint64_t, svfloat64_t, -+ z0 = svcvt_u64_f64_m (z0, p0, z4), -+ z0 = svcvt_u64_m (z0, p0, z4)) -+ -+/* -+** cvt_u64_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z4 -+** fcvtzu z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_u64_f64_m_tied2, svuint64_t, svfloat64_t, -+ z0_res = svcvt_u64_f64_m (z4, p0, z0), -+ z0_res = svcvt_u64_m (z4, p0, z0)) -+ -+/* -+** cvt_u64_f64_m_untied: -+** movprfx z0, z1 -+** fcvtzu z0\.d, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_u64_f64_m_untied, svuint64_t, svfloat64_t, -+ z0 = svcvt_u64_f64_m (z1, p0, z4), -+ z0 = svcvt_u64_m (z1, p0, z4)) -+ -+/* -+** cvt_u64_f16_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.d, p0/z, \1\.d -+** fcvtzu z0\.d, p0/m, \1\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_u64_f16_z_tied1, svuint64_t, svfloat16_t, -+ z0_res = svcvt_u64_f16_z (p0, z0), -+ z0_res = svcvt_u64_z (p0, z0)) -+ -+/* -+** cvt_u64_f16_z_untied: -+** movprfx z0\.d, p0/z, z4\.d -+** fcvtzu z0\.d, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_u64_f16_z_untied, svuint64_t, svfloat16_t, -+ z0 = svcvt_u64_f16_z (p0, z4), -+ z0 = svcvt_u64_z (p0, z4)) -+ -+/* -+** cvt_u64_f32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.d, p0/z, \1\.d -+** fcvtzu z0\.d, p0/m, \1\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_u64_f32_z_tied1, svuint64_t, svfloat32_t, -+ z0_res = svcvt_u64_f32_z (p0, z0), -+ z0_res = svcvt_u64_z (p0, z0)) -+ -+/* -+** cvt_u64_f32_z_untied: -+** movprfx z0\.d, p0/z, z4\.d -+** fcvtzu z0\.d, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_u64_f32_z_untied, svuint64_t, svfloat32_t, -+ z0 = svcvt_u64_f32_z (p0, z4), -+ z0 = svcvt_u64_z (p0, z4)) -+ -+/* -+** cvt_u64_f64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** fcvtzu z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_u64_f64_z_tied1, svuint64_t, svfloat64_t, -+ z0_res = svcvt_u64_f64_z (p0, z0), -+ z0_res = svcvt_u64_z (p0, z0)) -+ -+/* -+** cvt_u64_f64_z_untied: -+** movprfx z0\.d, p0/z, z4\.d -+** fcvtzu z0\.d, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_u64_f64_z_untied, svuint64_t, svfloat64_t, -+ z0 = svcvt_u64_f64_z (p0, z4), -+ z0 = svcvt_u64_z (p0, z4)) -+ -+/* -+** cvt_u64_f16_x_tied1: -+** fcvtzu z0\.d, p0/m, z0\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_u64_f16_x_tied1, svuint64_t, svfloat16_t, -+ z0_res = svcvt_u64_f16_x (p0, z0), -+ z0_res = svcvt_u64_x (p0, z0)) -+ -+/* -+** cvt_u64_f16_x_untied: -+** fcvtzu z0\.d, p0/m, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (cvt_u64_f16_x_untied, svuint64_t, svfloat16_t, -+ z0 = svcvt_u64_f16_x (p0, z4), -+ z0 = svcvt_u64_x (p0, z4)) -+ -+/* -+** cvt_u64_f32_x_tied1: -+** fcvtzu z0\.d, p0/m, z0\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_u64_f32_x_tied1, svuint64_t, svfloat32_t, -+ z0_res = svcvt_u64_f32_x 
(p0, z0), -+ z0_res = svcvt_u64_x (p0, z0)) -+ -+/* -+** cvt_u64_f32_x_untied: -+** fcvtzu z0\.d, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvt_u64_f32_x_untied, svuint64_t, svfloat32_t, -+ z0 = svcvt_u64_f32_x (p0, z4), -+ z0 = svcvt_u64_x (p0, z4)) -+ -+/* -+** cvt_u64_f64_x_tied1: -+** fcvtzu z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_DUAL_Z_REV (cvt_u64_f64_x_tied1, svuint64_t, svfloat64_t, -+ z0_res = svcvt_u64_f64_x (p0, z0), -+ z0_res = svcvt_u64_x (p0, z0)) -+ -+/* -+** cvt_u64_f64_x_untied: -+** fcvtzu z0\.d, p0/m, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (cvt_u64_f64_x_untied, svuint64_t, svfloat64_t, -+ z0 = svcvt_u64_f64_x (p0, z4), -+ z0 = svcvt_u64_x (p0, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvtnt_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvtnt_bf16.c -new file mode 100644 -index 000000000..54614c95d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cvtnt_bf16.c -@@ -0,0 +1,90 @@ -+/* { dg-additional-options "-march=armv8.2-a+sve+bf16" } */ -+/* { dg-require-effective-target aarch64_asm_bf16_ok } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** cvtnt_bf16_f32_m_tied1: -+** bfcvtnt z0\.h, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvtnt_bf16_f32_m_tied1, svbfloat16_t, svfloat32_t, -+ z0 = svcvtnt_bf16_f32_m (z0, p0, z4), -+ z0 = svcvtnt_bf16_m (z0, p0, z4)) -+ -+/* Bad RA choice: no preferred output sequence. */ -+TEST_DUAL_Z_REV (cvtnt_bf16_f32_m_tied2, svbfloat16_t, svfloat32_t, -+ z0_res = svcvtnt_bf16_f32_m (z4, p0, z0), -+ z0_res = svcvtnt_bf16_m (z4, p0, z0)) -+ -+/* -+** cvtnt_bf16_f32_m_untied: -+** ( -+** mov z0\.d, z1\.d -+** bfcvtnt z0\.h, p0/m, z4\.s -+** | -+** bfcvtnt z1\.h, p0/m, z4\.s -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_DUAL_Z (cvtnt_bf16_f32_m_untied, svbfloat16_t, svfloat32_t, -+ z0 = svcvtnt_bf16_f32_m (z1, p0, z4), -+ z0 = svcvtnt_bf16_m (z1, p0, z4)) -+ -+/* -+** cvtnt_bf16_f32_x_tied1: -+** bfcvtnt z0\.h, p0/m, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (cvtnt_bf16_f32_x_tied1, svbfloat16_t, svfloat32_t, -+ z0 = svcvtnt_bf16_f32_x (z0, p0, z4), -+ z0 = svcvtnt_bf16_x (z0, p0, z4)) -+ -+/* Bad RA choice: no preferred output sequence. */ -+TEST_DUAL_Z_REV (cvtnt_bf16_f32_x_tied2, svbfloat16_t, svfloat32_t, -+ z0_res = svcvtnt_bf16_f32_x (z4, p0, z0), -+ z0_res = svcvtnt_bf16_x (z4, p0, z0)) -+ -+/* -+** cvtnt_bf16_f32_x_untied: -+** ( -+** mov z0\.d, z1\.d -+** bfcvtnt z0\.h, p0/m, z4\.s -+** | -+** bfcvtnt z1\.h, p0/m, z4\.s -+** mov z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_DUAL_Z (cvtnt_bf16_f32_x_untied, svbfloat16_t, svfloat32_t, -+ z0 = svcvtnt_bf16_f32_x (z1, p0, z4), -+ z0 = svcvtnt_bf16_x (z1, p0, z4)) -+ -+/* -+** ptrue_cvtnt_bf16_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z (ptrue_cvtnt_bf16_f32_x_tied1, svbfloat16_t, svfloat32_t, -+ z0 = svcvtnt_bf16_f32_x (z0, svptrue_b32 (), z4), -+ z0 = svcvtnt_bf16_x (z0, svptrue_b32 (), z4)) -+ -+/* Bad RA choice: no preferred output sequence. */ -+TEST_DUAL_Z_REV (ptrue_cvtnt_bf16_f32_x_tied2, svbfloat16_t, svfloat32_t, -+ z0_res = svcvtnt_bf16_f32_x (z4, svptrue_b32 (), z0), -+ z0_res = svcvtnt_bf16_x (z4, svptrue_b32 (), z0)) -+ -+/* -+** ptrue_cvtnt_bf16_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_DUAL_Z (ptrue_cvtnt_bf16_f32_x_untied, svbfloat16_t, svfloat32_t, -+ z0 = svcvtnt_bf16_f32_x (z1, svptrue_b32 (), z4), -+ z0 = svcvtnt_bf16_x (z1, svptrue_b32 (), z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_f16.c -new file mode 100644 -index 000000000..35f5c1589 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_f16.c -@@ -0,0 +1,303 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** div_f16_m_tied1: -+** fdiv z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (div_f16_m_tied1, svfloat16_t, -+ z0 = svdiv_f16_m (p0, z0, z1), -+ z0 = svdiv_m (p0, z0, z1)) -+ -+/* -+** div_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fdiv z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (div_f16_m_tied2, svfloat16_t, -+ z0 = svdiv_f16_m (p0, z1, z0), -+ z0 = svdiv_m (p0, z1, z0)) -+ -+/* -+** div_f16_m_untied: -+** movprfx z0, z1 -+** fdiv z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (div_f16_m_untied, svfloat16_t, -+ z0 = svdiv_f16_m (p0, z1, z2), -+ z0 = svdiv_m (p0, z1, z2)) -+ -+/* -+** div_h4_f16_m_tied1: -+** mov (z[0-9]+\.h), h4 -+** fdiv z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (div_h4_f16_m_tied1, svfloat16_t, __fp16, -+ z0 = svdiv_n_f16_m (p0, z0, d4), -+ z0 = svdiv_m (p0, z0, d4)) -+ -+/* -+** div_h4_f16_m_untied: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0, z1 -+** fdiv z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (div_h4_f16_m_untied, svfloat16_t, __fp16, -+ z0 = svdiv_n_f16_m (p0, z1, d4), -+ z0 = svdiv_m (p0, z1, d4)) -+ -+/* -+** div_1_f16_m_tied1: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? -+** fdiv z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (div_1_f16_m_tied1, svfloat16_t, -+ z0 = svdiv_n_f16_m (p0, z0, 1), -+ z0 = svdiv_m (p0, z0, 1)) -+ -+/* -+** div_1_f16_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fdiv z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (div_1_f16_m_untied, svfloat16_t, -+ z0 = svdiv_n_f16_m (p0, z1, 1), -+ z0 = svdiv_m (p0, z1, 1)) -+ -+/* -+** div_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fdiv z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (div_f16_z_tied1, svfloat16_t, -+ z0 = svdiv_f16_z (p0, z0, z1), -+ z0 = svdiv_z (p0, z0, z1)) -+ -+/* -+** div_f16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** fdivr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (div_f16_z_tied2, svfloat16_t, -+ z0 = svdiv_f16_z (p0, z1, z0), -+ z0 = svdiv_z (p0, z1, z0)) -+ -+/* -+** div_f16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fdiv z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fdivr z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (div_f16_z_untied, svfloat16_t, -+ z0 = svdiv_f16_z (p0, z1, z2), -+ z0 = svdiv_z (p0, z1, z2)) -+ -+/* -+** div_h4_f16_z_tied1: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0\.h, p0/z, z0\.h -+** fdiv z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (div_h4_f16_z_tied1, svfloat16_t, __fp16, -+ z0 = svdiv_n_f16_z (p0, z0, d4), -+ z0 = svdiv_z (p0, z0, d4)) -+ -+/* -+** div_h4_f16_z_untied: -+** mov (z[0-9]+\.h), h4 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fdiv z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** fdivr z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (div_h4_f16_z_untied, svfloat16_t, __fp16, -+ z0 = svdiv_n_f16_z (p0, z1, d4), -+ z0 = svdiv_z (p0, z1, d4)) -+ -+/* -+** div_1_f16_z_tied1: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? -+** movprfx z0\.h, p0/z, z0\.h -+** fdiv z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (div_1_f16_z_tied1, svfloat16_t, -+ z0 = svdiv_n_f16_z (p0, z0, 1), -+ z0 = svdiv_z (p0, z0, 1)) -+ -+/* -+** div_1_f16_z_untied: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fdiv z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** fdivr z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (div_1_f16_z_untied, svfloat16_t, -+ z0 = svdiv_n_f16_z (p0, z1, 1), -+ z0 = svdiv_z (p0, z1, 1)) -+ -+/* -+** div_0p5_f16_z: -+** fmov (z[0-9]+\.h), #(?:0\.5|5\.0e-1) -+** movprfx z0\.h, p0/z, z0\.h -+** fdiv z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (div_0p5_f16_z, svfloat16_t, -+ z0 = svdiv_n_f16_z (p0, z0, 0.5), -+ z0 = svdiv_z (p0, z0, 0.5)) -+ -+/* -+** div_f16_x_tied1: -+** fdiv z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (div_f16_x_tied1, svfloat16_t, -+ z0 = svdiv_f16_x (p0, z0, z1), -+ z0 = svdiv_x (p0, z0, z1)) -+ -+/* -+** div_f16_x_tied2: -+** fdivr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (div_f16_x_tied2, svfloat16_t, -+ z0 = svdiv_f16_x (p0, z1, z0), -+ z0 = svdiv_x (p0, z1, z0)) -+ -+/* -+** div_f16_x_untied: -+** ( -+** movprfx z0, z1 -+** fdiv z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0, z2 -+** fdivr z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (div_f16_x_untied, svfloat16_t, -+ z0 = svdiv_f16_x (p0, z1, z2), -+ z0 = svdiv_x (p0, z1, z2)) -+ -+/* -+** div_h4_f16_x_tied1: -+** mov (z[0-9]+\.h), h4 -+** fdiv z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (div_h4_f16_x_tied1, svfloat16_t, __fp16, -+ z0 = svdiv_n_f16_x (p0, z0, d4), -+ z0 = svdiv_x (p0, z0, d4)) -+ -+/* -+** div_h4_f16_x_untied: { xfail *-*-* } -+** mov z0\.h, h4 -+** fdivr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZD (div_h4_f16_x_untied, svfloat16_t, __fp16, -+ z0 = svdiv_n_f16_x (p0, z1, d4), -+ z0 = svdiv_x (p0, z1, d4)) -+ -+/* -+** div_1_f16_x_tied1: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? -+** fdiv z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (div_1_f16_x_tied1, svfloat16_t, -+ z0 = svdiv_n_f16_x (p0, z0, 1), -+ z0 = svdiv_x (p0, z0, 1)) -+ -+/* -+** div_1_f16_x_untied: -+** fmov z0\.h, #1\.0(?:e\+0)? -+** fdivr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (div_1_f16_x_untied, svfloat16_t, -+ z0 = svdiv_n_f16_x (p0, z1, 1), -+ z0 = svdiv_x (p0, z1, 1)) -+ -+/* -+** ptrue_div_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_div_f16_x_tied1, svfloat16_t, -+ z0 = svdiv_f16_x (svptrue_b16 (), z0, z1), -+ z0 = svdiv_x (svptrue_b16 (), z0, z1)) -+ -+/* -+** ptrue_div_f16_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_div_f16_x_tied2, svfloat16_t, -+ z0 = svdiv_f16_x (svptrue_b16 (), z1, z0), -+ z0 = svdiv_x (svptrue_b16 (), z1, z0)) -+ -+/* -+** ptrue_div_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_div_f16_x_untied, svfloat16_t, -+ z0 = svdiv_f16_x (svptrue_b16 (), z1, z2), -+ z0 = svdiv_x (svptrue_b16 (), z1, z2)) -+ -+/* -+** ptrue_div_1_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_div_1_f16_x_tied1, svfloat16_t, -+ z0 = svdiv_n_f16_x (svptrue_b16 (), z0, 1), -+ z0 = svdiv_x (svptrue_b16 (), z0, 1)) -+ -+/* -+** ptrue_div_1_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_div_1_f16_x_untied, svfloat16_t, -+ z0 = svdiv_n_f16_x (svptrue_b16 (), z1, 1), -+ z0 = svdiv_x (svptrue_b16 (), z1, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_f32.c -new file mode 100644 -index 000000000..40cc203da ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_f32.c -@@ -0,0 +1,303 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** div_f32_m_tied1: -+** fdiv z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (div_f32_m_tied1, svfloat32_t, -+ z0 = svdiv_f32_m (p0, z0, z1), -+ z0 = svdiv_m (p0, z0, z1)) -+ -+/* -+** div_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fdiv z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (div_f32_m_tied2, svfloat32_t, -+ z0 = svdiv_f32_m (p0, z1, z0), -+ z0 = svdiv_m (p0, z1, z0)) -+ -+/* -+** div_f32_m_untied: -+** movprfx z0, z1 -+** fdiv z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (div_f32_m_untied, svfloat32_t, -+ z0 = svdiv_f32_m (p0, z1, z2), -+ z0 = svdiv_m (p0, z1, z2)) -+ -+/* -+** div_s4_f32_m_tied1: -+** mov (z[0-9]+\.s), s4 -+** fdiv z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (div_s4_f32_m_tied1, svfloat32_t, float, -+ z0 = svdiv_n_f32_m (p0, z0, d4), -+ z0 = svdiv_m (p0, z0, d4)) -+ -+/* -+** div_s4_f32_m_untied: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0, z1 -+** fdiv z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (div_s4_f32_m_untied, svfloat32_t, float, -+ z0 = svdiv_n_f32_m (p0, z1, d4), -+ z0 = svdiv_m (p0, z1, d4)) -+ -+/* -+** div_1_f32_m_tied1: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? -+** fdiv z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (div_1_f32_m_tied1, svfloat32_t, -+ z0 = svdiv_n_f32_m (p0, z0, 1), -+ z0 = svdiv_m (p0, z0, 1)) -+ -+/* -+** div_1_f32_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fdiv z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (div_1_f32_m_untied, svfloat32_t, -+ z0 = svdiv_n_f32_m (p0, z1, 1), -+ z0 = svdiv_m (p0, z1, 1)) -+ -+/* -+** div_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fdiv z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (div_f32_z_tied1, svfloat32_t, -+ z0 = svdiv_f32_z (p0, z0, z1), -+ z0 = svdiv_z (p0, z0, z1)) -+ -+/* -+** div_f32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** fdivr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (div_f32_z_tied2, svfloat32_t, -+ z0 = svdiv_f32_z (p0, z1, z0), -+ z0 = svdiv_z (p0, z1, z0)) -+ -+/* -+** div_f32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fdiv z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fdivr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (div_f32_z_untied, svfloat32_t, -+ z0 = svdiv_f32_z (p0, z1, z2), -+ z0 = svdiv_z (p0, z1, z2)) -+ -+/* -+** div_s4_f32_z_tied1: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0\.s, p0/z, z0\.s -+** fdiv z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (div_s4_f32_z_tied1, svfloat32_t, float, -+ z0 = svdiv_n_f32_z (p0, z0, d4), -+ z0 = svdiv_z (p0, z0, d4)) -+ -+/* -+** div_s4_f32_z_untied: -+** mov (z[0-9]+\.s), s4 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fdiv z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** fdivr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (div_s4_f32_z_untied, svfloat32_t, float, -+ z0 = svdiv_n_f32_z (p0, z1, d4), -+ z0 = svdiv_z (p0, z1, d4)) -+ -+/* -+** div_1_f32_z_tied1: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? -+** movprfx z0\.s, p0/z, z0\.s -+** fdiv z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (div_1_f32_z_tied1, svfloat32_t, -+ z0 = svdiv_n_f32_z (p0, z0, 1), -+ z0 = svdiv_z (p0, z0, 1)) -+ -+/* -+** div_1_f32_z_untied: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fdiv z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** fdivr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (div_1_f32_z_untied, svfloat32_t, -+ z0 = svdiv_n_f32_z (p0, z1, 1), -+ z0 = svdiv_z (p0, z1, 1)) -+ -+/* -+** div_0p5_f32_z: -+** fmov (z[0-9]+\.s), #(?:0\.5|5\.0e-1) -+** movprfx z0\.s, p0/z, z0\.s -+** fdiv z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (div_0p5_f32_z, svfloat32_t, -+ z0 = svdiv_n_f32_z (p0, z0, 0.5), -+ z0 = svdiv_z (p0, z0, 0.5)) -+ -+/* -+** div_f32_x_tied1: -+** fdiv z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (div_f32_x_tied1, svfloat32_t, -+ z0 = svdiv_f32_x (p0, z0, z1), -+ z0 = svdiv_x (p0, z0, z1)) -+ -+/* -+** div_f32_x_tied2: -+** fdivr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (div_f32_x_tied2, svfloat32_t, -+ z0 = svdiv_f32_x (p0, z1, z0), -+ z0 = svdiv_x (p0, z1, z0)) -+ -+/* -+** div_f32_x_untied: -+** ( -+** movprfx z0, z1 -+** fdiv z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0, z2 -+** fdivr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (div_f32_x_untied, svfloat32_t, -+ z0 = svdiv_f32_x (p0, z1, z2), -+ z0 = svdiv_x (p0, z1, z2)) -+ -+/* -+** div_s4_f32_x_tied1: -+** mov (z[0-9]+\.s), s4 -+** fdiv z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (div_s4_f32_x_tied1, svfloat32_t, float, -+ z0 = svdiv_n_f32_x (p0, z0, d4), -+ z0 = svdiv_x (p0, z0, d4)) -+ -+/* -+** div_s4_f32_x_untied: { xfail *-*-* } -+** mov z0\.s, s4 -+** fdivr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZD (div_s4_f32_x_untied, svfloat32_t, float, -+ z0 = svdiv_n_f32_x (p0, z1, d4), -+ z0 = svdiv_x (p0, z1, d4)) -+ -+/* -+** div_1_f32_x_tied1: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? -+** fdiv z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (div_1_f32_x_tied1, svfloat32_t, -+ z0 = svdiv_n_f32_x (p0, z0, 1), -+ z0 = svdiv_x (p0, z0, 1)) -+ -+/* -+** div_1_f32_x_untied: -+** fmov z0\.s, #1\.0(?:e\+0)? -+** fdivr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (div_1_f32_x_untied, svfloat32_t, -+ z0 = svdiv_n_f32_x (p0, z1, 1), -+ z0 = svdiv_x (p0, z1, 1)) -+ -+/* -+** ptrue_div_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_div_f32_x_tied1, svfloat32_t, -+ z0 = svdiv_f32_x (svptrue_b32 (), z0, z1), -+ z0 = svdiv_x (svptrue_b32 (), z0, z1)) -+ -+/* -+** ptrue_div_f32_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_div_f32_x_tied2, svfloat32_t, -+ z0 = svdiv_f32_x (svptrue_b32 (), z1, z0), -+ z0 = svdiv_x (svptrue_b32 (), z1, z0)) -+ -+/* -+** ptrue_div_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_div_f32_x_untied, svfloat32_t, -+ z0 = svdiv_f32_x (svptrue_b32 (), z1, z2), -+ z0 = svdiv_x (svptrue_b32 (), z1, z2)) -+ -+/* -+** ptrue_div_1_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_div_1_f32_x_tied1, svfloat32_t, -+ z0 = svdiv_n_f32_x (svptrue_b32 (), z0, 1), -+ z0 = svdiv_x (svptrue_b32 (), z0, 1)) -+ -+/* -+** ptrue_div_1_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_div_1_f32_x_untied, svfloat32_t, -+ z0 = svdiv_n_f32_x (svptrue_b32 (), z1, 1), -+ z0 = svdiv_x (svptrue_b32 (), z1, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_f64.c -new file mode 100644 -index 000000000..56acbbe95 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_f64.c -@@ -0,0 +1,303 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** div_f64_m_tied1: -+** fdiv z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (div_f64_m_tied1, svfloat64_t, -+ z0 = svdiv_f64_m (p0, z0, z1), -+ z0 = svdiv_m (p0, z0, z1)) -+ -+/* -+** div_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fdiv z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (div_f64_m_tied2, svfloat64_t, -+ z0 = svdiv_f64_m (p0, z1, z0), -+ z0 = svdiv_m (p0, z1, z0)) -+ -+/* -+** div_f64_m_untied: -+** movprfx z0, z1 -+** fdiv z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (div_f64_m_untied, svfloat64_t, -+ z0 = svdiv_f64_m (p0, z1, z2), -+ z0 = svdiv_m (p0, z1, z2)) -+ -+/* -+** div_d4_f64_m_tied1: -+** mov (z[0-9]+\.d), d4 -+** fdiv z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (div_d4_f64_m_tied1, svfloat64_t, double, -+ z0 = svdiv_n_f64_m (p0, z0, d4), -+ z0 = svdiv_m (p0, z0, d4)) -+ -+/* -+** div_d4_f64_m_untied: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0, z1 -+** fdiv z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (div_d4_f64_m_untied, svfloat64_t, double, -+ z0 = svdiv_n_f64_m (p0, z1, d4), -+ z0 = svdiv_m (p0, z1, d4)) -+ -+/* -+** div_1_f64_m_tied1: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? -+** fdiv z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (div_1_f64_m_tied1, svfloat64_t, -+ z0 = svdiv_n_f64_m (p0, z0, 1), -+ z0 = svdiv_m (p0, z0, 1)) -+ -+/* -+** div_1_f64_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fdiv z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (div_1_f64_m_untied, svfloat64_t, -+ z0 = svdiv_n_f64_m (p0, z1, 1), -+ z0 = svdiv_m (p0, z1, 1)) -+ -+/* -+** div_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fdiv z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (div_f64_z_tied1, svfloat64_t, -+ z0 = svdiv_f64_z (p0, z0, z1), -+ z0 = svdiv_z (p0, z0, z1)) -+ -+/* -+** div_f64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** fdivr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (div_f64_z_tied2, svfloat64_t, -+ z0 = svdiv_f64_z (p0, z1, z0), -+ z0 = svdiv_z (p0, z1, z0)) -+ -+/* -+** div_f64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fdiv z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fdivr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (div_f64_z_untied, svfloat64_t, -+ z0 = svdiv_f64_z (p0, z1, z2), -+ z0 = svdiv_z (p0, z1, z2)) -+ -+/* -+** div_d4_f64_z_tied1: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0\.d, p0/z, z0\.d -+** fdiv z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (div_d4_f64_z_tied1, svfloat64_t, double, -+ z0 = svdiv_n_f64_z (p0, z0, d4), -+ z0 = svdiv_z (p0, z0, d4)) -+ -+/* -+** div_d4_f64_z_untied: -+** mov (z[0-9]+\.d), d4 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fdiv z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** fdivr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (div_d4_f64_z_untied, svfloat64_t, double, -+ z0 = svdiv_n_f64_z (p0, z1, d4), -+ z0 = svdiv_z (p0, z1, d4)) -+ -+/* -+** div_1_f64_z_tied1: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? -+** movprfx z0\.d, p0/z, z0\.d -+** fdiv z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (div_1_f64_z_tied1, svfloat64_t, -+ z0 = svdiv_n_f64_z (p0, z0, 1), -+ z0 = svdiv_z (p0, z0, 1)) -+ -+/* -+** div_1_f64_z_untied: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fdiv z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** fdivr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (div_1_f64_z_untied, svfloat64_t, -+ z0 = svdiv_n_f64_z (p0, z1, 1), -+ z0 = svdiv_z (p0, z1, 1)) -+ -+/* -+** div_0p5_f64_z: -+** fmov (z[0-9]+\.d), #(?:0\.5|5\.0e-1) -+** movprfx z0\.d, p0/z, z0\.d -+** fdiv z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (div_0p5_f64_z, svfloat64_t, -+ z0 = svdiv_n_f64_z (p0, z0, 0.5), -+ z0 = svdiv_z (p0, z0, 0.5)) -+ -+/* -+** div_f64_x_tied1: -+** fdiv z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (div_f64_x_tied1, svfloat64_t, -+ z0 = svdiv_f64_x (p0, z0, z1), -+ z0 = svdiv_x (p0, z0, z1)) -+ -+/* -+** div_f64_x_tied2: -+** fdivr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (div_f64_x_tied2, svfloat64_t, -+ z0 = svdiv_f64_x (p0, z1, z0), -+ z0 = svdiv_x (p0, z1, z0)) -+ -+/* -+** div_f64_x_untied: -+** ( -+** movprfx z0, z1 -+** fdiv z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0, z2 -+** fdivr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (div_f64_x_untied, svfloat64_t, -+ z0 = svdiv_f64_x (p0, z1, z2), -+ z0 = svdiv_x (p0, z1, z2)) -+ -+/* -+** div_d4_f64_x_tied1: -+** mov (z[0-9]+\.d), d4 -+** fdiv z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (div_d4_f64_x_tied1, svfloat64_t, double, -+ z0 = svdiv_n_f64_x (p0, z0, d4), -+ z0 = svdiv_x (p0, z0, d4)) -+ -+/* -+** div_d4_f64_x_untied: { xfail *-*-* } -+** mov z0\.d, d4 -+** fdivr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZD (div_d4_f64_x_untied, svfloat64_t, double, -+ z0 = svdiv_n_f64_x (p0, z1, d4), -+ z0 = svdiv_x (p0, z1, d4)) -+ -+/* -+** div_1_f64_x_tied1: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? -+** fdiv z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (div_1_f64_x_tied1, svfloat64_t, -+ z0 = svdiv_n_f64_x (p0, z0, 1), -+ z0 = svdiv_x (p0, z0, 1)) -+ -+/* -+** div_1_f64_x_untied: -+** fmov z0\.d, #1\.0(?:e\+0)? -+** fdivr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (div_1_f64_x_untied, svfloat64_t, -+ z0 = svdiv_n_f64_x (p0, z1, 1), -+ z0 = svdiv_x (p0, z1, 1)) -+ -+/* -+** ptrue_div_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_div_f64_x_tied1, svfloat64_t, -+ z0 = svdiv_f64_x (svptrue_b64 (), z0, z1), -+ z0 = svdiv_x (svptrue_b64 (), z0, z1)) -+ -+/* -+** ptrue_div_f64_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_div_f64_x_tied2, svfloat64_t, -+ z0 = svdiv_f64_x (svptrue_b64 (), z1, z0), -+ z0 = svdiv_x (svptrue_b64 (), z1, z0)) -+ -+/* -+** ptrue_div_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_div_f64_x_untied, svfloat64_t, -+ z0 = svdiv_f64_x (svptrue_b64 (), z1, z2), -+ z0 = svdiv_x (svptrue_b64 (), z1, z2)) -+ -+/* -+** ptrue_div_1_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_div_1_f64_x_tied1, svfloat64_t, -+ z0 = svdiv_n_f64_x (svptrue_b64 (), z0, 1), -+ z0 = svdiv_x (svptrue_b64 (), z0, 1)) -+ -+/* -+** ptrue_div_1_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_div_1_f64_x_untied, svfloat64_t, -+ z0 = svdiv_n_f64_x (svptrue_b64 (), z1, 1), -+ z0 = svdiv_x (svptrue_b64 (), z1, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c -new file mode 100644 -index 000000000..8e70ae797 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s32.c -@@ -0,0 +1,237 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** div_s32_m_tied1: -+** sdiv z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (div_s32_m_tied1, svint32_t, -+ z0 = svdiv_s32_m (p0, z0, z1), -+ z0 = svdiv_m (p0, z0, z1)) -+ -+/* -+** div_s32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** sdiv z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (div_s32_m_tied2, svint32_t, -+ z0 = svdiv_s32_m (p0, z1, z0), -+ z0 = svdiv_m (p0, z1, z0)) -+ -+/* -+** div_s32_m_untied: -+** movprfx z0, z1 -+** sdiv z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (div_s32_m_untied, svint32_t, -+ z0 = svdiv_s32_m (p0, z1, z2), -+ z0 = svdiv_m (p0, z1, z2)) -+ -+/* -+** div_w0_s32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** sdiv z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (div_w0_s32_m_tied1, svint32_t, int32_t, -+ z0 = svdiv_n_s32_m (p0, z0, x0), -+ z0 = svdiv_m (p0, z0, x0)) -+ -+/* -+** div_w0_s32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** sdiv z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (div_w0_s32_m_untied, svint32_t, int32_t, -+ z0 = svdiv_n_s32_m (p0, z1, x0), -+ z0 = svdiv_m (p0, z1, x0)) -+ -+/* -+** div_2_s32_m_tied1: -+** mov (z[0-9]+\.s), #2 -+** sdiv z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (div_2_s32_m_tied1, svint32_t, -+ z0 = svdiv_n_s32_m (p0, z0, 2), -+ z0 = svdiv_m (p0, z0, 2)) -+ -+/* -+** div_2_s32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #2 -+** movprfx z0, z1 -+** sdiv z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (div_2_s32_m_untied, svint32_t, -+ z0 = svdiv_n_s32_m (p0, z1, 2), -+ z0 = svdiv_m (p0, z1, 2)) -+ -+/* -+** div_s32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** sdiv z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (div_s32_z_tied1, svint32_t, -+ z0 = svdiv_s32_z (p0, z0, z1), -+ z0 = svdiv_z (p0, z0, z1)) -+ -+/* -+** div_s32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** sdivr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (div_s32_z_tied2, svint32_t, -+ z0 = svdiv_s32_z (p0, z1, z0), -+ z0 = svdiv_z (p0, z1, z0)) -+ -+/* -+** div_s32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** sdiv z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** sdivr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (div_s32_z_untied, svint32_t, -+ z0 = svdiv_s32_z (p0, z1, z2), -+ z0 = svdiv_z (p0, z1, z2)) -+ -+/* -+** div_w0_s32_z_tied1: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** sdiv z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (div_w0_s32_z_tied1, svint32_t, int32_t, -+ z0 = svdiv_n_s32_z (p0, z0, x0), -+ z0 = svdiv_z (p0, z0, x0)) -+ -+/* -+** div_w0_s32_z_untied: -+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** sdiv z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** sdivr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (div_w0_s32_z_untied, svint32_t, int32_t, -+ z0 = svdiv_n_s32_z (p0, z1, x0), -+ z0 = svdiv_z (p0, z1, x0)) -+ -+/* -+** div_2_s32_z_tied1: -+** mov 
(z[0-9]+\.s), #2 -+** movprfx z0\.s, p0/z, z0\.s -+** sdiv z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (div_2_s32_z_tied1, svint32_t, -+ z0 = svdiv_n_s32_z (p0, z0, 2), -+ z0 = svdiv_z (p0, z0, 2)) -+ -+/* -+** div_2_s32_z_untied: -+** mov (z[0-9]+\.s), #2 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** sdiv z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** sdivr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (div_2_s32_z_untied, svint32_t, -+ z0 = svdiv_n_s32_z (p0, z1, 2), -+ z0 = svdiv_z (p0, z1, 2)) -+ -+/* -+** div_s32_x_tied1: -+** sdiv z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (div_s32_x_tied1, svint32_t, -+ z0 = svdiv_s32_x (p0, z0, z1), -+ z0 = svdiv_x (p0, z0, z1)) -+ -+/* -+** div_s32_x_tied2: -+** sdivr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (div_s32_x_tied2, svint32_t, -+ z0 = svdiv_s32_x (p0, z1, z0), -+ z0 = svdiv_x (p0, z1, z0)) -+ -+/* -+** div_s32_x_untied: -+** ( -+** movprfx z0, z1 -+** sdiv z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0, z2 -+** sdivr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (div_s32_x_untied, svint32_t, -+ z0 = svdiv_s32_x (p0, z1, z2), -+ z0 = svdiv_x (p0, z1, z2)) -+ -+/* -+** div_w0_s32_x_tied1: -+** mov (z[0-9]+\.s), w0 -+** sdiv z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (div_w0_s32_x_tied1, svint32_t, int32_t, -+ z0 = svdiv_n_s32_x (p0, z0, x0), -+ z0 = svdiv_x (p0, z0, x0)) -+ -+/* -+** div_w0_s32_x_untied: -+** mov z0\.s, w0 -+** sdivr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (div_w0_s32_x_untied, svint32_t, int32_t, -+ z0 = svdiv_n_s32_x (p0, z1, x0), -+ z0 = svdiv_x (p0, z1, x0)) -+ -+/* -+** div_2_s32_x_tied1: -+** mov (z[0-9]+\.s), #2 -+** sdiv z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (div_2_s32_x_tied1, svint32_t, -+ z0 = svdiv_n_s32_x (p0, z0, 2), -+ z0 = svdiv_x (p0, z0, 2)) -+ -+/* -+** div_2_s32_x_untied: -+** mov z0\.s, #2 -+** sdivr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (div_2_s32_x_untied, svint32_t, -+ z0 = svdiv_n_s32_x (p0, z1, 2), -+ z0 = svdiv_x (p0, z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s64.c -new file mode 100644 -index 000000000..439da1f57 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_s64.c -@@ -0,0 +1,237 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** div_s64_m_tied1: -+** sdiv z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (div_s64_m_tied1, svint64_t, -+ z0 = svdiv_s64_m (p0, z0, z1), -+ z0 = svdiv_m (p0, z0, z1)) -+ -+/* -+** div_s64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** sdiv z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (div_s64_m_tied2, svint64_t, -+ z0 = svdiv_s64_m (p0, z1, z0), -+ z0 = svdiv_m (p0, z1, z0)) -+ -+/* -+** div_s64_m_untied: -+** movprfx z0, z1 -+** sdiv z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (div_s64_m_untied, svint64_t, -+ z0 = svdiv_s64_m (p0, z1, z2), -+ z0 = svdiv_m (p0, z1, z2)) -+ -+/* -+** div_x0_s64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** sdiv z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (div_x0_s64_m_tied1, svint64_t, int64_t, -+ z0 = svdiv_n_s64_m (p0, z0, x0), -+ z0 = svdiv_m (p0, z0, x0)) -+ -+/* -+** div_x0_s64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** sdiv z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (div_x0_s64_m_untied, svint64_t, int64_t, -+ z0 = 
svdiv_n_s64_m (p0, z1, x0), -+ z0 = svdiv_m (p0, z1, x0)) -+ -+/* -+** div_2_s64_m_tied1: -+** mov (z[0-9]+\.d), #2 -+** sdiv z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (div_2_s64_m_tied1, svint64_t, -+ z0 = svdiv_n_s64_m (p0, z0, 2), -+ z0 = svdiv_m (p0, z0, 2)) -+ -+/* -+** div_2_s64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #2 -+** movprfx z0, z1 -+** sdiv z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (div_2_s64_m_untied, svint64_t, -+ z0 = svdiv_n_s64_m (p0, z1, 2), -+ z0 = svdiv_m (p0, z1, 2)) -+ -+/* -+** div_s64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** sdiv z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (div_s64_z_tied1, svint64_t, -+ z0 = svdiv_s64_z (p0, z0, z1), -+ z0 = svdiv_z (p0, z0, z1)) -+ -+/* -+** div_s64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** sdivr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (div_s64_z_tied2, svint64_t, -+ z0 = svdiv_s64_z (p0, z1, z0), -+ z0 = svdiv_z (p0, z1, z0)) -+ -+/* -+** div_s64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** sdiv z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** sdivr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (div_s64_z_untied, svint64_t, -+ z0 = svdiv_s64_z (p0, z1, z2), -+ z0 = svdiv_z (p0, z1, z2)) -+ -+/* -+** div_x0_s64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** sdiv z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (div_x0_s64_z_tied1, svint64_t, int64_t, -+ z0 = svdiv_n_s64_z (p0, z0, x0), -+ z0 = svdiv_z (p0, z0, x0)) -+ -+/* -+** div_x0_s64_z_untied: -+** mov (z[0-9]+\.d), x0 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** sdiv z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** sdivr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (div_x0_s64_z_untied, svint64_t, int64_t, -+ z0 = svdiv_n_s64_z (p0, z1, x0), -+ z0 = svdiv_z (p0, z1, x0)) -+ -+/* -+** div_2_s64_z_tied1: -+** mov (z[0-9]+\.d), #2 -+** movprfx z0\.d, p0/z, z0\.d -+** sdiv z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (div_2_s64_z_tied1, svint64_t, -+ z0 = svdiv_n_s64_z (p0, z0, 2), -+ z0 = svdiv_z (p0, z0, 2)) -+ -+/* -+** div_2_s64_z_untied: -+** mov (z[0-9]+\.d), #2 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** sdiv z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** sdivr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (div_2_s64_z_untied, svint64_t, -+ z0 = svdiv_n_s64_z (p0, z1, 2), -+ z0 = svdiv_z (p0, z1, 2)) -+ -+/* -+** div_s64_x_tied1: -+** sdiv z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (div_s64_x_tied1, svint64_t, -+ z0 = svdiv_s64_x (p0, z0, z1), -+ z0 = svdiv_x (p0, z0, z1)) -+ -+/* -+** div_s64_x_tied2: -+** sdivr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (div_s64_x_tied2, svint64_t, -+ z0 = svdiv_s64_x (p0, z1, z0), -+ z0 = svdiv_x (p0, z1, z0)) -+ -+/* -+** div_s64_x_untied: -+** ( -+** movprfx z0, z1 -+** sdiv z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0, z2 -+** sdivr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (div_s64_x_untied, svint64_t, -+ z0 = svdiv_s64_x (p0, z1, z2), -+ z0 = svdiv_x (p0, z1, z2)) -+ -+/* -+** div_x0_s64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** sdiv z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (div_x0_s64_x_tied1, svint64_t, int64_t, -+ z0 = svdiv_n_s64_x (p0, z0, x0), -+ z0 = svdiv_x (p0, z0, x0)) -+ -+/* -+** div_x0_s64_x_untied: -+** mov z0\.d, x0 -+** sdivr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (div_x0_s64_x_untied, svint64_t, 
int64_t, -+ z0 = svdiv_n_s64_x (p0, z1, x0), -+ z0 = svdiv_x (p0, z1, x0)) -+ -+/* -+** div_2_s64_x_tied1: -+** mov (z[0-9]+\.d), #2 -+** sdiv z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (div_2_s64_x_tied1, svint64_t, -+ z0 = svdiv_n_s64_x (p0, z0, 2), -+ z0 = svdiv_x (p0, z0, 2)) -+ -+/* -+** div_2_s64_x_untied: -+** mov z0\.d, #2 -+** sdivr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (div_2_s64_x_untied, svint64_t, -+ z0 = svdiv_n_s64_x (p0, z1, 2), -+ z0 = svdiv_x (p0, z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u32.c -new file mode 100644 -index 000000000..8e8e464b7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u32.c -@@ -0,0 +1,237 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** div_u32_m_tied1: -+** udiv z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (div_u32_m_tied1, svuint32_t, -+ z0 = svdiv_u32_m (p0, z0, z1), -+ z0 = svdiv_m (p0, z0, z1)) -+ -+/* -+** div_u32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** udiv z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (div_u32_m_tied2, svuint32_t, -+ z0 = svdiv_u32_m (p0, z1, z0), -+ z0 = svdiv_m (p0, z1, z0)) -+ -+/* -+** div_u32_m_untied: -+** movprfx z0, z1 -+** udiv z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (div_u32_m_untied, svuint32_t, -+ z0 = svdiv_u32_m (p0, z1, z2), -+ z0 = svdiv_m (p0, z1, z2)) -+ -+/* -+** div_w0_u32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** udiv z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (div_w0_u32_m_tied1, svuint32_t, uint32_t, -+ z0 = svdiv_n_u32_m (p0, z0, x0), -+ z0 = svdiv_m (p0, z0, x0)) -+ -+/* -+** div_w0_u32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** udiv z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (div_w0_u32_m_untied, svuint32_t, uint32_t, -+ z0 = svdiv_n_u32_m (p0, z1, x0), -+ z0 = svdiv_m (p0, z1, x0)) -+ -+/* -+** div_2_u32_m_tied1: -+** mov (z[0-9]+\.s), #2 -+** udiv z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (div_2_u32_m_tied1, svuint32_t, -+ z0 = svdiv_n_u32_m (p0, z0, 2), -+ z0 = svdiv_m (p0, z0, 2)) -+ -+/* -+** div_2_u32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #2 -+** movprfx z0, z1 -+** udiv z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (div_2_u32_m_untied, svuint32_t, -+ z0 = svdiv_n_u32_m (p0, z1, 2), -+ z0 = svdiv_m (p0, z1, 2)) -+ -+/* -+** div_u32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** udiv z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (div_u32_z_tied1, svuint32_t, -+ z0 = svdiv_u32_z (p0, z0, z1), -+ z0 = svdiv_z (p0, z0, z1)) -+ -+/* -+** div_u32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** udivr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (div_u32_z_tied2, svuint32_t, -+ z0 = svdiv_u32_z (p0, z1, z0), -+ z0 = svdiv_z (p0, z1, z0)) -+ -+/* -+** div_u32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** udiv z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** udivr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (div_u32_z_untied, svuint32_t, -+ z0 = svdiv_u32_z (p0, z1, z2), -+ z0 = svdiv_z (p0, z1, z2)) -+ -+/* -+** div_w0_u32_z_tied1: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** udiv z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (div_w0_u32_z_tied1, svuint32_t, uint32_t, -+ z0 = svdiv_n_u32_z (p0, z0, x0), -+ z0 = svdiv_z (p0, z0, x0)) -+ -+/* -+** div_w0_u32_z_untied: 
-+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** udiv z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** udivr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (div_w0_u32_z_untied, svuint32_t, uint32_t, -+ z0 = svdiv_n_u32_z (p0, z1, x0), -+ z0 = svdiv_z (p0, z1, x0)) -+ -+/* -+** div_2_u32_z_tied1: -+** mov (z[0-9]+\.s), #2 -+** movprfx z0\.s, p0/z, z0\.s -+** udiv z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (div_2_u32_z_tied1, svuint32_t, -+ z0 = svdiv_n_u32_z (p0, z0, 2), -+ z0 = svdiv_z (p0, z0, 2)) -+ -+/* -+** div_2_u32_z_untied: -+** mov (z[0-9]+\.s), #2 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** udiv z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** udivr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (div_2_u32_z_untied, svuint32_t, -+ z0 = svdiv_n_u32_z (p0, z1, 2), -+ z0 = svdiv_z (p0, z1, 2)) -+ -+/* -+** div_u32_x_tied1: -+** udiv z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (div_u32_x_tied1, svuint32_t, -+ z0 = svdiv_u32_x (p0, z0, z1), -+ z0 = svdiv_x (p0, z0, z1)) -+ -+/* -+** div_u32_x_tied2: -+** udivr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (div_u32_x_tied2, svuint32_t, -+ z0 = svdiv_u32_x (p0, z1, z0), -+ z0 = svdiv_x (p0, z1, z0)) -+ -+/* -+** div_u32_x_untied: -+** ( -+** movprfx z0, z1 -+** udiv z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0, z2 -+** udivr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (div_u32_x_untied, svuint32_t, -+ z0 = svdiv_u32_x (p0, z1, z2), -+ z0 = svdiv_x (p0, z1, z2)) -+ -+/* -+** div_w0_u32_x_tied1: -+** mov (z[0-9]+\.s), w0 -+** udiv z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (div_w0_u32_x_tied1, svuint32_t, uint32_t, -+ z0 = svdiv_n_u32_x (p0, z0, x0), -+ z0 = svdiv_x (p0, z0, x0)) -+ -+/* -+** div_w0_u32_x_untied: -+** mov z0\.s, w0 -+** udivr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (div_w0_u32_x_untied, svuint32_t, uint32_t, -+ z0 = svdiv_n_u32_x (p0, z1, x0), -+ z0 = svdiv_x (p0, z1, x0)) -+ -+/* -+** div_2_u32_x_tied1: -+** mov (z[0-9]+\.s), #2 -+** udiv z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (div_2_u32_x_tied1, svuint32_t, -+ z0 = svdiv_n_u32_x (p0, z0, 2), -+ z0 = svdiv_x (p0, z0, 2)) -+ -+/* -+** div_2_u32_x_untied: -+** mov z0\.s, #2 -+** udivr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (div_2_u32_x_untied, svuint32_t, -+ z0 = svdiv_n_u32_x (p0, z1, 2), -+ z0 = svdiv_x (p0, z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u64.c -new file mode 100644 -index 000000000..fc152e8e5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/div_u64.c -@@ -0,0 +1,237 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** div_u64_m_tied1: -+** udiv z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (div_u64_m_tied1, svuint64_t, -+ z0 = svdiv_u64_m (p0, z0, z1), -+ z0 = svdiv_m (p0, z0, z1)) -+ -+/* -+** div_u64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** udiv z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (div_u64_m_tied2, svuint64_t, -+ z0 = svdiv_u64_m (p0, z1, z0), -+ z0 = svdiv_m (p0, z1, z0)) -+ -+/* -+** div_u64_m_untied: -+** movprfx z0, z1 -+** udiv z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (div_u64_m_untied, svuint64_t, -+ z0 = svdiv_u64_m (p0, z1, z2), -+ z0 = svdiv_m (p0, z1, z2)) -+ -+/* -+** div_x0_u64_m_tied1: -+** mov 
(z[0-9]+\.d), x0 -+** udiv z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (div_x0_u64_m_tied1, svuint64_t, uint64_t, -+ z0 = svdiv_n_u64_m (p0, z0, x0), -+ z0 = svdiv_m (p0, z0, x0)) -+ -+/* -+** div_x0_u64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** udiv z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (div_x0_u64_m_untied, svuint64_t, uint64_t, -+ z0 = svdiv_n_u64_m (p0, z1, x0), -+ z0 = svdiv_m (p0, z1, x0)) -+ -+/* -+** div_2_u64_m_tied1: -+** mov (z[0-9]+\.d), #2 -+** udiv z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (div_2_u64_m_tied1, svuint64_t, -+ z0 = svdiv_n_u64_m (p0, z0, 2), -+ z0 = svdiv_m (p0, z0, 2)) -+ -+/* -+** div_2_u64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #2 -+** movprfx z0, z1 -+** udiv z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (div_2_u64_m_untied, svuint64_t, -+ z0 = svdiv_n_u64_m (p0, z1, 2), -+ z0 = svdiv_m (p0, z1, 2)) -+ -+/* -+** div_u64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** udiv z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (div_u64_z_tied1, svuint64_t, -+ z0 = svdiv_u64_z (p0, z0, z1), -+ z0 = svdiv_z (p0, z0, z1)) -+ -+/* -+** div_u64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** udivr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (div_u64_z_tied2, svuint64_t, -+ z0 = svdiv_u64_z (p0, z1, z0), -+ z0 = svdiv_z (p0, z1, z0)) -+ -+/* -+** div_u64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** udiv z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** udivr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (div_u64_z_untied, svuint64_t, -+ z0 = svdiv_u64_z (p0, z1, z2), -+ z0 = svdiv_z (p0, z1, z2)) -+ -+/* -+** div_x0_u64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** udiv z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (div_x0_u64_z_tied1, svuint64_t, uint64_t, -+ z0 = svdiv_n_u64_z (p0, z0, x0), -+ z0 = svdiv_z (p0, z0, x0)) -+ -+/* -+** div_x0_u64_z_untied: -+** mov (z[0-9]+\.d), x0 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** udiv z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** udivr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (div_x0_u64_z_untied, svuint64_t, uint64_t, -+ z0 = svdiv_n_u64_z (p0, z1, x0), -+ z0 = svdiv_z (p0, z1, x0)) -+ -+/* -+** div_2_u64_z_tied1: -+** mov (z[0-9]+\.d), #2 -+** movprfx z0\.d, p0/z, z0\.d -+** udiv z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (div_2_u64_z_tied1, svuint64_t, -+ z0 = svdiv_n_u64_z (p0, z0, 2), -+ z0 = svdiv_z (p0, z0, 2)) -+ -+/* -+** div_2_u64_z_untied: -+** mov (z[0-9]+\.d), #2 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** udiv z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** udivr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (div_2_u64_z_untied, svuint64_t, -+ z0 = svdiv_n_u64_z (p0, z1, 2), -+ z0 = svdiv_z (p0, z1, 2)) -+ -+/* -+** div_u64_x_tied1: -+** udiv z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (div_u64_x_tied1, svuint64_t, -+ z0 = svdiv_u64_x (p0, z0, z1), -+ z0 = svdiv_x (p0, z0, z1)) -+ -+/* -+** div_u64_x_tied2: -+** udivr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (div_u64_x_tied2, svuint64_t, -+ z0 = svdiv_u64_x (p0, z1, z0), -+ z0 = svdiv_x (p0, z1, z0)) -+ -+/* -+** div_u64_x_untied: -+** ( -+** movprfx z0, z1 -+** udiv z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0, z2 -+** udivr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (div_u64_x_untied, svuint64_t, -+ z0 = svdiv_u64_x (p0, z1, z2), -+ z0 = svdiv_x (p0, 
z1, z2)) -+ -+/* -+** div_x0_u64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** udiv z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (div_x0_u64_x_tied1, svuint64_t, uint64_t, -+ z0 = svdiv_n_u64_x (p0, z0, x0), -+ z0 = svdiv_x (p0, z0, x0)) -+ -+/* -+** div_x0_u64_x_untied: -+** mov z0\.d, x0 -+** udivr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (div_x0_u64_x_untied, svuint64_t, uint64_t, -+ z0 = svdiv_n_u64_x (p0, z1, x0), -+ z0 = svdiv_x (p0, z1, x0)) -+ -+/* -+** div_2_u64_x_tied1: -+** mov (z[0-9]+\.d), #2 -+** udiv z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (div_2_u64_x_tied1, svuint64_t, -+ z0 = svdiv_n_u64_x (p0, z0, 2), -+ z0 = svdiv_x (p0, z0, 2)) -+ -+/* -+** div_2_u64_x_untied: -+** mov z0\.d, #2 -+** udivr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (div_2_u64_x_untied, svuint64_t, -+ z0 = svdiv_n_u64_x (p0, z1, 2), -+ z0 = svdiv_x (p0, z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_f16.c -new file mode 100644 -index 000000000..03cc0343b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_f16.c -@@ -0,0 +1,324 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** divr_f16_m_tied1: -+** fdivr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (divr_f16_m_tied1, svfloat16_t, -+ z0 = svdivr_f16_m (p0, z0, z1), -+ z0 = svdivr_m (p0, z0, z1)) -+ -+/* -+** divr_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fdivr z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (divr_f16_m_tied2, svfloat16_t, -+ z0 = svdivr_f16_m (p0, z1, z0), -+ z0 = svdivr_m (p0, z1, z0)) -+ -+/* -+** divr_f16_m_untied: -+** movprfx z0, z1 -+** fdivr z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (divr_f16_m_untied, svfloat16_t, -+ z0 = svdivr_f16_m (p0, z1, z2), -+ z0 = svdivr_m (p0, z1, z2)) -+ -+/* -+** divr_h4_f16_m_tied1: -+** mov (z[0-9]+\.h), h4 -+** fdivr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (divr_h4_f16_m_tied1, svfloat16_t, __fp16, -+ z0 = svdivr_n_f16_m (p0, z0, d4), -+ z0 = svdivr_m (p0, z0, d4)) -+ -+/* -+** divr_h4_f16_m_untied: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0, z1 -+** fdivr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (divr_h4_f16_m_untied, svfloat16_t, __fp16, -+ z0 = svdivr_n_f16_m (p0, z1, d4), -+ z0 = svdivr_m (p0, z1, d4)) -+ -+/* -+** divr_1_f16_m_tied1: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? -+** fdivr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_1_f16_m_tied1, svfloat16_t, -+ z0 = svdivr_n_f16_m (p0, z0, 1), -+ z0 = svdivr_m (p0, z0, 1)) -+ -+/* -+** divr_1_f16_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fdivr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_1_f16_m_untied, svfloat16_t, -+ z0 = svdivr_n_f16_m (p0, z1, 1), -+ z0 = svdivr_m (p0, z1, 1)) -+ -+/* -+** divr_0p5_f16_m_tied1: -+** fmov (z[0-9]+\.h), #(?:0\.5|5\.0e-1) -+** fdivr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_0p5_f16_m_tied1, svfloat16_t, -+ z0 = svdivr_n_f16_m (p0, z0, 0.5), -+ z0 = svdivr_m (p0, z0, 0.5)) -+ -+/* -+** divr_0p5_f16_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.h), #(?:0\.5|5\.0e-1) -+** movprfx z0, z1 -+** fdivr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_0p5_f16_m_untied, svfloat16_t, -+ z0 = svdivr_n_f16_m (p0, z1, 0.5), -+ z0 = svdivr_m (p0, z1, 0.5)) -+ -+/* -+** divr_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fdivr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (divr_f16_z_tied1, svfloat16_t, -+ z0 = svdivr_f16_z (p0, z0, z1), -+ z0 = svdivr_z (p0, z0, z1)) -+ -+/* -+** divr_f16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** fdiv z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (divr_f16_z_tied2, svfloat16_t, -+ z0 = svdivr_f16_z (p0, z1, z0), -+ z0 = svdivr_z (p0, z1, z0)) -+ -+/* -+** divr_f16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fdivr z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fdiv z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (divr_f16_z_untied, svfloat16_t, -+ z0 = svdivr_f16_z (p0, z1, z2), -+ z0 = svdivr_z (p0, z1, z2)) -+ -+/* -+** divr_h4_f16_z_tied1: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0\.h, p0/z, z0\.h -+** fdivr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (divr_h4_f16_z_tied1, svfloat16_t, __fp16, -+ z0 = svdivr_n_f16_z (p0, z0, d4), -+ z0 = svdivr_z (p0, z0, d4)) -+ -+/* -+** divr_h4_f16_z_untied: -+** mov (z[0-9]+\.h), h4 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fdivr z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** fdiv z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (divr_h4_f16_z_untied, svfloat16_t, __fp16, -+ z0 = svdivr_n_f16_z (p0, z1, d4), -+ z0 = svdivr_z (p0, z1, d4)) -+ -+/* -+** divr_1_f16_z: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? 
-+** movprfx z0\.h, p0/z, z0\.h -+** fdivr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_1_f16_z, svfloat16_t, -+ z0 = svdivr_n_f16_z (p0, z0, 1), -+ z0 = svdivr_z (p0, z0, 1)) -+ -+/* -+** divr_0p5_f16_z_tied1: -+** fmov (z[0-9]+\.h), #(?:0\.5|5\.0e-1) -+** movprfx z0\.h, p0/z, z0\.h -+** fdivr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_0p5_f16_z_tied1, svfloat16_t, -+ z0 = svdivr_n_f16_z (p0, z0, 0.5), -+ z0 = svdivr_z (p0, z0, 0.5)) -+ -+/* -+** divr_0p5_f16_z_untied: -+** fmov (z[0-9]+\.h), #(?:0\.5|5\.0e-1) -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fdivr z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** fdiv z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (divr_0p5_f16_z_untied, svfloat16_t, -+ z0 = svdivr_n_f16_z (p0, z1, 0.5), -+ z0 = svdivr_z (p0, z1, 0.5)) -+ -+/* -+** divr_f16_x_tied1: -+** fdivr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (divr_f16_x_tied1, svfloat16_t, -+ z0 = svdivr_f16_x (p0, z0, z1), -+ z0 = svdivr_x (p0, z0, z1)) -+ -+/* -+** divr_f16_x_tied2: -+** fdiv z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (divr_f16_x_tied2, svfloat16_t, -+ z0 = svdivr_f16_x (p0, z1, z0), -+ z0 = svdivr_x (p0, z1, z0)) -+ -+/* -+** divr_f16_x_untied: -+** ( -+** movprfx z0, z1 -+** fdivr z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0, z2 -+** fdiv z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (divr_f16_x_untied, svfloat16_t, -+ z0 = svdivr_f16_x (p0, z1, z2), -+ z0 = svdivr_x (p0, z1, z2)) -+ -+/* -+** divr_h4_f16_x_tied1: -+** mov (z[0-9]+\.h), h4 -+** fdivr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (divr_h4_f16_x_tied1, svfloat16_t, __fp16, -+ z0 = svdivr_n_f16_x (p0, z0, d4), -+ z0 = svdivr_x (p0, z0, d4)) -+ -+/* -+** divr_h4_f16_x_untied: { xfail *-*-* } -+** mov z0\.h, h4 -+** fdiv z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZD (divr_h4_f16_x_untied, svfloat16_t, __fp16, -+ z0 = svdivr_n_f16_x (p0, z1, d4), -+ z0 = svdivr_x (p0, z1, d4)) -+ -+/* -+** divr_1_f16_x_tied1: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? -+** fdivr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_1_f16_x_tied1, svfloat16_t, -+ z0 = svdivr_n_f16_x (p0, z0, 1), -+ z0 = svdivr_x (p0, z0, 1)) -+ -+/* -+** divr_1_f16_x_untied: -+** fmov z0\.h, #1\.0(?:e\+0)? -+** fdiv z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (divr_1_f16_x_untied, svfloat16_t, -+ z0 = svdivr_n_f16_x (p0, z1, 1), -+ z0 = svdivr_x (p0, z1, 1)) -+ -+/* -+** ptrue_divr_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_divr_f16_x_tied1, svfloat16_t, -+ z0 = svdivr_f16_x (svptrue_b16 (), z0, z1), -+ z0 = svdivr_x (svptrue_b16 (), z0, z1)) -+ -+/* -+** ptrue_divr_f16_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_divr_f16_x_tied2, svfloat16_t, -+ z0 = svdivr_f16_x (svptrue_b16 (), z1, z0), -+ z0 = svdivr_x (svptrue_b16 (), z1, z0)) -+ -+/* -+** ptrue_divr_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_divr_f16_x_untied, svfloat16_t, -+ z0 = svdivr_f16_x (svptrue_b16 (), z1, z2), -+ z0 = svdivr_x (svptrue_b16 (), z1, z2)) -+ -+/* -+** ptrue_divr_1_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_divr_1_f16_x_tied1, svfloat16_t, -+ z0 = svdivr_n_f16_x (svptrue_b16 (), z0, 1), -+ z0 = svdivr_x (svptrue_b16 (), z0, 1)) -+ -+/* -+** ptrue_divr_1_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_divr_1_f16_x_untied, svfloat16_t, -+ z0 = svdivr_n_f16_x (svptrue_b16 (), z1, 1), -+ z0 = svdivr_x (svptrue_b16 (), z1, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_f32.c -new file mode 100644 -index 000000000..c2b65fc33 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_f32.c -@@ -0,0 +1,324 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** divr_f32_m_tied1: -+** fdivr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (divr_f32_m_tied1, svfloat32_t, -+ z0 = svdivr_f32_m (p0, z0, z1), -+ z0 = svdivr_m (p0, z0, z1)) -+ -+/* -+** divr_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fdivr z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (divr_f32_m_tied2, svfloat32_t, -+ z0 = svdivr_f32_m (p0, z1, z0), -+ z0 = svdivr_m (p0, z1, z0)) -+ -+/* -+** divr_f32_m_untied: -+** movprfx z0, z1 -+** fdivr z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (divr_f32_m_untied, svfloat32_t, -+ z0 = svdivr_f32_m (p0, z1, z2), -+ z0 = svdivr_m (p0, z1, z2)) -+ -+/* -+** divr_s4_f32_m_tied1: -+** mov (z[0-9]+\.s), s4 -+** fdivr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (divr_s4_f32_m_tied1, svfloat32_t, float, -+ z0 = svdivr_n_f32_m (p0, z0, d4), -+ z0 = svdivr_m (p0, z0, d4)) -+ -+/* -+** divr_s4_f32_m_untied: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0, z1 -+** fdivr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (divr_s4_f32_m_untied, svfloat32_t, float, -+ z0 = svdivr_n_f32_m (p0, z1, d4), -+ z0 = svdivr_m (p0, z1, d4)) -+ -+/* -+** divr_1_f32_m_tied1: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? -+** fdivr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_1_f32_m_tied1, svfloat32_t, -+ z0 = svdivr_n_f32_m (p0, z0, 1), -+ z0 = svdivr_m (p0, z0, 1)) -+ -+/* -+** divr_1_f32_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fdivr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_1_f32_m_untied, svfloat32_t, -+ z0 = svdivr_n_f32_m (p0, z1, 1), -+ z0 = svdivr_m (p0, z1, 1)) -+ -+/* -+** divr_0p5_f32_m_tied1: -+** fmov (z[0-9]+\.s), #(?:0\.5|5\.0e-1) -+** fdivr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_0p5_f32_m_tied1, svfloat32_t, -+ z0 = svdivr_n_f32_m (p0, z0, 0.5), -+ z0 = svdivr_m (p0, z0, 0.5)) -+ -+/* -+** divr_0p5_f32_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.s), #(?:0\.5|5\.0e-1) -+** movprfx z0, z1 -+** fdivr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_0p5_f32_m_untied, svfloat32_t, -+ z0 = svdivr_n_f32_m (p0, z1, 0.5), -+ z0 = svdivr_m (p0, z1, 0.5)) -+ -+/* -+** divr_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fdivr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (divr_f32_z_tied1, svfloat32_t, -+ z0 = svdivr_f32_z (p0, z0, z1), -+ z0 = svdivr_z (p0, z0, z1)) -+ -+/* -+** divr_f32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** fdiv z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (divr_f32_z_tied2, svfloat32_t, -+ z0 = svdivr_f32_z (p0, z1, z0), -+ z0 = svdivr_z (p0, z1, z0)) -+ -+/* -+** divr_f32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fdivr z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fdiv z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (divr_f32_z_untied, svfloat32_t, -+ z0 = svdivr_f32_z (p0, z1, z2), -+ z0 = svdivr_z (p0, z1, z2)) -+ -+/* -+** divr_s4_f32_z_tied1: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0\.s, p0/z, z0\.s -+** fdivr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (divr_s4_f32_z_tied1, svfloat32_t, float, -+ z0 = svdivr_n_f32_z (p0, z0, d4), -+ z0 = svdivr_z (p0, z0, d4)) -+ -+/* -+** divr_s4_f32_z_untied: -+** mov (z[0-9]+\.s), s4 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fdivr z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** fdiv z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (divr_s4_f32_z_untied, svfloat32_t, float, -+ z0 = svdivr_n_f32_z (p0, z1, d4), -+ z0 = svdivr_z (p0, z1, d4)) -+ -+/* -+** divr_1_f32_z: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? 
-+** movprfx z0\.s, p0/z, z0\.s -+** fdivr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_1_f32_z, svfloat32_t, -+ z0 = svdivr_n_f32_z (p0, z0, 1), -+ z0 = svdivr_z (p0, z0, 1)) -+ -+/* -+** divr_0p5_f32_z_tied1: -+** fmov (z[0-9]+\.s), #(?:0\.5|5\.0e-1) -+** movprfx z0\.s, p0/z, z0\.s -+** fdivr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_0p5_f32_z_tied1, svfloat32_t, -+ z0 = svdivr_n_f32_z (p0, z0, 0.5), -+ z0 = svdivr_z (p0, z0, 0.5)) -+ -+/* -+** divr_0p5_f32_z_untied: -+** fmov (z[0-9]+\.s), #(?:0\.5|5\.0e-1) -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fdivr z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** fdiv z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (divr_0p5_f32_z_untied, svfloat32_t, -+ z0 = svdivr_n_f32_z (p0, z1, 0.5), -+ z0 = svdivr_z (p0, z1, 0.5)) -+ -+/* -+** divr_f32_x_tied1: -+** fdivr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (divr_f32_x_tied1, svfloat32_t, -+ z0 = svdivr_f32_x (p0, z0, z1), -+ z0 = svdivr_x (p0, z0, z1)) -+ -+/* -+** divr_f32_x_tied2: -+** fdiv z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (divr_f32_x_tied2, svfloat32_t, -+ z0 = svdivr_f32_x (p0, z1, z0), -+ z0 = svdivr_x (p0, z1, z0)) -+ -+/* -+** divr_f32_x_untied: -+** ( -+** movprfx z0, z1 -+** fdivr z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0, z2 -+** fdiv z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (divr_f32_x_untied, svfloat32_t, -+ z0 = svdivr_f32_x (p0, z1, z2), -+ z0 = svdivr_x (p0, z1, z2)) -+ -+/* -+** divr_s4_f32_x_tied1: -+** mov (z[0-9]+\.s), s4 -+** fdivr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (divr_s4_f32_x_tied1, svfloat32_t, float, -+ z0 = svdivr_n_f32_x (p0, z0, d4), -+ z0 = svdivr_x (p0, z0, d4)) -+ -+/* -+** divr_s4_f32_x_untied: { xfail *-*-* } -+** mov z0\.s, s4 -+** fdiv z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZD (divr_s4_f32_x_untied, svfloat32_t, float, -+ z0 = svdivr_n_f32_x (p0, z1, d4), -+ z0 = svdivr_x (p0, z1, d4)) -+ -+/* -+** divr_1_f32_x_tied1: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? -+** fdivr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_1_f32_x_tied1, svfloat32_t, -+ z0 = svdivr_n_f32_x (p0, z0, 1), -+ z0 = svdivr_x (p0, z0, 1)) -+ -+/* -+** divr_1_f32_x_untied: -+** fmov z0\.s, #1\.0(?:e\+0)? -+** fdiv z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (divr_1_f32_x_untied, svfloat32_t, -+ z0 = svdivr_n_f32_x (p0, z1, 1), -+ z0 = svdivr_x (p0, z1, 1)) -+ -+/* -+** ptrue_divr_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_divr_f32_x_tied1, svfloat32_t, -+ z0 = svdivr_f32_x (svptrue_b32 (), z0, z1), -+ z0 = svdivr_x (svptrue_b32 (), z0, z1)) -+ -+/* -+** ptrue_divr_f32_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_divr_f32_x_tied2, svfloat32_t, -+ z0 = svdivr_f32_x (svptrue_b32 (), z1, z0), -+ z0 = svdivr_x (svptrue_b32 (), z1, z0)) -+ -+/* -+** ptrue_divr_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_divr_f32_x_untied, svfloat32_t, -+ z0 = svdivr_f32_x (svptrue_b32 (), z1, z2), -+ z0 = svdivr_x (svptrue_b32 (), z1, z2)) -+ -+/* -+** ptrue_divr_1_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_divr_1_f32_x_tied1, svfloat32_t, -+ z0 = svdivr_n_f32_x (svptrue_b32 (), z0, 1), -+ z0 = svdivr_x (svptrue_b32 (), z0, 1)) -+ -+/* -+** ptrue_divr_1_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_divr_1_f32_x_untied, svfloat32_t, -+ z0 = svdivr_n_f32_x (svptrue_b32 (), z1, 1), -+ z0 = svdivr_x (svptrue_b32 (), z1, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_f64.c -new file mode 100644 -index 000000000..0a72a37b1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_f64.c -@@ -0,0 +1,324 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** divr_f64_m_tied1: -+** fdivr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (divr_f64_m_tied1, svfloat64_t, -+ z0 = svdivr_f64_m (p0, z0, z1), -+ z0 = svdivr_m (p0, z0, z1)) -+ -+/* -+** divr_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fdivr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_f64_m_tied2, svfloat64_t, -+ z0 = svdivr_f64_m (p0, z1, z0), -+ z0 = svdivr_m (p0, z1, z0)) -+ -+/* -+** divr_f64_m_untied: -+** movprfx z0, z1 -+** fdivr z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (divr_f64_m_untied, svfloat64_t, -+ z0 = svdivr_f64_m (p0, z1, z2), -+ z0 = svdivr_m (p0, z1, z2)) -+ -+/* -+** divr_d4_f64_m_tied1: -+** mov (z[0-9]+\.d), d4 -+** fdivr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (divr_d4_f64_m_tied1, svfloat64_t, double, -+ z0 = svdivr_n_f64_m (p0, z0, d4), -+ z0 = svdivr_m (p0, z0, d4)) -+ -+/* -+** divr_d4_f64_m_untied: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0, z1 -+** fdivr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (divr_d4_f64_m_untied, svfloat64_t, double, -+ z0 = svdivr_n_f64_m (p0, z1, d4), -+ z0 = svdivr_m (p0, z1, d4)) -+ -+/* -+** divr_1_f64_m_tied1: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? -+** fdivr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_1_f64_m_tied1, svfloat64_t, -+ z0 = svdivr_n_f64_m (p0, z0, 1), -+ z0 = svdivr_m (p0, z0, 1)) -+ -+/* -+** divr_1_f64_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fdivr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_1_f64_m_untied, svfloat64_t, -+ z0 = svdivr_n_f64_m (p0, z1, 1), -+ z0 = svdivr_m (p0, z1, 1)) -+ -+/* -+** divr_0p5_f64_m_tied1: -+** fmov (z[0-9]+\.d), #(?:0\.5|5\.0e-1) -+** fdivr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_0p5_f64_m_tied1, svfloat64_t, -+ z0 = svdivr_n_f64_m (p0, z0, 0.5), -+ z0 = svdivr_m (p0, z0, 0.5)) -+ -+/* -+** divr_0p5_f64_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.d), #(?:0\.5|5\.0e-1) -+** movprfx z0, z1 -+** fdivr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_0p5_f64_m_untied, svfloat64_t, -+ z0 = svdivr_n_f64_m (p0, z1, 0.5), -+ z0 = svdivr_m (p0, z1, 0.5)) -+ -+/* -+** divr_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fdivr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (divr_f64_z_tied1, svfloat64_t, -+ z0 = svdivr_f64_z (p0, z0, z1), -+ z0 = svdivr_z (p0, z0, z1)) -+ -+/* -+** divr_f64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** fdiv z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (divr_f64_z_tied2, svfloat64_t, -+ z0 = svdivr_f64_z (p0, z1, z0), -+ z0 = svdivr_z (p0, z1, z0)) -+ -+/* -+** divr_f64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fdivr z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fdiv z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (divr_f64_z_untied, svfloat64_t, -+ z0 = svdivr_f64_z (p0, z1, z2), -+ z0 = svdivr_z (p0, z1, z2)) -+ -+/* -+** divr_d4_f64_z_tied1: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0\.d, p0/z, z0\.d -+** fdivr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (divr_d4_f64_z_tied1, svfloat64_t, double, -+ z0 = svdivr_n_f64_z (p0, z0, d4), -+ z0 = svdivr_z (p0, z0, d4)) -+ -+/* -+** divr_d4_f64_z_untied: -+** mov (z[0-9]+\.d), d4 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fdivr z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** fdiv z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (divr_d4_f64_z_untied, svfloat64_t, double, -+ z0 = svdivr_n_f64_z (p0, z1, d4), -+ z0 = svdivr_z (p0, z1, d4)) -+ -+/* -+** divr_1_f64_z: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? 
-+** movprfx z0\.d, p0/z, z0\.d -+** fdivr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_1_f64_z, svfloat64_t, -+ z0 = svdivr_n_f64_z (p0, z0, 1), -+ z0 = svdivr_z (p0, z0, 1)) -+ -+/* -+** divr_0p5_f64_z_tied1: -+** fmov (z[0-9]+\.d), #(?:0\.5|5\.0e-1) -+** movprfx z0\.d, p0/z, z0\.d -+** fdivr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_0p5_f64_z_tied1, svfloat64_t, -+ z0 = svdivr_n_f64_z (p0, z0, 0.5), -+ z0 = svdivr_z (p0, z0, 0.5)) -+ -+/* -+** divr_0p5_f64_z_untied: -+** fmov (z[0-9]+\.d), #(?:0\.5|5\.0e-1) -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fdivr z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** fdiv z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (divr_0p5_f64_z_untied, svfloat64_t, -+ z0 = svdivr_n_f64_z (p0, z1, 0.5), -+ z0 = svdivr_z (p0, z1, 0.5)) -+ -+/* -+** divr_f64_x_tied1: -+** fdivr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (divr_f64_x_tied1, svfloat64_t, -+ z0 = svdivr_f64_x (p0, z0, z1), -+ z0 = svdivr_x (p0, z0, z1)) -+ -+/* -+** divr_f64_x_tied2: -+** fdiv z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (divr_f64_x_tied2, svfloat64_t, -+ z0 = svdivr_f64_x (p0, z1, z0), -+ z0 = svdivr_x (p0, z1, z0)) -+ -+/* -+** divr_f64_x_untied: -+** ( -+** movprfx z0, z1 -+** fdivr z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0, z2 -+** fdiv z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (divr_f64_x_untied, svfloat64_t, -+ z0 = svdivr_f64_x (p0, z1, z2), -+ z0 = svdivr_x (p0, z1, z2)) -+ -+/* -+** divr_d4_f64_x_tied1: -+** mov (z[0-9]+\.d), d4 -+** fdivr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (divr_d4_f64_x_tied1, svfloat64_t, double, -+ z0 = svdivr_n_f64_x (p0, z0, d4), -+ z0 = svdivr_x (p0, z0, d4)) -+ -+/* -+** divr_d4_f64_x_untied: { xfail *-*-* } -+** mov z0\.d, d4 -+** fdiv z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZD (divr_d4_f64_x_untied, svfloat64_t, double, -+ z0 = svdivr_n_f64_x (p0, z1, d4), -+ z0 = svdivr_x (p0, z1, d4)) -+ -+/* -+** divr_1_f64_x_tied1: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? -+** fdivr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_1_f64_x_tied1, svfloat64_t, -+ z0 = svdivr_n_f64_x (p0, z0, 1), -+ z0 = svdivr_x (p0, z0, 1)) -+ -+/* -+** divr_1_f64_x_untied: -+** fmov z0\.d, #1\.0(?:e\+0)? -+** fdiv z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (divr_1_f64_x_untied, svfloat64_t, -+ z0 = svdivr_n_f64_x (p0, z1, 1), -+ z0 = svdivr_x (p0, z1, 1)) -+ -+/* -+** ptrue_divr_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_divr_f64_x_tied1, svfloat64_t, -+ z0 = svdivr_f64_x (svptrue_b64 (), z0, z1), -+ z0 = svdivr_x (svptrue_b64 (), z0, z1)) -+ -+/* -+** ptrue_divr_f64_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_divr_f64_x_tied2, svfloat64_t, -+ z0 = svdivr_f64_x (svptrue_b64 (), z1, z0), -+ z0 = svdivr_x (svptrue_b64 (), z1, z0)) -+ -+/* -+** ptrue_divr_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_divr_f64_x_untied, svfloat64_t, -+ z0 = svdivr_f64_x (svptrue_b64 (), z1, z2), -+ z0 = svdivr_x (svptrue_b64 (), z1, z2)) -+ -+/* -+** ptrue_divr_1_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_divr_1_f64_x_tied1, svfloat64_t, -+ z0 = svdivr_n_f64_x (svptrue_b64 (), z0, 1), -+ z0 = svdivr_x (svptrue_b64 (), z0, 1)) -+ -+/* -+** ptrue_divr_1_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_divr_1_f64_x_untied, svfloat64_t, -+ z0 = svdivr_n_f64_x (svptrue_b64 (), z1, 1), -+ z0 = svdivr_x (svptrue_b64 (), z1, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_s32.c -new file mode 100644 -index 000000000..75a6c1d97 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_s32.c -@@ -0,0 +1,247 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** divr_s32_m_tied1: -+** sdivr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (divr_s32_m_tied1, svint32_t, -+ z0 = svdivr_s32_m (p0, z0, z1), -+ z0 = svdivr_m (p0, z0, z1)) -+ -+/* -+** divr_s32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** sdivr z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (divr_s32_m_tied2, svint32_t, -+ z0 = svdivr_s32_m (p0, z1, z0), -+ z0 = svdivr_m (p0, z1, z0)) -+ -+/* -+** divr_s32_m_untied: -+** movprfx z0, z1 -+** sdivr z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (divr_s32_m_untied, svint32_t, -+ z0 = svdivr_s32_m (p0, z1, z2), -+ z0 = svdivr_m (p0, z1, z2)) -+ -+/* -+** divr_w0_s32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** sdivr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (divr_w0_s32_m_tied1, svint32_t, int32_t, -+ z0 = svdivr_n_s32_m (p0, z0, x0), -+ z0 = svdivr_m (p0, z0, x0)) -+ -+/* -+** divr_w0_s32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** sdivr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (divr_w0_s32_m_untied, svint32_t, int32_t, -+ z0 = svdivr_n_s32_m (p0, z1, x0), -+ z0 = svdivr_m (p0, z1, x0)) -+ -+/* -+** divr_2_s32_m_tied1: -+** mov (z[0-9]+\.s), #2 -+** sdivr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_2_s32_m_tied1, svint32_t, -+ z0 = svdivr_n_s32_m (p0, z0, 2), -+ z0 = svdivr_m (p0, z0, 2)) -+ -+/* -+** divr_2_s32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #2 -+** movprfx z0, z1 -+** sdivr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_2_s32_m_untied, svint32_t, -+ z0 = svdivr_n_s32_m (p0, z1, 2), -+ z0 = svdivr_m (p0, z1, 2)) -+ -+/* -+** divr_m1_s32_m: -+** mov (z[0-9]+)\.b, #-1 -+** sdivr z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (divr_m1_s32_m, svint32_t, -+ z0 = svdivr_n_s32_m (p0, z0, -1), -+ z0 = svdivr_m (p0, z0, -1)) -+ -+/* -+** divr_s32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** sdivr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (divr_s32_z_tied1, svint32_t, -+ z0 = svdivr_s32_z (p0, z0, z1), -+ z0 = svdivr_z (p0, z0, z1)) -+ -+/* -+** divr_s32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** sdiv z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (divr_s32_z_tied2, svint32_t, -+ z0 = svdivr_s32_z (p0, z1, z0), -+ z0 = svdivr_z (p0, z1, z0)) -+ -+/* -+** divr_s32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** sdivr z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** sdiv z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (divr_s32_z_untied, svint32_t, -+ z0 = svdivr_s32_z (p0, z1, z2), -+ z0 = svdivr_z (p0, z1, z2)) -+ -+/* -+** divr_w0_s32_z_tied1: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** sdivr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (divr_w0_s32_z_tied1, svint32_t, int32_t, -+ z0 = svdivr_n_s32_z (p0, z0, x0), -+ z0 = svdivr_z (p0, z0, x0)) -+ -+/* -+** divr_w0_s32_z_untied: -+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** sdivr z0\.s, p0/m, 
z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** sdiv z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (divr_w0_s32_z_untied, svint32_t, int32_t, -+ z0 = svdivr_n_s32_z (p0, z1, x0), -+ z0 = svdivr_z (p0, z1, x0)) -+ -+/* -+** divr_2_s32_z_tied1: -+** mov (z[0-9]+\.s), #2 -+** movprfx z0\.s, p0/z, z0\.s -+** sdivr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_2_s32_z_tied1, svint32_t, -+ z0 = svdivr_n_s32_z (p0, z0, 2), -+ z0 = svdivr_z (p0, z0, 2)) -+ -+/* -+** divr_2_s32_z_untied: -+** mov (z[0-9]+\.s), #2 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** sdivr z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** sdiv z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (divr_2_s32_z_untied, svint32_t, -+ z0 = svdivr_n_s32_z (p0, z1, 2), -+ z0 = svdivr_z (p0, z1, 2)) -+ -+/* -+** divr_s32_x_tied1: -+** sdivr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (divr_s32_x_tied1, svint32_t, -+ z0 = svdivr_s32_x (p0, z0, z1), -+ z0 = svdivr_x (p0, z0, z1)) -+ -+/* -+** divr_s32_x_tied2: -+** sdiv z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (divr_s32_x_tied2, svint32_t, -+ z0 = svdivr_s32_x (p0, z1, z0), -+ z0 = svdivr_x (p0, z1, z0)) -+ -+/* -+** divr_s32_x_untied: -+** ( -+** movprfx z0, z1 -+** sdivr z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0, z2 -+** sdiv z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (divr_s32_x_untied, svint32_t, -+ z0 = svdivr_s32_x (p0, z1, z2), -+ z0 = svdivr_x (p0, z1, z2)) -+ -+/* -+** divr_w0_s32_x_tied1: -+** mov (z[0-9]+\.s), w0 -+** sdivr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (divr_w0_s32_x_tied1, svint32_t, int32_t, -+ z0 = svdivr_n_s32_x (p0, z0, x0), -+ z0 = svdivr_x (p0, z0, x0)) -+ -+/* -+** divr_w0_s32_x_untied: -+** mov z0\.s, w0 -+** sdiv z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (divr_w0_s32_x_untied, svint32_t, int32_t, -+ z0 = svdivr_n_s32_x (p0, z1, x0), -+ z0 = svdivr_x (p0, z1, x0)) -+ -+/* -+** divr_2_s32_x_tied1: -+** mov (z[0-9]+\.s), #2 -+** sdivr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_2_s32_x_tied1, svint32_t, -+ z0 = svdivr_n_s32_x (p0, z0, 2), -+ z0 = svdivr_x (p0, z0, 2)) -+ -+/* -+** divr_2_s32_x_untied: -+** mov z0\.s, #2 -+** sdiv z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (divr_2_s32_x_untied, svint32_t, -+ z0 = svdivr_n_s32_x (p0, z1, 2), -+ z0 = svdivr_x (p0, z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_s64.c -new file mode 100644 -index 000000000..8f4939a91 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_s64.c -@@ -0,0 +1,247 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** divr_s64_m_tied1: -+** sdivr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (divr_s64_m_tied1, svint64_t, -+ z0 = svdivr_s64_m (p0, z0, z1), -+ z0 = svdivr_m (p0, z0, z1)) -+ -+/* -+** divr_s64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** sdivr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_s64_m_tied2, svint64_t, -+ z0 = svdivr_s64_m (p0, z1, z0), -+ z0 = svdivr_m (p0, z1, z0)) -+ -+/* -+** divr_s64_m_untied: -+** movprfx z0, z1 -+** sdivr z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (divr_s64_m_untied, svint64_t, -+ z0 = svdivr_s64_m (p0, z1, z2), -+ z0 = svdivr_m (p0, z1, z2)) -+ -+/* -+** divr_x0_s64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** sdivr z0\.d, p0/m, z0\.d, \1 -+** 
ret -+*/ -+TEST_UNIFORM_ZX (divr_x0_s64_m_tied1, svint64_t, int64_t, -+ z0 = svdivr_n_s64_m (p0, z0, x0), -+ z0 = svdivr_m (p0, z0, x0)) -+ -+/* -+** divr_x0_s64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** sdivr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (divr_x0_s64_m_untied, svint64_t, int64_t, -+ z0 = svdivr_n_s64_m (p0, z1, x0), -+ z0 = svdivr_m (p0, z1, x0)) -+ -+/* -+** divr_2_s64_m_tied1: -+** mov (z[0-9]+\.d), #2 -+** sdivr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_2_s64_m_tied1, svint64_t, -+ z0 = svdivr_n_s64_m (p0, z0, 2), -+ z0 = svdivr_m (p0, z0, 2)) -+ -+/* -+** divr_2_s64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #2 -+** movprfx z0, z1 -+** sdivr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_2_s64_m_untied, svint64_t, -+ z0 = svdivr_n_s64_m (p0, z1, 2), -+ z0 = svdivr_m (p0, z1, 2)) -+ -+/* -+** divr_m1_s64_m: -+** mov (z[0-9]+)\.b, #-1 -+** sdivr z0\.d, p0/m, z0\.d, \1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (divr_m1_s64_m, svint64_t, -+ z0 = svdivr_n_s64_m (p0, z0, -1), -+ z0 = svdivr_m (p0, z0, -1)) -+ -+/* -+** divr_s64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** sdivr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (divr_s64_z_tied1, svint64_t, -+ z0 = svdivr_s64_z (p0, z0, z1), -+ z0 = svdivr_z (p0, z0, z1)) -+ -+/* -+** divr_s64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** sdiv z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (divr_s64_z_tied2, svint64_t, -+ z0 = svdivr_s64_z (p0, z1, z0), -+ z0 = svdivr_z (p0, z1, z0)) -+ -+/* -+** divr_s64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** sdivr z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** sdiv z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (divr_s64_z_untied, svint64_t, -+ z0 = svdivr_s64_z (p0, z1, z2), -+ z0 = svdivr_z (p0, z1, z2)) -+ -+/* -+** divr_x0_s64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** sdivr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (divr_x0_s64_z_tied1, svint64_t, int64_t, -+ z0 = svdivr_n_s64_z (p0, z0, x0), -+ z0 = svdivr_z (p0, z0, x0)) -+ -+/* -+** divr_x0_s64_z_untied: -+** mov (z[0-9]+\.d), x0 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** sdivr z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** sdiv z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (divr_x0_s64_z_untied, svint64_t, int64_t, -+ z0 = svdivr_n_s64_z (p0, z1, x0), -+ z0 = svdivr_z (p0, z1, x0)) -+ -+/* -+** divr_2_s64_z_tied1: -+** mov (z[0-9]+\.d), #2 -+** movprfx z0\.d, p0/z, z0\.d -+** sdivr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_2_s64_z_tied1, svint64_t, -+ z0 = svdivr_n_s64_z (p0, z0, 2), -+ z0 = svdivr_z (p0, z0, 2)) -+ -+/* -+** divr_2_s64_z_untied: -+** mov (z[0-9]+\.d), #2 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** sdivr z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** sdiv z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (divr_2_s64_z_untied, svint64_t, -+ z0 = svdivr_n_s64_z (p0, z1, 2), -+ z0 = svdivr_z (p0, z1, 2)) -+ -+/* -+** divr_s64_x_tied1: -+** sdivr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (divr_s64_x_tied1, svint64_t, -+ z0 = svdivr_s64_x (p0, z0, z1), -+ z0 = svdivr_x (p0, z0, z1)) -+ -+/* -+** divr_s64_x_tied2: -+** sdiv z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (divr_s64_x_tied2, svint64_t, -+ z0 = svdivr_s64_x (p0, z1, z0), -+ z0 = svdivr_x (p0, z1, z0)) -+ -+/* -+** divr_s64_x_untied: -+** ( -+** movprfx z0, z1 -+** sdivr z0\.d, 
p0/m, z0\.d, z2\.d -+** | -+** movprfx z0, z2 -+** sdiv z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (divr_s64_x_untied, svint64_t, -+ z0 = svdivr_s64_x (p0, z1, z2), -+ z0 = svdivr_x (p0, z1, z2)) -+ -+/* -+** divr_x0_s64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** sdivr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (divr_x0_s64_x_tied1, svint64_t, int64_t, -+ z0 = svdivr_n_s64_x (p0, z0, x0), -+ z0 = svdivr_x (p0, z0, x0)) -+ -+/* -+** divr_x0_s64_x_untied: -+** mov z0\.d, x0 -+** sdiv z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (divr_x0_s64_x_untied, svint64_t, int64_t, -+ z0 = svdivr_n_s64_x (p0, z1, x0), -+ z0 = svdivr_x (p0, z1, x0)) -+ -+/* -+** divr_2_s64_x_tied1: -+** mov (z[0-9]+\.d), #2 -+** sdivr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_2_s64_x_tied1, svint64_t, -+ z0 = svdivr_n_s64_x (p0, z0, 2), -+ z0 = svdivr_x (p0, z0, 2)) -+ -+/* -+** divr_2_s64_x_untied: -+** mov z0\.d, #2 -+** sdiv z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (divr_2_s64_x_untied, svint64_t, -+ z0 = svdivr_n_s64_x (p0, z1, 2), -+ z0 = svdivr_x (p0, z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_u32.c -new file mode 100644 -index 000000000..84c243b44 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_u32.c -@@ -0,0 +1,247 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** divr_u32_m_tied1: -+** udivr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (divr_u32_m_tied1, svuint32_t, -+ z0 = svdivr_u32_m (p0, z0, z1), -+ z0 = svdivr_m (p0, z0, z1)) -+ -+/* -+** divr_u32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** udivr z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (divr_u32_m_tied2, svuint32_t, -+ z0 = svdivr_u32_m (p0, z1, z0), -+ z0 = svdivr_m (p0, z1, z0)) -+ -+/* -+** divr_u32_m_untied: -+** movprfx z0, z1 -+** udivr z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (divr_u32_m_untied, svuint32_t, -+ z0 = svdivr_u32_m (p0, z1, z2), -+ z0 = svdivr_m (p0, z1, z2)) -+ -+/* -+** divr_w0_u32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** udivr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (divr_w0_u32_m_tied1, svuint32_t, uint32_t, -+ z0 = svdivr_n_u32_m (p0, z0, x0), -+ z0 = svdivr_m (p0, z0, x0)) -+ -+/* -+** divr_w0_u32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** udivr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (divr_w0_u32_m_untied, svuint32_t, uint32_t, -+ z0 = svdivr_n_u32_m (p0, z1, x0), -+ z0 = svdivr_m (p0, z1, x0)) -+ -+/* -+** divr_2_u32_m_tied1: -+** mov (z[0-9]+\.s), #2 -+** udivr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_2_u32_m_tied1, svuint32_t, -+ z0 = svdivr_n_u32_m (p0, z0, 2), -+ z0 = svdivr_m (p0, z0, 2)) -+ -+/* -+** divr_2_u32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #2 -+** movprfx z0, z1 -+** udivr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_2_u32_m_untied, svuint32_t, -+ z0 = svdivr_n_u32_m (p0, z1, 2), -+ z0 = svdivr_m (p0, z1, 2)) -+ -+/* -+** divr_m1_u32_m: -+** mov (z[0-9]+)\.b, #-1 -+** udivr z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (divr_m1_u32_m, svuint32_t, -+ z0 = svdivr_n_u32_m (p0, z0, -1), -+ z0 = svdivr_m (p0, z0, -1)) -+ -+/* -+** divr_u32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** udivr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (divr_u32_z_tied1, svuint32_t, -+ z0 = svdivr_u32_z (p0, 
z0, z1), -+ z0 = svdivr_z (p0, z0, z1)) -+ -+/* -+** divr_u32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** udiv z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (divr_u32_z_tied2, svuint32_t, -+ z0 = svdivr_u32_z (p0, z1, z0), -+ z0 = svdivr_z (p0, z1, z0)) -+ -+/* -+** divr_u32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** udivr z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** udiv z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (divr_u32_z_untied, svuint32_t, -+ z0 = svdivr_u32_z (p0, z1, z2), -+ z0 = svdivr_z (p0, z1, z2)) -+ -+/* -+** divr_w0_u32_z_tied1: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** udivr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (divr_w0_u32_z_tied1, svuint32_t, uint32_t, -+ z0 = svdivr_n_u32_z (p0, z0, x0), -+ z0 = svdivr_z (p0, z0, x0)) -+ -+/* -+** divr_w0_u32_z_untied: -+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** udivr z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** udiv z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (divr_w0_u32_z_untied, svuint32_t, uint32_t, -+ z0 = svdivr_n_u32_z (p0, z1, x0), -+ z0 = svdivr_z (p0, z1, x0)) -+ -+/* -+** divr_2_u32_z_tied1: -+** mov (z[0-9]+\.s), #2 -+** movprfx z0\.s, p0/z, z0\.s -+** udivr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_2_u32_z_tied1, svuint32_t, -+ z0 = svdivr_n_u32_z (p0, z0, 2), -+ z0 = svdivr_z (p0, z0, 2)) -+ -+/* -+** divr_2_u32_z_untied: -+** mov (z[0-9]+\.s), #2 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** udivr z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** udiv z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (divr_2_u32_z_untied, svuint32_t, -+ z0 = svdivr_n_u32_z (p0, z1, 2), -+ z0 = svdivr_z (p0, z1, 2)) -+ -+/* -+** divr_u32_x_tied1: -+** udivr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (divr_u32_x_tied1, svuint32_t, -+ z0 = svdivr_u32_x (p0, z0, z1), -+ z0 = svdivr_x (p0, z0, z1)) -+ -+/* -+** divr_u32_x_tied2: -+** udiv z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (divr_u32_x_tied2, svuint32_t, -+ z0 = svdivr_u32_x (p0, z1, z0), -+ z0 = svdivr_x (p0, z1, z0)) -+ -+/* -+** divr_u32_x_untied: -+** ( -+** movprfx z0, z1 -+** udivr z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0, z2 -+** udiv z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (divr_u32_x_untied, svuint32_t, -+ z0 = svdivr_u32_x (p0, z1, z2), -+ z0 = svdivr_x (p0, z1, z2)) -+ -+/* -+** divr_w0_u32_x_tied1: -+** mov (z[0-9]+\.s), w0 -+** udivr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (divr_w0_u32_x_tied1, svuint32_t, uint32_t, -+ z0 = svdivr_n_u32_x (p0, z0, x0), -+ z0 = svdivr_x (p0, z0, x0)) -+ -+/* -+** divr_w0_u32_x_untied: -+** mov z0\.s, w0 -+** udiv z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (divr_w0_u32_x_untied, svuint32_t, uint32_t, -+ z0 = svdivr_n_u32_x (p0, z1, x0), -+ z0 = svdivr_x (p0, z1, x0)) -+ -+/* -+** divr_2_u32_x_tied1: -+** mov (z[0-9]+\.s), #2 -+** udivr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_2_u32_x_tied1, svuint32_t, -+ z0 = svdivr_n_u32_x (p0, z0, 2), -+ z0 = svdivr_x (p0, z0, 2)) -+ -+/* -+** divr_2_u32_x_untied: -+** mov z0\.s, #2 -+** udiv z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (divr_2_u32_x_untied, svuint32_t, -+ z0 = svdivr_n_u32_x (p0, z1, 2), -+ z0 = svdivr_x (p0, z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_u64.c -new file 
mode 100644 -index 000000000..03bb62472 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/divr_u64.c -@@ -0,0 +1,247 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** divr_u64_m_tied1: -+** udivr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (divr_u64_m_tied1, svuint64_t, -+ z0 = svdivr_u64_m (p0, z0, z1), -+ z0 = svdivr_m (p0, z0, z1)) -+ -+/* -+** divr_u64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** udivr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_u64_m_tied2, svuint64_t, -+ z0 = svdivr_u64_m (p0, z1, z0), -+ z0 = svdivr_m (p0, z1, z0)) -+ -+/* -+** divr_u64_m_untied: -+** movprfx z0, z1 -+** udivr z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (divr_u64_m_untied, svuint64_t, -+ z0 = svdivr_u64_m (p0, z1, z2), -+ z0 = svdivr_m (p0, z1, z2)) -+ -+/* -+** divr_x0_u64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** udivr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (divr_x0_u64_m_tied1, svuint64_t, uint64_t, -+ z0 = svdivr_n_u64_m (p0, z0, x0), -+ z0 = svdivr_m (p0, z0, x0)) -+ -+/* -+** divr_x0_u64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** udivr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (divr_x0_u64_m_untied, svuint64_t, uint64_t, -+ z0 = svdivr_n_u64_m (p0, z1, x0), -+ z0 = svdivr_m (p0, z1, x0)) -+ -+/* -+** divr_2_u64_m_tied1: -+** mov (z[0-9]+\.d), #2 -+** udivr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_2_u64_m_tied1, svuint64_t, -+ z0 = svdivr_n_u64_m (p0, z0, 2), -+ z0 = svdivr_m (p0, z0, 2)) -+ -+/* -+** divr_2_u64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #2 -+** movprfx z0, z1 -+** udivr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_2_u64_m_untied, svuint64_t, -+ z0 = svdivr_n_u64_m (p0, z1, 2), -+ z0 = svdivr_m (p0, z1, 2)) -+ -+/* -+** divr_m1_u64_m: -+** mov (z[0-9]+)\.b, #-1 -+** udivr z0\.d, p0/m, z0\.d, \1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (divr_m1_u64_m, svuint64_t, -+ z0 = svdivr_n_u64_m (p0, z0, -1), -+ z0 = svdivr_m (p0, z0, -1)) -+ -+/* -+** divr_u64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** udivr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (divr_u64_z_tied1, svuint64_t, -+ z0 = svdivr_u64_z (p0, z0, z1), -+ z0 = svdivr_z (p0, z0, z1)) -+ -+/* -+** divr_u64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** udiv z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (divr_u64_z_tied2, svuint64_t, -+ z0 = svdivr_u64_z (p0, z1, z0), -+ z0 = svdivr_z (p0, z1, z0)) -+ -+/* -+** divr_u64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** udivr z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** udiv z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (divr_u64_z_untied, svuint64_t, -+ z0 = svdivr_u64_z (p0, z1, z2), -+ z0 = svdivr_z (p0, z1, z2)) -+ -+/* -+** divr_x0_u64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** udivr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (divr_x0_u64_z_tied1, svuint64_t, uint64_t, -+ z0 = svdivr_n_u64_z (p0, z0, x0), -+ z0 = svdivr_z (p0, z0, x0)) -+ -+/* -+** divr_x0_u64_z_untied: -+** mov (z[0-9]+\.d), x0 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** udivr z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** udiv z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (divr_x0_u64_z_untied, svuint64_t, uint64_t, -+ z0 = svdivr_n_u64_z (p0, z1, x0), -+ z0 = svdivr_z (p0, z1, x0)) -+ -+/* -+** divr_2_u64_z_tied1: -+** mov (z[0-9]+\.d), 
#2 -+** movprfx z0\.d, p0/z, z0\.d -+** udivr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_2_u64_z_tied1, svuint64_t, -+ z0 = svdivr_n_u64_z (p0, z0, 2), -+ z0 = svdivr_z (p0, z0, 2)) -+ -+/* -+** divr_2_u64_z_untied: -+** mov (z[0-9]+\.d), #2 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** udivr z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** udiv z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (divr_2_u64_z_untied, svuint64_t, -+ z0 = svdivr_n_u64_z (p0, z1, 2), -+ z0 = svdivr_z (p0, z1, 2)) -+ -+/* -+** divr_u64_x_tied1: -+** udivr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (divr_u64_x_tied1, svuint64_t, -+ z0 = svdivr_u64_x (p0, z0, z1), -+ z0 = svdivr_x (p0, z0, z1)) -+ -+/* -+** divr_u64_x_tied2: -+** udiv z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (divr_u64_x_tied2, svuint64_t, -+ z0 = svdivr_u64_x (p0, z1, z0), -+ z0 = svdivr_x (p0, z1, z0)) -+ -+/* -+** divr_u64_x_untied: -+** ( -+** movprfx z0, z1 -+** udivr z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0, z2 -+** udiv z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (divr_u64_x_untied, svuint64_t, -+ z0 = svdivr_u64_x (p0, z1, z2), -+ z0 = svdivr_x (p0, z1, z2)) -+ -+/* -+** divr_x0_u64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** udivr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (divr_x0_u64_x_tied1, svuint64_t, uint64_t, -+ z0 = svdivr_n_u64_x (p0, z0, x0), -+ z0 = svdivr_x (p0, z0, x0)) -+ -+/* -+** divr_x0_u64_x_untied: -+** mov z0\.d, x0 -+** udiv z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (divr_x0_u64_x_untied, svuint64_t, uint64_t, -+ z0 = svdivr_n_u64_x (p0, z1, x0), -+ z0 = svdivr_x (p0, z1, x0)) -+ -+/* -+** divr_2_u64_x_tied1: -+** mov (z[0-9]+\.d), #2 -+** udivr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (divr_2_u64_x_tied1, svuint64_t, -+ z0 = svdivr_n_u64_x (p0, z0, 2), -+ z0 = svdivr_x (p0, z0, 2)) -+ -+/* -+** divr_2_u64_x_untied: -+** mov z0\.d, #2 -+** udiv z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (divr_2_u64_x_untied, svuint64_t, -+ z0 = svdivr_n_u64_x (p0, z1, 2), -+ z0 = svdivr_x (p0, z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_lane_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_lane_s32.c -new file mode 100644 -index 000000000..a4d713e29 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_lane_s32.c -@@ -0,0 +1,93 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dot_lane_0_s32_tied1: -+** sdot z0\.s, z4\.b, z5\.b\[0\] -+** ret -+*/ -+TEST_DUAL_Z (dot_lane_0_s32_tied1, svint32_t, svint8_t, -+ z0 = svdot_lane_s32 (z0, z4, z5, 0), -+ z0 = svdot_lane (z0, z4, z5, 0)) -+ -+/* -+** dot_lane_0_s32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** sdot z0\.s, \1\.b, z1\.b\[0\] -+** ret -+*/ -+TEST_DUAL_Z_REV (dot_lane_0_s32_tied2, svint32_t, svint8_t, -+ z0_res = svdot_lane_s32 (z4, z0, z1, 0), -+ z0_res = svdot_lane (z4, z0, z1, 0)) -+ -+/* -+** dot_lane_0_s32_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** sdot z0\.s, z1\.b, \1\.b\[0\] -+** ret -+*/ -+TEST_DUAL_Z_REV (dot_lane_0_s32_tied3, svint32_t, svint8_t, -+ z0_res = svdot_lane_s32 (z4, z1, z0, 0), -+ z0_res = svdot_lane (z4, z1, z0, 0)) -+ -+/* -+** dot_lane_0_s32_untied: -+** movprfx z0, z1 -+** sdot z0\.s, z4\.b, z5\.b\[0\] -+** ret -+*/ -+TEST_DUAL_Z (dot_lane_0_s32_untied, svint32_t, svint8_t, -+ z0 = svdot_lane_s32 (z1, z4, z5, 0), -+ z0 = svdot_lane (z1, z4, z5, 0)) -+ 
-+/* -+** dot_lane_1_s32: -+** sdot z0\.s, z4\.b, z5\.b\[1\] -+** ret -+*/ -+TEST_DUAL_Z (dot_lane_1_s32, svint32_t, svint8_t, -+ z0 = svdot_lane_s32 (z0, z4, z5, 1), -+ z0 = svdot_lane (z0, z4, z5, 1)) -+ -+/* -+** dot_lane_2_s32: -+** sdot z0\.s, z4\.b, z5\.b\[2\] -+** ret -+*/ -+TEST_DUAL_Z (dot_lane_2_s32, svint32_t, svint8_t, -+ z0 = svdot_lane_s32 (z0, z4, z5, 2), -+ z0 = svdot_lane (z0, z4, z5, 2)) -+ -+/* -+** dot_lane_3_s32: -+** sdot z0\.s, z4\.b, z5\.b\[3\] -+** ret -+*/ -+TEST_DUAL_Z (dot_lane_3_s32, svint32_t, svint8_t, -+ z0 = svdot_lane_s32 (z0, z4, z5, 3), -+ z0 = svdot_lane (z0, z4, z5, 3)) -+ -+/* -+** dot_lane_z8_s32: -+** str d8, \[sp, -16\]! -+** mov (z[0-7])\.d, z8\.d -+** sdot z0\.s, z1\.b, \1\.b\[1\] -+** ldr d8, \[sp\], 16 -+** ret -+*/ -+TEST_DUAL_LANE_REG (dot_lane_z8_s32, svint32_t, svint8_t, z8, -+ z0 = svdot_lane_s32 (z0, z1, z8, 1), -+ z0 = svdot_lane (z0, z1, z8, 1)) -+ -+/* -+** dot_lane_z16_s32: -+** mov (z[0-7])\.d, z16\.d -+** sdot z0\.s, z1\.b, \1\.b\[1\] -+** ret -+*/ -+TEST_DUAL_LANE_REG (dot_lane_z16_s32, svint32_t, svint8_t, z16, -+ z0 = svdot_lane_s32 (z0, z1, z16, 1), -+ z0 = svdot_lane (z0, z1, z16, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_lane_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_lane_s64.c -new file mode 100644 -index 000000000..daee74091 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_lane_s64.c -@@ -0,0 +1,74 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dot_lane_0_s64_tied1: -+** sdot z0\.d, z4\.h, z5\.h\[0\] -+** ret -+*/ -+TEST_DUAL_Z (dot_lane_0_s64_tied1, svint64_t, svint16_t, -+ z0 = svdot_lane_s64 (z0, z4, z5, 0), -+ z0 = svdot_lane (z0, z4, z5, 0)) -+ -+/* -+** dot_lane_0_s64_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** sdot z0\.d, \1\.h, z1\.h\[0\] -+** ret -+*/ -+TEST_DUAL_Z_REV (dot_lane_0_s64_tied2, svint64_t, svint16_t, -+ z0_res = svdot_lane_s64 (z4, z0, z1, 0), -+ z0_res = svdot_lane (z4, z0, z1, 0)) -+ -+/* -+** dot_lane_0_s64_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** sdot z0\.d, z1\.h, \1\.h\[0\] -+** ret -+*/ -+TEST_DUAL_Z_REV (dot_lane_0_s64_tied3, svint64_t, svint16_t, -+ z0_res = svdot_lane_s64 (z4, z1, z0, 0), -+ z0_res = svdot_lane (z4, z1, z0, 0)) -+ -+/* -+** dot_lane_0_s64_untied: -+** movprfx z0, z1 -+** sdot z0\.d, z4\.h, z5\.h\[0\] -+** ret -+*/ -+TEST_DUAL_Z (dot_lane_0_s64_untied, svint64_t, svint16_t, -+ z0 = svdot_lane_s64 (z1, z4, z5, 0), -+ z0 = svdot_lane (z1, z4, z5, 0)) -+ -+/* -+** dot_lane_1_s64: -+** sdot z0\.d, z4\.h, z5\.h\[1\] -+** ret -+*/ -+TEST_DUAL_Z (dot_lane_1_s64, svint64_t, svint16_t, -+ z0 = svdot_lane_s64 (z0, z4, z5, 1), -+ z0 = svdot_lane (z0, z4, z5, 1)) -+ -+/* -+** dot_lane_z15_s64: -+** str d15, \[sp, -16\]! 
-+** sdot z0\.d, z1\.h, z15\.h\[1\] -+** ldr d15, \[sp\], 16 -+** ret -+*/ -+TEST_DUAL_LANE_REG (dot_lane_z15_s64, svint64_t, svint16_t, z15, -+ z0 = svdot_lane_s64 (z0, z1, z15, 1), -+ z0 = svdot_lane (z0, z1, z15, 1)) -+ -+/* -+** dot_lane_z16_s64: -+** mov (z[0-9]|z1[0-5])\.d, z16\.d -+** sdot z0\.d, z1\.h, \1\.h\[1\] -+** ret -+*/ -+TEST_DUAL_LANE_REG (dot_lane_z16_s64, svint64_t, svint16_t, z16, -+ z0 = svdot_lane_s64 (z0, z1, z16, 1), -+ z0 = svdot_lane (z0, z1, z16, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_lane_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_lane_u32.c -new file mode 100644 -index 000000000..6d69df76d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_lane_u32.c -@@ -0,0 +1,93 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dot_lane_0_u32_tied1: -+** udot z0\.s, z4\.b, z5\.b\[0\] -+** ret -+*/ -+TEST_DUAL_Z (dot_lane_0_u32_tied1, svuint32_t, svuint8_t, -+ z0 = svdot_lane_u32 (z0, z4, z5, 0), -+ z0 = svdot_lane (z0, z4, z5, 0)) -+ -+/* -+** dot_lane_0_u32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** udot z0\.s, \1\.b, z1\.b\[0\] -+** ret -+*/ -+TEST_DUAL_Z_REV (dot_lane_0_u32_tied2, svuint32_t, svuint8_t, -+ z0_res = svdot_lane_u32 (z4, z0, z1, 0), -+ z0_res = svdot_lane (z4, z0, z1, 0)) -+ -+/* -+** dot_lane_0_u32_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** udot z0\.s, z1\.b, \1\.b\[0\] -+** ret -+*/ -+TEST_DUAL_Z_REV (dot_lane_0_u32_tied3, svuint32_t, svuint8_t, -+ z0_res = svdot_lane_u32 (z4, z1, z0, 0), -+ z0_res = svdot_lane (z4, z1, z0, 0)) -+ -+/* -+** dot_lane_0_u32_untied: -+** movprfx z0, z1 -+** udot z0\.s, z4\.b, z5\.b\[0\] -+** ret -+*/ -+TEST_DUAL_Z (dot_lane_0_u32_untied, svuint32_t, svuint8_t, -+ z0 = svdot_lane_u32 (z1, z4, z5, 0), -+ z0 = svdot_lane (z1, z4, z5, 0)) -+ -+/* -+** dot_lane_1_u32: -+** udot z0\.s, z4\.b, z5\.b\[1\] -+** ret -+*/ -+TEST_DUAL_Z (dot_lane_1_u32, svuint32_t, svuint8_t, -+ z0 = svdot_lane_u32 (z0, z4, z5, 1), -+ z0 = svdot_lane (z0, z4, z5, 1)) -+ -+/* -+** dot_lane_2_u32: -+** udot z0\.s, z4\.b, z5\.b\[2\] -+** ret -+*/ -+TEST_DUAL_Z (dot_lane_2_u32, svuint32_t, svuint8_t, -+ z0 = svdot_lane_u32 (z0, z4, z5, 2), -+ z0 = svdot_lane (z0, z4, z5, 2)) -+ -+/* -+** dot_lane_3_u32: -+** udot z0\.s, z4\.b, z5\.b\[3\] -+** ret -+*/ -+TEST_DUAL_Z (dot_lane_3_u32, svuint32_t, svuint8_t, -+ z0 = svdot_lane_u32 (z0, z4, z5, 3), -+ z0 = svdot_lane (z0, z4, z5, 3)) -+ -+/* -+** dot_lane_z8_u32: -+** str d8, \[sp, -16\]! 
-+** mov (z[0-7])\.d, z8\.d -+** udot z0\.s, z1\.b, \1\.b\[1\] -+** ldr d8, \[sp\], 16 -+** ret -+*/ -+TEST_DUAL_LANE_REG (dot_lane_z8_u32, svuint32_t, svuint8_t, z8, -+ z0 = svdot_lane_u32 (z0, z1, z8, 1), -+ z0 = svdot_lane (z0, z1, z8, 1)) -+ -+/* -+** dot_lane_z16_u32: -+** mov (z[0-7])\.d, z16\.d -+** udot z0\.s, z1\.b, \1\.b\[1\] -+** ret -+*/ -+TEST_DUAL_LANE_REG (dot_lane_z16_u32, svuint32_t, svuint8_t, z16, -+ z0 = svdot_lane_u32 (z0, z1, z16, 1), -+ z0 = svdot_lane (z0, z1, z16, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_lane_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_lane_u64.c -new file mode 100644 -index 000000000..242e21c78 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_lane_u64.c -@@ -0,0 +1,74 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dot_lane_0_u64_tied1: -+** udot z0\.d, z4\.h, z5\.h\[0\] -+** ret -+*/ -+TEST_DUAL_Z (dot_lane_0_u64_tied1, svuint64_t, svuint16_t, -+ z0 = svdot_lane_u64 (z0, z4, z5, 0), -+ z0 = svdot_lane (z0, z4, z5, 0)) -+ -+/* -+** dot_lane_0_u64_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** udot z0\.d, \1\.h, z1\.h\[0\] -+** ret -+*/ -+TEST_DUAL_Z_REV (dot_lane_0_u64_tied2, svuint64_t, svuint16_t, -+ z0_res = svdot_lane_u64 (z4, z0, z1, 0), -+ z0_res = svdot_lane (z4, z0, z1, 0)) -+ -+/* -+** dot_lane_0_u64_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** udot z0\.d, z1\.h, \1\.h\[0\] -+** ret -+*/ -+TEST_DUAL_Z_REV (dot_lane_0_u64_tied3, svuint64_t, svuint16_t, -+ z0_res = svdot_lane_u64 (z4, z1, z0, 0), -+ z0_res = svdot_lane (z4, z1, z0, 0)) -+ -+/* -+** dot_lane_0_u64_untied: -+** movprfx z0, z1 -+** udot z0\.d, z4\.h, z5\.h\[0\] -+** ret -+*/ -+TEST_DUAL_Z (dot_lane_0_u64_untied, svuint64_t, svuint16_t, -+ z0 = svdot_lane_u64 (z1, z4, z5, 0), -+ z0 = svdot_lane (z1, z4, z5, 0)) -+ -+/* -+** dot_lane_1_u64: -+** udot z0\.d, z4\.h, z5\.h\[1\] -+** ret -+*/ -+TEST_DUAL_Z (dot_lane_1_u64, svuint64_t, svuint16_t, -+ z0 = svdot_lane_u64 (z0, z4, z5, 1), -+ z0 = svdot_lane (z0, z4, z5, 1)) -+ -+/* -+** dot_lane_z15_u64: -+** str d15, \[sp, -16\]! 
-+** udot z0\.d, z1\.h, z15\.h\[1\] -+** ldr d15, \[sp\], 16 -+** ret -+*/ -+TEST_DUAL_LANE_REG (dot_lane_z15_u64, svuint64_t, svuint16_t, z15, -+ z0 = svdot_lane_u64 (z0, z1, z15, 1), -+ z0 = svdot_lane (z0, z1, z15, 1)) -+ -+/* -+** dot_lane_z16_u64: -+** mov (z[0-9]|z1[0-5])\.d, z16\.d -+** udot z0\.d, z1\.h, \1\.h\[1\] -+** ret -+*/ -+TEST_DUAL_LANE_REG (dot_lane_z16_u64, svuint64_t, svuint16_t, z16, -+ z0 = svdot_lane_u64 (z0, z1, z16, 1), -+ z0 = svdot_lane (z0, z1, z16, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_s32.c -new file mode 100644 -index 000000000..605bd1b30 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_s32.c -@@ -0,0 +1,86 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dot_s32_tied1: -+** sdot z0\.s, z4\.b, z5\.b -+** ret -+*/ -+TEST_DUAL_Z (dot_s32_tied1, svint32_t, svint8_t, -+ z0 = svdot_s32 (z0, z4, z5), -+ z0 = svdot (z0, z4, z5)) -+ -+/* -+** dot_s32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** sdot z0\.s, \1\.b, z1\.b -+** ret -+*/ -+TEST_DUAL_Z_REV (dot_s32_tied2, svint32_t, svint8_t, -+ z0_res = svdot_s32 (z4, z0, z1), -+ z0_res = svdot (z4, z0, z1)) -+ -+/* -+** dot_s32_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** sdot z0\.s, z1\.b, \1\.b -+** ret -+*/ -+TEST_DUAL_Z_REV (dot_s32_tied3, svint32_t, svint8_t, -+ z0_res = svdot_s32 (z4, z1, z0), -+ z0_res = svdot (z4, z1, z0)) -+ -+/* -+** dot_s32_untied: -+** movprfx z0, z1 -+** sdot z0\.s, z4\.b, z5\.b -+** ret -+*/ -+TEST_DUAL_Z (dot_s32_untied, svint32_t, svint8_t, -+ z0 = svdot_s32 (z1, z4, z5), -+ z0 = svdot (z1, z4, z5)) -+ -+/* -+** dot_w0_s32_tied1: -+** mov (z[0-9]+\.b), w0 -+** sdot z0\.s, z4\.b, \1 -+** ret -+*/ -+TEST_DUAL_ZX (dot_w0_s32_tied1, svint32_t, svint8_t, int8_t, -+ z0 = svdot_n_s32 (z0, z4, x0), -+ z0 = svdot (z0, z4, x0)) -+ -+/* -+** dot_w0_s32_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** sdot z0\.s, z4\.b, \1 -+** ret -+*/ -+TEST_DUAL_ZX (dot_w0_s32_untied, svint32_t, svint8_t, int8_t, -+ z0 = svdot_n_s32 (z1, z4, x0), -+ z0 = svdot (z1, z4, x0)) -+ -+/* -+** dot_9_s32_tied1: -+** mov (z[0-9]+\.b), #9 -+** sdot z0\.s, z4\.b, \1 -+** ret -+*/ -+TEST_DUAL_Z (dot_9_s32_tied1, svint32_t, svint8_t, -+ z0 = svdot_n_s32 (z0, z4, 9), -+ z0 = svdot (z0, z4, 9)) -+ -+/* -+** dot_9_s32_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), #9 -+** movprfx z0, z1 -+** sdot z0\.s, z4\.b, \1 -+** ret -+*/ -+TEST_DUAL_Z (dot_9_s32_untied, svint32_t, svint8_t, -+ z0 = svdot_n_s32 (z1, z4, 9), -+ z0 = svdot (z1, z4, 9)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_s64.c -new file mode 100644 -index 000000000..b6574740b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_s64.c -@@ -0,0 +1,86 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dot_s64_tied1: -+** sdot z0\.d, z4\.h, z5\.h -+** ret -+*/ -+TEST_DUAL_Z (dot_s64_tied1, svint64_t, svint16_t, -+ z0 = svdot_s64 (z0, z4, z5), -+ z0 = svdot (z0, z4, z5)) -+ -+/* -+** dot_s64_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** sdot z0\.d, \1\.h, z1\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (dot_s64_tied2, svint64_t, svint16_t, -+ z0_res = svdot_s64 (z4, z0, z1), -+ z0_res = svdot (z4, z0, z1)) -+ -+/* -+** dot_s64_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx 
z0, z4 -+** sdot z0\.d, z1\.h, \1\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (dot_s64_tied3, svint64_t, svint16_t, -+ z0_res = svdot_s64 (z4, z1, z0), -+ z0_res = svdot (z4, z1, z0)) -+ -+/* -+** dot_s64_untied: -+** movprfx z0, z1 -+** sdot z0\.d, z4\.h, z5\.h -+** ret -+*/ -+TEST_DUAL_Z (dot_s64_untied, svint64_t, svint16_t, -+ z0 = svdot_s64 (z1, z4, z5), -+ z0 = svdot (z1, z4, z5)) -+ -+/* -+** dot_w0_s64_tied1: -+** mov (z[0-9]+\.h), w0 -+** sdot z0\.d, z4\.h, \1 -+** ret -+*/ -+TEST_DUAL_ZX (dot_w0_s64_tied1, svint64_t, svint16_t, int16_t, -+ z0 = svdot_n_s64 (z0, z4, x0), -+ z0 = svdot (z0, z4, x0)) -+ -+/* -+** dot_w0_s64_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** sdot z0\.d, z4\.h, \1 -+** ret -+*/ -+TEST_DUAL_ZX (dot_w0_s64_untied, svint64_t, svint16_t, int16_t, -+ z0 = svdot_n_s64 (z1, z4, x0), -+ z0 = svdot (z1, z4, x0)) -+ -+/* -+** dot_9_s64_tied1: -+** mov (z[0-9]+\.h), #9 -+** sdot z0\.d, z4\.h, \1 -+** ret -+*/ -+TEST_DUAL_Z (dot_9_s64_tied1, svint64_t, svint16_t, -+ z0 = svdot_n_s64 (z0, z4, 9), -+ z0 = svdot (z0, z4, 9)) -+ -+/* -+** dot_9_s64_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), #9 -+** movprfx z0, z1 -+** sdot z0\.d, z4\.h, \1 -+** ret -+*/ -+TEST_DUAL_Z (dot_9_s64_untied, svint64_t, svint16_t, -+ z0 = svdot_n_s64 (z1, z4, 9), -+ z0 = svdot (z1, z4, 9)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_u32.c -new file mode 100644 -index 000000000..541e71cc2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_u32.c -@@ -0,0 +1,86 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dot_u32_tied1: -+** udot z0\.s, z4\.b, z5\.b -+** ret -+*/ -+TEST_DUAL_Z (dot_u32_tied1, svuint32_t, svuint8_t, -+ z0 = svdot_u32 (z0, z4, z5), -+ z0 = svdot (z0, z4, z5)) -+ -+/* -+** dot_u32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** udot z0\.s, \1\.b, z1\.b -+** ret -+*/ -+TEST_DUAL_Z_REV (dot_u32_tied2, svuint32_t, svuint8_t, -+ z0_res = svdot_u32 (z4, z0, z1), -+ z0_res = svdot (z4, z0, z1)) -+ -+/* -+** dot_u32_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** udot z0\.s, z1\.b, \1\.b -+** ret -+*/ -+TEST_DUAL_Z_REV (dot_u32_tied3, svuint32_t, svuint8_t, -+ z0_res = svdot_u32 (z4, z1, z0), -+ z0_res = svdot (z4, z1, z0)) -+ -+/* -+** dot_u32_untied: -+** movprfx z0, z1 -+** udot z0\.s, z4\.b, z5\.b -+** ret -+*/ -+TEST_DUAL_Z (dot_u32_untied, svuint32_t, svuint8_t, -+ z0 = svdot_u32 (z1, z4, z5), -+ z0 = svdot (z1, z4, z5)) -+ -+/* -+** dot_w0_u32_tied1: -+** mov (z[0-9]+\.b), w0 -+** udot z0\.s, z4\.b, \1 -+** ret -+*/ -+TEST_DUAL_ZX (dot_w0_u32_tied1, svuint32_t, svuint8_t, uint8_t, -+ z0 = svdot_n_u32 (z0, z4, x0), -+ z0 = svdot (z0, z4, x0)) -+ -+/* -+** dot_w0_u32_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** udot z0\.s, z4\.b, \1 -+** ret -+*/ -+TEST_DUAL_ZX (dot_w0_u32_untied, svuint32_t, svuint8_t, uint8_t, -+ z0 = svdot_n_u32 (z1, z4, x0), -+ z0 = svdot (z1, z4, x0)) -+ -+/* -+** dot_9_u32_tied1: -+** mov (z[0-9]+\.b), #9 -+** udot z0\.s, z4\.b, \1 -+** ret -+*/ -+TEST_DUAL_Z (dot_9_u32_tied1, svuint32_t, svuint8_t, -+ z0 = svdot_n_u32 (z0, z4, 9), -+ z0 = svdot (z0, z4, 9)) -+ -+/* -+** dot_9_u32_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), #9 -+** movprfx z0, z1 -+** udot z0\.s, z4\.b, \1 -+** ret -+*/ -+TEST_DUAL_Z (dot_9_u32_untied, svuint32_t, svuint8_t, -+ z0 = svdot_n_u32 (z1, z4, 9), -+ z0 = svdot (z1, z4, 9)) -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_u64.c -new file mode 100644 -index 000000000..cc0e85373 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dot_u64.c -@@ -0,0 +1,86 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dot_u64_tied1: -+** udot z0\.d, z4\.h, z5\.h -+** ret -+*/ -+TEST_DUAL_Z (dot_u64_tied1, svuint64_t, svuint16_t, -+ z0 = svdot_u64 (z0, z4, z5), -+ z0 = svdot (z0, z4, z5)) -+ -+/* -+** dot_u64_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** udot z0\.d, \1\.h, z1\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (dot_u64_tied2, svuint64_t, svuint16_t, -+ z0_res = svdot_u64 (z4, z0, z1), -+ z0_res = svdot (z4, z0, z1)) -+ -+/* -+** dot_u64_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** udot z0\.d, z1\.h, \1\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (dot_u64_tied3, svuint64_t, svuint16_t, -+ z0_res = svdot_u64 (z4, z1, z0), -+ z0_res = svdot (z4, z1, z0)) -+ -+/* -+** dot_u64_untied: -+** movprfx z0, z1 -+** udot z0\.d, z4\.h, z5\.h -+** ret -+*/ -+TEST_DUAL_Z (dot_u64_untied, svuint64_t, svuint16_t, -+ z0 = svdot_u64 (z1, z4, z5), -+ z0 = svdot (z1, z4, z5)) -+ -+/* -+** dot_w0_u64_tied1: -+** mov (z[0-9]+\.h), w0 -+** udot z0\.d, z4\.h, \1 -+** ret -+*/ -+TEST_DUAL_ZX (dot_w0_u64_tied1, svuint64_t, svuint16_t, uint16_t, -+ z0 = svdot_n_u64 (z0, z4, x0), -+ z0 = svdot (z0, z4, x0)) -+ -+/* -+** dot_w0_u64_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** udot z0\.d, z4\.h, \1 -+** ret -+*/ -+TEST_DUAL_ZX (dot_w0_u64_untied, svuint64_t, svuint16_t, uint16_t, -+ z0 = svdot_n_u64 (z1, z4, x0), -+ z0 = svdot (z1, z4, x0)) -+ -+/* -+** dot_9_u64_tied1: -+** mov (z[0-9]+\.h), #9 -+** udot z0\.d, z4\.h, \1 -+** ret -+*/ -+TEST_DUAL_Z (dot_9_u64_tied1, svuint64_t, svuint16_t, -+ z0 = svdot_n_u64 (z0, z4, 9), -+ z0 = svdot (z0, z4, 9)) -+ -+/* -+** dot_9_u64_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), #9 -+** movprfx z0, z1 -+** udot z0\.d, z4\.h, \1 -+** ret -+*/ -+TEST_DUAL_Z (dot_9_u64_untied, svuint64_t, svuint16_t, -+ z0 = svdot_n_u64 (z1, z4, 9), -+ z0 = svdot (z1, z4, 9)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_b16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_b16.c -new file mode 100644 -index 000000000..785832ab3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_b16.c -@@ -0,0 +1,32 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include -+#include "test_sve_acle.h" -+ -+/* -+** dup_false_b16: -+** pfalse p0\.b -+** ret -+*/ -+TEST_UNIFORM_P (dup_false_b16, -+ p0 = svdup_n_b16 (false), -+ p0 = svdup_b16 (false)) -+ -+/* -+** dup_true_b16: -+** ptrue p0\.h, all -+** ret -+*/ -+TEST_UNIFORM_P (dup_true_b16, -+ p0 = svdup_n_b16 (true), -+ p0 = svdup_b16 (true)) -+ -+/* -+** dup_w0_b16: -+** lsl (x[0-9]+), x0, 63 -+** whilelo p0\.h, xzr, \1 -+** ret -+*/ -+TEST_UNIFORM_PS (dup_w0_b16, -+ p0 = svdup_n_b16 (x0), -+ p0 = svdup_b16 (x0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_b32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_b32.c -new file mode 100644 -index 000000000..6e9d91eaf ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_b32.c -@@ -0,0 +1,32 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include -+#include "test_sve_acle.h" -+ -+/* -+** dup_false_b32: -+** pfalse p0\.b -+** ret -+*/ -+TEST_UNIFORM_P (dup_false_b32, -+ p0 = 
svdup_n_b32 (false), -+ p0 = svdup_b32 (false)) -+ -+/* -+** dup_true_b32: -+** ptrue p0\.s, all -+** ret -+*/ -+TEST_UNIFORM_P (dup_true_b32, -+ p0 = svdup_n_b32 (true), -+ p0 = svdup_b32 (true)) -+ -+/* -+** dup_w0_b32: -+** lsl (x[0-9]+), x0, 63 -+** whilelo p0\.s, xzr, \1 -+** ret -+*/ -+TEST_UNIFORM_PS (dup_w0_b32, -+ p0 = svdup_n_b32 (x0), -+ p0 = svdup_b32 (x0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_b64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_b64.c -new file mode 100644 -index 000000000..ed69896c4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_b64.c -@@ -0,0 +1,32 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include -+#include "test_sve_acle.h" -+ -+/* -+** dup_false_b64: -+** pfalse p0\.b -+** ret -+*/ -+TEST_UNIFORM_P (dup_false_b64, -+ p0 = svdup_n_b64 (false), -+ p0 = svdup_b64 (false)) -+ -+/* -+** dup_true_b64: -+** ptrue p0\.d, all -+** ret -+*/ -+TEST_UNIFORM_P (dup_true_b64, -+ p0 = svdup_n_b64 (true), -+ p0 = svdup_b64 (true)) -+ -+/* -+** dup_w0_b64: -+** lsl (x[0-9]+), x0, 63 -+** whilelo p0\.d, xzr, \1 -+** ret -+*/ -+TEST_UNIFORM_PS (dup_w0_b64, -+ p0 = svdup_n_b64 (x0), -+ p0 = svdup_b64 (x0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_b8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_b8.c -new file mode 100644 -index 000000000..a99ab552a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_b8.c -@@ -0,0 +1,32 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include -+#include "test_sve_acle.h" -+ -+/* -+** dup_false_b8: -+** pfalse p0\.b -+** ret -+*/ -+TEST_UNIFORM_P (dup_false_b8, -+ p0 = svdup_n_b8 (false), -+ p0 = svdup_b8 (false)) -+ -+/* -+** dup_true_b8: -+** ptrue p0\.b, all -+** ret -+*/ -+TEST_UNIFORM_P (dup_true_b8, -+ p0 = svdup_n_b8 (true), -+ p0 = svdup_b8 (true)) -+ -+/* -+** dup_w0_b8: -+** lsl (x[0-9]+), x0, 63 -+** whilelo p0\.b, xzr, \1 -+** ret -+*/ -+TEST_UNIFORM_PS (dup_w0_b8, -+ p0 = svdup_n_b8 (x0), -+ p0 = svdup_b8 (x0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_bf16.c -new file mode 100644 -index 000000000..db47d849c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_bf16.c -@@ -0,0 +1,41 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dup_h4_bf16: -+** mov z0\.h, h4 -+** ret -+*/ -+TEST_UNIFORM_ZD (dup_h4_bf16, svbfloat16_t, __bf16, -+ z0 = svdup_n_bf16 (d4), -+ z0 = svdup_bf16 (d4)) -+ -+/* -+** dup_h4_bf16_m: -+** movprfx z0, z1 -+** mov z0\.h, p0/m, h4 -+** ret -+*/ -+TEST_UNIFORM_ZD (dup_h4_bf16_m, svbfloat16_t, __bf16, -+ z0 = svdup_n_bf16_m (z1, p0, d4), -+ z0 = svdup_bf16_m (z1, p0, d4)) -+ -+/* -+** dup_h4_bf16_z: -+** movprfx z0\.h, p0/z, z0\.h -+** mov z0\.h, p0/m, h4 -+** ret -+*/ -+TEST_UNIFORM_ZD (dup_h4_bf16_z, svbfloat16_t, __bf16, -+ z0 = svdup_n_bf16_z (p0, d4), -+ z0 = svdup_bf16_z (p0, d4)) -+ -+/* -+** dup_h4_bf16_x: -+** mov z0\.h, h4 -+** ret -+*/ -+TEST_UNIFORM_ZD (dup_h4_bf16_x, svbfloat16_t, __bf16, -+ z0 = svdup_n_bf16_x (p0, d4), -+ z0 = svdup_bf16_x (p0, d4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f16.c -new file mode 100644 -index 000000000..2d48b9a3d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f16.c -@@ -0,0 +1,215 @@ -+/* { dg-final { check-function-bodies "**" 
"" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dup_1_f16: -+** fmov z0\.h, #1\.0(?:e\+0)? -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_f16, svfloat16_t, -+ z0 = svdup_n_f16 (1), -+ z0 = svdup_f16 (1)) -+ -+/* -+** dup_0_f16: -+** mov z0\.h, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_0_f16, svfloat16_t, -+ z0 = svdup_n_f16 (0), -+ z0 = svdup_f16 (0)) -+ -+/* -+** dup_8_f16: -+** fmov z0\.h, #8\.0(?:e\+0)? -+** ret -+*/ -+TEST_UNIFORM_Z (dup_8_f16, svfloat16_t, -+ z0 = svdup_n_f16 (8), -+ z0 = svdup_f16 (8)) -+ -+/* -+** dup_512_f16: -+** mov z0\.h, #24576 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_512_f16, svfloat16_t, -+ z0 = svdup_n_f16 (512), -+ z0 = svdup_f16 (512)) -+ -+/* -+** dup_513_f16: -+** mov (w[0-7]+), 24578 -+** mov z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_513_f16, svfloat16_t, -+ z0 = svdup_n_f16 (513), -+ z0 = svdup_f16 (513)) -+ -+/* -+** dup_h4_f16: -+** mov z0\.h, h4 -+** ret -+*/ -+TEST_UNIFORM_ZD (dup_h4_f16, svfloat16_t, __fp16, -+ z0 = svdup_n_f16 (d4), -+ z0 = svdup_f16 (d4)) -+ -+/* -+** dup_1_f16_m: -+** mov z0\.h, p0/m, #15360 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_f16_m, svfloat16_t, -+ z0 = svdup_n_f16_m (z0, p0, 1), -+ z0 = svdup_f16_m (z0, p0, 1)) -+ -+/* -+** dup_0_f16_m: -+** mov z0\.h, p0/m, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_0_f16_m, svfloat16_t, -+ z0 = svdup_n_f16_m (z0, p0, 0), -+ z0 = svdup_f16_m (z0, p0, 0)) -+ -+/* -+** dup_8_f16_m: -+** mov z0\.h, p0/m, #18432 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_8_f16_m, svfloat16_t, -+ z0 = svdup_n_f16_m (z0, p0, 8), -+ z0 = svdup_f16_m (z0, p0, 8)) -+ -+/* -+** dup_512_f16_m: -+** mov z0\.h, p0/m, #24576 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_512_f16_m, svfloat16_t, -+ z0 = svdup_n_f16_m (z0, p0, 512), -+ z0 = svdup_f16_m (z0, p0, 512)) -+ -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_513_f16_m, svfloat16_t, -+ z0 = svdup_n_f16_m (z0, p0, 513), -+ z0 = svdup_f16_m (z0, p0, 513)) -+ -+/* -+** dup_h4_f16_m: -+** movprfx z0, z1 -+** mov z0\.h, p0/m, h4 -+** ret -+*/ -+TEST_UNIFORM_ZD (dup_h4_f16_m, svfloat16_t, __fp16, -+ z0 = svdup_n_f16_m (z1, p0, d4), -+ z0 = svdup_f16_m (z1, p0, d4)) -+ -+/* -+** dup_1_f16_z: -+** mov z0\.h, p0/z, #15360 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_f16_z, svfloat16_t, -+ z0 = svdup_n_f16_z (p0, 1), -+ z0 = svdup_f16_z (p0, 1)) -+ -+/* -+** dup_0_f16_z: -+** mov z0\.h, p0/z, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_0_f16_z, svfloat16_t, -+ z0 = svdup_n_f16_z (p0, 0), -+ z0 = svdup_f16_z (p0, 0)) -+ -+/* -+** dup_8_f16_z: -+** mov z0\.h, p0/z, #18432 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_8_f16_z, svfloat16_t, -+ z0 = svdup_n_f16_z (p0, 8), -+ z0 = svdup_f16_z (p0, 8)) -+ -+/* -+** dup_512_f16_z: -+** mov z0\.h, p0/z, #24576 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_512_f16_z, svfloat16_t, -+ z0 = svdup_n_f16_z (p0, 512), -+ z0 = svdup_f16_z (p0, 512)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_513_f16_z, svfloat16_t, -+ z0 = svdup_n_f16_z (p0, 513), -+ z0 = svdup_f16_z (p0, 513)) -+/* -+** dup_h4_f16_z: -+** movprfx z0\.h, p0/z, z0\.h -+** mov z0\.h, p0/m, h4 -+** ret -+*/ -+TEST_UNIFORM_ZD (dup_h4_f16_z, svfloat16_t, __fp16, -+ z0 = svdup_n_f16_z (p0, d4), -+ z0 = svdup_f16_z (p0, d4)) -+ -+/* -+** dup_1_f16_x: -+** fmov z0\.h, #1\.0(?:e\+0)? 
-+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_f16_x, svfloat16_t, -+ z0 = svdup_n_f16_x (p0, 1), -+ z0 = svdup_f16_x (p0, 1)) -+ -+/* -+** dup_0_f16_x: -+** mov z0\.h, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_0_f16_x, svfloat16_t, -+ z0 = svdup_n_f16_x (p0, 0), -+ z0 = svdup_f16_x (p0, 0)) -+ -+/* -+** dup_8_f16_x: -+** fmov z0\.h, #8\.0(?:e\+0)? -+** ret -+*/ -+TEST_UNIFORM_Z (dup_8_f16_x, svfloat16_t, -+ z0 = svdup_n_f16_x (p0, 8), -+ z0 = svdup_f16_x (p0, 8)) -+ -+/* -+** dup_512_f16_x: -+** mov z0\.h, #24576 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_512_f16_x, svfloat16_t, -+ z0 = svdup_n_f16_x (p0, 512), -+ z0 = svdup_f16_x (p0, 512)) -+ -+/* -+** dup_513_f16_x: -+** mov (w[0-7]+), 24578 -+** mov z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_513_f16_x, svfloat16_t, -+ z0 = svdup_n_f16_x (p0, 513), -+ z0 = svdup_f16_x (p0, 513)) -+ -+/* -+** dup_h4_f16_x: -+** mov z0\.h, h4 -+** ret -+*/ -+TEST_UNIFORM_ZD (dup_h4_f16_x, svfloat16_t, __fp16, -+ z0 = svdup_n_f16_x (p0, d4), -+ z0 = svdup_f16_x (p0, d4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f32.c -new file mode 100644 -index 000000000..f997b7a7d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f32.c -@@ -0,0 +1,212 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dup_1_f32: -+** fmov z0\.s, #1\.0(?:e\+0)? -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_f32, svfloat32_t, -+ z0 = svdup_n_f32 (1), -+ z0 = svdup_f32 (1)) -+ -+/* -+** dup_0_f32: -+** mov z0\.s, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_0_f32, svfloat32_t, -+ z0 = svdup_n_f32 (0), -+ z0 = svdup_f32 (0)) -+ -+/* -+** dup_8_f32: -+** fmov z0\.s, #8\.0(?:e\+0)? -+** ret -+*/ -+TEST_UNIFORM_Z (dup_8_f32, svfloat32_t, -+ z0 = svdup_n_f32 (8), -+ z0 = svdup_f32 (8)) -+ -+/* -+** dup_512_f32: -+** movi v([0-9]+).4s, 0x44, lsl 24 -+** dup z0\.q, z0\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_512_f32, svfloat32_t, -+ z0 = svdup_n_f32 (512), -+ z0 = svdup_f32 (512)) -+ -+/* -+** dup_513_f32: -+** ... -+** ld1rw z0\.s, p[0-7]/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_513_f32, svfloat32_t, -+ z0 = svdup_n_f32 (513), -+ z0 = svdup_f32 (513)) -+ -+/* -+** dup_s4_f32: -+** mov z0\.s, s4 -+** ret -+*/ -+TEST_UNIFORM_ZD (dup_s4_f32, svfloat32_t, float, -+ z0 = svdup_n_f32 (d4), -+ z0 = svdup_f32 (d4)) -+ -+/* -+** dup_1_f32_m: -+** fmov z0\.s, p0/m, #1\.0(?:e\+0)? -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_f32_m, svfloat32_t, -+ z0 = svdup_n_f32_m (z0, p0, 1), -+ z0 = svdup_f32_m (z0, p0, 1)) -+ -+/* -+** dup_0_f32_m: -+** mov z0\.s, p0/m, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_0_f32_m, svfloat32_t, -+ z0 = svdup_n_f32_m (z0, p0, 0), -+ z0 = svdup_f32_m (z0, p0, 0)) -+ -+/* -+** dup_8_f32_m: -+** fmov z0\.s, p0/m, #8\.0(?:e\+0)? -+** ret -+*/ -+TEST_UNIFORM_Z (dup_8_f32_m, svfloat32_t, -+ z0 = svdup_n_f32_m (z0, p0, 8), -+ z0 = svdup_f32_m (z0, p0, 8)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_512_f32_m, svfloat32_t, -+ z0 = svdup_n_f32_m (z0, p0, 512), -+ z0 = svdup_f32_m (z0, p0, 512)) -+ -+ -+/* TODO: Bad code and needs fixing. 
*/ -+TEST_UNIFORM_Z (dup_513_f32_m, svfloat32_t, -+ z0 = svdup_n_f32_m (z0, p0, 513), -+ z0 = svdup_f32_m (z0, p0, 513)) -+ -+/* -+** dup_s4_f32_m: -+** movprfx z0, z1 -+** mov z0\.s, p0/m, s4 -+** ret -+*/ -+TEST_UNIFORM_ZD (dup_s4_f32_m, svfloat32_t, float, -+ z0 = svdup_n_f32_m (z1, p0, d4), -+ z0 = svdup_f32_m (z1, p0, d4)) -+ -+/* -+** dup_1_f32_z: -+** movprfx z0\.s, p0/z, z0\.s -+** fmov z0\.s, p0/m, #1\.0(?:e\+0)? -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_f32_z, svfloat32_t, -+ z0 = svdup_n_f32_z (p0, 1), -+ z0 = svdup_f32_z (p0, 1)) -+ -+/* -+** dup_0_f32_z: -+** mov z0\.s, p0/z, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_0_f32_z, svfloat32_t, -+ z0 = svdup_n_f32_z (p0, 0), -+ z0 = svdup_f32_z (p0, 0)) -+ -+/* -+** dup_8_f32_z: -+** movprfx z0\.s, p0/z, z0\.s -+** fmov z0\.s, p0/m, #8\.0(?:e\+0)? -+** ret -+*/ -+TEST_UNIFORM_Z (dup_8_f32_z, svfloat32_t, -+ z0 = svdup_n_f32_z (p0, 8), -+ z0 = svdup_f32_z (p0, 8)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_512_f32_z, svfloat32_t, -+ z0 = svdup_n_f32_z (p0, 512), -+ z0 = svdup_f32_z (p0, 512)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_513_f32_z, svfloat32_t, -+ z0 = svdup_n_f32_z (p0, 513), -+ z0 = svdup_f32_z (p0, 513)) -+ -+/* -+** dup_s4_f32_z: -+** movprfx z0\.s, p0/z, z0\.s -+** mov z0\.s, p0/m, s4 -+** ret -+*/ -+TEST_UNIFORM_ZD (dup_s4_f32_z, svfloat32_t, float, -+ z0 = svdup_n_f32_z (p0, d4), -+ z0 = svdup_f32_z (p0, d4)) -+ -+/* -+** dup_1_f32_x: -+** fmov z0\.s, #1\.0(?:e\+0)? -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_f32_x, svfloat32_t, -+ z0 = svdup_n_f32_x (p0, 1), -+ z0 = svdup_f32_x (p0, 1)) -+ -+/* -+** dup_0_f32_x: -+** mov z0\.s, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_0_f32_x, svfloat32_t, -+ z0 = svdup_n_f32_x (p0, 0), -+ z0 = svdup_f32_x (p0, 0)) -+ -+/* -+** dup_8_f32_x: -+** fmov z0\.s, #8\.0(?:e\+0)? -+** ret -+*/ -+TEST_UNIFORM_Z (dup_8_f32_x, svfloat32_t, -+ z0 = svdup_n_f32_x (p0, 8), -+ z0 = svdup_f32_x (p0, 8)) -+ -+/* -+** dup_512_f32_x: -+** movi v([0-9]+).4s, 0x44, lsl 24 -+** dup z0\.q, z0\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_512_f32_x, svfloat32_t, -+ z0 = svdup_n_f32_x (p0, 512), -+ z0 = svdup_f32_x (p0, 512)) -+ -+/* -+** dup_513_f32_x: -+** ... -+** ld1rw z0\.s, p[0-7]/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_513_f32_x, svfloat32_t, -+ z0 = svdup_n_f32_x (p0, 513), -+ z0 = svdup_f32_x (p0, 513)) -+ -+/* -+** dup_s4_f32_x: -+** mov z0\.s, s4 -+** ret -+*/ -+TEST_UNIFORM_ZD (dup_s4_f32_x, svfloat32_t, float, -+ z0 = svdup_n_f32_x (p0, d4), -+ z0 = svdup_f32_x (p0, d4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f64.c -new file mode 100644 -index 000000000..e177d9108 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_f64.c -@@ -0,0 +1,212 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dup_1_f64: -+** fmov z0\.d, #1\.0(?:e\+0)? -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_f64, svfloat64_t, -+ z0 = svdup_n_f64 (1), -+ z0 = svdup_f64 (1)) -+ -+/* -+** dup_0_f64: -+** mov z0\.d, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_0_f64, svfloat64_t, -+ z0 = svdup_n_f64 (0), -+ z0 = svdup_f64 (0)) -+ -+/* -+** dup_8_f64: -+** fmov z0\.d, #8\.0(?:e\+0)? 
-+** ret -+*/ -+TEST_UNIFORM_Z (dup_8_f64, svfloat64_t, -+ z0 = svdup_n_f64 (8), -+ z0 = svdup_f64 (8)) -+ -+/* -+** dup_512_f64: -+** mov (x[0-9]+), 4647714815446351872 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_512_f64, svfloat64_t, -+ z0 = svdup_n_f64 (512), -+ z0 = svdup_f64 (512)) -+ -+/* -+** dup_513_f64: -+** ... -+** ld1rd z0\.d, p[0-7]/z, \[x[0-9+]\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_513_f64, svfloat64_t, -+ z0 = svdup_n_f64 (513), -+ z0 = svdup_f64 (513)) -+ -+/* -+** dup_d4_f64: -+** mov z0\.d, d4 -+** ret -+*/ -+TEST_UNIFORM_ZD (dup_d4_f64, svfloat64_t, double, -+ z0 = svdup_n_f64 (d4), -+ z0 = svdup_f64 (d4)) -+ -+/* -+** dup_1_f64_m: -+** fmov z0\.d, p0/m, #1\.0(?:e\+0)? -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_f64_m, svfloat64_t, -+ z0 = svdup_n_f64_m (z0, p0, 1), -+ z0 = svdup_f64_m (z0, p0, 1)) -+ -+/* -+** dup_0_f64_m: -+** mov z0\.d, p0/m, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_0_f64_m, svfloat64_t, -+ z0 = svdup_n_f64_m (z0, p0, 0), -+ z0 = svdup_f64_m (z0, p0, 0)) -+ -+/* -+** dup_8_f64_m: -+** fmov z0\.d, p0/m, #8\.0(?:e\+0)? -+** ret -+*/ -+TEST_UNIFORM_Z (dup_8_f64_m, svfloat64_t, -+ z0 = svdup_n_f64_m (z0, p0, 8), -+ z0 = svdup_f64_m (z0, p0, 8)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_512_f64_m, svfloat64_t, -+ z0 = svdup_n_f64_m (z0, p0, 512), -+ z0 = svdup_f64_m (z0, p0, 512)) -+ -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_513_f64_m, svfloat64_t, -+ z0 = svdup_n_f64_m (z0, p0, 513), -+ z0 = svdup_f64_m (z0, p0, 513)) -+ -+/* -+** dup_d4_f64_m: -+** movprfx z0, z1 -+** mov z0\.d, p0/m, d4 -+** ret -+*/ -+TEST_UNIFORM_ZD (dup_d4_f64_m, svfloat64_t, double, -+ z0 = svdup_n_f64_m (z1, p0, d4), -+ z0 = svdup_f64_m (z1, p0, d4)) -+ -+/* -+** dup_1_f64_z: -+** movprfx z0\.d, p0/z, z0\.d -+** fmov z0\.d, p0/m, #1\.0(?:e\+0)? -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_f64_z, svfloat64_t, -+ z0 = svdup_n_f64_z (p0, 1), -+ z0 = svdup_f64_z (p0, 1)) -+ -+/* -+** dup_0_f64_z: -+** mov z0\.d, p0/z, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_0_f64_z, svfloat64_t, -+ z0 = svdup_n_f64_z (p0, 0), -+ z0 = svdup_f64_z (p0, 0)) -+ -+/* -+** dup_8_f64_z: -+** movprfx z0\.d, p0/z, z0\.d -+** fmov z0\.d, p0/m, #8\.0(?:e\+0)? -+** ret -+*/ -+TEST_UNIFORM_Z (dup_8_f64_z, svfloat64_t, -+ z0 = svdup_n_f64_z (p0, 8), -+ z0 = svdup_f64_z (p0, 8)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_512_f64_z, svfloat64_t, -+ z0 = svdup_n_f64_z (p0, 512), -+ z0 = svdup_f64_z (p0, 512)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_513_f64_z, svfloat64_t, -+ z0 = svdup_n_f64_z (p0, 513), -+ z0 = svdup_f64_z (p0, 513)) -+ -+/* -+** dup_d4_f64_z: -+** movprfx z0\.d, p0/z, z0\.d -+** mov z0\.d, p0/m, d4 -+** ret -+*/ -+TEST_UNIFORM_ZD (dup_d4_f64_z, svfloat64_t, double, -+ z0 = svdup_n_f64_z (p0, d4), -+ z0 = svdup_f64_z (p0, d4)) -+ -+/* -+** dup_1_f64_x: -+** fmov z0\.d, #1\.0(?:e\+0)? -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_f64_x, svfloat64_t, -+ z0 = svdup_n_f64_x (p0, 1), -+ z0 = svdup_f64_x (p0, 1)) -+ -+/* -+** dup_0_f64_x: -+** mov z0\.d, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_0_f64_x, svfloat64_t, -+ z0 = svdup_n_f64_x (p0, 0), -+ z0 = svdup_f64_x (p0, 0)) -+ -+/* -+** dup_8_f64_x: -+** fmov z0\.d, #8\.0(?:e\+0)? 
-+** ret -+*/ -+TEST_UNIFORM_Z (dup_8_f64_x, svfloat64_t, -+ z0 = svdup_n_f64_x (p0, 8), -+ z0 = svdup_f64_x (p0, 8)) -+ -+/* -+** dup_512_f64_x: -+** mov (x[0-9]+), 4647714815446351872 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_512_f64_x, svfloat64_t, -+ z0 = svdup_n_f64_x (p0, 512), -+ z0 = svdup_f64_x (p0, 512)) -+ -+/* -+** dup_513_f64_x: -+** ... -+** ld1rd z0\.d, p[0-7]/z, \[x[0-9+]\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_513_f64_x, svfloat64_t, -+ z0 = svdup_n_f64_x (p0, 513), -+ z0 = svdup_f64_x (p0, 513)) -+ -+/* -+** dup_d4_f64_x: -+** mov z0\.d, d4 -+** ret -+*/ -+TEST_UNIFORM_ZD (dup_d4_f64_x, svfloat64_t, double, -+ z0 = svdup_n_f64_x (p0, d4), -+ z0 = svdup_f64_x (p0, d4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_bf16.c -new file mode 100644 -index 000000000..d05ad5adb ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_bf16.c -@@ -0,0 +1,108 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dup_lane_w0_bf16_tied1: -+** mov (z[0-9]+\.h), w0 -+** tbl z0\.h, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_lane_w0_bf16_tied1, svbfloat16_t, uint16_t, -+ z0 = svdup_lane_bf16 (z0, x0), -+ z0 = svdup_lane (z0, x0)) -+ -+/* -+** dup_lane_w0_bf16_untied: -+** mov (z[0-9]+\.h), w0 -+** tbl z0\.h, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_lane_w0_bf16_untied, svbfloat16_t, uint16_t, -+ z0 = svdup_lane_bf16 (z1, x0), -+ z0 = svdup_lane (z1, x0)) -+ -+/* -+** dup_lane_0_bf16_tied1: -+** dup z0\.h, z0\.h\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_0_bf16_tied1, svbfloat16_t, -+ z0 = svdup_lane_bf16 (z0, 0), -+ z0 = svdup_lane (z0, 0)) -+ -+/* -+** dup_lane_0_bf16_untied: -+** dup z0\.h, z1\.h\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_0_bf16_untied, svbfloat16_t, -+ z0 = svdup_lane_bf16 (z1, 0), -+ z0 = svdup_lane (z1, 0)) -+ -+/* -+** dup_lane_15_bf16: -+** dup z0\.h, z0\.h\[15\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_15_bf16, svbfloat16_t, -+ z0 = svdup_lane_bf16 (z0, 15), -+ z0 = svdup_lane (z0, 15)) -+ -+/* -+** dup_lane_16_bf16: -+** dup z0\.h, z0\.h\[16\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_16_bf16, svbfloat16_t, -+ z0 = svdup_lane_bf16 (z0, 16), -+ z0 = svdup_lane (z0, 16)) -+ -+/* -+** dup_lane_31_bf16: -+** dup z0\.h, z0\.h\[31\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_31_bf16, svbfloat16_t, -+ z0 = svdup_lane_bf16 (z0, 31), -+ z0 = svdup_lane (z0, 31)) -+ -+/* -+** dup_lane_32_bf16: -+** mov (z[0-9]+\.h), #32 -+** tbl z0\.h, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_32_bf16, svbfloat16_t, -+ z0 = svdup_lane_bf16 (z0, 32), -+ z0 = svdup_lane (z0, 32)) -+ -+/* -+** dup_lane_63_bf16: -+** mov (z[0-9]+\.h), #63 -+** tbl z0\.h, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_63_bf16, svbfloat16_t, -+ z0 = svdup_lane_bf16 (z0, 63), -+ z0 = svdup_lane (z0, 63)) -+ -+/* -+** dup_lane_64_bf16: -+** mov (z[0-9]+\.h), #64 -+** tbl z0\.h, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_64_bf16, svbfloat16_t, -+ z0 = svdup_lane_bf16 (z0, 64), -+ z0 = svdup_lane (z0, 64)) -+ -+/* -+** dup_lane_255_bf16: -+** mov (z[0-9]+\.h), #255 -+** tbl z0\.h, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_255_bf16, svbfloat16_t, -+ z0 = svdup_lane_bf16 (z0, 255), -+ z0 = svdup_lane (z0, 255)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_f16.c -new file mode 100644 -index 000000000..142afbb24 
---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_f16.c -@@ -0,0 +1,108 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dup_lane_w0_f16_tied1: -+** mov (z[0-9]+\.h), w0 -+** tbl z0\.h, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_lane_w0_f16_tied1, svfloat16_t, uint16_t, -+ z0 = svdup_lane_f16 (z0, x0), -+ z0 = svdup_lane (z0, x0)) -+ -+/* -+** dup_lane_w0_f16_untied: -+** mov (z[0-9]+\.h), w0 -+** tbl z0\.h, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_lane_w0_f16_untied, svfloat16_t, uint16_t, -+ z0 = svdup_lane_f16 (z1, x0), -+ z0 = svdup_lane (z1, x0)) -+ -+/* -+** dup_lane_0_f16_tied1: -+** dup z0\.h, z0\.h\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_0_f16_tied1, svfloat16_t, -+ z0 = svdup_lane_f16 (z0, 0), -+ z0 = svdup_lane (z0, 0)) -+ -+/* -+** dup_lane_0_f16_untied: -+** dup z0\.h, z1\.h\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_0_f16_untied, svfloat16_t, -+ z0 = svdup_lane_f16 (z1, 0), -+ z0 = svdup_lane (z1, 0)) -+ -+/* -+** dup_lane_15_f16: -+** dup z0\.h, z0\.h\[15\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_15_f16, svfloat16_t, -+ z0 = svdup_lane_f16 (z0, 15), -+ z0 = svdup_lane (z0, 15)) -+ -+/* -+** dup_lane_16_f16: -+** dup z0\.h, z0\.h\[16\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_16_f16, svfloat16_t, -+ z0 = svdup_lane_f16 (z0, 16), -+ z0 = svdup_lane (z0, 16)) -+ -+/* -+** dup_lane_31_f16: -+** dup z0\.h, z0\.h\[31\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_31_f16, svfloat16_t, -+ z0 = svdup_lane_f16 (z0, 31), -+ z0 = svdup_lane (z0, 31)) -+ -+/* -+** dup_lane_32_f16: -+** mov (z[0-9]+\.h), #32 -+** tbl z0\.h, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_32_f16, svfloat16_t, -+ z0 = svdup_lane_f16 (z0, 32), -+ z0 = svdup_lane (z0, 32)) -+ -+/* -+** dup_lane_63_f16: -+** mov (z[0-9]+\.h), #63 -+** tbl z0\.h, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_63_f16, svfloat16_t, -+ z0 = svdup_lane_f16 (z0, 63), -+ z0 = svdup_lane (z0, 63)) -+ -+/* -+** dup_lane_64_f16: -+** mov (z[0-9]+\.h), #64 -+** tbl z0\.h, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_64_f16, svfloat16_t, -+ z0 = svdup_lane_f16 (z0, 64), -+ z0 = svdup_lane (z0, 64)) -+ -+/* -+** dup_lane_255_f16: -+** mov (z[0-9]+\.h), #255 -+** tbl z0\.h, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_255_f16, svfloat16_t, -+ z0 = svdup_lane_f16 (z0, 255), -+ z0 = svdup_lane (z0, 255)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_f32.c -new file mode 100644 -index 000000000..b32068a37 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_f32.c -@@ -0,0 +1,110 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dup_lane_w0_f32_tied1: -+** mov (z[0-9]+\.s), w0 -+** tbl z0\.s, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_lane_w0_f32_tied1, svfloat32_t, uint32_t, -+ z0 = svdup_lane_f32 (z0, x0), -+ z0 = svdup_lane (z0, x0)) -+ -+/* -+** dup_lane_w0_f32_untied: -+** mov (z[0-9]+\.s), w0 -+** tbl z0\.s, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_lane_w0_f32_untied, svfloat32_t, uint32_t, -+ z0 = svdup_lane_f32 (z1, x0), -+ z0 = svdup_lane (z1, x0)) -+ -+/* -+** dup_lane_0_f32_tied1: -+** dup z0\.s, z0\.s\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_0_f32_tied1, svfloat32_t, -+ z0 = svdup_lane_f32 (z0, 0), -+ z0 = svdup_lane (z0, 0)) -+ -+/* -+** dup_lane_0_f32_untied: -+** dup z0\.s, z1\.s\[0\] -+** ret -+*/ 
-+TEST_UNIFORM_Z (dup_lane_0_f32_untied, svfloat32_t, -+ z0 = svdup_lane_f32 (z1, 0), -+ z0 = svdup_lane (z1, 0)) -+ -+/* -+** dup_lane_15_f32: -+** dup z0\.s, z0\.s\[15\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_15_f32, svfloat32_t, -+ z0 = svdup_lane_f32 (z0, 15), -+ z0 = svdup_lane (z0, 15)) -+ -+/* -+** dup_lane_16_f32: -+** mov (z[0-9]+\.s), #16 -+** tbl z0\.s, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_16_f32, svfloat32_t, -+ z0 = svdup_lane_f32 (z0, 16), -+ z0 = svdup_lane (z0, 16)) -+ -+/* -+** dup_lane_31_f32: -+** mov (z[0-9]+\.s), #31 -+** tbl z0\.s, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_31_f32, svfloat32_t, -+ z0 = svdup_lane_f32 (z0, 31), -+ z0 = svdup_lane (z0, 31)) -+ -+/* -+** dup_lane_32_f32: -+** mov (z[0-9]+\.s), #32 -+** tbl z0\.s, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_32_f32, svfloat32_t, -+ z0 = svdup_lane_f32 (z0, 32), -+ z0 = svdup_lane (z0, 32)) -+ -+/* -+** dup_lane_63_f32: -+** mov (z[0-9]+\.s), #63 -+** tbl z0\.s, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_63_f32, svfloat32_t, -+ z0 = svdup_lane_f32 (z0, 63), -+ z0 = svdup_lane (z0, 63)) -+ -+/* -+** dup_lane_64_f32: -+** mov (z[0-9]+\.s), #64 -+** tbl z0\.s, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_64_f32, svfloat32_t, -+ z0 = svdup_lane_f32 (z0, 64), -+ z0 = svdup_lane (z0, 64)) -+ -+/* -+** dup_lane_255_f32: -+** mov (z[0-9]+\.s), #255 -+** tbl z0\.s, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_255_f32, svfloat32_t, -+ z0 = svdup_lane_f32 (z0, 255), -+ z0 = svdup_lane (z0, 255)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_f64.c -new file mode 100644 -index 000000000..64af50d0c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_f64.c -@@ -0,0 +1,111 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dup_lane_x0_f64_tied1: -+** mov (z[0-9]+\.d), x0 -+** tbl z0\.d, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_lane_x0_f64_tied1, svfloat64_t, uint64_t, -+ z0 = svdup_lane_f64 (z0, x0), -+ z0 = svdup_lane (z0, x0)) -+ -+/* -+** dup_lane_x0_f64_untied: -+** mov (z[0-9]+\.d), x0 -+** tbl z0\.d, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_lane_x0_f64_untied, svfloat64_t, uint64_t, -+ z0 = svdup_lane_f64 (z1, x0), -+ z0 = svdup_lane (z1, x0)) -+ -+/* -+** dup_lane_0_f64_tied1: -+** dup z0\.d, z0\.d\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_0_f64_tied1, svfloat64_t, -+ z0 = svdup_lane_f64 (z0, 0), -+ z0 = svdup_lane (z0, 0)) -+ -+/* -+** dup_lane_0_f64_untied: -+** dup z0\.d, z1\.d\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_0_f64_untied, svfloat64_t, -+ z0 = svdup_lane_f64 (z1, 0), -+ z0 = svdup_lane (z1, 0)) -+ -+/* -+** dup_lane_15_f64: -+** mov (z[0-9]+\.d), #15 -+** tbl z0\.d, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_15_f64, svfloat64_t, -+ z0 = svdup_lane_f64 (z0, 15), -+ z0 = svdup_lane (z0, 15)) -+ -+/* -+** dup_lane_16_f64: -+** mov (z[0-9]+\.d), #16 -+** tbl z0\.d, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_16_f64, svfloat64_t, -+ z0 = svdup_lane_f64 (z0, 16), -+ z0 = svdup_lane (z0, 16)) -+ -+/* -+** dup_lane_31_f64: -+** mov (z[0-9]+\.d), #31 -+** tbl z0\.d, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_31_f64, svfloat64_t, -+ z0 = svdup_lane_f64 (z0, 31), -+ z0 = svdup_lane (z0, 31)) -+ -+/* -+** dup_lane_32_f64: -+** mov (z[0-9]+\.d), #32 -+** tbl z0\.d, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_32_f64, svfloat64_t, -+ z0 = 
svdup_lane_f64 (z0, 32), -+ z0 = svdup_lane (z0, 32)) -+ -+/* -+** dup_lane_63_f64: -+** mov (z[0-9]+\.d), #63 -+** tbl z0\.d, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_63_f64, svfloat64_t, -+ z0 = svdup_lane_f64 (z0, 63), -+ z0 = svdup_lane (z0, 63)) -+ -+/* -+** dup_lane_64_f64: -+** mov (z[0-9]+\.d), #64 -+** tbl z0\.d, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_64_f64, svfloat64_t, -+ z0 = svdup_lane_f64 (z0, 64), -+ z0 = svdup_lane (z0, 64)) -+ -+/* -+** dup_lane_255_f64: -+** mov (z[0-9]+\.d), #255 -+** tbl z0\.d, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_255_f64, svfloat64_t, -+ z0 = svdup_lane_f64 (z0, 255), -+ z0 = svdup_lane (z0, 255)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_s16.c -new file mode 100644 -index 000000000..3b6f20696 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_s16.c -@@ -0,0 +1,126 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dup_lane_w0_s16_tied1: -+** mov (z[0-9]+\.h), w0 -+** tbl z0\.h, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_lane_w0_s16_tied1, svint16_t, uint16_t, -+ z0 = svdup_lane_s16 (z0, x0), -+ z0 = svdup_lane (z0, x0)) -+ -+/* -+** dup_lane_w0_s16_untied: -+** mov (z[0-9]+\.h), w0 -+** tbl z0\.h, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_lane_w0_s16_untied, svint16_t, uint16_t, -+ z0 = svdup_lane_s16 (z1, x0), -+ z0 = svdup_lane (z1, x0)) -+ -+/* -+** dup_lane_0_s16_tied1: -+** dup z0\.h, z0\.h\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_0_s16_tied1, svint16_t, -+ z0 = svdup_lane_s16 (z0, 0), -+ z0 = svdup_lane (z0, 0)) -+ -+/* -+** dup_lane_0_s16_untied: -+** dup z0\.h, z1\.h\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_0_s16_untied, svint16_t, -+ z0 = svdup_lane_s16 (z1, 0), -+ z0 = svdup_lane (z1, 0)) -+ -+/* -+** dup_lane_7_s16: -+** dup z0\.h, z0\.h\[7\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_7_s16, svint16_t, -+ z0 = svdup_lane_s16 (z0, 7), -+ z0 = svdup_lane (z0, 7)) -+ -+/* -+** dup_lane_8_s16: -+** dup z0\.h, z0\.h\[8\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_8_s16, svint16_t, -+ z0 = svdup_lane_s16 (z0, 8), -+ z0 = svdup_lane (z0, 8)) -+ -+/* -+** dup_lane_15_s16: -+** dup z0\.h, z0\.h\[15\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_15_s16, svint16_t, -+ z0 = svdup_lane_s16 (z0, 15), -+ z0 = svdup_lane (z0, 15)) -+ -+/* -+** dup_lane_16_s16: -+** dup z0\.h, z0\.h\[16\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_16_s16, svint16_t, -+ z0 = svdup_lane_s16 (z0, 16), -+ z0 = svdup_lane (z0, 16)) -+ -+/* -+** dup_lane_31_s16: -+** dup z0\.h, z0\.h\[31\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_31_s16, svint16_t, -+ z0 = svdup_lane_s16 (z0, 31), -+ z0 = svdup_lane (z0, 31)) -+ -+/* -+** dup_lane_32_s16: -+** mov (z[0-9]+\.h), #32 -+** tbl z0\.h, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_32_s16, svint16_t, -+ z0 = svdup_lane_s16 (z0, 32), -+ z0 = svdup_lane (z0, 32)) -+ -+/* -+** dup_lane_63_s16: -+** mov (z[0-9]+\.h), #63 -+** tbl z0\.h, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_63_s16, svint16_t, -+ z0 = svdup_lane_s16 (z0, 63), -+ z0 = svdup_lane (z0, 63)) -+ -+/* -+** dup_lane_64_s16: -+** mov (z[0-9]+\.h), #64 -+** tbl z0\.h, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_64_s16, svint16_t, -+ z0 = svdup_lane_s16 (z0, 64), -+ z0 = svdup_lane (z0, 64)) -+ -+/* -+** dup_lane_255_s16: -+** mov (z[0-9]+\.h), #255 -+** tbl z0\.h, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_255_s16, svint16_t, 
-+ z0 = svdup_lane_s16 (z0, 255), -+ z0 = svdup_lane (z0, 255)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_s32.c -new file mode 100644 -index 000000000..bf597fdf6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_s32.c -@@ -0,0 +1,128 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dup_lane_w0_s32_tied1: -+** mov (z[0-9]+\.s), w0 -+** tbl z0\.s, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_lane_w0_s32_tied1, svint32_t, uint32_t, -+ z0 = svdup_lane_s32 (z0, x0), -+ z0 = svdup_lane (z0, x0)) -+ -+/* -+** dup_lane_w0_s32_untied: -+** mov (z[0-9]+\.s), w0 -+** tbl z0\.s, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_lane_w0_s32_untied, svint32_t, uint32_t, -+ z0 = svdup_lane_s32 (z1, x0), -+ z0 = svdup_lane (z1, x0)) -+ -+/* -+** dup_lane_0_s32_tied1: -+** dup z0\.s, z0\.s\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_0_s32_tied1, svint32_t, -+ z0 = svdup_lane_s32 (z0, 0), -+ z0 = svdup_lane (z0, 0)) -+ -+/* -+** dup_lane_0_s32_untied: -+** dup z0\.s, z1\.s\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_0_s32_untied, svint32_t, -+ z0 = svdup_lane_s32 (z1, 0), -+ z0 = svdup_lane (z1, 0)) -+ -+/* -+** dup_lane_7_s32: -+** dup z0\.s, z0\.s\[7\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_7_s32, svint32_t, -+ z0 = svdup_lane_s32 (z0, 7), -+ z0 = svdup_lane (z0, 7)) -+ -+/* -+** dup_lane_8_s32: -+** dup z0\.s, z0\.s\[8\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_8_s32, svint32_t, -+ z0 = svdup_lane_s32 (z0, 8), -+ z0 = svdup_lane (z0, 8)) -+ -+/* -+** dup_lane_15_s32: -+** dup z0\.s, z0\.s\[15\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_15_s32, svint32_t, -+ z0 = svdup_lane_s32 (z0, 15), -+ z0 = svdup_lane (z0, 15)) -+ -+/* -+** dup_lane_16_s32: -+** mov (z[0-9]+\.s), #16 -+** tbl z0\.s, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_16_s32, svint32_t, -+ z0 = svdup_lane_s32 (z0, 16), -+ z0 = svdup_lane (z0, 16)) -+ -+/* -+** dup_lane_31_s32: -+** mov (z[0-9]+\.s), #31 -+** tbl z0\.s, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_31_s32, svint32_t, -+ z0 = svdup_lane_s32 (z0, 31), -+ z0 = svdup_lane (z0, 31)) -+ -+/* -+** dup_lane_32_s32: -+** mov (z[0-9]+\.s), #32 -+** tbl z0\.s, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_32_s32, svint32_t, -+ z0 = svdup_lane_s32 (z0, 32), -+ z0 = svdup_lane (z0, 32)) -+ -+/* -+** dup_lane_63_s32: -+** mov (z[0-9]+\.s), #63 -+** tbl z0\.s, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_63_s32, svint32_t, -+ z0 = svdup_lane_s32 (z0, 63), -+ z0 = svdup_lane (z0, 63)) -+ -+/* -+** dup_lane_64_s32: -+** mov (z[0-9]+\.s), #64 -+** tbl z0\.s, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_64_s32, svint32_t, -+ z0 = svdup_lane_s32 (z0, 64), -+ z0 = svdup_lane (z0, 64)) -+ -+/* -+** dup_lane_255_s32: -+** mov (z[0-9]+\.s), #255 -+** tbl z0\.s, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_255_s32, svint32_t, -+ z0 = svdup_lane_s32 (z0, 255), -+ z0 = svdup_lane (z0, 255)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_s64.c -new file mode 100644 -index 000000000..f2f3a1770 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_s64.c -@@ -0,0 +1,130 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dup_lane_x0_s64_tied1: -+** mov (z[0-9]+\.d), x0 -+** tbl z0\.d, z0\.d, \1 -+** ret -+*/ 
-+TEST_UNIFORM_ZX (dup_lane_x0_s64_tied1, svint64_t, uint64_t, -+ z0 = svdup_lane_s64 (z0, x0), -+ z0 = svdup_lane (z0, x0)) -+ -+/* -+** dup_lane_x0_s64_untied: -+** mov (z[0-9]+\.d), x0 -+** tbl z0\.d, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_lane_x0_s64_untied, svint64_t, uint64_t, -+ z0 = svdup_lane_s64 (z1, x0), -+ z0 = svdup_lane (z1, x0)) -+ -+/* -+** dup_lane_0_s64_tied1: -+** dup z0\.d, z0\.d\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_0_s64_tied1, svint64_t, -+ z0 = svdup_lane_s64 (z0, 0), -+ z0 = svdup_lane (z0, 0)) -+ -+/* -+** dup_lane_0_s64_untied: -+** dup z0\.d, z1\.d\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_0_s64_untied, svint64_t, -+ z0 = svdup_lane_s64 (z1, 0), -+ z0 = svdup_lane (z1, 0)) -+ -+/* -+** dup_lane_7_s64: -+** dup z0\.d, z0\.d\[7\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_7_s64, svint64_t, -+ z0 = svdup_lane_s64 (z0, 7), -+ z0 = svdup_lane (z0, 7)) -+ -+/* -+** dup_lane_8_s64: -+** mov (z[0-9]+\.d), #8 -+** tbl z0\.d, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_8_s64, svint64_t, -+ z0 = svdup_lane_s64 (z0, 8), -+ z0 = svdup_lane (z0, 8)) -+ -+/* -+** dup_lane_15_s64: -+** mov (z[0-9]+\.d), #15 -+** tbl z0\.d, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_15_s64, svint64_t, -+ z0 = svdup_lane_s64 (z0, 15), -+ z0 = svdup_lane (z0, 15)) -+ -+/* -+** dup_lane_16_s64: -+** mov (z[0-9]+\.d), #16 -+** tbl z0\.d, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_16_s64, svint64_t, -+ z0 = svdup_lane_s64 (z0, 16), -+ z0 = svdup_lane (z0, 16)) -+ -+/* -+** dup_lane_31_s64: -+** mov (z[0-9]+\.d), #31 -+** tbl z0\.d, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_31_s64, svint64_t, -+ z0 = svdup_lane_s64 (z0, 31), -+ z0 = svdup_lane (z0, 31)) -+ -+/* -+** dup_lane_32_s64: -+** mov (z[0-9]+\.d), #32 -+** tbl z0\.d, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_32_s64, svint64_t, -+ z0 = svdup_lane_s64 (z0, 32), -+ z0 = svdup_lane (z0, 32)) -+ -+/* -+** dup_lane_63_s64: -+** mov (z[0-9]+\.d), #63 -+** tbl z0\.d, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_63_s64, svint64_t, -+ z0 = svdup_lane_s64 (z0, 63), -+ z0 = svdup_lane (z0, 63)) -+ -+/* -+** dup_lane_64_s64: -+** mov (z[0-9]+\.d), #64 -+** tbl z0\.d, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_64_s64, svint64_t, -+ z0 = svdup_lane_s64 (z0, 64), -+ z0 = svdup_lane (z0, 64)) -+ -+/* -+** dup_lane_255_s64: -+** mov (z[0-9]+\.d), #255 -+** tbl z0\.d, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_255_s64, svint64_t, -+ z0 = svdup_lane_s64 (z0, 255), -+ z0 = svdup_lane (z0, 255)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_s8.c -new file mode 100644 -index 000000000..f5a07e9f3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_s8.c -@@ -0,0 +1,124 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dup_lane_w0_s8_tied1: -+** mov (z[0-9]+\.b), w0 -+** tbl z0\.b, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_lane_w0_s8_tied1, svint8_t, uint8_t, -+ z0 = svdup_lane_s8 (z0, x0), -+ z0 = svdup_lane (z0, x0)) -+ -+/* -+** dup_lane_w0_s8_untied: -+** mov (z[0-9]+\.b), w0 -+** tbl z0\.b, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_lane_w0_s8_untied, svint8_t, uint8_t, -+ z0 = svdup_lane_s8 (z1, x0), -+ z0 = svdup_lane (z1, x0)) -+ -+/* -+** dup_lane_0_s8_tied1: -+** dup z0\.b, z0\.b\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_0_s8_tied1, svint8_t, -+ z0 = svdup_lane_s8 (z0, 0), -+ z0 = svdup_lane 
(z0, 0)) -+ -+/* -+** dup_lane_0_s8_untied: -+** dup z0\.b, z1\.b\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_0_s8_untied, svint8_t, -+ z0 = svdup_lane_s8 (z1, 0), -+ z0 = svdup_lane (z1, 0)) -+ -+/* -+** dup_lane_7_s8: -+** dup z0\.b, z0\.b\[7\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_7_s8, svint8_t, -+ z0 = svdup_lane_s8 (z0, 7), -+ z0 = svdup_lane (z0, 7)) -+ -+/* -+** dup_lane_8_s8: -+** dup z0\.b, z0\.b\[8\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_8_s8, svint8_t, -+ z0 = svdup_lane_s8 (z0, 8), -+ z0 = svdup_lane (z0, 8)) -+ -+/* -+** dup_lane_15_s8: -+** dup z0\.b, z0\.b\[15\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_15_s8, svint8_t, -+ z0 = svdup_lane_s8 (z0, 15), -+ z0 = svdup_lane (z0, 15)) -+ -+/* -+** dup_lane_16_s8: -+** dup z0\.b, z0\.b\[16\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_16_s8, svint8_t, -+ z0 = svdup_lane_s8 (z0, 16), -+ z0 = svdup_lane (z0, 16)) -+ -+/* -+** dup_lane_31_s8: -+** dup z0\.b, z0\.b\[31\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_31_s8, svint8_t, -+ z0 = svdup_lane_s8 (z0, 31), -+ z0 = svdup_lane (z0, 31)) -+ -+/* -+** dup_lane_32_s8: -+** dup z0\.b, z0\.b\[32\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_32_s8, svint8_t, -+ z0 = svdup_lane_s8 (z0, 32), -+ z0 = svdup_lane (z0, 32)) -+ -+/* -+** dup_lane_63_s8: -+** dup z0\.b, z0\.b\[63\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_63_s8, svint8_t, -+ z0 = svdup_lane_s8 (z0, 63), -+ z0 = svdup_lane (z0, 63)) -+ -+/* -+** dup_lane_64_s8: -+** mov (z[0-9]+\.b), #64 -+** tbl z0\.b, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_64_s8, svint8_t, -+ z0 = svdup_lane_s8 (z0, 64), -+ z0 = svdup_lane (z0, 64)) -+ -+/* -+** dup_lane_255_s8: -+** mov (z[0-9]+\.b), #-1 -+** tbl z0\.b, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_255_s8, svint8_t, -+ z0 = svdup_lane_s8 (z0, 255), -+ z0 = svdup_lane (z0, 255)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_u16.c -new file mode 100644 -index 000000000..e5135caa5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_u16.c -@@ -0,0 +1,126 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dup_lane_w0_u16_tied1: -+** mov (z[0-9]+\.h), w0 -+** tbl z0\.h, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_lane_w0_u16_tied1, svuint16_t, uint16_t, -+ z0 = svdup_lane_u16 (z0, x0), -+ z0 = svdup_lane (z0, x0)) -+ -+/* -+** dup_lane_w0_u16_untied: -+** mov (z[0-9]+\.h), w0 -+** tbl z0\.h, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_lane_w0_u16_untied, svuint16_t, uint16_t, -+ z0 = svdup_lane_u16 (z1, x0), -+ z0 = svdup_lane (z1, x0)) -+ -+/* -+** dup_lane_0_u16_tied1: -+** dup z0\.h, z0\.h\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_0_u16_tied1, svuint16_t, -+ z0 = svdup_lane_u16 (z0, 0), -+ z0 = svdup_lane (z0, 0)) -+ -+/* -+** dup_lane_0_u16_untied: -+** dup z0\.h, z1\.h\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_0_u16_untied, svuint16_t, -+ z0 = svdup_lane_u16 (z1, 0), -+ z0 = svdup_lane (z1, 0)) -+ -+/* -+** dup_lane_7_u16: -+** dup z0\.h, z0\.h\[7\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_7_u16, svuint16_t, -+ z0 = svdup_lane_u16 (z0, 7), -+ z0 = svdup_lane (z0, 7)) -+ -+/* -+** dup_lane_8_u16: -+** dup z0\.h, z0\.h\[8\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_8_u16, svuint16_t, -+ z0 = svdup_lane_u16 (z0, 8), -+ z0 = svdup_lane (z0, 8)) -+ -+/* -+** dup_lane_15_u16: -+** dup z0\.h, z0\.h\[15\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_15_u16, svuint16_t, -+ z0 = svdup_lane_u16 (z0, 15), 
-+ z0 = svdup_lane (z0, 15)) -+ -+/* -+** dup_lane_16_u16: -+** dup z0\.h, z0\.h\[16\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_16_u16, svuint16_t, -+ z0 = svdup_lane_u16 (z0, 16), -+ z0 = svdup_lane (z0, 16)) -+ -+/* -+** dup_lane_31_u16: -+** dup z0\.h, z0\.h\[31\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_31_u16, svuint16_t, -+ z0 = svdup_lane_u16 (z0, 31), -+ z0 = svdup_lane (z0, 31)) -+ -+/* -+** dup_lane_32_u16: -+** mov (z[0-9]+\.h), #32 -+** tbl z0\.h, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_32_u16, svuint16_t, -+ z0 = svdup_lane_u16 (z0, 32), -+ z0 = svdup_lane (z0, 32)) -+ -+/* -+** dup_lane_63_u16: -+** mov (z[0-9]+\.h), #63 -+** tbl z0\.h, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_63_u16, svuint16_t, -+ z0 = svdup_lane_u16 (z0, 63), -+ z0 = svdup_lane (z0, 63)) -+ -+/* -+** dup_lane_64_u16: -+** mov (z[0-9]+\.h), #64 -+** tbl z0\.h, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_64_u16, svuint16_t, -+ z0 = svdup_lane_u16 (z0, 64), -+ z0 = svdup_lane (z0, 64)) -+ -+/* -+** dup_lane_255_u16: -+** mov (z[0-9]+\.h), #255 -+** tbl z0\.h, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_255_u16, svuint16_t, -+ z0 = svdup_lane_u16 (z0, 255), -+ z0 = svdup_lane (z0, 255)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_u32.c -new file mode 100644 -index 000000000..7e972aca7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_u32.c -@@ -0,0 +1,128 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dup_lane_w0_u32_tied1: -+** mov (z[0-9]+\.s), w0 -+** tbl z0\.s, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_lane_w0_u32_tied1, svuint32_t, uint32_t, -+ z0 = svdup_lane_u32 (z0, x0), -+ z0 = svdup_lane (z0, x0)) -+ -+/* -+** dup_lane_w0_u32_untied: -+** mov (z[0-9]+\.s), w0 -+** tbl z0\.s, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_lane_w0_u32_untied, svuint32_t, uint32_t, -+ z0 = svdup_lane_u32 (z1, x0), -+ z0 = svdup_lane (z1, x0)) -+ -+/* -+** dup_lane_0_u32_tied1: -+** dup z0\.s, z0\.s\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_0_u32_tied1, svuint32_t, -+ z0 = svdup_lane_u32 (z0, 0), -+ z0 = svdup_lane (z0, 0)) -+ -+/* -+** dup_lane_0_u32_untied: -+** dup z0\.s, z1\.s\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_0_u32_untied, svuint32_t, -+ z0 = svdup_lane_u32 (z1, 0), -+ z0 = svdup_lane (z1, 0)) -+ -+/* -+** dup_lane_7_u32: -+** dup z0\.s, z0\.s\[7\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_7_u32, svuint32_t, -+ z0 = svdup_lane_u32 (z0, 7), -+ z0 = svdup_lane (z0, 7)) -+ -+/* -+** dup_lane_8_u32: -+** dup z0\.s, z0\.s\[8\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_8_u32, svuint32_t, -+ z0 = svdup_lane_u32 (z0, 8), -+ z0 = svdup_lane (z0, 8)) -+ -+/* -+** dup_lane_15_u32: -+** dup z0\.s, z0\.s\[15\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_15_u32, svuint32_t, -+ z0 = svdup_lane_u32 (z0, 15), -+ z0 = svdup_lane (z0, 15)) -+ -+/* -+** dup_lane_16_u32: -+** mov (z[0-9]+\.s), #16 -+** tbl z0\.s, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_16_u32, svuint32_t, -+ z0 = svdup_lane_u32 (z0, 16), -+ z0 = svdup_lane (z0, 16)) -+ -+/* -+** dup_lane_31_u32: -+** mov (z[0-9]+\.s), #31 -+** tbl z0\.s, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_31_u32, svuint32_t, -+ z0 = svdup_lane_u32 (z0, 31), -+ z0 = svdup_lane (z0, 31)) -+ -+/* -+** dup_lane_32_u32: -+** mov (z[0-9]+\.s), #32 -+** tbl z0\.s, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_32_u32, svuint32_t, -+ z0 = 
svdup_lane_u32 (z0, 32), -+ z0 = svdup_lane (z0, 32)) -+ -+/* -+** dup_lane_63_u32: -+** mov (z[0-9]+\.s), #63 -+** tbl z0\.s, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_63_u32, svuint32_t, -+ z0 = svdup_lane_u32 (z0, 63), -+ z0 = svdup_lane (z0, 63)) -+ -+/* -+** dup_lane_64_u32: -+** mov (z[0-9]+\.s), #64 -+** tbl z0\.s, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_64_u32, svuint32_t, -+ z0 = svdup_lane_u32 (z0, 64), -+ z0 = svdup_lane (z0, 64)) -+ -+/* -+** dup_lane_255_u32: -+** mov (z[0-9]+\.s), #255 -+** tbl z0\.s, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_255_u32, svuint32_t, -+ z0 = svdup_lane_u32 (z0, 255), -+ z0 = svdup_lane (z0, 255)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_u64.c -new file mode 100644 -index 000000000..5097b7e96 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_u64.c -@@ -0,0 +1,130 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dup_lane_x0_u64_tied1: -+** mov (z[0-9]+\.d), x0 -+** tbl z0\.d, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_lane_x0_u64_tied1, svuint64_t, uint64_t, -+ z0 = svdup_lane_u64 (z0, x0), -+ z0 = svdup_lane (z0, x0)) -+ -+/* -+** dup_lane_x0_u64_untied: -+** mov (z[0-9]+\.d), x0 -+** tbl z0\.d, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_lane_x0_u64_untied, svuint64_t, uint64_t, -+ z0 = svdup_lane_u64 (z1, x0), -+ z0 = svdup_lane (z1, x0)) -+ -+/* -+** dup_lane_0_u64_tied1: -+** dup z0\.d, z0\.d\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_0_u64_tied1, svuint64_t, -+ z0 = svdup_lane_u64 (z0, 0), -+ z0 = svdup_lane (z0, 0)) -+ -+/* -+** dup_lane_0_u64_untied: -+** dup z0\.d, z1\.d\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_0_u64_untied, svuint64_t, -+ z0 = svdup_lane_u64 (z1, 0), -+ z0 = svdup_lane (z1, 0)) -+ -+/* -+** dup_lane_7_u64: -+** dup z0\.d, z0\.d\[7\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_7_u64, svuint64_t, -+ z0 = svdup_lane_u64 (z0, 7), -+ z0 = svdup_lane (z0, 7)) -+ -+/* -+** dup_lane_8_u64: -+** mov (z[0-9]+\.d), #8 -+** tbl z0\.d, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_8_u64, svuint64_t, -+ z0 = svdup_lane_u64 (z0, 8), -+ z0 = svdup_lane (z0, 8)) -+ -+/* -+** dup_lane_15_u64: -+** mov (z[0-9]+\.d), #15 -+** tbl z0\.d, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_15_u64, svuint64_t, -+ z0 = svdup_lane_u64 (z0, 15), -+ z0 = svdup_lane (z0, 15)) -+ -+/* -+** dup_lane_16_u64: -+** mov (z[0-9]+\.d), #16 -+** tbl z0\.d, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_16_u64, svuint64_t, -+ z0 = svdup_lane_u64 (z0, 16), -+ z0 = svdup_lane (z0, 16)) -+ -+/* -+** dup_lane_31_u64: -+** mov (z[0-9]+\.d), #31 -+** tbl z0\.d, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_31_u64, svuint64_t, -+ z0 = svdup_lane_u64 (z0, 31), -+ z0 = svdup_lane (z0, 31)) -+ -+/* -+** dup_lane_32_u64: -+** mov (z[0-9]+\.d), #32 -+** tbl z0\.d, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_32_u64, svuint64_t, -+ z0 = svdup_lane_u64 (z0, 32), -+ z0 = svdup_lane (z0, 32)) -+ -+/* -+** dup_lane_63_u64: -+** mov (z[0-9]+\.d), #63 -+** tbl z0\.d, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_63_u64, svuint64_t, -+ z0 = svdup_lane_u64 (z0, 63), -+ z0 = svdup_lane (z0, 63)) -+ -+/* -+** dup_lane_64_u64: -+** mov (z[0-9]+\.d), #64 -+** tbl z0\.d, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_64_u64, svuint64_t, -+ z0 = svdup_lane_u64 (z0, 64), -+ z0 = svdup_lane (z0, 64)) -+ -+/* -+** dup_lane_255_u64: -+** 
mov (z[0-9]+\.d), #255 -+** tbl z0\.d, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_255_u64, svuint64_t, -+ z0 = svdup_lane_u64 (z0, 255), -+ z0 = svdup_lane (z0, 255)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_u8.c -new file mode 100644 -index 000000000..25fdf0acb ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_lane_u8.c -@@ -0,0 +1,124 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dup_lane_w0_u8_tied1: -+** mov (z[0-9]+\.b), w0 -+** tbl z0\.b, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_lane_w0_u8_tied1, svuint8_t, uint8_t, -+ z0 = svdup_lane_u8 (z0, x0), -+ z0 = svdup_lane (z0, x0)) -+ -+/* -+** dup_lane_w0_u8_untied: -+** mov (z[0-9]+\.b), w0 -+** tbl z0\.b, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_lane_w0_u8_untied, svuint8_t, uint8_t, -+ z0 = svdup_lane_u8 (z1, x0), -+ z0 = svdup_lane (z1, x0)) -+ -+/* -+** dup_lane_0_u8_tied1: -+** dup z0\.b, z0\.b\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_0_u8_tied1, svuint8_t, -+ z0 = svdup_lane_u8 (z0, 0), -+ z0 = svdup_lane (z0, 0)) -+ -+/* -+** dup_lane_0_u8_untied: -+** dup z0\.b, z1\.b\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_0_u8_untied, svuint8_t, -+ z0 = svdup_lane_u8 (z1, 0), -+ z0 = svdup_lane (z1, 0)) -+ -+/* -+** dup_lane_7_u8: -+** dup z0\.b, z0\.b\[7\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_7_u8, svuint8_t, -+ z0 = svdup_lane_u8 (z0, 7), -+ z0 = svdup_lane (z0, 7)) -+ -+/* -+** dup_lane_8_u8: -+** dup z0\.b, z0\.b\[8\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_8_u8, svuint8_t, -+ z0 = svdup_lane_u8 (z0, 8), -+ z0 = svdup_lane (z0, 8)) -+ -+/* -+** dup_lane_15_u8: -+** dup z0\.b, z0\.b\[15\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_15_u8, svuint8_t, -+ z0 = svdup_lane_u8 (z0, 15), -+ z0 = svdup_lane (z0, 15)) -+ -+/* -+** dup_lane_16_u8: -+** dup z0\.b, z0\.b\[16\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_16_u8, svuint8_t, -+ z0 = svdup_lane_u8 (z0, 16), -+ z0 = svdup_lane (z0, 16)) -+ -+/* -+** dup_lane_31_u8: -+** dup z0\.b, z0\.b\[31\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_31_u8, svuint8_t, -+ z0 = svdup_lane_u8 (z0, 31), -+ z0 = svdup_lane (z0, 31)) -+ -+/* -+** dup_lane_32_u8: -+** dup z0\.b, z0\.b\[32\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_32_u8, svuint8_t, -+ z0 = svdup_lane_u8 (z0, 32), -+ z0 = svdup_lane (z0, 32)) -+ -+/* -+** dup_lane_63_u8: -+** dup z0\.b, z0\.b\[63\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_63_u8, svuint8_t, -+ z0 = svdup_lane_u8 (z0, 63), -+ z0 = svdup_lane (z0, 63)) -+ -+/* -+** dup_lane_64_u8: -+** mov (z[0-9]+\.b), #64 -+** tbl z0\.b, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_64_u8, svuint8_t, -+ z0 = svdup_lane_u8 (z0, 64), -+ z0 = svdup_lane (z0, 64)) -+ -+/* -+** dup_lane_255_u8: -+** mov (z[0-9]+\.b), #-1 -+** tbl z0\.b, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_lane_255_u8, svuint8_t, -+ z0 = svdup_lane_u8 (z0, 255), -+ z0 = svdup_lane (z0, 255)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s16.c -new file mode 100644 -index 000000000..876f36db7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s16.c -@@ -0,0 +1,1193 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dup_1_s16: -+** mov z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_s16, svint16_t, -+ z0 = svdup_n_s16 (1), -+ z0 = svdup_s16 (1)) -+ -+/* -+** 
dup_127_s16: -+** mov z0\.h, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_127_s16, svint16_t, -+ z0 = svdup_n_s16 (127), -+ z0 = svdup_s16 (127)) -+ -+/* -+** dup_128_s16: -+** mov z0\.h, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_128_s16, svint16_t, -+ z0 = svdup_n_s16 (128), -+ z0 = svdup_s16 (128)) -+ -+/* -+** dup_129_s16: -+** movi v([0-9]+)\.8h, 0x81 -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_129_s16, svint16_t, -+ z0 = svdup_n_s16 (129), -+ z0 = svdup_s16 (129)) -+ -+/* -+** dup_253_s16: -+** movi v([0-9]+)\.8h, 0xfd -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_253_s16, svint16_t, -+ z0 = svdup_n_s16 (253), -+ z0 = svdup_s16 (253)) -+ -+/* -+** dup_254_s16: -+** mov z0\.h, #254 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_254_s16, svint16_t, -+ z0 = svdup_n_s16 (254), -+ z0 = svdup_s16 (254)) -+ -+/* -+** dup_255_s16: -+** mov z0\.h, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_255_s16, svint16_t, -+ z0 = svdup_n_s16 (255), -+ z0 = svdup_s16 (255)) -+ -+/* -+** dup_256_s16: -+** mov z0\.h, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_256_s16, svint16_t, -+ z0 = svdup_n_s16 (256), -+ z0 = svdup_s16 (256)) -+ -+/* -+** dup_257_s16: -+** mov z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_257_s16, svint16_t, -+ z0 = svdup_n_s16 (257), -+ z0 = svdup_s16 (257)) -+ -+/* -+** dup_512_s16: -+** mov z0\.h, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_512_s16, svint16_t, -+ z0 = svdup_n_s16 (512), -+ z0 = svdup_s16 (512)) -+ -+/* -+** dup_7f00_s16: -+** mov z0\.h, #32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7f00_s16, svint16_t, -+ z0 = svdup_n_s16 (0x7f00), -+ z0 = svdup_s16 (0x7f00)) -+ -+/* -+** dup_7f01_s16: -+** mov (w[0-9]+), 32513 -+** mov z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7f01_s16, svint16_t, -+ z0 = svdup_n_s16 (0x7f01), -+ z0 = svdup_s16 (0x7f01)) -+ -+/* -+** dup_7ffd_s16: -+** mov (w[0-9]+), 32765 -+** mov z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7ffd_s16, svint16_t, -+ z0 = svdup_n_s16 (0x7ffd), -+ z0 = svdup_s16 (0x7ffd)) -+ -+/* -+** dup_7ffe_s16: -+** mov z0\.h, #32766 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7ffe_s16, svint16_t, -+ z0 = svdup_n_s16 (0x7ffe), -+ z0 = svdup_s16 (0x7ffe)) -+ -+/* -+** dup_7fff_s16: -+** mov z0\.h, #32767 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7fff_s16, svint16_t, -+ z0 = svdup_n_s16 (0x7fff), -+ z0 = svdup_s16 (0x7fff)) -+ -+/* -+** dup_m1_s16: -+** mov z0\.b, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m1_s16, svint16_t, -+ z0 = svdup_n_s16 (-1), -+ z0 = svdup_s16 (-1)) -+ -+/* -+** dup_m128_s16: -+** mov z0\.h, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m128_s16, svint16_t, -+ z0 = svdup_n_s16 (-128), -+ z0 = svdup_s16 (-128)) -+ -+/* -+** dup_m129_s16: -+** mov z0\.h, #-129 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m129_s16, svint16_t, -+ z0 = svdup_n_s16 (-129), -+ z0 = svdup_s16 (-129)) -+ -+/* -+** dup_m130_s16: -+** mvni v([0-9]+)\.8h, 0x81 -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m130_s16, svint16_t, -+ z0 = svdup_n_s16 (-130), -+ z0 = svdup_s16 (-130)) -+ -+/* -+** dup_m254_s16: -+** mvni v([0-9]+)\.8h, 0xfd -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m254_s16, svint16_t, -+ z0 = svdup_n_s16 (-254), -+ z0 = svdup_s16 (-254)) -+ -+/* -+** dup_m255_s16: -+** mov z0\.h, #-255 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m255_s16, svint16_t, -+ z0 = svdup_n_s16 (-255), -+ z0 = svdup_s16 (-255)) -+ -+/* -+** dup_m256_s16: -+** mov z0\.h, #-256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m256_s16, svint16_t, -+ z0 = svdup_n_s16 (-256), -+ z0 = svdup_s16 (-256)) -+ -+/* -+** dup_m257_s16: -+** mov z0\.h, #-257 
-+** ret -+*/ -+TEST_UNIFORM_Z (dup_m257_s16, svint16_t, -+ z0 = svdup_n_s16 (-257), -+ z0 = svdup_s16 (-257)) -+ -+/* -+** dup_m258_s16: -+** mov z0\.b, #-2 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m258_s16, svint16_t, -+ z0 = svdup_n_s16 (-258), -+ z0 = svdup_s16 (-258)) -+ -+/* -+** dup_m259_s16: -+** mov (w[0-9]+), -259 -+** mov z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m259_s16, svint16_t, -+ z0 = svdup_n_s16 (-259), -+ z0 = svdup_s16 (-259)) -+ -+/* -+** dup_m512_s16: -+** mov z0\.h, #-512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m512_s16, svint16_t, -+ z0 = svdup_n_s16 (-512), -+ z0 = svdup_s16 (-512)) -+ -+/* -+** dup_m7f00_s16: -+** mov z0\.h, #-32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f00_s16, svint16_t, -+ z0 = svdup_n_s16 (-0x7f00), -+ z0 = svdup_s16 (-0x7f00)) -+ -+/* -+** dup_m7f01_s16: -+** mov z0\.h, #-32513 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f01_s16, svint16_t, -+ z0 = svdup_n_s16 (-0x7f01), -+ z0 = svdup_s16 (-0x7f01)) -+ -+/* -+** dup_m7f02_s16: -+** mov (w[0-9]+), -32514 -+** mov z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f02_s16, svint16_t, -+ z0 = svdup_n_s16 (-0x7f02), -+ z0 = svdup_s16 (-0x7f02)) -+ -+/* -+** dup_m7ffe_s16: -+** mov (w[0-9]+), -32766 -+** mov z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7ffe_s16, svint16_t, -+ z0 = svdup_n_s16 (-0x7ffe), -+ z0 = svdup_s16 (-0x7ffe)) -+ -+/* -+** dup_m7fff_s16: -+** mov z0\.h, #-32767 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7fff_s16, svint16_t, -+ z0 = svdup_n_s16 (-0x7fff), -+ z0 = svdup_s16 (-0x7fff)) -+ -+/* -+** dup_m8000_s16: -+** mov z0\.h, #-32768 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m8000_s16, svint16_t, -+ z0 = svdup_n_s16 (-0x8000), -+ z0 = svdup_s16 (-0x8000)) -+ -+/* -+** dup_w0_s16: -+** mov z0\.h, w0 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_w0_s16, svint16_t, int16_t, -+ z0 = svdup_n_s16 (x0), -+ z0 = svdup_s16 (x0)) -+ -+/* -+** dup_1_s16_m: -+** mov z0\.h, p0/m, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_s16_m, svint16_t, -+ z0 = svdup_n_s16_m (z0, p0, 1), -+ z0 = svdup_s16_m (z0, p0, 1)) -+ -+/* -+** dup_127_s16_m: -+** mov z0\.h, p0/m, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_127_s16_m, svint16_t, -+ z0 = svdup_n_s16_m (z0, p0, 127), -+ z0 = svdup_s16_m (z0, p0, 127)) -+ -+/* -+** dup_128_s16_m: -+** mov (z[0-9]+\.h), #128 -+** sel z0\.h, p0, \1, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_128_s16_m, svint16_t, -+ z0 = svdup_n_s16_m (z0, p0, 128), -+ z0 = svdup_s16_m (z0, p0, 128)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_129_s16_m, svint16_t, -+ z0 = svdup_n_s16_m (z0, p0, 129), -+ z0 = svdup_s16_m (z0, p0, 129)) -+ -+/* TODO: Bad code and needs fixing. 
*/ -+TEST_UNIFORM_Z (dup_253_s16_m, svint16_t, -+ z0 = svdup_n_s16_m (z0, p0, 253), -+ z0 = svdup_s16_m (z0, p0, 253)) -+ -+/* -+** dup_254_s16_m: -+** mov (z[0-9]+\.h), #254 -+** sel z0\.h, p0, \1, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_254_s16_m, svint16_t, -+ z0 = svdup_n_s16_m (z0, p0, 254), -+ z0 = svdup_s16_m (z0, p0, 254)) -+ -+/* -+** dup_255_s16_m: -+** mov (z[0-9]+\.h), #255 -+** sel z0\.h, p0, \1, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_255_s16_m, svint16_t, -+ z0 = svdup_n_s16_m (z0, p0, 255), -+ z0 = svdup_s16_m (z0, p0, 255)) -+ -+/* -+** dup_256_s16_m: -+** mov z0\.h, p0/m, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_256_s16_m, svint16_t, -+ z0 = svdup_n_s16_m (z0, p0, 256), -+ z0 = svdup_s16_m (z0, p0, 256)) -+ -+/* -+** dup_257_s16_m: -+** mov (z[0-9]+)\.b, #1 -+** sel z0\.h, p0, \1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_257_s16_m, svint16_t, -+ z0 = svdup_n_s16_m (z0, p0, 257), -+ z0 = svdup_s16_m (z0, p0, 257)) -+ -+/* -+** dup_512_s16_m: -+** mov z0\.h, p0/m, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_512_s16_m, svint16_t, -+ z0 = svdup_n_s16_m (z0, p0, 512), -+ z0 = svdup_s16_m (z0, p0, 512)) -+ -+/* -+** dup_7f00_s16_m: -+** mov z0\.h, p0/m, #32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7f00_s16_m, svint16_t, -+ z0 = svdup_n_s16_m (z0, p0, 0x7f00), -+ z0 = svdup_s16_m (z0, p0, 0x7f00)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_7f01_s16_m, svint16_t, -+ z0 = svdup_n_s16_m (z0, p0, 0x7f01), -+ z0 = svdup_s16_m (z0, p0, 0x7f01)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_7ffd_s16_m, svint16_t, -+ z0 = svdup_n_s16_m (z0, p0, 0x7ffd), -+ z0 = svdup_s16_m (z0, p0, 0x7ffd)) -+ -+/* -+** dup_7ffe_s16_m: -+** mov (z[0-9]+\.h), #32766 -+** sel z0\.h, p0, \1, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7ffe_s16_m, svint16_t, -+ z0 = svdup_n_s16_m (z0, p0, 0x7ffe), -+ z0 = svdup_s16_m (z0, p0, 0x7ffe)) -+ -+/* -+** dup_7fff_s16_m: -+** mov (z[0-9]+\.h), #32767 -+** sel z0\.h, p0, \1, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7fff_s16_m, svint16_t, -+ z0 = svdup_n_s16_m (z0, p0, 0x7fff), -+ z0 = svdup_s16_m (z0, p0, 0x7fff)) -+ -+/* -+** dup_m1_s16_m: -+** mov z0\.h, p0/m, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m1_s16_m, svint16_t, -+ z0 = svdup_n_s16_m (z0, p0, -1), -+ z0 = svdup_s16_m (z0, p0, -1)) -+ -+/* -+** dup_m128_s16_m: -+** mov z0\.h, p0/m, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m128_s16_m, svint16_t, -+ z0 = svdup_n_s16_m (z0, p0, -128), -+ z0 = svdup_s16_m (z0, p0, -128)) -+ -+/* -+** dup_m129_s16_m: -+** mov (z[0-9]+\.h), #-129 -+** sel z0\.h, p0, \1, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m129_s16_m, svint16_t, -+ z0 = svdup_n_s16_m (z0, p0, -129), -+ z0 = svdup_s16_m (z0, p0, -129)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m130_s16_m, svint16_t, -+ z0 = svdup_n_s16_m (z0, p0, -130), -+ z0 = svdup_s16_m (z0, p0, -130)) -+ -+/* TODO: Bad code and needs fixing. 
*/ -+TEST_UNIFORM_Z (dup_m254_s16_m, svint16_t, -+ z0 = svdup_n_s16_m (z0, p0, -254), -+ z0 = svdup_s16_m (z0, p0, -254)) -+ -+/* -+** dup_m255_s16_m: -+** mov (z[0-9]+\.h), #-255 -+** sel z0\.h, p0, \1, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m255_s16_m, svint16_t, -+ z0 = svdup_n_s16_m (z0, p0, -255), -+ z0 = svdup_s16_m (z0, p0, -255)) -+ -+/* -+** dup_m256_s16_m: -+** mov z0\.h, p0/m, #-256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m256_s16_m, svint16_t, -+ z0 = svdup_n_s16_m (z0, p0, -256), -+ z0 = svdup_s16_m (z0, p0, -256)) -+ -+/* -+** dup_m257_s16_m: -+** mov (z[0-9]+\.h), #-257 -+** sel z0\.h, p0, \1, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m257_s16_m, svint16_t, -+ z0 = svdup_n_s16_m (z0, p0, -257), -+ z0 = svdup_s16_m (z0, p0, -257)) -+ -+/* -+** dup_m258_s16_m: -+** mov (z[0-9]+)\.b, #-2 -+** sel z0\.h, p0, \1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m258_s16_m, svint16_t, -+ z0 = svdup_n_s16_m (z0, p0, -258), -+ z0 = svdup_s16_m (z0, p0, -258)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m259_s16_m, svint16_t, -+ z0 = svdup_n_s16_m (z0, p0, -259), -+ z0 = svdup_s16_m (z0, p0, -259)) -+ -+/* -+** dup_m512_s16_m: -+** mov z0\.h, p0/m, #-512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m512_s16_m, svint16_t, -+ z0 = svdup_n_s16_m (z0, p0, -512), -+ z0 = svdup_s16_m (z0, p0, -512)) -+ -+/* -+** dup_m7f00_s16_m: -+** mov z0\.h, p0/m, #-32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f00_s16_m, svint16_t, -+ z0 = svdup_n_s16_m (z0, p0, -0x7f00), -+ z0 = svdup_s16_m (z0, p0, -0x7f00)) -+ -+/* -+** dup_m7f01_s16_m: -+** mov (z[0-9]+\.h), #-32513 -+** sel z0\.h, p0, \1, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f01_s16_m, svint16_t, -+ z0 = svdup_n_s16_m (z0, p0, -0x7f01), -+ z0 = svdup_s16_m (z0, p0, -0x7f01)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m7f02_s16_m, svint16_t, -+ z0 = svdup_n_s16_m (z0, p0, -0x7f02), -+ z0 = svdup_s16_m (z0, p0, -0x7f02)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m7ffe_s16_m, svint16_t, -+ z0 = svdup_n_s16_m (z0, p0, -0x7ffe), -+ z0 = svdup_s16_m (z0, p0, -0x7ffe)) -+ -+/* -+** dup_m7fff_s16_m: -+** mov (z[0-9]+\.h), #-32767 -+** sel z0\.h, p0, \1, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7fff_s16_m, svint16_t, -+ z0 = svdup_n_s16_m (z0, p0, -0x7fff), -+ z0 = svdup_s16_m (z0, p0, -0x7fff)) -+ -+/* -+** dup_m8000_s16_m: -+** mov z0\.h, p0/m, #-32768 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m8000_s16_m, svint16_t, -+ z0 = svdup_n_s16_m (z0, p0, -0x8000), -+ z0 = svdup_s16_m (z0, p0, -0x8000)) -+ -+/* -+** dup_0_s16_m: -+** mov z0\.h, p0/m, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_0_s16_m, svint16_t, -+ z0 = svdup_n_s16_m (z0, p0, 0), -+ z0 = svdup_s16_m (z0, p0, 0)) -+ -+/* -+** dup_w0_s16_m: -+** movprfx z0, z1 -+** mov z0\.h, p0/m, w0 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_w0_s16_m, svint16_t, int16_t, -+ z0 = svdup_n_s16_m (z1, p0, x0), -+ z0 = svdup_s16_m (z1, p0, x0)) -+ -+/* -+** dup_1_s16_z: -+** mov z0\.h, p0/z, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_s16_z, svint16_t, -+ z0 = svdup_n_s16_z (p0, 1), -+ z0 = svdup_s16_z (p0, 1)) -+ -+/* -+** dup_127_s16_z: -+** mov z0\.h, p0/z, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_127_s16_z, svint16_t, -+ z0 = svdup_n_s16_z (p0, 127), -+ z0 = svdup_s16_z (p0, 127)) -+ -+/* -+** dup_128_s16_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.h), #128 -+** sel z0\.h, p0, \2, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_128_s16_z, svint16_t, -+ z0 = svdup_n_s16_z (p0, 128), -+ z0 = svdup_s16_z (p0, 128)) -+ -+/* TODO: Bad code and needs fixing. 
*/ -+TEST_UNIFORM_Z (dup_129_s16_z, svint16_t, -+ z0 = svdup_n_s16_z (p0, 129), -+ z0 = svdup_s16_z (p0, 129)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_253_s16_z, svint16_t, -+ z0 = svdup_n_s16_z (p0, 253), -+ z0 = svdup_s16_z (p0, 253)) -+ -+/* -+** dup_254_s16_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.h), #254 -+** sel z0\.h, p0, \2, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_254_s16_z, svint16_t, -+ z0 = svdup_n_s16_z (p0, 254), -+ z0 = svdup_s16_z (p0, 254)) -+ -+/* -+** dup_255_s16_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.h), #255 -+** sel z0\.h, p0, \2, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_255_s16_z, svint16_t, -+ z0 = svdup_n_s16_z (p0, 255), -+ z0 = svdup_s16_z (p0, 255)) -+ -+/* -+** dup_256_s16_z: -+** mov z0\.h, p0/z, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_256_s16_z, svint16_t, -+ z0 = svdup_n_s16_z (p0, 256), -+ z0 = svdup_s16_z (p0, 256)) -+ -+/* -+** dup_257_s16_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+)\.b, #1 -+** sel z0\.h, p0, \2\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_257_s16_z, svint16_t, -+ z0 = svdup_n_s16_z (p0, 257), -+ z0 = svdup_s16_z (p0, 257)) -+ -+/* -+** dup_512_s16_z: -+** mov z0\.h, p0/z, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_512_s16_z, svint16_t, -+ z0 = svdup_n_s16_z (p0, 512), -+ z0 = svdup_s16_z (p0, 512)) -+ -+/* -+** dup_7f00_s16_z: -+** mov z0\.h, p0/z, #32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7f00_s16_z, svint16_t, -+ z0 = svdup_n_s16_z (p0, 0x7f00), -+ z0 = svdup_s16_z (p0, 0x7f00)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_7f01_s16_z, svint16_t, -+ z0 = svdup_n_s16_z (p0, 0x7f01), -+ z0 = svdup_s16_z (p0, 0x7f01)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_7ffd_s16_z, svint16_t, -+ z0 = svdup_n_s16_z (p0, 0x7ffd), -+ z0 = svdup_s16_z (p0, 0x7ffd)) -+ -+/* -+** dup_7ffe_s16_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.h), #32766 -+** sel z0\.h, p0, \2, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7ffe_s16_z, svint16_t, -+ z0 = svdup_n_s16_z (p0, 0x7ffe), -+ z0 = svdup_s16_z (p0, 0x7ffe)) -+ -+/* -+** dup_7fff_s16_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.h), #32767 -+** sel z0\.h, p0, \2, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7fff_s16_z, svint16_t, -+ z0 = svdup_n_s16_z (p0, 0x7fff), -+ z0 = svdup_s16_z (p0, 0x7fff)) -+ -+/* -+** dup_m1_s16_z: -+** mov z0\.h, p0/z, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m1_s16_z, svint16_t, -+ z0 = svdup_n_s16_z (p0, -1), -+ z0 = svdup_s16_z (p0, -1)) -+ -+/* -+** dup_m128_s16_z: -+** mov z0\.h, p0/z, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m128_s16_z, svint16_t, -+ z0 = svdup_n_s16_z (p0, -128), -+ z0 = svdup_s16_z (p0, -128)) -+ -+/* -+** dup_m129_s16_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.h), #-129 -+** sel z0\.h, p0, \2, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m129_s16_z, svint16_t, -+ z0 = svdup_n_s16_z (p0, -129), -+ z0 = svdup_s16_z (p0, -129)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m130_s16_z, svint16_t, -+ z0 = svdup_n_s16_z (p0, -130), -+ z0 = svdup_s16_z (p0, -130)) -+ -+/* TODO: Bad code and needs fixing. 
*/ -+TEST_UNIFORM_Z (dup_m254_s16_z, svint16_t, -+ z0 = svdup_n_s16_z (p0, -254), -+ z0 = svdup_s16_z (p0, -254)) -+ -+/* -+** dup_m255_s16_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.h), #-255 -+** sel z0\.h, p0, \2, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m255_s16_z, svint16_t, -+ z0 = svdup_n_s16_z (p0, -255), -+ z0 = svdup_s16_z (p0, -255)) -+ -+/* -+** dup_m256_s16_z: -+** mov z0\.h, p0/z, #-256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m256_s16_z, svint16_t, -+ z0 = svdup_n_s16_z (p0, -256), -+ z0 = svdup_s16_z (p0, -256)) -+ -+/* -+** dup_m257_s16_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.h), #-257 -+** sel z0\.h, p0, \2, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m257_s16_z, svint16_t, -+ z0 = svdup_n_s16_z (p0, -257), -+ z0 = svdup_s16_z (p0, -257)) -+ -+/* -+** dup_m258_s16_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+)\.b, #-2 -+** sel z0\.h, p0, \2\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m258_s16_z, svint16_t, -+ z0 = svdup_n_s16_z (p0, -258), -+ z0 = svdup_s16_z (p0, -258)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m259_s16_z, svint16_t, -+ z0 = svdup_n_s16_z (p0, -259), -+ z0 = svdup_s16_z (p0, -259)) -+ -+/* -+** dup_m512_s16_z: -+** mov z0\.h, p0/z, #-512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m512_s16_z, svint16_t, -+ z0 = svdup_n_s16_z (p0, -512), -+ z0 = svdup_s16_z (p0, -512)) -+ -+/* -+** dup_m7f00_s16_z: -+** mov z0\.h, p0/z, #-32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f00_s16_z, svint16_t, -+ z0 = svdup_n_s16_z (p0, -0x7f00), -+ z0 = svdup_s16_z (p0, -0x7f00)) -+ -+/* -+** dup_m7f01_s16_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.h), #-32513 -+** sel z0\.h, p0, \2, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f01_s16_z, svint16_t, -+ z0 = svdup_n_s16_z (p0, -0x7f01), -+ z0 = svdup_s16_z (p0, -0x7f01)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m7f02_s16_z, svint16_t, -+ z0 = svdup_n_s16_z (p0, -0x7f02), -+ z0 = svdup_s16_z (p0, -0x7f02)) -+ -+/* TODO: Bad code and needs fixing. 
*/ -+TEST_UNIFORM_Z (dup_m7ffe_s16_z, svint16_t, -+ z0 = svdup_n_s16_z (p0, -0x7ffe), -+ z0 = svdup_s16_z (p0, -0x7ffe)) -+ -+/* -+** dup_m7fff_s16_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.h), #-32767 -+** sel z0\.h, p0, \2, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7fff_s16_z, svint16_t, -+ z0 = svdup_n_s16_z (p0, -0x7fff), -+ z0 = svdup_s16_z (p0, -0x7fff)) -+ -+/* -+** dup_m8000_s16_z: -+** mov z0\.h, p0/z, #-32768 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m8000_s16_z, svint16_t, -+ z0 = svdup_n_s16_z (p0, -0x8000), -+ z0 = svdup_s16_z (p0, -0x8000)) -+ -+/* -+** dup_0_s16_z: -+** mov z0\.h, p0/z, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_0_s16_z, svint16_t, -+ z0 = svdup_n_s16_z (p0, 0), -+ z0 = svdup_s16_z (p0, 0)) -+ -+/* -+** dup_w0_s16_z: -+** movprfx z0\.h, p0/z, z0\.h -+** mov z0\.h, p0/m, w0 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_w0_s16_z, svint16_t, int16_t, -+ z0 = svdup_n_s16_z (p0, x0), -+ z0 = svdup_s16_z (p0, x0)) -+ -+/* -+** dup_1_s16_x: -+** mov z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_s16_x, svint16_t, -+ z0 = svdup_n_s16_x (p0, 1), -+ z0 = svdup_s16_x (p0, 1)) -+ -+/* -+** dup_127_s16_x: -+** mov z0\.h, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_127_s16_x, svint16_t, -+ z0 = svdup_n_s16_x (p0, 127), -+ z0 = svdup_s16_x (p0, 127)) -+ -+/* -+** dup_128_s16_x: -+** mov z0\.h, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_128_s16_x, svint16_t, -+ z0 = svdup_n_s16_x (p0, 128), -+ z0 = svdup_s16_x (p0, 128)) -+ -+/* -+** dup_129_s16_x: -+** movi v([0-9]+)\.8h, 0x81 -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_129_s16_x, svint16_t, -+ z0 = svdup_n_s16_x (p0, 129), -+ z0 = svdup_s16_x (p0, 129)) -+ -+/* -+** dup_253_s16_x: -+** movi v([0-9]+)\.8h, 0xfd -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_253_s16_x, svint16_t, -+ z0 = svdup_n_s16_x (p0, 253), -+ z0 = svdup_s16_x (p0, 253)) -+ -+/* -+** dup_254_s16_x: -+** mov z0\.h, #254 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_254_s16_x, svint16_t, -+ z0 = svdup_n_s16_x (p0, 254), -+ z0 = svdup_s16_x (p0, 254)) -+ -+/* -+** dup_255_s16_x: -+** mov z0\.h, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_255_s16_x, svint16_t, -+ z0 = svdup_n_s16_x (p0, 255), -+ z0 = svdup_s16_x (p0, 255)) -+ -+/* -+** dup_256_s16_x: -+** mov z0\.h, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_256_s16_x, svint16_t, -+ z0 = svdup_n_s16_x (p0, 256), -+ z0 = svdup_s16_x (p0, 256)) -+ -+/* -+** dup_257_s16_x: -+** mov z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_257_s16_x, svint16_t, -+ z0 = svdup_n_s16_x (p0, 257), -+ z0 = svdup_s16_x (p0, 257)) -+ -+/* -+** dup_512_s16_x: -+** mov z0\.h, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_512_s16_x, svint16_t, -+ z0 = svdup_n_s16_x (p0, 512), -+ z0 = svdup_s16_x (p0, 512)) -+ -+/* -+** dup_7f00_s16_x: -+** mov z0\.h, #32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7f00_s16_x, svint16_t, -+ z0 = svdup_n_s16_x (p0, 0x7f00), -+ z0 = svdup_s16_x (p0, 0x7f00)) -+ -+/* -+** dup_7f01_s16_x: -+** mov (w[0-9]+), 32513 -+** mov z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7f01_s16_x, svint16_t, -+ z0 = svdup_n_s16_x (p0, 0x7f01), -+ z0 = svdup_s16_x (p0, 0x7f01)) -+ -+/* -+** dup_7ffd_s16_x: -+** mov (w[0-9]+), 32765 -+** mov z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7ffd_s16_x, svint16_t, -+ z0 = svdup_n_s16_x (p0, 0x7ffd), -+ z0 = svdup_s16_x (p0, 0x7ffd)) -+ -+/* -+** dup_7ffe_s16_x: -+** mov z0\.h, #32766 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7ffe_s16_x, svint16_t, -+ z0 = svdup_n_s16_x (p0, 0x7ffe), -+ z0 = svdup_s16_x (p0, 0x7ffe)) -+ -+/* -+** dup_7fff_s16_x: -+** mov z0\.h, #32767 -+** ret -+*/ 
-+TEST_UNIFORM_Z (dup_7fff_s16_x, svint16_t, -+ z0 = svdup_n_s16_x (p0, 0x7fff), -+ z0 = svdup_s16_x (p0, 0x7fff)) -+ -+/* -+** dup_m1_s16_x: -+** mov z0\.b, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m1_s16_x, svint16_t, -+ z0 = svdup_n_s16_x (p0, -1), -+ z0 = svdup_s16_x (p0, -1)) -+ -+/* -+** dup_m128_s16_x: -+** mov z0\.h, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m128_s16_x, svint16_t, -+ z0 = svdup_n_s16_x (p0, -128), -+ z0 = svdup_s16_x (p0, -128)) -+ -+/* -+** dup_m129_s16_x: -+** mov z0\.h, #-129 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m129_s16_x, svint16_t, -+ z0 = svdup_n_s16_x (p0, -129), -+ z0 = svdup_s16_x (p0, -129)) -+ -+/* -+** dup_m130_s16_x: -+** mvni v([0-9]+)\.8h, 0x81 -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m130_s16_x, svint16_t, -+ z0 = svdup_n_s16_x (p0, -130), -+ z0 = svdup_s16_x (p0, -130)) -+ -+/* -+** dup_m254_s16_x: -+** mvni v([0-9]+)\.8h, 0xfd -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m254_s16_x, svint16_t, -+ z0 = svdup_n_s16_x (p0, -254), -+ z0 = svdup_s16_x (p0, -254)) -+ -+/* -+** dup_m255_s16_x: -+** mov z0\.h, #-255 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m255_s16_x, svint16_t, -+ z0 = svdup_n_s16_x (p0, -255), -+ z0 = svdup_s16_x (p0, -255)) -+ -+/* -+** dup_m256_s16_x: -+** mov z0\.h, #-256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m256_s16_x, svint16_t, -+ z0 = svdup_n_s16_x (p0, -256), -+ z0 = svdup_s16_x (p0, -256)) -+ -+/* -+** dup_m257_s16_x: -+** mov z0\.h, #-257 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m257_s16_x, svint16_t, -+ z0 = svdup_n_s16_x (p0, -257), -+ z0 = svdup_s16_x (p0, -257)) -+ -+/* -+** dup_m258_s16_x: -+** mov z0\.b, #-2 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m258_s16_x, svint16_t, -+ z0 = svdup_n_s16_x (p0, -258), -+ z0 = svdup_s16_x (p0, -258)) -+ -+/* -+** dup_m259_s16_x: -+** mov (w[0-9]+), -259 -+** mov z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m259_s16_x, svint16_t, -+ z0 = svdup_n_s16_x (p0, -259), -+ z0 = svdup_s16_x (p0, -259)) -+ -+/* -+** dup_m512_s16_x: -+** mov z0\.h, #-512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m512_s16_x, svint16_t, -+ z0 = svdup_n_s16_x (p0, -512), -+ z0 = svdup_s16_x (p0, -512)) -+ -+/* -+** dup_m7f00_s16_x: -+** mov z0\.h, #-32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f00_s16_x, svint16_t, -+ z0 = svdup_n_s16_x (p0, -0x7f00), -+ z0 = svdup_s16_x (p0, -0x7f00)) -+ -+/* -+** dup_m7f01_s16_x: -+** mov z0\.h, #-32513 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f01_s16_x, svint16_t, -+ z0 = svdup_n_s16_x (p0, -0x7f01), -+ z0 = svdup_s16_x (p0, -0x7f01)) -+ -+/* -+** dup_m7f02_s16_x: -+** mov (w[0-9]+), -32514 -+** mov z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f02_s16_x, svint16_t, -+ z0 = svdup_n_s16_x (p0, -0x7f02), -+ z0 = svdup_s16_x (p0, -0x7f02)) -+ -+/* -+** dup_m7ffe_s16_x: -+** mov (w[0-9]+), -32766 -+** mov z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7ffe_s16_x, svint16_t, -+ z0 = svdup_n_s16_x (p0, -0x7ffe), -+ z0 = svdup_s16_x (p0, -0x7ffe)) -+ -+/* -+** dup_m7fff_s16_x: -+** mov z0\.h, #-32767 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7fff_s16_x, svint16_t, -+ z0 = svdup_n_s16_x (p0, -0x7fff), -+ z0 = svdup_s16_x (p0, -0x7fff)) -+ -+/* -+** dup_m8000_s16_x: -+** mov z0\.h, #-32768 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m8000_s16_x, svint16_t, -+ z0 = svdup_n_s16_x (p0, -0x8000), -+ z0 = svdup_s16_x (p0, -0x8000)) -+ -+/* -+** dup_w0_s16_x: -+** mov z0\.h, w0 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_w0_s16_x, svint16_t, int16_t, -+ z0 = svdup_n_s16_x (p0, x0), -+ z0 = svdup_s16_x (p0, x0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s32.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s32.c -new file mode 100644 -index 000000000..0b396dbeb ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s32.c -@@ -0,0 +1,1175 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dup_1_s32: -+** mov z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_s32, svint32_t, -+ z0 = svdup_n_s32 (1), -+ z0 = svdup_s32 (1)) -+ -+/* -+** dup_127_s32: -+** mov z0\.s, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_127_s32, svint32_t, -+ z0 = svdup_n_s32 (127), -+ z0 = svdup_s32 (127)) -+ -+/* -+** dup_128_s32: -+** mov z0\.s, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_128_s32, svint32_t, -+ z0 = svdup_n_s32 (128), -+ z0 = svdup_s32 (128)) -+ -+/* -+** dup_129_s32: -+** movi v([0-9]+)\.4s, 0x81 -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_129_s32, svint32_t, -+ z0 = svdup_n_s32 (129), -+ z0 = svdup_s32 (129)) -+ -+/* -+** dup_253_s32: -+** movi v([0-9]+)\.4s, 0xfd -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_253_s32, svint32_t, -+ z0 = svdup_n_s32 (253), -+ z0 = svdup_s32 (253)) -+ -+/* -+** dup_254_s32: -+** mov z0\.s, #254 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_254_s32, svint32_t, -+ z0 = svdup_n_s32 (254), -+ z0 = svdup_s32 (254)) -+ -+/* -+** dup_255_s32: -+** mov z0\.s, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_255_s32, svint32_t, -+ z0 = svdup_n_s32 (255), -+ z0 = svdup_s32 (255)) -+ -+/* -+** dup_256_s32: -+** mov z0\.s, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_256_s32, svint32_t, -+ z0 = svdup_n_s32 (256), -+ z0 = svdup_s32 (256)) -+ -+/* -+** dup_257_s32: -+** mov (w[0-9]+), 257 -+** mov z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_257_s32, svint32_t, -+ z0 = svdup_n_s32 (257), -+ z0 = svdup_s32 (257)) -+ -+/* -+** dup_512_s32: -+** mov z0\.s, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_512_s32, svint32_t, -+ z0 = svdup_n_s32 (512), -+ z0 = svdup_s32 (512)) -+ -+/* -+** dup_7f00_s32: -+** mov z0\.s, #32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7f00_s32, svint32_t, -+ z0 = svdup_n_s32 (0x7f00), -+ z0 = svdup_s32 (0x7f00)) -+ -+/* -+** dup_7f01_s32: -+** mov (w[0-9]+), 32513 -+** mov z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7f01_s32, svint32_t, -+ z0 = svdup_n_s32 (0x7f01), -+ z0 = svdup_s32 (0x7f01)) -+ -+/* -+** dup_7ffd_s32: -+** mov (w[0-9]+), 32765 -+** mov z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7ffd_s32, svint32_t, -+ z0 = svdup_n_s32 (0x7ffd), -+ z0 = svdup_s32 (0x7ffd)) -+ -+/* -+** dup_7ffe_s32: -+** mov z0\.s, #32766 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7ffe_s32, svint32_t, -+ z0 = svdup_n_s32 (0x7ffe), -+ z0 = svdup_s32 (0x7ffe)) -+ -+/* -+** dup_7fff_s32: -+** mov z0\.s, #32767 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7fff_s32, svint32_t, -+ z0 = svdup_n_s32 (0x7fff), -+ z0 = svdup_s32 (0x7fff)) -+ -+/* -+** dup_m1_s32: -+** mov z0\.b, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m1_s32, svint32_t, -+ z0 = svdup_n_s32 (-1), -+ z0 = svdup_s32 (-1)) -+ -+/* -+** dup_m128_s32: -+** mov z0\.s, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m128_s32, svint32_t, -+ z0 = svdup_n_s32 (-128), -+ z0 = svdup_s32 (-128)) -+ -+/* -+** dup_m129_s32: -+** mov z0\.s, #-129 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m129_s32, svint32_t, -+ z0 = svdup_n_s32 (-129), -+ z0 = svdup_s32 (-129)) -+ -+/* -+** dup_m130_s32: -+** mvni v([0-9]+)\.4s, 0x81 -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m130_s32, svint32_t, -+ z0 = svdup_n_s32 (-130), -+ z0 = svdup_s32 (-130)) -+ -+/* -+** dup_m254_s32: -+** mvni v([0-9]+)\.4s, 0xfd -+** dup z0\.q, 
z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m254_s32, svint32_t, -+ z0 = svdup_n_s32 (-254), -+ z0 = svdup_s32 (-254)) -+ -+/* -+** dup_m255_s32: -+** mov z0\.s, #-255 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m255_s32, svint32_t, -+ z0 = svdup_n_s32 (-255), -+ z0 = svdup_s32 (-255)) -+ -+/* -+** dup_m256_s32: -+** mov z0\.s, #-256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m256_s32, svint32_t, -+ z0 = svdup_n_s32 (-256), -+ z0 = svdup_s32 (-256)) -+ -+/* -+** dup_m257_s32: -+** mov z0\.s, #-257 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m257_s32, svint32_t, -+ z0 = svdup_n_s32 (-257), -+ z0 = svdup_s32 (-257)) -+ -+/* -+** dup_m258_s32: -+** mov (w[0-9]+), -258 -+** mov z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m258_s32, svint32_t, -+ z0 = svdup_n_s32 (-258), -+ z0 = svdup_s32 (-258)) -+ -+/* -+** dup_m259_s32: -+** mov (w[0-9]+), -259 -+** mov z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m259_s32, svint32_t, -+ z0 = svdup_n_s32 (-259), -+ z0 = svdup_s32 (-259)) -+ -+/* -+** dup_m512_s32: -+** mov z0\.s, #-512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m512_s32, svint32_t, -+ z0 = svdup_n_s32 (-512), -+ z0 = svdup_s32 (-512)) -+ -+/* -+** dup_m7f00_s32: -+** mov z0\.s, #-32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f00_s32, svint32_t, -+ z0 = svdup_n_s32 (-0x7f00), -+ z0 = svdup_s32 (-0x7f00)) -+ -+/* -+** dup_m7f01_s32: -+** mov z0\.s, #-32513 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f01_s32, svint32_t, -+ z0 = svdup_n_s32 (-0x7f01), -+ z0 = svdup_s32 (-0x7f01)) -+ -+/* -+** dup_m7f02_s32: -+** mov (w[0-9]+), -32514 -+** mov z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f02_s32, svint32_t, -+ z0 = svdup_n_s32 (-0x7f02), -+ z0 = svdup_s32 (-0x7f02)) -+ -+/* -+** dup_m7ffe_s32: -+** mov (w[0-9]+), -32766 -+** mov z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7ffe_s32, svint32_t, -+ z0 = svdup_n_s32 (-0x7ffe), -+ z0 = svdup_s32 (-0x7ffe)) -+ -+/* -+** dup_m7fff_s32: -+** mov z0\.s, #-32767 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7fff_s32, svint32_t, -+ z0 = svdup_n_s32 (-0x7fff), -+ z0 = svdup_s32 (-0x7fff)) -+ -+/* -+** dup_m8000_s32: -+** mov z0\.s, #-32768 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m8000_s32, svint32_t, -+ z0 = svdup_n_s32 (-0x8000), -+ z0 = svdup_s32 (-0x8000)) -+ -+/* -+** dup_w0_s32: -+** mov z0\.s, w0 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_w0_s32, svint32_t, int32_t, -+ z0 = svdup_n_s32 (x0), -+ z0 = svdup_s32 (x0)) -+ -+/* -+** dup_1_s32_m: -+** mov z0\.s, p0/m, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_s32_m, svint32_t, -+ z0 = svdup_n_s32_m (z0, p0, 1), -+ z0 = svdup_s32_m (z0, p0, 1)) -+ -+/* -+** dup_127_s32_m: -+** mov z0\.s, p0/m, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_127_s32_m, svint32_t, -+ z0 = svdup_n_s32_m (z0, p0, 127), -+ z0 = svdup_s32_m (z0, p0, 127)) -+ -+/* -+** dup_128_s32_m: -+** mov (z[0-9]+\.s), #128 -+** sel z0\.s, p0, \1, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_128_s32_m, svint32_t, -+ z0 = svdup_n_s32_m (z0, p0, 128), -+ z0 = svdup_s32_m (z0, p0, 128)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_129_s32_m, svint32_t, -+ z0 = svdup_n_s32_m (z0, p0, 129), -+ z0 = svdup_s32_m (z0, p0, 129)) -+ -+/* TODO: Bad code and needs fixing. 
*/ -+TEST_UNIFORM_Z (dup_253_s32_m, svint32_t, -+ z0 = svdup_n_s32_m (z0, p0, 253), -+ z0 = svdup_s32_m (z0, p0, 253)) -+ -+/* -+** dup_254_s32_m: -+** mov (z[0-9]+\.s), #254 -+** sel z0\.s, p0, \1, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_254_s32_m, svint32_t, -+ z0 = svdup_n_s32_m (z0, p0, 254), -+ z0 = svdup_s32_m (z0, p0, 254)) -+ -+/* -+** dup_255_s32_m: -+** mov (z[0-9]+\.s), #255 -+** sel z0\.s, p0, \1, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_255_s32_m, svint32_t, -+ z0 = svdup_n_s32_m (z0, p0, 255), -+ z0 = svdup_s32_m (z0, p0, 255)) -+ -+/* -+** dup_256_s32_m: -+** mov z0\.s, p0/m, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_256_s32_m, svint32_t, -+ z0 = svdup_n_s32_m (z0, p0, 256), -+ z0 = svdup_s32_m (z0, p0, 256)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_257_s32_m, svint32_t, -+ z0 = svdup_n_s32_m (z0, p0, 257), -+ z0 = svdup_s32_m (z0, p0, 257)) -+ -+/* -+** dup_512_s32_m: -+** mov z0\.s, p0/m, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_512_s32_m, svint32_t, -+ z0 = svdup_n_s32_m (z0, p0, 512), -+ z0 = svdup_s32_m (z0, p0, 512)) -+ -+/* -+** dup_7f00_s32_m: -+** mov z0\.s, p0/m, #32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7f00_s32_m, svint32_t, -+ z0 = svdup_n_s32_m (z0, p0, 0x7f00), -+ z0 = svdup_s32_m (z0, p0, 0x7f00)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_7f01_s32_m, svint32_t, -+ z0 = svdup_n_s32_m (z0, p0, 0x7f01), -+ z0 = svdup_s32_m (z0, p0, 0x7f01)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_7ffd_s32_m, svint32_t, -+ z0 = svdup_n_s32_m (z0, p0, 0x7ffd), -+ z0 = svdup_s32_m (z0, p0, 0x7ffd)) -+ -+/* -+** dup_7ffe_s32_m: -+** mov (z[0-9]+\.s), #32766 -+** sel z0\.s, p0, \1, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7ffe_s32_m, svint32_t, -+ z0 = svdup_n_s32_m (z0, p0, 0x7ffe), -+ z0 = svdup_s32_m (z0, p0, 0x7ffe)) -+ -+/* -+** dup_7fff_s32_m: -+** mov (z[0-9]+\.s), #32767 -+** sel z0\.s, p0, \1, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7fff_s32_m, svint32_t, -+ z0 = svdup_n_s32_m (z0, p0, 0x7fff), -+ z0 = svdup_s32_m (z0, p0, 0x7fff)) -+ -+/* -+** dup_m1_s32_m: -+** mov z0\.s, p0/m, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m1_s32_m, svint32_t, -+ z0 = svdup_n_s32_m (z0, p0, -1), -+ z0 = svdup_s32_m (z0, p0, -1)) -+ -+/* -+** dup_m128_s32_m: -+** mov z0\.s, p0/m, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m128_s32_m, svint32_t, -+ z0 = svdup_n_s32_m (z0, p0, -128), -+ z0 = svdup_s32_m (z0, p0, -128)) -+ -+/* -+** dup_m129_s32_m: -+** mov (z[0-9]+\.s), #-129 -+** sel z0\.s, p0, \1, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m129_s32_m, svint32_t, -+ z0 = svdup_n_s32_m (z0, p0, -129), -+ z0 = svdup_s32_m (z0, p0, -129)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m130_s32_m, svint32_t, -+ z0 = svdup_n_s32_m (z0, p0, -130), -+ z0 = svdup_s32_m (z0, p0, -130)) -+ -+/* TODO: Bad code and needs fixing. 
*/ -+TEST_UNIFORM_Z (dup_m254_s32_m, svint32_t, -+ z0 = svdup_n_s32_m (z0, p0, -254), -+ z0 = svdup_s32_m (z0, p0, -254)) -+ -+/* -+** dup_m255_s32_m: -+** mov (z[0-9]+\.s), #-255 -+** sel z0\.s, p0, \1, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m255_s32_m, svint32_t, -+ z0 = svdup_n_s32_m (z0, p0, -255), -+ z0 = svdup_s32_m (z0, p0, -255)) -+ -+/* -+** dup_m256_s32_m: -+** mov z0\.s, p0/m, #-256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m256_s32_m, svint32_t, -+ z0 = svdup_n_s32_m (z0, p0, -256), -+ z0 = svdup_s32_m (z0, p0, -256)) -+ -+/* -+** dup_m257_s32_m: -+** mov (z[0-9]+\.s), #-257 -+** sel z0\.s, p0, \1, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m257_s32_m, svint32_t, -+ z0 = svdup_n_s32_m (z0, p0, -257), -+ z0 = svdup_s32_m (z0, p0, -257)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m258_s32_m, svint32_t, -+ z0 = svdup_n_s32_m (z0, p0, -258), -+ z0 = svdup_s32_m (z0, p0, -258)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m259_s32_m, svint32_t, -+ z0 = svdup_n_s32_m (z0, p0, -259), -+ z0 = svdup_s32_m (z0, p0, -259)) -+ -+/* -+** dup_m512_s32_m: -+** mov z0\.s, p0/m, #-512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m512_s32_m, svint32_t, -+ z0 = svdup_n_s32_m (z0, p0, -512), -+ z0 = svdup_s32_m (z0, p0, -512)) -+ -+/* -+** dup_m7f00_s32_m: -+** mov z0\.s, p0/m, #-32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f00_s32_m, svint32_t, -+ z0 = svdup_n_s32_m (z0, p0, -0x7f00), -+ z0 = svdup_s32_m (z0, p0, -0x7f00)) -+ -+/* -+** dup_m7f01_s32_m: -+** mov (z[0-9]+\.s), #-32513 -+** sel z0\.s, p0, \1, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f01_s32_m, svint32_t, -+ z0 = svdup_n_s32_m (z0, p0, -0x7f01), -+ z0 = svdup_s32_m (z0, p0, -0x7f01)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m7f02_s32_m, svint32_t, -+ z0 = svdup_n_s32_m (z0, p0, -0x7f02), -+ z0 = svdup_s32_m (z0, p0, -0x7f02)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m7ffe_s32_m, svint32_t, -+ z0 = svdup_n_s32_m (z0, p0, -0x7ffe), -+ z0 = svdup_s32_m (z0, p0, -0x7ffe)) -+ -+/* -+** dup_m7fff_s32_m: -+** mov (z[0-9]+\.s), #-32767 -+** sel z0\.s, p0, \1, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7fff_s32_m, svint32_t, -+ z0 = svdup_n_s32_m (z0, p0, -0x7fff), -+ z0 = svdup_s32_m (z0, p0, -0x7fff)) -+ -+/* -+** dup_m8000_s32_m: -+** mov z0\.s, p0/m, #-32768 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m8000_s32_m, svint32_t, -+ z0 = svdup_n_s32_m (z0, p0, -0x8000), -+ z0 = svdup_s32_m (z0, p0, -0x8000)) -+ -+/* -+** dup_0_s32_m: -+** mov z0\.s, p0/m, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_0_s32_m, svint32_t, -+ z0 = svdup_n_s32_m (z0, p0, 0), -+ z0 = svdup_s32_m (z0, p0, 0)) -+ -+/* -+** dup_w0_s32_m: -+** movprfx z0, z1 -+** mov z0\.s, p0/m, w0 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_w0_s32_m, svint32_t, int32_t, -+ z0 = svdup_n_s32_m (z1, p0, x0), -+ z0 = svdup_s32_m (z1, p0, x0)) -+ -+/* -+** dup_1_s32_z: -+** mov z0\.s, p0/z, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_s32_z, svint32_t, -+ z0 = svdup_n_s32_z (p0, 1), -+ z0 = svdup_s32_z (p0, 1)) -+ -+/* -+** dup_127_s32_z: -+** mov z0\.s, p0/z, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_127_s32_z, svint32_t, -+ z0 = svdup_n_s32_z (p0, 127), -+ z0 = svdup_s32_z (p0, 127)) -+ -+/* -+** dup_128_s32_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.s), #128 -+** sel z0\.s, p0, \2, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_128_s32_z, svint32_t, -+ z0 = svdup_n_s32_z (p0, 128), -+ z0 = svdup_s32_z (p0, 128)) -+ -+/* TODO: Bad code and needs fixing. 
*/ -+TEST_UNIFORM_Z (dup_129_s32_z, svint32_t, -+ z0 = svdup_n_s32_z (p0, 129), -+ z0 = svdup_s32_z (p0, 129)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_253_s32_z, svint32_t, -+ z0 = svdup_n_s32_z (p0, 253), -+ z0 = svdup_s32_z (p0, 253)) -+ -+/* -+** dup_254_s32_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.s), #254 -+** sel z0\.s, p0, \2, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_254_s32_z, svint32_t, -+ z0 = svdup_n_s32_z (p0, 254), -+ z0 = svdup_s32_z (p0, 254)) -+ -+/* -+** dup_255_s32_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.s), #255 -+** sel z0\.s, p0, \2, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_255_s32_z, svint32_t, -+ z0 = svdup_n_s32_z (p0, 255), -+ z0 = svdup_s32_z (p0, 255)) -+ -+/* -+** dup_256_s32_z: -+** mov z0\.s, p0/z, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_256_s32_z, svint32_t, -+ z0 = svdup_n_s32_z (p0, 256), -+ z0 = svdup_s32_z (p0, 256)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_257_s32_z, svint32_t, -+ z0 = svdup_n_s32_z (p0, 257), -+ z0 = svdup_s32_z (p0, 257)) -+ -+/* -+** dup_512_s32_z: -+** mov z0\.s, p0/z, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_512_s32_z, svint32_t, -+ z0 = svdup_n_s32_z (p0, 512), -+ z0 = svdup_s32_z (p0, 512)) -+ -+/* -+** dup_7f00_s32_z: -+** mov z0\.s, p0/z, #32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7f00_s32_z, svint32_t, -+ z0 = svdup_n_s32_z (p0, 0x7f00), -+ z0 = svdup_s32_z (p0, 0x7f00)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_7f01_s32_z, svint32_t, -+ z0 = svdup_n_s32_z (p0, 0x7f01), -+ z0 = svdup_s32_z (p0, 0x7f01)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_7ffd_s32_z, svint32_t, -+ z0 = svdup_n_s32_z (p0, 0x7ffd), -+ z0 = svdup_s32_z (p0, 0x7ffd)) -+ -+/* -+** dup_7ffe_s32_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.s), #32766 -+** sel z0\.s, p0, \2, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7ffe_s32_z, svint32_t, -+ z0 = svdup_n_s32_z (p0, 0x7ffe), -+ z0 = svdup_s32_z (p0, 0x7ffe)) -+ -+/* -+** dup_7fff_s32_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.s), #32767 -+** sel z0\.s, p0, \2, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7fff_s32_z, svint32_t, -+ z0 = svdup_n_s32_z (p0, 0x7fff), -+ z0 = svdup_s32_z (p0, 0x7fff)) -+ -+/* -+** dup_m1_s32_z: -+** mov z0\.s, p0/z, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m1_s32_z, svint32_t, -+ z0 = svdup_n_s32_z (p0, -1), -+ z0 = svdup_s32_z (p0, -1)) -+ -+/* -+** dup_m128_s32_z: -+** mov z0\.s, p0/z, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m128_s32_z, svint32_t, -+ z0 = svdup_n_s32_z (p0, -128), -+ z0 = svdup_s32_z (p0, -128)) -+ -+/* -+** dup_m129_s32_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.s), #-129 -+** sel z0\.s, p0, \2, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m129_s32_z, svint32_t, -+ z0 = svdup_n_s32_z (p0, -129), -+ z0 = svdup_s32_z (p0, -129)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m130_s32_z, svint32_t, -+ z0 = svdup_n_s32_z (p0, -130), -+ z0 = svdup_s32_z (p0, -130)) -+ -+/* TODO: Bad code and needs fixing. 
*/ -+TEST_UNIFORM_Z (dup_m254_s32_z, svint32_t, -+ z0 = svdup_n_s32_z (p0, -254), -+ z0 = svdup_s32_z (p0, -254)) -+ -+/* -+** dup_m255_s32_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.s), #-255 -+** sel z0\.s, p0, \2, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m255_s32_z, svint32_t, -+ z0 = svdup_n_s32_z (p0, -255), -+ z0 = svdup_s32_z (p0, -255)) -+ -+/* -+** dup_m256_s32_z: -+** mov z0\.s, p0/z, #-256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m256_s32_z, svint32_t, -+ z0 = svdup_n_s32_z (p0, -256), -+ z0 = svdup_s32_z (p0, -256)) -+ -+/* -+** dup_m257_s32_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.s), #-257 -+** sel z0\.s, p0, \2, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m257_s32_z, svint32_t, -+ z0 = svdup_n_s32_z (p0, -257), -+ z0 = svdup_s32_z (p0, -257)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m258_s32_z, svint32_t, -+ z0 = svdup_n_s32_z (p0, -258), -+ z0 = svdup_s32_z (p0, -258)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m259_s32_z, svint32_t, -+ z0 = svdup_n_s32_z (p0, -259), -+ z0 = svdup_s32_z (p0, -259)) -+ -+/* -+** dup_m512_s32_z: -+** mov z0\.s, p0/z, #-512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m512_s32_z, svint32_t, -+ z0 = svdup_n_s32_z (p0, -512), -+ z0 = svdup_s32_z (p0, -512)) -+ -+/* -+** dup_m7f00_s32_z: -+** mov z0\.s, p0/z, #-32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f00_s32_z, svint32_t, -+ z0 = svdup_n_s32_z (p0, -0x7f00), -+ z0 = svdup_s32_z (p0, -0x7f00)) -+ -+/* -+** dup_m7f01_s32_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.s), #-32513 -+** sel z0\.s, p0, \2, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f01_s32_z, svint32_t, -+ z0 = svdup_n_s32_z (p0, -0x7f01), -+ z0 = svdup_s32_z (p0, -0x7f01)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m7f02_s32_z, svint32_t, -+ z0 = svdup_n_s32_z (p0, -0x7f02), -+ z0 = svdup_s32_z (p0, -0x7f02)) -+ -+/* TODO: Bad code and needs fixing. 
*/ -+TEST_UNIFORM_Z (dup_m7ffe_s32_z, svint32_t, -+ z0 = svdup_n_s32_z (p0, -0x7ffe), -+ z0 = svdup_s32_z (p0, -0x7ffe)) -+ -+/* -+** dup_m7fff_s32_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.s), #-32767 -+** sel z0\.s, p0, \2, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7fff_s32_z, svint32_t, -+ z0 = svdup_n_s32_z (p0, -0x7fff), -+ z0 = svdup_s32_z (p0, -0x7fff)) -+ -+/* -+** dup_m8000_s32_z: -+** mov z0\.s, p0/z, #-32768 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m8000_s32_z, svint32_t, -+ z0 = svdup_n_s32_z (p0, -0x8000), -+ z0 = svdup_s32_z (p0, -0x8000)) -+ -+/* -+** dup_0_s32_z: -+** mov z0\.s, p0/z, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_0_s32_z, svint32_t, -+ z0 = svdup_n_s32_z (p0, 0), -+ z0 = svdup_s32_z (p0, 0)) -+ -+/* -+** dup_w0_s32_z: -+** movprfx z0\.s, p0/z, z0\.s -+** mov z0\.s, p0/m, w0 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_w0_s32_z, svint32_t, int32_t, -+ z0 = svdup_n_s32_z (p0, x0), -+ z0 = svdup_s32_z (p0, x0)) -+ -+/* -+** dup_1_s32_x: -+** mov z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_s32_x, svint32_t, -+ z0 = svdup_n_s32_x (p0, 1), -+ z0 = svdup_s32_x (p0, 1)) -+ -+/* -+** dup_127_s32_x: -+** mov z0\.s, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_127_s32_x, svint32_t, -+ z0 = svdup_n_s32_x (p0, 127), -+ z0 = svdup_s32_x (p0, 127)) -+ -+/* -+** dup_128_s32_x: -+** mov z0\.s, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_128_s32_x, svint32_t, -+ z0 = svdup_n_s32_x (p0, 128), -+ z0 = svdup_s32_x (p0, 128)) -+ -+/* -+** dup_129_s32_x: -+** movi v([0-9]+)\.4s, 0x81 -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_129_s32_x, svint32_t, -+ z0 = svdup_n_s32_x (p0, 129), -+ z0 = svdup_s32_x (p0, 129)) -+ -+/* -+** dup_253_s32_x: -+** movi v([0-9]+)\.4s, 0xfd -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_253_s32_x, svint32_t, -+ z0 = svdup_n_s32_x (p0, 253), -+ z0 = svdup_s32_x (p0, 253)) -+ -+/* -+** dup_254_s32_x: -+** mov z0\.s, #254 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_254_s32_x, svint32_t, -+ z0 = svdup_n_s32_x (p0, 254), -+ z0 = svdup_s32_x (p0, 254)) -+ -+/* -+** dup_255_s32_x: -+** mov z0\.s, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_255_s32_x, svint32_t, -+ z0 = svdup_n_s32_x (p0, 255), -+ z0 = svdup_s32_x (p0, 255)) -+ -+/* -+** dup_256_s32_x: -+** mov z0\.s, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_256_s32_x, svint32_t, -+ z0 = svdup_n_s32_x (p0, 256), -+ z0 = svdup_s32_x (p0, 256)) -+ -+/* -+** dup_257_s32_x: -+** mov (w[0-9]+), 257 -+** mov z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_257_s32_x, svint32_t, -+ z0 = svdup_n_s32_x (p0, 257), -+ z0 = svdup_s32_x (p0, 257)) -+ -+/* -+** dup_512_s32_x: -+** mov z0\.s, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_512_s32_x, svint32_t, -+ z0 = svdup_n_s32_x (p0, 512), -+ z0 = svdup_s32_x (p0, 512)) -+ -+/* -+** dup_7f00_s32_x: -+** mov z0\.s, #32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7f00_s32_x, svint32_t, -+ z0 = svdup_n_s32_x (p0, 0x7f00), -+ z0 = svdup_s32_x (p0, 0x7f00)) -+ -+/* -+** dup_7f01_s32_x: -+** mov (w[0-9]+), 32513 -+** mov z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7f01_s32_x, svint32_t, -+ z0 = svdup_n_s32_x (p0, 0x7f01), -+ z0 = svdup_s32_x (p0, 0x7f01)) -+ -+/* -+** dup_7ffd_s32_x: -+** mov (w[0-9]+), 32765 -+** mov z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7ffd_s32_x, svint32_t, -+ z0 = svdup_n_s32_x (p0, 0x7ffd), -+ z0 = svdup_s32_x (p0, 0x7ffd)) -+ -+/* -+** dup_7ffe_s32_x: -+** mov z0\.s, #32766 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7ffe_s32_x, svint32_t, -+ z0 = svdup_n_s32_x (p0, 0x7ffe), -+ z0 = svdup_s32_x (p0, 0x7ffe)) -+ -+/* -+** dup_7fff_s32_x: -+** mov z0\.s, 
#32767 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7fff_s32_x, svint32_t, -+ z0 = svdup_n_s32_x (p0, 0x7fff), -+ z0 = svdup_s32_x (p0, 0x7fff)) -+ -+/* -+** dup_m1_s32_x: -+** mov z0\.b, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m1_s32_x, svint32_t, -+ z0 = svdup_n_s32_x (p0, -1), -+ z0 = svdup_s32_x (p0, -1)) -+ -+/* -+** dup_m128_s32_x: -+** mov z0\.s, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m128_s32_x, svint32_t, -+ z0 = svdup_n_s32_x (p0, -128), -+ z0 = svdup_s32_x (p0, -128)) -+ -+/* -+** dup_m129_s32_x: -+** mov z0\.s, #-129 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m129_s32_x, svint32_t, -+ z0 = svdup_n_s32_x (p0, -129), -+ z0 = svdup_s32_x (p0, -129)) -+ -+/* -+** dup_m130_s32_x: -+** mvni v([0-9]+)\.4s, 0x81 -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m130_s32_x, svint32_t, -+ z0 = svdup_n_s32_x (p0, -130), -+ z0 = svdup_s32_x (p0, -130)) -+ -+/* -+** dup_m254_s32_x: -+** mvni v([0-9]+)\.4s, 0xfd -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m254_s32_x, svint32_t, -+ z0 = svdup_n_s32_x (p0, -254), -+ z0 = svdup_s32_x (p0, -254)) -+ -+/* -+** dup_m255_s32_x: -+** mov z0\.s, #-255 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m255_s32_x, svint32_t, -+ z0 = svdup_n_s32_x (p0, -255), -+ z0 = svdup_s32_x (p0, -255)) -+ -+/* -+** dup_m256_s32_x: -+** mov z0\.s, #-256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m256_s32_x, svint32_t, -+ z0 = svdup_n_s32_x (p0, -256), -+ z0 = svdup_s32_x (p0, -256)) -+ -+/* -+** dup_m257_s32_x: -+** mov z0\.s, #-257 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m257_s32_x, svint32_t, -+ z0 = svdup_n_s32_x (p0, -257), -+ z0 = svdup_s32_x (p0, -257)) -+ -+/* -+** dup_m258_s32_x: -+** mov (w[0-9]+), -258 -+** mov z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m258_s32_x, svint32_t, -+ z0 = svdup_n_s32_x (p0, -258), -+ z0 = svdup_s32_x (p0, -258)) -+ -+/* -+** dup_m259_s32_x: -+** mov (w[0-9]+), -259 -+** mov z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m259_s32_x, svint32_t, -+ z0 = svdup_n_s32_x (p0, -259), -+ z0 = svdup_s32_x (p0, -259)) -+ -+/* -+** dup_m512_s32_x: -+** mov z0\.s, #-512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m512_s32_x, svint32_t, -+ z0 = svdup_n_s32_x (p0, -512), -+ z0 = svdup_s32_x (p0, -512)) -+ -+/* -+** dup_m7f00_s32_x: -+** mov z0\.s, #-32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f00_s32_x, svint32_t, -+ z0 = svdup_n_s32_x (p0, -0x7f00), -+ z0 = svdup_s32_x (p0, -0x7f00)) -+ -+/* -+** dup_m7f01_s32_x: -+** mov z0\.s, #-32513 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f01_s32_x, svint32_t, -+ z0 = svdup_n_s32_x (p0, -0x7f01), -+ z0 = svdup_s32_x (p0, -0x7f01)) -+ -+/* -+** dup_m7f02_s32_x: -+** mov (w[0-9]+), -32514 -+** mov z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f02_s32_x, svint32_t, -+ z0 = svdup_n_s32_x (p0, -0x7f02), -+ z0 = svdup_s32_x (p0, -0x7f02)) -+ -+/* -+** dup_m7ffe_s32_x: -+** mov (w[0-9]+), -32766 -+** mov z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7ffe_s32_x, svint32_t, -+ z0 = svdup_n_s32_x (p0, -0x7ffe), -+ z0 = svdup_s32_x (p0, -0x7ffe)) -+ -+/* -+** dup_m7fff_s32_x: -+** mov z0\.s, #-32767 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7fff_s32_x, svint32_t, -+ z0 = svdup_n_s32_x (p0, -0x7fff), -+ z0 = svdup_s32_x (p0, -0x7fff)) -+ -+/* -+** dup_m8000_s32_x: -+** mov z0\.s, #-32768 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m8000_s32_x, svint32_t, -+ z0 = svdup_n_s32_x (p0, -0x8000), -+ z0 = svdup_s32_x (p0, -0x8000)) -+ -+/* -+** dup_w0_s32_x: -+** mov z0\.s, w0 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_w0_s32_x, svint32_t, int32_t, -+ z0 = svdup_n_s32_x (p0, x0), -+ z0 = svdup_s32_x (p0, x0)) -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s64.c -new file mode 100644 -index 000000000..6259b7fb5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s64.c -@@ -0,0 +1,1175 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dup_1_s64: -+** mov z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_s64, svint64_t, -+ z0 = svdup_n_s64 (1), -+ z0 = svdup_s64 (1)) -+ -+/* -+** dup_127_s64: -+** mov z0\.d, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_127_s64, svint64_t, -+ z0 = svdup_n_s64 (127), -+ z0 = svdup_s64 (127)) -+ -+/* -+** dup_128_s64: -+** mov z0\.d, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_128_s64, svint64_t, -+ z0 = svdup_n_s64 (128), -+ z0 = svdup_s64 (128)) -+ -+/* -+** dup_129_s64: -+** mov (x[0-9]+), 129 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_129_s64, svint64_t, -+ z0 = svdup_n_s64 (129), -+ z0 = svdup_s64 (129)) -+ -+/* -+** dup_253_s64: -+** mov (x[0-9]+), 253 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_253_s64, svint64_t, -+ z0 = svdup_n_s64 (253), -+ z0 = svdup_s64 (253)) -+ -+/* -+** dup_254_s64: -+** mov z0\.d, #254 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_254_s64, svint64_t, -+ z0 = svdup_n_s64 (254), -+ z0 = svdup_s64 (254)) -+ -+/* -+** dup_255_s64: -+** mov z0\.d, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_255_s64, svint64_t, -+ z0 = svdup_n_s64 (255), -+ z0 = svdup_s64 (255)) -+ -+/* -+** dup_256_s64: -+** mov z0\.d, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_256_s64, svint64_t, -+ z0 = svdup_n_s64 (256), -+ z0 = svdup_s64 (256)) -+ -+/* -+** dup_257_s64: -+** mov (x[0-9]+), 257 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_257_s64, svint64_t, -+ z0 = svdup_n_s64 (257), -+ z0 = svdup_s64 (257)) -+ -+/* -+** dup_512_s64: -+** mov z0\.d, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_512_s64, svint64_t, -+ z0 = svdup_n_s64 (512), -+ z0 = svdup_s64 (512)) -+ -+/* -+** dup_7f00_s64: -+** mov z0\.d, #32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7f00_s64, svint64_t, -+ z0 = svdup_n_s64 (0x7f00), -+ z0 = svdup_s64 (0x7f00)) -+ -+/* -+** dup_7f01_s64: -+** mov (x[0-9]+), 32513 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7f01_s64, svint64_t, -+ z0 = svdup_n_s64 (0x7f01), -+ z0 = svdup_s64 (0x7f01)) -+ -+/* -+** dup_7ffd_s64: -+** mov (x[0-9]+), 32765 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7ffd_s64, svint64_t, -+ z0 = svdup_n_s64 (0x7ffd), -+ z0 = svdup_s64 (0x7ffd)) -+ -+/* -+** dup_7ffe_s64: -+** mov z0\.d, #32766 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7ffe_s64, svint64_t, -+ z0 = svdup_n_s64 (0x7ffe), -+ z0 = svdup_s64 (0x7ffe)) -+ -+/* -+** dup_7fff_s64: -+** mov z0\.d, #32767 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7fff_s64, svint64_t, -+ z0 = svdup_n_s64 (0x7fff), -+ z0 = svdup_s64 (0x7fff)) -+ -+/* -+** dup_m1_s64: -+** mov z0\.b, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m1_s64, svint64_t, -+ z0 = svdup_n_s64 (-1), -+ z0 = svdup_s64 (-1)) -+ -+/* -+** dup_m128_s64: -+** mov z0\.d, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m128_s64, svint64_t, -+ z0 = svdup_n_s64 (-128), -+ z0 = svdup_s64 (-128)) -+ -+/* -+** dup_m129_s64: -+** mov z0\.d, #-129 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m129_s64, svint64_t, -+ z0 = svdup_n_s64 (-129), -+ z0 = svdup_s64 (-129)) -+ -+/* -+** dup_m130_s64: -+** mov (x[0-9]+), -130 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m130_s64, svint64_t, -+ z0 = svdup_n_s64 (-130), -+ z0 = svdup_s64 (-130)) -+ -+/* -+** dup_m254_s64: -+** mov (x[0-9]+), -254 -+** mov 
z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m254_s64, svint64_t, -+ z0 = svdup_n_s64 (-254), -+ z0 = svdup_s64 (-254)) -+ -+/* -+** dup_m255_s64: -+** mov z0\.d, #-255 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m255_s64, svint64_t, -+ z0 = svdup_n_s64 (-255), -+ z0 = svdup_s64 (-255)) -+ -+/* -+** dup_m256_s64: -+** mov z0\.d, #-256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m256_s64, svint64_t, -+ z0 = svdup_n_s64 (-256), -+ z0 = svdup_s64 (-256)) -+ -+/* -+** dup_m257_s64: -+** mov z0\.d, #-257 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m257_s64, svint64_t, -+ z0 = svdup_n_s64 (-257), -+ z0 = svdup_s64 (-257)) -+ -+/* -+** dup_m258_s64: -+** mov (x[0-9]+), -258 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m258_s64, svint64_t, -+ z0 = svdup_n_s64 (-258), -+ z0 = svdup_s64 (-258)) -+ -+/* -+** dup_m259_s64: -+** mov (x[0-9]+), -259 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m259_s64, svint64_t, -+ z0 = svdup_n_s64 (-259), -+ z0 = svdup_s64 (-259)) -+ -+/* -+** dup_m512_s64: -+** mov z0\.d, #-512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m512_s64, svint64_t, -+ z0 = svdup_n_s64 (-512), -+ z0 = svdup_s64 (-512)) -+ -+/* -+** dup_m7f00_s64: -+** mov z0\.d, #-32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f00_s64, svint64_t, -+ z0 = svdup_n_s64 (-0x7f00), -+ z0 = svdup_s64 (-0x7f00)) -+ -+/* -+** dup_m7f01_s64: -+** mov z0\.d, #-32513 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f01_s64, svint64_t, -+ z0 = svdup_n_s64 (-0x7f01), -+ z0 = svdup_s64 (-0x7f01)) -+ -+/* -+** dup_m7f02_s64: -+** mov (x[0-9]+), -32514 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f02_s64, svint64_t, -+ z0 = svdup_n_s64 (-0x7f02), -+ z0 = svdup_s64 (-0x7f02)) -+ -+/* -+** dup_m7ffe_s64: -+** mov (x[0-9]+), -32766 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7ffe_s64, svint64_t, -+ z0 = svdup_n_s64 (-0x7ffe), -+ z0 = svdup_s64 (-0x7ffe)) -+ -+/* -+** dup_m7fff_s64: -+** mov z0\.d, #-32767 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7fff_s64, svint64_t, -+ z0 = svdup_n_s64 (-0x7fff), -+ z0 = svdup_s64 (-0x7fff)) -+ -+/* -+** dup_m8000_s64: -+** mov z0\.d, #-32768 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m8000_s64, svint64_t, -+ z0 = svdup_n_s64 (-0x8000), -+ z0 = svdup_s64 (-0x8000)) -+ -+/* -+** dup_x0_s64: -+** mov z0\.d, x0 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_x0_s64, svint64_t, int64_t, -+ z0 = svdup_n_s64 (x0), -+ z0 = svdup_s64 (x0)) -+ -+/* -+** dup_1_s64_m: -+** mov z0\.d, p0/m, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_s64_m, svint64_t, -+ z0 = svdup_n_s64_m (z0, p0, 1), -+ z0 = svdup_s64_m (z0, p0, 1)) -+ -+/* -+** dup_127_s64_m: -+** mov z0\.d, p0/m, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_127_s64_m, svint64_t, -+ z0 = svdup_n_s64_m (z0, p0, 127), -+ z0 = svdup_s64_m (z0, p0, 127)) -+ -+/* -+** dup_128_s64_m: -+** mov (z[0-9]+\.d), #128 -+** sel z0\.d, p0, \1, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_128_s64_m, svint64_t, -+ z0 = svdup_n_s64_m (z0, p0, 128), -+ z0 = svdup_s64_m (z0, p0, 128)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_129_s64_m, svint64_t, -+ z0 = svdup_n_s64_m (z0, p0, 129), -+ z0 = svdup_s64_m (z0, p0, 129)) -+ -+/* TODO: Bad code and needs fixing. 
*/ -+TEST_UNIFORM_Z (dup_253_s64_m, svint64_t, -+ z0 = svdup_n_s64_m (z0, p0, 253), -+ z0 = svdup_s64_m (z0, p0, 253)) -+ -+/* -+** dup_254_s64_m: -+** mov (z[0-9]+\.d), #254 -+** sel z0\.d, p0, \1, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_254_s64_m, svint64_t, -+ z0 = svdup_n_s64_m (z0, p0, 254), -+ z0 = svdup_s64_m (z0, p0, 254)) -+ -+/* -+** dup_255_s64_m: -+** mov (z[0-9]+\.d), #255 -+** sel z0\.d, p0, \1, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_255_s64_m, svint64_t, -+ z0 = svdup_n_s64_m (z0, p0, 255), -+ z0 = svdup_s64_m (z0, p0, 255)) -+ -+/* -+** dup_256_s64_m: -+** mov z0\.d, p0/m, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_256_s64_m, svint64_t, -+ z0 = svdup_n_s64_m (z0, p0, 256), -+ z0 = svdup_s64_m (z0, p0, 256)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_257_s64_m, svint64_t, -+ z0 = svdup_n_s64_m (z0, p0, 257), -+ z0 = svdup_s64_m (z0, p0, 257)) -+ -+/* -+** dup_512_s64_m: -+** mov z0\.d, p0/m, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_512_s64_m, svint64_t, -+ z0 = svdup_n_s64_m (z0, p0, 512), -+ z0 = svdup_s64_m (z0, p0, 512)) -+ -+/* -+** dup_7f00_s64_m: -+** mov z0\.d, p0/m, #32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7f00_s64_m, svint64_t, -+ z0 = svdup_n_s64_m (z0, p0, 0x7f00), -+ z0 = svdup_s64_m (z0, p0, 0x7f00)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_7f01_s64_m, svint64_t, -+ z0 = svdup_n_s64_m (z0, p0, 0x7f01), -+ z0 = svdup_s64_m (z0, p0, 0x7f01)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_7ffd_s64_m, svint64_t, -+ z0 = svdup_n_s64_m (z0, p0, 0x7ffd), -+ z0 = svdup_s64_m (z0, p0, 0x7ffd)) -+ -+/* -+** dup_7ffe_s64_m: -+** mov (z[0-9]+\.d), #32766 -+** sel z0\.d, p0, \1, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7ffe_s64_m, svint64_t, -+ z0 = svdup_n_s64_m (z0, p0, 0x7ffe), -+ z0 = svdup_s64_m (z0, p0, 0x7ffe)) -+ -+/* -+** dup_7fff_s64_m: -+** mov (z[0-9]+\.d), #32767 -+** sel z0\.d, p0, \1, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7fff_s64_m, svint64_t, -+ z0 = svdup_n_s64_m (z0, p0, 0x7fff), -+ z0 = svdup_s64_m (z0, p0, 0x7fff)) -+ -+/* -+** dup_m1_s64_m: -+** mov z0\.d, p0/m, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m1_s64_m, svint64_t, -+ z0 = svdup_n_s64_m (z0, p0, -1), -+ z0 = svdup_s64_m (z0, p0, -1)) -+ -+/* -+** dup_m128_s64_m: -+** mov z0\.d, p0/m, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m128_s64_m, svint64_t, -+ z0 = svdup_n_s64_m (z0, p0, -128), -+ z0 = svdup_s64_m (z0, p0, -128)) -+ -+/* -+** dup_m129_s64_m: -+** mov (z[0-9]+\.d), #-129 -+** sel z0\.d, p0, \1, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m129_s64_m, svint64_t, -+ z0 = svdup_n_s64_m (z0, p0, -129), -+ z0 = svdup_s64_m (z0, p0, -129)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m130_s64_m, svint64_t, -+ z0 = svdup_n_s64_m (z0, p0, -130), -+ z0 = svdup_s64_m (z0, p0, -130)) -+ -+/* TODO: Bad code and needs fixing. 
*/ -+TEST_UNIFORM_Z (dup_m254_s64_m, svint64_t, -+ z0 = svdup_n_s64_m (z0, p0, -254), -+ z0 = svdup_s64_m (z0, p0, -254)) -+ -+/* -+** dup_m255_s64_m: -+** mov (z[0-9]+\.d), #-255 -+** sel z0\.d, p0, \1, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m255_s64_m, svint64_t, -+ z0 = svdup_n_s64_m (z0, p0, -255), -+ z0 = svdup_s64_m (z0, p0, -255)) -+ -+/* -+** dup_m256_s64_m: -+** mov z0\.d, p0/m, #-256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m256_s64_m, svint64_t, -+ z0 = svdup_n_s64_m (z0, p0, -256), -+ z0 = svdup_s64_m (z0, p0, -256)) -+ -+/* -+** dup_m257_s64_m: -+** mov (z[0-9]+\.d), #-257 -+** sel z0\.d, p0, \1, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m257_s64_m, svint64_t, -+ z0 = svdup_n_s64_m (z0, p0, -257), -+ z0 = svdup_s64_m (z0, p0, -257)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m258_s64_m, svint64_t, -+ z0 = svdup_n_s64_m (z0, p0, -258), -+ z0 = svdup_s64_m (z0, p0, -258)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m259_s64_m, svint64_t, -+ z0 = svdup_n_s64_m (z0, p0, -259), -+ z0 = svdup_s64_m (z0, p0, -259)) -+ -+/* -+** dup_m512_s64_m: -+** mov z0\.d, p0/m, #-512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m512_s64_m, svint64_t, -+ z0 = svdup_n_s64_m (z0, p0, -512), -+ z0 = svdup_s64_m (z0, p0, -512)) -+ -+/* -+** dup_m7f00_s64_m: -+** mov z0\.d, p0/m, #-32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f00_s64_m, svint64_t, -+ z0 = svdup_n_s64_m (z0, p0, -0x7f00), -+ z0 = svdup_s64_m (z0, p0, -0x7f00)) -+ -+/* -+** dup_m7f01_s64_m: -+** mov (z[0-9]+\.d), #-32513 -+** sel z0\.d, p0, \1, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f01_s64_m, svint64_t, -+ z0 = svdup_n_s64_m (z0, p0, -0x7f01), -+ z0 = svdup_s64_m (z0, p0, -0x7f01)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m7f02_s64_m, svint64_t, -+ z0 = svdup_n_s64_m (z0, p0, -0x7f02), -+ z0 = svdup_s64_m (z0, p0, -0x7f02)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m7ffe_s64_m, svint64_t, -+ z0 = svdup_n_s64_m (z0, p0, -0x7ffe), -+ z0 = svdup_s64_m (z0, p0, -0x7ffe)) -+ -+/* -+** dup_m7fff_s64_m: -+** mov (z[0-9]+\.d), #-32767 -+** sel z0\.d, p0, \1, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7fff_s64_m, svint64_t, -+ z0 = svdup_n_s64_m (z0, p0, -0x7fff), -+ z0 = svdup_s64_m (z0, p0, -0x7fff)) -+ -+/* -+** dup_m8000_s64_m: -+** mov z0\.d, p0/m, #-32768 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m8000_s64_m, svint64_t, -+ z0 = svdup_n_s64_m (z0, p0, -0x8000), -+ z0 = svdup_s64_m (z0, p0, -0x8000)) -+ -+/* -+** dup_0_s64_m: -+** mov z0\.d, p0/m, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_0_s64_m, svint64_t, -+ z0 = svdup_n_s64_m (z0, p0, 0), -+ z0 = svdup_s64_m (z0, p0, 0)) -+ -+/* -+** dup_x0_s64_m: -+** movprfx z0, z1 -+** mov z0\.d, p0/m, x0 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_x0_s64_m, svint64_t, int64_t, -+ z0 = svdup_n_s64_m (z1, p0, x0), -+ z0 = svdup_s64_m (z1, p0, x0)) -+ -+/* -+** dup_1_s64_z: -+** mov z0\.d, p0/z, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_s64_z, svint64_t, -+ z0 = svdup_n_s64_z (p0, 1), -+ z0 = svdup_s64_z (p0, 1)) -+ -+/* -+** dup_127_s64_z: -+** mov z0\.d, p0/z, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_127_s64_z, svint64_t, -+ z0 = svdup_n_s64_z (p0, 127), -+ z0 = svdup_s64_z (p0, 127)) -+ -+/* -+** dup_128_s64_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.d), #128 -+** sel z0\.d, p0, \2, \1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_128_s64_z, svint64_t, -+ z0 = svdup_n_s64_z (p0, 128), -+ z0 = svdup_s64_z (p0, 128)) -+ -+/* TODO: Bad code and needs fixing. 
*/ -+TEST_UNIFORM_Z (dup_129_s64_z, svint64_t, -+ z0 = svdup_n_s64_z (p0, 129), -+ z0 = svdup_s64_z (p0, 129)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_253_s64_z, svint64_t, -+ z0 = svdup_n_s64_z (p0, 253), -+ z0 = svdup_s64_z (p0, 253)) -+ -+/* -+** dup_254_s64_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.d), #254 -+** sel z0\.d, p0, \2, \1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_254_s64_z, svint64_t, -+ z0 = svdup_n_s64_z (p0, 254), -+ z0 = svdup_s64_z (p0, 254)) -+ -+/* -+** dup_255_s64_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.d), #255 -+** sel z0\.d, p0, \2, \1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_255_s64_z, svint64_t, -+ z0 = svdup_n_s64_z (p0, 255), -+ z0 = svdup_s64_z (p0, 255)) -+ -+/* -+** dup_256_s64_z: -+** mov z0\.d, p0/z, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_256_s64_z, svint64_t, -+ z0 = svdup_n_s64_z (p0, 256), -+ z0 = svdup_s64_z (p0, 256)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_257_s64_z, svint64_t, -+ z0 = svdup_n_s64_z (p0, 257), -+ z0 = svdup_s64_z (p0, 257)) -+ -+/* -+** dup_512_s64_z: -+** mov z0\.d, p0/z, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_512_s64_z, svint64_t, -+ z0 = svdup_n_s64_z (p0, 512), -+ z0 = svdup_s64_z (p0, 512)) -+ -+/* -+** dup_7f00_s64_z: -+** mov z0\.d, p0/z, #32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7f00_s64_z, svint64_t, -+ z0 = svdup_n_s64_z (p0, 0x7f00), -+ z0 = svdup_s64_z (p0, 0x7f00)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_7f01_s64_z, svint64_t, -+ z0 = svdup_n_s64_z (p0, 0x7f01), -+ z0 = svdup_s64_z (p0, 0x7f01)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_7ffd_s64_z, svint64_t, -+ z0 = svdup_n_s64_z (p0, 0x7ffd), -+ z0 = svdup_s64_z (p0, 0x7ffd)) -+ -+/* -+** dup_7ffe_s64_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.d), #32766 -+** sel z0\.d, p0, \2, \1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7ffe_s64_z, svint64_t, -+ z0 = svdup_n_s64_z (p0, 0x7ffe), -+ z0 = svdup_s64_z (p0, 0x7ffe)) -+ -+/* -+** dup_7fff_s64_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.d), #32767 -+** sel z0\.d, p0, \2, \1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7fff_s64_z, svint64_t, -+ z0 = svdup_n_s64_z (p0, 0x7fff), -+ z0 = svdup_s64_z (p0, 0x7fff)) -+ -+/* -+** dup_m1_s64_z: -+** mov z0\.d, p0/z, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m1_s64_z, svint64_t, -+ z0 = svdup_n_s64_z (p0, -1), -+ z0 = svdup_s64_z (p0, -1)) -+ -+/* -+** dup_m128_s64_z: -+** mov z0\.d, p0/z, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m128_s64_z, svint64_t, -+ z0 = svdup_n_s64_z (p0, -128), -+ z0 = svdup_s64_z (p0, -128)) -+ -+/* -+** dup_m129_s64_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.d), #-129 -+** sel z0\.d, p0, \2, \1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m129_s64_z, svint64_t, -+ z0 = svdup_n_s64_z (p0, -129), -+ z0 = svdup_s64_z (p0, -129)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m130_s64_z, svint64_t, -+ z0 = svdup_n_s64_z (p0, -130), -+ z0 = svdup_s64_z (p0, -130)) -+ -+/* TODO: Bad code and needs fixing. 
*/ -+TEST_UNIFORM_Z (dup_m254_s64_z, svint64_t, -+ z0 = svdup_n_s64_z (p0, -254), -+ z0 = svdup_s64_z (p0, -254)) -+ -+/* -+** dup_m255_s64_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.d), #-255 -+** sel z0\.d, p0, \2, \1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m255_s64_z, svint64_t, -+ z0 = svdup_n_s64_z (p0, -255), -+ z0 = svdup_s64_z (p0, -255)) -+ -+/* -+** dup_m256_s64_z: -+** mov z0\.d, p0/z, #-256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m256_s64_z, svint64_t, -+ z0 = svdup_n_s64_z (p0, -256), -+ z0 = svdup_s64_z (p0, -256)) -+ -+/* -+** dup_m257_s64_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.d), #-257 -+** sel z0\.d, p0, \2, \1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m257_s64_z, svint64_t, -+ z0 = svdup_n_s64_z (p0, -257), -+ z0 = svdup_s64_z (p0, -257)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m258_s64_z, svint64_t, -+ z0 = svdup_n_s64_z (p0, -258), -+ z0 = svdup_s64_z (p0, -258)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m259_s64_z, svint64_t, -+ z0 = svdup_n_s64_z (p0, -259), -+ z0 = svdup_s64_z (p0, -259)) -+ -+/* -+** dup_m512_s64_z: -+** mov z0\.d, p0/z, #-512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m512_s64_z, svint64_t, -+ z0 = svdup_n_s64_z (p0, -512), -+ z0 = svdup_s64_z (p0, -512)) -+ -+/* -+** dup_m7f00_s64_z: -+** mov z0\.d, p0/z, #-32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f00_s64_z, svint64_t, -+ z0 = svdup_n_s64_z (p0, -0x7f00), -+ z0 = svdup_s64_z (p0, -0x7f00)) -+ -+/* -+** dup_m7f01_s64_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.d), #-32513 -+** sel z0\.d, p0, \2, \1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f01_s64_z, svint64_t, -+ z0 = svdup_n_s64_z (p0, -0x7f01), -+ z0 = svdup_s64_z (p0, -0x7f01)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m7f02_s64_z, svint64_t, -+ z0 = svdup_n_s64_z (p0, -0x7f02), -+ z0 = svdup_s64_z (p0, -0x7f02)) -+ -+/* TODO: Bad code and needs fixing. 
*/ -+TEST_UNIFORM_Z (dup_m7ffe_s64_z, svint64_t, -+ z0 = svdup_n_s64_z (p0, -0x7ffe), -+ z0 = svdup_s64_z (p0, -0x7ffe)) -+ -+/* -+** dup_m7fff_s64_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.d), #-32767 -+** sel z0\.d, p0, \2, \1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7fff_s64_z, svint64_t, -+ z0 = svdup_n_s64_z (p0, -0x7fff), -+ z0 = svdup_s64_z (p0, -0x7fff)) -+ -+/* -+** dup_m8000_s64_z: -+** mov z0\.d, p0/z, #-32768 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m8000_s64_z, svint64_t, -+ z0 = svdup_n_s64_z (p0, -0x8000), -+ z0 = svdup_s64_z (p0, -0x8000)) -+ -+/* -+** dup_0_s64_z: -+** mov z0\.d, p0/z, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_0_s64_z, svint64_t, -+ z0 = svdup_n_s64_z (p0, 0), -+ z0 = svdup_s64_z (p0, 0)) -+ -+/* -+** dup_x0_s64_z: -+** movprfx z0\.d, p0/z, z0\.d -+** mov z0\.d, p0/m, x0 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_x0_s64_z, svint64_t, int64_t, -+ z0 = svdup_n_s64_z (p0, x0), -+ z0 = svdup_s64_z (p0, x0)) -+ -+/* -+** dup_1_s64_x: -+** mov z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_s64_x, svint64_t, -+ z0 = svdup_n_s64_x (p0, 1), -+ z0 = svdup_s64_x (p0, 1)) -+ -+/* -+** dup_127_s64_x: -+** mov z0\.d, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_127_s64_x, svint64_t, -+ z0 = svdup_n_s64_x (p0, 127), -+ z0 = svdup_s64_x (p0, 127)) -+ -+/* -+** dup_128_s64_x: -+** mov z0\.d, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_128_s64_x, svint64_t, -+ z0 = svdup_n_s64_x (p0, 128), -+ z0 = svdup_s64_x (p0, 128)) -+ -+/* -+** dup_129_s64_x: -+** mov (x[0-9]+), 129 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_129_s64_x, svint64_t, -+ z0 = svdup_n_s64_x (p0, 129), -+ z0 = svdup_s64_x (p0, 129)) -+ -+/* -+** dup_253_s64_x: -+** mov (x[0-9]+), 253 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_253_s64_x, svint64_t, -+ z0 = svdup_n_s64_x (p0, 253), -+ z0 = svdup_s64_x (p0, 253)) -+ -+/* -+** dup_254_s64_x: -+** mov z0\.d, #254 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_254_s64_x, svint64_t, -+ z0 = svdup_n_s64_x (p0, 254), -+ z0 = svdup_s64_x (p0, 254)) -+ -+/* -+** dup_255_s64_x: -+** mov z0\.d, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_255_s64_x, svint64_t, -+ z0 = svdup_n_s64_x (p0, 255), -+ z0 = svdup_s64_x (p0, 255)) -+ -+/* -+** dup_256_s64_x: -+** mov z0\.d, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_256_s64_x, svint64_t, -+ z0 = svdup_n_s64_x (p0, 256), -+ z0 = svdup_s64_x (p0, 256)) -+ -+/* -+** dup_257_s64_x: -+** mov (x[0-9]+), 257 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_257_s64_x, svint64_t, -+ z0 = svdup_n_s64_x (p0, 257), -+ z0 = svdup_s64_x (p0, 257)) -+ -+/* -+** dup_512_s64_x: -+** mov z0\.d, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_512_s64_x, svint64_t, -+ z0 = svdup_n_s64_x (p0, 512), -+ z0 = svdup_s64_x (p0, 512)) -+ -+/* -+** dup_7f00_s64_x: -+** mov z0\.d, #32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7f00_s64_x, svint64_t, -+ z0 = svdup_n_s64_x (p0, 0x7f00), -+ z0 = svdup_s64_x (p0, 0x7f00)) -+ -+/* -+** dup_7f01_s64_x: -+** mov (x[0-9]+), 32513 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7f01_s64_x, svint64_t, -+ z0 = svdup_n_s64_x (p0, 0x7f01), -+ z0 = svdup_s64_x (p0, 0x7f01)) -+ -+/* -+** dup_7ffd_s64_x: -+** mov (x[0-9]+), 32765 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7ffd_s64_x, svint64_t, -+ z0 = svdup_n_s64_x (p0, 0x7ffd), -+ z0 = svdup_s64_x (p0, 0x7ffd)) -+ -+/* -+** dup_7ffe_s64_x: -+** mov z0\.d, #32766 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7ffe_s64_x, svint64_t, -+ z0 = svdup_n_s64_x (p0, 0x7ffe), -+ z0 = svdup_s64_x (p0, 0x7ffe)) -+ -+/* -+** dup_7fff_s64_x: -+** mov z0\.d, #32767 -+** ret -+*/ 
-+TEST_UNIFORM_Z (dup_7fff_s64_x, svint64_t, -+ z0 = svdup_n_s64_x (p0, 0x7fff), -+ z0 = svdup_s64_x (p0, 0x7fff)) -+ -+/* -+** dup_m1_s64_x: -+** mov z0\.b, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m1_s64_x, svint64_t, -+ z0 = svdup_n_s64_x (p0, -1), -+ z0 = svdup_s64_x (p0, -1)) -+ -+/* -+** dup_m128_s64_x: -+** mov z0\.d, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m128_s64_x, svint64_t, -+ z0 = svdup_n_s64_x (p0, -128), -+ z0 = svdup_s64_x (p0, -128)) -+ -+/* -+** dup_m129_s64_x: -+** mov z0\.d, #-129 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m129_s64_x, svint64_t, -+ z0 = svdup_n_s64_x (p0, -129), -+ z0 = svdup_s64_x (p0, -129)) -+ -+/* -+** dup_m130_s64_x: -+** mov (x[0-9]+), -130 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m130_s64_x, svint64_t, -+ z0 = svdup_n_s64_x (p0, -130), -+ z0 = svdup_s64_x (p0, -130)) -+ -+/* -+** dup_m254_s64_x: -+** mov (x[0-9]+), -254 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m254_s64_x, svint64_t, -+ z0 = svdup_n_s64_x (p0, -254), -+ z0 = svdup_s64_x (p0, -254)) -+ -+/* -+** dup_m255_s64_x: -+** mov z0\.d, #-255 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m255_s64_x, svint64_t, -+ z0 = svdup_n_s64_x (p0, -255), -+ z0 = svdup_s64_x (p0, -255)) -+ -+/* -+** dup_m256_s64_x: -+** mov z0\.d, #-256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m256_s64_x, svint64_t, -+ z0 = svdup_n_s64_x (p0, -256), -+ z0 = svdup_s64_x (p0, -256)) -+ -+/* -+** dup_m257_s64_x: -+** mov z0\.d, #-257 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m257_s64_x, svint64_t, -+ z0 = svdup_n_s64_x (p0, -257), -+ z0 = svdup_s64_x (p0, -257)) -+ -+/* -+** dup_m258_s64_x: -+** mov (x[0-9]+), -258 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m258_s64_x, svint64_t, -+ z0 = svdup_n_s64_x (p0, -258), -+ z0 = svdup_s64_x (p0, -258)) -+ -+/* -+** dup_m259_s64_x: -+** mov (x[0-9]+), -259 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m259_s64_x, svint64_t, -+ z0 = svdup_n_s64_x (p0, -259), -+ z0 = svdup_s64_x (p0, -259)) -+ -+/* -+** dup_m512_s64_x: -+** mov z0\.d, #-512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m512_s64_x, svint64_t, -+ z0 = svdup_n_s64_x (p0, -512), -+ z0 = svdup_s64_x (p0, -512)) -+ -+/* -+** dup_m7f00_s64_x: -+** mov z0\.d, #-32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f00_s64_x, svint64_t, -+ z0 = svdup_n_s64_x (p0, -0x7f00), -+ z0 = svdup_s64_x (p0, -0x7f00)) -+ -+/* -+** dup_m7f01_s64_x: -+** mov z0\.d, #-32513 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f01_s64_x, svint64_t, -+ z0 = svdup_n_s64_x (p0, -0x7f01), -+ z0 = svdup_s64_x (p0, -0x7f01)) -+ -+/* -+** dup_m7f02_s64_x: -+** mov (x[0-9]+), -32514 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f02_s64_x, svint64_t, -+ z0 = svdup_n_s64_x (p0, -0x7f02), -+ z0 = svdup_s64_x (p0, -0x7f02)) -+ -+/* -+** dup_m7ffe_s64_x: -+** mov (x[0-9]+), -32766 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7ffe_s64_x, svint64_t, -+ z0 = svdup_n_s64_x (p0, -0x7ffe), -+ z0 = svdup_s64_x (p0, -0x7ffe)) -+ -+/* -+** dup_m7fff_s64_x: -+** mov z0\.d, #-32767 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7fff_s64_x, svint64_t, -+ z0 = svdup_n_s64_x (p0, -0x7fff), -+ z0 = svdup_s64_x (p0, -0x7fff)) -+ -+/* -+** dup_m8000_s64_x: -+** mov z0\.d, #-32768 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m8000_s64_x, svint64_t, -+ z0 = svdup_n_s64_x (p0, -0x8000), -+ z0 = svdup_s64_x (p0, -0x8000)) -+ -+/* -+** dup_x0_s64_x: -+** mov z0\.d, x0 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_x0_s64_x, svint64_t, int64_t, -+ z0 = svdup_n_s64_x (p0, x0), -+ z0 = svdup_s64_x (p0, x0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s8.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s8.c -new file mode 100644 -index 000000000..96fc5fa64 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_s8.c -@@ -0,0 +1,383 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dup_1_s8: -+** mov z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_s8, svint8_t, -+ z0 = svdup_n_s8 (1), -+ z0 = svdup_s8 (1)) -+ -+/* -+** dup_127_s8: -+** mov z0\.b, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_127_s8, svint8_t, -+ z0 = svdup_n_s8 (127), -+ z0 = svdup_s8 (127)) -+ -+/* -+** dup_128_s8: -+** mov z0\.b, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_128_s8, svint8_t, -+ z0 = svdup_n_s8 (128), -+ z0 = svdup_s8 (128)) -+ -+/* -+** dup_129_s8: -+** mov z0\.b, #-127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_129_s8, svint8_t, -+ z0 = svdup_n_s8 (129), -+ z0 = svdup_s8 (129)) -+ -+/* -+** dup_253_s8: -+** mov z0\.b, #-3 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_253_s8, svint8_t, -+ z0 = svdup_n_s8 (253), -+ z0 = svdup_s8 (253)) -+ -+/* -+** dup_254_s8: -+** mov z0\.b, #-2 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_254_s8, svint8_t, -+ z0 = svdup_n_s8 (254), -+ z0 = svdup_s8 (254)) -+ -+/* -+** dup_255_s8: -+** mov z0\.b, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_255_s8, svint8_t, -+ z0 = svdup_n_s8 (255), -+ z0 = svdup_s8 (255)) -+ -+/* -+** dup_m1_s8: -+** mov z0\.b, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m1_s8, svint8_t, -+ z0 = svdup_n_s8 (-1), -+ z0 = svdup_s8 (-1)) -+ -+/* -+** dup_m128_s8: -+** mov z0\.b, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m128_s8, svint8_t, -+ z0 = svdup_n_s8 (-128), -+ z0 = svdup_s8 (-128)) -+ -+/* -+** dup_w0_s8: -+** mov z0\.b, w0 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_w0_s8, svint8_t, int8_t, -+ z0 = svdup_n_s8 (x0), -+ z0 = svdup_s8 (x0)) -+ -+/* -+** dup_1_s8_m: -+** mov z0\.b, p0/m, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_s8_m, svint8_t, -+ z0 = svdup_n_s8_m (z0, p0, 1), -+ z0 = svdup_s8_m (z0, p0, 1)) -+ -+/* -+** dup_127_s8_m: -+** mov z0\.b, p0/m, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_127_s8_m, svint8_t, -+ z0 = svdup_n_s8_m (z0, p0, 127), -+ z0 = svdup_s8_m (z0, p0, 127)) -+ -+/* -+** dup_128_s8_m: -+** mov z0\.b, p0/m, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_128_s8_m, svint8_t, -+ z0 = svdup_n_s8_m (z0, p0, 128), -+ z0 = svdup_s8_m (z0, p0, 128)) -+ -+/* -+** dup_129_s8_m: -+** mov z0\.b, p0/m, #-127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_129_s8_m, svint8_t, -+ z0 = svdup_n_s8_m (z0, p0, 129), -+ z0 = svdup_s8_m (z0, p0, 129)) -+ -+/* -+** dup_253_s8_m: -+** mov z0\.b, p0/m, #-3 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_253_s8_m, svint8_t, -+ z0 = svdup_n_s8_m (z0, p0, 253), -+ z0 = svdup_s8_m (z0, p0, 253)) -+ -+/* -+** dup_254_s8_m: -+** mov z0\.b, p0/m, #-2 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_254_s8_m, svint8_t, -+ z0 = svdup_n_s8_m (z0, p0, 254), -+ z0 = svdup_s8_m (z0, p0, 254)) -+ -+/* -+** dup_255_s8_m: -+** mov z0\.b, p0/m, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_255_s8_m, svint8_t, -+ z0 = svdup_n_s8_m (z0, p0, 255), -+ z0 = svdup_s8_m (z0, p0, 255)) -+ -+/* -+** dup_m1_s8_m: -+** mov z0\.b, p0/m, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m1_s8_m, svint8_t, -+ z0 = svdup_n_s8_m (z0, p0, -1), -+ z0 = svdup_s8_m (z0, p0, -1)) -+ -+/* -+** dup_m128_s8_m: -+** mov z0\.b, p0/m, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m128_s8_m, svint8_t, -+ z0 = svdup_n_s8_m (z0, p0, -128), -+ z0 = svdup_s8_m (z0, p0, -128)) -+ -+/* -+** dup_0_s8_m: -+** mov z0\.b, p0/m, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_0_s8_m, svint8_t, -+ z0 = svdup_n_s8_m (z0, p0, 0), -+ z0 = 
svdup_s8_m (z0, p0, 0)) -+ -+/* -+** dup_w0_s8_m: -+** movprfx z0, z1 -+** mov z0\.b, p0/m, w0 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_w0_s8_m, svint8_t, int8_t, -+ z0 = svdup_n_s8_m (z1, p0, x0), -+ z0 = svdup_s8_m (z1, p0, x0)) -+ -+/* -+** dup_1_s8_z: -+** mov z0\.b, p0/z, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_s8_z, svint8_t, -+ z0 = svdup_n_s8_z (p0, 1), -+ z0 = svdup_s8_z (p0, 1)) -+ -+/* -+** dup_127_s8_z: -+** mov z0\.b, p0/z, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_127_s8_z, svint8_t, -+ z0 = svdup_n_s8_z (p0, 127), -+ z0 = svdup_s8_z (p0, 127)) -+ -+/* -+** dup_128_s8_z: -+** mov z0\.b, p0/z, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_128_s8_z, svint8_t, -+ z0 = svdup_n_s8_z (p0, 128), -+ z0 = svdup_s8_z (p0, 128)) -+ -+/* -+** dup_129_s8_z: -+** mov z0\.b, p0/z, #-127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_129_s8_z, svint8_t, -+ z0 = svdup_n_s8_z (p0, 129), -+ z0 = svdup_s8_z (p0, 129)) -+ -+/* -+** dup_253_s8_z: -+** mov z0\.b, p0/z, #-3 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_253_s8_z, svint8_t, -+ z0 = svdup_n_s8_z (p0, 253), -+ z0 = svdup_s8_z (p0, 253)) -+ -+/* -+** dup_254_s8_z: -+** mov z0\.b, p0/z, #-2 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_254_s8_z, svint8_t, -+ z0 = svdup_n_s8_z (p0, 254), -+ z0 = svdup_s8_z (p0, 254)) -+ -+/* -+** dup_255_s8_z: -+** mov z0\.b, p0/z, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_255_s8_z, svint8_t, -+ z0 = svdup_n_s8_z (p0, 255), -+ z0 = svdup_s8_z (p0, 255)) -+ -+/* -+** dup_m1_s8_z: -+** mov z0\.b, p0/z, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m1_s8_z, svint8_t, -+ z0 = svdup_n_s8_z (p0, -1), -+ z0 = svdup_s8_z (p0, -1)) -+ -+/* -+** dup_m128_s8_z: -+** mov z0\.b, p0/z, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m128_s8_z, svint8_t, -+ z0 = svdup_n_s8_z (p0, -128), -+ z0 = svdup_s8_z (p0, -128)) -+ -+/* -+** dup_0_s8_z: -+** mov z0\.b, p0/z, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_0_s8_z, svint8_t, -+ z0 = svdup_n_s8_z (p0, 0), -+ z0 = svdup_s8_z (p0, 0)) -+ -+/* -+** dup_w0_s8_z: -+** movprfx z0\.b, p0/z, z0\.b -+** mov z0\.b, p0/m, w0 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_w0_s8_z, svint8_t, int8_t, -+ z0 = svdup_n_s8_z (p0, x0), -+ z0 = svdup_s8_z (p0, x0)) -+ -+/* -+** dup_1_s8_x: -+** mov z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_s8_x, svint8_t, -+ z0 = svdup_n_s8_x (p0, 1), -+ z0 = svdup_s8_x (p0, 1)) -+ -+/* -+** dup_127_s8_x: -+** mov z0\.b, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_127_s8_x, svint8_t, -+ z0 = svdup_n_s8_x (p0, 127), -+ z0 = svdup_s8_x (p0, 127)) -+ -+/* -+** dup_128_s8_x: -+** mov z0\.b, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_128_s8_x, svint8_t, -+ z0 = svdup_n_s8_x (p0, 128), -+ z0 = svdup_s8_x (p0, 128)) -+ -+/* -+** dup_129_s8_x: -+** mov z0\.b, #-127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_129_s8_x, svint8_t, -+ z0 = svdup_n_s8_x (p0, 129), -+ z0 = svdup_s8_x (p0, 129)) -+ -+/* -+** dup_253_s8_x: -+** mov z0\.b, #-3 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_253_s8_x, svint8_t, -+ z0 = svdup_n_s8_x (p0, 253), -+ z0 = svdup_s8_x (p0, 253)) -+ -+/* -+** dup_254_s8_x: -+** mov z0\.b, #-2 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_254_s8_x, svint8_t, -+ z0 = svdup_n_s8_x (p0, 254), -+ z0 = svdup_s8_x (p0, 254)) -+ -+/* -+** dup_255_s8_x: -+** mov z0\.b, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_255_s8_x, svint8_t, -+ z0 = svdup_n_s8_x (p0, 255), -+ z0 = svdup_s8_x (p0, 255)) -+ -+/* -+** dup_m1_s8_x: -+** mov z0\.b, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m1_s8_x, svint8_t, -+ z0 = svdup_n_s8_x (p0, -1), -+ z0 = svdup_s8_x (p0, -1)) -+ -+/* -+** dup_m128_s8_x: -+** mov z0\.b, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m128_s8_x, svint8_t, -+ 
z0 = svdup_n_s8_x (p0, -128), -+ z0 = svdup_s8_x (p0, -128)) -+ -+/* -+** dup_w0_s8_x: -+** mov z0\.b, w0 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_w0_s8_x, svint8_t, int8_t, -+ z0 = svdup_n_s8_x (p0, x0), -+ z0 = svdup_s8_x (p0, x0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u16.c -new file mode 100644 -index 000000000..263eafef0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u16.c -@@ -0,0 +1,1193 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dup_1_u16: -+** mov z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_u16, svuint16_t, -+ z0 = svdup_n_u16 (1), -+ z0 = svdup_u16 (1)) -+ -+/* -+** dup_127_u16: -+** mov z0\.h, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_127_u16, svuint16_t, -+ z0 = svdup_n_u16 (127), -+ z0 = svdup_u16 (127)) -+ -+/* -+** dup_128_u16: -+** mov z0\.h, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_128_u16, svuint16_t, -+ z0 = svdup_n_u16 (128), -+ z0 = svdup_u16 (128)) -+ -+/* -+** dup_129_u16: -+** movi v([0-9]+)\.8h, 0x81 -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_129_u16, svuint16_t, -+ z0 = svdup_n_u16 (129), -+ z0 = svdup_u16 (129)) -+ -+/* -+** dup_253_u16: -+** movi v([0-9]+)\.8h, 0xfd -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_253_u16, svuint16_t, -+ z0 = svdup_n_u16 (253), -+ z0 = svdup_u16 (253)) -+ -+/* -+** dup_254_u16: -+** mov z0\.h, #254 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_254_u16, svuint16_t, -+ z0 = svdup_n_u16 (254), -+ z0 = svdup_u16 (254)) -+ -+/* -+** dup_255_u16: -+** mov z0\.h, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_255_u16, svuint16_t, -+ z0 = svdup_n_u16 (255), -+ z0 = svdup_u16 (255)) -+ -+/* -+** dup_256_u16: -+** mov z0\.h, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_256_u16, svuint16_t, -+ z0 = svdup_n_u16 (256), -+ z0 = svdup_u16 (256)) -+ -+/* -+** dup_257_u16: -+** mov z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_257_u16, svuint16_t, -+ z0 = svdup_n_u16 (257), -+ z0 = svdup_u16 (257)) -+ -+/* -+** dup_512_u16: -+** mov z0\.h, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_512_u16, svuint16_t, -+ z0 = svdup_n_u16 (512), -+ z0 = svdup_u16 (512)) -+ -+/* -+** dup_7f00_u16: -+** mov z0\.h, #32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7f00_u16, svuint16_t, -+ z0 = svdup_n_u16 (0x7f00), -+ z0 = svdup_u16 (0x7f00)) -+ -+/* -+** dup_7f01_u16: -+** mov (w[0-9]+), 32513 -+** mov z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7f01_u16, svuint16_t, -+ z0 = svdup_n_u16 (0x7f01), -+ z0 = svdup_u16 (0x7f01)) -+ -+/* -+** dup_7ffd_u16: -+** mov (w[0-9]+), 32765 -+** mov z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7ffd_u16, svuint16_t, -+ z0 = svdup_n_u16 (0x7ffd), -+ z0 = svdup_u16 (0x7ffd)) -+ -+/* -+** dup_7ffe_u16: -+** mov z0\.h, #32766 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7ffe_u16, svuint16_t, -+ z0 = svdup_n_u16 (0x7ffe), -+ z0 = svdup_u16 (0x7ffe)) -+ -+/* -+** dup_7fff_u16: -+** mov z0\.h, #32767 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7fff_u16, svuint16_t, -+ z0 = svdup_n_u16 (0x7fff), -+ z0 = svdup_u16 (0x7fff)) -+ -+/* -+** dup_m1_u16: -+** mov z0\.b, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m1_u16, svuint16_t, -+ z0 = svdup_n_u16 (-1), -+ z0 = svdup_u16 (-1)) -+ -+/* -+** dup_m128_u16: -+** mov z0\.h, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m128_u16, svuint16_t, -+ z0 = svdup_n_u16 (-128), -+ z0 = svdup_u16 (-128)) -+ -+/* -+** dup_m129_u16: -+** mov z0\.h, #-129 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m129_u16, svuint16_t, -+ z0 = svdup_n_u16 (-129), -+ 
z0 = svdup_u16 (-129)) -+ -+/* -+** dup_m130_u16: -+** mvni v([0-9]+)\.8h, 0x81 -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m130_u16, svuint16_t, -+ z0 = svdup_n_u16 (-130), -+ z0 = svdup_u16 (-130)) -+ -+/* -+** dup_m254_u16: -+** mvni v([0-9]+)\.8h, 0xfd -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m254_u16, svuint16_t, -+ z0 = svdup_n_u16 (-254), -+ z0 = svdup_u16 (-254)) -+ -+/* -+** dup_m255_u16: -+** mov z0\.h, #-255 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m255_u16, svuint16_t, -+ z0 = svdup_n_u16 (-255), -+ z0 = svdup_u16 (-255)) -+ -+/* -+** dup_m256_u16: -+** mov z0\.h, #-256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m256_u16, svuint16_t, -+ z0 = svdup_n_u16 (-256), -+ z0 = svdup_u16 (-256)) -+ -+/* -+** dup_m257_u16: -+** mov z0\.h, #-257 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m257_u16, svuint16_t, -+ z0 = svdup_n_u16 (-257), -+ z0 = svdup_u16 (-257)) -+ -+/* -+** dup_m258_u16: -+** mov z0\.b, #-2 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m258_u16, svuint16_t, -+ z0 = svdup_n_u16 (-258), -+ z0 = svdup_u16 (-258)) -+ -+/* -+** dup_m259_u16: -+** mov (w[0-9]+), -259 -+** mov z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m259_u16, svuint16_t, -+ z0 = svdup_n_u16 (-259), -+ z0 = svdup_u16 (-259)) -+ -+/* -+** dup_m512_u16: -+** mov z0\.h, #-512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m512_u16, svuint16_t, -+ z0 = svdup_n_u16 (-512), -+ z0 = svdup_u16 (-512)) -+ -+/* -+** dup_m7f00_u16: -+** mov z0\.h, #-32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f00_u16, svuint16_t, -+ z0 = svdup_n_u16 (-0x7f00), -+ z0 = svdup_u16 (-0x7f00)) -+ -+/* -+** dup_m7f01_u16: -+** mov z0\.h, #-32513 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f01_u16, svuint16_t, -+ z0 = svdup_n_u16 (-0x7f01), -+ z0 = svdup_u16 (-0x7f01)) -+ -+/* -+** dup_m7f02_u16: -+** mov (w[0-9]+), -32514 -+** mov z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f02_u16, svuint16_t, -+ z0 = svdup_n_u16 (-0x7f02), -+ z0 = svdup_u16 (-0x7f02)) -+ -+/* -+** dup_m7ffe_u16: -+** mov (w[0-9]+), -32766 -+** mov z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7ffe_u16, svuint16_t, -+ z0 = svdup_n_u16 (-0x7ffe), -+ z0 = svdup_u16 (-0x7ffe)) -+ -+/* -+** dup_m7fff_u16: -+** mov z0\.h, #-32767 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7fff_u16, svuint16_t, -+ z0 = svdup_n_u16 (-0x7fff), -+ z0 = svdup_u16 (-0x7fff)) -+ -+/* -+** dup_m8000_u16: -+** mov z0\.h, #-32768 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m8000_u16, svuint16_t, -+ z0 = svdup_n_u16 (-0x8000), -+ z0 = svdup_u16 (-0x8000)) -+ -+/* -+** dup_w0_u16: -+** mov z0\.h, w0 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_w0_u16, svuint16_t, uint16_t, -+ z0 = svdup_n_u16 (x0), -+ z0 = svdup_u16 (x0)) -+ -+/* -+** dup_1_u16_m: -+** mov z0\.h, p0/m, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_u16_m, svuint16_t, -+ z0 = svdup_n_u16_m (z0, p0, 1), -+ z0 = svdup_u16_m (z0, p0, 1)) -+ -+/* -+** dup_127_u16_m: -+** mov z0\.h, p0/m, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_127_u16_m, svuint16_t, -+ z0 = svdup_n_u16_m (z0, p0, 127), -+ z0 = svdup_u16_m (z0, p0, 127)) -+ -+/* -+** dup_128_u16_m: -+** mov (z[0-9]+\.h), #128 -+** sel z0\.h, p0, \1, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_128_u16_m, svuint16_t, -+ z0 = svdup_n_u16_m (z0, p0, 128), -+ z0 = svdup_u16_m (z0, p0, 128)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_129_u16_m, svuint16_t, -+ z0 = svdup_n_u16_m (z0, p0, 129), -+ z0 = svdup_u16_m (z0, p0, 129)) -+ -+/* TODO: Bad code and needs fixing. 
*/ -+TEST_UNIFORM_Z (dup_253_u16_m, svuint16_t, -+ z0 = svdup_n_u16_m (z0, p0, 253), -+ z0 = svdup_u16_m (z0, p0, 253)) -+ -+/* -+** dup_254_u16_m: -+** mov (z[0-9]+\.h), #254 -+** sel z0\.h, p0, \1, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_254_u16_m, svuint16_t, -+ z0 = svdup_n_u16_m (z0, p0, 254), -+ z0 = svdup_u16_m (z0, p0, 254)) -+ -+/* -+** dup_255_u16_m: -+** mov (z[0-9]+\.h), #255 -+** sel z0\.h, p0, \1, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_255_u16_m, svuint16_t, -+ z0 = svdup_n_u16_m (z0, p0, 255), -+ z0 = svdup_u16_m (z0, p0, 255)) -+ -+/* -+** dup_256_u16_m: -+** mov z0\.h, p0/m, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_256_u16_m, svuint16_t, -+ z0 = svdup_n_u16_m (z0, p0, 256), -+ z0 = svdup_u16_m (z0, p0, 256)) -+ -+/* -+** dup_257_u16_m: -+** mov (z[0-9]+)\.b, #1 -+** sel z0\.h, p0, \1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_257_u16_m, svuint16_t, -+ z0 = svdup_n_u16_m (z0, p0, 257), -+ z0 = svdup_u16_m (z0, p0, 257)) -+ -+/* -+** dup_512_u16_m: -+** mov z0\.h, p0/m, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_512_u16_m, svuint16_t, -+ z0 = svdup_n_u16_m (z0, p0, 512), -+ z0 = svdup_u16_m (z0, p0, 512)) -+ -+/* -+** dup_7f00_u16_m: -+** mov z0\.h, p0/m, #32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7f00_u16_m, svuint16_t, -+ z0 = svdup_n_u16_m (z0, p0, 0x7f00), -+ z0 = svdup_u16_m (z0, p0, 0x7f00)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_7f01_u16_m, svuint16_t, -+ z0 = svdup_n_u16_m (z0, p0, 0x7f01), -+ z0 = svdup_u16_m (z0, p0, 0x7f01)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_7ffd_u16_m, svuint16_t, -+ z0 = svdup_n_u16_m (z0, p0, 0x7ffd), -+ z0 = svdup_u16_m (z0, p0, 0x7ffd)) -+ -+/* -+** dup_7ffe_u16_m: -+** mov (z[0-9]+\.h), #32766 -+** sel z0\.h, p0, \1, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7ffe_u16_m, svuint16_t, -+ z0 = svdup_n_u16_m (z0, p0, 0x7ffe), -+ z0 = svdup_u16_m (z0, p0, 0x7ffe)) -+ -+/* -+** dup_7fff_u16_m: -+** mov (z[0-9]+\.h), #32767 -+** sel z0\.h, p0, \1, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7fff_u16_m, svuint16_t, -+ z0 = svdup_n_u16_m (z0, p0, 0x7fff), -+ z0 = svdup_u16_m (z0, p0, 0x7fff)) -+ -+/* -+** dup_m1_u16_m: -+** mov z0\.h, p0/m, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m1_u16_m, svuint16_t, -+ z0 = svdup_n_u16_m (z0, p0, -1), -+ z0 = svdup_u16_m (z0, p0, -1)) -+ -+/* -+** dup_m128_u16_m: -+** mov z0\.h, p0/m, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m128_u16_m, svuint16_t, -+ z0 = svdup_n_u16_m (z0, p0, -128), -+ z0 = svdup_u16_m (z0, p0, -128)) -+ -+/* -+** dup_m129_u16_m: -+** mov (z[0-9]+\.h), #-129 -+** sel z0\.h, p0, \1, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m129_u16_m, svuint16_t, -+ z0 = svdup_n_u16_m (z0, p0, -129), -+ z0 = svdup_u16_m (z0, p0, -129)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m130_u16_m, svuint16_t, -+ z0 = svdup_n_u16_m (z0, p0, -130), -+ z0 = svdup_u16_m (z0, p0, -130)) -+ -+/* TODO: Bad code and needs fixing. 
*/ -+TEST_UNIFORM_Z (dup_m254_u16_m, svuint16_t, -+ z0 = svdup_n_u16_m (z0, p0, -254), -+ z0 = svdup_u16_m (z0, p0, -254)) -+ -+/* -+** dup_m255_u16_m: -+** mov (z[0-9]+\.h), #-255 -+** sel z0\.h, p0, \1, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m255_u16_m, svuint16_t, -+ z0 = svdup_n_u16_m (z0, p0, -255), -+ z0 = svdup_u16_m (z0, p0, -255)) -+ -+/* -+** dup_m256_u16_m: -+** mov z0\.h, p0/m, #-256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m256_u16_m, svuint16_t, -+ z0 = svdup_n_u16_m (z0, p0, -256), -+ z0 = svdup_u16_m (z0, p0, -256)) -+ -+/* -+** dup_m257_u16_m: -+** mov (z[0-9]+\.h), #-257 -+** sel z0\.h, p0, \1, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m257_u16_m, svuint16_t, -+ z0 = svdup_n_u16_m (z0, p0, -257), -+ z0 = svdup_u16_m (z0, p0, -257)) -+ -+/* -+** dup_m258_u16_m: -+** mov (z[0-9]+)\.b, #-2 -+** sel z0\.h, p0, \1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m258_u16_m, svuint16_t, -+ z0 = svdup_n_u16_m (z0, p0, -258), -+ z0 = svdup_u16_m (z0, p0, -258)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m259_u16_m, svuint16_t, -+ z0 = svdup_n_u16_m (z0, p0, -259), -+ z0 = svdup_u16_m (z0, p0, -259)) -+ -+/* -+** dup_m512_u16_m: -+** mov z0\.h, p0/m, #-512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m512_u16_m, svuint16_t, -+ z0 = svdup_n_u16_m (z0, p0, -512), -+ z0 = svdup_u16_m (z0, p0, -512)) -+ -+/* -+** dup_m7f00_u16_m: -+** mov z0\.h, p0/m, #-32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f00_u16_m, svuint16_t, -+ z0 = svdup_n_u16_m (z0, p0, -0x7f00), -+ z0 = svdup_u16_m (z0, p0, -0x7f00)) -+ -+/* -+** dup_m7f01_u16_m: -+** mov (z[0-9]+\.h), #-32513 -+** sel z0\.h, p0, \1, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f01_u16_m, svuint16_t, -+ z0 = svdup_n_u16_m (z0, p0, -0x7f01), -+ z0 = svdup_u16_m (z0, p0, -0x7f01)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m7f02_u16_m, svuint16_t, -+ z0 = svdup_n_u16_m (z0, p0, -0x7f02), -+ z0 = svdup_u16_m (z0, p0, -0x7f02)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m7ffe_u16_m, svuint16_t, -+ z0 = svdup_n_u16_m (z0, p0, -0x7ffe), -+ z0 = svdup_u16_m (z0, p0, -0x7ffe)) -+ -+/* -+** dup_m7fff_u16_m: -+** mov (z[0-9]+\.h), #-32767 -+** sel z0\.h, p0, \1, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7fff_u16_m, svuint16_t, -+ z0 = svdup_n_u16_m (z0, p0, -0x7fff), -+ z0 = svdup_u16_m (z0, p0, -0x7fff)) -+ -+/* -+** dup_m8000_u16_m: -+** mov z0\.h, p0/m, #-32768 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m8000_u16_m, svuint16_t, -+ z0 = svdup_n_u16_m (z0, p0, -0x8000), -+ z0 = svdup_u16_m (z0, p0, -0x8000)) -+ -+/* -+** dup_0_u16_m: -+** mov z0\.h, p0/m, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_0_u16_m, svuint16_t, -+ z0 = svdup_n_u16_m (z0, p0, 0), -+ z0 = svdup_u16_m (z0, p0, 0)) -+ -+/* -+** dup_w0_u16_m: -+** movprfx z0, z1 -+** mov z0\.h, p0/m, w0 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_w0_u16_m, svuint16_t, uint16_t, -+ z0 = svdup_n_u16_m (z1, p0, x0), -+ z0 = svdup_u16_m (z1, p0, x0)) -+ -+/* -+** dup_1_u16_z: -+** mov z0\.h, p0/z, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_u16_z, svuint16_t, -+ z0 = svdup_n_u16_z (p0, 1), -+ z0 = svdup_u16_z (p0, 1)) -+ -+/* -+** dup_127_u16_z: -+** mov z0\.h, p0/z, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_127_u16_z, svuint16_t, -+ z0 = svdup_n_u16_z (p0, 127), -+ z0 = svdup_u16_z (p0, 127)) -+ -+/* -+** dup_128_u16_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.h), #128 -+** sel z0\.h, p0, \2, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_128_u16_z, svuint16_t, -+ z0 = svdup_n_u16_z (p0, 128), -+ z0 = svdup_u16_z (p0, 128)) -+ -+/* TODO: Bad code and needs fixing. 
*/ -+TEST_UNIFORM_Z (dup_129_u16_z, svuint16_t, -+ z0 = svdup_n_u16_z (p0, 129), -+ z0 = svdup_u16_z (p0, 129)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_253_u16_z, svuint16_t, -+ z0 = svdup_n_u16_z (p0, 253), -+ z0 = svdup_u16_z (p0, 253)) -+ -+/* -+** dup_254_u16_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.h), #254 -+** sel z0\.h, p0, \2, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_254_u16_z, svuint16_t, -+ z0 = svdup_n_u16_z (p0, 254), -+ z0 = svdup_u16_z (p0, 254)) -+ -+/* -+** dup_255_u16_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.h), #255 -+** sel z0\.h, p0, \2, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_255_u16_z, svuint16_t, -+ z0 = svdup_n_u16_z (p0, 255), -+ z0 = svdup_u16_z (p0, 255)) -+ -+/* -+** dup_256_u16_z: -+** mov z0\.h, p0/z, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_256_u16_z, svuint16_t, -+ z0 = svdup_n_u16_z (p0, 256), -+ z0 = svdup_u16_z (p0, 256)) -+ -+/* -+** dup_257_u16_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+)\.b, #1 -+** sel z0\.h, p0, \2\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_257_u16_z, svuint16_t, -+ z0 = svdup_n_u16_z (p0, 257), -+ z0 = svdup_u16_z (p0, 257)) -+ -+/* -+** dup_512_u16_z: -+** mov z0\.h, p0/z, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_512_u16_z, svuint16_t, -+ z0 = svdup_n_u16_z (p0, 512), -+ z0 = svdup_u16_z (p0, 512)) -+ -+/* -+** dup_7f00_u16_z: -+** mov z0\.h, p0/z, #32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7f00_u16_z, svuint16_t, -+ z0 = svdup_n_u16_z (p0, 0x7f00), -+ z0 = svdup_u16_z (p0, 0x7f00)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_7f01_u16_z, svuint16_t, -+ z0 = svdup_n_u16_z (p0, 0x7f01), -+ z0 = svdup_u16_z (p0, 0x7f01)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_7ffd_u16_z, svuint16_t, -+ z0 = svdup_n_u16_z (p0, 0x7ffd), -+ z0 = svdup_u16_z (p0, 0x7ffd)) -+ -+/* -+** dup_7ffe_u16_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.h), #32766 -+** sel z0\.h, p0, \2, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7ffe_u16_z, svuint16_t, -+ z0 = svdup_n_u16_z (p0, 0x7ffe), -+ z0 = svdup_u16_z (p0, 0x7ffe)) -+ -+/* -+** dup_7fff_u16_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.h), #32767 -+** sel z0\.h, p0, \2, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7fff_u16_z, svuint16_t, -+ z0 = svdup_n_u16_z (p0, 0x7fff), -+ z0 = svdup_u16_z (p0, 0x7fff)) -+ -+/* -+** dup_m1_u16_z: -+** mov z0\.h, p0/z, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m1_u16_z, svuint16_t, -+ z0 = svdup_n_u16_z (p0, -1), -+ z0 = svdup_u16_z (p0, -1)) -+ -+/* -+** dup_m128_u16_z: -+** mov z0\.h, p0/z, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m128_u16_z, svuint16_t, -+ z0 = svdup_n_u16_z (p0, -128), -+ z0 = svdup_u16_z (p0, -128)) -+ -+/* -+** dup_m129_u16_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.h), #-129 -+** sel z0\.h, p0, \2, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m129_u16_z, svuint16_t, -+ z0 = svdup_n_u16_z (p0, -129), -+ z0 = svdup_u16_z (p0, -129)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m130_u16_z, svuint16_t, -+ z0 = svdup_n_u16_z (p0, -130), -+ z0 = svdup_u16_z (p0, -130)) -+ -+/* TODO: Bad code and needs fixing. 
*/ -+TEST_UNIFORM_Z (dup_m254_u16_z, svuint16_t, -+ z0 = svdup_n_u16_z (p0, -254), -+ z0 = svdup_u16_z (p0, -254)) -+ -+/* -+** dup_m255_u16_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.h), #-255 -+** sel z0\.h, p0, \2, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m255_u16_z, svuint16_t, -+ z0 = svdup_n_u16_z (p0, -255), -+ z0 = svdup_u16_z (p0, -255)) -+ -+/* -+** dup_m256_u16_z: -+** mov z0\.h, p0/z, #-256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m256_u16_z, svuint16_t, -+ z0 = svdup_n_u16_z (p0, -256), -+ z0 = svdup_u16_z (p0, -256)) -+ -+/* -+** dup_m257_u16_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.h), #-257 -+** sel z0\.h, p0, \2, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m257_u16_z, svuint16_t, -+ z0 = svdup_n_u16_z (p0, -257), -+ z0 = svdup_u16_z (p0, -257)) -+ -+/* -+** dup_m258_u16_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+)\.b, #-2 -+** sel z0\.h, p0, \2\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m258_u16_z, svuint16_t, -+ z0 = svdup_n_u16_z (p0, -258), -+ z0 = svdup_u16_z (p0, -258)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m259_u16_z, svuint16_t, -+ z0 = svdup_n_u16_z (p0, -259), -+ z0 = svdup_u16_z (p0, -259)) -+ -+/* -+** dup_m512_u16_z: -+** mov z0\.h, p0/z, #-512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m512_u16_z, svuint16_t, -+ z0 = svdup_n_u16_z (p0, -512), -+ z0 = svdup_u16_z (p0, -512)) -+ -+/* -+** dup_m7f00_u16_z: -+** mov z0\.h, p0/z, #-32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f00_u16_z, svuint16_t, -+ z0 = svdup_n_u16_z (p0, -0x7f00), -+ z0 = svdup_u16_z (p0, -0x7f00)) -+ -+/* -+** dup_m7f01_u16_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.h), #-32513 -+** sel z0\.h, p0, \2, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f01_u16_z, svuint16_t, -+ z0 = svdup_n_u16_z (p0, -0x7f01), -+ z0 = svdup_u16_z (p0, -0x7f01)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m7f02_u16_z, svuint16_t, -+ z0 = svdup_n_u16_z (p0, -0x7f02), -+ z0 = svdup_u16_z (p0, -0x7f02)) -+ -+/* TODO: Bad code and needs fixing. 
*/ -+TEST_UNIFORM_Z (dup_m7ffe_u16_z, svuint16_t, -+ z0 = svdup_n_u16_z (p0, -0x7ffe), -+ z0 = svdup_u16_z (p0, -0x7ffe)) -+ -+/* -+** dup_m7fff_u16_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.h), #-32767 -+** sel z0\.h, p0, \2, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7fff_u16_z, svuint16_t, -+ z0 = svdup_n_u16_z (p0, -0x7fff), -+ z0 = svdup_u16_z (p0, -0x7fff)) -+ -+/* -+** dup_m8000_u16_z: -+** mov z0\.h, p0/z, #-32768 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m8000_u16_z, svuint16_t, -+ z0 = svdup_n_u16_z (p0, -0x8000), -+ z0 = svdup_u16_z (p0, -0x8000)) -+ -+/* -+** dup_0_u16_z: -+** mov z0\.h, p0/z, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_0_u16_z, svuint16_t, -+ z0 = svdup_n_u16_z (p0, 0), -+ z0 = svdup_u16_z (p0, 0)) -+ -+/* -+** dup_w0_u16_z: -+** movprfx z0\.h, p0/z, z0\.h -+** mov z0\.h, p0/m, w0 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_w0_u16_z, svuint16_t, uint16_t, -+ z0 = svdup_n_u16_z (p0, x0), -+ z0 = svdup_u16_z (p0, x0)) -+ -+/* -+** dup_1_u16_x: -+** mov z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_u16_x, svuint16_t, -+ z0 = svdup_n_u16_x (p0, 1), -+ z0 = svdup_u16_x (p0, 1)) -+ -+/* -+** dup_127_u16_x: -+** mov z0\.h, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_127_u16_x, svuint16_t, -+ z0 = svdup_n_u16_x (p0, 127), -+ z0 = svdup_u16_x (p0, 127)) -+ -+/* -+** dup_128_u16_x: -+** mov z0\.h, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_128_u16_x, svuint16_t, -+ z0 = svdup_n_u16_x (p0, 128), -+ z0 = svdup_u16_x (p0, 128)) -+ -+/* -+** dup_129_u16_x: -+** movi v([0-9]+)\.8h, 0x81 -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_129_u16_x, svuint16_t, -+ z0 = svdup_n_u16_x (p0, 129), -+ z0 = svdup_u16_x (p0, 129)) -+ -+/* -+** dup_253_u16_x: -+** movi v([0-9]+)\.8h, 0xfd -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_253_u16_x, svuint16_t, -+ z0 = svdup_n_u16_x (p0, 253), -+ z0 = svdup_u16_x (p0, 253)) -+ -+/* -+** dup_254_u16_x: -+** mov z0\.h, #254 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_254_u16_x, svuint16_t, -+ z0 = svdup_n_u16_x (p0, 254), -+ z0 = svdup_u16_x (p0, 254)) -+ -+/* -+** dup_255_u16_x: -+** mov z0\.h, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_255_u16_x, svuint16_t, -+ z0 = svdup_n_u16_x (p0, 255), -+ z0 = svdup_u16_x (p0, 255)) -+ -+/* -+** dup_256_u16_x: -+** mov z0\.h, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_256_u16_x, svuint16_t, -+ z0 = svdup_n_u16_x (p0, 256), -+ z0 = svdup_u16_x (p0, 256)) -+ -+/* -+** dup_257_u16_x: -+** mov z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_257_u16_x, svuint16_t, -+ z0 = svdup_n_u16_x (p0, 257), -+ z0 = svdup_u16_x (p0, 257)) -+ -+/* -+** dup_512_u16_x: -+** mov z0\.h, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_512_u16_x, svuint16_t, -+ z0 = svdup_n_u16_x (p0, 512), -+ z0 = svdup_u16_x (p0, 512)) -+ -+/* -+** dup_7f00_u16_x: -+** mov z0\.h, #32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7f00_u16_x, svuint16_t, -+ z0 = svdup_n_u16_x (p0, 0x7f00), -+ z0 = svdup_u16_x (p0, 0x7f00)) -+ -+/* -+** dup_7f01_u16_x: -+** mov (w[0-9]+), 32513 -+** mov z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7f01_u16_x, svuint16_t, -+ z0 = svdup_n_u16_x (p0, 0x7f01), -+ z0 = svdup_u16_x (p0, 0x7f01)) -+ -+/* -+** dup_7ffd_u16_x: -+** mov (w[0-9]+), 32765 -+** mov z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7ffd_u16_x, svuint16_t, -+ z0 = svdup_n_u16_x (p0, 0x7ffd), -+ z0 = svdup_u16_x (p0, 0x7ffd)) -+ -+/* -+** dup_7ffe_u16_x: -+** mov z0\.h, #32766 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7ffe_u16_x, svuint16_t, -+ z0 = svdup_n_u16_x (p0, 0x7ffe), -+ z0 = svdup_u16_x (p0, 0x7ffe)) -+ -+/* -+** dup_7fff_u16_x: -+** mov z0\.h, #32767 
-+** ret -+*/ -+TEST_UNIFORM_Z (dup_7fff_u16_x, svuint16_t, -+ z0 = svdup_n_u16_x (p0, 0x7fff), -+ z0 = svdup_u16_x (p0, 0x7fff)) -+ -+/* -+** dup_m1_u16_x: -+** mov z0\.b, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m1_u16_x, svuint16_t, -+ z0 = svdup_n_u16_x (p0, -1), -+ z0 = svdup_u16_x (p0, -1)) -+ -+/* -+** dup_m128_u16_x: -+** mov z0\.h, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m128_u16_x, svuint16_t, -+ z0 = svdup_n_u16_x (p0, -128), -+ z0 = svdup_u16_x (p0, -128)) -+ -+/* -+** dup_m129_u16_x: -+** mov z0\.h, #-129 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m129_u16_x, svuint16_t, -+ z0 = svdup_n_u16_x (p0, -129), -+ z0 = svdup_u16_x (p0, -129)) -+ -+/* -+** dup_m130_u16_x: -+** mvni v([0-9]+)\.8h, 0x81 -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m130_u16_x, svuint16_t, -+ z0 = svdup_n_u16_x (p0, -130), -+ z0 = svdup_u16_x (p0, -130)) -+ -+/* -+** dup_m254_u16_x: -+** mvni v([0-9]+)\.8h, 0xfd -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m254_u16_x, svuint16_t, -+ z0 = svdup_n_u16_x (p0, -254), -+ z0 = svdup_u16_x (p0, -254)) -+ -+/* -+** dup_m255_u16_x: -+** mov z0\.h, #-255 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m255_u16_x, svuint16_t, -+ z0 = svdup_n_u16_x (p0, -255), -+ z0 = svdup_u16_x (p0, -255)) -+ -+/* -+** dup_m256_u16_x: -+** mov z0\.h, #-256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m256_u16_x, svuint16_t, -+ z0 = svdup_n_u16_x (p0, -256), -+ z0 = svdup_u16_x (p0, -256)) -+ -+/* -+** dup_m257_u16_x: -+** mov z0\.h, #-257 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m257_u16_x, svuint16_t, -+ z0 = svdup_n_u16_x (p0, -257), -+ z0 = svdup_u16_x (p0, -257)) -+ -+/* -+** dup_m258_u16_x: -+** mov z0\.b, #-2 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m258_u16_x, svuint16_t, -+ z0 = svdup_n_u16_x (p0, -258), -+ z0 = svdup_u16_x (p0, -258)) -+ -+/* -+** dup_m259_u16_x: -+** mov (w[0-9]+), -259 -+** mov z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m259_u16_x, svuint16_t, -+ z0 = svdup_n_u16_x (p0, -259), -+ z0 = svdup_u16_x (p0, -259)) -+ -+/* -+** dup_m512_u16_x: -+** mov z0\.h, #-512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m512_u16_x, svuint16_t, -+ z0 = svdup_n_u16_x (p0, -512), -+ z0 = svdup_u16_x (p0, -512)) -+ -+/* -+** dup_m7f00_u16_x: -+** mov z0\.h, #-32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f00_u16_x, svuint16_t, -+ z0 = svdup_n_u16_x (p0, -0x7f00), -+ z0 = svdup_u16_x (p0, -0x7f00)) -+ -+/* -+** dup_m7f01_u16_x: -+** mov z0\.h, #-32513 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f01_u16_x, svuint16_t, -+ z0 = svdup_n_u16_x (p0, -0x7f01), -+ z0 = svdup_u16_x (p0, -0x7f01)) -+ -+/* -+** dup_m7f02_u16_x: -+** mov (w[0-9]+), -32514 -+** mov z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f02_u16_x, svuint16_t, -+ z0 = svdup_n_u16_x (p0, -0x7f02), -+ z0 = svdup_u16_x (p0, -0x7f02)) -+ -+/* -+** dup_m7ffe_u16_x: -+** mov (w[0-9]+), -32766 -+** mov z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7ffe_u16_x, svuint16_t, -+ z0 = svdup_n_u16_x (p0, -0x7ffe), -+ z0 = svdup_u16_x (p0, -0x7ffe)) -+ -+/* -+** dup_m7fff_u16_x: -+** mov z0\.h, #-32767 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7fff_u16_x, svuint16_t, -+ z0 = svdup_n_u16_x (p0, -0x7fff), -+ z0 = svdup_u16_x (p0, -0x7fff)) -+ -+/* -+** dup_m8000_u16_x: -+** mov z0\.h, #-32768 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m8000_u16_x, svuint16_t, -+ z0 = svdup_n_u16_x (p0, -0x8000), -+ z0 = svdup_u16_x (p0, -0x8000)) -+ -+/* -+** dup_w0_u16_x: -+** mov z0\.h, w0 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_w0_u16_x, svuint16_t, uint16_t, -+ z0 = svdup_n_u16_x (p0, x0), -+ z0 = svdup_u16_x (p0, x0)) -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u32.c -new file mode 100644 -index 000000000..667feea64 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u32.c -@@ -0,0 +1,1175 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dup_1_u32: -+** mov z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_u32, svuint32_t, -+ z0 = svdup_n_u32 (1), -+ z0 = svdup_u32 (1)) -+ -+/* -+** dup_127_u32: -+** mov z0\.s, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_127_u32, svuint32_t, -+ z0 = svdup_n_u32 (127), -+ z0 = svdup_u32 (127)) -+ -+/* -+** dup_128_u32: -+** mov z0\.s, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_128_u32, svuint32_t, -+ z0 = svdup_n_u32 (128), -+ z0 = svdup_u32 (128)) -+ -+/* -+** dup_129_u32: -+** movi v([0-9]+)\.4s, 0x81 -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_129_u32, svuint32_t, -+ z0 = svdup_n_u32 (129), -+ z0 = svdup_u32 (129)) -+ -+/* -+** dup_253_u32: -+** movi v([0-9]+)\.4s, 0xfd -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_253_u32, svuint32_t, -+ z0 = svdup_n_u32 (253), -+ z0 = svdup_u32 (253)) -+ -+/* -+** dup_254_u32: -+** mov z0\.s, #254 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_254_u32, svuint32_t, -+ z0 = svdup_n_u32 (254), -+ z0 = svdup_u32 (254)) -+ -+/* -+** dup_255_u32: -+** mov z0\.s, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_255_u32, svuint32_t, -+ z0 = svdup_n_u32 (255), -+ z0 = svdup_u32 (255)) -+ -+/* -+** dup_256_u32: -+** mov z0\.s, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_256_u32, svuint32_t, -+ z0 = svdup_n_u32 (256), -+ z0 = svdup_u32 (256)) -+ -+/* -+** dup_257_u32: -+** mov (w[0-9]+), 257 -+** mov z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_257_u32, svuint32_t, -+ z0 = svdup_n_u32 (257), -+ z0 = svdup_u32 (257)) -+ -+/* -+** dup_512_u32: -+** mov z0\.s, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_512_u32, svuint32_t, -+ z0 = svdup_n_u32 (512), -+ z0 = svdup_u32 (512)) -+ -+/* -+** dup_7f00_u32: -+** mov z0\.s, #32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7f00_u32, svuint32_t, -+ z0 = svdup_n_u32 (0x7f00), -+ z0 = svdup_u32 (0x7f00)) -+ -+/* -+** dup_7f01_u32: -+** mov (w[0-9]+), 32513 -+** mov z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7f01_u32, svuint32_t, -+ z0 = svdup_n_u32 (0x7f01), -+ z0 = svdup_u32 (0x7f01)) -+ -+/* -+** dup_7ffd_u32: -+** mov (w[0-9]+), 32765 -+** mov z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7ffd_u32, svuint32_t, -+ z0 = svdup_n_u32 (0x7ffd), -+ z0 = svdup_u32 (0x7ffd)) -+ -+/* -+** dup_7ffe_u32: -+** mov z0\.s, #32766 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7ffe_u32, svuint32_t, -+ z0 = svdup_n_u32 (0x7ffe), -+ z0 = svdup_u32 (0x7ffe)) -+ -+/* -+** dup_7fff_u32: -+** mov z0\.s, #32767 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7fff_u32, svuint32_t, -+ z0 = svdup_n_u32 (0x7fff), -+ z0 = svdup_u32 (0x7fff)) -+ -+/* -+** dup_m1_u32: -+** mov z0\.b, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m1_u32, svuint32_t, -+ z0 = svdup_n_u32 (-1), -+ z0 = svdup_u32 (-1)) -+ -+/* -+** dup_m128_u32: -+** mov z0\.s, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m128_u32, svuint32_t, -+ z0 = svdup_n_u32 (-128), -+ z0 = svdup_u32 (-128)) -+ -+/* -+** dup_m129_u32: -+** mov z0\.s, #-129 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m129_u32, svuint32_t, -+ z0 = svdup_n_u32 (-129), -+ z0 = svdup_u32 (-129)) -+ -+/* -+** dup_m130_u32: -+** mvni v([0-9]+)\.4s, 0x81 -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m130_u32, svuint32_t, -+ z0 = svdup_n_u32 (-130), -+ z0 = svdup_u32 (-130)) 
-+ -+/* -+** dup_m254_u32: -+** mvni v([0-9]+)\.4s, 0xfd -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m254_u32, svuint32_t, -+ z0 = svdup_n_u32 (-254), -+ z0 = svdup_u32 (-254)) -+ -+/* -+** dup_m255_u32: -+** mov z0\.s, #-255 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m255_u32, svuint32_t, -+ z0 = svdup_n_u32 (-255), -+ z0 = svdup_u32 (-255)) -+ -+/* -+** dup_m256_u32: -+** mov z0\.s, #-256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m256_u32, svuint32_t, -+ z0 = svdup_n_u32 (-256), -+ z0 = svdup_u32 (-256)) -+ -+/* -+** dup_m257_u32: -+** mov z0\.s, #-257 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m257_u32, svuint32_t, -+ z0 = svdup_n_u32 (-257), -+ z0 = svdup_u32 (-257)) -+ -+/* -+** dup_m258_u32: -+** mov (w[0-9]+), -258 -+** mov z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m258_u32, svuint32_t, -+ z0 = svdup_n_u32 (-258), -+ z0 = svdup_u32 (-258)) -+ -+/* -+** dup_m259_u32: -+** mov (w[0-9]+), -259 -+** mov z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m259_u32, svuint32_t, -+ z0 = svdup_n_u32 (-259), -+ z0 = svdup_u32 (-259)) -+ -+/* -+** dup_m512_u32: -+** mov z0\.s, #-512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m512_u32, svuint32_t, -+ z0 = svdup_n_u32 (-512), -+ z0 = svdup_u32 (-512)) -+ -+/* -+** dup_m7f00_u32: -+** mov z0\.s, #-32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f00_u32, svuint32_t, -+ z0 = svdup_n_u32 (-0x7f00), -+ z0 = svdup_u32 (-0x7f00)) -+ -+/* -+** dup_m7f01_u32: -+** mov z0\.s, #-32513 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f01_u32, svuint32_t, -+ z0 = svdup_n_u32 (-0x7f01), -+ z0 = svdup_u32 (-0x7f01)) -+ -+/* -+** dup_m7f02_u32: -+** mov (w[0-9]+), -32514 -+** mov z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f02_u32, svuint32_t, -+ z0 = svdup_n_u32 (-0x7f02), -+ z0 = svdup_u32 (-0x7f02)) -+ -+/* -+** dup_m7ffe_u32: -+** mov (w[0-9]+), -32766 -+** mov z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7ffe_u32, svuint32_t, -+ z0 = svdup_n_u32 (-0x7ffe), -+ z0 = svdup_u32 (-0x7ffe)) -+ -+/* -+** dup_m7fff_u32: -+** mov z0\.s, #-32767 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7fff_u32, svuint32_t, -+ z0 = svdup_n_u32 (-0x7fff), -+ z0 = svdup_u32 (-0x7fff)) -+ -+/* -+** dup_m8000_u32: -+** mov z0\.s, #-32768 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m8000_u32, svuint32_t, -+ z0 = svdup_n_u32 (-0x8000), -+ z0 = svdup_u32 (-0x8000)) -+ -+/* -+** dup_w0_u32: -+** mov z0\.s, w0 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_w0_u32, svuint32_t, uint32_t, -+ z0 = svdup_n_u32 (x0), -+ z0 = svdup_u32 (x0)) -+ -+/* -+** dup_1_u32_m: -+** mov z0\.s, p0/m, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_u32_m, svuint32_t, -+ z0 = svdup_n_u32_m (z0, p0, 1), -+ z0 = svdup_u32_m (z0, p0, 1)) -+ -+/* -+** dup_127_u32_m: -+** mov z0\.s, p0/m, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_127_u32_m, svuint32_t, -+ z0 = svdup_n_u32_m (z0, p0, 127), -+ z0 = svdup_u32_m (z0, p0, 127)) -+ -+/* -+** dup_128_u32_m: -+** mov (z[0-9]+\.s), #128 -+** sel z0\.s, p0, \1, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_128_u32_m, svuint32_t, -+ z0 = svdup_n_u32_m (z0, p0, 128), -+ z0 = svdup_u32_m (z0, p0, 128)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_129_u32_m, svuint32_t, -+ z0 = svdup_n_u32_m (z0, p0, 129), -+ z0 = svdup_u32_m (z0, p0, 129)) -+ -+/* TODO: Bad code and needs fixing. 
*/ -+TEST_UNIFORM_Z (dup_253_u32_m, svuint32_t, -+ z0 = svdup_n_u32_m (z0, p0, 253), -+ z0 = svdup_u32_m (z0, p0, 253)) -+ -+/* -+** dup_254_u32_m: -+** mov (z[0-9]+\.s), #254 -+** sel z0\.s, p0, \1, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_254_u32_m, svuint32_t, -+ z0 = svdup_n_u32_m (z0, p0, 254), -+ z0 = svdup_u32_m (z0, p0, 254)) -+ -+/* -+** dup_255_u32_m: -+** mov (z[0-9]+\.s), #255 -+** sel z0\.s, p0, \1, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_255_u32_m, svuint32_t, -+ z0 = svdup_n_u32_m (z0, p0, 255), -+ z0 = svdup_u32_m (z0, p0, 255)) -+ -+/* -+** dup_256_u32_m: -+** mov z0\.s, p0/m, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_256_u32_m, svuint32_t, -+ z0 = svdup_n_u32_m (z0, p0, 256), -+ z0 = svdup_u32_m (z0, p0, 256)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_257_u32_m, svuint32_t, -+ z0 = svdup_n_u32_m (z0, p0, 257), -+ z0 = svdup_u32_m (z0, p0, 257)) -+ -+/* -+** dup_512_u32_m: -+** mov z0\.s, p0/m, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_512_u32_m, svuint32_t, -+ z0 = svdup_n_u32_m (z0, p0, 512), -+ z0 = svdup_u32_m (z0, p0, 512)) -+ -+/* -+** dup_7f00_u32_m: -+** mov z0\.s, p0/m, #32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7f00_u32_m, svuint32_t, -+ z0 = svdup_n_u32_m (z0, p0, 0x7f00), -+ z0 = svdup_u32_m (z0, p0, 0x7f00)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_7f01_u32_m, svuint32_t, -+ z0 = svdup_n_u32_m (z0, p0, 0x7f01), -+ z0 = svdup_u32_m (z0, p0, 0x7f01)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_7ffd_u32_m, svuint32_t, -+ z0 = svdup_n_u32_m (z0, p0, 0x7ffd), -+ z0 = svdup_u32_m (z0, p0, 0x7ffd)) -+ -+/* -+** dup_7ffe_u32_m: -+** mov (z[0-9]+\.s), #32766 -+** sel z0\.s, p0, \1, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7ffe_u32_m, svuint32_t, -+ z0 = svdup_n_u32_m (z0, p0, 0x7ffe), -+ z0 = svdup_u32_m (z0, p0, 0x7ffe)) -+ -+/* -+** dup_7fff_u32_m: -+** mov (z[0-9]+\.s), #32767 -+** sel z0\.s, p0, \1, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7fff_u32_m, svuint32_t, -+ z0 = svdup_n_u32_m (z0, p0, 0x7fff), -+ z0 = svdup_u32_m (z0, p0, 0x7fff)) -+ -+/* -+** dup_m1_u32_m: -+** mov z0\.s, p0/m, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m1_u32_m, svuint32_t, -+ z0 = svdup_n_u32_m (z0, p0, -1), -+ z0 = svdup_u32_m (z0, p0, -1)) -+ -+/* -+** dup_m128_u32_m: -+** mov z0\.s, p0/m, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m128_u32_m, svuint32_t, -+ z0 = svdup_n_u32_m (z0, p0, -128), -+ z0 = svdup_u32_m (z0, p0, -128)) -+ -+/* -+** dup_m129_u32_m: -+** mov (z[0-9]+\.s), #-129 -+** sel z0\.s, p0, \1, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m129_u32_m, svuint32_t, -+ z0 = svdup_n_u32_m (z0, p0, -129), -+ z0 = svdup_u32_m (z0, p0, -129)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m130_u32_m, svuint32_t, -+ z0 = svdup_n_u32_m (z0, p0, -130), -+ z0 = svdup_u32_m (z0, p0, -130)) -+ -+/* TODO: Bad code and needs fixing. 
*/ -+TEST_UNIFORM_Z (dup_m254_u32_m, svuint32_t, -+ z0 = svdup_n_u32_m (z0, p0, -254), -+ z0 = svdup_u32_m (z0, p0, -254)) -+ -+/* -+** dup_m255_u32_m: -+** mov (z[0-9]+\.s), #-255 -+** sel z0\.s, p0, \1, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m255_u32_m, svuint32_t, -+ z0 = svdup_n_u32_m (z0, p0, -255), -+ z0 = svdup_u32_m (z0, p0, -255)) -+ -+/* -+** dup_m256_u32_m: -+** mov z0\.s, p0/m, #-256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m256_u32_m, svuint32_t, -+ z0 = svdup_n_u32_m (z0, p0, -256), -+ z0 = svdup_u32_m (z0, p0, -256)) -+ -+/* -+** dup_m257_u32_m: -+** mov (z[0-9]+\.s), #-257 -+** sel z0\.s, p0, \1, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m257_u32_m, svuint32_t, -+ z0 = svdup_n_u32_m (z0, p0, -257), -+ z0 = svdup_u32_m (z0, p0, -257)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m258_u32_m, svuint32_t, -+ z0 = svdup_n_u32_m (z0, p0, -258), -+ z0 = svdup_u32_m (z0, p0, -258)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m259_u32_m, svuint32_t, -+ z0 = svdup_n_u32_m (z0, p0, -259), -+ z0 = svdup_u32_m (z0, p0, -259)) -+ -+/* -+** dup_m512_u32_m: -+** mov z0\.s, p0/m, #-512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m512_u32_m, svuint32_t, -+ z0 = svdup_n_u32_m (z0, p0, -512), -+ z0 = svdup_u32_m (z0, p0, -512)) -+ -+/* -+** dup_m7f00_u32_m: -+** mov z0\.s, p0/m, #-32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f00_u32_m, svuint32_t, -+ z0 = svdup_n_u32_m (z0, p0, -0x7f00), -+ z0 = svdup_u32_m (z0, p0, -0x7f00)) -+ -+/* -+** dup_m7f01_u32_m: -+** mov (z[0-9]+\.s), #-32513 -+** sel z0\.s, p0, \1, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f01_u32_m, svuint32_t, -+ z0 = svdup_n_u32_m (z0, p0, -0x7f01), -+ z0 = svdup_u32_m (z0, p0, -0x7f01)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m7f02_u32_m, svuint32_t, -+ z0 = svdup_n_u32_m (z0, p0, -0x7f02), -+ z0 = svdup_u32_m (z0, p0, -0x7f02)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m7ffe_u32_m, svuint32_t, -+ z0 = svdup_n_u32_m (z0, p0, -0x7ffe), -+ z0 = svdup_u32_m (z0, p0, -0x7ffe)) -+ -+/* -+** dup_m7fff_u32_m: -+** mov (z[0-9]+\.s), #-32767 -+** sel z0\.s, p0, \1, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7fff_u32_m, svuint32_t, -+ z0 = svdup_n_u32_m (z0, p0, -0x7fff), -+ z0 = svdup_u32_m (z0, p0, -0x7fff)) -+ -+/* -+** dup_m8000_u32_m: -+** mov z0\.s, p0/m, #-32768 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m8000_u32_m, svuint32_t, -+ z0 = svdup_n_u32_m (z0, p0, -0x8000), -+ z0 = svdup_u32_m (z0, p0, -0x8000)) -+ -+/* -+** dup_0_u32_m: -+** mov z0\.s, p0/m, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_0_u32_m, svuint32_t, -+ z0 = svdup_n_u32_m (z0, p0, 0), -+ z0 = svdup_u32_m (z0, p0, 0)) -+ -+/* -+** dup_w0_u32_m: -+** movprfx z0, z1 -+** mov z0\.s, p0/m, w0 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_w0_u32_m, svuint32_t, uint32_t, -+ z0 = svdup_n_u32_m (z1, p0, x0), -+ z0 = svdup_u32_m (z1, p0, x0)) -+ -+/* -+** dup_1_u32_z: -+** mov z0\.s, p0/z, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_u32_z, svuint32_t, -+ z0 = svdup_n_u32_z (p0, 1), -+ z0 = svdup_u32_z (p0, 1)) -+ -+/* -+** dup_127_u32_z: -+** mov z0\.s, p0/z, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_127_u32_z, svuint32_t, -+ z0 = svdup_n_u32_z (p0, 127), -+ z0 = svdup_u32_z (p0, 127)) -+ -+/* -+** dup_128_u32_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.s), #128 -+** sel z0\.s, p0, \2, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_128_u32_z, svuint32_t, -+ z0 = svdup_n_u32_z (p0, 128), -+ z0 = svdup_u32_z (p0, 128)) -+ -+/* TODO: Bad code and needs fixing. 
*/ -+TEST_UNIFORM_Z (dup_129_u32_z, svuint32_t, -+ z0 = svdup_n_u32_z (p0, 129), -+ z0 = svdup_u32_z (p0, 129)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_253_u32_z, svuint32_t, -+ z0 = svdup_n_u32_z (p0, 253), -+ z0 = svdup_u32_z (p0, 253)) -+ -+/* -+** dup_254_u32_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.s), #254 -+** sel z0\.s, p0, \2, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_254_u32_z, svuint32_t, -+ z0 = svdup_n_u32_z (p0, 254), -+ z0 = svdup_u32_z (p0, 254)) -+ -+/* -+** dup_255_u32_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.s), #255 -+** sel z0\.s, p0, \2, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_255_u32_z, svuint32_t, -+ z0 = svdup_n_u32_z (p0, 255), -+ z0 = svdup_u32_z (p0, 255)) -+ -+/* -+** dup_256_u32_z: -+** mov z0\.s, p0/z, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_256_u32_z, svuint32_t, -+ z0 = svdup_n_u32_z (p0, 256), -+ z0 = svdup_u32_z (p0, 256)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_257_u32_z, svuint32_t, -+ z0 = svdup_n_u32_z (p0, 257), -+ z0 = svdup_u32_z (p0, 257)) -+ -+/* -+** dup_512_u32_z: -+** mov z0\.s, p0/z, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_512_u32_z, svuint32_t, -+ z0 = svdup_n_u32_z (p0, 512), -+ z0 = svdup_u32_z (p0, 512)) -+ -+/* -+** dup_7f00_u32_z: -+** mov z0\.s, p0/z, #32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7f00_u32_z, svuint32_t, -+ z0 = svdup_n_u32_z (p0, 0x7f00), -+ z0 = svdup_u32_z (p0, 0x7f00)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_7f01_u32_z, svuint32_t, -+ z0 = svdup_n_u32_z (p0, 0x7f01), -+ z0 = svdup_u32_z (p0, 0x7f01)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_7ffd_u32_z, svuint32_t, -+ z0 = svdup_n_u32_z (p0, 0x7ffd), -+ z0 = svdup_u32_z (p0, 0x7ffd)) -+ -+/* -+** dup_7ffe_u32_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.s), #32766 -+** sel z0\.s, p0, \2, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7ffe_u32_z, svuint32_t, -+ z0 = svdup_n_u32_z (p0, 0x7ffe), -+ z0 = svdup_u32_z (p0, 0x7ffe)) -+ -+/* -+** dup_7fff_u32_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.s), #32767 -+** sel z0\.s, p0, \2, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7fff_u32_z, svuint32_t, -+ z0 = svdup_n_u32_z (p0, 0x7fff), -+ z0 = svdup_u32_z (p0, 0x7fff)) -+ -+/* -+** dup_m1_u32_z: -+** mov z0\.s, p0/z, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m1_u32_z, svuint32_t, -+ z0 = svdup_n_u32_z (p0, -1), -+ z0 = svdup_u32_z (p0, -1)) -+ -+/* -+** dup_m128_u32_z: -+** mov z0\.s, p0/z, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m128_u32_z, svuint32_t, -+ z0 = svdup_n_u32_z (p0, -128), -+ z0 = svdup_u32_z (p0, -128)) -+ -+/* -+** dup_m129_u32_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.s), #-129 -+** sel z0\.s, p0, \2, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m129_u32_z, svuint32_t, -+ z0 = svdup_n_u32_z (p0, -129), -+ z0 = svdup_u32_z (p0, -129)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m130_u32_z, svuint32_t, -+ z0 = svdup_n_u32_z (p0, -130), -+ z0 = svdup_u32_z (p0, -130)) -+ -+/* TODO: Bad code and needs fixing. 
*/ -+TEST_UNIFORM_Z (dup_m254_u32_z, svuint32_t, -+ z0 = svdup_n_u32_z (p0, -254), -+ z0 = svdup_u32_z (p0, -254)) -+ -+/* -+** dup_m255_u32_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.s), #-255 -+** sel z0\.s, p0, \2, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m255_u32_z, svuint32_t, -+ z0 = svdup_n_u32_z (p0, -255), -+ z0 = svdup_u32_z (p0, -255)) -+ -+/* -+** dup_m256_u32_z: -+** mov z0\.s, p0/z, #-256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m256_u32_z, svuint32_t, -+ z0 = svdup_n_u32_z (p0, -256), -+ z0 = svdup_u32_z (p0, -256)) -+ -+/* -+** dup_m257_u32_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.s), #-257 -+** sel z0\.s, p0, \2, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m257_u32_z, svuint32_t, -+ z0 = svdup_n_u32_z (p0, -257), -+ z0 = svdup_u32_z (p0, -257)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m258_u32_z, svuint32_t, -+ z0 = svdup_n_u32_z (p0, -258), -+ z0 = svdup_u32_z (p0, -258)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m259_u32_z, svuint32_t, -+ z0 = svdup_n_u32_z (p0, -259), -+ z0 = svdup_u32_z (p0, -259)) -+ -+/* -+** dup_m512_u32_z: -+** mov z0\.s, p0/z, #-512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m512_u32_z, svuint32_t, -+ z0 = svdup_n_u32_z (p0, -512), -+ z0 = svdup_u32_z (p0, -512)) -+ -+/* -+** dup_m7f00_u32_z: -+** mov z0\.s, p0/z, #-32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f00_u32_z, svuint32_t, -+ z0 = svdup_n_u32_z (p0, -0x7f00), -+ z0 = svdup_u32_z (p0, -0x7f00)) -+ -+/* -+** dup_m7f01_u32_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.s), #-32513 -+** sel z0\.s, p0, \2, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f01_u32_z, svuint32_t, -+ z0 = svdup_n_u32_z (p0, -0x7f01), -+ z0 = svdup_u32_z (p0, -0x7f01)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m7f02_u32_z, svuint32_t, -+ z0 = svdup_n_u32_z (p0, -0x7f02), -+ z0 = svdup_u32_z (p0, -0x7f02)) -+ -+/* TODO: Bad code and needs fixing. 
*/ -+TEST_UNIFORM_Z (dup_m7ffe_u32_z, svuint32_t, -+ z0 = svdup_n_u32_z (p0, -0x7ffe), -+ z0 = svdup_u32_z (p0, -0x7ffe)) -+ -+/* -+** dup_m7fff_u32_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.s), #-32767 -+** sel z0\.s, p0, \2, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7fff_u32_z, svuint32_t, -+ z0 = svdup_n_u32_z (p0, -0x7fff), -+ z0 = svdup_u32_z (p0, -0x7fff)) -+ -+/* -+** dup_m8000_u32_z: -+** mov z0\.s, p0/z, #-32768 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m8000_u32_z, svuint32_t, -+ z0 = svdup_n_u32_z (p0, -0x8000), -+ z0 = svdup_u32_z (p0, -0x8000)) -+ -+/* -+** dup_0_u32_z: -+** mov z0\.s, p0/z, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_0_u32_z, svuint32_t, -+ z0 = svdup_n_u32_z (p0, 0), -+ z0 = svdup_u32_z (p0, 0)) -+ -+/* -+** dup_w0_u32_z: -+** movprfx z0\.s, p0/z, z0\.s -+** mov z0\.s, p0/m, w0 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_w0_u32_z, svuint32_t, uint32_t, -+ z0 = svdup_n_u32_z (p0, x0), -+ z0 = svdup_u32_z (p0, x0)) -+ -+/* -+** dup_1_u32_x: -+** mov z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_u32_x, svuint32_t, -+ z0 = svdup_n_u32_x (p0, 1), -+ z0 = svdup_u32_x (p0, 1)) -+ -+/* -+** dup_127_u32_x: -+** mov z0\.s, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_127_u32_x, svuint32_t, -+ z0 = svdup_n_u32_x (p0, 127), -+ z0 = svdup_u32_x (p0, 127)) -+ -+/* -+** dup_128_u32_x: -+** mov z0\.s, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_128_u32_x, svuint32_t, -+ z0 = svdup_n_u32_x (p0, 128), -+ z0 = svdup_u32_x (p0, 128)) -+ -+/* -+** dup_129_u32_x: -+** movi v([0-9]+)\.4s, 0x81 -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_129_u32_x, svuint32_t, -+ z0 = svdup_n_u32_x (p0, 129), -+ z0 = svdup_u32_x (p0, 129)) -+ -+/* -+** dup_253_u32_x: -+** movi v([0-9]+)\.4s, 0xfd -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_253_u32_x, svuint32_t, -+ z0 = svdup_n_u32_x (p0, 253), -+ z0 = svdup_u32_x (p0, 253)) -+ -+/* -+** dup_254_u32_x: -+** mov z0\.s, #254 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_254_u32_x, svuint32_t, -+ z0 = svdup_n_u32_x (p0, 254), -+ z0 = svdup_u32_x (p0, 254)) -+ -+/* -+** dup_255_u32_x: -+** mov z0\.s, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_255_u32_x, svuint32_t, -+ z0 = svdup_n_u32_x (p0, 255), -+ z0 = svdup_u32_x (p0, 255)) -+ -+/* -+** dup_256_u32_x: -+** mov z0\.s, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_256_u32_x, svuint32_t, -+ z0 = svdup_n_u32_x (p0, 256), -+ z0 = svdup_u32_x (p0, 256)) -+ -+/* -+** dup_257_u32_x: -+** mov (w[0-9]+), 257 -+** mov z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_257_u32_x, svuint32_t, -+ z0 = svdup_n_u32_x (p0, 257), -+ z0 = svdup_u32_x (p0, 257)) -+ -+/* -+** dup_512_u32_x: -+** mov z0\.s, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_512_u32_x, svuint32_t, -+ z0 = svdup_n_u32_x (p0, 512), -+ z0 = svdup_u32_x (p0, 512)) -+ -+/* -+** dup_7f00_u32_x: -+** mov z0\.s, #32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7f00_u32_x, svuint32_t, -+ z0 = svdup_n_u32_x (p0, 0x7f00), -+ z0 = svdup_u32_x (p0, 0x7f00)) -+ -+/* -+** dup_7f01_u32_x: -+** mov (w[0-9]+), 32513 -+** mov z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7f01_u32_x, svuint32_t, -+ z0 = svdup_n_u32_x (p0, 0x7f01), -+ z0 = svdup_u32_x (p0, 0x7f01)) -+ -+/* -+** dup_7ffd_u32_x: -+** mov (w[0-9]+), 32765 -+** mov z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7ffd_u32_x, svuint32_t, -+ z0 = svdup_n_u32_x (p0, 0x7ffd), -+ z0 = svdup_u32_x (p0, 0x7ffd)) -+ -+/* -+** dup_7ffe_u32_x: -+** mov z0\.s, #32766 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7ffe_u32_x, svuint32_t, -+ z0 = svdup_n_u32_x (p0, 0x7ffe), -+ z0 = svdup_u32_x (p0, 0x7ffe)) -+ -+/* -+** dup_7fff_u32_x: 
-+** mov z0\.s, #32767 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7fff_u32_x, svuint32_t, -+ z0 = svdup_n_u32_x (p0, 0x7fff), -+ z0 = svdup_u32_x (p0, 0x7fff)) -+ -+/* -+** dup_m1_u32_x: -+** mov z0\.b, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m1_u32_x, svuint32_t, -+ z0 = svdup_n_u32_x (p0, -1), -+ z0 = svdup_u32_x (p0, -1)) -+ -+/* -+** dup_m128_u32_x: -+** mov z0\.s, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m128_u32_x, svuint32_t, -+ z0 = svdup_n_u32_x (p0, -128), -+ z0 = svdup_u32_x (p0, -128)) -+ -+/* -+** dup_m129_u32_x: -+** mov z0\.s, #-129 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m129_u32_x, svuint32_t, -+ z0 = svdup_n_u32_x (p0, -129), -+ z0 = svdup_u32_x (p0, -129)) -+ -+/* -+** dup_m130_u32_x: -+** mvni v([0-9]+)\.4s, 0x81 -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m130_u32_x, svuint32_t, -+ z0 = svdup_n_u32_x (p0, -130), -+ z0 = svdup_u32_x (p0, -130)) -+ -+/* -+** dup_m254_u32_x: -+** mvni v([0-9]+)\.4s, 0xfd -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m254_u32_x, svuint32_t, -+ z0 = svdup_n_u32_x (p0, -254), -+ z0 = svdup_u32_x (p0, -254)) -+ -+/* -+** dup_m255_u32_x: -+** mov z0\.s, #-255 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m255_u32_x, svuint32_t, -+ z0 = svdup_n_u32_x (p0, -255), -+ z0 = svdup_u32_x (p0, -255)) -+ -+/* -+** dup_m256_u32_x: -+** mov z0\.s, #-256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m256_u32_x, svuint32_t, -+ z0 = svdup_n_u32_x (p0, -256), -+ z0 = svdup_u32_x (p0, -256)) -+ -+/* -+** dup_m257_u32_x: -+** mov z0\.s, #-257 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m257_u32_x, svuint32_t, -+ z0 = svdup_n_u32_x (p0, -257), -+ z0 = svdup_u32_x (p0, -257)) -+ -+/* -+** dup_m258_u32_x: -+** mov (w[0-9]+), -258 -+** mov z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m258_u32_x, svuint32_t, -+ z0 = svdup_n_u32_x (p0, -258), -+ z0 = svdup_u32_x (p0, -258)) -+ -+/* -+** dup_m259_u32_x: -+** mov (w[0-9]+), -259 -+** mov z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m259_u32_x, svuint32_t, -+ z0 = svdup_n_u32_x (p0, -259), -+ z0 = svdup_u32_x (p0, -259)) -+ -+/* -+** dup_m512_u32_x: -+** mov z0\.s, #-512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m512_u32_x, svuint32_t, -+ z0 = svdup_n_u32_x (p0, -512), -+ z0 = svdup_u32_x (p0, -512)) -+ -+/* -+** dup_m7f00_u32_x: -+** mov z0\.s, #-32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f00_u32_x, svuint32_t, -+ z0 = svdup_n_u32_x (p0, -0x7f00), -+ z0 = svdup_u32_x (p0, -0x7f00)) -+ -+/* -+** dup_m7f01_u32_x: -+** mov z0\.s, #-32513 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f01_u32_x, svuint32_t, -+ z0 = svdup_n_u32_x (p0, -0x7f01), -+ z0 = svdup_u32_x (p0, -0x7f01)) -+ -+/* -+** dup_m7f02_u32_x: -+** mov (w[0-9]+), -32514 -+** mov z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f02_u32_x, svuint32_t, -+ z0 = svdup_n_u32_x (p0, -0x7f02), -+ z0 = svdup_u32_x (p0, -0x7f02)) -+ -+/* -+** dup_m7ffe_u32_x: -+** mov (w[0-9]+), -32766 -+** mov z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7ffe_u32_x, svuint32_t, -+ z0 = svdup_n_u32_x (p0, -0x7ffe), -+ z0 = svdup_u32_x (p0, -0x7ffe)) -+ -+/* -+** dup_m7fff_u32_x: -+** mov z0\.s, #-32767 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7fff_u32_x, svuint32_t, -+ z0 = svdup_n_u32_x (p0, -0x7fff), -+ z0 = svdup_u32_x (p0, -0x7fff)) -+ -+/* -+** dup_m8000_u32_x: -+** mov z0\.s, #-32768 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m8000_u32_x, svuint32_t, -+ z0 = svdup_n_u32_x (p0, -0x8000), -+ z0 = svdup_u32_x (p0, -0x8000)) -+ -+/* -+** dup_w0_u32_x: -+** mov z0\.s, w0 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_w0_u32_x, svuint32_t, uint32_t, -+ z0 = svdup_n_u32_x (p0, x0), -+ z0 = svdup_u32_x (p0, x0)) -diff 
--git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u64.c -new file mode 100644 -index 000000000..a7cca7af0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u64.c -@@ -0,0 +1,1175 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dup_1_u64: -+** mov z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_u64, svuint64_t, -+ z0 = svdup_n_u64 (1), -+ z0 = svdup_u64 (1)) -+ -+/* -+** dup_127_u64: -+** mov z0\.d, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_127_u64, svuint64_t, -+ z0 = svdup_n_u64 (127), -+ z0 = svdup_u64 (127)) -+ -+/* -+** dup_128_u64: -+** mov z0\.d, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_128_u64, svuint64_t, -+ z0 = svdup_n_u64 (128), -+ z0 = svdup_u64 (128)) -+ -+/* -+** dup_129_u64: -+** mov (x[0-9]+), 129 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_129_u64, svuint64_t, -+ z0 = svdup_n_u64 (129), -+ z0 = svdup_u64 (129)) -+ -+/* -+** dup_253_u64: -+** mov (x[0-9]+), 253 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_253_u64, svuint64_t, -+ z0 = svdup_n_u64 (253), -+ z0 = svdup_u64 (253)) -+ -+/* -+** dup_254_u64: -+** mov z0\.d, #254 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_254_u64, svuint64_t, -+ z0 = svdup_n_u64 (254), -+ z0 = svdup_u64 (254)) -+ -+/* -+** dup_255_u64: -+** mov z0\.d, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_255_u64, svuint64_t, -+ z0 = svdup_n_u64 (255), -+ z0 = svdup_u64 (255)) -+ -+/* -+** dup_256_u64: -+** mov z0\.d, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_256_u64, svuint64_t, -+ z0 = svdup_n_u64 (256), -+ z0 = svdup_u64 (256)) -+ -+/* -+** dup_257_u64: -+** mov (x[0-9]+), 257 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_257_u64, svuint64_t, -+ z0 = svdup_n_u64 (257), -+ z0 = svdup_u64 (257)) -+ -+/* -+** dup_512_u64: -+** mov z0\.d, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_512_u64, svuint64_t, -+ z0 = svdup_n_u64 (512), -+ z0 = svdup_u64 (512)) -+ -+/* -+** dup_7f00_u64: -+** mov z0\.d, #32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7f00_u64, svuint64_t, -+ z0 = svdup_n_u64 (0x7f00), -+ z0 = svdup_u64 (0x7f00)) -+ -+/* -+** dup_7f01_u64: -+** mov (x[0-9]+), 32513 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7f01_u64, svuint64_t, -+ z0 = svdup_n_u64 (0x7f01), -+ z0 = svdup_u64 (0x7f01)) -+ -+/* -+** dup_7ffd_u64: -+** mov (x[0-9]+), 32765 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7ffd_u64, svuint64_t, -+ z0 = svdup_n_u64 (0x7ffd), -+ z0 = svdup_u64 (0x7ffd)) -+ -+/* -+** dup_7ffe_u64: -+** mov z0\.d, #32766 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7ffe_u64, svuint64_t, -+ z0 = svdup_n_u64 (0x7ffe), -+ z0 = svdup_u64 (0x7ffe)) -+ -+/* -+** dup_7fff_u64: -+** mov z0\.d, #32767 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7fff_u64, svuint64_t, -+ z0 = svdup_n_u64 (0x7fff), -+ z0 = svdup_u64 (0x7fff)) -+ -+/* -+** dup_m1_u64: -+** mov z0\.b, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m1_u64, svuint64_t, -+ z0 = svdup_n_u64 (-1), -+ z0 = svdup_u64 (-1)) -+ -+/* -+** dup_m128_u64: -+** mov z0\.d, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m128_u64, svuint64_t, -+ z0 = svdup_n_u64 (-128), -+ z0 = svdup_u64 (-128)) -+ -+/* -+** dup_m129_u64: -+** mov z0\.d, #-129 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m129_u64, svuint64_t, -+ z0 = svdup_n_u64 (-129), -+ z0 = svdup_u64 (-129)) -+ -+/* -+** dup_m130_u64: -+** mov (x[0-9]+), -130 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m130_u64, svuint64_t, -+ z0 = svdup_n_u64 (-130), -+ z0 = svdup_u64 (-130)) -+ -+/* -+** dup_m254_u64: -+** mov 
(x[0-9]+), -254 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m254_u64, svuint64_t, -+ z0 = svdup_n_u64 (-254), -+ z0 = svdup_u64 (-254)) -+ -+/* -+** dup_m255_u64: -+** mov z0\.d, #-255 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m255_u64, svuint64_t, -+ z0 = svdup_n_u64 (-255), -+ z0 = svdup_u64 (-255)) -+ -+/* -+** dup_m256_u64: -+** mov z0\.d, #-256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m256_u64, svuint64_t, -+ z0 = svdup_n_u64 (-256), -+ z0 = svdup_u64 (-256)) -+ -+/* -+** dup_m257_u64: -+** mov z0\.d, #-257 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m257_u64, svuint64_t, -+ z0 = svdup_n_u64 (-257), -+ z0 = svdup_u64 (-257)) -+ -+/* -+** dup_m258_u64: -+** mov (x[0-9]+), -258 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m258_u64, svuint64_t, -+ z0 = svdup_n_u64 (-258), -+ z0 = svdup_u64 (-258)) -+ -+/* -+** dup_m259_u64: -+** mov (x[0-9]+), -259 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m259_u64, svuint64_t, -+ z0 = svdup_n_u64 (-259), -+ z0 = svdup_u64 (-259)) -+ -+/* -+** dup_m512_u64: -+** mov z0\.d, #-512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m512_u64, svuint64_t, -+ z0 = svdup_n_u64 (-512), -+ z0 = svdup_u64 (-512)) -+ -+/* -+** dup_m7f00_u64: -+** mov z0\.d, #-32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f00_u64, svuint64_t, -+ z0 = svdup_n_u64 (-0x7f00), -+ z0 = svdup_u64 (-0x7f00)) -+ -+/* -+** dup_m7f01_u64: -+** mov z0\.d, #-32513 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f01_u64, svuint64_t, -+ z0 = svdup_n_u64 (-0x7f01), -+ z0 = svdup_u64 (-0x7f01)) -+ -+/* -+** dup_m7f02_u64: -+** mov (x[0-9]+), -32514 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f02_u64, svuint64_t, -+ z0 = svdup_n_u64 (-0x7f02), -+ z0 = svdup_u64 (-0x7f02)) -+ -+/* -+** dup_m7ffe_u64: -+** mov (x[0-9]+), -32766 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7ffe_u64, svuint64_t, -+ z0 = svdup_n_u64 (-0x7ffe), -+ z0 = svdup_u64 (-0x7ffe)) -+ -+/* -+** dup_m7fff_u64: -+** mov z0\.d, #-32767 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7fff_u64, svuint64_t, -+ z0 = svdup_n_u64 (-0x7fff), -+ z0 = svdup_u64 (-0x7fff)) -+ -+/* -+** dup_m8000_u64: -+** mov z0\.d, #-32768 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m8000_u64, svuint64_t, -+ z0 = svdup_n_u64 (-0x8000), -+ z0 = svdup_u64 (-0x8000)) -+ -+/* -+** dup_x0_u64: -+** mov z0\.d, x0 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_x0_u64, svuint64_t, uint64_t, -+ z0 = svdup_n_u64 (x0), -+ z0 = svdup_u64 (x0)) -+ -+/* -+** dup_1_u64_m: -+** mov z0\.d, p0/m, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_u64_m, svuint64_t, -+ z0 = svdup_n_u64_m (z0, p0, 1), -+ z0 = svdup_u64_m (z0, p0, 1)) -+ -+/* -+** dup_127_u64_m: -+** mov z0\.d, p0/m, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_127_u64_m, svuint64_t, -+ z0 = svdup_n_u64_m (z0, p0, 127), -+ z0 = svdup_u64_m (z0, p0, 127)) -+ -+/* -+** dup_128_u64_m: -+** mov (z[0-9]+\.d), #128 -+** sel z0\.d, p0, \1, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_128_u64_m, svuint64_t, -+ z0 = svdup_n_u64_m (z0, p0, 128), -+ z0 = svdup_u64_m (z0, p0, 128)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_129_u64_m, svuint64_t, -+ z0 = svdup_n_u64_m (z0, p0, 129), -+ z0 = svdup_u64_m (z0, p0, 129)) -+ -+/* TODO: Bad code and needs fixing. 
*/ -+TEST_UNIFORM_Z (dup_253_u64_m, svuint64_t, -+ z0 = svdup_n_u64_m (z0, p0, 253), -+ z0 = svdup_u64_m (z0, p0, 253)) -+ -+/* -+** dup_254_u64_m: -+** mov (z[0-9]+\.d), #254 -+** sel z0\.d, p0, \1, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_254_u64_m, svuint64_t, -+ z0 = svdup_n_u64_m (z0, p0, 254), -+ z0 = svdup_u64_m (z0, p0, 254)) -+ -+/* -+** dup_255_u64_m: -+** mov (z[0-9]+\.d), #255 -+** sel z0\.d, p0, \1, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_255_u64_m, svuint64_t, -+ z0 = svdup_n_u64_m (z0, p0, 255), -+ z0 = svdup_u64_m (z0, p0, 255)) -+ -+/* -+** dup_256_u64_m: -+** mov z0\.d, p0/m, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_256_u64_m, svuint64_t, -+ z0 = svdup_n_u64_m (z0, p0, 256), -+ z0 = svdup_u64_m (z0, p0, 256)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_257_u64_m, svuint64_t, -+ z0 = svdup_n_u64_m (z0, p0, 257), -+ z0 = svdup_u64_m (z0, p0, 257)) -+ -+/* -+** dup_512_u64_m: -+** mov z0\.d, p0/m, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_512_u64_m, svuint64_t, -+ z0 = svdup_n_u64_m (z0, p0, 512), -+ z0 = svdup_u64_m (z0, p0, 512)) -+ -+/* -+** dup_7f00_u64_m: -+** mov z0\.d, p0/m, #32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7f00_u64_m, svuint64_t, -+ z0 = svdup_n_u64_m (z0, p0, 0x7f00), -+ z0 = svdup_u64_m (z0, p0, 0x7f00)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_7f01_u64_m, svuint64_t, -+ z0 = svdup_n_u64_m (z0, p0, 0x7f01), -+ z0 = svdup_u64_m (z0, p0, 0x7f01)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_7ffd_u64_m, svuint64_t, -+ z0 = svdup_n_u64_m (z0, p0, 0x7ffd), -+ z0 = svdup_u64_m (z0, p0, 0x7ffd)) -+ -+/* -+** dup_7ffe_u64_m: -+** mov (z[0-9]+\.d), #32766 -+** sel z0\.d, p0, \1, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7ffe_u64_m, svuint64_t, -+ z0 = svdup_n_u64_m (z0, p0, 0x7ffe), -+ z0 = svdup_u64_m (z0, p0, 0x7ffe)) -+ -+/* -+** dup_7fff_u64_m: -+** mov (z[0-9]+\.d), #32767 -+** sel z0\.d, p0, \1, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7fff_u64_m, svuint64_t, -+ z0 = svdup_n_u64_m (z0, p0, 0x7fff), -+ z0 = svdup_u64_m (z0, p0, 0x7fff)) -+ -+/* -+** dup_m1_u64_m: -+** mov z0\.d, p0/m, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m1_u64_m, svuint64_t, -+ z0 = svdup_n_u64_m (z0, p0, -1), -+ z0 = svdup_u64_m (z0, p0, -1)) -+ -+/* -+** dup_m128_u64_m: -+** mov z0\.d, p0/m, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m128_u64_m, svuint64_t, -+ z0 = svdup_n_u64_m (z0, p0, -128), -+ z0 = svdup_u64_m (z0, p0, -128)) -+ -+/* -+** dup_m129_u64_m: -+** mov (z[0-9]+\.d), #-129 -+** sel z0\.d, p0, \1, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m129_u64_m, svuint64_t, -+ z0 = svdup_n_u64_m (z0, p0, -129), -+ z0 = svdup_u64_m (z0, p0, -129)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m130_u64_m, svuint64_t, -+ z0 = svdup_n_u64_m (z0, p0, -130), -+ z0 = svdup_u64_m (z0, p0, -130)) -+ -+/* TODO: Bad code and needs fixing. 
*/ -+TEST_UNIFORM_Z (dup_m254_u64_m, svuint64_t, -+ z0 = svdup_n_u64_m (z0, p0, -254), -+ z0 = svdup_u64_m (z0, p0, -254)) -+ -+/* -+** dup_m255_u64_m: -+** mov (z[0-9]+\.d), #-255 -+** sel z0\.d, p0, \1, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m255_u64_m, svuint64_t, -+ z0 = svdup_n_u64_m (z0, p0, -255), -+ z0 = svdup_u64_m (z0, p0, -255)) -+ -+/* -+** dup_m256_u64_m: -+** mov z0\.d, p0/m, #-256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m256_u64_m, svuint64_t, -+ z0 = svdup_n_u64_m (z0, p0, -256), -+ z0 = svdup_u64_m (z0, p0, -256)) -+ -+/* -+** dup_m257_u64_m: -+** mov (z[0-9]+\.d), #-257 -+** sel z0\.d, p0, \1, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m257_u64_m, svuint64_t, -+ z0 = svdup_n_u64_m (z0, p0, -257), -+ z0 = svdup_u64_m (z0, p0, -257)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m258_u64_m, svuint64_t, -+ z0 = svdup_n_u64_m (z0, p0, -258), -+ z0 = svdup_u64_m (z0, p0, -258)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m259_u64_m, svuint64_t, -+ z0 = svdup_n_u64_m (z0, p0, -259), -+ z0 = svdup_u64_m (z0, p0, -259)) -+ -+/* -+** dup_m512_u64_m: -+** mov z0\.d, p0/m, #-512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m512_u64_m, svuint64_t, -+ z0 = svdup_n_u64_m (z0, p0, -512), -+ z0 = svdup_u64_m (z0, p0, -512)) -+ -+/* -+** dup_m7f00_u64_m: -+** mov z0\.d, p0/m, #-32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f00_u64_m, svuint64_t, -+ z0 = svdup_n_u64_m (z0, p0, -0x7f00), -+ z0 = svdup_u64_m (z0, p0, -0x7f00)) -+ -+/* -+** dup_m7f01_u64_m: -+** mov (z[0-9]+\.d), #-32513 -+** sel z0\.d, p0, \1, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f01_u64_m, svuint64_t, -+ z0 = svdup_n_u64_m (z0, p0, -0x7f01), -+ z0 = svdup_u64_m (z0, p0, -0x7f01)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m7f02_u64_m, svuint64_t, -+ z0 = svdup_n_u64_m (z0, p0, -0x7f02), -+ z0 = svdup_u64_m (z0, p0, -0x7f02)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m7ffe_u64_m, svuint64_t, -+ z0 = svdup_n_u64_m (z0, p0, -0x7ffe), -+ z0 = svdup_u64_m (z0, p0, -0x7ffe)) -+ -+/* -+** dup_m7fff_u64_m: -+** mov (z[0-9]+\.d), #-32767 -+** sel z0\.d, p0, \1, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7fff_u64_m, svuint64_t, -+ z0 = svdup_n_u64_m (z0, p0, -0x7fff), -+ z0 = svdup_u64_m (z0, p0, -0x7fff)) -+ -+/* -+** dup_m8000_u64_m: -+** mov z0\.d, p0/m, #-32768 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m8000_u64_m, svuint64_t, -+ z0 = svdup_n_u64_m (z0, p0, -0x8000), -+ z0 = svdup_u64_m (z0, p0, -0x8000)) -+ -+/* -+** dup_0_u64_m: -+** mov z0\.d, p0/m, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_0_u64_m, svuint64_t, -+ z0 = svdup_n_u64_m (z0, p0, 0), -+ z0 = svdup_u64_m (z0, p0, 0)) -+ -+/* -+** dup_x0_u64_m: -+** movprfx z0, z1 -+** mov z0\.d, p0/m, x0 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_x0_u64_m, svuint64_t, uint64_t, -+ z0 = svdup_n_u64_m (z1, p0, x0), -+ z0 = svdup_u64_m (z1, p0, x0)) -+ -+/* -+** dup_1_u64_z: -+** mov z0\.d, p0/z, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_u64_z, svuint64_t, -+ z0 = svdup_n_u64_z (p0, 1), -+ z0 = svdup_u64_z (p0, 1)) -+ -+/* -+** dup_127_u64_z: -+** mov z0\.d, p0/z, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_127_u64_z, svuint64_t, -+ z0 = svdup_n_u64_z (p0, 127), -+ z0 = svdup_u64_z (p0, 127)) -+ -+/* -+** dup_128_u64_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.d), #128 -+** sel z0\.d, p0, \2, \1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_128_u64_z, svuint64_t, -+ z0 = svdup_n_u64_z (p0, 128), -+ z0 = svdup_u64_z (p0, 128)) -+ -+/* TODO: Bad code and needs fixing. 
*/ -+TEST_UNIFORM_Z (dup_129_u64_z, svuint64_t, -+ z0 = svdup_n_u64_z (p0, 129), -+ z0 = svdup_u64_z (p0, 129)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_253_u64_z, svuint64_t, -+ z0 = svdup_n_u64_z (p0, 253), -+ z0 = svdup_u64_z (p0, 253)) -+ -+/* -+** dup_254_u64_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.d), #254 -+** sel z0\.d, p0, \2, \1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_254_u64_z, svuint64_t, -+ z0 = svdup_n_u64_z (p0, 254), -+ z0 = svdup_u64_z (p0, 254)) -+ -+/* -+** dup_255_u64_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.d), #255 -+** sel z0\.d, p0, \2, \1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_255_u64_z, svuint64_t, -+ z0 = svdup_n_u64_z (p0, 255), -+ z0 = svdup_u64_z (p0, 255)) -+ -+/* -+** dup_256_u64_z: -+** mov z0\.d, p0/z, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_256_u64_z, svuint64_t, -+ z0 = svdup_n_u64_z (p0, 256), -+ z0 = svdup_u64_z (p0, 256)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_257_u64_z, svuint64_t, -+ z0 = svdup_n_u64_z (p0, 257), -+ z0 = svdup_u64_z (p0, 257)) -+ -+/* -+** dup_512_u64_z: -+** mov z0\.d, p0/z, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_512_u64_z, svuint64_t, -+ z0 = svdup_n_u64_z (p0, 512), -+ z0 = svdup_u64_z (p0, 512)) -+ -+/* -+** dup_7f00_u64_z: -+** mov z0\.d, p0/z, #32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7f00_u64_z, svuint64_t, -+ z0 = svdup_n_u64_z (p0, 0x7f00), -+ z0 = svdup_u64_z (p0, 0x7f00)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_7f01_u64_z, svuint64_t, -+ z0 = svdup_n_u64_z (p0, 0x7f01), -+ z0 = svdup_u64_z (p0, 0x7f01)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_7ffd_u64_z, svuint64_t, -+ z0 = svdup_n_u64_z (p0, 0x7ffd), -+ z0 = svdup_u64_z (p0, 0x7ffd)) -+ -+/* -+** dup_7ffe_u64_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.d), #32766 -+** sel z0\.d, p0, \2, \1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7ffe_u64_z, svuint64_t, -+ z0 = svdup_n_u64_z (p0, 0x7ffe), -+ z0 = svdup_u64_z (p0, 0x7ffe)) -+ -+/* -+** dup_7fff_u64_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.d), #32767 -+** sel z0\.d, p0, \2, \1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7fff_u64_z, svuint64_t, -+ z0 = svdup_n_u64_z (p0, 0x7fff), -+ z0 = svdup_u64_z (p0, 0x7fff)) -+ -+/* -+** dup_m1_u64_z: -+** mov z0\.d, p0/z, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m1_u64_z, svuint64_t, -+ z0 = svdup_n_u64_z (p0, -1), -+ z0 = svdup_u64_z (p0, -1)) -+ -+/* -+** dup_m128_u64_z: -+** mov z0\.d, p0/z, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m128_u64_z, svuint64_t, -+ z0 = svdup_n_u64_z (p0, -128), -+ z0 = svdup_u64_z (p0, -128)) -+ -+/* -+** dup_m129_u64_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.d), #-129 -+** sel z0\.d, p0, \2, \1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m129_u64_z, svuint64_t, -+ z0 = svdup_n_u64_z (p0, -129), -+ z0 = svdup_u64_z (p0, -129)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m130_u64_z, svuint64_t, -+ z0 = svdup_n_u64_z (p0, -130), -+ z0 = svdup_u64_z (p0, -130)) -+ -+/* TODO: Bad code and needs fixing. 
*/ -+TEST_UNIFORM_Z (dup_m254_u64_z, svuint64_t, -+ z0 = svdup_n_u64_z (p0, -254), -+ z0 = svdup_u64_z (p0, -254)) -+ -+/* -+** dup_m255_u64_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.d), #-255 -+** sel z0\.d, p0, \2, \1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m255_u64_z, svuint64_t, -+ z0 = svdup_n_u64_z (p0, -255), -+ z0 = svdup_u64_z (p0, -255)) -+ -+/* -+** dup_m256_u64_z: -+** mov z0\.d, p0/z, #-256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m256_u64_z, svuint64_t, -+ z0 = svdup_n_u64_z (p0, -256), -+ z0 = svdup_u64_z (p0, -256)) -+ -+/* -+** dup_m257_u64_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.d), #-257 -+** sel z0\.d, p0, \2, \1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m257_u64_z, svuint64_t, -+ z0 = svdup_n_u64_z (p0, -257), -+ z0 = svdup_u64_z (p0, -257)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m258_u64_z, svuint64_t, -+ z0 = svdup_n_u64_z (p0, -258), -+ z0 = svdup_u64_z (p0, -258)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m259_u64_z, svuint64_t, -+ z0 = svdup_n_u64_z (p0, -259), -+ z0 = svdup_u64_z (p0, -259)) -+ -+/* -+** dup_m512_u64_z: -+** mov z0\.d, p0/z, #-512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m512_u64_z, svuint64_t, -+ z0 = svdup_n_u64_z (p0, -512), -+ z0 = svdup_u64_z (p0, -512)) -+ -+/* -+** dup_m7f00_u64_z: -+** mov z0\.d, p0/z, #-32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f00_u64_z, svuint64_t, -+ z0 = svdup_n_u64_z (p0, -0x7f00), -+ z0 = svdup_u64_z (p0, -0x7f00)) -+ -+/* -+** dup_m7f01_u64_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.d), #-32513 -+** sel z0\.d, p0, \2, \1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f01_u64_z, svuint64_t, -+ z0 = svdup_n_u64_z (p0, -0x7f01), -+ z0 = svdup_u64_z (p0, -0x7f01)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (dup_m7f02_u64_z, svuint64_t, -+ z0 = svdup_n_u64_z (p0, -0x7f02), -+ z0 = svdup_u64_z (p0, -0x7f02)) -+ -+/* TODO: Bad code and needs fixing. 
*/ -+TEST_UNIFORM_Z (dup_m7ffe_u64_z, svuint64_t, -+ z0 = svdup_n_u64_z (p0, -0x7ffe), -+ z0 = svdup_u64_z (p0, -0x7ffe)) -+ -+/* -+** dup_m7fff_u64_z: -+** mov (z[0-9]+)\.b, #0 -+** mov (z[0-9]+\.d), #-32767 -+** sel z0\.d, p0, \2, \1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7fff_u64_z, svuint64_t, -+ z0 = svdup_n_u64_z (p0, -0x7fff), -+ z0 = svdup_u64_z (p0, -0x7fff)) -+ -+/* -+** dup_m8000_u64_z: -+** mov z0\.d, p0/z, #-32768 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m8000_u64_z, svuint64_t, -+ z0 = svdup_n_u64_z (p0, -0x8000), -+ z0 = svdup_u64_z (p0, -0x8000)) -+ -+/* -+** dup_0_u64_z: -+** mov z0\.d, p0/z, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_0_u64_z, svuint64_t, -+ z0 = svdup_n_u64_z (p0, 0), -+ z0 = svdup_u64_z (p0, 0)) -+ -+/* -+** dup_x0_u64_z: -+** movprfx z0\.d, p0/z, z0\.d -+** mov z0\.d, p0/m, x0 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_x0_u64_z, svuint64_t, uint64_t, -+ z0 = svdup_n_u64_z (p0, x0), -+ z0 = svdup_u64_z (p0, x0)) -+ -+/* -+** dup_1_u64_x: -+** mov z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_u64_x, svuint64_t, -+ z0 = svdup_n_u64_x (p0, 1), -+ z0 = svdup_u64_x (p0, 1)) -+ -+/* -+** dup_127_u64_x: -+** mov z0\.d, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_127_u64_x, svuint64_t, -+ z0 = svdup_n_u64_x (p0, 127), -+ z0 = svdup_u64_x (p0, 127)) -+ -+/* -+** dup_128_u64_x: -+** mov z0\.d, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_128_u64_x, svuint64_t, -+ z0 = svdup_n_u64_x (p0, 128), -+ z0 = svdup_u64_x (p0, 128)) -+ -+/* -+** dup_129_u64_x: -+** mov (x[0-9]+), 129 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_129_u64_x, svuint64_t, -+ z0 = svdup_n_u64_x (p0, 129), -+ z0 = svdup_u64_x (p0, 129)) -+ -+/* -+** dup_253_u64_x: -+** mov (x[0-9]+), 253 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_253_u64_x, svuint64_t, -+ z0 = svdup_n_u64_x (p0, 253), -+ z0 = svdup_u64_x (p0, 253)) -+ -+/* -+** dup_254_u64_x: -+** mov z0\.d, #254 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_254_u64_x, svuint64_t, -+ z0 = svdup_n_u64_x (p0, 254), -+ z0 = svdup_u64_x (p0, 254)) -+ -+/* -+** dup_255_u64_x: -+** mov z0\.d, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_255_u64_x, svuint64_t, -+ z0 = svdup_n_u64_x (p0, 255), -+ z0 = svdup_u64_x (p0, 255)) -+ -+/* -+** dup_256_u64_x: -+** mov z0\.d, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_256_u64_x, svuint64_t, -+ z0 = svdup_n_u64_x (p0, 256), -+ z0 = svdup_u64_x (p0, 256)) -+ -+/* -+** dup_257_u64_x: -+** mov (x[0-9]+), 257 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_257_u64_x, svuint64_t, -+ z0 = svdup_n_u64_x (p0, 257), -+ z0 = svdup_u64_x (p0, 257)) -+ -+/* -+** dup_512_u64_x: -+** mov z0\.d, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_512_u64_x, svuint64_t, -+ z0 = svdup_n_u64_x (p0, 512), -+ z0 = svdup_u64_x (p0, 512)) -+ -+/* -+** dup_7f00_u64_x: -+** mov z0\.d, #32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7f00_u64_x, svuint64_t, -+ z0 = svdup_n_u64_x (p0, 0x7f00), -+ z0 = svdup_u64_x (p0, 0x7f00)) -+ -+/* -+** dup_7f01_u64_x: -+** mov (x[0-9]+), 32513 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7f01_u64_x, svuint64_t, -+ z0 = svdup_n_u64_x (p0, 0x7f01), -+ z0 = svdup_u64_x (p0, 0x7f01)) -+ -+/* -+** dup_7ffd_u64_x: -+** mov (x[0-9]+), 32765 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7ffd_u64_x, svuint64_t, -+ z0 = svdup_n_u64_x (p0, 0x7ffd), -+ z0 = svdup_u64_x (p0, 0x7ffd)) -+ -+/* -+** dup_7ffe_u64_x: -+** mov z0\.d, #32766 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_7ffe_u64_x, svuint64_t, -+ z0 = svdup_n_u64_x (p0, 0x7ffe), -+ z0 = svdup_u64_x (p0, 0x7ffe)) -+ -+/* -+** dup_7fff_u64_x: -+** mov z0\.d, #32767 -+** 
ret -+*/ -+TEST_UNIFORM_Z (dup_7fff_u64_x, svuint64_t, -+ z0 = svdup_n_u64_x (p0, 0x7fff), -+ z0 = svdup_u64_x (p0, 0x7fff)) -+ -+/* -+** dup_m1_u64_x: -+** mov z0\.b, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m1_u64_x, svuint64_t, -+ z0 = svdup_n_u64_x (p0, -1), -+ z0 = svdup_u64_x (p0, -1)) -+ -+/* -+** dup_m128_u64_x: -+** mov z0\.d, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m128_u64_x, svuint64_t, -+ z0 = svdup_n_u64_x (p0, -128), -+ z0 = svdup_u64_x (p0, -128)) -+ -+/* -+** dup_m129_u64_x: -+** mov z0\.d, #-129 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m129_u64_x, svuint64_t, -+ z0 = svdup_n_u64_x (p0, -129), -+ z0 = svdup_u64_x (p0, -129)) -+ -+/* -+** dup_m130_u64_x: -+** mov (x[0-9]+), -130 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m130_u64_x, svuint64_t, -+ z0 = svdup_n_u64_x (p0, -130), -+ z0 = svdup_u64_x (p0, -130)) -+ -+/* -+** dup_m254_u64_x: -+** mov (x[0-9]+), -254 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m254_u64_x, svuint64_t, -+ z0 = svdup_n_u64_x (p0, -254), -+ z0 = svdup_u64_x (p0, -254)) -+ -+/* -+** dup_m255_u64_x: -+** mov z0\.d, #-255 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m255_u64_x, svuint64_t, -+ z0 = svdup_n_u64_x (p0, -255), -+ z0 = svdup_u64_x (p0, -255)) -+ -+/* -+** dup_m256_u64_x: -+** mov z0\.d, #-256 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m256_u64_x, svuint64_t, -+ z0 = svdup_n_u64_x (p0, -256), -+ z0 = svdup_u64_x (p0, -256)) -+ -+/* -+** dup_m257_u64_x: -+** mov z0\.d, #-257 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m257_u64_x, svuint64_t, -+ z0 = svdup_n_u64_x (p0, -257), -+ z0 = svdup_u64_x (p0, -257)) -+ -+/* -+** dup_m258_u64_x: -+** mov (x[0-9]+), -258 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m258_u64_x, svuint64_t, -+ z0 = svdup_n_u64_x (p0, -258), -+ z0 = svdup_u64_x (p0, -258)) -+ -+/* -+** dup_m259_u64_x: -+** mov (x[0-9]+), -259 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m259_u64_x, svuint64_t, -+ z0 = svdup_n_u64_x (p0, -259), -+ z0 = svdup_u64_x (p0, -259)) -+ -+/* -+** dup_m512_u64_x: -+** mov z0\.d, #-512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m512_u64_x, svuint64_t, -+ z0 = svdup_n_u64_x (p0, -512), -+ z0 = svdup_u64_x (p0, -512)) -+ -+/* -+** dup_m7f00_u64_x: -+** mov z0\.d, #-32512 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f00_u64_x, svuint64_t, -+ z0 = svdup_n_u64_x (p0, -0x7f00), -+ z0 = svdup_u64_x (p0, -0x7f00)) -+ -+/* -+** dup_m7f01_u64_x: -+** mov z0\.d, #-32513 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f01_u64_x, svuint64_t, -+ z0 = svdup_n_u64_x (p0, -0x7f01), -+ z0 = svdup_u64_x (p0, -0x7f01)) -+ -+/* -+** dup_m7f02_u64_x: -+** mov (x[0-9]+), -32514 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7f02_u64_x, svuint64_t, -+ z0 = svdup_n_u64_x (p0, -0x7f02), -+ z0 = svdup_u64_x (p0, -0x7f02)) -+ -+/* -+** dup_m7ffe_u64_x: -+** mov (x[0-9]+), -32766 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7ffe_u64_x, svuint64_t, -+ z0 = svdup_n_u64_x (p0, -0x7ffe), -+ z0 = svdup_u64_x (p0, -0x7ffe)) -+ -+/* -+** dup_m7fff_u64_x: -+** mov z0\.d, #-32767 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m7fff_u64_x, svuint64_t, -+ z0 = svdup_n_u64_x (p0, -0x7fff), -+ z0 = svdup_u64_x (p0, -0x7fff)) -+ -+/* -+** dup_m8000_u64_x: -+** mov z0\.d, #-32768 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m8000_u64_x, svuint64_t, -+ z0 = svdup_n_u64_x (p0, -0x8000), -+ z0 = svdup_u64_x (p0, -0x8000)) -+ -+/* -+** dup_x0_u64_x: -+** mov z0\.d, x0 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_x0_u64_x, svuint64_t, uint64_t, -+ z0 = svdup_n_u64_x (p0, x0), -+ z0 = svdup_u64_x (p0, x0)) -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u8.c -new file mode 100644 -index 000000000..d27f4bba9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dup_u8.c -@@ -0,0 +1,383 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dup_1_u8: -+** mov z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_u8, svuint8_t, -+ z0 = svdup_n_u8 (1), -+ z0 = svdup_u8 (1)) -+ -+/* -+** dup_127_u8: -+** mov z0\.b, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_127_u8, svuint8_t, -+ z0 = svdup_n_u8 (127), -+ z0 = svdup_u8 (127)) -+ -+/* -+** dup_128_u8: -+** mov z0\.b, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_128_u8, svuint8_t, -+ z0 = svdup_n_u8 (128), -+ z0 = svdup_u8 (128)) -+ -+/* -+** dup_129_u8: -+** mov z0\.b, #-127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_129_u8, svuint8_t, -+ z0 = svdup_n_u8 (129), -+ z0 = svdup_u8 (129)) -+ -+/* -+** dup_253_u8: -+** mov z0\.b, #-3 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_253_u8, svuint8_t, -+ z0 = svdup_n_u8 (253), -+ z0 = svdup_u8 (253)) -+ -+/* -+** dup_254_u8: -+** mov z0\.b, #-2 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_254_u8, svuint8_t, -+ z0 = svdup_n_u8 (254), -+ z0 = svdup_u8 (254)) -+ -+/* -+** dup_255_u8: -+** mov z0\.b, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_255_u8, svuint8_t, -+ z0 = svdup_n_u8 (255), -+ z0 = svdup_u8 (255)) -+ -+/* -+** dup_m1_u8: -+** mov z0\.b, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m1_u8, svuint8_t, -+ z0 = svdup_n_u8 (-1), -+ z0 = svdup_u8 (-1)) -+ -+/* -+** dup_m128_u8: -+** mov z0\.b, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m128_u8, svuint8_t, -+ z0 = svdup_n_u8 (-128), -+ z0 = svdup_u8 (-128)) -+ -+/* -+** dup_w0_u8: -+** mov z0\.b, w0 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_w0_u8, svuint8_t, uint8_t, -+ z0 = svdup_n_u8 (x0), -+ z0 = svdup_u8 (x0)) -+ -+/* -+** dup_1_u8_m: -+** mov z0\.b, p0/m, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_u8_m, svuint8_t, -+ z0 = svdup_n_u8_m (z0, p0, 1), -+ z0 = svdup_u8_m (z0, p0, 1)) -+ -+/* -+** dup_127_u8_m: -+** mov z0\.b, p0/m, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_127_u8_m, svuint8_t, -+ z0 = svdup_n_u8_m (z0, p0, 127), -+ z0 = svdup_u8_m (z0, p0, 127)) -+ -+/* -+** dup_128_u8_m: -+** mov z0\.b, p0/m, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_128_u8_m, svuint8_t, -+ z0 = svdup_n_u8_m (z0, p0, 128), -+ z0 = svdup_u8_m (z0, p0, 128)) -+ -+/* -+** dup_129_u8_m: -+** mov z0\.b, p0/m, #-127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_129_u8_m, svuint8_t, -+ z0 = svdup_n_u8_m (z0, p0, 129), -+ z0 = svdup_u8_m (z0, p0, 129)) -+ -+/* -+** dup_253_u8_m: -+** mov z0\.b, p0/m, #-3 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_253_u8_m, svuint8_t, -+ z0 = svdup_n_u8_m (z0, p0, 253), -+ z0 = svdup_u8_m (z0, p0, 253)) -+ -+/* -+** dup_254_u8_m: -+** mov z0\.b, p0/m, #-2 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_254_u8_m, svuint8_t, -+ z0 = svdup_n_u8_m (z0, p0, 254), -+ z0 = svdup_u8_m (z0, p0, 254)) -+ -+/* -+** dup_255_u8_m: -+** mov z0\.b, p0/m, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_255_u8_m, svuint8_t, -+ z0 = svdup_n_u8_m (z0, p0, 255), -+ z0 = svdup_u8_m (z0, p0, 255)) -+ -+/* -+** dup_m1_u8_m: -+** mov z0\.b, p0/m, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m1_u8_m, svuint8_t, -+ z0 = svdup_n_u8_m (z0, p0, -1), -+ z0 = svdup_u8_m (z0, p0, -1)) -+ -+/* -+** dup_m128_u8_m: -+** mov z0\.b, p0/m, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m128_u8_m, svuint8_t, -+ z0 = svdup_n_u8_m (z0, p0, -128), -+ z0 = svdup_u8_m (z0, p0, -128)) -+ -+/* -+** dup_0_u8_m: -+** mov z0\.b, p0/m, #0 -+** ret -+*/ 
-+TEST_UNIFORM_Z (dup_0_u8_m, svuint8_t, -+ z0 = svdup_n_u8_m (z0, p0, 0), -+ z0 = svdup_u8_m (z0, p0, 0)) -+ -+/* -+** dup_w0_u8_m: -+** movprfx z0, z1 -+** mov z0\.b, p0/m, w0 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_w0_u8_m, svuint8_t, uint8_t, -+ z0 = svdup_n_u8_m (z1, p0, x0), -+ z0 = svdup_u8_m (z1, p0, x0)) -+ -+/* -+** dup_1_u8_z: -+** mov z0\.b, p0/z, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_u8_z, svuint8_t, -+ z0 = svdup_n_u8_z (p0, 1), -+ z0 = svdup_u8_z (p0, 1)) -+ -+/* -+** dup_127_u8_z: -+** mov z0\.b, p0/z, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_127_u8_z, svuint8_t, -+ z0 = svdup_n_u8_z (p0, 127), -+ z0 = svdup_u8_z (p0, 127)) -+ -+/* -+** dup_128_u8_z: -+** mov z0\.b, p0/z, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_128_u8_z, svuint8_t, -+ z0 = svdup_n_u8_z (p0, 128), -+ z0 = svdup_u8_z (p0, 128)) -+ -+/* -+** dup_129_u8_z: -+** mov z0\.b, p0/z, #-127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_129_u8_z, svuint8_t, -+ z0 = svdup_n_u8_z (p0, 129), -+ z0 = svdup_u8_z (p0, 129)) -+ -+/* -+** dup_253_u8_z: -+** mov z0\.b, p0/z, #-3 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_253_u8_z, svuint8_t, -+ z0 = svdup_n_u8_z (p0, 253), -+ z0 = svdup_u8_z (p0, 253)) -+ -+/* -+** dup_254_u8_z: -+** mov z0\.b, p0/z, #-2 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_254_u8_z, svuint8_t, -+ z0 = svdup_n_u8_z (p0, 254), -+ z0 = svdup_u8_z (p0, 254)) -+ -+/* -+** dup_255_u8_z: -+** mov z0\.b, p0/z, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_255_u8_z, svuint8_t, -+ z0 = svdup_n_u8_z (p0, 255), -+ z0 = svdup_u8_z (p0, 255)) -+ -+/* -+** dup_m1_u8_z: -+** mov z0\.b, p0/z, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m1_u8_z, svuint8_t, -+ z0 = svdup_n_u8_z (p0, -1), -+ z0 = svdup_u8_z (p0, -1)) -+ -+/* -+** dup_m128_u8_z: -+** mov z0\.b, p0/z, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m128_u8_z, svuint8_t, -+ z0 = svdup_n_u8_z (p0, -128), -+ z0 = svdup_u8_z (p0, -128)) -+ -+/* -+** dup_0_u8_z: -+** mov z0\.b, p0/z, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_0_u8_z, svuint8_t, -+ z0 = svdup_n_u8_z (p0, 0), -+ z0 = svdup_u8_z (p0, 0)) -+ -+/* -+** dup_w0_u8_z: -+** movprfx z0\.b, p0/z, z0\.b -+** mov z0\.b, p0/m, w0 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_w0_u8_z, svuint8_t, uint8_t, -+ z0 = svdup_n_u8_z (p0, x0), -+ z0 = svdup_u8_z (p0, x0)) -+ -+/* -+** dup_1_u8_x: -+** mov z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_1_u8_x, svuint8_t, -+ z0 = svdup_n_u8_x (p0, 1), -+ z0 = svdup_u8_x (p0, 1)) -+ -+/* -+** dup_127_u8_x: -+** mov z0\.b, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_127_u8_x, svuint8_t, -+ z0 = svdup_n_u8_x (p0, 127), -+ z0 = svdup_u8_x (p0, 127)) -+ -+/* -+** dup_128_u8_x: -+** mov z0\.b, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_128_u8_x, svuint8_t, -+ z0 = svdup_n_u8_x (p0, 128), -+ z0 = svdup_u8_x (p0, 128)) -+ -+/* -+** dup_129_u8_x: -+** mov z0\.b, #-127 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_129_u8_x, svuint8_t, -+ z0 = svdup_n_u8_x (p0, 129), -+ z0 = svdup_u8_x (p0, 129)) -+ -+/* -+** dup_253_u8_x: -+** mov z0\.b, #-3 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_253_u8_x, svuint8_t, -+ z0 = svdup_n_u8_x (p0, 253), -+ z0 = svdup_u8_x (p0, 253)) -+ -+/* -+** dup_254_u8_x: -+** mov z0\.b, #-2 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_254_u8_x, svuint8_t, -+ z0 = svdup_n_u8_x (p0, 254), -+ z0 = svdup_u8_x (p0, 254)) -+ -+/* -+** dup_255_u8_x: -+** mov z0\.b, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_255_u8_x, svuint8_t, -+ z0 = svdup_n_u8_x (p0, 255), -+ z0 = svdup_u8_x (p0, 255)) -+ -+/* -+** dup_m1_u8_x: -+** mov z0\.b, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m1_u8_x, svuint8_t, -+ z0 = svdup_n_u8_x (p0, -1), -+ z0 = svdup_u8_x (p0, -1)) -+ 
-+/* -+** dup_m128_u8_x: -+** mov z0\.b, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (dup_m128_u8_x, svuint8_t, -+ z0 = svdup_n_u8_x (p0, -128), -+ z0 = svdup_u8_x (p0, -128)) -+ -+/* -+** dup_w0_u8_x: -+** mov z0\.b, w0 -+** ret -+*/ -+TEST_UNIFORM_ZX (dup_w0_u8_x, svuint8_t, uint8_t, -+ z0 = svdup_n_u8_x (p0, x0), -+ z0 = svdup_u8_x (p0, x0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_b16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_b16.c -new file mode 100644 -index 000000000..ecbacd7e9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_b16.c -@@ -0,0 +1,276 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dupq_00_b16: -+** pfalse p0\.b -+** ret -+*/ -+TEST_UNIFORM_P (dupq_00_b16, -+ p0 = svdupq_n_b16 (0, 0, 0, 0, 0, 0, 0, 0), -+ p0 = svdupq_b16 (0, 0, 0, 0, 0, 0, 0, 0)) -+ -+/* -+** dupq_11_b16: -+** ptrue p0\.d, all -+** ret -+*/ -+TEST_UNIFORM_P (dupq_11_b16, -+ p0 = svdupq_n_b16 (1, 0, 0, 0, 1, 0, 0, 0), -+ p0 = svdupq_b16 (1, 0, 0, 0, 1, 0, 0, 0)) -+ -+/* -+** dupq_22_b16: -+** ( -+** pfalse (p[0-7])\.b -+** ptrue (p[0-7])\.d, all -+** trn1 p0\.h, \1\.h, \2\.h -+** | -+** ptrue (p[0-7])\.d, all -+** pfalse (p[0-7])\.b -+** trn1 p0\.h, \4\.h, \3\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_P (dupq_22_b16, -+ p0 = svdupq_n_b16 (0, 1, 0, 0, 0, 1, 0, 0), -+ p0 = svdupq_b16 (0, 1, 0, 0, 0, 1, 0, 0)) -+ -+/* -+** dupq_33_b16: -+** ptrue (p[0-7])\.d, all -+** trn1 p0\.h, \1\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_P (dupq_33_b16, -+ p0 = svdupq_n_b16 (1, 1, 0, 0, 1, 1, 0, 0), -+ p0 = svdupq_b16 (1, 1, 0, 0, 1, 1, 0, 0)) -+ -+/* -+** dupq_44_b16: -+** ( -+** ptrue (p[0-7])\.d, all -+** ptrue (p[0-7])\.s, all -+** not p0\.b, \2/z, \1\.b -+** | -+** ptrue (p[0-7])\.s, all -+** ptrue (p[0-7])\.d, all -+** not p0\.b, \3/z, \4\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_P (dupq_44_b16, -+ p0 = svdupq_n_b16 (0, 0, 1, 0, 0, 0, 1, 0), -+ p0 = svdupq_b16 (0, 0, 1, 0, 0, 0, 1, 0)) -+ -+/* -+** dupq_55_b16: -+** ptrue p0\.s, all -+** ret -+*/ -+TEST_UNIFORM_P (dupq_55_b16, -+ p0 = svdupq_n_b16 (1, 0, 1, 0, 1, 0, 1, 0), -+ p0 = svdupq_b16 (1, 0, 1, 0, 1, 0, 1, 0)) -+ -+/* -+** dupq_66_b16: -+** ... -+** cmpne p0\.b, p[0-7]/z, z[0-9]+\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_P (dupq_66_b16, -+ p0 = svdupq_n_b16 (0, 1, 1, 0, 0, 1, 1, 0), -+ p0 = svdupq_b16 (0, 1, 1, 0, 0, 1, 1, 0)) -+ -+/* -+** dupq_77_b16: -+** ( -+** ptrue (p[0-7])\.d, all -+** ptrue (p[0-7])\.[hs], all -+** trn1 p0\.h, \2\.h, \1\.h -+** | -+** ptrue (p[0-7])\.[hs], all -+** ptrue (p[0-7])\.s, all -+** trn1 p0\.h, \3\.h, \4\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_P (dupq_77_b16, -+ p0 = svdupq_n_b16 (1, 1, 1, 0, 1, 1, 1, 0), -+ p0 = svdupq_b16 (1, 1, 1, 0, 1, 1, 1, 0)) -+ -+/* -+** dupq_88_b16: -+** ( -+** mov (z[0-9]+)\.d, #71776119061217280 -+** ptrue (p[0-7])\.b, all -+** cmpne p0\.b, \2/z, \1\.b, #0 -+** | -+** ptrue (p[0-7])\.b, all -+** mov (z[0-9]+)\.d, #71776119061217280 -+** cmpne p0\.b, \3/z, \4\.b, #0 -+** ) -+** ret -+*/ -+TEST_UNIFORM_P (dupq_88_b16, -+ p0 = svdupq_n_b16 (0, 0, 0, 1, 0, 0, 0, 1), -+ p0 = svdupq_b16 (0, 0, 0, 1, 0, 0, 0, 1)) -+ -+/* -+** dupq_99_b16: -+** ... 
-+** cmpne p0\.b, p[0-7]/z, z[0-9]+\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_P (dupq_99_b16, -+ p0 = svdupq_n_b16 (1, 0, 0, 1, 1, 0, 0, 1), -+ p0 = svdupq_b16 (1, 0, 0, 1, 1, 0, 0, 1)) -+ -+/* -+** dupq_aa_b16: -+** ( -+** ptrue (p[0-7])\.s, all -+** ptrue (p[0-7])\.h, all -+** not p0\.b, \2/z, \1\.b -+** | -+** ptrue (p[0-7])\.h, all -+** ptrue (p[0-7])\.s, all -+** not p0\.b, \3/z, \4\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_P (dupq_aa_b16, -+ p0 = svdupq_n_b16 (0, 1, 0, 1, 0, 1, 0, 1), -+ p0 = svdupq_b16 (0, 1, 0, 1, 0, 1, 0, 1)) -+ -+/* -+** dupq_bb_b16: -+** ( -+** ptrue (p[0-7])\.d, all -+** ptrue (p[0-7])\.[hs], all -+** trn1 p0\.h, \1\.h, \2\.h -+** | -+** ptrue (p[0-7])\.[hs], all -+** ptrue (p[0-7])\.d, all -+** trn1 p0\.h, \4\.h, \3\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_P (dupq_bb_b16, -+ p0 = svdupq_n_b16 (1, 1, 0, 1, 1, 1, 0, 1), -+ p0 = svdupq_b16 (1, 1, 0, 1, 1, 1, 0, 1)) -+ -+/* -+** dupq_cc_b16: -+** ( -+** pfalse (p[0-7])\.b -+** ptrue (p[0-7])\.h, all -+** trn1 p0\.s, \1\.s, \2\.s -+** | -+** ptrue (p[0-7])\.h, all -+** pfalse (p[0-7])\.b -+** trn1 p0\.s, \4\.s, \3\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_P (dupq_cc_b16, -+ p0 = svdupq_n_b16 (0, 0, 1, 1, 0, 0, 1, 1), -+ p0 = svdupq_b16 (0, 0, 1, 1, 0, 0, 1, 1)) -+ -+/* -+** dupq_dd_b16: -+** ( -+** ptrue (p[0-7])\.[sd], all -+** ptrue (p[0-7])\.h, all -+** trn1 p0\.s, \1\.s, \2\.s -+** | -+** ptrue (p[0-7])\.h, all -+** ptrue (p[0-7])\.[sd], all -+** trn1 p0\.s, \4\.s, \3\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_P (dupq_dd_b16, -+ p0 = svdupq_n_b16 (1, 0, 1, 1, 1, 0, 1, 1), -+ p0 = svdupq_b16 (1, 0, 1, 1, 1, 0, 1, 1)) -+ -+/* -+** dupq_ee_b16: -+** ( -+** ptrue (p[0-7])\.d, all -+** ptrue (p[0-7])\.h, all -+** not p0\.b, \2/z, \1\.b -+** | -+** ptrue (p[0-7])\.h, all -+** ptrue (p[0-7])\.d, all -+** not p0\.b, \3/z, \4\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_P (dupq_ee_b16, -+ p0 = svdupq_n_b16 (0, 1, 1, 1, 0, 1, 1, 1), -+ p0 = svdupq_b16 (0, 1, 1, 1, 0, 1, 1, 1)) -+ -+/* -+** dupq_ff_b16: -+** ptrue p0\.h, all -+** ret -+*/ -+TEST_UNIFORM_P (dupq_ff_b16, -+ p0 = svdupq_n_b16 (1, 1, 1, 1, 1, 1, 1, 1), -+ p0 = svdupq_b16 (1, 1, 1, 1, 1, 1, 1, 1)) -+ -+/* -+** dupq_01_b16: -+** ( -+** ptrue (p[0-7])\.d, all -+** pfalse (p[0-7])\.b -+** trn1 p0\.d, \1\.d, \2\.d -+** | -+** pfalse (p[0-7])\.b -+** ptrue (p[0-7])\.d, all -+** trn1 p0\.d, \4\.d, \3\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_P (dupq_01_b16, -+ p0 = svdupq_n_b16 (1, 0, 0, 0, 0, 0, 0, 0), -+ p0 = svdupq_b16 (1, 0, 0, 0, 0, 0, 0, 0)) -+ -+/* -+** dupq_03_b16: -+** ... -+** cmpne p0\.b, p[0-7]/z, z[0-9]+\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_P (dupq_03_b16, -+ p0 = svdupq_n_b16 (1, 1, 0, 0, 0, 0, 0, 0), -+ p0 = svdupq_b16 (1, 1, 0, 0, 0, 0, 0, 0)) -+ -+/* -+** dupq_0f_b16: -+** ( -+** ptrue (p[0-7])\.h, all -+** pfalse (p[0-7])\.b -+** trn1 p0\.d, \1\.d, \2\.d -+** | -+** pfalse (p[0-7])\.b -+** ptrue (p[0-7])\.h, all -+** trn1 p0\.d, \4\.d, \3\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_P (dupq_0f_b16, -+ p0 = svdupq_n_b16 (1, 1, 1, 1, 0, 0, 0, 0), -+ p0 = svdupq_b16 (1, 1, 1, 1, 0, 0, 0, 0)) -+ -+/* -+** dupq_3f_b16: -+** ... 
-+** cmpne p0\.b, p[0-7]/z, z[0-9]+\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_P (dupq_3f_b16, -+ p0 = svdupq_n_b16 (1, 1, 1, 1, 1, 1, 0, 0), -+ p0 = svdupq_b16 (1, 1, 1, 1, 1, 1, 0, 0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_b32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_b32.c -new file mode 100644 -index 000000000..39719a76d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_b32.c -@@ -0,0 +1,132 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dupq_0_b32: -+** pfalse p0\.b -+** ret -+*/ -+TEST_UNIFORM_P (dupq_0_b32, -+ p0 = svdupq_n_b32 (0, 0, 0, 0), -+ p0 = svdupq_b32 (0, 0, 0, 0)) -+ -+/* -+** dupq_1_b32: -+** ( -+** ptrue (p[0-7])\.d, all -+** pfalse (p[0-7])\.b -+** trn1 p0\.d, \1\.d, \2\.d -+** | -+** pfalse (p[0-7])\.b -+** ptrue (p[0-7])\.d, all -+** trn1 p0\.d, \4\.d, \3\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_P (dupq_1_b32, -+ p0 = svdupq_n_b32 (1, 0, 0, 0), -+ p0 = svdupq_b32 (1, 0, 0, 0)) -+ -+/* -+** dupq_3_b32: -+** ( -+** ptrue (p[0-7])\.s, all -+** pfalse (p[0-7])\.b -+** trn1 p0\.d, \1\.d, \2\.d -+** | -+** pfalse (p[0-7])\.b -+** ptrue (p[0-7])\.s, all -+** trn1 p0\.d, \4\.d, \3\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_P (dupq_3_b32, -+ p0 = svdupq_n_b32 (1, 1, 0, 0), -+ p0 = svdupq_b32 (1, 1, 0, 0)) -+ -+/* -+** dupq_4_b32: -+** ( -+** pfalse (p[0-7])\.b -+** ptrue (p[0-7])\.d, all -+** trn1 p0\.d, \1\.d, \2\.d -+** | -+** ptrue (p[0-7])\.d, all -+** pfalse (p[0-7])\.b -+** trn1 p0\.d, \4\.d, \3\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_P (dupq_4_b32, -+ p0 = svdupq_n_b32 (0, 0, 1, 0), -+ p0 = svdupq_b32 (0, 0, 1, 0)) -+ -+/* -+** dupq_5_b32: -+** ptrue p0\.d, all -+** ret -+*/ -+TEST_UNIFORM_P (dupq_5_b32, -+ p0 = svdupq_n_b32 (1, 0, 1, 0), -+ p0 = svdupq_b32 (1, 0, 1, 0)) -+ -+/* -+** dupq_7_b32: -+** ( -+** ptrue (p[0-7])\.s, all -+** ptrue (p[0-7])\.d, all -+** trn1 p0\.d, \1\.d, \2\.d -+** | -+** ptrue (p[0-7])\.d, all -+** ptrue (p[0-7])\.s, all -+** trn1 p0\.d, \4\.d, \3\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_P (dupq_7_b32, -+ p0 = svdupq_n_b32 (1, 1, 1, 0), -+ p0 = svdupq_b32 (1, 1, 1, 0)) -+ -+/* -+** dupq_a_b32: -+** ( -+** ptrue (p[0-7])\.d, all -+** ptrue (p[0-7])\.s, all -+** not p0\.b, \2/z, \1\.b -+** | -+** ptrue (p[0-7])\.s, all -+** ptrue (p[0-7])\.d, all -+** not p0\.b, \3/z, \4\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_P (dupq_a_b32, -+ p0 = svdupq_n_b32 (0, 1, 0, 1), -+ p0 = svdupq_b32 (0, 1, 0, 1)) -+ -+/* -+** dupq_e_b32: -+** ( -+** ptrue (p[0-7])\.d, all -+** ptrue (p[0-7])\.s, all -+** trn1 p0\.d, \1\.d, \2\.d -+** | -+** ptrue (p[0-7])\.s, all -+** ptrue (p[0-7])\.d, all -+** trn1 p0\.d, \4\.d, \3\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_P (dupq_e_b32, -+ p0 = svdupq_n_b32 (1, 0, 1, 1), -+ p0 = svdupq_b32 (1, 0, 1, 1)) -+ -+/* -+** dupq_f_b32: -+** ptrue p0\.s, all -+** ret -+*/ -+TEST_UNIFORM_P (dupq_f_b32, -+ p0 = svdupq_n_b32 (1, 1, 1, 1), -+ p0 = svdupq_b32 (1, 1, 1, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_b64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_b64.c -new file mode 100644 -index 000000000..820ace431 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_b64.c -@@ -0,0 +1,55 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dupq_0_b64: -+** pfalse p0\.b -+** ret -+*/ -+TEST_UNIFORM_P (dupq_0_b64, -+ p0 = svdupq_n_b64 (0, 0), -+ p0 = svdupq_b64 (0, 0)) -+ -+/* -+** dupq_1_b64: -+** ( -+** ptrue 
(p[0-7])\.d, all -+** pfalse (p[0-7])\.b -+** trn1 p0\.d, \1\.d, \2\.d -+** | -+** pfalse (p[0-7])\.b -+** ptrue (p[0-7])\.d, all -+** trn1 p0\.d, \4\.d, \3\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_P (dupq_1_b64, -+ p0 = svdupq_n_b64 (1, 0), -+ p0 = svdupq_b64 (1, 0)) -+ -+/* -+** dupq_2_b64: -+** ( -+** pfalse (p[0-7])\.b -+** ptrue (p[0-7])\.d, all -+** trn1 p0\.d, \1\.d, \2\.d -+** | -+** ptrue (p[0-7])\.d, all -+** pfalse (p[0-7])\.b -+** trn1 p0\.d, \4\.d, \3\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_P (dupq_2_b64, -+ p0 = svdupq_n_b64 (0, 1), -+ p0 = svdupq_b64 (0, 1)) -+ -+/* -+** dupq_3_b64: -+** ptrue p0\.d, all -+** ret -+*/ -+TEST_UNIFORM_P (dupq_3_b64, -+ p0 = svdupq_n_b64 (1, 1), -+ p0 = svdupq_b64 (1, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_b8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_b8.c -new file mode 100644 -index 000000000..4762f950b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_b8.c -@@ -0,0 +1,413 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dupq_0000_b8: -+** pfalse p0\.b -+** ret -+*/ -+TEST_UNIFORM_P (dupq_0000_b8, -+ p0 = svdupq_n_b8 (0, 0, 0, 0, 0, 0, 0, 0, -+ 0, 0, 0, 0, 0, 0, 0, 0), -+ p0 = svdupq_b8 (0, 0, 0, 0, 0, 0, 0, 0, -+ 0, 0, 0, 0, 0, 0, 0, 0)) -+ -+/* -+** dupq_1111_b8: -+** ptrue p0\.s, all -+** ret -+*/ -+TEST_UNIFORM_P (dupq_1111_b8, -+ p0 = svdupq_n_b8 (1, 0, 0, 0, 1, 0, 0, 0, -+ 1, 0, 0, 0, 1, 0, 0, 0), -+ p0 = svdupq_b8 (1, 0, 0, 0, 1, 0, 0, 0, -+ 1, 0, 0, 0, 1, 0, 0, 0)) -+ -+/* -+** dupq_2222_b8: -+** ( -+** pfalse (p[0-7])\.b -+** ptrue (p[0-7])\.s, all -+** trn1 p0\.b, \1\.b, \2\.b -+** | -+** ptrue (p[0-7])\.s, all -+** pfalse (p[0-7])\.b -+** trn1 p0\.b, \4\.b, \3\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_P (dupq_2222_b8, -+ p0 = svdupq_n_b8 (0, 1, 0, 0, 0, 1, 0, 0, -+ 0, 1, 0, 0, 0, 1, 0, 0), -+ p0 = svdupq_b8 (0, 1, 0, 0, 0, 1, 0, 0, -+ 0, 1, 0, 0, 0, 1, 0, 0)) -+ -+/* -+** dupq_3333_b8: -+** ptrue (p[0-7])\.s, all -+** trn1 p0\.b, \1\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_P (dupq_3333_b8, -+ p0 = svdupq_n_b8 (1, 1, 0, 0, 1, 1, 0, 0, -+ 1, 1, 0, 0, 1, 1, 0, 0), -+ p0 = svdupq_b8 (1, 1, 0, 0, 1, 1, 0, 0, -+ 1, 1, 0, 0, 1, 1, 0, 0)) -+ -+/* -+** dupq_4444_b8: -+** ( -+** ptrue (p[0-7])\.s, all -+** ptrue (p[0-7])\.h, all -+** not p0\.b, \2/z, \1\.b -+** | -+** ptrue (p[0-7])\.h, all -+** ptrue (p[0-7])\.s, all -+** not p0\.b, \3/z, \4\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_P (dupq_4444_b8, -+ p0 = svdupq_n_b8 (0, 0, 1, 0, 0, 0, 1, 0, -+ 0, 0, 1, 0, 0, 0, 1, 0), -+ p0 = svdupq_b8 (0, 0, 1, 0, 0, 0, 1, 0, -+ 0, 0, 1, 0, 0, 0, 1, 0)) -+ -+/* -+** dupq_5555_b8: -+** ptrue p0\.h, all -+** ret -+*/ -+TEST_UNIFORM_P (dupq_5555_b8, -+ p0 = svdupq_n_b8 (1, 0, 1, 0, 1, 0, 1, 0, -+ 1, 0, 1, 0, 1, 0, 1, 0), -+ p0 = svdupq_b8 (1, 0, 1, 0, 1, 0, 1, 0, -+ 1, 0, 1, 0, 1, 0, 1, 0)) -+ -+/* -+** dupq_6666_b8: -+** ( -+** mov (z[0-9]+)\.s, #16776960 -+** ptrue (p[0-7])\.b, all -+** cmpne p0\.b, \2/z, \1\.b, #0 -+** | -+** ptrue (p[0-7])\.b, all -+** mov (z[0-9]+)\.s, #16776960 -+** cmpne p0\.b, \3/z, \4\.b, #0 -+** ) -+** ret -+*/ -+TEST_UNIFORM_P (dupq_6666_b8, -+ p0 = svdupq_n_b8 (0, 1, 1, 0, 0, 1, 1, 0, -+ 0, 1, 1, 0, 0, 1, 1, 0), -+ p0 = svdupq_b8 (0, 1, 1, 0, 0, 1, 1, 0, -+ 0, 1, 1, 0, 0, 1, 1, 0)) -+ -+/* -+** dupq_7777_b8: -+** ( -+** ptrue (p[0-7])\.s, all -+** ptrue (p[0-7])\.[bh], all -+** trn1 p0\.b, \2\.b, \1\.b -+** | -+** ptrue (p[0-7])\.[bh], all -+** ptrue (p[0-7])\.s, all -+** trn1 p0\.b, \3\.b, \4\.b -+** ) -+** 
ret -+*/ -+TEST_UNIFORM_P (dupq_7777_b8, -+ p0 = svdupq_n_b8 (1, 1, 1, 0, 1, 1, 1, 0, -+ 1, 1, 1, 0, 1, 1, 1, 0), -+ p0 = svdupq_b8 (1, 1, 1, 0, 1, 1, 1, 0, -+ 1, 1, 1, 0, 1, 1, 1, 0)) -+ -+/* -+** dupq_8888_b8: -+** ( -+** mov (z[0-9]+)\.s, #-16777216 -+** ptrue (p[0-7])\.b, all -+** cmpne p0\.b, \2/z, \1\.b, #0 -+** | -+** ptrue (p[0-7])\.b, all -+** mov (z[0-9]+)\.s, #-16777216 -+** cmpne p0\.b, \3/z, \4\.b, #0 -+** ) -+** ret -+*/ -+TEST_UNIFORM_P (dupq_8888_b8, -+ p0 = svdupq_n_b8 (0, 0, 0, 1, 0, 0, 0, 1, -+ 0, 0, 0, 1, 0, 0, 0, 1), -+ p0 = svdupq_b8 (0, 0, 0, 1, 0, 0, 0, 1, -+ 0, 0, 0, 1, 0, 0, 0, 1)) -+ -+/* -+** dupq_9999_b8: -+** ( -+** mov (z[0-9]+)\.s, #-16776961 -+** ptrue (p[0-7])\.b, all -+** cmpne p0\.b, \2/z, \1\.b, #0 -+** | -+** ptrue (p[0-7])\.b, all -+** mov (z[0-9]+)\.s, #-16776961 -+** cmpne p0\.b, \3/z, \4\.b, #0 -+** ) -+** ret -+*/ -+TEST_UNIFORM_P (dupq_9999_b8, -+ p0 = svdupq_n_b8 (1, 0, 0, 1, 1, 0, 0, 1, -+ 1, 0, 0, 1, 1, 0, 0, 1), -+ p0 = svdupq_b8 (1, 0, 0, 1, 1, 0, 0, 1, -+ 1, 0, 0, 1, 1, 0, 0, 1)) -+ -+/* -+** dupq_aaaa_b8: -+** ( -+** ptrue (p[0-7])\.h, all -+** ptrue (p[0-7])\.b, all -+** not p0\.b, \2/z, \1\.b -+** | -+** ptrue (p[0-7])\.b, all -+** ptrue (p[0-7])\.h, all -+** not p0\.b, \3/z, \4\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_P (dupq_aaaa_b8, -+ p0 = svdupq_n_b8 (0, 1, 0, 1, 0, 1, 0, 1, -+ 0, 1, 0, 1, 0, 1, 0, 1), -+ p0 = svdupq_b8 (0, 1, 0, 1, 0, 1, 0, 1, -+ 0, 1, 0, 1, 0, 1, 0, 1)) -+ -+/* -+** dupq_bbbb_b8: -+** ( -+** ptrue (p[0-7])\.s, all -+** ptrue (p[0-7])\.[bh], all -+** trn1 p0\.b, \1\.b, \2\.b -+** | -+** ptrue (p[0-7])\.[bh], all -+** ptrue (p[0-7])\.s, all -+** trn1 p0\.b, \4\.b, \3\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_P (dupq_bbbb_b8, -+ p0 = svdupq_n_b8 (1, 1, 0, 1, 1, 1, 0, 1, -+ 1, 1, 0, 1, 1, 1, 0, 1), -+ p0 = svdupq_b8 (1, 1, 0, 1, 1, 1, 0, 1, -+ 1, 1, 0, 1, 1, 1, 0, 1)) -+ -+/* -+** dupq_cccc_b8: -+** ( -+** pfalse (p[0-7])\.b -+** ptrue (p[0-7])\.b, all -+** trn1 p0\.h, \1\.h, \2\.h -+** | -+** ptrue (p[0-7])\.b, all -+** pfalse (p[0-7])\.b -+** trn1 p0\.h, \4\.h, \3\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_P (dupq_cccc_b8, -+ p0 = svdupq_n_b8 (0, 0, 1, 1, 0, 0, 1, 1, -+ 0, 0, 1, 1, 0, 0, 1, 1), -+ p0 = svdupq_b8 (0, 0, 1, 1, 0, 0, 1, 1, -+ 0, 0, 1, 1, 0, 0, 1, 1)) -+ -+/* -+** dupq_dddd_b8: -+** ( -+** ptrue (p[0-7])\.[hs], all -+** ptrue (p[0-7])\.b, all -+** trn1 p0\.h, \1\.h, \2\.h -+** | -+** ptrue (p[0-7])\.b, all -+** ptrue (p[0-7])\.[hs], all -+** trn1 p0\.h, \4\.h, \3\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_P (dupq_dddd_b8, -+ p0 = svdupq_n_b8 (1, 0, 1, 1, 1, 0, 1, 1, -+ 1, 0, 1, 1, 1, 0, 1, 1), -+ p0 = svdupq_b8 (1, 0, 1, 1, 1, 0, 1, 1, -+ 1, 0, 1, 1, 1, 0, 1, 1)) -+ -+/* -+** dupq_eeee_b8: -+** ( -+** ptrue (p[0-7])\.s, all -+** ptrue (p[0-7])\.b, all -+** not p0\.b, \2/z, \1\.b -+** | -+** ptrue (p[0-7])\.b, all -+** ptrue (p[0-7])\.s, all -+** not p0\.b, \3/z, \4\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_P (dupq_eeee_b8, -+ p0 = svdupq_n_b8 (0, 1, 1, 1, 0, 1, 1, 1, -+ 0, 1, 1, 1, 0, 1, 1, 1), -+ p0 = svdupq_b8 (0, 1, 1, 1, 0, 1, 1, 1, -+ 0, 1, 1, 1, 0, 1, 1, 1)) -+ -+/* -+** dupq_ffff_b8: -+** ptrue p0\.b, all -+** ret -+*/ -+TEST_UNIFORM_P (dupq_ffff_b8, -+ p0 = svdupq_n_b8 (1, 1, 1, 1, 1, 1, 1, 1, -+ 1, 1, 1, 1, 1, 1, 1, 1), -+ p0 = svdupq_b8 (1, 1, 1, 1, 1, 1, 1, 1, -+ 1, 1, 1, 1, 1, 1, 1, 1)) -+ -+/* -+** dupq_5f5f_b8: -+** ( -+** ptrue (p[0-7])\.h, all -+** ptrue (p[0-7])\.b, all -+** trn1 p0\.s, \2\.s, \1\.s -+** | -+** ptrue (p[0-7])\.b, all -+** ptrue (p[0-7])\.h, all -+** trn1 p0\.s, \3\.s, \4\.s -+** ) -+** 
ret -+*/ -+TEST_UNIFORM_P (dupq_5f5f_b8, -+ p0 = svdupq_n_b8 (1, 1, 1, 1, 1, 0, 1, 0, -+ 1, 1, 1, 1, 1, 0, 1, 0), -+ p0 = svdupq_b8 (1, 1, 1, 1, 1, 0, 1, 0, -+ 1, 1, 1, 1, 1, 0, 1, 0)) -+ -+/* -+** dupq_1f1f_b8: -+** ( -+** ptrue (p[0-7])\.[sd], all -+** ptrue (p[0-7])\.b, all -+** trn1 p0\.s, \2\.s, \1\.s -+** | -+** ptrue (p[0-7])\.b, all -+** ptrue (p[0-7])\.[sd], all -+** trn1 p0\.s, \3\.s, \4\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_P (dupq_1f1f_b8, -+ p0 = svdupq_n_b8 (1, 1, 1, 1, 1, 0, 0, 0, -+ 1, 1, 1, 1, 1, 0, 0, 0), -+ p0 = svdupq_b8 (1, 1, 1, 1, 1, 0, 0, 0, -+ 1, 1, 1, 1, 1, 0, 0, 0)) -+ -+/* -+** dupq_1515_b8: -+** ( -+** ptrue (p[0-7])\.d, all -+** ptrue (p[0-7])\.[hs], all -+** trn1 p0\.h, \2\.h, \1\.h -+** | -+** ptrue (p[0-7])\.[hs], all -+** ptrue (p[0-7])\.d, all -+** trn1 p0\.h, \3\.h, \4\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_P (dupq_1515_b8, -+ p0 = svdupq_n_b8 (1, 0, 1, 0, 1, 0, 0, 0, -+ 1, 0, 1, 0, 1, 0, 0, 0), -+ p0 = svdupq_b8 (1, 0, 1, 0, 1, 0, 0, 0, -+ 1, 0, 1, 0, 1, 0, 0, 0)) -+ -+/* -+** dupq_0505_b8: -+** ptrue (p[0-7])\.d, all -+** trn1 p0\.h, \1\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_P (dupq_0505_b8, -+ p0 = svdupq_n_b8 (1, 0, 1, 0, 0, 0, 0, 0, -+ 1, 0, 1, 0, 0, 0, 0, 0), -+ p0 = svdupq_b8 (1, 0, 1, 0, 0, 0, 0, 0, -+ 1, 0, 1, 0, 0, 0, 0, 0)) -+ -+/* -+** dupq_00ff_b8: -+** ( -+** pfalse (p[0-7])\.b -+** ptrue (p[0-7])\.b, all -+** trn1 p0\.d, \2\.d, \1\.d -+** | -+** ptrue (p[0-7])\.b, all -+** pfalse (p[0-7])\.b -+** trn1 p0\.d, \3\.d, \4\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_P (dupq_00ff_b8, -+ p0 = svdupq_n_b8 (1, 1, 1, 1, 1, 1, 1, 1, -+ 0, 0, 0, 0, 0, 0, 0, 0), -+ p0 = svdupq_b8 (1, 1, 1, 1, 1, 1, 1, 1, -+ 0, 0, 0, 0, 0, 0, 0, 0)) -+ -+/* -+** dupq_0055_b8: -+** ( -+** pfalse (p[0-7])\.b -+** ptrue (p[0-7])\.h, all -+** trn1 p0\.d, \2\.d, \1\.d -+** | -+** ptrue (p[0-7])\.h, all -+** pfalse (p[0-7])\.b -+** trn1 p0\.d, \3\.d, \4\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_P (dupq_0055_b8, -+ p0 = svdupq_n_b8 (1, 0, 1, 0, 1, 0, 1, 0, -+ 0, 0, 0, 0, 0, 0, 0, 0), -+ p0 = svdupq_b8 (1, 0, 1, 0, 1, 0, 1, 0, -+ 0, 0, 0, 0, 0, 0, 0, 0)) -+ -+/* -+** dupq_0011_b8: -+** ( -+** pfalse (p[0-7])\.b -+** ptrue (p[0-7])\.s, all -+** trn1 p0\.d, \2\.d, \1\.d -+** | -+** ptrue (p[0-7])\.s, all -+** pfalse (p[0-7])\.b -+** trn1 p0\.d, \3\.d, \4\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_P (dupq_0011_b8, -+ p0 = svdupq_n_b8 (1, 0, 0, 0, 1, 0, 0, 0, -+ 0, 0, 0, 0, 0, 0, 0, 0), -+ p0 = svdupq_b8 (1, 0, 0, 0, 1, 0, 0, 0, -+ 0, 0, 0, 0, 0, 0, 0, 0)) -+ -+/* -+** dupq_0111_b8: -+** ( -+** ptrue (p[0-7])\.d, all -+** ptrue (p[0-7])\.s, all -+** trn1 p0\.d, \2\.d, \1\.d -+** | -+** ptrue (p[0-7])\.s, all -+** ptrue (p[0-7])\.d, all -+** trn1 p0\.d, \3\.d, \4\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_P (dupq_0111_b8, -+ p0 = svdupq_n_b8 (1, 0, 0, 0, 1, 0, 0, 0, -+ 1, 0, 0, 0, 0, 0, 0, 0), -+ p0 = svdupq_b8 (1, 0, 0, 0, 1, 0, 0, 0, -+ 1, 0, 0, 0, 0, 0, 0, 0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_f16.c -new file mode 100644 -index 000000000..91de8344c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_f16.c -@@ -0,0 +1,53 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dupq_1c_f16: -+** mov z0\.s, #15360 -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_1c_f16, svfloat16_t, -+ z0 = svdupq_n_f16 (1.0, 0, 1.0, 0, 1.0, 0, 1.0, 0), -+ z0 = svdupq_f16 (1.0, 0, 1.0, 0, 1.0, 0, 1.0, 0)); -+ -+/* -+** dupq_5ic_f16: -+** movi v([0-9]+)\.4s, 
0x45, lsl 24 -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_5ic_f16, svfloat16_t, -+ z0 = svdupq_n_f16 (0, 5.0, 0, 5.0, 0, 5.0, 0, 5.0), -+ z0 = svdupq_f16 (0, 5.0, 0, 5.0, 0, 5.0, 0, 5.0)); -+ -+ -+/* -+** dupq_m1c_f16: -+** movi v([0-9]+)\.4s, 0xbc, lsl 8 -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_m1c_f16, svfloat16_t, -+ z0 = svdupq_n_f16 (-1.0, 0, -1.0, 0, -1.0, 0, -1.0, 0), -+ z0 = svdupq_f16 (-1.0, 0, -1.0, 0, -1.0, 0, -1.0, 0)); -+ -+/* -+** dupq_40p5c_f16: -+** mov (w[0-9]+), 20752 -+** mov z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_40p5c_f16, svfloat16_t, -+ z0 = svdupq_n_f16 (40.5, 0, 40.5, 0, 40.5, 0, 40.5, 0), -+ z0 = svdupq_f16 (40.5, 0, 40.5, 0, 40.5, 0, 40.5, 0)); -+ -+/* -+** dupq_pool_f16: -+** ... -+** ld1rqh z0\.h, p[0-7]/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_pool_f16, svfloat16_t, -+ z0 = svdupq_n_f16 (4.75, 1.0, 9, 77, 5.25, 22, 19, 50), -+ z0 = svdupq_f16 (4.75, 1.0, 9, 77, 5.25, 22, 19, 50)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_f32.c -new file mode 100644 -index 000000000..4f9c04f1a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_f32.c -@@ -0,0 +1,53 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dupq_1c_f32: -+** mov z0\.d, #1065353216 -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_1c_f32, svfloat32_t, -+ z0 = svdupq_n_f32 (1.0, 0, 1.0, 0), -+ z0 = svdupq_f32 (1.0, 0, 1.0, 0)); -+ -+/* -+** dupq_5ic_f32: -+** mov (x[0-9]+), 4656722014701092864 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_5ic_f32, svfloat32_t, -+ z0 = svdupq_n_f32 (0, 5.0, 0, 5.0), -+ z0 = svdupq_f32 (0, 5.0, 0, 5.0)); -+ -+ -+/* -+** dupq_m1c_f32: -+** mov (x[0-9]+), 3212836864 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_m1c_f32, svfloat32_t, -+ z0 = svdupq_n_f32 (-1.0, 0, -1.0, 0), -+ z0 = svdupq_f32 (-1.0, 0, -1.0, 0)); -+ -+/* -+** dupq_40p5c_f32: -+** mov (x[0-9]+), 1109524480 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_40p5c_f32, svfloat32_t, -+ z0 = svdupq_n_f32 (40.5, 0, 40.5, 0), -+ z0 = svdupq_f32 (40.5, 0, 40.5, 0)); -+ -+/* -+** dupq_pool_f32: -+** ... -+** ld1rqw z0\.s, p[0-7]/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_pool_f32, svfloat32_t, -+ z0 = svdupq_n_f32 (4.5, 10.1, 7.3, 11.8), -+ z0 = svdupq_f32 (4.5, 10.1, 7.3, 11.8)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_f64.c -new file mode 100644 -index 000000000..27d14480e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_f64.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dupq_pool_f64: -+** ... 
-+** ld1rqd z0\.d, p[0-7]/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_pool_f64, svfloat64_t, -+ z0 = svdupq_n_f64 (4.5, 10.1), -+ z0 = svdupq_f64 (4.5, 10.1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_bf16.c -new file mode 100644 -index 000000000..89ae4a4c2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_bf16.c -@@ -0,0 +1,48 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dupq_lane_0_bf16_tied: -+** dup z0\.q, z0\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_0_bf16_tied, svbfloat16_t, -+ z0 = svdupq_lane_bf16 (z0, 0), -+ z0 = svdupq_lane (z0, 0)) -+ -+/* -+** dupq_lane_0_bf16_untied: -+** dup z0\.q, z1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_0_bf16_untied, svbfloat16_t, -+ z0 = svdupq_lane_bf16 (z1, 0), -+ z0 = svdupq_lane (z1, 0)) -+ -+/* -+** dupq_lane_1_bf16: -+** dup z0\.q, z0\.q\[1\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_1_bf16, svbfloat16_t, -+ z0 = svdupq_lane_bf16 (z0, 1), -+ z0 = svdupq_lane (z0, 1)) -+ -+/* -+** dupq_lane_2_bf16: -+** dup z0\.q, z0\.q\[2\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_2_bf16, svbfloat16_t, -+ z0 = svdupq_lane_bf16 (z0, 2), -+ z0 = svdupq_lane (z0, 2)) -+ -+/* -+** dupq_lane_3_bf16: -+** dup z0\.q, z0\.q\[3\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_3_bf16, svbfloat16_t, -+ z0 = svdupq_lane_bf16 (z0, 3), -+ z0 = svdupq_lane (z0, 3)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_f16.c -new file mode 100644 -index 000000000..6fa97ca3a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_f16.c -@@ -0,0 +1,48 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dupq_lane_0_f16_tied: -+** dup z0\.q, z0\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_0_f16_tied, svfloat16_t, -+ z0 = svdupq_lane_f16 (z0, 0), -+ z0 = svdupq_lane (z0, 0)) -+ -+/* -+** dupq_lane_0_f16_untied: -+** dup z0\.q, z1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_0_f16_untied, svfloat16_t, -+ z0 = svdupq_lane_f16 (z1, 0), -+ z0 = svdupq_lane (z1, 0)) -+ -+/* -+** dupq_lane_1_f16: -+** dup z0\.q, z0\.q\[1\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_1_f16, svfloat16_t, -+ z0 = svdupq_lane_f16 (z0, 1), -+ z0 = svdupq_lane (z0, 1)) -+ -+/* -+** dupq_lane_2_f16: -+** dup z0\.q, z0\.q\[2\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_2_f16, svfloat16_t, -+ z0 = svdupq_lane_f16 (z0, 2), -+ z0 = svdupq_lane (z0, 2)) -+ -+/* -+** dupq_lane_3_f16: -+** dup z0\.q, z0\.q\[3\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_3_f16, svfloat16_t, -+ z0 = svdupq_lane_f16 (z0, 3), -+ z0 = svdupq_lane (z0, 3)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_f32.c -new file mode 100644 -index 000000000..69ce5452e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_f32.c -@@ -0,0 +1,48 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dupq_lane_0_f32_tied: -+** dup z0\.q, z0\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_0_f32_tied, svfloat32_t, -+ z0 = svdupq_lane_f32 (z0, 0), -+ z0 = svdupq_lane (z0, 0)) -+ -+/* -+** dupq_lane_0_f32_untied: -+** dup z0\.q, z1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_0_f32_untied, svfloat32_t, -+ 
z0 = svdupq_lane_f32 (z1, 0), -+ z0 = svdupq_lane (z1, 0)) -+ -+/* -+** dupq_lane_1_f32: -+** dup z0\.q, z0\.q\[1\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_1_f32, svfloat32_t, -+ z0 = svdupq_lane_f32 (z0, 1), -+ z0 = svdupq_lane (z0, 1)) -+ -+/* -+** dupq_lane_2_f32: -+** dup z0\.q, z0\.q\[2\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_2_f32, svfloat32_t, -+ z0 = svdupq_lane_f32 (z0, 2), -+ z0 = svdupq_lane (z0, 2)) -+ -+/* -+** dupq_lane_3_f32: -+** dup z0\.q, z0\.q\[3\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_3_f32, svfloat32_t, -+ z0 = svdupq_lane_f32 (z0, 3), -+ z0 = svdupq_lane (z0, 3)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_f64.c -new file mode 100644 -index 000000000..51a8d9f2d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_f64.c -@@ -0,0 +1,48 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dupq_lane_0_f64_tied: -+** dup z0\.q, z0\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_0_f64_tied, svfloat64_t, -+ z0 = svdupq_lane_f64 (z0, 0), -+ z0 = svdupq_lane (z0, 0)) -+ -+/* -+** dupq_lane_0_f64_untied: -+** dup z0\.q, z1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_0_f64_untied, svfloat64_t, -+ z0 = svdupq_lane_f64 (z1, 0), -+ z0 = svdupq_lane (z1, 0)) -+ -+/* -+** dupq_lane_1_f64: -+** dup z0\.q, z0\.q\[1\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_1_f64, svfloat64_t, -+ z0 = svdupq_lane_f64 (z0, 1), -+ z0 = svdupq_lane (z0, 1)) -+ -+/* -+** dupq_lane_2_f64: -+** dup z0\.q, z0\.q\[2\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_2_f64, svfloat64_t, -+ z0 = svdupq_lane_f64 (z0, 2), -+ z0 = svdupq_lane (z0, 2)) -+ -+/* -+** dupq_lane_3_f64: -+** dup z0\.q, z0\.q\[3\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_3_f64, svfloat64_t, -+ z0 = svdupq_lane_f64 (z0, 3), -+ z0 = svdupq_lane (z0, 3)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_s16.c -new file mode 100644 -index 000000000..08a0510be ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_s16.c -@@ -0,0 +1,48 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dupq_lane_0_s16_tied: -+** dup z0\.q, z0\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_0_s16_tied, svint16_t, -+ z0 = svdupq_lane_s16 (z0, 0), -+ z0 = svdupq_lane (z0, 0)) -+ -+/* -+** dupq_lane_0_s16_untied: -+** dup z0\.q, z1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_0_s16_untied, svint16_t, -+ z0 = svdupq_lane_s16 (z1, 0), -+ z0 = svdupq_lane (z1, 0)) -+ -+/* -+** dupq_lane_1_s16: -+** dup z0\.q, z0\.q\[1\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_1_s16, svint16_t, -+ z0 = svdupq_lane_s16 (z0, 1), -+ z0 = svdupq_lane (z0, 1)) -+ -+/* -+** dupq_lane_2_s16: -+** dup z0\.q, z0\.q\[2\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_2_s16, svint16_t, -+ z0 = svdupq_lane_s16 (z0, 2), -+ z0 = svdupq_lane (z0, 2)) -+ -+/* -+** dupq_lane_3_s16: -+** dup z0\.q, z0\.q\[3\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_3_s16, svint16_t, -+ z0 = svdupq_lane_s16 (z0, 3), -+ z0 = svdupq_lane (z0, 3)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_s32.c -new file mode 100644 -index 000000000..e9a9c9a60 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_s32.c -@@ -0,0 +1,48 @@ -+/* { dg-final { 
check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dupq_lane_0_s32_tied: -+** dup z0\.q, z0\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_0_s32_tied, svint32_t, -+ z0 = svdupq_lane_s32 (z0, 0), -+ z0 = svdupq_lane (z0, 0)) -+ -+/* -+** dupq_lane_0_s32_untied: -+** dup z0\.q, z1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_0_s32_untied, svint32_t, -+ z0 = svdupq_lane_s32 (z1, 0), -+ z0 = svdupq_lane (z1, 0)) -+ -+/* -+** dupq_lane_1_s32: -+** dup z0\.q, z0\.q\[1\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_1_s32, svint32_t, -+ z0 = svdupq_lane_s32 (z0, 1), -+ z0 = svdupq_lane (z0, 1)) -+ -+/* -+** dupq_lane_2_s32: -+** dup z0\.q, z0\.q\[2\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_2_s32, svint32_t, -+ z0 = svdupq_lane_s32 (z0, 2), -+ z0 = svdupq_lane (z0, 2)) -+ -+/* -+** dupq_lane_3_s32: -+** dup z0\.q, z0\.q\[3\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_3_s32, svint32_t, -+ z0 = svdupq_lane_s32 (z0, 3), -+ z0 = svdupq_lane (z0, 3)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_s64.c -new file mode 100644 -index 000000000..2c6342149 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_s64.c -@@ -0,0 +1,48 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dupq_lane_0_s64_tied: -+** dup z0\.q, z0\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_0_s64_tied, svint64_t, -+ z0 = svdupq_lane_s64 (z0, 0), -+ z0 = svdupq_lane (z0, 0)) -+ -+/* -+** dupq_lane_0_s64_untied: -+** dup z0\.q, z1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_0_s64_untied, svint64_t, -+ z0 = svdupq_lane_s64 (z1, 0), -+ z0 = svdupq_lane (z1, 0)) -+ -+/* -+** dupq_lane_1_s64: -+** dup z0\.q, z0\.q\[1\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_1_s64, svint64_t, -+ z0 = svdupq_lane_s64 (z0, 1), -+ z0 = svdupq_lane (z0, 1)) -+ -+/* -+** dupq_lane_2_s64: -+** dup z0\.q, z0\.q\[2\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_2_s64, svint64_t, -+ z0 = svdupq_lane_s64 (z0, 2), -+ z0 = svdupq_lane (z0, 2)) -+ -+/* -+** dupq_lane_3_s64: -+** dup z0\.q, z0\.q\[3\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_3_s64, svint64_t, -+ z0 = svdupq_lane_s64 (z0, 3), -+ z0 = svdupq_lane (z0, 3)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_s8.c -new file mode 100644 -index 000000000..2c2e6ee72 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_s8.c -@@ -0,0 +1,48 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dupq_lane_0_s8_tied: -+** dup z0\.q, z0\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_0_s8_tied, svint8_t, -+ z0 = svdupq_lane_s8 (z0, 0), -+ z0 = svdupq_lane (z0, 0)) -+ -+/* -+** dupq_lane_0_s8_untied: -+** dup z0\.q, z1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_0_s8_untied, svint8_t, -+ z0 = svdupq_lane_s8 (z1, 0), -+ z0 = svdupq_lane (z1, 0)) -+ -+/* -+** dupq_lane_1_s8: -+** dup z0\.q, z0\.q\[1\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_1_s8, svint8_t, -+ z0 = svdupq_lane_s8 (z0, 1), -+ z0 = svdupq_lane (z0, 1)) -+ -+/* -+** dupq_lane_2_s8: -+** dup z0\.q, z0\.q\[2\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_2_s8, svint8_t, -+ z0 = svdupq_lane_s8 (z0, 2), -+ z0 = svdupq_lane (z0, 2)) -+ -+/* -+** dupq_lane_3_s8: -+** dup z0\.q, z0\.q\[3\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_3_s8, svint8_t, -+ z0 = 
svdupq_lane_s8 (z0, 3), -+ z0 = svdupq_lane (z0, 3)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_u16.c -new file mode 100644 -index 000000000..e5fba592f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_u16.c -@@ -0,0 +1,48 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dupq_lane_0_u16_tied: -+** dup z0\.q, z0\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_0_u16_tied, svuint16_t, -+ z0 = svdupq_lane_u16 (z0, 0), -+ z0 = svdupq_lane (z0, 0)) -+ -+/* -+** dupq_lane_0_u16_untied: -+** dup z0\.q, z1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_0_u16_untied, svuint16_t, -+ z0 = svdupq_lane_u16 (z1, 0), -+ z0 = svdupq_lane (z1, 0)) -+ -+/* -+** dupq_lane_1_u16: -+** dup z0\.q, z0\.q\[1\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_1_u16, svuint16_t, -+ z0 = svdupq_lane_u16 (z0, 1), -+ z0 = svdupq_lane (z0, 1)) -+ -+/* -+** dupq_lane_2_u16: -+** dup z0\.q, z0\.q\[2\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_2_u16, svuint16_t, -+ z0 = svdupq_lane_u16 (z0, 2), -+ z0 = svdupq_lane (z0, 2)) -+ -+/* -+** dupq_lane_3_u16: -+** dup z0\.q, z0\.q\[3\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_3_u16, svuint16_t, -+ z0 = svdupq_lane_u16 (z0, 3), -+ z0 = svdupq_lane (z0, 3)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_u32.c -new file mode 100644 -index 000000000..fb3346e45 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_u32.c -@@ -0,0 +1,48 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dupq_lane_0_u32_tied: -+** dup z0\.q, z0\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_0_u32_tied, svuint32_t, -+ z0 = svdupq_lane_u32 (z0, 0), -+ z0 = svdupq_lane (z0, 0)) -+ -+/* -+** dupq_lane_0_u32_untied: -+** dup z0\.q, z1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_0_u32_untied, svuint32_t, -+ z0 = svdupq_lane_u32 (z1, 0), -+ z0 = svdupq_lane (z1, 0)) -+ -+/* -+** dupq_lane_1_u32: -+** dup z0\.q, z0\.q\[1\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_1_u32, svuint32_t, -+ z0 = svdupq_lane_u32 (z0, 1), -+ z0 = svdupq_lane (z0, 1)) -+ -+/* -+** dupq_lane_2_u32: -+** dup z0\.q, z0\.q\[2\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_2_u32, svuint32_t, -+ z0 = svdupq_lane_u32 (z0, 2), -+ z0 = svdupq_lane (z0, 2)) -+ -+/* -+** dupq_lane_3_u32: -+** dup z0\.q, z0\.q\[3\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_3_u32, svuint32_t, -+ z0 = svdupq_lane_u32 (z0, 3), -+ z0 = svdupq_lane (z0, 3)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_u64.c -new file mode 100644 -index 000000000..22f1d5d55 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_u64.c -@@ -0,0 +1,48 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dupq_lane_0_u64_tied: -+** dup z0\.q, z0\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_0_u64_tied, svuint64_t, -+ z0 = svdupq_lane_u64 (z0, 0), -+ z0 = svdupq_lane (z0, 0)) -+ -+/* -+** dupq_lane_0_u64_untied: -+** dup z0\.q, z1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_0_u64_untied, svuint64_t, -+ z0 = svdupq_lane_u64 (z1, 0), -+ z0 = svdupq_lane (z1, 0)) -+ -+/* -+** dupq_lane_1_u64: -+** dup z0\.q, z0\.q\[1\] -+** ret -+*/ -+TEST_UNIFORM_Z 
(dupq_lane_1_u64, svuint64_t, -+ z0 = svdupq_lane_u64 (z0, 1), -+ z0 = svdupq_lane (z0, 1)) -+ -+/* -+** dupq_lane_2_u64: -+** dup z0\.q, z0\.q\[2\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_2_u64, svuint64_t, -+ z0 = svdupq_lane_u64 (z0, 2), -+ z0 = svdupq_lane (z0, 2)) -+ -+/* -+** dupq_lane_3_u64: -+** dup z0\.q, z0\.q\[3\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_3_u64, svuint64_t, -+ z0 = svdupq_lane_u64 (z0, 3), -+ z0 = svdupq_lane (z0, 3)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_u8.c -new file mode 100644 -index 000000000..ba16f836a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_lane_u8.c -@@ -0,0 +1,48 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dupq_lane_0_u8_tied: -+** dup z0\.q, z0\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_0_u8_tied, svuint8_t, -+ z0 = svdupq_lane_u8 (z0, 0), -+ z0 = svdupq_lane (z0, 0)) -+ -+/* -+** dupq_lane_0_u8_untied: -+** dup z0\.q, z1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_0_u8_untied, svuint8_t, -+ z0 = svdupq_lane_u8 (z1, 0), -+ z0 = svdupq_lane (z1, 0)) -+ -+/* -+** dupq_lane_1_u8: -+** dup z0\.q, z0\.q\[1\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_1_u8, svuint8_t, -+ z0 = svdupq_lane_u8 (z0, 1), -+ z0 = svdupq_lane (z0, 1)) -+ -+/* -+** dupq_lane_2_u8: -+** dup z0\.q, z0\.q\[2\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_2_u8, svuint8_t, -+ z0 = svdupq_lane_u8 (z0, 2), -+ z0 = svdupq_lane (z0, 2)) -+ -+/* -+** dupq_lane_3_u8: -+** dup z0\.q, z0\.q\[3\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_lane_3_u8, svuint8_t, -+ z0 = svdupq_lane_u8 (z0, 3), -+ z0 = svdupq_lane (z0, 3)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_s16.c -new file mode 100644 -index 000000000..5a9a53b2d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_s16.c -@@ -0,0 +1,70 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dupq_25600s_s16: -+** mov z0\.s, #25600 -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_25600s_s16, svint16_t, -+ z0 = svdupq_n_s16 (25600, 0, 25600, 0, 25600, 0, 25600, 0), -+ z0 = svdupq_s16 (25600, 0, 25600, 0, 25600, 0, 25600, 0)) -+ -+/* -+** dupq_7ff00s_s16: -+** mov z0\.s, #524032 -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_7ff00s_s16, svint16_t, -+ z0 = svdupq_n_s16 (0xff00, 7, 0xff00, 7, 0xff00, 7, 0xff00, 7), -+ z0 = svdupq_s16 (0xff00, 7, 0xff00, 7, 0xff00, 7, 0xff00, 7)) -+ -+/* -+** dupq_65536d_s16: -+** mov z0\.d, #65536 -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_65536d_s16, svint16_t, -+ z0 = svdupq_n_s16 (0, 1, 0, 0, 0, 1, 0, 0), -+ z0 = svdupq_s16 (0, 1, 0, 0, 0, 1, 0, 0)) -+ -+/* -+** dupq_m2d_s16: -+** mov z0\.d, #-2 -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_m2d_s16, svint16_t, -+ z0 = svdupq_n_s16 (-2, -1, -1, -1, -2, -1, -1, -1), -+ z0 = svdupq_s16 (-2, -1, -1, -1, -2, -1, -1, -1)) -+ -+/* -+** dupq_4ddb_s16: -+** movi v([0-9]+)\.2d, 0xff0000ffff00ff -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_4ddb_s16, svint16_t, -+ z0 = svdupq_n_s16 (0xff, -1, 0, 0xff, 0xff, -1, 0, 0xff), -+ z0 = svdupq_s16 (0xff, -1, 0, 0xff, 0xff, -1, 0, 0xff)) -+ -+ -+/* -+** dupq_a093s_s16: -+** mov (w[0-9]+), 41107 -+** mov z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_a093s_s16, svint16_t, -+ z0 = svdupq_n_s16 (0xa093, 0, 0xa093, 0, 0xa093, 0, 0xa093, 0), -+ z0 = svdupq_s16 (0xa093, 0, 0xa093, 0, 0xa093, 
0, 0xa093, 0)); -+ -+/* -+** dupq_pool_s16: -+** ... -+** ld1rqh z0\.h, p[0-7]/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_pool_s16, svint16_t, -+ z0 = svdupq_n_s16 (4, 10, 9, 77, 52, 22, 19, 50), -+ z0 = svdupq_s16 (4, 10, 9, 77, 52, 22, 19, 50)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_s32.c -new file mode 100644 -index 000000000..13b24c0db ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_s32.c -@@ -0,0 +1,61 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dupq_12800d_s32: -+** mov z0\.d, #12800 -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_12800d_s32, svint32_t, -+ z0 = svdupq_n_s32 (12800, 0, 12800, 0), -+ z0 = svdupq_s32 (12800, 0, 12800, 0)) -+ -+/* -+** dupq_fffffffed_s32: -+** mov z0\.d, #4294967294 -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_fffffffed_s32, svint32_t, -+ z0 = svdupq_n_s32 (-2, 0, -2, 0), -+ z0 = svdupq_s32 (-2, 0, -2, 0)) -+ -+/* -+** dupq_ff00ffffff00d_s32: -+** movi v([0-9]+)\.2d, 0xff00ffffff00 -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_ff00ffffff00d_s32, svint32_t, -+ z0 = svdupq_n_s32 (-256, 0xff00, -256, 0xff00), -+ z0 = svdupq_s32 (-256, 0xff00, -256, 0xff00)) -+ -+/* -+** dupq_fedcd_s32: -+** mov (x[0-9]+), 65244 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_fedcd_s32, svint32_t, -+ z0 = svdupq_n_s32 (0xfedc, 0, 0xfedc, 0), -+ z0 = svdupq_s32 (0xfedc, 0, 0xfedc, 0)) -+ -+/* -+** dupq_1357ud_s32: -+** mov (x[0-9]+), 21264383082496 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_1357ud_s32, svint32_t, -+ z0 = svdupq_n_s32 (0, 0x1357, 0, 0x1357), -+ z0 = svdupq_s32 (0, 0x1357, 0, 0x1357)) -+ -+/* -+** dupq_pool_s32: -+** ... -+** ld1rqw z0\.s, p[0-7]/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_pool_s32, svint32_t, -+ z0 = svdupq_n_s32 (4, 10, 9, 77), -+ z0 = svdupq_s32 (4, 10, 9, 77)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_s64.c -new file mode 100644 -index 000000000..d2689fa5c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_s64.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dupq_pool_s64: -+** ... 
-+** ld1rqd z0\.d, p[0-7]/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_pool_s64, svint64_t, -+ z0 = svdupq_n_s64 (4, 10), -+ z0 = svdupq_s64 (4, 10)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_s8.c -new file mode 100644 -index 000000000..30b36c162 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_s8.c -@@ -0,0 +1,99 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dupq_54h_s8: -+** mov z0\.h, #54 -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_54h_s8, svint8_t, -+ z0 = svdupq_n_s8 (54, 0, 54, 0, 54, 0, 54, 0, -+ 54, 0, 54, 0, 54, 0, 54, 0), -+ z0 = svdupq_s8 (54, 0, 54, 0, 54, 0, 54, 0, -+ 54, 0, 54, 0, 54, 0, 54, 0)) -+ -+/* -+** dupq_2560h_s8: -+** mov z0\.h, #2560 -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_2560h_s8, svint8_t, -+ z0 = svdupq_n_s8 (0, 10, 0, 10, 0, 10, 0, 10, -+ 0, 10, 0, 10, 0, 10, 0, 10), -+ z0 = svdupq_s8 (0, 10, 0, 10, 0, 10, 0, 10, -+ 0, 10, 0, 10, 0, 10, 0, 10)) -+ -+/* -+** dupq_5120s_s8: -+** mov z0\.s, #5120 -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_5120s_s8, svint8_t, -+ z0 = svdupq_n_s8 (0, 20, 0, 0, 0, 20, 0, 0, -+ 0, 20, 0, 0, 0, 20, 0, 0), -+ z0 = svdupq_s8 (0, 20, 0, 0, 0, 20, 0, 0, -+ 0, 20, 0, 0, 0, 20, 0, 0)) -+ -+/* -+** dupq_1ff00s_s8: -+** mov z0\.s, #130816 -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_1ff00s_s8, svint8_t, -+ z0 = svdupq_n_s8 (0, -1, 1, 0, 0, -1, 1, 0, -+ 0, -1, 1, 0, 0, -1, 1, 0), -+ z0 = svdupq_s8 (0, -1, 1, 0, 0, -1, 1, 0, -+ 0, -1, 1, 0, 0, -1, 1, 0)) -+ -+/* -+** dupq_96db_s8: -+** movi v([0-9]+)\.2d, 0xff0000ff00ffff00 -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_96db_s8, svint8_t, -+ z0 = svdupq_n_s8 (0, -1, -1, 0, -1, 0, 0, -1, -+ 0, -1, -1, 0, -1, 0, 0, -1), -+ z0 = svdupq_s8 (0, -1, -1, 0, -1, 0, 0, -1, -+ 0, -1, -1, 0, -1, 0, 0, -1)) -+ -+/* -+** dupq_7755h_s8: -+** mov (w[0-9]+), 21879 -+** mov z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_7755h_s8, svint8_t, -+ z0 = svdupq_n_s8 (0x77, 0x55, 0x77, 0x55, -+ 0x77, 0x55, 0x77, 0x55, -+ 0x77, 0x55, 0x77, 0x55, -+ 0x77, 0x55, 0x77, 0x55), -+ z0 = svdupq_s8 (0x77, 0x55, 0x77, 0x55, -+ 0x77, 0x55, 0x77, 0x55, -+ 0x77, 0x55, 0x77, 0x55, -+ 0x77, 0x55, 0x77, 0x55)) -+ -+/* -+** dupq_729a0000s_s8: -+** mov (w[0-9]+), 1922695168 -+** mov z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_729a0000s_s8, svint8_t, -+ z0 = svdupq_n_s8 (0, 0, 0x9a, 0x72, 0, 0, 0x9a, 0x72, -+ 0, 0, 0x9a, 0x72, 0, 0, 0x9a, 0x72), -+ z0 = svdupq_s8 (0, 0, 0x9a, 0x72, 0, 0, 0x9a, 0x72, -+ 0, 0, 0x9a, 0x72, 0, 0, 0x9a, 0x72)) -+ -+/* -+** dupq_pool_s8: -+** ... 
-+** ld1rqb z0\.b, p[0-7]/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_pool_s8, svint8_t, -+ z0 = svdupq_n_s8 (4, 10, 9, 77, 52, 22, 19, 50, -+ -1, 32, 44, 17, 23, 99, 53, 39), -+ z0 = svdupq_s8 (4, 10, 9, 77, 52, 22, 19, 50, -+ -1, 32, 44, 17, 23, 99, 53, 39)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_u16.c -new file mode 100644 -index 000000000..6ca13222d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_u16.c -@@ -0,0 +1,70 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dupq_25600s_u16: -+** mov z0\.s, #25600 -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_25600s_u16, svuint16_t, -+ z0 = svdupq_n_u16 (25600, 0, 25600, 0, 25600, 0, 25600, 0), -+ z0 = svdupq_u16 (25600, 0, 25600, 0, 25600, 0, 25600, 0)) -+ -+/* -+** dupq_7ff00s_u16: -+** mov z0\.s, #524032 -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_7ff00s_u16, svuint16_t, -+ z0 = svdupq_n_u16 (0xff00, 7, 0xff00, 7, 0xff00, 7, 0xff00, 7), -+ z0 = svdupq_u16 (0xff00, 7, 0xff00, 7, 0xff00, 7, 0xff00, 7)) -+ -+/* -+** dupq_65536d_u16: -+** mov z0\.d, #65536 -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_65536d_u16, svuint16_t, -+ z0 = svdupq_n_u16 (0, 1, 0, 0, 0, 1, 0, 0), -+ z0 = svdupq_u16 (0, 1, 0, 0, 0, 1, 0, 0)) -+ -+/* -+** dupq_m2d_u16: -+** mov z0\.d, #-2 -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_m2d_u16, svuint16_t, -+ z0 = svdupq_n_u16 (-2, -1, -1, -1, -2, -1, -1, -1), -+ z0 = svdupq_u16 (-2, -1, -1, -1, -2, -1, -1, -1)) -+ -+/* -+** dupq_4ddb_u16: -+** movi v([0-9]+)\.2d, 0xff0000ffff00ff -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_4ddb_u16, svuint16_t, -+ z0 = svdupq_n_u16 (0xff, -1, 0, 0xff, 0xff, -1, 0, 0xff), -+ z0 = svdupq_u16 (0xff, -1, 0, 0xff, 0xff, -1, 0, 0xff)) -+ -+ -+/* -+** dupq_a093s_u16: -+** mov (w[0-9]+), 41107 -+** mov z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_a093s_u16, svuint16_t, -+ z0 = svdupq_n_u16 (0xa093, 0, 0xa093, 0, 0xa093, 0, 0xa093, 0), -+ z0 = svdupq_u16 (0xa093, 0, 0xa093, 0, 0xa093, 0, 0xa093, 0)); -+ -+/* -+** dupq_pool_u16: -+** ... 
-+** ld1rqh z0\.h, p[0-7]/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_pool_u16, svuint16_t, -+ z0 = svdupq_n_u16 (4, 10, 9, 77, 52, 22, 19, 50), -+ z0 = svdupq_u16 (4, 10, 9, 77, 52, 22, 19, 50)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_u32.c -new file mode 100644 -index 000000000..3669bf8a1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_u32.c -@@ -0,0 +1,61 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dupq_12800d_u32: -+** mov z0\.d, #12800 -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_12800d_u32, svuint32_t, -+ z0 = svdupq_n_u32 (12800, 0, 12800, 0), -+ z0 = svdupq_u32 (12800, 0, 12800, 0)) -+ -+/* -+** dupq_fffffffed_u32: -+** mov z0\.d, #4294967294 -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_fffffffed_u32, svuint32_t, -+ z0 = svdupq_n_u32 (-2, 0, -2, 0), -+ z0 = svdupq_u32 (-2, 0, -2, 0)) -+ -+/* -+** dupq_ff00ffffff00d_u32: -+** movi v([0-9]+)\.2d, 0xff00ffffff00 -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_ff00ffffff00d_u32, svuint32_t, -+ z0 = svdupq_n_u32 (-256, 0xff00, -256, 0xff00), -+ z0 = svdupq_u32 (-256, 0xff00, -256, 0xff00)) -+ -+/* -+** dupq_fedcd_u32: -+** mov (x[0-9]+), 65244 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_fedcd_u32, svuint32_t, -+ z0 = svdupq_n_u32 (0xfedc, 0, 0xfedc, 0), -+ z0 = svdupq_u32 (0xfedc, 0, 0xfedc, 0)) -+ -+/* -+** dupq_1357ud_u32: -+** mov (x[0-9]+), 21264383082496 -+** mov z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_1357ud_u32, svuint32_t, -+ z0 = svdupq_n_u32 (0, 0x1357, 0, 0x1357), -+ z0 = svdupq_u32 (0, 0x1357, 0, 0x1357)) -+ -+/* -+** dupq_pool_u32: -+** ... -+** ld1rqw z0\.s, p[0-7]/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_pool_u32, svuint32_t, -+ z0 = svdupq_n_u32 (4, 10, 9, 77), -+ z0 = svdupq_u32 (4, 10, 9, 77)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_u64.c -new file mode 100644 -index 000000000..cb655a15a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_u64.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dupq_pool_u64: -+** ... 
-+** ld1rqd z0\.d, p[0-7]/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_pool_u64, svuint64_t, -+ z0 = svdupq_n_u64 (4, 10), -+ z0 = svdupq_u64 (4, 10)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_u8.c -new file mode 100644 -index 000000000..8b40c2b41 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/dupq_u8.c -@@ -0,0 +1,99 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** dupq_54h_u8: -+** mov z0\.h, #54 -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_54h_u8, svuint8_t, -+ z0 = svdupq_n_u8 (54, 0, 54, 0, 54, 0, 54, 0, -+ 54, 0, 54, 0, 54, 0, 54, 0), -+ z0 = svdupq_u8 (54, 0, 54, 0, 54, 0, 54, 0, -+ 54, 0, 54, 0, 54, 0, 54, 0)) -+ -+/* -+** dupq_2560h_u8: -+** mov z0\.h, #2560 -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_2560h_u8, svuint8_t, -+ z0 = svdupq_n_u8 (0, 10, 0, 10, 0, 10, 0, 10, -+ 0, 10, 0, 10, 0, 10, 0, 10), -+ z0 = svdupq_u8 (0, 10, 0, 10, 0, 10, 0, 10, -+ 0, 10, 0, 10, 0, 10, 0, 10)) -+ -+/* -+** dupq_5120s_u8: -+** mov z0\.s, #5120 -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_5120s_u8, svuint8_t, -+ z0 = svdupq_n_u8 (0, 20, 0, 0, 0, 20, 0, 0, -+ 0, 20, 0, 0, 0, 20, 0, 0), -+ z0 = svdupq_u8 (0, 20, 0, 0, 0, 20, 0, 0, -+ 0, 20, 0, 0, 0, 20, 0, 0)) -+ -+/* -+** dupq_1ff00s_u8: -+** mov z0\.s, #130816 -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_1ff00s_u8, svuint8_t, -+ z0 = svdupq_n_u8 (0, -1, 1, 0, 0, -1, 1, 0, -+ 0, -1, 1, 0, 0, -1, 1, 0), -+ z0 = svdupq_u8 (0, -1, 1, 0, 0, -1, 1, 0, -+ 0, -1, 1, 0, 0, -1, 1, 0)) -+ -+/* -+** dupq_96db_u8: -+** movi v([0-9]+)\.2d, 0xff0000ff00ffff00 -+** dup z0\.q, z\1\.q\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_96db_u8, svuint8_t, -+ z0 = svdupq_n_u8 (0, -1, -1, 0, -1, 0, 0, -1, -+ 0, -1, -1, 0, -1, 0, 0, -1), -+ z0 = svdupq_u8 (0, -1, -1, 0, -1, 0, 0, -1, -+ 0, -1, -1, 0, -1, 0, 0, -1)) -+ -+/* -+** dupq_7755h_u8: -+** mov (w[0-9]+), 21879 -+** mov z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_7755h_u8, svuint8_t, -+ z0 = svdupq_n_u8 (0x77, 0x55, 0x77, 0x55, -+ 0x77, 0x55, 0x77, 0x55, -+ 0x77, 0x55, 0x77, 0x55, -+ 0x77, 0x55, 0x77, 0x55), -+ z0 = svdupq_u8 (0x77, 0x55, 0x77, 0x55, -+ 0x77, 0x55, 0x77, 0x55, -+ 0x77, 0x55, 0x77, 0x55, -+ 0x77, 0x55, 0x77, 0x55)) -+ -+/* -+** dupq_729a0000s_u8: -+** mov (w[0-9]+), 1922695168 -+** mov z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_729a0000s_u8, svuint8_t, -+ z0 = svdupq_n_u8 (0, 0, 0x9a, 0x72, 0, 0, 0x9a, 0x72, -+ 0, 0, 0x9a, 0x72, 0, 0, 0x9a, 0x72), -+ z0 = svdupq_u8 (0, 0, 0x9a, 0x72, 0, 0, 0x9a, 0x72, -+ 0, 0, 0x9a, 0x72, 0, 0, 0x9a, 0x72)) -+ -+/* -+** dupq_pool_u8: -+** ... 
-+** ld1rqb z0\.b, p[0-7]/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_UNIFORM_Z (dupq_pool_u8, svuint8_t, -+ z0 = svdupq_n_u8 (4, 10, 9, 77, 52, 22, 19, 50, -+ -1, 32, 44, 17, 23, 99, 53, 39), -+ z0 = svdupq_u8 (4, 10, 9, 77, 52, 22, 19, 50, -+ -1, 32, 44, 17, 23, 99, 53, 39)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_b.c -new file mode 100644 -index 000000000..961ae84c0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_b.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** eor_b_z_tied1: -+** eor p0\.b, p3/z, (p0\.b, p1\.b|p1\.b, p0\.b) -+** ret -+*/ -+TEST_UNIFORM_P (eor_b_z_tied1, -+ p0 = sveor_b_z (p3, p0, p1), -+ p0 = sveor_z (p3, p0, p1)) -+ -+/* -+** eor_b_z_tied2: -+** eor p0\.b, p3/z, (p0\.b, p1\.b|p1\.b, p0\.b) -+** ret -+*/ -+TEST_UNIFORM_P (eor_b_z_tied2, -+ p0 = sveor_b_z (p3, p1, p0), -+ p0 = sveor_z (p3, p1, p0)) -+ -+/* -+** eor_b_z_untied: -+** eor p0\.b, p3/z, (p1\.b, p2\.b|p2\.b, p1\.b) -+** ret -+*/ -+TEST_UNIFORM_P (eor_b_z_untied, -+ p0 = sveor_b_z (p3, p1, p2), -+ p0 = sveor_z (p3, p1, p2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_s16.c -new file mode 100644 -index 000000000..7cf73609a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_s16.c -@@ -0,0 +1,376 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** eor_s16_m_tied1: -+** eor z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (eor_s16_m_tied1, svint16_t, -+ z0 = sveor_s16_m (p0, z0, z1), -+ z0 = sveor_m (p0, z0, z1)) -+ -+/* -+** eor_s16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** eor z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (eor_s16_m_tied2, svint16_t, -+ z0 = sveor_s16_m (p0, z1, z0), -+ z0 = sveor_m (p0, z1, z0)) -+ -+/* -+** eor_s16_m_untied: -+** movprfx z0, z1 -+** eor z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (eor_s16_m_untied, svint16_t, -+ z0 = sveor_s16_m (p0, z1, z2), -+ z0 = sveor_m (p0, z1, z2)) -+ -+/* -+** eor_w0_s16_m_tied1: -+** mov (z[0-9]+\.h), w0 -+** eor z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_w0_s16_m_tied1, svint16_t, int16_t, -+ z0 = sveor_n_s16_m (p0, z0, x0), -+ z0 = sveor_m (p0, z0, x0)) -+ -+/* -+** eor_w0_s16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** eor z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_w0_s16_m_untied, svint16_t, int16_t, -+ z0 = sveor_n_s16_m (p0, z1, x0), -+ z0 = sveor_m (p0, z1, x0)) -+ -+/* -+** eor_1_s16_m_tied1: -+** mov (z[0-9]+\.h), #1 -+** eor z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_s16_m_tied1, svint16_t, -+ z0 = sveor_n_s16_m (p0, z0, 1), -+ z0 = sveor_m (p0, z0, 1)) -+ -+/* -+** eor_1_s16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), #1 -+** movprfx z0, z1 -+** eor z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_s16_m_untied, svint16_t, -+ z0 = sveor_n_s16_m (p0, z1, 1), -+ z0 = sveor_m (p0, z1, 1)) -+ -+/* -+** eor_m2_s16_m: -+** mov (z[0-9]+\.h), #-2 -+** eor z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m2_s16_m, svint16_t, -+ z0 = sveor_n_s16_m (p0, z0, -2), -+ z0 = sveor_m (p0, z0, -2)) -+ -+/* -+** eor_s16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** eor z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (eor_s16_z_tied1, svint16_t, -+ 
z0 = sveor_s16_z (p0, z0, z1), -+ z0 = sveor_z (p0, z0, z1)) -+ -+/* -+** eor_s16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** eor z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (eor_s16_z_tied2, svint16_t, -+ z0 = sveor_s16_z (p0, z1, z0), -+ z0 = sveor_z (p0, z1, z0)) -+ -+/* -+** eor_s16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** eor z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** eor z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_s16_z_untied, svint16_t, -+ z0 = sveor_s16_z (p0, z1, z2), -+ z0 = sveor_z (p0, z1, z2)) -+ -+/* -+** eor_w0_s16_z_tied1: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** eor z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_w0_s16_z_tied1, svint16_t, int16_t, -+ z0 = sveor_n_s16_z (p0, z0, x0), -+ z0 = sveor_z (p0, z0, x0)) -+ -+/* -+** eor_w0_s16_z_untied: -+** mov (z[0-9]+\.h), w0 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** eor z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** eor z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_w0_s16_z_untied, svint16_t, int16_t, -+ z0 = sveor_n_s16_z (p0, z1, x0), -+ z0 = sveor_z (p0, z1, x0)) -+ -+/* -+** eor_1_s16_z_tied1: -+** mov (z[0-9]+\.h), #1 -+** movprfx z0\.h, p0/z, z0\.h -+** eor z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_s16_z_tied1, svint16_t, -+ z0 = sveor_n_s16_z (p0, z0, 1), -+ z0 = sveor_z (p0, z0, 1)) -+ -+/* -+** eor_1_s16_z_untied: -+** mov (z[0-9]+\.h), #1 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** eor z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** eor z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_s16_z_untied, svint16_t, -+ z0 = sveor_n_s16_z (p0, z1, 1), -+ z0 = sveor_z (p0, z1, 1)) -+ -+/* -+** eor_s16_x_tied1: -+** eor z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_s16_x_tied1, svint16_t, -+ z0 = sveor_s16_x (p0, z0, z1), -+ z0 = sveor_x (p0, z0, z1)) -+ -+/* -+** eor_s16_x_tied2: -+** eor z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_s16_x_tied2, svint16_t, -+ z0 = sveor_s16_x (p0, z1, z0), -+ z0 = sveor_x (p0, z1, z0)) -+ -+/* -+** eor_s16_x_untied: -+** eor z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_s16_x_untied, svint16_t, -+ z0 = sveor_s16_x (p0, z1, z2), -+ z0 = sveor_x (p0, z1, z2)) -+ -+/* -+** eor_w0_s16_x_tied1: -+** mov (z[0-9]+)\.h, w0 -+** eor z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_w0_s16_x_tied1, svint16_t, int16_t, -+ z0 = sveor_n_s16_x (p0, z0, x0), -+ z0 = sveor_x (p0, z0, x0)) -+ -+/* -+** eor_w0_s16_x_untied: -+** mov (z[0-9]+)\.h, w0 -+** eor z0\.d, (z1\.d, \1\.d|\1\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_w0_s16_x_untied, svint16_t, int16_t, -+ z0 = sveor_n_s16_x (p0, z1, x0), -+ z0 = sveor_x (p0, z1, x0)) -+ -+/* -+** eor_1_s16_x_tied1: -+** eor z0\.h, z0\.h, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_s16_x_tied1, svint16_t, -+ z0 = sveor_n_s16_x (p0, z0, 1), -+ z0 = sveor_x (p0, z0, 1)) -+ -+/* -+** eor_1_s16_x_untied: -+** movprfx z0, z1 -+** eor z0\.h, z0\.h, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_s16_x_untied, svint16_t, -+ z0 = sveor_n_s16_x (p0, z1, 1), -+ z0 = sveor_x (p0, z1, 1)) -+ -+/* -+** eor_127_s16_x: -+** eor z0\.h, z0\.h, #0x7f -+** ret -+*/ -+TEST_UNIFORM_Z (eor_127_s16_x, svint16_t, -+ z0 = sveor_n_s16_x (p0, z0, 127), -+ z0 = sveor_x (p0, z0, 127)) -+ -+/* -+** eor_128_s16_x: -+** eor z0\.h, z0\.h, #0x80 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_128_s16_x, 
svint16_t, -+ z0 = sveor_n_s16_x (p0, z0, 128), -+ z0 = sveor_x (p0, z0, 128)) -+ -+/* -+** eor_255_s16_x: -+** eor z0\.h, z0\.h, #0xff -+** ret -+*/ -+TEST_UNIFORM_Z (eor_255_s16_x, svint16_t, -+ z0 = sveor_n_s16_x (p0, z0, 255), -+ z0 = sveor_x (p0, z0, 255)) -+ -+/* -+** eor_256_s16_x: -+** eor z0\.h, z0\.h, #0x100 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_256_s16_x, svint16_t, -+ z0 = sveor_n_s16_x (p0, z0, 256), -+ z0 = sveor_x (p0, z0, 256)) -+ -+/* -+** eor_257_s16_x: -+** eor z0\.h, z0\.h, #0x101 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_257_s16_x, svint16_t, -+ z0 = sveor_n_s16_x (p0, z0, 257), -+ z0 = sveor_x (p0, z0, 257)) -+ -+/* -+** eor_512_s16_x: -+** eor z0\.h, z0\.h, #0x200 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_512_s16_x, svint16_t, -+ z0 = sveor_n_s16_x (p0, z0, 512), -+ z0 = sveor_x (p0, z0, 512)) -+ -+/* -+** eor_65280_s16_x: -+** eor z0\.h, z0\.h, #0xff00 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_65280_s16_x, svint16_t, -+ z0 = sveor_n_s16_x (p0, z0, 0xff00), -+ z0 = sveor_x (p0, z0, 0xff00)) -+ -+/* -+** eor_m127_s16_x: -+** eor z0\.h, z0\.h, #0xff81 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m127_s16_x, svint16_t, -+ z0 = sveor_n_s16_x (p0, z0, -127), -+ z0 = sveor_x (p0, z0, -127)) -+ -+/* -+** eor_m128_s16_x: -+** eor z0\.h, z0\.h, #0xff80 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m128_s16_x, svint16_t, -+ z0 = sveor_n_s16_x (p0, z0, -128), -+ z0 = sveor_x (p0, z0, -128)) -+ -+/* -+** eor_m255_s16_x: -+** eor z0\.h, z0\.h, #0xff01 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m255_s16_x, svint16_t, -+ z0 = sveor_n_s16_x (p0, z0, -255), -+ z0 = sveor_x (p0, z0, -255)) -+ -+/* -+** eor_m256_s16_x: -+** eor z0\.h, z0\.h, #0xff00 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m256_s16_x, svint16_t, -+ z0 = sveor_n_s16_x (p0, z0, -256), -+ z0 = sveor_x (p0, z0, -256)) -+ -+/* -+** eor_m257_s16_x: -+** eor z0\.h, z0\.h, #0xfeff -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m257_s16_x, svint16_t, -+ z0 = sveor_n_s16_x (p0, z0, -257), -+ z0 = sveor_x (p0, z0, -257)) -+ -+/* -+** eor_m512_s16_x: -+** eor z0\.h, z0\.h, #0xfe00 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m512_s16_x, svint16_t, -+ z0 = sveor_n_s16_x (p0, z0, -512), -+ z0 = sveor_x (p0, z0, -512)) -+ -+/* -+** eor_m32768_s16_x: -+** eor z0\.h, z0\.h, #0x8000 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m32768_s16_x, svint16_t, -+ z0 = sveor_n_s16_x (p0, z0, -0x8000), -+ z0 = sveor_x (p0, z0, -0x8000)) -+ -+/* -+** eor_5_s16_x: -+** mov (z[0-9]+)\.h, #5 -+** eor z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_5_s16_x, svint16_t, -+ z0 = sveor_n_s16_x (p0, z0, 5), -+ z0 = sveor_x (p0, z0, 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_s32.c -new file mode 100644 -index 000000000..d5aecb201 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_s32.c -@@ -0,0 +1,372 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** eor_s32_m_tied1: -+** eor z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (eor_s32_m_tied1, svint32_t, -+ z0 = sveor_s32_m (p0, z0, z1), -+ z0 = sveor_m (p0, z0, z1)) -+ -+/* -+** eor_s32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** eor z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (eor_s32_m_tied2, svint32_t, -+ z0 = sveor_s32_m (p0, z1, z0), -+ z0 = sveor_m (p0, z1, z0)) -+ -+/* -+** eor_s32_m_untied: -+** movprfx z0, z1 -+** eor z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (eor_s32_m_untied, svint32_t, -+ z0 = sveor_s32_m (p0, z1, z2), -+ z0 = 
sveor_m (p0, z1, z2)) -+ -+/* -+** eor_w0_s32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** eor z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_w0_s32_m_tied1, svint32_t, int32_t, -+ z0 = sveor_n_s32_m (p0, z0, x0), -+ z0 = sveor_m (p0, z0, x0)) -+ -+/* -+** eor_w0_s32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** eor z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_w0_s32_m_untied, svint32_t, int32_t, -+ z0 = sveor_n_s32_m (p0, z1, x0), -+ z0 = sveor_m (p0, z1, x0)) -+ -+/* -+** eor_1_s32_m_tied1: -+** mov (z[0-9]+\.s), #1 -+** eor z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_s32_m_tied1, svint32_t, -+ z0 = sveor_n_s32_m (p0, z0, 1), -+ z0 = sveor_m (p0, z0, 1)) -+ -+/* -+** eor_1_s32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #1 -+** movprfx z0, z1 -+** eor z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_s32_m_untied, svint32_t, -+ z0 = sveor_n_s32_m (p0, z1, 1), -+ z0 = sveor_m (p0, z1, 1)) -+ -+/* -+** eor_m2_s32_m: -+** mov (z[0-9]+\.s), #-2 -+** eor z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m2_s32_m, svint32_t, -+ z0 = sveor_n_s32_m (p0, z0, -2), -+ z0 = sveor_m (p0, z0, -2)) -+ -+/* -+** eor_s32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** eor z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (eor_s32_z_tied1, svint32_t, -+ z0 = sveor_s32_z (p0, z0, z1), -+ z0 = sveor_z (p0, z0, z1)) -+ -+/* -+** eor_s32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** eor z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (eor_s32_z_tied2, svint32_t, -+ z0 = sveor_s32_z (p0, z1, z0), -+ z0 = sveor_z (p0, z1, z0)) -+ -+/* -+** eor_s32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** eor z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** eor z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_s32_z_untied, svint32_t, -+ z0 = sveor_s32_z (p0, z1, z2), -+ z0 = sveor_z (p0, z1, z2)) -+ -+/* -+** eor_w0_s32_z_tied1: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** eor z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_w0_s32_z_tied1, svint32_t, int32_t, -+ z0 = sveor_n_s32_z (p0, z0, x0), -+ z0 = sveor_z (p0, z0, x0)) -+ -+/* -+** eor_w0_s32_z_untied: -+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** eor z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** eor z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_w0_s32_z_untied, svint32_t, int32_t, -+ z0 = sveor_n_s32_z (p0, z1, x0), -+ z0 = sveor_z (p0, z1, x0)) -+ -+/* -+** eor_1_s32_z_tied1: -+** mov (z[0-9]+\.s), #1 -+** movprfx z0\.s, p0/z, z0\.s -+** eor z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_s32_z_tied1, svint32_t, -+ z0 = sveor_n_s32_z (p0, z0, 1), -+ z0 = sveor_z (p0, z0, 1)) -+ -+/* -+** eor_1_s32_z_untied: -+** mov (z[0-9]+\.s), #1 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** eor z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** eor z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_s32_z_untied, svint32_t, -+ z0 = sveor_n_s32_z (p0, z1, 1), -+ z0 = sveor_z (p0, z1, 1)) -+ -+/* -+** eor_s32_x_tied1: -+** eor z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_s32_x_tied1, svint32_t, -+ z0 = sveor_s32_x (p0, z0, z1), -+ z0 = sveor_x (p0, z0, z1)) -+ -+/* -+** eor_s32_x_tied2: -+** eor z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_s32_x_tied2, svint32_t, -+ z0 = sveor_s32_x (p0, z1, z0), -+ z0 = sveor_x (p0, z1, z0)) -+ -+/* -+** 
eor_s32_x_untied: -+** eor z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_s32_x_untied, svint32_t, -+ z0 = sveor_s32_x (p0, z1, z2), -+ z0 = sveor_x (p0, z1, z2)) -+ -+/* -+** eor_w0_s32_x_tied1: -+** mov (z[0-9]+)\.s, w0 -+** eor z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_w0_s32_x_tied1, svint32_t, int32_t, -+ z0 = sveor_n_s32_x (p0, z0, x0), -+ z0 = sveor_x (p0, z0, x0)) -+ -+/* -+** eor_w0_s32_x_untied: -+** mov (z[0-9]+)\.s, w0 -+** eor z0\.d, (z1\.d, \1\.d|\1\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_w0_s32_x_untied, svint32_t, int32_t, -+ z0 = sveor_n_s32_x (p0, z1, x0), -+ z0 = sveor_x (p0, z1, x0)) -+ -+/* -+** eor_1_s32_x_tied1: -+** eor z0\.s, z0\.s, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_s32_x_tied1, svint32_t, -+ z0 = sveor_n_s32_x (p0, z0, 1), -+ z0 = sveor_x (p0, z0, 1)) -+ -+/* -+** eor_1_s32_x_untied: -+** movprfx z0, z1 -+** eor z0\.s, z0\.s, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_s32_x_untied, svint32_t, -+ z0 = sveor_n_s32_x (p0, z1, 1), -+ z0 = sveor_x (p0, z1, 1)) -+ -+/* -+** eor_127_s32_x: -+** eor z0\.s, z0\.s, #0x7f -+** ret -+*/ -+TEST_UNIFORM_Z (eor_127_s32_x, svint32_t, -+ z0 = sveor_n_s32_x (p0, z0, 127), -+ z0 = sveor_x (p0, z0, 127)) -+ -+/* -+** eor_128_s32_x: -+** eor z0\.s, z0\.s, #0x80 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_128_s32_x, svint32_t, -+ z0 = sveor_n_s32_x (p0, z0, 128), -+ z0 = sveor_x (p0, z0, 128)) -+ -+/* -+** eor_255_s32_x: -+** eor z0\.s, z0\.s, #0xff -+** ret -+*/ -+TEST_UNIFORM_Z (eor_255_s32_x, svint32_t, -+ z0 = sveor_n_s32_x (p0, z0, 255), -+ z0 = sveor_x (p0, z0, 255)) -+ -+/* -+** eor_256_s32_x: -+** eor z0\.s, z0\.s, #0x100 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_256_s32_x, svint32_t, -+ z0 = sveor_n_s32_x (p0, z0, 256), -+ z0 = sveor_x (p0, z0, 256)) -+ -+/* TODO: Bad code and needs fixing. 
*/ -+TEST_UNIFORM_Z (eor_257_s32_x, svint32_t, -+ z0 = sveor_n_s32_x (p0, z0, 257), -+ z0 = sveor_x (p0, z0, 257)) -+ -+/* -+** eor_512_s32_x: -+** eor z0\.s, z0\.s, #0x200 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_512_s32_x, svint32_t, -+ z0 = sveor_n_s32_x (p0, z0, 512), -+ z0 = sveor_x (p0, z0, 512)) -+ -+/* -+** eor_65280_s32_x: -+** eor z0\.s, z0\.s, #0xff00 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_65280_s32_x, svint32_t, -+ z0 = sveor_n_s32_x (p0, z0, 0xff00), -+ z0 = sveor_x (p0, z0, 0xff00)) -+ -+/* -+** eor_m127_s32_x: -+** eor z0\.s, z0\.s, #0xffffff81 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m127_s32_x, svint32_t, -+ z0 = sveor_n_s32_x (p0, z0, -127), -+ z0 = sveor_x (p0, z0, -127)) -+ -+/* -+** eor_m128_s32_x: -+** eor z0\.s, z0\.s, #0xffffff80 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m128_s32_x, svint32_t, -+ z0 = sveor_n_s32_x (p0, z0, -128), -+ z0 = sveor_x (p0, z0, -128)) -+ -+/* -+** eor_m255_s32_x: -+** eor z0\.s, z0\.s, #0xffffff01 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m255_s32_x, svint32_t, -+ z0 = sveor_n_s32_x (p0, z0, -255), -+ z0 = sveor_x (p0, z0, -255)) -+ -+/* -+** eor_m256_s32_x: -+** eor z0\.s, z0\.s, #0xffffff00 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m256_s32_x, svint32_t, -+ z0 = sveor_n_s32_x (p0, z0, -256), -+ z0 = sveor_x (p0, z0, -256)) -+ -+/* -+** eor_m257_s32_x: -+** eor z0\.s, z0\.s, #0xfffffeff -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m257_s32_x, svint32_t, -+ z0 = sveor_n_s32_x (p0, z0, -257), -+ z0 = sveor_x (p0, z0, -257)) -+ -+/* -+** eor_m512_s32_x: -+** eor z0\.s, z0\.s, #0xfffffe00 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m512_s32_x, svint32_t, -+ z0 = sveor_n_s32_x (p0, z0, -512), -+ z0 = sveor_x (p0, z0, -512)) -+ -+/* -+** eor_m32768_s32_x: -+** eor z0\.s, z0\.s, #0xffff8000 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m32768_s32_x, svint32_t, -+ z0 = sveor_n_s32_x (p0, z0, -0x8000), -+ z0 = sveor_x (p0, z0, -0x8000)) -+ -+/* -+** eor_5_s32_x: -+** mov (z[0-9]+)\.s, #5 -+** eor z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_5_s32_x, svint32_t, -+ z0 = sveor_n_s32_x (p0, z0, 5), -+ z0 = sveor_x (p0, z0, 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_s64.c -new file mode 100644 -index 000000000..157128974 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_s64.c -@@ -0,0 +1,372 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** eor_s64_m_tied1: -+** eor z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (eor_s64_m_tied1, svint64_t, -+ z0 = sveor_s64_m (p0, z0, z1), -+ z0 = sveor_m (p0, z0, z1)) -+ -+/* -+** eor_s64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** eor z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_s64_m_tied2, svint64_t, -+ z0 = sveor_s64_m (p0, z1, z0), -+ z0 = sveor_m (p0, z1, z0)) -+ -+/* -+** eor_s64_m_untied: -+** movprfx z0, z1 -+** eor z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (eor_s64_m_untied, svint64_t, -+ z0 = sveor_s64_m (p0, z1, z2), -+ z0 = sveor_m (p0, z1, z2)) -+ -+/* -+** eor_x0_s64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** eor z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_x0_s64_m_tied1, svint64_t, int64_t, -+ z0 = sveor_n_s64_m (p0, z0, x0), -+ z0 = sveor_m (p0, z0, x0)) -+ -+/* -+** eor_x0_s64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** eor z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_x0_s64_m_untied, svint64_t, int64_t, -+ z0 = sveor_n_s64_m (p0, z1, x0), -+ z0 = sveor_m (p0, 
z1, x0)) -+ -+/* -+** eor_1_s64_m_tied1: -+** mov (z[0-9]+\.d), #1 -+** eor z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_s64_m_tied1, svint64_t, -+ z0 = sveor_n_s64_m (p0, z0, 1), -+ z0 = sveor_m (p0, z0, 1)) -+ -+/* -+** eor_1_s64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #1 -+** movprfx z0, z1 -+** eor z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_s64_m_untied, svint64_t, -+ z0 = sveor_n_s64_m (p0, z1, 1), -+ z0 = sveor_m (p0, z1, 1)) -+ -+/* -+** eor_m2_s64_m: -+** mov (z[0-9]+\.d), #-2 -+** eor z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m2_s64_m, svint64_t, -+ z0 = sveor_n_s64_m (p0, z0, -2), -+ z0 = sveor_m (p0, z0, -2)) -+ -+/* -+** eor_s64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** eor z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (eor_s64_z_tied1, svint64_t, -+ z0 = sveor_s64_z (p0, z0, z1), -+ z0 = sveor_z (p0, z0, z1)) -+ -+/* -+** eor_s64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** eor z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (eor_s64_z_tied2, svint64_t, -+ z0 = sveor_s64_z (p0, z1, z0), -+ z0 = sveor_z (p0, z1, z0)) -+ -+/* -+** eor_s64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** eor z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** eor z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_s64_z_untied, svint64_t, -+ z0 = sveor_s64_z (p0, z1, z2), -+ z0 = sveor_z (p0, z1, z2)) -+ -+/* -+** eor_x0_s64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** eor z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_x0_s64_z_tied1, svint64_t, int64_t, -+ z0 = sveor_n_s64_z (p0, z0, x0), -+ z0 = sveor_z (p0, z0, x0)) -+ -+/* -+** eor_x0_s64_z_untied: -+** mov (z[0-9]+\.d), x0 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** eor z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** eor z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_x0_s64_z_untied, svint64_t, int64_t, -+ z0 = sveor_n_s64_z (p0, z1, x0), -+ z0 = sveor_z (p0, z1, x0)) -+ -+/* -+** eor_1_s64_z_tied1: -+** mov (z[0-9]+\.d), #1 -+** movprfx z0\.d, p0/z, z0\.d -+** eor z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_s64_z_tied1, svint64_t, -+ z0 = sveor_n_s64_z (p0, z0, 1), -+ z0 = sveor_z (p0, z0, 1)) -+ -+/* -+** eor_1_s64_z_untied: -+** mov (z[0-9]+\.d), #1 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** eor z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** eor z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_s64_z_untied, svint64_t, -+ z0 = sveor_n_s64_z (p0, z1, 1), -+ z0 = sveor_z (p0, z1, 1)) -+ -+/* -+** eor_s64_x_tied1: -+** eor z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_s64_x_tied1, svint64_t, -+ z0 = sveor_s64_x (p0, z0, z1), -+ z0 = sveor_x (p0, z0, z1)) -+ -+/* -+** eor_s64_x_tied2: -+** eor z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_s64_x_tied2, svint64_t, -+ z0 = sveor_s64_x (p0, z1, z0), -+ z0 = sveor_x (p0, z1, z0)) -+ -+/* -+** eor_s64_x_untied: -+** eor z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_s64_x_untied, svint64_t, -+ z0 = sveor_s64_x (p0, z1, z2), -+ z0 = sveor_x (p0, z1, z2)) -+ -+/* -+** eor_x0_s64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** eor z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_x0_s64_x_tied1, svint64_t, int64_t, -+ z0 = sveor_n_s64_x (p0, z0, x0), -+ z0 = sveor_x (p0, z0, x0)) -+ -+/* -+** eor_x0_s64_x_untied: -+** mov (z[0-9]+\.d), x0 -+** eor z0\.d, (z1\.d, 
\1|\1, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_x0_s64_x_untied, svint64_t, int64_t, -+ z0 = sveor_n_s64_x (p0, z1, x0), -+ z0 = sveor_x (p0, z1, x0)) -+ -+/* -+** eor_1_s64_x_tied1: -+** eor z0\.d, z0\.d, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_s64_x_tied1, svint64_t, -+ z0 = sveor_n_s64_x (p0, z0, 1), -+ z0 = sveor_x (p0, z0, 1)) -+ -+/* -+** eor_1_s64_x_untied: -+** movprfx z0, z1 -+** eor z0\.d, z0\.d, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_s64_x_untied, svint64_t, -+ z0 = sveor_n_s64_x (p0, z1, 1), -+ z0 = sveor_x (p0, z1, 1)) -+ -+/* -+** eor_127_s64_x: -+** eor z0\.d, z0\.d, #0x7f -+** ret -+*/ -+TEST_UNIFORM_Z (eor_127_s64_x, svint64_t, -+ z0 = sveor_n_s64_x (p0, z0, 127), -+ z0 = sveor_x (p0, z0, 127)) -+ -+/* -+** eor_128_s64_x: -+** eor z0\.d, z0\.d, #0x80 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_128_s64_x, svint64_t, -+ z0 = sveor_n_s64_x (p0, z0, 128), -+ z0 = sveor_x (p0, z0, 128)) -+ -+/* -+** eor_255_s64_x: -+** eor z0\.d, z0\.d, #0xff -+** ret -+*/ -+TEST_UNIFORM_Z (eor_255_s64_x, svint64_t, -+ z0 = sveor_n_s64_x (p0, z0, 255), -+ z0 = sveor_x (p0, z0, 255)) -+ -+/* -+** eor_256_s64_x: -+** eor z0\.d, z0\.d, #0x100 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_256_s64_x, svint64_t, -+ z0 = sveor_n_s64_x (p0, z0, 256), -+ z0 = sveor_x (p0, z0, 256)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (eor_257_s64_x, svint64_t, -+ z0 = sveor_n_s64_x (p0, z0, 257), -+ z0 = sveor_x (p0, z0, 257)) -+ -+/* -+** eor_512_s64_x: -+** eor z0\.d, z0\.d, #0x200 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_512_s64_x, svint64_t, -+ z0 = sveor_n_s64_x (p0, z0, 512), -+ z0 = sveor_x (p0, z0, 512)) -+ -+/* -+** eor_65280_s64_x: -+** eor z0\.d, z0\.d, #0xff00 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_65280_s64_x, svint64_t, -+ z0 = sveor_n_s64_x (p0, z0, 0xff00), -+ z0 = sveor_x (p0, z0, 0xff00)) -+ -+/* -+** eor_m127_s64_x: -+** eor z0\.d, z0\.d, #0xffffffffffffff81 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m127_s64_x, svint64_t, -+ z0 = sveor_n_s64_x (p0, z0, -127), -+ z0 = sveor_x (p0, z0, -127)) -+ -+/* -+** eor_m128_s64_x: -+** eor z0\.d, z0\.d, #0xffffffffffffff80 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m128_s64_x, svint64_t, -+ z0 = sveor_n_s64_x (p0, z0, -128), -+ z0 = sveor_x (p0, z0, -128)) -+ -+/* -+** eor_m255_s64_x: -+** eor z0\.d, z0\.d, #0xffffffffffffff01 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m255_s64_x, svint64_t, -+ z0 = sveor_n_s64_x (p0, z0, -255), -+ z0 = sveor_x (p0, z0, -255)) -+ -+/* -+** eor_m256_s64_x: -+** eor z0\.d, z0\.d, #0xffffffffffffff00 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m256_s64_x, svint64_t, -+ z0 = sveor_n_s64_x (p0, z0, -256), -+ z0 = sveor_x (p0, z0, -256)) -+ -+/* -+** eor_m257_s64_x: -+** eor z0\.d, z0\.d, #0xfffffffffffffeff -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m257_s64_x, svint64_t, -+ z0 = sveor_n_s64_x (p0, z0, -257), -+ z0 = sveor_x (p0, z0, -257)) -+ -+/* -+** eor_m512_s64_x: -+** eor z0\.d, z0\.d, #0xfffffffffffffe00 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m512_s64_x, svint64_t, -+ z0 = sveor_n_s64_x (p0, z0, -512), -+ z0 = sveor_x (p0, z0, -512)) -+ -+/* -+** eor_m32768_s64_x: -+** eor z0\.d, z0\.d, #0xffffffffffff8000 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m32768_s64_x, svint64_t, -+ z0 = sveor_n_s64_x (p0, z0, -0x8000), -+ z0 = sveor_x (p0, z0, -0x8000)) -+ -+/* -+** eor_5_s64_x: -+** mov (z[0-9]+\.d), #5 -+** eor z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_5_s64_x, svint64_t, -+ z0 = sveor_n_s64_x (p0, z0, 5), -+ z0 = sveor_x (p0, z0, 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_s8.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_s8.c -new file mode 100644 -index 000000000..083ac2dde ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_s8.c -@@ -0,0 +1,296 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** eor_s8_m_tied1: -+** eor z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (eor_s8_m_tied1, svint8_t, -+ z0 = sveor_s8_m (p0, z0, z1), -+ z0 = sveor_m (p0, z0, z1)) -+ -+/* -+** eor_s8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** eor z0\.b, p0/m, z0\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (eor_s8_m_tied2, svint8_t, -+ z0 = sveor_s8_m (p0, z1, z0), -+ z0 = sveor_m (p0, z1, z0)) -+ -+/* -+** eor_s8_m_untied: -+** movprfx z0, z1 -+** eor z0\.b, p0/m, z0\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (eor_s8_m_untied, svint8_t, -+ z0 = sveor_s8_m (p0, z1, z2), -+ z0 = sveor_m (p0, z1, z2)) -+ -+/* -+** eor_w0_s8_m_tied1: -+** mov (z[0-9]+\.b), w0 -+** eor z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_w0_s8_m_tied1, svint8_t, int8_t, -+ z0 = sveor_n_s8_m (p0, z0, x0), -+ z0 = sveor_m (p0, z0, x0)) -+ -+/* -+** eor_w0_s8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** eor z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_w0_s8_m_untied, svint8_t, int8_t, -+ z0 = sveor_n_s8_m (p0, z1, x0), -+ z0 = sveor_m (p0, z1, x0)) -+ -+/* -+** eor_1_s8_m_tied1: -+** mov (z[0-9]+\.b), #1 -+** eor z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_s8_m_tied1, svint8_t, -+ z0 = sveor_n_s8_m (p0, z0, 1), -+ z0 = sveor_m (p0, z0, 1)) -+ -+/* -+** eor_1_s8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), #1 -+** movprfx z0, z1 -+** eor z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_s8_m_untied, svint8_t, -+ z0 = sveor_n_s8_m (p0, z1, 1), -+ z0 = sveor_m (p0, z1, 1)) -+ -+/* -+** eor_m2_s8_m: -+** mov (z[0-9]+\.b), #-2 -+** eor z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m2_s8_m, svint8_t, -+ z0 = sveor_n_s8_m (p0, z0, -2), -+ z0 = sveor_m (p0, z0, -2)) -+ -+/* -+** eor_s8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** eor z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (eor_s8_z_tied1, svint8_t, -+ z0 = sveor_s8_z (p0, z0, z1), -+ z0 = sveor_z (p0, z0, z1)) -+ -+/* -+** eor_s8_z_tied2: -+** movprfx z0\.b, p0/z, z0\.b -+** eor z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (eor_s8_z_tied2, svint8_t, -+ z0 = sveor_s8_z (p0, z1, z0), -+ z0 = sveor_z (p0, z1, z0)) -+ -+/* -+** eor_s8_z_untied: -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** eor z0\.b, p0/m, z0\.b, z2\.b -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** eor z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_s8_z_untied, svint8_t, -+ z0 = sveor_s8_z (p0, z1, z2), -+ z0 = sveor_z (p0, z1, z2)) -+ -+/* -+** eor_w0_s8_z_tied1: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** eor z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_w0_s8_z_tied1, svint8_t, int8_t, -+ z0 = sveor_n_s8_z (p0, z0, x0), -+ z0 = sveor_z (p0, z0, x0)) -+ -+/* -+** eor_w0_s8_z_untied: -+** mov (z[0-9]+\.b), w0 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** eor z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** eor z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_w0_s8_z_untied, svint8_t, int8_t, -+ z0 = sveor_n_s8_z (p0, z1, x0), -+ z0 = sveor_z (p0, z1, x0)) -+ -+/* -+** eor_1_s8_z_tied1: -+** mov (z[0-9]+\.b), #1 -+** movprfx z0\.b, p0/z, z0\.b -+** eor z0\.b, p0/m, z0\.b, \1 -+** 
ret -+*/ -+TEST_UNIFORM_Z (eor_1_s8_z_tied1, svint8_t, -+ z0 = sveor_n_s8_z (p0, z0, 1), -+ z0 = sveor_z (p0, z0, 1)) -+ -+/* -+** eor_1_s8_z_untied: -+** mov (z[0-9]+\.b), #1 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** eor z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** eor z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_s8_z_untied, svint8_t, -+ z0 = sveor_n_s8_z (p0, z1, 1), -+ z0 = sveor_z (p0, z1, 1)) -+ -+/* -+** eor_s8_x_tied1: -+** eor z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_s8_x_tied1, svint8_t, -+ z0 = sveor_s8_x (p0, z0, z1), -+ z0 = sveor_x (p0, z0, z1)) -+ -+/* -+** eor_s8_x_tied2: -+** eor z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_s8_x_tied2, svint8_t, -+ z0 = sveor_s8_x (p0, z1, z0), -+ z0 = sveor_x (p0, z1, z0)) -+ -+/* -+** eor_s8_x_untied: -+** eor z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_s8_x_untied, svint8_t, -+ z0 = sveor_s8_x (p0, z1, z2), -+ z0 = sveor_x (p0, z1, z2)) -+ -+/* -+** eor_w0_s8_x_tied1: -+** mov (z[0-9]+)\.b, w0 -+** eor z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_w0_s8_x_tied1, svint8_t, int8_t, -+ z0 = sveor_n_s8_x (p0, z0, x0), -+ z0 = sveor_x (p0, z0, x0)) -+ -+/* -+** eor_w0_s8_x_untied: -+** mov (z[0-9]+)\.b, w0 -+** eor z0\.d, (z1\.d, \1\.d|\1\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_w0_s8_x_untied, svint8_t, int8_t, -+ z0 = sveor_n_s8_x (p0, z1, x0), -+ z0 = sveor_x (p0, z1, x0)) -+ -+/* -+** eor_1_s8_x_tied1: -+** eor z0\.b, z0\.b, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_s8_x_tied1, svint8_t, -+ z0 = sveor_n_s8_x (p0, z0, 1), -+ z0 = sveor_x (p0, z0, 1)) -+ -+/* -+** eor_1_s8_x_untied: -+** movprfx z0, z1 -+** eor z0\.b, z0\.b, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_s8_x_untied, svint8_t, -+ z0 = sveor_n_s8_x (p0, z1, 1), -+ z0 = sveor_x (p0, z1, 1)) -+ -+/* -+** eor_127_s8_x: -+** eor z0\.b, z0\.b, #0x7f -+** ret -+*/ -+TEST_UNIFORM_Z (eor_127_s8_x, svint8_t, -+ z0 = sveor_n_s8_x (p0, z0, 127), -+ z0 = sveor_x (p0, z0, 127)) -+ -+/* -+** eor_128_s8_x: -+** eor z0\.b, z0\.b, #0x80 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_128_s8_x, svint8_t, -+ z0 = sveor_n_s8_x (p0, z0, 128), -+ z0 = sveor_x (p0, z0, 128)) -+ -+/* -+** eor_255_s8_x: -+** mov (z[0-9]+)\.b, #-1 -+** eor z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_255_s8_x, svint8_t, -+ z0 = sveor_n_s8_x (p0, z0, 255), -+ z0 = sveor_x (p0, z0, 255)) -+ -+/* -+** eor_m127_s8_x: -+** eor z0\.b, z0\.b, #0x81 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m127_s8_x, svint8_t, -+ z0 = sveor_n_s8_x (p0, z0, -127), -+ z0 = sveor_x (p0, z0, -127)) -+ -+/* -+** eor_m128_s8_x: -+** eor z0\.b, z0\.b, #0x80 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m128_s8_x, svint8_t, -+ z0 = sveor_n_s8_x (p0, z0, -128), -+ z0 = sveor_x (p0, z0, -128)) -+ -+/* -+** eor_5_s8_x: -+** mov (z[0-9]+)\.b, #5 -+** eor z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_5_s8_x, svint8_t, -+ z0 = sveor_n_s8_x (p0, z0, 5), -+ z0 = sveor_x (p0, z0, 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_u16.c -new file mode 100644 -index 000000000..40b43a5f8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_u16.c -@@ -0,0 +1,376 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** eor_u16_m_tied1: -+** eor z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (eor_u16_m_tied1, svuint16_t, 
-+ z0 = sveor_u16_m (p0, z0, z1), -+ z0 = sveor_m (p0, z0, z1)) -+ -+/* -+** eor_u16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** eor z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (eor_u16_m_tied2, svuint16_t, -+ z0 = sveor_u16_m (p0, z1, z0), -+ z0 = sveor_m (p0, z1, z0)) -+ -+/* -+** eor_u16_m_untied: -+** movprfx z0, z1 -+** eor z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (eor_u16_m_untied, svuint16_t, -+ z0 = sveor_u16_m (p0, z1, z2), -+ z0 = sveor_m (p0, z1, z2)) -+ -+/* -+** eor_w0_u16_m_tied1: -+** mov (z[0-9]+\.h), w0 -+** eor z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_w0_u16_m_tied1, svuint16_t, uint16_t, -+ z0 = sveor_n_u16_m (p0, z0, x0), -+ z0 = sveor_m (p0, z0, x0)) -+ -+/* -+** eor_w0_u16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** eor z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_w0_u16_m_untied, svuint16_t, uint16_t, -+ z0 = sveor_n_u16_m (p0, z1, x0), -+ z0 = sveor_m (p0, z1, x0)) -+ -+/* -+** eor_1_u16_m_tied1: -+** mov (z[0-9]+\.h), #1 -+** eor z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_u16_m_tied1, svuint16_t, -+ z0 = sveor_n_u16_m (p0, z0, 1), -+ z0 = sveor_m (p0, z0, 1)) -+ -+/* -+** eor_1_u16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), #1 -+** movprfx z0, z1 -+** eor z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_u16_m_untied, svuint16_t, -+ z0 = sveor_n_u16_m (p0, z1, 1), -+ z0 = sveor_m (p0, z1, 1)) -+ -+/* -+** eor_m2_u16_m: -+** mov (z[0-9]+\.h), #-2 -+** eor z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m2_u16_m, svuint16_t, -+ z0 = sveor_n_u16_m (p0, z0, -2), -+ z0 = sveor_m (p0, z0, -2)) -+ -+/* -+** eor_u16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** eor z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (eor_u16_z_tied1, svuint16_t, -+ z0 = sveor_u16_z (p0, z0, z1), -+ z0 = sveor_z (p0, z0, z1)) -+ -+/* -+** eor_u16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** eor z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (eor_u16_z_tied2, svuint16_t, -+ z0 = sveor_u16_z (p0, z1, z0), -+ z0 = sveor_z (p0, z1, z0)) -+ -+/* -+** eor_u16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** eor z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** eor z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_u16_z_untied, svuint16_t, -+ z0 = sveor_u16_z (p0, z1, z2), -+ z0 = sveor_z (p0, z1, z2)) -+ -+/* -+** eor_w0_u16_z_tied1: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** eor z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_w0_u16_z_tied1, svuint16_t, uint16_t, -+ z0 = sveor_n_u16_z (p0, z0, x0), -+ z0 = sveor_z (p0, z0, x0)) -+ -+/* -+** eor_w0_u16_z_untied: -+** mov (z[0-9]+\.h), w0 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** eor z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** eor z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_w0_u16_z_untied, svuint16_t, uint16_t, -+ z0 = sveor_n_u16_z (p0, z1, x0), -+ z0 = sveor_z (p0, z1, x0)) -+ -+/* -+** eor_1_u16_z_tied1: -+** mov (z[0-9]+\.h), #1 -+** movprfx z0\.h, p0/z, z0\.h -+** eor z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_u16_z_tied1, svuint16_t, -+ z0 = sveor_n_u16_z (p0, z0, 1), -+ z0 = sveor_z (p0, z0, 1)) -+ -+/* -+** eor_1_u16_z_untied: -+** mov (z[0-9]+\.h), #1 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** eor z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** eor z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ 
-+TEST_UNIFORM_Z (eor_1_u16_z_untied, svuint16_t, -+ z0 = sveor_n_u16_z (p0, z1, 1), -+ z0 = sveor_z (p0, z1, 1)) -+ -+/* -+** eor_u16_x_tied1: -+** eor z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_u16_x_tied1, svuint16_t, -+ z0 = sveor_u16_x (p0, z0, z1), -+ z0 = sveor_x (p0, z0, z1)) -+ -+/* -+** eor_u16_x_tied2: -+** eor z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_u16_x_tied2, svuint16_t, -+ z0 = sveor_u16_x (p0, z1, z0), -+ z0 = sveor_x (p0, z1, z0)) -+ -+/* -+** eor_u16_x_untied: -+** eor z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_u16_x_untied, svuint16_t, -+ z0 = sveor_u16_x (p0, z1, z2), -+ z0 = sveor_x (p0, z1, z2)) -+ -+/* -+** eor_w0_u16_x_tied1: -+** mov (z[0-9]+)\.h, w0 -+** eor z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_w0_u16_x_tied1, svuint16_t, uint16_t, -+ z0 = sveor_n_u16_x (p0, z0, x0), -+ z0 = sveor_x (p0, z0, x0)) -+ -+/* -+** eor_w0_u16_x_untied: -+** mov (z[0-9]+)\.h, w0 -+** eor z0\.d, (z1\.d, \1\.d|\1\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_w0_u16_x_untied, svuint16_t, uint16_t, -+ z0 = sveor_n_u16_x (p0, z1, x0), -+ z0 = sveor_x (p0, z1, x0)) -+ -+/* -+** eor_1_u16_x_tied1: -+** eor z0\.h, z0\.h, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_u16_x_tied1, svuint16_t, -+ z0 = sveor_n_u16_x (p0, z0, 1), -+ z0 = sveor_x (p0, z0, 1)) -+ -+/* -+** eor_1_u16_x_untied: -+** movprfx z0, z1 -+** eor z0\.h, z0\.h, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_u16_x_untied, svuint16_t, -+ z0 = sveor_n_u16_x (p0, z1, 1), -+ z0 = sveor_x (p0, z1, 1)) -+ -+/* -+** eor_127_u16_x: -+** eor z0\.h, z0\.h, #0x7f -+** ret -+*/ -+TEST_UNIFORM_Z (eor_127_u16_x, svuint16_t, -+ z0 = sveor_n_u16_x (p0, z0, 127), -+ z0 = sveor_x (p0, z0, 127)) -+ -+/* -+** eor_128_u16_x: -+** eor z0\.h, z0\.h, #0x80 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_128_u16_x, svuint16_t, -+ z0 = sveor_n_u16_x (p0, z0, 128), -+ z0 = sveor_x (p0, z0, 128)) -+ -+/* -+** eor_255_u16_x: -+** eor z0\.h, z0\.h, #0xff -+** ret -+*/ -+TEST_UNIFORM_Z (eor_255_u16_x, svuint16_t, -+ z0 = sveor_n_u16_x (p0, z0, 255), -+ z0 = sveor_x (p0, z0, 255)) -+ -+/* -+** eor_256_u16_x: -+** eor z0\.h, z0\.h, #0x100 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_256_u16_x, svuint16_t, -+ z0 = sveor_n_u16_x (p0, z0, 256), -+ z0 = sveor_x (p0, z0, 256)) -+ -+/* -+** eor_257_u16_x: -+** eor z0\.h, z0\.h, #0x101 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_257_u16_x, svuint16_t, -+ z0 = sveor_n_u16_x (p0, z0, 257), -+ z0 = sveor_x (p0, z0, 257)) -+ -+/* -+** eor_512_u16_x: -+** eor z0\.h, z0\.h, #0x200 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_512_u16_x, svuint16_t, -+ z0 = sveor_n_u16_x (p0, z0, 512), -+ z0 = sveor_x (p0, z0, 512)) -+ -+/* -+** eor_65280_u16_x: -+** eor z0\.h, z0\.h, #0xff00 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_65280_u16_x, svuint16_t, -+ z0 = sveor_n_u16_x (p0, z0, 0xff00), -+ z0 = sveor_x (p0, z0, 0xff00)) -+ -+/* -+** eor_m127_u16_x: -+** eor z0\.h, z0\.h, #0xff81 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m127_u16_x, svuint16_t, -+ z0 = sveor_n_u16_x (p0, z0, -127), -+ z0 = sveor_x (p0, z0, -127)) -+ -+/* -+** eor_m128_u16_x: -+** eor z0\.h, z0\.h, #0xff80 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m128_u16_x, svuint16_t, -+ z0 = sveor_n_u16_x (p0, z0, -128), -+ z0 = sveor_x (p0, z0, -128)) -+ -+/* -+** eor_m255_u16_x: -+** eor z0\.h, z0\.h, #0xff01 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m255_u16_x, svuint16_t, -+ z0 = sveor_n_u16_x (p0, z0, -255), -+ z0 = sveor_x (p0, z0, -255)) -+ -+/* -+** eor_m256_u16_x: -+** eor z0\.h, z0\.h, #0xff00 -+** ret -+*/ 
-+TEST_UNIFORM_Z (eor_m256_u16_x, svuint16_t, -+ z0 = sveor_n_u16_x (p0, z0, -256), -+ z0 = sveor_x (p0, z0, -256)) -+ -+/* -+** eor_m257_u16_x: -+** eor z0\.h, z0\.h, #0xfeff -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m257_u16_x, svuint16_t, -+ z0 = sveor_n_u16_x (p0, z0, -257), -+ z0 = sveor_x (p0, z0, -257)) -+ -+/* -+** eor_m512_u16_x: -+** eor z0\.h, z0\.h, #0xfe00 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m512_u16_x, svuint16_t, -+ z0 = sveor_n_u16_x (p0, z0, -512), -+ z0 = sveor_x (p0, z0, -512)) -+ -+/* -+** eor_m32768_u16_x: -+** eor z0\.h, z0\.h, #0x8000 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m32768_u16_x, svuint16_t, -+ z0 = sveor_n_u16_x (p0, z0, -0x8000), -+ z0 = sveor_x (p0, z0, -0x8000)) -+ -+/* -+** eor_5_u16_x: -+** mov (z[0-9]+)\.h, #5 -+** eor z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_5_u16_x, svuint16_t, -+ z0 = sveor_n_u16_x (p0, z0, 5), -+ z0 = sveor_x (p0, z0, 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_u32.c -new file mode 100644 -index 000000000..8e46d08ca ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_u32.c -@@ -0,0 +1,372 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** eor_u32_m_tied1: -+** eor z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (eor_u32_m_tied1, svuint32_t, -+ z0 = sveor_u32_m (p0, z0, z1), -+ z0 = sveor_m (p0, z0, z1)) -+ -+/* -+** eor_u32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** eor z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (eor_u32_m_tied2, svuint32_t, -+ z0 = sveor_u32_m (p0, z1, z0), -+ z0 = sveor_m (p0, z1, z0)) -+ -+/* -+** eor_u32_m_untied: -+** movprfx z0, z1 -+** eor z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (eor_u32_m_untied, svuint32_t, -+ z0 = sveor_u32_m (p0, z1, z2), -+ z0 = sveor_m (p0, z1, z2)) -+ -+/* -+** eor_w0_u32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** eor z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_w0_u32_m_tied1, svuint32_t, uint32_t, -+ z0 = sveor_n_u32_m (p0, z0, x0), -+ z0 = sveor_m (p0, z0, x0)) -+ -+/* -+** eor_w0_u32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** eor z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_w0_u32_m_untied, svuint32_t, uint32_t, -+ z0 = sveor_n_u32_m (p0, z1, x0), -+ z0 = sveor_m (p0, z1, x0)) -+ -+/* -+** eor_1_u32_m_tied1: -+** mov (z[0-9]+\.s), #1 -+** eor z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_u32_m_tied1, svuint32_t, -+ z0 = sveor_n_u32_m (p0, z0, 1), -+ z0 = sveor_m (p0, z0, 1)) -+ -+/* -+** eor_1_u32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #1 -+** movprfx z0, z1 -+** eor z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_u32_m_untied, svuint32_t, -+ z0 = sveor_n_u32_m (p0, z1, 1), -+ z0 = sveor_m (p0, z1, 1)) -+ -+/* -+** eor_m2_u32_m: -+** mov (z[0-9]+\.s), #-2 -+** eor z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m2_u32_m, svuint32_t, -+ z0 = sveor_n_u32_m (p0, z0, -2), -+ z0 = sveor_m (p0, z0, -2)) -+ -+/* -+** eor_u32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** eor z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (eor_u32_z_tied1, svuint32_t, -+ z0 = sveor_u32_z (p0, z0, z1), -+ z0 = sveor_z (p0, z0, z1)) -+ -+/* -+** eor_u32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** eor z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (eor_u32_z_tied2, svuint32_t, -+ z0 = sveor_u32_z (p0, z1, z0), -+ z0 = sveor_z (p0, z1, z0)) -+ -+/* -+** 
eor_u32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** eor z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** eor z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_u32_z_untied, svuint32_t, -+ z0 = sveor_u32_z (p0, z1, z2), -+ z0 = sveor_z (p0, z1, z2)) -+ -+/* -+** eor_w0_u32_z_tied1: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** eor z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_w0_u32_z_tied1, svuint32_t, uint32_t, -+ z0 = sveor_n_u32_z (p0, z0, x0), -+ z0 = sveor_z (p0, z0, x0)) -+ -+/* -+** eor_w0_u32_z_untied: -+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** eor z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** eor z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_w0_u32_z_untied, svuint32_t, uint32_t, -+ z0 = sveor_n_u32_z (p0, z1, x0), -+ z0 = sveor_z (p0, z1, x0)) -+ -+/* -+** eor_1_u32_z_tied1: -+** mov (z[0-9]+\.s), #1 -+** movprfx z0\.s, p0/z, z0\.s -+** eor z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_u32_z_tied1, svuint32_t, -+ z0 = sveor_n_u32_z (p0, z0, 1), -+ z0 = sveor_z (p0, z0, 1)) -+ -+/* -+** eor_1_u32_z_untied: -+** mov (z[0-9]+\.s), #1 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** eor z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** eor z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_u32_z_untied, svuint32_t, -+ z0 = sveor_n_u32_z (p0, z1, 1), -+ z0 = sveor_z (p0, z1, 1)) -+ -+/* -+** eor_u32_x_tied1: -+** eor z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_u32_x_tied1, svuint32_t, -+ z0 = sveor_u32_x (p0, z0, z1), -+ z0 = sveor_x (p0, z0, z1)) -+ -+/* -+** eor_u32_x_tied2: -+** eor z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_u32_x_tied2, svuint32_t, -+ z0 = sveor_u32_x (p0, z1, z0), -+ z0 = sveor_x (p0, z1, z0)) -+ -+/* -+** eor_u32_x_untied: -+** eor z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_u32_x_untied, svuint32_t, -+ z0 = sveor_u32_x (p0, z1, z2), -+ z0 = sveor_x (p0, z1, z2)) -+ -+/* -+** eor_w0_u32_x_tied1: -+** mov (z[0-9]+)\.s, w0 -+** eor z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_w0_u32_x_tied1, svuint32_t, uint32_t, -+ z0 = sveor_n_u32_x (p0, z0, x0), -+ z0 = sveor_x (p0, z0, x0)) -+ -+/* -+** eor_w0_u32_x_untied: -+** mov (z[0-9]+)\.s, w0 -+** eor z0\.d, (z1\.d, \1\.d|\1\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_w0_u32_x_untied, svuint32_t, uint32_t, -+ z0 = sveor_n_u32_x (p0, z1, x0), -+ z0 = sveor_x (p0, z1, x0)) -+ -+/* -+** eor_1_u32_x_tied1: -+** eor z0\.s, z0\.s, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_u32_x_tied1, svuint32_t, -+ z0 = sveor_n_u32_x (p0, z0, 1), -+ z0 = sveor_x (p0, z0, 1)) -+ -+/* -+** eor_1_u32_x_untied: -+** movprfx z0, z1 -+** eor z0\.s, z0\.s, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_u32_x_untied, svuint32_t, -+ z0 = sveor_n_u32_x (p0, z1, 1), -+ z0 = sveor_x (p0, z1, 1)) -+ -+/* -+** eor_127_u32_x: -+** eor z0\.s, z0\.s, #0x7f -+** ret -+*/ -+TEST_UNIFORM_Z (eor_127_u32_x, svuint32_t, -+ z0 = sveor_n_u32_x (p0, z0, 127), -+ z0 = sveor_x (p0, z0, 127)) -+ -+/* -+** eor_128_u32_x: -+** eor z0\.s, z0\.s, #0x80 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_128_u32_x, svuint32_t, -+ z0 = sveor_n_u32_x (p0, z0, 128), -+ z0 = sveor_x (p0, z0, 128)) -+ -+/* -+** eor_255_u32_x: -+** eor z0\.s, z0\.s, #0xff -+** ret -+*/ -+TEST_UNIFORM_Z (eor_255_u32_x, svuint32_t, -+ z0 = sveor_n_u32_x (p0, z0, 255), -+ z0 = sveor_x (p0, z0, 255)) -+ -+/* -+** 
eor_256_u32_x: -+** eor z0\.s, z0\.s, #0x100 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_256_u32_x, svuint32_t, -+ z0 = sveor_n_u32_x (p0, z0, 256), -+ z0 = sveor_x (p0, z0, 256)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (eor_257_u32_x, svuint32_t, -+ z0 = sveor_n_u32_x (p0, z0, 257), -+ z0 = sveor_x (p0, z0, 257)) -+ -+/* -+** eor_512_u32_x: -+** eor z0\.s, z0\.s, #0x200 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_512_u32_x, svuint32_t, -+ z0 = sveor_n_u32_x (p0, z0, 512), -+ z0 = sveor_x (p0, z0, 512)) -+ -+/* -+** eor_65280_u32_x: -+** eor z0\.s, z0\.s, #0xff00 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_65280_u32_x, svuint32_t, -+ z0 = sveor_n_u32_x (p0, z0, 0xff00), -+ z0 = sveor_x (p0, z0, 0xff00)) -+ -+/* -+** eor_m127_u32_x: -+** eor z0\.s, z0\.s, #0xffffff81 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m127_u32_x, svuint32_t, -+ z0 = sveor_n_u32_x (p0, z0, -127), -+ z0 = sveor_x (p0, z0, -127)) -+ -+/* -+** eor_m128_u32_x: -+** eor z0\.s, z0\.s, #0xffffff80 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m128_u32_x, svuint32_t, -+ z0 = sveor_n_u32_x (p0, z0, -128), -+ z0 = sveor_x (p0, z0, -128)) -+ -+/* -+** eor_m255_u32_x: -+** eor z0\.s, z0\.s, #0xffffff01 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m255_u32_x, svuint32_t, -+ z0 = sveor_n_u32_x (p0, z0, -255), -+ z0 = sveor_x (p0, z0, -255)) -+ -+/* -+** eor_m256_u32_x: -+** eor z0\.s, z0\.s, #0xffffff00 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m256_u32_x, svuint32_t, -+ z0 = sveor_n_u32_x (p0, z0, -256), -+ z0 = sveor_x (p0, z0, -256)) -+ -+/* -+** eor_m257_u32_x: -+** eor z0\.s, z0\.s, #0xfffffeff -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m257_u32_x, svuint32_t, -+ z0 = sveor_n_u32_x (p0, z0, -257), -+ z0 = sveor_x (p0, z0, -257)) -+ -+/* -+** eor_m512_u32_x: -+** eor z0\.s, z0\.s, #0xfffffe00 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m512_u32_x, svuint32_t, -+ z0 = sveor_n_u32_x (p0, z0, -512), -+ z0 = sveor_x (p0, z0, -512)) -+ -+/* -+** eor_m32768_u32_x: -+** eor z0\.s, z0\.s, #0xffff8000 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m32768_u32_x, svuint32_t, -+ z0 = sveor_n_u32_x (p0, z0, -0x8000), -+ z0 = sveor_x (p0, z0, -0x8000)) -+ -+/* -+** eor_5_u32_x: -+** mov (z[0-9]+)\.s, #5 -+** eor z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_5_u32_x, svuint32_t, -+ z0 = sveor_n_u32_x (p0, z0, 5), -+ z0 = sveor_x (p0, z0, 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_u64.c -new file mode 100644 -index 000000000..a82398f91 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_u64.c -@@ -0,0 +1,372 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** eor_u64_m_tied1: -+** eor z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (eor_u64_m_tied1, svuint64_t, -+ z0 = sveor_u64_m (p0, z0, z1), -+ z0 = sveor_m (p0, z0, z1)) -+ -+/* -+** eor_u64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** eor z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_u64_m_tied2, svuint64_t, -+ z0 = sveor_u64_m (p0, z1, z0), -+ z0 = sveor_m (p0, z1, z0)) -+ -+/* -+** eor_u64_m_untied: -+** movprfx z0, z1 -+** eor z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (eor_u64_m_untied, svuint64_t, -+ z0 = sveor_u64_m (p0, z1, z2), -+ z0 = sveor_m (p0, z1, z2)) -+ -+/* -+** eor_x0_u64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** eor z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_x0_u64_m_tied1, svuint64_t, uint64_t, -+ z0 = sveor_n_u64_m (p0, z0, x0), -+ z0 = sveor_m (p0, z0, x0)) -+ -+/* -+** 
eor_x0_u64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** eor z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_x0_u64_m_untied, svuint64_t, uint64_t, -+ z0 = sveor_n_u64_m (p0, z1, x0), -+ z0 = sveor_m (p0, z1, x0)) -+ -+/* -+** eor_1_u64_m_tied1: -+** mov (z[0-9]+\.d), #1 -+** eor z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_u64_m_tied1, svuint64_t, -+ z0 = sveor_n_u64_m (p0, z0, 1), -+ z0 = sveor_m (p0, z0, 1)) -+ -+/* -+** eor_1_u64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #1 -+** movprfx z0, z1 -+** eor z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_u64_m_untied, svuint64_t, -+ z0 = sveor_n_u64_m (p0, z1, 1), -+ z0 = sveor_m (p0, z1, 1)) -+ -+/* -+** eor_m2_u64_m: -+** mov (z[0-9]+\.d), #-2 -+** eor z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m2_u64_m, svuint64_t, -+ z0 = sveor_n_u64_m (p0, z0, -2), -+ z0 = sveor_m (p0, z0, -2)) -+ -+/* -+** eor_u64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** eor z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (eor_u64_z_tied1, svuint64_t, -+ z0 = sveor_u64_z (p0, z0, z1), -+ z0 = sveor_z (p0, z0, z1)) -+ -+/* -+** eor_u64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** eor z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (eor_u64_z_tied2, svuint64_t, -+ z0 = sveor_u64_z (p0, z1, z0), -+ z0 = sveor_z (p0, z1, z0)) -+ -+/* -+** eor_u64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** eor z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** eor z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_u64_z_untied, svuint64_t, -+ z0 = sveor_u64_z (p0, z1, z2), -+ z0 = sveor_z (p0, z1, z2)) -+ -+/* -+** eor_x0_u64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** eor z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_x0_u64_z_tied1, svuint64_t, uint64_t, -+ z0 = sveor_n_u64_z (p0, z0, x0), -+ z0 = sveor_z (p0, z0, x0)) -+ -+/* -+** eor_x0_u64_z_untied: -+** mov (z[0-9]+\.d), x0 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** eor z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** eor z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_x0_u64_z_untied, svuint64_t, uint64_t, -+ z0 = sveor_n_u64_z (p0, z1, x0), -+ z0 = sveor_z (p0, z1, x0)) -+ -+/* -+** eor_1_u64_z_tied1: -+** mov (z[0-9]+\.d), #1 -+** movprfx z0\.d, p0/z, z0\.d -+** eor z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_u64_z_tied1, svuint64_t, -+ z0 = sveor_n_u64_z (p0, z0, 1), -+ z0 = sveor_z (p0, z0, 1)) -+ -+/* -+** eor_1_u64_z_untied: -+** mov (z[0-9]+\.d), #1 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** eor z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** eor z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_u64_z_untied, svuint64_t, -+ z0 = sveor_n_u64_z (p0, z1, 1), -+ z0 = sveor_z (p0, z1, 1)) -+ -+/* -+** eor_u64_x_tied1: -+** eor z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_u64_x_tied1, svuint64_t, -+ z0 = sveor_u64_x (p0, z0, z1), -+ z0 = sveor_x (p0, z0, z1)) -+ -+/* -+** eor_u64_x_tied2: -+** eor z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_u64_x_tied2, svuint64_t, -+ z0 = sveor_u64_x (p0, z1, z0), -+ z0 = sveor_x (p0, z1, z0)) -+ -+/* -+** eor_u64_x_untied: -+** eor z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_u64_x_untied, svuint64_t, -+ z0 = sveor_u64_x (p0, z1, z2), -+ z0 = sveor_x (p0, z1, z2)) -+ -+/* -+** eor_x0_u64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** eor z0\.d, 
(z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_x0_u64_x_tied1, svuint64_t, uint64_t, -+ z0 = sveor_n_u64_x (p0, z0, x0), -+ z0 = sveor_x (p0, z0, x0)) -+ -+/* -+** eor_x0_u64_x_untied: -+** mov (z[0-9]+\.d), x0 -+** eor z0\.d, (z1\.d, \1|\1, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_x0_u64_x_untied, svuint64_t, uint64_t, -+ z0 = sveor_n_u64_x (p0, z1, x0), -+ z0 = sveor_x (p0, z1, x0)) -+ -+/* -+** eor_1_u64_x_tied1: -+** eor z0\.d, z0\.d, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_u64_x_tied1, svuint64_t, -+ z0 = sveor_n_u64_x (p0, z0, 1), -+ z0 = sveor_x (p0, z0, 1)) -+ -+/* -+** eor_1_u64_x_untied: -+** movprfx z0, z1 -+** eor z0\.d, z0\.d, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_u64_x_untied, svuint64_t, -+ z0 = sveor_n_u64_x (p0, z1, 1), -+ z0 = sveor_x (p0, z1, 1)) -+ -+/* -+** eor_127_u64_x: -+** eor z0\.d, z0\.d, #0x7f -+** ret -+*/ -+TEST_UNIFORM_Z (eor_127_u64_x, svuint64_t, -+ z0 = sveor_n_u64_x (p0, z0, 127), -+ z0 = sveor_x (p0, z0, 127)) -+ -+/* -+** eor_128_u64_x: -+** eor z0\.d, z0\.d, #0x80 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_128_u64_x, svuint64_t, -+ z0 = sveor_n_u64_x (p0, z0, 128), -+ z0 = sveor_x (p0, z0, 128)) -+ -+/* -+** eor_255_u64_x: -+** eor z0\.d, z0\.d, #0xff -+** ret -+*/ -+TEST_UNIFORM_Z (eor_255_u64_x, svuint64_t, -+ z0 = sveor_n_u64_x (p0, z0, 255), -+ z0 = sveor_x (p0, z0, 255)) -+ -+/* -+** eor_256_u64_x: -+** eor z0\.d, z0\.d, #0x100 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_256_u64_x, svuint64_t, -+ z0 = sveor_n_u64_x (p0, z0, 256), -+ z0 = sveor_x (p0, z0, 256)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (eor_257_u64_x, svuint64_t, -+ z0 = sveor_n_u64_x (p0, z0, 257), -+ z0 = sveor_x (p0, z0, 257)) -+ -+/* -+** eor_512_u64_x: -+** eor z0\.d, z0\.d, #0x200 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_512_u64_x, svuint64_t, -+ z0 = sveor_n_u64_x (p0, z0, 512), -+ z0 = sveor_x (p0, z0, 512)) -+ -+/* -+** eor_65280_u64_x: -+** eor z0\.d, z0\.d, #0xff00 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_65280_u64_x, svuint64_t, -+ z0 = sveor_n_u64_x (p0, z0, 0xff00), -+ z0 = sveor_x (p0, z0, 0xff00)) -+ -+/* -+** eor_m127_u64_x: -+** eor z0\.d, z0\.d, #0xffffffffffffff81 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m127_u64_x, svuint64_t, -+ z0 = sveor_n_u64_x (p0, z0, -127), -+ z0 = sveor_x (p0, z0, -127)) -+ -+/* -+** eor_m128_u64_x: -+** eor z0\.d, z0\.d, #0xffffffffffffff80 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m128_u64_x, svuint64_t, -+ z0 = sveor_n_u64_x (p0, z0, -128), -+ z0 = sveor_x (p0, z0, -128)) -+ -+/* -+** eor_m255_u64_x: -+** eor z0\.d, z0\.d, #0xffffffffffffff01 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m255_u64_x, svuint64_t, -+ z0 = sveor_n_u64_x (p0, z0, -255), -+ z0 = sveor_x (p0, z0, -255)) -+ -+/* -+** eor_m256_u64_x: -+** eor z0\.d, z0\.d, #0xffffffffffffff00 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m256_u64_x, svuint64_t, -+ z0 = sveor_n_u64_x (p0, z0, -256), -+ z0 = sveor_x (p0, z0, -256)) -+ -+/* -+** eor_m257_u64_x: -+** eor z0\.d, z0\.d, #0xfffffffffffffeff -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m257_u64_x, svuint64_t, -+ z0 = sveor_n_u64_x (p0, z0, -257), -+ z0 = sveor_x (p0, z0, -257)) -+ -+/* -+** eor_m512_u64_x: -+** eor z0\.d, z0\.d, #0xfffffffffffffe00 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m512_u64_x, svuint64_t, -+ z0 = sveor_n_u64_x (p0, z0, -512), -+ z0 = sveor_x (p0, z0, -512)) -+ -+/* -+** eor_m32768_u64_x: -+** eor z0\.d, z0\.d, #0xffffffffffff8000 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m32768_u64_x, svuint64_t, -+ z0 = sveor_n_u64_x (p0, z0, -0x8000), -+ z0 = sveor_x (p0, z0, -0x8000)) -+ -+/* -+** eor_5_u64_x: -+** mov (z[0-9]+\.d), #5 -+** 
eor z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_5_u64_x, svuint64_t, -+ z0 = sveor_n_u64_x (p0, z0, 5), -+ z0 = sveor_x (p0, z0, 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_u8.c -new file mode 100644 -index 000000000..006637699 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eor_u8.c -@@ -0,0 +1,296 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** eor_u8_m_tied1: -+** eor z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (eor_u8_m_tied1, svuint8_t, -+ z0 = sveor_u8_m (p0, z0, z1), -+ z0 = sveor_m (p0, z0, z1)) -+ -+/* -+** eor_u8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** eor z0\.b, p0/m, z0\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (eor_u8_m_tied2, svuint8_t, -+ z0 = sveor_u8_m (p0, z1, z0), -+ z0 = sveor_m (p0, z1, z0)) -+ -+/* -+** eor_u8_m_untied: -+** movprfx z0, z1 -+** eor z0\.b, p0/m, z0\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (eor_u8_m_untied, svuint8_t, -+ z0 = sveor_u8_m (p0, z1, z2), -+ z0 = sveor_m (p0, z1, z2)) -+ -+/* -+** eor_w0_u8_m_tied1: -+** mov (z[0-9]+\.b), w0 -+** eor z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_w0_u8_m_tied1, svuint8_t, uint8_t, -+ z0 = sveor_n_u8_m (p0, z0, x0), -+ z0 = sveor_m (p0, z0, x0)) -+ -+/* -+** eor_w0_u8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** eor z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_w0_u8_m_untied, svuint8_t, uint8_t, -+ z0 = sveor_n_u8_m (p0, z1, x0), -+ z0 = sveor_m (p0, z1, x0)) -+ -+/* -+** eor_1_u8_m_tied1: -+** mov (z[0-9]+\.b), #1 -+** eor z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_u8_m_tied1, svuint8_t, -+ z0 = sveor_n_u8_m (p0, z0, 1), -+ z0 = sveor_m (p0, z0, 1)) -+ -+/* -+** eor_1_u8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), #1 -+** movprfx z0, z1 -+** eor z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_u8_m_untied, svuint8_t, -+ z0 = sveor_n_u8_m (p0, z1, 1), -+ z0 = sveor_m (p0, z1, 1)) -+ -+/* -+** eor_m2_u8_m: -+** mov (z[0-9]+\.b), #-2 -+** eor z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m2_u8_m, svuint8_t, -+ z0 = sveor_n_u8_m (p0, z0, -2), -+ z0 = sveor_m (p0, z0, -2)) -+ -+/* -+** eor_u8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** eor z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (eor_u8_z_tied1, svuint8_t, -+ z0 = sveor_u8_z (p0, z0, z1), -+ z0 = sveor_z (p0, z0, z1)) -+ -+/* -+** eor_u8_z_tied2: -+** movprfx z0\.b, p0/z, z0\.b -+** eor z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (eor_u8_z_tied2, svuint8_t, -+ z0 = sveor_u8_z (p0, z1, z0), -+ z0 = sveor_z (p0, z1, z0)) -+ -+/* -+** eor_u8_z_untied: -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** eor z0\.b, p0/m, z0\.b, z2\.b -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** eor z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_u8_z_untied, svuint8_t, -+ z0 = sveor_u8_z (p0, z1, z2), -+ z0 = sveor_z (p0, z1, z2)) -+ -+/* -+** eor_w0_u8_z_tied1: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** eor z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_w0_u8_z_tied1, svuint8_t, uint8_t, -+ z0 = sveor_n_u8_z (p0, z0, x0), -+ z0 = sveor_z (p0, z0, x0)) -+ -+/* -+** eor_w0_u8_z_untied: -+** mov (z[0-9]+\.b), w0 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** eor z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** eor z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ 
-+TEST_UNIFORM_ZX (eor_w0_u8_z_untied, svuint8_t, uint8_t, -+ z0 = sveor_n_u8_z (p0, z1, x0), -+ z0 = sveor_z (p0, z1, x0)) -+ -+/* -+** eor_1_u8_z_tied1: -+** mov (z[0-9]+\.b), #1 -+** movprfx z0\.b, p0/z, z0\.b -+** eor z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_u8_z_tied1, svuint8_t, -+ z0 = sveor_n_u8_z (p0, z0, 1), -+ z0 = sveor_z (p0, z0, 1)) -+ -+/* -+** eor_1_u8_z_untied: -+** mov (z[0-9]+\.b), #1 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** eor z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** eor z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_u8_z_untied, svuint8_t, -+ z0 = sveor_n_u8_z (p0, z1, 1), -+ z0 = sveor_z (p0, z1, 1)) -+ -+/* -+** eor_u8_x_tied1: -+** eor z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_u8_x_tied1, svuint8_t, -+ z0 = sveor_u8_x (p0, z0, z1), -+ z0 = sveor_x (p0, z0, z1)) -+ -+/* -+** eor_u8_x_tied2: -+** eor z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_u8_x_tied2, svuint8_t, -+ z0 = sveor_u8_x (p0, z1, z0), -+ z0 = sveor_x (p0, z1, z0)) -+ -+/* -+** eor_u8_x_untied: -+** eor z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_u8_x_untied, svuint8_t, -+ z0 = sveor_u8_x (p0, z1, z2), -+ z0 = sveor_x (p0, z1, z2)) -+ -+/* -+** eor_w0_u8_x_tied1: -+** mov (z[0-9]+)\.b, w0 -+** eor z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_w0_u8_x_tied1, svuint8_t, uint8_t, -+ z0 = sveor_n_u8_x (p0, z0, x0), -+ z0 = sveor_x (p0, z0, x0)) -+ -+/* -+** eor_w0_u8_x_untied: -+** mov (z[0-9]+)\.b, w0 -+** eor z0\.d, (z1\.d, \1\.d|\1\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (eor_w0_u8_x_untied, svuint8_t, uint8_t, -+ z0 = sveor_n_u8_x (p0, z1, x0), -+ z0 = sveor_x (p0, z1, x0)) -+ -+/* -+** eor_1_u8_x_tied1: -+** eor z0\.b, z0\.b, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_u8_x_tied1, svuint8_t, -+ z0 = sveor_n_u8_x (p0, z0, 1), -+ z0 = sveor_x (p0, z0, 1)) -+ -+/* -+** eor_1_u8_x_untied: -+** movprfx z0, z1 -+** eor z0\.b, z0\.b, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_1_u8_x_untied, svuint8_t, -+ z0 = sveor_n_u8_x (p0, z1, 1), -+ z0 = sveor_x (p0, z1, 1)) -+ -+/* -+** eor_127_u8_x: -+** eor z0\.b, z0\.b, #0x7f -+** ret -+*/ -+TEST_UNIFORM_Z (eor_127_u8_x, svuint8_t, -+ z0 = sveor_n_u8_x (p0, z0, 127), -+ z0 = sveor_x (p0, z0, 127)) -+ -+/* -+** eor_128_u8_x: -+** eor z0\.b, z0\.b, #0x80 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_128_u8_x, svuint8_t, -+ z0 = sveor_n_u8_x (p0, z0, 128), -+ z0 = sveor_x (p0, z0, 128)) -+ -+/* -+** eor_255_u8_x: -+** mov (z[0-9]+)\.b, #-1 -+** eor z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_255_u8_x, svuint8_t, -+ z0 = sveor_n_u8_x (p0, z0, 255), -+ z0 = sveor_x (p0, z0, 255)) -+ -+/* -+** eor_m127_u8_x: -+** eor z0\.b, z0\.b, #0x81 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m127_u8_x, svuint8_t, -+ z0 = sveor_n_u8_x (p0, z0, -127), -+ z0 = sveor_x (p0, z0, -127)) -+ -+/* -+** eor_m128_u8_x: -+** eor z0\.b, z0\.b, #0x80 -+** ret -+*/ -+TEST_UNIFORM_Z (eor_m128_u8_x, svuint8_t, -+ z0 = sveor_n_u8_x (p0, z0, -128), -+ z0 = sveor_x (p0, z0, -128)) -+ -+/* -+** eor_5_u8_x: -+** mov (z[0-9]+)\.b, #5 -+** eor z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (eor_5_u8_x, svuint8_t, -+ z0 = sveor_n_u8_x (p0, z0, 5), -+ z0 = sveor_x (p0, z0, 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_s16.c -new file mode 100644 -index 000000000..0675d7ed9 ---- /dev/null -+++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_s16.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** eorv_x0_s16: -+** eorv h([0-9]+), p0, z0\.h -+** umov w0, v\1\.h\[0\] -+** ret -+*/ -+TEST_REDUCTION_X (eorv_x0_s16, int16_t, svint16_t, -+ x0 = sveorv_s16 (p0, z0), -+ x0 = sveorv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_s32.c -new file mode 100644 -index 000000000..9c0c1089f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_s32.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** eorv_x0_s32: -+** eorv (s[0-9]+), p0, z0\.s -+** fmov w0, \1 -+** ret -+*/ -+TEST_REDUCTION_X (eorv_x0_s32, int32_t, svint32_t, -+ x0 = sveorv_s32 (p0, z0), -+ x0 = sveorv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_s64.c -new file mode 100644 -index 000000000..7a474556c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_s64.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** eorv_x0_s64: -+** eorv (d[0-9]+), p0, z0\.d -+** fmov x0, \1 -+** ret -+*/ -+TEST_REDUCTION_X (eorv_x0_s64, int64_t, svint64_t, -+ x0 = sveorv_s64 (p0, z0), -+ x0 = sveorv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_s8.c -new file mode 100644 -index 000000000..43f056d3a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_s8.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** eorv_x0_s8: -+** eorv b([0-9]+), p0, z0\.b -+** umov w0, v\1\.b\[0\] -+** ret -+*/ -+TEST_REDUCTION_X (eorv_x0_s8, int8_t, svint8_t, -+ x0 = sveorv_s8 (p0, z0), -+ x0 = sveorv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_u16.c -new file mode 100644 -index 000000000..5f7836db4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_u16.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** eorv_x0_u16: -+** eorv h([0-9]+), p0, z0\.h -+** umov w0, v\1\.h\[0\] -+** ret -+*/ -+TEST_REDUCTION_X (eorv_x0_u16, uint16_t, svuint16_t, -+ x0 = sveorv_u16 (p0, z0), -+ x0 = sveorv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_u32.c -new file mode 100644 -index 000000000..f112a0dc2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_u32.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** eorv_x0_u32: -+** eorv (s[0-9]+), p0, z0\.s -+** fmov w0, \1 -+** ret -+*/ -+TEST_REDUCTION_X (eorv_x0_u32, uint32_t, svuint32_t, -+ x0 = sveorv_u32 (p0, z0), -+ x0 = sveorv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_u64.c -new file mode 100644 -index 000000000..5f8b8f86b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_u64.c -@@ -0,0 +1,13 @@ -+/* { dg-final 
{ check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** eorv_x0_u64: -+** eorv (d[0-9]+), p0, z0\.d -+** fmov x0, \1 -+** ret -+*/ -+TEST_REDUCTION_X (eorv_x0_u64, uint64_t, svuint64_t, -+ x0 = sveorv_u64 (p0, z0), -+ x0 = sveorv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_u8.c -new file mode 100644 -index 000000000..eed4d4915 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/eorv_u8.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** eorv_x0_u8: -+** eorv b([0-9]+), p0, z0\.b -+** umov w0, v\1\.b\[0\] -+** ret -+*/ -+TEST_REDUCTION_X (eorv_x0_u8, uint8_t, svuint8_t, -+ x0 = sveorv_u8 (p0, z0), -+ x0 = sveorv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f16.c -new file mode 100644 -index 000000000..5a5411e46 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f16.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** expa_f16_tied1: -+** fexpa z0\.h, z0\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (expa_f16_tied1, svfloat16_t, svuint16_t, -+ z0_res = svexpa_f16 (z0), -+ z0_res = svexpa (z0)) -+ -+/* -+** expa_f16_untied: -+** fexpa z0\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (expa_f16_untied, svfloat16_t, svuint16_t, -+ z0 = svexpa_f16 (z4), -+ z0 = svexpa (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f32.c -new file mode 100644 -index 000000000..4ded1c575 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f32.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** expa_f32_tied1: -+** fexpa z0\.s, z0\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (expa_f32_tied1, svfloat32_t, svuint32_t, -+ z0_res = svexpa_f32 (z0), -+ z0_res = svexpa (z0)) -+ -+/* -+** expa_f32_untied: -+** fexpa z0\.s, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (expa_f32_untied, svfloat32_t, svuint32_t, -+ z0 = svexpa_f32 (z4), -+ z0 = svexpa (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f64.c -new file mode 100644 -index 000000000..c31f9ccb5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/expa_f64.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** expa_f64_tied1: -+** fexpa z0\.d, z0\.d -+** ret -+*/ -+TEST_DUAL_Z_REV (expa_f64_tied1, svfloat64_t, svuint64_t, -+ z0_res = svexpa_f64 (z0), -+ z0_res = svexpa (z0)) -+ -+/* -+** expa_f64_untied: -+** fexpa z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (expa_f64_untied, svfloat64_t, svuint64_t, -+ z0 = svexpa_f64 (z4), -+ z0 = svexpa (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_bf16.c -new file mode 100644 -index 000000000..f982873c4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_bf16.c -@@ -0,0 +1,73 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ext_0_bf16_tied1: -+** ext z0\.b, z0\.b, z1\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_0_bf16_tied1, svbfloat16_t, 
-+ z0 = svext_bf16 (z0, z1, 0), -+ z0 = svext (z0, z1, 0)) -+ -+/* -+** ext_0_bf16_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, \1\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_0_bf16_tied2, svbfloat16_t, -+ z0 = svext_bf16 (z1, z0, 0), -+ z0 = svext (z1, z0, 0)) -+ -+/* -+** ext_0_bf16_untied: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_0_bf16_untied, svbfloat16_t, -+ z0 = svext_bf16 (z1, z2, 0), -+ z0 = svext (z1, z2, 0)) -+ -+/* -+** ext_1_bf16: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_1_bf16, svbfloat16_t, -+ z0 = svext_bf16 (z1, z2, 1), -+ z0 = svext (z1, z2, 1)) -+ -+/* -+** ext_2_bf16: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #4 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_2_bf16, svbfloat16_t, -+ z0 = svext_bf16 (z1, z2, 2), -+ z0 = svext (z1, z2, 2)) -+ -+/* -+** ext_3_bf16: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #6 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_3_bf16, svbfloat16_t, -+ z0 = svext_bf16 (z1, z2, 3), -+ z0 = svext (z1, z2, 3)) -+ -+/* -+** ext_127_bf16: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #254 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_127_bf16, svbfloat16_t, -+ z0 = svext_bf16 (z1, z2, 127), -+ z0 = svext (z1, z2, 127)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_f16.c -new file mode 100644 -index 000000000..d8edccb9f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_f16.c -@@ -0,0 +1,73 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ext_0_f16_tied1: -+** ext z0\.b, z0\.b, z1\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_0_f16_tied1, svfloat16_t, -+ z0 = svext_f16 (z0, z1, 0), -+ z0 = svext (z0, z1, 0)) -+ -+/* -+** ext_0_f16_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, \1\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_0_f16_tied2, svfloat16_t, -+ z0 = svext_f16 (z1, z0, 0), -+ z0 = svext (z1, z0, 0)) -+ -+/* -+** ext_0_f16_untied: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_0_f16_untied, svfloat16_t, -+ z0 = svext_f16 (z1, z2, 0), -+ z0 = svext (z1, z2, 0)) -+ -+/* -+** ext_1_f16: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_1_f16, svfloat16_t, -+ z0 = svext_f16 (z1, z2, 1), -+ z0 = svext (z1, z2, 1)) -+ -+/* -+** ext_2_f16: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #4 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_2_f16, svfloat16_t, -+ z0 = svext_f16 (z1, z2, 2), -+ z0 = svext (z1, z2, 2)) -+ -+/* -+** ext_3_f16: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #6 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_3_f16, svfloat16_t, -+ z0 = svext_f16 (z1, z2, 3), -+ z0 = svext (z1, z2, 3)) -+ -+/* -+** ext_127_f16: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #254 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_127_f16, svfloat16_t, -+ z0 = svext_f16 (z1, z2, 127), -+ z0 = svext (z1, z2, 127)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_f32.c -new file mode 100644 -index 000000000..c00ea06fb ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_f32.c -@@ -0,0 +1,73 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ext_0_f32_tied1: -+** ext z0\.b, z0\.b, z1\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_0_f32_tied1, 
svfloat32_t, -+ z0 = svext_f32 (z0, z1, 0), -+ z0 = svext (z0, z1, 0)) -+ -+/* -+** ext_0_f32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, \1\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_0_f32_tied2, svfloat32_t, -+ z0 = svext_f32 (z1, z0, 0), -+ z0 = svext (z1, z0, 0)) -+ -+/* -+** ext_0_f32_untied: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_0_f32_untied, svfloat32_t, -+ z0 = svext_f32 (z1, z2, 0), -+ z0 = svext (z1, z2, 0)) -+ -+/* -+** ext_1_f32: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #4 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_1_f32, svfloat32_t, -+ z0 = svext_f32 (z1, z2, 1), -+ z0 = svext (z1, z2, 1)) -+ -+/* -+** ext_2_f32: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #8 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_2_f32, svfloat32_t, -+ z0 = svext_f32 (z1, z2, 2), -+ z0 = svext (z1, z2, 2)) -+ -+/* -+** ext_3_f32: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #12 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_3_f32, svfloat32_t, -+ z0 = svext_f32 (z1, z2, 3), -+ z0 = svext (z1, z2, 3)) -+ -+/* -+** ext_63_f32: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #252 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_63_f32, svfloat32_t, -+ z0 = svext_f32 (z1, z2, 63), -+ z0 = svext (z1, z2, 63)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_f64.c -new file mode 100644 -index 000000000..af72870ca ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_f64.c -@@ -0,0 +1,73 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ext_0_f64_tied1: -+** ext z0\.b, z0\.b, z1\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_0_f64_tied1, svfloat64_t, -+ z0 = svext_f64 (z0, z1, 0), -+ z0 = svext (z0, z1, 0)) -+ -+/* -+** ext_0_f64_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, \1\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_0_f64_tied2, svfloat64_t, -+ z0 = svext_f64 (z1, z0, 0), -+ z0 = svext (z1, z0, 0)) -+ -+/* -+** ext_0_f64_untied: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_0_f64_untied, svfloat64_t, -+ z0 = svext_f64 (z1, z2, 0), -+ z0 = svext (z1, z2, 0)) -+ -+/* -+** ext_1_f64: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #8 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_1_f64, svfloat64_t, -+ z0 = svext_f64 (z1, z2, 1), -+ z0 = svext (z1, z2, 1)) -+ -+/* -+** ext_2_f64: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #16 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_2_f64, svfloat64_t, -+ z0 = svext_f64 (z1, z2, 2), -+ z0 = svext (z1, z2, 2)) -+ -+/* -+** ext_3_f64: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #24 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_3_f64, svfloat64_t, -+ z0 = svext_f64 (z1, z2, 3), -+ z0 = svext (z1, z2, 3)) -+ -+/* -+** ext_31_f64: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #248 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_31_f64, svfloat64_t, -+ z0 = svext_f64 (z1, z2, 31), -+ z0 = svext (z1, z2, 31)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_s16.c -new file mode 100644 -index 000000000..a7c4484ac ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_s16.c -@@ -0,0 +1,73 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ext_0_s16_tied1: -+** ext z0\.b, z0\.b, z1\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_0_s16_tied1, svint16_t, -+ z0 = 
svext_s16 (z0, z1, 0), -+ z0 = svext (z0, z1, 0)) -+ -+/* -+** ext_0_s16_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, \1\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_0_s16_tied2, svint16_t, -+ z0 = svext_s16 (z1, z0, 0), -+ z0 = svext (z1, z0, 0)) -+ -+/* -+** ext_0_s16_untied: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_0_s16_untied, svint16_t, -+ z0 = svext_s16 (z1, z2, 0), -+ z0 = svext (z1, z2, 0)) -+ -+/* -+** ext_1_s16: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_1_s16, svint16_t, -+ z0 = svext_s16 (z1, z2, 1), -+ z0 = svext (z1, z2, 1)) -+ -+/* -+** ext_2_s16: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #4 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_2_s16, svint16_t, -+ z0 = svext_s16 (z1, z2, 2), -+ z0 = svext (z1, z2, 2)) -+ -+/* -+** ext_3_s16: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #6 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_3_s16, svint16_t, -+ z0 = svext_s16 (z1, z2, 3), -+ z0 = svext (z1, z2, 3)) -+ -+/* -+** ext_127_s16: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #254 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_127_s16, svint16_t, -+ z0 = svext_s16 (z1, z2, 127), -+ z0 = svext (z1, z2, 127)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_s32.c -new file mode 100644 -index 000000000..68242a9ec ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_s32.c -@@ -0,0 +1,73 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ext_0_s32_tied1: -+** ext z0\.b, z0\.b, z1\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_0_s32_tied1, svint32_t, -+ z0 = svext_s32 (z0, z1, 0), -+ z0 = svext (z0, z1, 0)) -+ -+/* -+** ext_0_s32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, \1\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_0_s32_tied2, svint32_t, -+ z0 = svext_s32 (z1, z0, 0), -+ z0 = svext (z1, z0, 0)) -+ -+/* -+** ext_0_s32_untied: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_0_s32_untied, svint32_t, -+ z0 = svext_s32 (z1, z2, 0), -+ z0 = svext (z1, z2, 0)) -+ -+/* -+** ext_1_s32: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #4 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_1_s32, svint32_t, -+ z0 = svext_s32 (z1, z2, 1), -+ z0 = svext (z1, z2, 1)) -+ -+/* -+** ext_2_s32: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #8 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_2_s32, svint32_t, -+ z0 = svext_s32 (z1, z2, 2), -+ z0 = svext (z1, z2, 2)) -+ -+/* -+** ext_3_s32: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #12 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_3_s32, svint32_t, -+ z0 = svext_s32 (z1, z2, 3), -+ z0 = svext (z1, z2, 3)) -+ -+/* -+** ext_63_s32: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #252 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_63_s32, svint32_t, -+ z0 = svext_s32 (z1, z2, 63), -+ z0 = svext (z1, z2, 63)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_s64.c -new file mode 100644 -index 000000000..8bdbd0561 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_s64.c -@@ -0,0 +1,73 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ext_0_s64_tied1: -+** ext z0\.b, z0\.b, z1\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_0_s64_tied1, svint64_t, -+ z0 = svext_s64 (z0, z1, 0), -+ z0 = svext (z0, z1, 
0)) -+ -+/* -+** ext_0_s64_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, \1\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_0_s64_tied2, svint64_t, -+ z0 = svext_s64 (z1, z0, 0), -+ z0 = svext (z1, z0, 0)) -+ -+/* -+** ext_0_s64_untied: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_0_s64_untied, svint64_t, -+ z0 = svext_s64 (z1, z2, 0), -+ z0 = svext (z1, z2, 0)) -+ -+/* -+** ext_1_s64: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #8 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_1_s64, svint64_t, -+ z0 = svext_s64 (z1, z2, 1), -+ z0 = svext (z1, z2, 1)) -+ -+/* -+** ext_2_s64: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #16 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_2_s64, svint64_t, -+ z0 = svext_s64 (z1, z2, 2), -+ z0 = svext (z1, z2, 2)) -+ -+/* -+** ext_3_s64: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #24 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_3_s64, svint64_t, -+ z0 = svext_s64 (z1, z2, 3), -+ z0 = svext (z1, z2, 3)) -+ -+/* -+** ext_31_s64: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #248 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_31_s64, svint64_t, -+ z0 = svext_s64 (z1, z2, 31), -+ z0 = svext (z1, z2, 31)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_s8.c -new file mode 100644 -index 000000000..52490f00e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_s8.c -@@ -0,0 +1,73 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ext_0_s8_tied1: -+** ext z0\.b, z0\.b, z1\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_0_s8_tied1, svint8_t, -+ z0 = svext_s8 (z0, z1, 0), -+ z0 = svext (z0, z1, 0)) -+ -+/* -+** ext_0_s8_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, \1\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_0_s8_tied2, svint8_t, -+ z0 = svext_s8 (z1, z0, 0), -+ z0 = svext (z1, z0, 0)) -+ -+/* -+** ext_0_s8_untied: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_0_s8_untied, svint8_t, -+ z0 = svext_s8 (z1, z2, 0), -+ z0 = svext (z1, z2, 0)) -+ -+/* -+** ext_1_s8: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_1_s8, svint8_t, -+ z0 = svext_s8 (z1, z2, 1), -+ z0 = svext (z1, z2, 1)) -+ -+/* -+** ext_2_s8: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_2_s8, svint8_t, -+ z0 = svext_s8 (z1, z2, 2), -+ z0 = svext (z1, z2, 2)) -+ -+/* -+** ext_3_s8: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #3 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_3_s8, svint8_t, -+ z0 = svext_s8 (z1, z2, 3), -+ z0 = svext (z1, z2, 3)) -+ -+/* -+** ext_255_s8: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_255_s8, svint8_t, -+ z0 = svext_s8 (z1, z2, 255), -+ z0 = svext (z1, z2, 255)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_u16.c -new file mode 100644 -index 000000000..dc7574ffa ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_u16.c -@@ -0,0 +1,73 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ext_0_u16_tied1: -+** ext z0\.b, z0\.b, z1\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_0_u16_tied1, svuint16_t, -+ z0 = svext_u16 (z0, z1, 0), -+ z0 = svext (z0, z1, 0)) -+ -+/* -+** ext_0_u16_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx 
z0, z1 -+** ext z0\.b, z0\.b, \1\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_0_u16_tied2, svuint16_t, -+ z0 = svext_u16 (z1, z0, 0), -+ z0 = svext (z1, z0, 0)) -+ -+/* -+** ext_0_u16_untied: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_0_u16_untied, svuint16_t, -+ z0 = svext_u16 (z1, z2, 0), -+ z0 = svext (z1, z2, 0)) -+ -+/* -+** ext_1_u16: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_1_u16, svuint16_t, -+ z0 = svext_u16 (z1, z2, 1), -+ z0 = svext (z1, z2, 1)) -+ -+/* -+** ext_2_u16: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #4 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_2_u16, svuint16_t, -+ z0 = svext_u16 (z1, z2, 2), -+ z0 = svext (z1, z2, 2)) -+ -+/* -+** ext_3_u16: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #6 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_3_u16, svuint16_t, -+ z0 = svext_u16 (z1, z2, 3), -+ z0 = svext (z1, z2, 3)) -+ -+/* -+** ext_127_u16: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #254 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_127_u16, svuint16_t, -+ z0 = svext_u16 (z1, z2, 127), -+ z0 = svext (z1, z2, 127)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_u32.c -new file mode 100644 -index 000000000..0d417fc43 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_u32.c -@@ -0,0 +1,73 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ext_0_u32_tied1: -+** ext z0\.b, z0\.b, z1\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_0_u32_tied1, svuint32_t, -+ z0 = svext_u32 (z0, z1, 0), -+ z0 = svext (z0, z1, 0)) -+ -+/* -+** ext_0_u32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, \1\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_0_u32_tied2, svuint32_t, -+ z0 = svext_u32 (z1, z0, 0), -+ z0 = svext (z1, z0, 0)) -+ -+/* -+** ext_0_u32_untied: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_0_u32_untied, svuint32_t, -+ z0 = svext_u32 (z1, z2, 0), -+ z0 = svext (z1, z2, 0)) -+ -+/* -+** ext_1_u32: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #4 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_1_u32, svuint32_t, -+ z0 = svext_u32 (z1, z2, 1), -+ z0 = svext (z1, z2, 1)) -+ -+/* -+** ext_2_u32: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #8 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_2_u32, svuint32_t, -+ z0 = svext_u32 (z1, z2, 2), -+ z0 = svext (z1, z2, 2)) -+ -+/* -+** ext_3_u32: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #12 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_3_u32, svuint32_t, -+ z0 = svext_u32 (z1, z2, 3), -+ z0 = svext (z1, z2, 3)) -+ -+/* -+** ext_63_u32: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #252 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_63_u32, svuint32_t, -+ z0 = svext_u32 (z1, z2, 63), -+ z0 = svext (z1, z2, 63)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_u64.c -new file mode 100644 -index 000000000..ed81f811e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_u64.c -@@ -0,0 +1,73 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ext_0_u64_tied1: -+** ext z0\.b, z0\.b, z1\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_0_u64_tied1, svuint64_t, -+ z0 = svext_u64 (z0, z1, 0), -+ z0 = svext (z0, z1, 0)) -+ -+/* -+** ext_0_u64_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, 
\1\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_0_u64_tied2, svuint64_t, -+ z0 = svext_u64 (z1, z0, 0), -+ z0 = svext (z1, z0, 0)) -+ -+/* -+** ext_0_u64_untied: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_0_u64_untied, svuint64_t, -+ z0 = svext_u64 (z1, z2, 0), -+ z0 = svext (z1, z2, 0)) -+ -+/* -+** ext_1_u64: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #8 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_1_u64, svuint64_t, -+ z0 = svext_u64 (z1, z2, 1), -+ z0 = svext (z1, z2, 1)) -+ -+/* -+** ext_2_u64: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #16 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_2_u64, svuint64_t, -+ z0 = svext_u64 (z1, z2, 2), -+ z0 = svext (z1, z2, 2)) -+ -+/* -+** ext_3_u64: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #24 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_3_u64, svuint64_t, -+ z0 = svext_u64 (z1, z2, 3), -+ z0 = svext (z1, z2, 3)) -+ -+/* -+** ext_31_u64: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #248 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_31_u64, svuint64_t, -+ z0 = svext_u64 (z1, z2, 31), -+ z0 = svext (z1, z2, 31)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_u8.c -new file mode 100644 -index 000000000..6c061406b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ext_u8.c -@@ -0,0 +1,73 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ext_0_u8_tied1: -+** ext z0\.b, z0\.b, z1\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_0_u8_tied1, svuint8_t, -+ z0 = svext_u8 (z0, z1, 0), -+ z0 = svext (z0, z1, 0)) -+ -+/* -+** ext_0_u8_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, \1\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_0_u8_tied2, svuint8_t, -+ z0 = svext_u8 (z1, z0, 0), -+ z0 = svext (z1, z0, 0)) -+ -+/* -+** ext_0_u8_untied: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_0_u8_untied, svuint8_t, -+ z0 = svext_u8 (z1, z2, 0), -+ z0 = svext (z1, z2, 0)) -+ -+/* -+** ext_1_u8: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_1_u8, svuint8_t, -+ z0 = svext_u8 (z1, z2, 1), -+ z0 = svext (z1, z2, 1)) -+ -+/* -+** ext_2_u8: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_2_u8, svuint8_t, -+ z0 = svext_u8 (z1, z2, 2), -+ z0 = svext (z1, z2, 2)) -+ -+/* -+** ext_3_u8: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #3 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_3_u8, svuint8_t, -+ z0 = svext_u8 (z1, z2, 3), -+ z0 = svext (z1, z2, 3)) -+ -+/* -+** ext_255_u8: -+** movprfx z0, z1 -+** ext z0\.b, z0\.b, z2\.b, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (ext_255_u8, svuint8_t, -+ z0 = svext_u8 (z1, z2, 255), -+ z0 = svext (z1, z2, 255)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_s16.c -new file mode 100644 -index 000000000..32e836f01 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_s16.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** extb_s16_m_tied12: -+** sxtb z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (extb_s16_m_tied12, svint16_t, -+ z0 = svextb_s16_m (z0, p0, z0), -+ z0 = svextb_m (z0, p0, z0)) -+ -+/* -+** extb_s16_m_tied1: -+** sxtb z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (extb_s16_m_tied1, svint16_t, -+ z0 = svextb_s16_m (z0, p0, z1), 
-+ z0 = svextb_m (z0, p0, z1)) -+ -+/* -+** extb_s16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** sxtb z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (extb_s16_m_tied2, svint16_t, -+ z0 = svextb_s16_m (z1, p0, z0), -+ z0 = svextb_m (z1, p0, z0)) -+ -+/* -+** extb_s16_m_untied: -+** movprfx z0, z2 -+** sxtb z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (extb_s16_m_untied, svint16_t, -+ z0 = svextb_s16_m (z2, p0, z1), -+ z0 = svextb_m (z2, p0, z1)) -+ -+/* -+** extb_s16_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, \1\.h -+** sxtb z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (extb_s16_z_tied1, svint16_t, -+ z0 = svextb_s16_z (p0, z0), -+ z0 = svextb_z (p0, z0)) -+ -+/* -+** extb_s16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** sxtb z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (extb_s16_z_untied, svint16_t, -+ z0 = svextb_s16_z (p0, z1), -+ z0 = svextb_z (p0, z1)) -+ -+/* -+** extb_s16_x_tied1: -+** sxtb z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (extb_s16_x_tied1, svint16_t, -+ z0 = svextb_s16_x (p0, z0), -+ z0 = svextb_x (p0, z0)) -+ -+/* -+** extb_s16_x_untied: -+** sxtb z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (extb_s16_x_untied, svint16_t, -+ z0 = svextb_s16_x (p0, z1), -+ z0 = svextb_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_s32.c -new file mode 100644 -index 000000000..e2f13f41c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_s32.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** extb_s32_m_tied12: -+** sxtb z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (extb_s32_m_tied12, svint32_t, -+ z0 = svextb_s32_m (z0, p0, z0), -+ z0 = svextb_m (z0, p0, z0)) -+ -+/* -+** extb_s32_m_tied1: -+** sxtb z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (extb_s32_m_tied1, svint32_t, -+ z0 = svextb_s32_m (z0, p0, z1), -+ z0 = svextb_m (z0, p0, z1)) -+ -+/* -+** extb_s32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** sxtb z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (extb_s32_m_tied2, svint32_t, -+ z0 = svextb_s32_m (z1, p0, z0), -+ z0 = svextb_m (z1, p0, z0)) -+ -+/* -+** extb_s32_m_untied: -+** movprfx z0, z2 -+** sxtb z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (extb_s32_m_untied, svint32_t, -+ z0 = svextb_s32_m (z2, p0, z1), -+ z0 = svextb_m (z2, p0, z1)) -+ -+/* -+** extb_s32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** sxtb z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (extb_s32_z_tied1, svint32_t, -+ z0 = svextb_s32_z (p0, z0), -+ z0 = svextb_z (p0, z0)) -+ -+/* -+** extb_s32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** sxtb z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (extb_s32_z_untied, svint32_t, -+ z0 = svextb_s32_z (p0, z1), -+ z0 = svextb_z (p0, z1)) -+ -+/* -+** extb_s32_x_tied1: -+** sxtb z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (extb_s32_x_tied1, svint32_t, -+ z0 = svextb_s32_x (p0, z0), -+ z0 = svextb_x (p0, z0)) -+ -+/* -+** extb_s32_x_untied: -+** sxtb z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (extb_s32_x_untied, svint32_t, -+ z0 = svextb_s32_x (p0, z1), -+ z0 = svextb_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_s64.c -new file mode 100644 -index 000000000..83363efdb ---- /dev/null -+++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_s64.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** extb_s64_m_tied12: -+** sxtb z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (extb_s64_m_tied12, svint64_t, -+ z0 = svextb_s64_m (z0, p0, z0), -+ z0 = svextb_m (z0, p0, z0)) -+ -+/* -+** extb_s64_m_tied1: -+** sxtb z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (extb_s64_m_tied1, svint64_t, -+ z0 = svextb_s64_m (z0, p0, z1), -+ z0 = svextb_m (z0, p0, z1)) -+ -+/* -+** extb_s64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** sxtb z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (extb_s64_m_tied2, svint64_t, -+ z0 = svextb_s64_m (z1, p0, z0), -+ z0 = svextb_m (z1, p0, z0)) -+ -+/* -+** extb_s64_m_untied: -+** movprfx z0, z2 -+** sxtb z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (extb_s64_m_untied, svint64_t, -+ z0 = svextb_s64_m (z2, p0, z1), -+ z0 = svextb_m (z2, p0, z1)) -+ -+/* -+** extb_s64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** sxtb z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (extb_s64_z_tied1, svint64_t, -+ z0 = svextb_s64_z (p0, z0), -+ z0 = svextb_z (p0, z0)) -+ -+/* -+** extb_s64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** sxtb z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (extb_s64_z_untied, svint64_t, -+ z0 = svextb_s64_z (p0, z1), -+ z0 = svextb_z (p0, z1)) -+ -+/* -+** extb_s64_x_tied1: -+** sxtb z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (extb_s64_x_tied1, svint64_t, -+ z0 = svextb_s64_x (p0, z0), -+ z0 = svextb_x (p0, z0)) -+ -+/* -+** extb_s64_x_untied: -+** sxtb z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (extb_s64_x_untied, svint64_t, -+ z0 = svextb_s64_x (p0, z1), -+ z0 = svextb_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_u16.c -new file mode 100644 -index 000000000..d806edfaa ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_u16.c -@@ -0,0 +1,82 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** extb_u16_m_tied12: -+** uxtb z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (extb_u16_m_tied12, svuint16_t, -+ z0 = svextb_u16_m (z0, p0, z0), -+ z0 = svextb_m (z0, p0, z0)) -+ -+/* -+** extb_u16_m_tied1: -+** uxtb z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (extb_u16_m_tied1, svuint16_t, -+ z0 = svextb_u16_m (z0, p0, z1), -+ z0 = svextb_m (z0, p0, z1)) -+ -+/* -+** extb_u16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** uxtb z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (extb_u16_m_tied2, svuint16_t, -+ z0 = svextb_u16_m (z1, p0, z0), -+ z0 = svextb_m (z1, p0, z0)) -+ -+/* -+** extb_u16_m_untied: -+** movprfx z0, z2 -+** uxtb z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (extb_u16_m_untied, svuint16_t, -+ z0 = svextb_u16_m (z2, p0, z1), -+ z0 = svextb_m (z2, p0, z1)) -+ -+/* -+** extb_u16_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, \1\.h -+** uxtb z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (extb_u16_z_tied1, svuint16_t, -+ z0 = svextb_u16_z (p0, z0), -+ z0 = svextb_z (p0, z0)) -+ -+/* -+** extb_u16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** uxtb z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (extb_u16_z_untied, svuint16_t, -+ z0 = svextb_u16_z (p0, z1), -+ z0 = svextb_z (p0, z1)) -+ -+/* -+** extb_u16_x_tied1: -+** and z0\.h, z0\.h, #0xff -+** ret -+*/ -+TEST_UNIFORM_Z 
(extb_u16_x_tied1, svuint16_t, -+ z0 = svextb_u16_x (p0, z0), -+ z0 = svextb_x (p0, z0)) -+ -+/* -+** extb_u16_x_untied: -+** movprfx z0, z1 -+** and z0\.h, z0\.h, #0xff -+** ret -+*/ -+TEST_UNIFORM_Z (extb_u16_x_untied, svuint16_t, -+ z0 = svextb_u16_x (p0, z1), -+ z0 = svextb_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_u32.c -new file mode 100644 -index 000000000..274656dbd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_u32.c -@@ -0,0 +1,82 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** extb_u32_m_tied12: -+** uxtb z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (extb_u32_m_tied12, svuint32_t, -+ z0 = svextb_u32_m (z0, p0, z0), -+ z0 = svextb_m (z0, p0, z0)) -+ -+/* -+** extb_u32_m_tied1: -+** uxtb z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (extb_u32_m_tied1, svuint32_t, -+ z0 = svextb_u32_m (z0, p0, z1), -+ z0 = svextb_m (z0, p0, z1)) -+ -+/* -+** extb_u32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** uxtb z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (extb_u32_m_tied2, svuint32_t, -+ z0 = svextb_u32_m (z1, p0, z0), -+ z0 = svextb_m (z1, p0, z0)) -+ -+/* -+** extb_u32_m_untied: -+** movprfx z0, z2 -+** uxtb z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (extb_u32_m_untied, svuint32_t, -+ z0 = svextb_u32_m (z2, p0, z1), -+ z0 = svextb_m (z2, p0, z1)) -+ -+/* -+** extb_u32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** uxtb z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (extb_u32_z_tied1, svuint32_t, -+ z0 = svextb_u32_z (p0, z0), -+ z0 = svextb_z (p0, z0)) -+ -+/* -+** extb_u32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** uxtb z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (extb_u32_z_untied, svuint32_t, -+ z0 = svextb_u32_z (p0, z1), -+ z0 = svextb_z (p0, z1)) -+ -+/* -+** extb_u32_x_tied1: -+** and z0\.s, z0\.s, #0xff -+** ret -+*/ -+TEST_UNIFORM_Z (extb_u32_x_tied1, svuint32_t, -+ z0 = svextb_u32_x (p0, z0), -+ z0 = svextb_x (p0, z0)) -+ -+/* -+** extb_u32_x_untied: -+** movprfx z0, z1 -+** and z0\.s, z0\.s, #0xff -+** ret -+*/ -+TEST_UNIFORM_Z (extb_u32_x_untied, svuint32_t, -+ z0 = svextb_u32_x (p0, z1), -+ z0 = svextb_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_u64.c -new file mode 100644 -index 000000000..de24cc605 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extb_u64.c -@@ -0,0 +1,82 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** extb_u64_m_tied12: -+** uxtb z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (extb_u64_m_tied12, svuint64_t, -+ z0 = svextb_u64_m (z0, p0, z0), -+ z0 = svextb_m (z0, p0, z0)) -+ -+/* -+** extb_u64_m_tied1: -+** uxtb z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (extb_u64_m_tied1, svuint64_t, -+ z0 = svextb_u64_m (z0, p0, z1), -+ z0 = svextb_m (z0, p0, z1)) -+ -+/* -+** extb_u64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** uxtb z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (extb_u64_m_tied2, svuint64_t, -+ z0 = svextb_u64_m (z1, p0, z0), -+ z0 = svextb_m (z1, p0, z0)) -+ -+/* -+** extb_u64_m_untied: -+** movprfx z0, z2 -+** uxtb z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (extb_u64_m_untied, svuint64_t, -+ z0 = svextb_u64_m (z2, p0, z1), -+ z0 = svextb_m (z2, p0, z1)) -+ -+/* -+** 
extb_u64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** uxtb z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (extb_u64_z_tied1, svuint64_t, -+ z0 = svextb_u64_z (p0, z0), -+ z0 = svextb_z (p0, z0)) -+ -+/* -+** extb_u64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** uxtb z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (extb_u64_z_untied, svuint64_t, -+ z0 = svextb_u64_z (p0, z1), -+ z0 = svextb_z (p0, z1)) -+ -+/* -+** extb_u64_x_tied1: -+** and z0\.d, z0\.d, #0xff -+** ret -+*/ -+TEST_UNIFORM_Z (extb_u64_x_tied1, svuint64_t, -+ z0 = svextb_u64_x (p0, z0), -+ z0 = svextb_x (p0, z0)) -+ -+/* -+** extb_u64_x_untied: -+** movprfx z0, z1 -+** and z0\.d, z0\.d, #0xff -+** ret -+*/ -+TEST_UNIFORM_Z (extb_u64_x_untied, svuint64_t, -+ z0 = svextb_u64_x (p0, z1), -+ z0 = svextb_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/exth_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/exth_s32.c -new file mode 100644 -index 000000000..3bb0bf31f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/exth_s32.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** exth_s32_m_tied12: -+** sxth z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (exth_s32_m_tied12, svint32_t, -+ z0 = svexth_s32_m (z0, p0, z0), -+ z0 = svexth_m (z0, p0, z0)) -+ -+/* -+** exth_s32_m_tied1: -+** sxth z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (exth_s32_m_tied1, svint32_t, -+ z0 = svexth_s32_m (z0, p0, z1), -+ z0 = svexth_m (z0, p0, z1)) -+ -+/* -+** exth_s32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** sxth z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (exth_s32_m_tied2, svint32_t, -+ z0 = svexth_s32_m (z1, p0, z0), -+ z0 = svexth_m (z1, p0, z0)) -+ -+/* -+** exth_s32_m_untied: -+** movprfx z0, z2 -+** sxth z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (exth_s32_m_untied, svint32_t, -+ z0 = svexth_s32_m (z2, p0, z1), -+ z0 = svexth_m (z2, p0, z1)) -+ -+/* -+** exth_s32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** sxth z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (exth_s32_z_tied1, svint32_t, -+ z0 = svexth_s32_z (p0, z0), -+ z0 = svexth_z (p0, z0)) -+ -+/* -+** exth_s32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** sxth z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (exth_s32_z_untied, svint32_t, -+ z0 = svexth_s32_z (p0, z1), -+ z0 = svexth_z (p0, z1)) -+ -+/* -+** exth_s32_x_tied1: -+** sxth z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (exth_s32_x_tied1, svint32_t, -+ z0 = svexth_s32_x (p0, z0), -+ z0 = svexth_x (p0, z0)) -+ -+/* -+** exth_s32_x_untied: -+** sxth z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (exth_s32_x_untied, svint32_t, -+ z0 = svexth_s32_x (p0, z1), -+ z0 = svexth_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/exth_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/exth_s64.c -new file mode 100644 -index 000000000..0718b67ad ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/exth_s64.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** exth_s64_m_tied12: -+** sxth z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (exth_s64_m_tied12, svint64_t, -+ z0 = svexth_s64_m (z0, p0, z0), -+ z0 = svexth_m (z0, p0, z0)) -+ -+/* -+** exth_s64_m_tied1: -+** sxth z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (exth_s64_m_tied1, svint64_t, -+ z0 = svexth_s64_m (z0, p0, z1), 
-+ z0 = svexth_m (z0, p0, z1)) -+ -+/* -+** exth_s64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** sxth z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (exth_s64_m_tied2, svint64_t, -+ z0 = svexth_s64_m (z1, p0, z0), -+ z0 = svexth_m (z1, p0, z0)) -+ -+/* -+** exth_s64_m_untied: -+** movprfx z0, z2 -+** sxth z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (exth_s64_m_untied, svint64_t, -+ z0 = svexth_s64_m (z2, p0, z1), -+ z0 = svexth_m (z2, p0, z1)) -+ -+/* -+** exth_s64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** sxth z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (exth_s64_z_tied1, svint64_t, -+ z0 = svexth_s64_z (p0, z0), -+ z0 = svexth_z (p0, z0)) -+ -+/* -+** exth_s64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** sxth z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (exth_s64_z_untied, svint64_t, -+ z0 = svexth_s64_z (p0, z1), -+ z0 = svexth_z (p0, z1)) -+ -+/* -+** exth_s64_x_tied1: -+** sxth z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (exth_s64_x_tied1, svint64_t, -+ z0 = svexth_s64_x (p0, z0), -+ z0 = svexth_x (p0, z0)) -+ -+/* -+** exth_s64_x_untied: -+** sxth z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (exth_s64_x_untied, svint64_t, -+ z0 = svexth_s64_x (p0, z1), -+ z0 = svexth_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/exth_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/exth_u32.c -new file mode 100644 -index 000000000..1ba7fc8c3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/exth_u32.c -@@ -0,0 +1,82 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** exth_u32_m_tied12: -+** uxth z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (exth_u32_m_tied12, svuint32_t, -+ z0 = svexth_u32_m (z0, p0, z0), -+ z0 = svexth_m (z0, p0, z0)) -+ -+/* -+** exth_u32_m_tied1: -+** uxth z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (exth_u32_m_tied1, svuint32_t, -+ z0 = svexth_u32_m (z0, p0, z1), -+ z0 = svexth_m (z0, p0, z1)) -+ -+/* -+** exth_u32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** uxth z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (exth_u32_m_tied2, svuint32_t, -+ z0 = svexth_u32_m (z1, p0, z0), -+ z0 = svexth_m (z1, p0, z0)) -+ -+/* -+** exth_u32_m_untied: -+** movprfx z0, z2 -+** uxth z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (exth_u32_m_untied, svuint32_t, -+ z0 = svexth_u32_m (z2, p0, z1), -+ z0 = svexth_m (z2, p0, z1)) -+ -+/* -+** exth_u32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** uxth z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (exth_u32_z_tied1, svuint32_t, -+ z0 = svexth_u32_z (p0, z0), -+ z0 = svexth_z (p0, z0)) -+ -+/* -+** exth_u32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** uxth z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (exth_u32_z_untied, svuint32_t, -+ z0 = svexth_u32_z (p0, z1), -+ z0 = svexth_z (p0, z1)) -+ -+/* -+** exth_u32_x_tied1: -+** and z0\.s, z0\.s, #0xffff -+** ret -+*/ -+TEST_UNIFORM_Z (exth_u32_x_tied1, svuint32_t, -+ z0 = svexth_u32_x (p0, z0), -+ z0 = svexth_x (p0, z0)) -+ -+/* -+** exth_u32_x_untied: -+** movprfx z0, z1 -+** and z0\.s, z0\.s, #0xffff -+** ret -+*/ -+TEST_UNIFORM_Z (exth_u32_x_untied, svuint32_t, -+ z0 = svexth_u32_x (p0, z1), -+ z0 = svexth_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/exth_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/exth_u64.c -new file mode 100644 -index 000000000..1555cf0b7 ---- /dev/null -+++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/exth_u64.c -@@ -0,0 +1,82 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** exth_u64_m_tied12: -+** uxth z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (exth_u64_m_tied12, svuint64_t, -+ z0 = svexth_u64_m (z0, p0, z0), -+ z0 = svexth_m (z0, p0, z0)) -+ -+/* -+** exth_u64_m_tied1: -+** uxth z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (exth_u64_m_tied1, svuint64_t, -+ z0 = svexth_u64_m (z0, p0, z1), -+ z0 = svexth_m (z0, p0, z1)) -+ -+/* -+** exth_u64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** uxth z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (exth_u64_m_tied2, svuint64_t, -+ z0 = svexth_u64_m (z1, p0, z0), -+ z0 = svexth_m (z1, p0, z0)) -+ -+/* -+** exth_u64_m_untied: -+** movprfx z0, z2 -+** uxth z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (exth_u64_m_untied, svuint64_t, -+ z0 = svexth_u64_m (z2, p0, z1), -+ z0 = svexth_m (z2, p0, z1)) -+ -+/* -+** exth_u64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** uxth z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (exth_u64_z_tied1, svuint64_t, -+ z0 = svexth_u64_z (p0, z0), -+ z0 = svexth_z (p0, z0)) -+ -+/* -+** exth_u64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** uxth z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (exth_u64_z_untied, svuint64_t, -+ z0 = svexth_u64_z (p0, z1), -+ z0 = svexth_z (p0, z1)) -+ -+/* -+** exth_u64_x_tied1: -+** and z0\.d, z0\.d, #0xffff -+** ret -+*/ -+TEST_UNIFORM_Z (exth_u64_x_tied1, svuint64_t, -+ z0 = svexth_u64_x (p0, z0), -+ z0 = svexth_x (p0, z0)) -+ -+/* -+** exth_u64_x_untied: -+** movprfx z0, z1 -+** and z0\.d, z0\.d, #0xffff -+** ret -+*/ -+TEST_UNIFORM_Z (exth_u64_x_untied, svuint64_t, -+ z0 = svexth_u64_x (p0, z1), -+ z0 = svexth_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extw_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extw_s64.c -new file mode 100644 -index 000000000..a6edadfa7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extw_s64.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** extw_s64_m_tied12: -+** sxtw z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (extw_s64_m_tied12, svint64_t, -+ z0 = svextw_s64_m (z0, p0, z0), -+ z0 = svextw_m (z0, p0, z0)) -+ -+/* -+** extw_s64_m_tied1: -+** sxtw z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (extw_s64_m_tied1, svint64_t, -+ z0 = svextw_s64_m (z0, p0, z1), -+ z0 = svextw_m (z0, p0, z1)) -+ -+/* -+** extw_s64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** sxtw z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (extw_s64_m_tied2, svint64_t, -+ z0 = svextw_s64_m (z1, p0, z0), -+ z0 = svextw_m (z1, p0, z0)) -+ -+/* -+** extw_s64_m_untied: -+** movprfx z0, z2 -+** sxtw z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (extw_s64_m_untied, svint64_t, -+ z0 = svextw_s64_m (z2, p0, z1), -+ z0 = svextw_m (z2, p0, z1)) -+ -+/* -+** extw_s64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** sxtw z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (extw_s64_z_tied1, svint64_t, -+ z0 = svextw_s64_z (p0, z0), -+ z0 = svextw_z (p0, z0)) -+ -+/* -+** extw_s64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** sxtw z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (extw_s64_z_untied, svint64_t, -+ z0 = svextw_s64_z (p0, z1), -+ z0 = svextw_z (p0, z1)) -+ -+/* -+** extw_s64_x_tied1: -+** sxtw z0\.d, p0/m, z0\.d -+** ret -+*/ 
-+TEST_UNIFORM_Z (extw_s64_x_tied1, svint64_t, -+ z0 = svextw_s64_x (p0, z0), -+ z0 = svextw_x (p0, z0)) -+ -+/* -+** extw_s64_x_untied: -+** sxtw z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (extw_s64_x_untied, svint64_t, -+ z0 = svextw_s64_x (p0, z1), -+ z0 = svextw_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extw_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extw_u64.c -new file mode 100644 -index 000000000..880a287f3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/extw_u64.c -@@ -0,0 +1,82 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** extw_u64_m_tied12: -+** uxtw z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (extw_u64_m_tied12, svuint64_t, -+ z0 = svextw_u64_m (z0, p0, z0), -+ z0 = svextw_m (z0, p0, z0)) -+ -+/* -+** extw_u64_m_tied1: -+** uxtw z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (extw_u64_m_tied1, svuint64_t, -+ z0 = svextw_u64_m (z0, p0, z1), -+ z0 = svextw_m (z0, p0, z1)) -+ -+/* -+** extw_u64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** uxtw z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (extw_u64_m_tied2, svuint64_t, -+ z0 = svextw_u64_m (z1, p0, z0), -+ z0 = svextw_m (z1, p0, z0)) -+ -+/* -+** extw_u64_m_untied: -+** movprfx z0, z2 -+** uxtw z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (extw_u64_m_untied, svuint64_t, -+ z0 = svextw_u64_m (z2, p0, z1), -+ z0 = svextw_m (z2, p0, z1)) -+ -+/* -+** extw_u64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** uxtw z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (extw_u64_z_tied1, svuint64_t, -+ z0 = svextw_u64_z (p0, z0), -+ z0 = svextw_z (p0, z0)) -+ -+/* -+** extw_u64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** uxtw z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (extw_u64_z_untied, svuint64_t, -+ z0 = svextw_u64_z (p0, z1), -+ z0 = svextw_z (p0, z1)) -+ -+/* -+** extw_u64_x_tied1: -+** and z0\.d, z0\.d, #0xffffffff -+** ret -+*/ -+TEST_UNIFORM_Z (extw_u64_x_tied1, svuint64_t, -+ z0 = svextw_u64_x (p0, z0), -+ z0 = svextw_x (p0, z0)) -+ -+/* -+** extw_u64_x_untied: -+** movprfx z0, z1 -+** and z0\.d, z0\.d, #0xffffffff -+** ret -+*/ -+TEST_UNIFORM_Z (extw_u64_x_untied, svuint64_t, -+ z0 = svextw_u64_x (p0, z1), -+ z0 = svextw_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_bf16.c -new file mode 100644 -index 000000000..6e5c773b5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_bf16.c -@@ -0,0 +1,55 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** get2_bf16_z0_0: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_GET (get2_bf16_z0_0, svbfloat16x2_t, svbfloat16_t, -+ z0 = svget2_bf16 (z4, 0), -+ z0 = svget2 (z4, 0)) -+ -+/* -+** get2_bf16_z0_1: -+** mov z0\.d, z5\.d -+** ret -+*/ -+TEST_GET (get2_bf16_z0_1, svbfloat16x2_t, svbfloat16_t, -+ z0 = svget2_bf16 (z4, 1), -+ z0 = svget2 (z4, 1)) -+ -+/* -+** get2_bf16_z4_0: -+** ret -+*/ -+TEST_GET (get2_bf16_z4_0, svbfloat16x2_t, svbfloat16_t, -+ z4_res = svget2_bf16 (z4, 0), -+ z4_res = svget2 (z4, 0)) -+ -+/* -+** get2_bf16_z4_1: -+** mov z4\.d, z5\.d -+** ret -+*/ -+TEST_GET (get2_bf16_z4_1, svbfloat16x2_t, svbfloat16_t, -+ z4_res = svget2_bf16 (z4, 1), -+ z4_res = svget2 (z4, 1)) -+ -+/* -+** get2_bf16_z5_0: -+** mov z5\.d, z4\.d -+** ret -+*/ -+TEST_GET (get2_bf16_z5_0, svbfloat16x2_t, svbfloat16_t, -+ z5_res = svget2_bf16 
(z4, 0), -+ z5_res = svget2 (z4, 0)) -+ -+/* -+** get2_bf16_z5_1: -+** ret -+*/ -+TEST_GET (get2_bf16_z5_1, svbfloat16x2_t, svbfloat16_t, -+ z5_res = svget2_bf16 (z4, 1), -+ z5_res = svget2 (z4, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_f16.c -new file mode 100644 -index 000000000..9b6379e0b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_f16.c -@@ -0,0 +1,55 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** get2_f16_z0_0: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_GET (get2_f16_z0_0, svfloat16x2_t, svfloat16_t, -+ z0 = svget2_f16 (z4, 0), -+ z0 = svget2 (z4, 0)) -+ -+/* -+** get2_f16_z0_1: -+** mov z0\.d, z5\.d -+** ret -+*/ -+TEST_GET (get2_f16_z0_1, svfloat16x2_t, svfloat16_t, -+ z0 = svget2_f16 (z4, 1), -+ z0 = svget2 (z4, 1)) -+ -+/* -+** get2_f16_z4_0: -+** ret -+*/ -+TEST_GET (get2_f16_z4_0, svfloat16x2_t, svfloat16_t, -+ z4_res = svget2_f16 (z4, 0), -+ z4_res = svget2 (z4, 0)) -+ -+/* -+** get2_f16_z4_1: -+** mov z4\.d, z5\.d -+** ret -+*/ -+TEST_GET (get2_f16_z4_1, svfloat16x2_t, svfloat16_t, -+ z4_res = svget2_f16 (z4, 1), -+ z4_res = svget2 (z4, 1)) -+ -+/* -+** get2_f16_z5_0: -+** mov z5\.d, z4\.d -+** ret -+*/ -+TEST_GET (get2_f16_z5_0, svfloat16x2_t, svfloat16_t, -+ z5_res = svget2_f16 (z4, 0), -+ z5_res = svget2 (z4, 0)) -+ -+/* -+** get2_f16_z5_1: -+** ret -+*/ -+TEST_GET (get2_f16_z5_1, svfloat16x2_t, svfloat16_t, -+ z5_res = svget2_f16 (z4, 1), -+ z5_res = svget2 (z4, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_f32.c -new file mode 100644 -index 000000000..76080dc66 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_f32.c -@@ -0,0 +1,55 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** get2_f32_z0_0: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_GET (get2_f32_z0_0, svfloat32x2_t, svfloat32_t, -+ z0 = svget2_f32 (z4, 0), -+ z0 = svget2 (z4, 0)) -+ -+/* -+** get2_f32_z0_1: -+** mov z0\.d, z5\.d -+** ret -+*/ -+TEST_GET (get2_f32_z0_1, svfloat32x2_t, svfloat32_t, -+ z0 = svget2_f32 (z4, 1), -+ z0 = svget2 (z4, 1)) -+ -+/* -+** get2_f32_z4_0: -+** ret -+*/ -+TEST_GET (get2_f32_z4_0, svfloat32x2_t, svfloat32_t, -+ z4_res = svget2_f32 (z4, 0), -+ z4_res = svget2 (z4, 0)) -+ -+/* -+** get2_f32_z4_1: -+** mov z4\.d, z5\.d -+** ret -+*/ -+TEST_GET (get2_f32_z4_1, svfloat32x2_t, svfloat32_t, -+ z4_res = svget2_f32 (z4, 1), -+ z4_res = svget2 (z4, 1)) -+ -+/* -+** get2_f32_z5_0: -+** mov z5\.d, z4\.d -+** ret -+*/ -+TEST_GET (get2_f32_z5_0, svfloat32x2_t, svfloat32_t, -+ z5_res = svget2_f32 (z4, 0), -+ z5_res = svget2 (z4, 0)) -+ -+/* -+** get2_f32_z5_1: -+** ret -+*/ -+TEST_GET (get2_f32_z5_1, svfloat32x2_t, svfloat32_t, -+ z5_res = svget2_f32 (z4, 1), -+ z5_res = svget2 (z4, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_f64.c -new file mode 100644 -index 000000000..cabe6e7de ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_f64.c -@@ -0,0 +1,55 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** get2_f64_z0_0: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_GET (get2_f64_z0_0, svfloat64x2_t, svfloat64_t, -+ z0 = svget2_f64 (z4, 0), -+ z0 = svget2 (z4, 0)) -+ -+/* -+** 
get2_f64_z0_1: -+** mov z0\.d, z5\.d -+** ret -+*/ -+TEST_GET (get2_f64_z0_1, svfloat64x2_t, svfloat64_t, -+ z0 = svget2_f64 (z4, 1), -+ z0 = svget2 (z4, 1)) -+ -+/* -+** get2_f64_z4_0: -+** ret -+*/ -+TEST_GET (get2_f64_z4_0, svfloat64x2_t, svfloat64_t, -+ z4_res = svget2_f64 (z4, 0), -+ z4_res = svget2 (z4, 0)) -+ -+/* -+** get2_f64_z4_1: -+** mov z4\.d, z5\.d -+** ret -+*/ -+TEST_GET (get2_f64_z4_1, svfloat64x2_t, svfloat64_t, -+ z4_res = svget2_f64 (z4, 1), -+ z4_res = svget2 (z4, 1)) -+ -+/* -+** get2_f64_z5_0: -+** mov z5\.d, z4\.d -+** ret -+*/ -+TEST_GET (get2_f64_z5_0, svfloat64x2_t, svfloat64_t, -+ z5_res = svget2_f64 (z4, 0), -+ z5_res = svget2 (z4, 0)) -+ -+/* -+** get2_f64_z5_1: -+** ret -+*/ -+TEST_GET (get2_f64_z5_1, svfloat64x2_t, svfloat64_t, -+ z5_res = svget2_f64 (z4, 1), -+ z5_res = svget2 (z4, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_s16.c -new file mode 100644 -index 000000000..387e6daad ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_s16.c -@@ -0,0 +1,55 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** get2_s16_z0_0: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_GET (get2_s16_z0_0, svint16x2_t, svint16_t, -+ z0 = svget2_s16 (z4, 0), -+ z0 = svget2 (z4, 0)) -+ -+/* -+** get2_s16_z0_1: -+** mov z0\.d, z5\.d -+** ret -+*/ -+TEST_GET (get2_s16_z0_1, svint16x2_t, svint16_t, -+ z0 = svget2_s16 (z4, 1), -+ z0 = svget2 (z4, 1)) -+ -+/* -+** get2_s16_z4_0: -+** ret -+*/ -+TEST_GET (get2_s16_z4_0, svint16x2_t, svint16_t, -+ z4_res = svget2_s16 (z4, 0), -+ z4_res = svget2 (z4, 0)) -+ -+/* -+** get2_s16_z4_1: -+** mov z4\.d, z5\.d -+** ret -+*/ -+TEST_GET (get2_s16_z4_1, svint16x2_t, svint16_t, -+ z4_res = svget2_s16 (z4, 1), -+ z4_res = svget2 (z4, 1)) -+ -+/* -+** get2_s16_z5_0: -+** mov z5\.d, z4\.d -+** ret -+*/ -+TEST_GET (get2_s16_z5_0, svint16x2_t, svint16_t, -+ z5_res = svget2_s16 (z4, 0), -+ z5_res = svget2 (z4, 0)) -+ -+/* -+** get2_s16_z5_1: -+** ret -+*/ -+TEST_GET (get2_s16_z5_1, svint16x2_t, svint16_t, -+ z5_res = svget2_s16 (z4, 1), -+ z5_res = svget2 (z4, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_s32.c -new file mode 100644 -index 000000000..5c47286e0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_s32.c -@@ -0,0 +1,55 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** get2_s32_z0_0: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_GET (get2_s32_z0_0, svint32x2_t, svint32_t, -+ z0 = svget2_s32 (z4, 0), -+ z0 = svget2 (z4, 0)) -+ -+/* -+** get2_s32_z0_1: -+** mov z0\.d, z5\.d -+** ret -+*/ -+TEST_GET (get2_s32_z0_1, svint32x2_t, svint32_t, -+ z0 = svget2_s32 (z4, 1), -+ z0 = svget2 (z4, 1)) -+ -+/* -+** get2_s32_z4_0: -+** ret -+*/ -+TEST_GET (get2_s32_z4_0, svint32x2_t, svint32_t, -+ z4_res = svget2_s32 (z4, 0), -+ z4_res = svget2 (z4, 0)) -+ -+/* -+** get2_s32_z4_1: -+** mov z4\.d, z5\.d -+** ret -+*/ -+TEST_GET (get2_s32_z4_1, svint32x2_t, svint32_t, -+ z4_res = svget2_s32 (z4, 1), -+ z4_res = svget2 (z4, 1)) -+ -+/* -+** get2_s32_z5_0: -+** mov z5\.d, z4\.d -+** ret -+*/ -+TEST_GET (get2_s32_z5_0, svint32x2_t, svint32_t, -+ z5_res = svget2_s32 (z4, 0), -+ z5_res = svget2 (z4, 0)) -+ -+/* -+** get2_s32_z5_1: -+** ret -+*/ -+TEST_GET (get2_s32_z5_1, svint32x2_t, svint32_t, -+ z5_res = svget2_s32 (z4, 1), -+ z5_res = 
svget2 (z4, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_s64.c -new file mode 100644 -index 000000000..18f930d4c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_s64.c -@@ -0,0 +1,55 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** get2_s64_z0_0: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_GET (get2_s64_z0_0, svint64x2_t, svint64_t, -+ z0 = svget2_s64 (z4, 0), -+ z0 = svget2 (z4, 0)) -+ -+/* -+** get2_s64_z0_1: -+** mov z0\.d, z5\.d -+** ret -+*/ -+TEST_GET (get2_s64_z0_1, svint64x2_t, svint64_t, -+ z0 = svget2_s64 (z4, 1), -+ z0 = svget2 (z4, 1)) -+ -+/* -+** get2_s64_z4_0: -+** ret -+*/ -+TEST_GET (get2_s64_z4_0, svint64x2_t, svint64_t, -+ z4_res = svget2_s64 (z4, 0), -+ z4_res = svget2 (z4, 0)) -+ -+/* -+** get2_s64_z4_1: -+** mov z4\.d, z5\.d -+** ret -+*/ -+TEST_GET (get2_s64_z4_1, svint64x2_t, svint64_t, -+ z4_res = svget2_s64 (z4, 1), -+ z4_res = svget2 (z4, 1)) -+ -+/* -+** get2_s64_z5_0: -+** mov z5\.d, z4\.d -+** ret -+*/ -+TEST_GET (get2_s64_z5_0, svint64x2_t, svint64_t, -+ z5_res = svget2_s64 (z4, 0), -+ z5_res = svget2 (z4, 0)) -+ -+/* -+** get2_s64_z5_1: -+** ret -+*/ -+TEST_GET (get2_s64_z5_1, svint64x2_t, svint64_t, -+ z5_res = svget2_s64 (z4, 1), -+ z5_res = svget2 (z4, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_s8.c -new file mode 100644 -index 000000000..27e2cfafb ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_s8.c -@@ -0,0 +1,55 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** get2_s8_z0_0: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_GET (get2_s8_z0_0, svint8x2_t, svint8_t, -+ z0 = svget2_s8 (z4, 0), -+ z0 = svget2 (z4, 0)) -+ -+/* -+** get2_s8_z0_1: -+** mov z0\.d, z5\.d -+** ret -+*/ -+TEST_GET (get2_s8_z0_1, svint8x2_t, svint8_t, -+ z0 = svget2_s8 (z4, 1), -+ z0 = svget2 (z4, 1)) -+ -+/* -+** get2_s8_z4_0: -+** ret -+*/ -+TEST_GET (get2_s8_z4_0, svint8x2_t, svint8_t, -+ z4_res = svget2_s8 (z4, 0), -+ z4_res = svget2 (z4, 0)) -+ -+/* -+** get2_s8_z4_1: -+** mov z4\.d, z5\.d -+** ret -+*/ -+TEST_GET (get2_s8_z4_1, svint8x2_t, svint8_t, -+ z4_res = svget2_s8 (z4, 1), -+ z4_res = svget2 (z4, 1)) -+ -+/* -+** get2_s8_z5_0: -+** mov z5\.d, z4\.d -+** ret -+*/ -+TEST_GET (get2_s8_z5_0, svint8x2_t, svint8_t, -+ z5_res = svget2_s8 (z4, 0), -+ z5_res = svget2 (z4, 0)) -+ -+/* -+** get2_s8_z5_1: -+** ret -+*/ -+TEST_GET (get2_s8_z5_1, svint8x2_t, svint8_t, -+ z5_res = svget2_s8 (z4, 1), -+ z5_res = svget2 (z4, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_u16.c -new file mode 100644 -index 000000000..1804900cc ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_u16.c -@@ -0,0 +1,55 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** get2_u16_z0_0: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_GET (get2_u16_z0_0, svuint16x2_t, svuint16_t, -+ z0 = svget2_u16 (z4, 0), -+ z0 = svget2 (z4, 0)) -+ -+/* -+** get2_u16_z0_1: -+** mov z0\.d, z5\.d -+** ret -+*/ -+TEST_GET (get2_u16_z0_1, svuint16x2_t, svuint16_t, -+ z0 = svget2_u16 (z4, 1), -+ z0 = svget2 (z4, 1)) -+ -+/* -+** get2_u16_z4_0: -+** ret -+*/ -+TEST_GET (get2_u16_z4_0, svuint16x2_t, svuint16_t, -+ z4_res = svget2_u16 
(z4, 0), -+ z4_res = svget2 (z4, 0)) -+ -+/* -+** get2_u16_z4_1: -+** mov z4\.d, z5\.d -+** ret -+*/ -+TEST_GET (get2_u16_z4_1, svuint16x2_t, svuint16_t, -+ z4_res = svget2_u16 (z4, 1), -+ z4_res = svget2 (z4, 1)) -+ -+/* -+** get2_u16_z5_0: -+** mov z5\.d, z4\.d -+** ret -+*/ -+TEST_GET (get2_u16_z5_0, svuint16x2_t, svuint16_t, -+ z5_res = svget2_u16 (z4, 0), -+ z5_res = svget2 (z4, 0)) -+ -+/* -+** get2_u16_z5_1: -+** ret -+*/ -+TEST_GET (get2_u16_z5_1, svuint16x2_t, svuint16_t, -+ z5_res = svget2_u16 (z4, 1), -+ z5_res = svget2 (z4, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_u32.c -new file mode 100644 -index 000000000..5c14de6aa ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_u32.c -@@ -0,0 +1,55 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** get2_u32_z0_0: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_GET (get2_u32_z0_0, svuint32x2_t, svuint32_t, -+ z0 = svget2_u32 (z4, 0), -+ z0 = svget2 (z4, 0)) -+ -+/* -+** get2_u32_z0_1: -+** mov z0\.d, z5\.d -+** ret -+*/ -+TEST_GET (get2_u32_z0_1, svuint32x2_t, svuint32_t, -+ z0 = svget2_u32 (z4, 1), -+ z0 = svget2 (z4, 1)) -+ -+/* -+** get2_u32_z4_0: -+** ret -+*/ -+TEST_GET (get2_u32_z4_0, svuint32x2_t, svuint32_t, -+ z4_res = svget2_u32 (z4, 0), -+ z4_res = svget2 (z4, 0)) -+ -+/* -+** get2_u32_z4_1: -+** mov z4\.d, z5\.d -+** ret -+*/ -+TEST_GET (get2_u32_z4_1, svuint32x2_t, svuint32_t, -+ z4_res = svget2_u32 (z4, 1), -+ z4_res = svget2 (z4, 1)) -+ -+/* -+** get2_u32_z5_0: -+** mov z5\.d, z4\.d -+** ret -+*/ -+TEST_GET (get2_u32_z5_0, svuint32x2_t, svuint32_t, -+ z5_res = svget2_u32 (z4, 0), -+ z5_res = svget2 (z4, 0)) -+ -+/* -+** get2_u32_z5_1: -+** ret -+*/ -+TEST_GET (get2_u32_z5_1, svuint32x2_t, svuint32_t, -+ z5_res = svget2_u32 (z4, 1), -+ z5_res = svget2 (z4, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_u64.c -new file mode 100644 -index 000000000..fd389a01e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_u64.c -@@ -0,0 +1,55 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** get2_u64_z0_0: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_GET (get2_u64_z0_0, svuint64x2_t, svuint64_t, -+ z0 = svget2_u64 (z4, 0), -+ z0 = svget2 (z4, 0)) -+ -+/* -+** get2_u64_z0_1: -+** mov z0\.d, z5\.d -+** ret -+*/ -+TEST_GET (get2_u64_z0_1, svuint64x2_t, svuint64_t, -+ z0 = svget2_u64 (z4, 1), -+ z0 = svget2 (z4, 1)) -+ -+/* -+** get2_u64_z4_0: -+** ret -+*/ -+TEST_GET (get2_u64_z4_0, svuint64x2_t, svuint64_t, -+ z4_res = svget2_u64 (z4, 0), -+ z4_res = svget2 (z4, 0)) -+ -+/* -+** get2_u64_z4_1: -+** mov z4\.d, z5\.d -+** ret -+*/ -+TEST_GET (get2_u64_z4_1, svuint64x2_t, svuint64_t, -+ z4_res = svget2_u64 (z4, 1), -+ z4_res = svget2 (z4, 1)) -+ -+/* -+** get2_u64_z5_0: -+** mov z5\.d, z4\.d -+** ret -+*/ -+TEST_GET (get2_u64_z5_0, svuint64x2_t, svuint64_t, -+ z5_res = svget2_u64 (z4, 0), -+ z5_res = svget2 (z4, 0)) -+ -+/* -+** get2_u64_z5_1: -+** ret -+*/ -+TEST_GET (get2_u64_z5_1, svuint64x2_t, svuint64_t, -+ z5_res = svget2_u64 (z4, 1), -+ z5_res = svget2 (z4, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_u8.c -new file mode 100644 -index 000000000..42ffb0344 ---- /dev/null -+++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get2_u8.c -@@ -0,0 +1,55 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** get2_u8_z0_0: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_GET (get2_u8_z0_0, svuint8x2_t, svuint8_t, -+ z0 = svget2_u8 (z4, 0), -+ z0 = svget2 (z4, 0)) -+ -+/* -+** get2_u8_z0_1: -+** mov z0\.d, z5\.d -+** ret -+*/ -+TEST_GET (get2_u8_z0_1, svuint8x2_t, svuint8_t, -+ z0 = svget2_u8 (z4, 1), -+ z0 = svget2 (z4, 1)) -+ -+/* -+** get2_u8_z4_0: -+** ret -+*/ -+TEST_GET (get2_u8_z4_0, svuint8x2_t, svuint8_t, -+ z4_res = svget2_u8 (z4, 0), -+ z4_res = svget2 (z4, 0)) -+ -+/* -+** get2_u8_z4_1: -+** mov z4\.d, z5\.d -+** ret -+*/ -+TEST_GET (get2_u8_z4_1, svuint8x2_t, svuint8_t, -+ z4_res = svget2_u8 (z4, 1), -+ z4_res = svget2 (z4, 1)) -+ -+/* -+** get2_u8_z5_0: -+** mov z5\.d, z4\.d -+** ret -+*/ -+TEST_GET (get2_u8_z5_0, svuint8x2_t, svuint8_t, -+ z5_res = svget2_u8 (z4, 0), -+ z5_res = svget2 (z4, 0)) -+ -+/* -+** get2_u8_z5_1: -+** ret -+*/ -+TEST_GET (get2_u8_z5_1, svuint8x2_t, svuint8_t, -+ z5_res = svget2_u8 (z4, 1), -+ z5_res = svget2 (z4, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_bf16.c -new file mode 100644 -index 000000000..292f02a12 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_bf16.c -@@ -0,0 +1,108 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** get3_bf16_z0_0: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_GET (get3_bf16_z0_0, svbfloat16x3_t, svbfloat16_t, -+ z0 = svget3_bf16 (z4, 0), -+ z0 = svget3 (z4, 0)) -+ -+/* -+** get3_bf16_z0_1: -+** mov z0\.d, z5\.d -+** ret -+*/ -+TEST_GET (get3_bf16_z0_1, svbfloat16x3_t, svbfloat16_t, -+ z0 = svget3_bf16 (z4, 1), -+ z0 = svget3 (z4, 1)) -+ -+/* -+** get3_bf16_z0_2: -+** mov z0\.d, z6\.d -+** ret -+*/ -+TEST_GET (get3_bf16_z0_2, svbfloat16x3_t, svbfloat16_t, -+ z0 = svget3_bf16 (z4, 2), -+ z0 = svget3 (z4, 2)) -+ -+/* -+** get3_bf16_z4_0: -+** ret -+*/ -+TEST_GET (get3_bf16_z4_0, svbfloat16x3_t, svbfloat16_t, -+ z4_res = svget3_bf16 (z4, 0), -+ z4_res = svget3 (z4, 0)) -+ -+/* -+** get3_bf16_z4_1: -+** mov z4\.d, z5\.d -+** ret -+*/ -+TEST_GET (get3_bf16_z4_1, svbfloat16x3_t, svbfloat16_t, -+ z4_res = svget3_bf16 (z4, 1), -+ z4_res = svget3 (z4, 1)) -+ -+/* -+** get3_bf16_z4_2: -+** mov z4\.d, z6\.d -+** ret -+*/ -+TEST_GET (get3_bf16_z4_2, svbfloat16x3_t, svbfloat16_t, -+ z4_res = svget3_bf16 (z4, 2), -+ z4_res = svget3 (z4, 2)) -+ -+/* -+** get3_bf16_z5_0: -+** mov z5\.d, z4\.d -+** ret -+*/ -+TEST_GET (get3_bf16_z5_0, svbfloat16x3_t, svbfloat16_t, -+ z5_res = svget3_bf16 (z4, 0), -+ z5_res = svget3 (z4, 0)) -+ -+/* -+** get3_bf16_z5_1: -+** ret -+*/ -+TEST_GET (get3_bf16_z5_1, svbfloat16x3_t, svbfloat16_t, -+ z5_res = svget3_bf16 (z4, 1), -+ z5_res = svget3 (z4, 1)) -+ -+/* -+** get3_bf16_z5_2: -+** mov z5\.d, z6\.d -+** ret -+*/ -+TEST_GET (get3_bf16_z5_2, svbfloat16x3_t, svbfloat16_t, -+ z5_res = svget3_bf16 (z4, 2), -+ z5_res = svget3 (z4, 2)) -+ -+/* -+** get3_bf16_z6_0: -+** mov z6\.d, z4\.d -+** ret -+*/ -+TEST_GET (get3_bf16_z6_0, svbfloat16x3_t, svbfloat16_t, -+ z6_res = svget3_bf16 (z4, 0), -+ z6_res = svget3 (z4, 0)) -+ -+/* -+** get3_bf16_z6_1: -+** mov z6\.d, z5\.d -+** ret -+*/ -+TEST_GET (get3_bf16_z6_1, svbfloat16x3_t, svbfloat16_t, -+ z6_res = svget3_bf16 (z4, 1), -+ z6_res = svget3 (z4, 1)) -+ -+/* -+** get3_bf16_z6_2: -+** ret -+*/ -+TEST_GET 
(get3_bf16_z6_2, svbfloat16x3_t, svbfloat16_t, -+ z6_res = svget3_bf16 (z4, 2), -+ z6_res = svget3 (z4, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_f16.c -new file mode 100644 -index 000000000..8bea03bc5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_f16.c -@@ -0,0 +1,108 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** get3_f16_z0_0: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_GET (get3_f16_z0_0, svfloat16x3_t, svfloat16_t, -+ z0 = svget3_f16 (z4, 0), -+ z0 = svget3 (z4, 0)) -+ -+/* -+** get3_f16_z0_1: -+** mov z0\.d, z5\.d -+** ret -+*/ -+TEST_GET (get3_f16_z0_1, svfloat16x3_t, svfloat16_t, -+ z0 = svget3_f16 (z4, 1), -+ z0 = svget3 (z4, 1)) -+ -+/* -+** get3_f16_z0_2: -+** mov z0\.d, z6\.d -+** ret -+*/ -+TEST_GET (get3_f16_z0_2, svfloat16x3_t, svfloat16_t, -+ z0 = svget3_f16 (z4, 2), -+ z0 = svget3 (z4, 2)) -+ -+/* -+** get3_f16_z4_0: -+** ret -+*/ -+TEST_GET (get3_f16_z4_0, svfloat16x3_t, svfloat16_t, -+ z4_res = svget3_f16 (z4, 0), -+ z4_res = svget3 (z4, 0)) -+ -+/* -+** get3_f16_z4_1: -+** mov z4\.d, z5\.d -+** ret -+*/ -+TEST_GET (get3_f16_z4_1, svfloat16x3_t, svfloat16_t, -+ z4_res = svget3_f16 (z4, 1), -+ z4_res = svget3 (z4, 1)) -+ -+/* -+** get3_f16_z4_2: -+** mov z4\.d, z6\.d -+** ret -+*/ -+TEST_GET (get3_f16_z4_2, svfloat16x3_t, svfloat16_t, -+ z4_res = svget3_f16 (z4, 2), -+ z4_res = svget3 (z4, 2)) -+ -+/* -+** get3_f16_z5_0: -+** mov z5\.d, z4\.d -+** ret -+*/ -+TEST_GET (get3_f16_z5_0, svfloat16x3_t, svfloat16_t, -+ z5_res = svget3_f16 (z4, 0), -+ z5_res = svget3 (z4, 0)) -+ -+/* -+** get3_f16_z5_1: -+** ret -+*/ -+TEST_GET (get3_f16_z5_1, svfloat16x3_t, svfloat16_t, -+ z5_res = svget3_f16 (z4, 1), -+ z5_res = svget3 (z4, 1)) -+ -+/* -+** get3_f16_z5_2: -+** mov z5\.d, z6\.d -+** ret -+*/ -+TEST_GET (get3_f16_z5_2, svfloat16x3_t, svfloat16_t, -+ z5_res = svget3_f16 (z4, 2), -+ z5_res = svget3 (z4, 2)) -+ -+/* -+** get3_f16_z6_0: -+** mov z6\.d, z4\.d -+** ret -+*/ -+TEST_GET (get3_f16_z6_0, svfloat16x3_t, svfloat16_t, -+ z6_res = svget3_f16 (z4, 0), -+ z6_res = svget3 (z4, 0)) -+ -+/* -+** get3_f16_z6_1: -+** mov z6\.d, z5\.d -+** ret -+*/ -+TEST_GET (get3_f16_z6_1, svfloat16x3_t, svfloat16_t, -+ z6_res = svget3_f16 (z4, 1), -+ z6_res = svget3 (z4, 1)) -+ -+/* -+** get3_f16_z6_2: -+** ret -+*/ -+TEST_GET (get3_f16_z6_2, svfloat16x3_t, svfloat16_t, -+ z6_res = svget3_f16 (z4, 2), -+ z6_res = svget3 (z4, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_f32.c -new file mode 100644 -index 000000000..246679584 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_f32.c -@@ -0,0 +1,108 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** get3_f32_z0_0: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_GET (get3_f32_z0_0, svfloat32x3_t, svfloat32_t, -+ z0 = svget3_f32 (z4, 0), -+ z0 = svget3 (z4, 0)) -+ -+/* -+** get3_f32_z0_1: -+** mov z0\.d, z5\.d -+** ret -+*/ -+TEST_GET (get3_f32_z0_1, svfloat32x3_t, svfloat32_t, -+ z0 = svget3_f32 (z4, 1), -+ z0 = svget3 (z4, 1)) -+ -+/* -+** get3_f32_z0_2: -+** mov z0\.d, z6\.d -+** ret -+*/ -+TEST_GET (get3_f32_z0_2, svfloat32x3_t, svfloat32_t, -+ z0 = svget3_f32 (z4, 2), -+ z0 = svget3 (z4, 2)) -+ -+/* -+** get3_f32_z4_0: -+** ret -+*/ -+TEST_GET (get3_f32_z4_0, svfloat32x3_t, svfloat32_t, -+ z4_res = 
svget3_f32 (z4, 0), -+ z4_res = svget3 (z4, 0)) -+ -+/* -+** get3_f32_z4_1: -+** mov z4\.d, z5\.d -+** ret -+*/ -+TEST_GET (get3_f32_z4_1, svfloat32x3_t, svfloat32_t, -+ z4_res = svget3_f32 (z4, 1), -+ z4_res = svget3 (z4, 1)) -+ -+/* -+** get3_f32_z4_2: -+** mov z4\.d, z6\.d -+** ret -+*/ -+TEST_GET (get3_f32_z4_2, svfloat32x3_t, svfloat32_t, -+ z4_res = svget3_f32 (z4, 2), -+ z4_res = svget3 (z4, 2)) -+ -+/* -+** get3_f32_z5_0: -+** mov z5\.d, z4\.d -+** ret -+*/ -+TEST_GET (get3_f32_z5_0, svfloat32x3_t, svfloat32_t, -+ z5_res = svget3_f32 (z4, 0), -+ z5_res = svget3 (z4, 0)) -+ -+/* -+** get3_f32_z5_1: -+** ret -+*/ -+TEST_GET (get3_f32_z5_1, svfloat32x3_t, svfloat32_t, -+ z5_res = svget3_f32 (z4, 1), -+ z5_res = svget3 (z4, 1)) -+ -+/* -+** get3_f32_z5_2: -+** mov z5\.d, z6\.d -+** ret -+*/ -+TEST_GET (get3_f32_z5_2, svfloat32x3_t, svfloat32_t, -+ z5_res = svget3_f32 (z4, 2), -+ z5_res = svget3 (z4, 2)) -+ -+/* -+** get3_f32_z6_0: -+** mov z6\.d, z4\.d -+** ret -+*/ -+TEST_GET (get3_f32_z6_0, svfloat32x3_t, svfloat32_t, -+ z6_res = svget3_f32 (z4, 0), -+ z6_res = svget3 (z4, 0)) -+ -+/* -+** get3_f32_z6_1: -+** mov z6\.d, z5\.d -+** ret -+*/ -+TEST_GET (get3_f32_z6_1, svfloat32x3_t, svfloat32_t, -+ z6_res = svget3_f32 (z4, 1), -+ z6_res = svget3 (z4, 1)) -+ -+/* -+** get3_f32_z6_2: -+** ret -+*/ -+TEST_GET (get3_f32_z6_2, svfloat32x3_t, svfloat32_t, -+ z6_res = svget3_f32 (z4, 2), -+ z6_res = svget3 (z4, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_f64.c -new file mode 100644 -index 000000000..e44eb15fd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_f64.c -@@ -0,0 +1,108 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** get3_f64_z0_0: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_GET (get3_f64_z0_0, svfloat64x3_t, svfloat64_t, -+ z0 = svget3_f64 (z4, 0), -+ z0 = svget3 (z4, 0)) -+ -+/* -+** get3_f64_z0_1: -+** mov z0\.d, z5\.d -+** ret -+*/ -+TEST_GET (get3_f64_z0_1, svfloat64x3_t, svfloat64_t, -+ z0 = svget3_f64 (z4, 1), -+ z0 = svget3 (z4, 1)) -+ -+/* -+** get3_f64_z0_2: -+** mov z0\.d, z6\.d -+** ret -+*/ -+TEST_GET (get3_f64_z0_2, svfloat64x3_t, svfloat64_t, -+ z0 = svget3_f64 (z4, 2), -+ z0 = svget3 (z4, 2)) -+ -+/* -+** get3_f64_z4_0: -+** ret -+*/ -+TEST_GET (get3_f64_z4_0, svfloat64x3_t, svfloat64_t, -+ z4_res = svget3_f64 (z4, 0), -+ z4_res = svget3 (z4, 0)) -+ -+/* -+** get3_f64_z4_1: -+** mov z4\.d, z5\.d -+** ret -+*/ -+TEST_GET (get3_f64_z4_1, svfloat64x3_t, svfloat64_t, -+ z4_res = svget3_f64 (z4, 1), -+ z4_res = svget3 (z4, 1)) -+ -+/* -+** get3_f64_z4_2: -+** mov z4\.d, z6\.d -+** ret -+*/ -+TEST_GET (get3_f64_z4_2, svfloat64x3_t, svfloat64_t, -+ z4_res = svget3_f64 (z4, 2), -+ z4_res = svget3 (z4, 2)) -+ -+/* -+** get3_f64_z5_0: -+** mov z5\.d, z4\.d -+** ret -+*/ -+TEST_GET (get3_f64_z5_0, svfloat64x3_t, svfloat64_t, -+ z5_res = svget3_f64 (z4, 0), -+ z5_res = svget3 (z4, 0)) -+ -+/* -+** get3_f64_z5_1: -+** ret -+*/ -+TEST_GET (get3_f64_z5_1, svfloat64x3_t, svfloat64_t, -+ z5_res = svget3_f64 (z4, 1), -+ z5_res = svget3 (z4, 1)) -+ -+/* -+** get3_f64_z5_2: -+** mov z5\.d, z6\.d -+** ret -+*/ -+TEST_GET (get3_f64_z5_2, svfloat64x3_t, svfloat64_t, -+ z5_res = svget3_f64 (z4, 2), -+ z5_res = svget3 (z4, 2)) -+ -+/* -+** get3_f64_z6_0: -+** mov z6\.d, z4\.d -+** ret -+*/ -+TEST_GET (get3_f64_z6_0, svfloat64x3_t, svfloat64_t, -+ z6_res = svget3_f64 (z4, 0), -+ z6_res = svget3 (z4, 0)) -+ -+/* -+** 
get3_f64_z6_1: -+** mov z6\.d, z5\.d -+** ret -+*/ -+TEST_GET (get3_f64_z6_1, svfloat64x3_t, svfloat64_t, -+ z6_res = svget3_f64 (z4, 1), -+ z6_res = svget3 (z4, 1)) -+ -+/* -+** get3_f64_z6_2: -+** ret -+*/ -+TEST_GET (get3_f64_z6_2, svfloat64x3_t, svfloat64_t, -+ z6_res = svget3_f64 (z4, 2), -+ z6_res = svget3 (z4, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_s16.c -new file mode 100644 -index 000000000..88f7e4986 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_s16.c -@@ -0,0 +1,108 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** get3_s16_z0_0: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_GET (get3_s16_z0_0, svint16x3_t, svint16_t, -+ z0 = svget3_s16 (z4, 0), -+ z0 = svget3 (z4, 0)) -+ -+/* -+** get3_s16_z0_1: -+** mov z0\.d, z5\.d -+** ret -+*/ -+TEST_GET (get3_s16_z0_1, svint16x3_t, svint16_t, -+ z0 = svget3_s16 (z4, 1), -+ z0 = svget3 (z4, 1)) -+ -+/* -+** get3_s16_z0_2: -+** mov z0\.d, z6\.d -+** ret -+*/ -+TEST_GET (get3_s16_z0_2, svint16x3_t, svint16_t, -+ z0 = svget3_s16 (z4, 2), -+ z0 = svget3 (z4, 2)) -+ -+/* -+** get3_s16_z4_0: -+** ret -+*/ -+TEST_GET (get3_s16_z4_0, svint16x3_t, svint16_t, -+ z4_res = svget3_s16 (z4, 0), -+ z4_res = svget3 (z4, 0)) -+ -+/* -+** get3_s16_z4_1: -+** mov z4\.d, z5\.d -+** ret -+*/ -+TEST_GET (get3_s16_z4_1, svint16x3_t, svint16_t, -+ z4_res = svget3_s16 (z4, 1), -+ z4_res = svget3 (z4, 1)) -+ -+/* -+** get3_s16_z4_2: -+** mov z4\.d, z6\.d -+** ret -+*/ -+TEST_GET (get3_s16_z4_2, svint16x3_t, svint16_t, -+ z4_res = svget3_s16 (z4, 2), -+ z4_res = svget3 (z4, 2)) -+ -+/* -+** get3_s16_z5_0: -+** mov z5\.d, z4\.d -+** ret -+*/ -+TEST_GET (get3_s16_z5_0, svint16x3_t, svint16_t, -+ z5_res = svget3_s16 (z4, 0), -+ z5_res = svget3 (z4, 0)) -+ -+/* -+** get3_s16_z5_1: -+** ret -+*/ -+TEST_GET (get3_s16_z5_1, svint16x3_t, svint16_t, -+ z5_res = svget3_s16 (z4, 1), -+ z5_res = svget3 (z4, 1)) -+ -+/* -+** get3_s16_z5_2: -+** mov z5\.d, z6\.d -+** ret -+*/ -+TEST_GET (get3_s16_z5_2, svint16x3_t, svint16_t, -+ z5_res = svget3_s16 (z4, 2), -+ z5_res = svget3 (z4, 2)) -+ -+/* -+** get3_s16_z6_0: -+** mov z6\.d, z4\.d -+** ret -+*/ -+TEST_GET (get3_s16_z6_0, svint16x3_t, svint16_t, -+ z6_res = svget3_s16 (z4, 0), -+ z6_res = svget3 (z4, 0)) -+ -+/* -+** get3_s16_z6_1: -+** mov z6\.d, z5\.d -+** ret -+*/ -+TEST_GET (get3_s16_z6_1, svint16x3_t, svint16_t, -+ z6_res = svget3_s16 (z4, 1), -+ z6_res = svget3 (z4, 1)) -+ -+/* -+** get3_s16_z6_2: -+** ret -+*/ -+TEST_GET (get3_s16_z6_2, svint16x3_t, svint16_t, -+ z6_res = svget3_s16 (z4, 2), -+ z6_res = svget3 (z4, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_s32.c -new file mode 100644 -index 000000000..f0f7785c8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_s32.c -@@ -0,0 +1,108 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** get3_s32_z0_0: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_GET (get3_s32_z0_0, svint32x3_t, svint32_t, -+ z0 = svget3_s32 (z4, 0), -+ z0 = svget3 (z4, 0)) -+ -+/* -+** get3_s32_z0_1: -+** mov z0\.d, z5\.d -+** ret -+*/ -+TEST_GET (get3_s32_z0_1, svint32x3_t, svint32_t, -+ z0 = svget3_s32 (z4, 1), -+ z0 = svget3 (z4, 1)) -+ -+/* -+** get3_s32_z0_2: -+** mov z0\.d, z6\.d -+** ret -+*/ -+TEST_GET (get3_s32_z0_2, svint32x3_t, svint32_t, -+ z0 = 
svget3_s32 (z4, 2), -+ z0 = svget3 (z4, 2)) -+ -+/* -+** get3_s32_z4_0: -+** ret -+*/ -+TEST_GET (get3_s32_z4_0, svint32x3_t, svint32_t, -+ z4_res = svget3_s32 (z4, 0), -+ z4_res = svget3 (z4, 0)) -+ -+/* -+** get3_s32_z4_1: -+** mov z4\.d, z5\.d -+** ret -+*/ -+TEST_GET (get3_s32_z4_1, svint32x3_t, svint32_t, -+ z4_res = svget3_s32 (z4, 1), -+ z4_res = svget3 (z4, 1)) -+ -+/* -+** get3_s32_z4_2: -+** mov z4\.d, z6\.d -+** ret -+*/ -+TEST_GET (get3_s32_z4_2, svint32x3_t, svint32_t, -+ z4_res = svget3_s32 (z4, 2), -+ z4_res = svget3 (z4, 2)) -+ -+/* -+** get3_s32_z5_0: -+** mov z5\.d, z4\.d -+** ret -+*/ -+TEST_GET (get3_s32_z5_0, svint32x3_t, svint32_t, -+ z5_res = svget3_s32 (z4, 0), -+ z5_res = svget3 (z4, 0)) -+ -+/* -+** get3_s32_z5_1: -+** ret -+*/ -+TEST_GET (get3_s32_z5_1, svint32x3_t, svint32_t, -+ z5_res = svget3_s32 (z4, 1), -+ z5_res = svget3 (z4, 1)) -+ -+/* -+** get3_s32_z5_2: -+** mov z5\.d, z6\.d -+** ret -+*/ -+TEST_GET (get3_s32_z5_2, svint32x3_t, svint32_t, -+ z5_res = svget3_s32 (z4, 2), -+ z5_res = svget3 (z4, 2)) -+ -+/* -+** get3_s32_z6_0: -+** mov z6\.d, z4\.d -+** ret -+*/ -+TEST_GET (get3_s32_z6_0, svint32x3_t, svint32_t, -+ z6_res = svget3_s32 (z4, 0), -+ z6_res = svget3 (z4, 0)) -+ -+/* -+** get3_s32_z6_1: -+** mov z6\.d, z5\.d -+** ret -+*/ -+TEST_GET (get3_s32_z6_1, svint32x3_t, svint32_t, -+ z6_res = svget3_s32 (z4, 1), -+ z6_res = svget3 (z4, 1)) -+ -+/* -+** get3_s32_z6_2: -+** ret -+*/ -+TEST_GET (get3_s32_z6_2, svint32x3_t, svint32_t, -+ z6_res = svget3_s32 (z4, 2), -+ z6_res = svget3 (z4, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_s64.c -new file mode 100644 -index 000000000..92500bfdf ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_s64.c -@@ -0,0 +1,108 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** get3_s64_z0_0: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_GET (get3_s64_z0_0, svint64x3_t, svint64_t, -+ z0 = svget3_s64 (z4, 0), -+ z0 = svget3 (z4, 0)) -+ -+/* -+** get3_s64_z0_1: -+** mov z0\.d, z5\.d -+** ret -+*/ -+TEST_GET (get3_s64_z0_1, svint64x3_t, svint64_t, -+ z0 = svget3_s64 (z4, 1), -+ z0 = svget3 (z4, 1)) -+ -+/* -+** get3_s64_z0_2: -+** mov z0\.d, z6\.d -+** ret -+*/ -+TEST_GET (get3_s64_z0_2, svint64x3_t, svint64_t, -+ z0 = svget3_s64 (z4, 2), -+ z0 = svget3 (z4, 2)) -+ -+/* -+** get3_s64_z4_0: -+** ret -+*/ -+TEST_GET (get3_s64_z4_0, svint64x3_t, svint64_t, -+ z4_res = svget3_s64 (z4, 0), -+ z4_res = svget3 (z4, 0)) -+ -+/* -+** get3_s64_z4_1: -+** mov z4\.d, z5\.d -+** ret -+*/ -+TEST_GET (get3_s64_z4_1, svint64x3_t, svint64_t, -+ z4_res = svget3_s64 (z4, 1), -+ z4_res = svget3 (z4, 1)) -+ -+/* -+** get3_s64_z4_2: -+** mov z4\.d, z6\.d -+** ret -+*/ -+TEST_GET (get3_s64_z4_2, svint64x3_t, svint64_t, -+ z4_res = svget3_s64 (z4, 2), -+ z4_res = svget3 (z4, 2)) -+ -+/* -+** get3_s64_z5_0: -+** mov z5\.d, z4\.d -+** ret -+*/ -+TEST_GET (get3_s64_z5_0, svint64x3_t, svint64_t, -+ z5_res = svget3_s64 (z4, 0), -+ z5_res = svget3 (z4, 0)) -+ -+/* -+** get3_s64_z5_1: -+** ret -+*/ -+TEST_GET (get3_s64_z5_1, svint64x3_t, svint64_t, -+ z5_res = svget3_s64 (z4, 1), -+ z5_res = svget3 (z4, 1)) -+ -+/* -+** get3_s64_z5_2: -+** mov z5\.d, z6\.d -+** ret -+*/ -+TEST_GET (get3_s64_z5_2, svint64x3_t, svint64_t, -+ z5_res = svget3_s64 (z4, 2), -+ z5_res = svget3 (z4, 2)) -+ -+/* -+** get3_s64_z6_0: -+** mov z6\.d, z4\.d -+** ret -+*/ -+TEST_GET (get3_s64_z6_0, svint64x3_t, svint64_t, 
-+ z6_res = svget3_s64 (z4, 0), -+ z6_res = svget3 (z4, 0)) -+ -+/* -+** get3_s64_z6_1: -+** mov z6\.d, z5\.d -+** ret -+*/ -+TEST_GET (get3_s64_z6_1, svint64x3_t, svint64_t, -+ z6_res = svget3_s64 (z4, 1), -+ z6_res = svget3 (z4, 1)) -+ -+/* -+** get3_s64_z6_2: -+** ret -+*/ -+TEST_GET (get3_s64_z6_2, svint64x3_t, svint64_t, -+ z6_res = svget3_s64 (z4, 2), -+ z6_res = svget3 (z4, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_s8.c -new file mode 100644 -index 000000000..edf225ba5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_s8.c -@@ -0,0 +1,108 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** get3_s8_z0_0: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_GET (get3_s8_z0_0, svint8x3_t, svint8_t, -+ z0 = svget3_s8 (z4, 0), -+ z0 = svget3 (z4, 0)) -+ -+/* -+** get3_s8_z0_1: -+** mov z0\.d, z5\.d -+** ret -+*/ -+TEST_GET (get3_s8_z0_1, svint8x3_t, svint8_t, -+ z0 = svget3_s8 (z4, 1), -+ z0 = svget3 (z4, 1)) -+ -+/* -+** get3_s8_z0_2: -+** mov z0\.d, z6\.d -+** ret -+*/ -+TEST_GET (get3_s8_z0_2, svint8x3_t, svint8_t, -+ z0 = svget3_s8 (z4, 2), -+ z0 = svget3 (z4, 2)) -+ -+/* -+** get3_s8_z4_0: -+** ret -+*/ -+TEST_GET (get3_s8_z4_0, svint8x3_t, svint8_t, -+ z4_res = svget3_s8 (z4, 0), -+ z4_res = svget3 (z4, 0)) -+ -+/* -+** get3_s8_z4_1: -+** mov z4\.d, z5\.d -+** ret -+*/ -+TEST_GET (get3_s8_z4_1, svint8x3_t, svint8_t, -+ z4_res = svget3_s8 (z4, 1), -+ z4_res = svget3 (z4, 1)) -+ -+/* -+** get3_s8_z4_2: -+** mov z4\.d, z6\.d -+** ret -+*/ -+TEST_GET (get3_s8_z4_2, svint8x3_t, svint8_t, -+ z4_res = svget3_s8 (z4, 2), -+ z4_res = svget3 (z4, 2)) -+ -+/* -+** get3_s8_z5_0: -+** mov z5\.d, z4\.d -+** ret -+*/ -+TEST_GET (get3_s8_z5_0, svint8x3_t, svint8_t, -+ z5_res = svget3_s8 (z4, 0), -+ z5_res = svget3 (z4, 0)) -+ -+/* -+** get3_s8_z5_1: -+** ret -+*/ -+TEST_GET (get3_s8_z5_1, svint8x3_t, svint8_t, -+ z5_res = svget3_s8 (z4, 1), -+ z5_res = svget3 (z4, 1)) -+ -+/* -+** get3_s8_z5_2: -+** mov z5\.d, z6\.d -+** ret -+*/ -+TEST_GET (get3_s8_z5_2, svint8x3_t, svint8_t, -+ z5_res = svget3_s8 (z4, 2), -+ z5_res = svget3 (z4, 2)) -+ -+/* -+** get3_s8_z6_0: -+** mov z6\.d, z4\.d -+** ret -+*/ -+TEST_GET (get3_s8_z6_0, svint8x3_t, svint8_t, -+ z6_res = svget3_s8 (z4, 0), -+ z6_res = svget3 (z4, 0)) -+ -+/* -+** get3_s8_z6_1: -+** mov z6\.d, z5\.d -+** ret -+*/ -+TEST_GET (get3_s8_z6_1, svint8x3_t, svint8_t, -+ z6_res = svget3_s8 (z4, 1), -+ z6_res = svget3 (z4, 1)) -+ -+/* -+** get3_s8_z6_2: -+** ret -+*/ -+TEST_GET (get3_s8_z6_2, svint8x3_t, svint8_t, -+ z6_res = svget3_s8 (z4, 2), -+ z6_res = svget3 (z4, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_u16.c -new file mode 100644 -index 000000000..1fa7c63c0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_u16.c -@@ -0,0 +1,108 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** get3_u16_z0_0: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_GET (get3_u16_z0_0, svuint16x3_t, svuint16_t, -+ z0 = svget3_u16 (z4, 0), -+ z0 = svget3 (z4, 0)) -+ -+/* -+** get3_u16_z0_1: -+** mov z0\.d, z5\.d -+** ret -+*/ -+TEST_GET (get3_u16_z0_1, svuint16x3_t, svuint16_t, -+ z0 = svget3_u16 (z4, 1), -+ z0 = svget3 (z4, 1)) -+ -+/* -+** get3_u16_z0_2: -+** mov z0\.d, z6\.d -+** ret -+*/ -+TEST_GET (get3_u16_z0_2, svuint16x3_t, svuint16_t, -+ z0 
= svget3_u16 (z4, 2), -+ z0 = svget3 (z4, 2)) -+ -+/* -+** get3_u16_z4_0: -+** ret -+*/ -+TEST_GET (get3_u16_z4_0, svuint16x3_t, svuint16_t, -+ z4_res = svget3_u16 (z4, 0), -+ z4_res = svget3 (z4, 0)) -+ -+/* -+** get3_u16_z4_1: -+** mov z4\.d, z5\.d -+** ret -+*/ -+TEST_GET (get3_u16_z4_1, svuint16x3_t, svuint16_t, -+ z4_res = svget3_u16 (z4, 1), -+ z4_res = svget3 (z4, 1)) -+ -+/* -+** get3_u16_z4_2: -+** mov z4\.d, z6\.d -+** ret -+*/ -+TEST_GET (get3_u16_z4_2, svuint16x3_t, svuint16_t, -+ z4_res = svget3_u16 (z4, 2), -+ z4_res = svget3 (z4, 2)) -+ -+/* -+** get3_u16_z5_0: -+** mov z5\.d, z4\.d -+** ret -+*/ -+TEST_GET (get3_u16_z5_0, svuint16x3_t, svuint16_t, -+ z5_res = svget3_u16 (z4, 0), -+ z5_res = svget3 (z4, 0)) -+ -+/* -+** get3_u16_z5_1: -+** ret -+*/ -+TEST_GET (get3_u16_z5_1, svuint16x3_t, svuint16_t, -+ z5_res = svget3_u16 (z4, 1), -+ z5_res = svget3 (z4, 1)) -+ -+/* -+** get3_u16_z5_2: -+** mov z5\.d, z6\.d -+** ret -+*/ -+TEST_GET (get3_u16_z5_2, svuint16x3_t, svuint16_t, -+ z5_res = svget3_u16 (z4, 2), -+ z5_res = svget3 (z4, 2)) -+ -+/* -+** get3_u16_z6_0: -+** mov z6\.d, z4\.d -+** ret -+*/ -+TEST_GET (get3_u16_z6_0, svuint16x3_t, svuint16_t, -+ z6_res = svget3_u16 (z4, 0), -+ z6_res = svget3 (z4, 0)) -+ -+/* -+** get3_u16_z6_1: -+** mov z6\.d, z5\.d -+** ret -+*/ -+TEST_GET (get3_u16_z6_1, svuint16x3_t, svuint16_t, -+ z6_res = svget3_u16 (z4, 1), -+ z6_res = svget3 (z4, 1)) -+ -+/* -+** get3_u16_z6_2: -+** ret -+*/ -+TEST_GET (get3_u16_z6_2, svuint16x3_t, svuint16_t, -+ z6_res = svget3_u16 (z4, 2), -+ z6_res = svget3 (z4, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_u32.c -new file mode 100644 -index 000000000..03b5f2616 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_u32.c -@@ -0,0 +1,108 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** get3_u32_z0_0: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_GET (get3_u32_z0_0, svuint32x3_t, svuint32_t, -+ z0 = svget3_u32 (z4, 0), -+ z0 = svget3 (z4, 0)) -+ -+/* -+** get3_u32_z0_1: -+** mov z0\.d, z5\.d -+** ret -+*/ -+TEST_GET (get3_u32_z0_1, svuint32x3_t, svuint32_t, -+ z0 = svget3_u32 (z4, 1), -+ z0 = svget3 (z4, 1)) -+ -+/* -+** get3_u32_z0_2: -+** mov z0\.d, z6\.d -+** ret -+*/ -+TEST_GET (get3_u32_z0_2, svuint32x3_t, svuint32_t, -+ z0 = svget3_u32 (z4, 2), -+ z0 = svget3 (z4, 2)) -+ -+/* -+** get3_u32_z4_0: -+** ret -+*/ -+TEST_GET (get3_u32_z4_0, svuint32x3_t, svuint32_t, -+ z4_res = svget3_u32 (z4, 0), -+ z4_res = svget3 (z4, 0)) -+ -+/* -+** get3_u32_z4_1: -+** mov z4\.d, z5\.d -+** ret -+*/ -+TEST_GET (get3_u32_z4_1, svuint32x3_t, svuint32_t, -+ z4_res = svget3_u32 (z4, 1), -+ z4_res = svget3 (z4, 1)) -+ -+/* -+** get3_u32_z4_2: -+** mov z4\.d, z6\.d -+** ret -+*/ -+TEST_GET (get3_u32_z4_2, svuint32x3_t, svuint32_t, -+ z4_res = svget3_u32 (z4, 2), -+ z4_res = svget3 (z4, 2)) -+ -+/* -+** get3_u32_z5_0: -+** mov z5\.d, z4\.d -+** ret -+*/ -+TEST_GET (get3_u32_z5_0, svuint32x3_t, svuint32_t, -+ z5_res = svget3_u32 (z4, 0), -+ z5_res = svget3 (z4, 0)) -+ -+/* -+** get3_u32_z5_1: -+** ret -+*/ -+TEST_GET (get3_u32_z5_1, svuint32x3_t, svuint32_t, -+ z5_res = svget3_u32 (z4, 1), -+ z5_res = svget3 (z4, 1)) -+ -+/* -+** get3_u32_z5_2: -+** mov z5\.d, z6\.d -+** ret -+*/ -+TEST_GET (get3_u32_z5_2, svuint32x3_t, svuint32_t, -+ z5_res = svget3_u32 (z4, 2), -+ z5_res = svget3 (z4, 2)) -+ -+/* -+** get3_u32_z6_0: -+** mov z6\.d, z4\.d -+** ret -+*/ -+TEST_GET 
(get3_u32_z6_0, svuint32x3_t, svuint32_t, -+ z6_res = svget3_u32 (z4, 0), -+ z6_res = svget3 (z4, 0)) -+ -+/* -+** get3_u32_z6_1: -+** mov z6\.d, z5\.d -+** ret -+*/ -+TEST_GET (get3_u32_z6_1, svuint32x3_t, svuint32_t, -+ z6_res = svget3_u32 (z4, 1), -+ z6_res = svget3 (z4, 1)) -+ -+/* -+** get3_u32_z6_2: -+** ret -+*/ -+TEST_GET (get3_u32_z6_2, svuint32x3_t, svuint32_t, -+ z6_res = svget3_u32 (z4, 2), -+ z6_res = svget3 (z4, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_u64.c -new file mode 100644 -index 000000000..ae4ef0024 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_u64.c -@@ -0,0 +1,108 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** get3_u64_z0_0: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_GET (get3_u64_z0_0, svuint64x3_t, svuint64_t, -+ z0 = svget3_u64 (z4, 0), -+ z0 = svget3 (z4, 0)) -+ -+/* -+** get3_u64_z0_1: -+** mov z0\.d, z5\.d -+** ret -+*/ -+TEST_GET (get3_u64_z0_1, svuint64x3_t, svuint64_t, -+ z0 = svget3_u64 (z4, 1), -+ z0 = svget3 (z4, 1)) -+ -+/* -+** get3_u64_z0_2: -+** mov z0\.d, z6\.d -+** ret -+*/ -+TEST_GET (get3_u64_z0_2, svuint64x3_t, svuint64_t, -+ z0 = svget3_u64 (z4, 2), -+ z0 = svget3 (z4, 2)) -+ -+/* -+** get3_u64_z4_0: -+** ret -+*/ -+TEST_GET (get3_u64_z4_0, svuint64x3_t, svuint64_t, -+ z4_res = svget3_u64 (z4, 0), -+ z4_res = svget3 (z4, 0)) -+ -+/* -+** get3_u64_z4_1: -+** mov z4\.d, z5\.d -+** ret -+*/ -+TEST_GET (get3_u64_z4_1, svuint64x3_t, svuint64_t, -+ z4_res = svget3_u64 (z4, 1), -+ z4_res = svget3 (z4, 1)) -+ -+/* -+** get3_u64_z4_2: -+** mov z4\.d, z6\.d -+** ret -+*/ -+TEST_GET (get3_u64_z4_2, svuint64x3_t, svuint64_t, -+ z4_res = svget3_u64 (z4, 2), -+ z4_res = svget3 (z4, 2)) -+ -+/* -+** get3_u64_z5_0: -+** mov z5\.d, z4\.d -+** ret -+*/ -+TEST_GET (get3_u64_z5_0, svuint64x3_t, svuint64_t, -+ z5_res = svget3_u64 (z4, 0), -+ z5_res = svget3 (z4, 0)) -+ -+/* -+** get3_u64_z5_1: -+** ret -+*/ -+TEST_GET (get3_u64_z5_1, svuint64x3_t, svuint64_t, -+ z5_res = svget3_u64 (z4, 1), -+ z5_res = svget3 (z4, 1)) -+ -+/* -+** get3_u64_z5_2: -+** mov z5\.d, z6\.d -+** ret -+*/ -+TEST_GET (get3_u64_z5_2, svuint64x3_t, svuint64_t, -+ z5_res = svget3_u64 (z4, 2), -+ z5_res = svget3 (z4, 2)) -+ -+/* -+** get3_u64_z6_0: -+** mov z6\.d, z4\.d -+** ret -+*/ -+TEST_GET (get3_u64_z6_0, svuint64x3_t, svuint64_t, -+ z6_res = svget3_u64 (z4, 0), -+ z6_res = svget3 (z4, 0)) -+ -+/* -+** get3_u64_z6_1: -+** mov z6\.d, z5\.d -+** ret -+*/ -+TEST_GET (get3_u64_z6_1, svuint64x3_t, svuint64_t, -+ z6_res = svget3_u64 (z4, 1), -+ z6_res = svget3 (z4, 1)) -+ -+/* -+** get3_u64_z6_2: -+** ret -+*/ -+TEST_GET (get3_u64_z6_2, svuint64x3_t, svuint64_t, -+ z6_res = svget3_u64 (z4, 2), -+ z6_res = svget3 (z4, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_u8.c -new file mode 100644 -index 000000000..497dcbbae ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get3_u8.c -@@ -0,0 +1,108 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** get3_u8_z0_0: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_GET (get3_u8_z0_0, svuint8x3_t, svuint8_t, -+ z0 = svget3_u8 (z4, 0), -+ z0 = svget3 (z4, 0)) -+ -+/* -+** get3_u8_z0_1: -+** mov z0\.d, z5\.d -+** ret -+*/ -+TEST_GET (get3_u8_z0_1, svuint8x3_t, svuint8_t, -+ z0 = svget3_u8 (z4, 1), -+ z0 = svget3 (z4, 1)) -+ 
-+/* -+** get3_u8_z0_2: -+** mov z0\.d, z6\.d -+** ret -+*/ -+TEST_GET (get3_u8_z0_2, svuint8x3_t, svuint8_t, -+ z0 = svget3_u8 (z4, 2), -+ z0 = svget3 (z4, 2)) -+ -+/* -+** get3_u8_z4_0: -+** ret -+*/ -+TEST_GET (get3_u8_z4_0, svuint8x3_t, svuint8_t, -+ z4_res = svget3_u8 (z4, 0), -+ z4_res = svget3 (z4, 0)) -+ -+/* -+** get3_u8_z4_1: -+** mov z4\.d, z5\.d -+** ret -+*/ -+TEST_GET (get3_u8_z4_1, svuint8x3_t, svuint8_t, -+ z4_res = svget3_u8 (z4, 1), -+ z4_res = svget3 (z4, 1)) -+ -+/* -+** get3_u8_z4_2: -+** mov z4\.d, z6\.d -+** ret -+*/ -+TEST_GET (get3_u8_z4_2, svuint8x3_t, svuint8_t, -+ z4_res = svget3_u8 (z4, 2), -+ z4_res = svget3 (z4, 2)) -+ -+/* -+** get3_u8_z5_0: -+** mov z5\.d, z4\.d -+** ret -+*/ -+TEST_GET (get3_u8_z5_0, svuint8x3_t, svuint8_t, -+ z5_res = svget3_u8 (z4, 0), -+ z5_res = svget3 (z4, 0)) -+ -+/* -+** get3_u8_z5_1: -+** ret -+*/ -+TEST_GET (get3_u8_z5_1, svuint8x3_t, svuint8_t, -+ z5_res = svget3_u8 (z4, 1), -+ z5_res = svget3 (z4, 1)) -+ -+/* -+** get3_u8_z5_2: -+** mov z5\.d, z6\.d -+** ret -+*/ -+TEST_GET (get3_u8_z5_2, svuint8x3_t, svuint8_t, -+ z5_res = svget3_u8 (z4, 2), -+ z5_res = svget3 (z4, 2)) -+ -+/* -+** get3_u8_z6_0: -+** mov z6\.d, z4\.d -+** ret -+*/ -+TEST_GET (get3_u8_z6_0, svuint8x3_t, svuint8_t, -+ z6_res = svget3_u8 (z4, 0), -+ z6_res = svget3 (z4, 0)) -+ -+/* -+** get3_u8_z6_1: -+** mov z6\.d, z5\.d -+** ret -+*/ -+TEST_GET (get3_u8_z6_1, svuint8x3_t, svuint8_t, -+ z6_res = svget3_u8 (z4, 1), -+ z6_res = svget3 (z4, 1)) -+ -+/* -+** get3_u8_z6_2: -+** ret -+*/ -+TEST_GET (get3_u8_z6_2, svuint8x3_t, svuint8_t, -+ z6_res = svget3_u8 (z4, 2), -+ z6_res = svget3 (z4, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_bf16.c -new file mode 100644 -index 000000000..f751fc147 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_bf16.c -@@ -0,0 +1,179 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** get4_bf16_z0_0: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_bf16_z0_0, svbfloat16x4_t, svbfloat16_t, -+ z0 = svget4_bf16 (z4, 0), -+ z0 = svget4 (z4, 0)) -+ -+/* -+** get4_bf16_z0_1: -+** mov z0\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_bf16_z0_1, svbfloat16x4_t, svbfloat16_t, -+ z0 = svget4_bf16 (z4, 1), -+ z0 = svget4 (z4, 1)) -+ -+/* -+** get4_bf16_z0_2: -+** mov z0\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_bf16_z0_2, svbfloat16x4_t, svbfloat16_t, -+ z0 = svget4_bf16 (z4, 2), -+ z0 = svget4 (z4, 2)) -+ -+/* -+** get4_bf16_z0_3: -+** mov z0\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_bf16_z0_3, svbfloat16x4_t, svbfloat16_t, -+ z0 = svget4_bf16 (z4, 3), -+ z0 = svget4 (z4, 3)) -+ -+/* -+** get4_bf16_z4_0: -+** ret -+*/ -+TEST_GET (get4_bf16_z4_0, svbfloat16x4_t, svbfloat16_t, -+ z4_res = svget4_bf16 (z4, 0), -+ z4_res = svget4 (z4, 0)) -+ -+/* -+** get4_bf16_z4_1: -+** mov z4\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_bf16_z4_1, svbfloat16x4_t, svbfloat16_t, -+ z4_res = svget4_bf16 (z4, 1), -+ z4_res = svget4 (z4, 1)) -+ -+/* -+** get4_bf16_z4_2: -+** mov z4\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_bf16_z4_2, svbfloat16x4_t, svbfloat16_t, -+ z4_res = svget4_bf16 (z4, 2), -+ z4_res = svget4 (z4, 2)) -+ -+/* -+** get4_bf16_z4_3: -+** mov z4\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_bf16_z4_3, svbfloat16x4_t, svbfloat16_t, -+ z4_res = svget4_bf16 (z4, 3), -+ z4_res = svget4 (z4, 3)) -+ -+/* -+** get4_bf16_z5_0: -+** mov z5\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_bf16_z5_0, svbfloat16x4_t, 
svbfloat16_t, -+ z5_res = svget4_bf16 (z4, 0), -+ z5_res = svget4 (z4, 0)) -+ -+/* -+** get4_bf16_z5_1: -+** ret -+*/ -+TEST_GET (get4_bf16_z5_1, svbfloat16x4_t, svbfloat16_t, -+ z5_res = svget4_bf16 (z4, 1), -+ z5_res = svget4 (z4, 1)) -+ -+/* -+** get4_bf16_z5_2: -+** mov z5\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_bf16_z5_2, svbfloat16x4_t, svbfloat16_t, -+ z5_res = svget4_bf16 (z4, 2), -+ z5_res = svget4 (z4, 2)) -+ -+/* -+** get4_bf16_z5_3: -+** mov z5\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_bf16_z5_3, svbfloat16x4_t, svbfloat16_t, -+ z5_res = svget4_bf16 (z4, 3), -+ z5_res = svget4 (z4, 3)) -+ -+/* -+** get4_bf16_z6_0: -+** mov z6\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_bf16_z6_0, svbfloat16x4_t, svbfloat16_t, -+ z6_res = svget4_bf16 (z4, 0), -+ z6_res = svget4 (z4, 0)) -+ -+/* -+** get4_bf16_z6_1: -+** mov z6\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_bf16_z6_1, svbfloat16x4_t, svbfloat16_t, -+ z6_res = svget4_bf16 (z4, 1), -+ z6_res = svget4 (z4, 1)) -+ -+/* -+** get4_bf16_z6_2: -+** ret -+*/ -+TEST_GET (get4_bf16_z6_2, svbfloat16x4_t, svbfloat16_t, -+ z6_res = svget4_bf16 (z4, 2), -+ z6_res = svget4 (z4, 2)) -+ -+/* -+** get4_bf16_z6_3: -+** mov z6\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_bf16_z6_3, svbfloat16x4_t, svbfloat16_t, -+ z6_res = svget4_bf16 (z4, 3), -+ z6_res = svget4 (z4, 3)) -+ -+/* -+** get4_bf16_z7_0: -+** mov z7\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_bf16_z7_0, svbfloat16x4_t, svbfloat16_t, -+ z7_res = svget4_bf16 (z4, 0), -+ z7_res = svget4 (z4, 0)) -+ -+/* -+** get4_bf16_z7_1: -+** mov z7\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_bf16_z7_1, svbfloat16x4_t, svbfloat16_t, -+ z7_res = svget4_bf16 (z4, 1), -+ z7_res = svget4 (z4, 1)) -+ -+/* -+** get4_bf16_z7_2: -+** mov z7\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_bf16_z7_2, svbfloat16x4_t, svbfloat16_t, -+ z7_res = svget4_bf16 (z4, 2), -+ z7_res = svget4 (z4, 2)) -+ -+/* -+** get4_bf16_z7_3: -+** ret -+*/ -+TEST_GET (get4_bf16_z7_3, svbfloat16x4_t, svbfloat16_t, -+ z7_res = svget4_bf16 (z4, 3), -+ z7_res = svget4 (z4, 3)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_f16.c -new file mode 100644 -index 000000000..7871f6f4e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_f16.c -@@ -0,0 +1,179 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** get4_f16_z0_0: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_f16_z0_0, svfloat16x4_t, svfloat16_t, -+ z0 = svget4_f16 (z4, 0), -+ z0 = svget4 (z4, 0)) -+ -+/* -+** get4_f16_z0_1: -+** mov z0\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_f16_z0_1, svfloat16x4_t, svfloat16_t, -+ z0 = svget4_f16 (z4, 1), -+ z0 = svget4 (z4, 1)) -+ -+/* -+** get4_f16_z0_2: -+** mov z0\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_f16_z0_2, svfloat16x4_t, svfloat16_t, -+ z0 = svget4_f16 (z4, 2), -+ z0 = svget4 (z4, 2)) -+ -+/* -+** get4_f16_z0_3: -+** mov z0\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_f16_z0_3, svfloat16x4_t, svfloat16_t, -+ z0 = svget4_f16 (z4, 3), -+ z0 = svget4 (z4, 3)) -+ -+/* -+** get4_f16_z4_0: -+** ret -+*/ -+TEST_GET (get4_f16_z4_0, svfloat16x4_t, svfloat16_t, -+ z4_res = svget4_f16 (z4, 0), -+ z4_res = svget4 (z4, 0)) -+ -+/* -+** get4_f16_z4_1: -+** mov z4\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_f16_z4_1, svfloat16x4_t, svfloat16_t, -+ z4_res = svget4_f16 (z4, 1), -+ z4_res = svget4 (z4, 1)) -+ -+/* -+** get4_f16_z4_2: -+** mov z4\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_f16_z4_2, svfloat16x4_t, svfloat16_t, -+ 
z4_res = svget4_f16 (z4, 2), -+ z4_res = svget4 (z4, 2)) -+ -+/* -+** get4_f16_z4_3: -+** mov z4\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_f16_z4_3, svfloat16x4_t, svfloat16_t, -+ z4_res = svget4_f16 (z4, 3), -+ z4_res = svget4 (z4, 3)) -+ -+/* -+** get4_f16_z5_0: -+** mov z5\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_f16_z5_0, svfloat16x4_t, svfloat16_t, -+ z5_res = svget4_f16 (z4, 0), -+ z5_res = svget4 (z4, 0)) -+ -+/* -+** get4_f16_z5_1: -+** ret -+*/ -+TEST_GET (get4_f16_z5_1, svfloat16x4_t, svfloat16_t, -+ z5_res = svget4_f16 (z4, 1), -+ z5_res = svget4 (z4, 1)) -+ -+/* -+** get4_f16_z5_2: -+** mov z5\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_f16_z5_2, svfloat16x4_t, svfloat16_t, -+ z5_res = svget4_f16 (z4, 2), -+ z5_res = svget4 (z4, 2)) -+ -+/* -+** get4_f16_z5_3: -+** mov z5\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_f16_z5_3, svfloat16x4_t, svfloat16_t, -+ z5_res = svget4_f16 (z4, 3), -+ z5_res = svget4 (z4, 3)) -+ -+/* -+** get4_f16_z6_0: -+** mov z6\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_f16_z6_0, svfloat16x4_t, svfloat16_t, -+ z6_res = svget4_f16 (z4, 0), -+ z6_res = svget4 (z4, 0)) -+ -+/* -+** get4_f16_z6_1: -+** mov z6\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_f16_z6_1, svfloat16x4_t, svfloat16_t, -+ z6_res = svget4_f16 (z4, 1), -+ z6_res = svget4 (z4, 1)) -+ -+/* -+** get4_f16_z6_2: -+** ret -+*/ -+TEST_GET (get4_f16_z6_2, svfloat16x4_t, svfloat16_t, -+ z6_res = svget4_f16 (z4, 2), -+ z6_res = svget4 (z4, 2)) -+ -+/* -+** get4_f16_z6_3: -+** mov z6\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_f16_z6_3, svfloat16x4_t, svfloat16_t, -+ z6_res = svget4_f16 (z4, 3), -+ z6_res = svget4 (z4, 3)) -+ -+/* -+** get4_f16_z7_0: -+** mov z7\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_f16_z7_0, svfloat16x4_t, svfloat16_t, -+ z7_res = svget4_f16 (z4, 0), -+ z7_res = svget4 (z4, 0)) -+ -+/* -+** get4_f16_z7_1: -+** mov z7\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_f16_z7_1, svfloat16x4_t, svfloat16_t, -+ z7_res = svget4_f16 (z4, 1), -+ z7_res = svget4 (z4, 1)) -+ -+/* -+** get4_f16_z7_2: -+** mov z7\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_f16_z7_2, svfloat16x4_t, svfloat16_t, -+ z7_res = svget4_f16 (z4, 2), -+ z7_res = svget4 (z4, 2)) -+ -+/* -+** get4_f16_z7_3: -+** ret -+*/ -+TEST_GET (get4_f16_z7_3, svfloat16x4_t, svfloat16_t, -+ z7_res = svget4_f16 (z4, 3), -+ z7_res = svget4 (z4, 3)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_f32.c -new file mode 100644 -index 000000000..a290e026d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_f32.c -@@ -0,0 +1,179 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** get4_f32_z0_0: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_f32_z0_0, svfloat32x4_t, svfloat32_t, -+ z0 = svget4_f32 (z4, 0), -+ z0 = svget4 (z4, 0)) -+ -+/* -+** get4_f32_z0_1: -+** mov z0\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_f32_z0_1, svfloat32x4_t, svfloat32_t, -+ z0 = svget4_f32 (z4, 1), -+ z0 = svget4 (z4, 1)) -+ -+/* -+** get4_f32_z0_2: -+** mov z0\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_f32_z0_2, svfloat32x4_t, svfloat32_t, -+ z0 = svget4_f32 (z4, 2), -+ z0 = svget4 (z4, 2)) -+ -+/* -+** get4_f32_z0_3: -+** mov z0\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_f32_z0_3, svfloat32x4_t, svfloat32_t, -+ z0 = svget4_f32 (z4, 3), -+ z0 = svget4 (z4, 3)) -+ -+/* -+** get4_f32_z4_0: -+** ret -+*/ -+TEST_GET (get4_f32_z4_0, svfloat32x4_t, svfloat32_t, -+ z4_res = svget4_f32 (z4, 0), -+ z4_res = svget4 (z4, 0)) -+ -+/* -+** 
get4_f32_z4_1: -+** mov z4\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_f32_z4_1, svfloat32x4_t, svfloat32_t, -+ z4_res = svget4_f32 (z4, 1), -+ z4_res = svget4 (z4, 1)) -+ -+/* -+** get4_f32_z4_2: -+** mov z4\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_f32_z4_2, svfloat32x4_t, svfloat32_t, -+ z4_res = svget4_f32 (z4, 2), -+ z4_res = svget4 (z4, 2)) -+ -+/* -+** get4_f32_z4_3: -+** mov z4\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_f32_z4_3, svfloat32x4_t, svfloat32_t, -+ z4_res = svget4_f32 (z4, 3), -+ z4_res = svget4 (z4, 3)) -+ -+/* -+** get4_f32_z5_0: -+** mov z5\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_f32_z5_0, svfloat32x4_t, svfloat32_t, -+ z5_res = svget4_f32 (z4, 0), -+ z5_res = svget4 (z4, 0)) -+ -+/* -+** get4_f32_z5_1: -+** ret -+*/ -+TEST_GET (get4_f32_z5_1, svfloat32x4_t, svfloat32_t, -+ z5_res = svget4_f32 (z4, 1), -+ z5_res = svget4 (z4, 1)) -+ -+/* -+** get4_f32_z5_2: -+** mov z5\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_f32_z5_2, svfloat32x4_t, svfloat32_t, -+ z5_res = svget4_f32 (z4, 2), -+ z5_res = svget4 (z4, 2)) -+ -+/* -+** get4_f32_z5_3: -+** mov z5\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_f32_z5_3, svfloat32x4_t, svfloat32_t, -+ z5_res = svget4_f32 (z4, 3), -+ z5_res = svget4 (z4, 3)) -+ -+/* -+** get4_f32_z6_0: -+** mov z6\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_f32_z6_0, svfloat32x4_t, svfloat32_t, -+ z6_res = svget4_f32 (z4, 0), -+ z6_res = svget4 (z4, 0)) -+ -+/* -+** get4_f32_z6_1: -+** mov z6\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_f32_z6_1, svfloat32x4_t, svfloat32_t, -+ z6_res = svget4_f32 (z4, 1), -+ z6_res = svget4 (z4, 1)) -+ -+/* -+** get4_f32_z6_2: -+** ret -+*/ -+TEST_GET (get4_f32_z6_2, svfloat32x4_t, svfloat32_t, -+ z6_res = svget4_f32 (z4, 2), -+ z6_res = svget4 (z4, 2)) -+ -+/* -+** get4_f32_z6_3: -+** mov z6\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_f32_z6_3, svfloat32x4_t, svfloat32_t, -+ z6_res = svget4_f32 (z4, 3), -+ z6_res = svget4 (z4, 3)) -+ -+/* -+** get4_f32_z7_0: -+** mov z7\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_f32_z7_0, svfloat32x4_t, svfloat32_t, -+ z7_res = svget4_f32 (z4, 0), -+ z7_res = svget4 (z4, 0)) -+ -+/* -+** get4_f32_z7_1: -+** mov z7\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_f32_z7_1, svfloat32x4_t, svfloat32_t, -+ z7_res = svget4_f32 (z4, 1), -+ z7_res = svget4 (z4, 1)) -+ -+/* -+** get4_f32_z7_2: -+** mov z7\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_f32_z7_2, svfloat32x4_t, svfloat32_t, -+ z7_res = svget4_f32 (z4, 2), -+ z7_res = svget4 (z4, 2)) -+ -+/* -+** get4_f32_z7_3: -+** ret -+*/ -+TEST_GET (get4_f32_z7_3, svfloat32x4_t, svfloat32_t, -+ z7_res = svget4_f32 (z4, 3), -+ z7_res = svget4 (z4, 3)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_f64.c -new file mode 100644 -index 000000000..2c34dfef1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_f64.c -@@ -0,0 +1,179 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** get4_f64_z0_0: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_f64_z0_0, svfloat64x4_t, svfloat64_t, -+ z0 = svget4_f64 (z4, 0), -+ z0 = svget4 (z4, 0)) -+ -+/* -+** get4_f64_z0_1: -+** mov z0\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_f64_z0_1, svfloat64x4_t, svfloat64_t, -+ z0 = svget4_f64 (z4, 1), -+ z0 = svget4 (z4, 1)) -+ -+/* -+** get4_f64_z0_2: -+** mov z0\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_f64_z0_2, svfloat64x4_t, svfloat64_t, -+ z0 = svget4_f64 (z4, 2), -+ z0 = svget4 (z4, 2)) -+ -+/* -+** get4_f64_z0_3: -+** mov z0\.d, z7\.d -+** 
ret -+*/ -+TEST_GET (get4_f64_z0_3, svfloat64x4_t, svfloat64_t, -+ z0 = svget4_f64 (z4, 3), -+ z0 = svget4 (z4, 3)) -+ -+/* -+** get4_f64_z4_0: -+** ret -+*/ -+TEST_GET (get4_f64_z4_0, svfloat64x4_t, svfloat64_t, -+ z4_res = svget4_f64 (z4, 0), -+ z4_res = svget4 (z4, 0)) -+ -+/* -+** get4_f64_z4_1: -+** mov z4\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_f64_z4_1, svfloat64x4_t, svfloat64_t, -+ z4_res = svget4_f64 (z4, 1), -+ z4_res = svget4 (z4, 1)) -+ -+/* -+** get4_f64_z4_2: -+** mov z4\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_f64_z4_2, svfloat64x4_t, svfloat64_t, -+ z4_res = svget4_f64 (z4, 2), -+ z4_res = svget4 (z4, 2)) -+ -+/* -+** get4_f64_z4_3: -+** mov z4\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_f64_z4_3, svfloat64x4_t, svfloat64_t, -+ z4_res = svget4_f64 (z4, 3), -+ z4_res = svget4 (z4, 3)) -+ -+/* -+** get4_f64_z5_0: -+** mov z5\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_f64_z5_0, svfloat64x4_t, svfloat64_t, -+ z5_res = svget4_f64 (z4, 0), -+ z5_res = svget4 (z4, 0)) -+ -+/* -+** get4_f64_z5_1: -+** ret -+*/ -+TEST_GET (get4_f64_z5_1, svfloat64x4_t, svfloat64_t, -+ z5_res = svget4_f64 (z4, 1), -+ z5_res = svget4 (z4, 1)) -+ -+/* -+** get4_f64_z5_2: -+** mov z5\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_f64_z5_2, svfloat64x4_t, svfloat64_t, -+ z5_res = svget4_f64 (z4, 2), -+ z5_res = svget4 (z4, 2)) -+ -+/* -+** get4_f64_z5_3: -+** mov z5\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_f64_z5_3, svfloat64x4_t, svfloat64_t, -+ z5_res = svget4_f64 (z4, 3), -+ z5_res = svget4 (z4, 3)) -+ -+/* -+** get4_f64_z6_0: -+** mov z6\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_f64_z6_0, svfloat64x4_t, svfloat64_t, -+ z6_res = svget4_f64 (z4, 0), -+ z6_res = svget4 (z4, 0)) -+ -+/* -+** get4_f64_z6_1: -+** mov z6\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_f64_z6_1, svfloat64x4_t, svfloat64_t, -+ z6_res = svget4_f64 (z4, 1), -+ z6_res = svget4 (z4, 1)) -+ -+/* -+** get4_f64_z6_2: -+** ret -+*/ -+TEST_GET (get4_f64_z6_2, svfloat64x4_t, svfloat64_t, -+ z6_res = svget4_f64 (z4, 2), -+ z6_res = svget4 (z4, 2)) -+ -+/* -+** get4_f64_z6_3: -+** mov z6\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_f64_z6_3, svfloat64x4_t, svfloat64_t, -+ z6_res = svget4_f64 (z4, 3), -+ z6_res = svget4 (z4, 3)) -+ -+/* -+** get4_f64_z7_0: -+** mov z7\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_f64_z7_0, svfloat64x4_t, svfloat64_t, -+ z7_res = svget4_f64 (z4, 0), -+ z7_res = svget4 (z4, 0)) -+ -+/* -+** get4_f64_z7_1: -+** mov z7\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_f64_z7_1, svfloat64x4_t, svfloat64_t, -+ z7_res = svget4_f64 (z4, 1), -+ z7_res = svget4 (z4, 1)) -+ -+/* -+** get4_f64_z7_2: -+** mov z7\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_f64_z7_2, svfloat64x4_t, svfloat64_t, -+ z7_res = svget4_f64 (z4, 2), -+ z7_res = svget4 (z4, 2)) -+ -+/* -+** get4_f64_z7_3: -+** ret -+*/ -+TEST_GET (get4_f64_z7_3, svfloat64x4_t, svfloat64_t, -+ z7_res = svget4_f64 (z4, 3), -+ z7_res = svget4 (z4, 3)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_s16.c -new file mode 100644 -index 000000000..6a2280fea ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_s16.c -@@ -0,0 +1,179 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** get4_s16_z0_0: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_s16_z0_0, svint16x4_t, svint16_t, -+ z0 = svget4_s16 (z4, 0), -+ z0 = svget4 (z4, 0)) -+ -+/* -+** get4_s16_z0_1: -+** mov z0\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_s16_z0_1, svint16x4_t, svint16_t, 
-+ z0 = svget4_s16 (z4, 1), -+ z0 = svget4 (z4, 1)) -+ -+/* -+** get4_s16_z0_2: -+** mov z0\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_s16_z0_2, svint16x4_t, svint16_t, -+ z0 = svget4_s16 (z4, 2), -+ z0 = svget4 (z4, 2)) -+ -+/* -+** get4_s16_z0_3: -+** mov z0\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_s16_z0_3, svint16x4_t, svint16_t, -+ z0 = svget4_s16 (z4, 3), -+ z0 = svget4 (z4, 3)) -+ -+/* -+** get4_s16_z4_0: -+** ret -+*/ -+TEST_GET (get4_s16_z4_0, svint16x4_t, svint16_t, -+ z4_res = svget4_s16 (z4, 0), -+ z4_res = svget4 (z4, 0)) -+ -+/* -+** get4_s16_z4_1: -+** mov z4\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_s16_z4_1, svint16x4_t, svint16_t, -+ z4_res = svget4_s16 (z4, 1), -+ z4_res = svget4 (z4, 1)) -+ -+/* -+** get4_s16_z4_2: -+** mov z4\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_s16_z4_2, svint16x4_t, svint16_t, -+ z4_res = svget4_s16 (z4, 2), -+ z4_res = svget4 (z4, 2)) -+ -+/* -+** get4_s16_z4_3: -+** mov z4\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_s16_z4_3, svint16x4_t, svint16_t, -+ z4_res = svget4_s16 (z4, 3), -+ z4_res = svget4 (z4, 3)) -+ -+/* -+** get4_s16_z5_0: -+** mov z5\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_s16_z5_0, svint16x4_t, svint16_t, -+ z5_res = svget4_s16 (z4, 0), -+ z5_res = svget4 (z4, 0)) -+ -+/* -+** get4_s16_z5_1: -+** ret -+*/ -+TEST_GET (get4_s16_z5_1, svint16x4_t, svint16_t, -+ z5_res = svget4_s16 (z4, 1), -+ z5_res = svget4 (z4, 1)) -+ -+/* -+** get4_s16_z5_2: -+** mov z5\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_s16_z5_2, svint16x4_t, svint16_t, -+ z5_res = svget4_s16 (z4, 2), -+ z5_res = svget4 (z4, 2)) -+ -+/* -+** get4_s16_z5_3: -+** mov z5\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_s16_z5_3, svint16x4_t, svint16_t, -+ z5_res = svget4_s16 (z4, 3), -+ z5_res = svget4 (z4, 3)) -+ -+/* -+** get4_s16_z6_0: -+** mov z6\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_s16_z6_0, svint16x4_t, svint16_t, -+ z6_res = svget4_s16 (z4, 0), -+ z6_res = svget4 (z4, 0)) -+ -+/* -+** get4_s16_z6_1: -+** mov z6\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_s16_z6_1, svint16x4_t, svint16_t, -+ z6_res = svget4_s16 (z4, 1), -+ z6_res = svget4 (z4, 1)) -+ -+/* -+** get4_s16_z6_2: -+** ret -+*/ -+TEST_GET (get4_s16_z6_2, svint16x4_t, svint16_t, -+ z6_res = svget4_s16 (z4, 2), -+ z6_res = svget4 (z4, 2)) -+ -+/* -+** get4_s16_z6_3: -+** mov z6\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_s16_z6_3, svint16x4_t, svint16_t, -+ z6_res = svget4_s16 (z4, 3), -+ z6_res = svget4 (z4, 3)) -+ -+/* -+** get4_s16_z7_0: -+** mov z7\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_s16_z7_0, svint16x4_t, svint16_t, -+ z7_res = svget4_s16 (z4, 0), -+ z7_res = svget4 (z4, 0)) -+ -+/* -+** get4_s16_z7_1: -+** mov z7\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_s16_z7_1, svint16x4_t, svint16_t, -+ z7_res = svget4_s16 (z4, 1), -+ z7_res = svget4 (z4, 1)) -+ -+/* -+** get4_s16_z7_2: -+** mov z7\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_s16_z7_2, svint16x4_t, svint16_t, -+ z7_res = svget4_s16 (z4, 2), -+ z7_res = svget4 (z4, 2)) -+ -+/* -+** get4_s16_z7_3: -+** ret -+*/ -+TEST_GET (get4_s16_z7_3, svint16x4_t, svint16_t, -+ z7_res = svget4_s16 (z4, 3), -+ z7_res = svget4 (z4, 3)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_s32.c -new file mode 100644 -index 000000000..41aca09d9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_s32.c -@@ -0,0 +1,179 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** get4_s32_z0_0: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_GET 
(get4_s32_z0_0, svint32x4_t, svint32_t, -+ z0 = svget4_s32 (z4, 0), -+ z0 = svget4 (z4, 0)) -+ -+/* -+** get4_s32_z0_1: -+** mov z0\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_s32_z0_1, svint32x4_t, svint32_t, -+ z0 = svget4_s32 (z4, 1), -+ z0 = svget4 (z4, 1)) -+ -+/* -+** get4_s32_z0_2: -+** mov z0\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_s32_z0_2, svint32x4_t, svint32_t, -+ z0 = svget4_s32 (z4, 2), -+ z0 = svget4 (z4, 2)) -+ -+/* -+** get4_s32_z0_3: -+** mov z0\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_s32_z0_3, svint32x4_t, svint32_t, -+ z0 = svget4_s32 (z4, 3), -+ z0 = svget4 (z4, 3)) -+ -+/* -+** get4_s32_z4_0: -+** ret -+*/ -+TEST_GET (get4_s32_z4_0, svint32x4_t, svint32_t, -+ z4_res = svget4_s32 (z4, 0), -+ z4_res = svget4 (z4, 0)) -+ -+/* -+** get4_s32_z4_1: -+** mov z4\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_s32_z4_1, svint32x4_t, svint32_t, -+ z4_res = svget4_s32 (z4, 1), -+ z4_res = svget4 (z4, 1)) -+ -+/* -+** get4_s32_z4_2: -+** mov z4\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_s32_z4_2, svint32x4_t, svint32_t, -+ z4_res = svget4_s32 (z4, 2), -+ z4_res = svget4 (z4, 2)) -+ -+/* -+** get4_s32_z4_3: -+** mov z4\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_s32_z4_3, svint32x4_t, svint32_t, -+ z4_res = svget4_s32 (z4, 3), -+ z4_res = svget4 (z4, 3)) -+ -+/* -+** get4_s32_z5_0: -+** mov z5\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_s32_z5_0, svint32x4_t, svint32_t, -+ z5_res = svget4_s32 (z4, 0), -+ z5_res = svget4 (z4, 0)) -+ -+/* -+** get4_s32_z5_1: -+** ret -+*/ -+TEST_GET (get4_s32_z5_1, svint32x4_t, svint32_t, -+ z5_res = svget4_s32 (z4, 1), -+ z5_res = svget4 (z4, 1)) -+ -+/* -+** get4_s32_z5_2: -+** mov z5\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_s32_z5_2, svint32x4_t, svint32_t, -+ z5_res = svget4_s32 (z4, 2), -+ z5_res = svget4 (z4, 2)) -+ -+/* -+** get4_s32_z5_3: -+** mov z5\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_s32_z5_3, svint32x4_t, svint32_t, -+ z5_res = svget4_s32 (z4, 3), -+ z5_res = svget4 (z4, 3)) -+ -+/* -+** get4_s32_z6_0: -+** mov z6\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_s32_z6_0, svint32x4_t, svint32_t, -+ z6_res = svget4_s32 (z4, 0), -+ z6_res = svget4 (z4, 0)) -+ -+/* -+** get4_s32_z6_1: -+** mov z6\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_s32_z6_1, svint32x4_t, svint32_t, -+ z6_res = svget4_s32 (z4, 1), -+ z6_res = svget4 (z4, 1)) -+ -+/* -+** get4_s32_z6_2: -+** ret -+*/ -+TEST_GET (get4_s32_z6_2, svint32x4_t, svint32_t, -+ z6_res = svget4_s32 (z4, 2), -+ z6_res = svget4 (z4, 2)) -+ -+/* -+** get4_s32_z6_3: -+** mov z6\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_s32_z6_3, svint32x4_t, svint32_t, -+ z6_res = svget4_s32 (z4, 3), -+ z6_res = svget4 (z4, 3)) -+ -+/* -+** get4_s32_z7_0: -+** mov z7\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_s32_z7_0, svint32x4_t, svint32_t, -+ z7_res = svget4_s32 (z4, 0), -+ z7_res = svget4 (z4, 0)) -+ -+/* -+** get4_s32_z7_1: -+** mov z7\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_s32_z7_1, svint32x4_t, svint32_t, -+ z7_res = svget4_s32 (z4, 1), -+ z7_res = svget4 (z4, 1)) -+ -+/* -+** get4_s32_z7_2: -+** mov z7\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_s32_z7_2, svint32x4_t, svint32_t, -+ z7_res = svget4_s32 (z4, 2), -+ z7_res = svget4 (z4, 2)) -+ -+/* -+** get4_s32_z7_3: -+** ret -+*/ -+TEST_GET (get4_s32_z7_3, svint32x4_t, svint32_t, -+ z7_res = svget4_s32 (z4, 3), -+ z7_res = svget4 (z4, 3)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_s64.c -new file mode 100644 -index 000000000..a17e2779c ---- /dev/null -+++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_s64.c -@@ -0,0 +1,179 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** get4_s64_z0_0: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_s64_z0_0, svint64x4_t, svint64_t, -+ z0 = svget4_s64 (z4, 0), -+ z0 = svget4 (z4, 0)) -+ -+/* -+** get4_s64_z0_1: -+** mov z0\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_s64_z0_1, svint64x4_t, svint64_t, -+ z0 = svget4_s64 (z4, 1), -+ z0 = svget4 (z4, 1)) -+ -+/* -+** get4_s64_z0_2: -+** mov z0\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_s64_z0_2, svint64x4_t, svint64_t, -+ z0 = svget4_s64 (z4, 2), -+ z0 = svget4 (z4, 2)) -+ -+/* -+** get4_s64_z0_3: -+** mov z0\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_s64_z0_3, svint64x4_t, svint64_t, -+ z0 = svget4_s64 (z4, 3), -+ z0 = svget4 (z4, 3)) -+ -+/* -+** get4_s64_z4_0: -+** ret -+*/ -+TEST_GET (get4_s64_z4_0, svint64x4_t, svint64_t, -+ z4_res = svget4_s64 (z4, 0), -+ z4_res = svget4 (z4, 0)) -+ -+/* -+** get4_s64_z4_1: -+** mov z4\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_s64_z4_1, svint64x4_t, svint64_t, -+ z4_res = svget4_s64 (z4, 1), -+ z4_res = svget4 (z4, 1)) -+ -+/* -+** get4_s64_z4_2: -+** mov z4\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_s64_z4_2, svint64x4_t, svint64_t, -+ z4_res = svget4_s64 (z4, 2), -+ z4_res = svget4 (z4, 2)) -+ -+/* -+** get4_s64_z4_3: -+** mov z4\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_s64_z4_3, svint64x4_t, svint64_t, -+ z4_res = svget4_s64 (z4, 3), -+ z4_res = svget4 (z4, 3)) -+ -+/* -+** get4_s64_z5_0: -+** mov z5\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_s64_z5_0, svint64x4_t, svint64_t, -+ z5_res = svget4_s64 (z4, 0), -+ z5_res = svget4 (z4, 0)) -+ -+/* -+** get4_s64_z5_1: -+** ret -+*/ -+TEST_GET (get4_s64_z5_1, svint64x4_t, svint64_t, -+ z5_res = svget4_s64 (z4, 1), -+ z5_res = svget4 (z4, 1)) -+ -+/* -+** get4_s64_z5_2: -+** mov z5\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_s64_z5_2, svint64x4_t, svint64_t, -+ z5_res = svget4_s64 (z4, 2), -+ z5_res = svget4 (z4, 2)) -+ -+/* -+** get4_s64_z5_3: -+** mov z5\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_s64_z5_3, svint64x4_t, svint64_t, -+ z5_res = svget4_s64 (z4, 3), -+ z5_res = svget4 (z4, 3)) -+ -+/* -+** get4_s64_z6_0: -+** mov z6\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_s64_z6_0, svint64x4_t, svint64_t, -+ z6_res = svget4_s64 (z4, 0), -+ z6_res = svget4 (z4, 0)) -+ -+/* -+** get4_s64_z6_1: -+** mov z6\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_s64_z6_1, svint64x4_t, svint64_t, -+ z6_res = svget4_s64 (z4, 1), -+ z6_res = svget4 (z4, 1)) -+ -+/* -+** get4_s64_z6_2: -+** ret -+*/ -+TEST_GET (get4_s64_z6_2, svint64x4_t, svint64_t, -+ z6_res = svget4_s64 (z4, 2), -+ z6_res = svget4 (z4, 2)) -+ -+/* -+** get4_s64_z6_3: -+** mov z6\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_s64_z6_3, svint64x4_t, svint64_t, -+ z6_res = svget4_s64 (z4, 3), -+ z6_res = svget4 (z4, 3)) -+ -+/* -+** get4_s64_z7_0: -+** mov z7\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_s64_z7_0, svint64x4_t, svint64_t, -+ z7_res = svget4_s64 (z4, 0), -+ z7_res = svget4 (z4, 0)) -+ -+/* -+** get4_s64_z7_1: -+** mov z7\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_s64_z7_1, svint64x4_t, svint64_t, -+ z7_res = svget4_s64 (z4, 1), -+ z7_res = svget4 (z4, 1)) -+ -+/* -+** get4_s64_z7_2: -+** mov z7\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_s64_z7_2, svint64x4_t, svint64_t, -+ z7_res = svget4_s64 (z4, 2), -+ z7_res = svget4 (z4, 2)) -+ -+/* -+** get4_s64_z7_3: -+** ret -+*/ -+TEST_GET (get4_s64_z7_3, svint64x4_t, svint64_t, -+ z7_res = svget4_s64 (z4, 3), -+ z7_res = svget4 (z4, 
3)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_s8.c -new file mode 100644 -index 000000000..9fa159597 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_s8.c -@@ -0,0 +1,179 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** get4_s8_z0_0: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_s8_z0_0, svint8x4_t, svint8_t, -+ z0 = svget4_s8 (z4, 0), -+ z0 = svget4 (z4, 0)) -+ -+/* -+** get4_s8_z0_1: -+** mov z0\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_s8_z0_1, svint8x4_t, svint8_t, -+ z0 = svget4_s8 (z4, 1), -+ z0 = svget4 (z4, 1)) -+ -+/* -+** get4_s8_z0_2: -+** mov z0\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_s8_z0_2, svint8x4_t, svint8_t, -+ z0 = svget4_s8 (z4, 2), -+ z0 = svget4 (z4, 2)) -+ -+/* -+** get4_s8_z0_3: -+** mov z0\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_s8_z0_3, svint8x4_t, svint8_t, -+ z0 = svget4_s8 (z4, 3), -+ z0 = svget4 (z4, 3)) -+ -+/* -+** get4_s8_z4_0: -+** ret -+*/ -+TEST_GET (get4_s8_z4_0, svint8x4_t, svint8_t, -+ z4_res = svget4_s8 (z4, 0), -+ z4_res = svget4 (z4, 0)) -+ -+/* -+** get4_s8_z4_1: -+** mov z4\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_s8_z4_1, svint8x4_t, svint8_t, -+ z4_res = svget4_s8 (z4, 1), -+ z4_res = svget4 (z4, 1)) -+ -+/* -+** get4_s8_z4_2: -+** mov z4\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_s8_z4_2, svint8x4_t, svint8_t, -+ z4_res = svget4_s8 (z4, 2), -+ z4_res = svget4 (z4, 2)) -+ -+/* -+** get4_s8_z4_3: -+** mov z4\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_s8_z4_3, svint8x4_t, svint8_t, -+ z4_res = svget4_s8 (z4, 3), -+ z4_res = svget4 (z4, 3)) -+ -+/* -+** get4_s8_z5_0: -+** mov z5\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_s8_z5_0, svint8x4_t, svint8_t, -+ z5_res = svget4_s8 (z4, 0), -+ z5_res = svget4 (z4, 0)) -+ -+/* -+** get4_s8_z5_1: -+** ret -+*/ -+TEST_GET (get4_s8_z5_1, svint8x4_t, svint8_t, -+ z5_res = svget4_s8 (z4, 1), -+ z5_res = svget4 (z4, 1)) -+ -+/* -+** get4_s8_z5_2: -+** mov z5\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_s8_z5_2, svint8x4_t, svint8_t, -+ z5_res = svget4_s8 (z4, 2), -+ z5_res = svget4 (z4, 2)) -+ -+/* -+** get4_s8_z5_3: -+** mov z5\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_s8_z5_3, svint8x4_t, svint8_t, -+ z5_res = svget4_s8 (z4, 3), -+ z5_res = svget4 (z4, 3)) -+ -+/* -+** get4_s8_z6_0: -+** mov z6\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_s8_z6_0, svint8x4_t, svint8_t, -+ z6_res = svget4_s8 (z4, 0), -+ z6_res = svget4 (z4, 0)) -+ -+/* -+** get4_s8_z6_1: -+** mov z6\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_s8_z6_1, svint8x4_t, svint8_t, -+ z6_res = svget4_s8 (z4, 1), -+ z6_res = svget4 (z4, 1)) -+ -+/* -+** get4_s8_z6_2: -+** ret -+*/ -+TEST_GET (get4_s8_z6_2, svint8x4_t, svint8_t, -+ z6_res = svget4_s8 (z4, 2), -+ z6_res = svget4 (z4, 2)) -+ -+/* -+** get4_s8_z6_3: -+** mov z6\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_s8_z6_3, svint8x4_t, svint8_t, -+ z6_res = svget4_s8 (z4, 3), -+ z6_res = svget4 (z4, 3)) -+ -+/* -+** get4_s8_z7_0: -+** mov z7\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_s8_z7_0, svint8x4_t, svint8_t, -+ z7_res = svget4_s8 (z4, 0), -+ z7_res = svget4 (z4, 0)) -+ -+/* -+** get4_s8_z7_1: -+** mov z7\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_s8_z7_1, svint8x4_t, svint8_t, -+ z7_res = svget4_s8 (z4, 1), -+ z7_res = svget4 (z4, 1)) -+ -+/* -+** get4_s8_z7_2: -+** mov z7\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_s8_z7_2, svint8x4_t, svint8_t, -+ z7_res = svget4_s8 (z4, 2), -+ z7_res = svget4 (z4, 2)) -+ -+/* -+** get4_s8_z7_3: -+** ret -+*/ 
-+TEST_GET (get4_s8_z7_3, svint8x4_t, svint8_t, -+ z7_res = svget4_s8 (z4, 3), -+ z7_res = svget4 (z4, 3)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_u16.c -new file mode 100644 -index 000000000..8f17ad213 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_u16.c -@@ -0,0 +1,179 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** get4_u16_z0_0: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_u16_z0_0, svuint16x4_t, svuint16_t, -+ z0 = svget4_u16 (z4, 0), -+ z0 = svget4 (z4, 0)) -+ -+/* -+** get4_u16_z0_1: -+** mov z0\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_u16_z0_1, svuint16x4_t, svuint16_t, -+ z0 = svget4_u16 (z4, 1), -+ z0 = svget4 (z4, 1)) -+ -+/* -+** get4_u16_z0_2: -+** mov z0\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_u16_z0_2, svuint16x4_t, svuint16_t, -+ z0 = svget4_u16 (z4, 2), -+ z0 = svget4 (z4, 2)) -+ -+/* -+** get4_u16_z0_3: -+** mov z0\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_u16_z0_3, svuint16x4_t, svuint16_t, -+ z0 = svget4_u16 (z4, 3), -+ z0 = svget4 (z4, 3)) -+ -+/* -+** get4_u16_z4_0: -+** ret -+*/ -+TEST_GET (get4_u16_z4_0, svuint16x4_t, svuint16_t, -+ z4_res = svget4_u16 (z4, 0), -+ z4_res = svget4 (z4, 0)) -+ -+/* -+** get4_u16_z4_1: -+** mov z4\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_u16_z4_1, svuint16x4_t, svuint16_t, -+ z4_res = svget4_u16 (z4, 1), -+ z4_res = svget4 (z4, 1)) -+ -+/* -+** get4_u16_z4_2: -+** mov z4\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_u16_z4_2, svuint16x4_t, svuint16_t, -+ z4_res = svget4_u16 (z4, 2), -+ z4_res = svget4 (z4, 2)) -+ -+/* -+** get4_u16_z4_3: -+** mov z4\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_u16_z4_3, svuint16x4_t, svuint16_t, -+ z4_res = svget4_u16 (z4, 3), -+ z4_res = svget4 (z4, 3)) -+ -+/* -+** get4_u16_z5_0: -+** mov z5\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_u16_z5_0, svuint16x4_t, svuint16_t, -+ z5_res = svget4_u16 (z4, 0), -+ z5_res = svget4 (z4, 0)) -+ -+/* -+** get4_u16_z5_1: -+** ret -+*/ -+TEST_GET (get4_u16_z5_1, svuint16x4_t, svuint16_t, -+ z5_res = svget4_u16 (z4, 1), -+ z5_res = svget4 (z4, 1)) -+ -+/* -+** get4_u16_z5_2: -+** mov z5\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_u16_z5_2, svuint16x4_t, svuint16_t, -+ z5_res = svget4_u16 (z4, 2), -+ z5_res = svget4 (z4, 2)) -+ -+/* -+** get4_u16_z5_3: -+** mov z5\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_u16_z5_3, svuint16x4_t, svuint16_t, -+ z5_res = svget4_u16 (z4, 3), -+ z5_res = svget4 (z4, 3)) -+ -+/* -+** get4_u16_z6_0: -+** mov z6\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_u16_z6_0, svuint16x4_t, svuint16_t, -+ z6_res = svget4_u16 (z4, 0), -+ z6_res = svget4 (z4, 0)) -+ -+/* -+** get4_u16_z6_1: -+** mov z6\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_u16_z6_1, svuint16x4_t, svuint16_t, -+ z6_res = svget4_u16 (z4, 1), -+ z6_res = svget4 (z4, 1)) -+ -+/* -+** get4_u16_z6_2: -+** ret -+*/ -+TEST_GET (get4_u16_z6_2, svuint16x4_t, svuint16_t, -+ z6_res = svget4_u16 (z4, 2), -+ z6_res = svget4 (z4, 2)) -+ -+/* -+** get4_u16_z6_3: -+** mov z6\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_u16_z6_3, svuint16x4_t, svuint16_t, -+ z6_res = svget4_u16 (z4, 3), -+ z6_res = svget4 (z4, 3)) -+ -+/* -+** get4_u16_z7_0: -+** mov z7\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_u16_z7_0, svuint16x4_t, svuint16_t, -+ z7_res = svget4_u16 (z4, 0), -+ z7_res = svget4 (z4, 0)) -+ -+/* -+** get4_u16_z7_1: -+** mov z7\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_u16_z7_1, svuint16x4_t, svuint16_t, -+ z7_res = svget4_u16 (z4, 1), -+ 
z7_res = svget4 (z4, 1)) -+ -+/* -+** get4_u16_z7_2: -+** mov z7\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_u16_z7_2, svuint16x4_t, svuint16_t, -+ z7_res = svget4_u16 (z4, 2), -+ z7_res = svget4 (z4, 2)) -+ -+/* -+** get4_u16_z7_3: -+** ret -+*/ -+TEST_GET (get4_u16_z7_3, svuint16x4_t, svuint16_t, -+ z7_res = svget4_u16 (z4, 3), -+ z7_res = svget4 (z4, 3)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_u32.c -new file mode 100644 -index 000000000..e6c94b39d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_u32.c -@@ -0,0 +1,179 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** get4_u32_z0_0: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_u32_z0_0, svuint32x4_t, svuint32_t, -+ z0 = svget4_u32 (z4, 0), -+ z0 = svget4 (z4, 0)) -+ -+/* -+** get4_u32_z0_1: -+** mov z0\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_u32_z0_1, svuint32x4_t, svuint32_t, -+ z0 = svget4_u32 (z4, 1), -+ z0 = svget4 (z4, 1)) -+ -+/* -+** get4_u32_z0_2: -+** mov z0\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_u32_z0_2, svuint32x4_t, svuint32_t, -+ z0 = svget4_u32 (z4, 2), -+ z0 = svget4 (z4, 2)) -+ -+/* -+** get4_u32_z0_3: -+** mov z0\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_u32_z0_3, svuint32x4_t, svuint32_t, -+ z0 = svget4_u32 (z4, 3), -+ z0 = svget4 (z4, 3)) -+ -+/* -+** get4_u32_z4_0: -+** ret -+*/ -+TEST_GET (get4_u32_z4_0, svuint32x4_t, svuint32_t, -+ z4_res = svget4_u32 (z4, 0), -+ z4_res = svget4 (z4, 0)) -+ -+/* -+** get4_u32_z4_1: -+** mov z4\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_u32_z4_1, svuint32x4_t, svuint32_t, -+ z4_res = svget4_u32 (z4, 1), -+ z4_res = svget4 (z4, 1)) -+ -+/* -+** get4_u32_z4_2: -+** mov z4\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_u32_z4_2, svuint32x4_t, svuint32_t, -+ z4_res = svget4_u32 (z4, 2), -+ z4_res = svget4 (z4, 2)) -+ -+/* -+** get4_u32_z4_3: -+** mov z4\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_u32_z4_3, svuint32x4_t, svuint32_t, -+ z4_res = svget4_u32 (z4, 3), -+ z4_res = svget4 (z4, 3)) -+ -+/* -+** get4_u32_z5_0: -+** mov z5\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_u32_z5_0, svuint32x4_t, svuint32_t, -+ z5_res = svget4_u32 (z4, 0), -+ z5_res = svget4 (z4, 0)) -+ -+/* -+** get4_u32_z5_1: -+** ret -+*/ -+TEST_GET (get4_u32_z5_1, svuint32x4_t, svuint32_t, -+ z5_res = svget4_u32 (z4, 1), -+ z5_res = svget4 (z4, 1)) -+ -+/* -+** get4_u32_z5_2: -+** mov z5\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_u32_z5_2, svuint32x4_t, svuint32_t, -+ z5_res = svget4_u32 (z4, 2), -+ z5_res = svget4 (z4, 2)) -+ -+/* -+** get4_u32_z5_3: -+** mov z5\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_u32_z5_3, svuint32x4_t, svuint32_t, -+ z5_res = svget4_u32 (z4, 3), -+ z5_res = svget4 (z4, 3)) -+ -+/* -+** get4_u32_z6_0: -+** mov z6\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_u32_z6_0, svuint32x4_t, svuint32_t, -+ z6_res = svget4_u32 (z4, 0), -+ z6_res = svget4 (z4, 0)) -+ -+/* -+** get4_u32_z6_1: -+** mov z6\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_u32_z6_1, svuint32x4_t, svuint32_t, -+ z6_res = svget4_u32 (z4, 1), -+ z6_res = svget4 (z4, 1)) -+ -+/* -+** get4_u32_z6_2: -+** ret -+*/ -+TEST_GET (get4_u32_z6_2, svuint32x4_t, svuint32_t, -+ z6_res = svget4_u32 (z4, 2), -+ z6_res = svget4 (z4, 2)) -+ -+/* -+** get4_u32_z6_3: -+** mov z6\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_u32_z6_3, svuint32x4_t, svuint32_t, -+ z6_res = svget4_u32 (z4, 3), -+ z6_res = svget4 (z4, 3)) -+ -+/* -+** get4_u32_z7_0: -+** mov z7\.d, z4\.d -+** ret -+*/ -+TEST_GET 
(get4_u32_z7_0, svuint32x4_t, svuint32_t, -+ z7_res = svget4_u32 (z4, 0), -+ z7_res = svget4 (z4, 0)) -+ -+/* -+** get4_u32_z7_1: -+** mov z7\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_u32_z7_1, svuint32x4_t, svuint32_t, -+ z7_res = svget4_u32 (z4, 1), -+ z7_res = svget4 (z4, 1)) -+ -+/* -+** get4_u32_z7_2: -+** mov z7\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_u32_z7_2, svuint32x4_t, svuint32_t, -+ z7_res = svget4_u32 (z4, 2), -+ z7_res = svget4 (z4, 2)) -+ -+/* -+** get4_u32_z7_3: -+** ret -+*/ -+TEST_GET (get4_u32_z7_3, svuint32x4_t, svuint32_t, -+ z7_res = svget4_u32 (z4, 3), -+ z7_res = svget4 (z4, 3)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_u64.c -new file mode 100644 -index 000000000..79c293a2c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_u64.c -@@ -0,0 +1,179 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** get4_u64_z0_0: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_u64_z0_0, svuint64x4_t, svuint64_t, -+ z0 = svget4_u64 (z4, 0), -+ z0 = svget4 (z4, 0)) -+ -+/* -+** get4_u64_z0_1: -+** mov z0\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_u64_z0_1, svuint64x4_t, svuint64_t, -+ z0 = svget4_u64 (z4, 1), -+ z0 = svget4 (z4, 1)) -+ -+/* -+** get4_u64_z0_2: -+** mov z0\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_u64_z0_2, svuint64x4_t, svuint64_t, -+ z0 = svget4_u64 (z4, 2), -+ z0 = svget4 (z4, 2)) -+ -+/* -+** get4_u64_z0_3: -+** mov z0\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_u64_z0_3, svuint64x4_t, svuint64_t, -+ z0 = svget4_u64 (z4, 3), -+ z0 = svget4 (z4, 3)) -+ -+/* -+** get4_u64_z4_0: -+** ret -+*/ -+TEST_GET (get4_u64_z4_0, svuint64x4_t, svuint64_t, -+ z4_res = svget4_u64 (z4, 0), -+ z4_res = svget4 (z4, 0)) -+ -+/* -+** get4_u64_z4_1: -+** mov z4\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_u64_z4_1, svuint64x4_t, svuint64_t, -+ z4_res = svget4_u64 (z4, 1), -+ z4_res = svget4 (z4, 1)) -+ -+/* -+** get4_u64_z4_2: -+** mov z4\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_u64_z4_2, svuint64x4_t, svuint64_t, -+ z4_res = svget4_u64 (z4, 2), -+ z4_res = svget4 (z4, 2)) -+ -+/* -+** get4_u64_z4_3: -+** mov z4\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_u64_z4_3, svuint64x4_t, svuint64_t, -+ z4_res = svget4_u64 (z4, 3), -+ z4_res = svget4 (z4, 3)) -+ -+/* -+** get4_u64_z5_0: -+** mov z5\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_u64_z5_0, svuint64x4_t, svuint64_t, -+ z5_res = svget4_u64 (z4, 0), -+ z5_res = svget4 (z4, 0)) -+ -+/* -+** get4_u64_z5_1: -+** ret -+*/ -+TEST_GET (get4_u64_z5_1, svuint64x4_t, svuint64_t, -+ z5_res = svget4_u64 (z4, 1), -+ z5_res = svget4 (z4, 1)) -+ -+/* -+** get4_u64_z5_2: -+** mov z5\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_u64_z5_2, svuint64x4_t, svuint64_t, -+ z5_res = svget4_u64 (z4, 2), -+ z5_res = svget4 (z4, 2)) -+ -+/* -+** get4_u64_z5_3: -+** mov z5\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_u64_z5_3, svuint64x4_t, svuint64_t, -+ z5_res = svget4_u64 (z4, 3), -+ z5_res = svget4 (z4, 3)) -+ -+/* -+** get4_u64_z6_0: -+** mov z6\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_u64_z6_0, svuint64x4_t, svuint64_t, -+ z6_res = svget4_u64 (z4, 0), -+ z6_res = svget4 (z4, 0)) -+ -+/* -+** get4_u64_z6_1: -+** mov z6\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_u64_z6_1, svuint64x4_t, svuint64_t, -+ z6_res = svget4_u64 (z4, 1), -+ z6_res = svget4 (z4, 1)) -+ -+/* -+** get4_u64_z6_2: -+** ret -+*/ -+TEST_GET (get4_u64_z6_2, svuint64x4_t, svuint64_t, -+ z6_res = svget4_u64 (z4, 2), -+ z6_res = svget4 (z4, 2)) -+ -+/* 
-+** get4_u64_z6_3: -+** mov z6\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_u64_z6_3, svuint64x4_t, svuint64_t, -+ z6_res = svget4_u64 (z4, 3), -+ z6_res = svget4 (z4, 3)) -+ -+/* -+** get4_u64_z7_0: -+** mov z7\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_u64_z7_0, svuint64x4_t, svuint64_t, -+ z7_res = svget4_u64 (z4, 0), -+ z7_res = svget4 (z4, 0)) -+ -+/* -+** get4_u64_z7_1: -+** mov z7\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_u64_z7_1, svuint64x4_t, svuint64_t, -+ z7_res = svget4_u64 (z4, 1), -+ z7_res = svget4 (z4, 1)) -+ -+/* -+** get4_u64_z7_2: -+** mov z7\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_u64_z7_2, svuint64x4_t, svuint64_t, -+ z7_res = svget4_u64 (z4, 2), -+ z7_res = svget4 (z4, 2)) -+ -+/* -+** get4_u64_z7_3: -+** ret -+*/ -+TEST_GET (get4_u64_z7_3, svuint64x4_t, svuint64_t, -+ z7_res = svget4_u64 (z4, 3), -+ z7_res = svget4 (z4, 3)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_u8.c -new file mode 100644 -index 000000000..f3ad9a85b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_u8.c -@@ -0,0 +1,179 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** get4_u8_z0_0: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_u8_z0_0, svuint8x4_t, svuint8_t, -+ z0 = svget4_u8 (z4, 0), -+ z0 = svget4 (z4, 0)) -+ -+/* -+** get4_u8_z0_1: -+** mov z0\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_u8_z0_1, svuint8x4_t, svuint8_t, -+ z0 = svget4_u8 (z4, 1), -+ z0 = svget4 (z4, 1)) -+ -+/* -+** get4_u8_z0_2: -+** mov z0\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_u8_z0_2, svuint8x4_t, svuint8_t, -+ z0 = svget4_u8 (z4, 2), -+ z0 = svget4 (z4, 2)) -+ -+/* -+** get4_u8_z0_3: -+** mov z0\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_u8_z0_3, svuint8x4_t, svuint8_t, -+ z0 = svget4_u8 (z4, 3), -+ z0 = svget4 (z4, 3)) -+ -+/* -+** get4_u8_z4_0: -+** ret -+*/ -+TEST_GET (get4_u8_z4_0, svuint8x4_t, svuint8_t, -+ z4_res = svget4_u8 (z4, 0), -+ z4_res = svget4 (z4, 0)) -+ -+/* -+** get4_u8_z4_1: -+** mov z4\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_u8_z4_1, svuint8x4_t, svuint8_t, -+ z4_res = svget4_u8 (z4, 1), -+ z4_res = svget4 (z4, 1)) -+ -+/* -+** get4_u8_z4_2: -+** mov z4\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_u8_z4_2, svuint8x4_t, svuint8_t, -+ z4_res = svget4_u8 (z4, 2), -+ z4_res = svget4 (z4, 2)) -+ -+/* -+** get4_u8_z4_3: -+** mov z4\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_u8_z4_3, svuint8x4_t, svuint8_t, -+ z4_res = svget4_u8 (z4, 3), -+ z4_res = svget4 (z4, 3)) -+ -+/* -+** get4_u8_z5_0: -+** mov z5\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_u8_z5_0, svuint8x4_t, svuint8_t, -+ z5_res = svget4_u8 (z4, 0), -+ z5_res = svget4 (z4, 0)) -+ -+/* -+** get4_u8_z5_1: -+** ret -+*/ -+TEST_GET (get4_u8_z5_1, svuint8x4_t, svuint8_t, -+ z5_res = svget4_u8 (z4, 1), -+ z5_res = svget4 (z4, 1)) -+ -+/* -+** get4_u8_z5_2: -+** mov z5\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_u8_z5_2, svuint8x4_t, svuint8_t, -+ z5_res = svget4_u8 (z4, 2), -+ z5_res = svget4 (z4, 2)) -+ -+/* -+** get4_u8_z5_3: -+** mov z5\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_u8_z5_3, svuint8x4_t, svuint8_t, -+ z5_res = svget4_u8 (z4, 3), -+ z5_res = svget4 (z4, 3)) -+ -+/* -+** get4_u8_z6_0: -+** mov z6\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_u8_z6_0, svuint8x4_t, svuint8_t, -+ z6_res = svget4_u8 (z4, 0), -+ z6_res = svget4 (z4, 0)) -+ -+/* -+** get4_u8_z6_1: -+** mov z6\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_u8_z6_1, svuint8x4_t, svuint8_t, -+ z6_res = svget4_u8 (z4, 1), -+ z6_res = svget4 
(z4, 1)) -+ -+/* -+** get4_u8_z6_2: -+** ret -+*/ -+TEST_GET (get4_u8_z6_2, svuint8x4_t, svuint8_t, -+ z6_res = svget4_u8 (z4, 2), -+ z6_res = svget4 (z4, 2)) -+ -+/* -+** get4_u8_z6_3: -+** mov z6\.d, z7\.d -+** ret -+*/ -+TEST_GET (get4_u8_z6_3, svuint8x4_t, svuint8_t, -+ z6_res = svget4_u8 (z4, 3), -+ z6_res = svget4 (z4, 3)) -+ -+/* -+** get4_u8_z7_0: -+** mov z7\.d, z4\.d -+** ret -+*/ -+TEST_GET (get4_u8_z7_0, svuint8x4_t, svuint8_t, -+ z7_res = svget4_u8 (z4, 0), -+ z7_res = svget4 (z4, 0)) -+ -+/* -+** get4_u8_z7_1: -+** mov z7\.d, z5\.d -+** ret -+*/ -+TEST_GET (get4_u8_z7_1, svuint8x4_t, svuint8_t, -+ z7_res = svget4_u8 (z4, 1), -+ z7_res = svget4 (z4, 1)) -+ -+/* -+** get4_u8_z7_2: -+** mov z7\.d, z6\.d -+** ret -+*/ -+TEST_GET (get4_u8_z7_2, svuint8x4_t, svuint8_t, -+ z7_res = svget4_u8 (z4, 2), -+ z7_res = svget4 (z4, 2)) -+ -+/* -+** get4_u8_z7_3: -+** ret -+*/ -+TEST_GET (get4_u8_z7_3, svuint8x4_t, svuint8_t, -+ z7_res = svget4_u8 (z4, 3), -+ z7_res = svget4 (z4, 3)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_s16.c -new file mode 100644 -index 000000000..90a1434f1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_s16.c -@@ -0,0 +1,220 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** index_s16_w0_w1: -+** index z0\.h, w0, w1 -+** ret -+*/ -+TEST_S (index_s16_w0_w1, svint16_t, int16_t, -+ z0 = svindex_s16 (x0, x1)) -+ -+/* -+** index_s16_w0_2: -+** index z0\.h, w0, #2 -+** ret -+*/ -+TEST_S (index_s16_w0_2, svint16_t, int16_t, -+ z0 = svindex_s16 (x0, 2)) -+ -+/* -+** index_s16_50_2: -+** mov (w[0-9]+), 50 -+** index z0\.h, \1, #2 -+** ret -+*/ -+TEST_S (index_s16_50_2, svint16_t, int16_t, -+ z0 = svindex_s16 (50, 2)) -+ -+/* -+** index_s16_0_m17: -+** mov (w[0-9]+), -17 -+** index z0\.h, #0, \1 -+** ret -+*/ -+TEST_S (index_s16_0_m17, svint16_t, int16_t, -+ z0 = svindex_s16 (0, -17)) -+ -+/* -+** index_s16_0_m16: -+** index z0\.h, #0, #-16 -+** ret -+*/ -+TEST_S (index_s16_0_m16, svint16_t, int16_t, -+ z0 = svindex_s16 (0, -16)) -+ -+/* -+** index_s16_0_1: -+** index z0\.h, #0, #1 -+** ret -+*/ -+TEST_S (index_s16_0_1, svint16_t, int16_t, -+ z0 = svindex_s16 (0, 1)) -+ -+/* -+** index_s16_0_15: -+** index z0\.h, #0, #15 -+** ret -+*/ -+TEST_S (index_s16_0_15, svint16_t, int16_t, -+ z0 = svindex_s16 (0, 15)) -+ -+/* -+** index_s16_0_16: -+** mov (w[0-9]+), 16 -+** index z0\.h, #0, \1 -+** ret -+*/ -+TEST_S (index_s16_0_16, svint16_t, int16_t, -+ z0 = svindex_s16 (0, 16)) -+ -+/* -+** index_s16_m17_1: -+** mov (w[0-9]+), -17 -+** index z0\.h, \1, #1 -+** ret -+*/ -+TEST_S (index_s16_m17_1, svint16_t, int16_t, -+ z0 = svindex_s16 (-17, 1)) -+ -+/* -+** index_s16_m16_1: -+** index z0\.h, #-16, #1 -+** ret -+*/ -+TEST_S (index_s16_m16_1, svint16_t, int16_t, -+ z0 = svindex_s16 (-16, 1)) -+ -+/* -+** index_s16_m1_1: -+** index z0\.h, #-1, #1 -+** ret -+*/ -+TEST_S (index_s16_m1_1, svint16_t, int16_t, -+ z0 = svindex_s16 (-1, 1)) -+ -+/* -+** index_s16_1_1: -+** index z0\.h, #1, #1 -+** ret -+*/ -+TEST_S (index_s16_1_1, svint16_t, int16_t, -+ z0 = svindex_s16 (1, 1)) -+ -+/* -+** index_s16_15_1: -+** index z0\.h, #15, #1 -+** ret -+*/ -+TEST_S (index_s16_15_1, svint16_t, int16_t, -+ z0 = svindex_s16 (15, 1)) -+ -+/* -+** index_s16_16_1: -+** mov (w[0-9]+), 16 -+** index z0\.h, \1, #1 -+** ret -+*/ -+TEST_S (index_s16_16_1, svint16_t, int16_t, -+ z0 = svindex_s16 (16, 1)) -+ -+/* -+** index_s16_m17_x0: -+** mov 
(w[0-9]+), -17 -+** index z0\.h, \1, w0 -+** ret -+*/ -+TEST_S (index_s16_m17_x0, svint16_t, int16_t, -+ z0 = svindex_s16 (-17, x0)) -+ -+/* -+** index_s16_m16_x0: -+** index z0\.h, #-16, w0 -+** ret -+*/ -+TEST_S (index_s16_m16_x0, svint16_t, int16_t, -+ z0 = svindex_s16 (-16, x0)) -+ -+/* -+** index_s16_m1_x0: -+** index z0\.h, #-1, w0 -+** ret -+*/ -+TEST_S (index_s16_m1_x0, svint16_t, int16_t, -+ z0 = svindex_s16 (-1, x0)) -+ -+/* -+** index_s16_0_x0: -+** index z0\.h, #0, w0 -+** ret -+*/ -+TEST_S (index_s16_0_x0, svint16_t, int16_t, -+ z0 = svindex_s16 (0, x0)) -+ -+/* -+** index_s16_1_x0: -+** index z0\.h, #1, w0 -+** ret -+*/ -+TEST_S (index_s16_1_x0, svint16_t, int16_t, -+ z0 = svindex_s16 (1, x0)) -+ -+/* -+** index_s16_15_x0: -+** index z0\.h, #15, w0 -+** ret -+*/ -+TEST_S (index_s16_15_x0, svint16_t, int16_t, -+ z0 = svindex_s16 (15, x0)) -+ -+/* -+** index_s16_16_x0: -+** mov (w[0-9]+), 16 -+** index z0\.h, \1, w0 -+** ret -+*/ -+TEST_S (index_s16_16_x0, svint16_t, int16_t, -+ z0 = svindex_s16 (16, x0)) -+ -+/* -+** index_s16_x0_m17: -+** mov (w[0-9]+), -17 -+** index z0\.h, w0, \1 -+** ret -+*/ -+TEST_S (index_s16_x0_m17, svint16_t, int16_t, -+ z0 = svindex_s16 (x0, -17)) -+ -+/* -+** index_s16_x0_m16: -+** index z0\.h, w0, #-16 -+** ret -+*/ -+TEST_S (index_s16_x0_m16, svint16_t, int16_t, -+ z0 = svindex_s16 (x0, -16)) -+ -+/* -+** index_s16_x0_1: -+** index z0\.h, w0, #1 -+** ret -+*/ -+TEST_S (index_s16_x0_1, svint16_t, int16_t, -+ z0 = svindex_s16 (x0, 1)) -+ -+/* -+** index_s16_x0_15: -+** index z0\.h, w0, #15 -+** ret -+*/ -+TEST_S (index_s16_x0_15, svint16_t, int16_t, -+ z0 = svindex_s16 (x0, 15)) -+ -+/* -+** index_s16_x0_16: -+** mov (w[0-9]+), 16 -+** index z0\.h, w0, \1 -+** ret -+*/ -+TEST_S (index_s16_x0_16, svint16_t, int16_t, -+ z0 = svindex_s16 (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_s32.c -new file mode 100644 -index 000000000..18afedac0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_s32.c -@@ -0,0 +1,220 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** index_s32_w0_w1: -+** index z0\.s, w0, w1 -+** ret -+*/ -+TEST_S (index_s32_w0_w1, svint32_t, int32_t, -+ z0 = svindex_s32 (x0, x1)) -+ -+/* -+** index_s32_w0_2: -+** index z0\.s, w0, #2 -+** ret -+*/ -+TEST_S (index_s32_w0_2, svint32_t, int32_t, -+ z0 = svindex_s32 (x0, 2)) -+ -+/* -+** index_s32_50_2: -+** mov (w[0-9]+), 50 -+** index z0\.s, \1, #2 -+** ret -+*/ -+TEST_S (index_s32_50_2, svint32_t, int32_t, -+ z0 = svindex_s32 (50, 2)) -+ -+/* -+** index_s32_0_m17: -+** mov (w[0-9]+), -17 -+** index z0\.s, #0, \1 -+** ret -+*/ -+TEST_S (index_s32_0_m17, svint32_t, int32_t, -+ z0 = svindex_s32 (0, -17)) -+ -+/* -+** index_s32_0_m16: -+** index z0\.s, #0, #-16 -+** ret -+*/ -+TEST_S (index_s32_0_m16, svint32_t, int32_t, -+ z0 = svindex_s32 (0, -16)) -+ -+/* -+** index_s32_0_1: -+** index z0\.s, #0, #1 -+** ret -+*/ -+TEST_S (index_s32_0_1, svint32_t, int32_t, -+ z0 = svindex_s32 (0, 1)) -+ -+/* -+** index_s32_0_15: -+** index z0\.s, #0, #15 -+** ret -+*/ -+TEST_S (index_s32_0_15, svint32_t, int32_t, -+ z0 = svindex_s32 (0, 15)) -+ -+/* -+** index_s32_0_16: -+** mov (w[0-9]+), 16 -+** index z0\.s, #0, \1 -+** ret -+*/ -+TEST_S (index_s32_0_16, svint32_t, int32_t, -+ z0 = svindex_s32 (0, 16)) -+ -+/* -+** index_s32_m17_1: -+** mov (w[0-9]+), -17 -+** index z0\.s, \1, #1 -+** ret -+*/ -+TEST_S (index_s32_m17_1, svint32_t, 
int32_t, -+ z0 = svindex_s32 (-17, 1)) -+ -+/* -+** index_s32_m16_1: -+** index z0\.s, #-16, #1 -+** ret -+*/ -+TEST_S (index_s32_m16_1, svint32_t, int32_t, -+ z0 = svindex_s32 (-16, 1)) -+ -+/* -+** index_s32_m1_1: -+** index z0\.s, #-1, #1 -+** ret -+*/ -+TEST_S (index_s32_m1_1, svint32_t, int32_t, -+ z0 = svindex_s32 (-1, 1)) -+ -+/* -+** index_s32_1_1: -+** index z0\.s, #1, #1 -+** ret -+*/ -+TEST_S (index_s32_1_1, svint32_t, int32_t, -+ z0 = svindex_s32 (1, 1)) -+ -+/* -+** index_s32_15_1: -+** index z0\.s, #15, #1 -+** ret -+*/ -+TEST_S (index_s32_15_1, svint32_t, int32_t, -+ z0 = svindex_s32 (15, 1)) -+ -+/* -+** index_s32_16_1: -+** mov (w[0-9]+), 16 -+** index z0\.s, \1, #1 -+** ret -+*/ -+TEST_S (index_s32_16_1, svint32_t, int32_t, -+ z0 = svindex_s32 (16, 1)) -+ -+/* -+** index_s32_m17_x0: -+** mov (w[0-9]+), -17 -+** index z0\.s, \1, w0 -+** ret -+*/ -+TEST_S (index_s32_m17_x0, svint32_t, int32_t, -+ z0 = svindex_s32 (-17, x0)) -+ -+/* -+** index_s32_m16_x0: -+** index z0\.s, #-16, w0 -+** ret -+*/ -+TEST_S (index_s32_m16_x0, svint32_t, int32_t, -+ z0 = svindex_s32 (-16, x0)) -+ -+/* -+** index_s32_m1_x0: -+** index z0\.s, #-1, w0 -+** ret -+*/ -+TEST_S (index_s32_m1_x0, svint32_t, int32_t, -+ z0 = svindex_s32 (-1, x0)) -+ -+/* -+** index_s32_0_x0: -+** index z0\.s, #0, w0 -+** ret -+*/ -+TEST_S (index_s32_0_x0, svint32_t, int32_t, -+ z0 = svindex_s32 (0, x0)) -+ -+/* -+** index_s32_1_x0: -+** index z0\.s, #1, w0 -+** ret -+*/ -+TEST_S (index_s32_1_x0, svint32_t, int32_t, -+ z0 = svindex_s32 (1, x0)) -+ -+/* -+** index_s32_15_x0: -+** index z0\.s, #15, w0 -+** ret -+*/ -+TEST_S (index_s32_15_x0, svint32_t, int32_t, -+ z0 = svindex_s32 (15, x0)) -+ -+/* -+** index_s32_16_x0: -+** mov (w[0-9]+), 16 -+** index z0\.s, \1, w0 -+** ret -+*/ -+TEST_S (index_s32_16_x0, svint32_t, int32_t, -+ z0 = svindex_s32 (16, x0)) -+ -+/* -+** index_s32_x0_m17: -+** mov (w[0-9]+), -17 -+** index z0\.s, w0, \1 -+** ret -+*/ -+TEST_S (index_s32_x0_m17, svint32_t, int32_t, -+ z0 = svindex_s32 (x0, -17)) -+ -+/* -+** index_s32_x0_m16: -+** index z0\.s, w0, #-16 -+** ret -+*/ -+TEST_S (index_s32_x0_m16, svint32_t, int32_t, -+ z0 = svindex_s32 (x0, -16)) -+ -+/* -+** index_s32_x0_1: -+** index z0\.s, w0, #1 -+** ret -+*/ -+TEST_S (index_s32_x0_1, svint32_t, int32_t, -+ z0 = svindex_s32 (x0, 1)) -+ -+/* -+** index_s32_x0_15: -+** index z0\.s, w0, #15 -+** ret -+*/ -+TEST_S (index_s32_x0_15, svint32_t, int32_t, -+ z0 = svindex_s32 (x0, 15)) -+ -+/* -+** index_s32_x0_16: -+** mov (w[0-9]+), 16 -+** index z0\.s, w0, \1 -+** ret -+*/ -+TEST_S (index_s32_x0_16, svint32_t, int32_t, -+ z0 = svindex_s32 (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_s64.c -new file mode 100644 -index 000000000..298eec9ea ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_s64.c -@@ -0,0 +1,220 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** index_s64_x0_x1: -+** index z0\.d, x0, x1 -+** ret -+*/ -+TEST_S (index_s64_x0_x1, svint64_t, int64_t, -+ z0 = svindex_s64 (x0, x1)) -+ -+/* -+** index_s64_x0_2: -+** index z0\.d, x0, #2 -+** ret -+*/ -+TEST_S (index_s64_x0_2, svint64_t, int64_t, -+ z0 = svindex_s64 (x0, 2)) -+ -+/* -+** index_s64_50_2: -+** mov (x[0-9]+), 50 -+** index z0\.d, \1, #2 -+** ret -+*/ -+TEST_S (index_s64_50_2, svint64_t, int64_t, -+ z0 = svindex_s64 (50, 2)) -+ -+/* -+** index_s64_0_m17: -+** mov (x[0-9]+), -17 -+** index z0\.d, #0, \1 
-+** ret -+*/ -+TEST_S (index_s64_0_m17, svint64_t, int64_t, -+ z0 = svindex_s64 (0, -17)) -+ -+/* -+** index_s64_0_m16: -+** index z0\.d, #0, #-16 -+** ret -+*/ -+TEST_S (index_s64_0_m16, svint64_t, int64_t, -+ z0 = svindex_s64 (0, -16)) -+ -+/* -+** index_s64_0_1: -+** index z0\.d, #0, #1 -+** ret -+*/ -+TEST_S (index_s64_0_1, svint64_t, int64_t, -+ z0 = svindex_s64 (0, 1)) -+ -+/* -+** index_s64_0_15: -+** index z0\.d, #0, #15 -+** ret -+*/ -+TEST_S (index_s64_0_15, svint64_t, int64_t, -+ z0 = svindex_s64 (0, 15)) -+ -+/* -+** index_s64_0_16: -+** mov (x[0-9]+), 16 -+** index z0\.d, #0, \1 -+** ret -+*/ -+TEST_S (index_s64_0_16, svint64_t, int64_t, -+ z0 = svindex_s64 (0, 16)) -+ -+/* -+** index_s64_m17_1: -+** mov (x[0-9]+), -17 -+** index z0\.d, \1, #1 -+** ret -+*/ -+TEST_S (index_s64_m17_1, svint64_t, int64_t, -+ z0 = svindex_s64 (-17, 1)) -+ -+/* -+** index_s64_m16_1: -+** index z0\.d, #-16, #1 -+** ret -+*/ -+TEST_S (index_s64_m16_1, svint64_t, int64_t, -+ z0 = svindex_s64 (-16, 1)) -+ -+/* -+** index_s64_m1_1: -+** index z0\.d, #-1, #1 -+** ret -+*/ -+TEST_S (index_s64_m1_1, svint64_t, int64_t, -+ z0 = svindex_s64 (-1, 1)) -+ -+/* -+** index_s64_1_1: -+** index z0\.d, #1, #1 -+** ret -+*/ -+TEST_S (index_s64_1_1, svint64_t, int64_t, -+ z0 = svindex_s64 (1, 1)) -+ -+/* -+** index_s64_15_1: -+** index z0\.d, #15, #1 -+** ret -+*/ -+TEST_S (index_s64_15_1, svint64_t, int64_t, -+ z0 = svindex_s64 (15, 1)) -+ -+/* -+** index_s64_16_1: -+** mov (x[0-9]+), 16 -+** index z0\.d, \1, #1 -+** ret -+*/ -+TEST_S (index_s64_16_1, svint64_t, int64_t, -+ z0 = svindex_s64 (16, 1)) -+ -+/* -+** index_s64_m17_x0: -+** mov (x[0-9]+), -17 -+** index z0\.d, \1, x0 -+** ret -+*/ -+TEST_S (index_s64_m17_x0, svint64_t, int64_t, -+ z0 = svindex_s64 (-17, x0)) -+ -+/* -+** index_s64_m16_x0: -+** index z0\.d, #-16, x0 -+** ret -+*/ -+TEST_S (index_s64_m16_x0, svint64_t, int64_t, -+ z0 = svindex_s64 (-16, x0)) -+ -+/* -+** index_s64_m1_x0: -+** index z0\.d, #-1, x0 -+** ret -+*/ -+TEST_S (index_s64_m1_x0, svint64_t, int64_t, -+ z0 = svindex_s64 (-1, x0)) -+ -+/* -+** index_s64_0_x0: -+** index z0\.d, #0, x0 -+** ret -+*/ -+TEST_S (index_s64_0_x0, svint64_t, int64_t, -+ z0 = svindex_s64 (0, x0)) -+ -+/* -+** index_s64_1_x0: -+** index z0\.d, #1, x0 -+** ret -+*/ -+TEST_S (index_s64_1_x0, svint64_t, int64_t, -+ z0 = svindex_s64 (1, x0)) -+ -+/* -+** index_s64_15_x0: -+** index z0\.d, #15, x0 -+** ret -+*/ -+TEST_S (index_s64_15_x0, svint64_t, int64_t, -+ z0 = svindex_s64 (15, x0)) -+ -+/* -+** index_s64_16_x0: -+** mov (x[0-9]+), 16 -+** index z0\.d, \1, x0 -+** ret -+*/ -+TEST_S (index_s64_16_x0, svint64_t, int64_t, -+ z0 = svindex_s64 (16, x0)) -+ -+/* -+** index_s64_x0_m17: -+** mov (x[0-9]+), -17 -+** index z0\.d, x0, \1 -+** ret -+*/ -+TEST_S (index_s64_x0_m17, svint64_t, int64_t, -+ z0 = svindex_s64 (x0, -17)) -+ -+/* -+** index_s64_x0_m16: -+** index z0\.d, x0, #-16 -+** ret -+*/ -+TEST_S (index_s64_x0_m16, svint64_t, int64_t, -+ z0 = svindex_s64 (x0, -16)) -+ -+/* -+** index_s64_x0_1: -+** index z0\.d, x0, #1 -+** ret -+*/ -+TEST_S (index_s64_x0_1, svint64_t, int64_t, -+ z0 = svindex_s64 (x0, 1)) -+ -+/* -+** index_s64_x0_15: -+** index z0\.d, x0, #15 -+** ret -+*/ -+TEST_S (index_s64_x0_15, svint64_t, int64_t, -+ z0 = svindex_s64 (x0, 15)) -+ -+/* -+** index_s64_x0_16: -+** mov (x[0-9]+), 16 -+** index z0\.d, x0, \1 -+** ret -+*/ -+TEST_S (index_s64_x0_16, svint64_t, int64_t, -+ z0 = svindex_s64 (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_s8.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_s8.c -new file mode 100644 -index 000000000..8a1f14f50 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_s8.c -@@ -0,0 +1,220 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** index_s8_w0_w1: -+** index z0\.b, w0, w1 -+** ret -+*/ -+TEST_S (index_s8_w0_w1, svint8_t, int8_t, -+ z0 = svindex_s8 (x0, x1)) -+ -+/* -+** index_s8_w0_2: -+** index z0\.b, w0, #2 -+** ret -+*/ -+TEST_S (index_s8_w0_2, svint8_t, int8_t, -+ z0 = svindex_s8 (x0, 2)) -+ -+/* -+** index_s8_50_2: -+** mov (w[0-9]+), 50 -+** index z0\.b, \1, #2 -+** ret -+*/ -+TEST_S (index_s8_50_2, svint8_t, int8_t, -+ z0 = svindex_s8 (50, 2)) -+ -+/* -+** index_s8_0_m17: -+** mov (w[0-9]+), -17 -+** index z0\.b, #0, \1 -+** ret -+*/ -+TEST_S (index_s8_0_m17, svint8_t, int8_t, -+ z0 = svindex_s8 (0, -17)) -+ -+/* -+** index_s8_0_m16: -+** index z0\.b, #0, #-16 -+** ret -+*/ -+TEST_S (index_s8_0_m16, svint8_t, int8_t, -+ z0 = svindex_s8 (0, -16)) -+ -+/* -+** index_s8_0_1: -+** index z0\.b, #0, #1 -+** ret -+*/ -+TEST_S (index_s8_0_1, svint8_t, int8_t, -+ z0 = svindex_s8 (0, 1)) -+ -+/* -+** index_s8_0_15: -+** index z0\.b, #0, #15 -+** ret -+*/ -+TEST_S (index_s8_0_15, svint8_t, int8_t, -+ z0 = svindex_s8 (0, 15)) -+ -+/* -+** index_s8_0_16: -+** mov (w[0-9]+), 16 -+** index z0\.b, #0, \1 -+** ret -+*/ -+TEST_S (index_s8_0_16, svint8_t, int8_t, -+ z0 = svindex_s8 (0, 16)) -+ -+/* -+** index_s8_m17_1: -+** mov (w[0-9]+), -17 -+** index z0\.b, \1, #1 -+** ret -+*/ -+TEST_S (index_s8_m17_1, svint8_t, int8_t, -+ z0 = svindex_s8 (-17, 1)) -+ -+/* -+** index_s8_m16_1: -+** index z0\.b, #-16, #1 -+** ret -+*/ -+TEST_S (index_s8_m16_1, svint8_t, int8_t, -+ z0 = svindex_s8 (-16, 1)) -+ -+/* -+** index_s8_m1_1: -+** index z0\.b, #-1, #1 -+** ret -+*/ -+TEST_S (index_s8_m1_1, svint8_t, int8_t, -+ z0 = svindex_s8 (-1, 1)) -+ -+/* -+** index_s8_1_1: -+** index z0\.b, #1, #1 -+** ret -+*/ -+TEST_S (index_s8_1_1, svint8_t, int8_t, -+ z0 = svindex_s8 (1, 1)) -+ -+/* -+** index_s8_15_1: -+** index z0\.b, #15, #1 -+** ret -+*/ -+TEST_S (index_s8_15_1, svint8_t, int8_t, -+ z0 = svindex_s8 (15, 1)) -+ -+/* -+** index_s8_16_1: -+** mov (w[0-9]+), 16 -+** index z0\.b, \1, #1 -+** ret -+*/ -+TEST_S (index_s8_16_1, svint8_t, int8_t, -+ z0 = svindex_s8 (16, 1)) -+ -+/* -+** index_s8_m17_x0: -+** mov (w[0-9]+), -17 -+** index z0\.b, \1, w0 -+** ret -+*/ -+TEST_S (index_s8_m17_x0, svint8_t, int8_t, -+ z0 = svindex_s8 (-17, x0)) -+ -+/* -+** index_s8_m16_x0: -+** index z0\.b, #-16, w0 -+** ret -+*/ -+TEST_S (index_s8_m16_x0, svint8_t, int8_t, -+ z0 = svindex_s8 (-16, x0)) -+ -+/* -+** index_s8_m1_x0: -+** index z0\.b, #-1, w0 -+** ret -+*/ -+TEST_S (index_s8_m1_x0, svint8_t, int8_t, -+ z0 = svindex_s8 (-1, x0)) -+ -+/* -+** index_s8_0_x0: -+** index z0\.b, #0, w0 -+** ret -+*/ -+TEST_S (index_s8_0_x0, svint8_t, int8_t, -+ z0 = svindex_s8 (0, x0)) -+ -+/* -+** index_s8_1_x0: -+** index z0\.b, #1, w0 -+** ret -+*/ -+TEST_S (index_s8_1_x0, svint8_t, int8_t, -+ z0 = svindex_s8 (1, x0)) -+ -+/* -+** index_s8_15_x0: -+** index z0\.b, #15, w0 -+** ret -+*/ -+TEST_S (index_s8_15_x0, svint8_t, int8_t, -+ z0 = svindex_s8 (15, x0)) -+ -+/* -+** index_s8_16_x0: -+** mov (w[0-9]+), 16 -+** index z0\.b, \1, w0 -+** ret -+*/ -+TEST_S (index_s8_16_x0, svint8_t, int8_t, -+ z0 = svindex_s8 (16, x0)) -+ -+/* -+** index_s8_x0_m17: -+** mov (w[0-9]+), -17 -+** index z0\.b, w0, \1 -+** ret -+*/ -+TEST_S (index_s8_x0_m17, svint8_t, int8_t, -+ z0 
= svindex_s8 (x0, -17)) -+ -+/* -+** index_s8_x0_m16: -+** index z0\.b, w0, #-16 -+** ret -+*/ -+TEST_S (index_s8_x0_m16, svint8_t, int8_t, -+ z0 = svindex_s8 (x0, -16)) -+ -+/* -+** index_s8_x0_1: -+** index z0\.b, w0, #1 -+** ret -+*/ -+TEST_S (index_s8_x0_1, svint8_t, int8_t, -+ z0 = svindex_s8 (x0, 1)) -+ -+/* -+** index_s8_x0_15: -+** index z0\.b, w0, #15 -+** ret -+*/ -+TEST_S (index_s8_x0_15, svint8_t, int8_t, -+ z0 = svindex_s8 (x0, 15)) -+ -+/* -+** index_s8_x0_16: -+** mov (w[0-9]+), 16 -+** index z0\.b, w0, \1 -+** ret -+*/ -+TEST_S (index_s8_x0_16, svint8_t, int8_t, -+ z0 = svindex_s8 (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_u16.c -new file mode 100644 -index 000000000..1c6631088 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_u16.c -@@ -0,0 +1,220 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** index_u16_w0_w1: -+** index z0\.h, w0, w1 -+** ret -+*/ -+TEST_S (index_u16_w0_w1, svuint16_t, uint16_t, -+ z0 = svindex_u16 (x0, x1)) -+ -+/* -+** index_u16_w0_2: -+** index z0\.h, w0, #2 -+** ret -+*/ -+TEST_S (index_u16_w0_2, svuint16_t, uint16_t, -+ z0 = svindex_u16 (x0, 2)) -+ -+/* -+** index_u16_50_2: -+** mov (w[0-9]+), 50 -+** index z0\.h, \1, #2 -+** ret -+*/ -+TEST_S (index_u16_50_2, svuint16_t, uint16_t, -+ z0 = svindex_u16 (50, 2)) -+ -+/* -+** index_u16_0_m17: -+** mov (w[0-9]+), -17 -+** index z0\.h, #0, \1 -+** ret -+*/ -+TEST_S (index_u16_0_m17, svuint16_t, uint16_t, -+ z0 = svindex_u16 (0, -17)) -+ -+/* -+** index_u16_0_m16: -+** index z0\.h, #0, #-16 -+** ret -+*/ -+TEST_S (index_u16_0_m16, svuint16_t, uint16_t, -+ z0 = svindex_u16 (0, -16)) -+ -+/* -+** index_u16_0_1: -+** index z0\.h, #0, #1 -+** ret -+*/ -+TEST_S (index_u16_0_1, svuint16_t, uint16_t, -+ z0 = svindex_u16 (0, 1)) -+ -+/* -+** index_u16_0_15: -+** index z0\.h, #0, #15 -+** ret -+*/ -+TEST_S (index_u16_0_15, svuint16_t, uint16_t, -+ z0 = svindex_u16 (0, 15)) -+ -+/* -+** index_u16_0_16: -+** mov (w[0-9]+), 16 -+** index z0\.h, #0, \1 -+** ret -+*/ -+TEST_S (index_u16_0_16, svuint16_t, uint16_t, -+ z0 = svindex_u16 (0, 16)) -+ -+/* -+** index_u16_m17_1: -+** mov (w[0-9]+), -17 -+** index z0\.h, \1, #1 -+** ret -+*/ -+TEST_S (index_u16_m17_1, svuint16_t, uint16_t, -+ z0 = svindex_u16 (-17, 1)) -+ -+/* -+** index_u16_m16_1: -+** index z0\.h, #-16, #1 -+** ret -+*/ -+TEST_S (index_u16_m16_1, svuint16_t, uint16_t, -+ z0 = svindex_u16 (-16, 1)) -+ -+/* -+** index_u16_m1_1: -+** index z0\.h, #-1, #1 -+** ret -+*/ -+TEST_S (index_u16_m1_1, svuint16_t, uint16_t, -+ z0 = svindex_u16 (-1, 1)) -+ -+/* -+** index_u16_1_1: -+** index z0\.h, #1, #1 -+** ret -+*/ -+TEST_S (index_u16_1_1, svuint16_t, uint16_t, -+ z0 = svindex_u16 (1, 1)) -+ -+/* -+** index_u16_15_1: -+** index z0\.h, #15, #1 -+** ret -+*/ -+TEST_S (index_u16_15_1, svuint16_t, uint16_t, -+ z0 = svindex_u16 (15, 1)) -+ -+/* -+** index_u16_16_1: -+** mov (w[0-9]+), 16 -+** index z0\.h, \1, #1 -+** ret -+*/ -+TEST_S (index_u16_16_1, svuint16_t, uint16_t, -+ z0 = svindex_u16 (16, 1)) -+ -+/* -+** index_u16_m17_x0: -+** mov (w[0-9]+), -17 -+** index z0\.h, \1, w0 -+** ret -+*/ -+TEST_S (index_u16_m17_x0, svuint16_t, uint16_t, -+ z0 = svindex_u16 (-17, x0)) -+ -+/* -+** index_u16_m16_x0: -+** index z0\.h, #-16, w0 -+** ret -+*/ -+TEST_S (index_u16_m16_x0, svuint16_t, uint16_t, -+ z0 = svindex_u16 (-16, x0)) -+ -+/* -+** index_u16_m1_x0: -+** index z0\.h, #-1, w0 -+** ret 
-+*/ -+TEST_S (index_u16_m1_x0, svuint16_t, uint16_t, -+ z0 = svindex_u16 (-1, x0)) -+ -+/* -+** index_u16_0_x0: -+** index z0\.h, #0, w0 -+** ret -+*/ -+TEST_S (index_u16_0_x0, svuint16_t, uint16_t, -+ z0 = svindex_u16 (0, x0)) -+ -+/* -+** index_u16_1_x0: -+** index z0\.h, #1, w0 -+** ret -+*/ -+TEST_S (index_u16_1_x0, svuint16_t, uint16_t, -+ z0 = svindex_u16 (1, x0)) -+ -+/* -+** index_u16_15_x0: -+** index z0\.h, #15, w0 -+** ret -+*/ -+TEST_S (index_u16_15_x0, svuint16_t, uint16_t, -+ z0 = svindex_u16 (15, x0)) -+ -+/* -+** index_u16_16_x0: -+** mov (w[0-9]+), 16 -+** index z0\.h, \1, w0 -+** ret -+*/ -+TEST_S (index_u16_16_x0, svuint16_t, uint16_t, -+ z0 = svindex_u16 (16, x0)) -+ -+/* -+** index_u16_x0_m17: -+** mov (w[0-9]+), -17 -+** index z0\.h, w0, \1 -+** ret -+*/ -+TEST_S (index_u16_x0_m17, svuint16_t, uint16_t, -+ z0 = svindex_u16 (x0, -17)) -+ -+/* -+** index_u16_x0_m16: -+** index z0\.h, w0, #-16 -+** ret -+*/ -+TEST_S (index_u16_x0_m16, svuint16_t, uint16_t, -+ z0 = svindex_u16 (x0, -16)) -+ -+/* -+** index_u16_x0_1: -+** index z0\.h, w0, #1 -+** ret -+*/ -+TEST_S (index_u16_x0_1, svuint16_t, uint16_t, -+ z0 = svindex_u16 (x0, 1)) -+ -+/* -+** index_u16_x0_15: -+** index z0\.h, w0, #15 -+** ret -+*/ -+TEST_S (index_u16_x0_15, svuint16_t, uint16_t, -+ z0 = svindex_u16 (x0, 15)) -+ -+/* -+** index_u16_x0_16: -+** mov (w[0-9]+), 16 -+** index z0\.h, w0, \1 -+** ret -+*/ -+TEST_S (index_u16_x0_16, svuint16_t, uint16_t, -+ z0 = svindex_u16 (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_u32.c -new file mode 100644 -index 000000000..c2badb05e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_u32.c -@@ -0,0 +1,220 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** index_u32_w0_w1: -+** index z0\.s, w0, w1 -+** ret -+*/ -+TEST_S (index_u32_w0_w1, svuint32_t, uint32_t, -+ z0 = svindex_u32 (x0, x1)) -+ -+/* -+** index_u32_w0_2: -+** index z0\.s, w0, #2 -+** ret -+*/ -+TEST_S (index_u32_w0_2, svuint32_t, uint32_t, -+ z0 = svindex_u32 (x0, 2)) -+ -+/* -+** index_u32_50_2: -+** mov (w[0-9]+), 50 -+** index z0\.s, \1, #2 -+** ret -+*/ -+TEST_S (index_u32_50_2, svuint32_t, uint32_t, -+ z0 = svindex_u32 (50, 2)) -+ -+/* -+** index_u32_0_m17: -+** mov (w[0-9]+), -17 -+** index z0\.s, #0, \1 -+** ret -+*/ -+TEST_S (index_u32_0_m17, svuint32_t, uint32_t, -+ z0 = svindex_u32 (0, -17)) -+ -+/* -+** index_u32_0_m16: -+** index z0\.s, #0, #-16 -+** ret -+*/ -+TEST_S (index_u32_0_m16, svuint32_t, uint32_t, -+ z0 = svindex_u32 (0, -16)) -+ -+/* -+** index_u32_0_1: -+** index z0\.s, #0, #1 -+** ret -+*/ -+TEST_S (index_u32_0_1, svuint32_t, uint32_t, -+ z0 = svindex_u32 (0, 1)) -+ -+/* -+** index_u32_0_15: -+** index z0\.s, #0, #15 -+** ret -+*/ -+TEST_S (index_u32_0_15, svuint32_t, uint32_t, -+ z0 = svindex_u32 (0, 15)) -+ -+/* -+** index_u32_0_16: -+** mov (w[0-9]+), 16 -+** index z0\.s, #0, \1 -+** ret -+*/ -+TEST_S (index_u32_0_16, svuint32_t, uint32_t, -+ z0 = svindex_u32 (0, 16)) -+ -+/* -+** index_u32_m17_1: -+** mov (w[0-9]+), -17 -+** index z0\.s, \1, #1 -+** ret -+*/ -+TEST_S (index_u32_m17_1, svuint32_t, uint32_t, -+ z0 = svindex_u32 (-17, 1)) -+ -+/* -+** index_u32_m16_1: -+** index z0\.s, #-16, #1 -+** ret -+*/ -+TEST_S (index_u32_m16_1, svuint32_t, uint32_t, -+ z0 = svindex_u32 (-16, 1)) -+ -+/* -+** index_u32_m1_1: -+** index z0\.s, #-1, #1 -+** ret -+*/ -+TEST_S (index_u32_m1_1, svuint32_t, uint32_t, -+ z0 = 
svindex_u32 (-1, 1)) -+ -+/* -+** index_u32_1_1: -+** index z0\.s, #1, #1 -+** ret -+*/ -+TEST_S (index_u32_1_1, svuint32_t, uint32_t, -+ z0 = svindex_u32 (1, 1)) -+ -+/* -+** index_u32_15_1: -+** index z0\.s, #15, #1 -+** ret -+*/ -+TEST_S (index_u32_15_1, svuint32_t, uint32_t, -+ z0 = svindex_u32 (15, 1)) -+ -+/* -+** index_u32_16_1: -+** mov (w[0-9]+), 16 -+** index z0\.s, \1, #1 -+** ret -+*/ -+TEST_S (index_u32_16_1, svuint32_t, uint32_t, -+ z0 = svindex_u32 (16, 1)) -+ -+/* -+** index_u32_m17_x0: -+** mov (w[0-9]+), -17 -+** index z0\.s, \1, w0 -+** ret -+*/ -+TEST_S (index_u32_m17_x0, svuint32_t, uint32_t, -+ z0 = svindex_u32 (-17, x0)) -+ -+/* -+** index_u32_m16_x0: -+** index z0\.s, #-16, w0 -+** ret -+*/ -+TEST_S (index_u32_m16_x0, svuint32_t, uint32_t, -+ z0 = svindex_u32 (-16, x0)) -+ -+/* -+** index_u32_m1_x0: -+** index z0\.s, #-1, w0 -+** ret -+*/ -+TEST_S (index_u32_m1_x0, svuint32_t, uint32_t, -+ z0 = svindex_u32 (-1, x0)) -+ -+/* -+** index_u32_0_x0: -+** index z0\.s, #0, w0 -+** ret -+*/ -+TEST_S (index_u32_0_x0, svuint32_t, uint32_t, -+ z0 = svindex_u32 (0, x0)) -+ -+/* -+** index_u32_1_x0: -+** index z0\.s, #1, w0 -+** ret -+*/ -+TEST_S (index_u32_1_x0, svuint32_t, uint32_t, -+ z0 = svindex_u32 (1, x0)) -+ -+/* -+** index_u32_15_x0: -+** index z0\.s, #15, w0 -+** ret -+*/ -+TEST_S (index_u32_15_x0, svuint32_t, uint32_t, -+ z0 = svindex_u32 (15, x0)) -+ -+/* -+** index_u32_16_x0: -+** mov (w[0-9]+), 16 -+** index z0\.s, \1, w0 -+** ret -+*/ -+TEST_S (index_u32_16_x0, svuint32_t, uint32_t, -+ z0 = svindex_u32 (16, x0)) -+ -+/* -+** index_u32_x0_m17: -+** mov (w[0-9]+), -17 -+** index z0\.s, w0, \1 -+** ret -+*/ -+TEST_S (index_u32_x0_m17, svuint32_t, uint32_t, -+ z0 = svindex_u32 (x0, -17)) -+ -+/* -+** index_u32_x0_m16: -+** index z0\.s, w0, #-16 -+** ret -+*/ -+TEST_S (index_u32_x0_m16, svuint32_t, uint32_t, -+ z0 = svindex_u32 (x0, -16)) -+ -+/* -+** index_u32_x0_1: -+** index z0\.s, w0, #1 -+** ret -+*/ -+TEST_S (index_u32_x0_1, svuint32_t, uint32_t, -+ z0 = svindex_u32 (x0, 1)) -+ -+/* -+** index_u32_x0_15: -+** index z0\.s, w0, #15 -+** ret -+*/ -+TEST_S (index_u32_x0_15, svuint32_t, uint32_t, -+ z0 = svindex_u32 (x0, 15)) -+ -+/* -+** index_u32_x0_16: -+** mov (w[0-9]+), 16 -+** index z0\.s, w0, \1 -+** ret -+*/ -+TEST_S (index_u32_x0_16, svuint32_t, uint32_t, -+ z0 = svindex_u32 (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_u64.c -new file mode 100644 -index 000000000..526c5e80a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_u64.c -@@ -0,0 +1,220 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** index_u64_x0_x1: -+** index z0\.d, x0, x1 -+** ret -+*/ -+TEST_S (index_u64_x0_x1, svuint64_t, uint64_t, -+ z0 = svindex_u64 (x0, x1)) -+ -+/* -+** index_u64_x0_2: -+** index z0\.d, x0, #2 -+** ret -+*/ -+TEST_S (index_u64_x0_2, svuint64_t, uint64_t, -+ z0 = svindex_u64 (x0, 2)) -+ -+/* -+** index_u64_50_2: -+** mov (x[0-9]+), 50 -+** index z0\.d, \1, #2 -+** ret -+*/ -+TEST_S (index_u64_50_2, svuint64_t, uint64_t, -+ z0 = svindex_u64 (50, 2)) -+ -+/* -+** index_u64_0_m17: -+** mov (x[0-9]+), -17 -+** index z0\.d, #0, \1 -+** ret -+*/ -+TEST_S (index_u64_0_m17, svuint64_t, uint64_t, -+ z0 = svindex_u64 (0, -17)) -+ -+/* -+** index_u64_0_m16: -+** index z0\.d, #0, #-16 -+** ret -+*/ -+TEST_S (index_u64_0_m16, svuint64_t, uint64_t, -+ z0 = svindex_u64 (0, -16)) -+ -+/* -+** index_u64_0_1: -+** 
index z0\.d, #0, #1 -+** ret -+*/ -+TEST_S (index_u64_0_1, svuint64_t, uint64_t, -+ z0 = svindex_u64 (0, 1)) -+ -+/* -+** index_u64_0_15: -+** index z0\.d, #0, #15 -+** ret -+*/ -+TEST_S (index_u64_0_15, svuint64_t, uint64_t, -+ z0 = svindex_u64 (0, 15)) -+ -+/* -+** index_u64_0_16: -+** mov (x[0-9]+), 16 -+** index z0\.d, #0, \1 -+** ret -+*/ -+TEST_S (index_u64_0_16, svuint64_t, uint64_t, -+ z0 = svindex_u64 (0, 16)) -+ -+/* -+** index_u64_m17_1: -+** mov (x[0-9]+), -17 -+** index z0\.d, \1, #1 -+** ret -+*/ -+TEST_S (index_u64_m17_1, svuint64_t, uint64_t, -+ z0 = svindex_u64 (-17, 1)) -+ -+/* -+** index_u64_m16_1: -+** index z0\.d, #-16, #1 -+** ret -+*/ -+TEST_S (index_u64_m16_1, svuint64_t, uint64_t, -+ z0 = svindex_u64 (-16, 1)) -+ -+/* -+** index_u64_m1_1: -+** index z0\.d, #-1, #1 -+** ret -+*/ -+TEST_S (index_u64_m1_1, svuint64_t, uint64_t, -+ z0 = svindex_u64 (-1, 1)) -+ -+/* -+** index_u64_1_1: -+** index z0\.d, #1, #1 -+** ret -+*/ -+TEST_S (index_u64_1_1, svuint64_t, uint64_t, -+ z0 = svindex_u64 (1, 1)) -+ -+/* -+** index_u64_15_1: -+** index z0\.d, #15, #1 -+** ret -+*/ -+TEST_S (index_u64_15_1, svuint64_t, uint64_t, -+ z0 = svindex_u64 (15, 1)) -+ -+/* -+** index_u64_16_1: -+** mov (x[0-9]+), 16 -+** index z0\.d, \1, #1 -+** ret -+*/ -+TEST_S (index_u64_16_1, svuint64_t, uint64_t, -+ z0 = svindex_u64 (16, 1)) -+ -+/* -+** index_u64_m17_x0: -+** mov (x[0-9]+), -17 -+** index z0\.d, \1, x0 -+** ret -+*/ -+TEST_S (index_u64_m17_x0, svuint64_t, uint64_t, -+ z0 = svindex_u64 (-17, x0)) -+ -+/* -+** index_u64_m16_x0: -+** index z0\.d, #-16, x0 -+** ret -+*/ -+TEST_S (index_u64_m16_x0, svuint64_t, uint64_t, -+ z0 = svindex_u64 (-16, x0)) -+ -+/* -+** index_u64_m1_x0: -+** index z0\.d, #-1, x0 -+** ret -+*/ -+TEST_S (index_u64_m1_x0, svuint64_t, uint64_t, -+ z0 = svindex_u64 (-1, x0)) -+ -+/* -+** index_u64_0_x0: -+** index z0\.d, #0, x0 -+** ret -+*/ -+TEST_S (index_u64_0_x0, svuint64_t, uint64_t, -+ z0 = svindex_u64 (0, x0)) -+ -+/* -+** index_u64_1_x0: -+** index z0\.d, #1, x0 -+** ret -+*/ -+TEST_S (index_u64_1_x0, svuint64_t, uint64_t, -+ z0 = svindex_u64 (1, x0)) -+ -+/* -+** index_u64_15_x0: -+** index z0\.d, #15, x0 -+** ret -+*/ -+TEST_S (index_u64_15_x0, svuint64_t, uint64_t, -+ z0 = svindex_u64 (15, x0)) -+ -+/* -+** index_u64_16_x0: -+** mov (x[0-9]+), 16 -+** index z0\.d, \1, x0 -+** ret -+*/ -+TEST_S (index_u64_16_x0, svuint64_t, uint64_t, -+ z0 = svindex_u64 (16, x0)) -+ -+/* -+** index_u64_x0_m17: -+** mov (x[0-9]+), -17 -+** index z0\.d, x0, \1 -+** ret -+*/ -+TEST_S (index_u64_x0_m17, svuint64_t, uint64_t, -+ z0 = svindex_u64 (x0, -17)) -+ -+/* -+** index_u64_x0_m16: -+** index z0\.d, x0, #-16 -+** ret -+*/ -+TEST_S (index_u64_x0_m16, svuint64_t, uint64_t, -+ z0 = svindex_u64 (x0, -16)) -+ -+/* -+** index_u64_x0_1: -+** index z0\.d, x0, #1 -+** ret -+*/ -+TEST_S (index_u64_x0_1, svuint64_t, uint64_t, -+ z0 = svindex_u64 (x0, 1)) -+ -+/* -+** index_u64_x0_15: -+** index z0\.d, x0, #15 -+** ret -+*/ -+TEST_S (index_u64_x0_15, svuint64_t, uint64_t, -+ z0 = svindex_u64 (x0, 15)) -+ -+/* -+** index_u64_x0_16: -+** mov (x[0-9]+), 16 -+** index z0\.d, x0, \1 -+** ret -+*/ -+TEST_S (index_u64_x0_16, svuint64_t, uint64_t, -+ z0 = svindex_u64 (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_u8.c -new file mode 100644 -index 000000000..c6ce12ec8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/index_u8.c -@@ -0,0 +1,220 @@ -+/* { dg-final { check-function-bodies "**" "" 
"-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** index_u8_w0_w1: -+** index z0\.b, w0, w1 -+** ret -+*/ -+TEST_S (index_u8_w0_w1, svuint8_t, uint8_t, -+ z0 = svindex_u8 (x0, x1)) -+ -+/* -+** index_u8_w0_2: -+** index z0\.b, w0, #2 -+** ret -+*/ -+TEST_S (index_u8_w0_2, svuint8_t, uint8_t, -+ z0 = svindex_u8 (x0, 2)) -+ -+/* -+** index_u8_50_2: -+** mov (w[0-9]+), 50 -+** index z0\.b, \1, #2 -+** ret -+*/ -+TEST_S (index_u8_50_2, svuint8_t, uint8_t, -+ z0 = svindex_u8 (50, 2)) -+ -+/* -+** index_u8_0_m17: -+** mov (w[0-9]+), -17 -+** index z0\.b, #0, \1 -+** ret -+*/ -+TEST_S (index_u8_0_m17, svuint8_t, uint8_t, -+ z0 = svindex_u8 (0, -17)) -+ -+/* -+** index_u8_0_m16: -+** index z0\.b, #0, #-16 -+** ret -+*/ -+TEST_S (index_u8_0_m16, svuint8_t, uint8_t, -+ z0 = svindex_u8 (0, -16)) -+ -+/* -+** index_u8_0_1: -+** index z0\.b, #0, #1 -+** ret -+*/ -+TEST_S (index_u8_0_1, svuint8_t, uint8_t, -+ z0 = svindex_u8 (0, 1)) -+ -+/* -+** index_u8_0_15: -+** index z0\.b, #0, #15 -+** ret -+*/ -+TEST_S (index_u8_0_15, svuint8_t, uint8_t, -+ z0 = svindex_u8 (0, 15)) -+ -+/* -+** index_u8_0_16: -+** mov (w[0-9]+), 16 -+** index z0\.b, #0, \1 -+** ret -+*/ -+TEST_S (index_u8_0_16, svuint8_t, uint8_t, -+ z0 = svindex_u8 (0, 16)) -+ -+/* -+** index_u8_m17_1: -+** mov (w[0-9]+), -17 -+** index z0\.b, \1, #1 -+** ret -+*/ -+TEST_S (index_u8_m17_1, svuint8_t, uint8_t, -+ z0 = svindex_u8 (-17, 1)) -+ -+/* -+** index_u8_m16_1: -+** index z0\.b, #-16, #1 -+** ret -+*/ -+TEST_S (index_u8_m16_1, svuint8_t, uint8_t, -+ z0 = svindex_u8 (-16, 1)) -+ -+/* -+** index_u8_m1_1: -+** index z0\.b, #-1, #1 -+** ret -+*/ -+TEST_S (index_u8_m1_1, svuint8_t, uint8_t, -+ z0 = svindex_u8 (-1, 1)) -+ -+/* -+** index_u8_1_1: -+** index z0\.b, #1, #1 -+** ret -+*/ -+TEST_S (index_u8_1_1, svuint8_t, uint8_t, -+ z0 = svindex_u8 (1, 1)) -+ -+/* -+** index_u8_15_1: -+** index z0\.b, #15, #1 -+** ret -+*/ -+TEST_S (index_u8_15_1, svuint8_t, uint8_t, -+ z0 = svindex_u8 (15, 1)) -+ -+/* -+** index_u8_16_1: -+** mov (w[0-9]+), 16 -+** index z0\.b, \1, #1 -+** ret -+*/ -+TEST_S (index_u8_16_1, svuint8_t, uint8_t, -+ z0 = svindex_u8 (16, 1)) -+ -+/* -+** index_u8_m17_x0: -+** mov (w[0-9]+), -17 -+** index z0\.b, \1, w0 -+** ret -+*/ -+TEST_S (index_u8_m17_x0, svuint8_t, uint8_t, -+ z0 = svindex_u8 (-17, x0)) -+ -+/* -+** index_u8_m16_x0: -+** index z0\.b, #-16, w0 -+** ret -+*/ -+TEST_S (index_u8_m16_x0, svuint8_t, uint8_t, -+ z0 = svindex_u8 (-16, x0)) -+ -+/* -+** index_u8_m1_x0: -+** index z0\.b, #-1, w0 -+** ret -+*/ -+TEST_S (index_u8_m1_x0, svuint8_t, uint8_t, -+ z0 = svindex_u8 (-1, x0)) -+ -+/* -+** index_u8_0_x0: -+** index z0\.b, #0, w0 -+** ret -+*/ -+TEST_S (index_u8_0_x0, svuint8_t, uint8_t, -+ z0 = svindex_u8 (0, x0)) -+ -+/* -+** index_u8_1_x0: -+** index z0\.b, #1, w0 -+** ret -+*/ -+TEST_S (index_u8_1_x0, svuint8_t, uint8_t, -+ z0 = svindex_u8 (1, x0)) -+ -+/* -+** index_u8_15_x0: -+** index z0\.b, #15, w0 -+** ret -+*/ -+TEST_S (index_u8_15_x0, svuint8_t, uint8_t, -+ z0 = svindex_u8 (15, x0)) -+ -+/* -+** index_u8_16_x0: -+** mov (w[0-9]+), 16 -+** index z0\.b, \1, w0 -+** ret -+*/ -+TEST_S (index_u8_16_x0, svuint8_t, uint8_t, -+ z0 = svindex_u8 (16, x0)) -+ -+/* -+** index_u8_x0_m17: -+** mov (w[0-9]+), -17 -+** index z0\.b, w0, \1 -+** ret -+*/ -+TEST_S (index_u8_x0_m17, svuint8_t, uint8_t, -+ z0 = svindex_u8 (x0, -17)) -+ -+/* -+** index_u8_x0_m16: -+** index z0\.b, w0, #-16 -+** ret -+*/ -+TEST_S (index_u8_x0_m16, svuint8_t, uint8_t, -+ z0 = svindex_u8 (x0, -16)) -+ -+/* -+** index_u8_x0_1: -+** index 
z0\.b, w0, #1 -+** ret -+*/ -+TEST_S (index_u8_x0_1, svuint8_t, uint8_t, -+ z0 = svindex_u8 (x0, 1)) -+ -+/* -+** index_u8_x0_15: -+** index z0\.b, w0, #15 -+** ret -+*/ -+TEST_S (index_u8_x0_15, svuint8_t, uint8_t, -+ z0 = svindex_u8 (x0, 15)) -+ -+/* -+** index_u8_x0_16: -+** mov (w[0-9]+), 16 -+** index z0\.b, w0, \1 -+** ret -+*/ -+TEST_S (index_u8_x0_16, svuint8_t, uint8_t, -+ z0 = svindex_u8 (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_bf16.c -new file mode 100644 -index 000000000..55afdba62 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_bf16.c -@@ -0,0 +1,22 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** insr_h4_bf16_tied1: -+** insr z0\.h, h4 -+** ret -+*/ -+TEST_UNIFORM_ZD (insr_h4_bf16_tied1, svbfloat16_t, bfloat16_t, -+ z0 = svinsr_n_bf16 (z0, d4), -+ z0 = svinsr (z0, d4)) -+ -+/* -+** insr_h4_bf16_untied: -+** movprfx z0, z1 -+** insr z0\.h, h4 -+** ret -+*/ -+TEST_UNIFORM_ZD (insr_h4_bf16_untied, svbfloat16_t, bfloat16_t, -+ z0 = svinsr_n_bf16 (z1, d4), -+ z0 = svinsr (z1, d4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_f16.c -new file mode 100644 -index 000000000..f01a36189 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_f16.c -@@ -0,0 +1,51 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** insr_h4_f16_tied1: -+** insr z0\.h, h4 -+** ret -+*/ -+TEST_UNIFORM_ZD (insr_h4_f16_tied1, svfloat16_t, __fp16, -+ z0 = svinsr_n_f16 (z0, d4), -+ z0 = svinsr (z0, d4)) -+ -+/* -+** insr_h4_f16_untied: -+** movprfx z0, z1 -+** insr z0\.h, h4 -+** ret -+*/ -+TEST_UNIFORM_ZD (insr_h4_f16_untied, svfloat16_t, __fp16, -+ z0 = svinsr_n_f16 (z1, d4), -+ z0 = svinsr (z1, d4)) -+ -+/* -+** insr_0_f16_tied1: -+** insr z0\.h, wzr -+** ret -+*/ -+TEST_UNIFORM_Z (insr_0_f16_tied1, svfloat16_t, -+ z0 = svinsr_n_f16 (z0, 0), -+ z0 = svinsr (z0, 0)) -+ -+/* -+** insr_0_f16_untied: -+** movprfx z0, z1 -+** insr z0\.h, wzr -+** ret -+*/ -+TEST_UNIFORM_Z (insr_0_f16_untied, svfloat16_t, -+ z0 = svinsr_n_f16 (z1, 0), -+ z0 = svinsr (z1, 0)) -+ -+/* -+** insr_1_f16: -+** fmov (h[0-9]+), #?1\.0(?:e\+0)? 
-+** insr z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (insr_1_f16, svfloat16_t, -+ z0 = svinsr_n_f16 (z0, 1), -+ z0 = svinsr (z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_f32.c -new file mode 100644 -index 000000000..e339727b1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_f32.c -@@ -0,0 +1,51 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** insr_s4_f32_tied1: -+** insr z0\.s, s4 -+** ret -+*/ -+TEST_UNIFORM_ZD (insr_s4_f32_tied1, svfloat32_t, float, -+ z0 = svinsr_n_f32 (z0, d4), -+ z0 = svinsr (z0, d4)) -+ -+/* -+** insr_s4_f32_untied: -+** movprfx z0, z1 -+** insr z0\.s, s4 -+** ret -+*/ -+TEST_UNIFORM_ZD (insr_s4_f32_untied, svfloat32_t, float, -+ z0 = svinsr_n_f32 (z1, d4), -+ z0 = svinsr (z1, d4)) -+ -+/* -+** insr_0_f32_tied1: -+** insr z0\.s, wzr -+** ret -+*/ -+TEST_UNIFORM_Z (insr_0_f32_tied1, svfloat32_t, -+ z0 = svinsr_n_f32 (z0, 0), -+ z0 = svinsr (z0, 0)) -+ -+/* -+** insr_0_f32_untied: -+** movprfx z0, z1 -+** insr z0\.s, wzr -+** ret -+*/ -+TEST_UNIFORM_Z (insr_0_f32_untied, svfloat32_t, -+ z0 = svinsr_n_f32 (z1, 0), -+ z0 = svinsr (z1, 0)) -+ -+/* -+** insr_1_f32: -+** fmov (s[0-9]+), #?1\.0(?:e\+0)? -+** insr z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (insr_1_f32, svfloat32_t, -+ z0 = svinsr_n_f32 (z0, 1), -+ z0 = svinsr (z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_f64.c -new file mode 100644 -index 000000000..9400225a5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_f64.c -@@ -0,0 +1,51 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** insr_d4_f64_tied1: -+** insr z0\.d, d4 -+** ret -+*/ -+TEST_UNIFORM_ZD (insr_d4_f64_tied1, svfloat64_t, double, -+ z0 = svinsr_n_f64 (z0, d4), -+ z0 = svinsr (z0, d4)) -+ -+/* -+** insr_d4_f64_untied: -+** movprfx z0, z1 -+** insr z0\.d, d4 -+** ret -+*/ -+TEST_UNIFORM_ZD (insr_d4_f64_untied, svfloat64_t, double, -+ z0 = svinsr_n_f64 (z1, d4), -+ z0 = svinsr (z1, d4)) -+ -+/* -+** insr_0_f64_tied1: -+** insr z0\.d, xzr -+** ret -+*/ -+TEST_UNIFORM_Z (insr_0_f64_tied1, svfloat64_t, -+ z0 = svinsr_n_f64 (z0, 0), -+ z0 = svinsr (z0, 0)) -+ -+/* -+** insr_0_f64_untied: -+** movprfx z0, z1 -+** insr z0\.d, xzr -+** ret -+*/ -+TEST_UNIFORM_Z (insr_0_f64_untied, svfloat64_t, -+ z0 = svinsr_n_f64 (z1, 0), -+ z0 = svinsr (z1, 0)) -+ -+/* -+** insr_1_f64: -+** fmov (d[0-9]+), #?1\.0(?:e\+0)? 
-+** insr z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (insr_1_f64, svfloat64_t, -+ z0 = svinsr_n_f64 (z0, 1), -+ z0 = svinsr (z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_s16.c -new file mode 100644 -index 000000000..651977a9d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_s16.c -@@ -0,0 +1,56 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** insr_w0_s16_tied1: -+** insr z0\.h, w0 -+** ret -+*/ -+TEST_UNIFORM_ZX (insr_w0_s16_tied1, svint16_t, int16_t, -+ z0 = svinsr_n_s16 (z0, x0), -+ z0 = svinsr (z0, x0)) -+ -+/* -+** insr_w0_s16_untied: -+** movprfx z0, z1 -+** insr z0\.h, w0 -+** ret -+*/ -+TEST_UNIFORM_ZX (insr_w0_s16_untied, svint16_t, int16_t, -+ z0 = svinsr_n_s16 (z1, x0), -+ z0 = svinsr (z1, x0)) -+ -+/* -+** insr_0_s16_tied1: -+** insr z0\.h, wzr -+** ret -+*/ -+TEST_UNIFORM_Z (insr_0_s16_tied1, svint16_t, -+ z0 = svinsr_n_s16 (z0, 0), -+ z0 = svinsr (z0, 0)) -+ -+/* -+** insr_0_s16_untied: -+** movprfx z0, z1 -+** insr z0\.h, wzr -+** ret -+*/ -+TEST_UNIFORM_Z (insr_0_s16_untied, svint16_t, -+ z0 = svinsr_n_s16 (z1, 0), -+ z0 = svinsr (z1, 0)) -+ -+/* -+** insr_1_s16: -+** ( -+** mov (w[0-9]+), #?1 -+** insr z0\.h, \1 -+** | -+** movi v([0-9]+)\.4h, 0x1 -+** insr z0\.h, h\2 -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (insr_1_s16, svint16_t, -+ z0 = svinsr_n_s16 (z0, 1), -+ z0 = svinsr (z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_s32.c -new file mode 100644 -index 000000000..a1dcfc090 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_s32.c -@@ -0,0 +1,56 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** insr_w0_s32_tied1: -+** insr z0\.s, w0 -+** ret -+*/ -+TEST_UNIFORM_ZX (insr_w0_s32_tied1, svint32_t, int32_t, -+ z0 = svinsr_n_s32 (z0, x0), -+ z0 = svinsr (z0, x0)) -+ -+/* -+** insr_w0_s32_untied: -+** movprfx z0, z1 -+** insr z0\.s, w0 -+** ret -+*/ -+TEST_UNIFORM_ZX (insr_w0_s32_untied, svint32_t, int32_t, -+ z0 = svinsr_n_s32 (z1, x0), -+ z0 = svinsr (z1, x0)) -+ -+/* -+** insr_0_s32_tied1: -+** insr z0\.s, wzr -+** ret -+*/ -+TEST_UNIFORM_Z (insr_0_s32_tied1, svint32_t, -+ z0 = svinsr_n_s32 (z0, 0), -+ z0 = svinsr (z0, 0)) -+ -+/* -+** insr_0_s32_untied: -+** movprfx z0, z1 -+** insr z0\.s, wzr -+** ret -+*/ -+TEST_UNIFORM_Z (insr_0_s32_untied, svint32_t, -+ z0 = svinsr_n_s32 (z1, 0), -+ z0 = svinsr (z1, 0)) -+ -+/* -+** insr_1_s32: -+** ( -+** mov (w[0-9]+), #?1 -+** insr z0\.s, \1 -+** | -+** movi v([0-9]+)\.2s, 0x1 -+** insr z0\.s, s\2 -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (insr_1_s32, svint32_t, -+ z0 = svinsr_n_s32 (z0, 1), -+ z0 = svinsr (z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_s64.c -new file mode 100644 -index 000000000..32cdc8263 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_s64.c -@@ -0,0 +1,56 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** insr_x0_s64_tied1: -+** insr z0\.d, x0 -+** ret -+*/ -+TEST_UNIFORM_ZX (insr_x0_s64_tied1, svint64_t, int64_t, -+ z0 = svinsr_n_s64 (z0, x0), -+ z0 = svinsr (z0, x0)) -+ -+/* -+** insr_x0_s64_untied: -+** movprfx z0, z1 -+** insr z0\.d, x0 -+** ret -+*/ -+TEST_UNIFORM_ZX (insr_x0_s64_untied, 
svint64_t, int64_t, -+ z0 = svinsr_n_s64 (z1, x0), -+ z0 = svinsr (z1, x0)) -+ -+/* -+** insr_0_s64_tied1: -+** insr z0\.d, xzr -+** ret -+*/ -+TEST_UNIFORM_Z (insr_0_s64_tied1, svint64_t, -+ z0 = svinsr_n_s64 (z0, 0), -+ z0 = svinsr (z0, 0)) -+ -+/* -+** insr_0_s64_untied: -+** movprfx z0, z1 -+** insr z0\.d, xzr -+** ret -+*/ -+TEST_UNIFORM_Z (insr_0_s64_untied, svint64_t, -+ z0 = svinsr_n_s64 (z1, 0), -+ z0 = svinsr (z1, 0)) -+ -+/* -+** insr_1_s64: -+** ( -+** mov (x[0-9]+), #?1 -+** insr z0\.d, \1 -+** | -+** movi v([0-9]+)\.2d, 0x1 -+** insr z0\.d, d\2 -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (insr_1_s64, svint64_t, -+ z0 = svinsr_n_s64 (z0, 1), -+ z0 = svinsr (z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_s8.c -new file mode 100644 -index 000000000..cb69b09fa ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_s8.c -@@ -0,0 +1,56 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** insr_w0_s8_tied1: -+** insr z0\.b, w0 -+** ret -+*/ -+TEST_UNIFORM_ZX (insr_w0_s8_tied1, svint8_t, int8_t, -+ z0 = svinsr_n_s8 (z0, x0), -+ z0 = svinsr (z0, x0)) -+ -+/* -+** insr_w0_s8_untied: -+** movprfx z0, z1 -+** insr z0\.b, w0 -+** ret -+*/ -+TEST_UNIFORM_ZX (insr_w0_s8_untied, svint8_t, int8_t, -+ z0 = svinsr_n_s8 (z1, x0), -+ z0 = svinsr (z1, x0)) -+ -+/* -+** insr_0_s8_tied1: -+** insr z0\.b, wzr -+** ret -+*/ -+TEST_UNIFORM_Z (insr_0_s8_tied1, svint8_t, -+ z0 = svinsr_n_s8 (z0, 0), -+ z0 = svinsr (z0, 0)) -+ -+/* -+** insr_0_s8_untied: -+** movprfx z0, z1 -+** insr z0\.b, wzr -+** ret -+*/ -+TEST_UNIFORM_Z (insr_0_s8_untied, svint8_t, -+ z0 = svinsr_n_s8 (z1, 0), -+ z0 = svinsr (z1, 0)) -+ -+/* -+** insr_1_s8: -+** ( -+** mov (w[0-9]+), #?1 -+** insr z0\.b, \1 -+** | -+** movi v([0-9]+)\.8b, 0x1 -+** insr z0\.b, b\2 -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (insr_1_s8, svint8_t, -+ z0 = svinsr_n_s8 (z0, 1), -+ z0 = svinsr (z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_u16.c -new file mode 100644 -index 000000000..35af77402 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_u16.c -@@ -0,0 +1,56 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** insr_w0_u16_tied1: -+** insr z0\.h, w0 -+** ret -+*/ -+TEST_UNIFORM_ZX (insr_w0_u16_tied1, svuint16_t, uint16_t, -+ z0 = svinsr_n_u16 (z0, x0), -+ z0 = svinsr (z0, x0)) -+ -+/* -+** insr_w0_u16_untied: -+** movprfx z0, z1 -+** insr z0\.h, w0 -+** ret -+*/ -+TEST_UNIFORM_ZX (insr_w0_u16_untied, svuint16_t, uint16_t, -+ z0 = svinsr_n_u16 (z1, x0), -+ z0 = svinsr (z1, x0)) -+ -+/* -+** insr_0_u16_tied1: -+** insr z0\.h, wzr -+** ret -+*/ -+TEST_UNIFORM_Z (insr_0_u16_tied1, svuint16_t, -+ z0 = svinsr_n_u16 (z0, 0), -+ z0 = svinsr (z0, 0)) -+ -+/* -+** insr_0_u16_untied: -+** movprfx z0, z1 -+** insr z0\.h, wzr -+** ret -+*/ -+TEST_UNIFORM_Z (insr_0_u16_untied, svuint16_t, -+ z0 = svinsr_n_u16 (z1, 0), -+ z0 = svinsr (z1, 0)) -+ -+/* -+** insr_1_u16: -+** ( -+** mov (w[0-9]+), #?1 -+** insr z0\.h, \1 -+** | -+** movi v([0-9]+)\.4h, 0x1 -+** insr z0\.h, h\2 -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (insr_1_u16, svuint16_t, -+ z0 = svinsr_n_u16 (z0, 1), -+ z0 = svinsr (z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_u32.c -new file mode 100644 -index 
000000000..8a72e7f2a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_u32.c -@@ -0,0 +1,56 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** insr_w0_u32_tied1: -+** insr z0\.s, w0 -+** ret -+*/ -+TEST_UNIFORM_ZX (insr_w0_u32_tied1, svuint32_t, uint32_t, -+ z0 = svinsr_n_u32 (z0, x0), -+ z0 = svinsr (z0, x0)) -+ -+/* -+** insr_w0_u32_untied: -+** movprfx z0, z1 -+** insr z0\.s, w0 -+** ret -+*/ -+TEST_UNIFORM_ZX (insr_w0_u32_untied, svuint32_t, uint32_t, -+ z0 = svinsr_n_u32 (z1, x0), -+ z0 = svinsr (z1, x0)) -+ -+/* -+** insr_0_u32_tied1: -+** insr z0\.s, wzr -+** ret -+*/ -+TEST_UNIFORM_Z (insr_0_u32_tied1, svuint32_t, -+ z0 = svinsr_n_u32 (z0, 0), -+ z0 = svinsr (z0, 0)) -+ -+/* -+** insr_0_u32_untied: -+** movprfx z0, z1 -+** insr z0\.s, wzr -+** ret -+*/ -+TEST_UNIFORM_Z (insr_0_u32_untied, svuint32_t, -+ z0 = svinsr_n_u32 (z1, 0), -+ z0 = svinsr (z1, 0)) -+ -+/* -+** insr_1_u32: -+** ( -+** mov (w[0-9]+), #?1 -+** insr z0\.s, \1 -+** | -+** movi v([0-9]+)\.2s, 0x1 -+** insr z0\.s, s\2 -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (insr_1_u32, svuint32_t, -+ z0 = svinsr_n_u32 (z0, 1), -+ z0 = svinsr (z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_u64.c -new file mode 100644 -index 000000000..ab23f677d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_u64.c -@@ -0,0 +1,56 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** insr_x0_u64_tied1: -+** insr z0\.d, x0 -+** ret -+*/ -+TEST_UNIFORM_ZX (insr_x0_u64_tied1, svuint64_t, uint64_t, -+ z0 = svinsr_n_u64 (z0, x0), -+ z0 = svinsr (z0, x0)) -+ -+/* -+** insr_x0_u64_untied: -+** movprfx z0, z1 -+** insr z0\.d, x0 -+** ret -+*/ -+TEST_UNIFORM_ZX (insr_x0_u64_untied, svuint64_t, uint64_t, -+ z0 = svinsr_n_u64 (z1, x0), -+ z0 = svinsr (z1, x0)) -+ -+/* -+** insr_0_u64_tied1: -+** insr z0\.d, xzr -+** ret -+*/ -+TEST_UNIFORM_Z (insr_0_u64_tied1, svuint64_t, -+ z0 = svinsr_n_u64 (z0, 0), -+ z0 = svinsr (z0, 0)) -+ -+/* -+** insr_0_u64_untied: -+** movprfx z0, z1 -+** insr z0\.d, xzr -+** ret -+*/ -+TEST_UNIFORM_Z (insr_0_u64_untied, svuint64_t, -+ z0 = svinsr_n_u64 (z1, 0), -+ z0 = svinsr (z1, 0)) -+ -+/* -+** insr_1_u64: -+** ( -+** mov (x[0-9]+), #?1 -+** insr z0\.d, \1 -+** | -+** movi v([0-9]+)\.2d, 0x1 -+** insr z0\.d, d\2 -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (insr_1_u64, svuint64_t, -+ z0 = svinsr_n_u64 (z0, 1), -+ z0 = svinsr (z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_u8.c -new file mode 100644 -index 000000000..549d71882 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/insr_u8.c -@@ -0,0 +1,56 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** insr_w0_u8_tied1: -+** insr z0\.b, w0 -+** ret -+*/ -+TEST_UNIFORM_ZX (insr_w0_u8_tied1, svuint8_t, uint8_t, -+ z0 = svinsr_n_u8 (z0, x0), -+ z0 = svinsr (z0, x0)) -+ -+/* -+** insr_w0_u8_untied: -+** movprfx z0, z1 -+** insr z0\.b, w0 -+** ret -+*/ -+TEST_UNIFORM_ZX (insr_w0_u8_untied, svuint8_t, uint8_t, -+ z0 = svinsr_n_u8 (z1, x0), -+ z0 = svinsr (z1, x0)) -+ -+/* -+** insr_0_u8_tied1: -+** insr z0\.b, wzr -+** ret -+*/ -+TEST_UNIFORM_Z (insr_0_u8_tied1, svuint8_t, -+ z0 = svinsr_n_u8 (z0, 0), -+ z0 = svinsr (z0, 0)) -+ -+/* -+** insr_0_u8_untied: -+** movprfx z0, z1 
-+** insr z0\.b, wzr -+** ret -+*/ -+TEST_UNIFORM_Z (insr_0_u8_untied, svuint8_t, -+ z0 = svinsr_n_u8 (z1, 0), -+ z0 = svinsr (z1, 0)) -+ -+/* -+** insr_1_u8: -+** ( -+** mov (w[0-9]+), #?1 -+** insr z0\.b, \1 -+** | -+** movi v([0-9]+)\.8b, 0x1 -+** insr z0\.b, b\2 -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (insr_1_u8, svuint8_t, -+ z0 = svinsr_n_u8 (z0, 1), -+ z0 = svinsr (z0, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_bf16.c -new file mode 100644 -index 000000000..da30e05e5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_bf16.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lasta_d0_bf16_tied: -+** lasta h0, p0, z0\.h -+** ret -+*/ -+TEST_REDUCTION_D (lasta_d0_bf16_tied, bfloat16_t, svbfloat16_t, -+ d0 = svlasta_bf16 (p0, z0), -+ d0 = svlasta (p0, z0)) -+ -+/* -+** lasta_d0_bf16_untied: -+** lasta h0, p0, z1\.h -+** ret -+*/ -+TEST_REDUCTION_D (lasta_d0_bf16_untied, bfloat16_t, svbfloat16_t, -+ d0 = svlasta_bf16 (p0, z1), -+ d0 = svlasta (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_f16.c -new file mode 100644 -index 000000000..972b55ab6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_f16.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lasta_d0_f16_tied: -+** lasta h0, p0, z0\.h -+** ret -+*/ -+TEST_REDUCTION_D (lasta_d0_f16_tied, float16_t, svfloat16_t, -+ d0 = svlasta_f16 (p0, z0), -+ d0 = svlasta (p0, z0)) -+ -+/* -+** lasta_d0_f16_untied: -+** lasta h0, p0, z1\.h -+** ret -+*/ -+TEST_REDUCTION_D (lasta_d0_f16_untied, float16_t, svfloat16_t, -+ d0 = svlasta_f16 (p0, z1), -+ d0 = svlasta (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_f32.c -new file mode 100644 -index 000000000..cfb537f2f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_f32.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lasta_d0_f32_tied: -+** lasta s0, p0, z0\.s -+** ret -+*/ -+TEST_REDUCTION_D (lasta_d0_f32_tied, float32_t, svfloat32_t, -+ d0 = svlasta_f32 (p0, z0), -+ d0 = svlasta (p0, z0)) -+ -+/* -+** lasta_d0_f32_untied: -+** lasta s0, p0, z1\.s -+** ret -+*/ -+TEST_REDUCTION_D (lasta_d0_f32_untied, float32_t, svfloat32_t, -+ d0 = svlasta_f32 (p0, z1), -+ d0 = svlasta (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_f64.c -new file mode 100644 -index 000000000..a4a8a74c9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_f64.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lasta_d0_f64_tied: -+** lasta d0, p0, z0\.d -+** ret -+*/ -+TEST_REDUCTION_D (lasta_d0_f64_tied, float64_t, svfloat64_t, -+ d0 = svlasta_f64 (p0, z0), -+ d0 = svlasta (p0, z0)) -+ -+/* -+** lasta_d0_f64_untied: -+** lasta d0, p0, z1\.d -+** ret -+*/ -+TEST_REDUCTION_D (lasta_d0_f64_untied, float64_t, svfloat64_t, -+ d0 = svlasta_f64 (p0, z1), -+ d0 = svlasta (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_s16.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_s16.c -new file mode 100644 -index 000000000..54bd0248f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_s16.c -@@ -0,0 +1,12 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lasta_x0_s16: -+** lasta w0, p0, z0\.h -+** ret -+*/ -+TEST_REDUCTION_X (lasta_x0_s16, int16_t, svint16_t, -+ x0 = svlasta_s16 (p0, z0), -+ x0 = svlasta (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_s32.c -new file mode 100644 -index 000000000..18f852f94 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_s32.c -@@ -0,0 +1,12 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lasta_x0_s32: -+** lasta w0, p0, z0\.s -+** ret -+*/ -+TEST_REDUCTION_X (lasta_x0_s32, int32_t, svint32_t, -+ x0 = svlasta_s32 (p0, z0), -+ x0 = svlasta (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_s64.c -new file mode 100644 -index 000000000..6e45af3d4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_s64.c -@@ -0,0 +1,12 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lasta_x0_s64: -+** lasta x0, p0, z0\.d -+** ret -+*/ -+TEST_REDUCTION_X (lasta_x0_s64, int64_t, svint64_t, -+ x0 = svlasta_s64 (p0, z0), -+ x0 = svlasta (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_s8.c -new file mode 100644 -index 000000000..58e574f30 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_s8.c -@@ -0,0 +1,12 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lasta_x0_s8: -+** lasta w0, p0, z0\.b -+** ret -+*/ -+TEST_REDUCTION_X (lasta_x0_s8, int8_t, svint8_t, -+ x0 = svlasta_s8 (p0, z0), -+ x0 = svlasta (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_u16.c -new file mode 100644 -index 000000000..a0e14eca4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_u16.c -@@ -0,0 +1,12 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lasta_x0_u16: -+** lasta w0, p0, z0\.h -+** ret -+*/ -+TEST_REDUCTION_X (lasta_x0_u16, uint16_t, svuint16_t, -+ x0 = svlasta_u16 (p0, z0), -+ x0 = svlasta (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_u32.c -new file mode 100644 -index 000000000..dab37c36a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_u32.c -@@ -0,0 +1,12 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lasta_x0_u32: -+** lasta w0, p0, z0\.s -+** ret -+*/ -+TEST_REDUCTION_X (lasta_x0_u32, uint32_t, svuint32_t, -+ x0 = svlasta_u32 (p0, z0), -+ x0 = svlasta (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_u64.c -new file mode 100644 -index 000000000..c766f36ec ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_u64.c -@@ -0,0 +1,12 @@ -+/* { 
dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lasta_x0_u64: -+** lasta x0, p0, z0\.d -+** ret -+*/ -+TEST_REDUCTION_X (lasta_x0_u64, uint64_t, svuint64_t, -+ x0 = svlasta_u64 (p0, z0), -+ x0 = svlasta (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_u8.c -new file mode 100644 -index 000000000..a83f25fe4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lasta_u8.c -@@ -0,0 +1,12 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lasta_x0_u8: -+** lasta w0, p0, z0\.b -+** ret -+*/ -+TEST_REDUCTION_X (lasta_x0_u8, uint8_t, svuint8_t, -+ x0 = svlasta_u8 (p0, z0), -+ x0 = svlasta (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_bf16.c -new file mode 100644 -index 000000000..01ba39a02 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_bf16.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lastb_d0_bf16_tied: -+** lastb h0, p0, z0\.h -+** ret -+*/ -+TEST_REDUCTION_D (lastb_d0_bf16_tied, bfloat16_t, svbfloat16_t, -+ d0 = svlastb_bf16 (p0, z0), -+ d0 = svlastb (p0, z0)) -+ -+/* -+** lastb_d0_bf16_untied: -+** lastb h0, p0, z1\.h -+** ret -+*/ -+TEST_REDUCTION_D (lastb_d0_bf16_untied, bfloat16_t, svbfloat16_t, -+ d0 = svlastb_bf16 (p0, z1), -+ d0 = svlastb (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_f16.c -new file mode 100644 -index 000000000..0bc7e9ef4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_f16.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lastb_d0_f16_tied: -+** lastb h0, p0, z0\.h -+** ret -+*/ -+TEST_REDUCTION_D (lastb_d0_f16_tied, float16_t, svfloat16_t, -+ d0 = svlastb_f16 (p0, z0), -+ d0 = svlastb (p0, z0)) -+ -+/* -+** lastb_d0_f16_untied: -+** lastb h0, p0, z1\.h -+** ret -+*/ -+TEST_REDUCTION_D (lastb_d0_f16_untied, float16_t, svfloat16_t, -+ d0 = svlastb_f16 (p0, z1), -+ d0 = svlastb (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_f32.c -new file mode 100644 -index 000000000..b33d61eee ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_f32.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lastb_d0_f32_tied: -+** lastb s0, p0, z0\.s -+** ret -+*/ -+TEST_REDUCTION_D (lastb_d0_f32_tied, float32_t, svfloat32_t, -+ d0 = svlastb_f32 (p0, z0), -+ d0 = svlastb (p0, z0)) -+ -+/* -+** lastb_d0_f32_untied: -+** lastb s0, p0, z1\.s -+** ret -+*/ -+TEST_REDUCTION_D (lastb_d0_f32_untied, float32_t, svfloat32_t, -+ d0 = svlastb_f32 (p0, z1), -+ d0 = svlastb (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_f64.c -new file mode 100644 -index 000000000..9fa7de706 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_f64.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lastb_d0_f64_tied: -+** lastb 
d0, p0, z0\.d -+** ret -+*/ -+TEST_REDUCTION_D (lastb_d0_f64_tied, float64_t, svfloat64_t, -+ d0 = svlastb_f64 (p0, z0), -+ d0 = svlastb (p0, z0)) -+ -+/* -+** lastb_d0_f64_untied: -+** lastb d0, p0, z1\.d -+** ret -+*/ -+TEST_REDUCTION_D (lastb_d0_f64_untied, float64_t, svfloat64_t, -+ d0 = svlastb_f64 (p0, z1), -+ d0 = svlastb (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_s16.c -new file mode 100644 -index 000000000..6575f21cd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_s16.c -@@ -0,0 +1,12 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lastb_x0_s16: -+** lastb w0, p0, z0\.h -+** ret -+*/ -+TEST_REDUCTION_X (lastb_x0_s16, int16_t, svint16_t, -+ x0 = svlastb_s16 (p0, z0), -+ x0 = svlastb (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_s32.c -new file mode 100644 -index 000000000..856e5bdc8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_s32.c -@@ -0,0 +1,12 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lastb_x0_s32: -+** lastb w0, p0, z0\.s -+** ret -+*/ -+TEST_REDUCTION_X (lastb_x0_s32, int32_t, svint32_t, -+ x0 = svlastb_s32 (p0, z0), -+ x0 = svlastb (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_s64.c -new file mode 100644 -index 000000000..bd7de2ab2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_s64.c -@@ -0,0 +1,12 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lastb_x0_s64: -+** lastb x0, p0, z0\.d -+** ret -+*/ -+TEST_REDUCTION_X (lastb_x0_s64, int64_t, svint64_t, -+ x0 = svlastb_s64 (p0, z0), -+ x0 = svlastb (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_s8.c -new file mode 100644 -index 000000000..4c343a705 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_s8.c -@@ -0,0 +1,12 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lastb_x0_s8: -+** lastb w0, p0, z0\.b -+** ret -+*/ -+TEST_REDUCTION_X (lastb_x0_s8, int8_t, svint8_t, -+ x0 = svlastb_s8 (p0, z0), -+ x0 = svlastb (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_u16.c -new file mode 100644 -index 000000000..7f3db1bb1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_u16.c -@@ -0,0 +1,12 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lastb_x0_u16: -+** lastb w0, p0, z0\.h -+** ret -+*/ -+TEST_REDUCTION_X (lastb_x0_u16, uint16_t, svuint16_t, -+ x0 = svlastb_u16 (p0, z0), -+ x0 = svlastb (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_u32.c -new file mode 100644 -index 000000000..c2eeacba0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_u32.c -@@ -0,0 +1,12 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lastb_x0_u32: -+** lastb w0, p0, z0\.s 
-+** ret -+*/ -+TEST_REDUCTION_X (lastb_x0_u32, uint32_t, svuint32_t, -+ x0 = svlastb_u32 (p0, z0), -+ x0 = svlastb (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_u64.c -new file mode 100644 -index 000000000..1496ffa0e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_u64.c -@@ -0,0 +1,12 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lastb_x0_u64: -+** lastb x0, p0, z0\.d -+** ret -+*/ -+TEST_REDUCTION_X (lastb_x0_u64, uint64_t, svuint64_t, -+ x0 = svlastb_u64 (p0, z0), -+ x0 = svlastb (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_u8.c -new file mode 100644 -index 000000000..25f036063 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lastb_u8.c -@@ -0,0 +1,12 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lastb_x0_u8: -+** lastb w0, p0, z0\.b -+** ret -+*/ -+TEST_REDUCTION_X (lastb_x0_u8, uint8_t, svuint8_t, -+ x0 = svlastb_u8 (p0, z0), -+ x0 = svlastb (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_bf16.c -new file mode 100644 -index 000000000..07891de04 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_bf16.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1_bf16_base: -+** ld1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_bf16_base, svbfloat16_t, bfloat16_t, -+ z0 = svld1_bf16 (p0, x0), -+ z0 = svld1 (p0, x0)) -+ -+/* -+** ld1_bf16_index: -+** ld1h z0\.h, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ld1_bf16_index, svbfloat16_t, bfloat16_t, -+ z0 = svld1_bf16 (p0, x0 + x1), -+ z0 = svld1 (p0, x0 + x1)) -+ -+/* -+** ld1_bf16_1: -+** ld1h z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_bf16_1, svbfloat16_t, bfloat16_t, -+ z0 = svld1_bf16 (p0, x0 + svcnth ()), -+ z0 = svld1 (p0, x0 + svcnth ())) -+ -+/* -+** ld1_bf16_7: -+** ld1h z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_bf16_7, svbfloat16_t, bfloat16_t, -+ z0 = svld1_bf16 (p0, x0 + svcnth () * 7), -+ z0 = svld1 (p0, x0 + svcnth () * 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_bf16_8: -+** incb x0, all, mul #8 -+** ld1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_bf16_8, svbfloat16_t, bfloat16_t, -+ z0 = svld1_bf16 (p0, x0 + svcnth () * 8), -+ z0 = svld1 (p0, x0 + svcnth () * 8)) -+ -+/* -+** ld1_bf16_m1: -+** ld1h z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_bf16_m1, svbfloat16_t, bfloat16_t, -+ z0 = svld1_bf16 (p0, x0 - svcnth ()), -+ z0 = svld1 (p0, x0 - svcnth ())) -+ -+/* -+** ld1_bf16_m8: -+** ld1h z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_bf16_m8, svbfloat16_t, bfloat16_t, -+ z0 = svld1_bf16 (p0, x0 - svcnth () * 8), -+ z0 = svld1 (p0, x0 - svcnth () * 8)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld1_bf16_m9: -+** decb x0, all, mul #9 -+** ld1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_bf16_m9, svbfloat16_t, bfloat16_t, -+ z0 = svld1_bf16 (p0, x0 - svcnth () * 9), -+ z0 = svld1 (p0, x0 - svcnth () * 9)) -+ -+/* -+** ld1_vnum_bf16_0: -+** ld1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_bf16_0, svbfloat16_t, bfloat16_t, -+ z0 = svld1_vnum_bf16 (p0, x0, 0), -+ z0 = svld1_vnum (p0, x0, 0)) -+ -+/* -+** ld1_vnum_bf16_1: -+** ld1h z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_bf16_1, svbfloat16_t, bfloat16_t, -+ z0 = svld1_vnum_bf16 (p0, x0, 1), -+ z0 = svld1_vnum (p0, x0, 1)) -+ -+/* -+** ld1_vnum_bf16_7: -+** ld1h z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_bf16_7, svbfloat16_t, bfloat16_t, -+ z0 = svld1_vnum_bf16 (p0, x0, 7), -+ z0 = svld1_vnum (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_vnum_bf16_8: -+** incb x0, all, mul #8 -+** ld1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_bf16_8, svbfloat16_t, bfloat16_t, -+ z0 = svld1_vnum_bf16 (p0, x0, 8), -+ z0 = svld1_vnum (p0, x0, 8)) -+ -+/* -+** ld1_vnum_bf16_m1: -+** ld1h z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_bf16_m1, svbfloat16_t, bfloat16_t, -+ z0 = svld1_vnum_bf16 (p0, x0, -1), -+ z0 = svld1_vnum (p0, x0, -1)) -+ -+/* -+** ld1_vnum_bf16_m8: -+** ld1h z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_bf16_m8, svbfloat16_t, bfloat16_t, -+ z0 = svld1_vnum_bf16 (p0, x0, -8), -+ z0 = svld1_vnum (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_vnum_bf16_m9: -+** decb x0, all, mul #9 -+** ld1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_bf16_m9, svbfloat16_t, bfloat16_t, -+ z0 = svld1_vnum_bf16 (p0, x0, -9), -+ z0 = svld1_vnum (p0, x0, -9)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld1_vnum_bf16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld1h z0\.h, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_bf16_x1, svbfloat16_t, bfloat16_t, -+ z0 = svld1_vnum_bf16 (p0, x0, x1), -+ z0 = svld1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_f16.c -new file mode 100644 -index 000000000..c3552bfbd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_f16.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1_f16_base: -+** ld1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_f16_base, svfloat16_t, float16_t, -+ z0 = svld1_f16 (p0, x0), -+ z0 = svld1 (p0, x0)) -+ -+/* -+** ld1_f16_index: -+** ld1h z0\.h, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ld1_f16_index, svfloat16_t, float16_t, -+ z0 = svld1_f16 (p0, x0 + x1), -+ z0 = svld1 (p0, x0 + x1)) -+ -+/* -+** ld1_f16_1: -+** ld1h z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_f16_1, svfloat16_t, float16_t, -+ z0 = svld1_f16 (p0, x0 + svcnth ()), -+ z0 = svld1 (p0, x0 + svcnth ())) -+ -+/* -+** ld1_f16_7: -+** ld1h z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_f16_7, svfloat16_t, float16_t, -+ z0 = svld1_f16 (p0, x0 + svcnth () * 7), -+ z0 = svld1 (p0, x0 + svcnth () * 7)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld1_f16_8: -+** incb x0, all, mul #8 -+** ld1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_f16_8, svfloat16_t, float16_t, -+ z0 = svld1_f16 (p0, x0 + svcnth () * 8), -+ z0 = svld1 (p0, x0 + svcnth () * 8)) -+ -+/* -+** ld1_f16_m1: -+** ld1h z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_f16_m1, svfloat16_t, float16_t, -+ z0 = svld1_f16 (p0, x0 - svcnth ()), -+ z0 = svld1 (p0, x0 - svcnth ())) -+ -+/* -+** ld1_f16_m8: -+** ld1h z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_f16_m8, svfloat16_t, float16_t, -+ z0 = svld1_f16 (p0, x0 - svcnth () * 8), -+ z0 = svld1 (p0, x0 - svcnth () * 8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_f16_m9: -+** decb x0, all, mul #9 -+** ld1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_f16_m9, svfloat16_t, float16_t, -+ z0 = svld1_f16 (p0, x0 - svcnth () * 9), -+ z0 = svld1 (p0, x0 - svcnth () * 9)) -+ -+/* -+** ld1_vnum_f16_0: -+** ld1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_f16_0, svfloat16_t, float16_t, -+ z0 = svld1_vnum_f16 (p0, x0, 0), -+ z0 = svld1_vnum (p0, x0, 0)) -+ -+/* -+** ld1_vnum_f16_1: -+** ld1h z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_f16_1, svfloat16_t, float16_t, -+ z0 = svld1_vnum_f16 (p0, x0, 1), -+ z0 = svld1_vnum (p0, x0, 1)) -+ -+/* -+** ld1_vnum_f16_7: -+** ld1h z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_f16_7, svfloat16_t, float16_t, -+ z0 = svld1_vnum_f16 (p0, x0, 7), -+ z0 = svld1_vnum (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_vnum_f16_8: -+** incb x0, all, mul #8 -+** ld1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_f16_8, svfloat16_t, float16_t, -+ z0 = svld1_vnum_f16 (p0, x0, 8), -+ z0 = svld1_vnum (p0, x0, 8)) -+ -+/* -+** ld1_vnum_f16_m1: -+** ld1h z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_f16_m1, svfloat16_t, float16_t, -+ z0 = svld1_vnum_f16 (p0, x0, -1), -+ z0 = svld1_vnum (p0, x0, -1)) -+ -+/* -+** ld1_vnum_f16_m8: -+** ld1h z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_f16_m8, svfloat16_t, float16_t, -+ z0 = svld1_vnum_f16 (p0, x0, -8), -+ z0 = svld1_vnum (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_vnum_f16_m9: -+** decb x0, all, mul #9 -+** ld1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_f16_m9, svfloat16_t, float16_t, -+ z0 = svld1_vnum_f16 (p0, x0, -9), -+ z0 = svld1_vnum (p0, x0, -9)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld1_vnum_f16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld1h z0\.h, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_f16_x1, svfloat16_t, float16_t, -+ z0 = svld1_vnum_f16 (p0, x0, x1), -+ z0 = svld1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_f32.c -new file mode 100644 -index 000000000..8990f48d9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_f32.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1_f32_base: -+** ld1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_f32_base, svfloat32_t, float32_t, -+ z0 = svld1_f32 (p0, x0), -+ z0 = svld1 (p0, x0)) -+ -+/* -+** ld1_f32_index: -+** ld1w z0\.s, p0/z, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_LOAD (ld1_f32_index, svfloat32_t, float32_t, -+ z0 = svld1_f32 (p0, x0 + x1), -+ z0 = svld1 (p0, x0 + x1)) -+ -+/* -+** ld1_f32_1: -+** ld1w z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_f32_1, svfloat32_t, float32_t, -+ z0 = svld1_f32 (p0, x0 + svcntw ()), -+ z0 = svld1 (p0, x0 + svcntw ())) -+ -+/* -+** ld1_f32_7: -+** ld1w z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_f32_7, svfloat32_t, float32_t, -+ z0 = svld1_f32 (p0, x0 + svcntw () * 7), -+ z0 = svld1 (p0, x0 + svcntw () * 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_f32_8: -+** incb x0, all, mul #8 -+** ld1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_f32_8, svfloat32_t, float32_t, -+ z0 = svld1_f32 (p0, x0 + svcntw () * 8), -+ z0 = svld1 (p0, x0 + svcntw () * 8)) -+ -+/* -+** ld1_f32_m1: -+** ld1w z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_f32_m1, svfloat32_t, float32_t, -+ z0 = svld1_f32 (p0, x0 - svcntw ()), -+ z0 = svld1 (p0, x0 - svcntw ())) -+ -+/* -+** ld1_f32_m8: -+** ld1w z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_f32_m8, svfloat32_t, float32_t, -+ z0 = svld1_f32 (p0, x0 - svcntw () * 8), -+ z0 = svld1 (p0, x0 - svcntw () * 8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_f32_m9: -+** decb x0, all, mul #9 -+** ld1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_f32_m9, svfloat32_t, float32_t, -+ z0 = svld1_f32 (p0, x0 - svcntw () * 9), -+ z0 = svld1 (p0, x0 - svcntw () * 9)) -+ -+/* -+** ld1_vnum_f32_0: -+** ld1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_f32_0, svfloat32_t, float32_t, -+ z0 = svld1_vnum_f32 (p0, x0, 0), -+ z0 = svld1_vnum (p0, x0, 0)) -+ -+/* -+** ld1_vnum_f32_1: -+** ld1w z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_f32_1, svfloat32_t, float32_t, -+ z0 = svld1_vnum_f32 (p0, x0, 1), -+ z0 = svld1_vnum (p0, x0, 1)) -+ -+/* -+** ld1_vnum_f32_7: -+** ld1w z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_f32_7, svfloat32_t, float32_t, -+ z0 = svld1_vnum_f32 (p0, x0, 7), -+ z0 = svld1_vnum (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_vnum_f32_8: -+** incb x0, all, mul #8 -+** ld1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_f32_8, svfloat32_t, float32_t, -+ z0 = svld1_vnum_f32 (p0, x0, 8), -+ z0 = svld1_vnum (p0, x0, 8)) -+ -+/* -+** ld1_vnum_f32_m1: -+** ld1w z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_f32_m1, svfloat32_t, float32_t, -+ z0 = svld1_vnum_f32 (p0, x0, -1), -+ z0 = svld1_vnum (p0, x0, -1)) -+ -+/* -+** ld1_vnum_f32_m8: -+** ld1w z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_f32_m8, svfloat32_t, float32_t, -+ z0 = svld1_vnum_f32 (p0, x0, -8), -+ z0 = svld1_vnum (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_vnum_f32_m9: -+** decb x0, all, mul #9 -+** ld1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_f32_m9, svfloat32_t, float32_t, -+ z0 = svld1_vnum_f32 (p0, x0, -9), -+ z0 = svld1_vnum (p0, x0, -9)) -+ -+/* Using MUL to calculate an index would also be OK. 
*/ -+/* -+** ld1_vnum_f32_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld1w z0\.s, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_f32_x1, svfloat32_t, float32_t, -+ z0 = svld1_vnum_f32 (p0, x0, x1), -+ z0 = svld1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_f64.c -new file mode 100644 -index 000000000..eb28687fe ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_f64.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1_f64_base: -+** ld1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_f64_base, svfloat64_t, float64_t, -+ z0 = svld1_f64 (p0, x0), -+ z0 = svld1 (p0, x0)) -+ -+/* -+** ld1_f64_index: -+** ld1d z0\.d, p0/z, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_LOAD (ld1_f64_index, svfloat64_t, float64_t, -+ z0 = svld1_f64 (p0, x0 + x1), -+ z0 = svld1 (p0, x0 + x1)) -+ -+/* -+** ld1_f64_1: -+** ld1d z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_f64_1, svfloat64_t, float64_t, -+ z0 = svld1_f64 (p0, x0 + svcntd ()), -+ z0 = svld1 (p0, x0 + svcntd ())) -+ -+/* -+** ld1_f64_7: -+** ld1d z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_f64_7, svfloat64_t, float64_t, -+ z0 = svld1_f64 (p0, x0 + svcntd () * 7), -+ z0 = svld1 (p0, x0 + svcntd () * 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_f64_8: -+** incb x0, all, mul #8 -+** ld1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_f64_8, svfloat64_t, float64_t, -+ z0 = svld1_f64 (p0, x0 + svcntd () * 8), -+ z0 = svld1 (p0, x0 + svcntd () * 8)) -+ -+/* -+** ld1_f64_m1: -+** ld1d z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_f64_m1, svfloat64_t, float64_t, -+ z0 = svld1_f64 (p0, x0 - svcntd ()), -+ z0 = svld1 (p0, x0 - svcntd ())) -+ -+/* -+** ld1_f64_m8: -+** ld1d z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_f64_m8, svfloat64_t, float64_t, -+ z0 = svld1_f64 (p0, x0 - svcntd () * 8), -+ z0 = svld1 (p0, x0 - svcntd () * 8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_f64_m9: -+** decb x0, all, mul #9 -+** ld1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_f64_m9, svfloat64_t, float64_t, -+ z0 = svld1_f64 (p0, x0 - svcntd () * 9), -+ z0 = svld1 (p0, x0 - svcntd () * 9)) -+ -+/* -+** ld1_vnum_f64_0: -+** ld1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_f64_0, svfloat64_t, float64_t, -+ z0 = svld1_vnum_f64 (p0, x0, 0), -+ z0 = svld1_vnum (p0, x0, 0)) -+ -+/* -+** ld1_vnum_f64_1: -+** ld1d z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_f64_1, svfloat64_t, float64_t, -+ z0 = svld1_vnum_f64 (p0, x0, 1), -+ z0 = svld1_vnum (p0, x0, 1)) -+ -+/* -+** ld1_vnum_f64_7: -+** ld1d z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_f64_7, svfloat64_t, float64_t, -+ z0 = svld1_vnum_f64 (p0, x0, 7), -+ z0 = svld1_vnum (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld1_vnum_f64_8: -+** incb x0, all, mul #8 -+** ld1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_f64_8, svfloat64_t, float64_t, -+ z0 = svld1_vnum_f64 (p0, x0, 8), -+ z0 = svld1_vnum (p0, x0, 8)) -+ -+/* -+** ld1_vnum_f64_m1: -+** ld1d z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_f64_m1, svfloat64_t, float64_t, -+ z0 = svld1_vnum_f64 (p0, x0, -1), -+ z0 = svld1_vnum (p0, x0, -1)) -+ -+/* -+** ld1_vnum_f64_m8: -+** ld1d z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_f64_m8, svfloat64_t, float64_t, -+ z0 = svld1_vnum_f64 (p0, x0, -8), -+ z0 = svld1_vnum (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_vnum_f64_m9: -+** decb x0, all, mul #9 -+** ld1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_f64_m9, svfloat64_t, float64_t, -+ z0 = svld1_vnum_f64 (p0, x0, -9), -+ z0 = svld1_vnum (p0, x0, -9)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld1_vnum_f64_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld1d z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_f64_x1, svfloat64_t, float64_t, -+ z0 = svld1_vnum_f64 (p0, x0, x1), -+ z0 = svld1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_f32.c -new file mode 100644 -index 000000000..00b68ff29 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_f32.c -@@ -0,0 +1,272 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1_gather_f32_tied1: -+** ld1w z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_f32_tied1, svfloat32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_f32 (p0, z0), -+ z0_res = svld1_gather_f32 (p0, z0)) -+ -+/* -+** ld1_gather_f32_untied: -+** ld1w z0\.s, p0/z, \[z1\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_f32_untied, svfloat32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_f32 (p0, z1), -+ z0_res = svld1_gather_f32 (p0, z1)) -+ -+/* -+** ld1_gather_x0_f32_offset: -+** ld1w z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_x0_f32_offset, svfloat32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_offset_f32 (p0, z0, x0), -+ z0_res = svld1_gather_offset_f32 (p0, z0, x0)) -+ -+/* -+** ld1_gather_m4_f32_offset: -+** mov (x[0-9]+), #?-4 -+** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_m4_f32_offset, svfloat32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_offset_f32 (p0, z0, -4), -+ z0_res = svld1_gather_offset_f32 (p0, z0, -4)) -+ -+/* -+** ld1_gather_0_f32_offset: -+** ld1w z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_0_f32_offset, svfloat32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_offset_f32 (p0, z0, 0), -+ z0_res = svld1_gather_offset_f32 (p0, z0, 0)) -+ -+/* -+** ld1_gather_5_f32_offset: -+** mov (x[0-9]+), #?5 -+** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_5_f32_offset, svfloat32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_offset_f32 (p0, z0, 5), -+ z0_res = svld1_gather_offset_f32 (p0, z0, 5)) -+ -+/* -+** ld1_gather_6_f32_offset: -+** mov (x[0-9]+), #?6 -+** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_6_f32_offset, svfloat32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_offset_f32 (p0, z0, 6), -+ z0_res = 
svld1_gather_offset_f32 (p0, z0, 6)) -+ -+/* -+** ld1_gather_7_f32_offset: -+** mov (x[0-9]+), #?7 -+** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_7_f32_offset, svfloat32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_offset_f32 (p0, z0, 7), -+ z0_res = svld1_gather_offset_f32 (p0, z0, 7)) -+ -+/* -+** ld1_gather_8_f32_offset: -+** ld1w z0\.s, p0/z, \[z0\.s, #8\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_8_f32_offset, svfloat32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_offset_f32 (p0, z0, 8), -+ z0_res = svld1_gather_offset_f32 (p0, z0, 8)) -+ -+/* -+** ld1_gather_124_f32_offset: -+** ld1w z0\.s, p0/z, \[z0\.s, #124\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_124_f32_offset, svfloat32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_offset_f32 (p0, z0, 124), -+ z0_res = svld1_gather_offset_f32 (p0, z0, 124)) -+ -+/* -+** ld1_gather_128_f32_offset: -+** mov (x[0-9]+), #?128 -+** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_128_f32_offset, svfloat32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_offset_f32 (p0, z0, 128), -+ z0_res = svld1_gather_offset_f32 (p0, z0, 128)) -+ -+/* -+** ld1_gather_x0_f32_index: -+** lsl (x[0-9]+), x0, #?2 -+** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_x0_f32_index, svfloat32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_index_f32 (p0, z0, x0), -+ z0_res = svld1_gather_index_f32 (p0, z0, x0)) -+ -+/* -+** ld1_gather_m1_f32_index: -+** mov (x[0-9]+), #?-4 -+** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_m1_f32_index, svfloat32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_index_f32 (p0, z0, -1), -+ z0_res = svld1_gather_index_f32 (p0, z0, -1)) -+ -+/* -+** ld1_gather_0_f32_index: -+** ld1w z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_0_f32_index, svfloat32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_index_f32 (p0, z0, 0), -+ z0_res = svld1_gather_index_f32 (p0, z0, 0)) -+ -+/* -+** ld1_gather_5_f32_index: -+** ld1w z0\.s, p0/z, \[z0\.s, #20\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_5_f32_index, svfloat32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_index_f32 (p0, z0, 5), -+ z0_res = svld1_gather_index_f32 (p0, z0, 5)) -+ -+/* -+** ld1_gather_31_f32_index: -+** ld1w z0\.s, p0/z, \[z0\.s, #124\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_31_f32_index, svfloat32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_index_f32 (p0, z0, 31), -+ z0_res = svld1_gather_index_f32 (p0, z0, 31)) -+ -+/* -+** ld1_gather_32_f32_index: -+** mov (x[0-9]+), #?128 -+** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_32_f32_index, svfloat32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_index_f32 (p0, z0, 32), -+ z0_res = svld1_gather_index_f32 (p0, z0, 32)) -+ -+/* -+** ld1_gather_x0_f32_s32offset: -+** ld1w z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_x0_f32_s32offset, svfloat32_t, float32_t, svint32_t, -+ z0_res = svld1_gather_s32offset_f32 (p0, x0, z0), -+ z0_res = svld1_gather_offset (p0, x0, z0)) -+ -+/* -+** ld1_gather_tied1_f32_s32offset: -+** ld1w z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_tied1_f32_s32offset, svfloat32_t, float32_t, svint32_t, -+ z0_res = svld1_gather_s32offset_f32 (p0, x0, z0), -+ z0_res = svld1_gather_offset (p0, x0, z0)) -+ -+/* -+** ld1_gather_untied_f32_s32offset: -+** ld1w z0\.s, p0/z, \[x0, z1\.s, sxtw\] -+** 
ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_untied_f32_s32offset, svfloat32_t, float32_t, svint32_t, -+ z0_res = svld1_gather_s32offset_f32 (p0, x0, z1), -+ z0_res = svld1_gather_offset (p0, x0, z1)) -+ -+/* -+** ld1_gather_x0_f32_u32offset: -+** ld1w z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_x0_f32_u32offset, svfloat32_t, float32_t, svuint32_t, -+ z0_res = svld1_gather_u32offset_f32 (p0, x0, z0), -+ z0_res = svld1_gather_offset (p0, x0, z0)) -+ -+/* -+** ld1_gather_tied1_f32_u32offset: -+** ld1w z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_tied1_f32_u32offset, svfloat32_t, float32_t, svuint32_t, -+ z0_res = svld1_gather_u32offset_f32 (p0, x0, z0), -+ z0_res = svld1_gather_offset (p0, x0, z0)) -+ -+/* -+** ld1_gather_untied_f32_u32offset: -+** ld1w z0\.s, p0/z, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_untied_f32_u32offset, svfloat32_t, float32_t, svuint32_t, -+ z0_res = svld1_gather_u32offset_f32 (p0, x0, z1), -+ z0_res = svld1_gather_offset (p0, x0, z1)) -+ -+/* -+** ld1_gather_x0_f32_s32index: -+** ld1w z0\.s, p0/z, \[x0, z0\.s, sxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_x0_f32_s32index, svfloat32_t, float32_t, svint32_t, -+ z0_res = svld1_gather_s32index_f32 (p0, x0, z0), -+ z0_res = svld1_gather_index (p0, x0, z0)) -+ -+/* -+** ld1_gather_tied1_f32_s32index: -+** ld1w z0\.s, p0/z, \[x0, z0\.s, sxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_tied1_f32_s32index, svfloat32_t, float32_t, svint32_t, -+ z0_res = svld1_gather_s32index_f32 (p0, x0, z0), -+ z0_res = svld1_gather_index (p0, x0, z0)) -+ -+/* -+** ld1_gather_untied_f32_s32index: -+** ld1w z0\.s, p0/z, \[x0, z1\.s, sxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_untied_f32_s32index, svfloat32_t, float32_t, svint32_t, -+ z0_res = svld1_gather_s32index_f32 (p0, x0, z1), -+ z0_res = svld1_gather_index (p0, x0, z1)) -+ -+/* -+** ld1_gather_x0_f32_u32index: -+** ld1w z0\.s, p0/z, \[x0, z0\.s, uxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_x0_f32_u32index, svfloat32_t, float32_t, svuint32_t, -+ z0_res = svld1_gather_u32index_f32 (p0, x0, z0), -+ z0_res = svld1_gather_index (p0, x0, z0)) -+ -+/* -+** ld1_gather_tied1_f32_u32index: -+** ld1w z0\.s, p0/z, \[x0, z0\.s, uxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_tied1_f32_u32index, svfloat32_t, float32_t, svuint32_t, -+ z0_res = svld1_gather_u32index_f32 (p0, x0, z0), -+ z0_res = svld1_gather_index (p0, x0, z0)) -+ -+/* -+** ld1_gather_untied_f32_u32index: -+** ld1w z0\.s, p0/z, \[x0, z1\.s, uxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_untied_f32_u32index, svfloat32_t, float32_t, svuint32_t, -+ z0_res = svld1_gather_u32index_f32 (p0, x0, z1), -+ z0_res = svld1_gather_index (p0, x0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_f64.c -new file mode 100644 -index 000000000..47127960c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_f64.c -@@ -0,0 +1,348 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1_gather_f64_tied1: -+** ld1d z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_f64_tied1, svfloat64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_f64 (p0, z0), -+ z0_res = svld1_gather_f64 (p0, z0)) -+ -+/* -+** ld1_gather_f64_untied: -+** ld1d z0\.d, p0/z, \[z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_f64_untied, svfloat64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_f64 (p0, z1), -+ z0_res = svld1_gather_f64 (p0, z1)) -+ -+/* -+** ld1_gather_x0_f64_offset: -+** ld1d z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_x0_f64_offset, svfloat64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_f64 (p0, z0, x0), -+ z0_res = svld1_gather_offset_f64 (p0, z0, x0)) -+ -+/* -+** ld1_gather_m8_f64_offset: -+** mov (x[0-9]+), #?-8 -+** ld1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_m8_f64_offset, svfloat64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_f64 (p0, z0, -8), -+ z0_res = svld1_gather_offset_f64 (p0, z0, -8)) -+ -+/* -+** ld1_gather_0_f64_offset: -+** ld1d z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_0_f64_offset, svfloat64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_f64 (p0, z0, 0), -+ z0_res = svld1_gather_offset_f64 (p0, z0, 0)) -+ -+/* -+** ld1_gather_9_f64_offset: -+** mov (x[0-9]+), #?9 -+** ld1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_9_f64_offset, svfloat64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_f64 (p0, z0, 9), -+ z0_res = svld1_gather_offset_f64 (p0, z0, 9)) -+ -+/* -+** ld1_gather_10_f64_offset: -+** mov (x[0-9]+), #?10 -+** ld1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_10_f64_offset, svfloat64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_f64 (p0, z0, 10), -+ z0_res = svld1_gather_offset_f64 (p0, z0, 10)) -+ -+/* -+** ld1_gather_11_f64_offset: -+** mov (x[0-9]+), #?11 -+** ld1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_11_f64_offset, svfloat64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_f64 (p0, z0, 11), -+ z0_res = svld1_gather_offset_f64 (p0, z0, 11)) -+ -+/* -+** ld1_gather_12_f64_offset: -+** mov (x[0-9]+), #?12 -+** ld1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_12_f64_offset, svfloat64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_f64 (p0, z0, 12), -+ z0_res = svld1_gather_offset_f64 (p0, z0, 12)) -+ -+/* -+** ld1_gather_13_f64_offset: -+** mov (x[0-9]+), #?13 -+** ld1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_13_f64_offset, svfloat64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_f64 (p0, z0, 13), -+ z0_res = svld1_gather_offset_f64 (p0, z0, 13)) -+ -+/* -+** ld1_gather_14_f64_offset: -+** mov (x[0-9]+), #?14 -+** ld1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_14_f64_offset, svfloat64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_f64 (p0, z0, 14), -+ z0_res = svld1_gather_offset_f64 (p0, z0, 14)) -+ -+/* -+** ld1_gather_15_f64_offset: -+** mov (x[0-9]+), #?15 -+** ld1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_15_f64_offset, svfloat64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_f64 (p0, z0, 15), -+ z0_res = svld1_gather_offset_f64 (p0, z0, 15)) -+ -+/* -+** ld1_gather_16_f64_offset: -+** ld1d z0\.d, p0/z, \[z0\.d, #16\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS 
(ld1_gather_16_f64_offset, svfloat64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_f64 (p0, z0, 16), -+ z0_res = svld1_gather_offset_f64 (p0, z0, 16)) -+ -+/* -+** ld1_gather_248_f64_offset: -+** ld1d z0\.d, p0/z, \[z0\.d, #248\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_248_f64_offset, svfloat64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_f64 (p0, z0, 248), -+ z0_res = svld1_gather_offset_f64 (p0, z0, 248)) -+ -+/* -+** ld1_gather_256_f64_offset: -+** mov (x[0-9]+), #?256 -+** ld1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_256_f64_offset, svfloat64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_f64 (p0, z0, 256), -+ z0_res = svld1_gather_offset_f64 (p0, z0, 256)) -+ -+/* -+** ld1_gather_x0_f64_index: -+** lsl (x[0-9]+), x0, #?3 -+** ld1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_x0_f64_index, svfloat64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_index_f64 (p0, z0, x0), -+ z0_res = svld1_gather_index_f64 (p0, z0, x0)) -+ -+/* -+** ld1_gather_m1_f64_index: -+** mov (x[0-9]+), #?-8 -+** ld1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_m1_f64_index, svfloat64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_index_f64 (p0, z0, -1), -+ z0_res = svld1_gather_index_f64 (p0, z0, -1)) -+ -+/* -+** ld1_gather_0_f64_index: -+** ld1d z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_0_f64_index, svfloat64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_index_f64 (p0, z0, 0), -+ z0_res = svld1_gather_index_f64 (p0, z0, 0)) -+ -+/* -+** ld1_gather_5_f64_index: -+** ld1d z0\.d, p0/z, \[z0\.d, #40\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_5_f64_index, svfloat64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_index_f64 (p0, z0, 5), -+ z0_res = svld1_gather_index_f64 (p0, z0, 5)) -+ -+/* -+** ld1_gather_31_f64_index: -+** ld1d z0\.d, p0/z, \[z0\.d, #248\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_31_f64_index, svfloat64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_index_f64 (p0, z0, 31), -+ z0_res = svld1_gather_index_f64 (p0, z0, 31)) -+ -+/* -+** ld1_gather_32_f64_index: -+** mov (x[0-9]+), #?256 -+** ld1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_32_f64_index, svfloat64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_index_f64 (p0, z0, 32), -+ z0_res = svld1_gather_index_f64 (p0, z0, 32)) -+ -+/* -+** ld1_gather_x0_f64_s64offset: -+** ld1d z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_x0_f64_s64offset, svfloat64_t, float64_t, svint64_t, -+ z0_res = svld1_gather_s64offset_f64 (p0, x0, z0), -+ z0_res = svld1_gather_offset (p0, x0, z0)) -+ -+/* -+** ld1_gather_tied1_f64_s64offset: -+** ld1d z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_tied1_f64_s64offset, svfloat64_t, float64_t, svint64_t, -+ z0_res = svld1_gather_s64offset_f64 (p0, x0, z0), -+ z0_res = svld1_gather_offset (p0, x0, z0)) -+ -+/* -+** ld1_gather_untied_f64_s64offset: -+** ld1d z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_untied_f64_s64offset, svfloat64_t, float64_t, svint64_t, -+ z0_res = svld1_gather_s64offset_f64 (p0, x0, z1), -+ z0_res = svld1_gather_offset (p0, x0, z1)) -+ -+/* -+** ld1_gather_ext_f64_s64offset: -+** ld1d z0\.d, p0/z, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_ext_f64_s64offset, svfloat64_t, float64_t, svint64_t, -+ z0_res = svld1_gather_s64offset_f64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = 
svld1_gather_offset (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ld1_gather_x0_f64_u64offset: -+** ld1d z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_x0_f64_u64offset, svfloat64_t, float64_t, svuint64_t, -+ z0_res = svld1_gather_u64offset_f64 (p0, x0, z0), -+ z0_res = svld1_gather_offset (p0, x0, z0)) -+ -+/* -+** ld1_gather_tied1_f64_u64offset: -+** ld1d z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_tied1_f64_u64offset, svfloat64_t, float64_t, svuint64_t, -+ z0_res = svld1_gather_u64offset_f64 (p0, x0, z0), -+ z0_res = svld1_gather_offset (p0, x0, z0)) -+ -+/* -+** ld1_gather_untied_f64_u64offset: -+** ld1d z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_untied_f64_u64offset, svfloat64_t, float64_t, svuint64_t, -+ z0_res = svld1_gather_u64offset_f64 (p0, x0, z1), -+ z0_res = svld1_gather_offset (p0, x0, z1)) -+ -+/* -+** ld1_gather_ext_f64_u64offset: -+** ld1d z0\.d, p0/z, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_ext_f64_u64offset, svfloat64_t, float64_t, svuint64_t, -+ z0_res = svld1_gather_u64offset_f64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svld1_gather_offset (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ld1_gather_x0_f64_s64index: -+** ld1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_x0_f64_s64index, svfloat64_t, float64_t, svint64_t, -+ z0_res = svld1_gather_s64index_f64 (p0, x0, z0), -+ z0_res = svld1_gather_index (p0, x0, z0)) -+ -+/* -+** ld1_gather_tied1_f64_s64index: -+** ld1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_tied1_f64_s64index, svfloat64_t, float64_t, svint64_t, -+ z0_res = svld1_gather_s64index_f64 (p0, x0, z0), -+ z0_res = svld1_gather_index (p0, x0, z0)) -+ -+/* -+** ld1_gather_untied_f64_s64index: -+** ld1d z0\.d, p0/z, \[x0, z1\.d, lsl 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_untied_f64_s64index, svfloat64_t, float64_t, svint64_t, -+ z0_res = svld1_gather_s64index_f64 (p0, x0, z1), -+ z0_res = svld1_gather_index (p0, x0, z1)) -+ -+/* -+** ld1_gather_ext_f64_s64index: -+** ld1d z0\.d, p0/z, \[x0, z1\.d, sxtw 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_ext_f64_s64index, svfloat64_t, float64_t, svint64_t, -+ z0_res = svld1_gather_s64index_f64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svld1_gather_index (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ld1_gather_x0_f64_u64index: -+** ld1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_x0_f64_u64index, svfloat64_t, float64_t, svuint64_t, -+ z0_res = svld1_gather_u64index_f64 (p0, x0, z0), -+ z0_res = svld1_gather_index (p0, x0, z0)) -+ -+/* -+** ld1_gather_tied1_f64_u64index: -+** ld1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_tied1_f64_u64index, svfloat64_t, float64_t, svuint64_t, -+ z0_res = svld1_gather_u64index_f64 (p0, x0, z0), -+ z0_res = svld1_gather_index (p0, x0, z0)) -+ -+/* -+** ld1_gather_untied_f64_u64index: -+** ld1d z0\.d, p0/z, \[x0, z1\.d, lsl 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_untied_f64_u64index, svfloat64_t, float64_t, svuint64_t, -+ z0_res = svld1_gather_u64index_f64 (p0, x0, z1), -+ z0_res = svld1_gather_index (p0, x0, z1)) -+ -+/* -+** ld1_gather_ext_f64_u64index: -+** ld1d z0\.d, p0/z, \[x0, z1\.d, uxtw 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_ext_f64_u64index, svfloat64_t, float64_t, svuint64_t, -+ z0_res = svld1_gather_u64index_f64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = 
svld1_gather_index (p0, x0, svextw_x (p0, z1))) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_s32.c -new file mode 100644 -index 000000000..9b6335547 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_s32.c -@@ -0,0 +1,272 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1_gather_s32_tied1: -+** ld1w z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_s32_tied1, svint32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_s32 (p0, z0), -+ z0_res = svld1_gather_s32 (p0, z0)) -+ -+/* -+** ld1_gather_s32_untied: -+** ld1w z0\.s, p0/z, \[z1\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_s32_untied, svint32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_s32 (p0, z1), -+ z0_res = svld1_gather_s32 (p0, z1)) -+ -+/* -+** ld1_gather_x0_s32_offset: -+** ld1w z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_x0_s32_offset, svint32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_offset_s32 (p0, z0, x0), -+ z0_res = svld1_gather_offset_s32 (p0, z0, x0)) -+ -+/* -+** ld1_gather_m4_s32_offset: -+** mov (x[0-9]+), #?-4 -+** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_m4_s32_offset, svint32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_offset_s32 (p0, z0, -4), -+ z0_res = svld1_gather_offset_s32 (p0, z0, -4)) -+ -+/* -+** ld1_gather_0_s32_offset: -+** ld1w z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_0_s32_offset, svint32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_offset_s32 (p0, z0, 0), -+ z0_res = svld1_gather_offset_s32 (p0, z0, 0)) -+ -+/* -+** ld1_gather_5_s32_offset: -+** mov (x[0-9]+), #?5 -+** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_5_s32_offset, svint32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_offset_s32 (p0, z0, 5), -+ z0_res = svld1_gather_offset_s32 (p0, z0, 5)) -+ -+/* -+** ld1_gather_6_s32_offset: -+** mov (x[0-9]+), #?6 -+** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_6_s32_offset, svint32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_offset_s32 (p0, z0, 6), -+ z0_res = svld1_gather_offset_s32 (p0, z0, 6)) -+ -+/* -+** ld1_gather_7_s32_offset: -+** mov (x[0-9]+), #?7 -+** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_7_s32_offset, svint32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_offset_s32 (p0, z0, 7), -+ z0_res = svld1_gather_offset_s32 (p0, z0, 7)) -+ -+/* -+** ld1_gather_8_s32_offset: -+** ld1w z0\.s, p0/z, \[z0\.s, #8\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_8_s32_offset, svint32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_offset_s32 (p0, z0, 8), -+ z0_res = svld1_gather_offset_s32 (p0, z0, 8)) -+ -+/* -+** ld1_gather_124_s32_offset: -+** ld1w z0\.s, p0/z, \[z0\.s, #124\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_124_s32_offset, svint32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_offset_s32 (p0, z0, 124), -+ z0_res = svld1_gather_offset_s32 (p0, z0, 124)) -+ -+/* -+** ld1_gather_128_s32_offset: -+** mov (x[0-9]+), #?128 -+** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_128_s32_offset, svint32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_offset_s32 (p0, z0, 128), -+ z0_res = svld1_gather_offset_s32 (p0, z0, 128)) -+ -+/* -+** ld1_gather_x0_s32_index: 
-+** lsl (x[0-9]+), x0, #?2 -+** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_x0_s32_index, svint32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_index_s32 (p0, z0, x0), -+ z0_res = svld1_gather_index_s32 (p0, z0, x0)) -+ -+/* -+** ld1_gather_m1_s32_index: -+** mov (x[0-9]+), #?-4 -+** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_m1_s32_index, svint32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_index_s32 (p0, z0, -1), -+ z0_res = svld1_gather_index_s32 (p0, z0, -1)) -+ -+/* -+** ld1_gather_0_s32_index: -+** ld1w z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_0_s32_index, svint32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_index_s32 (p0, z0, 0), -+ z0_res = svld1_gather_index_s32 (p0, z0, 0)) -+ -+/* -+** ld1_gather_5_s32_index: -+** ld1w z0\.s, p0/z, \[z0\.s, #20\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_5_s32_index, svint32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_index_s32 (p0, z0, 5), -+ z0_res = svld1_gather_index_s32 (p0, z0, 5)) -+ -+/* -+** ld1_gather_31_s32_index: -+** ld1w z0\.s, p0/z, \[z0\.s, #124\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_31_s32_index, svint32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_index_s32 (p0, z0, 31), -+ z0_res = svld1_gather_index_s32 (p0, z0, 31)) -+ -+/* -+** ld1_gather_32_s32_index: -+** mov (x[0-9]+), #?128 -+** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_32_s32_index, svint32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_index_s32 (p0, z0, 32), -+ z0_res = svld1_gather_index_s32 (p0, z0, 32)) -+ -+/* -+** ld1_gather_x0_s32_s32offset: -+** ld1w z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_x0_s32_s32offset, svint32_t, int32_t, svint32_t, -+ z0_res = svld1_gather_s32offset_s32 (p0, x0, z0), -+ z0_res = svld1_gather_offset (p0, x0, z0)) -+ -+/* -+** ld1_gather_tied1_s32_s32offset: -+** ld1w z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_tied1_s32_s32offset, svint32_t, int32_t, svint32_t, -+ z0_res = svld1_gather_s32offset_s32 (p0, x0, z0), -+ z0_res = svld1_gather_offset (p0, x0, z0)) -+ -+/* -+** ld1_gather_untied_s32_s32offset: -+** ld1w z0\.s, p0/z, \[x0, z1\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_untied_s32_s32offset, svint32_t, int32_t, svint32_t, -+ z0_res = svld1_gather_s32offset_s32 (p0, x0, z1), -+ z0_res = svld1_gather_offset (p0, x0, z1)) -+ -+/* -+** ld1_gather_x0_s32_u32offset: -+** ld1w z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_x0_s32_u32offset, svint32_t, int32_t, svuint32_t, -+ z0_res = svld1_gather_u32offset_s32 (p0, x0, z0), -+ z0_res = svld1_gather_offset (p0, x0, z0)) -+ -+/* -+** ld1_gather_tied1_s32_u32offset: -+** ld1w z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_tied1_s32_u32offset, svint32_t, int32_t, svuint32_t, -+ z0_res = svld1_gather_u32offset_s32 (p0, x0, z0), -+ z0_res = svld1_gather_offset (p0, x0, z0)) -+ -+/* -+** ld1_gather_untied_s32_u32offset: -+** ld1w z0\.s, p0/z, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_untied_s32_u32offset, svint32_t, int32_t, svuint32_t, -+ z0_res = svld1_gather_u32offset_s32 (p0, x0, z1), -+ z0_res = svld1_gather_offset (p0, x0, z1)) -+ -+/* -+** ld1_gather_x0_s32_s32index: -+** ld1w z0\.s, p0/z, \[x0, z0\.s, sxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_x0_s32_s32index, svint32_t, int32_t, svint32_t, -+ z0_res 
= svld1_gather_s32index_s32 (p0, x0, z0), -+ z0_res = svld1_gather_index (p0, x0, z0)) -+ -+/* -+** ld1_gather_tied1_s32_s32index: -+** ld1w z0\.s, p0/z, \[x0, z0\.s, sxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_tied1_s32_s32index, svint32_t, int32_t, svint32_t, -+ z0_res = svld1_gather_s32index_s32 (p0, x0, z0), -+ z0_res = svld1_gather_index (p0, x0, z0)) -+ -+/* -+** ld1_gather_untied_s32_s32index: -+** ld1w z0\.s, p0/z, \[x0, z1\.s, sxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_untied_s32_s32index, svint32_t, int32_t, svint32_t, -+ z0_res = svld1_gather_s32index_s32 (p0, x0, z1), -+ z0_res = svld1_gather_index (p0, x0, z1)) -+ -+/* -+** ld1_gather_x0_s32_u32index: -+** ld1w z0\.s, p0/z, \[x0, z0\.s, uxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_x0_s32_u32index, svint32_t, int32_t, svuint32_t, -+ z0_res = svld1_gather_u32index_s32 (p0, x0, z0), -+ z0_res = svld1_gather_index (p0, x0, z0)) -+ -+/* -+** ld1_gather_tied1_s32_u32index: -+** ld1w z0\.s, p0/z, \[x0, z0\.s, uxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_tied1_s32_u32index, svint32_t, int32_t, svuint32_t, -+ z0_res = svld1_gather_u32index_s32 (p0, x0, z0), -+ z0_res = svld1_gather_index (p0, x0, z0)) -+ -+/* -+** ld1_gather_untied_s32_u32index: -+** ld1w z0\.s, p0/z, \[x0, z1\.s, uxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_untied_s32_u32index, svint32_t, int32_t, svuint32_t, -+ z0_res = svld1_gather_u32index_s32 (p0, x0, z1), -+ z0_res = svld1_gather_index (p0, x0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_s64.c -new file mode 100644 -index 000000000..c9cea3ad8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_s64.c -@@ -0,0 +1,348 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1_gather_s64_tied1: -+** ld1d z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_s64_tied1, svint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_s64 (p0, z0), -+ z0_res = svld1_gather_s64 (p0, z0)) -+ -+/* -+** ld1_gather_s64_untied: -+** ld1d z0\.d, p0/z, \[z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_s64_untied, svint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_s64 (p0, z1), -+ z0_res = svld1_gather_s64 (p0, z1)) -+ -+/* -+** ld1_gather_x0_s64_offset: -+** ld1d z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_x0_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_s64 (p0, z0, x0), -+ z0_res = svld1_gather_offset_s64 (p0, z0, x0)) -+ -+/* -+** ld1_gather_m8_s64_offset: -+** mov (x[0-9]+), #?-8 -+** ld1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_m8_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_s64 (p0, z0, -8), -+ z0_res = svld1_gather_offset_s64 (p0, z0, -8)) -+ -+/* -+** ld1_gather_0_s64_offset: -+** ld1d z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_0_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_s64 (p0, z0, 0), -+ z0_res = svld1_gather_offset_s64 (p0, z0, 0)) -+ -+/* -+** ld1_gather_9_s64_offset: -+** mov (x[0-9]+), #?9 -+** ld1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_9_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_s64 (p0, z0, 9), -+ z0_res = svld1_gather_offset_s64 (p0, z0, 9)) -+ -+/* -+** ld1_gather_10_s64_offset: -+** mov (x[0-9]+), #?10 -+** ld1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_10_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_s64 (p0, z0, 10), -+ z0_res = svld1_gather_offset_s64 (p0, z0, 10)) -+ -+/* -+** ld1_gather_11_s64_offset: -+** mov (x[0-9]+), #?11 -+** ld1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_11_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_s64 (p0, z0, 11), -+ z0_res = svld1_gather_offset_s64 (p0, z0, 11)) -+ -+/* -+** ld1_gather_12_s64_offset: -+** mov (x[0-9]+), #?12 -+** ld1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_12_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_s64 (p0, z0, 12), -+ z0_res = svld1_gather_offset_s64 (p0, z0, 12)) -+ -+/* -+** ld1_gather_13_s64_offset: -+** mov (x[0-9]+), #?13 -+** ld1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_13_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_s64 (p0, z0, 13), -+ z0_res = svld1_gather_offset_s64 (p0, z0, 13)) -+ -+/* -+** ld1_gather_14_s64_offset: -+** mov (x[0-9]+), #?14 -+** ld1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_14_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_s64 (p0, z0, 14), -+ z0_res = svld1_gather_offset_s64 (p0, z0, 14)) -+ -+/* -+** ld1_gather_15_s64_offset: -+** mov (x[0-9]+), #?15 -+** ld1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_15_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_s64 (p0, z0, 15), -+ z0_res = svld1_gather_offset_s64 (p0, z0, 15)) -+ -+/* -+** ld1_gather_16_s64_offset: -+** ld1d z0\.d, p0/z, \[z0\.d, #16\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_16_s64_offset, 
svint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_s64 (p0, z0, 16), -+ z0_res = svld1_gather_offset_s64 (p0, z0, 16)) -+ -+/* -+** ld1_gather_248_s64_offset: -+** ld1d z0\.d, p0/z, \[z0\.d, #248\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_248_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_s64 (p0, z0, 248), -+ z0_res = svld1_gather_offset_s64 (p0, z0, 248)) -+ -+/* -+** ld1_gather_256_s64_offset: -+** mov (x[0-9]+), #?256 -+** ld1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_256_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_s64 (p0, z0, 256), -+ z0_res = svld1_gather_offset_s64 (p0, z0, 256)) -+ -+/* -+** ld1_gather_x0_s64_index: -+** lsl (x[0-9]+), x0, #?3 -+** ld1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_x0_s64_index, svint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_index_s64 (p0, z0, x0), -+ z0_res = svld1_gather_index_s64 (p0, z0, x0)) -+ -+/* -+** ld1_gather_m1_s64_index: -+** mov (x[0-9]+), #?-8 -+** ld1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_m1_s64_index, svint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_index_s64 (p0, z0, -1), -+ z0_res = svld1_gather_index_s64 (p0, z0, -1)) -+ -+/* -+** ld1_gather_0_s64_index: -+** ld1d z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_0_s64_index, svint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_index_s64 (p0, z0, 0), -+ z0_res = svld1_gather_index_s64 (p0, z0, 0)) -+ -+/* -+** ld1_gather_5_s64_index: -+** ld1d z0\.d, p0/z, \[z0\.d, #40\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_5_s64_index, svint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_index_s64 (p0, z0, 5), -+ z0_res = svld1_gather_index_s64 (p0, z0, 5)) -+ -+/* -+** ld1_gather_31_s64_index: -+** ld1d z0\.d, p0/z, \[z0\.d, #248\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_31_s64_index, svint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_index_s64 (p0, z0, 31), -+ z0_res = svld1_gather_index_s64 (p0, z0, 31)) -+ -+/* -+** ld1_gather_32_s64_index: -+** mov (x[0-9]+), #?256 -+** ld1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_32_s64_index, svint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_index_s64 (p0, z0, 32), -+ z0_res = svld1_gather_index_s64 (p0, z0, 32)) -+ -+/* -+** ld1_gather_x0_s64_s64offset: -+** ld1d z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_x0_s64_s64offset, svint64_t, int64_t, svint64_t, -+ z0_res = svld1_gather_s64offset_s64 (p0, x0, z0), -+ z0_res = svld1_gather_offset (p0, x0, z0)) -+ -+/* -+** ld1_gather_tied1_s64_s64offset: -+** ld1d z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_tied1_s64_s64offset, svint64_t, int64_t, svint64_t, -+ z0_res = svld1_gather_s64offset_s64 (p0, x0, z0), -+ z0_res = svld1_gather_offset (p0, x0, z0)) -+ -+/* -+** ld1_gather_untied_s64_s64offset: -+** ld1d z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_untied_s64_s64offset, svint64_t, int64_t, svint64_t, -+ z0_res = svld1_gather_s64offset_s64 (p0, x0, z1), -+ z0_res = svld1_gather_offset (p0, x0, z1)) -+ -+/* -+** ld1_gather_ext_s64_s64offset: -+** ld1d z0\.d, p0/z, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_ext_s64_s64offset, svint64_t, int64_t, svint64_t, -+ z0_res = svld1_gather_s64offset_s64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svld1_gather_offset (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** 
ld1_gather_x0_s64_u64offset: -+** ld1d z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_x0_s64_u64offset, svint64_t, int64_t, svuint64_t, -+ z0_res = svld1_gather_u64offset_s64 (p0, x0, z0), -+ z0_res = svld1_gather_offset (p0, x0, z0)) -+ -+/* -+** ld1_gather_tied1_s64_u64offset: -+** ld1d z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_tied1_s64_u64offset, svint64_t, int64_t, svuint64_t, -+ z0_res = svld1_gather_u64offset_s64 (p0, x0, z0), -+ z0_res = svld1_gather_offset (p0, x0, z0)) -+ -+/* -+** ld1_gather_untied_s64_u64offset: -+** ld1d z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_untied_s64_u64offset, svint64_t, int64_t, svuint64_t, -+ z0_res = svld1_gather_u64offset_s64 (p0, x0, z1), -+ z0_res = svld1_gather_offset (p0, x0, z1)) -+ -+/* -+** ld1_gather_ext_s64_u64offset: -+** ld1d z0\.d, p0/z, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_ext_s64_u64offset, svint64_t, int64_t, svuint64_t, -+ z0_res = svld1_gather_u64offset_s64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svld1_gather_offset (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ld1_gather_x0_s64_s64index: -+** ld1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_x0_s64_s64index, svint64_t, int64_t, svint64_t, -+ z0_res = svld1_gather_s64index_s64 (p0, x0, z0), -+ z0_res = svld1_gather_index (p0, x0, z0)) -+ -+/* -+** ld1_gather_tied1_s64_s64index: -+** ld1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_tied1_s64_s64index, svint64_t, int64_t, svint64_t, -+ z0_res = svld1_gather_s64index_s64 (p0, x0, z0), -+ z0_res = svld1_gather_index (p0, x0, z0)) -+ -+/* -+** ld1_gather_untied_s64_s64index: -+** ld1d z0\.d, p0/z, \[x0, z1\.d, lsl 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_untied_s64_s64index, svint64_t, int64_t, svint64_t, -+ z0_res = svld1_gather_s64index_s64 (p0, x0, z1), -+ z0_res = svld1_gather_index (p0, x0, z1)) -+ -+/* -+** ld1_gather_ext_s64_s64index: -+** ld1d z0\.d, p0/z, \[x0, z1\.d, sxtw 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_ext_s64_s64index, svint64_t, int64_t, svint64_t, -+ z0_res = svld1_gather_s64index_s64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svld1_gather_index (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ld1_gather_x0_s64_u64index: -+** ld1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_x0_s64_u64index, svint64_t, int64_t, svuint64_t, -+ z0_res = svld1_gather_u64index_s64 (p0, x0, z0), -+ z0_res = svld1_gather_index (p0, x0, z0)) -+ -+/* -+** ld1_gather_tied1_s64_u64index: -+** ld1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_tied1_s64_u64index, svint64_t, int64_t, svuint64_t, -+ z0_res = svld1_gather_u64index_s64 (p0, x0, z0), -+ z0_res = svld1_gather_index (p0, x0, z0)) -+ -+/* -+** ld1_gather_untied_s64_u64index: -+** ld1d z0\.d, p0/z, \[x0, z1\.d, lsl 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_untied_s64_u64index, svint64_t, int64_t, svuint64_t, -+ z0_res = svld1_gather_u64index_s64 (p0, x0, z1), -+ z0_res = svld1_gather_index (p0, x0, z1)) -+ -+/* -+** ld1_gather_ext_s64_u64index: -+** ld1d z0\.d, p0/z, \[x0, z1\.d, uxtw 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_ext_s64_u64index, svint64_t, int64_t, svuint64_t, -+ z0_res = svld1_gather_u64index_s64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svld1_gather_index (p0, x0, svextw_x (p0, z1))) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_u32.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_u32.c -new file mode 100644 -index 000000000..2cccc8d49 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_u32.c -@@ -0,0 +1,272 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1_gather_u32_tied1: -+** ld1w z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_u32_tied1, svuint32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_u32 (p0, z0), -+ z0_res = svld1_gather_u32 (p0, z0)) -+ -+/* -+** ld1_gather_u32_untied: -+** ld1w z0\.s, p0/z, \[z1\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_u32_untied, svuint32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_u32 (p0, z1), -+ z0_res = svld1_gather_u32 (p0, z1)) -+ -+/* -+** ld1_gather_x0_u32_offset: -+** ld1w z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_x0_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_offset_u32 (p0, z0, x0), -+ z0_res = svld1_gather_offset_u32 (p0, z0, x0)) -+ -+/* -+** ld1_gather_m4_u32_offset: -+** mov (x[0-9]+), #?-4 -+** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_m4_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_offset_u32 (p0, z0, -4), -+ z0_res = svld1_gather_offset_u32 (p0, z0, -4)) -+ -+/* -+** ld1_gather_0_u32_offset: -+** ld1w z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_0_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_offset_u32 (p0, z0, 0), -+ z0_res = svld1_gather_offset_u32 (p0, z0, 0)) -+ -+/* -+** ld1_gather_5_u32_offset: -+** mov (x[0-9]+), #?5 -+** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_5_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_offset_u32 (p0, z0, 5), -+ z0_res = svld1_gather_offset_u32 (p0, z0, 5)) -+ -+/* -+** ld1_gather_6_u32_offset: -+** mov (x[0-9]+), #?6 -+** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_6_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_offset_u32 (p0, z0, 6), -+ z0_res = svld1_gather_offset_u32 (p0, z0, 6)) -+ -+/* -+** ld1_gather_7_u32_offset: -+** mov (x[0-9]+), #?7 -+** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_7_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_offset_u32 (p0, z0, 7), -+ z0_res = svld1_gather_offset_u32 (p0, z0, 7)) -+ -+/* -+** ld1_gather_8_u32_offset: -+** ld1w z0\.s, p0/z, \[z0\.s, #8\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_8_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_offset_u32 (p0, z0, 8), -+ z0_res = svld1_gather_offset_u32 (p0, z0, 8)) -+ -+/* -+** ld1_gather_124_u32_offset: -+** ld1w z0\.s, p0/z, \[z0\.s, #124\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_124_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_offset_u32 (p0, z0, 124), -+ z0_res = svld1_gather_offset_u32 (p0, z0, 124)) -+ -+/* -+** ld1_gather_128_u32_offset: -+** mov (x[0-9]+), #?128 -+** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_128_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_offset_u32 (p0, z0, 128), -+ z0_res = svld1_gather_offset_u32 (p0, z0, 128)) -+ -+/* -+** ld1_gather_x0_u32_index: -+** lsl (x[0-9]+), x0, #?2 -+** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS 
(ld1_gather_x0_u32_index, svuint32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_index_u32 (p0, z0, x0), -+ z0_res = svld1_gather_index_u32 (p0, z0, x0)) -+ -+/* -+** ld1_gather_m1_u32_index: -+** mov (x[0-9]+), #?-4 -+** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_m1_u32_index, svuint32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_index_u32 (p0, z0, -1), -+ z0_res = svld1_gather_index_u32 (p0, z0, -1)) -+ -+/* -+** ld1_gather_0_u32_index: -+** ld1w z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_0_u32_index, svuint32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_index_u32 (p0, z0, 0), -+ z0_res = svld1_gather_index_u32 (p0, z0, 0)) -+ -+/* -+** ld1_gather_5_u32_index: -+** ld1w z0\.s, p0/z, \[z0\.s, #20\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_5_u32_index, svuint32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_index_u32 (p0, z0, 5), -+ z0_res = svld1_gather_index_u32 (p0, z0, 5)) -+ -+/* -+** ld1_gather_31_u32_index: -+** ld1w z0\.s, p0/z, \[z0\.s, #124\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_31_u32_index, svuint32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_index_u32 (p0, z0, 31), -+ z0_res = svld1_gather_index_u32 (p0, z0, 31)) -+ -+/* -+** ld1_gather_32_u32_index: -+** mov (x[0-9]+), #?128 -+** ld1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_32_u32_index, svuint32_t, svuint32_t, -+ z0_res = svld1_gather_u32base_index_u32 (p0, z0, 32), -+ z0_res = svld1_gather_index_u32 (p0, z0, 32)) -+ -+/* -+** ld1_gather_x0_u32_s32offset: -+** ld1w z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_x0_u32_s32offset, svuint32_t, uint32_t, svint32_t, -+ z0_res = svld1_gather_s32offset_u32 (p0, x0, z0), -+ z0_res = svld1_gather_offset (p0, x0, z0)) -+ -+/* -+** ld1_gather_tied1_u32_s32offset: -+** ld1w z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_tied1_u32_s32offset, svuint32_t, uint32_t, svint32_t, -+ z0_res = svld1_gather_s32offset_u32 (p0, x0, z0), -+ z0_res = svld1_gather_offset (p0, x0, z0)) -+ -+/* -+** ld1_gather_untied_u32_s32offset: -+** ld1w z0\.s, p0/z, \[x0, z1\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_untied_u32_s32offset, svuint32_t, uint32_t, svint32_t, -+ z0_res = svld1_gather_s32offset_u32 (p0, x0, z1), -+ z0_res = svld1_gather_offset (p0, x0, z1)) -+ -+/* -+** ld1_gather_x0_u32_u32offset: -+** ld1w z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_x0_u32_u32offset, svuint32_t, uint32_t, svuint32_t, -+ z0_res = svld1_gather_u32offset_u32 (p0, x0, z0), -+ z0_res = svld1_gather_offset (p0, x0, z0)) -+ -+/* -+** ld1_gather_tied1_u32_u32offset: -+** ld1w z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_tied1_u32_u32offset, svuint32_t, uint32_t, svuint32_t, -+ z0_res = svld1_gather_u32offset_u32 (p0, x0, z0), -+ z0_res = svld1_gather_offset (p0, x0, z0)) -+ -+/* -+** ld1_gather_untied_u32_u32offset: -+** ld1w z0\.s, p0/z, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_untied_u32_u32offset, svuint32_t, uint32_t, svuint32_t, -+ z0_res = svld1_gather_u32offset_u32 (p0, x0, z1), -+ z0_res = svld1_gather_offset (p0, x0, z1)) -+ -+/* -+** ld1_gather_x0_u32_s32index: -+** ld1w z0\.s, p0/z, \[x0, z0\.s, sxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_x0_u32_s32index, svuint32_t, uint32_t, svint32_t, -+ z0_res = svld1_gather_s32index_u32 (p0, x0, z0), -+ z0_res = svld1_gather_index (p0, x0, z0)) 
-+ -+/* -+** ld1_gather_tied1_u32_s32index: -+** ld1w z0\.s, p0/z, \[x0, z0\.s, sxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_tied1_u32_s32index, svuint32_t, uint32_t, svint32_t, -+ z0_res = svld1_gather_s32index_u32 (p0, x0, z0), -+ z0_res = svld1_gather_index (p0, x0, z0)) -+ -+/* -+** ld1_gather_untied_u32_s32index: -+** ld1w z0\.s, p0/z, \[x0, z1\.s, sxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_untied_u32_s32index, svuint32_t, uint32_t, svint32_t, -+ z0_res = svld1_gather_s32index_u32 (p0, x0, z1), -+ z0_res = svld1_gather_index (p0, x0, z1)) -+ -+/* -+** ld1_gather_x0_u32_u32index: -+** ld1w z0\.s, p0/z, \[x0, z0\.s, uxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_x0_u32_u32index, svuint32_t, uint32_t, svuint32_t, -+ z0_res = svld1_gather_u32index_u32 (p0, x0, z0), -+ z0_res = svld1_gather_index (p0, x0, z0)) -+ -+/* -+** ld1_gather_tied1_u32_u32index: -+** ld1w z0\.s, p0/z, \[x0, z0\.s, uxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_tied1_u32_u32index, svuint32_t, uint32_t, svuint32_t, -+ z0_res = svld1_gather_u32index_u32 (p0, x0, z0), -+ z0_res = svld1_gather_index (p0, x0, z0)) -+ -+/* -+** ld1_gather_untied_u32_u32index: -+** ld1w z0\.s, p0/z, \[x0, z1\.s, uxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_untied_u32_u32index, svuint32_t, uint32_t, svuint32_t, -+ z0_res = svld1_gather_u32index_u32 (p0, x0, z1), -+ z0_res = svld1_gather_index (p0, x0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_u64.c -new file mode 100644 -index 000000000..6ee1d48ab ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_gather_u64.c -@@ -0,0 +1,348 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1_gather_u64_tied1: -+** ld1d z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_u64_tied1, svuint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_u64 (p0, z0), -+ z0_res = svld1_gather_u64 (p0, z0)) -+ -+/* -+** ld1_gather_u64_untied: -+** ld1d z0\.d, p0/z, \[z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_u64_untied, svuint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_u64 (p0, z1), -+ z0_res = svld1_gather_u64 (p0, z1)) -+ -+/* -+** ld1_gather_x0_u64_offset: -+** ld1d z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_x0_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_u64 (p0, z0, x0), -+ z0_res = svld1_gather_offset_u64 (p0, z0, x0)) -+ -+/* -+** ld1_gather_m8_u64_offset: -+** mov (x[0-9]+), #?-8 -+** ld1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_m8_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_u64 (p0, z0, -8), -+ z0_res = svld1_gather_offset_u64 (p0, z0, -8)) -+ -+/* -+** ld1_gather_0_u64_offset: -+** ld1d z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_0_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_u64 (p0, z0, 0), -+ z0_res = svld1_gather_offset_u64 (p0, z0, 0)) -+ -+/* -+** ld1_gather_9_u64_offset: -+** mov (x[0-9]+), #?9 -+** ld1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_9_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_u64 (p0, z0, 9), -+ z0_res = svld1_gather_offset_u64 (p0, z0, 9)) -+ -+/* -+** ld1_gather_10_u64_offset: -+** mov (x[0-9]+), #?10 -+** ld1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_10_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_u64 (p0, z0, 10), -+ z0_res = svld1_gather_offset_u64 (p0, z0, 10)) -+ -+/* -+** ld1_gather_11_u64_offset: -+** mov (x[0-9]+), #?11 -+** ld1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_11_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_u64 (p0, z0, 11), -+ z0_res = svld1_gather_offset_u64 (p0, z0, 11)) -+ -+/* -+** ld1_gather_12_u64_offset: -+** mov (x[0-9]+), #?12 -+** ld1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_12_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_u64 (p0, z0, 12), -+ z0_res = svld1_gather_offset_u64 (p0, z0, 12)) -+ -+/* -+** ld1_gather_13_u64_offset: -+** mov (x[0-9]+), #?13 -+** ld1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_13_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_u64 (p0, z0, 13), -+ z0_res = svld1_gather_offset_u64 (p0, z0, 13)) -+ -+/* -+** ld1_gather_14_u64_offset: -+** mov (x[0-9]+), #?14 -+** ld1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_14_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_u64 (p0, z0, 14), -+ z0_res = svld1_gather_offset_u64 (p0, z0, 14)) -+ -+/* -+** ld1_gather_15_u64_offset: -+** mov (x[0-9]+), #?15 -+** ld1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_15_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_u64 (p0, z0, 15), -+ z0_res = svld1_gather_offset_u64 (p0, z0, 15)) -+ -+/* -+** ld1_gather_16_u64_offset: -+** ld1d z0\.d, p0/z, \[z0\.d, #16\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS 
(ld1_gather_16_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_u64 (p0, z0, 16), -+ z0_res = svld1_gather_offset_u64 (p0, z0, 16)) -+ -+/* -+** ld1_gather_248_u64_offset: -+** ld1d z0\.d, p0/z, \[z0\.d, #248\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_248_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_u64 (p0, z0, 248), -+ z0_res = svld1_gather_offset_u64 (p0, z0, 248)) -+ -+/* -+** ld1_gather_256_u64_offset: -+** mov (x[0-9]+), #?256 -+** ld1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_256_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_offset_u64 (p0, z0, 256), -+ z0_res = svld1_gather_offset_u64 (p0, z0, 256)) -+ -+/* -+** ld1_gather_x0_u64_index: -+** lsl (x[0-9]+), x0, #?3 -+** ld1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_x0_u64_index, svuint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_index_u64 (p0, z0, x0), -+ z0_res = svld1_gather_index_u64 (p0, z0, x0)) -+ -+/* -+** ld1_gather_m1_u64_index: -+** mov (x[0-9]+), #?-8 -+** ld1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_m1_u64_index, svuint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_index_u64 (p0, z0, -1), -+ z0_res = svld1_gather_index_u64 (p0, z0, -1)) -+ -+/* -+** ld1_gather_0_u64_index: -+** ld1d z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_0_u64_index, svuint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_index_u64 (p0, z0, 0), -+ z0_res = svld1_gather_index_u64 (p0, z0, 0)) -+ -+/* -+** ld1_gather_5_u64_index: -+** ld1d z0\.d, p0/z, \[z0\.d, #40\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_5_u64_index, svuint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_index_u64 (p0, z0, 5), -+ z0_res = svld1_gather_index_u64 (p0, z0, 5)) -+ -+/* -+** ld1_gather_31_u64_index: -+** ld1d z0\.d, p0/z, \[z0\.d, #248\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_31_u64_index, svuint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_index_u64 (p0, z0, 31), -+ z0_res = svld1_gather_index_u64 (p0, z0, 31)) -+ -+/* -+** ld1_gather_32_u64_index: -+** mov (x[0-9]+), #?256 -+** ld1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1_gather_32_u64_index, svuint64_t, svuint64_t, -+ z0_res = svld1_gather_u64base_index_u64 (p0, z0, 32), -+ z0_res = svld1_gather_index_u64 (p0, z0, 32)) -+ -+/* -+** ld1_gather_x0_u64_s64offset: -+** ld1d z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_x0_u64_s64offset, svuint64_t, uint64_t, svint64_t, -+ z0_res = svld1_gather_s64offset_u64 (p0, x0, z0), -+ z0_res = svld1_gather_offset (p0, x0, z0)) -+ -+/* -+** ld1_gather_tied1_u64_s64offset: -+** ld1d z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_tied1_u64_s64offset, svuint64_t, uint64_t, svint64_t, -+ z0_res = svld1_gather_s64offset_u64 (p0, x0, z0), -+ z0_res = svld1_gather_offset (p0, x0, z0)) -+ -+/* -+** ld1_gather_untied_u64_s64offset: -+** ld1d z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_untied_u64_s64offset, svuint64_t, uint64_t, svint64_t, -+ z0_res = svld1_gather_s64offset_u64 (p0, x0, z1), -+ z0_res = svld1_gather_offset (p0, x0, z1)) -+ -+/* -+** ld1_gather_ext_u64_s64offset: -+** ld1d z0\.d, p0/z, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_ext_u64_s64offset, svuint64_t, uint64_t, svint64_t, -+ z0_res = svld1_gather_s64offset_u64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svld1_gather_offset (p0, x0, 
svextw_x (p0, z1))) -+ -+/* -+** ld1_gather_x0_u64_u64offset: -+** ld1d z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_x0_u64_u64offset, svuint64_t, uint64_t, svuint64_t, -+ z0_res = svld1_gather_u64offset_u64 (p0, x0, z0), -+ z0_res = svld1_gather_offset (p0, x0, z0)) -+ -+/* -+** ld1_gather_tied1_u64_u64offset: -+** ld1d z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_tied1_u64_u64offset, svuint64_t, uint64_t, svuint64_t, -+ z0_res = svld1_gather_u64offset_u64 (p0, x0, z0), -+ z0_res = svld1_gather_offset (p0, x0, z0)) -+ -+/* -+** ld1_gather_untied_u64_u64offset: -+** ld1d z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_untied_u64_u64offset, svuint64_t, uint64_t, svuint64_t, -+ z0_res = svld1_gather_u64offset_u64 (p0, x0, z1), -+ z0_res = svld1_gather_offset (p0, x0, z1)) -+ -+/* -+** ld1_gather_ext_u64_u64offset: -+** ld1d z0\.d, p0/z, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_ext_u64_u64offset, svuint64_t, uint64_t, svuint64_t, -+ z0_res = svld1_gather_u64offset_u64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svld1_gather_offset (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ld1_gather_x0_u64_s64index: -+** ld1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_x0_u64_s64index, svuint64_t, uint64_t, svint64_t, -+ z0_res = svld1_gather_s64index_u64 (p0, x0, z0), -+ z0_res = svld1_gather_index (p0, x0, z0)) -+ -+/* -+** ld1_gather_tied1_u64_s64index: -+** ld1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_tied1_u64_s64index, svuint64_t, uint64_t, svint64_t, -+ z0_res = svld1_gather_s64index_u64 (p0, x0, z0), -+ z0_res = svld1_gather_index (p0, x0, z0)) -+ -+/* -+** ld1_gather_untied_u64_s64index: -+** ld1d z0\.d, p0/z, \[x0, z1\.d, lsl 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_untied_u64_s64index, svuint64_t, uint64_t, svint64_t, -+ z0_res = svld1_gather_s64index_u64 (p0, x0, z1), -+ z0_res = svld1_gather_index (p0, x0, z1)) -+ -+/* -+** ld1_gather_ext_u64_s64index: -+** ld1d z0\.d, p0/z, \[x0, z1\.d, sxtw 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_ext_u64_s64index, svuint64_t, uint64_t, svint64_t, -+ z0_res = svld1_gather_s64index_u64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svld1_gather_index (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ld1_gather_x0_u64_u64index: -+** ld1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_x0_u64_u64index, svuint64_t, uint64_t, svuint64_t, -+ z0_res = svld1_gather_u64index_u64 (p0, x0, z0), -+ z0_res = svld1_gather_index (p0, x0, z0)) -+ -+/* -+** ld1_gather_tied1_u64_u64index: -+** ld1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_tied1_u64_u64index, svuint64_t, uint64_t, svuint64_t, -+ z0_res = svld1_gather_u64index_u64 (p0, x0, z0), -+ z0_res = svld1_gather_index (p0, x0, z0)) -+ -+/* -+** ld1_gather_untied_u64_u64index: -+** ld1d z0\.d, p0/z, \[x0, z1\.d, lsl 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_untied_u64_u64index, svuint64_t, uint64_t, svuint64_t, -+ z0_res = svld1_gather_u64index_u64 (p0, x0, z1), -+ z0_res = svld1_gather_index (p0, x0, z1)) -+ -+/* -+** ld1_gather_ext_u64_u64index: -+** ld1d z0\.d, p0/z, \[x0, z1\.d, uxtw 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1_gather_ext_u64_u64index, svuint64_t, uint64_t, svuint64_t, -+ z0_res = svld1_gather_u64index_u64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svld1_gather_index (p0, x0, svextw_x (p0, z1))) -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_s16.c -new file mode 100644 -index 000000000..d86b49a73 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_s16.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1_s16_base: -+** ld1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_s16_base, svint16_t, int16_t, -+ z0 = svld1_s16 (p0, x0), -+ z0 = svld1 (p0, x0)) -+ -+/* -+** ld1_s16_index: -+** ld1h z0\.h, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ld1_s16_index, svint16_t, int16_t, -+ z0 = svld1_s16 (p0, x0 + x1), -+ z0 = svld1 (p0, x0 + x1)) -+ -+/* -+** ld1_s16_1: -+** ld1h z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_s16_1, svint16_t, int16_t, -+ z0 = svld1_s16 (p0, x0 + svcnth ()), -+ z0 = svld1 (p0, x0 + svcnth ())) -+ -+/* -+** ld1_s16_7: -+** ld1h z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_s16_7, svint16_t, int16_t, -+ z0 = svld1_s16 (p0, x0 + svcnth () * 7), -+ z0 = svld1 (p0, x0 + svcnth () * 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_s16_8: -+** incb x0, all, mul #8 -+** ld1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_s16_8, svint16_t, int16_t, -+ z0 = svld1_s16 (p0, x0 + svcnth () * 8), -+ z0 = svld1 (p0, x0 + svcnth () * 8)) -+ -+/* -+** ld1_s16_m1: -+** ld1h z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_s16_m1, svint16_t, int16_t, -+ z0 = svld1_s16 (p0, x0 - svcnth ()), -+ z0 = svld1 (p0, x0 - svcnth ())) -+ -+/* -+** ld1_s16_m8: -+** ld1h z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_s16_m8, svint16_t, int16_t, -+ z0 = svld1_s16 (p0, x0 - svcnth () * 8), -+ z0 = svld1 (p0, x0 - svcnth () * 8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_s16_m9: -+** decb x0, all, mul #9 -+** ld1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_s16_m9, svint16_t, int16_t, -+ z0 = svld1_s16 (p0, x0 - svcnth () * 9), -+ z0 = svld1 (p0, x0 - svcnth () * 9)) -+ -+/* -+** ld1_vnum_s16_0: -+** ld1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_s16_0, svint16_t, int16_t, -+ z0 = svld1_vnum_s16 (p0, x0, 0), -+ z0 = svld1_vnum (p0, x0, 0)) -+ -+/* -+** ld1_vnum_s16_1: -+** ld1h z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_s16_1, svint16_t, int16_t, -+ z0 = svld1_vnum_s16 (p0, x0, 1), -+ z0 = svld1_vnum (p0, x0, 1)) -+ -+/* -+** ld1_vnum_s16_7: -+** ld1h z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_s16_7, svint16_t, int16_t, -+ z0 = svld1_vnum_s16 (p0, x0, 7), -+ z0 = svld1_vnum (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_vnum_s16_8: -+** incb x0, all, mul #8 -+** ld1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_s16_8, svint16_t, int16_t, -+ z0 = svld1_vnum_s16 (p0, x0, 8), -+ z0 = svld1_vnum (p0, x0, 8)) -+ -+/* -+** ld1_vnum_s16_m1: -+** ld1h z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_s16_m1, svint16_t, int16_t, -+ z0 = svld1_vnum_s16 (p0, x0, -1), -+ z0 = svld1_vnum (p0, x0, -1)) -+ -+/* -+** ld1_vnum_s16_m8: -+** ld1h z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_s16_m8, svint16_t, int16_t, -+ z0 = svld1_vnum_s16 (p0, x0, -8), -+ z0 = svld1_vnum (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld1_vnum_s16_m9: -+** decb x0, all, mul #9 -+** ld1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_s16_m9, svint16_t, int16_t, -+ z0 = svld1_vnum_s16 (p0, x0, -9), -+ z0 = svld1_vnum (p0, x0, -9)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld1_vnum_s16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld1h z0\.h, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_s16_x1, svint16_t, int16_t, -+ z0 = svld1_vnum_s16 (p0, x0, x1), -+ z0 = svld1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_s32.c -new file mode 100644 -index 000000000..5b692e510 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_s32.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1_s32_base: -+** ld1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_s32_base, svint32_t, int32_t, -+ z0 = svld1_s32 (p0, x0), -+ z0 = svld1 (p0, x0)) -+ -+/* -+** ld1_s32_index: -+** ld1w z0\.s, p0/z, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_LOAD (ld1_s32_index, svint32_t, int32_t, -+ z0 = svld1_s32 (p0, x0 + x1), -+ z0 = svld1 (p0, x0 + x1)) -+ -+/* -+** ld1_s32_1: -+** ld1w z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_s32_1, svint32_t, int32_t, -+ z0 = svld1_s32 (p0, x0 + svcntw ()), -+ z0 = svld1 (p0, x0 + svcntw ())) -+ -+/* -+** ld1_s32_7: -+** ld1w z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_s32_7, svint32_t, int32_t, -+ z0 = svld1_s32 (p0, x0 + svcntw () * 7), -+ z0 = svld1 (p0, x0 + svcntw () * 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_s32_8: -+** incb x0, all, mul #8 -+** ld1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_s32_8, svint32_t, int32_t, -+ z0 = svld1_s32 (p0, x0 + svcntw () * 8), -+ z0 = svld1 (p0, x0 + svcntw () * 8)) -+ -+/* -+** ld1_s32_m1: -+** ld1w z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_s32_m1, svint32_t, int32_t, -+ z0 = svld1_s32 (p0, x0 - svcntw ()), -+ z0 = svld1 (p0, x0 - svcntw ())) -+ -+/* -+** ld1_s32_m8: -+** ld1w z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_s32_m8, svint32_t, int32_t, -+ z0 = svld1_s32 (p0, x0 - svcntw () * 8), -+ z0 = svld1 (p0, x0 - svcntw () * 8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_s32_m9: -+** decb x0, all, mul #9 -+** ld1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_s32_m9, svint32_t, int32_t, -+ z0 = svld1_s32 (p0, x0 - svcntw () * 9), -+ z0 = svld1 (p0, x0 - svcntw () * 9)) -+ -+/* -+** ld1_vnum_s32_0: -+** ld1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_s32_0, svint32_t, int32_t, -+ z0 = svld1_vnum_s32 (p0, x0, 0), -+ z0 = svld1_vnum (p0, x0, 0)) -+ -+/* -+** ld1_vnum_s32_1: -+** ld1w z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_s32_1, svint32_t, int32_t, -+ z0 = svld1_vnum_s32 (p0, x0, 1), -+ z0 = svld1_vnum (p0, x0, 1)) -+ -+/* -+** ld1_vnum_s32_7: -+** ld1w z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_s32_7, svint32_t, int32_t, -+ z0 = svld1_vnum_s32 (p0, x0, 7), -+ z0 = svld1_vnum (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld1_vnum_s32_8: -+** incb x0, all, mul #8 -+** ld1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_s32_8, svint32_t, int32_t, -+ z0 = svld1_vnum_s32 (p0, x0, 8), -+ z0 = svld1_vnum (p0, x0, 8)) -+ -+/* -+** ld1_vnum_s32_m1: -+** ld1w z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_s32_m1, svint32_t, int32_t, -+ z0 = svld1_vnum_s32 (p0, x0, -1), -+ z0 = svld1_vnum (p0, x0, -1)) -+ -+/* -+** ld1_vnum_s32_m8: -+** ld1w z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_s32_m8, svint32_t, int32_t, -+ z0 = svld1_vnum_s32 (p0, x0, -8), -+ z0 = svld1_vnum (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_vnum_s32_m9: -+** decb x0, all, mul #9 -+** ld1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_s32_m9, svint32_t, int32_t, -+ z0 = svld1_vnum_s32 (p0, x0, -9), -+ z0 = svld1_vnum (p0, x0, -9)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld1_vnum_s32_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld1w z0\.s, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_s32_x1, svint32_t, int32_t, -+ z0 = svld1_vnum_s32 (p0, x0, x1), -+ z0 = svld1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_s64.c -new file mode 100644 -index 000000000..15ee29bba ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_s64.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1_s64_base: -+** ld1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_s64_base, svint64_t, int64_t, -+ z0 = svld1_s64 (p0, x0), -+ z0 = svld1 (p0, x0)) -+ -+/* -+** ld1_s64_index: -+** ld1d z0\.d, p0/z, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_LOAD (ld1_s64_index, svint64_t, int64_t, -+ z0 = svld1_s64 (p0, x0 + x1), -+ z0 = svld1 (p0, x0 + x1)) -+ -+/* -+** ld1_s64_1: -+** ld1d z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_s64_1, svint64_t, int64_t, -+ z0 = svld1_s64 (p0, x0 + svcntd ()), -+ z0 = svld1 (p0, x0 + svcntd ())) -+ -+/* -+** ld1_s64_7: -+** ld1d z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_s64_7, svint64_t, int64_t, -+ z0 = svld1_s64 (p0, x0 + svcntd () * 7), -+ z0 = svld1 (p0, x0 + svcntd () * 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_s64_8: -+** incb x0, all, mul #8 -+** ld1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_s64_8, svint64_t, int64_t, -+ z0 = svld1_s64 (p0, x0 + svcntd () * 8), -+ z0 = svld1 (p0, x0 + svcntd () * 8)) -+ -+/* -+** ld1_s64_m1: -+** ld1d z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_s64_m1, svint64_t, int64_t, -+ z0 = svld1_s64 (p0, x0 - svcntd ()), -+ z0 = svld1 (p0, x0 - svcntd ())) -+ -+/* -+** ld1_s64_m8: -+** ld1d z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_s64_m8, svint64_t, int64_t, -+ z0 = svld1_s64 (p0, x0 - svcntd () * 8), -+ z0 = svld1 (p0, x0 - svcntd () * 8)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld1_s64_m9: -+** decb x0, all, mul #9 -+** ld1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_s64_m9, svint64_t, int64_t, -+ z0 = svld1_s64 (p0, x0 - svcntd () * 9), -+ z0 = svld1 (p0, x0 - svcntd () * 9)) -+ -+/* -+** ld1_vnum_s64_0: -+** ld1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_s64_0, svint64_t, int64_t, -+ z0 = svld1_vnum_s64 (p0, x0, 0), -+ z0 = svld1_vnum (p0, x0, 0)) -+ -+/* -+** ld1_vnum_s64_1: -+** ld1d z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_s64_1, svint64_t, int64_t, -+ z0 = svld1_vnum_s64 (p0, x0, 1), -+ z0 = svld1_vnum (p0, x0, 1)) -+ -+/* -+** ld1_vnum_s64_7: -+** ld1d z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_s64_7, svint64_t, int64_t, -+ z0 = svld1_vnum_s64 (p0, x0, 7), -+ z0 = svld1_vnum (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_vnum_s64_8: -+** incb x0, all, mul #8 -+** ld1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_s64_8, svint64_t, int64_t, -+ z0 = svld1_vnum_s64 (p0, x0, 8), -+ z0 = svld1_vnum (p0, x0, 8)) -+ -+/* -+** ld1_vnum_s64_m1: -+** ld1d z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_s64_m1, svint64_t, int64_t, -+ z0 = svld1_vnum_s64 (p0, x0, -1), -+ z0 = svld1_vnum (p0, x0, -1)) -+ -+/* -+** ld1_vnum_s64_m8: -+** ld1d z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_s64_m8, svint64_t, int64_t, -+ z0 = svld1_vnum_s64 (p0, x0, -8), -+ z0 = svld1_vnum (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_vnum_s64_m9: -+** decb x0, all, mul #9 -+** ld1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_s64_m9, svint64_t, int64_t, -+ z0 = svld1_vnum_s64 (p0, x0, -9), -+ z0 = svld1_vnum (p0, x0, -9)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld1_vnum_s64_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld1d z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_s64_x1, svint64_t, int64_t, -+ z0 = svld1_vnum_s64 (p0, x0, x1), -+ z0 = svld1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_s8.c -new file mode 100644 -index 000000000..036fb3d41 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_s8.c -@@ -0,0 +1,162 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1_s8_base: -+** ld1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_s8_base, svint8_t, int8_t, -+ z0 = svld1_s8 (p0, x0), -+ z0 = svld1 (p0, x0)) -+ -+/* -+** ld1_s8_index: -+** ld1b z0\.b, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ld1_s8_index, svint8_t, int8_t, -+ z0 = svld1_s8 (p0, x0 + x1), -+ z0 = svld1 (p0, x0 + x1)) -+ -+/* -+** ld1_s8_1: -+** ld1b z0\.b, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_s8_1, svint8_t, int8_t, -+ z0 = svld1_s8 (p0, x0 + svcntb ()), -+ z0 = svld1 (p0, x0 + svcntb ())) -+ -+/* -+** ld1_s8_7: -+** ld1b z0\.b, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_s8_7, svint8_t, int8_t, -+ z0 = svld1_s8 (p0, x0 + svcntb () * 7), -+ z0 = svld1 (p0, x0 + svcntb () * 7)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld1_s8_8: -+** incb x0, all, mul #8 -+** ld1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_s8_8, svint8_t, int8_t, -+ z0 = svld1_s8 (p0, x0 + svcntb () * 8), -+ z0 = svld1 (p0, x0 + svcntb () * 8)) -+ -+/* -+** ld1_s8_m1: -+** ld1b z0\.b, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_s8_m1, svint8_t, int8_t, -+ z0 = svld1_s8 (p0, x0 - svcntb ()), -+ z0 = svld1 (p0, x0 - svcntb ())) -+ -+/* -+** ld1_s8_m8: -+** ld1b z0\.b, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_s8_m8, svint8_t, int8_t, -+ z0 = svld1_s8 (p0, x0 - svcntb () * 8), -+ z0 = svld1 (p0, x0 - svcntb () * 8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_s8_m9: -+** decb x0, all, mul #9 -+** ld1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_s8_m9, svint8_t, int8_t, -+ z0 = svld1_s8 (p0, x0 - svcntb () * 9), -+ z0 = svld1 (p0, x0 - svcntb () * 9)) -+ -+/* -+** ld1_vnum_s8_0: -+** ld1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_s8_0, svint8_t, int8_t, -+ z0 = svld1_vnum_s8 (p0, x0, 0), -+ z0 = svld1_vnum (p0, x0, 0)) -+ -+/* -+** ld1_vnum_s8_1: -+** ld1b z0\.b, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_s8_1, svint8_t, int8_t, -+ z0 = svld1_vnum_s8 (p0, x0, 1), -+ z0 = svld1_vnum (p0, x0, 1)) -+ -+/* -+** ld1_vnum_s8_7: -+** ld1b z0\.b, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_s8_7, svint8_t, int8_t, -+ z0 = svld1_vnum_s8 (p0, x0, 7), -+ z0 = svld1_vnum (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_vnum_s8_8: -+** incb x0, all, mul #8 -+** ld1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_s8_8, svint8_t, int8_t, -+ z0 = svld1_vnum_s8 (p0, x0, 8), -+ z0 = svld1_vnum (p0, x0, 8)) -+ -+/* -+** ld1_vnum_s8_m1: -+** ld1b z0\.b, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_s8_m1, svint8_t, int8_t, -+ z0 = svld1_vnum_s8 (p0, x0, -1), -+ z0 = svld1_vnum (p0, x0, -1)) -+ -+/* -+** ld1_vnum_s8_m8: -+** ld1b z0\.b, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_s8_m8, svint8_t, int8_t, -+ z0 = svld1_vnum_s8 (p0, x0, -8), -+ z0 = svld1_vnum (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_vnum_s8_m9: -+** decb x0, all, mul #9 -+** ld1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_s8_m9, svint8_t, int8_t, -+ z0 = svld1_vnum_s8 (p0, x0, -9), -+ z0 = svld1_vnum (p0, x0, -9)) -+ -+/* -+** ld1_vnum_s8_x1: -+** cntb (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ld1b z0\.b, p0/z, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** ld1b z0\.b, p0/z, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_LOAD (ld1_vnum_s8_x1, svint8_t, int8_t, -+ z0 = svld1_vnum_s8 (p0, x0, x1), -+ z0 = svld1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_u16.c -new file mode 100644 -index 000000000..ee25b9e37 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_u16.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1_u16_base: -+** ld1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_u16_base, svuint16_t, uint16_t, -+ z0 = svld1_u16 (p0, x0), -+ z0 = svld1 (p0, x0)) -+ -+/* -+** ld1_u16_index: -+** ld1h z0\.h, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ld1_u16_index, svuint16_t, uint16_t, -+ z0 = svld1_u16 (p0, x0 + x1), -+ z0 = svld1 (p0, x0 + x1)) -+ -+/* -+** ld1_u16_1: -+** ld1h z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_u16_1, svuint16_t, uint16_t, -+ z0 = svld1_u16 (p0, x0 + svcnth ()), -+ z0 = svld1 (p0, x0 + svcnth ())) -+ -+/* -+** ld1_u16_7: -+** ld1h z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_u16_7, svuint16_t, uint16_t, -+ z0 = svld1_u16 (p0, x0 + svcnth () * 7), -+ z0 = svld1 (p0, x0 + svcnth () * 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_u16_8: -+** incb x0, all, mul #8 -+** ld1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_u16_8, svuint16_t, uint16_t, -+ z0 = svld1_u16 (p0, x0 + svcnth () * 8), -+ z0 = svld1 (p0, x0 + svcnth () * 8)) -+ -+/* -+** ld1_u16_m1: -+** ld1h z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_u16_m1, svuint16_t, uint16_t, -+ z0 = svld1_u16 (p0, x0 - svcnth ()), -+ z0 = svld1 (p0, x0 - svcnth ())) -+ -+/* -+** ld1_u16_m8: -+** ld1h z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_u16_m8, svuint16_t, uint16_t, -+ z0 = svld1_u16 (p0, x0 - svcnth () * 8), -+ z0 = svld1 (p0, x0 - svcnth () * 8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_u16_m9: -+** decb x0, all, mul #9 -+** ld1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_u16_m9, svuint16_t, uint16_t, -+ z0 = svld1_u16 (p0, x0 - svcnth () * 9), -+ z0 = svld1 (p0, x0 - svcnth () * 9)) -+ -+/* -+** ld1_vnum_u16_0: -+** ld1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_u16_0, svuint16_t, uint16_t, -+ z0 = svld1_vnum_u16 (p0, x0, 0), -+ z0 = svld1_vnum (p0, x0, 0)) -+ -+/* -+** ld1_vnum_u16_1: -+** ld1h z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_u16_1, svuint16_t, uint16_t, -+ z0 = svld1_vnum_u16 (p0, x0, 1), -+ z0 = svld1_vnum (p0, x0, 1)) -+ -+/* -+** ld1_vnum_u16_7: -+** ld1h z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_u16_7, svuint16_t, uint16_t, -+ z0 = svld1_vnum_u16 (p0, x0, 7), -+ z0 = svld1_vnum (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_vnum_u16_8: -+** incb x0, all, mul #8 -+** ld1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_u16_8, svuint16_t, uint16_t, -+ z0 = svld1_vnum_u16 (p0, x0, 8), -+ z0 = svld1_vnum (p0, x0, 8)) -+ -+/* -+** ld1_vnum_u16_m1: -+** ld1h z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_u16_m1, svuint16_t, uint16_t, -+ z0 = svld1_vnum_u16 (p0, x0, -1), -+ z0 = svld1_vnum (p0, x0, -1)) -+ -+/* -+** ld1_vnum_u16_m8: -+** ld1h z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_u16_m8, svuint16_t, uint16_t, -+ z0 = svld1_vnum_u16 (p0, x0, -8), -+ z0 = svld1_vnum (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_vnum_u16_m9: -+** decb x0, all, mul #9 -+** ld1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_u16_m9, svuint16_t, uint16_t, -+ z0 = svld1_vnum_u16 (p0, x0, -9), -+ z0 = svld1_vnum (p0, x0, -9)) -+ -+/* Using MUL to calculate an index would also be OK. 
*/ -+/* -+** ld1_vnum_u16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld1h z0\.h, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_u16_x1, svuint16_t, uint16_t, -+ z0 = svld1_vnum_u16 (p0, x0, x1), -+ z0 = svld1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_u32.c -new file mode 100644 -index 000000000..bcd304126 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_u32.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1_u32_base: -+** ld1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_u32_base, svuint32_t, uint32_t, -+ z0 = svld1_u32 (p0, x0), -+ z0 = svld1 (p0, x0)) -+ -+/* -+** ld1_u32_index: -+** ld1w z0\.s, p0/z, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_LOAD (ld1_u32_index, svuint32_t, uint32_t, -+ z0 = svld1_u32 (p0, x0 + x1), -+ z0 = svld1 (p0, x0 + x1)) -+ -+/* -+** ld1_u32_1: -+** ld1w z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_u32_1, svuint32_t, uint32_t, -+ z0 = svld1_u32 (p0, x0 + svcntw ()), -+ z0 = svld1 (p0, x0 + svcntw ())) -+ -+/* -+** ld1_u32_7: -+** ld1w z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_u32_7, svuint32_t, uint32_t, -+ z0 = svld1_u32 (p0, x0 + svcntw () * 7), -+ z0 = svld1 (p0, x0 + svcntw () * 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_u32_8: -+** incb x0, all, mul #8 -+** ld1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_u32_8, svuint32_t, uint32_t, -+ z0 = svld1_u32 (p0, x0 + svcntw () * 8), -+ z0 = svld1 (p0, x0 + svcntw () * 8)) -+ -+/* -+** ld1_u32_m1: -+** ld1w z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_u32_m1, svuint32_t, uint32_t, -+ z0 = svld1_u32 (p0, x0 - svcntw ()), -+ z0 = svld1 (p0, x0 - svcntw ())) -+ -+/* -+** ld1_u32_m8: -+** ld1w z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_u32_m8, svuint32_t, uint32_t, -+ z0 = svld1_u32 (p0, x0 - svcntw () * 8), -+ z0 = svld1 (p0, x0 - svcntw () * 8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_u32_m9: -+** decb x0, all, mul #9 -+** ld1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_u32_m9, svuint32_t, uint32_t, -+ z0 = svld1_u32 (p0, x0 - svcntw () * 9), -+ z0 = svld1 (p0, x0 - svcntw () * 9)) -+ -+/* -+** ld1_vnum_u32_0: -+** ld1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_u32_0, svuint32_t, uint32_t, -+ z0 = svld1_vnum_u32 (p0, x0, 0), -+ z0 = svld1_vnum (p0, x0, 0)) -+ -+/* -+** ld1_vnum_u32_1: -+** ld1w z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_u32_1, svuint32_t, uint32_t, -+ z0 = svld1_vnum_u32 (p0, x0, 1), -+ z0 = svld1_vnum (p0, x0, 1)) -+ -+/* -+** ld1_vnum_u32_7: -+** ld1w z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_u32_7, svuint32_t, uint32_t, -+ z0 = svld1_vnum_u32 (p0, x0, 7), -+ z0 = svld1_vnum (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld1_vnum_u32_8: -+** incb x0, all, mul #8 -+** ld1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_u32_8, svuint32_t, uint32_t, -+ z0 = svld1_vnum_u32 (p0, x0, 8), -+ z0 = svld1_vnum (p0, x0, 8)) -+ -+/* -+** ld1_vnum_u32_m1: -+** ld1w z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_u32_m1, svuint32_t, uint32_t, -+ z0 = svld1_vnum_u32 (p0, x0, -1), -+ z0 = svld1_vnum (p0, x0, -1)) -+ -+/* -+** ld1_vnum_u32_m8: -+** ld1w z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_u32_m8, svuint32_t, uint32_t, -+ z0 = svld1_vnum_u32 (p0, x0, -8), -+ z0 = svld1_vnum (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_vnum_u32_m9: -+** decb x0, all, mul #9 -+** ld1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_u32_m9, svuint32_t, uint32_t, -+ z0 = svld1_vnum_u32 (p0, x0, -9), -+ z0 = svld1_vnum (p0, x0, -9)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld1_vnum_u32_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld1w z0\.s, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_u32_x1, svuint32_t, uint32_t, -+ z0 = svld1_vnum_u32 (p0, x0, x1), -+ z0 = svld1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_u64.c -new file mode 100644 -index 000000000..ebb874720 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_u64.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1_u64_base: -+** ld1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_u64_base, svuint64_t, uint64_t, -+ z0 = svld1_u64 (p0, x0), -+ z0 = svld1 (p0, x0)) -+ -+/* -+** ld1_u64_index: -+** ld1d z0\.d, p0/z, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_LOAD (ld1_u64_index, svuint64_t, uint64_t, -+ z0 = svld1_u64 (p0, x0 + x1), -+ z0 = svld1 (p0, x0 + x1)) -+ -+/* -+** ld1_u64_1: -+** ld1d z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_u64_1, svuint64_t, uint64_t, -+ z0 = svld1_u64 (p0, x0 + svcntd ()), -+ z0 = svld1 (p0, x0 + svcntd ())) -+ -+/* -+** ld1_u64_7: -+** ld1d z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_u64_7, svuint64_t, uint64_t, -+ z0 = svld1_u64 (p0, x0 + svcntd () * 7), -+ z0 = svld1 (p0, x0 + svcntd () * 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_u64_8: -+** incb x0, all, mul #8 -+** ld1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_u64_8, svuint64_t, uint64_t, -+ z0 = svld1_u64 (p0, x0 + svcntd () * 8), -+ z0 = svld1 (p0, x0 + svcntd () * 8)) -+ -+/* -+** ld1_u64_m1: -+** ld1d z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_u64_m1, svuint64_t, uint64_t, -+ z0 = svld1_u64 (p0, x0 - svcntd ()), -+ z0 = svld1 (p0, x0 - svcntd ())) -+ -+/* -+** ld1_u64_m8: -+** ld1d z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_u64_m8, svuint64_t, uint64_t, -+ z0 = svld1_u64 (p0, x0 - svcntd () * 8), -+ z0 = svld1 (p0, x0 - svcntd () * 8)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld1_u64_m9: -+** decb x0, all, mul #9 -+** ld1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_u64_m9, svuint64_t, uint64_t, -+ z0 = svld1_u64 (p0, x0 - svcntd () * 9), -+ z0 = svld1 (p0, x0 - svcntd () * 9)) -+ -+/* -+** ld1_vnum_u64_0: -+** ld1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_u64_0, svuint64_t, uint64_t, -+ z0 = svld1_vnum_u64 (p0, x0, 0), -+ z0 = svld1_vnum (p0, x0, 0)) -+ -+/* -+** ld1_vnum_u64_1: -+** ld1d z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_u64_1, svuint64_t, uint64_t, -+ z0 = svld1_vnum_u64 (p0, x0, 1), -+ z0 = svld1_vnum (p0, x0, 1)) -+ -+/* -+** ld1_vnum_u64_7: -+** ld1d z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_u64_7, svuint64_t, uint64_t, -+ z0 = svld1_vnum_u64 (p0, x0, 7), -+ z0 = svld1_vnum (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_vnum_u64_8: -+** incb x0, all, mul #8 -+** ld1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_u64_8, svuint64_t, uint64_t, -+ z0 = svld1_vnum_u64 (p0, x0, 8), -+ z0 = svld1_vnum (p0, x0, 8)) -+ -+/* -+** ld1_vnum_u64_m1: -+** ld1d z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_u64_m1, svuint64_t, uint64_t, -+ z0 = svld1_vnum_u64 (p0, x0, -1), -+ z0 = svld1_vnum (p0, x0, -1)) -+ -+/* -+** ld1_vnum_u64_m8: -+** ld1d z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_u64_m8, svuint64_t, uint64_t, -+ z0 = svld1_vnum_u64 (p0, x0, -8), -+ z0 = svld1_vnum (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_vnum_u64_m9: -+** decb x0, all, mul #9 -+** ld1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_u64_m9, svuint64_t, uint64_t, -+ z0 = svld1_vnum_u64 (p0, x0, -9), -+ z0 = svld1_vnum (p0, x0, -9)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld1_vnum_u64_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld1d z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_u64_x1, svuint64_t, uint64_t, -+ z0 = svld1_vnum_u64 (p0, x0, x1), -+ z0 = svld1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_u8.c -new file mode 100644 -index 000000000..12f42bd92 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1_u8.c -@@ -0,0 +1,162 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1_u8_base: -+** ld1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_u8_base, svuint8_t, uint8_t, -+ z0 = svld1_u8 (p0, x0), -+ z0 = svld1 (p0, x0)) -+ -+/* -+** ld1_u8_index: -+** ld1b z0\.b, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ld1_u8_index, svuint8_t, uint8_t, -+ z0 = svld1_u8 (p0, x0 + x1), -+ z0 = svld1 (p0, x0 + x1)) -+ -+/* -+** ld1_u8_1: -+** ld1b z0\.b, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_u8_1, svuint8_t, uint8_t, -+ z0 = svld1_u8 (p0, x0 + svcntb ()), -+ z0 = svld1 (p0, x0 + svcntb ())) -+ -+/* -+** ld1_u8_7: -+** ld1b z0\.b, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_u8_7, svuint8_t, uint8_t, -+ z0 = svld1_u8 (p0, x0 + svcntb () * 7), -+ z0 = svld1 (p0, x0 + svcntb () * 7)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld1_u8_8: -+** incb x0, all, mul #8 -+** ld1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_u8_8, svuint8_t, uint8_t, -+ z0 = svld1_u8 (p0, x0 + svcntb () * 8), -+ z0 = svld1 (p0, x0 + svcntb () * 8)) -+ -+/* -+** ld1_u8_m1: -+** ld1b z0\.b, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_u8_m1, svuint8_t, uint8_t, -+ z0 = svld1_u8 (p0, x0 - svcntb ()), -+ z0 = svld1 (p0, x0 - svcntb ())) -+ -+/* -+** ld1_u8_m8: -+** ld1b z0\.b, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_u8_m8, svuint8_t, uint8_t, -+ z0 = svld1_u8 (p0, x0 - svcntb () * 8), -+ z0 = svld1 (p0, x0 - svcntb () * 8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_u8_m9: -+** decb x0, all, mul #9 -+** ld1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_u8_m9, svuint8_t, uint8_t, -+ z0 = svld1_u8 (p0, x0 - svcntb () * 9), -+ z0 = svld1 (p0, x0 - svcntb () * 9)) -+ -+/* -+** ld1_vnum_u8_0: -+** ld1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_u8_0, svuint8_t, uint8_t, -+ z0 = svld1_vnum_u8 (p0, x0, 0), -+ z0 = svld1_vnum (p0, x0, 0)) -+ -+/* -+** ld1_vnum_u8_1: -+** ld1b z0\.b, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_u8_1, svuint8_t, uint8_t, -+ z0 = svld1_vnum_u8 (p0, x0, 1), -+ z0 = svld1_vnum (p0, x0, 1)) -+ -+/* -+** ld1_vnum_u8_7: -+** ld1b z0\.b, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_u8_7, svuint8_t, uint8_t, -+ z0 = svld1_vnum_u8 (p0, x0, 7), -+ z0 = svld1_vnum (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_vnum_u8_8: -+** incb x0, all, mul #8 -+** ld1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_u8_8, svuint8_t, uint8_t, -+ z0 = svld1_vnum_u8 (p0, x0, 8), -+ z0 = svld1_vnum (p0, x0, 8)) -+ -+/* -+** ld1_vnum_u8_m1: -+** ld1b z0\.b, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_u8_m1, svuint8_t, uint8_t, -+ z0 = svld1_vnum_u8 (p0, x0, -1), -+ z0 = svld1_vnum (p0, x0, -1)) -+ -+/* -+** ld1_vnum_u8_m8: -+** ld1b z0\.b, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_u8_m8, svuint8_t, uint8_t, -+ z0 = svld1_vnum_u8 (p0, x0, -8), -+ z0 = svld1_vnum (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1_vnum_u8_m9: -+** decb x0, all, mul #9 -+** ld1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1_vnum_u8_m9, svuint8_t, uint8_t, -+ z0 = svld1_vnum_u8 (p0, x0, -9), -+ z0 = svld1_vnum (p0, x0, -9)) -+ -+/* -+** ld1_vnum_u8_x1: -+** cntb (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ld1b z0\.b, p0/z, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** ld1b z0\.b, p0/z, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_LOAD (ld1_vnum_u8_x1, svuint8_t, uint8_t, -+ z0 = svld1_vnum_u8 (p0, x0, x1), -+ z0 = svld1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_bf16.c -new file mode 100644 -index 000000000..cb1801778 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_bf16.c -@@ -0,0 +1,120 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+/* { dg-additional-options "-march=armv8.6-a+f64mm" } */ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1ro_bf16_base: -+** ld1roh z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ro_bf16_base, svbfloat16_t, bfloat16_t, -+ z0 = svld1ro_bf16 (p0, x0), -+ z0 = svld1ro (p0, x0)) -+ -+/* -+** ld1ro_bf16_index: -+** ld1roh z0\.h, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_bf16_index, svbfloat16_t, bfloat16_t, -+ z0 = svld1ro_bf16 (p0, x0 + x1), -+ z0 = svld1ro (p0, x0 + x1)) -+ -+/* -+** ld1ro_bf16_1: -+** add (x[0-9]+), x0, #?2 -+** ld1roh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_bf16_1, svbfloat16_t, bfloat16_t, -+ z0 = svld1ro_bf16 (p0, x0 + 1), -+ z0 = svld1ro (p0, x0 + 1)) -+ -+/* -+** ld1ro_bf16_8: -+** add (x[0-9]+), x0, #?16 -+** ld1roh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_bf16_8, svbfloat16_t, bfloat16_t, -+ z0 = svld1ro_bf16 (p0, x0 + 8), -+ z0 = svld1ro (p0, x0 + 8)) -+ -+/* -+** ld1ro_bf16_128: -+** add (x[0-9]+), x0, #?256 -+** ld1roh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_bf16_128, svbfloat16_t, bfloat16_t, -+ z0 = svld1ro_bf16 (p0, x0 + 128), -+ z0 = svld1ro (p0, x0 + 128)) -+ -+/* -+** ld1ro_bf16_m1: -+** sub (x[0-9]+), x0, #?2 -+** ld1roh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_bf16_m1, svbfloat16_t, bfloat16_t, -+ z0 = svld1ro_bf16 (p0, x0 - 1), -+ z0 = svld1ro (p0, x0 - 1)) -+ -+/* -+** ld1ro_bf16_m8: -+** sub (x[0-9]+), x0, #?16 -+** ld1roh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_bf16_m8, svbfloat16_t, bfloat16_t, -+ z0 = svld1ro_bf16 (p0, x0 - 8), -+ z0 = svld1ro (p0, x0 - 8)) -+ -+/* -+** ld1ro_bf16_m144: -+** sub (x[0-9]+), x0, #?288 -+** ld1roh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_bf16_m144, svbfloat16_t, bfloat16_t, -+ z0 = svld1ro_bf16 (p0, x0 - 144), -+ z0 = svld1ro (p0, x0 - 144)) -+ -+/* -+** ld1ro_bf16_16: -+** ld1roh z0\.h, p0/z, \[x0, #?32\] -+** ret -+*/ -+TEST_LOAD (ld1ro_bf16_16, svbfloat16_t, bfloat16_t, -+ z0 = svld1ro_bf16 (p0, x0 + 16), -+ z0 = svld1ro (p0, x0 + 16)) -+ -+/* -+** ld1ro_bf16_112: -+** ld1roh z0\.h, p0/z, \[x0, #?224\] -+** ret -+*/ -+TEST_LOAD (ld1ro_bf16_112, svbfloat16_t, bfloat16_t, -+ z0 = svld1ro_bf16 (p0, x0 + 112), -+ z0 = svld1ro (p0, x0 + 112)) -+ -+/* -+** ld1ro_bf16_m16: -+** ld1roh z0\.h, p0/z, \[x0, #?-32\] -+** ret -+*/ -+TEST_LOAD (ld1ro_bf16_m16, svbfloat16_t, bfloat16_t, -+ z0 = svld1ro_bf16 (p0, x0 - 16), -+ z0 = svld1ro (p0, x0 - 16)) -+ -+/* -+** ld1ro_bf16_m128: -+** ld1roh z0\.h, p0/z, \[x0, #?-256\] -+** ret -+*/ -+TEST_LOAD (ld1ro_bf16_m128, svbfloat16_t, bfloat16_t, -+ z0 = svld1ro_bf16 (p0, x0 - 128), -+ z0 = svld1ro (p0, x0 - 128)) -+ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f16.c -new file mode 100644 -index 000000000..86081edbd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f16.c -@@ -0,0 +1,120 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+/* { dg-additional-options "-march=armv8.6-a+f64mm" } */ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1ro_f16_base: -+** ld1roh z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ro_f16_base, svfloat16_t, float16_t, -+ z0 = svld1ro_f16 (p0, x0), -+ z0 = svld1ro (p0, x0)) -+ -+/* -+** ld1ro_f16_index: -+** ld1roh z0\.h, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_f16_index, svfloat16_t, float16_t, -+ z0 = svld1ro_f16 (p0, x0 + x1), -+ z0 = svld1ro (p0, x0 + x1)) -+ -+/* -+** ld1ro_f16_1: -+** add (x[0-9]+), x0, #?2 -+** ld1roh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_f16_1, svfloat16_t, float16_t, -+ z0 = svld1ro_f16 (p0, x0 + 1), -+ z0 = svld1ro (p0, x0 + 1)) -+ -+/* -+** ld1ro_f16_8: -+** add (x[0-9]+), x0, #?16 -+** ld1roh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_f16_8, svfloat16_t, float16_t, -+ z0 = svld1ro_f16 (p0, x0 + 8), -+ z0 = svld1ro (p0, x0 + 8)) -+ -+/* -+** ld1ro_f16_128: -+** add (x[0-9]+), x0, #?256 -+** ld1roh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_f16_128, svfloat16_t, float16_t, -+ z0 = svld1ro_f16 (p0, x0 + 128), -+ z0 = svld1ro (p0, x0 + 128)) -+ -+/* -+** ld1ro_f16_m1: -+** sub (x[0-9]+), x0, #?2 -+** ld1roh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_f16_m1, svfloat16_t, float16_t, -+ z0 = svld1ro_f16 (p0, x0 - 1), -+ z0 = svld1ro (p0, x0 - 1)) -+ -+/* -+** ld1ro_f16_m8: -+** sub (x[0-9]+), x0, #?16 -+** ld1roh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_f16_m8, svfloat16_t, float16_t, -+ z0 = svld1ro_f16 (p0, x0 - 8), -+ z0 = svld1ro (p0, x0 - 8)) -+ -+/* -+** ld1ro_f16_m144: -+** sub (x[0-9]+), x0, #?288 -+** ld1roh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_f16_m144, svfloat16_t, float16_t, -+ z0 = svld1ro_f16 (p0, x0 - 144), -+ z0 = svld1ro (p0, x0 - 144)) -+ -+/* -+** ld1ro_f16_16: -+** ld1roh z0\.h, p0/z, \[x0, #?32\] -+** ret -+*/ -+TEST_LOAD (ld1ro_f16_16, svfloat16_t, float16_t, -+ z0 = svld1ro_f16 (p0, x0 + 16), -+ z0 = svld1ro (p0, x0 + 16)) -+ -+/* -+** ld1ro_f16_112: -+** ld1roh z0\.h, p0/z, \[x0, #?224\] -+** ret -+*/ -+TEST_LOAD (ld1ro_f16_112, svfloat16_t, float16_t, -+ z0 = svld1ro_f16 (p0, x0 + 112), -+ z0 = svld1ro (p0, x0 + 112)) -+ -+/* -+** ld1ro_f16_m16: -+** ld1roh z0\.h, p0/z, \[x0, #?-32\] -+** ret -+*/ -+TEST_LOAD (ld1ro_f16_m16, svfloat16_t, float16_t, -+ z0 = svld1ro_f16 (p0, x0 - 16), -+ z0 = svld1ro (p0, x0 - 16)) -+ -+/* -+** ld1ro_f16_m128: -+** ld1roh z0\.h, p0/z, \[x0, #?-256\] -+** ret -+*/ -+TEST_LOAD (ld1ro_f16_m128, svfloat16_t, float16_t, -+ z0 = svld1ro_f16 (p0, x0 - 128), -+ z0 = svld1ro (p0, x0 - 128)) -+ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f32.c -new file mode 100644 -index 000000000..c8df00f8a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f32.c -@@ -0,0 +1,120 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+/* { dg-additional-options "-march=armv8.6-a+f64mm" } */ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1ro_f32_base: -+** ld1row z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ro_f32_base, svfloat32_t, float32_t, -+ z0 = svld1ro_f32 (p0, x0), -+ z0 = svld1ro (p0, x0)) -+ -+/* -+** ld1ro_f32_index: -+** ld1row z0\.s, p0/z, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_LOAD (ld1ro_f32_index, svfloat32_t, float32_t, -+ z0 = svld1ro_f32 (p0, x0 + x1), -+ z0 = svld1ro (p0, x0 + x1)) -+ -+/* -+** ld1ro_f32_1: -+** add (x[0-9]+), x0, #?4 -+** ld1row z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_f32_1, svfloat32_t, float32_t, -+ z0 = svld1ro_f32 (p0, x0 + 1), -+ z0 = svld1ro (p0, x0 + 1)) -+ -+/* -+** ld1ro_f32_4: -+** add (x[0-9]+), x0, #?16 -+** ld1row z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_f32_4, svfloat32_t, float32_t, -+ z0 = svld1ro_f32 (p0, x0 + 4), -+ z0 = svld1ro (p0, x0 + 4)) -+ -+/* -+** ld1ro_f32_64: -+** add (x[0-9]+), x0, #?256 -+** ld1row z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_f32_64, svfloat32_t, float32_t, -+ z0 = svld1ro_f32 (p0, x0 + 64), -+ z0 = svld1ro (p0, x0 + 64)) -+ -+/* -+** ld1ro_f32_m1: -+** sub (x[0-9]+), x0, #?4 -+** ld1row z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_f32_m1, svfloat32_t, float32_t, -+ z0 = svld1ro_f32 (p0, x0 - 1), -+ z0 = svld1ro (p0, x0 - 1)) -+ -+/* -+** ld1ro_f32_m4: -+** sub (x[0-9]+), x0, #?16 -+** ld1row z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_f32_m4, svfloat32_t, float32_t, -+ z0 = svld1ro_f32 (p0, x0 - 4), -+ z0 = svld1ro (p0, x0 - 4)) -+ -+/* -+** ld1ro_f32_m72: -+** sub (x[0-9]+), x0, #?288 -+** ld1row z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_f32_m72, svfloat32_t, float32_t, -+ z0 = svld1ro_f32 (p0, x0 - 72), -+ z0 = svld1ro (p0, x0 - 72)) -+ -+/* -+** ld1ro_f32_8: -+** ld1row z0\.s, p0/z, \[x0, #?32\] -+** ret -+*/ -+TEST_LOAD (ld1ro_f32_8, svfloat32_t, float32_t, -+ z0 = svld1ro_f32 (p0, x0 + 8), -+ z0 = svld1ro (p0, x0 + 8)) -+ -+/* -+** ld1ro_f32_56: -+** ld1row z0\.s, p0/z, \[x0, #?224\] -+** ret -+*/ -+TEST_LOAD (ld1ro_f32_56, svfloat32_t, float32_t, -+ z0 = svld1ro_f32 (p0, x0 + 56), -+ z0 = svld1ro (p0, x0 + 56)) -+ -+/* -+** ld1ro_f32_m8: -+** ld1row z0\.s, p0/z, \[x0, #?-32\] -+** ret -+*/ -+TEST_LOAD (ld1ro_f32_m8, svfloat32_t, float32_t, -+ z0 = svld1ro_f32 (p0, x0 - 8), -+ z0 = svld1ro (p0, x0 - 8)) -+ -+/* -+** ld1ro_f32_m64: -+** ld1row z0\.s, p0/z, \[x0, #?-256\] -+** ret -+*/ -+TEST_LOAD (ld1ro_f32_m64, svfloat32_t, float32_t, -+ z0 = svld1ro_f32 (p0, x0 - 64), -+ z0 = svld1ro (p0, x0 - 64)) -+ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f64.c -new file mode 100644 -index 000000000..2fb9d5b74 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_f64.c -@@ -0,0 +1,120 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+/* { dg-additional-options "-march=armv8.6-a+f64mm" } */ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1ro_f64_base: -+** ld1rod z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ro_f64_base, svfloat64_t, float64_t, -+ z0 = svld1ro_f64 (p0, x0), -+ z0 = svld1ro (p0, x0)) -+ -+/* -+** ld1ro_f64_index: -+** ld1rod z0\.d, p0/z, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_LOAD (ld1ro_f64_index, svfloat64_t, float64_t, -+ z0 = svld1ro_f64 (p0, x0 + x1), -+ z0 = svld1ro (p0, x0 + x1)) -+ -+/* -+** ld1ro_f64_1: -+** add (x[0-9]+), x0, #?8 -+** ld1rod z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_f64_1, svfloat64_t, float64_t, -+ z0 = svld1ro_f64 (p0, x0 + 1), -+ z0 = svld1ro (p0, x0 + 1)) -+ -+/* -+** ld1ro_f64_2: -+** add (x[0-9]+), x0, #?16 -+** ld1rod z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_f64_2, svfloat64_t, float64_t, -+ z0 = svld1ro_f64 (p0, x0 + 2), -+ z0 = svld1ro (p0, x0 + 2)) -+ -+/* -+** ld1ro_f64_32: -+** add (x[0-9]+), x0, #?256 -+** ld1rod z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_f64_32, svfloat64_t, float64_t, -+ z0 = svld1ro_f64 (p0, x0 + 32), -+ z0 = svld1ro (p0, x0 + 32)) -+ -+/* -+** ld1ro_f64_m1: -+** sub (x[0-9]+), x0, #?8 -+** ld1rod z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_f64_m1, svfloat64_t, float64_t, -+ z0 = svld1ro_f64 (p0, x0 - 1), -+ z0 = svld1ro (p0, x0 - 1)) -+ -+/* -+** ld1ro_f64_m2: -+** sub (x[0-9]+), x0, #?16 -+** ld1rod z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_f64_m2, svfloat64_t, float64_t, -+ z0 = svld1ro_f64 (p0, x0 - 2), -+ z0 = svld1ro (p0, x0 - 2)) -+ -+/* -+** ld1ro_f64_m36: -+** sub (x[0-9]+), x0, #?288 -+** ld1rod z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_f64_m36, svfloat64_t, float64_t, -+ z0 = svld1ro_f64 (p0, x0 - 36), -+ z0 = svld1ro (p0, x0 - 36)) -+ -+/* -+** ld1ro_f64_4: -+** ld1rod z0\.d, p0/z, \[x0, #?32\] -+** ret -+*/ -+TEST_LOAD (ld1ro_f64_4, svfloat64_t, float64_t, -+ z0 = svld1ro_f64 (p0, x0 + 4), -+ z0 = svld1ro (p0, x0 + 4)) -+ -+/* -+** ld1ro_f64_28: -+** ld1rod z0\.d, p0/z, \[x0, #?224\] -+** ret -+*/ -+TEST_LOAD (ld1ro_f64_28, svfloat64_t, float64_t, -+ z0 = svld1ro_f64 (p0, x0 + 28), -+ z0 = svld1ro (p0, x0 + 28)) -+ -+/* -+** ld1ro_f64_m4: -+** ld1rod z0\.d, p0/z, \[x0, #?-32\] -+** ret -+*/ -+TEST_LOAD (ld1ro_f64_m4, svfloat64_t, float64_t, -+ z0 = svld1ro_f64 (p0, x0 - 4), -+ z0 = svld1ro (p0, x0 - 4)) -+ -+/* -+** ld1ro_f64_m32: -+** ld1rod z0\.d, p0/z, \[x0, #?-256\] -+** ret -+*/ -+TEST_LOAD (ld1ro_f64_m32, svfloat64_t, float64_t, -+ z0 = svld1ro_f64 (p0, x0 - 32), -+ z0 = svld1ro (p0, x0 - 32)) -+ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s16.c -new file mode 100644 -index 000000000..3cd211b16 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s16.c -@@ -0,0 +1,120 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+/* { dg-additional-options "-march=armv8.6-a+f64mm" } */ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1ro_s16_base: -+** ld1roh z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s16_base, svint16_t, int16_t, -+ z0 = svld1ro_s16 (p0, x0), -+ z0 = svld1ro (p0, x0)) -+ -+/* -+** ld1ro_s16_index: -+** ld1roh z0\.h, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s16_index, svint16_t, int16_t, -+ z0 = svld1ro_s16 (p0, x0 + x1), -+ z0 = svld1ro (p0, x0 + x1)) -+ -+/* -+** ld1ro_s16_1: -+** add (x[0-9]+), x0, #?2 -+** ld1roh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s16_1, svint16_t, int16_t, -+ z0 = svld1ro_s16 (p0, x0 + 1), -+ z0 = svld1ro (p0, x0 + 1)) -+ -+/* -+** ld1ro_s16_8: -+** add (x[0-9]+), x0, #?16 -+** ld1roh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s16_8, svint16_t, int16_t, -+ z0 = svld1ro_s16 (p0, x0 + 8), -+ z0 = svld1ro (p0, x0 + 8)) -+ -+/* -+** ld1ro_s16_128: -+** add (x[0-9]+), x0, #?256 -+** ld1roh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s16_128, svint16_t, int16_t, -+ z0 = svld1ro_s16 (p0, x0 + 128), -+ z0 = svld1ro (p0, x0 + 128)) -+ -+/* -+** ld1ro_s16_m1: -+** sub (x[0-9]+), x0, #?2 -+** ld1roh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s16_m1, svint16_t, int16_t, -+ z0 = svld1ro_s16 (p0, x0 - 1), -+ z0 = svld1ro (p0, x0 - 1)) -+ -+/* -+** ld1ro_s16_m8: -+** sub (x[0-9]+), x0, #?16 -+** ld1roh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s16_m8, svint16_t, int16_t, -+ z0 = svld1ro_s16 (p0, x0 - 8), -+ z0 = svld1ro (p0, x0 - 8)) -+ -+/* -+** ld1ro_s16_m144: -+** sub (x[0-9]+), x0, #?288 -+** ld1roh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s16_m144, svint16_t, int16_t, -+ z0 = svld1ro_s16 (p0, x0 - 144), -+ z0 = svld1ro (p0, x0 - 144)) -+ -+/* -+** ld1ro_s16_16: -+** ld1roh z0\.h, p0/z, \[x0, #?32\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s16_16, svint16_t, int16_t, -+ z0 = svld1ro_s16 (p0, x0 + 16), -+ z0 = svld1ro (p0, x0 + 16)) -+ -+/* -+** ld1ro_s16_112: -+** ld1roh z0\.h, p0/z, \[x0, #?224\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s16_112, svint16_t, int16_t, -+ z0 = svld1ro_s16 (p0, x0 + 112), -+ z0 = svld1ro (p0, x0 + 112)) -+ -+/* -+** ld1ro_s16_m16: -+** ld1roh z0\.h, p0/z, \[x0, #?-32\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s16_m16, svint16_t, int16_t, -+ z0 = svld1ro_s16 (p0, x0 - 16), -+ z0 = svld1ro (p0, x0 - 16)) -+ -+/* -+** ld1ro_s16_m128: -+** ld1roh z0\.h, p0/z, \[x0, #?-256\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s16_m128, svint16_t, int16_t, -+ z0 = svld1ro_s16 (p0, x0 - 128), -+ z0 = svld1ro (p0, x0 - 128)) -+ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s32.c -new file mode 100644 -index 000000000..44b16ed5f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s32.c -@@ -0,0 +1,120 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+/* { dg-additional-options "-march=armv8.6-a+f64mm" } */ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1ro_s32_base: -+** ld1row z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s32_base, svint32_t, int32_t, -+ z0 = svld1ro_s32 (p0, x0), -+ z0 = svld1ro (p0, x0)) -+ -+/* -+** ld1ro_s32_index: -+** ld1row z0\.s, p0/z, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s32_index, svint32_t, int32_t, -+ z0 = svld1ro_s32 (p0, x0 + x1), -+ z0 = svld1ro (p0, x0 + x1)) -+ -+/* -+** ld1ro_s32_1: -+** add (x[0-9]+), x0, #?4 -+** ld1row z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s32_1, svint32_t, int32_t, -+ z0 = svld1ro_s32 (p0, x0 + 1), -+ z0 = svld1ro (p0, x0 + 1)) -+ -+/* -+** ld1ro_s32_4: -+** add (x[0-9]+), x0, #?16 -+** ld1row z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s32_4, svint32_t, int32_t, -+ z0 = svld1ro_s32 (p0, x0 + 4), -+ z0 = svld1ro (p0, x0 + 4)) -+ -+/* -+** ld1ro_s32_64: -+** add (x[0-9]+), x0, #?256 -+** ld1row z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s32_64, svint32_t, int32_t, -+ z0 = svld1ro_s32 (p0, x0 + 64), -+ z0 = svld1ro (p0, x0 + 64)) -+ -+/* -+** ld1ro_s32_m1: -+** sub (x[0-9]+), x0, #?4 -+** ld1row z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s32_m1, svint32_t, int32_t, -+ z0 = svld1ro_s32 (p0, x0 - 1), -+ z0 = svld1ro (p0, x0 - 1)) -+ -+/* -+** ld1ro_s32_m4: -+** sub (x[0-9]+), x0, #?16 -+** ld1row z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s32_m4, svint32_t, int32_t, -+ z0 = svld1ro_s32 (p0, x0 - 4), -+ z0 = svld1ro (p0, x0 - 4)) -+ -+/* -+** ld1ro_s32_m72: -+** sub (x[0-9]+), x0, #?288 -+** ld1row z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s32_m72, svint32_t, int32_t, -+ z0 = svld1ro_s32 (p0, x0 - 72), -+ z0 = svld1ro (p0, x0 - 72)) -+ -+/* -+** ld1ro_s32_8: -+** ld1row z0\.s, p0/z, \[x0, #?32\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s32_8, svint32_t, int32_t, -+ z0 = svld1ro_s32 (p0, x0 + 8), -+ z0 = svld1ro (p0, x0 + 8)) -+ -+/* -+** ld1ro_s32_56: -+** ld1row z0\.s, p0/z, \[x0, #?224\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s32_56, svint32_t, int32_t, -+ z0 = svld1ro_s32 (p0, x0 + 56), -+ z0 = svld1ro (p0, x0 + 56)) -+ -+/* -+** ld1ro_s32_m8: -+** ld1row z0\.s, p0/z, \[x0, #?-32\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s32_m8, svint32_t, int32_t, -+ z0 = svld1ro_s32 (p0, x0 - 8), -+ z0 = svld1ro (p0, x0 - 8)) -+ -+/* -+** ld1ro_s32_m64: -+** ld1row z0\.s, p0/z, \[x0, #?-256\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s32_m64, svint32_t, int32_t, -+ z0 = svld1ro_s32 (p0, x0 - 64), -+ z0 = svld1ro (p0, x0 - 64)) -+ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s64.c -new file mode 100644 -index 000000000..3aa9a15ee ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s64.c -@@ -0,0 +1,120 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+/* { dg-additional-options "-march=armv8.6-a+f64mm" } */ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1ro_s64_base: -+** ld1rod z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s64_base, svint64_t, int64_t, -+ z0 = svld1ro_s64 (p0, x0), -+ z0 = svld1ro (p0, x0)) -+ -+/* -+** ld1ro_s64_index: -+** ld1rod z0\.d, p0/z, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s64_index, svint64_t, int64_t, -+ z0 = svld1ro_s64 (p0, x0 + x1), -+ z0 = svld1ro (p0, x0 + x1)) -+ -+/* -+** ld1ro_s64_1: -+** add (x[0-9]+), x0, #?8 -+** ld1rod z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s64_1, svint64_t, int64_t, -+ z0 = svld1ro_s64 (p0, x0 + 1), -+ z0 = svld1ro (p0, x0 + 1)) -+ -+/* -+** ld1ro_s64_2: -+** add (x[0-9]+), x0, #?16 -+** ld1rod z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s64_2, svint64_t, int64_t, -+ z0 = svld1ro_s64 (p0, x0 + 2), -+ z0 = svld1ro (p0, x0 + 2)) -+ -+/* -+** ld1ro_s64_32: -+** add (x[0-9]+), x0, #?256 -+** ld1rod z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s64_32, svint64_t, int64_t, -+ z0 = svld1ro_s64 (p0, x0 + 32), -+ z0 = svld1ro (p0, x0 + 32)) -+ -+/* -+** ld1ro_s64_m1: -+** sub (x[0-9]+), x0, #?8 -+** ld1rod z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s64_m1, svint64_t, int64_t, -+ z0 = svld1ro_s64 (p0, x0 - 1), -+ z0 = svld1ro (p0, x0 - 1)) -+ -+/* -+** ld1ro_s64_m2: -+** sub (x[0-9]+), x0, #?16 -+** ld1rod z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s64_m2, svint64_t, int64_t, -+ z0 = svld1ro_s64 (p0, x0 - 2), -+ z0 = svld1ro (p0, x0 - 2)) -+ -+/* -+** ld1ro_s64_m36: -+** sub (x[0-9]+), x0, #?288 -+** ld1rod z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s64_m36, svint64_t, int64_t, -+ z0 = svld1ro_s64 (p0, x0 - 36), -+ z0 = svld1ro (p0, x0 - 36)) -+ -+/* -+** ld1ro_s64_4: -+** ld1rod z0\.d, p0/z, \[x0, #?32\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s64_4, svint64_t, int64_t, -+ z0 = svld1ro_s64 (p0, x0 + 4), -+ z0 = svld1ro (p0, x0 + 4)) -+ -+/* -+** ld1ro_s64_28: -+** ld1rod z0\.d, p0/z, \[x0, #?224\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s64_28, svint64_t, int64_t, -+ z0 = svld1ro_s64 (p0, x0 + 28), -+ z0 = svld1ro (p0, x0 + 28)) -+ -+/* -+** ld1ro_s64_m4: -+** ld1rod z0\.d, p0/z, \[x0, #?-32\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s64_m4, svint64_t, int64_t, -+ z0 = svld1ro_s64 (p0, x0 - 4), -+ z0 = svld1ro (p0, x0 - 4)) -+ -+/* -+** ld1ro_s64_m32: -+** ld1rod z0\.d, p0/z, \[x0, #?-256\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s64_m32, svint64_t, int64_t, -+ z0 = svld1ro_s64 (p0, x0 - 32), -+ z0 = svld1ro (p0, x0 - 32)) -+ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s8.c -new file mode 100644 -index 000000000..49aff5146 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_s8.c -@@ -0,0 +1,120 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+/* { dg-additional-options "-march=armv8.6-a+f64mm" } */ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1ro_s8_base: -+** ld1rob z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s8_base, svint8_t, int8_t, -+ z0 = svld1ro_s8 (p0, x0), -+ z0 = svld1ro (p0, x0)) -+ -+/* -+** ld1ro_s8_index: -+** ld1rob z0\.b, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s8_index, svint8_t, int8_t, -+ z0 = svld1ro_s8 (p0, x0 + x1), -+ z0 = svld1ro (p0, x0 + x1)) -+ -+/* -+** ld1ro_s8_1: -+** add (x[0-9]+), x0, #?1 -+** ld1rob z0\.b, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s8_1, svint8_t, int8_t, -+ z0 = svld1ro_s8 (p0, x0 + 1), -+ z0 = svld1ro (p0, x0 + 1)) -+ -+/* -+** ld1ro_s8_16: -+** add (x[0-9]+), x0, #?16 -+** ld1rob z0\.b, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s8_16, svint8_t, int8_t, -+ z0 = svld1ro_s8 (p0, x0 + 16), -+ z0 = svld1ro (p0, x0 + 16)) -+ -+/* -+** ld1ro_s8_256: -+** add (x[0-9]+), x0, #?256 -+** ld1rob z0\.b, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s8_256, svint8_t, int8_t, -+ z0 = svld1ro_s8 (p0, x0 + 256), -+ z0 = svld1ro (p0, x0 + 256)) -+ -+/* -+** ld1ro_s8_m1: -+** sub (x[0-9]+), x0, #?1 -+** ld1rob z0\.b, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s8_m1, svint8_t, int8_t, -+ z0 = svld1ro_s8 (p0, x0 - 1), -+ z0 = svld1ro (p0, x0 - 1)) -+ -+/* -+** ld1ro_s8_m16: -+** sub (x[0-9]+), x0, #?16 -+** ld1rob z0\.b, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s8_m16, svint8_t, int8_t, -+ z0 = svld1ro_s8 (p0, x0 - 16), -+ z0 = svld1ro (p0, x0 - 16)) -+ -+/* -+** ld1ro_s8_m288: -+** sub (x[0-9]+), x0, #?288 -+** ld1rob z0\.b, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s8_m288, svint8_t, int8_t, -+ z0 = svld1ro_s8 (p0, x0 - 288), -+ z0 = svld1ro (p0, x0 - 288)) -+ -+/* -+** ld1ro_s8_32: -+** ld1rob z0\.b, p0/z, \[x0, #?32\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s8_32, svint8_t, int8_t, -+ z0 = svld1ro_s8 (p0, x0 + 32), -+ z0 = svld1ro (p0, x0 + 32)) -+ -+/* -+** ld1ro_s8_224: -+** ld1rob z0\.b, p0/z, \[x0, #?224\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s8_224, svint8_t, int8_t, -+ z0 = svld1ro_s8 (p0, x0 + 224), -+ z0 = svld1ro (p0, x0 + 224)) -+ -+/* -+** ld1ro_s8_m32: -+** ld1rob z0\.b, p0/z, \[x0, #?-32\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s8_m32, svint8_t, int8_t, -+ z0 = svld1ro_s8 (p0, x0 - 32), -+ z0 = svld1ro (p0, x0 - 32)) -+ -+/* -+** ld1ro_s8_m256: -+** ld1rob z0\.b, p0/z, \[x0, #?-256\] -+** ret -+*/ -+TEST_LOAD (ld1ro_s8_m256, svint8_t, int8_t, -+ z0 = svld1ro_s8 (p0, x0 - 256), -+ z0 = svld1ro (p0, x0 - 256)) -+ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u16.c -new file mode 100644 -index 000000000..00bf9e129 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u16.c -@@ -0,0 +1,120 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+/* { dg-additional-options "-march=armv8.6-a+f64mm" } */ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1ro_u16_base: -+** ld1roh z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u16_base, svuint16_t, uint16_t, -+ z0 = svld1ro_u16 (p0, x0), -+ z0 = svld1ro (p0, x0)) -+ -+/* -+** ld1ro_u16_index: -+** ld1roh z0\.h, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u16_index, svuint16_t, uint16_t, -+ z0 = svld1ro_u16 (p0, x0 + x1), -+ z0 = svld1ro (p0, x0 + x1)) -+ -+/* -+** ld1ro_u16_1: -+** add (x[0-9]+), x0, #?2 -+** ld1roh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u16_1, svuint16_t, uint16_t, -+ z0 = svld1ro_u16 (p0, x0 + 1), -+ z0 = svld1ro (p0, x0 + 1)) -+ -+/* -+** ld1ro_u16_8: -+** add (x[0-9]+), x0, #?16 -+** ld1roh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u16_8, svuint16_t, uint16_t, -+ z0 = svld1ro_u16 (p0, x0 + 8), -+ z0 = svld1ro (p0, x0 + 8)) -+ -+/* -+** ld1ro_u16_128: -+** add (x[0-9]+), x0, #?256 -+** ld1roh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u16_128, svuint16_t, uint16_t, -+ z0 = svld1ro_u16 (p0, x0 + 128), -+ z0 = svld1ro (p0, x0 + 128)) -+ -+/* -+** ld1ro_u16_m1: -+** sub (x[0-9]+), x0, #?2 -+** ld1roh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u16_m1, svuint16_t, uint16_t, -+ z0 = svld1ro_u16 (p0, x0 - 1), -+ z0 = svld1ro (p0, x0 - 1)) -+ -+/* -+** ld1ro_u16_m8: -+** sub (x[0-9]+), x0, #?16 -+** ld1roh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u16_m8, svuint16_t, uint16_t, -+ z0 = svld1ro_u16 (p0, x0 - 8), -+ z0 = svld1ro (p0, x0 - 8)) -+ -+/* -+** ld1ro_u16_m144: -+** sub (x[0-9]+), x0, #?288 -+** ld1roh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u16_m144, svuint16_t, uint16_t, -+ z0 = svld1ro_u16 (p0, x0 - 144), -+ z0 = svld1ro (p0, x0 - 144)) -+ -+/* -+** ld1ro_u16_16: -+** ld1roh z0\.h, p0/z, \[x0, #?32\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u16_16, svuint16_t, uint16_t, -+ z0 = svld1ro_u16 (p0, x0 + 16), -+ z0 = svld1ro (p0, x0 + 16)) -+ -+/* -+** ld1ro_u16_112: -+** ld1roh z0\.h, p0/z, \[x0, #?224\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u16_112, svuint16_t, uint16_t, -+ z0 = svld1ro_u16 (p0, x0 + 112), -+ z0 = svld1ro (p0, x0 + 112)) -+ -+/* -+** ld1ro_u16_m16: -+** ld1roh z0\.h, p0/z, \[x0, #?-32\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u16_m16, svuint16_t, uint16_t, -+ z0 = svld1ro_u16 (p0, x0 - 16), -+ z0 = svld1ro (p0, x0 - 16)) -+ -+/* -+** ld1ro_u16_m128: -+** ld1roh z0\.h, p0/z, \[x0, #?-256\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u16_m128, svuint16_t, uint16_t, -+ z0 = svld1ro_u16 (p0, x0 - 128), -+ z0 = svld1ro (p0, x0 - 128)) -+ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u32.c -new file mode 100644 -index 000000000..9e9b3290a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u32.c -@@ -0,0 +1,120 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+/* { dg-additional-options "-march=armv8.6-a+f64mm" } */ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1ro_u32_base: -+** ld1row z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u32_base, svuint32_t, uint32_t, -+ z0 = svld1ro_u32 (p0, x0), -+ z0 = svld1ro (p0, x0)) -+ -+/* -+** ld1ro_u32_index: -+** ld1row z0\.s, p0/z, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u32_index, svuint32_t, uint32_t, -+ z0 = svld1ro_u32 (p0, x0 + x1), -+ z0 = svld1ro (p0, x0 + x1)) -+ -+/* -+** ld1ro_u32_1: -+** add (x[0-9]+), x0, #?4 -+** ld1row z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u32_1, svuint32_t, uint32_t, -+ z0 = svld1ro_u32 (p0, x0 + 1), -+ z0 = svld1ro (p0, x0 + 1)) -+ -+/* -+** ld1ro_u32_4: -+** add (x[0-9]+), x0, #?16 -+** ld1row z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u32_4, svuint32_t, uint32_t, -+ z0 = svld1ro_u32 (p0, x0 + 4), -+ z0 = svld1ro (p0, x0 + 4)) -+ -+/* -+** ld1ro_u32_64: -+** add (x[0-9]+), x0, #?256 -+** ld1row z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u32_64, svuint32_t, uint32_t, -+ z0 = svld1ro_u32 (p0, x0 + 64), -+ z0 = svld1ro (p0, x0 + 64)) -+ -+/* -+** ld1ro_u32_m1: -+** sub (x[0-9]+), x0, #?4 -+** ld1row z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u32_m1, svuint32_t, uint32_t, -+ z0 = svld1ro_u32 (p0, x0 - 1), -+ z0 = svld1ro (p0, x0 - 1)) -+ -+/* -+** ld1ro_u32_m4: -+** sub (x[0-9]+), x0, #?16 -+** ld1row z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u32_m4, svuint32_t, uint32_t, -+ z0 = svld1ro_u32 (p0, x0 - 4), -+ z0 = svld1ro (p0, x0 - 4)) -+ -+/* -+** ld1ro_u32_m72: -+** sub (x[0-9]+), x0, #?288 -+** ld1row z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u32_m72, svuint32_t, uint32_t, -+ z0 = svld1ro_u32 (p0, x0 - 72), -+ z0 = svld1ro (p0, x0 - 72)) -+ -+/* -+** ld1ro_u32_8: -+** ld1row z0\.s, p0/z, \[x0, #?32\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u32_8, svuint32_t, uint32_t, -+ z0 = svld1ro_u32 (p0, x0 + 8), -+ z0 = svld1ro (p0, x0 + 8)) -+ -+/* -+** ld1ro_u32_56: -+** ld1row z0\.s, p0/z, \[x0, #?224\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u32_56, svuint32_t, uint32_t, -+ z0 = svld1ro_u32 (p0, x0 + 56), -+ z0 = svld1ro (p0, x0 + 56)) -+ -+/* -+** ld1ro_u32_m8: -+** ld1row z0\.s, p0/z, \[x0, #?-32\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u32_m8, svuint32_t, uint32_t, -+ z0 = svld1ro_u32 (p0, x0 - 8), -+ z0 = svld1ro (p0, x0 - 8)) -+ -+/* -+** ld1ro_u32_m64: -+** ld1row z0\.s, p0/z, \[x0, #?-256\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u32_m64, svuint32_t, uint32_t, -+ z0 = svld1ro_u32 (p0, x0 - 64), -+ z0 = svld1ro (p0, x0 - 64)) -+ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u64.c -new file mode 100644 -index 000000000..64ec62871 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u64.c -@@ -0,0 +1,120 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+/* { dg-additional-options "-march=armv8.6-a+f64mm" } */ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1ro_u64_base: -+** ld1rod z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u64_base, svuint64_t, uint64_t, -+ z0 = svld1ro_u64 (p0, x0), -+ z0 = svld1ro (p0, x0)) -+ -+/* -+** ld1ro_u64_index: -+** ld1rod z0\.d, p0/z, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u64_index, svuint64_t, uint64_t, -+ z0 = svld1ro_u64 (p0, x0 + x1), -+ z0 = svld1ro (p0, x0 + x1)) -+ -+/* -+** ld1ro_u64_1: -+** add (x[0-9]+), x0, #?8 -+** ld1rod z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u64_1, svuint64_t, uint64_t, -+ z0 = svld1ro_u64 (p0, x0 + 1), -+ z0 = svld1ro (p0, x0 + 1)) -+ -+/* -+** ld1ro_u64_2: -+** add (x[0-9]+), x0, #?16 -+** ld1rod z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u64_2, svuint64_t, uint64_t, -+ z0 = svld1ro_u64 (p0, x0 + 2), -+ z0 = svld1ro (p0, x0 + 2)) -+ -+/* -+** ld1ro_u64_32: -+** add (x[0-9]+), x0, #?256 -+** ld1rod z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u64_32, svuint64_t, uint64_t, -+ z0 = svld1ro_u64 (p0, x0 + 32), -+ z0 = svld1ro (p0, x0 + 32)) -+ -+/* -+** ld1ro_u64_m1: -+** sub (x[0-9]+), x0, #?8 -+** ld1rod z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u64_m1, svuint64_t, uint64_t, -+ z0 = svld1ro_u64 (p0, x0 - 1), -+ z0 = svld1ro (p0, x0 - 1)) -+ -+/* -+** ld1ro_u64_m2: -+** sub (x[0-9]+), x0, #?16 -+** ld1rod z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u64_m2, svuint64_t, uint64_t, -+ z0 = svld1ro_u64 (p0, x0 - 2), -+ z0 = svld1ro (p0, x0 - 2)) -+ -+/* -+** ld1ro_u64_m36: -+** sub (x[0-9]+), x0, #?288 -+** ld1rod z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u64_m36, svuint64_t, uint64_t, -+ z0 = svld1ro_u64 (p0, x0 - 36), -+ z0 = svld1ro (p0, x0 - 36)) -+ -+/* -+** ld1ro_u64_4: -+** ld1rod z0\.d, p0/z, \[x0, #?32\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u64_4, svuint64_t, uint64_t, -+ z0 = svld1ro_u64 (p0, x0 + 4), -+ z0 = svld1ro (p0, x0 + 4)) -+ -+/* -+** ld1ro_u64_28: -+** ld1rod z0\.d, p0/z, \[x0, #?224\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u64_28, svuint64_t, uint64_t, -+ z0 = svld1ro_u64 (p0, x0 + 28), -+ z0 = svld1ro (p0, x0 + 28)) -+ -+/* -+** ld1ro_u64_m4: -+** ld1rod z0\.d, p0/z, \[x0, #?-32\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u64_m4, svuint64_t, uint64_t, -+ z0 = svld1ro_u64 (p0, x0 - 4), -+ z0 = svld1ro (p0, x0 - 4)) -+ -+/* -+** ld1ro_u64_m32: -+** ld1rod z0\.d, p0/z, \[x0, #?-256\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u64_m32, svuint64_t, uint64_t, -+ z0 = svld1ro_u64 (p0, x0 - 32), -+ z0 = svld1ro (p0, x0 - 32)) -+ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u8.c -new file mode 100644 -index 000000000..22701320b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ro_u8.c -@@ -0,0 +1,120 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+/* { dg-additional-options "-march=armv8.6-a+f64mm" } */ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1ro_u8_base: -+** ld1rob z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u8_base, svuint8_t, uint8_t, -+ z0 = svld1ro_u8 (p0, x0), -+ z0 = svld1ro (p0, x0)) -+ -+/* -+** ld1ro_u8_index: -+** ld1rob z0\.b, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u8_index, svuint8_t, uint8_t, -+ z0 = svld1ro_u8 (p0, x0 + x1), -+ z0 = svld1ro (p0, x0 + x1)) -+ -+/* -+** ld1ro_u8_1: -+** add (x[0-9]+), x0, #?1 -+** ld1rob z0\.b, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u8_1, svuint8_t, uint8_t, -+ z0 = svld1ro_u8 (p0, x0 + 1), -+ z0 = svld1ro (p0, x0 + 1)) -+ -+/* -+** ld1ro_u8_16: -+** add (x[0-9]+), x0, #?16 -+** ld1rob z0\.b, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u8_16, svuint8_t, uint8_t, -+ z0 = svld1ro_u8 (p0, x0 + 16), -+ z0 = svld1ro (p0, x0 + 16)) -+ -+/* -+** ld1ro_u8_256: -+** add (x[0-9]+), x0, #?256 -+** ld1rob z0\.b, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u8_256, svuint8_t, uint8_t, -+ z0 = svld1ro_u8 (p0, x0 + 256), -+ z0 = svld1ro (p0, x0 + 256)) -+ -+/* -+** ld1ro_u8_m1: -+** sub (x[0-9]+), x0, #?1 -+** ld1rob z0\.b, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u8_m1, svuint8_t, uint8_t, -+ z0 = svld1ro_u8 (p0, x0 - 1), -+ z0 = svld1ro (p0, x0 - 1)) -+ -+/* -+** ld1ro_u8_m16: -+** sub (x[0-9]+), x0, #?16 -+** ld1rob z0\.b, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u8_m16, svuint8_t, uint8_t, -+ z0 = svld1ro_u8 (p0, x0 - 16), -+ z0 = svld1ro (p0, x0 - 16)) -+ -+/* -+** ld1ro_u8_m288: -+** sub (x[0-9]+), x0, #?288 -+** ld1rob z0\.b, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u8_m288, svuint8_t, uint8_t, -+ z0 = svld1ro_u8 (p0, x0 - 288), -+ z0 = svld1ro (p0, x0 - 288)) -+ -+/* -+** ld1ro_u8_32: -+** ld1rob z0\.b, p0/z, \[x0, #?32\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u8_32, svuint8_t, uint8_t, -+ z0 = svld1ro_u8 (p0, x0 + 32), -+ z0 = svld1ro (p0, x0 + 32)) -+ -+/* -+** ld1ro_u8_224: -+** ld1rob z0\.b, p0/z, \[x0, #?224\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u8_224, svuint8_t, uint8_t, -+ z0 = svld1ro_u8 (p0, x0 + 224), -+ z0 = svld1ro (p0, x0 + 224)) -+ -+/* -+** ld1ro_u8_m32: -+** ld1rob z0\.b, p0/z, \[x0, #?-32\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u8_m32, svuint8_t, uint8_t, -+ z0 = svld1ro_u8 (p0, x0 - 32), -+ z0 = svld1ro (p0, x0 - 32)) -+ -+/* -+** ld1ro_u8_m256: -+** ld1rob z0\.b, p0/z, \[x0, #?-256\] -+** ret -+*/ -+TEST_LOAD (ld1ro_u8_m256, svuint8_t, uint8_t, -+ z0 = svld1ro_u8 (p0, x0 - 256), -+ z0 = svld1ro (p0, x0 - 256)) -+ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_bf16.c -new file mode 100644 -index 000000000..54c69a1db ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_bf16.c -@@ -0,0 +1,137 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1rq_bf16_base: -+** ld1rqh z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1rq_bf16_base, svbfloat16_t, bfloat16_t, -+ z0 = svld1rq_bf16 (p0, x0), -+ z0 = svld1rq (p0, x0)) -+ -+/* -+** ld1rq_bf16_index: -+** ld1rqh z0\.h, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_bf16_index, svbfloat16_t, bfloat16_t, -+ z0 = svld1rq_bf16 (p0, x0 + x1), -+ z0 = svld1rq (p0, x0 + x1)) -+ -+/* -+** ld1rq_bf16_1: -+** add (x[0-9]+), x0, #?2 -+** ld1rqh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_bf16_1, svbfloat16_t, bfloat16_t, -+ z0 = svld1rq_bf16 (p0, x0 + 1), -+ z0 = svld1rq (p0, x0 + 1)) -+ -+/* -+** ld1rq_bf16_4: -+** add (x[0-9]+), x0, #?8 -+** ld1rqh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_bf16_4, svbfloat16_t, bfloat16_t, -+ z0 = svld1rq_bf16 (p0, x0 + 4), -+ z0 = svld1rq (p0, x0 + 4)) -+ -+/* -+** ld1rq_bf16_7: -+** add (x[0-9]+), x0, #?14 -+** ld1rqh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_bf16_7, svbfloat16_t, bfloat16_t, -+ z0 = svld1rq_bf16 (p0, x0 + 7), -+ z0 = svld1rq (p0, x0 + 7)) -+ -+/* -+** ld1rq_bf16_8: -+** ld1rqh z0\.h, p0/z, \[x0, #?16\] -+** ret -+*/ -+TEST_LOAD (ld1rq_bf16_8, svbfloat16_t, bfloat16_t, -+ z0 = svld1rq_bf16 (p0, x0 + 8), -+ z0 = svld1rq (p0, x0 + 8)) -+ -+/* -+** ld1rq_bf16_56: -+** ld1rqh z0\.h, p0/z, \[x0, #?112\] -+** ret -+*/ -+TEST_LOAD (ld1rq_bf16_56, svbfloat16_t, bfloat16_t, -+ z0 = svld1rq_bf16 (p0, x0 + 56), -+ z0 = svld1rq (p0, x0 + 56)) -+ -+/* -+** ld1rq_bf16_64: -+** add (x[0-9]+), x0, #?128 -+** ld1rqh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_bf16_64, svbfloat16_t, bfloat16_t, -+ z0 = svld1rq_bf16 (p0, x0 + 64), -+ z0 = svld1rq (p0, x0 + 64)) -+ -+/* -+** ld1rq_bf16_m1: -+** sub (x[0-9]+), x0, #?2 -+** ld1rqh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_bf16_m1, svbfloat16_t, bfloat16_t, -+ z0 = svld1rq_bf16 (p0, x0 - 1), -+ z0 = svld1rq (p0, x0 - 1)) -+ -+/* -+** ld1rq_bf16_m4: -+** sub (x[0-9]+), x0, #?8 -+** ld1rqh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_bf16_m4, svbfloat16_t, bfloat16_t, -+ z0 = svld1rq_bf16 (p0, x0 - 4), -+ z0 = svld1rq (p0, x0 - 4)) -+ -+/* -+** ld1rq_bf16_m7: -+** sub (x[0-9]+), x0, #?14 -+** ld1rqh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_bf16_m7, svbfloat16_t, bfloat16_t, -+ z0 = svld1rq_bf16 (p0, x0 - 7), -+ z0 = svld1rq (p0, x0 - 7)) -+ -+/* -+** ld1rq_bf16_m8: -+** ld1rqh z0\.h, p0/z, \[x0, #?-16\] -+** ret -+*/ -+TEST_LOAD (ld1rq_bf16_m8, svbfloat16_t, bfloat16_t, -+ z0 = svld1rq_bf16 (p0, x0 - 8), -+ z0 = svld1rq (p0, x0 - 8)) -+ -+/* -+** ld1rq_bf16_m64: -+** ld1rqh z0\.h, p0/z, \[x0, #?-128\] -+** ret -+*/ -+TEST_LOAD (ld1rq_bf16_m64, svbfloat16_t, bfloat16_t, -+ z0 = svld1rq_bf16 (p0, x0 - 64), -+ z0 = svld1rq (p0, x0 - 64)) -+ -+/* -+** ld1rq_bf16_m72: -+** sub (x[0-9]+), x0, #?144 -+** ld1rqh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_bf16_m72, svbfloat16_t, bfloat16_t, -+ z0 = svld1rq_bf16 (p0, x0 - 72), -+ z0 = svld1rq (p0, x0 - 72)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_f16.c -new file mode 100644 -index 000000000..7536236f7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_f16.c -@@ -0,0 +1,137 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1rq_f16_base: -+** ld1rqh z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f16_base, svfloat16_t, float16_t, -+ z0 = svld1rq_f16 (p0, x0), -+ z0 = svld1rq (p0, x0)) -+ -+/* -+** ld1rq_f16_index: -+** ld1rqh z0\.h, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f16_index, svfloat16_t, float16_t, -+ z0 = svld1rq_f16 (p0, x0 + x1), -+ z0 = svld1rq (p0, x0 + x1)) -+ -+/* -+** ld1rq_f16_1: -+** add (x[0-9]+), x0, #?2 -+** ld1rqh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f16_1, svfloat16_t, float16_t, -+ z0 = svld1rq_f16 (p0, x0 + 1), -+ z0 = svld1rq (p0, x0 + 1)) -+ -+/* -+** ld1rq_f16_4: -+** add (x[0-9]+), x0, #?8 -+** ld1rqh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f16_4, svfloat16_t, float16_t, -+ z0 = svld1rq_f16 (p0, x0 + 4), -+ z0 = svld1rq (p0, x0 + 4)) -+ -+/* -+** ld1rq_f16_7: -+** add (x[0-9]+), x0, #?14 -+** ld1rqh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f16_7, svfloat16_t, float16_t, -+ z0 = svld1rq_f16 (p0, x0 + 7), -+ z0 = svld1rq (p0, x0 + 7)) -+ -+/* -+** ld1rq_f16_8: -+** ld1rqh z0\.h, p0/z, \[x0, #?16\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f16_8, svfloat16_t, float16_t, -+ z0 = svld1rq_f16 (p0, x0 + 8), -+ z0 = svld1rq (p0, x0 + 8)) -+ -+/* -+** ld1rq_f16_56: -+** ld1rqh z0\.h, p0/z, \[x0, #?112\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f16_56, svfloat16_t, float16_t, -+ z0 = svld1rq_f16 (p0, x0 + 56), -+ z0 = svld1rq (p0, x0 + 56)) -+ -+/* -+** ld1rq_f16_64: -+** add (x[0-9]+), x0, #?128 -+** ld1rqh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f16_64, svfloat16_t, float16_t, -+ z0 = svld1rq_f16 (p0, x0 + 64), -+ z0 = svld1rq (p0, x0 + 64)) -+ -+/* -+** ld1rq_f16_m1: -+** sub (x[0-9]+), x0, #?2 -+** ld1rqh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f16_m1, svfloat16_t, float16_t, -+ z0 = svld1rq_f16 (p0, x0 - 1), -+ z0 = svld1rq (p0, x0 - 1)) -+ -+/* -+** ld1rq_f16_m4: -+** sub (x[0-9]+), x0, #?8 -+** ld1rqh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f16_m4, svfloat16_t, float16_t, -+ z0 = svld1rq_f16 (p0, x0 - 4), -+ z0 = svld1rq (p0, x0 - 4)) -+ -+/* -+** ld1rq_f16_m7: -+** sub (x[0-9]+), x0, #?14 -+** ld1rqh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f16_m7, svfloat16_t, float16_t, -+ z0 = svld1rq_f16 (p0, x0 - 7), -+ z0 = svld1rq (p0, x0 - 7)) -+ -+/* -+** ld1rq_f16_m8: -+** ld1rqh z0\.h, p0/z, \[x0, #?-16\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f16_m8, svfloat16_t, float16_t, -+ z0 = svld1rq_f16 (p0, x0 - 8), -+ z0 = svld1rq (p0, x0 - 8)) -+ -+/* -+** ld1rq_f16_m64: -+** ld1rqh z0\.h, p0/z, \[x0, #?-128\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f16_m64, svfloat16_t, float16_t, -+ z0 = svld1rq_f16 (p0, x0 - 64), -+ z0 = svld1rq (p0, x0 - 64)) -+ -+/* -+** ld1rq_f16_m72: -+** sub (x[0-9]+), x0, #?144 -+** ld1rqh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f16_m72, svfloat16_t, float16_t, -+ z0 = svld1rq_f16 (p0, x0 - 72), -+ z0 = svld1rq (p0, x0 - 72)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_f32.c -new file mode 100644 -index 000000000..9be2b7412 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_f32.c -@@ -0,0 +1,137 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1rq_f32_base: -+** ld1rqw z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f32_base, svfloat32_t, float32_t, -+ z0 = svld1rq_f32 (p0, x0), -+ z0 = svld1rq (p0, x0)) -+ -+/* -+** ld1rq_f32_index: -+** ld1rqw z0\.s, p0/z, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f32_index, svfloat32_t, float32_t, -+ z0 = svld1rq_f32 (p0, x0 + x1), -+ z0 = svld1rq (p0, x0 + x1)) -+ -+/* -+** ld1rq_f32_1: -+** add (x[0-9]+), x0, #?4 -+** ld1rqw z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f32_1, svfloat32_t, float32_t, -+ z0 = svld1rq_f32 (p0, x0 + 1), -+ z0 = svld1rq (p0, x0 + 1)) -+ -+/* -+** ld1rq_f32_2: -+** add (x[0-9]+), x0, #?8 -+** ld1rqw z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f32_2, svfloat32_t, float32_t, -+ z0 = svld1rq_f32 (p0, x0 + 2), -+ z0 = svld1rq (p0, x0 + 2)) -+ -+/* -+** ld1rq_f32_3: -+** add (x[0-9]+), x0, #?12 -+** ld1rqw z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f32_3, svfloat32_t, float32_t, -+ z0 = svld1rq_f32 (p0, x0 + 3), -+ z0 = svld1rq (p0, x0 + 3)) -+ -+/* -+** ld1rq_f32_4: -+** ld1rqw z0\.s, p0/z, \[x0, #?16\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f32_4, svfloat32_t, float32_t, -+ z0 = svld1rq_f32 (p0, x0 + 4), -+ z0 = svld1rq (p0, x0 + 4)) -+ -+/* -+** ld1rq_f32_28: -+** ld1rqw z0\.s, p0/z, \[x0, #?112\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f32_28, svfloat32_t, float32_t, -+ z0 = svld1rq_f32 (p0, x0 + 28), -+ z0 = svld1rq (p0, x0 + 28)) -+ -+/* -+** ld1rq_f32_32: -+** add (x[0-9]+), x0, #?128 -+** ld1rqw z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f32_32, svfloat32_t, float32_t, -+ z0 = svld1rq_f32 (p0, x0 + 32), -+ z0 = svld1rq (p0, x0 + 32)) -+ -+/* -+** ld1rq_f32_m1: -+** sub (x[0-9]+), x0, #?4 -+** ld1rqw z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f32_m1, svfloat32_t, float32_t, -+ z0 = svld1rq_f32 (p0, x0 - 1), -+ z0 = svld1rq (p0, x0 - 1)) -+ -+/* -+** ld1rq_f32_m2: -+** sub (x[0-9]+), x0, #?8 -+** ld1rqw z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f32_m2, svfloat32_t, float32_t, -+ z0 = svld1rq_f32 (p0, x0 - 2), -+ z0 = svld1rq (p0, x0 - 2)) -+ -+/* -+** ld1rq_f32_m3: -+** sub (x[0-9]+), x0, #?12 -+** ld1rqw z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f32_m3, svfloat32_t, float32_t, -+ z0 = svld1rq_f32 (p0, x0 - 3), -+ z0 = svld1rq (p0, x0 - 3)) -+ -+/* -+** ld1rq_f32_m4: -+** ld1rqw z0\.s, p0/z, \[x0, #?-16\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f32_m4, svfloat32_t, float32_t, -+ z0 = svld1rq_f32 (p0, x0 - 4), -+ z0 = svld1rq (p0, x0 - 4)) -+ -+/* -+** ld1rq_f32_m32: -+** ld1rqw z0\.s, p0/z, \[x0, #?-128\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f32_m32, svfloat32_t, float32_t, -+ z0 = svld1rq_f32 (p0, x0 - 32), -+ z0 = svld1rq (p0, x0 - 32)) -+ -+/* -+** ld1rq_f32_m36: -+** sub (x[0-9]+), x0, #?144 -+** ld1rqw z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f32_m36, svfloat32_t, float32_t, -+ z0 = svld1rq_f32 (p0, x0 - 36), -+ z0 = svld1rq (p0, x0 - 36)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_f64.c -new file mode 100644 -index 000000000..32105af17 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_f64.c -@@ -0,0 +1,97 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1rq_f64_base: -+** ld1rqd z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f64_base, svfloat64_t, float64_t, -+ z0 = svld1rq_f64 (p0, x0), -+ z0 = svld1rq (p0, x0)) -+ -+/* -+** ld1rq_f64_index: -+** ld1rqd z0\.d, p0/z, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f64_index, svfloat64_t, float64_t, -+ z0 = svld1rq_f64 (p0, x0 + x1), -+ z0 = svld1rq (p0, x0 + x1)) -+ -+/* -+** ld1rq_f64_1: -+** add (x[0-9]+), x0, #?8 -+** ld1rqd z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f64_1, svfloat64_t, float64_t, -+ z0 = svld1rq_f64 (p0, x0 + 1), -+ z0 = svld1rq (p0, x0 + 1)) -+ -+/* -+** ld1rq_f64_2: -+** ld1rqd z0\.d, p0/z, \[x0, #?16\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f64_2, svfloat64_t, float64_t, -+ z0 = svld1rq_f64 (p0, x0 + 2), -+ z0 = svld1rq (p0, x0 + 2)) -+ -+/* -+** ld1rq_f64_14: -+** ld1rqd z0\.d, p0/z, \[x0, #?112\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f64_14, svfloat64_t, float64_t, -+ z0 = svld1rq_f64 (p0, x0 + 14), -+ z0 = svld1rq (p0, x0 + 14)) -+ -+/* -+** ld1rq_f64_16: -+** add (x[0-9]+), x0, #?128 -+** ld1rqd z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f64_16, svfloat64_t, float64_t, -+ z0 = svld1rq_f64 (p0, x0 + 16), -+ z0 = svld1rq (p0, x0 + 16)) -+ -+/* -+** ld1rq_f64_m1: -+** sub (x[0-9]+), x0, #?8 -+** ld1rqd z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f64_m1, svfloat64_t, float64_t, -+ z0 = svld1rq_f64 (p0, x0 - 1), -+ z0 = svld1rq (p0, x0 - 1)) -+ -+/* -+** ld1rq_f64_m2: -+** ld1rqd z0\.d, p0/z, \[x0, #?-16\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f64_m2, svfloat64_t, float64_t, -+ z0 = svld1rq_f64 (p0, x0 - 2), -+ z0 = svld1rq (p0, x0 - 2)) -+ -+/* -+** ld1rq_f64_m16: -+** ld1rqd z0\.d, p0/z, \[x0, #?-128\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f64_m16, svfloat64_t, float64_t, -+ z0 = svld1rq_f64 (p0, x0 - 16), -+ z0 = svld1rq (p0, x0 - 16)) -+ -+/* -+** ld1rq_f64_m18: -+** sub (x[0-9]+), x0, #?144 -+** ld1rqd z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_f64_m18, svfloat64_t, float64_t, -+ z0 = svld1rq_f64 (p0, x0 - 18), -+ z0 = svld1rq (p0, x0 - 18)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_s16.c -new file mode 100644 -index 000000000..8903b96a3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_s16.c -@@ -0,0 +1,137 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1rq_s16_base: -+** ld1rqh z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s16_base, svint16_t, int16_t, -+ z0 = svld1rq_s16 (p0, x0), -+ z0 = svld1rq (p0, x0)) -+ -+/* -+** ld1rq_s16_index: -+** ld1rqh z0\.h, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s16_index, svint16_t, int16_t, -+ z0 = svld1rq_s16 (p0, x0 + x1), -+ z0 = svld1rq (p0, x0 + x1)) -+ -+/* -+** ld1rq_s16_1: -+** add (x[0-9]+), x0, #?2 -+** ld1rqh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s16_1, svint16_t, int16_t, -+ z0 = svld1rq_s16 (p0, x0 + 1), -+ z0 = svld1rq (p0, x0 + 1)) -+ -+/* -+** ld1rq_s16_4: -+** add (x[0-9]+), x0, #?8 -+** ld1rqh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s16_4, svint16_t, int16_t, -+ z0 = svld1rq_s16 (p0, x0 + 4), -+ z0 = svld1rq (p0, x0 + 4)) -+ -+/* -+** ld1rq_s16_7: -+** add (x[0-9]+), x0, #?14 -+** ld1rqh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s16_7, svint16_t, int16_t, -+ z0 = svld1rq_s16 (p0, x0 + 7), -+ z0 = svld1rq (p0, x0 + 7)) -+ -+/* -+** ld1rq_s16_8: -+** ld1rqh z0\.h, p0/z, \[x0, #?16\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s16_8, svint16_t, int16_t, -+ z0 = svld1rq_s16 (p0, x0 + 8), -+ z0 = svld1rq (p0, x0 + 8)) -+ -+/* -+** ld1rq_s16_56: -+** ld1rqh z0\.h, p0/z, \[x0, #?112\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s16_56, svint16_t, int16_t, -+ z0 = svld1rq_s16 (p0, x0 + 56), -+ z0 = svld1rq (p0, x0 + 56)) -+ -+/* -+** ld1rq_s16_64: -+** add (x[0-9]+), x0, #?128 -+** ld1rqh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s16_64, svint16_t, int16_t, -+ z0 = svld1rq_s16 (p0, x0 + 64), -+ z0 = svld1rq (p0, x0 + 64)) -+ -+/* -+** ld1rq_s16_m1: -+** sub (x[0-9]+), x0, #?2 -+** ld1rqh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s16_m1, svint16_t, int16_t, -+ z0 = svld1rq_s16 (p0, x0 - 1), -+ z0 = svld1rq (p0, x0 - 1)) -+ -+/* -+** ld1rq_s16_m4: -+** sub (x[0-9]+), x0, #?8 -+** ld1rqh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s16_m4, svint16_t, int16_t, -+ z0 = svld1rq_s16 (p0, x0 - 4), -+ z0 = svld1rq (p0, x0 - 4)) -+ -+/* -+** ld1rq_s16_m7: -+** sub (x[0-9]+), x0, #?14 -+** ld1rqh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s16_m7, svint16_t, int16_t, -+ z0 = svld1rq_s16 (p0, x0 - 7), -+ z0 = svld1rq (p0, x0 - 7)) -+ -+/* -+** ld1rq_s16_m8: -+** ld1rqh z0\.h, p0/z, \[x0, #?-16\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s16_m8, svint16_t, int16_t, -+ z0 = svld1rq_s16 (p0, x0 - 8), -+ z0 = svld1rq (p0, x0 - 8)) -+ -+/* -+** ld1rq_s16_m64: -+** ld1rqh z0\.h, p0/z, \[x0, #?-128\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s16_m64, svint16_t, int16_t, -+ z0 = svld1rq_s16 (p0, x0 - 64), -+ z0 = svld1rq (p0, x0 - 64)) -+ -+/* -+** ld1rq_s16_m72: -+** sub (x[0-9]+), x0, #?144 -+** ld1rqh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s16_m72, svint16_t, int16_t, -+ z0 = svld1rq_s16 (p0, x0 - 72), -+ z0 = svld1rq (p0, x0 - 72)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_s32.c -new file mode 100644 -index 000000000..a428b4350 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_s32.c -@@ -0,0 +1,137 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1rq_s32_base: -+** ld1rqw z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s32_base, svint32_t, int32_t, -+ z0 = svld1rq_s32 (p0, x0), -+ z0 = svld1rq (p0, x0)) -+ -+/* -+** ld1rq_s32_index: -+** ld1rqw z0\.s, p0/z, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s32_index, svint32_t, int32_t, -+ z0 = svld1rq_s32 (p0, x0 + x1), -+ z0 = svld1rq (p0, x0 + x1)) -+ -+/* -+** ld1rq_s32_1: -+** add (x[0-9]+), x0, #?4 -+** ld1rqw z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s32_1, svint32_t, int32_t, -+ z0 = svld1rq_s32 (p0, x0 + 1), -+ z0 = svld1rq (p0, x0 + 1)) -+ -+/* -+** ld1rq_s32_2: -+** add (x[0-9]+), x0, #?8 -+** ld1rqw z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s32_2, svint32_t, int32_t, -+ z0 = svld1rq_s32 (p0, x0 + 2), -+ z0 = svld1rq (p0, x0 + 2)) -+ -+/* -+** ld1rq_s32_3: -+** add (x[0-9]+), x0, #?12 -+** ld1rqw z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s32_3, svint32_t, int32_t, -+ z0 = svld1rq_s32 (p0, x0 + 3), -+ z0 = svld1rq (p0, x0 + 3)) -+ -+/* -+** ld1rq_s32_4: -+** ld1rqw z0\.s, p0/z, \[x0, #?16\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s32_4, svint32_t, int32_t, -+ z0 = svld1rq_s32 (p0, x0 + 4), -+ z0 = svld1rq (p0, x0 + 4)) -+ -+/* -+** ld1rq_s32_28: -+** ld1rqw z0\.s, p0/z, \[x0, #?112\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s32_28, svint32_t, int32_t, -+ z0 = svld1rq_s32 (p0, x0 + 28), -+ z0 = svld1rq (p0, x0 + 28)) -+ -+/* -+** ld1rq_s32_32: -+** add (x[0-9]+), x0, #?128 -+** ld1rqw z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s32_32, svint32_t, int32_t, -+ z0 = svld1rq_s32 (p0, x0 + 32), -+ z0 = svld1rq (p0, x0 + 32)) -+ -+/* -+** ld1rq_s32_m1: -+** sub (x[0-9]+), x0, #?4 -+** ld1rqw z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s32_m1, svint32_t, int32_t, -+ z0 = svld1rq_s32 (p0, x0 - 1), -+ z0 = svld1rq (p0, x0 - 1)) -+ -+/* -+** ld1rq_s32_m2: -+** sub (x[0-9]+), x0, #?8 -+** ld1rqw z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s32_m2, svint32_t, int32_t, -+ z0 = svld1rq_s32 (p0, x0 - 2), -+ z0 = svld1rq (p0, x0 - 2)) -+ -+/* -+** ld1rq_s32_m3: -+** sub (x[0-9]+), x0, #?12 -+** ld1rqw z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s32_m3, svint32_t, int32_t, -+ z0 = svld1rq_s32 (p0, x0 - 3), -+ z0 = svld1rq (p0, x0 - 3)) -+ -+/* -+** ld1rq_s32_m4: -+** ld1rqw z0\.s, p0/z, \[x0, #?-16\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s32_m4, svint32_t, int32_t, -+ z0 = svld1rq_s32 (p0, x0 - 4), -+ z0 = svld1rq (p0, x0 - 4)) -+ -+/* -+** ld1rq_s32_m32: -+** ld1rqw z0\.s, p0/z, \[x0, #?-128\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s32_m32, svint32_t, int32_t, -+ z0 = svld1rq_s32 (p0, x0 - 32), -+ z0 = svld1rq (p0, x0 - 32)) -+ -+/* -+** ld1rq_s32_m36: -+** sub (x[0-9]+), x0, #?144 -+** ld1rqw z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s32_m36, svint32_t, int32_t, -+ z0 = svld1rq_s32 (p0, x0 - 36), -+ z0 = svld1rq (p0, x0 - 36)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_s64.c -new file mode 100644 -index 000000000..efc0e740f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_s64.c -@@ -0,0 +1,97 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1rq_s64_base: -+** ld1rqd z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s64_base, svint64_t, int64_t, -+ z0 = svld1rq_s64 (p0, x0), -+ z0 = svld1rq (p0, x0)) -+ -+/* -+** ld1rq_s64_index: -+** ld1rqd z0\.d, p0/z, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s64_index, svint64_t, int64_t, -+ z0 = svld1rq_s64 (p0, x0 + x1), -+ z0 = svld1rq (p0, x0 + x1)) -+ -+/* -+** ld1rq_s64_1: -+** add (x[0-9]+), x0, #?8 -+** ld1rqd z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s64_1, svint64_t, int64_t, -+ z0 = svld1rq_s64 (p0, x0 + 1), -+ z0 = svld1rq (p0, x0 + 1)) -+ -+/* -+** ld1rq_s64_2: -+** ld1rqd z0\.d, p0/z, \[x0, #?16\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s64_2, svint64_t, int64_t, -+ z0 = svld1rq_s64 (p0, x0 + 2), -+ z0 = svld1rq (p0, x0 + 2)) -+ -+/* -+** ld1rq_s64_14: -+** ld1rqd z0\.d, p0/z, \[x0, #?112\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s64_14, svint64_t, int64_t, -+ z0 = svld1rq_s64 (p0, x0 + 14), -+ z0 = svld1rq (p0, x0 + 14)) -+ -+/* -+** ld1rq_s64_16: -+** add (x[0-9]+), x0, #?128 -+** ld1rqd z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s64_16, svint64_t, int64_t, -+ z0 = svld1rq_s64 (p0, x0 + 16), -+ z0 = svld1rq (p0, x0 + 16)) -+ -+/* -+** ld1rq_s64_m1: -+** sub (x[0-9]+), x0, #?8 -+** ld1rqd z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s64_m1, svint64_t, int64_t, -+ z0 = svld1rq_s64 (p0, x0 - 1), -+ z0 = svld1rq (p0, x0 - 1)) -+ -+/* -+** ld1rq_s64_m2: -+** ld1rqd z0\.d, p0/z, \[x0, #?-16\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s64_m2, svint64_t, int64_t, -+ z0 = svld1rq_s64 (p0, x0 - 2), -+ z0 = svld1rq (p0, x0 - 2)) -+ -+/* -+** ld1rq_s64_m16: -+** ld1rqd z0\.d, p0/z, \[x0, #?-128\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s64_m16, svint64_t, int64_t, -+ z0 = svld1rq_s64 (p0, x0 - 16), -+ z0 = svld1rq (p0, x0 - 16)) -+ -+/* -+** ld1rq_s64_m18: -+** sub (x[0-9]+), x0, #?144 -+** ld1rqd z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s64_m18, svint64_t, int64_t, -+ z0 = svld1rq_s64 (p0, x0 - 18), -+ z0 = svld1rq (p0, x0 - 18)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_s8.c -new file mode 100644 -index 000000000..e183e472f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_s8.c -@@ -0,0 +1,137 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1rq_s8_base: -+** ld1rqb z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s8_base, svint8_t, int8_t, -+ z0 = svld1rq_s8 (p0, x0), -+ z0 = svld1rq (p0, x0)) -+ -+/* -+** ld1rq_s8_index: -+** ld1rqb z0\.b, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s8_index, svint8_t, int8_t, -+ z0 = svld1rq_s8 (p0, x0 + x1), -+ z0 = svld1rq (p0, x0 + x1)) -+ -+/* -+** ld1rq_s8_1: -+** add (x[0-9]+), x0, #?1 -+** ld1rqb z0\.b, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s8_1, svint8_t, int8_t, -+ z0 = svld1rq_s8 (p0, x0 + 1), -+ z0 = svld1rq (p0, x0 + 1)) -+ -+/* -+** ld1rq_s8_8: -+** add (x[0-9]+), x0, #?8 -+** ld1rqb z0\.b, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s8_8, svint8_t, int8_t, -+ z0 = svld1rq_s8 (p0, x0 + 8), -+ z0 = svld1rq (p0, x0 + 8)) -+ -+/* -+** ld1rq_s8_15: -+** add (x[0-9]+), x0, #?15 -+** ld1rqb z0\.b, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s8_15, svint8_t, int8_t, -+ z0 = svld1rq_s8 (p0, x0 + 15), -+ z0 = svld1rq (p0, x0 + 15)) -+ -+/* -+** ld1rq_s8_16: -+** ld1rqb z0\.b, p0/z, \[x0, #?16\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s8_16, svint8_t, int8_t, -+ z0 = svld1rq_s8 (p0, x0 + 16), -+ z0 = svld1rq (p0, x0 + 16)) -+ -+/* -+** ld1rq_s8_112: -+** ld1rqb z0\.b, p0/z, \[x0, #?112\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s8_112, svint8_t, int8_t, -+ z0 = svld1rq_s8 (p0, x0 + 112), -+ z0 = svld1rq (p0, x0 + 112)) -+ -+/* -+** ld1rq_s8_128: -+** add (x[0-9]+), x0, #?128 -+** ld1rqb z0\.b, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s8_128, svint8_t, int8_t, -+ z0 = svld1rq_s8 (p0, x0 + 128), -+ z0 = svld1rq (p0, x0 + 128)) -+ -+/* -+** ld1rq_s8_m1: -+** sub (x[0-9]+), x0, #?1 -+** ld1rqb z0\.b, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s8_m1, svint8_t, int8_t, -+ z0 = svld1rq_s8 (p0, x0 - 1), -+ z0 = svld1rq (p0, x0 - 1)) -+ -+/* -+** ld1rq_s8_m8: -+** sub (x[0-9]+), x0, #?8 -+** ld1rqb z0\.b, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s8_m8, svint8_t, int8_t, -+ z0 = svld1rq_s8 (p0, x0 - 8), -+ z0 = svld1rq (p0, x0 - 8)) -+ -+/* -+** ld1rq_s8_m15: -+** sub (x[0-9]+), x0, #?15 -+** ld1rqb z0\.b, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s8_m15, svint8_t, int8_t, -+ z0 = svld1rq_s8 (p0, x0 - 15), -+ z0 = svld1rq (p0, x0 - 15)) -+ -+/* -+** ld1rq_s8_m16: -+** ld1rqb z0\.b, p0/z, \[x0, #?-16\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s8_m16, svint8_t, int8_t, -+ z0 = svld1rq_s8 (p0, x0 - 16), -+ z0 = svld1rq (p0, x0 - 16)) -+ -+/* -+** ld1rq_s8_m128: -+** ld1rqb z0\.b, p0/z, \[x0, #?-128\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s8_m128, svint8_t, int8_t, -+ z0 = svld1rq_s8 (p0, x0 - 128), -+ z0 = svld1rq (p0, x0 - 128)) -+ -+/* -+** ld1rq_s8_m144: -+** sub (x[0-9]+), x0, #?144 -+** ld1rqb z0\.b, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_s8_m144, svint8_t, int8_t, -+ z0 = svld1rq_s8 (p0, x0 - 144), -+ z0 = svld1rq (p0, x0 - 144)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_u16.c -new file mode 100644 -index 000000000..c24ab680a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_u16.c -@@ -0,0 +1,137 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1rq_u16_base: -+** ld1rqh z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u16_base, svuint16_t, uint16_t, -+ z0 = svld1rq_u16 (p0, x0), -+ z0 = svld1rq (p0, x0)) -+ -+/* -+** ld1rq_u16_index: -+** ld1rqh z0\.h, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u16_index, svuint16_t, uint16_t, -+ z0 = svld1rq_u16 (p0, x0 + x1), -+ z0 = svld1rq (p0, x0 + x1)) -+ -+/* -+** ld1rq_u16_1: -+** add (x[0-9]+), x0, #?2 -+** ld1rqh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u16_1, svuint16_t, uint16_t, -+ z0 = svld1rq_u16 (p0, x0 + 1), -+ z0 = svld1rq (p0, x0 + 1)) -+ -+/* -+** ld1rq_u16_4: -+** add (x[0-9]+), x0, #?8 -+** ld1rqh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u16_4, svuint16_t, uint16_t, -+ z0 = svld1rq_u16 (p0, x0 + 4), -+ z0 = svld1rq (p0, x0 + 4)) -+ -+/* -+** ld1rq_u16_7: -+** add (x[0-9]+), x0, #?14 -+** ld1rqh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u16_7, svuint16_t, uint16_t, -+ z0 = svld1rq_u16 (p0, x0 + 7), -+ z0 = svld1rq (p0, x0 + 7)) -+ -+/* -+** ld1rq_u16_8: -+** ld1rqh z0\.h, p0/z, \[x0, #?16\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u16_8, svuint16_t, uint16_t, -+ z0 = svld1rq_u16 (p0, x0 + 8), -+ z0 = svld1rq (p0, x0 + 8)) -+ -+/* -+** ld1rq_u16_56: -+** ld1rqh z0\.h, p0/z, \[x0, #?112\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u16_56, svuint16_t, uint16_t, -+ z0 = svld1rq_u16 (p0, x0 + 56), -+ z0 = svld1rq (p0, x0 + 56)) -+ -+/* -+** ld1rq_u16_64: -+** add (x[0-9]+), x0, #?128 -+** ld1rqh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u16_64, svuint16_t, uint16_t, -+ z0 = svld1rq_u16 (p0, x0 + 64), -+ z0 = svld1rq (p0, x0 + 64)) -+ -+/* -+** ld1rq_u16_m1: -+** sub (x[0-9]+), x0, #?2 -+** ld1rqh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u16_m1, svuint16_t, uint16_t, -+ z0 = svld1rq_u16 (p0, x0 - 1), -+ z0 = svld1rq (p0, x0 - 1)) -+ -+/* -+** ld1rq_u16_m4: -+** sub (x[0-9]+), x0, #?8 -+** ld1rqh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u16_m4, svuint16_t, uint16_t, -+ z0 = svld1rq_u16 (p0, x0 - 4), -+ z0 = svld1rq (p0, x0 - 4)) -+ -+/* -+** ld1rq_u16_m7: -+** sub (x[0-9]+), x0, #?14 -+** ld1rqh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u16_m7, svuint16_t, uint16_t, -+ z0 = svld1rq_u16 (p0, x0 - 7), -+ z0 = svld1rq (p0, x0 - 7)) -+ -+/* -+** ld1rq_u16_m8: -+** ld1rqh z0\.h, p0/z, \[x0, #?-16\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u16_m8, svuint16_t, uint16_t, -+ z0 = svld1rq_u16 (p0, x0 - 8), -+ z0 = svld1rq (p0, x0 - 8)) -+ -+/* -+** ld1rq_u16_m64: -+** ld1rqh z0\.h, p0/z, \[x0, #?-128\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u16_m64, svuint16_t, uint16_t, -+ z0 = svld1rq_u16 (p0, x0 - 64), -+ z0 = svld1rq (p0, x0 - 64)) -+ -+/* -+** ld1rq_u16_m72: -+** sub (x[0-9]+), x0, #?144 -+** ld1rqh z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u16_m72, svuint16_t, uint16_t, -+ z0 = svld1rq_u16 (p0, x0 - 72), -+ z0 = svld1rq (p0, x0 - 72)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_u32.c -new file mode 100644 -index 000000000..722e34db3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_u32.c -@@ -0,0 +1,137 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1rq_u32_base: -+** ld1rqw z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u32_base, svuint32_t, uint32_t, -+ z0 = svld1rq_u32 (p0, x0), -+ z0 = svld1rq (p0, x0)) -+ -+/* -+** ld1rq_u32_index: -+** ld1rqw z0\.s, p0/z, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u32_index, svuint32_t, uint32_t, -+ z0 = svld1rq_u32 (p0, x0 + x1), -+ z0 = svld1rq (p0, x0 + x1)) -+ -+/* -+** ld1rq_u32_1: -+** add (x[0-9]+), x0, #?4 -+** ld1rqw z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u32_1, svuint32_t, uint32_t, -+ z0 = svld1rq_u32 (p0, x0 + 1), -+ z0 = svld1rq (p0, x0 + 1)) -+ -+/* -+** ld1rq_u32_2: -+** add (x[0-9]+), x0, #?8 -+** ld1rqw z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u32_2, svuint32_t, uint32_t, -+ z0 = svld1rq_u32 (p0, x0 + 2), -+ z0 = svld1rq (p0, x0 + 2)) -+ -+/* -+** ld1rq_u32_3: -+** add (x[0-9]+), x0, #?12 -+** ld1rqw z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u32_3, svuint32_t, uint32_t, -+ z0 = svld1rq_u32 (p0, x0 + 3), -+ z0 = svld1rq (p0, x0 + 3)) -+ -+/* -+** ld1rq_u32_4: -+** ld1rqw z0\.s, p0/z, \[x0, #?16\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u32_4, svuint32_t, uint32_t, -+ z0 = svld1rq_u32 (p0, x0 + 4), -+ z0 = svld1rq (p0, x0 + 4)) -+ -+/* -+** ld1rq_u32_28: -+** ld1rqw z0\.s, p0/z, \[x0, #?112\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u32_28, svuint32_t, uint32_t, -+ z0 = svld1rq_u32 (p0, x0 + 28), -+ z0 = svld1rq (p0, x0 + 28)) -+ -+/* -+** ld1rq_u32_32: -+** add (x[0-9]+), x0, #?128 -+** ld1rqw z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u32_32, svuint32_t, uint32_t, -+ z0 = svld1rq_u32 (p0, x0 + 32), -+ z0 = svld1rq (p0, x0 + 32)) -+ -+/* -+** ld1rq_u32_m1: -+** sub (x[0-9]+), x0, #?4 -+** ld1rqw z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u32_m1, svuint32_t, uint32_t, -+ z0 = svld1rq_u32 (p0, x0 - 1), -+ z0 = svld1rq (p0, x0 - 1)) -+ -+/* -+** ld1rq_u32_m2: -+** sub (x[0-9]+), x0, #?8 -+** ld1rqw z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u32_m2, svuint32_t, uint32_t, -+ z0 = svld1rq_u32 (p0, x0 - 2), -+ z0 = svld1rq (p0, x0 - 2)) -+ -+/* -+** ld1rq_u32_m3: -+** sub (x[0-9]+), x0, #?12 -+** ld1rqw z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u32_m3, svuint32_t, uint32_t, -+ z0 = svld1rq_u32 (p0, x0 - 3), -+ z0 = svld1rq (p0, x0 - 3)) -+ -+/* -+** ld1rq_u32_m4: -+** ld1rqw z0\.s, p0/z, \[x0, #?-16\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u32_m4, svuint32_t, uint32_t, -+ z0 = svld1rq_u32 (p0, x0 - 4), -+ z0 = svld1rq (p0, x0 - 4)) -+ -+/* -+** ld1rq_u32_m32: -+** ld1rqw z0\.s, p0/z, \[x0, #?-128\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u32_m32, svuint32_t, uint32_t, -+ z0 = svld1rq_u32 (p0, x0 - 32), -+ z0 = svld1rq (p0, x0 - 32)) -+ -+/* -+** ld1rq_u32_m36: -+** sub (x[0-9]+), x0, #?144 -+** ld1rqw z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u32_m36, svuint32_t, uint32_t, -+ z0 = svld1rq_u32 (p0, x0 - 36), -+ z0 = svld1rq (p0, x0 - 36)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_u64.c -new file mode 100644 -index 000000000..a116b7fd9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_u64.c -@@ -0,0 +1,97 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1rq_u64_base: -+** ld1rqd z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u64_base, svuint64_t, uint64_t, -+ z0 = svld1rq_u64 (p0, x0), -+ z0 = svld1rq (p0, x0)) -+ -+/* -+** ld1rq_u64_index: -+** ld1rqd z0\.d, p0/z, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u64_index, svuint64_t, uint64_t, -+ z0 = svld1rq_u64 (p0, x0 + x1), -+ z0 = svld1rq (p0, x0 + x1)) -+ -+/* -+** ld1rq_u64_1: -+** add (x[0-9]+), x0, #?8 -+** ld1rqd z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u64_1, svuint64_t, uint64_t, -+ z0 = svld1rq_u64 (p0, x0 + 1), -+ z0 = svld1rq (p0, x0 + 1)) -+ -+/* -+** ld1rq_u64_2: -+** ld1rqd z0\.d, p0/z, \[x0, #?16\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u64_2, svuint64_t, uint64_t, -+ z0 = svld1rq_u64 (p0, x0 + 2), -+ z0 = svld1rq (p0, x0 + 2)) -+ -+/* -+** ld1rq_u64_14: -+** ld1rqd z0\.d, p0/z, \[x0, #?112\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u64_14, svuint64_t, uint64_t, -+ z0 = svld1rq_u64 (p0, x0 + 14), -+ z0 = svld1rq (p0, x0 + 14)) -+ -+/* -+** ld1rq_u64_16: -+** add (x[0-9]+), x0, #?128 -+** ld1rqd z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u64_16, svuint64_t, uint64_t, -+ z0 = svld1rq_u64 (p0, x0 + 16), -+ z0 = svld1rq (p0, x0 + 16)) -+ -+/* -+** ld1rq_u64_m1: -+** sub (x[0-9]+), x0, #?8 -+** ld1rqd z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u64_m1, svuint64_t, uint64_t, -+ z0 = svld1rq_u64 (p0, x0 - 1), -+ z0 = svld1rq (p0, x0 - 1)) -+ -+/* -+** ld1rq_u64_m2: -+** ld1rqd z0\.d, p0/z, \[x0, #?-16\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u64_m2, svuint64_t, uint64_t, -+ z0 = svld1rq_u64 (p0, x0 - 2), -+ z0 = svld1rq (p0, x0 - 2)) -+ -+/* -+** ld1rq_u64_m16: -+** ld1rqd z0\.d, p0/z, \[x0, #?-128\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u64_m16, svuint64_t, uint64_t, -+ z0 = svld1rq_u64 (p0, x0 - 16), -+ z0 = svld1rq (p0, x0 - 16)) -+ -+/* -+** ld1rq_u64_m18: -+** sub (x[0-9]+), x0, #?144 -+** ld1rqd z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u64_m18, svuint64_t, uint64_t, -+ z0 = svld1rq_u64 (p0, x0 - 18), -+ z0 = svld1rq (p0, x0 - 18)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_u8.c -new file mode 100644 -index 000000000..74b72530e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1rq_u8.c -@@ -0,0 +1,137 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1rq_u8_base: -+** ld1rqb z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u8_base, svuint8_t, uint8_t, -+ z0 = svld1rq_u8 (p0, x0), -+ z0 = svld1rq (p0, x0)) -+ -+/* -+** ld1rq_u8_index: -+** ld1rqb z0\.b, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u8_index, svuint8_t, uint8_t, -+ z0 = svld1rq_u8 (p0, x0 + x1), -+ z0 = svld1rq (p0, x0 + x1)) -+ -+/* -+** ld1rq_u8_1: -+** add (x[0-9]+), x0, #?1 -+** ld1rqb z0\.b, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u8_1, svuint8_t, uint8_t, -+ z0 = svld1rq_u8 (p0, x0 + 1), -+ z0 = svld1rq (p0, x0 + 1)) -+ -+/* -+** ld1rq_u8_8: -+** add (x[0-9]+), x0, #?8 -+** ld1rqb z0\.b, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u8_8, svuint8_t, uint8_t, -+ z0 = svld1rq_u8 (p0, x0 + 8), -+ z0 = svld1rq (p0, x0 + 8)) -+ -+/* -+** ld1rq_u8_15: -+** add (x[0-9]+), x0, #?15 -+** ld1rqb z0\.b, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u8_15, svuint8_t, uint8_t, -+ z0 = svld1rq_u8 (p0, x0 + 15), -+ z0 = svld1rq (p0, x0 + 15)) -+ -+/* -+** ld1rq_u8_16: -+** ld1rqb z0\.b, p0/z, \[x0, #?16\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u8_16, svuint8_t, uint8_t, -+ z0 = svld1rq_u8 (p0, x0 + 16), -+ z0 = svld1rq (p0, x0 + 16)) -+ -+/* -+** ld1rq_u8_112: -+** ld1rqb z0\.b, p0/z, \[x0, #?112\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u8_112, svuint8_t, uint8_t, -+ z0 = svld1rq_u8 (p0, x0 + 112), -+ z0 = svld1rq (p0, x0 + 112)) -+ -+/* -+** ld1rq_u8_128: -+** add (x[0-9]+), x0, #?128 -+** ld1rqb z0\.b, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u8_128, svuint8_t, uint8_t, -+ z0 = svld1rq_u8 (p0, x0 + 128), -+ z0 = svld1rq (p0, x0 + 128)) -+ -+/* -+** ld1rq_u8_m1: -+** sub (x[0-9]+), x0, #?1 -+** ld1rqb z0\.b, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u8_m1, svuint8_t, uint8_t, -+ z0 = svld1rq_u8 (p0, x0 - 1), -+ z0 = svld1rq (p0, x0 - 1)) -+ -+/* -+** ld1rq_u8_m8: -+** sub (x[0-9]+), x0, #?8 -+** ld1rqb z0\.b, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u8_m8, svuint8_t, uint8_t, -+ z0 = svld1rq_u8 (p0, x0 - 8), -+ z0 = svld1rq (p0, x0 - 8)) -+ -+/* -+** ld1rq_u8_m15: -+** sub (x[0-9]+), x0, #?15 -+** ld1rqb z0\.b, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u8_m15, svuint8_t, uint8_t, -+ z0 = svld1rq_u8 (p0, x0 - 15), -+ z0 = svld1rq (p0, x0 - 15)) -+ -+/* -+** ld1rq_u8_m16: -+** ld1rqb z0\.b, p0/z, \[x0, #?-16\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u8_m16, svuint8_t, uint8_t, -+ z0 = svld1rq_u8 (p0, x0 - 16), -+ z0 = svld1rq (p0, x0 - 16)) -+ -+/* -+** ld1rq_u8_m128: -+** ld1rqb z0\.b, p0/z, \[x0, #?-128\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u8_m128, svuint8_t, uint8_t, -+ z0 = svld1rq_u8 (p0, x0 - 128), -+ z0 = svld1rq (p0, x0 - 128)) -+ -+/* -+** ld1rq_u8_m144: -+** sub (x[0-9]+), x0, #?144 -+** ld1rqb z0\.b, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld1rq_u8_m144, svuint8_t, uint8_t, -+ z0 = svld1rq_u8 (p0, x0 - 144), -+ z0 = svld1rq (p0, x0 - 144)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_s32.c -new file mode 100644 -index 000000000..16a5316a9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_s32.c -@@ -0,0 +1,131 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1sb_gather_s32_tied1: -+** ld1sb z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sb_gather_s32_tied1, svint32_t, svuint32_t, -+ z0_res = svld1sb_gather_u32base_s32 (p0, z0), -+ z0_res = svld1sb_gather_s32 (p0, z0)) -+ -+/* -+** ld1sb_gather_s32_untied: -+** ld1sb z0\.s, p0/z, \[z1\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sb_gather_s32_untied, svint32_t, svuint32_t, -+ z0_res = svld1sb_gather_u32base_s32 (p0, z1), -+ z0_res = svld1sb_gather_s32 (p0, z1)) -+ -+/* -+** ld1sb_gather_x0_s32_offset: -+** ld1sb z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sb_gather_x0_s32_offset, svint32_t, svuint32_t, -+ z0_res = svld1sb_gather_u32base_offset_s32 (p0, z0, x0), -+ z0_res = svld1sb_gather_offset_s32 (p0, z0, x0)) -+ -+/* -+** ld1sb_gather_m1_s32_offset: -+** mov (x[0-9]+), #?-1 -+** ld1sb z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sb_gather_m1_s32_offset, svint32_t, svuint32_t, -+ z0_res = svld1sb_gather_u32base_offset_s32 (p0, z0, -1), -+ z0_res = svld1sb_gather_offset_s32 (p0, z0, -1)) -+ -+/* -+** ld1sb_gather_0_s32_offset: -+** ld1sb z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sb_gather_0_s32_offset, svint32_t, svuint32_t, -+ z0_res = svld1sb_gather_u32base_offset_s32 (p0, z0, 0), -+ z0_res = svld1sb_gather_offset_s32 (p0, z0, 0)) -+ -+/* -+** ld1sb_gather_5_s32_offset: -+** ld1sb z0\.s, p0/z, \[z0\.s, #5\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sb_gather_5_s32_offset, svint32_t, svuint32_t, -+ z0_res = svld1sb_gather_u32base_offset_s32 (p0, z0, 5), -+ z0_res = svld1sb_gather_offset_s32 (p0, z0, 5)) -+ -+/* -+** ld1sb_gather_31_s32_offset: -+** ld1sb z0\.s, p0/z, \[z0\.s, #31\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sb_gather_31_s32_offset, svint32_t, svuint32_t, -+ z0_res = svld1sb_gather_u32base_offset_s32 (p0, z0, 31), -+ z0_res = svld1sb_gather_offset_s32 (p0, z0, 31)) -+ -+/* -+** ld1sb_gather_32_s32_offset: -+** mov (x[0-9]+), #?32 -+** ld1sb z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sb_gather_32_s32_offset, svint32_t, svuint32_t, -+ z0_res = svld1sb_gather_u32base_offset_s32 (p0, z0, 32), -+ z0_res = svld1sb_gather_offset_s32 (p0, z0, 32)) -+ -+/* -+** ld1sb_gather_x0_s32_s32offset: -+** ld1sb z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sb_gather_x0_s32_s32offset, svint32_t, int8_t, svint32_t, -+ z0_res = svld1sb_gather_s32offset_s32 (p0, x0, z0), -+ z0_res = svld1sb_gather_offset_s32 (p0, x0, z0)) -+ -+/* -+** ld1sb_gather_tied1_s32_s32offset: -+** ld1sb z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sb_gather_tied1_s32_s32offset, svint32_t, int8_t, svint32_t, -+ z0_res = svld1sb_gather_s32offset_s32 (p0, x0, z0), -+ z0_res = svld1sb_gather_offset_s32 (p0, x0, z0)) -+ -+/* -+** ld1sb_gather_untied_s32_s32offset: -+** ld1sb z0\.s, p0/z, \[x0, z1\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sb_gather_untied_s32_s32offset, svint32_t, int8_t, svint32_t, -+ z0_res = svld1sb_gather_s32offset_s32 (p0, x0, z1), -+ z0_res = svld1sb_gather_offset_s32 (p0, x0, z1)) -+ -+/* -+** ld1sb_gather_x0_s32_u32offset: -+** ld1sb z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sb_gather_x0_s32_u32offset, svint32_t, int8_t, svuint32_t, -+ z0_res = svld1sb_gather_u32offset_s32 (p0, x0, z0), -+ z0_res = svld1sb_gather_offset_s32 (p0, x0, z0)) -+ -+/* -+** ld1sb_gather_tied1_s32_u32offset: -+** ld1sb z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** 
ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sb_gather_tied1_s32_u32offset, svint32_t, int8_t, svuint32_t, -+ z0_res = svld1sb_gather_u32offset_s32 (p0, x0, z0), -+ z0_res = svld1sb_gather_offset_s32 (p0, x0, z0)) -+ -+/* -+** ld1sb_gather_untied_s32_u32offset: -+** ld1sb z0\.s, p0/z, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sb_gather_untied_s32_u32offset, svint32_t, int8_t, svuint32_t, -+ z0_res = svld1sb_gather_u32offset_s32 (p0, x0, z1), -+ z0_res = svld1sb_gather_offset_s32 (p0, x0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_s64.c -new file mode 100644 -index 000000000..3f953247e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_s64.c -@@ -0,0 +1,149 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1sb_gather_s64_tied1: -+** ld1sb z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sb_gather_s64_tied1, svint64_t, svuint64_t, -+ z0_res = svld1sb_gather_u64base_s64 (p0, z0), -+ z0_res = svld1sb_gather_s64 (p0, z0)) -+ -+/* -+** ld1sb_gather_s64_untied: -+** ld1sb z0\.d, p0/z, \[z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sb_gather_s64_untied, svint64_t, svuint64_t, -+ z0_res = svld1sb_gather_u64base_s64 (p0, z1), -+ z0_res = svld1sb_gather_s64 (p0, z1)) -+ -+/* -+** ld1sb_gather_x0_s64_offset: -+** ld1sb z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sb_gather_x0_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1sb_gather_u64base_offset_s64 (p0, z0, x0), -+ z0_res = svld1sb_gather_offset_s64 (p0, z0, x0)) -+ -+/* -+** ld1sb_gather_m1_s64_offset: -+** mov (x[0-9]+), #?-1 -+** ld1sb z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sb_gather_m1_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1sb_gather_u64base_offset_s64 (p0, z0, -1), -+ z0_res = svld1sb_gather_offset_s64 (p0, z0, -1)) -+ -+/* -+** ld1sb_gather_0_s64_offset: -+** ld1sb z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sb_gather_0_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1sb_gather_u64base_offset_s64 (p0, z0, 0), -+ z0_res = svld1sb_gather_offset_s64 (p0, z0, 0)) -+ -+/* -+** ld1sb_gather_5_s64_offset: -+** ld1sb z0\.d, p0/z, \[z0\.d, #5\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sb_gather_5_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1sb_gather_u64base_offset_s64 (p0, z0, 5), -+ z0_res = svld1sb_gather_offset_s64 (p0, z0, 5)) -+ -+/* -+** ld1sb_gather_31_s64_offset: -+** ld1sb z0\.d, p0/z, \[z0\.d, #31\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sb_gather_31_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1sb_gather_u64base_offset_s64 (p0, z0, 31), -+ z0_res = svld1sb_gather_offset_s64 (p0, z0, 31)) -+ -+/* -+** ld1sb_gather_32_s64_offset: -+** mov (x[0-9]+), #?32 -+** ld1sb z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sb_gather_32_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1sb_gather_u64base_offset_s64 (p0, z0, 32), -+ z0_res = svld1sb_gather_offset_s64 (p0, z0, 32)) -+ -+/* -+** ld1sb_gather_x0_s64_s64offset: -+** ld1sb z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sb_gather_x0_s64_s64offset, svint64_t, int8_t, svint64_t, -+ z0_res = svld1sb_gather_s64offset_s64 (p0, x0, z0), -+ z0_res = svld1sb_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ld1sb_gather_tied1_s64_s64offset: -+** ld1sb z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ 
(ld1sb_gather_tied1_s64_s64offset, svint64_t, int8_t, svint64_t, -+ z0_res = svld1sb_gather_s64offset_s64 (p0, x0, z0), -+ z0_res = svld1sb_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ld1sb_gather_untied_s64_s64offset: -+** ld1sb z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sb_gather_untied_s64_s64offset, svint64_t, int8_t, svint64_t, -+ z0_res = svld1sb_gather_s64offset_s64 (p0, x0, z1), -+ z0_res = svld1sb_gather_offset_s64 (p0, x0, z1)) -+ -+/* -+** ld1sb_gather_ext_s64_s64offset: -+** ld1sb z0\.d, p0/z, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sb_gather_ext_s64_s64offset, svint64_t, int8_t, svint64_t, -+ z0_res = svld1sb_gather_s64offset_s64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svld1sb_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ld1sb_gather_x0_s64_u64offset: -+** ld1sb z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sb_gather_x0_s64_u64offset, svint64_t, int8_t, svuint64_t, -+ z0_res = svld1sb_gather_u64offset_s64 (p0, x0, z0), -+ z0_res = svld1sb_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ld1sb_gather_tied1_s64_u64offset: -+** ld1sb z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sb_gather_tied1_s64_u64offset, svint64_t, int8_t, svuint64_t, -+ z0_res = svld1sb_gather_u64offset_s64 (p0, x0, z0), -+ z0_res = svld1sb_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ld1sb_gather_untied_s64_u64offset: -+** ld1sb z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sb_gather_untied_s64_u64offset, svint64_t, int8_t, svuint64_t, -+ z0_res = svld1sb_gather_u64offset_s64 (p0, x0, z1), -+ z0_res = svld1sb_gather_offset_s64 (p0, x0, z1)) -+ -+/* -+** ld1sb_gather_ext_s64_u64offset: -+** ld1sb z0\.d, p0/z, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sb_gather_ext_s64_u64offset, svint64_t, int8_t, svuint64_t, -+ z0_res = svld1sb_gather_u64offset_s64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svld1sb_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_u32.c -new file mode 100644 -index 000000000..424de65a6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_u32.c -@@ -0,0 +1,131 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1sb_gather_u32_tied1: -+** ld1sb z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sb_gather_u32_tied1, svuint32_t, svuint32_t, -+ z0_res = svld1sb_gather_u32base_u32 (p0, z0), -+ z0_res = svld1sb_gather_u32 (p0, z0)) -+ -+/* -+** ld1sb_gather_u32_untied: -+** ld1sb z0\.s, p0/z, \[z1\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sb_gather_u32_untied, svuint32_t, svuint32_t, -+ z0_res = svld1sb_gather_u32base_u32 (p0, z1), -+ z0_res = svld1sb_gather_u32 (p0, z1)) -+ -+/* -+** ld1sb_gather_x0_u32_offset: -+** ld1sb z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sb_gather_x0_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svld1sb_gather_u32base_offset_u32 (p0, z0, x0), -+ z0_res = svld1sb_gather_offset_u32 (p0, z0, x0)) -+ -+/* -+** ld1sb_gather_m1_u32_offset: -+** mov (x[0-9]+), #?-1 -+** ld1sb z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sb_gather_m1_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svld1sb_gather_u32base_offset_u32 (p0, z0, -1), -+ z0_res = svld1sb_gather_offset_u32 (p0, z0, -1)) -+ -+/* -+** ld1sb_gather_0_u32_offset: -+** ld1sb z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sb_gather_0_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svld1sb_gather_u32base_offset_u32 (p0, z0, 0), -+ z0_res = svld1sb_gather_offset_u32 (p0, z0, 0)) -+ -+/* -+** ld1sb_gather_5_u32_offset: -+** ld1sb z0\.s, p0/z, \[z0\.s, #5\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sb_gather_5_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svld1sb_gather_u32base_offset_u32 (p0, z0, 5), -+ z0_res = svld1sb_gather_offset_u32 (p0, z0, 5)) -+ -+/* -+** ld1sb_gather_31_u32_offset: -+** ld1sb z0\.s, p0/z, \[z0\.s, #31\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sb_gather_31_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svld1sb_gather_u32base_offset_u32 (p0, z0, 31), -+ z0_res = svld1sb_gather_offset_u32 (p0, z0, 31)) -+ -+/* -+** ld1sb_gather_32_u32_offset: -+** mov (x[0-9]+), #?32 -+** ld1sb z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sb_gather_32_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svld1sb_gather_u32base_offset_u32 (p0, z0, 32), -+ z0_res = svld1sb_gather_offset_u32 (p0, z0, 32)) -+ -+/* -+** ld1sb_gather_x0_u32_s32offset: -+** ld1sb z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sb_gather_x0_u32_s32offset, svuint32_t, int8_t, svint32_t, -+ z0_res = svld1sb_gather_s32offset_u32 (p0, x0, z0), -+ z0_res = svld1sb_gather_offset_u32 (p0, x0, z0)) -+ -+/* -+** ld1sb_gather_tied1_u32_s32offset: -+** ld1sb z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sb_gather_tied1_u32_s32offset, svuint32_t, int8_t, svint32_t, -+ z0_res = svld1sb_gather_s32offset_u32 (p0, x0, z0), -+ z0_res = svld1sb_gather_offset_u32 (p0, x0, z0)) -+ -+/* -+** ld1sb_gather_untied_u32_s32offset: -+** ld1sb z0\.s, p0/z, \[x0, z1\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sb_gather_untied_u32_s32offset, svuint32_t, int8_t, svint32_t, -+ z0_res = svld1sb_gather_s32offset_u32 (p0, x0, z1), -+ z0_res = svld1sb_gather_offset_u32 (p0, x0, z1)) -+ -+/* -+** ld1sb_gather_x0_u32_u32offset: -+** ld1sb z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sb_gather_x0_u32_u32offset, svuint32_t, int8_t, svuint32_t, -+ z0_res = svld1sb_gather_u32offset_u32 (p0, x0, z0), -+ z0_res = svld1sb_gather_offset_u32 (p0, x0, z0)) -+ -+/* -+** ld1sb_gather_tied1_u32_u32offset: -+** ld1sb z0\.s, p0/z, \[x0, z0\.s, 
uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sb_gather_tied1_u32_u32offset, svuint32_t, int8_t, svuint32_t, -+ z0_res = svld1sb_gather_u32offset_u32 (p0, x0, z0), -+ z0_res = svld1sb_gather_offset_u32 (p0, x0, z0)) -+ -+/* -+** ld1sb_gather_untied_u32_u32offset: -+** ld1sb z0\.s, p0/z, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sb_gather_untied_u32_u32offset, svuint32_t, int8_t, svuint32_t, -+ z0_res = svld1sb_gather_u32offset_u32 (p0, x0, z1), -+ z0_res = svld1sb_gather_offset_u32 (p0, x0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_u64.c -new file mode 100644 -index 000000000..aa375bea2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_gather_u64.c -@@ -0,0 +1,149 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1sb_gather_u64_tied1: -+** ld1sb z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sb_gather_u64_tied1, svuint64_t, svuint64_t, -+ z0_res = svld1sb_gather_u64base_u64 (p0, z0), -+ z0_res = svld1sb_gather_u64 (p0, z0)) -+ -+/* -+** ld1sb_gather_u64_untied: -+** ld1sb z0\.d, p0/z, \[z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sb_gather_u64_untied, svuint64_t, svuint64_t, -+ z0_res = svld1sb_gather_u64base_u64 (p0, z1), -+ z0_res = svld1sb_gather_u64 (p0, z1)) -+ -+/* -+** ld1sb_gather_x0_u64_offset: -+** ld1sb z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sb_gather_x0_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1sb_gather_u64base_offset_u64 (p0, z0, x0), -+ z0_res = svld1sb_gather_offset_u64 (p0, z0, x0)) -+ -+/* -+** ld1sb_gather_m1_u64_offset: -+** mov (x[0-9]+), #?-1 -+** ld1sb z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sb_gather_m1_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1sb_gather_u64base_offset_u64 (p0, z0, -1), -+ z0_res = svld1sb_gather_offset_u64 (p0, z0, -1)) -+ -+/* -+** ld1sb_gather_0_u64_offset: -+** ld1sb z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sb_gather_0_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1sb_gather_u64base_offset_u64 (p0, z0, 0), -+ z0_res = svld1sb_gather_offset_u64 (p0, z0, 0)) -+ -+/* -+** ld1sb_gather_5_u64_offset: -+** ld1sb z0\.d, p0/z, \[z0\.d, #5\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sb_gather_5_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1sb_gather_u64base_offset_u64 (p0, z0, 5), -+ z0_res = svld1sb_gather_offset_u64 (p0, z0, 5)) -+ -+/* -+** ld1sb_gather_31_u64_offset: -+** ld1sb z0\.d, p0/z, \[z0\.d, #31\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sb_gather_31_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1sb_gather_u64base_offset_u64 (p0, z0, 31), -+ z0_res = svld1sb_gather_offset_u64 (p0, z0, 31)) -+ -+/* -+** ld1sb_gather_32_u64_offset: -+** mov (x[0-9]+), #?32 -+** ld1sb z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sb_gather_32_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1sb_gather_u64base_offset_u64 (p0, z0, 32), -+ z0_res = svld1sb_gather_offset_u64 (p0, z0, 32)) -+ -+/* -+** ld1sb_gather_x0_u64_s64offset: -+** ld1sb z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sb_gather_x0_u64_s64offset, svuint64_t, int8_t, svint64_t, -+ z0_res = svld1sb_gather_s64offset_u64 (p0, x0, z0), -+ z0_res = svld1sb_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ld1sb_gather_tied1_u64_s64offset: -+** ld1sb z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ 
-+TEST_LOAD_GATHER_SZ (ld1sb_gather_tied1_u64_s64offset, svuint64_t, int8_t, svint64_t, -+ z0_res = svld1sb_gather_s64offset_u64 (p0, x0, z0), -+ z0_res = svld1sb_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ld1sb_gather_untied_u64_s64offset: -+** ld1sb z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sb_gather_untied_u64_s64offset, svuint64_t, int8_t, svint64_t, -+ z0_res = svld1sb_gather_s64offset_u64 (p0, x0, z1), -+ z0_res = svld1sb_gather_offset_u64 (p0, x0, z1)) -+ -+/* -+** ld1sb_gather_ext_u64_s64offset: -+** ld1sb z0\.d, p0/z, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sb_gather_ext_u64_s64offset, svuint64_t, int8_t, svint64_t, -+ z0_res = svld1sb_gather_s64offset_u64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svld1sb_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ld1sb_gather_x0_u64_u64offset: -+** ld1sb z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sb_gather_x0_u64_u64offset, svuint64_t, int8_t, svuint64_t, -+ z0_res = svld1sb_gather_u64offset_u64 (p0, x0, z0), -+ z0_res = svld1sb_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ld1sb_gather_tied1_u64_u64offset: -+** ld1sb z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sb_gather_tied1_u64_u64offset, svuint64_t, int8_t, svuint64_t, -+ z0_res = svld1sb_gather_u64offset_u64 (p0, x0, z0), -+ z0_res = svld1sb_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ld1sb_gather_untied_u64_u64offset: -+** ld1sb z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sb_gather_untied_u64_u64offset, svuint64_t, int8_t, svuint64_t, -+ z0_res = svld1sb_gather_u64offset_u64 (p0, x0, z1), -+ z0_res = svld1sb_gather_offset_u64 (p0, x0, z1)) -+ -+/* -+** ld1sb_gather_ext_u64_u64offset: -+** ld1sb z0\.d, p0/z, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sb_gather_ext_u64_u64offset, svuint64_t, int8_t, svuint64_t, -+ z0_res = svld1sb_gather_u64offset_u64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svld1sb_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_s16.c -new file mode 100644 -index 000000000..70a793c14 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_s16.c -@@ -0,0 +1,162 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1sb_s16_base: -+** ld1sb z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sb_s16_base, svint16_t, int8_t, -+ z0 = svld1sb_s16 (p0, x0), -+ z0 = svld1sb_s16 (p0, x0)) -+ -+/* -+** ld1sb_s16_index: -+** ld1sb z0\.h, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ld1sb_s16_index, svint16_t, int8_t, -+ z0 = svld1sb_s16 (p0, x0 + x1), -+ z0 = svld1sb_s16 (p0, x0 + x1)) -+ -+/* -+** ld1sb_s16_1: -+** ld1sb z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_s16_1, svint16_t, int8_t, -+ z0 = svld1sb_s16 (p0, x0 + svcnth ()), -+ z0 = svld1sb_s16 (p0, x0 + svcnth ())) -+ -+/* -+** ld1sb_s16_7: -+** ld1sb z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_s16_7, svint16_t, int8_t, -+ z0 = svld1sb_s16 (p0, x0 + svcnth () * 7), -+ z0 = svld1sb_s16 (p0, x0 + svcnth () * 7)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld1sb_s16_8: -+** incb x0, all, mul #4 -+** ld1sb z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sb_s16_8, svint16_t, int8_t, -+ z0 = svld1sb_s16 (p0, x0 + svcnth () * 8), -+ z0 = svld1sb_s16 (p0, x0 + svcnth () * 8)) -+ -+/* -+** ld1sb_s16_m1: -+** ld1sb z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_s16_m1, svint16_t, int8_t, -+ z0 = svld1sb_s16 (p0, x0 - svcnth ()), -+ z0 = svld1sb_s16 (p0, x0 - svcnth ())) -+ -+/* -+** ld1sb_s16_m8: -+** ld1sb z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_s16_m8, svint16_t, int8_t, -+ z0 = svld1sb_s16 (p0, x0 - svcnth () * 8), -+ z0 = svld1sb_s16 (p0, x0 - svcnth () * 8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1sb_s16_m9: -+** dech x0, all, mul #9 -+** ld1sb z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sb_s16_m9, svint16_t, int8_t, -+ z0 = svld1sb_s16 (p0, x0 - svcnth () * 9), -+ z0 = svld1sb_s16 (p0, x0 - svcnth () * 9)) -+ -+/* -+** ld1sb_vnum_s16_0: -+** ld1sb z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_s16_0, svint16_t, int8_t, -+ z0 = svld1sb_vnum_s16 (p0, x0, 0), -+ z0 = svld1sb_vnum_s16 (p0, x0, 0)) -+ -+/* -+** ld1sb_vnum_s16_1: -+** ld1sb z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_s16_1, svint16_t, int8_t, -+ z0 = svld1sb_vnum_s16 (p0, x0, 1), -+ z0 = svld1sb_vnum_s16 (p0, x0, 1)) -+ -+/* -+** ld1sb_vnum_s16_7: -+** ld1sb z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_s16_7, svint16_t, int8_t, -+ z0 = svld1sb_vnum_s16 (p0, x0, 7), -+ z0 = svld1sb_vnum_s16 (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1sb_vnum_s16_8: -+** incb x0, all, mul #4 -+** ld1sb z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_s16_8, svint16_t, int8_t, -+ z0 = svld1sb_vnum_s16 (p0, x0, 8), -+ z0 = svld1sb_vnum_s16 (p0, x0, 8)) -+ -+/* -+** ld1sb_vnum_s16_m1: -+** ld1sb z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_s16_m1, svint16_t, int8_t, -+ z0 = svld1sb_vnum_s16 (p0, x0, -1), -+ z0 = svld1sb_vnum_s16 (p0, x0, -1)) -+ -+/* -+** ld1sb_vnum_s16_m8: -+** ld1sb z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_s16_m8, svint16_t, int8_t, -+ z0 = svld1sb_vnum_s16 (p0, x0, -8), -+ z0 = svld1sb_vnum_s16 (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1sb_vnum_s16_m9: -+** dech x0, all, mul #9 -+** ld1sb z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_s16_m9, svint16_t, int8_t, -+ z0 = svld1sb_vnum_s16 (p0, x0, -9), -+ z0 = svld1sb_vnum_s16 (p0, x0, -9)) -+ -+/* -+** ld1sb_vnum_s16_x1: -+** cnth (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ld1sb z0\.h, p0/z, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** ld1sb z0\.h, p0/z, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_s16_x1, svint16_t, int8_t, -+ z0 = svld1sb_vnum_s16 (p0, x0, x1), -+ z0 = svld1sb_vnum_s16 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_s32.c -new file mode 100644 -index 000000000..74b3a321b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_s32.c -@@ -0,0 +1,162 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1sb_s32_base: -+** ld1sb z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sb_s32_base, svint32_t, int8_t, -+ z0 = svld1sb_s32 (p0, x0), -+ z0 = svld1sb_s32 (p0, x0)) -+ -+/* -+** ld1sb_s32_index: -+** ld1sb z0\.s, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ld1sb_s32_index, svint32_t, int8_t, -+ z0 = svld1sb_s32 (p0, x0 + x1), -+ z0 = svld1sb_s32 (p0, x0 + x1)) -+ -+/* -+** ld1sb_s32_1: -+** ld1sb z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_s32_1, svint32_t, int8_t, -+ z0 = svld1sb_s32 (p0, x0 + svcntw ()), -+ z0 = svld1sb_s32 (p0, x0 + svcntw ())) -+ -+/* -+** ld1sb_s32_7: -+** ld1sb z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_s32_7, svint32_t, int8_t, -+ z0 = svld1sb_s32 (p0, x0 + svcntw () * 7), -+ z0 = svld1sb_s32 (p0, x0 + svcntw () * 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1sb_s32_8: -+** incb x0, all, mul #2 -+** ld1sb z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sb_s32_8, svint32_t, int8_t, -+ z0 = svld1sb_s32 (p0, x0 + svcntw () * 8), -+ z0 = svld1sb_s32 (p0, x0 + svcntw () * 8)) -+ -+/* -+** ld1sb_s32_m1: -+** ld1sb z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_s32_m1, svint32_t, int8_t, -+ z0 = svld1sb_s32 (p0, x0 - svcntw ()), -+ z0 = svld1sb_s32 (p0, x0 - svcntw ())) -+ -+/* -+** ld1sb_s32_m8: -+** ld1sb z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_s32_m8, svint32_t, int8_t, -+ z0 = svld1sb_s32 (p0, x0 - svcntw () * 8), -+ z0 = svld1sb_s32 (p0, x0 - svcntw () * 8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1sb_s32_m9: -+** decw x0, all, mul #9 -+** ld1sb z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sb_s32_m9, svint32_t, int8_t, -+ z0 = svld1sb_s32 (p0, x0 - svcntw () * 9), -+ z0 = svld1sb_s32 (p0, x0 - svcntw () * 9)) -+ -+/* -+** ld1sb_vnum_s32_0: -+** ld1sb z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_s32_0, svint32_t, int8_t, -+ z0 = svld1sb_vnum_s32 (p0, x0, 0), -+ z0 = svld1sb_vnum_s32 (p0, x0, 0)) -+ -+/* -+** ld1sb_vnum_s32_1: -+** ld1sb z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_s32_1, svint32_t, int8_t, -+ z0 = svld1sb_vnum_s32 (p0, x0, 1), -+ z0 = svld1sb_vnum_s32 (p0, x0, 1)) -+ -+/* -+** ld1sb_vnum_s32_7: -+** ld1sb z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_s32_7, svint32_t, int8_t, -+ z0 = svld1sb_vnum_s32 (p0, x0, 7), -+ z0 = svld1sb_vnum_s32 (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1sb_vnum_s32_8: -+** incb x0, all, mul #2 -+** ld1sb z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_s32_8, svint32_t, int8_t, -+ z0 = svld1sb_vnum_s32 (p0, x0, 8), -+ z0 = svld1sb_vnum_s32 (p0, x0, 8)) -+ -+/* -+** ld1sb_vnum_s32_m1: -+** ld1sb z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_s32_m1, svint32_t, int8_t, -+ z0 = svld1sb_vnum_s32 (p0, x0, -1), -+ z0 = svld1sb_vnum_s32 (p0, x0, -1)) -+ -+/* -+** ld1sb_vnum_s32_m8: -+** ld1sb z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_s32_m8, svint32_t, int8_t, -+ z0 = svld1sb_vnum_s32 (p0, x0, -8), -+ z0 = svld1sb_vnum_s32 (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld1sb_vnum_s32_m9: -+** decw x0, all, mul #9 -+** ld1sb z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_s32_m9, svint32_t, int8_t, -+ z0 = svld1sb_vnum_s32 (p0, x0, -9), -+ z0 = svld1sb_vnum_s32 (p0, x0, -9)) -+ -+/* -+** ld1sb_vnum_s32_x1: -+** cntw (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ld1sb z0\.s, p0/z, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** ld1sb z0\.s, p0/z, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_s32_x1, svint32_t, int8_t, -+ z0 = svld1sb_vnum_s32 (p0, x0, x1), -+ z0 = svld1sb_vnum_s32 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_s64.c -new file mode 100644 -index 000000000..1984e1956 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_s64.c -@@ -0,0 +1,162 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1sb_s64_base: -+** ld1sb z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sb_s64_base, svint64_t, int8_t, -+ z0 = svld1sb_s64 (p0, x0), -+ z0 = svld1sb_s64 (p0, x0)) -+ -+/* -+** ld1sb_s64_index: -+** ld1sb z0\.d, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ld1sb_s64_index, svint64_t, int8_t, -+ z0 = svld1sb_s64 (p0, x0 + x1), -+ z0 = svld1sb_s64 (p0, x0 + x1)) -+ -+/* -+** ld1sb_s64_1: -+** ld1sb z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_s64_1, svint64_t, int8_t, -+ z0 = svld1sb_s64 (p0, x0 + svcntd ()), -+ z0 = svld1sb_s64 (p0, x0 + svcntd ())) -+ -+/* -+** ld1sb_s64_7: -+** ld1sb z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_s64_7, svint64_t, int8_t, -+ z0 = svld1sb_s64 (p0, x0 + svcntd () * 7), -+ z0 = svld1sb_s64 (p0, x0 + svcntd () * 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1sb_s64_8: -+** incb x0 -+** ld1sb z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sb_s64_8, svint64_t, int8_t, -+ z0 = svld1sb_s64 (p0, x0 + svcntd () * 8), -+ z0 = svld1sb_s64 (p0, x0 + svcntd () * 8)) -+ -+/* -+** ld1sb_s64_m1: -+** ld1sb z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_s64_m1, svint64_t, int8_t, -+ z0 = svld1sb_s64 (p0, x0 - svcntd ()), -+ z0 = svld1sb_s64 (p0, x0 - svcntd ())) -+ -+/* -+** ld1sb_s64_m8: -+** ld1sb z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_s64_m8, svint64_t, int8_t, -+ z0 = svld1sb_s64 (p0, x0 - svcntd () * 8), -+ z0 = svld1sb_s64 (p0, x0 - svcntd () * 8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1sb_s64_m9: -+** decd x0, all, mul #9 -+** ld1sb z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sb_s64_m9, svint64_t, int8_t, -+ z0 = svld1sb_s64 (p0, x0 - svcntd () * 9), -+ z0 = svld1sb_s64 (p0, x0 - svcntd () * 9)) -+ -+/* -+** ld1sb_vnum_s64_0: -+** ld1sb z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_s64_0, svint64_t, int8_t, -+ z0 = svld1sb_vnum_s64 (p0, x0, 0), -+ z0 = svld1sb_vnum_s64 (p0, x0, 0)) -+ -+/* -+** ld1sb_vnum_s64_1: -+** ld1sb z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_s64_1, svint64_t, int8_t, -+ z0 = svld1sb_vnum_s64 (p0, x0, 1), -+ z0 = svld1sb_vnum_s64 (p0, x0, 1)) -+ -+/* -+** ld1sb_vnum_s64_7: -+** ld1sb z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_s64_7, svint64_t, int8_t, -+ z0 = svld1sb_vnum_s64 (p0, x0, 7), -+ z0 = svld1sb_vnum_s64 (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld1sb_vnum_s64_8: -+** incb x0 -+** ld1sb z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_s64_8, svint64_t, int8_t, -+ z0 = svld1sb_vnum_s64 (p0, x0, 8), -+ z0 = svld1sb_vnum_s64 (p0, x0, 8)) -+ -+/* -+** ld1sb_vnum_s64_m1: -+** ld1sb z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_s64_m1, svint64_t, int8_t, -+ z0 = svld1sb_vnum_s64 (p0, x0, -1), -+ z0 = svld1sb_vnum_s64 (p0, x0, -1)) -+ -+/* -+** ld1sb_vnum_s64_m8: -+** ld1sb z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_s64_m8, svint64_t, int8_t, -+ z0 = svld1sb_vnum_s64 (p0, x0, -8), -+ z0 = svld1sb_vnum_s64 (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1sb_vnum_s64_m9: -+** decd x0, all, mul #9 -+** ld1sb z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_s64_m9, svint64_t, int8_t, -+ z0 = svld1sb_vnum_s64 (p0, x0, -9), -+ z0 = svld1sb_vnum_s64 (p0, x0, -9)) -+ -+/* -+** ld1sb_vnum_s64_x1: -+** cntd (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ld1sb z0\.d, p0/z, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** ld1sb z0\.d, p0/z, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_s64_x1, svint64_t, int8_t, -+ z0 = svld1sb_vnum_s64 (p0, x0, x1), -+ z0 = svld1sb_vnum_s64 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_u16.c -new file mode 100644 -index 000000000..cfa616251 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_u16.c -@@ -0,0 +1,162 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1sb_u16_base: -+** ld1sb z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sb_u16_base, svuint16_t, int8_t, -+ z0 = svld1sb_u16 (p0, x0), -+ z0 = svld1sb_u16 (p0, x0)) -+ -+/* -+** ld1sb_u16_index: -+** ld1sb z0\.h, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ld1sb_u16_index, svuint16_t, int8_t, -+ z0 = svld1sb_u16 (p0, x0 + x1), -+ z0 = svld1sb_u16 (p0, x0 + x1)) -+ -+/* -+** ld1sb_u16_1: -+** ld1sb z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_u16_1, svuint16_t, int8_t, -+ z0 = svld1sb_u16 (p0, x0 + svcnth ()), -+ z0 = svld1sb_u16 (p0, x0 + svcnth ())) -+ -+/* -+** ld1sb_u16_7: -+** ld1sb z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_u16_7, svuint16_t, int8_t, -+ z0 = svld1sb_u16 (p0, x0 + svcnth () * 7), -+ z0 = svld1sb_u16 (p0, x0 + svcnth () * 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1sb_u16_8: -+** incb x0, all, mul #4 -+** ld1sb z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sb_u16_8, svuint16_t, int8_t, -+ z0 = svld1sb_u16 (p0, x0 + svcnth () * 8), -+ z0 = svld1sb_u16 (p0, x0 + svcnth () * 8)) -+ -+/* -+** ld1sb_u16_m1: -+** ld1sb z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_u16_m1, svuint16_t, int8_t, -+ z0 = svld1sb_u16 (p0, x0 - svcnth ()), -+ z0 = svld1sb_u16 (p0, x0 - svcnth ())) -+ -+/* -+** ld1sb_u16_m8: -+** ld1sb z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_u16_m8, svuint16_t, int8_t, -+ z0 = svld1sb_u16 (p0, x0 - svcnth () * 8), -+ z0 = svld1sb_u16 (p0, x0 - svcnth () * 8)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld1sb_u16_m9: -+** dech x0, all, mul #9 -+** ld1sb z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sb_u16_m9, svuint16_t, int8_t, -+ z0 = svld1sb_u16 (p0, x0 - svcnth () * 9), -+ z0 = svld1sb_u16 (p0, x0 - svcnth () * 9)) -+ -+/* -+** ld1sb_vnum_u16_0: -+** ld1sb z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_u16_0, svuint16_t, int8_t, -+ z0 = svld1sb_vnum_u16 (p0, x0, 0), -+ z0 = svld1sb_vnum_u16 (p0, x0, 0)) -+ -+/* -+** ld1sb_vnum_u16_1: -+** ld1sb z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_u16_1, svuint16_t, int8_t, -+ z0 = svld1sb_vnum_u16 (p0, x0, 1), -+ z0 = svld1sb_vnum_u16 (p0, x0, 1)) -+ -+/* -+** ld1sb_vnum_u16_7: -+** ld1sb z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_u16_7, svuint16_t, int8_t, -+ z0 = svld1sb_vnum_u16 (p0, x0, 7), -+ z0 = svld1sb_vnum_u16 (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1sb_vnum_u16_8: -+** incb x0, all, mul #4 -+** ld1sb z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_u16_8, svuint16_t, int8_t, -+ z0 = svld1sb_vnum_u16 (p0, x0, 8), -+ z0 = svld1sb_vnum_u16 (p0, x0, 8)) -+ -+/* -+** ld1sb_vnum_u16_m1: -+** ld1sb z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_u16_m1, svuint16_t, int8_t, -+ z0 = svld1sb_vnum_u16 (p0, x0, -1), -+ z0 = svld1sb_vnum_u16 (p0, x0, -1)) -+ -+/* -+** ld1sb_vnum_u16_m8: -+** ld1sb z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_u16_m8, svuint16_t, int8_t, -+ z0 = svld1sb_vnum_u16 (p0, x0, -8), -+ z0 = svld1sb_vnum_u16 (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1sb_vnum_u16_m9: -+** dech x0, all, mul #9 -+** ld1sb z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_u16_m9, svuint16_t, int8_t, -+ z0 = svld1sb_vnum_u16 (p0, x0, -9), -+ z0 = svld1sb_vnum_u16 (p0, x0, -9)) -+ -+/* -+** ld1sb_vnum_u16_x1: -+** cnth (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ld1sb z0\.h, p0/z, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** ld1sb z0\.h, p0/z, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_u16_x1, svuint16_t, int8_t, -+ z0 = svld1sb_vnum_u16 (p0, x0, x1), -+ z0 = svld1sb_vnum_u16 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_u32.c -new file mode 100644 -index 000000000..990ae5e1b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_u32.c -@@ -0,0 +1,162 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1sb_u32_base: -+** ld1sb z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sb_u32_base, svuint32_t, int8_t, -+ z0 = svld1sb_u32 (p0, x0), -+ z0 = svld1sb_u32 (p0, x0)) -+ -+/* -+** ld1sb_u32_index: -+** ld1sb z0\.s, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ld1sb_u32_index, svuint32_t, int8_t, -+ z0 = svld1sb_u32 (p0, x0 + x1), -+ z0 = svld1sb_u32 (p0, x0 + x1)) -+ -+/* -+** ld1sb_u32_1: -+** ld1sb z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_u32_1, svuint32_t, int8_t, -+ z0 = svld1sb_u32 (p0, x0 + svcntw ()), -+ z0 = svld1sb_u32 (p0, x0 + svcntw ())) -+ -+/* -+** ld1sb_u32_7: -+** ld1sb z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_u32_7, svuint32_t, int8_t, -+ z0 = svld1sb_u32 (p0, x0 + svcntw () * 7), -+ z0 = svld1sb_u32 (p0, x0 + svcntw () * 7)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld1sb_u32_8: -+** incb x0, all, mul #2 -+** ld1sb z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sb_u32_8, svuint32_t, int8_t, -+ z0 = svld1sb_u32 (p0, x0 + svcntw () * 8), -+ z0 = svld1sb_u32 (p0, x0 + svcntw () * 8)) -+ -+/* -+** ld1sb_u32_m1: -+** ld1sb z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_u32_m1, svuint32_t, int8_t, -+ z0 = svld1sb_u32 (p0, x0 - svcntw ()), -+ z0 = svld1sb_u32 (p0, x0 - svcntw ())) -+ -+/* -+** ld1sb_u32_m8: -+** ld1sb z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_u32_m8, svuint32_t, int8_t, -+ z0 = svld1sb_u32 (p0, x0 - svcntw () * 8), -+ z0 = svld1sb_u32 (p0, x0 - svcntw () * 8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1sb_u32_m9: -+** decw x0, all, mul #9 -+** ld1sb z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sb_u32_m9, svuint32_t, int8_t, -+ z0 = svld1sb_u32 (p0, x0 - svcntw () * 9), -+ z0 = svld1sb_u32 (p0, x0 - svcntw () * 9)) -+ -+/* -+** ld1sb_vnum_u32_0: -+** ld1sb z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_u32_0, svuint32_t, int8_t, -+ z0 = svld1sb_vnum_u32 (p0, x0, 0), -+ z0 = svld1sb_vnum_u32 (p0, x0, 0)) -+ -+/* -+** ld1sb_vnum_u32_1: -+** ld1sb z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_u32_1, svuint32_t, int8_t, -+ z0 = svld1sb_vnum_u32 (p0, x0, 1), -+ z0 = svld1sb_vnum_u32 (p0, x0, 1)) -+ -+/* -+** ld1sb_vnum_u32_7: -+** ld1sb z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_u32_7, svuint32_t, int8_t, -+ z0 = svld1sb_vnum_u32 (p0, x0, 7), -+ z0 = svld1sb_vnum_u32 (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1sb_vnum_u32_8: -+** incb x0, all, mul #2 -+** ld1sb z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_u32_8, svuint32_t, int8_t, -+ z0 = svld1sb_vnum_u32 (p0, x0, 8), -+ z0 = svld1sb_vnum_u32 (p0, x0, 8)) -+ -+/* -+** ld1sb_vnum_u32_m1: -+** ld1sb z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_u32_m1, svuint32_t, int8_t, -+ z0 = svld1sb_vnum_u32 (p0, x0, -1), -+ z0 = svld1sb_vnum_u32 (p0, x0, -1)) -+ -+/* -+** ld1sb_vnum_u32_m8: -+** ld1sb z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_u32_m8, svuint32_t, int8_t, -+ z0 = svld1sb_vnum_u32 (p0, x0, -8), -+ z0 = svld1sb_vnum_u32 (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1sb_vnum_u32_m9: -+** decw x0, all, mul #9 -+** ld1sb z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_u32_m9, svuint32_t, int8_t, -+ z0 = svld1sb_vnum_u32 (p0, x0, -9), -+ z0 = svld1sb_vnum_u32 (p0, x0, -9)) -+ -+/* -+** ld1sb_vnum_u32_x1: -+** cntw (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ld1sb z0\.s, p0/z, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** ld1sb z0\.s, p0/z, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_u32_x1, svuint32_t, int8_t, -+ z0 = svld1sb_vnum_u32 (p0, x0, x1), -+ z0 = svld1sb_vnum_u32 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_u64.c -new file mode 100644 -index 000000000..8051bf140 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sb_u64.c -@@ -0,0 +1,162 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1sb_u64_base: -+** ld1sb z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sb_u64_base, svuint64_t, int8_t, -+ z0 = svld1sb_u64 (p0, x0), -+ z0 = svld1sb_u64 (p0, x0)) -+ -+/* -+** ld1sb_u64_index: -+** ld1sb z0\.d, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ld1sb_u64_index, svuint64_t, int8_t, -+ z0 = svld1sb_u64 (p0, x0 + x1), -+ z0 = svld1sb_u64 (p0, x0 + x1)) -+ -+/* -+** ld1sb_u64_1: -+** ld1sb z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_u64_1, svuint64_t, int8_t, -+ z0 = svld1sb_u64 (p0, x0 + svcntd ()), -+ z0 = svld1sb_u64 (p0, x0 + svcntd ())) -+ -+/* -+** ld1sb_u64_7: -+** ld1sb z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_u64_7, svuint64_t, int8_t, -+ z0 = svld1sb_u64 (p0, x0 + svcntd () * 7), -+ z0 = svld1sb_u64 (p0, x0 + svcntd () * 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1sb_u64_8: -+** incb x0 -+** ld1sb z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sb_u64_8, svuint64_t, int8_t, -+ z0 = svld1sb_u64 (p0, x0 + svcntd () * 8), -+ z0 = svld1sb_u64 (p0, x0 + svcntd () * 8)) -+ -+/* -+** ld1sb_u64_m1: -+** ld1sb z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_u64_m1, svuint64_t, int8_t, -+ z0 = svld1sb_u64 (p0, x0 - svcntd ()), -+ z0 = svld1sb_u64 (p0, x0 - svcntd ())) -+ -+/* -+** ld1sb_u64_m8: -+** ld1sb z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_u64_m8, svuint64_t, int8_t, -+ z0 = svld1sb_u64 (p0, x0 - svcntd () * 8), -+ z0 = svld1sb_u64 (p0, x0 - svcntd () * 8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1sb_u64_m9: -+** decd x0, all, mul #9 -+** ld1sb z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sb_u64_m9, svuint64_t, int8_t, -+ z0 = svld1sb_u64 (p0, x0 - svcntd () * 9), -+ z0 = svld1sb_u64 (p0, x0 - svcntd () * 9)) -+ -+/* -+** ld1sb_vnum_u64_0: -+** ld1sb z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_u64_0, svuint64_t, int8_t, -+ z0 = svld1sb_vnum_u64 (p0, x0, 0), -+ z0 = svld1sb_vnum_u64 (p0, x0, 0)) -+ -+/* -+** ld1sb_vnum_u64_1: -+** ld1sb z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_u64_1, svuint64_t, int8_t, -+ z0 = svld1sb_vnum_u64 (p0, x0, 1), -+ z0 = svld1sb_vnum_u64 (p0, x0, 1)) -+ -+/* -+** ld1sb_vnum_u64_7: -+** ld1sb z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_u64_7, svuint64_t, int8_t, -+ z0 = svld1sb_vnum_u64 (p0, x0, 7), -+ z0 = svld1sb_vnum_u64 (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1sb_vnum_u64_8: -+** incb x0 -+** ld1sb z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_u64_8, svuint64_t, int8_t, -+ z0 = svld1sb_vnum_u64 (p0, x0, 8), -+ z0 = svld1sb_vnum_u64 (p0, x0, 8)) -+ -+/* -+** ld1sb_vnum_u64_m1: -+** ld1sb z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_u64_m1, svuint64_t, int8_t, -+ z0 = svld1sb_vnum_u64 (p0, x0, -1), -+ z0 = svld1sb_vnum_u64 (p0, x0, -1)) -+ -+/* -+** ld1sb_vnum_u64_m8: -+** ld1sb z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_u64_m8, svuint64_t, int8_t, -+ z0 = svld1sb_vnum_u64 (p0, x0, -8), -+ z0 = svld1sb_vnum_u64 (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld1sb_vnum_u64_m9: -+** decd x0, all, mul #9 -+** ld1sb z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_u64_m9, svuint64_t, int8_t, -+ z0 = svld1sb_vnum_u64 (p0, x0, -9), -+ z0 = svld1sb_vnum_u64 (p0, x0, -9)) -+ -+/* -+** ld1sb_vnum_u64_x1: -+** cntd (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ld1sb z0\.d, p0/z, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** ld1sb z0\.d, p0/z, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_LOAD (ld1sb_vnum_u64_x1, svuint64_t, int8_t, -+ z0 = svld1sb_vnum_u64 (p0, x0, x1), -+ z0 = svld1sb_vnum_u64 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_s32.c -new file mode 100644 -index 000000000..ed07b4dfc ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_s32.c -@@ -0,0 +1,252 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1sh_gather_s32_tied1: -+** ld1sh z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_s32_tied1, svint32_t, svuint32_t, -+ z0_res = svld1sh_gather_u32base_s32 (p0, z0), -+ z0_res = svld1sh_gather_s32 (p0, z0)) -+ -+/* -+** ld1sh_gather_s32_untied: -+** ld1sh z0\.s, p0/z, \[z1\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_s32_untied, svint32_t, svuint32_t, -+ z0_res = svld1sh_gather_u32base_s32 (p0, z1), -+ z0_res = svld1sh_gather_s32 (p0, z1)) -+ -+/* -+** ld1sh_gather_x0_s32_offset: -+** ld1sh z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_x0_s32_offset, svint32_t, svuint32_t, -+ z0_res = svld1sh_gather_u32base_offset_s32 (p0, z0, x0), -+ z0_res = svld1sh_gather_offset_s32 (p0, z0, x0)) -+ -+/* -+** ld1sh_gather_m2_s32_offset: -+** mov (x[0-9]+), #?-2 -+** ld1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_m2_s32_offset, svint32_t, svuint32_t, -+ z0_res = svld1sh_gather_u32base_offset_s32 (p0, z0, -2), -+ z0_res = svld1sh_gather_offset_s32 (p0, z0, -2)) -+ -+/* -+** ld1sh_gather_0_s32_offset: -+** ld1sh z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_0_s32_offset, svint32_t, svuint32_t, -+ z0_res = svld1sh_gather_u32base_offset_s32 (p0, z0, 0), -+ z0_res = svld1sh_gather_offset_s32 (p0, z0, 0)) -+ -+/* -+** ld1sh_gather_5_s32_offset: -+** mov (x[0-9]+), #?5 -+** ld1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_5_s32_offset, svint32_t, svuint32_t, -+ z0_res = svld1sh_gather_u32base_offset_s32 (p0, z0, 5), -+ z0_res = svld1sh_gather_offset_s32 (p0, z0, 5)) -+ -+/* -+** ld1sh_gather_6_s32_offset: -+** ld1sh z0\.s, p0/z, \[z0\.s, #6\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_6_s32_offset, svint32_t, svuint32_t, -+ z0_res = svld1sh_gather_u32base_offset_s32 (p0, z0, 6), -+ z0_res = svld1sh_gather_offset_s32 (p0, z0, 6)) -+ -+/* -+** ld1sh_gather_62_s32_offset: -+** ld1sh z0\.s, p0/z, \[z0\.s, #62\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_62_s32_offset, svint32_t, svuint32_t, -+ z0_res = svld1sh_gather_u32base_offset_s32 (p0, z0, 62), -+ z0_res = svld1sh_gather_offset_s32 (p0, z0, 62)) -+ -+/* -+** ld1sh_gather_64_s32_offset: -+** mov (x[0-9]+), #?64 -+** ld1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_64_s32_offset, svint32_t, svuint32_t, -+ z0_res = svld1sh_gather_u32base_offset_s32 (p0, z0, 64), -+ z0_res = svld1sh_gather_offset_s32 (p0, z0, 64)) -+ 
-+/* -+** ld1sh_gather_x0_s32_index: -+** lsl (x[0-9]+), x0, #?1 -+** ld1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_x0_s32_index, svint32_t, svuint32_t, -+ z0_res = svld1sh_gather_u32base_index_s32 (p0, z0, x0), -+ z0_res = svld1sh_gather_index_s32 (p0, z0, x0)) -+ -+/* -+** ld1sh_gather_m1_s32_index: -+** mov (x[0-9]+), #?-2 -+** ld1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_m1_s32_index, svint32_t, svuint32_t, -+ z0_res = svld1sh_gather_u32base_index_s32 (p0, z0, -1), -+ z0_res = svld1sh_gather_index_s32 (p0, z0, -1)) -+ -+/* -+** ld1sh_gather_0_s32_index: -+** ld1sh z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_0_s32_index, svint32_t, svuint32_t, -+ z0_res = svld1sh_gather_u32base_index_s32 (p0, z0, 0), -+ z0_res = svld1sh_gather_index_s32 (p0, z0, 0)) -+ -+/* -+** ld1sh_gather_5_s32_index: -+** ld1sh z0\.s, p0/z, \[z0\.s, #10\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_5_s32_index, svint32_t, svuint32_t, -+ z0_res = svld1sh_gather_u32base_index_s32 (p0, z0, 5), -+ z0_res = svld1sh_gather_index_s32 (p0, z0, 5)) -+ -+/* -+** ld1sh_gather_31_s32_index: -+** ld1sh z0\.s, p0/z, \[z0\.s, #62\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_31_s32_index, svint32_t, svuint32_t, -+ z0_res = svld1sh_gather_u32base_index_s32 (p0, z0, 31), -+ z0_res = svld1sh_gather_index_s32 (p0, z0, 31)) -+ -+/* -+** ld1sh_gather_32_s32_index: -+** mov (x[0-9]+), #?64 -+** ld1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_32_s32_index, svint32_t, svuint32_t, -+ z0_res = svld1sh_gather_u32base_index_s32 (p0, z0, 32), -+ z0_res = svld1sh_gather_index_s32 (p0, z0, 32)) -+ -+/* -+** ld1sh_gather_x0_s32_s32offset: -+** ld1sh z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_x0_s32_s32offset, svint32_t, int16_t, svint32_t, -+ z0_res = svld1sh_gather_s32offset_s32 (p0, x0, z0), -+ z0_res = svld1sh_gather_offset_s32 (p0, x0, z0)) -+ -+/* -+** ld1sh_gather_tied1_s32_s32offset: -+** ld1sh z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_tied1_s32_s32offset, svint32_t, int16_t, svint32_t, -+ z0_res = svld1sh_gather_s32offset_s32 (p0, x0, z0), -+ z0_res = svld1sh_gather_offset_s32 (p0, x0, z0)) -+ -+/* -+** ld1sh_gather_untied_s32_s32offset: -+** ld1sh z0\.s, p0/z, \[x0, z1\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_untied_s32_s32offset, svint32_t, int16_t, svint32_t, -+ z0_res = svld1sh_gather_s32offset_s32 (p0, x0, z1), -+ z0_res = svld1sh_gather_offset_s32 (p0, x0, z1)) -+ -+/* -+** ld1sh_gather_x0_s32_u32offset: -+** ld1sh z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_x0_s32_u32offset, svint32_t, int16_t, svuint32_t, -+ z0_res = svld1sh_gather_u32offset_s32 (p0, x0, z0), -+ z0_res = svld1sh_gather_offset_s32 (p0, x0, z0)) -+ -+/* -+** ld1sh_gather_tied1_s32_u32offset: -+** ld1sh z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_tied1_s32_u32offset, svint32_t, int16_t, svuint32_t, -+ z0_res = svld1sh_gather_u32offset_s32 (p0, x0, z0), -+ z0_res = svld1sh_gather_offset_s32 (p0, x0, z0)) -+ -+/* -+** ld1sh_gather_untied_s32_u32offset: -+** ld1sh z0\.s, p0/z, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_untied_s32_u32offset, svint32_t, int16_t, svuint32_t, -+ z0_res = svld1sh_gather_u32offset_s32 (p0, x0, z1), -+ z0_res = svld1sh_gather_offset_s32 (p0, x0, z1)) -+ -+/* -+** 
ld1sh_gather_x0_s32_s32index: -+** ld1sh z0\.s, p0/z, \[x0, z0\.s, sxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_x0_s32_s32index, svint32_t, int16_t, svint32_t, -+ z0_res = svld1sh_gather_s32index_s32 (p0, x0, z0), -+ z0_res = svld1sh_gather_index_s32 (p0, x0, z0)) -+ -+/* -+** ld1sh_gather_tied1_s32_s32index: -+** ld1sh z0\.s, p0/z, \[x0, z0\.s, sxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_tied1_s32_s32index, svint32_t, int16_t, svint32_t, -+ z0_res = svld1sh_gather_s32index_s32 (p0, x0, z0), -+ z0_res = svld1sh_gather_index_s32 (p0, x0, z0)) -+ -+/* -+** ld1sh_gather_untied_s32_s32index: -+** ld1sh z0\.s, p0/z, \[x0, z1\.s, sxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_untied_s32_s32index, svint32_t, int16_t, svint32_t, -+ z0_res = svld1sh_gather_s32index_s32 (p0, x0, z1), -+ z0_res = svld1sh_gather_index_s32 (p0, x0, z1)) -+ -+/* -+** ld1sh_gather_x0_s32_u32index: -+** ld1sh z0\.s, p0/z, \[x0, z0\.s, uxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_x0_s32_u32index, svint32_t, int16_t, svuint32_t, -+ z0_res = svld1sh_gather_u32index_s32 (p0, x0, z0), -+ z0_res = svld1sh_gather_index_s32 (p0, x0, z0)) -+ -+/* -+** ld1sh_gather_tied1_s32_u32index: -+** ld1sh z0\.s, p0/z, \[x0, z0\.s, uxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_tied1_s32_u32index, svint32_t, int16_t, svuint32_t, -+ z0_res = svld1sh_gather_u32index_s32 (p0, x0, z0), -+ z0_res = svld1sh_gather_index_s32 (p0, x0, z0)) -+ -+/* -+** ld1sh_gather_untied_s32_u32index: -+** ld1sh z0\.s, p0/z, \[x0, z1\.s, uxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_untied_s32_u32index, svint32_t, int16_t, svuint32_t, -+ z0_res = svld1sh_gather_u32index_s32 (p0, x0, z1), -+ z0_res = svld1sh_gather_index_s32 (p0, x0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_s64.c -new file mode 100644 -index 000000000..20ca42720 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_s64.c -@@ -0,0 +1,288 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1sh_gather_s64_tied1: -+** ld1sh z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_s64_tied1, svint64_t, svuint64_t, -+ z0_res = svld1sh_gather_u64base_s64 (p0, z0), -+ z0_res = svld1sh_gather_s64 (p0, z0)) -+ -+/* -+** ld1sh_gather_s64_untied: -+** ld1sh z0\.d, p0/z, \[z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_s64_untied, svint64_t, svuint64_t, -+ z0_res = svld1sh_gather_u64base_s64 (p0, z1), -+ z0_res = svld1sh_gather_s64 (p0, z1)) -+ -+/* -+** ld1sh_gather_x0_s64_offset: -+** ld1sh z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_x0_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1sh_gather_u64base_offset_s64 (p0, z0, x0), -+ z0_res = svld1sh_gather_offset_s64 (p0, z0, x0)) -+ -+/* -+** ld1sh_gather_m2_s64_offset: -+** mov (x[0-9]+), #?-2 -+** ld1sh z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_m2_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1sh_gather_u64base_offset_s64 (p0, z0, -2), -+ z0_res = svld1sh_gather_offset_s64 (p0, z0, -2)) -+ -+/* -+** ld1sh_gather_0_s64_offset: -+** ld1sh z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_0_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1sh_gather_u64base_offset_s64 (p0, z0, 0), -+ z0_res = svld1sh_gather_offset_s64 (p0, z0, 0)) -+ -+/* -+** ld1sh_gather_5_s64_offset: -+** mov (x[0-9]+), #?5 -+** ld1sh z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_5_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1sh_gather_u64base_offset_s64 (p0, z0, 5), -+ z0_res = svld1sh_gather_offset_s64 (p0, z0, 5)) -+ -+/* -+** ld1sh_gather_6_s64_offset: -+** ld1sh z0\.d, p0/z, \[z0\.d, #6\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_6_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1sh_gather_u64base_offset_s64 (p0, z0, 6), -+ z0_res = svld1sh_gather_offset_s64 (p0, z0, 6)) -+ -+/* -+** ld1sh_gather_62_s64_offset: -+** ld1sh z0\.d, p0/z, \[z0\.d, #62\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_62_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1sh_gather_u64base_offset_s64 (p0, z0, 62), -+ z0_res = svld1sh_gather_offset_s64 (p0, z0, 62)) -+ -+/* -+** ld1sh_gather_64_s64_offset: -+** mov (x[0-9]+), #?64 -+** ld1sh z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_64_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1sh_gather_u64base_offset_s64 (p0, z0, 64), -+ z0_res = svld1sh_gather_offset_s64 (p0, z0, 64)) -+ -+/* -+** ld1sh_gather_x0_s64_index: -+** lsl (x[0-9]+), x0, #?1 -+** ld1sh z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_x0_s64_index, svint64_t, svuint64_t, -+ z0_res = svld1sh_gather_u64base_index_s64 (p0, z0, x0), -+ z0_res = svld1sh_gather_index_s64 (p0, z0, x0)) -+ -+/* -+** ld1sh_gather_m1_s64_index: -+** mov (x[0-9]+), #?-2 -+** ld1sh z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_m1_s64_index, svint64_t, svuint64_t, -+ z0_res = svld1sh_gather_u64base_index_s64 (p0, z0, -1), -+ z0_res = svld1sh_gather_index_s64 (p0, z0, -1)) -+ -+/* -+** ld1sh_gather_0_s64_index: -+** ld1sh z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_0_s64_index, svint64_t, svuint64_t, -+ z0_res = svld1sh_gather_u64base_index_s64 (p0, z0, 0), -+ z0_res = svld1sh_gather_index_s64 (p0, z0, 0)) -+ -+/* -+** ld1sh_gather_5_s64_index: -+** ld1sh z0\.d, p0/z, \[z0\.d, #10\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS 
(ld1sh_gather_5_s64_index, svint64_t, svuint64_t, -+ z0_res = svld1sh_gather_u64base_index_s64 (p0, z0, 5), -+ z0_res = svld1sh_gather_index_s64 (p0, z0, 5)) -+ -+/* -+** ld1sh_gather_31_s64_index: -+** ld1sh z0\.d, p0/z, \[z0\.d, #62\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_31_s64_index, svint64_t, svuint64_t, -+ z0_res = svld1sh_gather_u64base_index_s64 (p0, z0, 31), -+ z0_res = svld1sh_gather_index_s64 (p0, z0, 31)) -+ -+/* -+** ld1sh_gather_32_s64_index: -+** mov (x[0-9]+), #?64 -+** ld1sh z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_32_s64_index, svint64_t, svuint64_t, -+ z0_res = svld1sh_gather_u64base_index_s64 (p0, z0, 32), -+ z0_res = svld1sh_gather_index_s64 (p0, z0, 32)) -+ -+/* -+** ld1sh_gather_x0_s64_s64offset: -+** ld1sh z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_x0_s64_s64offset, svint64_t, int16_t, svint64_t, -+ z0_res = svld1sh_gather_s64offset_s64 (p0, x0, z0), -+ z0_res = svld1sh_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ld1sh_gather_tied1_s64_s64offset: -+** ld1sh z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_tied1_s64_s64offset, svint64_t, int16_t, svint64_t, -+ z0_res = svld1sh_gather_s64offset_s64 (p0, x0, z0), -+ z0_res = svld1sh_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ld1sh_gather_untied_s64_s64offset: -+** ld1sh z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_untied_s64_s64offset, svint64_t, int16_t, svint64_t, -+ z0_res = svld1sh_gather_s64offset_s64 (p0, x0, z1), -+ z0_res = svld1sh_gather_offset_s64 (p0, x0, z1)) -+ -+/* -+** ld1sh_gather_ext_s64_s64offset: -+** ld1sh z0\.d, p0/z, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_ext_s64_s64offset, svint64_t, int16_t, svint64_t, -+ z0_res = svld1sh_gather_s64offset_s64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svld1sh_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ld1sh_gather_x0_s64_u64offset: -+** ld1sh z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_x0_s64_u64offset, svint64_t, int16_t, svuint64_t, -+ z0_res = svld1sh_gather_u64offset_s64 (p0, x0, z0), -+ z0_res = svld1sh_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ld1sh_gather_tied1_s64_u64offset: -+** ld1sh z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_tied1_s64_u64offset, svint64_t, int16_t, svuint64_t, -+ z0_res = svld1sh_gather_u64offset_s64 (p0, x0, z0), -+ z0_res = svld1sh_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ld1sh_gather_untied_s64_u64offset: -+** ld1sh z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_untied_s64_u64offset, svint64_t, int16_t, svuint64_t, -+ z0_res = svld1sh_gather_u64offset_s64 (p0, x0, z1), -+ z0_res = svld1sh_gather_offset_s64 (p0, x0, z1)) -+ -+/* -+** ld1sh_gather_ext_s64_u64offset: -+** ld1sh z0\.d, p0/z, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_ext_s64_u64offset, svint64_t, int16_t, svuint64_t, -+ z0_res = svld1sh_gather_u64offset_s64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svld1sh_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ld1sh_gather_x0_s64_s64index: -+** ld1sh z0\.d, p0/z, \[x0, z0\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_x0_s64_s64index, svint64_t, int16_t, svint64_t, -+ z0_res = svld1sh_gather_s64index_s64 (p0, x0, z0), -+ z0_res = svld1sh_gather_index_s64 (p0, x0, z0)) -+ -+/* -+** ld1sh_gather_tied1_s64_s64index: -+** ld1sh z0\.d, p0/z, \[x0, z0\.d, lsl 1\] -+** ret -+*/ 
-+TEST_LOAD_GATHER_SZ (ld1sh_gather_tied1_s64_s64index, svint64_t, int16_t, svint64_t, -+ z0_res = svld1sh_gather_s64index_s64 (p0, x0, z0), -+ z0_res = svld1sh_gather_index_s64 (p0, x0, z0)) -+ -+/* -+** ld1sh_gather_untied_s64_s64index: -+** ld1sh z0\.d, p0/z, \[x0, z1\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_untied_s64_s64index, svint64_t, int16_t, svint64_t, -+ z0_res = svld1sh_gather_s64index_s64 (p0, x0, z1), -+ z0_res = svld1sh_gather_index_s64 (p0, x0, z1)) -+ -+/* -+** ld1sh_gather_ext_s64_s64index: -+** ld1sh z0\.d, p0/z, \[x0, z1\.d, sxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_ext_s64_s64index, svint64_t, int16_t, svint64_t, -+ z0_res = svld1sh_gather_s64index_s64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svld1sh_gather_index_s64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ld1sh_gather_x0_s64_u64index: -+** ld1sh z0\.d, p0/z, \[x0, z0\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_x0_s64_u64index, svint64_t, int16_t, svuint64_t, -+ z0_res = svld1sh_gather_u64index_s64 (p0, x0, z0), -+ z0_res = svld1sh_gather_index_s64 (p0, x0, z0)) -+ -+/* -+** ld1sh_gather_tied1_s64_u64index: -+** ld1sh z0\.d, p0/z, \[x0, z0\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_tied1_s64_u64index, svint64_t, int16_t, svuint64_t, -+ z0_res = svld1sh_gather_u64index_s64 (p0, x0, z0), -+ z0_res = svld1sh_gather_index_s64 (p0, x0, z0)) -+ -+/* -+** ld1sh_gather_untied_s64_u64index: -+** ld1sh z0\.d, p0/z, \[x0, z1\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_untied_s64_u64index, svint64_t, int16_t, svuint64_t, -+ z0_res = svld1sh_gather_u64index_s64 (p0, x0, z1), -+ z0_res = svld1sh_gather_index_s64 (p0, x0, z1)) -+ -+/* -+** ld1sh_gather_ext_s64_u64index: -+** ld1sh z0\.d, p0/z, \[x0, z1\.d, uxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_ext_s64_u64index, svint64_t, int16_t, svuint64_t, -+ z0_res = svld1sh_gather_u64index_s64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svld1sh_gather_index_s64 (p0, x0, svextw_x (p0, z1))) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_u32.c -new file mode 100644 -index 000000000..e3a85a23f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_u32.c -@@ -0,0 +1,252 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1sh_gather_u32_tied1: -+** ld1sh z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_u32_tied1, svuint32_t, svuint32_t, -+ z0_res = svld1sh_gather_u32base_u32 (p0, z0), -+ z0_res = svld1sh_gather_u32 (p0, z0)) -+ -+/* -+** ld1sh_gather_u32_untied: -+** ld1sh z0\.s, p0/z, \[z1\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_u32_untied, svuint32_t, svuint32_t, -+ z0_res = svld1sh_gather_u32base_u32 (p0, z1), -+ z0_res = svld1sh_gather_u32 (p0, z1)) -+ -+/* -+** ld1sh_gather_x0_u32_offset: -+** ld1sh z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_x0_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svld1sh_gather_u32base_offset_u32 (p0, z0, x0), -+ z0_res = svld1sh_gather_offset_u32 (p0, z0, x0)) -+ -+/* -+** ld1sh_gather_m2_u32_offset: -+** mov (x[0-9]+), #?-2 -+** ld1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_m2_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svld1sh_gather_u32base_offset_u32 (p0, z0, -2), -+ z0_res = svld1sh_gather_offset_u32 (p0, z0, -2)) -+ -+/* -+** ld1sh_gather_0_u32_offset: -+** ld1sh z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_0_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svld1sh_gather_u32base_offset_u32 (p0, z0, 0), -+ z0_res = svld1sh_gather_offset_u32 (p0, z0, 0)) -+ -+/* -+** ld1sh_gather_5_u32_offset: -+** mov (x[0-9]+), #?5 -+** ld1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_5_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svld1sh_gather_u32base_offset_u32 (p0, z0, 5), -+ z0_res = svld1sh_gather_offset_u32 (p0, z0, 5)) -+ -+/* -+** ld1sh_gather_6_u32_offset: -+** ld1sh z0\.s, p0/z, \[z0\.s, #6\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_6_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svld1sh_gather_u32base_offset_u32 (p0, z0, 6), -+ z0_res = svld1sh_gather_offset_u32 (p0, z0, 6)) -+ -+/* -+** ld1sh_gather_62_u32_offset: -+** ld1sh z0\.s, p0/z, \[z0\.s, #62\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_62_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svld1sh_gather_u32base_offset_u32 (p0, z0, 62), -+ z0_res = svld1sh_gather_offset_u32 (p0, z0, 62)) -+ -+/* -+** ld1sh_gather_64_u32_offset: -+** mov (x[0-9]+), #?64 -+** ld1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_64_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svld1sh_gather_u32base_offset_u32 (p0, z0, 64), -+ z0_res = svld1sh_gather_offset_u32 (p0, z0, 64)) -+ -+/* -+** ld1sh_gather_x0_u32_index: -+** lsl (x[0-9]+), x0, #?1 -+** ld1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_x0_u32_index, svuint32_t, svuint32_t, -+ z0_res = svld1sh_gather_u32base_index_u32 (p0, z0, x0), -+ z0_res = svld1sh_gather_index_u32 (p0, z0, x0)) -+ -+/* -+** ld1sh_gather_m1_u32_index: -+** mov (x[0-9]+), #?-2 -+** ld1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_m1_u32_index, svuint32_t, svuint32_t, -+ z0_res = svld1sh_gather_u32base_index_u32 (p0, z0, -1), -+ z0_res = svld1sh_gather_index_u32 (p0, z0, -1)) -+ -+/* -+** ld1sh_gather_0_u32_index: -+** ld1sh z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_0_u32_index, svuint32_t, svuint32_t, -+ z0_res = svld1sh_gather_u32base_index_u32 (p0, z0, 0), -+ z0_res = svld1sh_gather_index_u32 (p0, z0, 0)) -+ -+/* -+** ld1sh_gather_5_u32_index: -+** ld1sh z0\.s, p0/z, \[z0\.s, #10\] -+** 
ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_5_u32_index, svuint32_t, svuint32_t, -+ z0_res = svld1sh_gather_u32base_index_u32 (p0, z0, 5), -+ z0_res = svld1sh_gather_index_u32 (p0, z0, 5)) -+ -+/* -+** ld1sh_gather_31_u32_index: -+** ld1sh z0\.s, p0/z, \[z0\.s, #62\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_31_u32_index, svuint32_t, svuint32_t, -+ z0_res = svld1sh_gather_u32base_index_u32 (p0, z0, 31), -+ z0_res = svld1sh_gather_index_u32 (p0, z0, 31)) -+ -+/* -+** ld1sh_gather_32_u32_index: -+** mov (x[0-9]+), #?64 -+** ld1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_32_u32_index, svuint32_t, svuint32_t, -+ z0_res = svld1sh_gather_u32base_index_u32 (p0, z0, 32), -+ z0_res = svld1sh_gather_index_u32 (p0, z0, 32)) -+ -+/* -+** ld1sh_gather_x0_u32_s32offset: -+** ld1sh z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_x0_u32_s32offset, svuint32_t, int16_t, svint32_t, -+ z0_res = svld1sh_gather_s32offset_u32 (p0, x0, z0), -+ z0_res = svld1sh_gather_offset_u32 (p0, x0, z0)) -+ -+/* -+** ld1sh_gather_tied1_u32_s32offset: -+** ld1sh z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_tied1_u32_s32offset, svuint32_t, int16_t, svint32_t, -+ z0_res = svld1sh_gather_s32offset_u32 (p0, x0, z0), -+ z0_res = svld1sh_gather_offset_u32 (p0, x0, z0)) -+ -+/* -+** ld1sh_gather_untied_u32_s32offset: -+** ld1sh z0\.s, p0/z, \[x0, z1\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_untied_u32_s32offset, svuint32_t, int16_t, svint32_t, -+ z0_res = svld1sh_gather_s32offset_u32 (p0, x0, z1), -+ z0_res = svld1sh_gather_offset_u32 (p0, x0, z1)) -+ -+/* -+** ld1sh_gather_x0_u32_u32offset: -+** ld1sh z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_x0_u32_u32offset, svuint32_t, int16_t, svuint32_t, -+ z0_res = svld1sh_gather_u32offset_u32 (p0, x0, z0), -+ z0_res = svld1sh_gather_offset_u32 (p0, x0, z0)) -+ -+/* -+** ld1sh_gather_tied1_u32_u32offset: -+** ld1sh z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_tied1_u32_u32offset, svuint32_t, int16_t, svuint32_t, -+ z0_res = svld1sh_gather_u32offset_u32 (p0, x0, z0), -+ z0_res = svld1sh_gather_offset_u32 (p0, x0, z0)) -+ -+/* -+** ld1sh_gather_untied_u32_u32offset: -+** ld1sh z0\.s, p0/z, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_untied_u32_u32offset, svuint32_t, int16_t, svuint32_t, -+ z0_res = svld1sh_gather_u32offset_u32 (p0, x0, z1), -+ z0_res = svld1sh_gather_offset_u32 (p0, x0, z1)) -+ -+/* -+** ld1sh_gather_x0_u32_s32index: -+** ld1sh z0\.s, p0/z, \[x0, z0\.s, sxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_x0_u32_s32index, svuint32_t, int16_t, svint32_t, -+ z0_res = svld1sh_gather_s32index_u32 (p0, x0, z0), -+ z0_res = svld1sh_gather_index_u32 (p0, x0, z0)) -+ -+/* -+** ld1sh_gather_tied1_u32_s32index: -+** ld1sh z0\.s, p0/z, \[x0, z0\.s, sxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_tied1_u32_s32index, svuint32_t, int16_t, svint32_t, -+ z0_res = svld1sh_gather_s32index_u32 (p0, x0, z0), -+ z0_res = svld1sh_gather_index_u32 (p0, x0, z0)) -+ -+/* -+** ld1sh_gather_untied_u32_s32index: -+** ld1sh z0\.s, p0/z, \[x0, z1\.s, sxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_untied_u32_s32index, svuint32_t, int16_t, svint32_t, -+ z0_res = svld1sh_gather_s32index_u32 (p0, x0, z1), -+ z0_res = svld1sh_gather_index_u32 (p0, x0, z1)) -+ -+/* -+** ld1sh_gather_x0_u32_u32index: -+** ld1sh z0\.s, p0/z, \[x0, z0\.s, uxtw 
1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_x0_u32_u32index, svuint32_t, int16_t, svuint32_t, -+ z0_res = svld1sh_gather_u32index_u32 (p0, x0, z0), -+ z0_res = svld1sh_gather_index_u32 (p0, x0, z0)) -+ -+/* -+** ld1sh_gather_tied1_u32_u32index: -+** ld1sh z0\.s, p0/z, \[x0, z0\.s, uxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_tied1_u32_u32index, svuint32_t, int16_t, svuint32_t, -+ z0_res = svld1sh_gather_u32index_u32 (p0, x0, z0), -+ z0_res = svld1sh_gather_index_u32 (p0, x0, z0)) -+ -+/* -+** ld1sh_gather_untied_u32_u32index: -+** ld1sh z0\.s, p0/z, \[x0, z1\.s, uxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_untied_u32_u32index, svuint32_t, int16_t, svuint32_t, -+ z0_res = svld1sh_gather_u32index_u32 (p0, x0, z1), -+ z0_res = svld1sh_gather_index_u32 (p0, x0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_u64.c -new file mode 100644 -index 000000000..3a0094fba ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_gather_u64.c -@@ -0,0 +1,288 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1sh_gather_u64_tied1: -+** ld1sh z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_u64_tied1, svuint64_t, svuint64_t, -+ z0_res = svld1sh_gather_u64base_u64 (p0, z0), -+ z0_res = svld1sh_gather_u64 (p0, z0)) -+ -+/* -+** ld1sh_gather_u64_untied: -+** ld1sh z0\.d, p0/z, \[z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_u64_untied, svuint64_t, svuint64_t, -+ z0_res = svld1sh_gather_u64base_u64 (p0, z1), -+ z0_res = svld1sh_gather_u64 (p0, z1)) -+ -+/* -+** ld1sh_gather_x0_u64_offset: -+** ld1sh z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_x0_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1sh_gather_u64base_offset_u64 (p0, z0, x0), -+ z0_res = svld1sh_gather_offset_u64 (p0, z0, x0)) -+ -+/* -+** ld1sh_gather_m2_u64_offset: -+** mov (x[0-9]+), #?-2 -+** ld1sh z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_m2_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1sh_gather_u64base_offset_u64 (p0, z0, -2), -+ z0_res = svld1sh_gather_offset_u64 (p0, z0, -2)) -+ -+/* -+** ld1sh_gather_0_u64_offset: -+** ld1sh z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_0_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1sh_gather_u64base_offset_u64 (p0, z0, 0), -+ z0_res = svld1sh_gather_offset_u64 (p0, z0, 0)) -+ -+/* -+** ld1sh_gather_5_u64_offset: -+** mov (x[0-9]+), #?5 -+** ld1sh z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_5_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1sh_gather_u64base_offset_u64 (p0, z0, 5), -+ z0_res = svld1sh_gather_offset_u64 (p0, z0, 5)) -+ -+/* -+** ld1sh_gather_6_u64_offset: -+** ld1sh z0\.d, p0/z, \[z0\.d, #6\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_6_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1sh_gather_u64base_offset_u64 (p0, z0, 6), -+ z0_res = svld1sh_gather_offset_u64 (p0, z0, 6)) -+ -+/* -+** ld1sh_gather_62_u64_offset: -+** ld1sh z0\.d, p0/z, \[z0\.d, #62\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_62_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1sh_gather_u64base_offset_u64 (p0, z0, 62), -+ z0_res = svld1sh_gather_offset_u64 (p0, z0, 62)) -+ -+/* -+** ld1sh_gather_64_u64_offset: -+** mov (x[0-9]+), #?64 -+** ld1sh z0\.d, p0/z, \[\1, z0\.d\] -+** 
ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_64_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1sh_gather_u64base_offset_u64 (p0, z0, 64), -+ z0_res = svld1sh_gather_offset_u64 (p0, z0, 64)) -+ -+/* -+** ld1sh_gather_x0_u64_index: -+** lsl (x[0-9]+), x0, #?1 -+** ld1sh z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_x0_u64_index, svuint64_t, svuint64_t, -+ z0_res = svld1sh_gather_u64base_index_u64 (p0, z0, x0), -+ z0_res = svld1sh_gather_index_u64 (p0, z0, x0)) -+ -+/* -+** ld1sh_gather_m1_u64_index: -+** mov (x[0-9]+), #?-2 -+** ld1sh z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_m1_u64_index, svuint64_t, svuint64_t, -+ z0_res = svld1sh_gather_u64base_index_u64 (p0, z0, -1), -+ z0_res = svld1sh_gather_index_u64 (p0, z0, -1)) -+ -+/* -+** ld1sh_gather_0_u64_index: -+** ld1sh z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_0_u64_index, svuint64_t, svuint64_t, -+ z0_res = svld1sh_gather_u64base_index_u64 (p0, z0, 0), -+ z0_res = svld1sh_gather_index_u64 (p0, z0, 0)) -+ -+/* -+** ld1sh_gather_5_u64_index: -+** ld1sh z0\.d, p0/z, \[z0\.d, #10\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_5_u64_index, svuint64_t, svuint64_t, -+ z0_res = svld1sh_gather_u64base_index_u64 (p0, z0, 5), -+ z0_res = svld1sh_gather_index_u64 (p0, z0, 5)) -+ -+/* -+** ld1sh_gather_31_u64_index: -+** ld1sh z0\.d, p0/z, \[z0\.d, #62\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_31_u64_index, svuint64_t, svuint64_t, -+ z0_res = svld1sh_gather_u64base_index_u64 (p0, z0, 31), -+ z0_res = svld1sh_gather_index_u64 (p0, z0, 31)) -+ -+/* -+** ld1sh_gather_32_u64_index: -+** mov (x[0-9]+), #?64 -+** ld1sh z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sh_gather_32_u64_index, svuint64_t, svuint64_t, -+ z0_res = svld1sh_gather_u64base_index_u64 (p0, z0, 32), -+ z0_res = svld1sh_gather_index_u64 (p0, z0, 32)) -+ -+/* -+** ld1sh_gather_x0_u64_s64offset: -+** ld1sh z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_x0_u64_s64offset, svuint64_t, int16_t, svint64_t, -+ z0_res = svld1sh_gather_s64offset_u64 (p0, x0, z0), -+ z0_res = svld1sh_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ld1sh_gather_tied1_u64_s64offset: -+** ld1sh z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_tied1_u64_s64offset, svuint64_t, int16_t, svint64_t, -+ z0_res = svld1sh_gather_s64offset_u64 (p0, x0, z0), -+ z0_res = svld1sh_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ld1sh_gather_untied_u64_s64offset: -+** ld1sh z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_untied_u64_s64offset, svuint64_t, int16_t, svint64_t, -+ z0_res = svld1sh_gather_s64offset_u64 (p0, x0, z1), -+ z0_res = svld1sh_gather_offset_u64 (p0, x0, z1)) -+ -+/* -+** ld1sh_gather_ext_u64_s64offset: -+** ld1sh z0\.d, p0/z, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_ext_u64_s64offset, svuint64_t, int16_t, svint64_t, -+ z0_res = svld1sh_gather_s64offset_u64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svld1sh_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ld1sh_gather_x0_u64_u64offset: -+** ld1sh z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_x0_u64_u64offset, svuint64_t, int16_t, svuint64_t, -+ z0_res = svld1sh_gather_u64offset_u64 (p0, x0, z0), -+ z0_res = svld1sh_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ld1sh_gather_tied1_u64_u64offset: -+** ld1sh z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ 
(ld1sh_gather_tied1_u64_u64offset, svuint64_t, int16_t, svuint64_t, -+ z0_res = svld1sh_gather_u64offset_u64 (p0, x0, z0), -+ z0_res = svld1sh_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ld1sh_gather_untied_u64_u64offset: -+** ld1sh z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_untied_u64_u64offset, svuint64_t, int16_t, svuint64_t, -+ z0_res = svld1sh_gather_u64offset_u64 (p0, x0, z1), -+ z0_res = svld1sh_gather_offset_u64 (p0, x0, z1)) -+ -+/* -+** ld1sh_gather_ext_u64_u64offset: -+** ld1sh z0\.d, p0/z, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_ext_u64_u64offset, svuint64_t, int16_t, svuint64_t, -+ z0_res = svld1sh_gather_u64offset_u64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svld1sh_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ld1sh_gather_x0_u64_s64index: -+** ld1sh z0\.d, p0/z, \[x0, z0\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_x0_u64_s64index, svuint64_t, int16_t, svint64_t, -+ z0_res = svld1sh_gather_s64index_u64 (p0, x0, z0), -+ z0_res = svld1sh_gather_index_u64 (p0, x0, z0)) -+ -+/* -+** ld1sh_gather_tied1_u64_s64index: -+** ld1sh z0\.d, p0/z, \[x0, z0\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_tied1_u64_s64index, svuint64_t, int16_t, svint64_t, -+ z0_res = svld1sh_gather_s64index_u64 (p0, x0, z0), -+ z0_res = svld1sh_gather_index_u64 (p0, x0, z0)) -+ -+/* -+** ld1sh_gather_untied_u64_s64index: -+** ld1sh z0\.d, p0/z, \[x0, z1\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_untied_u64_s64index, svuint64_t, int16_t, svint64_t, -+ z0_res = svld1sh_gather_s64index_u64 (p0, x0, z1), -+ z0_res = svld1sh_gather_index_u64 (p0, x0, z1)) -+ -+/* -+** ld1sh_gather_ext_u64_s64index: -+** ld1sh z0\.d, p0/z, \[x0, z1\.d, sxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_ext_u64_s64index, svuint64_t, int16_t, svint64_t, -+ z0_res = svld1sh_gather_s64index_u64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svld1sh_gather_index_u64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ld1sh_gather_x0_u64_u64index: -+** ld1sh z0\.d, p0/z, \[x0, z0\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_x0_u64_u64index, svuint64_t, int16_t, svuint64_t, -+ z0_res = svld1sh_gather_u64index_u64 (p0, x0, z0), -+ z0_res = svld1sh_gather_index_u64 (p0, x0, z0)) -+ -+/* -+** ld1sh_gather_tied1_u64_u64index: -+** ld1sh z0\.d, p0/z, \[x0, z0\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_tied1_u64_u64index, svuint64_t, int16_t, svuint64_t, -+ z0_res = svld1sh_gather_u64index_u64 (p0, x0, z0), -+ z0_res = svld1sh_gather_index_u64 (p0, x0, z0)) -+ -+/* -+** ld1sh_gather_untied_u64_u64index: -+** ld1sh z0\.d, p0/z, \[x0, z1\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_untied_u64_u64index, svuint64_t, int16_t, svuint64_t, -+ z0_res = svld1sh_gather_u64index_u64 (p0, x0, z1), -+ z0_res = svld1sh_gather_index_u64 (p0, x0, z1)) -+ -+/* -+** ld1sh_gather_ext_u64_u64index: -+** ld1sh z0\.d, p0/z, \[x0, z1\.d, uxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sh_gather_ext_u64_u64index, svuint64_t, int16_t, svuint64_t, -+ z0_res = svld1sh_gather_u64index_u64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svld1sh_gather_index_u64 (p0, x0, svextw_x (p0, z1))) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_s32.c -new file mode 100644 -index 000000000..8614f52c5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_s32.c -@@ -0,0 +1,158 @@ -+/* { dg-final { 
check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1sh_s32_base: -+** ld1sh z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sh_s32_base, svint32_t, int16_t, -+ z0 = svld1sh_s32 (p0, x0), -+ z0 = svld1sh_s32 (p0, x0)) -+ -+/* -+** ld1sh_s32_index: -+** ld1sh z0\.s, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ld1sh_s32_index, svint32_t, int16_t, -+ z0 = svld1sh_s32 (p0, x0 + x1), -+ z0 = svld1sh_s32 (p0, x0 + x1)) -+ -+/* -+** ld1sh_s32_1: -+** ld1sh z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sh_s32_1, svint32_t, int16_t, -+ z0 = svld1sh_s32 (p0, x0 + svcntw ()), -+ z0 = svld1sh_s32 (p0, x0 + svcntw ())) -+ -+/* -+** ld1sh_s32_7: -+** ld1sh z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sh_s32_7, svint32_t, int16_t, -+ z0 = svld1sh_s32 (p0, x0 + svcntw () * 7), -+ z0 = svld1sh_s32 (p0, x0 + svcntw () * 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1sh_s32_8: -+** incb x0, all, mul #4 -+** ld1sh z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sh_s32_8, svint32_t, int16_t, -+ z0 = svld1sh_s32 (p0, x0 + svcntw () * 8), -+ z0 = svld1sh_s32 (p0, x0 + svcntw () * 8)) -+ -+/* -+** ld1sh_s32_m1: -+** ld1sh z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sh_s32_m1, svint32_t, int16_t, -+ z0 = svld1sh_s32 (p0, x0 - svcntw ()), -+ z0 = svld1sh_s32 (p0, x0 - svcntw ())) -+ -+/* -+** ld1sh_s32_m8: -+** ld1sh z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sh_s32_m8, svint32_t, int16_t, -+ z0 = svld1sh_s32 (p0, x0 - svcntw () * 8), -+ z0 = svld1sh_s32 (p0, x0 - svcntw () * 8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1sh_s32_m9: -+** dech x0, all, mul #9 -+** ld1sh z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sh_s32_m9, svint32_t, int16_t, -+ z0 = svld1sh_s32 (p0, x0 - svcntw () * 9), -+ z0 = svld1sh_s32 (p0, x0 - svcntw () * 9)) -+ -+/* -+** ld1sh_vnum_s32_0: -+** ld1sh z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sh_vnum_s32_0, svint32_t, int16_t, -+ z0 = svld1sh_vnum_s32 (p0, x0, 0), -+ z0 = svld1sh_vnum_s32 (p0, x0, 0)) -+ -+/* -+** ld1sh_vnum_s32_1: -+** ld1sh z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sh_vnum_s32_1, svint32_t, int16_t, -+ z0 = svld1sh_vnum_s32 (p0, x0, 1), -+ z0 = svld1sh_vnum_s32 (p0, x0, 1)) -+ -+/* -+** ld1sh_vnum_s32_7: -+** ld1sh z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sh_vnum_s32_7, svint32_t, int16_t, -+ z0 = svld1sh_vnum_s32 (p0, x0, 7), -+ z0 = svld1sh_vnum_s32 (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1sh_vnum_s32_8: -+** incb x0, all, mul #4 -+** ld1sh z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sh_vnum_s32_8, svint32_t, int16_t, -+ z0 = svld1sh_vnum_s32 (p0, x0, 8), -+ z0 = svld1sh_vnum_s32 (p0, x0, 8)) -+ -+/* -+** ld1sh_vnum_s32_m1: -+** ld1sh z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sh_vnum_s32_m1, svint32_t, int16_t, -+ z0 = svld1sh_vnum_s32 (p0, x0, -1), -+ z0 = svld1sh_vnum_s32 (p0, x0, -1)) -+ -+/* -+** ld1sh_vnum_s32_m8: -+** ld1sh z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sh_vnum_s32_m8, svint32_t, int16_t, -+ z0 = svld1sh_vnum_s32 (p0, x0, -8), -+ z0 = svld1sh_vnum_s32 (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld1sh_vnum_s32_m9: -+** dech x0, all, mul #9 -+** ld1sh z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sh_vnum_s32_m9, svint32_t, int16_t, -+ z0 = svld1sh_vnum_s32 (p0, x0, -9), -+ z0 = svld1sh_vnum_s32 (p0, x0, -9)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld1sh_vnum_s32_x1: -+** cnth (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld1sh z0\.s, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld1sh_vnum_s32_x1, svint32_t, int16_t, -+ z0 = svld1sh_vnum_s32 (p0, x0, x1), -+ z0 = svld1sh_vnum_s32 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_s64.c -new file mode 100644 -index 000000000..c02b40a76 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_s64.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1sh_s64_base: -+** ld1sh z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sh_s64_base, svint64_t, int16_t, -+ z0 = svld1sh_s64 (p0, x0), -+ z0 = svld1sh_s64 (p0, x0)) -+ -+/* -+** ld1sh_s64_index: -+** ld1sh z0\.d, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ld1sh_s64_index, svint64_t, int16_t, -+ z0 = svld1sh_s64 (p0, x0 + x1), -+ z0 = svld1sh_s64 (p0, x0 + x1)) -+ -+/* -+** ld1sh_s64_1: -+** ld1sh z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sh_s64_1, svint64_t, int16_t, -+ z0 = svld1sh_s64 (p0, x0 + svcntd ()), -+ z0 = svld1sh_s64 (p0, x0 + svcntd ())) -+ -+/* -+** ld1sh_s64_7: -+** ld1sh z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sh_s64_7, svint64_t, int16_t, -+ z0 = svld1sh_s64 (p0, x0 + svcntd () * 7), -+ z0 = svld1sh_s64 (p0, x0 + svcntd () * 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1sh_s64_8: -+** incb x0, all, mul #2 -+** ld1sh z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sh_s64_8, svint64_t, int16_t, -+ z0 = svld1sh_s64 (p0, x0 + svcntd () * 8), -+ z0 = svld1sh_s64 (p0, x0 + svcntd () * 8)) -+ -+/* -+** ld1sh_s64_m1: -+** ld1sh z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sh_s64_m1, svint64_t, int16_t, -+ z0 = svld1sh_s64 (p0, x0 - svcntd ()), -+ z0 = svld1sh_s64 (p0, x0 - svcntd ())) -+ -+/* -+** ld1sh_s64_m8: -+** ld1sh z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sh_s64_m8, svint64_t, int16_t, -+ z0 = svld1sh_s64 (p0, x0 - svcntd () * 8), -+ z0 = svld1sh_s64 (p0, x0 - svcntd () * 8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1sh_s64_m9: -+** decw x0, all, mul #9 -+** ld1sh z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sh_s64_m9, svint64_t, int16_t, -+ z0 = svld1sh_s64 (p0, x0 - svcntd () * 9), -+ z0 = svld1sh_s64 (p0, x0 - svcntd () * 9)) -+ -+/* -+** ld1sh_vnum_s64_0: -+** ld1sh z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sh_vnum_s64_0, svint64_t, int16_t, -+ z0 = svld1sh_vnum_s64 (p0, x0, 0), -+ z0 = svld1sh_vnum_s64 (p0, x0, 0)) -+ -+/* -+** ld1sh_vnum_s64_1: -+** ld1sh z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sh_vnum_s64_1, svint64_t, int16_t, -+ z0 = svld1sh_vnum_s64 (p0, x0, 1), -+ z0 = svld1sh_vnum_s64 (p0, x0, 1)) -+ -+/* -+** ld1sh_vnum_s64_7: -+** ld1sh z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sh_vnum_s64_7, svint64_t, int16_t, -+ z0 = svld1sh_vnum_s64 (p0, x0, 7), -+ z0 = svld1sh_vnum_s64 (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld1sh_vnum_s64_8: -+** incb x0, all, mul #2 -+** ld1sh z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sh_vnum_s64_8, svint64_t, int16_t, -+ z0 = svld1sh_vnum_s64 (p0, x0, 8), -+ z0 = svld1sh_vnum_s64 (p0, x0, 8)) -+ -+/* -+** ld1sh_vnum_s64_m1: -+** ld1sh z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sh_vnum_s64_m1, svint64_t, int16_t, -+ z0 = svld1sh_vnum_s64 (p0, x0, -1), -+ z0 = svld1sh_vnum_s64 (p0, x0, -1)) -+ -+/* -+** ld1sh_vnum_s64_m8: -+** ld1sh z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sh_vnum_s64_m8, svint64_t, int16_t, -+ z0 = svld1sh_vnum_s64 (p0, x0, -8), -+ z0 = svld1sh_vnum_s64 (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1sh_vnum_s64_m9: -+** decw x0, all, mul #9 -+** ld1sh z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sh_vnum_s64_m9, svint64_t, int16_t, -+ z0 = svld1sh_vnum_s64 (p0, x0, -9), -+ z0 = svld1sh_vnum_s64 (p0, x0, -9)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld1sh_vnum_s64_x1: -+** cntw (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld1sh z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld1sh_vnum_s64_x1, svint64_t, int16_t, -+ z0 = svld1sh_vnum_s64 (p0, x0, x1), -+ z0 = svld1sh_vnum_s64 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_u32.c -new file mode 100644 -index 000000000..ead96174a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_u32.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1sh_u32_base: -+** ld1sh z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sh_u32_base, svuint32_t, int16_t, -+ z0 = svld1sh_u32 (p0, x0), -+ z0 = svld1sh_u32 (p0, x0)) -+ -+/* -+** ld1sh_u32_index: -+** ld1sh z0\.s, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ld1sh_u32_index, svuint32_t, int16_t, -+ z0 = svld1sh_u32 (p0, x0 + x1), -+ z0 = svld1sh_u32 (p0, x0 + x1)) -+ -+/* -+** ld1sh_u32_1: -+** ld1sh z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sh_u32_1, svuint32_t, int16_t, -+ z0 = svld1sh_u32 (p0, x0 + svcntw ()), -+ z0 = svld1sh_u32 (p0, x0 + svcntw ())) -+ -+/* -+** ld1sh_u32_7: -+** ld1sh z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sh_u32_7, svuint32_t, int16_t, -+ z0 = svld1sh_u32 (p0, x0 + svcntw () * 7), -+ z0 = svld1sh_u32 (p0, x0 + svcntw () * 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1sh_u32_8: -+** incb x0, all, mul #4 -+** ld1sh z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sh_u32_8, svuint32_t, int16_t, -+ z0 = svld1sh_u32 (p0, x0 + svcntw () * 8), -+ z0 = svld1sh_u32 (p0, x0 + svcntw () * 8)) -+ -+/* -+** ld1sh_u32_m1: -+** ld1sh z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sh_u32_m1, svuint32_t, int16_t, -+ z0 = svld1sh_u32 (p0, x0 - svcntw ()), -+ z0 = svld1sh_u32 (p0, x0 - svcntw ())) -+ -+/* -+** ld1sh_u32_m8: -+** ld1sh z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sh_u32_m8, svuint32_t, int16_t, -+ z0 = svld1sh_u32 (p0, x0 - svcntw () * 8), -+ z0 = svld1sh_u32 (p0, x0 - svcntw () * 8)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld1sh_u32_m9: -+** dech x0, all, mul #9 -+** ld1sh z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sh_u32_m9, svuint32_t, int16_t, -+ z0 = svld1sh_u32 (p0, x0 - svcntw () * 9), -+ z0 = svld1sh_u32 (p0, x0 - svcntw () * 9)) -+ -+/* -+** ld1sh_vnum_u32_0: -+** ld1sh z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sh_vnum_u32_0, svuint32_t, int16_t, -+ z0 = svld1sh_vnum_u32 (p0, x0, 0), -+ z0 = svld1sh_vnum_u32 (p0, x0, 0)) -+ -+/* -+** ld1sh_vnum_u32_1: -+** ld1sh z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sh_vnum_u32_1, svuint32_t, int16_t, -+ z0 = svld1sh_vnum_u32 (p0, x0, 1), -+ z0 = svld1sh_vnum_u32 (p0, x0, 1)) -+ -+/* -+** ld1sh_vnum_u32_7: -+** ld1sh z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sh_vnum_u32_7, svuint32_t, int16_t, -+ z0 = svld1sh_vnum_u32 (p0, x0, 7), -+ z0 = svld1sh_vnum_u32 (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1sh_vnum_u32_8: -+** incb x0, all, mul #4 -+** ld1sh z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sh_vnum_u32_8, svuint32_t, int16_t, -+ z0 = svld1sh_vnum_u32 (p0, x0, 8), -+ z0 = svld1sh_vnum_u32 (p0, x0, 8)) -+ -+/* -+** ld1sh_vnum_u32_m1: -+** ld1sh z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sh_vnum_u32_m1, svuint32_t, int16_t, -+ z0 = svld1sh_vnum_u32 (p0, x0, -1), -+ z0 = svld1sh_vnum_u32 (p0, x0, -1)) -+ -+/* -+** ld1sh_vnum_u32_m8: -+** ld1sh z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sh_vnum_u32_m8, svuint32_t, int16_t, -+ z0 = svld1sh_vnum_u32 (p0, x0, -8), -+ z0 = svld1sh_vnum_u32 (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1sh_vnum_u32_m9: -+** dech x0, all, mul #9 -+** ld1sh z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sh_vnum_u32_m9, svuint32_t, int16_t, -+ z0 = svld1sh_vnum_u32 (p0, x0, -9), -+ z0 = svld1sh_vnum_u32 (p0, x0, -9)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld1sh_vnum_u32_x1: -+** cnth (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld1sh z0\.s, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld1sh_vnum_u32_x1, svuint32_t, int16_t, -+ z0 = svld1sh_vnum_u32 (p0, x0, x1), -+ z0 = svld1sh_vnum_u32 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_u64.c -new file mode 100644 -index 000000000..e407a08a6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sh_u64.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1sh_u64_base: -+** ld1sh z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sh_u64_base, svuint64_t, int16_t, -+ z0 = svld1sh_u64 (p0, x0), -+ z0 = svld1sh_u64 (p0, x0)) -+ -+/* -+** ld1sh_u64_index: -+** ld1sh z0\.d, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ld1sh_u64_index, svuint64_t, int16_t, -+ z0 = svld1sh_u64 (p0, x0 + x1), -+ z0 = svld1sh_u64 (p0, x0 + x1)) -+ -+/* -+** ld1sh_u64_1: -+** ld1sh z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sh_u64_1, svuint64_t, int16_t, -+ z0 = svld1sh_u64 (p0, x0 + svcntd ()), -+ z0 = svld1sh_u64 (p0, x0 + svcntd ())) -+ -+/* -+** ld1sh_u64_7: -+** ld1sh z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sh_u64_7, svuint64_t, int16_t, -+ z0 = svld1sh_u64 (p0, x0 + svcntd () * 7), -+ z0 = svld1sh_u64 (p0, x0 + svcntd () * 7)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld1sh_u64_8: -+** incb x0, all, mul #2 -+** ld1sh z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sh_u64_8, svuint64_t, int16_t, -+ z0 = svld1sh_u64 (p0, x0 + svcntd () * 8), -+ z0 = svld1sh_u64 (p0, x0 + svcntd () * 8)) -+ -+/* -+** ld1sh_u64_m1: -+** ld1sh z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sh_u64_m1, svuint64_t, int16_t, -+ z0 = svld1sh_u64 (p0, x0 - svcntd ()), -+ z0 = svld1sh_u64 (p0, x0 - svcntd ())) -+ -+/* -+** ld1sh_u64_m8: -+** ld1sh z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sh_u64_m8, svuint64_t, int16_t, -+ z0 = svld1sh_u64 (p0, x0 - svcntd () * 8), -+ z0 = svld1sh_u64 (p0, x0 - svcntd () * 8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1sh_u64_m9: -+** decw x0, all, mul #9 -+** ld1sh z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sh_u64_m9, svuint64_t, int16_t, -+ z0 = svld1sh_u64 (p0, x0 - svcntd () * 9), -+ z0 = svld1sh_u64 (p0, x0 - svcntd () * 9)) -+ -+/* -+** ld1sh_vnum_u64_0: -+** ld1sh z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sh_vnum_u64_0, svuint64_t, int16_t, -+ z0 = svld1sh_vnum_u64 (p0, x0, 0), -+ z0 = svld1sh_vnum_u64 (p0, x0, 0)) -+ -+/* -+** ld1sh_vnum_u64_1: -+** ld1sh z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sh_vnum_u64_1, svuint64_t, int16_t, -+ z0 = svld1sh_vnum_u64 (p0, x0, 1), -+ z0 = svld1sh_vnum_u64 (p0, x0, 1)) -+ -+/* -+** ld1sh_vnum_u64_7: -+** ld1sh z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sh_vnum_u64_7, svuint64_t, int16_t, -+ z0 = svld1sh_vnum_u64 (p0, x0, 7), -+ z0 = svld1sh_vnum_u64 (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1sh_vnum_u64_8: -+** incb x0, all, mul #2 -+** ld1sh z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sh_vnum_u64_8, svuint64_t, int16_t, -+ z0 = svld1sh_vnum_u64 (p0, x0, 8), -+ z0 = svld1sh_vnum_u64 (p0, x0, 8)) -+ -+/* -+** ld1sh_vnum_u64_m1: -+** ld1sh z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sh_vnum_u64_m1, svuint64_t, int16_t, -+ z0 = svld1sh_vnum_u64 (p0, x0, -1), -+ z0 = svld1sh_vnum_u64 (p0, x0, -1)) -+ -+/* -+** ld1sh_vnum_u64_m8: -+** ld1sh z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sh_vnum_u64_m8, svuint64_t, int16_t, -+ z0 = svld1sh_vnum_u64 (p0, x0, -8), -+ z0 = svld1sh_vnum_u64 (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1sh_vnum_u64_m9: -+** decw x0, all, mul #9 -+** ld1sh z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sh_vnum_u64_m9, svuint64_t, int16_t, -+ z0 = svld1sh_vnum_u64 (p0, x0, -9), -+ z0 = svld1sh_vnum_u64 (p0, x0, -9)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld1sh_vnum_u64_x1: -+** cntw (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld1sh z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld1sh_vnum_u64_x1, svuint64_t, int16_t, -+ z0 = svld1sh_vnum_u64 (p0, x0, x1), -+ z0 = svld1sh_vnum_u64 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_gather_s64.c -new file mode 100644 -index 000000000..4d076b486 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_gather_s64.c -@@ -0,0 +1,308 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1sw_gather_s64_tied1: -+** ld1sw z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sw_gather_s64_tied1, svint64_t, svuint64_t, -+ z0_res = svld1sw_gather_u64base_s64 (p0, z0), -+ z0_res = svld1sw_gather_s64 (p0, z0)) -+ -+/* -+** ld1sw_gather_s64_untied: -+** ld1sw z0\.d, p0/z, \[z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sw_gather_s64_untied, svint64_t, svuint64_t, -+ z0_res = svld1sw_gather_u64base_s64 (p0, z1), -+ z0_res = svld1sw_gather_s64 (p0, z1)) -+ -+/* -+** ld1sw_gather_x0_s64_offset: -+** ld1sw z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sw_gather_x0_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1sw_gather_u64base_offset_s64 (p0, z0, x0), -+ z0_res = svld1sw_gather_offset_s64 (p0, z0, x0)) -+ -+/* -+** ld1sw_gather_m4_s64_offset: -+** mov (x[0-9]+), #?-4 -+** ld1sw z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sw_gather_m4_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1sw_gather_u64base_offset_s64 (p0, z0, -4), -+ z0_res = svld1sw_gather_offset_s64 (p0, z0, -4)) -+ -+/* -+** ld1sw_gather_0_s64_offset: -+** ld1sw z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sw_gather_0_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1sw_gather_u64base_offset_s64 (p0, z0, 0), -+ z0_res = svld1sw_gather_offset_s64 (p0, z0, 0)) -+ -+/* -+** ld1sw_gather_5_s64_offset: -+** mov (x[0-9]+), #?5 -+** ld1sw z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sw_gather_5_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1sw_gather_u64base_offset_s64 (p0, z0, 5), -+ z0_res = svld1sw_gather_offset_s64 (p0, z0, 5)) -+ -+/* -+** ld1sw_gather_6_s64_offset: -+** mov (x[0-9]+), #?6 -+** ld1sw z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sw_gather_6_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1sw_gather_u64base_offset_s64 (p0, z0, 6), -+ z0_res = svld1sw_gather_offset_s64 (p0, z0, 6)) -+ -+/* -+** ld1sw_gather_7_s64_offset: -+** mov (x[0-9]+), #?7 -+** ld1sw z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sw_gather_7_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1sw_gather_u64base_offset_s64 (p0, z0, 7), -+ z0_res = svld1sw_gather_offset_s64 (p0, z0, 7)) -+ -+/* -+** ld1sw_gather_8_s64_offset: -+** ld1sw z0\.d, p0/z, \[z0\.d, #8\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sw_gather_8_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1sw_gather_u64base_offset_s64 (p0, z0, 8), -+ z0_res = svld1sw_gather_offset_s64 (p0, z0, 8)) -+ -+/* -+** ld1sw_gather_124_s64_offset: -+** ld1sw z0\.d, p0/z, \[z0\.d, #124\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sw_gather_124_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1sw_gather_u64base_offset_s64 (p0, z0, 124), -+ z0_res = svld1sw_gather_offset_s64 (p0, z0, 124)) -+ -+/* -+** ld1sw_gather_128_s64_offset: -+** mov (x[0-9]+), #?128 -+** ld1sw z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sw_gather_128_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1sw_gather_u64base_offset_s64 (p0, z0, 128), -+ z0_res = svld1sw_gather_offset_s64 (p0, z0, 128)) -+ -+/* -+** ld1sw_gather_x0_s64_index: -+** lsl (x[0-9]+), x0, #?2 -+** ld1sw z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sw_gather_x0_s64_index, svint64_t, svuint64_t, -+ z0_res = svld1sw_gather_u64base_index_s64 (p0, z0, x0), -+ z0_res = svld1sw_gather_index_s64 (p0, z0, x0)) -+ -+/* -+** ld1sw_gather_m1_s64_index: -+** mov (x[0-9]+), #?-4 -+** ld1sw z0\.d, p0/z, 
\[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sw_gather_m1_s64_index, svint64_t, svuint64_t, -+ z0_res = svld1sw_gather_u64base_index_s64 (p0, z0, -1), -+ z0_res = svld1sw_gather_index_s64 (p0, z0, -1)) -+ -+/* -+** ld1sw_gather_0_s64_index: -+** ld1sw z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sw_gather_0_s64_index, svint64_t, svuint64_t, -+ z0_res = svld1sw_gather_u64base_index_s64 (p0, z0, 0), -+ z0_res = svld1sw_gather_index_s64 (p0, z0, 0)) -+ -+/* -+** ld1sw_gather_5_s64_index: -+** ld1sw z0\.d, p0/z, \[z0\.d, #20\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sw_gather_5_s64_index, svint64_t, svuint64_t, -+ z0_res = svld1sw_gather_u64base_index_s64 (p0, z0, 5), -+ z0_res = svld1sw_gather_index_s64 (p0, z0, 5)) -+ -+/* -+** ld1sw_gather_31_s64_index: -+** ld1sw z0\.d, p0/z, \[z0\.d, #124\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sw_gather_31_s64_index, svint64_t, svuint64_t, -+ z0_res = svld1sw_gather_u64base_index_s64 (p0, z0, 31), -+ z0_res = svld1sw_gather_index_s64 (p0, z0, 31)) -+ -+/* -+** ld1sw_gather_32_s64_index: -+** mov (x[0-9]+), #?128 -+** ld1sw z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sw_gather_32_s64_index, svint64_t, svuint64_t, -+ z0_res = svld1sw_gather_u64base_index_s64 (p0, z0, 32), -+ z0_res = svld1sw_gather_index_s64 (p0, z0, 32)) -+ -+/* -+** ld1sw_gather_x0_s64_s64offset: -+** ld1sw z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sw_gather_x0_s64_s64offset, svint64_t, int32_t, svint64_t, -+ z0_res = svld1sw_gather_s64offset_s64 (p0, x0, z0), -+ z0_res = svld1sw_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ld1sw_gather_tied1_s64_s64offset: -+** ld1sw z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sw_gather_tied1_s64_s64offset, svint64_t, int32_t, svint64_t, -+ z0_res = svld1sw_gather_s64offset_s64 (p0, x0, z0), -+ z0_res = svld1sw_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ld1sw_gather_untied_s64_s64offset: -+** ld1sw z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sw_gather_untied_s64_s64offset, svint64_t, int32_t, svint64_t, -+ z0_res = svld1sw_gather_s64offset_s64 (p0, x0, z1), -+ z0_res = svld1sw_gather_offset_s64 (p0, x0, z1)) -+ -+/* -+** ld1sw_gather_ext_s64_s64offset: -+** ld1sw z0\.d, p0/z, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sw_gather_ext_s64_s64offset, svint64_t, int32_t, svint64_t, -+ z0_res = svld1sw_gather_s64offset_s64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svld1sw_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ld1sw_gather_x0_s64_u64offset: -+** ld1sw z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sw_gather_x0_s64_u64offset, svint64_t, int32_t, svuint64_t, -+ z0_res = svld1sw_gather_u64offset_s64 (p0, x0, z0), -+ z0_res = svld1sw_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ld1sw_gather_tied1_s64_u64offset: -+** ld1sw z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sw_gather_tied1_s64_u64offset, svint64_t, int32_t, svuint64_t, -+ z0_res = svld1sw_gather_u64offset_s64 (p0, x0, z0), -+ z0_res = svld1sw_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ld1sw_gather_untied_s64_u64offset: -+** ld1sw z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sw_gather_untied_s64_u64offset, svint64_t, int32_t, svuint64_t, -+ z0_res = svld1sw_gather_u64offset_s64 (p0, x0, z1), -+ z0_res = svld1sw_gather_offset_s64 (p0, x0, z1)) -+ -+/* -+** ld1sw_gather_ext_s64_u64offset: -+** ld1sw z0\.d, p0/z, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ 
(ld1sw_gather_ext_s64_u64offset, svint64_t, int32_t, svuint64_t, -+ z0_res = svld1sw_gather_u64offset_s64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svld1sw_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ld1sw_gather_x0_s64_s64index: -+** ld1sw z0\.d, p0/z, \[x0, z0\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sw_gather_x0_s64_s64index, svint64_t, int32_t, svint64_t, -+ z0_res = svld1sw_gather_s64index_s64 (p0, x0, z0), -+ z0_res = svld1sw_gather_index_s64 (p0, x0, z0)) -+ -+/* -+** ld1sw_gather_tied1_s64_s64index: -+** ld1sw z0\.d, p0/z, \[x0, z0\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sw_gather_tied1_s64_s64index, svint64_t, int32_t, svint64_t, -+ z0_res = svld1sw_gather_s64index_s64 (p0, x0, z0), -+ z0_res = svld1sw_gather_index_s64 (p0, x0, z0)) -+ -+/* -+** ld1sw_gather_untied_s64_s64index: -+** ld1sw z0\.d, p0/z, \[x0, z1\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sw_gather_untied_s64_s64index, svint64_t, int32_t, svint64_t, -+ z0_res = svld1sw_gather_s64index_s64 (p0, x0, z1), -+ z0_res = svld1sw_gather_index_s64 (p0, x0, z1)) -+ -+/* -+** ld1sw_gather_ext_s64_s64index: -+** ld1sw z0\.d, p0/z, \[x0, z1\.d, sxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sw_gather_ext_s64_s64index, svint64_t, int32_t, svint64_t, -+ z0_res = svld1sw_gather_s64index_s64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svld1sw_gather_index_s64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ld1sw_gather_x0_s64_u64index: -+** ld1sw z0\.d, p0/z, \[x0, z0\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sw_gather_x0_s64_u64index, svint64_t, int32_t, svuint64_t, -+ z0_res = svld1sw_gather_u64index_s64 (p0, x0, z0), -+ z0_res = svld1sw_gather_index_s64 (p0, x0, z0)) -+ -+/* -+** ld1sw_gather_tied1_s64_u64index: -+** ld1sw z0\.d, p0/z, \[x0, z0\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sw_gather_tied1_s64_u64index, svint64_t, int32_t, svuint64_t, -+ z0_res = svld1sw_gather_u64index_s64 (p0, x0, z0), -+ z0_res = svld1sw_gather_index_s64 (p0, x0, z0)) -+ -+/* -+** ld1sw_gather_untied_s64_u64index: -+** ld1sw z0\.d, p0/z, \[x0, z1\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sw_gather_untied_s64_u64index, svint64_t, int32_t, svuint64_t, -+ z0_res = svld1sw_gather_u64index_s64 (p0, x0, z1), -+ z0_res = svld1sw_gather_index_s64 (p0, x0, z1)) -+ -+/* -+** ld1sw_gather_ext_s64_u64index: -+** ld1sw z0\.d, p0/z, \[x0, z1\.d, uxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sw_gather_ext_s64_u64index, svint64_t, int32_t, svuint64_t, -+ z0_res = svld1sw_gather_u64index_s64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svld1sw_gather_index_s64 (p0, x0, svextw_x (p0, z1))) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_gather_u64.c -new file mode 100644 -index 000000000..ffa85eb3e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_gather_u64.c -@@ -0,0 +1,308 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1sw_gather_u64_tied1: -+** ld1sw z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sw_gather_u64_tied1, svuint64_t, svuint64_t, -+ z0_res = svld1sw_gather_u64base_u64 (p0, z0), -+ z0_res = svld1sw_gather_u64 (p0, z0)) -+ -+/* -+** ld1sw_gather_u64_untied: -+** ld1sw z0\.d, p0/z, \[z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sw_gather_u64_untied, svuint64_t, svuint64_t, -+ z0_res = svld1sw_gather_u64base_u64 (p0, z1), -+ z0_res = svld1sw_gather_u64 (p0, z1)) -+ -+/* -+** ld1sw_gather_x0_u64_offset: -+** ld1sw z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sw_gather_x0_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1sw_gather_u64base_offset_u64 (p0, z0, x0), -+ z0_res = svld1sw_gather_offset_u64 (p0, z0, x0)) -+ -+/* -+** ld1sw_gather_m4_u64_offset: -+** mov (x[0-9]+), #?-4 -+** ld1sw z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sw_gather_m4_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1sw_gather_u64base_offset_u64 (p0, z0, -4), -+ z0_res = svld1sw_gather_offset_u64 (p0, z0, -4)) -+ -+/* -+** ld1sw_gather_0_u64_offset: -+** ld1sw z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sw_gather_0_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1sw_gather_u64base_offset_u64 (p0, z0, 0), -+ z0_res = svld1sw_gather_offset_u64 (p0, z0, 0)) -+ -+/* -+** ld1sw_gather_5_u64_offset: -+** mov (x[0-9]+), #?5 -+** ld1sw z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sw_gather_5_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1sw_gather_u64base_offset_u64 (p0, z0, 5), -+ z0_res = svld1sw_gather_offset_u64 (p0, z0, 5)) -+ -+/* -+** ld1sw_gather_6_u64_offset: -+** mov (x[0-9]+), #?6 -+** ld1sw z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sw_gather_6_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1sw_gather_u64base_offset_u64 (p0, z0, 6), -+ z0_res = svld1sw_gather_offset_u64 (p0, z0, 6)) -+ -+/* -+** ld1sw_gather_7_u64_offset: -+** mov (x[0-9]+), #?7 -+** ld1sw z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sw_gather_7_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1sw_gather_u64base_offset_u64 (p0, z0, 7), -+ z0_res = svld1sw_gather_offset_u64 (p0, z0, 7)) -+ -+/* -+** ld1sw_gather_8_u64_offset: -+** ld1sw z0\.d, p0/z, \[z0\.d, #8\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sw_gather_8_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1sw_gather_u64base_offset_u64 (p0, z0, 8), -+ z0_res = svld1sw_gather_offset_u64 (p0, z0, 8)) -+ -+/* -+** ld1sw_gather_124_u64_offset: -+** ld1sw z0\.d, p0/z, \[z0\.d, #124\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sw_gather_124_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1sw_gather_u64base_offset_u64 (p0, z0, 124), -+ z0_res = svld1sw_gather_offset_u64 (p0, z0, 124)) -+ -+/* -+** ld1sw_gather_128_u64_offset: -+** mov (x[0-9]+), #?128 -+** ld1sw z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sw_gather_128_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1sw_gather_u64base_offset_u64 (p0, z0, 128), -+ z0_res = svld1sw_gather_offset_u64 (p0, z0, 128)) -+ -+/* -+** ld1sw_gather_x0_u64_index: -+** lsl (x[0-9]+), x0, #?2 -+** ld1sw z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sw_gather_x0_u64_index, svuint64_t, svuint64_t, -+ z0_res = svld1sw_gather_u64base_index_u64 (p0, z0, x0), -+ z0_res = svld1sw_gather_index_u64 (p0, z0, x0)) -+ -+/* -+** ld1sw_gather_m1_u64_index: -+** mov (x[0-9]+), #?-4 -+** ld1sw 
z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sw_gather_m1_u64_index, svuint64_t, svuint64_t, -+ z0_res = svld1sw_gather_u64base_index_u64 (p0, z0, -1), -+ z0_res = svld1sw_gather_index_u64 (p0, z0, -1)) -+ -+/* -+** ld1sw_gather_0_u64_index: -+** ld1sw z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sw_gather_0_u64_index, svuint64_t, svuint64_t, -+ z0_res = svld1sw_gather_u64base_index_u64 (p0, z0, 0), -+ z0_res = svld1sw_gather_index_u64 (p0, z0, 0)) -+ -+/* -+** ld1sw_gather_5_u64_index: -+** ld1sw z0\.d, p0/z, \[z0\.d, #20\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sw_gather_5_u64_index, svuint64_t, svuint64_t, -+ z0_res = svld1sw_gather_u64base_index_u64 (p0, z0, 5), -+ z0_res = svld1sw_gather_index_u64 (p0, z0, 5)) -+ -+/* -+** ld1sw_gather_31_u64_index: -+** ld1sw z0\.d, p0/z, \[z0\.d, #124\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sw_gather_31_u64_index, svuint64_t, svuint64_t, -+ z0_res = svld1sw_gather_u64base_index_u64 (p0, z0, 31), -+ z0_res = svld1sw_gather_index_u64 (p0, z0, 31)) -+ -+/* -+** ld1sw_gather_32_u64_index: -+** mov (x[0-9]+), #?128 -+** ld1sw z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1sw_gather_32_u64_index, svuint64_t, svuint64_t, -+ z0_res = svld1sw_gather_u64base_index_u64 (p0, z0, 32), -+ z0_res = svld1sw_gather_index_u64 (p0, z0, 32)) -+ -+/* -+** ld1sw_gather_x0_u64_s64offset: -+** ld1sw z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sw_gather_x0_u64_s64offset, svuint64_t, int32_t, svint64_t, -+ z0_res = svld1sw_gather_s64offset_u64 (p0, x0, z0), -+ z0_res = svld1sw_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ld1sw_gather_tied1_u64_s64offset: -+** ld1sw z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sw_gather_tied1_u64_s64offset, svuint64_t, int32_t, svint64_t, -+ z0_res = svld1sw_gather_s64offset_u64 (p0, x0, z0), -+ z0_res = svld1sw_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ld1sw_gather_untied_u64_s64offset: -+** ld1sw z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sw_gather_untied_u64_s64offset, svuint64_t, int32_t, svint64_t, -+ z0_res = svld1sw_gather_s64offset_u64 (p0, x0, z1), -+ z0_res = svld1sw_gather_offset_u64 (p0, x0, z1)) -+ -+/* -+** ld1sw_gather_ext_u64_s64offset: -+** ld1sw z0\.d, p0/z, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sw_gather_ext_u64_s64offset, svuint64_t, int32_t, svint64_t, -+ z0_res = svld1sw_gather_s64offset_u64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svld1sw_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ld1sw_gather_x0_u64_u64offset: -+** ld1sw z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sw_gather_x0_u64_u64offset, svuint64_t, int32_t, svuint64_t, -+ z0_res = svld1sw_gather_u64offset_u64 (p0, x0, z0), -+ z0_res = svld1sw_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ld1sw_gather_tied1_u64_u64offset: -+** ld1sw z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sw_gather_tied1_u64_u64offset, svuint64_t, int32_t, svuint64_t, -+ z0_res = svld1sw_gather_u64offset_u64 (p0, x0, z0), -+ z0_res = svld1sw_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ld1sw_gather_untied_u64_u64offset: -+** ld1sw z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sw_gather_untied_u64_u64offset, svuint64_t, int32_t, svuint64_t, -+ z0_res = svld1sw_gather_u64offset_u64 (p0, x0, z1), -+ z0_res = svld1sw_gather_offset_u64 (p0, x0, z1)) -+ -+/* -+** ld1sw_gather_ext_u64_u64offset: -+** ld1sw z0\.d, p0/z, \[x0, z1\.d, uxtw\] -+** ret -+*/ 
-+TEST_LOAD_GATHER_SZ (ld1sw_gather_ext_u64_u64offset, svuint64_t, int32_t, svuint64_t, -+ z0_res = svld1sw_gather_u64offset_u64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svld1sw_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ld1sw_gather_x0_u64_s64index: -+** ld1sw z0\.d, p0/z, \[x0, z0\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sw_gather_x0_u64_s64index, svuint64_t, int32_t, svint64_t, -+ z0_res = svld1sw_gather_s64index_u64 (p0, x0, z0), -+ z0_res = svld1sw_gather_index_u64 (p0, x0, z0)) -+ -+/* -+** ld1sw_gather_tied1_u64_s64index: -+** ld1sw z0\.d, p0/z, \[x0, z0\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sw_gather_tied1_u64_s64index, svuint64_t, int32_t, svint64_t, -+ z0_res = svld1sw_gather_s64index_u64 (p0, x0, z0), -+ z0_res = svld1sw_gather_index_u64 (p0, x0, z0)) -+ -+/* -+** ld1sw_gather_untied_u64_s64index: -+** ld1sw z0\.d, p0/z, \[x0, z1\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sw_gather_untied_u64_s64index, svuint64_t, int32_t, svint64_t, -+ z0_res = svld1sw_gather_s64index_u64 (p0, x0, z1), -+ z0_res = svld1sw_gather_index_u64 (p0, x0, z1)) -+ -+/* -+** ld1sw_gather_ext_u64_s64index: -+** ld1sw z0\.d, p0/z, \[x0, z1\.d, sxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sw_gather_ext_u64_s64index, svuint64_t, int32_t, svint64_t, -+ z0_res = svld1sw_gather_s64index_u64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svld1sw_gather_index_u64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ld1sw_gather_x0_u64_u64index: -+** ld1sw z0\.d, p0/z, \[x0, z0\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sw_gather_x0_u64_u64index, svuint64_t, int32_t, svuint64_t, -+ z0_res = svld1sw_gather_u64index_u64 (p0, x0, z0), -+ z0_res = svld1sw_gather_index_u64 (p0, x0, z0)) -+ -+/* -+** ld1sw_gather_tied1_u64_u64index: -+** ld1sw z0\.d, p0/z, \[x0, z0\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sw_gather_tied1_u64_u64index, svuint64_t, int32_t, svuint64_t, -+ z0_res = svld1sw_gather_u64index_u64 (p0, x0, z0), -+ z0_res = svld1sw_gather_index_u64 (p0, x0, z0)) -+ -+/* -+** ld1sw_gather_untied_u64_u64index: -+** ld1sw z0\.d, p0/z, \[x0, z1\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sw_gather_untied_u64_u64index, svuint64_t, int32_t, svuint64_t, -+ z0_res = svld1sw_gather_u64index_u64 (p0, x0, z1), -+ z0_res = svld1sw_gather_index_u64 (p0, x0, z1)) -+ -+/* -+** ld1sw_gather_ext_u64_u64index: -+** ld1sw z0\.d, p0/z, \[x0, z1\.d, uxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1sw_gather_ext_u64_u64index, svuint64_t, int32_t, svuint64_t, -+ z0_res = svld1sw_gather_u64index_u64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svld1sw_gather_index_u64 (p0, x0, svextw_x (p0, z1))) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_s64.c -new file mode 100644 -index 000000000..019a12b20 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_s64.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1sw_s64_base: -+** ld1sw z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sw_s64_base, svint64_t, int32_t, -+ z0 = svld1sw_s64 (p0, x0), -+ z0 = svld1sw_s64 (p0, x0)) -+ -+/* -+** ld1sw_s64_index: -+** ld1sw z0\.d, p0/z, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_LOAD (ld1sw_s64_index, svint64_t, int32_t, -+ z0 = svld1sw_s64 (p0, x0 + x1), -+ z0 = svld1sw_s64 (p0, x0 + x1)) -+ -+/* -+** ld1sw_s64_1: -+** ld1sw z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sw_s64_1, svint64_t, int32_t, -+ z0 = svld1sw_s64 (p0, x0 + svcntd ()), -+ z0 = svld1sw_s64 (p0, x0 + svcntd ())) -+ -+/* -+** ld1sw_s64_7: -+** ld1sw z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sw_s64_7, svint64_t, int32_t, -+ z0 = svld1sw_s64 (p0, x0 + svcntd () * 7), -+ z0 = svld1sw_s64 (p0, x0 + svcntd () * 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1sw_s64_8: -+** incb x0, all, mul #4 -+** ld1sw z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sw_s64_8, svint64_t, int32_t, -+ z0 = svld1sw_s64 (p0, x0 + svcntd () * 8), -+ z0 = svld1sw_s64 (p0, x0 + svcntd () * 8)) -+ -+/* -+** ld1sw_s64_m1: -+** ld1sw z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sw_s64_m1, svint64_t, int32_t, -+ z0 = svld1sw_s64 (p0, x0 - svcntd ()), -+ z0 = svld1sw_s64 (p0, x0 - svcntd ())) -+ -+/* -+** ld1sw_s64_m8: -+** ld1sw z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sw_s64_m8, svint64_t, int32_t, -+ z0 = svld1sw_s64 (p0, x0 - svcntd () * 8), -+ z0 = svld1sw_s64 (p0, x0 - svcntd () * 8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1sw_s64_m9: -+** dech x0, all, mul #9 -+** ld1sw z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sw_s64_m9, svint64_t, int32_t, -+ z0 = svld1sw_s64 (p0, x0 - svcntd () * 9), -+ z0 = svld1sw_s64 (p0, x0 - svcntd () * 9)) -+ -+/* -+** ld1sw_vnum_s64_0: -+** ld1sw z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sw_vnum_s64_0, svint64_t, int32_t, -+ z0 = svld1sw_vnum_s64 (p0, x0, 0), -+ z0 = svld1sw_vnum_s64 (p0, x0, 0)) -+ -+/* -+** ld1sw_vnum_s64_1: -+** ld1sw z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sw_vnum_s64_1, svint64_t, int32_t, -+ z0 = svld1sw_vnum_s64 (p0, x0, 1), -+ z0 = svld1sw_vnum_s64 (p0, x0, 1)) -+ -+/* -+** ld1sw_vnum_s64_7: -+** ld1sw z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sw_vnum_s64_7, svint64_t, int32_t, -+ z0 = svld1sw_vnum_s64 (p0, x0, 7), -+ z0 = svld1sw_vnum_s64 (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1sw_vnum_s64_8: -+** incb x0, all, mul #4 -+** ld1sw z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sw_vnum_s64_8, svint64_t, int32_t, -+ z0 = svld1sw_vnum_s64 (p0, x0, 8), -+ z0 = svld1sw_vnum_s64 (p0, x0, 8)) -+ -+/* -+** ld1sw_vnum_s64_m1: -+** ld1sw z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sw_vnum_s64_m1, svint64_t, int32_t, -+ z0 = svld1sw_vnum_s64 (p0, x0, -1), -+ z0 = svld1sw_vnum_s64 (p0, x0, -1)) -+ -+/* -+** ld1sw_vnum_s64_m8: -+** ld1sw z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sw_vnum_s64_m8, svint64_t, int32_t, -+ z0 = svld1sw_vnum_s64 (p0, x0, -8), -+ z0 = svld1sw_vnum_s64 (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld1sw_vnum_s64_m9: -+** dech x0, all, mul #9 -+** ld1sw z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sw_vnum_s64_m9, svint64_t, int32_t, -+ z0 = svld1sw_vnum_s64 (p0, x0, -9), -+ z0 = svld1sw_vnum_s64 (p0, x0, -9)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld1sw_vnum_s64_x1: -+** cnth (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld1sw z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld1sw_vnum_s64_x1, svint64_t, int32_t, -+ z0 = svld1sw_vnum_s64 (p0, x0, x1), -+ z0 = svld1sw_vnum_s64 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_u64.c -new file mode 100644 -index 000000000..4c291c243 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1sw_u64.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1sw_u64_base: -+** ld1sw z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sw_u64_base, svuint64_t, int32_t, -+ z0 = svld1sw_u64 (p0, x0), -+ z0 = svld1sw_u64 (p0, x0)) -+ -+/* -+** ld1sw_u64_index: -+** ld1sw z0\.d, p0/z, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_LOAD (ld1sw_u64_index, svuint64_t, int32_t, -+ z0 = svld1sw_u64 (p0, x0 + x1), -+ z0 = svld1sw_u64 (p0, x0 + x1)) -+ -+/* -+** ld1sw_u64_1: -+** ld1sw z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sw_u64_1, svuint64_t, int32_t, -+ z0 = svld1sw_u64 (p0, x0 + svcntd ()), -+ z0 = svld1sw_u64 (p0, x0 + svcntd ())) -+ -+/* -+** ld1sw_u64_7: -+** ld1sw z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sw_u64_7, svuint64_t, int32_t, -+ z0 = svld1sw_u64 (p0, x0 + svcntd () * 7), -+ z0 = svld1sw_u64 (p0, x0 + svcntd () * 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1sw_u64_8: -+** incb x0, all, mul #4 -+** ld1sw z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sw_u64_8, svuint64_t, int32_t, -+ z0 = svld1sw_u64 (p0, x0 + svcntd () * 8), -+ z0 = svld1sw_u64 (p0, x0 + svcntd () * 8)) -+ -+/* -+** ld1sw_u64_m1: -+** ld1sw z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sw_u64_m1, svuint64_t, int32_t, -+ z0 = svld1sw_u64 (p0, x0 - svcntd ()), -+ z0 = svld1sw_u64 (p0, x0 - svcntd ())) -+ -+/* -+** ld1sw_u64_m8: -+** ld1sw z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sw_u64_m8, svuint64_t, int32_t, -+ z0 = svld1sw_u64 (p0, x0 - svcntd () * 8), -+ z0 = svld1sw_u64 (p0, x0 - svcntd () * 8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1sw_u64_m9: -+** dech x0, all, mul #9 -+** ld1sw z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sw_u64_m9, svuint64_t, int32_t, -+ z0 = svld1sw_u64 (p0, x0 - svcntd () * 9), -+ z0 = svld1sw_u64 (p0, x0 - svcntd () * 9)) -+ -+/* -+** ld1sw_vnum_u64_0: -+** ld1sw z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sw_vnum_u64_0, svuint64_t, int32_t, -+ z0 = svld1sw_vnum_u64 (p0, x0, 0), -+ z0 = svld1sw_vnum_u64 (p0, x0, 0)) -+ -+/* -+** ld1sw_vnum_u64_1: -+** ld1sw z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sw_vnum_u64_1, svuint64_t, int32_t, -+ z0 = svld1sw_vnum_u64 (p0, x0, 1), -+ z0 = svld1sw_vnum_u64 (p0, x0, 1)) -+ -+/* -+** ld1sw_vnum_u64_7: -+** ld1sw z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sw_vnum_u64_7, svuint64_t, int32_t, -+ z0 = svld1sw_vnum_u64 (p0, x0, 7), -+ z0 = svld1sw_vnum_u64 (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld1sw_vnum_u64_8: -+** incb x0, all, mul #4 -+** ld1sw z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sw_vnum_u64_8, svuint64_t, int32_t, -+ z0 = svld1sw_vnum_u64 (p0, x0, 8), -+ z0 = svld1sw_vnum_u64 (p0, x0, 8)) -+ -+/* -+** ld1sw_vnum_u64_m1: -+** ld1sw z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sw_vnum_u64_m1, svuint64_t, int32_t, -+ z0 = svld1sw_vnum_u64 (p0, x0, -1), -+ z0 = svld1sw_vnum_u64 (p0, x0, -1)) -+ -+/* -+** ld1sw_vnum_u64_m8: -+** ld1sw z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1sw_vnum_u64_m8, svuint64_t, int32_t, -+ z0 = svld1sw_vnum_u64 (p0, x0, -8), -+ z0 = svld1sw_vnum_u64 (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1sw_vnum_u64_m9: -+** dech x0, all, mul #9 -+** ld1sw z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1sw_vnum_u64_m9, svuint64_t, int32_t, -+ z0 = svld1sw_vnum_u64 (p0, x0, -9), -+ z0 = svld1sw_vnum_u64 (p0, x0, -9)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld1sw_vnum_u64_x1: -+** cnth (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld1sw z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld1sw_vnum_u64_x1, svuint64_t, int32_t, -+ z0 = svld1sw_vnum_u64 (p0, x0, x1), -+ z0 = svld1sw_vnum_u64 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_s32.c -new file mode 100644 -index 000000000..a9c418265 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_s32.c -@@ -0,0 +1,131 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1ub_gather_s32_tied1: -+** ld1b z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1ub_gather_s32_tied1, svint32_t, svuint32_t, -+ z0_res = svld1ub_gather_u32base_s32 (p0, z0), -+ z0_res = svld1ub_gather_s32 (p0, z0)) -+ -+/* -+** ld1ub_gather_s32_untied: -+** ld1b z0\.s, p0/z, \[z1\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1ub_gather_s32_untied, svint32_t, svuint32_t, -+ z0_res = svld1ub_gather_u32base_s32 (p0, z1), -+ z0_res = svld1ub_gather_s32 (p0, z1)) -+ -+/* -+** ld1ub_gather_x0_s32_offset: -+** ld1b z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1ub_gather_x0_s32_offset, svint32_t, svuint32_t, -+ z0_res = svld1ub_gather_u32base_offset_s32 (p0, z0, x0), -+ z0_res = svld1ub_gather_offset_s32 (p0, z0, x0)) -+ -+/* -+** ld1ub_gather_m1_s32_offset: -+** mov (x[0-9]+), #?-1 -+** ld1b z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1ub_gather_m1_s32_offset, svint32_t, svuint32_t, -+ z0_res = svld1ub_gather_u32base_offset_s32 (p0, z0, -1), -+ z0_res = svld1ub_gather_offset_s32 (p0, z0, -1)) -+ -+/* -+** ld1ub_gather_0_s32_offset: -+** ld1b z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1ub_gather_0_s32_offset, svint32_t, svuint32_t, -+ z0_res = svld1ub_gather_u32base_offset_s32 (p0, z0, 0), -+ z0_res = svld1ub_gather_offset_s32 (p0, z0, 0)) -+ -+/* -+** ld1ub_gather_5_s32_offset: -+** ld1b z0\.s, p0/z, \[z0\.s, #5\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1ub_gather_5_s32_offset, svint32_t, svuint32_t, -+ z0_res = svld1ub_gather_u32base_offset_s32 (p0, z0, 5), -+ z0_res = svld1ub_gather_offset_s32 (p0, z0, 5)) -+ -+/* -+** ld1ub_gather_31_s32_offset: -+** ld1b z0\.s, p0/z, \[z0\.s, #31\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1ub_gather_31_s32_offset, svint32_t, svuint32_t, -+ z0_res = 
svld1ub_gather_u32base_offset_s32 (p0, z0, 31), -+ z0_res = svld1ub_gather_offset_s32 (p0, z0, 31)) -+ -+/* -+** ld1ub_gather_32_s32_offset: -+** mov (x[0-9]+), #?32 -+** ld1b z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1ub_gather_32_s32_offset, svint32_t, svuint32_t, -+ z0_res = svld1ub_gather_u32base_offset_s32 (p0, z0, 32), -+ z0_res = svld1ub_gather_offset_s32 (p0, z0, 32)) -+ -+/* -+** ld1ub_gather_x0_s32_s32offset: -+** ld1b z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1ub_gather_x0_s32_s32offset, svint32_t, uint8_t, svint32_t, -+ z0_res = svld1ub_gather_s32offset_s32 (p0, x0, z0), -+ z0_res = svld1ub_gather_offset_s32 (p0, x0, z0)) -+ -+/* -+** ld1ub_gather_tied1_s32_s32offset: -+** ld1b z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1ub_gather_tied1_s32_s32offset, svint32_t, uint8_t, svint32_t, -+ z0_res = svld1ub_gather_s32offset_s32 (p0, x0, z0), -+ z0_res = svld1ub_gather_offset_s32 (p0, x0, z0)) -+ -+/* -+** ld1ub_gather_untied_s32_s32offset: -+** ld1b z0\.s, p0/z, \[x0, z1\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1ub_gather_untied_s32_s32offset, svint32_t, uint8_t, svint32_t, -+ z0_res = svld1ub_gather_s32offset_s32 (p0, x0, z1), -+ z0_res = svld1ub_gather_offset_s32 (p0, x0, z1)) -+ -+/* -+** ld1ub_gather_x0_s32_u32offset: -+** ld1b z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1ub_gather_x0_s32_u32offset, svint32_t, uint8_t, svuint32_t, -+ z0_res = svld1ub_gather_u32offset_s32 (p0, x0, z0), -+ z0_res = svld1ub_gather_offset_s32 (p0, x0, z0)) -+ -+/* -+** ld1ub_gather_tied1_s32_u32offset: -+** ld1b z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1ub_gather_tied1_s32_u32offset, svint32_t, uint8_t, svuint32_t, -+ z0_res = svld1ub_gather_u32offset_s32 (p0, x0, z0), -+ z0_res = svld1ub_gather_offset_s32 (p0, x0, z0)) -+ -+/* -+** ld1ub_gather_untied_s32_u32offset: -+** ld1b z0\.s, p0/z, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1ub_gather_untied_s32_u32offset, svint32_t, uint8_t, svuint32_t, -+ z0_res = svld1ub_gather_u32offset_s32 (p0, x0, z1), -+ z0_res = svld1ub_gather_offset_s32 (p0, x0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_s64.c -new file mode 100644 -index 000000000..99af86ddf ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_s64.c -@@ -0,0 +1,149 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1ub_gather_s64_tied1: -+** ld1b z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1ub_gather_s64_tied1, svint64_t, svuint64_t, -+ z0_res = svld1ub_gather_u64base_s64 (p0, z0), -+ z0_res = svld1ub_gather_s64 (p0, z0)) -+ -+/* -+** ld1ub_gather_s64_untied: -+** ld1b z0\.d, p0/z, \[z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1ub_gather_s64_untied, svint64_t, svuint64_t, -+ z0_res = svld1ub_gather_u64base_s64 (p0, z1), -+ z0_res = svld1ub_gather_s64 (p0, z1)) -+ -+/* -+** ld1ub_gather_x0_s64_offset: -+** ld1b z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1ub_gather_x0_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1ub_gather_u64base_offset_s64 (p0, z0, x0), -+ z0_res = svld1ub_gather_offset_s64 (p0, z0, x0)) -+ -+/* -+** ld1ub_gather_m1_s64_offset: -+** mov (x[0-9]+), #?-1 -+** ld1b z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1ub_gather_m1_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1ub_gather_u64base_offset_s64 (p0, z0, -1), -+ z0_res = svld1ub_gather_offset_s64 (p0, z0, -1)) -+ -+/* -+** ld1ub_gather_0_s64_offset: -+** ld1b z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1ub_gather_0_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1ub_gather_u64base_offset_s64 (p0, z0, 0), -+ z0_res = svld1ub_gather_offset_s64 (p0, z0, 0)) -+ -+/* -+** ld1ub_gather_5_s64_offset: -+** ld1b z0\.d, p0/z, \[z0\.d, #5\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1ub_gather_5_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1ub_gather_u64base_offset_s64 (p0, z0, 5), -+ z0_res = svld1ub_gather_offset_s64 (p0, z0, 5)) -+ -+/* -+** ld1ub_gather_31_s64_offset: -+** ld1b z0\.d, p0/z, \[z0\.d, #31\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1ub_gather_31_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1ub_gather_u64base_offset_s64 (p0, z0, 31), -+ z0_res = svld1ub_gather_offset_s64 (p0, z0, 31)) -+ -+/* -+** ld1ub_gather_32_s64_offset: -+** mov (x[0-9]+), #?32 -+** ld1b z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1ub_gather_32_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1ub_gather_u64base_offset_s64 (p0, z0, 32), -+ z0_res = svld1ub_gather_offset_s64 (p0, z0, 32)) -+ -+/* -+** ld1ub_gather_x0_s64_s64offset: -+** ld1b z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1ub_gather_x0_s64_s64offset, svint64_t, uint8_t, svint64_t, -+ z0_res = svld1ub_gather_s64offset_s64 (p0, x0, z0), -+ z0_res = svld1ub_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ld1ub_gather_tied1_s64_s64offset: -+** ld1b z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1ub_gather_tied1_s64_s64offset, svint64_t, uint8_t, svint64_t, -+ z0_res = svld1ub_gather_s64offset_s64 (p0, x0, z0), -+ z0_res = svld1ub_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ld1ub_gather_untied_s64_s64offset: -+** ld1b z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1ub_gather_untied_s64_s64offset, svint64_t, uint8_t, svint64_t, -+ z0_res = svld1ub_gather_s64offset_s64 (p0, x0, z1), -+ z0_res = svld1ub_gather_offset_s64 (p0, x0, z1)) -+ -+/* -+** ld1ub_gather_ext_s64_s64offset: -+** ld1b z0\.d, p0/z, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1ub_gather_ext_s64_s64offset, svint64_t, uint8_t, svint64_t, -+ z0_res = svld1ub_gather_s64offset_s64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svld1ub_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ld1ub_gather_x0_s64_u64offset: -+** ld1b z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ 
-+TEST_LOAD_GATHER_SZ (ld1ub_gather_x0_s64_u64offset, svint64_t, uint8_t, svuint64_t, -+ z0_res = svld1ub_gather_u64offset_s64 (p0, x0, z0), -+ z0_res = svld1ub_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ld1ub_gather_tied1_s64_u64offset: -+** ld1b z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1ub_gather_tied1_s64_u64offset, svint64_t, uint8_t, svuint64_t, -+ z0_res = svld1ub_gather_u64offset_s64 (p0, x0, z0), -+ z0_res = svld1ub_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ld1ub_gather_untied_s64_u64offset: -+** ld1b z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1ub_gather_untied_s64_u64offset, svint64_t, uint8_t, svuint64_t, -+ z0_res = svld1ub_gather_u64offset_s64 (p0, x0, z1), -+ z0_res = svld1ub_gather_offset_s64 (p0, x0, z1)) -+ -+/* -+** ld1ub_gather_ext_s64_u64offset: -+** ld1b z0\.d, p0/z, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1ub_gather_ext_s64_u64offset, svint64_t, uint8_t, svuint64_t, -+ z0_res = svld1ub_gather_u64offset_s64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svld1ub_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_u32.c -new file mode 100644 -index 000000000..77c7e0a2d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_u32.c -@@ -0,0 +1,131 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1ub_gather_u32_tied1: -+** ld1b z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1ub_gather_u32_tied1, svuint32_t, svuint32_t, -+ z0_res = svld1ub_gather_u32base_u32 (p0, z0), -+ z0_res = svld1ub_gather_u32 (p0, z0)) -+ -+/* -+** ld1ub_gather_u32_untied: -+** ld1b z0\.s, p0/z, \[z1\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1ub_gather_u32_untied, svuint32_t, svuint32_t, -+ z0_res = svld1ub_gather_u32base_u32 (p0, z1), -+ z0_res = svld1ub_gather_u32 (p0, z1)) -+ -+/* -+** ld1ub_gather_x0_u32_offset: -+** ld1b z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1ub_gather_x0_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svld1ub_gather_u32base_offset_u32 (p0, z0, x0), -+ z0_res = svld1ub_gather_offset_u32 (p0, z0, x0)) -+ -+/* -+** ld1ub_gather_m1_u32_offset: -+** mov (x[0-9]+), #?-1 -+** ld1b z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1ub_gather_m1_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svld1ub_gather_u32base_offset_u32 (p0, z0, -1), -+ z0_res = svld1ub_gather_offset_u32 (p0, z0, -1)) -+ -+/* -+** ld1ub_gather_0_u32_offset: -+** ld1b z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1ub_gather_0_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svld1ub_gather_u32base_offset_u32 (p0, z0, 0), -+ z0_res = svld1ub_gather_offset_u32 (p0, z0, 0)) -+ -+/* -+** ld1ub_gather_5_u32_offset: -+** ld1b z0\.s, p0/z, \[z0\.s, #5\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1ub_gather_5_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svld1ub_gather_u32base_offset_u32 (p0, z0, 5), -+ z0_res = svld1ub_gather_offset_u32 (p0, z0, 5)) -+ -+/* -+** ld1ub_gather_31_u32_offset: -+** ld1b z0\.s, p0/z, \[z0\.s, #31\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1ub_gather_31_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svld1ub_gather_u32base_offset_u32 (p0, z0, 31), -+ z0_res = svld1ub_gather_offset_u32 (p0, z0, 31)) -+ -+/* -+** ld1ub_gather_32_u32_offset: -+** mov (x[0-9]+), #?32 -+** ld1b z0\.s, p0/z, \[\1, z0\.s, 
uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1ub_gather_32_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svld1ub_gather_u32base_offset_u32 (p0, z0, 32), -+ z0_res = svld1ub_gather_offset_u32 (p0, z0, 32)) -+ -+/* -+** ld1ub_gather_x0_u32_s32offset: -+** ld1b z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1ub_gather_x0_u32_s32offset, svuint32_t, uint8_t, svint32_t, -+ z0_res = svld1ub_gather_s32offset_u32 (p0, x0, z0), -+ z0_res = svld1ub_gather_offset_u32 (p0, x0, z0)) -+ -+/* -+** ld1ub_gather_tied1_u32_s32offset: -+** ld1b z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1ub_gather_tied1_u32_s32offset, svuint32_t, uint8_t, svint32_t, -+ z0_res = svld1ub_gather_s32offset_u32 (p0, x0, z0), -+ z0_res = svld1ub_gather_offset_u32 (p0, x0, z0)) -+ -+/* -+** ld1ub_gather_untied_u32_s32offset: -+** ld1b z0\.s, p0/z, \[x0, z1\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1ub_gather_untied_u32_s32offset, svuint32_t, uint8_t, svint32_t, -+ z0_res = svld1ub_gather_s32offset_u32 (p0, x0, z1), -+ z0_res = svld1ub_gather_offset_u32 (p0, x0, z1)) -+ -+/* -+** ld1ub_gather_x0_u32_u32offset: -+** ld1b z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1ub_gather_x0_u32_u32offset, svuint32_t, uint8_t, svuint32_t, -+ z0_res = svld1ub_gather_u32offset_u32 (p0, x0, z0), -+ z0_res = svld1ub_gather_offset_u32 (p0, x0, z0)) -+ -+/* -+** ld1ub_gather_tied1_u32_u32offset: -+** ld1b z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1ub_gather_tied1_u32_u32offset, svuint32_t, uint8_t, svuint32_t, -+ z0_res = svld1ub_gather_u32offset_u32 (p0, x0, z0), -+ z0_res = svld1ub_gather_offset_u32 (p0, x0, z0)) -+ -+/* -+** ld1ub_gather_untied_u32_u32offset: -+** ld1b z0\.s, p0/z, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1ub_gather_untied_u32_u32offset, svuint32_t, uint8_t, svuint32_t, -+ z0_res = svld1ub_gather_u32offset_u32 (p0, x0, z1), -+ z0_res = svld1ub_gather_offset_u32 (p0, x0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_u64.c -new file mode 100644 -index 000000000..b605f8b67 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_gather_u64.c -@@ -0,0 +1,149 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1ub_gather_u64_tied1: -+** ld1b z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1ub_gather_u64_tied1, svuint64_t, svuint64_t, -+ z0_res = svld1ub_gather_u64base_u64 (p0, z0), -+ z0_res = svld1ub_gather_u64 (p0, z0)) -+ -+/* -+** ld1ub_gather_u64_untied: -+** ld1b z0\.d, p0/z, \[z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1ub_gather_u64_untied, svuint64_t, svuint64_t, -+ z0_res = svld1ub_gather_u64base_u64 (p0, z1), -+ z0_res = svld1ub_gather_u64 (p0, z1)) -+ -+/* -+** ld1ub_gather_x0_u64_offset: -+** ld1b z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1ub_gather_x0_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1ub_gather_u64base_offset_u64 (p0, z0, x0), -+ z0_res = svld1ub_gather_offset_u64 (p0, z0, x0)) -+ -+/* -+** ld1ub_gather_m1_u64_offset: -+** mov (x[0-9]+), #?-1 -+** ld1b z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1ub_gather_m1_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1ub_gather_u64base_offset_u64 (p0, z0, -1), -+ z0_res = svld1ub_gather_offset_u64 (p0, z0, -1)) -+ -+/* -+** ld1ub_gather_0_u64_offset: -+** ld1b z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1ub_gather_0_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1ub_gather_u64base_offset_u64 (p0, z0, 0), -+ z0_res = svld1ub_gather_offset_u64 (p0, z0, 0)) -+ -+/* -+** ld1ub_gather_5_u64_offset: -+** ld1b z0\.d, p0/z, \[z0\.d, #5\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1ub_gather_5_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1ub_gather_u64base_offset_u64 (p0, z0, 5), -+ z0_res = svld1ub_gather_offset_u64 (p0, z0, 5)) -+ -+/* -+** ld1ub_gather_31_u64_offset: -+** ld1b z0\.d, p0/z, \[z0\.d, #31\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1ub_gather_31_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1ub_gather_u64base_offset_u64 (p0, z0, 31), -+ z0_res = svld1ub_gather_offset_u64 (p0, z0, 31)) -+ -+/* -+** ld1ub_gather_32_u64_offset: -+** mov (x[0-9]+), #?32 -+** ld1b z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1ub_gather_32_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1ub_gather_u64base_offset_u64 (p0, z0, 32), -+ z0_res = svld1ub_gather_offset_u64 (p0, z0, 32)) -+ -+/* -+** ld1ub_gather_x0_u64_s64offset: -+** ld1b z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1ub_gather_x0_u64_s64offset, svuint64_t, uint8_t, svint64_t, -+ z0_res = svld1ub_gather_s64offset_u64 (p0, x0, z0), -+ z0_res = svld1ub_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ld1ub_gather_tied1_u64_s64offset: -+** ld1b z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1ub_gather_tied1_u64_s64offset, svuint64_t, uint8_t, svint64_t, -+ z0_res = svld1ub_gather_s64offset_u64 (p0, x0, z0), -+ z0_res = svld1ub_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ld1ub_gather_untied_u64_s64offset: -+** ld1b z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1ub_gather_untied_u64_s64offset, svuint64_t, uint8_t, svint64_t, -+ z0_res = svld1ub_gather_s64offset_u64 (p0, x0, z1), -+ z0_res = svld1ub_gather_offset_u64 (p0, x0, z1)) -+ -+/* -+** ld1ub_gather_ext_u64_s64offset: -+** ld1b z0\.d, p0/z, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1ub_gather_ext_u64_s64offset, svuint64_t, uint8_t, svint64_t, -+ z0_res = svld1ub_gather_s64offset_u64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svld1ub_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ld1ub_gather_x0_u64_u64offset: -+** ld1b z0\.d, p0/z, \[x0, z0\.d\] -+** ret 
-+*/ -+TEST_LOAD_GATHER_SZ (ld1ub_gather_x0_u64_u64offset, svuint64_t, uint8_t, svuint64_t, -+ z0_res = svld1ub_gather_u64offset_u64 (p0, x0, z0), -+ z0_res = svld1ub_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ld1ub_gather_tied1_u64_u64offset: -+** ld1b z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1ub_gather_tied1_u64_u64offset, svuint64_t, uint8_t, svuint64_t, -+ z0_res = svld1ub_gather_u64offset_u64 (p0, x0, z0), -+ z0_res = svld1ub_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ld1ub_gather_untied_u64_u64offset: -+** ld1b z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1ub_gather_untied_u64_u64offset, svuint64_t, uint8_t, svuint64_t, -+ z0_res = svld1ub_gather_u64offset_u64 (p0, x0, z1), -+ z0_res = svld1ub_gather_offset_u64 (p0, x0, z1)) -+ -+/* -+** ld1ub_gather_ext_u64_u64offset: -+** ld1b z0\.d, p0/z, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1ub_gather_ext_u64_u64offset, svuint64_t, uint8_t, svuint64_t, -+ z0_res = svld1ub_gather_u64offset_u64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svld1ub_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_s16.c -new file mode 100644 -index 000000000..c492086b5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_s16.c -@@ -0,0 +1,162 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1ub_s16_base: -+** ld1b z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ub_s16_base, svint16_t, uint8_t, -+ z0 = svld1ub_s16 (p0, x0), -+ z0 = svld1ub_s16 (p0, x0)) -+ -+/* -+** ld1ub_s16_index: -+** ld1b z0\.h, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ld1ub_s16_index, svint16_t, uint8_t, -+ z0 = svld1ub_s16 (p0, x0 + x1), -+ z0 = svld1ub_s16 (p0, x0 + x1)) -+ -+/* -+** ld1ub_s16_1: -+** ld1b z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_s16_1, svint16_t, uint8_t, -+ z0 = svld1ub_s16 (p0, x0 + svcnth ()), -+ z0 = svld1ub_s16 (p0, x0 + svcnth ())) -+ -+/* -+** ld1ub_s16_7: -+** ld1b z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_s16_7, svint16_t, uint8_t, -+ z0 = svld1ub_s16 (p0, x0 + svcnth () * 7), -+ z0 = svld1ub_s16 (p0, x0 + svcnth () * 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1ub_s16_8: -+** incb x0, all, mul #4 -+** ld1b z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ub_s16_8, svint16_t, uint8_t, -+ z0 = svld1ub_s16 (p0, x0 + svcnth () * 8), -+ z0 = svld1ub_s16 (p0, x0 + svcnth () * 8)) -+ -+/* -+** ld1ub_s16_m1: -+** ld1b z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_s16_m1, svint16_t, uint8_t, -+ z0 = svld1ub_s16 (p0, x0 - svcnth ()), -+ z0 = svld1ub_s16 (p0, x0 - svcnth ())) -+ -+/* -+** ld1ub_s16_m8: -+** ld1b z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_s16_m8, svint16_t, uint8_t, -+ z0 = svld1ub_s16 (p0, x0 - svcnth () * 8), -+ z0 = svld1ub_s16 (p0, x0 - svcnth () * 8)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld1ub_s16_m9: -+** dech x0, all, mul #9 -+** ld1b z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ub_s16_m9, svint16_t, uint8_t, -+ z0 = svld1ub_s16 (p0, x0 - svcnth () * 9), -+ z0 = svld1ub_s16 (p0, x0 - svcnth () * 9)) -+ -+/* -+** ld1ub_vnum_s16_0: -+** ld1b z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_s16_0, svint16_t, uint8_t, -+ z0 = svld1ub_vnum_s16 (p0, x0, 0), -+ z0 = svld1ub_vnum_s16 (p0, x0, 0)) -+ -+/* -+** ld1ub_vnum_s16_1: -+** ld1b z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_s16_1, svint16_t, uint8_t, -+ z0 = svld1ub_vnum_s16 (p0, x0, 1), -+ z0 = svld1ub_vnum_s16 (p0, x0, 1)) -+ -+/* -+** ld1ub_vnum_s16_7: -+** ld1b z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_s16_7, svint16_t, uint8_t, -+ z0 = svld1ub_vnum_s16 (p0, x0, 7), -+ z0 = svld1ub_vnum_s16 (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1ub_vnum_s16_8: -+** incb x0, all, mul #4 -+** ld1b z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_s16_8, svint16_t, uint8_t, -+ z0 = svld1ub_vnum_s16 (p0, x0, 8), -+ z0 = svld1ub_vnum_s16 (p0, x0, 8)) -+ -+/* -+** ld1ub_vnum_s16_m1: -+** ld1b z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_s16_m1, svint16_t, uint8_t, -+ z0 = svld1ub_vnum_s16 (p0, x0, -1), -+ z0 = svld1ub_vnum_s16 (p0, x0, -1)) -+ -+/* -+** ld1ub_vnum_s16_m8: -+** ld1b z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_s16_m8, svint16_t, uint8_t, -+ z0 = svld1ub_vnum_s16 (p0, x0, -8), -+ z0 = svld1ub_vnum_s16 (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1ub_vnum_s16_m9: -+** dech x0, all, mul #9 -+** ld1b z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_s16_m9, svint16_t, uint8_t, -+ z0 = svld1ub_vnum_s16 (p0, x0, -9), -+ z0 = svld1ub_vnum_s16 (p0, x0, -9)) -+ -+/* -+** ld1ub_vnum_s16_x1: -+** cnth (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ld1b z0\.h, p0/z, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** ld1b z0\.h, p0/z, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_s16_x1, svint16_t, uint8_t, -+ z0 = svld1ub_vnum_s16 (p0, x0, x1), -+ z0 = svld1ub_vnum_s16 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_s32.c -new file mode 100644 -index 000000000..b2f8c4b04 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_s32.c -@@ -0,0 +1,162 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1ub_s32_base: -+** ld1b z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ub_s32_base, svint32_t, uint8_t, -+ z0 = svld1ub_s32 (p0, x0), -+ z0 = svld1ub_s32 (p0, x0)) -+ -+/* -+** ld1ub_s32_index: -+** ld1b z0\.s, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ld1ub_s32_index, svint32_t, uint8_t, -+ z0 = svld1ub_s32 (p0, x0 + x1), -+ z0 = svld1ub_s32 (p0, x0 + x1)) -+ -+/* -+** ld1ub_s32_1: -+** ld1b z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_s32_1, svint32_t, uint8_t, -+ z0 = svld1ub_s32 (p0, x0 + svcntw ()), -+ z0 = svld1ub_s32 (p0, x0 + svcntw ())) -+ -+/* -+** ld1ub_s32_7: -+** ld1b z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_s32_7, svint32_t, uint8_t, -+ z0 = svld1ub_s32 (p0, x0 + svcntw () * 7), -+ z0 = svld1ub_s32 (p0, x0 + svcntw () * 7)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld1ub_s32_8: -+** incb x0, all, mul #2 -+** ld1b z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ub_s32_8, svint32_t, uint8_t, -+ z0 = svld1ub_s32 (p0, x0 + svcntw () * 8), -+ z0 = svld1ub_s32 (p0, x0 + svcntw () * 8)) -+ -+/* -+** ld1ub_s32_m1: -+** ld1b z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_s32_m1, svint32_t, uint8_t, -+ z0 = svld1ub_s32 (p0, x0 - svcntw ()), -+ z0 = svld1ub_s32 (p0, x0 - svcntw ())) -+ -+/* -+** ld1ub_s32_m8: -+** ld1b z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_s32_m8, svint32_t, uint8_t, -+ z0 = svld1ub_s32 (p0, x0 - svcntw () * 8), -+ z0 = svld1ub_s32 (p0, x0 - svcntw () * 8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1ub_s32_m9: -+** decw x0, all, mul #9 -+** ld1b z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ub_s32_m9, svint32_t, uint8_t, -+ z0 = svld1ub_s32 (p0, x0 - svcntw () * 9), -+ z0 = svld1ub_s32 (p0, x0 - svcntw () * 9)) -+ -+/* -+** ld1ub_vnum_s32_0: -+** ld1b z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_s32_0, svint32_t, uint8_t, -+ z0 = svld1ub_vnum_s32 (p0, x0, 0), -+ z0 = svld1ub_vnum_s32 (p0, x0, 0)) -+ -+/* -+** ld1ub_vnum_s32_1: -+** ld1b z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_s32_1, svint32_t, uint8_t, -+ z0 = svld1ub_vnum_s32 (p0, x0, 1), -+ z0 = svld1ub_vnum_s32 (p0, x0, 1)) -+ -+/* -+** ld1ub_vnum_s32_7: -+** ld1b z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_s32_7, svint32_t, uint8_t, -+ z0 = svld1ub_vnum_s32 (p0, x0, 7), -+ z0 = svld1ub_vnum_s32 (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1ub_vnum_s32_8: -+** incb x0, all, mul #2 -+** ld1b z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_s32_8, svint32_t, uint8_t, -+ z0 = svld1ub_vnum_s32 (p0, x0, 8), -+ z0 = svld1ub_vnum_s32 (p0, x0, 8)) -+ -+/* -+** ld1ub_vnum_s32_m1: -+** ld1b z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_s32_m1, svint32_t, uint8_t, -+ z0 = svld1ub_vnum_s32 (p0, x0, -1), -+ z0 = svld1ub_vnum_s32 (p0, x0, -1)) -+ -+/* -+** ld1ub_vnum_s32_m8: -+** ld1b z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_s32_m8, svint32_t, uint8_t, -+ z0 = svld1ub_vnum_s32 (p0, x0, -8), -+ z0 = svld1ub_vnum_s32 (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1ub_vnum_s32_m9: -+** decw x0, all, mul #9 -+** ld1b z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_s32_m9, svint32_t, uint8_t, -+ z0 = svld1ub_vnum_s32 (p0, x0, -9), -+ z0 = svld1ub_vnum_s32 (p0, x0, -9)) -+ -+/* -+** ld1ub_vnum_s32_x1: -+** cntw (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ld1b z0\.s, p0/z, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** ld1b z0\.s, p0/z, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_s32_x1, svint32_t, uint8_t, -+ z0 = svld1ub_vnum_s32 (p0, x0, x1), -+ z0 = svld1ub_vnum_s32 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_s64.c -new file mode 100644 -index 000000000..d8694bf28 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_s64.c -@@ -0,0 +1,162 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1ub_s64_base: -+** ld1b z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ub_s64_base, svint64_t, uint8_t, -+ z0 = svld1ub_s64 (p0, x0), -+ z0 = svld1ub_s64 (p0, x0)) -+ -+/* -+** ld1ub_s64_index: -+** ld1b z0\.d, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ld1ub_s64_index, svint64_t, uint8_t, -+ z0 = svld1ub_s64 (p0, x0 + x1), -+ z0 = svld1ub_s64 (p0, x0 + x1)) -+ -+/* -+** ld1ub_s64_1: -+** ld1b z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_s64_1, svint64_t, uint8_t, -+ z0 = svld1ub_s64 (p0, x0 + svcntd ()), -+ z0 = svld1ub_s64 (p0, x0 + svcntd ())) -+ -+/* -+** ld1ub_s64_7: -+** ld1b z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_s64_7, svint64_t, uint8_t, -+ z0 = svld1ub_s64 (p0, x0 + svcntd () * 7), -+ z0 = svld1ub_s64 (p0, x0 + svcntd () * 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1ub_s64_8: -+** incb x0 -+** ld1b z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ub_s64_8, svint64_t, uint8_t, -+ z0 = svld1ub_s64 (p0, x0 + svcntd () * 8), -+ z0 = svld1ub_s64 (p0, x0 + svcntd () * 8)) -+ -+/* -+** ld1ub_s64_m1: -+** ld1b z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_s64_m1, svint64_t, uint8_t, -+ z0 = svld1ub_s64 (p0, x0 - svcntd ()), -+ z0 = svld1ub_s64 (p0, x0 - svcntd ())) -+ -+/* -+** ld1ub_s64_m8: -+** ld1b z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_s64_m8, svint64_t, uint8_t, -+ z0 = svld1ub_s64 (p0, x0 - svcntd () * 8), -+ z0 = svld1ub_s64 (p0, x0 - svcntd () * 8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1ub_s64_m9: -+** decd x0, all, mul #9 -+** ld1b z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ub_s64_m9, svint64_t, uint8_t, -+ z0 = svld1ub_s64 (p0, x0 - svcntd () * 9), -+ z0 = svld1ub_s64 (p0, x0 - svcntd () * 9)) -+ -+/* -+** ld1ub_vnum_s64_0: -+** ld1b z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_s64_0, svint64_t, uint8_t, -+ z0 = svld1ub_vnum_s64 (p0, x0, 0), -+ z0 = svld1ub_vnum_s64 (p0, x0, 0)) -+ -+/* -+** ld1ub_vnum_s64_1: -+** ld1b z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_s64_1, svint64_t, uint8_t, -+ z0 = svld1ub_vnum_s64 (p0, x0, 1), -+ z0 = svld1ub_vnum_s64 (p0, x0, 1)) -+ -+/* -+** ld1ub_vnum_s64_7: -+** ld1b z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_s64_7, svint64_t, uint8_t, -+ z0 = svld1ub_vnum_s64 (p0, x0, 7), -+ z0 = svld1ub_vnum_s64 (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1ub_vnum_s64_8: -+** incb x0 -+** ld1b z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_s64_8, svint64_t, uint8_t, -+ z0 = svld1ub_vnum_s64 (p0, x0, 8), -+ z0 = svld1ub_vnum_s64 (p0, x0, 8)) -+ -+/* -+** ld1ub_vnum_s64_m1: -+** ld1b z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_s64_m1, svint64_t, uint8_t, -+ z0 = svld1ub_vnum_s64 (p0, x0, -1), -+ z0 = svld1ub_vnum_s64 (p0, x0, -1)) -+ -+/* -+** ld1ub_vnum_s64_m8: -+** ld1b z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_s64_m8, svint64_t, uint8_t, -+ z0 = svld1ub_vnum_s64 (p0, x0, -8), -+ z0 = svld1ub_vnum_s64 (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld1ub_vnum_s64_m9: -+** decd x0, all, mul #9 -+** ld1b z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_s64_m9, svint64_t, uint8_t, -+ z0 = svld1ub_vnum_s64 (p0, x0, -9), -+ z0 = svld1ub_vnum_s64 (p0, x0, -9)) -+ -+/* -+** ld1ub_vnum_s64_x1: -+** cntd (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ld1b z0\.d, p0/z, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** ld1b z0\.d, p0/z, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_s64_x1, svint64_t, uint8_t, -+ z0 = svld1ub_vnum_s64 (p0, x0, x1), -+ z0 = svld1ub_vnum_s64 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_u16.c -new file mode 100644 -index 000000000..049234ee4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_u16.c -@@ -0,0 +1,162 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1ub_u16_base: -+** ld1b z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ub_u16_base, svuint16_t, uint8_t, -+ z0 = svld1ub_u16 (p0, x0), -+ z0 = svld1ub_u16 (p0, x0)) -+ -+/* -+** ld1ub_u16_index: -+** ld1b z0\.h, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ld1ub_u16_index, svuint16_t, uint8_t, -+ z0 = svld1ub_u16 (p0, x0 + x1), -+ z0 = svld1ub_u16 (p0, x0 + x1)) -+ -+/* -+** ld1ub_u16_1: -+** ld1b z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_u16_1, svuint16_t, uint8_t, -+ z0 = svld1ub_u16 (p0, x0 + svcnth ()), -+ z0 = svld1ub_u16 (p0, x0 + svcnth ())) -+ -+/* -+** ld1ub_u16_7: -+** ld1b z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_u16_7, svuint16_t, uint8_t, -+ z0 = svld1ub_u16 (p0, x0 + svcnth () * 7), -+ z0 = svld1ub_u16 (p0, x0 + svcnth () * 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1ub_u16_8: -+** incb x0, all, mul #4 -+** ld1b z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ub_u16_8, svuint16_t, uint8_t, -+ z0 = svld1ub_u16 (p0, x0 + svcnth () * 8), -+ z0 = svld1ub_u16 (p0, x0 + svcnth () * 8)) -+ -+/* -+** ld1ub_u16_m1: -+** ld1b z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_u16_m1, svuint16_t, uint8_t, -+ z0 = svld1ub_u16 (p0, x0 - svcnth ()), -+ z0 = svld1ub_u16 (p0, x0 - svcnth ())) -+ -+/* -+** ld1ub_u16_m8: -+** ld1b z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_u16_m8, svuint16_t, uint8_t, -+ z0 = svld1ub_u16 (p0, x0 - svcnth () * 8), -+ z0 = svld1ub_u16 (p0, x0 - svcnth () * 8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1ub_u16_m9: -+** dech x0, all, mul #9 -+** ld1b z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ub_u16_m9, svuint16_t, uint8_t, -+ z0 = svld1ub_u16 (p0, x0 - svcnth () * 9), -+ z0 = svld1ub_u16 (p0, x0 - svcnth () * 9)) -+ -+/* -+** ld1ub_vnum_u16_0: -+** ld1b z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_u16_0, svuint16_t, uint8_t, -+ z0 = svld1ub_vnum_u16 (p0, x0, 0), -+ z0 = svld1ub_vnum_u16 (p0, x0, 0)) -+ -+/* -+** ld1ub_vnum_u16_1: -+** ld1b z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_u16_1, svuint16_t, uint8_t, -+ z0 = svld1ub_vnum_u16 (p0, x0, 1), -+ z0 = svld1ub_vnum_u16 (p0, x0, 1)) -+ -+/* -+** ld1ub_vnum_u16_7: -+** ld1b z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_u16_7, svuint16_t, uint8_t, -+ z0 = svld1ub_vnum_u16 (p0, x0, 7), -+ z0 = svld1ub_vnum_u16 (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld1ub_vnum_u16_8: -+** incb x0, all, mul #4 -+** ld1b z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_u16_8, svuint16_t, uint8_t, -+ z0 = svld1ub_vnum_u16 (p0, x0, 8), -+ z0 = svld1ub_vnum_u16 (p0, x0, 8)) -+ -+/* -+** ld1ub_vnum_u16_m1: -+** ld1b z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_u16_m1, svuint16_t, uint8_t, -+ z0 = svld1ub_vnum_u16 (p0, x0, -1), -+ z0 = svld1ub_vnum_u16 (p0, x0, -1)) -+ -+/* -+** ld1ub_vnum_u16_m8: -+** ld1b z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_u16_m8, svuint16_t, uint8_t, -+ z0 = svld1ub_vnum_u16 (p0, x0, -8), -+ z0 = svld1ub_vnum_u16 (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1ub_vnum_u16_m9: -+** dech x0, all, mul #9 -+** ld1b z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_u16_m9, svuint16_t, uint8_t, -+ z0 = svld1ub_vnum_u16 (p0, x0, -9), -+ z0 = svld1ub_vnum_u16 (p0, x0, -9)) -+ -+/* -+** ld1ub_vnum_u16_x1: -+** cnth (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ld1b z0\.h, p0/z, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** ld1b z0\.h, p0/z, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_u16_x1, svuint16_t, uint8_t, -+ z0 = svld1ub_vnum_u16 (p0, x0, x1), -+ z0 = svld1ub_vnum_u16 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_u32.c -new file mode 100644 -index 000000000..58d2ef527 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_u32.c -@@ -0,0 +1,162 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1ub_u32_base: -+** ld1b z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ub_u32_base, svuint32_t, uint8_t, -+ z0 = svld1ub_u32 (p0, x0), -+ z0 = svld1ub_u32 (p0, x0)) -+ -+/* -+** ld1ub_u32_index: -+** ld1b z0\.s, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ld1ub_u32_index, svuint32_t, uint8_t, -+ z0 = svld1ub_u32 (p0, x0 + x1), -+ z0 = svld1ub_u32 (p0, x0 + x1)) -+ -+/* -+** ld1ub_u32_1: -+** ld1b z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_u32_1, svuint32_t, uint8_t, -+ z0 = svld1ub_u32 (p0, x0 + svcntw ()), -+ z0 = svld1ub_u32 (p0, x0 + svcntw ())) -+ -+/* -+** ld1ub_u32_7: -+** ld1b z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_u32_7, svuint32_t, uint8_t, -+ z0 = svld1ub_u32 (p0, x0 + svcntw () * 7), -+ z0 = svld1ub_u32 (p0, x0 + svcntw () * 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1ub_u32_8: -+** incb x0, all, mul #2 -+** ld1b z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ub_u32_8, svuint32_t, uint8_t, -+ z0 = svld1ub_u32 (p0, x0 + svcntw () * 8), -+ z0 = svld1ub_u32 (p0, x0 + svcntw () * 8)) -+ -+/* -+** ld1ub_u32_m1: -+** ld1b z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_u32_m1, svuint32_t, uint8_t, -+ z0 = svld1ub_u32 (p0, x0 - svcntw ()), -+ z0 = svld1ub_u32 (p0, x0 - svcntw ())) -+ -+/* -+** ld1ub_u32_m8: -+** ld1b z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_u32_m8, svuint32_t, uint8_t, -+ z0 = svld1ub_u32 (p0, x0 - svcntw () * 8), -+ z0 = svld1ub_u32 (p0, x0 - svcntw () * 8)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld1ub_u32_m9: -+** decw x0, all, mul #9 -+** ld1b z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ub_u32_m9, svuint32_t, uint8_t, -+ z0 = svld1ub_u32 (p0, x0 - svcntw () * 9), -+ z0 = svld1ub_u32 (p0, x0 - svcntw () * 9)) -+ -+/* -+** ld1ub_vnum_u32_0: -+** ld1b z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_u32_0, svuint32_t, uint8_t, -+ z0 = svld1ub_vnum_u32 (p0, x0, 0), -+ z0 = svld1ub_vnum_u32 (p0, x0, 0)) -+ -+/* -+** ld1ub_vnum_u32_1: -+** ld1b z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_u32_1, svuint32_t, uint8_t, -+ z0 = svld1ub_vnum_u32 (p0, x0, 1), -+ z0 = svld1ub_vnum_u32 (p0, x0, 1)) -+ -+/* -+** ld1ub_vnum_u32_7: -+** ld1b z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_u32_7, svuint32_t, uint8_t, -+ z0 = svld1ub_vnum_u32 (p0, x0, 7), -+ z0 = svld1ub_vnum_u32 (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1ub_vnum_u32_8: -+** incb x0, all, mul #2 -+** ld1b z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_u32_8, svuint32_t, uint8_t, -+ z0 = svld1ub_vnum_u32 (p0, x0, 8), -+ z0 = svld1ub_vnum_u32 (p0, x0, 8)) -+ -+/* -+** ld1ub_vnum_u32_m1: -+** ld1b z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_u32_m1, svuint32_t, uint8_t, -+ z0 = svld1ub_vnum_u32 (p0, x0, -1), -+ z0 = svld1ub_vnum_u32 (p0, x0, -1)) -+ -+/* -+** ld1ub_vnum_u32_m8: -+** ld1b z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_u32_m8, svuint32_t, uint8_t, -+ z0 = svld1ub_vnum_u32 (p0, x0, -8), -+ z0 = svld1ub_vnum_u32 (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1ub_vnum_u32_m9: -+** decw x0, all, mul #9 -+** ld1b z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_u32_m9, svuint32_t, uint8_t, -+ z0 = svld1ub_vnum_u32 (p0, x0, -9), -+ z0 = svld1ub_vnum_u32 (p0, x0, -9)) -+ -+/* -+** ld1ub_vnum_u32_x1: -+** cntw (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ld1b z0\.s, p0/z, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** ld1b z0\.s, p0/z, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_u32_x1, svuint32_t, uint8_t, -+ z0 = svld1ub_vnum_u32 (p0, x0, x1), -+ z0 = svld1ub_vnum_u32 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_u64.c -new file mode 100644 -index 000000000..46d7250f0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1ub_u64.c -@@ -0,0 +1,162 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1ub_u64_base: -+** ld1b z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ub_u64_base, svuint64_t, uint8_t, -+ z0 = svld1ub_u64 (p0, x0), -+ z0 = svld1ub_u64 (p0, x0)) -+ -+/* -+** ld1ub_u64_index: -+** ld1b z0\.d, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ld1ub_u64_index, svuint64_t, uint8_t, -+ z0 = svld1ub_u64 (p0, x0 + x1), -+ z0 = svld1ub_u64 (p0, x0 + x1)) -+ -+/* -+** ld1ub_u64_1: -+** ld1b z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_u64_1, svuint64_t, uint8_t, -+ z0 = svld1ub_u64 (p0, x0 + svcntd ()), -+ z0 = svld1ub_u64 (p0, x0 + svcntd ())) -+ -+/* -+** ld1ub_u64_7: -+** ld1b z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_u64_7, svuint64_t, uint8_t, -+ z0 = svld1ub_u64 (p0, x0 + svcntd () * 7), -+ z0 = svld1ub_u64 (p0, x0 + svcntd () * 7)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld1ub_u64_8: -+** incb x0 -+** ld1b z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ub_u64_8, svuint64_t, uint8_t, -+ z0 = svld1ub_u64 (p0, x0 + svcntd () * 8), -+ z0 = svld1ub_u64 (p0, x0 + svcntd () * 8)) -+ -+/* -+** ld1ub_u64_m1: -+** ld1b z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_u64_m1, svuint64_t, uint8_t, -+ z0 = svld1ub_u64 (p0, x0 - svcntd ()), -+ z0 = svld1ub_u64 (p0, x0 - svcntd ())) -+ -+/* -+** ld1ub_u64_m8: -+** ld1b z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_u64_m8, svuint64_t, uint8_t, -+ z0 = svld1ub_u64 (p0, x0 - svcntd () * 8), -+ z0 = svld1ub_u64 (p0, x0 - svcntd () * 8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1ub_u64_m9: -+** decd x0, all, mul #9 -+** ld1b z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ub_u64_m9, svuint64_t, uint8_t, -+ z0 = svld1ub_u64 (p0, x0 - svcntd () * 9), -+ z0 = svld1ub_u64 (p0, x0 - svcntd () * 9)) -+ -+/* -+** ld1ub_vnum_u64_0: -+** ld1b z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_u64_0, svuint64_t, uint8_t, -+ z0 = svld1ub_vnum_u64 (p0, x0, 0), -+ z0 = svld1ub_vnum_u64 (p0, x0, 0)) -+ -+/* -+** ld1ub_vnum_u64_1: -+** ld1b z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_u64_1, svuint64_t, uint8_t, -+ z0 = svld1ub_vnum_u64 (p0, x0, 1), -+ z0 = svld1ub_vnum_u64 (p0, x0, 1)) -+ -+/* -+** ld1ub_vnum_u64_7: -+** ld1b z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_u64_7, svuint64_t, uint8_t, -+ z0 = svld1ub_vnum_u64 (p0, x0, 7), -+ z0 = svld1ub_vnum_u64 (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1ub_vnum_u64_8: -+** incb x0 -+** ld1b z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_u64_8, svuint64_t, uint8_t, -+ z0 = svld1ub_vnum_u64 (p0, x0, 8), -+ z0 = svld1ub_vnum_u64 (p0, x0, 8)) -+ -+/* -+** ld1ub_vnum_u64_m1: -+** ld1b z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_u64_m1, svuint64_t, uint8_t, -+ z0 = svld1ub_vnum_u64 (p0, x0, -1), -+ z0 = svld1ub_vnum_u64 (p0, x0, -1)) -+ -+/* -+** ld1ub_vnum_u64_m8: -+** ld1b z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_u64_m8, svuint64_t, uint8_t, -+ z0 = svld1ub_vnum_u64 (p0, x0, -8), -+ z0 = svld1ub_vnum_u64 (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1ub_vnum_u64_m9: -+** decd x0, all, mul #9 -+** ld1b z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_u64_m9, svuint64_t, uint8_t, -+ z0 = svld1ub_vnum_u64 (p0, x0, -9), -+ z0 = svld1ub_vnum_u64 (p0, x0, -9)) -+ -+/* -+** ld1ub_vnum_u64_x1: -+** cntd (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ld1b z0\.d, p0/z, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** ld1b z0\.d, p0/z, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_LOAD (ld1ub_vnum_u64_x1, svuint64_t, uint8_t, -+ z0 = svld1ub_vnum_u64 (p0, x0, x1), -+ z0 = svld1ub_vnum_u64 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_s32.c -new file mode 100644 -index 000000000..84fb5c335 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_s32.c -@@ -0,0 +1,252 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1uh_gather_s32_tied1: -+** ld1h z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_s32_tied1, svint32_t, svuint32_t, -+ z0_res = svld1uh_gather_u32base_s32 (p0, z0), -+ z0_res = svld1uh_gather_s32 (p0, z0)) -+ -+/* -+** ld1uh_gather_s32_untied: -+** ld1h z0\.s, p0/z, \[z1\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_s32_untied, svint32_t, svuint32_t, -+ z0_res = svld1uh_gather_u32base_s32 (p0, z1), -+ z0_res = svld1uh_gather_s32 (p0, z1)) -+ -+/* -+** ld1uh_gather_x0_s32_offset: -+** ld1h z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_x0_s32_offset, svint32_t, svuint32_t, -+ z0_res = svld1uh_gather_u32base_offset_s32 (p0, z0, x0), -+ z0_res = svld1uh_gather_offset_s32 (p0, z0, x0)) -+ -+/* -+** ld1uh_gather_m2_s32_offset: -+** mov (x[0-9]+), #?-2 -+** ld1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_m2_s32_offset, svint32_t, svuint32_t, -+ z0_res = svld1uh_gather_u32base_offset_s32 (p0, z0, -2), -+ z0_res = svld1uh_gather_offset_s32 (p0, z0, -2)) -+ -+/* -+** ld1uh_gather_0_s32_offset: -+** ld1h z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_0_s32_offset, svint32_t, svuint32_t, -+ z0_res = svld1uh_gather_u32base_offset_s32 (p0, z0, 0), -+ z0_res = svld1uh_gather_offset_s32 (p0, z0, 0)) -+ -+/* -+** ld1uh_gather_5_s32_offset: -+** mov (x[0-9]+), #?5 -+** ld1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_5_s32_offset, svint32_t, svuint32_t, -+ z0_res = svld1uh_gather_u32base_offset_s32 (p0, z0, 5), -+ z0_res = svld1uh_gather_offset_s32 (p0, z0, 5)) -+ -+/* -+** ld1uh_gather_6_s32_offset: -+** ld1h z0\.s, p0/z, \[z0\.s, #6\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_6_s32_offset, svint32_t, svuint32_t, -+ z0_res = svld1uh_gather_u32base_offset_s32 (p0, z0, 6), -+ z0_res = svld1uh_gather_offset_s32 (p0, z0, 6)) -+ -+/* -+** ld1uh_gather_62_s32_offset: -+** ld1h z0\.s, p0/z, \[z0\.s, #62\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_62_s32_offset, svint32_t, svuint32_t, -+ z0_res = svld1uh_gather_u32base_offset_s32 (p0, z0, 62), -+ z0_res = svld1uh_gather_offset_s32 (p0, z0, 62)) -+ -+/* -+** ld1uh_gather_64_s32_offset: -+** mov (x[0-9]+), #?64 -+** ld1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_64_s32_offset, svint32_t, svuint32_t, -+ z0_res = svld1uh_gather_u32base_offset_s32 (p0, z0, 64), -+ z0_res = svld1uh_gather_offset_s32 (p0, z0, 64)) -+ -+/* -+** ld1uh_gather_x0_s32_index: -+** lsl (x[0-9]+), x0, #?1 -+** ld1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_x0_s32_index, svint32_t, svuint32_t, -+ z0_res = svld1uh_gather_u32base_index_s32 (p0, z0, x0), -+ z0_res = svld1uh_gather_index_s32 (p0, z0, x0)) -+ -+/* -+** ld1uh_gather_m1_s32_index: -+** mov (x[0-9]+), #?-2 -+** ld1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_m1_s32_index, svint32_t, svuint32_t, -+ z0_res = svld1uh_gather_u32base_index_s32 (p0, z0, -1), -+ z0_res = svld1uh_gather_index_s32 (p0, z0, -1)) -+ -+/* -+** ld1uh_gather_0_s32_index: -+** ld1h z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_0_s32_index, svint32_t, svuint32_t, -+ z0_res = svld1uh_gather_u32base_index_s32 (p0, z0, 0), -+ z0_res = svld1uh_gather_index_s32 (p0, z0, 0)) -+ -+/* -+** ld1uh_gather_5_s32_index: -+** ld1h z0\.s, p0/z, \[z0\.s, #10\] -+** ret -+*/ 
-+TEST_LOAD_GATHER_ZS (ld1uh_gather_5_s32_index, svint32_t, svuint32_t, -+ z0_res = svld1uh_gather_u32base_index_s32 (p0, z0, 5), -+ z0_res = svld1uh_gather_index_s32 (p0, z0, 5)) -+ -+/* -+** ld1uh_gather_31_s32_index: -+** ld1h z0\.s, p0/z, \[z0\.s, #62\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_31_s32_index, svint32_t, svuint32_t, -+ z0_res = svld1uh_gather_u32base_index_s32 (p0, z0, 31), -+ z0_res = svld1uh_gather_index_s32 (p0, z0, 31)) -+ -+/* -+** ld1uh_gather_32_s32_index: -+** mov (x[0-9]+), #?64 -+** ld1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_32_s32_index, svint32_t, svuint32_t, -+ z0_res = svld1uh_gather_u32base_index_s32 (p0, z0, 32), -+ z0_res = svld1uh_gather_index_s32 (p0, z0, 32)) -+ -+/* -+** ld1uh_gather_x0_s32_s32offset: -+** ld1h z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_x0_s32_s32offset, svint32_t, uint16_t, svint32_t, -+ z0_res = svld1uh_gather_s32offset_s32 (p0, x0, z0), -+ z0_res = svld1uh_gather_offset_s32 (p0, x0, z0)) -+ -+/* -+** ld1uh_gather_tied1_s32_s32offset: -+** ld1h z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_tied1_s32_s32offset, svint32_t, uint16_t, svint32_t, -+ z0_res = svld1uh_gather_s32offset_s32 (p0, x0, z0), -+ z0_res = svld1uh_gather_offset_s32 (p0, x0, z0)) -+ -+/* -+** ld1uh_gather_untied_s32_s32offset: -+** ld1h z0\.s, p0/z, \[x0, z1\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_untied_s32_s32offset, svint32_t, uint16_t, svint32_t, -+ z0_res = svld1uh_gather_s32offset_s32 (p0, x0, z1), -+ z0_res = svld1uh_gather_offset_s32 (p0, x0, z1)) -+ -+/* -+** ld1uh_gather_x0_s32_u32offset: -+** ld1h z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_x0_s32_u32offset, svint32_t, uint16_t, svuint32_t, -+ z0_res = svld1uh_gather_u32offset_s32 (p0, x0, z0), -+ z0_res = svld1uh_gather_offset_s32 (p0, x0, z0)) -+ -+/* -+** ld1uh_gather_tied1_s32_u32offset: -+** ld1h z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_tied1_s32_u32offset, svint32_t, uint16_t, svuint32_t, -+ z0_res = svld1uh_gather_u32offset_s32 (p0, x0, z0), -+ z0_res = svld1uh_gather_offset_s32 (p0, x0, z0)) -+ -+/* -+** ld1uh_gather_untied_s32_u32offset: -+** ld1h z0\.s, p0/z, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_untied_s32_u32offset, svint32_t, uint16_t, svuint32_t, -+ z0_res = svld1uh_gather_u32offset_s32 (p0, x0, z1), -+ z0_res = svld1uh_gather_offset_s32 (p0, x0, z1)) -+ -+/* -+** ld1uh_gather_x0_s32_s32index: -+** ld1h z0\.s, p0/z, \[x0, z0\.s, sxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_x0_s32_s32index, svint32_t, uint16_t, svint32_t, -+ z0_res = svld1uh_gather_s32index_s32 (p0, x0, z0), -+ z0_res = svld1uh_gather_index_s32 (p0, x0, z0)) -+ -+/* -+** ld1uh_gather_tied1_s32_s32index: -+** ld1h z0\.s, p0/z, \[x0, z0\.s, sxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_tied1_s32_s32index, svint32_t, uint16_t, svint32_t, -+ z0_res = svld1uh_gather_s32index_s32 (p0, x0, z0), -+ z0_res = svld1uh_gather_index_s32 (p0, x0, z0)) -+ -+/* -+** ld1uh_gather_untied_s32_s32index: -+** ld1h z0\.s, p0/z, \[x0, z1\.s, sxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_untied_s32_s32index, svint32_t, uint16_t, svint32_t, -+ z0_res = svld1uh_gather_s32index_s32 (p0, x0, z1), -+ z0_res = svld1uh_gather_index_s32 (p0, x0, z1)) -+ -+/* -+** ld1uh_gather_x0_s32_u32index: -+** ld1h z0\.s, p0/z, \[x0, z0\.s, uxtw 1\] -+** ret -+*/ 
-+TEST_LOAD_GATHER_SZ (ld1uh_gather_x0_s32_u32index, svint32_t, uint16_t, svuint32_t, -+ z0_res = svld1uh_gather_u32index_s32 (p0, x0, z0), -+ z0_res = svld1uh_gather_index_s32 (p0, x0, z0)) -+ -+/* -+** ld1uh_gather_tied1_s32_u32index: -+** ld1h z0\.s, p0/z, \[x0, z0\.s, uxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_tied1_s32_u32index, svint32_t, uint16_t, svuint32_t, -+ z0_res = svld1uh_gather_u32index_s32 (p0, x0, z0), -+ z0_res = svld1uh_gather_index_s32 (p0, x0, z0)) -+ -+/* -+** ld1uh_gather_untied_s32_u32index: -+** ld1h z0\.s, p0/z, \[x0, z1\.s, uxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_untied_s32_u32index, svint32_t, uint16_t, svuint32_t, -+ z0_res = svld1uh_gather_u32index_s32 (p0, x0, z1), -+ z0_res = svld1uh_gather_index_s32 (p0, x0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_s64.c -new file mode 100644 -index 000000000..447001793 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_s64.c -@@ -0,0 +1,288 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1uh_gather_s64_tied1: -+** ld1h z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_s64_tied1, svint64_t, svuint64_t, -+ z0_res = svld1uh_gather_u64base_s64 (p0, z0), -+ z0_res = svld1uh_gather_s64 (p0, z0)) -+ -+/* -+** ld1uh_gather_s64_untied: -+** ld1h z0\.d, p0/z, \[z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_s64_untied, svint64_t, svuint64_t, -+ z0_res = svld1uh_gather_u64base_s64 (p0, z1), -+ z0_res = svld1uh_gather_s64 (p0, z1)) -+ -+/* -+** ld1uh_gather_x0_s64_offset: -+** ld1h z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_x0_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1uh_gather_u64base_offset_s64 (p0, z0, x0), -+ z0_res = svld1uh_gather_offset_s64 (p0, z0, x0)) -+ -+/* -+** ld1uh_gather_m2_s64_offset: -+** mov (x[0-9]+), #?-2 -+** ld1h z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_m2_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1uh_gather_u64base_offset_s64 (p0, z0, -2), -+ z0_res = svld1uh_gather_offset_s64 (p0, z0, -2)) -+ -+/* -+** ld1uh_gather_0_s64_offset: -+** ld1h z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_0_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1uh_gather_u64base_offset_s64 (p0, z0, 0), -+ z0_res = svld1uh_gather_offset_s64 (p0, z0, 0)) -+ -+/* -+** ld1uh_gather_5_s64_offset: -+** mov (x[0-9]+), #?5 -+** ld1h z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_5_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1uh_gather_u64base_offset_s64 (p0, z0, 5), -+ z0_res = svld1uh_gather_offset_s64 (p0, z0, 5)) -+ -+/* -+** ld1uh_gather_6_s64_offset: -+** ld1h z0\.d, p0/z, \[z0\.d, #6\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_6_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1uh_gather_u64base_offset_s64 (p0, z0, 6), -+ z0_res = svld1uh_gather_offset_s64 (p0, z0, 6)) -+ -+/* -+** ld1uh_gather_62_s64_offset: -+** ld1h z0\.d, p0/z, \[z0\.d, #62\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_62_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1uh_gather_u64base_offset_s64 (p0, z0, 62), -+ z0_res = svld1uh_gather_offset_s64 (p0, z0, 62)) -+ -+/* -+** ld1uh_gather_64_s64_offset: -+** mov (x[0-9]+), #?64 -+** ld1h z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS 
(ld1uh_gather_64_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1uh_gather_u64base_offset_s64 (p0, z0, 64), -+ z0_res = svld1uh_gather_offset_s64 (p0, z0, 64)) -+ -+/* -+** ld1uh_gather_x0_s64_index: -+** lsl (x[0-9]+), x0, #?1 -+** ld1h z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_x0_s64_index, svint64_t, svuint64_t, -+ z0_res = svld1uh_gather_u64base_index_s64 (p0, z0, x0), -+ z0_res = svld1uh_gather_index_s64 (p0, z0, x0)) -+ -+/* -+** ld1uh_gather_m1_s64_index: -+** mov (x[0-9]+), #?-2 -+** ld1h z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_m1_s64_index, svint64_t, svuint64_t, -+ z0_res = svld1uh_gather_u64base_index_s64 (p0, z0, -1), -+ z0_res = svld1uh_gather_index_s64 (p0, z0, -1)) -+ -+/* -+** ld1uh_gather_0_s64_index: -+** ld1h z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_0_s64_index, svint64_t, svuint64_t, -+ z0_res = svld1uh_gather_u64base_index_s64 (p0, z0, 0), -+ z0_res = svld1uh_gather_index_s64 (p0, z0, 0)) -+ -+/* -+** ld1uh_gather_5_s64_index: -+** ld1h z0\.d, p0/z, \[z0\.d, #10\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_5_s64_index, svint64_t, svuint64_t, -+ z0_res = svld1uh_gather_u64base_index_s64 (p0, z0, 5), -+ z0_res = svld1uh_gather_index_s64 (p0, z0, 5)) -+ -+/* -+** ld1uh_gather_31_s64_index: -+** ld1h z0\.d, p0/z, \[z0\.d, #62\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_31_s64_index, svint64_t, svuint64_t, -+ z0_res = svld1uh_gather_u64base_index_s64 (p0, z0, 31), -+ z0_res = svld1uh_gather_index_s64 (p0, z0, 31)) -+ -+/* -+** ld1uh_gather_32_s64_index: -+** mov (x[0-9]+), #?64 -+** ld1h z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_32_s64_index, svint64_t, svuint64_t, -+ z0_res = svld1uh_gather_u64base_index_s64 (p0, z0, 32), -+ z0_res = svld1uh_gather_index_s64 (p0, z0, 32)) -+ -+/* -+** ld1uh_gather_x0_s64_s64offset: -+** ld1h z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_x0_s64_s64offset, svint64_t, uint16_t, svint64_t, -+ z0_res = svld1uh_gather_s64offset_s64 (p0, x0, z0), -+ z0_res = svld1uh_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ld1uh_gather_tied1_s64_s64offset: -+** ld1h z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_tied1_s64_s64offset, svint64_t, uint16_t, svint64_t, -+ z0_res = svld1uh_gather_s64offset_s64 (p0, x0, z0), -+ z0_res = svld1uh_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ld1uh_gather_untied_s64_s64offset: -+** ld1h z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_untied_s64_s64offset, svint64_t, uint16_t, svint64_t, -+ z0_res = svld1uh_gather_s64offset_s64 (p0, x0, z1), -+ z0_res = svld1uh_gather_offset_s64 (p0, x0, z1)) -+ -+/* -+** ld1uh_gather_ext_s64_s64offset: -+** ld1h z0\.d, p0/z, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_ext_s64_s64offset, svint64_t, uint16_t, svint64_t, -+ z0_res = svld1uh_gather_s64offset_s64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svld1uh_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ld1uh_gather_x0_s64_u64offset: -+** ld1h z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_x0_s64_u64offset, svint64_t, uint16_t, svuint64_t, -+ z0_res = svld1uh_gather_u64offset_s64 (p0, x0, z0), -+ z0_res = svld1uh_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ld1uh_gather_tied1_s64_u64offset: -+** ld1h z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_tied1_s64_u64offset, svint64_t, uint16_t, 
svuint64_t, -+ z0_res = svld1uh_gather_u64offset_s64 (p0, x0, z0), -+ z0_res = svld1uh_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ld1uh_gather_untied_s64_u64offset: -+** ld1h z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_untied_s64_u64offset, svint64_t, uint16_t, svuint64_t, -+ z0_res = svld1uh_gather_u64offset_s64 (p0, x0, z1), -+ z0_res = svld1uh_gather_offset_s64 (p0, x0, z1)) -+ -+/* -+** ld1uh_gather_ext_s64_u64offset: -+** ld1h z0\.d, p0/z, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_ext_s64_u64offset, svint64_t, uint16_t, svuint64_t, -+ z0_res = svld1uh_gather_u64offset_s64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svld1uh_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ld1uh_gather_x0_s64_s64index: -+** ld1h z0\.d, p0/z, \[x0, z0\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_x0_s64_s64index, svint64_t, uint16_t, svint64_t, -+ z0_res = svld1uh_gather_s64index_s64 (p0, x0, z0), -+ z0_res = svld1uh_gather_index_s64 (p0, x0, z0)) -+ -+/* -+** ld1uh_gather_tied1_s64_s64index: -+** ld1h z0\.d, p0/z, \[x0, z0\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_tied1_s64_s64index, svint64_t, uint16_t, svint64_t, -+ z0_res = svld1uh_gather_s64index_s64 (p0, x0, z0), -+ z0_res = svld1uh_gather_index_s64 (p0, x0, z0)) -+ -+/* -+** ld1uh_gather_untied_s64_s64index: -+** ld1h z0\.d, p0/z, \[x0, z1\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_untied_s64_s64index, svint64_t, uint16_t, svint64_t, -+ z0_res = svld1uh_gather_s64index_s64 (p0, x0, z1), -+ z0_res = svld1uh_gather_index_s64 (p0, x0, z1)) -+ -+/* -+** ld1uh_gather_ext_s64_s64index: -+** ld1h z0\.d, p0/z, \[x0, z1\.d, sxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_ext_s64_s64index, svint64_t, uint16_t, svint64_t, -+ z0_res = svld1uh_gather_s64index_s64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svld1uh_gather_index_s64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ld1uh_gather_x0_s64_u64index: -+** ld1h z0\.d, p0/z, \[x0, z0\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_x0_s64_u64index, svint64_t, uint16_t, svuint64_t, -+ z0_res = svld1uh_gather_u64index_s64 (p0, x0, z0), -+ z0_res = svld1uh_gather_index_s64 (p0, x0, z0)) -+ -+/* -+** ld1uh_gather_tied1_s64_u64index: -+** ld1h z0\.d, p0/z, \[x0, z0\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_tied1_s64_u64index, svint64_t, uint16_t, svuint64_t, -+ z0_res = svld1uh_gather_u64index_s64 (p0, x0, z0), -+ z0_res = svld1uh_gather_index_s64 (p0, x0, z0)) -+ -+/* -+** ld1uh_gather_untied_s64_u64index: -+** ld1h z0\.d, p0/z, \[x0, z1\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_untied_s64_u64index, svint64_t, uint16_t, svuint64_t, -+ z0_res = svld1uh_gather_u64index_s64 (p0, x0, z1), -+ z0_res = svld1uh_gather_index_s64 (p0, x0, z1)) -+ -+/* -+** ld1uh_gather_ext_s64_u64index: -+** ld1h z0\.d, p0/z, \[x0, z1\.d, uxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_ext_s64_u64index, svint64_t, uint16_t, svuint64_t, -+ z0_res = svld1uh_gather_u64index_s64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svld1uh_gather_index_s64 (p0, x0, svextw_x (p0, z1))) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_u32.c -new file mode 100644 -index 000000000..09d3cc8c2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_u32.c -@@ -0,0 +1,252 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target 
{ ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1uh_gather_u32_tied1: -+** ld1h z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_u32_tied1, svuint32_t, svuint32_t, -+ z0_res = svld1uh_gather_u32base_u32 (p0, z0), -+ z0_res = svld1uh_gather_u32 (p0, z0)) -+ -+/* -+** ld1uh_gather_u32_untied: -+** ld1h z0\.s, p0/z, \[z1\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_u32_untied, svuint32_t, svuint32_t, -+ z0_res = svld1uh_gather_u32base_u32 (p0, z1), -+ z0_res = svld1uh_gather_u32 (p0, z1)) -+ -+/* -+** ld1uh_gather_x0_u32_offset: -+** ld1h z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_x0_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svld1uh_gather_u32base_offset_u32 (p0, z0, x0), -+ z0_res = svld1uh_gather_offset_u32 (p0, z0, x0)) -+ -+/* -+** ld1uh_gather_m2_u32_offset: -+** mov (x[0-9]+), #?-2 -+** ld1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_m2_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svld1uh_gather_u32base_offset_u32 (p0, z0, -2), -+ z0_res = svld1uh_gather_offset_u32 (p0, z0, -2)) -+ -+/* -+** ld1uh_gather_0_u32_offset: -+** ld1h z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_0_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svld1uh_gather_u32base_offset_u32 (p0, z0, 0), -+ z0_res = svld1uh_gather_offset_u32 (p0, z0, 0)) -+ -+/* -+** ld1uh_gather_5_u32_offset: -+** mov (x[0-9]+), #?5 -+** ld1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_5_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svld1uh_gather_u32base_offset_u32 (p0, z0, 5), -+ z0_res = svld1uh_gather_offset_u32 (p0, z0, 5)) -+ -+/* -+** ld1uh_gather_6_u32_offset: -+** ld1h z0\.s, p0/z, \[z0\.s, #6\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_6_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svld1uh_gather_u32base_offset_u32 (p0, z0, 6), -+ z0_res = svld1uh_gather_offset_u32 (p0, z0, 6)) -+ -+/* -+** ld1uh_gather_62_u32_offset: -+** ld1h z0\.s, p0/z, \[z0\.s, #62\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_62_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svld1uh_gather_u32base_offset_u32 (p0, z0, 62), -+ z0_res = svld1uh_gather_offset_u32 (p0, z0, 62)) -+ -+/* -+** ld1uh_gather_64_u32_offset: -+** mov (x[0-9]+), #?64 -+** ld1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_64_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svld1uh_gather_u32base_offset_u32 (p0, z0, 64), -+ z0_res = svld1uh_gather_offset_u32 (p0, z0, 64)) -+ -+/* -+** ld1uh_gather_x0_u32_index: -+** lsl (x[0-9]+), x0, #?1 -+** ld1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_x0_u32_index, svuint32_t, svuint32_t, -+ z0_res = svld1uh_gather_u32base_index_u32 (p0, z0, x0), -+ z0_res = svld1uh_gather_index_u32 (p0, z0, x0)) -+ -+/* -+** ld1uh_gather_m1_u32_index: -+** mov (x[0-9]+), #?-2 -+** ld1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_m1_u32_index, svuint32_t, svuint32_t, -+ z0_res = svld1uh_gather_u32base_index_u32 (p0, z0, -1), -+ z0_res = svld1uh_gather_index_u32 (p0, z0, -1)) -+ -+/* -+** ld1uh_gather_0_u32_index: -+** ld1h z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_0_u32_index, svuint32_t, svuint32_t, -+ z0_res = svld1uh_gather_u32base_index_u32 (p0, z0, 0), -+ z0_res = svld1uh_gather_index_u32 (p0, z0, 0)) -+ -+/* -+** ld1uh_gather_5_u32_index: -+** ld1h z0\.s, p0/z, \[z0\.s, #10\] -+** ret -+*/ 
-+TEST_LOAD_GATHER_ZS (ld1uh_gather_5_u32_index, svuint32_t, svuint32_t, -+ z0_res = svld1uh_gather_u32base_index_u32 (p0, z0, 5), -+ z0_res = svld1uh_gather_index_u32 (p0, z0, 5)) -+ -+/* -+** ld1uh_gather_31_u32_index: -+** ld1h z0\.s, p0/z, \[z0\.s, #62\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_31_u32_index, svuint32_t, svuint32_t, -+ z0_res = svld1uh_gather_u32base_index_u32 (p0, z0, 31), -+ z0_res = svld1uh_gather_index_u32 (p0, z0, 31)) -+ -+/* -+** ld1uh_gather_32_u32_index: -+** mov (x[0-9]+), #?64 -+** ld1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_32_u32_index, svuint32_t, svuint32_t, -+ z0_res = svld1uh_gather_u32base_index_u32 (p0, z0, 32), -+ z0_res = svld1uh_gather_index_u32 (p0, z0, 32)) -+ -+/* -+** ld1uh_gather_x0_u32_s32offset: -+** ld1h z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_x0_u32_s32offset, svuint32_t, uint16_t, svint32_t, -+ z0_res = svld1uh_gather_s32offset_u32 (p0, x0, z0), -+ z0_res = svld1uh_gather_offset_u32 (p0, x0, z0)) -+ -+/* -+** ld1uh_gather_tied1_u32_s32offset: -+** ld1h z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_tied1_u32_s32offset, svuint32_t, uint16_t, svint32_t, -+ z0_res = svld1uh_gather_s32offset_u32 (p0, x0, z0), -+ z0_res = svld1uh_gather_offset_u32 (p0, x0, z0)) -+ -+/* -+** ld1uh_gather_untied_u32_s32offset: -+** ld1h z0\.s, p0/z, \[x0, z1\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_untied_u32_s32offset, svuint32_t, uint16_t, svint32_t, -+ z0_res = svld1uh_gather_s32offset_u32 (p0, x0, z1), -+ z0_res = svld1uh_gather_offset_u32 (p0, x0, z1)) -+ -+/* -+** ld1uh_gather_x0_u32_u32offset: -+** ld1h z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_x0_u32_u32offset, svuint32_t, uint16_t, svuint32_t, -+ z0_res = svld1uh_gather_u32offset_u32 (p0, x0, z0), -+ z0_res = svld1uh_gather_offset_u32 (p0, x0, z0)) -+ -+/* -+** ld1uh_gather_tied1_u32_u32offset: -+** ld1h z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_tied1_u32_u32offset, svuint32_t, uint16_t, svuint32_t, -+ z0_res = svld1uh_gather_u32offset_u32 (p0, x0, z0), -+ z0_res = svld1uh_gather_offset_u32 (p0, x0, z0)) -+ -+/* -+** ld1uh_gather_untied_u32_u32offset: -+** ld1h z0\.s, p0/z, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_untied_u32_u32offset, svuint32_t, uint16_t, svuint32_t, -+ z0_res = svld1uh_gather_u32offset_u32 (p0, x0, z1), -+ z0_res = svld1uh_gather_offset_u32 (p0, x0, z1)) -+ -+/* -+** ld1uh_gather_x0_u32_s32index: -+** ld1h z0\.s, p0/z, \[x0, z0\.s, sxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_x0_u32_s32index, svuint32_t, uint16_t, svint32_t, -+ z0_res = svld1uh_gather_s32index_u32 (p0, x0, z0), -+ z0_res = svld1uh_gather_index_u32 (p0, x0, z0)) -+ -+/* -+** ld1uh_gather_tied1_u32_s32index: -+** ld1h z0\.s, p0/z, \[x0, z0\.s, sxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_tied1_u32_s32index, svuint32_t, uint16_t, svint32_t, -+ z0_res = svld1uh_gather_s32index_u32 (p0, x0, z0), -+ z0_res = svld1uh_gather_index_u32 (p0, x0, z0)) -+ -+/* -+** ld1uh_gather_untied_u32_s32index: -+** ld1h z0\.s, p0/z, \[x0, z1\.s, sxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_untied_u32_s32index, svuint32_t, uint16_t, svint32_t, -+ z0_res = svld1uh_gather_s32index_u32 (p0, x0, z1), -+ z0_res = svld1uh_gather_index_u32 (p0, x0, z1)) -+ -+/* -+** ld1uh_gather_x0_u32_u32index: -+** ld1h z0\.s, p0/z, \[x0, z0\.s, uxtw 1\] -+** ret 
-+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_x0_u32_u32index, svuint32_t, uint16_t, svuint32_t, -+ z0_res = svld1uh_gather_u32index_u32 (p0, x0, z0), -+ z0_res = svld1uh_gather_index_u32 (p0, x0, z0)) -+ -+/* -+** ld1uh_gather_tied1_u32_u32index: -+** ld1h z0\.s, p0/z, \[x0, z0\.s, uxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_tied1_u32_u32index, svuint32_t, uint16_t, svuint32_t, -+ z0_res = svld1uh_gather_u32index_u32 (p0, x0, z0), -+ z0_res = svld1uh_gather_index_u32 (p0, x0, z0)) -+ -+/* -+** ld1uh_gather_untied_u32_u32index: -+** ld1h z0\.s, p0/z, \[x0, z1\.s, uxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_untied_u32_u32index, svuint32_t, uint16_t, svuint32_t, -+ z0_res = svld1uh_gather_u32index_u32 (p0, x0, z1), -+ z0_res = svld1uh_gather_index_u32 (p0, x0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_u64.c -new file mode 100644 -index 000000000..f3dcf03cd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_gather_u64.c -@@ -0,0 +1,288 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1uh_gather_u64_tied1: -+** ld1h z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_u64_tied1, svuint64_t, svuint64_t, -+ z0_res = svld1uh_gather_u64base_u64 (p0, z0), -+ z0_res = svld1uh_gather_u64 (p0, z0)) -+ -+/* -+** ld1uh_gather_u64_untied: -+** ld1h z0\.d, p0/z, \[z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_u64_untied, svuint64_t, svuint64_t, -+ z0_res = svld1uh_gather_u64base_u64 (p0, z1), -+ z0_res = svld1uh_gather_u64 (p0, z1)) -+ -+/* -+** ld1uh_gather_x0_u64_offset: -+** ld1h z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_x0_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1uh_gather_u64base_offset_u64 (p0, z0, x0), -+ z0_res = svld1uh_gather_offset_u64 (p0, z0, x0)) -+ -+/* -+** ld1uh_gather_m2_u64_offset: -+** mov (x[0-9]+), #?-2 -+** ld1h z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_m2_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1uh_gather_u64base_offset_u64 (p0, z0, -2), -+ z0_res = svld1uh_gather_offset_u64 (p0, z0, -2)) -+ -+/* -+** ld1uh_gather_0_u64_offset: -+** ld1h z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_0_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1uh_gather_u64base_offset_u64 (p0, z0, 0), -+ z0_res = svld1uh_gather_offset_u64 (p0, z0, 0)) -+ -+/* -+** ld1uh_gather_5_u64_offset: -+** mov (x[0-9]+), #?5 -+** ld1h z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_5_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1uh_gather_u64base_offset_u64 (p0, z0, 5), -+ z0_res = svld1uh_gather_offset_u64 (p0, z0, 5)) -+ -+/* -+** ld1uh_gather_6_u64_offset: -+** ld1h z0\.d, p0/z, \[z0\.d, #6\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_6_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1uh_gather_u64base_offset_u64 (p0, z0, 6), -+ z0_res = svld1uh_gather_offset_u64 (p0, z0, 6)) -+ -+/* -+** ld1uh_gather_62_u64_offset: -+** ld1h z0\.d, p0/z, \[z0\.d, #62\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_62_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1uh_gather_u64base_offset_u64 (p0, z0, 62), -+ z0_res = svld1uh_gather_offset_u64 (p0, z0, 62)) -+ -+/* -+** ld1uh_gather_64_u64_offset: -+** mov (x[0-9]+), #?64 -+** ld1h z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ 
-+TEST_LOAD_GATHER_ZS (ld1uh_gather_64_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1uh_gather_u64base_offset_u64 (p0, z0, 64), -+ z0_res = svld1uh_gather_offset_u64 (p0, z0, 64)) -+ -+/* -+** ld1uh_gather_x0_u64_index: -+** lsl (x[0-9]+), x0, #?1 -+** ld1h z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_x0_u64_index, svuint64_t, svuint64_t, -+ z0_res = svld1uh_gather_u64base_index_u64 (p0, z0, x0), -+ z0_res = svld1uh_gather_index_u64 (p0, z0, x0)) -+ -+/* -+** ld1uh_gather_m1_u64_index: -+** mov (x[0-9]+), #?-2 -+** ld1h z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_m1_u64_index, svuint64_t, svuint64_t, -+ z0_res = svld1uh_gather_u64base_index_u64 (p0, z0, -1), -+ z0_res = svld1uh_gather_index_u64 (p0, z0, -1)) -+ -+/* -+** ld1uh_gather_0_u64_index: -+** ld1h z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_0_u64_index, svuint64_t, svuint64_t, -+ z0_res = svld1uh_gather_u64base_index_u64 (p0, z0, 0), -+ z0_res = svld1uh_gather_index_u64 (p0, z0, 0)) -+ -+/* -+** ld1uh_gather_5_u64_index: -+** ld1h z0\.d, p0/z, \[z0\.d, #10\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_5_u64_index, svuint64_t, svuint64_t, -+ z0_res = svld1uh_gather_u64base_index_u64 (p0, z0, 5), -+ z0_res = svld1uh_gather_index_u64 (p0, z0, 5)) -+ -+/* -+** ld1uh_gather_31_u64_index: -+** ld1h z0\.d, p0/z, \[z0\.d, #62\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_31_u64_index, svuint64_t, svuint64_t, -+ z0_res = svld1uh_gather_u64base_index_u64 (p0, z0, 31), -+ z0_res = svld1uh_gather_index_u64 (p0, z0, 31)) -+ -+/* -+** ld1uh_gather_32_u64_index: -+** mov (x[0-9]+), #?64 -+** ld1h z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uh_gather_32_u64_index, svuint64_t, svuint64_t, -+ z0_res = svld1uh_gather_u64base_index_u64 (p0, z0, 32), -+ z0_res = svld1uh_gather_index_u64 (p0, z0, 32)) -+ -+/* -+** ld1uh_gather_x0_u64_s64offset: -+** ld1h z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_x0_u64_s64offset, svuint64_t, uint16_t, svint64_t, -+ z0_res = svld1uh_gather_s64offset_u64 (p0, x0, z0), -+ z0_res = svld1uh_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ld1uh_gather_tied1_u64_s64offset: -+** ld1h z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_tied1_u64_s64offset, svuint64_t, uint16_t, svint64_t, -+ z0_res = svld1uh_gather_s64offset_u64 (p0, x0, z0), -+ z0_res = svld1uh_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ld1uh_gather_untied_u64_s64offset: -+** ld1h z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_untied_u64_s64offset, svuint64_t, uint16_t, svint64_t, -+ z0_res = svld1uh_gather_s64offset_u64 (p0, x0, z1), -+ z0_res = svld1uh_gather_offset_u64 (p0, x0, z1)) -+ -+/* -+** ld1uh_gather_ext_u64_s64offset: -+** ld1h z0\.d, p0/z, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_ext_u64_s64offset, svuint64_t, uint16_t, svint64_t, -+ z0_res = svld1uh_gather_s64offset_u64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svld1uh_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ld1uh_gather_x0_u64_u64offset: -+** ld1h z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_x0_u64_u64offset, svuint64_t, uint16_t, svuint64_t, -+ z0_res = svld1uh_gather_u64offset_u64 (p0, x0, z0), -+ z0_res = svld1uh_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ld1uh_gather_tied1_u64_u64offset: -+** ld1h z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ 
(ld1uh_gather_tied1_u64_u64offset, svuint64_t, uint16_t, svuint64_t, -+ z0_res = svld1uh_gather_u64offset_u64 (p0, x0, z0), -+ z0_res = svld1uh_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ld1uh_gather_untied_u64_u64offset: -+** ld1h z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_untied_u64_u64offset, svuint64_t, uint16_t, svuint64_t, -+ z0_res = svld1uh_gather_u64offset_u64 (p0, x0, z1), -+ z0_res = svld1uh_gather_offset_u64 (p0, x0, z1)) -+ -+/* -+** ld1uh_gather_ext_u64_u64offset: -+** ld1h z0\.d, p0/z, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_ext_u64_u64offset, svuint64_t, uint16_t, svuint64_t, -+ z0_res = svld1uh_gather_u64offset_u64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svld1uh_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ld1uh_gather_x0_u64_s64index: -+** ld1h z0\.d, p0/z, \[x0, z0\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_x0_u64_s64index, svuint64_t, uint16_t, svint64_t, -+ z0_res = svld1uh_gather_s64index_u64 (p0, x0, z0), -+ z0_res = svld1uh_gather_index_u64 (p0, x0, z0)) -+ -+/* -+** ld1uh_gather_tied1_u64_s64index: -+** ld1h z0\.d, p0/z, \[x0, z0\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_tied1_u64_s64index, svuint64_t, uint16_t, svint64_t, -+ z0_res = svld1uh_gather_s64index_u64 (p0, x0, z0), -+ z0_res = svld1uh_gather_index_u64 (p0, x0, z0)) -+ -+/* -+** ld1uh_gather_untied_u64_s64index: -+** ld1h z0\.d, p0/z, \[x0, z1\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_untied_u64_s64index, svuint64_t, uint16_t, svint64_t, -+ z0_res = svld1uh_gather_s64index_u64 (p0, x0, z1), -+ z0_res = svld1uh_gather_index_u64 (p0, x0, z1)) -+ -+/* -+** ld1uh_gather_ext_u64_s64index: -+** ld1h z0\.d, p0/z, \[x0, z1\.d, sxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_ext_u64_s64index, svuint64_t, uint16_t, svint64_t, -+ z0_res = svld1uh_gather_s64index_u64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svld1uh_gather_index_u64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ld1uh_gather_x0_u64_u64index: -+** ld1h z0\.d, p0/z, \[x0, z0\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_x0_u64_u64index, svuint64_t, uint16_t, svuint64_t, -+ z0_res = svld1uh_gather_u64index_u64 (p0, x0, z0), -+ z0_res = svld1uh_gather_index_u64 (p0, x0, z0)) -+ -+/* -+** ld1uh_gather_tied1_u64_u64index: -+** ld1h z0\.d, p0/z, \[x0, z0\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_tied1_u64_u64index, svuint64_t, uint16_t, svuint64_t, -+ z0_res = svld1uh_gather_u64index_u64 (p0, x0, z0), -+ z0_res = svld1uh_gather_index_u64 (p0, x0, z0)) -+ -+/* -+** ld1uh_gather_untied_u64_u64index: -+** ld1h z0\.d, p0/z, \[x0, z1\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_untied_u64_u64index, svuint64_t, uint16_t, svuint64_t, -+ z0_res = svld1uh_gather_u64index_u64 (p0, x0, z1), -+ z0_res = svld1uh_gather_index_u64 (p0, x0, z1)) -+ -+/* -+** ld1uh_gather_ext_u64_u64index: -+** ld1h z0\.d, p0/z, \[x0, z1\.d, uxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uh_gather_ext_u64_u64index, svuint64_t, uint16_t, svuint64_t, -+ z0_res = svld1uh_gather_u64index_u64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svld1uh_gather_index_u64 (p0, x0, svextw_x (p0, z1))) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_s32.c -new file mode 100644 -index 000000000..df1ce974b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_s32.c -@@ -0,0 +1,158 @@ -+/* { dg-final { 
check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1uh_s32_base: -+** ld1h z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1uh_s32_base, svint32_t, uint16_t, -+ z0 = svld1uh_s32 (p0, x0), -+ z0 = svld1uh_s32 (p0, x0)) -+ -+/* -+** ld1uh_s32_index: -+** ld1h z0\.s, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ld1uh_s32_index, svint32_t, uint16_t, -+ z0 = svld1uh_s32 (p0, x0 + x1), -+ z0 = svld1uh_s32 (p0, x0 + x1)) -+ -+/* -+** ld1uh_s32_1: -+** ld1h z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uh_s32_1, svint32_t, uint16_t, -+ z0 = svld1uh_s32 (p0, x0 + svcntw ()), -+ z0 = svld1uh_s32 (p0, x0 + svcntw ())) -+ -+/* -+** ld1uh_s32_7: -+** ld1h z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uh_s32_7, svint32_t, uint16_t, -+ z0 = svld1uh_s32 (p0, x0 + svcntw () * 7), -+ z0 = svld1uh_s32 (p0, x0 + svcntw () * 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1uh_s32_8: -+** incb x0, all, mul #4 -+** ld1h z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1uh_s32_8, svint32_t, uint16_t, -+ z0 = svld1uh_s32 (p0, x0 + svcntw () * 8), -+ z0 = svld1uh_s32 (p0, x0 + svcntw () * 8)) -+ -+/* -+** ld1uh_s32_m1: -+** ld1h z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uh_s32_m1, svint32_t, uint16_t, -+ z0 = svld1uh_s32 (p0, x0 - svcntw ()), -+ z0 = svld1uh_s32 (p0, x0 - svcntw ())) -+ -+/* -+** ld1uh_s32_m8: -+** ld1h z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uh_s32_m8, svint32_t, uint16_t, -+ z0 = svld1uh_s32 (p0, x0 - svcntw () * 8), -+ z0 = svld1uh_s32 (p0, x0 - svcntw () * 8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1uh_s32_m9: -+** dech x0, all, mul #9 -+** ld1h z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1uh_s32_m9, svint32_t, uint16_t, -+ z0 = svld1uh_s32 (p0, x0 - svcntw () * 9), -+ z0 = svld1uh_s32 (p0, x0 - svcntw () * 9)) -+ -+/* -+** ld1uh_vnum_s32_0: -+** ld1h z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1uh_vnum_s32_0, svint32_t, uint16_t, -+ z0 = svld1uh_vnum_s32 (p0, x0, 0), -+ z0 = svld1uh_vnum_s32 (p0, x0, 0)) -+ -+/* -+** ld1uh_vnum_s32_1: -+** ld1h z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uh_vnum_s32_1, svint32_t, uint16_t, -+ z0 = svld1uh_vnum_s32 (p0, x0, 1), -+ z0 = svld1uh_vnum_s32 (p0, x0, 1)) -+ -+/* -+** ld1uh_vnum_s32_7: -+** ld1h z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uh_vnum_s32_7, svint32_t, uint16_t, -+ z0 = svld1uh_vnum_s32 (p0, x0, 7), -+ z0 = svld1uh_vnum_s32 (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1uh_vnum_s32_8: -+** incb x0, all, mul #4 -+** ld1h z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1uh_vnum_s32_8, svint32_t, uint16_t, -+ z0 = svld1uh_vnum_s32 (p0, x0, 8), -+ z0 = svld1uh_vnum_s32 (p0, x0, 8)) -+ -+/* -+** ld1uh_vnum_s32_m1: -+** ld1h z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uh_vnum_s32_m1, svint32_t, uint16_t, -+ z0 = svld1uh_vnum_s32 (p0, x0, -1), -+ z0 = svld1uh_vnum_s32 (p0, x0, -1)) -+ -+/* -+** ld1uh_vnum_s32_m8: -+** ld1h z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uh_vnum_s32_m8, svint32_t, uint16_t, -+ z0 = svld1uh_vnum_s32 (p0, x0, -8), -+ z0 = svld1uh_vnum_s32 (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld1uh_vnum_s32_m9: -+** dech x0, all, mul #9 -+** ld1h z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1uh_vnum_s32_m9, svint32_t, uint16_t, -+ z0 = svld1uh_vnum_s32 (p0, x0, -9), -+ z0 = svld1uh_vnum_s32 (p0, x0, -9)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld1uh_vnum_s32_x1: -+** cnth (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld1h z0\.s, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld1uh_vnum_s32_x1, svint32_t, uint16_t, -+ z0 = svld1uh_vnum_s32 (p0, x0, x1), -+ z0 = svld1uh_vnum_s32 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_s64.c -new file mode 100644 -index 000000000..7c3ab0aee ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_s64.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1uh_s64_base: -+** ld1h z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1uh_s64_base, svint64_t, uint16_t, -+ z0 = svld1uh_s64 (p0, x0), -+ z0 = svld1uh_s64 (p0, x0)) -+ -+/* -+** ld1uh_s64_index: -+** ld1h z0\.d, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ld1uh_s64_index, svint64_t, uint16_t, -+ z0 = svld1uh_s64 (p0, x0 + x1), -+ z0 = svld1uh_s64 (p0, x0 + x1)) -+ -+/* -+** ld1uh_s64_1: -+** ld1h z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uh_s64_1, svint64_t, uint16_t, -+ z0 = svld1uh_s64 (p0, x0 + svcntd ()), -+ z0 = svld1uh_s64 (p0, x0 + svcntd ())) -+ -+/* -+** ld1uh_s64_7: -+** ld1h z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uh_s64_7, svint64_t, uint16_t, -+ z0 = svld1uh_s64 (p0, x0 + svcntd () * 7), -+ z0 = svld1uh_s64 (p0, x0 + svcntd () * 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1uh_s64_8: -+** incb x0, all, mul #2 -+** ld1h z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1uh_s64_8, svint64_t, uint16_t, -+ z0 = svld1uh_s64 (p0, x0 + svcntd () * 8), -+ z0 = svld1uh_s64 (p0, x0 + svcntd () * 8)) -+ -+/* -+** ld1uh_s64_m1: -+** ld1h z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uh_s64_m1, svint64_t, uint16_t, -+ z0 = svld1uh_s64 (p0, x0 - svcntd ()), -+ z0 = svld1uh_s64 (p0, x0 - svcntd ())) -+ -+/* -+** ld1uh_s64_m8: -+** ld1h z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uh_s64_m8, svint64_t, uint16_t, -+ z0 = svld1uh_s64 (p0, x0 - svcntd () * 8), -+ z0 = svld1uh_s64 (p0, x0 - svcntd () * 8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1uh_s64_m9: -+** decw x0, all, mul #9 -+** ld1h z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1uh_s64_m9, svint64_t, uint16_t, -+ z0 = svld1uh_s64 (p0, x0 - svcntd () * 9), -+ z0 = svld1uh_s64 (p0, x0 - svcntd () * 9)) -+ -+/* -+** ld1uh_vnum_s64_0: -+** ld1h z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1uh_vnum_s64_0, svint64_t, uint16_t, -+ z0 = svld1uh_vnum_s64 (p0, x0, 0), -+ z0 = svld1uh_vnum_s64 (p0, x0, 0)) -+ -+/* -+** ld1uh_vnum_s64_1: -+** ld1h z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uh_vnum_s64_1, svint64_t, uint16_t, -+ z0 = svld1uh_vnum_s64 (p0, x0, 1), -+ z0 = svld1uh_vnum_s64 (p0, x0, 1)) -+ -+/* -+** ld1uh_vnum_s64_7: -+** ld1h z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uh_vnum_s64_7, svint64_t, uint16_t, -+ z0 = svld1uh_vnum_s64 (p0, x0, 7), -+ z0 = svld1uh_vnum_s64 (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld1uh_vnum_s64_8: -+** incb x0, all, mul #2 -+** ld1h z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1uh_vnum_s64_8, svint64_t, uint16_t, -+ z0 = svld1uh_vnum_s64 (p0, x0, 8), -+ z0 = svld1uh_vnum_s64 (p0, x0, 8)) -+ -+/* -+** ld1uh_vnum_s64_m1: -+** ld1h z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uh_vnum_s64_m1, svint64_t, uint16_t, -+ z0 = svld1uh_vnum_s64 (p0, x0, -1), -+ z0 = svld1uh_vnum_s64 (p0, x0, -1)) -+ -+/* -+** ld1uh_vnum_s64_m8: -+** ld1h z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uh_vnum_s64_m8, svint64_t, uint16_t, -+ z0 = svld1uh_vnum_s64 (p0, x0, -8), -+ z0 = svld1uh_vnum_s64 (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1uh_vnum_s64_m9: -+** decw x0, all, mul #9 -+** ld1h z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1uh_vnum_s64_m9, svint64_t, uint16_t, -+ z0 = svld1uh_vnum_s64 (p0, x0, -9), -+ z0 = svld1uh_vnum_s64 (p0, x0, -9)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld1uh_vnum_s64_x1: -+** cntw (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld1h z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld1uh_vnum_s64_x1, svint64_t, uint16_t, -+ z0 = svld1uh_vnum_s64 (p0, x0, x1), -+ z0 = svld1uh_vnum_s64 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_u32.c -new file mode 100644 -index 000000000..a07b19259 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_u32.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1uh_u32_base: -+** ld1h z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1uh_u32_base, svuint32_t, uint16_t, -+ z0 = svld1uh_u32 (p0, x0), -+ z0 = svld1uh_u32 (p0, x0)) -+ -+/* -+** ld1uh_u32_index: -+** ld1h z0\.s, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ld1uh_u32_index, svuint32_t, uint16_t, -+ z0 = svld1uh_u32 (p0, x0 + x1), -+ z0 = svld1uh_u32 (p0, x0 + x1)) -+ -+/* -+** ld1uh_u32_1: -+** ld1h z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uh_u32_1, svuint32_t, uint16_t, -+ z0 = svld1uh_u32 (p0, x0 + svcntw ()), -+ z0 = svld1uh_u32 (p0, x0 + svcntw ())) -+ -+/* -+** ld1uh_u32_7: -+** ld1h z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uh_u32_7, svuint32_t, uint16_t, -+ z0 = svld1uh_u32 (p0, x0 + svcntw () * 7), -+ z0 = svld1uh_u32 (p0, x0 + svcntw () * 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1uh_u32_8: -+** incb x0, all, mul #4 -+** ld1h z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1uh_u32_8, svuint32_t, uint16_t, -+ z0 = svld1uh_u32 (p0, x0 + svcntw () * 8), -+ z0 = svld1uh_u32 (p0, x0 + svcntw () * 8)) -+ -+/* -+** ld1uh_u32_m1: -+** ld1h z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uh_u32_m1, svuint32_t, uint16_t, -+ z0 = svld1uh_u32 (p0, x0 - svcntw ()), -+ z0 = svld1uh_u32 (p0, x0 - svcntw ())) -+ -+/* -+** ld1uh_u32_m8: -+** ld1h z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uh_u32_m8, svuint32_t, uint16_t, -+ z0 = svld1uh_u32 (p0, x0 - svcntw () * 8), -+ z0 = svld1uh_u32 (p0, x0 - svcntw () * 8)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld1uh_u32_m9: -+** dech x0, all, mul #9 -+** ld1h z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1uh_u32_m9, svuint32_t, uint16_t, -+ z0 = svld1uh_u32 (p0, x0 - svcntw () * 9), -+ z0 = svld1uh_u32 (p0, x0 - svcntw () * 9)) -+ -+/* -+** ld1uh_vnum_u32_0: -+** ld1h z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1uh_vnum_u32_0, svuint32_t, uint16_t, -+ z0 = svld1uh_vnum_u32 (p0, x0, 0), -+ z0 = svld1uh_vnum_u32 (p0, x0, 0)) -+ -+/* -+** ld1uh_vnum_u32_1: -+** ld1h z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uh_vnum_u32_1, svuint32_t, uint16_t, -+ z0 = svld1uh_vnum_u32 (p0, x0, 1), -+ z0 = svld1uh_vnum_u32 (p0, x0, 1)) -+ -+/* -+** ld1uh_vnum_u32_7: -+** ld1h z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uh_vnum_u32_7, svuint32_t, uint16_t, -+ z0 = svld1uh_vnum_u32 (p0, x0, 7), -+ z0 = svld1uh_vnum_u32 (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1uh_vnum_u32_8: -+** incb x0, all, mul #4 -+** ld1h z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1uh_vnum_u32_8, svuint32_t, uint16_t, -+ z0 = svld1uh_vnum_u32 (p0, x0, 8), -+ z0 = svld1uh_vnum_u32 (p0, x0, 8)) -+ -+/* -+** ld1uh_vnum_u32_m1: -+** ld1h z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uh_vnum_u32_m1, svuint32_t, uint16_t, -+ z0 = svld1uh_vnum_u32 (p0, x0, -1), -+ z0 = svld1uh_vnum_u32 (p0, x0, -1)) -+ -+/* -+** ld1uh_vnum_u32_m8: -+** ld1h z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uh_vnum_u32_m8, svuint32_t, uint16_t, -+ z0 = svld1uh_vnum_u32 (p0, x0, -8), -+ z0 = svld1uh_vnum_u32 (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1uh_vnum_u32_m9: -+** dech x0, all, mul #9 -+** ld1h z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1uh_vnum_u32_m9, svuint32_t, uint16_t, -+ z0 = svld1uh_vnum_u32 (p0, x0, -9), -+ z0 = svld1uh_vnum_u32 (p0, x0, -9)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld1uh_vnum_u32_x1: -+** cnth (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld1h z0\.s, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld1uh_vnum_u32_x1, svuint32_t, uint16_t, -+ z0 = svld1uh_vnum_u32 (p0, x0, x1), -+ z0 = svld1uh_vnum_u32 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_u64.c -new file mode 100644 -index 000000000..79be01fbd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uh_u64.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1uh_u64_base: -+** ld1h z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1uh_u64_base, svuint64_t, uint16_t, -+ z0 = svld1uh_u64 (p0, x0), -+ z0 = svld1uh_u64 (p0, x0)) -+ -+/* -+** ld1uh_u64_index: -+** ld1h z0\.d, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ld1uh_u64_index, svuint64_t, uint16_t, -+ z0 = svld1uh_u64 (p0, x0 + x1), -+ z0 = svld1uh_u64 (p0, x0 + x1)) -+ -+/* -+** ld1uh_u64_1: -+** ld1h z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uh_u64_1, svuint64_t, uint16_t, -+ z0 = svld1uh_u64 (p0, x0 + svcntd ()), -+ z0 = svld1uh_u64 (p0, x0 + svcntd ())) -+ -+/* -+** ld1uh_u64_7: -+** ld1h z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uh_u64_7, svuint64_t, uint16_t, -+ z0 = svld1uh_u64 (p0, x0 + svcntd () * 7), -+ z0 = svld1uh_u64 (p0, x0 + svcntd () * 7)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld1uh_u64_8: -+** incb x0, all, mul #2 -+** ld1h z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1uh_u64_8, svuint64_t, uint16_t, -+ z0 = svld1uh_u64 (p0, x0 + svcntd () * 8), -+ z0 = svld1uh_u64 (p0, x0 + svcntd () * 8)) -+ -+/* -+** ld1uh_u64_m1: -+** ld1h z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uh_u64_m1, svuint64_t, uint16_t, -+ z0 = svld1uh_u64 (p0, x0 - svcntd ()), -+ z0 = svld1uh_u64 (p0, x0 - svcntd ())) -+ -+/* -+** ld1uh_u64_m8: -+** ld1h z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uh_u64_m8, svuint64_t, uint16_t, -+ z0 = svld1uh_u64 (p0, x0 - svcntd () * 8), -+ z0 = svld1uh_u64 (p0, x0 - svcntd () * 8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1uh_u64_m9: -+** decw x0, all, mul #9 -+** ld1h z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1uh_u64_m9, svuint64_t, uint16_t, -+ z0 = svld1uh_u64 (p0, x0 - svcntd () * 9), -+ z0 = svld1uh_u64 (p0, x0 - svcntd () * 9)) -+ -+/* -+** ld1uh_vnum_u64_0: -+** ld1h z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1uh_vnum_u64_0, svuint64_t, uint16_t, -+ z0 = svld1uh_vnum_u64 (p0, x0, 0), -+ z0 = svld1uh_vnum_u64 (p0, x0, 0)) -+ -+/* -+** ld1uh_vnum_u64_1: -+** ld1h z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uh_vnum_u64_1, svuint64_t, uint16_t, -+ z0 = svld1uh_vnum_u64 (p0, x0, 1), -+ z0 = svld1uh_vnum_u64 (p0, x0, 1)) -+ -+/* -+** ld1uh_vnum_u64_7: -+** ld1h z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uh_vnum_u64_7, svuint64_t, uint16_t, -+ z0 = svld1uh_vnum_u64 (p0, x0, 7), -+ z0 = svld1uh_vnum_u64 (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1uh_vnum_u64_8: -+** incb x0, all, mul #2 -+** ld1h z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1uh_vnum_u64_8, svuint64_t, uint16_t, -+ z0 = svld1uh_vnum_u64 (p0, x0, 8), -+ z0 = svld1uh_vnum_u64 (p0, x0, 8)) -+ -+/* -+** ld1uh_vnum_u64_m1: -+** ld1h z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uh_vnum_u64_m1, svuint64_t, uint16_t, -+ z0 = svld1uh_vnum_u64 (p0, x0, -1), -+ z0 = svld1uh_vnum_u64 (p0, x0, -1)) -+ -+/* -+** ld1uh_vnum_u64_m8: -+** ld1h z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uh_vnum_u64_m8, svuint64_t, uint16_t, -+ z0 = svld1uh_vnum_u64 (p0, x0, -8), -+ z0 = svld1uh_vnum_u64 (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1uh_vnum_u64_m9: -+** decw x0, all, mul #9 -+** ld1h z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1uh_vnum_u64_m9, svuint64_t, uint16_t, -+ z0 = svld1uh_vnum_u64 (p0, x0, -9), -+ z0 = svld1uh_vnum_u64 (p0, x0, -9)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld1uh_vnum_u64_x1: -+** cntw (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld1h z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld1uh_vnum_u64_x1, svuint64_t, uint16_t, -+ z0 = svld1uh_vnum_u64 (p0, x0, x1), -+ z0 = svld1uh_vnum_u64 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_gather_s64.c -new file mode 100644 -index 000000000..f4e9d5db9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_gather_s64.c -@@ -0,0 +1,308 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1uw_gather_s64_tied1: -+** ld1w z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uw_gather_s64_tied1, svint64_t, svuint64_t, -+ z0_res = svld1uw_gather_u64base_s64 (p0, z0), -+ z0_res = svld1uw_gather_s64 (p0, z0)) -+ -+/* -+** ld1uw_gather_s64_untied: -+** ld1w z0\.d, p0/z, \[z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uw_gather_s64_untied, svint64_t, svuint64_t, -+ z0_res = svld1uw_gather_u64base_s64 (p0, z1), -+ z0_res = svld1uw_gather_s64 (p0, z1)) -+ -+/* -+** ld1uw_gather_x0_s64_offset: -+** ld1w z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uw_gather_x0_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1uw_gather_u64base_offset_s64 (p0, z0, x0), -+ z0_res = svld1uw_gather_offset_s64 (p0, z0, x0)) -+ -+/* -+** ld1uw_gather_m4_s64_offset: -+** mov (x[0-9]+), #?-4 -+** ld1w z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uw_gather_m4_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1uw_gather_u64base_offset_s64 (p0, z0, -4), -+ z0_res = svld1uw_gather_offset_s64 (p0, z0, -4)) -+ -+/* -+** ld1uw_gather_0_s64_offset: -+** ld1w z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uw_gather_0_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1uw_gather_u64base_offset_s64 (p0, z0, 0), -+ z0_res = svld1uw_gather_offset_s64 (p0, z0, 0)) -+ -+/* -+** ld1uw_gather_5_s64_offset: -+** mov (x[0-9]+), #?5 -+** ld1w z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uw_gather_5_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1uw_gather_u64base_offset_s64 (p0, z0, 5), -+ z0_res = svld1uw_gather_offset_s64 (p0, z0, 5)) -+ -+/* -+** ld1uw_gather_6_s64_offset: -+** mov (x[0-9]+), #?6 -+** ld1w z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uw_gather_6_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1uw_gather_u64base_offset_s64 (p0, z0, 6), -+ z0_res = svld1uw_gather_offset_s64 (p0, z0, 6)) -+ -+/* -+** ld1uw_gather_7_s64_offset: -+** mov (x[0-9]+), #?7 -+** ld1w z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uw_gather_7_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1uw_gather_u64base_offset_s64 (p0, z0, 7), -+ z0_res = svld1uw_gather_offset_s64 (p0, z0, 7)) -+ -+/* -+** ld1uw_gather_8_s64_offset: -+** ld1w z0\.d, p0/z, \[z0\.d, #8\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uw_gather_8_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1uw_gather_u64base_offset_s64 (p0, z0, 8), -+ z0_res = svld1uw_gather_offset_s64 (p0, z0, 8)) -+ -+/* -+** ld1uw_gather_124_s64_offset: -+** ld1w z0\.d, p0/z, \[z0\.d, #124\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uw_gather_124_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1uw_gather_u64base_offset_s64 (p0, z0, 124), -+ z0_res = svld1uw_gather_offset_s64 (p0, z0, 124)) -+ -+/* -+** ld1uw_gather_128_s64_offset: -+** mov (x[0-9]+), #?128 -+** ld1w z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uw_gather_128_s64_offset, svint64_t, svuint64_t, -+ z0_res = svld1uw_gather_u64base_offset_s64 (p0, z0, 128), -+ z0_res = svld1uw_gather_offset_s64 (p0, z0, 128)) -+ -+/* -+** ld1uw_gather_x0_s64_index: -+** lsl (x[0-9]+), x0, #?2 -+** ld1w z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uw_gather_x0_s64_index, svint64_t, svuint64_t, -+ z0_res = svld1uw_gather_u64base_index_s64 (p0, z0, x0), -+ z0_res = svld1uw_gather_index_s64 (p0, z0, x0)) -+ -+/* -+** ld1uw_gather_m1_s64_index: -+** mov (x[0-9]+), #?-4 -+** ld1w z0\.d, p0/z, \[\1, z0\.d\] 
-+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uw_gather_m1_s64_index, svint64_t, svuint64_t, -+ z0_res = svld1uw_gather_u64base_index_s64 (p0, z0, -1), -+ z0_res = svld1uw_gather_index_s64 (p0, z0, -1)) -+ -+/* -+** ld1uw_gather_0_s64_index: -+** ld1w z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uw_gather_0_s64_index, svint64_t, svuint64_t, -+ z0_res = svld1uw_gather_u64base_index_s64 (p0, z0, 0), -+ z0_res = svld1uw_gather_index_s64 (p0, z0, 0)) -+ -+/* -+** ld1uw_gather_5_s64_index: -+** ld1w z0\.d, p0/z, \[z0\.d, #20\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uw_gather_5_s64_index, svint64_t, svuint64_t, -+ z0_res = svld1uw_gather_u64base_index_s64 (p0, z0, 5), -+ z0_res = svld1uw_gather_index_s64 (p0, z0, 5)) -+ -+/* -+** ld1uw_gather_31_s64_index: -+** ld1w z0\.d, p0/z, \[z0\.d, #124\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uw_gather_31_s64_index, svint64_t, svuint64_t, -+ z0_res = svld1uw_gather_u64base_index_s64 (p0, z0, 31), -+ z0_res = svld1uw_gather_index_s64 (p0, z0, 31)) -+ -+/* -+** ld1uw_gather_32_s64_index: -+** mov (x[0-9]+), #?128 -+** ld1w z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uw_gather_32_s64_index, svint64_t, svuint64_t, -+ z0_res = svld1uw_gather_u64base_index_s64 (p0, z0, 32), -+ z0_res = svld1uw_gather_index_s64 (p0, z0, 32)) -+ -+/* -+** ld1uw_gather_x0_s64_s64offset: -+** ld1w z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uw_gather_x0_s64_s64offset, svint64_t, uint32_t, svint64_t, -+ z0_res = svld1uw_gather_s64offset_s64 (p0, x0, z0), -+ z0_res = svld1uw_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ld1uw_gather_tied1_s64_s64offset: -+** ld1w z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uw_gather_tied1_s64_s64offset, svint64_t, uint32_t, svint64_t, -+ z0_res = svld1uw_gather_s64offset_s64 (p0, x0, z0), -+ z0_res = svld1uw_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ld1uw_gather_untied_s64_s64offset: -+** ld1w z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uw_gather_untied_s64_s64offset, svint64_t, uint32_t, svint64_t, -+ z0_res = svld1uw_gather_s64offset_s64 (p0, x0, z1), -+ z0_res = svld1uw_gather_offset_s64 (p0, x0, z1)) -+ -+/* -+** ld1uw_gather_ext_s64_s64offset: -+** ld1w z0\.d, p0/z, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uw_gather_ext_s64_s64offset, svint64_t, uint32_t, svint64_t, -+ z0_res = svld1uw_gather_s64offset_s64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svld1uw_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ld1uw_gather_x0_s64_u64offset: -+** ld1w z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uw_gather_x0_s64_u64offset, svint64_t, uint32_t, svuint64_t, -+ z0_res = svld1uw_gather_u64offset_s64 (p0, x0, z0), -+ z0_res = svld1uw_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ld1uw_gather_tied1_s64_u64offset: -+** ld1w z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uw_gather_tied1_s64_u64offset, svint64_t, uint32_t, svuint64_t, -+ z0_res = svld1uw_gather_u64offset_s64 (p0, x0, z0), -+ z0_res = svld1uw_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ld1uw_gather_untied_s64_u64offset: -+** ld1w z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uw_gather_untied_s64_u64offset, svint64_t, uint32_t, svuint64_t, -+ z0_res = svld1uw_gather_u64offset_s64 (p0, x0, z1), -+ z0_res = svld1uw_gather_offset_s64 (p0, x0, z1)) -+ -+/* -+** ld1uw_gather_ext_s64_u64offset: -+** ld1w z0\.d, p0/z, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ 
(ld1uw_gather_ext_s64_u64offset, svint64_t, uint32_t, svuint64_t, -+ z0_res = svld1uw_gather_u64offset_s64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svld1uw_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ld1uw_gather_x0_s64_s64index: -+** ld1w z0\.d, p0/z, \[x0, z0\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uw_gather_x0_s64_s64index, svint64_t, uint32_t, svint64_t, -+ z0_res = svld1uw_gather_s64index_s64 (p0, x0, z0), -+ z0_res = svld1uw_gather_index_s64 (p0, x0, z0)) -+ -+/* -+** ld1uw_gather_tied1_s64_s64index: -+** ld1w z0\.d, p0/z, \[x0, z0\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uw_gather_tied1_s64_s64index, svint64_t, uint32_t, svint64_t, -+ z0_res = svld1uw_gather_s64index_s64 (p0, x0, z0), -+ z0_res = svld1uw_gather_index_s64 (p0, x0, z0)) -+ -+/* -+** ld1uw_gather_untied_s64_s64index: -+** ld1w z0\.d, p0/z, \[x0, z1\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uw_gather_untied_s64_s64index, svint64_t, uint32_t, svint64_t, -+ z0_res = svld1uw_gather_s64index_s64 (p0, x0, z1), -+ z0_res = svld1uw_gather_index_s64 (p0, x0, z1)) -+ -+/* -+** ld1uw_gather_ext_s64_s64index: -+** ld1w z0\.d, p0/z, \[x0, z1\.d, sxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uw_gather_ext_s64_s64index, svint64_t, uint32_t, svint64_t, -+ z0_res = svld1uw_gather_s64index_s64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svld1uw_gather_index_s64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ld1uw_gather_x0_s64_u64index: -+** ld1w z0\.d, p0/z, \[x0, z0\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uw_gather_x0_s64_u64index, svint64_t, uint32_t, svuint64_t, -+ z0_res = svld1uw_gather_u64index_s64 (p0, x0, z0), -+ z0_res = svld1uw_gather_index_s64 (p0, x0, z0)) -+ -+/* -+** ld1uw_gather_tied1_s64_u64index: -+** ld1w z0\.d, p0/z, \[x0, z0\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uw_gather_tied1_s64_u64index, svint64_t, uint32_t, svuint64_t, -+ z0_res = svld1uw_gather_u64index_s64 (p0, x0, z0), -+ z0_res = svld1uw_gather_index_s64 (p0, x0, z0)) -+ -+/* -+** ld1uw_gather_untied_s64_u64index: -+** ld1w z0\.d, p0/z, \[x0, z1\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uw_gather_untied_s64_u64index, svint64_t, uint32_t, svuint64_t, -+ z0_res = svld1uw_gather_u64index_s64 (p0, x0, z1), -+ z0_res = svld1uw_gather_index_s64 (p0, x0, z1)) -+ -+/* -+** ld1uw_gather_ext_s64_u64index: -+** ld1w z0\.d, p0/z, \[x0, z1\.d, uxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uw_gather_ext_s64_u64index, svint64_t, uint32_t, svuint64_t, -+ z0_res = svld1uw_gather_u64index_s64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svld1uw_gather_index_s64 (p0, x0, svextw_x (p0, z1))) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_gather_u64.c -new file mode 100644 -index 000000000..854d19233 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_gather_u64.c -@@ -0,0 +1,308 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1uw_gather_u64_tied1: -+** ld1w z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uw_gather_u64_tied1, svuint64_t, svuint64_t, -+ z0_res = svld1uw_gather_u64base_u64 (p0, z0), -+ z0_res = svld1uw_gather_u64 (p0, z0)) -+ -+/* -+** ld1uw_gather_u64_untied: -+** ld1w z0\.d, p0/z, \[z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uw_gather_u64_untied, svuint64_t, svuint64_t, -+ z0_res = svld1uw_gather_u64base_u64 (p0, z1), -+ z0_res = svld1uw_gather_u64 (p0, z1)) -+ -+/* -+** ld1uw_gather_x0_u64_offset: -+** ld1w z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uw_gather_x0_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1uw_gather_u64base_offset_u64 (p0, z0, x0), -+ z0_res = svld1uw_gather_offset_u64 (p0, z0, x0)) -+ -+/* -+** ld1uw_gather_m4_u64_offset: -+** mov (x[0-9]+), #?-4 -+** ld1w z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uw_gather_m4_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1uw_gather_u64base_offset_u64 (p0, z0, -4), -+ z0_res = svld1uw_gather_offset_u64 (p0, z0, -4)) -+ -+/* -+** ld1uw_gather_0_u64_offset: -+** ld1w z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uw_gather_0_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1uw_gather_u64base_offset_u64 (p0, z0, 0), -+ z0_res = svld1uw_gather_offset_u64 (p0, z0, 0)) -+ -+/* -+** ld1uw_gather_5_u64_offset: -+** mov (x[0-9]+), #?5 -+** ld1w z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uw_gather_5_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1uw_gather_u64base_offset_u64 (p0, z0, 5), -+ z0_res = svld1uw_gather_offset_u64 (p0, z0, 5)) -+ -+/* -+** ld1uw_gather_6_u64_offset: -+** mov (x[0-9]+), #?6 -+** ld1w z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uw_gather_6_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1uw_gather_u64base_offset_u64 (p0, z0, 6), -+ z0_res = svld1uw_gather_offset_u64 (p0, z0, 6)) -+ -+/* -+** ld1uw_gather_7_u64_offset: -+** mov (x[0-9]+), #?7 -+** ld1w z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uw_gather_7_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1uw_gather_u64base_offset_u64 (p0, z0, 7), -+ z0_res = svld1uw_gather_offset_u64 (p0, z0, 7)) -+ -+/* -+** ld1uw_gather_8_u64_offset: -+** ld1w z0\.d, p0/z, \[z0\.d, #8\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uw_gather_8_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1uw_gather_u64base_offset_u64 (p0, z0, 8), -+ z0_res = svld1uw_gather_offset_u64 (p0, z0, 8)) -+ -+/* -+** ld1uw_gather_124_u64_offset: -+** ld1w z0\.d, p0/z, \[z0\.d, #124\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uw_gather_124_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1uw_gather_u64base_offset_u64 (p0, z0, 124), -+ z0_res = svld1uw_gather_offset_u64 (p0, z0, 124)) -+ -+/* -+** ld1uw_gather_128_u64_offset: -+** mov (x[0-9]+), #?128 -+** ld1w z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uw_gather_128_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svld1uw_gather_u64base_offset_u64 (p0, z0, 128), -+ z0_res = svld1uw_gather_offset_u64 (p0, z0, 128)) -+ -+/* -+** ld1uw_gather_x0_u64_index: -+** lsl (x[0-9]+), x0, #?2 -+** ld1w z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uw_gather_x0_u64_index, svuint64_t, svuint64_t, -+ z0_res = svld1uw_gather_u64base_index_u64 (p0, z0, x0), -+ z0_res = svld1uw_gather_index_u64 (p0, z0, x0)) -+ -+/* -+** ld1uw_gather_m1_u64_index: -+** mov (x[0-9]+), #?-4 -+** ld1w z0\.d, p0/z, 
\[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uw_gather_m1_u64_index, svuint64_t, svuint64_t, -+ z0_res = svld1uw_gather_u64base_index_u64 (p0, z0, -1), -+ z0_res = svld1uw_gather_index_u64 (p0, z0, -1)) -+ -+/* -+** ld1uw_gather_0_u64_index: -+** ld1w z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uw_gather_0_u64_index, svuint64_t, svuint64_t, -+ z0_res = svld1uw_gather_u64base_index_u64 (p0, z0, 0), -+ z0_res = svld1uw_gather_index_u64 (p0, z0, 0)) -+ -+/* -+** ld1uw_gather_5_u64_index: -+** ld1w z0\.d, p0/z, \[z0\.d, #20\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uw_gather_5_u64_index, svuint64_t, svuint64_t, -+ z0_res = svld1uw_gather_u64base_index_u64 (p0, z0, 5), -+ z0_res = svld1uw_gather_index_u64 (p0, z0, 5)) -+ -+/* -+** ld1uw_gather_31_u64_index: -+** ld1w z0\.d, p0/z, \[z0\.d, #124\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uw_gather_31_u64_index, svuint64_t, svuint64_t, -+ z0_res = svld1uw_gather_u64base_index_u64 (p0, z0, 31), -+ z0_res = svld1uw_gather_index_u64 (p0, z0, 31)) -+ -+/* -+** ld1uw_gather_32_u64_index: -+** mov (x[0-9]+), #?128 -+** ld1w z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ld1uw_gather_32_u64_index, svuint64_t, svuint64_t, -+ z0_res = svld1uw_gather_u64base_index_u64 (p0, z0, 32), -+ z0_res = svld1uw_gather_index_u64 (p0, z0, 32)) -+ -+/* -+** ld1uw_gather_x0_u64_s64offset: -+** ld1w z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uw_gather_x0_u64_s64offset, svuint64_t, uint32_t, svint64_t, -+ z0_res = svld1uw_gather_s64offset_u64 (p0, x0, z0), -+ z0_res = svld1uw_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ld1uw_gather_tied1_u64_s64offset: -+** ld1w z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uw_gather_tied1_u64_s64offset, svuint64_t, uint32_t, svint64_t, -+ z0_res = svld1uw_gather_s64offset_u64 (p0, x0, z0), -+ z0_res = svld1uw_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ld1uw_gather_untied_u64_s64offset: -+** ld1w z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uw_gather_untied_u64_s64offset, svuint64_t, uint32_t, svint64_t, -+ z0_res = svld1uw_gather_s64offset_u64 (p0, x0, z1), -+ z0_res = svld1uw_gather_offset_u64 (p0, x0, z1)) -+ -+/* -+** ld1uw_gather_ext_u64_s64offset: -+** ld1w z0\.d, p0/z, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uw_gather_ext_u64_s64offset, svuint64_t, uint32_t, svint64_t, -+ z0_res = svld1uw_gather_s64offset_u64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svld1uw_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ld1uw_gather_x0_u64_u64offset: -+** ld1w z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uw_gather_x0_u64_u64offset, svuint64_t, uint32_t, svuint64_t, -+ z0_res = svld1uw_gather_u64offset_u64 (p0, x0, z0), -+ z0_res = svld1uw_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ld1uw_gather_tied1_u64_u64offset: -+** ld1w z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uw_gather_tied1_u64_u64offset, svuint64_t, uint32_t, svuint64_t, -+ z0_res = svld1uw_gather_u64offset_u64 (p0, x0, z0), -+ z0_res = svld1uw_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ld1uw_gather_untied_u64_u64offset: -+** ld1w z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uw_gather_untied_u64_u64offset, svuint64_t, uint32_t, svuint64_t, -+ z0_res = svld1uw_gather_u64offset_u64 (p0, x0, z1), -+ z0_res = svld1uw_gather_offset_u64 (p0, x0, z1)) -+ -+/* -+** ld1uw_gather_ext_u64_u64offset: -+** ld1w z0\.d, p0/z, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ 
(ld1uw_gather_ext_u64_u64offset, svuint64_t, uint32_t, svuint64_t, -+ z0_res = svld1uw_gather_u64offset_u64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svld1uw_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ld1uw_gather_x0_u64_s64index: -+** ld1w z0\.d, p0/z, \[x0, z0\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uw_gather_x0_u64_s64index, svuint64_t, uint32_t, svint64_t, -+ z0_res = svld1uw_gather_s64index_u64 (p0, x0, z0), -+ z0_res = svld1uw_gather_index_u64 (p0, x0, z0)) -+ -+/* -+** ld1uw_gather_tied1_u64_s64index: -+** ld1w z0\.d, p0/z, \[x0, z0\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uw_gather_tied1_u64_s64index, svuint64_t, uint32_t, svint64_t, -+ z0_res = svld1uw_gather_s64index_u64 (p0, x0, z0), -+ z0_res = svld1uw_gather_index_u64 (p0, x0, z0)) -+ -+/* -+** ld1uw_gather_untied_u64_s64index: -+** ld1w z0\.d, p0/z, \[x0, z1\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uw_gather_untied_u64_s64index, svuint64_t, uint32_t, svint64_t, -+ z0_res = svld1uw_gather_s64index_u64 (p0, x0, z1), -+ z0_res = svld1uw_gather_index_u64 (p0, x0, z1)) -+ -+/* -+** ld1uw_gather_ext_u64_s64index: -+** ld1w z0\.d, p0/z, \[x0, z1\.d, sxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uw_gather_ext_u64_s64index, svuint64_t, uint32_t, svint64_t, -+ z0_res = svld1uw_gather_s64index_u64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svld1uw_gather_index_u64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ld1uw_gather_x0_u64_u64index: -+** ld1w z0\.d, p0/z, \[x0, z0\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uw_gather_x0_u64_u64index, svuint64_t, uint32_t, svuint64_t, -+ z0_res = svld1uw_gather_u64index_u64 (p0, x0, z0), -+ z0_res = svld1uw_gather_index_u64 (p0, x0, z0)) -+ -+/* -+** ld1uw_gather_tied1_u64_u64index: -+** ld1w z0\.d, p0/z, \[x0, z0\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uw_gather_tied1_u64_u64index, svuint64_t, uint32_t, svuint64_t, -+ z0_res = svld1uw_gather_u64index_u64 (p0, x0, z0), -+ z0_res = svld1uw_gather_index_u64 (p0, x0, z0)) -+ -+/* -+** ld1uw_gather_untied_u64_u64index: -+** ld1w z0\.d, p0/z, \[x0, z1\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uw_gather_untied_u64_u64index, svuint64_t, uint32_t, svuint64_t, -+ z0_res = svld1uw_gather_u64index_u64 (p0, x0, z1), -+ z0_res = svld1uw_gather_index_u64 (p0, x0, z1)) -+ -+/* -+** ld1uw_gather_ext_u64_u64index: -+** ld1w z0\.d, p0/z, \[x0, z1\.d, uxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ld1uw_gather_ext_u64_u64index, svuint64_t, uint32_t, svuint64_t, -+ z0_res = svld1uw_gather_u64index_u64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svld1uw_gather_index_u64 (p0, x0, svextw_x (p0, z1))) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_s64.c -new file mode 100644 -index 000000000..55f5cbad3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_s64.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1uw_s64_base: -+** ld1w z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1uw_s64_base, svint64_t, uint32_t, -+ z0 = svld1uw_s64 (p0, x0), -+ z0 = svld1uw_s64 (p0, x0)) -+ -+/* -+** ld1uw_s64_index: -+** ld1w z0\.d, p0/z, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_LOAD (ld1uw_s64_index, svint64_t, uint32_t, -+ z0 = svld1uw_s64 (p0, x0 + x1), -+ z0 = svld1uw_s64 (p0, x0 + x1)) -+ -+/* -+** ld1uw_s64_1: -+** ld1w z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uw_s64_1, svint64_t, uint32_t, -+ z0 = svld1uw_s64 (p0, x0 + svcntd ()), -+ z0 = svld1uw_s64 (p0, x0 + svcntd ())) -+ -+/* -+** ld1uw_s64_7: -+** ld1w z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uw_s64_7, svint64_t, uint32_t, -+ z0 = svld1uw_s64 (p0, x0 + svcntd () * 7), -+ z0 = svld1uw_s64 (p0, x0 + svcntd () * 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1uw_s64_8: -+** incb x0, all, mul #4 -+** ld1w z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1uw_s64_8, svint64_t, uint32_t, -+ z0 = svld1uw_s64 (p0, x0 + svcntd () * 8), -+ z0 = svld1uw_s64 (p0, x0 + svcntd () * 8)) -+ -+/* -+** ld1uw_s64_m1: -+** ld1w z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uw_s64_m1, svint64_t, uint32_t, -+ z0 = svld1uw_s64 (p0, x0 - svcntd ()), -+ z0 = svld1uw_s64 (p0, x0 - svcntd ())) -+ -+/* -+** ld1uw_s64_m8: -+** ld1w z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uw_s64_m8, svint64_t, uint32_t, -+ z0 = svld1uw_s64 (p0, x0 - svcntd () * 8), -+ z0 = svld1uw_s64 (p0, x0 - svcntd () * 8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1uw_s64_m9: -+** dech x0, all, mul #9 -+** ld1w z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1uw_s64_m9, svint64_t, uint32_t, -+ z0 = svld1uw_s64 (p0, x0 - svcntd () * 9), -+ z0 = svld1uw_s64 (p0, x0 - svcntd () * 9)) -+ -+/* -+** ld1uw_vnum_s64_0: -+** ld1w z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1uw_vnum_s64_0, svint64_t, uint32_t, -+ z0 = svld1uw_vnum_s64 (p0, x0, 0), -+ z0 = svld1uw_vnum_s64 (p0, x0, 0)) -+ -+/* -+** ld1uw_vnum_s64_1: -+** ld1w z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uw_vnum_s64_1, svint64_t, uint32_t, -+ z0 = svld1uw_vnum_s64 (p0, x0, 1), -+ z0 = svld1uw_vnum_s64 (p0, x0, 1)) -+ -+/* -+** ld1uw_vnum_s64_7: -+** ld1w z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uw_vnum_s64_7, svint64_t, uint32_t, -+ z0 = svld1uw_vnum_s64 (p0, x0, 7), -+ z0 = svld1uw_vnum_s64 (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1uw_vnum_s64_8: -+** incb x0, all, mul #4 -+** ld1w z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1uw_vnum_s64_8, svint64_t, uint32_t, -+ z0 = svld1uw_vnum_s64 (p0, x0, 8), -+ z0 = svld1uw_vnum_s64 (p0, x0, 8)) -+ -+/* -+** ld1uw_vnum_s64_m1: -+** ld1w z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uw_vnum_s64_m1, svint64_t, uint32_t, -+ z0 = svld1uw_vnum_s64 (p0, x0, -1), -+ z0 = svld1uw_vnum_s64 (p0, x0, -1)) -+ -+/* -+** ld1uw_vnum_s64_m8: -+** ld1w z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uw_vnum_s64_m8, svint64_t, uint32_t, -+ z0 = svld1uw_vnum_s64 (p0, x0, -8), -+ z0 = svld1uw_vnum_s64 (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld1uw_vnum_s64_m9: -+** dech x0, all, mul #9 -+** ld1w z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1uw_vnum_s64_m9, svint64_t, uint32_t, -+ z0 = svld1uw_vnum_s64 (p0, x0, -9), -+ z0 = svld1uw_vnum_s64 (p0, x0, -9)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld1uw_vnum_s64_x1: -+** cnth (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld1w z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld1uw_vnum_s64_x1, svint64_t, uint32_t, -+ z0 = svld1uw_vnum_s64 (p0, x0, x1), -+ z0 = svld1uw_vnum_s64 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_u64.c -new file mode 100644 -index 000000000..175b593f2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld1uw_u64.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld1uw_u64_base: -+** ld1w z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1uw_u64_base, svuint64_t, uint32_t, -+ z0 = svld1uw_u64 (p0, x0), -+ z0 = svld1uw_u64 (p0, x0)) -+ -+/* -+** ld1uw_u64_index: -+** ld1w z0\.d, p0/z, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_LOAD (ld1uw_u64_index, svuint64_t, uint32_t, -+ z0 = svld1uw_u64 (p0, x0 + x1), -+ z0 = svld1uw_u64 (p0, x0 + x1)) -+ -+/* -+** ld1uw_u64_1: -+** ld1w z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uw_u64_1, svuint64_t, uint32_t, -+ z0 = svld1uw_u64 (p0, x0 + svcntd ()), -+ z0 = svld1uw_u64 (p0, x0 + svcntd ())) -+ -+/* -+** ld1uw_u64_7: -+** ld1w z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uw_u64_7, svuint64_t, uint32_t, -+ z0 = svld1uw_u64 (p0, x0 + svcntd () * 7), -+ z0 = svld1uw_u64 (p0, x0 + svcntd () * 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1uw_u64_8: -+** incb x0, all, mul #4 -+** ld1w z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1uw_u64_8, svuint64_t, uint32_t, -+ z0 = svld1uw_u64 (p0, x0 + svcntd () * 8), -+ z0 = svld1uw_u64 (p0, x0 + svcntd () * 8)) -+ -+/* -+** ld1uw_u64_m1: -+** ld1w z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uw_u64_m1, svuint64_t, uint32_t, -+ z0 = svld1uw_u64 (p0, x0 - svcntd ()), -+ z0 = svld1uw_u64 (p0, x0 - svcntd ())) -+ -+/* -+** ld1uw_u64_m8: -+** ld1w z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uw_u64_m8, svuint64_t, uint32_t, -+ z0 = svld1uw_u64 (p0, x0 - svcntd () * 8), -+ z0 = svld1uw_u64 (p0, x0 - svcntd () * 8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1uw_u64_m9: -+** dech x0, all, mul #9 -+** ld1w z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1uw_u64_m9, svuint64_t, uint32_t, -+ z0 = svld1uw_u64 (p0, x0 - svcntd () * 9), -+ z0 = svld1uw_u64 (p0, x0 - svcntd () * 9)) -+ -+/* -+** ld1uw_vnum_u64_0: -+** ld1w z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1uw_vnum_u64_0, svuint64_t, uint32_t, -+ z0 = svld1uw_vnum_u64 (p0, x0, 0), -+ z0 = svld1uw_vnum_u64 (p0, x0, 0)) -+ -+/* -+** ld1uw_vnum_u64_1: -+** ld1w z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uw_vnum_u64_1, svuint64_t, uint32_t, -+ z0 = svld1uw_vnum_u64 (p0, x0, 1), -+ z0 = svld1uw_vnum_u64 (p0, x0, 1)) -+ -+/* -+** ld1uw_vnum_u64_7: -+** ld1w z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uw_vnum_u64_7, svuint64_t, uint32_t, -+ z0 = svld1uw_vnum_u64 (p0, x0, 7), -+ z0 = svld1uw_vnum_u64 (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld1uw_vnum_u64_8: -+** incb x0, all, mul #4 -+** ld1w z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1uw_vnum_u64_8, svuint64_t, uint32_t, -+ z0 = svld1uw_vnum_u64 (p0, x0, 8), -+ z0 = svld1uw_vnum_u64 (p0, x0, 8)) -+ -+/* -+** ld1uw_vnum_u64_m1: -+** ld1w z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uw_vnum_u64_m1, svuint64_t, uint32_t, -+ z0 = svld1uw_vnum_u64 (p0, x0, -1), -+ z0 = svld1uw_vnum_u64 (p0, x0, -1)) -+ -+/* -+** ld1uw_vnum_u64_m8: -+** ld1w z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld1uw_vnum_u64_m8, svuint64_t, uint32_t, -+ z0 = svld1uw_vnum_u64 (p0, x0, -8), -+ z0 = svld1uw_vnum_u64 (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld1uw_vnum_u64_m9: -+** dech x0, all, mul #9 -+** ld1w z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld1uw_vnum_u64_m9, svuint64_t, uint32_t, -+ z0 = svld1uw_vnum_u64 (p0, x0, -9), -+ z0 = svld1uw_vnum_u64 (p0, x0, -9)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld1uw_vnum_u64_x1: -+** cnth (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld1w z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld1uw_vnum_u64_x1, svuint64_t, uint32_t, -+ z0 = svld1uw_vnum_u64 (p0, x0, x1), -+ z0 = svld1uw_vnum_u64 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_bf16.c -new file mode 100644 -index 000000000..5d08c1e6e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_bf16.c -@@ -0,0 +1,200 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld2_bf16_base: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_bf16_base, svbfloat16x2_t, bfloat16_t, -+ z0 = svld2_bf16 (p0, x0), -+ z0 = svld2 (p0, x0)) -+ -+/* -+** ld2_bf16_index: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ld2_bf16_index, svbfloat16x2_t, bfloat16_t, -+ z0 = svld2_bf16 (p0, x0 + x1), -+ z0 = svld2 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_bf16_1: -+** incb x0 -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_bf16_1, svbfloat16x2_t, bfloat16_t, -+ z0 = svld2_bf16 (p0, x0 + svcnth ()), -+ z0 = svld2 (p0, x0 + svcnth ())) -+ -+/* -+** ld2_bf16_2: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_bf16_2, svbfloat16x2_t, bfloat16_t, -+ z0 = svld2_bf16 (p0, x0 + svcnth () * 2), -+ z0 = svld2 (p0, x0 + svcnth () * 2)) -+ -+/* -+** ld2_bf16_14: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_bf16_14, svbfloat16x2_t, bfloat16_t, -+ z0 = svld2_bf16 (p0, x0 + svcnth () * 14), -+ z0 = svld2 (p0, x0 + svcnth () * 14)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_bf16_16: -+** incb x0, all, mul #16 -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_bf16_16, svbfloat16x2_t, bfloat16_t, -+ z0 = svld2_bf16 (p0, x0 + svcnth () * 16), -+ z0 = svld2 (p0, x0 + svcnth () * 16)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld2_bf16_m1: -+** decb x0 -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_bf16_m1, svbfloat16x2_t, bfloat16_t, -+ z0 = svld2_bf16 (p0, x0 - svcnth ()), -+ z0 = svld2 (p0, x0 - svcnth ())) -+ -+/* -+** ld2_bf16_m2: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_bf16_m2, svbfloat16x2_t, bfloat16_t, -+ z0 = svld2_bf16 (p0, x0 - svcnth () * 2), -+ z0 = svld2 (p0, x0 - svcnth () * 2)) -+ -+/* -+** ld2_bf16_m16: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_bf16_m16, svbfloat16x2_t, bfloat16_t, -+ z0 = svld2_bf16 (p0, x0 - svcnth () * 16), -+ z0 = svld2 (p0, x0 - svcnth () * 16)) -+ -+/* -+** ld2_bf16_m18: -+** addvl (x[0-9]+), x0, #-18 -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld2_bf16_m18, svbfloat16x2_t, bfloat16_t, -+ z0 = svld2_bf16 (p0, x0 - svcnth () * 18), -+ z0 = svld2 (p0, x0 - svcnth () * 18)) -+ -+/* -+** ld2_vnum_bf16_0: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_bf16_0, svbfloat16x2_t, bfloat16_t, -+ z0 = svld2_vnum_bf16 (p0, x0, 0), -+ z0 = svld2_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_vnum_bf16_1: -+** incb x0 -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_bf16_1, svbfloat16x2_t, bfloat16_t, -+ z0 = svld2_vnum_bf16 (p0, x0, 1), -+ z0 = svld2_vnum (p0, x0, 1)) -+ -+/* -+** ld2_vnum_bf16_2: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_bf16_2, svbfloat16x2_t, bfloat16_t, -+ z0 = svld2_vnum_bf16 (p0, x0, 2), -+ z0 = svld2_vnum (p0, x0, 2)) -+ -+/* -+** ld2_vnum_bf16_14: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_bf16_14, svbfloat16x2_t, bfloat16_t, -+ z0 = svld2_vnum_bf16 (p0, x0, 14), -+ z0 = svld2_vnum (p0, x0, 14)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_vnum_bf16_16: -+** incb x0, all, mul #16 -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_bf16_16, svbfloat16x2_t, bfloat16_t, -+ z0 = svld2_vnum_bf16 (p0, x0, 16), -+ z0 = svld2_vnum (p0, x0, 16)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_vnum_bf16_m1: -+** decb x0 -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_bf16_m1, svbfloat16x2_t, bfloat16_t, -+ z0 = svld2_vnum_bf16 (p0, x0, -1), -+ z0 = svld2_vnum (p0, x0, -1)) -+ -+/* -+** ld2_vnum_bf16_m2: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_bf16_m2, svbfloat16x2_t, bfloat16_t, -+ z0 = svld2_vnum_bf16 (p0, x0, -2), -+ z0 = svld2_vnum (p0, x0, -2)) -+ -+/* -+** ld2_vnum_bf16_m16: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_bf16_m16, svbfloat16x2_t, bfloat16_t, -+ z0 = svld2_vnum_bf16 (p0, x0, -16), -+ z0 = svld2_vnum (p0, x0, -16)) -+ -+/* -+** ld2_vnum_bf16_m18: -+** addvl (x[0-9]+), x0, #-18 -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_bf16_m18, svbfloat16x2_t, bfloat16_t, -+ z0 = svld2_vnum_bf16 (p0, x0, -18), -+ z0 = svld2_vnum (p0, x0, -18)) -+ -+/* Using MUL to calculate an index would also be OK. 
*/ -+/* -+** ld2_vnum_bf16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_bf16_x1, svbfloat16x2_t, bfloat16_t, -+ z0 = svld2_vnum_bf16 (p0, x0, x1), -+ z0 = svld2_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_f16.c -new file mode 100644 -index 000000000..43392b2b2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_f16.c -@@ -0,0 +1,200 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld2_f16_base: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_f16_base, svfloat16x2_t, float16_t, -+ z0 = svld2_f16 (p0, x0), -+ z0 = svld2 (p0, x0)) -+ -+/* -+** ld2_f16_index: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ld2_f16_index, svfloat16x2_t, float16_t, -+ z0 = svld2_f16 (p0, x0 + x1), -+ z0 = svld2 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_f16_1: -+** incb x0 -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_f16_1, svfloat16x2_t, float16_t, -+ z0 = svld2_f16 (p0, x0 + svcnth ()), -+ z0 = svld2 (p0, x0 + svcnth ())) -+ -+/* -+** ld2_f16_2: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_f16_2, svfloat16x2_t, float16_t, -+ z0 = svld2_f16 (p0, x0 + svcnth () * 2), -+ z0 = svld2 (p0, x0 + svcnth () * 2)) -+ -+/* -+** ld2_f16_14: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_f16_14, svfloat16x2_t, float16_t, -+ z0 = svld2_f16 (p0, x0 + svcnth () * 14), -+ z0 = svld2 (p0, x0 + svcnth () * 14)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_f16_16: -+** incb x0, all, mul #16 -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_f16_16, svfloat16x2_t, float16_t, -+ z0 = svld2_f16 (p0, x0 + svcnth () * 16), -+ z0 = svld2 (p0, x0 + svcnth () * 16)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_f16_m1: -+** decb x0 -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_f16_m1, svfloat16x2_t, float16_t, -+ z0 = svld2_f16 (p0, x0 - svcnth ()), -+ z0 = svld2 (p0, x0 - svcnth ())) -+ -+/* -+** ld2_f16_m2: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_f16_m2, svfloat16x2_t, float16_t, -+ z0 = svld2_f16 (p0, x0 - svcnth () * 2), -+ z0 = svld2 (p0, x0 - svcnth () * 2)) -+ -+/* -+** ld2_f16_m16: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_f16_m16, svfloat16x2_t, float16_t, -+ z0 = svld2_f16 (p0, x0 - svcnth () * 16), -+ z0 = svld2 (p0, x0 - svcnth () * 16)) -+ -+/* -+** ld2_f16_m18: -+** addvl (x[0-9]+), x0, #-18 -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld2_f16_m18, svfloat16x2_t, float16_t, -+ z0 = svld2_f16 (p0, x0 - svcnth () * 18), -+ z0 = svld2 (p0, x0 - svcnth () * 18)) -+ -+/* -+** ld2_vnum_f16_0: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_f16_0, svfloat16x2_t, float16_t, -+ z0 = svld2_vnum_f16 (p0, x0, 0), -+ z0 = svld2_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld2_vnum_f16_1: -+** incb x0 -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_f16_1, svfloat16x2_t, float16_t, -+ z0 = svld2_vnum_f16 (p0, x0, 1), -+ z0 = svld2_vnum (p0, x0, 1)) -+ -+/* -+** ld2_vnum_f16_2: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_f16_2, svfloat16x2_t, float16_t, -+ z0 = svld2_vnum_f16 (p0, x0, 2), -+ z0 = svld2_vnum (p0, x0, 2)) -+ -+/* -+** ld2_vnum_f16_14: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_f16_14, svfloat16x2_t, float16_t, -+ z0 = svld2_vnum_f16 (p0, x0, 14), -+ z0 = svld2_vnum (p0, x0, 14)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_vnum_f16_16: -+** incb x0, all, mul #16 -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_f16_16, svfloat16x2_t, float16_t, -+ z0 = svld2_vnum_f16 (p0, x0, 16), -+ z0 = svld2_vnum (p0, x0, 16)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_vnum_f16_m1: -+** decb x0 -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_f16_m1, svfloat16x2_t, float16_t, -+ z0 = svld2_vnum_f16 (p0, x0, -1), -+ z0 = svld2_vnum (p0, x0, -1)) -+ -+/* -+** ld2_vnum_f16_m2: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_f16_m2, svfloat16x2_t, float16_t, -+ z0 = svld2_vnum_f16 (p0, x0, -2), -+ z0 = svld2_vnum (p0, x0, -2)) -+ -+/* -+** ld2_vnum_f16_m16: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_f16_m16, svfloat16x2_t, float16_t, -+ z0 = svld2_vnum_f16 (p0, x0, -16), -+ z0 = svld2_vnum (p0, x0, -16)) -+ -+/* -+** ld2_vnum_f16_m18: -+** addvl (x[0-9]+), x0, #-18 -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_f16_m18, svfloat16x2_t, float16_t, -+ z0 = svld2_vnum_f16 (p0, x0, -18), -+ z0 = svld2_vnum (p0, x0, -18)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld2_vnum_f16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_f16_x1, svfloat16x2_t, float16_t, -+ z0 = svld2_vnum_f16 (p0, x0, x1), -+ z0 = svld2_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_f32.c -new file mode 100644 -index 000000000..379145e0c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_f32.c -@@ -0,0 +1,200 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld2_f32_base: -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_f32_base, svfloat32x2_t, float32_t, -+ z0 = svld2_f32 (p0, x0), -+ z0 = svld2 (p0, x0)) -+ -+/* -+** ld2_f32_index: -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_LOAD (ld2_f32_index, svfloat32x2_t, float32_t, -+ z0 = svld2_f32 (p0, x0 + x1), -+ z0 = svld2 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld2_f32_1: -+** incb x0 -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_f32_1, svfloat32x2_t, float32_t, -+ z0 = svld2_f32 (p0, x0 + svcntw ()), -+ z0 = svld2 (p0, x0 + svcntw ())) -+ -+/* -+** ld2_f32_2: -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_f32_2, svfloat32x2_t, float32_t, -+ z0 = svld2_f32 (p0, x0 + svcntw () * 2), -+ z0 = svld2 (p0, x0 + svcntw () * 2)) -+ -+/* -+** ld2_f32_14: -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_f32_14, svfloat32x2_t, float32_t, -+ z0 = svld2_f32 (p0, x0 + svcntw () * 14), -+ z0 = svld2 (p0, x0 + svcntw () * 14)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_f32_16: -+** incb x0, all, mul #16 -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_f32_16, svfloat32x2_t, float32_t, -+ z0 = svld2_f32 (p0, x0 + svcntw () * 16), -+ z0 = svld2 (p0, x0 + svcntw () * 16)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_f32_m1: -+** decb x0 -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_f32_m1, svfloat32x2_t, float32_t, -+ z0 = svld2_f32 (p0, x0 - svcntw ()), -+ z0 = svld2 (p0, x0 - svcntw ())) -+ -+/* -+** ld2_f32_m2: -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_f32_m2, svfloat32x2_t, float32_t, -+ z0 = svld2_f32 (p0, x0 - svcntw () * 2), -+ z0 = svld2 (p0, x0 - svcntw () * 2)) -+ -+/* -+** ld2_f32_m16: -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_f32_m16, svfloat32x2_t, float32_t, -+ z0 = svld2_f32 (p0, x0 - svcntw () * 16), -+ z0 = svld2 (p0, x0 - svcntw () * 16)) -+ -+/* -+** ld2_f32_m18: -+** addvl (x[0-9]+), x0, #-18 -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld2_f32_m18, svfloat32x2_t, float32_t, -+ z0 = svld2_f32 (p0, x0 - svcntw () * 18), -+ z0 = svld2 (p0, x0 - svcntw () * 18)) -+ -+/* -+** ld2_vnum_f32_0: -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_f32_0, svfloat32x2_t, float32_t, -+ z0 = svld2_vnum_f32 (p0, x0, 0), -+ z0 = svld2_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_vnum_f32_1: -+** incb x0 -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_f32_1, svfloat32x2_t, float32_t, -+ z0 = svld2_vnum_f32 (p0, x0, 1), -+ z0 = svld2_vnum (p0, x0, 1)) -+ -+/* -+** ld2_vnum_f32_2: -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_f32_2, svfloat32x2_t, float32_t, -+ z0 = svld2_vnum_f32 (p0, x0, 2), -+ z0 = svld2_vnum (p0, x0, 2)) -+ -+/* -+** ld2_vnum_f32_14: -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_f32_14, svfloat32x2_t, float32_t, -+ z0 = svld2_vnum_f32 (p0, x0, 14), -+ z0 = svld2_vnum (p0, x0, 14)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_vnum_f32_16: -+** incb x0, all, mul #16 -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_f32_16, svfloat32x2_t, float32_t, -+ z0 = svld2_vnum_f32 (p0, x0, 16), -+ z0 = svld2_vnum (p0, x0, 16)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld2_vnum_f32_m1: -+** decb x0 -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_f32_m1, svfloat32x2_t, float32_t, -+ z0 = svld2_vnum_f32 (p0, x0, -1), -+ z0 = svld2_vnum (p0, x0, -1)) -+ -+/* -+** ld2_vnum_f32_m2: -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_f32_m2, svfloat32x2_t, float32_t, -+ z0 = svld2_vnum_f32 (p0, x0, -2), -+ z0 = svld2_vnum (p0, x0, -2)) -+ -+/* -+** ld2_vnum_f32_m16: -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_f32_m16, svfloat32x2_t, float32_t, -+ z0 = svld2_vnum_f32 (p0, x0, -16), -+ z0 = svld2_vnum (p0, x0, -16)) -+ -+/* -+** ld2_vnum_f32_m18: -+** addvl (x[0-9]+), x0, #-18 -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_f32_m18, svfloat32x2_t, float32_t, -+ z0 = svld2_vnum_f32 (p0, x0, -18), -+ z0 = svld2_vnum (p0, x0, -18)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld2_vnum_f32_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_f32_x1, svfloat32x2_t, float32_t, -+ z0 = svld2_vnum_f32 (p0, x0, x1), -+ z0 = svld2_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_f64.c -new file mode 100644 -index 000000000..1911612c6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_f64.c -@@ -0,0 +1,200 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld2_f64_base: -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_f64_base, svfloat64x2_t, float64_t, -+ z0 = svld2_f64 (p0, x0), -+ z0 = svld2 (p0, x0)) -+ -+/* -+** ld2_f64_index: -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_LOAD (ld2_f64_index, svfloat64x2_t, float64_t, -+ z0 = svld2_f64 (p0, x0 + x1), -+ z0 = svld2 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_f64_1: -+** incb x0 -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_f64_1, svfloat64x2_t, float64_t, -+ z0 = svld2_f64 (p0, x0 + svcntd ()), -+ z0 = svld2 (p0, x0 + svcntd ())) -+ -+/* -+** ld2_f64_2: -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_f64_2, svfloat64x2_t, float64_t, -+ z0 = svld2_f64 (p0, x0 + svcntd () * 2), -+ z0 = svld2 (p0, x0 + svcntd () * 2)) -+ -+/* -+** ld2_f64_14: -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_f64_14, svfloat64x2_t, float64_t, -+ z0 = svld2_f64 (p0, x0 + svcntd () * 14), -+ z0 = svld2 (p0, x0 + svcntd () * 14)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_f64_16: -+** incb x0, all, mul #16 -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_f64_16, svfloat64x2_t, float64_t, -+ z0 = svld2_f64 (p0, x0 + svcntd () * 16), -+ z0 = svld2 (p0, x0 + svcntd () * 16)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld2_f64_m1: -+** decb x0 -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_f64_m1, svfloat64x2_t, float64_t, -+ z0 = svld2_f64 (p0, x0 - svcntd ()), -+ z0 = svld2 (p0, x0 - svcntd ())) -+ -+/* -+** ld2_f64_m2: -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_f64_m2, svfloat64x2_t, float64_t, -+ z0 = svld2_f64 (p0, x0 - svcntd () * 2), -+ z0 = svld2 (p0, x0 - svcntd () * 2)) -+ -+/* -+** ld2_f64_m16: -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_f64_m16, svfloat64x2_t, float64_t, -+ z0 = svld2_f64 (p0, x0 - svcntd () * 16), -+ z0 = svld2 (p0, x0 - svcntd () * 16)) -+ -+/* -+** ld2_f64_m18: -+** addvl (x[0-9]+), x0, #-18 -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld2_f64_m18, svfloat64x2_t, float64_t, -+ z0 = svld2_f64 (p0, x0 - svcntd () * 18), -+ z0 = svld2 (p0, x0 - svcntd () * 18)) -+ -+/* -+** ld2_vnum_f64_0: -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_f64_0, svfloat64x2_t, float64_t, -+ z0 = svld2_vnum_f64 (p0, x0, 0), -+ z0 = svld2_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_vnum_f64_1: -+** incb x0 -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_f64_1, svfloat64x2_t, float64_t, -+ z0 = svld2_vnum_f64 (p0, x0, 1), -+ z0 = svld2_vnum (p0, x0, 1)) -+ -+/* -+** ld2_vnum_f64_2: -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_f64_2, svfloat64x2_t, float64_t, -+ z0 = svld2_vnum_f64 (p0, x0, 2), -+ z0 = svld2_vnum (p0, x0, 2)) -+ -+/* -+** ld2_vnum_f64_14: -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_f64_14, svfloat64x2_t, float64_t, -+ z0 = svld2_vnum_f64 (p0, x0, 14), -+ z0 = svld2_vnum (p0, x0, 14)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_vnum_f64_16: -+** incb x0, all, mul #16 -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_f64_16, svfloat64x2_t, float64_t, -+ z0 = svld2_vnum_f64 (p0, x0, 16), -+ z0 = svld2_vnum (p0, x0, 16)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_vnum_f64_m1: -+** decb x0 -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_f64_m1, svfloat64x2_t, float64_t, -+ z0 = svld2_vnum_f64 (p0, x0, -1), -+ z0 = svld2_vnum (p0, x0, -1)) -+ -+/* -+** ld2_vnum_f64_m2: -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_f64_m2, svfloat64x2_t, float64_t, -+ z0 = svld2_vnum_f64 (p0, x0, -2), -+ z0 = svld2_vnum (p0, x0, -2)) -+ -+/* -+** ld2_vnum_f64_m16: -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_f64_m16, svfloat64x2_t, float64_t, -+ z0 = svld2_vnum_f64 (p0, x0, -16), -+ z0 = svld2_vnum (p0, x0, -16)) -+ -+/* -+** ld2_vnum_f64_m18: -+** addvl (x[0-9]+), x0, #-18 -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_f64_m18, svfloat64x2_t, float64_t, -+ z0 = svld2_vnum_f64 (p0, x0, -18), -+ z0 = svld2_vnum (p0, x0, -18)) -+ -+/* Using MUL to calculate an index would also be OK. 
*/ -+/* -+** ld2_vnum_f64_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_f64_x1, svfloat64x2_t, float64_t, -+ z0 = svld2_vnum_f64 (p0, x0, x1), -+ z0 = svld2_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_s16.c -new file mode 100644 -index 000000000..90677d837 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_s16.c -@@ -0,0 +1,200 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld2_s16_base: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_s16_base, svint16x2_t, int16_t, -+ z0 = svld2_s16 (p0, x0), -+ z0 = svld2 (p0, x0)) -+ -+/* -+** ld2_s16_index: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ld2_s16_index, svint16x2_t, int16_t, -+ z0 = svld2_s16 (p0, x0 + x1), -+ z0 = svld2 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_s16_1: -+** incb x0 -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_s16_1, svint16x2_t, int16_t, -+ z0 = svld2_s16 (p0, x0 + svcnth ()), -+ z0 = svld2 (p0, x0 + svcnth ())) -+ -+/* -+** ld2_s16_2: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_s16_2, svint16x2_t, int16_t, -+ z0 = svld2_s16 (p0, x0 + svcnth () * 2), -+ z0 = svld2 (p0, x0 + svcnth () * 2)) -+ -+/* -+** ld2_s16_14: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_s16_14, svint16x2_t, int16_t, -+ z0 = svld2_s16 (p0, x0 + svcnth () * 14), -+ z0 = svld2 (p0, x0 + svcnth () * 14)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_s16_16: -+** incb x0, all, mul #16 -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_s16_16, svint16x2_t, int16_t, -+ z0 = svld2_s16 (p0, x0 + svcnth () * 16), -+ z0 = svld2 (p0, x0 + svcnth () * 16)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_s16_m1: -+** decb x0 -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_s16_m1, svint16x2_t, int16_t, -+ z0 = svld2_s16 (p0, x0 - svcnth ()), -+ z0 = svld2 (p0, x0 - svcnth ())) -+ -+/* -+** ld2_s16_m2: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_s16_m2, svint16x2_t, int16_t, -+ z0 = svld2_s16 (p0, x0 - svcnth () * 2), -+ z0 = svld2 (p0, x0 - svcnth () * 2)) -+ -+/* -+** ld2_s16_m16: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_s16_m16, svint16x2_t, int16_t, -+ z0 = svld2_s16 (p0, x0 - svcnth () * 16), -+ z0 = svld2 (p0, x0 - svcnth () * 16)) -+ -+/* -+** ld2_s16_m18: -+** addvl (x[0-9]+), x0, #-18 -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld2_s16_m18, svint16x2_t, int16_t, -+ z0 = svld2_s16 (p0, x0 - svcnth () * 18), -+ z0 = svld2 (p0, x0 - svcnth () * 18)) -+ -+/* -+** ld2_vnum_s16_0: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s16_0, svint16x2_t, int16_t, -+ z0 = svld2_vnum_s16 (p0, x0, 0), -+ z0 = svld2_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld2_vnum_s16_1: -+** incb x0 -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s16_1, svint16x2_t, int16_t, -+ z0 = svld2_vnum_s16 (p0, x0, 1), -+ z0 = svld2_vnum (p0, x0, 1)) -+ -+/* -+** ld2_vnum_s16_2: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s16_2, svint16x2_t, int16_t, -+ z0 = svld2_vnum_s16 (p0, x0, 2), -+ z0 = svld2_vnum (p0, x0, 2)) -+ -+/* -+** ld2_vnum_s16_14: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s16_14, svint16x2_t, int16_t, -+ z0 = svld2_vnum_s16 (p0, x0, 14), -+ z0 = svld2_vnum (p0, x0, 14)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_vnum_s16_16: -+** incb x0, all, mul #16 -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s16_16, svint16x2_t, int16_t, -+ z0 = svld2_vnum_s16 (p0, x0, 16), -+ z0 = svld2_vnum (p0, x0, 16)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_vnum_s16_m1: -+** decb x0 -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s16_m1, svint16x2_t, int16_t, -+ z0 = svld2_vnum_s16 (p0, x0, -1), -+ z0 = svld2_vnum (p0, x0, -1)) -+ -+/* -+** ld2_vnum_s16_m2: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s16_m2, svint16x2_t, int16_t, -+ z0 = svld2_vnum_s16 (p0, x0, -2), -+ z0 = svld2_vnum (p0, x0, -2)) -+ -+/* -+** ld2_vnum_s16_m16: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s16_m16, svint16x2_t, int16_t, -+ z0 = svld2_vnum_s16 (p0, x0, -16), -+ z0 = svld2_vnum (p0, x0, -16)) -+ -+/* -+** ld2_vnum_s16_m18: -+** addvl (x[0-9]+), x0, #-18 -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s16_m18, svint16x2_t, int16_t, -+ z0 = svld2_vnum_s16 (p0, x0, -18), -+ z0 = svld2_vnum (p0, x0, -18)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld2_vnum_s16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s16_x1, svint16x2_t, int16_t, -+ z0 = svld2_vnum_s16 (p0, x0, x1), -+ z0 = svld2_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_s32.c -new file mode 100644 -index 000000000..10913c2d0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_s32.c -@@ -0,0 +1,200 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld2_s32_base: -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_s32_base, svint32x2_t, int32_t, -+ z0 = svld2_s32 (p0, x0), -+ z0 = svld2 (p0, x0)) -+ -+/* -+** ld2_s32_index: -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_LOAD (ld2_s32_index, svint32x2_t, int32_t, -+ z0 = svld2_s32 (p0, x0 + x1), -+ z0 = svld2 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld2_s32_1: -+** incb x0 -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_s32_1, svint32x2_t, int32_t, -+ z0 = svld2_s32 (p0, x0 + svcntw ()), -+ z0 = svld2 (p0, x0 + svcntw ())) -+ -+/* -+** ld2_s32_2: -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_s32_2, svint32x2_t, int32_t, -+ z0 = svld2_s32 (p0, x0 + svcntw () * 2), -+ z0 = svld2 (p0, x0 + svcntw () * 2)) -+ -+/* -+** ld2_s32_14: -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_s32_14, svint32x2_t, int32_t, -+ z0 = svld2_s32 (p0, x0 + svcntw () * 14), -+ z0 = svld2 (p0, x0 + svcntw () * 14)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_s32_16: -+** incb x0, all, mul #16 -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_s32_16, svint32x2_t, int32_t, -+ z0 = svld2_s32 (p0, x0 + svcntw () * 16), -+ z0 = svld2 (p0, x0 + svcntw () * 16)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_s32_m1: -+** decb x0 -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_s32_m1, svint32x2_t, int32_t, -+ z0 = svld2_s32 (p0, x0 - svcntw ()), -+ z0 = svld2 (p0, x0 - svcntw ())) -+ -+/* -+** ld2_s32_m2: -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_s32_m2, svint32x2_t, int32_t, -+ z0 = svld2_s32 (p0, x0 - svcntw () * 2), -+ z0 = svld2 (p0, x0 - svcntw () * 2)) -+ -+/* -+** ld2_s32_m16: -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_s32_m16, svint32x2_t, int32_t, -+ z0 = svld2_s32 (p0, x0 - svcntw () * 16), -+ z0 = svld2 (p0, x0 - svcntw () * 16)) -+ -+/* -+** ld2_s32_m18: -+** addvl (x[0-9]+), x0, #-18 -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld2_s32_m18, svint32x2_t, int32_t, -+ z0 = svld2_s32 (p0, x0 - svcntw () * 18), -+ z0 = svld2 (p0, x0 - svcntw () * 18)) -+ -+/* -+** ld2_vnum_s32_0: -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s32_0, svint32x2_t, int32_t, -+ z0 = svld2_vnum_s32 (p0, x0, 0), -+ z0 = svld2_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_vnum_s32_1: -+** incb x0 -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s32_1, svint32x2_t, int32_t, -+ z0 = svld2_vnum_s32 (p0, x0, 1), -+ z0 = svld2_vnum (p0, x0, 1)) -+ -+/* -+** ld2_vnum_s32_2: -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s32_2, svint32x2_t, int32_t, -+ z0 = svld2_vnum_s32 (p0, x0, 2), -+ z0 = svld2_vnum (p0, x0, 2)) -+ -+/* -+** ld2_vnum_s32_14: -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s32_14, svint32x2_t, int32_t, -+ z0 = svld2_vnum_s32 (p0, x0, 14), -+ z0 = svld2_vnum (p0, x0, 14)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_vnum_s32_16: -+** incb x0, all, mul #16 -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s32_16, svint32x2_t, int32_t, -+ z0 = svld2_vnum_s32 (p0, x0, 16), -+ z0 = svld2_vnum (p0, x0, 16)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld2_vnum_s32_m1: -+** decb x0 -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s32_m1, svint32x2_t, int32_t, -+ z0 = svld2_vnum_s32 (p0, x0, -1), -+ z0 = svld2_vnum (p0, x0, -1)) -+ -+/* -+** ld2_vnum_s32_m2: -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s32_m2, svint32x2_t, int32_t, -+ z0 = svld2_vnum_s32 (p0, x0, -2), -+ z0 = svld2_vnum (p0, x0, -2)) -+ -+/* -+** ld2_vnum_s32_m16: -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s32_m16, svint32x2_t, int32_t, -+ z0 = svld2_vnum_s32 (p0, x0, -16), -+ z0 = svld2_vnum (p0, x0, -16)) -+ -+/* -+** ld2_vnum_s32_m18: -+** addvl (x[0-9]+), x0, #-18 -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s32_m18, svint32x2_t, int32_t, -+ z0 = svld2_vnum_s32 (p0, x0, -18), -+ z0 = svld2_vnum (p0, x0, -18)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld2_vnum_s32_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s32_x1, svint32x2_t, int32_t, -+ z0 = svld2_vnum_s32 (p0, x0, x1), -+ z0 = svld2_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_s64.c -new file mode 100644 -index 000000000..9a43e86d5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_s64.c -@@ -0,0 +1,200 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld2_s64_base: -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_s64_base, svint64x2_t, int64_t, -+ z0 = svld2_s64 (p0, x0), -+ z0 = svld2 (p0, x0)) -+ -+/* -+** ld2_s64_index: -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_LOAD (ld2_s64_index, svint64x2_t, int64_t, -+ z0 = svld2_s64 (p0, x0 + x1), -+ z0 = svld2 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_s64_1: -+** incb x0 -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_s64_1, svint64x2_t, int64_t, -+ z0 = svld2_s64 (p0, x0 + svcntd ()), -+ z0 = svld2 (p0, x0 + svcntd ())) -+ -+/* -+** ld2_s64_2: -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_s64_2, svint64x2_t, int64_t, -+ z0 = svld2_s64 (p0, x0 + svcntd () * 2), -+ z0 = svld2 (p0, x0 + svcntd () * 2)) -+ -+/* -+** ld2_s64_14: -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_s64_14, svint64x2_t, int64_t, -+ z0 = svld2_s64 (p0, x0 + svcntd () * 14), -+ z0 = svld2 (p0, x0 + svcntd () * 14)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_s64_16: -+** incb x0, all, mul #16 -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_s64_16, svint64x2_t, int64_t, -+ z0 = svld2_s64 (p0, x0 + svcntd () * 16), -+ z0 = svld2 (p0, x0 + svcntd () * 16)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld2_s64_m1: -+** decb x0 -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_s64_m1, svint64x2_t, int64_t, -+ z0 = svld2_s64 (p0, x0 - svcntd ()), -+ z0 = svld2 (p0, x0 - svcntd ())) -+ -+/* -+** ld2_s64_m2: -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_s64_m2, svint64x2_t, int64_t, -+ z0 = svld2_s64 (p0, x0 - svcntd () * 2), -+ z0 = svld2 (p0, x0 - svcntd () * 2)) -+ -+/* -+** ld2_s64_m16: -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_s64_m16, svint64x2_t, int64_t, -+ z0 = svld2_s64 (p0, x0 - svcntd () * 16), -+ z0 = svld2 (p0, x0 - svcntd () * 16)) -+ -+/* -+** ld2_s64_m18: -+** addvl (x[0-9]+), x0, #-18 -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld2_s64_m18, svint64x2_t, int64_t, -+ z0 = svld2_s64 (p0, x0 - svcntd () * 18), -+ z0 = svld2 (p0, x0 - svcntd () * 18)) -+ -+/* -+** ld2_vnum_s64_0: -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s64_0, svint64x2_t, int64_t, -+ z0 = svld2_vnum_s64 (p0, x0, 0), -+ z0 = svld2_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_vnum_s64_1: -+** incb x0 -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s64_1, svint64x2_t, int64_t, -+ z0 = svld2_vnum_s64 (p0, x0, 1), -+ z0 = svld2_vnum (p0, x0, 1)) -+ -+/* -+** ld2_vnum_s64_2: -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s64_2, svint64x2_t, int64_t, -+ z0 = svld2_vnum_s64 (p0, x0, 2), -+ z0 = svld2_vnum (p0, x0, 2)) -+ -+/* -+** ld2_vnum_s64_14: -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s64_14, svint64x2_t, int64_t, -+ z0 = svld2_vnum_s64 (p0, x0, 14), -+ z0 = svld2_vnum (p0, x0, 14)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_vnum_s64_16: -+** incb x0, all, mul #16 -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s64_16, svint64x2_t, int64_t, -+ z0 = svld2_vnum_s64 (p0, x0, 16), -+ z0 = svld2_vnum (p0, x0, 16)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_vnum_s64_m1: -+** decb x0 -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s64_m1, svint64x2_t, int64_t, -+ z0 = svld2_vnum_s64 (p0, x0, -1), -+ z0 = svld2_vnum (p0, x0, -1)) -+ -+/* -+** ld2_vnum_s64_m2: -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s64_m2, svint64x2_t, int64_t, -+ z0 = svld2_vnum_s64 (p0, x0, -2), -+ z0 = svld2_vnum (p0, x0, -2)) -+ -+/* -+** ld2_vnum_s64_m16: -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s64_m16, svint64x2_t, int64_t, -+ z0 = svld2_vnum_s64 (p0, x0, -16), -+ z0 = svld2_vnum (p0, x0, -16)) -+ -+/* -+** ld2_vnum_s64_m18: -+** addvl (x[0-9]+), x0, #-18 -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s64_m18, svint64x2_t, int64_t, -+ z0 = svld2_vnum_s64 (p0, x0, -18), -+ z0 = svld2_vnum (p0, x0, -18)) -+ -+/* Using MUL to calculate an index would also be OK. 
*/ -+/* -+** ld2_vnum_s64_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s64_x1, svint64x2_t, int64_t, -+ z0 = svld2_vnum_s64 (p0, x0, x1), -+ z0 = svld2_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_s8.c -new file mode 100644 -index 000000000..af5c04c66 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_s8.c -@@ -0,0 +1,204 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld2_s8_base: -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_s8_base, svint8x2_t, int8_t, -+ z0 = svld2_s8 (p0, x0), -+ z0 = svld2 (p0, x0)) -+ -+/* -+** ld2_s8_index: -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ld2_s8_index, svint8x2_t, int8_t, -+ z0 = svld2_s8 (p0, x0 + x1), -+ z0 = svld2 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_s8_1: -+** incb x0 -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_s8_1, svint8x2_t, int8_t, -+ z0 = svld2_s8 (p0, x0 + svcntb ()), -+ z0 = svld2 (p0, x0 + svcntb ())) -+ -+/* -+** ld2_s8_2: -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_s8_2, svint8x2_t, int8_t, -+ z0 = svld2_s8 (p0, x0 + svcntb () * 2), -+ z0 = svld2 (p0, x0 + svcntb () * 2)) -+ -+/* -+** ld2_s8_14: -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_s8_14, svint8x2_t, int8_t, -+ z0 = svld2_s8 (p0, x0 + svcntb () * 14), -+ z0 = svld2 (p0, x0 + svcntb () * 14)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_s8_16: -+** incb x0, all, mul #16 -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_s8_16, svint8x2_t, int8_t, -+ z0 = svld2_s8 (p0, x0 + svcntb () * 16), -+ z0 = svld2 (p0, x0 + svcntb () * 16)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_s8_m1: -+** decb x0 -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_s8_m1, svint8x2_t, int8_t, -+ z0 = svld2_s8 (p0, x0 - svcntb ()), -+ z0 = svld2 (p0, x0 - svcntb ())) -+ -+/* -+** ld2_s8_m2: -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_s8_m2, svint8x2_t, int8_t, -+ z0 = svld2_s8 (p0, x0 - svcntb () * 2), -+ z0 = svld2 (p0, x0 - svcntb () * 2)) -+ -+/* -+** ld2_s8_m16: -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_s8_m16, svint8x2_t, int8_t, -+ z0 = svld2_s8 (p0, x0 - svcntb () * 16), -+ z0 = svld2 (p0, x0 - svcntb () * 16)) -+ -+/* -+** ld2_s8_m18: -+** addvl (x[0-9]+), x0, #-18 -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld2_s8_m18, svint8x2_t, int8_t, -+ z0 = svld2_s8 (p0, x0 - svcntb () * 18), -+ z0 = svld2 (p0, x0 - svcntb () * 18)) -+ -+/* -+** ld2_vnum_s8_0: -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s8_0, svint8x2_t, int8_t, -+ z0 = svld2_vnum_s8 (p0, x0, 0), -+ z0 = svld2_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld2_vnum_s8_1: -+** incb x0 -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s8_1, svint8x2_t, int8_t, -+ z0 = svld2_vnum_s8 (p0, x0, 1), -+ z0 = svld2_vnum (p0, x0, 1)) -+ -+/* -+** ld2_vnum_s8_2: -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s8_2, svint8x2_t, int8_t, -+ z0 = svld2_vnum_s8 (p0, x0, 2), -+ z0 = svld2_vnum (p0, x0, 2)) -+ -+/* -+** ld2_vnum_s8_14: -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s8_14, svint8x2_t, int8_t, -+ z0 = svld2_vnum_s8 (p0, x0, 14), -+ z0 = svld2_vnum (p0, x0, 14)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_vnum_s8_16: -+** incb x0, all, mul #16 -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s8_16, svint8x2_t, int8_t, -+ z0 = svld2_vnum_s8 (p0, x0, 16), -+ z0 = svld2_vnum (p0, x0, 16)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_vnum_s8_m1: -+** decb x0 -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s8_m1, svint8x2_t, int8_t, -+ z0 = svld2_vnum_s8 (p0, x0, -1), -+ z0 = svld2_vnum (p0, x0, -1)) -+ -+/* -+** ld2_vnum_s8_m2: -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s8_m2, svint8x2_t, int8_t, -+ z0 = svld2_vnum_s8 (p0, x0, -2), -+ z0 = svld2_vnum (p0, x0, -2)) -+ -+/* -+** ld2_vnum_s8_m16: -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s8_m16, svint8x2_t, int8_t, -+ z0 = svld2_vnum_s8 (p0, x0, -16), -+ z0 = svld2_vnum (p0, x0, -16)) -+ -+/* -+** ld2_vnum_s8_m18: -+** addvl (x[0-9]+), x0, #-18 -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s8_m18, svint8x2_t, int8_t, -+ z0 = svld2_vnum_s8 (p0, x0, -18), -+ z0 = svld2_vnum (p0, x0, -18)) -+ -+/* -+** ld2_vnum_s8_x1: -+** cntb (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_LOAD (ld2_vnum_s8_x1, svint8x2_t, int8_t, -+ z0 = svld2_vnum_s8 (p0, x0, x1), -+ z0 = svld2_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_u16.c -new file mode 100644 -index 000000000..6c33322c1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_u16.c -@@ -0,0 +1,200 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld2_u16_base: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_u16_base, svuint16x2_t, uint16_t, -+ z0 = svld2_u16 (p0, x0), -+ z0 = svld2 (p0, x0)) -+ -+/* -+** ld2_u16_index: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ld2_u16_index, svuint16x2_t, uint16_t, -+ z0 = svld2_u16 (p0, x0 + x1), -+ z0 = svld2 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld2_u16_1: -+** incb x0 -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_u16_1, svuint16x2_t, uint16_t, -+ z0 = svld2_u16 (p0, x0 + svcnth ()), -+ z0 = svld2 (p0, x0 + svcnth ())) -+ -+/* -+** ld2_u16_2: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_u16_2, svuint16x2_t, uint16_t, -+ z0 = svld2_u16 (p0, x0 + svcnth () * 2), -+ z0 = svld2 (p0, x0 + svcnth () * 2)) -+ -+/* -+** ld2_u16_14: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_u16_14, svuint16x2_t, uint16_t, -+ z0 = svld2_u16 (p0, x0 + svcnth () * 14), -+ z0 = svld2 (p0, x0 + svcnth () * 14)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_u16_16: -+** incb x0, all, mul #16 -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_u16_16, svuint16x2_t, uint16_t, -+ z0 = svld2_u16 (p0, x0 + svcnth () * 16), -+ z0 = svld2 (p0, x0 + svcnth () * 16)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_u16_m1: -+** decb x0 -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_u16_m1, svuint16x2_t, uint16_t, -+ z0 = svld2_u16 (p0, x0 - svcnth ()), -+ z0 = svld2 (p0, x0 - svcnth ())) -+ -+/* -+** ld2_u16_m2: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_u16_m2, svuint16x2_t, uint16_t, -+ z0 = svld2_u16 (p0, x0 - svcnth () * 2), -+ z0 = svld2 (p0, x0 - svcnth () * 2)) -+ -+/* -+** ld2_u16_m16: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_u16_m16, svuint16x2_t, uint16_t, -+ z0 = svld2_u16 (p0, x0 - svcnth () * 16), -+ z0 = svld2 (p0, x0 - svcnth () * 16)) -+ -+/* -+** ld2_u16_m18: -+** addvl (x[0-9]+), x0, #-18 -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld2_u16_m18, svuint16x2_t, uint16_t, -+ z0 = svld2_u16 (p0, x0 - svcnth () * 18), -+ z0 = svld2 (p0, x0 - svcnth () * 18)) -+ -+/* -+** ld2_vnum_u16_0: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u16_0, svuint16x2_t, uint16_t, -+ z0 = svld2_vnum_u16 (p0, x0, 0), -+ z0 = svld2_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_vnum_u16_1: -+** incb x0 -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u16_1, svuint16x2_t, uint16_t, -+ z0 = svld2_vnum_u16 (p0, x0, 1), -+ z0 = svld2_vnum (p0, x0, 1)) -+ -+/* -+** ld2_vnum_u16_2: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u16_2, svuint16x2_t, uint16_t, -+ z0 = svld2_vnum_u16 (p0, x0, 2), -+ z0 = svld2_vnum (p0, x0, 2)) -+ -+/* -+** ld2_vnum_u16_14: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u16_14, svuint16x2_t, uint16_t, -+ z0 = svld2_vnum_u16 (p0, x0, 14), -+ z0 = svld2_vnum (p0, x0, 14)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_vnum_u16_16: -+** incb x0, all, mul #16 -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u16_16, svuint16x2_t, uint16_t, -+ z0 = svld2_vnum_u16 (p0, x0, 16), -+ z0 = svld2_vnum (p0, x0, 16)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld2_vnum_u16_m1: -+** decb x0 -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u16_m1, svuint16x2_t, uint16_t, -+ z0 = svld2_vnum_u16 (p0, x0, -1), -+ z0 = svld2_vnum (p0, x0, -1)) -+ -+/* -+** ld2_vnum_u16_m2: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u16_m2, svuint16x2_t, uint16_t, -+ z0 = svld2_vnum_u16 (p0, x0, -2), -+ z0 = svld2_vnum (p0, x0, -2)) -+ -+/* -+** ld2_vnum_u16_m16: -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u16_m16, svuint16x2_t, uint16_t, -+ z0 = svld2_vnum_u16 (p0, x0, -16), -+ z0 = svld2_vnum (p0, x0, -16)) -+ -+/* -+** ld2_vnum_u16_m18: -+** addvl (x[0-9]+), x0, #-18 -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u16_m18, svuint16x2_t, uint16_t, -+ z0 = svld2_vnum_u16 (p0, x0, -18), -+ z0 = svld2_vnum (p0, x0, -18)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld2_vnum_u16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld2h {z0\.h(?: - |, )z1\.h}, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u16_x1, svuint16x2_t, uint16_t, -+ z0 = svld2_vnum_u16 (p0, x0, x1), -+ z0 = svld2_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_u32.c -new file mode 100644 -index 000000000..84a23cf47 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_u32.c -@@ -0,0 +1,200 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld2_u32_base: -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_u32_base, svuint32x2_t, uint32_t, -+ z0 = svld2_u32 (p0, x0), -+ z0 = svld2 (p0, x0)) -+ -+/* -+** ld2_u32_index: -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_LOAD (ld2_u32_index, svuint32x2_t, uint32_t, -+ z0 = svld2_u32 (p0, x0 + x1), -+ z0 = svld2 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_u32_1: -+** incb x0 -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_u32_1, svuint32x2_t, uint32_t, -+ z0 = svld2_u32 (p0, x0 + svcntw ()), -+ z0 = svld2 (p0, x0 + svcntw ())) -+ -+/* -+** ld2_u32_2: -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_u32_2, svuint32x2_t, uint32_t, -+ z0 = svld2_u32 (p0, x0 + svcntw () * 2), -+ z0 = svld2 (p0, x0 + svcntw () * 2)) -+ -+/* -+** ld2_u32_14: -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_u32_14, svuint32x2_t, uint32_t, -+ z0 = svld2_u32 (p0, x0 + svcntw () * 14), -+ z0 = svld2 (p0, x0 + svcntw () * 14)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_u32_16: -+** incb x0, all, mul #16 -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_u32_16, svuint32x2_t, uint32_t, -+ z0 = svld2_u32 (p0, x0 + svcntw () * 16), -+ z0 = svld2 (p0, x0 + svcntw () * 16)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld2_u32_m1: -+** decb x0 -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_u32_m1, svuint32x2_t, uint32_t, -+ z0 = svld2_u32 (p0, x0 - svcntw ()), -+ z0 = svld2 (p0, x0 - svcntw ())) -+ -+/* -+** ld2_u32_m2: -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_u32_m2, svuint32x2_t, uint32_t, -+ z0 = svld2_u32 (p0, x0 - svcntw () * 2), -+ z0 = svld2 (p0, x0 - svcntw () * 2)) -+ -+/* -+** ld2_u32_m16: -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_u32_m16, svuint32x2_t, uint32_t, -+ z0 = svld2_u32 (p0, x0 - svcntw () * 16), -+ z0 = svld2 (p0, x0 - svcntw () * 16)) -+ -+/* -+** ld2_u32_m18: -+** addvl (x[0-9]+), x0, #-18 -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld2_u32_m18, svuint32x2_t, uint32_t, -+ z0 = svld2_u32 (p0, x0 - svcntw () * 18), -+ z0 = svld2 (p0, x0 - svcntw () * 18)) -+ -+/* -+** ld2_vnum_u32_0: -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u32_0, svuint32x2_t, uint32_t, -+ z0 = svld2_vnum_u32 (p0, x0, 0), -+ z0 = svld2_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_vnum_u32_1: -+** incb x0 -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u32_1, svuint32x2_t, uint32_t, -+ z0 = svld2_vnum_u32 (p0, x0, 1), -+ z0 = svld2_vnum (p0, x0, 1)) -+ -+/* -+** ld2_vnum_u32_2: -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u32_2, svuint32x2_t, uint32_t, -+ z0 = svld2_vnum_u32 (p0, x0, 2), -+ z0 = svld2_vnum (p0, x0, 2)) -+ -+/* -+** ld2_vnum_u32_14: -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u32_14, svuint32x2_t, uint32_t, -+ z0 = svld2_vnum_u32 (p0, x0, 14), -+ z0 = svld2_vnum (p0, x0, 14)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_vnum_u32_16: -+** incb x0, all, mul #16 -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u32_16, svuint32x2_t, uint32_t, -+ z0 = svld2_vnum_u32 (p0, x0, 16), -+ z0 = svld2_vnum (p0, x0, 16)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_vnum_u32_m1: -+** decb x0 -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u32_m1, svuint32x2_t, uint32_t, -+ z0 = svld2_vnum_u32 (p0, x0, -1), -+ z0 = svld2_vnum (p0, x0, -1)) -+ -+/* -+** ld2_vnum_u32_m2: -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u32_m2, svuint32x2_t, uint32_t, -+ z0 = svld2_vnum_u32 (p0, x0, -2), -+ z0 = svld2_vnum (p0, x0, -2)) -+ -+/* -+** ld2_vnum_u32_m16: -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u32_m16, svuint32x2_t, uint32_t, -+ z0 = svld2_vnum_u32 (p0, x0, -16), -+ z0 = svld2_vnum (p0, x0, -16)) -+ -+/* -+** ld2_vnum_u32_m18: -+** addvl (x[0-9]+), x0, #-18 -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u32_m18, svuint32x2_t, uint32_t, -+ z0 = svld2_vnum_u32 (p0, x0, -18), -+ z0 = svld2_vnum (p0, x0, -18)) -+ -+/* Using MUL to calculate an index would also be OK. 
*/ -+/* -+** ld2_vnum_u32_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld2w {z0\.s(?: - |, )z1\.s}, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u32_x1, svuint32x2_t, uint32_t, -+ z0 = svld2_vnum_u32 (p0, x0, x1), -+ z0 = svld2_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_u64.c -new file mode 100644 -index 000000000..350b05792 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_u64.c -@@ -0,0 +1,200 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld2_u64_base: -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_u64_base, svuint64x2_t, uint64_t, -+ z0 = svld2_u64 (p0, x0), -+ z0 = svld2 (p0, x0)) -+ -+/* -+** ld2_u64_index: -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_LOAD (ld2_u64_index, svuint64x2_t, uint64_t, -+ z0 = svld2_u64 (p0, x0 + x1), -+ z0 = svld2 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_u64_1: -+** incb x0 -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_u64_1, svuint64x2_t, uint64_t, -+ z0 = svld2_u64 (p0, x0 + svcntd ()), -+ z0 = svld2 (p0, x0 + svcntd ())) -+ -+/* -+** ld2_u64_2: -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_u64_2, svuint64x2_t, uint64_t, -+ z0 = svld2_u64 (p0, x0 + svcntd () * 2), -+ z0 = svld2 (p0, x0 + svcntd () * 2)) -+ -+/* -+** ld2_u64_14: -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_u64_14, svuint64x2_t, uint64_t, -+ z0 = svld2_u64 (p0, x0 + svcntd () * 14), -+ z0 = svld2 (p0, x0 + svcntd () * 14)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_u64_16: -+** incb x0, all, mul #16 -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_u64_16, svuint64x2_t, uint64_t, -+ z0 = svld2_u64 (p0, x0 + svcntd () * 16), -+ z0 = svld2 (p0, x0 + svcntd () * 16)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_u64_m1: -+** decb x0 -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_u64_m1, svuint64x2_t, uint64_t, -+ z0 = svld2_u64 (p0, x0 - svcntd ()), -+ z0 = svld2 (p0, x0 - svcntd ())) -+ -+/* -+** ld2_u64_m2: -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_u64_m2, svuint64x2_t, uint64_t, -+ z0 = svld2_u64 (p0, x0 - svcntd () * 2), -+ z0 = svld2 (p0, x0 - svcntd () * 2)) -+ -+/* -+** ld2_u64_m16: -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_u64_m16, svuint64x2_t, uint64_t, -+ z0 = svld2_u64 (p0, x0 - svcntd () * 16), -+ z0 = svld2 (p0, x0 - svcntd () * 16)) -+ -+/* -+** ld2_u64_m18: -+** addvl (x[0-9]+), x0, #-18 -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld2_u64_m18, svuint64x2_t, uint64_t, -+ z0 = svld2_u64 (p0, x0 - svcntd () * 18), -+ z0 = svld2 (p0, x0 - svcntd () * 18)) -+ -+/* -+** ld2_vnum_u64_0: -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u64_0, svuint64x2_t, uint64_t, -+ z0 = svld2_vnum_u64 (p0, x0, 0), -+ z0 = svld2_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld2_vnum_u64_1: -+** incb x0 -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u64_1, svuint64x2_t, uint64_t, -+ z0 = svld2_vnum_u64 (p0, x0, 1), -+ z0 = svld2_vnum (p0, x0, 1)) -+ -+/* -+** ld2_vnum_u64_2: -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u64_2, svuint64x2_t, uint64_t, -+ z0 = svld2_vnum_u64 (p0, x0, 2), -+ z0 = svld2_vnum (p0, x0, 2)) -+ -+/* -+** ld2_vnum_u64_14: -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u64_14, svuint64x2_t, uint64_t, -+ z0 = svld2_vnum_u64 (p0, x0, 14), -+ z0 = svld2_vnum (p0, x0, 14)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_vnum_u64_16: -+** incb x0, all, mul #16 -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u64_16, svuint64x2_t, uint64_t, -+ z0 = svld2_vnum_u64 (p0, x0, 16), -+ z0 = svld2_vnum (p0, x0, 16)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_vnum_u64_m1: -+** decb x0 -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u64_m1, svuint64x2_t, uint64_t, -+ z0 = svld2_vnum_u64 (p0, x0, -1), -+ z0 = svld2_vnum (p0, x0, -1)) -+ -+/* -+** ld2_vnum_u64_m2: -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u64_m2, svuint64x2_t, uint64_t, -+ z0 = svld2_vnum_u64 (p0, x0, -2), -+ z0 = svld2_vnum (p0, x0, -2)) -+ -+/* -+** ld2_vnum_u64_m16: -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u64_m16, svuint64x2_t, uint64_t, -+ z0 = svld2_vnum_u64 (p0, x0, -16), -+ z0 = svld2_vnum (p0, x0, -16)) -+ -+/* -+** ld2_vnum_u64_m18: -+** addvl (x[0-9]+), x0, #-18 -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u64_m18, svuint64x2_t, uint64_t, -+ z0 = svld2_vnum_u64 (p0, x0, -18), -+ z0 = svld2_vnum (p0, x0, -18)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld2_vnum_u64_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld2d {z0\.d(?: - |, )z1\.d}, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u64_x1, svuint64x2_t, uint64_t, -+ z0 = svld2_vnum_u64 (p0, x0, x1), -+ z0 = svld2_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_u8.c -new file mode 100644 -index 000000000..e67634c4c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld2_u8.c -@@ -0,0 +1,204 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld2_u8_base: -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_u8_base, svuint8x2_t, uint8_t, -+ z0 = svld2_u8 (p0, x0), -+ z0 = svld2 (p0, x0)) -+ -+/* -+** ld2_u8_index: -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ld2_u8_index, svuint8x2_t, uint8_t, -+ z0 = svld2_u8 (p0, x0 + x1), -+ z0 = svld2 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld2_u8_1: -+** incb x0 -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_u8_1, svuint8x2_t, uint8_t, -+ z0 = svld2_u8 (p0, x0 + svcntb ()), -+ z0 = svld2 (p0, x0 + svcntb ())) -+ -+/* -+** ld2_u8_2: -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_u8_2, svuint8x2_t, uint8_t, -+ z0 = svld2_u8 (p0, x0 + svcntb () * 2), -+ z0 = svld2 (p0, x0 + svcntb () * 2)) -+ -+/* -+** ld2_u8_14: -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_u8_14, svuint8x2_t, uint8_t, -+ z0 = svld2_u8 (p0, x0 + svcntb () * 14), -+ z0 = svld2 (p0, x0 + svcntb () * 14)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_u8_16: -+** incb x0, all, mul #16 -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_u8_16, svuint8x2_t, uint8_t, -+ z0 = svld2_u8 (p0, x0 + svcntb () * 16), -+ z0 = svld2 (p0, x0 + svcntb () * 16)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_u8_m1: -+** decb x0 -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_u8_m1, svuint8x2_t, uint8_t, -+ z0 = svld2_u8 (p0, x0 - svcntb ()), -+ z0 = svld2 (p0, x0 - svcntb ())) -+ -+/* -+** ld2_u8_m2: -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_u8_m2, svuint8x2_t, uint8_t, -+ z0 = svld2_u8 (p0, x0 - svcntb () * 2), -+ z0 = svld2 (p0, x0 - svcntb () * 2)) -+ -+/* -+** ld2_u8_m16: -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_u8_m16, svuint8x2_t, uint8_t, -+ z0 = svld2_u8 (p0, x0 - svcntb () * 16), -+ z0 = svld2 (p0, x0 - svcntb () * 16)) -+ -+/* -+** ld2_u8_m18: -+** addvl (x[0-9]+), x0, #-18 -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld2_u8_m18, svuint8x2_t, uint8_t, -+ z0 = svld2_u8 (p0, x0 - svcntb () * 18), -+ z0 = svld2 (p0, x0 - svcntb () * 18)) -+ -+/* -+** ld2_vnum_u8_0: -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u8_0, svuint8x2_t, uint8_t, -+ z0 = svld2_vnum_u8 (p0, x0, 0), -+ z0 = svld2_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_vnum_u8_1: -+** incb x0 -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u8_1, svuint8x2_t, uint8_t, -+ z0 = svld2_vnum_u8 (p0, x0, 1), -+ z0 = svld2_vnum (p0, x0, 1)) -+ -+/* -+** ld2_vnum_u8_2: -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u8_2, svuint8x2_t, uint8_t, -+ z0 = svld2_vnum_u8 (p0, x0, 2), -+ z0 = svld2_vnum (p0, x0, 2)) -+ -+/* -+** ld2_vnum_u8_14: -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u8_14, svuint8x2_t, uint8_t, -+ z0 = svld2_vnum_u8 (p0, x0, 14), -+ z0 = svld2_vnum (p0, x0, 14)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld2_vnum_u8_16: -+** incb x0, all, mul #16 -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u8_16, svuint8x2_t, uint8_t, -+ z0 = svld2_vnum_u8 (p0, x0, 16), -+ z0 = svld2_vnum (p0, x0, 16)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld2_vnum_u8_m1: -+** decb x0 -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u8_m1, svuint8x2_t, uint8_t, -+ z0 = svld2_vnum_u8 (p0, x0, -1), -+ z0 = svld2_vnum (p0, x0, -1)) -+ -+/* -+** ld2_vnum_u8_m2: -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u8_m2, svuint8x2_t, uint8_t, -+ z0 = svld2_vnum_u8 (p0, x0, -2), -+ z0 = svld2_vnum (p0, x0, -2)) -+ -+/* -+** ld2_vnum_u8_m16: -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u8_m16, svuint8x2_t, uint8_t, -+ z0 = svld2_vnum_u8 (p0, x0, -16), -+ z0 = svld2_vnum (p0, x0, -16)) -+ -+/* -+** ld2_vnum_u8_m18: -+** addvl (x[0-9]+), x0, #-18 -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u8_m18, svuint8x2_t, uint8_t, -+ z0 = svld2_vnum_u8 (p0, x0, -18), -+ z0 = svld2_vnum (p0, x0, -18)) -+ -+/* -+** ld2_vnum_u8_x1: -+** cntb (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** ld2b {z0\.b(?: - |, )z1\.b}, p0/z, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_LOAD (ld2_vnum_u8_x1, svuint8x2_t, uint8_t, -+ z0 = svld2_vnum_u8 (p0, x0, x1), -+ z0 = svld2_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_bf16.c -new file mode 100644 -index 000000000..e0b4fb1af ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_bf16.c -@@ -0,0 +1,242 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld3_bf16_base: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_bf16_base, svbfloat16x3_t, bfloat16_t, -+ z0 = svld3_bf16 (p0, x0), -+ z0 = svld3 (p0, x0)) -+ -+/* -+** ld3_bf16_index: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ld3_bf16_index, svbfloat16x3_t, bfloat16_t, -+ z0 = svld3_bf16 (p0, x0 + x1), -+ z0 = svld3 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_bf16_1: -+** incb x0 -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_bf16_1, svbfloat16x3_t, bfloat16_t, -+ z0 = svld3_bf16 (p0, x0 + svcnth ()), -+ z0 = svld3 (p0, x0 + svcnth ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_bf16_2: -+** incb x0, all, mul #2 -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_bf16_2, svbfloat16x3_t, bfloat16_t, -+ z0 = svld3_bf16 (p0, x0 + svcnth () * 2), -+ z0 = svld3 (p0, x0 + svcnth () * 2)) -+ -+/* -+** ld3_bf16_3: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_bf16_3, svbfloat16x3_t, bfloat16_t, -+ z0 = svld3_bf16 (p0, x0 + svcnth () * 3), -+ z0 = svld3 (p0, x0 + svcnth () * 3)) -+ -+/* -+** ld3_bf16_21: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_bf16_21, svbfloat16x3_t, bfloat16_t, -+ z0 = svld3_bf16 (p0, x0 + svcnth () * 21), -+ z0 = svld3 (p0, x0 + svcnth () * 21)) -+ -+/* -+** ld3_bf16_24: -+** addvl (x[0-9]+), x0, #24 -+** ld3h {z0\.h - z2\.h}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_bf16_24, svbfloat16x3_t, bfloat16_t, -+ z0 = svld3_bf16 (p0, x0 + svcnth () * 24), -+ z0 = svld3 (p0, x0 + svcnth () * 24)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld3_bf16_m1: -+** decb x0 -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_bf16_m1, svbfloat16x3_t, bfloat16_t, -+ z0 = svld3_bf16 (p0, x0 - svcnth ()), -+ z0 = svld3 (p0, x0 - svcnth ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_bf16_m2: -+** decb x0, all, mul #2 -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_bf16_m2, svbfloat16x3_t, bfloat16_t, -+ z0 = svld3_bf16 (p0, x0 - svcnth () * 2), -+ z0 = svld3 (p0, x0 - svcnth () * 2)) -+ -+/* -+** ld3_bf16_m3: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_bf16_m3, svbfloat16x3_t, bfloat16_t, -+ z0 = svld3_bf16 (p0, x0 - svcnth () * 3), -+ z0 = svld3 (p0, x0 - svcnth () * 3)) -+ -+/* -+** ld3_bf16_m24: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_bf16_m24, svbfloat16x3_t, bfloat16_t, -+ z0 = svld3_bf16 (p0, x0 - svcnth () * 24), -+ z0 = svld3 (p0, x0 - svcnth () * 24)) -+ -+/* -+** ld3_bf16_m27: -+** addvl (x[0-9]+), x0, #-27 -+** ld3h {z0\.h - z2\.h}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_bf16_m27, svbfloat16x3_t, bfloat16_t, -+ z0 = svld3_bf16 (p0, x0 - svcnth () * 27), -+ z0 = svld3 (p0, x0 - svcnth () * 27)) -+ -+/* -+** ld3_vnum_bf16_0: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_bf16_0, svbfloat16x3_t, bfloat16_t, -+ z0 = svld3_vnum_bf16 (p0, x0, 0), -+ z0 = svld3_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_vnum_bf16_1: -+** incb x0 -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_bf16_1, svbfloat16x3_t, bfloat16_t, -+ z0 = svld3_vnum_bf16 (p0, x0, 1), -+ z0 = svld3_vnum (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_vnum_bf16_2: -+** incb x0, all, mul #2 -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_bf16_2, svbfloat16x3_t, bfloat16_t, -+ z0 = svld3_vnum_bf16 (p0, x0, 2), -+ z0 = svld3_vnum (p0, x0, 2)) -+ -+/* -+** ld3_vnum_bf16_3: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_bf16_3, svbfloat16x3_t, bfloat16_t, -+ z0 = svld3_vnum_bf16 (p0, x0, 3), -+ z0 = svld3_vnum (p0, x0, 3)) -+ -+/* -+** ld3_vnum_bf16_21: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_bf16_21, svbfloat16x3_t, bfloat16_t, -+ z0 = svld3_vnum_bf16 (p0, x0, 21), -+ z0 = svld3_vnum (p0, x0, 21)) -+ -+/* -+** ld3_vnum_bf16_24: -+** addvl (x[0-9]+), x0, #24 -+** ld3h {z0\.h - z2\.h}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_bf16_24, svbfloat16x3_t, bfloat16_t, -+ z0 = svld3_vnum_bf16 (p0, x0, 24), -+ z0 = svld3_vnum (p0, x0, 24)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_vnum_bf16_m1: -+** decb x0 -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_bf16_m1, svbfloat16x3_t, bfloat16_t, -+ z0 = svld3_vnum_bf16 (p0, x0, -1), -+ z0 = svld3_vnum (p0, x0, -1)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld3_vnum_bf16_m2: -+** decb x0, all, mul #2 -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_bf16_m2, svbfloat16x3_t, bfloat16_t, -+ z0 = svld3_vnum_bf16 (p0, x0, -2), -+ z0 = svld3_vnum (p0, x0, -2)) -+ -+/* -+** ld3_vnum_bf16_m3: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_bf16_m3, svbfloat16x3_t, bfloat16_t, -+ z0 = svld3_vnum_bf16 (p0, x0, -3), -+ z0 = svld3_vnum (p0, x0, -3)) -+ -+/* -+** ld3_vnum_bf16_m24: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_bf16_m24, svbfloat16x3_t, bfloat16_t, -+ z0 = svld3_vnum_bf16 (p0, x0, -24), -+ z0 = svld3_vnum (p0, x0, -24)) -+ -+/* -+** ld3_vnum_bf16_m27: -+** addvl (x[0-9]+), x0, #-27 -+** ld3h {z0\.h - z2\.h}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_bf16_m27, svbfloat16x3_t, bfloat16_t, -+ z0 = svld3_vnum_bf16 (p0, x0, -27), -+ z0 = svld3_vnum (p0, x0, -27)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld3_vnum_bf16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld3h {z0\.h - z2\.h}, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_bf16_x1, svbfloat16x3_t, bfloat16_t, -+ z0 = svld3_vnum_bf16 (p0, x0, x1), -+ z0 = svld3_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_f16.c -new file mode 100644 -index 000000000..3d7777e52 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_f16.c -@@ -0,0 +1,242 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld3_f16_base: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_f16_base, svfloat16x3_t, float16_t, -+ z0 = svld3_f16 (p0, x0), -+ z0 = svld3 (p0, x0)) -+ -+/* -+** ld3_f16_index: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ld3_f16_index, svfloat16x3_t, float16_t, -+ z0 = svld3_f16 (p0, x0 + x1), -+ z0 = svld3 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_f16_1: -+** incb x0 -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_f16_1, svfloat16x3_t, float16_t, -+ z0 = svld3_f16 (p0, x0 + svcnth ()), -+ z0 = svld3 (p0, x0 + svcnth ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_f16_2: -+** incb x0, all, mul #2 -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_f16_2, svfloat16x3_t, float16_t, -+ z0 = svld3_f16 (p0, x0 + svcnth () * 2), -+ z0 = svld3 (p0, x0 + svcnth () * 2)) -+ -+/* -+** ld3_f16_3: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_f16_3, svfloat16x3_t, float16_t, -+ z0 = svld3_f16 (p0, x0 + svcnth () * 3), -+ z0 = svld3 (p0, x0 + svcnth () * 3)) -+ -+/* -+** ld3_f16_21: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_f16_21, svfloat16x3_t, float16_t, -+ z0 = svld3_f16 (p0, x0 + svcnth () * 21), -+ z0 = svld3 (p0, x0 + svcnth () * 21)) -+ -+/* -+** ld3_f16_24: -+** addvl (x[0-9]+), x0, #24 -+** ld3h {z0\.h - z2\.h}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_f16_24, svfloat16x3_t, float16_t, -+ z0 = svld3_f16 (p0, x0 + svcnth () * 24), -+ z0 = svld3 (p0, x0 + svcnth () * 24)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld3_f16_m1: -+** decb x0 -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_f16_m1, svfloat16x3_t, float16_t, -+ z0 = svld3_f16 (p0, x0 - svcnth ()), -+ z0 = svld3 (p0, x0 - svcnth ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_f16_m2: -+** decb x0, all, mul #2 -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_f16_m2, svfloat16x3_t, float16_t, -+ z0 = svld3_f16 (p0, x0 - svcnth () * 2), -+ z0 = svld3 (p0, x0 - svcnth () * 2)) -+ -+/* -+** ld3_f16_m3: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_f16_m3, svfloat16x3_t, float16_t, -+ z0 = svld3_f16 (p0, x0 - svcnth () * 3), -+ z0 = svld3 (p0, x0 - svcnth () * 3)) -+ -+/* -+** ld3_f16_m24: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_f16_m24, svfloat16x3_t, float16_t, -+ z0 = svld3_f16 (p0, x0 - svcnth () * 24), -+ z0 = svld3 (p0, x0 - svcnth () * 24)) -+ -+/* -+** ld3_f16_m27: -+** addvl (x[0-9]+), x0, #-27 -+** ld3h {z0\.h - z2\.h}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_f16_m27, svfloat16x3_t, float16_t, -+ z0 = svld3_f16 (p0, x0 - svcnth () * 27), -+ z0 = svld3 (p0, x0 - svcnth () * 27)) -+ -+/* -+** ld3_vnum_f16_0: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_f16_0, svfloat16x3_t, float16_t, -+ z0 = svld3_vnum_f16 (p0, x0, 0), -+ z0 = svld3_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_vnum_f16_1: -+** incb x0 -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_f16_1, svfloat16x3_t, float16_t, -+ z0 = svld3_vnum_f16 (p0, x0, 1), -+ z0 = svld3_vnum (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_vnum_f16_2: -+** incb x0, all, mul #2 -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_f16_2, svfloat16x3_t, float16_t, -+ z0 = svld3_vnum_f16 (p0, x0, 2), -+ z0 = svld3_vnum (p0, x0, 2)) -+ -+/* -+** ld3_vnum_f16_3: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_f16_3, svfloat16x3_t, float16_t, -+ z0 = svld3_vnum_f16 (p0, x0, 3), -+ z0 = svld3_vnum (p0, x0, 3)) -+ -+/* -+** ld3_vnum_f16_21: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_f16_21, svfloat16x3_t, float16_t, -+ z0 = svld3_vnum_f16 (p0, x0, 21), -+ z0 = svld3_vnum (p0, x0, 21)) -+ -+/* -+** ld3_vnum_f16_24: -+** addvl (x[0-9]+), x0, #24 -+** ld3h {z0\.h - z2\.h}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_f16_24, svfloat16x3_t, float16_t, -+ z0 = svld3_vnum_f16 (p0, x0, 24), -+ z0 = svld3_vnum (p0, x0, 24)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_vnum_f16_m1: -+** decb x0 -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_f16_m1, svfloat16x3_t, float16_t, -+ z0 = svld3_vnum_f16 (p0, x0, -1), -+ z0 = svld3_vnum (p0, x0, -1)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld3_vnum_f16_m2: -+** decb x0, all, mul #2 -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_f16_m2, svfloat16x3_t, float16_t, -+ z0 = svld3_vnum_f16 (p0, x0, -2), -+ z0 = svld3_vnum (p0, x0, -2)) -+ -+/* -+** ld3_vnum_f16_m3: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_f16_m3, svfloat16x3_t, float16_t, -+ z0 = svld3_vnum_f16 (p0, x0, -3), -+ z0 = svld3_vnum (p0, x0, -3)) -+ -+/* -+** ld3_vnum_f16_m24: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_f16_m24, svfloat16x3_t, float16_t, -+ z0 = svld3_vnum_f16 (p0, x0, -24), -+ z0 = svld3_vnum (p0, x0, -24)) -+ -+/* -+** ld3_vnum_f16_m27: -+** addvl (x[0-9]+), x0, #-27 -+** ld3h {z0\.h - z2\.h}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_f16_m27, svfloat16x3_t, float16_t, -+ z0 = svld3_vnum_f16 (p0, x0, -27), -+ z0 = svld3_vnum (p0, x0, -27)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld3_vnum_f16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld3h {z0\.h - z2\.h}, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_f16_x1, svfloat16x3_t, float16_t, -+ z0 = svld3_vnum_f16 (p0, x0, x1), -+ z0 = svld3_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_f32.c -new file mode 100644 -index 000000000..4e4ad7521 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_f32.c -@@ -0,0 +1,242 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld3_f32_base: -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_f32_base, svfloat32x3_t, float32_t, -+ z0 = svld3_f32 (p0, x0), -+ z0 = svld3 (p0, x0)) -+ -+/* -+** ld3_f32_index: -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_LOAD (ld3_f32_index, svfloat32x3_t, float32_t, -+ z0 = svld3_f32 (p0, x0 + x1), -+ z0 = svld3 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_f32_1: -+** incb x0 -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_f32_1, svfloat32x3_t, float32_t, -+ z0 = svld3_f32 (p0, x0 + svcntw ()), -+ z0 = svld3 (p0, x0 + svcntw ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_f32_2: -+** incb x0, all, mul #2 -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_f32_2, svfloat32x3_t, float32_t, -+ z0 = svld3_f32 (p0, x0 + svcntw () * 2), -+ z0 = svld3 (p0, x0 + svcntw () * 2)) -+ -+/* -+** ld3_f32_3: -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_f32_3, svfloat32x3_t, float32_t, -+ z0 = svld3_f32 (p0, x0 + svcntw () * 3), -+ z0 = svld3 (p0, x0 + svcntw () * 3)) -+ -+/* -+** ld3_f32_21: -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_f32_21, svfloat32x3_t, float32_t, -+ z0 = svld3_f32 (p0, x0 + svcntw () * 21), -+ z0 = svld3 (p0, x0 + svcntw () * 21)) -+ -+/* -+** ld3_f32_24: -+** addvl (x[0-9]+), x0, #24 -+** ld3w {z0\.s - z2\.s}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_f32_24, svfloat32x3_t, float32_t, -+ z0 = svld3_f32 (p0, x0 + svcntw () * 24), -+ z0 = svld3 (p0, x0 + svcntw () * 24)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld3_f32_m1: -+** decb x0 -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_f32_m1, svfloat32x3_t, float32_t, -+ z0 = svld3_f32 (p0, x0 - svcntw ()), -+ z0 = svld3 (p0, x0 - svcntw ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_f32_m2: -+** decb x0, all, mul #2 -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_f32_m2, svfloat32x3_t, float32_t, -+ z0 = svld3_f32 (p0, x0 - svcntw () * 2), -+ z0 = svld3 (p0, x0 - svcntw () * 2)) -+ -+/* -+** ld3_f32_m3: -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_f32_m3, svfloat32x3_t, float32_t, -+ z0 = svld3_f32 (p0, x0 - svcntw () * 3), -+ z0 = svld3 (p0, x0 - svcntw () * 3)) -+ -+/* -+** ld3_f32_m24: -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_f32_m24, svfloat32x3_t, float32_t, -+ z0 = svld3_f32 (p0, x0 - svcntw () * 24), -+ z0 = svld3 (p0, x0 - svcntw () * 24)) -+ -+/* -+** ld3_f32_m27: -+** addvl (x[0-9]+), x0, #-27 -+** ld3w {z0\.s - z2\.s}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_f32_m27, svfloat32x3_t, float32_t, -+ z0 = svld3_f32 (p0, x0 - svcntw () * 27), -+ z0 = svld3 (p0, x0 - svcntw () * 27)) -+ -+/* -+** ld3_vnum_f32_0: -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_f32_0, svfloat32x3_t, float32_t, -+ z0 = svld3_vnum_f32 (p0, x0, 0), -+ z0 = svld3_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_vnum_f32_1: -+** incb x0 -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_f32_1, svfloat32x3_t, float32_t, -+ z0 = svld3_vnum_f32 (p0, x0, 1), -+ z0 = svld3_vnum (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_vnum_f32_2: -+** incb x0, all, mul #2 -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_f32_2, svfloat32x3_t, float32_t, -+ z0 = svld3_vnum_f32 (p0, x0, 2), -+ z0 = svld3_vnum (p0, x0, 2)) -+ -+/* -+** ld3_vnum_f32_3: -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_f32_3, svfloat32x3_t, float32_t, -+ z0 = svld3_vnum_f32 (p0, x0, 3), -+ z0 = svld3_vnum (p0, x0, 3)) -+ -+/* -+** ld3_vnum_f32_21: -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_f32_21, svfloat32x3_t, float32_t, -+ z0 = svld3_vnum_f32 (p0, x0, 21), -+ z0 = svld3_vnum (p0, x0, 21)) -+ -+/* -+** ld3_vnum_f32_24: -+** addvl (x[0-9]+), x0, #24 -+** ld3w {z0\.s - z2\.s}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_f32_24, svfloat32x3_t, float32_t, -+ z0 = svld3_vnum_f32 (p0, x0, 24), -+ z0 = svld3_vnum (p0, x0, 24)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_vnum_f32_m1: -+** decb x0 -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_f32_m1, svfloat32x3_t, float32_t, -+ z0 = svld3_vnum_f32 (p0, x0, -1), -+ z0 = svld3_vnum (p0, x0, -1)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld3_vnum_f32_m2: -+** decb x0, all, mul #2 -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_f32_m2, svfloat32x3_t, float32_t, -+ z0 = svld3_vnum_f32 (p0, x0, -2), -+ z0 = svld3_vnum (p0, x0, -2)) -+ -+/* -+** ld3_vnum_f32_m3: -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_f32_m3, svfloat32x3_t, float32_t, -+ z0 = svld3_vnum_f32 (p0, x0, -3), -+ z0 = svld3_vnum (p0, x0, -3)) -+ -+/* -+** ld3_vnum_f32_m24: -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_f32_m24, svfloat32x3_t, float32_t, -+ z0 = svld3_vnum_f32 (p0, x0, -24), -+ z0 = svld3_vnum (p0, x0, -24)) -+ -+/* -+** ld3_vnum_f32_m27: -+** addvl (x[0-9]+), x0, #-27 -+** ld3w {z0\.s - z2\.s}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_f32_m27, svfloat32x3_t, float32_t, -+ z0 = svld3_vnum_f32 (p0, x0, -27), -+ z0 = svld3_vnum (p0, x0, -27)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld3_vnum_f32_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld3w {z0\.s - z2\.s}, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_f32_x1, svfloat32x3_t, float32_t, -+ z0 = svld3_vnum_f32 (p0, x0, x1), -+ z0 = svld3_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_f64.c -new file mode 100644 -index 000000000..7e6e1e749 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_f64.c -@@ -0,0 +1,242 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld3_f64_base: -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_f64_base, svfloat64x3_t, float64_t, -+ z0 = svld3_f64 (p0, x0), -+ z0 = svld3 (p0, x0)) -+ -+/* -+** ld3_f64_index: -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_LOAD (ld3_f64_index, svfloat64x3_t, float64_t, -+ z0 = svld3_f64 (p0, x0 + x1), -+ z0 = svld3 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_f64_1: -+** incb x0 -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_f64_1, svfloat64x3_t, float64_t, -+ z0 = svld3_f64 (p0, x0 + svcntd ()), -+ z0 = svld3 (p0, x0 + svcntd ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_f64_2: -+** incb x0, all, mul #2 -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_f64_2, svfloat64x3_t, float64_t, -+ z0 = svld3_f64 (p0, x0 + svcntd () * 2), -+ z0 = svld3 (p0, x0 + svcntd () * 2)) -+ -+/* -+** ld3_f64_3: -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_f64_3, svfloat64x3_t, float64_t, -+ z0 = svld3_f64 (p0, x0 + svcntd () * 3), -+ z0 = svld3 (p0, x0 + svcntd () * 3)) -+ -+/* -+** ld3_f64_21: -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_f64_21, svfloat64x3_t, float64_t, -+ z0 = svld3_f64 (p0, x0 + svcntd () * 21), -+ z0 = svld3 (p0, x0 + svcntd () * 21)) -+ -+/* -+** ld3_f64_24: -+** addvl (x[0-9]+), x0, #24 -+** ld3d {z0\.d - z2\.d}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_f64_24, svfloat64x3_t, float64_t, -+ z0 = svld3_f64 (p0, x0 + svcntd () * 24), -+ z0 = svld3 (p0, x0 + svcntd () * 24)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld3_f64_m1: -+** decb x0 -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_f64_m1, svfloat64x3_t, float64_t, -+ z0 = svld3_f64 (p0, x0 - svcntd ()), -+ z0 = svld3 (p0, x0 - svcntd ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_f64_m2: -+** decb x0, all, mul #2 -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_f64_m2, svfloat64x3_t, float64_t, -+ z0 = svld3_f64 (p0, x0 - svcntd () * 2), -+ z0 = svld3 (p0, x0 - svcntd () * 2)) -+ -+/* -+** ld3_f64_m3: -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_f64_m3, svfloat64x3_t, float64_t, -+ z0 = svld3_f64 (p0, x0 - svcntd () * 3), -+ z0 = svld3 (p0, x0 - svcntd () * 3)) -+ -+/* -+** ld3_f64_m24: -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_f64_m24, svfloat64x3_t, float64_t, -+ z0 = svld3_f64 (p0, x0 - svcntd () * 24), -+ z0 = svld3 (p0, x0 - svcntd () * 24)) -+ -+/* -+** ld3_f64_m27: -+** addvl (x[0-9]+), x0, #-27 -+** ld3d {z0\.d - z2\.d}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_f64_m27, svfloat64x3_t, float64_t, -+ z0 = svld3_f64 (p0, x0 - svcntd () * 27), -+ z0 = svld3 (p0, x0 - svcntd () * 27)) -+ -+/* -+** ld3_vnum_f64_0: -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_f64_0, svfloat64x3_t, float64_t, -+ z0 = svld3_vnum_f64 (p0, x0, 0), -+ z0 = svld3_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_vnum_f64_1: -+** incb x0 -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_f64_1, svfloat64x3_t, float64_t, -+ z0 = svld3_vnum_f64 (p0, x0, 1), -+ z0 = svld3_vnum (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_vnum_f64_2: -+** incb x0, all, mul #2 -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_f64_2, svfloat64x3_t, float64_t, -+ z0 = svld3_vnum_f64 (p0, x0, 2), -+ z0 = svld3_vnum (p0, x0, 2)) -+ -+/* -+** ld3_vnum_f64_3: -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_f64_3, svfloat64x3_t, float64_t, -+ z0 = svld3_vnum_f64 (p0, x0, 3), -+ z0 = svld3_vnum (p0, x0, 3)) -+ -+/* -+** ld3_vnum_f64_21: -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_f64_21, svfloat64x3_t, float64_t, -+ z0 = svld3_vnum_f64 (p0, x0, 21), -+ z0 = svld3_vnum (p0, x0, 21)) -+ -+/* -+** ld3_vnum_f64_24: -+** addvl (x[0-9]+), x0, #24 -+** ld3d {z0\.d - z2\.d}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_f64_24, svfloat64x3_t, float64_t, -+ z0 = svld3_vnum_f64 (p0, x0, 24), -+ z0 = svld3_vnum (p0, x0, 24)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_vnum_f64_m1: -+** decb x0 -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_f64_m1, svfloat64x3_t, float64_t, -+ z0 = svld3_vnum_f64 (p0, x0, -1), -+ z0 = svld3_vnum (p0, x0, -1)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld3_vnum_f64_m2: -+** decb x0, all, mul #2 -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_f64_m2, svfloat64x3_t, float64_t, -+ z0 = svld3_vnum_f64 (p0, x0, -2), -+ z0 = svld3_vnum (p0, x0, -2)) -+ -+/* -+** ld3_vnum_f64_m3: -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_f64_m3, svfloat64x3_t, float64_t, -+ z0 = svld3_vnum_f64 (p0, x0, -3), -+ z0 = svld3_vnum (p0, x0, -3)) -+ -+/* -+** ld3_vnum_f64_m24: -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_f64_m24, svfloat64x3_t, float64_t, -+ z0 = svld3_vnum_f64 (p0, x0, -24), -+ z0 = svld3_vnum (p0, x0, -24)) -+ -+/* -+** ld3_vnum_f64_m27: -+** addvl (x[0-9]+), x0, #-27 -+** ld3d {z0\.d - z2\.d}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_f64_m27, svfloat64x3_t, float64_t, -+ z0 = svld3_vnum_f64 (p0, x0, -27), -+ z0 = svld3_vnum (p0, x0, -27)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld3_vnum_f64_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld3d {z0\.d - z2\.d}, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_f64_x1, svfloat64x3_t, float64_t, -+ z0 = svld3_vnum_f64 (p0, x0, x1), -+ z0 = svld3_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_s16.c -new file mode 100644 -index 000000000..d4a046c64 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_s16.c -@@ -0,0 +1,242 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld3_s16_base: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_s16_base, svint16x3_t, int16_t, -+ z0 = svld3_s16 (p0, x0), -+ z0 = svld3 (p0, x0)) -+ -+/* -+** ld3_s16_index: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ld3_s16_index, svint16x3_t, int16_t, -+ z0 = svld3_s16 (p0, x0 + x1), -+ z0 = svld3 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_s16_1: -+** incb x0 -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_s16_1, svint16x3_t, int16_t, -+ z0 = svld3_s16 (p0, x0 + svcnth ()), -+ z0 = svld3 (p0, x0 + svcnth ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_s16_2: -+** incb x0, all, mul #2 -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_s16_2, svint16x3_t, int16_t, -+ z0 = svld3_s16 (p0, x0 + svcnth () * 2), -+ z0 = svld3 (p0, x0 + svcnth () * 2)) -+ -+/* -+** ld3_s16_3: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_s16_3, svint16x3_t, int16_t, -+ z0 = svld3_s16 (p0, x0 + svcnth () * 3), -+ z0 = svld3 (p0, x0 + svcnth () * 3)) -+ -+/* -+** ld3_s16_21: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_s16_21, svint16x3_t, int16_t, -+ z0 = svld3_s16 (p0, x0 + svcnth () * 21), -+ z0 = svld3 (p0, x0 + svcnth () * 21)) -+ -+/* -+** ld3_s16_24: -+** addvl (x[0-9]+), x0, #24 -+** ld3h {z0\.h - z2\.h}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_s16_24, svint16x3_t, int16_t, -+ z0 = svld3_s16 (p0, x0 + svcnth () * 24), -+ z0 = svld3 (p0, x0 + svcnth () * 24)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld3_s16_m1: -+** decb x0 -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_s16_m1, svint16x3_t, int16_t, -+ z0 = svld3_s16 (p0, x0 - svcnth ()), -+ z0 = svld3 (p0, x0 - svcnth ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_s16_m2: -+** decb x0, all, mul #2 -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_s16_m2, svint16x3_t, int16_t, -+ z0 = svld3_s16 (p0, x0 - svcnth () * 2), -+ z0 = svld3 (p0, x0 - svcnth () * 2)) -+ -+/* -+** ld3_s16_m3: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_s16_m3, svint16x3_t, int16_t, -+ z0 = svld3_s16 (p0, x0 - svcnth () * 3), -+ z0 = svld3 (p0, x0 - svcnth () * 3)) -+ -+/* -+** ld3_s16_m24: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_s16_m24, svint16x3_t, int16_t, -+ z0 = svld3_s16 (p0, x0 - svcnth () * 24), -+ z0 = svld3 (p0, x0 - svcnth () * 24)) -+ -+/* -+** ld3_s16_m27: -+** addvl (x[0-9]+), x0, #-27 -+** ld3h {z0\.h - z2\.h}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_s16_m27, svint16x3_t, int16_t, -+ z0 = svld3_s16 (p0, x0 - svcnth () * 27), -+ z0 = svld3 (p0, x0 - svcnth () * 27)) -+ -+/* -+** ld3_vnum_s16_0: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s16_0, svint16x3_t, int16_t, -+ z0 = svld3_vnum_s16 (p0, x0, 0), -+ z0 = svld3_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_vnum_s16_1: -+** incb x0 -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s16_1, svint16x3_t, int16_t, -+ z0 = svld3_vnum_s16 (p0, x0, 1), -+ z0 = svld3_vnum (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_vnum_s16_2: -+** incb x0, all, mul #2 -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s16_2, svint16x3_t, int16_t, -+ z0 = svld3_vnum_s16 (p0, x0, 2), -+ z0 = svld3_vnum (p0, x0, 2)) -+ -+/* -+** ld3_vnum_s16_3: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s16_3, svint16x3_t, int16_t, -+ z0 = svld3_vnum_s16 (p0, x0, 3), -+ z0 = svld3_vnum (p0, x0, 3)) -+ -+/* -+** ld3_vnum_s16_21: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s16_21, svint16x3_t, int16_t, -+ z0 = svld3_vnum_s16 (p0, x0, 21), -+ z0 = svld3_vnum (p0, x0, 21)) -+ -+/* -+** ld3_vnum_s16_24: -+** addvl (x[0-9]+), x0, #24 -+** ld3h {z0\.h - z2\.h}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s16_24, svint16x3_t, int16_t, -+ z0 = svld3_vnum_s16 (p0, x0, 24), -+ z0 = svld3_vnum (p0, x0, 24)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_vnum_s16_m1: -+** decb x0 -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s16_m1, svint16x3_t, int16_t, -+ z0 = svld3_vnum_s16 (p0, x0, -1), -+ z0 = svld3_vnum (p0, x0, -1)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld3_vnum_s16_m2: -+** decb x0, all, mul #2 -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s16_m2, svint16x3_t, int16_t, -+ z0 = svld3_vnum_s16 (p0, x0, -2), -+ z0 = svld3_vnum (p0, x0, -2)) -+ -+/* -+** ld3_vnum_s16_m3: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s16_m3, svint16x3_t, int16_t, -+ z0 = svld3_vnum_s16 (p0, x0, -3), -+ z0 = svld3_vnum (p0, x0, -3)) -+ -+/* -+** ld3_vnum_s16_m24: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s16_m24, svint16x3_t, int16_t, -+ z0 = svld3_vnum_s16 (p0, x0, -24), -+ z0 = svld3_vnum (p0, x0, -24)) -+ -+/* -+** ld3_vnum_s16_m27: -+** addvl (x[0-9]+), x0, #-27 -+** ld3h {z0\.h - z2\.h}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s16_m27, svint16x3_t, int16_t, -+ z0 = svld3_vnum_s16 (p0, x0, -27), -+ z0 = svld3_vnum (p0, x0, -27)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld3_vnum_s16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld3h {z0\.h - z2\.h}, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s16_x1, svint16x3_t, int16_t, -+ z0 = svld3_vnum_s16 (p0, x0, x1), -+ z0 = svld3_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_s32.c -new file mode 100644 -index 000000000..3b0ba6e2a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_s32.c -@@ -0,0 +1,242 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld3_s32_base: -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_s32_base, svint32x3_t, int32_t, -+ z0 = svld3_s32 (p0, x0), -+ z0 = svld3 (p0, x0)) -+ -+/* -+** ld3_s32_index: -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_LOAD (ld3_s32_index, svint32x3_t, int32_t, -+ z0 = svld3_s32 (p0, x0 + x1), -+ z0 = svld3 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_s32_1: -+** incb x0 -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_s32_1, svint32x3_t, int32_t, -+ z0 = svld3_s32 (p0, x0 + svcntw ()), -+ z0 = svld3 (p0, x0 + svcntw ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_s32_2: -+** incb x0, all, mul #2 -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_s32_2, svint32x3_t, int32_t, -+ z0 = svld3_s32 (p0, x0 + svcntw () * 2), -+ z0 = svld3 (p0, x0 + svcntw () * 2)) -+ -+/* -+** ld3_s32_3: -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_s32_3, svint32x3_t, int32_t, -+ z0 = svld3_s32 (p0, x0 + svcntw () * 3), -+ z0 = svld3 (p0, x0 + svcntw () * 3)) -+ -+/* -+** ld3_s32_21: -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_s32_21, svint32x3_t, int32_t, -+ z0 = svld3_s32 (p0, x0 + svcntw () * 21), -+ z0 = svld3 (p0, x0 + svcntw () * 21)) -+ -+/* -+** ld3_s32_24: -+** addvl (x[0-9]+), x0, #24 -+** ld3w {z0\.s - z2\.s}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_s32_24, svint32x3_t, int32_t, -+ z0 = svld3_s32 (p0, x0 + svcntw () * 24), -+ z0 = svld3 (p0, x0 + svcntw () * 24)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld3_s32_m1: -+** decb x0 -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_s32_m1, svint32x3_t, int32_t, -+ z0 = svld3_s32 (p0, x0 - svcntw ()), -+ z0 = svld3 (p0, x0 - svcntw ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_s32_m2: -+** decb x0, all, mul #2 -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_s32_m2, svint32x3_t, int32_t, -+ z0 = svld3_s32 (p0, x0 - svcntw () * 2), -+ z0 = svld3 (p0, x0 - svcntw () * 2)) -+ -+/* -+** ld3_s32_m3: -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_s32_m3, svint32x3_t, int32_t, -+ z0 = svld3_s32 (p0, x0 - svcntw () * 3), -+ z0 = svld3 (p0, x0 - svcntw () * 3)) -+ -+/* -+** ld3_s32_m24: -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_s32_m24, svint32x3_t, int32_t, -+ z0 = svld3_s32 (p0, x0 - svcntw () * 24), -+ z0 = svld3 (p0, x0 - svcntw () * 24)) -+ -+/* -+** ld3_s32_m27: -+** addvl (x[0-9]+), x0, #-27 -+** ld3w {z0\.s - z2\.s}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_s32_m27, svint32x3_t, int32_t, -+ z0 = svld3_s32 (p0, x0 - svcntw () * 27), -+ z0 = svld3 (p0, x0 - svcntw () * 27)) -+ -+/* -+** ld3_vnum_s32_0: -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s32_0, svint32x3_t, int32_t, -+ z0 = svld3_vnum_s32 (p0, x0, 0), -+ z0 = svld3_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_vnum_s32_1: -+** incb x0 -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s32_1, svint32x3_t, int32_t, -+ z0 = svld3_vnum_s32 (p0, x0, 1), -+ z0 = svld3_vnum (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_vnum_s32_2: -+** incb x0, all, mul #2 -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s32_2, svint32x3_t, int32_t, -+ z0 = svld3_vnum_s32 (p0, x0, 2), -+ z0 = svld3_vnum (p0, x0, 2)) -+ -+/* -+** ld3_vnum_s32_3: -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s32_3, svint32x3_t, int32_t, -+ z0 = svld3_vnum_s32 (p0, x0, 3), -+ z0 = svld3_vnum (p0, x0, 3)) -+ -+/* -+** ld3_vnum_s32_21: -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s32_21, svint32x3_t, int32_t, -+ z0 = svld3_vnum_s32 (p0, x0, 21), -+ z0 = svld3_vnum (p0, x0, 21)) -+ -+/* -+** ld3_vnum_s32_24: -+** addvl (x[0-9]+), x0, #24 -+** ld3w {z0\.s - z2\.s}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s32_24, svint32x3_t, int32_t, -+ z0 = svld3_vnum_s32 (p0, x0, 24), -+ z0 = svld3_vnum (p0, x0, 24)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_vnum_s32_m1: -+** decb x0 -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s32_m1, svint32x3_t, int32_t, -+ z0 = svld3_vnum_s32 (p0, x0, -1), -+ z0 = svld3_vnum (p0, x0, -1)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld3_vnum_s32_m2: -+** decb x0, all, mul #2 -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s32_m2, svint32x3_t, int32_t, -+ z0 = svld3_vnum_s32 (p0, x0, -2), -+ z0 = svld3_vnum (p0, x0, -2)) -+ -+/* -+** ld3_vnum_s32_m3: -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s32_m3, svint32x3_t, int32_t, -+ z0 = svld3_vnum_s32 (p0, x0, -3), -+ z0 = svld3_vnum (p0, x0, -3)) -+ -+/* -+** ld3_vnum_s32_m24: -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s32_m24, svint32x3_t, int32_t, -+ z0 = svld3_vnum_s32 (p0, x0, -24), -+ z0 = svld3_vnum (p0, x0, -24)) -+ -+/* -+** ld3_vnum_s32_m27: -+** addvl (x[0-9]+), x0, #-27 -+** ld3w {z0\.s - z2\.s}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s32_m27, svint32x3_t, int32_t, -+ z0 = svld3_vnum_s32 (p0, x0, -27), -+ z0 = svld3_vnum (p0, x0, -27)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld3_vnum_s32_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld3w {z0\.s - z2\.s}, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s32_x1, svint32x3_t, int32_t, -+ z0 = svld3_vnum_s32 (p0, x0, x1), -+ z0 = svld3_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_s64.c -new file mode 100644 -index 000000000..080a10b8f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_s64.c -@@ -0,0 +1,242 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld3_s64_base: -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_s64_base, svint64x3_t, int64_t, -+ z0 = svld3_s64 (p0, x0), -+ z0 = svld3 (p0, x0)) -+ -+/* -+** ld3_s64_index: -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_LOAD (ld3_s64_index, svint64x3_t, int64_t, -+ z0 = svld3_s64 (p0, x0 + x1), -+ z0 = svld3 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_s64_1: -+** incb x0 -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_s64_1, svint64x3_t, int64_t, -+ z0 = svld3_s64 (p0, x0 + svcntd ()), -+ z0 = svld3 (p0, x0 + svcntd ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_s64_2: -+** incb x0, all, mul #2 -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_s64_2, svint64x3_t, int64_t, -+ z0 = svld3_s64 (p0, x0 + svcntd () * 2), -+ z0 = svld3 (p0, x0 + svcntd () * 2)) -+ -+/* -+** ld3_s64_3: -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_s64_3, svint64x3_t, int64_t, -+ z0 = svld3_s64 (p0, x0 + svcntd () * 3), -+ z0 = svld3 (p0, x0 + svcntd () * 3)) -+ -+/* -+** ld3_s64_21: -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_s64_21, svint64x3_t, int64_t, -+ z0 = svld3_s64 (p0, x0 + svcntd () * 21), -+ z0 = svld3 (p0, x0 + svcntd () * 21)) -+ -+/* -+** ld3_s64_24: -+** addvl (x[0-9]+), x0, #24 -+** ld3d {z0\.d - z2\.d}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_s64_24, svint64x3_t, int64_t, -+ z0 = svld3_s64 (p0, x0 + svcntd () * 24), -+ z0 = svld3 (p0, x0 + svcntd () * 24)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld3_s64_m1: -+** decb x0 -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_s64_m1, svint64x3_t, int64_t, -+ z0 = svld3_s64 (p0, x0 - svcntd ()), -+ z0 = svld3 (p0, x0 - svcntd ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_s64_m2: -+** decb x0, all, mul #2 -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_s64_m2, svint64x3_t, int64_t, -+ z0 = svld3_s64 (p0, x0 - svcntd () * 2), -+ z0 = svld3 (p0, x0 - svcntd () * 2)) -+ -+/* -+** ld3_s64_m3: -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_s64_m3, svint64x3_t, int64_t, -+ z0 = svld3_s64 (p0, x0 - svcntd () * 3), -+ z0 = svld3 (p0, x0 - svcntd () * 3)) -+ -+/* -+** ld3_s64_m24: -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_s64_m24, svint64x3_t, int64_t, -+ z0 = svld3_s64 (p0, x0 - svcntd () * 24), -+ z0 = svld3 (p0, x0 - svcntd () * 24)) -+ -+/* -+** ld3_s64_m27: -+** addvl (x[0-9]+), x0, #-27 -+** ld3d {z0\.d - z2\.d}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_s64_m27, svint64x3_t, int64_t, -+ z0 = svld3_s64 (p0, x0 - svcntd () * 27), -+ z0 = svld3 (p0, x0 - svcntd () * 27)) -+ -+/* -+** ld3_vnum_s64_0: -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s64_0, svint64x3_t, int64_t, -+ z0 = svld3_vnum_s64 (p0, x0, 0), -+ z0 = svld3_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_vnum_s64_1: -+** incb x0 -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s64_1, svint64x3_t, int64_t, -+ z0 = svld3_vnum_s64 (p0, x0, 1), -+ z0 = svld3_vnum (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_vnum_s64_2: -+** incb x0, all, mul #2 -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s64_2, svint64x3_t, int64_t, -+ z0 = svld3_vnum_s64 (p0, x0, 2), -+ z0 = svld3_vnum (p0, x0, 2)) -+ -+/* -+** ld3_vnum_s64_3: -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s64_3, svint64x3_t, int64_t, -+ z0 = svld3_vnum_s64 (p0, x0, 3), -+ z0 = svld3_vnum (p0, x0, 3)) -+ -+/* -+** ld3_vnum_s64_21: -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s64_21, svint64x3_t, int64_t, -+ z0 = svld3_vnum_s64 (p0, x0, 21), -+ z0 = svld3_vnum (p0, x0, 21)) -+ -+/* -+** ld3_vnum_s64_24: -+** addvl (x[0-9]+), x0, #24 -+** ld3d {z0\.d - z2\.d}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s64_24, svint64x3_t, int64_t, -+ z0 = svld3_vnum_s64 (p0, x0, 24), -+ z0 = svld3_vnum (p0, x0, 24)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_vnum_s64_m1: -+** decb x0 -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s64_m1, svint64x3_t, int64_t, -+ z0 = svld3_vnum_s64 (p0, x0, -1), -+ z0 = svld3_vnum (p0, x0, -1)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld3_vnum_s64_m2: -+** decb x0, all, mul #2 -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s64_m2, svint64x3_t, int64_t, -+ z0 = svld3_vnum_s64 (p0, x0, -2), -+ z0 = svld3_vnum (p0, x0, -2)) -+ -+/* -+** ld3_vnum_s64_m3: -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s64_m3, svint64x3_t, int64_t, -+ z0 = svld3_vnum_s64 (p0, x0, -3), -+ z0 = svld3_vnum (p0, x0, -3)) -+ -+/* -+** ld3_vnum_s64_m24: -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s64_m24, svint64x3_t, int64_t, -+ z0 = svld3_vnum_s64 (p0, x0, -24), -+ z0 = svld3_vnum (p0, x0, -24)) -+ -+/* -+** ld3_vnum_s64_m27: -+** addvl (x[0-9]+), x0, #-27 -+** ld3d {z0\.d - z2\.d}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s64_m27, svint64x3_t, int64_t, -+ z0 = svld3_vnum_s64 (p0, x0, -27), -+ z0 = svld3_vnum (p0, x0, -27)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld3_vnum_s64_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld3d {z0\.d - z2\.d}, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s64_x1, svint64x3_t, int64_t, -+ z0 = svld3_vnum_s64 (p0, x0, x1), -+ z0 = svld3_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_s8.c -new file mode 100644 -index 000000000..e0c551472 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_s8.c -@@ -0,0 +1,246 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld3_s8_base: -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_s8_base, svint8x3_t, int8_t, -+ z0 = svld3_s8 (p0, x0), -+ z0 = svld3 (p0, x0)) -+ -+/* -+** ld3_s8_index: -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ld3_s8_index, svint8x3_t, int8_t, -+ z0 = svld3_s8 (p0, x0 + x1), -+ z0 = svld3 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_s8_1: -+** incb x0 -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_s8_1, svint8x3_t, int8_t, -+ z0 = svld3_s8 (p0, x0 + svcntb ()), -+ z0 = svld3 (p0, x0 + svcntb ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_s8_2: -+** incb x0, all, mul #2 -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_s8_2, svint8x3_t, int8_t, -+ z0 = svld3_s8 (p0, x0 + svcntb () * 2), -+ z0 = svld3 (p0, x0 + svcntb () * 2)) -+ -+/* -+** ld3_s8_3: -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_s8_3, svint8x3_t, int8_t, -+ z0 = svld3_s8 (p0, x0 + svcntb () * 3), -+ z0 = svld3 (p0, x0 + svcntb () * 3)) -+ -+/* -+** ld3_s8_21: -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_s8_21, svint8x3_t, int8_t, -+ z0 = svld3_s8 (p0, x0 + svcntb () * 21), -+ z0 = svld3 (p0, x0 + svcntb () * 21)) -+ -+/* -+** ld3_s8_24: -+** addvl (x[0-9]+), x0, #24 -+** ld3b {z0\.b - z2\.b}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_s8_24, svint8x3_t, int8_t, -+ z0 = svld3_s8 (p0, x0 + svcntb () * 24), -+ z0 = svld3 (p0, x0 + svcntb () * 24)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld3_s8_m1: -+** decb x0 -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_s8_m1, svint8x3_t, int8_t, -+ z0 = svld3_s8 (p0, x0 - svcntb ()), -+ z0 = svld3 (p0, x0 - svcntb ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_s8_m2: -+** decb x0, all, mul #2 -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_s8_m2, svint8x3_t, int8_t, -+ z0 = svld3_s8 (p0, x0 - svcntb () * 2), -+ z0 = svld3 (p0, x0 - svcntb () * 2)) -+ -+/* -+** ld3_s8_m3: -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_s8_m3, svint8x3_t, int8_t, -+ z0 = svld3_s8 (p0, x0 - svcntb () * 3), -+ z0 = svld3 (p0, x0 - svcntb () * 3)) -+ -+/* -+** ld3_s8_m24: -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_s8_m24, svint8x3_t, int8_t, -+ z0 = svld3_s8 (p0, x0 - svcntb () * 24), -+ z0 = svld3 (p0, x0 - svcntb () * 24)) -+ -+/* -+** ld3_s8_m27: -+** addvl (x[0-9]+), x0, #-27 -+** ld3b {z0\.b - z2\.b}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_s8_m27, svint8x3_t, int8_t, -+ z0 = svld3_s8 (p0, x0 - svcntb () * 27), -+ z0 = svld3 (p0, x0 - svcntb () * 27)) -+ -+/* -+** ld3_vnum_s8_0: -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s8_0, svint8x3_t, int8_t, -+ z0 = svld3_vnum_s8 (p0, x0, 0), -+ z0 = svld3_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_vnum_s8_1: -+** incb x0 -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s8_1, svint8x3_t, int8_t, -+ z0 = svld3_vnum_s8 (p0, x0, 1), -+ z0 = svld3_vnum (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_vnum_s8_2: -+** incb x0, all, mul #2 -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s8_2, svint8x3_t, int8_t, -+ z0 = svld3_vnum_s8 (p0, x0, 2), -+ z0 = svld3_vnum (p0, x0, 2)) -+ -+/* -+** ld3_vnum_s8_3: -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s8_3, svint8x3_t, int8_t, -+ z0 = svld3_vnum_s8 (p0, x0, 3), -+ z0 = svld3_vnum (p0, x0, 3)) -+ -+/* -+** ld3_vnum_s8_21: -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s8_21, svint8x3_t, int8_t, -+ z0 = svld3_vnum_s8 (p0, x0, 21), -+ z0 = svld3_vnum (p0, x0, 21)) -+ -+/* -+** ld3_vnum_s8_24: -+** addvl (x[0-9]+), x0, #24 -+** ld3b {z0\.b - z2\.b}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s8_24, svint8x3_t, int8_t, -+ z0 = svld3_vnum_s8 (p0, x0, 24), -+ z0 = svld3_vnum (p0, x0, 24)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_vnum_s8_m1: -+** decb x0 -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s8_m1, svint8x3_t, int8_t, -+ z0 = svld3_vnum_s8 (p0, x0, -1), -+ z0 = svld3_vnum (p0, x0, -1)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld3_vnum_s8_m2: -+** decb x0, all, mul #2 -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s8_m2, svint8x3_t, int8_t, -+ z0 = svld3_vnum_s8 (p0, x0, -2), -+ z0 = svld3_vnum (p0, x0, -2)) -+ -+/* -+** ld3_vnum_s8_m3: -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s8_m3, svint8x3_t, int8_t, -+ z0 = svld3_vnum_s8 (p0, x0, -3), -+ z0 = svld3_vnum (p0, x0, -3)) -+ -+/* -+** ld3_vnum_s8_m24: -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s8_m24, svint8x3_t, int8_t, -+ z0 = svld3_vnum_s8 (p0, x0, -24), -+ z0 = svld3_vnum (p0, x0, -24)) -+ -+/* -+** ld3_vnum_s8_m27: -+** addvl (x[0-9]+), x0, #-27 -+** ld3b {z0\.b - z2\.b}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s8_m27, svint8x3_t, int8_t, -+ z0 = svld3_vnum_s8 (p0, x0, -27), -+ z0 = svld3_vnum (p0, x0, -27)) -+ -+/* -+** ld3_vnum_s8_x1: -+** cntb (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ld3b {z0\.b - z2\.b}, p0/z, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_LOAD (ld3_vnum_s8_x1, svint8x3_t, int8_t, -+ z0 = svld3_vnum_s8 (p0, x0, x1), -+ z0 = svld3_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_u16.c -new file mode 100644 -index 000000000..12f6dd092 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_u16.c -@@ -0,0 +1,242 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld3_u16_base: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_u16_base, svuint16x3_t, uint16_t, -+ z0 = svld3_u16 (p0, x0), -+ z0 = svld3 (p0, x0)) -+ -+/* -+** ld3_u16_index: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ld3_u16_index, svuint16x3_t, uint16_t, -+ z0 = svld3_u16 (p0, x0 + x1), -+ z0 = svld3 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_u16_1: -+** incb x0 -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_u16_1, svuint16x3_t, uint16_t, -+ z0 = svld3_u16 (p0, x0 + svcnth ()), -+ z0 = svld3 (p0, x0 + svcnth ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_u16_2: -+** incb x0, all, mul #2 -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_u16_2, svuint16x3_t, uint16_t, -+ z0 = svld3_u16 (p0, x0 + svcnth () * 2), -+ z0 = svld3 (p0, x0 + svcnth () * 2)) -+ -+/* -+** ld3_u16_3: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_u16_3, svuint16x3_t, uint16_t, -+ z0 = svld3_u16 (p0, x0 + svcnth () * 3), -+ z0 = svld3 (p0, x0 + svcnth () * 3)) -+ -+/* -+** ld3_u16_21: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_u16_21, svuint16x3_t, uint16_t, -+ z0 = svld3_u16 (p0, x0 + svcnth () * 21), -+ z0 = svld3 (p0, x0 + svcnth () * 21)) -+ -+/* -+** ld3_u16_24: -+** addvl (x[0-9]+), x0, #24 -+** ld3h {z0\.h - z2\.h}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_u16_24, svuint16x3_t, uint16_t, -+ z0 = svld3_u16 (p0, x0 + svcnth () * 24), -+ z0 = svld3 (p0, x0 + svcnth () * 24)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld3_u16_m1: -+** decb x0 -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_u16_m1, svuint16x3_t, uint16_t, -+ z0 = svld3_u16 (p0, x0 - svcnth ()), -+ z0 = svld3 (p0, x0 - svcnth ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_u16_m2: -+** decb x0, all, mul #2 -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_u16_m2, svuint16x3_t, uint16_t, -+ z0 = svld3_u16 (p0, x0 - svcnth () * 2), -+ z0 = svld3 (p0, x0 - svcnth () * 2)) -+ -+/* -+** ld3_u16_m3: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_u16_m3, svuint16x3_t, uint16_t, -+ z0 = svld3_u16 (p0, x0 - svcnth () * 3), -+ z0 = svld3 (p0, x0 - svcnth () * 3)) -+ -+/* -+** ld3_u16_m24: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_u16_m24, svuint16x3_t, uint16_t, -+ z0 = svld3_u16 (p0, x0 - svcnth () * 24), -+ z0 = svld3 (p0, x0 - svcnth () * 24)) -+ -+/* -+** ld3_u16_m27: -+** addvl (x[0-9]+), x0, #-27 -+** ld3h {z0\.h - z2\.h}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_u16_m27, svuint16x3_t, uint16_t, -+ z0 = svld3_u16 (p0, x0 - svcnth () * 27), -+ z0 = svld3 (p0, x0 - svcnth () * 27)) -+ -+/* -+** ld3_vnum_u16_0: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u16_0, svuint16x3_t, uint16_t, -+ z0 = svld3_vnum_u16 (p0, x0, 0), -+ z0 = svld3_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_vnum_u16_1: -+** incb x0 -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u16_1, svuint16x3_t, uint16_t, -+ z0 = svld3_vnum_u16 (p0, x0, 1), -+ z0 = svld3_vnum (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_vnum_u16_2: -+** incb x0, all, mul #2 -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u16_2, svuint16x3_t, uint16_t, -+ z0 = svld3_vnum_u16 (p0, x0, 2), -+ z0 = svld3_vnum (p0, x0, 2)) -+ -+/* -+** ld3_vnum_u16_3: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u16_3, svuint16x3_t, uint16_t, -+ z0 = svld3_vnum_u16 (p0, x0, 3), -+ z0 = svld3_vnum (p0, x0, 3)) -+ -+/* -+** ld3_vnum_u16_21: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u16_21, svuint16x3_t, uint16_t, -+ z0 = svld3_vnum_u16 (p0, x0, 21), -+ z0 = svld3_vnum (p0, x0, 21)) -+ -+/* -+** ld3_vnum_u16_24: -+** addvl (x[0-9]+), x0, #24 -+** ld3h {z0\.h - z2\.h}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u16_24, svuint16x3_t, uint16_t, -+ z0 = svld3_vnum_u16 (p0, x0, 24), -+ z0 = svld3_vnum (p0, x0, 24)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_vnum_u16_m1: -+** decb x0 -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u16_m1, svuint16x3_t, uint16_t, -+ z0 = svld3_vnum_u16 (p0, x0, -1), -+ z0 = svld3_vnum (p0, x0, -1)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld3_vnum_u16_m2: -+** decb x0, all, mul #2 -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u16_m2, svuint16x3_t, uint16_t, -+ z0 = svld3_vnum_u16 (p0, x0, -2), -+ z0 = svld3_vnum (p0, x0, -2)) -+ -+/* -+** ld3_vnum_u16_m3: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u16_m3, svuint16x3_t, uint16_t, -+ z0 = svld3_vnum_u16 (p0, x0, -3), -+ z0 = svld3_vnum (p0, x0, -3)) -+ -+/* -+** ld3_vnum_u16_m24: -+** ld3h {z0\.h - z2\.h}, p0/z, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u16_m24, svuint16x3_t, uint16_t, -+ z0 = svld3_vnum_u16 (p0, x0, -24), -+ z0 = svld3_vnum (p0, x0, -24)) -+ -+/* -+** ld3_vnum_u16_m27: -+** addvl (x[0-9]+), x0, #-27 -+** ld3h {z0\.h - z2\.h}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u16_m27, svuint16x3_t, uint16_t, -+ z0 = svld3_vnum_u16 (p0, x0, -27), -+ z0 = svld3_vnum (p0, x0, -27)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld3_vnum_u16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld3h {z0\.h - z2\.h}, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u16_x1, svuint16x3_t, uint16_t, -+ z0 = svld3_vnum_u16 (p0, x0, x1), -+ z0 = svld3_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_u32.c -new file mode 100644 -index 000000000..ffc6edfdc ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_u32.c -@@ -0,0 +1,242 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld3_u32_base: -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_u32_base, svuint32x3_t, uint32_t, -+ z0 = svld3_u32 (p0, x0), -+ z0 = svld3 (p0, x0)) -+ -+/* -+** ld3_u32_index: -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_LOAD (ld3_u32_index, svuint32x3_t, uint32_t, -+ z0 = svld3_u32 (p0, x0 + x1), -+ z0 = svld3 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_u32_1: -+** incb x0 -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_u32_1, svuint32x3_t, uint32_t, -+ z0 = svld3_u32 (p0, x0 + svcntw ()), -+ z0 = svld3 (p0, x0 + svcntw ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_u32_2: -+** incb x0, all, mul #2 -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_u32_2, svuint32x3_t, uint32_t, -+ z0 = svld3_u32 (p0, x0 + svcntw () * 2), -+ z0 = svld3 (p0, x0 + svcntw () * 2)) -+ -+/* -+** ld3_u32_3: -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_u32_3, svuint32x3_t, uint32_t, -+ z0 = svld3_u32 (p0, x0 + svcntw () * 3), -+ z0 = svld3 (p0, x0 + svcntw () * 3)) -+ -+/* -+** ld3_u32_21: -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_u32_21, svuint32x3_t, uint32_t, -+ z0 = svld3_u32 (p0, x0 + svcntw () * 21), -+ z0 = svld3 (p0, x0 + svcntw () * 21)) -+ -+/* -+** ld3_u32_24: -+** addvl (x[0-9]+), x0, #24 -+** ld3w {z0\.s - z2\.s}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_u32_24, svuint32x3_t, uint32_t, -+ z0 = svld3_u32 (p0, x0 + svcntw () * 24), -+ z0 = svld3 (p0, x0 + svcntw () * 24)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld3_u32_m1: -+** decb x0 -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_u32_m1, svuint32x3_t, uint32_t, -+ z0 = svld3_u32 (p0, x0 - svcntw ()), -+ z0 = svld3 (p0, x0 - svcntw ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_u32_m2: -+** decb x0, all, mul #2 -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_u32_m2, svuint32x3_t, uint32_t, -+ z0 = svld3_u32 (p0, x0 - svcntw () * 2), -+ z0 = svld3 (p0, x0 - svcntw () * 2)) -+ -+/* -+** ld3_u32_m3: -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_u32_m3, svuint32x3_t, uint32_t, -+ z0 = svld3_u32 (p0, x0 - svcntw () * 3), -+ z0 = svld3 (p0, x0 - svcntw () * 3)) -+ -+/* -+** ld3_u32_m24: -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_u32_m24, svuint32x3_t, uint32_t, -+ z0 = svld3_u32 (p0, x0 - svcntw () * 24), -+ z0 = svld3 (p0, x0 - svcntw () * 24)) -+ -+/* -+** ld3_u32_m27: -+** addvl (x[0-9]+), x0, #-27 -+** ld3w {z0\.s - z2\.s}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_u32_m27, svuint32x3_t, uint32_t, -+ z0 = svld3_u32 (p0, x0 - svcntw () * 27), -+ z0 = svld3 (p0, x0 - svcntw () * 27)) -+ -+/* -+** ld3_vnum_u32_0: -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u32_0, svuint32x3_t, uint32_t, -+ z0 = svld3_vnum_u32 (p0, x0, 0), -+ z0 = svld3_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_vnum_u32_1: -+** incb x0 -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u32_1, svuint32x3_t, uint32_t, -+ z0 = svld3_vnum_u32 (p0, x0, 1), -+ z0 = svld3_vnum (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_vnum_u32_2: -+** incb x0, all, mul #2 -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u32_2, svuint32x3_t, uint32_t, -+ z0 = svld3_vnum_u32 (p0, x0, 2), -+ z0 = svld3_vnum (p0, x0, 2)) -+ -+/* -+** ld3_vnum_u32_3: -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u32_3, svuint32x3_t, uint32_t, -+ z0 = svld3_vnum_u32 (p0, x0, 3), -+ z0 = svld3_vnum (p0, x0, 3)) -+ -+/* -+** ld3_vnum_u32_21: -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u32_21, svuint32x3_t, uint32_t, -+ z0 = svld3_vnum_u32 (p0, x0, 21), -+ z0 = svld3_vnum (p0, x0, 21)) -+ -+/* -+** ld3_vnum_u32_24: -+** addvl (x[0-9]+), x0, #24 -+** ld3w {z0\.s - z2\.s}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u32_24, svuint32x3_t, uint32_t, -+ z0 = svld3_vnum_u32 (p0, x0, 24), -+ z0 = svld3_vnum (p0, x0, 24)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_vnum_u32_m1: -+** decb x0 -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u32_m1, svuint32x3_t, uint32_t, -+ z0 = svld3_vnum_u32 (p0, x0, -1), -+ z0 = svld3_vnum (p0, x0, -1)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld3_vnum_u32_m2: -+** decb x0, all, mul #2 -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u32_m2, svuint32x3_t, uint32_t, -+ z0 = svld3_vnum_u32 (p0, x0, -2), -+ z0 = svld3_vnum (p0, x0, -2)) -+ -+/* -+** ld3_vnum_u32_m3: -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u32_m3, svuint32x3_t, uint32_t, -+ z0 = svld3_vnum_u32 (p0, x0, -3), -+ z0 = svld3_vnum (p0, x0, -3)) -+ -+/* -+** ld3_vnum_u32_m24: -+** ld3w {z0\.s - z2\.s}, p0/z, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u32_m24, svuint32x3_t, uint32_t, -+ z0 = svld3_vnum_u32 (p0, x0, -24), -+ z0 = svld3_vnum (p0, x0, -24)) -+ -+/* -+** ld3_vnum_u32_m27: -+** addvl (x[0-9]+), x0, #-27 -+** ld3w {z0\.s - z2\.s}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u32_m27, svuint32x3_t, uint32_t, -+ z0 = svld3_vnum_u32 (p0, x0, -27), -+ z0 = svld3_vnum (p0, x0, -27)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld3_vnum_u32_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld3w {z0\.s - z2\.s}, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u32_x1, svuint32x3_t, uint32_t, -+ z0 = svld3_vnum_u32 (p0, x0, x1), -+ z0 = svld3_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_u64.c -new file mode 100644 -index 000000000..2c0dc2f1a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_u64.c -@@ -0,0 +1,242 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld3_u64_base: -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_u64_base, svuint64x3_t, uint64_t, -+ z0 = svld3_u64 (p0, x0), -+ z0 = svld3 (p0, x0)) -+ -+/* -+** ld3_u64_index: -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_LOAD (ld3_u64_index, svuint64x3_t, uint64_t, -+ z0 = svld3_u64 (p0, x0 + x1), -+ z0 = svld3 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_u64_1: -+** incb x0 -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_u64_1, svuint64x3_t, uint64_t, -+ z0 = svld3_u64 (p0, x0 + svcntd ()), -+ z0 = svld3 (p0, x0 + svcntd ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_u64_2: -+** incb x0, all, mul #2 -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_u64_2, svuint64x3_t, uint64_t, -+ z0 = svld3_u64 (p0, x0 + svcntd () * 2), -+ z0 = svld3 (p0, x0 + svcntd () * 2)) -+ -+/* -+** ld3_u64_3: -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_u64_3, svuint64x3_t, uint64_t, -+ z0 = svld3_u64 (p0, x0 + svcntd () * 3), -+ z0 = svld3 (p0, x0 + svcntd () * 3)) -+ -+/* -+** ld3_u64_21: -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_u64_21, svuint64x3_t, uint64_t, -+ z0 = svld3_u64 (p0, x0 + svcntd () * 21), -+ z0 = svld3 (p0, x0 + svcntd () * 21)) -+ -+/* -+** ld3_u64_24: -+** addvl (x[0-9]+), x0, #24 -+** ld3d {z0\.d - z2\.d}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_u64_24, svuint64x3_t, uint64_t, -+ z0 = svld3_u64 (p0, x0 + svcntd () * 24), -+ z0 = svld3 (p0, x0 + svcntd () * 24)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld3_u64_m1: -+** decb x0 -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_u64_m1, svuint64x3_t, uint64_t, -+ z0 = svld3_u64 (p0, x0 - svcntd ()), -+ z0 = svld3 (p0, x0 - svcntd ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_u64_m2: -+** decb x0, all, mul #2 -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_u64_m2, svuint64x3_t, uint64_t, -+ z0 = svld3_u64 (p0, x0 - svcntd () * 2), -+ z0 = svld3 (p0, x0 - svcntd () * 2)) -+ -+/* -+** ld3_u64_m3: -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_u64_m3, svuint64x3_t, uint64_t, -+ z0 = svld3_u64 (p0, x0 - svcntd () * 3), -+ z0 = svld3 (p0, x0 - svcntd () * 3)) -+ -+/* -+** ld3_u64_m24: -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_u64_m24, svuint64x3_t, uint64_t, -+ z0 = svld3_u64 (p0, x0 - svcntd () * 24), -+ z0 = svld3 (p0, x0 - svcntd () * 24)) -+ -+/* -+** ld3_u64_m27: -+** addvl (x[0-9]+), x0, #-27 -+** ld3d {z0\.d - z2\.d}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_u64_m27, svuint64x3_t, uint64_t, -+ z0 = svld3_u64 (p0, x0 - svcntd () * 27), -+ z0 = svld3 (p0, x0 - svcntd () * 27)) -+ -+/* -+** ld3_vnum_u64_0: -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u64_0, svuint64x3_t, uint64_t, -+ z0 = svld3_vnum_u64 (p0, x0, 0), -+ z0 = svld3_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_vnum_u64_1: -+** incb x0 -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u64_1, svuint64x3_t, uint64_t, -+ z0 = svld3_vnum_u64 (p0, x0, 1), -+ z0 = svld3_vnum (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_vnum_u64_2: -+** incb x0, all, mul #2 -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u64_2, svuint64x3_t, uint64_t, -+ z0 = svld3_vnum_u64 (p0, x0, 2), -+ z0 = svld3_vnum (p0, x0, 2)) -+ -+/* -+** ld3_vnum_u64_3: -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u64_3, svuint64x3_t, uint64_t, -+ z0 = svld3_vnum_u64 (p0, x0, 3), -+ z0 = svld3_vnum (p0, x0, 3)) -+ -+/* -+** ld3_vnum_u64_21: -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u64_21, svuint64x3_t, uint64_t, -+ z0 = svld3_vnum_u64 (p0, x0, 21), -+ z0 = svld3_vnum (p0, x0, 21)) -+ -+/* -+** ld3_vnum_u64_24: -+** addvl (x[0-9]+), x0, #24 -+** ld3d {z0\.d - z2\.d}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u64_24, svuint64x3_t, uint64_t, -+ z0 = svld3_vnum_u64 (p0, x0, 24), -+ z0 = svld3_vnum (p0, x0, 24)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_vnum_u64_m1: -+** decb x0 -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u64_m1, svuint64x3_t, uint64_t, -+ z0 = svld3_vnum_u64 (p0, x0, -1), -+ z0 = svld3_vnum (p0, x0, -1)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld3_vnum_u64_m2: -+** decb x0, all, mul #2 -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u64_m2, svuint64x3_t, uint64_t, -+ z0 = svld3_vnum_u64 (p0, x0, -2), -+ z0 = svld3_vnum (p0, x0, -2)) -+ -+/* -+** ld3_vnum_u64_m3: -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u64_m3, svuint64x3_t, uint64_t, -+ z0 = svld3_vnum_u64 (p0, x0, -3), -+ z0 = svld3_vnum (p0, x0, -3)) -+ -+/* -+** ld3_vnum_u64_m24: -+** ld3d {z0\.d - z2\.d}, p0/z, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u64_m24, svuint64x3_t, uint64_t, -+ z0 = svld3_vnum_u64 (p0, x0, -24), -+ z0 = svld3_vnum (p0, x0, -24)) -+ -+/* -+** ld3_vnum_u64_m27: -+** addvl (x[0-9]+), x0, #-27 -+** ld3d {z0\.d - z2\.d}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u64_m27, svuint64x3_t, uint64_t, -+ z0 = svld3_vnum_u64 (p0, x0, -27), -+ z0 = svld3_vnum (p0, x0, -27)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld3_vnum_u64_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld3d {z0\.d - z2\.d}, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u64_x1, svuint64x3_t, uint64_t, -+ z0 = svld3_vnum_u64 (p0, x0, x1), -+ z0 = svld3_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_u8.c -new file mode 100644 -index 000000000..e9d1ab495 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld3_u8.c -@@ -0,0 +1,246 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld3_u8_base: -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_u8_base, svuint8x3_t, uint8_t, -+ z0 = svld3_u8 (p0, x0), -+ z0 = svld3 (p0, x0)) -+ -+/* -+** ld3_u8_index: -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ld3_u8_index, svuint8x3_t, uint8_t, -+ z0 = svld3_u8 (p0, x0 + x1), -+ z0 = svld3 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_u8_1: -+** incb x0 -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_u8_1, svuint8x3_t, uint8_t, -+ z0 = svld3_u8 (p0, x0 + svcntb ()), -+ z0 = svld3 (p0, x0 + svcntb ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_u8_2: -+** incb x0, all, mul #2 -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_u8_2, svuint8x3_t, uint8_t, -+ z0 = svld3_u8 (p0, x0 + svcntb () * 2), -+ z0 = svld3 (p0, x0 + svcntb () * 2)) -+ -+/* -+** ld3_u8_3: -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_u8_3, svuint8x3_t, uint8_t, -+ z0 = svld3_u8 (p0, x0 + svcntb () * 3), -+ z0 = svld3 (p0, x0 + svcntb () * 3)) -+ -+/* -+** ld3_u8_21: -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_u8_21, svuint8x3_t, uint8_t, -+ z0 = svld3_u8 (p0, x0 + svcntb () * 21), -+ z0 = svld3 (p0, x0 + svcntb () * 21)) -+ -+/* -+** ld3_u8_24: -+** addvl (x[0-9]+), x0, #24 -+** ld3b {z0\.b - z2\.b}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_u8_24, svuint8x3_t, uint8_t, -+ z0 = svld3_u8 (p0, x0 + svcntb () * 24), -+ z0 = svld3 (p0, x0 + svcntb () * 24)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld3_u8_m1: -+** decb x0 -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_u8_m1, svuint8x3_t, uint8_t, -+ z0 = svld3_u8 (p0, x0 - svcntb ()), -+ z0 = svld3 (p0, x0 - svcntb ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_u8_m2: -+** decb x0, all, mul #2 -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_u8_m2, svuint8x3_t, uint8_t, -+ z0 = svld3_u8 (p0, x0 - svcntb () * 2), -+ z0 = svld3 (p0, x0 - svcntb () * 2)) -+ -+/* -+** ld3_u8_m3: -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_u8_m3, svuint8x3_t, uint8_t, -+ z0 = svld3_u8 (p0, x0 - svcntb () * 3), -+ z0 = svld3 (p0, x0 - svcntb () * 3)) -+ -+/* -+** ld3_u8_m24: -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_u8_m24, svuint8x3_t, uint8_t, -+ z0 = svld3_u8 (p0, x0 - svcntb () * 24), -+ z0 = svld3 (p0, x0 - svcntb () * 24)) -+ -+/* -+** ld3_u8_m27: -+** addvl (x[0-9]+), x0, #-27 -+** ld3b {z0\.b - z2\.b}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_u8_m27, svuint8x3_t, uint8_t, -+ z0 = svld3_u8 (p0, x0 - svcntb () * 27), -+ z0 = svld3 (p0, x0 - svcntb () * 27)) -+ -+/* -+** ld3_vnum_u8_0: -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u8_0, svuint8x3_t, uint8_t, -+ z0 = svld3_vnum_u8 (p0, x0, 0), -+ z0 = svld3_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_vnum_u8_1: -+** incb x0 -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u8_1, svuint8x3_t, uint8_t, -+ z0 = svld3_vnum_u8 (p0, x0, 1), -+ z0 = svld3_vnum (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_vnum_u8_2: -+** incb x0, all, mul #2 -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u8_2, svuint8x3_t, uint8_t, -+ z0 = svld3_vnum_u8 (p0, x0, 2), -+ z0 = svld3_vnum (p0, x0, 2)) -+ -+/* -+** ld3_vnum_u8_3: -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u8_3, svuint8x3_t, uint8_t, -+ z0 = svld3_vnum_u8 (p0, x0, 3), -+ z0 = svld3_vnum (p0, x0, 3)) -+ -+/* -+** ld3_vnum_u8_21: -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u8_21, svuint8x3_t, uint8_t, -+ z0 = svld3_vnum_u8 (p0, x0, 21), -+ z0 = svld3_vnum (p0, x0, 21)) -+ -+/* -+** ld3_vnum_u8_24: -+** addvl (x[0-9]+), x0, #24 -+** ld3b {z0\.b - z2\.b}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u8_24, svuint8x3_t, uint8_t, -+ z0 = svld3_vnum_u8 (p0, x0, 24), -+ z0 = svld3_vnum (p0, x0, 24)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld3_vnum_u8_m1: -+** decb x0 -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u8_m1, svuint8x3_t, uint8_t, -+ z0 = svld3_vnum_u8 (p0, x0, -1), -+ z0 = svld3_vnum (p0, x0, -1)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld3_vnum_u8_m2: -+** decb x0, all, mul #2 -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u8_m2, svuint8x3_t, uint8_t, -+ z0 = svld3_vnum_u8 (p0, x0, -2), -+ z0 = svld3_vnum (p0, x0, -2)) -+ -+/* -+** ld3_vnum_u8_m3: -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u8_m3, svuint8x3_t, uint8_t, -+ z0 = svld3_vnum_u8 (p0, x0, -3), -+ z0 = svld3_vnum (p0, x0, -3)) -+ -+/* -+** ld3_vnum_u8_m24: -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u8_m24, svuint8x3_t, uint8_t, -+ z0 = svld3_vnum_u8 (p0, x0, -24), -+ z0 = svld3_vnum (p0, x0, -24)) -+ -+/* -+** ld3_vnum_u8_m27: -+** addvl (x[0-9]+), x0, #-27 -+** ld3b {z0\.b - z2\.b}, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u8_m27, svuint8x3_t, uint8_t, -+ z0 = svld3_vnum_u8 (p0, x0, -27), -+ z0 = svld3_vnum (p0, x0, -27)) -+ -+/* -+** ld3_vnum_u8_x1: -+** cntb (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ld3b {z0\.b - z2\.b}, p0/z, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** ld3b {z0\.b - z2\.b}, p0/z, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_LOAD (ld3_vnum_u8_x1, svuint8x3_t, uint8_t, -+ z0 = svld3_vnum_u8 (p0, x0, x1), -+ z0 = svld3_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_bf16.c -new file mode 100644 -index 000000000..123ff6355 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_bf16.c -@@ -0,0 +1,286 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld4_bf16_base: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_bf16_base, svbfloat16x4_t, bfloat16_t, -+ z0 = svld4_bf16 (p0, x0), -+ z0 = svld4 (p0, x0)) -+ -+/* -+** ld4_bf16_index: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ld4_bf16_index, svbfloat16x4_t, bfloat16_t, -+ z0 = svld4_bf16 (p0, x0 + x1), -+ z0 = svld4 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_bf16_1: -+** incb x0 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_bf16_1, svbfloat16x4_t, bfloat16_t, -+ z0 = svld4_bf16 (p0, x0 + svcnth ()), -+ z0 = svld4 (p0, x0 + svcnth ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_bf16_2: -+** incb x0, all, mul #2 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_bf16_2, svbfloat16x4_t, bfloat16_t, -+ z0 = svld4_bf16 (p0, x0 + svcnth () * 2), -+ z0 = svld4 (p0, x0 + svcnth () * 2)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld4_bf16_3: -+** incb x0, all, mul #3 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_bf16_3, svbfloat16x4_t, bfloat16_t, -+ z0 = svld4_bf16 (p0, x0 + svcnth () * 3), -+ z0 = svld4 (p0, x0 + svcnth () * 3)) -+ -+/* -+** ld4_bf16_4: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_bf16_4, svbfloat16x4_t, bfloat16_t, -+ z0 = svld4_bf16 (p0, x0 + svcnth () * 4), -+ z0 = svld4 (p0, x0 + svcnth () * 4)) -+ -+/* -+** ld4_bf16_28: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_bf16_28, svbfloat16x4_t, bfloat16_t, -+ z0 = svld4_bf16 (p0, x0 + svcnth () * 28), -+ z0 = svld4 (p0, x0 + svcnth () * 28)) -+ -+/* -+** ld4_bf16_32: -+** [^{]* -+** ld4h {z0\.h - z3\.h}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_bf16_32, svbfloat16x4_t, bfloat16_t, -+ z0 = svld4_bf16 (p0, x0 + svcnth () * 32), -+ z0 = svld4 (p0, x0 + svcnth () * 32)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_bf16_m1: -+** decb x0 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_bf16_m1, svbfloat16x4_t, bfloat16_t, -+ z0 = svld4_bf16 (p0, x0 - svcnth ()), -+ z0 = svld4 (p0, x0 - svcnth ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_bf16_m2: -+** decb x0, all, mul #2 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_bf16_m2, svbfloat16x4_t, bfloat16_t, -+ z0 = svld4_bf16 (p0, x0 - svcnth () * 2), -+ z0 = svld4 (p0, x0 - svcnth () * 2)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_bf16_m3: -+** decb x0, all, mul #3 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_bf16_m3, svbfloat16x4_t, bfloat16_t, -+ z0 = svld4_bf16 (p0, x0 - svcnth () * 3), -+ z0 = svld4 (p0, x0 - svcnth () * 3)) -+ -+/* -+** ld4_bf16_m4: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_bf16_m4, svbfloat16x4_t, bfloat16_t, -+ z0 = svld4_bf16 (p0, x0 - svcnth () * 4), -+ z0 = svld4 (p0, x0 - svcnth () * 4)) -+ -+/* -+** ld4_bf16_m32: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_bf16_m32, svbfloat16x4_t, bfloat16_t, -+ z0 = svld4_bf16 (p0, x0 - svcnth () * 32), -+ z0 = svld4 (p0, x0 - svcnth () * 32)) -+ -+/* -+** ld4_bf16_m36: -+** [^{]* -+** ld4h {z0\.h - z3\.h}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_bf16_m36, svbfloat16x4_t, bfloat16_t, -+ z0 = svld4_bf16 (p0, x0 - svcnth () * 36), -+ z0 = svld4 (p0, x0 - svcnth () * 36)) -+ -+/* -+** ld4_vnum_bf16_0: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_bf16_0, svbfloat16x4_t, bfloat16_t, -+ z0 = svld4_vnum_bf16 (p0, x0, 0), -+ z0 = svld4_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_bf16_1: -+** incb x0 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_bf16_1, svbfloat16x4_t, bfloat16_t, -+ z0 = svld4_vnum_bf16 (p0, x0, 1), -+ z0 = svld4_vnum (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_bf16_2: -+** incb x0, all, mul #2 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_bf16_2, svbfloat16x4_t, bfloat16_t, -+ z0 = svld4_vnum_bf16 (p0, x0, 2), -+ z0 = svld4_vnum (p0, x0, 2)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld4_vnum_bf16_3: -+** incb x0, all, mul #3 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_bf16_3, svbfloat16x4_t, bfloat16_t, -+ z0 = svld4_vnum_bf16 (p0, x0, 3), -+ z0 = svld4_vnum (p0, x0, 3)) -+ -+/* -+** ld4_vnum_bf16_4: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_bf16_4, svbfloat16x4_t, bfloat16_t, -+ z0 = svld4_vnum_bf16 (p0, x0, 4), -+ z0 = svld4_vnum (p0, x0, 4)) -+ -+/* -+** ld4_vnum_bf16_28: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_bf16_28, svbfloat16x4_t, bfloat16_t, -+ z0 = svld4_vnum_bf16 (p0, x0, 28), -+ z0 = svld4_vnum (p0, x0, 28)) -+ -+/* -+** ld4_vnum_bf16_32: -+** [^{]* -+** ld4h {z0\.h - z3\.h}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_bf16_32, svbfloat16x4_t, bfloat16_t, -+ z0 = svld4_vnum_bf16 (p0, x0, 32), -+ z0 = svld4_vnum (p0, x0, 32)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_bf16_m1: -+** decb x0 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_bf16_m1, svbfloat16x4_t, bfloat16_t, -+ z0 = svld4_vnum_bf16 (p0, x0, -1), -+ z0 = svld4_vnum (p0, x0, -1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_bf16_m2: -+** decb x0, all, mul #2 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_bf16_m2, svbfloat16x4_t, bfloat16_t, -+ z0 = svld4_vnum_bf16 (p0, x0, -2), -+ z0 = svld4_vnum (p0, x0, -2)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_bf16_m3: -+** decb x0, all, mul #3 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_bf16_m3, svbfloat16x4_t, bfloat16_t, -+ z0 = svld4_vnum_bf16 (p0, x0, -3), -+ z0 = svld4_vnum (p0, x0, -3)) -+ -+/* -+** ld4_vnum_bf16_m4: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_bf16_m4, svbfloat16x4_t, bfloat16_t, -+ z0 = svld4_vnum_bf16 (p0, x0, -4), -+ z0 = svld4_vnum (p0, x0, -4)) -+ -+/* -+** ld4_vnum_bf16_m32: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_bf16_m32, svbfloat16x4_t, bfloat16_t, -+ z0 = svld4_vnum_bf16 (p0, x0, -32), -+ z0 = svld4_vnum (p0, x0, -32)) -+ -+/* -+** ld4_vnum_bf16_m36: -+** [^{]* -+** ld4h {z0\.h - z3\.h}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_bf16_m36, svbfloat16x4_t, bfloat16_t, -+ z0 = svld4_vnum_bf16 (p0, x0, -36), -+ z0 = svld4_vnum (p0, x0, -36)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld4_vnum_bf16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld4h {z0\.h - z3\.h}, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_bf16_x1, svbfloat16x4_t, bfloat16_t, -+ z0 = svld4_vnum_bf16 (p0, x0, x1), -+ z0 = svld4_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_f16.c -new file mode 100644 -index 000000000..0d0ecf0af ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_f16.c -@@ -0,0 +1,286 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld4_f16_base: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_f16_base, svfloat16x4_t, float16_t, -+ z0 = svld4_f16 (p0, x0), -+ z0 = svld4 (p0, x0)) -+ -+/* -+** ld4_f16_index: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ld4_f16_index, svfloat16x4_t, float16_t, -+ z0 = svld4_f16 (p0, x0 + x1), -+ z0 = svld4 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_f16_1: -+** incb x0 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_f16_1, svfloat16x4_t, float16_t, -+ z0 = svld4_f16 (p0, x0 + svcnth ()), -+ z0 = svld4 (p0, x0 + svcnth ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_f16_2: -+** incb x0, all, mul #2 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_f16_2, svfloat16x4_t, float16_t, -+ z0 = svld4_f16 (p0, x0 + svcnth () * 2), -+ z0 = svld4 (p0, x0 + svcnth () * 2)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_f16_3: -+** incb x0, all, mul #3 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_f16_3, svfloat16x4_t, float16_t, -+ z0 = svld4_f16 (p0, x0 + svcnth () * 3), -+ z0 = svld4 (p0, x0 + svcnth () * 3)) -+ -+/* -+** ld4_f16_4: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_f16_4, svfloat16x4_t, float16_t, -+ z0 = svld4_f16 (p0, x0 + svcnth () * 4), -+ z0 = svld4 (p0, x0 + svcnth () * 4)) -+ -+/* -+** ld4_f16_28: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_f16_28, svfloat16x4_t, float16_t, -+ z0 = svld4_f16 (p0, x0 + svcnth () * 28), -+ z0 = svld4 (p0, x0 + svcnth () * 28)) -+ -+/* -+** ld4_f16_32: -+** [^{]* -+** ld4h {z0\.h - z3\.h}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_f16_32, svfloat16x4_t, float16_t, -+ z0 = svld4_f16 (p0, x0 + svcnth () * 32), -+ z0 = svld4 (p0, x0 + svcnth () * 32)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_f16_m1: -+** decb x0 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_f16_m1, svfloat16x4_t, float16_t, -+ z0 = svld4_f16 (p0, x0 - svcnth ()), -+ z0 = svld4 (p0, x0 - svcnth ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_f16_m2: -+** decb x0, all, mul #2 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_f16_m2, svfloat16x4_t, float16_t, -+ z0 = svld4_f16 (p0, x0 - svcnth () * 2), -+ z0 = svld4 (p0, x0 - svcnth () * 2)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld4_f16_m3: -+** decb x0, all, mul #3 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_f16_m3, svfloat16x4_t, float16_t, -+ z0 = svld4_f16 (p0, x0 - svcnth () * 3), -+ z0 = svld4 (p0, x0 - svcnth () * 3)) -+ -+/* -+** ld4_f16_m4: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_f16_m4, svfloat16x4_t, float16_t, -+ z0 = svld4_f16 (p0, x0 - svcnth () * 4), -+ z0 = svld4 (p0, x0 - svcnth () * 4)) -+ -+/* -+** ld4_f16_m32: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_f16_m32, svfloat16x4_t, float16_t, -+ z0 = svld4_f16 (p0, x0 - svcnth () * 32), -+ z0 = svld4 (p0, x0 - svcnth () * 32)) -+ -+/* -+** ld4_f16_m36: -+** [^{]* -+** ld4h {z0\.h - z3\.h}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_f16_m36, svfloat16x4_t, float16_t, -+ z0 = svld4_f16 (p0, x0 - svcnth () * 36), -+ z0 = svld4 (p0, x0 - svcnth () * 36)) -+ -+/* -+** ld4_vnum_f16_0: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f16_0, svfloat16x4_t, float16_t, -+ z0 = svld4_vnum_f16 (p0, x0, 0), -+ z0 = svld4_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_f16_1: -+** incb x0 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f16_1, svfloat16x4_t, float16_t, -+ z0 = svld4_vnum_f16 (p0, x0, 1), -+ z0 = svld4_vnum (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_f16_2: -+** incb x0, all, mul #2 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f16_2, svfloat16x4_t, float16_t, -+ z0 = svld4_vnum_f16 (p0, x0, 2), -+ z0 = svld4_vnum (p0, x0, 2)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_f16_3: -+** incb x0, all, mul #3 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f16_3, svfloat16x4_t, float16_t, -+ z0 = svld4_vnum_f16 (p0, x0, 3), -+ z0 = svld4_vnum (p0, x0, 3)) -+ -+/* -+** ld4_vnum_f16_4: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f16_4, svfloat16x4_t, float16_t, -+ z0 = svld4_vnum_f16 (p0, x0, 4), -+ z0 = svld4_vnum (p0, x0, 4)) -+ -+/* -+** ld4_vnum_f16_28: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f16_28, svfloat16x4_t, float16_t, -+ z0 = svld4_vnum_f16 (p0, x0, 28), -+ z0 = svld4_vnum (p0, x0, 28)) -+ -+/* -+** ld4_vnum_f16_32: -+** [^{]* -+** ld4h {z0\.h - z3\.h}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f16_32, svfloat16x4_t, float16_t, -+ z0 = svld4_vnum_f16 (p0, x0, 32), -+ z0 = svld4_vnum (p0, x0, 32)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_f16_m1: -+** decb x0 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f16_m1, svfloat16x4_t, float16_t, -+ z0 = svld4_vnum_f16 (p0, x0, -1), -+ z0 = svld4_vnum (p0, x0, -1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_f16_m2: -+** decb x0, all, mul #2 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f16_m2, svfloat16x4_t, float16_t, -+ z0 = svld4_vnum_f16 (p0, x0, -2), -+ z0 = svld4_vnum (p0, x0, -2)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld4_vnum_f16_m3: -+** decb x0, all, mul #3 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f16_m3, svfloat16x4_t, float16_t, -+ z0 = svld4_vnum_f16 (p0, x0, -3), -+ z0 = svld4_vnum (p0, x0, -3)) -+ -+/* -+** ld4_vnum_f16_m4: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f16_m4, svfloat16x4_t, float16_t, -+ z0 = svld4_vnum_f16 (p0, x0, -4), -+ z0 = svld4_vnum (p0, x0, -4)) -+ -+/* -+** ld4_vnum_f16_m32: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f16_m32, svfloat16x4_t, float16_t, -+ z0 = svld4_vnum_f16 (p0, x0, -32), -+ z0 = svld4_vnum (p0, x0, -32)) -+ -+/* -+** ld4_vnum_f16_m36: -+** [^{]* -+** ld4h {z0\.h - z3\.h}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f16_m36, svfloat16x4_t, float16_t, -+ z0 = svld4_vnum_f16 (p0, x0, -36), -+ z0 = svld4_vnum (p0, x0, -36)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld4_vnum_f16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld4h {z0\.h - z3\.h}, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f16_x1, svfloat16x4_t, float16_t, -+ z0 = svld4_vnum_f16 (p0, x0, x1), -+ z0 = svld4_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_f32.c -new file mode 100644 -index 000000000..a433d1ffe ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_f32.c -@@ -0,0 +1,286 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld4_f32_base: -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_f32_base, svfloat32x4_t, float32_t, -+ z0 = svld4_f32 (p0, x0), -+ z0 = svld4 (p0, x0)) -+ -+/* -+** ld4_f32_index: -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_LOAD (ld4_f32_index, svfloat32x4_t, float32_t, -+ z0 = svld4_f32 (p0, x0 + x1), -+ z0 = svld4 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_f32_1: -+** incb x0 -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_f32_1, svfloat32x4_t, float32_t, -+ z0 = svld4_f32 (p0, x0 + svcntw ()), -+ z0 = svld4 (p0, x0 + svcntw ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_f32_2: -+** incb x0, all, mul #2 -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_f32_2, svfloat32x4_t, float32_t, -+ z0 = svld4_f32 (p0, x0 + svcntw () * 2), -+ z0 = svld4 (p0, x0 + svcntw () * 2)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld4_f32_3: -+** incb x0, all, mul #3 -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_f32_3, svfloat32x4_t, float32_t, -+ z0 = svld4_f32 (p0, x0 + svcntw () * 3), -+ z0 = svld4 (p0, x0 + svcntw () * 3)) -+ -+/* -+** ld4_f32_4: -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_f32_4, svfloat32x4_t, float32_t, -+ z0 = svld4_f32 (p0, x0 + svcntw () * 4), -+ z0 = svld4 (p0, x0 + svcntw () * 4)) -+ -+/* -+** ld4_f32_28: -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_f32_28, svfloat32x4_t, float32_t, -+ z0 = svld4_f32 (p0, x0 + svcntw () * 28), -+ z0 = svld4 (p0, x0 + svcntw () * 28)) -+ -+/* -+** ld4_f32_32: -+** [^{]* -+** ld4w {z0\.s - z3\.s}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_f32_32, svfloat32x4_t, float32_t, -+ z0 = svld4_f32 (p0, x0 + svcntw () * 32), -+ z0 = svld4 (p0, x0 + svcntw () * 32)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_f32_m1: -+** decb x0 -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_f32_m1, svfloat32x4_t, float32_t, -+ z0 = svld4_f32 (p0, x0 - svcntw ()), -+ z0 = svld4 (p0, x0 - svcntw ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_f32_m2: -+** decb x0, all, mul #2 -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_f32_m2, svfloat32x4_t, float32_t, -+ z0 = svld4_f32 (p0, x0 - svcntw () * 2), -+ z0 = svld4 (p0, x0 - svcntw () * 2)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_f32_m3: -+** decb x0, all, mul #3 -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_f32_m3, svfloat32x4_t, float32_t, -+ z0 = svld4_f32 (p0, x0 - svcntw () * 3), -+ z0 = svld4 (p0, x0 - svcntw () * 3)) -+ -+/* -+** ld4_f32_m4: -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_f32_m4, svfloat32x4_t, float32_t, -+ z0 = svld4_f32 (p0, x0 - svcntw () * 4), -+ z0 = svld4 (p0, x0 - svcntw () * 4)) -+ -+/* -+** ld4_f32_m32: -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_f32_m32, svfloat32x4_t, float32_t, -+ z0 = svld4_f32 (p0, x0 - svcntw () * 32), -+ z0 = svld4 (p0, x0 - svcntw () * 32)) -+ -+/* -+** ld4_f32_m36: -+** [^{]* -+** ld4w {z0\.s - z3\.s}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_f32_m36, svfloat32x4_t, float32_t, -+ z0 = svld4_f32 (p0, x0 - svcntw () * 36), -+ z0 = svld4 (p0, x0 - svcntw () * 36)) -+ -+/* -+** ld4_vnum_f32_0: -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f32_0, svfloat32x4_t, float32_t, -+ z0 = svld4_vnum_f32 (p0, x0, 0), -+ z0 = svld4_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_f32_1: -+** incb x0 -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f32_1, svfloat32x4_t, float32_t, -+ z0 = svld4_vnum_f32 (p0, x0, 1), -+ z0 = svld4_vnum (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_f32_2: -+** incb x0, all, mul #2 -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f32_2, svfloat32x4_t, float32_t, -+ z0 = svld4_vnum_f32 (p0, x0, 2), -+ z0 = svld4_vnum (p0, x0, 2)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld4_vnum_f32_3: -+** incb x0, all, mul #3 -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f32_3, svfloat32x4_t, float32_t, -+ z0 = svld4_vnum_f32 (p0, x0, 3), -+ z0 = svld4_vnum (p0, x0, 3)) -+ -+/* -+** ld4_vnum_f32_4: -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f32_4, svfloat32x4_t, float32_t, -+ z0 = svld4_vnum_f32 (p0, x0, 4), -+ z0 = svld4_vnum (p0, x0, 4)) -+ -+/* -+** ld4_vnum_f32_28: -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f32_28, svfloat32x4_t, float32_t, -+ z0 = svld4_vnum_f32 (p0, x0, 28), -+ z0 = svld4_vnum (p0, x0, 28)) -+ -+/* -+** ld4_vnum_f32_32: -+** [^{]* -+** ld4w {z0\.s - z3\.s}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f32_32, svfloat32x4_t, float32_t, -+ z0 = svld4_vnum_f32 (p0, x0, 32), -+ z0 = svld4_vnum (p0, x0, 32)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_f32_m1: -+** decb x0 -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f32_m1, svfloat32x4_t, float32_t, -+ z0 = svld4_vnum_f32 (p0, x0, -1), -+ z0 = svld4_vnum (p0, x0, -1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_f32_m2: -+** decb x0, all, mul #2 -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f32_m2, svfloat32x4_t, float32_t, -+ z0 = svld4_vnum_f32 (p0, x0, -2), -+ z0 = svld4_vnum (p0, x0, -2)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_f32_m3: -+** decb x0, all, mul #3 -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f32_m3, svfloat32x4_t, float32_t, -+ z0 = svld4_vnum_f32 (p0, x0, -3), -+ z0 = svld4_vnum (p0, x0, -3)) -+ -+/* -+** ld4_vnum_f32_m4: -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f32_m4, svfloat32x4_t, float32_t, -+ z0 = svld4_vnum_f32 (p0, x0, -4), -+ z0 = svld4_vnum (p0, x0, -4)) -+ -+/* -+** ld4_vnum_f32_m32: -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f32_m32, svfloat32x4_t, float32_t, -+ z0 = svld4_vnum_f32 (p0, x0, -32), -+ z0 = svld4_vnum (p0, x0, -32)) -+ -+/* -+** ld4_vnum_f32_m36: -+** [^{]* -+** ld4w {z0\.s - z3\.s}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f32_m36, svfloat32x4_t, float32_t, -+ z0 = svld4_vnum_f32 (p0, x0, -36), -+ z0 = svld4_vnum (p0, x0, -36)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld4_vnum_f32_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld4w {z0\.s - z3\.s}, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f32_x1, svfloat32x4_t, float32_t, -+ z0 = svld4_vnum_f32 (p0, x0, x1), -+ z0 = svld4_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_f64.c -new file mode 100644 -index 000000000..bb18decec ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_f64.c -@@ -0,0 +1,286 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld4_f64_base: -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_f64_base, svfloat64x4_t, float64_t, -+ z0 = svld4_f64 (p0, x0), -+ z0 = svld4 (p0, x0)) -+ -+/* -+** ld4_f64_index: -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_LOAD (ld4_f64_index, svfloat64x4_t, float64_t, -+ z0 = svld4_f64 (p0, x0 + x1), -+ z0 = svld4 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_f64_1: -+** incb x0 -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_f64_1, svfloat64x4_t, float64_t, -+ z0 = svld4_f64 (p0, x0 + svcntd ()), -+ z0 = svld4 (p0, x0 + svcntd ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_f64_2: -+** incb x0, all, mul #2 -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_f64_2, svfloat64x4_t, float64_t, -+ z0 = svld4_f64 (p0, x0 + svcntd () * 2), -+ z0 = svld4 (p0, x0 + svcntd () * 2)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_f64_3: -+** incb x0, all, mul #3 -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_f64_3, svfloat64x4_t, float64_t, -+ z0 = svld4_f64 (p0, x0 + svcntd () * 3), -+ z0 = svld4 (p0, x0 + svcntd () * 3)) -+ -+/* -+** ld4_f64_4: -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_f64_4, svfloat64x4_t, float64_t, -+ z0 = svld4_f64 (p0, x0 + svcntd () * 4), -+ z0 = svld4 (p0, x0 + svcntd () * 4)) -+ -+/* -+** ld4_f64_28: -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_f64_28, svfloat64x4_t, float64_t, -+ z0 = svld4_f64 (p0, x0 + svcntd () * 28), -+ z0 = svld4 (p0, x0 + svcntd () * 28)) -+ -+/* -+** ld4_f64_32: -+** [^{]* -+** ld4d {z0\.d - z3\.d}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_f64_32, svfloat64x4_t, float64_t, -+ z0 = svld4_f64 (p0, x0 + svcntd () * 32), -+ z0 = svld4 (p0, x0 + svcntd () * 32)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_f64_m1: -+** decb x0 -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_f64_m1, svfloat64x4_t, float64_t, -+ z0 = svld4_f64 (p0, x0 - svcntd ()), -+ z0 = svld4 (p0, x0 - svcntd ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_f64_m2: -+** decb x0, all, mul #2 -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_f64_m2, svfloat64x4_t, float64_t, -+ z0 = svld4_f64 (p0, x0 - svcntd () * 2), -+ z0 = svld4 (p0, x0 - svcntd () * 2)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld4_f64_m3: -+** decb x0, all, mul #3 -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_f64_m3, svfloat64x4_t, float64_t, -+ z0 = svld4_f64 (p0, x0 - svcntd () * 3), -+ z0 = svld4 (p0, x0 - svcntd () * 3)) -+ -+/* -+** ld4_f64_m4: -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_f64_m4, svfloat64x4_t, float64_t, -+ z0 = svld4_f64 (p0, x0 - svcntd () * 4), -+ z0 = svld4 (p0, x0 - svcntd () * 4)) -+ -+/* -+** ld4_f64_m32: -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_f64_m32, svfloat64x4_t, float64_t, -+ z0 = svld4_f64 (p0, x0 - svcntd () * 32), -+ z0 = svld4 (p0, x0 - svcntd () * 32)) -+ -+/* -+** ld4_f64_m36: -+** [^{]* -+** ld4d {z0\.d - z3\.d}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_f64_m36, svfloat64x4_t, float64_t, -+ z0 = svld4_f64 (p0, x0 - svcntd () * 36), -+ z0 = svld4 (p0, x0 - svcntd () * 36)) -+ -+/* -+** ld4_vnum_f64_0: -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f64_0, svfloat64x4_t, float64_t, -+ z0 = svld4_vnum_f64 (p0, x0, 0), -+ z0 = svld4_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_f64_1: -+** incb x0 -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f64_1, svfloat64x4_t, float64_t, -+ z0 = svld4_vnum_f64 (p0, x0, 1), -+ z0 = svld4_vnum (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_f64_2: -+** incb x0, all, mul #2 -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f64_2, svfloat64x4_t, float64_t, -+ z0 = svld4_vnum_f64 (p0, x0, 2), -+ z0 = svld4_vnum (p0, x0, 2)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_f64_3: -+** incb x0, all, mul #3 -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f64_3, svfloat64x4_t, float64_t, -+ z0 = svld4_vnum_f64 (p0, x0, 3), -+ z0 = svld4_vnum (p0, x0, 3)) -+ -+/* -+** ld4_vnum_f64_4: -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f64_4, svfloat64x4_t, float64_t, -+ z0 = svld4_vnum_f64 (p0, x0, 4), -+ z0 = svld4_vnum (p0, x0, 4)) -+ -+/* -+** ld4_vnum_f64_28: -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f64_28, svfloat64x4_t, float64_t, -+ z0 = svld4_vnum_f64 (p0, x0, 28), -+ z0 = svld4_vnum (p0, x0, 28)) -+ -+/* -+** ld4_vnum_f64_32: -+** [^{]* -+** ld4d {z0\.d - z3\.d}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f64_32, svfloat64x4_t, float64_t, -+ z0 = svld4_vnum_f64 (p0, x0, 32), -+ z0 = svld4_vnum (p0, x0, 32)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_f64_m1: -+** decb x0 -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f64_m1, svfloat64x4_t, float64_t, -+ z0 = svld4_vnum_f64 (p0, x0, -1), -+ z0 = svld4_vnum (p0, x0, -1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_f64_m2: -+** decb x0, all, mul #2 -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f64_m2, svfloat64x4_t, float64_t, -+ z0 = svld4_vnum_f64 (p0, x0, -2), -+ z0 = svld4_vnum (p0, x0, -2)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld4_vnum_f64_m3: -+** decb x0, all, mul #3 -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f64_m3, svfloat64x4_t, float64_t, -+ z0 = svld4_vnum_f64 (p0, x0, -3), -+ z0 = svld4_vnum (p0, x0, -3)) -+ -+/* -+** ld4_vnum_f64_m4: -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f64_m4, svfloat64x4_t, float64_t, -+ z0 = svld4_vnum_f64 (p0, x0, -4), -+ z0 = svld4_vnum (p0, x0, -4)) -+ -+/* -+** ld4_vnum_f64_m32: -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f64_m32, svfloat64x4_t, float64_t, -+ z0 = svld4_vnum_f64 (p0, x0, -32), -+ z0 = svld4_vnum (p0, x0, -32)) -+ -+/* -+** ld4_vnum_f64_m36: -+** [^{]* -+** ld4d {z0\.d - z3\.d}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f64_m36, svfloat64x4_t, float64_t, -+ z0 = svld4_vnum_f64 (p0, x0, -36), -+ z0 = svld4_vnum (p0, x0, -36)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld4_vnum_f64_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld4d {z0\.d - z3\.d}, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_f64_x1, svfloat64x4_t, float64_t, -+ z0 = svld4_vnum_f64 (p0, x0, x1), -+ z0 = svld4_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_s16.c -new file mode 100644 -index 000000000..15fb1b595 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_s16.c -@@ -0,0 +1,286 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld4_s16_base: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_s16_base, svint16x4_t, int16_t, -+ z0 = svld4_s16 (p0, x0), -+ z0 = svld4 (p0, x0)) -+ -+/* -+** ld4_s16_index: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ld4_s16_index, svint16x4_t, int16_t, -+ z0 = svld4_s16 (p0, x0 + x1), -+ z0 = svld4 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_s16_1: -+** incb x0 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_s16_1, svint16x4_t, int16_t, -+ z0 = svld4_s16 (p0, x0 + svcnth ()), -+ z0 = svld4 (p0, x0 + svcnth ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_s16_2: -+** incb x0, all, mul #2 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_s16_2, svint16x4_t, int16_t, -+ z0 = svld4_s16 (p0, x0 + svcnth () * 2), -+ z0 = svld4 (p0, x0 + svcnth () * 2)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld4_s16_3: -+** incb x0, all, mul #3 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_s16_3, svint16x4_t, int16_t, -+ z0 = svld4_s16 (p0, x0 + svcnth () * 3), -+ z0 = svld4 (p0, x0 + svcnth () * 3)) -+ -+/* -+** ld4_s16_4: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_s16_4, svint16x4_t, int16_t, -+ z0 = svld4_s16 (p0, x0 + svcnth () * 4), -+ z0 = svld4 (p0, x0 + svcnth () * 4)) -+ -+/* -+** ld4_s16_28: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_s16_28, svint16x4_t, int16_t, -+ z0 = svld4_s16 (p0, x0 + svcnth () * 28), -+ z0 = svld4 (p0, x0 + svcnth () * 28)) -+ -+/* -+** ld4_s16_32: -+** [^{]* -+** ld4h {z0\.h - z3\.h}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_s16_32, svint16x4_t, int16_t, -+ z0 = svld4_s16 (p0, x0 + svcnth () * 32), -+ z0 = svld4 (p0, x0 + svcnth () * 32)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_s16_m1: -+** decb x0 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_s16_m1, svint16x4_t, int16_t, -+ z0 = svld4_s16 (p0, x0 - svcnth ()), -+ z0 = svld4 (p0, x0 - svcnth ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_s16_m2: -+** decb x0, all, mul #2 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_s16_m2, svint16x4_t, int16_t, -+ z0 = svld4_s16 (p0, x0 - svcnth () * 2), -+ z0 = svld4 (p0, x0 - svcnth () * 2)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_s16_m3: -+** decb x0, all, mul #3 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_s16_m3, svint16x4_t, int16_t, -+ z0 = svld4_s16 (p0, x0 - svcnth () * 3), -+ z0 = svld4 (p0, x0 - svcnth () * 3)) -+ -+/* -+** ld4_s16_m4: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_s16_m4, svint16x4_t, int16_t, -+ z0 = svld4_s16 (p0, x0 - svcnth () * 4), -+ z0 = svld4 (p0, x0 - svcnth () * 4)) -+ -+/* -+** ld4_s16_m32: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_s16_m32, svint16x4_t, int16_t, -+ z0 = svld4_s16 (p0, x0 - svcnth () * 32), -+ z0 = svld4 (p0, x0 - svcnth () * 32)) -+ -+/* -+** ld4_s16_m36: -+** [^{]* -+** ld4h {z0\.h - z3\.h}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_s16_m36, svint16x4_t, int16_t, -+ z0 = svld4_s16 (p0, x0 - svcnth () * 36), -+ z0 = svld4 (p0, x0 - svcnth () * 36)) -+ -+/* -+** ld4_vnum_s16_0: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s16_0, svint16x4_t, int16_t, -+ z0 = svld4_vnum_s16 (p0, x0, 0), -+ z0 = svld4_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_s16_1: -+** incb x0 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s16_1, svint16x4_t, int16_t, -+ z0 = svld4_vnum_s16 (p0, x0, 1), -+ z0 = svld4_vnum (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_s16_2: -+** incb x0, all, mul #2 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s16_2, svint16x4_t, int16_t, -+ z0 = svld4_vnum_s16 (p0, x0, 2), -+ z0 = svld4_vnum (p0, x0, 2)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld4_vnum_s16_3: -+** incb x0, all, mul #3 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s16_3, svint16x4_t, int16_t, -+ z0 = svld4_vnum_s16 (p0, x0, 3), -+ z0 = svld4_vnum (p0, x0, 3)) -+ -+/* -+** ld4_vnum_s16_4: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s16_4, svint16x4_t, int16_t, -+ z0 = svld4_vnum_s16 (p0, x0, 4), -+ z0 = svld4_vnum (p0, x0, 4)) -+ -+/* -+** ld4_vnum_s16_28: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s16_28, svint16x4_t, int16_t, -+ z0 = svld4_vnum_s16 (p0, x0, 28), -+ z0 = svld4_vnum (p0, x0, 28)) -+ -+/* -+** ld4_vnum_s16_32: -+** [^{]* -+** ld4h {z0\.h - z3\.h}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s16_32, svint16x4_t, int16_t, -+ z0 = svld4_vnum_s16 (p0, x0, 32), -+ z0 = svld4_vnum (p0, x0, 32)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_s16_m1: -+** decb x0 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s16_m1, svint16x4_t, int16_t, -+ z0 = svld4_vnum_s16 (p0, x0, -1), -+ z0 = svld4_vnum (p0, x0, -1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_s16_m2: -+** decb x0, all, mul #2 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s16_m2, svint16x4_t, int16_t, -+ z0 = svld4_vnum_s16 (p0, x0, -2), -+ z0 = svld4_vnum (p0, x0, -2)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_s16_m3: -+** decb x0, all, mul #3 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s16_m3, svint16x4_t, int16_t, -+ z0 = svld4_vnum_s16 (p0, x0, -3), -+ z0 = svld4_vnum (p0, x0, -3)) -+ -+/* -+** ld4_vnum_s16_m4: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s16_m4, svint16x4_t, int16_t, -+ z0 = svld4_vnum_s16 (p0, x0, -4), -+ z0 = svld4_vnum (p0, x0, -4)) -+ -+/* -+** ld4_vnum_s16_m32: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s16_m32, svint16x4_t, int16_t, -+ z0 = svld4_vnum_s16 (p0, x0, -32), -+ z0 = svld4_vnum (p0, x0, -32)) -+ -+/* -+** ld4_vnum_s16_m36: -+** [^{]* -+** ld4h {z0\.h - z3\.h}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s16_m36, svint16x4_t, int16_t, -+ z0 = svld4_vnum_s16 (p0, x0, -36), -+ z0 = svld4_vnum (p0, x0, -36)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld4_vnum_s16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld4h {z0\.h - z3\.h}, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s16_x1, svint16x4_t, int16_t, -+ z0 = svld4_vnum_s16 (p0, x0, x1), -+ z0 = svld4_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_s32.c -new file mode 100644 -index 000000000..81c67710f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_s32.c -@@ -0,0 +1,286 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld4_s32_base: -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_s32_base, svint32x4_t, int32_t, -+ z0 = svld4_s32 (p0, x0), -+ z0 = svld4 (p0, x0)) -+ -+/* -+** ld4_s32_index: -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_LOAD (ld4_s32_index, svint32x4_t, int32_t, -+ z0 = svld4_s32 (p0, x0 + x1), -+ z0 = svld4 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_s32_1: -+** incb x0 -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_s32_1, svint32x4_t, int32_t, -+ z0 = svld4_s32 (p0, x0 + svcntw ()), -+ z0 = svld4 (p0, x0 + svcntw ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_s32_2: -+** incb x0, all, mul #2 -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_s32_2, svint32x4_t, int32_t, -+ z0 = svld4_s32 (p0, x0 + svcntw () * 2), -+ z0 = svld4 (p0, x0 + svcntw () * 2)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_s32_3: -+** incb x0, all, mul #3 -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_s32_3, svint32x4_t, int32_t, -+ z0 = svld4_s32 (p0, x0 + svcntw () * 3), -+ z0 = svld4 (p0, x0 + svcntw () * 3)) -+ -+/* -+** ld4_s32_4: -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_s32_4, svint32x4_t, int32_t, -+ z0 = svld4_s32 (p0, x0 + svcntw () * 4), -+ z0 = svld4 (p0, x0 + svcntw () * 4)) -+ -+/* -+** ld4_s32_28: -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_s32_28, svint32x4_t, int32_t, -+ z0 = svld4_s32 (p0, x0 + svcntw () * 28), -+ z0 = svld4 (p0, x0 + svcntw () * 28)) -+ -+/* -+** ld4_s32_32: -+** [^{]* -+** ld4w {z0\.s - z3\.s}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_s32_32, svint32x4_t, int32_t, -+ z0 = svld4_s32 (p0, x0 + svcntw () * 32), -+ z0 = svld4 (p0, x0 + svcntw () * 32)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_s32_m1: -+** decb x0 -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_s32_m1, svint32x4_t, int32_t, -+ z0 = svld4_s32 (p0, x0 - svcntw ()), -+ z0 = svld4 (p0, x0 - svcntw ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_s32_m2: -+** decb x0, all, mul #2 -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_s32_m2, svint32x4_t, int32_t, -+ z0 = svld4_s32 (p0, x0 - svcntw () * 2), -+ z0 = svld4 (p0, x0 - svcntw () * 2)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld4_s32_m3: -+** decb x0, all, mul #3 -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_s32_m3, svint32x4_t, int32_t, -+ z0 = svld4_s32 (p0, x0 - svcntw () * 3), -+ z0 = svld4 (p0, x0 - svcntw () * 3)) -+ -+/* -+** ld4_s32_m4: -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_s32_m4, svint32x4_t, int32_t, -+ z0 = svld4_s32 (p0, x0 - svcntw () * 4), -+ z0 = svld4 (p0, x0 - svcntw () * 4)) -+ -+/* -+** ld4_s32_m32: -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_s32_m32, svint32x4_t, int32_t, -+ z0 = svld4_s32 (p0, x0 - svcntw () * 32), -+ z0 = svld4 (p0, x0 - svcntw () * 32)) -+ -+/* -+** ld4_s32_m36: -+** [^{]* -+** ld4w {z0\.s - z3\.s}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_s32_m36, svint32x4_t, int32_t, -+ z0 = svld4_s32 (p0, x0 - svcntw () * 36), -+ z0 = svld4 (p0, x0 - svcntw () * 36)) -+ -+/* -+** ld4_vnum_s32_0: -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s32_0, svint32x4_t, int32_t, -+ z0 = svld4_vnum_s32 (p0, x0, 0), -+ z0 = svld4_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_s32_1: -+** incb x0 -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s32_1, svint32x4_t, int32_t, -+ z0 = svld4_vnum_s32 (p0, x0, 1), -+ z0 = svld4_vnum (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_s32_2: -+** incb x0, all, mul #2 -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s32_2, svint32x4_t, int32_t, -+ z0 = svld4_vnum_s32 (p0, x0, 2), -+ z0 = svld4_vnum (p0, x0, 2)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_s32_3: -+** incb x0, all, mul #3 -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s32_3, svint32x4_t, int32_t, -+ z0 = svld4_vnum_s32 (p0, x0, 3), -+ z0 = svld4_vnum (p0, x0, 3)) -+ -+/* -+** ld4_vnum_s32_4: -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s32_4, svint32x4_t, int32_t, -+ z0 = svld4_vnum_s32 (p0, x0, 4), -+ z0 = svld4_vnum (p0, x0, 4)) -+ -+/* -+** ld4_vnum_s32_28: -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s32_28, svint32x4_t, int32_t, -+ z0 = svld4_vnum_s32 (p0, x0, 28), -+ z0 = svld4_vnum (p0, x0, 28)) -+ -+/* -+** ld4_vnum_s32_32: -+** [^{]* -+** ld4w {z0\.s - z3\.s}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s32_32, svint32x4_t, int32_t, -+ z0 = svld4_vnum_s32 (p0, x0, 32), -+ z0 = svld4_vnum (p0, x0, 32)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_s32_m1: -+** decb x0 -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s32_m1, svint32x4_t, int32_t, -+ z0 = svld4_vnum_s32 (p0, x0, -1), -+ z0 = svld4_vnum (p0, x0, -1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_s32_m2: -+** decb x0, all, mul #2 -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s32_m2, svint32x4_t, int32_t, -+ z0 = svld4_vnum_s32 (p0, x0, -2), -+ z0 = svld4_vnum (p0, x0, -2)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld4_vnum_s32_m3: -+** decb x0, all, mul #3 -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s32_m3, svint32x4_t, int32_t, -+ z0 = svld4_vnum_s32 (p0, x0, -3), -+ z0 = svld4_vnum (p0, x0, -3)) -+ -+/* -+** ld4_vnum_s32_m4: -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s32_m4, svint32x4_t, int32_t, -+ z0 = svld4_vnum_s32 (p0, x0, -4), -+ z0 = svld4_vnum (p0, x0, -4)) -+ -+/* -+** ld4_vnum_s32_m32: -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s32_m32, svint32x4_t, int32_t, -+ z0 = svld4_vnum_s32 (p0, x0, -32), -+ z0 = svld4_vnum (p0, x0, -32)) -+ -+/* -+** ld4_vnum_s32_m36: -+** [^{]* -+** ld4w {z0\.s - z3\.s}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s32_m36, svint32x4_t, int32_t, -+ z0 = svld4_vnum_s32 (p0, x0, -36), -+ z0 = svld4_vnum (p0, x0, -36)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld4_vnum_s32_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld4w {z0\.s - z3\.s}, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s32_x1, svint32x4_t, int32_t, -+ z0 = svld4_vnum_s32 (p0, x0, x1), -+ z0 = svld4_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_s64.c -new file mode 100644 -index 000000000..d24c30dcf ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_s64.c -@@ -0,0 +1,286 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld4_s64_base: -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_s64_base, svint64x4_t, int64_t, -+ z0 = svld4_s64 (p0, x0), -+ z0 = svld4 (p0, x0)) -+ -+/* -+** ld4_s64_index: -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_LOAD (ld4_s64_index, svint64x4_t, int64_t, -+ z0 = svld4_s64 (p0, x0 + x1), -+ z0 = svld4 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_s64_1: -+** incb x0 -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_s64_1, svint64x4_t, int64_t, -+ z0 = svld4_s64 (p0, x0 + svcntd ()), -+ z0 = svld4 (p0, x0 + svcntd ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_s64_2: -+** incb x0, all, mul #2 -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_s64_2, svint64x4_t, int64_t, -+ z0 = svld4_s64 (p0, x0 + svcntd () * 2), -+ z0 = svld4 (p0, x0 + svcntd () * 2)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld4_s64_3: -+** incb x0, all, mul #3 -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_s64_3, svint64x4_t, int64_t, -+ z0 = svld4_s64 (p0, x0 + svcntd () * 3), -+ z0 = svld4 (p0, x0 + svcntd () * 3)) -+ -+/* -+** ld4_s64_4: -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_s64_4, svint64x4_t, int64_t, -+ z0 = svld4_s64 (p0, x0 + svcntd () * 4), -+ z0 = svld4 (p0, x0 + svcntd () * 4)) -+ -+/* -+** ld4_s64_28: -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_s64_28, svint64x4_t, int64_t, -+ z0 = svld4_s64 (p0, x0 + svcntd () * 28), -+ z0 = svld4 (p0, x0 + svcntd () * 28)) -+ -+/* -+** ld4_s64_32: -+** [^{]* -+** ld4d {z0\.d - z3\.d}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_s64_32, svint64x4_t, int64_t, -+ z0 = svld4_s64 (p0, x0 + svcntd () * 32), -+ z0 = svld4 (p0, x0 + svcntd () * 32)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_s64_m1: -+** decb x0 -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_s64_m1, svint64x4_t, int64_t, -+ z0 = svld4_s64 (p0, x0 - svcntd ()), -+ z0 = svld4 (p0, x0 - svcntd ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_s64_m2: -+** decb x0, all, mul #2 -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_s64_m2, svint64x4_t, int64_t, -+ z0 = svld4_s64 (p0, x0 - svcntd () * 2), -+ z0 = svld4 (p0, x0 - svcntd () * 2)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_s64_m3: -+** decb x0, all, mul #3 -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_s64_m3, svint64x4_t, int64_t, -+ z0 = svld4_s64 (p0, x0 - svcntd () * 3), -+ z0 = svld4 (p0, x0 - svcntd () * 3)) -+ -+/* -+** ld4_s64_m4: -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_s64_m4, svint64x4_t, int64_t, -+ z0 = svld4_s64 (p0, x0 - svcntd () * 4), -+ z0 = svld4 (p0, x0 - svcntd () * 4)) -+ -+/* -+** ld4_s64_m32: -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_s64_m32, svint64x4_t, int64_t, -+ z0 = svld4_s64 (p0, x0 - svcntd () * 32), -+ z0 = svld4 (p0, x0 - svcntd () * 32)) -+ -+/* -+** ld4_s64_m36: -+** [^{]* -+** ld4d {z0\.d - z3\.d}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_s64_m36, svint64x4_t, int64_t, -+ z0 = svld4_s64 (p0, x0 - svcntd () * 36), -+ z0 = svld4 (p0, x0 - svcntd () * 36)) -+ -+/* -+** ld4_vnum_s64_0: -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s64_0, svint64x4_t, int64_t, -+ z0 = svld4_vnum_s64 (p0, x0, 0), -+ z0 = svld4_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_s64_1: -+** incb x0 -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s64_1, svint64x4_t, int64_t, -+ z0 = svld4_vnum_s64 (p0, x0, 1), -+ z0 = svld4_vnum (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_s64_2: -+** incb x0, all, mul #2 -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s64_2, svint64x4_t, int64_t, -+ z0 = svld4_vnum_s64 (p0, x0, 2), -+ z0 = svld4_vnum (p0, x0, 2)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld4_vnum_s64_3: -+** incb x0, all, mul #3 -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s64_3, svint64x4_t, int64_t, -+ z0 = svld4_vnum_s64 (p0, x0, 3), -+ z0 = svld4_vnum (p0, x0, 3)) -+ -+/* -+** ld4_vnum_s64_4: -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s64_4, svint64x4_t, int64_t, -+ z0 = svld4_vnum_s64 (p0, x0, 4), -+ z0 = svld4_vnum (p0, x0, 4)) -+ -+/* -+** ld4_vnum_s64_28: -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s64_28, svint64x4_t, int64_t, -+ z0 = svld4_vnum_s64 (p0, x0, 28), -+ z0 = svld4_vnum (p0, x0, 28)) -+ -+/* -+** ld4_vnum_s64_32: -+** [^{]* -+** ld4d {z0\.d - z3\.d}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s64_32, svint64x4_t, int64_t, -+ z0 = svld4_vnum_s64 (p0, x0, 32), -+ z0 = svld4_vnum (p0, x0, 32)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_s64_m1: -+** decb x0 -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s64_m1, svint64x4_t, int64_t, -+ z0 = svld4_vnum_s64 (p0, x0, -1), -+ z0 = svld4_vnum (p0, x0, -1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_s64_m2: -+** decb x0, all, mul #2 -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s64_m2, svint64x4_t, int64_t, -+ z0 = svld4_vnum_s64 (p0, x0, -2), -+ z0 = svld4_vnum (p0, x0, -2)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_s64_m3: -+** decb x0, all, mul #3 -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s64_m3, svint64x4_t, int64_t, -+ z0 = svld4_vnum_s64 (p0, x0, -3), -+ z0 = svld4_vnum (p0, x0, -3)) -+ -+/* -+** ld4_vnum_s64_m4: -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s64_m4, svint64x4_t, int64_t, -+ z0 = svld4_vnum_s64 (p0, x0, -4), -+ z0 = svld4_vnum (p0, x0, -4)) -+ -+/* -+** ld4_vnum_s64_m32: -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s64_m32, svint64x4_t, int64_t, -+ z0 = svld4_vnum_s64 (p0, x0, -32), -+ z0 = svld4_vnum (p0, x0, -32)) -+ -+/* -+** ld4_vnum_s64_m36: -+** [^{]* -+** ld4d {z0\.d - z3\.d}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s64_m36, svint64x4_t, int64_t, -+ z0 = svld4_vnum_s64 (p0, x0, -36), -+ z0 = svld4_vnum (p0, x0, -36)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld4_vnum_s64_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld4d {z0\.d - z3\.d}, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s64_x1, svint64x4_t, int64_t, -+ z0 = svld4_vnum_s64 (p0, x0, x1), -+ z0 = svld4_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_s8.c -new file mode 100644 -index 000000000..d7a17e266 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_s8.c -@@ -0,0 +1,290 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld4_s8_base: -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_s8_base, svint8x4_t, int8_t, -+ z0 = svld4_s8 (p0, x0), -+ z0 = svld4 (p0, x0)) -+ -+/* -+** ld4_s8_index: -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ld4_s8_index, svint8x4_t, int8_t, -+ z0 = svld4_s8 (p0, x0 + x1), -+ z0 = svld4 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_s8_1: -+** incb x0 -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_s8_1, svint8x4_t, int8_t, -+ z0 = svld4_s8 (p0, x0 + svcntb ()), -+ z0 = svld4 (p0, x0 + svcntb ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_s8_2: -+** incb x0, all, mul #2 -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_s8_2, svint8x4_t, int8_t, -+ z0 = svld4_s8 (p0, x0 + svcntb () * 2), -+ z0 = svld4 (p0, x0 + svcntb () * 2)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_s8_3: -+** incb x0, all, mul #3 -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_s8_3, svint8x4_t, int8_t, -+ z0 = svld4_s8 (p0, x0 + svcntb () * 3), -+ z0 = svld4 (p0, x0 + svcntb () * 3)) -+ -+/* -+** ld4_s8_4: -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_s8_4, svint8x4_t, int8_t, -+ z0 = svld4_s8 (p0, x0 + svcntb () * 4), -+ z0 = svld4 (p0, x0 + svcntb () * 4)) -+ -+/* -+** ld4_s8_28: -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_s8_28, svint8x4_t, int8_t, -+ z0 = svld4_s8 (p0, x0 + svcntb () * 28), -+ z0 = svld4 (p0, x0 + svcntb () * 28)) -+ -+/* -+** ld4_s8_32: -+** [^{]* -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_s8_32, svint8x4_t, int8_t, -+ z0 = svld4_s8 (p0, x0 + svcntb () * 32), -+ z0 = svld4 (p0, x0 + svcntb () * 32)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_s8_m1: -+** decb x0 -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_s8_m1, svint8x4_t, int8_t, -+ z0 = svld4_s8 (p0, x0 - svcntb ()), -+ z0 = svld4 (p0, x0 - svcntb ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_s8_m2: -+** decb x0, all, mul #2 -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_s8_m2, svint8x4_t, int8_t, -+ z0 = svld4_s8 (p0, x0 - svcntb () * 2), -+ z0 = svld4 (p0, x0 - svcntb () * 2)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld4_s8_m3: -+** decb x0, all, mul #3 -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_s8_m3, svint8x4_t, int8_t, -+ z0 = svld4_s8 (p0, x0 - svcntb () * 3), -+ z0 = svld4 (p0, x0 - svcntb () * 3)) -+ -+/* -+** ld4_s8_m4: -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_s8_m4, svint8x4_t, int8_t, -+ z0 = svld4_s8 (p0, x0 - svcntb () * 4), -+ z0 = svld4 (p0, x0 - svcntb () * 4)) -+ -+/* -+** ld4_s8_m32: -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_s8_m32, svint8x4_t, int8_t, -+ z0 = svld4_s8 (p0, x0 - svcntb () * 32), -+ z0 = svld4 (p0, x0 - svcntb () * 32)) -+ -+/* -+** ld4_s8_m36: -+** [^{]* -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_s8_m36, svint8x4_t, int8_t, -+ z0 = svld4_s8 (p0, x0 - svcntb () * 36), -+ z0 = svld4 (p0, x0 - svcntb () * 36)) -+ -+/* -+** ld4_vnum_s8_0: -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s8_0, svint8x4_t, int8_t, -+ z0 = svld4_vnum_s8 (p0, x0, 0), -+ z0 = svld4_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_s8_1: -+** incb x0 -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s8_1, svint8x4_t, int8_t, -+ z0 = svld4_vnum_s8 (p0, x0, 1), -+ z0 = svld4_vnum (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_s8_2: -+** incb x0, all, mul #2 -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s8_2, svint8x4_t, int8_t, -+ z0 = svld4_vnum_s8 (p0, x0, 2), -+ z0 = svld4_vnum (p0, x0, 2)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_s8_3: -+** incb x0, all, mul #3 -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s8_3, svint8x4_t, int8_t, -+ z0 = svld4_vnum_s8 (p0, x0, 3), -+ z0 = svld4_vnum (p0, x0, 3)) -+ -+/* -+** ld4_vnum_s8_4: -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s8_4, svint8x4_t, int8_t, -+ z0 = svld4_vnum_s8 (p0, x0, 4), -+ z0 = svld4_vnum (p0, x0, 4)) -+ -+/* -+** ld4_vnum_s8_28: -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s8_28, svint8x4_t, int8_t, -+ z0 = svld4_vnum_s8 (p0, x0, 28), -+ z0 = svld4_vnum (p0, x0, 28)) -+ -+/* -+** ld4_vnum_s8_32: -+** [^{]* -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s8_32, svint8x4_t, int8_t, -+ z0 = svld4_vnum_s8 (p0, x0, 32), -+ z0 = svld4_vnum (p0, x0, 32)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_s8_m1: -+** decb x0 -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s8_m1, svint8x4_t, int8_t, -+ z0 = svld4_vnum_s8 (p0, x0, -1), -+ z0 = svld4_vnum (p0, x0, -1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_s8_m2: -+** decb x0, all, mul #2 -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s8_m2, svint8x4_t, int8_t, -+ z0 = svld4_vnum_s8 (p0, x0, -2), -+ z0 = svld4_vnum (p0, x0, -2)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld4_vnum_s8_m3: -+** decb x0, all, mul #3 -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s8_m3, svint8x4_t, int8_t, -+ z0 = svld4_vnum_s8 (p0, x0, -3), -+ z0 = svld4_vnum (p0, x0, -3)) -+ -+/* -+** ld4_vnum_s8_m4: -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s8_m4, svint8x4_t, int8_t, -+ z0 = svld4_vnum_s8 (p0, x0, -4), -+ z0 = svld4_vnum (p0, x0, -4)) -+ -+/* -+** ld4_vnum_s8_m32: -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s8_m32, svint8x4_t, int8_t, -+ z0 = svld4_vnum_s8 (p0, x0, -32), -+ z0 = svld4_vnum (p0, x0, -32)) -+ -+/* -+** ld4_vnum_s8_m36: -+** [^{]* -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s8_m36, svint8x4_t, int8_t, -+ z0 = svld4_vnum_s8 (p0, x0, -36), -+ z0 = svld4_vnum (p0, x0, -36)) -+ -+/* -+** ld4_vnum_s8_x1: -+** cntb (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ld4b {z0\.b - z3\.b}, p0/z, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_LOAD (ld4_vnum_s8_x1, svint8x4_t, int8_t, -+ z0 = svld4_vnum_s8 (p0, x0, x1), -+ z0 = svld4_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_u16.c -new file mode 100644 -index 000000000..234593d10 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_u16.c -@@ -0,0 +1,286 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld4_u16_base: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_u16_base, svuint16x4_t, uint16_t, -+ z0 = svld4_u16 (p0, x0), -+ z0 = svld4 (p0, x0)) -+ -+/* -+** ld4_u16_index: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ld4_u16_index, svuint16x4_t, uint16_t, -+ z0 = svld4_u16 (p0, x0 + x1), -+ z0 = svld4 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_u16_1: -+** incb x0 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_u16_1, svuint16x4_t, uint16_t, -+ z0 = svld4_u16 (p0, x0 + svcnth ()), -+ z0 = svld4 (p0, x0 + svcnth ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_u16_2: -+** incb x0, all, mul #2 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_u16_2, svuint16x4_t, uint16_t, -+ z0 = svld4_u16 (p0, x0 + svcnth () * 2), -+ z0 = svld4 (p0, x0 + svcnth () * 2)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld4_u16_3: -+** incb x0, all, mul #3 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_u16_3, svuint16x4_t, uint16_t, -+ z0 = svld4_u16 (p0, x0 + svcnth () * 3), -+ z0 = svld4 (p0, x0 + svcnth () * 3)) -+ -+/* -+** ld4_u16_4: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_u16_4, svuint16x4_t, uint16_t, -+ z0 = svld4_u16 (p0, x0 + svcnth () * 4), -+ z0 = svld4 (p0, x0 + svcnth () * 4)) -+ -+/* -+** ld4_u16_28: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_u16_28, svuint16x4_t, uint16_t, -+ z0 = svld4_u16 (p0, x0 + svcnth () * 28), -+ z0 = svld4 (p0, x0 + svcnth () * 28)) -+ -+/* -+** ld4_u16_32: -+** [^{]* -+** ld4h {z0\.h - z3\.h}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_u16_32, svuint16x4_t, uint16_t, -+ z0 = svld4_u16 (p0, x0 + svcnth () * 32), -+ z0 = svld4 (p0, x0 + svcnth () * 32)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_u16_m1: -+** decb x0 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_u16_m1, svuint16x4_t, uint16_t, -+ z0 = svld4_u16 (p0, x0 - svcnth ()), -+ z0 = svld4 (p0, x0 - svcnth ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_u16_m2: -+** decb x0, all, mul #2 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_u16_m2, svuint16x4_t, uint16_t, -+ z0 = svld4_u16 (p0, x0 - svcnth () * 2), -+ z0 = svld4 (p0, x0 - svcnth () * 2)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_u16_m3: -+** decb x0, all, mul #3 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_u16_m3, svuint16x4_t, uint16_t, -+ z0 = svld4_u16 (p0, x0 - svcnth () * 3), -+ z0 = svld4 (p0, x0 - svcnth () * 3)) -+ -+/* -+** ld4_u16_m4: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_u16_m4, svuint16x4_t, uint16_t, -+ z0 = svld4_u16 (p0, x0 - svcnth () * 4), -+ z0 = svld4 (p0, x0 - svcnth () * 4)) -+ -+/* -+** ld4_u16_m32: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_u16_m32, svuint16x4_t, uint16_t, -+ z0 = svld4_u16 (p0, x0 - svcnth () * 32), -+ z0 = svld4 (p0, x0 - svcnth () * 32)) -+ -+/* -+** ld4_u16_m36: -+** [^{]* -+** ld4h {z0\.h - z3\.h}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_u16_m36, svuint16x4_t, uint16_t, -+ z0 = svld4_u16 (p0, x0 - svcnth () * 36), -+ z0 = svld4 (p0, x0 - svcnth () * 36)) -+ -+/* -+** ld4_vnum_u16_0: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u16_0, svuint16x4_t, uint16_t, -+ z0 = svld4_vnum_u16 (p0, x0, 0), -+ z0 = svld4_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_u16_1: -+** incb x0 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u16_1, svuint16x4_t, uint16_t, -+ z0 = svld4_vnum_u16 (p0, x0, 1), -+ z0 = svld4_vnum (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_u16_2: -+** incb x0, all, mul #2 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u16_2, svuint16x4_t, uint16_t, -+ z0 = svld4_vnum_u16 (p0, x0, 2), -+ z0 = svld4_vnum (p0, x0, 2)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld4_vnum_u16_3: -+** incb x0, all, mul #3 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u16_3, svuint16x4_t, uint16_t, -+ z0 = svld4_vnum_u16 (p0, x0, 3), -+ z0 = svld4_vnum (p0, x0, 3)) -+ -+/* -+** ld4_vnum_u16_4: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u16_4, svuint16x4_t, uint16_t, -+ z0 = svld4_vnum_u16 (p0, x0, 4), -+ z0 = svld4_vnum (p0, x0, 4)) -+ -+/* -+** ld4_vnum_u16_28: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u16_28, svuint16x4_t, uint16_t, -+ z0 = svld4_vnum_u16 (p0, x0, 28), -+ z0 = svld4_vnum (p0, x0, 28)) -+ -+/* -+** ld4_vnum_u16_32: -+** [^{]* -+** ld4h {z0\.h - z3\.h}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u16_32, svuint16x4_t, uint16_t, -+ z0 = svld4_vnum_u16 (p0, x0, 32), -+ z0 = svld4_vnum (p0, x0, 32)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_u16_m1: -+** decb x0 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u16_m1, svuint16x4_t, uint16_t, -+ z0 = svld4_vnum_u16 (p0, x0, -1), -+ z0 = svld4_vnum (p0, x0, -1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_u16_m2: -+** decb x0, all, mul #2 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u16_m2, svuint16x4_t, uint16_t, -+ z0 = svld4_vnum_u16 (p0, x0, -2), -+ z0 = svld4_vnum (p0, x0, -2)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_u16_m3: -+** decb x0, all, mul #3 -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u16_m3, svuint16x4_t, uint16_t, -+ z0 = svld4_vnum_u16 (p0, x0, -3), -+ z0 = svld4_vnum (p0, x0, -3)) -+ -+/* -+** ld4_vnum_u16_m4: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u16_m4, svuint16x4_t, uint16_t, -+ z0 = svld4_vnum_u16 (p0, x0, -4), -+ z0 = svld4_vnum (p0, x0, -4)) -+ -+/* -+** ld4_vnum_u16_m32: -+** ld4h {z0\.h - z3\.h}, p0/z, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u16_m32, svuint16x4_t, uint16_t, -+ z0 = svld4_vnum_u16 (p0, x0, -32), -+ z0 = svld4_vnum (p0, x0, -32)) -+ -+/* -+** ld4_vnum_u16_m36: -+** [^{]* -+** ld4h {z0\.h - z3\.h}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u16_m36, svuint16x4_t, uint16_t, -+ z0 = svld4_vnum_u16 (p0, x0, -36), -+ z0 = svld4_vnum (p0, x0, -36)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld4_vnum_u16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld4h {z0\.h - z3\.h}, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u16_x1, svuint16x4_t, uint16_t, -+ z0 = svld4_vnum_u16 (p0, x0, x1), -+ z0 = svld4_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_u32.c -new file mode 100644 -index 000000000..ad2627800 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_u32.c -@@ -0,0 +1,286 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld4_u32_base: -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_u32_base, svuint32x4_t, uint32_t, -+ z0 = svld4_u32 (p0, x0), -+ z0 = svld4 (p0, x0)) -+ -+/* -+** ld4_u32_index: -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_LOAD (ld4_u32_index, svuint32x4_t, uint32_t, -+ z0 = svld4_u32 (p0, x0 + x1), -+ z0 = svld4 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_u32_1: -+** incb x0 -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_u32_1, svuint32x4_t, uint32_t, -+ z0 = svld4_u32 (p0, x0 + svcntw ()), -+ z0 = svld4 (p0, x0 + svcntw ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_u32_2: -+** incb x0, all, mul #2 -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_u32_2, svuint32x4_t, uint32_t, -+ z0 = svld4_u32 (p0, x0 + svcntw () * 2), -+ z0 = svld4 (p0, x0 + svcntw () * 2)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_u32_3: -+** incb x0, all, mul #3 -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_u32_3, svuint32x4_t, uint32_t, -+ z0 = svld4_u32 (p0, x0 + svcntw () * 3), -+ z0 = svld4 (p0, x0 + svcntw () * 3)) -+ -+/* -+** ld4_u32_4: -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_u32_4, svuint32x4_t, uint32_t, -+ z0 = svld4_u32 (p0, x0 + svcntw () * 4), -+ z0 = svld4 (p0, x0 + svcntw () * 4)) -+ -+/* -+** ld4_u32_28: -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_u32_28, svuint32x4_t, uint32_t, -+ z0 = svld4_u32 (p0, x0 + svcntw () * 28), -+ z0 = svld4 (p0, x0 + svcntw () * 28)) -+ -+/* -+** ld4_u32_32: -+** [^{]* -+** ld4w {z0\.s - z3\.s}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_u32_32, svuint32x4_t, uint32_t, -+ z0 = svld4_u32 (p0, x0 + svcntw () * 32), -+ z0 = svld4 (p0, x0 + svcntw () * 32)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_u32_m1: -+** decb x0 -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_u32_m1, svuint32x4_t, uint32_t, -+ z0 = svld4_u32 (p0, x0 - svcntw ()), -+ z0 = svld4 (p0, x0 - svcntw ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_u32_m2: -+** decb x0, all, mul #2 -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_u32_m2, svuint32x4_t, uint32_t, -+ z0 = svld4_u32 (p0, x0 - svcntw () * 2), -+ z0 = svld4 (p0, x0 - svcntw () * 2)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld4_u32_m3: -+** decb x0, all, mul #3 -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_u32_m3, svuint32x4_t, uint32_t, -+ z0 = svld4_u32 (p0, x0 - svcntw () * 3), -+ z0 = svld4 (p0, x0 - svcntw () * 3)) -+ -+/* -+** ld4_u32_m4: -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_u32_m4, svuint32x4_t, uint32_t, -+ z0 = svld4_u32 (p0, x0 - svcntw () * 4), -+ z0 = svld4 (p0, x0 - svcntw () * 4)) -+ -+/* -+** ld4_u32_m32: -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_u32_m32, svuint32x4_t, uint32_t, -+ z0 = svld4_u32 (p0, x0 - svcntw () * 32), -+ z0 = svld4 (p0, x0 - svcntw () * 32)) -+ -+/* -+** ld4_u32_m36: -+** [^{]* -+** ld4w {z0\.s - z3\.s}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_u32_m36, svuint32x4_t, uint32_t, -+ z0 = svld4_u32 (p0, x0 - svcntw () * 36), -+ z0 = svld4 (p0, x0 - svcntw () * 36)) -+ -+/* -+** ld4_vnum_u32_0: -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u32_0, svuint32x4_t, uint32_t, -+ z0 = svld4_vnum_u32 (p0, x0, 0), -+ z0 = svld4_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_u32_1: -+** incb x0 -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u32_1, svuint32x4_t, uint32_t, -+ z0 = svld4_vnum_u32 (p0, x0, 1), -+ z0 = svld4_vnum (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_u32_2: -+** incb x0, all, mul #2 -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u32_2, svuint32x4_t, uint32_t, -+ z0 = svld4_vnum_u32 (p0, x0, 2), -+ z0 = svld4_vnum (p0, x0, 2)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_u32_3: -+** incb x0, all, mul #3 -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u32_3, svuint32x4_t, uint32_t, -+ z0 = svld4_vnum_u32 (p0, x0, 3), -+ z0 = svld4_vnum (p0, x0, 3)) -+ -+/* -+** ld4_vnum_u32_4: -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u32_4, svuint32x4_t, uint32_t, -+ z0 = svld4_vnum_u32 (p0, x0, 4), -+ z0 = svld4_vnum (p0, x0, 4)) -+ -+/* -+** ld4_vnum_u32_28: -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u32_28, svuint32x4_t, uint32_t, -+ z0 = svld4_vnum_u32 (p0, x0, 28), -+ z0 = svld4_vnum (p0, x0, 28)) -+ -+/* -+** ld4_vnum_u32_32: -+** [^{]* -+** ld4w {z0\.s - z3\.s}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u32_32, svuint32x4_t, uint32_t, -+ z0 = svld4_vnum_u32 (p0, x0, 32), -+ z0 = svld4_vnum (p0, x0, 32)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_u32_m1: -+** decb x0 -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u32_m1, svuint32x4_t, uint32_t, -+ z0 = svld4_vnum_u32 (p0, x0, -1), -+ z0 = svld4_vnum (p0, x0, -1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_u32_m2: -+** decb x0, all, mul #2 -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u32_m2, svuint32x4_t, uint32_t, -+ z0 = svld4_vnum_u32 (p0, x0, -2), -+ z0 = svld4_vnum (p0, x0, -2)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld4_vnum_u32_m3: -+** decb x0, all, mul #3 -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u32_m3, svuint32x4_t, uint32_t, -+ z0 = svld4_vnum_u32 (p0, x0, -3), -+ z0 = svld4_vnum (p0, x0, -3)) -+ -+/* -+** ld4_vnum_u32_m4: -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u32_m4, svuint32x4_t, uint32_t, -+ z0 = svld4_vnum_u32 (p0, x0, -4), -+ z0 = svld4_vnum (p0, x0, -4)) -+ -+/* -+** ld4_vnum_u32_m32: -+** ld4w {z0\.s - z3\.s}, p0/z, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u32_m32, svuint32x4_t, uint32_t, -+ z0 = svld4_vnum_u32 (p0, x0, -32), -+ z0 = svld4_vnum (p0, x0, -32)) -+ -+/* -+** ld4_vnum_u32_m36: -+** [^{]* -+** ld4w {z0\.s - z3\.s}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u32_m36, svuint32x4_t, uint32_t, -+ z0 = svld4_vnum_u32 (p0, x0, -36), -+ z0 = svld4_vnum (p0, x0, -36)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld4_vnum_u32_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld4w {z0\.s - z3\.s}, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u32_x1, svuint32x4_t, uint32_t, -+ z0 = svld4_vnum_u32 (p0, x0, x1), -+ z0 = svld4_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_u64.c -new file mode 100644 -index 000000000..8772ba42d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_u64.c -@@ -0,0 +1,286 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld4_u64_base: -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_u64_base, svuint64x4_t, uint64_t, -+ z0 = svld4_u64 (p0, x0), -+ z0 = svld4 (p0, x0)) -+ -+/* -+** ld4_u64_index: -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_LOAD (ld4_u64_index, svuint64x4_t, uint64_t, -+ z0 = svld4_u64 (p0, x0 + x1), -+ z0 = svld4 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_u64_1: -+** incb x0 -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_u64_1, svuint64x4_t, uint64_t, -+ z0 = svld4_u64 (p0, x0 + svcntd ()), -+ z0 = svld4 (p0, x0 + svcntd ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_u64_2: -+** incb x0, all, mul #2 -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_u64_2, svuint64x4_t, uint64_t, -+ z0 = svld4_u64 (p0, x0 + svcntd () * 2), -+ z0 = svld4 (p0, x0 + svcntd () * 2)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld4_u64_3: -+** incb x0, all, mul #3 -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_u64_3, svuint64x4_t, uint64_t, -+ z0 = svld4_u64 (p0, x0 + svcntd () * 3), -+ z0 = svld4 (p0, x0 + svcntd () * 3)) -+ -+/* -+** ld4_u64_4: -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_u64_4, svuint64x4_t, uint64_t, -+ z0 = svld4_u64 (p0, x0 + svcntd () * 4), -+ z0 = svld4 (p0, x0 + svcntd () * 4)) -+ -+/* -+** ld4_u64_28: -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_u64_28, svuint64x4_t, uint64_t, -+ z0 = svld4_u64 (p0, x0 + svcntd () * 28), -+ z0 = svld4 (p0, x0 + svcntd () * 28)) -+ -+/* -+** ld4_u64_32: -+** [^{]* -+** ld4d {z0\.d - z3\.d}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_u64_32, svuint64x4_t, uint64_t, -+ z0 = svld4_u64 (p0, x0 + svcntd () * 32), -+ z0 = svld4 (p0, x0 + svcntd () * 32)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_u64_m1: -+** decb x0 -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_u64_m1, svuint64x4_t, uint64_t, -+ z0 = svld4_u64 (p0, x0 - svcntd ()), -+ z0 = svld4 (p0, x0 - svcntd ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_u64_m2: -+** decb x0, all, mul #2 -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_u64_m2, svuint64x4_t, uint64_t, -+ z0 = svld4_u64 (p0, x0 - svcntd () * 2), -+ z0 = svld4 (p0, x0 - svcntd () * 2)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_u64_m3: -+** decb x0, all, mul #3 -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_u64_m3, svuint64x4_t, uint64_t, -+ z0 = svld4_u64 (p0, x0 - svcntd () * 3), -+ z0 = svld4 (p0, x0 - svcntd () * 3)) -+ -+/* -+** ld4_u64_m4: -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_u64_m4, svuint64x4_t, uint64_t, -+ z0 = svld4_u64 (p0, x0 - svcntd () * 4), -+ z0 = svld4 (p0, x0 - svcntd () * 4)) -+ -+/* -+** ld4_u64_m32: -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_u64_m32, svuint64x4_t, uint64_t, -+ z0 = svld4_u64 (p0, x0 - svcntd () * 32), -+ z0 = svld4 (p0, x0 - svcntd () * 32)) -+ -+/* -+** ld4_u64_m36: -+** [^{]* -+** ld4d {z0\.d - z3\.d}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_u64_m36, svuint64x4_t, uint64_t, -+ z0 = svld4_u64 (p0, x0 - svcntd () * 36), -+ z0 = svld4 (p0, x0 - svcntd () * 36)) -+ -+/* -+** ld4_vnum_u64_0: -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u64_0, svuint64x4_t, uint64_t, -+ z0 = svld4_vnum_u64 (p0, x0, 0), -+ z0 = svld4_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_u64_1: -+** incb x0 -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u64_1, svuint64x4_t, uint64_t, -+ z0 = svld4_vnum_u64 (p0, x0, 1), -+ z0 = svld4_vnum (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_u64_2: -+** incb x0, all, mul #2 -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u64_2, svuint64x4_t, uint64_t, -+ z0 = svld4_vnum_u64 (p0, x0, 2), -+ z0 = svld4_vnum (p0, x0, 2)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld4_vnum_u64_3: -+** incb x0, all, mul #3 -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u64_3, svuint64x4_t, uint64_t, -+ z0 = svld4_vnum_u64 (p0, x0, 3), -+ z0 = svld4_vnum (p0, x0, 3)) -+ -+/* -+** ld4_vnum_u64_4: -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u64_4, svuint64x4_t, uint64_t, -+ z0 = svld4_vnum_u64 (p0, x0, 4), -+ z0 = svld4_vnum (p0, x0, 4)) -+ -+/* -+** ld4_vnum_u64_28: -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u64_28, svuint64x4_t, uint64_t, -+ z0 = svld4_vnum_u64 (p0, x0, 28), -+ z0 = svld4_vnum (p0, x0, 28)) -+ -+/* -+** ld4_vnum_u64_32: -+** [^{]* -+** ld4d {z0\.d - z3\.d}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u64_32, svuint64x4_t, uint64_t, -+ z0 = svld4_vnum_u64 (p0, x0, 32), -+ z0 = svld4_vnum (p0, x0, 32)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_u64_m1: -+** decb x0 -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u64_m1, svuint64x4_t, uint64_t, -+ z0 = svld4_vnum_u64 (p0, x0, -1), -+ z0 = svld4_vnum (p0, x0, -1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_u64_m2: -+** decb x0, all, mul #2 -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u64_m2, svuint64x4_t, uint64_t, -+ z0 = svld4_vnum_u64 (p0, x0, -2), -+ z0 = svld4_vnum (p0, x0, -2)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_u64_m3: -+** decb x0, all, mul #3 -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u64_m3, svuint64x4_t, uint64_t, -+ z0 = svld4_vnum_u64 (p0, x0, -3), -+ z0 = svld4_vnum (p0, x0, -3)) -+ -+/* -+** ld4_vnum_u64_m4: -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u64_m4, svuint64x4_t, uint64_t, -+ z0 = svld4_vnum_u64 (p0, x0, -4), -+ z0 = svld4_vnum (p0, x0, -4)) -+ -+/* -+** ld4_vnum_u64_m32: -+** ld4d {z0\.d - z3\.d}, p0/z, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u64_m32, svuint64x4_t, uint64_t, -+ z0 = svld4_vnum_u64 (p0, x0, -32), -+ z0 = svld4_vnum (p0, x0, -32)) -+ -+/* -+** ld4_vnum_u64_m36: -+** [^{]* -+** ld4d {z0\.d - z3\.d}, p0/z, \[x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u64_m36, svuint64x4_t, uint64_t, -+ z0 = svld4_vnum_u64 (p0, x0, -36), -+ z0 = svld4_vnum (p0, x0, -36)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ld4_vnum_u64_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ld4d {z0\.d - z3\.d}, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u64_x1, svuint64x4_t, uint64_t, -+ z0 = svld4_vnum_u64 (p0, x0, x1), -+ z0 = svld4_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_u8.c -new file mode 100644 -index 000000000..85b2987ce ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ld4_u8.c -@@ -0,0 +1,290 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ld4_u8_base: -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_u8_base, svuint8x4_t, uint8_t, -+ z0 = svld4_u8 (p0, x0), -+ z0 = svld4 (p0, x0)) -+ -+/* -+** ld4_u8_index: -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ld4_u8_index, svuint8x4_t, uint8_t, -+ z0 = svld4_u8 (p0, x0 + x1), -+ z0 = svld4 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_u8_1: -+** incb x0 -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_u8_1, svuint8x4_t, uint8_t, -+ z0 = svld4_u8 (p0, x0 + svcntb ()), -+ z0 = svld4 (p0, x0 + svcntb ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_u8_2: -+** incb x0, all, mul #2 -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_u8_2, svuint8x4_t, uint8_t, -+ z0 = svld4_u8 (p0, x0 + svcntb () * 2), -+ z0 = svld4 (p0, x0 + svcntb () * 2)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_u8_3: -+** incb x0, all, mul #3 -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_u8_3, svuint8x4_t, uint8_t, -+ z0 = svld4_u8 (p0, x0 + svcntb () * 3), -+ z0 = svld4 (p0, x0 + svcntb () * 3)) -+ -+/* -+** ld4_u8_4: -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_u8_4, svuint8x4_t, uint8_t, -+ z0 = svld4_u8 (p0, x0 + svcntb () * 4), -+ z0 = svld4 (p0, x0 + svcntb () * 4)) -+ -+/* -+** ld4_u8_28: -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_u8_28, svuint8x4_t, uint8_t, -+ z0 = svld4_u8 (p0, x0 + svcntb () * 28), -+ z0 = svld4 (p0, x0 + svcntb () * 28)) -+ -+/* -+** ld4_u8_32: -+** [^{]* -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_u8_32, svuint8x4_t, uint8_t, -+ z0 = svld4_u8 (p0, x0 + svcntb () * 32), -+ z0 = svld4 (p0, x0 + svcntb () * 32)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_u8_m1: -+** decb x0 -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_u8_m1, svuint8x4_t, uint8_t, -+ z0 = svld4_u8 (p0, x0 - svcntb ()), -+ z0 = svld4 (p0, x0 - svcntb ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_u8_m2: -+** decb x0, all, mul #2 -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_u8_m2, svuint8x4_t, uint8_t, -+ z0 = svld4_u8 (p0, x0 - svcntb () * 2), -+ z0 = svld4 (p0, x0 - svcntb () * 2)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld4_u8_m3: -+** decb x0, all, mul #3 -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_u8_m3, svuint8x4_t, uint8_t, -+ z0 = svld4_u8 (p0, x0 - svcntb () * 3), -+ z0 = svld4 (p0, x0 - svcntb () * 3)) -+ -+/* -+** ld4_u8_m4: -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_u8_m4, svuint8x4_t, uint8_t, -+ z0 = svld4_u8 (p0, x0 - svcntb () * 4), -+ z0 = svld4 (p0, x0 - svcntb () * 4)) -+ -+/* -+** ld4_u8_m32: -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_u8_m32, svuint8x4_t, uint8_t, -+ z0 = svld4_u8 (p0, x0 - svcntb () * 32), -+ z0 = svld4 (p0, x0 - svcntb () * 32)) -+ -+/* -+** ld4_u8_m36: -+** [^{]* -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_u8_m36, svuint8x4_t, uint8_t, -+ z0 = svld4_u8 (p0, x0 - svcntb () * 36), -+ z0 = svld4 (p0, x0 - svcntb () * 36)) -+ -+/* -+** ld4_vnum_u8_0: -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u8_0, svuint8x4_t, uint8_t, -+ z0 = svld4_vnum_u8 (p0, x0, 0), -+ z0 = svld4_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_u8_1: -+** incb x0 -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u8_1, svuint8x4_t, uint8_t, -+ z0 = svld4_vnum_u8 (p0, x0, 1), -+ z0 = svld4_vnum (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_u8_2: -+** incb x0, all, mul #2 -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u8_2, svuint8x4_t, uint8_t, -+ z0 = svld4_vnum_u8 (p0, x0, 2), -+ z0 = svld4_vnum (p0, x0, 2)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_u8_3: -+** incb x0, all, mul #3 -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u8_3, svuint8x4_t, uint8_t, -+ z0 = svld4_vnum_u8 (p0, x0, 3), -+ z0 = svld4_vnum (p0, x0, 3)) -+ -+/* -+** ld4_vnum_u8_4: -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u8_4, svuint8x4_t, uint8_t, -+ z0 = svld4_vnum_u8 (p0, x0, 4), -+ z0 = svld4_vnum (p0, x0, 4)) -+ -+/* -+** ld4_vnum_u8_28: -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u8_28, svuint8x4_t, uint8_t, -+ z0 = svld4_vnum_u8 (p0, x0, 28), -+ z0 = svld4_vnum (p0, x0, 28)) -+ -+/* -+** ld4_vnum_u8_32: -+** [^{]* -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u8_32, svuint8x4_t, uint8_t, -+ z0 = svld4_vnum_u8 (p0, x0, 32), -+ z0 = svld4_vnum (p0, x0, 32)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_u8_m1: -+** decb x0 -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u8_m1, svuint8x4_t, uint8_t, -+ z0 = svld4_vnum_u8 (p0, x0, -1), -+ z0 = svld4_vnum (p0, x0, -1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ld4_vnum_u8_m2: -+** decb x0, all, mul #2 -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u8_m2, svuint8x4_t, uint8_t, -+ z0 = svld4_vnum_u8 (p0, x0, -2), -+ z0 = svld4_vnum (p0, x0, -2)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ld4_vnum_u8_m3: -+** decb x0, all, mul #3 -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u8_m3, svuint8x4_t, uint8_t, -+ z0 = svld4_vnum_u8 (p0, x0, -3), -+ z0 = svld4_vnum (p0, x0, -3)) -+ -+/* -+** ld4_vnum_u8_m4: -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u8_m4, svuint8x4_t, uint8_t, -+ z0 = svld4_vnum_u8 (p0, x0, -4), -+ z0 = svld4_vnum (p0, x0, -4)) -+ -+/* -+** ld4_vnum_u8_m32: -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u8_m32, svuint8x4_t, uint8_t, -+ z0 = svld4_vnum_u8 (p0, x0, -32), -+ z0 = svld4_vnum (p0, x0, -32)) -+ -+/* -+** ld4_vnum_u8_m36: -+** [^{]* -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, x[0-9]+\] -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u8_m36, svuint8x4_t, uint8_t, -+ z0 = svld4_vnum_u8 (p0, x0, -36), -+ z0 = svld4_vnum (p0, x0, -36)) -+ -+/* -+** ld4_vnum_u8_x1: -+** cntb (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ld4b {z0\.b - z3\.b}, p0/z, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** ld4b {z0\.b - z3\.b}, p0/z, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_LOAD (ld4_vnum_u8_x1, svuint8x4_t, uint8_t, -+ z0 = svld4_vnum_u8 (p0, x0, x1), -+ z0 = svld4_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_bf16.c -new file mode 100644 -index 000000000..80f646870 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_bf16.c -@@ -0,0 +1,86 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1_bf16_base: -+** ldff1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_bf16_base, svbfloat16_t, bfloat16_t, -+ z0 = svldff1_bf16 (p0, x0), -+ z0 = svldff1 (p0, x0)) -+ -+/* -+** ldff1_bf16_index: -+** ldff1h z0\.h, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ldff1_bf16_index, svbfloat16_t, bfloat16_t, -+ z0 = svldff1_bf16 (p0, x0 + x1), -+ z0 = svldff1 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_bf16_1: -+** incb x0 -+** ldff1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_bf16_1, svbfloat16_t, bfloat16_t, -+ z0 = svldff1_bf16 (p0, x0 + svcnth ()), -+ z0 = svldff1 (p0, x0 + svcnth ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_bf16_m1: -+** decb x0 -+** ldff1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_bf16_m1, svbfloat16_t, bfloat16_t, -+ z0 = svldff1_bf16 (p0, x0 - svcnth ()), -+ z0 = svldff1 (p0, x0 - svcnth ())) -+ -+/* -+** ldff1_vnum_bf16_0: -+** ldff1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_bf16_0, svbfloat16_t, bfloat16_t, -+ z0 = svldff1_vnum_bf16 (p0, x0, 0), -+ z0 = svldff1_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_vnum_bf16_1: -+** incb x0 -+** ldff1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_bf16_1, svbfloat16_t, bfloat16_t, -+ z0 = svldff1_vnum_bf16 (p0, x0, 1), -+ z0 = svldff1_vnum (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_vnum_bf16_m1: -+** decb x0 -+** ldff1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_bf16_m1, svbfloat16_t, bfloat16_t, -+ z0 = svldff1_vnum_bf16 (p0, x0, -1), -+ z0 = svldff1_vnum (p0, x0, -1)) -+ -+/* Using MUL to calculate an index would also be OK. 
*/ -+/* -+** ldff1_vnum_bf16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ldff1h z0\.h, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_bf16_x1, svbfloat16_t, bfloat16_t, -+ z0 = svldff1_vnum_bf16 (p0, x0, x1), -+ z0 = svldff1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f16.c -new file mode 100644 -index 000000000..13ce863c9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f16.c -@@ -0,0 +1,86 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1_f16_base: -+** ldff1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_f16_base, svfloat16_t, float16_t, -+ z0 = svldff1_f16 (p0, x0), -+ z0 = svldff1 (p0, x0)) -+ -+/* -+** ldff1_f16_index: -+** ldff1h z0\.h, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ldff1_f16_index, svfloat16_t, float16_t, -+ z0 = svldff1_f16 (p0, x0 + x1), -+ z0 = svldff1 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_f16_1: -+** incb x0 -+** ldff1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_f16_1, svfloat16_t, float16_t, -+ z0 = svldff1_f16 (p0, x0 + svcnth ()), -+ z0 = svldff1 (p0, x0 + svcnth ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_f16_m1: -+** decb x0 -+** ldff1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_f16_m1, svfloat16_t, float16_t, -+ z0 = svldff1_f16 (p0, x0 - svcnth ()), -+ z0 = svldff1 (p0, x0 - svcnth ())) -+ -+/* -+** ldff1_vnum_f16_0: -+** ldff1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_f16_0, svfloat16_t, float16_t, -+ z0 = svldff1_vnum_f16 (p0, x0, 0), -+ z0 = svldff1_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_vnum_f16_1: -+** incb x0 -+** ldff1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_f16_1, svfloat16_t, float16_t, -+ z0 = svldff1_vnum_f16 (p0, x0, 1), -+ z0 = svldff1_vnum (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_vnum_f16_m1: -+** decb x0 -+** ldff1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_f16_m1, svfloat16_t, float16_t, -+ z0 = svldff1_vnum_f16 (p0, x0, -1), -+ z0 = svldff1_vnum (p0, x0, -1)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ldff1_vnum_f16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ldff1h z0\.h, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_f16_x1, svfloat16_t, float16_t, -+ z0 = svldff1_vnum_f16 (p0, x0, x1), -+ z0 = svldff1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f32.c -new file mode 100644 -index 000000000..2fcc63390 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f32.c -@@ -0,0 +1,86 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1_f32_base: -+** ldff1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_f32_base, svfloat32_t, float32_t, -+ z0 = svldff1_f32 (p0, x0), -+ z0 = svldff1 (p0, x0)) -+ -+/* -+** ldff1_f32_index: -+** ldff1w z0\.s, p0/z, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_LOAD (ldff1_f32_index, svfloat32_t, float32_t, -+ z0 = svldff1_f32 (p0, x0 + x1), -+ z0 = svldff1 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_f32_1: -+** incb x0 -+** ldff1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_f32_1, svfloat32_t, float32_t, -+ z0 = svldff1_f32 (p0, x0 + svcntw ()), -+ z0 = svldff1 (p0, x0 + svcntw ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_f32_m1: -+** decb x0 -+** ldff1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_f32_m1, svfloat32_t, float32_t, -+ z0 = svldff1_f32 (p0, x0 - svcntw ()), -+ z0 = svldff1 (p0, x0 - svcntw ())) -+ -+/* -+** ldff1_vnum_f32_0: -+** ldff1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_f32_0, svfloat32_t, float32_t, -+ z0 = svldff1_vnum_f32 (p0, x0, 0), -+ z0 = svldff1_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_vnum_f32_1: -+** incb x0 -+** ldff1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_f32_1, svfloat32_t, float32_t, -+ z0 = svldff1_vnum_f32 (p0, x0, 1), -+ z0 = svldff1_vnum (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_vnum_f32_m1: -+** decb x0 -+** ldff1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_f32_m1, svfloat32_t, float32_t, -+ z0 = svldff1_vnum_f32 (p0, x0, -1), -+ z0 = svldff1_vnum (p0, x0, -1)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ldff1_vnum_f32_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ldff1w z0\.s, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_f32_x1, svfloat32_t, float32_t, -+ z0 = svldff1_vnum_f32 (p0, x0, x1), -+ z0 = svldff1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f64.c -new file mode 100644 -index 000000000..cc15b927a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_f64.c -@@ -0,0 +1,86 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1_f64_base: -+** ldff1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_f64_base, svfloat64_t, float64_t, -+ z0 = svldff1_f64 (p0, x0), -+ z0 = svldff1 (p0, x0)) -+ -+/* -+** ldff1_f64_index: -+** ldff1d z0\.d, p0/z, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_LOAD (ldff1_f64_index, svfloat64_t, float64_t, -+ z0 = svldff1_f64 (p0, x0 + x1), -+ z0 = svldff1 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_f64_1: -+** incb x0 -+** ldff1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_f64_1, svfloat64_t, float64_t, -+ z0 = svldff1_f64 (p0, x0 + svcntd ()), -+ z0 = svldff1 (p0, x0 + svcntd ())) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ldff1_f64_m1: -+** decb x0 -+** ldff1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_f64_m1, svfloat64_t, float64_t, -+ z0 = svldff1_f64 (p0, x0 - svcntd ()), -+ z0 = svldff1 (p0, x0 - svcntd ())) -+ -+/* -+** ldff1_vnum_f64_0: -+** ldff1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_f64_0, svfloat64_t, float64_t, -+ z0 = svldff1_vnum_f64 (p0, x0, 0), -+ z0 = svldff1_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_vnum_f64_1: -+** incb x0 -+** ldff1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_f64_1, svfloat64_t, float64_t, -+ z0 = svldff1_vnum_f64 (p0, x0, 1), -+ z0 = svldff1_vnum (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_vnum_f64_m1: -+** decb x0 -+** ldff1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_f64_m1, svfloat64_t, float64_t, -+ z0 = svldff1_vnum_f64 (p0, x0, -1), -+ z0 = svldff1_vnum (p0, x0, -1)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ldff1_vnum_f64_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ldff1d z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_f64_x1, svfloat64_t, float64_t, -+ z0 = svldff1_vnum_f64 (p0, x0, x1), -+ z0 = svldff1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_f32.c -new file mode 100644 -index 000000000..7e330c042 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_f32.c -@@ -0,0 +1,272 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1_gather_f32_tied1: -+** ldff1w z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_f32_tied1, svfloat32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_f32 (p0, z0), -+ z0_res = svldff1_gather_f32 (p0, z0)) -+ -+/* -+** ldff1_gather_f32_untied: -+** ldff1w z0\.s, p0/z, \[z1\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_f32_untied, svfloat32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_f32 (p0, z1), -+ z0_res = svldff1_gather_f32 (p0, z1)) -+ -+/* -+** ldff1_gather_x0_f32_offset: -+** ldff1w z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_x0_f32_offset, svfloat32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_offset_f32 (p0, z0, x0), -+ z0_res = svldff1_gather_offset_f32 (p0, z0, x0)) -+ -+/* -+** ldff1_gather_m4_f32_offset: -+** mov (x[0-9]+), #?-4 -+** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_m4_f32_offset, svfloat32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_offset_f32 (p0, z0, -4), -+ z0_res = svldff1_gather_offset_f32 (p0, z0, -4)) -+ -+/* -+** ldff1_gather_0_f32_offset: -+** ldff1w z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_0_f32_offset, svfloat32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_offset_f32 (p0, z0, 0), -+ z0_res = svldff1_gather_offset_f32 (p0, z0, 0)) -+ -+/* -+** ldff1_gather_5_f32_offset: -+** mov (x[0-9]+), #?5 -+** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_5_f32_offset, svfloat32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_offset_f32 (p0, z0, 5), -+ z0_res = svldff1_gather_offset_f32 (p0, z0, 5)) -+ -+/* -+** ldff1_gather_6_f32_offset: -+** mov (x[0-9]+), #?6 -+** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ 
-+TEST_LOAD_GATHER_ZS (ldff1_gather_6_f32_offset, svfloat32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_offset_f32 (p0, z0, 6), -+ z0_res = svldff1_gather_offset_f32 (p0, z0, 6)) -+ -+/* -+** ldff1_gather_7_f32_offset: -+** mov (x[0-9]+), #?7 -+** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_7_f32_offset, svfloat32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_offset_f32 (p0, z0, 7), -+ z0_res = svldff1_gather_offset_f32 (p0, z0, 7)) -+ -+/* -+** ldff1_gather_8_f32_offset: -+** ldff1w z0\.s, p0/z, \[z0\.s, #8\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_8_f32_offset, svfloat32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_offset_f32 (p0, z0, 8), -+ z0_res = svldff1_gather_offset_f32 (p0, z0, 8)) -+ -+/* -+** ldff1_gather_124_f32_offset: -+** ldff1w z0\.s, p0/z, \[z0\.s, #124\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_124_f32_offset, svfloat32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_offset_f32 (p0, z0, 124), -+ z0_res = svldff1_gather_offset_f32 (p0, z0, 124)) -+ -+/* -+** ldff1_gather_128_f32_offset: -+** mov (x[0-9]+), #?128 -+** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_128_f32_offset, svfloat32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_offset_f32 (p0, z0, 128), -+ z0_res = svldff1_gather_offset_f32 (p0, z0, 128)) -+ -+/* -+** ldff1_gather_x0_f32_index: -+** lsl (x[0-9]+), x0, #?2 -+** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_x0_f32_index, svfloat32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_index_f32 (p0, z0, x0), -+ z0_res = svldff1_gather_index_f32 (p0, z0, x0)) -+ -+/* -+** ldff1_gather_m1_f32_index: -+** mov (x[0-9]+), #?-4 -+** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_m1_f32_index, svfloat32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_index_f32 (p0, z0, -1), -+ z0_res = svldff1_gather_index_f32 (p0, z0, -1)) -+ -+/* -+** ldff1_gather_0_f32_index: -+** ldff1w z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_0_f32_index, svfloat32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_index_f32 (p0, z0, 0), -+ z0_res = svldff1_gather_index_f32 (p0, z0, 0)) -+ -+/* -+** ldff1_gather_5_f32_index: -+** ldff1w z0\.s, p0/z, \[z0\.s, #20\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_5_f32_index, svfloat32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_index_f32 (p0, z0, 5), -+ z0_res = svldff1_gather_index_f32 (p0, z0, 5)) -+ -+/* -+** ldff1_gather_31_f32_index: -+** ldff1w z0\.s, p0/z, \[z0\.s, #124\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_31_f32_index, svfloat32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_index_f32 (p0, z0, 31), -+ z0_res = svldff1_gather_index_f32 (p0, z0, 31)) -+ -+/* -+** ldff1_gather_32_f32_index: -+** mov (x[0-9]+), #?128 -+** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_32_f32_index, svfloat32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_index_f32 (p0, z0, 32), -+ z0_res = svldff1_gather_index_f32 (p0, z0, 32)) -+ -+/* -+** ldff1_gather_x0_f32_s32offset: -+** ldff1w z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_x0_f32_s32offset, svfloat32_t, float32_t, svint32_t, -+ z0_res = svldff1_gather_s32offset_f32 (p0, x0, z0), -+ z0_res = svldff1_gather_offset (p0, x0, z0)) -+ -+/* -+** ldff1_gather_tied1_f32_s32offset: -+** ldff1w z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ 
-+TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_f32_s32offset, svfloat32_t, float32_t, svint32_t, -+ z0_res = svldff1_gather_s32offset_f32 (p0, x0, z0), -+ z0_res = svldff1_gather_offset (p0, x0, z0)) -+ -+/* -+** ldff1_gather_untied_f32_s32offset: -+** ldff1w z0\.s, p0/z, \[x0, z1\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_untied_f32_s32offset, svfloat32_t, float32_t, svint32_t, -+ z0_res = svldff1_gather_s32offset_f32 (p0, x0, z1), -+ z0_res = svldff1_gather_offset (p0, x0, z1)) -+ -+/* -+** ldff1_gather_x0_f32_u32offset: -+** ldff1w z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_x0_f32_u32offset, svfloat32_t, float32_t, svuint32_t, -+ z0_res = svldff1_gather_u32offset_f32 (p0, x0, z0), -+ z0_res = svldff1_gather_offset (p0, x0, z0)) -+ -+/* -+** ldff1_gather_tied1_f32_u32offset: -+** ldff1w z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_f32_u32offset, svfloat32_t, float32_t, svuint32_t, -+ z0_res = svldff1_gather_u32offset_f32 (p0, x0, z0), -+ z0_res = svldff1_gather_offset (p0, x0, z0)) -+ -+/* -+** ldff1_gather_untied_f32_u32offset: -+** ldff1w z0\.s, p0/z, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_untied_f32_u32offset, svfloat32_t, float32_t, svuint32_t, -+ z0_res = svldff1_gather_u32offset_f32 (p0, x0, z1), -+ z0_res = svldff1_gather_offset (p0, x0, z1)) -+ -+/* -+** ldff1_gather_x0_f32_s32index: -+** ldff1w z0\.s, p0/z, \[x0, z0\.s, sxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_x0_f32_s32index, svfloat32_t, float32_t, svint32_t, -+ z0_res = svldff1_gather_s32index_f32 (p0, x0, z0), -+ z0_res = svldff1_gather_index (p0, x0, z0)) -+ -+/* -+** ldff1_gather_tied1_f32_s32index: -+** ldff1w z0\.s, p0/z, \[x0, z0\.s, sxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_f32_s32index, svfloat32_t, float32_t, svint32_t, -+ z0_res = svldff1_gather_s32index_f32 (p0, x0, z0), -+ z0_res = svldff1_gather_index (p0, x0, z0)) -+ -+/* -+** ldff1_gather_untied_f32_s32index: -+** ldff1w z0\.s, p0/z, \[x0, z1\.s, sxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_untied_f32_s32index, svfloat32_t, float32_t, svint32_t, -+ z0_res = svldff1_gather_s32index_f32 (p0, x0, z1), -+ z0_res = svldff1_gather_index (p0, x0, z1)) -+ -+/* -+** ldff1_gather_x0_f32_u32index: -+** ldff1w z0\.s, p0/z, \[x0, z0\.s, uxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_x0_f32_u32index, svfloat32_t, float32_t, svuint32_t, -+ z0_res = svldff1_gather_u32index_f32 (p0, x0, z0), -+ z0_res = svldff1_gather_index (p0, x0, z0)) -+ -+/* -+** ldff1_gather_tied1_f32_u32index: -+** ldff1w z0\.s, p0/z, \[x0, z0\.s, uxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_f32_u32index, svfloat32_t, float32_t, svuint32_t, -+ z0_res = svldff1_gather_u32index_f32 (p0, x0, z0), -+ z0_res = svldff1_gather_index (p0, x0, z0)) -+ -+/* -+** ldff1_gather_untied_f32_u32index: -+** ldff1w z0\.s, p0/z, \[x0, z1\.s, uxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_untied_f32_u32index, svfloat32_t, float32_t, svuint32_t, -+ z0_res = svldff1_gather_u32index_f32 (p0, x0, z1), -+ z0_res = svldff1_gather_index (p0, x0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_f64.c -new file mode 100644 -index 000000000..d0e47f0bf ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_f64.c -@@ -0,0 +1,348 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { 
target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1_gather_f64_tied1: -+** ldff1d z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_f64_tied1, svfloat64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_f64 (p0, z0), -+ z0_res = svldff1_gather_f64 (p0, z0)) -+ -+/* -+** ldff1_gather_f64_untied: -+** ldff1d z0\.d, p0/z, \[z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_f64_untied, svfloat64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_f64 (p0, z1), -+ z0_res = svldff1_gather_f64 (p0, z1)) -+ -+/* -+** ldff1_gather_x0_f64_offset: -+** ldff1d z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_x0_f64_offset, svfloat64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_f64 (p0, z0, x0), -+ z0_res = svldff1_gather_offset_f64 (p0, z0, x0)) -+ -+/* -+** ldff1_gather_m8_f64_offset: -+** mov (x[0-9]+), #?-8 -+** ldff1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_m8_f64_offset, svfloat64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_f64 (p0, z0, -8), -+ z0_res = svldff1_gather_offset_f64 (p0, z0, -8)) -+ -+/* -+** ldff1_gather_0_f64_offset: -+** ldff1d z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_0_f64_offset, svfloat64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_f64 (p0, z0, 0), -+ z0_res = svldff1_gather_offset_f64 (p0, z0, 0)) -+ -+/* -+** ldff1_gather_9_f64_offset: -+** mov (x[0-9]+), #?9 -+** ldff1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_9_f64_offset, svfloat64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_f64 (p0, z0, 9), -+ z0_res = svldff1_gather_offset_f64 (p0, z0, 9)) -+ -+/* -+** ldff1_gather_10_f64_offset: -+** mov (x[0-9]+), #?10 -+** ldff1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_10_f64_offset, svfloat64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_f64 (p0, z0, 10), -+ z0_res = svldff1_gather_offset_f64 (p0, z0, 10)) -+ -+/* -+** ldff1_gather_11_f64_offset: -+** mov (x[0-9]+), #?11 -+** ldff1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_11_f64_offset, svfloat64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_f64 (p0, z0, 11), -+ z0_res = svldff1_gather_offset_f64 (p0, z0, 11)) -+ -+/* -+** ldff1_gather_12_f64_offset: -+** mov (x[0-9]+), #?12 -+** ldff1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_12_f64_offset, svfloat64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_f64 (p0, z0, 12), -+ z0_res = svldff1_gather_offset_f64 (p0, z0, 12)) -+ -+/* -+** ldff1_gather_13_f64_offset: -+** mov (x[0-9]+), #?13 -+** ldff1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_13_f64_offset, svfloat64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_f64 (p0, z0, 13), -+ z0_res = svldff1_gather_offset_f64 (p0, z0, 13)) -+ -+/* -+** ldff1_gather_14_f64_offset: -+** mov (x[0-9]+), #?14 -+** ldff1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_14_f64_offset, svfloat64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_f64 (p0, z0, 14), -+ z0_res = svldff1_gather_offset_f64 (p0, z0, 14)) -+ -+/* -+** ldff1_gather_15_f64_offset: -+** mov (x[0-9]+), #?15 -+** ldff1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_15_f64_offset, svfloat64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_f64 (p0, z0, 15), -+ z0_res = svldff1_gather_offset_f64 
(p0, z0, 15)) -+ -+/* -+** ldff1_gather_16_f64_offset: -+** ldff1d z0\.d, p0/z, \[z0\.d, #16\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_16_f64_offset, svfloat64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_f64 (p0, z0, 16), -+ z0_res = svldff1_gather_offset_f64 (p0, z0, 16)) -+ -+/* -+** ldff1_gather_248_f64_offset: -+** ldff1d z0\.d, p0/z, \[z0\.d, #248\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_248_f64_offset, svfloat64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_f64 (p0, z0, 248), -+ z0_res = svldff1_gather_offset_f64 (p0, z0, 248)) -+ -+/* -+** ldff1_gather_256_f64_offset: -+** mov (x[0-9]+), #?256 -+** ldff1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_256_f64_offset, svfloat64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_f64 (p0, z0, 256), -+ z0_res = svldff1_gather_offset_f64 (p0, z0, 256)) -+ -+/* -+** ldff1_gather_x0_f64_index: -+** lsl (x[0-9]+), x0, #?3 -+** ldff1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_x0_f64_index, svfloat64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_index_f64 (p0, z0, x0), -+ z0_res = svldff1_gather_index_f64 (p0, z0, x0)) -+ -+/* -+** ldff1_gather_m1_f64_index: -+** mov (x[0-9]+), #?-8 -+** ldff1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_m1_f64_index, svfloat64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_index_f64 (p0, z0, -1), -+ z0_res = svldff1_gather_index_f64 (p0, z0, -1)) -+ -+/* -+** ldff1_gather_0_f64_index: -+** ldff1d z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_0_f64_index, svfloat64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_index_f64 (p0, z0, 0), -+ z0_res = svldff1_gather_index_f64 (p0, z0, 0)) -+ -+/* -+** ldff1_gather_5_f64_index: -+** ldff1d z0\.d, p0/z, \[z0\.d, #40\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_5_f64_index, svfloat64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_index_f64 (p0, z0, 5), -+ z0_res = svldff1_gather_index_f64 (p0, z0, 5)) -+ -+/* -+** ldff1_gather_31_f64_index: -+** ldff1d z0\.d, p0/z, \[z0\.d, #248\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_31_f64_index, svfloat64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_index_f64 (p0, z0, 31), -+ z0_res = svldff1_gather_index_f64 (p0, z0, 31)) -+ -+/* -+** ldff1_gather_32_f64_index: -+** mov (x[0-9]+), #?256 -+** ldff1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_32_f64_index, svfloat64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_index_f64 (p0, z0, 32), -+ z0_res = svldff1_gather_index_f64 (p0, z0, 32)) -+ -+/* -+** ldff1_gather_x0_f64_s64offset: -+** ldff1d z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_x0_f64_s64offset, svfloat64_t, float64_t, svint64_t, -+ z0_res = svldff1_gather_s64offset_f64 (p0, x0, z0), -+ z0_res = svldff1_gather_offset (p0, x0, z0)) -+ -+/* -+** ldff1_gather_tied1_f64_s64offset: -+** ldff1d z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_f64_s64offset, svfloat64_t, float64_t, svint64_t, -+ z0_res = svldff1_gather_s64offset_f64 (p0, x0, z0), -+ z0_res = svldff1_gather_offset (p0, x0, z0)) -+ -+/* -+** ldff1_gather_untied_f64_s64offset: -+** ldff1d z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_untied_f64_s64offset, svfloat64_t, float64_t, svint64_t, -+ z0_res = svldff1_gather_s64offset_f64 (p0, x0, z1), -+ z0_res = svldff1_gather_offset (p0, x0, z1)) -+ -+/* -+** 
ldff1_gather_ext_f64_s64offset: -+** ldff1d z0\.d, p0/z, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_ext_f64_s64offset, svfloat64_t, float64_t, svint64_t, -+ z0_res = svldff1_gather_s64offset_f64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svldff1_gather_offset (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ldff1_gather_x0_f64_u64offset: -+** ldff1d z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_x0_f64_u64offset, svfloat64_t, float64_t, svuint64_t, -+ z0_res = svldff1_gather_u64offset_f64 (p0, x0, z0), -+ z0_res = svldff1_gather_offset (p0, x0, z0)) -+ -+/* -+** ldff1_gather_tied1_f64_u64offset: -+** ldff1d z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_f64_u64offset, svfloat64_t, float64_t, svuint64_t, -+ z0_res = svldff1_gather_u64offset_f64 (p0, x0, z0), -+ z0_res = svldff1_gather_offset (p0, x0, z0)) -+ -+/* -+** ldff1_gather_untied_f64_u64offset: -+** ldff1d z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_untied_f64_u64offset, svfloat64_t, float64_t, svuint64_t, -+ z0_res = svldff1_gather_u64offset_f64 (p0, x0, z1), -+ z0_res = svldff1_gather_offset (p0, x0, z1)) -+ -+/* -+** ldff1_gather_ext_f64_u64offset: -+** ldff1d z0\.d, p0/z, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_ext_f64_u64offset, svfloat64_t, float64_t, svuint64_t, -+ z0_res = svldff1_gather_u64offset_f64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svldff1_gather_offset (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ldff1_gather_x0_f64_s64index: -+** ldff1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_x0_f64_s64index, svfloat64_t, float64_t, svint64_t, -+ z0_res = svldff1_gather_s64index_f64 (p0, x0, z0), -+ z0_res = svldff1_gather_index (p0, x0, z0)) -+ -+/* -+** ldff1_gather_tied1_f64_s64index: -+** ldff1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_f64_s64index, svfloat64_t, float64_t, svint64_t, -+ z0_res = svldff1_gather_s64index_f64 (p0, x0, z0), -+ z0_res = svldff1_gather_index (p0, x0, z0)) -+ -+/* -+** ldff1_gather_untied_f64_s64index: -+** ldff1d z0\.d, p0/z, \[x0, z1\.d, lsl 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_untied_f64_s64index, svfloat64_t, float64_t, svint64_t, -+ z0_res = svldff1_gather_s64index_f64 (p0, x0, z1), -+ z0_res = svldff1_gather_index (p0, x0, z1)) -+ -+/* -+** ldff1_gather_ext_f64_s64index: -+** ldff1d z0\.d, p0/z, \[x0, z1\.d, sxtw 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_ext_f64_s64index, svfloat64_t, float64_t, svint64_t, -+ z0_res = svldff1_gather_s64index_f64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svldff1_gather_index (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ldff1_gather_x0_f64_u64index: -+** ldff1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_x0_f64_u64index, svfloat64_t, float64_t, svuint64_t, -+ z0_res = svldff1_gather_u64index_f64 (p0, x0, z0), -+ z0_res = svldff1_gather_index (p0, x0, z0)) -+ -+/* -+** ldff1_gather_tied1_f64_u64index: -+** ldff1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_f64_u64index, svfloat64_t, float64_t, svuint64_t, -+ z0_res = svldff1_gather_u64index_f64 (p0, x0, z0), -+ z0_res = svldff1_gather_index (p0, x0, z0)) -+ -+/* -+** ldff1_gather_untied_f64_u64index: -+** ldff1d z0\.d, p0/z, \[x0, z1\.d, lsl 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_untied_f64_u64index, svfloat64_t, float64_t, svuint64_t, -+ z0_res 
= svldff1_gather_u64index_f64 (p0, x0, z1), -+ z0_res = svldff1_gather_index (p0, x0, z1)) -+ -+/* -+** ldff1_gather_ext_f64_u64index: -+** ldff1d z0\.d, p0/z, \[x0, z1\.d, uxtw 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_ext_f64_u64index, svfloat64_t, float64_t, svuint64_t, -+ z0_res = svldff1_gather_u64index_f64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svldff1_gather_index (p0, x0, svextw_x (p0, z1))) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_s32.c -new file mode 100644 -index 000000000..66bf0f746 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_s32.c -@@ -0,0 +1,272 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1_gather_s32_tied1: -+** ldff1w z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_s32_tied1, svint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_s32 (p0, z0), -+ z0_res = svldff1_gather_s32 (p0, z0)) -+ -+/* -+** ldff1_gather_s32_untied: -+** ldff1w z0\.s, p0/z, \[z1\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_s32_untied, svint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_s32 (p0, z1), -+ z0_res = svldff1_gather_s32 (p0, z1)) -+ -+/* -+** ldff1_gather_x0_s32_offset: -+** ldff1w z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_x0_s32_offset, svint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_offset_s32 (p0, z0, x0), -+ z0_res = svldff1_gather_offset_s32 (p0, z0, x0)) -+ -+/* -+** ldff1_gather_m4_s32_offset: -+** mov (x[0-9]+), #?-4 -+** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_m4_s32_offset, svint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_offset_s32 (p0, z0, -4), -+ z0_res = svldff1_gather_offset_s32 (p0, z0, -4)) -+ -+/* -+** ldff1_gather_0_s32_offset: -+** ldff1w z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_0_s32_offset, svint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_offset_s32 (p0, z0, 0), -+ z0_res = svldff1_gather_offset_s32 (p0, z0, 0)) -+ -+/* -+** ldff1_gather_5_s32_offset: -+** mov (x[0-9]+), #?5 -+** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_5_s32_offset, svint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_offset_s32 (p0, z0, 5), -+ z0_res = svldff1_gather_offset_s32 (p0, z0, 5)) -+ -+/* -+** ldff1_gather_6_s32_offset: -+** mov (x[0-9]+), #?6 -+** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_6_s32_offset, svint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_offset_s32 (p0, z0, 6), -+ z0_res = svldff1_gather_offset_s32 (p0, z0, 6)) -+ -+/* -+** ldff1_gather_7_s32_offset: -+** mov (x[0-9]+), #?7 -+** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_7_s32_offset, svint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_offset_s32 (p0, z0, 7), -+ z0_res = svldff1_gather_offset_s32 (p0, z0, 7)) -+ -+/* -+** ldff1_gather_8_s32_offset: -+** ldff1w z0\.s, p0/z, \[z0\.s, #8\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_8_s32_offset, svint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_offset_s32 (p0, z0, 8), -+ z0_res = svldff1_gather_offset_s32 (p0, z0, 8)) -+ -+/* -+** ldff1_gather_124_s32_offset: -+** ldff1w z0\.s, p0/z, \[z0\.s, #124\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_124_s32_offset, 
svint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_offset_s32 (p0, z0, 124), -+ z0_res = svldff1_gather_offset_s32 (p0, z0, 124)) -+ -+/* -+** ldff1_gather_128_s32_offset: -+** mov (x[0-9]+), #?128 -+** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_128_s32_offset, svint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_offset_s32 (p0, z0, 128), -+ z0_res = svldff1_gather_offset_s32 (p0, z0, 128)) -+ -+/* -+** ldff1_gather_x0_s32_index: -+** lsl (x[0-9]+), x0, #?2 -+** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_x0_s32_index, svint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_index_s32 (p0, z0, x0), -+ z0_res = svldff1_gather_index_s32 (p0, z0, x0)) -+ -+/* -+** ldff1_gather_m1_s32_index: -+** mov (x[0-9]+), #?-4 -+** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_m1_s32_index, svint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_index_s32 (p0, z0, -1), -+ z0_res = svldff1_gather_index_s32 (p0, z0, -1)) -+ -+/* -+** ldff1_gather_0_s32_index: -+** ldff1w z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_0_s32_index, svint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_index_s32 (p0, z0, 0), -+ z0_res = svldff1_gather_index_s32 (p0, z0, 0)) -+ -+/* -+** ldff1_gather_5_s32_index: -+** ldff1w z0\.s, p0/z, \[z0\.s, #20\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_5_s32_index, svint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_index_s32 (p0, z0, 5), -+ z0_res = svldff1_gather_index_s32 (p0, z0, 5)) -+ -+/* -+** ldff1_gather_31_s32_index: -+** ldff1w z0\.s, p0/z, \[z0\.s, #124\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_31_s32_index, svint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_index_s32 (p0, z0, 31), -+ z0_res = svldff1_gather_index_s32 (p0, z0, 31)) -+ -+/* -+** ldff1_gather_32_s32_index: -+** mov (x[0-9]+), #?128 -+** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_32_s32_index, svint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_index_s32 (p0, z0, 32), -+ z0_res = svldff1_gather_index_s32 (p0, z0, 32)) -+ -+/* -+** ldff1_gather_x0_s32_s32offset: -+** ldff1w z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_x0_s32_s32offset, svint32_t, int32_t, svint32_t, -+ z0_res = svldff1_gather_s32offset_s32 (p0, x0, z0), -+ z0_res = svldff1_gather_offset (p0, x0, z0)) -+ -+/* -+** ldff1_gather_tied1_s32_s32offset: -+** ldff1w z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_s32_s32offset, svint32_t, int32_t, svint32_t, -+ z0_res = svldff1_gather_s32offset_s32 (p0, x0, z0), -+ z0_res = svldff1_gather_offset (p0, x0, z0)) -+ -+/* -+** ldff1_gather_untied_s32_s32offset: -+** ldff1w z0\.s, p0/z, \[x0, z1\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_untied_s32_s32offset, svint32_t, int32_t, svint32_t, -+ z0_res = svldff1_gather_s32offset_s32 (p0, x0, z1), -+ z0_res = svldff1_gather_offset (p0, x0, z1)) -+ -+/* -+** ldff1_gather_x0_s32_u32offset: -+** ldff1w z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_x0_s32_u32offset, svint32_t, int32_t, svuint32_t, -+ z0_res = svldff1_gather_u32offset_s32 (p0, x0, z0), -+ z0_res = svldff1_gather_offset (p0, x0, z0)) -+ -+/* -+** ldff1_gather_tied1_s32_u32offset: -+** ldff1w z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_s32_u32offset, svint32_t, 
int32_t, svuint32_t, -+ z0_res = svldff1_gather_u32offset_s32 (p0, x0, z0), -+ z0_res = svldff1_gather_offset (p0, x0, z0)) -+ -+/* -+** ldff1_gather_untied_s32_u32offset: -+** ldff1w z0\.s, p0/z, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_untied_s32_u32offset, svint32_t, int32_t, svuint32_t, -+ z0_res = svldff1_gather_u32offset_s32 (p0, x0, z1), -+ z0_res = svldff1_gather_offset (p0, x0, z1)) -+ -+/* -+** ldff1_gather_x0_s32_s32index: -+** ldff1w z0\.s, p0/z, \[x0, z0\.s, sxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_x0_s32_s32index, svint32_t, int32_t, svint32_t, -+ z0_res = svldff1_gather_s32index_s32 (p0, x0, z0), -+ z0_res = svldff1_gather_index (p0, x0, z0)) -+ -+/* -+** ldff1_gather_tied1_s32_s32index: -+** ldff1w z0\.s, p0/z, \[x0, z0\.s, sxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_s32_s32index, svint32_t, int32_t, svint32_t, -+ z0_res = svldff1_gather_s32index_s32 (p0, x0, z0), -+ z0_res = svldff1_gather_index (p0, x0, z0)) -+ -+/* -+** ldff1_gather_untied_s32_s32index: -+** ldff1w z0\.s, p0/z, \[x0, z1\.s, sxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_untied_s32_s32index, svint32_t, int32_t, svint32_t, -+ z0_res = svldff1_gather_s32index_s32 (p0, x0, z1), -+ z0_res = svldff1_gather_index (p0, x0, z1)) -+ -+/* -+** ldff1_gather_x0_s32_u32index: -+** ldff1w z0\.s, p0/z, \[x0, z0\.s, uxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_x0_s32_u32index, svint32_t, int32_t, svuint32_t, -+ z0_res = svldff1_gather_u32index_s32 (p0, x0, z0), -+ z0_res = svldff1_gather_index (p0, x0, z0)) -+ -+/* -+** ldff1_gather_tied1_s32_u32index: -+** ldff1w z0\.s, p0/z, \[x0, z0\.s, uxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_s32_u32index, svint32_t, int32_t, svuint32_t, -+ z0_res = svldff1_gather_u32index_s32 (p0, x0, z0), -+ z0_res = svldff1_gather_index (p0, x0, z0)) -+ -+/* -+** ldff1_gather_untied_s32_u32index: -+** ldff1w z0\.s, p0/z, \[x0, z1\.s, uxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_untied_s32_u32index, svint32_t, int32_t, svuint32_t, -+ z0_res = svldff1_gather_u32index_s32 (p0, x0, z1), -+ z0_res = svldff1_gather_index (p0, x0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_s64.c -new file mode 100644 -index 000000000..faf71bf9d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_s64.c -@@ -0,0 +1,348 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1_gather_s64_tied1: -+** ldff1d z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_s64_tied1, svint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_s64 (p0, z0), -+ z0_res = svldff1_gather_s64 (p0, z0)) -+ -+/* -+** ldff1_gather_s64_untied: -+** ldff1d z0\.d, p0/z, \[z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_s64_untied, svint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_s64 (p0, z1), -+ z0_res = svldff1_gather_s64 (p0, z1)) -+ -+/* -+** ldff1_gather_x0_s64_offset: -+** ldff1d z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_x0_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_s64 (p0, z0, x0), -+ z0_res = svldff1_gather_offset_s64 (p0, z0, x0)) -+ -+/* -+** ldff1_gather_m8_s64_offset: -+** mov (x[0-9]+), #?-8 -+** ldff1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_m8_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_s64 (p0, z0, -8), -+ z0_res = svldff1_gather_offset_s64 (p0, z0, -8)) -+ -+/* -+** ldff1_gather_0_s64_offset: -+** ldff1d z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_0_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_s64 (p0, z0, 0), -+ z0_res = svldff1_gather_offset_s64 (p0, z0, 0)) -+ -+/* -+** ldff1_gather_9_s64_offset: -+** mov (x[0-9]+), #?9 -+** ldff1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_9_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_s64 (p0, z0, 9), -+ z0_res = svldff1_gather_offset_s64 (p0, z0, 9)) -+ -+/* -+** ldff1_gather_10_s64_offset: -+** mov (x[0-9]+), #?10 -+** ldff1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_10_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_s64 (p0, z0, 10), -+ z0_res = svldff1_gather_offset_s64 (p0, z0, 10)) -+ -+/* -+** ldff1_gather_11_s64_offset: -+** mov (x[0-9]+), #?11 -+** ldff1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_11_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_s64 (p0, z0, 11), -+ z0_res = svldff1_gather_offset_s64 (p0, z0, 11)) -+ -+/* -+** ldff1_gather_12_s64_offset: -+** mov (x[0-9]+), #?12 -+** ldff1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_12_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_s64 (p0, z0, 12), -+ z0_res = svldff1_gather_offset_s64 (p0, z0, 12)) -+ -+/* -+** ldff1_gather_13_s64_offset: -+** mov (x[0-9]+), #?13 -+** ldff1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_13_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_s64 (p0, z0, 13), -+ z0_res = svldff1_gather_offset_s64 (p0, z0, 13)) -+ -+/* -+** ldff1_gather_14_s64_offset: -+** mov (x[0-9]+), #?14 -+** ldff1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_14_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_s64 (p0, z0, 14), -+ z0_res = svldff1_gather_offset_s64 (p0, z0, 14)) -+ -+/* -+** ldff1_gather_15_s64_offset: -+** mov (x[0-9]+), #?15 -+** ldff1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_15_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_s64 (p0, z0, 15), -+ z0_res = svldff1_gather_offset_s64 (p0, z0, 15)) -+ -+/* -+** 
ldff1_gather_16_s64_offset: -+** ldff1d z0\.d, p0/z, \[z0\.d, #16\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_16_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_s64 (p0, z0, 16), -+ z0_res = svldff1_gather_offset_s64 (p0, z0, 16)) -+ -+/* -+** ldff1_gather_248_s64_offset: -+** ldff1d z0\.d, p0/z, \[z0\.d, #248\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_248_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_s64 (p0, z0, 248), -+ z0_res = svldff1_gather_offset_s64 (p0, z0, 248)) -+ -+/* -+** ldff1_gather_256_s64_offset: -+** mov (x[0-9]+), #?256 -+** ldff1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_256_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_s64 (p0, z0, 256), -+ z0_res = svldff1_gather_offset_s64 (p0, z0, 256)) -+ -+/* -+** ldff1_gather_x0_s64_index: -+** lsl (x[0-9]+), x0, #?3 -+** ldff1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_x0_s64_index, svint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_index_s64 (p0, z0, x0), -+ z0_res = svldff1_gather_index_s64 (p0, z0, x0)) -+ -+/* -+** ldff1_gather_m1_s64_index: -+** mov (x[0-9]+), #?-8 -+** ldff1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_m1_s64_index, svint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_index_s64 (p0, z0, -1), -+ z0_res = svldff1_gather_index_s64 (p0, z0, -1)) -+ -+/* -+** ldff1_gather_0_s64_index: -+** ldff1d z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_0_s64_index, svint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_index_s64 (p0, z0, 0), -+ z0_res = svldff1_gather_index_s64 (p0, z0, 0)) -+ -+/* -+** ldff1_gather_5_s64_index: -+** ldff1d z0\.d, p0/z, \[z0\.d, #40\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_5_s64_index, svint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_index_s64 (p0, z0, 5), -+ z0_res = svldff1_gather_index_s64 (p0, z0, 5)) -+ -+/* -+** ldff1_gather_31_s64_index: -+** ldff1d z0\.d, p0/z, \[z0\.d, #248\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_31_s64_index, svint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_index_s64 (p0, z0, 31), -+ z0_res = svldff1_gather_index_s64 (p0, z0, 31)) -+ -+/* -+** ldff1_gather_32_s64_index: -+** mov (x[0-9]+), #?256 -+** ldff1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_32_s64_index, svint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_index_s64 (p0, z0, 32), -+ z0_res = svldff1_gather_index_s64 (p0, z0, 32)) -+ -+/* -+** ldff1_gather_x0_s64_s64offset: -+** ldff1d z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_x0_s64_s64offset, svint64_t, int64_t, svint64_t, -+ z0_res = svldff1_gather_s64offset_s64 (p0, x0, z0), -+ z0_res = svldff1_gather_offset (p0, x0, z0)) -+ -+/* -+** ldff1_gather_tied1_s64_s64offset: -+** ldff1d z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_s64_s64offset, svint64_t, int64_t, svint64_t, -+ z0_res = svldff1_gather_s64offset_s64 (p0, x0, z0), -+ z0_res = svldff1_gather_offset (p0, x0, z0)) -+ -+/* -+** ldff1_gather_untied_s64_s64offset: -+** ldff1d z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_untied_s64_s64offset, svint64_t, int64_t, svint64_t, -+ z0_res = svldff1_gather_s64offset_s64 (p0, x0, z1), -+ z0_res = svldff1_gather_offset (p0, x0, z1)) -+ -+/* -+** ldff1_gather_ext_s64_s64offset: -+** ldff1d z0\.d, p0/z, \[x0, z1\.d, sxtw\] -+** 
ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_ext_s64_s64offset, svint64_t, int64_t, svint64_t, -+ z0_res = svldff1_gather_s64offset_s64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svldff1_gather_offset (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ldff1_gather_x0_s64_u64offset: -+** ldff1d z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_x0_s64_u64offset, svint64_t, int64_t, svuint64_t, -+ z0_res = svldff1_gather_u64offset_s64 (p0, x0, z0), -+ z0_res = svldff1_gather_offset (p0, x0, z0)) -+ -+/* -+** ldff1_gather_tied1_s64_u64offset: -+** ldff1d z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_s64_u64offset, svint64_t, int64_t, svuint64_t, -+ z0_res = svldff1_gather_u64offset_s64 (p0, x0, z0), -+ z0_res = svldff1_gather_offset (p0, x0, z0)) -+ -+/* -+** ldff1_gather_untied_s64_u64offset: -+** ldff1d z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_untied_s64_u64offset, svint64_t, int64_t, svuint64_t, -+ z0_res = svldff1_gather_u64offset_s64 (p0, x0, z1), -+ z0_res = svldff1_gather_offset (p0, x0, z1)) -+ -+/* -+** ldff1_gather_ext_s64_u64offset: -+** ldff1d z0\.d, p0/z, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_ext_s64_u64offset, svint64_t, int64_t, svuint64_t, -+ z0_res = svldff1_gather_u64offset_s64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svldff1_gather_offset (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ldff1_gather_x0_s64_s64index: -+** ldff1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_x0_s64_s64index, svint64_t, int64_t, svint64_t, -+ z0_res = svldff1_gather_s64index_s64 (p0, x0, z0), -+ z0_res = svldff1_gather_index (p0, x0, z0)) -+ -+/* -+** ldff1_gather_tied1_s64_s64index: -+** ldff1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_s64_s64index, svint64_t, int64_t, svint64_t, -+ z0_res = svldff1_gather_s64index_s64 (p0, x0, z0), -+ z0_res = svldff1_gather_index (p0, x0, z0)) -+ -+/* -+** ldff1_gather_untied_s64_s64index: -+** ldff1d z0\.d, p0/z, \[x0, z1\.d, lsl 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_untied_s64_s64index, svint64_t, int64_t, svint64_t, -+ z0_res = svldff1_gather_s64index_s64 (p0, x0, z1), -+ z0_res = svldff1_gather_index (p0, x0, z1)) -+ -+/* -+** ldff1_gather_ext_s64_s64index: -+** ldff1d z0\.d, p0/z, \[x0, z1\.d, sxtw 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_ext_s64_s64index, svint64_t, int64_t, svint64_t, -+ z0_res = svldff1_gather_s64index_s64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svldff1_gather_index (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ldff1_gather_x0_s64_u64index: -+** ldff1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_x0_s64_u64index, svint64_t, int64_t, svuint64_t, -+ z0_res = svldff1_gather_u64index_s64 (p0, x0, z0), -+ z0_res = svldff1_gather_index (p0, x0, z0)) -+ -+/* -+** ldff1_gather_tied1_s64_u64index: -+** ldff1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_s64_u64index, svint64_t, int64_t, svuint64_t, -+ z0_res = svldff1_gather_u64index_s64 (p0, x0, z0), -+ z0_res = svldff1_gather_index (p0, x0, z0)) -+ -+/* -+** ldff1_gather_untied_s64_u64index: -+** ldff1d z0\.d, p0/z, \[x0, z1\.d, lsl 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_untied_s64_u64index, svint64_t, int64_t, svuint64_t, -+ z0_res = svldff1_gather_u64index_s64 (p0, x0, z1), -+ z0_res = svldff1_gather_index (p0, x0, z1)) -+ -+/* -+** 
ldff1_gather_ext_s64_u64index: -+** ldff1d z0\.d, p0/z, \[x0, z1\.d, uxtw 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_ext_s64_u64index, svint64_t, int64_t, svuint64_t, -+ z0_res = svldff1_gather_u64index_s64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svldff1_gather_index (p0, x0, svextw_x (p0, z1))) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_u32.c -new file mode 100644 -index 000000000..41c7dc9cf ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_u32.c -@@ -0,0 +1,272 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1_gather_u32_tied1: -+** ldff1w z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_u32_tied1, svuint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_u32 (p0, z0), -+ z0_res = svldff1_gather_u32 (p0, z0)) -+ -+/* -+** ldff1_gather_u32_untied: -+** ldff1w z0\.s, p0/z, \[z1\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_u32_untied, svuint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_u32 (p0, z1), -+ z0_res = svldff1_gather_u32 (p0, z1)) -+ -+/* -+** ldff1_gather_x0_u32_offset: -+** ldff1w z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_x0_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_offset_u32 (p0, z0, x0), -+ z0_res = svldff1_gather_offset_u32 (p0, z0, x0)) -+ -+/* -+** ldff1_gather_m4_u32_offset: -+** mov (x[0-9]+), #?-4 -+** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_m4_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_offset_u32 (p0, z0, -4), -+ z0_res = svldff1_gather_offset_u32 (p0, z0, -4)) -+ -+/* -+** ldff1_gather_0_u32_offset: -+** ldff1w z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_0_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_offset_u32 (p0, z0, 0), -+ z0_res = svldff1_gather_offset_u32 (p0, z0, 0)) -+ -+/* -+** ldff1_gather_5_u32_offset: -+** mov (x[0-9]+), #?5 -+** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_5_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_offset_u32 (p0, z0, 5), -+ z0_res = svldff1_gather_offset_u32 (p0, z0, 5)) -+ -+/* -+** ldff1_gather_6_u32_offset: -+** mov (x[0-9]+), #?6 -+** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_6_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_offset_u32 (p0, z0, 6), -+ z0_res = svldff1_gather_offset_u32 (p0, z0, 6)) -+ -+/* -+** ldff1_gather_7_u32_offset: -+** mov (x[0-9]+), #?7 -+** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_7_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_offset_u32 (p0, z0, 7), -+ z0_res = svldff1_gather_offset_u32 (p0, z0, 7)) -+ -+/* -+** ldff1_gather_8_u32_offset: -+** ldff1w z0\.s, p0/z, \[z0\.s, #8\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_8_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_offset_u32 (p0, z0, 8), -+ z0_res = svldff1_gather_offset_u32 (p0, z0, 8)) -+ -+/* -+** ldff1_gather_124_u32_offset: -+** ldff1w z0\.s, p0/z, \[z0\.s, #124\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_124_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_offset_u32 (p0, z0, 124), -+ z0_res = 
svldff1_gather_offset_u32 (p0, z0, 124)) -+ -+/* -+** ldff1_gather_128_u32_offset: -+** mov (x[0-9]+), #?128 -+** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_128_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_offset_u32 (p0, z0, 128), -+ z0_res = svldff1_gather_offset_u32 (p0, z0, 128)) -+ -+/* -+** ldff1_gather_x0_u32_index: -+** lsl (x[0-9]+), x0, #?2 -+** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_x0_u32_index, svuint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_index_u32 (p0, z0, x0), -+ z0_res = svldff1_gather_index_u32 (p0, z0, x0)) -+ -+/* -+** ldff1_gather_m1_u32_index: -+** mov (x[0-9]+), #?-4 -+** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_m1_u32_index, svuint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_index_u32 (p0, z0, -1), -+ z0_res = svldff1_gather_index_u32 (p0, z0, -1)) -+ -+/* -+** ldff1_gather_0_u32_index: -+** ldff1w z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_0_u32_index, svuint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_index_u32 (p0, z0, 0), -+ z0_res = svldff1_gather_index_u32 (p0, z0, 0)) -+ -+/* -+** ldff1_gather_5_u32_index: -+** ldff1w z0\.s, p0/z, \[z0\.s, #20\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_5_u32_index, svuint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_index_u32 (p0, z0, 5), -+ z0_res = svldff1_gather_index_u32 (p0, z0, 5)) -+ -+/* -+** ldff1_gather_31_u32_index: -+** ldff1w z0\.s, p0/z, \[z0\.s, #124\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_31_u32_index, svuint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_index_u32 (p0, z0, 31), -+ z0_res = svldff1_gather_index_u32 (p0, z0, 31)) -+ -+/* -+** ldff1_gather_32_u32_index: -+** mov (x[0-9]+), #?128 -+** ldff1w z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_32_u32_index, svuint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32base_index_u32 (p0, z0, 32), -+ z0_res = svldff1_gather_index_u32 (p0, z0, 32)) -+ -+/* -+** ldff1_gather_x0_u32_s32offset: -+** ldff1w z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_x0_u32_s32offset, svuint32_t, uint32_t, svint32_t, -+ z0_res = svldff1_gather_s32offset_u32 (p0, x0, z0), -+ z0_res = svldff1_gather_offset (p0, x0, z0)) -+ -+/* -+** ldff1_gather_tied1_u32_s32offset: -+** ldff1w z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_u32_s32offset, svuint32_t, uint32_t, svint32_t, -+ z0_res = svldff1_gather_s32offset_u32 (p0, x0, z0), -+ z0_res = svldff1_gather_offset (p0, x0, z0)) -+ -+/* -+** ldff1_gather_untied_u32_s32offset: -+** ldff1w z0\.s, p0/z, \[x0, z1\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_untied_u32_s32offset, svuint32_t, uint32_t, svint32_t, -+ z0_res = svldff1_gather_s32offset_u32 (p0, x0, z1), -+ z0_res = svldff1_gather_offset (p0, x0, z1)) -+ -+/* -+** ldff1_gather_x0_u32_u32offset: -+** ldff1w z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_x0_u32_u32offset, svuint32_t, uint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32offset_u32 (p0, x0, z0), -+ z0_res = svldff1_gather_offset (p0, x0, z0)) -+ -+/* -+** ldff1_gather_tied1_u32_u32offset: -+** ldff1w z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_u32_u32offset, svuint32_t, uint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32offset_u32 (p0, x0, z0), -+ 
z0_res = svldff1_gather_offset (p0, x0, z0)) -+ -+/* -+** ldff1_gather_untied_u32_u32offset: -+** ldff1w z0\.s, p0/z, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_untied_u32_u32offset, svuint32_t, uint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32offset_u32 (p0, x0, z1), -+ z0_res = svldff1_gather_offset (p0, x0, z1)) -+ -+/* -+** ldff1_gather_x0_u32_s32index: -+** ldff1w z0\.s, p0/z, \[x0, z0\.s, sxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_x0_u32_s32index, svuint32_t, uint32_t, svint32_t, -+ z0_res = svldff1_gather_s32index_u32 (p0, x0, z0), -+ z0_res = svldff1_gather_index (p0, x0, z0)) -+ -+/* -+** ldff1_gather_tied1_u32_s32index: -+** ldff1w z0\.s, p0/z, \[x0, z0\.s, sxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_u32_s32index, svuint32_t, uint32_t, svint32_t, -+ z0_res = svldff1_gather_s32index_u32 (p0, x0, z0), -+ z0_res = svldff1_gather_index (p0, x0, z0)) -+ -+/* -+** ldff1_gather_untied_u32_s32index: -+** ldff1w z0\.s, p0/z, \[x0, z1\.s, sxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_untied_u32_s32index, svuint32_t, uint32_t, svint32_t, -+ z0_res = svldff1_gather_s32index_u32 (p0, x0, z1), -+ z0_res = svldff1_gather_index (p0, x0, z1)) -+ -+/* -+** ldff1_gather_x0_u32_u32index: -+** ldff1w z0\.s, p0/z, \[x0, z0\.s, uxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_x0_u32_u32index, svuint32_t, uint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32index_u32 (p0, x0, z0), -+ z0_res = svldff1_gather_index (p0, x0, z0)) -+ -+/* -+** ldff1_gather_tied1_u32_u32index: -+** ldff1w z0\.s, p0/z, \[x0, z0\.s, uxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_u32_u32index, svuint32_t, uint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32index_u32 (p0, x0, z0), -+ z0_res = svldff1_gather_index (p0, x0, z0)) -+ -+/* -+** ldff1_gather_untied_u32_u32index: -+** ldff1w z0\.s, p0/z, \[x0, z1\.s, uxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_untied_u32_u32index, svuint32_t, uint32_t, svuint32_t, -+ z0_res = svldff1_gather_u32index_u32 (p0, x0, z1), -+ z0_res = svldff1_gather_index (p0, x0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_u64.c -new file mode 100644 -index 000000000..8b53ce94f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_gather_u64.c -@@ -0,0 +1,348 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1_gather_u64_tied1: -+** ldff1d z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_u64_tied1, svuint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_u64 (p0, z0), -+ z0_res = svldff1_gather_u64 (p0, z0)) -+ -+/* -+** ldff1_gather_u64_untied: -+** ldff1d z0\.d, p0/z, \[z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_u64_untied, svuint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_u64 (p0, z1), -+ z0_res = svldff1_gather_u64 (p0, z1)) -+ -+/* -+** ldff1_gather_x0_u64_offset: -+** ldff1d z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_x0_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_u64 (p0, z0, x0), -+ z0_res = svldff1_gather_offset_u64 (p0, z0, x0)) -+ -+/* -+** ldff1_gather_m8_u64_offset: -+** mov (x[0-9]+), #?-8 -+** ldff1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_m8_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_u64 (p0, z0, -8), -+ z0_res = svldff1_gather_offset_u64 (p0, z0, -8)) -+ -+/* -+** ldff1_gather_0_u64_offset: -+** ldff1d z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_0_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_u64 (p0, z0, 0), -+ z0_res = svldff1_gather_offset_u64 (p0, z0, 0)) -+ -+/* -+** ldff1_gather_9_u64_offset: -+** mov (x[0-9]+), #?9 -+** ldff1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_9_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_u64 (p0, z0, 9), -+ z0_res = svldff1_gather_offset_u64 (p0, z0, 9)) -+ -+/* -+** ldff1_gather_10_u64_offset: -+** mov (x[0-9]+), #?10 -+** ldff1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_10_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_u64 (p0, z0, 10), -+ z0_res = svldff1_gather_offset_u64 (p0, z0, 10)) -+ -+/* -+** ldff1_gather_11_u64_offset: -+** mov (x[0-9]+), #?11 -+** ldff1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_11_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_u64 (p0, z0, 11), -+ z0_res = svldff1_gather_offset_u64 (p0, z0, 11)) -+ -+/* -+** ldff1_gather_12_u64_offset: -+** mov (x[0-9]+), #?12 -+** ldff1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_12_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_u64 (p0, z0, 12), -+ z0_res = svldff1_gather_offset_u64 (p0, z0, 12)) -+ -+/* -+** ldff1_gather_13_u64_offset: -+** mov (x[0-9]+), #?13 -+** ldff1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_13_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_u64 (p0, z0, 13), -+ z0_res = svldff1_gather_offset_u64 (p0, z0, 13)) -+ -+/* -+** ldff1_gather_14_u64_offset: -+** mov (x[0-9]+), #?14 -+** ldff1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_14_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_u64 (p0, z0, 14), -+ z0_res = svldff1_gather_offset_u64 (p0, z0, 14)) -+ -+/* -+** ldff1_gather_15_u64_offset: -+** mov (x[0-9]+), #?15 -+** ldff1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_15_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_u64 (p0, z0, 15), -+ z0_res = svldff1_gather_offset_u64 (p0, z0, 15)) -+ -+/* -+** 
ldff1_gather_16_u64_offset: -+** ldff1d z0\.d, p0/z, \[z0\.d, #16\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_16_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_u64 (p0, z0, 16), -+ z0_res = svldff1_gather_offset_u64 (p0, z0, 16)) -+ -+/* -+** ldff1_gather_248_u64_offset: -+** ldff1d z0\.d, p0/z, \[z0\.d, #248\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_248_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_u64 (p0, z0, 248), -+ z0_res = svldff1_gather_offset_u64 (p0, z0, 248)) -+ -+/* -+** ldff1_gather_256_u64_offset: -+** mov (x[0-9]+), #?256 -+** ldff1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_256_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_offset_u64 (p0, z0, 256), -+ z0_res = svldff1_gather_offset_u64 (p0, z0, 256)) -+ -+/* -+** ldff1_gather_x0_u64_index: -+** lsl (x[0-9]+), x0, #?3 -+** ldff1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_x0_u64_index, svuint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_index_u64 (p0, z0, x0), -+ z0_res = svldff1_gather_index_u64 (p0, z0, x0)) -+ -+/* -+** ldff1_gather_m1_u64_index: -+** mov (x[0-9]+), #?-8 -+** ldff1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_m1_u64_index, svuint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_index_u64 (p0, z0, -1), -+ z0_res = svldff1_gather_index_u64 (p0, z0, -1)) -+ -+/* -+** ldff1_gather_0_u64_index: -+** ldff1d z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_0_u64_index, svuint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_index_u64 (p0, z0, 0), -+ z0_res = svldff1_gather_index_u64 (p0, z0, 0)) -+ -+/* -+** ldff1_gather_5_u64_index: -+** ldff1d z0\.d, p0/z, \[z0\.d, #40\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_5_u64_index, svuint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_index_u64 (p0, z0, 5), -+ z0_res = svldff1_gather_index_u64 (p0, z0, 5)) -+ -+/* -+** ldff1_gather_31_u64_index: -+** ldff1d z0\.d, p0/z, \[z0\.d, #248\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_31_u64_index, svuint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_index_u64 (p0, z0, 31), -+ z0_res = svldff1_gather_index_u64 (p0, z0, 31)) -+ -+/* -+** ldff1_gather_32_u64_index: -+** mov (x[0-9]+), #?256 -+** ldff1d z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1_gather_32_u64_index, svuint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64base_index_u64 (p0, z0, 32), -+ z0_res = svldff1_gather_index_u64 (p0, z0, 32)) -+ -+/* -+** ldff1_gather_x0_u64_s64offset: -+** ldff1d z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_x0_u64_s64offset, svuint64_t, uint64_t, svint64_t, -+ z0_res = svldff1_gather_s64offset_u64 (p0, x0, z0), -+ z0_res = svldff1_gather_offset (p0, x0, z0)) -+ -+/* -+** ldff1_gather_tied1_u64_s64offset: -+** ldff1d z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_u64_s64offset, svuint64_t, uint64_t, svint64_t, -+ z0_res = svldff1_gather_s64offset_u64 (p0, x0, z0), -+ z0_res = svldff1_gather_offset (p0, x0, z0)) -+ -+/* -+** ldff1_gather_untied_u64_s64offset: -+** ldff1d z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_untied_u64_s64offset, svuint64_t, uint64_t, svint64_t, -+ z0_res = svldff1_gather_s64offset_u64 (p0, x0, z1), -+ z0_res = svldff1_gather_offset (p0, x0, z1)) -+ -+/* -+** ldff1_gather_ext_u64_s64offset: -+** ldff1d z0\.d, p0/z, \[x0, 
z1\.d, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_ext_u64_s64offset, svuint64_t, uint64_t, svint64_t, -+ z0_res = svldff1_gather_s64offset_u64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svldff1_gather_offset (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ldff1_gather_x0_u64_u64offset: -+** ldff1d z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_x0_u64_u64offset, svuint64_t, uint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64offset_u64 (p0, x0, z0), -+ z0_res = svldff1_gather_offset (p0, x0, z0)) -+ -+/* -+** ldff1_gather_tied1_u64_u64offset: -+** ldff1d z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_u64_u64offset, svuint64_t, uint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64offset_u64 (p0, x0, z0), -+ z0_res = svldff1_gather_offset (p0, x0, z0)) -+ -+/* -+** ldff1_gather_untied_u64_u64offset: -+** ldff1d z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_untied_u64_u64offset, svuint64_t, uint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64offset_u64 (p0, x0, z1), -+ z0_res = svldff1_gather_offset (p0, x0, z1)) -+ -+/* -+** ldff1_gather_ext_u64_u64offset: -+** ldff1d z0\.d, p0/z, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_ext_u64_u64offset, svuint64_t, uint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64offset_u64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svldff1_gather_offset (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ldff1_gather_x0_u64_s64index: -+** ldff1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_x0_u64_s64index, svuint64_t, uint64_t, svint64_t, -+ z0_res = svldff1_gather_s64index_u64 (p0, x0, z0), -+ z0_res = svldff1_gather_index (p0, x0, z0)) -+ -+/* -+** ldff1_gather_tied1_u64_s64index: -+** ldff1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_u64_s64index, svuint64_t, uint64_t, svint64_t, -+ z0_res = svldff1_gather_s64index_u64 (p0, x0, z0), -+ z0_res = svldff1_gather_index (p0, x0, z0)) -+ -+/* -+** ldff1_gather_untied_u64_s64index: -+** ldff1d z0\.d, p0/z, \[x0, z1\.d, lsl 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_untied_u64_s64index, svuint64_t, uint64_t, svint64_t, -+ z0_res = svldff1_gather_s64index_u64 (p0, x0, z1), -+ z0_res = svldff1_gather_index (p0, x0, z1)) -+ -+/* -+** ldff1_gather_ext_u64_s64index: -+** ldff1d z0\.d, p0/z, \[x0, z1\.d, sxtw 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_ext_u64_s64index, svuint64_t, uint64_t, svint64_t, -+ z0_res = svldff1_gather_s64index_u64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svldff1_gather_index (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ldff1_gather_x0_u64_u64index: -+** ldff1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_x0_u64_u64index, svuint64_t, uint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64index_u64 (p0, x0, z0), -+ z0_res = svldff1_gather_index (p0, x0, z0)) -+ -+/* -+** ldff1_gather_tied1_u64_u64index: -+** ldff1d z0\.d, p0/z, \[x0, z0\.d, lsl 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_tied1_u64_u64index, svuint64_t, uint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64index_u64 (p0, x0, z0), -+ z0_res = svldff1_gather_index (p0, x0, z0)) -+ -+/* -+** ldff1_gather_untied_u64_u64index: -+** ldff1d z0\.d, p0/z, \[x0, z1\.d, lsl 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_untied_u64_u64index, svuint64_t, uint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64index_u64 (p0, x0, z1), -+ z0_res = svldff1_gather_index (p0, x0, 
z1)) -+ -+/* -+** ldff1_gather_ext_u64_u64index: -+** ldff1d z0\.d, p0/z, \[x0, z1\.d, uxtw 3\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1_gather_ext_u64_u64index, svuint64_t, uint64_t, svuint64_t, -+ z0_res = svldff1_gather_u64index_u64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svldff1_gather_index (p0, x0, svextw_x (p0, z1))) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s16.c -new file mode 100644 -index 000000000..1d5fde0e6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s16.c -@@ -0,0 +1,86 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1_s16_base: -+** ldff1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_s16_base, svint16_t, int16_t, -+ z0 = svldff1_s16 (p0, x0), -+ z0 = svldff1 (p0, x0)) -+ -+/* -+** ldff1_s16_index: -+** ldff1h z0\.h, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ldff1_s16_index, svint16_t, int16_t, -+ z0 = svldff1_s16 (p0, x0 + x1), -+ z0 = svldff1 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_s16_1: -+** incb x0 -+** ldff1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_s16_1, svint16_t, int16_t, -+ z0 = svldff1_s16 (p0, x0 + svcnth ()), -+ z0 = svldff1 (p0, x0 + svcnth ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_s16_m1: -+** decb x0 -+** ldff1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_s16_m1, svint16_t, int16_t, -+ z0 = svldff1_s16 (p0, x0 - svcnth ()), -+ z0 = svldff1 (p0, x0 - svcnth ())) -+ -+/* -+** ldff1_vnum_s16_0: -+** ldff1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_s16_0, svint16_t, int16_t, -+ z0 = svldff1_vnum_s16 (p0, x0, 0), -+ z0 = svldff1_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_vnum_s16_1: -+** incb x0 -+** ldff1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_s16_1, svint16_t, int16_t, -+ z0 = svldff1_vnum_s16 (p0, x0, 1), -+ z0 = svldff1_vnum (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_vnum_s16_m1: -+** decb x0 -+** ldff1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_s16_m1, svint16_t, int16_t, -+ z0 = svldff1_vnum_s16 (p0, x0, -1), -+ z0 = svldff1_vnum (p0, x0, -1)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ldff1_vnum_s16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ldff1h z0\.h, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_s16_x1, svint16_t, int16_t, -+ z0 = svldff1_vnum_s16 (p0, x0, x1), -+ z0 = svldff1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s32.c -new file mode 100644 -index 000000000..97a36e884 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s32.c -@@ -0,0 +1,86 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1_s32_base: -+** ldff1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_s32_base, svint32_t, int32_t, -+ z0 = svldff1_s32 (p0, x0), -+ z0 = svldff1 (p0, x0)) -+ -+/* -+** ldff1_s32_index: -+** ldff1w z0\.s, p0/z, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_LOAD (ldff1_s32_index, svint32_t, int32_t, -+ z0 = svldff1_s32 (p0, x0 + x1), -+ z0 = svldff1 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_s32_1: -+** incb x0 -+** ldff1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_s32_1, svint32_t, int32_t, -+ z0 = svldff1_s32 (p0, x0 + svcntw ()), -+ z0 = svldff1 (p0, x0 + svcntw ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_s32_m1: -+** decb x0 -+** ldff1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_s32_m1, svint32_t, int32_t, -+ z0 = svldff1_s32 (p0, x0 - svcntw ()), -+ z0 = svldff1 (p0, x0 - svcntw ())) -+ -+/* -+** ldff1_vnum_s32_0: -+** ldff1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_s32_0, svint32_t, int32_t, -+ z0 = svldff1_vnum_s32 (p0, x0, 0), -+ z0 = svldff1_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_vnum_s32_1: -+** incb x0 -+** ldff1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_s32_1, svint32_t, int32_t, -+ z0 = svldff1_vnum_s32 (p0, x0, 1), -+ z0 = svldff1_vnum (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_vnum_s32_m1: -+** decb x0 -+** ldff1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_s32_m1, svint32_t, int32_t, -+ z0 = svldff1_vnum_s32 (p0, x0, -1), -+ z0 = svldff1_vnum (p0, x0, -1)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ldff1_vnum_s32_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ldff1w z0\.s, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_s32_x1, svint32_t, int32_t, -+ z0 = svldff1_vnum_s32 (p0, x0, x1), -+ z0 = svldff1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s64.c -new file mode 100644 -index 000000000..c018a4c1c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s64.c -@@ -0,0 +1,86 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1_s64_base: -+** ldff1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_s64_base, svint64_t, int64_t, -+ z0 = svldff1_s64 (p0, x0), -+ z0 = svldff1 (p0, x0)) -+ -+/* -+** ldff1_s64_index: -+** ldff1d z0\.d, p0/z, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_LOAD (ldff1_s64_index, svint64_t, int64_t, -+ z0 = svldff1_s64 (p0, x0 + x1), -+ z0 = svldff1 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_s64_1: -+** incb x0 -+** ldff1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_s64_1, svint64_t, int64_t, -+ z0 = svldff1_s64 (p0, x0 + svcntd ()), -+ z0 = svldff1 (p0, x0 + svcntd ())) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ldff1_s64_m1: -+** decb x0 -+** ldff1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_s64_m1, svint64_t, int64_t, -+ z0 = svldff1_s64 (p0, x0 - svcntd ()), -+ z0 = svldff1 (p0, x0 - svcntd ())) -+ -+/* -+** ldff1_vnum_s64_0: -+** ldff1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_s64_0, svint64_t, int64_t, -+ z0 = svldff1_vnum_s64 (p0, x0, 0), -+ z0 = svldff1_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_vnum_s64_1: -+** incb x0 -+** ldff1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_s64_1, svint64_t, int64_t, -+ z0 = svldff1_vnum_s64 (p0, x0, 1), -+ z0 = svldff1_vnum (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_vnum_s64_m1: -+** decb x0 -+** ldff1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_s64_m1, svint64_t, int64_t, -+ z0 = svldff1_vnum_s64 (p0, x0, -1), -+ z0 = svldff1_vnum (p0, x0, -1)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ldff1_vnum_s64_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ldff1d z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_s64_x1, svint64_t, int64_t, -+ z0 = svldff1_vnum_s64 (p0, x0, x1), -+ z0 = svldff1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s8.c -new file mode 100644 -index 000000000..cf620d1f4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_s8.c -@@ -0,0 +1,90 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1_s8_base: -+** ldff1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_s8_base, svint8_t, int8_t, -+ z0 = svldff1_s8 (p0, x0), -+ z0 = svldff1 (p0, x0)) -+ -+/* -+** ldff1_s8_index: -+** ldff1b z0\.b, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ldff1_s8_index, svint8_t, int8_t, -+ z0 = svldff1_s8 (p0, x0 + x1), -+ z0 = svldff1 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_s8_1: -+** incb x0 -+** ldff1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_s8_1, svint8_t, int8_t, -+ z0 = svldff1_s8 (p0, x0 + svcntb ()), -+ z0 = svldff1 (p0, x0 + svcntb ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_s8_m1: -+** decb x0 -+** ldff1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_s8_m1, svint8_t, int8_t, -+ z0 = svldff1_s8 (p0, x0 - svcntb ()), -+ z0 = svldff1 (p0, x0 - svcntb ())) -+ -+/* -+** ldff1_vnum_s8_0: -+** ldff1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_s8_0, svint8_t, int8_t, -+ z0 = svldff1_vnum_s8 (p0, x0, 0), -+ z0 = svldff1_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_vnum_s8_1: -+** incb x0 -+** ldff1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_s8_1, svint8_t, int8_t, -+ z0 = svldff1_vnum_s8 (p0, x0, 1), -+ z0 = svldff1_vnum (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ldff1_vnum_s8_m1: -+** decb x0 -+** ldff1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_s8_m1, svint8_t, int8_t, -+ z0 = svldff1_vnum_s8 (p0, x0, -1), -+ z0 = svldff1_vnum (p0, x0, -1)) -+ -+/* -+** ldff1_vnum_s8_x1: -+** cntb (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldff1b z0\.b, p0/z, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** ldff1b z0\.b, p0/z, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_s8_x1, svint8_t, int8_t, -+ z0 = svldff1_vnum_s8 (p0, x0, x1), -+ z0 = svldff1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u16.c -new file mode 100644 -index 000000000..1fa819296 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u16.c -@@ -0,0 +1,86 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1_u16_base: -+** ldff1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_u16_base, svuint16_t, uint16_t, -+ z0 = svldff1_u16 (p0, x0), -+ z0 = svldff1 (p0, x0)) -+ -+/* -+** ldff1_u16_index: -+** ldff1h z0\.h, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ldff1_u16_index, svuint16_t, uint16_t, -+ z0 = svldff1_u16 (p0, x0 + x1), -+ z0 = svldff1 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_u16_1: -+** incb x0 -+** ldff1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_u16_1, svuint16_t, uint16_t, -+ z0 = svldff1_u16 (p0, x0 + svcnth ()), -+ z0 = svldff1 (p0, x0 + svcnth ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_u16_m1: -+** decb x0 -+** ldff1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_u16_m1, svuint16_t, uint16_t, -+ z0 = svldff1_u16 (p0, x0 - svcnth ()), -+ z0 = svldff1 (p0, x0 - svcnth ())) -+ -+/* -+** ldff1_vnum_u16_0: -+** ldff1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_u16_0, svuint16_t, uint16_t, -+ z0 = svldff1_vnum_u16 (p0, x0, 0), -+ z0 = svldff1_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_vnum_u16_1: -+** incb x0 -+** ldff1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_u16_1, svuint16_t, uint16_t, -+ z0 = svldff1_vnum_u16 (p0, x0, 1), -+ z0 = svldff1_vnum (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_vnum_u16_m1: -+** decb x0 -+** ldff1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_u16_m1, svuint16_t, uint16_t, -+ z0 = svldff1_vnum_u16 (p0, x0, -1), -+ z0 = svldff1_vnum (p0, x0, -1)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ldff1_vnum_u16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ldff1h z0\.h, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_u16_x1, svuint16_t, uint16_t, -+ z0 = svldff1_vnum_u16 (p0, x0, x1), -+ z0 = svldff1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u32.c -new file mode 100644 -index 000000000..5224ec40a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u32.c -@@ -0,0 +1,86 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1_u32_base: -+** ldff1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_u32_base, svuint32_t, uint32_t, -+ z0 = svldff1_u32 (p0, x0), -+ z0 = svldff1 (p0, x0)) -+ -+/* -+** ldff1_u32_index: -+** ldff1w z0\.s, p0/z, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_LOAD (ldff1_u32_index, svuint32_t, uint32_t, -+ z0 = svldff1_u32 (p0, x0 + x1), -+ z0 = svldff1 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_u32_1: -+** incb x0 -+** ldff1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_u32_1, svuint32_t, uint32_t, -+ z0 = svldff1_u32 (p0, x0 + svcntw ()), -+ z0 = svldff1 (p0, x0 + svcntw ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_u32_m1: -+** decb x0 -+** ldff1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_u32_m1, svuint32_t, uint32_t, -+ z0 = svldff1_u32 (p0, x0 - svcntw ()), -+ z0 = svldff1 (p0, x0 - svcntw ())) -+ -+/* -+** ldff1_vnum_u32_0: -+** ldff1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_u32_0, svuint32_t, uint32_t, -+ z0 = svldff1_vnum_u32 (p0, x0, 0), -+ z0 = svldff1_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_vnum_u32_1: -+** incb x0 -+** ldff1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_u32_1, svuint32_t, uint32_t, -+ z0 = svldff1_vnum_u32 (p0, x0, 1), -+ z0 = svldff1_vnum (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_vnum_u32_m1: -+** decb x0 -+** ldff1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_u32_m1, svuint32_t, uint32_t, -+ z0 = svldff1_vnum_u32 (p0, x0, -1), -+ z0 = svldff1_vnum (p0, x0, -1)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ldff1_vnum_u32_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ldff1w z0\.s, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_u32_x1, svuint32_t, uint32_t, -+ z0 = svldff1_vnum_u32 (p0, x0, x1), -+ z0 = svldff1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u64.c -new file mode 100644 -index 000000000..18e87f2b8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u64.c -@@ -0,0 +1,86 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1_u64_base: -+** ldff1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_u64_base, svuint64_t, uint64_t, -+ z0 = svldff1_u64 (p0, x0), -+ z0 = svldff1 (p0, x0)) -+ -+/* -+** ldff1_u64_index: -+** ldff1d z0\.d, p0/z, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_LOAD (ldff1_u64_index, svuint64_t, uint64_t, -+ z0 = svldff1_u64 (p0, x0 + x1), -+ z0 = svldff1 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_u64_1: -+** incb x0 -+** ldff1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_u64_1, svuint64_t, uint64_t, -+ z0 = svldff1_u64 (p0, x0 + svcntd ()), -+ z0 = svldff1 (p0, x0 + svcntd ())) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ldff1_u64_m1: -+** decb x0 -+** ldff1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_u64_m1, svuint64_t, uint64_t, -+ z0 = svldff1_u64 (p0, x0 - svcntd ()), -+ z0 = svldff1 (p0, x0 - svcntd ())) -+ -+/* -+** ldff1_vnum_u64_0: -+** ldff1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_u64_0, svuint64_t, uint64_t, -+ z0 = svldff1_vnum_u64 (p0, x0, 0), -+ z0 = svldff1_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_vnum_u64_1: -+** incb x0 -+** ldff1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_u64_1, svuint64_t, uint64_t, -+ z0 = svldff1_vnum_u64 (p0, x0, 1), -+ z0 = svldff1_vnum (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_vnum_u64_m1: -+** decb x0 -+** ldff1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_u64_m1, svuint64_t, uint64_t, -+ z0 = svldff1_vnum_u64 (p0, x0, -1), -+ z0 = svldff1_vnum (p0, x0, -1)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ldff1_vnum_u64_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ldff1d z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_u64_x1, svuint64_t, uint64_t, -+ z0 = svldff1_vnum_u64 (p0, x0, x1), -+ z0 = svldff1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u8.c -new file mode 100644 -index 000000000..83883fca4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1_u8.c -@@ -0,0 +1,90 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1_u8_base: -+** ldff1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_u8_base, svuint8_t, uint8_t, -+ z0 = svldff1_u8 (p0, x0), -+ z0 = svldff1 (p0, x0)) -+ -+/* -+** ldff1_u8_index: -+** ldff1b z0\.b, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ldff1_u8_index, svuint8_t, uint8_t, -+ z0 = svldff1_u8 (p0, x0 + x1), -+ z0 = svldff1 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_u8_1: -+** incb x0 -+** ldff1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_u8_1, svuint8_t, uint8_t, -+ z0 = svldff1_u8 (p0, x0 + svcntb ()), -+ z0 = svldff1 (p0, x0 + svcntb ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_u8_m1: -+** decb x0 -+** ldff1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_u8_m1, svuint8_t, uint8_t, -+ z0 = svldff1_u8 (p0, x0 - svcntb ()), -+ z0 = svldff1 (p0, x0 - svcntb ())) -+ -+/* -+** ldff1_vnum_u8_0: -+** ldff1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_u8_0, svuint8_t, uint8_t, -+ z0 = svldff1_vnum_u8 (p0, x0, 0), -+ z0 = svldff1_vnum (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1_vnum_u8_1: -+** incb x0 -+** ldff1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_u8_1, svuint8_t, uint8_t, -+ z0 = svldff1_vnum_u8 (p0, x0, 1), -+ z0 = svldff1_vnum (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ldff1_vnum_u8_m1: -+** decb x0 -+** ldff1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_u8_m1, svuint8_t, uint8_t, -+ z0 = svldff1_vnum_u8 (p0, x0, -1), -+ z0 = svldff1_vnum (p0, x0, -1)) -+ -+/* -+** ldff1_vnum_u8_x1: -+** cntb (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldff1b z0\.b, p0/z, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** ldff1b z0\.b, p0/z, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_LOAD (ldff1_vnum_u8_x1, svuint8_t, uint8_t, -+ z0 = svldff1_vnum_u8 (p0, x0, x1), -+ z0 = svldff1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_s32.c -new file mode 100644 -index 000000000..c2a676807 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_s32.c -@@ -0,0 +1,131 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1sb_gather_s32_tied1: -+** ldff1sb z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sb_gather_s32_tied1, svint32_t, svuint32_t, -+ z0_res = svldff1sb_gather_u32base_s32 (p0, z0), -+ z0_res = svldff1sb_gather_s32 (p0, z0)) -+ -+/* -+** ldff1sb_gather_s32_untied: -+** ldff1sb z0\.s, p0/z, \[z1\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sb_gather_s32_untied, svint32_t, svuint32_t, -+ z0_res = svldff1sb_gather_u32base_s32 (p0, z1), -+ z0_res = svldff1sb_gather_s32 (p0, z1)) -+ -+/* -+** ldff1sb_gather_x0_s32_offset: -+** ldff1sb z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sb_gather_x0_s32_offset, svint32_t, svuint32_t, -+ z0_res = svldff1sb_gather_u32base_offset_s32 (p0, z0, x0), -+ z0_res = svldff1sb_gather_offset_s32 (p0, z0, x0)) -+ -+/* -+** ldff1sb_gather_m1_s32_offset: -+** mov (x[0-9]+), #?-1 -+** ldff1sb z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sb_gather_m1_s32_offset, svint32_t, svuint32_t, -+ z0_res = svldff1sb_gather_u32base_offset_s32 (p0, z0, -1), -+ z0_res = svldff1sb_gather_offset_s32 (p0, z0, -1)) -+ -+/* -+** ldff1sb_gather_0_s32_offset: -+** ldff1sb z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sb_gather_0_s32_offset, svint32_t, svuint32_t, -+ z0_res = svldff1sb_gather_u32base_offset_s32 (p0, z0, 0), -+ z0_res = svldff1sb_gather_offset_s32 (p0, z0, 0)) -+ -+/* -+** ldff1sb_gather_5_s32_offset: -+** ldff1sb z0\.s, p0/z, \[z0\.s, #5\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sb_gather_5_s32_offset, svint32_t, svuint32_t, -+ z0_res = svldff1sb_gather_u32base_offset_s32 (p0, z0, 5), -+ z0_res = svldff1sb_gather_offset_s32 (p0, z0, 5)) -+ -+/* -+** ldff1sb_gather_31_s32_offset: -+** ldff1sb z0\.s, p0/z, \[z0\.s, #31\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sb_gather_31_s32_offset, svint32_t, svuint32_t, -+ z0_res = svldff1sb_gather_u32base_offset_s32 (p0, z0, 31), -+ z0_res = svldff1sb_gather_offset_s32 (p0, z0, 31)) -+ -+/* -+** ldff1sb_gather_32_s32_offset: -+** mov (x[0-9]+), #?32 -+** ldff1sb z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sb_gather_32_s32_offset, svint32_t, svuint32_t, -+ z0_res = svldff1sb_gather_u32base_offset_s32 (p0, z0, 32), -+ z0_res = svldff1sb_gather_offset_s32 (p0, z0, 32)) -+ -+/* -+** ldff1sb_gather_x0_s32_s32offset: -+** ldff1sb z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sb_gather_x0_s32_s32offset, svint32_t, int8_t, svint32_t, -+ z0_res = svldff1sb_gather_s32offset_s32 (p0, x0, 
z0), -+ z0_res = svldff1sb_gather_offset_s32 (p0, x0, z0)) -+ -+/* -+** ldff1sb_gather_tied1_s32_s32offset: -+** ldff1sb z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sb_gather_tied1_s32_s32offset, svint32_t, int8_t, svint32_t, -+ z0_res = svldff1sb_gather_s32offset_s32 (p0, x0, z0), -+ z0_res = svldff1sb_gather_offset_s32 (p0, x0, z0)) -+ -+/* -+** ldff1sb_gather_untied_s32_s32offset: -+** ldff1sb z0\.s, p0/z, \[x0, z1\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sb_gather_untied_s32_s32offset, svint32_t, int8_t, svint32_t, -+ z0_res = svldff1sb_gather_s32offset_s32 (p0, x0, z1), -+ z0_res = svldff1sb_gather_offset_s32 (p0, x0, z1)) -+ -+/* -+** ldff1sb_gather_x0_s32_u32offset: -+** ldff1sb z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sb_gather_x0_s32_u32offset, svint32_t, int8_t, svuint32_t, -+ z0_res = svldff1sb_gather_u32offset_s32 (p0, x0, z0), -+ z0_res = svldff1sb_gather_offset_s32 (p0, x0, z0)) -+ -+/* -+** ldff1sb_gather_tied1_s32_u32offset: -+** ldff1sb z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sb_gather_tied1_s32_u32offset, svint32_t, int8_t, svuint32_t, -+ z0_res = svldff1sb_gather_u32offset_s32 (p0, x0, z0), -+ z0_res = svldff1sb_gather_offset_s32 (p0, x0, z0)) -+ -+/* -+** ldff1sb_gather_untied_s32_u32offset: -+** ldff1sb z0\.s, p0/z, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sb_gather_untied_s32_u32offset, svint32_t, int8_t, svuint32_t, -+ z0_res = svldff1sb_gather_u32offset_s32 (p0, x0, z1), -+ z0_res = svldff1sb_gather_offset_s32 (p0, x0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_s64.c -new file mode 100644 -index 000000000..2f2a04d24 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_s64.c -@@ -0,0 +1,149 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1sb_gather_s64_tied1: -+** ldff1sb z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sb_gather_s64_tied1, svint64_t, svuint64_t, -+ z0_res = svldff1sb_gather_u64base_s64 (p0, z0), -+ z0_res = svldff1sb_gather_s64 (p0, z0)) -+ -+/* -+** ldff1sb_gather_s64_untied: -+** ldff1sb z0\.d, p0/z, \[z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sb_gather_s64_untied, svint64_t, svuint64_t, -+ z0_res = svldff1sb_gather_u64base_s64 (p0, z1), -+ z0_res = svldff1sb_gather_s64 (p0, z1)) -+ -+/* -+** ldff1sb_gather_x0_s64_offset: -+** ldff1sb z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sb_gather_x0_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1sb_gather_u64base_offset_s64 (p0, z0, x0), -+ z0_res = svldff1sb_gather_offset_s64 (p0, z0, x0)) -+ -+/* -+** ldff1sb_gather_m1_s64_offset: -+** mov (x[0-9]+), #?-1 -+** ldff1sb z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sb_gather_m1_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1sb_gather_u64base_offset_s64 (p0, z0, -1), -+ z0_res = svldff1sb_gather_offset_s64 (p0, z0, -1)) -+ -+/* -+** ldff1sb_gather_0_s64_offset: -+** ldff1sb z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sb_gather_0_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1sb_gather_u64base_offset_s64 (p0, z0, 0), -+ z0_res = svldff1sb_gather_offset_s64 (p0, z0, 0)) -+ -+/* -+** ldff1sb_gather_5_s64_offset: -+** ldff1sb z0\.d, p0/z, \[z0\.d, #5\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sb_gather_5_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1sb_gather_u64base_offset_s64 (p0, z0, 5), -+ z0_res = svldff1sb_gather_offset_s64 (p0, z0, 5)) -+ -+/* -+** ldff1sb_gather_31_s64_offset: -+** ldff1sb z0\.d, p0/z, \[z0\.d, #31\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sb_gather_31_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1sb_gather_u64base_offset_s64 (p0, z0, 31), -+ z0_res = svldff1sb_gather_offset_s64 (p0, z0, 31)) -+ -+/* -+** ldff1sb_gather_32_s64_offset: -+** mov (x[0-9]+), #?32 -+** ldff1sb z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sb_gather_32_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1sb_gather_u64base_offset_s64 (p0, z0, 32), -+ z0_res = svldff1sb_gather_offset_s64 (p0, z0, 32)) -+ -+/* -+** ldff1sb_gather_x0_s64_s64offset: -+** ldff1sb z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sb_gather_x0_s64_s64offset, svint64_t, int8_t, svint64_t, -+ z0_res = svldff1sb_gather_s64offset_s64 (p0, x0, z0), -+ z0_res = svldff1sb_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ldff1sb_gather_tied1_s64_s64offset: -+** ldff1sb z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sb_gather_tied1_s64_s64offset, svint64_t, int8_t, svint64_t, -+ z0_res = svldff1sb_gather_s64offset_s64 (p0, x0, z0), -+ z0_res = svldff1sb_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ldff1sb_gather_untied_s64_s64offset: -+** ldff1sb z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sb_gather_untied_s64_s64offset, svint64_t, int8_t, svint64_t, -+ z0_res = svldff1sb_gather_s64offset_s64 (p0, x0, z1), -+ z0_res = svldff1sb_gather_offset_s64 (p0, x0, z1)) -+ -+/* -+** ldff1sb_gather_ext_s64_s64offset: -+** ldff1sb z0\.d, p0/z, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sb_gather_ext_s64_s64offset, svint64_t, int8_t, svint64_t, -+ z0_res = svldff1sb_gather_s64offset_s64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svldff1sb_gather_offset_s64 (p0, 
x0, svextw_x (p0, z1))) -+ -+/* -+** ldff1sb_gather_x0_s64_u64offset: -+** ldff1sb z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sb_gather_x0_s64_u64offset, svint64_t, int8_t, svuint64_t, -+ z0_res = svldff1sb_gather_u64offset_s64 (p0, x0, z0), -+ z0_res = svldff1sb_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ldff1sb_gather_tied1_s64_u64offset: -+** ldff1sb z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sb_gather_tied1_s64_u64offset, svint64_t, int8_t, svuint64_t, -+ z0_res = svldff1sb_gather_u64offset_s64 (p0, x0, z0), -+ z0_res = svldff1sb_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ldff1sb_gather_untied_s64_u64offset: -+** ldff1sb z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sb_gather_untied_s64_u64offset, svint64_t, int8_t, svuint64_t, -+ z0_res = svldff1sb_gather_u64offset_s64 (p0, x0, z1), -+ z0_res = svldff1sb_gather_offset_s64 (p0, x0, z1)) -+ -+/* -+** ldff1sb_gather_ext_s64_u64offset: -+** ldff1sb z0\.d, p0/z, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sb_gather_ext_s64_u64offset, svint64_t, int8_t, svuint64_t, -+ z0_res = svldff1sb_gather_u64offset_s64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svldff1sb_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_u32.c -new file mode 100644 -index 000000000..e3e83a205 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_u32.c -@@ -0,0 +1,131 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1sb_gather_u32_tied1: -+** ldff1sb z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sb_gather_u32_tied1, svuint32_t, svuint32_t, -+ z0_res = svldff1sb_gather_u32base_u32 (p0, z0), -+ z0_res = svldff1sb_gather_u32 (p0, z0)) -+ -+/* -+** ldff1sb_gather_u32_untied: -+** ldff1sb z0\.s, p0/z, \[z1\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sb_gather_u32_untied, svuint32_t, svuint32_t, -+ z0_res = svldff1sb_gather_u32base_u32 (p0, z1), -+ z0_res = svldff1sb_gather_u32 (p0, z1)) -+ -+/* -+** ldff1sb_gather_x0_u32_offset: -+** ldff1sb z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sb_gather_x0_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svldff1sb_gather_u32base_offset_u32 (p0, z0, x0), -+ z0_res = svldff1sb_gather_offset_u32 (p0, z0, x0)) -+ -+/* -+** ldff1sb_gather_m1_u32_offset: -+** mov (x[0-9]+), #?-1 -+** ldff1sb z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sb_gather_m1_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svldff1sb_gather_u32base_offset_u32 (p0, z0, -1), -+ z0_res = svldff1sb_gather_offset_u32 (p0, z0, -1)) -+ -+/* -+** ldff1sb_gather_0_u32_offset: -+** ldff1sb z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sb_gather_0_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svldff1sb_gather_u32base_offset_u32 (p0, z0, 0), -+ z0_res = svldff1sb_gather_offset_u32 (p0, z0, 0)) -+ -+/* -+** ldff1sb_gather_5_u32_offset: -+** ldff1sb z0\.s, p0/z, \[z0\.s, #5\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sb_gather_5_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svldff1sb_gather_u32base_offset_u32 (p0, z0, 5), -+ z0_res = svldff1sb_gather_offset_u32 (p0, z0, 5)) -+ -+/* -+** ldff1sb_gather_31_u32_offset: -+** ldff1sb z0\.s, p0/z, \[z0\.s, #31\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sb_gather_31_u32_offset, 
svuint32_t, svuint32_t, -+ z0_res = svldff1sb_gather_u32base_offset_u32 (p0, z0, 31), -+ z0_res = svldff1sb_gather_offset_u32 (p0, z0, 31)) -+ -+/* -+** ldff1sb_gather_32_u32_offset: -+** mov (x[0-9]+), #?32 -+** ldff1sb z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sb_gather_32_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svldff1sb_gather_u32base_offset_u32 (p0, z0, 32), -+ z0_res = svldff1sb_gather_offset_u32 (p0, z0, 32)) -+ -+/* -+** ldff1sb_gather_x0_u32_s32offset: -+** ldff1sb z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sb_gather_x0_u32_s32offset, svuint32_t, int8_t, svint32_t, -+ z0_res = svldff1sb_gather_s32offset_u32 (p0, x0, z0), -+ z0_res = svldff1sb_gather_offset_u32 (p0, x0, z0)) -+ -+/* -+** ldff1sb_gather_tied1_u32_s32offset: -+** ldff1sb z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sb_gather_tied1_u32_s32offset, svuint32_t, int8_t, svint32_t, -+ z0_res = svldff1sb_gather_s32offset_u32 (p0, x0, z0), -+ z0_res = svldff1sb_gather_offset_u32 (p0, x0, z0)) -+ -+/* -+** ldff1sb_gather_untied_u32_s32offset: -+** ldff1sb z0\.s, p0/z, \[x0, z1\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sb_gather_untied_u32_s32offset, svuint32_t, int8_t, svint32_t, -+ z0_res = svldff1sb_gather_s32offset_u32 (p0, x0, z1), -+ z0_res = svldff1sb_gather_offset_u32 (p0, x0, z1)) -+ -+/* -+** ldff1sb_gather_x0_u32_u32offset: -+** ldff1sb z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sb_gather_x0_u32_u32offset, svuint32_t, int8_t, svuint32_t, -+ z0_res = svldff1sb_gather_u32offset_u32 (p0, x0, z0), -+ z0_res = svldff1sb_gather_offset_u32 (p0, x0, z0)) -+ -+/* -+** ldff1sb_gather_tied1_u32_u32offset: -+** ldff1sb z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sb_gather_tied1_u32_u32offset, svuint32_t, int8_t, svuint32_t, -+ z0_res = svldff1sb_gather_u32offset_u32 (p0, x0, z0), -+ z0_res = svldff1sb_gather_offset_u32 (p0, x0, z0)) -+ -+/* -+** ldff1sb_gather_untied_u32_u32offset: -+** ldff1sb z0\.s, p0/z, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sb_gather_untied_u32_u32offset, svuint32_t, int8_t, svuint32_t, -+ z0_res = svldff1sb_gather_u32offset_u32 (p0, x0, z1), -+ z0_res = svldff1sb_gather_offset_u32 (p0, x0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_u64.c -new file mode 100644 -index 000000000..769f2c266 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_gather_u64.c -@@ -0,0 +1,149 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1sb_gather_u64_tied1: -+** ldff1sb z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sb_gather_u64_tied1, svuint64_t, svuint64_t, -+ z0_res = svldff1sb_gather_u64base_u64 (p0, z0), -+ z0_res = svldff1sb_gather_u64 (p0, z0)) -+ -+/* -+** ldff1sb_gather_u64_untied: -+** ldff1sb z0\.d, p0/z, \[z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sb_gather_u64_untied, svuint64_t, svuint64_t, -+ z0_res = svldff1sb_gather_u64base_u64 (p0, z1), -+ z0_res = svldff1sb_gather_u64 (p0, z1)) -+ -+/* -+** ldff1sb_gather_x0_u64_offset: -+** ldff1sb z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sb_gather_x0_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1sb_gather_u64base_offset_u64 (p0, z0, x0), -+ z0_res = svldff1sb_gather_offset_u64 (p0, z0, x0)) -+ -+/* -+** ldff1sb_gather_m1_u64_offset: -+** mov (x[0-9]+), #?-1 -+** ldff1sb z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sb_gather_m1_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1sb_gather_u64base_offset_u64 (p0, z0, -1), -+ z0_res = svldff1sb_gather_offset_u64 (p0, z0, -1)) -+ -+/* -+** ldff1sb_gather_0_u64_offset: -+** ldff1sb z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sb_gather_0_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1sb_gather_u64base_offset_u64 (p0, z0, 0), -+ z0_res = svldff1sb_gather_offset_u64 (p0, z0, 0)) -+ -+/* -+** ldff1sb_gather_5_u64_offset: -+** ldff1sb z0\.d, p0/z, \[z0\.d, #5\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sb_gather_5_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1sb_gather_u64base_offset_u64 (p0, z0, 5), -+ z0_res = svldff1sb_gather_offset_u64 (p0, z0, 5)) -+ -+/* -+** ldff1sb_gather_31_u64_offset: -+** ldff1sb z0\.d, p0/z, \[z0\.d, #31\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sb_gather_31_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1sb_gather_u64base_offset_u64 (p0, z0, 31), -+ z0_res = svldff1sb_gather_offset_u64 (p0, z0, 31)) -+ -+/* -+** ldff1sb_gather_32_u64_offset: -+** mov (x[0-9]+), #?32 -+** ldff1sb z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sb_gather_32_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1sb_gather_u64base_offset_u64 (p0, z0, 32), -+ z0_res = svldff1sb_gather_offset_u64 (p0, z0, 32)) -+ -+/* -+** ldff1sb_gather_x0_u64_s64offset: -+** ldff1sb z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sb_gather_x0_u64_s64offset, svuint64_t, int8_t, svint64_t, -+ z0_res = svldff1sb_gather_s64offset_u64 (p0, x0, z0), -+ z0_res = svldff1sb_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ldff1sb_gather_tied1_u64_s64offset: -+** ldff1sb z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sb_gather_tied1_u64_s64offset, svuint64_t, int8_t, svint64_t, -+ z0_res = svldff1sb_gather_s64offset_u64 (p0, x0, z0), -+ z0_res = svldff1sb_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ldff1sb_gather_untied_u64_s64offset: -+** ldff1sb z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sb_gather_untied_u64_s64offset, svuint64_t, int8_t, svint64_t, -+ z0_res = svldff1sb_gather_s64offset_u64 (p0, x0, z1), -+ z0_res = svldff1sb_gather_offset_u64 (p0, x0, z1)) -+ -+/* -+** ldff1sb_gather_ext_u64_s64offset: -+** ldff1sb z0\.d, p0/z, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sb_gather_ext_u64_s64offset, svuint64_t, int8_t, svint64_t, -+ z0_res = svldff1sb_gather_s64offset_u64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = 
svldff1sb_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ldff1sb_gather_x0_u64_u64offset: -+** ldff1sb z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sb_gather_x0_u64_u64offset, svuint64_t, int8_t, svuint64_t, -+ z0_res = svldff1sb_gather_u64offset_u64 (p0, x0, z0), -+ z0_res = svldff1sb_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ldff1sb_gather_tied1_u64_u64offset: -+** ldff1sb z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sb_gather_tied1_u64_u64offset, svuint64_t, int8_t, svuint64_t, -+ z0_res = svldff1sb_gather_u64offset_u64 (p0, x0, z0), -+ z0_res = svldff1sb_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ldff1sb_gather_untied_u64_u64offset: -+** ldff1sb z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sb_gather_untied_u64_u64offset, svuint64_t, int8_t, svuint64_t, -+ z0_res = svldff1sb_gather_u64offset_u64 (p0, x0, z1), -+ z0_res = svldff1sb_gather_offset_u64 (p0, x0, z1)) -+ -+/* -+** ldff1sb_gather_ext_u64_u64offset: -+** ldff1sb z0\.d, p0/z, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sb_gather_ext_u64_u64offset, svuint64_t, int8_t, svuint64_t, -+ z0_res = svldff1sb_gather_u64offset_u64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svldff1sb_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s16.c -new file mode 100644 -index 000000000..e0a748c6a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s16.c -@@ -0,0 +1,90 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1sb_s16_base: -+** ldff1sb z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_s16_base, svint16_t, int8_t, -+ z0 = svldff1sb_s16 (p0, x0), -+ z0 = svldff1sb_s16 (p0, x0)) -+ -+/* -+** ldff1sb_s16_index: -+** ldff1sb z0\.h, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_s16_index, svint16_t, int8_t, -+ z0 = svldff1sb_s16 (p0, x0 + x1), -+ z0 = svldff1sb_s16 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sb_s16_1: -+** inch x0 -+** ldff1sb z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_s16_1, svint16_t, int8_t, -+ z0 = svldff1sb_s16 (p0, x0 + svcnth ()), -+ z0 = svldff1sb_s16 (p0, x0 + svcnth ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sb_s16_m1: -+** dech x0 -+** ldff1sb z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_s16_m1, svint16_t, int8_t, -+ z0 = svldff1sb_s16 (p0, x0 - svcnth ()), -+ z0 = svldff1sb_s16 (p0, x0 - svcnth ())) -+ -+/* -+** ldff1sb_vnum_s16_0: -+** ldff1sb z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_vnum_s16_0, svint16_t, int8_t, -+ z0 = svldff1sb_vnum_s16 (p0, x0, 0), -+ z0 = svldff1sb_vnum_s16 (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sb_vnum_s16_1: -+** inch x0 -+** ldff1sb z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_vnum_s16_1, svint16_t, int8_t, -+ z0 = svldff1sb_vnum_s16 (p0, x0, 1), -+ z0 = svldff1sb_vnum_s16 (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ldff1sb_vnum_s16_m1: -+** dech x0 -+** ldff1sb z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_vnum_s16_m1, svint16_t, int8_t, -+ z0 = svldff1sb_vnum_s16 (p0, x0, -1), -+ z0 = svldff1sb_vnum_s16 (p0, x0, -1)) -+ -+/* -+** ldff1sb_vnum_s16_x1: -+** cnth (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldff1sb z0\.h, p0/z, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** ldff1sb z0\.h, p0/z, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_LOAD (ldff1sb_vnum_s16_x1, svint16_t, int8_t, -+ z0 = svldff1sb_vnum_s16 (p0, x0, x1), -+ z0 = svldff1sb_vnum_s16 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s32.c -new file mode 100644 -index 000000000..86716da9b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s32.c -@@ -0,0 +1,90 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1sb_s32_base: -+** ldff1sb z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_s32_base, svint32_t, int8_t, -+ z0 = svldff1sb_s32 (p0, x0), -+ z0 = svldff1sb_s32 (p0, x0)) -+ -+/* -+** ldff1sb_s32_index: -+** ldff1sb z0\.s, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_s32_index, svint32_t, int8_t, -+ z0 = svldff1sb_s32 (p0, x0 + x1), -+ z0 = svldff1sb_s32 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sb_s32_1: -+** incw x0 -+** ldff1sb z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_s32_1, svint32_t, int8_t, -+ z0 = svldff1sb_s32 (p0, x0 + svcntw ()), -+ z0 = svldff1sb_s32 (p0, x0 + svcntw ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sb_s32_m1: -+** decw x0 -+** ldff1sb z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_s32_m1, svint32_t, int8_t, -+ z0 = svldff1sb_s32 (p0, x0 - svcntw ()), -+ z0 = svldff1sb_s32 (p0, x0 - svcntw ())) -+ -+/* -+** ldff1sb_vnum_s32_0: -+** ldff1sb z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_vnum_s32_0, svint32_t, int8_t, -+ z0 = svldff1sb_vnum_s32 (p0, x0, 0), -+ z0 = svldff1sb_vnum_s32 (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sb_vnum_s32_1: -+** incw x0 -+** ldff1sb z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_vnum_s32_1, svint32_t, int8_t, -+ z0 = svldff1sb_vnum_s32 (p0, x0, 1), -+ z0 = svldff1sb_vnum_s32 (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sb_vnum_s32_m1: -+** decw x0 -+** ldff1sb z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_vnum_s32_m1, svint32_t, int8_t, -+ z0 = svldff1sb_vnum_s32 (p0, x0, -1), -+ z0 = svldff1sb_vnum_s32 (p0, x0, -1)) -+ -+/* -+** ldff1sb_vnum_s32_x1: -+** cntw (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldff1sb z0\.s, p0/z, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** ldff1sb z0\.s, p0/z, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_LOAD (ldff1sb_vnum_s32_x1, svint32_t, int8_t, -+ z0 = svldff1sb_vnum_s32 (p0, x0, x1), -+ z0 = svldff1sb_vnum_s32 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s64.c -new file mode 100644 -index 000000000..e7a4aa6e9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_s64.c -@@ -0,0 +1,90 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1sb_s64_base: -+** ldff1sb z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_s64_base, svint64_t, int8_t, -+ z0 = svldff1sb_s64 (p0, x0), -+ z0 = svldff1sb_s64 (p0, x0)) -+ -+/* -+** ldff1sb_s64_index: -+** ldff1sb z0\.d, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_s64_index, svint64_t, int8_t, -+ z0 = svldff1sb_s64 (p0, x0 + x1), -+ z0 = svldff1sb_s64 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sb_s64_1: -+** incd x0 -+** ldff1sb z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_s64_1, svint64_t, int8_t, -+ z0 = svldff1sb_s64 (p0, x0 + svcntd ()), -+ z0 = svldff1sb_s64 (p0, x0 + svcntd ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sb_s64_m1: -+** decd x0 -+** ldff1sb z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_s64_m1, svint64_t, int8_t, -+ z0 = svldff1sb_s64 (p0, x0 - svcntd ()), -+ z0 = svldff1sb_s64 (p0, x0 - svcntd ())) -+ -+/* -+** ldff1sb_vnum_s64_0: -+** ldff1sb z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_vnum_s64_0, svint64_t, int8_t, -+ z0 = svldff1sb_vnum_s64 (p0, x0, 0), -+ z0 = svldff1sb_vnum_s64 (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sb_vnum_s64_1: -+** incd x0 -+** ldff1sb z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_vnum_s64_1, svint64_t, int8_t, -+ z0 = svldff1sb_vnum_s64 (p0, x0, 1), -+ z0 = svldff1sb_vnum_s64 (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sb_vnum_s64_m1: -+** decd x0 -+** ldff1sb z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_vnum_s64_m1, svint64_t, int8_t, -+ z0 = svldff1sb_vnum_s64 (p0, x0, -1), -+ z0 = svldff1sb_vnum_s64 (p0, x0, -1)) -+ -+/* -+** ldff1sb_vnum_s64_x1: -+** cntd (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldff1sb z0\.d, p0/z, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** ldff1sb z0\.d, p0/z, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_LOAD (ldff1sb_vnum_s64_x1, svint64_t, int8_t, -+ z0 = svldff1sb_vnum_s64 (p0, x0, x1), -+ z0 = svldff1sb_vnum_s64 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u16.c -new file mode 100644 -index 000000000..69ba96d52 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u16.c -@@ -0,0 +1,90 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1sb_u16_base: -+** ldff1sb z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_u16_base, svuint16_t, int8_t, -+ z0 = svldff1sb_u16 (p0, x0), -+ z0 = svldff1sb_u16 (p0, x0)) -+ -+/* -+** ldff1sb_u16_index: -+** ldff1sb z0\.h, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_u16_index, svuint16_t, int8_t, -+ z0 = svldff1sb_u16 (p0, x0 + x1), -+ z0 = svldff1sb_u16 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sb_u16_1: -+** inch x0 -+** ldff1sb z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_u16_1, svuint16_t, int8_t, -+ z0 = svldff1sb_u16 (p0, x0 + svcnth ()), -+ z0 = svldff1sb_u16 (p0, x0 + svcnth ())) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ldff1sb_u16_m1: -+** dech x0 -+** ldff1sb z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_u16_m1, svuint16_t, int8_t, -+ z0 = svldff1sb_u16 (p0, x0 - svcnth ()), -+ z0 = svldff1sb_u16 (p0, x0 - svcnth ())) -+ -+/* -+** ldff1sb_vnum_u16_0: -+** ldff1sb z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_vnum_u16_0, svuint16_t, int8_t, -+ z0 = svldff1sb_vnum_u16 (p0, x0, 0), -+ z0 = svldff1sb_vnum_u16 (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sb_vnum_u16_1: -+** inch x0 -+** ldff1sb z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_vnum_u16_1, svuint16_t, int8_t, -+ z0 = svldff1sb_vnum_u16 (p0, x0, 1), -+ z0 = svldff1sb_vnum_u16 (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sb_vnum_u16_m1: -+** dech x0 -+** ldff1sb z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_vnum_u16_m1, svuint16_t, int8_t, -+ z0 = svldff1sb_vnum_u16 (p0, x0, -1), -+ z0 = svldff1sb_vnum_u16 (p0, x0, -1)) -+ -+/* -+** ldff1sb_vnum_u16_x1: -+** cnth (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldff1sb z0\.h, p0/z, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** ldff1sb z0\.h, p0/z, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_LOAD (ldff1sb_vnum_u16_x1, svuint16_t, int8_t, -+ z0 = svldff1sb_vnum_u16 (p0, x0, x1), -+ z0 = svldff1sb_vnum_u16 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u32.c -new file mode 100644 -index 000000000..e1a1873f0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u32.c -@@ -0,0 +1,90 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1sb_u32_base: -+** ldff1sb z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_u32_base, svuint32_t, int8_t, -+ z0 = svldff1sb_u32 (p0, x0), -+ z0 = svldff1sb_u32 (p0, x0)) -+ -+/* -+** ldff1sb_u32_index: -+** ldff1sb z0\.s, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_u32_index, svuint32_t, int8_t, -+ z0 = svldff1sb_u32 (p0, x0 + x1), -+ z0 = svldff1sb_u32 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sb_u32_1: -+** incw x0 -+** ldff1sb z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_u32_1, svuint32_t, int8_t, -+ z0 = svldff1sb_u32 (p0, x0 + svcntw ()), -+ z0 = svldff1sb_u32 (p0, x0 + svcntw ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sb_u32_m1: -+** decw x0 -+** ldff1sb z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_u32_m1, svuint32_t, int8_t, -+ z0 = svldff1sb_u32 (p0, x0 - svcntw ()), -+ z0 = svldff1sb_u32 (p0, x0 - svcntw ())) -+ -+/* -+** ldff1sb_vnum_u32_0: -+** ldff1sb z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_vnum_u32_0, svuint32_t, int8_t, -+ z0 = svldff1sb_vnum_u32 (p0, x0, 0), -+ z0 = svldff1sb_vnum_u32 (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sb_vnum_u32_1: -+** incw x0 -+** ldff1sb z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_vnum_u32_1, svuint32_t, int8_t, -+ z0 = svldff1sb_vnum_u32 (p0, x0, 1), -+ z0 = svldff1sb_vnum_u32 (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ldff1sb_vnum_u32_m1: -+** decw x0 -+** ldff1sb z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_vnum_u32_m1, svuint32_t, int8_t, -+ z0 = svldff1sb_vnum_u32 (p0, x0, -1), -+ z0 = svldff1sb_vnum_u32 (p0, x0, -1)) -+ -+/* -+** ldff1sb_vnum_u32_x1: -+** cntw (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldff1sb z0\.s, p0/z, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** ldff1sb z0\.s, p0/z, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_LOAD (ldff1sb_vnum_u32_x1, svuint32_t, int8_t, -+ z0 = svldff1sb_vnum_u32 (p0, x0, x1), -+ z0 = svldff1sb_vnum_u32 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u64.c -new file mode 100644 -index 000000000..0a49cbcc0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sb_u64.c -@@ -0,0 +1,90 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1sb_u64_base: -+** ldff1sb z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_u64_base, svuint64_t, int8_t, -+ z0 = svldff1sb_u64 (p0, x0), -+ z0 = svldff1sb_u64 (p0, x0)) -+ -+/* -+** ldff1sb_u64_index: -+** ldff1sb z0\.d, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_u64_index, svuint64_t, int8_t, -+ z0 = svldff1sb_u64 (p0, x0 + x1), -+ z0 = svldff1sb_u64 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sb_u64_1: -+** incd x0 -+** ldff1sb z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_u64_1, svuint64_t, int8_t, -+ z0 = svldff1sb_u64 (p0, x0 + svcntd ()), -+ z0 = svldff1sb_u64 (p0, x0 + svcntd ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sb_u64_m1: -+** decd x0 -+** ldff1sb z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_u64_m1, svuint64_t, int8_t, -+ z0 = svldff1sb_u64 (p0, x0 - svcntd ()), -+ z0 = svldff1sb_u64 (p0, x0 - svcntd ())) -+ -+/* -+** ldff1sb_vnum_u64_0: -+** ldff1sb z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_vnum_u64_0, svuint64_t, int8_t, -+ z0 = svldff1sb_vnum_u64 (p0, x0, 0), -+ z0 = svldff1sb_vnum_u64 (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sb_vnum_u64_1: -+** incd x0 -+** ldff1sb z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_vnum_u64_1, svuint64_t, int8_t, -+ z0 = svldff1sb_vnum_u64 (p0, x0, 1), -+ z0 = svldff1sb_vnum_u64 (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sb_vnum_u64_m1: -+** decd x0 -+** ldff1sb z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sb_vnum_u64_m1, svuint64_t, int8_t, -+ z0 = svldff1sb_vnum_u64 (p0, x0, -1), -+ z0 = svldff1sb_vnum_u64 (p0, x0, -1)) -+ -+/* -+** ldff1sb_vnum_u64_x1: -+** cntd (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldff1sb z0\.d, p0/z, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** ldff1sb z0\.d, p0/z, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_LOAD (ldff1sb_vnum_u64_x1, svuint64_t, int8_t, -+ z0 = svldff1sb_vnum_u64 (p0, x0, x1), -+ z0 = svldff1sb_vnum_u64 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_s32.c -new file mode 100644 -index 000000000..b633335dc ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_s32.c -@@ -0,0 +1,252 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1sh_gather_s32_tied1: -+** ldff1sh z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_s32_tied1, svint32_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32base_s32 (p0, z0), -+ z0_res = svldff1sh_gather_s32 (p0, z0)) -+ -+/* -+** ldff1sh_gather_s32_untied: -+** ldff1sh z0\.s, p0/z, \[z1\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_s32_untied, svint32_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32base_s32 (p0, z1), -+ z0_res = svldff1sh_gather_s32 (p0, z1)) -+ -+/* -+** ldff1sh_gather_x0_s32_offset: -+** ldff1sh z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_x0_s32_offset, svint32_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32base_offset_s32 (p0, z0, x0), -+ z0_res = svldff1sh_gather_offset_s32 (p0, z0, x0)) -+ -+/* -+** ldff1sh_gather_m2_s32_offset: -+** mov (x[0-9]+), #?-2 -+** ldff1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_m2_s32_offset, svint32_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32base_offset_s32 (p0, z0, -2), -+ z0_res = svldff1sh_gather_offset_s32 (p0, z0, -2)) -+ -+/* -+** ldff1sh_gather_0_s32_offset: -+** ldff1sh z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_0_s32_offset, svint32_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32base_offset_s32 (p0, z0, 0), -+ z0_res = svldff1sh_gather_offset_s32 (p0, z0, 0)) -+ -+/* -+** ldff1sh_gather_5_s32_offset: -+** mov (x[0-9]+), #?5 -+** ldff1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_5_s32_offset, svint32_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32base_offset_s32 (p0, z0, 5), -+ z0_res = svldff1sh_gather_offset_s32 (p0, z0, 5)) -+ -+/* -+** ldff1sh_gather_6_s32_offset: -+** ldff1sh z0\.s, p0/z, \[z0\.s, #6\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_6_s32_offset, svint32_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32base_offset_s32 (p0, z0, 6), -+ z0_res = svldff1sh_gather_offset_s32 (p0, z0, 6)) -+ -+/* -+** ldff1sh_gather_62_s32_offset: -+** ldff1sh z0\.s, p0/z, \[z0\.s, #62\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_62_s32_offset, svint32_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32base_offset_s32 (p0, z0, 62), -+ z0_res = svldff1sh_gather_offset_s32 (p0, z0, 62)) -+ -+/* -+** ldff1sh_gather_64_s32_offset: -+** mov (x[0-9]+), #?64 -+** ldff1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_64_s32_offset, svint32_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32base_offset_s32 (p0, z0, 64), -+ z0_res = svldff1sh_gather_offset_s32 (p0, z0, 64)) -+ -+/* -+** ldff1sh_gather_x0_s32_index: -+** lsl (x[0-9]+), x0, #?1 -+** ldff1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_x0_s32_index, svint32_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32base_index_s32 (p0, z0, x0), -+ z0_res = svldff1sh_gather_index_s32 (p0, z0, x0)) -+ -+/* -+** ldff1sh_gather_m1_s32_index: -+** mov (x[0-9]+), #?-2 -+** ldff1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_m1_s32_index, svint32_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32base_index_s32 (p0, z0, -1), -+ z0_res = svldff1sh_gather_index_s32 (p0, z0, -1)) -+ -+/* -+** ldff1sh_gather_0_s32_index: -+** ldff1sh z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_0_s32_index, svint32_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32base_index_s32 (p0, z0, 0), -+ z0_res = 
svldff1sh_gather_index_s32 (p0, z0, 0)) -+ -+/* -+** ldff1sh_gather_5_s32_index: -+** ldff1sh z0\.s, p0/z, \[z0\.s, #10\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_5_s32_index, svint32_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32base_index_s32 (p0, z0, 5), -+ z0_res = svldff1sh_gather_index_s32 (p0, z0, 5)) -+ -+/* -+** ldff1sh_gather_31_s32_index: -+** ldff1sh z0\.s, p0/z, \[z0\.s, #62\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_31_s32_index, svint32_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32base_index_s32 (p0, z0, 31), -+ z0_res = svldff1sh_gather_index_s32 (p0, z0, 31)) -+ -+/* -+** ldff1sh_gather_32_s32_index: -+** mov (x[0-9]+), #?64 -+** ldff1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_32_s32_index, svint32_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32base_index_s32 (p0, z0, 32), -+ z0_res = svldff1sh_gather_index_s32 (p0, z0, 32)) -+ -+/* -+** ldff1sh_gather_x0_s32_s32offset: -+** ldff1sh z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_x0_s32_s32offset, svint32_t, int16_t, svint32_t, -+ z0_res = svldff1sh_gather_s32offset_s32 (p0, x0, z0), -+ z0_res = svldff1sh_gather_offset_s32 (p0, x0, z0)) -+ -+/* -+** ldff1sh_gather_tied1_s32_s32offset: -+** ldff1sh z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_tied1_s32_s32offset, svint32_t, int16_t, svint32_t, -+ z0_res = svldff1sh_gather_s32offset_s32 (p0, x0, z0), -+ z0_res = svldff1sh_gather_offset_s32 (p0, x0, z0)) -+ -+/* -+** ldff1sh_gather_untied_s32_s32offset: -+** ldff1sh z0\.s, p0/z, \[x0, z1\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_untied_s32_s32offset, svint32_t, int16_t, svint32_t, -+ z0_res = svldff1sh_gather_s32offset_s32 (p0, x0, z1), -+ z0_res = svldff1sh_gather_offset_s32 (p0, x0, z1)) -+ -+/* -+** ldff1sh_gather_x0_s32_u32offset: -+** ldff1sh z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_x0_s32_u32offset, svint32_t, int16_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32offset_s32 (p0, x0, z0), -+ z0_res = svldff1sh_gather_offset_s32 (p0, x0, z0)) -+ -+/* -+** ldff1sh_gather_tied1_s32_u32offset: -+** ldff1sh z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_tied1_s32_u32offset, svint32_t, int16_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32offset_s32 (p0, x0, z0), -+ z0_res = svldff1sh_gather_offset_s32 (p0, x0, z0)) -+ -+/* -+** ldff1sh_gather_untied_s32_u32offset: -+** ldff1sh z0\.s, p0/z, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_untied_s32_u32offset, svint32_t, int16_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32offset_s32 (p0, x0, z1), -+ z0_res = svldff1sh_gather_offset_s32 (p0, x0, z1)) -+ -+/* -+** ldff1sh_gather_x0_s32_s32index: -+** ldff1sh z0\.s, p0/z, \[x0, z0\.s, sxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_x0_s32_s32index, svint32_t, int16_t, svint32_t, -+ z0_res = svldff1sh_gather_s32index_s32 (p0, x0, z0), -+ z0_res = svldff1sh_gather_index_s32 (p0, x0, z0)) -+ -+/* -+** ldff1sh_gather_tied1_s32_s32index: -+** ldff1sh z0\.s, p0/z, \[x0, z0\.s, sxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_tied1_s32_s32index, svint32_t, int16_t, svint32_t, -+ z0_res = svldff1sh_gather_s32index_s32 (p0, x0, z0), -+ z0_res = svldff1sh_gather_index_s32 (p0, x0, z0)) -+ -+/* -+** ldff1sh_gather_untied_s32_s32index: -+** ldff1sh z0\.s, p0/z, \[x0, z1\.s, sxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ 
(ldff1sh_gather_untied_s32_s32index, svint32_t, int16_t, svint32_t, -+ z0_res = svldff1sh_gather_s32index_s32 (p0, x0, z1), -+ z0_res = svldff1sh_gather_index_s32 (p0, x0, z1)) -+ -+/* -+** ldff1sh_gather_x0_s32_u32index: -+** ldff1sh z0\.s, p0/z, \[x0, z0\.s, uxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_x0_s32_u32index, svint32_t, int16_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32index_s32 (p0, x0, z0), -+ z0_res = svldff1sh_gather_index_s32 (p0, x0, z0)) -+ -+/* -+** ldff1sh_gather_tied1_s32_u32index: -+** ldff1sh z0\.s, p0/z, \[x0, z0\.s, uxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_tied1_s32_u32index, svint32_t, int16_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32index_s32 (p0, x0, z0), -+ z0_res = svldff1sh_gather_index_s32 (p0, x0, z0)) -+ -+/* -+** ldff1sh_gather_untied_s32_u32index: -+** ldff1sh z0\.s, p0/z, \[x0, z1\.s, uxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_untied_s32_u32index, svint32_t, int16_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32index_s32 (p0, x0, z1), -+ z0_res = svldff1sh_gather_index_s32 (p0, x0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_s64.c -new file mode 100644 -index 000000000..32a4309b6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_s64.c -@@ -0,0 +1,288 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1sh_gather_s64_tied1: -+** ldff1sh z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_s64_tied1, svint64_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64base_s64 (p0, z0), -+ z0_res = svldff1sh_gather_s64 (p0, z0)) -+ -+/* -+** ldff1sh_gather_s64_untied: -+** ldff1sh z0\.d, p0/z, \[z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_s64_untied, svint64_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64base_s64 (p0, z1), -+ z0_res = svldff1sh_gather_s64 (p0, z1)) -+ -+/* -+** ldff1sh_gather_x0_s64_offset: -+** ldff1sh z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_x0_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64base_offset_s64 (p0, z0, x0), -+ z0_res = svldff1sh_gather_offset_s64 (p0, z0, x0)) -+ -+/* -+** ldff1sh_gather_m2_s64_offset: -+** mov (x[0-9]+), #?-2 -+** ldff1sh z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_m2_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64base_offset_s64 (p0, z0, -2), -+ z0_res = svldff1sh_gather_offset_s64 (p0, z0, -2)) -+ -+/* -+** ldff1sh_gather_0_s64_offset: -+** ldff1sh z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_0_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64base_offset_s64 (p0, z0, 0), -+ z0_res = svldff1sh_gather_offset_s64 (p0, z0, 0)) -+ -+/* -+** ldff1sh_gather_5_s64_offset: -+** mov (x[0-9]+), #?5 -+** ldff1sh z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_5_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64base_offset_s64 (p0, z0, 5), -+ z0_res = svldff1sh_gather_offset_s64 (p0, z0, 5)) -+ -+/* -+** ldff1sh_gather_6_s64_offset: -+** ldff1sh z0\.d, p0/z, \[z0\.d, #6\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_6_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64base_offset_s64 (p0, z0, 6), -+ z0_res = svldff1sh_gather_offset_s64 (p0, z0, 6)) -+ -+/* -+** 
ldff1sh_gather_62_s64_offset: -+** ldff1sh z0\.d, p0/z, \[z0\.d, #62\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_62_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64base_offset_s64 (p0, z0, 62), -+ z0_res = svldff1sh_gather_offset_s64 (p0, z0, 62)) -+ -+/* -+** ldff1sh_gather_64_s64_offset: -+** mov (x[0-9]+), #?64 -+** ldff1sh z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_64_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64base_offset_s64 (p0, z0, 64), -+ z0_res = svldff1sh_gather_offset_s64 (p0, z0, 64)) -+ -+/* -+** ldff1sh_gather_x0_s64_index: -+** lsl (x[0-9]+), x0, #?1 -+** ldff1sh z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_x0_s64_index, svint64_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64base_index_s64 (p0, z0, x0), -+ z0_res = svldff1sh_gather_index_s64 (p0, z0, x0)) -+ -+/* -+** ldff1sh_gather_m1_s64_index: -+** mov (x[0-9]+), #?-2 -+** ldff1sh z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_m1_s64_index, svint64_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64base_index_s64 (p0, z0, -1), -+ z0_res = svldff1sh_gather_index_s64 (p0, z0, -1)) -+ -+/* -+** ldff1sh_gather_0_s64_index: -+** ldff1sh z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_0_s64_index, svint64_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64base_index_s64 (p0, z0, 0), -+ z0_res = svldff1sh_gather_index_s64 (p0, z0, 0)) -+ -+/* -+** ldff1sh_gather_5_s64_index: -+** ldff1sh z0\.d, p0/z, \[z0\.d, #10\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_5_s64_index, svint64_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64base_index_s64 (p0, z0, 5), -+ z0_res = svldff1sh_gather_index_s64 (p0, z0, 5)) -+ -+/* -+** ldff1sh_gather_31_s64_index: -+** ldff1sh z0\.d, p0/z, \[z0\.d, #62\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_31_s64_index, svint64_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64base_index_s64 (p0, z0, 31), -+ z0_res = svldff1sh_gather_index_s64 (p0, z0, 31)) -+ -+/* -+** ldff1sh_gather_32_s64_index: -+** mov (x[0-9]+), #?64 -+** ldff1sh z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_32_s64_index, svint64_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64base_index_s64 (p0, z0, 32), -+ z0_res = svldff1sh_gather_index_s64 (p0, z0, 32)) -+ -+/* -+** ldff1sh_gather_x0_s64_s64offset: -+** ldff1sh z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_x0_s64_s64offset, svint64_t, int16_t, svint64_t, -+ z0_res = svldff1sh_gather_s64offset_s64 (p0, x0, z0), -+ z0_res = svldff1sh_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ldff1sh_gather_tied1_s64_s64offset: -+** ldff1sh z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_tied1_s64_s64offset, svint64_t, int16_t, svint64_t, -+ z0_res = svldff1sh_gather_s64offset_s64 (p0, x0, z0), -+ z0_res = svldff1sh_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ldff1sh_gather_untied_s64_s64offset: -+** ldff1sh z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_untied_s64_s64offset, svint64_t, int16_t, svint64_t, -+ z0_res = svldff1sh_gather_s64offset_s64 (p0, x0, z1), -+ z0_res = svldff1sh_gather_offset_s64 (p0, x0, z1)) -+ -+/* -+** ldff1sh_gather_ext_s64_s64offset: -+** ldff1sh z0\.d, p0/z, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_ext_s64_s64offset, svint64_t, int16_t, svint64_t, -+ z0_res = svldff1sh_gather_s64offset_s64 (p0, x0, svextw_s64_x (p0, z1)), -+ 
z0_res = svldff1sh_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ldff1sh_gather_x0_s64_u64offset: -+** ldff1sh z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_x0_s64_u64offset, svint64_t, int16_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64offset_s64 (p0, x0, z0), -+ z0_res = svldff1sh_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ldff1sh_gather_tied1_s64_u64offset: -+** ldff1sh z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_tied1_s64_u64offset, svint64_t, int16_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64offset_s64 (p0, x0, z0), -+ z0_res = svldff1sh_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ldff1sh_gather_untied_s64_u64offset: -+** ldff1sh z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_untied_s64_u64offset, svint64_t, int16_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64offset_s64 (p0, x0, z1), -+ z0_res = svldff1sh_gather_offset_s64 (p0, x0, z1)) -+ -+/* -+** ldff1sh_gather_ext_s64_u64offset: -+** ldff1sh z0\.d, p0/z, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_ext_s64_u64offset, svint64_t, int16_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64offset_s64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svldff1sh_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ldff1sh_gather_x0_s64_s64index: -+** ldff1sh z0\.d, p0/z, \[x0, z0\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_x0_s64_s64index, svint64_t, int16_t, svint64_t, -+ z0_res = svldff1sh_gather_s64index_s64 (p0, x0, z0), -+ z0_res = svldff1sh_gather_index_s64 (p0, x0, z0)) -+ -+/* -+** ldff1sh_gather_tied1_s64_s64index: -+** ldff1sh z0\.d, p0/z, \[x0, z0\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_tied1_s64_s64index, svint64_t, int16_t, svint64_t, -+ z0_res = svldff1sh_gather_s64index_s64 (p0, x0, z0), -+ z0_res = svldff1sh_gather_index_s64 (p0, x0, z0)) -+ -+/* -+** ldff1sh_gather_untied_s64_s64index: -+** ldff1sh z0\.d, p0/z, \[x0, z1\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_untied_s64_s64index, svint64_t, int16_t, svint64_t, -+ z0_res = svldff1sh_gather_s64index_s64 (p0, x0, z1), -+ z0_res = svldff1sh_gather_index_s64 (p0, x0, z1)) -+ -+/* -+** ldff1sh_gather_ext_s64_s64index: -+** ldff1sh z0\.d, p0/z, \[x0, z1\.d, sxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_ext_s64_s64index, svint64_t, int16_t, svint64_t, -+ z0_res = svldff1sh_gather_s64index_s64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svldff1sh_gather_index_s64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ldff1sh_gather_x0_s64_u64index: -+** ldff1sh z0\.d, p0/z, \[x0, z0\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_x0_s64_u64index, svint64_t, int16_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64index_s64 (p0, x0, z0), -+ z0_res = svldff1sh_gather_index_s64 (p0, x0, z0)) -+ -+/* -+** ldff1sh_gather_tied1_s64_u64index: -+** ldff1sh z0\.d, p0/z, \[x0, z0\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_tied1_s64_u64index, svint64_t, int16_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64index_s64 (p0, x0, z0), -+ z0_res = svldff1sh_gather_index_s64 (p0, x0, z0)) -+ -+/* -+** ldff1sh_gather_untied_s64_u64index: -+** ldff1sh z0\.d, p0/z, \[x0, z1\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_untied_s64_u64index, svint64_t, int16_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64index_s64 (p0, x0, z1), -+ z0_res = svldff1sh_gather_index_s64 (p0, x0, z1)) -+ -+/* -+** ldff1sh_gather_ext_s64_u64index: -+** ldff1sh 
z0\.d, p0/z, \[x0, z1\.d, uxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_ext_s64_u64index, svint64_t, int16_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64index_s64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svldff1sh_gather_index_s64 (p0, x0, svextw_x (p0, z1))) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_u32.c -new file mode 100644 -index 000000000..73a9be892 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_u32.c -@@ -0,0 +1,252 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1sh_gather_u32_tied1: -+** ldff1sh z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_u32_tied1, svuint32_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32base_u32 (p0, z0), -+ z0_res = svldff1sh_gather_u32 (p0, z0)) -+ -+/* -+** ldff1sh_gather_u32_untied: -+** ldff1sh z0\.s, p0/z, \[z1\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_u32_untied, svuint32_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32base_u32 (p0, z1), -+ z0_res = svldff1sh_gather_u32 (p0, z1)) -+ -+/* -+** ldff1sh_gather_x0_u32_offset: -+** ldff1sh z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_x0_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32base_offset_u32 (p0, z0, x0), -+ z0_res = svldff1sh_gather_offset_u32 (p0, z0, x0)) -+ -+/* -+** ldff1sh_gather_m2_u32_offset: -+** mov (x[0-9]+), #?-2 -+** ldff1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_m2_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32base_offset_u32 (p0, z0, -2), -+ z0_res = svldff1sh_gather_offset_u32 (p0, z0, -2)) -+ -+/* -+** ldff1sh_gather_0_u32_offset: -+** ldff1sh z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_0_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32base_offset_u32 (p0, z0, 0), -+ z0_res = svldff1sh_gather_offset_u32 (p0, z0, 0)) -+ -+/* -+** ldff1sh_gather_5_u32_offset: -+** mov (x[0-9]+), #?5 -+** ldff1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_5_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32base_offset_u32 (p0, z0, 5), -+ z0_res = svldff1sh_gather_offset_u32 (p0, z0, 5)) -+ -+/* -+** ldff1sh_gather_6_u32_offset: -+** ldff1sh z0\.s, p0/z, \[z0\.s, #6\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_6_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32base_offset_u32 (p0, z0, 6), -+ z0_res = svldff1sh_gather_offset_u32 (p0, z0, 6)) -+ -+/* -+** ldff1sh_gather_62_u32_offset: -+** ldff1sh z0\.s, p0/z, \[z0\.s, #62\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_62_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32base_offset_u32 (p0, z0, 62), -+ z0_res = svldff1sh_gather_offset_u32 (p0, z0, 62)) -+ -+/* -+** ldff1sh_gather_64_u32_offset: -+** mov (x[0-9]+), #?64 -+** ldff1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_64_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32base_offset_u32 (p0, z0, 64), -+ z0_res = svldff1sh_gather_offset_u32 (p0, z0, 64)) -+ -+/* -+** ldff1sh_gather_x0_u32_index: -+** lsl (x[0-9]+), x0, #?1 -+** ldff1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_x0_u32_index, svuint32_t, svuint32_t, -+ z0_res 
= svldff1sh_gather_u32base_index_u32 (p0, z0, x0), -+ z0_res = svldff1sh_gather_index_u32 (p0, z0, x0)) -+ -+/* -+** ldff1sh_gather_m1_u32_index: -+** mov (x[0-9]+), #?-2 -+** ldff1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_m1_u32_index, svuint32_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32base_index_u32 (p0, z0, -1), -+ z0_res = svldff1sh_gather_index_u32 (p0, z0, -1)) -+ -+/* -+** ldff1sh_gather_0_u32_index: -+** ldff1sh z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_0_u32_index, svuint32_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32base_index_u32 (p0, z0, 0), -+ z0_res = svldff1sh_gather_index_u32 (p0, z0, 0)) -+ -+/* -+** ldff1sh_gather_5_u32_index: -+** ldff1sh z0\.s, p0/z, \[z0\.s, #10\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_5_u32_index, svuint32_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32base_index_u32 (p0, z0, 5), -+ z0_res = svldff1sh_gather_index_u32 (p0, z0, 5)) -+ -+/* -+** ldff1sh_gather_31_u32_index: -+** ldff1sh z0\.s, p0/z, \[z0\.s, #62\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_31_u32_index, svuint32_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32base_index_u32 (p0, z0, 31), -+ z0_res = svldff1sh_gather_index_u32 (p0, z0, 31)) -+ -+/* -+** ldff1sh_gather_32_u32_index: -+** mov (x[0-9]+), #?64 -+** ldff1sh z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_32_u32_index, svuint32_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32base_index_u32 (p0, z0, 32), -+ z0_res = svldff1sh_gather_index_u32 (p0, z0, 32)) -+ -+/* -+** ldff1sh_gather_x0_u32_s32offset: -+** ldff1sh z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_x0_u32_s32offset, svuint32_t, int16_t, svint32_t, -+ z0_res = svldff1sh_gather_s32offset_u32 (p0, x0, z0), -+ z0_res = svldff1sh_gather_offset_u32 (p0, x0, z0)) -+ -+/* -+** ldff1sh_gather_tied1_u32_s32offset: -+** ldff1sh z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_tied1_u32_s32offset, svuint32_t, int16_t, svint32_t, -+ z0_res = svldff1sh_gather_s32offset_u32 (p0, x0, z0), -+ z0_res = svldff1sh_gather_offset_u32 (p0, x0, z0)) -+ -+/* -+** ldff1sh_gather_untied_u32_s32offset: -+** ldff1sh z0\.s, p0/z, \[x0, z1\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_untied_u32_s32offset, svuint32_t, int16_t, svint32_t, -+ z0_res = svldff1sh_gather_s32offset_u32 (p0, x0, z1), -+ z0_res = svldff1sh_gather_offset_u32 (p0, x0, z1)) -+ -+/* -+** ldff1sh_gather_x0_u32_u32offset: -+** ldff1sh z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_x0_u32_u32offset, svuint32_t, int16_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32offset_u32 (p0, x0, z0), -+ z0_res = svldff1sh_gather_offset_u32 (p0, x0, z0)) -+ -+/* -+** ldff1sh_gather_tied1_u32_u32offset: -+** ldff1sh z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_tied1_u32_u32offset, svuint32_t, int16_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32offset_u32 (p0, x0, z0), -+ z0_res = svldff1sh_gather_offset_u32 (p0, x0, z0)) -+ -+/* -+** ldff1sh_gather_untied_u32_u32offset: -+** ldff1sh z0\.s, p0/z, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_untied_u32_u32offset, svuint32_t, int16_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32offset_u32 (p0, x0, z1), -+ z0_res = svldff1sh_gather_offset_u32 (p0, x0, z1)) -+ -+/* -+** ldff1sh_gather_x0_u32_s32index: -+** ldff1sh z0\.s, p0/z, \[x0, z0\.s, sxtw 1\] -+** ret -+*/ 
-+TEST_LOAD_GATHER_SZ (ldff1sh_gather_x0_u32_s32index, svuint32_t, int16_t, svint32_t, -+ z0_res = svldff1sh_gather_s32index_u32 (p0, x0, z0), -+ z0_res = svldff1sh_gather_index_u32 (p0, x0, z0)) -+ -+/* -+** ldff1sh_gather_tied1_u32_s32index: -+** ldff1sh z0\.s, p0/z, \[x0, z0\.s, sxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_tied1_u32_s32index, svuint32_t, int16_t, svint32_t, -+ z0_res = svldff1sh_gather_s32index_u32 (p0, x0, z0), -+ z0_res = svldff1sh_gather_index_u32 (p0, x0, z0)) -+ -+/* -+** ldff1sh_gather_untied_u32_s32index: -+** ldff1sh z0\.s, p0/z, \[x0, z1\.s, sxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_untied_u32_s32index, svuint32_t, int16_t, svint32_t, -+ z0_res = svldff1sh_gather_s32index_u32 (p0, x0, z1), -+ z0_res = svldff1sh_gather_index_u32 (p0, x0, z1)) -+ -+/* -+** ldff1sh_gather_x0_u32_u32index: -+** ldff1sh z0\.s, p0/z, \[x0, z0\.s, uxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_x0_u32_u32index, svuint32_t, int16_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32index_u32 (p0, x0, z0), -+ z0_res = svldff1sh_gather_index_u32 (p0, x0, z0)) -+ -+/* -+** ldff1sh_gather_tied1_u32_u32index: -+** ldff1sh z0\.s, p0/z, \[x0, z0\.s, uxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_tied1_u32_u32index, svuint32_t, int16_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32index_u32 (p0, x0, z0), -+ z0_res = svldff1sh_gather_index_u32 (p0, x0, z0)) -+ -+/* -+** ldff1sh_gather_untied_u32_u32index: -+** ldff1sh z0\.s, p0/z, \[x0, z1\.s, uxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_untied_u32_u32index, svuint32_t, int16_t, svuint32_t, -+ z0_res = svldff1sh_gather_u32index_u32 (p0, x0, z1), -+ z0_res = svldff1sh_gather_index_u32 (p0, x0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_u64.c -new file mode 100644 -index 000000000..94ea73b63 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_gather_u64.c -@@ -0,0 +1,288 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1sh_gather_u64_tied1: -+** ldff1sh z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_u64_tied1, svuint64_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64base_u64 (p0, z0), -+ z0_res = svldff1sh_gather_u64 (p0, z0)) -+ -+/* -+** ldff1sh_gather_u64_untied: -+** ldff1sh z0\.d, p0/z, \[z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_u64_untied, svuint64_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64base_u64 (p0, z1), -+ z0_res = svldff1sh_gather_u64 (p0, z1)) -+ -+/* -+** ldff1sh_gather_x0_u64_offset: -+** ldff1sh z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_x0_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64base_offset_u64 (p0, z0, x0), -+ z0_res = svldff1sh_gather_offset_u64 (p0, z0, x0)) -+ -+/* -+** ldff1sh_gather_m2_u64_offset: -+** mov (x[0-9]+), #?-2 -+** ldff1sh z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_m2_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64base_offset_u64 (p0, z0, -2), -+ z0_res = svldff1sh_gather_offset_u64 (p0, z0, -2)) -+ -+/* -+** ldff1sh_gather_0_u64_offset: -+** ldff1sh z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_0_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64base_offset_u64 (p0, z0, 0), -+ z0_res = svldff1sh_gather_offset_u64 (p0, z0, 0)) -+ -+/* -+** ldff1sh_gather_5_u64_offset: -+** mov (x[0-9]+), #?5 -+** ldff1sh z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_5_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64base_offset_u64 (p0, z0, 5), -+ z0_res = svldff1sh_gather_offset_u64 (p0, z0, 5)) -+ -+/* -+** ldff1sh_gather_6_u64_offset: -+** ldff1sh z0\.d, p0/z, \[z0\.d, #6\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_6_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64base_offset_u64 (p0, z0, 6), -+ z0_res = svldff1sh_gather_offset_u64 (p0, z0, 6)) -+ -+/* -+** ldff1sh_gather_62_u64_offset: -+** ldff1sh z0\.d, p0/z, \[z0\.d, #62\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_62_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64base_offset_u64 (p0, z0, 62), -+ z0_res = svldff1sh_gather_offset_u64 (p0, z0, 62)) -+ -+/* -+** ldff1sh_gather_64_u64_offset: -+** mov (x[0-9]+), #?64 -+** ldff1sh z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_64_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64base_offset_u64 (p0, z0, 64), -+ z0_res = svldff1sh_gather_offset_u64 (p0, z0, 64)) -+ -+/* -+** ldff1sh_gather_x0_u64_index: -+** lsl (x[0-9]+), x0, #?1 -+** ldff1sh z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_x0_u64_index, svuint64_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64base_index_u64 (p0, z0, x0), -+ z0_res = svldff1sh_gather_index_u64 (p0, z0, x0)) -+ -+/* -+** ldff1sh_gather_m1_u64_index: -+** mov (x[0-9]+), #?-2 -+** ldff1sh z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_m1_u64_index, svuint64_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64base_index_u64 (p0, z0, -1), -+ z0_res = svldff1sh_gather_index_u64 (p0, z0, -1)) -+ -+/* -+** ldff1sh_gather_0_u64_index: -+** ldff1sh z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_0_u64_index, svuint64_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64base_index_u64 (p0, z0, 0), -+ z0_res = svldff1sh_gather_index_u64 (p0, z0, 0)) 
-+ -+/* -+** ldff1sh_gather_5_u64_index: -+** ldff1sh z0\.d, p0/z, \[z0\.d, #10\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_5_u64_index, svuint64_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64base_index_u64 (p0, z0, 5), -+ z0_res = svldff1sh_gather_index_u64 (p0, z0, 5)) -+ -+/* -+** ldff1sh_gather_31_u64_index: -+** ldff1sh z0\.d, p0/z, \[z0\.d, #62\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_31_u64_index, svuint64_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64base_index_u64 (p0, z0, 31), -+ z0_res = svldff1sh_gather_index_u64 (p0, z0, 31)) -+ -+/* -+** ldff1sh_gather_32_u64_index: -+** mov (x[0-9]+), #?64 -+** ldff1sh z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sh_gather_32_u64_index, svuint64_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64base_index_u64 (p0, z0, 32), -+ z0_res = svldff1sh_gather_index_u64 (p0, z0, 32)) -+ -+/* -+** ldff1sh_gather_x0_u64_s64offset: -+** ldff1sh z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_x0_u64_s64offset, svuint64_t, int16_t, svint64_t, -+ z0_res = svldff1sh_gather_s64offset_u64 (p0, x0, z0), -+ z0_res = svldff1sh_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ldff1sh_gather_tied1_u64_s64offset: -+** ldff1sh z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_tied1_u64_s64offset, svuint64_t, int16_t, svint64_t, -+ z0_res = svldff1sh_gather_s64offset_u64 (p0, x0, z0), -+ z0_res = svldff1sh_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ldff1sh_gather_untied_u64_s64offset: -+** ldff1sh z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_untied_u64_s64offset, svuint64_t, int16_t, svint64_t, -+ z0_res = svldff1sh_gather_s64offset_u64 (p0, x0, z1), -+ z0_res = svldff1sh_gather_offset_u64 (p0, x0, z1)) -+ -+/* -+** ldff1sh_gather_ext_u64_s64offset: -+** ldff1sh z0\.d, p0/z, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_ext_u64_s64offset, svuint64_t, int16_t, svint64_t, -+ z0_res = svldff1sh_gather_s64offset_u64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svldff1sh_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ldff1sh_gather_x0_u64_u64offset: -+** ldff1sh z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_x0_u64_u64offset, svuint64_t, int16_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64offset_u64 (p0, x0, z0), -+ z0_res = svldff1sh_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ldff1sh_gather_tied1_u64_u64offset: -+** ldff1sh z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_tied1_u64_u64offset, svuint64_t, int16_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64offset_u64 (p0, x0, z0), -+ z0_res = svldff1sh_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ldff1sh_gather_untied_u64_u64offset: -+** ldff1sh z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_untied_u64_u64offset, svuint64_t, int16_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64offset_u64 (p0, x0, z1), -+ z0_res = svldff1sh_gather_offset_u64 (p0, x0, z1)) -+ -+/* -+** ldff1sh_gather_ext_u64_u64offset: -+** ldff1sh z0\.d, p0/z, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_ext_u64_u64offset, svuint64_t, int16_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64offset_u64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svldff1sh_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ldff1sh_gather_x0_u64_s64index: -+** ldff1sh z0\.d, p0/z, \[x0, z0\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_x0_u64_s64index, 
svuint64_t, int16_t, svint64_t, -+ z0_res = svldff1sh_gather_s64index_u64 (p0, x0, z0), -+ z0_res = svldff1sh_gather_index_u64 (p0, x0, z0)) -+ -+/* -+** ldff1sh_gather_tied1_u64_s64index: -+** ldff1sh z0\.d, p0/z, \[x0, z0\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_tied1_u64_s64index, svuint64_t, int16_t, svint64_t, -+ z0_res = svldff1sh_gather_s64index_u64 (p0, x0, z0), -+ z0_res = svldff1sh_gather_index_u64 (p0, x0, z0)) -+ -+/* -+** ldff1sh_gather_untied_u64_s64index: -+** ldff1sh z0\.d, p0/z, \[x0, z1\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_untied_u64_s64index, svuint64_t, int16_t, svint64_t, -+ z0_res = svldff1sh_gather_s64index_u64 (p0, x0, z1), -+ z0_res = svldff1sh_gather_index_u64 (p0, x0, z1)) -+ -+/* -+** ldff1sh_gather_ext_u64_s64index: -+** ldff1sh z0\.d, p0/z, \[x0, z1\.d, sxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_ext_u64_s64index, svuint64_t, int16_t, svint64_t, -+ z0_res = svldff1sh_gather_s64index_u64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svldff1sh_gather_index_u64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ldff1sh_gather_x0_u64_u64index: -+** ldff1sh z0\.d, p0/z, \[x0, z0\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_x0_u64_u64index, svuint64_t, int16_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64index_u64 (p0, x0, z0), -+ z0_res = svldff1sh_gather_index_u64 (p0, x0, z0)) -+ -+/* -+** ldff1sh_gather_tied1_u64_u64index: -+** ldff1sh z0\.d, p0/z, \[x0, z0\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_tied1_u64_u64index, svuint64_t, int16_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64index_u64 (p0, x0, z0), -+ z0_res = svldff1sh_gather_index_u64 (p0, x0, z0)) -+ -+/* -+** ldff1sh_gather_untied_u64_u64index: -+** ldff1sh z0\.d, p0/z, \[x0, z1\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_untied_u64_u64index, svuint64_t, int16_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64index_u64 (p0, x0, z1), -+ z0_res = svldff1sh_gather_index_u64 (p0, x0, z1)) -+ -+/* -+** ldff1sh_gather_ext_u64_u64index: -+** ldff1sh z0\.d, p0/z, \[x0, z1\.d, uxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sh_gather_ext_u64_u64index, svuint64_t, int16_t, svuint64_t, -+ z0_res = svldff1sh_gather_u64index_u64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svldff1sh_gather_index_u64 (p0, x0, svextw_x (p0, z1))) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_s32.c -new file mode 100644 -index 000000000..81b64e836 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_s32.c -@@ -0,0 +1,86 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1sh_s32_base: -+** ldff1sh z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sh_s32_base, svint32_t, int16_t, -+ z0 = svldff1sh_s32 (p0, x0), -+ z0 = svldff1sh_s32 (p0, x0)) -+ -+/* -+** ldff1sh_s32_index: -+** ldff1sh z0\.s, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ldff1sh_s32_index, svint32_t, int16_t, -+ z0 = svldff1sh_s32 (p0, x0 + x1), -+ z0 = svldff1sh_s32 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sh_s32_1: -+** inch x0 -+** ldff1sh z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sh_s32_1, svint32_t, int16_t, -+ z0 = svldff1sh_s32 (p0, x0 + svcntw ()), -+ z0 = svldff1sh_s32 (p0, x0 + svcntw ())) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ldff1sh_s32_m1: -+** dech x0 -+** ldff1sh z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sh_s32_m1, svint32_t, int16_t, -+ z0 = svldff1sh_s32 (p0, x0 - svcntw ()), -+ z0 = svldff1sh_s32 (p0, x0 - svcntw ())) -+ -+/* -+** ldff1sh_vnum_s32_0: -+** ldff1sh z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sh_vnum_s32_0, svint32_t, int16_t, -+ z0 = svldff1sh_vnum_s32 (p0, x0, 0), -+ z0 = svldff1sh_vnum_s32 (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sh_vnum_s32_1: -+** inch x0 -+** ldff1sh z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sh_vnum_s32_1, svint32_t, int16_t, -+ z0 = svldff1sh_vnum_s32 (p0, x0, 1), -+ z0 = svldff1sh_vnum_s32 (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sh_vnum_s32_m1: -+** dech x0 -+** ldff1sh z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sh_vnum_s32_m1, svint32_t, int16_t, -+ z0 = svldff1sh_vnum_s32 (p0, x0, -1), -+ z0 = svldff1sh_vnum_s32 (p0, x0, -1)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ldff1sh_vnum_s32_x1: -+** cnth (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ldff1sh z0\.s, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldff1sh_vnum_s32_x1, svint32_t, int16_t, -+ z0 = svldff1sh_vnum_s32 (p0, x0, x1), -+ z0 = svldff1sh_vnum_s32 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_s64.c -new file mode 100644 -index 000000000..453b3ff24 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_s64.c -@@ -0,0 +1,86 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1sh_s64_base: -+** ldff1sh z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sh_s64_base, svint64_t, int16_t, -+ z0 = svldff1sh_s64 (p0, x0), -+ z0 = svldff1sh_s64 (p0, x0)) -+ -+/* -+** ldff1sh_s64_index: -+** ldff1sh z0\.d, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ldff1sh_s64_index, svint64_t, int16_t, -+ z0 = svldff1sh_s64 (p0, x0 + x1), -+ z0 = svldff1sh_s64 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sh_s64_1: -+** incw x0 -+** ldff1sh z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sh_s64_1, svint64_t, int16_t, -+ z0 = svldff1sh_s64 (p0, x0 + svcntd ()), -+ z0 = svldff1sh_s64 (p0, x0 + svcntd ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sh_s64_m1: -+** decw x0 -+** ldff1sh z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sh_s64_m1, svint64_t, int16_t, -+ z0 = svldff1sh_s64 (p0, x0 - svcntd ()), -+ z0 = svldff1sh_s64 (p0, x0 - svcntd ())) -+ -+/* -+** ldff1sh_vnum_s64_0: -+** ldff1sh z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sh_vnum_s64_0, svint64_t, int16_t, -+ z0 = svldff1sh_vnum_s64 (p0, x0, 0), -+ z0 = svldff1sh_vnum_s64 (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sh_vnum_s64_1: -+** incw x0 -+** ldff1sh z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sh_vnum_s64_1, svint64_t, int16_t, -+ z0 = svldff1sh_vnum_s64 (p0, x0, 1), -+ z0 = svldff1sh_vnum_s64 (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ldff1sh_vnum_s64_m1: -+** decw x0 -+** ldff1sh z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sh_vnum_s64_m1, svint64_t, int16_t, -+ z0 = svldff1sh_vnum_s64 (p0, x0, -1), -+ z0 = svldff1sh_vnum_s64 (p0, x0, -1)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ldff1sh_vnum_s64_x1: -+** cntw (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ldff1sh z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldff1sh_vnum_s64_x1, svint64_t, int16_t, -+ z0 = svldff1sh_vnum_s64 (p0, x0, x1), -+ z0 = svldff1sh_vnum_s64 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_u32.c -new file mode 100644 -index 000000000..bbbed79dc ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_u32.c -@@ -0,0 +1,86 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1sh_u32_base: -+** ldff1sh z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sh_u32_base, svuint32_t, int16_t, -+ z0 = svldff1sh_u32 (p0, x0), -+ z0 = svldff1sh_u32 (p0, x0)) -+ -+/* -+** ldff1sh_u32_index: -+** ldff1sh z0\.s, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ldff1sh_u32_index, svuint32_t, int16_t, -+ z0 = svldff1sh_u32 (p0, x0 + x1), -+ z0 = svldff1sh_u32 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sh_u32_1: -+** inch x0 -+** ldff1sh z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sh_u32_1, svuint32_t, int16_t, -+ z0 = svldff1sh_u32 (p0, x0 + svcntw ()), -+ z0 = svldff1sh_u32 (p0, x0 + svcntw ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sh_u32_m1: -+** dech x0 -+** ldff1sh z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sh_u32_m1, svuint32_t, int16_t, -+ z0 = svldff1sh_u32 (p0, x0 - svcntw ()), -+ z0 = svldff1sh_u32 (p0, x0 - svcntw ())) -+ -+/* -+** ldff1sh_vnum_u32_0: -+** ldff1sh z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sh_vnum_u32_0, svuint32_t, int16_t, -+ z0 = svldff1sh_vnum_u32 (p0, x0, 0), -+ z0 = svldff1sh_vnum_u32 (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sh_vnum_u32_1: -+** inch x0 -+** ldff1sh z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sh_vnum_u32_1, svuint32_t, int16_t, -+ z0 = svldff1sh_vnum_u32 (p0, x0, 1), -+ z0 = svldff1sh_vnum_u32 (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sh_vnum_u32_m1: -+** dech x0 -+** ldff1sh z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sh_vnum_u32_m1, svuint32_t, int16_t, -+ z0 = svldff1sh_vnum_u32 (p0, x0, -1), -+ z0 = svldff1sh_vnum_u32 (p0, x0, -1)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ldff1sh_vnum_u32_x1: -+** cnth (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ldff1sh z0\.s, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldff1sh_vnum_u32_x1, svuint32_t, int16_t, -+ z0 = svldff1sh_vnum_u32 (p0, x0, x1), -+ z0 = svldff1sh_vnum_u32 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_u64.c -new file mode 100644 -index 000000000..5430e256b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sh_u64.c -@@ -0,0 +1,86 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1sh_u64_base: -+** ldff1sh z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sh_u64_base, svuint64_t, int16_t, -+ z0 = svldff1sh_u64 (p0, x0), -+ z0 = svldff1sh_u64 (p0, x0)) -+ -+/* -+** ldff1sh_u64_index: -+** ldff1sh z0\.d, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ldff1sh_u64_index, svuint64_t, int16_t, -+ z0 = svldff1sh_u64 (p0, x0 + x1), -+ z0 = svldff1sh_u64 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sh_u64_1: -+** incw x0 -+** ldff1sh z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sh_u64_1, svuint64_t, int16_t, -+ z0 = svldff1sh_u64 (p0, x0 + svcntd ()), -+ z0 = svldff1sh_u64 (p0, x0 + svcntd ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sh_u64_m1: -+** decw x0 -+** ldff1sh z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sh_u64_m1, svuint64_t, int16_t, -+ z0 = svldff1sh_u64 (p0, x0 - svcntd ()), -+ z0 = svldff1sh_u64 (p0, x0 - svcntd ())) -+ -+/* -+** ldff1sh_vnum_u64_0: -+** ldff1sh z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sh_vnum_u64_0, svuint64_t, int16_t, -+ z0 = svldff1sh_vnum_u64 (p0, x0, 0), -+ z0 = svldff1sh_vnum_u64 (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sh_vnum_u64_1: -+** incw x0 -+** ldff1sh z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sh_vnum_u64_1, svuint64_t, int16_t, -+ z0 = svldff1sh_vnum_u64 (p0, x0, 1), -+ z0 = svldff1sh_vnum_u64 (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sh_vnum_u64_m1: -+** decw x0 -+** ldff1sh z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sh_vnum_u64_m1, svuint64_t, int16_t, -+ z0 = svldff1sh_vnum_u64 (p0, x0, -1), -+ z0 = svldff1sh_vnum_u64 (p0, x0, -1)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ldff1sh_vnum_u64_x1: -+** cntw (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ldff1sh z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldff1sh_vnum_u64_x1, svuint64_t, int16_t, -+ z0 = svldff1sh_vnum_u64 (p0, x0, x1), -+ z0 = svldff1sh_vnum_u64 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_gather_s64.c -new file mode 100644 -index 000000000..e5da8a83d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_gather_s64.c -@@ -0,0 +1,308 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1sw_gather_s64_tied1: -+** ldff1sw z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sw_gather_s64_tied1, svint64_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64base_s64 (p0, z0), -+ z0_res = svldff1sw_gather_s64 (p0, z0)) -+ -+/* -+** ldff1sw_gather_s64_untied: -+** ldff1sw z0\.d, p0/z, \[z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sw_gather_s64_untied, svint64_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64base_s64 (p0, z1), -+ z0_res = svldff1sw_gather_s64 (p0, z1)) -+ -+/* -+** ldff1sw_gather_x0_s64_offset: -+** ldff1sw z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sw_gather_x0_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64base_offset_s64 (p0, z0, x0), -+ z0_res = svldff1sw_gather_offset_s64 (p0, z0, x0)) -+ -+/* -+** ldff1sw_gather_m4_s64_offset: -+** mov (x[0-9]+), #?-4 -+** ldff1sw z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sw_gather_m4_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64base_offset_s64 (p0, z0, -4), -+ z0_res = svldff1sw_gather_offset_s64 (p0, z0, -4)) -+ -+/* -+** ldff1sw_gather_0_s64_offset: -+** ldff1sw z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sw_gather_0_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64base_offset_s64 (p0, z0, 0), -+ z0_res = svldff1sw_gather_offset_s64 (p0, z0, 0)) -+ -+/* -+** ldff1sw_gather_5_s64_offset: -+** mov (x[0-9]+), #?5 -+** ldff1sw z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sw_gather_5_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64base_offset_s64 (p0, z0, 5), -+ z0_res = svldff1sw_gather_offset_s64 (p0, z0, 5)) -+ -+/* -+** ldff1sw_gather_6_s64_offset: -+** mov (x[0-9]+), #?6 -+** ldff1sw z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sw_gather_6_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64base_offset_s64 (p0, z0, 6), -+ z0_res = svldff1sw_gather_offset_s64 (p0, z0, 6)) -+ -+/* -+** ldff1sw_gather_7_s64_offset: -+** mov (x[0-9]+), #?7 -+** ldff1sw z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sw_gather_7_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64base_offset_s64 (p0, z0, 7), -+ z0_res = svldff1sw_gather_offset_s64 (p0, z0, 7)) -+ -+/* -+** ldff1sw_gather_8_s64_offset: -+** ldff1sw z0\.d, p0/z, \[z0\.d, #8\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sw_gather_8_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64base_offset_s64 (p0, z0, 8), -+ z0_res = svldff1sw_gather_offset_s64 (p0, z0, 8)) -+ -+/* -+** ldff1sw_gather_124_s64_offset: -+** ldff1sw z0\.d, p0/z, \[z0\.d, #124\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sw_gather_124_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64base_offset_s64 (p0, z0, 124), -+ z0_res = svldff1sw_gather_offset_s64 (p0, z0, 124)) -+ -+/* -+** ldff1sw_gather_128_s64_offset: -+** mov (x[0-9]+), #?128 -+** ldff1sw z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sw_gather_128_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64base_offset_s64 (p0, z0, 128), -+ z0_res = svldff1sw_gather_offset_s64 (p0, z0, 128)) -+ -+/* -+** ldff1sw_gather_x0_s64_index: -+** lsl (x[0-9]+), x0, #?2 -+** ldff1sw z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sw_gather_x0_s64_index, svint64_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64base_index_s64 (p0, z0, x0), -+ z0_res = 
svldff1sw_gather_index_s64 (p0, z0, x0)) -+ -+/* -+** ldff1sw_gather_m1_s64_index: -+** mov (x[0-9]+), #?-4 -+** ldff1sw z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sw_gather_m1_s64_index, svint64_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64base_index_s64 (p0, z0, -1), -+ z0_res = svldff1sw_gather_index_s64 (p0, z0, -1)) -+ -+/* -+** ldff1sw_gather_0_s64_index: -+** ldff1sw z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sw_gather_0_s64_index, svint64_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64base_index_s64 (p0, z0, 0), -+ z0_res = svldff1sw_gather_index_s64 (p0, z0, 0)) -+ -+/* -+** ldff1sw_gather_5_s64_index: -+** ldff1sw z0\.d, p0/z, \[z0\.d, #20\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sw_gather_5_s64_index, svint64_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64base_index_s64 (p0, z0, 5), -+ z0_res = svldff1sw_gather_index_s64 (p0, z0, 5)) -+ -+/* -+** ldff1sw_gather_31_s64_index: -+** ldff1sw z0\.d, p0/z, \[z0\.d, #124\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sw_gather_31_s64_index, svint64_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64base_index_s64 (p0, z0, 31), -+ z0_res = svldff1sw_gather_index_s64 (p0, z0, 31)) -+ -+/* -+** ldff1sw_gather_32_s64_index: -+** mov (x[0-9]+), #?128 -+** ldff1sw z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sw_gather_32_s64_index, svint64_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64base_index_s64 (p0, z0, 32), -+ z0_res = svldff1sw_gather_index_s64 (p0, z0, 32)) -+ -+/* -+** ldff1sw_gather_x0_s64_s64offset: -+** ldff1sw z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sw_gather_x0_s64_s64offset, svint64_t, int32_t, svint64_t, -+ z0_res = svldff1sw_gather_s64offset_s64 (p0, x0, z0), -+ z0_res = svldff1sw_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ldff1sw_gather_tied1_s64_s64offset: -+** ldff1sw z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sw_gather_tied1_s64_s64offset, svint64_t, int32_t, svint64_t, -+ z0_res = svldff1sw_gather_s64offset_s64 (p0, x0, z0), -+ z0_res = svldff1sw_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ldff1sw_gather_untied_s64_s64offset: -+** ldff1sw z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sw_gather_untied_s64_s64offset, svint64_t, int32_t, svint64_t, -+ z0_res = svldff1sw_gather_s64offset_s64 (p0, x0, z1), -+ z0_res = svldff1sw_gather_offset_s64 (p0, x0, z1)) -+ -+/* -+** ldff1sw_gather_ext_s64_s64offset: -+** ldff1sw z0\.d, p0/z, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sw_gather_ext_s64_s64offset, svint64_t, int32_t, svint64_t, -+ z0_res = svldff1sw_gather_s64offset_s64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svldff1sw_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ldff1sw_gather_x0_s64_u64offset: -+** ldff1sw z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sw_gather_x0_s64_u64offset, svint64_t, int32_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64offset_s64 (p0, x0, z0), -+ z0_res = svldff1sw_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ldff1sw_gather_tied1_s64_u64offset: -+** ldff1sw z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sw_gather_tied1_s64_u64offset, svint64_t, int32_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64offset_s64 (p0, x0, z0), -+ z0_res = svldff1sw_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ldff1sw_gather_untied_s64_u64offset: -+** ldff1sw z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sw_gather_untied_s64_u64offset, svint64_t, int32_t, 
svuint64_t, -+ z0_res = svldff1sw_gather_u64offset_s64 (p0, x0, z1), -+ z0_res = svldff1sw_gather_offset_s64 (p0, x0, z1)) -+ -+/* -+** ldff1sw_gather_ext_s64_u64offset: -+** ldff1sw z0\.d, p0/z, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sw_gather_ext_s64_u64offset, svint64_t, int32_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64offset_s64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svldff1sw_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ldff1sw_gather_x0_s64_s64index: -+** ldff1sw z0\.d, p0/z, \[x0, z0\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sw_gather_x0_s64_s64index, svint64_t, int32_t, svint64_t, -+ z0_res = svldff1sw_gather_s64index_s64 (p0, x0, z0), -+ z0_res = svldff1sw_gather_index_s64 (p0, x0, z0)) -+ -+/* -+** ldff1sw_gather_tied1_s64_s64index: -+** ldff1sw z0\.d, p0/z, \[x0, z0\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sw_gather_tied1_s64_s64index, svint64_t, int32_t, svint64_t, -+ z0_res = svldff1sw_gather_s64index_s64 (p0, x0, z0), -+ z0_res = svldff1sw_gather_index_s64 (p0, x0, z0)) -+ -+/* -+** ldff1sw_gather_untied_s64_s64index: -+** ldff1sw z0\.d, p0/z, \[x0, z1\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sw_gather_untied_s64_s64index, svint64_t, int32_t, svint64_t, -+ z0_res = svldff1sw_gather_s64index_s64 (p0, x0, z1), -+ z0_res = svldff1sw_gather_index_s64 (p0, x0, z1)) -+ -+/* -+** ldff1sw_gather_ext_s64_s64index: -+** ldff1sw z0\.d, p0/z, \[x0, z1\.d, sxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sw_gather_ext_s64_s64index, svint64_t, int32_t, svint64_t, -+ z0_res = svldff1sw_gather_s64index_s64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svldff1sw_gather_index_s64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ldff1sw_gather_x0_s64_u64index: -+** ldff1sw z0\.d, p0/z, \[x0, z0\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sw_gather_x0_s64_u64index, svint64_t, int32_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64index_s64 (p0, x0, z0), -+ z0_res = svldff1sw_gather_index_s64 (p0, x0, z0)) -+ -+/* -+** ldff1sw_gather_tied1_s64_u64index: -+** ldff1sw z0\.d, p0/z, \[x0, z0\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sw_gather_tied1_s64_u64index, svint64_t, int32_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64index_s64 (p0, x0, z0), -+ z0_res = svldff1sw_gather_index_s64 (p0, x0, z0)) -+ -+/* -+** ldff1sw_gather_untied_s64_u64index: -+** ldff1sw z0\.d, p0/z, \[x0, z1\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sw_gather_untied_s64_u64index, svint64_t, int32_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64index_s64 (p0, x0, z1), -+ z0_res = svldff1sw_gather_index_s64 (p0, x0, z1)) -+ -+/* -+** ldff1sw_gather_ext_s64_u64index: -+** ldff1sw z0\.d, p0/z, \[x0, z1\.d, uxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sw_gather_ext_s64_u64index, svint64_t, int32_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64index_s64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svldff1sw_gather_index_s64 (p0, x0, svextw_x (p0, z1))) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_gather_u64.c -new file mode 100644 -index 000000000..411428756 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_gather_u64.c -@@ -0,0 +1,308 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1sw_gather_u64_tied1: -+** ldff1sw z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sw_gather_u64_tied1, svuint64_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64base_u64 (p0, z0), -+ z0_res = svldff1sw_gather_u64 (p0, z0)) -+ -+/* -+** ldff1sw_gather_u64_untied: -+** ldff1sw z0\.d, p0/z, \[z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sw_gather_u64_untied, svuint64_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64base_u64 (p0, z1), -+ z0_res = svldff1sw_gather_u64 (p0, z1)) -+ -+/* -+** ldff1sw_gather_x0_u64_offset: -+** ldff1sw z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sw_gather_x0_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64base_offset_u64 (p0, z0, x0), -+ z0_res = svldff1sw_gather_offset_u64 (p0, z0, x0)) -+ -+/* -+** ldff1sw_gather_m4_u64_offset: -+** mov (x[0-9]+), #?-4 -+** ldff1sw z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sw_gather_m4_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64base_offset_u64 (p0, z0, -4), -+ z0_res = svldff1sw_gather_offset_u64 (p0, z0, -4)) -+ -+/* -+** ldff1sw_gather_0_u64_offset: -+** ldff1sw z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sw_gather_0_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64base_offset_u64 (p0, z0, 0), -+ z0_res = svldff1sw_gather_offset_u64 (p0, z0, 0)) -+ -+/* -+** ldff1sw_gather_5_u64_offset: -+** mov (x[0-9]+), #?5 -+** ldff1sw z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sw_gather_5_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64base_offset_u64 (p0, z0, 5), -+ z0_res = svldff1sw_gather_offset_u64 (p0, z0, 5)) -+ -+/* -+** ldff1sw_gather_6_u64_offset: -+** mov (x[0-9]+), #?6 -+** ldff1sw z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sw_gather_6_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64base_offset_u64 (p0, z0, 6), -+ z0_res = svldff1sw_gather_offset_u64 (p0, z0, 6)) -+ -+/* -+** ldff1sw_gather_7_u64_offset: -+** mov (x[0-9]+), #?7 -+** ldff1sw z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sw_gather_7_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64base_offset_u64 (p0, z0, 7), -+ z0_res = svldff1sw_gather_offset_u64 (p0, z0, 7)) -+ -+/* -+** ldff1sw_gather_8_u64_offset: -+** ldff1sw z0\.d, p0/z, \[z0\.d, #8\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sw_gather_8_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64base_offset_u64 (p0, z0, 8), -+ z0_res = svldff1sw_gather_offset_u64 (p0, z0, 8)) -+ -+/* -+** ldff1sw_gather_124_u64_offset: -+** ldff1sw z0\.d, p0/z, \[z0\.d, #124\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sw_gather_124_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64base_offset_u64 (p0, z0, 124), -+ z0_res = svldff1sw_gather_offset_u64 (p0, z0, 124)) -+ -+/* -+** ldff1sw_gather_128_u64_offset: -+** mov (x[0-9]+), #?128 -+** ldff1sw z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sw_gather_128_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64base_offset_u64 (p0, z0, 128), -+ z0_res = svldff1sw_gather_offset_u64 (p0, z0, 128)) -+ -+/* -+** ldff1sw_gather_x0_u64_index: -+** lsl (x[0-9]+), x0, #?2 -+** ldff1sw z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sw_gather_x0_u64_index, svuint64_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64base_index_u64 (p0, z0, x0), -+ z0_res = 
svldff1sw_gather_index_u64 (p0, z0, x0)) -+ -+/* -+** ldff1sw_gather_m1_u64_index: -+** mov (x[0-9]+), #?-4 -+** ldff1sw z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sw_gather_m1_u64_index, svuint64_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64base_index_u64 (p0, z0, -1), -+ z0_res = svldff1sw_gather_index_u64 (p0, z0, -1)) -+ -+/* -+** ldff1sw_gather_0_u64_index: -+** ldff1sw z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sw_gather_0_u64_index, svuint64_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64base_index_u64 (p0, z0, 0), -+ z0_res = svldff1sw_gather_index_u64 (p0, z0, 0)) -+ -+/* -+** ldff1sw_gather_5_u64_index: -+** ldff1sw z0\.d, p0/z, \[z0\.d, #20\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sw_gather_5_u64_index, svuint64_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64base_index_u64 (p0, z0, 5), -+ z0_res = svldff1sw_gather_index_u64 (p0, z0, 5)) -+ -+/* -+** ldff1sw_gather_31_u64_index: -+** ldff1sw z0\.d, p0/z, \[z0\.d, #124\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sw_gather_31_u64_index, svuint64_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64base_index_u64 (p0, z0, 31), -+ z0_res = svldff1sw_gather_index_u64 (p0, z0, 31)) -+ -+/* -+** ldff1sw_gather_32_u64_index: -+** mov (x[0-9]+), #?128 -+** ldff1sw z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1sw_gather_32_u64_index, svuint64_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64base_index_u64 (p0, z0, 32), -+ z0_res = svldff1sw_gather_index_u64 (p0, z0, 32)) -+ -+/* -+** ldff1sw_gather_x0_u64_s64offset: -+** ldff1sw z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sw_gather_x0_u64_s64offset, svuint64_t, int32_t, svint64_t, -+ z0_res = svldff1sw_gather_s64offset_u64 (p0, x0, z0), -+ z0_res = svldff1sw_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ldff1sw_gather_tied1_u64_s64offset: -+** ldff1sw z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sw_gather_tied1_u64_s64offset, svuint64_t, int32_t, svint64_t, -+ z0_res = svldff1sw_gather_s64offset_u64 (p0, x0, z0), -+ z0_res = svldff1sw_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ldff1sw_gather_untied_u64_s64offset: -+** ldff1sw z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sw_gather_untied_u64_s64offset, svuint64_t, int32_t, svint64_t, -+ z0_res = svldff1sw_gather_s64offset_u64 (p0, x0, z1), -+ z0_res = svldff1sw_gather_offset_u64 (p0, x0, z1)) -+ -+/* -+** ldff1sw_gather_ext_u64_s64offset: -+** ldff1sw z0\.d, p0/z, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sw_gather_ext_u64_s64offset, svuint64_t, int32_t, svint64_t, -+ z0_res = svldff1sw_gather_s64offset_u64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svldff1sw_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ldff1sw_gather_x0_u64_u64offset: -+** ldff1sw z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sw_gather_x0_u64_u64offset, svuint64_t, int32_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64offset_u64 (p0, x0, z0), -+ z0_res = svldff1sw_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ldff1sw_gather_tied1_u64_u64offset: -+** ldff1sw z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sw_gather_tied1_u64_u64offset, svuint64_t, int32_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64offset_u64 (p0, x0, z0), -+ z0_res = svldff1sw_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ldff1sw_gather_untied_u64_u64offset: -+** ldff1sw z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sw_gather_untied_u64_u64offset, svuint64_t, 
int32_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64offset_u64 (p0, x0, z1), -+ z0_res = svldff1sw_gather_offset_u64 (p0, x0, z1)) -+ -+/* -+** ldff1sw_gather_ext_u64_u64offset: -+** ldff1sw z0\.d, p0/z, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sw_gather_ext_u64_u64offset, svuint64_t, int32_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64offset_u64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svldff1sw_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ldff1sw_gather_x0_u64_s64index: -+** ldff1sw z0\.d, p0/z, \[x0, z0\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sw_gather_x0_u64_s64index, svuint64_t, int32_t, svint64_t, -+ z0_res = svldff1sw_gather_s64index_u64 (p0, x0, z0), -+ z0_res = svldff1sw_gather_index_u64 (p0, x0, z0)) -+ -+/* -+** ldff1sw_gather_tied1_u64_s64index: -+** ldff1sw z0\.d, p0/z, \[x0, z0\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sw_gather_tied1_u64_s64index, svuint64_t, int32_t, svint64_t, -+ z0_res = svldff1sw_gather_s64index_u64 (p0, x0, z0), -+ z0_res = svldff1sw_gather_index_u64 (p0, x0, z0)) -+ -+/* -+** ldff1sw_gather_untied_u64_s64index: -+** ldff1sw z0\.d, p0/z, \[x0, z1\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sw_gather_untied_u64_s64index, svuint64_t, int32_t, svint64_t, -+ z0_res = svldff1sw_gather_s64index_u64 (p0, x0, z1), -+ z0_res = svldff1sw_gather_index_u64 (p0, x0, z1)) -+ -+/* -+** ldff1sw_gather_ext_u64_s64index: -+** ldff1sw z0\.d, p0/z, \[x0, z1\.d, sxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sw_gather_ext_u64_s64index, svuint64_t, int32_t, svint64_t, -+ z0_res = svldff1sw_gather_s64index_u64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svldff1sw_gather_index_u64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ldff1sw_gather_x0_u64_u64index: -+** ldff1sw z0\.d, p0/z, \[x0, z0\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sw_gather_x0_u64_u64index, svuint64_t, int32_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64index_u64 (p0, x0, z0), -+ z0_res = svldff1sw_gather_index_u64 (p0, x0, z0)) -+ -+/* -+** ldff1sw_gather_tied1_u64_u64index: -+** ldff1sw z0\.d, p0/z, \[x0, z0\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sw_gather_tied1_u64_u64index, svuint64_t, int32_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64index_u64 (p0, x0, z0), -+ z0_res = svldff1sw_gather_index_u64 (p0, x0, z0)) -+ -+/* -+** ldff1sw_gather_untied_u64_u64index: -+** ldff1sw z0\.d, p0/z, \[x0, z1\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sw_gather_untied_u64_u64index, svuint64_t, int32_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64index_u64 (p0, x0, z1), -+ z0_res = svldff1sw_gather_index_u64 (p0, x0, z1)) -+ -+/* -+** ldff1sw_gather_ext_u64_u64index: -+** ldff1sw z0\.d, p0/z, \[x0, z1\.d, uxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1sw_gather_ext_u64_u64index, svuint64_t, int32_t, svuint64_t, -+ z0_res = svldff1sw_gather_u64index_u64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svldff1sw_gather_index_u64 (p0, x0, svextw_x (p0, z1))) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_s64.c -new file mode 100644 -index 000000000..d795ace63 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_s64.c -@@ -0,0 +1,86 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1sw_s64_base: -+** ldff1sw z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sw_s64_base, svint64_t, int32_t, -+ z0 = svldff1sw_s64 (p0, x0), -+ z0 = svldff1sw_s64 (p0, x0)) -+ -+/* -+** ldff1sw_s64_index: -+** ldff1sw z0\.d, p0/z, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_LOAD (ldff1sw_s64_index, svint64_t, int32_t, -+ z0 = svldff1sw_s64 (p0, x0 + x1), -+ z0 = svldff1sw_s64 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sw_s64_1: -+** inch x0 -+** ldff1sw z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sw_s64_1, svint64_t, int32_t, -+ z0 = svldff1sw_s64 (p0, x0 + svcntd ()), -+ z0 = svldff1sw_s64 (p0, x0 + svcntd ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sw_s64_m1: -+** dech x0 -+** ldff1sw z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sw_s64_m1, svint64_t, int32_t, -+ z0 = svldff1sw_s64 (p0, x0 - svcntd ()), -+ z0 = svldff1sw_s64 (p0, x0 - svcntd ())) -+ -+/* -+** ldff1sw_vnum_s64_0: -+** ldff1sw z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sw_vnum_s64_0, svint64_t, int32_t, -+ z0 = svldff1sw_vnum_s64 (p0, x0, 0), -+ z0 = svldff1sw_vnum_s64 (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sw_vnum_s64_1: -+** inch x0 -+** ldff1sw z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sw_vnum_s64_1, svint64_t, int32_t, -+ z0 = svldff1sw_vnum_s64 (p0, x0, 1), -+ z0 = svldff1sw_vnum_s64 (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sw_vnum_s64_m1: -+** dech x0 -+** ldff1sw z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sw_vnum_s64_m1, svint64_t, int32_t, -+ z0 = svldff1sw_vnum_s64 (p0, x0, -1), -+ z0 = svldff1sw_vnum_s64 (p0, x0, -1)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ldff1sw_vnum_s64_x1: -+** cnth (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ldff1sw z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldff1sw_vnum_s64_x1, svint64_t, int32_t, -+ z0 = svldff1sw_vnum_s64 (p0, x0, x1), -+ z0 = svldff1sw_vnum_s64 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_u64.c -new file mode 100644 -index 000000000..6caf2f504 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1sw_u64.c -@@ -0,0 +1,86 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1sw_u64_base: -+** ldff1sw z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sw_u64_base, svuint64_t, int32_t, -+ z0 = svldff1sw_u64 (p0, x0), -+ z0 = svldff1sw_u64 (p0, x0)) -+ -+/* -+** ldff1sw_u64_index: -+** ldff1sw z0\.d, p0/z, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_LOAD (ldff1sw_u64_index, svuint64_t, int32_t, -+ z0 = svldff1sw_u64 (p0, x0 + x1), -+ z0 = svldff1sw_u64 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sw_u64_1: -+** inch x0 -+** ldff1sw z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sw_u64_1, svuint64_t, int32_t, -+ z0 = svldff1sw_u64 (p0, x0 + svcntd ()), -+ z0 = svldff1sw_u64 (p0, x0 + svcntd ())) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ldff1sw_u64_m1: -+** dech x0 -+** ldff1sw z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sw_u64_m1, svuint64_t, int32_t, -+ z0 = svldff1sw_u64 (p0, x0 - svcntd ()), -+ z0 = svldff1sw_u64 (p0, x0 - svcntd ())) -+ -+/* -+** ldff1sw_vnum_u64_0: -+** ldff1sw z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sw_vnum_u64_0, svuint64_t, int32_t, -+ z0 = svldff1sw_vnum_u64 (p0, x0, 0), -+ z0 = svldff1sw_vnum_u64 (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sw_vnum_u64_1: -+** inch x0 -+** ldff1sw z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sw_vnum_u64_1, svuint64_t, int32_t, -+ z0 = svldff1sw_vnum_u64 (p0, x0, 1), -+ z0 = svldff1sw_vnum_u64 (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1sw_vnum_u64_m1: -+** dech x0 -+** ldff1sw z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1sw_vnum_u64_m1, svuint64_t, int32_t, -+ z0 = svldff1sw_vnum_u64 (p0, x0, -1), -+ z0 = svldff1sw_vnum_u64 (p0, x0, -1)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ldff1sw_vnum_u64_x1: -+** cnth (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ldff1sw z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldff1sw_vnum_u64_x1, svuint64_t, int32_t, -+ z0 = svldff1sw_vnum_u64 (p0, x0, x1), -+ z0 = svldff1sw_vnum_u64 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_s32.c -new file mode 100644 -index 000000000..af0be08d2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_s32.c -@@ -0,0 +1,131 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1ub_gather_s32_tied1: -+** ldff1b z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1ub_gather_s32_tied1, svint32_t, svuint32_t, -+ z0_res = svldff1ub_gather_u32base_s32 (p0, z0), -+ z0_res = svldff1ub_gather_s32 (p0, z0)) -+ -+/* -+** ldff1ub_gather_s32_untied: -+** ldff1b z0\.s, p0/z, \[z1\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1ub_gather_s32_untied, svint32_t, svuint32_t, -+ z0_res = svldff1ub_gather_u32base_s32 (p0, z1), -+ z0_res = svldff1ub_gather_s32 (p0, z1)) -+ -+/* -+** ldff1ub_gather_x0_s32_offset: -+** ldff1b z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1ub_gather_x0_s32_offset, svint32_t, svuint32_t, -+ z0_res = svldff1ub_gather_u32base_offset_s32 (p0, z0, x0), -+ z0_res = svldff1ub_gather_offset_s32 (p0, z0, x0)) -+ -+/* -+** ldff1ub_gather_m1_s32_offset: -+** mov (x[0-9]+), #?-1 -+** ldff1b z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1ub_gather_m1_s32_offset, svint32_t, svuint32_t, -+ z0_res = svldff1ub_gather_u32base_offset_s32 (p0, z0, -1), -+ z0_res = svldff1ub_gather_offset_s32 (p0, z0, -1)) -+ -+/* -+** ldff1ub_gather_0_s32_offset: -+** ldff1b z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1ub_gather_0_s32_offset, svint32_t, svuint32_t, -+ z0_res = svldff1ub_gather_u32base_offset_s32 (p0, z0, 0), -+ z0_res = svldff1ub_gather_offset_s32 (p0, z0, 0)) -+ -+/* -+** ldff1ub_gather_5_s32_offset: -+** ldff1b z0\.s, p0/z, \[z0\.s, #5\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1ub_gather_5_s32_offset, svint32_t, svuint32_t, -+ z0_res = svldff1ub_gather_u32base_offset_s32 (p0, z0, 5), -+ z0_res = svldff1ub_gather_offset_s32 (p0, z0, 5)) -+ -+/* -+** ldff1ub_gather_31_s32_offset: -+** ldff1b z0\.s, p0/z, 
\[z0\.s, #31\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1ub_gather_31_s32_offset, svint32_t, svuint32_t, -+ z0_res = svldff1ub_gather_u32base_offset_s32 (p0, z0, 31), -+ z0_res = svldff1ub_gather_offset_s32 (p0, z0, 31)) -+ -+/* -+** ldff1ub_gather_32_s32_offset: -+** mov (x[0-9]+), #?32 -+** ldff1b z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1ub_gather_32_s32_offset, svint32_t, svuint32_t, -+ z0_res = svldff1ub_gather_u32base_offset_s32 (p0, z0, 32), -+ z0_res = svldff1ub_gather_offset_s32 (p0, z0, 32)) -+ -+/* -+** ldff1ub_gather_x0_s32_s32offset: -+** ldff1b z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1ub_gather_x0_s32_s32offset, svint32_t, uint8_t, svint32_t, -+ z0_res = svldff1ub_gather_s32offset_s32 (p0, x0, z0), -+ z0_res = svldff1ub_gather_offset_s32 (p0, x0, z0)) -+ -+/* -+** ldff1ub_gather_tied1_s32_s32offset: -+** ldff1b z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1ub_gather_tied1_s32_s32offset, svint32_t, uint8_t, svint32_t, -+ z0_res = svldff1ub_gather_s32offset_s32 (p0, x0, z0), -+ z0_res = svldff1ub_gather_offset_s32 (p0, x0, z0)) -+ -+/* -+** ldff1ub_gather_untied_s32_s32offset: -+** ldff1b z0\.s, p0/z, \[x0, z1\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1ub_gather_untied_s32_s32offset, svint32_t, uint8_t, svint32_t, -+ z0_res = svldff1ub_gather_s32offset_s32 (p0, x0, z1), -+ z0_res = svldff1ub_gather_offset_s32 (p0, x0, z1)) -+ -+/* -+** ldff1ub_gather_x0_s32_u32offset: -+** ldff1b z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1ub_gather_x0_s32_u32offset, svint32_t, uint8_t, svuint32_t, -+ z0_res = svldff1ub_gather_u32offset_s32 (p0, x0, z0), -+ z0_res = svldff1ub_gather_offset_s32 (p0, x0, z0)) -+ -+/* -+** ldff1ub_gather_tied1_s32_u32offset: -+** ldff1b z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1ub_gather_tied1_s32_u32offset, svint32_t, uint8_t, svuint32_t, -+ z0_res = svldff1ub_gather_u32offset_s32 (p0, x0, z0), -+ z0_res = svldff1ub_gather_offset_s32 (p0, x0, z0)) -+ -+/* -+** ldff1ub_gather_untied_s32_u32offset: -+** ldff1b z0\.s, p0/z, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1ub_gather_untied_s32_u32offset, svint32_t, uint8_t, svuint32_t, -+ z0_res = svldff1ub_gather_u32offset_s32 (p0, x0, z1), -+ z0_res = svldff1ub_gather_offset_s32 (p0, x0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_s64.c -new file mode 100644 -index 000000000..43124dd89 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_s64.c -@@ -0,0 +1,149 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1ub_gather_s64_tied1: -+** ldff1b z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1ub_gather_s64_tied1, svint64_t, svuint64_t, -+ z0_res = svldff1ub_gather_u64base_s64 (p0, z0), -+ z0_res = svldff1ub_gather_s64 (p0, z0)) -+ -+/* -+** ldff1ub_gather_s64_untied: -+** ldff1b z0\.d, p0/z, \[z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1ub_gather_s64_untied, svint64_t, svuint64_t, -+ z0_res = svldff1ub_gather_u64base_s64 (p0, z1), -+ z0_res = svldff1ub_gather_s64 (p0, z1)) -+ -+/* -+** ldff1ub_gather_x0_s64_offset: -+** ldff1b z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1ub_gather_x0_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1ub_gather_u64base_offset_s64 (p0, z0, x0), -+ z0_res = svldff1ub_gather_offset_s64 (p0, z0, x0)) -+ -+/* -+** ldff1ub_gather_m1_s64_offset: -+** mov (x[0-9]+), #?-1 -+** ldff1b z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1ub_gather_m1_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1ub_gather_u64base_offset_s64 (p0, z0, -1), -+ z0_res = svldff1ub_gather_offset_s64 (p0, z0, -1)) -+ -+/* -+** ldff1ub_gather_0_s64_offset: -+** ldff1b z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1ub_gather_0_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1ub_gather_u64base_offset_s64 (p0, z0, 0), -+ z0_res = svldff1ub_gather_offset_s64 (p0, z0, 0)) -+ -+/* -+** ldff1ub_gather_5_s64_offset: -+** ldff1b z0\.d, p0/z, \[z0\.d, #5\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1ub_gather_5_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1ub_gather_u64base_offset_s64 (p0, z0, 5), -+ z0_res = svldff1ub_gather_offset_s64 (p0, z0, 5)) -+ -+/* -+** ldff1ub_gather_31_s64_offset: -+** ldff1b z0\.d, p0/z, \[z0\.d, #31\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1ub_gather_31_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1ub_gather_u64base_offset_s64 (p0, z0, 31), -+ z0_res = svldff1ub_gather_offset_s64 (p0, z0, 31)) -+ -+/* -+** ldff1ub_gather_32_s64_offset: -+** mov (x[0-9]+), #?32 -+** ldff1b z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1ub_gather_32_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1ub_gather_u64base_offset_s64 (p0, z0, 32), -+ z0_res = svldff1ub_gather_offset_s64 (p0, z0, 32)) -+ -+/* -+** ldff1ub_gather_x0_s64_s64offset: -+** ldff1b z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1ub_gather_x0_s64_s64offset, svint64_t, uint8_t, svint64_t, -+ z0_res = svldff1ub_gather_s64offset_s64 (p0, x0, z0), -+ z0_res = svldff1ub_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ldff1ub_gather_tied1_s64_s64offset: -+** ldff1b z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1ub_gather_tied1_s64_s64offset, svint64_t, uint8_t, svint64_t, -+ z0_res = svldff1ub_gather_s64offset_s64 (p0, x0, z0), -+ z0_res = svldff1ub_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ldff1ub_gather_untied_s64_s64offset: -+** ldff1b z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1ub_gather_untied_s64_s64offset, svint64_t, uint8_t, svint64_t, -+ z0_res = svldff1ub_gather_s64offset_s64 (p0, x0, z1), -+ z0_res = svldff1ub_gather_offset_s64 (p0, x0, z1)) -+ -+/* -+** ldff1ub_gather_ext_s64_s64offset: -+** ldff1b z0\.d, p0/z, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1ub_gather_ext_s64_s64offset, svint64_t, uint8_t, svint64_t, -+ z0_res = svldff1ub_gather_s64offset_s64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svldff1ub_gather_offset_s64 (p0, x0, 
svextw_x (p0, z1))) -+ -+/* -+** ldff1ub_gather_x0_s64_u64offset: -+** ldff1b z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1ub_gather_x0_s64_u64offset, svint64_t, uint8_t, svuint64_t, -+ z0_res = svldff1ub_gather_u64offset_s64 (p0, x0, z0), -+ z0_res = svldff1ub_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ldff1ub_gather_tied1_s64_u64offset: -+** ldff1b z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1ub_gather_tied1_s64_u64offset, svint64_t, uint8_t, svuint64_t, -+ z0_res = svldff1ub_gather_u64offset_s64 (p0, x0, z0), -+ z0_res = svldff1ub_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ldff1ub_gather_untied_s64_u64offset: -+** ldff1b z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1ub_gather_untied_s64_u64offset, svint64_t, uint8_t, svuint64_t, -+ z0_res = svldff1ub_gather_u64offset_s64 (p0, x0, z1), -+ z0_res = svldff1ub_gather_offset_s64 (p0, x0, z1)) -+ -+/* -+** ldff1ub_gather_ext_s64_u64offset: -+** ldff1b z0\.d, p0/z, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1ub_gather_ext_s64_u64offset, svint64_t, uint8_t, svuint64_t, -+ z0_res = svldff1ub_gather_u64offset_s64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svldff1ub_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_u32.c -new file mode 100644 -index 000000000..90c4e58a2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_u32.c -@@ -0,0 +1,131 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1ub_gather_u32_tied1: -+** ldff1b z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1ub_gather_u32_tied1, svuint32_t, svuint32_t, -+ z0_res = svldff1ub_gather_u32base_u32 (p0, z0), -+ z0_res = svldff1ub_gather_u32 (p0, z0)) -+ -+/* -+** ldff1ub_gather_u32_untied: -+** ldff1b z0\.s, p0/z, \[z1\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1ub_gather_u32_untied, svuint32_t, svuint32_t, -+ z0_res = svldff1ub_gather_u32base_u32 (p0, z1), -+ z0_res = svldff1ub_gather_u32 (p0, z1)) -+ -+/* -+** ldff1ub_gather_x0_u32_offset: -+** ldff1b z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1ub_gather_x0_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svldff1ub_gather_u32base_offset_u32 (p0, z0, x0), -+ z0_res = svldff1ub_gather_offset_u32 (p0, z0, x0)) -+ -+/* -+** ldff1ub_gather_m1_u32_offset: -+** mov (x[0-9]+), #?-1 -+** ldff1b z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1ub_gather_m1_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svldff1ub_gather_u32base_offset_u32 (p0, z0, -1), -+ z0_res = svldff1ub_gather_offset_u32 (p0, z0, -1)) -+ -+/* -+** ldff1ub_gather_0_u32_offset: -+** ldff1b z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1ub_gather_0_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svldff1ub_gather_u32base_offset_u32 (p0, z0, 0), -+ z0_res = svldff1ub_gather_offset_u32 (p0, z0, 0)) -+ -+/* -+** ldff1ub_gather_5_u32_offset: -+** ldff1b z0\.s, p0/z, \[z0\.s, #5\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1ub_gather_5_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svldff1ub_gather_u32base_offset_u32 (p0, z0, 5), -+ z0_res = svldff1ub_gather_offset_u32 (p0, z0, 5)) -+ -+/* -+** ldff1ub_gather_31_u32_offset: -+** ldff1b z0\.s, p0/z, \[z0\.s, #31\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1ub_gather_31_u32_offset, svuint32_t, 
svuint32_t, -+ z0_res = svldff1ub_gather_u32base_offset_u32 (p0, z0, 31), -+ z0_res = svldff1ub_gather_offset_u32 (p0, z0, 31)) -+ -+/* -+** ldff1ub_gather_32_u32_offset: -+** mov (x[0-9]+), #?32 -+** ldff1b z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1ub_gather_32_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svldff1ub_gather_u32base_offset_u32 (p0, z0, 32), -+ z0_res = svldff1ub_gather_offset_u32 (p0, z0, 32)) -+ -+/* -+** ldff1ub_gather_x0_u32_s32offset: -+** ldff1b z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1ub_gather_x0_u32_s32offset, svuint32_t, uint8_t, svint32_t, -+ z0_res = svldff1ub_gather_s32offset_u32 (p0, x0, z0), -+ z0_res = svldff1ub_gather_offset_u32 (p0, x0, z0)) -+ -+/* -+** ldff1ub_gather_tied1_u32_s32offset: -+** ldff1b z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1ub_gather_tied1_u32_s32offset, svuint32_t, uint8_t, svint32_t, -+ z0_res = svldff1ub_gather_s32offset_u32 (p0, x0, z0), -+ z0_res = svldff1ub_gather_offset_u32 (p0, x0, z0)) -+ -+/* -+** ldff1ub_gather_untied_u32_s32offset: -+** ldff1b z0\.s, p0/z, \[x0, z1\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1ub_gather_untied_u32_s32offset, svuint32_t, uint8_t, svint32_t, -+ z0_res = svldff1ub_gather_s32offset_u32 (p0, x0, z1), -+ z0_res = svldff1ub_gather_offset_u32 (p0, x0, z1)) -+ -+/* -+** ldff1ub_gather_x0_u32_u32offset: -+** ldff1b z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1ub_gather_x0_u32_u32offset, svuint32_t, uint8_t, svuint32_t, -+ z0_res = svldff1ub_gather_u32offset_u32 (p0, x0, z0), -+ z0_res = svldff1ub_gather_offset_u32 (p0, x0, z0)) -+ -+/* -+** ldff1ub_gather_tied1_u32_u32offset: -+** ldff1b z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1ub_gather_tied1_u32_u32offset, svuint32_t, uint8_t, svuint32_t, -+ z0_res = svldff1ub_gather_u32offset_u32 (p0, x0, z0), -+ z0_res = svldff1ub_gather_offset_u32 (p0, x0, z0)) -+ -+/* -+** ldff1ub_gather_untied_u32_u32offset: -+** ldff1b z0\.s, p0/z, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1ub_gather_untied_u32_u32offset, svuint32_t, uint8_t, svuint32_t, -+ z0_res = svldff1ub_gather_u32offset_u32 (p0, x0, z1), -+ z0_res = svldff1ub_gather_offset_u32 (p0, x0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_u64.c -new file mode 100644 -index 000000000..302623a40 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_gather_u64.c -@@ -0,0 +1,149 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1ub_gather_u64_tied1: -+** ldff1b z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1ub_gather_u64_tied1, svuint64_t, svuint64_t, -+ z0_res = svldff1ub_gather_u64base_u64 (p0, z0), -+ z0_res = svldff1ub_gather_u64 (p0, z0)) -+ -+/* -+** ldff1ub_gather_u64_untied: -+** ldff1b z0\.d, p0/z, \[z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1ub_gather_u64_untied, svuint64_t, svuint64_t, -+ z0_res = svldff1ub_gather_u64base_u64 (p0, z1), -+ z0_res = svldff1ub_gather_u64 (p0, z1)) -+ -+/* -+** ldff1ub_gather_x0_u64_offset: -+** ldff1b z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1ub_gather_x0_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1ub_gather_u64base_offset_u64 (p0, z0, x0), -+ z0_res = svldff1ub_gather_offset_u64 (p0, z0, x0)) -+ -+/* -+** ldff1ub_gather_m1_u64_offset: -+** mov (x[0-9]+), #?-1 -+** ldff1b z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1ub_gather_m1_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1ub_gather_u64base_offset_u64 (p0, z0, -1), -+ z0_res = svldff1ub_gather_offset_u64 (p0, z0, -1)) -+ -+/* -+** ldff1ub_gather_0_u64_offset: -+** ldff1b z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1ub_gather_0_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1ub_gather_u64base_offset_u64 (p0, z0, 0), -+ z0_res = svldff1ub_gather_offset_u64 (p0, z0, 0)) -+ -+/* -+** ldff1ub_gather_5_u64_offset: -+** ldff1b z0\.d, p0/z, \[z0\.d, #5\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1ub_gather_5_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1ub_gather_u64base_offset_u64 (p0, z0, 5), -+ z0_res = svldff1ub_gather_offset_u64 (p0, z0, 5)) -+ -+/* -+** ldff1ub_gather_31_u64_offset: -+** ldff1b z0\.d, p0/z, \[z0\.d, #31\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1ub_gather_31_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1ub_gather_u64base_offset_u64 (p0, z0, 31), -+ z0_res = svldff1ub_gather_offset_u64 (p0, z0, 31)) -+ -+/* -+** ldff1ub_gather_32_u64_offset: -+** mov (x[0-9]+), #?32 -+** ldff1b z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1ub_gather_32_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1ub_gather_u64base_offset_u64 (p0, z0, 32), -+ z0_res = svldff1ub_gather_offset_u64 (p0, z0, 32)) -+ -+/* -+** ldff1ub_gather_x0_u64_s64offset: -+** ldff1b z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1ub_gather_x0_u64_s64offset, svuint64_t, uint8_t, svint64_t, -+ z0_res = svldff1ub_gather_s64offset_u64 (p0, x0, z0), -+ z0_res = svldff1ub_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ldff1ub_gather_tied1_u64_s64offset: -+** ldff1b z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1ub_gather_tied1_u64_s64offset, svuint64_t, uint8_t, svint64_t, -+ z0_res = svldff1ub_gather_s64offset_u64 (p0, x0, z0), -+ z0_res = svldff1ub_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ldff1ub_gather_untied_u64_s64offset: -+** ldff1b z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1ub_gather_untied_u64_s64offset, svuint64_t, uint8_t, svint64_t, -+ z0_res = svldff1ub_gather_s64offset_u64 (p0, x0, z1), -+ z0_res = svldff1ub_gather_offset_u64 (p0, x0, z1)) -+ -+/* -+** ldff1ub_gather_ext_u64_s64offset: -+** ldff1b z0\.d, p0/z, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1ub_gather_ext_u64_s64offset, svuint64_t, uint8_t, svint64_t, -+ z0_res = svldff1ub_gather_s64offset_u64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svldff1ub_gather_offset_u64 
(p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ldff1ub_gather_x0_u64_u64offset: -+** ldff1b z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1ub_gather_x0_u64_u64offset, svuint64_t, uint8_t, svuint64_t, -+ z0_res = svldff1ub_gather_u64offset_u64 (p0, x0, z0), -+ z0_res = svldff1ub_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ldff1ub_gather_tied1_u64_u64offset: -+** ldff1b z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1ub_gather_tied1_u64_u64offset, svuint64_t, uint8_t, svuint64_t, -+ z0_res = svldff1ub_gather_u64offset_u64 (p0, x0, z0), -+ z0_res = svldff1ub_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ldff1ub_gather_untied_u64_u64offset: -+** ldff1b z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1ub_gather_untied_u64_u64offset, svuint64_t, uint8_t, svuint64_t, -+ z0_res = svldff1ub_gather_u64offset_u64 (p0, x0, z1), -+ z0_res = svldff1ub_gather_offset_u64 (p0, x0, z1)) -+ -+/* -+** ldff1ub_gather_ext_u64_u64offset: -+** ldff1b z0\.d, p0/z, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1ub_gather_ext_u64_u64offset, svuint64_t, uint8_t, svuint64_t, -+ z0_res = svldff1ub_gather_u64offset_u64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svldff1ub_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s16.c -new file mode 100644 -index 000000000..88ad2d1dc ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s16.c -@@ -0,0 +1,90 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1ub_s16_base: -+** ldff1b z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_s16_base, svint16_t, uint8_t, -+ z0 = svldff1ub_s16 (p0, x0), -+ z0 = svldff1ub_s16 (p0, x0)) -+ -+/* -+** ldff1ub_s16_index: -+** ldff1b z0\.h, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_s16_index, svint16_t, uint8_t, -+ z0 = svldff1ub_s16 (p0, x0 + x1), -+ z0 = svldff1ub_s16 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1ub_s16_1: -+** inch x0 -+** ldff1b z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_s16_1, svint16_t, uint8_t, -+ z0 = svldff1ub_s16 (p0, x0 + svcnth ()), -+ z0 = svldff1ub_s16 (p0, x0 + svcnth ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1ub_s16_m1: -+** dech x0 -+** ldff1b z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_s16_m1, svint16_t, uint8_t, -+ z0 = svldff1ub_s16 (p0, x0 - svcnth ()), -+ z0 = svldff1ub_s16 (p0, x0 - svcnth ())) -+ -+/* -+** ldff1ub_vnum_s16_0: -+** ldff1b z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_vnum_s16_0, svint16_t, uint8_t, -+ z0 = svldff1ub_vnum_s16 (p0, x0, 0), -+ z0 = svldff1ub_vnum_s16 (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1ub_vnum_s16_1: -+** inch x0 -+** ldff1b z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_vnum_s16_1, svint16_t, uint8_t, -+ z0 = svldff1ub_vnum_s16 (p0, x0, 1), -+ z0 = svldff1ub_vnum_s16 (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ldff1ub_vnum_s16_m1: -+** dech x0 -+** ldff1b z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_vnum_s16_m1, svint16_t, uint8_t, -+ z0 = svldff1ub_vnum_s16 (p0, x0, -1), -+ z0 = svldff1ub_vnum_s16 (p0, x0, -1)) -+ -+/* -+** ldff1ub_vnum_s16_x1: -+** cnth (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldff1b z0\.h, p0/z, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** ldff1b z0\.h, p0/z, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_LOAD (ldff1ub_vnum_s16_x1, svint16_t, uint8_t, -+ z0 = svldff1ub_vnum_s16 (p0, x0, x1), -+ z0 = svldff1ub_vnum_s16 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s32.c -new file mode 100644 -index 000000000..e8e06411f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s32.c -@@ -0,0 +1,90 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1ub_s32_base: -+** ldff1b z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_s32_base, svint32_t, uint8_t, -+ z0 = svldff1ub_s32 (p0, x0), -+ z0 = svldff1ub_s32 (p0, x0)) -+ -+/* -+** ldff1ub_s32_index: -+** ldff1b z0\.s, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_s32_index, svint32_t, uint8_t, -+ z0 = svldff1ub_s32 (p0, x0 + x1), -+ z0 = svldff1ub_s32 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1ub_s32_1: -+** incw x0 -+** ldff1b z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_s32_1, svint32_t, uint8_t, -+ z0 = svldff1ub_s32 (p0, x0 + svcntw ()), -+ z0 = svldff1ub_s32 (p0, x0 + svcntw ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1ub_s32_m1: -+** decw x0 -+** ldff1b z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_s32_m1, svint32_t, uint8_t, -+ z0 = svldff1ub_s32 (p0, x0 - svcntw ()), -+ z0 = svldff1ub_s32 (p0, x0 - svcntw ())) -+ -+/* -+** ldff1ub_vnum_s32_0: -+** ldff1b z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_vnum_s32_0, svint32_t, uint8_t, -+ z0 = svldff1ub_vnum_s32 (p0, x0, 0), -+ z0 = svldff1ub_vnum_s32 (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1ub_vnum_s32_1: -+** incw x0 -+** ldff1b z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_vnum_s32_1, svint32_t, uint8_t, -+ z0 = svldff1ub_vnum_s32 (p0, x0, 1), -+ z0 = svldff1ub_vnum_s32 (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1ub_vnum_s32_m1: -+** decw x0 -+** ldff1b z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_vnum_s32_m1, svint32_t, uint8_t, -+ z0 = svldff1ub_vnum_s32 (p0, x0, -1), -+ z0 = svldff1ub_vnum_s32 (p0, x0, -1)) -+ -+/* -+** ldff1ub_vnum_s32_x1: -+** cntw (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldff1b z0\.s, p0/z, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** ldff1b z0\.s, p0/z, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_LOAD (ldff1ub_vnum_s32_x1, svint32_t, uint8_t, -+ z0 = svldff1ub_vnum_s32 (p0, x0, x1), -+ z0 = svldff1ub_vnum_s32 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s64.c -new file mode 100644 -index 000000000..21d02ddb7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_s64.c -@@ -0,0 +1,90 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1ub_s64_base: -+** ldff1b z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_s64_base, svint64_t, uint8_t, -+ z0 = svldff1ub_s64 (p0, x0), -+ z0 = svldff1ub_s64 (p0, x0)) -+ -+/* -+** ldff1ub_s64_index: -+** ldff1b z0\.d, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_s64_index, svint64_t, uint8_t, -+ z0 = svldff1ub_s64 (p0, x0 + x1), -+ z0 = svldff1ub_s64 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1ub_s64_1: -+** incd x0 -+** ldff1b z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_s64_1, svint64_t, uint8_t, -+ z0 = svldff1ub_s64 (p0, x0 + svcntd ()), -+ z0 = svldff1ub_s64 (p0, x0 + svcntd ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1ub_s64_m1: -+** decd x0 -+** ldff1b z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_s64_m1, svint64_t, uint8_t, -+ z0 = svldff1ub_s64 (p0, x0 - svcntd ()), -+ z0 = svldff1ub_s64 (p0, x0 - svcntd ())) -+ -+/* -+** ldff1ub_vnum_s64_0: -+** ldff1b z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_vnum_s64_0, svint64_t, uint8_t, -+ z0 = svldff1ub_vnum_s64 (p0, x0, 0), -+ z0 = svldff1ub_vnum_s64 (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1ub_vnum_s64_1: -+** incd x0 -+** ldff1b z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_vnum_s64_1, svint64_t, uint8_t, -+ z0 = svldff1ub_vnum_s64 (p0, x0, 1), -+ z0 = svldff1ub_vnum_s64 (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1ub_vnum_s64_m1: -+** decd x0 -+** ldff1b z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_vnum_s64_m1, svint64_t, uint8_t, -+ z0 = svldff1ub_vnum_s64 (p0, x0, -1), -+ z0 = svldff1ub_vnum_s64 (p0, x0, -1)) -+ -+/* -+** ldff1ub_vnum_s64_x1: -+** cntd (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldff1b z0\.d, p0/z, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** ldff1b z0\.d, p0/z, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_LOAD (ldff1ub_vnum_s64_x1, svint64_t, uint8_t, -+ z0 = svldff1ub_vnum_s64 (p0, x0, x1), -+ z0 = svldff1ub_vnum_s64 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u16.c -new file mode 100644 -index 000000000..904cb027e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u16.c -@@ -0,0 +1,90 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1ub_u16_base: -+** ldff1b z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_u16_base, svuint16_t, uint8_t, -+ z0 = svldff1ub_u16 (p0, x0), -+ z0 = svldff1ub_u16 (p0, x0)) -+ -+/* -+** ldff1ub_u16_index: -+** ldff1b z0\.h, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_u16_index, svuint16_t, uint8_t, -+ z0 = svldff1ub_u16 (p0, x0 + x1), -+ z0 = svldff1ub_u16 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1ub_u16_1: -+** inch x0 -+** ldff1b z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_u16_1, svuint16_t, uint8_t, -+ z0 = svldff1ub_u16 (p0, x0 + svcnth ()), -+ z0 = svldff1ub_u16 (p0, x0 + svcnth ())) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ldff1ub_u16_m1: -+** dech x0 -+** ldff1b z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_u16_m1, svuint16_t, uint8_t, -+ z0 = svldff1ub_u16 (p0, x0 - svcnth ()), -+ z0 = svldff1ub_u16 (p0, x0 - svcnth ())) -+ -+/* -+** ldff1ub_vnum_u16_0: -+** ldff1b z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_vnum_u16_0, svuint16_t, uint8_t, -+ z0 = svldff1ub_vnum_u16 (p0, x0, 0), -+ z0 = svldff1ub_vnum_u16 (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1ub_vnum_u16_1: -+** inch x0 -+** ldff1b z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_vnum_u16_1, svuint16_t, uint8_t, -+ z0 = svldff1ub_vnum_u16 (p0, x0, 1), -+ z0 = svldff1ub_vnum_u16 (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1ub_vnum_u16_m1: -+** dech x0 -+** ldff1b z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_vnum_u16_m1, svuint16_t, uint8_t, -+ z0 = svldff1ub_vnum_u16 (p0, x0, -1), -+ z0 = svldff1ub_vnum_u16 (p0, x0, -1)) -+ -+/* -+** ldff1ub_vnum_u16_x1: -+** cnth (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldff1b z0\.h, p0/z, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** ldff1b z0\.h, p0/z, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_LOAD (ldff1ub_vnum_u16_x1, svuint16_t, uint8_t, -+ z0 = svldff1ub_vnum_u16 (p0, x0, x1), -+ z0 = svldff1ub_vnum_u16 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u32.c -new file mode 100644 -index 000000000..a40012318 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u32.c -@@ -0,0 +1,90 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1ub_u32_base: -+** ldff1b z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_u32_base, svuint32_t, uint8_t, -+ z0 = svldff1ub_u32 (p0, x0), -+ z0 = svldff1ub_u32 (p0, x0)) -+ -+/* -+** ldff1ub_u32_index: -+** ldff1b z0\.s, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_u32_index, svuint32_t, uint8_t, -+ z0 = svldff1ub_u32 (p0, x0 + x1), -+ z0 = svldff1ub_u32 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1ub_u32_1: -+** incw x0 -+** ldff1b z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_u32_1, svuint32_t, uint8_t, -+ z0 = svldff1ub_u32 (p0, x0 + svcntw ()), -+ z0 = svldff1ub_u32 (p0, x0 + svcntw ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1ub_u32_m1: -+** decw x0 -+** ldff1b z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_u32_m1, svuint32_t, uint8_t, -+ z0 = svldff1ub_u32 (p0, x0 - svcntw ()), -+ z0 = svldff1ub_u32 (p0, x0 - svcntw ())) -+ -+/* -+** ldff1ub_vnum_u32_0: -+** ldff1b z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_vnum_u32_0, svuint32_t, uint8_t, -+ z0 = svldff1ub_vnum_u32 (p0, x0, 0), -+ z0 = svldff1ub_vnum_u32 (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1ub_vnum_u32_1: -+** incw x0 -+** ldff1b z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_vnum_u32_1, svuint32_t, uint8_t, -+ z0 = svldff1ub_vnum_u32 (p0, x0, 1), -+ z0 = svldff1ub_vnum_u32 (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ldff1ub_vnum_u32_m1: -+** decw x0 -+** ldff1b z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_vnum_u32_m1, svuint32_t, uint8_t, -+ z0 = svldff1ub_vnum_u32 (p0, x0, -1), -+ z0 = svldff1ub_vnum_u32 (p0, x0, -1)) -+ -+/* -+** ldff1ub_vnum_u32_x1: -+** cntw (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldff1b z0\.s, p0/z, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** ldff1b z0\.s, p0/z, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_LOAD (ldff1ub_vnum_u32_x1, svuint32_t, uint8_t, -+ z0 = svldff1ub_vnum_u32 (p0, x0, x1), -+ z0 = svldff1ub_vnum_u32 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u64.c -new file mode 100644 -index 000000000..a9a98a683 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1ub_u64.c -@@ -0,0 +1,90 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1ub_u64_base: -+** ldff1b z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_u64_base, svuint64_t, uint8_t, -+ z0 = svldff1ub_u64 (p0, x0), -+ z0 = svldff1ub_u64 (p0, x0)) -+ -+/* -+** ldff1ub_u64_index: -+** ldff1b z0\.d, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_u64_index, svuint64_t, uint8_t, -+ z0 = svldff1ub_u64 (p0, x0 + x1), -+ z0 = svldff1ub_u64 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1ub_u64_1: -+** incd x0 -+** ldff1b z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_u64_1, svuint64_t, uint8_t, -+ z0 = svldff1ub_u64 (p0, x0 + svcntd ()), -+ z0 = svldff1ub_u64 (p0, x0 + svcntd ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1ub_u64_m1: -+** decd x0 -+** ldff1b z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_u64_m1, svuint64_t, uint8_t, -+ z0 = svldff1ub_u64 (p0, x0 - svcntd ()), -+ z0 = svldff1ub_u64 (p0, x0 - svcntd ())) -+ -+/* -+** ldff1ub_vnum_u64_0: -+** ldff1b z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_vnum_u64_0, svuint64_t, uint8_t, -+ z0 = svldff1ub_vnum_u64 (p0, x0, 0), -+ z0 = svldff1ub_vnum_u64 (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1ub_vnum_u64_1: -+** incd x0 -+** ldff1b z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_vnum_u64_1, svuint64_t, uint8_t, -+ z0 = svldff1ub_vnum_u64 (p0, x0, 1), -+ z0 = svldff1ub_vnum_u64 (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1ub_vnum_u64_m1: -+** decd x0 -+** ldff1b z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1ub_vnum_u64_m1, svuint64_t, uint8_t, -+ z0 = svldff1ub_vnum_u64 (p0, x0, -1), -+ z0 = svldff1ub_vnum_u64 (p0, x0, -1)) -+ -+/* -+** ldff1ub_vnum_u64_x1: -+** cntd (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldff1b z0\.d, p0/z, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** ldff1b z0\.d, p0/z, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_LOAD (ldff1ub_vnum_u64_x1, svuint64_t, uint8_t, -+ z0 = svldff1ub_vnum_u64 (p0, x0, x1), -+ z0 = svldff1ub_vnum_u64 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_s32.c -new file mode 100644 -index 000000000..d02e44342 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_s32.c -@@ -0,0 +1,252 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1uh_gather_s32_tied1: -+** ldff1h z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_s32_tied1, svint32_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32base_s32 (p0, z0), -+ z0_res = svldff1uh_gather_s32 (p0, z0)) -+ -+/* -+** ldff1uh_gather_s32_untied: -+** ldff1h z0\.s, p0/z, \[z1\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_s32_untied, svint32_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32base_s32 (p0, z1), -+ z0_res = svldff1uh_gather_s32 (p0, z1)) -+ -+/* -+** ldff1uh_gather_x0_s32_offset: -+** ldff1h z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_x0_s32_offset, svint32_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32base_offset_s32 (p0, z0, x0), -+ z0_res = svldff1uh_gather_offset_s32 (p0, z0, x0)) -+ -+/* -+** ldff1uh_gather_m2_s32_offset: -+** mov (x[0-9]+), #?-2 -+** ldff1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_m2_s32_offset, svint32_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32base_offset_s32 (p0, z0, -2), -+ z0_res = svldff1uh_gather_offset_s32 (p0, z0, -2)) -+ -+/* -+** ldff1uh_gather_0_s32_offset: -+** ldff1h z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_0_s32_offset, svint32_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32base_offset_s32 (p0, z0, 0), -+ z0_res = svldff1uh_gather_offset_s32 (p0, z0, 0)) -+ -+/* -+** ldff1uh_gather_5_s32_offset: -+** mov (x[0-9]+), #?5 -+** ldff1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_5_s32_offset, svint32_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32base_offset_s32 (p0, z0, 5), -+ z0_res = svldff1uh_gather_offset_s32 (p0, z0, 5)) -+ -+/* -+** ldff1uh_gather_6_s32_offset: -+** ldff1h z0\.s, p0/z, \[z0\.s, #6\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_6_s32_offset, svint32_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32base_offset_s32 (p0, z0, 6), -+ z0_res = svldff1uh_gather_offset_s32 (p0, z0, 6)) -+ -+/* -+** ldff1uh_gather_62_s32_offset: -+** ldff1h z0\.s, p0/z, \[z0\.s, #62\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_62_s32_offset, svint32_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32base_offset_s32 (p0, z0, 62), -+ z0_res = svldff1uh_gather_offset_s32 (p0, z0, 62)) -+ -+/* -+** ldff1uh_gather_64_s32_offset: -+** mov (x[0-9]+), #?64 -+** ldff1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_64_s32_offset, svint32_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32base_offset_s32 (p0, z0, 64), -+ z0_res = svldff1uh_gather_offset_s32 (p0, z0, 64)) -+ -+/* -+** ldff1uh_gather_x0_s32_index: -+** lsl (x[0-9]+), x0, #?1 -+** ldff1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_x0_s32_index, svint32_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32base_index_s32 (p0, z0, x0), -+ z0_res = svldff1uh_gather_index_s32 (p0, z0, x0)) -+ -+/* -+** ldff1uh_gather_m1_s32_index: -+** mov (x[0-9]+), #?-2 -+** ldff1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_m1_s32_index, svint32_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32base_index_s32 (p0, z0, -1), -+ z0_res = svldff1uh_gather_index_s32 (p0, z0, -1)) -+ -+/* -+** ldff1uh_gather_0_s32_index: -+** ldff1h z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_0_s32_index, svint32_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32base_index_s32 (p0, z0, 0), -+ z0_res = svldff1uh_gather_index_s32 
(p0, z0, 0)) -+ -+/* -+** ldff1uh_gather_5_s32_index: -+** ldff1h z0\.s, p0/z, \[z0\.s, #10\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_5_s32_index, svint32_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32base_index_s32 (p0, z0, 5), -+ z0_res = svldff1uh_gather_index_s32 (p0, z0, 5)) -+ -+/* -+** ldff1uh_gather_31_s32_index: -+** ldff1h z0\.s, p0/z, \[z0\.s, #62\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_31_s32_index, svint32_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32base_index_s32 (p0, z0, 31), -+ z0_res = svldff1uh_gather_index_s32 (p0, z0, 31)) -+ -+/* -+** ldff1uh_gather_32_s32_index: -+** mov (x[0-9]+), #?64 -+** ldff1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_32_s32_index, svint32_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32base_index_s32 (p0, z0, 32), -+ z0_res = svldff1uh_gather_index_s32 (p0, z0, 32)) -+ -+/* -+** ldff1uh_gather_x0_s32_s32offset: -+** ldff1h z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_x0_s32_s32offset, svint32_t, uint16_t, svint32_t, -+ z0_res = svldff1uh_gather_s32offset_s32 (p0, x0, z0), -+ z0_res = svldff1uh_gather_offset_s32 (p0, x0, z0)) -+ -+/* -+** ldff1uh_gather_tied1_s32_s32offset: -+** ldff1h z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_tied1_s32_s32offset, svint32_t, uint16_t, svint32_t, -+ z0_res = svldff1uh_gather_s32offset_s32 (p0, x0, z0), -+ z0_res = svldff1uh_gather_offset_s32 (p0, x0, z0)) -+ -+/* -+** ldff1uh_gather_untied_s32_s32offset: -+** ldff1h z0\.s, p0/z, \[x0, z1\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_untied_s32_s32offset, svint32_t, uint16_t, svint32_t, -+ z0_res = svldff1uh_gather_s32offset_s32 (p0, x0, z1), -+ z0_res = svldff1uh_gather_offset_s32 (p0, x0, z1)) -+ -+/* -+** ldff1uh_gather_x0_s32_u32offset: -+** ldff1h z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_x0_s32_u32offset, svint32_t, uint16_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32offset_s32 (p0, x0, z0), -+ z0_res = svldff1uh_gather_offset_s32 (p0, x0, z0)) -+ -+/* -+** ldff1uh_gather_tied1_s32_u32offset: -+** ldff1h z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_tied1_s32_u32offset, svint32_t, uint16_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32offset_s32 (p0, x0, z0), -+ z0_res = svldff1uh_gather_offset_s32 (p0, x0, z0)) -+ -+/* -+** ldff1uh_gather_untied_s32_u32offset: -+** ldff1h z0\.s, p0/z, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_untied_s32_u32offset, svint32_t, uint16_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32offset_s32 (p0, x0, z1), -+ z0_res = svldff1uh_gather_offset_s32 (p0, x0, z1)) -+ -+/* -+** ldff1uh_gather_x0_s32_s32index: -+** ldff1h z0\.s, p0/z, \[x0, z0\.s, sxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_x0_s32_s32index, svint32_t, uint16_t, svint32_t, -+ z0_res = svldff1uh_gather_s32index_s32 (p0, x0, z0), -+ z0_res = svldff1uh_gather_index_s32 (p0, x0, z0)) -+ -+/* -+** ldff1uh_gather_tied1_s32_s32index: -+** ldff1h z0\.s, p0/z, \[x0, z0\.s, sxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_tied1_s32_s32index, svint32_t, uint16_t, svint32_t, -+ z0_res = svldff1uh_gather_s32index_s32 (p0, x0, z0), -+ z0_res = svldff1uh_gather_index_s32 (p0, x0, z0)) -+ -+/* -+** ldff1uh_gather_untied_s32_s32index: -+** ldff1h z0\.s, p0/z, \[x0, z1\.s, sxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_untied_s32_s32index, svint32_t, uint16_t, 
svint32_t, -+ z0_res = svldff1uh_gather_s32index_s32 (p0, x0, z1), -+ z0_res = svldff1uh_gather_index_s32 (p0, x0, z1)) -+ -+/* -+** ldff1uh_gather_x0_s32_u32index: -+** ldff1h z0\.s, p0/z, \[x0, z0\.s, uxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_x0_s32_u32index, svint32_t, uint16_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32index_s32 (p0, x0, z0), -+ z0_res = svldff1uh_gather_index_s32 (p0, x0, z0)) -+ -+/* -+** ldff1uh_gather_tied1_s32_u32index: -+** ldff1h z0\.s, p0/z, \[x0, z0\.s, uxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_tied1_s32_u32index, svint32_t, uint16_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32index_s32 (p0, x0, z0), -+ z0_res = svldff1uh_gather_index_s32 (p0, x0, z0)) -+ -+/* -+** ldff1uh_gather_untied_s32_u32index: -+** ldff1h z0\.s, p0/z, \[x0, z1\.s, uxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_untied_s32_u32index, svint32_t, uint16_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32index_s32 (p0, x0, z1), -+ z0_res = svldff1uh_gather_index_s32 (p0, x0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_s64.c -new file mode 100644 -index 000000000..663a73d27 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_s64.c -@@ -0,0 +1,288 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1uh_gather_s64_tied1: -+** ldff1h z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_s64_tied1, svint64_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64base_s64 (p0, z0), -+ z0_res = svldff1uh_gather_s64 (p0, z0)) -+ -+/* -+** ldff1uh_gather_s64_untied: -+** ldff1h z0\.d, p0/z, \[z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_s64_untied, svint64_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64base_s64 (p0, z1), -+ z0_res = svldff1uh_gather_s64 (p0, z1)) -+ -+/* -+** ldff1uh_gather_x0_s64_offset: -+** ldff1h z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_x0_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64base_offset_s64 (p0, z0, x0), -+ z0_res = svldff1uh_gather_offset_s64 (p0, z0, x0)) -+ -+/* -+** ldff1uh_gather_m2_s64_offset: -+** mov (x[0-9]+), #?-2 -+** ldff1h z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_m2_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64base_offset_s64 (p0, z0, -2), -+ z0_res = svldff1uh_gather_offset_s64 (p0, z0, -2)) -+ -+/* -+** ldff1uh_gather_0_s64_offset: -+** ldff1h z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_0_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64base_offset_s64 (p0, z0, 0), -+ z0_res = svldff1uh_gather_offset_s64 (p0, z0, 0)) -+ -+/* -+** ldff1uh_gather_5_s64_offset: -+** mov (x[0-9]+), #?5 -+** ldff1h z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_5_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64base_offset_s64 (p0, z0, 5), -+ z0_res = svldff1uh_gather_offset_s64 (p0, z0, 5)) -+ -+/* -+** ldff1uh_gather_6_s64_offset: -+** ldff1h z0\.d, p0/z, \[z0\.d, #6\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_6_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64base_offset_s64 (p0, z0, 6), -+ z0_res = svldff1uh_gather_offset_s64 (p0, z0, 6)) -+ -+/* -+** ldff1uh_gather_62_s64_offset: -+** ldff1h z0\.d, p0/z, \[z0\.d, #62\] -+** ret -+*/ 
-+TEST_LOAD_GATHER_ZS (ldff1uh_gather_62_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64base_offset_s64 (p0, z0, 62), -+ z0_res = svldff1uh_gather_offset_s64 (p0, z0, 62)) -+ -+/* -+** ldff1uh_gather_64_s64_offset: -+** mov (x[0-9]+), #?64 -+** ldff1h z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_64_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64base_offset_s64 (p0, z0, 64), -+ z0_res = svldff1uh_gather_offset_s64 (p0, z0, 64)) -+ -+/* -+** ldff1uh_gather_x0_s64_index: -+** lsl (x[0-9]+), x0, #?1 -+** ldff1h z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_x0_s64_index, svint64_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64base_index_s64 (p0, z0, x0), -+ z0_res = svldff1uh_gather_index_s64 (p0, z0, x0)) -+ -+/* -+** ldff1uh_gather_m1_s64_index: -+** mov (x[0-9]+), #?-2 -+** ldff1h z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_m1_s64_index, svint64_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64base_index_s64 (p0, z0, -1), -+ z0_res = svldff1uh_gather_index_s64 (p0, z0, -1)) -+ -+/* -+** ldff1uh_gather_0_s64_index: -+** ldff1h z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_0_s64_index, svint64_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64base_index_s64 (p0, z0, 0), -+ z0_res = svldff1uh_gather_index_s64 (p0, z0, 0)) -+ -+/* -+** ldff1uh_gather_5_s64_index: -+** ldff1h z0\.d, p0/z, \[z0\.d, #10\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_5_s64_index, svint64_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64base_index_s64 (p0, z0, 5), -+ z0_res = svldff1uh_gather_index_s64 (p0, z0, 5)) -+ -+/* -+** ldff1uh_gather_31_s64_index: -+** ldff1h z0\.d, p0/z, \[z0\.d, #62\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_31_s64_index, svint64_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64base_index_s64 (p0, z0, 31), -+ z0_res = svldff1uh_gather_index_s64 (p0, z0, 31)) -+ -+/* -+** ldff1uh_gather_32_s64_index: -+** mov (x[0-9]+), #?64 -+** ldff1h z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_32_s64_index, svint64_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64base_index_s64 (p0, z0, 32), -+ z0_res = svldff1uh_gather_index_s64 (p0, z0, 32)) -+ -+/* -+** ldff1uh_gather_x0_s64_s64offset: -+** ldff1h z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_x0_s64_s64offset, svint64_t, uint16_t, svint64_t, -+ z0_res = svldff1uh_gather_s64offset_s64 (p0, x0, z0), -+ z0_res = svldff1uh_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ldff1uh_gather_tied1_s64_s64offset: -+** ldff1h z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_tied1_s64_s64offset, svint64_t, uint16_t, svint64_t, -+ z0_res = svldff1uh_gather_s64offset_s64 (p0, x0, z0), -+ z0_res = svldff1uh_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ldff1uh_gather_untied_s64_s64offset: -+** ldff1h z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_untied_s64_s64offset, svint64_t, uint16_t, svint64_t, -+ z0_res = svldff1uh_gather_s64offset_s64 (p0, x0, z1), -+ z0_res = svldff1uh_gather_offset_s64 (p0, x0, z1)) -+ -+/* -+** ldff1uh_gather_ext_s64_s64offset: -+** ldff1h z0\.d, p0/z, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_ext_s64_s64offset, svint64_t, uint16_t, svint64_t, -+ z0_res = svldff1uh_gather_s64offset_s64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svldff1uh_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** 
ldff1uh_gather_x0_s64_u64offset: -+** ldff1h z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_x0_s64_u64offset, svint64_t, uint16_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64offset_s64 (p0, x0, z0), -+ z0_res = svldff1uh_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ldff1uh_gather_tied1_s64_u64offset: -+** ldff1h z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_tied1_s64_u64offset, svint64_t, uint16_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64offset_s64 (p0, x0, z0), -+ z0_res = svldff1uh_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ldff1uh_gather_untied_s64_u64offset: -+** ldff1h z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_untied_s64_u64offset, svint64_t, uint16_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64offset_s64 (p0, x0, z1), -+ z0_res = svldff1uh_gather_offset_s64 (p0, x0, z1)) -+ -+/* -+** ldff1uh_gather_ext_s64_u64offset: -+** ldff1h z0\.d, p0/z, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_ext_s64_u64offset, svint64_t, uint16_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64offset_s64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svldff1uh_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ldff1uh_gather_x0_s64_s64index: -+** ldff1h z0\.d, p0/z, \[x0, z0\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_x0_s64_s64index, svint64_t, uint16_t, svint64_t, -+ z0_res = svldff1uh_gather_s64index_s64 (p0, x0, z0), -+ z0_res = svldff1uh_gather_index_s64 (p0, x0, z0)) -+ -+/* -+** ldff1uh_gather_tied1_s64_s64index: -+** ldff1h z0\.d, p0/z, \[x0, z0\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_tied1_s64_s64index, svint64_t, uint16_t, svint64_t, -+ z0_res = svldff1uh_gather_s64index_s64 (p0, x0, z0), -+ z0_res = svldff1uh_gather_index_s64 (p0, x0, z0)) -+ -+/* -+** ldff1uh_gather_untied_s64_s64index: -+** ldff1h z0\.d, p0/z, \[x0, z1\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_untied_s64_s64index, svint64_t, uint16_t, svint64_t, -+ z0_res = svldff1uh_gather_s64index_s64 (p0, x0, z1), -+ z0_res = svldff1uh_gather_index_s64 (p0, x0, z1)) -+ -+/* -+** ldff1uh_gather_ext_s64_s64index: -+** ldff1h z0\.d, p0/z, \[x0, z1\.d, sxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_ext_s64_s64index, svint64_t, uint16_t, svint64_t, -+ z0_res = svldff1uh_gather_s64index_s64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svldff1uh_gather_index_s64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ldff1uh_gather_x0_s64_u64index: -+** ldff1h z0\.d, p0/z, \[x0, z0\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_x0_s64_u64index, svint64_t, uint16_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64index_s64 (p0, x0, z0), -+ z0_res = svldff1uh_gather_index_s64 (p0, x0, z0)) -+ -+/* -+** ldff1uh_gather_tied1_s64_u64index: -+** ldff1h z0\.d, p0/z, \[x0, z0\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_tied1_s64_u64index, svint64_t, uint16_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64index_s64 (p0, x0, z0), -+ z0_res = svldff1uh_gather_index_s64 (p0, x0, z0)) -+ -+/* -+** ldff1uh_gather_untied_s64_u64index: -+** ldff1h z0\.d, p0/z, \[x0, z1\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_untied_s64_u64index, svint64_t, uint16_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64index_s64 (p0, x0, z1), -+ z0_res = svldff1uh_gather_index_s64 (p0, x0, z1)) -+ -+/* -+** ldff1uh_gather_ext_s64_u64index: -+** ldff1h z0\.d, p0/z, \[x0, z1\.d, uxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ 
(ldff1uh_gather_ext_s64_u64index, svint64_t, uint16_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64index_s64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svldff1uh_gather_index_s64 (p0, x0, svextw_x (p0, z1))) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_u32.c -new file mode 100644 -index 000000000..5e0ef067f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_u32.c -@@ -0,0 +1,252 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1uh_gather_u32_tied1: -+** ldff1h z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_u32_tied1, svuint32_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32base_u32 (p0, z0), -+ z0_res = svldff1uh_gather_u32 (p0, z0)) -+ -+/* -+** ldff1uh_gather_u32_untied: -+** ldff1h z0\.s, p0/z, \[z1\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_u32_untied, svuint32_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32base_u32 (p0, z1), -+ z0_res = svldff1uh_gather_u32 (p0, z1)) -+ -+/* -+** ldff1uh_gather_x0_u32_offset: -+** ldff1h z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_x0_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32base_offset_u32 (p0, z0, x0), -+ z0_res = svldff1uh_gather_offset_u32 (p0, z0, x0)) -+ -+/* -+** ldff1uh_gather_m2_u32_offset: -+** mov (x[0-9]+), #?-2 -+** ldff1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_m2_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32base_offset_u32 (p0, z0, -2), -+ z0_res = svldff1uh_gather_offset_u32 (p0, z0, -2)) -+ -+/* -+** ldff1uh_gather_0_u32_offset: -+** ldff1h z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_0_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32base_offset_u32 (p0, z0, 0), -+ z0_res = svldff1uh_gather_offset_u32 (p0, z0, 0)) -+ -+/* -+** ldff1uh_gather_5_u32_offset: -+** mov (x[0-9]+), #?5 -+** ldff1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_5_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32base_offset_u32 (p0, z0, 5), -+ z0_res = svldff1uh_gather_offset_u32 (p0, z0, 5)) -+ -+/* -+** ldff1uh_gather_6_u32_offset: -+** ldff1h z0\.s, p0/z, \[z0\.s, #6\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_6_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32base_offset_u32 (p0, z0, 6), -+ z0_res = svldff1uh_gather_offset_u32 (p0, z0, 6)) -+ -+/* -+** ldff1uh_gather_62_u32_offset: -+** ldff1h z0\.s, p0/z, \[z0\.s, #62\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_62_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32base_offset_u32 (p0, z0, 62), -+ z0_res = svldff1uh_gather_offset_u32 (p0, z0, 62)) -+ -+/* -+** ldff1uh_gather_64_u32_offset: -+** mov (x[0-9]+), #?64 -+** ldff1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_64_u32_offset, svuint32_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32base_offset_u32 (p0, z0, 64), -+ z0_res = svldff1uh_gather_offset_u32 (p0, z0, 64)) -+ -+/* -+** ldff1uh_gather_x0_u32_index: -+** lsl (x[0-9]+), x0, #?1 -+** ldff1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_x0_u32_index, svuint32_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32base_index_u32 (p0, z0, x0), -+ z0_res = 
svldff1uh_gather_index_u32 (p0, z0, x0)) -+ -+/* -+** ldff1uh_gather_m1_u32_index: -+** mov (x[0-9]+), #?-2 -+** ldff1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_m1_u32_index, svuint32_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32base_index_u32 (p0, z0, -1), -+ z0_res = svldff1uh_gather_index_u32 (p0, z0, -1)) -+ -+/* -+** ldff1uh_gather_0_u32_index: -+** ldff1h z0\.s, p0/z, \[z0\.s\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_0_u32_index, svuint32_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32base_index_u32 (p0, z0, 0), -+ z0_res = svldff1uh_gather_index_u32 (p0, z0, 0)) -+ -+/* -+** ldff1uh_gather_5_u32_index: -+** ldff1h z0\.s, p0/z, \[z0\.s, #10\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_5_u32_index, svuint32_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32base_index_u32 (p0, z0, 5), -+ z0_res = svldff1uh_gather_index_u32 (p0, z0, 5)) -+ -+/* -+** ldff1uh_gather_31_u32_index: -+** ldff1h z0\.s, p0/z, \[z0\.s, #62\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_31_u32_index, svuint32_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32base_index_u32 (p0, z0, 31), -+ z0_res = svldff1uh_gather_index_u32 (p0, z0, 31)) -+ -+/* -+** ldff1uh_gather_32_u32_index: -+** mov (x[0-9]+), #?64 -+** ldff1h z0\.s, p0/z, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_32_u32_index, svuint32_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32base_index_u32 (p0, z0, 32), -+ z0_res = svldff1uh_gather_index_u32 (p0, z0, 32)) -+ -+/* -+** ldff1uh_gather_x0_u32_s32offset: -+** ldff1h z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_x0_u32_s32offset, svuint32_t, uint16_t, svint32_t, -+ z0_res = svldff1uh_gather_s32offset_u32 (p0, x0, z0), -+ z0_res = svldff1uh_gather_offset_u32 (p0, x0, z0)) -+ -+/* -+** ldff1uh_gather_tied1_u32_s32offset: -+** ldff1h z0\.s, p0/z, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_tied1_u32_s32offset, svuint32_t, uint16_t, svint32_t, -+ z0_res = svldff1uh_gather_s32offset_u32 (p0, x0, z0), -+ z0_res = svldff1uh_gather_offset_u32 (p0, x0, z0)) -+ -+/* -+** ldff1uh_gather_untied_u32_s32offset: -+** ldff1h z0\.s, p0/z, \[x0, z1\.s, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_untied_u32_s32offset, svuint32_t, uint16_t, svint32_t, -+ z0_res = svldff1uh_gather_s32offset_u32 (p0, x0, z1), -+ z0_res = svldff1uh_gather_offset_u32 (p0, x0, z1)) -+ -+/* -+** ldff1uh_gather_x0_u32_u32offset: -+** ldff1h z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_x0_u32_u32offset, svuint32_t, uint16_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32offset_u32 (p0, x0, z0), -+ z0_res = svldff1uh_gather_offset_u32 (p0, x0, z0)) -+ -+/* -+** ldff1uh_gather_tied1_u32_u32offset: -+** ldff1h z0\.s, p0/z, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_tied1_u32_u32offset, svuint32_t, uint16_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32offset_u32 (p0, x0, z0), -+ z0_res = svldff1uh_gather_offset_u32 (p0, x0, z0)) -+ -+/* -+** ldff1uh_gather_untied_u32_u32offset: -+** ldff1h z0\.s, p0/z, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_untied_u32_u32offset, svuint32_t, uint16_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32offset_u32 (p0, x0, z1), -+ z0_res = svldff1uh_gather_offset_u32 (p0, x0, z1)) -+ -+/* -+** ldff1uh_gather_x0_u32_s32index: -+** ldff1h z0\.s, p0/z, \[x0, z0\.s, sxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_x0_u32_s32index, svuint32_t, 
uint16_t, svint32_t, -+ z0_res = svldff1uh_gather_s32index_u32 (p0, x0, z0), -+ z0_res = svldff1uh_gather_index_u32 (p0, x0, z0)) -+ -+/* -+** ldff1uh_gather_tied1_u32_s32index: -+** ldff1h z0\.s, p0/z, \[x0, z0\.s, sxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_tied1_u32_s32index, svuint32_t, uint16_t, svint32_t, -+ z0_res = svldff1uh_gather_s32index_u32 (p0, x0, z0), -+ z0_res = svldff1uh_gather_index_u32 (p0, x0, z0)) -+ -+/* -+** ldff1uh_gather_untied_u32_s32index: -+** ldff1h z0\.s, p0/z, \[x0, z1\.s, sxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_untied_u32_s32index, svuint32_t, uint16_t, svint32_t, -+ z0_res = svldff1uh_gather_s32index_u32 (p0, x0, z1), -+ z0_res = svldff1uh_gather_index_u32 (p0, x0, z1)) -+ -+/* -+** ldff1uh_gather_x0_u32_u32index: -+** ldff1h z0\.s, p0/z, \[x0, z0\.s, uxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_x0_u32_u32index, svuint32_t, uint16_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32index_u32 (p0, x0, z0), -+ z0_res = svldff1uh_gather_index_u32 (p0, x0, z0)) -+ -+/* -+** ldff1uh_gather_tied1_u32_u32index: -+** ldff1h z0\.s, p0/z, \[x0, z0\.s, uxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_tied1_u32_u32index, svuint32_t, uint16_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32index_u32 (p0, x0, z0), -+ z0_res = svldff1uh_gather_index_u32 (p0, x0, z0)) -+ -+/* -+** ldff1uh_gather_untied_u32_u32index: -+** ldff1h z0\.s, p0/z, \[x0, z1\.s, uxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_untied_u32_u32index, svuint32_t, uint16_t, svuint32_t, -+ z0_res = svldff1uh_gather_u32index_u32 (p0, x0, z1), -+ z0_res = svldff1uh_gather_index_u32 (p0, x0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_u64.c -new file mode 100644 -index 000000000..1cfae1b95 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_gather_u64.c -@@ -0,0 +1,288 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1uh_gather_u64_tied1: -+** ldff1h z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_u64_tied1, svuint64_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64base_u64 (p0, z0), -+ z0_res = svldff1uh_gather_u64 (p0, z0)) -+ -+/* -+** ldff1uh_gather_u64_untied: -+** ldff1h z0\.d, p0/z, \[z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_u64_untied, svuint64_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64base_u64 (p0, z1), -+ z0_res = svldff1uh_gather_u64 (p0, z1)) -+ -+/* -+** ldff1uh_gather_x0_u64_offset: -+** ldff1h z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_x0_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64base_offset_u64 (p0, z0, x0), -+ z0_res = svldff1uh_gather_offset_u64 (p0, z0, x0)) -+ -+/* -+** ldff1uh_gather_m2_u64_offset: -+** mov (x[0-9]+), #?-2 -+** ldff1h z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_m2_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64base_offset_u64 (p0, z0, -2), -+ z0_res = svldff1uh_gather_offset_u64 (p0, z0, -2)) -+ -+/* -+** ldff1uh_gather_0_u64_offset: -+** ldff1h z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_0_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64base_offset_u64 (p0, z0, 0), -+ z0_res = svldff1uh_gather_offset_u64 (p0, z0, 0)) -+ -+/* -+** ldff1uh_gather_5_u64_offset: -+** mov (x[0-9]+), #?5 -+** ldff1h z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_5_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64base_offset_u64 (p0, z0, 5), -+ z0_res = svldff1uh_gather_offset_u64 (p0, z0, 5)) -+ -+/* -+** ldff1uh_gather_6_u64_offset: -+** ldff1h z0\.d, p0/z, \[z0\.d, #6\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_6_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64base_offset_u64 (p0, z0, 6), -+ z0_res = svldff1uh_gather_offset_u64 (p0, z0, 6)) -+ -+/* -+** ldff1uh_gather_62_u64_offset: -+** ldff1h z0\.d, p0/z, \[z0\.d, #62\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_62_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64base_offset_u64 (p0, z0, 62), -+ z0_res = svldff1uh_gather_offset_u64 (p0, z0, 62)) -+ -+/* -+** ldff1uh_gather_64_u64_offset: -+** mov (x[0-9]+), #?64 -+** ldff1h z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_64_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64base_offset_u64 (p0, z0, 64), -+ z0_res = svldff1uh_gather_offset_u64 (p0, z0, 64)) -+ -+/* -+** ldff1uh_gather_x0_u64_index: -+** lsl (x[0-9]+), x0, #?1 -+** ldff1h z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_x0_u64_index, svuint64_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64base_index_u64 (p0, z0, x0), -+ z0_res = svldff1uh_gather_index_u64 (p0, z0, x0)) -+ -+/* -+** ldff1uh_gather_m1_u64_index: -+** mov (x[0-9]+), #?-2 -+** ldff1h z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_m1_u64_index, svuint64_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64base_index_u64 (p0, z0, -1), -+ z0_res = svldff1uh_gather_index_u64 (p0, z0, -1)) -+ -+/* -+** ldff1uh_gather_0_u64_index: -+** ldff1h z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_0_u64_index, svuint64_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64base_index_u64 (p0, z0, 0), -+ z0_res = svldff1uh_gather_index_u64 (p0, z0, 0)) -+ -+/* 
-+** ldff1uh_gather_5_u64_index: -+** ldff1h z0\.d, p0/z, \[z0\.d, #10\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_5_u64_index, svuint64_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64base_index_u64 (p0, z0, 5), -+ z0_res = svldff1uh_gather_index_u64 (p0, z0, 5)) -+ -+/* -+** ldff1uh_gather_31_u64_index: -+** ldff1h z0\.d, p0/z, \[z0\.d, #62\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_31_u64_index, svuint64_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64base_index_u64 (p0, z0, 31), -+ z0_res = svldff1uh_gather_index_u64 (p0, z0, 31)) -+ -+/* -+** ldff1uh_gather_32_u64_index: -+** mov (x[0-9]+), #?64 -+** ldff1h z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uh_gather_32_u64_index, svuint64_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64base_index_u64 (p0, z0, 32), -+ z0_res = svldff1uh_gather_index_u64 (p0, z0, 32)) -+ -+/* -+** ldff1uh_gather_x0_u64_s64offset: -+** ldff1h z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_x0_u64_s64offset, svuint64_t, uint16_t, svint64_t, -+ z0_res = svldff1uh_gather_s64offset_u64 (p0, x0, z0), -+ z0_res = svldff1uh_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ldff1uh_gather_tied1_u64_s64offset: -+** ldff1h z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_tied1_u64_s64offset, svuint64_t, uint16_t, svint64_t, -+ z0_res = svldff1uh_gather_s64offset_u64 (p0, x0, z0), -+ z0_res = svldff1uh_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ldff1uh_gather_untied_u64_s64offset: -+** ldff1h z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_untied_u64_s64offset, svuint64_t, uint16_t, svint64_t, -+ z0_res = svldff1uh_gather_s64offset_u64 (p0, x0, z1), -+ z0_res = svldff1uh_gather_offset_u64 (p0, x0, z1)) -+ -+/* -+** ldff1uh_gather_ext_u64_s64offset: -+** ldff1h z0\.d, p0/z, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_ext_u64_s64offset, svuint64_t, uint16_t, svint64_t, -+ z0_res = svldff1uh_gather_s64offset_u64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svldff1uh_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ldff1uh_gather_x0_u64_u64offset: -+** ldff1h z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_x0_u64_u64offset, svuint64_t, uint16_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64offset_u64 (p0, x0, z0), -+ z0_res = svldff1uh_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ldff1uh_gather_tied1_u64_u64offset: -+** ldff1h z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_tied1_u64_u64offset, svuint64_t, uint16_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64offset_u64 (p0, x0, z0), -+ z0_res = svldff1uh_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ldff1uh_gather_untied_u64_u64offset: -+** ldff1h z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_untied_u64_u64offset, svuint64_t, uint16_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64offset_u64 (p0, x0, z1), -+ z0_res = svldff1uh_gather_offset_u64 (p0, x0, z1)) -+ -+/* -+** ldff1uh_gather_ext_u64_u64offset: -+** ldff1h z0\.d, p0/z, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_ext_u64_u64offset, svuint64_t, uint16_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64offset_u64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svldff1uh_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ldff1uh_gather_x0_u64_s64index: -+** ldff1h z0\.d, p0/z, \[x0, z0\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_x0_u64_s64index, svuint64_t, 
uint16_t, svint64_t, -+ z0_res = svldff1uh_gather_s64index_u64 (p0, x0, z0), -+ z0_res = svldff1uh_gather_index_u64 (p0, x0, z0)) -+ -+/* -+** ldff1uh_gather_tied1_u64_s64index: -+** ldff1h z0\.d, p0/z, \[x0, z0\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_tied1_u64_s64index, svuint64_t, uint16_t, svint64_t, -+ z0_res = svldff1uh_gather_s64index_u64 (p0, x0, z0), -+ z0_res = svldff1uh_gather_index_u64 (p0, x0, z0)) -+ -+/* -+** ldff1uh_gather_untied_u64_s64index: -+** ldff1h z0\.d, p0/z, \[x0, z1\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_untied_u64_s64index, svuint64_t, uint16_t, svint64_t, -+ z0_res = svldff1uh_gather_s64index_u64 (p0, x0, z1), -+ z0_res = svldff1uh_gather_index_u64 (p0, x0, z1)) -+ -+/* -+** ldff1uh_gather_ext_u64_s64index: -+** ldff1h z0\.d, p0/z, \[x0, z1\.d, sxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_ext_u64_s64index, svuint64_t, uint16_t, svint64_t, -+ z0_res = svldff1uh_gather_s64index_u64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svldff1uh_gather_index_u64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ldff1uh_gather_x0_u64_u64index: -+** ldff1h z0\.d, p0/z, \[x0, z0\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_x0_u64_u64index, svuint64_t, uint16_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64index_u64 (p0, x0, z0), -+ z0_res = svldff1uh_gather_index_u64 (p0, x0, z0)) -+ -+/* -+** ldff1uh_gather_tied1_u64_u64index: -+** ldff1h z0\.d, p0/z, \[x0, z0\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_tied1_u64_u64index, svuint64_t, uint16_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64index_u64 (p0, x0, z0), -+ z0_res = svldff1uh_gather_index_u64 (p0, x0, z0)) -+ -+/* -+** ldff1uh_gather_untied_u64_u64index: -+** ldff1h z0\.d, p0/z, \[x0, z1\.d, lsl 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_untied_u64_u64index, svuint64_t, uint16_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64index_u64 (p0, x0, z1), -+ z0_res = svldff1uh_gather_index_u64 (p0, x0, z1)) -+ -+/* -+** ldff1uh_gather_ext_u64_u64index: -+** ldff1h z0\.d, p0/z, \[x0, z1\.d, uxtw 1\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uh_gather_ext_u64_u64index, svuint64_t, uint16_t, svuint64_t, -+ z0_res = svldff1uh_gather_u64index_u64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svldff1uh_gather_index_u64 (p0, x0, svextw_x (p0, z1))) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_s32.c -new file mode 100644 -index 000000000..abb3d769a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_s32.c -@@ -0,0 +1,86 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1uh_s32_base: -+** ldff1h z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1uh_s32_base, svint32_t, uint16_t, -+ z0 = svldff1uh_s32 (p0, x0), -+ z0 = svldff1uh_s32 (p0, x0)) -+ -+/* -+** ldff1uh_s32_index: -+** ldff1h z0\.s, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ldff1uh_s32_index, svint32_t, uint16_t, -+ z0 = svldff1uh_s32 (p0, x0 + x1), -+ z0 = svldff1uh_s32 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1uh_s32_1: -+** inch x0 -+** ldff1h z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1uh_s32_1, svint32_t, uint16_t, -+ z0 = svldff1uh_s32 (p0, x0 + svcntw ()), -+ z0 = svldff1uh_s32 (p0, x0 + svcntw ())) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ldff1uh_s32_m1: -+** dech x0 -+** ldff1h z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1uh_s32_m1, svint32_t, uint16_t, -+ z0 = svldff1uh_s32 (p0, x0 - svcntw ()), -+ z0 = svldff1uh_s32 (p0, x0 - svcntw ())) -+ -+/* -+** ldff1uh_vnum_s32_0: -+** ldff1h z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1uh_vnum_s32_0, svint32_t, uint16_t, -+ z0 = svldff1uh_vnum_s32 (p0, x0, 0), -+ z0 = svldff1uh_vnum_s32 (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1uh_vnum_s32_1: -+** inch x0 -+** ldff1h z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1uh_vnum_s32_1, svint32_t, uint16_t, -+ z0 = svldff1uh_vnum_s32 (p0, x0, 1), -+ z0 = svldff1uh_vnum_s32 (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1uh_vnum_s32_m1: -+** dech x0 -+** ldff1h z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1uh_vnum_s32_m1, svint32_t, uint16_t, -+ z0 = svldff1uh_vnum_s32 (p0, x0, -1), -+ z0 = svldff1uh_vnum_s32 (p0, x0, -1)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ldff1uh_vnum_s32_x1: -+** cnth (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ldff1h z0\.s, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldff1uh_vnum_s32_x1, svint32_t, uint16_t, -+ z0 = svldff1uh_vnum_s32 (p0, x0, x1), -+ z0 = svldff1uh_vnum_s32 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_s64.c -new file mode 100644 -index 000000000..6e330e8e8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_s64.c -@@ -0,0 +1,86 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1uh_s64_base: -+** ldff1h z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1uh_s64_base, svint64_t, uint16_t, -+ z0 = svldff1uh_s64 (p0, x0), -+ z0 = svldff1uh_s64 (p0, x0)) -+ -+/* -+** ldff1uh_s64_index: -+** ldff1h z0\.d, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ldff1uh_s64_index, svint64_t, uint16_t, -+ z0 = svldff1uh_s64 (p0, x0 + x1), -+ z0 = svldff1uh_s64 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1uh_s64_1: -+** incw x0 -+** ldff1h z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1uh_s64_1, svint64_t, uint16_t, -+ z0 = svldff1uh_s64 (p0, x0 + svcntd ()), -+ z0 = svldff1uh_s64 (p0, x0 + svcntd ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1uh_s64_m1: -+** decw x0 -+** ldff1h z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1uh_s64_m1, svint64_t, uint16_t, -+ z0 = svldff1uh_s64 (p0, x0 - svcntd ()), -+ z0 = svldff1uh_s64 (p0, x0 - svcntd ())) -+ -+/* -+** ldff1uh_vnum_s64_0: -+** ldff1h z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1uh_vnum_s64_0, svint64_t, uint16_t, -+ z0 = svldff1uh_vnum_s64 (p0, x0, 0), -+ z0 = svldff1uh_vnum_s64 (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1uh_vnum_s64_1: -+** incw x0 -+** ldff1h z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1uh_vnum_s64_1, svint64_t, uint16_t, -+ z0 = svldff1uh_vnum_s64 (p0, x0, 1), -+ z0 = svldff1uh_vnum_s64 (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ldff1uh_vnum_s64_m1: -+** decw x0 -+** ldff1h z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1uh_vnum_s64_m1, svint64_t, uint16_t, -+ z0 = svldff1uh_vnum_s64 (p0, x0, -1), -+ z0 = svldff1uh_vnum_s64 (p0, x0, -1)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ldff1uh_vnum_s64_x1: -+** cntw (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ldff1h z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldff1uh_vnum_s64_x1, svint64_t, uint16_t, -+ z0 = svldff1uh_vnum_s64 (p0, x0, x1), -+ z0 = svldff1uh_vnum_s64 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_u32.c -new file mode 100644 -index 000000000..4eb5323e9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_u32.c -@@ -0,0 +1,86 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1uh_u32_base: -+** ldff1h z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1uh_u32_base, svuint32_t, uint16_t, -+ z0 = svldff1uh_u32 (p0, x0), -+ z0 = svldff1uh_u32 (p0, x0)) -+ -+/* -+** ldff1uh_u32_index: -+** ldff1h z0\.s, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ldff1uh_u32_index, svuint32_t, uint16_t, -+ z0 = svldff1uh_u32 (p0, x0 + x1), -+ z0 = svldff1uh_u32 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1uh_u32_1: -+** inch x0 -+** ldff1h z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1uh_u32_1, svuint32_t, uint16_t, -+ z0 = svldff1uh_u32 (p0, x0 + svcntw ()), -+ z0 = svldff1uh_u32 (p0, x0 + svcntw ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1uh_u32_m1: -+** dech x0 -+** ldff1h z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1uh_u32_m1, svuint32_t, uint16_t, -+ z0 = svldff1uh_u32 (p0, x0 - svcntw ()), -+ z0 = svldff1uh_u32 (p0, x0 - svcntw ())) -+ -+/* -+** ldff1uh_vnum_u32_0: -+** ldff1h z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1uh_vnum_u32_0, svuint32_t, uint16_t, -+ z0 = svldff1uh_vnum_u32 (p0, x0, 0), -+ z0 = svldff1uh_vnum_u32 (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1uh_vnum_u32_1: -+** inch x0 -+** ldff1h z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1uh_vnum_u32_1, svuint32_t, uint16_t, -+ z0 = svldff1uh_vnum_u32 (p0, x0, 1), -+ z0 = svldff1uh_vnum_u32 (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1uh_vnum_u32_m1: -+** dech x0 -+** ldff1h z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1uh_vnum_u32_m1, svuint32_t, uint16_t, -+ z0 = svldff1uh_vnum_u32 (p0, x0, -1), -+ z0 = svldff1uh_vnum_u32 (p0, x0, -1)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ldff1uh_vnum_u32_x1: -+** cnth (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ldff1h z0\.s, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldff1uh_vnum_u32_x1, svuint32_t, uint16_t, -+ z0 = svldff1uh_vnum_u32 (p0, x0, x1), -+ z0 = svldff1uh_vnum_u32 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_u64.c -new file mode 100644 -index 000000000..ebac26e7d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uh_u64.c -@@ -0,0 +1,86 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1uh_u64_base: -+** ldff1h z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1uh_u64_base, svuint64_t, uint16_t, -+ z0 = svldff1uh_u64 (p0, x0), -+ z0 = svldff1uh_u64 (p0, x0)) -+ -+/* -+** ldff1uh_u64_index: -+** ldff1h z0\.d, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ldff1uh_u64_index, svuint64_t, uint16_t, -+ z0 = svldff1uh_u64 (p0, x0 + x1), -+ z0 = svldff1uh_u64 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1uh_u64_1: -+** incw x0 -+** ldff1h z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1uh_u64_1, svuint64_t, uint16_t, -+ z0 = svldff1uh_u64 (p0, x0 + svcntd ()), -+ z0 = svldff1uh_u64 (p0, x0 + svcntd ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1uh_u64_m1: -+** decw x0 -+** ldff1h z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1uh_u64_m1, svuint64_t, uint16_t, -+ z0 = svldff1uh_u64 (p0, x0 - svcntd ()), -+ z0 = svldff1uh_u64 (p0, x0 - svcntd ())) -+ -+/* -+** ldff1uh_vnum_u64_0: -+** ldff1h z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1uh_vnum_u64_0, svuint64_t, uint16_t, -+ z0 = svldff1uh_vnum_u64 (p0, x0, 0), -+ z0 = svldff1uh_vnum_u64 (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1uh_vnum_u64_1: -+** incw x0 -+** ldff1h z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1uh_vnum_u64_1, svuint64_t, uint16_t, -+ z0 = svldff1uh_vnum_u64 (p0, x0, 1), -+ z0 = svldff1uh_vnum_u64 (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1uh_vnum_u64_m1: -+** decw x0 -+** ldff1h z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1uh_vnum_u64_m1, svuint64_t, uint16_t, -+ z0 = svldff1uh_vnum_u64 (p0, x0, -1), -+ z0 = svldff1uh_vnum_u64 (p0, x0, -1)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ldff1uh_vnum_u64_x1: -+** cntw (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ldff1h z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldff1uh_vnum_u64_x1, svuint64_t, uint16_t, -+ z0 = svldff1uh_vnum_u64 (p0, x0, x1), -+ z0 = svldff1uh_vnum_u64 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_gather_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_gather_s64.c -new file mode 100644 -index 000000000..6c0daea52 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_gather_s64.c -@@ -0,0 +1,308 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1uw_gather_s64_tied1: -+** ldff1w z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uw_gather_s64_tied1, svint64_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64base_s64 (p0, z0), -+ z0_res = svldff1uw_gather_s64 (p0, z0)) -+ -+/* -+** ldff1uw_gather_s64_untied: -+** ldff1w z0\.d, p0/z, \[z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uw_gather_s64_untied, svint64_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64base_s64 (p0, z1), -+ z0_res = svldff1uw_gather_s64 (p0, z1)) -+ -+/* -+** ldff1uw_gather_x0_s64_offset: -+** ldff1w z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uw_gather_x0_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64base_offset_s64 (p0, z0, x0), -+ z0_res = svldff1uw_gather_offset_s64 (p0, z0, x0)) -+ -+/* -+** ldff1uw_gather_m4_s64_offset: -+** mov (x[0-9]+), #?-4 -+** ldff1w z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uw_gather_m4_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64base_offset_s64 (p0, z0, -4), -+ z0_res = svldff1uw_gather_offset_s64 (p0, z0, -4)) -+ -+/* -+** ldff1uw_gather_0_s64_offset: -+** ldff1w z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uw_gather_0_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64base_offset_s64 (p0, z0, 0), -+ z0_res = svldff1uw_gather_offset_s64 (p0, z0, 0)) -+ -+/* -+** ldff1uw_gather_5_s64_offset: -+** mov (x[0-9]+), #?5 -+** ldff1w z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uw_gather_5_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64base_offset_s64 (p0, z0, 5), -+ z0_res = svldff1uw_gather_offset_s64 (p0, z0, 5)) -+ -+/* -+** ldff1uw_gather_6_s64_offset: -+** mov (x[0-9]+), #?6 -+** ldff1w z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uw_gather_6_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64base_offset_s64 (p0, z0, 6), -+ z0_res = svldff1uw_gather_offset_s64 (p0, z0, 6)) -+ -+/* -+** ldff1uw_gather_7_s64_offset: -+** mov (x[0-9]+), #?7 -+** ldff1w z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uw_gather_7_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64base_offset_s64 (p0, z0, 7), -+ z0_res = svldff1uw_gather_offset_s64 (p0, z0, 7)) -+ -+/* -+** ldff1uw_gather_8_s64_offset: -+** ldff1w z0\.d, p0/z, \[z0\.d, #8\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uw_gather_8_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64base_offset_s64 (p0, z0, 8), -+ z0_res = svldff1uw_gather_offset_s64 (p0, z0, 8)) -+ -+/* -+** ldff1uw_gather_124_s64_offset: -+** ldff1w z0\.d, p0/z, \[z0\.d, #124\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uw_gather_124_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64base_offset_s64 (p0, z0, 124), -+ z0_res = svldff1uw_gather_offset_s64 (p0, z0, 124)) -+ -+/* -+** ldff1uw_gather_128_s64_offset: -+** mov (x[0-9]+), #?128 -+** ldff1w z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uw_gather_128_s64_offset, svint64_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64base_offset_s64 (p0, z0, 128), -+ z0_res = svldff1uw_gather_offset_s64 (p0, z0, 128)) -+ -+/* -+** ldff1uw_gather_x0_s64_index: -+** lsl (x[0-9]+), x0, #?2 -+** ldff1w z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uw_gather_x0_s64_index, svint64_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64base_index_s64 (p0, z0, x0), -+ z0_res = 
svldff1uw_gather_index_s64 (p0, z0, x0)) -+ -+/* -+** ldff1uw_gather_m1_s64_index: -+** mov (x[0-9]+), #?-4 -+** ldff1w z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uw_gather_m1_s64_index, svint64_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64base_index_s64 (p0, z0, -1), -+ z0_res = svldff1uw_gather_index_s64 (p0, z0, -1)) -+ -+/* -+** ldff1uw_gather_0_s64_index: -+** ldff1w z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uw_gather_0_s64_index, svint64_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64base_index_s64 (p0, z0, 0), -+ z0_res = svldff1uw_gather_index_s64 (p0, z0, 0)) -+ -+/* -+** ldff1uw_gather_5_s64_index: -+** ldff1w z0\.d, p0/z, \[z0\.d, #20\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uw_gather_5_s64_index, svint64_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64base_index_s64 (p0, z0, 5), -+ z0_res = svldff1uw_gather_index_s64 (p0, z0, 5)) -+ -+/* -+** ldff1uw_gather_31_s64_index: -+** ldff1w z0\.d, p0/z, \[z0\.d, #124\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uw_gather_31_s64_index, svint64_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64base_index_s64 (p0, z0, 31), -+ z0_res = svldff1uw_gather_index_s64 (p0, z0, 31)) -+ -+/* -+** ldff1uw_gather_32_s64_index: -+** mov (x[0-9]+), #?128 -+** ldff1w z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uw_gather_32_s64_index, svint64_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64base_index_s64 (p0, z0, 32), -+ z0_res = svldff1uw_gather_index_s64 (p0, z0, 32)) -+ -+/* -+** ldff1uw_gather_x0_s64_s64offset: -+** ldff1w z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uw_gather_x0_s64_s64offset, svint64_t, uint32_t, svint64_t, -+ z0_res = svldff1uw_gather_s64offset_s64 (p0, x0, z0), -+ z0_res = svldff1uw_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ldff1uw_gather_tied1_s64_s64offset: -+** ldff1w z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uw_gather_tied1_s64_s64offset, svint64_t, uint32_t, svint64_t, -+ z0_res = svldff1uw_gather_s64offset_s64 (p0, x0, z0), -+ z0_res = svldff1uw_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ldff1uw_gather_untied_s64_s64offset: -+** ldff1w z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uw_gather_untied_s64_s64offset, svint64_t, uint32_t, svint64_t, -+ z0_res = svldff1uw_gather_s64offset_s64 (p0, x0, z1), -+ z0_res = svldff1uw_gather_offset_s64 (p0, x0, z1)) -+ -+/* -+** ldff1uw_gather_ext_s64_s64offset: -+** ldff1w z0\.d, p0/z, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uw_gather_ext_s64_s64offset, svint64_t, uint32_t, svint64_t, -+ z0_res = svldff1uw_gather_s64offset_s64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svldff1uw_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ldff1uw_gather_x0_s64_u64offset: -+** ldff1w z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uw_gather_x0_s64_u64offset, svint64_t, uint32_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64offset_s64 (p0, x0, z0), -+ z0_res = svldff1uw_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ldff1uw_gather_tied1_s64_u64offset: -+** ldff1w z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uw_gather_tied1_s64_u64offset, svint64_t, uint32_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64offset_s64 (p0, x0, z0), -+ z0_res = svldff1uw_gather_offset_s64 (p0, x0, z0)) -+ -+/* -+** ldff1uw_gather_untied_s64_u64offset: -+** ldff1w z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uw_gather_untied_s64_u64offset, svint64_t, uint32_t, svuint64_t, -+ 
z0_res = svldff1uw_gather_u64offset_s64 (p0, x0, z1), -+ z0_res = svldff1uw_gather_offset_s64 (p0, x0, z1)) -+ -+/* -+** ldff1uw_gather_ext_s64_u64offset: -+** ldff1w z0\.d, p0/z, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uw_gather_ext_s64_u64offset, svint64_t, uint32_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64offset_s64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svldff1uw_gather_offset_s64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ldff1uw_gather_x0_s64_s64index: -+** ldff1w z0\.d, p0/z, \[x0, z0\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uw_gather_x0_s64_s64index, svint64_t, uint32_t, svint64_t, -+ z0_res = svldff1uw_gather_s64index_s64 (p0, x0, z0), -+ z0_res = svldff1uw_gather_index_s64 (p0, x0, z0)) -+ -+/* -+** ldff1uw_gather_tied1_s64_s64index: -+** ldff1w z0\.d, p0/z, \[x0, z0\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uw_gather_tied1_s64_s64index, svint64_t, uint32_t, svint64_t, -+ z0_res = svldff1uw_gather_s64index_s64 (p0, x0, z0), -+ z0_res = svldff1uw_gather_index_s64 (p0, x0, z0)) -+ -+/* -+** ldff1uw_gather_untied_s64_s64index: -+** ldff1w z0\.d, p0/z, \[x0, z1\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uw_gather_untied_s64_s64index, svint64_t, uint32_t, svint64_t, -+ z0_res = svldff1uw_gather_s64index_s64 (p0, x0, z1), -+ z0_res = svldff1uw_gather_index_s64 (p0, x0, z1)) -+ -+/* -+** ldff1uw_gather_ext_s64_s64index: -+** ldff1w z0\.d, p0/z, \[x0, z1\.d, sxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uw_gather_ext_s64_s64index, svint64_t, uint32_t, svint64_t, -+ z0_res = svldff1uw_gather_s64index_s64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svldff1uw_gather_index_s64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ldff1uw_gather_x0_s64_u64index: -+** ldff1w z0\.d, p0/z, \[x0, z0\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uw_gather_x0_s64_u64index, svint64_t, uint32_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64index_s64 (p0, x0, z0), -+ z0_res = svldff1uw_gather_index_s64 (p0, x0, z0)) -+ -+/* -+** ldff1uw_gather_tied1_s64_u64index: -+** ldff1w z0\.d, p0/z, \[x0, z0\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uw_gather_tied1_s64_u64index, svint64_t, uint32_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64index_s64 (p0, x0, z0), -+ z0_res = svldff1uw_gather_index_s64 (p0, x0, z0)) -+ -+/* -+** ldff1uw_gather_untied_s64_u64index: -+** ldff1w z0\.d, p0/z, \[x0, z1\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uw_gather_untied_s64_u64index, svint64_t, uint32_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64index_s64 (p0, x0, z1), -+ z0_res = svldff1uw_gather_index_s64 (p0, x0, z1)) -+ -+/* -+** ldff1uw_gather_ext_s64_u64index: -+** ldff1w z0\.d, p0/z, \[x0, z1\.d, uxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uw_gather_ext_s64_u64index, svint64_t, uint32_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64index_s64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svldff1uw_gather_index_s64 (p0, x0, svextw_x (p0, z1))) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_gather_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_gather_u64.c -new file mode 100644 -index 000000000..0e400c679 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_gather_u64.c -@@ -0,0 +1,308 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1uw_gather_u64_tied1: -+** ldff1w z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uw_gather_u64_tied1, svuint64_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64base_u64 (p0, z0), -+ z0_res = svldff1uw_gather_u64 (p0, z0)) -+ -+/* -+** ldff1uw_gather_u64_untied: -+** ldff1w z0\.d, p0/z, \[z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uw_gather_u64_untied, svuint64_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64base_u64 (p0, z1), -+ z0_res = svldff1uw_gather_u64 (p0, z1)) -+ -+/* -+** ldff1uw_gather_x0_u64_offset: -+** ldff1w z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uw_gather_x0_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64base_offset_u64 (p0, z0, x0), -+ z0_res = svldff1uw_gather_offset_u64 (p0, z0, x0)) -+ -+/* -+** ldff1uw_gather_m4_u64_offset: -+** mov (x[0-9]+), #?-4 -+** ldff1w z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uw_gather_m4_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64base_offset_u64 (p0, z0, -4), -+ z0_res = svldff1uw_gather_offset_u64 (p0, z0, -4)) -+ -+/* -+** ldff1uw_gather_0_u64_offset: -+** ldff1w z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uw_gather_0_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64base_offset_u64 (p0, z0, 0), -+ z0_res = svldff1uw_gather_offset_u64 (p0, z0, 0)) -+ -+/* -+** ldff1uw_gather_5_u64_offset: -+** mov (x[0-9]+), #?5 -+** ldff1w z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uw_gather_5_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64base_offset_u64 (p0, z0, 5), -+ z0_res = svldff1uw_gather_offset_u64 (p0, z0, 5)) -+ -+/* -+** ldff1uw_gather_6_u64_offset: -+** mov (x[0-9]+), #?6 -+** ldff1w z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uw_gather_6_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64base_offset_u64 (p0, z0, 6), -+ z0_res = svldff1uw_gather_offset_u64 (p0, z0, 6)) -+ -+/* -+** ldff1uw_gather_7_u64_offset: -+** mov (x[0-9]+), #?7 -+** ldff1w z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uw_gather_7_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64base_offset_u64 (p0, z0, 7), -+ z0_res = svldff1uw_gather_offset_u64 (p0, z0, 7)) -+ -+/* -+** ldff1uw_gather_8_u64_offset: -+** ldff1w z0\.d, p0/z, \[z0\.d, #8\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uw_gather_8_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64base_offset_u64 (p0, z0, 8), -+ z0_res = svldff1uw_gather_offset_u64 (p0, z0, 8)) -+ -+/* -+** ldff1uw_gather_124_u64_offset: -+** ldff1w z0\.d, p0/z, \[z0\.d, #124\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uw_gather_124_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64base_offset_u64 (p0, z0, 124), -+ z0_res = svldff1uw_gather_offset_u64 (p0, z0, 124)) -+ -+/* -+** ldff1uw_gather_128_u64_offset: -+** mov (x[0-9]+), #?128 -+** ldff1w z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uw_gather_128_u64_offset, svuint64_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64base_offset_u64 (p0, z0, 128), -+ z0_res = svldff1uw_gather_offset_u64 (p0, z0, 128)) -+ -+/* -+** ldff1uw_gather_x0_u64_index: -+** lsl (x[0-9]+), x0, #?2 -+** ldff1w z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uw_gather_x0_u64_index, svuint64_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64base_index_u64 (p0, z0, x0), -+ z0_res = 
svldff1uw_gather_index_u64 (p0, z0, x0)) -+ -+/* -+** ldff1uw_gather_m1_u64_index: -+** mov (x[0-9]+), #?-4 -+** ldff1w z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uw_gather_m1_u64_index, svuint64_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64base_index_u64 (p0, z0, -1), -+ z0_res = svldff1uw_gather_index_u64 (p0, z0, -1)) -+ -+/* -+** ldff1uw_gather_0_u64_index: -+** ldff1w z0\.d, p0/z, \[z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uw_gather_0_u64_index, svuint64_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64base_index_u64 (p0, z0, 0), -+ z0_res = svldff1uw_gather_index_u64 (p0, z0, 0)) -+ -+/* -+** ldff1uw_gather_5_u64_index: -+** ldff1w z0\.d, p0/z, \[z0\.d, #20\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uw_gather_5_u64_index, svuint64_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64base_index_u64 (p0, z0, 5), -+ z0_res = svldff1uw_gather_index_u64 (p0, z0, 5)) -+ -+/* -+** ldff1uw_gather_31_u64_index: -+** ldff1w z0\.d, p0/z, \[z0\.d, #124\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uw_gather_31_u64_index, svuint64_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64base_index_u64 (p0, z0, 31), -+ z0_res = svldff1uw_gather_index_u64 (p0, z0, 31)) -+ -+/* -+** ldff1uw_gather_32_u64_index: -+** mov (x[0-9]+), #?128 -+** ldff1w z0\.d, p0/z, \[\1, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_ZS (ldff1uw_gather_32_u64_index, svuint64_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64base_index_u64 (p0, z0, 32), -+ z0_res = svldff1uw_gather_index_u64 (p0, z0, 32)) -+ -+/* -+** ldff1uw_gather_x0_u64_s64offset: -+** ldff1w z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uw_gather_x0_u64_s64offset, svuint64_t, uint32_t, svint64_t, -+ z0_res = svldff1uw_gather_s64offset_u64 (p0, x0, z0), -+ z0_res = svldff1uw_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ldff1uw_gather_tied1_u64_s64offset: -+** ldff1w z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uw_gather_tied1_u64_s64offset, svuint64_t, uint32_t, svint64_t, -+ z0_res = svldff1uw_gather_s64offset_u64 (p0, x0, z0), -+ z0_res = svldff1uw_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ldff1uw_gather_untied_u64_s64offset: -+** ldff1w z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uw_gather_untied_u64_s64offset, svuint64_t, uint32_t, svint64_t, -+ z0_res = svldff1uw_gather_s64offset_u64 (p0, x0, z1), -+ z0_res = svldff1uw_gather_offset_u64 (p0, x0, z1)) -+ -+/* -+** ldff1uw_gather_ext_u64_s64offset: -+** ldff1w z0\.d, p0/z, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uw_gather_ext_u64_s64offset, svuint64_t, uint32_t, svint64_t, -+ z0_res = svldff1uw_gather_s64offset_u64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svldff1uw_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ldff1uw_gather_x0_u64_u64offset: -+** ldff1w z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uw_gather_x0_u64_u64offset, svuint64_t, uint32_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64offset_u64 (p0, x0, z0), -+ z0_res = svldff1uw_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ldff1uw_gather_tied1_u64_u64offset: -+** ldff1w z0\.d, p0/z, \[x0, z0\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uw_gather_tied1_u64_u64offset, svuint64_t, uint32_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64offset_u64 (p0, x0, z0), -+ z0_res = svldff1uw_gather_offset_u64 (p0, x0, z0)) -+ -+/* -+** ldff1uw_gather_untied_u64_u64offset: -+** ldff1w z0\.d, p0/z, \[x0, z1\.d\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uw_gather_untied_u64_u64offset, svuint64_t, uint32_t, 
svuint64_t, -+ z0_res = svldff1uw_gather_u64offset_u64 (p0, x0, z1), -+ z0_res = svldff1uw_gather_offset_u64 (p0, x0, z1)) -+ -+/* -+** ldff1uw_gather_ext_u64_u64offset: -+** ldff1w z0\.d, p0/z, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uw_gather_ext_u64_u64offset, svuint64_t, uint32_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64offset_u64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svldff1uw_gather_offset_u64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ldff1uw_gather_x0_u64_s64index: -+** ldff1w z0\.d, p0/z, \[x0, z0\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uw_gather_x0_u64_s64index, svuint64_t, uint32_t, svint64_t, -+ z0_res = svldff1uw_gather_s64index_u64 (p0, x0, z0), -+ z0_res = svldff1uw_gather_index_u64 (p0, x0, z0)) -+ -+/* -+** ldff1uw_gather_tied1_u64_s64index: -+** ldff1w z0\.d, p0/z, \[x0, z0\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uw_gather_tied1_u64_s64index, svuint64_t, uint32_t, svint64_t, -+ z0_res = svldff1uw_gather_s64index_u64 (p0, x0, z0), -+ z0_res = svldff1uw_gather_index_u64 (p0, x0, z0)) -+ -+/* -+** ldff1uw_gather_untied_u64_s64index: -+** ldff1w z0\.d, p0/z, \[x0, z1\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uw_gather_untied_u64_s64index, svuint64_t, uint32_t, svint64_t, -+ z0_res = svldff1uw_gather_s64index_u64 (p0, x0, z1), -+ z0_res = svldff1uw_gather_index_u64 (p0, x0, z1)) -+ -+/* -+** ldff1uw_gather_ext_u64_s64index: -+** ldff1w z0\.d, p0/z, \[x0, z1\.d, sxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uw_gather_ext_u64_s64index, svuint64_t, uint32_t, svint64_t, -+ z0_res = svldff1uw_gather_s64index_u64 (p0, x0, svextw_s64_x (p0, z1)), -+ z0_res = svldff1uw_gather_index_u64 (p0, x0, svextw_x (p0, z1))) -+ -+/* -+** ldff1uw_gather_x0_u64_u64index: -+** ldff1w z0\.d, p0/z, \[x0, z0\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uw_gather_x0_u64_u64index, svuint64_t, uint32_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64index_u64 (p0, x0, z0), -+ z0_res = svldff1uw_gather_index_u64 (p0, x0, z0)) -+ -+/* -+** ldff1uw_gather_tied1_u64_u64index: -+** ldff1w z0\.d, p0/z, \[x0, z0\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uw_gather_tied1_u64_u64index, svuint64_t, uint32_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64index_u64 (p0, x0, z0), -+ z0_res = svldff1uw_gather_index_u64 (p0, x0, z0)) -+ -+/* -+** ldff1uw_gather_untied_u64_u64index: -+** ldff1w z0\.d, p0/z, \[x0, z1\.d, lsl 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uw_gather_untied_u64_u64index, svuint64_t, uint32_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64index_u64 (p0, x0, z1), -+ z0_res = svldff1uw_gather_index_u64 (p0, x0, z1)) -+ -+/* -+** ldff1uw_gather_ext_u64_u64index: -+** ldff1w z0\.d, p0/z, \[x0, z1\.d, uxtw 2\] -+** ret -+*/ -+TEST_LOAD_GATHER_SZ (ldff1uw_gather_ext_u64_u64index, svuint64_t, uint32_t, svuint64_t, -+ z0_res = svldff1uw_gather_u64index_u64 (p0, x0, svextw_u64_x (p0, z1)), -+ z0_res = svldff1uw_gather_index_u64 (p0, x0, svextw_x (p0, z1))) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_s64.c -new file mode 100644 -index 000000000..ac9779899 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_s64.c -@@ -0,0 +1,86 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1uw_s64_base: -+** ldff1w z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1uw_s64_base, svint64_t, uint32_t, -+ z0 = svldff1uw_s64 (p0, x0), -+ z0 = svldff1uw_s64 (p0, x0)) -+ -+/* -+** ldff1uw_s64_index: -+** ldff1w z0\.d, p0/z, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_LOAD (ldff1uw_s64_index, svint64_t, uint32_t, -+ z0 = svldff1uw_s64 (p0, x0 + x1), -+ z0 = svldff1uw_s64 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1uw_s64_1: -+** inch x0 -+** ldff1w z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1uw_s64_1, svint64_t, uint32_t, -+ z0 = svldff1uw_s64 (p0, x0 + svcntd ()), -+ z0 = svldff1uw_s64 (p0, x0 + svcntd ())) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1uw_s64_m1: -+** dech x0 -+** ldff1w z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1uw_s64_m1, svint64_t, uint32_t, -+ z0 = svldff1uw_s64 (p0, x0 - svcntd ()), -+ z0 = svldff1uw_s64 (p0, x0 - svcntd ())) -+ -+/* -+** ldff1uw_vnum_s64_0: -+** ldff1w z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1uw_vnum_s64_0, svint64_t, uint32_t, -+ z0 = svldff1uw_vnum_s64 (p0, x0, 0), -+ z0 = svldff1uw_vnum_s64 (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1uw_vnum_s64_1: -+** inch x0 -+** ldff1w z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1uw_vnum_s64_1, svint64_t, uint32_t, -+ z0 = svldff1uw_vnum_s64 (p0, x0, 1), -+ z0 = svldff1uw_vnum_s64 (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1uw_vnum_s64_m1: -+** dech x0 -+** ldff1w z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1uw_vnum_s64_m1, svint64_t, uint32_t, -+ z0 = svldff1uw_vnum_s64 (p0, x0, -1), -+ z0 = svldff1uw_vnum_s64 (p0, x0, -1)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ldff1uw_vnum_s64_x1: -+** cnth (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ldff1w z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldff1uw_vnum_s64_x1, svint64_t, uint32_t, -+ z0 = svldff1uw_vnum_s64 (p0, x0, x1), -+ z0 = svldff1uw_vnum_s64 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_u64.c -new file mode 100644 -index 000000000..c7ab06171 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldff1uw_u64.c -@@ -0,0 +1,86 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldff1uw_u64_base: -+** ldff1w z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1uw_u64_base, svuint64_t, uint32_t, -+ z0 = svldff1uw_u64 (p0, x0), -+ z0 = svldff1uw_u64 (p0, x0)) -+ -+/* -+** ldff1uw_u64_index: -+** ldff1w z0\.d, p0/z, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_LOAD (ldff1uw_u64_index, svuint64_t, uint32_t, -+ z0 = svldff1uw_u64 (p0, x0 + x1), -+ z0 = svldff1uw_u64 (p0, x0 + x1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1uw_u64_1: -+** inch x0 -+** ldff1w z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1uw_u64_1, svuint64_t, uint32_t, -+ z0 = svldff1uw_u64 (p0, x0 + svcntd ()), -+ z0 = svldff1uw_u64 (p0, x0 + svcntd ())) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ldff1uw_u64_m1: -+** dech x0 -+** ldff1w z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1uw_u64_m1, svuint64_t, uint32_t, -+ z0 = svldff1uw_u64 (p0, x0 - svcntd ()), -+ z0 = svldff1uw_u64 (p0, x0 - svcntd ())) -+ -+/* -+** ldff1uw_vnum_u64_0: -+** ldff1w z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1uw_vnum_u64_0, svuint64_t, uint32_t, -+ z0 = svldff1uw_vnum_u64 (p0, x0, 0), -+ z0 = svldff1uw_vnum_u64 (p0, x0, 0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1uw_vnum_u64_1: -+** inch x0 -+** ldff1w z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1uw_vnum_u64_1, svuint64_t, uint32_t, -+ z0 = svldff1uw_vnum_u64 (p0, x0, 1), -+ z0 = svldff1uw_vnum_u64 (p0, x0, 1)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldff1uw_vnum_u64_m1: -+** dech x0 -+** ldff1w z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldff1uw_vnum_u64_m1, svuint64_t, uint32_t, -+ z0 = svldff1uw_vnum_u64 (p0, x0, -1), -+ z0 = svldff1uw_vnum_u64 (p0, x0, -1)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ldff1uw_vnum_u64_x1: -+** cnth (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ldff1w z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldff1uw_vnum_u64_x1, svuint64_t, uint32_t, -+ z0 = svldff1uw_vnum_u64 (p0, x0, x1), -+ z0 = svldff1uw_vnum_u64 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_bf16.c -new file mode 100644 -index 000000000..947a896e7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_bf16.c -@@ -0,0 +1,154 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnf1_bf16_base: -+** ldnf1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_bf16_base, svbfloat16_t, bfloat16_t, -+ z0 = svldnf1_bf16 (p0, x0), -+ z0 = svldnf1 (p0, x0)) -+ -+/* -+** ldnf1_bf16_index: -+** add (x[0-9]+), x0, x1, lsl 1 -+** ldnf1h z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ldnf1_bf16_index, svbfloat16_t, bfloat16_t, -+ z0 = svldnf1_bf16 (p0, x0 + x1), -+ z0 = svldnf1 (p0, x0 + x1)) -+ -+/* -+** ldnf1_bf16_1: -+** ldnf1h z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_bf16_1, svbfloat16_t, bfloat16_t, -+ z0 = svldnf1_bf16 (p0, x0 + svcnth ()), -+ z0 = svldnf1 (p0, x0 + svcnth ())) -+ -+/* -+** ldnf1_bf16_7: -+** ldnf1h z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_bf16_7, svbfloat16_t, bfloat16_t, -+ z0 = svldnf1_bf16 (p0, x0 + svcnth () * 7), -+ z0 = svldnf1 (p0, x0 + svcnth () * 7)) -+ -+/* -+** ldnf1_bf16_8: -+** incb x0, all, mul #8 -+** ldnf1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_bf16_8, svbfloat16_t, bfloat16_t, -+ z0 = svldnf1_bf16 (p0, x0 + svcnth () * 8), -+ z0 = svldnf1 (p0, x0 + svcnth () * 8)) -+ -+/* -+** ldnf1_bf16_m1: -+** ldnf1h z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_bf16_m1, svbfloat16_t, bfloat16_t, -+ z0 = svldnf1_bf16 (p0, x0 - svcnth ()), -+ z0 = svldnf1 (p0, x0 - svcnth ())) -+ -+/* -+** ldnf1_bf16_m8: -+** ldnf1h z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_bf16_m8, svbfloat16_t, bfloat16_t, -+ z0 = svldnf1_bf16 (p0, x0 - svcnth () * 8), -+ z0 = svldnf1 (p0, x0 - svcnth () * 8)) -+ -+/* -+** ldnf1_bf16_m9: -+** decb x0, all, mul #9 -+** ldnf1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_bf16_m9, svbfloat16_t, bfloat16_t, -+ z0 = svldnf1_bf16 (p0, x0 - svcnth () * 9), -+ z0 = svldnf1 (p0, 
x0 - svcnth () * 9)) -+ -+/* -+** ldnf1_vnum_bf16_0: -+** ldnf1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_bf16_0, svbfloat16_t, bfloat16_t, -+ z0 = svldnf1_vnum_bf16 (p0, x0, 0), -+ z0 = svldnf1_vnum (p0, x0, 0)) -+ -+/* -+** ldnf1_vnum_bf16_1: -+** ldnf1h z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_bf16_1, svbfloat16_t, bfloat16_t, -+ z0 = svldnf1_vnum_bf16 (p0, x0, 1), -+ z0 = svldnf1_vnum (p0, x0, 1)) -+ -+/* -+** ldnf1_vnum_bf16_7: -+** ldnf1h z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_bf16_7, svbfloat16_t, bfloat16_t, -+ z0 = svldnf1_vnum_bf16 (p0, x0, 7), -+ z0 = svldnf1_vnum (p0, x0, 7)) -+ -+/* -+** ldnf1_vnum_bf16_8: -+** incb x0, all, mul #8 -+** ldnf1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_bf16_8, svbfloat16_t, bfloat16_t, -+ z0 = svldnf1_vnum_bf16 (p0, x0, 8), -+ z0 = svldnf1_vnum (p0, x0, 8)) -+ -+/* -+** ldnf1_vnum_bf16_m1: -+** ldnf1h z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_bf16_m1, svbfloat16_t, bfloat16_t, -+ z0 = svldnf1_vnum_bf16 (p0, x0, -1), -+ z0 = svldnf1_vnum (p0, x0, -1)) -+ -+/* -+** ldnf1_vnum_bf16_m8: -+** ldnf1h z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_bf16_m8, svbfloat16_t, bfloat16_t, -+ z0 = svldnf1_vnum_bf16 (p0, x0, -8), -+ z0 = svldnf1_vnum (p0, x0, -8)) -+ -+/* -+** ldnf1_vnum_bf16_m9: -+** decb x0, all, mul #9 -+** ldnf1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_bf16_m9, svbfloat16_t, bfloat16_t, -+ z0 = svldnf1_vnum_bf16 (p0, x0, -9), -+ z0 = svldnf1_vnum (p0, x0, -9)) -+ -+/* -+** ldnf1_vnum_bf16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnf1h z0\.h, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_bf16_x1, svbfloat16_t, bfloat16_t, -+ z0 = svldnf1_vnum_bf16 (p0, x0, x1), -+ z0 = svldnf1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f16.c -new file mode 100644 -index 000000000..cf0178688 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f16.c -@@ -0,0 +1,154 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnf1_f16_base: -+** ldnf1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_f16_base, svfloat16_t, float16_t, -+ z0 = svldnf1_f16 (p0, x0), -+ z0 = svldnf1 (p0, x0)) -+ -+/* -+** ldnf1_f16_index: -+** add (x[0-9]+), x0, x1, lsl 1 -+** ldnf1h z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ldnf1_f16_index, svfloat16_t, float16_t, -+ z0 = svldnf1_f16 (p0, x0 + x1), -+ z0 = svldnf1 (p0, x0 + x1)) -+ -+/* -+** ldnf1_f16_1: -+** ldnf1h z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_f16_1, svfloat16_t, float16_t, -+ z0 = svldnf1_f16 (p0, x0 + svcnth ()), -+ z0 = svldnf1 (p0, x0 + svcnth ())) -+ -+/* -+** ldnf1_f16_7: -+** ldnf1h z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_f16_7, svfloat16_t, float16_t, -+ z0 = svldnf1_f16 (p0, x0 + svcnth () * 7), -+ z0 = svldnf1 (p0, x0 + svcnth () * 7)) -+ -+/* -+** ldnf1_f16_8: -+** incb x0, all, mul #8 -+** ldnf1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_f16_8, svfloat16_t, float16_t, -+ z0 = svldnf1_f16 (p0, x0 + svcnth () * 8), -+ z0 = svldnf1 (p0, x0 + svcnth () * 8)) -+ -+/* -+** ldnf1_f16_m1: -+** ldnf1h z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_f16_m1, svfloat16_t, float16_t, -+ z0 = svldnf1_f16 (p0, x0 - svcnth ()), -+ z0 = svldnf1 (p0, x0 - svcnth ())) -+ -+/* -+** ldnf1_f16_m8: -+** ldnf1h z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_f16_m8, svfloat16_t, float16_t, -+ z0 = svldnf1_f16 (p0, x0 - svcnth () * 8), -+ z0 = svldnf1 (p0, x0 - svcnth () * 8)) -+ -+/* -+** ldnf1_f16_m9: -+** decb x0, all, mul #9 -+** ldnf1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_f16_m9, svfloat16_t, float16_t, -+ z0 = svldnf1_f16 (p0, x0 - svcnth () * 9), -+ z0 = svldnf1 (p0, x0 - svcnth () * 9)) -+ -+/* -+** ldnf1_vnum_f16_0: -+** ldnf1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_f16_0, svfloat16_t, float16_t, -+ z0 = svldnf1_vnum_f16 (p0, x0, 0), -+ z0 = svldnf1_vnum (p0, x0, 0)) -+ -+/* -+** ldnf1_vnum_f16_1: -+** ldnf1h z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_f16_1, svfloat16_t, float16_t, -+ z0 = svldnf1_vnum_f16 (p0, x0, 1), -+ z0 = svldnf1_vnum (p0, x0, 1)) -+ -+/* -+** ldnf1_vnum_f16_7: -+** ldnf1h z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_f16_7, svfloat16_t, float16_t, -+ z0 = svldnf1_vnum_f16 (p0, x0, 7), -+ z0 = svldnf1_vnum (p0, x0, 7)) -+ -+/* -+** ldnf1_vnum_f16_8: -+** incb x0, all, mul #8 -+** ldnf1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_f16_8, svfloat16_t, float16_t, -+ z0 = svldnf1_vnum_f16 (p0, x0, 8), -+ z0 = svldnf1_vnum (p0, x0, 8)) -+ -+/* -+** ldnf1_vnum_f16_m1: -+** ldnf1h z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_f16_m1, svfloat16_t, float16_t, -+ z0 = svldnf1_vnum_f16 (p0, x0, -1), -+ z0 = svldnf1_vnum (p0, x0, -1)) -+ -+/* -+** ldnf1_vnum_f16_m8: -+** ldnf1h z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_f16_m8, svfloat16_t, float16_t, -+ z0 = svldnf1_vnum_f16 (p0, x0, -8), -+ z0 = svldnf1_vnum (p0, x0, -8)) -+ -+/* -+** ldnf1_vnum_f16_m9: -+** decb x0, all, mul #9 -+** ldnf1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_f16_m9, svfloat16_t, float16_t, -+ z0 = svldnf1_vnum_f16 (p0, x0, -9), -+ z0 = svldnf1_vnum (p0, x0, -9)) -+ -+/* -+** ldnf1_vnum_f16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnf1h z0\.h, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_f16_x1, svfloat16_t, float16_t, 
-+ z0 = svldnf1_vnum_f16 (p0, x0, x1), -+ z0 = svldnf1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f32.c -new file mode 100644 -index 000000000..83b73ec8e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f32.c -@@ -0,0 +1,154 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnf1_f32_base: -+** ldnf1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_f32_base, svfloat32_t, float32_t, -+ z0 = svldnf1_f32 (p0, x0), -+ z0 = svldnf1 (p0, x0)) -+ -+/* -+** ldnf1_f32_index: -+** add (x[0-9]+), x0, x1, lsl 2 -+** ldnf1w z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ldnf1_f32_index, svfloat32_t, float32_t, -+ z0 = svldnf1_f32 (p0, x0 + x1), -+ z0 = svldnf1 (p0, x0 + x1)) -+ -+/* -+** ldnf1_f32_1: -+** ldnf1w z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_f32_1, svfloat32_t, float32_t, -+ z0 = svldnf1_f32 (p0, x0 + svcntw ()), -+ z0 = svldnf1 (p0, x0 + svcntw ())) -+ -+/* -+** ldnf1_f32_7: -+** ldnf1w z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_f32_7, svfloat32_t, float32_t, -+ z0 = svldnf1_f32 (p0, x0 + svcntw () * 7), -+ z0 = svldnf1 (p0, x0 + svcntw () * 7)) -+ -+/* -+** ldnf1_f32_8: -+** incb x0, all, mul #8 -+** ldnf1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_f32_8, svfloat32_t, float32_t, -+ z0 = svldnf1_f32 (p0, x0 + svcntw () * 8), -+ z0 = svldnf1 (p0, x0 + svcntw () * 8)) -+ -+/* -+** ldnf1_f32_m1: -+** ldnf1w z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_f32_m1, svfloat32_t, float32_t, -+ z0 = svldnf1_f32 (p0, x0 - svcntw ()), -+ z0 = svldnf1 (p0, x0 - svcntw ())) -+ -+/* -+** ldnf1_f32_m8: -+** ldnf1w z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_f32_m8, svfloat32_t, float32_t, -+ z0 = svldnf1_f32 (p0, x0 - svcntw () * 8), -+ z0 = svldnf1 (p0, x0 - svcntw () * 8)) -+ -+/* -+** ldnf1_f32_m9: -+** decb x0, all, mul #9 -+** ldnf1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_f32_m9, svfloat32_t, float32_t, -+ z0 = svldnf1_f32 (p0, x0 - svcntw () * 9), -+ z0 = svldnf1 (p0, x0 - svcntw () * 9)) -+ -+/* -+** ldnf1_vnum_f32_0: -+** ldnf1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_f32_0, svfloat32_t, float32_t, -+ z0 = svldnf1_vnum_f32 (p0, x0, 0), -+ z0 = svldnf1_vnum (p0, x0, 0)) -+ -+/* -+** ldnf1_vnum_f32_1: -+** ldnf1w z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_f32_1, svfloat32_t, float32_t, -+ z0 = svldnf1_vnum_f32 (p0, x0, 1), -+ z0 = svldnf1_vnum (p0, x0, 1)) -+ -+/* -+** ldnf1_vnum_f32_7: -+** ldnf1w z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_f32_7, svfloat32_t, float32_t, -+ z0 = svldnf1_vnum_f32 (p0, x0, 7), -+ z0 = svldnf1_vnum (p0, x0, 7)) -+ -+/* -+** ldnf1_vnum_f32_8: -+** incb x0, all, mul #8 -+** ldnf1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_f32_8, svfloat32_t, float32_t, -+ z0 = svldnf1_vnum_f32 (p0, x0, 8), -+ z0 = svldnf1_vnum (p0, x0, 8)) -+ -+/* -+** ldnf1_vnum_f32_m1: -+** ldnf1w z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_f32_m1, svfloat32_t, float32_t, -+ z0 = svldnf1_vnum_f32 (p0, x0, -1), -+ z0 = svldnf1_vnum (p0, x0, -1)) -+ -+/* -+** ldnf1_vnum_f32_m8: -+** ldnf1w z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_f32_m8, svfloat32_t, float32_t, -+ z0 = svldnf1_vnum_f32 (p0, x0, -8), -+ z0 = svldnf1_vnum (p0, x0, -8)) 
-+ -+/* -+** ldnf1_vnum_f32_m9: -+** decb x0, all, mul #9 -+** ldnf1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_f32_m9, svfloat32_t, float32_t, -+ z0 = svldnf1_vnum_f32 (p0, x0, -9), -+ z0 = svldnf1_vnum (p0, x0, -9)) -+ -+/* -+** ldnf1_vnum_f32_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnf1w z0\.s, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_f32_x1, svfloat32_t, float32_t, -+ z0 = svldnf1_vnum_f32 (p0, x0, x1), -+ z0 = svldnf1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f64.c -new file mode 100644 -index 000000000..778096e82 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_f64.c -@@ -0,0 +1,154 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnf1_f64_base: -+** ldnf1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_f64_base, svfloat64_t, float64_t, -+ z0 = svldnf1_f64 (p0, x0), -+ z0 = svldnf1 (p0, x0)) -+ -+/* -+** ldnf1_f64_index: -+** add (x[0-9]+), x0, x1, lsl 3 -+** ldnf1d z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ldnf1_f64_index, svfloat64_t, float64_t, -+ z0 = svldnf1_f64 (p0, x0 + x1), -+ z0 = svldnf1 (p0, x0 + x1)) -+ -+/* -+** ldnf1_f64_1: -+** ldnf1d z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_f64_1, svfloat64_t, float64_t, -+ z0 = svldnf1_f64 (p0, x0 + svcntd ()), -+ z0 = svldnf1 (p0, x0 + svcntd ())) -+ -+/* -+** ldnf1_f64_7: -+** ldnf1d z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_f64_7, svfloat64_t, float64_t, -+ z0 = svldnf1_f64 (p0, x0 + svcntd () * 7), -+ z0 = svldnf1 (p0, x0 + svcntd () * 7)) -+ -+/* -+** ldnf1_f64_8: -+** incb x0, all, mul #8 -+** ldnf1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_f64_8, svfloat64_t, float64_t, -+ z0 = svldnf1_f64 (p0, x0 + svcntd () * 8), -+ z0 = svldnf1 (p0, x0 + svcntd () * 8)) -+ -+/* -+** ldnf1_f64_m1: -+** ldnf1d z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_f64_m1, svfloat64_t, float64_t, -+ z0 = svldnf1_f64 (p0, x0 - svcntd ()), -+ z0 = svldnf1 (p0, x0 - svcntd ())) -+ -+/* -+** ldnf1_f64_m8: -+** ldnf1d z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_f64_m8, svfloat64_t, float64_t, -+ z0 = svldnf1_f64 (p0, x0 - svcntd () * 8), -+ z0 = svldnf1 (p0, x0 - svcntd () * 8)) -+ -+/* -+** ldnf1_f64_m9: -+** decb x0, all, mul #9 -+** ldnf1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_f64_m9, svfloat64_t, float64_t, -+ z0 = svldnf1_f64 (p0, x0 - svcntd () * 9), -+ z0 = svldnf1 (p0, x0 - svcntd () * 9)) -+ -+/* -+** ldnf1_vnum_f64_0: -+** ldnf1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_f64_0, svfloat64_t, float64_t, -+ z0 = svldnf1_vnum_f64 (p0, x0, 0), -+ z0 = svldnf1_vnum (p0, x0, 0)) -+ -+/* -+** ldnf1_vnum_f64_1: -+** ldnf1d z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_f64_1, svfloat64_t, float64_t, -+ z0 = svldnf1_vnum_f64 (p0, x0, 1), -+ z0 = svldnf1_vnum (p0, x0, 1)) -+ -+/* -+** ldnf1_vnum_f64_7: -+** ldnf1d z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_f64_7, svfloat64_t, float64_t, -+ z0 = svldnf1_vnum_f64 (p0, x0, 7), -+ z0 = svldnf1_vnum (p0, x0, 7)) -+ -+/* -+** ldnf1_vnum_f64_8: -+** incb x0, all, mul #8 -+** ldnf1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_f64_8, svfloat64_t, float64_t, -+ z0 = svldnf1_vnum_f64 (p0, x0, 8), -+ z0 = svldnf1_vnum (p0, x0, 8)) -+ -+/* -+** 
ldnf1_vnum_f64_m1: -+** ldnf1d z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_f64_m1, svfloat64_t, float64_t, -+ z0 = svldnf1_vnum_f64 (p0, x0, -1), -+ z0 = svldnf1_vnum (p0, x0, -1)) -+ -+/* -+** ldnf1_vnum_f64_m8: -+** ldnf1d z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_f64_m8, svfloat64_t, float64_t, -+ z0 = svldnf1_vnum_f64 (p0, x0, -8), -+ z0 = svldnf1_vnum (p0, x0, -8)) -+ -+/* -+** ldnf1_vnum_f64_m9: -+** decb x0, all, mul #9 -+** ldnf1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_f64_m9, svfloat64_t, float64_t, -+ z0 = svldnf1_vnum_f64 (p0, x0, -9), -+ z0 = svldnf1_vnum (p0, x0, -9)) -+ -+/* -+** ldnf1_vnum_f64_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnf1d z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_f64_x1, svfloat64_t, float64_t, -+ z0 = svldnf1_vnum_f64 (p0, x0, x1), -+ z0 = svldnf1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s16.c -new file mode 100644 -index 000000000..592c8237d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s16.c -@@ -0,0 +1,154 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnf1_s16_base: -+** ldnf1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_s16_base, svint16_t, int16_t, -+ z0 = svldnf1_s16 (p0, x0), -+ z0 = svldnf1 (p0, x0)) -+ -+/* -+** ldnf1_s16_index: -+** add (x[0-9]+), x0, x1, lsl 1 -+** ldnf1h z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ldnf1_s16_index, svint16_t, int16_t, -+ z0 = svldnf1_s16 (p0, x0 + x1), -+ z0 = svldnf1 (p0, x0 + x1)) -+ -+/* -+** ldnf1_s16_1: -+** ldnf1h z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_s16_1, svint16_t, int16_t, -+ z0 = svldnf1_s16 (p0, x0 + svcnth ()), -+ z0 = svldnf1 (p0, x0 + svcnth ())) -+ -+/* -+** ldnf1_s16_7: -+** ldnf1h z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_s16_7, svint16_t, int16_t, -+ z0 = svldnf1_s16 (p0, x0 + svcnth () * 7), -+ z0 = svldnf1 (p0, x0 + svcnth () * 7)) -+ -+/* -+** ldnf1_s16_8: -+** incb x0, all, mul #8 -+** ldnf1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_s16_8, svint16_t, int16_t, -+ z0 = svldnf1_s16 (p0, x0 + svcnth () * 8), -+ z0 = svldnf1 (p0, x0 + svcnth () * 8)) -+ -+/* -+** ldnf1_s16_m1: -+** ldnf1h z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_s16_m1, svint16_t, int16_t, -+ z0 = svldnf1_s16 (p0, x0 - svcnth ()), -+ z0 = svldnf1 (p0, x0 - svcnth ())) -+ -+/* -+** ldnf1_s16_m8: -+** ldnf1h z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_s16_m8, svint16_t, int16_t, -+ z0 = svldnf1_s16 (p0, x0 - svcnth () * 8), -+ z0 = svldnf1 (p0, x0 - svcnth () * 8)) -+ -+/* -+** ldnf1_s16_m9: -+** decb x0, all, mul #9 -+** ldnf1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_s16_m9, svint16_t, int16_t, -+ z0 = svldnf1_s16 (p0, x0 - svcnth () * 9), -+ z0 = svldnf1 (p0, x0 - svcnth () * 9)) -+ -+/* -+** ldnf1_vnum_s16_0: -+** ldnf1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_s16_0, svint16_t, int16_t, -+ z0 = svldnf1_vnum_s16 (p0, x0, 0), -+ z0 = svldnf1_vnum (p0, x0, 0)) -+ -+/* -+** ldnf1_vnum_s16_1: -+** ldnf1h z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_s16_1, svint16_t, int16_t, -+ z0 = svldnf1_vnum_s16 (p0, x0, 1), -+ z0 = svldnf1_vnum (p0, x0, 1)) -+ -+/* -+** ldnf1_vnum_s16_7: -+** ldnf1h z0\.h, p0/z, \[x0, #7, mul 
vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_s16_7, svint16_t, int16_t, -+ z0 = svldnf1_vnum_s16 (p0, x0, 7), -+ z0 = svldnf1_vnum (p0, x0, 7)) -+ -+/* -+** ldnf1_vnum_s16_8: -+** incb x0, all, mul #8 -+** ldnf1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_s16_8, svint16_t, int16_t, -+ z0 = svldnf1_vnum_s16 (p0, x0, 8), -+ z0 = svldnf1_vnum (p0, x0, 8)) -+ -+/* -+** ldnf1_vnum_s16_m1: -+** ldnf1h z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_s16_m1, svint16_t, int16_t, -+ z0 = svldnf1_vnum_s16 (p0, x0, -1), -+ z0 = svldnf1_vnum (p0, x0, -1)) -+ -+/* -+** ldnf1_vnum_s16_m8: -+** ldnf1h z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_s16_m8, svint16_t, int16_t, -+ z0 = svldnf1_vnum_s16 (p0, x0, -8), -+ z0 = svldnf1_vnum (p0, x0, -8)) -+ -+/* -+** ldnf1_vnum_s16_m9: -+** decb x0, all, mul #9 -+** ldnf1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_s16_m9, svint16_t, int16_t, -+ z0 = svldnf1_vnum_s16 (p0, x0, -9), -+ z0 = svldnf1_vnum (p0, x0, -9)) -+ -+/* -+** ldnf1_vnum_s16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnf1h z0\.h, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_s16_x1, svint16_t, int16_t, -+ z0 = svldnf1_vnum_s16 (p0, x0, x1), -+ z0 = svldnf1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s32.c -new file mode 100644 -index 000000000..634092af8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s32.c -@@ -0,0 +1,154 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnf1_s32_base: -+** ldnf1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_s32_base, svint32_t, int32_t, -+ z0 = svldnf1_s32 (p0, x0), -+ z0 = svldnf1 (p0, x0)) -+ -+/* -+** ldnf1_s32_index: -+** add (x[0-9]+), x0, x1, lsl 2 -+** ldnf1w z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ldnf1_s32_index, svint32_t, int32_t, -+ z0 = svldnf1_s32 (p0, x0 + x1), -+ z0 = svldnf1 (p0, x0 + x1)) -+ -+/* -+** ldnf1_s32_1: -+** ldnf1w z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_s32_1, svint32_t, int32_t, -+ z0 = svldnf1_s32 (p0, x0 + svcntw ()), -+ z0 = svldnf1 (p0, x0 + svcntw ())) -+ -+/* -+** ldnf1_s32_7: -+** ldnf1w z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_s32_7, svint32_t, int32_t, -+ z0 = svldnf1_s32 (p0, x0 + svcntw () * 7), -+ z0 = svldnf1 (p0, x0 + svcntw () * 7)) -+ -+/* -+** ldnf1_s32_8: -+** incb x0, all, mul #8 -+** ldnf1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_s32_8, svint32_t, int32_t, -+ z0 = svldnf1_s32 (p0, x0 + svcntw () * 8), -+ z0 = svldnf1 (p0, x0 + svcntw () * 8)) -+ -+/* -+** ldnf1_s32_m1: -+** ldnf1w z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_s32_m1, svint32_t, int32_t, -+ z0 = svldnf1_s32 (p0, x0 - svcntw ()), -+ z0 = svldnf1 (p0, x0 - svcntw ())) -+ -+/* -+** ldnf1_s32_m8: -+** ldnf1w z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_s32_m8, svint32_t, int32_t, -+ z0 = svldnf1_s32 (p0, x0 - svcntw () * 8), -+ z0 = svldnf1 (p0, x0 - svcntw () * 8)) -+ -+/* -+** ldnf1_s32_m9: -+** decb x0, all, mul #9 -+** ldnf1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_s32_m9, svint32_t, int32_t, -+ z0 = svldnf1_s32 (p0, x0 - svcntw () * 9), -+ z0 = svldnf1 (p0, x0 - svcntw () * 9)) -+ -+/* -+** ldnf1_vnum_s32_0: -+** ldnf1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_s32_0, 
svint32_t, int32_t, -+ z0 = svldnf1_vnum_s32 (p0, x0, 0), -+ z0 = svldnf1_vnum (p0, x0, 0)) -+ -+/* -+** ldnf1_vnum_s32_1: -+** ldnf1w z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_s32_1, svint32_t, int32_t, -+ z0 = svldnf1_vnum_s32 (p0, x0, 1), -+ z0 = svldnf1_vnum (p0, x0, 1)) -+ -+/* -+** ldnf1_vnum_s32_7: -+** ldnf1w z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_s32_7, svint32_t, int32_t, -+ z0 = svldnf1_vnum_s32 (p0, x0, 7), -+ z0 = svldnf1_vnum (p0, x0, 7)) -+ -+/* -+** ldnf1_vnum_s32_8: -+** incb x0, all, mul #8 -+** ldnf1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_s32_8, svint32_t, int32_t, -+ z0 = svldnf1_vnum_s32 (p0, x0, 8), -+ z0 = svldnf1_vnum (p0, x0, 8)) -+ -+/* -+** ldnf1_vnum_s32_m1: -+** ldnf1w z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_s32_m1, svint32_t, int32_t, -+ z0 = svldnf1_vnum_s32 (p0, x0, -1), -+ z0 = svldnf1_vnum (p0, x0, -1)) -+ -+/* -+** ldnf1_vnum_s32_m8: -+** ldnf1w z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_s32_m8, svint32_t, int32_t, -+ z0 = svldnf1_vnum_s32 (p0, x0, -8), -+ z0 = svldnf1_vnum (p0, x0, -8)) -+ -+/* -+** ldnf1_vnum_s32_m9: -+** decb x0, all, mul #9 -+** ldnf1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_s32_m9, svint32_t, int32_t, -+ z0 = svldnf1_vnum_s32 (p0, x0, -9), -+ z0 = svldnf1_vnum (p0, x0, -9)) -+ -+/* -+** ldnf1_vnum_s32_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnf1w z0\.s, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_s32_x1, svint32_t, int32_t, -+ z0 = svldnf1_vnum_s32 (p0, x0, x1), -+ z0 = svldnf1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s64.c -new file mode 100644 -index 000000000..4a03f6676 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s64.c -@@ -0,0 +1,154 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnf1_s64_base: -+** ldnf1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_s64_base, svint64_t, int64_t, -+ z0 = svldnf1_s64 (p0, x0), -+ z0 = svldnf1 (p0, x0)) -+ -+/* -+** ldnf1_s64_index: -+** add (x[0-9]+), x0, x1, lsl 3 -+** ldnf1d z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ldnf1_s64_index, svint64_t, int64_t, -+ z0 = svldnf1_s64 (p0, x0 + x1), -+ z0 = svldnf1 (p0, x0 + x1)) -+ -+/* -+** ldnf1_s64_1: -+** ldnf1d z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_s64_1, svint64_t, int64_t, -+ z0 = svldnf1_s64 (p0, x0 + svcntd ()), -+ z0 = svldnf1 (p0, x0 + svcntd ())) -+ -+/* -+** ldnf1_s64_7: -+** ldnf1d z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_s64_7, svint64_t, int64_t, -+ z0 = svldnf1_s64 (p0, x0 + svcntd () * 7), -+ z0 = svldnf1 (p0, x0 + svcntd () * 7)) -+ -+/* -+** ldnf1_s64_8: -+** incb x0, all, mul #8 -+** ldnf1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_s64_8, svint64_t, int64_t, -+ z0 = svldnf1_s64 (p0, x0 + svcntd () * 8), -+ z0 = svldnf1 (p0, x0 + svcntd () * 8)) -+ -+/* -+** ldnf1_s64_m1: -+** ldnf1d z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_s64_m1, svint64_t, int64_t, -+ z0 = svldnf1_s64 (p0, x0 - svcntd ()), -+ z0 = svldnf1 (p0, x0 - svcntd ())) -+ -+/* -+** ldnf1_s64_m8: -+** ldnf1d z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_s64_m8, svint64_t, int64_t, -+ z0 = svldnf1_s64 (p0, x0 - svcntd () * 8), -+ z0 = svldnf1 (p0, x0 - svcntd () * 8)) -+ -+/* -+** ldnf1_s64_m9: -+** decb x0, all, mul #9 -+** ldnf1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_s64_m9, svint64_t, int64_t, -+ z0 = svldnf1_s64 (p0, x0 - svcntd () * 9), -+ z0 = svldnf1 (p0, x0 - svcntd () * 9)) -+ -+/* -+** ldnf1_vnum_s64_0: -+** ldnf1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_s64_0, svint64_t, int64_t, -+ z0 = svldnf1_vnum_s64 (p0, x0, 0), -+ z0 = svldnf1_vnum (p0, x0, 0)) -+ -+/* -+** ldnf1_vnum_s64_1: -+** ldnf1d z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_s64_1, svint64_t, int64_t, -+ z0 = svldnf1_vnum_s64 (p0, x0, 1), -+ z0 = svldnf1_vnum (p0, x0, 1)) -+ -+/* -+** ldnf1_vnum_s64_7: -+** ldnf1d z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_s64_7, svint64_t, int64_t, -+ z0 = svldnf1_vnum_s64 (p0, x0, 7), -+ z0 = svldnf1_vnum (p0, x0, 7)) -+ -+/* -+** ldnf1_vnum_s64_8: -+** incb x0, all, mul #8 -+** ldnf1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_s64_8, svint64_t, int64_t, -+ z0 = svldnf1_vnum_s64 (p0, x0, 8), -+ z0 = svldnf1_vnum (p0, x0, 8)) -+ -+/* -+** ldnf1_vnum_s64_m1: -+** ldnf1d z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_s64_m1, svint64_t, int64_t, -+ z0 = svldnf1_vnum_s64 (p0, x0, -1), -+ z0 = svldnf1_vnum (p0, x0, -1)) -+ -+/* -+** ldnf1_vnum_s64_m8: -+** ldnf1d z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_s64_m8, svint64_t, int64_t, -+ z0 = svldnf1_vnum_s64 (p0, x0, -8), -+ z0 = svldnf1_vnum (p0, x0, -8)) -+ -+/* -+** ldnf1_vnum_s64_m9: -+** decb x0, all, mul #9 -+** ldnf1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_s64_m9, svint64_t, int64_t, -+ z0 = svldnf1_vnum_s64 (p0, x0, -9), -+ z0 = svldnf1_vnum (p0, x0, -9)) -+ -+/* -+** ldnf1_vnum_s64_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnf1d z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_s64_x1, svint64_t, int64_t, -+ z0 = svldnf1_vnum_s64 (p0, x0, x1), -+ z0 = svldnf1_vnum (p0, 
x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s8.c -new file mode 100644 -index 000000000..162ee176a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_s8.c -@@ -0,0 +1,154 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnf1_s8_base: -+** ldnf1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_s8_base, svint8_t, int8_t, -+ z0 = svldnf1_s8 (p0, x0), -+ z0 = svldnf1 (p0, x0)) -+ -+/* -+** ldnf1_s8_index: -+** add (x[0-9]+), x0, x1 -+** ldnf1b z0\.b, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ldnf1_s8_index, svint8_t, int8_t, -+ z0 = svldnf1_s8 (p0, x0 + x1), -+ z0 = svldnf1 (p0, x0 + x1)) -+ -+/* -+** ldnf1_s8_1: -+** ldnf1b z0\.b, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_s8_1, svint8_t, int8_t, -+ z0 = svldnf1_s8 (p0, x0 + svcntb ()), -+ z0 = svldnf1 (p0, x0 + svcntb ())) -+ -+/* -+** ldnf1_s8_7: -+** ldnf1b z0\.b, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_s8_7, svint8_t, int8_t, -+ z0 = svldnf1_s8 (p0, x0 + svcntb () * 7), -+ z0 = svldnf1 (p0, x0 + svcntb () * 7)) -+ -+/* -+** ldnf1_s8_8: -+** incb x0, all, mul #8 -+** ldnf1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_s8_8, svint8_t, int8_t, -+ z0 = svldnf1_s8 (p0, x0 + svcntb () * 8), -+ z0 = svldnf1 (p0, x0 + svcntb () * 8)) -+ -+/* -+** ldnf1_s8_m1: -+** ldnf1b z0\.b, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_s8_m1, svint8_t, int8_t, -+ z0 = svldnf1_s8 (p0, x0 - svcntb ()), -+ z0 = svldnf1 (p0, x0 - svcntb ())) -+ -+/* -+** ldnf1_s8_m8: -+** ldnf1b z0\.b, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_s8_m8, svint8_t, int8_t, -+ z0 = svldnf1_s8 (p0, x0 - svcntb () * 8), -+ z0 = svldnf1 (p0, x0 - svcntb () * 8)) -+ -+/* -+** ldnf1_s8_m9: -+** decb x0, all, mul #9 -+** ldnf1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_s8_m9, svint8_t, int8_t, -+ z0 = svldnf1_s8 (p0, x0 - svcntb () * 9), -+ z0 = svldnf1 (p0, x0 - svcntb () * 9)) -+ -+/* -+** ldnf1_vnum_s8_0: -+** ldnf1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_s8_0, svint8_t, int8_t, -+ z0 = svldnf1_vnum_s8 (p0, x0, 0), -+ z0 = svldnf1_vnum (p0, x0, 0)) -+ -+/* -+** ldnf1_vnum_s8_1: -+** ldnf1b z0\.b, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_s8_1, svint8_t, int8_t, -+ z0 = svldnf1_vnum_s8 (p0, x0, 1), -+ z0 = svldnf1_vnum (p0, x0, 1)) -+ -+/* -+** ldnf1_vnum_s8_7: -+** ldnf1b z0\.b, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_s8_7, svint8_t, int8_t, -+ z0 = svldnf1_vnum_s8 (p0, x0, 7), -+ z0 = svldnf1_vnum (p0, x0, 7)) -+ -+/* -+** ldnf1_vnum_s8_8: -+** incb x0, all, mul #8 -+** ldnf1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_s8_8, svint8_t, int8_t, -+ z0 = svldnf1_vnum_s8 (p0, x0, 8), -+ z0 = svldnf1_vnum (p0, x0, 8)) -+ -+/* -+** ldnf1_vnum_s8_m1: -+** ldnf1b z0\.b, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_s8_m1, svint8_t, int8_t, -+ z0 = svldnf1_vnum_s8 (p0, x0, -1), -+ z0 = svldnf1_vnum (p0, x0, -1)) -+ -+/* -+** ldnf1_vnum_s8_m8: -+** ldnf1b z0\.b, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_s8_m8, svint8_t, int8_t, -+ z0 = svldnf1_vnum_s8 (p0, x0, -8), -+ z0 = svldnf1_vnum (p0, x0, -8)) -+ -+/* -+** ldnf1_vnum_s8_m9: -+** decb x0, all, mul #9 -+** ldnf1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_s8_m9, svint8_t, int8_t, -+ z0 = svldnf1_vnum_s8 (p0, x0, -9), -+ z0 = 
svldnf1_vnum (p0, x0, -9)) -+ -+/* -+** ldnf1_vnum_s8_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnf1b z0\.b, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_s8_x1, svint8_t, int8_t, -+ z0 = svldnf1_vnum_s8 (p0, x0, x1), -+ z0 = svldnf1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u16.c -new file mode 100644 -index 000000000..e920ac43b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u16.c -@@ -0,0 +1,154 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnf1_u16_base: -+** ldnf1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_u16_base, svuint16_t, uint16_t, -+ z0 = svldnf1_u16 (p0, x0), -+ z0 = svldnf1 (p0, x0)) -+ -+/* -+** ldnf1_u16_index: -+** add (x[0-9]+), x0, x1, lsl 1 -+** ldnf1h z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ldnf1_u16_index, svuint16_t, uint16_t, -+ z0 = svldnf1_u16 (p0, x0 + x1), -+ z0 = svldnf1 (p0, x0 + x1)) -+ -+/* -+** ldnf1_u16_1: -+** ldnf1h z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_u16_1, svuint16_t, uint16_t, -+ z0 = svldnf1_u16 (p0, x0 + svcnth ()), -+ z0 = svldnf1 (p0, x0 + svcnth ())) -+ -+/* -+** ldnf1_u16_7: -+** ldnf1h z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_u16_7, svuint16_t, uint16_t, -+ z0 = svldnf1_u16 (p0, x0 + svcnth () * 7), -+ z0 = svldnf1 (p0, x0 + svcnth () * 7)) -+ -+/* -+** ldnf1_u16_8: -+** incb x0, all, mul #8 -+** ldnf1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_u16_8, svuint16_t, uint16_t, -+ z0 = svldnf1_u16 (p0, x0 + svcnth () * 8), -+ z0 = svldnf1 (p0, x0 + svcnth () * 8)) -+ -+/* -+** ldnf1_u16_m1: -+** ldnf1h z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_u16_m1, svuint16_t, uint16_t, -+ z0 = svldnf1_u16 (p0, x0 - svcnth ()), -+ z0 = svldnf1 (p0, x0 - svcnth ())) -+ -+/* -+** ldnf1_u16_m8: -+** ldnf1h z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_u16_m8, svuint16_t, uint16_t, -+ z0 = svldnf1_u16 (p0, x0 - svcnth () * 8), -+ z0 = svldnf1 (p0, x0 - svcnth () * 8)) -+ -+/* -+** ldnf1_u16_m9: -+** decb x0, all, mul #9 -+** ldnf1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_u16_m9, svuint16_t, uint16_t, -+ z0 = svldnf1_u16 (p0, x0 - svcnth () * 9), -+ z0 = svldnf1 (p0, x0 - svcnth () * 9)) -+ -+/* -+** ldnf1_vnum_u16_0: -+** ldnf1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_u16_0, svuint16_t, uint16_t, -+ z0 = svldnf1_vnum_u16 (p0, x0, 0), -+ z0 = svldnf1_vnum (p0, x0, 0)) -+ -+/* -+** ldnf1_vnum_u16_1: -+** ldnf1h z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_u16_1, svuint16_t, uint16_t, -+ z0 = svldnf1_vnum_u16 (p0, x0, 1), -+ z0 = svldnf1_vnum (p0, x0, 1)) -+ -+/* -+** ldnf1_vnum_u16_7: -+** ldnf1h z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_u16_7, svuint16_t, uint16_t, -+ z0 = svldnf1_vnum_u16 (p0, x0, 7), -+ z0 = svldnf1_vnum (p0, x0, 7)) -+ -+/* -+** ldnf1_vnum_u16_8: -+** incb x0, all, mul #8 -+** ldnf1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_u16_8, svuint16_t, uint16_t, -+ z0 = svldnf1_vnum_u16 (p0, x0, 8), -+ z0 = svldnf1_vnum (p0, x0, 8)) -+ -+/* -+** ldnf1_vnum_u16_m1: -+** ldnf1h z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_u16_m1, svuint16_t, uint16_t, -+ z0 = svldnf1_vnum_u16 (p0, x0, -1), -+ z0 = svldnf1_vnum (p0, x0, -1)) -+ -+/* -+** ldnf1_vnum_u16_m8: 
-+** ldnf1h z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_u16_m8, svuint16_t, uint16_t, -+ z0 = svldnf1_vnum_u16 (p0, x0, -8), -+ z0 = svldnf1_vnum (p0, x0, -8)) -+ -+/* -+** ldnf1_vnum_u16_m9: -+** decb x0, all, mul #9 -+** ldnf1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_u16_m9, svuint16_t, uint16_t, -+ z0 = svldnf1_vnum_u16 (p0, x0, -9), -+ z0 = svldnf1_vnum (p0, x0, -9)) -+ -+/* -+** ldnf1_vnum_u16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnf1h z0\.h, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_u16_x1, svuint16_t, uint16_t, -+ z0 = svldnf1_vnum_u16 (p0, x0, x1), -+ z0 = svldnf1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u32.c -new file mode 100644 -index 000000000..65e28c5c2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u32.c -@@ -0,0 +1,154 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnf1_u32_base: -+** ldnf1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_u32_base, svuint32_t, uint32_t, -+ z0 = svldnf1_u32 (p0, x0), -+ z0 = svldnf1 (p0, x0)) -+ -+/* -+** ldnf1_u32_index: -+** add (x[0-9]+), x0, x1, lsl 2 -+** ldnf1w z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ldnf1_u32_index, svuint32_t, uint32_t, -+ z0 = svldnf1_u32 (p0, x0 + x1), -+ z0 = svldnf1 (p0, x0 + x1)) -+ -+/* -+** ldnf1_u32_1: -+** ldnf1w z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_u32_1, svuint32_t, uint32_t, -+ z0 = svldnf1_u32 (p0, x0 + svcntw ()), -+ z0 = svldnf1 (p0, x0 + svcntw ())) -+ -+/* -+** ldnf1_u32_7: -+** ldnf1w z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_u32_7, svuint32_t, uint32_t, -+ z0 = svldnf1_u32 (p0, x0 + svcntw () * 7), -+ z0 = svldnf1 (p0, x0 + svcntw () * 7)) -+ -+/* -+** ldnf1_u32_8: -+** incb x0, all, mul #8 -+** ldnf1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_u32_8, svuint32_t, uint32_t, -+ z0 = svldnf1_u32 (p0, x0 + svcntw () * 8), -+ z0 = svldnf1 (p0, x0 + svcntw () * 8)) -+ -+/* -+** ldnf1_u32_m1: -+** ldnf1w z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_u32_m1, svuint32_t, uint32_t, -+ z0 = svldnf1_u32 (p0, x0 - svcntw ()), -+ z0 = svldnf1 (p0, x0 - svcntw ())) -+ -+/* -+** ldnf1_u32_m8: -+** ldnf1w z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_u32_m8, svuint32_t, uint32_t, -+ z0 = svldnf1_u32 (p0, x0 - svcntw () * 8), -+ z0 = svldnf1 (p0, x0 - svcntw () * 8)) -+ -+/* -+** ldnf1_u32_m9: -+** decb x0, all, mul #9 -+** ldnf1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_u32_m9, svuint32_t, uint32_t, -+ z0 = svldnf1_u32 (p0, x0 - svcntw () * 9), -+ z0 = svldnf1 (p0, x0 - svcntw () * 9)) -+ -+/* -+** ldnf1_vnum_u32_0: -+** ldnf1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_u32_0, svuint32_t, uint32_t, -+ z0 = svldnf1_vnum_u32 (p0, x0, 0), -+ z0 = svldnf1_vnum (p0, x0, 0)) -+ -+/* -+** ldnf1_vnum_u32_1: -+** ldnf1w z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_u32_1, svuint32_t, uint32_t, -+ z0 = svldnf1_vnum_u32 (p0, x0, 1), -+ z0 = svldnf1_vnum (p0, x0, 1)) -+ -+/* -+** ldnf1_vnum_u32_7: -+** ldnf1w z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_u32_7, svuint32_t, uint32_t, -+ z0 = svldnf1_vnum_u32 (p0, x0, 7), -+ z0 = svldnf1_vnum (p0, x0, 7)) -+ -+/* -+** ldnf1_vnum_u32_8: -+** incb x0, all, mul #8 -+** ldnf1w z0\.s, p0/z, 
\[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_u32_8, svuint32_t, uint32_t, -+ z0 = svldnf1_vnum_u32 (p0, x0, 8), -+ z0 = svldnf1_vnum (p0, x0, 8)) -+ -+/* -+** ldnf1_vnum_u32_m1: -+** ldnf1w z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_u32_m1, svuint32_t, uint32_t, -+ z0 = svldnf1_vnum_u32 (p0, x0, -1), -+ z0 = svldnf1_vnum (p0, x0, -1)) -+ -+/* -+** ldnf1_vnum_u32_m8: -+** ldnf1w z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_u32_m8, svuint32_t, uint32_t, -+ z0 = svldnf1_vnum_u32 (p0, x0, -8), -+ z0 = svldnf1_vnum (p0, x0, -8)) -+ -+/* -+** ldnf1_vnum_u32_m9: -+** decb x0, all, mul #9 -+** ldnf1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_u32_m9, svuint32_t, uint32_t, -+ z0 = svldnf1_vnum_u32 (p0, x0, -9), -+ z0 = svldnf1_vnum (p0, x0, -9)) -+ -+/* -+** ldnf1_vnum_u32_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnf1w z0\.s, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_u32_x1, svuint32_t, uint32_t, -+ z0 = svldnf1_vnum_u32 (p0, x0, x1), -+ z0 = svldnf1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u64.c -new file mode 100644 -index 000000000..70d3f27d8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u64.c -@@ -0,0 +1,154 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnf1_u64_base: -+** ldnf1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_u64_base, svuint64_t, uint64_t, -+ z0 = svldnf1_u64 (p0, x0), -+ z0 = svldnf1 (p0, x0)) -+ -+/* -+** ldnf1_u64_index: -+** add (x[0-9]+), x0, x1, lsl 3 -+** ldnf1d z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ldnf1_u64_index, svuint64_t, uint64_t, -+ z0 = svldnf1_u64 (p0, x0 + x1), -+ z0 = svldnf1 (p0, x0 + x1)) -+ -+/* -+** ldnf1_u64_1: -+** ldnf1d z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_u64_1, svuint64_t, uint64_t, -+ z0 = svldnf1_u64 (p0, x0 + svcntd ()), -+ z0 = svldnf1 (p0, x0 + svcntd ())) -+ -+/* -+** ldnf1_u64_7: -+** ldnf1d z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_u64_7, svuint64_t, uint64_t, -+ z0 = svldnf1_u64 (p0, x0 + svcntd () * 7), -+ z0 = svldnf1 (p0, x0 + svcntd () * 7)) -+ -+/* -+** ldnf1_u64_8: -+** incb x0, all, mul #8 -+** ldnf1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_u64_8, svuint64_t, uint64_t, -+ z0 = svldnf1_u64 (p0, x0 + svcntd () * 8), -+ z0 = svldnf1 (p0, x0 + svcntd () * 8)) -+ -+/* -+** ldnf1_u64_m1: -+** ldnf1d z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_u64_m1, svuint64_t, uint64_t, -+ z0 = svldnf1_u64 (p0, x0 - svcntd ()), -+ z0 = svldnf1 (p0, x0 - svcntd ())) -+ -+/* -+** ldnf1_u64_m8: -+** ldnf1d z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_u64_m8, svuint64_t, uint64_t, -+ z0 = svldnf1_u64 (p0, x0 - svcntd () * 8), -+ z0 = svldnf1 (p0, x0 - svcntd () * 8)) -+ -+/* -+** ldnf1_u64_m9: -+** decb x0, all, mul #9 -+** ldnf1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_u64_m9, svuint64_t, uint64_t, -+ z0 = svldnf1_u64 (p0, x0 - svcntd () * 9), -+ z0 = svldnf1 (p0, x0 - svcntd () * 9)) -+ -+/* -+** ldnf1_vnum_u64_0: -+** ldnf1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_u64_0, svuint64_t, uint64_t, -+ z0 = svldnf1_vnum_u64 (p0, x0, 0), -+ z0 = svldnf1_vnum (p0, x0, 0)) -+ -+/* -+** ldnf1_vnum_u64_1: -+** ldnf1d z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD 
(ldnf1_vnum_u64_1, svuint64_t, uint64_t, -+ z0 = svldnf1_vnum_u64 (p0, x0, 1), -+ z0 = svldnf1_vnum (p0, x0, 1)) -+ -+/* -+** ldnf1_vnum_u64_7: -+** ldnf1d z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_u64_7, svuint64_t, uint64_t, -+ z0 = svldnf1_vnum_u64 (p0, x0, 7), -+ z0 = svldnf1_vnum (p0, x0, 7)) -+ -+/* -+** ldnf1_vnum_u64_8: -+** incb x0, all, mul #8 -+** ldnf1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_u64_8, svuint64_t, uint64_t, -+ z0 = svldnf1_vnum_u64 (p0, x0, 8), -+ z0 = svldnf1_vnum (p0, x0, 8)) -+ -+/* -+** ldnf1_vnum_u64_m1: -+** ldnf1d z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_u64_m1, svuint64_t, uint64_t, -+ z0 = svldnf1_vnum_u64 (p0, x0, -1), -+ z0 = svldnf1_vnum (p0, x0, -1)) -+ -+/* -+** ldnf1_vnum_u64_m8: -+** ldnf1d z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_u64_m8, svuint64_t, uint64_t, -+ z0 = svldnf1_vnum_u64 (p0, x0, -8), -+ z0 = svldnf1_vnum (p0, x0, -8)) -+ -+/* -+** ldnf1_vnum_u64_m9: -+** decb x0, all, mul #9 -+** ldnf1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_u64_m9, svuint64_t, uint64_t, -+ z0 = svldnf1_vnum_u64 (p0, x0, -9), -+ z0 = svldnf1_vnum (p0, x0, -9)) -+ -+/* -+** ldnf1_vnum_u64_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnf1d z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_u64_x1, svuint64_t, uint64_t, -+ z0 = svldnf1_vnum_u64 (p0, x0, x1), -+ z0 = svldnf1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u8.c -new file mode 100644 -index 000000000..5c29f1d19 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1_u8.c -@@ -0,0 +1,154 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnf1_u8_base: -+** ldnf1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_u8_base, svuint8_t, uint8_t, -+ z0 = svldnf1_u8 (p0, x0), -+ z0 = svldnf1 (p0, x0)) -+ -+/* -+** ldnf1_u8_index: -+** add (x[0-9]+), x0, x1 -+** ldnf1b z0\.b, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ldnf1_u8_index, svuint8_t, uint8_t, -+ z0 = svldnf1_u8 (p0, x0 + x1), -+ z0 = svldnf1 (p0, x0 + x1)) -+ -+/* -+** ldnf1_u8_1: -+** ldnf1b z0\.b, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_u8_1, svuint8_t, uint8_t, -+ z0 = svldnf1_u8 (p0, x0 + svcntb ()), -+ z0 = svldnf1 (p0, x0 + svcntb ())) -+ -+/* -+** ldnf1_u8_7: -+** ldnf1b z0\.b, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_u8_7, svuint8_t, uint8_t, -+ z0 = svldnf1_u8 (p0, x0 + svcntb () * 7), -+ z0 = svldnf1 (p0, x0 + svcntb () * 7)) -+ -+/* -+** ldnf1_u8_8: -+** incb x0, all, mul #8 -+** ldnf1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_u8_8, svuint8_t, uint8_t, -+ z0 = svldnf1_u8 (p0, x0 + svcntb () * 8), -+ z0 = svldnf1 (p0, x0 + svcntb () * 8)) -+ -+/* -+** ldnf1_u8_m1: -+** ldnf1b z0\.b, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_u8_m1, svuint8_t, uint8_t, -+ z0 = svldnf1_u8 (p0, x0 - svcntb ()), -+ z0 = svldnf1 (p0, x0 - svcntb ())) -+ -+/* -+** ldnf1_u8_m8: -+** ldnf1b z0\.b, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_u8_m8, svuint8_t, uint8_t, -+ z0 = svldnf1_u8 (p0, x0 - svcntb () * 8), -+ z0 = svldnf1 (p0, x0 - svcntb () * 8)) -+ -+/* -+** ldnf1_u8_m9: -+** decb x0, all, mul #9 -+** ldnf1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_u8_m9, svuint8_t, uint8_t, -+ z0 = svldnf1_u8 (p0, x0 - svcntb () * 9), -+ z0 = svldnf1 (p0, x0 - svcntb () * 9)) -+ -+/* -+** ldnf1_vnum_u8_0: -+** ldnf1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_u8_0, svuint8_t, uint8_t, -+ z0 = svldnf1_vnum_u8 (p0, x0, 0), -+ z0 = svldnf1_vnum (p0, x0, 0)) -+ -+/* -+** ldnf1_vnum_u8_1: -+** ldnf1b z0\.b, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_u8_1, svuint8_t, uint8_t, -+ z0 = svldnf1_vnum_u8 (p0, x0, 1), -+ z0 = svldnf1_vnum (p0, x0, 1)) -+ -+/* -+** ldnf1_vnum_u8_7: -+** ldnf1b z0\.b, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_u8_7, svuint8_t, uint8_t, -+ z0 = svldnf1_vnum_u8 (p0, x0, 7), -+ z0 = svldnf1_vnum (p0, x0, 7)) -+ -+/* -+** ldnf1_vnum_u8_8: -+** incb x0, all, mul #8 -+** ldnf1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_u8_8, svuint8_t, uint8_t, -+ z0 = svldnf1_vnum_u8 (p0, x0, 8), -+ z0 = svldnf1_vnum (p0, x0, 8)) -+ -+/* -+** ldnf1_vnum_u8_m1: -+** ldnf1b z0\.b, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_u8_m1, svuint8_t, uint8_t, -+ z0 = svldnf1_vnum_u8 (p0, x0, -1), -+ z0 = svldnf1_vnum (p0, x0, -1)) -+ -+/* -+** ldnf1_vnum_u8_m8: -+** ldnf1b z0\.b, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_u8_m8, svuint8_t, uint8_t, -+ z0 = svldnf1_vnum_u8 (p0, x0, -8), -+ z0 = svldnf1_vnum (p0, x0, -8)) -+ -+/* -+** ldnf1_vnum_u8_m9: -+** decb x0, all, mul #9 -+** ldnf1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_u8_m9, svuint8_t, uint8_t, -+ z0 = svldnf1_vnum_u8 (p0, x0, -9), -+ z0 = svldnf1_vnum (p0, x0, -9)) -+ -+/* -+** ldnf1_vnum_u8_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnf1b z0\.b, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnf1_vnum_u8_x1, svuint8_t, uint8_t, -+ z0 = svldnf1_vnum_u8 (p0, x0, x1), -+ z0 = svldnf1_vnum (p0, x0, x1)) -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s16.c -new file mode 100644 -index 000000000..e04b9a788 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s16.c -@@ -0,0 +1,154 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnf1sb_s16_base: -+** ldnf1sb z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_s16_base, svint16_t, int8_t, -+ z0 = svldnf1sb_s16 (p0, x0), -+ z0 = svldnf1sb_s16 (p0, x0)) -+ -+/* -+** ldnf1sb_s16_index: -+** add (x[0-9]+), x0, x1 -+** ldnf1sb z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_s16_index, svint16_t, int8_t, -+ z0 = svldnf1sb_s16 (p0, x0 + x1), -+ z0 = svldnf1sb_s16 (p0, x0 + x1)) -+ -+/* -+** ldnf1sb_s16_1: -+** ldnf1sb z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_s16_1, svint16_t, int8_t, -+ z0 = svldnf1sb_s16 (p0, x0 + svcnth ()), -+ z0 = svldnf1sb_s16 (p0, x0 + svcnth ())) -+ -+/* -+** ldnf1sb_s16_7: -+** ldnf1sb z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_s16_7, svint16_t, int8_t, -+ z0 = svldnf1sb_s16 (p0, x0 + svcnth () * 7), -+ z0 = svldnf1sb_s16 (p0, x0 + svcnth () * 7)) -+ -+/* -+** ldnf1sb_s16_8: -+** incb x0, all, mul #4 -+** ldnf1sb z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_s16_8, svint16_t, int8_t, -+ z0 = svldnf1sb_s16 (p0, x0 + svcnth () * 8), -+ z0 = svldnf1sb_s16 (p0, x0 + svcnth () * 8)) -+ -+/* -+** ldnf1sb_s16_m1: -+** ldnf1sb z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_s16_m1, svint16_t, int8_t, -+ z0 = svldnf1sb_s16 (p0, x0 - svcnth ()), -+ z0 = svldnf1sb_s16 (p0, x0 - svcnth ())) -+ -+/* -+** ldnf1sb_s16_m8: -+** ldnf1sb z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_s16_m8, svint16_t, int8_t, -+ z0 = svldnf1sb_s16 (p0, x0 - svcnth () * 8), -+ z0 = svldnf1sb_s16 (p0, x0 - svcnth () * 8)) -+ -+/* -+** ldnf1sb_s16_m9: -+** dech x0, all, mul #9 -+** ldnf1sb z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_s16_m9, svint16_t, int8_t, -+ z0 = svldnf1sb_s16 (p0, x0 - svcnth () * 9), -+ z0 = svldnf1sb_s16 (p0, x0 - svcnth () * 9)) -+ -+/* -+** ldnf1sb_vnum_s16_0: -+** ldnf1sb z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_s16_0, svint16_t, int8_t, -+ z0 = svldnf1sb_vnum_s16 (p0, x0, 0), -+ z0 = svldnf1sb_vnum_s16 (p0, x0, 0)) -+ -+/* -+** ldnf1sb_vnum_s16_1: -+** ldnf1sb z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_s16_1, svint16_t, int8_t, -+ z0 = svldnf1sb_vnum_s16 (p0, x0, 1), -+ z0 = svldnf1sb_vnum_s16 (p0, x0, 1)) -+ -+/* -+** ldnf1sb_vnum_s16_7: -+** ldnf1sb z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_s16_7, svint16_t, int8_t, -+ z0 = svldnf1sb_vnum_s16 (p0, x0, 7), -+ z0 = svldnf1sb_vnum_s16 (p0, x0, 7)) -+ -+/* -+** ldnf1sb_vnum_s16_8: -+** incb x0, all, mul #4 -+** ldnf1sb z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_s16_8, svint16_t, int8_t, -+ z0 = svldnf1sb_vnum_s16 (p0, x0, 8), -+ z0 = svldnf1sb_vnum_s16 (p0, x0, 8)) -+ -+/* -+** ldnf1sb_vnum_s16_m1: -+** ldnf1sb z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_s16_m1, svint16_t, int8_t, -+ z0 = svldnf1sb_vnum_s16 (p0, x0, -1), -+ z0 = svldnf1sb_vnum_s16 (p0, x0, -1)) -+ -+/* -+** ldnf1sb_vnum_s16_m8: -+** ldnf1sb z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_s16_m8, svint16_t, int8_t, -+ z0 = svldnf1sb_vnum_s16 (p0, x0, -8), -+ z0 = 
svldnf1sb_vnum_s16 (p0, x0, -8)) -+ -+/* -+** ldnf1sb_vnum_s16_m9: -+** dech x0, all, mul #9 -+** ldnf1sb z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_s16_m9, svint16_t, int8_t, -+ z0 = svldnf1sb_vnum_s16 (p0, x0, -9), -+ z0 = svldnf1sb_vnum_s16 (p0, x0, -9)) -+ -+/* -+** ldnf1sb_vnum_s16_x1: -+** cnth (x[0-9]+) -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnf1sb z0\.h, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_s16_x1, svint16_t, int8_t, -+ z0 = svldnf1sb_vnum_s16 (p0, x0, x1), -+ z0 = svldnf1sb_vnum_s16 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s32.c -new file mode 100644 -index 000000000..0553fc98d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s32.c -@@ -0,0 +1,154 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnf1sb_s32_base: -+** ldnf1sb z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_s32_base, svint32_t, int8_t, -+ z0 = svldnf1sb_s32 (p0, x0), -+ z0 = svldnf1sb_s32 (p0, x0)) -+ -+/* -+** ldnf1sb_s32_index: -+** add (x[0-9]+), x0, x1 -+** ldnf1sb z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_s32_index, svint32_t, int8_t, -+ z0 = svldnf1sb_s32 (p0, x0 + x1), -+ z0 = svldnf1sb_s32 (p0, x0 + x1)) -+ -+/* -+** ldnf1sb_s32_1: -+** ldnf1sb z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_s32_1, svint32_t, int8_t, -+ z0 = svldnf1sb_s32 (p0, x0 + svcntw ()), -+ z0 = svldnf1sb_s32 (p0, x0 + svcntw ())) -+ -+/* -+** ldnf1sb_s32_7: -+** ldnf1sb z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_s32_7, svint32_t, int8_t, -+ z0 = svldnf1sb_s32 (p0, x0 + svcntw () * 7), -+ z0 = svldnf1sb_s32 (p0, x0 + svcntw () * 7)) -+ -+/* -+** ldnf1sb_s32_8: -+** incb x0, all, mul #2 -+** ldnf1sb z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_s32_8, svint32_t, int8_t, -+ z0 = svldnf1sb_s32 (p0, x0 + svcntw () * 8), -+ z0 = svldnf1sb_s32 (p0, x0 + svcntw () * 8)) -+ -+/* -+** ldnf1sb_s32_m1: -+** ldnf1sb z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_s32_m1, svint32_t, int8_t, -+ z0 = svldnf1sb_s32 (p0, x0 - svcntw ()), -+ z0 = svldnf1sb_s32 (p0, x0 - svcntw ())) -+ -+/* -+** ldnf1sb_s32_m8: -+** ldnf1sb z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_s32_m8, svint32_t, int8_t, -+ z0 = svldnf1sb_s32 (p0, x0 - svcntw () * 8), -+ z0 = svldnf1sb_s32 (p0, x0 - svcntw () * 8)) -+ -+/* -+** ldnf1sb_s32_m9: -+** decw x0, all, mul #9 -+** ldnf1sb z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_s32_m9, svint32_t, int8_t, -+ z0 = svldnf1sb_s32 (p0, x0 - svcntw () * 9), -+ z0 = svldnf1sb_s32 (p0, x0 - svcntw () * 9)) -+ -+/* -+** ldnf1sb_vnum_s32_0: -+** ldnf1sb z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_s32_0, svint32_t, int8_t, -+ z0 = svldnf1sb_vnum_s32 (p0, x0, 0), -+ z0 = svldnf1sb_vnum_s32 (p0, x0, 0)) -+ -+/* -+** ldnf1sb_vnum_s32_1: -+** ldnf1sb z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_s32_1, svint32_t, int8_t, -+ z0 = svldnf1sb_vnum_s32 (p0, x0, 1), -+ z0 = svldnf1sb_vnum_s32 (p0, x0, 1)) -+ -+/* -+** ldnf1sb_vnum_s32_7: -+** ldnf1sb z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_s32_7, svint32_t, int8_t, -+ z0 = svldnf1sb_vnum_s32 (p0, x0, 7), -+ z0 = svldnf1sb_vnum_s32 (p0, x0, 7)) -+ -+/* -+** ldnf1sb_vnum_s32_8: -+** incb x0, all, mul #2 -+** ldnf1sb z0\.s, p0/z, \[x0\] -+** ret -+*/ 
-+TEST_LOAD (ldnf1sb_vnum_s32_8, svint32_t, int8_t, -+ z0 = svldnf1sb_vnum_s32 (p0, x0, 8), -+ z0 = svldnf1sb_vnum_s32 (p0, x0, 8)) -+ -+/* -+** ldnf1sb_vnum_s32_m1: -+** ldnf1sb z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_s32_m1, svint32_t, int8_t, -+ z0 = svldnf1sb_vnum_s32 (p0, x0, -1), -+ z0 = svldnf1sb_vnum_s32 (p0, x0, -1)) -+ -+/* -+** ldnf1sb_vnum_s32_m8: -+** ldnf1sb z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_s32_m8, svint32_t, int8_t, -+ z0 = svldnf1sb_vnum_s32 (p0, x0, -8), -+ z0 = svldnf1sb_vnum_s32 (p0, x0, -8)) -+ -+/* -+** ldnf1sb_vnum_s32_m9: -+** decw x0, all, mul #9 -+** ldnf1sb z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_s32_m9, svint32_t, int8_t, -+ z0 = svldnf1sb_vnum_s32 (p0, x0, -9), -+ z0 = svldnf1sb_vnum_s32 (p0, x0, -9)) -+ -+/* -+** ldnf1sb_vnum_s32_x1: -+** cntw (x[0-9]+) -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnf1sb z0\.s, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_s32_x1, svint32_t, int8_t, -+ z0 = svldnf1sb_vnum_s32 (p0, x0, x1), -+ z0 = svldnf1sb_vnum_s32 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s64.c -new file mode 100644 -index 000000000..61a474fdf ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_s64.c -@@ -0,0 +1,154 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnf1sb_s64_base: -+** ldnf1sb z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_s64_base, svint64_t, int8_t, -+ z0 = svldnf1sb_s64 (p0, x0), -+ z0 = svldnf1sb_s64 (p0, x0)) -+ -+/* -+** ldnf1sb_s64_index: -+** add (x[0-9]+), x0, x1 -+** ldnf1sb z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_s64_index, svint64_t, int8_t, -+ z0 = svldnf1sb_s64 (p0, x0 + x1), -+ z0 = svldnf1sb_s64 (p0, x0 + x1)) -+ -+/* -+** ldnf1sb_s64_1: -+** ldnf1sb z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_s64_1, svint64_t, int8_t, -+ z0 = svldnf1sb_s64 (p0, x0 + svcntd ()), -+ z0 = svldnf1sb_s64 (p0, x0 + svcntd ())) -+ -+/* -+** ldnf1sb_s64_7: -+** ldnf1sb z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_s64_7, svint64_t, int8_t, -+ z0 = svldnf1sb_s64 (p0, x0 + svcntd () * 7), -+ z0 = svldnf1sb_s64 (p0, x0 + svcntd () * 7)) -+ -+/* -+** ldnf1sb_s64_8: -+** incb x0 -+** ldnf1sb z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_s64_8, svint64_t, int8_t, -+ z0 = svldnf1sb_s64 (p0, x0 + svcntd () * 8), -+ z0 = svldnf1sb_s64 (p0, x0 + svcntd () * 8)) -+ -+/* -+** ldnf1sb_s64_m1: -+** ldnf1sb z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_s64_m1, svint64_t, int8_t, -+ z0 = svldnf1sb_s64 (p0, x0 - svcntd ()), -+ z0 = svldnf1sb_s64 (p0, x0 - svcntd ())) -+ -+/* -+** ldnf1sb_s64_m8: -+** ldnf1sb z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_s64_m8, svint64_t, int8_t, -+ z0 = svldnf1sb_s64 (p0, x0 - svcntd () * 8), -+ z0 = svldnf1sb_s64 (p0, x0 - svcntd () * 8)) -+ -+/* -+** ldnf1sb_s64_m9: -+** decd x0, all, mul #9 -+** ldnf1sb z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_s64_m9, svint64_t, int8_t, -+ z0 = svldnf1sb_s64 (p0, x0 - svcntd () * 9), -+ z0 = svldnf1sb_s64 (p0, x0 - svcntd () * 9)) -+ -+/* -+** ldnf1sb_vnum_s64_0: -+** ldnf1sb z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_s64_0, svint64_t, int8_t, -+ z0 = svldnf1sb_vnum_s64 (p0, x0, 0), -+ z0 = svldnf1sb_vnum_s64 (p0, x0, 0)) -+ -+/* 
-+** ldnf1sb_vnum_s64_1: -+** ldnf1sb z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_s64_1, svint64_t, int8_t, -+ z0 = svldnf1sb_vnum_s64 (p0, x0, 1), -+ z0 = svldnf1sb_vnum_s64 (p0, x0, 1)) -+ -+/* -+** ldnf1sb_vnum_s64_7: -+** ldnf1sb z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_s64_7, svint64_t, int8_t, -+ z0 = svldnf1sb_vnum_s64 (p0, x0, 7), -+ z0 = svldnf1sb_vnum_s64 (p0, x0, 7)) -+ -+/* -+** ldnf1sb_vnum_s64_8: -+** incb x0 -+** ldnf1sb z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_s64_8, svint64_t, int8_t, -+ z0 = svldnf1sb_vnum_s64 (p0, x0, 8), -+ z0 = svldnf1sb_vnum_s64 (p0, x0, 8)) -+ -+/* -+** ldnf1sb_vnum_s64_m1: -+** ldnf1sb z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_s64_m1, svint64_t, int8_t, -+ z0 = svldnf1sb_vnum_s64 (p0, x0, -1), -+ z0 = svldnf1sb_vnum_s64 (p0, x0, -1)) -+ -+/* -+** ldnf1sb_vnum_s64_m8: -+** ldnf1sb z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_s64_m8, svint64_t, int8_t, -+ z0 = svldnf1sb_vnum_s64 (p0, x0, -8), -+ z0 = svldnf1sb_vnum_s64 (p0, x0, -8)) -+ -+/* -+** ldnf1sb_vnum_s64_m9: -+** decd x0, all, mul #9 -+** ldnf1sb z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_s64_m9, svint64_t, int8_t, -+ z0 = svldnf1sb_vnum_s64 (p0, x0, -9), -+ z0 = svldnf1sb_vnum_s64 (p0, x0, -9)) -+ -+/* -+** ldnf1sb_vnum_s64_x1: -+** cntd (x[0-9]+) -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnf1sb z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_s64_x1, svint64_t, int8_t, -+ z0 = svldnf1sb_vnum_s64 (p0, x0, x1), -+ z0 = svldnf1sb_vnum_s64 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u16.c -new file mode 100644 -index 000000000..be63d8bf9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u16.c -@@ -0,0 +1,154 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnf1sb_u16_base: -+** ldnf1sb z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_u16_base, svuint16_t, int8_t, -+ z0 = svldnf1sb_u16 (p0, x0), -+ z0 = svldnf1sb_u16 (p0, x0)) -+ -+/* -+** ldnf1sb_u16_index: -+** add (x[0-9]+), x0, x1 -+** ldnf1sb z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_u16_index, svuint16_t, int8_t, -+ z0 = svldnf1sb_u16 (p0, x0 + x1), -+ z0 = svldnf1sb_u16 (p0, x0 + x1)) -+ -+/* -+** ldnf1sb_u16_1: -+** ldnf1sb z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_u16_1, svuint16_t, int8_t, -+ z0 = svldnf1sb_u16 (p0, x0 + svcnth ()), -+ z0 = svldnf1sb_u16 (p0, x0 + svcnth ())) -+ -+/* -+** ldnf1sb_u16_7: -+** ldnf1sb z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_u16_7, svuint16_t, int8_t, -+ z0 = svldnf1sb_u16 (p0, x0 + svcnth () * 7), -+ z0 = svldnf1sb_u16 (p0, x0 + svcnth () * 7)) -+ -+/* -+** ldnf1sb_u16_8: -+** incb x0, all, mul #4 -+** ldnf1sb z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_u16_8, svuint16_t, int8_t, -+ z0 = svldnf1sb_u16 (p0, x0 + svcnth () * 8), -+ z0 = svldnf1sb_u16 (p0, x0 + svcnth () * 8)) -+ -+/* -+** ldnf1sb_u16_m1: -+** ldnf1sb z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_u16_m1, svuint16_t, int8_t, -+ z0 = svldnf1sb_u16 (p0, x0 - svcnth ()), -+ z0 = svldnf1sb_u16 (p0, x0 - svcnth ())) -+ -+/* -+** ldnf1sb_u16_m8: -+** ldnf1sb z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_u16_m8, svuint16_t, int8_t, -+ z0 = svldnf1sb_u16 (p0, x0 - svcnth () * 8), -+ z0 = svldnf1sb_u16 (p0, x0 - svcnth () * 8)) -+ -+/* -+** ldnf1sb_u16_m9: -+** dech x0, all, mul #9 -+** ldnf1sb z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_u16_m9, svuint16_t, int8_t, -+ z0 = svldnf1sb_u16 (p0, x0 - svcnth () * 9), -+ z0 = svldnf1sb_u16 (p0, x0 - svcnth () * 9)) -+ -+/* -+** ldnf1sb_vnum_u16_0: -+** ldnf1sb z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_u16_0, svuint16_t, int8_t, -+ z0 = svldnf1sb_vnum_u16 (p0, x0, 0), -+ z0 = svldnf1sb_vnum_u16 (p0, x0, 0)) -+ -+/* -+** ldnf1sb_vnum_u16_1: -+** ldnf1sb z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_u16_1, svuint16_t, int8_t, -+ z0 = svldnf1sb_vnum_u16 (p0, x0, 1), -+ z0 = svldnf1sb_vnum_u16 (p0, x0, 1)) -+ -+/* -+** ldnf1sb_vnum_u16_7: -+** ldnf1sb z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_u16_7, svuint16_t, int8_t, -+ z0 = svldnf1sb_vnum_u16 (p0, x0, 7), -+ z0 = svldnf1sb_vnum_u16 (p0, x0, 7)) -+ -+/* -+** ldnf1sb_vnum_u16_8: -+** incb x0, all, mul #4 -+** ldnf1sb z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_u16_8, svuint16_t, int8_t, -+ z0 = svldnf1sb_vnum_u16 (p0, x0, 8), -+ z0 = svldnf1sb_vnum_u16 (p0, x0, 8)) -+ -+/* -+** ldnf1sb_vnum_u16_m1: -+** ldnf1sb z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_u16_m1, svuint16_t, int8_t, -+ z0 = svldnf1sb_vnum_u16 (p0, x0, -1), -+ z0 = svldnf1sb_vnum_u16 (p0, x0, -1)) -+ -+/* -+** ldnf1sb_vnum_u16_m8: -+** ldnf1sb z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_u16_m8, svuint16_t, int8_t, -+ z0 = svldnf1sb_vnum_u16 (p0, x0, -8), -+ z0 = svldnf1sb_vnum_u16 (p0, x0, -8)) -+ -+/* -+** ldnf1sb_vnum_u16_m9: -+** dech x0, all, mul #9 -+** ldnf1sb z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_u16_m9, svuint16_t, int8_t, -+ z0 = svldnf1sb_vnum_u16 (p0, x0, -9), -+ z0 = svldnf1sb_vnum_u16 (p0, x0, -9)) -+ -+/* -+** ldnf1sb_vnum_u16_x1: -+** cnth (x[0-9]+) -+** madd 
(x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnf1sb z0\.h, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_u16_x1, svuint16_t, int8_t, -+ z0 = svldnf1sb_vnum_u16 (p0, x0, x1), -+ z0 = svldnf1sb_vnum_u16 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u32.c -new file mode 100644 -index 000000000..4f52490b4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u32.c -@@ -0,0 +1,154 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnf1sb_u32_base: -+** ldnf1sb z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_u32_base, svuint32_t, int8_t, -+ z0 = svldnf1sb_u32 (p0, x0), -+ z0 = svldnf1sb_u32 (p0, x0)) -+ -+/* -+** ldnf1sb_u32_index: -+** add (x[0-9]+), x0, x1 -+** ldnf1sb z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_u32_index, svuint32_t, int8_t, -+ z0 = svldnf1sb_u32 (p0, x0 + x1), -+ z0 = svldnf1sb_u32 (p0, x0 + x1)) -+ -+/* -+** ldnf1sb_u32_1: -+** ldnf1sb z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_u32_1, svuint32_t, int8_t, -+ z0 = svldnf1sb_u32 (p0, x0 + svcntw ()), -+ z0 = svldnf1sb_u32 (p0, x0 + svcntw ())) -+ -+/* -+** ldnf1sb_u32_7: -+** ldnf1sb z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_u32_7, svuint32_t, int8_t, -+ z0 = svldnf1sb_u32 (p0, x0 + svcntw () * 7), -+ z0 = svldnf1sb_u32 (p0, x0 + svcntw () * 7)) -+ -+/* -+** ldnf1sb_u32_8: -+** incb x0, all, mul #2 -+** ldnf1sb z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_u32_8, svuint32_t, int8_t, -+ z0 = svldnf1sb_u32 (p0, x0 + svcntw () * 8), -+ z0 = svldnf1sb_u32 (p0, x0 + svcntw () * 8)) -+ -+/* -+** ldnf1sb_u32_m1: -+** ldnf1sb z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_u32_m1, svuint32_t, int8_t, -+ z0 = svldnf1sb_u32 (p0, x0 - svcntw ()), -+ z0 = svldnf1sb_u32 (p0, x0 - svcntw ())) -+ -+/* -+** ldnf1sb_u32_m8: -+** ldnf1sb z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_u32_m8, svuint32_t, int8_t, -+ z0 = svldnf1sb_u32 (p0, x0 - svcntw () * 8), -+ z0 = svldnf1sb_u32 (p0, x0 - svcntw () * 8)) -+ -+/* -+** ldnf1sb_u32_m9: -+** decw x0, all, mul #9 -+** ldnf1sb z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_u32_m9, svuint32_t, int8_t, -+ z0 = svldnf1sb_u32 (p0, x0 - svcntw () * 9), -+ z0 = svldnf1sb_u32 (p0, x0 - svcntw () * 9)) -+ -+/* -+** ldnf1sb_vnum_u32_0: -+** ldnf1sb z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_u32_0, svuint32_t, int8_t, -+ z0 = svldnf1sb_vnum_u32 (p0, x0, 0), -+ z0 = svldnf1sb_vnum_u32 (p0, x0, 0)) -+ -+/* -+** ldnf1sb_vnum_u32_1: -+** ldnf1sb z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_u32_1, svuint32_t, int8_t, -+ z0 = svldnf1sb_vnum_u32 (p0, x0, 1), -+ z0 = svldnf1sb_vnum_u32 (p0, x0, 1)) -+ -+/* -+** ldnf1sb_vnum_u32_7: -+** ldnf1sb z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_u32_7, svuint32_t, int8_t, -+ z0 = svldnf1sb_vnum_u32 (p0, x0, 7), -+ z0 = svldnf1sb_vnum_u32 (p0, x0, 7)) -+ -+/* -+** ldnf1sb_vnum_u32_8: -+** incb x0, all, mul #2 -+** ldnf1sb z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_u32_8, svuint32_t, int8_t, -+ z0 = svldnf1sb_vnum_u32 (p0, x0, 8), -+ z0 = svldnf1sb_vnum_u32 (p0, x0, 8)) -+ -+/* -+** ldnf1sb_vnum_u32_m1: -+** ldnf1sb z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_u32_m1, svuint32_t, int8_t, -+ z0 = svldnf1sb_vnum_u32 (p0, x0, -1), -+ z0 
= svldnf1sb_vnum_u32 (p0, x0, -1)) -+ -+/* -+** ldnf1sb_vnum_u32_m8: -+** ldnf1sb z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_u32_m8, svuint32_t, int8_t, -+ z0 = svldnf1sb_vnum_u32 (p0, x0, -8), -+ z0 = svldnf1sb_vnum_u32 (p0, x0, -8)) -+ -+/* -+** ldnf1sb_vnum_u32_m9: -+** decw x0, all, mul #9 -+** ldnf1sb z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_u32_m9, svuint32_t, int8_t, -+ z0 = svldnf1sb_vnum_u32 (p0, x0, -9), -+ z0 = svldnf1sb_vnum_u32 (p0, x0, -9)) -+ -+/* -+** ldnf1sb_vnum_u32_x1: -+** cntw (x[0-9]+) -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnf1sb z0\.s, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_u32_x1, svuint32_t, int8_t, -+ z0 = svldnf1sb_vnum_u32 (p0, x0, x1), -+ z0 = svldnf1sb_vnum_u32 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u64.c -new file mode 100644 -index 000000000..73f50d182 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sb_u64.c -@@ -0,0 +1,154 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnf1sb_u64_base: -+** ldnf1sb z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_u64_base, svuint64_t, int8_t, -+ z0 = svldnf1sb_u64 (p0, x0), -+ z0 = svldnf1sb_u64 (p0, x0)) -+ -+/* -+** ldnf1sb_u64_index: -+** add (x[0-9]+), x0, x1 -+** ldnf1sb z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_u64_index, svuint64_t, int8_t, -+ z0 = svldnf1sb_u64 (p0, x0 + x1), -+ z0 = svldnf1sb_u64 (p0, x0 + x1)) -+ -+/* -+** ldnf1sb_u64_1: -+** ldnf1sb z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_u64_1, svuint64_t, int8_t, -+ z0 = svldnf1sb_u64 (p0, x0 + svcntd ()), -+ z0 = svldnf1sb_u64 (p0, x0 + svcntd ())) -+ -+/* -+** ldnf1sb_u64_7: -+** ldnf1sb z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_u64_7, svuint64_t, int8_t, -+ z0 = svldnf1sb_u64 (p0, x0 + svcntd () * 7), -+ z0 = svldnf1sb_u64 (p0, x0 + svcntd () * 7)) -+ -+/* -+** ldnf1sb_u64_8: -+** incb x0 -+** ldnf1sb z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_u64_8, svuint64_t, int8_t, -+ z0 = svldnf1sb_u64 (p0, x0 + svcntd () * 8), -+ z0 = svldnf1sb_u64 (p0, x0 + svcntd () * 8)) -+ -+/* -+** ldnf1sb_u64_m1: -+** ldnf1sb z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_u64_m1, svuint64_t, int8_t, -+ z0 = svldnf1sb_u64 (p0, x0 - svcntd ()), -+ z0 = svldnf1sb_u64 (p0, x0 - svcntd ())) -+ -+/* -+** ldnf1sb_u64_m8: -+** ldnf1sb z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_u64_m8, svuint64_t, int8_t, -+ z0 = svldnf1sb_u64 (p0, x0 - svcntd () * 8), -+ z0 = svldnf1sb_u64 (p0, x0 - svcntd () * 8)) -+ -+/* -+** ldnf1sb_u64_m9: -+** decd x0, all, mul #9 -+** ldnf1sb z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_u64_m9, svuint64_t, int8_t, -+ z0 = svldnf1sb_u64 (p0, x0 - svcntd () * 9), -+ z0 = svldnf1sb_u64 (p0, x0 - svcntd () * 9)) -+ -+/* -+** ldnf1sb_vnum_u64_0: -+** ldnf1sb z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_u64_0, svuint64_t, int8_t, -+ z0 = svldnf1sb_vnum_u64 (p0, x0, 0), -+ z0 = svldnf1sb_vnum_u64 (p0, x0, 0)) -+ -+/* -+** ldnf1sb_vnum_u64_1: -+** ldnf1sb z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_u64_1, svuint64_t, int8_t, -+ z0 = svldnf1sb_vnum_u64 (p0, x0, 1), -+ z0 = svldnf1sb_vnum_u64 (p0, x0, 1)) -+ -+/* -+** ldnf1sb_vnum_u64_7: -+** ldnf1sb z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ 
-+TEST_LOAD (ldnf1sb_vnum_u64_7, svuint64_t, int8_t, -+ z0 = svldnf1sb_vnum_u64 (p0, x0, 7), -+ z0 = svldnf1sb_vnum_u64 (p0, x0, 7)) -+ -+/* -+** ldnf1sb_vnum_u64_8: -+** incb x0 -+** ldnf1sb z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_u64_8, svuint64_t, int8_t, -+ z0 = svldnf1sb_vnum_u64 (p0, x0, 8), -+ z0 = svldnf1sb_vnum_u64 (p0, x0, 8)) -+ -+/* -+** ldnf1sb_vnum_u64_m1: -+** ldnf1sb z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_u64_m1, svuint64_t, int8_t, -+ z0 = svldnf1sb_vnum_u64 (p0, x0, -1), -+ z0 = svldnf1sb_vnum_u64 (p0, x0, -1)) -+ -+/* -+** ldnf1sb_vnum_u64_m8: -+** ldnf1sb z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_u64_m8, svuint64_t, int8_t, -+ z0 = svldnf1sb_vnum_u64 (p0, x0, -8), -+ z0 = svldnf1sb_vnum_u64 (p0, x0, -8)) -+ -+/* -+** ldnf1sb_vnum_u64_m9: -+** decd x0, all, mul #9 -+** ldnf1sb z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_u64_m9, svuint64_t, int8_t, -+ z0 = svldnf1sb_vnum_u64 (p0, x0, -9), -+ z0 = svldnf1sb_vnum_u64 (p0, x0, -9)) -+ -+/* -+** ldnf1sb_vnum_u64_x1: -+** cntd (x[0-9]+) -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnf1sb z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnf1sb_vnum_u64_x1, svuint64_t, int8_t, -+ z0 = svldnf1sb_vnum_u64 (p0, x0, x1), -+ z0 = svldnf1sb_vnum_u64 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_s32.c -new file mode 100644 -index 000000000..08c7dc6dd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_s32.c -@@ -0,0 +1,154 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnf1sh_s32_base: -+** ldnf1sh z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_s32_base, svint32_t, int16_t, -+ z0 = svldnf1sh_s32 (p0, x0), -+ z0 = svldnf1sh_s32 (p0, x0)) -+ -+/* -+** ldnf1sh_s32_index: -+** add (x[0-9]+), x0, x1, lsl 1 -+** ldnf1sh z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_s32_index, svint32_t, int16_t, -+ z0 = svldnf1sh_s32 (p0, x0 + x1), -+ z0 = svldnf1sh_s32 (p0, x0 + x1)) -+ -+/* -+** ldnf1sh_s32_1: -+** ldnf1sh z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_s32_1, svint32_t, int16_t, -+ z0 = svldnf1sh_s32 (p0, x0 + svcntw ()), -+ z0 = svldnf1sh_s32 (p0, x0 + svcntw ())) -+ -+/* -+** ldnf1sh_s32_7: -+** ldnf1sh z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_s32_7, svint32_t, int16_t, -+ z0 = svldnf1sh_s32 (p0, x0 + svcntw () * 7), -+ z0 = svldnf1sh_s32 (p0, x0 + svcntw () * 7)) -+ -+/* -+** ldnf1sh_s32_8: -+** incb x0, all, mul #4 -+** ldnf1sh z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_s32_8, svint32_t, int16_t, -+ z0 = svldnf1sh_s32 (p0, x0 + svcntw () * 8), -+ z0 = svldnf1sh_s32 (p0, x0 + svcntw () * 8)) -+ -+/* -+** ldnf1sh_s32_m1: -+** ldnf1sh z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_s32_m1, svint32_t, int16_t, -+ z0 = svldnf1sh_s32 (p0, x0 - svcntw ()), -+ z0 = svldnf1sh_s32 (p0, x0 - svcntw ())) -+ -+/* -+** ldnf1sh_s32_m8: -+** ldnf1sh z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_s32_m8, svint32_t, int16_t, -+ z0 = svldnf1sh_s32 (p0, x0 - svcntw () * 8), -+ z0 = svldnf1sh_s32 (p0, x0 - svcntw () * 8)) -+ -+/* -+** ldnf1sh_s32_m9: -+** dech x0, all, mul #9 -+** ldnf1sh z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_s32_m9, svint32_t, int16_t, -+ z0 = svldnf1sh_s32 (p0, x0 - svcntw () * 9), -+ z0 = 
svldnf1sh_s32 (p0, x0 - svcntw () * 9)) -+ -+/* -+** ldnf1sh_vnum_s32_0: -+** ldnf1sh z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_vnum_s32_0, svint32_t, int16_t, -+ z0 = svldnf1sh_vnum_s32 (p0, x0, 0), -+ z0 = svldnf1sh_vnum_s32 (p0, x0, 0)) -+ -+/* -+** ldnf1sh_vnum_s32_1: -+** ldnf1sh z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_vnum_s32_1, svint32_t, int16_t, -+ z0 = svldnf1sh_vnum_s32 (p0, x0, 1), -+ z0 = svldnf1sh_vnum_s32 (p0, x0, 1)) -+ -+/* -+** ldnf1sh_vnum_s32_7: -+** ldnf1sh z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_vnum_s32_7, svint32_t, int16_t, -+ z0 = svldnf1sh_vnum_s32 (p0, x0, 7), -+ z0 = svldnf1sh_vnum_s32 (p0, x0, 7)) -+ -+/* -+** ldnf1sh_vnum_s32_8: -+** incb x0, all, mul #4 -+** ldnf1sh z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_vnum_s32_8, svint32_t, int16_t, -+ z0 = svldnf1sh_vnum_s32 (p0, x0, 8), -+ z0 = svldnf1sh_vnum_s32 (p0, x0, 8)) -+ -+/* -+** ldnf1sh_vnum_s32_m1: -+** ldnf1sh z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_vnum_s32_m1, svint32_t, int16_t, -+ z0 = svldnf1sh_vnum_s32 (p0, x0, -1), -+ z0 = svldnf1sh_vnum_s32 (p0, x0, -1)) -+ -+/* -+** ldnf1sh_vnum_s32_m8: -+** ldnf1sh z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_vnum_s32_m8, svint32_t, int16_t, -+ z0 = svldnf1sh_vnum_s32 (p0, x0, -8), -+ z0 = svldnf1sh_vnum_s32 (p0, x0, -8)) -+ -+/* -+** ldnf1sh_vnum_s32_m9: -+** dech x0, all, mul #9 -+** ldnf1sh z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_vnum_s32_m9, svint32_t, int16_t, -+ z0 = svldnf1sh_vnum_s32 (p0, x0, -9), -+ z0 = svldnf1sh_vnum_s32 (p0, x0, -9)) -+ -+/* -+** ldnf1sh_vnum_s32_x1: -+** cnth (x[0-9]+) -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnf1sh z0\.s, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_vnum_s32_x1, svint32_t, int16_t, -+ z0 = svldnf1sh_vnum_s32 (p0, x0, x1), -+ z0 = svldnf1sh_vnum_s32 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_s64.c -new file mode 100644 -index 000000000..6a41bc26b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_s64.c -@@ -0,0 +1,154 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnf1sh_s64_base: -+** ldnf1sh z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_s64_base, svint64_t, int16_t, -+ z0 = svldnf1sh_s64 (p0, x0), -+ z0 = svldnf1sh_s64 (p0, x0)) -+ -+/* -+** ldnf1sh_s64_index: -+** add (x[0-9]+), x0, x1, lsl 1 -+** ldnf1sh z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_s64_index, svint64_t, int16_t, -+ z0 = svldnf1sh_s64 (p0, x0 + x1), -+ z0 = svldnf1sh_s64 (p0, x0 + x1)) -+ -+/* -+** ldnf1sh_s64_1: -+** ldnf1sh z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_s64_1, svint64_t, int16_t, -+ z0 = svldnf1sh_s64 (p0, x0 + svcntd ()), -+ z0 = svldnf1sh_s64 (p0, x0 + svcntd ())) -+ -+/* -+** ldnf1sh_s64_7: -+** ldnf1sh z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_s64_7, svint64_t, int16_t, -+ z0 = svldnf1sh_s64 (p0, x0 + svcntd () * 7), -+ z0 = svldnf1sh_s64 (p0, x0 + svcntd () * 7)) -+ -+/* -+** ldnf1sh_s64_8: -+** incb x0, all, mul #2 -+** ldnf1sh z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_s64_8, svint64_t, int16_t, -+ z0 = svldnf1sh_s64 (p0, x0 + svcntd () * 8), -+ z0 = svldnf1sh_s64 (p0, x0 + svcntd () * 8)) -+ -+/* -+** ldnf1sh_s64_m1: -+** ldnf1sh z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_s64_m1, svint64_t, int16_t, -+ z0 = svldnf1sh_s64 (p0, x0 - svcntd ()), -+ z0 = svldnf1sh_s64 (p0, x0 - svcntd ())) -+ -+/* -+** ldnf1sh_s64_m8: -+** ldnf1sh z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_s64_m8, svint64_t, int16_t, -+ z0 = svldnf1sh_s64 (p0, x0 - svcntd () * 8), -+ z0 = svldnf1sh_s64 (p0, x0 - svcntd () * 8)) -+ -+/* -+** ldnf1sh_s64_m9: -+** decw x0, all, mul #9 -+** ldnf1sh z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_s64_m9, svint64_t, int16_t, -+ z0 = svldnf1sh_s64 (p0, x0 - svcntd () * 9), -+ z0 = svldnf1sh_s64 (p0, x0 - svcntd () * 9)) -+ -+/* -+** ldnf1sh_vnum_s64_0: -+** ldnf1sh z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_vnum_s64_0, svint64_t, int16_t, -+ z0 = svldnf1sh_vnum_s64 (p0, x0, 0), -+ z0 = svldnf1sh_vnum_s64 (p0, x0, 0)) -+ -+/* -+** ldnf1sh_vnum_s64_1: -+** ldnf1sh z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_vnum_s64_1, svint64_t, int16_t, -+ z0 = svldnf1sh_vnum_s64 (p0, x0, 1), -+ z0 = svldnf1sh_vnum_s64 (p0, x0, 1)) -+ -+/* -+** ldnf1sh_vnum_s64_7: -+** ldnf1sh z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_vnum_s64_7, svint64_t, int16_t, -+ z0 = svldnf1sh_vnum_s64 (p0, x0, 7), -+ z0 = svldnf1sh_vnum_s64 (p0, x0, 7)) -+ -+/* -+** ldnf1sh_vnum_s64_8: -+** incb x0, all, mul #2 -+** ldnf1sh z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_vnum_s64_8, svint64_t, int16_t, -+ z0 = svldnf1sh_vnum_s64 (p0, x0, 8), -+ z0 = svldnf1sh_vnum_s64 (p0, x0, 8)) -+ -+/* -+** ldnf1sh_vnum_s64_m1: -+** ldnf1sh z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_vnum_s64_m1, svint64_t, int16_t, -+ z0 = svldnf1sh_vnum_s64 (p0, x0, -1), -+ z0 = svldnf1sh_vnum_s64 (p0, x0, -1)) -+ -+/* -+** ldnf1sh_vnum_s64_m8: -+** ldnf1sh z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_vnum_s64_m8, svint64_t, int16_t, -+ z0 = svldnf1sh_vnum_s64 (p0, x0, -8), -+ z0 = svldnf1sh_vnum_s64 (p0, x0, -8)) -+ -+/* -+** ldnf1sh_vnum_s64_m9: -+** decw x0, all, mul #9 -+** ldnf1sh z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_vnum_s64_m9, svint64_t, int16_t, -+ z0 = svldnf1sh_vnum_s64 (p0, x0, -9), -+ z0 = svldnf1sh_vnum_s64 (p0, x0, -9)) -+ -+/* -+** ldnf1sh_vnum_s64_x1: -+** cntw (x[0-9]+) -+** 
madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnf1sh z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_vnum_s64_x1, svint64_t, int16_t, -+ z0 = svldnf1sh_vnum_s64 (p0, x0, x1), -+ z0 = svldnf1sh_vnum_s64 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_u32.c -new file mode 100644 -index 000000000..2f7718730 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_u32.c -@@ -0,0 +1,154 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnf1sh_u32_base: -+** ldnf1sh z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_u32_base, svuint32_t, int16_t, -+ z0 = svldnf1sh_u32 (p0, x0), -+ z0 = svldnf1sh_u32 (p0, x0)) -+ -+/* -+** ldnf1sh_u32_index: -+** add (x[0-9]+), x0, x1, lsl 1 -+** ldnf1sh z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_u32_index, svuint32_t, int16_t, -+ z0 = svldnf1sh_u32 (p0, x0 + x1), -+ z0 = svldnf1sh_u32 (p0, x0 + x1)) -+ -+/* -+** ldnf1sh_u32_1: -+** ldnf1sh z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_u32_1, svuint32_t, int16_t, -+ z0 = svldnf1sh_u32 (p0, x0 + svcntw ()), -+ z0 = svldnf1sh_u32 (p0, x0 + svcntw ())) -+ -+/* -+** ldnf1sh_u32_7: -+** ldnf1sh z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_u32_7, svuint32_t, int16_t, -+ z0 = svldnf1sh_u32 (p0, x0 + svcntw () * 7), -+ z0 = svldnf1sh_u32 (p0, x0 + svcntw () * 7)) -+ -+/* -+** ldnf1sh_u32_8: -+** incb x0, all, mul #4 -+** ldnf1sh z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_u32_8, svuint32_t, int16_t, -+ z0 = svldnf1sh_u32 (p0, x0 + svcntw () * 8), -+ z0 = svldnf1sh_u32 (p0, x0 + svcntw () * 8)) -+ -+/* -+** ldnf1sh_u32_m1: -+** ldnf1sh z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_u32_m1, svuint32_t, int16_t, -+ z0 = svldnf1sh_u32 (p0, x0 - svcntw ()), -+ z0 = svldnf1sh_u32 (p0, x0 - svcntw ())) -+ -+/* -+** ldnf1sh_u32_m8: -+** ldnf1sh z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_u32_m8, svuint32_t, int16_t, -+ z0 = svldnf1sh_u32 (p0, x0 - svcntw () * 8), -+ z0 = svldnf1sh_u32 (p0, x0 - svcntw () * 8)) -+ -+/* -+** ldnf1sh_u32_m9: -+** dech x0, all, mul #9 -+** ldnf1sh z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_u32_m9, svuint32_t, int16_t, -+ z0 = svldnf1sh_u32 (p0, x0 - svcntw () * 9), -+ z0 = svldnf1sh_u32 (p0, x0 - svcntw () * 9)) -+ -+/* -+** ldnf1sh_vnum_u32_0: -+** ldnf1sh z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_vnum_u32_0, svuint32_t, int16_t, -+ z0 = svldnf1sh_vnum_u32 (p0, x0, 0), -+ z0 = svldnf1sh_vnum_u32 (p0, x0, 0)) -+ -+/* -+** ldnf1sh_vnum_u32_1: -+** ldnf1sh z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_vnum_u32_1, svuint32_t, int16_t, -+ z0 = svldnf1sh_vnum_u32 (p0, x0, 1), -+ z0 = svldnf1sh_vnum_u32 (p0, x0, 1)) -+ -+/* -+** ldnf1sh_vnum_u32_7: -+** ldnf1sh z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_vnum_u32_7, svuint32_t, int16_t, -+ z0 = svldnf1sh_vnum_u32 (p0, x0, 7), -+ z0 = svldnf1sh_vnum_u32 (p0, x0, 7)) -+ -+/* -+** ldnf1sh_vnum_u32_8: -+** incb x0, all, mul #4 -+** ldnf1sh z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_vnum_u32_8, svuint32_t, int16_t, -+ z0 = svldnf1sh_vnum_u32 (p0, x0, 8), -+ z0 = svldnf1sh_vnum_u32 (p0, x0, 8)) -+ -+/* -+** ldnf1sh_vnum_u32_m1: -+** ldnf1sh z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_vnum_u32_m1, svuint32_t, int16_t, -+ z0 = 
svldnf1sh_vnum_u32 (p0, x0, -1), -+ z0 = svldnf1sh_vnum_u32 (p0, x0, -1)) -+ -+/* -+** ldnf1sh_vnum_u32_m8: -+** ldnf1sh z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_vnum_u32_m8, svuint32_t, int16_t, -+ z0 = svldnf1sh_vnum_u32 (p0, x0, -8), -+ z0 = svldnf1sh_vnum_u32 (p0, x0, -8)) -+ -+/* -+** ldnf1sh_vnum_u32_m9: -+** dech x0, all, mul #9 -+** ldnf1sh z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_vnum_u32_m9, svuint32_t, int16_t, -+ z0 = svldnf1sh_vnum_u32 (p0, x0, -9), -+ z0 = svldnf1sh_vnum_u32 (p0, x0, -9)) -+ -+/* -+** ldnf1sh_vnum_u32_x1: -+** cnth (x[0-9]+) -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnf1sh z0\.s, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_vnum_u32_x1, svuint32_t, int16_t, -+ z0 = svldnf1sh_vnum_u32 (p0, x0, x1), -+ z0 = svldnf1sh_vnum_u32 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_u64.c -new file mode 100644 -index 000000000..d7f1a68a4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sh_u64.c -@@ -0,0 +1,154 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnf1sh_u64_base: -+** ldnf1sh z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_u64_base, svuint64_t, int16_t, -+ z0 = svldnf1sh_u64 (p0, x0), -+ z0 = svldnf1sh_u64 (p0, x0)) -+ -+/* -+** ldnf1sh_u64_index: -+** add (x[0-9]+), x0, x1, lsl 1 -+** ldnf1sh z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_u64_index, svuint64_t, int16_t, -+ z0 = svldnf1sh_u64 (p0, x0 + x1), -+ z0 = svldnf1sh_u64 (p0, x0 + x1)) -+ -+/* -+** ldnf1sh_u64_1: -+** ldnf1sh z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_u64_1, svuint64_t, int16_t, -+ z0 = svldnf1sh_u64 (p0, x0 + svcntd ()), -+ z0 = svldnf1sh_u64 (p0, x0 + svcntd ())) -+ -+/* -+** ldnf1sh_u64_7: -+** ldnf1sh z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_u64_7, svuint64_t, int16_t, -+ z0 = svldnf1sh_u64 (p0, x0 + svcntd () * 7), -+ z0 = svldnf1sh_u64 (p0, x0 + svcntd () * 7)) -+ -+/* -+** ldnf1sh_u64_8: -+** incb x0, all, mul #2 -+** ldnf1sh z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_u64_8, svuint64_t, int16_t, -+ z0 = svldnf1sh_u64 (p0, x0 + svcntd () * 8), -+ z0 = svldnf1sh_u64 (p0, x0 + svcntd () * 8)) -+ -+/* -+** ldnf1sh_u64_m1: -+** ldnf1sh z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_u64_m1, svuint64_t, int16_t, -+ z0 = svldnf1sh_u64 (p0, x0 - svcntd ()), -+ z0 = svldnf1sh_u64 (p0, x0 - svcntd ())) -+ -+/* -+** ldnf1sh_u64_m8: -+** ldnf1sh z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_u64_m8, svuint64_t, int16_t, -+ z0 = svldnf1sh_u64 (p0, x0 - svcntd () * 8), -+ z0 = svldnf1sh_u64 (p0, x0 - svcntd () * 8)) -+ -+/* -+** ldnf1sh_u64_m9: -+** decw x0, all, mul #9 -+** ldnf1sh z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_u64_m9, svuint64_t, int16_t, -+ z0 = svldnf1sh_u64 (p0, x0 - svcntd () * 9), -+ z0 = svldnf1sh_u64 (p0, x0 - svcntd () * 9)) -+ -+/* -+** ldnf1sh_vnum_u64_0: -+** ldnf1sh z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_vnum_u64_0, svuint64_t, int16_t, -+ z0 = svldnf1sh_vnum_u64 (p0, x0, 0), -+ z0 = svldnf1sh_vnum_u64 (p0, x0, 0)) -+ -+/* -+** ldnf1sh_vnum_u64_1: -+** ldnf1sh z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_vnum_u64_1, svuint64_t, int16_t, -+ z0 = svldnf1sh_vnum_u64 (p0, x0, 1), -+ z0 = svldnf1sh_vnum_u64 (p0, x0, 1)) -+ -+/* -+** 
ldnf1sh_vnum_u64_7: -+** ldnf1sh z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_vnum_u64_7, svuint64_t, int16_t, -+ z0 = svldnf1sh_vnum_u64 (p0, x0, 7), -+ z0 = svldnf1sh_vnum_u64 (p0, x0, 7)) -+ -+/* -+** ldnf1sh_vnum_u64_8: -+** incb x0, all, mul #2 -+** ldnf1sh z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_vnum_u64_8, svuint64_t, int16_t, -+ z0 = svldnf1sh_vnum_u64 (p0, x0, 8), -+ z0 = svldnf1sh_vnum_u64 (p0, x0, 8)) -+ -+/* -+** ldnf1sh_vnum_u64_m1: -+** ldnf1sh z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_vnum_u64_m1, svuint64_t, int16_t, -+ z0 = svldnf1sh_vnum_u64 (p0, x0, -1), -+ z0 = svldnf1sh_vnum_u64 (p0, x0, -1)) -+ -+/* -+** ldnf1sh_vnum_u64_m8: -+** ldnf1sh z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_vnum_u64_m8, svuint64_t, int16_t, -+ z0 = svldnf1sh_vnum_u64 (p0, x0, -8), -+ z0 = svldnf1sh_vnum_u64 (p0, x0, -8)) -+ -+/* -+** ldnf1sh_vnum_u64_m9: -+** decw x0, all, mul #9 -+** ldnf1sh z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_vnum_u64_m9, svuint64_t, int16_t, -+ z0 = svldnf1sh_vnum_u64 (p0, x0, -9), -+ z0 = svldnf1sh_vnum_u64 (p0, x0, -9)) -+ -+/* -+** ldnf1sh_vnum_u64_x1: -+** cntw (x[0-9]+) -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnf1sh z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnf1sh_vnum_u64_x1, svuint64_t, int16_t, -+ z0 = svldnf1sh_vnum_u64 (p0, x0, x1), -+ z0 = svldnf1sh_vnum_u64 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sw_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sw_s64.c -new file mode 100644 -index 000000000..5b483e4aa ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sw_s64.c -@@ -0,0 +1,154 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnf1sw_s64_base: -+** ldnf1sw z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sw_s64_base, svint64_t, int32_t, -+ z0 = svldnf1sw_s64 (p0, x0), -+ z0 = svldnf1sw_s64 (p0, x0)) -+ -+/* -+** ldnf1sw_s64_index: -+** add (x[0-9]+), x0, x1, lsl 2 -+** ldnf1sw z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ldnf1sw_s64_index, svint64_t, int32_t, -+ z0 = svldnf1sw_s64 (p0, x0 + x1), -+ z0 = svldnf1sw_s64 (p0, x0 + x1)) -+ -+/* -+** ldnf1sw_s64_1: -+** ldnf1sw z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sw_s64_1, svint64_t, int32_t, -+ z0 = svldnf1sw_s64 (p0, x0 + svcntd ()), -+ z0 = svldnf1sw_s64 (p0, x0 + svcntd ())) -+ -+/* -+** ldnf1sw_s64_7: -+** ldnf1sw z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sw_s64_7, svint64_t, int32_t, -+ z0 = svldnf1sw_s64 (p0, x0 + svcntd () * 7), -+ z0 = svldnf1sw_s64 (p0, x0 + svcntd () * 7)) -+ -+/* -+** ldnf1sw_s64_8: -+** incb x0, all, mul #4 -+** ldnf1sw z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sw_s64_8, svint64_t, int32_t, -+ z0 = svldnf1sw_s64 (p0, x0 + svcntd () * 8), -+ z0 = svldnf1sw_s64 (p0, x0 + svcntd () * 8)) -+ -+/* -+** ldnf1sw_s64_m1: -+** ldnf1sw z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sw_s64_m1, svint64_t, int32_t, -+ z0 = svldnf1sw_s64 (p0, x0 - svcntd ()), -+ z0 = svldnf1sw_s64 (p0, x0 - svcntd ())) -+ -+/* -+** ldnf1sw_s64_m8: -+** ldnf1sw z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sw_s64_m8, svint64_t, int32_t, -+ z0 = svldnf1sw_s64 (p0, x0 - svcntd () * 8), -+ z0 = svldnf1sw_s64 (p0, x0 - svcntd () * 8)) -+ -+/* -+** ldnf1sw_s64_m9: -+** dech x0, all, mul #9 -+** ldnf1sw z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sw_s64_m9, svint64_t, int32_t, -+ z0 = svldnf1sw_s64 (p0, x0 - svcntd () * 9), -+ z0 = svldnf1sw_s64 (p0, x0 - svcntd () * 9)) -+ -+/* -+** ldnf1sw_vnum_s64_0: -+** ldnf1sw z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sw_vnum_s64_0, svint64_t, int32_t, -+ z0 = svldnf1sw_vnum_s64 (p0, x0, 0), -+ z0 = svldnf1sw_vnum_s64 (p0, x0, 0)) -+ -+/* -+** ldnf1sw_vnum_s64_1: -+** ldnf1sw z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sw_vnum_s64_1, svint64_t, int32_t, -+ z0 = svldnf1sw_vnum_s64 (p0, x0, 1), -+ z0 = svldnf1sw_vnum_s64 (p0, x0, 1)) -+ -+/* -+** ldnf1sw_vnum_s64_7: -+** ldnf1sw z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sw_vnum_s64_7, svint64_t, int32_t, -+ z0 = svldnf1sw_vnum_s64 (p0, x0, 7), -+ z0 = svldnf1sw_vnum_s64 (p0, x0, 7)) -+ -+/* -+** ldnf1sw_vnum_s64_8: -+** incb x0, all, mul #4 -+** ldnf1sw z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sw_vnum_s64_8, svint64_t, int32_t, -+ z0 = svldnf1sw_vnum_s64 (p0, x0, 8), -+ z0 = svldnf1sw_vnum_s64 (p0, x0, 8)) -+ -+/* -+** ldnf1sw_vnum_s64_m1: -+** ldnf1sw z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sw_vnum_s64_m1, svint64_t, int32_t, -+ z0 = svldnf1sw_vnum_s64 (p0, x0, -1), -+ z0 = svldnf1sw_vnum_s64 (p0, x0, -1)) -+ -+/* -+** ldnf1sw_vnum_s64_m8: -+** ldnf1sw z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sw_vnum_s64_m8, svint64_t, int32_t, -+ z0 = svldnf1sw_vnum_s64 (p0, x0, -8), -+ z0 = svldnf1sw_vnum_s64 (p0, x0, -8)) -+ -+/* -+** ldnf1sw_vnum_s64_m9: -+** dech x0, all, mul #9 -+** ldnf1sw z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sw_vnum_s64_m9, svint64_t, int32_t, -+ z0 = svldnf1sw_vnum_s64 (p0, x0, -9), -+ z0 = svldnf1sw_vnum_s64 (p0, x0, -9)) -+ -+/* -+** ldnf1sw_vnum_s64_x1: -+** cnth (x[0-9]+) -+** 
madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnf1sw z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnf1sw_vnum_s64_x1, svint64_t, int32_t, -+ z0 = svldnf1sw_vnum_s64 (p0, x0, x1), -+ z0 = svldnf1sw_vnum_s64 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sw_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sw_u64.c -new file mode 100644 -index 000000000..62121ce0a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1sw_u64.c -@@ -0,0 +1,154 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnf1sw_u64_base: -+** ldnf1sw z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sw_u64_base, svuint64_t, int32_t, -+ z0 = svldnf1sw_u64 (p0, x0), -+ z0 = svldnf1sw_u64 (p0, x0)) -+ -+/* -+** ldnf1sw_u64_index: -+** add (x[0-9]+), x0, x1, lsl 2 -+** ldnf1sw z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ldnf1sw_u64_index, svuint64_t, int32_t, -+ z0 = svldnf1sw_u64 (p0, x0 + x1), -+ z0 = svldnf1sw_u64 (p0, x0 + x1)) -+ -+/* -+** ldnf1sw_u64_1: -+** ldnf1sw z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sw_u64_1, svuint64_t, int32_t, -+ z0 = svldnf1sw_u64 (p0, x0 + svcntd ()), -+ z0 = svldnf1sw_u64 (p0, x0 + svcntd ())) -+ -+/* -+** ldnf1sw_u64_7: -+** ldnf1sw z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sw_u64_7, svuint64_t, int32_t, -+ z0 = svldnf1sw_u64 (p0, x0 + svcntd () * 7), -+ z0 = svldnf1sw_u64 (p0, x0 + svcntd () * 7)) -+ -+/* -+** ldnf1sw_u64_8: -+** incb x0, all, mul #4 -+** ldnf1sw z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sw_u64_8, svuint64_t, int32_t, -+ z0 = svldnf1sw_u64 (p0, x0 + svcntd () * 8), -+ z0 = svldnf1sw_u64 (p0, x0 + svcntd () * 8)) -+ -+/* -+** ldnf1sw_u64_m1: -+** ldnf1sw z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sw_u64_m1, svuint64_t, int32_t, -+ z0 = svldnf1sw_u64 (p0, x0 - svcntd ()), -+ z0 = svldnf1sw_u64 (p0, x0 - svcntd ())) -+ -+/* -+** ldnf1sw_u64_m8: -+** ldnf1sw z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sw_u64_m8, svuint64_t, int32_t, -+ z0 = svldnf1sw_u64 (p0, x0 - svcntd () * 8), -+ z0 = svldnf1sw_u64 (p0, x0 - svcntd () * 8)) -+ -+/* -+** ldnf1sw_u64_m9: -+** dech x0, all, mul #9 -+** ldnf1sw z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sw_u64_m9, svuint64_t, int32_t, -+ z0 = svldnf1sw_u64 (p0, x0 - svcntd () * 9), -+ z0 = svldnf1sw_u64 (p0, x0 - svcntd () * 9)) -+ -+/* -+** ldnf1sw_vnum_u64_0: -+** ldnf1sw z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sw_vnum_u64_0, svuint64_t, int32_t, -+ z0 = svldnf1sw_vnum_u64 (p0, x0, 0), -+ z0 = svldnf1sw_vnum_u64 (p0, x0, 0)) -+ -+/* -+** ldnf1sw_vnum_u64_1: -+** ldnf1sw z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sw_vnum_u64_1, svuint64_t, int32_t, -+ z0 = svldnf1sw_vnum_u64 (p0, x0, 1), -+ z0 = svldnf1sw_vnum_u64 (p0, x0, 1)) -+ -+/* -+** ldnf1sw_vnum_u64_7: -+** ldnf1sw z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sw_vnum_u64_7, svuint64_t, int32_t, -+ z0 = svldnf1sw_vnum_u64 (p0, x0, 7), -+ z0 = svldnf1sw_vnum_u64 (p0, x0, 7)) -+ -+/* -+** ldnf1sw_vnum_u64_8: -+** incb x0, all, mul #4 -+** ldnf1sw z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sw_vnum_u64_8, svuint64_t, int32_t, -+ z0 = svldnf1sw_vnum_u64 (p0, x0, 8), -+ z0 = svldnf1sw_vnum_u64 (p0, x0, 8)) -+ -+/* -+** ldnf1sw_vnum_u64_m1: -+** ldnf1sw z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sw_vnum_u64_m1, svuint64_t, int32_t, -+ z0 = 
svldnf1sw_vnum_u64 (p0, x0, -1), -+ z0 = svldnf1sw_vnum_u64 (p0, x0, -1)) -+ -+/* -+** ldnf1sw_vnum_u64_m8: -+** ldnf1sw z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1sw_vnum_u64_m8, svuint64_t, int32_t, -+ z0 = svldnf1sw_vnum_u64 (p0, x0, -8), -+ z0 = svldnf1sw_vnum_u64 (p0, x0, -8)) -+ -+/* -+** ldnf1sw_vnum_u64_m9: -+** dech x0, all, mul #9 -+** ldnf1sw z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1sw_vnum_u64_m9, svuint64_t, int32_t, -+ z0 = svldnf1sw_vnum_u64 (p0, x0, -9), -+ z0 = svldnf1sw_vnum_u64 (p0, x0, -9)) -+ -+/* -+** ldnf1sw_vnum_u64_x1: -+** cnth (x[0-9]+) -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnf1sw z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnf1sw_vnum_u64_x1, svuint64_t, int32_t, -+ z0 = svldnf1sw_vnum_u64 (p0, x0, x1), -+ z0 = svldnf1sw_vnum_u64 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s16.c -new file mode 100644 -index 000000000..8fe13411f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s16.c -@@ -0,0 +1,154 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnf1ub_s16_base: -+** ldnf1b z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_s16_base, svint16_t, uint8_t, -+ z0 = svldnf1ub_s16 (p0, x0), -+ z0 = svldnf1ub_s16 (p0, x0)) -+ -+/* -+** ldnf1ub_s16_index: -+** add (x[0-9]+), x0, x1 -+** ldnf1b z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_s16_index, svint16_t, uint8_t, -+ z0 = svldnf1ub_s16 (p0, x0 + x1), -+ z0 = svldnf1ub_s16 (p0, x0 + x1)) -+ -+/* -+** ldnf1ub_s16_1: -+** ldnf1b z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_s16_1, svint16_t, uint8_t, -+ z0 = svldnf1ub_s16 (p0, x0 + svcnth ()), -+ z0 = svldnf1ub_s16 (p0, x0 + svcnth ())) -+ -+/* -+** ldnf1ub_s16_7: -+** ldnf1b z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_s16_7, svint16_t, uint8_t, -+ z0 = svldnf1ub_s16 (p0, x0 + svcnth () * 7), -+ z0 = svldnf1ub_s16 (p0, x0 + svcnth () * 7)) -+ -+/* -+** ldnf1ub_s16_8: -+** incb x0, all, mul #4 -+** ldnf1b z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_s16_8, svint16_t, uint8_t, -+ z0 = svldnf1ub_s16 (p0, x0 + svcnth () * 8), -+ z0 = svldnf1ub_s16 (p0, x0 + svcnth () * 8)) -+ -+/* -+** ldnf1ub_s16_m1: -+** ldnf1b z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_s16_m1, svint16_t, uint8_t, -+ z0 = svldnf1ub_s16 (p0, x0 - svcnth ()), -+ z0 = svldnf1ub_s16 (p0, x0 - svcnth ())) -+ -+/* -+** ldnf1ub_s16_m8: -+** ldnf1b z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_s16_m8, svint16_t, uint8_t, -+ z0 = svldnf1ub_s16 (p0, x0 - svcnth () * 8), -+ z0 = svldnf1ub_s16 (p0, x0 - svcnth () * 8)) -+ -+/* -+** ldnf1ub_s16_m9: -+** dech x0, all, mul #9 -+** ldnf1b z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_s16_m9, svint16_t, uint8_t, -+ z0 = svldnf1ub_s16 (p0, x0 - svcnth () * 9), -+ z0 = svldnf1ub_s16 (p0, x0 - svcnth () * 9)) -+ -+/* -+** ldnf1ub_vnum_s16_0: -+** ldnf1b z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_s16_0, svint16_t, uint8_t, -+ z0 = svldnf1ub_vnum_s16 (p0, x0, 0), -+ z0 = svldnf1ub_vnum_s16 (p0, x0, 0)) -+ -+/* -+** ldnf1ub_vnum_s16_1: -+** ldnf1b z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_s16_1, svint16_t, uint8_t, -+ z0 = svldnf1ub_vnum_s16 (p0, x0, 1), -+ z0 = svldnf1ub_vnum_s16 (p0, x0, 1)) -+ -+/* -+** ldnf1ub_vnum_s16_7: -+** ldnf1b z0\.h, 
p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_s16_7, svint16_t, uint8_t, -+ z0 = svldnf1ub_vnum_s16 (p0, x0, 7), -+ z0 = svldnf1ub_vnum_s16 (p0, x0, 7)) -+ -+/* -+** ldnf1ub_vnum_s16_8: -+** incb x0, all, mul #4 -+** ldnf1b z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_s16_8, svint16_t, uint8_t, -+ z0 = svldnf1ub_vnum_s16 (p0, x0, 8), -+ z0 = svldnf1ub_vnum_s16 (p0, x0, 8)) -+ -+/* -+** ldnf1ub_vnum_s16_m1: -+** ldnf1b z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_s16_m1, svint16_t, uint8_t, -+ z0 = svldnf1ub_vnum_s16 (p0, x0, -1), -+ z0 = svldnf1ub_vnum_s16 (p0, x0, -1)) -+ -+/* -+** ldnf1ub_vnum_s16_m8: -+** ldnf1b z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_s16_m8, svint16_t, uint8_t, -+ z0 = svldnf1ub_vnum_s16 (p0, x0, -8), -+ z0 = svldnf1ub_vnum_s16 (p0, x0, -8)) -+ -+/* -+** ldnf1ub_vnum_s16_m9: -+** dech x0, all, mul #9 -+** ldnf1b z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_s16_m9, svint16_t, uint8_t, -+ z0 = svldnf1ub_vnum_s16 (p0, x0, -9), -+ z0 = svldnf1ub_vnum_s16 (p0, x0, -9)) -+ -+/* -+** ldnf1ub_vnum_s16_x1: -+** cnth (x[0-9]+) -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnf1b z0\.h, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_s16_x1, svint16_t, uint8_t, -+ z0 = svldnf1ub_vnum_s16 (p0, x0, x1), -+ z0 = svldnf1ub_vnum_s16 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s32.c -new file mode 100644 -index 000000000..50122e3b7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s32.c -@@ -0,0 +1,154 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnf1ub_s32_base: -+** ldnf1b z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_s32_base, svint32_t, uint8_t, -+ z0 = svldnf1ub_s32 (p0, x0), -+ z0 = svldnf1ub_s32 (p0, x0)) -+ -+/* -+** ldnf1ub_s32_index: -+** add (x[0-9]+), x0, x1 -+** ldnf1b z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_s32_index, svint32_t, uint8_t, -+ z0 = svldnf1ub_s32 (p0, x0 + x1), -+ z0 = svldnf1ub_s32 (p0, x0 + x1)) -+ -+/* -+** ldnf1ub_s32_1: -+** ldnf1b z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_s32_1, svint32_t, uint8_t, -+ z0 = svldnf1ub_s32 (p0, x0 + svcntw ()), -+ z0 = svldnf1ub_s32 (p0, x0 + svcntw ())) -+ -+/* -+** ldnf1ub_s32_7: -+** ldnf1b z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_s32_7, svint32_t, uint8_t, -+ z0 = svldnf1ub_s32 (p0, x0 + svcntw () * 7), -+ z0 = svldnf1ub_s32 (p0, x0 + svcntw () * 7)) -+ -+/* -+** ldnf1ub_s32_8: -+** incb x0, all, mul #2 -+** ldnf1b z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_s32_8, svint32_t, uint8_t, -+ z0 = svldnf1ub_s32 (p0, x0 + svcntw () * 8), -+ z0 = svldnf1ub_s32 (p0, x0 + svcntw () * 8)) -+ -+/* -+** ldnf1ub_s32_m1: -+** ldnf1b z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_s32_m1, svint32_t, uint8_t, -+ z0 = svldnf1ub_s32 (p0, x0 - svcntw ()), -+ z0 = svldnf1ub_s32 (p0, x0 - svcntw ())) -+ -+/* -+** ldnf1ub_s32_m8: -+** ldnf1b z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_s32_m8, svint32_t, uint8_t, -+ z0 = svldnf1ub_s32 (p0, x0 - svcntw () * 8), -+ z0 = svldnf1ub_s32 (p0, x0 - svcntw () * 8)) -+ -+/* -+** ldnf1ub_s32_m9: -+** decw x0, all, mul #9 -+** ldnf1b z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_s32_m9, svint32_t, uint8_t, -+ z0 = svldnf1ub_s32 (p0, 
x0 - svcntw () * 9), -+ z0 = svldnf1ub_s32 (p0, x0 - svcntw () * 9)) -+ -+/* -+** ldnf1ub_vnum_s32_0: -+** ldnf1b z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_s32_0, svint32_t, uint8_t, -+ z0 = svldnf1ub_vnum_s32 (p0, x0, 0), -+ z0 = svldnf1ub_vnum_s32 (p0, x0, 0)) -+ -+/* -+** ldnf1ub_vnum_s32_1: -+** ldnf1b z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_s32_1, svint32_t, uint8_t, -+ z0 = svldnf1ub_vnum_s32 (p0, x0, 1), -+ z0 = svldnf1ub_vnum_s32 (p0, x0, 1)) -+ -+/* -+** ldnf1ub_vnum_s32_7: -+** ldnf1b z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_s32_7, svint32_t, uint8_t, -+ z0 = svldnf1ub_vnum_s32 (p0, x0, 7), -+ z0 = svldnf1ub_vnum_s32 (p0, x0, 7)) -+ -+/* -+** ldnf1ub_vnum_s32_8: -+** incb x0, all, mul #2 -+** ldnf1b z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_s32_8, svint32_t, uint8_t, -+ z0 = svldnf1ub_vnum_s32 (p0, x0, 8), -+ z0 = svldnf1ub_vnum_s32 (p0, x0, 8)) -+ -+/* -+** ldnf1ub_vnum_s32_m1: -+** ldnf1b z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_s32_m1, svint32_t, uint8_t, -+ z0 = svldnf1ub_vnum_s32 (p0, x0, -1), -+ z0 = svldnf1ub_vnum_s32 (p0, x0, -1)) -+ -+/* -+** ldnf1ub_vnum_s32_m8: -+** ldnf1b z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_s32_m8, svint32_t, uint8_t, -+ z0 = svldnf1ub_vnum_s32 (p0, x0, -8), -+ z0 = svldnf1ub_vnum_s32 (p0, x0, -8)) -+ -+/* -+** ldnf1ub_vnum_s32_m9: -+** decw x0, all, mul #9 -+** ldnf1b z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_s32_m9, svint32_t, uint8_t, -+ z0 = svldnf1ub_vnum_s32 (p0, x0, -9), -+ z0 = svldnf1ub_vnum_s32 (p0, x0, -9)) -+ -+/* -+** ldnf1ub_vnum_s32_x1: -+** cntw (x[0-9]+) -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnf1b z0\.s, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_s32_x1, svint32_t, uint8_t, -+ z0 = svldnf1ub_vnum_s32 (p0, x0, x1), -+ z0 = svldnf1ub_vnum_s32 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s64.c -new file mode 100644 -index 000000000..d7cce11b6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_s64.c -@@ -0,0 +1,154 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnf1ub_s64_base: -+** ldnf1b z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_s64_base, svint64_t, uint8_t, -+ z0 = svldnf1ub_s64 (p0, x0), -+ z0 = svldnf1ub_s64 (p0, x0)) -+ -+/* -+** ldnf1ub_s64_index: -+** add (x[0-9]+), x0, x1 -+** ldnf1b z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_s64_index, svint64_t, uint8_t, -+ z0 = svldnf1ub_s64 (p0, x0 + x1), -+ z0 = svldnf1ub_s64 (p0, x0 + x1)) -+ -+/* -+** ldnf1ub_s64_1: -+** ldnf1b z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_s64_1, svint64_t, uint8_t, -+ z0 = svldnf1ub_s64 (p0, x0 + svcntd ()), -+ z0 = svldnf1ub_s64 (p0, x0 + svcntd ())) -+ -+/* -+** ldnf1ub_s64_7: -+** ldnf1b z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_s64_7, svint64_t, uint8_t, -+ z0 = svldnf1ub_s64 (p0, x0 + svcntd () * 7), -+ z0 = svldnf1ub_s64 (p0, x0 + svcntd () * 7)) -+ -+/* -+** ldnf1ub_s64_8: -+** incb x0 -+** ldnf1b z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_s64_8, svint64_t, uint8_t, -+ z0 = svldnf1ub_s64 (p0, x0 + svcntd () * 8), -+ z0 = svldnf1ub_s64 (p0, x0 + svcntd () * 8)) -+ -+/* -+** ldnf1ub_s64_m1: -+** ldnf1b z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_s64_m1, svint64_t, uint8_t, -+ z0 = svldnf1ub_s64 (p0, x0 - svcntd ()), -+ z0 = svldnf1ub_s64 (p0, x0 - svcntd ())) -+ -+/* -+** ldnf1ub_s64_m8: -+** ldnf1b z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_s64_m8, svint64_t, uint8_t, -+ z0 = svldnf1ub_s64 (p0, x0 - svcntd () * 8), -+ z0 = svldnf1ub_s64 (p0, x0 - svcntd () * 8)) -+ -+/* -+** ldnf1ub_s64_m9: -+** decd x0, all, mul #9 -+** ldnf1b z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_s64_m9, svint64_t, uint8_t, -+ z0 = svldnf1ub_s64 (p0, x0 - svcntd () * 9), -+ z0 = svldnf1ub_s64 (p0, x0 - svcntd () * 9)) -+ -+/* -+** ldnf1ub_vnum_s64_0: -+** ldnf1b z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_s64_0, svint64_t, uint8_t, -+ z0 = svldnf1ub_vnum_s64 (p0, x0, 0), -+ z0 = svldnf1ub_vnum_s64 (p0, x0, 0)) -+ -+/* -+** ldnf1ub_vnum_s64_1: -+** ldnf1b z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_s64_1, svint64_t, uint8_t, -+ z0 = svldnf1ub_vnum_s64 (p0, x0, 1), -+ z0 = svldnf1ub_vnum_s64 (p0, x0, 1)) -+ -+/* -+** ldnf1ub_vnum_s64_7: -+** ldnf1b z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_s64_7, svint64_t, uint8_t, -+ z0 = svldnf1ub_vnum_s64 (p0, x0, 7), -+ z0 = svldnf1ub_vnum_s64 (p0, x0, 7)) -+ -+/* -+** ldnf1ub_vnum_s64_8: -+** incb x0 -+** ldnf1b z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_s64_8, svint64_t, uint8_t, -+ z0 = svldnf1ub_vnum_s64 (p0, x0, 8), -+ z0 = svldnf1ub_vnum_s64 (p0, x0, 8)) -+ -+/* -+** ldnf1ub_vnum_s64_m1: -+** ldnf1b z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_s64_m1, svint64_t, uint8_t, -+ z0 = svldnf1ub_vnum_s64 (p0, x0, -1), -+ z0 = svldnf1ub_vnum_s64 (p0, x0, -1)) -+ -+/* -+** ldnf1ub_vnum_s64_m8: -+** ldnf1b z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_s64_m8, svint64_t, uint8_t, -+ z0 = svldnf1ub_vnum_s64 (p0, x0, -8), -+ z0 = svldnf1ub_vnum_s64 (p0, x0, -8)) -+ -+/* -+** ldnf1ub_vnum_s64_m9: -+** decd x0, all, mul #9 -+** ldnf1b z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_s64_m9, svint64_t, uint8_t, -+ z0 = svldnf1ub_vnum_s64 (p0, x0, -9), -+ z0 = svldnf1ub_vnum_s64 (p0, x0, -9)) -+ -+/* -+** ldnf1ub_vnum_s64_x1: -+** cntd (x[0-9]+) -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnf1b 
z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_s64_x1, svint64_t, uint8_t, -+ z0 = svldnf1ub_vnum_s64 (p0, x0, x1), -+ z0 = svldnf1ub_vnum_s64 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u16.c -new file mode 100644 -index 000000000..7bf82c3b6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u16.c -@@ -0,0 +1,154 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnf1ub_u16_base: -+** ldnf1b z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_u16_base, svuint16_t, uint8_t, -+ z0 = svldnf1ub_u16 (p0, x0), -+ z0 = svldnf1ub_u16 (p0, x0)) -+ -+/* -+** ldnf1ub_u16_index: -+** add (x[0-9]+), x0, x1 -+** ldnf1b z0\.h, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_u16_index, svuint16_t, uint8_t, -+ z0 = svldnf1ub_u16 (p0, x0 + x1), -+ z0 = svldnf1ub_u16 (p0, x0 + x1)) -+ -+/* -+** ldnf1ub_u16_1: -+** ldnf1b z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_u16_1, svuint16_t, uint8_t, -+ z0 = svldnf1ub_u16 (p0, x0 + svcnth ()), -+ z0 = svldnf1ub_u16 (p0, x0 + svcnth ())) -+ -+/* -+** ldnf1ub_u16_7: -+** ldnf1b z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_u16_7, svuint16_t, uint8_t, -+ z0 = svldnf1ub_u16 (p0, x0 + svcnth () * 7), -+ z0 = svldnf1ub_u16 (p0, x0 + svcnth () * 7)) -+ -+/* -+** ldnf1ub_u16_8: -+** incb x0, all, mul #4 -+** ldnf1b z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_u16_8, svuint16_t, uint8_t, -+ z0 = svldnf1ub_u16 (p0, x0 + svcnth () * 8), -+ z0 = svldnf1ub_u16 (p0, x0 + svcnth () * 8)) -+ -+/* -+** ldnf1ub_u16_m1: -+** ldnf1b z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_u16_m1, svuint16_t, uint8_t, -+ z0 = svldnf1ub_u16 (p0, x0 - svcnth ()), -+ z0 = svldnf1ub_u16 (p0, x0 - svcnth ())) -+ -+/* -+** ldnf1ub_u16_m8: -+** ldnf1b z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_u16_m8, svuint16_t, uint8_t, -+ z0 = svldnf1ub_u16 (p0, x0 - svcnth () * 8), -+ z0 = svldnf1ub_u16 (p0, x0 - svcnth () * 8)) -+ -+/* -+** ldnf1ub_u16_m9: -+** dech x0, all, mul #9 -+** ldnf1b z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_u16_m9, svuint16_t, uint8_t, -+ z0 = svldnf1ub_u16 (p0, x0 - svcnth () * 9), -+ z0 = svldnf1ub_u16 (p0, x0 - svcnth () * 9)) -+ -+/* -+** ldnf1ub_vnum_u16_0: -+** ldnf1b z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_u16_0, svuint16_t, uint8_t, -+ z0 = svldnf1ub_vnum_u16 (p0, x0, 0), -+ z0 = svldnf1ub_vnum_u16 (p0, x0, 0)) -+ -+/* -+** ldnf1ub_vnum_u16_1: -+** ldnf1b z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_u16_1, svuint16_t, uint8_t, -+ z0 = svldnf1ub_vnum_u16 (p0, x0, 1), -+ z0 = svldnf1ub_vnum_u16 (p0, x0, 1)) -+ -+/* -+** ldnf1ub_vnum_u16_7: -+** ldnf1b z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_u16_7, svuint16_t, uint8_t, -+ z0 = svldnf1ub_vnum_u16 (p0, x0, 7), -+ z0 = svldnf1ub_vnum_u16 (p0, x0, 7)) -+ -+/* -+** ldnf1ub_vnum_u16_8: -+** incb x0, all, mul #4 -+** ldnf1b z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_u16_8, svuint16_t, uint8_t, -+ z0 = svldnf1ub_vnum_u16 (p0, x0, 8), -+ z0 = svldnf1ub_vnum_u16 (p0, x0, 8)) -+ -+/* -+** ldnf1ub_vnum_u16_m1: -+** ldnf1b z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_u16_m1, svuint16_t, uint8_t, -+ z0 = svldnf1ub_vnum_u16 (p0, x0, -1), -+ z0 = svldnf1ub_vnum_u16 (p0, x0, -1)) -+ -+/* 
-+** ldnf1ub_vnum_u16_m8: -+** ldnf1b z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_u16_m8, svuint16_t, uint8_t, -+ z0 = svldnf1ub_vnum_u16 (p0, x0, -8), -+ z0 = svldnf1ub_vnum_u16 (p0, x0, -8)) -+ -+/* -+** ldnf1ub_vnum_u16_m9: -+** dech x0, all, mul #9 -+** ldnf1b z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_u16_m9, svuint16_t, uint8_t, -+ z0 = svldnf1ub_vnum_u16 (p0, x0, -9), -+ z0 = svldnf1ub_vnum_u16 (p0, x0, -9)) -+ -+/* -+** ldnf1ub_vnum_u16_x1: -+** cnth (x[0-9]+) -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnf1b z0\.h, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_u16_x1, svuint16_t, uint8_t, -+ z0 = svldnf1ub_vnum_u16 (p0, x0, x1), -+ z0 = svldnf1ub_vnum_u16 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u32.c -new file mode 100644 -index 000000000..e2fef064b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u32.c -@@ -0,0 +1,154 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnf1ub_u32_base: -+** ldnf1b z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_u32_base, svuint32_t, uint8_t, -+ z0 = svldnf1ub_u32 (p0, x0), -+ z0 = svldnf1ub_u32 (p0, x0)) -+ -+/* -+** ldnf1ub_u32_index: -+** add (x[0-9]+), x0, x1 -+** ldnf1b z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_u32_index, svuint32_t, uint8_t, -+ z0 = svldnf1ub_u32 (p0, x0 + x1), -+ z0 = svldnf1ub_u32 (p0, x0 + x1)) -+ -+/* -+** ldnf1ub_u32_1: -+** ldnf1b z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_u32_1, svuint32_t, uint8_t, -+ z0 = svldnf1ub_u32 (p0, x0 + svcntw ()), -+ z0 = svldnf1ub_u32 (p0, x0 + svcntw ())) -+ -+/* -+** ldnf1ub_u32_7: -+** ldnf1b z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_u32_7, svuint32_t, uint8_t, -+ z0 = svldnf1ub_u32 (p0, x0 + svcntw () * 7), -+ z0 = svldnf1ub_u32 (p0, x0 + svcntw () * 7)) -+ -+/* -+** ldnf1ub_u32_8: -+** incb x0, all, mul #2 -+** ldnf1b z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_u32_8, svuint32_t, uint8_t, -+ z0 = svldnf1ub_u32 (p0, x0 + svcntw () * 8), -+ z0 = svldnf1ub_u32 (p0, x0 + svcntw () * 8)) -+ -+/* -+** ldnf1ub_u32_m1: -+** ldnf1b z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_u32_m1, svuint32_t, uint8_t, -+ z0 = svldnf1ub_u32 (p0, x0 - svcntw ()), -+ z0 = svldnf1ub_u32 (p0, x0 - svcntw ())) -+ -+/* -+** ldnf1ub_u32_m8: -+** ldnf1b z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_u32_m8, svuint32_t, uint8_t, -+ z0 = svldnf1ub_u32 (p0, x0 - svcntw () * 8), -+ z0 = svldnf1ub_u32 (p0, x0 - svcntw () * 8)) -+ -+/* -+** ldnf1ub_u32_m9: -+** decw x0, all, mul #9 -+** ldnf1b z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_u32_m9, svuint32_t, uint8_t, -+ z0 = svldnf1ub_u32 (p0, x0 - svcntw () * 9), -+ z0 = svldnf1ub_u32 (p0, x0 - svcntw () * 9)) -+ -+/* -+** ldnf1ub_vnum_u32_0: -+** ldnf1b z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_u32_0, svuint32_t, uint8_t, -+ z0 = svldnf1ub_vnum_u32 (p0, x0, 0), -+ z0 = svldnf1ub_vnum_u32 (p0, x0, 0)) -+ -+/* -+** ldnf1ub_vnum_u32_1: -+** ldnf1b z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_u32_1, svuint32_t, uint8_t, -+ z0 = svldnf1ub_vnum_u32 (p0, x0, 1), -+ z0 = svldnf1ub_vnum_u32 (p0, x0, 1)) -+ -+/* -+** ldnf1ub_vnum_u32_7: -+** ldnf1b z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_u32_7, 
svuint32_t, uint8_t, -+ z0 = svldnf1ub_vnum_u32 (p0, x0, 7), -+ z0 = svldnf1ub_vnum_u32 (p0, x0, 7)) -+ -+/* -+** ldnf1ub_vnum_u32_8: -+** incb x0, all, mul #2 -+** ldnf1b z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_u32_8, svuint32_t, uint8_t, -+ z0 = svldnf1ub_vnum_u32 (p0, x0, 8), -+ z0 = svldnf1ub_vnum_u32 (p0, x0, 8)) -+ -+/* -+** ldnf1ub_vnum_u32_m1: -+** ldnf1b z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_u32_m1, svuint32_t, uint8_t, -+ z0 = svldnf1ub_vnum_u32 (p0, x0, -1), -+ z0 = svldnf1ub_vnum_u32 (p0, x0, -1)) -+ -+/* -+** ldnf1ub_vnum_u32_m8: -+** ldnf1b z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_u32_m8, svuint32_t, uint8_t, -+ z0 = svldnf1ub_vnum_u32 (p0, x0, -8), -+ z0 = svldnf1ub_vnum_u32 (p0, x0, -8)) -+ -+/* -+** ldnf1ub_vnum_u32_m9: -+** decw x0, all, mul #9 -+** ldnf1b z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_u32_m9, svuint32_t, uint8_t, -+ z0 = svldnf1ub_vnum_u32 (p0, x0, -9), -+ z0 = svldnf1ub_vnum_u32 (p0, x0, -9)) -+ -+/* -+** ldnf1ub_vnum_u32_x1: -+** cntw (x[0-9]+) -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnf1b z0\.s, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_u32_x1, svuint32_t, uint8_t, -+ z0 = svldnf1ub_vnum_u32 (p0, x0, x1), -+ z0 = svldnf1ub_vnum_u32 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u64.c -new file mode 100644 -index 000000000..57c61e122 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1ub_u64.c -@@ -0,0 +1,154 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnf1ub_u64_base: -+** ldnf1b z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_u64_base, svuint64_t, uint8_t, -+ z0 = svldnf1ub_u64 (p0, x0), -+ z0 = svldnf1ub_u64 (p0, x0)) -+ -+/* -+** ldnf1ub_u64_index: -+** add (x[0-9]+), x0, x1 -+** ldnf1b z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_u64_index, svuint64_t, uint8_t, -+ z0 = svldnf1ub_u64 (p0, x0 + x1), -+ z0 = svldnf1ub_u64 (p0, x0 + x1)) -+ -+/* -+** ldnf1ub_u64_1: -+** ldnf1b z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_u64_1, svuint64_t, uint8_t, -+ z0 = svldnf1ub_u64 (p0, x0 + svcntd ()), -+ z0 = svldnf1ub_u64 (p0, x0 + svcntd ())) -+ -+/* -+** ldnf1ub_u64_7: -+** ldnf1b z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_u64_7, svuint64_t, uint8_t, -+ z0 = svldnf1ub_u64 (p0, x0 + svcntd () * 7), -+ z0 = svldnf1ub_u64 (p0, x0 + svcntd () * 7)) -+ -+/* -+** ldnf1ub_u64_8: -+** incb x0 -+** ldnf1b z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_u64_8, svuint64_t, uint8_t, -+ z0 = svldnf1ub_u64 (p0, x0 + svcntd () * 8), -+ z0 = svldnf1ub_u64 (p0, x0 + svcntd () * 8)) -+ -+/* -+** ldnf1ub_u64_m1: -+** ldnf1b z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_u64_m1, svuint64_t, uint8_t, -+ z0 = svldnf1ub_u64 (p0, x0 - svcntd ()), -+ z0 = svldnf1ub_u64 (p0, x0 - svcntd ())) -+ -+/* -+** ldnf1ub_u64_m8: -+** ldnf1b z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_u64_m8, svuint64_t, uint8_t, -+ z0 = svldnf1ub_u64 (p0, x0 - svcntd () * 8), -+ z0 = svldnf1ub_u64 (p0, x0 - svcntd () * 8)) -+ -+/* -+** ldnf1ub_u64_m9: -+** decd x0, all, mul #9 -+** ldnf1b z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_u64_m9, svuint64_t, uint8_t, -+ z0 = svldnf1ub_u64 (p0, x0 - svcntd () * 9), -+ z0 = svldnf1ub_u64 (p0, x0 - svcntd () * 9)) -+ 
-+/* -+** ldnf1ub_vnum_u64_0: -+** ldnf1b z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_u64_0, svuint64_t, uint8_t, -+ z0 = svldnf1ub_vnum_u64 (p0, x0, 0), -+ z0 = svldnf1ub_vnum_u64 (p0, x0, 0)) -+ -+/* -+** ldnf1ub_vnum_u64_1: -+** ldnf1b z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_u64_1, svuint64_t, uint8_t, -+ z0 = svldnf1ub_vnum_u64 (p0, x0, 1), -+ z0 = svldnf1ub_vnum_u64 (p0, x0, 1)) -+ -+/* -+** ldnf1ub_vnum_u64_7: -+** ldnf1b z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_u64_7, svuint64_t, uint8_t, -+ z0 = svldnf1ub_vnum_u64 (p0, x0, 7), -+ z0 = svldnf1ub_vnum_u64 (p0, x0, 7)) -+ -+/* -+** ldnf1ub_vnum_u64_8: -+** incb x0 -+** ldnf1b z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_u64_8, svuint64_t, uint8_t, -+ z0 = svldnf1ub_vnum_u64 (p0, x0, 8), -+ z0 = svldnf1ub_vnum_u64 (p0, x0, 8)) -+ -+/* -+** ldnf1ub_vnum_u64_m1: -+** ldnf1b z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_u64_m1, svuint64_t, uint8_t, -+ z0 = svldnf1ub_vnum_u64 (p0, x0, -1), -+ z0 = svldnf1ub_vnum_u64 (p0, x0, -1)) -+ -+/* -+** ldnf1ub_vnum_u64_m8: -+** ldnf1b z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_u64_m8, svuint64_t, uint8_t, -+ z0 = svldnf1ub_vnum_u64 (p0, x0, -8), -+ z0 = svldnf1ub_vnum_u64 (p0, x0, -8)) -+ -+/* -+** ldnf1ub_vnum_u64_m9: -+** decd x0, all, mul #9 -+** ldnf1b z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_u64_m9, svuint64_t, uint8_t, -+ z0 = svldnf1ub_vnum_u64 (p0, x0, -9), -+ z0 = svldnf1ub_vnum_u64 (p0, x0, -9)) -+ -+/* -+** ldnf1ub_vnum_u64_x1: -+** cntd (x[0-9]+) -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnf1b z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnf1ub_vnum_u64_x1, svuint64_t, uint8_t, -+ z0 = svldnf1ub_vnum_u64 (p0, x0, x1), -+ z0 = svldnf1ub_vnum_u64 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_s32.c -new file mode 100644 -index 000000000..ed9686c4e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_s32.c -@@ -0,0 +1,154 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnf1uh_s32_base: -+** ldnf1h z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_s32_base, svint32_t, uint16_t, -+ z0 = svldnf1uh_s32 (p0, x0), -+ z0 = svldnf1uh_s32 (p0, x0)) -+ -+/* -+** ldnf1uh_s32_index: -+** add (x[0-9]+), x0, x1, lsl 1 -+** ldnf1h z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_s32_index, svint32_t, uint16_t, -+ z0 = svldnf1uh_s32 (p0, x0 + x1), -+ z0 = svldnf1uh_s32 (p0, x0 + x1)) -+ -+/* -+** ldnf1uh_s32_1: -+** ldnf1h z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_s32_1, svint32_t, uint16_t, -+ z0 = svldnf1uh_s32 (p0, x0 + svcntw ()), -+ z0 = svldnf1uh_s32 (p0, x0 + svcntw ())) -+ -+/* -+** ldnf1uh_s32_7: -+** ldnf1h z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_s32_7, svint32_t, uint16_t, -+ z0 = svldnf1uh_s32 (p0, x0 + svcntw () * 7), -+ z0 = svldnf1uh_s32 (p0, x0 + svcntw () * 7)) -+ -+/* -+** ldnf1uh_s32_8: -+** incb x0, all, mul #4 -+** ldnf1h z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_s32_8, svint32_t, uint16_t, -+ z0 = svldnf1uh_s32 (p0, x0 + svcntw () * 8), -+ z0 = svldnf1uh_s32 (p0, x0 + svcntw () * 8)) -+ -+/* -+** ldnf1uh_s32_m1: -+** ldnf1h z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_s32_m1, svint32_t, uint16_t, -+ z0 = svldnf1uh_s32 (p0, x0 - svcntw ()), -+ z0 = svldnf1uh_s32 (p0, x0 - svcntw ())) -+ -+/* -+** ldnf1uh_s32_m8: -+** ldnf1h z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_s32_m8, svint32_t, uint16_t, -+ z0 = svldnf1uh_s32 (p0, x0 - svcntw () * 8), -+ z0 = svldnf1uh_s32 (p0, x0 - svcntw () * 8)) -+ -+/* -+** ldnf1uh_s32_m9: -+** dech x0, all, mul #9 -+** ldnf1h z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_s32_m9, svint32_t, uint16_t, -+ z0 = svldnf1uh_s32 (p0, x0 - svcntw () * 9), -+ z0 = svldnf1uh_s32 (p0, x0 - svcntw () * 9)) -+ -+/* -+** ldnf1uh_vnum_s32_0: -+** ldnf1h z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_vnum_s32_0, svint32_t, uint16_t, -+ z0 = svldnf1uh_vnum_s32 (p0, x0, 0), -+ z0 = svldnf1uh_vnum_s32 (p0, x0, 0)) -+ -+/* -+** ldnf1uh_vnum_s32_1: -+** ldnf1h z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_vnum_s32_1, svint32_t, uint16_t, -+ z0 = svldnf1uh_vnum_s32 (p0, x0, 1), -+ z0 = svldnf1uh_vnum_s32 (p0, x0, 1)) -+ -+/* -+** ldnf1uh_vnum_s32_7: -+** ldnf1h z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_vnum_s32_7, svint32_t, uint16_t, -+ z0 = svldnf1uh_vnum_s32 (p0, x0, 7), -+ z0 = svldnf1uh_vnum_s32 (p0, x0, 7)) -+ -+/* -+** ldnf1uh_vnum_s32_8: -+** incb x0, all, mul #4 -+** ldnf1h z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_vnum_s32_8, svint32_t, uint16_t, -+ z0 = svldnf1uh_vnum_s32 (p0, x0, 8), -+ z0 = svldnf1uh_vnum_s32 (p0, x0, 8)) -+ -+/* -+** ldnf1uh_vnum_s32_m1: -+** ldnf1h z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_vnum_s32_m1, svint32_t, uint16_t, -+ z0 = svldnf1uh_vnum_s32 (p0, x0, -1), -+ z0 = svldnf1uh_vnum_s32 (p0, x0, -1)) -+ -+/* -+** ldnf1uh_vnum_s32_m8: -+** ldnf1h z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_vnum_s32_m8, svint32_t, uint16_t, -+ z0 = svldnf1uh_vnum_s32 (p0, x0, -8), -+ z0 = svldnf1uh_vnum_s32 (p0, x0, -8)) -+ -+/* -+** ldnf1uh_vnum_s32_m9: -+** dech x0, all, mul #9 -+** ldnf1h z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_vnum_s32_m9, svint32_t, uint16_t, -+ z0 = svldnf1uh_vnum_s32 (p0, x0, -9), -+ z0 = svldnf1uh_vnum_s32 (p0, x0, -9)) -+ -+/* -+** ldnf1uh_vnum_s32_x1: -+** cnth (x[0-9]+) -+** 
madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnf1h z0\.s, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_vnum_s32_x1, svint32_t, uint16_t, -+ z0 = svldnf1uh_vnum_s32 (p0, x0, x1), -+ z0 = svldnf1uh_vnum_s32 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_s64.c -new file mode 100644 -index 000000000..a3107f562 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_s64.c -@@ -0,0 +1,154 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnf1uh_s64_base: -+** ldnf1h z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_s64_base, svint64_t, uint16_t, -+ z0 = svldnf1uh_s64 (p0, x0), -+ z0 = svldnf1uh_s64 (p0, x0)) -+ -+/* -+** ldnf1uh_s64_index: -+** add (x[0-9]+), x0, x1, lsl 1 -+** ldnf1h z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_s64_index, svint64_t, uint16_t, -+ z0 = svldnf1uh_s64 (p0, x0 + x1), -+ z0 = svldnf1uh_s64 (p0, x0 + x1)) -+ -+/* -+** ldnf1uh_s64_1: -+** ldnf1h z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_s64_1, svint64_t, uint16_t, -+ z0 = svldnf1uh_s64 (p0, x0 + svcntd ()), -+ z0 = svldnf1uh_s64 (p0, x0 + svcntd ())) -+ -+/* -+** ldnf1uh_s64_7: -+** ldnf1h z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_s64_7, svint64_t, uint16_t, -+ z0 = svldnf1uh_s64 (p0, x0 + svcntd () * 7), -+ z0 = svldnf1uh_s64 (p0, x0 + svcntd () * 7)) -+ -+/* -+** ldnf1uh_s64_8: -+** incb x0, all, mul #2 -+** ldnf1h z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_s64_8, svint64_t, uint16_t, -+ z0 = svldnf1uh_s64 (p0, x0 + svcntd () * 8), -+ z0 = svldnf1uh_s64 (p0, x0 + svcntd () * 8)) -+ -+/* -+** ldnf1uh_s64_m1: -+** ldnf1h z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_s64_m1, svint64_t, uint16_t, -+ z0 = svldnf1uh_s64 (p0, x0 - svcntd ()), -+ z0 = svldnf1uh_s64 (p0, x0 - svcntd ())) -+ -+/* -+** ldnf1uh_s64_m8: -+** ldnf1h z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_s64_m8, svint64_t, uint16_t, -+ z0 = svldnf1uh_s64 (p0, x0 - svcntd () * 8), -+ z0 = svldnf1uh_s64 (p0, x0 - svcntd () * 8)) -+ -+/* -+** ldnf1uh_s64_m9: -+** decw x0, all, mul #9 -+** ldnf1h z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_s64_m9, svint64_t, uint16_t, -+ z0 = svldnf1uh_s64 (p0, x0 - svcntd () * 9), -+ z0 = svldnf1uh_s64 (p0, x0 - svcntd () * 9)) -+ -+/* -+** ldnf1uh_vnum_s64_0: -+** ldnf1h z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_vnum_s64_0, svint64_t, uint16_t, -+ z0 = svldnf1uh_vnum_s64 (p0, x0, 0), -+ z0 = svldnf1uh_vnum_s64 (p0, x0, 0)) -+ -+/* -+** ldnf1uh_vnum_s64_1: -+** ldnf1h z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_vnum_s64_1, svint64_t, uint16_t, -+ z0 = svldnf1uh_vnum_s64 (p0, x0, 1), -+ z0 = svldnf1uh_vnum_s64 (p0, x0, 1)) -+ -+/* -+** ldnf1uh_vnum_s64_7: -+** ldnf1h z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_vnum_s64_7, svint64_t, uint16_t, -+ z0 = svldnf1uh_vnum_s64 (p0, x0, 7), -+ z0 = svldnf1uh_vnum_s64 (p0, x0, 7)) -+ -+/* -+** ldnf1uh_vnum_s64_8: -+** incb x0, all, mul #2 -+** ldnf1h z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_vnum_s64_8, svint64_t, uint16_t, -+ z0 = svldnf1uh_vnum_s64 (p0, x0, 8), -+ z0 = svldnf1uh_vnum_s64 (p0, x0, 8)) -+ -+/* -+** ldnf1uh_vnum_s64_m1: -+** ldnf1h z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_vnum_s64_m1, svint64_t, uint16_t, -+ z0 = svldnf1uh_vnum_s64 (p0, 
x0, -1), -+ z0 = svldnf1uh_vnum_s64 (p0, x0, -1)) -+ -+/* -+** ldnf1uh_vnum_s64_m8: -+** ldnf1h z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_vnum_s64_m8, svint64_t, uint16_t, -+ z0 = svldnf1uh_vnum_s64 (p0, x0, -8), -+ z0 = svldnf1uh_vnum_s64 (p0, x0, -8)) -+ -+/* -+** ldnf1uh_vnum_s64_m9: -+** decw x0, all, mul #9 -+** ldnf1h z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_vnum_s64_m9, svint64_t, uint16_t, -+ z0 = svldnf1uh_vnum_s64 (p0, x0, -9), -+ z0 = svldnf1uh_vnum_s64 (p0, x0, -9)) -+ -+/* -+** ldnf1uh_vnum_s64_x1: -+** cntw (x[0-9]+) -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnf1h z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_vnum_s64_x1, svint64_t, uint16_t, -+ z0 = svldnf1uh_vnum_s64 (p0, x0, x1), -+ z0 = svldnf1uh_vnum_s64 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_u32.c -new file mode 100644 -index 000000000..93d5abaf7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_u32.c -@@ -0,0 +1,154 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnf1uh_u32_base: -+** ldnf1h z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_u32_base, svuint32_t, uint16_t, -+ z0 = svldnf1uh_u32 (p0, x0), -+ z0 = svldnf1uh_u32 (p0, x0)) -+ -+/* -+** ldnf1uh_u32_index: -+** add (x[0-9]+), x0, x1, lsl 1 -+** ldnf1h z0\.s, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_u32_index, svuint32_t, uint16_t, -+ z0 = svldnf1uh_u32 (p0, x0 + x1), -+ z0 = svldnf1uh_u32 (p0, x0 + x1)) -+ -+/* -+** ldnf1uh_u32_1: -+** ldnf1h z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_u32_1, svuint32_t, uint16_t, -+ z0 = svldnf1uh_u32 (p0, x0 + svcntw ()), -+ z0 = svldnf1uh_u32 (p0, x0 + svcntw ())) -+ -+/* -+** ldnf1uh_u32_7: -+** ldnf1h z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_u32_7, svuint32_t, uint16_t, -+ z0 = svldnf1uh_u32 (p0, x0 + svcntw () * 7), -+ z0 = svldnf1uh_u32 (p0, x0 + svcntw () * 7)) -+ -+/* -+** ldnf1uh_u32_8: -+** incb x0, all, mul #4 -+** ldnf1h z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_u32_8, svuint32_t, uint16_t, -+ z0 = svldnf1uh_u32 (p0, x0 + svcntw () * 8), -+ z0 = svldnf1uh_u32 (p0, x0 + svcntw () * 8)) -+ -+/* -+** ldnf1uh_u32_m1: -+** ldnf1h z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_u32_m1, svuint32_t, uint16_t, -+ z0 = svldnf1uh_u32 (p0, x0 - svcntw ()), -+ z0 = svldnf1uh_u32 (p0, x0 - svcntw ())) -+ -+/* -+** ldnf1uh_u32_m8: -+** ldnf1h z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_u32_m8, svuint32_t, uint16_t, -+ z0 = svldnf1uh_u32 (p0, x0 - svcntw () * 8), -+ z0 = svldnf1uh_u32 (p0, x0 - svcntw () * 8)) -+ -+/* -+** ldnf1uh_u32_m9: -+** dech x0, all, mul #9 -+** ldnf1h z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_u32_m9, svuint32_t, uint16_t, -+ z0 = svldnf1uh_u32 (p0, x0 - svcntw () * 9), -+ z0 = svldnf1uh_u32 (p0, x0 - svcntw () * 9)) -+ -+/* -+** ldnf1uh_vnum_u32_0: -+** ldnf1h z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_vnum_u32_0, svuint32_t, uint16_t, -+ z0 = svldnf1uh_vnum_u32 (p0, x0, 0), -+ z0 = svldnf1uh_vnum_u32 (p0, x0, 0)) -+ -+/* -+** ldnf1uh_vnum_u32_1: -+** ldnf1h z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_vnum_u32_1, svuint32_t, uint16_t, -+ z0 = svldnf1uh_vnum_u32 (p0, x0, 1), -+ z0 = svldnf1uh_vnum_u32 (p0, x0, 1)) -+ -+/* -+** ldnf1uh_vnum_u32_7: -+** ldnf1h z0\.s, 
p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_vnum_u32_7, svuint32_t, uint16_t, -+ z0 = svldnf1uh_vnum_u32 (p0, x0, 7), -+ z0 = svldnf1uh_vnum_u32 (p0, x0, 7)) -+ -+/* -+** ldnf1uh_vnum_u32_8: -+** incb x0, all, mul #4 -+** ldnf1h z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_vnum_u32_8, svuint32_t, uint16_t, -+ z0 = svldnf1uh_vnum_u32 (p0, x0, 8), -+ z0 = svldnf1uh_vnum_u32 (p0, x0, 8)) -+ -+/* -+** ldnf1uh_vnum_u32_m1: -+** ldnf1h z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_vnum_u32_m1, svuint32_t, uint16_t, -+ z0 = svldnf1uh_vnum_u32 (p0, x0, -1), -+ z0 = svldnf1uh_vnum_u32 (p0, x0, -1)) -+ -+/* -+** ldnf1uh_vnum_u32_m8: -+** ldnf1h z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_vnum_u32_m8, svuint32_t, uint16_t, -+ z0 = svldnf1uh_vnum_u32 (p0, x0, -8), -+ z0 = svldnf1uh_vnum_u32 (p0, x0, -8)) -+ -+/* -+** ldnf1uh_vnum_u32_m9: -+** dech x0, all, mul #9 -+** ldnf1h z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_vnum_u32_m9, svuint32_t, uint16_t, -+ z0 = svldnf1uh_vnum_u32 (p0, x0, -9), -+ z0 = svldnf1uh_vnum_u32 (p0, x0, -9)) -+ -+/* -+** ldnf1uh_vnum_u32_x1: -+** cnth (x[0-9]+) -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnf1h z0\.s, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_vnum_u32_x1, svuint32_t, uint16_t, -+ z0 = svldnf1uh_vnum_u32 (p0, x0, x1), -+ z0 = svldnf1uh_vnum_u32 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_u64.c -new file mode 100644 -index 000000000..32d36a84c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uh_u64.c -@@ -0,0 +1,154 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnf1uh_u64_base: -+** ldnf1h z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_u64_base, svuint64_t, uint16_t, -+ z0 = svldnf1uh_u64 (p0, x0), -+ z0 = svldnf1uh_u64 (p0, x0)) -+ -+/* -+** ldnf1uh_u64_index: -+** add (x[0-9]+), x0, x1, lsl 1 -+** ldnf1h z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_u64_index, svuint64_t, uint16_t, -+ z0 = svldnf1uh_u64 (p0, x0 + x1), -+ z0 = svldnf1uh_u64 (p0, x0 + x1)) -+ -+/* -+** ldnf1uh_u64_1: -+** ldnf1h z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_u64_1, svuint64_t, uint16_t, -+ z0 = svldnf1uh_u64 (p0, x0 + svcntd ()), -+ z0 = svldnf1uh_u64 (p0, x0 + svcntd ())) -+ -+/* -+** ldnf1uh_u64_7: -+** ldnf1h z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_u64_7, svuint64_t, uint16_t, -+ z0 = svldnf1uh_u64 (p0, x0 + svcntd () * 7), -+ z0 = svldnf1uh_u64 (p0, x0 + svcntd () * 7)) -+ -+/* -+** ldnf1uh_u64_8: -+** incb x0, all, mul #2 -+** ldnf1h z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_u64_8, svuint64_t, uint16_t, -+ z0 = svldnf1uh_u64 (p0, x0 + svcntd () * 8), -+ z0 = svldnf1uh_u64 (p0, x0 + svcntd () * 8)) -+ -+/* -+** ldnf1uh_u64_m1: -+** ldnf1h z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_u64_m1, svuint64_t, uint16_t, -+ z0 = svldnf1uh_u64 (p0, x0 - svcntd ()), -+ z0 = svldnf1uh_u64 (p0, x0 - svcntd ())) -+ -+/* -+** ldnf1uh_u64_m8: -+** ldnf1h z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_u64_m8, svuint64_t, uint16_t, -+ z0 = svldnf1uh_u64 (p0, x0 - svcntd () * 8), -+ z0 = svldnf1uh_u64 (p0, x0 - svcntd () * 8)) -+ -+/* -+** ldnf1uh_u64_m9: -+** decw x0, all, mul #9 -+** ldnf1h z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_u64_m9, svuint64_t, 
uint16_t, -+ z0 = svldnf1uh_u64 (p0, x0 - svcntd () * 9), -+ z0 = svldnf1uh_u64 (p0, x0 - svcntd () * 9)) -+ -+/* -+** ldnf1uh_vnum_u64_0: -+** ldnf1h z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_vnum_u64_0, svuint64_t, uint16_t, -+ z0 = svldnf1uh_vnum_u64 (p0, x0, 0), -+ z0 = svldnf1uh_vnum_u64 (p0, x0, 0)) -+ -+/* -+** ldnf1uh_vnum_u64_1: -+** ldnf1h z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_vnum_u64_1, svuint64_t, uint16_t, -+ z0 = svldnf1uh_vnum_u64 (p0, x0, 1), -+ z0 = svldnf1uh_vnum_u64 (p0, x0, 1)) -+ -+/* -+** ldnf1uh_vnum_u64_7: -+** ldnf1h z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_vnum_u64_7, svuint64_t, uint16_t, -+ z0 = svldnf1uh_vnum_u64 (p0, x0, 7), -+ z0 = svldnf1uh_vnum_u64 (p0, x0, 7)) -+ -+/* -+** ldnf1uh_vnum_u64_8: -+** incb x0, all, mul #2 -+** ldnf1h z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_vnum_u64_8, svuint64_t, uint16_t, -+ z0 = svldnf1uh_vnum_u64 (p0, x0, 8), -+ z0 = svldnf1uh_vnum_u64 (p0, x0, 8)) -+ -+/* -+** ldnf1uh_vnum_u64_m1: -+** ldnf1h z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_vnum_u64_m1, svuint64_t, uint16_t, -+ z0 = svldnf1uh_vnum_u64 (p0, x0, -1), -+ z0 = svldnf1uh_vnum_u64 (p0, x0, -1)) -+ -+/* -+** ldnf1uh_vnum_u64_m8: -+** ldnf1h z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_vnum_u64_m8, svuint64_t, uint16_t, -+ z0 = svldnf1uh_vnum_u64 (p0, x0, -8), -+ z0 = svldnf1uh_vnum_u64 (p0, x0, -8)) -+ -+/* -+** ldnf1uh_vnum_u64_m9: -+** decw x0, all, mul #9 -+** ldnf1h z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_vnum_u64_m9, svuint64_t, uint16_t, -+ z0 = svldnf1uh_vnum_u64 (p0, x0, -9), -+ z0 = svldnf1uh_vnum_u64 (p0, x0, -9)) -+ -+/* -+** ldnf1uh_vnum_u64_x1: -+** cntw (x[0-9]+) -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnf1h z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnf1uh_vnum_u64_x1, svuint64_t, uint16_t, -+ z0 = svldnf1uh_vnum_u64 (p0, x0, x1), -+ z0 = svldnf1uh_vnum_u64 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uw_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uw_s64.c -new file mode 100644 -index 000000000..373922791 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uw_s64.c -@@ -0,0 +1,154 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnf1uw_s64_base: -+** ldnf1w z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1uw_s64_base, svint64_t, uint32_t, -+ z0 = svldnf1uw_s64 (p0, x0), -+ z0 = svldnf1uw_s64 (p0, x0)) -+ -+/* -+** ldnf1uw_s64_index: -+** add (x[0-9]+), x0, x1, lsl 2 -+** ldnf1w z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ldnf1uw_s64_index, svint64_t, uint32_t, -+ z0 = svldnf1uw_s64 (p0, x0 + x1), -+ z0 = svldnf1uw_s64 (p0, x0 + x1)) -+ -+/* -+** ldnf1uw_s64_1: -+** ldnf1w z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uw_s64_1, svint64_t, uint32_t, -+ z0 = svldnf1uw_s64 (p0, x0 + svcntd ()), -+ z0 = svldnf1uw_s64 (p0, x0 + svcntd ())) -+ -+/* -+** ldnf1uw_s64_7: -+** ldnf1w z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uw_s64_7, svint64_t, uint32_t, -+ z0 = svldnf1uw_s64 (p0, x0 + svcntd () * 7), -+ z0 = svldnf1uw_s64 (p0, x0 + svcntd () * 7)) -+ -+/* -+** ldnf1uw_s64_8: -+** incb x0, all, mul #4 -+** ldnf1w z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1uw_s64_8, svint64_t, uint32_t, -+ z0 = svldnf1uw_s64 (p0, x0 + svcntd () * 8), -+ z0 = svldnf1uw_s64 (p0, x0 + svcntd () * 8)) -+ -+/* -+** ldnf1uw_s64_m1: -+** ldnf1w z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uw_s64_m1, svint64_t, uint32_t, -+ z0 = svldnf1uw_s64 (p0, x0 - svcntd ()), -+ z0 = svldnf1uw_s64 (p0, x0 - svcntd ())) -+ -+/* -+** ldnf1uw_s64_m8: -+** ldnf1w z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uw_s64_m8, svint64_t, uint32_t, -+ z0 = svldnf1uw_s64 (p0, x0 - svcntd () * 8), -+ z0 = svldnf1uw_s64 (p0, x0 - svcntd () * 8)) -+ -+/* -+** ldnf1uw_s64_m9: -+** dech x0, all, mul #9 -+** ldnf1w z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1uw_s64_m9, svint64_t, uint32_t, -+ z0 = svldnf1uw_s64 (p0, x0 - svcntd () * 9), -+ z0 = svldnf1uw_s64 (p0, x0 - svcntd () * 9)) -+ -+/* -+** ldnf1uw_vnum_s64_0: -+** ldnf1w z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1uw_vnum_s64_0, svint64_t, uint32_t, -+ z0 = svldnf1uw_vnum_s64 (p0, x0, 0), -+ z0 = svldnf1uw_vnum_s64 (p0, x0, 0)) -+ -+/* -+** ldnf1uw_vnum_s64_1: -+** ldnf1w z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uw_vnum_s64_1, svint64_t, uint32_t, -+ z0 = svldnf1uw_vnum_s64 (p0, x0, 1), -+ z0 = svldnf1uw_vnum_s64 (p0, x0, 1)) -+ -+/* -+** ldnf1uw_vnum_s64_7: -+** ldnf1w z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uw_vnum_s64_7, svint64_t, uint32_t, -+ z0 = svldnf1uw_vnum_s64 (p0, x0, 7), -+ z0 = svldnf1uw_vnum_s64 (p0, x0, 7)) -+ -+/* -+** ldnf1uw_vnum_s64_8: -+** incb x0, all, mul #4 -+** ldnf1w z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1uw_vnum_s64_8, svint64_t, uint32_t, -+ z0 = svldnf1uw_vnum_s64 (p0, x0, 8), -+ z0 = svldnf1uw_vnum_s64 (p0, x0, 8)) -+ -+/* -+** ldnf1uw_vnum_s64_m1: -+** ldnf1w z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uw_vnum_s64_m1, svint64_t, uint32_t, -+ z0 = svldnf1uw_vnum_s64 (p0, x0, -1), -+ z0 = svldnf1uw_vnum_s64 (p0, x0, -1)) -+ -+/* -+** ldnf1uw_vnum_s64_m8: -+** ldnf1w z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uw_vnum_s64_m8, svint64_t, uint32_t, -+ z0 = svldnf1uw_vnum_s64 (p0, x0, -8), -+ z0 = svldnf1uw_vnum_s64 (p0, x0, -8)) -+ -+/* -+** ldnf1uw_vnum_s64_m9: -+** dech x0, all, mul #9 -+** ldnf1w z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1uw_vnum_s64_m9, svint64_t, uint32_t, -+ z0 = svldnf1uw_vnum_s64 (p0, x0, -9), -+ z0 = svldnf1uw_vnum_s64 (p0, x0, -9)) -+ -+/* -+** ldnf1uw_vnum_s64_x1: -+** cnth (x[0-9]+) -+** 
madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnf1w z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnf1uw_vnum_s64_x1, svint64_t, uint32_t, -+ z0 = svldnf1uw_vnum_s64 (p0, x0, x1), -+ z0 = svldnf1uw_vnum_s64 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uw_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uw_u64.c -new file mode 100644 -index 000000000..b3c3be1d0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnf1uw_u64.c -@@ -0,0 +1,154 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnf1uw_u64_base: -+** ldnf1w z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1uw_u64_base, svuint64_t, uint32_t, -+ z0 = svldnf1uw_u64 (p0, x0), -+ z0 = svldnf1uw_u64 (p0, x0)) -+ -+/* -+** ldnf1uw_u64_index: -+** add (x[0-9]+), x0, x1, lsl 2 -+** ldnf1w z0\.d, p0/z, \[\1\] -+** ret -+*/ -+TEST_LOAD (ldnf1uw_u64_index, svuint64_t, uint32_t, -+ z0 = svldnf1uw_u64 (p0, x0 + x1), -+ z0 = svldnf1uw_u64 (p0, x0 + x1)) -+ -+/* -+** ldnf1uw_u64_1: -+** ldnf1w z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uw_u64_1, svuint64_t, uint32_t, -+ z0 = svldnf1uw_u64 (p0, x0 + svcntd ()), -+ z0 = svldnf1uw_u64 (p0, x0 + svcntd ())) -+ -+/* -+** ldnf1uw_u64_7: -+** ldnf1w z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uw_u64_7, svuint64_t, uint32_t, -+ z0 = svldnf1uw_u64 (p0, x0 + svcntd () * 7), -+ z0 = svldnf1uw_u64 (p0, x0 + svcntd () * 7)) -+ -+/* -+** ldnf1uw_u64_8: -+** incb x0, all, mul #4 -+** ldnf1w z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1uw_u64_8, svuint64_t, uint32_t, -+ z0 = svldnf1uw_u64 (p0, x0 + svcntd () * 8), -+ z0 = svldnf1uw_u64 (p0, x0 + svcntd () * 8)) -+ -+/* -+** ldnf1uw_u64_m1: -+** ldnf1w z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uw_u64_m1, svuint64_t, uint32_t, -+ z0 = svldnf1uw_u64 (p0, x0 - svcntd ()), -+ z0 = svldnf1uw_u64 (p0, x0 - svcntd ())) -+ -+/* -+** ldnf1uw_u64_m8: -+** ldnf1w z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uw_u64_m8, svuint64_t, uint32_t, -+ z0 = svldnf1uw_u64 (p0, x0 - svcntd () * 8), -+ z0 = svldnf1uw_u64 (p0, x0 - svcntd () * 8)) -+ -+/* -+** ldnf1uw_u64_m9: -+** dech x0, all, mul #9 -+** ldnf1w z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1uw_u64_m9, svuint64_t, uint32_t, -+ z0 = svldnf1uw_u64 (p0, x0 - svcntd () * 9), -+ z0 = svldnf1uw_u64 (p0, x0 - svcntd () * 9)) -+ -+/* -+** ldnf1uw_vnum_u64_0: -+** ldnf1w z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1uw_vnum_u64_0, svuint64_t, uint32_t, -+ z0 = svldnf1uw_vnum_u64 (p0, x0, 0), -+ z0 = svldnf1uw_vnum_u64 (p0, x0, 0)) -+ -+/* -+** ldnf1uw_vnum_u64_1: -+** ldnf1w z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uw_vnum_u64_1, svuint64_t, uint32_t, -+ z0 = svldnf1uw_vnum_u64 (p0, x0, 1), -+ z0 = svldnf1uw_vnum_u64 (p0, x0, 1)) -+ -+/* -+** ldnf1uw_vnum_u64_7: -+** ldnf1w z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uw_vnum_u64_7, svuint64_t, uint32_t, -+ z0 = svldnf1uw_vnum_u64 (p0, x0, 7), -+ z0 = svldnf1uw_vnum_u64 (p0, x0, 7)) -+ -+/* -+** ldnf1uw_vnum_u64_8: -+** incb x0, all, mul #4 -+** ldnf1w z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1uw_vnum_u64_8, svuint64_t, uint32_t, -+ z0 = svldnf1uw_vnum_u64 (p0, x0, 8), -+ z0 = svldnf1uw_vnum_u64 (p0, x0, 8)) -+ -+/* -+** ldnf1uw_vnum_u64_m1: -+** ldnf1w z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uw_vnum_u64_m1, svuint64_t, uint32_t, -+ z0 = 
svldnf1uw_vnum_u64 (p0, x0, -1), -+ z0 = svldnf1uw_vnum_u64 (p0, x0, -1)) -+ -+/* -+** ldnf1uw_vnum_u64_m8: -+** ldnf1w z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnf1uw_vnum_u64_m8, svuint64_t, uint32_t, -+ z0 = svldnf1uw_vnum_u64 (p0, x0, -8), -+ z0 = svldnf1uw_vnum_u64 (p0, x0, -8)) -+ -+/* -+** ldnf1uw_vnum_u64_m9: -+** dech x0, all, mul #9 -+** ldnf1w z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnf1uw_vnum_u64_m9, svuint64_t, uint32_t, -+ z0 = svldnf1uw_vnum_u64 (p0, x0, -9), -+ z0 = svldnf1uw_vnum_u64 (p0, x0, -9)) -+ -+/* -+** ldnf1uw_vnum_u64_x1: -+** cnth (x[0-9]+) -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnf1w z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnf1uw_vnum_u64_x1, svuint64_t, uint32_t, -+ z0 = svldnf1uw_vnum_u64 (p0, x0, x1), -+ z0 = svldnf1uw_vnum_u64 (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_bf16.c -new file mode 100644 -index 000000000..b083901fa ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_bf16.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnt1_bf16_base: -+** ldnt1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_bf16_base, svbfloat16_t, bfloat16_t, -+ z0 = svldnt1_bf16 (p0, x0), -+ z0 = svldnt1 (p0, x0)) -+ -+/* -+** ldnt1_bf16_index: -+** ldnt1h z0\.h, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ldnt1_bf16_index, svbfloat16_t, bfloat16_t, -+ z0 = svldnt1_bf16 (p0, x0 + x1), -+ z0 = svldnt1 (p0, x0 + x1)) -+ -+/* -+** ldnt1_bf16_1: -+** ldnt1h z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_bf16_1, svbfloat16_t, bfloat16_t, -+ z0 = svldnt1_bf16 (p0, x0 + svcnth ()), -+ z0 = svldnt1 (p0, x0 + svcnth ())) -+ -+/* -+** ldnt1_bf16_7: -+** ldnt1h z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_bf16_7, svbfloat16_t, bfloat16_t, -+ z0 = svldnt1_bf16 (p0, x0 + svcnth () * 7), -+ z0 = svldnt1 (p0, x0 + svcnth () * 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldnt1_bf16_8: -+** incb x0, all, mul #8 -+** ldnt1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_bf16_8, svbfloat16_t, bfloat16_t, -+ z0 = svldnt1_bf16 (p0, x0 + svcnth () * 8), -+ z0 = svldnt1 (p0, x0 + svcnth () * 8)) -+ -+/* -+** ldnt1_bf16_m1: -+** ldnt1h z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_bf16_m1, svbfloat16_t, bfloat16_t, -+ z0 = svldnt1_bf16 (p0, x0 - svcnth ()), -+ z0 = svldnt1 (p0, x0 - svcnth ())) -+ -+/* -+** ldnt1_bf16_m8: -+** ldnt1h z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_bf16_m8, svbfloat16_t, bfloat16_t, -+ z0 = svldnt1_bf16 (p0, x0 - svcnth () * 8), -+ z0 = svldnt1 (p0, x0 - svcnth () * 8)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ldnt1_bf16_m9: -+** decb x0, all, mul #9 -+** ldnt1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_bf16_m9, svbfloat16_t, bfloat16_t, -+ z0 = svldnt1_bf16 (p0, x0 - svcnth () * 9), -+ z0 = svldnt1 (p0, x0 - svcnth () * 9)) -+ -+/* -+** ldnt1_vnum_bf16_0: -+** ldnt1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_bf16_0, svbfloat16_t, bfloat16_t, -+ z0 = svldnt1_vnum_bf16 (p0, x0, 0), -+ z0 = svldnt1_vnum (p0, x0, 0)) -+ -+/* -+** ldnt1_vnum_bf16_1: -+** ldnt1h z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_bf16_1, svbfloat16_t, bfloat16_t, -+ z0 = svldnt1_vnum_bf16 (p0, x0, 1), -+ z0 = svldnt1_vnum (p0, x0, 1)) -+ -+/* -+** ldnt1_vnum_bf16_7: -+** ldnt1h z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_bf16_7, svbfloat16_t, bfloat16_t, -+ z0 = svldnt1_vnum_bf16 (p0, x0, 7), -+ z0 = svldnt1_vnum (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldnt1_vnum_bf16_8: -+** incb x0, all, mul #8 -+** ldnt1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_bf16_8, svbfloat16_t, bfloat16_t, -+ z0 = svldnt1_vnum_bf16 (p0, x0, 8), -+ z0 = svldnt1_vnum (p0, x0, 8)) -+ -+/* -+** ldnt1_vnum_bf16_m1: -+** ldnt1h z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_bf16_m1, svbfloat16_t, bfloat16_t, -+ z0 = svldnt1_vnum_bf16 (p0, x0, -1), -+ z0 = svldnt1_vnum (p0, x0, -1)) -+ -+/* -+** ldnt1_vnum_bf16_m8: -+** ldnt1h z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_bf16_m8, svbfloat16_t, bfloat16_t, -+ z0 = svldnt1_vnum_bf16 (p0, x0, -8), -+ z0 = svldnt1_vnum (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldnt1_vnum_bf16_m9: -+** decb x0, all, mul #9 -+** ldnt1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_bf16_m9, svbfloat16_t, bfloat16_t, -+ z0 = svldnt1_vnum_bf16 (p0, x0, -9), -+ z0 = svldnt1_vnum (p0, x0, -9)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ldnt1_vnum_bf16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ldnt1h z0\.h, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_bf16_x1, svbfloat16_t, bfloat16_t, -+ z0 = svldnt1_vnum_bf16 (p0, x0, x1), -+ z0 = svldnt1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_f16.c -new file mode 100644 -index 000000000..c98ab2da4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_f16.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnt1_f16_base: -+** ldnt1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_f16_base, svfloat16_t, float16_t, -+ z0 = svldnt1_f16 (p0, x0), -+ z0 = svldnt1 (p0, x0)) -+ -+/* -+** ldnt1_f16_index: -+** ldnt1h z0\.h, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ldnt1_f16_index, svfloat16_t, float16_t, -+ z0 = svldnt1_f16 (p0, x0 + x1), -+ z0 = svldnt1 (p0, x0 + x1)) -+ -+/* -+** ldnt1_f16_1: -+** ldnt1h z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_f16_1, svfloat16_t, float16_t, -+ z0 = svldnt1_f16 (p0, x0 + svcnth ()), -+ z0 = svldnt1 (p0, x0 + svcnth ())) -+ -+/* -+** ldnt1_f16_7: -+** ldnt1h z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_f16_7, svfloat16_t, float16_t, -+ z0 = svldnt1_f16 (p0, x0 + svcnth () * 7), -+ z0 = svldnt1 (p0, x0 + svcnth () * 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldnt1_f16_8: -+** incb x0, all, mul #8 -+** ldnt1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_f16_8, svfloat16_t, float16_t, -+ z0 = svldnt1_f16 (p0, x0 + svcnth () * 8), -+ z0 = svldnt1 (p0, x0 + svcnth () * 8)) -+ -+/* -+** ldnt1_f16_m1: -+** ldnt1h z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_f16_m1, svfloat16_t, float16_t, -+ z0 = svldnt1_f16 (p0, x0 - svcnth ()), -+ z0 = svldnt1 (p0, x0 - svcnth ())) -+ -+/* -+** ldnt1_f16_m8: -+** ldnt1h z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_f16_m8, svfloat16_t, float16_t, -+ z0 = svldnt1_f16 (p0, x0 - svcnth () * 8), -+ z0 = svldnt1 (p0, x0 - svcnth () * 8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldnt1_f16_m9: -+** decb x0, all, mul #9 -+** ldnt1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_f16_m9, svfloat16_t, float16_t, -+ z0 = svldnt1_f16 (p0, x0 - svcnth () * 9), -+ z0 = svldnt1 (p0, x0 - svcnth () * 9)) -+ -+/* -+** ldnt1_vnum_f16_0: -+** ldnt1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_f16_0, svfloat16_t, float16_t, -+ z0 = svldnt1_vnum_f16 (p0, x0, 0), -+ z0 = svldnt1_vnum (p0, x0, 0)) -+ -+/* -+** ldnt1_vnum_f16_1: -+** ldnt1h z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_f16_1, svfloat16_t, float16_t, -+ z0 = svldnt1_vnum_f16 (p0, x0, 1), -+ z0 = svldnt1_vnum (p0, x0, 1)) -+ -+/* -+** ldnt1_vnum_f16_7: -+** ldnt1h z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_f16_7, svfloat16_t, float16_t, -+ z0 = svldnt1_vnum_f16 (p0, x0, 7), -+ z0 = svldnt1_vnum (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldnt1_vnum_f16_8: -+** incb x0, all, mul #8 -+** ldnt1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_f16_8, svfloat16_t, float16_t, -+ z0 = svldnt1_vnum_f16 (p0, x0, 8), -+ z0 = svldnt1_vnum (p0, x0, 8)) -+ -+/* -+** ldnt1_vnum_f16_m1: -+** ldnt1h z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_f16_m1, svfloat16_t, float16_t, -+ z0 = svldnt1_vnum_f16 (p0, x0, -1), -+ z0 = svldnt1_vnum (p0, x0, -1)) -+ -+/* -+** ldnt1_vnum_f16_m8: -+** ldnt1h z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_f16_m8, svfloat16_t, float16_t, -+ z0 = svldnt1_vnum_f16 (p0, x0, -8), -+ z0 = svldnt1_vnum (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ldnt1_vnum_f16_m9: -+** decb x0, all, mul #9 -+** ldnt1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_f16_m9, svfloat16_t, float16_t, -+ z0 = svldnt1_vnum_f16 (p0, x0, -9), -+ z0 = svldnt1_vnum (p0, x0, -9)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ldnt1_vnum_f16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ldnt1h z0\.h, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_f16_x1, svfloat16_t, float16_t, -+ z0 = svldnt1_vnum_f16 (p0, x0, x1), -+ z0 = svldnt1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_f32.c -new file mode 100644 -index 000000000..fb09a8a6d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_f32.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnt1_f32_base: -+** ldnt1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_f32_base, svfloat32_t, float32_t, -+ z0 = svldnt1_f32 (p0, x0), -+ z0 = svldnt1 (p0, x0)) -+ -+/* -+** ldnt1_f32_index: -+** ldnt1w z0\.s, p0/z, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_LOAD (ldnt1_f32_index, svfloat32_t, float32_t, -+ z0 = svldnt1_f32 (p0, x0 + x1), -+ z0 = svldnt1 (p0, x0 + x1)) -+ -+/* -+** ldnt1_f32_1: -+** ldnt1w z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_f32_1, svfloat32_t, float32_t, -+ z0 = svldnt1_f32 (p0, x0 + svcntw ()), -+ z0 = svldnt1 (p0, x0 + svcntw ())) -+ -+/* -+** ldnt1_f32_7: -+** ldnt1w z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_f32_7, svfloat32_t, float32_t, -+ z0 = svldnt1_f32 (p0, x0 + svcntw () * 7), -+ z0 = svldnt1 (p0, x0 + svcntw () * 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldnt1_f32_8: -+** incb x0, all, mul #8 -+** ldnt1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_f32_8, svfloat32_t, float32_t, -+ z0 = svldnt1_f32 (p0, x0 + svcntw () * 8), -+ z0 = svldnt1 (p0, x0 + svcntw () * 8)) -+ -+/* -+** ldnt1_f32_m1: -+** ldnt1w z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_f32_m1, svfloat32_t, float32_t, -+ z0 = svldnt1_f32 (p0, x0 - svcntw ()), -+ z0 = svldnt1 (p0, x0 - svcntw ())) -+ -+/* -+** ldnt1_f32_m8: -+** ldnt1w z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_f32_m8, svfloat32_t, float32_t, -+ z0 = svldnt1_f32 (p0, x0 - svcntw () * 8), -+ z0 = svldnt1 (p0, x0 - svcntw () * 8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldnt1_f32_m9: -+** decb x0, all, mul #9 -+** ldnt1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_f32_m9, svfloat32_t, float32_t, -+ z0 = svldnt1_f32 (p0, x0 - svcntw () * 9), -+ z0 = svldnt1 (p0, x0 - svcntw () * 9)) -+ -+/* -+** ldnt1_vnum_f32_0: -+** ldnt1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_f32_0, svfloat32_t, float32_t, -+ z0 = svldnt1_vnum_f32 (p0, x0, 0), -+ z0 = svldnt1_vnum (p0, x0, 0)) -+ -+/* -+** ldnt1_vnum_f32_1: -+** ldnt1w z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_f32_1, svfloat32_t, float32_t, -+ z0 = svldnt1_vnum_f32 (p0, x0, 1), -+ z0 = svldnt1_vnum (p0, x0, 1)) -+ -+/* -+** ldnt1_vnum_f32_7: -+** ldnt1w z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_f32_7, svfloat32_t, float32_t, -+ z0 = svldnt1_vnum_f32 (p0, x0, 7), -+ z0 = svldnt1_vnum (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ldnt1_vnum_f32_8: -+** incb x0, all, mul #8 -+** ldnt1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_f32_8, svfloat32_t, float32_t, -+ z0 = svldnt1_vnum_f32 (p0, x0, 8), -+ z0 = svldnt1_vnum (p0, x0, 8)) -+ -+/* -+** ldnt1_vnum_f32_m1: -+** ldnt1w z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_f32_m1, svfloat32_t, float32_t, -+ z0 = svldnt1_vnum_f32 (p0, x0, -1), -+ z0 = svldnt1_vnum (p0, x0, -1)) -+ -+/* -+** ldnt1_vnum_f32_m8: -+** ldnt1w z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_f32_m8, svfloat32_t, float32_t, -+ z0 = svldnt1_vnum_f32 (p0, x0, -8), -+ z0 = svldnt1_vnum (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldnt1_vnum_f32_m9: -+** decb x0, all, mul #9 -+** ldnt1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_f32_m9, svfloat32_t, float32_t, -+ z0 = svldnt1_vnum_f32 (p0, x0, -9), -+ z0 = svldnt1_vnum (p0, x0, -9)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ldnt1_vnum_f32_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ldnt1w z0\.s, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_f32_x1, svfloat32_t, float32_t, -+ z0 = svldnt1_vnum_f32 (p0, x0, x1), -+ z0 = svldnt1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_f64.c -new file mode 100644 -index 000000000..2a7863282 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_f64.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnt1_f64_base: -+** ldnt1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_f64_base, svfloat64_t, float64_t, -+ z0 = svldnt1_f64 (p0, x0), -+ z0 = svldnt1 (p0, x0)) -+ -+/* -+** ldnt1_f64_index: -+** ldnt1d z0\.d, p0/z, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_LOAD (ldnt1_f64_index, svfloat64_t, float64_t, -+ z0 = svldnt1_f64 (p0, x0 + x1), -+ z0 = svldnt1 (p0, x0 + x1)) -+ -+/* -+** ldnt1_f64_1: -+** ldnt1d z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_f64_1, svfloat64_t, float64_t, -+ z0 = svldnt1_f64 (p0, x0 + svcntd ()), -+ z0 = svldnt1 (p0, x0 + svcntd ())) -+ -+/* -+** ldnt1_f64_7: -+** ldnt1d z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_f64_7, svfloat64_t, float64_t, -+ z0 = svldnt1_f64 (p0, x0 + svcntd () * 7), -+ z0 = svldnt1 (p0, x0 + svcntd () * 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldnt1_f64_8: -+** incb x0, all, mul #8 -+** ldnt1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_f64_8, svfloat64_t, float64_t, -+ z0 = svldnt1_f64 (p0, x0 + svcntd () * 8), -+ z0 = svldnt1 (p0, x0 + svcntd () * 8)) -+ -+/* -+** ldnt1_f64_m1: -+** ldnt1d z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_f64_m1, svfloat64_t, float64_t, -+ z0 = svldnt1_f64 (p0, x0 - svcntd ()), -+ z0 = svldnt1 (p0, x0 - svcntd ())) -+ -+/* -+** ldnt1_f64_m8: -+** ldnt1d z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_f64_m8, svfloat64_t, float64_t, -+ z0 = svldnt1_f64 (p0, x0 - svcntd () * 8), -+ z0 = svldnt1 (p0, x0 - svcntd () * 8)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ldnt1_f64_m9: -+** decb x0, all, mul #9 -+** ldnt1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_f64_m9, svfloat64_t, float64_t, -+ z0 = svldnt1_f64 (p0, x0 - svcntd () * 9), -+ z0 = svldnt1 (p0, x0 - svcntd () * 9)) -+ -+/* -+** ldnt1_vnum_f64_0: -+** ldnt1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_f64_0, svfloat64_t, float64_t, -+ z0 = svldnt1_vnum_f64 (p0, x0, 0), -+ z0 = svldnt1_vnum (p0, x0, 0)) -+ -+/* -+** ldnt1_vnum_f64_1: -+** ldnt1d z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_f64_1, svfloat64_t, float64_t, -+ z0 = svldnt1_vnum_f64 (p0, x0, 1), -+ z0 = svldnt1_vnum (p0, x0, 1)) -+ -+/* -+** ldnt1_vnum_f64_7: -+** ldnt1d z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_f64_7, svfloat64_t, float64_t, -+ z0 = svldnt1_vnum_f64 (p0, x0, 7), -+ z0 = svldnt1_vnum (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldnt1_vnum_f64_8: -+** incb x0, all, mul #8 -+** ldnt1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_f64_8, svfloat64_t, float64_t, -+ z0 = svldnt1_vnum_f64 (p0, x0, 8), -+ z0 = svldnt1_vnum (p0, x0, 8)) -+ -+/* -+** ldnt1_vnum_f64_m1: -+** ldnt1d z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_f64_m1, svfloat64_t, float64_t, -+ z0 = svldnt1_vnum_f64 (p0, x0, -1), -+ z0 = svldnt1_vnum (p0, x0, -1)) -+ -+/* -+** ldnt1_vnum_f64_m8: -+** ldnt1d z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_f64_m8, svfloat64_t, float64_t, -+ z0 = svldnt1_vnum_f64 (p0, x0, -8), -+ z0 = svldnt1_vnum (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldnt1_vnum_f64_m9: -+** decb x0, all, mul #9 -+** ldnt1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_f64_m9, svfloat64_t, float64_t, -+ z0 = svldnt1_vnum_f64 (p0, x0, -9), -+ z0 = svldnt1_vnum (p0, x0, -9)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ldnt1_vnum_f64_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ldnt1d z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_f64_x1, svfloat64_t, float64_t, -+ z0 = svldnt1_vnum_f64 (p0, x0, x1), -+ z0 = svldnt1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_s16.c -new file mode 100644 -index 000000000..c307ed51f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_s16.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnt1_s16_base: -+** ldnt1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_s16_base, svint16_t, int16_t, -+ z0 = svldnt1_s16 (p0, x0), -+ z0 = svldnt1 (p0, x0)) -+ -+/* -+** ldnt1_s16_index: -+** ldnt1h z0\.h, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ldnt1_s16_index, svint16_t, int16_t, -+ z0 = svldnt1_s16 (p0, x0 + x1), -+ z0 = svldnt1 (p0, x0 + x1)) -+ -+/* -+** ldnt1_s16_1: -+** ldnt1h z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_s16_1, svint16_t, int16_t, -+ z0 = svldnt1_s16 (p0, x0 + svcnth ()), -+ z0 = svldnt1 (p0, x0 + svcnth ())) -+ -+/* -+** ldnt1_s16_7: -+** ldnt1h z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_s16_7, svint16_t, int16_t, -+ z0 = svldnt1_s16 (p0, x0 + svcnth () * 7), -+ z0 = svldnt1 (p0, x0 + svcnth () * 7)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ldnt1_s16_8: -+** incb x0, all, mul #8 -+** ldnt1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_s16_8, svint16_t, int16_t, -+ z0 = svldnt1_s16 (p0, x0 + svcnth () * 8), -+ z0 = svldnt1 (p0, x0 + svcnth () * 8)) -+ -+/* -+** ldnt1_s16_m1: -+** ldnt1h z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_s16_m1, svint16_t, int16_t, -+ z0 = svldnt1_s16 (p0, x0 - svcnth ()), -+ z0 = svldnt1 (p0, x0 - svcnth ())) -+ -+/* -+** ldnt1_s16_m8: -+** ldnt1h z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_s16_m8, svint16_t, int16_t, -+ z0 = svldnt1_s16 (p0, x0 - svcnth () * 8), -+ z0 = svldnt1 (p0, x0 - svcnth () * 8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldnt1_s16_m9: -+** decb x0, all, mul #9 -+** ldnt1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_s16_m9, svint16_t, int16_t, -+ z0 = svldnt1_s16 (p0, x0 - svcnth () * 9), -+ z0 = svldnt1 (p0, x0 - svcnth () * 9)) -+ -+/* -+** ldnt1_vnum_s16_0: -+** ldnt1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_s16_0, svint16_t, int16_t, -+ z0 = svldnt1_vnum_s16 (p0, x0, 0), -+ z0 = svldnt1_vnum (p0, x0, 0)) -+ -+/* -+** ldnt1_vnum_s16_1: -+** ldnt1h z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_s16_1, svint16_t, int16_t, -+ z0 = svldnt1_vnum_s16 (p0, x0, 1), -+ z0 = svldnt1_vnum (p0, x0, 1)) -+ -+/* -+** ldnt1_vnum_s16_7: -+** ldnt1h z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_s16_7, svint16_t, int16_t, -+ z0 = svldnt1_vnum_s16 (p0, x0, 7), -+ z0 = svldnt1_vnum (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldnt1_vnum_s16_8: -+** incb x0, all, mul #8 -+** ldnt1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_s16_8, svint16_t, int16_t, -+ z0 = svldnt1_vnum_s16 (p0, x0, 8), -+ z0 = svldnt1_vnum (p0, x0, 8)) -+ -+/* -+** ldnt1_vnum_s16_m1: -+** ldnt1h z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_s16_m1, svint16_t, int16_t, -+ z0 = svldnt1_vnum_s16 (p0, x0, -1), -+ z0 = svldnt1_vnum (p0, x0, -1)) -+ -+/* -+** ldnt1_vnum_s16_m8: -+** ldnt1h z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_s16_m8, svint16_t, int16_t, -+ z0 = svldnt1_vnum_s16 (p0, x0, -8), -+ z0 = svldnt1_vnum (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldnt1_vnum_s16_m9: -+** decb x0, all, mul #9 -+** ldnt1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_s16_m9, svint16_t, int16_t, -+ z0 = svldnt1_vnum_s16 (p0, x0, -9), -+ z0 = svldnt1_vnum (p0, x0, -9)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ldnt1_vnum_s16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ldnt1h z0\.h, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_s16_x1, svint16_t, int16_t, -+ z0 = svldnt1_vnum_s16 (p0, x0, x1), -+ z0 = svldnt1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_s32.c -new file mode 100644 -index 000000000..2b9df1781 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_s32.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnt1_s32_base: -+** ldnt1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_s32_base, svint32_t, int32_t, -+ z0 = svldnt1_s32 (p0, x0), -+ z0 = svldnt1 (p0, x0)) -+ -+/* -+** ldnt1_s32_index: -+** ldnt1w z0\.s, p0/z, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_LOAD (ldnt1_s32_index, svint32_t, int32_t, -+ z0 = svldnt1_s32 (p0, x0 + x1), -+ z0 = svldnt1 (p0, x0 + x1)) -+ -+/* -+** ldnt1_s32_1: -+** ldnt1w z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_s32_1, svint32_t, int32_t, -+ z0 = svldnt1_s32 (p0, x0 + svcntw ()), -+ z0 = svldnt1 (p0, x0 + svcntw ())) -+ -+/* -+** ldnt1_s32_7: -+** ldnt1w z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_s32_7, svint32_t, int32_t, -+ z0 = svldnt1_s32 (p0, x0 + svcntw () * 7), -+ z0 = svldnt1 (p0, x0 + svcntw () * 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldnt1_s32_8: -+** incb x0, all, mul #8 -+** ldnt1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_s32_8, svint32_t, int32_t, -+ z0 = svldnt1_s32 (p0, x0 + svcntw () * 8), -+ z0 = svldnt1 (p0, x0 + svcntw () * 8)) -+ -+/* -+** ldnt1_s32_m1: -+** ldnt1w z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_s32_m1, svint32_t, int32_t, -+ z0 = svldnt1_s32 (p0, x0 - svcntw ()), -+ z0 = svldnt1 (p0, x0 - svcntw ())) -+ -+/* -+** ldnt1_s32_m8: -+** ldnt1w z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_s32_m8, svint32_t, int32_t, -+ z0 = svldnt1_s32 (p0, x0 - svcntw () * 8), -+ z0 = svldnt1 (p0, x0 - svcntw () * 8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldnt1_s32_m9: -+** decb x0, all, mul #9 -+** ldnt1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_s32_m9, svint32_t, int32_t, -+ z0 = svldnt1_s32 (p0, x0 - svcntw () * 9), -+ z0 = svldnt1 (p0, x0 - svcntw () * 9)) -+ -+/* -+** ldnt1_vnum_s32_0: -+** ldnt1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_s32_0, svint32_t, int32_t, -+ z0 = svldnt1_vnum_s32 (p0, x0, 0), -+ z0 = svldnt1_vnum (p0, x0, 0)) -+ -+/* -+** ldnt1_vnum_s32_1: -+** ldnt1w z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_s32_1, svint32_t, int32_t, -+ z0 = svldnt1_vnum_s32 (p0, x0, 1), -+ z0 = svldnt1_vnum (p0, x0, 1)) -+ -+/* -+** ldnt1_vnum_s32_7: -+** ldnt1w z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_s32_7, svint32_t, int32_t, -+ z0 = svldnt1_vnum_s32 (p0, x0, 7), -+ z0 = svldnt1_vnum (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldnt1_vnum_s32_8: -+** incb x0, all, mul #8 -+** ldnt1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_s32_8, svint32_t, int32_t, -+ z0 = svldnt1_vnum_s32 (p0, x0, 8), -+ z0 = svldnt1_vnum (p0, x0, 8)) -+ -+/* -+** ldnt1_vnum_s32_m1: -+** ldnt1w z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_s32_m1, svint32_t, int32_t, -+ z0 = svldnt1_vnum_s32 (p0, x0, -1), -+ z0 = svldnt1_vnum (p0, x0, -1)) -+ -+/* -+** ldnt1_vnum_s32_m8: -+** ldnt1w z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_s32_m8, svint32_t, int32_t, -+ z0 = svldnt1_vnum_s32 (p0, x0, -8), -+ z0 = svldnt1_vnum (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ldnt1_vnum_s32_m9: -+** decb x0, all, mul #9 -+** ldnt1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_s32_m9, svint32_t, int32_t, -+ z0 = svldnt1_vnum_s32 (p0, x0, -9), -+ z0 = svldnt1_vnum (p0, x0, -9)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ldnt1_vnum_s32_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ldnt1w z0\.s, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_s32_x1, svint32_t, int32_t, -+ z0 = svldnt1_vnum_s32 (p0, x0, x1), -+ z0 = svldnt1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_s64.c -new file mode 100644 -index 000000000..5bc7ac6ed ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_s64.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnt1_s64_base: -+** ldnt1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_s64_base, svint64_t, int64_t, -+ z0 = svldnt1_s64 (p0, x0), -+ z0 = svldnt1 (p0, x0)) -+ -+/* -+** ldnt1_s64_index: -+** ldnt1d z0\.d, p0/z, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_LOAD (ldnt1_s64_index, svint64_t, int64_t, -+ z0 = svldnt1_s64 (p0, x0 + x1), -+ z0 = svldnt1 (p0, x0 + x1)) -+ -+/* -+** ldnt1_s64_1: -+** ldnt1d z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_s64_1, svint64_t, int64_t, -+ z0 = svldnt1_s64 (p0, x0 + svcntd ()), -+ z0 = svldnt1 (p0, x0 + svcntd ())) -+ -+/* -+** ldnt1_s64_7: -+** ldnt1d z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_s64_7, svint64_t, int64_t, -+ z0 = svldnt1_s64 (p0, x0 + svcntd () * 7), -+ z0 = svldnt1 (p0, x0 + svcntd () * 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldnt1_s64_8: -+** incb x0, all, mul #8 -+** ldnt1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_s64_8, svint64_t, int64_t, -+ z0 = svldnt1_s64 (p0, x0 + svcntd () * 8), -+ z0 = svldnt1 (p0, x0 + svcntd () * 8)) -+ -+/* -+** ldnt1_s64_m1: -+** ldnt1d z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_s64_m1, svint64_t, int64_t, -+ z0 = svldnt1_s64 (p0, x0 - svcntd ()), -+ z0 = svldnt1 (p0, x0 - svcntd ())) -+ -+/* -+** ldnt1_s64_m8: -+** ldnt1d z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_s64_m8, svint64_t, int64_t, -+ z0 = svldnt1_s64 (p0, x0 - svcntd () * 8), -+ z0 = svldnt1 (p0, x0 - svcntd () * 8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldnt1_s64_m9: -+** decb x0, all, mul #9 -+** ldnt1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_s64_m9, svint64_t, int64_t, -+ z0 = svldnt1_s64 (p0, x0 - svcntd () * 9), -+ z0 = svldnt1 (p0, x0 - svcntd () * 9)) -+ -+/* -+** ldnt1_vnum_s64_0: -+** ldnt1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_s64_0, svint64_t, int64_t, -+ z0 = svldnt1_vnum_s64 (p0, x0, 0), -+ z0 = svldnt1_vnum (p0, x0, 0)) -+ -+/* -+** ldnt1_vnum_s64_1: -+** ldnt1d z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_s64_1, svint64_t, int64_t, -+ z0 = svldnt1_vnum_s64 (p0, x0, 1), -+ z0 = svldnt1_vnum (p0, x0, 1)) -+ -+/* -+** ldnt1_vnum_s64_7: -+** ldnt1d z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_s64_7, svint64_t, int64_t, -+ z0 = svldnt1_vnum_s64 (p0, x0, 7), -+ z0 = svldnt1_vnum (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ldnt1_vnum_s64_8: -+** incb x0, all, mul #8 -+** ldnt1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_s64_8, svint64_t, int64_t, -+ z0 = svldnt1_vnum_s64 (p0, x0, 8), -+ z0 = svldnt1_vnum (p0, x0, 8)) -+ -+/* -+** ldnt1_vnum_s64_m1: -+** ldnt1d z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_s64_m1, svint64_t, int64_t, -+ z0 = svldnt1_vnum_s64 (p0, x0, -1), -+ z0 = svldnt1_vnum (p0, x0, -1)) -+ -+/* -+** ldnt1_vnum_s64_m8: -+** ldnt1d z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_s64_m8, svint64_t, int64_t, -+ z0 = svldnt1_vnum_s64 (p0, x0, -8), -+ z0 = svldnt1_vnum (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldnt1_vnum_s64_m9: -+** decb x0, all, mul #9 -+** ldnt1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_s64_m9, svint64_t, int64_t, -+ z0 = svldnt1_vnum_s64 (p0, x0, -9), -+ z0 = svldnt1_vnum (p0, x0, -9)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ldnt1_vnum_s64_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ldnt1d z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_s64_x1, svint64_t, int64_t, -+ z0 = svldnt1_vnum_s64 (p0, x0, x1), -+ z0 = svldnt1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_s8.c -new file mode 100644 -index 000000000..eb8e2e548 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_s8.c -@@ -0,0 +1,162 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnt1_s8_base: -+** ldnt1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_s8_base, svint8_t, int8_t, -+ z0 = svldnt1_s8 (p0, x0), -+ z0 = svldnt1 (p0, x0)) -+ -+/* -+** ldnt1_s8_index: -+** ldnt1b z0\.b, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ldnt1_s8_index, svint8_t, int8_t, -+ z0 = svldnt1_s8 (p0, x0 + x1), -+ z0 = svldnt1 (p0, x0 + x1)) -+ -+/* -+** ldnt1_s8_1: -+** ldnt1b z0\.b, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_s8_1, svint8_t, int8_t, -+ z0 = svldnt1_s8 (p0, x0 + svcntb ()), -+ z0 = svldnt1 (p0, x0 + svcntb ())) -+ -+/* -+** ldnt1_s8_7: -+** ldnt1b z0\.b, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_s8_7, svint8_t, int8_t, -+ z0 = svldnt1_s8 (p0, x0 + svcntb () * 7), -+ z0 = svldnt1 (p0, x0 + svcntb () * 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldnt1_s8_8: -+** incb x0, all, mul #8 -+** ldnt1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_s8_8, svint8_t, int8_t, -+ z0 = svldnt1_s8 (p0, x0 + svcntb () * 8), -+ z0 = svldnt1 (p0, x0 + svcntb () * 8)) -+ -+/* -+** ldnt1_s8_m1: -+** ldnt1b z0\.b, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_s8_m1, svint8_t, int8_t, -+ z0 = svldnt1_s8 (p0, x0 - svcntb ()), -+ z0 = svldnt1 (p0, x0 - svcntb ())) -+ -+/* -+** ldnt1_s8_m8: -+** ldnt1b z0\.b, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_s8_m8, svint8_t, int8_t, -+ z0 = svldnt1_s8 (p0, x0 - svcntb () * 8), -+ z0 = svldnt1 (p0, x0 - svcntb () * 8)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ldnt1_s8_m9: -+** decb x0, all, mul #9 -+** ldnt1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_s8_m9, svint8_t, int8_t, -+ z0 = svldnt1_s8 (p0, x0 - svcntb () * 9), -+ z0 = svldnt1 (p0, x0 - svcntb () * 9)) -+ -+/* -+** ldnt1_vnum_s8_0: -+** ldnt1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_s8_0, svint8_t, int8_t, -+ z0 = svldnt1_vnum_s8 (p0, x0, 0), -+ z0 = svldnt1_vnum (p0, x0, 0)) -+ -+/* -+** ldnt1_vnum_s8_1: -+** ldnt1b z0\.b, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_s8_1, svint8_t, int8_t, -+ z0 = svldnt1_vnum_s8 (p0, x0, 1), -+ z0 = svldnt1_vnum (p0, x0, 1)) -+ -+/* -+** ldnt1_vnum_s8_7: -+** ldnt1b z0\.b, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_s8_7, svint8_t, int8_t, -+ z0 = svldnt1_vnum_s8 (p0, x0, 7), -+ z0 = svldnt1_vnum (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldnt1_vnum_s8_8: -+** incb x0, all, mul #8 -+** ldnt1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_s8_8, svint8_t, int8_t, -+ z0 = svldnt1_vnum_s8 (p0, x0, 8), -+ z0 = svldnt1_vnum (p0, x0, 8)) -+ -+/* -+** ldnt1_vnum_s8_m1: -+** ldnt1b z0\.b, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_s8_m1, svint8_t, int8_t, -+ z0 = svldnt1_vnum_s8 (p0, x0, -1), -+ z0 = svldnt1_vnum (p0, x0, -1)) -+ -+/* -+** ldnt1_vnum_s8_m8: -+** ldnt1b z0\.b, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_s8_m8, svint8_t, int8_t, -+ z0 = svldnt1_vnum_s8 (p0, x0, -8), -+ z0 = svldnt1_vnum (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldnt1_vnum_s8_m9: -+** decb x0, all, mul #9 -+** ldnt1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_s8_m9, svint8_t, int8_t, -+ z0 = svldnt1_vnum_s8 (p0, x0, -9), -+ z0 = svldnt1_vnum (p0, x0, -9)) -+ -+/* -+** ldnt1_vnum_s8_x1: -+** cntb (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnt1b z0\.b, p0/z, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** ldnt1b z0\.b, p0/z, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_s8_x1, svint8_t, int8_t, -+ z0 = svldnt1_vnum_s8 (p0, x0, x1), -+ z0 = svldnt1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_u16.c -new file mode 100644 -index 000000000..c032c3d93 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_u16.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnt1_u16_base: -+** ldnt1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_u16_base, svuint16_t, uint16_t, -+ z0 = svldnt1_u16 (p0, x0), -+ z0 = svldnt1 (p0, x0)) -+ -+/* -+** ldnt1_u16_index: -+** ldnt1h z0\.h, p0/z, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_LOAD (ldnt1_u16_index, svuint16_t, uint16_t, -+ z0 = svldnt1_u16 (p0, x0 + x1), -+ z0 = svldnt1 (p0, x0 + x1)) -+ -+/* -+** ldnt1_u16_1: -+** ldnt1h z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_u16_1, svuint16_t, uint16_t, -+ z0 = svldnt1_u16 (p0, x0 + svcnth ()), -+ z0 = svldnt1 (p0, x0 + svcnth ())) -+ -+/* -+** ldnt1_u16_7: -+** ldnt1h z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_u16_7, svuint16_t, uint16_t, -+ z0 = svldnt1_u16 (p0, x0 + svcnth () * 7), -+ z0 = svldnt1 (p0, x0 + svcnth () * 7)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ldnt1_u16_8: -+** incb x0, all, mul #8 -+** ldnt1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_u16_8, svuint16_t, uint16_t, -+ z0 = svldnt1_u16 (p0, x0 + svcnth () * 8), -+ z0 = svldnt1 (p0, x0 + svcnth () * 8)) -+ -+/* -+** ldnt1_u16_m1: -+** ldnt1h z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_u16_m1, svuint16_t, uint16_t, -+ z0 = svldnt1_u16 (p0, x0 - svcnth ()), -+ z0 = svldnt1 (p0, x0 - svcnth ())) -+ -+/* -+** ldnt1_u16_m8: -+** ldnt1h z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_u16_m8, svuint16_t, uint16_t, -+ z0 = svldnt1_u16 (p0, x0 - svcnth () * 8), -+ z0 = svldnt1 (p0, x0 - svcnth () * 8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldnt1_u16_m9: -+** decb x0, all, mul #9 -+** ldnt1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_u16_m9, svuint16_t, uint16_t, -+ z0 = svldnt1_u16 (p0, x0 - svcnth () * 9), -+ z0 = svldnt1 (p0, x0 - svcnth () * 9)) -+ -+/* -+** ldnt1_vnum_u16_0: -+** ldnt1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_u16_0, svuint16_t, uint16_t, -+ z0 = svldnt1_vnum_u16 (p0, x0, 0), -+ z0 = svldnt1_vnum (p0, x0, 0)) -+ -+/* -+** ldnt1_vnum_u16_1: -+** ldnt1h z0\.h, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_u16_1, svuint16_t, uint16_t, -+ z0 = svldnt1_vnum_u16 (p0, x0, 1), -+ z0 = svldnt1_vnum (p0, x0, 1)) -+ -+/* -+** ldnt1_vnum_u16_7: -+** ldnt1h z0\.h, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_u16_7, svuint16_t, uint16_t, -+ z0 = svldnt1_vnum_u16 (p0, x0, 7), -+ z0 = svldnt1_vnum (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldnt1_vnum_u16_8: -+** incb x0, all, mul #8 -+** ldnt1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_u16_8, svuint16_t, uint16_t, -+ z0 = svldnt1_vnum_u16 (p0, x0, 8), -+ z0 = svldnt1_vnum (p0, x0, 8)) -+ -+/* -+** ldnt1_vnum_u16_m1: -+** ldnt1h z0\.h, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_u16_m1, svuint16_t, uint16_t, -+ z0 = svldnt1_vnum_u16 (p0, x0, -1), -+ z0 = svldnt1_vnum (p0, x0, -1)) -+ -+/* -+** ldnt1_vnum_u16_m8: -+** ldnt1h z0\.h, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_u16_m8, svuint16_t, uint16_t, -+ z0 = svldnt1_vnum_u16 (p0, x0, -8), -+ z0 = svldnt1_vnum (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldnt1_vnum_u16_m9: -+** decb x0, all, mul #9 -+** ldnt1h z0\.h, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_u16_m9, svuint16_t, uint16_t, -+ z0 = svldnt1_vnum_u16 (p0, x0, -9), -+ z0 = svldnt1_vnum (p0, x0, -9)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ldnt1_vnum_u16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ldnt1h z0\.h, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_u16_x1, svuint16_t, uint16_t, -+ z0 = svldnt1_vnum_u16 (p0, x0, x1), -+ z0 = svldnt1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_u32.c -new file mode 100644 -index 000000000..278794459 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_u32.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnt1_u32_base: -+** ldnt1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_u32_base, svuint32_t, uint32_t, -+ z0 = svldnt1_u32 (p0, x0), -+ z0 = svldnt1 (p0, x0)) -+ -+/* -+** ldnt1_u32_index: -+** ldnt1w z0\.s, p0/z, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_LOAD (ldnt1_u32_index, svuint32_t, uint32_t, -+ z0 = svldnt1_u32 (p0, x0 + x1), -+ z0 = svldnt1 (p0, x0 + x1)) -+ -+/* -+** ldnt1_u32_1: -+** ldnt1w z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_u32_1, svuint32_t, uint32_t, -+ z0 = svldnt1_u32 (p0, x0 + svcntw ()), -+ z0 = svldnt1 (p0, x0 + svcntw ())) -+ -+/* -+** ldnt1_u32_7: -+** ldnt1w z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_u32_7, svuint32_t, uint32_t, -+ z0 = svldnt1_u32 (p0, x0 + svcntw () * 7), -+ z0 = svldnt1 (p0, x0 + svcntw () * 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldnt1_u32_8: -+** incb x0, all, mul #8 -+** ldnt1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_u32_8, svuint32_t, uint32_t, -+ z0 = svldnt1_u32 (p0, x0 + svcntw () * 8), -+ z0 = svldnt1 (p0, x0 + svcntw () * 8)) -+ -+/* -+** ldnt1_u32_m1: -+** ldnt1w z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_u32_m1, svuint32_t, uint32_t, -+ z0 = svldnt1_u32 (p0, x0 - svcntw ()), -+ z0 = svldnt1 (p0, x0 - svcntw ())) -+ -+/* -+** ldnt1_u32_m8: -+** ldnt1w z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_u32_m8, svuint32_t, uint32_t, -+ z0 = svldnt1_u32 (p0, x0 - svcntw () * 8), -+ z0 = svldnt1 (p0, x0 - svcntw () * 8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldnt1_u32_m9: -+** decb x0, all, mul #9 -+** ldnt1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_u32_m9, svuint32_t, uint32_t, -+ z0 = svldnt1_u32 (p0, x0 - svcntw () * 9), -+ z0 = svldnt1 (p0, x0 - svcntw () * 9)) -+ -+/* -+** ldnt1_vnum_u32_0: -+** ldnt1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_u32_0, svuint32_t, uint32_t, -+ z0 = svldnt1_vnum_u32 (p0, x0, 0), -+ z0 = svldnt1_vnum (p0, x0, 0)) -+ -+/* -+** ldnt1_vnum_u32_1: -+** ldnt1w z0\.s, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_u32_1, svuint32_t, uint32_t, -+ z0 = svldnt1_vnum_u32 (p0, x0, 1), -+ z0 = svldnt1_vnum (p0, x0, 1)) -+ -+/* -+** ldnt1_vnum_u32_7: -+** ldnt1w z0\.s, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_u32_7, svuint32_t, uint32_t, -+ z0 = svldnt1_vnum_u32 (p0, x0, 7), -+ z0 = svldnt1_vnum (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldnt1_vnum_u32_8: -+** incb x0, all, mul #8 -+** ldnt1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_u32_8, svuint32_t, uint32_t, -+ z0 = svldnt1_vnum_u32 (p0, x0, 8), -+ z0 = svldnt1_vnum (p0, x0, 8)) -+ -+/* -+** ldnt1_vnum_u32_m1: -+** ldnt1w z0\.s, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_u32_m1, svuint32_t, uint32_t, -+ z0 = svldnt1_vnum_u32 (p0, x0, -1), -+ z0 = svldnt1_vnum (p0, x0, -1)) -+ -+/* -+** ldnt1_vnum_u32_m8: -+** ldnt1w z0\.s, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_u32_m8, svuint32_t, uint32_t, -+ z0 = svldnt1_vnum_u32 (p0, x0, -8), -+ z0 = svldnt1_vnum (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ldnt1_vnum_u32_m9: -+** decb x0, all, mul #9 -+** ldnt1w z0\.s, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_u32_m9, svuint32_t, uint32_t, -+ z0 = svldnt1_vnum_u32 (p0, x0, -9), -+ z0 = svldnt1_vnum (p0, x0, -9)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ldnt1_vnum_u32_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ldnt1w z0\.s, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_u32_x1, svuint32_t, uint32_t, -+ z0 = svldnt1_vnum_u32 (p0, x0, x1), -+ z0 = svldnt1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_u64.c -new file mode 100644 -index 000000000..abafee6f7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_u64.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnt1_u64_base: -+** ldnt1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_u64_base, svuint64_t, uint64_t, -+ z0 = svldnt1_u64 (p0, x0), -+ z0 = svldnt1 (p0, x0)) -+ -+/* -+** ldnt1_u64_index: -+** ldnt1d z0\.d, p0/z, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_LOAD (ldnt1_u64_index, svuint64_t, uint64_t, -+ z0 = svldnt1_u64 (p0, x0 + x1), -+ z0 = svldnt1 (p0, x0 + x1)) -+ -+/* -+** ldnt1_u64_1: -+** ldnt1d z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_u64_1, svuint64_t, uint64_t, -+ z0 = svldnt1_u64 (p0, x0 + svcntd ()), -+ z0 = svldnt1 (p0, x0 + svcntd ())) -+ -+/* -+** ldnt1_u64_7: -+** ldnt1d z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_u64_7, svuint64_t, uint64_t, -+ z0 = svldnt1_u64 (p0, x0 + svcntd () * 7), -+ z0 = svldnt1 (p0, x0 + svcntd () * 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldnt1_u64_8: -+** incb x0, all, mul #8 -+** ldnt1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_u64_8, svuint64_t, uint64_t, -+ z0 = svldnt1_u64 (p0, x0 + svcntd () * 8), -+ z0 = svldnt1 (p0, x0 + svcntd () * 8)) -+ -+/* -+** ldnt1_u64_m1: -+** ldnt1d z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_u64_m1, svuint64_t, uint64_t, -+ z0 = svldnt1_u64 (p0, x0 - svcntd ()), -+ z0 = svldnt1 (p0, x0 - svcntd ())) -+ -+/* -+** ldnt1_u64_m8: -+** ldnt1d z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_u64_m8, svuint64_t, uint64_t, -+ z0 = svldnt1_u64 (p0, x0 - svcntd () * 8), -+ z0 = svldnt1 (p0, x0 - svcntd () * 8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldnt1_u64_m9: -+** decb x0, all, mul #9 -+** ldnt1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_u64_m9, svuint64_t, uint64_t, -+ z0 = svldnt1_u64 (p0, x0 - svcntd () * 9), -+ z0 = svldnt1 (p0, x0 - svcntd () * 9)) -+ -+/* -+** ldnt1_vnum_u64_0: -+** ldnt1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_u64_0, svuint64_t, uint64_t, -+ z0 = svldnt1_vnum_u64 (p0, x0, 0), -+ z0 = svldnt1_vnum (p0, x0, 0)) -+ -+/* -+** ldnt1_vnum_u64_1: -+** ldnt1d z0\.d, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_u64_1, svuint64_t, uint64_t, -+ z0 = svldnt1_vnum_u64 (p0, x0, 1), -+ z0 = svldnt1_vnum (p0, x0, 1)) -+ -+/* -+** ldnt1_vnum_u64_7: -+** ldnt1d z0\.d, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_u64_7, svuint64_t, uint64_t, -+ z0 = svldnt1_vnum_u64 (p0, x0, 7), -+ z0 = svldnt1_vnum (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ldnt1_vnum_u64_8: -+** incb x0, all, mul #8 -+** ldnt1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_u64_8, svuint64_t, uint64_t, -+ z0 = svldnt1_vnum_u64 (p0, x0, 8), -+ z0 = svldnt1_vnum (p0, x0, 8)) -+ -+/* -+** ldnt1_vnum_u64_m1: -+** ldnt1d z0\.d, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_u64_m1, svuint64_t, uint64_t, -+ z0 = svldnt1_vnum_u64 (p0, x0, -1), -+ z0 = svldnt1_vnum (p0, x0, -1)) -+ -+/* -+** ldnt1_vnum_u64_m8: -+** ldnt1d z0\.d, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_u64_m8, svuint64_t, uint64_t, -+ z0 = svldnt1_vnum_u64 (p0, x0, -8), -+ z0 = svldnt1_vnum (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldnt1_vnum_u64_m9: -+** decb x0, all, mul #9 -+** ldnt1d z0\.d, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_u64_m9, svuint64_t, uint64_t, -+ z0 = svldnt1_vnum_u64 (p0, x0, -9), -+ z0 = svldnt1_vnum (p0, x0, -9)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** ldnt1_vnum_u64_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** ldnt1d z0\.d, p0/z, \[\2\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_u64_x1, svuint64_t, uint64_t, -+ z0 = svldnt1_vnum_u64 (p0, x0, x1), -+ z0 = svldnt1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_u8.c -new file mode 100644 -index 000000000..7bf9acc26 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ldnt1_u8.c -@@ -0,0 +1,162 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ldnt1_u8_base: -+** ldnt1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_u8_base, svuint8_t, uint8_t, -+ z0 = svldnt1_u8 (p0, x0), -+ z0 = svldnt1 (p0, x0)) -+ -+/* -+** ldnt1_u8_index: -+** ldnt1b z0\.b, p0/z, \[x0, x1\] -+** ret -+*/ -+TEST_LOAD (ldnt1_u8_index, svuint8_t, uint8_t, -+ z0 = svldnt1_u8 (p0, x0 + x1), -+ z0 = svldnt1 (p0, x0 + x1)) -+ -+/* -+** ldnt1_u8_1: -+** ldnt1b z0\.b, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_u8_1, svuint8_t, uint8_t, -+ z0 = svldnt1_u8 (p0, x0 + svcntb ()), -+ z0 = svldnt1 (p0, x0 + svcntb ())) -+ -+/* -+** ldnt1_u8_7: -+** ldnt1b z0\.b, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_u8_7, svuint8_t, uint8_t, -+ z0 = svldnt1_u8 (p0, x0 + svcntb () * 7), -+ z0 = svldnt1 (p0, x0 + svcntb () * 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldnt1_u8_8: -+** incb x0, all, mul #8 -+** ldnt1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_u8_8, svuint8_t, uint8_t, -+ z0 = svldnt1_u8 (p0, x0 + svcntb () * 8), -+ z0 = svldnt1 (p0, x0 + svcntb () * 8)) -+ -+/* -+** ldnt1_u8_m1: -+** ldnt1b z0\.b, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_u8_m1, svuint8_t, uint8_t, -+ z0 = svldnt1_u8 (p0, x0 - svcntb ()), -+ z0 = svldnt1 (p0, x0 - svcntb ())) -+ -+/* -+** ldnt1_u8_m8: -+** ldnt1b z0\.b, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_u8_m8, svuint8_t, uint8_t, -+ z0 = svldnt1_u8 (p0, x0 - svcntb () * 8), -+ z0 = svldnt1 (p0, x0 - svcntb () * 8)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** ldnt1_u8_m9: -+** decb x0, all, mul #9 -+** ldnt1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_u8_m9, svuint8_t, uint8_t, -+ z0 = svldnt1_u8 (p0, x0 - svcntb () * 9), -+ z0 = svldnt1 (p0, x0 - svcntb () * 9)) -+ -+/* -+** ldnt1_vnum_u8_0: -+** ldnt1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_u8_0, svuint8_t, uint8_t, -+ z0 = svldnt1_vnum_u8 (p0, x0, 0), -+ z0 = svldnt1_vnum (p0, x0, 0)) -+ -+/* -+** ldnt1_vnum_u8_1: -+** ldnt1b z0\.b, p0/z, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_u8_1, svuint8_t, uint8_t, -+ z0 = svldnt1_vnum_u8 (p0, x0, 1), -+ z0 = svldnt1_vnum (p0, x0, 1)) -+ -+/* -+** ldnt1_vnum_u8_7: -+** ldnt1b z0\.b, p0/z, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_u8_7, svuint8_t, uint8_t, -+ z0 = svldnt1_vnum_u8 (p0, x0, 7), -+ z0 = svldnt1_vnum (p0, x0, 7)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldnt1_vnum_u8_8: -+** incb x0, all, mul #8 -+** ldnt1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_u8_8, svuint8_t, uint8_t, -+ z0 = svldnt1_vnum_u8 (p0, x0, 8), -+ z0 = svldnt1_vnum (p0, x0, 8)) -+ -+/* -+** ldnt1_vnum_u8_m1: -+** ldnt1b z0\.b, p0/z, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_u8_m1, svuint8_t, uint8_t, -+ z0 = svldnt1_vnum_u8 (p0, x0, -1), -+ z0 = svldnt1_vnum (p0, x0, -1)) -+ -+/* -+** ldnt1_vnum_u8_m8: -+** ldnt1b z0\.b, p0/z, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_u8_m8, svuint8_t, uint8_t, -+ z0 = svldnt1_vnum_u8 (p0, x0, -8), -+ z0 = svldnt1_vnum (p0, x0, -8)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** ldnt1_vnum_u8_m9: -+** decb x0, all, mul #9 -+** ldnt1b z0\.b, p0/z, \[x0\] -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_u8_m9, svuint8_t, uint8_t, -+ z0 = svldnt1_vnum_u8 (p0, x0, -9), -+ z0 = svldnt1_vnum (p0, x0, -9)) -+ -+/* -+** ldnt1_vnum_u8_x1: -+** cntb (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** ldnt1b z0\.b, p0/z, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** ldnt1b z0\.b, p0/z, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_LOAD (ldnt1_vnum_u8_x1, svuint8_t, uint8_t, -+ z0 = svldnt1_vnum_u8 (p0, x0, x1), -+ z0 = svldnt1_vnum (p0, x0, x1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_bf16.c -new file mode 100644 -index 000000000..cd91ff48d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_bf16.c -@@ -0,0 +1,12 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** len_x0_bf16: -+** cnth x0 -+** ret -+*/ -+TEST_REDUCTION_X (len_x0_bf16, uint64_t, svbfloat16_t, -+ x0 = svlen_bf16 (z0), -+ x0 = svlen (z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_f16.c -new file mode 100644 -index 000000000..aa6d94bbc ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_f16.c -@@ -0,0 +1,12 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** len_x0_f16: -+** cnth x0 -+** ret -+*/ -+TEST_REDUCTION_X (len_x0_f16, uint64_t, svfloat16_t, -+ x0 = svlen_f16 (z0), -+ x0 = svlen (z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_f32.c -new file mode 100644 -index 000000000..1dd50cee0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_f32.c -@@ -0,0 +1,12 @@ -+/* { 
dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** len_x0_f32: -+** cntw x0 -+** ret -+*/ -+TEST_REDUCTION_X (len_x0_f32, uint64_t, svfloat32_t, -+ x0 = svlen_f32 (z0), -+ x0 = svlen (z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_f64.c -new file mode 100644 -index 000000000..1f210653e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_f64.c -@@ -0,0 +1,12 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** len_x0_f64: -+** cntd x0 -+** ret -+*/ -+TEST_REDUCTION_X (len_x0_f64, uint64_t, svfloat64_t, -+ x0 = svlen_f64 (z0), -+ x0 = svlen (z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_s16.c -new file mode 100644 -index 000000000..f56796182 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_s16.c -@@ -0,0 +1,12 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** len_x0_s16: -+** cnth x0 -+** ret -+*/ -+TEST_REDUCTION_X (len_x0_s16, uint64_t, svint16_t, -+ x0 = svlen_s16 (z0), -+ x0 = svlen (z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_s32.c -new file mode 100644 -index 000000000..662fac177 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_s32.c -@@ -0,0 +1,12 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** len_x0_s32: -+** cntw x0 -+** ret -+*/ -+TEST_REDUCTION_X (len_x0_s32, uint64_t, svint32_t, -+ x0 = svlen_s32 (z0), -+ x0 = svlen (z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_s64.c -new file mode 100644 -index 000000000..f95770302 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_s64.c -@@ -0,0 +1,12 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** len_x0_s64: -+** cntd x0 -+** ret -+*/ -+TEST_REDUCTION_X (len_x0_s64, uint64_t, svint64_t, -+ x0 = svlen_s64 (z0), -+ x0 = svlen (z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_s8.c -new file mode 100644 -index 000000000..6ed8a7177 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_s8.c -@@ -0,0 +1,12 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** len_x0_s8: -+** cntb x0 -+** ret -+*/ -+TEST_REDUCTION_X (len_x0_s8, uint64_t, svint8_t, -+ x0 = svlen_s8 (z0), -+ x0 = svlen (z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_u16.c -new file mode 100644 -index 000000000..13692c927 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_u16.c -@@ -0,0 +1,12 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** len_x0_u16: -+** cnth x0 -+** ret -+*/ -+TEST_REDUCTION_X (len_x0_u16, uint64_t, svuint16_t, -+ x0 = svlen_u16 (z0), -+ x0 = svlen (z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_u32.c -new file mode 100644 -index 
000000000..b03146089 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_u32.c -@@ -0,0 +1,12 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** len_x0_u32: -+** cntw x0 -+** ret -+*/ -+TEST_REDUCTION_X (len_x0_u32, uint64_t, svuint32_t, -+ x0 = svlen_u32 (z0), -+ x0 = svlen (z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_u64.c -new file mode 100644 -index 000000000..11f2e4b81 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_u64.c -@@ -0,0 +1,12 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** len_x0_u64: -+** cntd x0 -+** ret -+*/ -+TEST_REDUCTION_X (len_x0_u64, uint64_t, svuint64_t, -+ x0 = svlen_u64 (z0), -+ x0 = svlen (z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_u8.c -new file mode 100644 -index 000000000..fbd39a432 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/len_u8.c -@@ -0,0 +1,12 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** len_x0_u8: -+** cntb x0 -+** ret -+*/ -+TEST_REDUCTION_X (len_x0_u8, uint64_t, svuint8_t, -+ x0 = svlen_u8 (z0), -+ x0 = svlen (z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_s16.c -new file mode 100644 -index 000000000..edaaca5f1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_s16.c -@@ -0,0 +1,351 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lsl_s16_m_tied1: -+** lsl z0\.h, p0/m, z0\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (lsl_s16_m_tied1, svint16_t, svuint16_t, -+ z0 = svlsl_s16_m (p0, z0, z4), -+ z0 = svlsl_m (p0, z0, z4)) -+ -+/* -+** lsl_s16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** lsl z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (lsl_s16_m_tied2, svint16_t, svuint16_t, -+ z0_res = svlsl_s16_m (p0, z4, z0), -+ z0_res = svlsl_m (p0, z4, z0)) -+ -+/* -+** lsl_s16_m_untied: -+** movprfx z0, z1 -+** lsl z0\.h, p0/m, z0\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (lsl_s16_m_untied, svint16_t, svuint16_t, -+ z0 = svlsl_s16_m (p0, z1, z4), -+ z0 = svlsl_m (p0, z1, z4)) -+ -+/* -+** lsl_w0_s16_m_tied1: -+** mov (z[0-9]+\.h), w0 -+** lsl z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_w0_s16_m_tied1, svint16_t, uint16_t, -+ z0 = svlsl_n_s16_m (p0, z0, x0), -+ z0 = svlsl_m (p0, z0, x0)) -+ -+/* -+** lsl_w0_s16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** lsl z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_w0_s16_m_untied, svint16_t, uint16_t, -+ z0 = svlsl_n_s16_m (p0, z1, x0), -+ z0 = svlsl_m (p0, z1, x0)) -+ -+/* -+** lsl_1_s16_m_tied1: -+** lsl z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_s16_m_tied1, svint16_t, -+ z0 = svlsl_n_s16_m (p0, z0, 1), -+ z0 = svlsl_m (p0, z0, 1)) -+ -+/* -+** lsl_1_s16_m_untied: -+** movprfx z0, z1 -+** lsl z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_s16_m_untied, svint16_t, -+ z0 = svlsl_n_s16_m (p0, z1, 1), -+ z0 = svlsl_m (p0, z1, 1)) -+ -+/* -+** lsl_15_s16_m_tied1: -+** lsl z0\.h, p0/m, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_15_s16_m_tied1, svint16_t, -+ z0 = svlsl_n_s16_m (p0, z0, 15), -+ z0 = svlsl_m (p0, 
z0, 15)) -+ -+/* -+** lsl_15_s16_m_untied: -+** movprfx z0, z1 -+** lsl z0\.h, p0/m, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_15_s16_m_untied, svint16_t, -+ z0 = svlsl_n_s16_m (p0, z1, 15), -+ z0 = svlsl_m (p0, z1, 15)) -+ -+/* -+** lsl_16_s16_m_tied1: -+** mov (z[0-9]+\.h), #16 -+** lsl z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_16_s16_m_tied1, svint16_t, -+ z0 = svlsl_n_s16_m (p0, z0, 16), -+ z0 = svlsl_m (p0, z0, 16)) -+ -+/* -+** lsl_16_s16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), #16 -+** movprfx z0, z1 -+** lsl z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_16_s16_m_untied, svint16_t, -+ z0 = svlsl_n_s16_m (p0, z1, 16), -+ z0 = svlsl_m (p0, z1, 16)) -+ -+/* -+** lsl_s16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** lsl z0\.h, p0/m, z0\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (lsl_s16_z_tied1, svint16_t, svuint16_t, -+ z0 = svlsl_s16_z (p0, z0, z4), -+ z0 = svlsl_z (p0, z0, z4)) -+ -+/* -+** lsl_s16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** lslr z0\.h, p0/m, z0\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (lsl_s16_z_tied2, svint16_t, svuint16_t, -+ z0_res = svlsl_s16_z (p0, z4, z0), -+ z0_res = svlsl_z (p0, z4, z0)) -+ -+/* -+** lsl_s16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** lsl z0\.h, p0/m, z0\.h, z4\.h -+** | -+** movprfx z0\.h, p0/z, z4\.h -+** lslr z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_DUAL_Z (lsl_s16_z_untied, svint16_t, svuint16_t, -+ z0 = svlsl_s16_z (p0, z1, z4), -+ z0 = svlsl_z (p0, z1, z4)) -+ -+/* -+** lsl_w0_s16_z_tied1: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** lsl z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_w0_s16_z_tied1, svint16_t, uint16_t, -+ z0 = svlsl_n_s16_z (p0, z0, x0), -+ z0 = svlsl_z (p0, z0, x0)) -+ -+/* -+** lsl_w0_s16_z_untied: -+** mov (z[0-9]+\.h), w0 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** lsl z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** lslr z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_w0_s16_z_untied, svint16_t, uint16_t, -+ z0 = svlsl_n_s16_z (p0, z1, x0), -+ z0 = svlsl_z (p0, z1, x0)) -+ -+/* -+** lsl_1_s16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** lsl z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_s16_z_tied1, svint16_t, -+ z0 = svlsl_n_s16_z (p0, z0, 1), -+ z0 = svlsl_z (p0, z0, 1)) -+ -+/* -+** lsl_1_s16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** lsl z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_s16_z_untied, svint16_t, -+ z0 = svlsl_n_s16_z (p0, z1, 1), -+ z0 = svlsl_z (p0, z1, 1)) -+ -+/* -+** lsl_15_s16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** lsl z0\.h, p0/m, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_15_s16_z_tied1, svint16_t, -+ z0 = svlsl_n_s16_z (p0, z0, 15), -+ z0 = svlsl_z (p0, z0, 15)) -+ -+/* -+** lsl_15_s16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** lsl z0\.h, p0/m, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_15_s16_z_untied, svint16_t, -+ z0 = svlsl_n_s16_z (p0, z1, 15), -+ z0 = svlsl_z (p0, z1, 15)) -+ -+/* -+** lsl_16_s16_z_tied1: -+** mov (z[0-9]+\.h), #16 -+** movprfx z0\.h, p0/z, z0\.h -+** lsl z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_16_s16_z_tied1, svint16_t, -+ z0 = svlsl_n_s16_z (p0, z0, 16), -+ z0 = svlsl_z (p0, z0, 16)) -+ -+/* -+** lsl_16_s16_z_untied: -+** mov (z[0-9]+\.h), #16 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** lsl z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** lslr z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_16_s16_z_untied, svint16_t, -+ z0 = 
svlsl_n_s16_z (p0, z1, 16), -+ z0 = svlsl_z (p0, z1, 16)) -+ -+/* -+** lsl_s16_x_tied1: -+** lsl z0\.h, p0/m, z0\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (lsl_s16_x_tied1, svint16_t, svuint16_t, -+ z0 = svlsl_s16_x (p0, z0, z4), -+ z0 = svlsl_x (p0, z0, z4)) -+ -+/* -+** lsl_s16_x_tied2: -+** lslr z0\.h, p0/m, z0\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (lsl_s16_x_tied2, svint16_t, svuint16_t, -+ z0_res = svlsl_s16_x (p0, z4, z0), -+ z0_res = svlsl_x (p0, z4, z0)) -+ -+/* -+** lsl_s16_x_untied: -+** ( -+** movprfx z0, z1 -+** lsl z0\.h, p0/m, z0\.h, z4\.h -+** | -+** movprfx z0, z4 -+** lslr z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_DUAL_Z (lsl_s16_x_untied, svint16_t, svuint16_t, -+ z0 = svlsl_s16_x (p0, z1, z4), -+ z0 = svlsl_x (p0, z1, z4)) -+ -+/* -+** lsl_w0_s16_x_tied1: -+** mov (z[0-9]+\.h), w0 -+** lsl z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_w0_s16_x_tied1, svint16_t, uint16_t, -+ z0 = svlsl_n_s16_x (p0, z0, x0), -+ z0 = svlsl_x (p0, z0, x0)) -+ -+/* -+** lsl_w0_s16_x_untied: -+** mov z0\.h, w0 -+** lslr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_w0_s16_x_untied, svint16_t, uint16_t, -+ z0 = svlsl_n_s16_x (p0, z1, x0), -+ z0 = svlsl_x (p0, z1, x0)) -+ -+/* -+** lsl_1_s16_x_tied1: -+** lsl z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_s16_x_tied1, svint16_t, -+ z0 = svlsl_n_s16_x (p0, z0, 1), -+ z0 = svlsl_x (p0, z0, 1)) -+ -+/* -+** lsl_1_s16_x_untied: -+** lsl z0\.h, z1\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_s16_x_untied, svint16_t, -+ z0 = svlsl_n_s16_x (p0, z1, 1), -+ z0 = svlsl_x (p0, z1, 1)) -+ -+/* -+** lsl_15_s16_x_tied1: -+** lsl z0\.h, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_15_s16_x_tied1, svint16_t, -+ z0 = svlsl_n_s16_x (p0, z0, 15), -+ z0 = svlsl_x (p0, z0, 15)) -+ -+/* -+** lsl_15_s16_x_untied: -+** lsl z0\.h, z1\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_15_s16_x_untied, svint16_t, -+ z0 = svlsl_n_s16_x (p0, z1, 15), -+ z0 = svlsl_x (p0, z1, 15)) -+ -+/* -+** lsl_16_s16_x_tied1: -+** mov (z[0-9]+\.h), #16 -+** lsl z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_16_s16_x_tied1, svint16_t, -+ z0 = svlsl_n_s16_x (p0, z0, 16), -+ z0 = svlsl_x (p0, z0, 16)) -+ -+/* -+** lsl_16_s16_x_untied: -+** mov z0\.h, #16 -+** lslr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_16_s16_x_untied, svint16_t, -+ z0 = svlsl_n_s16_x (p0, z1, 16), -+ z0 = svlsl_x (p0, z1, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_s32.c -new file mode 100644 -index 000000000..f98f1f94b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_s32.c -@@ -0,0 +1,351 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lsl_s32_m_tied1: -+** lsl z0\.s, p0/m, z0\.s, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (lsl_s32_m_tied1, svint32_t, svuint32_t, -+ z0 = svlsl_s32_m (p0, z0, z4), -+ z0 = svlsl_m (p0, z0, z4)) -+ -+/* -+** lsl_s32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** lsl z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (lsl_s32_m_tied2, svint32_t, svuint32_t, -+ z0_res = svlsl_s32_m (p0, z4, z0), -+ z0_res = svlsl_m (p0, z4, z0)) -+ -+/* -+** lsl_s32_m_untied: -+** movprfx z0, z1 -+** lsl z0\.s, p0/m, z0\.s, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (lsl_s32_m_untied, svint32_t, svuint32_t, -+ z0 = svlsl_s32_m (p0, z1, z4), -+ z0 = svlsl_m (p0, z1, z4)) -+ -+/* -+** lsl_w0_s32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** lsl z0\.s, p0/m, 
z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_w0_s32_m_tied1, svint32_t, uint32_t, -+ z0 = svlsl_n_s32_m (p0, z0, x0), -+ z0 = svlsl_m (p0, z0, x0)) -+ -+/* -+** lsl_w0_s32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** lsl z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_w0_s32_m_untied, svint32_t, uint32_t, -+ z0 = svlsl_n_s32_m (p0, z1, x0), -+ z0 = svlsl_m (p0, z1, x0)) -+ -+/* -+** lsl_1_s32_m_tied1: -+** lsl z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_s32_m_tied1, svint32_t, -+ z0 = svlsl_n_s32_m (p0, z0, 1), -+ z0 = svlsl_m (p0, z0, 1)) -+ -+/* -+** lsl_1_s32_m_untied: -+** movprfx z0, z1 -+** lsl z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_s32_m_untied, svint32_t, -+ z0 = svlsl_n_s32_m (p0, z1, 1), -+ z0 = svlsl_m (p0, z1, 1)) -+ -+/* -+** lsl_31_s32_m_tied1: -+** lsl z0\.s, p0/m, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_31_s32_m_tied1, svint32_t, -+ z0 = svlsl_n_s32_m (p0, z0, 31), -+ z0 = svlsl_m (p0, z0, 31)) -+ -+/* -+** lsl_31_s32_m_untied: -+** movprfx z0, z1 -+** lsl z0\.s, p0/m, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_31_s32_m_untied, svint32_t, -+ z0 = svlsl_n_s32_m (p0, z1, 31), -+ z0 = svlsl_m (p0, z1, 31)) -+ -+/* -+** lsl_32_s32_m_tied1: -+** mov (z[0-9]+\.s), #32 -+** lsl z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_32_s32_m_tied1, svint32_t, -+ z0 = svlsl_n_s32_m (p0, z0, 32), -+ z0 = svlsl_m (p0, z0, 32)) -+ -+/* -+** lsl_32_s32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #32 -+** movprfx z0, z1 -+** lsl z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_32_s32_m_untied, svint32_t, -+ z0 = svlsl_n_s32_m (p0, z1, 32), -+ z0 = svlsl_m (p0, z1, 32)) -+ -+/* -+** lsl_s32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** lsl z0\.s, p0/m, z0\.s, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (lsl_s32_z_tied1, svint32_t, svuint32_t, -+ z0 = svlsl_s32_z (p0, z0, z4), -+ z0 = svlsl_z (p0, z0, z4)) -+ -+/* -+** lsl_s32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** lslr z0\.s, p0/m, z0\.s, z4\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (lsl_s32_z_tied2, svint32_t, svuint32_t, -+ z0_res = svlsl_s32_z (p0, z4, z0), -+ z0_res = svlsl_z (p0, z4, z0)) -+ -+/* -+** lsl_s32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** lsl z0\.s, p0/m, z0\.s, z4\.s -+** | -+** movprfx z0\.s, p0/z, z4\.s -+** lslr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_DUAL_Z (lsl_s32_z_untied, svint32_t, svuint32_t, -+ z0 = svlsl_s32_z (p0, z1, z4), -+ z0 = svlsl_z (p0, z1, z4)) -+ -+/* -+** lsl_w0_s32_z_tied1: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** lsl z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_w0_s32_z_tied1, svint32_t, uint32_t, -+ z0 = svlsl_n_s32_z (p0, z0, x0), -+ z0 = svlsl_z (p0, z0, x0)) -+ -+/* -+** lsl_w0_s32_z_untied: -+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** lsl z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** lslr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_w0_s32_z_untied, svint32_t, uint32_t, -+ z0 = svlsl_n_s32_z (p0, z1, x0), -+ z0 = svlsl_z (p0, z1, x0)) -+ -+/* -+** lsl_1_s32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** lsl z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_s32_z_tied1, svint32_t, -+ z0 = svlsl_n_s32_z (p0, z0, 1), -+ z0 = svlsl_z (p0, z0, 1)) -+ -+/* -+** lsl_1_s32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** lsl z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_s32_z_untied, svint32_t, -+ z0 = svlsl_n_s32_z (p0, z1, 1), -+ z0 = svlsl_z (p0, z1, 1)) -+ 
-+/* -+** lsl_31_s32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** lsl z0\.s, p0/m, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_31_s32_z_tied1, svint32_t, -+ z0 = svlsl_n_s32_z (p0, z0, 31), -+ z0 = svlsl_z (p0, z0, 31)) -+ -+/* -+** lsl_31_s32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** lsl z0\.s, p0/m, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_31_s32_z_untied, svint32_t, -+ z0 = svlsl_n_s32_z (p0, z1, 31), -+ z0 = svlsl_z (p0, z1, 31)) -+ -+/* -+** lsl_32_s32_z_tied1: -+** mov (z[0-9]+\.s), #32 -+** movprfx z0\.s, p0/z, z0\.s -+** lsl z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_32_s32_z_tied1, svint32_t, -+ z0 = svlsl_n_s32_z (p0, z0, 32), -+ z0 = svlsl_z (p0, z0, 32)) -+ -+/* -+** lsl_32_s32_z_untied: -+** mov (z[0-9]+\.s), #32 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** lsl z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** lslr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_32_s32_z_untied, svint32_t, -+ z0 = svlsl_n_s32_z (p0, z1, 32), -+ z0 = svlsl_z (p0, z1, 32)) -+ -+/* -+** lsl_s32_x_tied1: -+** lsl z0\.s, p0/m, z0\.s, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (lsl_s32_x_tied1, svint32_t, svuint32_t, -+ z0 = svlsl_s32_x (p0, z0, z4), -+ z0 = svlsl_x (p0, z0, z4)) -+ -+/* -+** lsl_s32_x_tied2: -+** lslr z0\.s, p0/m, z0\.s, z4\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (lsl_s32_x_tied2, svint32_t, svuint32_t, -+ z0_res = svlsl_s32_x (p0, z4, z0), -+ z0_res = svlsl_x (p0, z4, z0)) -+ -+/* -+** lsl_s32_x_untied: -+** ( -+** movprfx z0, z1 -+** lsl z0\.s, p0/m, z0\.s, z4\.s -+** | -+** movprfx z0, z4 -+** lslr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_DUAL_Z (lsl_s32_x_untied, svint32_t, svuint32_t, -+ z0 = svlsl_s32_x (p0, z1, z4), -+ z0 = svlsl_x (p0, z1, z4)) -+ -+/* -+** lsl_w0_s32_x_tied1: -+** mov (z[0-9]+\.s), w0 -+** lsl z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_w0_s32_x_tied1, svint32_t, uint32_t, -+ z0 = svlsl_n_s32_x (p0, z0, x0), -+ z0 = svlsl_x (p0, z0, x0)) -+ -+/* -+** lsl_w0_s32_x_untied: -+** mov z0\.s, w0 -+** lslr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_w0_s32_x_untied, svint32_t, uint32_t, -+ z0 = svlsl_n_s32_x (p0, z1, x0), -+ z0 = svlsl_x (p0, z1, x0)) -+ -+/* -+** lsl_1_s32_x_tied1: -+** lsl z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_s32_x_tied1, svint32_t, -+ z0 = svlsl_n_s32_x (p0, z0, 1), -+ z0 = svlsl_x (p0, z0, 1)) -+ -+/* -+** lsl_1_s32_x_untied: -+** lsl z0\.s, z1\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_s32_x_untied, svint32_t, -+ z0 = svlsl_n_s32_x (p0, z1, 1), -+ z0 = svlsl_x (p0, z1, 1)) -+ -+/* -+** lsl_31_s32_x_tied1: -+** lsl z0\.s, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_31_s32_x_tied1, svint32_t, -+ z0 = svlsl_n_s32_x (p0, z0, 31), -+ z0 = svlsl_x (p0, z0, 31)) -+ -+/* -+** lsl_31_s32_x_untied: -+** lsl z0\.s, z1\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_31_s32_x_untied, svint32_t, -+ z0 = svlsl_n_s32_x (p0, z1, 31), -+ z0 = svlsl_x (p0, z1, 31)) -+ -+/* -+** lsl_32_s32_x_tied1: -+** mov (z[0-9]+\.s), #32 -+** lsl z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_32_s32_x_tied1, svint32_t, -+ z0 = svlsl_n_s32_x (p0, z0, 32), -+ z0 = svlsl_x (p0, z0, 32)) -+ -+/* -+** lsl_32_s32_x_untied: -+** mov z0\.s, #32 -+** lslr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_32_s32_x_untied, svint32_t, -+ z0 = svlsl_n_s32_x (p0, z1, 32), -+ z0 = svlsl_x (p0, z1, 32)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_s64.c -new file mode 100644 
-index 000000000..39753986b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_s64.c -@@ -0,0 +1,351 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lsl_s64_m_tied1: -+** lsl z0\.d, p0/m, z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_s64_m_tied1, svint64_t, svuint64_t, -+ z0 = svlsl_s64_m (p0, z0, z4), -+ z0 = svlsl_m (p0, z0, z4)) -+ -+/* -+** lsl_s64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z4 -+** lsl z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (lsl_s64_m_tied2, svint64_t, svuint64_t, -+ z0_res = svlsl_s64_m (p0, z4, z0), -+ z0_res = svlsl_m (p0, z4, z0)) -+ -+/* -+** lsl_s64_m_untied: -+** movprfx z0, z1 -+** lsl z0\.d, p0/m, z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_s64_m_untied, svint64_t, svuint64_t, -+ z0 = svlsl_s64_m (p0, z1, z4), -+ z0 = svlsl_m (p0, z1, z4)) -+ -+/* -+** lsl_x0_s64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** lsl z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_x0_s64_m_tied1, svint64_t, uint64_t, -+ z0 = svlsl_n_s64_m (p0, z0, x0), -+ z0 = svlsl_m (p0, z0, x0)) -+ -+/* -+** lsl_x0_s64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** lsl z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_x0_s64_m_untied, svint64_t, uint64_t, -+ z0 = svlsl_n_s64_m (p0, z1, x0), -+ z0 = svlsl_m (p0, z1, x0)) -+ -+/* -+** lsl_1_s64_m_tied1: -+** lsl z0\.d, p0/m, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_s64_m_tied1, svint64_t, -+ z0 = svlsl_n_s64_m (p0, z0, 1), -+ z0 = svlsl_m (p0, z0, 1)) -+ -+/* -+** lsl_1_s64_m_untied: -+** movprfx z0, z1 -+** lsl z0\.d, p0/m, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_s64_m_untied, svint64_t, -+ z0 = svlsl_n_s64_m (p0, z1, 1), -+ z0 = svlsl_m (p0, z1, 1)) -+ -+/* -+** lsl_63_s64_m_tied1: -+** lsl z0\.d, p0/m, z0\.d, #63 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_63_s64_m_tied1, svint64_t, -+ z0 = svlsl_n_s64_m (p0, z0, 63), -+ z0 = svlsl_m (p0, z0, 63)) -+ -+/* -+** lsl_63_s64_m_untied: -+** movprfx z0, z1 -+** lsl z0\.d, p0/m, z0\.d, #63 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_63_s64_m_untied, svint64_t, -+ z0 = svlsl_n_s64_m (p0, z1, 63), -+ z0 = svlsl_m (p0, z1, 63)) -+ -+/* -+** lsl_64_s64_m_tied1: -+** mov (z[0-9]+\.d), #64 -+** lsl z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_64_s64_m_tied1, svint64_t, -+ z0 = svlsl_n_s64_m (p0, z0, 64), -+ z0 = svlsl_m (p0, z0, 64)) -+ -+/* -+** lsl_64_s64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #64 -+** movprfx z0, z1 -+** lsl z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_64_s64_m_untied, svint64_t, -+ z0 = svlsl_n_s64_m (p0, z1, 64), -+ z0 = svlsl_m (p0, z1, 64)) -+ -+/* -+** lsl_s64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** lsl z0\.d, p0/m, z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_s64_z_tied1, svint64_t, svuint64_t, -+ z0 = svlsl_s64_z (p0, z0, z4), -+ z0 = svlsl_z (p0, z0, z4)) -+ -+/* -+** lsl_s64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** lslr z0\.d, p0/m, z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z_REV (lsl_s64_z_tied2, svint64_t, svuint64_t, -+ z0_res = svlsl_s64_z (p0, z4, z0), -+ z0_res = svlsl_z (p0, z4, z0)) -+ -+/* -+** lsl_s64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** lsl z0\.d, p0/m, z0\.d, z4\.d -+** | -+** movprfx z0\.d, p0/z, z4\.d -+** lslr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_DUAL_Z (lsl_s64_z_untied, svint64_t, svuint64_t, -+ z0 = svlsl_s64_z (p0, z1, z4), -+ z0 = svlsl_z (p0, z1, z4)) -+ -+/* -+** lsl_x0_s64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, 
p0/z, z0\.d -+** lsl z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_x0_s64_z_tied1, svint64_t, uint64_t, -+ z0 = svlsl_n_s64_z (p0, z0, x0), -+ z0 = svlsl_z (p0, z0, x0)) -+ -+/* -+** lsl_x0_s64_z_untied: -+** mov (z[0-9]+\.d), x0 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** lsl z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** lslr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_x0_s64_z_untied, svint64_t, uint64_t, -+ z0 = svlsl_n_s64_z (p0, z1, x0), -+ z0 = svlsl_z (p0, z1, x0)) -+ -+/* -+** lsl_1_s64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** lsl z0\.d, p0/m, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_s64_z_tied1, svint64_t, -+ z0 = svlsl_n_s64_z (p0, z0, 1), -+ z0 = svlsl_z (p0, z0, 1)) -+ -+/* -+** lsl_1_s64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** lsl z0\.d, p0/m, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_s64_z_untied, svint64_t, -+ z0 = svlsl_n_s64_z (p0, z1, 1), -+ z0 = svlsl_z (p0, z1, 1)) -+ -+/* -+** lsl_63_s64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** lsl z0\.d, p0/m, z0\.d, #63 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_63_s64_z_tied1, svint64_t, -+ z0 = svlsl_n_s64_z (p0, z0, 63), -+ z0 = svlsl_z (p0, z0, 63)) -+ -+/* -+** lsl_63_s64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** lsl z0\.d, p0/m, z0\.d, #63 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_63_s64_z_untied, svint64_t, -+ z0 = svlsl_n_s64_z (p0, z1, 63), -+ z0 = svlsl_z (p0, z1, 63)) -+ -+/* -+** lsl_64_s64_z_tied1: -+** mov (z[0-9]+\.d), #64 -+** movprfx z0\.d, p0/z, z0\.d -+** lsl z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_64_s64_z_tied1, svint64_t, -+ z0 = svlsl_n_s64_z (p0, z0, 64), -+ z0 = svlsl_z (p0, z0, 64)) -+ -+/* -+** lsl_64_s64_z_untied: -+** mov (z[0-9]+\.d), #64 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** lsl z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** lslr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_64_s64_z_untied, svint64_t, -+ z0 = svlsl_n_s64_z (p0, z1, 64), -+ z0 = svlsl_z (p0, z1, 64)) -+ -+/* -+** lsl_s64_x_tied1: -+** lsl z0\.d, p0/m, z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_s64_x_tied1, svint64_t, svuint64_t, -+ z0 = svlsl_s64_x (p0, z0, z4), -+ z0 = svlsl_x (p0, z0, z4)) -+ -+/* -+** lsl_s64_x_tied2: -+** lslr z0\.d, p0/m, z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z_REV (lsl_s64_x_tied2, svint64_t, svuint64_t, -+ z0_res = svlsl_s64_x (p0, z4, z0), -+ z0_res = svlsl_x (p0, z4, z0)) -+ -+/* -+** lsl_s64_x_untied: -+** ( -+** movprfx z0, z1 -+** lsl z0\.d, p0/m, z0\.d, z4\.d -+** | -+** movprfx z0, z4 -+** lslr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_DUAL_Z (lsl_s64_x_untied, svint64_t, svuint64_t, -+ z0 = svlsl_s64_x (p0, z1, z4), -+ z0 = svlsl_x (p0, z1, z4)) -+ -+/* -+** lsl_x0_s64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** lsl z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_x0_s64_x_tied1, svint64_t, uint64_t, -+ z0 = svlsl_n_s64_x (p0, z0, x0), -+ z0 = svlsl_x (p0, z0, x0)) -+ -+/* -+** lsl_x0_s64_x_untied: -+** mov z0\.d, x0 -+** lslr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_x0_s64_x_untied, svint64_t, uint64_t, -+ z0 = svlsl_n_s64_x (p0, z1, x0), -+ z0 = svlsl_x (p0, z1, x0)) -+ -+/* -+** lsl_1_s64_x_tied1: -+** lsl z0\.d, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_s64_x_tied1, svint64_t, -+ z0 = svlsl_n_s64_x (p0, z0, 1), -+ z0 = svlsl_x (p0, z0, 1)) -+ -+/* -+** lsl_1_s64_x_untied: -+** lsl z0\.d, z1\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_s64_x_untied, svint64_t, -+ z0 = svlsl_n_s64_x (p0, z1, 1), -+ z0 = svlsl_x 
(p0, z1, 1)) -+ -+/* -+** lsl_63_s64_x_tied1: -+** lsl z0\.d, z0\.d, #63 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_63_s64_x_tied1, svint64_t, -+ z0 = svlsl_n_s64_x (p0, z0, 63), -+ z0 = svlsl_x (p0, z0, 63)) -+ -+/* -+** lsl_63_s64_x_untied: -+** lsl z0\.d, z1\.d, #63 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_63_s64_x_untied, svint64_t, -+ z0 = svlsl_n_s64_x (p0, z1, 63), -+ z0 = svlsl_x (p0, z1, 63)) -+ -+/* -+** lsl_64_s64_x_tied1: -+** mov (z[0-9]+\.d), #64 -+** lsl z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_64_s64_x_tied1, svint64_t, -+ z0 = svlsl_n_s64_x (p0, z0, 64), -+ z0 = svlsl_x (p0, z0, 64)) -+ -+/* -+** lsl_64_s64_x_untied: -+** mov z0\.d, #64 -+** lslr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_64_s64_x_untied, svint64_t, -+ z0 = svlsl_n_s64_x (p0, z1, 64), -+ z0 = svlsl_x (p0, z1, 64)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_s8.c -new file mode 100644 -index 000000000..9a9cc959c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_s8.c -@@ -0,0 +1,351 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lsl_s8_m_tied1: -+** lsl z0\.b, p0/m, z0\.b, z4\.b -+** ret -+*/ -+TEST_DUAL_Z (lsl_s8_m_tied1, svint8_t, svuint8_t, -+ z0 = svlsl_s8_m (p0, z0, z4), -+ z0 = svlsl_m (p0, z0, z4)) -+ -+/* -+** lsl_s8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** lsl z0\.b, p0/m, z0\.b, \1\.b -+** ret -+*/ -+TEST_DUAL_Z_REV (lsl_s8_m_tied2, svint8_t, svuint8_t, -+ z0_res = svlsl_s8_m (p0, z4, z0), -+ z0_res = svlsl_m (p0, z4, z0)) -+ -+/* -+** lsl_s8_m_untied: -+** movprfx z0, z1 -+** lsl z0\.b, p0/m, z0\.b, z4\.b -+** ret -+*/ -+TEST_DUAL_Z (lsl_s8_m_untied, svint8_t, svuint8_t, -+ z0 = svlsl_s8_m (p0, z1, z4), -+ z0 = svlsl_m (p0, z1, z4)) -+ -+/* -+** lsl_w0_s8_m_tied1: -+** mov (z[0-9]+\.b), w0 -+** lsl z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_w0_s8_m_tied1, svint8_t, uint8_t, -+ z0 = svlsl_n_s8_m (p0, z0, x0), -+ z0 = svlsl_m (p0, z0, x0)) -+ -+/* -+** lsl_w0_s8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** lsl z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_w0_s8_m_untied, svint8_t, uint8_t, -+ z0 = svlsl_n_s8_m (p0, z1, x0), -+ z0 = svlsl_m (p0, z1, x0)) -+ -+/* -+** lsl_1_s8_m_tied1: -+** lsl z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_s8_m_tied1, svint8_t, -+ z0 = svlsl_n_s8_m (p0, z0, 1), -+ z0 = svlsl_m (p0, z0, 1)) -+ -+/* -+** lsl_1_s8_m_untied: -+** movprfx z0, z1 -+** lsl z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_s8_m_untied, svint8_t, -+ z0 = svlsl_n_s8_m (p0, z1, 1), -+ z0 = svlsl_m (p0, z1, 1)) -+ -+/* -+** lsl_7_s8_m_tied1: -+** lsl z0\.b, p0/m, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_7_s8_m_tied1, svint8_t, -+ z0 = svlsl_n_s8_m (p0, z0, 7), -+ z0 = svlsl_m (p0, z0, 7)) -+ -+/* -+** lsl_7_s8_m_untied: -+** movprfx z0, z1 -+** lsl z0\.b, p0/m, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_7_s8_m_untied, svint8_t, -+ z0 = svlsl_n_s8_m (p0, z1, 7), -+ z0 = svlsl_m (p0, z1, 7)) -+ -+/* -+** lsl_8_s8_m_tied1: -+** mov (z[0-9]+\.b), #8 -+** lsl z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_8_s8_m_tied1, svint8_t, -+ z0 = svlsl_n_s8_m (p0, z0, 8), -+ z0 = svlsl_m (p0, z0, 8)) -+ -+/* -+** lsl_8_s8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), #8 -+** movprfx z0, z1 -+** lsl z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_8_s8_m_untied, svint8_t, -+ z0 = 
svlsl_n_s8_m (p0, z1, 8), -+ z0 = svlsl_m (p0, z1, 8)) -+ -+/* -+** lsl_s8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** lsl z0\.b, p0/m, z0\.b, z4\.b -+** ret -+*/ -+TEST_DUAL_Z (lsl_s8_z_tied1, svint8_t, svuint8_t, -+ z0 = svlsl_s8_z (p0, z0, z4), -+ z0 = svlsl_z (p0, z0, z4)) -+ -+/* -+** lsl_s8_z_tied2: -+** movprfx z0\.b, p0/z, z0\.b -+** lslr z0\.b, p0/m, z0\.b, z4\.b -+** ret -+*/ -+TEST_DUAL_Z_REV (lsl_s8_z_tied2, svint8_t, svuint8_t, -+ z0_res = svlsl_s8_z (p0, z4, z0), -+ z0_res = svlsl_z (p0, z4, z0)) -+ -+/* -+** lsl_s8_z_untied: -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** lsl z0\.b, p0/m, z0\.b, z4\.b -+** | -+** movprfx z0\.b, p0/z, z4\.b -+** lslr z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_DUAL_Z (lsl_s8_z_untied, svint8_t, svuint8_t, -+ z0 = svlsl_s8_z (p0, z1, z4), -+ z0 = svlsl_z (p0, z1, z4)) -+ -+/* -+** lsl_w0_s8_z_tied1: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** lsl z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_w0_s8_z_tied1, svint8_t, uint8_t, -+ z0 = svlsl_n_s8_z (p0, z0, x0), -+ z0 = svlsl_z (p0, z0, x0)) -+ -+/* -+** lsl_w0_s8_z_untied: -+** mov (z[0-9]+\.b), w0 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** lsl z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** lslr z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_w0_s8_z_untied, svint8_t, uint8_t, -+ z0 = svlsl_n_s8_z (p0, z1, x0), -+ z0 = svlsl_z (p0, z1, x0)) -+ -+/* -+** lsl_1_s8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** lsl z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_s8_z_tied1, svint8_t, -+ z0 = svlsl_n_s8_z (p0, z0, 1), -+ z0 = svlsl_z (p0, z0, 1)) -+ -+/* -+** lsl_1_s8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** lsl z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_s8_z_untied, svint8_t, -+ z0 = svlsl_n_s8_z (p0, z1, 1), -+ z0 = svlsl_z (p0, z1, 1)) -+ -+/* -+** lsl_7_s8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** lsl z0\.b, p0/m, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_7_s8_z_tied1, svint8_t, -+ z0 = svlsl_n_s8_z (p0, z0, 7), -+ z0 = svlsl_z (p0, z0, 7)) -+ -+/* -+** lsl_7_s8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** lsl z0\.b, p0/m, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_7_s8_z_untied, svint8_t, -+ z0 = svlsl_n_s8_z (p0, z1, 7), -+ z0 = svlsl_z (p0, z1, 7)) -+ -+/* -+** lsl_8_s8_z_tied1: -+** mov (z[0-9]+\.b), #8 -+** movprfx z0\.b, p0/z, z0\.b -+** lsl z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_8_s8_z_tied1, svint8_t, -+ z0 = svlsl_n_s8_z (p0, z0, 8), -+ z0 = svlsl_z (p0, z0, 8)) -+ -+/* -+** lsl_8_s8_z_untied: -+** mov (z[0-9]+\.b), #8 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** lsl z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** lslr z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_8_s8_z_untied, svint8_t, -+ z0 = svlsl_n_s8_z (p0, z1, 8), -+ z0 = svlsl_z (p0, z1, 8)) -+ -+/* -+** lsl_s8_x_tied1: -+** lsl z0\.b, p0/m, z0\.b, z4\.b -+** ret -+*/ -+TEST_DUAL_Z (lsl_s8_x_tied1, svint8_t, svuint8_t, -+ z0 = svlsl_s8_x (p0, z0, z4), -+ z0 = svlsl_x (p0, z0, z4)) -+ -+/* -+** lsl_s8_x_tied2: -+** lslr z0\.b, p0/m, z0\.b, z4\.b -+** ret -+*/ -+TEST_DUAL_Z_REV (lsl_s8_x_tied2, svint8_t, svuint8_t, -+ z0_res = svlsl_s8_x (p0, z4, z0), -+ z0_res = svlsl_x (p0, z4, z0)) -+ -+/* -+** lsl_s8_x_untied: -+** ( -+** movprfx z0, z1 -+** lsl z0\.b, p0/m, z0\.b, z4\.b -+** | -+** movprfx z0, z4 -+** lslr z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_DUAL_Z (lsl_s8_x_untied, svint8_t, svuint8_t, -+ z0 = svlsl_s8_x (p0, z1, z4), -+ 
z0 = svlsl_x (p0, z1, z4)) -+ -+/* -+** lsl_w0_s8_x_tied1: -+** mov (z[0-9]+\.b), w0 -+** lsl z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_w0_s8_x_tied1, svint8_t, uint8_t, -+ z0 = svlsl_n_s8_x (p0, z0, x0), -+ z0 = svlsl_x (p0, z0, x0)) -+ -+/* -+** lsl_w0_s8_x_untied: -+** mov z0\.b, w0 -+** lslr z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_w0_s8_x_untied, svint8_t, uint8_t, -+ z0 = svlsl_n_s8_x (p0, z1, x0), -+ z0 = svlsl_x (p0, z1, x0)) -+ -+/* -+** lsl_1_s8_x_tied1: -+** lsl z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_s8_x_tied1, svint8_t, -+ z0 = svlsl_n_s8_x (p0, z0, 1), -+ z0 = svlsl_x (p0, z0, 1)) -+ -+/* -+** lsl_1_s8_x_untied: -+** lsl z0\.b, z1\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_s8_x_untied, svint8_t, -+ z0 = svlsl_n_s8_x (p0, z1, 1), -+ z0 = svlsl_x (p0, z1, 1)) -+ -+/* -+** lsl_7_s8_x_tied1: -+** lsl z0\.b, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_7_s8_x_tied1, svint8_t, -+ z0 = svlsl_n_s8_x (p0, z0, 7), -+ z0 = svlsl_x (p0, z0, 7)) -+ -+/* -+** lsl_7_s8_x_untied: -+** lsl z0\.b, z1\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_7_s8_x_untied, svint8_t, -+ z0 = svlsl_n_s8_x (p0, z1, 7), -+ z0 = svlsl_x (p0, z1, 7)) -+ -+/* -+** lsl_8_s8_x_tied1: -+** mov (z[0-9]+\.b), #8 -+** lsl z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_8_s8_x_tied1, svint8_t, -+ z0 = svlsl_n_s8_x (p0, z0, 8), -+ z0 = svlsl_x (p0, z0, 8)) -+ -+/* -+** lsl_8_s8_x_untied: -+** mov z0\.b, #8 -+** lslr z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_8_s8_x_untied, svint8_t, -+ z0 = svlsl_n_s8_x (p0, z1, 8), -+ z0 = svlsl_x (p0, z1, 8)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_u16.c -new file mode 100644 -index 000000000..57db0fda6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_u16.c -@@ -0,0 +1,351 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lsl_u16_m_tied1: -+** lsl z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_u16_m_tied1, svuint16_t, -+ z0 = svlsl_u16_m (p0, z0, z1), -+ z0 = svlsl_m (p0, z0, z1)) -+ -+/* -+** lsl_u16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** lsl z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_u16_m_tied2, svuint16_t, -+ z0 = svlsl_u16_m (p0, z1, z0), -+ z0 = svlsl_m (p0, z1, z0)) -+ -+/* -+** lsl_u16_m_untied: -+** movprfx z0, z1 -+** lsl z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_u16_m_untied, svuint16_t, -+ z0 = svlsl_u16_m (p0, z1, z2), -+ z0 = svlsl_m (p0, z1, z2)) -+ -+/* -+** lsl_w0_u16_m_tied1: -+** mov (z[0-9]+\.h), w0 -+** lsl z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_w0_u16_m_tied1, svuint16_t, uint16_t, -+ z0 = svlsl_n_u16_m (p0, z0, x0), -+ z0 = svlsl_m (p0, z0, x0)) -+ -+/* -+** lsl_w0_u16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** lsl z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_w0_u16_m_untied, svuint16_t, uint16_t, -+ z0 = svlsl_n_u16_m (p0, z1, x0), -+ z0 = svlsl_m (p0, z1, x0)) -+ -+/* -+** lsl_1_u16_m_tied1: -+** lsl z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_u16_m_tied1, svuint16_t, -+ z0 = svlsl_n_u16_m (p0, z0, 1), -+ z0 = svlsl_m (p0, z0, 1)) -+ -+/* -+** lsl_1_u16_m_untied: -+** movprfx z0, z1 -+** lsl z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_u16_m_untied, svuint16_t, -+ z0 = svlsl_n_u16_m (p0, z1, 1), -+ z0 = svlsl_m (p0, z1, 1)) -+ 
-+/* -+** lsl_15_u16_m_tied1: -+** lsl z0\.h, p0/m, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_15_u16_m_tied1, svuint16_t, -+ z0 = svlsl_n_u16_m (p0, z0, 15), -+ z0 = svlsl_m (p0, z0, 15)) -+ -+/* -+** lsl_15_u16_m_untied: -+** movprfx z0, z1 -+** lsl z0\.h, p0/m, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_15_u16_m_untied, svuint16_t, -+ z0 = svlsl_n_u16_m (p0, z1, 15), -+ z0 = svlsl_m (p0, z1, 15)) -+ -+/* -+** lsl_16_u16_m_tied1: -+** mov (z[0-9]+\.h), #16 -+** lsl z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_16_u16_m_tied1, svuint16_t, -+ z0 = svlsl_n_u16_m (p0, z0, 16), -+ z0 = svlsl_m (p0, z0, 16)) -+ -+/* -+** lsl_16_u16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), #16 -+** movprfx z0, z1 -+** lsl z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_16_u16_m_untied, svuint16_t, -+ z0 = svlsl_n_u16_m (p0, z1, 16), -+ z0 = svlsl_m (p0, z1, 16)) -+ -+/* -+** lsl_u16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** lsl z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_u16_z_tied1, svuint16_t, -+ z0 = svlsl_u16_z (p0, z0, z1), -+ z0 = svlsl_z (p0, z0, z1)) -+ -+/* -+** lsl_u16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** lslr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_u16_z_tied2, svuint16_t, -+ z0 = svlsl_u16_z (p0, z1, z0), -+ z0 = svlsl_z (p0, z1, z0)) -+ -+/* -+** lsl_u16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** lsl z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** lslr z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_u16_z_untied, svuint16_t, -+ z0 = svlsl_u16_z (p0, z1, z2), -+ z0 = svlsl_z (p0, z1, z2)) -+ -+/* -+** lsl_w0_u16_z_tied1: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** lsl z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_w0_u16_z_tied1, svuint16_t, uint16_t, -+ z0 = svlsl_n_u16_z (p0, z0, x0), -+ z0 = svlsl_z (p0, z0, x0)) -+ -+/* -+** lsl_w0_u16_z_untied: -+** mov (z[0-9]+\.h), w0 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** lsl z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** lslr z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_w0_u16_z_untied, svuint16_t, uint16_t, -+ z0 = svlsl_n_u16_z (p0, z1, x0), -+ z0 = svlsl_z (p0, z1, x0)) -+ -+/* -+** lsl_1_u16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** lsl z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_u16_z_tied1, svuint16_t, -+ z0 = svlsl_n_u16_z (p0, z0, 1), -+ z0 = svlsl_z (p0, z0, 1)) -+ -+/* -+** lsl_1_u16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** lsl z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_u16_z_untied, svuint16_t, -+ z0 = svlsl_n_u16_z (p0, z1, 1), -+ z0 = svlsl_z (p0, z1, 1)) -+ -+/* -+** lsl_15_u16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** lsl z0\.h, p0/m, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_15_u16_z_tied1, svuint16_t, -+ z0 = svlsl_n_u16_z (p0, z0, 15), -+ z0 = svlsl_z (p0, z0, 15)) -+ -+/* -+** lsl_15_u16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** lsl z0\.h, p0/m, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_15_u16_z_untied, svuint16_t, -+ z0 = svlsl_n_u16_z (p0, z1, 15), -+ z0 = svlsl_z (p0, z1, 15)) -+ -+/* -+** lsl_16_u16_z_tied1: -+** mov (z[0-9]+\.h), #16 -+** movprfx z0\.h, p0/z, z0\.h -+** lsl z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_16_u16_z_tied1, svuint16_t, -+ z0 = svlsl_n_u16_z (p0, z0, 16), -+ z0 = svlsl_z (p0, z0, 16)) -+ -+/* -+** lsl_16_u16_z_untied: -+** mov (z[0-9]+\.h), #16 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** lsl z0\.h, p0/m, 
z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** lslr z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_16_u16_z_untied, svuint16_t, -+ z0 = svlsl_n_u16_z (p0, z1, 16), -+ z0 = svlsl_z (p0, z1, 16)) -+ -+/* -+** lsl_u16_x_tied1: -+** lsl z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_u16_x_tied1, svuint16_t, -+ z0 = svlsl_u16_x (p0, z0, z1), -+ z0 = svlsl_x (p0, z0, z1)) -+ -+/* -+** lsl_u16_x_tied2: -+** lslr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_u16_x_tied2, svuint16_t, -+ z0 = svlsl_u16_x (p0, z1, z0), -+ z0 = svlsl_x (p0, z1, z0)) -+ -+/* -+** lsl_u16_x_untied: -+** ( -+** movprfx z0, z1 -+** lsl z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0, z2 -+** lslr z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_u16_x_untied, svuint16_t, -+ z0 = svlsl_u16_x (p0, z1, z2), -+ z0 = svlsl_x (p0, z1, z2)) -+ -+/* -+** lsl_w0_u16_x_tied1: -+** mov (z[0-9]+\.h), w0 -+** lsl z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_w0_u16_x_tied1, svuint16_t, uint16_t, -+ z0 = svlsl_n_u16_x (p0, z0, x0), -+ z0 = svlsl_x (p0, z0, x0)) -+ -+/* -+** lsl_w0_u16_x_untied: -+** mov z0\.h, w0 -+** lslr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_w0_u16_x_untied, svuint16_t, uint16_t, -+ z0 = svlsl_n_u16_x (p0, z1, x0), -+ z0 = svlsl_x (p0, z1, x0)) -+ -+/* -+** lsl_1_u16_x_tied1: -+** lsl z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_u16_x_tied1, svuint16_t, -+ z0 = svlsl_n_u16_x (p0, z0, 1), -+ z0 = svlsl_x (p0, z0, 1)) -+ -+/* -+** lsl_1_u16_x_untied: -+** lsl z0\.h, z1\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_u16_x_untied, svuint16_t, -+ z0 = svlsl_n_u16_x (p0, z1, 1), -+ z0 = svlsl_x (p0, z1, 1)) -+ -+/* -+** lsl_15_u16_x_tied1: -+** lsl z0\.h, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_15_u16_x_tied1, svuint16_t, -+ z0 = svlsl_n_u16_x (p0, z0, 15), -+ z0 = svlsl_x (p0, z0, 15)) -+ -+/* -+** lsl_15_u16_x_untied: -+** lsl z0\.h, z1\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_15_u16_x_untied, svuint16_t, -+ z0 = svlsl_n_u16_x (p0, z1, 15), -+ z0 = svlsl_x (p0, z1, 15)) -+ -+/* -+** lsl_16_u16_x_tied1: -+** mov (z[0-9]+\.h), #16 -+** lsl z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_16_u16_x_tied1, svuint16_t, -+ z0 = svlsl_n_u16_x (p0, z0, 16), -+ z0 = svlsl_x (p0, z0, 16)) -+ -+/* -+** lsl_16_u16_x_untied: -+** mov z0\.h, #16 -+** lslr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_16_u16_x_untied, svuint16_t, -+ z0 = svlsl_n_u16_x (p0, z1, 16), -+ z0 = svlsl_x (p0, z1, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_u32.c -new file mode 100644 -index 000000000..8773f15db ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_u32.c -@@ -0,0 +1,351 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lsl_u32_m_tied1: -+** lsl z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_u32_m_tied1, svuint32_t, -+ z0 = svlsl_u32_m (p0, z0, z1), -+ z0 = svlsl_m (p0, z0, z1)) -+ -+/* -+** lsl_u32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** lsl z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_u32_m_tied2, svuint32_t, -+ z0 = svlsl_u32_m (p0, z1, z0), -+ z0 = svlsl_m (p0, z1, z0)) -+ -+/* -+** lsl_u32_m_untied: -+** movprfx z0, z1 -+** lsl z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_u32_m_untied, svuint32_t, -+ z0 = svlsl_u32_m (p0, z1, z2), -+ z0 = svlsl_m 
(p0, z1, z2)) -+ -+/* -+** lsl_w0_u32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** lsl z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_w0_u32_m_tied1, svuint32_t, uint32_t, -+ z0 = svlsl_n_u32_m (p0, z0, x0), -+ z0 = svlsl_m (p0, z0, x0)) -+ -+/* -+** lsl_w0_u32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** lsl z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_w0_u32_m_untied, svuint32_t, uint32_t, -+ z0 = svlsl_n_u32_m (p0, z1, x0), -+ z0 = svlsl_m (p0, z1, x0)) -+ -+/* -+** lsl_1_u32_m_tied1: -+** lsl z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_u32_m_tied1, svuint32_t, -+ z0 = svlsl_n_u32_m (p0, z0, 1), -+ z0 = svlsl_m (p0, z0, 1)) -+ -+/* -+** lsl_1_u32_m_untied: -+** movprfx z0, z1 -+** lsl z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_u32_m_untied, svuint32_t, -+ z0 = svlsl_n_u32_m (p0, z1, 1), -+ z0 = svlsl_m (p0, z1, 1)) -+ -+/* -+** lsl_31_u32_m_tied1: -+** lsl z0\.s, p0/m, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_31_u32_m_tied1, svuint32_t, -+ z0 = svlsl_n_u32_m (p0, z0, 31), -+ z0 = svlsl_m (p0, z0, 31)) -+ -+/* -+** lsl_31_u32_m_untied: -+** movprfx z0, z1 -+** lsl z0\.s, p0/m, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_31_u32_m_untied, svuint32_t, -+ z0 = svlsl_n_u32_m (p0, z1, 31), -+ z0 = svlsl_m (p0, z1, 31)) -+ -+/* -+** lsl_32_u32_m_tied1: -+** mov (z[0-9]+\.s), #32 -+** lsl z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_32_u32_m_tied1, svuint32_t, -+ z0 = svlsl_n_u32_m (p0, z0, 32), -+ z0 = svlsl_m (p0, z0, 32)) -+ -+/* -+** lsl_32_u32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #32 -+** movprfx z0, z1 -+** lsl z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_32_u32_m_untied, svuint32_t, -+ z0 = svlsl_n_u32_m (p0, z1, 32), -+ z0 = svlsl_m (p0, z1, 32)) -+ -+/* -+** lsl_u32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** lsl z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_u32_z_tied1, svuint32_t, -+ z0 = svlsl_u32_z (p0, z0, z1), -+ z0 = svlsl_z (p0, z0, z1)) -+ -+/* -+** lsl_u32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** lslr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_u32_z_tied2, svuint32_t, -+ z0 = svlsl_u32_z (p0, z1, z0), -+ z0 = svlsl_z (p0, z1, z0)) -+ -+/* -+** lsl_u32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** lsl z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** lslr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_u32_z_untied, svuint32_t, -+ z0 = svlsl_u32_z (p0, z1, z2), -+ z0 = svlsl_z (p0, z1, z2)) -+ -+/* -+** lsl_w0_u32_z_tied1: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** lsl z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_w0_u32_z_tied1, svuint32_t, uint32_t, -+ z0 = svlsl_n_u32_z (p0, z0, x0), -+ z0 = svlsl_z (p0, z0, x0)) -+ -+/* -+** lsl_w0_u32_z_untied: -+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** lsl z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** lslr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_w0_u32_z_untied, svuint32_t, uint32_t, -+ z0 = svlsl_n_u32_z (p0, z1, x0), -+ z0 = svlsl_z (p0, z1, x0)) -+ -+/* -+** lsl_1_u32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** lsl z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_u32_z_tied1, svuint32_t, -+ z0 = svlsl_n_u32_z (p0, z0, 1), -+ z0 = svlsl_z (p0, z0, 1)) -+ -+/* -+** lsl_1_u32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** lsl z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_u32_z_untied, 
svuint32_t, -+ z0 = svlsl_n_u32_z (p0, z1, 1), -+ z0 = svlsl_z (p0, z1, 1)) -+ -+/* -+** lsl_31_u32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** lsl z0\.s, p0/m, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_31_u32_z_tied1, svuint32_t, -+ z0 = svlsl_n_u32_z (p0, z0, 31), -+ z0 = svlsl_z (p0, z0, 31)) -+ -+/* -+** lsl_31_u32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** lsl z0\.s, p0/m, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_31_u32_z_untied, svuint32_t, -+ z0 = svlsl_n_u32_z (p0, z1, 31), -+ z0 = svlsl_z (p0, z1, 31)) -+ -+/* -+** lsl_32_u32_z_tied1: -+** mov (z[0-9]+\.s), #32 -+** movprfx z0\.s, p0/z, z0\.s -+** lsl z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_32_u32_z_tied1, svuint32_t, -+ z0 = svlsl_n_u32_z (p0, z0, 32), -+ z0 = svlsl_z (p0, z0, 32)) -+ -+/* -+** lsl_32_u32_z_untied: -+** mov (z[0-9]+\.s), #32 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** lsl z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** lslr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_32_u32_z_untied, svuint32_t, -+ z0 = svlsl_n_u32_z (p0, z1, 32), -+ z0 = svlsl_z (p0, z1, 32)) -+ -+/* -+** lsl_u32_x_tied1: -+** lsl z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_u32_x_tied1, svuint32_t, -+ z0 = svlsl_u32_x (p0, z0, z1), -+ z0 = svlsl_x (p0, z0, z1)) -+ -+/* -+** lsl_u32_x_tied2: -+** lslr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_u32_x_tied2, svuint32_t, -+ z0 = svlsl_u32_x (p0, z1, z0), -+ z0 = svlsl_x (p0, z1, z0)) -+ -+/* -+** lsl_u32_x_untied: -+** ( -+** movprfx z0, z1 -+** lsl z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0, z2 -+** lslr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_u32_x_untied, svuint32_t, -+ z0 = svlsl_u32_x (p0, z1, z2), -+ z0 = svlsl_x (p0, z1, z2)) -+ -+/* -+** lsl_w0_u32_x_tied1: -+** mov (z[0-9]+\.s), w0 -+** lsl z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_w0_u32_x_tied1, svuint32_t, uint32_t, -+ z0 = svlsl_n_u32_x (p0, z0, x0), -+ z0 = svlsl_x (p0, z0, x0)) -+ -+/* -+** lsl_w0_u32_x_untied: -+** mov z0\.s, w0 -+** lslr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_w0_u32_x_untied, svuint32_t, uint32_t, -+ z0 = svlsl_n_u32_x (p0, z1, x0), -+ z0 = svlsl_x (p0, z1, x0)) -+ -+/* -+** lsl_1_u32_x_tied1: -+** lsl z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_u32_x_tied1, svuint32_t, -+ z0 = svlsl_n_u32_x (p0, z0, 1), -+ z0 = svlsl_x (p0, z0, 1)) -+ -+/* -+** lsl_1_u32_x_untied: -+** lsl z0\.s, z1\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_u32_x_untied, svuint32_t, -+ z0 = svlsl_n_u32_x (p0, z1, 1), -+ z0 = svlsl_x (p0, z1, 1)) -+ -+/* -+** lsl_31_u32_x_tied1: -+** lsl z0\.s, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_31_u32_x_tied1, svuint32_t, -+ z0 = svlsl_n_u32_x (p0, z0, 31), -+ z0 = svlsl_x (p0, z0, 31)) -+ -+/* -+** lsl_31_u32_x_untied: -+** lsl z0\.s, z1\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_31_u32_x_untied, svuint32_t, -+ z0 = svlsl_n_u32_x (p0, z1, 31), -+ z0 = svlsl_x (p0, z1, 31)) -+ -+/* -+** lsl_32_u32_x_tied1: -+** mov (z[0-9]+\.s), #32 -+** lsl z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_32_u32_x_tied1, svuint32_t, -+ z0 = svlsl_n_u32_x (p0, z0, 32), -+ z0 = svlsl_x (p0, z0, 32)) -+ -+/* -+** lsl_32_u32_x_untied: -+** mov z0\.s, #32 -+** lslr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_32_u32_x_untied, svuint32_t, -+ z0 = svlsl_n_u32_x (p0, z1, 32), -+ z0 = svlsl_x (p0, z1, 32)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_u64.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_u64.c -new file mode 100644 -index 000000000..7b12bd43e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_u64.c -@@ -0,0 +1,351 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lsl_u64_m_tied1: -+** lsl z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_u64_m_tied1, svuint64_t, -+ z0 = svlsl_u64_m (p0, z0, z1), -+ z0 = svlsl_m (p0, z0, z1)) -+ -+/* -+** lsl_u64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** lsl z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_u64_m_tied2, svuint64_t, -+ z0 = svlsl_u64_m (p0, z1, z0), -+ z0 = svlsl_m (p0, z1, z0)) -+ -+/* -+** lsl_u64_m_untied: -+** movprfx z0, z1 -+** lsl z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_u64_m_untied, svuint64_t, -+ z0 = svlsl_u64_m (p0, z1, z2), -+ z0 = svlsl_m (p0, z1, z2)) -+ -+/* -+** lsl_x0_u64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** lsl z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_x0_u64_m_tied1, svuint64_t, uint64_t, -+ z0 = svlsl_n_u64_m (p0, z0, x0), -+ z0 = svlsl_m (p0, z0, x0)) -+ -+/* -+** lsl_x0_u64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** lsl z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_x0_u64_m_untied, svuint64_t, uint64_t, -+ z0 = svlsl_n_u64_m (p0, z1, x0), -+ z0 = svlsl_m (p0, z1, x0)) -+ -+/* -+** lsl_1_u64_m_tied1: -+** lsl z0\.d, p0/m, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_u64_m_tied1, svuint64_t, -+ z0 = svlsl_n_u64_m (p0, z0, 1), -+ z0 = svlsl_m (p0, z0, 1)) -+ -+/* -+** lsl_1_u64_m_untied: -+** movprfx z0, z1 -+** lsl z0\.d, p0/m, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_u64_m_untied, svuint64_t, -+ z0 = svlsl_n_u64_m (p0, z1, 1), -+ z0 = svlsl_m (p0, z1, 1)) -+ -+/* -+** lsl_63_u64_m_tied1: -+** lsl z0\.d, p0/m, z0\.d, #63 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_63_u64_m_tied1, svuint64_t, -+ z0 = svlsl_n_u64_m (p0, z0, 63), -+ z0 = svlsl_m (p0, z0, 63)) -+ -+/* -+** lsl_63_u64_m_untied: -+** movprfx z0, z1 -+** lsl z0\.d, p0/m, z0\.d, #63 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_63_u64_m_untied, svuint64_t, -+ z0 = svlsl_n_u64_m (p0, z1, 63), -+ z0 = svlsl_m (p0, z1, 63)) -+ -+/* -+** lsl_64_u64_m_tied1: -+** mov (z[0-9]+\.d), #64 -+** lsl z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_64_u64_m_tied1, svuint64_t, -+ z0 = svlsl_n_u64_m (p0, z0, 64), -+ z0 = svlsl_m (p0, z0, 64)) -+ -+/* -+** lsl_64_u64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #64 -+** movprfx z0, z1 -+** lsl z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_64_u64_m_untied, svuint64_t, -+ z0 = svlsl_n_u64_m (p0, z1, 64), -+ z0 = svlsl_m (p0, z1, 64)) -+ -+/* -+** lsl_u64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** lsl z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_u64_z_tied1, svuint64_t, -+ z0 = svlsl_u64_z (p0, z0, z1), -+ z0 = svlsl_z (p0, z0, z1)) -+ -+/* -+** lsl_u64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** lslr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_u64_z_tied2, svuint64_t, -+ z0 = svlsl_u64_z (p0, z1, z0), -+ z0 = svlsl_z (p0, z1, z0)) -+ -+/* -+** lsl_u64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** lsl z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** lslr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_u64_z_untied, svuint64_t, -+ z0 = svlsl_u64_z (p0, z1, z2), -+ z0 = svlsl_z (p0, z1, z2)) -+ -+/* -+** lsl_x0_u64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** 
movprfx z0\.d, p0/z, z0\.d -+** lsl z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_x0_u64_z_tied1, svuint64_t, uint64_t, -+ z0 = svlsl_n_u64_z (p0, z0, x0), -+ z0 = svlsl_z (p0, z0, x0)) -+ -+/* -+** lsl_x0_u64_z_untied: -+** mov (z[0-9]+\.d), x0 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** lsl z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** lslr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_x0_u64_z_untied, svuint64_t, uint64_t, -+ z0 = svlsl_n_u64_z (p0, z1, x0), -+ z0 = svlsl_z (p0, z1, x0)) -+ -+/* -+** lsl_1_u64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** lsl z0\.d, p0/m, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_u64_z_tied1, svuint64_t, -+ z0 = svlsl_n_u64_z (p0, z0, 1), -+ z0 = svlsl_z (p0, z0, 1)) -+ -+/* -+** lsl_1_u64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** lsl z0\.d, p0/m, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_u64_z_untied, svuint64_t, -+ z0 = svlsl_n_u64_z (p0, z1, 1), -+ z0 = svlsl_z (p0, z1, 1)) -+ -+/* -+** lsl_63_u64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** lsl z0\.d, p0/m, z0\.d, #63 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_63_u64_z_tied1, svuint64_t, -+ z0 = svlsl_n_u64_z (p0, z0, 63), -+ z0 = svlsl_z (p0, z0, 63)) -+ -+/* -+** lsl_63_u64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** lsl z0\.d, p0/m, z0\.d, #63 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_63_u64_z_untied, svuint64_t, -+ z0 = svlsl_n_u64_z (p0, z1, 63), -+ z0 = svlsl_z (p0, z1, 63)) -+ -+/* -+** lsl_64_u64_z_tied1: -+** mov (z[0-9]+\.d), #64 -+** movprfx z0\.d, p0/z, z0\.d -+** lsl z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_64_u64_z_tied1, svuint64_t, -+ z0 = svlsl_n_u64_z (p0, z0, 64), -+ z0 = svlsl_z (p0, z0, 64)) -+ -+/* -+** lsl_64_u64_z_untied: -+** mov (z[0-9]+\.d), #64 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** lsl z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** lslr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_64_u64_z_untied, svuint64_t, -+ z0 = svlsl_n_u64_z (p0, z1, 64), -+ z0 = svlsl_z (p0, z1, 64)) -+ -+/* -+** lsl_u64_x_tied1: -+** lsl z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_u64_x_tied1, svuint64_t, -+ z0 = svlsl_u64_x (p0, z0, z1), -+ z0 = svlsl_x (p0, z0, z1)) -+ -+/* -+** lsl_u64_x_tied2: -+** lslr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_u64_x_tied2, svuint64_t, -+ z0 = svlsl_u64_x (p0, z1, z0), -+ z0 = svlsl_x (p0, z1, z0)) -+ -+/* -+** lsl_u64_x_untied: -+** ( -+** movprfx z0, z1 -+** lsl z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0, z2 -+** lslr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_u64_x_untied, svuint64_t, -+ z0 = svlsl_u64_x (p0, z1, z2), -+ z0 = svlsl_x (p0, z1, z2)) -+ -+/* -+** lsl_x0_u64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** lsl z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_x0_u64_x_tied1, svuint64_t, uint64_t, -+ z0 = svlsl_n_u64_x (p0, z0, x0), -+ z0 = svlsl_x (p0, z0, x0)) -+ -+/* -+** lsl_x0_u64_x_untied: -+** mov z0\.d, x0 -+** lslr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_x0_u64_x_untied, svuint64_t, uint64_t, -+ z0 = svlsl_n_u64_x (p0, z1, x0), -+ z0 = svlsl_x (p0, z1, x0)) -+ -+/* -+** lsl_1_u64_x_tied1: -+** lsl z0\.d, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_u64_x_tied1, svuint64_t, -+ z0 = svlsl_n_u64_x (p0, z0, 1), -+ z0 = svlsl_x (p0, z0, 1)) -+ -+/* -+** lsl_1_u64_x_untied: -+** lsl z0\.d, z1\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_u64_x_untied, svuint64_t, -+ z0 = svlsl_n_u64_x (p0, z1, 1), -+ z0 = svlsl_x (p0, z1, 
1)) -+ -+/* -+** lsl_63_u64_x_tied1: -+** lsl z0\.d, z0\.d, #63 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_63_u64_x_tied1, svuint64_t, -+ z0 = svlsl_n_u64_x (p0, z0, 63), -+ z0 = svlsl_x (p0, z0, 63)) -+ -+/* -+** lsl_63_u64_x_untied: -+** lsl z0\.d, z1\.d, #63 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_63_u64_x_untied, svuint64_t, -+ z0 = svlsl_n_u64_x (p0, z1, 63), -+ z0 = svlsl_x (p0, z1, 63)) -+ -+/* -+** lsl_64_u64_x_tied1: -+** mov (z[0-9]+\.d), #64 -+** lsl z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_64_u64_x_tied1, svuint64_t, -+ z0 = svlsl_n_u64_x (p0, z0, 64), -+ z0 = svlsl_x (p0, z0, 64)) -+ -+/* -+** lsl_64_u64_x_untied: -+** mov z0\.d, #64 -+** lslr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_64_u64_x_untied, svuint64_t, -+ z0 = svlsl_n_u64_x (p0, z1, 64), -+ z0 = svlsl_x (p0, z1, 64)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_u8.c -new file mode 100644 -index 000000000..894b55138 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_u8.c -@@ -0,0 +1,351 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lsl_u8_m_tied1: -+** lsl z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_u8_m_tied1, svuint8_t, -+ z0 = svlsl_u8_m (p0, z0, z1), -+ z0 = svlsl_m (p0, z0, z1)) -+ -+/* -+** lsl_u8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** lsl z0\.b, p0/m, z0\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_u8_m_tied2, svuint8_t, -+ z0 = svlsl_u8_m (p0, z1, z0), -+ z0 = svlsl_m (p0, z1, z0)) -+ -+/* -+** lsl_u8_m_untied: -+** movprfx z0, z1 -+** lsl z0\.b, p0/m, z0\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_u8_m_untied, svuint8_t, -+ z0 = svlsl_u8_m (p0, z1, z2), -+ z0 = svlsl_m (p0, z1, z2)) -+ -+/* -+** lsl_w0_u8_m_tied1: -+** mov (z[0-9]+\.b), w0 -+** lsl z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_w0_u8_m_tied1, svuint8_t, uint8_t, -+ z0 = svlsl_n_u8_m (p0, z0, x0), -+ z0 = svlsl_m (p0, z0, x0)) -+ -+/* -+** lsl_w0_u8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** lsl z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_w0_u8_m_untied, svuint8_t, uint8_t, -+ z0 = svlsl_n_u8_m (p0, z1, x0), -+ z0 = svlsl_m (p0, z1, x0)) -+ -+/* -+** lsl_1_u8_m_tied1: -+** lsl z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_u8_m_tied1, svuint8_t, -+ z0 = svlsl_n_u8_m (p0, z0, 1), -+ z0 = svlsl_m (p0, z0, 1)) -+ -+/* -+** lsl_1_u8_m_untied: -+** movprfx z0, z1 -+** lsl z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_u8_m_untied, svuint8_t, -+ z0 = svlsl_n_u8_m (p0, z1, 1), -+ z0 = svlsl_m (p0, z1, 1)) -+ -+/* -+** lsl_7_u8_m_tied1: -+** lsl z0\.b, p0/m, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_7_u8_m_tied1, svuint8_t, -+ z0 = svlsl_n_u8_m (p0, z0, 7), -+ z0 = svlsl_m (p0, z0, 7)) -+ -+/* -+** lsl_7_u8_m_untied: -+** movprfx z0, z1 -+** lsl z0\.b, p0/m, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_7_u8_m_untied, svuint8_t, -+ z0 = svlsl_n_u8_m (p0, z1, 7), -+ z0 = svlsl_m (p0, z1, 7)) -+ -+/* -+** lsl_8_u8_m_tied1: -+** mov (z[0-9]+\.b), #8 -+** lsl z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_8_u8_m_tied1, svuint8_t, -+ z0 = svlsl_n_u8_m (p0, z0, 8), -+ z0 = svlsl_m (p0, z0, 8)) -+ -+/* -+** lsl_8_u8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), #8 -+** movprfx z0, z1 -+** lsl z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_8_u8_m_untied, svuint8_t, -+ z0 = svlsl_n_u8_m (p0, z1, 8), -+ 
z0 = svlsl_m (p0, z1, 8)) -+ -+/* -+** lsl_u8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** lsl z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_u8_z_tied1, svuint8_t, -+ z0 = svlsl_u8_z (p0, z0, z1), -+ z0 = svlsl_z (p0, z0, z1)) -+ -+/* -+** lsl_u8_z_tied2: -+** movprfx z0\.b, p0/z, z0\.b -+** lslr z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_u8_z_tied2, svuint8_t, -+ z0 = svlsl_u8_z (p0, z1, z0), -+ z0 = svlsl_z (p0, z1, z0)) -+ -+/* -+** lsl_u8_z_untied: -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** lsl z0\.b, p0/m, z0\.b, z2\.b -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** lslr z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_u8_z_untied, svuint8_t, -+ z0 = svlsl_u8_z (p0, z1, z2), -+ z0 = svlsl_z (p0, z1, z2)) -+ -+/* -+** lsl_w0_u8_z_tied1: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** lsl z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_w0_u8_z_tied1, svuint8_t, uint8_t, -+ z0 = svlsl_n_u8_z (p0, z0, x0), -+ z0 = svlsl_z (p0, z0, x0)) -+ -+/* -+** lsl_w0_u8_z_untied: -+** mov (z[0-9]+\.b), w0 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** lsl z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** lslr z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_w0_u8_z_untied, svuint8_t, uint8_t, -+ z0 = svlsl_n_u8_z (p0, z1, x0), -+ z0 = svlsl_z (p0, z1, x0)) -+ -+/* -+** lsl_1_u8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** lsl z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_u8_z_tied1, svuint8_t, -+ z0 = svlsl_n_u8_z (p0, z0, 1), -+ z0 = svlsl_z (p0, z0, 1)) -+ -+/* -+** lsl_1_u8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** lsl z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_u8_z_untied, svuint8_t, -+ z0 = svlsl_n_u8_z (p0, z1, 1), -+ z0 = svlsl_z (p0, z1, 1)) -+ -+/* -+** lsl_7_u8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** lsl z0\.b, p0/m, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_7_u8_z_tied1, svuint8_t, -+ z0 = svlsl_n_u8_z (p0, z0, 7), -+ z0 = svlsl_z (p0, z0, 7)) -+ -+/* -+** lsl_7_u8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** lsl z0\.b, p0/m, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_7_u8_z_untied, svuint8_t, -+ z0 = svlsl_n_u8_z (p0, z1, 7), -+ z0 = svlsl_z (p0, z1, 7)) -+ -+/* -+** lsl_8_u8_z_tied1: -+** mov (z[0-9]+\.b), #8 -+** movprfx z0\.b, p0/z, z0\.b -+** lsl z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_8_u8_z_tied1, svuint8_t, -+ z0 = svlsl_n_u8_z (p0, z0, 8), -+ z0 = svlsl_z (p0, z0, 8)) -+ -+/* -+** lsl_8_u8_z_untied: -+** mov (z[0-9]+\.b), #8 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** lsl z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** lslr z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_8_u8_z_untied, svuint8_t, -+ z0 = svlsl_n_u8_z (p0, z1, 8), -+ z0 = svlsl_z (p0, z1, 8)) -+ -+/* -+** lsl_u8_x_tied1: -+** lsl z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_u8_x_tied1, svuint8_t, -+ z0 = svlsl_u8_x (p0, z0, z1), -+ z0 = svlsl_x (p0, z0, z1)) -+ -+/* -+** lsl_u8_x_tied2: -+** lslr z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_u8_x_tied2, svuint8_t, -+ z0 = svlsl_u8_x (p0, z1, z0), -+ z0 = svlsl_x (p0, z1, z0)) -+ -+/* -+** lsl_u8_x_untied: -+** ( -+** movprfx z0, z1 -+** lsl z0\.b, p0/m, z0\.b, z2\.b -+** | -+** movprfx z0, z2 -+** lslr z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_u8_x_untied, svuint8_t, -+ z0 = svlsl_u8_x (p0, z1, z2), -+ z0 = svlsl_x (p0, z1, z2)) -+ -+/* -+** lsl_w0_u8_x_tied1: -+** mov (z[0-9]+\.b), w0 
-+** lsl z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_w0_u8_x_tied1, svuint8_t, uint8_t, -+ z0 = svlsl_n_u8_x (p0, z0, x0), -+ z0 = svlsl_x (p0, z0, x0)) -+ -+/* -+** lsl_w0_u8_x_untied: -+** mov z0\.b, w0 -+** lslr z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_w0_u8_x_untied, svuint8_t, uint8_t, -+ z0 = svlsl_n_u8_x (p0, z1, x0), -+ z0 = svlsl_x (p0, z1, x0)) -+ -+/* -+** lsl_1_u8_x_tied1: -+** lsl z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_u8_x_tied1, svuint8_t, -+ z0 = svlsl_n_u8_x (p0, z0, 1), -+ z0 = svlsl_x (p0, z0, 1)) -+ -+/* -+** lsl_1_u8_x_untied: -+** lsl z0\.b, z1\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_1_u8_x_untied, svuint8_t, -+ z0 = svlsl_n_u8_x (p0, z1, 1), -+ z0 = svlsl_x (p0, z1, 1)) -+ -+/* -+** lsl_7_u8_x_tied1: -+** lsl z0\.b, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_7_u8_x_tied1, svuint8_t, -+ z0 = svlsl_n_u8_x (p0, z0, 7), -+ z0 = svlsl_x (p0, z0, 7)) -+ -+/* -+** lsl_7_u8_x_untied: -+** lsl z0\.b, z1\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_7_u8_x_untied, svuint8_t, -+ z0 = svlsl_n_u8_x (p0, z1, 7), -+ z0 = svlsl_x (p0, z1, 7)) -+ -+/* -+** lsl_8_u8_x_tied1: -+** mov (z[0-9]+\.b), #8 -+** lsl z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_8_u8_x_tied1, svuint8_t, -+ z0 = svlsl_n_u8_x (p0, z0, 8), -+ z0 = svlsl_x (p0, z0, 8)) -+ -+/* -+** lsl_8_u8_x_untied: -+** mov z0\.b, #8 -+** lslr z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_8_u8_x_untied, svuint8_t, -+ z0 = svlsl_n_u8_x (p0, z1, 8), -+ z0 = svlsl_x (p0, z1, 8)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_wide_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_wide_s16.c -new file mode 100644 -index 000000000..8d63d3909 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_wide_s16.c -@@ -0,0 +1,331 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lsl_wide_s16_m_tied1: -+** lsl z0\.h, p0/m, z0\.h, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_wide_s16_m_tied1, svint16_t, svuint64_t, -+ z0 = svlsl_wide_s16_m (p0, z0, z4), -+ z0 = svlsl_wide_m (p0, z0, z4)) -+ -+/* -+** lsl_wide_s16_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z4 -+** lsl z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (lsl_wide_s16_m_tied2, svint16_t, svuint64_t, -+ z0_res = svlsl_wide_s16_m (p0, z4, z0), -+ z0_res = svlsl_wide_m (p0, z4, z0)) -+ -+/* -+** lsl_wide_s16_m_untied: -+** movprfx z0, z1 -+** lsl z0\.h, p0/m, z0\.h, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_wide_s16_m_untied, svint16_t, svuint64_t, -+ z0 = svlsl_wide_s16_m (p0, z1, z4), -+ z0 = svlsl_wide_m (p0, z1, z4)) -+ -+/* -+** lsl_wide_x0_s16_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** lsl z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_wide_x0_s16_m_tied1, svint16_t, uint64_t, -+ z0 = svlsl_wide_n_s16_m (p0, z0, x0), -+ z0 = svlsl_wide_m (p0, z0, x0)) -+ -+/* -+** lsl_wide_x0_s16_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** lsl z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_wide_x0_s16_m_untied, svint16_t, uint64_t, -+ z0 = svlsl_wide_n_s16_m (p0, z1, x0), -+ z0 = svlsl_wide_m (p0, z1, x0)) -+ -+/* -+** lsl_wide_1_s16_m_tied1: -+** lsl z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_1_s16_m_tied1, svint16_t, -+ z0 = svlsl_wide_n_s16_m (p0, z0, 1), -+ z0 = svlsl_wide_m (p0, z0, 1)) -+ -+/* -+** lsl_wide_1_s16_m_untied: -+** movprfx z0, z1 -+** lsl z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_1_s16_m_untied, 
svint16_t, -+ z0 = svlsl_wide_n_s16_m (p0, z1, 1), -+ z0 = svlsl_wide_m (p0, z1, 1)) -+ -+/* -+** lsl_wide_15_s16_m_tied1: -+** lsl z0\.h, p0/m, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_15_s16_m_tied1, svint16_t, -+ z0 = svlsl_wide_n_s16_m (p0, z0, 15), -+ z0 = svlsl_wide_m (p0, z0, 15)) -+ -+/* -+** lsl_wide_15_s16_m_untied: -+** movprfx z0, z1 -+** lsl z0\.h, p0/m, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_15_s16_m_untied, svint16_t, -+ z0 = svlsl_wide_n_s16_m (p0, z1, 15), -+ z0 = svlsl_wide_m (p0, z1, 15)) -+ -+/* -+** lsl_wide_16_s16_m_tied1: -+** mov (z[0-9]+\.d), #16 -+** lsl z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_16_s16_m_tied1, svint16_t, -+ z0 = svlsl_wide_n_s16_m (p0, z0, 16), -+ z0 = svlsl_wide_m (p0, z0, 16)) -+ -+/* -+** lsl_wide_16_s16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #16 -+** movprfx z0, z1 -+** lsl z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_16_s16_m_untied, svint16_t, -+ z0 = svlsl_wide_n_s16_m (p0, z1, 16), -+ z0 = svlsl_wide_m (p0, z1, 16)) -+ -+/* -+** lsl_wide_s16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** lsl z0\.h, p0/m, z0\.h, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_wide_s16_z_tied1, svint16_t, svuint64_t, -+ z0 = svlsl_wide_s16_z (p0, z0, z4), -+ z0 = svlsl_wide_z (p0, z0, z4)) -+ -+/* -+** lsl_wide_s16_z_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.h, p0/z, z4\.h -+** lsl z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (lsl_wide_s16_z_tied2, svint16_t, svuint64_t, -+ z0_res = svlsl_wide_s16_z (p0, z4, z0), -+ z0_res = svlsl_wide_z (p0, z4, z0)) -+ -+/* -+** lsl_wide_s16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** lsl z0\.h, p0/m, z0\.h, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_wide_s16_z_untied, svint16_t, svuint64_t, -+ z0 = svlsl_wide_s16_z (p0, z1, z4), -+ z0 = svlsl_wide_z (p0, z1, z4)) -+ -+/* -+** lsl_wide_x0_s16_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.h, p0/z, z0\.h -+** lsl z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_wide_x0_s16_z_tied1, svint16_t, uint64_t, -+ z0 = svlsl_wide_n_s16_z (p0, z0, x0), -+ z0 = svlsl_wide_z (p0, z0, x0)) -+ -+/* -+** lsl_wide_x0_s16_z_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.h, p0/z, z1\.h -+** lsl z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_wide_x0_s16_z_untied, svint16_t, uint64_t, -+ z0 = svlsl_wide_n_s16_z (p0, z1, x0), -+ z0 = svlsl_wide_z (p0, z1, x0)) -+ -+/* -+** lsl_wide_1_s16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** lsl z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_1_s16_z_tied1, svint16_t, -+ z0 = svlsl_wide_n_s16_z (p0, z0, 1), -+ z0 = svlsl_wide_z (p0, z0, 1)) -+ -+/* -+** lsl_wide_1_s16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** lsl z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_1_s16_z_untied, svint16_t, -+ z0 = svlsl_wide_n_s16_z (p0, z1, 1), -+ z0 = svlsl_wide_z (p0, z1, 1)) -+ -+/* -+** lsl_wide_15_s16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** lsl z0\.h, p0/m, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_15_s16_z_tied1, svint16_t, -+ z0 = svlsl_wide_n_s16_z (p0, z0, 15), -+ z0 = svlsl_wide_z (p0, z0, 15)) -+ -+/* -+** lsl_wide_15_s16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** lsl z0\.h, p0/m, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_15_s16_z_untied, svint16_t, -+ z0 = svlsl_wide_n_s16_z (p0, z1, 15), -+ z0 = svlsl_wide_z (p0, z1, 15)) -+ -+/* -+** lsl_wide_16_s16_z_tied1: -+** mov (z[0-9]+\.d), #16 -+** movprfx z0\.h, p0/z, z0\.h -+** lsl z0\.h, p0/m, z0\.h, \1 -+** ret 
-+*/ -+TEST_UNIFORM_Z (lsl_wide_16_s16_z_tied1, svint16_t, -+ z0 = svlsl_wide_n_s16_z (p0, z0, 16), -+ z0 = svlsl_wide_z (p0, z0, 16)) -+ -+/* -+** lsl_wide_16_s16_z_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #16 -+** movprfx z0\.h, p0/z, z1\.h -+** lsl z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_16_s16_z_untied, svint16_t, -+ z0 = svlsl_wide_n_s16_z (p0, z1, 16), -+ z0 = svlsl_wide_z (p0, z1, 16)) -+ -+/* -+** lsl_wide_s16_x_tied1: -+** lsl z0\.h, z0\.h, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_wide_s16_x_tied1, svint16_t, svuint64_t, -+ z0 = svlsl_wide_s16_x (p0, z0, z4), -+ z0 = svlsl_wide_x (p0, z0, z4)) -+ -+/* -+** lsl_wide_s16_x_tied2: -+** lsl z0\.h, z4\.h, z0\.d -+** ret -+*/ -+TEST_DUAL_Z_REV (lsl_wide_s16_x_tied2, svint16_t, svuint64_t, -+ z0_res = svlsl_wide_s16_x (p0, z4, z0), -+ z0_res = svlsl_wide_x (p0, z4, z0)) -+ -+/* -+** lsl_wide_s16_x_untied: -+** lsl z0\.h, z1\.h, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_wide_s16_x_untied, svint16_t, svuint64_t, -+ z0 = svlsl_wide_s16_x (p0, z1, z4), -+ z0 = svlsl_wide_x (p0, z1, z4)) -+ -+/* -+** lsl_wide_x0_s16_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** lsl z0\.h, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_wide_x0_s16_x_tied1, svint16_t, uint64_t, -+ z0 = svlsl_wide_n_s16_x (p0, z0, x0), -+ z0 = svlsl_wide_x (p0, z0, x0)) -+ -+/* -+** lsl_wide_x0_s16_x_untied: -+** mov (z[0-9]+\.d), x0 -+** lsl z0\.h, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_wide_x0_s16_x_untied, svint16_t, uint64_t, -+ z0 = svlsl_wide_n_s16_x (p0, z1, x0), -+ z0 = svlsl_wide_x (p0, z1, x0)) -+ -+/* -+** lsl_wide_1_s16_x_tied1: -+** lsl z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_1_s16_x_tied1, svint16_t, -+ z0 = svlsl_wide_n_s16_x (p0, z0, 1), -+ z0 = svlsl_wide_x (p0, z0, 1)) -+ -+/* -+** lsl_wide_1_s16_x_untied: -+** lsl z0\.h, z1\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_1_s16_x_untied, svint16_t, -+ z0 = svlsl_wide_n_s16_x (p0, z1, 1), -+ z0 = svlsl_wide_x (p0, z1, 1)) -+ -+/* -+** lsl_wide_15_s16_x_tied1: -+** lsl z0\.h, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_15_s16_x_tied1, svint16_t, -+ z0 = svlsl_wide_n_s16_x (p0, z0, 15), -+ z0 = svlsl_wide_x (p0, z0, 15)) -+ -+/* -+** lsl_wide_15_s16_x_untied: -+** lsl z0\.h, z1\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_15_s16_x_untied, svint16_t, -+ z0 = svlsl_wide_n_s16_x (p0, z1, 15), -+ z0 = svlsl_wide_x (p0, z1, 15)) -+ -+/* -+** lsl_wide_16_s16_x_tied1: -+** mov (z[0-9]+\.d), #16 -+** lsl z0\.h, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_16_s16_x_tied1, svint16_t, -+ z0 = svlsl_wide_n_s16_x (p0, z0, 16), -+ z0 = svlsl_wide_x (p0, z0, 16)) -+ -+/* -+** lsl_wide_16_s16_x_untied: -+** mov (z[0-9]+\.d), #16 -+** lsl z0\.h, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_16_s16_x_untied, svint16_t, -+ z0 = svlsl_wide_n_s16_x (p0, z1, 16), -+ z0 = svlsl_wide_x (p0, z1, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_wide_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_wide_s32.c -new file mode 100644 -index 000000000..acd813df3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_wide_s32.c -@@ -0,0 +1,331 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lsl_wide_s32_m_tied1: -+** lsl z0\.s, p0/m, z0\.s, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_wide_s32_m_tied1, svint32_t, svuint64_t, -+ z0 = svlsl_wide_s32_m (p0, z0, z4), -+ z0 = svlsl_wide_m (p0, z0, z4)) -+ -+/* -+** lsl_wide_s32_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** 
movprfx z0, z4 -+** lsl z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (lsl_wide_s32_m_tied2, svint32_t, svuint64_t, -+ z0_res = svlsl_wide_s32_m (p0, z4, z0), -+ z0_res = svlsl_wide_m (p0, z4, z0)) -+ -+/* -+** lsl_wide_s32_m_untied: -+** movprfx z0, z1 -+** lsl z0\.s, p0/m, z0\.s, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_wide_s32_m_untied, svint32_t, svuint64_t, -+ z0 = svlsl_wide_s32_m (p0, z1, z4), -+ z0 = svlsl_wide_m (p0, z1, z4)) -+ -+/* -+** lsl_wide_x0_s32_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** lsl z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_wide_x0_s32_m_tied1, svint32_t, uint64_t, -+ z0 = svlsl_wide_n_s32_m (p0, z0, x0), -+ z0 = svlsl_wide_m (p0, z0, x0)) -+ -+/* -+** lsl_wide_x0_s32_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** lsl z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_wide_x0_s32_m_untied, svint32_t, uint64_t, -+ z0 = svlsl_wide_n_s32_m (p0, z1, x0), -+ z0 = svlsl_wide_m (p0, z1, x0)) -+ -+/* -+** lsl_wide_1_s32_m_tied1: -+** lsl z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_1_s32_m_tied1, svint32_t, -+ z0 = svlsl_wide_n_s32_m (p0, z0, 1), -+ z0 = svlsl_wide_m (p0, z0, 1)) -+ -+/* -+** lsl_wide_1_s32_m_untied: -+** movprfx z0, z1 -+** lsl z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_1_s32_m_untied, svint32_t, -+ z0 = svlsl_wide_n_s32_m (p0, z1, 1), -+ z0 = svlsl_wide_m (p0, z1, 1)) -+ -+/* -+** lsl_wide_31_s32_m_tied1: -+** lsl z0\.s, p0/m, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_31_s32_m_tied1, svint32_t, -+ z0 = svlsl_wide_n_s32_m (p0, z0, 31), -+ z0 = svlsl_wide_m (p0, z0, 31)) -+ -+/* -+** lsl_wide_31_s32_m_untied: -+** movprfx z0, z1 -+** lsl z0\.s, p0/m, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_31_s32_m_untied, svint32_t, -+ z0 = svlsl_wide_n_s32_m (p0, z1, 31), -+ z0 = svlsl_wide_m (p0, z1, 31)) -+ -+/* -+** lsl_wide_32_s32_m_tied1: -+** mov (z[0-9]+\.d), #32 -+** lsl z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_32_s32_m_tied1, svint32_t, -+ z0 = svlsl_wide_n_s32_m (p0, z0, 32), -+ z0 = svlsl_wide_m (p0, z0, 32)) -+ -+/* -+** lsl_wide_32_s32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #32 -+** movprfx z0, z1 -+** lsl z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_32_s32_m_untied, svint32_t, -+ z0 = svlsl_wide_n_s32_m (p0, z1, 32), -+ z0 = svlsl_wide_m (p0, z1, 32)) -+ -+/* -+** lsl_wide_s32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** lsl z0\.s, p0/m, z0\.s, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_wide_s32_z_tied1, svint32_t, svuint64_t, -+ z0 = svlsl_wide_s32_z (p0, z0, z4), -+ z0 = svlsl_wide_z (p0, z0, z4)) -+ -+/* -+** lsl_wide_s32_z_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.s, p0/z, z4\.s -+** lsl z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (lsl_wide_s32_z_tied2, svint32_t, svuint64_t, -+ z0_res = svlsl_wide_s32_z (p0, z4, z0), -+ z0_res = svlsl_wide_z (p0, z4, z0)) -+ -+/* -+** lsl_wide_s32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** lsl z0\.s, p0/m, z0\.s, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_wide_s32_z_untied, svint32_t, svuint64_t, -+ z0 = svlsl_wide_s32_z (p0, z1, z4), -+ z0 = svlsl_wide_z (p0, z1, z4)) -+ -+/* -+** lsl_wide_x0_s32_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.s, p0/z, z0\.s -+** lsl z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_wide_x0_s32_z_tied1, svint32_t, uint64_t, -+ z0 = svlsl_wide_n_s32_z (p0, z0, x0), -+ z0 = svlsl_wide_z (p0, z0, x0)) -+ -+/* -+** lsl_wide_x0_s32_z_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), x0 -+** movprfx 
z0\.s, p0/z, z1\.s -+** lsl z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_wide_x0_s32_z_untied, svint32_t, uint64_t, -+ z0 = svlsl_wide_n_s32_z (p0, z1, x0), -+ z0 = svlsl_wide_z (p0, z1, x0)) -+ -+/* -+** lsl_wide_1_s32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** lsl z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_1_s32_z_tied1, svint32_t, -+ z0 = svlsl_wide_n_s32_z (p0, z0, 1), -+ z0 = svlsl_wide_z (p0, z0, 1)) -+ -+/* -+** lsl_wide_1_s32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** lsl z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_1_s32_z_untied, svint32_t, -+ z0 = svlsl_wide_n_s32_z (p0, z1, 1), -+ z0 = svlsl_wide_z (p0, z1, 1)) -+ -+/* -+** lsl_wide_31_s32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** lsl z0\.s, p0/m, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_31_s32_z_tied1, svint32_t, -+ z0 = svlsl_wide_n_s32_z (p0, z0, 31), -+ z0 = svlsl_wide_z (p0, z0, 31)) -+ -+/* -+** lsl_wide_31_s32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** lsl z0\.s, p0/m, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_31_s32_z_untied, svint32_t, -+ z0 = svlsl_wide_n_s32_z (p0, z1, 31), -+ z0 = svlsl_wide_z (p0, z1, 31)) -+ -+/* -+** lsl_wide_32_s32_z_tied1: -+** mov (z[0-9]+\.d), #32 -+** movprfx z0\.s, p0/z, z0\.s -+** lsl z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_32_s32_z_tied1, svint32_t, -+ z0 = svlsl_wide_n_s32_z (p0, z0, 32), -+ z0 = svlsl_wide_z (p0, z0, 32)) -+ -+/* -+** lsl_wide_32_s32_z_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #32 -+** movprfx z0\.s, p0/z, z1\.s -+** lsl z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_32_s32_z_untied, svint32_t, -+ z0 = svlsl_wide_n_s32_z (p0, z1, 32), -+ z0 = svlsl_wide_z (p0, z1, 32)) -+ -+/* -+** lsl_wide_s32_x_tied1: -+** lsl z0\.s, z0\.s, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_wide_s32_x_tied1, svint32_t, svuint64_t, -+ z0 = svlsl_wide_s32_x (p0, z0, z4), -+ z0 = svlsl_wide_x (p0, z0, z4)) -+ -+/* -+** lsl_wide_s32_x_tied2: -+** lsl z0\.s, z4\.s, z0\.d -+** ret -+*/ -+TEST_DUAL_Z_REV (lsl_wide_s32_x_tied2, svint32_t, svuint64_t, -+ z0_res = svlsl_wide_s32_x (p0, z4, z0), -+ z0_res = svlsl_wide_x (p0, z4, z0)) -+ -+/* -+** lsl_wide_s32_x_untied: -+** lsl z0\.s, z1\.s, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_wide_s32_x_untied, svint32_t, svuint64_t, -+ z0 = svlsl_wide_s32_x (p0, z1, z4), -+ z0 = svlsl_wide_x (p0, z1, z4)) -+ -+/* -+** lsl_wide_x0_s32_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** lsl z0\.s, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_wide_x0_s32_x_tied1, svint32_t, uint64_t, -+ z0 = svlsl_wide_n_s32_x (p0, z0, x0), -+ z0 = svlsl_wide_x (p0, z0, x0)) -+ -+/* -+** lsl_wide_x0_s32_x_untied: -+** mov (z[0-9]+\.d), x0 -+** lsl z0\.s, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_wide_x0_s32_x_untied, svint32_t, uint64_t, -+ z0 = svlsl_wide_n_s32_x (p0, z1, x0), -+ z0 = svlsl_wide_x (p0, z1, x0)) -+ -+/* -+** lsl_wide_1_s32_x_tied1: -+** lsl z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_1_s32_x_tied1, svint32_t, -+ z0 = svlsl_wide_n_s32_x (p0, z0, 1), -+ z0 = svlsl_wide_x (p0, z0, 1)) -+ -+/* -+** lsl_wide_1_s32_x_untied: -+** lsl z0\.s, z1\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_1_s32_x_untied, svint32_t, -+ z0 = svlsl_wide_n_s32_x (p0, z1, 1), -+ z0 = svlsl_wide_x (p0, z1, 1)) -+ -+/* -+** lsl_wide_31_s32_x_tied1: -+** lsl z0\.s, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_31_s32_x_tied1, svint32_t, -+ z0 = svlsl_wide_n_s32_x (p0, z0, 31), -+ z0 = svlsl_wide_x (p0, z0, 31)) -+ -+/* -+** lsl_wide_31_s32_x_untied: 
-+** lsl z0\.s, z1\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_31_s32_x_untied, svint32_t, -+ z0 = svlsl_wide_n_s32_x (p0, z1, 31), -+ z0 = svlsl_wide_x (p0, z1, 31)) -+ -+/* -+** lsl_wide_32_s32_x_tied1: -+** mov (z[0-9]+\.d), #32 -+** lsl z0\.s, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_32_s32_x_tied1, svint32_t, -+ z0 = svlsl_wide_n_s32_x (p0, z0, 32), -+ z0 = svlsl_wide_x (p0, z0, 32)) -+ -+/* -+** lsl_wide_32_s32_x_untied: -+** mov (z[0-9]+\.d), #32 -+** lsl z0\.s, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_32_s32_x_untied, svint32_t, -+ z0 = svlsl_wide_n_s32_x (p0, z1, 32), -+ z0 = svlsl_wide_x (p0, z1, 32)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_wide_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_wide_s8.c -new file mode 100644 -index 000000000..17e8e8685 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_wide_s8.c -@@ -0,0 +1,331 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lsl_wide_s8_m_tied1: -+** lsl z0\.b, p0/m, z0\.b, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_wide_s8_m_tied1, svint8_t, svuint64_t, -+ z0 = svlsl_wide_s8_m (p0, z0, z4), -+ z0 = svlsl_wide_m (p0, z0, z4)) -+ -+/* -+** lsl_wide_s8_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z4 -+** lsl z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (lsl_wide_s8_m_tied2, svint8_t, svuint64_t, -+ z0_res = svlsl_wide_s8_m (p0, z4, z0), -+ z0_res = svlsl_wide_m (p0, z4, z0)) -+ -+/* -+** lsl_wide_s8_m_untied: -+** movprfx z0, z1 -+** lsl z0\.b, p0/m, z0\.b, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_wide_s8_m_untied, svint8_t, svuint64_t, -+ z0 = svlsl_wide_s8_m (p0, z1, z4), -+ z0 = svlsl_wide_m (p0, z1, z4)) -+ -+/* -+** lsl_wide_x0_s8_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** lsl z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_wide_x0_s8_m_tied1, svint8_t, uint64_t, -+ z0 = svlsl_wide_n_s8_m (p0, z0, x0), -+ z0 = svlsl_wide_m (p0, z0, x0)) -+ -+/* -+** lsl_wide_x0_s8_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** lsl z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_wide_x0_s8_m_untied, svint8_t, uint64_t, -+ z0 = svlsl_wide_n_s8_m (p0, z1, x0), -+ z0 = svlsl_wide_m (p0, z1, x0)) -+ -+/* -+** lsl_wide_1_s8_m_tied1: -+** lsl z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_1_s8_m_tied1, svint8_t, -+ z0 = svlsl_wide_n_s8_m (p0, z0, 1), -+ z0 = svlsl_wide_m (p0, z0, 1)) -+ -+/* -+** lsl_wide_1_s8_m_untied: -+** movprfx z0, z1 -+** lsl z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_1_s8_m_untied, svint8_t, -+ z0 = svlsl_wide_n_s8_m (p0, z1, 1), -+ z0 = svlsl_wide_m (p0, z1, 1)) -+ -+/* -+** lsl_wide_7_s8_m_tied1: -+** lsl z0\.b, p0/m, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_7_s8_m_tied1, svint8_t, -+ z0 = svlsl_wide_n_s8_m (p0, z0, 7), -+ z0 = svlsl_wide_m (p0, z0, 7)) -+ -+/* -+** lsl_wide_7_s8_m_untied: -+** movprfx z0, z1 -+** lsl z0\.b, p0/m, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_7_s8_m_untied, svint8_t, -+ z0 = svlsl_wide_n_s8_m (p0, z1, 7), -+ z0 = svlsl_wide_m (p0, z1, 7)) -+ -+/* -+** lsl_wide_8_s8_m_tied1: -+** mov (z[0-9]+\.d), #8 -+** lsl z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_8_s8_m_tied1, svint8_t, -+ z0 = svlsl_wide_n_s8_m (p0, z0, 8), -+ z0 = svlsl_wide_m (p0, z0, 8)) -+ -+/* -+** lsl_wide_8_s8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #8 -+** movprfx z0, z1 -+** lsl z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z 
(lsl_wide_8_s8_m_untied, svint8_t, -+ z0 = svlsl_wide_n_s8_m (p0, z1, 8), -+ z0 = svlsl_wide_m (p0, z1, 8)) -+ -+/* -+** lsl_wide_s8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** lsl z0\.b, p0/m, z0\.b, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_wide_s8_z_tied1, svint8_t, svuint64_t, -+ z0 = svlsl_wide_s8_z (p0, z0, z4), -+ z0 = svlsl_wide_z (p0, z0, z4)) -+ -+/* -+** lsl_wide_s8_z_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.b, p0/z, z4\.b -+** lsl z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (lsl_wide_s8_z_tied2, svint8_t, svuint64_t, -+ z0_res = svlsl_wide_s8_z (p0, z4, z0), -+ z0_res = svlsl_wide_z (p0, z4, z0)) -+ -+/* -+** lsl_wide_s8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** lsl z0\.b, p0/m, z0\.b, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_wide_s8_z_untied, svint8_t, svuint64_t, -+ z0 = svlsl_wide_s8_z (p0, z1, z4), -+ z0 = svlsl_wide_z (p0, z1, z4)) -+ -+/* -+** lsl_wide_x0_s8_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.b, p0/z, z0\.b -+** lsl z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_wide_x0_s8_z_tied1, svint8_t, uint64_t, -+ z0 = svlsl_wide_n_s8_z (p0, z0, x0), -+ z0 = svlsl_wide_z (p0, z0, x0)) -+ -+/* -+** lsl_wide_x0_s8_z_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.b, p0/z, z1\.b -+** lsl z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_wide_x0_s8_z_untied, svint8_t, uint64_t, -+ z0 = svlsl_wide_n_s8_z (p0, z1, x0), -+ z0 = svlsl_wide_z (p0, z1, x0)) -+ -+/* -+** lsl_wide_1_s8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** lsl z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_1_s8_z_tied1, svint8_t, -+ z0 = svlsl_wide_n_s8_z (p0, z0, 1), -+ z0 = svlsl_wide_z (p0, z0, 1)) -+ -+/* -+** lsl_wide_1_s8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** lsl z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_1_s8_z_untied, svint8_t, -+ z0 = svlsl_wide_n_s8_z (p0, z1, 1), -+ z0 = svlsl_wide_z (p0, z1, 1)) -+ -+/* -+** lsl_wide_7_s8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** lsl z0\.b, p0/m, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_7_s8_z_tied1, svint8_t, -+ z0 = svlsl_wide_n_s8_z (p0, z0, 7), -+ z0 = svlsl_wide_z (p0, z0, 7)) -+ -+/* -+** lsl_wide_7_s8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** lsl z0\.b, p0/m, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_7_s8_z_untied, svint8_t, -+ z0 = svlsl_wide_n_s8_z (p0, z1, 7), -+ z0 = svlsl_wide_z (p0, z1, 7)) -+ -+/* -+** lsl_wide_8_s8_z_tied1: -+** mov (z[0-9]+\.d), #8 -+** movprfx z0\.b, p0/z, z0\.b -+** lsl z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_8_s8_z_tied1, svint8_t, -+ z0 = svlsl_wide_n_s8_z (p0, z0, 8), -+ z0 = svlsl_wide_z (p0, z0, 8)) -+ -+/* -+** lsl_wide_8_s8_z_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #8 -+** movprfx z0\.b, p0/z, z1\.b -+** lsl z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_8_s8_z_untied, svint8_t, -+ z0 = svlsl_wide_n_s8_z (p0, z1, 8), -+ z0 = svlsl_wide_z (p0, z1, 8)) -+ -+/* -+** lsl_wide_s8_x_tied1: -+** lsl z0\.b, z0\.b, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_wide_s8_x_tied1, svint8_t, svuint64_t, -+ z0 = svlsl_wide_s8_x (p0, z0, z4), -+ z0 = svlsl_wide_x (p0, z0, z4)) -+ -+/* -+** lsl_wide_s8_x_tied2: -+** lsl z0\.b, z4\.b, z0\.d -+** ret -+*/ -+TEST_DUAL_Z_REV (lsl_wide_s8_x_tied2, svint8_t, svuint64_t, -+ z0_res = svlsl_wide_s8_x (p0, z4, z0), -+ z0_res = svlsl_wide_x (p0, z4, z0)) -+ -+/* -+** lsl_wide_s8_x_untied: -+** lsl z0\.b, z1\.b, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_wide_s8_x_untied, svint8_t, svuint64_t, -+ z0 = svlsl_wide_s8_x 
(p0, z1, z4), -+ z0 = svlsl_wide_x (p0, z1, z4)) -+ -+/* -+** lsl_wide_x0_s8_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** lsl z0\.b, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_wide_x0_s8_x_tied1, svint8_t, uint64_t, -+ z0 = svlsl_wide_n_s8_x (p0, z0, x0), -+ z0 = svlsl_wide_x (p0, z0, x0)) -+ -+/* -+** lsl_wide_x0_s8_x_untied: -+** mov (z[0-9]+\.d), x0 -+** lsl z0\.b, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_wide_x0_s8_x_untied, svint8_t, uint64_t, -+ z0 = svlsl_wide_n_s8_x (p0, z1, x0), -+ z0 = svlsl_wide_x (p0, z1, x0)) -+ -+/* -+** lsl_wide_1_s8_x_tied1: -+** lsl z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_1_s8_x_tied1, svint8_t, -+ z0 = svlsl_wide_n_s8_x (p0, z0, 1), -+ z0 = svlsl_wide_x (p0, z0, 1)) -+ -+/* -+** lsl_wide_1_s8_x_untied: -+** lsl z0\.b, z1\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_1_s8_x_untied, svint8_t, -+ z0 = svlsl_wide_n_s8_x (p0, z1, 1), -+ z0 = svlsl_wide_x (p0, z1, 1)) -+ -+/* -+** lsl_wide_7_s8_x_tied1: -+** lsl z0\.b, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_7_s8_x_tied1, svint8_t, -+ z0 = svlsl_wide_n_s8_x (p0, z0, 7), -+ z0 = svlsl_wide_x (p0, z0, 7)) -+ -+/* -+** lsl_wide_7_s8_x_untied: -+** lsl z0\.b, z1\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_7_s8_x_untied, svint8_t, -+ z0 = svlsl_wide_n_s8_x (p0, z1, 7), -+ z0 = svlsl_wide_x (p0, z1, 7)) -+ -+/* -+** lsl_wide_8_s8_x_tied1: -+** mov (z[0-9]+\.d), #8 -+** lsl z0\.b, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_8_s8_x_tied1, svint8_t, -+ z0 = svlsl_wide_n_s8_x (p0, z0, 8), -+ z0 = svlsl_wide_x (p0, z0, 8)) -+ -+/* -+** lsl_wide_8_s8_x_untied: -+** mov (z[0-9]+\.d), #8 -+** lsl z0\.b, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_8_s8_x_untied, svint8_t, -+ z0 = svlsl_wide_n_s8_x (p0, z1, 8), -+ z0 = svlsl_wide_x (p0, z1, 8)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_wide_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_wide_u16.c -new file mode 100644 -index 000000000..cff24a850 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_wide_u16.c -@@ -0,0 +1,331 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lsl_wide_u16_m_tied1: -+** lsl z0\.h, p0/m, z0\.h, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_wide_u16_m_tied1, svuint16_t, svuint64_t, -+ z0 = svlsl_wide_u16_m (p0, z0, z4), -+ z0 = svlsl_wide_m (p0, z0, z4)) -+ -+/* -+** lsl_wide_u16_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z4 -+** lsl z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (lsl_wide_u16_m_tied2, svuint16_t, svuint64_t, -+ z0_res = svlsl_wide_u16_m (p0, z4, z0), -+ z0_res = svlsl_wide_m (p0, z4, z0)) -+ -+/* -+** lsl_wide_u16_m_untied: -+** movprfx z0, z1 -+** lsl z0\.h, p0/m, z0\.h, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_wide_u16_m_untied, svuint16_t, svuint64_t, -+ z0 = svlsl_wide_u16_m (p0, z1, z4), -+ z0 = svlsl_wide_m (p0, z1, z4)) -+ -+/* -+** lsl_wide_x0_u16_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** lsl z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_wide_x0_u16_m_tied1, svuint16_t, uint64_t, -+ z0 = svlsl_wide_n_u16_m (p0, z0, x0), -+ z0 = svlsl_wide_m (p0, z0, x0)) -+ -+/* -+** lsl_wide_x0_u16_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** lsl z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_wide_x0_u16_m_untied, svuint16_t, uint64_t, -+ z0 = svlsl_wide_n_u16_m (p0, z1, x0), -+ z0 = svlsl_wide_m (p0, z1, x0)) -+ -+/* -+** lsl_wide_1_u16_m_tied1: -+** lsl z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z 
(lsl_wide_1_u16_m_tied1, svuint16_t, -+ z0 = svlsl_wide_n_u16_m (p0, z0, 1), -+ z0 = svlsl_wide_m (p0, z0, 1)) -+ -+/* -+** lsl_wide_1_u16_m_untied: -+** movprfx z0, z1 -+** lsl z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_1_u16_m_untied, svuint16_t, -+ z0 = svlsl_wide_n_u16_m (p0, z1, 1), -+ z0 = svlsl_wide_m (p0, z1, 1)) -+ -+/* -+** lsl_wide_15_u16_m_tied1: -+** lsl z0\.h, p0/m, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_15_u16_m_tied1, svuint16_t, -+ z0 = svlsl_wide_n_u16_m (p0, z0, 15), -+ z0 = svlsl_wide_m (p0, z0, 15)) -+ -+/* -+** lsl_wide_15_u16_m_untied: -+** movprfx z0, z1 -+** lsl z0\.h, p0/m, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_15_u16_m_untied, svuint16_t, -+ z0 = svlsl_wide_n_u16_m (p0, z1, 15), -+ z0 = svlsl_wide_m (p0, z1, 15)) -+ -+/* -+** lsl_wide_16_u16_m_tied1: -+** mov (z[0-9]+\.d), #16 -+** lsl z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_16_u16_m_tied1, svuint16_t, -+ z0 = svlsl_wide_n_u16_m (p0, z0, 16), -+ z0 = svlsl_wide_m (p0, z0, 16)) -+ -+/* -+** lsl_wide_16_u16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #16 -+** movprfx z0, z1 -+** lsl z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_16_u16_m_untied, svuint16_t, -+ z0 = svlsl_wide_n_u16_m (p0, z1, 16), -+ z0 = svlsl_wide_m (p0, z1, 16)) -+ -+/* -+** lsl_wide_u16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** lsl z0\.h, p0/m, z0\.h, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_wide_u16_z_tied1, svuint16_t, svuint64_t, -+ z0 = svlsl_wide_u16_z (p0, z0, z4), -+ z0 = svlsl_wide_z (p0, z0, z4)) -+ -+/* -+** lsl_wide_u16_z_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.h, p0/z, z4\.h -+** lsl z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (lsl_wide_u16_z_tied2, svuint16_t, svuint64_t, -+ z0_res = svlsl_wide_u16_z (p0, z4, z0), -+ z0_res = svlsl_wide_z (p0, z4, z0)) -+ -+/* -+** lsl_wide_u16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** lsl z0\.h, p0/m, z0\.h, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_wide_u16_z_untied, svuint16_t, svuint64_t, -+ z0 = svlsl_wide_u16_z (p0, z1, z4), -+ z0 = svlsl_wide_z (p0, z1, z4)) -+ -+/* -+** lsl_wide_x0_u16_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.h, p0/z, z0\.h -+** lsl z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_wide_x0_u16_z_tied1, svuint16_t, uint64_t, -+ z0 = svlsl_wide_n_u16_z (p0, z0, x0), -+ z0 = svlsl_wide_z (p0, z0, x0)) -+ -+/* -+** lsl_wide_x0_u16_z_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.h, p0/z, z1\.h -+** lsl z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_wide_x0_u16_z_untied, svuint16_t, uint64_t, -+ z0 = svlsl_wide_n_u16_z (p0, z1, x0), -+ z0 = svlsl_wide_z (p0, z1, x0)) -+ -+/* -+** lsl_wide_1_u16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** lsl z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_1_u16_z_tied1, svuint16_t, -+ z0 = svlsl_wide_n_u16_z (p0, z0, 1), -+ z0 = svlsl_wide_z (p0, z0, 1)) -+ -+/* -+** lsl_wide_1_u16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** lsl z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_1_u16_z_untied, svuint16_t, -+ z0 = svlsl_wide_n_u16_z (p0, z1, 1), -+ z0 = svlsl_wide_z (p0, z1, 1)) -+ -+/* -+** lsl_wide_15_u16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** lsl z0\.h, p0/m, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_15_u16_z_tied1, svuint16_t, -+ z0 = svlsl_wide_n_u16_z (p0, z0, 15), -+ z0 = svlsl_wide_z (p0, z0, 15)) -+ -+/* -+** lsl_wide_15_u16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** lsl z0\.h, p0/m, z0\.h, #15 -+** ret -+*/ 
-+TEST_UNIFORM_Z (lsl_wide_15_u16_z_untied, svuint16_t, -+ z0 = svlsl_wide_n_u16_z (p0, z1, 15), -+ z0 = svlsl_wide_z (p0, z1, 15)) -+ -+/* -+** lsl_wide_16_u16_z_tied1: -+** mov (z[0-9]+\.d), #16 -+** movprfx z0\.h, p0/z, z0\.h -+** lsl z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_16_u16_z_tied1, svuint16_t, -+ z0 = svlsl_wide_n_u16_z (p0, z0, 16), -+ z0 = svlsl_wide_z (p0, z0, 16)) -+ -+/* -+** lsl_wide_16_u16_z_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #16 -+** movprfx z0\.h, p0/z, z1\.h -+** lsl z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_16_u16_z_untied, svuint16_t, -+ z0 = svlsl_wide_n_u16_z (p0, z1, 16), -+ z0 = svlsl_wide_z (p0, z1, 16)) -+ -+/* -+** lsl_wide_u16_x_tied1: -+** lsl z0\.h, z0\.h, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_wide_u16_x_tied1, svuint16_t, svuint64_t, -+ z0 = svlsl_wide_u16_x (p0, z0, z4), -+ z0 = svlsl_wide_x (p0, z0, z4)) -+ -+/* -+** lsl_wide_u16_x_tied2: -+** lsl z0\.h, z4\.h, z0\.d -+** ret -+*/ -+TEST_DUAL_Z_REV (lsl_wide_u16_x_tied2, svuint16_t, svuint64_t, -+ z0_res = svlsl_wide_u16_x (p0, z4, z0), -+ z0_res = svlsl_wide_x (p0, z4, z0)) -+ -+/* -+** lsl_wide_u16_x_untied: -+** lsl z0\.h, z1\.h, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_wide_u16_x_untied, svuint16_t, svuint64_t, -+ z0 = svlsl_wide_u16_x (p0, z1, z4), -+ z0 = svlsl_wide_x (p0, z1, z4)) -+ -+/* -+** lsl_wide_x0_u16_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** lsl z0\.h, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_wide_x0_u16_x_tied1, svuint16_t, uint64_t, -+ z0 = svlsl_wide_n_u16_x (p0, z0, x0), -+ z0 = svlsl_wide_x (p0, z0, x0)) -+ -+/* -+** lsl_wide_x0_u16_x_untied: -+** mov (z[0-9]+\.d), x0 -+** lsl z0\.h, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_wide_x0_u16_x_untied, svuint16_t, uint64_t, -+ z0 = svlsl_wide_n_u16_x (p0, z1, x0), -+ z0 = svlsl_wide_x (p0, z1, x0)) -+ -+/* -+** lsl_wide_1_u16_x_tied1: -+** lsl z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_1_u16_x_tied1, svuint16_t, -+ z0 = svlsl_wide_n_u16_x (p0, z0, 1), -+ z0 = svlsl_wide_x (p0, z0, 1)) -+ -+/* -+** lsl_wide_1_u16_x_untied: -+** lsl z0\.h, z1\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_1_u16_x_untied, svuint16_t, -+ z0 = svlsl_wide_n_u16_x (p0, z1, 1), -+ z0 = svlsl_wide_x (p0, z1, 1)) -+ -+/* -+** lsl_wide_15_u16_x_tied1: -+** lsl z0\.h, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_15_u16_x_tied1, svuint16_t, -+ z0 = svlsl_wide_n_u16_x (p0, z0, 15), -+ z0 = svlsl_wide_x (p0, z0, 15)) -+ -+/* -+** lsl_wide_15_u16_x_untied: -+** lsl z0\.h, z1\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_15_u16_x_untied, svuint16_t, -+ z0 = svlsl_wide_n_u16_x (p0, z1, 15), -+ z0 = svlsl_wide_x (p0, z1, 15)) -+ -+/* -+** lsl_wide_16_u16_x_tied1: -+** mov (z[0-9]+\.d), #16 -+** lsl z0\.h, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_16_u16_x_tied1, svuint16_t, -+ z0 = svlsl_wide_n_u16_x (p0, z0, 16), -+ z0 = svlsl_wide_x (p0, z0, 16)) -+ -+/* -+** lsl_wide_16_u16_x_untied: -+** mov (z[0-9]+\.d), #16 -+** lsl z0\.h, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_16_u16_x_untied, svuint16_t, -+ z0 = svlsl_wide_n_u16_x (p0, z1, 16), -+ z0 = svlsl_wide_x (p0, z1, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_wide_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_wide_u32.c -new file mode 100644 -index 000000000..7b1afab49 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_wide_u32.c -@@ -0,0 +1,331 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* 
-+** lsl_wide_u32_m_tied1: -+** lsl z0\.s, p0/m, z0\.s, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_wide_u32_m_tied1, svuint32_t, svuint64_t, -+ z0 = svlsl_wide_u32_m (p0, z0, z4), -+ z0 = svlsl_wide_m (p0, z0, z4)) -+ -+/* -+** lsl_wide_u32_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z4 -+** lsl z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (lsl_wide_u32_m_tied2, svuint32_t, svuint64_t, -+ z0_res = svlsl_wide_u32_m (p0, z4, z0), -+ z0_res = svlsl_wide_m (p0, z4, z0)) -+ -+/* -+** lsl_wide_u32_m_untied: -+** movprfx z0, z1 -+** lsl z0\.s, p0/m, z0\.s, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_wide_u32_m_untied, svuint32_t, svuint64_t, -+ z0 = svlsl_wide_u32_m (p0, z1, z4), -+ z0 = svlsl_wide_m (p0, z1, z4)) -+ -+/* -+** lsl_wide_x0_u32_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** lsl z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_wide_x0_u32_m_tied1, svuint32_t, uint64_t, -+ z0 = svlsl_wide_n_u32_m (p0, z0, x0), -+ z0 = svlsl_wide_m (p0, z0, x0)) -+ -+/* -+** lsl_wide_x0_u32_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** lsl z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_wide_x0_u32_m_untied, svuint32_t, uint64_t, -+ z0 = svlsl_wide_n_u32_m (p0, z1, x0), -+ z0 = svlsl_wide_m (p0, z1, x0)) -+ -+/* -+** lsl_wide_1_u32_m_tied1: -+** lsl z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_1_u32_m_tied1, svuint32_t, -+ z0 = svlsl_wide_n_u32_m (p0, z0, 1), -+ z0 = svlsl_wide_m (p0, z0, 1)) -+ -+/* -+** lsl_wide_1_u32_m_untied: -+** movprfx z0, z1 -+** lsl z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_1_u32_m_untied, svuint32_t, -+ z0 = svlsl_wide_n_u32_m (p0, z1, 1), -+ z0 = svlsl_wide_m (p0, z1, 1)) -+ -+/* -+** lsl_wide_31_u32_m_tied1: -+** lsl z0\.s, p0/m, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_31_u32_m_tied1, svuint32_t, -+ z0 = svlsl_wide_n_u32_m (p0, z0, 31), -+ z0 = svlsl_wide_m (p0, z0, 31)) -+ -+/* -+** lsl_wide_31_u32_m_untied: -+** movprfx z0, z1 -+** lsl z0\.s, p0/m, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_31_u32_m_untied, svuint32_t, -+ z0 = svlsl_wide_n_u32_m (p0, z1, 31), -+ z0 = svlsl_wide_m (p0, z1, 31)) -+ -+/* -+** lsl_wide_32_u32_m_tied1: -+** mov (z[0-9]+\.d), #32 -+** lsl z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_32_u32_m_tied1, svuint32_t, -+ z0 = svlsl_wide_n_u32_m (p0, z0, 32), -+ z0 = svlsl_wide_m (p0, z0, 32)) -+ -+/* -+** lsl_wide_32_u32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #32 -+** movprfx z0, z1 -+** lsl z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_32_u32_m_untied, svuint32_t, -+ z0 = svlsl_wide_n_u32_m (p0, z1, 32), -+ z0 = svlsl_wide_m (p0, z1, 32)) -+ -+/* -+** lsl_wide_u32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** lsl z0\.s, p0/m, z0\.s, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_wide_u32_z_tied1, svuint32_t, svuint64_t, -+ z0 = svlsl_wide_u32_z (p0, z0, z4), -+ z0 = svlsl_wide_z (p0, z0, z4)) -+ -+/* -+** lsl_wide_u32_z_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.s, p0/z, z4\.s -+** lsl z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (lsl_wide_u32_z_tied2, svuint32_t, svuint64_t, -+ z0_res = svlsl_wide_u32_z (p0, z4, z0), -+ z0_res = svlsl_wide_z (p0, z4, z0)) -+ -+/* -+** lsl_wide_u32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** lsl z0\.s, p0/m, z0\.s, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_wide_u32_z_untied, svuint32_t, svuint64_t, -+ z0 = svlsl_wide_u32_z (p0, z1, z4), -+ z0 = svlsl_wide_z (p0, z1, z4)) -+ -+/* -+** lsl_wide_x0_u32_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.s, 
p0/z, z0\.s -+** lsl z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_wide_x0_u32_z_tied1, svuint32_t, uint64_t, -+ z0 = svlsl_wide_n_u32_z (p0, z0, x0), -+ z0 = svlsl_wide_z (p0, z0, x0)) -+ -+/* -+** lsl_wide_x0_u32_z_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.s, p0/z, z1\.s -+** lsl z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_wide_x0_u32_z_untied, svuint32_t, uint64_t, -+ z0 = svlsl_wide_n_u32_z (p0, z1, x0), -+ z0 = svlsl_wide_z (p0, z1, x0)) -+ -+/* -+** lsl_wide_1_u32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** lsl z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_1_u32_z_tied1, svuint32_t, -+ z0 = svlsl_wide_n_u32_z (p0, z0, 1), -+ z0 = svlsl_wide_z (p0, z0, 1)) -+ -+/* -+** lsl_wide_1_u32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** lsl z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_1_u32_z_untied, svuint32_t, -+ z0 = svlsl_wide_n_u32_z (p0, z1, 1), -+ z0 = svlsl_wide_z (p0, z1, 1)) -+ -+/* -+** lsl_wide_31_u32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** lsl z0\.s, p0/m, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_31_u32_z_tied1, svuint32_t, -+ z0 = svlsl_wide_n_u32_z (p0, z0, 31), -+ z0 = svlsl_wide_z (p0, z0, 31)) -+ -+/* -+** lsl_wide_31_u32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** lsl z0\.s, p0/m, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_31_u32_z_untied, svuint32_t, -+ z0 = svlsl_wide_n_u32_z (p0, z1, 31), -+ z0 = svlsl_wide_z (p0, z1, 31)) -+ -+/* -+** lsl_wide_32_u32_z_tied1: -+** mov (z[0-9]+\.d), #32 -+** movprfx z0\.s, p0/z, z0\.s -+** lsl z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_32_u32_z_tied1, svuint32_t, -+ z0 = svlsl_wide_n_u32_z (p0, z0, 32), -+ z0 = svlsl_wide_z (p0, z0, 32)) -+ -+/* -+** lsl_wide_32_u32_z_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #32 -+** movprfx z0\.s, p0/z, z1\.s -+** lsl z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_32_u32_z_untied, svuint32_t, -+ z0 = svlsl_wide_n_u32_z (p0, z1, 32), -+ z0 = svlsl_wide_z (p0, z1, 32)) -+ -+/* -+** lsl_wide_u32_x_tied1: -+** lsl z0\.s, z0\.s, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_wide_u32_x_tied1, svuint32_t, svuint64_t, -+ z0 = svlsl_wide_u32_x (p0, z0, z4), -+ z0 = svlsl_wide_x (p0, z0, z4)) -+ -+/* -+** lsl_wide_u32_x_tied2: -+** lsl z0\.s, z4\.s, z0\.d -+** ret -+*/ -+TEST_DUAL_Z_REV (lsl_wide_u32_x_tied2, svuint32_t, svuint64_t, -+ z0_res = svlsl_wide_u32_x (p0, z4, z0), -+ z0_res = svlsl_wide_x (p0, z4, z0)) -+ -+/* -+** lsl_wide_u32_x_untied: -+** lsl z0\.s, z1\.s, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_wide_u32_x_untied, svuint32_t, svuint64_t, -+ z0 = svlsl_wide_u32_x (p0, z1, z4), -+ z0 = svlsl_wide_x (p0, z1, z4)) -+ -+/* -+** lsl_wide_x0_u32_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** lsl z0\.s, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_wide_x0_u32_x_tied1, svuint32_t, uint64_t, -+ z0 = svlsl_wide_n_u32_x (p0, z0, x0), -+ z0 = svlsl_wide_x (p0, z0, x0)) -+ -+/* -+** lsl_wide_x0_u32_x_untied: -+** mov (z[0-9]+\.d), x0 -+** lsl z0\.s, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_wide_x0_u32_x_untied, svuint32_t, uint64_t, -+ z0 = svlsl_wide_n_u32_x (p0, z1, x0), -+ z0 = svlsl_wide_x (p0, z1, x0)) -+ -+/* -+** lsl_wide_1_u32_x_tied1: -+** lsl z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_1_u32_x_tied1, svuint32_t, -+ z0 = svlsl_wide_n_u32_x (p0, z0, 1), -+ z0 = svlsl_wide_x (p0, z0, 1)) -+ -+/* -+** lsl_wide_1_u32_x_untied: -+** lsl z0\.s, z1\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_1_u32_x_untied, svuint32_t, -+ z0 = 
svlsl_wide_n_u32_x (p0, z1, 1), -+ z0 = svlsl_wide_x (p0, z1, 1)) -+ -+/* -+** lsl_wide_31_u32_x_tied1: -+** lsl z0\.s, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_31_u32_x_tied1, svuint32_t, -+ z0 = svlsl_wide_n_u32_x (p0, z0, 31), -+ z0 = svlsl_wide_x (p0, z0, 31)) -+ -+/* -+** lsl_wide_31_u32_x_untied: -+** lsl z0\.s, z1\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_31_u32_x_untied, svuint32_t, -+ z0 = svlsl_wide_n_u32_x (p0, z1, 31), -+ z0 = svlsl_wide_x (p0, z1, 31)) -+ -+/* -+** lsl_wide_32_u32_x_tied1: -+** mov (z[0-9]+\.d), #32 -+** lsl z0\.s, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_32_u32_x_tied1, svuint32_t, -+ z0 = svlsl_wide_n_u32_x (p0, z0, 32), -+ z0 = svlsl_wide_x (p0, z0, 32)) -+ -+/* -+** lsl_wide_32_u32_x_untied: -+** mov (z[0-9]+\.d), #32 -+** lsl z0\.s, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_32_u32_x_untied, svuint32_t, -+ z0 = svlsl_wide_n_u32_x (p0, z1, 32), -+ z0 = svlsl_wide_x (p0, z1, 32)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_wide_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_wide_u8.c -new file mode 100644 -index 000000000..df8b1ec86 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsl_wide_u8.c -@@ -0,0 +1,331 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lsl_wide_u8_m_tied1: -+** lsl z0\.b, p0/m, z0\.b, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_wide_u8_m_tied1, svuint8_t, svuint64_t, -+ z0 = svlsl_wide_u8_m (p0, z0, z4), -+ z0 = svlsl_wide_m (p0, z0, z4)) -+ -+/* -+** lsl_wide_u8_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z4 -+** lsl z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (lsl_wide_u8_m_tied2, svuint8_t, svuint64_t, -+ z0_res = svlsl_wide_u8_m (p0, z4, z0), -+ z0_res = svlsl_wide_m (p0, z4, z0)) -+ -+/* -+** lsl_wide_u8_m_untied: -+** movprfx z0, z1 -+** lsl z0\.b, p0/m, z0\.b, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_wide_u8_m_untied, svuint8_t, svuint64_t, -+ z0 = svlsl_wide_u8_m (p0, z1, z4), -+ z0 = svlsl_wide_m (p0, z1, z4)) -+ -+/* -+** lsl_wide_x0_u8_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** lsl z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_wide_x0_u8_m_tied1, svuint8_t, uint64_t, -+ z0 = svlsl_wide_n_u8_m (p0, z0, x0), -+ z0 = svlsl_wide_m (p0, z0, x0)) -+ -+/* -+** lsl_wide_x0_u8_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** lsl z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_wide_x0_u8_m_untied, svuint8_t, uint64_t, -+ z0 = svlsl_wide_n_u8_m (p0, z1, x0), -+ z0 = svlsl_wide_m (p0, z1, x0)) -+ -+/* -+** lsl_wide_1_u8_m_tied1: -+** lsl z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_1_u8_m_tied1, svuint8_t, -+ z0 = svlsl_wide_n_u8_m (p0, z0, 1), -+ z0 = svlsl_wide_m (p0, z0, 1)) -+ -+/* -+** lsl_wide_1_u8_m_untied: -+** movprfx z0, z1 -+** lsl z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_1_u8_m_untied, svuint8_t, -+ z0 = svlsl_wide_n_u8_m (p0, z1, 1), -+ z0 = svlsl_wide_m (p0, z1, 1)) -+ -+/* -+** lsl_wide_7_u8_m_tied1: -+** lsl z0\.b, p0/m, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_7_u8_m_tied1, svuint8_t, -+ z0 = svlsl_wide_n_u8_m (p0, z0, 7), -+ z0 = svlsl_wide_m (p0, z0, 7)) -+ -+/* -+** lsl_wide_7_u8_m_untied: -+** movprfx z0, z1 -+** lsl z0\.b, p0/m, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_7_u8_m_untied, svuint8_t, -+ z0 = svlsl_wide_n_u8_m (p0, z1, 7), -+ z0 = svlsl_wide_m (p0, z1, 7)) -+ -+/* -+** lsl_wide_8_u8_m_tied1: -+** mov (z[0-9]+\.d), #8 -+** lsl z0\.b, p0/m, 
z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_8_u8_m_tied1, svuint8_t, -+ z0 = svlsl_wide_n_u8_m (p0, z0, 8), -+ z0 = svlsl_wide_m (p0, z0, 8)) -+ -+/* -+** lsl_wide_8_u8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #8 -+** movprfx z0, z1 -+** lsl z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_8_u8_m_untied, svuint8_t, -+ z0 = svlsl_wide_n_u8_m (p0, z1, 8), -+ z0 = svlsl_wide_m (p0, z1, 8)) -+ -+/* -+** lsl_wide_u8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** lsl z0\.b, p0/m, z0\.b, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_wide_u8_z_tied1, svuint8_t, svuint64_t, -+ z0 = svlsl_wide_u8_z (p0, z0, z4), -+ z0 = svlsl_wide_z (p0, z0, z4)) -+ -+/* -+** lsl_wide_u8_z_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.b, p0/z, z4\.b -+** lsl z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (lsl_wide_u8_z_tied2, svuint8_t, svuint64_t, -+ z0_res = svlsl_wide_u8_z (p0, z4, z0), -+ z0_res = svlsl_wide_z (p0, z4, z0)) -+ -+/* -+** lsl_wide_u8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** lsl z0\.b, p0/m, z0\.b, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_wide_u8_z_untied, svuint8_t, svuint64_t, -+ z0 = svlsl_wide_u8_z (p0, z1, z4), -+ z0 = svlsl_wide_z (p0, z1, z4)) -+ -+/* -+** lsl_wide_x0_u8_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.b, p0/z, z0\.b -+** lsl z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_wide_x0_u8_z_tied1, svuint8_t, uint64_t, -+ z0 = svlsl_wide_n_u8_z (p0, z0, x0), -+ z0 = svlsl_wide_z (p0, z0, x0)) -+ -+/* -+** lsl_wide_x0_u8_z_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.b, p0/z, z1\.b -+** lsl z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_wide_x0_u8_z_untied, svuint8_t, uint64_t, -+ z0 = svlsl_wide_n_u8_z (p0, z1, x0), -+ z0 = svlsl_wide_z (p0, z1, x0)) -+ -+/* -+** lsl_wide_1_u8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** lsl z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_1_u8_z_tied1, svuint8_t, -+ z0 = svlsl_wide_n_u8_z (p0, z0, 1), -+ z0 = svlsl_wide_z (p0, z0, 1)) -+ -+/* -+** lsl_wide_1_u8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** lsl z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_1_u8_z_untied, svuint8_t, -+ z0 = svlsl_wide_n_u8_z (p0, z1, 1), -+ z0 = svlsl_wide_z (p0, z1, 1)) -+ -+/* -+** lsl_wide_7_u8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** lsl z0\.b, p0/m, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_7_u8_z_tied1, svuint8_t, -+ z0 = svlsl_wide_n_u8_z (p0, z0, 7), -+ z0 = svlsl_wide_z (p0, z0, 7)) -+ -+/* -+** lsl_wide_7_u8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** lsl z0\.b, p0/m, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_7_u8_z_untied, svuint8_t, -+ z0 = svlsl_wide_n_u8_z (p0, z1, 7), -+ z0 = svlsl_wide_z (p0, z1, 7)) -+ -+/* -+** lsl_wide_8_u8_z_tied1: -+** mov (z[0-9]+\.d), #8 -+** movprfx z0\.b, p0/z, z0\.b -+** lsl z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_8_u8_z_tied1, svuint8_t, -+ z0 = svlsl_wide_n_u8_z (p0, z0, 8), -+ z0 = svlsl_wide_z (p0, z0, 8)) -+ -+/* -+** lsl_wide_8_u8_z_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #8 -+** movprfx z0\.b, p0/z, z1\.b -+** lsl z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_8_u8_z_untied, svuint8_t, -+ z0 = svlsl_wide_n_u8_z (p0, z1, 8), -+ z0 = svlsl_wide_z (p0, z1, 8)) -+ -+/* -+** lsl_wide_u8_x_tied1: -+** lsl z0\.b, z0\.b, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_wide_u8_x_tied1, svuint8_t, svuint64_t, -+ z0 = svlsl_wide_u8_x (p0, z0, z4), -+ z0 = svlsl_wide_x (p0, z0, z4)) -+ -+/* -+** lsl_wide_u8_x_tied2: -+** lsl z0\.b, z4\.b, 
z0\.d -+** ret -+*/ -+TEST_DUAL_Z_REV (lsl_wide_u8_x_tied2, svuint8_t, svuint64_t, -+ z0_res = svlsl_wide_u8_x (p0, z4, z0), -+ z0_res = svlsl_wide_x (p0, z4, z0)) -+ -+/* -+** lsl_wide_u8_x_untied: -+** lsl z0\.b, z1\.b, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsl_wide_u8_x_untied, svuint8_t, svuint64_t, -+ z0 = svlsl_wide_u8_x (p0, z1, z4), -+ z0 = svlsl_wide_x (p0, z1, z4)) -+ -+/* -+** lsl_wide_x0_u8_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** lsl z0\.b, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_wide_x0_u8_x_tied1, svuint8_t, uint64_t, -+ z0 = svlsl_wide_n_u8_x (p0, z0, x0), -+ z0 = svlsl_wide_x (p0, z0, x0)) -+ -+/* -+** lsl_wide_x0_u8_x_untied: -+** mov (z[0-9]+\.d), x0 -+** lsl z0\.b, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsl_wide_x0_u8_x_untied, svuint8_t, uint64_t, -+ z0 = svlsl_wide_n_u8_x (p0, z1, x0), -+ z0 = svlsl_wide_x (p0, z1, x0)) -+ -+/* -+** lsl_wide_1_u8_x_tied1: -+** lsl z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_1_u8_x_tied1, svuint8_t, -+ z0 = svlsl_wide_n_u8_x (p0, z0, 1), -+ z0 = svlsl_wide_x (p0, z0, 1)) -+ -+/* -+** lsl_wide_1_u8_x_untied: -+** lsl z0\.b, z1\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_1_u8_x_untied, svuint8_t, -+ z0 = svlsl_wide_n_u8_x (p0, z1, 1), -+ z0 = svlsl_wide_x (p0, z1, 1)) -+ -+/* -+** lsl_wide_7_u8_x_tied1: -+** lsl z0\.b, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_7_u8_x_tied1, svuint8_t, -+ z0 = svlsl_wide_n_u8_x (p0, z0, 7), -+ z0 = svlsl_wide_x (p0, z0, 7)) -+ -+/* -+** lsl_wide_7_u8_x_untied: -+** lsl z0\.b, z1\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_7_u8_x_untied, svuint8_t, -+ z0 = svlsl_wide_n_u8_x (p0, z1, 7), -+ z0 = svlsl_wide_x (p0, z1, 7)) -+ -+/* -+** lsl_wide_8_u8_x_tied1: -+** mov (z[0-9]+\.d), #8 -+** lsl z0\.b, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_8_u8_x_tied1, svuint8_t, -+ z0 = svlsl_wide_n_u8_x (p0, z0, 8), -+ z0 = svlsl_wide_x (p0, z0, 8)) -+ -+/* -+** lsl_wide_8_u8_x_untied: -+** mov (z[0-9]+\.d), #8 -+** lsl z0\.b, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsl_wide_8_u8_x_untied, svuint8_t, -+ z0 = svlsl_wide_n_u8_x (p0, z1, 8), -+ z0 = svlsl_wide_x (p0, z1, 8)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_u16.c -new file mode 100644 -index 000000000..61575645f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_u16.c -@@ -0,0 +1,340 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lsr_u16_m_tied1: -+** lsr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_u16_m_tied1, svuint16_t, -+ z0 = svlsr_u16_m (p0, z0, z1), -+ z0 = svlsr_m (p0, z0, z1)) -+ -+/* -+** lsr_u16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** lsr z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_u16_m_tied2, svuint16_t, -+ z0 = svlsr_u16_m (p0, z1, z0), -+ z0 = svlsr_m (p0, z1, z0)) -+ -+/* -+** lsr_u16_m_untied: -+** movprfx z0, z1 -+** lsr z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_u16_m_untied, svuint16_t, -+ z0 = svlsr_u16_m (p0, z1, z2), -+ z0 = svlsr_m (p0, z1, z2)) -+ -+/* -+** lsr_w0_u16_m_tied1: -+** mov (z[0-9]+\.h), w0 -+** lsr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_w0_u16_m_tied1, svuint16_t, uint16_t, -+ z0 = svlsr_n_u16_m (p0, z0, x0), -+ z0 = svlsr_m (p0, z0, x0)) -+ -+/* -+** lsr_w0_u16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** lsr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX 
(lsr_w0_u16_m_untied, svuint16_t, uint16_t, -+ z0 = svlsr_n_u16_m (p0, z1, x0), -+ z0 = svlsr_m (p0, z1, x0)) -+ -+/* -+** lsr_1_u16_m_tied1: -+** lsr z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_1_u16_m_tied1, svuint16_t, -+ z0 = svlsr_n_u16_m (p0, z0, 1), -+ z0 = svlsr_m (p0, z0, 1)) -+ -+/* -+** lsr_1_u16_m_untied: -+** movprfx z0, z1 -+** lsr z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_1_u16_m_untied, svuint16_t, -+ z0 = svlsr_n_u16_m (p0, z1, 1), -+ z0 = svlsr_m (p0, z1, 1)) -+ -+/* -+** lsr_15_u16_m_tied1: -+** lsr z0\.h, p0/m, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_15_u16_m_tied1, svuint16_t, -+ z0 = svlsr_n_u16_m (p0, z0, 15), -+ z0 = svlsr_m (p0, z0, 15)) -+ -+/* -+** lsr_15_u16_m_untied: -+** movprfx z0, z1 -+** lsr z0\.h, p0/m, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_15_u16_m_untied, svuint16_t, -+ z0 = svlsr_n_u16_m (p0, z1, 15), -+ z0 = svlsr_m (p0, z1, 15)) -+ -+/* -+** lsr_16_u16_m_tied1: -+** lsr z0\.h, p0/m, z0\.h, #16 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_16_u16_m_tied1, svuint16_t, -+ z0 = svlsr_n_u16_m (p0, z0, 16), -+ z0 = svlsr_m (p0, z0, 16)) -+ -+/* -+** lsr_16_u16_m_untied: -+** movprfx z0, z1 -+** lsr z0\.h, p0/m, z0\.h, #16 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_16_u16_m_untied, svuint16_t, -+ z0 = svlsr_n_u16_m (p0, z1, 16), -+ z0 = svlsr_m (p0, z1, 16)) -+ -+/* -+** lsr_u16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** lsr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_u16_z_tied1, svuint16_t, -+ z0 = svlsr_u16_z (p0, z0, z1), -+ z0 = svlsr_z (p0, z0, z1)) -+ -+/* -+** lsr_u16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** lsrr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_u16_z_tied2, svuint16_t, -+ z0 = svlsr_u16_z (p0, z1, z0), -+ z0 = svlsr_z (p0, z1, z0)) -+ -+/* -+** lsr_u16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** lsr z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** lsrr z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_u16_z_untied, svuint16_t, -+ z0 = svlsr_u16_z (p0, z1, z2), -+ z0 = svlsr_z (p0, z1, z2)) -+ -+/* -+** lsr_w0_u16_z_tied1: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** lsr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_w0_u16_z_tied1, svuint16_t, uint16_t, -+ z0 = svlsr_n_u16_z (p0, z0, x0), -+ z0 = svlsr_z (p0, z0, x0)) -+ -+/* -+** lsr_w0_u16_z_untied: -+** mov (z[0-9]+\.h), w0 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** lsr z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** lsrr z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_w0_u16_z_untied, svuint16_t, uint16_t, -+ z0 = svlsr_n_u16_z (p0, z1, x0), -+ z0 = svlsr_z (p0, z1, x0)) -+ -+/* -+** lsr_1_u16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** lsr z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_1_u16_z_tied1, svuint16_t, -+ z0 = svlsr_n_u16_z (p0, z0, 1), -+ z0 = svlsr_z (p0, z0, 1)) -+ -+/* -+** lsr_1_u16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** lsr z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_1_u16_z_untied, svuint16_t, -+ z0 = svlsr_n_u16_z (p0, z1, 1), -+ z0 = svlsr_z (p0, z1, 1)) -+ -+/* -+** lsr_15_u16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** lsr z0\.h, p0/m, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_15_u16_z_tied1, svuint16_t, -+ z0 = svlsr_n_u16_z (p0, z0, 15), -+ z0 = svlsr_z (p0, z0, 15)) -+ -+/* -+** lsr_15_u16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** lsr z0\.h, p0/m, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_15_u16_z_untied, 
svuint16_t, -+ z0 = svlsr_n_u16_z (p0, z1, 15), -+ z0 = svlsr_z (p0, z1, 15)) -+ -+/* -+** lsr_16_u16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** lsr z0\.h, p0/m, z0\.h, #16 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_16_u16_z_tied1, svuint16_t, -+ z0 = svlsr_n_u16_z (p0, z0, 16), -+ z0 = svlsr_z (p0, z0, 16)) -+ -+/* -+** lsr_16_u16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** lsr z0\.h, p0/m, z0\.h, #16 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_16_u16_z_untied, svuint16_t, -+ z0 = svlsr_n_u16_z (p0, z1, 16), -+ z0 = svlsr_z (p0, z1, 16)) -+ -+/* -+** lsr_u16_x_tied1: -+** lsr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_u16_x_tied1, svuint16_t, -+ z0 = svlsr_u16_x (p0, z0, z1), -+ z0 = svlsr_x (p0, z0, z1)) -+ -+/* -+** lsr_u16_x_tied2: -+** lsrr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_u16_x_tied2, svuint16_t, -+ z0 = svlsr_u16_x (p0, z1, z0), -+ z0 = svlsr_x (p0, z1, z0)) -+ -+/* -+** lsr_u16_x_untied: -+** ( -+** movprfx z0, z1 -+** lsr z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0, z2 -+** lsrr z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_u16_x_untied, svuint16_t, -+ z0 = svlsr_u16_x (p0, z1, z2), -+ z0 = svlsr_x (p0, z1, z2)) -+ -+/* -+** lsr_w0_u16_x_tied1: -+** mov (z[0-9]+\.h), w0 -+** lsr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_w0_u16_x_tied1, svuint16_t, uint16_t, -+ z0 = svlsr_n_u16_x (p0, z0, x0), -+ z0 = svlsr_x (p0, z0, x0)) -+ -+/* -+** lsr_w0_u16_x_untied: -+** mov z0\.h, w0 -+** lsrr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_w0_u16_x_untied, svuint16_t, uint16_t, -+ z0 = svlsr_n_u16_x (p0, z1, x0), -+ z0 = svlsr_x (p0, z1, x0)) -+ -+/* -+** lsr_1_u16_x_tied1: -+** lsr z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_1_u16_x_tied1, svuint16_t, -+ z0 = svlsr_n_u16_x (p0, z0, 1), -+ z0 = svlsr_x (p0, z0, 1)) -+ -+/* -+** lsr_1_u16_x_untied: -+** lsr z0\.h, z1\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_1_u16_x_untied, svuint16_t, -+ z0 = svlsr_n_u16_x (p0, z1, 1), -+ z0 = svlsr_x (p0, z1, 1)) -+ -+/* -+** lsr_15_u16_x_tied1: -+** lsr z0\.h, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_15_u16_x_tied1, svuint16_t, -+ z0 = svlsr_n_u16_x (p0, z0, 15), -+ z0 = svlsr_x (p0, z0, 15)) -+ -+/* -+** lsr_15_u16_x_untied: -+** lsr z0\.h, z1\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_15_u16_x_untied, svuint16_t, -+ z0 = svlsr_n_u16_x (p0, z1, 15), -+ z0 = svlsr_x (p0, z1, 15)) -+ -+/* -+** lsr_16_u16_x_tied1: -+** lsr z0\.h, z0\.h, #16 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_16_u16_x_tied1, svuint16_t, -+ z0 = svlsr_n_u16_x (p0, z0, 16), -+ z0 = svlsr_x (p0, z0, 16)) -+ -+/* -+** lsr_16_u16_x_untied: -+** lsr z0\.h, z1\.h, #16 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_16_u16_x_untied, svuint16_t, -+ z0 = svlsr_n_u16_x (p0, z1, 16), -+ z0 = svlsr_x (p0, z1, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_u32.c -new file mode 100644 -index 000000000..796867ef8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_u32.c -@@ -0,0 +1,340 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lsr_u32_m_tied1: -+** lsr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_u32_m_tied1, svuint32_t, -+ z0 = svlsr_u32_m (p0, z0, z1), -+ z0 = svlsr_m (p0, z0, z1)) -+ -+/* -+** lsr_u32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** lsr z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_u32_m_tied2, svuint32_t, -+ z0 = 
svlsr_u32_m (p0, z1, z0), -+ z0 = svlsr_m (p0, z1, z0)) -+ -+/* -+** lsr_u32_m_untied: -+** movprfx z0, z1 -+** lsr z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_u32_m_untied, svuint32_t, -+ z0 = svlsr_u32_m (p0, z1, z2), -+ z0 = svlsr_m (p0, z1, z2)) -+ -+/* -+** lsr_w0_u32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** lsr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_w0_u32_m_tied1, svuint32_t, uint32_t, -+ z0 = svlsr_n_u32_m (p0, z0, x0), -+ z0 = svlsr_m (p0, z0, x0)) -+ -+/* -+** lsr_w0_u32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** lsr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_w0_u32_m_untied, svuint32_t, uint32_t, -+ z0 = svlsr_n_u32_m (p0, z1, x0), -+ z0 = svlsr_m (p0, z1, x0)) -+ -+/* -+** lsr_1_u32_m_tied1: -+** lsr z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_1_u32_m_tied1, svuint32_t, -+ z0 = svlsr_n_u32_m (p0, z0, 1), -+ z0 = svlsr_m (p0, z0, 1)) -+ -+/* -+** lsr_1_u32_m_untied: -+** movprfx z0, z1 -+** lsr z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_1_u32_m_untied, svuint32_t, -+ z0 = svlsr_n_u32_m (p0, z1, 1), -+ z0 = svlsr_m (p0, z1, 1)) -+ -+/* -+** lsr_31_u32_m_tied1: -+** lsr z0\.s, p0/m, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_31_u32_m_tied1, svuint32_t, -+ z0 = svlsr_n_u32_m (p0, z0, 31), -+ z0 = svlsr_m (p0, z0, 31)) -+ -+/* -+** lsr_31_u32_m_untied: -+** movprfx z0, z1 -+** lsr z0\.s, p0/m, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_31_u32_m_untied, svuint32_t, -+ z0 = svlsr_n_u32_m (p0, z1, 31), -+ z0 = svlsr_m (p0, z1, 31)) -+ -+/* -+** lsr_32_u32_m_tied1: -+** lsr z0\.s, p0/m, z0\.s, #32 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_32_u32_m_tied1, svuint32_t, -+ z0 = svlsr_n_u32_m (p0, z0, 32), -+ z0 = svlsr_m (p0, z0, 32)) -+ -+/* -+** lsr_32_u32_m_untied: -+** movprfx z0, z1 -+** lsr z0\.s, p0/m, z0\.s, #32 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_32_u32_m_untied, svuint32_t, -+ z0 = svlsr_n_u32_m (p0, z1, 32), -+ z0 = svlsr_m (p0, z1, 32)) -+ -+/* -+** lsr_u32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** lsr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_u32_z_tied1, svuint32_t, -+ z0 = svlsr_u32_z (p0, z0, z1), -+ z0 = svlsr_z (p0, z0, z1)) -+ -+/* -+** lsr_u32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** lsrr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_u32_z_tied2, svuint32_t, -+ z0 = svlsr_u32_z (p0, z1, z0), -+ z0 = svlsr_z (p0, z1, z0)) -+ -+/* -+** lsr_u32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** lsr z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** lsrr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_u32_z_untied, svuint32_t, -+ z0 = svlsr_u32_z (p0, z1, z2), -+ z0 = svlsr_z (p0, z1, z2)) -+ -+/* -+** lsr_w0_u32_z_tied1: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** lsr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_w0_u32_z_tied1, svuint32_t, uint32_t, -+ z0 = svlsr_n_u32_z (p0, z0, x0), -+ z0 = svlsr_z (p0, z0, x0)) -+ -+/* -+** lsr_w0_u32_z_untied: -+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** lsr z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** lsrr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_w0_u32_z_untied, svuint32_t, uint32_t, -+ z0 = svlsr_n_u32_z (p0, z1, x0), -+ z0 = svlsr_z (p0, z1, x0)) -+ -+/* -+** lsr_1_u32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** lsr z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_1_u32_z_tied1, svuint32_t, -+ z0 = svlsr_n_u32_z (p0, z0, 1), 
-+ z0 = svlsr_z (p0, z0, 1)) -+ -+/* -+** lsr_1_u32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** lsr z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_1_u32_z_untied, svuint32_t, -+ z0 = svlsr_n_u32_z (p0, z1, 1), -+ z0 = svlsr_z (p0, z1, 1)) -+ -+/* -+** lsr_31_u32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** lsr z0\.s, p0/m, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_31_u32_z_tied1, svuint32_t, -+ z0 = svlsr_n_u32_z (p0, z0, 31), -+ z0 = svlsr_z (p0, z0, 31)) -+ -+/* -+** lsr_31_u32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** lsr z0\.s, p0/m, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_31_u32_z_untied, svuint32_t, -+ z0 = svlsr_n_u32_z (p0, z1, 31), -+ z0 = svlsr_z (p0, z1, 31)) -+ -+/* -+** lsr_32_u32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** lsr z0\.s, p0/m, z0\.s, #32 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_32_u32_z_tied1, svuint32_t, -+ z0 = svlsr_n_u32_z (p0, z0, 32), -+ z0 = svlsr_z (p0, z0, 32)) -+ -+/* -+** lsr_32_u32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** lsr z0\.s, p0/m, z0\.s, #32 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_32_u32_z_untied, svuint32_t, -+ z0 = svlsr_n_u32_z (p0, z1, 32), -+ z0 = svlsr_z (p0, z1, 32)) -+ -+/* -+** lsr_u32_x_tied1: -+** lsr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_u32_x_tied1, svuint32_t, -+ z0 = svlsr_u32_x (p0, z0, z1), -+ z0 = svlsr_x (p0, z0, z1)) -+ -+/* -+** lsr_u32_x_tied2: -+** lsrr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_u32_x_tied2, svuint32_t, -+ z0 = svlsr_u32_x (p0, z1, z0), -+ z0 = svlsr_x (p0, z1, z0)) -+ -+/* -+** lsr_u32_x_untied: -+** ( -+** movprfx z0, z1 -+** lsr z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0, z2 -+** lsrr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_u32_x_untied, svuint32_t, -+ z0 = svlsr_u32_x (p0, z1, z2), -+ z0 = svlsr_x (p0, z1, z2)) -+ -+/* -+** lsr_w0_u32_x_tied1: -+** mov (z[0-9]+\.s), w0 -+** lsr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_w0_u32_x_tied1, svuint32_t, uint32_t, -+ z0 = svlsr_n_u32_x (p0, z0, x0), -+ z0 = svlsr_x (p0, z0, x0)) -+ -+/* -+** lsr_w0_u32_x_untied: -+** mov z0\.s, w0 -+** lsrr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_w0_u32_x_untied, svuint32_t, uint32_t, -+ z0 = svlsr_n_u32_x (p0, z1, x0), -+ z0 = svlsr_x (p0, z1, x0)) -+ -+/* -+** lsr_1_u32_x_tied1: -+** lsr z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_1_u32_x_tied1, svuint32_t, -+ z0 = svlsr_n_u32_x (p0, z0, 1), -+ z0 = svlsr_x (p0, z0, 1)) -+ -+/* -+** lsr_1_u32_x_untied: -+** lsr z0\.s, z1\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_1_u32_x_untied, svuint32_t, -+ z0 = svlsr_n_u32_x (p0, z1, 1), -+ z0 = svlsr_x (p0, z1, 1)) -+ -+/* -+** lsr_31_u32_x_tied1: -+** lsr z0\.s, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_31_u32_x_tied1, svuint32_t, -+ z0 = svlsr_n_u32_x (p0, z0, 31), -+ z0 = svlsr_x (p0, z0, 31)) -+ -+/* -+** lsr_31_u32_x_untied: -+** lsr z0\.s, z1\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_31_u32_x_untied, svuint32_t, -+ z0 = svlsr_n_u32_x (p0, z1, 31), -+ z0 = svlsr_x (p0, z1, 31)) -+ -+/* -+** lsr_32_u32_x_tied1: -+** lsr z0\.s, z0\.s, #32 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_32_u32_x_tied1, svuint32_t, -+ z0 = svlsr_n_u32_x (p0, z0, 32), -+ z0 = svlsr_x (p0, z0, 32)) -+ -+/* -+** lsr_32_u32_x_untied: -+** lsr z0\.s, z1\.s, #32 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_32_u32_x_untied, svuint32_t, -+ z0 = svlsr_n_u32_x (p0, z1, 32), -+ z0 = svlsr_x (p0, z1, 32)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_u64.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_u64.c -new file mode 100644 -index 000000000..b50777f50 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_u64.c -@@ -0,0 +1,340 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lsr_u64_m_tied1: -+** lsr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_u64_m_tied1, svuint64_t, -+ z0 = svlsr_u64_m (p0, z0, z1), -+ z0 = svlsr_m (p0, z0, z1)) -+ -+/* -+** lsr_u64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** lsr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_u64_m_tied2, svuint64_t, -+ z0 = svlsr_u64_m (p0, z1, z0), -+ z0 = svlsr_m (p0, z1, z0)) -+ -+/* -+** lsr_u64_m_untied: -+** movprfx z0, z1 -+** lsr z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_u64_m_untied, svuint64_t, -+ z0 = svlsr_u64_m (p0, z1, z2), -+ z0 = svlsr_m (p0, z1, z2)) -+ -+/* -+** lsr_x0_u64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** lsr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_x0_u64_m_tied1, svuint64_t, uint64_t, -+ z0 = svlsr_n_u64_m (p0, z0, x0), -+ z0 = svlsr_m (p0, z0, x0)) -+ -+/* -+** lsr_x0_u64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** lsr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_x0_u64_m_untied, svuint64_t, uint64_t, -+ z0 = svlsr_n_u64_m (p0, z1, x0), -+ z0 = svlsr_m (p0, z1, x0)) -+ -+/* -+** lsr_1_u64_m_tied1: -+** lsr z0\.d, p0/m, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_1_u64_m_tied1, svuint64_t, -+ z0 = svlsr_n_u64_m (p0, z0, 1), -+ z0 = svlsr_m (p0, z0, 1)) -+ -+/* -+** lsr_1_u64_m_untied: -+** movprfx z0, z1 -+** lsr z0\.d, p0/m, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_1_u64_m_untied, svuint64_t, -+ z0 = svlsr_n_u64_m (p0, z1, 1), -+ z0 = svlsr_m (p0, z1, 1)) -+ -+/* -+** lsr_63_u64_m_tied1: -+** lsr z0\.d, p0/m, z0\.d, #63 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_63_u64_m_tied1, svuint64_t, -+ z0 = svlsr_n_u64_m (p0, z0, 63), -+ z0 = svlsr_m (p0, z0, 63)) -+ -+/* -+** lsr_63_u64_m_untied: -+** movprfx z0, z1 -+** lsr z0\.d, p0/m, z0\.d, #63 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_63_u64_m_untied, svuint64_t, -+ z0 = svlsr_n_u64_m (p0, z1, 63), -+ z0 = svlsr_m (p0, z1, 63)) -+ -+/* -+** lsr_64_u64_m_tied1: -+** lsr z0\.d, p0/m, z0\.d, #64 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_64_u64_m_tied1, svuint64_t, -+ z0 = svlsr_n_u64_m (p0, z0, 64), -+ z0 = svlsr_m (p0, z0, 64)) -+ -+/* -+** lsr_64_u64_m_untied: -+** movprfx z0, z1 -+** lsr z0\.d, p0/m, z0\.d, #64 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_64_u64_m_untied, svuint64_t, -+ z0 = svlsr_n_u64_m (p0, z1, 64), -+ z0 = svlsr_m (p0, z1, 64)) -+ -+/* -+** lsr_u64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** lsr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_u64_z_tied1, svuint64_t, -+ z0 = svlsr_u64_z (p0, z0, z1), -+ z0 = svlsr_z (p0, z0, z1)) -+ -+/* -+** lsr_u64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** lsrr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_u64_z_tied2, svuint64_t, -+ z0 = svlsr_u64_z (p0, z1, z0), -+ z0 = svlsr_z (p0, z1, z0)) -+ -+/* -+** lsr_u64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** lsr z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** lsrr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_u64_z_untied, svuint64_t, -+ z0 = svlsr_u64_z (p0, z1, z2), -+ z0 = svlsr_z (p0, z1, z2)) -+ -+/* -+** lsr_x0_u64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** lsr z0\.d, p0/m, z0\.d, \1 -+** ret 
-+*/ -+TEST_UNIFORM_ZX (lsr_x0_u64_z_tied1, svuint64_t, uint64_t, -+ z0 = svlsr_n_u64_z (p0, z0, x0), -+ z0 = svlsr_z (p0, z0, x0)) -+ -+/* -+** lsr_x0_u64_z_untied: -+** mov (z[0-9]+\.d), x0 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** lsr z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** lsrr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_x0_u64_z_untied, svuint64_t, uint64_t, -+ z0 = svlsr_n_u64_z (p0, z1, x0), -+ z0 = svlsr_z (p0, z1, x0)) -+ -+/* -+** lsr_1_u64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** lsr z0\.d, p0/m, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_1_u64_z_tied1, svuint64_t, -+ z0 = svlsr_n_u64_z (p0, z0, 1), -+ z0 = svlsr_z (p0, z0, 1)) -+ -+/* -+** lsr_1_u64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** lsr z0\.d, p0/m, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_1_u64_z_untied, svuint64_t, -+ z0 = svlsr_n_u64_z (p0, z1, 1), -+ z0 = svlsr_z (p0, z1, 1)) -+ -+/* -+** lsr_63_u64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** lsr z0\.d, p0/m, z0\.d, #63 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_63_u64_z_tied1, svuint64_t, -+ z0 = svlsr_n_u64_z (p0, z0, 63), -+ z0 = svlsr_z (p0, z0, 63)) -+ -+/* -+** lsr_63_u64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** lsr z0\.d, p0/m, z0\.d, #63 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_63_u64_z_untied, svuint64_t, -+ z0 = svlsr_n_u64_z (p0, z1, 63), -+ z0 = svlsr_z (p0, z1, 63)) -+ -+/* -+** lsr_64_u64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** lsr z0\.d, p0/m, z0\.d, #64 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_64_u64_z_tied1, svuint64_t, -+ z0 = svlsr_n_u64_z (p0, z0, 64), -+ z0 = svlsr_z (p0, z0, 64)) -+ -+/* -+** lsr_64_u64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** lsr z0\.d, p0/m, z0\.d, #64 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_64_u64_z_untied, svuint64_t, -+ z0 = svlsr_n_u64_z (p0, z1, 64), -+ z0 = svlsr_z (p0, z1, 64)) -+ -+/* -+** lsr_u64_x_tied1: -+** lsr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_u64_x_tied1, svuint64_t, -+ z0 = svlsr_u64_x (p0, z0, z1), -+ z0 = svlsr_x (p0, z0, z1)) -+ -+/* -+** lsr_u64_x_tied2: -+** lsrr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_u64_x_tied2, svuint64_t, -+ z0 = svlsr_u64_x (p0, z1, z0), -+ z0 = svlsr_x (p0, z1, z0)) -+ -+/* -+** lsr_u64_x_untied: -+** ( -+** movprfx z0, z1 -+** lsr z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0, z2 -+** lsrr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_u64_x_untied, svuint64_t, -+ z0 = svlsr_u64_x (p0, z1, z2), -+ z0 = svlsr_x (p0, z1, z2)) -+ -+/* -+** lsr_x0_u64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** lsr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_x0_u64_x_tied1, svuint64_t, uint64_t, -+ z0 = svlsr_n_u64_x (p0, z0, x0), -+ z0 = svlsr_x (p0, z0, x0)) -+ -+/* -+** lsr_x0_u64_x_untied: -+** mov z0\.d, x0 -+** lsrr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_x0_u64_x_untied, svuint64_t, uint64_t, -+ z0 = svlsr_n_u64_x (p0, z1, x0), -+ z0 = svlsr_x (p0, z1, x0)) -+ -+/* -+** lsr_1_u64_x_tied1: -+** lsr z0\.d, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_1_u64_x_tied1, svuint64_t, -+ z0 = svlsr_n_u64_x (p0, z0, 1), -+ z0 = svlsr_x (p0, z0, 1)) -+ -+/* -+** lsr_1_u64_x_untied: -+** lsr z0\.d, z1\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_1_u64_x_untied, svuint64_t, -+ z0 = svlsr_n_u64_x (p0, z1, 1), -+ z0 = svlsr_x (p0, z1, 1)) -+ -+/* -+** lsr_63_u64_x_tied1: -+** lsr z0\.d, z0\.d, #63 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_63_u64_x_tied1, svuint64_t, -+ z0 = svlsr_n_u64_x (p0, z0, 63), -+ z0 = svlsr_x (p0, z0, 63)) -+ -+/* 
-+** lsr_63_u64_x_untied: -+** lsr z0\.d, z1\.d, #63 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_63_u64_x_untied, svuint64_t, -+ z0 = svlsr_n_u64_x (p0, z1, 63), -+ z0 = svlsr_x (p0, z1, 63)) -+ -+/* -+** lsr_64_u64_x_tied1: -+** lsr z0\.d, z0\.d, #64 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_64_u64_x_tied1, svuint64_t, -+ z0 = svlsr_n_u64_x (p0, z0, 64), -+ z0 = svlsr_x (p0, z0, 64)) -+ -+/* -+** lsr_64_u64_x_untied: -+** lsr z0\.d, z1\.d, #64 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_64_u64_x_untied, svuint64_t, -+ z0 = svlsr_n_u64_x (p0, z1, 64), -+ z0 = svlsr_x (p0, z1, 64)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_u8.c -new file mode 100644 -index 000000000..a049ca905 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_u8.c -@@ -0,0 +1,340 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lsr_u8_m_tied1: -+** lsr z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_u8_m_tied1, svuint8_t, -+ z0 = svlsr_u8_m (p0, z0, z1), -+ z0 = svlsr_m (p0, z0, z1)) -+ -+/* -+** lsr_u8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** lsr z0\.b, p0/m, z0\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_u8_m_tied2, svuint8_t, -+ z0 = svlsr_u8_m (p0, z1, z0), -+ z0 = svlsr_m (p0, z1, z0)) -+ -+/* -+** lsr_u8_m_untied: -+** movprfx z0, z1 -+** lsr z0\.b, p0/m, z0\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_u8_m_untied, svuint8_t, -+ z0 = svlsr_u8_m (p0, z1, z2), -+ z0 = svlsr_m (p0, z1, z2)) -+ -+/* -+** lsr_w0_u8_m_tied1: -+** mov (z[0-9]+\.b), w0 -+** lsr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_w0_u8_m_tied1, svuint8_t, uint8_t, -+ z0 = svlsr_n_u8_m (p0, z0, x0), -+ z0 = svlsr_m (p0, z0, x0)) -+ -+/* -+** lsr_w0_u8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** lsr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_w0_u8_m_untied, svuint8_t, uint8_t, -+ z0 = svlsr_n_u8_m (p0, z1, x0), -+ z0 = svlsr_m (p0, z1, x0)) -+ -+/* -+** lsr_1_u8_m_tied1: -+** lsr z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_1_u8_m_tied1, svuint8_t, -+ z0 = svlsr_n_u8_m (p0, z0, 1), -+ z0 = svlsr_m (p0, z0, 1)) -+ -+/* -+** lsr_1_u8_m_untied: -+** movprfx z0, z1 -+** lsr z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_1_u8_m_untied, svuint8_t, -+ z0 = svlsr_n_u8_m (p0, z1, 1), -+ z0 = svlsr_m (p0, z1, 1)) -+ -+/* -+** lsr_7_u8_m_tied1: -+** lsr z0\.b, p0/m, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_7_u8_m_tied1, svuint8_t, -+ z0 = svlsr_n_u8_m (p0, z0, 7), -+ z0 = svlsr_m (p0, z0, 7)) -+ -+/* -+** lsr_7_u8_m_untied: -+** movprfx z0, z1 -+** lsr z0\.b, p0/m, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_7_u8_m_untied, svuint8_t, -+ z0 = svlsr_n_u8_m (p0, z1, 7), -+ z0 = svlsr_m (p0, z1, 7)) -+ -+/* -+** lsr_8_u8_m_tied1: -+** lsr z0\.b, p0/m, z0\.b, #8 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_8_u8_m_tied1, svuint8_t, -+ z0 = svlsr_n_u8_m (p0, z0, 8), -+ z0 = svlsr_m (p0, z0, 8)) -+ -+/* -+** lsr_8_u8_m_untied: -+** movprfx z0, z1 -+** lsr z0\.b, p0/m, z0\.b, #8 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_8_u8_m_untied, svuint8_t, -+ z0 = svlsr_n_u8_m (p0, z1, 8), -+ z0 = svlsr_m (p0, z1, 8)) -+ -+/* -+** lsr_u8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** lsr z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_u8_z_tied1, svuint8_t, -+ z0 = svlsr_u8_z (p0, z0, z1), -+ z0 = svlsr_z (p0, z0, z1)) -+ -+/* -+** lsr_u8_z_tied2: -+** movprfx z0\.b, p0/z, z0\.b -+** lsrr z0\.b, p0/m, 
z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_u8_z_tied2, svuint8_t, -+ z0 = svlsr_u8_z (p0, z1, z0), -+ z0 = svlsr_z (p0, z1, z0)) -+ -+/* -+** lsr_u8_z_untied: -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** lsr z0\.b, p0/m, z0\.b, z2\.b -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** lsrr z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_u8_z_untied, svuint8_t, -+ z0 = svlsr_u8_z (p0, z1, z2), -+ z0 = svlsr_z (p0, z1, z2)) -+ -+/* -+** lsr_w0_u8_z_tied1: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** lsr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_w0_u8_z_tied1, svuint8_t, uint8_t, -+ z0 = svlsr_n_u8_z (p0, z0, x0), -+ z0 = svlsr_z (p0, z0, x0)) -+ -+/* -+** lsr_w0_u8_z_untied: -+** mov (z[0-9]+\.b), w0 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** lsr z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** lsrr z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_w0_u8_z_untied, svuint8_t, uint8_t, -+ z0 = svlsr_n_u8_z (p0, z1, x0), -+ z0 = svlsr_z (p0, z1, x0)) -+ -+/* -+** lsr_1_u8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** lsr z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_1_u8_z_tied1, svuint8_t, -+ z0 = svlsr_n_u8_z (p0, z0, 1), -+ z0 = svlsr_z (p0, z0, 1)) -+ -+/* -+** lsr_1_u8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** lsr z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_1_u8_z_untied, svuint8_t, -+ z0 = svlsr_n_u8_z (p0, z1, 1), -+ z0 = svlsr_z (p0, z1, 1)) -+ -+/* -+** lsr_7_u8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** lsr z0\.b, p0/m, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_7_u8_z_tied1, svuint8_t, -+ z0 = svlsr_n_u8_z (p0, z0, 7), -+ z0 = svlsr_z (p0, z0, 7)) -+ -+/* -+** lsr_7_u8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** lsr z0\.b, p0/m, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_7_u8_z_untied, svuint8_t, -+ z0 = svlsr_n_u8_z (p0, z1, 7), -+ z0 = svlsr_z (p0, z1, 7)) -+ -+/* -+** lsr_8_u8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** lsr z0\.b, p0/m, z0\.b, #8 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_8_u8_z_tied1, svuint8_t, -+ z0 = svlsr_n_u8_z (p0, z0, 8), -+ z0 = svlsr_z (p0, z0, 8)) -+ -+/* -+** lsr_8_u8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** lsr z0\.b, p0/m, z0\.b, #8 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_8_u8_z_untied, svuint8_t, -+ z0 = svlsr_n_u8_z (p0, z1, 8), -+ z0 = svlsr_z (p0, z1, 8)) -+ -+/* -+** lsr_u8_x_tied1: -+** lsr z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_u8_x_tied1, svuint8_t, -+ z0 = svlsr_u8_x (p0, z0, z1), -+ z0 = svlsr_x (p0, z0, z1)) -+ -+/* -+** lsr_u8_x_tied2: -+** lsrr z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_u8_x_tied2, svuint8_t, -+ z0 = svlsr_u8_x (p0, z1, z0), -+ z0 = svlsr_x (p0, z1, z0)) -+ -+/* -+** lsr_u8_x_untied: -+** ( -+** movprfx z0, z1 -+** lsr z0\.b, p0/m, z0\.b, z2\.b -+** | -+** movprfx z0, z2 -+** lsrr z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_u8_x_untied, svuint8_t, -+ z0 = svlsr_u8_x (p0, z1, z2), -+ z0 = svlsr_x (p0, z1, z2)) -+ -+/* -+** lsr_w0_u8_x_tied1: -+** mov (z[0-9]+\.b), w0 -+** lsr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_w0_u8_x_tied1, svuint8_t, uint8_t, -+ z0 = svlsr_n_u8_x (p0, z0, x0), -+ z0 = svlsr_x (p0, z0, x0)) -+ -+/* -+** lsr_w0_u8_x_untied: -+** mov z0\.b, w0 -+** lsrr z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_w0_u8_x_untied, svuint8_t, uint8_t, -+ z0 = svlsr_n_u8_x (p0, z1, x0), -+ z0 = svlsr_x (p0, z1, x0)) -+ -+/* -+** lsr_1_u8_x_tied1: -+** lsr z0\.b, z0\.b, #1 -+** ret -+*/ 
-+TEST_UNIFORM_Z (lsr_1_u8_x_tied1, svuint8_t, -+ z0 = svlsr_n_u8_x (p0, z0, 1), -+ z0 = svlsr_x (p0, z0, 1)) -+ -+/* -+** lsr_1_u8_x_untied: -+** lsr z0\.b, z1\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_1_u8_x_untied, svuint8_t, -+ z0 = svlsr_n_u8_x (p0, z1, 1), -+ z0 = svlsr_x (p0, z1, 1)) -+ -+/* -+** lsr_7_u8_x_tied1: -+** lsr z0\.b, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_7_u8_x_tied1, svuint8_t, -+ z0 = svlsr_n_u8_x (p0, z0, 7), -+ z0 = svlsr_x (p0, z0, 7)) -+ -+/* -+** lsr_7_u8_x_untied: -+** lsr z0\.b, z1\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_7_u8_x_untied, svuint8_t, -+ z0 = svlsr_n_u8_x (p0, z1, 7), -+ z0 = svlsr_x (p0, z1, 7)) -+ -+/* -+** lsr_8_u8_x_tied1: -+** lsr z0\.b, z0\.b, #8 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_8_u8_x_tied1, svuint8_t, -+ z0 = svlsr_n_u8_x (p0, z0, 8), -+ z0 = svlsr_x (p0, z0, 8)) -+ -+/* -+** lsr_8_u8_x_untied: -+** lsr z0\.b, z1\.b, #8 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_8_u8_x_untied, svuint8_t, -+ z0 = svlsr_n_u8_x (p0, z1, 8), -+ z0 = svlsr_x (p0, z1, 8)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_wide_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_wide_u16.c -new file mode 100644 -index 000000000..863b51a2f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_wide_u16.c -@@ -0,0 +1,325 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lsr_wide_u16_m_tied1: -+** lsr z0\.h, p0/m, z0\.h, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsr_wide_u16_m_tied1, svuint16_t, svuint64_t, -+ z0 = svlsr_wide_u16_m (p0, z0, z4), -+ z0 = svlsr_wide_m (p0, z0, z4)) -+ -+/* -+** lsr_wide_u16_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z4 -+** lsr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (lsr_wide_u16_m_tied2, svuint16_t, svuint64_t, -+ z0_res = svlsr_wide_u16_m (p0, z4, z0), -+ z0_res = svlsr_wide_m (p0, z4, z0)) -+ -+/* -+** lsr_wide_u16_m_untied: -+** movprfx z0, z1 -+** lsr z0\.h, p0/m, z0\.h, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsr_wide_u16_m_untied, svuint16_t, svuint64_t, -+ z0 = svlsr_wide_u16_m (p0, z1, z4), -+ z0 = svlsr_wide_m (p0, z1, z4)) -+ -+/* -+** lsr_wide_x0_u16_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** lsr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_wide_x0_u16_m_tied1, svuint16_t, uint64_t, -+ z0 = svlsr_wide_n_u16_m (p0, z0, x0), -+ z0 = svlsr_wide_m (p0, z0, x0)) -+ -+/* -+** lsr_wide_x0_u16_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** lsr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_wide_x0_u16_m_untied, svuint16_t, uint64_t, -+ z0 = svlsr_wide_n_u16_m (p0, z1, x0), -+ z0 = svlsr_wide_m (p0, z1, x0)) -+ -+/* -+** lsr_wide_1_u16_m_tied1: -+** lsr z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_1_u16_m_tied1, svuint16_t, -+ z0 = svlsr_wide_n_u16_m (p0, z0, 1), -+ z0 = svlsr_wide_m (p0, z0, 1)) -+ -+/* -+** lsr_wide_1_u16_m_untied: -+** movprfx z0, z1 -+** lsr z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_1_u16_m_untied, svuint16_t, -+ z0 = svlsr_wide_n_u16_m (p0, z1, 1), -+ z0 = svlsr_wide_m (p0, z1, 1)) -+ -+/* -+** lsr_wide_15_u16_m_tied1: -+** lsr z0\.h, p0/m, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_15_u16_m_tied1, svuint16_t, -+ z0 = svlsr_wide_n_u16_m (p0, z0, 15), -+ z0 = svlsr_wide_m (p0, z0, 15)) -+ -+/* -+** lsr_wide_15_u16_m_untied: -+** movprfx z0, z1 -+** lsr z0\.h, p0/m, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_15_u16_m_untied, svuint16_t, -+ z0 = svlsr_wide_n_u16_m (p0, z1, 15), -+ z0 = 
svlsr_wide_m (p0, z1, 15)) -+ -+/* -+** lsr_wide_16_u16_m_tied1: -+** lsr z0\.h, p0/m, z0\.h, #16 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_16_u16_m_tied1, svuint16_t, -+ z0 = svlsr_wide_n_u16_m (p0, z0, 16), -+ z0 = svlsr_wide_m (p0, z0, 16)) -+ -+/* -+** lsr_wide_16_u16_m_untied: -+** movprfx z0, z1 -+** lsr z0\.h, p0/m, z0\.h, #16 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_16_u16_m_untied, svuint16_t, -+ z0 = svlsr_wide_n_u16_m (p0, z1, 16), -+ z0 = svlsr_wide_m (p0, z1, 16)) -+ -+/* -+** lsr_wide_u16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** lsr z0\.h, p0/m, z0\.h, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsr_wide_u16_z_tied1, svuint16_t, svuint64_t, -+ z0 = svlsr_wide_u16_z (p0, z0, z4), -+ z0 = svlsr_wide_z (p0, z0, z4)) -+ -+/* -+** lsr_wide_u16_z_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.h, p0/z, z4\.h -+** lsr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (lsr_wide_u16_z_tied2, svuint16_t, svuint64_t, -+ z0_res = svlsr_wide_u16_z (p0, z4, z0), -+ z0_res = svlsr_wide_z (p0, z4, z0)) -+ -+/* -+** lsr_wide_u16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** lsr z0\.h, p0/m, z0\.h, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsr_wide_u16_z_untied, svuint16_t, svuint64_t, -+ z0 = svlsr_wide_u16_z (p0, z1, z4), -+ z0 = svlsr_wide_z (p0, z1, z4)) -+ -+/* -+** lsr_wide_x0_u16_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.h, p0/z, z0\.h -+** lsr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_wide_x0_u16_z_tied1, svuint16_t, uint64_t, -+ z0 = svlsr_wide_n_u16_z (p0, z0, x0), -+ z0 = svlsr_wide_z (p0, z0, x0)) -+ -+/* -+** lsr_wide_x0_u16_z_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.h, p0/z, z1\.h -+** lsr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_wide_x0_u16_z_untied, svuint16_t, uint64_t, -+ z0 = svlsr_wide_n_u16_z (p0, z1, x0), -+ z0 = svlsr_wide_z (p0, z1, x0)) -+ -+/* -+** lsr_wide_1_u16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** lsr z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_1_u16_z_tied1, svuint16_t, -+ z0 = svlsr_wide_n_u16_z (p0, z0, 1), -+ z0 = svlsr_wide_z (p0, z0, 1)) -+ -+/* -+** lsr_wide_1_u16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** lsr z0\.h, p0/m, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_1_u16_z_untied, svuint16_t, -+ z0 = svlsr_wide_n_u16_z (p0, z1, 1), -+ z0 = svlsr_wide_z (p0, z1, 1)) -+ -+/* -+** lsr_wide_15_u16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** lsr z0\.h, p0/m, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_15_u16_z_tied1, svuint16_t, -+ z0 = svlsr_wide_n_u16_z (p0, z0, 15), -+ z0 = svlsr_wide_z (p0, z0, 15)) -+ -+/* -+** lsr_wide_15_u16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** lsr z0\.h, p0/m, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_15_u16_z_untied, svuint16_t, -+ z0 = svlsr_wide_n_u16_z (p0, z1, 15), -+ z0 = svlsr_wide_z (p0, z1, 15)) -+ -+/* -+** lsr_wide_16_u16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** lsr z0\.h, p0/m, z0\.h, #16 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_16_u16_z_tied1, svuint16_t, -+ z0 = svlsr_wide_n_u16_z (p0, z0, 16), -+ z0 = svlsr_wide_z (p0, z0, 16)) -+ -+/* -+** lsr_wide_16_u16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** lsr z0\.h, p0/m, z0\.h, #16 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_16_u16_z_untied, svuint16_t, -+ z0 = svlsr_wide_n_u16_z (p0, z1, 16), -+ z0 = svlsr_wide_z (p0, z1, 16)) -+ -+/* -+** lsr_wide_u16_x_tied1: -+** lsr z0\.h, z0\.h, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsr_wide_u16_x_tied1, svuint16_t, svuint64_t, -+ z0 = svlsr_wide_u16_x (p0, z0, z4), -+ z0 = svlsr_wide_x (p0, z0, 
z4)) -+ -+/* -+** lsr_wide_u16_x_tied2: -+** lsr z0\.h, z4\.h, z0\.d -+** ret -+*/ -+TEST_DUAL_Z_REV (lsr_wide_u16_x_tied2, svuint16_t, svuint64_t, -+ z0_res = svlsr_wide_u16_x (p0, z4, z0), -+ z0_res = svlsr_wide_x (p0, z4, z0)) -+ -+/* -+** lsr_wide_u16_x_untied: -+** lsr z0\.h, z1\.h, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsr_wide_u16_x_untied, svuint16_t, svuint64_t, -+ z0 = svlsr_wide_u16_x (p0, z1, z4), -+ z0 = svlsr_wide_x (p0, z1, z4)) -+ -+/* -+** lsr_wide_x0_u16_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** lsr z0\.h, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_wide_x0_u16_x_tied1, svuint16_t, uint64_t, -+ z0 = svlsr_wide_n_u16_x (p0, z0, x0), -+ z0 = svlsr_wide_x (p0, z0, x0)) -+ -+/* -+** lsr_wide_x0_u16_x_untied: -+** mov (z[0-9]+\.d), x0 -+** lsr z0\.h, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_wide_x0_u16_x_untied, svuint16_t, uint64_t, -+ z0 = svlsr_wide_n_u16_x (p0, z1, x0), -+ z0 = svlsr_wide_x (p0, z1, x0)) -+ -+/* -+** lsr_wide_1_u16_x_tied1: -+** lsr z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_1_u16_x_tied1, svuint16_t, -+ z0 = svlsr_wide_n_u16_x (p0, z0, 1), -+ z0 = svlsr_wide_x (p0, z0, 1)) -+ -+/* -+** lsr_wide_1_u16_x_untied: -+** lsr z0\.h, z1\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_1_u16_x_untied, svuint16_t, -+ z0 = svlsr_wide_n_u16_x (p0, z1, 1), -+ z0 = svlsr_wide_x (p0, z1, 1)) -+ -+/* -+** lsr_wide_15_u16_x_tied1: -+** lsr z0\.h, z0\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_15_u16_x_tied1, svuint16_t, -+ z0 = svlsr_wide_n_u16_x (p0, z0, 15), -+ z0 = svlsr_wide_x (p0, z0, 15)) -+ -+/* -+** lsr_wide_15_u16_x_untied: -+** lsr z0\.h, z1\.h, #15 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_15_u16_x_untied, svuint16_t, -+ z0 = svlsr_wide_n_u16_x (p0, z1, 15), -+ z0 = svlsr_wide_x (p0, z1, 15)) -+ -+/* -+** lsr_wide_16_u16_x_tied1: -+** lsr z0\.h, z0\.h, #16 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_16_u16_x_tied1, svuint16_t, -+ z0 = svlsr_wide_n_u16_x (p0, z0, 16), -+ z0 = svlsr_wide_x (p0, z0, 16)) -+ -+/* -+** lsr_wide_16_u16_x_untied: -+** lsr z0\.h, z1\.h, #16 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_16_u16_x_untied, svuint16_t, -+ z0 = svlsr_wide_n_u16_x (p0, z1, 16), -+ z0 = svlsr_wide_x (p0, z1, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_wide_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_wide_u32.c -new file mode 100644 -index 000000000..73c2cf86e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_wide_u32.c -@@ -0,0 +1,325 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lsr_wide_u32_m_tied1: -+** lsr z0\.s, p0/m, z0\.s, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsr_wide_u32_m_tied1, svuint32_t, svuint64_t, -+ z0 = svlsr_wide_u32_m (p0, z0, z4), -+ z0 = svlsr_wide_m (p0, z0, z4)) -+ -+/* -+** lsr_wide_u32_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z4 -+** lsr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (lsr_wide_u32_m_tied2, svuint32_t, svuint64_t, -+ z0_res = svlsr_wide_u32_m (p0, z4, z0), -+ z0_res = svlsr_wide_m (p0, z4, z0)) -+ -+/* -+** lsr_wide_u32_m_untied: -+** movprfx z0, z1 -+** lsr z0\.s, p0/m, z0\.s, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsr_wide_u32_m_untied, svuint32_t, svuint64_t, -+ z0 = svlsr_wide_u32_m (p0, z1, z4), -+ z0 = svlsr_wide_m (p0, z1, z4)) -+ -+/* -+** lsr_wide_x0_u32_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** lsr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_wide_x0_u32_m_tied1, svuint32_t, uint64_t, -+ z0 = svlsr_wide_n_u32_m (p0, z0, x0), -+ z0 = 
svlsr_wide_m (p0, z0, x0)) -+ -+/* -+** lsr_wide_x0_u32_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** lsr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_wide_x0_u32_m_untied, svuint32_t, uint64_t, -+ z0 = svlsr_wide_n_u32_m (p0, z1, x0), -+ z0 = svlsr_wide_m (p0, z1, x0)) -+ -+/* -+** lsr_wide_1_u32_m_tied1: -+** lsr z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_1_u32_m_tied1, svuint32_t, -+ z0 = svlsr_wide_n_u32_m (p0, z0, 1), -+ z0 = svlsr_wide_m (p0, z0, 1)) -+ -+/* -+** lsr_wide_1_u32_m_untied: -+** movprfx z0, z1 -+** lsr z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_1_u32_m_untied, svuint32_t, -+ z0 = svlsr_wide_n_u32_m (p0, z1, 1), -+ z0 = svlsr_wide_m (p0, z1, 1)) -+ -+/* -+** lsr_wide_31_u32_m_tied1: -+** lsr z0\.s, p0/m, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_31_u32_m_tied1, svuint32_t, -+ z0 = svlsr_wide_n_u32_m (p0, z0, 31), -+ z0 = svlsr_wide_m (p0, z0, 31)) -+ -+/* -+** lsr_wide_31_u32_m_untied: -+** movprfx z0, z1 -+** lsr z0\.s, p0/m, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_31_u32_m_untied, svuint32_t, -+ z0 = svlsr_wide_n_u32_m (p0, z1, 31), -+ z0 = svlsr_wide_m (p0, z1, 31)) -+ -+/* -+** lsr_wide_32_u32_m_tied1: -+** lsr z0\.s, p0/m, z0\.s, #32 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_32_u32_m_tied1, svuint32_t, -+ z0 = svlsr_wide_n_u32_m (p0, z0, 32), -+ z0 = svlsr_wide_m (p0, z0, 32)) -+ -+/* -+** lsr_wide_32_u32_m_untied: -+** movprfx z0, z1 -+** lsr z0\.s, p0/m, z0\.s, #32 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_32_u32_m_untied, svuint32_t, -+ z0 = svlsr_wide_n_u32_m (p0, z1, 32), -+ z0 = svlsr_wide_m (p0, z1, 32)) -+ -+/* -+** lsr_wide_u32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** lsr z0\.s, p0/m, z0\.s, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsr_wide_u32_z_tied1, svuint32_t, svuint64_t, -+ z0 = svlsr_wide_u32_z (p0, z0, z4), -+ z0 = svlsr_wide_z (p0, z0, z4)) -+ -+/* -+** lsr_wide_u32_z_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.s, p0/z, z4\.s -+** lsr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (lsr_wide_u32_z_tied2, svuint32_t, svuint64_t, -+ z0_res = svlsr_wide_u32_z (p0, z4, z0), -+ z0_res = svlsr_wide_z (p0, z4, z0)) -+ -+/* -+** lsr_wide_u32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** lsr z0\.s, p0/m, z0\.s, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsr_wide_u32_z_untied, svuint32_t, svuint64_t, -+ z0 = svlsr_wide_u32_z (p0, z1, z4), -+ z0 = svlsr_wide_z (p0, z1, z4)) -+ -+/* -+** lsr_wide_x0_u32_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.s, p0/z, z0\.s -+** lsr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_wide_x0_u32_z_tied1, svuint32_t, uint64_t, -+ z0 = svlsr_wide_n_u32_z (p0, z0, x0), -+ z0 = svlsr_wide_z (p0, z0, x0)) -+ -+/* -+** lsr_wide_x0_u32_z_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.s, p0/z, z1\.s -+** lsr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_wide_x0_u32_z_untied, svuint32_t, uint64_t, -+ z0 = svlsr_wide_n_u32_z (p0, z1, x0), -+ z0 = svlsr_wide_z (p0, z1, x0)) -+ -+/* -+** lsr_wide_1_u32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** lsr z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_1_u32_z_tied1, svuint32_t, -+ z0 = svlsr_wide_n_u32_z (p0, z0, 1), -+ z0 = svlsr_wide_z (p0, z0, 1)) -+ -+/* -+** lsr_wide_1_u32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** lsr z0\.s, p0/m, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_1_u32_z_untied, svuint32_t, -+ z0 = svlsr_wide_n_u32_z (p0, z1, 1), -+ z0 = svlsr_wide_z (p0, z1, 1)) -+ -+/* -+** 
lsr_wide_31_u32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** lsr z0\.s, p0/m, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_31_u32_z_tied1, svuint32_t, -+ z0 = svlsr_wide_n_u32_z (p0, z0, 31), -+ z0 = svlsr_wide_z (p0, z0, 31)) -+ -+/* -+** lsr_wide_31_u32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** lsr z0\.s, p0/m, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_31_u32_z_untied, svuint32_t, -+ z0 = svlsr_wide_n_u32_z (p0, z1, 31), -+ z0 = svlsr_wide_z (p0, z1, 31)) -+ -+/* -+** lsr_wide_32_u32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** lsr z0\.s, p0/m, z0\.s, #32 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_32_u32_z_tied1, svuint32_t, -+ z0 = svlsr_wide_n_u32_z (p0, z0, 32), -+ z0 = svlsr_wide_z (p0, z0, 32)) -+ -+/* -+** lsr_wide_32_u32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** lsr z0\.s, p0/m, z0\.s, #32 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_32_u32_z_untied, svuint32_t, -+ z0 = svlsr_wide_n_u32_z (p0, z1, 32), -+ z0 = svlsr_wide_z (p0, z1, 32)) -+ -+/* -+** lsr_wide_u32_x_tied1: -+** lsr z0\.s, z0\.s, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsr_wide_u32_x_tied1, svuint32_t, svuint64_t, -+ z0 = svlsr_wide_u32_x (p0, z0, z4), -+ z0 = svlsr_wide_x (p0, z0, z4)) -+ -+/* -+** lsr_wide_u32_x_tied2: -+** lsr z0\.s, z4\.s, z0\.d -+** ret -+*/ -+TEST_DUAL_Z_REV (lsr_wide_u32_x_tied2, svuint32_t, svuint64_t, -+ z0_res = svlsr_wide_u32_x (p0, z4, z0), -+ z0_res = svlsr_wide_x (p0, z4, z0)) -+ -+/* -+** lsr_wide_u32_x_untied: -+** lsr z0\.s, z1\.s, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsr_wide_u32_x_untied, svuint32_t, svuint64_t, -+ z0 = svlsr_wide_u32_x (p0, z1, z4), -+ z0 = svlsr_wide_x (p0, z1, z4)) -+ -+/* -+** lsr_wide_x0_u32_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** lsr z0\.s, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_wide_x0_u32_x_tied1, svuint32_t, uint64_t, -+ z0 = svlsr_wide_n_u32_x (p0, z0, x0), -+ z0 = svlsr_wide_x (p0, z0, x0)) -+ -+/* -+** lsr_wide_x0_u32_x_untied: -+** mov (z[0-9]+\.d), x0 -+** lsr z0\.s, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_wide_x0_u32_x_untied, svuint32_t, uint64_t, -+ z0 = svlsr_wide_n_u32_x (p0, z1, x0), -+ z0 = svlsr_wide_x (p0, z1, x0)) -+ -+/* -+** lsr_wide_1_u32_x_tied1: -+** lsr z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_1_u32_x_tied1, svuint32_t, -+ z0 = svlsr_wide_n_u32_x (p0, z0, 1), -+ z0 = svlsr_wide_x (p0, z0, 1)) -+ -+/* -+** lsr_wide_1_u32_x_untied: -+** lsr z0\.s, z1\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_1_u32_x_untied, svuint32_t, -+ z0 = svlsr_wide_n_u32_x (p0, z1, 1), -+ z0 = svlsr_wide_x (p0, z1, 1)) -+ -+/* -+** lsr_wide_31_u32_x_tied1: -+** lsr z0\.s, z0\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_31_u32_x_tied1, svuint32_t, -+ z0 = svlsr_wide_n_u32_x (p0, z0, 31), -+ z0 = svlsr_wide_x (p0, z0, 31)) -+ -+/* -+** lsr_wide_31_u32_x_untied: -+** lsr z0\.s, z1\.s, #31 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_31_u32_x_untied, svuint32_t, -+ z0 = svlsr_wide_n_u32_x (p0, z1, 31), -+ z0 = svlsr_wide_x (p0, z1, 31)) -+ -+/* -+** lsr_wide_32_u32_x_tied1: -+** lsr z0\.s, z0\.s, #32 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_32_u32_x_tied1, svuint32_t, -+ z0 = svlsr_wide_n_u32_x (p0, z0, 32), -+ z0 = svlsr_wide_x (p0, z0, 32)) -+ -+/* -+** lsr_wide_32_u32_x_untied: -+** lsr z0\.s, z1\.s, #32 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_32_u32_x_untied, svuint32_t, -+ z0 = svlsr_wide_n_u32_x (p0, z1, 32), -+ z0 = svlsr_wide_x (p0, z1, 32)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_wide_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_wide_u8.c -new file mode 100644 -index 
000000000..fe44eabda ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/lsr_wide_u8.c -@@ -0,0 +1,325 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** lsr_wide_u8_m_tied1: -+** lsr z0\.b, p0/m, z0\.b, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsr_wide_u8_m_tied1, svuint8_t, svuint64_t, -+ z0 = svlsr_wide_u8_m (p0, z0, z4), -+ z0 = svlsr_wide_m (p0, z0, z4)) -+ -+/* -+** lsr_wide_u8_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z4 -+** lsr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (lsr_wide_u8_m_tied2, svuint8_t, svuint64_t, -+ z0_res = svlsr_wide_u8_m (p0, z4, z0), -+ z0_res = svlsr_wide_m (p0, z4, z0)) -+ -+/* -+** lsr_wide_u8_m_untied: -+** movprfx z0, z1 -+** lsr z0\.b, p0/m, z0\.b, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsr_wide_u8_m_untied, svuint8_t, svuint64_t, -+ z0 = svlsr_wide_u8_m (p0, z1, z4), -+ z0 = svlsr_wide_m (p0, z1, z4)) -+ -+/* -+** lsr_wide_x0_u8_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** lsr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_wide_x0_u8_m_tied1, svuint8_t, uint64_t, -+ z0 = svlsr_wide_n_u8_m (p0, z0, x0), -+ z0 = svlsr_wide_m (p0, z0, x0)) -+ -+/* -+** lsr_wide_x0_u8_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** lsr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_wide_x0_u8_m_untied, svuint8_t, uint64_t, -+ z0 = svlsr_wide_n_u8_m (p0, z1, x0), -+ z0 = svlsr_wide_m (p0, z1, x0)) -+ -+/* -+** lsr_wide_1_u8_m_tied1: -+** lsr z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_1_u8_m_tied1, svuint8_t, -+ z0 = svlsr_wide_n_u8_m (p0, z0, 1), -+ z0 = svlsr_wide_m (p0, z0, 1)) -+ -+/* -+** lsr_wide_1_u8_m_untied: -+** movprfx z0, z1 -+** lsr z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_1_u8_m_untied, svuint8_t, -+ z0 = svlsr_wide_n_u8_m (p0, z1, 1), -+ z0 = svlsr_wide_m (p0, z1, 1)) -+ -+/* -+** lsr_wide_7_u8_m_tied1: -+** lsr z0\.b, p0/m, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_7_u8_m_tied1, svuint8_t, -+ z0 = svlsr_wide_n_u8_m (p0, z0, 7), -+ z0 = svlsr_wide_m (p0, z0, 7)) -+ -+/* -+** lsr_wide_7_u8_m_untied: -+** movprfx z0, z1 -+** lsr z0\.b, p0/m, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_7_u8_m_untied, svuint8_t, -+ z0 = svlsr_wide_n_u8_m (p0, z1, 7), -+ z0 = svlsr_wide_m (p0, z1, 7)) -+ -+/* -+** lsr_wide_8_u8_m_tied1: -+** lsr z0\.b, p0/m, z0\.b, #8 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_8_u8_m_tied1, svuint8_t, -+ z0 = svlsr_wide_n_u8_m (p0, z0, 8), -+ z0 = svlsr_wide_m (p0, z0, 8)) -+ -+/* -+** lsr_wide_8_u8_m_untied: -+** movprfx z0, z1 -+** lsr z0\.b, p0/m, z0\.b, #8 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_8_u8_m_untied, svuint8_t, -+ z0 = svlsr_wide_n_u8_m (p0, z1, 8), -+ z0 = svlsr_wide_m (p0, z1, 8)) -+ -+/* -+** lsr_wide_u8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** lsr z0\.b, p0/m, z0\.b, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsr_wide_u8_z_tied1, svuint8_t, svuint64_t, -+ z0 = svlsr_wide_u8_z (p0, z0, z4), -+ z0 = svlsr_wide_z (p0, z0, z4)) -+ -+/* -+** lsr_wide_u8_z_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.b, p0/z, z4\.b -+** lsr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (lsr_wide_u8_z_tied2, svuint8_t, svuint64_t, -+ z0_res = svlsr_wide_u8_z (p0, z4, z0), -+ z0_res = svlsr_wide_z (p0, z4, z0)) -+ -+/* -+** lsr_wide_u8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** lsr z0\.b, p0/m, z0\.b, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsr_wide_u8_z_untied, svuint8_t, svuint64_t, -+ z0 = svlsr_wide_u8_z (p0, z1, z4), -+ z0 = svlsr_wide_z (p0, z1, z4)) 
-+ -+/* -+** lsr_wide_x0_u8_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.b, p0/z, z0\.b -+** lsr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_wide_x0_u8_z_tied1, svuint8_t, uint64_t, -+ z0 = svlsr_wide_n_u8_z (p0, z0, x0), -+ z0 = svlsr_wide_z (p0, z0, x0)) -+ -+/* -+** lsr_wide_x0_u8_z_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.b, p0/z, z1\.b -+** lsr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_wide_x0_u8_z_untied, svuint8_t, uint64_t, -+ z0 = svlsr_wide_n_u8_z (p0, z1, x0), -+ z0 = svlsr_wide_z (p0, z1, x0)) -+ -+/* -+** lsr_wide_1_u8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** lsr z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_1_u8_z_tied1, svuint8_t, -+ z0 = svlsr_wide_n_u8_z (p0, z0, 1), -+ z0 = svlsr_wide_z (p0, z0, 1)) -+ -+/* -+** lsr_wide_1_u8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** lsr z0\.b, p0/m, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_1_u8_z_untied, svuint8_t, -+ z0 = svlsr_wide_n_u8_z (p0, z1, 1), -+ z0 = svlsr_wide_z (p0, z1, 1)) -+ -+/* -+** lsr_wide_7_u8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** lsr z0\.b, p0/m, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_7_u8_z_tied1, svuint8_t, -+ z0 = svlsr_wide_n_u8_z (p0, z0, 7), -+ z0 = svlsr_wide_z (p0, z0, 7)) -+ -+/* -+** lsr_wide_7_u8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** lsr z0\.b, p0/m, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_7_u8_z_untied, svuint8_t, -+ z0 = svlsr_wide_n_u8_z (p0, z1, 7), -+ z0 = svlsr_wide_z (p0, z1, 7)) -+ -+/* -+** lsr_wide_8_u8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** lsr z0\.b, p0/m, z0\.b, #8 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_8_u8_z_tied1, svuint8_t, -+ z0 = svlsr_wide_n_u8_z (p0, z0, 8), -+ z0 = svlsr_wide_z (p0, z0, 8)) -+ -+/* -+** lsr_wide_8_u8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** lsr z0\.b, p0/m, z0\.b, #8 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_8_u8_z_untied, svuint8_t, -+ z0 = svlsr_wide_n_u8_z (p0, z1, 8), -+ z0 = svlsr_wide_z (p0, z1, 8)) -+ -+/* -+** lsr_wide_u8_x_tied1: -+** lsr z0\.b, z0\.b, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsr_wide_u8_x_tied1, svuint8_t, svuint64_t, -+ z0 = svlsr_wide_u8_x (p0, z0, z4), -+ z0 = svlsr_wide_x (p0, z0, z4)) -+ -+/* -+** lsr_wide_u8_x_tied2: -+** lsr z0\.b, z4\.b, z0\.d -+** ret -+*/ -+TEST_DUAL_Z_REV (lsr_wide_u8_x_tied2, svuint8_t, svuint64_t, -+ z0_res = svlsr_wide_u8_x (p0, z4, z0), -+ z0_res = svlsr_wide_x (p0, z4, z0)) -+ -+/* -+** lsr_wide_u8_x_untied: -+** lsr z0\.b, z1\.b, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (lsr_wide_u8_x_untied, svuint8_t, svuint64_t, -+ z0 = svlsr_wide_u8_x (p0, z1, z4), -+ z0 = svlsr_wide_x (p0, z1, z4)) -+ -+/* -+** lsr_wide_x0_u8_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** lsr z0\.b, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_wide_x0_u8_x_tied1, svuint8_t, uint64_t, -+ z0 = svlsr_wide_n_u8_x (p0, z0, x0), -+ z0 = svlsr_wide_x (p0, z0, x0)) -+ -+/* -+** lsr_wide_x0_u8_x_untied: -+** mov (z[0-9]+\.d), x0 -+** lsr z0\.b, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (lsr_wide_x0_u8_x_untied, svuint8_t, uint64_t, -+ z0 = svlsr_wide_n_u8_x (p0, z1, x0), -+ z0 = svlsr_wide_x (p0, z1, x0)) -+ -+/* -+** lsr_wide_1_u8_x_tied1: -+** lsr z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_1_u8_x_tied1, svuint8_t, -+ z0 = svlsr_wide_n_u8_x (p0, z0, 1), -+ z0 = svlsr_wide_x (p0, z0, 1)) -+ -+/* -+** lsr_wide_1_u8_x_untied: -+** lsr z0\.b, z1\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_1_u8_x_untied, svuint8_t, -+ z0 = svlsr_wide_n_u8_x (p0, z1, 1), -+ z0 = svlsr_wide_x (p0, z1, 1)) -+ 
-+/* -+** lsr_wide_7_u8_x_tied1: -+** lsr z0\.b, z0\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_7_u8_x_tied1, svuint8_t, -+ z0 = svlsr_wide_n_u8_x (p0, z0, 7), -+ z0 = svlsr_wide_x (p0, z0, 7)) -+ -+/* -+** lsr_wide_7_u8_x_untied: -+** lsr z0\.b, z1\.b, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_7_u8_x_untied, svuint8_t, -+ z0 = svlsr_wide_n_u8_x (p0, z1, 7), -+ z0 = svlsr_wide_x (p0, z1, 7)) -+ -+/* -+** lsr_wide_8_u8_x_tied1: -+** lsr z0\.b, z0\.b, #8 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_8_u8_x_tied1, svuint8_t, -+ z0 = svlsr_wide_n_u8_x (p0, z0, 8), -+ z0 = svlsr_wide_x (p0, z0, 8)) -+ -+/* -+** lsr_wide_8_u8_x_untied: -+** lsr z0\.b, z1\.b, #8 -+** ret -+*/ -+TEST_UNIFORM_Z (lsr_wide_8_u8_x_untied, svuint8_t, -+ z0 = svlsr_wide_n_u8_x (p0, z1, 8), -+ z0 = svlsr_wide_x (p0, z1, 8)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_f16.c -new file mode 100644 -index 000000000..7656f9e54 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_f16.c -@@ -0,0 +1,398 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mad_f16_m_tied1: -+** fmad z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mad_f16_m_tied1, svfloat16_t, -+ z0 = svmad_f16_m (p0, z0, z1, z2), -+ z0 = svmad_m (p0, z0, z1, z2)) -+ -+/* -+** mad_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fmad z0\.h, p0/m, \1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mad_f16_m_tied2, svfloat16_t, -+ z0 = svmad_f16_m (p0, z1, z0, z2), -+ z0 = svmad_m (p0, z1, z0, z2)) -+ -+/* -+** mad_f16_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fmad z0\.h, p0/m, z2\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mad_f16_m_tied3, svfloat16_t, -+ z0 = svmad_f16_m (p0, z1, z2, z0), -+ z0 = svmad_m (p0, z1, z2, z0)) -+ -+/* -+** mad_f16_m_untied: -+** movprfx z0, z1 -+** fmad z0\.h, p0/m, z2\.h, z3\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mad_f16_m_untied, svfloat16_t, -+ z0 = svmad_f16_m (p0, z1, z2, z3), -+ z0 = svmad_m (p0, z1, z2, z3)) -+ -+/* -+** mad_h4_f16_m_tied1: -+** mov (z[0-9]+\.h), h4 -+** fmad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mad_h4_f16_m_tied1, svfloat16_t, __fp16, -+ z0 = svmad_n_f16_m (p0, z0, z1, d4), -+ z0 = svmad_m (p0, z0, z1, d4)) -+ -+/* -+** mad_h4_f16_m_untied: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0, z1 -+** fmad z0\.h, p0/m, z2\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mad_h4_f16_m_untied, svfloat16_t, __fp16, -+ z0 = svmad_n_f16_m (p0, z1, z2, d4), -+ z0 = svmad_m (p0, z1, z2, d4)) -+ -+/* -+** mad_2_f16_m_tied1: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fmad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_2_f16_m_tied1, svfloat16_t, -+ z0 = svmad_n_f16_m (p0, z0, z1, 2), -+ z0 = svmad_m (p0, z0, z1, 2)) -+ -+/* -+** mad_2_f16_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fmad z0\.h, p0/m, z2\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_2_f16_m_untied, svfloat16_t, -+ z0 = svmad_n_f16_m (p0, z1, z2, 2), -+ z0 = svmad_m (p0, z1, z2, 2)) -+ -+/* -+** mad_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fmad z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mad_f16_z_tied1, svfloat16_t, -+ z0 = svmad_f16_z (p0, z0, z1, z2), -+ z0 = svmad_z (p0, z0, z1, z2)) -+ -+/* -+** mad_f16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** fmad z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mad_f16_z_tied2, svfloat16_t, -+ z0 = svmad_f16_z (p0, z1, z0, z2), -+ z0 = svmad_z (p0, z1, z0, z2)) -+ -+/* -+** mad_f16_z_tied3: -+** movprfx z0\.h, p0/z, z0\.h -+** fmla z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mad_f16_z_tied3, svfloat16_t, -+ z0 = svmad_f16_z (p0, z1, z2, z0), -+ z0 = svmad_z (p0, z1, z2, z0)) -+ -+/* -+** mad_f16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fmad z0\.h, p0/m, z2\.h, z3\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fmad z0\.h, p0/m, z1\.h, z3\.h -+** | -+** movprfx z0\.h, p0/z, z3\.h -+** fmla z0\.h, p0/m, z1\.h, z2\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mad_f16_z_untied, svfloat16_t, -+ z0 = svmad_f16_z (p0, z1, z2, z3), -+ z0 = svmad_z (p0, z1, z2, z3)) -+ -+/* -+** mad_h4_f16_z_tied1: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0\.h, p0/z, z0\.h -+** fmad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mad_h4_f16_z_tied1, svfloat16_t, __fp16, -+ z0 = svmad_n_f16_z (p0, z0, z1, d4), -+ z0 = svmad_z (p0, z0, z1, d4)) -+ -+/* -+** mad_h4_f16_z_tied2: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0\.h, p0/z, z0\.h -+** fmad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mad_h4_f16_z_tied2, svfloat16_t, __fp16, -+ z0 = svmad_n_f16_z (p0, z1, z0, d4), -+ z0 = svmad_z (p0, z1, z0, d4)) -+ -+/* -+** mad_h4_f16_z_untied: -+** mov (z[0-9]+\.h), h4 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fmad z0\.h, p0/m, z2\.h, \1 -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fmad z0\.h, p0/m, z1\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** fmla z0\.h, p0/m, z1\.h, z2\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (mad_h4_f16_z_untied, svfloat16_t, __fp16, -+ z0 = svmad_n_f16_z (p0, z1, z2, d4), -+ z0 = svmad_z (p0, z1, z2, d4)) -+ -+/* -+** mad_2_f16_z_tied1: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** movprfx z0\.h, p0/z, z0\.h -+** fmad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_2_f16_z_tied1, svfloat16_t, -+ z0 = svmad_n_f16_z (p0, z0, z1, 2), -+ z0 = svmad_z (p0, z0, z1, 2)) -+ -+/* -+** mad_2_f16_z_tied2: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** movprfx z0\.h, p0/z, z0\.h -+** fmad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_2_f16_z_tied2, svfloat16_t, -+ z0 = svmad_n_f16_z (p0, z1, z0, 2), -+ z0 = svmad_z (p0, z1, z0, 2)) -+ -+/* -+** mad_2_f16_z_untied: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fmad z0\.h, p0/m, z2\.h, \1 -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fmad z0\.h, p0/m, z1\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** fmla z0\.h, p0/m, z1\.h, z2\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mad_2_f16_z_untied, svfloat16_t, -+ z0 = svmad_n_f16_z (p0, z1, z2, 2), -+ z0 = svmad_z (p0, z1, z2, 2)) -+ -+/* -+** mad_f16_x_tied1: -+** fmad z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mad_f16_x_tied1, svfloat16_t, -+ z0 = svmad_f16_x (p0, z0, z1, z2), -+ z0 = svmad_x (p0, z0, z1, z2)) -+ -+/* -+** mad_f16_x_tied2: -+** fmad z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mad_f16_x_tied2, svfloat16_t, -+ z0 = svmad_f16_x (p0, z1, z0, z2), -+ z0 = svmad_x (p0, z1, z0, z2)) -+ -+/* -+** mad_f16_x_tied3: -+** fmla z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mad_f16_x_tied3, svfloat16_t, -+ z0 = svmad_f16_x (p0, z1, z2, z0), -+ z0 = svmad_x (p0, z1, z2, z0)) -+ -+/* -+** mad_f16_x_untied: -+** ( -+** movprfx z0, z1 -+** fmad z0\.h, p0/m, z2\.h, z3\.h -+** | -+** movprfx z0, z2 -+** fmad z0\.h, p0/m, z1\.h, z3\.h -+** | -+** movprfx z0, z3 -+** fmla z0\.h, p0/m, z1\.h, z2\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mad_f16_x_untied, svfloat16_t, -+ z0 = svmad_f16_x (p0, z1, z2, z3), -+ z0 = svmad_x (p0, z1, z2, z3)) -+ -+/* -+** mad_h4_f16_x_tied1: -+** mov (z[0-9]+\.h), h4 -+** fmad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mad_h4_f16_x_tied1, svfloat16_t, __fp16, -+ z0 = svmad_n_f16_x (p0, z0, z1, d4), -+ z0 = svmad_x (p0, z0, z1, d4)) -+ -+/* -+** mad_h4_f16_x_tied2: -+** mov (z[0-9]+\.h), h4 -+** fmad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mad_h4_f16_x_tied2, svfloat16_t, __fp16, -+ z0 = svmad_n_f16_x (p0, z1, z0, d4), -+ z0 = svmad_x (p0, z1, z0, d4)) -+ -+/* -+** mad_h4_f16_x_untied: { xfail *-*-* } -+** mov z0\.h, h4 -+** fmla z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_ZD (mad_h4_f16_x_untied, svfloat16_t, __fp16, -+ z0 = svmad_n_f16_x (p0, z1, z2, d4), -+ z0 = svmad_x (p0, z1, z2, d4)) -+ -+/* -+** mad_2_f16_x_tied1: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fmad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_2_f16_x_tied1, svfloat16_t, -+ z0 = svmad_n_f16_x (p0, z0, z1, 2), -+ z0 = svmad_x (p0, z0, z1, 2)) -+ -+/* -+** mad_2_f16_x_tied2: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fmad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_2_f16_x_tied2, svfloat16_t, -+ z0 = svmad_n_f16_x (p0, z1, z0, 2), -+ z0 = svmad_x (p0, z1, z0, 2)) -+ -+/* -+** mad_2_f16_x_untied: -+** fmov z0\.h, #2\.0(?:e\+0)? -+** fmla z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mad_2_f16_x_untied, svfloat16_t, -+ z0 = svmad_n_f16_x (p0, z1, z2, 2), -+ z0 = svmad_x (p0, z1, z2, 2)) -+ -+/* -+** ptrue_mad_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mad_f16_x_tied1, svfloat16_t, -+ z0 = svmad_f16_x (svptrue_b16 (), z0, z1, z2), -+ z0 = svmad_x (svptrue_b16 (), z0, z1, z2)) -+ -+/* -+** ptrue_mad_f16_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mad_f16_x_tied2, svfloat16_t, -+ z0 = svmad_f16_x (svptrue_b16 (), z1, z0, z2), -+ z0 = svmad_x (svptrue_b16 (), z1, z0, z2)) -+ -+/* -+** ptrue_mad_f16_x_tied3: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mad_f16_x_tied3, svfloat16_t, -+ z0 = svmad_f16_x (svptrue_b16 (), z1, z2, z0), -+ z0 = svmad_x (svptrue_b16 (), z1, z2, z0)) -+ -+/* -+** ptrue_mad_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mad_f16_x_untied, svfloat16_t, -+ z0 = svmad_f16_x (svptrue_b16 (), z1, z2, z3), -+ z0 = svmad_x (svptrue_b16 (), z1, z2, z3)) -+ -+/* -+** ptrue_mad_2_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mad_2_f16_x_tied1, svfloat16_t, -+ z0 = svmad_n_f16_x (svptrue_b16 (), z0, z1, 2), -+ z0 = svmad_x (svptrue_b16 (), z0, z1, 2)) -+ -+/* -+** ptrue_mad_2_f16_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mad_2_f16_x_tied2, svfloat16_t, -+ z0 = svmad_n_f16_x (svptrue_b16 (), z1, z0, 2), -+ z0 = svmad_x (svptrue_b16 (), z1, z0, 2)) -+ -+/* -+** ptrue_mad_2_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mad_2_f16_x_untied, svfloat16_t, -+ z0 = svmad_n_f16_x (svptrue_b16 (), z1, z2, 2), -+ z0 = svmad_x (svptrue_b16 (), z1, z2, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_f32.c -new file mode 100644 -index 000000000..dbdd2b9d1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_f32.c -@@ -0,0 +1,398 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mad_f32_m_tied1: -+** fmad z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mad_f32_m_tied1, svfloat32_t, -+ z0 = svmad_f32_m (p0, z0, z1, z2), -+ z0 = svmad_m (p0, z0, z1, z2)) -+ -+/* -+** mad_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fmad z0\.s, p0/m, \1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mad_f32_m_tied2, svfloat32_t, -+ z0 = svmad_f32_m (p0, z1, z0, z2), -+ z0 = svmad_m (p0, z1, z0, z2)) -+ -+/* -+** mad_f32_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fmad z0\.s, p0/m, z2\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mad_f32_m_tied3, svfloat32_t, -+ z0 = svmad_f32_m (p0, z1, z2, z0), -+ z0 = svmad_m (p0, z1, z2, z0)) -+ -+/* -+** mad_f32_m_untied: -+** movprfx z0, z1 -+** fmad z0\.s, p0/m, z2\.s, z3\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mad_f32_m_untied, svfloat32_t, -+ z0 = svmad_f32_m (p0, z1, z2, z3), -+ z0 = svmad_m (p0, z1, z2, z3)) -+ -+/* -+** mad_s4_f32_m_tied1: -+** mov (z[0-9]+\.s), s4 -+** fmad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mad_s4_f32_m_tied1, svfloat32_t, float, -+ z0 = svmad_n_f32_m (p0, z0, z1, d4), -+ z0 = svmad_m (p0, z0, z1, d4)) -+ -+/* -+** mad_s4_f32_m_untied: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0, z1 -+** fmad z0\.s, p0/m, z2\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mad_s4_f32_m_untied, svfloat32_t, float, -+ z0 = svmad_n_f32_m (p0, z1, z2, d4), -+ z0 = svmad_m (p0, z1, z2, d4)) -+ -+/* -+** mad_2_f32_m_tied1: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fmad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_2_f32_m_tied1, svfloat32_t, -+ z0 = svmad_n_f32_m (p0, z0, z1, 2), -+ z0 = svmad_m (p0, z0, z1, 2)) -+ -+/* -+** mad_2_f32_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fmad z0\.s, p0/m, z2\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_2_f32_m_untied, svfloat32_t, -+ z0 = svmad_n_f32_m (p0, z1, z2, 2), -+ z0 = svmad_m (p0, z1, z2, 2)) -+ -+/* -+** mad_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fmad z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mad_f32_z_tied1, svfloat32_t, -+ z0 = svmad_f32_z (p0, z0, z1, z2), -+ z0 = svmad_z (p0, z0, z1, z2)) -+ -+/* -+** mad_f32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** fmad z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mad_f32_z_tied2, svfloat32_t, -+ z0 = svmad_f32_z (p0, z1, z0, z2), -+ z0 = svmad_z (p0, z1, z0, z2)) -+ -+/* -+** mad_f32_z_tied3: -+** movprfx z0\.s, p0/z, z0\.s -+** fmla z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mad_f32_z_tied3, svfloat32_t, -+ z0 = svmad_f32_z (p0, z1, z2, z0), -+ z0 = svmad_z (p0, z1, z2, z0)) -+ -+/* -+** mad_f32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fmad z0\.s, p0/m, z2\.s, z3\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fmad z0\.s, p0/m, z1\.s, z3\.s -+** | -+** movprfx z0\.s, p0/z, z3\.s -+** fmla z0\.s, p0/m, z1\.s, z2\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mad_f32_z_untied, svfloat32_t, -+ z0 = svmad_f32_z (p0, z1, z2, z3), -+ z0 = svmad_z (p0, z1, z2, z3)) -+ -+/* -+** mad_s4_f32_z_tied1: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0\.s, p0/z, z0\.s -+** fmad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mad_s4_f32_z_tied1, svfloat32_t, float, -+ z0 = svmad_n_f32_z (p0, z0, z1, d4), -+ z0 = svmad_z (p0, z0, z1, d4)) -+ -+/* -+** mad_s4_f32_z_tied2: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0\.s, p0/z, z0\.s -+** fmad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mad_s4_f32_z_tied2, svfloat32_t, float, -+ z0 = svmad_n_f32_z (p0, z1, z0, d4), -+ z0 = svmad_z (p0, z1, z0, d4)) -+ -+/* -+** mad_s4_f32_z_untied: -+** mov (z[0-9]+\.s), s4 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fmad z0\.s, p0/m, z2\.s, \1 -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fmad z0\.s, p0/m, z1\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** fmla z0\.s, p0/m, z1\.s, z2\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (mad_s4_f32_z_untied, svfloat32_t, float, -+ z0 = svmad_n_f32_z (p0, z1, z2, d4), -+ z0 = svmad_z (p0, z1, z2, d4)) -+ -+/* -+** mad_2_f32_z_tied1: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** movprfx z0\.s, p0/z, z0\.s -+** fmad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_2_f32_z_tied1, svfloat32_t, -+ z0 = svmad_n_f32_z (p0, z0, z1, 2), -+ z0 = svmad_z (p0, z0, z1, 2)) -+ -+/* -+** mad_2_f32_z_tied2: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** movprfx z0\.s, p0/z, z0\.s -+** fmad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_2_f32_z_tied2, svfloat32_t, -+ z0 = svmad_n_f32_z (p0, z1, z0, 2), -+ z0 = svmad_z (p0, z1, z0, 2)) -+ -+/* -+** mad_2_f32_z_untied: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fmad z0\.s, p0/m, z2\.s, \1 -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fmad z0\.s, p0/m, z1\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** fmla z0\.s, p0/m, z1\.s, z2\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mad_2_f32_z_untied, svfloat32_t, -+ z0 = svmad_n_f32_z (p0, z1, z2, 2), -+ z0 = svmad_z (p0, z1, z2, 2)) -+ -+/* -+** mad_f32_x_tied1: -+** fmad z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mad_f32_x_tied1, svfloat32_t, -+ z0 = svmad_f32_x (p0, z0, z1, z2), -+ z0 = svmad_x (p0, z0, z1, z2)) -+ -+/* -+** mad_f32_x_tied2: -+** fmad z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mad_f32_x_tied2, svfloat32_t, -+ z0 = svmad_f32_x (p0, z1, z0, z2), -+ z0 = svmad_x (p0, z1, z0, z2)) -+ -+/* -+** mad_f32_x_tied3: -+** fmla z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mad_f32_x_tied3, svfloat32_t, -+ z0 = svmad_f32_x (p0, z1, z2, z0), -+ z0 = svmad_x (p0, z1, z2, z0)) -+ -+/* -+** mad_f32_x_untied: -+** ( -+** movprfx z0, z1 -+** fmad z0\.s, p0/m, z2\.s, z3\.s -+** | -+** movprfx z0, z2 -+** fmad z0\.s, p0/m, z1\.s, z3\.s -+** | -+** movprfx z0, z3 -+** fmla z0\.s, p0/m, z1\.s, z2\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mad_f32_x_untied, svfloat32_t, -+ z0 = svmad_f32_x (p0, z1, z2, z3), -+ z0 = svmad_x (p0, z1, z2, z3)) -+ -+/* -+** mad_s4_f32_x_tied1: -+** mov (z[0-9]+\.s), s4 -+** fmad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mad_s4_f32_x_tied1, svfloat32_t, float, -+ z0 = svmad_n_f32_x (p0, z0, z1, d4), -+ z0 = svmad_x (p0, z0, z1, d4)) -+ -+/* -+** mad_s4_f32_x_tied2: -+** mov (z[0-9]+\.s), s4 -+** fmad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mad_s4_f32_x_tied2, svfloat32_t, float, -+ z0 = svmad_n_f32_x (p0, z1, z0, d4), -+ z0 = svmad_x (p0, z1, z0, d4)) -+ -+/* -+** mad_s4_f32_x_untied: { xfail *-*-* } -+** mov z0\.s, s4 -+** fmla z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_ZD (mad_s4_f32_x_untied, svfloat32_t, float, -+ z0 = svmad_n_f32_x (p0, z1, z2, d4), -+ z0 = svmad_x (p0, z1, z2, d4)) -+ -+/* -+** mad_2_f32_x_tied1: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fmad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_2_f32_x_tied1, svfloat32_t, -+ z0 = svmad_n_f32_x (p0, z0, z1, 2), -+ z0 = svmad_x (p0, z0, z1, 2)) -+ -+/* -+** mad_2_f32_x_tied2: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fmad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_2_f32_x_tied2, svfloat32_t, -+ z0 = svmad_n_f32_x (p0, z1, z0, 2), -+ z0 = svmad_x (p0, z1, z0, 2)) -+ -+/* -+** mad_2_f32_x_untied: -+** fmov z0\.s, #2\.0(?:e\+0)? -+** fmla z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mad_2_f32_x_untied, svfloat32_t, -+ z0 = svmad_n_f32_x (p0, z1, z2, 2), -+ z0 = svmad_x (p0, z1, z2, 2)) -+ -+/* -+** ptrue_mad_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mad_f32_x_tied1, svfloat32_t, -+ z0 = svmad_f32_x (svptrue_b32 (), z0, z1, z2), -+ z0 = svmad_x (svptrue_b32 (), z0, z1, z2)) -+ -+/* -+** ptrue_mad_f32_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mad_f32_x_tied2, svfloat32_t, -+ z0 = svmad_f32_x (svptrue_b32 (), z1, z0, z2), -+ z0 = svmad_x (svptrue_b32 (), z1, z0, z2)) -+ -+/* -+** ptrue_mad_f32_x_tied3: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mad_f32_x_tied3, svfloat32_t, -+ z0 = svmad_f32_x (svptrue_b32 (), z1, z2, z0), -+ z0 = svmad_x (svptrue_b32 (), z1, z2, z0)) -+ -+/* -+** ptrue_mad_f32_x_untied: -+** ... 
-+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mad_f32_x_untied, svfloat32_t, -+ z0 = svmad_f32_x (svptrue_b32 (), z1, z2, z3), -+ z0 = svmad_x (svptrue_b32 (), z1, z2, z3)) -+ -+/* -+** ptrue_mad_2_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mad_2_f32_x_tied1, svfloat32_t, -+ z0 = svmad_n_f32_x (svptrue_b32 (), z0, z1, 2), -+ z0 = svmad_x (svptrue_b32 (), z0, z1, 2)) -+ -+/* -+** ptrue_mad_2_f32_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mad_2_f32_x_tied2, svfloat32_t, -+ z0 = svmad_n_f32_x (svptrue_b32 (), z1, z0, 2), -+ z0 = svmad_x (svptrue_b32 (), z1, z0, 2)) -+ -+/* -+** ptrue_mad_2_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mad_2_f32_x_untied, svfloat32_t, -+ z0 = svmad_n_f32_x (svptrue_b32 (), z1, z2, 2), -+ z0 = svmad_x (svptrue_b32 (), z1, z2, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_f64.c -new file mode 100644 -index 000000000..978281295 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_f64.c -@@ -0,0 +1,398 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mad_f64_m_tied1: -+** fmad z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mad_f64_m_tied1, svfloat64_t, -+ z0 = svmad_f64_m (p0, z0, z1, z2), -+ z0 = svmad_m (p0, z0, z1, z2)) -+ -+/* -+** mad_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fmad z0\.d, p0/m, \1, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mad_f64_m_tied2, svfloat64_t, -+ z0 = svmad_f64_m (p0, z1, z0, z2), -+ z0 = svmad_m (p0, z1, z0, z2)) -+ -+/* -+** mad_f64_m_tied3: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fmad z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_f64_m_tied3, svfloat64_t, -+ z0 = svmad_f64_m (p0, z1, z2, z0), -+ z0 = svmad_m (p0, z1, z2, z0)) -+ -+/* -+** mad_f64_m_untied: -+** movprfx z0, z1 -+** fmad z0\.d, p0/m, z2\.d, z3\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mad_f64_m_untied, svfloat64_t, -+ z0 = svmad_f64_m (p0, z1, z2, z3), -+ z0 = svmad_m (p0, z1, z2, z3)) -+ -+/* -+** mad_d4_f64_m_tied1: -+** mov (z[0-9]+\.d), d4 -+** fmad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mad_d4_f64_m_tied1, svfloat64_t, double, -+ z0 = svmad_n_f64_m (p0, z0, z1, d4), -+ z0 = svmad_m (p0, z0, z1, d4)) -+ -+/* -+** mad_d4_f64_m_untied: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0, z1 -+** fmad z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mad_d4_f64_m_untied, svfloat64_t, double, -+ z0 = svmad_n_f64_m (p0, z1, z2, d4), -+ z0 = svmad_m (p0, z1, z2, d4)) -+ -+/* -+** mad_2_f64_m_tied1: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fmad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_2_f64_m_tied1, svfloat64_t, -+ z0 = svmad_n_f64_m (p0, z0, z1, 2), -+ z0 = svmad_m (p0, z0, z1, 2)) -+ -+/* -+** mad_2_f64_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fmad z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_2_f64_m_untied, svfloat64_t, -+ z0 = svmad_n_f64_m (p0, z1, z2, 2), -+ z0 = svmad_m (p0, z1, z2, 2)) -+ -+/* -+** mad_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fmad z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mad_f64_z_tied1, svfloat64_t, -+ z0 = svmad_f64_z (p0, z0, z1, z2), -+ z0 = svmad_z (p0, z0, z1, z2)) -+ -+/* -+** mad_f64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** fmad z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mad_f64_z_tied2, svfloat64_t, -+ z0 = svmad_f64_z (p0, z1, z0, z2), -+ z0 = svmad_z (p0, z1, z0, z2)) -+ -+/* -+** mad_f64_z_tied3: -+** movprfx z0\.d, p0/z, z0\.d -+** fmla z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mad_f64_z_tied3, svfloat64_t, -+ z0 = svmad_f64_z (p0, z1, z2, z0), -+ z0 = svmad_z (p0, z1, z2, z0)) -+ -+/* -+** mad_f64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fmad z0\.d, p0/m, z2\.d, z3\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fmad z0\.d, p0/m, z1\.d, z3\.d -+** | -+** movprfx z0\.d, p0/z, z3\.d -+** fmla z0\.d, p0/m, z1\.d, z2\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mad_f64_z_untied, svfloat64_t, -+ z0 = svmad_f64_z (p0, z1, z2, z3), -+ z0 = svmad_z (p0, z1, z2, z3)) -+ -+/* -+** mad_d4_f64_z_tied1: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0\.d, p0/z, z0\.d -+** fmad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mad_d4_f64_z_tied1, svfloat64_t, double, -+ z0 = svmad_n_f64_z (p0, z0, z1, d4), -+ z0 = svmad_z (p0, z0, z1, d4)) -+ -+/* -+** mad_d4_f64_z_tied2: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0\.d, p0/z, z0\.d -+** fmad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mad_d4_f64_z_tied2, svfloat64_t, double, -+ z0 = svmad_n_f64_z (p0, z1, z0, d4), -+ z0 = svmad_z (p0, z1, z0, d4)) -+ -+/* -+** mad_d4_f64_z_untied: -+** mov (z[0-9]+\.d), d4 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fmad z0\.d, p0/m, z2\.d, \1 -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fmad z0\.d, p0/m, z1\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** fmla z0\.d, p0/m, z1\.d, z2\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (mad_d4_f64_z_untied, svfloat64_t, double, -+ z0 = svmad_n_f64_z (p0, z1, z2, d4), -+ z0 = svmad_z (p0, z1, z2, d4)) -+ -+/* -+** mad_2_f64_z_tied1: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** movprfx z0\.d, p0/z, z0\.d -+** fmad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_2_f64_z_tied1, svfloat64_t, -+ z0 = svmad_n_f64_z (p0, z0, z1, 2), -+ z0 = svmad_z (p0, z0, z1, 2)) -+ -+/* -+** mad_2_f64_z_tied2: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** movprfx z0\.d, p0/z, z0\.d -+** fmad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_2_f64_z_tied2, svfloat64_t, -+ z0 = svmad_n_f64_z (p0, z1, z0, 2), -+ z0 = svmad_z (p0, z1, z0, 2)) -+ -+/* -+** mad_2_f64_z_untied: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fmad z0\.d, p0/m, z2\.d, \1 -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fmad z0\.d, p0/m, z1\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** fmla z0\.d, p0/m, z1\.d, z2\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mad_2_f64_z_untied, svfloat64_t, -+ z0 = svmad_n_f64_z (p0, z1, z2, 2), -+ z0 = svmad_z (p0, z1, z2, 2)) -+ -+/* -+** mad_f64_x_tied1: -+** fmad z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mad_f64_x_tied1, svfloat64_t, -+ z0 = svmad_f64_x (p0, z0, z1, z2), -+ z0 = svmad_x (p0, z0, z1, z2)) -+ -+/* -+** mad_f64_x_tied2: -+** fmad z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mad_f64_x_tied2, svfloat64_t, -+ z0 = svmad_f64_x (p0, z1, z0, z2), -+ z0 = svmad_x (p0, z1, z0, z2)) -+ -+/* -+** mad_f64_x_tied3: -+** fmla z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mad_f64_x_tied3, svfloat64_t, -+ z0 = svmad_f64_x (p0, z1, z2, z0), -+ z0 = svmad_x (p0, z1, z2, z0)) -+ -+/* -+** mad_f64_x_untied: -+** ( -+** movprfx z0, z1 -+** fmad z0\.d, p0/m, z2\.d, z3\.d -+** | -+** movprfx z0, z2 -+** fmad z0\.d, p0/m, z1\.d, z3\.d -+** | -+** movprfx z0, z3 -+** fmla z0\.d, p0/m, z1\.d, z2\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mad_f64_x_untied, svfloat64_t, -+ z0 = svmad_f64_x (p0, z1, z2, z3), -+ z0 = svmad_x (p0, z1, z2, z3)) -+ -+/* -+** mad_d4_f64_x_tied1: -+** mov (z[0-9]+\.d), d4 -+** fmad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mad_d4_f64_x_tied1, svfloat64_t, double, -+ z0 = svmad_n_f64_x (p0, z0, z1, d4), -+ z0 = svmad_x (p0, z0, z1, d4)) -+ -+/* -+** mad_d4_f64_x_tied2: -+** mov (z[0-9]+\.d), d4 -+** fmad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mad_d4_f64_x_tied2, svfloat64_t, double, -+ z0 = svmad_n_f64_x (p0, z1, z0, d4), -+ z0 = svmad_x (p0, z1, z0, d4)) -+ -+/* -+** mad_d4_f64_x_untied: { xfail *-*-* } -+** mov z0\.d, d4 -+** fmla z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_ZD (mad_d4_f64_x_untied, svfloat64_t, double, -+ z0 = svmad_n_f64_x (p0, z1, z2, d4), -+ z0 = svmad_x (p0, z1, z2, d4)) -+ -+/* -+** mad_2_f64_x_tied1: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fmad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_2_f64_x_tied1, svfloat64_t, -+ z0 = svmad_n_f64_x (p0, z0, z1, 2), -+ z0 = svmad_x (p0, z0, z1, 2)) -+ -+/* -+** mad_2_f64_x_tied2: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fmad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_2_f64_x_tied2, svfloat64_t, -+ z0 = svmad_n_f64_x (p0, z1, z0, 2), -+ z0 = svmad_x (p0, z1, z0, 2)) -+ -+/* -+** mad_2_f64_x_untied: -+** fmov z0\.d, #2\.0(?:e\+0)? -+** fmla z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mad_2_f64_x_untied, svfloat64_t, -+ z0 = svmad_n_f64_x (p0, z1, z2, 2), -+ z0 = svmad_x (p0, z1, z2, 2)) -+ -+/* -+** ptrue_mad_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mad_f64_x_tied1, svfloat64_t, -+ z0 = svmad_f64_x (svptrue_b64 (), z0, z1, z2), -+ z0 = svmad_x (svptrue_b64 (), z0, z1, z2)) -+ -+/* -+** ptrue_mad_f64_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mad_f64_x_tied2, svfloat64_t, -+ z0 = svmad_f64_x (svptrue_b64 (), z1, z0, z2), -+ z0 = svmad_x (svptrue_b64 (), z1, z0, z2)) -+ -+/* -+** ptrue_mad_f64_x_tied3: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mad_f64_x_tied3, svfloat64_t, -+ z0 = svmad_f64_x (svptrue_b64 (), z1, z2, z0), -+ z0 = svmad_x (svptrue_b64 (), z1, z2, z0)) -+ -+/* -+** ptrue_mad_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mad_f64_x_untied, svfloat64_t, -+ z0 = svmad_f64_x (svptrue_b64 (), z1, z2, z3), -+ z0 = svmad_x (svptrue_b64 (), z1, z2, z3)) -+ -+/* -+** ptrue_mad_2_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mad_2_f64_x_tied1, svfloat64_t, -+ z0 = svmad_n_f64_x (svptrue_b64 (), z0, z1, 2), -+ z0 = svmad_x (svptrue_b64 (), z0, z1, 2)) -+ -+/* -+** ptrue_mad_2_f64_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mad_2_f64_x_tied2, svfloat64_t, -+ z0 = svmad_n_f64_x (svptrue_b64 (), z1, z0, 2), -+ z0 = svmad_x (svptrue_b64 (), z1, z0, 2)) -+ -+/* -+** ptrue_mad_2_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mad_2_f64_x_untied, svfloat64_t, -+ z0 = svmad_n_f64_x (svptrue_b64 (), z1, z2, 2), -+ z0 = svmad_x (svptrue_b64 (), z1, z2, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_s16.c -new file mode 100644 -index 000000000..02a6d4588 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_s16.c -@@ -0,0 +1,321 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mad_s16_m_tied1: -+** mad z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s16_m_tied1, svint16_t, -+ z0 = svmad_s16_m (p0, z0, z1, z2), -+ z0 = svmad_m (p0, z0, z1, z2)) -+ -+/* -+** mad_s16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mad z0\.h, p0/m, \1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s16_m_tied2, svint16_t, -+ z0 = svmad_s16_m (p0, z1, z0, z2), -+ z0 = svmad_m (p0, z1, z0, z2)) -+ -+/* -+** mad_s16_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mad z0\.h, p0/m, z2\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s16_m_tied3, svint16_t, -+ z0 = svmad_s16_m (p0, z1, z2, z0), -+ z0 = svmad_m (p0, z1, z2, z0)) -+ -+/* -+** mad_s16_m_untied: -+** movprfx z0, z1 -+** mad z0\.h, p0/m, z2\.h, z3\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s16_m_untied, svint16_t, -+ z0 = svmad_s16_m (p0, z1, z2, z3), -+ z0 = svmad_m (p0, z1, z2, z3)) -+ -+/* -+** mad_w0_s16_m_tied1: -+** mov (z[0-9]+\.h), w0 -+** mad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_s16_m_tied1, svint16_t, int16_t, -+ z0 = svmad_n_s16_m (p0, z0, z1, x0), -+ z0 = svmad_m (p0, z0, z1, x0)) -+ -+/* -+** mad_w0_s16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** mad z0\.h, p0/m, z2\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_s16_m_untied, svint16_t, int16_t, -+ z0 = svmad_n_s16_m (p0, z1, z2, x0), -+ z0 = svmad_m (p0, z1, z2, x0)) -+ -+/* -+** mad_11_s16_m_tied1: -+** mov (z[0-9]+\.h), #11 -+** mad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_s16_m_tied1, svint16_t, -+ z0 = svmad_n_s16_m (p0, z0, z1, 11), -+ z0 = svmad_m (p0, z0, z1, 11)) -+ -+/* -+** mad_11_s16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), #11 -+** movprfx z0, z1 -+** mad z0\.h, p0/m, z2\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_s16_m_untied, svint16_t, -+ z0 = svmad_n_s16_m (p0, z1, z2, 11), -+ z0 = svmad_m (p0, z1, z2, 11)) -+ -+/* -+** mad_s16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** mad z0\.h, p0/m, z1\.h, z2\.h 
-+** ret -+*/ -+TEST_UNIFORM_Z (mad_s16_z_tied1, svint16_t, -+ z0 = svmad_s16_z (p0, z0, z1, z2), -+ z0 = svmad_z (p0, z0, z1, z2)) -+ -+/* -+** mad_s16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** mad z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s16_z_tied2, svint16_t, -+ z0 = svmad_s16_z (p0, z1, z0, z2), -+ z0 = svmad_z (p0, z1, z0, z2)) -+ -+/* -+** mad_s16_z_tied3: -+** movprfx z0\.h, p0/z, z0\.h -+** mla z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s16_z_tied3, svint16_t, -+ z0 = svmad_s16_z (p0, z1, z2, z0), -+ z0 = svmad_z (p0, z1, z2, z0)) -+ -+/* -+** mad_s16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** mad z0\.h, p0/m, z2\.h, z3\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** mad z0\.h, p0/m, z1\.h, z3\.h -+** | -+** movprfx z0\.h, p0/z, z3\.h -+** mla z0\.h, p0/m, z1\.h, z2\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s16_z_untied, svint16_t, -+ z0 = svmad_s16_z (p0, z1, z2, z3), -+ z0 = svmad_z (p0, z1, z2, z3)) -+ -+/* -+** mad_w0_s16_z_tied1: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** mad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_s16_z_tied1, svint16_t, int16_t, -+ z0 = svmad_n_s16_z (p0, z0, z1, x0), -+ z0 = svmad_z (p0, z0, z1, x0)) -+ -+/* -+** mad_w0_s16_z_tied2: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** mad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_s16_z_tied2, svint16_t, int16_t, -+ z0 = svmad_n_s16_z (p0, z1, z0, x0), -+ z0 = svmad_z (p0, z1, z0, x0)) -+ -+/* -+** mad_w0_s16_z_untied: -+** mov (z[0-9]+\.h), w0 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** mad z0\.h, p0/m, z2\.h, \1 -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** mad z0\.h, p0/m, z1\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** mla z0\.h, p0/m, z1\.h, z2\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_s16_z_untied, svint16_t, int16_t, -+ z0 = svmad_n_s16_z (p0, z1, z2, x0), -+ z0 = svmad_z (p0, z1, z2, x0)) -+ -+/* -+** mad_11_s16_z_tied1: -+** mov (z[0-9]+\.h), #11 -+** movprfx z0\.h, p0/z, z0\.h -+** mad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_s16_z_tied1, svint16_t, -+ z0 = svmad_n_s16_z (p0, z0, z1, 11), -+ z0 = svmad_z (p0, z0, z1, 11)) -+ -+/* -+** mad_11_s16_z_tied2: -+** mov (z[0-9]+\.h), #11 -+** movprfx z0\.h, p0/z, z0\.h -+** mad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_s16_z_tied2, svint16_t, -+ z0 = svmad_n_s16_z (p0, z1, z0, 11), -+ z0 = svmad_z (p0, z1, z0, 11)) -+ -+/* -+** mad_11_s16_z_untied: -+** mov (z[0-9]+\.h), #11 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** mad z0\.h, p0/m, z2\.h, \1 -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** mad z0\.h, p0/m, z1\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** mla z0\.h, p0/m, z1\.h, z2\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_s16_z_untied, svint16_t, -+ z0 = svmad_n_s16_z (p0, z1, z2, 11), -+ z0 = svmad_z (p0, z1, z2, 11)) -+ -+/* -+** mad_s16_x_tied1: -+** mad z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s16_x_tied1, svint16_t, -+ z0 = svmad_s16_x (p0, z0, z1, z2), -+ z0 = svmad_x (p0, z0, z1, z2)) -+ -+/* -+** mad_s16_x_tied2: -+** mad z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s16_x_tied2, svint16_t, -+ z0 = svmad_s16_x (p0, z1, z0, z2), -+ z0 = svmad_x (p0, z1, z0, z2)) -+ -+/* -+** mad_s16_x_tied3: -+** mla z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s16_x_tied3, svint16_t, -+ z0 = svmad_s16_x (p0, z1, z2, z0), -+ z0 = svmad_x (p0, z1, z2, z0)) -+ -+/* -+** mad_s16_x_untied: -+** ( -+** movprfx z0, z1 -+** mad 
z0\.h, p0/m, z2\.h, z3\.h -+** | -+** movprfx z0, z2 -+** mad z0\.h, p0/m, z1\.h, z3\.h -+** | -+** movprfx z0, z3 -+** mla z0\.h, p0/m, z1\.h, z2\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s16_x_untied, svint16_t, -+ z0 = svmad_s16_x (p0, z1, z2, z3), -+ z0 = svmad_x (p0, z1, z2, z3)) -+ -+/* -+** mad_w0_s16_x_tied1: -+** mov (z[0-9]+\.h), w0 -+** mad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_s16_x_tied1, svint16_t, int16_t, -+ z0 = svmad_n_s16_x (p0, z0, z1, x0), -+ z0 = svmad_x (p0, z0, z1, x0)) -+ -+/* -+** mad_w0_s16_x_tied2: -+** mov (z[0-9]+\.h), w0 -+** mad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_s16_x_tied2, svint16_t, int16_t, -+ z0 = svmad_n_s16_x (p0, z1, z0, x0), -+ z0 = svmad_x (p0, z1, z0, x0)) -+ -+/* -+** mad_w0_s16_x_untied: -+** mov z0\.h, w0 -+** mla z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_s16_x_untied, svint16_t, int16_t, -+ z0 = svmad_n_s16_x (p0, z1, z2, x0), -+ z0 = svmad_x (p0, z1, z2, x0)) -+ -+/* -+** mad_11_s16_x_tied1: -+** mov (z[0-9]+\.h), #11 -+** mad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_s16_x_tied1, svint16_t, -+ z0 = svmad_n_s16_x (p0, z0, z1, 11), -+ z0 = svmad_x (p0, z0, z1, 11)) -+ -+/* -+** mad_11_s16_x_tied2: -+** mov (z[0-9]+\.h), #11 -+** mad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_s16_x_tied2, svint16_t, -+ z0 = svmad_n_s16_x (p0, z1, z0, 11), -+ z0 = svmad_x (p0, z1, z0, 11)) -+ -+/* -+** mad_11_s16_x_untied: -+** mov z0\.h, #11 -+** mla z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_s16_x_untied, svint16_t, -+ z0 = svmad_n_s16_x (p0, z1, z2, 11), -+ z0 = svmad_x (p0, z1, z2, 11)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_s32.c -new file mode 100644 -index 000000000..d676a0c11 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_s32.c -@@ -0,0 +1,321 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mad_s32_m_tied1: -+** mad z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s32_m_tied1, svint32_t, -+ z0 = svmad_s32_m (p0, z0, z1, z2), -+ z0 = svmad_m (p0, z0, z1, z2)) -+ -+/* -+** mad_s32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mad z0\.s, p0/m, \1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s32_m_tied2, svint32_t, -+ z0 = svmad_s32_m (p0, z1, z0, z2), -+ z0 = svmad_m (p0, z1, z0, z2)) -+ -+/* -+** mad_s32_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mad z0\.s, p0/m, z2\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s32_m_tied3, svint32_t, -+ z0 = svmad_s32_m (p0, z1, z2, z0), -+ z0 = svmad_m (p0, z1, z2, z0)) -+ -+/* -+** mad_s32_m_untied: -+** movprfx z0, z1 -+** mad z0\.s, p0/m, z2\.s, z3\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s32_m_untied, svint32_t, -+ z0 = svmad_s32_m (p0, z1, z2, z3), -+ z0 = svmad_m (p0, z1, z2, z3)) -+ -+/* -+** mad_w0_s32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** mad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_s32_m_tied1, svint32_t, int32_t, -+ z0 = svmad_n_s32_m (p0, z0, z1, x0), -+ z0 = svmad_m (p0, z0, z1, x0)) -+ -+/* -+** mad_w0_s32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** mad z0\.s, p0/m, z2\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_s32_m_untied, svint32_t, int32_t, -+ z0 = svmad_n_s32_m (p0, z1, z2, x0), -+ z0 = svmad_m (p0, z1, z2, x0)) -+ -+/* -+** mad_11_s32_m_tied1: -+** mov (z[0-9]+\.s), #11 -+** mad z0\.s, p0/m, 
z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_s32_m_tied1, svint32_t, -+ z0 = svmad_n_s32_m (p0, z0, z1, 11), -+ z0 = svmad_m (p0, z0, z1, 11)) -+ -+/* -+** mad_11_s32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #11 -+** movprfx z0, z1 -+** mad z0\.s, p0/m, z2\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_s32_m_untied, svint32_t, -+ z0 = svmad_n_s32_m (p0, z1, z2, 11), -+ z0 = svmad_m (p0, z1, z2, 11)) -+ -+/* -+** mad_s32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** mad z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s32_z_tied1, svint32_t, -+ z0 = svmad_s32_z (p0, z0, z1, z2), -+ z0 = svmad_z (p0, z0, z1, z2)) -+ -+/* -+** mad_s32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** mad z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s32_z_tied2, svint32_t, -+ z0 = svmad_s32_z (p0, z1, z0, z2), -+ z0 = svmad_z (p0, z1, z0, z2)) -+ -+/* -+** mad_s32_z_tied3: -+** movprfx z0\.s, p0/z, z0\.s -+** mla z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s32_z_tied3, svint32_t, -+ z0 = svmad_s32_z (p0, z1, z2, z0), -+ z0 = svmad_z (p0, z1, z2, z0)) -+ -+/* -+** mad_s32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** mad z0\.s, p0/m, z2\.s, z3\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** mad z0\.s, p0/m, z1\.s, z3\.s -+** | -+** movprfx z0\.s, p0/z, z3\.s -+** mla z0\.s, p0/m, z1\.s, z2\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s32_z_untied, svint32_t, -+ z0 = svmad_s32_z (p0, z1, z2, z3), -+ z0 = svmad_z (p0, z1, z2, z3)) -+ -+/* -+** mad_w0_s32_z_tied1: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** mad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_s32_z_tied1, svint32_t, int32_t, -+ z0 = svmad_n_s32_z (p0, z0, z1, x0), -+ z0 = svmad_z (p0, z0, z1, x0)) -+ -+/* -+** mad_w0_s32_z_tied2: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** mad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_s32_z_tied2, svint32_t, int32_t, -+ z0 = svmad_n_s32_z (p0, z1, z0, x0), -+ z0 = svmad_z (p0, z1, z0, x0)) -+ -+/* -+** mad_w0_s32_z_untied: -+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** mad z0\.s, p0/m, z2\.s, \1 -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** mad z0\.s, p0/m, z1\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** mla z0\.s, p0/m, z1\.s, z2\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_s32_z_untied, svint32_t, int32_t, -+ z0 = svmad_n_s32_z (p0, z1, z2, x0), -+ z0 = svmad_z (p0, z1, z2, x0)) -+ -+/* -+** mad_11_s32_z_tied1: -+** mov (z[0-9]+\.s), #11 -+** movprfx z0\.s, p0/z, z0\.s -+** mad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_s32_z_tied1, svint32_t, -+ z0 = svmad_n_s32_z (p0, z0, z1, 11), -+ z0 = svmad_z (p0, z0, z1, 11)) -+ -+/* -+** mad_11_s32_z_tied2: -+** mov (z[0-9]+\.s), #11 -+** movprfx z0\.s, p0/z, z0\.s -+** mad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_s32_z_tied2, svint32_t, -+ z0 = svmad_n_s32_z (p0, z1, z0, 11), -+ z0 = svmad_z (p0, z1, z0, 11)) -+ -+/* -+** mad_11_s32_z_untied: -+** mov (z[0-9]+\.s), #11 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** mad z0\.s, p0/m, z2\.s, \1 -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** mad z0\.s, p0/m, z1\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** mla z0\.s, p0/m, z1\.s, z2\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_s32_z_untied, svint32_t, -+ z0 = svmad_n_s32_z (p0, z1, z2, 11), -+ z0 = svmad_z (p0, z1, z2, 11)) -+ -+/* -+** mad_s32_x_tied1: -+** mad z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s32_x_tied1, svint32_t, -+ z0 = svmad_s32_x (p0, 
z0, z1, z2), -+ z0 = svmad_x (p0, z0, z1, z2)) -+ -+/* -+** mad_s32_x_tied2: -+** mad z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s32_x_tied2, svint32_t, -+ z0 = svmad_s32_x (p0, z1, z0, z2), -+ z0 = svmad_x (p0, z1, z0, z2)) -+ -+/* -+** mad_s32_x_tied3: -+** mla z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s32_x_tied3, svint32_t, -+ z0 = svmad_s32_x (p0, z1, z2, z0), -+ z0 = svmad_x (p0, z1, z2, z0)) -+ -+/* -+** mad_s32_x_untied: -+** ( -+** movprfx z0, z1 -+** mad z0\.s, p0/m, z2\.s, z3\.s -+** | -+** movprfx z0, z2 -+** mad z0\.s, p0/m, z1\.s, z3\.s -+** | -+** movprfx z0, z3 -+** mla z0\.s, p0/m, z1\.s, z2\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s32_x_untied, svint32_t, -+ z0 = svmad_s32_x (p0, z1, z2, z3), -+ z0 = svmad_x (p0, z1, z2, z3)) -+ -+/* -+** mad_w0_s32_x_tied1: -+** mov (z[0-9]+\.s), w0 -+** mad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_s32_x_tied1, svint32_t, int32_t, -+ z0 = svmad_n_s32_x (p0, z0, z1, x0), -+ z0 = svmad_x (p0, z0, z1, x0)) -+ -+/* -+** mad_w0_s32_x_tied2: -+** mov (z[0-9]+\.s), w0 -+** mad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_s32_x_tied2, svint32_t, int32_t, -+ z0 = svmad_n_s32_x (p0, z1, z0, x0), -+ z0 = svmad_x (p0, z1, z0, x0)) -+ -+/* -+** mad_w0_s32_x_untied: -+** mov z0\.s, w0 -+** mla z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_s32_x_untied, svint32_t, int32_t, -+ z0 = svmad_n_s32_x (p0, z1, z2, x0), -+ z0 = svmad_x (p0, z1, z2, x0)) -+ -+/* -+** mad_11_s32_x_tied1: -+** mov (z[0-9]+\.s), #11 -+** mad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_s32_x_tied1, svint32_t, -+ z0 = svmad_n_s32_x (p0, z0, z1, 11), -+ z0 = svmad_x (p0, z0, z1, 11)) -+ -+/* -+** mad_11_s32_x_tied2: -+** mov (z[0-9]+\.s), #11 -+** mad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_s32_x_tied2, svint32_t, -+ z0 = svmad_n_s32_x (p0, z1, z0, 11), -+ z0 = svmad_x (p0, z1, z0, 11)) -+ -+/* -+** mad_11_s32_x_untied: -+** mov z0\.s, #11 -+** mla z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_s32_x_untied, svint32_t, -+ z0 = svmad_n_s32_x (p0, z1, z2, 11), -+ z0 = svmad_x (p0, z1, z2, 11)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_s64.c -new file mode 100644 -index 000000000..7aa017536 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_s64.c -@@ -0,0 +1,321 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mad_s64_m_tied1: -+** mad z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s64_m_tied1, svint64_t, -+ z0 = svmad_s64_m (p0, z0, z1, z2), -+ z0 = svmad_m (p0, z0, z1, z2)) -+ -+/* -+** mad_s64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** mad z0\.d, p0/m, \1, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s64_m_tied2, svint64_t, -+ z0 = svmad_s64_m (p0, z1, z0, z2), -+ z0 = svmad_m (p0, z1, z0, z2)) -+ -+/* -+** mad_s64_m_tied3: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** mad z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s64_m_tied3, svint64_t, -+ z0 = svmad_s64_m (p0, z1, z2, z0), -+ z0 = svmad_m (p0, z1, z2, z0)) -+ -+/* -+** mad_s64_m_untied: -+** movprfx z0, z1 -+** mad z0\.d, p0/m, z2\.d, z3\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s64_m_untied, svint64_t, -+ z0 = svmad_s64_m (p0, z1, z2, z3), -+ z0 = svmad_m (p0, z1, z2, z3)) -+ -+/* -+** mad_x0_s64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** mad z0\.d, p0/m, 
z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_x0_s64_m_tied1, svint64_t, int64_t, -+ z0 = svmad_n_s64_m (p0, z0, z1, x0), -+ z0 = svmad_m (p0, z0, z1, x0)) -+ -+/* -+** mad_x0_s64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** mad z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_x0_s64_m_untied, svint64_t, int64_t, -+ z0 = svmad_n_s64_m (p0, z1, z2, x0), -+ z0 = svmad_m (p0, z1, z2, x0)) -+ -+/* -+** mad_11_s64_m_tied1: -+** mov (z[0-9]+\.d), #11 -+** mad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_s64_m_tied1, svint64_t, -+ z0 = svmad_n_s64_m (p0, z0, z1, 11), -+ z0 = svmad_m (p0, z0, z1, 11)) -+ -+/* -+** mad_11_s64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #11 -+** movprfx z0, z1 -+** mad z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_s64_m_untied, svint64_t, -+ z0 = svmad_n_s64_m (p0, z1, z2, 11), -+ z0 = svmad_m (p0, z1, z2, 11)) -+ -+/* -+** mad_s64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** mad z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s64_z_tied1, svint64_t, -+ z0 = svmad_s64_z (p0, z0, z1, z2), -+ z0 = svmad_z (p0, z0, z1, z2)) -+ -+/* -+** mad_s64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** mad z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s64_z_tied2, svint64_t, -+ z0 = svmad_s64_z (p0, z1, z0, z2), -+ z0 = svmad_z (p0, z1, z0, z2)) -+ -+/* -+** mad_s64_z_tied3: -+** movprfx z0\.d, p0/z, z0\.d -+** mla z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s64_z_tied3, svint64_t, -+ z0 = svmad_s64_z (p0, z1, z2, z0), -+ z0 = svmad_z (p0, z1, z2, z0)) -+ -+/* -+** mad_s64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** mad z0\.d, p0/m, z2\.d, z3\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** mad z0\.d, p0/m, z1\.d, z3\.d -+** | -+** movprfx z0\.d, p0/z, z3\.d -+** mla z0\.d, p0/m, z1\.d, z2\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s64_z_untied, svint64_t, -+ z0 = svmad_s64_z (p0, z1, z2, z3), -+ z0 = svmad_z (p0, z1, z2, z3)) -+ -+/* -+** mad_x0_s64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** mad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_x0_s64_z_tied1, svint64_t, int64_t, -+ z0 = svmad_n_s64_z (p0, z0, z1, x0), -+ z0 = svmad_z (p0, z0, z1, x0)) -+ -+/* -+** mad_x0_s64_z_tied2: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** mad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_x0_s64_z_tied2, svint64_t, int64_t, -+ z0 = svmad_n_s64_z (p0, z1, z0, x0), -+ z0 = svmad_z (p0, z1, z0, x0)) -+ -+/* -+** mad_x0_s64_z_untied: -+** mov (z[0-9]+\.d), x0 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** mad z0\.d, p0/m, z2\.d, \1 -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** mad z0\.d, p0/m, z1\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** mla z0\.d, p0/m, z1\.d, z2\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_x0_s64_z_untied, svint64_t, int64_t, -+ z0 = svmad_n_s64_z (p0, z1, z2, x0), -+ z0 = svmad_z (p0, z1, z2, x0)) -+ -+/* -+** mad_11_s64_z_tied1: -+** mov (z[0-9]+\.d), #11 -+** movprfx z0\.d, p0/z, z0\.d -+** mad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_s64_z_tied1, svint64_t, -+ z0 = svmad_n_s64_z (p0, z0, z1, 11), -+ z0 = svmad_z (p0, z0, z1, 11)) -+ -+/* -+** mad_11_s64_z_tied2: -+** mov (z[0-9]+\.d), #11 -+** movprfx z0\.d, p0/z, z0\.d -+** mad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_s64_z_tied2, svint64_t, -+ z0 = svmad_n_s64_z (p0, z1, z0, 11), -+ z0 = svmad_z (p0, z1, z0, 11)) -+ -+/* -+** mad_11_s64_z_untied: -+** mov (z[0-9]+\.d), #11 -+** ( 
-+** movprfx z0\.d, p0/z, z1\.d -+** mad z0\.d, p0/m, z2\.d, \1 -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** mad z0\.d, p0/m, z1\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** mla z0\.d, p0/m, z1\.d, z2\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_s64_z_untied, svint64_t, -+ z0 = svmad_n_s64_z (p0, z1, z2, 11), -+ z0 = svmad_z (p0, z1, z2, 11)) -+ -+/* -+** mad_s64_x_tied1: -+** mad z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s64_x_tied1, svint64_t, -+ z0 = svmad_s64_x (p0, z0, z1, z2), -+ z0 = svmad_x (p0, z0, z1, z2)) -+ -+/* -+** mad_s64_x_tied2: -+** mad z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s64_x_tied2, svint64_t, -+ z0 = svmad_s64_x (p0, z1, z0, z2), -+ z0 = svmad_x (p0, z1, z0, z2)) -+ -+/* -+** mad_s64_x_tied3: -+** mla z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s64_x_tied3, svint64_t, -+ z0 = svmad_s64_x (p0, z1, z2, z0), -+ z0 = svmad_x (p0, z1, z2, z0)) -+ -+/* -+** mad_s64_x_untied: -+** ( -+** movprfx z0, z1 -+** mad z0\.d, p0/m, z2\.d, z3\.d -+** | -+** movprfx z0, z2 -+** mad z0\.d, p0/m, z1\.d, z3\.d -+** | -+** movprfx z0, z3 -+** mla z0\.d, p0/m, z1\.d, z2\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s64_x_untied, svint64_t, -+ z0 = svmad_s64_x (p0, z1, z2, z3), -+ z0 = svmad_x (p0, z1, z2, z3)) -+ -+/* -+** mad_x0_s64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** mad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_x0_s64_x_tied1, svint64_t, int64_t, -+ z0 = svmad_n_s64_x (p0, z0, z1, x0), -+ z0 = svmad_x (p0, z0, z1, x0)) -+ -+/* -+** mad_x0_s64_x_tied2: -+** mov (z[0-9]+\.d), x0 -+** mad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_x0_s64_x_tied2, svint64_t, int64_t, -+ z0 = svmad_n_s64_x (p0, z1, z0, x0), -+ z0 = svmad_x (p0, z1, z0, x0)) -+ -+/* -+** mad_x0_s64_x_untied: -+** mov z0\.d, x0 -+** mla z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_x0_s64_x_untied, svint64_t, int64_t, -+ z0 = svmad_n_s64_x (p0, z1, z2, x0), -+ z0 = svmad_x (p0, z1, z2, x0)) -+ -+/* -+** mad_11_s64_x_tied1: -+** mov (z[0-9]+\.d), #11 -+** mad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_s64_x_tied1, svint64_t, -+ z0 = svmad_n_s64_x (p0, z0, z1, 11), -+ z0 = svmad_x (p0, z0, z1, 11)) -+ -+/* -+** mad_11_s64_x_tied2: -+** mov (z[0-9]+\.d), #11 -+** mad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_s64_x_tied2, svint64_t, -+ z0 = svmad_n_s64_x (p0, z1, z0, 11), -+ z0 = svmad_x (p0, z1, z0, 11)) -+ -+/* -+** mad_11_s64_x_untied: -+** mov z0\.d, #11 -+** mla z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_s64_x_untied, svint64_t, -+ z0 = svmad_n_s64_x (p0, z1, z2, 11), -+ z0 = svmad_x (p0, z1, z2, 11)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_s8.c -new file mode 100644 -index 000000000..90d712686 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_s8.c -@@ -0,0 +1,321 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mad_s8_m_tied1: -+** mad z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s8_m_tied1, svint8_t, -+ z0 = svmad_s8_m (p0, z0, z1, z2), -+ z0 = svmad_m (p0, z0, z1, z2)) -+ -+/* -+** mad_s8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mad z0\.b, p0/m, \1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s8_m_tied2, svint8_t, -+ z0 = svmad_s8_m (p0, z1, z0, z2), -+ z0 = svmad_m (p0, z1, z0, z2)) -+ -+/* -+** mad_s8_m_tied3: -+** mov 
(z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mad z0\.b, p0/m, z2\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s8_m_tied3, svint8_t, -+ z0 = svmad_s8_m (p0, z1, z2, z0), -+ z0 = svmad_m (p0, z1, z2, z0)) -+ -+/* -+** mad_s8_m_untied: -+** movprfx z0, z1 -+** mad z0\.b, p0/m, z2\.b, z3\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s8_m_untied, svint8_t, -+ z0 = svmad_s8_m (p0, z1, z2, z3), -+ z0 = svmad_m (p0, z1, z2, z3)) -+ -+/* -+** mad_w0_s8_m_tied1: -+** mov (z[0-9]+\.b), w0 -+** mad z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_s8_m_tied1, svint8_t, int8_t, -+ z0 = svmad_n_s8_m (p0, z0, z1, x0), -+ z0 = svmad_m (p0, z0, z1, x0)) -+ -+/* -+** mad_w0_s8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** mad z0\.b, p0/m, z2\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_s8_m_untied, svint8_t, int8_t, -+ z0 = svmad_n_s8_m (p0, z1, z2, x0), -+ z0 = svmad_m (p0, z1, z2, x0)) -+ -+/* -+** mad_11_s8_m_tied1: -+** mov (z[0-9]+\.b), #11 -+** mad z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_s8_m_tied1, svint8_t, -+ z0 = svmad_n_s8_m (p0, z0, z1, 11), -+ z0 = svmad_m (p0, z0, z1, 11)) -+ -+/* -+** mad_11_s8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), #11 -+** movprfx z0, z1 -+** mad z0\.b, p0/m, z2\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_s8_m_untied, svint8_t, -+ z0 = svmad_n_s8_m (p0, z1, z2, 11), -+ z0 = svmad_m (p0, z1, z2, 11)) -+ -+/* -+** mad_s8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** mad z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s8_z_tied1, svint8_t, -+ z0 = svmad_s8_z (p0, z0, z1, z2), -+ z0 = svmad_z (p0, z0, z1, z2)) -+ -+/* -+** mad_s8_z_tied2: -+** movprfx z0\.b, p0/z, z0\.b -+** mad z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s8_z_tied2, svint8_t, -+ z0 = svmad_s8_z (p0, z1, z0, z2), -+ z0 = svmad_z (p0, z1, z0, z2)) -+ -+/* -+** mad_s8_z_tied3: -+** movprfx z0\.b, p0/z, z0\.b -+** mla z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s8_z_tied3, svint8_t, -+ z0 = svmad_s8_z (p0, z1, z2, z0), -+ z0 = svmad_z (p0, z1, z2, z0)) -+ -+/* -+** mad_s8_z_untied: -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** mad z0\.b, p0/m, z2\.b, z3\.b -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** mad z0\.b, p0/m, z1\.b, z3\.b -+** | -+** movprfx z0\.b, p0/z, z3\.b -+** mla z0\.b, p0/m, z1\.b, z2\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s8_z_untied, svint8_t, -+ z0 = svmad_s8_z (p0, z1, z2, z3), -+ z0 = svmad_z (p0, z1, z2, z3)) -+ -+/* -+** mad_w0_s8_z_tied1: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** mad z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_s8_z_tied1, svint8_t, int8_t, -+ z0 = svmad_n_s8_z (p0, z0, z1, x0), -+ z0 = svmad_z (p0, z0, z1, x0)) -+ -+/* -+** mad_w0_s8_z_tied2: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** mad z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_s8_z_tied2, svint8_t, int8_t, -+ z0 = svmad_n_s8_z (p0, z1, z0, x0), -+ z0 = svmad_z (p0, z1, z0, x0)) -+ -+/* -+** mad_w0_s8_z_untied: -+** mov (z[0-9]+\.b), w0 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** mad z0\.b, p0/m, z2\.b, \1 -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** mad z0\.b, p0/m, z1\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** mla z0\.b, p0/m, z1\.b, z2\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_s8_z_untied, svint8_t, int8_t, -+ z0 = svmad_n_s8_z (p0, z1, z2, x0), -+ z0 = svmad_z (p0, z1, z2, x0)) -+ -+/* -+** mad_11_s8_z_tied1: -+** mov (z[0-9]+\.b), #11 -+** movprfx z0\.b, p0/z, z0\.b -+** mad z0\.b, p0/m, z1\.b, \1 
-+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_s8_z_tied1, svint8_t, -+ z0 = svmad_n_s8_z (p0, z0, z1, 11), -+ z0 = svmad_z (p0, z0, z1, 11)) -+ -+/* -+** mad_11_s8_z_tied2: -+** mov (z[0-9]+\.b), #11 -+** movprfx z0\.b, p0/z, z0\.b -+** mad z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_s8_z_tied2, svint8_t, -+ z0 = svmad_n_s8_z (p0, z1, z0, 11), -+ z0 = svmad_z (p0, z1, z0, 11)) -+ -+/* -+** mad_11_s8_z_untied: -+** mov (z[0-9]+\.b), #11 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** mad z0\.b, p0/m, z2\.b, \1 -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** mad z0\.b, p0/m, z1\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** mla z0\.b, p0/m, z1\.b, z2\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_s8_z_untied, svint8_t, -+ z0 = svmad_n_s8_z (p0, z1, z2, 11), -+ z0 = svmad_z (p0, z1, z2, 11)) -+ -+/* -+** mad_s8_x_tied1: -+** mad z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s8_x_tied1, svint8_t, -+ z0 = svmad_s8_x (p0, z0, z1, z2), -+ z0 = svmad_x (p0, z0, z1, z2)) -+ -+/* -+** mad_s8_x_tied2: -+** mad z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s8_x_tied2, svint8_t, -+ z0 = svmad_s8_x (p0, z1, z0, z2), -+ z0 = svmad_x (p0, z1, z0, z2)) -+ -+/* -+** mad_s8_x_tied3: -+** mla z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s8_x_tied3, svint8_t, -+ z0 = svmad_s8_x (p0, z1, z2, z0), -+ z0 = svmad_x (p0, z1, z2, z0)) -+ -+/* -+** mad_s8_x_untied: -+** ( -+** movprfx z0, z1 -+** mad z0\.b, p0/m, z2\.b, z3\.b -+** | -+** movprfx z0, z2 -+** mad z0\.b, p0/m, z1\.b, z3\.b -+** | -+** movprfx z0, z3 -+** mla z0\.b, p0/m, z1\.b, z2\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mad_s8_x_untied, svint8_t, -+ z0 = svmad_s8_x (p0, z1, z2, z3), -+ z0 = svmad_x (p0, z1, z2, z3)) -+ -+/* -+** mad_w0_s8_x_tied1: -+** mov (z[0-9]+\.b), w0 -+** mad z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_s8_x_tied1, svint8_t, int8_t, -+ z0 = svmad_n_s8_x (p0, z0, z1, x0), -+ z0 = svmad_x (p0, z0, z1, x0)) -+ -+/* -+** mad_w0_s8_x_tied2: -+** mov (z[0-9]+\.b), w0 -+** mad z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_s8_x_tied2, svint8_t, int8_t, -+ z0 = svmad_n_s8_x (p0, z1, z0, x0), -+ z0 = svmad_x (p0, z1, z0, x0)) -+ -+/* -+** mad_w0_s8_x_untied: -+** mov z0\.b, w0 -+** mla z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_s8_x_untied, svint8_t, int8_t, -+ z0 = svmad_n_s8_x (p0, z1, z2, x0), -+ z0 = svmad_x (p0, z1, z2, x0)) -+ -+/* -+** mad_11_s8_x_tied1: -+** mov (z[0-9]+\.b), #11 -+** mad z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_s8_x_tied1, svint8_t, -+ z0 = svmad_n_s8_x (p0, z0, z1, 11), -+ z0 = svmad_x (p0, z0, z1, 11)) -+ -+/* -+** mad_11_s8_x_tied2: -+** mov (z[0-9]+\.b), #11 -+** mad z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_s8_x_tied2, svint8_t, -+ z0 = svmad_n_s8_x (p0, z1, z0, 11), -+ z0 = svmad_x (p0, z1, z0, 11)) -+ -+/* -+** mad_11_s8_x_untied: -+** mov z0\.b, #11 -+** mla z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_s8_x_untied, svint8_t, -+ z0 = svmad_n_s8_x (p0, z1, z2, 11), -+ z0 = svmad_x (p0, z1, z2, 11)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_u16.c -new file mode 100644 -index 000000000..1d2ad9c5f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_u16.c -@@ -0,0 +1,321 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mad_u16_m_tied1: -+** mad z0\.h, p0/m, z1\.h, z2\.h -+** 
ret -+*/ -+TEST_UNIFORM_Z (mad_u16_m_tied1, svuint16_t, -+ z0 = svmad_u16_m (p0, z0, z1, z2), -+ z0 = svmad_m (p0, z0, z1, z2)) -+ -+/* -+** mad_u16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mad z0\.h, p0/m, \1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u16_m_tied2, svuint16_t, -+ z0 = svmad_u16_m (p0, z1, z0, z2), -+ z0 = svmad_m (p0, z1, z0, z2)) -+ -+/* -+** mad_u16_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mad z0\.h, p0/m, z2\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u16_m_tied3, svuint16_t, -+ z0 = svmad_u16_m (p0, z1, z2, z0), -+ z0 = svmad_m (p0, z1, z2, z0)) -+ -+/* -+** mad_u16_m_untied: -+** movprfx z0, z1 -+** mad z0\.h, p0/m, z2\.h, z3\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u16_m_untied, svuint16_t, -+ z0 = svmad_u16_m (p0, z1, z2, z3), -+ z0 = svmad_m (p0, z1, z2, z3)) -+ -+/* -+** mad_w0_u16_m_tied1: -+** mov (z[0-9]+\.h), w0 -+** mad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_u16_m_tied1, svuint16_t, uint16_t, -+ z0 = svmad_n_u16_m (p0, z0, z1, x0), -+ z0 = svmad_m (p0, z0, z1, x0)) -+ -+/* -+** mad_w0_u16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** mad z0\.h, p0/m, z2\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_u16_m_untied, svuint16_t, uint16_t, -+ z0 = svmad_n_u16_m (p0, z1, z2, x0), -+ z0 = svmad_m (p0, z1, z2, x0)) -+ -+/* -+** mad_11_u16_m_tied1: -+** mov (z[0-9]+\.h), #11 -+** mad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_u16_m_tied1, svuint16_t, -+ z0 = svmad_n_u16_m (p0, z0, z1, 11), -+ z0 = svmad_m (p0, z0, z1, 11)) -+ -+/* -+** mad_11_u16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), #11 -+** movprfx z0, z1 -+** mad z0\.h, p0/m, z2\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_u16_m_untied, svuint16_t, -+ z0 = svmad_n_u16_m (p0, z1, z2, 11), -+ z0 = svmad_m (p0, z1, z2, 11)) -+ -+/* -+** mad_u16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** mad z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u16_z_tied1, svuint16_t, -+ z0 = svmad_u16_z (p0, z0, z1, z2), -+ z0 = svmad_z (p0, z0, z1, z2)) -+ -+/* -+** mad_u16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** mad z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u16_z_tied2, svuint16_t, -+ z0 = svmad_u16_z (p0, z1, z0, z2), -+ z0 = svmad_z (p0, z1, z0, z2)) -+ -+/* -+** mad_u16_z_tied3: -+** movprfx z0\.h, p0/z, z0\.h -+** mla z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u16_z_tied3, svuint16_t, -+ z0 = svmad_u16_z (p0, z1, z2, z0), -+ z0 = svmad_z (p0, z1, z2, z0)) -+ -+/* -+** mad_u16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** mad z0\.h, p0/m, z2\.h, z3\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** mad z0\.h, p0/m, z1\.h, z3\.h -+** | -+** movprfx z0\.h, p0/z, z3\.h -+** mla z0\.h, p0/m, z1\.h, z2\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u16_z_untied, svuint16_t, -+ z0 = svmad_u16_z (p0, z1, z2, z3), -+ z0 = svmad_z (p0, z1, z2, z3)) -+ -+/* -+** mad_w0_u16_z_tied1: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** mad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_u16_z_tied1, svuint16_t, uint16_t, -+ z0 = svmad_n_u16_z (p0, z0, z1, x0), -+ z0 = svmad_z (p0, z0, z1, x0)) -+ -+/* -+** mad_w0_u16_z_tied2: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** mad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_u16_z_tied2, svuint16_t, uint16_t, -+ z0 = svmad_n_u16_z (p0, z1, z0, x0), -+ z0 = svmad_z (p0, z1, z0, x0)) -+ -+/* -+** mad_w0_u16_z_untied: -+** mov (z[0-9]+\.h), w0 -+** ( 
-+** movprfx z0\.h, p0/z, z1\.h -+** mad z0\.h, p0/m, z2\.h, \1 -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** mad z0\.h, p0/m, z1\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** mla z0\.h, p0/m, z1\.h, z2\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_u16_z_untied, svuint16_t, uint16_t, -+ z0 = svmad_n_u16_z (p0, z1, z2, x0), -+ z0 = svmad_z (p0, z1, z2, x0)) -+ -+/* -+** mad_11_u16_z_tied1: -+** mov (z[0-9]+\.h), #11 -+** movprfx z0\.h, p0/z, z0\.h -+** mad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_u16_z_tied1, svuint16_t, -+ z0 = svmad_n_u16_z (p0, z0, z1, 11), -+ z0 = svmad_z (p0, z0, z1, 11)) -+ -+/* -+** mad_11_u16_z_tied2: -+** mov (z[0-9]+\.h), #11 -+** movprfx z0\.h, p0/z, z0\.h -+** mad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_u16_z_tied2, svuint16_t, -+ z0 = svmad_n_u16_z (p0, z1, z0, 11), -+ z0 = svmad_z (p0, z1, z0, 11)) -+ -+/* -+** mad_11_u16_z_untied: -+** mov (z[0-9]+\.h), #11 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** mad z0\.h, p0/m, z2\.h, \1 -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** mad z0\.h, p0/m, z1\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** mla z0\.h, p0/m, z1\.h, z2\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_u16_z_untied, svuint16_t, -+ z0 = svmad_n_u16_z (p0, z1, z2, 11), -+ z0 = svmad_z (p0, z1, z2, 11)) -+ -+/* -+** mad_u16_x_tied1: -+** mad z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u16_x_tied1, svuint16_t, -+ z0 = svmad_u16_x (p0, z0, z1, z2), -+ z0 = svmad_x (p0, z0, z1, z2)) -+ -+/* -+** mad_u16_x_tied2: -+** mad z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u16_x_tied2, svuint16_t, -+ z0 = svmad_u16_x (p0, z1, z0, z2), -+ z0 = svmad_x (p0, z1, z0, z2)) -+ -+/* -+** mad_u16_x_tied3: -+** mla z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u16_x_tied3, svuint16_t, -+ z0 = svmad_u16_x (p0, z1, z2, z0), -+ z0 = svmad_x (p0, z1, z2, z0)) -+ -+/* -+** mad_u16_x_untied: -+** ( -+** movprfx z0, z1 -+** mad z0\.h, p0/m, z2\.h, z3\.h -+** | -+** movprfx z0, z2 -+** mad z0\.h, p0/m, z1\.h, z3\.h -+** | -+** movprfx z0, z3 -+** mla z0\.h, p0/m, z1\.h, z2\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u16_x_untied, svuint16_t, -+ z0 = svmad_u16_x (p0, z1, z2, z3), -+ z0 = svmad_x (p0, z1, z2, z3)) -+ -+/* -+** mad_w0_u16_x_tied1: -+** mov (z[0-9]+\.h), w0 -+** mad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_u16_x_tied1, svuint16_t, uint16_t, -+ z0 = svmad_n_u16_x (p0, z0, z1, x0), -+ z0 = svmad_x (p0, z0, z1, x0)) -+ -+/* -+** mad_w0_u16_x_tied2: -+** mov (z[0-9]+\.h), w0 -+** mad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_u16_x_tied2, svuint16_t, uint16_t, -+ z0 = svmad_n_u16_x (p0, z1, z0, x0), -+ z0 = svmad_x (p0, z1, z0, x0)) -+ -+/* -+** mad_w0_u16_x_untied: -+** mov z0\.h, w0 -+** mla z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_u16_x_untied, svuint16_t, uint16_t, -+ z0 = svmad_n_u16_x (p0, z1, z2, x0), -+ z0 = svmad_x (p0, z1, z2, x0)) -+ -+/* -+** mad_11_u16_x_tied1: -+** mov (z[0-9]+\.h), #11 -+** mad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_u16_x_tied1, svuint16_t, -+ z0 = svmad_n_u16_x (p0, z0, z1, 11), -+ z0 = svmad_x (p0, z0, z1, 11)) -+ -+/* -+** mad_11_u16_x_tied2: -+** mov (z[0-9]+\.h), #11 -+** mad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_u16_x_tied2, svuint16_t, -+ z0 = svmad_n_u16_x (p0, z1, z0, 11), -+ z0 = svmad_x (p0, z1, z0, 11)) -+ -+/* -+** mad_11_u16_x_untied: -+** mov z0\.h, #11 -+** mla z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ 
-+TEST_UNIFORM_Z (mad_11_u16_x_untied, svuint16_t, -+ z0 = svmad_n_u16_x (p0, z1, z2, 11), -+ z0 = svmad_x (p0, z1, z2, 11)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_u32.c -new file mode 100644 -index 000000000..4b51958b1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_u32.c -@@ -0,0 +1,321 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mad_u32_m_tied1: -+** mad z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u32_m_tied1, svuint32_t, -+ z0 = svmad_u32_m (p0, z0, z1, z2), -+ z0 = svmad_m (p0, z0, z1, z2)) -+ -+/* -+** mad_u32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mad z0\.s, p0/m, \1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u32_m_tied2, svuint32_t, -+ z0 = svmad_u32_m (p0, z1, z0, z2), -+ z0 = svmad_m (p0, z1, z0, z2)) -+ -+/* -+** mad_u32_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mad z0\.s, p0/m, z2\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u32_m_tied3, svuint32_t, -+ z0 = svmad_u32_m (p0, z1, z2, z0), -+ z0 = svmad_m (p0, z1, z2, z0)) -+ -+/* -+** mad_u32_m_untied: -+** movprfx z0, z1 -+** mad z0\.s, p0/m, z2\.s, z3\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u32_m_untied, svuint32_t, -+ z0 = svmad_u32_m (p0, z1, z2, z3), -+ z0 = svmad_m (p0, z1, z2, z3)) -+ -+/* -+** mad_w0_u32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** mad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_u32_m_tied1, svuint32_t, uint32_t, -+ z0 = svmad_n_u32_m (p0, z0, z1, x0), -+ z0 = svmad_m (p0, z0, z1, x0)) -+ -+/* -+** mad_w0_u32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** mad z0\.s, p0/m, z2\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_u32_m_untied, svuint32_t, uint32_t, -+ z0 = svmad_n_u32_m (p0, z1, z2, x0), -+ z0 = svmad_m (p0, z1, z2, x0)) -+ -+/* -+** mad_11_u32_m_tied1: -+** mov (z[0-9]+\.s), #11 -+** mad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_u32_m_tied1, svuint32_t, -+ z0 = svmad_n_u32_m (p0, z0, z1, 11), -+ z0 = svmad_m (p0, z0, z1, 11)) -+ -+/* -+** mad_11_u32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #11 -+** movprfx z0, z1 -+** mad z0\.s, p0/m, z2\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_u32_m_untied, svuint32_t, -+ z0 = svmad_n_u32_m (p0, z1, z2, 11), -+ z0 = svmad_m (p0, z1, z2, 11)) -+ -+/* -+** mad_u32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** mad z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u32_z_tied1, svuint32_t, -+ z0 = svmad_u32_z (p0, z0, z1, z2), -+ z0 = svmad_z (p0, z0, z1, z2)) -+ -+/* -+** mad_u32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** mad z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u32_z_tied2, svuint32_t, -+ z0 = svmad_u32_z (p0, z1, z0, z2), -+ z0 = svmad_z (p0, z1, z0, z2)) -+ -+/* -+** mad_u32_z_tied3: -+** movprfx z0\.s, p0/z, z0\.s -+** mla z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u32_z_tied3, svuint32_t, -+ z0 = svmad_u32_z (p0, z1, z2, z0), -+ z0 = svmad_z (p0, z1, z2, z0)) -+ -+/* -+** mad_u32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** mad z0\.s, p0/m, z2\.s, z3\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** mad z0\.s, p0/m, z1\.s, z3\.s -+** | -+** movprfx z0\.s, p0/z, z3\.s -+** mla z0\.s, p0/m, z1\.s, z2\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u32_z_untied, svuint32_t, -+ z0 = svmad_u32_z (p0, z1, z2, z3), -+ z0 = svmad_z (p0, z1, z2, z3)) -+ -+/* -+** mad_w0_u32_z_tied1: -+** mov (z[0-9]+\.s), 
w0 -+** movprfx z0\.s, p0/z, z0\.s -+** mad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_u32_z_tied1, svuint32_t, uint32_t, -+ z0 = svmad_n_u32_z (p0, z0, z1, x0), -+ z0 = svmad_z (p0, z0, z1, x0)) -+ -+/* -+** mad_w0_u32_z_tied2: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** mad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_u32_z_tied2, svuint32_t, uint32_t, -+ z0 = svmad_n_u32_z (p0, z1, z0, x0), -+ z0 = svmad_z (p0, z1, z0, x0)) -+ -+/* -+** mad_w0_u32_z_untied: -+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** mad z0\.s, p0/m, z2\.s, \1 -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** mad z0\.s, p0/m, z1\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** mla z0\.s, p0/m, z1\.s, z2\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_u32_z_untied, svuint32_t, uint32_t, -+ z0 = svmad_n_u32_z (p0, z1, z2, x0), -+ z0 = svmad_z (p0, z1, z2, x0)) -+ -+/* -+** mad_11_u32_z_tied1: -+** mov (z[0-9]+\.s), #11 -+** movprfx z0\.s, p0/z, z0\.s -+** mad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_u32_z_tied1, svuint32_t, -+ z0 = svmad_n_u32_z (p0, z0, z1, 11), -+ z0 = svmad_z (p0, z0, z1, 11)) -+ -+/* -+** mad_11_u32_z_tied2: -+** mov (z[0-9]+\.s), #11 -+** movprfx z0\.s, p0/z, z0\.s -+** mad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_u32_z_tied2, svuint32_t, -+ z0 = svmad_n_u32_z (p0, z1, z0, 11), -+ z0 = svmad_z (p0, z1, z0, 11)) -+ -+/* -+** mad_11_u32_z_untied: -+** mov (z[0-9]+\.s), #11 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** mad z0\.s, p0/m, z2\.s, \1 -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** mad z0\.s, p0/m, z1\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** mla z0\.s, p0/m, z1\.s, z2\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_u32_z_untied, svuint32_t, -+ z0 = svmad_n_u32_z (p0, z1, z2, 11), -+ z0 = svmad_z (p0, z1, z2, 11)) -+ -+/* -+** mad_u32_x_tied1: -+** mad z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u32_x_tied1, svuint32_t, -+ z0 = svmad_u32_x (p0, z0, z1, z2), -+ z0 = svmad_x (p0, z0, z1, z2)) -+ -+/* -+** mad_u32_x_tied2: -+** mad z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u32_x_tied2, svuint32_t, -+ z0 = svmad_u32_x (p0, z1, z0, z2), -+ z0 = svmad_x (p0, z1, z0, z2)) -+ -+/* -+** mad_u32_x_tied3: -+** mla z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u32_x_tied3, svuint32_t, -+ z0 = svmad_u32_x (p0, z1, z2, z0), -+ z0 = svmad_x (p0, z1, z2, z0)) -+ -+/* -+** mad_u32_x_untied: -+** ( -+** movprfx z0, z1 -+** mad z0\.s, p0/m, z2\.s, z3\.s -+** | -+** movprfx z0, z2 -+** mad z0\.s, p0/m, z1\.s, z3\.s -+** | -+** movprfx z0, z3 -+** mla z0\.s, p0/m, z1\.s, z2\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u32_x_untied, svuint32_t, -+ z0 = svmad_u32_x (p0, z1, z2, z3), -+ z0 = svmad_x (p0, z1, z2, z3)) -+ -+/* -+** mad_w0_u32_x_tied1: -+** mov (z[0-9]+\.s), w0 -+** mad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_u32_x_tied1, svuint32_t, uint32_t, -+ z0 = svmad_n_u32_x (p0, z0, z1, x0), -+ z0 = svmad_x (p0, z0, z1, x0)) -+ -+/* -+** mad_w0_u32_x_tied2: -+** mov (z[0-9]+\.s), w0 -+** mad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_u32_x_tied2, svuint32_t, uint32_t, -+ z0 = svmad_n_u32_x (p0, z1, z0, x0), -+ z0 = svmad_x (p0, z1, z0, x0)) -+ -+/* -+** mad_w0_u32_x_untied: -+** mov z0\.s, w0 -+** mla z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_u32_x_untied, svuint32_t, uint32_t, -+ z0 = svmad_n_u32_x (p0, z1, z2, x0), -+ z0 = svmad_x (p0, z1, z2, x0)) -+ -+/* -+** 
mad_11_u32_x_tied1: -+** mov (z[0-9]+\.s), #11 -+** mad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_u32_x_tied1, svuint32_t, -+ z0 = svmad_n_u32_x (p0, z0, z1, 11), -+ z0 = svmad_x (p0, z0, z1, 11)) -+ -+/* -+** mad_11_u32_x_tied2: -+** mov (z[0-9]+\.s), #11 -+** mad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_u32_x_tied2, svuint32_t, -+ z0 = svmad_n_u32_x (p0, z1, z0, 11), -+ z0 = svmad_x (p0, z1, z0, 11)) -+ -+/* -+** mad_11_u32_x_untied: -+** mov z0\.s, #11 -+** mla z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_u32_x_untied, svuint32_t, -+ z0 = svmad_n_u32_x (p0, z1, z2, 11), -+ z0 = svmad_x (p0, z1, z2, 11)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_u64.c -new file mode 100644 -index 000000000..c4939093e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_u64.c -@@ -0,0 +1,321 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mad_u64_m_tied1: -+** mad z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u64_m_tied1, svuint64_t, -+ z0 = svmad_u64_m (p0, z0, z1, z2), -+ z0 = svmad_m (p0, z0, z1, z2)) -+ -+/* -+** mad_u64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** mad z0\.d, p0/m, \1, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u64_m_tied2, svuint64_t, -+ z0 = svmad_u64_m (p0, z1, z0, z2), -+ z0 = svmad_m (p0, z1, z0, z2)) -+ -+/* -+** mad_u64_m_tied3: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** mad z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u64_m_tied3, svuint64_t, -+ z0 = svmad_u64_m (p0, z1, z2, z0), -+ z0 = svmad_m (p0, z1, z2, z0)) -+ -+/* -+** mad_u64_m_untied: -+** movprfx z0, z1 -+** mad z0\.d, p0/m, z2\.d, z3\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u64_m_untied, svuint64_t, -+ z0 = svmad_u64_m (p0, z1, z2, z3), -+ z0 = svmad_m (p0, z1, z2, z3)) -+ -+/* -+** mad_x0_u64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** mad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_x0_u64_m_tied1, svuint64_t, uint64_t, -+ z0 = svmad_n_u64_m (p0, z0, z1, x0), -+ z0 = svmad_m (p0, z0, z1, x0)) -+ -+/* -+** mad_x0_u64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** mad z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_x0_u64_m_untied, svuint64_t, uint64_t, -+ z0 = svmad_n_u64_m (p0, z1, z2, x0), -+ z0 = svmad_m (p0, z1, z2, x0)) -+ -+/* -+** mad_11_u64_m_tied1: -+** mov (z[0-9]+\.d), #11 -+** mad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_u64_m_tied1, svuint64_t, -+ z0 = svmad_n_u64_m (p0, z0, z1, 11), -+ z0 = svmad_m (p0, z0, z1, 11)) -+ -+/* -+** mad_11_u64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #11 -+** movprfx z0, z1 -+** mad z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_u64_m_untied, svuint64_t, -+ z0 = svmad_n_u64_m (p0, z1, z2, 11), -+ z0 = svmad_m (p0, z1, z2, 11)) -+ -+/* -+** mad_u64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** mad z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u64_z_tied1, svuint64_t, -+ z0 = svmad_u64_z (p0, z0, z1, z2), -+ z0 = svmad_z (p0, z0, z1, z2)) -+ -+/* -+** mad_u64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** mad z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u64_z_tied2, svuint64_t, -+ z0 = svmad_u64_z (p0, z1, z0, z2), -+ z0 = svmad_z (p0, z1, z0, z2)) -+ -+/* -+** mad_u64_z_tied3: -+** movprfx z0\.d, p0/z, z0\.d -+** mla z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z 
(mad_u64_z_tied3, svuint64_t, -+ z0 = svmad_u64_z (p0, z1, z2, z0), -+ z0 = svmad_z (p0, z1, z2, z0)) -+ -+/* -+** mad_u64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** mad z0\.d, p0/m, z2\.d, z3\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** mad z0\.d, p0/m, z1\.d, z3\.d -+** | -+** movprfx z0\.d, p0/z, z3\.d -+** mla z0\.d, p0/m, z1\.d, z2\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u64_z_untied, svuint64_t, -+ z0 = svmad_u64_z (p0, z1, z2, z3), -+ z0 = svmad_z (p0, z1, z2, z3)) -+ -+/* -+** mad_x0_u64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** mad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_x0_u64_z_tied1, svuint64_t, uint64_t, -+ z0 = svmad_n_u64_z (p0, z0, z1, x0), -+ z0 = svmad_z (p0, z0, z1, x0)) -+ -+/* -+** mad_x0_u64_z_tied2: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** mad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_x0_u64_z_tied2, svuint64_t, uint64_t, -+ z0 = svmad_n_u64_z (p0, z1, z0, x0), -+ z0 = svmad_z (p0, z1, z0, x0)) -+ -+/* -+** mad_x0_u64_z_untied: -+** mov (z[0-9]+\.d), x0 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** mad z0\.d, p0/m, z2\.d, \1 -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** mad z0\.d, p0/m, z1\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** mla z0\.d, p0/m, z1\.d, z2\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_x0_u64_z_untied, svuint64_t, uint64_t, -+ z0 = svmad_n_u64_z (p0, z1, z2, x0), -+ z0 = svmad_z (p0, z1, z2, x0)) -+ -+/* -+** mad_11_u64_z_tied1: -+** mov (z[0-9]+\.d), #11 -+** movprfx z0\.d, p0/z, z0\.d -+** mad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_u64_z_tied1, svuint64_t, -+ z0 = svmad_n_u64_z (p0, z0, z1, 11), -+ z0 = svmad_z (p0, z0, z1, 11)) -+ -+/* -+** mad_11_u64_z_tied2: -+** mov (z[0-9]+\.d), #11 -+** movprfx z0\.d, p0/z, z0\.d -+** mad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_u64_z_tied2, svuint64_t, -+ z0 = svmad_n_u64_z (p0, z1, z0, 11), -+ z0 = svmad_z (p0, z1, z0, 11)) -+ -+/* -+** mad_11_u64_z_untied: -+** mov (z[0-9]+\.d), #11 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** mad z0\.d, p0/m, z2\.d, \1 -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** mad z0\.d, p0/m, z1\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** mla z0\.d, p0/m, z1\.d, z2\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_u64_z_untied, svuint64_t, -+ z0 = svmad_n_u64_z (p0, z1, z2, 11), -+ z0 = svmad_z (p0, z1, z2, 11)) -+ -+/* -+** mad_u64_x_tied1: -+** mad z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u64_x_tied1, svuint64_t, -+ z0 = svmad_u64_x (p0, z0, z1, z2), -+ z0 = svmad_x (p0, z0, z1, z2)) -+ -+/* -+** mad_u64_x_tied2: -+** mad z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u64_x_tied2, svuint64_t, -+ z0 = svmad_u64_x (p0, z1, z0, z2), -+ z0 = svmad_x (p0, z1, z0, z2)) -+ -+/* -+** mad_u64_x_tied3: -+** mla z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u64_x_tied3, svuint64_t, -+ z0 = svmad_u64_x (p0, z1, z2, z0), -+ z0 = svmad_x (p0, z1, z2, z0)) -+ -+/* -+** mad_u64_x_untied: -+** ( -+** movprfx z0, z1 -+** mad z0\.d, p0/m, z2\.d, z3\.d -+** | -+** movprfx z0, z2 -+** mad z0\.d, p0/m, z1\.d, z3\.d -+** | -+** movprfx z0, z3 -+** mla z0\.d, p0/m, z1\.d, z2\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u64_x_untied, svuint64_t, -+ z0 = svmad_u64_x (p0, z1, z2, z3), -+ z0 = svmad_x (p0, z1, z2, z3)) -+ -+/* -+** mad_x0_u64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** mad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_x0_u64_x_tied1, svuint64_t, uint64_t, -+ z0 = svmad_n_u64_x 
(p0, z0, z1, x0), -+ z0 = svmad_x (p0, z0, z1, x0)) -+ -+/* -+** mad_x0_u64_x_tied2: -+** mov (z[0-9]+\.d), x0 -+** mad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_x0_u64_x_tied2, svuint64_t, uint64_t, -+ z0 = svmad_n_u64_x (p0, z1, z0, x0), -+ z0 = svmad_x (p0, z1, z0, x0)) -+ -+/* -+** mad_x0_u64_x_untied: -+** mov z0\.d, x0 -+** mla z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_x0_u64_x_untied, svuint64_t, uint64_t, -+ z0 = svmad_n_u64_x (p0, z1, z2, x0), -+ z0 = svmad_x (p0, z1, z2, x0)) -+ -+/* -+** mad_11_u64_x_tied1: -+** mov (z[0-9]+\.d), #11 -+** mad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_u64_x_tied1, svuint64_t, -+ z0 = svmad_n_u64_x (p0, z0, z1, 11), -+ z0 = svmad_x (p0, z0, z1, 11)) -+ -+/* -+** mad_11_u64_x_tied2: -+** mov (z[0-9]+\.d), #11 -+** mad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_u64_x_tied2, svuint64_t, -+ z0 = svmad_n_u64_x (p0, z1, z0, 11), -+ z0 = svmad_x (p0, z1, z0, 11)) -+ -+/* -+** mad_11_u64_x_untied: -+** mov z0\.d, #11 -+** mla z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_u64_x_untied, svuint64_t, -+ z0 = svmad_n_u64_x (p0, z1, z2, 11), -+ z0 = svmad_x (p0, z1, z2, 11)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_u8.c -new file mode 100644 -index 000000000..0b4b1b8cf ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mad_u8.c -@@ -0,0 +1,321 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mad_u8_m_tied1: -+** mad z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u8_m_tied1, svuint8_t, -+ z0 = svmad_u8_m (p0, z0, z1, z2), -+ z0 = svmad_m (p0, z0, z1, z2)) -+ -+/* -+** mad_u8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mad z0\.b, p0/m, \1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u8_m_tied2, svuint8_t, -+ z0 = svmad_u8_m (p0, z1, z0, z2), -+ z0 = svmad_m (p0, z1, z0, z2)) -+ -+/* -+** mad_u8_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mad z0\.b, p0/m, z2\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u8_m_tied3, svuint8_t, -+ z0 = svmad_u8_m (p0, z1, z2, z0), -+ z0 = svmad_m (p0, z1, z2, z0)) -+ -+/* -+** mad_u8_m_untied: -+** movprfx z0, z1 -+** mad z0\.b, p0/m, z2\.b, z3\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u8_m_untied, svuint8_t, -+ z0 = svmad_u8_m (p0, z1, z2, z3), -+ z0 = svmad_m (p0, z1, z2, z3)) -+ -+/* -+** mad_w0_u8_m_tied1: -+** mov (z[0-9]+\.b), w0 -+** mad z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_u8_m_tied1, svuint8_t, uint8_t, -+ z0 = svmad_n_u8_m (p0, z0, z1, x0), -+ z0 = svmad_m (p0, z0, z1, x0)) -+ -+/* -+** mad_w0_u8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** mad z0\.b, p0/m, z2\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_u8_m_untied, svuint8_t, uint8_t, -+ z0 = svmad_n_u8_m (p0, z1, z2, x0), -+ z0 = svmad_m (p0, z1, z2, x0)) -+ -+/* -+** mad_11_u8_m_tied1: -+** mov (z[0-9]+\.b), #11 -+** mad z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_u8_m_tied1, svuint8_t, -+ z0 = svmad_n_u8_m (p0, z0, z1, 11), -+ z0 = svmad_m (p0, z0, z1, 11)) -+ -+/* -+** mad_11_u8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), #11 -+** movprfx z0, z1 -+** mad z0\.b, p0/m, z2\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_u8_m_untied, svuint8_t, -+ z0 = svmad_n_u8_m (p0, z1, z2, 11), -+ z0 = svmad_m (p0, z1, z2, 11)) -+ -+/* -+** mad_u8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** mad 
z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u8_z_tied1, svuint8_t, -+ z0 = svmad_u8_z (p0, z0, z1, z2), -+ z0 = svmad_z (p0, z0, z1, z2)) -+ -+/* -+** mad_u8_z_tied2: -+** movprfx z0\.b, p0/z, z0\.b -+** mad z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u8_z_tied2, svuint8_t, -+ z0 = svmad_u8_z (p0, z1, z0, z2), -+ z0 = svmad_z (p0, z1, z0, z2)) -+ -+/* -+** mad_u8_z_tied3: -+** movprfx z0\.b, p0/z, z0\.b -+** mla z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u8_z_tied3, svuint8_t, -+ z0 = svmad_u8_z (p0, z1, z2, z0), -+ z0 = svmad_z (p0, z1, z2, z0)) -+ -+/* -+** mad_u8_z_untied: -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** mad z0\.b, p0/m, z2\.b, z3\.b -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** mad z0\.b, p0/m, z1\.b, z3\.b -+** | -+** movprfx z0\.b, p0/z, z3\.b -+** mla z0\.b, p0/m, z1\.b, z2\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u8_z_untied, svuint8_t, -+ z0 = svmad_u8_z (p0, z1, z2, z3), -+ z0 = svmad_z (p0, z1, z2, z3)) -+ -+/* -+** mad_w0_u8_z_tied1: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** mad z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_u8_z_tied1, svuint8_t, uint8_t, -+ z0 = svmad_n_u8_z (p0, z0, z1, x0), -+ z0 = svmad_z (p0, z0, z1, x0)) -+ -+/* -+** mad_w0_u8_z_tied2: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** mad z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_u8_z_tied2, svuint8_t, uint8_t, -+ z0 = svmad_n_u8_z (p0, z1, z0, x0), -+ z0 = svmad_z (p0, z1, z0, x0)) -+ -+/* -+** mad_w0_u8_z_untied: -+** mov (z[0-9]+\.b), w0 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** mad z0\.b, p0/m, z2\.b, \1 -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** mad z0\.b, p0/m, z1\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** mla z0\.b, p0/m, z1\.b, z2\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_u8_z_untied, svuint8_t, uint8_t, -+ z0 = svmad_n_u8_z (p0, z1, z2, x0), -+ z0 = svmad_z (p0, z1, z2, x0)) -+ -+/* -+** mad_11_u8_z_tied1: -+** mov (z[0-9]+\.b), #11 -+** movprfx z0\.b, p0/z, z0\.b -+** mad z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_u8_z_tied1, svuint8_t, -+ z0 = svmad_n_u8_z (p0, z0, z1, 11), -+ z0 = svmad_z (p0, z0, z1, 11)) -+ -+/* -+** mad_11_u8_z_tied2: -+** mov (z[0-9]+\.b), #11 -+** movprfx z0\.b, p0/z, z0\.b -+** mad z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_u8_z_tied2, svuint8_t, -+ z0 = svmad_n_u8_z (p0, z1, z0, 11), -+ z0 = svmad_z (p0, z1, z0, 11)) -+ -+/* -+** mad_11_u8_z_untied: -+** mov (z[0-9]+\.b), #11 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** mad z0\.b, p0/m, z2\.b, \1 -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** mad z0\.b, p0/m, z1\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** mla z0\.b, p0/m, z1\.b, z2\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_u8_z_untied, svuint8_t, -+ z0 = svmad_n_u8_z (p0, z1, z2, 11), -+ z0 = svmad_z (p0, z1, z2, 11)) -+ -+/* -+** mad_u8_x_tied1: -+** mad z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u8_x_tied1, svuint8_t, -+ z0 = svmad_u8_x (p0, z0, z1, z2), -+ z0 = svmad_x (p0, z0, z1, z2)) -+ -+/* -+** mad_u8_x_tied2: -+** mad z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u8_x_tied2, svuint8_t, -+ z0 = svmad_u8_x (p0, z1, z0, z2), -+ z0 = svmad_x (p0, z1, z0, z2)) -+ -+/* -+** mad_u8_x_tied3: -+** mla z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u8_x_tied3, svuint8_t, -+ z0 = svmad_u8_x (p0, z1, z2, z0), -+ z0 = svmad_x (p0, z1, z2, z0)) -+ -+/* -+** mad_u8_x_untied: -+** ( -+** movprfx z0, z1 -+** mad z0\.b, p0/m, 
z2\.b, z3\.b -+** | -+** movprfx z0, z2 -+** mad z0\.b, p0/m, z1\.b, z3\.b -+** | -+** movprfx z0, z3 -+** mla z0\.b, p0/m, z1\.b, z2\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mad_u8_x_untied, svuint8_t, -+ z0 = svmad_u8_x (p0, z1, z2, z3), -+ z0 = svmad_x (p0, z1, z2, z3)) -+ -+/* -+** mad_w0_u8_x_tied1: -+** mov (z[0-9]+\.b), w0 -+** mad z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_u8_x_tied1, svuint8_t, uint8_t, -+ z0 = svmad_n_u8_x (p0, z0, z1, x0), -+ z0 = svmad_x (p0, z0, z1, x0)) -+ -+/* -+** mad_w0_u8_x_tied2: -+** mov (z[0-9]+\.b), w0 -+** mad z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_u8_x_tied2, svuint8_t, uint8_t, -+ z0 = svmad_n_u8_x (p0, z1, z0, x0), -+ z0 = svmad_x (p0, z1, z0, x0)) -+ -+/* -+** mad_w0_u8_x_untied: -+** mov z0\.b, w0 -+** mla z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_ZX (mad_w0_u8_x_untied, svuint8_t, uint8_t, -+ z0 = svmad_n_u8_x (p0, z1, z2, x0), -+ z0 = svmad_x (p0, z1, z2, x0)) -+ -+/* -+** mad_11_u8_x_tied1: -+** mov (z[0-9]+\.b), #11 -+** mad z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_u8_x_tied1, svuint8_t, -+ z0 = svmad_n_u8_x (p0, z0, z1, 11), -+ z0 = svmad_x (p0, z0, z1, 11)) -+ -+/* -+** mad_11_u8_x_tied2: -+** mov (z[0-9]+\.b), #11 -+** mad z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_u8_x_tied2, svuint8_t, -+ z0 = svmad_n_u8_x (p0, z1, z0, 11), -+ z0 = svmad_x (p0, z1, z0, 11)) -+ -+/* -+** mad_11_u8_x_untied: -+** mov z0\.b, #11 -+** mla z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mad_11_u8_x_untied, svuint8_t, -+ z0 = svmad_n_u8_x (p0, z1, z2, 11), -+ z0 = svmad_x (p0, z1, z2, 11)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_f16.c -new file mode 100644 -index 000000000..f21099a24 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_f16.c -@@ -0,0 +1,425 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** max_f16_m_tied1: -+** fmax z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (max_f16_m_tied1, svfloat16_t, -+ z0 = svmax_f16_m (p0, z0, z1), -+ z0 = svmax_m (p0, z0, z1)) -+ -+/* -+** max_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fmax z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (max_f16_m_tied2, svfloat16_t, -+ z0 = svmax_f16_m (p0, z1, z0), -+ z0 = svmax_m (p0, z1, z0)) -+ -+/* -+** max_f16_m_untied: -+** movprfx z0, z1 -+** fmax z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (max_f16_m_untied, svfloat16_t, -+ z0 = svmax_f16_m (p0, z1, z2), -+ z0 = svmax_m (p0, z1, z2)) -+ -+/* -+** max_h4_f16_m_tied1: -+** mov (z[0-9]+\.h), h4 -+** fmax z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (max_h4_f16_m_tied1, svfloat16_t, __fp16, -+ z0 = svmax_n_f16_m (p0, z0, d4), -+ z0 = svmax_m (p0, z0, d4)) -+ -+/* -+** max_h4_f16_m_untied: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0, z1 -+** fmax z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (max_h4_f16_m_untied, svfloat16_t, __fp16, -+ z0 = svmax_n_f16_m (p0, z1, d4), -+ z0 = svmax_m (p0, z1, d4)) -+ -+/* -+** max_0_f16_m_tied1: -+** fmax z0\.h, p0/m, z0\.h, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (max_0_f16_m_tied1, svfloat16_t, -+ z0 = svmax_n_f16_m (p0, z0, 0), -+ z0 = svmax_m (p0, z0, 0)) -+ -+/* -+** max_0_f16_m_untied: -+** movprfx z0, z1 -+** fmax z0\.h, p0/m, z0\.h, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (max_0_f16_m_untied, svfloat16_t, -+ z0 = svmax_n_f16_m (p0, z1, 0), -+ z0 
= svmax_m (p0, z1, 0)) -+ -+/* -+** max_1_f16_m_tied1: -+** fmax z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_f16_m_tied1, svfloat16_t, -+ z0 = svmax_n_f16_m (p0, z0, 1), -+ z0 = svmax_m (p0, z0, 1)) -+ -+/* -+** max_1_f16_m_untied: -+** movprfx z0, z1 -+** fmax z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_f16_m_untied, svfloat16_t, -+ z0 = svmax_n_f16_m (p0, z1, 1), -+ z0 = svmax_m (p0, z1, 1)) -+ -+/* -+** max_2_f16_m: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fmax z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_2_f16_m, svfloat16_t, -+ z0 = svmax_n_f16_m (p0, z0, 2), -+ z0 = svmax_m (p0, z0, 2)) -+ -+/* -+** max_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fmax z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (max_f16_z_tied1, svfloat16_t, -+ z0 = svmax_f16_z (p0, z0, z1), -+ z0 = svmax_z (p0, z0, z1)) -+ -+/* -+** max_f16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** fmax z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (max_f16_z_tied2, svfloat16_t, -+ z0 = svmax_f16_z (p0, z1, z0), -+ z0 = svmax_z (p0, z1, z0)) -+ -+/* -+** max_f16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fmax z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fmax z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (max_f16_z_untied, svfloat16_t, -+ z0 = svmax_f16_z (p0, z1, z2), -+ z0 = svmax_z (p0, z1, z2)) -+ -+/* -+** max_h4_f16_z_tied1: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0\.h, p0/z, z0\.h -+** fmax z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (max_h4_f16_z_tied1, svfloat16_t, __fp16, -+ z0 = svmax_n_f16_z (p0, z0, d4), -+ z0 = svmax_z (p0, z0, d4)) -+ -+/* -+** max_h4_f16_z_untied: -+** mov (z[0-9]+\.h), h4 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fmax z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** fmax z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (max_h4_f16_z_untied, svfloat16_t, __fp16, -+ z0 = svmax_n_f16_z (p0, z1, d4), -+ z0 = svmax_z (p0, z1, d4)) -+ -+/* -+** max_0_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fmax z0\.h, p0/m, z0\.h, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (max_0_f16_z_tied1, svfloat16_t, -+ z0 = svmax_n_f16_z (p0, z0, 0), -+ z0 = svmax_z (p0, z0, 0)) -+ -+/* -+** max_0_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fmax z0\.h, p0/m, z0\.h, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (max_0_f16_z_untied, svfloat16_t, -+ z0 = svmax_n_f16_z (p0, z1, 0), -+ z0 = svmax_z (p0, z1, 0)) -+ -+/* -+** max_1_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fmax z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_f16_z_tied1, svfloat16_t, -+ z0 = svmax_n_f16_z (p0, z0, 1), -+ z0 = svmax_z (p0, z0, 1)) -+ -+/* -+** max_1_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fmax z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_f16_z_untied, svfloat16_t, -+ z0 = svmax_n_f16_z (p0, z1, 1), -+ z0 = svmax_z (p0, z1, 1)) -+ -+/* -+** max_2_f16_z: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
-+** movprfx z0\.h, p0/z, z0\.h -+** fmax z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_2_f16_z, svfloat16_t, -+ z0 = svmax_n_f16_z (p0, z0, 2), -+ z0 = svmax_z (p0, z0, 2)) -+ -+/* -+** max_f16_x_tied1: -+** fmax z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (max_f16_x_tied1, svfloat16_t, -+ z0 = svmax_f16_x (p0, z0, z1), -+ z0 = svmax_x (p0, z0, z1)) -+ -+/* -+** max_f16_x_tied2: -+** fmax z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (max_f16_x_tied2, svfloat16_t, -+ z0 = svmax_f16_x (p0, z1, z0), -+ z0 = svmax_x (p0, z1, z0)) -+ -+/* -+** max_f16_x_untied: -+** ( -+** movprfx z0, z1 -+** fmax z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0, z2 -+** fmax z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (max_f16_x_untied, svfloat16_t, -+ z0 = svmax_f16_x (p0, z1, z2), -+ z0 = svmax_x (p0, z1, z2)) -+ -+/* -+** max_h4_f16_x_tied1: -+** mov (z[0-9]+\.h), h4 -+** fmax z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (max_h4_f16_x_tied1, svfloat16_t, __fp16, -+ z0 = svmax_n_f16_x (p0, z0, d4), -+ z0 = svmax_x (p0, z0, d4)) -+ -+/* -+** max_h4_f16_x_untied: -+** mov z0\.h, h4 -+** fmax z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZD (max_h4_f16_x_untied, svfloat16_t, __fp16, -+ z0 = svmax_n_f16_x (p0, z1, d4), -+ z0 = svmax_x (p0, z1, d4)) -+ -+/* -+** max_0_f16_x_tied1: -+** fmax z0\.h, p0/m, z0\.h, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (max_0_f16_x_tied1, svfloat16_t, -+ z0 = svmax_n_f16_x (p0, z0, 0), -+ z0 = svmax_x (p0, z0, 0)) -+ -+/* -+** max_0_f16_x_untied: -+** movprfx z0, z1 -+** fmax z0\.h, p0/m, z0\.h, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (max_0_f16_x_untied, svfloat16_t, -+ z0 = svmax_n_f16_x (p0, z1, 0), -+ z0 = svmax_x (p0, z1, 0)) -+ -+/* -+** max_1_f16_x_tied1: -+** fmax z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_f16_x_tied1, svfloat16_t, -+ z0 = svmax_n_f16_x (p0, z0, 1), -+ z0 = svmax_x (p0, z0, 1)) -+ -+/* -+** max_1_f16_x_untied: -+** movprfx z0, z1 -+** fmax z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_f16_x_untied, svfloat16_t, -+ z0 = svmax_n_f16_x (p0, z1, 1), -+ z0 = svmax_x (p0, z1, 1)) -+ -+/* -+** max_2_f16_x_tied1: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fmax z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_2_f16_x_tied1, svfloat16_t, -+ z0 = svmax_n_f16_x (p0, z0, 2), -+ z0 = svmax_x (p0, z0, 2)) -+ -+/* -+** max_2_f16_x_untied: -+** fmov z0\.h, #2\.0(?:e\+0)? -+** fmax z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (max_2_f16_x_untied, svfloat16_t, -+ z0 = svmax_n_f16_x (p0, z1, 2), -+ z0 = svmax_x (p0, z1, 2)) -+ -+/* -+** ptrue_max_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_max_f16_x_tied1, svfloat16_t, -+ z0 = svmax_f16_x (svptrue_b16 (), z0, z1), -+ z0 = svmax_x (svptrue_b16 (), z0, z1)) -+ -+/* -+** ptrue_max_f16_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_max_f16_x_tied2, svfloat16_t, -+ z0 = svmax_f16_x (svptrue_b16 (), z1, z0), -+ z0 = svmax_x (svptrue_b16 (), z1, z0)) -+ -+/* -+** ptrue_max_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_max_f16_x_untied, svfloat16_t, -+ z0 = svmax_f16_x (svptrue_b16 (), z1, z2), -+ z0 = svmax_x (svptrue_b16 (), z1, z2)) -+ -+/* -+** ptrue_max_0_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_max_0_f16_x_tied1, svfloat16_t, -+ z0 = svmax_n_f16_x (svptrue_b16 (), z0, 0), -+ z0 = svmax_x (svptrue_b16 (), z0, 0)) -+ -+/* -+** ptrue_max_0_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_max_0_f16_x_untied, svfloat16_t, -+ z0 = svmax_n_f16_x (svptrue_b16 (), z1, 0), -+ z0 = svmax_x (svptrue_b16 (), z1, 0)) -+ -+/* -+** ptrue_max_1_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_max_1_f16_x_tied1, svfloat16_t, -+ z0 = svmax_n_f16_x (svptrue_b16 (), z0, 1), -+ z0 = svmax_x (svptrue_b16 (), z0, 1)) -+ -+/* -+** ptrue_max_1_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_max_1_f16_x_untied, svfloat16_t, -+ z0 = svmax_n_f16_x (svptrue_b16 (), z1, 1), -+ z0 = svmax_x (svptrue_b16 (), z1, 1)) -+ -+/* -+** ptrue_max_2_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_max_2_f16_x_tied1, svfloat16_t, -+ z0 = svmax_n_f16_x (svptrue_b16 (), z0, 2), -+ z0 = svmax_x (svptrue_b16 (), z0, 2)) -+ -+/* -+** ptrue_max_2_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_max_2_f16_x_untied, svfloat16_t, -+ z0 = svmax_n_f16_x (svptrue_b16 (), z1, 2), -+ z0 = svmax_x (svptrue_b16 (), z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_f32.c -new file mode 100644 -index 000000000..6f5c92c9f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_f32.c -@@ -0,0 +1,425 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** max_f32_m_tied1: -+** fmax z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (max_f32_m_tied1, svfloat32_t, -+ z0 = svmax_f32_m (p0, z0, z1), -+ z0 = svmax_m (p0, z0, z1)) -+ -+/* -+** max_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fmax z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (max_f32_m_tied2, svfloat32_t, -+ z0 = svmax_f32_m (p0, z1, z0), -+ z0 = svmax_m (p0, z1, z0)) -+ -+/* -+** max_f32_m_untied: -+** movprfx z0, z1 -+** fmax z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (max_f32_m_untied, svfloat32_t, -+ z0 = svmax_f32_m (p0, z1, z2), -+ z0 = svmax_m (p0, z1, z2)) -+ -+/* -+** max_s4_f32_m_tied1: -+** mov (z[0-9]+\.s), s4 -+** fmax z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (max_s4_f32_m_tied1, svfloat32_t, float, -+ z0 = svmax_n_f32_m (p0, z0, d4), -+ z0 = svmax_m (p0, z0, d4)) -+ -+/* -+** max_s4_f32_m_untied: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0, z1 -+** fmax z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (max_s4_f32_m_untied, svfloat32_t, float, -+ z0 = svmax_n_f32_m (p0, z1, d4), -+ z0 = svmax_m (p0, z1, d4)) -+ -+/* -+** max_0_f32_m_tied1: -+** fmax z0\.s, p0/m, z0\.s, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (max_0_f32_m_tied1, svfloat32_t, -+ z0 = svmax_n_f32_m (p0, z0, 0), -+ z0 = svmax_m (p0, z0, 0)) -+ -+/* -+** max_0_f32_m_untied: -+** movprfx z0, z1 -+** fmax z0\.s, p0/m, z0\.s, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (max_0_f32_m_untied, svfloat32_t, -+ z0 = svmax_n_f32_m (p0, z1, 0), -+ z0 = svmax_m (p0, z1, 0)) -+ -+/* -+** max_1_f32_m_tied1: -+** fmax z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_f32_m_tied1, svfloat32_t, -+ z0 = svmax_n_f32_m (p0, z0, 1), -+ z0 = svmax_m (p0, z0, 1)) -+ -+/* -+** max_1_f32_m_untied: -+** movprfx z0, z1 -+** fmax 
z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_f32_m_untied, svfloat32_t, -+ z0 = svmax_n_f32_m (p0, z1, 1), -+ z0 = svmax_m (p0, z1, 1)) -+ -+/* -+** max_2_f32_m: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fmax z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_2_f32_m, svfloat32_t, -+ z0 = svmax_n_f32_m (p0, z0, 2), -+ z0 = svmax_m (p0, z0, 2)) -+ -+/* -+** max_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fmax z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (max_f32_z_tied1, svfloat32_t, -+ z0 = svmax_f32_z (p0, z0, z1), -+ z0 = svmax_z (p0, z0, z1)) -+ -+/* -+** max_f32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** fmax z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (max_f32_z_tied2, svfloat32_t, -+ z0 = svmax_f32_z (p0, z1, z0), -+ z0 = svmax_z (p0, z1, z0)) -+ -+/* -+** max_f32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fmax z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fmax z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (max_f32_z_untied, svfloat32_t, -+ z0 = svmax_f32_z (p0, z1, z2), -+ z0 = svmax_z (p0, z1, z2)) -+ -+/* -+** max_s4_f32_z_tied1: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0\.s, p0/z, z0\.s -+** fmax z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (max_s4_f32_z_tied1, svfloat32_t, float, -+ z0 = svmax_n_f32_z (p0, z0, d4), -+ z0 = svmax_z (p0, z0, d4)) -+ -+/* -+** max_s4_f32_z_untied: -+** mov (z[0-9]+\.s), s4 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fmax z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** fmax z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (max_s4_f32_z_untied, svfloat32_t, float, -+ z0 = svmax_n_f32_z (p0, z1, d4), -+ z0 = svmax_z (p0, z1, d4)) -+ -+/* -+** max_0_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fmax z0\.s, p0/m, z0\.s, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (max_0_f32_z_tied1, svfloat32_t, -+ z0 = svmax_n_f32_z (p0, z0, 0), -+ z0 = svmax_z (p0, z0, 0)) -+ -+/* -+** max_0_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fmax z0\.s, p0/m, z0\.s, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (max_0_f32_z_untied, svfloat32_t, -+ z0 = svmax_n_f32_z (p0, z1, 0), -+ z0 = svmax_z (p0, z1, 0)) -+ -+/* -+** max_1_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fmax z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_f32_z_tied1, svfloat32_t, -+ z0 = svmax_n_f32_z (p0, z0, 1), -+ z0 = svmax_z (p0, z0, 1)) -+ -+/* -+** max_1_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fmax z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_f32_z_untied, svfloat32_t, -+ z0 = svmax_n_f32_z (p0, z1, 1), -+ z0 = svmax_z (p0, z1, 1)) -+ -+/* -+** max_2_f32_z: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
-+** movprfx z0\.s, p0/z, z0\.s -+** fmax z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_2_f32_z, svfloat32_t, -+ z0 = svmax_n_f32_z (p0, z0, 2), -+ z0 = svmax_z (p0, z0, 2)) -+ -+/* -+** max_f32_x_tied1: -+** fmax z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (max_f32_x_tied1, svfloat32_t, -+ z0 = svmax_f32_x (p0, z0, z1), -+ z0 = svmax_x (p0, z0, z1)) -+ -+/* -+** max_f32_x_tied2: -+** fmax z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (max_f32_x_tied2, svfloat32_t, -+ z0 = svmax_f32_x (p0, z1, z0), -+ z0 = svmax_x (p0, z1, z0)) -+ -+/* -+** max_f32_x_untied: -+** ( -+** movprfx z0, z1 -+** fmax z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0, z2 -+** fmax z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (max_f32_x_untied, svfloat32_t, -+ z0 = svmax_f32_x (p0, z1, z2), -+ z0 = svmax_x (p0, z1, z2)) -+ -+/* -+** max_s4_f32_x_tied1: -+** mov (z[0-9]+\.s), s4 -+** fmax z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (max_s4_f32_x_tied1, svfloat32_t, float, -+ z0 = svmax_n_f32_x (p0, z0, d4), -+ z0 = svmax_x (p0, z0, d4)) -+ -+/* -+** max_s4_f32_x_untied: -+** mov z0\.s, s4 -+** fmax z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZD (max_s4_f32_x_untied, svfloat32_t, float, -+ z0 = svmax_n_f32_x (p0, z1, d4), -+ z0 = svmax_x (p0, z1, d4)) -+ -+/* -+** max_0_f32_x_tied1: -+** fmax z0\.s, p0/m, z0\.s, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (max_0_f32_x_tied1, svfloat32_t, -+ z0 = svmax_n_f32_x (p0, z0, 0), -+ z0 = svmax_x (p0, z0, 0)) -+ -+/* -+** max_0_f32_x_untied: -+** movprfx z0, z1 -+** fmax z0\.s, p0/m, z0\.s, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (max_0_f32_x_untied, svfloat32_t, -+ z0 = svmax_n_f32_x (p0, z1, 0), -+ z0 = svmax_x (p0, z1, 0)) -+ -+/* -+** max_1_f32_x_tied1: -+** fmax z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_f32_x_tied1, svfloat32_t, -+ z0 = svmax_n_f32_x (p0, z0, 1), -+ z0 = svmax_x (p0, z0, 1)) -+ -+/* -+** max_1_f32_x_untied: -+** movprfx z0, z1 -+** fmax z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_f32_x_untied, svfloat32_t, -+ z0 = svmax_n_f32_x (p0, z1, 1), -+ z0 = svmax_x (p0, z1, 1)) -+ -+/* -+** max_2_f32_x_tied1: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fmax z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_2_f32_x_tied1, svfloat32_t, -+ z0 = svmax_n_f32_x (p0, z0, 2), -+ z0 = svmax_x (p0, z0, 2)) -+ -+/* -+** max_2_f32_x_untied: -+** fmov z0\.s, #2\.0(?:e\+0)? -+** fmax z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (max_2_f32_x_untied, svfloat32_t, -+ z0 = svmax_n_f32_x (p0, z1, 2), -+ z0 = svmax_x (p0, z1, 2)) -+ -+/* -+** ptrue_max_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_max_f32_x_tied1, svfloat32_t, -+ z0 = svmax_f32_x (svptrue_b32 (), z0, z1), -+ z0 = svmax_x (svptrue_b32 (), z0, z1)) -+ -+/* -+** ptrue_max_f32_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_max_f32_x_tied2, svfloat32_t, -+ z0 = svmax_f32_x (svptrue_b32 (), z1, z0), -+ z0 = svmax_x (svptrue_b32 (), z1, z0)) -+ -+/* -+** ptrue_max_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_max_f32_x_untied, svfloat32_t, -+ z0 = svmax_f32_x (svptrue_b32 (), z1, z2), -+ z0 = svmax_x (svptrue_b32 (), z1, z2)) -+ -+/* -+** ptrue_max_0_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_max_0_f32_x_tied1, svfloat32_t, -+ z0 = svmax_n_f32_x (svptrue_b32 (), z0, 0), -+ z0 = svmax_x (svptrue_b32 (), z0, 0)) -+ -+/* -+** ptrue_max_0_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_max_0_f32_x_untied, svfloat32_t, -+ z0 = svmax_n_f32_x (svptrue_b32 (), z1, 0), -+ z0 = svmax_x (svptrue_b32 (), z1, 0)) -+ -+/* -+** ptrue_max_1_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_max_1_f32_x_tied1, svfloat32_t, -+ z0 = svmax_n_f32_x (svptrue_b32 (), z0, 1), -+ z0 = svmax_x (svptrue_b32 (), z0, 1)) -+ -+/* -+** ptrue_max_1_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_max_1_f32_x_untied, svfloat32_t, -+ z0 = svmax_n_f32_x (svptrue_b32 (), z1, 1), -+ z0 = svmax_x (svptrue_b32 (), z1, 1)) -+ -+/* -+** ptrue_max_2_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_max_2_f32_x_tied1, svfloat32_t, -+ z0 = svmax_n_f32_x (svptrue_b32 (), z0, 2), -+ z0 = svmax_x (svptrue_b32 (), z0, 2)) -+ -+/* -+** ptrue_max_2_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_max_2_f32_x_untied, svfloat32_t, -+ z0 = svmax_n_f32_x (svptrue_b32 (), z1, 2), -+ z0 = svmax_x (svptrue_b32 (), z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_f64.c -new file mode 100644 -index 000000000..8ac6cca75 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_f64.c -@@ -0,0 +1,425 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** max_f64_m_tied1: -+** fmax z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (max_f64_m_tied1, svfloat64_t, -+ z0 = svmax_f64_m (p0, z0, z1), -+ z0 = svmax_m (p0, z0, z1)) -+ -+/* -+** max_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fmax z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_f64_m_tied2, svfloat64_t, -+ z0 = svmax_f64_m (p0, z1, z0), -+ z0 = svmax_m (p0, z1, z0)) -+ -+/* -+** max_f64_m_untied: -+** movprfx z0, z1 -+** fmax z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (max_f64_m_untied, svfloat64_t, -+ z0 = svmax_f64_m (p0, z1, z2), -+ z0 = svmax_m (p0, z1, z2)) -+ -+/* -+** max_d4_f64_m_tied1: -+** mov (z[0-9]+\.d), d4 -+** fmax z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (max_d4_f64_m_tied1, svfloat64_t, double, -+ z0 = svmax_n_f64_m (p0, z0, d4), -+ z0 = svmax_m (p0, z0, d4)) -+ -+/* -+** max_d4_f64_m_untied: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0, z1 -+** fmax z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (max_d4_f64_m_untied, svfloat64_t, double, -+ z0 = svmax_n_f64_m (p0, z1, d4), -+ z0 = svmax_m (p0, z1, d4)) -+ -+/* -+** max_0_f64_m_tied1: -+** fmax z0\.d, p0/m, z0\.d, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (max_0_f64_m_tied1, svfloat64_t, -+ z0 = svmax_n_f64_m (p0, z0, 0), -+ z0 = svmax_m (p0, z0, 0)) -+ -+/* -+** max_0_f64_m_untied: -+** movprfx z0, z1 -+** fmax z0\.d, p0/m, z0\.d, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (max_0_f64_m_untied, svfloat64_t, -+ z0 = svmax_n_f64_m (p0, z1, 0), -+ z0 = svmax_m (p0, z1, 0)) -+ -+/* -+** max_1_f64_m_tied1: -+** fmax z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_f64_m_tied1, svfloat64_t, -+ z0 = svmax_n_f64_m (p0, z0, 1), -+ z0 = svmax_m (p0, z0, 1)) -+ -+/* -+** max_1_f64_m_untied: -+** movprfx z0, z1 -+** fmax 
z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_f64_m_untied, svfloat64_t, -+ z0 = svmax_n_f64_m (p0, z1, 1), -+ z0 = svmax_m (p0, z1, 1)) -+ -+/* -+** max_2_f64_m: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fmax z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_2_f64_m, svfloat64_t, -+ z0 = svmax_n_f64_m (p0, z0, 2), -+ z0 = svmax_m (p0, z0, 2)) -+ -+/* -+** max_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fmax z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (max_f64_z_tied1, svfloat64_t, -+ z0 = svmax_f64_z (p0, z0, z1), -+ z0 = svmax_z (p0, z0, z1)) -+ -+/* -+** max_f64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** fmax z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (max_f64_z_tied2, svfloat64_t, -+ z0 = svmax_f64_z (p0, z1, z0), -+ z0 = svmax_z (p0, z1, z0)) -+ -+/* -+** max_f64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fmax z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fmax z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (max_f64_z_untied, svfloat64_t, -+ z0 = svmax_f64_z (p0, z1, z2), -+ z0 = svmax_z (p0, z1, z2)) -+ -+/* -+** max_d4_f64_z_tied1: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0\.d, p0/z, z0\.d -+** fmax z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (max_d4_f64_z_tied1, svfloat64_t, double, -+ z0 = svmax_n_f64_z (p0, z0, d4), -+ z0 = svmax_z (p0, z0, d4)) -+ -+/* -+** max_d4_f64_z_untied: -+** mov (z[0-9]+\.d), d4 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fmax z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** fmax z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (max_d4_f64_z_untied, svfloat64_t, double, -+ z0 = svmax_n_f64_z (p0, z1, d4), -+ z0 = svmax_z (p0, z1, d4)) -+ -+/* -+** max_0_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fmax z0\.d, p0/m, z0\.d, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (max_0_f64_z_tied1, svfloat64_t, -+ z0 = svmax_n_f64_z (p0, z0, 0), -+ z0 = svmax_z (p0, z0, 0)) -+ -+/* -+** max_0_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fmax z0\.d, p0/m, z0\.d, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (max_0_f64_z_untied, svfloat64_t, -+ z0 = svmax_n_f64_z (p0, z1, 0), -+ z0 = svmax_z (p0, z1, 0)) -+ -+/* -+** max_1_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fmax z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_f64_z_tied1, svfloat64_t, -+ z0 = svmax_n_f64_z (p0, z0, 1), -+ z0 = svmax_z (p0, z0, 1)) -+ -+/* -+** max_1_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fmax z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_f64_z_untied, svfloat64_t, -+ z0 = svmax_n_f64_z (p0, z1, 1), -+ z0 = svmax_z (p0, z1, 1)) -+ -+/* -+** max_2_f64_z: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
-+** movprfx z0\.d, p0/z, z0\.d -+** fmax z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_2_f64_z, svfloat64_t, -+ z0 = svmax_n_f64_z (p0, z0, 2), -+ z0 = svmax_z (p0, z0, 2)) -+ -+/* -+** max_f64_x_tied1: -+** fmax z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (max_f64_x_tied1, svfloat64_t, -+ z0 = svmax_f64_x (p0, z0, z1), -+ z0 = svmax_x (p0, z0, z1)) -+ -+/* -+** max_f64_x_tied2: -+** fmax z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (max_f64_x_tied2, svfloat64_t, -+ z0 = svmax_f64_x (p0, z1, z0), -+ z0 = svmax_x (p0, z1, z0)) -+ -+/* -+** max_f64_x_untied: -+** ( -+** movprfx z0, z1 -+** fmax z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0, z2 -+** fmax z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (max_f64_x_untied, svfloat64_t, -+ z0 = svmax_f64_x (p0, z1, z2), -+ z0 = svmax_x (p0, z1, z2)) -+ -+/* -+** max_d4_f64_x_tied1: -+** mov (z[0-9]+\.d), d4 -+** fmax z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (max_d4_f64_x_tied1, svfloat64_t, double, -+ z0 = svmax_n_f64_x (p0, z0, d4), -+ z0 = svmax_x (p0, z0, d4)) -+ -+/* -+** max_d4_f64_x_untied: -+** mov z0\.d, d4 -+** fmax z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZD (max_d4_f64_x_untied, svfloat64_t, double, -+ z0 = svmax_n_f64_x (p0, z1, d4), -+ z0 = svmax_x (p0, z1, d4)) -+ -+/* -+** max_0_f64_x_tied1: -+** fmax z0\.d, p0/m, z0\.d, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (max_0_f64_x_tied1, svfloat64_t, -+ z0 = svmax_n_f64_x (p0, z0, 0), -+ z0 = svmax_x (p0, z0, 0)) -+ -+/* -+** max_0_f64_x_untied: -+** movprfx z0, z1 -+** fmax z0\.d, p0/m, z0\.d, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (max_0_f64_x_untied, svfloat64_t, -+ z0 = svmax_n_f64_x (p0, z1, 0), -+ z0 = svmax_x (p0, z1, 0)) -+ -+/* -+** max_1_f64_x_tied1: -+** fmax z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_f64_x_tied1, svfloat64_t, -+ z0 = svmax_n_f64_x (p0, z0, 1), -+ z0 = svmax_x (p0, z0, 1)) -+ -+/* -+** max_1_f64_x_untied: -+** movprfx z0, z1 -+** fmax z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_f64_x_untied, svfloat64_t, -+ z0 = svmax_n_f64_x (p0, z1, 1), -+ z0 = svmax_x (p0, z1, 1)) -+ -+/* -+** max_2_f64_x_tied1: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fmax z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_2_f64_x_tied1, svfloat64_t, -+ z0 = svmax_n_f64_x (p0, z0, 2), -+ z0 = svmax_x (p0, z0, 2)) -+ -+/* -+** max_2_f64_x_untied: -+** fmov z0\.d, #2\.0(?:e\+0)? -+** fmax z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (max_2_f64_x_untied, svfloat64_t, -+ z0 = svmax_n_f64_x (p0, z1, 2), -+ z0 = svmax_x (p0, z1, 2)) -+ -+/* -+** ptrue_max_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_max_f64_x_tied1, svfloat64_t, -+ z0 = svmax_f64_x (svptrue_b64 (), z0, z1), -+ z0 = svmax_x (svptrue_b64 (), z0, z1)) -+ -+/* -+** ptrue_max_f64_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_max_f64_x_tied2, svfloat64_t, -+ z0 = svmax_f64_x (svptrue_b64 (), z1, z0), -+ z0 = svmax_x (svptrue_b64 (), z1, z0)) -+ -+/* -+** ptrue_max_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_max_f64_x_untied, svfloat64_t, -+ z0 = svmax_f64_x (svptrue_b64 (), z1, z2), -+ z0 = svmax_x (svptrue_b64 (), z1, z2)) -+ -+/* -+** ptrue_max_0_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_max_0_f64_x_tied1, svfloat64_t, -+ z0 = svmax_n_f64_x (svptrue_b64 (), z0, 0), -+ z0 = svmax_x (svptrue_b64 (), z0, 0)) -+ -+/* -+** ptrue_max_0_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_max_0_f64_x_untied, svfloat64_t, -+ z0 = svmax_n_f64_x (svptrue_b64 (), z1, 0), -+ z0 = svmax_x (svptrue_b64 (), z1, 0)) -+ -+/* -+** ptrue_max_1_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_max_1_f64_x_tied1, svfloat64_t, -+ z0 = svmax_n_f64_x (svptrue_b64 (), z0, 1), -+ z0 = svmax_x (svptrue_b64 (), z0, 1)) -+ -+/* -+** ptrue_max_1_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_max_1_f64_x_untied, svfloat64_t, -+ z0 = svmax_n_f64_x (svptrue_b64 (), z1, 1), -+ z0 = svmax_x (svptrue_b64 (), z1, 1)) -+ -+/* -+** ptrue_max_2_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_max_2_f64_x_tied1, svfloat64_t, -+ z0 = svmax_n_f64_x (svptrue_b64 (), z0, 2), -+ z0 = svmax_x (svptrue_b64 (), z0, 2)) -+ -+/* -+** ptrue_max_2_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_max_2_f64_x_untied, svfloat64_t, -+ z0 = svmax_n_f64_x (svptrue_b64 (), z1, 2), -+ z0 = svmax_x (svptrue_b64 (), z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_s16.c -new file mode 100644 -index 000000000..6a2167522 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_s16.c -@@ -0,0 +1,293 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** max_s16_m_tied1: -+** smax z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (max_s16_m_tied1, svint16_t, -+ z0 = svmax_s16_m (p0, z0, z1), -+ z0 = svmax_m (p0, z0, z1)) -+ -+/* -+** max_s16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** smax z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (max_s16_m_tied2, svint16_t, -+ z0 = svmax_s16_m (p0, z1, z0), -+ z0 = svmax_m (p0, z1, z0)) -+ -+/* -+** max_s16_m_untied: -+** movprfx z0, z1 -+** smax z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (max_s16_m_untied, svint16_t, -+ z0 = svmax_s16_m (p0, z1, z2), -+ z0 = svmax_m (p0, z1, z2)) -+ -+/* -+** max_w0_s16_m_tied1: -+** mov (z[0-9]+\.h), w0 -+** smax z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (max_w0_s16_m_tied1, svint16_t, int16_t, -+ z0 = svmax_n_s16_m (p0, z0, x0), -+ z0 = svmax_m (p0, z0, x0)) -+ -+/* -+** max_w0_s16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** smax z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (max_w0_s16_m_untied, svint16_t, int16_t, -+ z0 = svmax_n_s16_m (p0, z1, x0), -+ z0 = svmax_m (p0, z1, x0)) -+ -+/* -+** max_1_s16_m_tied1: -+** mov (z[0-9]+\.h), #1 -+** smax z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_s16_m_tied1, svint16_t, -+ z0 = svmax_n_s16_m (p0, z0, 1), -+ z0 = svmax_m (p0, z0, 1)) -+ -+/* -+** max_1_s16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), #1 -+** movprfx z0, z1 -+** smax z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_s16_m_untied, svint16_t, -+ z0 = svmax_n_s16_m (p0, z1, 1), -+ z0 = svmax_m (p0, z1, 1)) -+ -+/* -+** max_m1_s16_m: -+** mov (z[0-9]+)\.b, #-1 -+** smax z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (max_m1_s16_m, svint16_t, -+ z0 = svmax_n_s16_m (p0, z0, -1), -+ z0 = 
svmax_m (p0, z0, -1)) -+ -+/* -+** max_s16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** smax z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (max_s16_z_tied1, svint16_t, -+ z0 = svmax_s16_z (p0, z0, z1), -+ z0 = svmax_z (p0, z0, z1)) -+ -+/* -+** max_s16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** smax z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (max_s16_z_tied2, svint16_t, -+ z0 = svmax_s16_z (p0, z1, z0), -+ z0 = svmax_z (p0, z1, z0)) -+ -+/* -+** max_s16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** smax z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** smax z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (max_s16_z_untied, svint16_t, -+ z0 = svmax_s16_z (p0, z1, z2), -+ z0 = svmax_z (p0, z1, z2)) -+ -+/* -+** max_w0_s16_z_tied1: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** smax z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (max_w0_s16_z_tied1, svint16_t, int16_t, -+ z0 = svmax_n_s16_z (p0, z0, x0), -+ z0 = svmax_z (p0, z0, x0)) -+ -+/* -+** max_w0_s16_z_untied: -+** mov (z[0-9]+\.h), w0 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** smax z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** smax z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (max_w0_s16_z_untied, svint16_t, int16_t, -+ z0 = svmax_n_s16_z (p0, z1, x0), -+ z0 = svmax_z (p0, z1, x0)) -+ -+/* -+** max_1_s16_z_tied1: -+** mov (z[0-9]+\.h), #1 -+** movprfx z0\.h, p0/z, z0\.h -+** smax z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_s16_z_tied1, svint16_t, -+ z0 = svmax_n_s16_z (p0, z0, 1), -+ z0 = svmax_z (p0, z0, 1)) -+ -+/* -+** max_1_s16_z_untied: -+** mov (z[0-9]+\.h), #1 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** smax z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** smax z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_s16_z_untied, svint16_t, -+ z0 = svmax_n_s16_z (p0, z1, 1), -+ z0 = svmax_z (p0, z1, 1)) -+ -+/* -+** max_s16_x_tied1: -+** smax z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (max_s16_x_tied1, svint16_t, -+ z0 = svmax_s16_x (p0, z0, z1), -+ z0 = svmax_x (p0, z0, z1)) -+ -+/* -+** max_s16_x_tied2: -+** smax z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (max_s16_x_tied2, svint16_t, -+ z0 = svmax_s16_x (p0, z1, z0), -+ z0 = svmax_x (p0, z1, z0)) -+ -+/* -+** max_s16_x_untied: -+** ( -+** movprfx z0, z1 -+** smax z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0, z2 -+** smax z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (max_s16_x_untied, svint16_t, -+ z0 = svmax_s16_x (p0, z1, z2), -+ z0 = svmax_x (p0, z1, z2)) -+ -+/* -+** max_w0_s16_x_tied1: -+** mov (z[0-9]+\.h), w0 -+** smax z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (max_w0_s16_x_tied1, svint16_t, int16_t, -+ z0 = svmax_n_s16_x (p0, z0, x0), -+ z0 = svmax_x (p0, z0, x0)) -+ -+/* -+** max_w0_s16_x_untied: -+** mov z0\.h, w0 -+** smax z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZX (max_w0_s16_x_untied, svint16_t, int16_t, -+ z0 = svmax_n_s16_x (p0, z1, x0), -+ z0 = svmax_x (p0, z1, x0)) -+ -+/* -+** max_1_s16_x_tied1: -+** smax z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_s16_x_tied1, svint16_t, -+ z0 = svmax_n_s16_x (p0, z0, 1), -+ z0 = svmax_x (p0, z0, 1)) -+ -+/* -+** max_1_s16_x_untied: -+** movprfx z0, z1 -+** smax z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_s16_x_untied, svint16_t, -+ z0 = svmax_n_s16_x (p0, z1, 1), -+ z0 = svmax_x (p0, z1, 1)) -+ -+/* -+** max_127_s16_x: -+** smax z0\.h, z0\.h, 
#127 -+** ret -+*/ -+TEST_UNIFORM_Z (max_127_s16_x, svint16_t, -+ z0 = svmax_n_s16_x (p0, z0, 127), -+ z0 = svmax_x (p0, z0, 127)) -+ -+/* -+** max_128_s16_x: -+** mov (z[0-9]+\.h), #128 -+** smax z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_128_s16_x, svint16_t, -+ z0 = svmax_n_s16_x (p0, z0, 128), -+ z0 = svmax_x (p0, z0, 128)) -+ -+/* -+** max_m1_s16_x: -+** smax z0\.h, z0\.h, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_m1_s16_x, svint16_t, -+ z0 = svmax_n_s16_x (p0, z0, -1), -+ z0 = svmax_x (p0, z0, -1)) -+ -+/* -+** max_m128_s16_x: -+** smax z0\.h, z0\.h, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (max_m128_s16_x, svint16_t, -+ z0 = svmax_n_s16_x (p0, z0, -128), -+ z0 = svmax_x (p0, z0, -128)) -+ -+/* -+** max_m129_s16_x: -+** mov (z[0-9]+\.h), #-129 -+** smax z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_m129_s16_x, svint16_t, -+ z0 = svmax_n_s16_x (p0, z0, -129), -+ z0 = svmax_x (p0, z0, -129)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_s32.c -new file mode 100644 -index 000000000..07402c7a9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_s32.c -@@ -0,0 +1,293 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** max_s32_m_tied1: -+** smax z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (max_s32_m_tied1, svint32_t, -+ z0 = svmax_s32_m (p0, z0, z1), -+ z0 = svmax_m (p0, z0, z1)) -+ -+/* -+** max_s32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** smax z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (max_s32_m_tied2, svint32_t, -+ z0 = svmax_s32_m (p0, z1, z0), -+ z0 = svmax_m (p0, z1, z0)) -+ -+/* -+** max_s32_m_untied: -+** movprfx z0, z1 -+** smax z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (max_s32_m_untied, svint32_t, -+ z0 = svmax_s32_m (p0, z1, z2), -+ z0 = svmax_m (p0, z1, z2)) -+ -+/* -+** max_w0_s32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** smax z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (max_w0_s32_m_tied1, svint32_t, int32_t, -+ z0 = svmax_n_s32_m (p0, z0, x0), -+ z0 = svmax_m (p0, z0, x0)) -+ -+/* -+** max_w0_s32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** smax z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (max_w0_s32_m_untied, svint32_t, int32_t, -+ z0 = svmax_n_s32_m (p0, z1, x0), -+ z0 = svmax_m (p0, z1, x0)) -+ -+/* -+** max_1_s32_m_tied1: -+** mov (z[0-9]+\.s), #1 -+** smax z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_s32_m_tied1, svint32_t, -+ z0 = svmax_n_s32_m (p0, z0, 1), -+ z0 = svmax_m (p0, z0, 1)) -+ -+/* -+** max_1_s32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #1 -+** movprfx z0, z1 -+** smax z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_s32_m_untied, svint32_t, -+ z0 = svmax_n_s32_m (p0, z1, 1), -+ z0 = svmax_m (p0, z1, 1)) -+ -+/* -+** max_m1_s32_m: -+** mov (z[0-9]+)\.b, #-1 -+** smax z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (max_m1_s32_m, svint32_t, -+ z0 = svmax_n_s32_m (p0, z0, -1), -+ z0 = svmax_m (p0, z0, -1)) -+ -+/* -+** max_s32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** smax z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (max_s32_z_tied1, svint32_t, -+ z0 = svmax_s32_z (p0, z0, z1), -+ z0 = svmax_z (p0, z0, z1)) -+ -+/* -+** max_s32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** smax z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (max_s32_z_tied2, svint32_t, -+ z0 = svmax_s32_z (p0, z1, z0), -+ z0 = svmax_z (p0, 
z1, z0)) -+ -+/* -+** max_s32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** smax z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** smax z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (max_s32_z_untied, svint32_t, -+ z0 = svmax_s32_z (p0, z1, z2), -+ z0 = svmax_z (p0, z1, z2)) -+ -+/* -+** max_w0_s32_z_tied1: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** smax z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (max_w0_s32_z_tied1, svint32_t, int32_t, -+ z0 = svmax_n_s32_z (p0, z0, x0), -+ z0 = svmax_z (p0, z0, x0)) -+ -+/* -+** max_w0_s32_z_untied: -+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** smax z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** smax z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (max_w0_s32_z_untied, svint32_t, int32_t, -+ z0 = svmax_n_s32_z (p0, z1, x0), -+ z0 = svmax_z (p0, z1, x0)) -+ -+/* -+** max_1_s32_z_tied1: -+** mov (z[0-9]+\.s), #1 -+** movprfx z0\.s, p0/z, z0\.s -+** smax z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_s32_z_tied1, svint32_t, -+ z0 = svmax_n_s32_z (p0, z0, 1), -+ z0 = svmax_z (p0, z0, 1)) -+ -+/* -+** max_1_s32_z_untied: -+** mov (z[0-9]+\.s), #1 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** smax z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** smax z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_s32_z_untied, svint32_t, -+ z0 = svmax_n_s32_z (p0, z1, 1), -+ z0 = svmax_z (p0, z1, 1)) -+ -+/* -+** max_s32_x_tied1: -+** smax z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (max_s32_x_tied1, svint32_t, -+ z0 = svmax_s32_x (p0, z0, z1), -+ z0 = svmax_x (p0, z0, z1)) -+ -+/* -+** max_s32_x_tied2: -+** smax z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (max_s32_x_tied2, svint32_t, -+ z0 = svmax_s32_x (p0, z1, z0), -+ z0 = svmax_x (p0, z1, z0)) -+ -+/* -+** max_s32_x_untied: -+** ( -+** movprfx z0, z1 -+** smax z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0, z2 -+** smax z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (max_s32_x_untied, svint32_t, -+ z0 = svmax_s32_x (p0, z1, z2), -+ z0 = svmax_x (p0, z1, z2)) -+ -+/* -+** max_w0_s32_x_tied1: -+** mov (z[0-9]+\.s), w0 -+** smax z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (max_w0_s32_x_tied1, svint32_t, int32_t, -+ z0 = svmax_n_s32_x (p0, z0, x0), -+ z0 = svmax_x (p0, z0, x0)) -+ -+/* -+** max_w0_s32_x_untied: -+** mov z0\.s, w0 -+** smax z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (max_w0_s32_x_untied, svint32_t, int32_t, -+ z0 = svmax_n_s32_x (p0, z1, x0), -+ z0 = svmax_x (p0, z1, x0)) -+ -+/* -+** max_1_s32_x_tied1: -+** smax z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_s32_x_tied1, svint32_t, -+ z0 = svmax_n_s32_x (p0, z0, 1), -+ z0 = svmax_x (p0, z0, 1)) -+ -+/* -+** max_1_s32_x_untied: -+** movprfx z0, z1 -+** smax z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_s32_x_untied, svint32_t, -+ z0 = svmax_n_s32_x (p0, z1, 1), -+ z0 = svmax_x (p0, z1, 1)) -+ -+/* -+** max_127_s32_x: -+** smax z0\.s, z0\.s, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (max_127_s32_x, svint32_t, -+ z0 = svmax_n_s32_x (p0, z0, 127), -+ z0 = svmax_x (p0, z0, 127)) -+ -+/* -+** max_128_s32_x: -+** mov (z[0-9]+\.s), #128 -+** smax z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_128_s32_x, svint32_t, -+ z0 = svmax_n_s32_x (p0, z0, 128), -+ z0 = svmax_x (p0, z0, 128)) -+ -+/* -+** max_m1_s32_x: -+** smax z0\.s, z0\.s, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_m1_s32_x, svint32_t, 
-+ z0 = svmax_n_s32_x (p0, z0, -1), -+ z0 = svmax_x (p0, z0, -1)) -+ -+/* -+** max_m128_s32_x: -+** smax z0\.s, z0\.s, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (max_m128_s32_x, svint32_t, -+ z0 = svmax_n_s32_x (p0, z0, -128), -+ z0 = svmax_x (p0, z0, -128)) -+ -+/* -+** max_m129_s32_x: -+** mov (z[0-9]+\.s), #-129 -+** smax z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_m129_s32_x, svint32_t, -+ z0 = svmax_n_s32_x (p0, z0, -129), -+ z0 = svmax_x (p0, z0, -129)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_s64.c -new file mode 100644 -index 000000000..66f00fdf1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_s64.c -@@ -0,0 +1,293 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** max_s64_m_tied1: -+** smax z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (max_s64_m_tied1, svint64_t, -+ z0 = svmax_s64_m (p0, z0, z1), -+ z0 = svmax_m (p0, z0, z1)) -+ -+/* -+** max_s64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** smax z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_s64_m_tied2, svint64_t, -+ z0 = svmax_s64_m (p0, z1, z0), -+ z0 = svmax_m (p0, z1, z0)) -+ -+/* -+** max_s64_m_untied: -+** movprfx z0, z1 -+** smax z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (max_s64_m_untied, svint64_t, -+ z0 = svmax_s64_m (p0, z1, z2), -+ z0 = svmax_m (p0, z1, z2)) -+ -+/* -+** max_x0_s64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** smax z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (max_x0_s64_m_tied1, svint64_t, int64_t, -+ z0 = svmax_n_s64_m (p0, z0, x0), -+ z0 = svmax_m (p0, z0, x0)) -+ -+/* -+** max_x0_s64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** smax z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (max_x0_s64_m_untied, svint64_t, int64_t, -+ z0 = svmax_n_s64_m (p0, z1, x0), -+ z0 = svmax_m (p0, z1, x0)) -+ -+/* -+** max_1_s64_m_tied1: -+** mov (z[0-9]+\.d), #1 -+** smax z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_s64_m_tied1, svint64_t, -+ z0 = svmax_n_s64_m (p0, z0, 1), -+ z0 = svmax_m (p0, z0, 1)) -+ -+/* -+** max_1_s64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #1 -+** movprfx z0, z1 -+** smax z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_s64_m_untied, svint64_t, -+ z0 = svmax_n_s64_m (p0, z1, 1), -+ z0 = svmax_m (p0, z1, 1)) -+ -+/* -+** max_m1_s64_m: -+** mov (z[0-9]+)\.b, #-1 -+** smax z0\.d, p0/m, z0\.d, \1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (max_m1_s64_m, svint64_t, -+ z0 = svmax_n_s64_m (p0, z0, -1), -+ z0 = svmax_m (p0, z0, -1)) -+ -+/* -+** max_s64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** smax z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (max_s64_z_tied1, svint64_t, -+ z0 = svmax_s64_z (p0, z0, z1), -+ z0 = svmax_z (p0, z0, z1)) -+ -+/* -+** max_s64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** smax z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (max_s64_z_tied2, svint64_t, -+ z0 = svmax_s64_z (p0, z1, z0), -+ z0 = svmax_z (p0, z1, z0)) -+ -+/* -+** max_s64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** smax z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** smax z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (max_s64_z_untied, svint64_t, -+ z0 = svmax_s64_z (p0, z1, z2), -+ z0 = svmax_z (p0, z1, z2)) -+ -+/* -+** max_x0_s64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** smax z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ 
-+TEST_UNIFORM_ZX (max_x0_s64_z_tied1, svint64_t, int64_t, -+ z0 = svmax_n_s64_z (p0, z0, x0), -+ z0 = svmax_z (p0, z0, x0)) -+ -+/* -+** max_x0_s64_z_untied: -+** mov (z[0-9]+\.d), x0 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** smax z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** smax z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (max_x0_s64_z_untied, svint64_t, int64_t, -+ z0 = svmax_n_s64_z (p0, z1, x0), -+ z0 = svmax_z (p0, z1, x0)) -+ -+/* -+** max_1_s64_z_tied1: -+** mov (z[0-9]+\.d), #1 -+** movprfx z0\.d, p0/z, z0\.d -+** smax z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_s64_z_tied1, svint64_t, -+ z0 = svmax_n_s64_z (p0, z0, 1), -+ z0 = svmax_z (p0, z0, 1)) -+ -+/* -+** max_1_s64_z_untied: -+** mov (z[0-9]+\.d), #1 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** smax z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** smax z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_s64_z_untied, svint64_t, -+ z0 = svmax_n_s64_z (p0, z1, 1), -+ z0 = svmax_z (p0, z1, 1)) -+ -+/* -+** max_s64_x_tied1: -+** smax z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (max_s64_x_tied1, svint64_t, -+ z0 = svmax_s64_x (p0, z0, z1), -+ z0 = svmax_x (p0, z0, z1)) -+ -+/* -+** max_s64_x_tied2: -+** smax z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (max_s64_x_tied2, svint64_t, -+ z0 = svmax_s64_x (p0, z1, z0), -+ z0 = svmax_x (p0, z1, z0)) -+ -+/* -+** max_s64_x_untied: -+** ( -+** movprfx z0, z1 -+** smax z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0, z2 -+** smax z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (max_s64_x_untied, svint64_t, -+ z0 = svmax_s64_x (p0, z1, z2), -+ z0 = svmax_x (p0, z1, z2)) -+ -+/* -+** max_x0_s64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** smax z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (max_x0_s64_x_tied1, svint64_t, int64_t, -+ z0 = svmax_n_s64_x (p0, z0, x0), -+ z0 = svmax_x (p0, z0, x0)) -+ -+/* -+** max_x0_s64_x_untied: -+** mov z0\.d, x0 -+** smax z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (max_x0_s64_x_untied, svint64_t, int64_t, -+ z0 = svmax_n_s64_x (p0, z1, x0), -+ z0 = svmax_x (p0, z1, x0)) -+ -+/* -+** max_1_s64_x_tied1: -+** smax z0\.d, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_s64_x_tied1, svint64_t, -+ z0 = svmax_n_s64_x (p0, z0, 1), -+ z0 = svmax_x (p0, z0, 1)) -+ -+/* -+** max_1_s64_x_untied: -+** movprfx z0, z1 -+** smax z0\.d, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_s64_x_untied, svint64_t, -+ z0 = svmax_n_s64_x (p0, z1, 1), -+ z0 = svmax_x (p0, z1, 1)) -+ -+/* -+** max_127_s64_x: -+** smax z0\.d, z0\.d, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (max_127_s64_x, svint64_t, -+ z0 = svmax_n_s64_x (p0, z0, 127), -+ z0 = svmax_x (p0, z0, 127)) -+ -+/* -+** max_128_s64_x: -+** mov (z[0-9]+\.d), #128 -+** smax z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_128_s64_x, svint64_t, -+ z0 = svmax_n_s64_x (p0, z0, 128), -+ z0 = svmax_x (p0, z0, 128)) -+ -+/* -+** max_m1_s64_x: -+** smax z0\.d, z0\.d, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_m1_s64_x, svint64_t, -+ z0 = svmax_n_s64_x (p0, z0, -1), -+ z0 = svmax_x (p0, z0, -1)) -+ -+/* -+** max_m128_s64_x: -+** smax z0\.d, z0\.d, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (max_m128_s64_x, svint64_t, -+ z0 = svmax_n_s64_x (p0, z0, -128), -+ z0 = svmax_x (p0, z0, -128)) -+ -+/* -+** max_m129_s64_x: -+** mov (z[0-9]+\.d), #-129 -+** smax z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_m129_s64_x, svint64_t, -+ z0 = svmax_n_s64_x (p0, z0, -129), -+ z0 = svmax_x 
(p0, z0, -129)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_s8.c -new file mode 100644 -index 000000000..c651a26f0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_s8.c -@@ -0,0 +1,273 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** max_s8_m_tied1: -+** smax z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (max_s8_m_tied1, svint8_t, -+ z0 = svmax_s8_m (p0, z0, z1), -+ z0 = svmax_m (p0, z0, z1)) -+ -+/* -+** max_s8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** smax z0\.b, p0/m, z0\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (max_s8_m_tied2, svint8_t, -+ z0 = svmax_s8_m (p0, z1, z0), -+ z0 = svmax_m (p0, z1, z0)) -+ -+/* -+** max_s8_m_untied: -+** movprfx z0, z1 -+** smax z0\.b, p0/m, z0\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (max_s8_m_untied, svint8_t, -+ z0 = svmax_s8_m (p0, z1, z2), -+ z0 = svmax_m (p0, z1, z2)) -+ -+/* -+** max_w0_s8_m_tied1: -+** mov (z[0-9]+\.b), w0 -+** smax z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (max_w0_s8_m_tied1, svint8_t, int8_t, -+ z0 = svmax_n_s8_m (p0, z0, x0), -+ z0 = svmax_m (p0, z0, x0)) -+ -+/* -+** max_w0_s8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** smax z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (max_w0_s8_m_untied, svint8_t, int8_t, -+ z0 = svmax_n_s8_m (p0, z1, x0), -+ z0 = svmax_m (p0, z1, x0)) -+ -+/* -+** max_1_s8_m_tied1: -+** mov (z[0-9]+\.b), #1 -+** smax z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_s8_m_tied1, svint8_t, -+ z0 = svmax_n_s8_m (p0, z0, 1), -+ z0 = svmax_m (p0, z0, 1)) -+ -+/* -+** max_1_s8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), #1 -+** movprfx z0, z1 -+** smax z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_s8_m_untied, svint8_t, -+ z0 = svmax_n_s8_m (p0, z1, 1), -+ z0 = svmax_m (p0, z1, 1)) -+ -+/* -+** max_m1_s8_m: -+** mov (z[0-9]+\.b), #-1 -+** smax z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_m1_s8_m, svint8_t, -+ z0 = svmax_n_s8_m (p0, z0, -1), -+ z0 = svmax_m (p0, z0, -1)) -+ -+/* -+** max_s8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** smax z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (max_s8_z_tied1, svint8_t, -+ z0 = svmax_s8_z (p0, z0, z1), -+ z0 = svmax_z (p0, z0, z1)) -+ -+/* -+** max_s8_z_tied2: -+** movprfx z0\.b, p0/z, z0\.b -+** smax z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (max_s8_z_tied2, svint8_t, -+ z0 = svmax_s8_z (p0, z1, z0), -+ z0 = svmax_z (p0, z1, z0)) -+ -+/* -+** max_s8_z_untied: -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** smax z0\.b, p0/m, z0\.b, z2\.b -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** smax z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (max_s8_z_untied, svint8_t, -+ z0 = svmax_s8_z (p0, z1, z2), -+ z0 = svmax_z (p0, z1, z2)) -+ -+/* -+** max_w0_s8_z_tied1: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** smax z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (max_w0_s8_z_tied1, svint8_t, int8_t, -+ z0 = svmax_n_s8_z (p0, z0, x0), -+ z0 = svmax_z (p0, z0, x0)) -+ -+/* -+** max_w0_s8_z_untied: -+** mov (z[0-9]+\.b), w0 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** smax z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** smax z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (max_w0_s8_z_untied, svint8_t, int8_t, -+ z0 = svmax_n_s8_z (p0, z1, x0), -+ z0 = svmax_z (p0, z1, x0)) -+ -+/* -+** 
max_1_s8_z_tied1: -+** mov (z[0-9]+\.b), #1 -+** movprfx z0\.b, p0/z, z0\.b -+** smax z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_s8_z_tied1, svint8_t, -+ z0 = svmax_n_s8_z (p0, z0, 1), -+ z0 = svmax_z (p0, z0, 1)) -+ -+/* -+** max_1_s8_z_untied: -+** mov (z[0-9]+\.b), #1 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** smax z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** smax z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_s8_z_untied, svint8_t, -+ z0 = svmax_n_s8_z (p0, z1, 1), -+ z0 = svmax_z (p0, z1, 1)) -+ -+/* -+** max_s8_x_tied1: -+** smax z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (max_s8_x_tied1, svint8_t, -+ z0 = svmax_s8_x (p0, z0, z1), -+ z0 = svmax_x (p0, z0, z1)) -+ -+/* -+** max_s8_x_tied2: -+** smax z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (max_s8_x_tied2, svint8_t, -+ z0 = svmax_s8_x (p0, z1, z0), -+ z0 = svmax_x (p0, z1, z0)) -+ -+/* -+** max_s8_x_untied: -+** ( -+** movprfx z0, z1 -+** smax z0\.b, p0/m, z0\.b, z2\.b -+** | -+** movprfx z0, z2 -+** smax z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (max_s8_x_untied, svint8_t, -+ z0 = svmax_s8_x (p0, z1, z2), -+ z0 = svmax_x (p0, z1, z2)) -+ -+/* -+** max_w0_s8_x_tied1: -+** mov (z[0-9]+\.b), w0 -+** smax z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (max_w0_s8_x_tied1, svint8_t, int8_t, -+ z0 = svmax_n_s8_x (p0, z0, x0), -+ z0 = svmax_x (p0, z0, x0)) -+ -+/* -+** max_w0_s8_x_untied: -+** mov z0\.b, w0 -+** smax z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_ZX (max_w0_s8_x_untied, svint8_t, int8_t, -+ z0 = svmax_n_s8_x (p0, z1, x0), -+ z0 = svmax_x (p0, z1, x0)) -+ -+/* -+** max_1_s8_x_tied1: -+** smax z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_s8_x_tied1, svint8_t, -+ z0 = svmax_n_s8_x (p0, z0, 1), -+ z0 = svmax_x (p0, z0, 1)) -+ -+/* -+** max_1_s8_x_untied: -+** movprfx z0, z1 -+** smax z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_s8_x_untied, svint8_t, -+ z0 = svmax_n_s8_x (p0, z1, 1), -+ z0 = svmax_x (p0, z1, 1)) -+ -+/* -+** max_127_s8_x: -+** smax z0\.b, z0\.b, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (max_127_s8_x, svint8_t, -+ z0 = svmax_n_s8_x (p0, z0, 127), -+ z0 = svmax_x (p0, z0, 127)) -+ -+/* -+** max_m1_s8_x: -+** smax z0\.b, z0\.b, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_m1_s8_x, svint8_t, -+ z0 = svmax_n_s8_x (p0, z0, -1), -+ z0 = svmax_x (p0, z0, -1)) -+ -+/* -+** max_m127_s8_x: -+** smax z0\.b, z0\.b, #-127 -+** ret -+*/ -+TEST_UNIFORM_Z (max_m127_s8_x, svint8_t, -+ z0 = svmax_n_s8_x (p0, z0, -127), -+ z0 = svmax_x (p0, z0, -127)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_u16.c -new file mode 100644 -index 000000000..9a0b95431 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_u16.c -@@ -0,0 +1,293 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** max_u16_m_tied1: -+** umax z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (max_u16_m_tied1, svuint16_t, -+ z0 = svmax_u16_m (p0, z0, z1), -+ z0 = svmax_m (p0, z0, z1)) -+ -+/* -+** max_u16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** umax z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (max_u16_m_tied2, svuint16_t, -+ z0 = svmax_u16_m (p0, z1, z0), -+ z0 = svmax_m (p0, z1, z0)) -+ -+/* -+** max_u16_m_untied: -+** movprfx z0, z1 -+** umax z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (max_u16_m_untied, svuint16_t, -+ z0 
= svmax_u16_m (p0, z1, z2), -+ z0 = svmax_m (p0, z1, z2)) -+ -+/* -+** max_w0_u16_m_tied1: -+** mov (z[0-9]+\.h), w0 -+** umax z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (max_w0_u16_m_tied1, svuint16_t, uint16_t, -+ z0 = svmax_n_u16_m (p0, z0, x0), -+ z0 = svmax_m (p0, z0, x0)) -+ -+/* -+** max_w0_u16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** umax z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (max_w0_u16_m_untied, svuint16_t, uint16_t, -+ z0 = svmax_n_u16_m (p0, z1, x0), -+ z0 = svmax_m (p0, z1, x0)) -+ -+/* -+** max_1_u16_m_tied1: -+** mov (z[0-9]+\.h), #1 -+** umax z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_u16_m_tied1, svuint16_t, -+ z0 = svmax_n_u16_m (p0, z0, 1), -+ z0 = svmax_m (p0, z0, 1)) -+ -+/* -+** max_1_u16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), #1 -+** movprfx z0, z1 -+** umax z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_u16_m_untied, svuint16_t, -+ z0 = svmax_n_u16_m (p0, z1, 1), -+ z0 = svmax_m (p0, z1, 1)) -+ -+/* -+** max_m1_u16_m: -+** mov (z[0-9]+)\.b, #-1 -+** umax z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (max_m1_u16_m, svuint16_t, -+ z0 = svmax_n_u16_m (p0, z0, -1), -+ z0 = svmax_m (p0, z0, -1)) -+ -+/* -+** max_u16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** umax z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (max_u16_z_tied1, svuint16_t, -+ z0 = svmax_u16_z (p0, z0, z1), -+ z0 = svmax_z (p0, z0, z1)) -+ -+/* -+** max_u16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** umax z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (max_u16_z_tied2, svuint16_t, -+ z0 = svmax_u16_z (p0, z1, z0), -+ z0 = svmax_z (p0, z1, z0)) -+ -+/* -+** max_u16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** umax z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** umax z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (max_u16_z_untied, svuint16_t, -+ z0 = svmax_u16_z (p0, z1, z2), -+ z0 = svmax_z (p0, z1, z2)) -+ -+/* -+** max_w0_u16_z_tied1: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** umax z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (max_w0_u16_z_tied1, svuint16_t, uint16_t, -+ z0 = svmax_n_u16_z (p0, z0, x0), -+ z0 = svmax_z (p0, z0, x0)) -+ -+/* -+** max_w0_u16_z_untied: -+** mov (z[0-9]+\.h), w0 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** umax z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** umax z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (max_w0_u16_z_untied, svuint16_t, uint16_t, -+ z0 = svmax_n_u16_z (p0, z1, x0), -+ z0 = svmax_z (p0, z1, x0)) -+ -+/* -+** max_1_u16_z_tied1: -+** mov (z[0-9]+\.h), #1 -+** movprfx z0\.h, p0/z, z0\.h -+** umax z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_u16_z_tied1, svuint16_t, -+ z0 = svmax_n_u16_z (p0, z0, 1), -+ z0 = svmax_z (p0, z0, 1)) -+ -+/* -+** max_1_u16_z_untied: -+** mov (z[0-9]+\.h), #1 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** umax z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** umax z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_u16_z_untied, svuint16_t, -+ z0 = svmax_n_u16_z (p0, z1, 1), -+ z0 = svmax_z (p0, z1, 1)) -+ -+/* -+** max_u16_x_tied1: -+** umax z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (max_u16_x_tied1, svuint16_t, -+ z0 = svmax_u16_x (p0, z0, z1), -+ z0 = svmax_x (p0, z0, z1)) -+ -+/* -+** max_u16_x_tied2: -+** umax z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (max_u16_x_tied2, svuint16_t, -+ z0 = svmax_u16_x 
(p0, z1, z0), -+ z0 = svmax_x (p0, z1, z0)) -+ -+/* -+** max_u16_x_untied: -+** ( -+** movprfx z0, z1 -+** umax z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0, z2 -+** umax z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (max_u16_x_untied, svuint16_t, -+ z0 = svmax_u16_x (p0, z1, z2), -+ z0 = svmax_x (p0, z1, z2)) -+ -+/* -+** max_w0_u16_x_tied1: -+** mov (z[0-9]+\.h), w0 -+** umax z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (max_w0_u16_x_tied1, svuint16_t, uint16_t, -+ z0 = svmax_n_u16_x (p0, z0, x0), -+ z0 = svmax_x (p0, z0, x0)) -+ -+/* -+** max_w0_u16_x_untied: -+** mov z0\.h, w0 -+** umax z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZX (max_w0_u16_x_untied, svuint16_t, uint16_t, -+ z0 = svmax_n_u16_x (p0, z1, x0), -+ z0 = svmax_x (p0, z1, x0)) -+ -+/* -+** max_1_u16_x_tied1: -+** umax z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_u16_x_tied1, svuint16_t, -+ z0 = svmax_n_u16_x (p0, z0, 1), -+ z0 = svmax_x (p0, z0, 1)) -+ -+/* -+** max_1_u16_x_untied: -+** movprfx z0, z1 -+** umax z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_u16_x_untied, svuint16_t, -+ z0 = svmax_n_u16_x (p0, z1, 1), -+ z0 = svmax_x (p0, z1, 1)) -+ -+/* -+** max_127_u16_x: -+** umax z0\.h, z0\.h, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (max_127_u16_x, svuint16_t, -+ z0 = svmax_n_u16_x (p0, z0, 127), -+ z0 = svmax_x (p0, z0, 127)) -+ -+/* -+** max_128_u16_x: -+** umax z0\.h, z0\.h, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (max_128_u16_x, svuint16_t, -+ z0 = svmax_n_u16_x (p0, z0, 128), -+ z0 = svmax_x (p0, z0, 128)) -+ -+/* -+** max_255_u16_x: -+** umax z0\.h, z0\.h, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (max_255_u16_x, svuint16_t, -+ z0 = svmax_n_u16_x (p0, z0, 255), -+ z0 = svmax_x (p0, z0, 255)) -+ -+/* -+** max_256_u16_x: -+** mov (z[0-9]+\.h), #256 -+** umax z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_256_u16_x, svuint16_t, -+ z0 = svmax_n_u16_x (p0, z0, 256), -+ z0 = svmax_x (p0, z0, 256)) -+ -+/* -+** max_m2_u16_x: -+** mov (z[0-9]+\.h), #-2 -+** umax z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_m2_u16_x, svuint16_t, -+ z0 = svmax_n_u16_x (p0, z0, -2), -+ z0 = svmax_x (p0, z0, -2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_u32.c -new file mode 100644 -index 000000000..91eba25c1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_u32.c -@@ -0,0 +1,293 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** max_u32_m_tied1: -+** umax z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (max_u32_m_tied1, svuint32_t, -+ z0 = svmax_u32_m (p0, z0, z1), -+ z0 = svmax_m (p0, z0, z1)) -+ -+/* -+** max_u32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** umax z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (max_u32_m_tied2, svuint32_t, -+ z0 = svmax_u32_m (p0, z1, z0), -+ z0 = svmax_m (p0, z1, z0)) -+ -+/* -+** max_u32_m_untied: -+** movprfx z0, z1 -+** umax z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (max_u32_m_untied, svuint32_t, -+ z0 = svmax_u32_m (p0, z1, z2), -+ z0 = svmax_m (p0, z1, z2)) -+ -+/* -+** max_w0_u32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** umax z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (max_w0_u32_m_tied1, svuint32_t, uint32_t, -+ z0 = svmax_n_u32_m (p0, z0, x0), -+ z0 = svmax_m (p0, z0, x0)) -+ -+/* -+** max_w0_u32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** umax z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ 
-+TEST_UNIFORM_ZX (max_w0_u32_m_untied, svuint32_t, uint32_t, -+ z0 = svmax_n_u32_m (p0, z1, x0), -+ z0 = svmax_m (p0, z1, x0)) -+ -+/* -+** max_1_u32_m_tied1: -+** mov (z[0-9]+\.s), #1 -+** umax z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_u32_m_tied1, svuint32_t, -+ z0 = svmax_n_u32_m (p0, z0, 1), -+ z0 = svmax_m (p0, z0, 1)) -+ -+/* -+** max_1_u32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #1 -+** movprfx z0, z1 -+** umax z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_u32_m_untied, svuint32_t, -+ z0 = svmax_n_u32_m (p0, z1, 1), -+ z0 = svmax_m (p0, z1, 1)) -+ -+/* -+** max_m1_u32_m: -+** mov (z[0-9]+)\.b, #-1 -+** umax z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (max_m1_u32_m, svuint32_t, -+ z0 = svmax_n_u32_m (p0, z0, -1), -+ z0 = svmax_m (p0, z0, -1)) -+ -+/* -+** max_u32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** umax z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (max_u32_z_tied1, svuint32_t, -+ z0 = svmax_u32_z (p0, z0, z1), -+ z0 = svmax_z (p0, z0, z1)) -+ -+/* -+** max_u32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** umax z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (max_u32_z_tied2, svuint32_t, -+ z0 = svmax_u32_z (p0, z1, z0), -+ z0 = svmax_z (p0, z1, z0)) -+ -+/* -+** max_u32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** umax z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** umax z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (max_u32_z_untied, svuint32_t, -+ z0 = svmax_u32_z (p0, z1, z2), -+ z0 = svmax_z (p0, z1, z2)) -+ -+/* -+** max_w0_u32_z_tied1: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** umax z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (max_w0_u32_z_tied1, svuint32_t, uint32_t, -+ z0 = svmax_n_u32_z (p0, z0, x0), -+ z0 = svmax_z (p0, z0, x0)) -+ -+/* -+** max_w0_u32_z_untied: -+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** umax z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** umax z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (max_w0_u32_z_untied, svuint32_t, uint32_t, -+ z0 = svmax_n_u32_z (p0, z1, x0), -+ z0 = svmax_z (p0, z1, x0)) -+ -+/* -+** max_1_u32_z_tied1: -+** mov (z[0-9]+\.s), #1 -+** movprfx z0\.s, p0/z, z0\.s -+** umax z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_u32_z_tied1, svuint32_t, -+ z0 = svmax_n_u32_z (p0, z0, 1), -+ z0 = svmax_z (p0, z0, 1)) -+ -+/* -+** max_1_u32_z_untied: -+** mov (z[0-9]+\.s), #1 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** umax z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** umax z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_u32_z_untied, svuint32_t, -+ z0 = svmax_n_u32_z (p0, z1, 1), -+ z0 = svmax_z (p0, z1, 1)) -+ -+/* -+** max_u32_x_tied1: -+** umax z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (max_u32_x_tied1, svuint32_t, -+ z0 = svmax_u32_x (p0, z0, z1), -+ z0 = svmax_x (p0, z0, z1)) -+ -+/* -+** max_u32_x_tied2: -+** umax z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (max_u32_x_tied2, svuint32_t, -+ z0 = svmax_u32_x (p0, z1, z0), -+ z0 = svmax_x (p0, z1, z0)) -+ -+/* -+** max_u32_x_untied: -+** ( -+** movprfx z0, z1 -+** umax z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0, z2 -+** umax z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (max_u32_x_untied, svuint32_t, -+ z0 = svmax_u32_x (p0, z1, z2), -+ z0 = svmax_x (p0, z1, z2)) -+ -+/* -+** max_w0_u32_x_tied1: -+** mov (z[0-9]+\.s), w0 -+** umax z0\.s, p0/m, z0\.s, \1 -+** ret 
-+*/ -+TEST_UNIFORM_ZX (max_w0_u32_x_tied1, svuint32_t, uint32_t, -+ z0 = svmax_n_u32_x (p0, z0, x0), -+ z0 = svmax_x (p0, z0, x0)) -+ -+/* -+** max_w0_u32_x_untied: -+** mov z0\.s, w0 -+** umax z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (max_w0_u32_x_untied, svuint32_t, uint32_t, -+ z0 = svmax_n_u32_x (p0, z1, x0), -+ z0 = svmax_x (p0, z1, x0)) -+ -+/* -+** max_1_u32_x_tied1: -+** umax z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_u32_x_tied1, svuint32_t, -+ z0 = svmax_n_u32_x (p0, z0, 1), -+ z0 = svmax_x (p0, z0, 1)) -+ -+/* -+** max_1_u32_x_untied: -+** movprfx z0, z1 -+** umax z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_u32_x_untied, svuint32_t, -+ z0 = svmax_n_u32_x (p0, z1, 1), -+ z0 = svmax_x (p0, z1, 1)) -+ -+/* -+** max_127_u32_x: -+** umax z0\.s, z0\.s, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (max_127_u32_x, svuint32_t, -+ z0 = svmax_n_u32_x (p0, z0, 127), -+ z0 = svmax_x (p0, z0, 127)) -+ -+/* -+** max_128_u32_x: -+** umax z0\.s, z0\.s, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (max_128_u32_x, svuint32_t, -+ z0 = svmax_n_u32_x (p0, z0, 128), -+ z0 = svmax_x (p0, z0, 128)) -+ -+/* -+** max_255_u32_x: -+** umax z0\.s, z0\.s, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (max_255_u32_x, svuint32_t, -+ z0 = svmax_n_u32_x (p0, z0, 255), -+ z0 = svmax_x (p0, z0, 255)) -+ -+/* -+** max_256_u32_x: -+** mov (z[0-9]+\.s), #256 -+** umax z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_256_u32_x, svuint32_t, -+ z0 = svmax_n_u32_x (p0, z0, 256), -+ z0 = svmax_x (p0, z0, 256)) -+ -+/* -+** max_m2_u32_x: -+** mov (z[0-9]+\.s), #-2 -+** umax z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_m2_u32_x, svuint32_t, -+ z0 = svmax_n_u32_x (p0, z0, -2), -+ z0 = svmax_x (p0, z0, -2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_u64.c -new file mode 100644 -index 000000000..5be4c9fb7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_u64.c -@@ -0,0 +1,293 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** max_u64_m_tied1: -+** umax z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (max_u64_m_tied1, svuint64_t, -+ z0 = svmax_u64_m (p0, z0, z1), -+ z0 = svmax_m (p0, z0, z1)) -+ -+/* -+** max_u64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** umax z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_u64_m_tied2, svuint64_t, -+ z0 = svmax_u64_m (p0, z1, z0), -+ z0 = svmax_m (p0, z1, z0)) -+ -+/* -+** max_u64_m_untied: -+** movprfx z0, z1 -+** umax z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (max_u64_m_untied, svuint64_t, -+ z0 = svmax_u64_m (p0, z1, z2), -+ z0 = svmax_m (p0, z1, z2)) -+ -+/* -+** max_x0_u64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** umax z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (max_x0_u64_m_tied1, svuint64_t, uint64_t, -+ z0 = svmax_n_u64_m (p0, z0, x0), -+ z0 = svmax_m (p0, z0, x0)) -+ -+/* -+** max_x0_u64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** umax z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (max_x0_u64_m_untied, svuint64_t, uint64_t, -+ z0 = svmax_n_u64_m (p0, z1, x0), -+ z0 = svmax_m (p0, z1, x0)) -+ -+/* -+** max_1_u64_m_tied1: -+** mov (z[0-9]+\.d), #1 -+** umax z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_u64_m_tied1, svuint64_t, -+ z0 = svmax_n_u64_m (p0, z0, 1), -+ z0 = svmax_m (p0, z0, 1)) -+ -+/* -+** max_1_u64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #1 -+** movprfx z0, z1 
-+** umax z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_u64_m_untied, svuint64_t, -+ z0 = svmax_n_u64_m (p0, z1, 1), -+ z0 = svmax_m (p0, z1, 1)) -+ -+/* -+** max_m1_u64_m: -+** mov (z[0-9]+)\.b, #-1 -+** umax z0\.d, p0/m, z0\.d, \1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (max_m1_u64_m, svuint64_t, -+ z0 = svmax_n_u64_m (p0, z0, -1), -+ z0 = svmax_m (p0, z0, -1)) -+ -+/* -+** max_u64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** umax z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (max_u64_z_tied1, svuint64_t, -+ z0 = svmax_u64_z (p0, z0, z1), -+ z0 = svmax_z (p0, z0, z1)) -+ -+/* -+** max_u64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** umax z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (max_u64_z_tied2, svuint64_t, -+ z0 = svmax_u64_z (p0, z1, z0), -+ z0 = svmax_z (p0, z1, z0)) -+ -+/* -+** max_u64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** umax z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** umax z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (max_u64_z_untied, svuint64_t, -+ z0 = svmax_u64_z (p0, z1, z2), -+ z0 = svmax_z (p0, z1, z2)) -+ -+/* -+** max_x0_u64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** umax z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (max_x0_u64_z_tied1, svuint64_t, uint64_t, -+ z0 = svmax_n_u64_z (p0, z0, x0), -+ z0 = svmax_z (p0, z0, x0)) -+ -+/* -+** max_x0_u64_z_untied: -+** mov (z[0-9]+\.d), x0 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** umax z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** umax z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (max_x0_u64_z_untied, svuint64_t, uint64_t, -+ z0 = svmax_n_u64_z (p0, z1, x0), -+ z0 = svmax_z (p0, z1, x0)) -+ -+/* -+** max_1_u64_z_tied1: -+** mov (z[0-9]+\.d), #1 -+** movprfx z0\.d, p0/z, z0\.d -+** umax z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_u64_z_tied1, svuint64_t, -+ z0 = svmax_n_u64_z (p0, z0, 1), -+ z0 = svmax_z (p0, z0, 1)) -+ -+/* -+** max_1_u64_z_untied: -+** mov (z[0-9]+\.d), #1 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** umax z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** umax z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_u64_z_untied, svuint64_t, -+ z0 = svmax_n_u64_z (p0, z1, 1), -+ z0 = svmax_z (p0, z1, 1)) -+ -+/* -+** max_u64_x_tied1: -+** umax z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (max_u64_x_tied1, svuint64_t, -+ z0 = svmax_u64_x (p0, z0, z1), -+ z0 = svmax_x (p0, z0, z1)) -+ -+/* -+** max_u64_x_tied2: -+** umax z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (max_u64_x_tied2, svuint64_t, -+ z0 = svmax_u64_x (p0, z1, z0), -+ z0 = svmax_x (p0, z1, z0)) -+ -+/* -+** max_u64_x_untied: -+** ( -+** movprfx z0, z1 -+** umax z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0, z2 -+** umax z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (max_u64_x_untied, svuint64_t, -+ z0 = svmax_u64_x (p0, z1, z2), -+ z0 = svmax_x (p0, z1, z2)) -+ -+/* -+** max_x0_u64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** umax z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (max_x0_u64_x_tied1, svuint64_t, uint64_t, -+ z0 = svmax_n_u64_x (p0, z0, x0), -+ z0 = svmax_x (p0, z0, x0)) -+ -+/* -+** max_x0_u64_x_untied: -+** mov z0\.d, x0 -+** umax z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (max_x0_u64_x_untied, svuint64_t, uint64_t, -+ z0 = svmax_n_u64_x (p0, z1, x0), -+ z0 = svmax_x (p0, z1, x0)) -+ -+/* -+** max_1_u64_x_tied1: -+** umax z0\.d, z0\.d, #1 -+** ret -+*/ 
-+TEST_UNIFORM_Z (max_1_u64_x_tied1, svuint64_t, -+ z0 = svmax_n_u64_x (p0, z0, 1), -+ z0 = svmax_x (p0, z0, 1)) -+ -+/* -+** max_1_u64_x_untied: -+** movprfx z0, z1 -+** umax z0\.d, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_u64_x_untied, svuint64_t, -+ z0 = svmax_n_u64_x (p0, z1, 1), -+ z0 = svmax_x (p0, z1, 1)) -+ -+/* -+** max_127_u64_x: -+** umax z0\.d, z0\.d, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (max_127_u64_x, svuint64_t, -+ z0 = svmax_n_u64_x (p0, z0, 127), -+ z0 = svmax_x (p0, z0, 127)) -+ -+/* -+** max_128_u64_x: -+** umax z0\.d, z0\.d, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (max_128_u64_x, svuint64_t, -+ z0 = svmax_n_u64_x (p0, z0, 128), -+ z0 = svmax_x (p0, z0, 128)) -+ -+/* -+** max_255_u64_x: -+** umax z0\.d, z0\.d, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (max_255_u64_x, svuint64_t, -+ z0 = svmax_n_u64_x (p0, z0, 255), -+ z0 = svmax_x (p0, z0, 255)) -+ -+/* -+** max_256_u64_x: -+** mov (z[0-9]+\.d), #256 -+** umax z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_256_u64_x, svuint64_t, -+ z0 = svmax_n_u64_x (p0, z0, 256), -+ z0 = svmax_x (p0, z0, 256)) -+ -+/* -+** max_m2_u64_x: -+** mov (z[0-9]+\.d), #-2 -+** umax z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_m2_u64_x, svuint64_t, -+ z0 = svmax_n_u64_x (p0, z0, -2), -+ z0 = svmax_x (p0, z0, -2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_u8.c -new file mode 100644 -index 000000000..04c9ddb36 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/max_u8.c -@@ -0,0 +1,273 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** max_u8_m_tied1: -+** umax z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (max_u8_m_tied1, svuint8_t, -+ z0 = svmax_u8_m (p0, z0, z1), -+ z0 = svmax_m (p0, z0, z1)) -+ -+/* -+** max_u8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** umax z0\.b, p0/m, z0\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (max_u8_m_tied2, svuint8_t, -+ z0 = svmax_u8_m (p0, z1, z0), -+ z0 = svmax_m (p0, z1, z0)) -+ -+/* -+** max_u8_m_untied: -+** movprfx z0, z1 -+** umax z0\.b, p0/m, z0\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (max_u8_m_untied, svuint8_t, -+ z0 = svmax_u8_m (p0, z1, z2), -+ z0 = svmax_m (p0, z1, z2)) -+ -+/* -+** max_w0_u8_m_tied1: -+** mov (z[0-9]+\.b), w0 -+** umax z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (max_w0_u8_m_tied1, svuint8_t, uint8_t, -+ z0 = svmax_n_u8_m (p0, z0, x0), -+ z0 = svmax_m (p0, z0, x0)) -+ -+/* -+** max_w0_u8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** umax z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (max_w0_u8_m_untied, svuint8_t, uint8_t, -+ z0 = svmax_n_u8_m (p0, z1, x0), -+ z0 = svmax_m (p0, z1, x0)) -+ -+/* -+** max_1_u8_m_tied1: -+** mov (z[0-9]+\.b), #1 -+** umax z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_u8_m_tied1, svuint8_t, -+ z0 = svmax_n_u8_m (p0, z0, 1), -+ z0 = svmax_m (p0, z0, 1)) -+ -+/* -+** max_1_u8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), #1 -+** movprfx z0, z1 -+** umax z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_u8_m_untied, svuint8_t, -+ z0 = svmax_n_u8_m (p0, z1, 1), -+ z0 = svmax_m (p0, z1, 1)) -+ -+/* -+** max_m1_u8_m: -+** mov (z[0-9]+\.b), #-1 -+** umax z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_m1_u8_m, svuint8_t, -+ z0 = svmax_n_u8_m (p0, z0, -1), -+ z0 = svmax_m (p0, z0, -1)) -+ -+/* -+** max_u8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** umax z0\.b, 
p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (max_u8_z_tied1, svuint8_t, -+ z0 = svmax_u8_z (p0, z0, z1), -+ z0 = svmax_z (p0, z0, z1)) -+ -+/* -+** max_u8_z_tied2: -+** movprfx z0\.b, p0/z, z0\.b -+** umax z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (max_u8_z_tied2, svuint8_t, -+ z0 = svmax_u8_z (p0, z1, z0), -+ z0 = svmax_z (p0, z1, z0)) -+ -+/* -+** max_u8_z_untied: -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** umax z0\.b, p0/m, z0\.b, z2\.b -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** umax z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (max_u8_z_untied, svuint8_t, -+ z0 = svmax_u8_z (p0, z1, z2), -+ z0 = svmax_z (p0, z1, z2)) -+ -+/* -+** max_w0_u8_z_tied1: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** umax z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (max_w0_u8_z_tied1, svuint8_t, uint8_t, -+ z0 = svmax_n_u8_z (p0, z0, x0), -+ z0 = svmax_z (p0, z0, x0)) -+ -+/* -+** max_w0_u8_z_untied: -+** mov (z[0-9]+\.b), w0 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** umax z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** umax z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (max_w0_u8_z_untied, svuint8_t, uint8_t, -+ z0 = svmax_n_u8_z (p0, z1, x0), -+ z0 = svmax_z (p0, z1, x0)) -+ -+/* -+** max_1_u8_z_tied1: -+** mov (z[0-9]+\.b), #1 -+** movprfx z0\.b, p0/z, z0\.b -+** umax z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_u8_z_tied1, svuint8_t, -+ z0 = svmax_n_u8_z (p0, z0, 1), -+ z0 = svmax_z (p0, z0, 1)) -+ -+/* -+** max_1_u8_z_untied: -+** mov (z[0-9]+\.b), #1 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** umax z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** umax z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_u8_z_untied, svuint8_t, -+ z0 = svmax_n_u8_z (p0, z1, 1), -+ z0 = svmax_z (p0, z1, 1)) -+ -+/* -+** max_u8_x_tied1: -+** umax z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (max_u8_x_tied1, svuint8_t, -+ z0 = svmax_u8_x (p0, z0, z1), -+ z0 = svmax_x (p0, z0, z1)) -+ -+/* -+** max_u8_x_tied2: -+** umax z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (max_u8_x_tied2, svuint8_t, -+ z0 = svmax_u8_x (p0, z1, z0), -+ z0 = svmax_x (p0, z1, z0)) -+ -+/* -+** max_u8_x_untied: -+** ( -+** movprfx z0, z1 -+** umax z0\.b, p0/m, z0\.b, z2\.b -+** | -+** movprfx z0, z2 -+** umax z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (max_u8_x_untied, svuint8_t, -+ z0 = svmax_u8_x (p0, z1, z2), -+ z0 = svmax_x (p0, z1, z2)) -+ -+/* -+** max_w0_u8_x_tied1: -+** mov (z[0-9]+\.b), w0 -+** umax z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (max_w0_u8_x_tied1, svuint8_t, uint8_t, -+ z0 = svmax_n_u8_x (p0, z0, x0), -+ z0 = svmax_x (p0, z0, x0)) -+ -+/* -+** max_w0_u8_x_untied: -+** mov z0\.b, w0 -+** umax z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_ZX (max_w0_u8_x_untied, svuint8_t, uint8_t, -+ z0 = svmax_n_u8_x (p0, z1, x0), -+ z0 = svmax_x (p0, z1, x0)) -+ -+/* -+** max_1_u8_x_tied1: -+** umax z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_u8_x_tied1, svuint8_t, -+ z0 = svmax_n_u8_x (p0, z0, 1), -+ z0 = svmax_x (p0, z0, 1)) -+ -+/* -+** max_1_u8_x_untied: -+** movprfx z0, z1 -+** umax z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (max_1_u8_x_untied, svuint8_t, -+ z0 = svmax_n_u8_x (p0, z1, 1), -+ z0 = svmax_x (p0, z1, 1)) -+ -+/* -+** max_127_u8_x: -+** umax z0\.b, z0\.b, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (max_127_u8_x, svuint8_t, -+ z0 = svmax_n_u8_x (p0, z0, 127), -+ z0 = svmax_x (p0, z0, 127)) -+ -+/* -+** 
max_128_u8_x: -+** umax z0\.b, z0\.b, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (max_128_u8_x, svuint8_t, -+ z0 = svmax_n_u8_x (p0, z0, 128), -+ z0 = svmax_x (p0, z0, 128)) -+ -+/* -+** max_254_u8_x: -+** umax z0\.b, z0\.b, #254 -+** ret -+*/ -+TEST_UNIFORM_Z (max_254_u8_x, svuint8_t, -+ z0 = svmax_n_u8_x (p0, z0, 254), -+ z0 = svmax_x (p0, z0, 254)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxnm_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxnm_f16.c -new file mode 100644 -index 000000000..a9da710d0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxnm_f16.c -@@ -0,0 +1,425 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** maxnm_f16_m_tied1: -+** fmaxnm z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_f16_m_tied1, svfloat16_t, -+ z0 = svmaxnm_f16_m (p0, z0, z1), -+ z0 = svmaxnm_m (p0, z0, z1)) -+ -+/* -+** maxnm_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fmaxnm z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_f16_m_tied2, svfloat16_t, -+ z0 = svmaxnm_f16_m (p0, z1, z0), -+ z0 = svmaxnm_m (p0, z1, z0)) -+ -+/* -+** maxnm_f16_m_untied: -+** movprfx z0, z1 -+** fmaxnm z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_f16_m_untied, svfloat16_t, -+ z0 = svmaxnm_f16_m (p0, z1, z2), -+ z0 = svmaxnm_m (p0, z1, z2)) -+ -+/* -+** maxnm_h4_f16_m_tied1: -+** mov (z[0-9]+\.h), h4 -+** fmaxnm z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (maxnm_h4_f16_m_tied1, svfloat16_t, __fp16, -+ z0 = svmaxnm_n_f16_m (p0, z0, d4), -+ z0 = svmaxnm_m (p0, z0, d4)) -+ -+/* -+** maxnm_h4_f16_m_untied: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0, z1 -+** fmaxnm z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (maxnm_h4_f16_m_untied, svfloat16_t, __fp16, -+ z0 = svmaxnm_n_f16_m (p0, z1, d4), -+ z0 = svmaxnm_m (p0, z1, d4)) -+ -+/* -+** maxnm_0_f16_m_tied1: -+** fmaxnm z0\.h, p0/m, z0\.h, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_0_f16_m_tied1, svfloat16_t, -+ z0 = svmaxnm_n_f16_m (p0, z0, 0), -+ z0 = svmaxnm_m (p0, z0, 0)) -+ -+/* -+** maxnm_0_f16_m_untied: -+** movprfx z0, z1 -+** fmaxnm z0\.h, p0/m, z0\.h, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_0_f16_m_untied, svfloat16_t, -+ z0 = svmaxnm_n_f16_m (p0, z1, 0), -+ z0 = svmaxnm_m (p0, z1, 0)) -+ -+/* -+** maxnm_1_f16_m_tied1: -+** fmaxnm z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_1_f16_m_tied1, svfloat16_t, -+ z0 = svmaxnm_n_f16_m (p0, z0, 1), -+ z0 = svmaxnm_m (p0, z0, 1)) -+ -+/* -+** maxnm_1_f16_m_untied: -+** movprfx z0, z1 -+** fmaxnm z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_1_f16_m_untied, svfloat16_t, -+ z0 = svmaxnm_n_f16_m (p0, z1, 1), -+ z0 = svmaxnm_m (p0, z1, 1)) -+ -+/* -+** maxnm_2_f16_m: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
-+** fmaxnm z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_2_f16_m, svfloat16_t, -+ z0 = svmaxnm_n_f16_m (p0, z0, 2), -+ z0 = svmaxnm_m (p0, z0, 2)) -+ -+/* -+** maxnm_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fmaxnm z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_f16_z_tied1, svfloat16_t, -+ z0 = svmaxnm_f16_z (p0, z0, z1), -+ z0 = svmaxnm_z (p0, z0, z1)) -+ -+/* -+** maxnm_f16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** fmaxnm z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_f16_z_tied2, svfloat16_t, -+ z0 = svmaxnm_f16_z (p0, z1, z0), -+ z0 = svmaxnm_z (p0, z1, z0)) -+ -+/* -+** maxnm_f16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fmaxnm z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fmaxnm z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_f16_z_untied, svfloat16_t, -+ z0 = svmaxnm_f16_z (p0, z1, z2), -+ z0 = svmaxnm_z (p0, z1, z2)) -+ -+/* -+** maxnm_h4_f16_z_tied1: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0\.h, p0/z, z0\.h -+** fmaxnm z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (maxnm_h4_f16_z_tied1, svfloat16_t, __fp16, -+ z0 = svmaxnm_n_f16_z (p0, z0, d4), -+ z0 = svmaxnm_z (p0, z0, d4)) -+ -+/* -+** maxnm_h4_f16_z_untied: -+** mov (z[0-9]+\.h), h4 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fmaxnm z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** fmaxnm z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (maxnm_h4_f16_z_untied, svfloat16_t, __fp16, -+ z0 = svmaxnm_n_f16_z (p0, z1, d4), -+ z0 = svmaxnm_z (p0, z1, d4)) -+ -+/* -+** maxnm_0_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fmaxnm z0\.h, p0/m, z0\.h, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_0_f16_z_tied1, svfloat16_t, -+ z0 = svmaxnm_n_f16_z (p0, z0, 0), -+ z0 = svmaxnm_z (p0, z0, 0)) -+ -+/* -+** maxnm_0_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fmaxnm z0\.h, p0/m, z0\.h, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_0_f16_z_untied, svfloat16_t, -+ z0 = svmaxnm_n_f16_z (p0, z1, 0), -+ z0 = svmaxnm_z (p0, z1, 0)) -+ -+/* -+** maxnm_1_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fmaxnm z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_1_f16_z_tied1, svfloat16_t, -+ z0 = svmaxnm_n_f16_z (p0, z0, 1), -+ z0 = svmaxnm_z (p0, z0, 1)) -+ -+/* -+** maxnm_1_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fmaxnm z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_1_f16_z_untied, svfloat16_t, -+ z0 = svmaxnm_n_f16_z (p0, z1, 1), -+ z0 = svmaxnm_z (p0, z1, 1)) -+ -+/* -+** maxnm_2_f16_z: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
-+** movprfx z0\.h, p0/z, z0\.h -+** fmaxnm z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_2_f16_z, svfloat16_t, -+ z0 = svmaxnm_n_f16_z (p0, z0, 2), -+ z0 = svmaxnm_z (p0, z0, 2)) -+ -+/* -+** maxnm_f16_x_tied1: -+** fmaxnm z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_f16_x_tied1, svfloat16_t, -+ z0 = svmaxnm_f16_x (p0, z0, z1), -+ z0 = svmaxnm_x (p0, z0, z1)) -+ -+/* -+** maxnm_f16_x_tied2: -+** fmaxnm z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_f16_x_tied2, svfloat16_t, -+ z0 = svmaxnm_f16_x (p0, z1, z0), -+ z0 = svmaxnm_x (p0, z1, z0)) -+ -+/* -+** maxnm_f16_x_untied: -+** ( -+** movprfx z0, z1 -+** fmaxnm z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0, z2 -+** fmaxnm z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_f16_x_untied, svfloat16_t, -+ z0 = svmaxnm_f16_x (p0, z1, z2), -+ z0 = svmaxnm_x (p0, z1, z2)) -+ -+/* -+** maxnm_h4_f16_x_tied1: -+** mov (z[0-9]+\.h), h4 -+** fmaxnm z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (maxnm_h4_f16_x_tied1, svfloat16_t, __fp16, -+ z0 = svmaxnm_n_f16_x (p0, z0, d4), -+ z0 = svmaxnm_x (p0, z0, d4)) -+ -+/* -+** maxnm_h4_f16_x_untied: -+** mov z0\.h, h4 -+** fmaxnm z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZD (maxnm_h4_f16_x_untied, svfloat16_t, __fp16, -+ z0 = svmaxnm_n_f16_x (p0, z1, d4), -+ z0 = svmaxnm_x (p0, z1, d4)) -+ -+/* -+** maxnm_0_f16_x_tied1: -+** fmaxnm z0\.h, p0/m, z0\.h, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_0_f16_x_tied1, svfloat16_t, -+ z0 = svmaxnm_n_f16_x (p0, z0, 0), -+ z0 = svmaxnm_x (p0, z0, 0)) -+ -+/* -+** maxnm_0_f16_x_untied: -+** movprfx z0, z1 -+** fmaxnm z0\.h, p0/m, z0\.h, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_0_f16_x_untied, svfloat16_t, -+ z0 = svmaxnm_n_f16_x (p0, z1, 0), -+ z0 = svmaxnm_x (p0, z1, 0)) -+ -+/* -+** maxnm_1_f16_x_tied1: -+** fmaxnm z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_1_f16_x_tied1, svfloat16_t, -+ z0 = svmaxnm_n_f16_x (p0, z0, 1), -+ z0 = svmaxnm_x (p0, z0, 1)) -+ -+/* -+** maxnm_1_f16_x_untied: -+** movprfx z0, z1 -+** fmaxnm z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_1_f16_x_untied, svfloat16_t, -+ z0 = svmaxnm_n_f16_x (p0, z1, 1), -+ z0 = svmaxnm_x (p0, z1, 1)) -+ -+/* -+** maxnm_2_f16_x_tied1: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fmaxnm z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_2_f16_x_tied1, svfloat16_t, -+ z0 = svmaxnm_n_f16_x (p0, z0, 2), -+ z0 = svmaxnm_x (p0, z0, 2)) -+ -+/* -+** maxnm_2_f16_x_untied: -+** fmov z0\.h, #2\.0(?:e\+0)? -+** fmaxnm z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_2_f16_x_untied, svfloat16_t, -+ z0 = svmaxnm_n_f16_x (p0, z1, 2), -+ z0 = svmaxnm_x (p0, z1, 2)) -+ -+/* -+** ptrue_maxnm_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_maxnm_f16_x_tied1, svfloat16_t, -+ z0 = svmaxnm_f16_x (svptrue_b16 (), z0, z1), -+ z0 = svmaxnm_x (svptrue_b16 (), z0, z1)) -+ -+/* -+** ptrue_maxnm_f16_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_maxnm_f16_x_tied2, svfloat16_t, -+ z0 = svmaxnm_f16_x (svptrue_b16 (), z1, z0), -+ z0 = svmaxnm_x (svptrue_b16 (), z1, z0)) -+ -+/* -+** ptrue_maxnm_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_maxnm_f16_x_untied, svfloat16_t, -+ z0 = svmaxnm_f16_x (svptrue_b16 (), z1, z2), -+ z0 = svmaxnm_x (svptrue_b16 (), z1, z2)) -+ -+/* -+** ptrue_maxnm_0_f16_x_tied1: -+** ... 
-+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_maxnm_0_f16_x_tied1, svfloat16_t, -+ z0 = svmaxnm_n_f16_x (svptrue_b16 (), z0, 0), -+ z0 = svmaxnm_x (svptrue_b16 (), z0, 0)) -+ -+/* -+** ptrue_maxnm_0_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_maxnm_0_f16_x_untied, svfloat16_t, -+ z0 = svmaxnm_n_f16_x (svptrue_b16 (), z1, 0), -+ z0 = svmaxnm_x (svptrue_b16 (), z1, 0)) -+ -+/* -+** ptrue_maxnm_1_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_maxnm_1_f16_x_tied1, svfloat16_t, -+ z0 = svmaxnm_n_f16_x (svptrue_b16 (), z0, 1), -+ z0 = svmaxnm_x (svptrue_b16 (), z0, 1)) -+ -+/* -+** ptrue_maxnm_1_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_maxnm_1_f16_x_untied, svfloat16_t, -+ z0 = svmaxnm_n_f16_x (svptrue_b16 (), z1, 1), -+ z0 = svmaxnm_x (svptrue_b16 (), z1, 1)) -+ -+/* -+** ptrue_maxnm_2_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_maxnm_2_f16_x_tied1, svfloat16_t, -+ z0 = svmaxnm_n_f16_x (svptrue_b16 (), z0, 2), -+ z0 = svmaxnm_x (svptrue_b16 (), z0, 2)) -+ -+/* -+** ptrue_maxnm_2_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_maxnm_2_f16_x_untied, svfloat16_t, -+ z0 = svmaxnm_n_f16_x (svptrue_b16 (), z1, 2), -+ z0 = svmaxnm_x (svptrue_b16 (), z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxnm_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxnm_f32.c -new file mode 100644 -index 000000000..4657d57c0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxnm_f32.c -@@ -0,0 +1,425 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** maxnm_f32_m_tied1: -+** fmaxnm z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_f32_m_tied1, svfloat32_t, -+ z0 = svmaxnm_f32_m (p0, z0, z1), -+ z0 = svmaxnm_m (p0, z0, z1)) -+ -+/* -+** maxnm_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fmaxnm z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_f32_m_tied2, svfloat32_t, -+ z0 = svmaxnm_f32_m (p0, z1, z0), -+ z0 = svmaxnm_m (p0, z1, z0)) -+ -+/* -+** maxnm_f32_m_untied: -+** movprfx z0, z1 -+** fmaxnm z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_f32_m_untied, svfloat32_t, -+ z0 = svmaxnm_f32_m (p0, z1, z2), -+ z0 = svmaxnm_m (p0, z1, z2)) -+ -+/* -+** maxnm_s4_f32_m_tied1: -+** mov (z[0-9]+\.s), s4 -+** fmaxnm z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (maxnm_s4_f32_m_tied1, svfloat32_t, float, -+ z0 = svmaxnm_n_f32_m (p0, z0, d4), -+ z0 = svmaxnm_m (p0, z0, d4)) -+ -+/* -+** maxnm_s4_f32_m_untied: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0, z1 -+** fmaxnm z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (maxnm_s4_f32_m_untied, svfloat32_t, float, -+ z0 = svmaxnm_n_f32_m (p0, z1, d4), -+ z0 = svmaxnm_m (p0, z1, d4)) -+ -+/* -+** maxnm_0_f32_m_tied1: -+** fmaxnm z0\.s, p0/m, z0\.s, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_0_f32_m_tied1, svfloat32_t, -+ z0 = svmaxnm_n_f32_m (p0, z0, 0), -+ z0 = svmaxnm_m (p0, z0, 0)) -+ -+/* -+** maxnm_0_f32_m_untied: -+** movprfx z0, z1 -+** fmaxnm z0\.s, p0/m, z0\.s, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_0_f32_m_untied, svfloat32_t, -+ z0 = svmaxnm_n_f32_m (p0, z1, 0), -+ z0 = svmaxnm_m (p0, z1, 0)) -+ -+/* -+** maxnm_1_f32_m_tied1: -+** fmaxnm z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ 
-+TEST_UNIFORM_Z (maxnm_1_f32_m_tied1, svfloat32_t, -+ z0 = svmaxnm_n_f32_m (p0, z0, 1), -+ z0 = svmaxnm_m (p0, z0, 1)) -+ -+/* -+** maxnm_1_f32_m_untied: -+** movprfx z0, z1 -+** fmaxnm z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_1_f32_m_untied, svfloat32_t, -+ z0 = svmaxnm_n_f32_m (p0, z1, 1), -+ z0 = svmaxnm_m (p0, z1, 1)) -+ -+/* -+** maxnm_2_f32_m: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fmaxnm z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_2_f32_m, svfloat32_t, -+ z0 = svmaxnm_n_f32_m (p0, z0, 2), -+ z0 = svmaxnm_m (p0, z0, 2)) -+ -+/* -+** maxnm_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fmaxnm z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_f32_z_tied1, svfloat32_t, -+ z0 = svmaxnm_f32_z (p0, z0, z1), -+ z0 = svmaxnm_z (p0, z0, z1)) -+ -+/* -+** maxnm_f32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** fmaxnm z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_f32_z_tied2, svfloat32_t, -+ z0 = svmaxnm_f32_z (p0, z1, z0), -+ z0 = svmaxnm_z (p0, z1, z0)) -+ -+/* -+** maxnm_f32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fmaxnm z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fmaxnm z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_f32_z_untied, svfloat32_t, -+ z0 = svmaxnm_f32_z (p0, z1, z2), -+ z0 = svmaxnm_z (p0, z1, z2)) -+ -+/* -+** maxnm_s4_f32_z_tied1: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0\.s, p0/z, z0\.s -+** fmaxnm z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (maxnm_s4_f32_z_tied1, svfloat32_t, float, -+ z0 = svmaxnm_n_f32_z (p0, z0, d4), -+ z0 = svmaxnm_z (p0, z0, d4)) -+ -+/* -+** maxnm_s4_f32_z_untied: -+** mov (z[0-9]+\.s), s4 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fmaxnm z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** fmaxnm z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (maxnm_s4_f32_z_untied, svfloat32_t, float, -+ z0 = svmaxnm_n_f32_z (p0, z1, d4), -+ z0 = svmaxnm_z (p0, z1, d4)) -+ -+/* -+** maxnm_0_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fmaxnm z0\.s, p0/m, z0\.s, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_0_f32_z_tied1, svfloat32_t, -+ z0 = svmaxnm_n_f32_z (p0, z0, 0), -+ z0 = svmaxnm_z (p0, z0, 0)) -+ -+/* -+** maxnm_0_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fmaxnm z0\.s, p0/m, z0\.s, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_0_f32_z_untied, svfloat32_t, -+ z0 = svmaxnm_n_f32_z (p0, z1, 0), -+ z0 = svmaxnm_z (p0, z1, 0)) -+ -+/* -+** maxnm_1_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fmaxnm z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_1_f32_z_tied1, svfloat32_t, -+ z0 = svmaxnm_n_f32_z (p0, z0, 1), -+ z0 = svmaxnm_z (p0, z0, 1)) -+ -+/* -+** maxnm_1_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fmaxnm z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_1_f32_z_untied, svfloat32_t, -+ z0 = svmaxnm_n_f32_z (p0, z1, 1), -+ z0 = svmaxnm_z (p0, z1, 1)) -+ -+/* -+** maxnm_2_f32_z: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
-+** movprfx z0\.s, p0/z, z0\.s -+** fmaxnm z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_2_f32_z, svfloat32_t, -+ z0 = svmaxnm_n_f32_z (p0, z0, 2), -+ z0 = svmaxnm_z (p0, z0, 2)) -+ -+/* -+** maxnm_f32_x_tied1: -+** fmaxnm z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_f32_x_tied1, svfloat32_t, -+ z0 = svmaxnm_f32_x (p0, z0, z1), -+ z0 = svmaxnm_x (p0, z0, z1)) -+ -+/* -+** maxnm_f32_x_tied2: -+** fmaxnm z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_f32_x_tied2, svfloat32_t, -+ z0 = svmaxnm_f32_x (p0, z1, z0), -+ z0 = svmaxnm_x (p0, z1, z0)) -+ -+/* -+** maxnm_f32_x_untied: -+** ( -+** movprfx z0, z1 -+** fmaxnm z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0, z2 -+** fmaxnm z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_f32_x_untied, svfloat32_t, -+ z0 = svmaxnm_f32_x (p0, z1, z2), -+ z0 = svmaxnm_x (p0, z1, z2)) -+ -+/* -+** maxnm_s4_f32_x_tied1: -+** mov (z[0-9]+\.s), s4 -+** fmaxnm z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (maxnm_s4_f32_x_tied1, svfloat32_t, float, -+ z0 = svmaxnm_n_f32_x (p0, z0, d4), -+ z0 = svmaxnm_x (p0, z0, d4)) -+ -+/* -+** maxnm_s4_f32_x_untied: -+** mov z0\.s, s4 -+** fmaxnm z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZD (maxnm_s4_f32_x_untied, svfloat32_t, float, -+ z0 = svmaxnm_n_f32_x (p0, z1, d4), -+ z0 = svmaxnm_x (p0, z1, d4)) -+ -+/* -+** maxnm_0_f32_x_tied1: -+** fmaxnm z0\.s, p0/m, z0\.s, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_0_f32_x_tied1, svfloat32_t, -+ z0 = svmaxnm_n_f32_x (p0, z0, 0), -+ z0 = svmaxnm_x (p0, z0, 0)) -+ -+/* -+** maxnm_0_f32_x_untied: -+** movprfx z0, z1 -+** fmaxnm z0\.s, p0/m, z0\.s, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_0_f32_x_untied, svfloat32_t, -+ z0 = svmaxnm_n_f32_x (p0, z1, 0), -+ z0 = svmaxnm_x (p0, z1, 0)) -+ -+/* -+** maxnm_1_f32_x_tied1: -+** fmaxnm z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_1_f32_x_tied1, svfloat32_t, -+ z0 = svmaxnm_n_f32_x (p0, z0, 1), -+ z0 = svmaxnm_x (p0, z0, 1)) -+ -+/* -+** maxnm_1_f32_x_untied: -+** movprfx z0, z1 -+** fmaxnm z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_1_f32_x_untied, svfloat32_t, -+ z0 = svmaxnm_n_f32_x (p0, z1, 1), -+ z0 = svmaxnm_x (p0, z1, 1)) -+ -+/* -+** maxnm_2_f32_x_tied1: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fmaxnm z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_2_f32_x_tied1, svfloat32_t, -+ z0 = svmaxnm_n_f32_x (p0, z0, 2), -+ z0 = svmaxnm_x (p0, z0, 2)) -+ -+/* -+** maxnm_2_f32_x_untied: -+** fmov z0\.s, #2\.0(?:e\+0)? -+** fmaxnm z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_2_f32_x_untied, svfloat32_t, -+ z0 = svmaxnm_n_f32_x (p0, z1, 2), -+ z0 = svmaxnm_x (p0, z1, 2)) -+ -+/* -+** ptrue_maxnm_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_maxnm_f32_x_tied1, svfloat32_t, -+ z0 = svmaxnm_f32_x (svptrue_b32 (), z0, z1), -+ z0 = svmaxnm_x (svptrue_b32 (), z0, z1)) -+ -+/* -+** ptrue_maxnm_f32_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_maxnm_f32_x_tied2, svfloat32_t, -+ z0 = svmaxnm_f32_x (svptrue_b32 (), z1, z0), -+ z0 = svmaxnm_x (svptrue_b32 (), z1, z0)) -+ -+/* -+** ptrue_maxnm_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_maxnm_f32_x_untied, svfloat32_t, -+ z0 = svmaxnm_f32_x (svptrue_b32 (), z1, z2), -+ z0 = svmaxnm_x (svptrue_b32 (), z1, z2)) -+ -+/* -+** ptrue_maxnm_0_f32_x_tied1: -+** ... 
-+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_maxnm_0_f32_x_tied1, svfloat32_t, -+ z0 = svmaxnm_n_f32_x (svptrue_b32 (), z0, 0), -+ z0 = svmaxnm_x (svptrue_b32 (), z0, 0)) -+ -+/* -+** ptrue_maxnm_0_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_maxnm_0_f32_x_untied, svfloat32_t, -+ z0 = svmaxnm_n_f32_x (svptrue_b32 (), z1, 0), -+ z0 = svmaxnm_x (svptrue_b32 (), z1, 0)) -+ -+/* -+** ptrue_maxnm_1_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_maxnm_1_f32_x_tied1, svfloat32_t, -+ z0 = svmaxnm_n_f32_x (svptrue_b32 (), z0, 1), -+ z0 = svmaxnm_x (svptrue_b32 (), z0, 1)) -+ -+/* -+** ptrue_maxnm_1_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_maxnm_1_f32_x_untied, svfloat32_t, -+ z0 = svmaxnm_n_f32_x (svptrue_b32 (), z1, 1), -+ z0 = svmaxnm_x (svptrue_b32 (), z1, 1)) -+ -+/* -+** ptrue_maxnm_2_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_maxnm_2_f32_x_tied1, svfloat32_t, -+ z0 = svmaxnm_n_f32_x (svptrue_b32 (), z0, 2), -+ z0 = svmaxnm_x (svptrue_b32 (), z0, 2)) -+ -+/* -+** ptrue_maxnm_2_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_maxnm_2_f32_x_untied, svfloat32_t, -+ z0 = svmaxnm_n_f32_x (svptrue_b32 (), z1, 2), -+ z0 = svmaxnm_x (svptrue_b32 (), z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxnm_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxnm_f64.c -new file mode 100644 -index 000000000..07d88e6c1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxnm_f64.c -@@ -0,0 +1,425 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** maxnm_f64_m_tied1: -+** fmaxnm z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_f64_m_tied1, svfloat64_t, -+ z0 = svmaxnm_f64_m (p0, z0, z1), -+ z0 = svmaxnm_m (p0, z0, z1)) -+ -+/* -+** maxnm_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fmaxnm z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_f64_m_tied2, svfloat64_t, -+ z0 = svmaxnm_f64_m (p0, z1, z0), -+ z0 = svmaxnm_m (p0, z1, z0)) -+ -+/* -+** maxnm_f64_m_untied: -+** movprfx z0, z1 -+** fmaxnm z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_f64_m_untied, svfloat64_t, -+ z0 = svmaxnm_f64_m (p0, z1, z2), -+ z0 = svmaxnm_m (p0, z1, z2)) -+ -+/* -+** maxnm_d4_f64_m_tied1: -+** mov (z[0-9]+\.d), d4 -+** fmaxnm z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (maxnm_d4_f64_m_tied1, svfloat64_t, double, -+ z0 = svmaxnm_n_f64_m (p0, z0, d4), -+ z0 = svmaxnm_m (p0, z0, d4)) -+ -+/* -+** maxnm_d4_f64_m_untied: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0, z1 -+** fmaxnm z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (maxnm_d4_f64_m_untied, svfloat64_t, double, -+ z0 = svmaxnm_n_f64_m (p0, z1, d4), -+ z0 = svmaxnm_m (p0, z1, d4)) -+ -+/* -+** maxnm_0_f64_m_tied1: -+** fmaxnm z0\.d, p0/m, z0\.d, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_0_f64_m_tied1, svfloat64_t, -+ z0 = svmaxnm_n_f64_m (p0, z0, 0), -+ z0 = svmaxnm_m (p0, z0, 0)) -+ -+/* -+** maxnm_0_f64_m_untied: -+** movprfx z0, z1 -+** fmaxnm z0\.d, p0/m, z0\.d, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_0_f64_m_untied, svfloat64_t, -+ z0 = svmaxnm_n_f64_m (p0, z1, 0), -+ z0 = svmaxnm_m (p0, z1, 0)) -+ -+/* -+** maxnm_1_f64_m_tied1: -+** fmaxnm z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ 
-+TEST_UNIFORM_Z (maxnm_1_f64_m_tied1, svfloat64_t, -+ z0 = svmaxnm_n_f64_m (p0, z0, 1), -+ z0 = svmaxnm_m (p0, z0, 1)) -+ -+/* -+** maxnm_1_f64_m_untied: -+** movprfx z0, z1 -+** fmaxnm z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_1_f64_m_untied, svfloat64_t, -+ z0 = svmaxnm_n_f64_m (p0, z1, 1), -+ z0 = svmaxnm_m (p0, z1, 1)) -+ -+/* -+** maxnm_2_f64_m: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fmaxnm z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_2_f64_m, svfloat64_t, -+ z0 = svmaxnm_n_f64_m (p0, z0, 2), -+ z0 = svmaxnm_m (p0, z0, 2)) -+ -+/* -+** maxnm_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fmaxnm z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_f64_z_tied1, svfloat64_t, -+ z0 = svmaxnm_f64_z (p0, z0, z1), -+ z0 = svmaxnm_z (p0, z0, z1)) -+ -+/* -+** maxnm_f64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** fmaxnm z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_f64_z_tied2, svfloat64_t, -+ z0 = svmaxnm_f64_z (p0, z1, z0), -+ z0 = svmaxnm_z (p0, z1, z0)) -+ -+/* -+** maxnm_f64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fmaxnm z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fmaxnm z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_f64_z_untied, svfloat64_t, -+ z0 = svmaxnm_f64_z (p0, z1, z2), -+ z0 = svmaxnm_z (p0, z1, z2)) -+ -+/* -+** maxnm_d4_f64_z_tied1: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0\.d, p0/z, z0\.d -+** fmaxnm z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (maxnm_d4_f64_z_tied1, svfloat64_t, double, -+ z0 = svmaxnm_n_f64_z (p0, z0, d4), -+ z0 = svmaxnm_z (p0, z0, d4)) -+ -+/* -+** maxnm_d4_f64_z_untied: -+** mov (z[0-9]+\.d), d4 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fmaxnm z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** fmaxnm z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (maxnm_d4_f64_z_untied, svfloat64_t, double, -+ z0 = svmaxnm_n_f64_z (p0, z1, d4), -+ z0 = svmaxnm_z (p0, z1, d4)) -+ -+/* -+** maxnm_0_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fmaxnm z0\.d, p0/m, z0\.d, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_0_f64_z_tied1, svfloat64_t, -+ z0 = svmaxnm_n_f64_z (p0, z0, 0), -+ z0 = svmaxnm_z (p0, z0, 0)) -+ -+/* -+** maxnm_0_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fmaxnm z0\.d, p0/m, z0\.d, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_0_f64_z_untied, svfloat64_t, -+ z0 = svmaxnm_n_f64_z (p0, z1, 0), -+ z0 = svmaxnm_z (p0, z1, 0)) -+ -+/* -+** maxnm_1_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fmaxnm z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_1_f64_z_tied1, svfloat64_t, -+ z0 = svmaxnm_n_f64_z (p0, z0, 1), -+ z0 = svmaxnm_z (p0, z0, 1)) -+ -+/* -+** maxnm_1_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fmaxnm z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_1_f64_z_untied, svfloat64_t, -+ z0 = svmaxnm_n_f64_z (p0, z1, 1), -+ z0 = svmaxnm_z (p0, z1, 1)) -+ -+/* -+** maxnm_2_f64_z: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
-+** movprfx z0\.d, p0/z, z0\.d -+** fmaxnm z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_2_f64_z, svfloat64_t, -+ z0 = svmaxnm_n_f64_z (p0, z0, 2), -+ z0 = svmaxnm_z (p0, z0, 2)) -+ -+/* -+** maxnm_f64_x_tied1: -+** fmaxnm z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_f64_x_tied1, svfloat64_t, -+ z0 = svmaxnm_f64_x (p0, z0, z1), -+ z0 = svmaxnm_x (p0, z0, z1)) -+ -+/* -+** maxnm_f64_x_tied2: -+** fmaxnm z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_f64_x_tied2, svfloat64_t, -+ z0 = svmaxnm_f64_x (p0, z1, z0), -+ z0 = svmaxnm_x (p0, z1, z0)) -+ -+/* -+** maxnm_f64_x_untied: -+** ( -+** movprfx z0, z1 -+** fmaxnm z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0, z2 -+** fmaxnm z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_f64_x_untied, svfloat64_t, -+ z0 = svmaxnm_f64_x (p0, z1, z2), -+ z0 = svmaxnm_x (p0, z1, z2)) -+ -+/* -+** maxnm_d4_f64_x_tied1: -+** mov (z[0-9]+\.d), d4 -+** fmaxnm z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (maxnm_d4_f64_x_tied1, svfloat64_t, double, -+ z0 = svmaxnm_n_f64_x (p0, z0, d4), -+ z0 = svmaxnm_x (p0, z0, d4)) -+ -+/* -+** maxnm_d4_f64_x_untied: -+** mov z0\.d, d4 -+** fmaxnm z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZD (maxnm_d4_f64_x_untied, svfloat64_t, double, -+ z0 = svmaxnm_n_f64_x (p0, z1, d4), -+ z0 = svmaxnm_x (p0, z1, d4)) -+ -+/* -+** maxnm_0_f64_x_tied1: -+** fmaxnm z0\.d, p0/m, z0\.d, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_0_f64_x_tied1, svfloat64_t, -+ z0 = svmaxnm_n_f64_x (p0, z0, 0), -+ z0 = svmaxnm_x (p0, z0, 0)) -+ -+/* -+** maxnm_0_f64_x_untied: -+** movprfx z0, z1 -+** fmaxnm z0\.d, p0/m, z0\.d, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_0_f64_x_untied, svfloat64_t, -+ z0 = svmaxnm_n_f64_x (p0, z1, 0), -+ z0 = svmaxnm_x (p0, z1, 0)) -+ -+/* -+** maxnm_1_f64_x_tied1: -+** fmaxnm z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_1_f64_x_tied1, svfloat64_t, -+ z0 = svmaxnm_n_f64_x (p0, z0, 1), -+ z0 = svmaxnm_x (p0, z0, 1)) -+ -+/* -+** maxnm_1_f64_x_untied: -+** movprfx z0, z1 -+** fmaxnm z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_1_f64_x_untied, svfloat64_t, -+ z0 = svmaxnm_n_f64_x (p0, z1, 1), -+ z0 = svmaxnm_x (p0, z1, 1)) -+ -+/* -+** maxnm_2_f64_x_tied1: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fmaxnm z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_2_f64_x_tied1, svfloat64_t, -+ z0 = svmaxnm_n_f64_x (p0, z0, 2), -+ z0 = svmaxnm_x (p0, z0, 2)) -+ -+/* -+** maxnm_2_f64_x_untied: -+** fmov z0\.d, #2\.0(?:e\+0)? -+** fmaxnm z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (maxnm_2_f64_x_untied, svfloat64_t, -+ z0 = svmaxnm_n_f64_x (p0, z1, 2), -+ z0 = svmaxnm_x (p0, z1, 2)) -+ -+/* -+** ptrue_maxnm_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_maxnm_f64_x_tied1, svfloat64_t, -+ z0 = svmaxnm_f64_x (svptrue_b64 (), z0, z1), -+ z0 = svmaxnm_x (svptrue_b64 (), z0, z1)) -+ -+/* -+** ptrue_maxnm_f64_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_maxnm_f64_x_tied2, svfloat64_t, -+ z0 = svmaxnm_f64_x (svptrue_b64 (), z1, z0), -+ z0 = svmaxnm_x (svptrue_b64 (), z1, z0)) -+ -+/* -+** ptrue_maxnm_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_maxnm_f64_x_untied, svfloat64_t, -+ z0 = svmaxnm_f64_x (svptrue_b64 (), z1, z2), -+ z0 = svmaxnm_x (svptrue_b64 (), z1, z2)) -+ -+/* -+** ptrue_maxnm_0_f64_x_tied1: -+** ... 
-+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_maxnm_0_f64_x_tied1, svfloat64_t, -+ z0 = svmaxnm_n_f64_x (svptrue_b64 (), z0, 0), -+ z0 = svmaxnm_x (svptrue_b64 (), z0, 0)) -+ -+/* -+** ptrue_maxnm_0_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_maxnm_0_f64_x_untied, svfloat64_t, -+ z0 = svmaxnm_n_f64_x (svptrue_b64 (), z1, 0), -+ z0 = svmaxnm_x (svptrue_b64 (), z1, 0)) -+ -+/* -+** ptrue_maxnm_1_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_maxnm_1_f64_x_tied1, svfloat64_t, -+ z0 = svmaxnm_n_f64_x (svptrue_b64 (), z0, 1), -+ z0 = svmaxnm_x (svptrue_b64 (), z0, 1)) -+ -+/* -+** ptrue_maxnm_1_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_maxnm_1_f64_x_untied, svfloat64_t, -+ z0 = svmaxnm_n_f64_x (svptrue_b64 (), z1, 1), -+ z0 = svmaxnm_x (svptrue_b64 (), z1, 1)) -+ -+/* -+** ptrue_maxnm_2_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_maxnm_2_f64_x_tied1, svfloat64_t, -+ z0 = svmaxnm_n_f64_x (svptrue_b64 (), z0, 2), -+ z0 = svmaxnm_x (svptrue_b64 (), z0, 2)) -+ -+/* -+** ptrue_maxnm_2_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_maxnm_2_f64_x_untied, svfloat64_t, -+ z0 = svmaxnm_n_f64_x (svptrue_b64 (), z1, 2), -+ z0 = svmaxnm_x (svptrue_b64 (), z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxnmv_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxnmv_f16.c -new file mode 100644 -index 000000000..086bcf974 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxnmv_f16.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** maxnmv_d0_f16_tied: -+** fmaxnmv h0, p0, z0\.h -+** ret -+*/ -+TEST_REDUCTION_D (maxnmv_d0_f16_tied, float16_t, svfloat16_t, -+ d0 = svmaxnmv_f16 (p0, z0), -+ d0 = svmaxnmv (p0, z0)) -+ -+/* -+** maxnmv_d0_f16_untied: -+** fmaxnmv h0, p0, z1\.h -+** ret -+*/ -+TEST_REDUCTION_D (maxnmv_d0_f16_untied, float16_t, svfloat16_t, -+ d0 = svmaxnmv_f16 (p0, z1), -+ d0 = svmaxnmv (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxnmv_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxnmv_f32.c -new file mode 100644 -index 000000000..7fca8bc9e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxnmv_f32.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** maxnmv_d0_f32_tied: -+** fmaxnmv s0, p0, z0\.s -+** ret -+*/ -+TEST_REDUCTION_D (maxnmv_d0_f32_tied, float32_t, svfloat32_t, -+ d0 = svmaxnmv_f32 (p0, z0), -+ d0 = svmaxnmv (p0, z0)) -+ -+/* -+** maxnmv_d0_f32_untied: -+** fmaxnmv s0, p0, z1\.s -+** ret -+*/ -+TEST_REDUCTION_D (maxnmv_d0_f32_untied, float32_t, svfloat32_t, -+ d0 = svmaxnmv_f32 (p0, z1), -+ d0 = svmaxnmv (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxnmv_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxnmv_f64.c -new file mode 100644 -index 000000000..8b0884479 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxnmv_f64.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** maxnmv_d0_f64_tied: -+** fmaxnmv d0, p0, z0\.d -+** ret -+*/ -+TEST_REDUCTION_D (maxnmv_d0_f64_tied, float64_t, svfloat64_t, -+ d0 = 
svmaxnmv_f64 (p0, z0), -+ d0 = svmaxnmv (p0, z0)) -+ -+/* -+** maxnmv_d0_f64_untied: -+** fmaxnmv d0, p0, z1\.d -+** ret -+*/ -+TEST_REDUCTION_D (maxnmv_d0_f64_untied, float64_t, svfloat64_t, -+ d0 = svmaxnmv_f64 (p0, z1), -+ d0 = svmaxnmv (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_f16.c -new file mode 100644 -index 000000000..a16823987 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_f16.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** maxv_d0_f16_tied: -+** fmaxv h0, p0, z0\.h -+** ret -+*/ -+TEST_REDUCTION_D (maxv_d0_f16_tied, float16_t, svfloat16_t, -+ d0 = svmaxv_f16 (p0, z0), -+ d0 = svmaxv (p0, z0)) -+ -+/* -+** maxv_d0_f16_untied: -+** fmaxv h0, p0, z1\.h -+** ret -+*/ -+TEST_REDUCTION_D (maxv_d0_f16_untied, float16_t, svfloat16_t, -+ d0 = svmaxv_f16 (p0, z1), -+ d0 = svmaxv (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_f32.c -new file mode 100644 -index 000000000..64e5edfef ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_f32.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** maxv_d0_f32_tied: -+** fmaxv s0, p0, z0\.s -+** ret -+*/ -+TEST_REDUCTION_D (maxv_d0_f32_tied, float32_t, svfloat32_t, -+ d0 = svmaxv_f32 (p0, z0), -+ d0 = svmaxv (p0, z0)) -+ -+/* -+** maxv_d0_f32_untied: -+** fmaxv s0, p0, z1\.s -+** ret -+*/ -+TEST_REDUCTION_D (maxv_d0_f32_untied, float32_t, svfloat32_t, -+ d0 = svmaxv_f32 (p0, z1), -+ d0 = svmaxv (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_f64.c -new file mode 100644 -index 000000000..837d6dfdc ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_f64.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** maxv_d0_f64_tied: -+** fmaxv d0, p0, z0\.d -+** ret -+*/ -+TEST_REDUCTION_D (maxv_d0_f64_tied, float64_t, svfloat64_t, -+ d0 = svmaxv_f64 (p0, z0), -+ d0 = svmaxv (p0, z0)) -+ -+/* -+** maxv_d0_f64_untied: -+** fmaxv d0, p0, z1\.d -+** ret -+*/ -+TEST_REDUCTION_D (maxv_d0_f64_untied, float64_t, svfloat64_t, -+ d0 = svmaxv_f64 (p0, z1), -+ d0 = svmaxv (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_s16.c -new file mode 100644 -index 000000000..bbf36a110 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_s16.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** maxv_x0_s16: -+** smaxv h([0-9]+), p0, z0\.h -+** umov w0, v\1\.h\[0\] -+** ret -+*/ -+TEST_REDUCTION_X (maxv_x0_s16, int16_t, svint16_t, -+ x0 = svmaxv_s16 (p0, z0), -+ x0 = svmaxv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_s32.c -new file mode 100644 -index 000000000..645169ee8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_s32.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** maxv_x0_s32: -+** smaxv (s[0-9]+), p0, z0\.s -+** fmov w0, \1 -+** 
ret -+*/ -+TEST_REDUCTION_X (maxv_x0_s32, int32_t, svint32_t, -+ x0 = svmaxv_s32 (p0, z0), -+ x0 = svmaxv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_s64.c -new file mode 100644 -index 000000000..009c1e9e2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_s64.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** maxv_x0_s64: -+** smaxv (d[0-9]+), p0, z0\.d -+** fmov x0, \1 -+** ret -+*/ -+TEST_REDUCTION_X (maxv_x0_s64, int64_t, svint64_t, -+ x0 = svmaxv_s64 (p0, z0), -+ x0 = svmaxv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_s8.c -new file mode 100644 -index 000000000..2c1f1b9b3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_s8.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** maxv_x0_s8: -+** smaxv b([0-9]+), p0, z0\.b -+** umov w0, v\1\.b\[0\] -+** ret -+*/ -+TEST_REDUCTION_X (maxv_x0_s8, int8_t, svint8_t, -+ x0 = svmaxv_s8 (p0, z0), -+ x0 = svmaxv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_u16.c -new file mode 100644 -index 000000000..978b8251a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_u16.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** maxv_x0_u16: -+** umaxv h([0-9]+), p0, z0\.h -+** umov w0, v\1\.h\[0\] -+** ret -+*/ -+TEST_REDUCTION_X (maxv_x0_u16, uint16_t, svuint16_t, -+ x0 = svmaxv_u16 (p0, z0), -+ x0 = svmaxv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_u32.c -new file mode 100644 -index 000000000..85853b4b0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_u32.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** maxv_x0_u32: -+** umaxv (s[0-9]+), p0, z0\.s -+** fmov w0, \1 -+** ret -+*/ -+TEST_REDUCTION_X (maxv_x0_u32, uint32_t, svuint32_t, -+ x0 = svmaxv_u32 (p0, z0), -+ x0 = svmaxv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_u64.c -new file mode 100644 -index 000000000..95980ed34 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_u64.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** maxv_x0_u64: -+** umaxv (d[0-9]+), p0, z0\.d -+** fmov x0, \1 -+** ret -+*/ -+TEST_REDUCTION_X (maxv_x0_u64, uint64_t, svuint64_t, -+ x0 = svmaxv_u64 (p0, z0), -+ x0 = svmaxv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_u8.c -new file mode 100644 -index 000000000..a0b23d242 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/maxv_u8.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** maxv_x0_u8: -+** umaxv b([0-9]+), p0, z0\.b -+** umov w0, v\1\.b\[0\] -+** ret -+*/ -+TEST_REDUCTION_X (maxv_x0_u8, uint8_t, svuint8_t, -+ x0 = svmaxv_u8 (p0, z0), 
-+ x0 = svmaxv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_f16.c -new file mode 100644 -index 000000000..721ee7389 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_f16.c -@@ -0,0 +1,425 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** min_f16_m_tied1: -+** fmin z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (min_f16_m_tied1, svfloat16_t, -+ z0 = svmin_f16_m (p0, z0, z1), -+ z0 = svmin_m (p0, z0, z1)) -+ -+/* -+** min_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fmin z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (min_f16_m_tied2, svfloat16_t, -+ z0 = svmin_f16_m (p0, z1, z0), -+ z0 = svmin_m (p0, z1, z0)) -+ -+/* -+** min_f16_m_untied: -+** movprfx z0, z1 -+** fmin z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (min_f16_m_untied, svfloat16_t, -+ z0 = svmin_f16_m (p0, z1, z2), -+ z0 = svmin_m (p0, z1, z2)) -+ -+/* -+** min_h4_f16_m_tied1: -+** mov (z[0-9]+\.h), h4 -+** fmin z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (min_h4_f16_m_tied1, svfloat16_t, __fp16, -+ z0 = svmin_n_f16_m (p0, z0, d4), -+ z0 = svmin_m (p0, z0, d4)) -+ -+/* -+** min_h4_f16_m_untied: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0, z1 -+** fmin z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (min_h4_f16_m_untied, svfloat16_t, __fp16, -+ z0 = svmin_n_f16_m (p0, z1, d4), -+ z0 = svmin_m (p0, z1, d4)) -+ -+/* -+** min_0_f16_m_tied1: -+** fmin z0\.h, p0/m, z0\.h, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (min_0_f16_m_tied1, svfloat16_t, -+ z0 = svmin_n_f16_m (p0, z0, 0), -+ z0 = svmin_m (p0, z0, 0)) -+ -+/* -+** min_0_f16_m_untied: -+** movprfx z0, z1 -+** fmin z0\.h, p0/m, z0\.h, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (min_0_f16_m_untied, svfloat16_t, -+ z0 = svmin_n_f16_m (p0, z1, 0), -+ z0 = svmin_m (p0, z1, 0)) -+ -+/* -+** min_1_f16_m_tied1: -+** fmin z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_f16_m_tied1, svfloat16_t, -+ z0 = svmin_n_f16_m (p0, z0, 1), -+ z0 = svmin_m (p0, z0, 1)) -+ -+/* -+** min_1_f16_m_untied: -+** movprfx z0, z1 -+** fmin z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_f16_m_untied, svfloat16_t, -+ z0 = svmin_n_f16_m (p0, z1, 1), -+ z0 = svmin_m (p0, z1, 1)) -+ -+/* -+** min_2_f16_m: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
-+** fmin z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_2_f16_m, svfloat16_t, -+ z0 = svmin_n_f16_m (p0, z0, 2), -+ z0 = svmin_m (p0, z0, 2)) -+ -+/* -+** min_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fmin z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (min_f16_z_tied1, svfloat16_t, -+ z0 = svmin_f16_z (p0, z0, z1), -+ z0 = svmin_z (p0, z0, z1)) -+ -+/* -+** min_f16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** fmin z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (min_f16_z_tied2, svfloat16_t, -+ z0 = svmin_f16_z (p0, z1, z0), -+ z0 = svmin_z (p0, z1, z0)) -+ -+/* -+** min_f16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fmin z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fmin z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (min_f16_z_untied, svfloat16_t, -+ z0 = svmin_f16_z (p0, z1, z2), -+ z0 = svmin_z (p0, z1, z2)) -+ -+/* -+** min_h4_f16_z_tied1: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0\.h, p0/z, z0\.h -+** fmin z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (min_h4_f16_z_tied1, svfloat16_t, __fp16, -+ z0 = svmin_n_f16_z (p0, z0, d4), -+ z0 = svmin_z (p0, z0, d4)) -+ -+/* -+** min_h4_f16_z_untied: -+** mov (z[0-9]+\.h), h4 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fmin z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** fmin z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (min_h4_f16_z_untied, svfloat16_t, __fp16, -+ z0 = svmin_n_f16_z (p0, z1, d4), -+ z0 = svmin_z (p0, z1, d4)) -+ -+/* -+** min_0_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fmin z0\.h, p0/m, z0\.h, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (min_0_f16_z_tied1, svfloat16_t, -+ z0 = svmin_n_f16_z (p0, z0, 0), -+ z0 = svmin_z (p0, z0, 0)) -+ -+/* -+** min_0_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fmin z0\.h, p0/m, z0\.h, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (min_0_f16_z_untied, svfloat16_t, -+ z0 = svmin_n_f16_z (p0, z1, 0), -+ z0 = svmin_z (p0, z1, 0)) -+ -+/* -+** min_1_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fmin z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_f16_z_tied1, svfloat16_t, -+ z0 = svmin_n_f16_z (p0, z0, 1), -+ z0 = svmin_z (p0, z0, 1)) -+ -+/* -+** min_1_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fmin z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_f16_z_untied, svfloat16_t, -+ z0 = svmin_n_f16_z (p0, z1, 1), -+ z0 = svmin_z (p0, z1, 1)) -+ -+/* -+** min_2_f16_z: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
-+** movprfx z0\.h, p0/z, z0\.h -+** fmin z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_2_f16_z, svfloat16_t, -+ z0 = svmin_n_f16_z (p0, z0, 2), -+ z0 = svmin_z (p0, z0, 2)) -+ -+/* -+** min_f16_x_tied1: -+** fmin z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (min_f16_x_tied1, svfloat16_t, -+ z0 = svmin_f16_x (p0, z0, z1), -+ z0 = svmin_x (p0, z0, z1)) -+ -+/* -+** min_f16_x_tied2: -+** fmin z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (min_f16_x_tied2, svfloat16_t, -+ z0 = svmin_f16_x (p0, z1, z0), -+ z0 = svmin_x (p0, z1, z0)) -+ -+/* -+** min_f16_x_untied: -+** ( -+** movprfx z0, z1 -+** fmin z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0, z2 -+** fmin z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (min_f16_x_untied, svfloat16_t, -+ z0 = svmin_f16_x (p0, z1, z2), -+ z0 = svmin_x (p0, z1, z2)) -+ -+/* -+** min_h4_f16_x_tied1: -+** mov (z[0-9]+\.h), h4 -+** fmin z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (min_h4_f16_x_tied1, svfloat16_t, __fp16, -+ z0 = svmin_n_f16_x (p0, z0, d4), -+ z0 = svmin_x (p0, z0, d4)) -+ -+/* -+** min_h4_f16_x_untied: -+** mov z0\.h, h4 -+** fmin z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZD (min_h4_f16_x_untied, svfloat16_t, __fp16, -+ z0 = svmin_n_f16_x (p0, z1, d4), -+ z0 = svmin_x (p0, z1, d4)) -+ -+/* -+** min_0_f16_x_tied1: -+** fmin z0\.h, p0/m, z0\.h, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (min_0_f16_x_tied1, svfloat16_t, -+ z0 = svmin_n_f16_x (p0, z0, 0), -+ z0 = svmin_x (p0, z0, 0)) -+ -+/* -+** min_0_f16_x_untied: -+** movprfx z0, z1 -+** fmin z0\.h, p0/m, z0\.h, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (min_0_f16_x_untied, svfloat16_t, -+ z0 = svmin_n_f16_x (p0, z1, 0), -+ z0 = svmin_x (p0, z1, 0)) -+ -+/* -+** min_1_f16_x_tied1: -+** fmin z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_f16_x_tied1, svfloat16_t, -+ z0 = svmin_n_f16_x (p0, z0, 1), -+ z0 = svmin_x (p0, z0, 1)) -+ -+/* -+** min_1_f16_x_untied: -+** movprfx z0, z1 -+** fmin z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_f16_x_untied, svfloat16_t, -+ z0 = svmin_n_f16_x (p0, z1, 1), -+ z0 = svmin_x (p0, z1, 1)) -+ -+/* -+** min_2_f16_x_tied1: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fmin z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_2_f16_x_tied1, svfloat16_t, -+ z0 = svmin_n_f16_x (p0, z0, 2), -+ z0 = svmin_x (p0, z0, 2)) -+ -+/* -+** min_2_f16_x_untied: -+** fmov z0\.h, #2\.0(?:e\+0)? -+** fmin z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (min_2_f16_x_untied, svfloat16_t, -+ z0 = svmin_n_f16_x (p0, z1, 2), -+ z0 = svmin_x (p0, z1, 2)) -+ -+/* -+** ptrue_min_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_min_f16_x_tied1, svfloat16_t, -+ z0 = svmin_f16_x (svptrue_b16 (), z0, z1), -+ z0 = svmin_x (svptrue_b16 (), z0, z1)) -+ -+/* -+** ptrue_min_f16_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_min_f16_x_tied2, svfloat16_t, -+ z0 = svmin_f16_x (svptrue_b16 (), z1, z0), -+ z0 = svmin_x (svptrue_b16 (), z1, z0)) -+ -+/* -+** ptrue_min_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_min_f16_x_untied, svfloat16_t, -+ z0 = svmin_f16_x (svptrue_b16 (), z1, z2), -+ z0 = svmin_x (svptrue_b16 (), z1, z2)) -+ -+/* -+** ptrue_min_0_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_min_0_f16_x_tied1, svfloat16_t, -+ z0 = svmin_n_f16_x (svptrue_b16 (), z0, 0), -+ z0 = svmin_x (svptrue_b16 (), z0, 0)) -+ -+/* -+** ptrue_min_0_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_min_0_f16_x_untied, svfloat16_t, -+ z0 = svmin_n_f16_x (svptrue_b16 (), z1, 0), -+ z0 = svmin_x (svptrue_b16 (), z1, 0)) -+ -+/* -+** ptrue_min_1_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_min_1_f16_x_tied1, svfloat16_t, -+ z0 = svmin_n_f16_x (svptrue_b16 (), z0, 1), -+ z0 = svmin_x (svptrue_b16 (), z0, 1)) -+ -+/* -+** ptrue_min_1_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_min_1_f16_x_untied, svfloat16_t, -+ z0 = svmin_n_f16_x (svptrue_b16 (), z1, 1), -+ z0 = svmin_x (svptrue_b16 (), z1, 1)) -+ -+/* -+** ptrue_min_2_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_min_2_f16_x_tied1, svfloat16_t, -+ z0 = svmin_n_f16_x (svptrue_b16 (), z0, 2), -+ z0 = svmin_x (svptrue_b16 (), z0, 2)) -+ -+/* -+** ptrue_min_2_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_min_2_f16_x_untied, svfloat16_t, -+ z0 = svmin_n_f16_x (svptrue_b16 (), z1, 2), -+ z0 = svmin_x (svptrue_b16 (), z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_f32.c -new file mode 100644 -index 000000000..a3b1cf5c5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_f32.c -@@ -0,0 +1,425 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** min_f32_m_tied1: -+** fmin z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (min_f32_m_tied1, svfloat32_t, -+ z0 = svmin_f32_m (p0, z0, z1), -+ z0 = svmin_m (p0, z0, z1)) -+ -+/* -+** min_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fmin z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (min_f32_m_tied2, svfloat32_t, -+ z0 = svmin_f32_m (p0, z1, z0), -+ z0 = svmin_m (p0, z1, z0)) -+ -+/* -+** min_f32_m_untied: -+** movprfx z0, z1 -+** fmin z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (min_f32_m_untied, svfloat32_t, -+ z0 = svmin_f32_m (p0, z1, z2), -+ z0 = svmin_m (p0, z1, z2)) -+ -+/* -+** min_s4_f32_m_tied1: -+** mov (z[0-9]+\.s), s4 -+** fmin z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (min_s4_f32_m_tied1, svfloat32_t, float, -+ z0 = svmin_n_f32_m (p0, z0, d4), -+ z0 = svmin_m (p0, z0, d4)) -+ -+/* -+** min_s4_f32_m_untied: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0, z1 -+** fmin z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (min_s4_f32_m_untied, svfloat32_t, float, -+ z0 = svmin_n_f32_m (p0, z1, d4), -+ z0 = svmin_m (p0, z1, d4)) -+ -+/* -+** min_0_f32_m_tied1: -+** fmin z0\.s, p0/m, z0\.s, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (min_0_f32_m_tied1, svfloat32_t, -+ z0 = svmin_n_f32_m (p0, z0, 0), -+ z0 = svmin_m (p0, z0, 0)) -+ -+/* -+** min_0_f32_m_untied: -+** movprfx z0, z1 -+** fmin z0\.s, p0/m, z0\.s, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (min_0_f32_m_untied, svfloat32_t, -+ z0 = svmin_n_f32_m (p0, z1, 0), -+ z0 = svmin_m (p0, z1, 0)) -+ -+/* -+** min_1_f32_m_tied1: -+** fmin z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_f32_m_tied1, svfloat32_t, -+ z0 = svmin_n_f32_m (p0, z0, 1), -+ z0 = svmin_m (p0, z0, 1)) -+ -+/* -+** min_1_f32_m_untied: -+** movprfx z0, z1 -+** fmin 
z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_f32_m_untied, svfloat32_t, -+ z0 = svmin_n_f32_m (p0, z1, 1), -+ z0 = svmin_m (p0, z1, 1)) -+ -+/* -+** min_2_f32_m: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fmin z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_2_f32_m, svfloat32_t, -+ z0 = svmin_n_f32_m (p0, z0, 2), -+ z0 = svmin_m (p0, z0, 2)) -+ -+/* -+** min_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fmin z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (min_f32_z_tied1, svfloat32_t, -+ z0 = svmin_f32_z (p0, z0, z1), -+ z0 = svmin_z (p0, z0, z1)) -+ -+/* -+** min_f32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** fmin z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (min_f32_z_tied2, svfloat32_t, -+ z0 = svmin_f32_z (p0, z1, z0), -+ z0 = svmin_z (p0, z1, z0)) -+ -+/* -+** min_f32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fmin z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fmin z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (min_f32_z_untied, svfloat32_t, -+ z0 = svmin_f32_z (p0, z1, z2), -+ z0 = svmin_z (p0, z1, z2)) -+ -+/* -+** min_s4_f32_z_tied1: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0\.s, p0/z, z0\.s -+** fmin z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (min_s4_f32_z_tied1, svfloat32_t, float, -+ z0 = svmin_n_f32_z (p0, z0, d4), -+ z0 = svmin_z (p0, z0, d4)) -+ -+/* -+** min_s4_f32_z_untied: -+** mov (z[0-9]+\.s), s4 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fmin z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** fmin z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (min_s4_f32_z_untied, svfloat32_t, float, -+ z0 = svmin_n_f32_z (p0, z1, d4), -+ z0 = svmin_z (p0, z1, d4)) -+ -+/* -+** min_0_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fmin z0\.s, p0/m, z0\.s, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (min_0_f32_z_tied1, svfloat32_t, -+ z0 = svmin_n_f32_z (p0, z0, 0), -+ z0 = svmin_z (p0, z0, 0)) -+ -+/* -+** min_0_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fmin z0\.s, p0/m, z0\.s, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (min_0_f32_z_untied, svfloat32_t, -+ z0 = svmin_n_f32_z (p0, z1, 0), -+ z0 = svmin_z (p0, z1, 0)) -+ -+/* -+** min_1_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fmin z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_f32_z_tied1, svfloat32_t, -+ z0 = svmin_n_f32_z (p0, z0, 1), -+ z0 = svmin_z (p0, z0, 1)) -+ -+/* -+** min_1_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fmin z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_f32_z_untied, svfloat32_t, -+ z0 = svmin_n_f32_z (p0, z1, 1), -+ z0 = svmin_z (p0, z1, 1)) -+ -+/* -+** min_2_f32_z: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
-+** movprfx z0\.s, p0/z, z0\.s -+** fmin z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_2_f32_z, svfloat32_t, -+ z0 = svmin_n_f32_z (p0, z0, 2), -+ z0 = svmin_z (p0, z0, 2)) -+ -+/* -+** min_f32_x_tied1: -+** fmin z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (min_f32_x_tied1, svfloat32_t, -+ z0 = svmin_f32_x (p0, z0, z1), -+ z0 = svmin_x (p0, z0, z1)) -+ -+/* -+** min_f32_x_tied2: -+** fmin z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (min_f32_x_tied2, svfloat32_t, -+ z0 = svmin_f32_x (p0, z1, z0), -+ z0 = svmin_x (p0, z1, z0)) -+ -+/* -+** min_f32_x_untied: -+** ( -+** movprfx z0, z1 -+** fmin z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0, z2 -+** fmin z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (min_f32_x_untied, svfloat32_t, -+ z0 = svmin_f32_x (p0, z1, z2), -+ z0 = svmin_x (p0, z1, z2)) -+ -+/* -+** min_s4_f32_x_tied1: -+** mov (z[0-9]+\.s), s4 -+** fmin z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (min_s4_f32_x_tied1, svfloat32_t, float, -+ z0 = svmin_n_f32_x (p0, z0, d4), -+ z0 = svmin_x (p0, z0, d4)) -+ -+/* -+** min_s4_f32_x_untied: -+** mov z0\.s, s4 -+** fmin z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZD (min_s4_f32_x_untied, svfloat32_t, float, -+ z0 = svmin_n_f32_x (p0, z1, d4), -+ z0 = svmin_x (p0, z1, d4)) -+ -+/* -+** min_0_f32_x_tied1: -+** fmin z0\.s, p0/m, z0\.s, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (min_0_f32_x_tied1, svfloat32_t, -+ z0 = svmin_n_f32_x (p0, z0, 0), -+ z0 = svmin_x (p0, z0, 0)) -+ -+/* -+** min_0_f32_x_untied: -+** movprfx z0, z1 -+** fmin z0\.s, p0/m, z0\.s, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (min_0_f32_x_untied, svfloat32_t, -+ z0 = svmin_n_f32_x (p0, z1, 0), -+ z0 = svmin_x (p0, z1, 0)) -+ -+/* -+** min_1_f32_x_tied1: -+** fmin z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_f32_x_tied1, svfloat32_t, -+ z0 = svmin_n_f32_x (p0, z0, 1), -+ z0 = svmin_x (p0, z0, 1)) -+ -+/* -+** min_1_f32_x_untied: -+** movprfx z0, z1 -+** fmin z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_f32_x_untied, svfloat32_t, -+ z0 = svmin_n_f32_x (p0, z1, 1), -+ z0 = svmin_x (p0, z1, 1)) -+ -+/* -+** min_2_f32_x_tied1: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fmin z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_2_f32_x_tied1, svfloat32_t, -+ z0 = svmin_n_f32_x (p0, z0, 2), -+ z0 = svmin_x (p0, z0, 2)) -+ -+/* -+** min_2_f32_x_untied: -+** fmov z0\.s, #2\.0(?:e\+0)? -+** fmin z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (min_2_f32_x_untied, svfloat32_t, -+ z0 = svmin_n_f32_x (p0, z1, 2), -+ z0 = svmin_x (p0, z1, 2)) -+ -+/* -+** ptrue_min_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_min_f32_x_tied1, svfloat32_t, -+ z0 = svmin_f32_x (svptrue_b32 (), z0, z1), -+ z0 = svmin_x (svptrue_b32 (), z0, z1)) -+ -+/* -+** ptrue_min_f32_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_min_f32_x_tied2, svfloat32_t, -+ z0 = svmin_f32_x (svptrue_b32 (), z1, z0), -+ z0 = svmin_x (svptrue_b32 (), z1, z0)) -+ -+/* -+** ptrue_min_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_min_f32_x_untied, svfloat32_t, -+ z0 = svmin_f32_x (svptrue_b32 (), z1, z2), -+ z0 = svmin_x (svptrue_b32 (), z1, z2)) -+ -+/* -+** ptrue_min_0_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_min_0_f32_x_tied1, svfloat32_t, -+ z0 = svmin_n_f32_x (svptrue_b32 (), z0, 0), -+ z0 = svmin_x (svptrue_b32 (), z0, 0)) -+ -+/* -+** ptrue_min_0_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_min_0_f32_x_untied, svfloat32_t, -+ z0 = svmin_n_f32_x (svptrue_b32 (), z1, 0), -+ z0 = svmin_x (svptrue_b32 (), z1, 0)) -+ -+/* -+** ptrue_min_1_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_min_1_f32_x_tied1, svfloat32_t, -+ z0 = svmin_n_f32_x (svptrue_b32 (), z0, 1), -+ z0 = svmin_x (svptrue_b32 (), z0, 1)) -+ -+/* -+** ptrue_min_1_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_min_1_f32_x_untied, svfloat32_t, -+ z0 = svmin_n_f32_x (svptrue_b32 (), z1, 1), -+ z0 = svmin_x (svptrue_b32 (), z1, 1)) -+ -+/* -+** ptrue_min_2_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_min_2_f32_x_tied1, svfloat32_t, -+ z0 = svmin_n_f32_x (svptrue_b32 (), z0, 2), -+ z0 = svmin_x (svptrue_b32 (), z0, 2)) -+ -+/* -+** ptrue_min_2_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_min_2_f32_x_untied, svfloat32_t, -+ z0 = svmin_n_f32_x (svptrue_b32 (), z1, 2), -+ z0 = svmin_x (svptrue_b32 (), z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_f64.c -new file mode 100644 -index 000000000..bb31102e2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_f64.c -@@ -0,0 +1,425 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** min_f64_m_tied1: -+** fmin z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (min_f64_m_tied1, svfloat64_t, -+ z0 = svmin_f64_m (p0, z0, z1), -+ z0 = svmin_m (p0, z0, z1)) -+ -+/* -+** min_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fmin z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_f64_m_tied2, svfloat64_t, -+ z0 = svmin_f64_m (p0, z1, z0), -+ z0 = svmin_m (p0, z1, z0)) -+ -+/* -+** min_f64_m_untied: -+** movprfx z0, z1 -+** fmin z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (min_f64_m_untied, svfloat64_t, -+ z0 = svmin_f64_m (p0, z1, z2), -+ z0 = svmin_m (p0, z1, z2)) -+ -+/* -+** min_d4_f64_m_tied1: -+** mov (z[0-9]+\.d), d4 -+** fmin z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (min_d4_f64_m_tied1, svfloat64_t, double, -+ z0 = svmin_n_f64_m (p0, z0, d4), -+ z0 = svmin_m (p0, z0, d4)) -+ -+/* -+** min_d4_f64_m_untied: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0, z1 -+** fmin z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (min_d4_f64_m_untied, svfloat64_t, double, -+ z0 = svmin_n_f64_m (p0, z1, d4), -+ z0 = svmin_m (p0, z1, d4)) -+ -+/* -+** min_0_f64_m_tied1: -+** fmin z0\.d, p0/m, z0\.d, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (min_0_f64_m_tied1, svfloat64_t, -+ z0 = svmin_n_f64_m (p0, z0, 0), -+ z0 = svmin_m (p0, z0, 0)) -+ -+/* -+** min_0_f64_m_untied: -+** movprfx z0, z1 -+** fmin z0\.d, p0/m, z0\.d, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (min_0_f64_m_untied, svfloat64_t, -+ z0 = svmin_n_f64_m (p0, z1, 0), -+ z0 = svmin_m (p0, z1, 0)) -+ -+/* -+** min_1_f64_m_tied1: -+** fmin z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_f64_m_tied1, svfloat64_t, -+ z0 = svmin_n_f64_m (p0, z0, 1), -+ z0 = svmin_m (p0, z0, 1)) -+ -+/* -+** min_1_f64_m_untied: -+** movprfx z0, z1 -+** fmin 
z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_f64_m_untied, svfloat64_t, -+ z0 = svmin_n_f64_m (p0, z1, 1), -+ z0 = svmin_m (p0, z1, 1)) -+ -+/* -+** min_2_f64_m: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fmin z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_2_f64_m, svfloat64_t, -+ z0 = svmin_n_f64_m (p0, z0, 2), -+ z0 = svmin_m (p0, z0, 2)) -+ -+/* -+** min_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fmin z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (min_f64_z_tied1, svfloat64_t, -+ z0 = svmin_f64_z (p0, z0, z1), -+ z0 = svmin_z (p0, z0, z1)) -+ -+/* -+** min_f64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** fmin z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (min_f64_z_tied2, svfloat64_t, -+ z0 = svmin_f64_z (p0, z1, z0), -+ z0 = svmin_z (p0, z1, z0)) -+ -+/* -+** min_f64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fmin z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fmin z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (min_f64_z_untied, svfloat64_t, -+ z0 = svmin_f64_z (p0, z1, z2), -+ z0 = svmin_z (p0, z1, z2)) -+ -+/* -+** min_d4_f64_z_tied1: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0\.d, p0/z, z0\.d -+** fmin z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (min_d4_f64_z_tied1, svfloat64_t, double, -+ z0 = svmin_n_f64_z (p0, z0, d4), -+ z0 = svmin_z (p0, z0, d4)) -+ -+/* -+** min_d4_f64_z_untied: -+** mov (z[0-9]+\.d), d4 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fmin z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** fmin z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (min_d4_f64_z_untied, svfloat64_t, double, -+ z0 = svmin_n_f64_z (p0, z1, d4), -+ z0 = svmin_z (p0, z1, d4)) -+ -+/* -+** min_0_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fmin z0\.d, p0/m, z0\.d, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (min_0_f64_z_tied1, svfloat64_t, -+ z0 = svmin_n_f64_z (p0, z0, 0), -+ z0 = svmin_z (p0, z0, 0)) -+ -+/* -+** min_0_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fmin z0\.d, p0/m, z0\.d, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (min_0_f64_z_untied, svfloat64_t, -+ z0 = svmin_n_f64_z (p0, z1, 0), -+ z0 = svmin_z (p0, z1, 0)) -+ -+/* -+** min_1_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fmin z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_f64_z_tied1, svfloat64_t, -+ z0 = svmin_n_f64_z (p0, z0, 1), -+ z0 = svmin_z (p0, z0, 1)) -+ -+/* -+** min_1_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fmin z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_f64_z_untied, svfloat64_t, -+ z0 = svmin_n_f64_z (p0, z1, 1), -+ z0 = svmin_z (p0, z1, 1)) -+ -+/* -+** min_2_f64_z: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
-+** movprfx z0\.d, p0/z, z0\.d -+** fmin z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_2_f64_z, svfloat64_t, -+ z0 = svmin_n_f64_z (p0, z0, 2), -+ z0 = svmin_z (p0, z0, 2)) -+ -+/* -+** min_f64_x_tied1: -+** fmin z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (min_f64_x_tied1, svfloat64_t, -+ z0 = svmin_f64_x (p0, z0, z1), -+ z0 = svmin_x (p0, z0, z1)) -+ -+/* -+** min_f64_x_tied2: -+** fmin z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (min_f64_x_tied2, svfloat64_t, -+ z0 = svmin_f64_x (p0, z1, z0), -+ z0 = svmin_x (p0, z1, z0)) -+ -+/* -+** min_f64_x_untied: -+** ( -+** movprfx z0, z1 -+** fmin z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0, z2 -+** fmin z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (min_f64_x_untied, svfloat64_t, -+ z0 = svmin_f64_x (p0, z1, z2), -+ z0 = svmin_x (p0, z1, z2)) -+ -+/* -+** min_d4_f64_x_tied1: -+** mov (z[0-9]+\.d), d4 -+** fmin z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (min_d4_f64_x_tied1, svfloat64_t, double, -+ z0 = svmin_n_f64_x (p0, z0, d4), -+ z0 = svmin_x (p0, z0, d4)) -+ -+/* -+** min_d4_f64_x_untied: -+** mov z0\.d, d4 -+** fmin z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZD (min_d4_f64_x_untied, svfloat64_t, double, -+ z0 = svmin_n_f64_x (p0, z1, d4), -+ z0 = svmin_x (p0, z1, d4)) -+ -+/* -+** min_0_f64_x_tied1: -+** fmin z0\.d, p0/m, z0\.d, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (min_0_f64_x_tied1, svfloat64_t, -+ z0 = svmin_n_f64_x (p0, z0, 0), -+ z0 = svmin_x (p0, z0, 0)) -+ -+/* -+** min_0_f64_x_untied: -+** movprfx z0, z1 -+** fmin z0\.d, p0/m, z0\.d, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (min_0_f64_x_untied, svfloat64_t, -+ z0 = svmin_n_f64_x (p0, z1, 0), -+ z0 = svmin_x (p0, z1, 0)) -+ -+/* -+** min_1_f64_x_tied1: -+** fmin z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_f64_x_tied1, svfloat64_t, -+ z0 = svmin_n_f64_x (p0, z0, 1), -+ z0 = svmin_x (p0, z0, 1)) -+ -+/* -+** min_1_f64_x_untied: -+** movprfx z0, z1 -+** fmin z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_f64_x_untied, svfloat64_t, -+ z0 = svmin_n_f64_x (p0, z1, 1), -+ z0 = svmin_x (p0, z1, 1)) -+ -+/* -+** min_2_f64_x_tied1: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fmin z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_2_f64_x_tied1, svfloat64_t, -+ z0 = svmin_n_f64_x (p0, z0, 2), -+ z0 = svmin_x (p0, z0, 2)) -+ -+/* -+** min_2_f64_x_untied: -+** fmov z0\.d, #2\.0(?:e\+0)? -+** fmin z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (min_2_f64_x_untied, svfloat64_t, -+ z0 = svmin_n_f64_x (p0, z1, 2), -+ z0 = svmin_x (p0, z1, 2)) -+ -+/* -+** ptrue_min_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_min_f64_x_tied1, svfloat64_t, -+ z0 = svmin_f64_x (svptrue_b64 (), z0, z1), -+ z0 = svmin_x (svptrue_b64 (), z0, z1)) -+ -+/* -+** ptrue_min_f64_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_min_f64_x_tied2, svfloat64_t, -+ z0 = svmin_f64_x (svptrue_b64 (), z1, z0), -+ z0 = svmin_x (svptrue_b64 (), z1, z0)) -+ -+/* -+** ptrue_min_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_min_f64_x_untied, svfloat64_t, -+ z0 = svmin_f64_x (svptrue_b64 (), z1, z2), -+ z0 = svmin_x (svptrue_b64 (), z1, z2)) -+ -+/* -+** ptrue_min_0_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_min_0_f64_x_tied1, svfloat64_t, -+ z0 = svmin_n_f64_x (svptrue_b64 (), z0, 0), -+ z0 = svmin_x (svptrue_b64 (), z0, 0)) -+ -+/* -+** ptrue_min_0_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_min_0_f64_x_untied, svfloat64_t, -+ z0 = svmin_n_f64_x (svptrue_b64 (), z1, 0), -+ z0 = svmin_x (svptrue_b64 (), z1, 0)) -+ -+/* -+** ptrue_min_1_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_min_1_f64_x_tied1, svfloat64_t, -+ z0 = svmin_n_f64_x (svptrue_b64 (), z0, 1), -+ z0 = svmin_x (svptrue_b64 (), z0, 1)) -+ -+/* -+** ptrue_min_1_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_min_1_f64_x_untied, svfloat64_t, -+ z0 = svmin_n_f64_x (svptrue_b64 (), z1, 1), -+ z0 = svmin_x (svptrue_b64 (), z1, 1)) -+ -+/* -+** ptrue_min_2_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_min_2_f64_x_tied1, svfloat64_t, -+ z0 = svmin_n_f64_x (svptrue_b64 (), z0, 2), -+ z0 = svmin_x (svptrue_b64 (), z0, 2)) -+ -+/* -+** ptrue_min_2_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_min_2_f64_x_untied, svfloat64_t, -+ z0 = svmin_n_f64_x (svptrue_b64 (), z1, 2), -+ z0 = svmin_x (svptrue_b64 (), z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_s16.c -new file mode 100644 -index 000000000..14dfcc4c3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_s16.c -@@ -0,0 +1,293 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** min_s16_m_tied1: -+** smin z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (min_s16_m_tied1, svint16_t, -+ z0 = svmin_s16_m (p0, z0, z1), -+ z0 = svmin_m (p0, z0, z1)) -+ -+/* -+** min_s16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** smin z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (min_s16_m_tied2, svint16_t, -+ z0 = svmin_s16_m (p0, z1, z0), -+ z0 = svmin_m (p0, z1, z0)) -+ -+/* -+** min_s16_m_untied: -+** movprfx z0, z1 -+** smin z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (min_s16_m_untied, svint16_t, -+ z0 = svmin_s16_m (p0, z1, z2), -+ z0 = svmin_m (p0, z1, z2)) -+ -+/* -+** min_w0_s16_m_tied1: -+** mov (z[0-9]+\.h), w0 -+** smin z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (min_w0_s16_m_tied1, svint16_t, int16_t, -+ z0 = svmin_n_s16_m (p0, z0, x0), -+ z0 = svmin_m (p0, z0, x0)) -+ -+/* -+** min_w0_s16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** smin z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (min_w0_s16_m_untied, svint16_t, int16_t, -+ z0 = svmin_n_s16_m (p0, z1, x0), -+ z0 = svmin_m (p0, z1, x0)) -+ -+/* -+** min_1_s16_m_tied1: -+** mov (z[0-9]+\.h), #1 -+** smin z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_s16_m_tied1, svint16_t, -+ z0 = svmin_n_s16_m (p0, z0, 1), -+ z0 = svmin_m (p0, z0, 1)) -+ -+/* -+** min_1_s16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), #1 -+** movprfx z0, z1 -+** smin z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_s16_m_untied, svint16_t, -+ z0 = svmin_n_s16_m (p0, z1, 1), -+ z0 = svmin_m (p0, z1, 1)) -+ -+/* -+** min_m1_s16_m: -+** mov (z[0-9]+)\.b, #-1 -+** smin z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (min_m1_s16_m, svint16_t, -+ z0 = svmin_n_s16_m (p0, z0, -1), -+ z0 = 
svmin_m (p0, z0, -1)) -+ -+/* -+** min_s16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** smin z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (min_s16_z_tied1, svint16_t, -+ z0 = svmin_s16_z (p0, z0, z1), -+ z0 = svmin_z (p0, z0, z1)) -+ -+/* -+** min_s16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** smin z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (min_s16_z_tied2, svint16_t, -+ z0 = svmin_s16_z (p0, z1, z0), -+ z0 = svmin_z (p0, z1, z0)) -+ -+/* -+** min_s16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** smin z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** smin z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (min_s16_z_untied, svint16_t, -+ z0 = svmin_s16_z (p0, z1, z2), -+ z0 = svmin_z (p0, z1, z2)) -+ -+/* -+** min_w0_s16_z_tied1: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** smin z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (min_w0_s16_z_tied1, svint16_t, int16_t, -+ z0 = svmin_n_s16_z (p0, z0, x0), -+ z0 = svmin_z (p0, z0, x0)) -+ -+/* -+** min_w0_s16_z_untied: -+** mov (z[0-9]+\.h), w0 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** smin z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** smin z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (min_w0_s16_z_untied, svint16_t, int16_t, -+ z0 = svmin_n_s16_z (p0, z1, x0), -+ z0 = svmin_z (p0, z1, x0)) -+ -+/* -+** min_1_s16_z_tied1: -+** mov (z[0-9]+\.h), #1 -+** movprfx z0\.h, p0/z, z0\.h -+** smin z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_s16_z_tied1, svint16_t, -+ z0 = svmin_n_s16_z (p0, z0, 1), -+ z0 = svmin_z (p0, z0, 1)) -+ -+/* -+** min_1_s16_z_untied: -+** mov (z[0-9]+\.h), #1 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** smin z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** smin z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_s16_z_untied, svint16_t, -+ z0 = svmin_n_s16_z (p0, z1, 1), -+ z0 = svmin_z (p0, z1, 1)) -+ -+/* -+** min_s16_x_tied1: -+** smin z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (min_s16_x_tied1, svint16_t, -+ z0 = svmin_s16_x (p0, z0, z1), -+ z0 = svmin_x (p0, z0, z1)) -+ -+/* -+** min_s16_x_tied2: -+** smin z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (min_s16_x_tied2, svint16_t, -+ z0 = svmin_s16_x (p0, z1, z0), -+ z0 = svmin_x (p0, z1, z0)) -+ -+/* -+** min_s16_x_untied: -+** ( -+** movprfx z0, z1 -+** smin z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0, z2 -+** smin z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (min_s16_x_untied, svint16_t, -+ z0 = svmin_s16_x (p0, z1, z2), -+ z0 = svmin_x (p0, z1, z2)) -+ -+/* -+** min_w0_s16_x_tied1: -+** mov (z[0-9]+\.h), w0 -+** smin z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (min_w0_s16_x_tied1, svint16_t, int16_t, -+ z0 = svmin_n_s16_x (p0, z0, x0), -+ z0 = svmin_x (p0, z0, x0)) -+ -+/* -+** min_w0_s16_x_untied: -+** mov z0\.h, w0 -+** smin z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZX (min_w0_s16_x_untied, svint16_t, int16_t, -+ z0 = svmin_n_s16_x (p0, z1, x0), -+ z0 = svmin_x (p0, z1, x0)) -+ -+/* -+** min_1_s16_x_tied1: -+** smin z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_s16_x_tied1, svint16_t, -+ z0 = svmin_n_s16_x (p0, z0, 1), -+ z0 = svmin_x (p0, z0, 1)) -+ -+/* -+** min_1_s16_x_untied: -+** movprfx z0, z1 -+** smin z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_s16_x_untied, svint16_t, -+ z0 = svmin_n_s16_x (p0, z1, 1), -+ z0 = svmin_x (p0, z1, 1)) -+ -+/* -+** min_127_s16_x: -+** smin z0\.h, z0\.h, 
#127 -+** ret -+*/ -+TEST_UNIFORM_Z (min_127_s16_x, svint16_t, -+ z0 = svmin_n_s16_x (p0, z0, 127), -+ z0 = svmin_x (p0, z0, 127)) -+ -+/* -+** min_128_s16_x: -+** mov (z[0-9]+\.h), #128 -+** smin z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_128_s16_x, svint16_t, -+ z0 = svmin_n_s16_x (p0, z0, 128), -+ z0 = svmin_x (p0, z0, 128)) -+ -+/* -+** min_m1_s16_x: -+** smin z0\.h, z0\.h, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_m1_s16_x, svint16_t, -+ z0 = svmin_n_s16_x (p0, z0, -1), -+ z0 = svmin_x (p0, z0, -1)) -+ -+/* -+** min_m128_s16_x: -+** smin z0\.h, z0\.h, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (min_m128_s16_x, svint16_t, -+ z0 = svmin_n_s16_x (p0, z0, -128), -+ z0 = svmin_x (p0, z0, -128)) -+ -+/* -+** min_m129_s16_x: -+** mov (z[0-9]+\.h), #-129 -+** smin z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_m129_s16_x, svint16_t, -+ z0 = svmin_n_s16_x (p0, z0, -129), -+ z0 = svmin_x (p0, z0, -129)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_s32.c -new file mode 100644 -index 000000000..cee2b649d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_s32.c -@@ -0,0 +1,293 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** min_s32_m_tied1: -+** smin z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (min_s32_m_tied1, svint32_t, -+ z0 = svmin_s32_m (p0, z0, z1), -+ z0 = svmin_m (p0, z0, z1)) -+ -+/* -+** min_s32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** smin z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (min_s32_m_tied2, svint32_t, -+ z0 = svmin_s32_m (p0, z1, z0), -+ z0 = svmin_m (p0, z1, z0)) -+ -+/* -+** min_s32_m_untied: -+** movprfx z0, z1 -+** smin z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (min_s32_m_untied, svint32_t, -+ z0 = svmin_s32_m (p0, z1, z2), -+ z0 = svmin_m (p0, z1, z2)) -+ -+/* -+** min_w0_s32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** smin z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (min_w0_s32_m_tied1, svint32_t, int32_t, -+ z0 = svmin_n_s32_m (p0, z0, x0), -+ z0 = svmin_m (p0, z0, x0)) -+ -+/* -+** min_w0_s32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** smin z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (min_w0_s32_m_untied, svint32_t, int32_t, -+ z0 = svmin_n_s32_m (p0, z1, x0), -+ z0 = svmin_m (p0, z1, x0)) -+ -+/* -+** min_1_s32_m_tied1: -+** mov (z[0-9]+\.s), #1 -+** smin z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_s32_m_tied1, svint32_t, -+ z0 = svmin_n_s32_m (p0, z0, 1), -+ z0 = svmin_m (p0, z0, 1)) -+ -+/* -+** min_1_s32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #1 -+** movprfx z0, z1 -+** smin z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_s32_m_untied, svint32_t, -+ z0 = svmin_n_s32_m (p0, z1, 1), -+ z0 = svmin_m (p0, z1, 1)) -+ -+/* -+** min_m1_s32_m: -+** mov (z[0-9]+)\.b, #-1 -+** smin z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (min_m1_s32_m, svint32_t, -+ z0 = svmin_n_s32_m (p0, z0, -1), -+ z0 = svmin_m (p0, z0, -1)) -+ -+/* -+** min_s32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** smin z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (min_s32_z_tied1, svint32_t, -+ z0 = svmin_s32_z (p0, z0, z1), -+ z0 = svmin_z (p0, z0, z1)) -+ -+/* -+** min_s32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** smin z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (min_s32_z_tied2, svint32_t, -+ z0 = svmin_s32_z (p0, z1, z0), -+ z0 = svmin_z (p0, 
z1, z0)) -+ -+/* -+** min_s32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** smin z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** smin z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (min_s32_z_untied, svint32_t, -+ z0 = svmin_s32_z (p0, z1, z2), -+ z0 = svmin_z (p0, z1, z2)) -+ -+/* -+** min_w0_s32_z_tied1: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** smin z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (min_w0_s32_z_tied1, svint32_t, int32_t, -+ z0 = svmin_n_s32_z (p0, z0, x0), -+ z0 = svmin_z (p0, z0, x0)) -+ -+/* -+** min_w0_s32_z_untied: -+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** smin z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** smin z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (min_w0_s32_z_untied, svint32_t, int32_t, -+ z0 = svmin_n_s32_z (p0, z1, x0), -+ z0 = svmin_z (p0, z1, x0)) -+ -+/* -+** min_1_s32_z_tied1: -+** mov (z[0-9]+\.s), #1 -+** movprfx z0\.s, p0/z, z0\.s -+** smin z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_s32_z_tied1, svint32_t, -+ z0 = svmin_n_s32_z (p0, z0, 1), -+ z0 = svmin_z (p0, z0, 1)) -+ -+/* -+** min_1_s32_z_untied: -+** mov (z[0-9]+\.s), #1 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** smin z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** smin z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_s32_z_untied, svint32_t, -+ z0 = svmin_n_s32_z (p0, z1, 1), -+ z0 = svmin_z (p0, z1, 1)) -+ -+/* -+** min_s32_x_tied1: -+** smin z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (min_s32_x_tied1, svint32_t, -+ z0 = svmin_s32_x (p0, z0, z1), -+ z0 = svmin_x (p0, z0, z1)) -+ -+/* -+** min_s32_x_tied2: -+** smin z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (min_s32_x_tied2, svint32_t, -+ z0 = svmin_s32_x (p0, z1, z0), -+ z0 = svmin_x (p0, z1, z0)) -+ -+/* -+** min_s32_x_untied: -+** ( -+** movprfx z0, z1 -+** smin z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0, z2 -+** smin z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (min_s32_x_untied, svint32_t, -+ z0 = svmin_s32_x (p0, z1, z2), -+ z0 = svmin_x (p0, z1, z2)) -+ -+/* -+** min_w0_s32_x_tied1: -+** mov (z[0-9]+\.s), w0 -+** smin z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (min_w0_s32_x_tied1, svint32_t, int32_t, -+ z0 = svmin_n_s32_x (p0, z0, x0), -+ z0 = svmin_x (p0, z0, x0)) -+ -+/* -+** min_w0_s32_x_untied: -+** mov z0\.s, w0 -+** smin z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (min_w0_s32_x_untied, svint32_t, int32_t, -+ z0 = svmin_n_s32_x (p0, z1, x0), -+ z0 = svmin_x (p0, z1, x0)) -+ -+/* -+** min_1_s32_x_tied1: -+** smin z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_s32_x_tied1, svint32_t, -+ z0 = svmin_n_s32_x (p0, z0, 1), -+ z0 = svmin_x (p0, z0, 1)) -+ -+/* -+** min_1_s32_x_untied: -+** movprfx z0, z1 -+** smin z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_s32_x_untied, svint32_t, -+ z0 = svmin_n_s32_x (p0, z1, 1), -+ z0 = svmin_x (p0, z1, 1)) -+ -+/* -+** min_127_s32_x: -+** smin z0\.s, z0\.s, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (min_127_s32_x, svint32_t, -+ z0 = svmin_n_s32_x (p0, z0, 127), -+ z0 = svmin_x (p0, z0, 127)) -+ -+/* -+** min_128_s32_x: -+** mov (z[0-9]+\.s), #128 -+** smin z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_128_s32_x, svint32_t, -+ z0 = svmin_n_s32_x (p0, z0, 128), -+ z0 = svmin_x (p0, z0, 128)) -+ -+/* -+** min_m1_s32_x: -+** smin z0\.s, z0\.s, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_m1_s32_x, svint32_t, 
-+ z0 = svmin_n_s32_x (p0, z0, -1), -+ z0 = svmin_x (p0, z0, -1)) -+ -+/* -+** min_m128_s32_x: -+** smin z0\.s, z0\.s, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (min_m128_s32_x, svint32_t, -+ z0 = svmin_n_s32_x (p0, z0, -128), -+ z0 = svmin_x (p0, z0, -128)) -+ -+/* -+** min_m129_s32_x: -+** mov (z[0-9]+\.s), #-129 -+** smin z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_m129_s32_x, svint32_t, -+ z0 = svmin_n_s32_x (p0, z0, -129), -+ z0 = svmin_x (p0, z0, -129)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_s64.c -new file mode 100644 -index 000000000..0d20bd0b2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_s64.c -@@ -0,0 +1,293 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** min_s64_m_tied1: -+** smin z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (min_s64_m_tied1, svint64_t, -+ z0 = svmin_s64_m (p0, z0, z1), -+ z0 = svmin_m (p0, z0, z1)) -+ -+/* -+** min_s64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** smin z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_s64_m_tied2, svint64_t, -+ z0 = svmin_s64_m (p0, z1, z0), -+ z0 = svmin_m (p0, z1, z0)) -+ -+/* -+** min_s64_m_untied: -+** movprfx z0, z1 -+** smin z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (min_s64_m_untied, svint64_t, -+ z0 = svmin_s64_m (p0, z1, z2), -+ z0 = svmin_m (p0, z1, z2)) -+ -+/* -+** min_x0_s64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** smin z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (min_x0_s64_m_tied1, svint64_t, int64_t, -+ z0 = svmin_n_s64_m (p0, z0, x0), -+ z0 = svmin_m (p0, z0, x0)) -+ -+/* -+** min_x0_s64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** smin z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (min_x0_s64_m_untied, svint64_t, int64_t, -+ z0 = svmin_n_s64_m (p0, z1, x0), -+ z0 = svmin_m (p0, z1, x0)) -+ -+/* -+** min_1_s64_m_tied1: -+** mov (z[0-9]+\.d), #1 -+** smin z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_s64_m_tied1, svint64_t, -+ z0 = svmin_n_s64_m (p0, z0, 1), -+ z0 = svmin_m (p0, z0, 1)) -+ -+/* -+** min_1_s64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #1 -+** movprfx z0, z1 -+** smin z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_s64_m_untied, svint64_t, -+ z0 = svmin_n_s64_m (p0, z1, 1), -+ z0 = svmin_m (p0, z1, 1)) -+ -+/* -+** min_m1_s64_m: -+** mov (z[0-9]+)\.b, #-1 -+** smin z0\.d, p0/m, z0\.d, \1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (min_m1_s64_m, svint64_t, -+ z0 = svmin_n_s64_m (p0, z0, -1), -+ z0 = svmin_m (p0, z0, -1)) -+ -+/* -+** min_s64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** smin z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (min_s64_z_tied1, svint64_t, -+ z0 = svmin_s64_z (p0, z0, z1), -+ z0 = svmin_z (p0, z0, z1)) -+ -+/* -+** min_s64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** smin z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (min_s64_z_tied2, svint64_t, -+ z0 = svmin_s64_z (p0, z1, z0), -+ z0 = svmin_z (p0, z1, z0)) -+ -+/* -+** min_s64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** smin z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** smin z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (min_s64_z_untied, svint64_t, -+ z0 = svmin_s64_z (p0, z1, z2), -+ z0 = svmin_z (p0, z1, z2)) -+ -+/* -+** min_x0_s64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** smin z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ 
-+TEST_UNIFORM_ZX (min_x0_s64_z_tied1, svint64_t, int64_t, -+ z0 = svmin_n_s64_z (p0, z0, x0), -+ z0 = svmin_z (p0, z0, x0)) -+ -+/* -+** min_x0_s64_z_untied: -+** mov (z[0-9]+\.d), x0 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** smin z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** smin z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (min_x0_s64_z_untied, svint64_t, int64_t, -+ z0 = svmin_n_s64_z (p0, z1, x0), -+ z0 = svmin_z (p0, z1, x0)) -+ -+/* -+** min_1_s64_z_tied1: -+** mov (z[0-9]+\.d), #1 -+** movprfx z0\.d, p0/z, z0\.d -+** smin z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_s64_z_tied1, svint64_t, -+ z0 = svmin_n_s64_z (p0, z0, 1), -+ z0 = svmin_z (p0, z0, 1)) -+ -+/* -+** min_1_s64_z_untied: -+** mov (z[0-9]+\.d), #1 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** smin z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** smin z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_s64_z_untied, svint64_t, -+ z0 = svmin_n_s64_z (p0, z1, 1), -+ z0 = svmin_z (p0, z1, 1)) -+ -+/* -+** min_s64_x_tied1: -+** smin z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (min_s64_x_tied1, svint64_t, -+ z0 = svmin_s64_x (p0, z0, z1), -+ z0 = svmin_x (p0, z0, z1)) -+ -+/* -+** min_s64_x_tied2: -+** smin z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (min_s64_x_tied2, svint64_t, -+ z0 = svmin_s64_x (p0, z1, z0), -+ z0 = svmin_x (p0, z1, z0)) -+ -+/* -+** min_s64_x_untied: -+** ( -+** movprfx z0, z1 -+** smin z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0, z2 -+** smin z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (min_s64_x_untied, svint64_t, -+ z0 = svmin_s64_x (p0, z1, z2), -+ z0 = svmin_x (p0, z1, z2)) -+ -+/* -+** min_x0_s64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** smin z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (min_x0_s64_x_tied1, svint64_t, int64_t, -+ z0 = svmin_n_s64_x (p0, z0, x0), -+ z0 = svmin_x (p0, z0, x0)) -+ -+/* -+** min_x0_s64_x_untied: -+** mov z0\.d, x0 -+** smin z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (min_x0_s64_x_untied, svint64_t, int64_t, -+ z0 = svmin_n_s64_x (p0, z1, x0), -+ z0 = svmin_x (p0, z1, x0)) -+ -+/* -+** min_1_s64_x_tied1: -+** smin z0\.d, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_s64_x_tied1, svint64_t, -+ z0 = svmin_n_s64_x (p0, z0, 1), -+ z0 = svmin_x (p0, z0, 1)) -+ -+/* -+** min_1_s64_x_untied: -+** movprfx z0, z1 -+** smin z0\.d, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_s64_x_untied, svint64_t, -+ z0 = svmin_n_s64_x (p0, z1, 1), -+ z0 = svmin_x (p0, z1, 1)) -+ -+/* -+** min_127_s64_x: -+** smin z0\.d, z0\.d, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (min_127_s64_x, svint64_t, -+ z0 = svmin_n_s64_x (p0, z0, 127), -+ z0 = svmin_x (p0, z0, 127)) -+ -+/* -+** min_128_s64_x: -+** mov (z[0-9]+\.d), #128 -+** smin z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_128_s64_x, svint64_t, -+ z0 = svmin_n_s64_x (p0, z0, 128), -+ z0 = svmin_x (p0, z0, 128)) -+ -+/* -+** min_m1_s64_x: -+** smin z0\.d, z0\.d, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_m1_s64_x, svint64_t, -+ z0 = svmin_n_s64_x (p0, z0, -1), -+ z0 = svmin_x (p0, z0, -1)) -+ -+/* -+** min_m128_s64_x: -+** smin z0\.d, z0\.d, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (min_m128_s64_x, svint64_t, -+ z0 = svmin_n_s64_x (p0, z0, -128), -+ z0 = svmin_x (p0, z0, -128)) -+ -+/* -+** min_m129_s64_x: -+** mov (z[0-9]+\.d), #-129 -+** smin z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_m129_s64_x, svint64_t, -+ z0 = svmin_n_s64_x (p0, z0, -129), -+ z0 = svmin_x 
(p0, z0, -129)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_s8.c -new file mode 100644 -index 000000000..714b1576d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_s8.c -@@ -0,0 +1,273 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** min_s8_m_tied1: -+** smin z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (min_s8_m_tied1, svint8_t, -+ z0 = svmin_s8_m (p0, z0, z1), -+ z0 = svmin_m (p0, z0, z1)) -+ -+/* -+** min_s8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** smin z0\.b, p0/m, z0\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (min_s8_m_tied2, svint8_t, -+ z0 = svmin_s8_m (p0, z1, z0), -+ z0 = svmin_m (p0, z1, z0)) -+ -+/* -+** min_s8_m_untied: -+** movprfx z0, z1 -+** smin z0\.b, p0/m, z0\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (min_s8_m_untied, svint8_t, -+ z0 = svmin_s8_m (p0, z1, z2), -+ z0 = svmin_m (p0, z1, z2)) -+ -+/* -+** min_w0_s8_m_tied1: -+** mov (z[0-9]+\.b), w0 -+** smin z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (min_w0_s8_m_tied1, svint8_t, int8_t, -+ z0 = svmin_n_s8_m (p0, z0, x0), -+ z0 = svmin_m (p0, z0, x0)) -+ -+/* -+** min_w0_s8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** smin z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (min_w0_s8_m_untied, svint8_t, int8_t, -+ z0 = svmin_n_s8_m (p0, z1, x0), -+ z0 = svmin_m (p0, z1, x0)) -+ -+/* -+** min_1_s8_m_tied1: -+** mov (z[0-9]+\.b), #1 -+** smin z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_s8_m_tied1, svint8_t, -+ z0 = svmin_n_s8_m (p0, z0, 1), -+ z0 = svmin_m (p0, z0, 1)) -+ -+/* -+** min_1_s8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), #1 -+** movprfx z0, z1 -+** smin z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_s8_m_untied, svint8_t, -+ z0 = svmin_n_s8_m (p0, z1, 1), -+ z0 = svmin_m (p0, z1, 1)) -+ -+/* -+** min_m1_s8_m: -+** mov (z[0-9]+\.b), #-1 -+** smin z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_m1_s8_m, svint8_t, -+ z0 = svmin_n_s8_m (p0, z0, -1), -+ z0 = svmin_m (p0, z0, -1)) -+ -+/* -+** min_s8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** smin z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (min_s8_z_tied1, svint8_t, -+ z0 = svmin_s8_z (p0, z0, z1), -+ z0 = svmin_z (p0, z0, z1)) -+ -+/* -+** min_s8_z_tied2: -+** movprfx z0\.b, p0/z, z0\.b -+** smin z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (min_s8_z_tied2, svint8_t, -+ z0 = svmin_s8_z (p0, z1, z0), -+ z0 = svmin_z (p0, z1, z0)) -+ -+/* -+** min_s8_z_untied: -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** smin z0\.b, p0/m, z0\.b, z2\.b -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** smin z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (min_s8_z_untied, svint8_t, -+ z0 = svmin_s8_z (p0, z1, z2), -+ z0 = svmin_z (p0, z1, z2)) -+ -+/* -+** min_w0_s8_z_tied1: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** smin z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (min_w0_s8_z_tied1, svint8_t, int8_t, -+ z0 = svmin_n_s8_z (p0, z0, x0), -+ z0 = svmin_z (p0, z0, x0)) -+ -+/* -+** min_w0_s8_z_untied: -+** mov (z[0-9]+\.b), w0 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** smin z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** smin z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (min_w0_s8_z_untied, svint8_t, int8_t, -+ z0 = svmin_n_s8_z (p0, z1, x0), -+ z0 = svmin_z (p0, z1, x0)) -+ -+/* -+** 
min_1_s8_z_tied1: -+** mov (z[0-9]+\.b), #1 -+** movprfx z0\.b, p0/z, z0\.b -+** smin z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_s8_z_tied1, svint8_t, -+ z0 = svmin_n_s8_z (p0, z0, 1), -+ z0 = svmin_z (p0, z0, 1)) -+ -+/* -+** min_1_s8_z_untied: -+** mov (z[0-9]+\.b), #1 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** smin z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** smin z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_s8_z_untied, svint8_t, -+ z0 = svmin_n_s8_z (p0, z1, 1), -+ z0 = svmin_z (p0, z1, 1)) -+ -+/* -+** min_s8_x_tied1: -+** smin z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (min_s8_x_tied1, svint8_t, -+ z0 = svmin_s8_x (p0, z0, z1), -+ z0 = svmin_x (p0, z0, z1)) -+ -+/* -+** min_s8_x_tied2: -+** smin z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (min_s8_x_tied2, svint8_t, -+ z0 = svmin_s8_x (p0, z1, z0), -+ z0 = svmin_x (p0, z1, z0)) -+ -+/* -+** min_s8_x_untied: -+** ( -+** movprfx z0, z1 -+** smin z0\.b, p0/m, z0\.b, z2\.b -+** | -+** movprfx z0, z2 -+** smin z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (min_s8_x_untied, svint8_t, -+ z0 = svmin_s8_x (p0, z1, z2), -+ z0 = svmin_x (p0, z1, z2)) -+ -+/* -+** min_w0_s8_x_tied1: -+** mov (z[0-9]+\.b), w0 -+** smin z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (min_w0_s8_x_tied1, svint8_t, int8_t, -+ z0 = svmin_n_s8_x (p0, z0, x0), -+ z0 = svmin_x (p0, z0, x0)) -+ -+/* -+** min_w0_s8_x_untied: -+** mov z0\.b, w0 -+** smin z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_ZX (min_w0_s8_x_untied, svint8_t, int8_t, -+ z0 = svmin_n_s8_x (p0, z1, x0), -+ z0 = svmin_x (p0, z1, x0)) -+ -+/* -+** min_1_s8_x_tied1: -+** smin z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_s8_x_tied1, svint8_t, -+ z0 = svmin_n_s8_x (p0, z0, 1), -+ z0 = svmin_x (p0, z0, 1)) -+ -+/* -+** min_1_s8_x_untied: -+** movprfx z0, z1 -+** smin z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_s8_x_untied, svint8_t, -+ z0 = svmin_n_s8_x (p0, z1, 1), -+ z0 = svmin_x (p0, z1, 1)) -+ -+/* -+** min_127_s8_x: -+** smin z0\.b, z0\.b, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (min_127_s8_x, svint8_t, -+ z0 = svmin_n_s8_x (p0, z0, 127), -+ z0 = svmin_x (p0, z0, 127)) -+ -+/* -+** min_m1_s8_x: -+** smin z0\.b, z0\.b, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_m1_s8_x, svint8_t, -+ z0 = svmin_n_s8_x (p0, z0, -1), -+ z0 = svmin_x (p0, z0, -1)) -+ -+/* -+** min_m127_s8_x: -+** smin z0\.b, z0\.b, #-127 -+** ret -+*/ -+TEST_UNIFORM_Z (min_m127_s8_x, svint8_t, -+ z0 = svmin_n_s8_x (p0, z0, -127), -+ z0 = svmin_x (p0, z0, -127)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_u16.c -new file mode 100644 -index 000000000..df35cf113 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_u16.c -@@ -0,0 +1,293 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** min_u16_m_tied1: -+** umin z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (min_u16_m_tied1, svuint16_t, -+ z0 = svmin_u16_m (p0, z0, z1), -+ z0 = svmin_m (p0, z0, z1)) -+ -+/* -+** min_u16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** umin z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (min_u16_m_tied2, svuint16_t, -+ z0 = svmin_u16_m (p0, z1, z0), -+ z0 = svmin_m (p0, z1, z0)) -+ -+/* -+** min_u16_m_untied: -+** movprfx z0, z1 -+** umin z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (min_u16_m_untied, svuint16_t, -+ z0 
= svmin_u16_m (p0, z1, z2), -+ z0 = svmin_m (p0, z1, z2)) -+ -+/* -+** min_w0_u16_m_tied1: -+** mov (z[0-9]+\.h), w0 -+** umin z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (min_w0_u16_m_tied1, svuint16_t, uint16_t, -+ z0 = svmin_n_u16_m (p0, z0, x0), -+ z0 = svmin_m (p0, z0, x0)) -+ -+/* -+** min_w0_u16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** umin z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (min_w0_u16_m_untied, svuint16_t, uint16_t, -+ z0 = svmin_n_u16_m (p0, z1, x0), -+ z0 = svmin_m (p0, z1, x0)) -+ -+/* -+** min_1_u16_m_tied1: -+** mov (z[0-9]+\.h), #1 -+** umin z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_u16_m_tied1, svuint16_t, -+ z0 = svmin_n_u16_m (p0, z0, 1), -+ z0 = svmin_m (p0, z0, 1)) -+ -+/* -+** min_1_u16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), #1 -+** movprfx z0, z1 -+** umin z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_u16_m_untied, svuint16_t, -+ z0 = svmin_n_u16_m (p0, z1, 1), -+ z0 = svmin_m (p0, z1, 1)) -+ -+/* -+** min_m1_u16_m: -+** mov (z[0-9]+)\.b, #-1 -+** umin z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (min_m1_u16_m, svuint16_t, -+ z0 = svmin_n_u16_m (p0, z0, -1), -+ z0 = svmin_m (p0, z0, -1)) -+ -+/* -+** min_u16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** umin z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (min_u16_z_tied1, svuint16_t, -+ z0 = svmin_u16_z (p0, z0, z1), -+ z0 = svmin_z (p0, z0, z1)) -+ -+/* -+** min_u16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** umin z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (min_u16_z_tied2, svuint16_t, -+ z0 = svmin_u16_z (p0, z1, z0), -+ z0 = svmin_z (p0, z1, z0)) -+ -+/* -+** min_u16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** umin z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** umin z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (min_u16_z_untied, svuint16_t, -+ z0 = svmin_u16_z (p0, z1, z2), -+ z0 = svmin_z (p0, z1, z2)) -+ -+/* -+** min_w0_u16_z_tied1: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** umin z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (min_w0_u16_z_tied1, svuint16_t, uint16_t, -+ z0 = svmin_n_u16_z (p0, z0, x0), -+ z0 = svmin_z (p0, z0, x0)) -+ -+/* -+** min_w0_u16_z_untied: -+** mov (z[0-9]+\.h), w0 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** umin z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** umin z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (min_w0_u16_z_untied, svuint16_t, uint16_t, -+ z0 = svmin_n_u16_z (p0, z1, x0), -+ z0 = svmin_z (p0, z1, x0)) -+ -+/* -+** min_1_u16_z_tied1: -+** mov (z[0-9]+\.h), #1 -+** movprfx z0\.h, p0/z, z0\.h -+** umin z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_u16_z_tied1, svuint16_t, -+ z0 = svmin_n_u16_z (p0, z0, 1), -+ z0 = svmin_z (p0, z0, 1)) -+ -+/* -+** min_1_u16_z_untied: -+** mov (z[0-9]+\.h), #1 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** umin z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** umin z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_u16_z_untied, svuint16_t, -+ z0 = svmin_n_u16_z (p0, z1, 1), -+ z0 = svmin_z (p0, z1, 1)) -+ -+/* -+** min_u16_x_tied1: -+** umin z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (min_u16_x_tied1, svuint16_t, -+ z0 = svmin_u16_x (p0, z0, z1), -+ z0 = svmin_x (p0, z0, z1)) -+ -+/* -+** min_u16_x_tied2: -+** umin z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (min_u16_x_tied2, svuint16_t, -+ z0 = svmin_u16_x 
(p0, z1, z0), -+ z0 = svmin_x (p0, z1, z0)) -+ -+/* -+** min_u16_x_untied: -+** ( -+** movprfx z0, z1 -+** umin z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0, z2 -+** umin z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (min_u16_x_untied, svuint16_t, -+ z0 = svmin_u16_x (p0, z1, z2), -+ z0 = svmin_x (p0, z1, z2)) -+ -+/* -+** min_w0_u16_x_tied1: -+** mov (z[0-9]+\.h), w0 -+** umin z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (min_w0_u16_x_tied1, svuint16_t, uint16_t, -+ z0 = svmin_n_u16_x (p0, z0, x0), -+ z0 = svmin_x (p0, z0, x0)) -+ -+/* -+** min_w0_u16_x_untied: -+** mov z0\.h, w0 -+** umin z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZX (min_w0_u16_x_untied, svuint16_t, uint16_t, -+ z0 = svmin_n_u16_x (p0, z1, x0), -+ z0 = svmin_x (p0, z1, x0)) -+ -+/* -+** min_1_u16_x_tied1: -+** umin z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_u16_x_tied1, svuint16_t, -+ z0 = svmin_n_u16_x (p0, z0, 1), -+ z0 = svmin_x (p0, z0, 1)) -+ -+/* -+** min_1_u16_x_untied: -+** movprfx z0, z1 -+** umin z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_u16_x_untied, svuint16_t, -+ z0 = svmin_n_u16_x (p0, z1, 1), -+ z0 = svmin_x (p0, z1, 1)) -+ -+/* -+** min_127_u16_x: -+** umin z0\.h, z0\.h, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (min_127_u16_x, svuint16_t, -+ z0 = svmin_n_u16_x (p0, z0, 127), -+ z0 = svmin_x (p0, z0, 127)) -+ -+/* -+** min_128_u16_x: -+** umin z0\.h, z0\.h, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (min_128_u16_x, svuint16_t, -+ z0 = svmin_n_u16_x (p0, z0, 128), -+ z0 = svmin_x (p0, z0, 128)) -+ -+/* -+** min_255_u16_x: -+** umin z0\.h, z0\.h, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (min_255_u16_x, svuint16_t, -+ z0 = svmin_n_u16_x (p0, z0, 255), -+ z0 = svmin_x (p0, z0, 255)) -+ -+/* -+** min_256_u16_x: -+** mov (z[0-9]+\.h), #256 -+** umin z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_256_u16_x, svuint16_t, -+ z0 = svmin_n_u16_x (p0, z0, 256), -+ z0 = svmin_x (p0, z0, 256)) -+ -+/* -+** min_m2_u16_x: -+** mov (z[0-9]+\.h), #-2 -+** umin z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_m2_u16_x, svuint16_t, -+ z0 = svmin_n_u16_x (p0, z0, -2), -+ z0 = svmin_x (p0, z0, -2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_u32.c -new file mode 100644 -index 000000000..7f84d099d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_u32.c -@@ -0,0 +1,293 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** min_u32_m_tied1: -+** umin z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (min_u32_m_tied1, svuint32_t, -+ z0 = svmin_u32_m (p0, z0, z1), -+ z0 = svmin_m (p0, z0, z1)) -+ -+/* -+** min_u32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** umin z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (min_u32_m_tied2, svuint32_t, -+ z0 = svmin_u32_m (p0, z1, z0), -+ z0 = svmin_m (p0, z1, z0)) -+ -+/* -+** min_u32_m_untied: -+** movprfx z0, z1 -+** umin z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (min_u32_m_untied, svuint32_t, -+ z0 = svmin_u32_m (p0, z1, z2), -+ z0 = svmin_m (p0, z1, z2)) -+ -+/* -+** min_w0_u32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** umin z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (min_w0_u32_m_tied1, svuint32_t, uint32_t, -+ z0 = svmin_n_u32_m (p0, z0, x0), -+ z0 = svmin_m (p0, z0, x0)) -+ -+/* -+** min_w0_u32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** umin z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ 
-+TEST_UNIFORM_ZX (min_w0_u32_m_untied, svuint32_t, uint32_t, -+ z0 = svmin_n_u32_m (p0, z1, x0), -+ z0 = svmin_m (p0, z1, x0)) -+ -+/* -+** min_1_u32_m_tied1: -+** mov (z[0-9]+\.s), #1 -+** umin z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_u32_m_tied1, svuint32_t, -+ z0 = svmin_n_u32_m (p0, z0, 1), -+ z0 = svmin_m (p0, z0, 1)) -+ -+/* -+** min_1_u32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #1 -+** movprfx z0, z1 -+** umin z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_u32_m_untied, svuint32_t, -+ z0 = svmin_n_u32_m (p0, z1, 1), -+ z0 = svmin_m (p0, z1, 1)) -+ -+/* -+** min_m1_u32_m: -+** mov (z[0-9]+)\.b, #-1 -+** umin z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (min_m1_u32_m, svuint32_t, -+ z0 = svmin_n_u32_m (p0, z0, -1), -+ z0 = svmin_m (p0, z0, -1)) -+ -+/* -+** min_u32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** umin z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (min_u32_z_tied1, svuint32_t, -+ z0 = svmin_u32_z (p0, z0, z1), -+ z0 = svmin_z (p0, z0, z1)) -+ -+/* -+** min_u32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** umin z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (min_u32_z_tied2, svuint32_t, -+ z0 = svmin_u32_z (p0, z1, z0), -+ z0 = svmin_z (p0, z1, z0)) -+ -+/* -+** min_u32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** umin z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** umin z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (min_u32_z_untied, svuint32_t, -+ z0 = svmin_u32_z (p0, z1, z2), -+ z0 = svmin_z (p0, z1, z2)) -+ -+/* -+** min_w0_u32_z_tied1: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** umin z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (min_w0_u32_z_tied1, svuint32_t, uint32_t, -+ z0 = svmin_n_u32_z (p0, z0, x0), -+ z0 = svmin_z (p0, z0, x0)) -+ -+/* -+** min_w0_u32_z_untied: -+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** umin z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** umin z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (min_w0_u32_z_untied, svuint32_t, uint32_t, -+ z0 = svmin_n_u32_z (p0, z1, x0), -+ z0 = svmin_z (p0, z1, x0)) -+ -+/* -+** min_1_u32_z_tied1: -+** mov (z[0-9]+\.s), #1 -+** movprfx z0\.s, p0/z, z0\.s -+** umin z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_u32_z_tied1, svuint32_t, -+ z0 = svmin_n_u32_z (p0, z0, 1), -+ z0 = svmin_z (p0, z0, 1)) -+ -+/* -+** min_1_u32_z_untied: -+** mov (z[0-9]+\.s), #1 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** umin z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** umin z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_u32_z_untied, svuint32_t, -+ z0 = svmin_n_u32_z (p0, z1, 1), -+ z0 = svmin_z (p0, z1, 1)) -+ -+/* -+** min_u32_x_tied1: -+** umin z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (min_u32_x_tied1, svuint32_t, -+ z0 = svmin_u32_x (p0, z0, z1), -+ z0 = svmin_x (p0, z0, z1)) -+ -+/* -+** min_u32_x_tied2: -+** umin z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (min_u32_x_tied2, svuint32_t, -+ z0 = svmin_u32_x (p0, z1, z0), -+ z0 = svmin_x (p0, z1, z0)) -+ -+/* -+** min_u32_x_untied: -+** ( -+** movprfx z0, z1 -+** umin z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0, z2 -+** umin z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (min_u32_x_untied, svuint32_t, -+ z0 = svmin_u32_x (p0, z1, z2), -+ z0 = svmin_x (p0, z1, z2)) -+ -+/* -+** min_w0_u32_x_tied1: -+** mov (z[0-9]+\.s), w0 -+** umin z0\.s, p0/m, z0\.s, \1 -+** ret 
-+*/ -+TEST_UNIFORM_ZX (min_w0_u32_x_tied1, svuint32_t, uint32_t, -+ z0 = svmin_n_u32_x (p0, z0, x0), -+ z0 = svmin_x (p0, z0, x0)) -+ -+/* -+** min_w0_u32_x_untied: -+** mov z0\.s, w0 -+** umin z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (min_w0_u32_x_untied, svuint32_t, uint32_t, -+ z0 = svmin_n_u32_x (p0, z1, x0), -+ z0 = svmin_x (p0, z1, x0)) -+ -+/* -+** min_1_u32_x_tied1: -+** umin z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_u32_x_tied1, svuint32_t, -+ z0 = svmin_n_u32_x (p0, z0, 1), -+ z0 = svmin_x (p0, z0, 1)) -+ -+/* -+** min_1_u32_x_untied: -+** movprfx z0, z1 -+** umin z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_u32_x_untied, svuint32_t, -+ z0 = svmin_n_u32_x (p0, z1, 1), -+ z0 = svmin_x (p0, z1, 1)) -+ -+/* -+** min_127_u32_x: -+** umin z0\.s, z0\.s, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (min_127_u32_x, svuint32_t, -+ z0 = svmin_n_u32_x (p0, z0, 127), -+ z0 = svmin_x (p0, z0, 127)) -+ -+/* -+** min_128_u32_x: -+** umin z0\.s, z0\.s, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (min_128_u32_x, svuint32_t, -+ z0 = svmin_n_u32_x (p0, z0, 128), -+ z0 = svmin_x (p0, z0, 128)) -+ -+/* -+** min_255_u32_x: -+** umin z0\.s, z0\.s, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (min_255_u32_x, svuint32_t, -+ z0 = svmin_n_u32_x (p0, z0, 255), -+ z0 = svmin_x (p0, z0, 255)) -+ -+/* -+** min_256_u32_x: -+** mov (z[0-9]+\.s), #256 -+** umin z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_256_u32_x, svuint32_t, -+ z0 = svmin_n_u32_x (p0, z0, 256), -+ z0 = svmin_x (p0, z0, 256)) -+ -+/* -+** min_m2_u32_x: -+** mov (z[0-9]+\.s), #-2 -+** umin z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_m2_u32_x, svuint32_t, -+ z0 = svmin_n_u32_x (p0, z0, -2), -+ z0 = svmin_x (p0, z0, -2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_u64.c -new file mode 100644 -index 000000000..06e6e5099 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_u64.c -@@ -0,0 +1,293 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** min_u64_m_tied1: -+** umin z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (min_u64_m_tied1, svuint64_t, -+ z0 = svmin_u64_m (p0, z0, z1), -+ z0 = svmin_m (p0, z0, z1)) -+ -+/* -+** min_u64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** umin z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_u64_m_tied2, svuint64_t, -+ z0 = svmin_u64_m (p0, z1, z0), -+ z0 = svmin_m (p0, z1, z0)) -+ -+/* -+** min_u64_m_untied: -+** movprfx z0, z1 -+** umin z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (min_u64_m_untied, svuint64_t, -+ z0 = svmin_u64_m (p0, z1, z2), -+ z0 = svmin_m (p0, z1, z2)) -+ -+/* -+** min_x0_u64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** umin z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (min_x0_u64_m_tied1, svuint64_t, uint64_t, -+ z0 = svmin_n_u64_m (p0, z0, x0), -+ z0 = svmin_m (p0, z0, x0)) -+ -+/* -+** min_x0_u64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** umin z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (min_x0_u64_m_untied, svuint64_t, uint64_t, -+ z0 = svmin_n_u64_m (p0, z1, x0), -+ z0 = svmin_m (p0, z1, x0)) -+ -+/* -+** min_1_u64_m_tied1: -+** mov (z[0-9]+\.d), #1 -+** umin z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_u64_m_tied1, svuint64_t, -+ z0 = svmin_n_u64_m (p0, z0, 1), -+ z0 = svmin_m (p0, z0, 1)) -+ -+/* -+** min_1_u64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #1 -+** movprfx z0, z1 
-+** umin z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_u64_m_untied, svuint64_t, -+ z0 = svmin_n_u64_m (p0, z1, 1), -+ z0 = svmin_m (p0, z1, 1)) -+ -+/* -+** min_m1_u64_m: -+** mov (z[0-9]+)\.b, #-1 -+** umin z0\.d, p0/m, z0\.d, \1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (min_m1_u64_m, svuint64_t, -+ z0 = svmin_n_u64_m (p0, z0, -1), -+ z0 = svmin_m (p0, z0, -1)) -+ -+/* -+** min_u64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** umin z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (min_u64_z_tied1, svuint64_t, -+ z0 = svmin_u64_z (p0, z0, z1), -+ z0 = svmin_z (p0, z0, z1)) -+ -+/* -+** min_u64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** umin z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (min_u64_z_tied2, svuint64_t, -+ z0 = svmin_u64_z (p0, z1, z0), -+ z0 = svmin_z (p0, z1, z0)) -+ -+/* -+** min_u64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** umin z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** umin z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (min_u64_z_untied, svuint64_t, -+ z0 = svmin_u64_z (p0, z1, z2), -+ z0 = svmin_z (p0, z1, z2)) -+ -+/* -+** min_x0_u64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** umin z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (min_x0_u64_z_tied1, svuint64_t, uint64_t, -+ z0 = svmin_n_u64_z (p0, z0, x0), -+ z0 = svmin_z (p0, z0, x0)) -+ -+/* -+** min_x0_u64_z_untied: -+** mov (z[0-9]+\.d), x0 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** umin z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** umin z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (min_x0_u64_z_untied, svuint64_t, uint64_t, -+ z0 = svmin_n_u64_z (p0, z1, x0), -+ z0 = svmin_z (p0, z1, x0)) -+ -+/* -+** min_1_u64_z_tied1: -+** mov (z[0-9]+\.d), #1 -+** movprfx z0\.d, p0/z, z0\.d -+** umin z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_u64_z_tied1, svuint64_t, -+ z0 = svmin_n_u64_z (p0, z0, 1), -+ z0 = svmin_z (p0, z0, 1)) -+ -+/* -+** min_1_u64_z_untied: -+** mov (z[0-9]+\.d), #1 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** umin z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** umin z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_u64_z_untied, svuint64_t, -+ z0 = svmin_n_u64_z (p0, z1, 1), -+ z0 = svmin_z (p0, z1, 1)) -+ -+/* -+** min_u64_x_tied1: -+** umin z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (min_u64_x_tied1, svuint64_t, -+ z0 = svmin_u64_x (p0, z0, z1), -+ z0 = svmin_x (p0, z0, z1)) -+ -+/* -+** min_u64_x_tied2: -+** umin z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (min_u64_x_tied2, svuint64_t, -+ z0 = svmin_u64_x (p0, z1, z0), -+ z0 = svmin_x (p0, z1, z0)) -+ -+/* -+** min_u64_x_untied: -+** ( -+** movprfx z0, z1 -+** umin z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0, z2 -+** umin z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (min_u64_x_untied, svuint64_t, -+ z0 = svmin_u64_x (p0, z1, z2), -+ z0 = svmin_x (p0, z1, z2)) -+ -+/* -+** min_x0_u64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** umin z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (min_x0_u64_x_tied1, svuint64_t, uint64_t, -+ z0 = svmin_n_u64_x (p0, z0, x0), -+ z0 = svmin_x (p0, z0, x0)) -+ -+/* -+** min_x0_u64_x_untied: -+** mov z0\.d, x0 -+** umin z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (min_x0_u64_x_untied, svuint64_t, uint64_t, -+ z0 = svmin_n_u64_x (p0, z1, x0), -+ z0 = svmin_x (p0, z1, x0)) -+ -+/* -+** min_1_u64_x_tied1: -+** umin z0\.d, z0\.d, #1 -+** ret -+*/ 
-+TEST_UNIFORM_Z (min_1_u64_x_tied1, svuint64_t, -+ z0 = svmin_n_u64_x (p0, z0, 1), -+ z0 = svmin_x (p0, z0, 1)) -+ -+/* -+** min_1_u64_x_untied: -+** movprfx z0, z1 -+** umin z0\.d, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_u64_x_untied, svuint64_t, -+ z0 = svmin_n_u64_x (p0, z1, 1), -+ z0 = svmin_x (p0, z1, 1)) -+ -+/* -+** min_127_u64_x: -+** umin z0\.d, z0\.d, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (min_127_u64_x, svuint64_t, -+ z0 = svmin_n_u64_x (p0, z0, 127), -+ z0 = svmin_x (p0, z0, 127)) -+ -+/* -+** min_128_u64_x: -+** umin z0\.d, z0\.d, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (min_128_u64_x, svuint64_t, -+ z0 = svmin_n_u64_x (p0, z0, 128), -+ z0 = svmin_x (p0, z0, 128)) -+ -+/* -+** min_255_u64_x: -+** umin z0\.d, z0\.d, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (min_255_u64_x, svuint64_t, -+ z0 = svmin_n_u64_x (p0, z0, 255), -+ z0 = svmin_x (p0, z0, 255)) -+ -+/* -+** min_256_u64_x: -+** mov (z[0-9]+\.d), #256 -+** umin z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_256_u64_x, svuint64_t, -+ z0 = svmin_n_u64_x (p0, z0, 256), -+ z0 = svmin_x (p0, z0, 256)) -+ -+/* -+** min_m2_u64_x: -+** mov (z[0-9]+\.d), #-2 -+** umin z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_m2_u64_x, svuint64_t, -+ z0 = svmin_n_u64_x (p0, z0, -2), -+ z0 = svmin_x (p0, z0, -2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_u8.c -new file mode 100644 -index 000000000..2ca274278 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/min_u8.c -@@ -0,0 +1,273 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** min_u8_m_tied1: -+** umin z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (min_u8_m_tied1, svuint8_t, -+ z0 = svmin_u8_m (p0, z0, z1), -+ z0 = svmin_m (p0, z0, z1)) -+ -+/* -+** min_u8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** umin z0\.b, p0/m, z0\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (min_u8_m_tied2, svuint8_t, -+ z0 = svmin_u8_m (p0, z1, z0), -+ z0 = svmin_m (p0, z1, z0)) -+ -+/* -+** min_u8_m_untied: -+** movprfx z0, z1 -+** umin z0\.b, p0/m, z0\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (min_u8_m_untied, svuint8_t, -+ z0 = svmin_u8_m (p0, z1, z2), -+ z0 = svmin_m (p0, z1, z2)) -+ -+/* -+** min_w0_u8_m_tied1: -+** mov (z[0-9]+\.b), w0 -+** umin z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (min_w0_u8_m_tied1, svuint8_t, uint8_t, -+ z0 = svmin_n_u8_m (p0, z0, x0), -+ z0 = svmin_m (p0, z0, x0)) -+ -+/* -+** min_w0_u8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** umin z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (min_w0_u8_m_untied, svuint8_t, uint8_t, -+ z0 = svmin_n_u8_m (p0, z1, x0), -+ z0 = svmin_m (p0, z1, x0)) -+ -+/* -+** min_1_u8_m_tied1: -+** mov (z[0-9]+\.b), #1 -+** umin z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_u8_m_tied1, svuint8_t, -+ z0 = svmin_n_u8_m (p0, z0, 1), -+ z0 = svmin_m (p0, z0, 1)) -+ -+/* -+** min_1_u8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), #1 -+** movprfx z0, z1 -+** umin z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_u8_m_untied, svuint8_t, -+ z0 = svmin_n_u8_m (p0, z1, 1), -+ z0 = svmin_m (p0, z1, 1)) -+ -+/* -+** min_m1_u8_m: -+** mov (z[0-9]+\.b), #-1 -+** umin z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_m1_u8_m, svuint8_t, -+ z0 = svmin_n_u8_m (p0, z0, -1), -+ z0 = svmin_m (p0, z0, -1)) -+ -+/* -+** min_u8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** umin z0\.b, 
p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (min_u8_z_tied1, svuint8_t, -+ z0 = svmin_u8_z (p0, z0, z1), -+ z0 = svmin_z (p0, z0, z1)) -+ -+/* -+** min_u8_z_tied2: -+** movprfx z0\.b, p0/z, z0\.b -+** umin z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (min_u8_z_tied2, svuint8_t, -+ z0 = svmin_u8_z (p0, z1, z0), -+ z0 = svmin_z (p0, z1, z0)) -+ -+/* -+** min_u8_z_untied: -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** umin z0\.b, p0/m, z0\.b, z2\.b -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** umin z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (min_u8_z_untied, svuint8_t, -+ z0 = svmin_u8_z (p0, z1, z2), -+ z0 = svmin_z (p0, z1, z2)) -+ -+/* -+** min_w0_u8_z_tied1: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** umin z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (min_w0_u8_z_tied1, svuint8_t, uint8_t, -+ z0 = svmin_n_u8_z (p0, z0, x0), -+ z0 = svmin_z (p0, z0, x0)) -+ -+/* -+** min_w0_u8_z_untied: -+** mov (z[0-9]+\.b), w0 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** umin z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** umin z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (min_w0_u8_z_untied, svuint8_t, uint8_t, -+ z0 = svmin_n_u8_z (p0, z1, x0), -+ z0 = svmin_z (p0, z1, x0)) -+ -+/* -+** min_1_u8_z_tied1: -+** mov (z[0-9]+\.b), #1 -+** movprfx z0\.b, p0/z, z0\.b -+** umin z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_u8_z_tied1, svuint8_t, -+ z0 = svmin_n_u8_z (p0, z0, 1), -+ z0 = svmin_z (p0, z0, 1)) -+ -+/* -+** min_1_u8_z_untied: -+** mov (z[0-9]+\.b), #1 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** umin z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** umin z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_u8_z_untied, svuint8_t, -+ z0 = svmin_n_u8_z (p0, z1, 1), -+ z0 = svmin_z (p0, z1, 1)) -+ -+/* -+** min_u8_x_tied1: -+** umin z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (min_u8_x_tied1, svuint8_t, -+ z0 = svmin_u8_x (p0, z0, z1), -+ z0 = svmin_x (p0, z0, z1)) -+ -+/* -+** min_u8_x_tied2: -+** umin z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (min_u8_x_tied2, svuint8_t, -+ z0 = svmin_u8_x (p0, z1, z0), -+ z0 = svmin_x (p0, z1, z0)) -+ -+/* -+** min_u8_x_untied: -+** ( -+** movprfx z0, z1 -+** umin z0\.b, p0/m, z0\.b, z2\.b -+** | -+** movprfx z0, z2 -+** umin z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (min_u8_x_untied, svuint8_t, -+ z0 = svmin_u8_x (p0, z1, z2), -+ z0 = svmin_x (p0, z1, z2)) -+ -+/* -+** min_w0_u8_x_tied1: -+** mov (z[0-9]+\.b), w0 -+** umin z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (min_w0_u8_x_tied1, svuint8_t, uint8_t, -+ z0 = svmin_n_u8_x (p0, z0, x0), -+ z0 = svmin_x (p0, z0, x0)) -+ -+/* -+** min_w0_u8_x_untied: -+** mov z0\.b, w0 -+** umin z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_ZX (min_w0_u8_x_untied, svuint8_t, uint8_t, -+ z0 = svmin_n_u8_x (p0, z1, x0), -+ z0 = svmin_x (p0, z1, x0)) -+ -+/* -+** min_1_u8_x_tied1: -+** umin z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_u8_x_tied1, svuint8_t, -+ z0 = svmin_n_u8_x (p0, z0, 1), -+ z0 = svmin_x (p0, z0, 1)) -+ -+/* -+** min_1_u8_x_untied: -+** movprfx z0, z1 -+** umin z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (min_1_u8_x_untied, svuint8_t, -+ z0 = svmin_n_u8_x (p0, z1, 1), -+ z0 = svmin_x (p0, z1, 1)) -+ -+/* -+** min_127_u8_x: -+** umin z0\.b, z0\.b, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (min_127_u8_x, svuint8_t, -+ z0 = svmin_n_u8_x (p0, z0, 127), -+ z0 = svmin_x (p0, z0, 127)) -+ -+/* -+** 
min_128_u8_x: -+** umin z0\.b, z0\.b, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (min_128_u8_x, svuint8_t, -+ z0 = svmin_n_u8_x (p0, z0, 128), -+ z0 = svmin_x (p0, z0, 128)) -+ -+/* -+** min_254_u8_x: -+** umin z0\.b, z0\.b, #254 -+** ret -+*/ -+TEST_UNIFORM_Z (min_254_u8_x, svuint8_t, -+ z0 = svmin_n_u8_x (p0, z0, 254), -+ z0 = svmin_x (p0, z0, 254)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minnm_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minnm_f16.c -new file mode 100644 -index 000000000..43caaa14e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minnm_f16.c -@@ -0,0 +1,425 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** minnm_f16_m_tied1: -+** fminnm z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_f16_m_tied1, svfloat16_t, -+ z0 = svminnm_f16_m (p0, z0, z1), -+ z0 = svminnm_m (p0, z0, z1)) -+ -+/* -+** minnm_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fminnm z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_f16_m_tied2, svfloat16_t, -+ z0 = svminnm_f16_m (p0, z1, z0), -+ z0 = svminnm_m (p0, z1, z0)) -+ -+/* -+** minnm_f16_m_untied: -+** movprfx z0, z1 -+** fminnm z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_f16_m_untied, svfloat16_t, -+ z0 = svminnm_f16_m (p0, z1, z2), -+ z0 = svminnm_m (p0, z1, z2)) -+ -+/* -+** minnm_h4_f16_m_tied1: -+** mov (z[0-9]+\.h), h4 -+** fminnm z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (minnm_h4_f16_m_tied1, svfloat16_t, __fp16, -+ z0 = svminnm_n_f16_m (p0, z0, d4), -+ z0 = svminnm_m (p0, z0, d4)) -+ -+/* -+** minnm_h4_f16_m_untied: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0, z1 -+** fminnm z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (minnm_h4_f16_m_untied, svfloat16_t, __fp16, -+ z0 = svminnm_n_f16_m (p0, z1, d4), -+ z0 = svminnm_m (p0, z1, d4)) -+ -+/* -+** minnm_0_f16_m_tied1: -+** fminnm z0\.h, p0/m, z0\.h, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_0_f16_m_tied1, svfloat16_t, -+ z0 = svminnm_n_f16_m (p0, z0, 0), -+ z0 = svminnm_m (p0, z0, 0)) -+ -+/* -+** minnm_0_f16_m_untied: -+** movprfx z0, z1 -+** fminnm z0\.h, p0/m, z0\.h, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_0_f16_m_untied, svfloat16_t, -+ z0 = svminnm_n_f16_m (p0, z1, 0), -+ z0 = svminnm_m (p0, z1, 0)) -+ -+/* -+** minnm_1_f16_m_tied1: -+** fminnm z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_1_f16_m_tied1, svfloat16_t, -+ z0 = svminnm_n_f16_m (p0, z0, 1), -+ z0 = svminnm_m (p0, z0, 1)) -+ -+/* -+** minnm_1_f16_m_untied: -+** movprfx z0, z1 -+** fminnm z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_1_f16_m_untied, svfloat16_t, -+ z0 = svminnm_n_f16_m (p0, z1, 1), -+ z0 = svminnm_m (p0, z1, 1)) -+ -+/* -+** minnm_2_f16_m: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
-+** fminnm z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_2_f16_m, svfloat16_t, -+ z0 = svminnm_n_f16_m (p0, z0, 2), -+ z0 = svminnm_m (p0, z0, 2)) -+ -+/* -+** minnm_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fminnm z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_f16_z_tied1, svfloat16_t, -+ z0 = svminnm_f16_z (p0, z0, z1), -+ z0 = svminnm_z (p0, z0, z1)) -+ -+/* -+** minnm_f16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** fminnm z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_f16_z_tied2, svfloat16_t, -+ z0 = svminnm_f16_z (p0, z1, z0), -+ z0 = svminnm_z (p0, z1, z0)) -+ -+/* -+** minnm_f16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fminnm z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fminnm z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_f16_z_untied, svfloat16_t, -+ z0 = svminnm_f16_z (p0, z1, z2), -+ z0 = svminnm_z (p0, z1, z2)) -+ -+/* -+** minnm_h4_f16_z_tied1: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0\.h, p0/z, z0\.h -+** fminnm z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (minnm_h4_f16_z_tied1, svfloat16_t, __fp16, -+ z0 = svminnm_n_f16_z (p0, z0, d4), -+ z0 = svminnm_z (p0, z0, d4)) -+ -+/* -+** minnm_h4_f16_z_untied: -+** mov (z[0-9]+\.h), h4 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fminnm z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** fminnm z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (minnm_h4_f16_z_untied, svfloat16_t, __fp16, -+ z0 = svminnm_n_f16_z (p0, z1, d4), -+ z0 = svminnm_z (p0, z1, d4)) -+ -+/* -+** minnm_0_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fminnm z0\.h, p0/m, z0\.h, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_0_f16_z_tied1, svfloat16_t, -+ z0 = svminnm_n_f16_z (p0, z0, 0), -+ z0 = svminnm_z (p0, z0, 0)) -+ -+/* -+** minnm_0_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fminnm z0\.h, p0/m, z0\.h, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_0_f16_z_untied, svfloat16_t, -+ z0 = svminnm_n_f16_z (p0, z1, 0), -+ z0 = svminnm_z (p0, z1, 0)) -+ -+/* -+** minnm_1_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fminnm z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_1_f16_z_tied1, svfloat16_t, -+ z0 = svminnm_n_f16_z (p0, z0, 1), -+ z0 = svminnm_z (p0, z0, 1)) -+ -+/* -+** minnm_1_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fminnm z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_1_f16_z_untied, svfloat16_t, -+ z0 = svminnm_n_f16_z (p0, z1, 1), -+ z0 = svminnm_z (p0, z1, 1)) -+ -+/* -+** minnm_2_f16_z: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
-+** movprfx z0\.h, p0/z, z0\.h -+** fminnm z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_2_f16_z, svfloat16_t, -+ z0 = svminnm_n_f16_z (p0, z0, 2), -+ z0 = svminnm_z (p0, z0, 2)) -+ -+/* -+** minnm_f16_x_tied1: -+** fminnm z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_f16_x_tied1, svfloat16_t, -+ z0 = svminnm_f16_x (p0, z0, z1), -+ z0 = svminnm_x (p0, z0, z1)) -+ -+/* -+** minnm_f16_x_tied2: -+** fminnm z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_f16_x_tied2, svfloat16_t, -+ z0 = svminnm_f16_x (p0, z1, z0), -+ z0 = svminnm_x (p0, z1, z0)) -+ -+/* -+** minnm_f16_x_untied: -+** ( -+** movprfx z0, z1 -+** fminnm z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0, z2 -+** fminnm z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_f16_x_untied, svfloat16_t, -+ z0 = svminnm_f16_x (p0, z1, z2), -+ z0 = svminnm_x (p0, z1, z2)) -+ -+/* -+** minnm_h4_f16_x_tied1: -+** mov (z[0-9]+\.h), h4 -+** fminnm z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (minnm_h4_f16_x_tied1, svfloat16_t, __fp16, -+ z0 = svminnm_n_f16_x (p0, z0, d4), -+ z0 = svminnm_x (p0, z0, d4)) -+ -+/* -+** minnm_h4_f16_x_untied: -+** mov z0\.h, h4 -+** fminnm z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZD (minnm_h4_f16_x_untied, svfloat16_t, __fp16, -+ z0 = svminnm_n_f16_x (p0, z1, d4), -+ z0 = svminnm_x (p0, z1, d4)) -+ -+/* -+** minnm_0_f16_x_tied1: -+** fminnm z0\.h, p0/m, z0\.h, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_0_f16_x_tied1, svfloat16_t, -+ z0 = svminnm_n_f16_x (p0, z0, 0), -+ z0 = svminnm_x (p0, z0, 0)) -+ -+/* -+** minnm_0_f16_x_untied: -+** movprfx z0, z1 -+** fminnm z0\.h, p0/m, z0\.h, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_0_f16_x_untied, svfloat16_t, -+ z0 = svminnm_n_f16_x (p0, z1, 0), -+ z0 = svminnm_x (p0, z1, 0)) -+ -+/* -+** minnm_1_f16_x_tied1: -+** fminnm z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_1_f16_x_tied1, svfloat16_t, -+ z0 = svminnm_n_f16_x (p0, z0, 1), -+ z0 = svminnm_x (p0, z0, 1)) -+ -+/* -+** minnm_1_f16_x_untied: -+** movprfx z0, z1 -+** fminnm z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_1_f16_x_untied, svfloat16_t, -+ z0 = svminnm_n_f16_x (p0, z1, 1), -+ z0 = svminnm_x (p0, z1, 1)) -+ -+/* -+** minnm_2_f16_x_tied1: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fminnm z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_2_f16_x_tied1, svfloat16_t, -+ z0 = svminnm_n_f16_x (p0, z0, 2), -+ z0 = svminnm_x (p0, z0, 2)) -+ -+/* -+** minnm_2_f16_x_untied: -+** fmov z0\.h, #2\.0(?:e\+0)? -+** fminnm z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_2_f16_x_untied, svfloat16_t, -+ z0 = svminnm_n_f16_x (p0, z1, 2), -+ z0 = svminnm_x (p0, z1, 2)) -+ -+/* -+** ptrue_minnm_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_minnm_f16_x_tied1, svfloat16_t, -+ z0 = svminnm_f16_x (svptrue_b16 (), z0, z1), -+ z0 = svminnm_x (svptrue_b16 (), z0, z1)) -+ -+/* -+** ptrue_minnm_f16_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_minnm_f16_x_tied2, svfloat16_t, -+ z0 = svminnm_f16_x (svptrue_b16 (), z1, z0), -+ z0 = svminnm_x (svptrue_b16 (), z1, z0)) -+ -+/* -+** ptrue_minnm_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_minnm_f16_x_untied, svfloat16_t, -+ z0 = svminnm_f16_x (svptrue_b16 (), z1, z2), -+ z0 = svminnm_x (svptrue_b16 (), z1, z2)) -+ -+/* -+** ptrue_minnm_0_f16_x_tied1: -+** ... 
-+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_minnm_0_f16_x_tied1, svfloat16_t, -+ z0 = svminnm_n_f16_x (svptrue_b16 (), z0, 0), -+ z0 = svminnm_x (svptrue_b16 (), z0, 0)) -+ -+/* -+** ptrue_minnm_0_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_minnm_0_f16_x_untied, svfloat16_t, -+ z0 = svminnm_n_f16_x (svptrue_b16 (), z1, 0), -+ z0 = svminnm_x (svptrue_b16 (), z1, 0)) -+ -+/* -+** ptrue_minnm_1_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_minnm_1_f16_x_tied1, svfloat16_t, -+ z0 = svminnm_n_f16_x (svptrue_b16 (), z0, 1), -+ z0 = svminnm_x (svptrue_b16 (), z0, 1)) -+ -+/* -+** ptrue_minnm_1_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_minnm_1_f16_x_untied, svfloat16_t, -+ z0 = svminnm_n_f16_x (svptrue_b16 (), z1, 1), -+ z0 = svminnm_x (svptrue_b16 (), z1, 1)) -+ -+/* -+** ptrue_minnm_2_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_minnm_2_f16_x_tied1, svfloat16_t, -+ z0 = svminnm_n_f16_x (svptrue_b16 (), z0, 2), -+ z0 = svminnm_x (svptrue_b16 (), z0, 2)) -+ -+/* -+** ptrue_minnm_2_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_minnm_2_f16_x_untied, svfloat16_t, -+ z0 = svminnm_n_f16_x (svptrue_b16 (), z1, 2), -+ z0 = svminnm_x (svptrue_b16 (), z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minnm_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minnm_f32.c -new file mode 100644 -index 000000000..4fac8e8ad ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minnm_f32.c -@@ -0,0 +1,425 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** minnm_f32_m_tied1: -+** fminnm z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_f32_m_tied1, svfloat32_t, -+ z0 = svminnm_f32_m (p0, z0, z1), -+ z0 = svminnm_m (p0, z0, z1)) -+ -+/* -+** minnm_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fminnm z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_f32_m_tied2, svfloat32_t, -+ z0 = svminnm_f32_m (p0, z1, z0), -+ z0 = svminnm_m (p0, z1, z0)) -+ -+/* -+** minnm_f32_m_untied: -+** movprfx z0, z1 -+** fminnm z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_f32_m_untied, svfloat32_t, -+ z0 = svminnm_f32_m (p0, z1, z2), -+ z0 = svminnm_m (p0, z1, z2)) -+ -+/* -+** minnm_s4_f32_m_tied1: -+** mov (z[0-9]+\.s), s4 -+** fminnm z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (minnm_s4_f32_m_tied1, svfloat32_t, float, -+ z0 = svminnm_n_f32_m (p0, z0, d4), -+ z0 = svminnm_m (p0, z0, d4)) -+ -+/* -+** minnm_s4_f32_m_untied: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0, z1 -+** fminnm z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (minnm_s4_f32_m_untied, svfloat32_t, float, -+ z0 = svminnm_n_f32_m (p0, z1, d4), -+ z0 = svminnm_m (p0, z1, d4)) -+ -+/* -+** minnm_0_f32_m_tied1: -+** fminnm z0\.s, p0/m, z0\.s, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_0_f32_m_tied1, svfloat32_t, -+ z0 = svminnm_n_f32_m (p0, z0, 0), -+ z0 = svminnm_m (p0, z0, 0)) -+ -+/* -+** minnm_0_f32_m_untied: -+** movprfx z0, z1 -+** fminnm z0\.s, p0/m, z0\.s, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_0_f32_m_untied, svfloat32_t, -+ z0 = svminnm_n_f32_m (p0, z1, 0), -+ z0 = svminnm_m (p0, z1, 0)) -+ -+/* -+** minnm_1_f32_m_tied1: -+** fminnm z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ 
-+TEST_UNIFORM_Z (minnm_1_f32_m_tied1, svfloat32_t, -+ z0 = svminnm_n_f32_m (p0, z0, 1), -+ z0 = svminnm_m (p0, z0, 1)) -+ -+/* -+** minnm_1_f32_m_untied: -+** movprfx z0, z1 -+** fminnm z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_1_f32_m_untied, svfloat32_t, -+ z0 = svminnm_n_f32_m (p0, z1, 1), -+ z0 = svminnm_m (p0, z1, 1)) -+ -+/* -+** minnm_2_f32_m: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fminnm z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_2_f32_m, svfloat32_t, -+ z0 = svminnm_n_f32_m (p0, z0, 2), -+ z0 = svminnm_m (p0, z0, 2)) -+ -+/* -+** minnm_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fminnm z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_f32_z_tied1, svfloat32_t, -+ z0 = svminnm_f32_z (p0, z0, z1), -+ z0 = svminnm_z (p0, z0, z1)) -+ -+/* -+** minnm_f32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** fminnm z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_f32_z_tied2, svfloat32_t, -+ z0 = svminnm_f32_z (p0, z1, z0), -+ z0 = svminnm_z (p0, z1, z0)) -+ -+/* -+** minnm_f32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fminnm z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fminnm z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_f32_z_untied, svfloat32_t, -+ z0 = svminnm_f32_z (p0, z1, z2), -+ z0 = svminnm_z (p0, z1, z2)) -+ -+/* -+** minnm_s4_f32_z_tied1: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0\.s, p0/z, z0\.s -+** fminnm z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (minnm_s4_f32_z_tied1, svfloat32_t, float, -+ z0 = svminnm_n_f32_z (p0, z0, d4), -+ z0 = svminnm_z (p0, z0, d4)) -+ -+/* -+** minnm_s4_f32_z_untied: -+** mov (z[0-9]+\.s), s4 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fminnm z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** fminnm z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (minnm_s4_f32_z_untied, svfloat32_t, float, -+ z0 = svminnm_n_f32_z (p0, z1, d4), -+ z0 = svminnm_z (p0, z1, d4)) -+ -+/* -+** minnm_0_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fminnm z0\.s, p0/m, z0\.s, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_0_f32_z_tied1, svfloat32_t, -+ z0 = svminnm_n_f32_z (p0, z0, 0), -+ z0 = svminnm_z (p0, z0, 0)) -+ -+/* -+** minnm_0_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fminnm z0\.s, p0/m, z0\.s, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_0_f32_z_untied, svfloat32_t, -+ z0 = svminnm_n_f32_z (p0, z1, 0), -+ z0 = svminnm_z (p0, z1, 0)) -+ -+/* -+** minnm_1_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fminnm z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_1_f32_z_tied1, svfloat32_t, -+ z0 = svminnm_n_f32_z (p0, z0, 1), -+ z0 = svminnm_z (p0, z0, 1)) -+ -+/* -+** minnm_1_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fminnm z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_1_f32_z_untied, svfloat32_t, -+ z0 = svminnm_n_f32_z (p0, z1, 1), -+ z0 = svminnm_z (p0, z1, 1)) -+ -+/* -+** minnm_2_f32_z: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
-+** movprfx z0\.s, p0/z, z0\.s -+** fminnm z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_2_f32_z, svfloat32_t, -+ z0 = svminnm_n_f32_z (p0, z0, 2), -+ z0 = svminnm_z (p0, z0, 2)) -+ -+/* -+** minnm_f32_x_tied1: -+** fminnm z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_f32_x_tied1, svfloat32_t, -+ z0 = svminnm_f32_x (p0, z0, z1), -+ z0 = svminnm_x (p0, z0, z1)) -+ -+/* -+** minnm_f32_x_tied2: -+** fminnm z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_f32_x_tied2, svfloat32_t, -+ z0 = svminnm_f32_x (p0, z1, z0), -+ z0 = svminnm_x (p0, z1, z0)) -+ -+/* -+** minnm_f32_x_untied: -+** ( -+** movprfx z0, z1 -+** fminnm z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0, z2 -+** fminnm z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_f32_x_untied, svfloat32_t, -+ z0 = svminnm_f32_x (p0, z1, z2), -+ z0 = svminnm_x (p0, z1, z2)) -+ -+/* -+** minnm_s4_f32_x_tied1: -+** mov (z[0-9]+\.s), s4 -+** fminnm z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (minnm_s4_f32_x_tied1, svfloat32_t, float, -+ z0 = svminnm_n_f32_x (p0, z0, d4), -+ z0 = svminnm_x (p0, z0, d4)) -+ -+/* -+** minnm_s4_f32_x_untied: -+** mov z0\.s, s4 -+** fminnm z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZD (minnm_s4_f32_x_untied, svfloat32_t, float, -+ z0 = svminnm_n_f32_x (p0, z1, d4), -+ z0 = svminnm_x (p0, z1, d4)) -+ -+/* -+** minnm_0_f32_x_tied1: -+** fminnm z0\.s, p0/m, z0\.s, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_0_f32_x_tied1, svfloat32_t, -+ z0 = svminnm_n_f32_x (p0, z0, 0), -+ z0 = svminnm_x (p0, z0, 0)) -+ -+/* -+** minnm_0_f32_x_untied: -+** movprfx z0, z1 -+** fminnm z0\.s, p0/m, z0\.s, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_0_f32_x_untied, svfloat32_t, -+ z0 = svminnm_n_f32_x (p0, z1, 0), -+ z0 = svminnm_x (p0, z1, 0)) -+ -+/* -+** minnm_1_f32_x_tied1: -+** fminnm z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_1_f32_x_tied1, svfloat32_t, -+ z0 = svminnm_n_f32_x (p0, z0, 1), -+ z0 = svminnm_x (p0, z0, 1)) -+ -+/* -+** minnm_1_f32_x_untied: -+** movprfx z0, z1 -+** fminnm z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_1_f32_x_untied, svfloat32_t, -+ z0 = svminnm_n_f32_x (p0, z1, 1), -+ z0 = svminnm_x (p0, z1, 1)) -+ -+/* -+** minnm_2_f32_x_tied1: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fminnm z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_2_f32_x_tied1, svfloat32_t, -+ z0 = svminnm_n_f32_x (p0, z0, 2), -+ z0 = svminnm_x (p0, z0, 2)) -+ -+/* -+** minnm_2_f32_x_untied: -+** fmov z0\.s, #2\.0(?:e\+0)? -+** fminnm z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_2_f32_x_untied, svfloat32_t, -+ z0 = svminnm_n_f32_x (p0, z1, 2), -+ z0 = svminnm_x (p0, z1, 2)) -+ -+/* -+** ptrue_minnm_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_minnm_f32_x_tied1, svfloat32_t, -+ z0 = svminnm_f32_x (svptrue_b32 (), z0, z1), -+ z0 = svminnm_x (svptrue_b32 (), z0, z1)) -+ -+/* -+** ptrue_minnm_f32_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_minnm_f32_x_tied2, svfloat32_t, -+ z0 = svminnm_f32_x (svptrue_b32 (), z1, z0), -+ z0 = svminnm_x (svptrue_b32 (), z1, z0)) -+ -+/* -+** ptrue_minnm_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_minnm_f32_x_untied, svfloat32_t, -+ z0 = svminnm_f32_x (svptrue_b32 (), z1, z2), -+ z0 = svminnm_x (svptrue_b32 (), z1, z2)) -+ -+/* -+** ptrue_minnm_0_f32_x_tied1: -+** ... 
-+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_minnm_0_f32_x_tied1, svfloat32_t, -+ z0 = svminnm_n_f32_x (svptrue_b32 (), z0, 0), -+ z0 = svminnm_x (svptrue_b32 (), z0, 0)) -+ -+/* -+** ptrue_minnm_0_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_minnm_0_f32_x_untied, svfloat32_t, -+ z0 = svminnm_n_f32_x (svptrue_b32 (), z1, 0), -+ z0 = svminnm_x (svptrue_b32 (), z1, 0)) -+ -+/* -+** ptrue_minnm_1_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_minnm_1_f32_x_tied1, svfloat32_t, -+ z0 = svminnm_n_f32_x (svptrue_b32 (), z0, 1), -+ z0 = svminnm_x (svptrue_b32 (), z0, 1)) -+ -+/* -+** ptrue_minnm_1_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_minnm_1_f32_x_untied, svfloat32_t, -+ z0 = svminnm_n_f32_x (svptrue_b32 (), z1, 1), -+ z0 = svminnm_x (svptrue_b32 (), z1, 1)) -+ -+/* -+** ptrue_minnm_2_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_minnm_2_f32_x_tied1, svfloat32_t, -+ z0 = svminnm_n_f32_x (svptrue_b32 (), z0, 2), -+ z0 = svminnm_x (svptrue_b32 (), z0, 2)) -+ -+/* -+** ptrue_minnm_2_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_minnm_2_f32_x_untied, svfloat32_t, -+ z0 = svminnm_n_f32_x (svptrue_b32 (), z1, 2), -+ z0 = svminnm_x (svptrue_b32 (), z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minnm_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minnm_f64.c -new file mode 100644 -index 000000000..67993928f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minnm_f64.c -@@ -0,0 +1,425 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** minnm_f64_m_tied1: -+** fminnm z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_f64_m_tied1, svfloat64_t, -+ z0 = svminnm_f64_m (p0, z0, z1), -+ z0 = svminnm_m (p0, z0, z1)) -+ -+/* -+** minnm_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fminnm z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_f64_m_tied2, svfloat64_t, -+ z0 = svminnm_f64_m (p0, z1, z0), -+ z0 = svminnm_m (p0, z1, z0)) -+ -+/* -+** minnm_f64_m_untied: -+** movprfx z0, z1 -+** fminnm z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_f64_m_untied, svfloat64_t, -+ z0 = svminnm_f64_m (p0, z1, z2), -+ z0 = svminnm_m (p0, z1, z2)) -+ -+/* -+** minnm_d4_f64_m_tied1: -+** mov (z[0-9]+\.d), d4 -+** fminnm z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (minnm_d4_f64_m_tied1, svfloat64_t, double, -+ z0 = svminnm_n_f64_m (p0, z0, d4), -+ z0 = svminnm_m (p0, z0, d4)) -+ -+/* -+** minnm_d4_f64_m_untied: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0, z1 -+** fminnm z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (minnm_d4_f64_m_untied, svfloat64_t, double, -+ z0 = svminnm_n_f64_m (p0, z1, d4), -+ z0 = svminnm_m (p0, z1, d4)) -+ -+/* -+** minnm_0_f64_m_tied1: -+** fminnm z0\.d, p0/m, z0\.d, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_0_f64_m_tied1, svfloat64_t, -+ z0 = svminnm_n_f64_m (p0, z0, 0), -+ z0 = svminnm_m (p0, z0, 0)) -+ -+/* -+** minnm_0_f64_m_untied: -+** movprfx z0, z1 -+** fminnm z0\.d, p0/m, z0\.d, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_0_f64_m_untied, svfloat64_t, -+ z0 = svminnm_n_f64_m (p0, z1, 0), -+ z0 = svminnm_m (p0, z1, 0)) -+ -+/* -+** minnm_1_f64_m_tied1: -+** fminnm z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ 
-+TEST_UNIFORM_Z (minnm_1_f64_m_tied1, svfloat64_t, -+ z0 = svminnm_n_f64_m (p0, z0, 1), -+ z0 = svminnm_m (p0, z0, 1)) -+ -+/* -+** minnm_1_f64_m_untied: -+** movprfx z0, z1 -+** fminnm z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_1_f64_m_untied, svfloat64_t, -+ z0 = svminnm_n_f64_m (p0, z1, 1), -+ z0 = svminnm_m (p0, z1, 1)) -+ -+/* -+** minnm_2_f64_m: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fminnm z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_2_f64_m, svfloat64_t, -+ z0 = svminnm_n_f64_m (p0, z0, 2), -+ z0 = svminnm_m (p0, z0, 2)) -+ -+/* -+** minnm_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fminnm z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_f64_z_tied1, svfloat64_t, -+ z0 = svminnm_f64_z (p0, z0, z1), -+ z0 = svminnm_z (p0, z0, z1)) -+ -+/* -+** minnm_f64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** fminnm z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_f64_z_tied2, svfloat64_t, -+ z0 = svminnm_f64_z (p0, z1, z0), -+ z0 = svminnm_z (p0, z1, z0)) -+ -+/* -+** minnm_f64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fminnm z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fminnm z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_f64_z_untied, svfloat64_t, -+ z0 = svminnm_f64_z (p0, z1, z2), -+ z0 = svminnm_z (p0, z1, z2)) -+ -+/* -+** minnm_d4_f64_z_tied1: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0\.d, p0/z, z0\.d -+** fminnm z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (minnm_d4_f64_z_tied1, svfloat64_t, double, -+ z0 = svminnm_n_f64_z (p0, z0, d4), -+ z0 = svminnm_z (p0, z0, d4)) -+ -+/* -+** minnm_d4_f64_z_untied: -+** mov (z[0-9]+\.d), d4 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fminnm z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** fminnm z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (minnm_d4_f64_z_untied, svfloat64_t, double, -+ z0 = svminnm_n_f64_z (p0, z1, d4), -+ z0 = svminnm_z (p0, z1, d4)) -+ -+/* -+** minnm_0_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fminnm z0\.d, p0/m, z0\.d, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_0_f64_z_tied1, svfloat64_t, -+ z0 = svminnm_n_f64_z (p0, z0, 0), -+ z0 = svminnm_z (p0, z0, 0)) -+ -+/* -+** minnm_0_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fminnm z0\.d, p0/m, z0\.d, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_0_f64_z_untied, svfloat64_t, -+ z0 = svminnm_n_f64_z (p0, z1, 0), -+ z0 = svminnm_z (p0, z1, 0)) -+ -+/* -+** minnm_1_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fminnm z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_1_f64_z_tied1, svfloat64_t, -+ z0 = svminnm_n_f64_z (p0, z0, 1), -+ z0 = svminnm_z (p0, z0, 1)) -+ -+/* -+** minnm_1_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fminnm z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_1_f64_z_untied, svfloat64_t, -+ z0 = svminnm_n_f64_z (p0, z1, 1), -+ z0 = svminnm_z (p0, z1, 1)) -+ -+/* -+** minnm_2_f64_z: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
-+** movprfx z0\.d, p0/z, z0\.d -+** fminnm z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_2_f64_z, svfloat64_t, -+ z0 = svminnm_n_f64_z (p0, z0, 2), -+ z0 = svminnm_z (p0, z0, 2)) -+ -+/* -+** minnm_f64_x_tied1: -+** fminnm z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_f64_x_tied1, svfloat64_t, -+ z0 = svminnm_f64_x (p0, z0, z1), -+ z0 = svminnm_x (p0, z0, z1)) -+ -+/* -+** minnm_f64_x_tied2: -+** fminnm z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_f64_x_tied2, svfloat64_t, -+ z0 = svminnm_f64_x (p0, z1, z0), -+ z0 = svminnm_x (p0, z1, z0)) -+ -+/* -+** minnm_f64_x_untied: -+** ( -+** movprfx z0, z1 -+** fminnm z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0, z2 -+** fminnm z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_f64_x_untied, svfloat64_t, -+ z0 = svminnm_f64_x (p0, z1, z2), -+ z0 = svminnm_x (p0, z1, z2)) -+ -+/* -+** minnm_d4_f64_x_tied1: -+** mov (z[0-9]+\.d), d4 -+** fminnm z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (minnm_d4_f64_x_tied1, svfloat64_t, double, -+ z0 = svminnm_n_f64_x (p0, z0, d4), -+ z0 = svminnm_x (p0, z0, d4)) -+ -+/* -+** minnm_d4_f64_x_untied: -+** mov z0\.d, d4 -+** fminnm z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZD (minnm_d4_f64_x_untied, svfloat64_t, double, -+ z0 = svminnm_n_f64_x (p0, z1, d4), -+ z0 = svminnm_x (p0, z1, d4)) -+ -+/* -+** minnm_0_f64_x_tied1: -+** fminnm z0\.d, p0/m, z0\.d, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_0_f64_x_tied1, svfloat64_t, -+ z0 = svminnm_n_f64_x (p0, z0, 0), -+ z0 = svminnm_x (p0, z0, 0)) -+ -+/* -+** minnm_0_f64_x_untied: -+** movprfx z0, z1 -+** fminnm z0\.d, p0/m, z0\.d, #0\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_0_f64_x_untied, svfloat64_t, -+ z0 = svminnm_n_f64_x (p0, z1, 0), -+ z0 = svminnm_x (p0, z1, 0)) -+ -+/* -+** minnm_1_f64_x_tied1: -+** fminnm z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_1_f64_x_tied1, svfloat64_t, -+ z0 = svminnm_n_f64_x (p0, z0, 1), -+ z0 = svminnm_x (p0, z0, 1)) -+ -+/* -+** minnm_1_f64_x_untied: -+** movprfx z0, z1 -+** fminnm z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_1_f64_x_untied, svfloat64_t, -+ z0 = svminnm_n_f64_x (p0, z1, 1), -+ z0 = svminnm_x (p0, z1, 1)) -+ -+/* -+** minnm_2_f64_x_tied1: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fminnm z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_2_f64_x_tied1, svfloat64_t, -+ z0 = svminnm_n_f64_x (p0, z0, 2), -+ z0 = svminnm_x (p0, z0, 2)) -+ -+/* -+** minnm_2_f64_x_untied: -+** fmov z0\.d, #2\.0(?:e\+0)? -+** fminnm z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (minnm_2_f64_x_untied, svfloat64_t, -+ z0 = svminnm_n_f64_x (p0, z1, 2), -+ z0 = svminnm_x (p0, z1, 2)) -+ -+/* -+** ptrue_minnm_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_minnm_f64_x_tied1, svfloat64_t, -+ z0 = svminnm_f64_x (svptrue_b64 (), z0, z1), -+ z0 = svminnm_x (svptrue_b64 (), z0, z1)) -+ -+/* -+** ptrue_minnm_f64_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_minnm_f64_x_tied2, svfloat64_t, -+ z0 = svminnm_f64_x (svptrue_b64 (), z1, z0), -+ z0 = svminnm_x (svptrue_b64 (), z1, z0)) -+ -+/* -+** ptrue_minnm_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_minnm_f64_x_untied, svfloat64_t, -+ z0 = svminnm_f64_x (svptrue_b64 (), z1, z2), -+ z0 = svminnm_x (svptrue_b64 (), z1, z2)) -+ -+/* -+** ptrue_minnm_0_f64_x_tied1: -+** ... 
-+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_minnm_0_f64_x_tied1, svfloat64_t, -+ z0 = svminnm_n_f64_x (svptrue_b64 (), z0, 0), -+ z0 = svminnm_x (svptrue_b64 (), z0, 0)) -+ -+/* -+** ptrue_minnm_0_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_minnm_0_f64_x_untied, svfloat64_t, -+ z0 = svminnm_n_f64_x (svptrue_b64 (), z1, 0), -+ z0 = svminnm_x (svptrue_b64 (), z1, 0)) -+ -+/* -+** ptrue_minnm_1_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_minnm_1_f64_x_tied1, svfloat64_t, -+ z0 = svminnm_n_f64_x (svptrue_b64 (), z0, 1), -+ z0 = svminnm_x (svptrue_b64 (), z0, 1)) -+ -+/* -+** ptrue_minnm_1_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_minnm_1_f64_x_untied, svfloat64_t, -+ z0 = svminnm_n_f64_x (svptrue_b64 (), z1, 1), -+ z0 = svminnm_x (svptrue_b64 (), z1, 1)) -+ -+/* -+** ptrue_minnm_2_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_minnm_2_f64_x_tied1, svfloat64_t, -+ z0 = svminnm_n_f64_x (svptrue_b64 (), z0, 2), -+ z0 = svminnm_x (svptrue_b64 (), z0, 2)) -+ -+/* -+** ptrue_minnm_2_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_minnm_2_f64_x_untied, svfloat64_t, -+ z0 = svminnm_n_f64_x (svptrue_b64 (), z1, 2), -+ z0 = svminnm_x (svptrue_b64 (), z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minnmv_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minnmv_f16.c -new file mode 100644 -index 000000000..827f41bfe ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minnmv_f16.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** minnmv_d0_f16_tied: -+** fminnmv h0, p0, z0\.h -+** ret -+*/ -+TEST_REDUCTION_D (minnmv_d0_f16_tied, float16_t, svfloat16_t, -+ d0 = svminnmv_f16 (p0, z0), -+ d0 = svminnmv (p0, z0)) -+ -+/* -+** minnmv_d0_f16_untied: -+** fminnmv h0, p0, z1\.h -+** ret -+*/ -+TEST_REDUCTION_D (minnmv_d0_f16_untied, float16_t, svfloat16_t, -+ d0 = svminnmv_f16 (p0, z1), -+ d0 = svminnmv (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minnmv_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minnmv_f32.c -new file mode 100644 -index 000000000..2352ec2a3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minnmv_f32.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** minnmv_d0_f32_tied: -+** fminnmv s0, p0, z0\.s -+** ret -+*/ -+TEST_REDUCTION_D (minnmv_d0_f32_tied, float32_t, svfloat32_t, -+ d0 = svminnmv_f32 (p0, z0), -+ d0 = svminnmv (p0, z0)) -+ -+/* -+** minnmv_d0_f32_untied: -+** fminnmv s0, p0, z1\.s -+** ret -+*/ -+TEST_REDUCTION_D (minnmv_d0_f32_untied, float32_t, svfloat32_t, -+ d0 = svminnmv_f32 (p0, z1), -+ d0 = svminnmv (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minnmv_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minnmv_f64.c -new file mode 100644 -index 000000000..3d769a3d5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minnmv_f64.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** minnmv_d0_f64_tied: -+** fminnmv d0, p0, z0\.d -+** ret -+*/ -+TEST_REDUCTION_D (minnmv_d0_f64_tied, float64_t, svfloat64_t, -+ d0 = 
svminnmv_f64 (p0, z0), -+ d0 = svminnmv (p0, z0)) -+ -+/* -+** minnmv_d0_f64_untied: -+** fminnmv d0, p0, z1\.d -+** ret -+*/ -+TEST_REDUCTION_D (minnmv_d0_f64_untied, float64_t, svfloat64_t, -+ d0 = svminnmv_f64 (p0, z1), -+ d0 = svminnmv (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_f16.c -new file mode 100644 -index 000000000..190aa16e1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_f16.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** minv_d0_f16_tied: -+** fminv h0, p0, z0\.h -+** ret -+*/ -+TEST_REDUCTION_D (minv_d0_f16_tied, float16_t, svfloat16_t, -+ d0 = svminv_f16 (p0, z0), -+ d0 = svminv (p0, z0)) -+ -+/* -+** minv_d0_f16_untied: -+** fminv h0, p0, z1\.h -+** ret -+*/ -+TEST_REDUCTION_D (minv_d0_f16_untied, float16_t, svfloat16_t, -+ d0 = svminv_f16 (p0, z1), -+ d0 = svminv (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_f32.c -new file mode 100644 -index 000000000..07871b893 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_f32.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** minv_d0_f32_tied: -+** fminv s0, p0, z0\.s -+** ret -+*/ -+TEST_REDUCTION_D (minv_d0_f32_tied, float32_t, svfloat32_t, -+ d0 = svminv_f32 (p0, z0), -+ d0 = svminv (p0, z0)) -+ -+/* -+** minv_d0_f32_untied: -+** fminv s0, p0, z1\.s -+** ret -+*/ -+TEST_REDUCTION_D (minv_d0_f32_untied, float32_t, svfloat32_t, -+ d0 = svminv_f32 (p0, z1), -+ d0 = svminv (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_f64.c -new file mode 100644 -index 000000000..7435f306f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_f64.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** minv_d0_f64_tied: -+** fminv d0, p0, z0\.d -+** ret -+*/ -+TEST_REDUCTION_D (minv_d0_f64_tied, float64_t, svfloat64_t, -+ d0 = svminv_f64 (p0, z0), -+ d0 = svminv (p0, z0)) -+ -+/* -+** minv_d0_f64_untied: -+** fminv d0, p0, z1\.d -+** ret -+*/ -+TEST_REDUCTION_D (minv_d0_f64_untied, float64_t, svfloat64_t, -+ d0 = svminv_f64 (p0, z1), -+ d0 = svminv (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_s16.c -new file mode 100644 -index 000000000..dfb66a9f7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_s16.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** minv_x0_s16: -+** sminv h([0-9]+), p0, z0\.h -+** umov w0, v\1\.h\[0\] -+** ret -+*/ -+TEST_REDUCTION_X (minv_x0_s16, int16_t, svint16_t, -+ x0 = svminv_s16 (p0, z0), -+ x0 = svminv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_s32.c -new file mode 100644 -index 000000000..c02df5dd3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_s32.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** minv_x0_s32: -+** sminv (s[0-9]+), p0, z0\.s -+** fmov w0, \1 -+** 
ret -+*/ -+TEST_REDUCTION_X (minv_x0_s32, int32_t, svint32_t, -+ x0 = svminv_s32 (p0, z0), -+ x0 = svminv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_s64.c -new file mode 100644 -index 000000000..784973231 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_s64.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** minv_x0_s64: -+** sminv (d[0-9]+), p0, z0\.d -+** fmov x0, \1 -+** ret -+*/ -+TEST_REDUCTION_X (minv_x0_s64, int64_t, svint64_t, -+ x0 = svminv_s64 (p0, z0), -+ x0 = svminv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_s8.c -new file mode 100644 -index 000000000..0b1bce5de ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_s8.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** minv_x0_s8: -+** sminv b([0-9]+), p0, z0\.b -+** umov w0, v\1\.b\[0\] -+** ret -+*/ -+TEST_REDUCTION_X (minv_x0_s8, int8_t, svint8_t, -+ x0 = svminv_s8 (p0, z0), -+ x0 = svminv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_u16.c -new file mode 100644 -index 000000000..b499de33e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_u16.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** minv_x0_u16: -+** uminv h([0-9]+), p0, z0\.h -+** umov w0, v\1\.h\[0\] -+** ret -+*/ -+TEST_REDUCTION_X (minv_x0_u16, uint16_t, svuint16_t, -+ x0 = svminv_u16 (p0, z0), -+ x0 = svminv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_u32.c -new file mode 100644 -index 000000000..18c9d8c6d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_u32.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** minv_x0_u32: -+** uminv (s[0-9]+), p0, z0\.s -+** fmov w0, \1 -+** ret -+*/ -+TEST_REDUCTION_X (minv_x0_u32, uint32_t, svuint32_t, -+ x0 = svminv_u32 (p0, z0), -+ x0 = svminv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_u64.c -new file mode 100644 -index 000000000..374d5e426 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_u64.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** minv_x0_u64: -+** uminv (d[0-9]+), p0, z0\.d -+** fmov x0, \1 -+** ret -+*/ -+TEST_REDUCTION_X (minv_x0_u64, uint64_t, svuint64_t, -+ x0 = svminv_u64 (p0, z0), -+ x0 = svminv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_u8.c -new file mode 100644 -index 000000000..d9f6f5835 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/minv_u8.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** minv_x0_u8: -+** uminv b([0-9]+), p0, z0\.b -+** umov w0, v\1\.b\[0\] -+** ret -+*/ -+TEST_REDUCTION_X (minv_x0_u8, uint8_t, svuint8_t, -+ x0 = svminv_u8 (p0, z0), 
-+ x0 = svminv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_f16.c -new file mode 100644 -index 000000000..f22a582ef ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_f16.c -@@ -0,0 +1,398 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mla_f16_m_tied1: -+** fmla z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_f16_m_tied1, svfloat16_t, -+ z0 = svmla_f16_m (p0, z0, z1, z2), -+ z0 = svmla_m (p0, z0, z1, z2)) -+ -+/* -+** mla_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fmla z0\.h, p0/m, \1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_f16_m_tied2, svfloat16_t, -+ z0 = svmla_f16_m (p0, z1, z0, z2), -+ z0 = svmla_m (p0, z1, z0, z2)) -+ -+/* -+** mla_f16_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fmla z0\.h, p0/m, z2\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_f16_m_tied3, svfloat16_t, -+ z0 = svmla_f16_m (p0, z1, z2, z0), -+ z0 = svmla_m (p0, z1, z2, z0)) -+ -+/* -+** mla_f16_m_untied: -+** movprfx z0, z1 -+** fmla z0\.h, p0/m, z2\.h, z3\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_f16_m_untied, svfloat16_t, -+ z0 = svmla_f16_m (p0, z1, z2, z3), -+ z0 = svmla_m (p0, z1, z2, z3)) -+ -+/* -+** mla_h4_f16_m_tied1: -+** mov (z[0-9]+\.h), h4 -+** fmla z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mla_h4_f16_m_tied1, svfloat16_t, __fp16, -+ z0 = svmla_n_f16_m (p0, z0, z1, d4), -+ z0 = svmla_m (p0, z0, z1, d4)) -+ -+/* -+** mla_h4_f16_m_untied: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0, z1 -+** fmla z0\.h, p0/m, z2\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mla_h4_f16_m_untied, svfloat16_t, __fp16, -+ z0 = svmla_n_f16_m (p0, z1, z2, d4), -+ z0 = svmla_m (p0, z1, z2, d4)) -+ -+/* -+** mla_2_f16_m_tied1: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fmla z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_2_f16_m_tied1, svfloat16_t, -+ z0 = svmla_n_f16_m (p0, z0, z1, 2), -+ z0 = svmla_m (p0, z0, z1, 2)) -+ -+/* -+** mla_2_f16_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fmla z0\.h, p0/m, z2\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_2_f16_m_untied, svfloat16_t, -+ z0 = svmla_n_f16_m (p0, z1, z2, 2), -+ z0 = svmla_m (p0, z1, z2, 2)) -+ -+/* -+** mla_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fmla z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_f16_z_tied1, svfloat16_t, -+ z0 = svmla_f16_z (p0, z0, z1, z2), -+ z0 = svmla_z (p0, z0, z1, z2)) -+ -+/* -+** mla_f16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** fmad z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_f16_z_tied2, svfloat16_t, -+ z0 = svmla_f16_z (p0, z1, z0, z2), -+ z0 = svmla_z (p0, z1, z0, z2)) -+ -+/* -+** mla_f16_z_tied3: -+** movprfx z0\.h, p0/z, z0\.h -+** fmad z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_f16_z_tied3, svfloat16_t, -+ z0 = svmla_f16_z (p0, z1, z2, z0), -+ z0 = svmla_z (p0, z1, z2, z0)) -+ -+/* -+** mla_f16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fmla z0\.h, p0/m, z2\.h, z3\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fmad z0\.h, p0/m, z3\.h, z1\.h -+** | -+** movprfx z0\.h, p0/z, z3\.h -+** fmad z0\.h, p0/m, z2\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mla_f16_z_untied, svfloat16_t, -+ z0 = svmla_f16_z (p0, z1, z2, z3), -+ z0 = svmla_z (p0, z1, z2, z3)) -+ -+/* -+** mla_h4_f16_z_tied1: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0\.h, p0/z, z0\.h -+** fmla z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mla_h4_f16_z_tied1, svfloat16_t, __fp16, -+ z0 = svmla_n_f16_z (p0, z0, z1, d4), -+ z0 = svmla_z (p0, z0, z1, d4)) -+ -+/* -+** mla_h4_f16_z_tied2: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0\.h, p0/z, z0\.h -+** fmad z0\.h, p0/m, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZD (mla_h4_f16_z_tied2, svfloat16_t, __fp16, -+ z0 = svmla_n_f16_z (p0, z1, z0, d4), -+ z0 = svmla_z (p0, z1, z0, d4)) -+ -+/* -+** mla_h4_f16_z_untied: -+** mov (z[0-9]+\.h), h4 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fmla z0\.h, p0/m, z2\.h, \1 -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fmad z0\.h, p0/m, \1, z1\.h -+** | -+** movprfx z0\.h, p0/z, \1 -+** fmad z0\.h, p0/m, z2\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (mla_h4_f16_z_untied, svfloat16_t, __fp16, -+ z0 = svmla_n_f16_z (p0, z1, z2, d4), -+ z0 = svmla_z (p0, z1, z2, d4)) -+ -+/* -+** mla_2_f16_z_tied1: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** movprfx z0\.h, p0/z, z0\.h -+** fmla z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_2_f16_z_tied1, svfloat16_t, -+ z0 = svmla_n_f16_z (p0, z0, z1, 2), -+ z0 = svmla_z (p0, z0, z1, 2)) -+ -+/* -+** mla_2_f16_z_tied2: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** movprfx z0\.h, p0/z, z0\.h -+** fmad z0\.h, p0/m, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_2_f16_z_tied2, svfloat16_t, -+ z0 = svmla_n_f16_z (p0, z1, z0, 2), -+ z0 = svmla_z (p0, z1, z0, 2)) -+ -+/* -+** mla_2_f16_z_untied: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fmla z0\.h, p0/m, z2\.h, \1 -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fmad z0\.h, p0/m, \1, z1\.h -+** | -+** movprfx z0\.h, p0/z, \1 -+** fmad z0\.h, p0/m, z2\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mla_2_f16_z_untied, svfloat16_t, -+ z0 = svmla_n_f16_z (p0, z1, z2, 2), -+ z0 = svmla_z (p0, z1, z2, 2)) -+ -+/* -+** mla_f16_x_tied1: -+** fmla z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_f16_x_tied1, svfloat16_t, -+ z0 = svmla_f16_x (p0, z0, z1, z2), -+ z0 = svmla_x (p0, z0, z1, z2)) -+ -+/* -+** mla_f16_x_tied2: -+** fmad z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_f16_x_tied2, svfloat16_t, -+ z0 = svmla_f16_x (p0, z1, z0, z2), -+ z0 = svmla_x (p0, z1, z0, z2)) -+ -+/* -+** mla_f16_x_tied3: -+** fmad z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_f16_x_tied3, svfloat16_t, -+ z0 = svmla_f16_x (p0, z1, z2, z0), -+ z0 = svmla_x (p0, z1, z2, z0)) -+ -+/* -+** mla_f16_x_untied: -+** ( -+** movprfx z0, z1 -+** fmla z0\.h, p0/m, z2\.h, z3\.h -+** | -+** movprfx z0, z2 -+** fmad z0\.h, p0/m, z3\.h, z1\.h -+** | -+** movprfx z0, z3 -+** fmad z0\.h, p0/m, z2\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mla_f16_x_untied, svfloat16_t, -+ z0 = svmla_f16_x (p0, z1, z2, z3), -+ z0 = svmla_x (p0, z1, z2, z3)) -+ -+/* -+** mla_h4_f16_x_tied1: -+** mov (z[0-9]+\.h), h4 -+** fmla z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mla_h4_f16_x_tied1, svfloat16_t, __fp16, -+ z0 = svmla_n_f16_x (p0, z0, z1, d4), -+ z0 = svmla_x (p0, z0, z1, d4)) -+ -+/* -+** mla_h4_f16_x_tied2: -+** mov (z[0-9]+\.h), h4 -+** fmad z0\.h, p0/m, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZD (mla_h4_f16_x_tied2, svfloat16_t, __fp16, -+ z0 = svmla_n_f16_x (p0, z1, z0, d4), -+ z0 = svmla_x (p0, z1, z0, d4)) -+ -+/* -+** mla_h4_f16_x_untied: { xfail *-*-* } -+** mov z0\.h, h4 -+** fmad z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZD (mla_h4_f16_x_untied, svfloat16_t, __fp16, -+ z0 = svmla_n_f16_x (p0, z1, z2, d4), -+ z0 = svmla_x (p0, z1, z2, d4)) -+ -+/* -+** mla_2_f16_x_tied1: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fmla z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_2_f16_x_tied1, svfloat16_t, -+ z0 = svmla_n_f16_x (p0, z0, z1, 2), -+ z0 = svmla_x (p0, z0, z1, 2)) -+ -+/* -+** mla_2_f16_x_tied2: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fmad z0\.h, p0/m, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_2_f16_x_tied2, svfloat16_t, -+ z0 = svmla_n_f16_x (p0, z1, z0, 2), -+ z0 = svmla_x (p0, z1, z0, 2)) -+ -+/* -+** mla_2_f16_x_untied: -+** fmov z0\.h, #2\.0(?:e\+0)? -+** fmad z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_2_f16_x_untied, svfloat16_t, -+ z0 = svmla_n_f16_x (p0, z1, z2, 2), -+ z0 = svmla_x (p0, z1, z2, 2)) -+ -+/* -+** ptrue_mla_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mla_f16_x_tied1, svfloat16_t, -+ z0 = svmla_f16_x (svptrue_b16 (), z0, z1, z2), -+ z0 = svmla_x (svptrue_b16 (), z0, z1, z2)) -+ -+/* -+** ptrue_mla_f16_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mla_f16_x_tied2, svfloat16_t, -+ z0 = svmla_f16_x (svptrue_b16 (), z1, z0, z2), -+ z0 = svmla_x (svptrue_b16 (), z1, z0, z2)) -+ -+/* -+** ptrue_mla_f16_x_tied3: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mla_f16_x_tied3, svfloat16_t, -+ z0 = svmla_f16_x (svptrue_b16 (), z1, z2, z0), -+ z0 = svmla_x (svptrue_b16 (), z1, z2, z0)) -+ -+/* -+** ptrue_mla_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mla_f16_x_untied, svfloat16_t, -+ z0 = svmla_f16_x (svptrue_b16 (), z1, z2, z3), -+ z0 = svmla_x (svptrue_b16 (), z1, z2, z3)) -+ -+/* -+** ptrue_mla_2_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mla_2_f16_x_tied1, svfloat16_t, -+ z0 = svmla_n_f16_x (svptrue_b16 (), z0, z1, 2), -+ z0 = svmla_x (svptrue_b16 (), z0, z1, 2)) -+ -+/* -+** ptrue_mla_2_f16_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mla_2_f16_x_tied2, svfloat16_t, -+ z0 = svmla_n_f16_x (svptrue_b16 (), z1, z0, 2), -+ z0 = svmla_x (svptrue_b16 (), z1, z0, 2)) -+ -+/* -+** ptrue_mla_2_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mla_2_f16_x_untied, svfloat16_t, -+ z0 = svmla_n_f16_x (svptrue_b16 (), z1, z2, 2), -+ z0 = svmla_x (svptrue_b16 (), z1, z2, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_f32.c -new file mode 100644 -index 000000000..1d95eb0a7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_f32.c -@@ -0,0 +1,398 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mla_f32_m_tied1: -+** fmla z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_f32_m_tied1, svfloat32_t, -+ z0 = svmla_f32_m (p0, z0, z1, z2), -+ z0 = svmla_m (p0, z0, z1, z2)) -+ -+/* -+** mla_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fmla z0\.s, p0/m, \1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_f32_m_tied2, svfloat32_t, -+ z0 = svmla_f32_m (p0, z1, z0, z2), -+ z0 = svmla_m (p0, z1, z0, z2)) -+ -+/* -+** mla_f32_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fmla z0\.s, p0/m, z2\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_f32_m_tied3, svfloat32_t, -+ z0 = svmla_f32_m (p0, z1, z2, z0), -+ z0 = svmla_m (p0, z1, z2, z0)) -+ -+/* -+** mla_f32_m_untied: -+** movprfx z0, z1 -+** fmla z0\.s, p0/m, z2\.s, z3\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_f32_m_untied, svfloat32_t, -+ z0 = svmla_f32_m (p0, z1, z2, z3), -+ z0 = svmla_m (p0, z1, z2, z3)) -+ -+/* -+** mla_s4_f32_m_tied1: -+** mov (z[0-9]+\.s), s4 -+** fmla z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mla_s4_f32_m_tied1, svfloat32_t, float, -+ z0 = svmla_n_f32_m (p0, z0, z1, d4), -+ z0 = svmla_m (p0, z0, z1, d4)) -+ -+/* -+** mla_s4_f32_m_untied: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0, z1 -+** fmla z0\.s, p0/m, z2\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mla_s4_f32_m_untied, svfloat32_t, float, -+ z0 = svmla_n_f32_m (p0, z1, z2, d4), -+ z0 = svmla_m (p0, z1, z2, d4)) -+ -+/* -+** mla_2_f32_m_tied1: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fmla z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_2_f32_m_tied1, svfloat32_t, -+ z0 = svmla_n_f32_m (p0, z0, z1, 2), -+ z0 = svmla_m (p0, z0, z1, 2)) -+ -+/* -+** mla_2_f32_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fmla z0\.s, p0/m, z2\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_2_f32_m_untied, svfloat32_t, -+ z0 = svmla_n_f32_m (p0, z1, z2, 2), -+ z0 = svmla_m (p0, z1, z2, 2)) -+ -+/* -+** mla_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fmla z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_f32_z_tied1, svfloat32_t, -+ z0 = svmla_f32_z (p0, z0, z1, z2), -+ z0 = svmla_z (p0, z0, z1, z2)) -+ -+/* -+** mla_f32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** fmad z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_f32_z_tied2, svfloat32_t, -+ z0 = svmla_f32_z (p0, z1, z0, z2), -+ z0 = svmla_z (p0, z1, z0, z2)) -+ -+/* -+** mla_f32_z_tied3: -+** movprfx z0\.s, p0/z, z0\.s -+** fmad z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_f32_z_tied3, svfloat32_t, -+ z0 = svmla_f32_z (p0, z1, z2, z0), -+ z0 = svmla_z (p0, z1, z2, z0)) -+ -+/* -+** mla_f32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fmla z0\.s, p0/m, z2\.s, z3\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fmad z0\.s, p0/m, z3\.s, z1\.s -+** | -+** movprfx z0\.s, p0/z, z3\.s -+** fmad z0\.s, p0/m, z2\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mla_f32_z_untied, svfloat32_t, -+ z0 = svmla_f32_z (p0, z1, z2, z3), -+ z0 = svmla_z (p0, z1, z2, z3)) -+ -+/* -+** mla_s4_f32_z_tied1: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0\.s, p0/z, z0\.s -+** fmla z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mla_s4_f32_z_tied1, svfloat32_t, float, -+ z0 = svmla_n_f32_z (p0, z0, z1, d4), -+ z0 = svmla_z (p0, z0, z1, d4)) -+ -+/* -+** mla_s4_f32_z_tied2: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0\.s, p0/z, z0\.s -+** fmad z0\.s, p0/m, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZD (mla_s4_f32_z_tied2, svfloat32_t, float, -+ z0 = svmla_n_f32_z (p0, z1, z0, d4), -+ z0 = svmla_z (p0, z1, z0, d4)) -+ -+/* -+** mla_s4_f32_z_untied: -+** mov (z[0-9]+\.s), s4 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fmla z0\.s, p0/m, z2\.s, \1 -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fmad z0\.s, p0/m, \1, z1\.s -+** | -+** movprfx z0\.s, p0/z, \1 -+** fmad z0\.s, p0/m, z2\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (mla_s4_f32_z_untied, svfloat32_t, float, -+ z0 = svmla_n_f32_z (p0, z1, z2, d4), -+ z0 = svmla_z (p0, z1, z2, d4)) -+ -+/* -+** mla_2_f32_z_tied1: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** movprfx z0\.s, p0/z, z0\.s -+** fmla z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_2_f32_z_tied1, svfloat32_t, -+ z0 = svmla_n_f32_z (p0, z0, z1, 2), -+ z0 = svmla_z (p0, z0, z1, 2)) -+ -+/* -+** mla_2_f32_z_tied2: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** movprfx z0\.s, p0/z, z0\.s -+** fmad z0\.s, p0/m, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_2_f32_z_tied2, svfloat32_t, -+ z0 = svmla_n_f32_z (p0, z1, z0, 2), -+ z0 = svmla_z (p0, z1, z0, 2)) -+ -+/* -+** mla_2_f32_z_untied: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fmla z0\.s, p0/m, z2\.s, \1 -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fmad z0\.s, p0/m, \1, z1\.s -+** | -+** movprfx z0\.s, p0/z, \1 -+** fmad z0\.s, p0/m, z2\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mla_2_f32_z_untied, svfloat32_t, -+ z0 = svmla_n_f32_z (p0, z1, z2, 2), -+ z0 = svmla_z (p0, z1, z2, 2)) -+ -+/* -+** mla_f32_x_tied1: -+** fmla z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_f32_x_tied1, svfloat32_t, -+ z0 = svmla_f32_x (p0, z0, z1, z2), -+ z0 = svmla_x (p0, z0, z1, z2)) -+ -+/* -+** mla_f32_x_tied2: -+** fmad z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_f32_x_tied2, svfloat32_t, -+ z0 = svmla_f32_x (p0, z1, z0, z2), -+ z0 = svmla_x (p0, z1, z0, z2)) -+ -+/* -+** mla_f32_x_tied3: -+** fmad z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_f32_x_tied3, svfloat32_t, -+ z0 = svmla_f32_x (p0, z1, z2, z0), -+ z0 = svmla_x (p0, z1, z2, z0)) -+ -+/* -+** mla_f32_x_untied: -+** ( -+** movprfx z0, z1 -+** fmla z0\.s, p0/m, z2\.s, z3\.s -+** | -+** movprfx z0, z2 -+** fmad z0\.s, p0/m, z3\.s, z1\.s -+** | -+** movprfx z0, z3 -+** fmad z0\.s, p0/m, z2\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mla_f32_x_untied, svfloat32_t, -+ z0 = svmla_f32_x (p0, z1, z2, z3), -+ z0 = svmla_x (p0, z1, z2, z3)) -+ -+/* -+** mla_s4_f32_x_tied1: -+** mov (z[0-9]+\.s), s4 -+** fmla z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mla_s4_f32_x_tied1, svfloat32_t, float, -+ z0 = svmla_n_f32_x (p0, z0, z1, d4), -+ z0 = svmla_x (p0, z0, z1, d4)) -+ -+/* -+** mla_s4_f32_x_tied2: -+** mov (z[0-9]+\.s), s4 -+** fmad z0\.s, p0/m, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZD (mla_s4_f32_x_tied2, svfloat32_t, float, -+ z0 = svmla_n_f32_x (p0, z1, z0, d4), -+ z0 = svmla_x (p0, z1, z0, d4)) -+ -+/* -+** mla_s4_f32_x_untied: { xfail *-*-* } -+** mov z0\.s, s4 -+** fmad z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZD (mla_s4_f32_x_untied, svfloat32_t, float, -+ z0 = svmla_n_f32_x (p0, z1, z2, d4), -+ z0 = svmla_x (p0, z1, z2, d4)) -+ -+/* -+** mla_2_f32_x_tied1: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fmla z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_2_f32_x_tied1, svfloat32_t, -+ z0 = svmla_n_f32_x (p0, z0, z1, 2), -+ z0 = svmla_x (p0, z0, z1, 2)) -+ -+/* -+** mla_2_f32_x_tied2: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fmad z0\.s, p0/m, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_2_f32_x_tied2, svfloat32_t, -+ z0 = svmla_n_f32_x (p0, z1, z0, 2), -+ z0 = svmla_x (p0, z1, z0, 2)) -+ -+/* -+** mla_2_f32_x_untied: -+** fmov z0\.s, #2\.0(?:e\+0)? -+** fmad z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_2_f32_x_untied, svfloat32_t, -+ z0 = svmla_n_f32_x (p0, z1, z2, 2), -+ z0 = svmla_x (p0, z1, z2, 2)) -+ -+/* -+** ptrue_mla_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mla_f32_x_tied1, svfloat32_t, -+ z0 = svmla_f32_x (svptrue_b32 (), z0, z1, z2), -+ z0 = svmla_x (svptrue_b32 (), z0, z1, z2)) -+ -+/* -+** ptrue_mla_f32_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mla_f32_x_tied2, svfloat32_t, -+ z0 = svmla_f32_x (svptrue_b32 (), z1, z0, z2), -+ z0 = svmla_x (svptrue_b32 (), z1, z0, z2)) -+ -+/* -+** ptrue_mla_f32_x_tied3: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mla_f32_x_tied3, svfloat32_t, -+ z0 = svmla_f32_x (svptrue_b32 (), z1, z2, z0), -+ z0 = svmla_x (svptrue_b32 (), z1, z2, z0)) -+ -+/* -+** ptrue_mla_f32_x_untied: -+** ... 
-+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mla_f32_x_untied, svfloat32_t, -+ z0 = svmla_f32_x (svptrue_b32 (), z1, z2, z3), -+ z0 = svmla_x (svptrue_b32 (), z1, z2, z3)) -+ -+/* -+** ptrue_mla_2_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mla_2_f32_x_tied1, svfloat32_t, -+ z0 = svmla_n_f32_x (svptrue_b32 (), z0, z1, 2), -+ z0 = svmla_x (svptrue_b32 (), z0, z1, 2)) -+ -+/* -+** ptrue_mla_2_f32_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mla_2_f32_x_tied2, svfloat32_t, -+ z0 = svmla_n_f32_x (svptrue_b32 (), z1, z0, 2), -+ z0 = svmla_x (svptrue_b32 (), z1, z0, 2)) -+ -+/* -+** ptrue_mla_2_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mla_2_f32_x_untied, svfloat32_t, -+ z0 = svmla_n_f32_x (svptrue_b32 (), z1, z2, 2), -+ z0 = svmla_x (svptrue_b32 (), z1, z2, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_f64.c -new file mode 100644 -index 000000000..74fd29267 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_f64.c -@@ -0,0 +1,398 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mla_f64_m_tied1: -+** fmla z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mla_f64_m_tied1, svfloat64_t, -+ z0 = svmla_f64_m (p0, z0, z1, z2), -+ z0 = svmla_m (p0, z0, z1, z2)) -+ -+/* -+** mla_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fmla z0\.d, p0/m, \1, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mla_f64_m_tied2, svfloat64_t, -+ z0 = svmla_f64_m (p0, z1, z0, z2), -+ z0 = svmla_m (p0, z1, z0, z2)) -+ -+/* -+** mla_f64_m_tied3: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fmla z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_f64_m_tied3, svfloat64_t, -+ z0 = svmla_f64_m (p0, z1, z2, z0), -+ z0 = svmla_m (p0, z1, z2, z0)) -+ -+/* -+** mla_f64_m_untied: -+** movprfx z0, z1 -+** fmla z0\.d, p0/m, z2\.d, z3\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mla_f64_m_untied, svfloat64_t, -+ z0 = svmla_f64_m (p0, z1, z2, z3), -+ z0 = svmla_m (p0, z1, z2, z3)) -+ -+/* -+** mla_d4_f64_m_tied1: -+** mov (z[0-9]+\.d), d4 -+** fmla z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mla_d4_f64_m_tied1, svfloat64_t, double, -+ z0 = svmla_n_f64_m (p0, z0, z1, d4), -+ z0 = svmla_m (p0, z0, z1, d4)) -+ -+/* -+** mla_d4_f64_m_untied: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0, z1 -+** fmla z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mla_d4_f64_m_untied, svfloat64_t, double, -+ z0 = svmla_n_f64_m (p0, z1, z2, d4), -+ z0 = svmla_m (p0, z1, z2, d4)) -+ -+/* -+** mla_2_f64_m_tied1: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fmla z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_2_f64_m_tied1, svfloat64_t, -+ z0 = svmla_n_f64_m (p0, z0, z1, 2), -+ z0 = svmla_m (p0, z0, z1, 2)) -+ -+/* -+** mla_2_f64_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fmla z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_2_f64_m_untied, svfloat64_t, -+ z0 = svmla_n_f64_m (p0, z1, z2, 2), -+ z0 = svmla_m (p0, z1, z2, 2)) -+ -+/* -+** mla_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fmla z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mla_f64_z_tied1, svfloat64_t, -+ z0 = svmla_f64_z (p0, z0, z1, z2), -+ z0 = svmla_z (p0, z0, z1, z2)) -+ -+/* -+** mla_f64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** fmad z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mla_f64_z_tied2, svfloat64_t, -+ z0 = svmla_f64_z (p0, z1, z0, z2), -+ z0 = svmla_z (p0, z1, z0, z2)) -+ -+/* -+** mla_f64_z_tied3: -+** movprfx z0\.d, p0/z, z0\.d -+** fmad z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mla_f64_z_tied3, svfloat64_t, -+ z0 = svmla_f64_z (p0, z1, z2, z0), -+ z0 = svmla_z (p0, z1, z2, z0)) -+ -+/* -+** mla_f64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fmla z0\.d, p0/m, z2\.d, z3\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fmad z0\.d, p0/m, z3\.d, z1\.d -+** | -+** movprfx z0\.d, p0/z, z3\.d -+** fmad z0\.d, p0/m, z2\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mla_f64_z_untied, svfloat64_t, -+ z0 = svmla_f64_z (p0, z1, z2, z3), -+ z0 = svmla_z (p0, z1, z2, z3)) -+ -+/* -+** mla_d4_f64_z_tied1: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0\.d, p0/z, z0\.d -+** fmla z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mla_d4_f64_z_tied1, svfloat64_t, double, -+ z0 = svmla_n_f64_z (p0, z0, z1, d4), -+ z0 = svmla_z (p0, z0, z1, d4)) -+ -+/* -+** mla_d4_f64_z_tied2: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0\.d, p0/z, z0\.d -+** fmad z0\.d, p0/m, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZD (mla_d4_f64_z_tied2, svfloat64_t, double, -+ z0 = svmla_n_f64_z (p0, z1, z0, d4), -+ z0 = svmla_z (p0, z1, z0, d4)) -+ -+/* -+** mla_d4_f64_z_untied: -+** mov (z[0-9]+\.d), d4 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fmla z0\.d, p0/m, z2\.d, \1 -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fmad z0\.d, p0/m, \1, z1\.d -+** | -+** movprfx z0\.d, p0/z, \1 -+** fmad z0\.d, p0/m, z2\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (mla_d4_f64_z_untied, svfloat64_t, double, -+ z0 = svmla_n_f64_z (p0, z1, z2, d4), -+ z0 = svmla_z (p0, z1, z2, d4)) -+ -+/* -+** mla_2_f64_z_tied1: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** movprfx z0\.d, p0/z, z0\.d -+** fmla z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_2_f64_z_tied1, svfloat64_t, -+ z0 = svmla_n_f64_z (p0, z0, z1, 2), -+ z0 = svmla_z (p0, z0, z1, 2)) -+ -+/* -+** mla_2_f64_z_tied2: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** movprfx z0\.d, p0/z, z0\.d -+** fmad z0\.d, p0/m, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mla_2_f64_z_tied2, svfloat64_t, -+ z0 = svmla_n_f64_z (p0, z1, z0, 2), -+ z0 = svmla_z (p0, z1, z0, 2)) -+ -+/* -+** mla_2_f64_z_untied: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fmla z0\.d, p0/m, z2\.d, \1 -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fmad z0\.d, p0/m, \1, z1\.d -+** | -+** movprfx z0\.d, p0/z, \1 -+** fmad z0\.d, p0/m, z2\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mla_2_f64_z_untied, svfloat64_t, -+ z0 = svmla_n_f64_z (p0, z1, z2, 2), -+ z0 = svmla_z (p0, z1, z2, 2)) -+ -+/* -+** mla_f64_x_tied1: -+** fmla z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mla_f64_x_tied1, svfloat64_t, -+ z0 = svmla_f64_x (p0, z0, z1, z2), -+ z0 = svmla_x (p0, z0, z1, z2)) -+ -+/* -+** mla_f64_x_tied2: -+** fmad z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mla_f64_x_tied2, svfloat64_t, -+ z0 = svmla_f64_x (p0, z1, z0, z2), -+ z0 = svmla_x (p0, z1, z0, z2)) -+ -+/* -+** mla_f64_x_tied3: -+** fmad z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mla_f64_x_tied3, svfloat64_t, -+ z0 = svmla_f64_x (p0, z1, z2, z0), -+ z0 = svmla_x (p0, z1, z2, z0)) -+ -+/* -+** mla_f64_x_untied: -+** ( -+** movprfx z0, z1 -+** fmla z0\.d, p0/m, z2\.d, z3\.d -+** | -+** movprfx z0, z2 -+** fmad z0\.d, p0/m, z3\.d, z1\.d -+** | -+** movprfx z0, z3 -+** fmad z0\.d, p0/m, z2\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mla_f64_x_untied, svfloat64_t, -+ z0 = svmla_f64_x (p0, z1, z2, z3), -+ z0 = svmla_x (p0, z1, z2, z3)) -+ -+/* -+** mla_d4_f64_x_tied1: -+** mov (z[0-9]+\.d), d4 -+** fmla z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mla_d4_f64_x_tied1, svfloat64_t, double, -+ z0 = svmla_n_f64_x (p0, z0, z1, d4), -+ z0 = svmla_x (p0, z0, z1, d4)) -+ -+/* -+** mla_d4_f64_x_tied2: -+** mov (z[0-9]+\.d), d4 -+** fmad z0\.d, p0/m, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZD (mla_d4_f64_x_tied2, svfloat64_t, double, -+ z0 = svmla_n_f64_x (p0, z1, z0, d4), -+ z0 = svmla_x (p0, z1, z0, d4)) -+ -+/* -+** mla_d4_f64_x_untied: { xfail *-*-* } -+** mov z0\.d, d4 -+** fmad z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZD (mla_d4_f64_x_untied, svfloat64_t, double, -+ z0 = svmla_n_f64_x (p0, z1, z2, d4), -+ z0 = svmla_x (p0, z1, z2, d4)) -+ -+/* -+** mla_2_f64_x_tied1: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fmla z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_2_f64_x_tied1, svfloat64_t, -+ z0 = svmla_n_f64_x (p0, z0, z1, 2), -+ z0 = svmla_x (p0, z0, z1, 2)) -+ -+/* -+** mla_2_f64_x_tied2: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fmad z0\.d, p0/m, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mla_2_f64_x_tied2, svfloat64_t, -+ z0 = svmla_n_f64_x (p0, z1, z0, 2), -+ z0 = svmla_x (p0, z1, z0, 2)) -+ -+/* -+** mla_2_f64_x_untied: -+** fmov z0\.d, #2\.0(?:e\+0)? -+** fmad z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mla_2_f64_x_untied, svfloat64_t, -+ z0 = svmla_n_f64_x (p0, z1, z2, 2), -+ z0 = svmla_x (p0, z1, z2, 2)) -+ -+/* -+** ptrue_mla_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mla_f64_x_tied1, svfloat64_t, -+ z0 = svmla_f64_x (svptrue_b64 (), z0, z1, z2), -+ z0 = svmla_x (svptrue_b64 (), z0, z1, z2)) -+ -+/* -+** ptrue_mla_f64_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mla_f64_x_tied2, svfloat64_t, -+ z0 = svmla_f64_x (svptrue_b64 (), z1, z0, z2), -+ z0 = svmla_x (svptrue_b64 (), z1, z0, z2)) -+ -+/* -+** ptrue_mla_f64_x_tied3: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mla_f64_x_tied3, svfloat64_t, -+ z0 = svmla_f64_x (svptrue_b64 (), z1, z2, z0), -+ z0 = svmla_x (svptrue_b64 (), z1, z2, z0)) -+ -+/* -+** ptrue_mla_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mla_f64_x_untied, svfloat64_t, -+ z0 = svmla_f64_x (svptrue_b64 (), z1, z2, z3), -+ z0 = svmla_x (svptrue_b64 (), z1, z2, z3)) -+ -+/* -+** ptrue_mla_2_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mla_2_f64_x_tied1, svfloat64_t, -+ z0 = svmla_n_f64_x (svptrue_b64 (), z0, z1, 2), -+ z0 = svmla_x (svptrue_b64 (), z0, z1, 2)) -+ -+/* -+** ptrue_mla_2_f64_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mla_2_f64_x_tied2, svfloat64_t, -+ z0 = svmla_n_f64_x (svptrue_b64 (), z1, z0, 2), -+ z0 = svmla_x (svptrue_b64 (), z1, z0, 2)) -+ -+/* -+** ptrue_mla_2_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mla_2_f64_x_untied, svfloat64_t, -+ z0 = svmla_n_f64_x (svptrue_b64 (), z1, z2, 2), -+ z0 = svmla_x (svptrue_b64 (), z1, z2, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_lane_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_lane_f16.c -new file mode 100644 -index 000000000..949e3bb47 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_lane_f16.c -@@ -0,0 +1,128 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mla_lane_0_f16_tied1: -+** fmla z0\.h, z1\.h, z2\.h\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (mla_lane_0_f16_tied1, svfloat16_t, -+ z0 = svmla_lane_f16 (z0, z1, z2, 0), -+ z0 = svmla_lane (z0, z1, z2, 0)) -+ -+/* -+** mla_lane_0_f16_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fmla z0\.h, \1\.h, z2\.h\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (mla_lane_0_f16_tied2, svfloat16_t, -+ z0 = svmla_lane_f16 (z1, z0, z2, 0), -+ z0 = svmla_lane (z1, z0, z2, 0)) -+ -+/* -+** mla_lane_0_f16_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fmla z0\.h, z2\.h, \1\.h\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (mla_lane_0_f16_tied3, svfloat16_t, -+ z0 = svmla_lane_f16 (z1, z2, z0, 0), -+ z0 = svmla_lane (z1, z2, z0, 0)) -+ -+/* -+** mla_lane_0_f16_untied: -+** movprfx z0, z1 -+** fmla z0\.h, z2\.h, z3\.h\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (mla_lane_0_f16_untied, svfloat16_t, -+ z0 = svmla_lane_f16 (z1, z2, z3, 0), -+ z0 = svmla_lane (z1, z2, z3, 0)) -+ -+/* -+** mla_lane_1_f16: -+** fmla z0\.h, z1\.h, z2\.h\[1\] -+** ret -+*/ -+TEST_UNIFORM_Z (mla_lane_1_f16, svfloat16_t, -+ z0 = svmla_lane_f16 (z0, z1, z2, 1), -+ z0 = svmla_lane (z0, z1, z2, 1)) -+ -+/* -+** mla_lane_2_f16: -+** fmla z0\.h, z1\.h, z2\.h\[2\] -+** ret -+*/ -+TEST_UNIFORM_Z (mla_lane_2_f16, svfloat16_t, -+ z0 = svmla_lane_f16 (z0, z1, z2, 2), -+ z0 = svmla_lane (z0, z1, z2, 2)) -+ -+/* -+** mla_lane_3_f16: -+** fmla z0\.h, z1\.h, z2\.h\[3\] -+** ret -+*/ -+TEST_UNIFORM_Z (mla_lane_3_f16, svfloat16_t, -+ z0 = svmla_lane_f16 (z0, z1, z2, 3), -+ z0 = svmla_lane (z0, z1, z2, 3)) -+ -+/* -+** mla_lane_4_f16: -+** fmla z0\.h, z1\.h, z2\.h\[4\] -+** ret -+*/ -+TEST_UNIFORM_Z (mla_lane_4_f16, svfloat16_t, -+ z0 = svmla_lane_f16 (z0, z1, z2, 4), -+ z0 = svmla_lane (z0, z1, z2, 4)) -+ -+/* -+** mla_lane_5_f16: -+** fmla z0\.h, z1\.h, z2\.h\[5\] -+** ret -+*/ -+TEST_UNIFORM_Z (mla_lane_5_f16, svfloat16_t, -+ z0 = svmla_lane_f16 (z0, z1, z2, 5), -+ z0 = svmla_lane (z0, z1, z2, 5)) -+ -+/* -+** 
mla_lane_6_f16: -+** fmla z0\.h, z1\.h, z2\.h\[6\] -+** ret -+*/ -+TEST_UNIFORM_Z (mla_lane_6_f16, svfloat16_t, -+ z0 = svmla_lane_f16 (z0, z1, z2, 6), -+ z0 = svmla_lane (z0, z1, z2, 6)) -+ -+/* -+** mla_lane_7_f16: -+** fmla z0\.h, z1\.h, z2\.h\[7\] -+** ret -+*/ -+TEST_UNIFORM_Z (mla_lane_7_f16, svfloat16_t, -+ z0 = svmla_lane_f16 (z0, z1, z2, 7), -+ z0 = svmla_lane (z0, z1, z2, 7)) -+ -+/* -+** mla_lane_z7_f16: -+** fmla z0\.h, z1\.h, z7\.h\[7\] -+** ret -+*/ -+TEST_DUAL_Z (mla_lane_z7_f16, svfloat16_t, svfloat16_t, -+ z0 = svmla_lane_f16 (z0, z1, z7, 7), -+ z0 = svmla_lane (z0, z1, z7, 7)) -+ -+/* -+** mla_lane_z8_f16: -+** str d8, \[sp, -16\]! -+** mov (z[0-7])\.d, z8\.d -+** fmla z0\.h, z1\.h, \1\.h\[7\] -+** ldr d8, \[sp\], 16 -+** ret -+*/ -+TEST_DUAL_LANE_REG (mla_lane_z8_f16, svfloat16_t, svfloat16_t, z8, -+ z0 = svmla_lane_f16 (z0, z1, z8, 7), -+ z0 = svmla_lane (z0, z1, z8, 7)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_lane_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_lane_f32.c -new file mode 100644 -index 000000000..d376532d6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_lane_f32.c -@@ -0,0 +1,92 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mla_lane_0_f32_tied1: -+** fmla z0\.s, z1\.s, z2\.s\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (mla_lane_0_f32_tied1, svfloat32_t, -+ z0 = svmla_lane_f32 (z0, z1, z2, 0), -+ z0 = svmla_lane (z0, z1, z2, 0)) -+ -+/* -+** mla_lane_0_f32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fmla z0\.s, \1\.s, z2\.s\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (mla_lane_0_f32_tied2, svfloat32_t, -+ z0 = svmla_lane_f32 (z1, z0, z2, 0), -+ z0 = svmla_lane (z1, z0, z2, 0)) -+ -+/* -+** mla_lane_0_f32_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fmla z0\.s, z2\.s, \1\.s\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (mla_lane_0_f32_tied3, svfloat32_t, -+ z0 = svmla_lane_f32 (z1, z2, z0, 0), -+ z0 = svmla_lane (z1, z2, z0, 0)) -+ -+/* -+** mla_lane_0_f32_untied: -+** movprfx z0, z1 -+** fmla z0\.s, z2\.s, z3\.s\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (mla_lane_0_f32_untied, svfloat32_t, -+ z0 = svmla_lane_f32 (z1, z2, z3, 0), -+ z0 = svmla_lane (z1, z2, z3, 0)) -+ -+/* -+** mla_lane_1_f32: -+** fmla z0\.s, z1\.s, z2\.s\[1\] -+** ret -+*/ -+TEST_UNIFORM_Z (mla_lane_1_f32, svfloat32_t, -+ z0 = svmla_lane_f32 (z0, z1, z2, 1), -+ z0 = svmla_lane (z0, z1, z2, 1)) -+ -+/* -+** mla_lane_2_f32: -+** fmla z0\.s, z1\.s, z2\.s\[2\] -+** ret -+*/ -+TEST_UNIFORM_Z (mla_lane_2_f32, svfloat32_t, -+ z0 = svmla_lane_f32 (z0, z1, z2, 2), -+ z0 = svmla_lane (z0, z1, z2, 2)) -+ -+/* -+** mla_lane_3_f32: -+** fmla z0\.s, z1\.s, z2\.s\[3\] -+** ret -+*/ -+TEST_UNIFORM_Z (mla_lane_3_f32, svfloat32_t, -+ z0 = svmla_lane_f32 (z0, z1, z2, 3), -+ z0 = svmla_lane (z0, z1, z2, 3)) -+ -+/* -+** mla_lane_z7_f32: -+** fmla z0\.s, z1\.s, z7\.s\[3\] -+** ret -+*/ -+TEST_DUAL_Z (mla_lane_z7_f32, svfloat32_t, svfloat32_t, -+ z0 = svmla_lane_f32 (z0, z1, z7, 3), -+ z0 = svmla_lane (z0, z1, z7, 3)) -+ -+/* -+** mla_lane_z8_f32: -+** str d8, \[sp, -16\]! 
-+** mov (z[0-7])\.d, z8\.d -+** fmla z0\.s, z1\.s, \1\.s\[3\] -+** ldr d8, \[sp\], 16 -+** ret -+*/ -+TEST_DUAL_LANE_REG (mla_lane_z8_f32, svfloat32_t, svfloat32_t, z8, -+ z0 = svmla_lane_f32 (z0, z1, z8, 3), -+ z0 = svmla_lane (z0, z1, z8, 3)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_lane_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_lane_f64.c -new file mode 100644 -index 000000000..7c58a8a57 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_lane_f64.c -@@ -0,0 +1,83 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mla_lane_0_f64_tied1: -+** fmla z0\.d, z1\.d, z2\.d\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (mla_lane_0_f64_tied1, svfloat64_t, -+ z0 = svmla_lane_f64 (z0, z1, z2, 0), -+ z0 = svmla_lane (z0, z1, z2, 0)) -+ -+/* -+** mla_lane_0_f64_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fmla z0\.d, \1, z2\.d\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (mla_lane_0_f64_tied2, svfloat64_t, -+ z0 = svmla_lane_f64 (z1, z0, z2, 0), -+ z0 = svmla_lane (z1, z0, z2, 0)) -+ -+/* -+** mla_lane_0_f64_tied3: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fmla z0\.d, z2\.d, \1\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (mla_lane_0_f64_tied3, svfloat64_t, -+ z0 = svmla_lane_f64 (z1, z2, z0, 0), -+ z0 = svmla_lane (z1, z2, z0, 0)) -+ -+/* -+** mla_lane_0_f64_untied: -+** movprfx z0, z1 -+** fmla z0\.d, z2\.d, z3\.d\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (mla_lane_0_f64_untied, svfloat64_t, -+ z0 = svmla_lane_f64 (z1, z2, z3, 0), -+ z0 = svmla_lane (z1, z2, z3, 0)) -+ -+/* -+** mla_lane_1_f64: -+** fmla z0\.d, z1\.d, z2\.d\[1\] -+** ret -+*/ -+TEST_UNIFORM_Z (mla_lane_1_f64, svfloat64_t, -+ z0 = svmla_lane_f64 (z0, z1, z2, 1), -+ z0 = svmla_lane (z0, z1, z2, 1)) -+ -+/* -+** mla_lane_z7_f64: -+** fmla z0\.d, z1\.d, z7\.d\[1\] -+** ret -+*/ -+TEST_DUAL_Z (mla_lane_z7_f64, svfloat64_t, svfloat64_t, -+ z0 = svmla_lane_f64 (z0, z1, z7, 1), -+ z0 = svmla_lane (z0, z1, z7, 1)) -+ -+/* -+** mla_lane_z15_f64: -+** str d15, \[sp, -16\]! 
-+** fmla z0\.d, z1\.d, z15\.d\[1\] -+** ldr d15, \[sp\], 16 -+** ret -+*/ -+TEST_DUAL_LANE_REG (mla_lane_z15_f64, svfloat64_t, svfloat64_t, z15, -+ z0 = svmla_lane_f64 (z0, z1, z15, 1), -+ z0 = svmla_lane (z0, z1, z15, 1)) -+ -+/* -+** mla_lane_z16_f64: -+** mov (z[0-9]|z1[0-5])\.d, z16\.d -+** fmla z0\.d, z1\.d, \1\.d\[1\] -+** ret -+*/ -+TEST_DUAL_LANE_REG (mla_lane_z16_f64, svfloat64_t, svfloat64_t, z16, -+ z0 = svmla_lane_f64 (z0, z1, z16, 1), -+ z0 = svmla_lane (z0, z1, z16, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_s16.c -new file mode 100644 -index 000000000..f3ed191db ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_s16.c -@@ -0,0 +1,321 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mla_s16_m_tied1: -+** mla z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s16_m_tied1, svint16_t, -+ z0 = svmla_s16_m (p0, z0, z1, z2), -+ z0 = svmla_m (p0, z0, z1, z2)) -+ -+/* -+** mla_s16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mla z0\.h, p0/m, \1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s16_m_tied2, svint16_t, -+ z0 = svmla_s16_m (p0, z1, z0, z2), -+ z0 = svmla_m (p0, z1, z0, z2)) -+ -+/* -+** mla_s16_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mla z0\.h, p0/m, z2\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s16_m_tied3, svint16_t, -+ z0 = svmla_s16_m (p0, z1, z2, z0), -+ z0 = svmla_m (p0, z1, z2, z0)) -+ -+/* -+** mla_s16_m_untied: -+** movprfx z0, z1 -+** mla z0\.h, p0/m, z2\.h, z3\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s16_m_untied, svint16_t, -+ z0 = svmla_s16_m (p0, z1, z2, z3), -+ z0 = svmla_m (p0, z1, z2, z3)) -+ -+/* -+** mla_w0_s16_m_tied1: -+** mov (z[0-9]+\.h), w0 -+** mla z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_s16_m_tied1, svint16_t, int16_t, -+ z0 = svmla_n_s16_m (p0, z0, z1, x0), -+ z0 = svmla_m (p0, z0, z1, x0)) -+ -+/* -+** mla_w0_s16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** mla z0\.h, p0/m, z2\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_s16_m_untied, svint16_t, int16_t, -+ z0 = svmla_n_s16_m (p0, z1, z2, x0), -+ z0 = svmla_m (p0, z1, z2, x0)) -+ -+/* -+** mla_11_s16_m_tied1: -+** mov (z[0-9]+\.h), #11 -+** mla z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_s16_m_tied1, svint16_t, -+ z0 = svmla_n_s16_m (p0, z0, z1, 11), -+ z0 = svmla_m (p0, z0, z1, 11)) -+ -+/* -+** mla_11_s16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), #11 -+** movprfx z0, z1 -+** mla z0\.h, p0/m, z2\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_s16_m_untied, svint16_t, -+ z0 = svmla_n_s16_m (p0, z1, z2, 11), -+ z0 = svmla_m (p0, z1, z2, 11)) -+ -+/* -+** mla_s16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** mla z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s16_z_tied1, svint16_t, -+ z0 = svmla_s16_z (p0, z0, z1, z2), -+ z0 = svmla_z (p0, z0, z1, z2)) -+ -+/* -+** mla_s16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** mad z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s16_z_tied2, svint16_t, -+ z0 = svmla_s16_z (p0, z1, z0, z2), -+ z0 = svmla_z (p0, z1, z0, z2)) -+ -+/* -+** mla_s16_z_tied3: -+** movprfx z0\.h, p0/z, z0\.h -+** mad z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s16_z_tied3, svint16_t, -+ z0 = svmla_s16_z (p0, z1, z2, z0), -+ z0 = svmla_z (p0, z1, z2, z0)) -+ -+/* -+** mla_s16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** mla 
z0\.h, p0/m, z2\.h, z3\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** mad z0\.h, p0/m, z3\.h, z1\.h -+** | -+** movprfx z0\.h, p0/z, z3\.h -+** mad z0\.h, p0/m, z2\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s16_z_untied, svint16_t, -+ z0 = svmla_s16_z (p0, z1, z2, z3), -+ z0 = svmla_z (p0, z1, z2, z3)) -+ -+/* -+** mla_w0_s16_z_tied1: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** mla z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_s16_z_tied1, svint16_t, int16_t, -+ z0 = svmla_n_s16_z (p0, z0, z1, x0), -+ z0 = svmla_z (p0, z0, z1, x0)) -+ -+/* -+** mla_w0_s16_z_tied2: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** mad z0\.h, p0/m, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_s16_z_tied2, svint16_t, int16_t, -+ z0 = svmla_n_s16_z (p0, z1, z0, x0), -+ z0 = svmla_z (p0, z1, z0, x0)) -+ -+/* -+** mla_w0_s16_z_untied: -+** mov (z[0-9]+\.h), w0 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** mla z0\.h, p0/m, z2\.h, \1 -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** mad z0\.h, p0/m, \1, z1\.h -+** | -+** movprfx z0\.h, p0/z, \1 -+** mad z0\.h, p0/m, z2\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_s16_z_untied, svint16_t, int16_t, -+ z0 = svmla_n_s16_z (p0, z1, z2, x0), -+ z0 = svmla_z (p0, z1, z2, x0)) -+ -+/* -+** mla_11_s16_z_tied1: -+** mov (z[0-9]+\.h), #11 -+** movprfx z0\.h, p0/z, z0\.h -+** mla z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_s16_z_tied1, svint16_t, -+ z0 = svmla_n_s16_z (p0, z0, z1, 11), -+ z0 = svmla_z (p0, z0, z1, 11)) -+ -+/* -+** mla_11_s16_z_tied2: -+** mov (z[0-9]+\.h), #11 -+** movprfx z0\.h, p0/z, z0\.h -+** mad z0\.h, p0/m, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_s16_z_tied2, svint16_t, -+ z0 = svmla_n_s16_z (p0, z1, z0, 11), -+ z0 = svmla_z (p0, z1, z0, 11)) -+ -+/* -+** mla_11_s16_z_untied: -+** mov (z[0-9]+\.h), #11 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** mla z0\.h, p0/m, z2\.h, \1 -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** mad z0\.h, p0/m, \1, z1\.h -+** | -+** movprfx z0\.h, p0/z, \1 -+** mad z0\.h, p0/m, z2\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_s16_z_untied, svint16_t, -+ z0 = svmla_n_s16_z (p0, z1, z2, 11), -+ z0 = svmla_z (p0, z1, z2, 11)) -+ -+/* -+** mla_s16_x_tied1: -+** mla z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s16_x_tied1, svint16_t, -+ z0 = svmla_s16_x (p0, z0, z1, z2), -+ z0 = svmla_x (p0, z0, z1, z2)) -+ -+/* -+** mla_s16_x_tied2: -+** mad z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s16_x_tied2, svint16_t, -+ z0 = svmla_s16_x (p0, z1, z0, z2), -+ z0 = svmla_x (p0, z1, z0, z2)) -+ -+/* -+** mla_s16_x_tied3: -+** mad z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s16_x_tied3, svint16_t, -+ z0 = svmla_s16_x (p0, z1, z2, z0), -+ z0 = svmla_x (p0, z1, z2, z0)) -+ -+/* -+** mla_s16_x_untied: -+** ( -+** movprfx z0, z1 -+** mla z0\.h, p0/m, z2\.h, z3\.h -+** | -+** movprfx z0, z2 -+** mad z0\.h, p0/m, z3\.h, z1\.h -+** | -+** movprfx z0, z3 -+** mad z0\.h, p0/m, z2\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s16_x_untied, svint16_t, -+ z0 = svmla_s16_x (p0, z1, z2, z3), -+ z0 = svmla_x (p0, z1, z2, z3)) -+ -+/* -+** mla_w0_s16_x_tied1: -+** mov (z[0-9]+\.h), w0 -+** mla z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_s16_x_tied1, svint16_t, int16_t, -+ z0 = svmla_n_s16_x (p0, z0, z1, x0), -+ z0 = svmla_x (p0, z0, z1, x0)) -+ -+/* -+** mla_w0_s16_x_tied2: -+** mov (z[0-9]+\.h), w0 -+** mad z0\.h, p0/m, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_s16_x_tied2, 
svint16_t, int16_t, -+ z0 = svmla_n_s16_x (p0, z1, z0, x0), -+ z0 = svmla_x (p0, z1, z0, x0)) -+ -+/* -+** mla_w0_s16_x_untied: -+** mov z0\.h, w0 -+** mad z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_s16_x_untied, svint16_t, int16_t, -+ z0 = svmla_n_s16_x (p0, z1, z2, x0), -+ z0 = svmla_x (p0, z1, z2, x0)) -+ -+/* -+** mla_11_s16_x_tied1: -+** mov (z[0-9]+\.h), #11 -+** mla z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_s16_x_tied1, svint16_t, -+ z0 = svmla_n_s16_x (p0, z0, z1, 11), -+ z0 = svmla_x (p0, z0, z1, 11)) -+ -+/* -+** mla_11_s16_x_tied2: -+** mov (z[0-9]+\.h), #11 -+** mad z0\.h, p0/m, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_s16_x_tied2, svint16_t, -+ z0 = svmla_n_s16_x (p0, z1, z0, 11), -+ z0 = svmla_x (p0, z1, z0, 11)) -+ -+/* -+** mla_11_s16_x_untied: -+** mov z0\.h, #11 -+** mad z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_s16_x_untied, svint16_t, -+ z0 = svmla_n_s16_x (p0, z1, z2, 11), -+ z0 = svmla_x (p0, z1, z2, 11)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_s32.c -new file mode 100644 -index 000000000..5e8001a71 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_s32.c -@@ -0,0 +1,321 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mla_s32_m_tied1: -+** mla z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s32_m_tied1, svint32_t, -+ z0 = svmla_s32_m (p0, z0, z1, z2), -+ z0 = svmla_m (p0, z0, z1, z2)) -+ -+/* -+** mla_s32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mla z0\.s, p0/m, \1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s32_m_tied2, svint32_t, -+ z0 = svmla_s32_m (p0, z1, z0, z2), -+ z0 = svmla_m (p0, z1, z0, z2)) -+ -+/* -+** mla_s32_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mla z0\.s, p0/m, z2\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s32_m_tied3, svint32_t, -+ z0 = svmla_s32_m (p0, z1, z2, z0), -+ z0 = svmla_m (p0, z1, z2, z0)) -+ -+/* -+** mla_s32_m_untied: -+** movprfx z0, z1 -+** mla z0\.s, p0/m, z2\.s, z3\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s32_m_untied, svint32_t, -+ z0 = svmla_s32_m (p0, z1, z2, z3), -+ z0 = svmla_m (p0, z1, z2, z3)) -+ -+/* -+** mla_w0_s32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** mla z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_s32_m_tied1, svint32_t, int32_t, -+ z0 = svmla_n_s32_m (p0, z0, z1, x0), -+ z0 = svmla_m (p0, z0, z1, x0)) -+ -+/* -+** mla_w0_s32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** mla z0\.s, p0/m, z2\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_s32_m_untied, svint32_t, int32_t, -+ z0 = svmla_n_s32_m (p0, z1, z2, x0), -+ z0 = svmla_m (p0, z1, z2, x0)) -+ -+/* -+** mla_11_s32_m_tied1: -+** mov (z[0-9]+\.s), #11 -+** mla z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_s32_m_tied1, svint32_t, -+ z0 = svmla_n_s32_m (p0, z0, z1, 11), -+ z0 = svmla_m (p0, z0, z1, 11)) -+ -+/* -+** mla_11_s32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #11 -+** movprfx z0, z1 -+** mla z0\.s, p0/m, z2\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_s32_m_untied, svint32_t, -+ z0 = svmla_n_s32_m (p0, z1, z2, 11), -+ z0 = svmla_m (p0, z1, z2, 11)) -+ -+/* -+** mla_s32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** mla z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s32_z_tied1, svint32_t, -+ z0 = svmla_s32_z (p0, z0, z1, z2), -+ z0 = svmla_z (p0, z0, z1, z2)) -+ -+/* -+** mla_s32_z_tied2: 
-+** movprfx z0\.s, p0/z, z0\.s -+** mad z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s32_z_tied2, svint32_t, -+ z0 = svmla_s32_z (p0, z1, z0, z2), -+ z0 = svmla_z (p0, z1, z0, z2)) -+ -+/* -+** mla_s32_z_tied3: -+** movprfx z0\.s, p0/z, z0\.s -+** mad z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s32_z_tied3, svint32_t, -+ z0 = svmla_s32_z (p0, z1, z2, z0), -+ z0 = svmla_z (p0, z1, z2, z0)) -+ -+/* -+** mla_s32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** mla z0\.s, p0/m, z2\.s, z3\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** mad z0\.s, p0/m, z3\.s, z1\.s -+** | -+** movprfx z0\.s, p0/z, z3\.s -+** mad z0\.s, p0/m, z2\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s32_z_untied, svint32_t, -+ z0 = svmla_s32_z (p0, z1, z2, z3), -+ z0 = svmla_z (p0, z1, z2, z3)) -+ -+/* -+** mla_w0_s32_z_tied1: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** mla z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_s32_z_tied1, svint32_t, int32_t, -+ z0 = svmla_n_s32_z (p0, z0, z1, x0), -+ z0 = svmla_z (p0, z0, z1, x0)) -+ -+/* -+** mla_w0_s32_z_tied2: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** mad z0\.s, p0/m, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_s32_z_tied2, svint32_t, int32_t, -+ z0 = svmla_n_s32_z (p0, z1, z0, x0), -+ z0 = svmla_z (p0, z1, z0, x0)) -+ -+/* -+** mla_w0_s32_z_untied: -+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** mla z0\.s, p0/m, z2\.s, \1 -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** mad z0\.s, p0/m, \1, z1\.s -+** | -+** movprfx z0\.s, p0/z, \1 -+** mad z0\.s, p0/m, z2\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_s32_z_untied, svint32_t, int32_t, -+ z0 = svmla_n_s32_z (p0, z1, z2, x0), -+ z0 = svmla_z (p0, z1, z2, x0)) -+ -+/* -+** mla_11_s32_z_tied1: -+** mov (z[0-9]+\.s), #11 -+** movprfx z0\.s, p0/z, z0\.s -+** mla z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_s32_z_tied1, svint32_t, -+ z0 = svmla_n_s32_z (p0, z0, z1, 11), -+ z0 = svmla_z (p0, z0, z1, 11)) -+ -+/* -+** mla_11_s32_z_tied2: -+** mov (z[0-9]+\.s), #11 -+** movprfx z0\.s, p0/z, z0\.s -+** mad z0\.s, p0/m, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_s32_z_tied2, svint32_t, -+ z0 = svmla_n_s32_z (p0, z1, z0, 11), -+ z0 = svmla_z (p0, z1, z0, 11)) -+ -+/* -+** mla_11_s32_z_untied: -+** mov (z[0-9]+\.s), #11 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** mla z0\.s, p0/m, z2\.s, \1 -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** mad z0\.s, p0/m, \1, z1\.s -+** | -+** movprfx z0\.s, p0/z, \1 -+** mad z0\.s, p0/m, z2\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_s32_z_untied, svint32_t, -+ z0 = svmla_n_s32_z (p0, z1, z2, 11), -+ z0 = svmla_z (p0, z1, z2, 11)) -+ -+/* -+** mla_s32_x_tied1: -+** mla z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s32_x_tied1, svint32_t, -+ z0 = svmla_s32_x (p0, z0, z1, z2), -+ z0 = svmla_x (p0, z0, z1, z2)) -+ -+/* -+** mla_s32_x_tied2: -+** mad z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s32_x_tied2, svint32_t, -+ z0 = svmla_s32_x (p0, z1, z0, z2), -+ z0 = svmla_x (p0, z1, z0, z2)) -+ -+/* -+** mla_s32_x_tied3: -+** mad z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s32_x_tied3, svint32_t, -+ z0 = svmla_s32_x (p0, z1, z2, z0), -+ z0 = svmla_x (p0, z1, z2, z0)) -+ -+/* -+** mla_s32_x_untied: -+** ( -+** movprfx z0, z1 -+** mla z0\.s, p0/m, z2\.s, z3\.s -+** | -+** movprfx z0, z2 -+** mad z0\.s, p0/m, z3\.s, z1\.s -+** | -+** movprfx z0, z3 -+** mad z0\.s, p0/m, z2\.s, z1\.s -+** ) -+** ret 
-+*/ -+TEST_UNIFORM_Z (mla_s32_x_untied, svint32_t, -+ z0 = svmla_s32_x (p0, z1, z2, z3), -+ z0 = svmla_x (p0, z1, z2, z3)) -+ -+/* -+** mla_w0_s32_x_tied1: -+** mov (z[0-9]+\.s), w0 -+** mla z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_s32_x_tied1, svint32_t, int32_t, -+ z0 = svmla_n_s32_x (p0, z0, z1, x0), -+ z0 = svmla_x (p0, z0, z1, x0)) -+ -+/* -+** mla_w0_s32_x_tied2: -+** mov (z[0-9]+\.s), w0 -+** mad z0\.s, p0/m, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_s32_x_tied2, svint32_t, int32_t, -+ z0 = svmla_n_s32_x (p0, z1, z0, x0), -+ z0 = svmla_x (p0, z1, z0, x0)) -+ -+/* -+** mla_w0_s32_x_untied: -+** mov z0\.s, w0 -+** mad z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_s32_x_untied, svint32_t, int32_t, -+ z0 = svmla_n_s32_x (p0, z1, z2, x0), -+ z0 = svmla_x (p0, z1, z2, x0)) -+ -+/* -+** mla_11_s32_x_tied1: -+** mov (z[0-9]+\.s), #11 -+** mla z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_s32_x_tied1, svint32_t, -+ z0 = svmla_n_s32_x (p0, z0, z1, 11), -+ z0 = svmla_x (p0, z0, z1, 11)) -+ -+/* -+** mla_11_s32_x_tied2: -+** mov (z[0-9]+\.s), #11 -+** mad z0\.s, p0/m, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_s32_x_tied2, svint32_t, -+ z0 = svmla_n_s32_x (p0, z1, z0, 11), -+ z0 = svmla_x (p0, z1, z0, 11)) -+ -+/* -+** mla_11_s32_x_untied: -+** mov z0\.s, #11 -+** mad z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_s32_x_untied, svint32_t, -+ z0 = svmla_n_s32_x (p0, z1, z2, 11), -+ z0 = svmla_x (p0, z1, z2, 11)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_s64.c -new file mode 100644 -index 000000000..7b619e521 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_s64.c -@@ -0,0 +1,321 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mla_s64_m_tied1: -+** mla z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s64_m_tied1, svint64_t, -+ z0 = svmla_s64_m (p0, z0, z1, z2), -+ z0 = svmla_m (p0, z0, z1, z2)) -+ -+/* -+** mla_s64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** mla z0\.d, p0/m, \1, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s64_m_tied2, svint64_t, -+ z0 = svmla_s64_m (p0, z1, z0, z2), -+ z0 = svmla_m (p0, z1, z0, z2)) -+ -+/* -+** mla_s64_m_tied3: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** mla z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s64_m_tied3, svint64_t, -+ z0 = svmla_s64_m (p0, z1, z2, z0), -+ z0 = svmla_m (p0, z1, z2, z0)) -+ -+/* -+** mla_s64_m_untied: -+** movprfx z0, z1 -+** mla z0\.d, p0/m, z2\.d, z3\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s64_m_untied, svint64_t, -+ z0 = svmla_s64_m (p0, z1, z2, z3), -+ z0 = svmla_m (p0, z1, z2, z3)) -+ -+/* -+** mla_x0_s64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** mla z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_x0_s64_m_tied1, svint64_t, int64_t, -+ z0 = svmla_n_s64_m (p0, z0, z1, x0), -+ z0 = svmla_m (p0, z0, z1, x0)) -+ -+/* -+** mla_x0_s64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** mla z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_x0_s64_m_untied, svint64_t, int64_t, -+ z0 = svmla_n_s64_m (p0, z1, z2, x0), -+ z0 = svmla_m (p0, z1, z2, x0)) -+ -+/* -+** mla_11_s64_m_tied1: -+** mov (z[0-9]+\.d), #11 -+** mla z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_s64_m_tied1, svint64_t, -+ z0 = svmla_n_s64_m (p0, z0, z1, 11), -+ z0 = svmla_m (p0, z0, z1, 11)) -+ -+/* -+** 
mla_11_s64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #11 -+** movprfx z0, z1 -+** mla z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_s64_m_untied, svint64_t, -+ z0 = svmla_n_s64_m (p0, z1, z2, 11), -+ z0 = svmla_m (p0, z1, z2, 11)) -+ -+/* -+** mla_s64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** mla z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s64_z_tied1, svint64_t, -+ z0 = svmla_s64_z (p0, z0, z1, z2), -+ z0 = svmla_z (p0, z0, z1, z2)) -+ -+/* -+** mla_s64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** mad z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s64_z_tied2, svint64_t, -+ z0 = svmla_s64_z (p0, z1, z0, z2), -+ z0 = svmla_z (p0, z1, z0, z2)) -+ -+/* -+** mla_s64_z_tied3: -+** movprfx z0\.d, p0/z, z0\.d -+** mad z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s64_z_tied3, svint64_t, -+ z0 = svmla_s64_z (p0, z1, z2, z0), -+ z0 = svmla_z (p0, z1, z2, z0)) -+ -+/* -+** mla_s64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** mla z0\.d, p0/m, z2\.d, z3\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** mad z0\.d, p0/m, z3\.d, z1\.d -+** | -+** movprfx z0\.d, p0/z, z3\.d -+** mad z0\.d, p0/m, z2\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s64_z_untied, svint64_t, -+ z0 = svmla_s64_z (p0, z1, z2, z3), -+ z0 = svmla_z (p0, z1, z2, z3)) -+ -+/* -+** mla_x0_s64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** mla z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_x0_s64_z_tied1, svint64_t, int64_t, -+ z0 = svmla_n_s64_z (p0, z0, z1, x0), -+ z0 = svmla_z (p0, z0, z1, x0)) -+ -+/* -+** mla_x0_s64_z_tied2: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** mad z0\.d, p0/m, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_x0_s64_z_tied2, svint64_t, int64_t, -+ z0 = svmla_n_s64_z (p0, z1, z0, x0), -+ z0 = svmla_z (p0, z1, z0, x0)) -+ -+/* -+** mla_x0_s64_z_untied: -+** mov (z[0-9]+\.d), x0 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** mla z0\.d, p0/m, z2\.d, \1 -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** mad z0\.d, p0/m, \1, z1\.d -+** | -+** movprfx z0\.d, p0/z, \1 -+** mad z0\.d, p0/m, z2\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_x0_s64_z_untied, svint64_t, int64_t, -+ z0 = svmla_n_s64_z (p0, z1, z2, x0), -+ z0 = svmla_z (p0, z1, z2, x0)) -+ -+/* -+** mla_11_s64_z_tied1: -+** mov (z[0-9]+\.d), #11 -+** movprfx z0\.d, p0/z, z0\.d -+** mla z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_s64_z_tied1, svint64_t, -+ z0 = svmla_n_s64_z (p0, z0, z1, 11), -+ z0 = svmla_z (p0, z0, z1, 11)) -+ -+/* -+** mla_11_s64_z_tied2: -+** mov (z[0-9]+\.d), #11 -+** movprfx z0\.d, p0/z, z0\.d -+** mad z0\.d, p0/m, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_s64_z_tied2, svint64_t, -+ z0 = svmla_n_s64_z (p0, z1, z0, 11), -+ z0 = svmla_z (p0, z1, z0, 11)) -+ -+/* -+** mla_11_s64_z_untied: -+** mov (z[0-9]+\.d), #11 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** mla z0\.d, p0/m, z2\.d, \1 -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** mad z0\.d, p0/m, \1, z1\.d -+** | -+** movprfx z0\.d, p0/z, \1 -+** mad z0\.d, p0/m, z2\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_s64_z_untied, svint64_t, -+ z0 = svmla_n_s64_z (p0, z1, z2, 11), -+ z0 = svmla_z (p0, z1, z2, 11)) -+ -+/* -+** mla_s64_x_tied1: -+** mla z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s64_x_tied1, svint64_t, -+ z0 = svmla_s64_x (p0, z0, z1, z2), -+ z0 = svmla_x (p0, z0, z1, z2)) -+ -+/* -+** mla_s64_x_tied2: -+** mad z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s64_x_tied2, 
svint64_t, -+ z0 = svmla_s64_x (p0, z1, z0, z2), -+ z0 = svmla_x (p0, z1, z0, z2)) -+ -+/* -+** mla_s64_x_tied3: -+** mad z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s64_x_tied3, svint64_t, -+ z0 = svmla_s64_x (p0, z1, z2, z0), -+ z0 = svmla_x (p0, z1, z2, z0)) -+ -+/* -+** mla_s64_x_untied: -+** ( -+** movprfx z0, z1 -+** mla z0\.d, p0/m, z2\.d, z3\.d -+** | -+** movprfx z0, z2 -+** mad z0\.d, p0/m, z3\.d, z1\.d -+** | -+** movprfx z0, z3 -+** mad z0\.d, p0/m, z2\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s64_x_untied, svint64_t, -+ z0 = svmla_s64_x (p0, z1, z2, z3), -+ z0 = svmla_x (p0, z1, z2, z3)) -+ -+/* -+** mla_x0_s64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** mla z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_x0_s64_x_tied1, svint64_t, int64_t, -+ z0 = svmla_n_s64_x (p0, z0, z1, x0), -+ z0 = svmla_x (p0, z0, z1, x0)) -+ -+/* -+** mla_x0_s64_x_tied2: -+** mov (z[0-9]+\.d), x0 -+** mad z0\.d, p0/m, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_x0_s64_x_tied2, svint64_t, int64_t, -+ z0 = svmla_n_s64_x (p0, z1, z0, x0), -+ z0 = svmla_x (p0, z1, z0, x0)) -+ -+/* -+** mla_x0_s64_x_untied: -+** mov z0\.d, x0 -+** mad z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_x0_s64_x_untied, svint64_t, int64_t, -+ z0 = svmla_n_s64_x (p0, z1, z2, x0), -+ z0 = svmla_x (p0, z1, z2, x0)) -+ -+/* -+** mla_11_s64_x_tied1: -+** mov (z[0-9]+\.d), #11 -+** mla z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_s64_x_tied1, svint64_t, -+ z0 = svmla_n_s64_x (p0, z0, z1, 11), -+ z0 = svmla_x (p0, z0, z1, 11)) -+ -+/* -+** mla_11_s64_x_tied2: -+** mov (z[0-9]+\.d), #11 -+** mad z0\.d, p0/m, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_s64_x_tied2, svint64_t, -+ z0 = svmla_n_s64_x (p0, z1, z0, 11), -+ z0 = svmla_x (p0, z1, z0, 11)) -+ -+/* -+** mla_11_s64_x_untied: -+** mov z0\.d, #11 -+** mad z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_s64_x_untied, svint64_t, -+ z0 = svmla_n_s64_x (p0, z1, z2, 11), -+ z0 = svmla_x (p0, z1, z2, 11)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_s8.c -new file mode 100644 -index 000000000..47468947d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_s8.c -@@ -0,0 +1,321 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mla_s8_m_tied1: -+** mla z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s8_m_tied1, svint8_t, -+ z0 = svmla_s8_m (p0, z0, z1, z2), -+ z0 = svmla_m (p0, z0, z1, z2)) -+ -+/* -+** mla_s8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mla z0\.b, p0/m, \1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s8_m_tied2, svint8_t, -+ z0 = svmla_s8_m (p0, z1, z0, z2), -+ z0 = svmla_m (p0, z1, z0, z2)) -+ -+/* -+** mla_s8_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mla z0\.b, p0/m, z2\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s8_m_tied3, svint8_t, -+ z0 = svmla_s8_m (p0, z1, z2, z0), -+ z0 = svmla_m (p0, z1, z2, z0)) -+ -+/* -+** mla_s8_m_untied: -+** movprfx z0, z1 -+** mla z0\.b, p0/m, z2\.b, z3\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s8_m_untied, svint8_t, -+ z0 = svmla_s8_m (p0, z1, z2, z3), -+ z0 = svmla_m (p0, z1, z2, z3)) -+ -+/* -+** mla_w0_s8_m_tied1: -+** mov (z[0-9]+\.b), w0 -+** mla z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_s8_m_tied1, svint8_t, int8_t, -+ z0 = svmla_n_s8_m (p0, z0, z1, x0), -+ z0 = svmla_m (p0, z0, z1, x0)) -+ -+/* -+** 
mla_w0_s8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** mla z0\.b, p0/m, z2\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_s8_m_untied, svint8_t, int8_t, -+ z0 = svmla_n_s8_m (p0, z1, z2, x0), -+ z0 = svmla_m (p0, z1, z2, x0)) -+ -+/* -+** mla_11_s8_m_tied1: -+** mov (z[0-9]+\.b), #11 -+** mla z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_s8_m_tied1, svint8_t, -+ z0 = svmla_n_s8_m (p0, z0, z1, 11), -+ z0 = svmla_m (p0, z0, z1, 11)) -+ -+/* -+** mla_11_s8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), #11 -+** movprfx z0, z1 -+** mla z0\.b, p0/m, z2\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_s8_m_untied, svint8_t, -+ z0 = svmla_n_s8_m (p0, z1, z2, 11), -+ z0 = svmla_m (p0, z1, z2, 11)) -+ -+/* -+** mla_s8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** mla z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s8_z_tied1, svint8_t, -+ z0 = svmla_s8_z (p0, z0, z1, z2), -+ z0 = svmla_z (p0, z0, z1, z2)) -+ -+/* -+** mla_s8_z_tied2: -+** movprfx z0\.b, p0/z, z0\.b -+** mad z0\.b, p0/m, z2\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s8_z_tied2, svint8_t, -+ z0 = svmla_s8_z (p0, z1, z0, z2), -+ z0 = svmla_z (p0, z1, z0, z2)) -+ -+/* -+** mla_s8_z_tied3: -+** movprfx z0\.b, p0/z, z0\.b -+** mad z0\.b, p0/m, z2\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s8_z_tied3, svint8_t, -+ z0 = svmla_s8_z (p0, z1, z2, z0), -+ z0 = svmla_z (p0, z1, z2, z0)) -+ -+/* -+** mla_s8_z_untied: -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** mla z0\.b, p0/m, z2\.b, z3\.b -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** mad z0\.b, p0/m, z3\.b, z1\.b -+** | -+** movprfx z0\.b, p0/z, z3\.b -+** mad z0\.b, p0/m, z2\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s8_z_untied, svint8_t, -+ z0 = svmla_s8_z (p0, z1, z2, z3), -+ z0 = svmla_z (p0, z1, z2, z3)) -+ -+/* -+** mla_w0_s8_z_tied1: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** mla z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_s8_z_tied1, svint8_t, int8_t, -+ z0 = svmla_n_s8_z (p0, z0, z1, x0), -+ z0 = svmla_z (p0, z0, z1, x0)) -+ -+/* -+** mla_w0_s8_z_tied2: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** mad z0\.b, p0/m, \1, z1\.b -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_s8_z_tied2, svint8_t, int8_t, -+ z0 = svmla_n_s8_z (p0, z1, z0, x0), -+ z0 = svmla_z (p0, z1, z0, x0)) -+ -+/* -+** mla_w0_s8_z_untied: -+** mov (z[0-9]+\.b), w0 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** mla z0\.b, p0/m, z2\.b, \1 -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** mad z0\.b, p0/m, \1, z1\.b -+** | -+** movprfx z0\.b, p0/z, \1 -+** mad z0\.b, p0/m, z2\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_s8_z_untied, svint8_t, int8_t, -+ z0 = svmla_n_s8_z (p0, z1, z2, x0), -+ z0 = svmla_z (p0, z1, z2, x0)) -+ -+/* -+** mla_11_s8_z_tied1: -+** mov (z[0-9]+\.b), #11 -+** movprfx z0\.b, p0/z, z0\.b -+** mla z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_s8_z_tied1, svint8_t, -+ z0 = svmla_n_s8_z (p0, z0, z1, 11), -+ z0 = svmla_z (p0, z0, z1, 11)) -+ -+/* -+** mla_11_s8_z_tied2: -+** mov (z[0-9]+\.b), #11 -+** movprfx z0\.b, p0/z, z0\.b -+** mad z0\.b, p0/m, \1, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_s8_z_tied2, svint8_t, -+ z0 = svmla_n_s8_z (p0, z1, z0, 11), -+ z0 = svmla_z (p0, z1, z0, 11)) -+ -+/* -+** mla_11_s8_z_untied: -+** mov (z[0-9]+\.b), #11 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** mla z0\.b, p0/m, z2\.b, \1 -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** mad z0\.b, p0/m, \1, z1\.b -+** | -+** movprfx z0\.b, p0/z, \1 -+** mad z0\.b, p0/m, z2\.b, z1\.b -+** 
) -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_s8_z_untied, svint8_t, -+ z0 = svmla_n_s8_z (p0, z1, z2, 11), -+ z0 = svmla_z (p0, z1, z2, 11)) -+ -+/* -+** mla_s8_x_tied1: -+** mla z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s8_x_tied1, svint8_t, -+ z0 = svmla_s8_x (p0, z0, z1, z2), -+ z0 = svmla_x (p0, z0, z1, z2)) -+ -+/* -+** mla_s8_x_tied2: -+** mad z0\.b, p0/m, z2\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s8_x_tied2, svint8_t, -+ z0 = svmla_s8_x (p0, z1, z0, z2), -+ z0 = svmla_x (p0, z1, z0, z2)) -+ -+/* -+** mla_s8_x_tied3: -+** mad z0\.b, p0/m, z2\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s8_x_tied3, svint8_t, -+ z0 = svmla_s8_x (p0, z1, z2, z0), -+ z0 = svmla_x (p0, z1, z2, z0)) -+ -+/* -+** mla_s8_x_untied: -+** ( -+** movprfx z0, z1 -+** mla z0\.b, p0/m, z2\.b, z3\.b -+** | -+** movprfx z0, z2 -+** mad z0\.b, p0/m, z3\.b, z1\.b -+** | -+** movprfx z0, z3 -+** mad z0\.b, p0/m, z2\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mla_s8_x_untied, svint8_t, -+ z0 = svmla_s8_x (p0, z1, z2, z3), -+ z0 = svmla_x (p0, z1, z2, z3)) -+ -+/* -+** mla_w0_s8_x_tied1: -+** mov (z[0-9]+\.b), w0 -+** mla z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_s8_x_tied1, svint8_t, int8_t, -+ z0 = svmla_n_s8_x (p0, z0, z1, x0), -+ z0 = svmla_x (p0, z0, z1, x0)) -+ -+/* -+** mla_w0_s8_x_tied2: -+** mov (z[0-9]+\.b), w0 -+** mad z0\.b, p0/m, \1, z1\.b -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_s8_x_tied2, svint8_t, int8_t, -+ z0 = svmla_n_s8_x (p0, z1, z0, x0), -+ z0 = svmla_x (p0, z1, z0, x0)) -+ -+/* -+** mla_w0_s8_x_untied: -+** mov z0\.b, w0 -+** mad z0\.b, p0/m, z2\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_s8_x_untied, svint8_t, int8_t, -+ z0 = svmla_n_s8_x (p0, z1, z2, x0), -+ z0 = svmla_x (p0, z1, z2, x0)) -+ -+/* -+** mla_11_s8_x_tied1: -+** mov (z[0-9]+\.b), #11 -+** mla z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_s8_x_tied1, svint8_t, -+ z0 = svmla_n_s8_x (p0, z0, z1, 11), -+ z0 = svmla_x (p0, z0, z1, 11)) -+ -+/* -+** mla_11_s8_x_tied2: -+** mov (z[0-9]+\.b), #11 -+** mad z0\.b, p0/m, \1, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_s8_x_tied2, svint8_t, -+ z0 = svmla_n_s8_x (p0, z1, z0, 11), -+ z0 = svmla_x (p0, z1, z0, 11)) -+ -+/* -+** mla_11_s8_x_untied: -+** mov z0\.b, #11 -+** mad z0\.b, p0/m, z2\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_s8_x_untied, svint8_t, -+ z0 = svmla_n_s8_x (p0, z1, z2, 11), -+ z0 = svmla_x (p0, z1, z2, 11)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_u16.c -new file mode 100644 -index 000000000..7238e428f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_u16.c -@@ -0,0 +1,321 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mla_u16_m_tied1: -+** mla z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u16_m_tied1, svuint16_t, -+ z0 = svmla_u16_m (p0, z0, z1, z2), -+ z0 = svmla_m (p0, z0, z1, z2)) -+ -+/* -+** mla_u16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mla z0\.h, p0/m, \1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u16_m_tied2, svuint16_t, -+ z0 = svmla_u16_m (p0, z1, z0, z2), -+ z0 = svmla_m (p0, z1, z0, z2)) -+ -+/* -+** mla_u16_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mla z0\.h, p0/m, z2\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u16_m_tied3, svuint16_t, -+ z0 = svmla_u16_m (p0, z1, z2, z0), -+ z0 = svmla_m (p0, z1, z2, z0)) -+ -+/* -+** mla_u16_m_untied: -+** movprfx z0, 
z1 -+** mla z0\.h, p0/m, z2\.h, z3\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u16_m_untied, svuint16_t, -+ z0 = svmla_u16_m (p0, z1, z2, z3), -+ z0 = svmla_m (p0, z1, z2, z3)) -+ -+/* -+** mla_w0_u16_m_tied1: -+** mov (z[0-9]+\.h), w0 -+** mla z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_u16_m_tied1, svuint16_t, uint16_t, -+ z0 = svmla_n_u16_m (p0, z0, z1, x0), -+ z0 = svmla_m (p0, z0, z1, x0)) -+ -+/* -+** mla_w0_u16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** mla z0\.h, p0/m, z2\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_u16_m_untied, svuint16_t, uint16_t, -+ z0 = svmla_n_u16_m (p0, z1, z2, x0), -+ z0 = svmla_m (p0, z1, z2, x0)) -+ -+/* -+** mla_11_u16_m_tied1: -+** mov (z[0-9]+\.h), #11 -+** mla z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_u16_m_tied1, svuint16_t, -+ z0 = svmla_n_u16_m (p0, z0, z1, 11), -+ z0 = svmla_m (p0, z0, z1, 11)) -+ -+/* -+** mla_11_u16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), #11 -+** movprfx z0, z1 -+** mla z0\.h, p0/m, z2\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_u16_m_untied, svuint16_t, -+ z0 = svmla_n_u16_m (p0, z1, z2, 11), -+ z0 = svmla_m (p0, z1, z2, 11)) -+ -+/* -+** mla_u16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** mla z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u16_z_tied1, svuint16_t, -+ z0 = svmla_u16_z (p0, z0, z1, z2), -+ z0 = svmla_z (p0, z0, z1, z2)) -+ -+/* -+** mla_u16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** mad z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u16_z_tied2, svuint16_t, -+ z0 = svmla_u16_z (p0, z1, z0, z2), -+ z0 = svmla_z (p0, z1, z0, z2)) -+ -+/* -+** mla_u16_z_tied3: -+** movprfx z0\.h, p0/z, z0\.h -+** mad z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u16_z_tied3, svuint16_t, -+ z0 = svmla_u16_z (p0, z1, z2, z0), -+ z0 = svmla_z (p0, z1, z2, z0)) -+ -+/* -+** mla_u16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** mla z0\.h, p0/m, z2\.h, z3\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** mad z0\.h, p0/m, z3\.h, z1\.h -+** | -+** movprfx z0\.h, p0/z, z3\.h -+** mad z0\.h, p0/m, z2\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u16_z_untied, svuint16_t, -+ z0 = svmla_u16_z (p0, z1, z2, z3), -+ z0 = svmla_z (p0, z1, z2, z3)) -+ -+/* -+** mla_w0_u16_z_tied1: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** mla z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_u16_z_tied1, svuint16_t, uint16_t, -+ z0 = svmla_n_u16_z (p0, z0, z1, x0), -+ z0 = svmla_z (p0, z0, z1, x0)) -+ -+/* -+** mla_w0_u16_z_tied2: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** mad z0\.h, p0/m, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_u16_z_tied2, svuint16_t, uint16_t, -+ z0 = svmla_n_u16_z (p0, z1, z0, x0), -+ z0 = svmla_z (p0, z1, z0, x0)) -+ -+/* -+** mla_w0_u16_z_untied: -+** mov (z[0-9]+\.h), w0 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** mla z0\.h, p0/m, z2\.h, \1 -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** mad z0\.h, p0/m, \1, z1\.h -+** | -+** movprfx z0\.h, p0/z, \1 -+** mad z0\.h, p0/m, z2\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_u16_z_untied, svuint16_t, uint16_t, -+ z0 = svmla_n_u16_z (p0, z1, z2, x0), -+ z0 = svmla_z (p0, z1, z2, x0)) -+ -+/* -+** mla_11_u16_z_tied1: -+** mov (z[0-9]+\.h), #11 -+** movprfx z0\.h, p0/z, z0\.h -+** mla z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_u16_z_tied1, svuint16_t, -+ z0 = svmla_n_u16_z (p0, z0, z1, 11), -+ z0 = svmla_z (p0, z0, z1, 11)) -+ -+/* -+** mla_11_u16_z_tied2: -+** mov 
(z[0-9]+\.h), #11 -+** movprfx z0\.h, p0/z, z0\.h -+** mad z0\.h, p0/m, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_u16_z_tied2, svuint16_t, -+ z0 = svmla_n_u16_z (p0, z1, z0, 11), -+ z0 = svmla_z (p0, z1, z0, 11)) -+ -+/* -+** mla_11_u16_z_untied: -+** mov (z[0-9]+\.h), #11 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** mla z0\.h, p0/m, z2\.h, \1 -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** mad z0\.h, p0/m, \1, z1\.h -+** | -+** movprfx z0\.h, p0/z, \1 -+** mad z0\.h, p0/m, z2\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_u16_z_untied, svuint16_t, -+ z0 = svmla_n_u16_z (p0, z1, z2, 11), -+ z0 = svmla_z (p0, z1, z2, 11)) -+ -+/* -+** mla_u16_x_tied1: -+** mla z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u16_x_tied1, svuint16_t, -+ z0 = svmla_u16_x (p0, z0, z1, z2), -+ z0 = svmla_x (p0, z0, z1, z2)) -+ -+/* -+** mla_u16_x_tied2: -+** mad z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u16_x_tied2, svuint16_t, -+ z0 = svmla_u16_x (p0, z1, z0, z2), -+ z0 = svmla_x (p0, z1, z0, z2)) -+ -+/* -+** mla_u16_x_tied3: -+** mad z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u16_x_tied3, svuint16_t, -+ z0 = svmla_u16_x (p0, z1, z2, z0), -+ z0 = svmla_x (p0, z1, z2, z0)) -+ -+/* -+** mla_u16_x_untied: -+** ( -+** movprfx z0, z1 -+** mla z0\.h, p0/m, z2\.h, z3\.h -+** | -+** movprfx z0, z2 -+** mad z0\.h, p0/m, z3\.h, z1\.h -+** | -+** movprfx z0, z3 -+** mad z0\.h, p0/m, z2\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u16_x_untied, svuint16_t, -+ z0 = svmla_u16_x (p0, z1, z2, z3), -+ z0 = svmla_x (p0, z1, z2, z3)) -+ -+/* -+** mla_w0_u16_x_tied1: -+** mov (z[0-9]+\.h), w0 -+** mla z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_u16_x_tied1, svuint16_t, uint16_t, -+ z0 = svmla_n_u16_x (p0, z0, z1, x0), -+ z0 = svmla_x (p0, z0, z1, x0)) -+ -+/* -+** mla_w0_u16_x_tied2: -+** mov (z[0-9]+\.h), w0 -+** mad z0\.h, p0/m, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_u16_x_tied2, svuint16_t, uint16_t, -+ z0 = svmla_n_u16_x (p0, z1, z0, x0), -+ z0 = svmla_x (p0, z1, z0, x0)) -+ -+/* -+** mla_w0_u16_x_untied: -+** mov z0\.h, w0 -+** mad z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_u16_x_untied, svuint16_t, uint16_t, -+ z0 = svmla_n_u16_x (p0, z1, z2, x0), -+ z0 = svmla_x (p0, z1, z2, x0)) -+ -+/* -+** mla_11_u16_x_tied1: -+** mov (z[0-9]+\.h), #11 -+** mla z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_u16_x_tied1, svuint16_t, -+ z0 = svmla_n_u16_x (p0, z0, z1, 11), -+ z0 = svmla_x (p0, z0, z1, 11)) -+ -+/* -+** mla_11_u16_x_tied2: -+** mov (z[0-9]+\.h), #11 -+** mad z0\.h, p0/m, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_u16_x_tied2, svuint16_t, -+ z0 = svmla_n_u16_x (p0, z1, z0, 11), -+ z0 = svmla_x (p0, z1, z0, 11)) -+ -+/* -+** mla_11_u16_x_untied: -+** mov z0\.h, #11 -+** mad z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_u16_x_untied, svuint16_t, -+ z0 = svmla_n_u16_x (p0, z1, z2, 11), -+ z0 = svmla_x (p0, z1, z2, 11)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_u32.c -new file mode 100644 -index 000000000..7a68bce3d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_u32.c -@@ -0,0 +1,321 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mla_u32_m_tied1: -+** mla z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u32_m_tied1, svuint32_t, -+ z0 = svmla_u32_m (p0, z0, z1, z2), -+ z0 = svmla_m 
(p0, z0, z1, z2)) -+ -+/* -+** mla_u32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mla z0\.s, p0/m, \1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u32_m_tied2, svuint32_t, -+ z0 = svmla_u32_m (p0, z1, z0, z2), -+ z0 = svmla_m (p0, z1, z0, z2)) -+ -+/* -+** mla_u32_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mla z0\.s, p0/m, z2\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u32_m_tied3, svuint32_t, -+ z0 = svmla_u32_m (p0, z1, z2, z0), -+ z0 = svmla_m (p0, z1, z2, z0)) -+ -+/* -+** mla_u32_m_untied: -+** movprfx z0, z1 -+** mla z0\.s, p0/m, z2\.s, z3\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u32_m_untied, svuint32_t, -+ z0 = svmla_u32_m (p0, z1, z2, z3), -+ z0 = svmla_m (p0, z1, z2, z3)) -+ -+/* -+** mla_w0_u32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** mla z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_u32_m_tied1, svuint32_t, uint32_t, -+ z0 = svmla_n_u32_m (p0, z0, z1, x0), -+ z0 = svmla_m (p0, z0, z1, x0)) -+ -+/* -+** mla_w0_u32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** mla z0\.s, p0/m, z2\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_u32_m_untied, svuint32_t, uint32_t, -+ z0 = svmla_n_u32_m (p0, z1, z2, x0), -+ z0 = svmla_m (p0, z1, z2, x0)) -+ -+/* -+** mla_11_u32_m_tied1: -+** mov (z[0-9]+\.s), #11 -+** mla z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_u32_m_tied1, svuint32_t, -+ z0 = svmla_n_u32_m (p0, z0, z1, 11), -+ z0 = svmla_m (p0, z0, z1, 11)) -+ -+/* -+** mla_11_u32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #11 -+** movprfx z0, z1 -+** mla z0\.s, p0/m, z2\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_u32_m_untied, svuint32_t, -+ z0 = svmla_n_u32_m (p0, z1, z2, 11), -+ z0 = svmla_m (p0, z1, z2, 11)) -+ -+/* -+** mla_u32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** mla z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u32_z_tied1, svuint32_t, -+ z0 = svmla_u32_z (p0, z0, z1, z2), -+ z0 = svmla_z (p0, z0, z1, z2)) -+ -+/* -+** mla_u32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** mad z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u32_z_tied2, svuint32_t, -+ z0 = svmla_u32_z (p0, z1, z0, z2), -+ z0 = svmla_z (p0, z1, z0, z2)) -+ -+/* -+** mla_u32_z_tied3: -+** movprfx z0\.s, p0/z, z0\.s -+** mad z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u32_z_tied3, svuint32_t, -+ z0 = svmla_u32_z (p0, z1, z2, z0), -+ z0 = svmla_z (p0, z1, z2, z0)) -+ -+/* -+** mla_u32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** mla z0\.s, p0/m, z2\.s, z3\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** mad z0\.s, p0/m, z3\.s, z1\.s -+** | -+** movprfx z0\.s, p0/z, z3\.s -+** mad z0\.s, p0/m, z2\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u32_z_untied, svuint32_t, -+ z0 = svmla_u32_z (p0, z1, z2, z3), -+ z0 = svmla_z (p0, z1, z2, z3)) -+ -+/* -+** mla_w0_u32_z_tied1: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** mla z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_u32_z_tied1, svuint32_t, uint32_t, -+ z0 = svmla_n_u32_z (p0, z0, z1, x0), -+ z0 = svmla_z (p0, z0, z1, x0)) -+ -+/* -+** mla_w0_u32_z_tied2: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** mad z0\.s, p0/m, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_u32_z_tied2, svuint32_t, uint32_t, -+ z0 = svmla_n_u32_z (p0, z1, z0, x0), -+ z0 = svmla_z (p0, z1, z0, x0)) -+ -+/* -+** mla_w0_u32_z_untied: -+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** mla z0\.s, p0/m, z2\.s, \1 -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** mad z0\.s, p0/m, 
\1, z1\.s -+** | -+** movprfx z0\.s, p0/z, \1 -+** mad z0\.s, p0/m, z2\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_u32_z_untied, svuint32_t, uint32_t, -+ z0 = svmla_n_u32_z (p0, z1, z2, x0), -+ z0 = svmla_z (p0, z1, z2, x0)) -+ -+/* -+** mla_11_u32_z_tied1: -+** mov (z[0-9]+\.s), #11 -+** movprfx z0\.s, p0/z, z0\.s -+** mla z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_u32_z_tied1, svuint32_t, -+ z0 = svmla_n_u32_z (p0, z0, z1, 11), -+ z0 = svmla_z (p0, z0, z1, 11)) -+ -+/* -+** mla_11_u32_z_tied2: -+** mov (z[0-9]+\.s), #11 -+** movprfx z0\.s, p0/z, z0\.s -+** mad z0\.s, p0/m, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_u32_z_tied2, svuint32_t, -+ z0 = svmla_n_u32_z (p0, z1, z0, 11), -+ z0 = svmla_z (p0, z1, z0, 11)) -+ -+/* -+** mla_11_u32_z_untied: -+** mov (z[0-9]+\.s), #11 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** mla z0\.s, p0/m, z2\.s, \1 -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** mad z0\.s, p0/m, \1, z1\.s -+** | -+** movprfx z0\.s, p0/z, \1 -+** mad z0\.s, p0/m, z2\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_u32_z_untied, svuint32_t, -+ z0 = svmla_n_u32_z (p0, z1, z2, 11), -+ z0 = svmla_z (p0, z1, z2, 11)) -+ -+/* -+** mla_u32_x_tied1: -+** mla z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u32_x_tied1, svuint32_t, -+ z0 = svmla_u32_x (p0, z0, z1, z2), -+ z0 = svmla_x (p0, z0, z1, z2)) -+ -+/* -+** mla_u32_x_tied2: -+** mad z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u32_x_tied2, svuint32_t, -+ z0 = svmla_u32_x (p0, z1, z0, z2), -+ z0 = svmla_x (p0, z1, z0, z2)) -+ -+/* -+** mla_u32_x_tied3: -+** mad z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u32_x_tied3, svuint32_t, -+ z0 = svmla_u32_x (p0, z1, z2, z0), -+ z0 = svmla_x (p0, z1, z2, z0)) -+ -+/* -+** mla_u32_x_untied: -+** ( -+** movprfx z0, z1 -+** mla z0\.s, p0/m, z2\.s, z3\.s -+** | -+** movprfx z0, z2 -+** mad z0\.s, p0/m, z3\.s, z1\.s -+** | -+** movprfx z0, z3 -+** mad z0\.s, p0/m, z2\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u32_x_untied, svuint32_t, -+ z0 = svmla_u32_x (p0, z1, z2, z3), -+ z0 = svmla_x (p0, z1, z2, z3)) -+ -+/* -+** mla_w0_u32_x_tied1: -+** mov (z[0-9]+\.s), w0 -+** mla z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_u32_x_tied1, svuint32_t, uint32_t, -+ z0 = svmla_n_u32_x (p0, z0, z1, x0), -+ z0 = svmla_x (p0, z0, z1, x0)) -+ -+/* -+** mla_w0_u32_x_tied2: -+** mov (z[0-9]+\.s), w0 -+** mad z0\.s, p0/m, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_u32_x_tied2, svuint32_t, uint32_t, -+ z0 = svmla_n_u32_x (p0, z1, z0, x0), -+ z0 = svmla_x (p0, z1, z0, x0)) -+ -+/* -+** mla_w0_u32_x_untied: -+** mov z0\.s, w0 -+** mad z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_u32_x_untied, svuint32_t, uint32_t, -+ z0 = svmla_n_u32_x (p0, z1, z2, x0), -+ z0 = svmla_x (p0, z1, z2, x0)) -+ -+/* -+** mla_11_u32_x_tied1: -+** mov (z[0-9]+\.s), #11 -+** mla z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_u32_x_tied1, svuint32_t, -+ z0 = svmla_n_u32_x (p0, z0, z1, 11), -+ z0 = svmla_x (p0, z0, z1, 11)) -+ -+/* -+** mla_11_u32_x_tied2: -+** mov (z[0-9]+\.s), #11 -+** mad z0\.s, p0/m, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_u32_x_tied2, svuint32_t, -+ z0 = svmla_n_u32_x (p0, z1, z0, 11), -+ z0 = svmla_x (p0, z1, z0, 11)) -+ -+/* -+** mla_11_u32_x_untied: -+** mov z0\.s, #11 -+** mad z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_u32_x_untied, svuint32_t, -+ z0 = svmla_n_u32_x (p0, z1, z2, 11), -+ z0 = svmla_x (p0, z1, z2, 11)) -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_u64.c -new file mode 100644 -index 000000000..6233265c8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_u64.c -@@ -0,0 +1,321 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mla_u64_m_tied1: -+** mla z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u64_m_tied1, svuint64_t, -+ z0 = svmla_u64_m (p0, z0, z1, z2), -+ z0 = svmla_m (p0, z0, z1, z2)) -+ -+/* -+** mla_u64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** mla z0\.d, p0/m, \1, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u64_m_tied2, svuint64_t, -+ z0 = svmla_u64_m (p0, z1, z0, z2), -+ z0 = svmla_m (p0, z1, z0, z2)) -+ -+/* -+** mla_u64_m_tied3: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** mla z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u64_m_tied3, svuint64_t, -+ z0 = svmla_u64_m (p0, z1, z2, z0), -+ z0 = svmla_m (p0, z1, z2, z0)) -+ -+/* -+** mla_u64_m_untied: -+** movprfx z0, z1 -+** mla z0\.d, p0/m, z2\.d, z3\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u64_m_untied, svuint64_t, -+ z0 = svmla_u64_m (p0, z1, z2, z3), -+ z0 = svmla_m (p0, z1, z2, z3)) -+ -+/* -+** mla_x0_u64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** mla z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_x0_u64_m_tied1, svuint64_t, uint64_t, -+ z0 = svmla_n_u64_m (p0, z0, z1, x0), -+ z0 = svmla_m (p0, z0, z1, x0)) -+ -+/* -+** mla_x0_u64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** mla z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_x0_u64_m_untied, svuint64_t, uint64_t, -+ z0 = svmla_n_u64_m (p0, z1, z2, x0), -+ z0 = svmla_m (p0, z1, z2, x0)) -+ -+/* -+** mla_11_u64_m_tied1: -+** mov (z[0-9]+\.d), #11 -+** mla z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_u64_m_tied1, svuint64_t, -+ z0 = svmla_n_u64_m (p0, z0, z1, 11), -+ z0 = svmla_m (p0, z0, z1, 11)) -+ -+/* -+** mla_11_u64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #11 -+** movprfx z0, z1 -+** mla z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_u64_m_untied, svuint64_t, -+ z0 = svmla_n_u64_m (p0, z1, z2, 11), -+ z0 = svmla_m (p0, z1, z2, 11)) -+ -+/* -+** mla_u64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** mla z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u64_z_tied1, svuint64_t, -+ z0 = svmla_u64_z (p0, z0, z1, z2), -+ z0 = svmla_z (p0, z0, z1, z2)) -+ -+/* -+** mla_u64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** mad z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u64_z_tied2, svuint64_t, -+ z0 = svmla_u64_z (p0, z1, z0, z2), -+ z0 = svmla_z (p0, z1, z0, z2)) -+ -+/* -+** mla_u64_z_tied3: -+** movprfx z0\.d, p0/z, z0\.d -+** mad z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u64_z_tied3, svuint64_t, -+ z0 = svmla_u64_z (p0, z1, z2, z0), -+ z0 = svmla_z (p0, z1, z2, z0)) -+ -+/* -+** mla_u64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** mla z0\.d, p0/m, z2\.d, z3\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** mad z0\.d, p0/m, z3\.d, z1\.d -+** | -+** movprfx z0\.d, p0/z, z3\.d -+** mad z0\.d, p0/m, z2\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u64_z_untied, svuint64_t, -+ z0 = svmla_u64_z (p0, z1, z2, z3), -+ z0 = svmla_z (p0, z1, z2, z3)) -+ -+/* -+** mla_x0_u64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** mla z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_x0_u64_z_tied1, svuint64_t, uint64_t, -+ 
z0 = svmla_n_u64_z (p0, z0, z1, x0), -+ z0 = svmla_z (p0, z0, z1, x0)) -+ -+/* -+** mla_x0_u64_z_tied2: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** mad z0\.d, p0/m, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_x0_u64_z_tied2, svuint64_t, uint64_t, -+ z0 = svmla_n_u64_z (p0, z1, z0, x0), -+ z0 = svmla_z (p0, z1, z0, x0)) -+ -+/* -+** mla_x0_u64_z_untied: -+** mov (z[0-9]+\.d), x0 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** mla z0\.d, p0/m, z2\.d, \1 -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** mad z0\.d, p0/m, \1, z1\.d -+** | -+** movprfx z0\.d, p0/z, \1 -+** mad z0\.d, p0/m, z2\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_x0_u64_z_untied, svuint64_t, uint64_t, -+ z0 = svmla_n_u64_z (p0, z1, z2, x0), -+ z0 = svmla_z (p0, z1, z2, x0)) -+ -+/* -+** mla_11_u64_z_tied1: -+** mov (z[0-9]+\.d), #11 -+** movprfx z0\.d, p0/z, z0\.d -+** mla z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_u64_z_tied1, svuint64_t, -+ z0 = svmla_n_u64_z (p0, z0, z1, 11), -+ z0 = svmla_z (p0, z0, z1, 11)) -+ -+/* -+** mla_11_u64_z_tied2: -+** mov (z[0-9]+\.d), #11 -+** movprfx z0\.d, p0/z, z0\.d -+** mad z0\.d, p0/m, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_u64_z_tied2, svuint64_t, -+ z0 = svmla_n_u64_z (p0, z1, z0, 11), -+ z0 = svmla_z (p0, z1, z0, 11)) -+ -+/* -+** mla_11_u64_z_untied: -+** mov (z[0-9]+\.d), #11 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** mla z0\.d, p0/m, z2\.d, \1 -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** mad z0\.d, p0/m, \1, z1\.d -+** | -+** movprfx z0\.d, p0/z, \1 -+** mad z0\.d, p0/m, z2\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_u64_z_untied, svuint64_t, -+ z0 = svmla_n_u64_z (p0, z1, z2, 11), -+ z0 = svmla_z (p0, z1, z2, 11)) -+ -+/* -+** mla_u64_x_tied1: -+** mla z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u64_x_tied1, svuint64_t, -+ z0 = svmla_u64_x (p0, z0, z1, z2), -+ z0 = svmla_x (p0, z0, z1, z2)) -+ -+/* -+** mla_u64_x_tied2: -+** mad z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u64_x_tied2, svuint64_t, -+ z0 = svmla_u64_x (p0, z1, z0, z2), -+ z0 = svmla_x (p0, z1, z0, z2)) -+ -+/* -+** mla_u64_x_tied3: -+** mad z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u64_x_tied3, svuint64_t, -+ z0 = svmla_u64_x (p0, z1, z2, z0), -+ z0 = svmla_x (p0, z1, z2, z0)) -+ -+/* -+** mla_u64_x_untied: -+** ( -+** movprfx z0, z1 -+** mla z0\.d, p0/m, z2\.d, z3\.d -+** | -+** movprfx z0, z2 -+** mad z0\.d, p0/m, z3\.d, z1\.d -+** | -+** movprfx z0, z3 -+** mad z0\.d, p0/m, z2\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u64_x_untied, svuint64_t, -+ z0 = svmla_u64_x (p0, z1, z2, z3), -+ z0 = svmla_x (p0, z1, z2, z3)) -+ -+/* -+** mla_x0_u64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** mla z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_x0_u64_x_tied1, svuint64_t, uint64_t, -+ z0 = svmla_n_u64_x (p0, z0, z1, x0), -+ z0 = svmla_x (p0, z0, z1, x0)) -+ -+/* -+** mla_x0_u64_x_tied2: -+** mov (z[0-9]+\.d), x0 -+** mad z0\.d, p0/m, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_x0_u64_x_tied2, svuint64_t, uint64_t, -+ z0 = svmla_n_u64_x (p0, z1, z0, x0), -+ z0 = svmla_x (p0, z1, z0, x0)) -+ -+/* -+** mla_x0_u64_x_untied: -+** mov z0\.d, x0 -+** mad z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_x0_u64_x_untied, svuint64_t, uint64_t, -+ z0 = svmla_n_u64_x (p0, z1, z2, x0), -+ z0 = svmla_x (p0, z1, z2, x0)) -+ -+/* -+** mla_11_u64_x_tied1: -+** mov (z[0-9]+\.d), #11 -+** mla z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_u64_x_tied1, svuint64_t, -+ z0 = 
svmla_n_u64_x (p0, z0, z1, 11), -+ z0 = svmla_x (p0, z0, z1, 11)) -+ -+/* -+** mla_11_u64_x_tied2: -+** mov (z[0-9]+\.d), #11 -+** mad z0\.d, p0/m, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_u64_x_tied2, svuint64_t, -+ z0 = svmla_n_u64_x (p0, z1, z0, 11), -+ z0 = svmla_x (p0, z1, z0, 11)) -+ -+/* -+** mla_11_u64_x_untied: -+** mov z0\.d, #11 -+** mad z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_u64_x_untied, svuint64_t, -+ z0 = svmla_n_u64_x (p0, z1, z2, 11), -+ z0 = svmla_x (p0, z1, z2, 11)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_u8.c -new file mode 100644 -index 000000000..832ed4141 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mla_u8.c -@@ -0,0 +1,321 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mla_u8_m_tied1: -+** mla z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u8_m_tied1, svuint8_t, -+ z0 = svmla_u8_m (p0, z0, z1, z2), -+ z0 = svmla_m (p0, z0, z1, z2)) -+ -+/* -+** mla_u8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mla z0\.b, p0/m, \1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u8_m_tied2, svuint8_t, -+ z0 = svmla_u8_m (p0, z1, z0, z2), -+ z0 = svmla_m (p0, z1, z0, z2)) -+ -+/* -+** mla_u8_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mla z0\.b, p0/m, z2\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u8_m_tied3, svuint8_t, -+ z0 = svmla_u8_m (p0, z1, z2, z0), -+ z0 = svmla_m (p0, z1, z2, z0)) -+ -+/* -+** mla_u8_m_untied: -+** movprfx z0, z1 -+** mla z0\.b, p0/m, z2\.b, z3\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u8_m_untied, svuint8_t, -+ z0 = svmla_u8_m (p0, z1, z2, z3), -+ z0 = svmla_m (p0, z1, z2, z3)) -+ -+/* -+** mla_w0_u8_m_tied1: -+** mov (z[0-9]+\.b), w0 -+** mla z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_u8_m_tied1, svuint8_t, uint8_t, -+ z0 = svmla_n_u8_m (p0, z0, z1, x0), -+ z0 = svmla_m (p0, z0, z1, x0)) -+ -+/* -+** mla_w0_u8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** mla z0\.b, p0/m, z2\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_u8_m_untied, svuint8_t, uint8_t, -+ z0 = svmla_n_u8_m (p0, z1, z2, x0), -+ z0 = svmla_m (p0, z1, z2, x0)) -+ -+/* -+** mla_11_u8_m_tied1: -+** mov (z[0-9]+\.b), #11 -+** mla z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_u8_m_tied1, svuint8_t, -+ z0 = svmla_n_u8_m (p0, z0, z1, 11), -+ z0 = svmla_m (p0, z0, z1, 11)) -+ -+/* -+** mla_11_u8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), #11 -+** movprfx z0, z1 -+** mla z0\.b, p0/m, z2\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_u8_m_untied, svuint8_t, -+ z0 = svmla_n_u8_m (p0, z1, z2, 11), -+ z0 = svmla_m (p0, z1, z2, 11)) -+ -+/* -+** mla_u8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** mla z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u8_z_tied1, svuint8_t, -+ z0 = svmla_u8_z (p0, z0, z1, z2), -+ z0 = svmla_z (p0, z0, z1, z2)) -+ -+/* -+** mla_u8_z_tied2: -+** movprfx z0\.b, p0/z, z0\.b -+** mad z0\.b, p0/m, z2\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u8_z_tied2, svuint8_t, -+ z0 = svmla_u8_z (p0, z1, z0, z2), -+ z0 = svmla_z (p0, z1, z0, z2)) -+ -+/* -+** mla_u8_z_tied3: -+** movprfx z0\.b, p0/z, z0\.b -+** mad z0\.b, p0/m, z2\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u8_z_tied3, svuint8_t, -+ z0 = svmla_u8_z (p0, z1, z2, z0), -+ z0 = svmla_z (p0, z1, z2, z0)) -+ -+/* -+** mla_u8_z_untied: -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** mla z0\.b, 
p0/m, z2\.b, z3\.b -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** mad z0\.b, p0/m, z3\.b, z1\.b -+** | -+** movprfx z0\.b, p0/z, z3\.b -+** mad z0\.b, p0/m, z2\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u8_z_untied, svuint8_t, -+ z0 = svmla_u8_z (p0, z1, z2, z3), -+ z0 = svmla_z (p0, z1, z2, z3)) -+ -+/* -+** mla_w0_u8_z_tied1: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** mla z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_u8_z_tied1, svuint8_t, uint8_t, -+ z0 = svmla_n_u8_z (p0, z0, z1, x0), -+ z0 = svmla_z (p0, z0, z1, x0)) -+ -+/* -+** mla_w0_u8_z_tied2: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** mad z0\.b, p0/m, \1, z1\.b -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_u8_z_tied2, svuint8_t, uint8_t, -+ z0 = svmla_n_u8_z (p0, z1, z0, x0), -+ z0 = svmla_z (p0, z1, z0, x0)) -+ -+/* -+** mla_w0_u8_z_untied: -+** mov (z[0-9]+\.b), w0 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** mla z0\.b, p0/m, z2\.b, \1 -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** mad z0\.b, p0/m, \1, z1\.b -+** | -+** movprfx z0\.b, p0/z, \1 -+** mad z0\.b, p0/m, z2\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_u8_z_untied, svuint8_t, uint8_t, -+ z0 = svmla_n_u8_z (p0, z1, z2, x0), -+ z0 = svmla_z (p0, z1, z2, x0)) -+ -+/* -+** mla_11_u8_z_tied1: -+** mov (z[0-9]+\.b), #11 -+** movprfx z0\.b, p0/z, z0\.b -+** mla z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_u8_z_tied1, svuint8_t, -+ z0 = svmla_n_u8_z (p0, z0, z1, 11), -+ z0 = svmla_z (p0, z0, z1, 11)) -+ -+/* -+** mla_11_u8_z_tied2: -+** mov (z[0-9]+\.b), #11 -+** movprfx z0\.b, p0/z, z0\.b -+** mad z0\.b, p0/m, \1, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_u8_z_tied2, svuint8_t, -+ z0 = svmla_n_u8_z (p0, z1, z0, 11), -+ z0 = svmla_z (p0, z1, z0, 11)) -+ -+/* -+** mla_11_u8_z_untied: -+** mov (z[0-9]+\.b), #11 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** mla z0\.b, p0/m, z2\.b, \1 -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** mad z0\.b, p0/m, \1, z1\.b -+** | -+** movprfx z0\.b, p0/z, \1 -+** mad z0\.b, p0/m, z2\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_u8_z_untied, svuint8_t, -+ z0 = svmla_n_u8_z (p0, z1, z2, 11), -+ z0 = svmla_z (p0, z1, z2, 11)) -+ -+/* -+** mla_u8_x_tied1: -+** mla z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u8_x_tied1, svuint8_t, -+ z0 = svmla_u8_x (p0, z0, z1, z2), -+ z0 = svmla_x (p0, z0, z1, z2)) -+ -+/* -+** mla_u8_x_tied2: -+** mad z0\.b, p0/m, z2\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u8_x_tied2, svuint8_t, -+ z0 = svmla_u8_x (p0, z1, z0, z2), -+ z0 = svmla_x (p0, z1, z0, z2)) -+ -+/* -+** mla_u8_x_tied3: -+** mad z0\.b, p0/m, z2\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u8_x_tied3, svuint8_t, -+ z0 = svmla_u8_x (p0, z1, z2, z0), -+ z0 = svmla_x (p0, z1, z2, z0)) -+ -+/* -+** mla_u8_x_untied: -+** ( -+** movprfx z0, z1 -+** mla z0\.b, p0/m, z2\.b, z3\.b -+** | -+** movprfx z0, z2 -+** mad z0\.b, p0/m, z3\.b, z1\.b -+** | -+** movprfx z0, z3 -+** mad z0\.b, p0/m, z2\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mla_u8_x_untied, svuint8_t, -+ z0 = svmla_u8_x (p0, z1, z2, z3), -+ z0 = svmla_x (p0, z1, z2, z3)) -+ -+/* -+** mla_w0_u8_x_tied1: -+** mov (z[0-9]+\.b), w0 -+** mla z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_u8_x_tied1, svuint8_t, uint8_t, -+ z0 = svmla_n_u8_x (p0, z0, z1, x0), -+ z0 = svmla_x (p0, z0, z1, x0)) -+ -+/* -+** mla_w0_u8_x_tied2: -+** mov (z[0-9]+\.b), w0 -+** mad z0\.b, p0/m, \1, z1\.b -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_u8_x_tied2, svuint8_t, uint8_t, -+ z0 = svmla_n_u8_x (p0, 
z1, z0, x0), -+ z0 = svmla_x (p0, z1, z0, x0)) -+ -+/* -+** mla_w0_u8_x_untied: -+** mov z0\.b, w0 -+** mad z0\.b, p0/m, z2\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_ZX (mla_w0_u8_x_untied, svuint8_t, uint8_t, -+ z0 = svmla_n_u8_x (p0, z1, z2, x0), -+ z0 = svmla_x (p0, z1, z2, x0)) -+ -+/* -+** mla_11_u8_x_tied1: -+** mov (z[0-9]+\.b), #11 -+** mla z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_u8_x_tied1, svuint8_t, -+ z0 = svmla_n_u8_x (p0, z0, z1, 11), -+ z0 = svmla_x (p0, z0, z1, 11)) -+ -+/* -+** mla_11_u8_x_tied2: -+** mov (z[0-9]+\.b), #11 -+** mad z0\.b, p0/m, \1, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_u8_x_tied2, svuint8_t, -+ z0 = svmla_n_u8_x (p0, z1, z0, 11), -+ z0 = svmla_x (p0, z1, z0, 11)) -+ -+/* -+** mla_11_u8_x_untied: -+** mov z0\.b, #11 -+** mad z0\.b, p0/m, z2\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mla_11_u8_x_untied, svuint8_t, -+ z0 = svmla_n_u8_x (p0, z1, z2, 11), -+ z0 = svmla_x (p0, z1, z2, 11)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_f16.c -new file mode 100644 -index 000000000..87fba3da7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_f16.c -@@ -0,0 +1,398 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mls_f16_m_tied1: -+** fmls z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_f16_m_tied1, svfloat16_t, -+ z0 = svmls_f16_m (p0, z0, z1, z2), -+ z0 = svmls_m (p0, z0, z1, z2)) -+ -+/* -+** mls_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fmls z0\.h, p0/m, \1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_f16_m_tied2, svfloat16_t, -+ z0 = svmls_f16_m (p0, z1, z0, z2), -+ z0 = svmls_m (p0, z1, z0, z2)) -+ -+/* -+** mls_f16_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fmls z0\.h, p0/m, z2\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_f16_m_tied3, svfloat16_t, -+ z0 = svmls_f16_m (p0, z1, z2, z0), -+ z0 = svmls_m (p0, z1, z2, z0)) -+ -+/* -+** mls_f16_m_untied: -+** movprfx z0, z1 -+** fmls z0\.h, p0/m, z2\.h, z3\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_f16_m_untied, svfloat16_t, -+ z0 = svmls_f16_m (p0, z1, z2, z3), -+ z0 = svmls_m (p0, z1, z2, z3)) -+ -+/* -+** mls_h4_f16_m_tied1: -+** mov (z[0-9]+\.h), h4 -+** fmls z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mls_h4_f16_m_tied1, svfloat16_t, __fp16, -+ z0 = svmls_n_f16_m (p0, z0, z1, d4), -+ z0 = svmls_m (p0, z0, z1, d4)) -+ -+/* -+** mls_h4_f16_m_untied: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0, z1 -+** fmls z0\.h, p0/m, z2\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mls_h4_f16_m_untied, svfloat16_t, __fp16, -+ z0 = svmls_n_f16_m (p0, z1, z2, d4), -+ z0 = svmls_m (p0, z1, z2, d4)) -+ -+/* -+** mls_2_f16_m_tied1: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fmls z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_2_f16_m_tied1, svfloat16_t, -+ z0 = svmls_n_f16_m (p0, z0, z1, 2), -+ z0 = svmls_m (p0, z0, z1, 2)) -+ -+/* -+** mls_2_f16_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fmls z0\.h, p0/m, z2\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_2_f16_m_untied, svfloat16_t, -+ z0 = svmls_n_f16_m (p0, z1, z2, 2), -+ z0 = svmls_m (p0, z1, z2, 2)) -+ -+/* -+** mls_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fmls z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_f16_z_tied1, svfloat16_t, -+ z0 = svmls_f16_z (p0, z0, z1, z2), -+ z0 = svmls_z (p0, z0, z1, z2)) -+ -+/* -+** mls_f16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** fmsb z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_f16_z_tied2, svfloat16_t, -+ z0 = svmls_f16_z (p0, z1, z0, z2), -+ z0 = svmls_z (p0, z1, z0, z2)) -+ -+/* -+** mls_f16_z_tied3: -+** movprfx z0\.h, p0/z, z0\.h -+** fmsb z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_f16_z_tied3, svfloat16_t, -+ z0 = svmls_f16_z (p0, z1, z2, z0), -+ z0 = svmls_z (p0, z1, z2, z0)) -+ -+/* -+** mls_f16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fmls z0\.h, p0/m, z2\.h, z3\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fmsb z0\.h, p0/m, z3\.h, z1\.h -+** | -+** movprfx z0\.h, p0/z, z3\.h -+** fmsb z0\.h, p0/m, z2\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mls_f16_z_untied, svfloat16_t, -+ z0 = svmls_f16_z (p0, z1, z2, z3), -+ z0 = svmls_z (p0, z1, z2, z3)) -+ -+/* -+** mls_h4_f16_z_tied1: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0\.h, p0/z, z0\.h -+** fmls z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mls_h4_f16_z_tied1, svfloat16_t, __fp16, -+ z0 = svmls_n_f16_z (p0, z0, z1, d4), -+ z0 = svmls_z (p0, z0, z1, d4)) -+ -+/* -+** mls_h4_f16_z_tied2: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0\.h, p0/z, z0\.h -+** fmsb z0\.h, p0/m, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZD (mls_h4_f16_z_tied2, svfloat16_t, __fp16, -+ z0 = svmls_n_f16_z (p0, z1, z0, d4), -+ z0 = svmls_z (p0, z1, z0, d4)) -+ -+/* -+** mls_h4_f16_z_untied: -+** mov (z[0-9]+\.h), h4 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fmls z0\.h, p0/m, z2\.h, \1 -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fmsb z0\.h, p0/m, \1, z1\.h -+** | -+** movprfx z0\.h, p0/z, \1 -+** fmsb z0\.h, p0/m, z2\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (mls_h4_f16_z_untied, svfloat16_t, __fp16, -+ z0 = svmls_n_f16_z (p0, z1, z2, d4), -+ z0 = svmls_z (p0, z1, z2, d4)) -+ -+/* -+** mls_2_f16_z_tied1: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** movprfx z0\.h, p0/z, z0\.h -+** fmls z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_2_f16_z_tied1, svfloat16_t, -+ z0 = svmls_n_f16_z (p0, z0, z1, 2), -+ z0 = svmls_z (p0, z0, z1, 2)) -+ -+/* -+** mls_2_f16_z_tied2: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** movprfx z0\.h, p0/z, z0\.h -+** fmsb z0\.h, p0/m, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_2_f16_z_tied2, svfloat16_t, -+ z0 = svmls_n_f16_z (p0, z1, z0, 2), -+ z0 = svmls_z (p0, z1, z0, 2)) -+ -+/* -+** mls_2_f16_z_untied: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fmls z0\.h, p0/m, z2\.h, \1 -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fmsb z0\.h, p0/m, \1, z1\.h -+** | -+** movprfx z0\.h, p0/z, \1 -+** fmsb z0\.h, p0/m, z2\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mls_2_f16_z_untied, svfloat16_t, -+ z0 = svmls_n_f16_z (p0, z1, z2, 2), -+ z0 = svmls_z (p0, z1, z2, 2)) -+ -+/* -+** mls_f16_x_tied1: -+** fmls z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_f16_x_tied1, svfloat16_t, -+ z0 = svmls_f16_x (p0, z0, z1, z2), -+ z0 = svmls_x (p0, z0, z1, z2)) -+ -+/* -+** mls_f16_x_tied2: -+** fmsb z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_f16_x_tied2, svfloat16_t, -+ z0 = svmls_f16_x (p0, z1, z0, z2), -+ z0 = svmls_x (p0, z1, z0, z2)) -+ -+/* -+** mls_f16_x_tied3: -+** fmsb z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_f16_x_tied3, svfloat16_t, -+ z0 = svmls_f16_x (p0, z1, z2, z0), -+ z0 = svmls_x (p0, z1, z2, z0)) -+ -+/* -+** mls_f16_x_untied: -+** ( -+** movprfx z0, z1 -+** fmls z0\.h, p0/m, z2\.h, z3\.h -+** | -+** movprfx z0, z2 -+** fmsb z0\.h, p0/m, z3\.h, z1\.h -+** | -+** movprfx z0, z3 -+** fmsb z0\.h, p0/m, z2\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mls_f16_x_untied, svfloat16_t, -+ z0 = svmls_f16_x (p0, z1, z2, z3), -+ z0 = svmls_x (p0, z1, z2, z3)) -+ -+/* -+** mls_h4_f16_x_tied1: -+** mov (z[0-9]+\.h), h4 -+** fmls z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mls_h4_f16_x_tied1, svfloat16_t, __fp16, -+ z0 = svmls_n_f16_x (p0, z0, z1, d4), -+ z0 = svmls_x (p0, z0, z1, d4)) -+ -+/* -+** mls_h4_f16_x_tied2: -+** mov (z[0-9]+\.h), h4 -+** fmsb z0\.h, p0/m, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZD (mls_h4_f16_x_tied2, svfloat16_t, __fp16, -+ z0 = svmls_n_f16_x (p0, z1, z0, d4), -+ z0 = svmls_x (p0, z1, z0, d4)) -+ -+/* -+** mls_h4_f16_x_untied: { xfail *-*-* } -+** mov z0\.h, h4 -+** fmsb z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZD (mls_h4_f16_x_untied, svfloat16_t, __fp16, -+ z0 = svmls_n_f16_x (p0, z1, z2, d4), -+ z0 = svmls_x (p0, z1, z2, d4)) -+ -+/* -+** mls_2_f16_x_tied1: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fmls z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_2_f16_x_tied1, svfloat16_t, -+ z0 = svmls_n_f16_x (p0, z0, z1, 2), -+ z0 = svmls_x (p0, z0, z1, 2)) -+ -+/* -+** mls_2_f16_x_tied2: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fmsb z0\.h, p0/m, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_2_f16_x_tied2, svfloat16_t, -+ z0 = svmls_n_f16_x (p0, z1, z0, 2), -+ z0 = svmls_x (p0, z1, z0, 2)) -+ -+/* -+** mls_2_f16_x_untied: -+** fmov z0\.h, #2\.0(?:e\+0)? -+** fmsb z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_2_f16_x_untied, svfloat16_t, -+ z0 = svmls_n_f16_x (p0, z1, z2, 2), -+ z0 = svmls_x (p0, z1, z2, 2)) -+ -+/* -+** ptrue_mls_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mls_f16_x_tied1, svfloat16_t, -+ z0 = svmls_f16_x (svptrue_b16 (), z0, z1, z2), -+ z0 = svmls_x (svptrue_b16 (), z0, z1, z2)) -+ -+/* -+** ptrue_mls_f16_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mls_f16_x_tied2, svfloat16_t, -+ z0 = svmls_f16_x (svptrue_b16 (), z1, z0, z2), -+ z0 = svmls_x (svptrue_b16 (), z1, z0, z2)) -+ -+/* -+** ptrue_mls_f16_x_tied3: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mls_f16_x_tied3, svfloat16_t, -+ z0 = svmls_f16_x (svptrue_b16 (), z1, z2, z0), -+ z0 = svmls_x (svptrue_b16 (), z1, z2, z0)) -+ -+/* -+** ptrue_mls_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mls_f16_x_untied, svfloat16_t, -+ z0 = svmls_f16_x (svptrue_b16 (), z1, z2, z3), -+ z0 = svmls_x (svptrue_b16 (), z1, z2, z3)) -+ -+/* -+** ptrue_mls_2_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mls_2_f16_x_tied1, svfloat16_t, -+ z0 = svmls_n_f16_x (svptrue_b16 (), z0, z1, 2), -+ z0 = svmls_x (svptrue_b16 (), z0, z1, 2)) -+ -+/* -+** ptrue_mls_2_f16_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mls_2_f16_x_tied2, svfloat16_t, -+ z0 = svmls_n_f16_x (svptrue_b16 (), z1, z0, 2), -+ z0 = svmls_x (svptrue_b16 (), z1, z0, 2)) -+ -+/* -+** ptrue_mls_2_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mls_2_f16_x_untied, svfloat16_t, -+ z0 = svmls_n_f16_x (svptrue_b16 (), z1, z2, 2), -+ z0 = svmls_x (svptrue_b16 (), z1, z2, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_f32.c -new file mode 100644 -index 000000000..04ce1ec46 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_f32.c -@@ -0,0 +1,398 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mls_f32_m_tied1: -+** fmls z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_f32_m_tied1, svfloat32_t, -+ z0 = svmls_f32_m (p0, z0, z1, z2), -+ z0 = svmls_m (p0, z0, z1, z2)) -+ -+/* -+** mls_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fmls z0\.s, p0/m, \1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_f32_m_tied2, svfloat32_t, -+ z0 = svmls_f32_m (p0, z1, z0, z2), -+ z0 = svmls_m (p0, z1, z0, z2)) -+ -+/* -+** mls_f32_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fmls z0\.s, p0/m, z2\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_f32_m_tied3, svfloat32_t, -+ z0 = svmls_f32_m (p0, z1, z2, z0), -+ z0 = svmls_m (p0, z1, z2, z0)) -+ -+/* -+** mls_f32_m_untied: -+** movprfx z0, z1 -+** fmls z0\.s, p0/m, z2\.s, z3\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_f32_m_untied, svfloat32_t, -+ z0 = svmls_f32_m (p0, z1, z2, z3), -+ z0 = svmls_m (p0, z1, z2, z3)) -+ -+/* -+** mls_s4_f32_m_tied1: -+** mov (z[0-9]+\.s), s4 -+** fmls z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mls_s4_f32_m_tied1, svfloat32_t, float, -+ z0 = svmls_n_f32_m (p0, z0, z1, d4), -+ z0 = svmls_m (p0, z0, z1, d4)) -+ -+/* -+** mls_s4_f32_m_untied: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0, z1 -+** fmls z0\.s, p0/m, z2\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mls_s4_f32_m_untied, svfloat32_t, float, -+ z0 = svmls_n_f32_m (p0, z1, z2, d4), -+ z0 = svmls_m (p0, z1, z2, d4)) -+ -+/* -+** mls_2_f32_m_tied1: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fmls z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_2_f32_m_tied1, svfloat32_t, -+ z0 = svmls_n_f32_m (p0, z0, z1, 2), -+ z0 = svmls_m (p0, z0, z1, 2)) -+ -+/* -+** mls_2_f32_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fmls z0\.s, p0/m, z2\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_2_f32_m_untied, svfloat32_t, -+ z0 = svmls_n_f32_m (p0, z1, z2, 2), -+ z0 = svmls_m (p0, z1, z2, 2)) -+ -+/* -+** mls_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fmls z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_f32_z_tied1, svfloat32_t, -+ z0 = svmls_f32_z (p0, z0, z1, z2), -+ z0 = svmls_z (p0, z0, z1, z2)) -+ -+/* -+** mls_f32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** fmsb z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_f32_z_tied2, svfloat32_t, -+ z0 = svmls_f32_z (p0, z1, z0, z2), -+ z0 = svmls_z (p0, z1, z0, z2)) -+ -+/* -+** mls_f32_z_tied3: -+** movprfx z0\.s, p0/z, z0\.s -+** fmsb z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_f32_z_tied3, svfloat32_t, -+ z0 = svmls_f32_z (p0, z1, z2, z0), -+ z0 = svmls_z (p0, z1, z2, z0)) -+ -+/* -+** mls_f32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fmls z0\.s, p0/m, z2\.s, z3\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fmsb z0\.s, p0/m, z3\.s, z1\.s -+** | -+** movprfx z0\.s, p0/z, z3\.s -+** fmsb z0\.s, p0/m, z2\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mls_f32_z_untied, svfloat32_t, -+ z0 = svmls_f32_z (p0, z1, z2, z3), -+ z0 = svmls_z (p0, z1, z2, z3)) -+ -+/* -+** mls_s4_f32_z_tied1: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0\.s, p0/z, z0\.s -+** fmls z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mls_s4_f32_z_tied1, svfloat32_t, float, -+ z0 = svmls_n_f32_z (p0, z0, z1, d4), -+ z0 = svmls_z (p0, z0, z1, d4)) -+ -+/* -+** mls_s4_f32_z_tied2: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0\.s, p0/z, z0\.s -+** fmsb z0\.s, p0/m, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZD (mls_s4_f32_z_tied2, svfloat32_t, float, -+ z0 = svmls_n_f32_z (p0, z1, z0, d4), -+ z0 = svmls_z (p0, z1, z0, d4)) -+ -+/* -+** mls_s4_f32_z_untied: -+** mov (z[0-9]+\.s), s4 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fmls z0\.s, p0/m, z2\.s, \1 -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fmsb z0\.s, p0/m, \1, z1\.s -+** | -+** movprfx z0\.s, p0/z, \1 -+** fmsb z0\.s, p0/m, z2\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (mls_s4_f32_z_untied, svfloat32_t, float, -+ z0 = svmls_n_f32_z (p0, z1, z2, d4), -+ z0 = svmls_z (p0, z1, z2, d4)) -+ -+/* -+** mls_2_f32_z_tied1: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** movprfx z0\.s, p0/z, z0\.s -+** fmls z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_2_f32_z_tied1, svfloat32_t, -+ z0 = svmls_n_f32_z (p0, z0, z1, 2), -+ z0 = svmls_z (p0, z0, z1, 2)) -+ -+/* -+** mls_2_f32_z_tied2: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** movprfx z0\.s, p0/z, z0\.s -+** fmsb z0\.s, p0/m, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_2_f32_z_tied2, svfloat32_t, -+ z0 = svmls_n_f32_z (p0, z1, z0, 2), -+ z0 = svmls_z (p0, z1, z0, 2)) -+ -+/* -+** mls_2_f32_z_untied: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fmls z0\.s, p0/m, z2\.s, \1 -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fmsb z0\.s, p0/m, \1, z1\.s -+** | -+** movprfx z0\.s, p0/z, \1 -+** fmsb z0\.s, p0/m, z2\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mls_2_f32_z_untied, svfloat32_t, -+ z0 = svmls_n_f32_z (p0, z1, z2, 2), -+ z0 = svmls_z (p0, z1, z2, 2)) -+ -+/* -+** mls_f32_x_tied1: -+** fmls z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_f32_x_tied1, svfloat32_t, -+ z0 = svmls_f32_x (p0, z0, z1, z2), -+ z0 = svmls_x (p0, z0, z1, z2)) -+ -+/* -+** mls_f32_x_tied2: -+** fmsb z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_f32_x_tied2, svfloat32_t, -+ z0 = svmls_f32_x (p0, z1, z0, z2), -+ z0 = svmls_x (p0, z1, z0, z2)) -+ -+/* -+** mls_f32_x_tied3: -+** fmsb z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_f32_x_tied3, svfloat32_t, -+ z0 = svmls_f32_x (p0, z1, z2, z0), -+ z0 = svmls_x (p0, z1, z2, z0)) -+ -+/* -+** mls_f32_x_untied: -+** ( -+** movprfx z0, z1 -+** fmls z0\.s, p0/m, z2\.s, z3\.s -+** | -+** movprfx z0, z2 -+** fmsb z0\.s, p0/m, z3\.s, z1\.s -+** | -+** movprfx z0, z3 -+** fmsb z0\.s, p0/m, z2\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mls_f32_x_untied, svfloat32_t, -+ z0 = svmls_f32_x (p0, z1, z2, z3), -+ z0 = svmls_x (p0, z1, z2, z3)) -+ -+/* -+** mls_s4_f32_x_tied1: -+** mov (z[0-9]+\.s), s4 -+** fmls z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mls_s4_f32_x_tied1, svfloat32_t, float, -+ z0 = svmls_n_f32_x (p0, z0, z1, d4), -+ z0 = svmls_x (p0, z0, z1, d4)) -+ -+/* -+** mls_s4_f32_x_tied2: -+** mov (z[0-9]+\.s), s4 -+** fmsb z0\.s, p0/m, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZD (mls_s4_f32_x_tied2, svfloat32_t, float, -+ z0 = svmls_n_f32_x (p0, z1, z0, d4), -+ z0 = svmls_x (p0, z1, z0, d4)) -+ -+/* -+** mls_s4_f32_x_untied: { xfail *-*-* } -+** mov z0\.s, s4 -+** fmsb z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZD (mls_s4_f32_x_untied, svfloat32_t, float, -+ z0 = svmls_n_f32_x (p0, z1, z2, d4), -+ z0 = svmls_x (p0, z1, z2, d4)) -+ -+/* -+** mls_2_f32_x_tied1: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fmls z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_2_f32_x_tied1, svfloat32_t, -+ z0 = svmls_n_f32_x (p0, z0, z1, 2), -+ z0 = svmls_x (p0, z0, z1, 2)) -+ -+/* -+** mls_2_f32_x_tied2: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fmsb z0\.s, p0/m, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_2_f32_x_tied2, svfloat32_t, -+ z0 = svmls_n_f32_x (p0, z1, z0, 2), -+ z0 = svmls_x (p0, z1, z0, 2)) -+ -+/* -+** mls_2_f32_x_untied: -+** fmov z0\.s, #2\.0(?:e\+0)? -+** fmsb z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_2_f32_x_untied, svfloat32_t, -+ z0 = svmls_n_f32_x (p0, z1, z2, 2), -+ z0 = svmls_x (p0, z1, z2, 2)) -+ -+/* -+** ptrue_mls_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mls_f32_x_tied1, svfloat32_t, -+ z0 = svmls_f32_x (svptrue_b32 (), z0, z1, z2), -+ z0 = svmls_x (svptrue_b32 (), z0, z1, z2)) -+ -+/* -+** ptrue_mls_f32_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mls_f32_x_tied2, svfloat32_t, -+ z0 = svmls_f32_x (svptrue_b32 (), z1, z0, z2), -+ z0 = svmls_x (svptrue_b32 (), z1, z0, z2)) -+ -+/* -+** ptrue_mls_f32_x_tied3: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mls_f32_x_tied3, svfloat32_t, -+ z0 = svmls_f32_x (svptrue_b32 (), z1, z2, z0), -+ z0 = svmls_x (svptrue_b32 (), z1, z2, z0)) -+ -+/* -+** ptrue_mls_f32_x_untied: -+** ... 
-+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mls_f32_x_untied, svfloat32_t, -+ z0 = svmls_f32_x (svptrue_b32 (), z1, z2, z3), -+ z0 = svmls_x (svptrue_b32 (), z1, z2, z3)) -+ -+/* -+** ptrue_mls_2_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mls_2_f32_x_tied1, svfloat32_t, -+ z0 = svmls_n_f32_x (svptrue_b32 (), z0, z1, 2), -+ z0 = svmls_x (svptrue_b32 (), z0, z1, 2)) -+ -+/* -+** ptrue_mls_2_f32_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mls_2_f32_x_tied2, svfloat32_t, -+ z0 = svmls_n_f32_x (svptrue_b32 (), z1, z0, 2), -+ z0 = svmls_x (svptrue_b32 (), z1, z0, 2)) -+ -+/* -+** ptrue_mls_2_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mls_2_f32_x_untied, svfloat32_t, -+ z0 = svmls_n_f32_x (svptrue_b32 (), z1, z2, 2), -+ z0 = svmls_x (svptrue_b32 (), z1, z2, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_f64.c -new file mode 100644 -index 000000000..1e2108af6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_f64.c -@@ -0,0 +1,398 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mls_f64_m_tied1: -+** fmls z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mls_f64_m_tied1, svfloat64_t, -+ z0 = svmls_f64_m (p0, z0, z1, z2), -+ z0 = svmls_m (p0, z0, z1, z2)) -+ -+/* -+** mls_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fmls z0\.d, p0/m, \1, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mls_f64_m_tied2, svfloat64_t, -+ z0 = svmls_f64_m (p0, z1, z0, z2), -+ z0 = svmls_m (p0, z1, z0, z2)) -+ -+/* -+** mls_f64_m_tied3: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fmls z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_f64_m_tied3, svfloat64_t, -+ z0 = svmls_f64_m (p0, z1, z2, z0), -+ z0 = svmls_m (p0, z1, z2, z0)) -+ -+/* -+** mls_f64_m_untied: -+** movprfx z0, z1 -+** fmls z0\.d, p0/m, z2\.d, z3\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mls_f64_m_untied, svfloat64_t, -+ z0 = svmls_f64_m (p0, z1, z2, z3), -+ z0 = svmls_m (p0, z1, z2, z3)) -+ -+/* -+** mls_d4_f64_m_tied1: -+** mov (z[0-9]+\.d), d4 -+** fmls z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mls_d4_f64_m_tied1, svfloat64_t, double, -+ z0 = svmls_n_f64_m (p0, z0, z1, d4), -+ z0 = svmls_m (p0, z0, z1, d4)) -+ -+/* -+** mls_d4_f64_m_untied: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0, z1 -+** fmls z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mls_d4_f64_m_untied, svfloat64_t, double, -+ z0 = svmls_n_f64_m (p0, z1, z2, d4), -+ z0 = svmls_m (p0, z1, z2, d4)) -+ -+/* -+** mls_2_f64_m_tied1: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fmls z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_2_f64_m_tied1, svfloat64_t, -+ z0 = svmls_n_f64_m (p0, z0, z1, 2), -+ z0 = svmls_m (p0, z0, z1, 2)) -+ -+/* -+** mls_2_f64_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fmls z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_2_f64_m_untied, svfloat64_t, -+ z0 = svmls_n_f64_m (p0, z1, z2, 2), -+ z0 = svmls_m (p0, z1, z2, 2)) -+ -+/* -+** mls_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fmls z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mls_f64_z_tied1, svfloat64_t, -+ z0 = svmls_f64_z (p0, z0, z1, z2), -+ z0 = svmls_z (p0, z0, z1, z2)) -+ -+/* -+** mls_f64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** fmsb z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mls_f64_z_tied2, svfloat64_t, -+ z0 = svmls_f64_z (p0, z1, z0, z2), -+ z0 = svmls_z (p0, z1, z0, z2)) -+ -+/* -+** mls_f64_z_tied3: -+** movprfx z0\.d, p0/z, z0\.d -+** fmsb z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mls_f64_z_tied3, svfloat64_t, -+ z0 = svmls_f64_z (p0, z1, z2, z0), -+ z0 = svmls_z (p0, z1, z2, z0)) -+ -+/* -+** mls_f64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fmls z0\.d, p0/m, z2\.d, z3\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fmsb z0\.d, p0/m, z3\.d, z1\.d -+** | -+** movprfx z0\.d, p0/z, z3\.d -+** fmsb z0\.d, p0/m, z2\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mls_f64_z_untied, svfloat64_t, -+ z0 = svmls_f64_z (p0, z1, z2, z3), -+ z0 = svmls_z (p0, z1, z2, z3)) -+ -+/* -+** mls_d4_f64_z_tied1: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0\.d, p0/z, z0\.d -+** fmls z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mls_d4_f64_z_tied1, svfloat64_t, double, -+ z0 = svmls_n_f64_z (p0, z0, z1, d4), -+ z0 = svmls_z (p0, z0, z1, d4)) -+ -+/* -+** mls_d4_f64_z_tied2: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0\.d, p0/z, z0\.d -+** fmsb z0\.d, p0/m, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZD (mls_d4_f64_z_tied2, svfloat64_t, double, -+ z0 = svmls_n_f64_z (p0, z1, z0, d4), -+ z0 = svmls_z (p0, z1, z0, d4)) -+ -+/* -+** mls_d4_f64_z_untied: -+** mov (z[0-9]+\.d), d4 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fmls z0\.d, p0/m, z2\.d, \1 -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fmsb z0\.d, p0/m, \1, z1\.d -+** | -+** movprfx z0\.d, p0/z, \1 -+** fmsb z0\.d, p0/m, z2\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (mls_d4_f64_z_untied, svfloat64_t, double, -+ z0 = svmls_n_f64_z (p0, z1, z2, d4), -+ z0 = svmls_z (p0, z1, z2, d4)) -+ -+/* -+** mls_2_f64_z_tied1: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** movprfx z0\.d, p0/z, z0\.d -+** fmls z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_2_f64_z_tied1, svfloat64_t, -+ z0 = svmls_n_f64_z (p0, z0, z1, 2), -+ z0 = svmls_z (p0, z0, z1, 2)) -+ -+/* -+** mls_2_f64_z_tied2: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** movprfx z0\.d, p0/z, z0\.d -+** fmsb z0\.d, p0/m, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mls_2_f64_z_tied2, svfloat64_t, -+ z0 = svmls_n_f64_z (p0, z1, z0, 2), -+ z0 = svmls_z (p0, z1, z0, 2)) -+ -+/* -+** mls_2_f64_z_untied: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fmls z0\.d, p0/m, z2\.d, \1 -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fmsb z0\.d, p0/m, \1, z1\.d -+** | -+** movprfx z0\.d, p0/z, \1 -+** fmsb z0\.d, p0/m, z2\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mls_2_f64_z_untied, svfloat64_t, -+ z0 = svmls_n_f64_z (p0, z1, z2, 2), -+ z0 = svmls_z (p0, z1, z2, 2)) -+ -+/* -+** mls_f64_x_tied1: -+** fmls z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mls_f64_x_tied1, svfloat64_t, -+ z0 = svmls_f64_x (p0, z0, z1, z2), -+ z0 = svmls_x (p0, z0, z1, z2)) -+ -+/* -+** mls_f64_x_tied2: -+** fmsb z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mls_f64_x_tied2, svfloat64_t, -+ z0 = svmls_f64_x (p0, z1, z0, z2), -+ z0 = svmls_x (p0, z1, z0, z2)) -+ -+/* -+** mls_f64_x_tied3: -+** fmsb z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mls_f64_x_tied3, svfloat64_t, -+ z0 = svmls_f64_x (p0, z1, z2, z0), -+ z0 = svmls_x (p0, z1, z2, z0)) -+ -+/* -+** mls_f64_x_untied: -+** ( -+** movprfx z0, z1 -+** fmls z0\.d, p0/m, z2\.d, z3\.d -+** | -+** movprfx z0, z2 -+** fmsb z0\.d, p0/m, z3\.d, z1\.d -+** | -+** movprfx z0, z3 -+** fmsb z0\.d, p0/m, z2\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mls_f64_x_untied, svfloat64_t, -+ z0 = svmls_f64_x (p0, z1, z2, z3), -+ z0 = svmls_x (p0, z1, z2, z3)) -+ -+/* -+** mls_d4_f64_x_tied1: -+** mov (z[0-9]+\.d), d4 -+** fmls z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mls_d4_f64_x_tied1, svfloat64_t, double, -+ z0 = svmls_n_f64_x (p0, z0, z1, d4), -+ z0 = svmls_x (p0, z0, z1, d4)) -+ -+/* -+** mls_d4_f64_x_tied2: -+** mov (z[0-9]+\.d), d4 -+** fmsb z0\.d, p0/m, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZD (mls_d4_f64_x_tied2, svfloat64_t, double, -+ z0 = svmls_n_f64_x (p0, z1, z0, d4), -+ z0 = svmls_x (p0, z1, z0, d4)) -+ -+/* -+** mls_d4_f64_x_untied: { xfail *-*-* } -+** mov z0\.d, d4 -+** fmsb z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZD (mls_d4_f64_x_untied, svfloat64_t, double, -+ z0 = svmls_n_f64_x (p0, z1, z2, d4), -+ z0 = svmls_x (p0, z1, z2, d4)) -+ -+/* -+** mls_2_f64_x_tied1: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fmls z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_2_f64_x_tied1, svfloat64_t, -+ z0 = svmls_n_f64_x (p0, z0, z1, 2), -+ z0 = svmls_x (p0, z0, z1, 2)) -+ -+/* -+** mls_2_f64_x_tied2: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fmsb z0\.d, p0/m, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mls_2_f64_x_tied2, svfloat64_t, -+ z0 = svmls_n_f64_x (p0, z1, z0, 2), -+ z0 = svmls_x (p0, z1, z0, 2)) -+ -+/* -+** mls_2_f64_x_untied: -+** fmov z0\.d, #2\.0(?:e\+0)? -+** fmsb z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mls_2_f64_x_untied, svfloat64_t, -+ z0 = svmls_n_f64_x (p0, z1, z2, 2), -+ z0 = svmls_x (p0, z1, z2, 2)) -+ -+/* -+** ptrue_mls_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mls_f64_x_tied1, svfloat64_t, -+ z0 = svmls_f64_x (svptrue_b64 (), z0, z1, z2), -+ z0 = svmls_x (svptrue_b64 (), z0, z1, z2)) -+ -+/* -+** ptrue_mls_f64_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mls_f64_x_tied2, svfloat64_t, -+ z0 = svmls_f64_x (svptrue_b64 (), z1, z0, z2), -+ z0 = svmls_x (svptrue_b64 (), z1, z0, z2)) -+ -+/* -+** ptrue_mls_f64_x_tied3: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mls_f64_x_tied3, svfloat64_t, -+ z0 = svmls_f64_x (svptrue_b64 (), z1, z2, z0), -+ z0 = svmls_x (svptrue_b64 (), z1, z2, z0)) -+ -+/* -+** ptrue_mls_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mls_f64_x_untied, svfloat64_t, -+ z0 = svmls_f64_x (svptrue_b64 (), z1, z2, z3), -+ z0 = svmls_x (svptrue_b64 (), z1, z2, z3)) -+ -+/* -+** ptrue_mls_2_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mls_2_f64_x_tied1, svfloat64_t, -+ z0 = svmls_n_f64_x (svptrue_b64 (), z0, z1, 2), -+ z0 = svmls_x (svptrue_b64 (), z0, z1, 2)) -+ -+/* -+** ptrue_mls_2_f64_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mls_2_f64_x_tied2, svfloat64_t, -+ z0 = svmls_n_f64_x (svptrue_b64 (), z1, z0, 2), -+ z0 = svmls_x (svptrue_b64 (), z1, z0, 2)) -+ -+/* -+** ptrue_mls_2_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mls_2_f64_x_untied, svfloat64_t, -+ z0 = svmls_n_f64_x (svptrue_b64 (), z1, z2, 2), -+ z0 = svmls_x (svptrue_b64 (), z1, z2, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_lane_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_lane_f16.c -new file mode 100644 -index 000000000..832376d0b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_lane_f16.c -@@ -0,0 +1,128 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mls_lane_0_f16_tied1: -+** fmls z0\.h, z1\.h, z2\.h\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (mls_lane_0_f16_tied1, svfloat16_t, -+ z0 = svmls_lane_f16 (z0, z1, z2, 0), -+ z0 = svmls_lane (z0, z1, z2, 0)) -+ -+/* -+** mls_lane_0_f16_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fmls z0\.h, \1\.h, z2\.h\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (mls_lane_0_f16_tied2, svfloat16_t, -+ z0 = svmls_lane_f16 (z1, z0, z2, 0), -+ z0 = svmls_lane (z1, z0, z2, 0)) -+ -+/* -+** mls_lane_0_f16_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fmls z0\.h, z2\.h, \1\.h\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (mls_lane_0_f16_tied3, svfloat16_t, -+ z0 = svmls_lane_f16 (z1, z2, z0, 0), -+ z0 = svmls_lane (z1, z2, z0, 0)) -+ -+/* -+** mls_lane_0_f16_untied: -+** movprfx z0, z1 -+** fmls z0\.h, z2\.h, z3\.h\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (mls_lane_0_f16_untied, svfloat16_t, -+ z0 = svmls_lane_f16 (z1, z2, z3, 0), -+ z0 = svmls_lane (z1, z2, z3, 0)) -+ -+/* -+** mls_lane_1_f16: -+** fmls z0\.h, z1\.h, z2\.h\[1\] -+** ret -+*/ -+TEST_UNIFORM_Z (mls_lane_1_f16, svfloat16_t, -+ z0 = svmls_lane_f16 (z0, z1, z2, 1), -+ z0 = svmls_lane (z0, z1, z2, 1)) -+ -+/* -+** mls_lane_2_f16: -+** fmls z0\.h, z1\.h, z2\.h\[2\] -+** ret -+*/ -+TEST_UNIFORM_Z (mls_lane_2_f16, svfloat16_t, -+ z0 = svmls_lane_f16 (z0, z1, z2, 2), -+ z0 = svmls_lane (z0, z1, z2, 2)) -+ -+/* -+** mls_lane_3_f16: -+** fmls z0\.h, z1\.h, z2\.h\[3\] -+** ret -+*/ -+TEST_UNIFORM_Z (mls_lane_3_f16, svfloat16_t, -+ z0 = svmls_lane_f16 (z0, z1, z2, 3), -+ z0 = svmls_lane (z0, z1, z2, 3)) -+ -+/* -+** mls_lane_4_f16: -+** fmls z0\.h, z1\.h, z2\.h\[4\] -+** ret -+*/ -+TEST_UNIFORM_Z (mls_lane_4_f16, svfloat16_t, -+ z0 = svmls_lane_f16 (z0, z1, z2, 4), -+ z0 = svmls_lane (z0, z1, z2, 4)) -+ -+/* -+** mls_lane_5_f16: -+** fmls z0\.h, z1\.h, z2\.h\[5\] -+** ret -+*/ -+TEST_UNIFORM_Z (mls_lane_5_f16, svfloat16_t, -+ z0 = svmls_lane_f16 (z0, z1, z2, 5), -+ z0 = svmls_lane (z0, z1, z2, 5)) -+ -+/* -+** 
mls_lane_6_f16: -+** fmls z0\.h, z1\.h, z2\.h\[6\] -+** ret -+*/ -+TEST_UNIFORM_Z (mls_lane_6_f16, svfloat16_t, -+ z0 = svmls_lane_f16 (z0, z1, z2, 6), -+ z0 = svmls_lane (z0, z1, z2, 6)) -+ -+/* -+** mls_lane_7_f16: -+** fmls z0\.h, z1\.h, z2\.h\[7\] -+** ret -+*/ -+TEST_UNIFORM_Z (mls_lane_7_f16, svfloat16_t, -+ z0 = svmls_lane_f16 (z0, z1, z2, 7), -+ z0 = svmls_lane (z0, z1, z2, 7)) -+ -+/* -+** mls_lane_z7_f16: -+** fmls z0\.h, z1\.h, z7\.h\[7\] -+** ret -+*/ -+TEST_DUAL_Z (mls_lane_z7_f16, svfloat16_t, svfloat16_t, -+ z0 = svmls_lane_f16 (z0, z1, z7, 7), -+ z0 = svmls_lane (z0, z1, z7, 7)) -+ -+/* -+** mls_lane_z8_f16: -+** str d8, \[sp, -16\]! -+** mov (z[0-7])\.d, z8\.d -+** fmls z0\.h, z1\.h, \1\.h\[7\] -+** ldr d8, \[sp\], 16 -+** ret -+*/ -+TEST_DUAL_LANE_REG (mls_lane_z8_f16, svfloat16_t, svfloat16_t, z8, -+ z0 = svmls_lane_f16 (z0, z1, z8, 7), -+ z0 = svmls_lane (z0, z1, z8, 7)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_lane_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_lane_f32.c -new file mode 100644 -index 000000000..3244b972f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_lane_f32.c -@@ -0,0 +1,92 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mls_lane_0_f32_tied1: -+** fmls z0\.s, z1\.s, z2\.s\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (mls_lane_0_f32_tied1, svfloat32_t, -+ z0 = svmls_lane_f32 (z0, z1, z2, 0), -+ z0 = svmls_lane (z0, z1, z2, 0)) -+ -+/* -+** mls_lane_0_f32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fmls z0\.s, \1\.s, z2\.s\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (mls_lane_0_f32_tied2, svfloat32_t, -+ z0 = svmls_lane_f32 (z1, z0, z2, 0), -+ z0 = svmls_lane (z1, z0, z2, 0)) -+ -+/* -+** mls_lane_0_f32_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fmls z0\.s, z2\.s, \1\.s\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (mls_lane_0_f32_tied3, svfloat32_t, -+ z0 = svmls_lane_f32 (z1, z2, z0, 0), -+ z0 = svmls_lane (z1, z2, z0, 0)) -+ -+/* -+** mls_lane_0_f32_untied: -+** movprfx z0, z1 -+** fmls z0\.s, z2\.s, z3\.s\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (mls_lane_0_f32_untied, svfloat32_t, -+ z0 = svmls_lane_f32 (z1, z2, z3, 0), -+ z0 = svmls_lane (z1, z2, z3, 0)) -+ -+/* -+** mls_lane_1_f32: -+** fmls z0\.s, z1\.s, z2\.s\[1\] -+** ret -+*/ -+TEST_UNIFORM_Z (mls_lane_1_f32, svfloat32_t, -+ z0 = svmls_lane_f32 (z0, z1, z2, 1), -+ z0 = svmls_lane (z0, z1, z2, 1)) -+ -+/* -+** mls_lane_2_f32: -+** fmls z0\.s, z1\.s, z2\.s\[2\] -+** ret -+*/ -+TEST_UNIFORM_Z (mls_lane_2_f32, svfloat32_t, -+ z0 = svmls_lane_f32 (z0, z1, z2, 2), -+ z0 = svmls_lane (z0, z1, z2, 2)) -+ -+/* -+** mls_lane_3_f32: -+** fmls z0\.s, z1\.s, z2\.s\[3\] -+** ret -+*/ -+TEST_UNIFORM_Z (mls_lane_3_f32, svfloat32_t, -+ z0 = svmls_lane_f32 (z0, z1, z2, 3), -+ z0 = svmls_lane (z0, z1, z2, 3)) -+ -+/* -+** mls_lane_z7_f32: -+** fmls z0\.s, z1\.s, z7\.s\[3\] -+** ret -+*/ -+TEST_DUAL_Z (mls_lane_z7_f32, svfloat32_t, svfloat32_t, -+ z0 = svmls_lane_f32 (z0, z1, z7, 3), -+ z0 = svmls_lane (z0, z1, z7, 3)) -+ -+/* -+** mls_lane_z8_f32: -+** str d8, \[sp, -16\]! 
-+** mov (z[0-7])\.d, z8\.d -+** fmls z0\.s, z1\.s, \1\.s\[3\] -+** ldr d8, \[sp\], 16 -+** ret -+*/ -+TEST_DUAL_LANE_REG (mls_lane_z8_f32, svfloat32_t, svfloat32_t, z8, -+ z0 = svmls_lane_f32 (z0, z1, z8, 3), -+ z0 = svmls_lane (z0, z1, z8, 3)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_lane_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_lane_f64.c -new file mode 100644 -index 000000000..16f20ca53 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_lane_f64.c -@@ -0,0 +1,83 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mls_lane_0_f64_tied1: -+** fmls z0\.d, z1\.d, z2\.d\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (mls_lane_0_f64_tied1, svfloat64_t, -+ z0 = svmls_lane_f64 (z0, z1, z2, 0), -+ z0 = svmls_lane (z0, z1, z2, 0)) -+ -+/* -+** mls_lane_0_f64_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fmls z0\.d, \1, z2\.d\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (mls_lane_0_f64_tied2, svfloat64_t, -+ z0 = svmls_lane_f64 (z1, z0, z2, 0), -+ z0 = svmls_lane (z1, z0, z2, 0)) -+ -+/* -+** mls_lane_0_f64_tied3: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fmls z0\.d, z2\.d, \1\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (mls_lane_0_f64_tied3, svfloat64_t, -+ z0 = svmls_lane_f64 (z1, z2, z0, 0), -+ z0 = svmls_lane (z1, z2, z0, 0)) -+ -+/* -+** mls_lane_0_f64_untied: -+** movprfx z0, z1 -+** fmls z0\.d, z2\.d, z3\.d\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (mls_lane_0_f64_untied, svfloat64_t, -+ z0 = svmls_lane_f64 (z1, z2, z3, 0), -+ z0 = svmls_lane (z1, z2, z3, 0)) -+ -+/* -+** mls_lane_1_f64: -+** fmls z0\.d, z1\.d, z2\.d\[1\] -+** ret -+*/ -+TEST_UNIFORM_Z (mls_lane_1_f64, svfloat64_t, -+ z0 = svmls_lane_f64 (z0, z1, z2, 1), -+ z0 = svmls_lane (z0, z1, z2, 1)) -+ -+/* -+** mls_lane_z7_f64: -+** fmls z0\.d, z1\.d, z7\.d\[1\] -+** ret -+*/ -+TEST_DUAL_Z (mls_lane_z7_f64, svfloat64_t, svfloat64_t, -+ z0 = svmls_lane_f64 (z0, z1, z7, 1), -+ z0 = svmls_lane (z0, z1, z7, 1)) -+ -+/* -+** mls_lane_z15_f64: -+** str d15, \[sp, -16\]! 
-+** fmls z0\.d, z1\.d, z15\.d\[1\] -+** ldr d15, \[sp\], 16 -+** ret -+*/ -+TEST_DUAL_LANE_REG (mls_lane_z15_f64, svfloat64_t, svfloat64_t, z15, -+ z0 = svmls_lane_f64 (z0, z1, z15, 1), -+ z0 = svmls_lane (z0, z1, z15, 1)) -+ -+/* -+** mls_lane_z16_f64: -+** mov (z[0-9]|z1[0-5])\.d, z16\.d -+** fmls z0\.d, z1\.d, \1\.d\[1\] -+** ret -+*/ -+TEST_DUAL_LANE_REG (mls_lane_z16_f64, svfloat64_t, svfloat64_t, z16, -+ z0 = svmls_lane_f64 (z0, z1, z16, 1), -+ z0 = svmls_lane (z0, z1, z16, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_s16.c -new file mode 100644 -index 000000000..e199829c4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_s16.c -@@ -0,0 +1,321 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mls_s16_m_tied1: -+** mls z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s16_m_tied1, svint16_t, -+ z0 = svmls_s16_m (p0, z0, z1, z2), -+ z0 = svmls_m (p0, z0, z1, z2)) -+ -+/* -+** mls_s16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mls z0\.h, p0/m, \1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s16_m_tied2, svint16_t, -+ z0 = svmls_s16_m (p0, z1, z0, z2), -+ z0 = svmls_m (p0, z1, z0, z2)) -+ -+/* -+** mls_s16_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mls z0\.h, p0/m, z2\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s16_m_tied3, svint16_t, -+ z0 = svmls_s16_m (p0, z1, z2, z0), -+ z0 = svmls_m (p0, z1, z2, z0)) -+ -+/* -+** mls_s16_m_untied: -+** movprfx z0, z1 -+** mls z0\.h, p0/m, z2\.h, z3\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s16_m_untied, svint16_t, -+ z0 = svmls_s16_m (p0, z1, z2, z3), -+ z0 = svmls_m (p0, z1, z2, z3)) -+ -+/* -+** mls_w0_s16_m_tied1: -+** mov (z[0-9]+\.h), w0 -+** mls z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_s16_m_tied1, svint16_t, int16_t, -+ z0 = svmls_n_s16_m (p0, z0, z1, x0), -+ z0 = svmls_m (p0, z0, z1, x0)) -+ -+/* -+** mls_w0_s16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** mls z0\.h, p0/m, z2\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_s16_m_untied, svint16_t, int16_t, -+ z0 = svmls_n_s16_m (p0, z1, z2, x0), -+ z0 = svmls_m (p0, z1, z2, x0)) -+ -+/* -+** mls_11_s16_m_tied1: -+** mov (z[0-9]+\.h), #11 -+** mls z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_s16_m_tied1, svint16_t, -+ z0 = svmls_n_s16_m (p0, z0, z1, 11), -+ z0 = svmls_m (p0, z0, z1, 11)) -+ -+/* -+** mls_11_s16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), #11 -+** movprfx z0, z1 -+** mls z0\.h, p0/m, z2\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_s16_m_untied, svint16_t, -+ z0 = svmls_n_s16_m (p0, z1, z2, 11), -+ z0 = svmls_m (p0, z1, z2, 11)) -+ -+/* -+** mls_s16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** mls z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s16_z_tied1, svint16_t, -+ z0 = svmls_s16_z (p0, z0, z1, z2), -+ z0 = svmls_z (p0, z0, z1, z2)) -+ -+/* -+** mls_s16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** msb z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s16_z_tied2, svint16_t, -+ z0 = svmls_s16_z (p0, z1, z0, z2), -+ z0 = svmls_z (p0, z1, z0, z2)) -+ -+/* -+** mls_s16_z_tied3: -+** movprfx z0\.h, p0/z, z0\.h -+** msb z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s16_z_tied3, svint16_t, -+ z0 = svmls_s16_z (p0, z1, z2, z0), -+ z0 = svmls_z (p0, z1, z2, z0)) -+ -+/* -+** mls_s16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** mls 
z0\.h, p0/m, z2\.h, z3\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** msb z0\.h, p0/m, z3\.h, z1\.h -+** | -+** movprfx z0\.h, p0/z, z3\.h -+** msb z0\.h, p0/m, z2\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s16_z_untied, svint16_t, -+ z0 = svmls_s16_z (p0, z1, z2, z3), -+ z0 = svmls_z (p0, z1, z2, z3)) -+ -+/* -+** mls_w0_s16_z_tied1: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** mls z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_s16_z_tied1, svint16_t, int16_t, -+ z0 = svmls_n_s16_z (p0, z0, z1, x0), -+ z0 = svmls_z (p0, z0, z1, x0)) -+ -+/* -+** mls_w0_s16_z_tied2: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** msb z0\.h, p0/m, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_s16_z_tied2, svint16_t, int16_t, -+ z0 = svmls_n_s16_z (p0, z1, z0, x0), -+ z0 = svmls_z (p0, z1, z0, x0)) -+ -+/* -+** mls_w0_s16_z_untied: -+** mov (z[0-9]+\.h), w0 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** mls z0\.h, p0/m, z2\.h, \1 -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** msb z0\.h, p0/m, \1, z1\.h -+** | -+** movprfx z0\.h, p0/z, \1 -+** msb z0\.h, p0/m, z2\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_s16_z_untied, svint16_t, int16_t, -+ z0 = svmls_n_s16_z (p0, z1, z2, x0), -+ z0 = svmls_z (p0, z1, z2, x0)) -+ -+/* -+** mls_11_s16_z_tied1: -+** mov (z[0-9]+\.h), #11 -+** movprfx z0\.h, p0/z, z0\.h -+** mls z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_s16_z_tied1, svint16_t, -+ z0 = svmls_n_s16_z (p0, z0, z1, 11), -+ z0 = svmls_z (p0, z0, z1, 11)) -+ -+/* -+** mls_11_s16_z_tied2: -+** mov (z[0-9]+\.h), #11 -+** movprfx z0\.h, p0/z, z0\.h -+** msb z0\.h, p0/m, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_s16_z_tied2, svint16_t, -+ z0 = svmls_n_s16_z (p0, z1, z0, 11), -+ z0 = svmls_z (p0, z1, z0, 11)) -+ -+/* -+** mls_11_s16_z_untied: -+** mov (z[0-9]+\.h), #11 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** mls z0\.h, p0/m, z2\.h, \1 -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** msb z0\.h, p0/m, \1, z1\.h -+** | -+** movprfx z0\.h, p0/z, \1 -+** msb z0\.h, p0/m, z2\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_s16_z_untied, svint16_t, -+ z0 = svmls_n_s16_z (p0, z1, z2, 11), -+ z0 = svmls_z (p0, z1, z2, 11)) -+ -+/* -+** mls_s16_x_tied1: -+** mls z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s16_x_tied1, svint16_t, -+ z0 = svmls_s16_x (p0, z0, z1, z2), -+ z0 = svmls_x (p0, z0, z1, z2)) -+ -+/* -+** mls_s16_x_tied2: -+** msb z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s16_x_tied2, svint16_t, -+ z0 = svmls_s16_x (p0, z1, z0, z2), -+ z0 = svmls_x (p0, z1, z0, z2)) -+ -+/* -+** mls_s16_x_tied3: -+** msb z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s16_x_tied3, svint16_t, -+ z0 = svmls_s16_x (p0, z1, z2, z0), -+ z0 = svmls_x (p0, z1, z2, z0)) -+ -+/* -+** mls_s16_x_untied: -+** ( -+** movprfx z0, z1 -+** mls z0\.h, p0/m, z2\.h, z3\.h -+** | -+** movprfx z0, z2 -+** msb z0\.h, p0/m, z3\.h, z1\.h -+** | -+** movprfx z0, z3 -+** msb z0\.h, p0/m, z2\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s16_x_untied, svint16_t, -+ z0 = svmls_s16_x (p0, z1, z2, z3), -+ z0 = svmls_x (p0, z1, z2, z3)) -+ -+/* -+** mls_w0_s16_x_tied1: -+** mov (z[0-9]+\.h), w0 -+** mls z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_s16_x_tied1, svint16_t, int16_t, -+ z0 = svmls_n_s16_x (p0, z0, z1, x0), -+ z0 = svmls_x (p0, z0, z1, x0)) -+ -+/* -+** mls_w0_s16_x_tied2: -+** mov (z[0-9]+\.h), w0 -+** msb z0\.h, p0/m, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_s16_x_tied2, 
svint16_t, int16_t, -+ z0 = svmls_n_s16_x (p0, z1, z0, x0), -+ z0 = svmls_x (p0, z1, z0, x0)) -+ -+/* -+** mls_w0_s16_x_untied: -+** mov z0\.h, w0 -+** msb z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_s16_x_untied, svint16_t, int16_t, -+ z0 = svmls_n_s16_x (p0, z1, z2, x0), -+ z0 = svmls_x (p0, z1, z2, x0)) -+ -+/* -+** mls_11_s16_x_tied1: -+** mov (z[0-9]+\.h), #11 -+** mls z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_s16_x_tied1, svint16_t, -+ z0 = svmls_n_s16_x (p0, z0, z1, 11), -+ z0 = svmls_x (p0, z0, z1, 11)) -+ -+/* -+** mls_11_s16_x_tied2: -+** mov (z[0-9]+\.h), #11 -+** msb z0\.h, p0/m, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_s16_x_tied2, svint16_t, -+ z0 = svmls_n_s16_x (p0, z1, z0, 11), -+ z0 = svmls_x (p0, z1, z0, 11)) -+ -+/* -+** mls_11_s16_x_untied: -+** mov z0\.h, #11 -+** msb z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_s16_x_untied, svint16_t, -+ z0 = svmls_n_s16_x (p0, z1, z2, 11), -+ z0 = svmls_x (p0, z1, z2, 11)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_s32.c -new file mode 100644 -index 000000000..fe386d01c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_s32.c -@@ -0,0 +1,321 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mls_s32_m_tied1: -+** mls z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s32_m_tied1, svint32_t, -+ z0 = svmls_s32_m (p0, z0, z1, z2), -+ z0 = svmls_m (p0, z0, z1, z2)) -+ -+/* -+** mls_s32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mls z0\.s, p0/m, \1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s32_m_tied2, svint32_t, -+ z0 = svmls_s32_m (p0, z1, z0, z2), -+ z0 = svmls_m (p0, z1, z0, z2)) -+ -+/* -+** mls_s32_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mls z0\.s, p0/m, z2\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s32_m_tied3, svint32_t, -+ z0 = svmls_s32_m (p0, z1, z2, z0), -+ z0 = svmls_m (p0, z1, z2, z0)) -+ -+/* -+** mls_s32_m_untied: -+** movprfx z0, z1 -+** mls z0\.s, p0/m, z2\.s, z3\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s32_m_untied, svint32_t, -+ z0 = svmls_s32_m (p0, z1, z2, z3), -+ z0 = svmls_m (p0, z1, z2, z3)) -+ -+/* -+** mls_w0_s32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** mls z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_s32_m_tied1, svint32_t, int32_t, -+ z0 = svmls_n_s32_m (p0, z0, z1, x0), -+ z0 = svmls_m (p0, z0, z1, x0)) -+ -+/* -+** mls_w0_s32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** mls z0\.s, p0/m, z2\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_s32_m_untied, svint32_t, int32_t, -+ z0 = svmls_n_s32_m (p0, z1, z2, x0), -+ z0 = svmls_m (p0, z1, z2, x0)) -+ -+/* -+** mls_11_s32_m_tied1: -+** mov (z[0-9]+\.s), #11 -+** mls z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_s32_m_tied1, svint32_t, -+ z0 = svmls_n_s32_m (p0, z0, z1, 11), -+ z0 = svmls_m (p0, z0, z1, 11)) -+ -+/* -+** mls_11_s32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #11 -+** movprfx z0, z1 -+** mls z0\.s, p0/m, z2\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_s32_m_untied, svint32_t, -+ z0 = svmls_n_s32_m (p0, z1, z2, 11), -+ z0 = svmls_m (p0, z1, z2, 11)) -+ -+/* -+** mls_s32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** mls z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s32_z_tied1, svint32_t, -+ z0 = svmls_s32_z (p0, z0, z1, z2), -+ z0 = svmls_z (p0, z0, z1, z2)) -+ -+/* -+** mls_s32_z_tied2: 
-+** movprfx z0\.s, p0/z, z0\.s -+** msb z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s32_z_tied2, svint32_t, -+ z0 = svmls_s32_z (p0, z1, z0, z2), -+ z0 = svmls_z (p0, z1, z0, z2)) -+ -+/* -+** mls_s32_z_tied3: -+** movprfx z0\.s, p0/z, z0\.s -+** msb z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s32_z_tied3, svint32_t, -+ z0 = svmls_s32_z (p0, z1, z2, z0), -+ z0 = svmls_z (p0, z1, z2, z0)) -+ -+/* -+** mls_s32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** mls z0\.s, p0/m, z2\.s, z3\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** msb z0\.s, p0/m, z3\.s, z1\.s -+** | -+** movprfx z0\.s, p0/z, z3\.s -+** msb z0\.s, p0/m, z2\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s32_z_untied, svint32_t, -+ z0 = svmls_s32_z (p0, z1, z2, z3), -+ z0 = svmls_z (p0, z1, z2, z3)) -+ -+/* -+** mls_w0_s32_z_tied1: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** mls z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_s32_z_tied1, svint32_t, int32_t, -+ z0 = svmls_n_s32_z (p0, z0, z1, x0), -+ z0 = svmls_z (p0, z0, z1, x0)) -+ -+/* -+** mls_w0_s32_z_tied2: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** msb z0\.s, p0/m, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_s32_z_tied2, svint32_t, int32_t, -+ z0 = svmls_n_s32_z (p0, z1, z0, x0), -+ z0 = svmls_z (p0, z1, z0, x0)) -+ -+/* -+** mls_w0_s32_z_untied: -+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** mls z0\.s, p0/m, z2\.s, \1 -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** msb z0\.s, p0/m, \1, z1\.s -+** | -+** movprfx z0\.s, p0/z, \1 -+** msb z0\.s, p0/m, z2\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_s32_z_untied, svint32_t, int32_t, -+ z0 = svmls_n_s32_z (p0, z1, z2, x0), -+ z0 = svmls_z (p0, z1, z2, x0)) -+ -+/* -+** mls_11_s32_z_tied1: -+** mov (z[0-9]+\.s), #11 -+** movprfx z0\.s, p0/z, z0\.s -+** mls z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_s32_z_tied1, svint32_t, -+ z0 = svmls_n_s32_z (p0, z0, z1, 11), -+ z0 = svmls_z (p0, z0, z1, 11)) -+ -+/* -+** mls_11_s32_z_tied2: -+** mov (z[0-9]+\.s), #11 -+** movprfx z0\.s, p0/z, z0\.s -+** msb z0\.s, p0/m, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_s32_z_tied2, svint32_t, -+ z0 = svmls_n_s32_z (p0, z1, z0, 11), -+ z0 = svmls_z (p0, z1, z0, 11)) -+ -+/* -+** mls_11_s32_z_untied: -+** mov (z[0-9]+\.s), #11 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** mls z0\.s, p0/m, z2\.s, \1 -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** msb z0\.s, p0/m, \1, z1\.s -+** | -+** movprfx z0\.s, p0/z, \1 -+** msb z0\.s, p0/m, z2\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_s32_z_untied, svint32_t, -+ z0 = svmls_n_s32_z (p0, z1, z2, 11), -+ z0 = svmls_z (p0, z1, z2, 11)) -+ -+/* -+** mls_s32_x_tied1: -+** mls z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s32_x_tied1, svint32_t, -+ z0 = svmls_s32_x (p0, z0, z1, z2), -+ z0 = svmls_x (p0, z0, z1, z2)) -+ -+/* -+** mls_s32_x_tied2: -+** msb z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s32_x_tied2, svint32_t, -+ z0 = svmls_s32_x (p0, z1, z0, z2), -+ z0 = svmls_x (p0, z1, z0, z2)) -+ -+/* -+** mls_s32_x_tied3: -+** msb z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s32_x_tied3, svint32_t, -+ z0 = svmls_s32_x (p0, z1, z2, z0), -+ z0 = svmls_x (p0, z1, z2, z0)) -+ -+/* -+** mls_s32_x_untied: -+** ( -+** movprfx z0, z1 -+** mls z0\.s, p0/m, z2\.s, z3\.s -+** | -+** movprfx z0, z2 -+** msb z0\.s, p0/m, z3\.s, z1\.s -+** | -+** movprfx z0, z3 -+** msb z0\.s, p0/m, z2\.s, z1\.s -+** ) -+** ret 
-+*/ -+TEST_UNIFORM_Z (mls_s32_x_untied, svint32_t, -+ z0 = svmls_s32_x (p0, z1, z2, z3), -+ z0 = svmls_x (p0, z1, z2, z3)) -+ -+/* -+** mls_w0_s32_x_tied1: -+** mov (z[0-9]+\.s), w0 -+** mls z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_s32_x_tied1, svint32_t, int32_t, -+ z0 = svmls_n_s32_x (p0, z0, z1, x0), -+ z0 = svmls_x (p0, z0, z1, x0)) -+ -+/* -+** mls_w0_s32_x_tied2: -+** mov (z[0-9]+\.s), w0 -+** msb z0\.s, p0/m, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_s32_x_tied2, svint32_t, int32_t, -+ z0 = svmls_n_s32_x (p0, z1, z0, x0), -+ z0 = svmls_x (p0, z1, z0, x0)) -+ -+/* -+** mls_w0_s32_x_untied: -+** mov z0\.s, w0 -+** msb z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_s32_x_untied, svint32_t, int32_t, -+ z0 = svmls_n_s32_x (p0, z1, z2, x0), -+ z0 = svmls_x (p0, z1, z2, x0)) -+ -+/* -+** mls_11_s32_x_tied1: -+** mov (z[0-9]+\.s), #11 -+** mls z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_s32_x_tied1, svint32_t, -+ z0 = svmls_n_s32_x (p0, z0, z1, 11), -+ z0 = svmls_x (p0, z0, z1, 11)) -+ -+/* -+** mls_11_s32_x_tied2: -+** mov (z[0-9]+\.s), #11 -+** msb z0\.s, p0/m, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_s32_x_tied2, svint32_t, -+ z0 = svmls_n_s32_x (p0, z1, z0, 11), -+ z0 = svmls_x (p0, z1, z0, 11)) -+ -+/* -+** mls_11_s32_x_untied: -+** mov z0\.s, #11 -+** msb z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_s32_x_untied, svint32_t, -+ z0 = svmls_n_s32_x (p0, z1, z2, 11), -+ z0 = svmls_x (p0, z1, z2, 11)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_s64.c -new file mode 100644 -index 000000000..2998d733f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_s64.c -@@ -0,0 +1,321 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mls_s64_m_tied1: -+** mls z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s64_m_tied1, svint64_t, -+ z0 = svmls_s64_m (p0, z0, z1, z2), -+ z0 = svmls_m (p0, z0, z1, z2)) -+ -+/* -+** mls_s64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** mls z0\.d, p0/m, \1, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s64_m_tied2, svint64_t, -+ z0 = svmls_s64_m (p0, z1, z0, z2), -+ z0 = svmls_m (p0, z1, z0, z2)) -+ -+/* -+** mls_s64_m_tied3: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** mls z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s64_m_tied3, svint64_t, -+ z0 = svmls_s64_m (p0, z1, z2, z0), -+ z0 = svmls_m (p0, z1, z2, z0)) -+ -+/* -+** mls_s64_m_untied: -+** movprfx z0, z1 -+** mls z0\.d, p0/m, z2\.d, z3\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s64_m_untied, svint64_t, -+ z0 = svmls_s64_m (p0, z1, z2, z3), -+ z0 = svmls_m (p0, z1, z2, z3)) -+ -+/* -+** mls_x0_s64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** mls z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_x0_s64_m_tied1, svint64_t, int64_t, -+ z0 = svmls_n_s64_m (p0, z0, z1, x0), -+ z0 = svmls_m (p0, z0, z1, x0)) -+ -+/* -+** mls_x0_s64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** mls z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_x0_s64_m_untied, svint64_t, int64_t, -+ z0 = svmls_n_s64_m (p0, z1, z2, x0), -+ z0 = svmls_m (p0, z1, z2, x0)) -+ -+/* -+** mls_11_s64_m_tied1: -+** mov (z[0-9]+\.d), #11 -+** mls z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_s64_m_tied1, svint64_t, -+ z0 = svmls_n_s64_m (p0, z0, z1, 11), -+ z0 = svmls_m (p0, z0, z1, 11)) -+ -+/* -+** 
mls_11_s64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #11 -+** movprfx z0, z1 -+** mls z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_s64_m_untied, svint64_t, -+ z0 = svmls_n_s64_m (p0, z1, z2, 11), -+ z0 = svmls_m (p0, z1, z2, 11)) -+ -+/* -+** mls_s64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** mls z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s64_z_tied1, svint64_t, -+ z0 = svmls_s64_z (p0, z0, z1, z2), -+ z0 = svmls_z (p0, z0, z1, z2)) -+ -+/* -+** mls_s64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** msb z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s64_z_tied2, svint64_t, -+ z0 = svmls_s64_z (p0, z1, z0, z2), -+ z0 = svmls_z (p0, z1, z0, z2)) -+ -+/* -+** mls_s64_z_tied3: -+** movprfx z0\.d, p0/z, z0\.d -+** msb z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s64_z_tied3, svint64_t, -+ z0 = svmls_s64_z (p0, z1, z2, z0), -+ z0 = svmls_z (p0, z1, z2, z0)) -+ -+/* -+** mls_s64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** mls z0\.d, p0/m, z2\.d, z3\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** msb z0\.d, p0/m, z3\.d, z1\.d -+** | -+** movprfx z0\.d, p0/z, z3\.d -+** msb z0\.d, p0/m, z2\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s64_z_untied, svint64_t, -+ z0 = svmls_s64_z (p0, z1, z2, z3), -+ z0 = svmls_z (p0, z1, z2, z3)) -+ -+/* -+** mls_x0_s64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** mls z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_x0_s64_z_tied1, svint64_t, int64_t, -+ z0 = svmls_n_s64_z (p0, z0, z1, x0), -+ z0 = svmls_z (p0, z0, z1, x0)) -+ -+/* -+** mls_x0_s64_z_tied2: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** msb z0\.d, p0/m, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_x0_s64_z_tied2, svint64_t, int64_t, -+ z0 = svmls_n_s64_z (p0, z1, z0, x0), -+ z0 = svmls_z (p0, z1, z0, x0)) -+ -+/* -+** mls_x0_s64_z_untied: -+** mov (z[0-9]+\.d), x0 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** mls z0\.d, p0/m, z2\.d, \1 -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** msb z0\.d, p0/m, \1, z1\.d -+** | -+** movprfx z0\.d, p0/z, \1 -+** msb z0\.d, p0/m, z2\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_x0_s64_z_untied, svint64_t, int64_t, -+ z0 = svmls_n_s64_z (p0, z1, z2, x0), -+ z0 = svmls_z (p0, z1, z2, x0)) -+ -+/* -+** mls_11_s64_z_tied1: -+** mov (z[0-9]+\.d), #11 -+** movprfx z0\.d, p0/z, z0\.d -+** mls z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_s64_z_tied1, svint64_t, -+ z0 = svmls_n_s64_z (p0, z0, z1, 11), -+ z0 = svmls_z (p0, z0, z1, 11)) -+ -+/* -+** mls_11_s64_z_tied2: -+** mov (z[0-9]+\.d), #11 -+** movprfx z0\.d, p0/z, z0\.d -+** msb z0\.d, p0/m, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_s64_z_tied2, svint64_t, -+ z0 = svmls_n_s64_z (p0, z1, z0, 11), -+ z0 = svmls_z (p0, z1, z0, 11)) -+ -+/* -+** mls_11_s64_z_untied: -+** mov (z[0-9]+\.d), #11 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** mls z0\.d, p0/m, z2\.d, \1 -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** msb z0\.d, p0/m, \1, z1\.d -+** | -+** movprfx z0\.d, p0/z, \1 -+** msb z0\.d, p0/m, z2\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_s64_z_untied, svint64_t, -+ z0 = svmls_n_s64_z (p0, z1, z2, 11), -+ z0 = svmls_z (p0, z1, z2, 11)) -+ -+/* -+** mls_s64_x_tied1: -+** mls z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s64_x_tied1, svint64_t, -+ z0 = svmls_s64_x (p0, z0, z1, z2), -+ z0 = svmls_x (p0, z0, z1, z2)) -+ -+/* -+** mls_s64_x_tied2: -+** msb z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s64_x_tied2, 
svint64_t, -+ z0 = svmls_s64_x (p0, z1, z0, z2), -+ z0 = svmls_x (p0, z1, z0, z2)) -+ -+/* -+** mls_s64_x_tied3: -+** msb z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s64_x_tied3, svint64_t, -+ z0 = svmls_s64_x (p0, z1, z2, z0), -+ z0 = svmls_x (p0, z1, z2, z0)) -+ -+/* -+** mls_s64_x_untied: -+** ( -+** movprfx z0, z1 -+** mls z0\.d, p0/m, z2\.d, z3\.d -+** | -+** movprfx z0, z2 -+** msb z0\.d, p0/m, z3\.d, z1\.d -+** | -+** movprfx z0, z3 -+** msb z0\.d, p0/m, z2\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s64_x_untied, svint64_t, -+ z0 = svmls_s64_x (p0, z1, z2, z3), -+ z0 = svmls_x (p0, z1, z2, z3)) -+ -+/* -+** mls_x0_s64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** mls z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_x0_s64_x_tied1, svint64_t, int64_t, -+ z0 = svmls_n_s64_x (p0, z0, z1, x0), -+ z0 = svmls_x (p0, z0, z1, x0)) -+ -+/* -+** mls_x0_s64_x_tied2: -+** mov (z[0-9]+\.d), x0 -+** msb z0\.d, p0/m, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_x0_s64_x_tied2, svint64_t, int64_t, -+ z0 = svmls_n_s64_x (p0, z1, z0, x0), -+ z0 = svmls_x (p0, z1, z0, x0)) -+ -+/* -+** mls_x0_s64_x_untied: -+** mov z0\.d, x0 -+** msb z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_x0_s64_x_untied, svint64_t, int64_t, -+ z0 = svmls_n_s64_x (p0, z1, z2, x0), -+ z0 = svmls_x (p0, z1, z2, x0)) -+ -+/* -+** mls_11_s64_x_tied1: -+** mov (z[0-9]+\.d), #11 -+** mls z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_s64_x_tied1, svint64_t, -+ z0 = svmls_n_s64_x (p0, z0, z1, 11), -+ z0 = svmls_x (p0, z0, z1, 11)) -+ -+/* -+** mls_11_s64_x_tied2: -+** mov (z[0-9]+\.d), #11 -+** msb z0\.d, p0/m, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_s64_x_tied2, svint64_t, -+ z0 = svmls_n_s64_x (p0, z1, z0, 11), -+ z0 = svmls_x (p0, z1, z0, 11)) -+ -+/* -+** mls_11_s64_x_untied: -+** mov z0\.d, #11 -+** msb z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_s64_x_untied, svint64_t, -+ z0 = svmls_n_s64_x (p0, z1, z2, 11), -+ z0 = svmls_x (p0, z1, z2, 11)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_s8.c -new file mode 100644 -index 000000000..c60c43145 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_s8.c -@@ -0,0 +1,321 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mls_s8_m_tied1: -+** mls z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s8_m_tied1, svint8_t, -+ z0 = svmls_s8_m (p0, z0, z1, z2), -+ z0 = svmls_m (p0, z0, z1, z2)) -+ -+/* -+** mls_s8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mls z0\.b, p0/m, \1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s8_m_tied2, svint8_t, -+ z0 = svmls_s8_m (p0, z1, z0, z2), -+ z0 = svmls_m (p0, z1, z0, z2)) -+ -+/* -+** mls_s8_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mls z0\.b, p0/m, z2\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s8_m_tied3, svint8_t, -+ z0 = svmls_s8_m (p0, z1, z2, z0), -+ z0 = svmls_m (p0, z1, z2, z0)) -+ -+/* -+** mls_s8_m_untied: -+** movprfx z0, z1 -+** mls z0\.b, p0/m, z2\.b, z3\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s8_m_untied, svint8_t, -+ z0 = svmls_s8_m (p0, z1, z2, z3), -+ z0 = svmls_m (p0, z1, z2, z3)) -+ -+/* -+** mls_w0_s8_m_tied1: -+** mov (z[0-9]+\.b), w0 -+** mls z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_s8_m_tied1, svint8_t, int8_t, -+ z0 = svmls_n_s8_m (p0, z0, z1, x0), -+ z0 = svmls_m (p0, z0, z1, x0)) -+ -+/* -+** 
mls_w0_s8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** mls z0\.b, p0/m, z2\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_s8_m_untied, svint8_t, int8_t, -+ z0 = svmls_n_s8_m (p0, z1, z2, x0), -+ z0 = svmls_m (p0, z1, z2, x0)) -+ -+/* -+** mls_11_s8_m_tied1: -+** mov (z[0-9]+\.b), #11 -+** mls z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_s8_m_tied1, svint8_t, -+ z0 = svmls_n_s8_m (p0, z0, z1, 11), -+ z0 = svmls_m (p0, z0, z1, 11)) -+ -+/* -+** mls_11_s8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), #11 -+** movprfx z0, z1 -+** mls z0\.b, p0/m, z2\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_s8_m_untied, svint8_t, -+ z0 = svmls_n_s8_m (p0, z1, z2, 11), -+ z0 = svmls_m (p0, z1, z2, 11)) -+ -+/* -+** mls_s8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** mls z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s8_z_tied1, svint8_t, -+ z0 = svmls_s8_z (p0, z0, z1, z2), -+ z0 = svmls_z (p0, z0, z1, z2)) -+ -+/* -+** mls_s8_z_tied2: -+** movprfx z0\.b, p0/z, z0\.b -+** msb z0\.b, p0/m, z2\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s8_z_tied2, svint8_t, -+ z0 = svmls_s8_z (p0, z1, z0, z2), -+ z0 = svmls_z (p0, z1, z0, z2)) -+ -+/* -+** mls_s8_z_tied3: -+** movprfx z0\.b, p0/z, z0\.b -+** msb z0\.b, p0/m, z2\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s8_z_tied3, svint8_t, -+ z0 = svmls_s8_z (p0, z1, z2, z0), -+ z0 = svmls_z (p0, z1, z2, z0)) -+ -+/* -+** mls_s8_z_untied: -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** mls z0\.b, p0/m, z2\.b, z3\.b -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** msb z0\.b, p0/m, z3\.b, z1\.b -+** | -+** movprfx z0\.b, p0/z, z3\.b -+** msb z0\.b, p0/m, z2\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s8_z_untied, svint8_t, -+ z0 = svmls_s8_z (p0, z1, z2, z3), -+ z0 = svmls_z (p0, z1, z2, z3)) -+ -+/* -+** mls_w0_s8_z_tied1: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** mls z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_s8_z_tied1, svint8_t, int8_t, -+ z0 = svmls_n_s8_z (p0, z0, z1, x0), -+ z0 = svmls_z (p0, z0, z1, x0)) -+ -+/* -+** mls_w0_s8_z_tied2: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** msb z0\.b, p0/m, \1, z1\.b -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_s8_z_tied2, svint8_t, int8_t, -+ z0 = svmls_n_s8_z (p0, z1, z0, x0), -+ z0 = svmls_z (p0, z1, z0, x0)) -+ -+/* -+** mls_w0_s8_z_untied: -+** mov (z[0-9]+\.b), w0 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** mls z0\.b, p0/m, z2\.b, \1 -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** msb z0\.b, p0/m, \1, z1\.b -+** | -+** movprfx z0\.b, p0/z, \1 -+** msb z0\.b, p0/m, z2\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_s8_z_untied, svint8_t, int8_t, -+ z0 = svmls_n_s8_z (p0, z1, z2, x0), -+ z0 = svmls_z (p0, z1, z2, x0)) -+ -+/* -+** mls_11_s8_z_tied1: -+** mov (z[0-9]+\.b), #11 -+** movprfx z0\.b, p0/z, z0\.b -+** mls z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_s8_z_tied1, svint8_t, -+ z0 = svmls_n_s8_z (p0, z0, z1, 11), -+ z0 = svmls_z (p0, z0, z1, 11)) -+ -+/* -+** mls_11_s8_z_tied2: -+** mov (z[0-9]+\.b), #11 -+** movprfx z0\.b, p0/z, z0\.b -+** msb z0\.b, p0/m, \1, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_s8_z_tied2, svint8_t, -+ z0 = svmls_n_s8_z (p0, z1, z0, 11), -+ z0 = svmls_z (p0, z1, z0, 11)) -+ -+/* -+** mls_11_s8_z_untied: -+** mov (z[0-9]+\.b), #11 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** mls z0\.b, p0/m, z2\.b, \1 -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** msb z0\.b, p0/m, \1, z1\.b -+** | -+** movprfx z0\.b, p0/z, \1 -+** msb z0\.b, p0/m, z2\.b, z1\.b -+** 
) -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_s8_z_untied, svint8_t, -+ z0 = svmls_n_s8_z (p0, z1, z2, 11), -+ z0 = svmls_z (p0, z1, z2, 11)) -+ -+/* -+** mls_s8_x_tied1: -+** mls z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s8_x_tied1, svint8_t, -+ z0 = svmls_s8_x (p0, z0, z1, z2), -+ z0 = svmls_x (p0, z0, z1, z2)) -+ -+/* -+** mls_s8_x_tied2: -+** msb z0\.b, p0/m, z2\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s8_x_tied2, svint8_t, -+ z0 = svmls_s8_x (p0, z1, z0, z2), -+ z0 = svmls_x (p0, z1, z0, z2)) -+ -+/* -+** mls_s8_x_tied3: -+** msb z0\.b, p0/m, z2\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s8_x_tied3, svint8_t, -+ z0 = svmls_s8_x (p0, z1, z2, z0), -+ z0 = svmls_x (p0, z1, z2, z0)) -+ -+/* -+** mls_s8_x_untied: -+** ( -+** movprfx z0, z1 -+** mls z0\.b, p0/m, z2\.b, z3\.b -+** | -+** movprfx z0, z2 -+** msb z0\.b, p0/m, z3\.b, z1\.b -+** | -+** movprfx z0, z3 -+** msb z0\.b, p0/m, z2\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mls_s8_x_untied, svint8_t, -+ z0 = svmls_s8_x (p0, z1, z2, z3), -+ z0 = svmls_x (p0, z1, z2, z3)) -+ -+/* -+** mls_w0_s8_x_tied1: -+** mov (z[0-9]+\.b), w0 -+** mls z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_s8_x_tied1, svint8_t, int8_t, -+ z0 = svmls_n_s8_x (p0, z0, z1, x0), -+ z0 = svmls_x (p0, z0, z1, x0)) -+ -+/* -+** mls_w0_s8_x_tied2: -+** mov (z[0-9]+\.b), w0 -+** msb z0\.b, p0/m, \1, z1\.b -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_s8_x_tied2, svint8_t, int8_t, -+ z0 = svmls_n_s8_x (p0, z1, z0, x0), -+ z0 = svmls_x (p0, z1, z0, x0)) -+ -+/* -+** mls_w0_s8_x_untied: -+** mov z0\.b, w0 -+** msb z0\.b, p0/m, z2\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_s8_x_untied, svint8_t, int8_t, -+ z0 = svmls_n_s8_x (p0, z1, z2, x0), -+ z0 = svmls_x (p0, z1, z2, x0)) -+ -+/* -+** mls_11_s8_x_tied1: -+** mov (z[0-9]+\.b), #11 -+** mls z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_s8_x_tied1, svint8_t, -+ z0 = svmls_n_s8_x (p0, z0, z1, 11), -+ z0 = svmls_x (p0, z0, z1, 11)) -+ -+/* -+** mls_11_s8_x_tied2: -+** mov (z[0-9]+\.b), #11 -+** msb z0\.b, p0/m, \1, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_s8_x_tied2, svint8_t, -+ z0 = svmls_n_s8_x (p0, z1, z0, 11), -+ z0 = svmls_x (p0, z1, z0, 11)) -+ -+/* -+** mls_11_s8_x_untied: -+** mov z0\.b, #11 -+** msb z0\.b, p0/m, z2\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_s8_x_untied, svint8_t, -+ z0 = svmls_n_s8_x (p0, z1, z2, 11), -+ z0 = svmls_x (p0, z1, z2, 11)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_u16.c -new file mode 100644 -index 000000000..e8a9f5cd9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_u16.c -@@ -0,0 +1,321 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mls_u16_m_tied1: -+** mls z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u16_m_tied1, svuint16_t, -+ z0 = svmls_u16_m (p0, z0, z1, z2), -+ z0 = svmls_m (p0, z0, z1, z2)) -+ -+/* -+** mls_u16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mls z0\.h, p0/m, \1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u16_m_tied2, svuint16_t, -+ z0 = svmls_u16_m (p0, z1, z0, z2), -+ z0 = svmls_m (p0, z1, z0, z2)) -+ -+/* -+** mls_u16_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mls z0\.h, p0/m, z2\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u16_m_tied3, svuint16_t, -+ z0 = svmls_u16_m (p0, z1, z2, z0), -+ z0 = svmls_m (p0, z1, z2, z0)) -+ -+/* -+** mls_u16_m_untied: -+** movprfx z0, 
z1 -+** mls z0\.h, p0/m, z2\.h, z3\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u16_m_untied, svuint16_t, -+ z0 = svmls_u16_m (p0, z1, z2, z3), -+ z0 = svmls_m (p0, z1, z2, z3)) -+ -+/* -+** mls_w0_u16_m_tied1: -+** mov (z[0-9]+\.h), w0 -+** mls z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_u16_m_tied1, svuint16_t, uint16_t, -+ z0 = svmls_n_u16_m (p0, z0, z1, x0), -+ z0 = svmls_m (p0, z0, z1, x0)) -+ -+/* -+** mls_w0_u16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** mls z0\.h, p0/m, z2\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_u16_m_untied, svuint16_t, uint16_t, -+ z0 = svmls_n_u16_m (p0, z1, z2, x0), -+ z0 = svmls_m (p0, z1, z2, x0)) -+ -+/* -+** mls_11_u16_m_tied1: -+** mov (z[0-9]+\.h), #11 -+** mls z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_u16_m_tied1, svuint16_t, -+ z0 = svmls_n_u16_m (p0, z0, z1, 11), -+ z0 = svmls_m (p0, z0, z1, 11)) -+ -+/* -+** mls_11_u16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), #11 -+** movprfx z0, z1 -+** mls z0\.h, p0/m, z2\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_u16_m_untied, svuint16_t, -+ z0 = svmls_n_u16_m (p0, z1, z2, 11), -+ z0 = svmls_m (p0, z1, z2, 11)) -+ -+/* -+** mls_u16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** mls z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u16_z_tied1, svuint16_t, -+ z0 = svmls_u16_z (p0, z0, z1, z2), -+ z0 = svmls_z (p0, z0, z1, z2)) -+ -+/* -+** mls_u16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** msb z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u16_z_tied2, svuint16_t, -+ z0 = svmls_u16_z (p0, z1, z0, z2), -+ z0 = svmls_z (p0, z1, z0, z2)) -+ -+/* -+** mls_u16_z_tied3: -+** movprfx z0\.h, p0/z, z0\.h -+** msb z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u16_z_tied3, svuint16_t, -+ z0 = svmls_u16_z (p0, z1, z2, z0), -+ z0 = svmls_z (p0, z1, z2, z0)) -+ -+/* -+** mls_u16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** mls z0\.h, p0/m, z2\.h, z3\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** msb z0\.h, p0/m, z3\.h, z1\.h -+** | -+** movprfx z0\.h, p0/z, z3\.h -+** msb z0\.h, p0/m, z2\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u16_z_untied, svuint16_t, -+ z0 = svmls_u16_z (p0, z1, z2, z3), -+ z0 = svmls_z (p0, z1, z2, z3)) -+ -+/* -+** mls_w0_u16_z_tied1: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** mls z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_u16_z_tied1, svuint16_t, uint16_t, -+ z0 = svmls_n_u16_z (p0, z0, z1, x0), -+ z0 = svmls_z (p0, z0, z1, x0)) -+ -+/* -+** mls_w0_u16_z_tied2: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** msb z0\.h, p0/m, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_u16_z_tied2, svuint16_t, uint16_t, -+ z0 = svmls_n_u16_z (p0, z1, z0, x0), -+ z0 = svmls_z (p0, z1, z0, x0)) -+ -+/* -+** mls_w0_u16_z_untied: -+** mov (z[0-9]+\.h), w0 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** mls z0\.h, p0/m, z2\.h, \1 -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** msb z0\.h, p0/m, \1, z1\.h -+** | -+** movprfx z0\.h, p0/z, \1 -+** msb z0\.h, p0/m, z2\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_u16_z_untied, svuint16_t, uint16_t, -+ z0 = svmls_n_u16_z (p0, z1, z2, x0), -+ z0 = svmls_z (p0, z1, z2, x0)) -+ -+/* -+** mls_11_u16_z_tied1: -+** mov (z[0-9]+\.h), #11 -+** movprfx z0\.h, p0/z, z0\.h -+** mls z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_u16_z_tied1, svuint16_t, -+ z0 = svmls_n_u16_z (p0, z0, z1, 11), -+ z0 = svmls_z (p0, z0, z1, 11)) -+ -+/* -+** mls_11_u16_z_tied2: -+** mov 
(z[0-9]+\.h), #11 -+** movprfx z0\.h, p0/z, z0\.h -+** msb z0\.h, p0/m, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_u16_z_tied2, svuint16_t, -+ z0 = svmls_n_u16_z (p0, z1, z0, 11), -+ z0 = svmls_z (p0, z1, z0, 11)) -+ -+/* -+** mls_11_u16_z_untied: -+** mov (z[0-9]+\.h), #11 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** mls z0\.h, p0/m, z2\.h, \1 -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** msb z0\.h, p0/m, \1, z1\.h -+** | -+** movprfx z0\.h, p0/z, \1 -+** msb z0\.h, p0/m, z2\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_u16_z_untied, svuint16_t, -+ z0 = svmls_n_u16_z (p0, z1, z2, 11), -+ z0 = svmls_z (p0, z1, z2, 11)) -+ -+/* -+** mls_u16_x_tied1: -+** mls z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u16_x_tied1, svuint16_t, -+ z0 = svmls_u16_x (p0, z0, z1, z2), -+ z0 = svmls_x (p0, z0, z1, z2)) -+ -+/* -+** mls_u16_x_tied2: -+** msb z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u16_x_tied2, svuint16_t, -+ z0 = svmls_u16_x (p0, z1, z0, z2), -+ z0 = svmls_x (p0, z1, z0, z2)) -+ -+/* -+** mls_u16_x_tied3: -+** msb z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u16_x_tied3, svuint16_t, -+ z0 = svmls_u16_x (p0, z1, z2, z0), -+ z0 = svmls_x (p0, z1, z2, z0)) -+ -+/* -+** mls_u16_x_untied: -+** ( -+** movprfx z0, z1 -+** mls z0\.h, p0/m, z2\.h, z3\.h -+** | -+** movprfx z0, z2 -+** msb z0\.h, p0/m, z3\.h, z1\.h -+** | -+** movprfx z0, z3 -+** msb z0\.h, p0/m, z2\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u16_x_untied, svuint16_t, -+ z0 = svmls_u16_x (p0, z1, z2, z3), -+ z0 = svmls_x (p0, z1, z2, z3)) -+ -+/* -+** mls_w0_u16_x_tied1: -+** mov (z[0-9]+\.h), w0 -+** mls z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_u16_x_tied1, svuint16_t, uint16_t, -+ z0 = svmls_n_u16_x (p0, z0, z1, x0), -+ z0 = svmls_x (p0, z0, z1, x0)) -+ -+/* -+** mls_w0_u16_x_tied2: -+** mov (z[0-9]+\.h), w0 -+** msb z0\.h, p0/m, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_u16_x_tied2, svuint16_t, uint16_t, -+ z0 = svmls_n_u16_x (p0, z1, z0, x0), -+ z0 = svmls_x (p0, z1, z0, x0)) -+ -+/* -+** mls_w0_u16_x_untied: -+** mov z0\.h, w0 -+** msb z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_u16_x_untied, svuint16_t, uint16_t, -+ z0 = svmls_n_u16_x (p0, z1, z2, x0), -+ z0 = svmls_x (p0, z1, z2, x0)) -+ -+/* -+** mls_11_u16_x_tied1: -+** mov (z[0-9]+\.h), #11 -+** mls z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_u16_x_tied1, svuint16_t, -+ z0 = svmls_n_u16_x (p0, z0, z1, 11), -+ z0 = svmls_x (p0, z0, z1, 11)) -+ -+/* -+** mls_11_u16_x_tied2: -+** mov (z[0-9]+\.h), #11 -+** msb z0\.h, p0/m, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_u16_x_tied2, svuint16_t, -+ z0 = svmls_n_u16_x (p0, z1, z0, 11), -+ z0 = svmls_x (p0, z1, z0, 11)) -+ -+/* -+** mls_11_u16_x_untied: -+** mov z0\.h, #11 -+** msb z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_u16_x_untied, svuint16_t, -+ z0 = svmls_n_u16_x (p0, z1, z2, 11), -+ z0 = svmls_x (p0, z1, z2, 11)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_u32.c -new file mode 100644 -index 000000000..47e885012 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_u32.c -@@ -0,0 +1,321 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mls_u32_m_tied1: -+** mls z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u32_m_tied1, svuint32_t, -+ z0 = svmls_u32_m (p0, z0, z1, z2), -+ z0 = svmls_m 
(p0, z0, z1, z2)) -+ -+/* -+** mls_u32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mls z0\.s, p0/m, \1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u32_m_tied2, svuint32_t, -+ z0 = svmls_u32_m (p0, z1, z0, z2), -+ z0 = svmls_m (p0, z1, z0, z2)) -+ -+/* -+** mls_u32_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mls z0\.s, p0/m, z2\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u32_m_tied3, svuint32_t, -+ z0 = svmls_u32_m (p0, z1, z2, z0), -+ z0 = svmls_m (p0, z1, z2, z0)) -+ -+/* -+** mls_u32_m_untied: -+** movprfx z0, z1 -+** mls z0\.s, p0/m, z2\.s, z3\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u32_m_untied, svuint32_t, -+ z0 = svmls_u32_m (p0, z1, z2, z3), -+ z0 = svmls_m (p0, z1, z2, z3)) -+ -+/* -+** mls_w0_u32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** mls z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_u32_m_tied1, svuint32_t, uint32_t, -+ z0 = svmls_n_u32_m (p0, z0, z1, x0), -+ z0 = svmls_m (p0, z0, z1, x0)) -+ -+/* -+** mls_w0_u32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** mls z0\.s, p0/m, z2\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_u32_m_untied, svuint32_t, uint32_t, -+ z0 = svmls_n_u32_m (p0, z1, z2, x0), -+ z0 = svmls_m (p0, z1, z2, x0)) -+ -+/* -+** mls_11_u32_m_tied1: -+** mov (z[0-9]+\.s), #11 -+** mls z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_u32_m_tied1, svuint32_t, -+ z0 = svmls_n_u32_m (p0, z0, z1, 11), -+ z0 = svmls_m (p0, z0, z1, 11)) -+ -+/* -+** mls_11_u32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #11 -+** movprfx z0, z1 -+** mls z0\.s, p0/m, z2\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_u32_m_untied, svuint32_t, -+ z0 = svmls_n_u32_m (p0, z1, z2, 11), -+ z0 = svmls_m (p0, z1, z2, 11)) -+ -+/* -+** mls_u32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** mls z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u32_z_tied1, svuint32_t, -+ z0 = svmls_u32_z (p0, z0, z1, z2), -+ z0 = svmls_z (p0, z0, z1, z2)) -+ -+/* -+** mls_u32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** msb z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u32_z_tied2, svuint32_t, -+ z0 = svmls_u32_z (p0, z1, z0, z2), -+ z0 = svmls_z (p0, z1, z0, z2)) -+ -+/* -+** mls_u32_z_tied3: -+** movprfx z0\.s, p0/z, z0\.s -+** msb z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u32_z_tied3, svuint32_t, -+ z0 = svmls_u32_z (p0, z1, z2, z0), -+ z0 = svmls_z (p0, z1, z2, z0)) -+ -+/* -+** mls_u32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** mls z0\.s, p0/m, z2\.s, z3\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** msb z0\.s, p0/m, z3\.s, z1\.s -+** | -+** movprfx z0\.s, p0/z, z3\.s -+** msb z0\.s, p0/m, z2\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u32_z_untied, svuint32_t, -+ z0 = svmls_u32_z (p0, z1, z2, z3), -+ z0 = svmls_z (p0, z1, z2, z3)) -+ -+/* -+** mls_w0_u32_z_tied1: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** mls z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_u32_z_tied1, svuint32_t, uint32_t, -+ z0 = svmls_n_u32_z (p0, z0, z1, x0), -+ z0 = svmls_z (p0, z0, z1, x0)) -+ -+/* -+** mls_w0_u32_z_tied2: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** msb z0\.s, p0/m, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_u32_z_tied2, svuint32_t, uint32_t, -+ z0 = svmls_n_u32_z (p0, z1, z0, x0), -+ z0 = svmls_z (p0, z1, z0, x0)) -+ -+/* -+** mls_w0_u32_z_untied: -+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** mls z0\.s, p0/m, z2\.s, \1 -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** msb z0\.s, p0/m, 
\1, z1\.s -+** | -+** movprfx z0\.s, p0/z, \1 -+** msb z0\.s, p0/m, z2\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_u32_z_untied, svuint32_t, uint32_t, -+ z0 = svmls_n_u32_z (p0, z1, z2, x0), -+ z0 = svmls_z (p0, z1, z2, x0)) -+ -+/* -+** mls_11_u32_z_tied1: -+** mov (z[0-9]+\.s), #11 -+** movprfx z0\.s, p0/z, z0\.s -+** mls z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_u32_z_tied1, svuint32_t, -+ z0 = svmls_n_u32_z (p0, z0, z1, 11), -+ z0 = svmls_z (p0, z0, z1, 11)) -+ -+/* -+** mls_11_u32_z_tied2: -+** mov (z[0-9]+\.s), #11 -+** movprfx z0\.s, p0/z, z0\.s -+** msb z0\.s, p0/m, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_u32_z_tied2, svuint32_t, -+ z0 = svmls_n_u32_z (p0, z1, z0, 11), -+ z0 = svmls_z (p0, z1, z0, 11)) -+ -+/* -+** mls_11_u32_z_untied: -+** mov (z[0-9]+\.s), #11 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** mls z0\.s, p0/m, z2\.s, \1 -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** msb z0\.s, p0/m, \1, z1\.s -+** | -+** movprfx z0\.s, p0/z, \1 -+** msb z0\.s, p0/m, z2\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_u32_z_untied, svuint32_t, -+ z0 = svmls_n_u32_z (p0, z1, z2, 11), -+ z0 = svmls_z (p0, z1, z2, 11)) -+ -+/* -+** mls_u32_x_tied1: -+** mls z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u32_x_tied1, svuint32_t, -+ z0 = svmls_u32_x (p0, z0, z1, z2), -+ z0 = svmls_x (p0, z0, z1, z2)) -+ -+/* -+** mls_u32_x_tied2: -+** msb z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u32_x_tied2, svuint32_t, -+ z0 = svmls_u32_x (p0, z1, z0, z2), -+ z0 = svmls_x (p0, z1, z0, z2)) -+ -+/* -+** mls_u32_x_tied3: -+** msb z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u32_x_tied3, svuint32_t, -+ z0 = svmls_u32_x (p0, z1, z2, z0), -+ z0 = svmls_x (p0, z1, z2, z0)) -+ -+/* -+** mls_u32_x_untied: -+** ( -+** movprfx z0, z1 -+** mls z0\.s, p0/m, z2\.s, z3\.s -+** | -+** movprfx z0, z2 -+** msb z0\.s, p0/m, z3\.s, z1\.s -+** | -+** movprfx z0, z3 -+** msb z0\.s, p0/m, z2\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u32_x_untied, svuint32_t, -+ z0 = svmls_u32_x (p0, z1, z2, z3), -+ z0 = svmls_x (p0, z1, z2, z3)) -+ -+/* -+** mls_w0_u32_x_tied1: -+** mov (z[0-9]+\.s), w0 -+** mls z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_u32_x_tied1, svuint32_t, uint32_t, -+ z0 = svmls_n_u32_x (p0, z0, z1, x0), -+ z0 = svmls_x (p0, z0, z1, x0)) -+ -+/* -+** mls_w0_u32_x_tied2: -+** mov (z[0-9]+\.s), w0 -+** msb z0\.s, p0/m, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_u32_x_tied2, svuint32_t, uint32_t, -+ z0 = svmls_n_u32_x (p0, z1, z0, x0), -+ z0 = svmls_x (p0, z1, z0, x0)) -+ -+/* -+** mls_w0_u32_x_untied: -+** mov z0\.s, w0 -+** msb z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_u32_x_untied, svuint32_t, uint32_t, -+ z0 = svmls_n_u32_x (p0, z1, z2, x0), -+ z0 = svmls_x (p0, z1, z2, x0)) -+ -+/* -+** mls_11_u32_x_tied1: -+** mov (z[0-9]+\.s), #11 -+** mls z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_u32_x_tied1, svuint32_t, -+ z0 = svmls_n_u32_x (p0, z0, z1, 11), -+ z0 = svmls_x (p0, z0, z1, 11)) -+ -+/* -+** mls_11_u32_x_tied2: -+** mov (z[0-9]+\.s), #11 -+** msb z0\.s, p0/m, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_u32_x_tied2, svuint32_t, -+ z0 = svmls_n_u32_x (p0, z1, z0, 11), -+ z0 = svmls_x (p0, z1, z0, 11)) -+ -+/* -+** mls_11_u32_x_untied: -+** mov z0\.s, #11 -+** msb z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_u32_x_untied, svuint32_t, -+ z0 = svmls_n_u32_x (p0, z1, z2, 11), -+ z0 = svmls_x (p0, z1, z2, 11)) -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_u64.c -new file mode 100644 -index 000000000..4d441b759 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_u64.c -@@ -0,0 +1,321 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mls_u64_m_tied1: -+** mls z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u64_m_tied1, svuint64_t, -+ z0 = svmls_u64_m (p0, z0, z1, z2), -+ z0 = svmls_m (p0, z0, z1, z2)) -+ -+/* -+** mls_u64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** mls z0\.d, p0/m, \1, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u64_m_tied2, svuint64_t, -+ z0 = svmls_u64_m (p0, z1, z0, z2), -+ z0 = svmls_m (p0, z1, z0, z2)) -+ -+/* -+** mls_u64_m_tied3: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** mls z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u64_m_tied3, svuint64_t, -+ z0 = svmls_u64_m (p0, z1, z2, z0), -+ z0 = svmls_m (p0, z1, z2, z0)) -+ -+/* -+** mls_u64_m_untied: -+** movprfx z0, z1 -+** mls z0\.d, p0/m, z2\.d, z3\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u64_m_untied, svuint64_t, -+ z0 = svmls_u64_m (p0, z1, z2, z3), -+ z0 = svmls_m (p0, z1, z2, z3)) -+ -+/* -+** mls_x0_u64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** mls z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_x0_u64_m_tied1, svuint64_t, uint64_t, -+ z0 = svmls_n_u64_m (p0, z0, z1, x0), -+ z0 = svmls_m (p0, z0, z1, x0)) -+ -+/* -+** mls_x0_u64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** mls z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_x0_u64_m_untied, svuint64_t, uint64_t, -+ z0 = svmls_n_u64_m (p0, z1, z2, x0), -+ z0 = svmls_m (p0, z1, z2, x0)) -+ -+/* -+** mls_11_u64_m_tied1: -+** mov (z[0-9]+\.d), #11 -+** mls z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_u64_m_tied1, svuint64_t, -+ z0 = svmls_n_u64_m (p0, z0, z1, 11), -+ z0 = svmls_m (p0, z0, z1, 11)) -+ -+/* -+** mls_11_u64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #11 -+** movprfx z0, z1 -+** mls z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_u64_m_untied, svuint64_t, -+ z0 = svmls_n_u64_m (p0, z1, z2, 11), -+ z0 = svmls_m (p0, z1, z2, 11)) -+ -+/* -+** mls_u64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** mls z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u64_z_tied1, svuint64_t, -+ z0 = svmls_u64_z (p0, z0, z1, z2), -+ z0 = svmls_z (p0, z0, z1, z2)) -+ -+/* -+** mls_u64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** msb z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u64_z_tied2, svuint64_t, -+ z0 = svmls_u64_z (p0, z1, z0, z2), -+ z0 = svmls_z (p0, z1, z0, z2)) -+ -+/* -+** mls_u64_z_tied3: -+** movprfx z0\.d, p0/z, z0\.d -+** msb z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u64_z_tied3, svuint64_t, -+ z0 = svmls_u64_z (p0, z1, z2, z0), -+ z0 = svmls_z (p0, z1, z2, z0)) -+ -+/* -+** mls_u64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** mls z0\.d, p0/m, z2\.d, z3\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** msb z0\.d, p0/m, z3\.d, z1\.d -+** | -+** movprfx z0\.d, p0/z, z3\.d -+** msb z0\.d, p0/m, z2\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u64_z_untied, svuint64_t, -+ z0 = svmls_u64_z (p0, z1, z2, z3), -+ z0 = svmls_z (p0, z1, z2, z3)) -+ -+/* -+** mls_x0_u64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** mls z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_x0_u64_z_tied1, svuint64_t, uint64_t, -+ 
z0 = svmls_n_u64_z (p0, z0, z1, x0), -+ z0 = svmls_z (p0, z0, z1, x0)) -+ -+/* -+** mls_x0_u64_z_tied2: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** msb z0\.d, p0/m, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_x0_u64_z_tied2, svuint64_t, uint64_t, -+ z0 = svmls_n_u64_z (p0, z1, z0, x0), -+ z0 = svmls_z (p0, z1, z0, x0)) -+ -+/* -+** mls_x0_u64_z_untied: -+** mov (z[0-9]+\.d), x0 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** mls z0\.d, p0/m, z2\.d, \1 -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** msb z0\.d, p0/m, \1, z1\.d -+** | -+** movprfx z0\.d, p0/z, \1 -+** msb z0\.d, p0/m, z2\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_x0_u64_z_untied, svuint64_t, uint64_t, -+ z0 = svmls_n_u64_z (p0, z1, z2, x0), -+ z0 = svmls_z (p0, z1, z2, x0)) -+ -+/* -+** mls_11_u64_z_tied1: -+** mov (z[0-9]+\.d), #11 -+** movprfx z0\.d, p0/z, z0\.d -+** mls z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_u64_z_tied1, svuint64_t, -+ z0 = svmls_n_u64_z (p0, z0, z1, 11), -+ z0 = svmls_z (p0, z0, z1, 11)) -+ -+/* -+** mls_11_u64_z_tied2: -+** mov (z[0-9]+\.d), #11 -+** movprfx z0\.d, p0/z, z0\.d -+** msb z0\.d, p0/m, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_u64_z_tied2, svuint64_t, -+ z0 = svmls_n_u64_z (p0, z1, z0, 11), -+ z0 = svmls_z (p0, z1, z0, 11)) -+ -+/* -+** mls_11_u64_z_untied: -+** mov (z[0-9]+\.d), #11 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** mls z0\.d, p0/m, z2\.d, \1 -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** msb z0\.d, p0/m, \1, z1\.d -+** | -+** movprfx z0\.d, p0/z, \1 -+** msb z0\.d, p0/m, z2\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_u64_z_untied, svuint64_t, -+ z0 = svmls_n_u64_z (p0, z1, z2, 11), -+ z0 = svmls_z (p0, z1, z2, 11)) -+ -+/* -+** mls_u64_x_tied1: -+** mls z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u64_x_tied1, svuint64_t, -+ z0 = svmls_u64_x (p0, z0, z1, z2), -+ z0 = svmls_x (p0, z0, z1, z2)) -+ -+/* -+** mls_u64_x_tied2: -+** msb z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u64_x_tied2, svuint64_t, -+ z0 = svmls_u64_x (p0, z1, z0, z2), -+ z0 = svmls_x (p0, z1, z0, z2)) -+ -+/* -+** mls_u64_x_tied3: -+** msb z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u64_x_tied3, svuint64_t, -+ z0 = svmls_u64_x (p0, z1, z2, z0), -+ z0 = svmls_x (p0, z1, z2, z0)) -+ -+/* -+** mls_u64_x_untied: -+** ( -+** movprfx z0, z1 -+** mls z0\.d, p0/m, z2\.d, z3\.d -+** | -+** movprfx z0, z2 -+** msb z0\.d, p0/m, z3\.d, z1\.d -+** | -+** movprfx z0, z3 -+** msb z0\.d, p0/m, z2\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u64_x_untied, svuint64_t, -+ z0 = svmls_u64_x (p0, z1, z2, z3), -+ z0 = svmls_x (p0, z1, z2, z3)) -+ -+/* -+** mls_x0_u64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** mls z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_x0_u64_x_tied1, svuint64_t, uint64_t, -+ z0 = svmls_n_u64_x (p0, z0, z1, x0), -+ z0 = svmls_x (p0, z0, z1, x0)) -+ -+/* -+** mls_x0_u64_x_tied2: -+** mov (z[0-9]+\.d), x0 -+** msb z0\.d, p0/m, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_x0_u64_x_tied2, svuint64_t, uint64_t, -+ z0 = svmls_n_u64_x (p0, z1, z0, x0), -+ z0 = svmls_x (p0, z1, z0, x0)) -+ -+/* -+** mls_x0_u64_x_untied: -+** mov z0\.d, x0 -+** msb z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_x0_u64_x_untied, svuint64_t, uint64_t, -+ z0 = svmls_n_u64_x (p0, z1, z2, x0), -+ z0 = svmls_x (p0, z1, z2, x0)) -+ -+/* -+** mls_11_u64_x_tied1: -+** mov (z[0-9]+\.d), #11 -+** mls z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_u64_x_tied1, svuint64_t, -+ z0 = 
svmls_n_u64_x (p0, z0, z1, 11), -+ z0 = svmls_x (p0, z0, z1, 11)) -+ -+/* -+** mls_11_u64_x_tied2: -+** mov (z[0-9]+\.d), #11 -+** msb z0\.d, p0/m, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_u64_x_tied2, svuint64_t, -+ z0 = svmls_n_u64_x (p0, z1, z0, 11), -+ z0 = svmls_x (p0, z1, z0, 11)) -+ -+/* -+** mls_11_u64_x_untied: -+** mov z0\.d, #11 -+** msb z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_u64_x_untied, svuint64_t, -+ z0 = svmls_n_u64_x (p0, z1, z2, 11), -+ z0 = svmls_x (p0, z1, z2, 11)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_u8.c -new file mode 100644 -index 000000000..0489aaa7c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mls_u8.c -@@ -0,0 +1,321 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mls_u8_m_tied1: -+** mls z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u8_m_tied1, svuint8_t, -+ z0 = svmls_u8_m (p0, z0, z1, z2), -+ z0 = svmls_m (p0, z0, z1, z2)) -+ -+/* -+** mls_u8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mls z0\.b, p0/m, \1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u8_m_tied2, svuint8_t, -+ z0 = svmls_u8_m (p0, z1, z0, z2), -+ z0 = svmls_m (p0, z1, z0, z2)) -+ -+/* -+** mls_u8_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mls z0\.b, p0/m, z2\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u8_m_tied3, svuint8_t, -+ z0 = svmls_u8_m (p0, z1, z2, z0), -+ z0 = svmls_m (p0, z1, z2, z0)) -+ -+/* -+** mls_u8_m_untied: -+** movprfx z0, z1 -+** mls z0\.b, p0/m, z2\.b, z3\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u8_m_untied, svuint8_t, -+ z0 = svmls_u8_m (p0, z1, z2, z3), -+ z0 = svmls_m (p0, z1, z2, z3)) -+ -+/* -+** mls_w0_u8_m_tied1: -+** mov (z[0-9]+\.b), w0 -+** mls z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_u8_m_tied1, svuint8_t, uint8_t, -+ z0 = svmls_n_u8_m (p0, z0, z1, x0), -+ z0 = svmls_m (p0, z0, z1, x0)) -+ -+/* -+** mls_w0_u8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** mls z0\.b, p0/m, z2\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_u8_m_untied, svuint8_t, uint8_t, -+ z0 = svmls_n_u8_m (p0, z1, z2, x0), -+ z0 = svmls_m (p0, z1, z2, x0)) -+ -+/* -+** mls_11_u8_m_tied1: -+** mov (z[0-9]+\.b), #11 -+** mls z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_u8_m_tied1, svuint8_t, -+ z0 = svmls_n_u8_m (p0, z0, z1, 11), -+ z0 = svmls_m (p0, z0, z1, 11)) -+ -+/* -+** mls_11_u8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), #11 -+** movprfx z0, z1 -+** mls z0\.b, p0/m, z2\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_u8_m_untied, svuint8_t, -+ z0 = svmls_n_u8_m (p0, z1, z2, 11), -+ z0 = svmls_m (p0, z1, z2, 11)) -+ -+/* -+** mls_u8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** mls z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u8_z_tied1, svuint8_t, -+ z0 = svmls_u8_z (p0, z0, z1, z2), -+ z0 = svmls_z (p0, z0, z1, z2)) -+ -+/* -+** mls_u8_z_tied2: -+** movprfx z0\.b, p0/z, z0\.b -+** msb z0\.b, p0/m, z2\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u8_z_tied2, svuint8_t, -+ z0 = svmls_u8_z (p0, z1, z0, z2), -+ z0 = svmls_z (p0, z1, z0, z2)) -+ -+/* -+** mls_u8_z_tied3: -+** movprfx z0\.b, p0/z, z0\.b -+** msb z0\.b, p0/m, z2\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u8_z_tied3, svuint8_t, -+ z0 = svmls_u8_z (p0, z1, z2, z0), -+ z0 = svmls_z (p0, z1, z2, z0)) -+ -+/* -+** mls_u8_z_untied: -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** mls z0\.b, 
p0/m, z2\.b, z3\.b -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** msb z0\.b, p0/m, z3\.b, z1\.b -+** | -+** movprfx z0\.b, p0/z, z3\.b -+** msb z0\.b, p0/m, z2\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u8_z_untied, svuint8_t, -+ z0 = svmls_u8_z (p0, z1, z2, z3), -+ z0 = svmls_z (p0, z1, z2, z3)) -+ -+/* -+** mls_w0_u8_z_tied1: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** mls z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_u8_z_tied1, svuint8_t, uint8_t, -+ z0 = svmls_n_u8_z (p0, z0, z1, x0), -+ z0 = svmls_z (p0, z0, z1, x0)) -+ -+/* -+** mls_w0_u8_z_tied2: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** msb z0\.b, p0/m, \1, z1\.b -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_u8_z_tied2, svuint8_t, uint8_t, -+ z0 = svmls_n_u8_z (p0, z1, z0, x0), -+ z0 = svmls_z (p0, z1, z0, x0)) -+ -+/* -+** mls_w0_u8_z_untied: -+** mov (z[0-9]+\.b), w0 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** mls z0\.b, p0/m, z2\.b, \1 -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** msb z0\.b, p0/m, \1, z1\.b -+** | -+** movprfx z0\.b, p0/z, \1 -+** msb z0\.b, p0/m, z2\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_u8_z_untied, svuint8_t, uint8_t, -+ z0 = svmls_n_u8_z (p0, z1, z2, x0), -+ z0 = svmls_z (p0, z1, z2, x0)) -+ -+/* -+** mls_11_u8_z_tied1: -+** mov (z[0-9]+\.b), #11 -+** movprfx z0\.b, p0/z, z0\.b -+** mls z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_u8_z_tied1, svuint8_t, -+ z0 = svmls_n_u8_z (p0, z0, z1, 11), -+ z0 = svmls_z (p0, z0, z1, 11)) -+ -+/* -+** mls_11_u8_z_tied2: -+** mov (z[0-9]+\.b), #11 -+** movprfx z0\.b, p0/z, z0\.b -+** msb z0\.b, p0/m, \1, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_u8_z_tied2, svuint8_t, -+ z0 = svmls_n_u8_z (p0, z1, z0, 11), -+ z0 = svmls_z (p0, z1, z0, 11)) -+ -+/* -+** mls_11_u8_z_untied: -+** mov (z[0-9]+\.b), #11 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** mls z0\.b, p0/m, z2\.b, \1 -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** msb z0\.b, p0/m, \1, z1\.b -+** | -+** movprfx z0\.b, p0/z, \1 -+** msb z0\.b, p0/m, z2\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_u8_z_untied, svuint8_t, -+ z0 = svmls_n_u8_z (p0, z1, z2, 11), -+ z0 = svmls_z (p0, z1, z2, 11)) -+ -+/* -+** mls_u8_x_tied1: -+** mls z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u8_x_tied1, svuint8_t, -+ z0 = svmls_u8_x (p0, z0, z1, z2), -+ z0 = svmls_x (p0, z0, z1, z2)) -+ -+/* -+** mls_u8_x_tied2: -+** msb z0\.b, p0/m, z2\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u8_x_tied2, svuint8_t, -+ z0 = svmls_u8_x (p0, z1, z0, z2), -+ z0 = svmls_x (p0, z1, z0, z2)) -+ -+/* -+** mls_u8_x_tied3: -+** msb z0\.b, p0/m, z2\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u8_x_tied3, svuint8_t, -+ z0 = svmls_u8_x (p0, z1, z2, z0), -+ z0 = svmls_x (p0, z1, z2, z0)) -+ -+/* -+** mls_u8_x_untied: -+** ( -+** movprfx z0, z1 -+** mls z0\.b, p0/m, z2\.b, z3\.b -+** | -+** movprfx z0, z2 -+** msb z0\.b, p0/m, z3\.b, z1\.b -+** | -+** movprfx z0, z3 -+** msb z0\.b, p0/m, z2\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mls_u8_x_untied, svuint8_t, -+ z0 = svmls_u8_x (p0, z1, z2, z3), -+ z0 = svmls_x (p0, z1, z2, z3)) -+ -+/* -+** mls_w0_u8_x_tied1: -+** mov (z[0-9]+\.b), w0 -+** mls z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_u8_x_tied1, svuint8_t, uint8_t, -+ z0 = svmls_n_u8_x (p0, z0, z1, x0), -+ z0 = svmls_x (p0, z0, z1, x0)) -+ -+/* -+** mls_w0_u8_x_tied2: -+** mov (z[0-9]+\.b), w0 -+** msb z0\.b, p0/m, \1, z1\.b -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_u8_x_tied2, svuint8_t, uint8_t, -+ z0 = svmls_n_u8_x (p0, 
z1, z0, x0), -+ z0 = svmls_x (p0, z1, z0, x0)) -+ -+/* -+** mls_w0_u8_x_untied: -+** mov z0\.b, w0 -+** msb z0\.b, p0/m, z2\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_ZX (mls_w0_u8_x_untied, svuint8_t, uint8_t, -+ z0 = svmls_n_u8_x (p0, z1, z2, x0), -+ z0 = svmls_x (p0, z1, z2, x0)) -+ -+/* -+** mls_11_u8_x_tied1: -+** mov (z[0-9]+\.b), #11 -+** mls z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_u8_x_tied1, svuint8_t, -+ z0 = svmls_n_u8_x (p0, z0, z1, 11), -+ z0 = svmls_x (p0, z0, z1, 11)) -+ -+/* -+** mls_11_u8_x_tied2: -+** mov (z[0-9]+\.b), #11 -+** msb z0\.b, p0/m, \1, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_u8_x_tied2, svuint8_t, -+ z0 = svmls_n_u8_x (p0, z1, z0, 11), -+ z0 = svmls_x (p0, z1, z0, 11)) -+ -+/* -+** mls_11_u8_x_untied: -+** mov z0\.b, #11 -+** msb z0\.b, p0/m, z2\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mls_11_u8_x_untied, svuint8_t, -+ z0 = svmls_n_u8_x (p0, z1, z2, 11), -+ z0 = svmls_x (p0, z1, z2, 11)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_f32.c -new file mode 100644 -index 000000000..f66dbf397 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_f32.c -@@ -0,0 +1,46 @@ -+/* { dg-require-effective-target aarch64_asm_f32mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f32mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mmla_f32_tied1: -+** fmmla z0\.s, z4\.s, z5\.s -+** ret -+*/ -+TEST_DUAL_Z (mmla_f32_tied1, svfloat32_t, svfloat32_t, -+ z0 = svmmla_f32 (z0, z4, z5), -+ z0 = svmmla (z0, z4, z5)) -+ -+/* -+** mmla_f32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** fmmla z0\.s, \1\.s, z1\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (mmla_f32_tied2, svfloat32_t, svfloat32_t, -+ z0_res = svmmla_f32 (z4, z0, z1), -+ z0_res = svmmla (z4, z0, z1)) -+ -+/* -+** mmla_f32_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** fmmla z0\.s, z1\.s, \1\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (mmla_f32_tied3, svfloat32_t, svfloat32_t, -+ z0_res = svmmla_f32 (z4, z1, z0), -+ z0_res = svmmla (z4, z1, z0)) -+ -+/* -+** mmla_f32_untied: -+** movprfx z0, z1 -+** fmmla z0\.s, z4\.s, z5\.s -+** ret -+*/ -+TEST_DUAL_Z (mmla_f32_untied, svfloat32_t, svfloat32_t, -+ z0 = svmmla_f32 (z1, z4, z5), -+ z0 = svmmla (z1, z4, z5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_f64.c -new file mode 100644 -index 000000000..49dc0607c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_f64.c -@@ -0,0 +1,46 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mmla_f64_tied1: -+** fmmla z0\.d, z4\.d, z5\.d -+** ret -+*/ -+TEST_DUAL_Z (mmla_f64_tied1, svfloat64_t, svfloat64_t, -+ z0 = svmmla_f64 (z0, z4, z5), -+ z0 = svmmla (z0, z4, z5)) -+ -+/* -+** mmla_f64_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z4 -+** fmmla z0\.d, \1, z1\.d -+** ret -+*/ -+TEST_DUAL_Z_REV (mmla_f64_tied2, svfloat64_t, svfloat64_t, -+ z0_res = svmmla_f64 (z4, z0, z1), -+ z0_res = svmmla (z4, z0, z1)) -+ -+/* -+** mmla_f64_tied3: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z4 -+** fmmla z0\.d, z1\.d, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (mmla_f64_tied3, svfloat64_t, svfloat64_t, -+ z0_res = svmmla_f64 (z4, z1, z0), -+ z0_res = svmmla (z4, 
z1, z0)) -+ -+/* -+** mmla_f64_untied: -+** movprfx z0, z1 -+** fmmla z0\.d, z4\.d, z5\.d -+** ret -+*/ -+TEST_DUAL_Z (mmla_f64_untied, svfloat64_t, svfloat64_t, -+ z0 = svmmla_f64 (z1, z4, z5), -+ z0 = svmmla (z1, z4, z5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_s32.c -new file mode 100644 -index 000000000..e7ce009ac ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_s32.c -@@ -0,0 +1,46 @@ -+/* { dg-require-effective-target aarch64_asm_i8mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+sve+i8mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mmla_s32_tied1: -+** smmla z0\.s, z4\.b, z5\.b -+** ret -+*/ -+TEST_DUAL_Z (mmla_s32_tied1, svint32_t, svint8_t, -+ z0 = svmmla_s32 (z0, z4, z5), -+ z0 = svmmla (z0, z4, z5)) -+ -+/* -+** mmla_s32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** smmla z0\.s, \1\.b, z1\.b -+** ret -+*/ -+TEST_DUAL_Z_REV (mmla_s32_tied2, svint32_t, svint8_t, -+ z0_res = svmmla_s32 (z4, z0, z1), -+ z0_res = svmmla (z4, z0, z1)) -+ -+/* -+** mmla_s32_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** smmla z0\.s, z1\.b, \1\.b -+** ret -+*/ -+TEST_DUAL_Z_REV (mmla_s32_tied3, svint32_t, svint8_t, -+ z0_res = svmmla_s32 (z4, z1, z0), -+ z0_res = svmmla (z4, z1, z0)) -+ -+/* -+** mmla_s32_untied: -+** movprfx z0, z1 -+** smmla z0\.s, z4\.b, z5\.b -+** ret -+*/ -+TEST_DUAL_Z (mmla_s32_untied, svint32_t, svint8_t, -+ z0 = svmmla_s32 (z1, z4, z5), -+ z0 = svmmla (z1, z4, z5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_u32.c -new file mode 100644 -index 000000000..81f5166fb ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mmla_u32.c -@@ -0,0 +1,46 @@ -+/* { dg-require-effective-target aarch64_asm_i8mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+sve+i8mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mmla_u32_tied1: -+** ummla z0\.s, z4\.b, z5\.b -+** ret -+*/ -+TEST_DUAL_Z (mmla_u32_tied1, svuint32_t, svuint8_t, -+ z0 = svmmla_u32 (z0, z4, z5), -+ z0 = svmmla (z0, z4, z5)) -+ -+/* -+** mmla_u32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** ummla z0\.s, \1\.b, z1\.b -+** ret -+*/ -+TEST_DUAL_Z_REV (mmla_u32_tied2, svuint32_t, svuint8_t, -+ z0_res = svmmla_u32 (z4, z0, z1), -+ z0_res = svmmla (z4, z0, z1)) -+ -+/* -+** mmla_u32_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** ummla z0\.s, z1\.b, \1\.b -+** ret -+*/ -+TEST_DUAL_Z_REV (mmla_u32_tied3, svuint32_t, svuint8_t, -+ z0_res = svmmla_u32 (z4, z1, z0), -+ z0_res = svmmla (z4, z1, z0)) -+ -+/* -+** mmla_u32_untied: -+** movprfx z0, z1 -+** ummla z0\.s, z4\.b, z5\.b -+** ret -+*/ -+TEST_DUAL_Z (mmla_u32_untied, svuint32_t, svuint8_t, -+ z0 = svmmla_u32 (z1, z4, z5), -+ z0 = svmmla (z1, z4, z5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mov_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mov_b.c -new file mode 100644 -index 000000000..6b78f348f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mov_b.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mov_b_z_tied1: -+** and p0\.b, (?:p3/z, p0\.b, p0\.b|p0/z, p3\.b, p3\.b) -+** ret -+*/ -+TEST_UNIFORM_P (mov_b_z_tied1, -+ p0 = svmov_b_z (p3, p0), 
-+ p0 = svmov_z (p3, p0)) -+ -+/* -+** mov_b_z_untied: -+** and p0\.b, (?:p3/z, p1\.b, p1\.b|p1/z, p3\.b, p3\.b) -+** ret -+*/ -+TEST_UNIFORM_P (mov_b_z_untied, -+ p0 = svmov_b_z (p3, p1), -+ p0 = svmov_z (p3, p1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_f16.c -new file mode 100644 -index 000000000..fe11457c4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_f16.c -@@ -0,0 +1,398 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** msb_f16_m_tied1: -+** fmsb z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (msb_f16_m_tied1, svfloat16_t, -+ z0 = svmsb_f16_m (p0, z0, z1, z2), -+ z0 = svmsb_m (p0, z0, z1, z2)) -+ -+/* -+** msb_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fmsb z0\.h, p0/m, \1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (msb_f16_m_tied2, svfloat16_t, -+ z0 = svmsb_f16_m (p0, z1, z0, z2), -+ z0 = svmsb_m (p0, z1, z0, z2)) -+ -+/* -+** msb_f16_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fmsb z0\.h, p0/m, z2\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (msb_f16_m_tied3, svfloat16_t, -+ z0 = svmsb_f16_m (p0, z1, z2, z0), -+ z0 = svmsb_m (p0, z1, z2, z0)) -+ -+/* -+** msb_f16_m_untied: -+** movprfx z0, z1 -+** fmsb z0\.h, p0/m, z2\.h, z3\.h -+** ret -+*/ -+TEST_UNIFORM_Z (msb_f16_m_untied, svfloat16_t, -+ z0 = svmsb_f16_m (p0, z1, z2, z3), -+ z0 = svmsb_m (p0, z1, z2, z3)) -+ -+/* -+** msb_h4_f16_m_tied1: -+** mov (z[0-9]+\.h), h4 -+** fmsb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (msb_h4_f16_m_tied1, svfloat16_t, __fp16, -+ z0 = svmsb_n_f16_m (p0, z0, z1, d4), -+ z0 = svmsb_m (p0, z0, z1, d4)) -+ -+/* -+** msb_h4_f16_m_untied: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0, z1 -+** fmsb z0\.h, p0/m, z2\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (msb_h4_f16_m_untied, svfloat16_t, __fp16, -+ z0 = svmsb_n_f16_m (p0, z1, z2, d4), -+ z0 = svmsb_m (p0, z1, z2, d4)) -+ -+/* -+** msb_2_f16_m_tied1: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fmsb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_2_f16_m_tied1, svfloat16_t, -+ z0 = svmsb_n_f16_m (p0, z0, z1, 2), -+ z0 = svmsb_m (p0, z0, z1, 2)) -+ -+/* -+** msb_2_f16_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fmsb z0\.h, p0/m, z2\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_2_f16_m_untied, svfloat16_t, -+ z0 = svmsb_n_f16_m (p0, z1, z2, 2), -+ z0 = svmsb_m (p0, z1, z2, 2)) -+ -+/* -+** msb_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fmsb z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (msb_f16_z_tied1, svfloat16_t, -+ z0 = svmsb_f16_z (p0, z0, z1, z2), -+ z0 = svmsb_z (p0, z0, z1, z2)) -+ -+/* -+** msb_f16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** fmsb z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (msb_f16_z_tied2, svfloat16_t, -+ z0 = svmsb_f16_z (p0, z1, z0, z2), -+ z0 = svmsb_z (p0, z1, z0, z2)) -+ -+/* -+** msb_f16_z_tied3: -+** movprfx z0\.h, p0/z, z0\.h -+** fmls z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (msb_f16_z_tied3, svfloat16_t, -+ z0 = svmsb_f16_z (p0, z1, z2, z0), -+ z0 = svmsb_z (p0, z1, z2, z0)) -+ -+/* -+** msb_f16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fmsb z0\.h, p0/m, z2\.h, z3\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fmsb z0\.h, p0/m, z1\.h, z3\.h -+** | -+** movprfx z0\.h, p0/z, z3\.h -+** fmls z0\.h, p0/m, z1\.h, z2\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (msb_f16_z_untied, svfloat16_t, -+ z0 = svmsb_f16_z (p0, z1, z2, z3), -+ z0 = svmsb_z (p0, z1, z2, z3)) -+ -+/* -+** msb_h4_f16_z_tied1: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0\.h, p0/z, z0\.h -+** fmsb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (msb_h4_f16_z_tied1, svfloat16_t, __fp16, -+ z0 = svmsb_n_f16_z (p0, z0, z1, d4), -+ z0 = svmsb_z (p0, z0, z1, d4)) -+ -+/* -+** msb_h4_f16_z_tied2: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0\.h, p0/z, z0\.h -+** fmsb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (msb_h4_f16_z_tied2, svfloat16_t, __fp16, -+ z0 = svmsb_n_f16_z (p0, z1, z0, d4), -+ z0 = svmsb_z (p0, z1, z0, d4)) -+ -+/* -+** msb_h4_f16_z_untied: -+** mov (z[0-9]+\.h), h4 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fmsb z0\.h, p0/m, z2\.h, \1 -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fmsb z0\.h, p0/m, z1\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** fmls z0\.h, p0/m, z1\.h, z2\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (msb_h4_f16_z_untied, svfloat16_t, __fp16, -+ z0 = svmsb_n_f16_z (p0, z1, z2, d4), -+ z0 = svmsb_z (p0, z1, z2, d4)) -+ -+/* -+** msb_2_f16_z_tied1: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** movprfx z0\.h, p0/z, z0\.h -+** fmsb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_2_f16_z_tied1, svfloat16_t, -+ z0 = svmsb_n_f16_z (p0, z0, z1, 2), -+ z0 = svmsb_z (p0, z0, z1, 2)) -+ -+/* -+** msb_2_f16_z_tied2: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** movprfx z0\.h, p0/z, z0\.h -+** fmsb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_2_f16_z_tied2, svfloat16_t, -+ z0 = svmsb_n_f16_z (p0, z1, z0, 2), -+ z0 = svmsb_z (p0, z1, z0, 2)) -+ -+/* -+** msb_2_f16_z_untied: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fmsb z0\.h, p0/m, z2\.h, \1 -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fmsb z0\.h, p0/m, z1\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** fmls z0\.h, p0/m, z1\.h, z2\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (msb_2_f16_z_untied, svfloat16_t, -+ z0 = svmsb_n_f16_z (p0, z1, z2, 2), -+ z0 = svmsb_z (p0, z1, z2, 2)) -+ -+/* -+** msb_f16_x_tied1: -+** fmsb z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (msb_f16_x_tied1, svfloat16_t, -+ z0 = svmsb_f16_x (p0, z0, z1, z2), -+ z0 = svmsb_x (p0, z0, z1, z2)) -+ -+/* -+** msb_f16_x_tied2: -+** fmsb z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (msb_f16_x_tied2, svfloat16_t, -+ z0 = svmsb_f16_x (p0, z1, z0, z2), -+ z0 = svmsb_x (p0, z1, z0, z2)) -+ -+/* -+** msb_f16_x_tied3: -+** fmls z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (msb_f16_x_tied3, svfloat16_t, -+ z0 = svmsb_f16_x (p0, z1, z2, z0), -+ z0 = svmsb_x (p0, z1, z2, z0)) -+ -+/* -+** msb_f16_x_untied: -+** ( -+** movprfx z0, z1 -+** fmsb z0\.h, p0/m, z2\.h, z3\.h -+** | -+** movprfx z0, z2 -+** fmsb z0\.h, p0/m, z1\.h, z3\.h -+** | -+** movprfx z0, z3 -+** fmls z0\.h, p0/m, z1\.h, z2\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (msb_f16_x_untied, svfloat16_t, -+ z0 = svmsb_f16_x (p0, z1, z2, z3), -+ z0 = svmsb_x (p0, z1, z2, z3)) -+ -+/* -+** msb_h4_f16_x_tied1: -+** mov (z[0-9]+\.h), h4 -+** fmsb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (msb_h4_f16_x_tied1, svfloat16_t, __fp16, -+ z0 = svmsb_n_f16_x (p0, z0, z1, d4), -+ z0 = svmsb_x (p0, z0, z1, d4)) -+ -+/* -+** msb_h4_f16_x_tied2: -+** mov (z[0-9]+\.h), h4 -+** fmsb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (msb_h4_f16_x_tied2, svfloat16_t, __fp16, -+ z0 = svmsb_n_f16_x (p0, z1, z0, d4), -+ z0 = svmsb_x (p0, z1, z0, d4)) -+ -+/* -+** msb_h4_f16_x_untied: { xfail *-*-* } -+** mov z0\.h, h4 -+** fmls z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_ZD (msb_h4_f16_x_untied, svfloat16_t, __fp16, -+ z0 = svmsb_n_f16_x (p0, z1, z2, d4), -+ z0 = svmsb_x (p0, z1, z2, d4)) -+ -+/* -+** msb_2_f16_x_tied1: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fmsb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_2_f16_x_tied1, svfloat16_t, -+ z0 = svmsb_n_f16_x (p0, z0, z1, 2), -+ z0 = svmsb_x (p0, z0, z1, 2)) -+ -+/* -+** msb_2_f16_x_tied2: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fmsb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_2_f16_x_tied2, svfloat16_t, -+ z0 = svmsb_n_f16_x (p0, z1, z0, 2), -+ z0 = svmsb_x (p0, z1, z0, 2)) -+ -+/* -+** msb_2_f16_x_untied: -+** fmov z0\.h, #2\.0(?:e\+0)? -+** fmls z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (msb_2_f16_x_untied, svfloat16_t, -+ z0 = svmsb_n_f16_x (p0, z1, z2, 2), -+ z0 = svmsb_x (p0, z1, z2, 2)) -+ -+/* -+** ptrue_msb_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_msb_f16_x_tied1, svfloat16_t, -+ z0 = svmsb_f16_x (svptrue_b16 (), z0, z1, z2), -+ z0 = svmsb_x (svptrue_b16 (), z0, z1, z2)) -+ -+/* -+** ptrue_msb_f16_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_msb_f16_x_tied2, svfloat16_t, -+ z0 = svmsb_f16_x (svptrue_b16 (), z1, z0, z2), -+ z0 = svmsb_x (svptrue_b16 (), z1, z0, z2)) -+ -+/* -+** ptrue_msb_f16_x_tied3: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_msb_f16_x_tied3, svfloat16_t, -+ z0 = svmsb_f16_x (svptrue_b16 (), z1, z2, z0), -+ z0 = svmsb_x (svptrue_b16 (), z1, z2, z0)) -+ -+/* -+** ptrue_msb_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_msb_f16_x_untied, svfloat16_t, -+ z0 = svmsb_f16_x (svptrue_b16 (), z1, z2, z3), -+ z0 = svmsb_x (svptrue_b16 (), z1, z2, z3)) -+ -+/* -+** ptrue_msb_2_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_msb_2_f16_x_tied1, svfloat16_t, -+ z0 = svmsb_n_f16_x (svptrue_b16 (), z0, z1, 2), -+ z0 = svmsb_x (svptrue_b16 (), z0, z1, 2)) -+ -+/* -+** ptrue_msb_2_f16_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_msb_2_f16_x_tied2, svfloat16_t, -+ z0 = svmsb_n_f16_x (svptrue_b16 (), z1, z0, 2), -+ z0 = svmsb_x (svptrue_b16 (), z1, z0, 2)) -+ -+/* -+** ptrue_msb_2_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_msb_2_f16_x_untied, svfloat16_t, -+ z0 = svmsb_n_f16_x (svptrue_b16 (), z1, z2, 2), -+ z0 = svmsb_x (svptrue_b16 (), z1, z2, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_f32.c -new file mode 100644 -index 000000000..f7a9f2767 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_f32.c -@@ -0,0 +1,398 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** msb_f32_m_tied1: -+** fmsb z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (msb_f32_m_tied1, svfloat32_t, -+ z0 = svmsb_f32_m (p0, z0, z1, z2), -+ z0 = svmsb_m (p0, z0, z1, z2)) -+ -+/* -+** msb_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fmsb z0\.s, p0/m, \1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (msb_f32_m_tied2, svfloat32_t, -+ z0 = svmsb_f32_m (p0, z1, z0, z2), -+ z0 = svmsb_m (p0, z1, z0, z2)) -+ -+/* -+** msb_f32_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fmsb z0\.s, p0/m, z2\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (msb_f32_m_tied3, svfloat32_t, -+ z0 = svmsb_f32_m (p0, z1, z2, z0), -+ z0 = svmsb_m (p0, z1, z2, z0)) -+ -+/* -+** msb_f32_m_untied: -+** movprfx z0, z1 -+** fmsb z0\.s, p0/m, z2\.s, z3\.s -+** ret -+*/ -+TEST_UNIFORM_Z (msb_f32_m_untied, svfloat32_t, -+ z0 = svmsb_f32_m (p0, z1, z2, z3), -+ z0 = svmsb_m (p0, z1, z2, z3)) -+ -+/* -+** msb_s4_f32_m_tied1: -+** mov (z[0-9]+\.s), s4 -+** fmsb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (msb_s4_f32_m_tied1, svfloat32_t, float, -+ z0 = svmsb_n_f32_m (p0, z0, z1, d4), -+ z0 = svmsb_m (p0, z0, z1, d4)) -+ -+/* -+** msb_s4_f32_m_untied: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0, z1 -+** fmsb z0\.s, p0/m, z2\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (msb_s4_f32_m_untied, svfloat32_t, float, -+ z0 = svmsb_n_f32_m (p0, z1, z2, d4), -+ z0 = svmsb_m (p0, z1, z2, d4)) -+ -+/* -+** msb_2_f32_m_tied1: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fmsb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_2_f32_m_tied1, svfloat32_t, -+ z0 = svmsb_n_f32_m (p0, z0, z1, 2), -+ z0 = svmsb_m (p0, z0, z1, 2)) -+ -+/* -+** msb_2_f32_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fmsb z0\.s, p0/m, z2\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_2_f32_m_untied, svfloat32_t, -+ z0 = svmsb_n_f32_m (p0, z1, z2, 2), -+ z0 = svmsb_m (p0, z1, z2, 2)) -+ -+/* -+** msb_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fmsb z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (msb_f32_z_tied1, svfloat32_t, -+ z0 = svmsb_f32_z (p0, z0, z1, z2), -+ z0 = svmsb_z (p0, z0, z1, z2)) -+ -+/* -+** msb_f32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** fmsb z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (msb_f32_z_tied2, svfloat32_t, -+ z0 = svmsb_f32_z (p0, z1, z0, z2), -+ z0 = svmsb_z (p0, z1, z0, z2)) -+ -+/* -+** msb_f32_z_tied3: -+** movprfx z0\.s, p0/z, z0\.s -+** fmls z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (msb_f32_z_tied3, svfloat32_t, -+ z0 = svmsb_f32_z (p0, z1, z2, z0), -+ z0 = svmsb_z (p0, z1, z2, z0)) -+ -+/* -+** msb_f32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fmsb z0\.s, p0/m, z2\.s, z3\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fmsb z0\.s, p0/m, z1\.s, z3\.s -+** | -+** movprfx z0\.s, p0/z, z3\.s -+** fmls z0\.s, p0/m, z1\.s, z2\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (msb_f32_z_untied, svfloat32_t, -+ z0 = svmsb_f32_z (p0, z1, z2, z3), -+ z0 = svmsb_z (p0, z1, z2, z3)) -+ -+/* -+** msb_s4_f32_z_tied1: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0\.s, p0/z, z0\.s -+** fmsb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (msb_s4_f32_z_tied1, svfloat32_t, float, -+ z0 = svmsb_n_f32_z (p0, z0, z1, d4), -+ z0 = svmsb_z (p0, z0, z1, d4)) -+ -+/* -+** msb_s4_f32_z_tied2: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0\.s, p0/z, z0\.s -+** fmsb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (msb_s4_f32_z_tied2, svfloat32_t, float, -+ z0 = svmsb_n_f32_z (p0, z1, z0, d4), -+ z0 = svmsb_z (p0, z1, z0, d4)) -+ -+/* -+** msb_s4_f32_z_untied: -+** mov (z[0-9]+\.s), s4 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fmsb z0\.s, p0/m, z2\.s, \1 -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fmsb z0\.s, p0/m, z1\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** fmls z0\.s, p0/m, z1\.s, z2\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (msb_s4_f32_z_untied, svfloat32_t, float, -+ z0 = svmsb_n_f32_z (p0, z1, z2, d4), -+ z0 = svmsb_z (p0, z1, z2, d4)) -+ -+/* -+** msb_2_f32_z_tied1: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** movprfx z0\.s, p0/z, z0\.s -+** fmsb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_2_f32_z_tied1, svfloat32_t, -+ z0 = svmsb_n_f32_z (p0, z0, z1, 2), -+ z0 = svmsb_z (p0, z0, z1, 2)) -+ -+/* -+** msb_2_f32_z_tied2: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** movprfx z0\.s, p0/z, z0\.s -+** fmsb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_2_f32_z_tied2, svfloat32_t, -+ z0 = svmsb_n_f32_z (p0, z1, z0, 2), -+ z0 = svmsb_z (p0, z1, z0, 2)) -+ -+/* -+** msb_2_f32_z_untied: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fmsb z0\.s, p0/m, z2\.s, \1 -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fmsb z0\.s, p0/m, z1\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** fmls z0\.s, p0/m, z1\.s, z2\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (msb_2_f32_z_untied, svfloat32_t, -+ z0 = svmsb_n_f32_z (p0, z1, z2, 2), -+ z0 = svmsb_z (p0, z1, z2, 2)) -+ -+/* -+** msb_f32_x_tied1: -+** fmsb z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (msb_f32_x_tied1, svfloat32_t, -+ z0 = svmsb_f32_x (p0, z0, z1, z2), -+ z0 = svmsb_x (p0, z0, z1, z2)) -+ -+/* -+** msb_f32_x_tied2: -+** fmsb z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (msb_f32_x_tied2, svfloat32_t, -+ z0 = svmsb_f32_x (p0, z1, z0, z2), -+ z0 = svmsb_x (p0, z1, z0, z2)) -+ -+/* -+** msb_f32_x_tied3: -+** fmls z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (msb_f32_x_tied3, svfloat32_t, -+ z0 = svmsb_f32_x (p0, z1, z2, z0), -+ z0 = svmsb_x (p0, z1, z2, z0)) -+ -+/* -+** msb_f32_x_untied: -+** ( -+** movprfx z0, z1 -+** fmsb z0\.s, p0/m, z2\.s, z3\.s -+** | -+** movprfx z0, z2 -+** fmsb z0\.s, p0/m, z1\.s, z3\.s -+** | -+** movprfx z0, z3 -+** fmls z0\.s, p0/m, z1\.s, z2\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (msb_f32_x_untied, svfloat32_t, -+ z0 = svmsb_f32_x (p0, z1, z2, z3), -+ z0 = svmsb_x (p0, z1, z2, z3)) -+ -+/* -+** msb_s4_f32_x_tied1: -+** mov (z[0-9]+\.s), s4 -+** fmsb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (msb_s4_f32_x_tied1, svfloat32_t, float, -+ z0 = svmsb_n_f32_x (p0, z0, z1, d4), -+ z0 = svmsb_x (p0, z0, z1, d4)) -+ -+/* -+** msb_s4_f32_x_tied2: -+** mov (z[0-9]+\.s), s4 -+** fmsb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (msb_s4_f32_x_tied2, svfloat32_t, float, -+ z0 = svmsb_n_f32_x (p0, z1, z0, d4), -+ z0 = svmsb_x (p0, z1, z0, d4)) -+ -+/* -+** msb_s4_f32_x_untied: { xfail *-*-* } -+** mov z0\.s, s4 -+** fmls z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_ZD (msb_s4_f32_x_untied, svfloat32_t, float, -+ z0 = svmsb_n_f32_x (p0, z1, z2, d4), -+ z0 = svmsb_x (p0, z1, z2, d4)) -+ -+/* -+** msb_2_f32_x_tied1: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fmsb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_2_f32_x_tied1, svfloat32_t, -+ z0 = svmsb_n_f32_x (p0, z0, z1, 2), -+ z0 = svmsb_x (p0, z0, z1, 2)) -+ -+/* -+** msb_2_f32_x_tied2: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fmsb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_2_f32_x_tied2, svfloat32_t, -+ z0 = svmsb_n_f32_x (p0, z1, z0, 2), -+ z0 = svmsb_x (p0, z1, z0, 2)) -+ -+/* -+** msb_2_f32_x_untied: -+** fmov z0\.s, #2\.0(?:e\+0)? -+** fmls z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (msb_2_f32_x_untied, svfloat32_t, -+ z0 = svmsb_n_f32_x (p0, z1, z2, 2), -+ z0 = svmsb_x (p0, z1, z2, 2)) -+ -+/* -+** ptrue_msb_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_msb_f32_x_tied1, svfloat32_t, -+ z0 = svmsb_f32_x (svptrue_b32 (), z0, z1, z2), -+ z0 = svmsb_x (svptrue_b32 (), z0, z1, z2)) -+ -+/* -+** ptrue_msb_f32_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_msb_f32_x_tied2, svfloat32_t, -+ z0 = svmsb_f32_x (svptrue_b32 (), z1, z0, z2), -+ z0 = svmsb_x (svptrue_b32 (), z1, z0, z2)) -+ -+/* -+** ptrue_msb_f32_x_tied3: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_msb_f32_x_tied3, svfloat32_t, -+ z0 = svmsb_f32_x (svptrue_b32 (), z1, z2, z0), -+ z0 = svmsb_x (svptrue_b32 (), z1, z2, z0)) -+ -+/* -+** ptrue_msb_f32_x_untied: -+** ... 
-+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_msb_f32_x_untied, svfloat32_t, -+ z0 = svmsb_f32_x (svptrue_b32 (), z1, z2, z3), -+ z0 = svmsb_x (svptrue_b32 (), z1, z2, z3)) -+ -+/* -+** ptrue_msb_2_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_msb_2_f32_x_tied1, svfloat32_t, -+ z0 = svmsb_n_f32_x (svptrue_b32 (), z0, z1, 2), -+ z0 = svmsb_x (svptrue_b32 (), z0, z1, 2)) -+ -+/* -+** ptrue_msb_2_f32_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_msb_2_f32_x_tied2, svfloat32_t, -+ z0 = svmsb_n_f32_x (svptrue_b32 (), z1, z0, 2), -+ z0 = svmsb_x (svptrue_b32 (), z1, z0, 2)) -+ -+/* -+** ptrue_msb_2_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_msb_2_f32_x_untied, svfloat32_t, -+ z0 = svmsb_n_f32_x (svptrue_b32 (), z1, z2, 2), -+ z0 = svmsb_x (svptrue_b32 (), z1, z2, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_f64.c -new file mode 100644 -index 000000000..e3ff414d8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_f64.c -@@ -0,0 +1,398 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** msb_f64_m_tied1: -+** fmsb z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (msb_f64_m_tied1, svfloat64_t, -+ z0 = svmsb_f64_m (p0, z0, z1, z2), -+ z0 = svmsb_m (p0, z0, z1, z2)) -+ -+/* -+** msb_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fmsb z0\.d, p0/m, \1, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (msb_f64_m_tied2, svfloat64_t, -+ z0 = svmsb_f64_m (p0, z1, z0, z2), -+ z0 = svmsb_m (p0, z1, z0, z2)) -+ -+/* -+** msb_f64_m_tied3: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fmsb z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_f64_m_tied3, svfloat64_t, -+ z0 = svmsb_f64_m (p0, z1, z2, z0), -+ z0 = svmsb_m (p0, z1, z2, z0)) -+ -+/* -+** msb_f64_m_untied: -+** movprfx z0, z1 -+** fmsb z0\.d, p0/m, z2\.d, z3\.d -+** ret -+*/ -+TEST_UNIFORM_Z (msb_f64_m_untied, svfloat64_t, -+ z0 = svmsb_f64_m (p0, z1, z2, z3), -+ z0 = svmsb_m (p0, z1, z2, z3)) -+ -+/* -+** msb_d4_f64_m_tied1: -+** mov (z[0-9]+\.d), d4 -+** fmsb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (msb_d4_f64_m_tied1, svfloat64_t, double, -+ z0 = svmsb_n_f64_m (p0, z0, z1, d4), -+ z0 = svmsb_m (p0, z0, z1, d4)) -+ -+/* -+** msb_d4_f64_m_untied: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0, z1 -+** fmsb z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (msb_d4_f64_m_untied, svfloat64_t, double, -+ z0 = svmsb_n_f64_m (p0, z1, z2, d4), -+ z0 = svmsb_m (p0, z1, z2, d4)) -+ -+/* -+** msb_2_f64_m_tied1: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fmsb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_2_f64_m_tied1, svfloat64_t, -+ z0 = svmsb_n_f64_m (p0, z0, z1, 2), -+ z0 = svmsb_m (p0, z0, z1, 2)) -+ -+/* -+** msb_2_f64_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fmsb z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_2_f64_m_untied, svfloat64_t, -+ z0 = svmsb_n_f64_m (p0, z1, z2, 2), -+ z0 = svmsb_m (p0, z1, z2, 2)) -+ -+/* -+** msb_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fmsb z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (msb_f64_z_tied1, svfloat64_t, -+ z0 = svmsb_f64_z (p0, z0, z1, z2), -+ z0 = svmsb_z (p0, z0, z1, z2)) -+ -+/* -+** msb_f64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** fmsb z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (msb_f64_z_tied2, svfloat64_t, -+ z0 = svmsb_f64_z (p0, z1, z0, z2), -+ z0 = svmsb_z (p0, z1, z0, z2)) -+ -+/* -+** msb_f64_z_tied3: -+** movprfx z0\.d, p0/z, z0\.d -+** fmls z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (msb_f64_z_tied3, svfloat64_t, -+ z0 = svmsb_f64_z (p0, z1, z2, z0), -+ z0 = svmsb_z (p0, z1, z2, z0)) -+ -+/* -+** msb_f64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fmsb z0\.d, p0/m, z2\.d, z3\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fmsb z0\.d, p0/m, z1\.d, z3\.d -+** | -+** movprfx z0\.d, p0/z, z3\.d -+** fmls z0\.d, p0/m, z1\.d, z2\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (msb_f64_z_untied, svfloat64_t, -+ z0 = svmsb_f64_z (p0, z1, z2, z3), -+ z0 = svmsb_z (p0, z1, z2, z3)) -+ -+/* -+** msb_d4_f64_z_tied1: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0\.d, p0/z, z0\.d -+** fmsb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (msb_d4_f64_z_tied1, svfloat64_t, double, -+ z0 = svmsb_n_f64_z (p0, z0, z1, d4), -+ z0 = svmsb_z (p0, z0, z1, d4)) -+ -+/* -+** msb_d4_f64_z_tied2: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0\.d, p0/z, z0\.d -+** fmsb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (msb_d4_f64_z_tied2, svfloat64_t, double, -+ z0 = svmsb_n_f64_z (p0, z1, z0, d4), -+ z0 = svmsb_z (p0, z1, z0, d4)) -+ -+/* -+** msb_d4_f64_z_untied: -+** mov (z[0-9]+\.d), d4 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fmsb z0\.d, p0/m, z2\.d, \1 -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fmsb z0\.d, p0/m, z1\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** fmls z0\.d, p0/m, z1\.d, z2\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (msb_d4_f64_z_untied, svfloat64_t, double, -+ z0 = svmsb_n_f64_z (p0, z1, z2, d4), -+ z0 = svmsb_z (p0, z1, z2, d4)) -+ -+/* -+** msb_2_f64_z_tied1: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** movprfx z0\.d, p0/z, z0\.d -+** fmsb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_2_f64_z_tied1, svfloat64_t, -+ z0 = svmsb_n_f64_z (p0, z0, z1, 2), -+ z0 = svmsb_z (p0, z0, z1, 2)) -+ -+/* -+** msb_2_f64_z_tied2: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** movprfx z0\.d, p0/z, z0\.d -+** fmsb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_2_f64_z_tied2, svfloat64_t, -+ z0 = svmsb_n_f64_z (p0, z1, z0, 2), -+ z0 = svmsb_z (p0, z1, z0, 2)) -+ -+/* -+** msb_2_f64_z_untied: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fmsb z0\.d, p0/m, z2\.d, \1 -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fmsb z0\.d, p0/m, z1\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** fmls z0\.d, p0/m, z1\.d, z2\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (msb_2_f64_z_untied, svfloat64_t, -+ z0 = svmsb_n_f64_z (p0, z1, z2, 2), -+ z0 = svmsb_z (p0, z1, z2, 2)) -+ -+/* -+** msb_f64_x_tied1: -+** fmsb z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (msb_f64_x_tied1, svfloat64_t, -+ z0 = svmsb_f64_x (p0, z0, z1, z2), -+ z0 = svmsb_x (p0, z0, z1, z2)) -+ -+/* -+** msb_f64_x_tied2: -+** fmsb z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (msb_f64_x_tied2, svfloat64_t, -+ z0 = svmsb_f64_x (p0, z1, z0, z2), -+ z0 = svmsb_x (p0, z1, z0, z2)) -+ -+/* -+** msb_f64_x_tied3: -+** fmls z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (msb_f64_x_tied3, svfloat64_t, -+ z0 = svmsb_f64_x (p0, z1, z2, z0), -+ z0 = svmsb_x (p0, z1, z2, z0)) -+ -+/* -+** msb_f64_x_untied: -+** ( -+** movprfx z0, z1 -+** fmsb z0\.d, p0/m, z2\.d, z3\.d -+** | -+** movprfx z0, z2 -+** fmsb z0\.d, p0/m, z1\.d, z3\.d -+** | -+** movprfx z0, z3 -+** fmls z0\.d, p0/m, z1\.d, z2\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (msb_f64_x_untied, svfloat64_t, -+ z0 = svmsb_f64_x (p0, z1, z2, z3), -+ z0 = svmsb_x (p0, z1, z2, z3)) -+ -+/* -+** msb_d4_f64_x_tied1: -+** mov (z[0-9]+\.d), d4 -+** fmsb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (msb_d4_f64_x_tied1, svfloat64_t, double, -+ z0 = svmsb_n_f64_x (p0, z0, z1, d4), -+ z0 = svmsb_x (p0, z0, z1, d4)) -+ -+/* -+** msb_d4_f64_x_tied2: -+** mov (z[0-9]+\.d), d4 -+** fmsb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (msb_d4_f64_x_tied2, svfloat64_t, double, -+ z0 = svmsb_n_f64_x (p0, z1, z0, d4), -+ z0 = svmsb_x (p0, z1, z0, d4)) -+ -+/* -+** msb_d4_f64_x_untied: { xfail *-*-* } -+** mov z0\.d, d4 -+** fmls z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_ZD (msb_d4_f64_x_untied, svfloat64_t, double, -+ z0 = svmsb_n_f64_x (p0, z1, z2, d4), -+ z0 = svmsb_x (p0, z1, z2, d4)) -+ -+/* -+** msb_2_f64_x_tied1: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fmsb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_2_f64_x_tied1, svfloat64_t, -+ z0 = svmsb_n_f64_x (p0, z0, z1, 2), -+ z0 = svmsb_x (p0, z0, z1, 2)) -+ -+/* -+** msb_2_f64_x_tied2: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fmsb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_2_f64_x_tied2, svfloat64_t, -+ z0 = svmsb_n_f64_x (p0, z1, z0, 2), -+ z0 = svmsb_x (p0, z1, z0, 2)) -+ -+/* -+** msb_2_f64_x_untied: -+** fmov z0\.d, #2\.0(?:e\+0)? -+** fmls z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (msb_2_f64_x_untied, svfloat64_t, -+ z0 = svmsb_n_f64_x (p0, z1, z2, 2), -+ z0 = svmsb_x (p0, z1, z2, 2)) -+ -+/* -+** ptrue_msb_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_msb_f64_x_tied1, svfloat64_t, -+ z0 = svmsb_f64_x (svptrue_b64 (), z0, z1, z2), -+ z0 = svmsb_x (svptrue_b64 (), z0, z1, z2)) -+ -+/* -+** ptrue_msb_f64_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_msb_f64_x_tied2, svfloat64_t, -+ z0 = svmsb_f64_x (svptrue_b64 (), z1, z0, z2), -+ z0 = svmsb_x (svptrue_b64 (), z1, z0, z2)) -+ -+/* -+** ptrue_msb_f64_x_tied3: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_msb_f64_x_tied3, svfloat64_t, -+ z0 = svmsb_f64_x (svptrue_b64 (), z1, z2, z0), -+ z0 = svmsb_x (svptrue_b64 (), z1, z2, z0)) -+ -+/* -+** ptrue_msb_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_msb_f64_x_untied, svfloat64_t, -+ z0 = svmsb_f64_x (svptrue_b64 (), z1, z2, z3), -+ z0 = svmsb_x (svptrue_b64 (), z1, z2, z3)) -+ -+/* -+** ptrue_msb_2_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_msb_2_f64_x_tied1, svfloat64_t, -+ z0 = svmsb_n_f64_x (svptrue_b64 (), z0, z1, 2), -+ z0 = svmsb_x (svptrue_b64 (), z0, z1, 2)) -+ -+/* -+** ptrue_msb_2_f64_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_msb_2_f64_x_tied2, svfloat64_t, -+ z0 = svmsb_n_f64_x (svptrue_b64 (), z1, z0, 2), -+ z0 = svmsb_x (svptrue_b64 (), z1, z0, 2)) -+ -+/* -+** ptrue_msb_2_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_msb_2_f64_x_untied, svfloat64_t, -+ z0 = svmsb_n_f64_x (svptrue_b64 (), z1, z2, 2), -+ z0 = svmsb_x (svptrue_b64 (), z1, z2, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_s16.c -new file mode 100644 -index 000000000..56347cfb9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_s16.c -@@ -0,0 +1,321 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** msb_s16_m_tied1: -+** msb z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s16_m_tied1, svint16_t, -+ z0 = svmsb_s16_m (p0, z0, z1, z2), -+ z0 = svmsb_m (p0, z0, z1, z2)) -+ -+/* -+** msb_s16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** msb z0\.h, p0/m, \1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s16_m_tied2, svint16_t, -+ z0 = svmsb_s16_m (p0, z1, z0, z2), -+ z0 = svmsb_m (p0, z1, z0, z2)) -+ -+/* -+** msb_s16_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** msb z0\.h, p0/m, z2\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s16_m_tied3, svint16_t, -+ z0 = svmsb_s16_m (p0, z1, z2, z0), -+ z0 = svmsb_m (p0, z1, z2, z0)) -+ -+/* -+** msb_s16_m_untied: -+** movprfx z0, z1 -+** msb z0\.h, p0/m, z2\.h, z3\.h -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s16_m_untied, svint16_t, -+ z0 = svmsb_s16_m (p0, z1, z2, z3), -+ z0 = svmsb_m (p0, z1, z2, z3)) -+ -+/* -+** msb_w0_s16_m_tied1: -+** mov (z[0-9]+\.h), w0 -+** msb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_s16_m_tied1, svint16_t, int16_t, -+ z0 = svmsb_n_s16_m (p0, z0, z1, x0), -+ z0 = svmsb_m (p0, z0, z1, x0)) -+ -+/* -+** msb_w0_s16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** msb z0\.h, p0/m, z2\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_s16_m_untied, svint16_t, int16_t, -+ z0 = svmsb_n_s16_m (p0, z1, z2, x0), -+ z0 = svmsb_m (p0, z1, z2, x0)) -+ -+/* -+** msb_11_s16_m_tied1: -+** mov (z[0-9]+\.h), #11 -+** msb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_s16_m_tied1, svint16_t, -+ z0 = svmsb_n_s16_m (p0, z0, z1, 11), -+ z0 = svmsb_m (p0, z0, z1, 11)) -+ -+/* -+** msb_11_s16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), #11 -+** movprfx z0, z1 -+** msb z0\.h, p0/m, z2\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_s16_m_untied, svint16_t, -+ z0 = svmsb_n_s16_m (p0, z1, z2, 11), -+ z0 = svmsb_m (p0, z1, z2, 11)) -+ -+/* -+** msb_s16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** msb z0\.h, p0/m, z1\.h, z2\.h 
-+** ret -+*/ -+TEST_UNIFORM_Z (msb_s16_z_tied1, svint16_t, -+ z0 = svmsb_s16_z (p0, z0, z1, z2), -+ z0 = svmsb_z (p0, z0, z1, z2)) -+ -+/* -+** msb_s16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** msb z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s16_z_tied2, svint16_t, -+ z0 = svmsb_s16_z (p0, z1, z0, z2), -+ z0 = svmsb_z (p0, z1, z0, z2)) -+ -+/* -+** msb_s16_z_tied3: -+** movprfx z0\.h, p0/z, z0\.h -+** mls z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s16_z_tied3, svint16_t, -+ z0 = svmsb_s16_z (p0, z1, z2, z0), -+ z0 = svmsb_z (p0, z1, z2, z0)) -+ -+/* -+** msb_s16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** msb z0\.h, p0/m, z2\.h, z3\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** msb z0\.h, p0/m, z1\.h, z3\.h -+** | -+** movprfx z0\.h, p0/z, z3\.h -+** mls z0\.h, p0/m, z1\.h, z2\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s16_z_untied, svint16_t, -+ z0 = svmsb_s16_z (p0, z1, z2, z3), -+ z0 = svmsb_z (p0, z1, z2, z3)) -+ -+/* -+** msb_w0_s16_z_tied1: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** msb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_s16_z_tied1, svint16_t, int16_t, -+ z0 = svmsb_n_s16_z (p0, z0, z1, x0), -+ z0 = svmsb_z (p0, z0, z1, x0)) -+ -+/* -+** msb_w0_s16_z_tied2: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** msb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_s16_z_tied2, svint16_t, int16_t, -+ z0 = svmsb_n_s16_z (p0, z1, z0, x0), -+ z0 = svmsb_z (p0, z1, z0, x0)) -+ -+/* -+** msb_w0_s16_z_untied: -+** mov (z[0-9]+\.h), w0 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** msb z0\.h, p0/m, z2\.h, \1 -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** msb z0\.h, p0/m, z1\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** mls z0\.h, p0/m, z1\.h, z2\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_s16_z_untied, svint16_t, int16_t, -+ z0 = svmsb_n_s16_z (p0, z1, z2, x0), -+ z0 = svmsb_z (p0, z1, z2, x0)) -+ -+/* -+** msb_11_s16_z_tied1: -+** mov (z[0-9]+\.h), #11 -+** movprfx z0\.h, p0/z, z0\.h -+** msb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_s16_z_tied1, svint16_t, -+ z0 = svmsb_n_s16_z (p0, z0, z1, 11), -+ z0 = svmsb_z (p0, z0, z1, 11)) -+ -+/* -+** msb_11_s16_z_tied2: -+** mov (z[0-9]+\.h), #11 -+** movprfx z0\.h, p0/z, z0\.h -+** msb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_s16_z_tied2, svint16_t, -+ z0 = svmsb_n_s16_z (p0, z1, z0, 11), -+ z0 = svmsb_z (p0, z1, z0, 11)) -+ -+/* -+** msb_11_s16_z_untied: -+** mov (z[0-9]+\.h), #11 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** msb z0\.h, p0/m, z2\.h, \1 -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** msb z0\.h, p0/m, z1\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** mls z0\.h, p0/m, z1\.h, z2\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_s16_z_untied, svint16_t, -+ z0 = svmsb_n_s16_z (p0, z1, z2, 11), -+ z0 = svmsb_z (p0, z1, z2, 11)) -+ -+/* -+** msb_s16_x_tied1: -+** msb z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s16_x_tied1, svint16_t, -+ z0 = svmsb_s16_x (p0, z0, z1, z2), -+ z0 = svmsb_x (p0, z0, z1, z2)) -+ -+/* -+** msb_s16_x_tied2: -+** msb z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s16_x_tied2, svint16_t, -+ z0 = svmsb_s16_x (p0, z1, z0, z2), -+ z0 = svmsb_x (p0, z1, z0, z2)) -+ -+/* -+** msb_s16_x_tied3: -+** mls z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s16_x_tied3, svint16_t, -+ z0 = svmsb_s16_x (p0, z1, z2, z0), -+ z0 = svmsb_x (p0, z1, z2, z0)) -+ -+/* -+** msb_s16_x_untied: -+** ( -+** movprfx z0, z1 -+** msb 
z0\.h, p0/m, z2\.h, z3\.h -+** | -+** movprfx z0, z2 -+** msb z0\.h, p0/m, z1\.h, z3\.h -+** | -+** movprfx z0, z3 -+** mls z0\.h, p0/m, z1\.h, z2\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s16_x_untied, svint16_t, -+ z0 = svmsb_s16_x (p0, z1, z2, z3), -+ z0 = svmsb_x (p0, z1, z2, z3)) -+ -+/* -+** msb_w0_s16_x_tied1: -+** mov (z[0-9]+\.h), w0 -+** msb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_s16_x_tied1, svint16_t, int16_t, -+ z0 = svmsb_n_s16_x (p0, z0, z1, x0), -+ z0 = svmsb_x (p0, z0, z1, x0)) -+ -+/* -+** msb_w0_s16_x_tied2: -+** mov (z[0-9]+\.h), w0 -+** msb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_s16_x_tied2, svint16_t, int16_t, -+ z0 = svmsb_n_s16_x (p0, z1, z0, x0), -+ z0 = svmsb_x (p0, z1, z0, x0)) -+ -+/* -+** msb_w0_s16_x_untied: -+** mov z0\.h, w0 -+** mls z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_s16_x_untied, svint16_t, int16_t, -+ z0 = svmsb_n_s16_x (p0, z1, z2, x0), -+ z0 = svmsb_x (p0, z1, z2, x0)) -+ -+/* -+** msb_11_s16_x_tied1: -+** mov (z[0-9]+\.h), #11 -+** msb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_s16_x_tied1, svint16_t, -+ z0 = svmsb_n_s16_x (p0, z0, z1, 11), -+ z0 = svmsb_x (p0, z0, z1, 11)) -+ -+/* -+** msb_11_s16_x_tied2: -+** mov (z[0-9]+\.h), #11 -+** msb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_s16_x_tied2, svint16_t, -+ z0 = svmsb_n_s16_x (p0, z1, z0, 11), -+ z0 = svmsb_x (p0, z1, z0, 11)) -+ -+/* -+** msb_11_s16_x_untied: -+** mov z0\.h, #11 -+** mls z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_s16_x_untied, svint16_t, -+ z0 = svmsb_n_s16_x (p0, z1, z2, 11), -+ z0 = svmsb_x (p0, z1, z2, 11)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_s32.c -new file mode 100644 -index 000000000..fb7a7815b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_s32.c -@@ -0,0 +1,321 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** msb_s32_m_tied1: -+** msb z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s32_m_tied1, svint32_t, -+ z0 = svmsb_s32_m (p0, z0, z1, z2), -+ z0 = svmsb_m (p0, z0, z1, z2)) -+ -+/* -+** msb_s32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** msb z0\.s, p0/m, \1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s32_m_tied2, svint32_t, -+ z0 = svmsb_s32_m (p0, z1, z0, z2), -+ z0 = svmsb_m (p0, z1, z0, z2)) -+ -+/* -+** msb_s32_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** msb z0\.s, p0/m, z2\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s32_m_tied3, svint32_t, -+ z0 = svmsb_s32_m (p0, z1, z2, z0), -+ z0 = svmsb_m (p0, z1, z2, z0)) -+ -+/* -+** msb_s32_m_untied: -+** movprfx z0, z1 -+** msb z0\.s, p0/m, z2\.s, z3\.s -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s32_m_untied, svint32_t, -+ z0 = svmsb_s32_m (p0, z1, z2, z3), -+ z0 = svmsb_m (p0, z1, z2, z3)) -+ -+/* -+** msb_w0_s32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** msb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_s32_m_tied1, svint32_t, int32_t, -+ z0 = svmsb_n_s32_m (p0, z0, z1, x0), -+ z0 = svmsb_m (p0, z0, z1, x0)) -+ -+/* -+** msb_w0_s32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** msb z0\.s, p0/m, z2\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_s32_m_untied, svint32_t, int32_t, -+ z0 = svmsb_n_s32_m (p0, z1, z2, x0), -+ z0 = svmsb_m (p0, z1, z2, x0)) -+ -+/* -+** msb_11_s32_m_tied1: -+** mov (z[0-9]+\.s), #11 -+** msb z0\.s, p0/m, 
z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_s32_m_tied1, svint32_t, -+ z0 = svmsb_n_s32_m (p0, z0, z1, 11), -+ z0 = svmsb_m (p0, z0, z1, 11)) -+ -+/* -+** msb_11_s32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #11 -+** movprfx z0, z1 -+** msb z0\.s, p0/m, z2\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_s32_m_untied, svint32_t, -+ z0 = svmsb_n_s32_m (p0, z1, z2, 11), -+ z0 = svmsb_m (p0, z1, z2, 11)) -+ -+/* -+** msb_s32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** msb z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s32_z_tied1, svint32_t, -+ z0 = svmsb_s32_z (p0, z0, z1, z2), -+ z0 = svmsb_z (p0, z0, z1, z2)) -+ -+/* -+** msb_s32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** msb z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s32_z_tied2, svint32_t, -+ z0 = svmsb_s32_z (p0, z1, z0, z2), -+ z0 = svmsb_z (p0, z1, z0, z2)) -+ -+/* -+** msb_s32_z_tied3: -+** movprfx z0\.s, p0/z, z0\.s -+** mls z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s32_z_tied3, svint32_t, -+ z0 = svmsb_s32_z (p0, z1, z2, z0), -+ z0 = svmsb_z (p0, z1, z2, z0)) -+ -+/* -+** msb_s32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** msb z0\.s, p0/m, z2\.s, z3\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** msb z0\.s, p0/m, z1\.s, z3\.s -+** | -+** movprfx z0\.s, p0/z, z3\.s -+** mls z0\.s, p0/m, z1\.s, z2\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s32_z_untied, svint32_t, -+ z0 = svmsb_s32_z (p0, z1, z2, z3), -+ z0 = svmsb_z (p0, z1, z2, z3)) -+ -+/* -+** msb_w0_s32_z_tied1: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** msb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_s32_z_tied1, svint32_t, int32_t, -+ z0 = svmsb_n_s32_z (p0, z0, z1, x0), -+ z0 = svmsb_z (p0, z0, z1, x0)) -+ -+/* -+** msb_w0_s32_z_tied2: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** msb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_s32_z_tied2, svint32_t, int32_t, -+ z0 = svmsb_n_s32_z (p0, z1, z0, x0), -+ z0 = svmsb_z (p0, z1, z0, x0)) -+ -+/* -+** msb_w0_s32_z_untied: -+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** msb z0\.s, p0/m, z2\.s, \1 -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** msb z0\.s, p0/m, z1\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** mls z0\.s, p0/m, z1\.s, z2\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_s32_z_untied, svint32_t, int32_t, -+ z0 = svmsb_n_s32_z (p0, z1, z2, x0), -+ z0 = svmsb_z (p0, z1, z2, x0)) -+ -+/* -+** msb_11_s32_z_tied1: -+** mov (z[0-9]+\.s), #11 -+** movprfx z0\.s, p0/z, z0\.s -+** msb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_s32_z_tied1, svint32_t, -+ z0 = svmsb_n_s32_z (p0, z0, z1, 11), -+ z0 = svmsb_z (p0, z0, z1, 11)) -+ -+/* -+** msb_11_s32_z_tied2: -+** mov (z[0-9]+\.s), #11 -+** movprfx z0\.s, p0/z, z0\.s -+** msb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_s32_z_tied2, svint32_t, -+ z0 = svmsb_n_s32_z (p0, z1, z0, 11), -+ z0 = svmsb_z (p0, z1, z0, 11)) -+ -+/* -+** msb_11_s32_z_untied: -+** mov (z[0-9]+\.s), #11 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** msb z0\.s, p0/m, z2\.s, \1 -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** msb z0\.s, p0/m, z1\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** mls z0\.s, p0/m, z1\.s, z2\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_s32_z_untied, svint32_t, -+ z0 = svmsb_n_s32_z (p0, z1, z2, 11), -+ z0 = svmsb_z (p0, z1, z2, 11)) -+ -+/* -+** msb_s32_x_tied1: -+** msb z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s32_x_tied1, svint32_t, -+ z0 = svmsb_s32_x (p0, 
z0, z1, z2), -+ z0 = svmsb_x (p0, z0, z1, z2)) -+ -+/* -+** msb_s32_x_tied2: -+** msb z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s32_x_tied2, svint32_t, -+ z0 = svmsb_s32_x (p0, z1, z0, z2), -+ z0 = svmsb_x (p0, z1, z0, z2)) -+ -+/* -+** msb_s32_x_tied3: -+** mls z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s32_x_tied3, svint32_t, -+ z0 = svmsb_s32_x (p0, z1, z2, z0), -+ z0 = svmsb_x (p0, z1, z2, z0)) -+ -+/* -+** msb_s32_x_untied: -+** ( -+** movprfx z0, z1 -+** msb z0\.s, p0/m, z2\.s, z3\.s -+** | -+** movprfx z0, z2 -+** msb z0\.s, p0/m, z1\.s, z3\.s -+** | -+** movprfx z0, z3 -+** mls z0\.s, p0/m, z1\.s, z2\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s32_x_untied, svint32_t, -+ z0 = svmsb_s32_x (p0, z1, z2, z3), -+ z0 = svmsb_x (p0, z1, z2, z3)) -+ -+/* -+** msb_w0_s32_x_tied1: -+** mov (z[0-9]+\.s), w0 -+** msb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_s32_x_tied1, svint32_t, int32_t, -+ z0 = svmsb_n_s32_x (p0, z0, z1, x0), -+ z0 = svmsb_x (p0, z0, z1, x0)) -+ -+/* -+** msb_w0_s32_x_tied2: -+** mov (z[0-9]+\.s), w0 -+** msb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_s32_x_tied2, svint32_t, int32_t, -+ z0 = svmsb_n_s32_x (p0, z1, z0, x0), -+ z0 = svmsb_x (p0, z1, z0, x0)) -+ -+/* -+** msb_w0_s32_x_untied: -+** mov z0\.s, w0 -+** mls z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_s32_x_untied, svint32_t, int32_t, -+ z0 = svmsb_n_s32_x (p0, z1, z2, x0), -+ z0 = svmsb_x (p0, z1, z2, x0)) -+ -+/* -+** msb_11_s32_x_tied1: -+** mov (z[0-9]+\.s), #11 -+** msb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_s32_x_tied1, svint32_t, -+ z0 = svmsb_n_s32_x (p0, z0, z1, 11), -+ z0 = svmsb_x (p0, z0, z1, 11)) -+ -+/* -+** msb_11_s32_x_tied2: -+** mov (z[0-9]+\.s), #11 -+** msb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_s32_x_tied2, svint32_t, -+ z0 = svmsb_n_s32_x (p0, z1, z0, 11), -+ z0 = svmsb_x (p0, z1, z0, 11)) -+ -+/* -+** msb_11_s32_x_untied: -+** mov z0\.s, #11 -+** mls z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_s32_x_untied, svint32_t, -+ z0 = svmsb_n_s32_x (p0, z1, z2, 11), -+ z0 = svmsb_x (p0, z1, z2, 11)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_s64.c -new file mode 100644 -index 000000000..6829fab36 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_s64.c -@@ -0,0 +1,321 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** msb_s64_m_tied1: -+** msb z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s64_m_tied1, svint64_t, -+ z0 = svmsb_s64_m (p0, z0, z1, z2), -+ z0 = svmsb_m (p0, z0, z1, z2)) -+ -+/* -+** msb_s64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** msb z0\.d, p0/m, \1, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s64_m_tied2, svint64_t, -+ z0 = svmsb_s64_m (p0, z1, z0, z2), -+ z0 = svmsb_m (p0, z1, z0, z2)) -+ -+/* -+** msb_s64_m_tied3: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** msb z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s64_m_tied3, svint64_t, -+ z0 = svmsb_s64_m (p0, z1, z2, z0), -+ z0 = svmsb_m (p0, z1, z2, z0)) -+ -+/* -+** msb_s64_m_untied: -+** movprfx z0, z1 -+** msb z0\.d, p0/m, z2\.d, z3\.d -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s64_m_untied, svint64_t, -+ z0 = svmsb_s64_m (p0, z1, z2, z3), -+ z0 = svmsb_m (p0, z1, z2, z3)) -+ -+/* -+** msb_x0_s64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** msb z0\.d, p0/m, 
z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_x0_s64_m_tied1, svint64_t, int64_t, -+ z0 = svmsb_n_s64_m (p0, z0, z1, x0), -+ z0 = svmsb_m (p0, z0, z1, x0)) -+ -+/* -+** msb_x0_s64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** msb z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_x0_s64_m_untied, svint64_t, int64_t, -+ z0 = svmsb_n_s64_m (p0, z1, z2, x0), -+ z0 = svmsb_m (p0, z1, z2, x0)) -+ -+/* -+** msb_11_s64_m_tied1: -+** mov (z[0-9]+\.d), #11 -+** msb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_s64_m_tied1, svint64_t, -+ z0 = svmsb_n_s64_m (p0, z0, z1, 11), -+ z0 = svmsb_m (p0, z0, z1, 11)) -+ -+/* -+** msb_11_s64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #11 -+** movprfx z0, z1 -+** msb z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_s64_m_untied, svint64_t, -+ z0 = svmsb_n_s64_m (p0, z1, z2, 11), -+ z0 = svmsb_m (p0, z1, z2, 11)) -+ -+/* -+** msb_s64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** msb z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s64_z_tied1, svint64_t, -+ z0 = svmsb_s64_z (p0, z0, z1, z2), -+ z0 = svmsb_z (p0, z0, z1, z2)) -+ -+/* -+** msb_s64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** msb z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s64_z_tied2, svint64_t, -+ z0 = svmsb_s64_z (p0, z1, z0, z2), -+ z0 = svmsb_z (p0, z1, z0, z2)) -+ -+/* -+** msb_s64_z_tied3: -+** movprfx z0\.d, p0/z, z0\.d -+** mls z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s64_z_tied3, svint64_t, -+ z0 = svmsb_s64_z (p0, z1, z2, z0), -+ z0 = svmsb_z (p0, z1, z2, z0)) -+ -+/* -+** msb_s64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** msb z0\.d, p0/m, z2\.d, z3\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** msb z0\.d, p0/m, z1\.d, z3\.d -+** | -+** movprfx z0\.d, p0/z, z3\.d -+** mls z0\.d, p0/m, z1\.d, z2\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s64_z_untied, svint64_t, -+ z0 = svmsb_s64_z (p0, z1, z2, z3), -+ z0 = svmsb_z (p0, z1, z2, z3)) -+ -+/* -+** msb_x0_s64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** msb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_x0_s64_z_tied1, svint64_t, int64_t, -+ z0 = svmsb_n_s64_z (p0, z0, z1, x0), -+ z0 = svmsb_z (p0, z0, z1, x0)) -+ -+/* -+** msb_x0_s64_z_tied2: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** msb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_x0_s64_z_tied2, svint64_t, int64_t, -+ z0 = svmsb_n_s64_z (p0, z1, z0, x0), -+ z0 = svmsb_z (p0, z1, z0, x0)) -+ -+/* -+** msb_x0_s64_z_untied: -+** mov (z[0-9]+\.d), x0 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** msb z0\.d, p0/m, z2\.d, \1 -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** msb z0\.d, p0/m, z1\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** mls z0\.d, p0/m, z1\.d, z2\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_x0_s64_z_untied, svint64_t, int64_t, -+ z0 = svmsb_n_s64_z (p0, z1, z2, x0), -+ z0 = svmsb_z (p0, z1, z2, x0)) -+ -+/* -+** msb_11_s64_z_tied1: -+** mov (z[0-9]+\.d), #11 -+** movprfx z0\.d, p0/z, z0\.d -+** msb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_s64_z_tied1, svint64_t, -+ z0 = svmsb_n_s64_z (p0, z0, z1, 11), -+ z0 = svmsb_z (p0, z0, z1, 11)) -+ -+/* -+** msb_11_s64_z_tied2: -+** mov (z[0-9]+\.d), #11 -+** movprfx z0\.d, p0/z, z0\.d -+** msb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_s64_z_tied2, svint64_t, -+ z0 = svmsb_n_s64_z (p0, z1, z0, 11), -+ z0 = svmsb_z (p0, z1, z0, 11)) -+ -+/* -+** msb_11_s64_z_untied: -+** mov (z[0-9]+\.d), #11 -+** ( 
-+** movprfx z0\.d, p0/z, z1\.d -+** msb z0\.d, p0/m, z2\.d, \1 -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** msb z0\.d, p0/m, z1\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** mls z0\.d, p0/m, z1\.d, z2\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_s64_z_untied, svint64_t, -+ z0 = svmsb_n_s64_z (p0, z1, z2, 11), -+ z0 = svmsb_z (p0, z1, z2, 11)) -+ -+/* -+** msb_s64_x_tied1: -+** msb z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s64_x_tied1, svint64_t, -+ z0 = svmsb_s64_x (p0, z0, z1, z2), -+ z0 = svmsb_x (p0, z0, z1, z2)) -+ -+/* -+** msb_s64_x_tied2: -+** msb z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s64_x_tied2, svint64_t, -+ z0 = svmsb_s64_x (p0, z1, z0, z2), -+ z0 = svmsb_x (p0, z1, z0, z2)) -+ -+/* -+** msb_s64_x_tied3: -+** mls z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s64_x_tied3, svint64_t, -+ z0 = svmsb_s64_x (p0, z1, z2, z0), -+ z0 = svmsb_x (p0, z1, z2, z0)) -+ -+/* -+** msb_s64_x_untied: -+** ( -+** movprfx z0, z1 -+** msb z0\.d, p0/m, z2\.d, z3\.d -+** | -+** movprfx z0, z2 -+** msb z0\.d, p0/m, z1\.d, z3\.d -+** | -+** movprfx z0, z3 -+** mls z0\.d, p0/m, z1\.d, z2\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s64_x_untied, svint64_t, -+ z0 = svmsb_s64_x (p0, z1, z2, z3), -+ z0 = svmsb_x (p0, z1, z2, z3)) -+ -+/* -+** msb_x0_s64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** msb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_x0_s64_x_tied1, svint64_t, int64_t, -+ z0 = svmsb_n_s64_x (p0, z0, z1, x0), -+ z0 = svmsb_x (p0, z0, z1, x0)) -+ -+/* -+** msb_x0_s64_x_tied2: -+** mov (z[0-9]+\.d), x0 -+** msb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_x0_s64_x_tied2, svint64_t, int64_t, -+ z0 = svmsb_n_s64_x (p0, z1, z0, x0), -+ z0 = svmsb_x (p0, z1, z0, x0)) -+ -+/* -+** msb_x0_s64_x_untied: -+** mov z0\.d, x0 -+** mls z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_x0_s64_x_untied, svint64_t, int64_t, -+ z0 = svmsb_n_s64_x (p0, z1, z2, x0), -+ z0 = svmsb_x (p0, z1, z2, x0)) -+ -+/* -+** msb_11_s64_x_tied1: -+** mov (z[0-9]+\.d), #11 -+** msb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_s64_x_tied1, svint64_t, -+ z0 = svmsb_n_s64_x (p0, z0, z1, 11), -+ z0 = svmsb_x (p0, z0, z1, 11)) -+ -+/* -+** msb_11_s64_x_tied2: -+** mov (z[0-9]+\.d), #11 -+** msb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_s64_x_tied2, svint64_t, -+ z0 = svmsb_n_s64_x (p0, z1, z0, 11), -+ z0 = svmsb_x (p0, z1, z0, 11)) -+ -+/* -+** msb_11_s64_x_untied: -+** mov z0\.d, #11 -+** mls z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_s64_x_untied, svint64_t, -+ z0 = svmsb_n_s64_x (p0, z1, z2, 11), -+ z0 = svmsb_x (p0, z1, z2, 11)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_s8.c -new file mode 100644 -index 000000000..d7fcafdd0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_s8.c -@@ -0,0 +1,321 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** msb_s8_m_tied1: -+** msb z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s8_m_tied1, svint8_t, -+ z0 = svmsb_s8_m (p0, z0, z1, z2), -+ z0 = svmsb_m (p0, z0, z1, z2)) -+ -+/* -+** msb_s8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** msb z0\.b, p0/m, \1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s8_m_tied2, svint8_t, -+ z0 = svmsb_s8_m (p0, z1, z0, z2), -+ z0 = svmsb_m (p0, z1, z0, z2)) -+ -+/* -+** msb_s8_m_tied3: -+** mov 
(z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** msb z0\.b, p0/m, z2\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s8_m_tied3, svint8_t, -+ z0 = svmsb_s8_m (p0, z1, z2, z0), -+ z0 = svmsb_m (p0, z1, z2, z0)) -+ -+/* -+** msb_s8_m_untied: -+** movprfx z0, z1 -+** msb z0\.b, p0/m, z2\.b, z3\.b -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s8_m_untied, svint8_t, -+ z0 = svmsb_s8_m (p0, z1, z2, z3), -+ z0 = svmsb_m (p0, z1, z2, z3)) -+ -+/* -+** msb_w0_s8_m_tied1: -+** mov (z[0-9]+\.b), w0 -+** msb z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_s8_m_tied1, svint8_t, int8_t, -+ z0 = svmsb_n_s8_m (p0, z0, z1, x0), -+ z0 = svmsb_m (p0, z0, z1, x0)) -+ -+/* -+** msb_w0_s8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** msb z0\.b, p0/m, z2\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_s8_m_untied, svint8_t, int8_t, -+ z0 = svmsb_n_s8_m (p0, z1, z2, x0), -+ z0 = svmsb_m (p0, z1, z2, x0)) -+ -+/* -+** msb_11_s8_m_tied1: -+** mov (z[0-9]+\.b), #11 -+** msb z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_s8_m_tied1, svint8_t, -+ z0 = svmsb_n_s8_m (p0, z0, z1, 11), -+ z0 = svmsb_m (p0, z0, z1, 11)) -+ -+/* -+** msb_11_s8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), #11 -+** movprfx z0, z1 -+** msb z0\.b, p0/m, z2\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_s8_m_untied, svint8_t, -+ z0 = svmsb_n_s8_m (p0, z1, z2, 11), -+ z0 = svmsb_m (p0, z1, z2, 11)) -+ -+/* -+** msb_s8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** msb z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s8_z_tied1, svint8_t, -+ z0 = svmsb_s8_z (p0, z0, z1, z2), -+ z0 = svmsb_z (p0, z0, z1, z2)) -+ -+/* -+** msb_s8_z_tied2: -+** movprfx z0\.b, p0/z, z0\.b -+** msb z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s8_z_tied2, svint8_t, -+ z0 = svmsb_s8_z (p0, z1, z0, z2), -+ z0 = svmsb_z (p0, z1, z0, z2)) -+ -+/* -+** msb_s8_z_tied3: -+** movprfx z0\.b, p0/z, z0\.b -+** mls z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s8_z_tied3, svint8_t, -+ z0 = svmsb_s8_z (p0, z1, z2, z0), -+ z0 = svmsb_z (p0, z1, z2, z0)) -+ -+/* -+** msb_s8_z_untied: -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** msb z0\.b, p0/m, z2\.b, z3\.b -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** msb z0\.b, p0/m, z1\.b, z3\.b -+** | -+** movprfx z0\.b, p0/z, z3\.b -+** mls z0\.b, p0/m, z1\.b, z2\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s8_z_untied, svint8_t, -+ z0 = svmsb_s8_z (p0, z1, z2, z3), -+ z0 = svmsb_z (p0, z1, z2, z3)) -+ -+/* -+** msb_w0_s8_z_tied1: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** msb z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_s8_z_tied1, svint8_t, int8_t, -+ z0 = svmsb_n_s8_z (p0, z0, z1, x0), -+ z0 = svmsb_z (p0, z0, z1, x0)) -+ -+/* -+** msb_w0_s8_z_tied2: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** msb z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_s8_z_tied2, svint8_t, int8_t, -+ z0 = svmsb_n_s8_z (p0, z1, z0, x0), -+ z0 = svmsb_z (p0, z1, z0, x0)) -+ -+/* -+** msb_w0_s8_z_untied: -+** mov (z[0-9]+\.b), w0 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** msb z0\.b, p0/m, z2\.b, \1 -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** msb z0\.b, p0/m, z1\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** mls z0\.b, p0/m, z1\.b, z2\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_s8_z_untied, svint8_t, int8_t, -+ z0 = svmsb_n_s8_z (p0, z1, z2, x0), -+ z0 = svmsb_z (p0, z1, z2, x0)) -+ -+/* -+** msb_11_s8_z_tied1: -+** mov (z[0-9]+\.b), #11 -+** movprfx z0\.b, p0/z, z0\.b -+** msb z0\.b, p0/m, z1\.b, \1 
-+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_s8_z_tied1, svint8_t, -+ z0 = svmsb_n_s8_z (p0, z0, z1, 11), -+ z0 = svmsb_z (p0, z0, z1, 11)) -+ -+/* -+** msb_11_s8_z_tied2: -+** mov (z[0-9]+\.b), #11 -+** movprfx z0\.b, p0/z, z0\.b -+** msb z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_s8_z_tied2, svint8_t, -+ z0 = svmsb_n_s8_z (p0, z1, z0, 11), -+ z0 = svmsb_z (p0, z1, z0, 11)) -+ -+/* -+** msb_11_s8_z_untied: -+** mov (z[0-9]+\.b), #11 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** msb z0\.b, p0/m, z2\.b, \1 -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** msb z0\.b, p0/m, z1\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** mls z0\.b, p0/m, z1\.b, z2\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_s8_z_untied, svint8_t, -+ z0 = svmsb_n_s8_z (p0, z1, z2, 11), -+ z0 = svmsb_z (p0, z1, z2, 11)) -+ -+/* -+** msb_s8_x_tied1: -+** msb z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s8_x_tied1, svint8_t, -+ z0 = svmsb_s8_x (p0, z0, z1, z2), -+ z0 = svmsb_x (p0, z0, z1, z2)) -+ -+/* -+** msb_s8_x_tied2: -+** msb z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s8_x_tied2, svint8_t, -+ z0 = svmsb_s8_x (p0, z1, z0, z2), -+ z0 = svmsb_x (p0, z1, z0, z2)) -+ -+/* -+** msb_s8_x_tied3: -+** mls z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s8_x_tied3, svint8_t, -+ z0 = svmsb_s8_x (p0, z1, z2, z0), -+ z0 = svmsb_x (p0, z1, z2, z0)) -+ -+/* -+** msb_s8_x_untied: -+** ( -+** movprfx z0, z1 -+** msb z0\.b, p0/m, z2\.b, z3\.b -+** | -+** movprfx z0, z2 -+** msb z0\.b, p0/m, z1\.b, z3\.b -+** | -+** movprfx z0, z3 -+** mls z0\.b, p0/m, z1\.b, z2\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (msb_s8_x_untied, svint8_t, -+ z0 = svmsb_s8_x (p0, z1, z2, z3), -+ z0 = svmsb_x (p0, z1, z2, z3)) -+ -+/* -+** msb_w0_s8_x_tied1: -+** mov (z[0-9]+\.b), w0 -+** msb z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_s8_x_tied1, svint8_t, int8_t, -+ z0 = svmsb_n_s8_x (p0, z0, z1, x0), -+ z0 = svmsb_x (p0, z0, z1, x0)) -+ -+/* -+** msb_w0_s8_x_tied2: -+** mov (z[0-9]+\.b), w0 -+** msb z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_s8_x_tied2, svint8_t, int8_t, -+ z0 = svmsb_n_s8_x (p0, z1, z0, x0), -+ z0 = svmsb_x (p0, z1, z0, x0)) -+ -+/* -+** msb_w0_s8_x_untied: -+** mov z0\.b, w0 -+** mls z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_s8_x_untied, svint8_t, int8_t, -+ z0 = svmsb_n_s8_x (p0, z1, z2, x0), -+ z0 = svmsb_x (p0, z1, z2, x0)) -+ -+/* -+** msb_11_s8_x_tied1: -+** mov (z[0-9]+\.b), #11 -+** msb z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_s8_x_tied1, svint8_t, -+ z0 = svmsb_n_s8_x (p0, z0, z1, 11), -+ z0 = svmsb_x (p0, z0, z1, 11)) -+ -+/* -+** msb_11_s8_x_tied2: -+** mov (z[0-9]+\.b), #11 -+** msb z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_s8_x_tied2, svint8_t, -+ z0 = svmsb_n_s8_x (p0, z1, z0, 11), -+ z0 = svmsb_x (p0, z1, z0, 11)) -+ -+/* -+** msb_11_s8_x_untied: -+** mov z0\.b, #11 -+** mls z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_s8_x_untied, svint8_t, -+ z0 = svmsb_n_s8_x (p0, z1, z2, 11), -+ z0 = svmsb_x (p0, z1, z2, 11)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_u16.c -new file mode 100644 -index 000000000..437a96040 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_u16.c -@@ -0,0 +1,321 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** msb_u16_m_tied1: -+** msb z0\.h, p0/m, z1\.h, z2\.h -+** 
ret -+*/ -+TEST_UNIFORM_Z (msb_u16_m_tied1, svuint16_t, -+ z0 = svmsb_u16_m (p0, z0, z1, z2), -+ z0 = svmsb_m (p0, z0, z1, z2)) -+ -+/* -+** msb_u16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** msb z0\.h, p0/m, \1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u16_m_tied2, svuint16_t, -+ z0 = svmsb_u16_m (p0, z1, z0, z2), -+ z0 = svmsb_m (p0, z1, z0, z2)) -+ -+/* -+** msb_u16_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** msb z0\.h, p0/m, z2\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u16_m_tied3, svuint16_t, -+ z0 = svmsb_u16_m (p0, z1, z2, z0), -+ z0 = svmsb_m (p0, z1, z2, z0)) -+ -+/* -+** msb_u16_m_untied: -+** movprfx z0, z1 -+** msb z0\.h, p0/m, z2\.h, z3\.h -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u16_m_untied, svuint16_t, -+ z0 = svmsb_u16_m (p0, z1, z2, z3), -+ z0 = svmsb_m (p0, z1, z2, z3)) -+ -+/* -+** msb_w0_u16_m_tied1: -+** mov (z[0-9]+\.h), w0 -+** msb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_u16_m_tied1, svuint16_t, uint16_t, -+ z0 = svmsb_n_u16_m (p0, z0, z1, x0), -+ z0 = svmsb_m (p0, z0, z1, x0)) -+ -+/* -+** msb_w0_u16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** msb z0\.h, p0/m, z2\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_u16_m_untied, svuint16_t, uint16_t, -+ z0 = svmsb_n_u16_m (p0, z1, z2, x0), -+ z0 = svmsb_m (p0, z1, z2, x0)) -+ -+/* -+** msb_11_u16_m_tied1: -+** mov (z[0-9]+\.h), #11 -+** msb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_u16_m_tied1, svuint16_t, -+ z0 = svmsb_n_u16_m (p0, z0, z1, 11), -+ z0 = svmsb_m (p0, z0, z1, 11)) -+ -+/* -+** msb_11_u16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), #11 -+** movprfx z0, z1 -+** msb z0\.h, p0/m, z2\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_u16_m_untied, svuint16_t, -+ z0 = svmsb_n_u16_m (p0, z1, z2, 11), -+ z0 = svmsb_m (p0, z1, z2, 11)) -+ -+/* -+** msb_u16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** msb z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u16_z_tied1, svuint16_t, -+ z0 = svmsb_u16_z (p0, z0, z1, z2), -+ z0 = svmsb_z (p0, z0, z1, z2)) -+ -+/* -+** msb_u16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** msb z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u16_z_tied2, svuint16_t, -+ z0 = svmsb_u16_z (p0, z1, z0, z2), -+ z0 = svmsb_z (p0, z1, z0, z2)) -+ -+/* -+** msb_u16_z_tied3: -+** movprfx z0\.h, p0/z, z0\.h -+** mls z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u16_z_tied3, svuint16_t, -+ z0 = svmsb_u16_z (p0, z1, z2, z0), -+ z0 = svmsb_z (p0, z1, z2, z0)) -+ -+/* -+** msb_u16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** msb z0\.h, p0/m, z2\.h, z3\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** msb z0\.h, p0/m, z1\.h, z3\.h -+** | -+** movprfx z0\.h, p0/z, z3\.h -+** mls z0\.h, p0/m, z1\.h, z2\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u16_z_untied, svuint16_t, -+ z0 = svmsb_u16_z (p0, z1, z2, z3), -+ z0 = svmsb_z (p0, z1, z2, z3)) -+ -+/* -+** msb_w0_u16_z_tied1: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** msb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_u16_z_tied1, svuint16_t, uint16_t, -+ z0 = svmsb_n_u16_z (p0, z0, z1, x0), -+ z0 = svmsb_z (p0, z0, z1, x0)) -+ -+/* -+** msb_w0_u16_z_tied2: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** msb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_u16_z_tied2, svuint16_t, uint16_t, -+ z0 = svmsb_n_u16_z (p0, z1, z0, x0), -+ z0 = svmsb_z (p0, z1, z0, x0)) -+ -+/* -+** msb_w0_u16_z_untied: -+** mov (z[0-9]+\.h), w0 -+** ( 
-+** movprfx z0\.h, p0/z, z1\.h -+** msb z0\.h, p0/m, z2\.h, \1 -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** msb z0\.h, p0/m, z1\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** mls z0\.h, p0/m, z1\.h, z2\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_u16_z_untied, svuint16_t, uint16_t, -+ z0 = svmsb_n_u16_z (p0, z1, z2, x0), -+ z0 = svmsb_z (p0, z1, z2, x0)) -+ -+/* -+** msb_11_u16_z_tied1: -+** mov (z[0-9]+\.h), #11 -+** movprfx z0\.h, p0/z, z0\.h -+** msb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_u16_z_tied1, svuint16_t, -+ z0 = svmsb_n_u16_z (p0, z0, z1, 11), -+ z0 = svmsb_z (p0, z0, z1, 11)) -+ -+/* -+** msb_11_u16_z_tied2: -+** mov (z[0-9]+\.h), #11 -+** movprfx z0\.h, p0/z, z0\.h -+** msb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_u16_z_tied2, svuint16_t, -+ z0 = svmsb_n_u16_z (p0, z1, z0, 11), -+ z0 = svmsb_z (p0, z1, z0, 11)) -+ -+/* -+** msb_11_u16_z_untied: -+** mov (z[0-9]+\.h), #11 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** msb z0\.h, p0/m, z2\.h, \1 -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** msb z0\.h, p0/m, z1\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** mls z0\.h, p0/m, z1\.h, z2\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_u16_z_untied, svuint16_t, -+ z0 = svmsb_n_u16_z (p0, z1, z2, 11), -+ z0 = svmsb_z (p0, z1, z2, 11)) -+ -+/* -+** msb_u16_x_tied1: -+** msb z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u16_x_tied1, svuint16_t, -+ z0 = svmsb_u16_x (p0, z0, z1, z2), -+ z0 = svmsb_x (p0, z0, z1, z2)) -+ -+/* -+** msb_u16_x_tied2: -+** msb z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u16_x_tied2, svuint16_t, -+ z0 = svmsb_u16_x (p0, z1, z0, z2), -+ z0 = svmsb_x (p0, z1, z0, z2)) -+ -+/* -+** msb_u16_x_tied3: -+** mls z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u16_x_tied3, svuint16_t, -+ z0 = svmsb_u16_x (p0, z1, z2, z0), -+ z0 = svmsb_x (p0, z1, z2, z0)) -+ -+/* -+** msb_u16_x_untied: -+** ( -+** movprfx z0, z1 -+** msb z0\.h, p0/m, z2\.h, z3\.h -+** | -+** movprfx z0, z2 -+** msb z0\.h, p0/m, z1\.h, z3\.h -+** | -+** movprfx z0, z3 -+** mls z0\.h, p0/m, z1\.h, z2\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u16_x_untied, svuint16_t, -+ z0 = svmsb_u16_x (p0, z1, z2, z3), -+ z0 = svmsb_x (p0, z1, z2, z3)) -+ -+/* -+** msb_w0_u16_x_tied1: -+** mov (z[0-9]+\.h), w0 -+** msb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_u16_x_tied1, svuint16_t, uint16_t, -+ z0 = svmsb_n_u16_x (p0, z0, z1, x0), -+ z0 = svmsb_x (p0, z0, z1, x0)) -+ -+/* -+** msb_w0_u16_x_tied2: -+** mov (z[0-9]+\.h), w0 -+** msb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_u16_x_tied2, svuint16_t, uint16_t, -+ z0 = svmsb_n_u16_x (p0, z1, z0, x0), -+ z0 = svmsb_x (p0, z1, z0, x0)) -+ -+/* -+** msb_w0_u16_x_untied: -+** mov z0\.h, w0 -+** mls z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_u16_x_untied, svuint16_t, uint16_t, -+ z0 = svmsb_n_u16_x (p0, z1, z2, x0), -+ z0 = svmsb_x (p0, z1, z2, x0)) -+ -+/* -+** msb_11_u16_x_tied1: -+** mov (z[0-9]+\.h), #11 -+** msb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_u16_x_tied1, svuint16_t, -+ z0 = svmsb_n_u16_x (p0, z0, z1, 11), -+ z0 = svmsb_x (p0, z0, z1, 11)) -+ -+/* -+** msb_11_u16_x_tied2: -+** mov (z[0-9]+\.h), #11 -+** msb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_u16_x_tied2, svuint16_t, -+ z0 = svmsb_n_u16_x (p0, z1, z0, 11), -+ z0 = svmsb_x (p0, z1, z0, 11)) -+ -+/* -+** msb_11_u16_x_untied: -+** mov z0\.h, #11 -+** mls z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ 
-+TEST_UNIFORM_Z (msb_11_u16_x_untied, svuint16_t, -+ z0 = svmsb_n_u16_x (p0, z1, z2, 11), -+ z0 = svmsb_x (p0, z1, z2, 11)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_u32.c -new file mode 100644 -index 000000000..aaaf0344a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_u32.c -@@ -0,0 +1,321 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** msb_u32_m_tied1: -+** msb z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u32_m_tied1, svuint32_t, -+ z0 = svmsb_u32_m (p0, z0, z1, z2), -+ z0 = svmsb_m (p0, z0, z1, z2)) -+ -+/* -+** msb_u32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** msb z0\.s, p0/m, \1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u32_m_tied2, svuint32_t, -+ z0 = svmsb_u32_m (p0, z1, z0, z2), -+ z0 = svmsb_m (p0, z1, z0, z2)) -+ -+/* -+** msb_u32_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** msb z0\.s, p0/m, z2\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u32_m_tied3, svuint32_t, -+ z0 = svmsb_u32_m (p0, z1, z2, z0), -+ z0 = svmsb_m (p0, z1, z2, z0)) -+ -+/* -+** msb_u32_m_untied: -+** movprfx z0, z1 -+** msb z0\.s, p0/m, z2\.s, z3\.s -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u32_m_untied, svuint32_t, -+ z0 = svmsb_u32_m (p0, z1, z2, z3), -+ z0 = svmsb_m (p0, z1, z2, z3)) -+ -+/* -+** msb_w0_u32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** msb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_u32_m_tied1, svuint32_t, uint32_t, -+ z0 = svmsb_n_u32_m (p0, z0, z1, x0), -+ z0 = svmsb_m (p0, z0, z1, x0)) -+ -+/* -+** msb_w0_u32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** msb z0\.s, p0/m, z2\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_u32_m_untied, svuint32_t, uint32_t, -+ z0 = svmsb_n_u32_m (p0, z1, z2, x0), -+ z0 = svmsb_m (p0, z1, z2, x0)) -+ -+/* -+** msb_11_u32_m_tied1: -+** mov (z[0-9]+\.s), #11 -+** msb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_u32_m_tied1, svuint32_t, -+ z0 = svmsb_n_u32_m (p0, z0, z1, 11), -+ z0 = svmsb_m (p0, z0, z1, 11)) -+ -+/* -+** msb_11_u32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #11 -+** movprfx z0, z1 -+** msb z0\.s, p0/m, z2\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_u32_m_untied, svuint32_t, -+ z0 = svmsb_n_u32_m (p0, z1, z2, 11), -+ z0 = svmsb_m (p0, z1, z2, 11)) -+ -+/* -+** msb_u32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** msb z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u32_z_tied1, svuint32_t, -+ z0 = svmsb_u32_z (p0, z0, z1, z2), -+ z0 = svmsb_z (p0, z0, z1, z2)) -+ -+/* -+** msb_u32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** msb z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u32_z_tied2, svuint32_t, -+ z0 = svmsb_u32_z (p0, z1, z0, z2), -+ z0 = svmsb_z (p0, z1, z0, z2)) -+ -+/* -+** msb_u32_z_tied3: -+** movprfx z0\.s, p0/z, z0\.s -+** mls z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u32_z_tied3, svuint32_t, -+ z0 = svmsb_u32_z (p0, z1, z2, z0), -+ z0 = svmsb_z (p0, z1, z2, z0)) -+ -+/* -+** msb_u32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** msb z0\.s, p0/m, z2\.s, z3\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** msb z0\.s, p0/m, z1\.s, z3\.s -+** | -+** movprfx z0\.s, p0/z, z3\.s -+** mls z0\.s, p0/m, z1\.s, z2\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u32_z_untied, svuint32_t, -+ z0 = svmsb_u32_z (p0, z1, z2, z3), -+ z0 = svmsb_z (p0, z1, z2, z3)) -+ -+/* -+** msb_w0_u32_z_tied1: -+** mov (z[0-9]+\.s), 
w0 -+** movprfx z0\.s, p0/z, z0\.s -+** msb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_u32_z_tied1, svuint32_t, uint32_t, -+ z0 = svmsb_n_u32_z (p0, z0, z1, x0), -+ z0 = svmsb_z (p0, z0, z1, x0)) -+ -+/* -+** msb_w0_u32_z_tied2: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** msb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_u32_z_tied2, svuint32_t, uint32_t, -+ z0 = svmsb_n_u32_z (p0, z1, z0, x0), -+ z0 = svmsb_z (p0, z1, z0, x0)) -+ -+/* -+** msb_w0_u32_z_untied: -+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** msb z0\.s, p0/m, z2\.s, \1 -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** msb z0\.s, p0/m, z1\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** mls z0\.s, p0/m, z1\.s, z2\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_u32_z_untied, svuint32_t, uint32_t, -+ z0 = svmsb_n_u32_z (p0, z1, z2, x0), -+ z0 = svmsb_z (p0, z1, z2, x0)) -+ -+/* -+** msb_11_u32_z_tied1: -+** mov (z[0-9]+\.s), #11 -+** movprfx z0\.s, p0/z, z0\.s -+** msb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_u32_z_tied1, svuint32_t, -+ z0 = svmsb_n_u32_z (p0, z0, z1, 11), -+ z0 = svmsb_z (p0, z0, z1, 11)) -+ -+/* -+** msb_11_u32_z_tied2: -+** mov (z[0-9]+\.s), #11 -+** movprfx z0\.s, p0/z, z0\.s -+** msb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_u32_z_tied2, svuint32_t, -+ z0 = svmsb_n_u32_z (p0, z1, z0, 11), -+ z0 = svmsb_z (p0, z1, z0, 11)) -+ -+/* -+** msb_11_u32_z_untied: -+** mov (z[0-9]+\.s), #11 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** msb z0\.s, p0/m, z2\.s, \1 -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** msb z0\.s, p0/m, z1\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** mls z0\.s, p0/m, z1\.s, z2\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_u32_z_untied, svuint32_t, -+ z0 = svmsb_n_u32_z (p0, z1, z2, 11), -+ z0 = svmsb_z (p0, z1, z2, 11)) -+ -+/* -+** msb_u32_x_tied1: -+** msb z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u32_x_tied1, svuint32_t, -+ z0 = svmsb_u32_x (p0, z0, z1, z2), -+ z0 = svmsb_x (p0, z0, z1, z2)) -+ -+/* -+** msb_u32_x_tied2: -+** msb z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u32_x_tied2, svuint32_t, -+ z0 = svmsb_u32_x (p0, z1, z0, z2), -+ z0 = svmsb_x (p0, z1, z0, z2)) -+ -+/* -+** msb_u32_x_tied3: -+** mls z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u32_x_tied3, svuint32_t, -+ z0 = svmsb_u32_x (p0, z1, z2, z0), -+ z0 = svmsb_x (p0, z1, z2, z0)) -+ -+/* -+** msb_u32_x_untied: -+** ( -+** movprfx z0, z1 -+** msb z0\.s, p0/m, z2\.s, z3\.s -+** | -+** movprfx z0, z2 -+** msb z0\.s, p0/m, z1\.s, z3\.s -+** | -+** movprfx z0, z3 -+** mls z0\.s, p0/m, z1\.s, z2\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u32_x_untied, svuint32_t, -+ z0 = svmsb_u32_x (p0, z1, z2, z3), -+ z0 = svmsb_x (p0, z1, z2, z3)) -+ -+/* -+** msb_w0_u32_x_tied1: -+** mov (z[0-9]+\.s), w0 -+** msb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_u32_x_tied1, svuint32_t, uint32_t, -+ z0 = svmsb_n_u32_x (p0, z0, z1, x0), -+ z0 = svmsb_x (p0, z0, z1, x0)) -+ -+/* -+** msb_w0_u32_x_tied2: -+** mov (z[0-9]+\.s), w0 -+** msb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_u32_x_tied2, svuint32_t, uint32_t, -+ z0 = svmsb_n_u32_x (p0, z1, z0, x0), -+ z0 = svmsb_x (p0, z1, z0, x0)) -+ -+/* -+** msb_w0_u32_x_untied: -+** mov z0\.s, w0 -+** mls z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_u32_x_untied, svuint32_t, uint32_t, -+ z0 = svmsb_n_u32_x (p0, z1, z2, x0), -+ z0 = svmsb_x (p0, z1, z2, x0)) -+ -+/* -+** 
msb_11_u32_x_tied1: -+** mov (z[0-9]+\.s), #11 -+** msb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_u32_x_tied1, svuint32_t, -+ z0 = svmsb_n_u32_x (p0, z0, z1, 11), -+ z0 = svmsb_x (p0, z0, z1, 11)) -+ -+/* -+** msb_11_u32_x_tied2: -+** mov (z[0-9]+\.s), #11 -+** msb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_u32_x_tied2, svuint32_t, -+ z0 = svmsb_n_u32_x (p0, z1, z0, 11), -+ z0 = svmsb_x (p0, z1, z0, 11)) -+ -+/* -+** msb_11_u32_x_untied: -+** mov z0\.s, #11 -+** mls z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_u32_x_untied, svuint32_t, -+ z0 = svmsb_n_u32_x (p0, z1, z2, 11), -+ z0 = svmsb_x (p0, z1, z2, 11)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_u64.c -new file mode 100644 -index 000000000..5c5d33073 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_u64.c -@@ -0,0 +1,321 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** msb_u64_m_tied1: -+** msb z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u64_m_tied1, svuint64_t, -+ z0 = svmsb_u64_m (p0, z0, z1, z2), -+ z0 = svmsb_m (p0, z0, z1, z2)) -+ -+/* -+** msb_u64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** msb z0\.d, p0/m, \1, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u64_m_tied2, svuint64_t, -+ z0 = svmsb_u64_m (p0, z1, z0, z2), -+ z0 = svmsb_m (p0, z1, z0, z2)) -+ -+/* -+** msb_u64_m_tied3: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** msb z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u64_m_tied3, svuint64_t, -+ z0 = svmsb_u64_m (p0, z1, z2, z0), -+ z0 = svmsb_m (p0, z1, z2, z0)) -+ -+/* -+** msb_u64_m_untied: -+** movprfx z0, z1 -+** msb z0\.d, p0/m, z2\.d, z3\.d -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u64_m_untied, svuint64_t, -+ z0 = svmsb_u64_m (p0, z1, z2, z3), -+ z0 = svmsb_m (p0, z1, z2, z3)) -+ -+/* -+** msb_x0_u64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** msb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_x0_u64_m_tied1, svuint64_t, uint64_t, -+ z0 = svmsb_n_u64_m (p0, z0, z1, x0), -+ z0 = svmsb_m (p0, z0, z1, x0)) -+ -+/* -+** msb_x0_u64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** msb z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_x0_u64_m_untied, svuint64_t, uint64_t, -+ z0 = svmsb_n_u64_m (p0, z1, z2, x0), -+ z0 = svmsb_m (p0, z1, z2, x0)) -+ -+/* -+** msb_11_u64_m_tied1: -+** mov (z[0-9]+\.d), #11 -+** msb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_u64_m_tied1, svuint64_t, -+ z0 = svmsb_n_u64_m (p0, z0, z1, 11), -+ z0 = svmsb_m (p0, z0, z1, 11)) -+ -+/* -+** msb_11_u64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #11 -+** movprfx z0, z1 -+** msb z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_u64_m_untied, svuint64_t, -+ z0 = svmsb_n_u64_m (p0, z1, z2, 11), -+ z0 = svmsb_m (p0, z1, z2, 11)) -+ -+/* -+** msb_u64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** msb z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u64_z_tied1, svuint64_t, -+ z0 = svmsb_u64_z (p0, z0, z1, z2), -+ z0 = svmsb_z (p0, z0, z1, z2)) -+ -+/* -+** msb_u64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** msb z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u64_z_tied2, svuint64_t, -+ z0 = svmsb_u64_z (p0, z1, z0, z2), -+ z0 = svmsb_z (p0, z1, z0, z2)) -+ -+/* -+** msb_u64_z_tied3: -+** movprfx z0\.d, p0/z, z0\.d -+** mls z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z 
(msb_u64_z_tied3, svuint64_t, -+ z0 = svmsb_u64_z (p0, z1, z2, z0), -+ z0 = svmsb_z (p0, z1, z2, z0)) -+ -+/* -+** msb_u64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** msb z0\.d, p0/m, z2\.d, z3\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** msb z0\.d, p0/m, z1\.d, z3\.d -+** | -+** movprfx z0\.d, p0/z, z3\.d -+** mls z0\.d, p0/m, z1\.d, z2\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u64_z_untied, svuint64_t, -+ z0 = svmsb_u64_z (p0, z1, z2, z3), -+ z0 = svmsb_z (p0, z1, z2, z3)) -+ -+/* -+** msb_x0_u64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** msb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_x0_u64_z_tied1, svuint64_t, uint64_t, -+ z0 = svmsb_n_u64_z (p0, z0, z1, x0), -+ z0 = svmsb_z (p0, z0, z1, x0)) -+ -+/* -+** msb_x0_u64_z_tied2: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** msb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_x0_u64_z_tied2, svuint64_t, uint64_t, -+ z0 = svmsb_n_u64_z (p0, z1, z0, x0), -+ z0 = svmsb_z (p0, z1, z0, x0)) -+ -+/* -+** msb_x0_u64_z_untied: -+** mov (z[0-9]+\.d), x0 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** msb z0\.d, p0/m, z2\.d, \1 -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** msb z0\.d, p0/m, z1\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** mls z0\.d, p0/m, z1\.d, z2\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_x0_u64_z_untied, svuint64_t, uint64_t, -+ z0 = svmsb_n_u64_z (p0, z1, z2, x0), -+ z0 = svmsb_z (p0, z1, z2, x0)) -+ -+/* -+** msb_11_u64_z_tied1: -+** mov (z[0-9]+\.d), #11 -+** movprfx z0\.d, p0/z, z0\.d -+** msb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_u64_z_tied1, svuint64_t, -+ z0 = svmsb_n_u64_z (p0, z0, z1, 11), -+ z0 = svmsb_z (p0, z0, z1, 11)) -+ -+/* -+** msb_11_u64_z_tied2: -+** mov (z[0-9]+\.d), #11 -+** movprfx z0\.d, p0/z, z0\.d -+** msb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_u64_z_tied2, svuint64_t, -+ z0 = svmsb_n_u64_z (p0, z1, z0, 11), -+ z0 = svmsb_z (p0, z1, z0, 11)) -+ -+/* -+** msb_11_u64_z_untied: -+** mov (z[0-9]+\.d), #11 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** msb z0\.d, p0/m, z2\.d, \1 -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** msb z0\.d, p0/m, z1\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** mls z0\.d, p0/m, z1\.d, z2\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_u64_z_untied, svuint64_t, -+ z0 = svmsb_n_u64_z (p0, z1, z2, 11), -+ z0 = svmsb_z (p0, z1, z2, 11)) -+ -+/* -+** msb_u64_x_tied1: -+** msb z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u64_x_tied1, svuint64_t, -+ z0 = svmsb_u64_x (p0, z0, z1, z2), -+ z0 = svmsb_x (p0, z0, z1, z2)) -+ -+/* -+** msb_u64_x_tied2: -+** msb z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u64_x_tied2, svuint64_t, -+ z0 = svmsb_u64_x (p0, z1, z0, z2), -+ z0 = svmsb_x (p0, z1, z0, z2)) -+ -+/* -+** msb_u64_x_tied3: -+** mls z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u64_x_tied3, svuint64_t, -+ z0 = svmsb_u64_x (p0, z1, z2, z0), -+ z0 = svmsb_x (p0, z1, z2, z0)) -+ -+/* -+** msb_u64_x_untied: -+** ( -+** movprfx z0, z1 -+** msb z0\.d, p0/m, z2\.d, z3\.d -+** | -+** movprfx z0, z2 -+** msb z0\.d, p0/m, z1\.d, z3\.d -+** | -+** movprfx z0, z3 -+** mls z0\.d, p0/m, z1\.d, z2\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u64_x_untied, svuint64_t, -+ z0 = svmsb_u64_x (p0, z1, z2, z3), -+ z0 = svmsb_x (p0, z1, z2, z3)) -+ -+/* -+** msb_x0_u64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** msb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_x0_u64_x_tied1, svuint64_t, uint64_t, -+ z0 = svmsb_n_u64_x 
(p0, z0, z1, x0), -+ z0 = svmsb_x (p0, z0, z1, x0)) -+ -+/* -+** msb_x0_u64_x_tied2: -+** mov (z[0-9]+\.d), x0 -+** msb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_x0_u64_x_tied2, svuint64_t, uint64_t, -+ z0 = svmsb_n_u64_x (p0, z1, z0, x0), -+ z0 = svmsb_x (p0, z1, z0, x0)) -+ -+/* -+** msb_x0_u64_x_untied: -+** mov z0\.d, x0 -+** mls z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_x0_u64_x_untied, svuint64_t, uint64_t, -+ z0 = svmsb_n_u64_x (p0, z1, z2, x0), -+ z0 = svmsb_x (p0, z1, z2, x0)) -+ -+/* -+** msb_11_u64_x_tied1: -+** mov (z[0-9]+\.d), #11 -+** msb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_u64_x_tied1, svuint64_t, -+ z0 = svmsb_n_u64_x (p0, z0, z1, 11), -+ z0 = svmsb_x (p0, z0, z1, 11)) -+ -+/* -+** msb_11_u64_x_tied2: -+** mov (z[0-9]+\.d), #11 -+** msb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_u64_x_tied2, svuint64_t, -+ z0 = svmsb_n_u64_x (p0, z1, z0, 11), -+ z0 = svmsb_x (p0, z1, z0, 11)) -+ -+/* -+** msb_11_u64_x_untied: -+** mov z0\.d, #11 -+** mls z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_u64_x_untied, svuint64_t, -+ z0 = svmsb_n_u64_x (p0, z1, z2, 11), -+ z0 = svmsb_x (p0, z1, z2, 11)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_u8.c -new file mode 100644 -index 000000000..5665ec9e3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/msb_u8.c -@@ -0,0 +1,321 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** msb_u8_m_tied1: -+** msb z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u8_m_tied1, svuint8_t, -+ z0 = svmsb_u8_m (p0, z0, z1, z2), -+ z0 = svmsb_m (p0, z0, z1, z2)) -+ -+/* -+** msb_u8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** msb z0\.b, p0/m, \1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u8_m_tied2, svuint8_t, -+ z0 = svmsb_u8_m (p0, z1, z0, z2), -+ z0 = svmsb_m (p0, z1, z0, z2)) -+ -+/* -+** msb_u8_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** msb z0\.b, p0/m, z2\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u8_m_tied3, svuint8_t, -+ z0 = svmsb_u8_m (p0, z1, z2, z0), -+ z0 = svmsb_m (p0, z1, z2, z0)) -+ -+/* -+** msb_u8_m_untied: -+** movprfx z0, z1 -+** msb z0\.b, p0/m, z2\.b, z3\.b -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u8_m_untied, svuint8_t, -+ z0 = svmsb_u8_m (p0, z1, z2, z3), -+ z0 = svmsb_m (p0, z1, z2, z3)) -+ -+/* -+** msb_w0_u8_m_tied1: -+** mov (z[0-9]+\.b), w0 -+** msb z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_u8_m_tied1, svuint8_t, uint8_t, -+ z0 = svmsb_n_u8_m (p0, z0, z1, x0), -+ z0 = svmsb_m (p0, z0, z1, x0)) -+ -+/* -+** msb_w0_u8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** msb z0\.b, p0/m, z2\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_u8_m_untied, svuint8_t, uint8_t, -+ z0 = svmsb_n_u8_m (p0, z1, z2, x0), -+ z0 = svmsb_m (p0, z1, z2, x0)) -+ -+/* -+** msb_11_u8_m_tied1: -+** mov (z[0-9]+\.b), #11 -+** msb z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_u8_m_tied1, svuint8_t, -+ z0 = svmsb_n_u8_m (p0, z0, z1, 11), -+ z0 = svmsb_m (p0, z0, z1, 11)) -+ -+/* -+** msb_11_u8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), #11 -+** movprfx z0, z1 -+** msb z0\.b, p0/m, z2\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_u8_m_untied, svuint8_t, -+ z0 = svmsb_n_u8_m (p0, z1, z2, 11), -+ z0 = svmsb_m (p0, z1, z2, 11)) -+ -+/* -+** msb_u8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** msb 
z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u8_z_tied1, svuint8_t, -+ z0 = svmsb_u8_z (p0, z0, z1, z2), -+ z0 = svmsb_z (p0, z0, z1, z2)) -+ -+/* -+** msb_u8_z_tied2: -+** movprfx z0\.b, p0/z, z0\.b -+** msb z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u8_z_tied2, svuint8_t, -+ z0 = svmsb_u8_z (p0, z1, z0, z2), -+ z0 = svmsb_z (p0, z1, z0, z2)) -+ -+/* -+** msb_u8_z_tied3: -+** movprfx z0\.b, p0/z, z0\.b -+** mls z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u8_z_tied3, svuint8_t, -+ z0 = svmsb_u8_z (p0, z1, z2, z0), -+ z0 = svmsb_z (p0, z1, z2, z0)) -+ -+/* -+** msb_u8_z_untied: -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** msb z0\.b, p0/m, z2\.b, z3\.b -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** msb z0\.b, p0/m, z1\.b, z3\.b -+** | -+** movprfx z0\.b, p0/z, z3\.b -+** mls z0\.b, p0/m, z1\.b, z2\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u8_z_untied, svuint8_t, -+ z0 = svmsb_u8_z (p0, z1, z2, z3), -+ z0 = svmsb_z (p0, z1, z2, z3)) -+ -+/* -+** msb_w0_u8_z_tied1: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** msb z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_u8_z_tied1, svuint8_t, uint8_t, -+ z0 = svmsb_n_u8_z (p0, z0, z1, x0), -+ z0 = svmsb_z (p0, z0, z1, x0)) -+ -+/* -+** msb_w0_u8_z_tied2: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** msb z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_u8_z_tied2, svuint8_t, uint8_t, -+ z0 = svmsb_n_u8_z (p0, z1, z0, x0), -+ z0 = svmsb_z (p0, z1, z0, x0)) -+ -+/* -+** msb_w0_u8_z_untied: -+** mov (z[0-9]+\.b), w0 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** msb z0\.b, p0/m, z2\.b, \1 -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** msb z0\.b, p0/m, z1\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** mls z0\.b, p0/m, z1\.b, z2\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_u8_z_untied, svuint8_t, uint8_t, -+ z0 = svmsb_n_u8_z (p0, z1, z2, x0), -+ z0 = svmsb_z (p0, z1, z2, x0)) -+ -+/* -+** msb_11_u8_z_tied1: -+** mov (z[0-9]+\.b), #11 -+** movprfx z0\.b, p0/z, z0\.b -+** msb z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_u8_z_tied1, svuint8_t, -+ z0 = svmsb_n_u8_z (p0, z0, z1, 11), -+ z0 = svmsb_z (p0, z0, z1, 11)) -+ -+/* -+** msb_11_u8_z_tied2: -+** mov (z[0-9]+\.b), #11 -+** movprfx z0\.b, p0/z, z0\.b -+** msb z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_u8_z_tied2, svuint8_t, -+ z0 = svmsb_n_u8_z (p0, z1, z0, 11), -+ z0 = svmsb_z (p0, z1, z0, 11)) -+ -+/* -+** msb_11_u8_z_untied: -+** mov (z[0-9]+\.b), #11 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** msb z0\.b, p0/m, z2\.b, \1 -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** msb z0\.b, p0/m, z1\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** mls z0\.b, p0/m, z1\.b, z2\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_u8_z_untied, svuint8_t, -+ z0 = svmsb_n_u8_z (p0, z1, z2, 11), -+ z0 = svmsb_z (p0, z1, z2, 11)) -+ -+/* -+** msb_u8_x_tied1: -+** msb z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u8_x_tied1, svuint8_t, -+ z0 = svmsb_u8_x (p0, z0, z1, z2), -+ z0 = svmsb_x (p0, z0, z1, z2)) -+ -+/* -+** msb_u8_x_tied2: -+** msb z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u8_x_tied2, svuint8_t, -+ z0 = svmsb_u8_x (p0, z1, z0, z2), -+ z0 = svmsb_x (p0, z1, z0, z2)) -+ -+/* -+** msb_u8_x_tied3: -+** mls z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u8_x_tied3, svuint8_t, -+ z0 = svmsb_u8_x (p0, z1, z2, z0), -+ z0 = svmsb_x (p0, z1, z2, z0)) -+ -+/* -+** msb_u8_x_untied: -+** ( -+** movprfx z0, z1 -+** msb z0\.b, p0/m, 
z2\.b, z3\.b -+** | -+** movprfx z0, z2 -+** msb z0\.b, p0/m, z1\.b, z3\.b -+** | -+** movprfx z0, z3 -+** mls z0\.b, p0/m, z1\.b, z2\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (msb_u8_x_untied, svuint8_t, -+ z0 = svmsb_u8_x (p0, z1, z2, z3), -+ z0 = svmsb_x (p0, z1, z2, z3)) -+ -+/* -+** msb_w0_u8_x_tied1: -+** mov (z[0-9]+\.b), w0 -+** msb z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_u8_x_tied1, svuint8_t, uint8_t, -+ z0 = svmsb_n_u8_x (p0, z0, z1, x0), -+ z0 = svmsb_x (p0, z0, z1, x0)) -+ -+/* -+** msb_w0_u8_x_tied2: -+** mov (z[0-9]+\.b), w0 -+** msb z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_u8_x_tied2, svuint8_t, uint8_t, -+ z0 = svmsb_n_u8_x (p0, z1, z0, x0), -+ z0 = svmsb_x (p0, z1, z0, x0)) -+ -+/* -+** msb_w0_u8_x_untied: -+** mov z0\.b, w0 -+** mls z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_ZX (msb_w0_u8_x_untied, svuint8_t, uint8_t, -+ z0 = svmsb_n_u8_x (p0, z1, z2, x0), -+ z0 = svmsb_x (p0, z1, z2, x0)) -+ -+/* -+** msb_11_u8_x_tied1: -+** mov (z[0-9]+\.b), #11 -+** msb z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_u8_x_tied1, svuint8_t, -+ z0 = svmsb_n_u8_x (p0, z0, z1, 11), -+ z0 = svmsb_x (p0, z0, z1, 11)) -+ -+/* -+** msb_11_u8_x_tied2: -+** mov (z[0-9]+\.b), #11 -+** msb z0\.b, p0/m, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_u8_x_tied2, svuint8_t, -+ z0 = svmsb_n_u8_x (p0, z1, z0, 11), -+ z0 = svmsb_x (p0, z1, z0, 11)) -+ -+/* -+** msb_11_u8_x_untied: -+** mov z0\.b, #11 -+** mls z0\.b, p0/m, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (msb_11_u8_x_untied, svuint8_t, -+ z0 = svmsb_n_u8_x (p0, z1, z2, 11), -+ z0 = svmsb_x (p0, z1, z2, 11)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_f16.c -new file mode 100644 -index 000000000..ef3de0c59 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_f16.c -@@ -0,0 +1,444 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mul_f16_m_tied1: -+** fmul z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f16_m_tied1, svfloat16_t, -+ z0 = svmul_f16_m (p0, z0, z1), -+ z0 = svmul_m (p0, z0, z1)) -+ -+/* -+** mul_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fmul z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f16_m_tied2, svfloat16_t, -+ z0 = svmul_f16_m (p0, z1, z0), -+ z0 = svmul_m (p0, z1, z0)) -+ -+/* -+** mul_f16_m_untied: -+** movprfx z0, z1 -+** fmul z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f16_m_untied, svfloat16_t, -+ z0 = svmul_f16_m (p0, z1, z2), -+ z0 = svmul_m (p0, z1, z2)) -+ -+/* -+** mul_h4_f16_m_tied1: -+** mov (z[0-9]+\.h), h4 -+** fmul z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mul_h4_f16_m_tied1, svfloat16_t, __fp16, -+ z0 = svmul_n_f16_m (p0, z0, d4), -+ z0 = svmul_m (p0, z0, d4)) -+ -+/* -+** mul_h4_f16_m_untied: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0, z1 -+** fmul z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mul_h4_f16_m_untied, svfloat16_t, __fp16, -+ z0 = svmul_n_f16_m (p0, z1, d4), -+ z0 = svmul_m (p0, z1, d4)) -+ -+/* -+** mul_1_f16_m_tied1: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? -+** fmul z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_1_f16_m_tied1, svfloat16_t, -+ z0 = svmul_n_f16_m (p0, z0, 1), -+ z0 = svmul_m (p0, z0, 1)) -+ -+/* -+** mul_1_f16_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fmul z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_1_f16_m_untied, svfloat16_t, -+ z0 = svmul_n_f16_m (p0, z1, 1), -+ z0 = svmul_m (p0, z1, 1)) -+ -+/* -+** mul_0p5_f16_m_tied1: -+** fmul z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_0p5_f16_m_tied1, svfloat16_t, -+ z0 = svmul_n_f16_m (p0, z0, 0.5), -+ z0 = svmul_m (p0, z0, 0.5)) -+ -+/* -+** mul_0p5_f16_m_untied: -+** movprfx z0, z1 -+** fmul z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_0p5_f16_m_untied, svfloat16_t, -+ z0 = svmul_n_f16_m (p0, z1, 0.5), -+ z0 = svmul_m (p0, z1, 0.5)) -+ -+/* -+** mul_2_f16_m_tied1: -+** fmul z0\.h, p0/m, z0\.h, #2\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_f16_m_tied1, svfloat16_t, -+ z0 = svmul_n_f16_m (p0, z0, 2), -+ z0 = svmul_m (p0, z0, 2)) -+ -+/* -+** mul_2_f16_m_untied: -+** movprfx z0, z1 -+** fmul z0\.h, p0/m, z0\.h, #2\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_f16_m_untied, svfloat16_t, -+ z0 = svmul_n_f16_m (p0, z1, 2), -+ z0 = svmul_m (p0, z1, 2)) -+ -+/* -+** mul_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fmul z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f16_z_tied1, svfloat16_t, -+ z0 = svmul_f16_z (p0, z0, z1), -+ z0 = svmul_z (p0, z0, z1)) -+ -+/* -+** mul_f16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** fmul z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f16_z_tied2, svfloat16_t, -+ z0 = svmul_f16_z (p0, z1, z0), -+ z0 = svmul_z (p0, z1, z0)) -+ -+/* -+** mul_f16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fmul z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fmul z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f16_z_untied, svfloat16_t, -+ z0 = svmul_f16_z (p0, z1, z2), -+ z0 = svmul_z (p0, z1, z2)) -+ -+/* -+** mul_h4_f16_z_tied1: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0\.h, p0/z, z0\.h -+** fmul z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mul_h4_f16_z_tied1, svfloat16_t, __fp16, -+ z0 = svmul_n_f16_z (p0, z0, d4), -+ z0 = svmul_z (p0, z0, d4)) -+ -+/* -+** mul_h4_f16_z_untied: -+** mov (z[0-9]+\.h), h4 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fmul z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** fmul z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (mul_h4_f16_z_untied, svfloat16_t, __fp16, -+ z0 = svmul_n_f16_z (p0, z1, d4), -+ z0 = svmul_z (p0, z1, d4)) -+ -+/* -+** mul_1_f16_z_tied1: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? -+** movprfx z0\.h, p0/z, z0\.h -+** fmul z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_1_f16_z_tied1, svfloat16_t, -+ z0 = svmul_n_f16_z (p0, z0, 1), -+ z0 = svmul_z (p0, z0, 1)) -+ -+/* -+** mul_1_f16_z_untied: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fmul z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** fmul z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_1_f16_z_untied, svfloat16_t, -+ z0 = svmul_n_f16_z (p0, z1, 1), -+ z0 = svmul_z (p0, z1, 1)) -+ -+/* -+** mul_0p5_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fmul z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_0p5_f16_z_tied1, svfloat16_t, -+ z0 = svmul_n_f16_z (p0, z0, 0.5), -+ z0 = svmul_z (p0, z0, 0.5)) -+ -+/* -+** mul_0p5_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fmul z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_0p5_f16_z_untied, svfloat16_t, -+ z0 = svmul_n_f16_z (p0, z1, 0.5), -+ z0 = svmul_z (p0, z1, 0.5)) -+ -+/* -+** mul_2_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fmul z0\.h, p0/m, z0\.h, #2\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_f16_z_tied1, svfloat16_t, -+ z0 = svmul_n_f16_z (p0, z0, 2), -+ z0 = svmul_z (p0, z0, 2)) -+ -+/* -+** mul_2_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fmul z0\.h, p0/m, z0\.h, #2\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_f16_z_untied, svfloat16_t, -+ z0 = svmul_n_f16_z (p0, z1, 2), -+ z0 = svmul_z (p0, z1, 2)) -+ -+/* -+** mul_f16_x_tied1: -+** fmul z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f16_x_tied1, svfloat16_t, -+ z0 = svmul_f16_x (p0, z0, z1), -+ z0 = svmul_x (p0, z0, z1)) -+ -+/* -+** mul_f16_x_tied2: -+** fmul z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f16_x_tied2, svfloat16_t, -+ z0 = svmul_f16_x (p0, z1, z0), -+ z0 = svmul_x (p0, z1, z0)) -+ -+/* -+** mul_f16_x_untied: -+** ( -+** movprfx z0, z1 -+** fmul z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0, z2 -+** fmul z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f16_x_untied, svfloat16_t, -+ z0 = svmul_f16_x (p0, z1, z2), -+ z0 = svmul_x (p0, z1, z2)) -+ -+/* -+** mul_h4_f16_x_tied1: -+** mov (z[0-9]+\.h), h4 -+** fmul z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mul_h4_f16_x_tied1, svfloat16_t, __fp16, -+ z0 = svmul_n_f16_x (p0, z0, d4), -+ z0 = svmul_x (p0, z0, d4)) -+ -+/* -+** mul_h4_f16_x_untied: -+** mov z0\.h, h4 -+** fmul z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZD (mul_h4_f16_x_untied, svfloat16_t, __fp16, -+ z0 = svmul_n_f16_x (p0, z1, d4), -+ z0 = svmul_x (p0, z1, d4)) -+ -+/* -+** mul_1_f16_x_tied1: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? -+** fmul z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_1_f16_x_tied1, svfloat16_t, -+ z0 = svmul_n_f16_x (p0, z0, 1), -+ z0 = svmul_x (p0, z0, 1)) -+ -+/* -+** mul_1_f16_x_untied: -+** fmov z0\.h, #1\.0(?:e\+0)? 
-+** fmul z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mul_1_f16_x_untied, svfloat16_t, -+ z0 = svmul_n_f16_x (p0, z1, 1), -+ z0 = svmul_x (p0, z1, 1)) -+ -+/* -+** mul_0p5_f16_x_tied1: -+** fmul z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_0p5_f16_x_tied1, svfloat16_t, -+ z0 = svmul_n_f16_x (p0, z0, 0.5), -+ z0 = svmul_x (p0, z0, 0.5)) -+ -+/* -+** mul_0p5_f16_x_untied: -+** movprfx z0, z1 -+** fmul z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_0p5_f16_x_untied, svfloat16_t, -+ z0 = svmul_n_f16_x (p0, z1, 0.5), -+ z0 = svmul_x (p0, z1, 0.5)) -+ -+/* -+** mul_2_f16_x_tied1: -+** fmul z0\.h, p0/m, z0\.h, #2\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_f16_x_tied1, svfloat16_t, -+ z0 = svmul_n_f16_x (p0, z0, 2), -+ z0 = svmul_x (p0, z0, 2)) -+ -+/* -+** mul_2_f16_x_untied: -+** movprfx z0, z1 -+** fmul z0\.h, p0/m, z0\.h, #2\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_f16_x_untied, svfloat16_t, -+ z0 = svmul_n_f16_x (p0, z1, 2), -+ z0 = svmul_x (p0, z1, 2)) -+ -+/* -+** ptrue_mul_f16_x_tied1: -+** fmul z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_f16_x_tied1, svfloat16_t, -+ z0 = svmul_f16_x (svptrue_b16 (), z0, z1), -+ z0 = svmul_x (svptrue_b16 (), z0, z1)) -+ -+/* -+** ptrue_mul_f16_x_tied2: -+** fmul z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_f16_x_tied2, svfloat16_t, -+ z0 = svmul_f16_x (svptrue_b16 (), z1, z0), -+ z0 = svmul_x (svptrue_b16 (), z1, z0)) -+ -+/* -+** ptrue_mul_f16_x_untied: -+** fmul z0\.h, (z1\.h, z2\.h|z2\.h, z1\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_f16_x_untied, svfloat16_t, -+ z0 = svmul_f16_x (svptrue_b16 (), z1, z2), -+ z0 = svmul_x (svptrue_b16 (), z1, z2)) -+ -+/* -+** ptrue_mul_1_f16_x_tied1: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? -+** fmul z0\.h, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_1_f16_x_tied1, svfloat16_t, -+ z0 = svmul_n_f16_x (svptrue_b16 (), z0, 1), -+ z0 = svmul_x (svptrue_b16 (), z0, 1)) -+ -+/* -+** ptrue_mul_1_f16_x_untied: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? -+** fmul z0\.h, (z1\.h, \1|\1, z1\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_1_f16_x_untied, svfloat16_t, -+ z0 = svmul_n_f16_x (svptrue_b16 (), z1, 1), -+ z0 = svmul_x (svptrue_b16 (), z1, 1)) -+ -+/* -+** ptrue_mul_0p5_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_0p5_f16_x_tied1, svfloat16_t, -+ z0 = svmul_n_f16_x (svptrue_b16 (), z0, 0.5), -+ z0 = svmul_x (svptrue_b16 (), z0, 0.5)) -+ -+/* -+** ptrue_mul_0p5_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_0p5_f16_x_untied, svfloat16_t, -+ z0 = svmul_n_f16_x (svptrue_b16 (), z1, 0.5), -+ z0 = svmul_x (svptrue_b16 (), z1, 0.5)) -+ -+/* -+** ptrue_mul_2_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_2_f16_x_tied1, svfloat16_t, -+ z0 = svmul_n_f16_x (svptrue_b16 (), z0, 2), -+ z0 = svmul_x (svptrue_b16 (), z0, 2)) -+ -+/* -+** ptrue_mul_2_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_2_f16_x_untied, svfloat16_t, -+ z0 = svmul_n_f16_x (svptrue_b16 (), z1, 2), -+ z0 = svmul_x (svptrue_b16 (), z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_f16_notrap.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_f16_notrap.c -new file mode 100644 -index 000000000..481fe999c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_f16_notrap.c -@@ -0,0 +1,439 @@ -+/* { dg-additional-options "-fno-trapping-math" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mul_f16_m_tied1: -+** fmul z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f16_m_tied1, svfloat16_t, -+ z0 = svmul_f16_m (p0, z0, z1), -+ z0 = svmul_m (p0, z0, z1)) -+ -+/* -+** mul_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fmul z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f16_m_tied2, svfloat16_t, -+ z0 = svmul_f16_m (p0, z1, z0), -+ z0 = svmul_m (p0, z1, z0)) -+ -+/* -+** mul_f16_m_untied: -+** movprfx z0, z1 -+** fmul z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f16_m_untied, svfloat16_t, -+ z0 = svmul_f16_m (p0, z1, z2), -+ z0 = svmul_m (p0, z1, z2)) -+ -+/* -+** mul_h4_f16_m_tied1: -+** mov (z[0-9]+\.h), h4 -+** fmul z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mul_h4_f16_m_tied1, svfloat16_t, __fp16, -+ z0 = svmul_n_f16_m (p0, z0, d4), -+ z0 = svmul_m (p0, z0, d4)) -+ -+/* -+** mul_h4_f16_m_untied: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0, z1 -+** fmul z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mul_h4_f16_m_untied, svfloat16_t, __fp16, -+ z0 = svmul_n_f16_m (p0, z1, d4), -+ z0 = svmul_m (p0, z1, d4)) -+ -+/* -+** mul_1_f16_m_tied1: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? -+** fmul z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_1_f16_m_tied1, svfloat16_t, -+ z0 = svmul_n_f16_m (p0, z0, 1), -+ z0 = svmul_m (p0, z0, 1)) -+ -+/* -+** mul_1_f16_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fmul z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_1_f16_m_untied, svfloat16_t, -+ z0 = svmul_n_f16_m (p0, z1, 1), -+ z0 = svmul_m (p0, z1, 1)) -+ -+/* -+** mul_0p5_f16_m_tied1: -+** fmul z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_0p5_f16_m_tied1, svfloat16_t, -+ z0 = svmul_n_f16_m (p0, z0, 0.5), -+ z0 = svmul_m (p0, z0, 0.5)) -+ -+/* -+** mul_0p5_f16_m_untied: -+** movprfx z0, z1 -+** fmul z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_0p5_f16_m_untied, svfloat16_t, -+ z0 = svmul_n_f16_m (p0, z1, 0.5), -+ z0 = svmul_m (p0, z1, 0.5)) -+ -+/* -+** mul_2_f16_m_tied1: -+** fmul z0\.h, p0/m, z0\.h, #2\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_f16_m_tied1, svfloat16_t, -+ z0 = svmul_n_f16_m (p0, z0, 2), -+ z0 = svmul_m (p0, z0, 2)) -+ -+/* -+** mul_2_f16_m_untied: -+** movprfx z0, z1 -+** fmul z0\.h, p0/m, z0\.h, #2\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_f16_m_untied, svfloat16_t, -+ z0 = svmul_n_f16_m (p0, z1, 2), -+ z0 = svmul_m (p0, z1, 2)) -+ -+/* -+** mul_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fmul z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f16_z_tied1, svfloat16_t, -+ z0 = svmul_f16_z (p0, z0, z1), -+ z0 = svmul_z (p0, z0, z1)) -+ -+/* -+** mul_f16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** fmul z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f16_z_tied2, svfloat16_t, -+ z0 = svmul_f16_z (p0, z1, z0), -+ z0 = svmul_z (p0, z1, z0)) -+ -+/* -+** mul_f16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fmul z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fmul z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f16_z_untied, svfloat16_t, -+ z0 = svmul_f16_z (p0, z1, z2), -+ z0 = svmul_z (p0, z1, z2)) -+ -+/* -+** mul_h4_f16_z_tied1: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0\.h, p0/z, z0\.h -+** fmul z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mul_h4_f16_z_tied1, svfloat16_t, __fp16, -+ z0 = svmul_n_f16_z (p0, z0, d4), -+ z0 = svmul_z (p0, z0, d4)) -+ -+/* -+** mul_h4_f16_z_untied: -+** mov (z[0-9]+\.h), h4 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fmul z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** fmul z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (mul_h4_f16_z_untied, svfloat16_t, __fp16, -+ z0 = svmul_n_f16_z (p0, z1, d4), -+ z0 = svmul_z (p0, z1, d4)) -+ -+/* -+** mul_1_f16_z_tied1: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? -+** movprfx z0\.h, p0/z, z0\.h -+** fmul z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_1_f16_z_tied1, svfloat16_t, -+ z0 = svmul_n_f16_z (p0, z0, 1), -+ z0 = svmul_z (p0, z0, 1)) -+ -+/* -+** mul_1_f16_z_untied: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fmul z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** fmul z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_1_f16_z_untied, svfloat16_t, -+ z0 = svmul_n_f16_z (p0, z1, 1), -+ z0 = svmul_z (p0, z1, 1)) -+ -+/* -+** mul_0p5_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fmul z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_0p5_f16_z_tied1, svfloat16_t, -+ z0 = svmul_n_f16_z (p0, z0, 0.5), -+ z0 = svmul_z (p0, z0, 0.5)) -+ -+/* -+** mul_0p5_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fmul z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_0p5_f16_z_untied, svfloat16_t, -+ z0 = svmul_n_f16_z (p0, z1, 0.5), -+ z0 = svmul_z (p0, z1, 0.5)) -+ -+/* -+** mul_2_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fmul z0\.h, p0/m, z0\.h, #2\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_f16_z_tied1, svfloat16_t, -+ z0 = svmul_n_f16_z (p0, z0, 2), -+ z0 = svmul_z (p0, z0, 2)) -+ -+/* -+** mul_2_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fmul z0\.h, p0/m, z0\.h, #2\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_f16_z_untied, svfloat16_t, -+ z0 = svmul_n_f16_z (p0, z1, 2), -+ z0 = svmul_z (p0, z1, 2)) -+ -+/* -+** mul_f16_x_tied1: -+** fmul z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f16_x_tied1, svfloat16_t, -+ z0 = svmul_f16_x (p0, z0, z1), -+ z0 = svmul_x (p0, z0, z1)) -+ -+/* -+** mul_f16_x_tied2: -+** fmul z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f16_x_tied2, svfloat16_t, -+ z0 = svmul_f16_x (p0, z1, z0), -+ z0 = svmul_x (p0, z1, z0)) -+ -+/* -+** mul_f16_x_untied: -+** fmul z0\.h, (z1\.h, z2\.h|z2\.h, z1\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f16_x_untied, svfloat16_t, -+ z0 = svmul_f16_x (p0, z1, z2), -+ z0 = svmul_x (p0, z1, z2)) -+ -+/* -+** mul_h4_f16_x_tied1: -+** mov (z[0-9]+\.h), h4 -+** fmul z0\.h, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_ZD (mul_h4_f16_x_tied1, svfloat16_t, __fp16, -+ z0 = svmul_n_f16_x (p0, z0, d4), -+ z0 = svmul_x (p0, z0, d4)) -+ -+/* -+** mul_h4_f16_x_untied: -+** mov (z[0-9]+\.h), h4 -+** fmul z0\.h, (z1\.h, \1|\1, z1\.h) -+** ret -+*/ -+TEST_UNIFORM_ZD (mul_h4_f16_x_untied, svfloat16_t, __fp16, -+ z0 = svmul_n_f16_x (p0, z1, d4), -+ z0 = svmul_x (p0, z1, d4)) -+ -+/* -+** mul_1_f16_x_tied1: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? -+** fmul z0\.h, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_1_f16_x_tied1, svfloat16_t, -+ z0 = svmul_n_f16_x (p0, z0, 1), -+ z0 = svmul_x (p0, z0, 1)) -+ -+/* -+** mul_1_f16_x_untied: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? 
-+** fmul z0\.h, (z1\.h, \1|\1, z1\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_1_f16_x_untied, svfloat16_t, -+ z0 = svmul_n_f16_x (p0, z1, 1), -+ z0 = svmul_x (p0, z1, 1)) -+ -+/* -+** mul_0p5_f16_x_tied1: -+** fmul z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_0p5_f16_x_tied1, svfloat16_t, -+ z0 = svmul_n_f16_x (p0, z0, 0.5), -+ z0 = svmul_x (p0, z0, 0.5)) -+ -+/* -+** mul_0p5_f16_x_untied: -+** movprfx z0, z1 -+** fmul z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_0p5_f16_x_untied, svfloat16_t, -+ z0 = svmul_n_f16_x (p0, z1, 0.5), -+ z0 = svmul_x (p0, z1, 0.5)) -+ -+/* -+** mul_2_f16_x_tied1: -+** fmul z0\.h, p0/m, z0\.h, #2\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_f16_x_tied1, svfloat16_t, -+ z0 = svmul_n_f16_x (p0, z0, 2), -+ z0 = svmul_x (p0, z0, 2)) -+ -+/* -+** mul_2_f16_x_untied: -+** movprfx z0, z1 -+** fmul z0\.h, p0/m, z0\.h, #2\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_f16_x_untied, svfloat16_t, -+ z0 = svmul_n_f16_x (p0, z1, 2), -+ z0 = svmul_x (p0, z1, 2)) -+ -+/* -+** ptrue_mul_f16_x_tied1: -+** fmul z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_f16_x_tied1, svfloat16_t, -+ z0 = svmul_f16_x (svptrue_b16 (), z0, z1), -+ z0 = svmul_x (svptrue_b16 (), z0, z1)) -+ -+/* -+** ptrue_mul_f16_x_tied2: -+** fmul z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_f16_x_tied2, svfloat16_t, -+ z0 = svmul_f16_x (svptrue_b16 (), z1, z0), -+ z0 = svmul_x (svptrue_b16 (), z1, z0)) -+ -+/* -+** ptrue_mul_f16_x_untied: -+** fmul z0\.h, (z1\.h, z2\.h|z2\.h, z1\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_f16_x_untied, svfloat16_t, -+ z0 = svmul_f16_x (svptrue_b16 (), z1, z2), -+ z0 = svmul_x (svptrue_b16 (), z1, z2)) -+ -+/* -+** ptrue_mul_1_f16_x_tied1: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? -+** fmul z0\.h, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_1_f16_x_tied1, svfloat16_t, -+ z0 = svmul_n_f16_x (svptrue_b16 (), z0, 1), -+ z0 = svmul_x (svptrue_b16 (), z0, 1)) -+ -+/* -+** ptrue_mul_1_f16_x_untied: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? -+** fmul z0\.h, (z1\.h, \1|\1, z1\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_1_f16_x_untied, svfloat16_t, -+ z0 = svmul_n_f16_x (svptrue_b16 (), z1, 1), -+ z0 = svmul_x (svptrue_b16 (), z1, 1)) -+ -+/* -+** ptrue_mul_0p5_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_0p5_f16_x_tied1, svfloat16_t, -+ z0 = svmul_n_f16_x (svptrue_b16 (), z0, 0.5), -+ z0 = svmul_x (svptrue_b16 (), z0, 0.5)) -+ -+/* -+** ptrue_mul_0p5_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_0p5_f16_x_untied, svfloat16_t, -+ z0 = svmul_n_f16_x (svptrue_b16 (), z1, 0.5), -+ z0 = svmul_x (svptrue_b16 (), z1, 0.5)) -+ -+/* -+** ptrue_mul_2_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_2_f16_x_tied1, svfloat16_t, -+ z0 = svmul_n_f16_x (svptrue_b16 (), z0, 2), -+ z0 = svmul_x (svptrue_b16 (), z0, 2)) -+ -+/* -+** ptrue_mul_2_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_2_f16_x_untied, svfloat16_t, -+ z0 = svmul_n_f16_x (svptrue_b16 (), z1, 2), -+ z0 = svmul_x (svptrue_b16 (), z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_f32.c -new file mode 100644 -index 000000000..5b3df6fde ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_f32.c -@@ -0,0 +1,444 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mul_f32_m_tied1: -+** fmul z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f32_m_tied1, svfloat32_t, -+ z0 = svmul_f32_m (p0, z0, z1), -+ z0 = svmul_m (p0, z0, z1)) -+ -+/* -+** mul_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fmul z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f32_m_tied2, svfloat32_t, -+ z0 = svmul_f32_m (p0, z1, z0), -+ z0 = svmul_m (p0, z1, z0)) -+ -+/* -+** mul_f32_m_untied: -+** movprfx z0, z1 -+** fmul z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f32_m_untied, svfloat32_t, -+ z0 = svmul_f32_m (p0, z1, z2), -+ z0 = svmul_m (p0, z1, z2)) -+ -+/* -+** mul_s4_f32_m_tied1: -+** mov (z[0-9]+\.s), s4 -+** fmul z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mul_s4_f32_m_tied1, svfloat32_t, float, -+ z0 = svmul_n_f32_m (p0, z0, d4), -+ z0 = svmul_m (p0, z0, d4)) -+ -+/* -+** mul_s4_f32_m_untied: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0, z1 -+** fmul z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mul_s4_f32_m_untied, svfloat32_t, float, -+ z0 = svmul_n_f32_m (p0, z1, d4), -+ z0 = svmul_m (p0, z1, d4)) -+ -+/* -+** mul_1_f32_m_tied1: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? -+** fmul z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_1_f32_m_tied1, svfloat32_t, -+ z0 = svmul_n_f32_m (p0, z0, 1), -+ z0 = svmul_m (p0, z0, 1)) -+ -+/* -+** mul_1_f32_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fmul z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_1_f32_m_untied, svfloat32_t, -+ z0 = svmul_n_f32_m (p0, z1, 1), -+ z0 = svmul_m (p0, z1, 1)) -+ -+/* -+** mul_0p5_f32_m_tied1: -+** fmul z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_0p5_f32_m_tied1, svfloat32_t, -+ z0 = svmul_n_f32_m (p0, z0, 0.5), -+ z0 = svmul_m (p0, z0, 0.5)) -+ -+/* -+** mul_0p5_f32_m_untied: -+** movprfx z0, z1 -+** fmul z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_0p5_f32_m_untied, svfloat32_t, -+ z0 = svmul_n_f32_m (p0, z1, 0.5), -+ z0 = svmul_m (p0, z1, 0.5)) -+ -+/* -+** mul_2_f32_m_tied1: -+** fmul z0\.s, p0/m, z0\.s, #2\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_f32_m_tied1, svfloat32_t, -+ z0 = svmul_n_f32_m (p0, z0, 2), -+ z0 = svmul_m (p0, z0, 2)) -+ -+/* -+** mul_2_f32_m_untied: -+** movprfx z0, z1 -+** fmul z0\.s, p0/m, z0\.s, #2\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_f32_m_untied, svfloat32_t, -+ z0 = svmul_n_f32_m (p0, z1, 2), -+ z0 = svmul_m (p0, z1, 2)) -+ -+/* -+** mul_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fmul z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f32_z_tied1, svfloat32_t, -+ z0 = svmul_f32_z (p0, z0, z1), -+ z0 = svmul_z (p0, z0, z1)) -+ -+/* -+** mul_f32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** fmul z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f32_z_tied2, svfloat32_t, -+ z0 = svmul_f32_z (p0, z1, z0), -+ z0 = svmul_z (p0, z1, z0)) -+ -+/* -+** mul_f32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fmul z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fmul z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f32_z_untied, svfloat32_t, -+ z0 = svmul_f32_z (p0, z1, z2), -+ z0 = svmul_z (p0, z1, z2)) -+ -+/* -+** mul_s4_f32_z_tied1: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0\.s, p0/z, z0\.s -+** fmul z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mul_s4_f32_z_tied1, svfloat32_t, float, -+ z0 = svmul_n_f32_z (p0, z0, d4), -+ z0 = svmul_z (p0, z0, d4)) -+ -+/* -+** mul_s4_f32_z_untied: -+** mov (z[0-9]+\.s), s4 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fmul z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** fmul z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (mul_s4_f32_z_untied, svfloat32_t, float, -+ z0 = svmul_n_f32_z (p0, z1, d4), -+ z0 = svmul_z (p0, z1, d4)) -+ -+/* -+** mul_1_f32_z_tied1: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? -+** movprfx z0\.s, p0/z, z0\.s -+** fmul z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_1_f32_z_tied1, svfloat32_t, -+ z0 = svmul_n_f32_z (p0, z0, 1), -+ z0 = svmul_z (p0, z0, 1)) -+ -+/* -+** mul_1_f32_z_untied: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fmul z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** fmul z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_1_f32_z_untied, svfloat32_t, -+ z0 = svmul_n_f32_z (p0, z1, 1), -+ z0 = svmul_z (p0, z1, 1)) -+ -+/* -+** mul_0p5_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fmul z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_0p5_f32_z_tied1, svfloat32_t, -+ z0 = svmul_n_f32_z (p0, z0, 0.5), -+ z0 = svmul_z (p0, z0, 0.5)) -+ -+/* -+** mul_0p5_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fmul z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_0p5_f32_z_untied, svfloat32_t, -+ z0 = svmul_n_f32_z (p0, z1, 0.5), -+ z0 = svmul_z (p0, z1, 0.5)) -+ -+/* -+** mul_2_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fmul z0\.s, p0/m, z0\.s, #2\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_f32_z_tied1, svfloat32_t, -+ z0 = svmul_n_f32_z (p0, z0, 2), -+ z0 = svmul_z (p0, z0, 2)) -+ -+/* -+** mul_2_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fmul z0\.s, p0/m, z0\.s, #2\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_f32_z_untied, svfloat32_t, -+ z0 = svmul_n_f32_z (p0, z1, 2), -+ z0 = svmul_z (p0, z1, 2)) -+ -+/* -+** mul_f32_x_tied1: -+** fmul z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f32_x_tied1, svfloat32_t, -+ z0 = svmul_f32_x (p0, z0, z1), -+ z0 = svmul_x (p0, z0, z1)) -+ -+/* -+** mul_f32_x_tied2: -+** fmul z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f32_x_tied2, svfloat32_t, -+ z0 = svmul_f32_x (p0, z1, z0), -+ z0 = svmul_x (p0, z1, z0)) -+ -+/* -+** mul_f32_x_untied: -+** ( -+** movprfx z0, z1 -+** fmul z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0, z2 -+** fmul z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f32_x_untied, svfloat32_t, -+ z0 = svmul_f32_x (p0, z1, z2), -+ z0 = svmul_x (p0, z1, z2)) -+ -+/* -+** mul_s4_f32_x_tied1: -+** mov (z[0-9]+\.s), s4 -+** fmul z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mul_s4_f32_x_tied1, svfloat32_t, float, -+ z0 = svmul_n_f32_x (p0, z0, d4), -+ z0 = svmul_x (p0, z0, d4)) -+ -+/* -+** mul_s4_f32_x_untied: -+** mov z0\.s, s4 -+** fmul z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZD (mul_s4_f32_x_untied, svfloat32_t, float, -+ z0 = svmul_n_f32_x (p0, z1, d4), -+ z0 = svmul_x (p0, z1, d4)) -+ -+/* -+** mul_1_f32_x_tied1: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? -+** fmul z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_1_f32_x_tied1, svfloat32_t, -+ z0 = svmul_n_f32_x (p0, z0, 1), -+ z0 = svmul_x (p0, z0, 1)) -+ -+/* -+** mul_1_f32_x_untied: -+** fmov z0\.s, #1\.0(?:e\+0)? 
-+** fmul z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mul_1_f32_x_untied, svfloat32_t, -+ z0 = svmul_n_f32_x (p0, z1, 1), -+ z0 = svmul_x (p0, z1, 1)) -+ -+/* -+** mul_0p5_f32_x_tied1: -+** fmul z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_0p5_f32_x_tied1, svfloat32_t, -+ z0 = svmul_n_f32_x (p0, z0, 0.5), -+ z0 = svmul_x (p0, z0, 0.5)) -+ -+/* -+** mul_0p5_f32_x_untied: -+** movprfx z0, z1 -+** fmul z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_0p5_f32_x_untied, svfloat32_t, -+ z0 = svmul_n_f32_x (p0, z1, 0.5), -+ z0 = svmul_x (p0, z1, 0.5)) -+ -+/* -+** mul_2_f32_x_tied1: -+** fmul z0\.s, p0/m, z0\.s, #2\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_f32_x_tied1, svfloat32_t, -+ z0 = svmul_n_f32_x (p0, z0, 2), -+ z0 = svmul_x (p0, z0, 2)) -+ -+/* -+** mul_2_f32_x_untied: -+** movprfx z0, z1 -+** fmul z0\.s, p0/m, z0\.s, #2\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_f32_x_untied, svfloat32_t, -+ z0 = svmul_n_f32_x (p0, z1, 2), -+ z0 = svmul_x (p0, z1, 2)) -+ -+/* -+** ptrue_mul_f32_x_tied1: -+** fmul z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_f32_x_tied1, svfloat32_t, -+ z0 = svmul_f32_x (svptrue_b32 (), z0, z1), -+ z0 = svmul_x (svptrue_b32 (), z0, z1)) -+ -+/* -+** ptrue_mul_f32_x_tied2: -+** fmul z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_f32_x_tied2, svfloat32_t, -+ z0 = svmul_f32_x (svptrue_b32 (), z1, z0), -+ z0 = svmul_x (svptrue_b32 (), z1, z0)) -+ -+/* -+** ptrue_mul_f32_x_untied: -+** fmul z0\.s, (z1\.s, z2\.s|z2\.s, z1\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_f32_x_untied, svfloat32_t, -+ z0 = svmul_f32_x (svptrue_b32 (), z1, z2), -+ z0 = svmul_x (svptrue_b32 (), z1, z2)) -+ -+/* -+** ptrue_mul_1_f32_x_tied1: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? -+** fmul z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_1_f32_x_tied1, svfloat32_t, -+ z0 = svmul_n_f32_x (svptrue_b32 (), z0, 1), -+ z0 = svmul_x (svptrue_b32 (), z0, 1)) -+ -+/* -+** ptrue_mul_1_f32_x_untied: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? -+** fmul z0\.s, (z1\.s, \1|\1, z1\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_1_f32_x_untied, svfloat32_t, -+ z0 = svmul_n_f32_x (svptrue_b32 (), z1, 1), -+ z0 = svmul_x (svptrue_b32 (), z1, 1)) -+ -+/* -+** ptrue_mul_0p5_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_0p5_f32_x_tied1, svfloat32_t, -+ z0 = svmul_n_f32_x (svptrue_b32 (), z0, 0.5), -+ z0 = svmul_x (svptrue_b32 (), z0, 0.5)) -+ -+/* -+** ptrue_mul_0p5_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_0p5_f32_x_untied, svfloat32_t, -+ z0 = svmul_n_f32_x (svptrue_b32 (), z1, 0.5), -+ z0 = svmul_x (svptrue_b32 (), z1, 0.5)) -+ -+/* -+** ptrue_mul_2_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_2_f32_x_tied1, svfloat32_t, -+ z0 = svmul_n_f32_x (svptrue_b32 (), z0, 2), -+ z0 = svmul_x (svptrue_b32 (), z0, 2)) -+ -+/* -+** ptrue_mul_2_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_2_f32_x_untied, svfloat32_t, -+ z0 = svmul_n_f32_x (svptrue_b32 (), z1, 2), -+ z0 = svmul_x (svptrue_b32 (), z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_f32_notrap.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_f32_notrap.c -new file mode 100644 -index 000000000..eb2d240ef ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_f32_notrap.c -@@ -0,0 +1,439 @@ -+/* { dg-additional-options "-fno-trapping-math" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mul_f32_m_tied1: -+** fmul z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f32_m_tied1, svfloat32_t, -+ z0 = svmul_f32_m (p0, z0, z1), -+ z0 = svmul_m (p0, z0, z1)) -+ -+/* -+** mul_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fmul z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f32_m_tied2, svfloat32_t, -+ z0 = svmul_f32_m (p0, z1, z0), -+ z0 = svmul_m (p0, z1, z0)) -+ -+/* -+** mul_f32_m_untied: -+** movprfx z0, z1 -+** fmul z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f32_m_untied, svfloat32_t, -+ z0 = svmul_f32_m (p0, z1, z2), -+ z0 = svmul_m (p0, z1, z2)) -+ -+/* -+** mul_s4_f32_m_tied1: -+** mov (z[0-9]+\.s), s4 -+** fmul z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mul_s4_f32_m_tied1, svfloat32_t, float, -+ z0 = svmul_n_f32_m (p0, z0, d4), -+ z0 = svmul_m (p0, z0, d4)) -+ -+/* -+** mul_s4_f32_m_untied: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0, z1 -+** fmul z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mul_s4_f32_m_untied, svfloat32_t, float, -+ z0 = svmul_n_f32_m (p0, z1, d4), -+ z0 = svmul_m (p0, z1, d4)) -+ -+/* -+** mul_1_f32_m_tied1: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? -+** fmul z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_1_f32_m_tied1, svfloat32_t, -+ z0 = svmul_n_f32_m (p0, z0, 1), -+ z0 = svmul_m (p0, z0, 1)) -+ -+/* -+** mul_1_f32_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fmul z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_1_f32_m_untied, svfloat32_t, -+ z0 = svmul_n_f32_m (p0, z1, 1), -+ z0 = svmul_m (p0, z1, 1)) -+ -+/* -+** mul_0p5_f32_m_tied1: -+** fmul z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_0p5_f32_m_tied1, svfloat32_t, -+ z0 = svmul_n_f32_m (p0, z0, 0.5), -+ z0 = svmul_m (p0, z0, 0.5)) -+ -+/* -+** mul_0p5_f32_m_untied: -+** movprfx z0, z1 -+** fmul z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_0p5_f32_m_untied, svfloat32_t, -+ z0 = svmul_n_f32_m (p0, z1, 0.5), -+ z0 = svmul_m (p0, z1, 0.5)) -+ -+/* -+** mul_2_f32_m_tied1: -+** fmul z0\.s, p0/m, z0\.s, #2\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_f32_m_tied1, svfloat32_t, -+ z0 = svmul_n_f32_m (p0, z0, 2), -+ z0 = svmul_m (p0, z0, 2)) -+ -+/* -+** mul_2_f32_m_untied: -+** movprfx z0, z1 -+** fmul z0\.s, p0/m, z0\.s, #2\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_f32_m_untied, svfloat32_t, -+ z0 = svmul_n_f32_m (p0, z1, 2), -+ z0 = svmul_m (p0, z1, 2)) -+ -+/* -+** mul_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fmul z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f32_z_tied1, svfloat32_t, -+ z0 = svmul_f32_z (p0, z0, z1), -+ z0 = svmul_z (p0, z0, z1)) -+ -+/* -+** mul_f32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** fmul z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f32_z_tied2, svfloat32_t, -+ z0 = svmul_f32_z (p0, z1, z0), -+ z0 = svmul_z (p0, z1, z0)) -+ -+/* -+** mul_f32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fmul z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fmul z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f32_z_untied, svfloat32_t, -+ z0 = svmul_f32_z (p0, z1, z2), -+ z0 = svmul_z (p0, z1, z2)) -+ -+/* -+** mul_s4_f32_z_tied1: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0\.s, p0/z, z0\.s -+** fmul z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mul_s4_f32_z_tied1, svfloat32_t, float, -+ z0 = svmul_n_f32_z (p0, z0, d4), -+ z0 = svmul_z (p0, z0, d4)) -+ -+/* -+** mul_s4_f32_z_untied: -+** mov (z[0-9]+\.s), s4 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fmul z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** fmul z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (mul_s4_f32_z_untied, svfloat32_t, float, -+ z0 = svmul_n_f32_z (p0, z1, d4), -+ z0 = svmul_z (p0, z1, d4)) -+ -+/* -+** mul_1_f32_z_tied1: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? -+** movprfx z0\.s, p0/z, z0\.s -+** fmul z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_1_f32_z_tied1, svfloat32_t, -+ z0 = svmul_n_f32_z (p0, z0, 1), -+ z0 = svmul_z (p0, z0, 1)) -+ -+/* -+** mul_1_f32_z_untied: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fmul z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** fmul z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_1_f32_z_untied, svfloat32_t, -+ z0 = svmul_n_f32_z (p0, z1, 1), -+ z0 = svmul_z (p0, z1, 1)) -+ -+/* -+** mul_0p5_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fmul z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_0p5_f32_z_tied1, svfloat32_t, -+ z0 = svmul_n_f32_z (p0, z0, 0.5), -+ z0 = svmul_z (p0, z0, 0.5)) -+ -+/* -+** mul_0p5_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fmul z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_0p5_f32_z_untied, svfloat32_t, -+ z0 = svmul_n_f32_z (p0, z1, 0.5), -+ z0 = svmul_z (p0, z1, 0.5)) -+ -+/* -+** mul_2_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fmul z0\.s, p0/m, z0\.s, #2\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_f32_z_tied1, svfloat32_t, -+ z0 = svmul_n_f32_z (p0, z0, 2), -+ z0 = svmul_z (p0, z0, 2)) -+ -+/* -+** mul_2_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fmul z0\.s, p0/m, z0\.s, #2\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_f32_z_untied, svfloat32_t, -+ z0 = svmul_n_f32_z (p0, z1, 2), -+ z0 = svmul_z (p0, z1, 2)) -+ -+/* -+** mul_f32_x_tied1: -+** fmul z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f32_x_tied1, svfloat32_t, -+ z0 = svmul_f32_x (p0, z0, z1), -+ z0 = svmul_x (p0, z0, z1)) -+ -+/* -+** mul_f32_x_tied2: -+** fmul z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f32_x_tied2, svfloat32_t, -+ z0 = svmul_f32_x (p0, z1, z0), -+ z0 = svmul_x (p0, z1, z0)) -+ -+/* -+** mul_f32_x_untied: -+** fmul z0\.s, (z1\.s, z2\.s|z2\.s, z1\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f32_x_untied, svfloat32_t, -+ z0 = svmul_f32_x (p0, z1, z2), -+ z0 = svmul_x (p0, z1, z2)) -+ -+/* -+** mul_s4_f32_x_tied1: -+** mov (z[0-9]+\.s), s4 -+** fmul z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_ZD (mul_s4_f32_x_tied1, svfloat32_t, float, -+ z0 = svmul_n_f32_x (p0, z0, d4), -+ z0 = svmul_x (p0, z0, d4)) -+ -+/* -+** mul_s4_f32_x_untied: -+** mov (z[0-9]+\.s), s4 -+** fmul z0\.s, (z1\.s, \1|\1, z1\.s) -+** ret -+*/ -+TEST_UNIFORM_ZD (mul_s4_f32_x_untied, svfloat32_t, float, -+ z0 = svmul_n_f32_x (p0, z1, d4), -+ z0 = svmul_x (p0, z1, d4)) -+ -+/* -+** mul_1_f32_x_tied1: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? -+** fmul z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_1_f32_x_tied1, svfloat32_t, -+ z0 = svmul_n_f32_x (p0, z0, 1), -+ z0 = svmul_x (p0, z0, 1)) -+ -+/* -+** mul_1_f32_x_untied: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? 
-+** fmul z0\.s, (z1\.s, \1|\1, z1\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_1_f32_x_untied, svfloat32_t, -+ z0 = svmul_n_f32_x (p0, z1, 1), -+ z0 = svmul_x (p0, z1, 1)) -+ -+/* -+** mul_0p5_f32_x_tied1: -+** fmul z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_0p5_f32_x_tied1, svfloat32_t, -+ z0 = svmul_n_f32_x (p0, z0, 0.5), -+ z0 = svmul_x (p0, z0, 0.5)) -+ -+/* -+** mul_0p5_f32_x_untied: -+** movprfx z0, z1 -+** fmul z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_0p5_f32_x_untied, svfloat32_t, -+ z0 = svmul_n_f32_x (p0, z1, 0.5), -+ z0 = svmul_x (p0, z1, 0.5)) -+ -+/* -+** mul_2_f32_x_tied1: -+** fmul z0\.s, p0/m, z0\.s, #2\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_f32_x_tied1, svfloat32_t, -+ z0 = svmul_n_f32_x (p0, z0, 2), -+ z0 = svmul_x (p0, z0, 2)) -+ -+/* -+** mul_2_f32_x_untied: -+** movprfx z0, z1 -+** fmul z0\.s, p0/m, z0\.s, #2\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_f32_x_untied, svfloat32_t, -+ z0 = svmul_n_f32_x (p0, z1, 2), -+ z0 = svmul_x (p0, z1, 2)) -+ -+/* -+** ptrue_mul_f32_x_tied1: -+** fmul z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_f32_x_tied1, svfloat32_t, -+ z0 = svmul_f32_x (svptrue_b32 (), z0, z1), -+ z0 = svmul_x (svptrue_b32 (), z0, z1)) -+ -+/* -+** ptrue_mul_f32_x_tied2: -+** fmul z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_f32_x_tied2, svfloat32_t, -+ z0 = svmul_f32_x (svptrue_b32 (), z1, z0), -+ z0 = svmul_x (svptrue_b32 (), z1, z0)) -+ -+/* -+** ptrue_mul_f32_x_untied: -+** fmul z0\.s, (z1\.s, z2\.s|z2\.s, z1\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_f32_x_untied, svfloat32_t, -+ z0 = svmul_f32_x (svptrue_b32 (), z1, z2), -+ z0 = svmul_x (svptrue_b32 (), z1, z2)) -+ -+/* -+** ptrue_mul_1_f32_x_tied1: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? -+** fmul z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_1_f32_x_tied1, svfloat32_t, -+ z0 = svmul_n_f32_x (svptrue_b32 (), z0, 1), -+ z0 = svmul_x (svptrue_b32 (), z0, 1)) -+ -+/* -+** ptrue_mul_1_f32_x_untied: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? -+** fmul z0\.s, (z1\.s, \1|\1, z1\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_1_f32_x_untied, svfloat32_t, -+ z0 = svmul_n_f32_x (svptrue_b32 (), z1, 1), -+ z0 = svmul_x (svptrue_b32 (), z1, 1)) -+ -+/* -+** ptrue_mul_0p5_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_0p5_f32_x_tied1, svfloat32_t, -+ z0 = svmul_n_f32_x (svptrue_b32 (), z0, 0.5), -+ z0 = svmul_x (svptrue_b32 (), z0, 0.5)) -+ -+/* -+** ptrue_mul_0p5_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_0p5_f32_x_untied, svfloat32_t, -+ z0 = svmul_n_f32_x (svptrue_b32 (), z1, 0.5), -+ z0 = svmul_x (svptrue_b32 (), z1, 0.5)) -+ -+/* -+** ptrue_mul_2_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_2_f32_x_tied1, svfloat32_t, -+ z0 = svmul_n_f32_x (svptrue_b32 (), z0, 2), -+ z0 = svmul_x (svptrue_b32 (), z0, 2)) -+ -+/* -+** ptrue_mul_2_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_2_f32_x_untied, svfloat32_t, -+ z0 = svmul_n_f32_x (svptrue_b32 (), z1, 2), -+ z0 = svmul_x (svptrue_b32 (), z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_f64.c -new file mode 100644 -index 000000000..f5654a9f1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_f64.c -@@ -0,0 +1,444 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mul_f64_m_tied1: -+** fmul z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f64_m_tied1, svfloat64_t, -+ z0 = svmul_f64_m (p0, z0, z1), -+ z0 = svmul_m (p0, z0, z1)) -+ -+/* -+** mul_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fmul z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f64_m_tied2, svfloat64_t, -+ z0 = svmul_f64_m (p0, z1, z0), -+ z0 = svmul_m (p0, z1, z0)) -+ -+/* -+** mul_f64_m_untied: -+** movprfx z0, z1 -+** fmul z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f64_m_untied, svfloat64_t, -+ z0 = svmul_f64_m (p0, z1, z2), -+ z0 = svmul_m (p0, z1, z2)) -+ -+/* -+** mul_d4_f64_m_tied1: -+** mov (z[0-9]+\.d), d4 -+** fmul z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mul_d4_f64_m_tied1, svfloat64_t, double, -+ z0 = svmul_n_f64_m (p0, z0, d4), -+ z0 = svmul_m (p0, z0, d4)) -+ -+/* -+** mul_d4_f64_m_untied: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0, z1 -+** fmul z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mul_d4_f64_m_untied, svfloat64_t, double, -+ z0 = svmul_n_f64_m (p0, z1, d4), -+ z0 = svmul_m (p0, z1, d4)) -+ -+/* -+** mul_1_f64_m_tied1: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? -+** fmul z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_1_f64_m_tied1, svfloat64_t, -+ z0 = svmul_n_f64_m (p0, z0, 1), -+ z0 = svmul_m (p0, z0, 1)) -+ -+/* -+** mul_1_f64_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fmul z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_1_f64_m_untied, svfloat64_t, -+ z0 = svmul_n_f64_m (p0, z1, 1), -+ z0 = svmul_m (p0, z1, 1)) -+ -+/* -+** mul_0p5_f64_m_tied1: -+** fmul z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_0p5_f64_m_tied1, svfloat64_t, -+ z0 = svmul_n_f64_m (p0, z0, 0.5), -+ z0 = svmul_m (p0, z0, 0.5)) -+ -+/* -+** mul_0p5_f64_m_untied: -+** movprfx z0, z1 -+** fmul z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_0p5_f64_m_untied, svfloat64_t, -+ z0 = svmul_n_f64_m (p0, z1, 0.5), -+ z0 = svmul_m (p0, z1, 0.5)) -+ -+/* -+** mul_2_f64_m_tied1: -+** fmul z0\.d, p0/m, z0\.d, #2\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_f64_m_tied1, svfloat64_t, -+ z0 = svmul_n_f64_m (p0, z0, 2), -+ z0 = svmul_m (p0, z0, 2)) -+ -+/* -+** mul_2_f64_m_untied: -+** movprfx z0, z1 -+** fmul z0\.d, p0/m, z0\.d, #2\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_f64_m_untied, svfloat64_t, -+ z0 = svmul_n_f64_m (p0, z1, 2), -+ z0 = svmul_m (p0, z1, 2)) -+ -+/* -+** mul_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fmul z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f64_z_tied1, svfloat64_t, -+ z0 = svmul_f64_z (p0, z0, z1), -+ z0 = svmul_z (p0, z0, z1)) -+ -+/* -+** mul_f64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** fmul z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f64_z_tied2, svfloat64_t, -+ z0 = svmul_f64_z (p0, z1, z0), -+ z0 = svmul_z (p0, z1, z0)) -+ -+/* -+** mul_f64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fmul z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fmul z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f64_z_untied, svfloat64_t, -+ z0 = svmul_f64_z (p0, z1, z2), -+ z0 = svmul_z (p0, z1, z2)) -+ -+/* -+** mul_d4_f64_z_tied1: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0\.d, p0/z, z0\.d -+** fmul z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mul_d4_f64_z_tied1, svfloat64_t, double, -+ z0 = svmul_n_f64_z (p0, z0, d4), -+ z0 = svmul_z (p0, z0, d4)) -+ -+/* -+** mul_d4_f64_z_untied: -+** mov (z[0-9]+\.d), d4 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fmul z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** fmul z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (mul_d4_f64_z_untied, svfloat64_t, double, -+ z0 = svmul_n_f64_z (p0, z1, d4), -+ z0 = svmul_z (p0, z1, d4)) -+ -+/* -+** mul_1_f64_z_tied1: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? -+** movprfx z0\.d, p0/z, z0\.d -+** fmul z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_1_f64_z_tied1, svfloat64_t, -+ z0 = svmul_n_f64_z (p0, z0, 1), -+ z0 = svmul_z (p0, z0, 1)) -+ -+/* -+** mul_1_f64_z_untied: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fmul z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** fmul z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_1_f64_z_untied, svfloat64_t, -+ z0 = svmul_n_f64_z (p0, z1, 1), -+ z0 = svmul_z (p0, z1, 1)) -+ -+/* -+** mul_0p5_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fmul z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_0p5_f64_z_tied1, svfloat64_t, -+ z0 = svmul_n_f64_z (p0, z0, 0.5), -+ z0 = svmul_z (p0, z0, 0.5)) -+ -+/* -+** mul_0p5_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fmul z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_0p5_f64_z_untied, svfloat64_t, -+ z0 = svmul_n_f64_z (p0, z1, 0.5), -+ z0 = svmul_z (p0, z1, 0.5)) -+ -+/* -+** mul_2_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fmul z0\.d, p0/m, z0\.d, #2\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_f64_z_tied1, svfloat64_t, -+ z0 = svmul_n_f64_z (p0, z0, 2), -+ z0 = svmul_z (p0, z0, 2)) -+ -+/* -+** mul_2_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fmul z0\.d, p0/m, z0\.d, #2\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_f64_z_untied, svfloat64_t, -+ z0 = svmul_n_f64_z (p0, z1, 2), -+ z0 = svmul_z (p0, z1, 2)) -+ -+/* -+** mul_f64_x_tied1: -+** fmul z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f64_x_tied1, svfloat64_t, -+ z0 = svmul_f64_x (p0, z0, z1), -+ z0 = svmul_x (p0, z0, z1)) -+ -+/* -+** mul_f64_x_tied2: -+** fmul z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f64_x_tied2, svfloat64_t, -+ z0 = svmul_f64_x (p0, z1, z0), -+ z0 = svmul_x (p0, z1, z0)) -+ -+/* -+** mul_f64_x_untied: -+** ( -+** movprfx z0, z1 -+** fmul z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0, z2 -+** fmul z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f64_x_untied, svfloat64_t, -+ z0 = svmul_f64_x (p0, z1, z2), -+ z0 = svmul_x (p0, z1, z2)) -+ -+/* -+** mul_d4_f64_x_tied1: -+** mov (z[0-9]+\.d), d4 -+** fmul z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mul_d4_f64_x_tied1, svfloat64_t, double, -+ z0 = svmul_n_f64_x (p0, z0, d4), -+ z0 = svmul_x (p0, z0, d4)) -+ -+/* -+** mul_d4_f64_x_untied: -+** mov z0\.d, d4 -+** fmul z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZD (mul_d4_f64_x_untied, svfloat64_t, double, -+ z0 = svmul_n_f64_x (p0, z1, d4), -+ z0 = svmul_x (p0, z1, d4)) -+ -+/* -+** mul_1_f64_x_tied1: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? -+** fmul z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_1_f64_x_tied1, svfloat64_t, -+ z0 = svmul_n_f64_x (p0, z0, 1), -+ z0 = svmul_x (p0, z0, 1)) -+ -+/* -+** mul_1_f64_x_untied: -+** fmov z0\.d, #1\.0(?:e\+0)? 
-+** fmul z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mul_1_f64_x_untied, svfloat64_t, -+ z0 = svmul_n_f64_x (p0, z1, 1), -+ z0 = svmul_x (p0, z1, 1)) -+ -+/* -+** mul_0p5_f64_x_tied1: -+** fmul z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_0p5_f64_x_tied1, svfloat64_t, -+ z0 = svmul_n_f64_x (p0, z0, 0.5), -+ z0 = svmul_x (p0, z0, 0.5)) -+ -+/* -+** mul_0p5_f64_x_untied: -+** movprfx z0, z1 -+** fmul z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_0p5_f64_x_untied, svfloat64_t, -+ z0 = svmul_n_f64_x (p0, z1, 0.5), -+ z0 = svmul_x (p0, z1, 0.5)) -+ -+/* -+** mul_2_f64_x_tied1: -+** fmul z0\.d, p0/m, z0\.d, #2\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_f64_x_tied1, svfloat64_t, -+ z0 = svmul_n_f64_x (p0, z0, 2), -+ z0 = svmul_x (p0, z0, 2)) -+ -+/* -+** mul_2_f64_x_untied: -+** movprfx z0, z1 -+** fmul z0\.d, p0/m, z0\.d, #2\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_f64_x_untied, svfloat64_t, -+ z0 = svmul_n_f64_x (p0, z1, 2), -+ z0 = svmul_x (p0, z1, 2)) -+ -+/* -+** ptrue_mul_f64_x_tied1: -+** fmul z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_f64_x_tied1, svfloat64_t, -+ z0 = svmul_f64_x (svptrue_b64 (), z0, z1), -+ z0 = svmul_x (svptrue_b64 (), z0, z1)) -+ -+/* -+** ptrue_mul_f64_x_tied2: -+** fmul z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_f64_x_tied2, svfloat64_t, -+ z0 = svmul_f64_x (svptrue_b64 (), z1, z0), -+ z0 = svmul_x (svptrue_b64 (), z1, z0)) -+ -+/* -+** ptrue_mul_f64_x_untied: -+** fmul z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_f64_x_untied, svfloat64_t, -+ z0 = svmul_f64_x (svptrue_b64 (), z1, z2), -+ z0 = svmul_x (svptrue_b64 (), z1, z2)) -+ -+/* -+** ptrue_mul_1_f64_x_tied1: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? -+** fmul z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_1_f64_x_tied1, svfloat64_t, -+ z0 = svmul_n_f64_x (svptrue_b64 (), z0, 1), -+ z0 = svmul_x (svptrue_b64 (), z0, 1)) -+ -+/* -+** ptrue_mul_1_f64_x_untied: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? -+** fmul z0\.d, (z1\.d, \1|\1, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_1_f64_x_untied, svfloat64_t, -+ z0 = svmul_n_f64_x (svptrue_b64 (), z1, 1), -+ z0 = svmul_x (svptrue_b64 (), z1, 1)) -+ -+/* -+** ptrue_mul_0p5_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_0p5_f64_x_tied1, svfloat64_t, -+ z0 = svmul_n_f64_x (svptrue_b64 (), z0, 0.5), -+ z0 = svmul_x (svptrue_b64 (), z0, 0.5)) -+ -+/* -+** ptrue_mul_0p5_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_0p5_f64_x_untied, svfloat64_t, -+ z0 = svmul_n_f64_x (svptrue_b64 (), z1, 0.5), -+ z0 = svmul_x (svptrue_b64 (), z1, 0.5)) -+ -+/* -+** ptrue_mul_2_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_2_f64_x_tied1, svfloat64_t, -+ z0 = svmul_n_f64_x (svptrue_b64 (), z0, 2), -+ z0 = svmul_x (svptrue_b64 (), z0, 2)) -+ -+/* -+** ptrue_mul_2_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_2_f64_x_untied, svfloat64_t, -+ z0 = svmul_n_f64_x (svptrue_b64 (), z1, 2), -+ z0 = svmul_x (svptrue_b64 (), z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_f64_notrap.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_f64_notrap.c -new file mode 100644 -index 000000000..d865618d4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_f64_notrap.c -@@ -0,0 +1,439 @@ -+/* { dg-additional-options "-fno-trapping-math" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mul_f64_m_tied1: -+** fmul z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f64_m_tied1, svfloat64_t, -+ z0 = svmul_f64_m (p0, z0, z1), -+ z0 = svmul_m (p0, z0, z1)) -+ -+/* -+** mul_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fmul z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f64_m_tied2, svfloat64_t, -+ z0 = svmul_f64_m (p0, z1, z0), -+ z0 = svmul_m (p0, z1, z0)) -+ -+/* -+** mul_f64_m_untied: -+** movprfx z0, z1 -+** fmul z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f64_m_untied, svfloat64_t, -+ z0 = svmul_f64_m (p0, z1, z2), -+ z0 = svmul_m (p0, z1, z2)) -+ -+/* -+** mul_d4_f64_m_tied1: -+** mov (z[0-9]+\.d), d4 -+** fmul z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mul_d4_f64_m_tied1, svfloat64_t, double, -+ z0 = svmul_n_f64_m (p0, z0, d4), -+ z0 = svmul_m (p0, z0, d4)) -+ -+/* -+** mul_d4_f64_m_untied: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0, z1 -+** fmul z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mul_d4_f64_m_untied, svfloat64_t, double, -+ z0 = svmul_n_f64_m (p0, z1, d4), -+ z0 = svmul_m (p0, z1, d4)) -+ -+/* -+** mul_1_f64_m_tied1: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? -+** fmul z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_1_f64_m_tied1, svfloat64_t, -+ z0 = svmul_n_f64_m (p0, z0, 1), -+ z0 = svmul_m (p0, z0, 1)) -+ -+/* -+** mul_1_f64_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fmul z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_1_f64_m_untied, svfloat64_t, -+ z0 = svmul_n_f64_m (p0, z1, 1), -+ z0 = svmul_m (p0, z1, 1)) -+ -+/* -+** mul_0p5_f64_m_tied1: -+** fmul z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_0p5_f64_m_tied1, svfloat64_t, -+ z0 = svmul_n_f64_m (p0, z0, 0.5), -+ z0 = svmul_m (p0, z0, 0.5)) -+ -+/* -+** mul_0p5_f64_m_untied: -+** movprfx z0, z1 -+** fmul z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_0p5_f64_m_untied, svfloat64_t, -+ z0 = svmul_n_f64_m (p0, z1, 0.5), -+ z0 = svmul_m (p0, z1, 0.5)) -+ -+/* -+** mul_2_f64_m_tied1: -+** fmul z0\.d, p0/m, z0\.d, #2\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_f64_m_tied1, svfloat64_t, -+ z0 = svmul_n_f64_m (p0, z0, 2), -+ z0 = svmul_m (p0, z0, 2)) -+ -+/* -+** mul_2_f64_m_untied: -+** movprfx z0, z1 -+** fmul z0\.d, p0/m, z0\.d, #2\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_f64_m_untied, svfloat64_t, -+ z0 = svmul_n_f64_m (p0, z1, 2), -+ z0 = svmul_m (p0, z1, 2)) -+ -+/* -+** mul_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fmul z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f64_z_tied1, svfloat64_t, -+ z0 = svmul_f64_z (p0, z0, z1), -+ z0 = svmul_z (p0, z0, z1)) -+ -+/* -+** mul_f64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** fmul z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f64_z_tied2, svfloat64_t, -+ z0 = svmul_f64_z (p0, z1, z0), -+ z0 = svmul_z (p0, z1, z0)) -+ -+/* -+** mul_f64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fmul z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fmul z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f64_z_untied, svfloat64_t, -+ z0 = svmul_f64_z (p0, z1, z2), -+ z0 = svmul_z (p0, z1, z2)) -+ -+/* -+** mul_d4_f64_z_tied1: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0\.d, p0/z, z0\.d -+** fmul z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mul_d4_f64_z_tied1, svfloat64_t, double, -+ z0 = svmul_n_f64_z (p0, z0, d4), -+ z0 = svmul_z (p0, z0, d4)) -+ -+/* -+** mul_d4_f64_z_untied: -+** mov (z[0-9]+\.d), d4 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fmul z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** fmul z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (mul_d4_f64_z_untied, svfloat64_t, double, -+ z0 = svmul_n_f64_z (p0, z1, d4), -+ z0 = svmul_z (p0, z1, d4)) -+ -+/* -+** mul_1_f64_z_tied1: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? -+** movprfx z0\.d, p0/z, z0\.d -+** fmul z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_1_f64_z_tied1, svfloat64_t, -+ z0 = svmul_n_f64_z (p0, z0, 1), -+ z0 = svmul_z (p0, z0, 1)) -+ -+/* -+** mul_1_f64_z_untied: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fmul z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** fmul z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_1_f64_z_untied, svfloat64_t, -+ z0 = svmul_n_f64_z (p0, z1, 1), -+ z0 = svmul_z (p0, z1, 1)) -+ -+/* -+** mul_0p5_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fmul z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_0p5_f64_z_tied1, svfloat64_t, -+ z0 = svmul_n_f64_z (p0, z0, 0.5), -+ z0 = svmul_z (p0, z0, 0.5)) -+ -+/* -+** mul_0p5_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fmul z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_0p5_f64_z_untied, svfloat64_t, -+ z0 = svmul_n_f64_z (p0, z1, 0.5), -+ z0 = svmul_z (p0, z1, 0.5)) -+ -+/* -+** mul_2_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fmul z0\.d, p0/m, z0\.d, #2\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_f64_z_tied1, svfloat64_t, -+ z0 = svmul_n_f64_z (p0, z0, 2), -+ z0 = svmul_z (p0, z0, 2)) -+ -+/* -+** mul_2_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fmul z0\.d, p0/m, z0\.d, #2\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_f64_z_untied, svfloat64_t, -+ z0 = svmul_n_f64_z (p0, z1, 2), -+ z0 = svmul_z (p0, z1, 2)) -+ -+/* -+** mul_f64_x_tied1: -+** fmul z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f64_x_tied1, svfloat64_t, -+ z0 = svmul_f64_x (p0, z0, z1), -+ z0 = svmul_x (p0, z0, z1)) -+ -+/* -+** mul_f64_x_tied2: -+** fmul z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f64_x_tied2, svfloat64_t, -+ z0 = svmul_f64_x (p0, z1, z0), -+ z0 = svmul_x (p0, z1, z0)) -+ -+/* -+** mul_f64_x_untied: -+** fmul z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_f64_x_untied, svfloat64_t, -+ z0 = svmul_f64_x (p0, z1, z2), -+ z0 = svmul_x (p0, z1, z2)) -+ -+/* -+** mul_d4_f64_x_tied1: -+** mov (z[0-9]+\.d), d4 -+** fmul z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_ZD (mul_d4_f64_x_tied1, svfloat64_t, double, -+ z0 = svmul_n_f64_x (p0, z0, d4), -+ z0 = svmul_x (p0, z0, d4)) -+ -+/* -+** mul_d4_f64_x_untied: -+** mov (z[0-9]+\.d), d4 -+** fmul z0\.d, (z1\.d, \1|\1, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_ZD (mul_d4_f64_x_untied, svfloat64_t, double, -+ z0 = svmul_n_f64_x (p0, z1, d4), -+ z0 = svmul_x (p0, z1, d4)) -+ -+/* -+** mul_1_f64_x_tied1: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? -+** fmul z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_1_f64_x_tied1, svfloat64_t, -+ z0 = svmul_n_f64_x (p0, z0, 1), -+ z0 = svmul_x (p0, z0, 1)) -+ -+/* -+** mul_1_f64_x_untied: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? 
-+** fmul z0\.d, (z1\.d, \1|\1, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_1_f64_x_untied, svfloat64_t, -+ z0 = svmul_n_f64_x (p0, z1, 1), -+ z0 = svmul_x (p0, z1, 1)) -+ -+/* -+** mul_0p5_f64_x_tied1: -+** fmul z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_0p5_f64_x_tied1, svfloat64_t, -+ z0 = svmul_n_f64_x (p0, z0, 0.5), -+ z0 = svmul_x (p0, z0, 0.5)) -+ -+/* -+** mul_0p5_f64_x_untied: -+** movprfx z0, z1 -+** fmul z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_0p5_f64_x_untied, svfloat64_t, -+ z0 = svmul_n_f64_x (p0, z1, 0.5), -+ z0 = svmul_x (p0, z1, 0.5)) -+ -+/* -+** mul_2_f64_x_tied1: -+** fmul z0\.d, p0/m, z0\.d, #2\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_f64_x_tied1, svfloat64_t, -+ z0 = svmul_n_f64_x (p0, z0, 2), -+ z0 = svmul_x (p0, z0, 2)) -+ -+/* -+** mul_2_f64_x_untied: -+** movprfx z0, z1 -+** fmul z0\.d, p0/m, z0\.d, #2\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_f64_x_untied, svfloat64_t, -+ z0 = svmul_n_f64_x (p0, z1, 2), -+ z0 = svmul_x (p0, z1, 2)) -+ -+/* -+** ptrue_mul_f64_x_tied1: -+** fmul z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_f64_x_tied1, svfloat64_t, -+ z0 = svmul_f64_x (svptrue_b64 (), z0, z1), -+ z0 = svmul_x (svptrue_b64 (), z0, z1)) -+ -+/* -+** ptrue_mul_f64_x_tied2: -+** fmul z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_f64_x_tied2, svfloat64_t, -+ z0 = svmul_f64_x (svptrue_b64 (), z1, z0), -+ z0 = svmul_x (svptrue_b64 (), z1, z0)) -+ -+/* -+** ptrue_mul_f64_x_untied: -+** fmul z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_f64_x_untied, svfloat64_t, -+ z0 = svmul_f64_x (svptrue_b64 (), z1, z2), -+ z0 = svmul_x (svptrue_b64 (), z1, z2)) -+ -+/* -+** ptrue_mul_1_f64_x_tied1: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? -+** fmul z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_1_f64_x_tied1, svfloat64_t, -+ z0 = svmul_n_f64_x (svptrue_b64 (), z0, 1), -+ z0 = svmul_x (svptrue_b64 (), z0, 1)) -+ -+/* -+** ptrue_mul_1_f64_x_untied: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? -+** fmul z0\.d, (z1\.d, \1|\1, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_1_f64_x_untied, svfloat64_t, -+ z0 = svmul_n_f64_x (svptrue_b64 (), z1, 1), -+ z0 = svmul_x (svptrue_b64 (), z1, 1)) -+ -+/* -+** ptrue_mul_0p5_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_0p5_f64_x_tied1, svfloat64_t, -+ z0 = svmul_n_f64_x (svptrue_b64 (), z0, 0.5), -+ z0 = svmul_x (svptrue_b64 (), z0, 0.5)) -+ -+/* -+** ptrue_mul_0p5_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_0p5_f64_x_untied, svfloat64_t, -+ z0 = svmul_n_f64_x (svptrue_b64 (), z1, 0.5), -+ z0 = svmul_x (svptrue_b64 (), z1, 0.5)) -+ -+/* -+** ptrue_mul_2_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_2_f64_x_tied1, svfloat64_t, -+ z0 = svmul_n_f64_x (svptrue_b64 (), z0, 2), -+ z0 = svmul_x (svptrue_b64 (), z0, 2)) -+ -+/* -+** ptrue_mul_2_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mul_2_f64_x_untied, svfloat64_t, -+ z0 = svmul_n_f64_x (svptrue_b64 (), z1, 2), -+ z0 = svmul_x (svptrue_b64 (), z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_lane_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_lane_f16.c -new file mode 100644 -index 000000000..1c7503bfd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_lane_f16.c -@@ -0,0 +1,114 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mul_lane_0_f16_tied1: -+** fmul z0\.h, z0\.h, z1\.h\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (mul_lane_0_f16_tied1, svfloat16_t, -+ z0 = svmul_lane_f16 (z0, z1, 0), -+ z0 = svmul_lane (z0, z1, 0)) -+ -+/* -+** mul_lane_0_f16_tied2: -+** fmul z0\.h, z1\.h, z0\.h\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (mul_lane_0_f16_tied2, svfloat16_t, -+ z0 = svmul_lane_f16 (z1, z0, 0), -+ z0 = svmul_lane (z1, z0, 0)) -+ -+/* -+** mul_lane_0_f16_untied: -+** fmul z0\.h, z1\.h, z2\.h\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (mul_lane_0_f16_untied, svfloat16_t, -+ z0 = svmul_lane_f16 (z1, z2, 0), -+ z0 = svmul_lane (z1, z2, 0)) -+ -+/* -+** mul_lane_1_f16: -+** fmul z0\.h, z1\.h, z2\.h\[1\] -+** ret -+*/ -+TEST_UNIFORM_Z (mul_lane_1_f16, svfloat16_t, -+ z0 = svmul_lane_f16 (z1, z2, 1), -+ z0 = svmul_lane (z1, z2, 1)) -+ -+/* -+** mul_lane_2_f16: -+** fmul z0\.h, z1\.h, z2\.h\[2\] -+** ret -+*/ -+TEST_UNIFORM_Z (mul_lane_2_f16, svfloat16_t, -+ z0 = svmul_lane_f16 (z1, z2, 2), -+ z0 = svmul_lane (z1, z2, 2)) -+ -+/* -+** mul_lane_3_f16: -+** fmul z0\.h, z1\.h, z2\.h\[3\] -+** ret -+*/ -+TEST_UNIFORM_Z (mul_lane_3_f16, svfloat16_t, -+ z0 = svmul_lane_f16 (z1, z2, 3), -+ z0 = svmul_lane (z1, z2, 3)) -+ -+/* -+** mul_lane_4_f16: -+** fmul z0\.h, z1\.h, z2\.h\[4\] -+** ret -+*/ -+TEST_UNIFORM_Z (mul_lane_4_f16, svfloat16_t, -+ z0 = svmul_lane_f16 (z1, z2, 4), -+ z0 = svmul_lane (z1, z2, 4)) -+ -+/* -+** mul_lane_5_f16: -+** fmul z0\.h, z1\.h, z2\.h\[5\] -+** ret -+*/ -+TEST_UNIFORM_Z (mul_lane_5_f16, svfloat16_t, -+ z0 = svmul_lane_f16 (z1, z2, 5), -+ z0 = svmul_lane (z1, z2, 5)) -+ -+/* -+** mul_lane_6_f16: -+** fmul z0\.h, z1\.h, z2\.h\[6\] -+** ret -+*/ -+TEST_UNIFORM_Z (mul_lane_6_f16, svfloat16_t, -+ z0 = svmul_lane_f16 (z1, z2, 6), -+ z0 = svmul_lane (z1, z2, 6)) -+ -+/* -+** mul_lane_7_f16: -+** fmul z0\.h, z1\.h, z2\.h\[7\] -+** ret -+*/ -+TEST_UNIFORM_Z (mul_lane_7_f16, svfloat16_t, -+ z0 = svmul_lane_f16 (z1, z2, 7), -+ z0 = svmul_lane (z1, z2, 7)) -+ -+/* -+** mul_lane_z7_f16: -+** fmul z0\.h, z1\.h, z7\.h\[7\] -+** ret -+*/ -+TEST_DUAL_Z (mul_lane_z7_f16, svfloat16_t, svfloat16_t, -+ z0 = svmul_lane_f16 (z1, z7, 7), -+ z0 = svmul_lane (z1, z7, 7)) -+ -+/* -+** mul_lane_z8_f16: -+** str d8, \[sp, -16\]! 
-+** mov (z[0-7])\.d, z8\.d -+** fmul z0\.h, z1\.h, \1\.h\[7\] -+** ldr d8, \[sp\], 16 -+** ret -+*/ -+TEST_DUAL_LANE_REG (mul_lane_z8_f16, svfloat16_t, svfloat16_t, z8, -+ z0 = svmul_lane_f16 (z1, z8, 7), -+ z0 = svmul_lane (z1, z8, 7)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_lane_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_lane_f32.c -new file mode 100644 -index 000000000..5355e7e0b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_lane_f32.c -@@ -0,0 +1,78 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mul_lane_0_f32_tied1: -+** fmul z0\.s, z0\.s, z1\.s\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (mul_lane_0_f32_tied1, svfloat32_t, -+ z0 = svmul_lane_f32 (z0, z1, 0), -+ z0 = svmul_lane (z0, z1, 0)) -+ -+/* -+** mul_lane_0_f32_tied2: -+** fmul z0\.s, z1\.s, z0\.s\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (mul_lane_0_f32_tied2, svfloat32_t, -+ z0 = svmul_lane_f32 (z1, z0, 0), -+ z0 = svmul_lane (z1, z0, 0)) -+ -+/* -+** mul_lane_0_f32_untied: -+** fmul z0\.s, z1\.s, z2\.s\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (mul_lane_0_f32_untied, svfloat32_t, -+ z0 = svmul_lane_f32 (z1, z2, 0), -+ z0 = svmul_lane (z1, z2, 0)) -+ -+/* -+** mul_lane_1_f32: -+** fmul z0\.s, z1\.s, z2\.s\[1\] -+** ret -+*/ -+TEST_UNIFORM_Z (mul_lane_1_f32, svfloat32_t, -+ z0 = svmul_lane_f32 (z1, z2, 1), -+ z0 = svmul_lane (z1, z2, 1)) -+ -+/* -+** mul_lane_2_f32: -+** fmul z0\.s, z1\.s, z2\.s\[2\] -+** ret -+*/ -+TEST_UNIFORM_Z (mul_lane_2_f32, svfloat32_t, -+ z0 = svmul_lane_f32 (z1, z2, 2), -+ z0 = svmul_lane (z1, z2, 2)) -+ -+/* -+** mul_lane_3_f32: -+** fmul z0\.s, z1\.s, z2\.s\[3\] -+** ret -+*/ -+TEST_UNIFORM_Z (mul_lane_3_f32, svfloat32_t, -+ z0 = svmul_lane_f32 (z1, z2, 3), -+ z0 = svmul_lane (z1, z2, 3)) -+ -+/* -+** mul_lane_z7_f32: -+** fmul z0\.s, z1\.s, z7\.s\[3\] -+** ret -+*/ -+TEST_DUAL_Z (mul_lane_z7_f32, svfloat32_t, svfloat32_t, -+ z0 = svmul_lane_f32 (z1, z7, 3), -+ z0 = svmul_lane (z1, z7, 3)) -+ -+/* -+** mul_lane_z8_f32: -+** str d8, \[sp, -16\]! 
-+** mov (z[0-7])\.d, z8\.d -+** fmul z0\.s, z1\.s, \1\.s\[3\] -+** ldr d8, \[sp\], 16 -+** ret -+*/ -+TEST_DUAL_LANE_REG (mul_lane_z8_f32, svfloat32_t, svfloat32_t, z8, -+ z0 = svmul_lane_f32 (z1, z8, 3), -+ z0 = svmul_lane (z1, z8, 3)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_lane_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_lane_f64.c -new file mode 100644 -index 000000000..a53a013c5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_lane_f64.c -@@ -0,0 +1,69 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mul_lane_0_f64_tied1: -+** fmul z0\.d, z0\.d, z1\.d\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (mul_lane_0_f64_tied1, svfloat64_t, -+ z0 = svmul_lane_f64 (z0, z1, 0), -+ z0 = svmul_lane (z0, z1, 0)) -+ -+/* -+** mul_lane_0_f64_tied2: -+** fmul z0\.d, z1\.d, z0\.d\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (mul_lane_0_f64_tied2, svfloat64_t, -+ z0 = svmul_lane_f64 (z1, z0, 0), -+ z0 = svmul_lane (z1, z0, 0)) -+ -+/* -+** mul_lane_0_f64_untied: -+** fmul z0\.d, z1\.d, z2\.d\[0\] -+** ret -+*/ -+TEST_UNIFORM_Z (mul_lane_0_f64_untied, svfloat64_t, -+ z0 = svmul_lane_f64 (z1, z2, 0), -+ z0 = svmul_lane (z1, z2, 0)) -+ -+/* -+** mul_lane_1_f64: -+** fmul z0\.d, z1\.d, z2\.d\[1\] -+** ret -+*/ -+TEST_UNIFORM_Z (mul_lane_1_f64, svfloat64_t, -+ z0 = svmul_lane_f64 (z1, z2, 1), -+ z0 = svmul_lane (z1, z2, 1)) -+ -+/* -+** mul_lane_z7_f64: -+** fmul z0\.d, z1\.d, z7\.d\[1\] -+** ret -+*/ -+TEST_DUAL_Z (mul_lane_z7_f64, svfloat64_t, svfloat64_t, -+ z0 = svmul_lane_f64 (z1, z7, 1), -+ z0 = svmul_lane (z1, z7, 1)) -+ -+/* -+** mul_lane_z15_f64: -+** str d15, \[sp, -16\]! -+** fmul z0\.d, z1\.d, z15\.d\[1\] -+** ldr d15, \[sp\], 16 -+** ret -+*/ -+TEST_DUAL_LANE_REG (mul_lane_z15_f64, svfloat64_t, svfloat64_t, z15, -+ z0 = svmul_lane_f64 (z1, z15, 1), -+ z0 = svmul_lane (z1, z15, 1)) -+ -+/* -+** mul_lane_z16_f64: -+** mov (z[0-9]|z1[0-5])\.d, z16\.d -+** fmul z0\.d, z1\.d, \1\.d\[1\] -+** ret -+*/ -+TEST_DUAL_LANE_REG (mul_lane_z16_f64, svfloat64_t, svfloat64_t, z16, -+ z0 = svmul_lane_f64 (z1, z16, 1), -+ z0 = svmul_lane (z1, z16, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s16.c -new file mode 100644 -index 000000000..aa08bc274 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s16.c -@@ -0,0 +1,302 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mul_s16_m_tied1: -+** mul z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mul_s16_m_tied1, svint16_t, -+ z0 = svmul_s16_m (p0, z0, z1), -+ z0 = svmul_m (p0, z0, z1)) -+ -+/* -+** mul_s16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mul z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mul_s16_m_tied2, svint16_t, -+ z0 = svmul_s16_m (p0, z1, z0), -+ z0 = svmul_m (p0, z1, z0)) -+ -+/* -+** mul_s16_m_untied: -+** movprfx z0, z1 -+** mul z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mul_s16_m_untied, svint16_t, -+ z0 = svmul_s16_m (p0, z1, z2), -+ z0 = svmul_m (p0, z1, z2)) -+ -+/* -+** mul_w0_s16_m_tied1: -+** mov (z[0-9]+\.h), w0 -+** mul z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_w0_s16_m_tied1, svint16_t, int16_t, -+ z0 = svmul_n_s16_m (p0, z0, x0), -+ z0 = svmul_m (p0, z0, x0)) -+ -+/* -+** mul_w0_s16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** mul z0\.h, p0/m, z0\.h, \1 -+** 
ret -+*/ -+TEST_UNIFORM_ZX (mul_w0_s16_m_untied, svint16_t, int16_t, -+ z0 = svmul_n_s16_m (p0, z1, x0), -+ z0 = svmul_m (p0, z1, x0)) -+ -+/* -+** mul_2_s16_m_tied1: -+** mov (z[0-9]+\.h), #2 -+** mul z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_s16_m_tied1, svint16_t, -+ z0 = svmul_n_s16_m (p0, z0, 2), -+ z0 = svmul_m (p0, z0, 2)) -+ -+/* -+** mul_2_s16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), #2 -+** movprfx z0, z1 -+** mul z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_s16_m_untied, svint16_t, -+ z0 = svmul_n_s16_m (p0, z1, 2), -+ z0 = svmul_m (p0, z1, 2)) -+ -+/* -+** mul_m1_s16_m: -+** mov (z[0-9]+)\.b, #-1 -+** mul z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mul_m1_s16_m, svint16_t, -+ z0 = svmul_n_s16_m (p0, z0, -1), -+ z0 = svmul_m (p0, z0, -1)) -+ -+/* -+** mul_s16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** mul z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mul_s16_z_tied1, svint16_t, -+ z0 = svmul_s16_z (p0, z0, z1), -+ z0 = svmul_z (p0, z0, z1)) -+ -+/* -+** mul_s16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** mul z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mul_s16_z_tied2, svint16_t, -+ z0 = svmul_s16_z (p0, z1, z0), -+ z0 = svmul_z (p0, z1, z0)) -+ -+/* -+** mul_s16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** mul z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** mul z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_s16_z_untied, svint16_t, -+ z0 = svmul_s16_z (p0, z1, z2), -+ z0 = svmul_z (p0, z1, z2)) -+ -+/* -+** mul_w0_s16_z_tied1: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** mul z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_w0_s16_z_tied1, svint16_t, int16_t, -+ z0 = svmul_n_s16_z (p0, z0, x0), -+ z0 = svmul_z (p0, z0, x0)) -+ -+/* -+** mul_w0_s16_z_untied: -+** mov (z[0-9]+\.h), w0 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** mul z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** mul z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_w0_s16_z_untied, svint16_t, int16_t, -+ z0 = svmul_n_s16_z (p0, z1, x0), -+ z0 = svmul_z (p0, z1, x0)) -+ -+/* -+** mul_2_s16_z_tied1: -+** mov (z[0-9]+\.h), #2 -+** movprfx z0\.h, p0/z, z0\.h -+** mul z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_s16_z_tied1, svint16_t, -+ z0 = svmul_n_s16_z (p0, z0, 2), -+ z0 = svmul_z (p0, z0, 2)) -+ -+/* -+** mul_2_s16_z_untied: -+** mov (z[0-9]+\.h), #2 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** mul z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** mul z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_s16_z_untied, svint16_t, -+ z0 = svmul_n_s16_z (p0, z1, 2), -+ z0 = svmul_z (p0, z1, 2)) -+ -+/* -+** mul_s16_x_tied1: -+** mul z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mul_s16_x_tied1, svint16_t, -+ z0 = svmul_s16_x (p0, z0, z1), -+ z0 = svmul_x (p0, z0, z1)) -+ -+/* -+** mul_s16_x_tied2: -+** mul z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mul_s16_x_tied2, svint16_t, -+ z0 = svmul_s16_x (p0, z1, z0), -+ z0 = svmul_x (p0, z1, z0)) -+ -+/* -+** mul_s16_x_untied: -+** ( -+** movprfx z0, z1 -+** mul z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0, z2 -+** mul z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_s16_x_untied, svint16_t, -+ z0 = svmul_s16_x (p0, z1, z2), -+ z0 = svmul_x (p0, z1, z2)) -+ -+/* -+** mul_w0_s16_x_tied1: -+** mov (z[0-9]+\.h), w0 -+** mul z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX 
(mul_w0_s16_x_tied1, svint16_t, int16_t, -+ z0 = svmul_n_s16_x (p0, z0, x0), -+ z0 = svmul_x (p0, z0, x0)) -+ -+/* -+** mul_w0_s16_x_untied: -+** mov z0\.h, w0 -+** mul z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_w0_s16_x_untied, svint16_t, int16_t, -+ z0 = svmul_n_s16_x (p0, z1, x0), -+ z0 = svmul_x (p0, z1, x0)) -+ -+/* -+** mul_2_s16_x_tied1: -+** mul z0\.h, z0\.h, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_s16_x_tied1, svint16_t, -+ z0 = svmul_n_s16_x (p0, z0, 2), -+ z0 = svmul_x (p0, z0, 2)) -+ -+/* -+** mul_2_s16_x_untied: -+** movprfx z0, z1 -+** mul z0\.h, z0\.h, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_s16_x_untied, svint16_t, -+ z0 = svmul_n_s16_x (p0, z1, 2), -+ z0 = svmul_x (p0, z1, 2)) -+ -+/* -+** mul_127_s16_x: -+** mul z0\.h, z0\.h, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_127_s16_x, svint16_t, -+ z0 = svmul_n_s16_x (p0, z0, 127), -+ z0 = svmul_x (p0, z0, 127)) -+ -+/* -+** mul_128_s16_x: -+** mov (z[0-9]+\.h), #128 -+** mul z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_128_s16_x, svint16_t, -+ z0 = svmul_n_s16_x (p0, z0, 128), -+ z0 = svmul_x (p0, z0, 128)) -+ -+/* -+** mul_255_s16_x: -+** mov (z[0-9]+\.h), #255 -+** mul z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_255_s16_x, svint16_t, -+ z0 = svmul_n_s16_x (p0, z0, 255), -+ z0 = svmul_x (p0, z0, 255)) -+ -+/* -+** mul_m1_s16_x: -+** mul z0\.h, z0\.h, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_m1_s16_x, svint16_t, -+ z0 = svmul_n_s16_x (p0, z0, -1), -+ z0 = svmul_x (p0, z0, -1)) -+ -+/* -+** mul_m127_s16_x: -+** mul z0\.h, z0\.h, #-127 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_m127_s16_x, svint16_t, -+ z0 = svmul_n_s16_x (p0, z0, -127), -+ z0 = svmul_x (p0, z0, -127)) -+ -+/* -+** mul_m128_s16_x: -+** mul z0\.h, z0\.h, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_m128_s16_x, svint16_t, -+ z0 = svmul_n_s16_x (p0, z0, -128), -+ z0 = svmul_x (p0, z0, -128)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s32.c -new file mode 100644 -index 000000000..7acf77fdb ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s32.c -@@ -0,0 +1,302 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mul_s32_m_tied1: -+** mul z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mul_s32_m_tied1, svint32_t, -+ z0 = svmul_s32_m (p0, z0, z1), -+ z0 = svmul_m (p0, z0, z1)) -+ -+/* -+** mul_s32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mul z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mul_s32_m_tied2, svint32_t, -+ z0 = svmul_s32_m (p0, z1, z0), -+ z0 = svmul_m (p0, z1, z0)) -+ -+/* -+** mul_s32_m_untied: -+** movprfx z0, z1 -+** mul z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mul_s32_m_untied, svint32_t, -+ z0 = svmul_s32_m (p0, z1, z2), -+ z0 = svmul_m (p0, z1, z2)) -+ -+/* -+** mul_w0_s32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** mul z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_w0_s32_m_tied1, svint32_t, int32_t, -+ z0 = svmul_n_s32_m (p0, z0, x0), -+ z0 = svmul_m (p0, z0, x0)) -+ -+/* -+** mul_w0_s32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** mul z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_w0_s32_m_untied, svint32_t, int32_t, -+ z0 = svmul_n_s32_m (p0, z1, x0), -+ z0 = svmul_m (p0, z1, x0)) -+ -+/* -+** mul_2_s32_m_tied1: -+** mov (z[0-9]+\.s), #2 -+** mul z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_s32_m_tied1, svint32_t, -+ z0 = 
svmul_n_s32_m (p0, z0, 2), -+ z0 = svmul_m (p0, z0, 2)) -+ -+/* -+** mul_2_s32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #2 -+** movprfx z0, z1 -+** mul z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_s32_m_untied, svint32_t, -+ z0 = svmul_n_s32_m (p0, z1, 2), -+ z0 = svmul_m (p0, z1, 2)) -+ -+/* -+** mul_m1_s32_m: -+** mov (z[0-9]+)\.b, #-1 -+** mul z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mul_m1_s32_m, svint32_t, -+ z0 = svmul_n_s32_m (p0, z0, -1), -+ z0 = svmul_m (p0, z0, -1)) -+ -+/* -+** mul_s32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** mul z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mul_s32_z_tied1, svint32_t, -+ z0 = svmul_s32_z (p0, z0, z1), -+ z0 = svmul_z (p0, z0, z1)) -+ -+/* -+** mul_s32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** mul z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mul_s32_z_tied2, svint32_t, -+ z0 = svmul_s32_z (p0, z1, z0), -+ z0 = svmul_z (p0, z1, z0)) -+ -+/* -+** mul_s32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** mul z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** mul z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_s32_z_untied, svint32_t, -+ z0 = svmul_s32_z (p0, z1, z2), -+ z0 = svmul_z (p0, z1, z2)) -+ -+/* -+** mul_w0_s32_z_tied1: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** mul z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_w0_s32_z_tied1, svint32_t, int32_t, -+ z0 = svmul_n_s32_z (p0, z0, x0), -+ z0 = svmul_z (p0, z0, x0)) -+ -+/* -+** mul_w0_s32_z_untied: -+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** mul z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** mul z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_w0_s32_z_untied, svint32_t, int32_t, -+ z0 = svmul_n_s32_z (p0, z1, x0), -+ z0 = svmul_z (p0, z1, x0)) -+ -+/* -+** mul_2_s32_z_tied1: -+** mov (z[0-9]+\.s), #2 -+** movprfx z0\.s, p0/z, z0\.s -+** mul z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_s32_z_tied1, svint32_t, -+ z0 = svmul_n_s32_z (p0, z0, 2), -+ z0 = svmul_z (p0, z0, 2)) -+ -+/* -+** mul_2_s32_z_untied: -+** mov (z[0-9]+\.s), #2 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** mul z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** mul z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_s32_z_untied, svint32_t, -+ z0 = svmul_n_s32_z (p0, z1, 2), -+ z0 = svmul_z (p0, z1, 2)) -+ -+/* -+** mul_s32_x_tied1: -+** mul z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mul_s32_x_tied1, svint32_t, -+ z0 = svmul_s32_x (p0, z0, z1), -+ z0 = svmul_x (p0, z0, z1)) -+ -+/* -+** mul_s32_x_tied2: -+** mul z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mul_s32_x_tied2, svint32_t, -+ z0 = svmul_s32_x (p0, z1, z0), -+ z0 = svmul_x (p0, z1, z0)) -+ -+/* -+** mul_s32_x_untied: -+** ( -+** movprfx z0, z1 -+** mul z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0, z2 -+** mul z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_s32_x_untied, svint32_t, -+ z0 = svmul_s32_x (p0, z1, z2), -+ z0 = svmul_x (p0, z1, z2)) -+ -+/* -+** mul_w0_s32_x_tied1: -+** mov (z[0-9]+\.s), w0 -+** mul z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_w0_s32_x_tied1, svint32_t, int32_t, -+ z0 = svmul_n_s32_x (p0, z0, x0), -+ z0 = svmul_x (p0, z0, x0)) -+ -+/* -+** mul_w0_s32_x_untied: -+** mov z0\.s, w0 -+** mul z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_w0_s32_x_untied, svint32_t, int32_t, -+ z0 = svmul_n_s32_x (p0, 
z1, x0), -+ z0 = svmul_x (p0, z1, x0)) -+ -+/* -+** mul_2_s32_x_tied1: -+** mul z0\.s, z0\.s, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_s32_x_tied1, svint32_t, -+ z0 = svmul_n_s32_x (p0, z0, 2), -+ z0 = svmul_x (p0, z0, 2)) -+ -+/* -+** mul_2_s32_x_untied: -+** movprfx z0, z1 -+** mul z0\.s, z0\.s, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_s32_x_untied, svint32_t, -+ z0 = svmul_n_s32_x (p0, z1, 2), -+ z0 = svmul_x (p0, z1, 2)) -+ -+/* -+** mul_127_s32_x: -+** mul z0\.s, z0\.s, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_127_s32_x, svint32_t, -+ z0 = svmul_n_s32_x (p0, z0, 127), -+ z0 = svmul_x (p0, z0, 127)) -+ -+/* -+** mul_128_s32_x: -+** mov (z[0-9]+\.s), #128 -+** mul z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_128_s32_x, svint32_t, -+ z0 = svmul_n_s32_x (p0, z0, 128), -+ z0 = svmul_x (p0, z0, 128)) -+ -+/* -+** mul_255_s32_x: -+** mov (z[0-9]+\.s), #255 -+** mul z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_255_s32_x, svint32_t, -+ z0 = svmul_n_s32_x (p0, z0, 255), -+ z0 = svmul_x (p0, z0, 255)) -+ -+/* -+** mul_m1_s32_x: -+** mul z0\.s, z0\.s, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_m1_s32_x, svint32_t, -+ z0 = svmul_n_s32_x (p0, z0, -1), -+ z0 = svmul_x (p0, z0, -1)) -+ -+/* -+** mul_m127_s32_x: -+** mul z0\.s, z0\.s, #-127 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_m127_s32_x, svint32_t, -+ z0 = svmul_n_s32_x (p0, z0, -127), -+ z0 = svmul_x (p0, z0, -127)) -+ -+/* -+** mul_m128_s32_x: -+** mul z0\.s, z0\.s, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_m128_s32_x, svint32_t, -+ z0 = svmul_n_s32_x (p0, z0, -128), -+ z0 = svmul_x (p0, z0, -128)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s64.c -new file mode 100644 -index 000000000..549105f1e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s64.c -@@ -0,0 +1,302 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mul_s64_m_tied1: -+** mul z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mul_s64_m_tied1, svint64_t, -+ z0 = svmul_s64_m (p0, z0, z1), -+ z0 = svmul_m (p0, z0, z1)) -+ -+/* -+** mul_s64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** mul z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_s64_m_tied2, svint64_t, -+ z0 = svmul_s64_m (p0, z1, z0), -+ z0 = svmul_m (p0, z1, z0)) -+ -+/* -+** mul_s64_m_untied: -+** movprfx z0, z1 -+** mul z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mul_s64_m_untied, svint64_t, -+ z0 = svmul_s64_m (p0, z1, z2), -+ z0 = svmul_m (p0, z1, z2)) -+ -+/* -+** mul_x0_s64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** mul z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_x0_s64_m_tied1, svint64_t, int64_t, -+ z0 = svmul_n_s64_m (p0, z0, x0), -+ z0 = svmul_m (p0, z0, x0)) -+ -+/* -+** mul_x0_s64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** mul z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_x0_s64_m_untied, svint64_t, int64_t, -+ z0 = svmul_n_s64_m (p0, z1, x0), -+ z0 = svmul_m (p0, z1, x0)) -+ -+/* -+** mul_2_s64_m_tied1: -+** mov (z[0-9]+\.d), #2 -+** mul z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_s64_m_tied1, svint64_t, -+ z0 = svmul_n_s64_m (p0, z0, 2), -+ z0 = svmul_m (p0, z0, 2)) -+ -+/* -+** mul_2_s64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #2 -+** movprfx z0, z1 -+** mul z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_s64_m_untied, svint64_t, -+ z0 = svmul_n_s64_m (p0, z1, 2), -+ z0 = svmul_m (p0, z1, 2)) 
-+ -+/* -+** mul_m1_s64_m: -+** mov (z[0-9]+)\.b, #-1 -+** mul z0\.d, p0/m, z0\.d, \1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mul_m1_s64_m, svint64_t, -+ z0 = svmul_n_s64_m (p0, z0, -1), -+ z0 = svmul_m (p0, z0, -1)) -+ -+/* -+** mul_s64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** mul z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mul_s64_z_tied1, svint64_t, -+ z0 = svmul_s64_z (p0, z0, z1), -+ z0 = svmul_z (p0, z0, z1)) -+ -+/* -+** mul_s64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** mul z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mul_s64_z_tied2, svint64_t, -+ z0 = svmul_s64_z (p0, z1, z0), -+ z0 = svmul_z (p0, z1, z0)) -+ -+/* -+** mul_s64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** mul z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** mul z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_s64_z_untied, svint64_t, -+ z0 = svmul_s64_z (p0, z1, z2), -+ z0 = svmul_z (p0, z1, z2)) -+ -+/* -+** mul_x0_s64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** mul z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_x0_s64_z_tied1, svint64_t, int64_t, -+ z0 = svmul_n_s64_z (p0, z0, x0), -+ z0 = svmul_z (p0, z0, x0)) -+ -+/* -+** mul_x0_s64_z_untied: -+** mov (z[0-9]+\.d), x0 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** mul z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** mul z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_x0_s64_z_untied, svint64_t, int64_t, -+ z0 = svmul_n_s64_z (p0, z1, x0), -+ z0 = svmul_z (p0, z1, x0)) -+ -+/* -+** mul_2_s64_z_tied1: -+** mov (z[0-9]+\.d), #2 -+** movprfx z0\.d, p0/z, z0\.d -+** mul z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_s64_z_tied1, svint64_t, -+ z0 = svmul_n_s64_z (p0, z0, 2), -+ z0 = svmul_z (p0, z0, 2)) -+ -+/* -+** mul_2_s64_z_untied: -+** mov (z[0-9]+\.d), #2 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** mul z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** mul z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_s64_z_untied, svint64_t, -+ z0 = svmul_n_s64_z (p0, z1, 2), -+ z0 = svmul_z (p0, z1, 2)) -+ -+/* -+** mul_s64_x_tied1: -+** mul z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mul_s64_x_tied1, svint64_t, -+ z0 = svmul_s64_x (p0, z0, z1), -+ z0 = svmul_x (p0, z0, z1)) -+ -+/* -+** mul_s64_x_tied2: -+** mul z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mul_s64_x_tied2, svint64_t, -+ z0 = svmul_s64_x (p0, z1, z0), -+ z0 = svmul_x (p0, z1, z0)) -+ -+/* -+** mul_s64_x_untied: -+** ( -+** movprfx z0, z1 -+** mul z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0, z2 -+** mul z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_s64_x_untied, svint64_t, -+ z0 = svmul_s64_x (p0, z1, z2), -+ z0 = svmul_x (p0, z1, z2)) -+ -+/* -+** mul_x0_s64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** mul z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_x0_s64_x_tied1, svint64_t, int64_t, -+ z0 = svmul_n_s64_x (p0, z0, x0), -+ z0 = svmul_x (p0, z0, x0)) -+ -+/* -+** mul_x0_s64_x_untied: -+** mov z0\.d, x0 -+** mul z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_x0_s64_x_untied, svint64_t, int64_t, -+ z0 = svmul_n_s64_x (p0, z1, x0), -+ z0 = svmul_x (p0, z1, x0)) -+ -+/* -+** mul_2_s64_x_tied1: -+** mul z0\.d, z0\.d, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_s64_x_tied1, svint64_t, -+ z0 = svmul_n_s64_x (p0, z0, 2), -+ z0 = svmul_x (p0, z0, 2)) -+ -+/* -+** mul_2_s64_x_untied: -+** movprfx z0, z1 -+** mul z0\.d, z0\.d, #2 -+** ret 
-+*/ -+TEST_UNIFORM_Z (mul_2_s64_x_untied, svint64_t, -+ z0 = svmul_n_s64_x (p0, z1, 2), -+ z0 = svmul_x (p0, z1, 2)) -+ -+/* -+** mul_127_s64_x: -+** mul z0\.d, z0\.d, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_127_s64_x, svint64_t, -+ z0 = svmul_n_s64_x (p0, z0, 127), -+ z0 = svmul_x (p0, z0, 127)) -+ -+/* -+** mul_128_s64_x: -+** mov (z[0-9]+\.d), #128 -+** mul z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_128_s64_x, svint64_t, -+ z0 = svmul_n_s64_x (p0, z0, 128), -+ z0 = svmul_x (p0, z0, 128)) -+ -+/* -+** mul_255_s64_x: -+** mov (z[0-9]+\.d), #255 -+** mul z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_255_s64_x, svint64_t, -+ z0 = svmul_n_s64_x (p0, z0, 255), -+ z0 = svmul_x (p0, z0, 255)) -+ -+/* -+** mul_m1_s64_x: -+** mul z0\.d, z0\.d, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_m1_s64_x, svint64_t, -+ z0 = svmul_n_s64_x (p0, z0, -1), -+ z0 = svmul_x (p0, z0, -1)) -+ -+/* -+** mul_m127_s64_x: -+** mul z0\.d, z0\.d, #-127 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_m127_s64_x, svint64_t, -+ z0 = svmul_n_s64_x (p0, z0, -127), -+ z0 = svmul_x (p0, z0, -127)) -+ -+/* -+** mul_m128_s64_x: -+** mul z0\.d, z0\.d, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_m128_s64_x, svint64_t, -+ z0 = svmul_n_s64_x (p0, z0, -128), -+ z0 = svmul_x (p0, z0, -128)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s8.c -new file mode 100644 -index 000000000..012e6f250 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_s8.c -@@ -0,0 +1,300 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mul_s8_m_tied1: -+** mul z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mul_s8_m_tied1, svint8_t, -+ z0 = svmul_s8_m (p0, z0, z1), -+ z0 = svmul_m (p0, z0, z1)) -+ -+/* -+** mul_s8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mul z0\.b, p0/m, z0\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mul_s8_m_tied2, svint8_t, -+ z0 = svmul_s8_m (p0, z1, z0), -+ z0 = svmul_m (p0, z1, z0)) -+ -+/* -+** mul_s8_m_untied: -+** movprfx z0, z1 -+** mul z0\.b, p0/m, z0\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mul_s8_m_untied, svint8_t, -+ z0 = svmul_s8_m (p0, z1, z2), -+ z0 = svmul_m (p0, z1, z2)) -+ -+/* -+** mul_w0_s8_m_tied1: -+** mov (z[0-9]+\.b), w0 -+** mul z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_w0_s8_m_tied1, svint8_t, int8_t, -+ z0 = svmul_n_s8_m (p0, z0, x0), -+ z0 = svmul_m (p0, z0, x0)) -+ -+/* -+** mul_w0_s8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** mul z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_w0_s8_m_untied, svint8_t, int8_t, -+ z0 = svmul_n_s8_m (p0, z1, x0), -+ z0 = svmul_m (p0, z1, x0)) -+ -+/* -+** mul_2_s8_m_tied1: -+** mov (z[0-9]+\.b), #2 -+** mul z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_s8_m_tied1, svint8_t, -+ z0 = svmul_n_s8_m (p0, z0, 2), -+ z0 = svmul_m (p0, z0, 2)) -+ -+/* -+** mul_2_s8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), #2 -+** movprfx z0, z1 -+** mul z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_s8_m_untied, svint8_t, -+ z0 = svmul_n_s8_m (p0, z1, 2), -+ z0 = svmul_m (p0, z1, 2)) -+ -+/* -+** mul_m1_s8_m: -+** mov (z[0-9]+\.b), #-1 -+** mul z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_m1_s8_m, svint8_t, -+ z0 = svmul_n_s8_m (p0, z0, -1), -+ z0 = svmul_m (p0, z0, -1)) -+ -+/* -+** mul_s8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** mul z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ 
-+TEST_UNIFORM_Z (mul_s8_z_tied1, svint8_t, -+ z0 = svmul_s8_z (p0, z0, z1), -+ z0 = svmul_z (p0, z0, z1)) -+ -+/* -+** mul_s8_z_tied2: -+** movprfx z0\.b, p0/z, z0\.b -+** mul z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mul_s8_z_tied2, svint8_t, -+ z0 = svmul_s8_z (p0, z1, z0), -+ z0 = svmul_z (p0, z1, z0)) -+ -+/* -+** mul_s8_z_untied: -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** mul z0\.b, p0/m, z0\.b, z2\.b -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** mul z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_s8_z_untied, svint8_t, -+ z0 = svmul_s8_z (p0, z1, z2), -+ z0 = svmul_z (p0, z1, z2)) -+ -+/* -+** mul_w0_s8_z_tied1: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** mul z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_w0_s8_z_tied1, svint8_t, int8_t, -+ z0 = svmul_n_s8_z (p0, z0, x0), -+ z0 = svmul_z (p0, z0, x0)) -+ -+/* -+** mul_w0_s8_z_untied: -+** mov (z[0-9]+\.b), w0 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** mul z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** mul z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_w0_s8_z_untied, svint8_t, int8_t, -+ z0 = svmul_n_s8_z (p0, z1, x0), -+ z0 = svmul_z (p0, z1, x0)) -+ -+/* -+** mul_2_s8_z_tied1: -+** mov (z[0-9]+\.b), #2 -+** movprfx z0\.b, p0/z, z0\.b -+** mul z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_s8_z_tied1, svint8_t, -+ z0 = svmul_n_s8_z (p0, z0, 2), -+ z0 = svmul_z (p0, z0, 2)) -+ -+/* -+** mul_2_s8_z_untied: -+** mov (z[0-9]+\.b), #2 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** mul z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** mul z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_s8_z_untied, svint8_t, -+ z0 = svmul_n_s8_z (p0, z1, 2), -+ z0 = svmul_z (p0, z1, 2)) -+ -+/* -+** mul_s8_x_tied1: -+** mul z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mul_s8_x_tied1, svint8_t, -+ z0 = svmul_s8_x (p0, z0, z1), -+ z0 = svmul_x (p0, z0, z1)) -+ -+/* -+** mul_s8_x_tied2: -+** mul z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mul_s8_x_tied2, svint8_t, -+ z0 = svmul_s8_x (p0, z1, z0), -+ z0 = svmul_x (p0, z1, z0)) -+ -+/* -+** mul_s8_x_untied: -+** ( -+** movprfx z0, z1 -+** mul z0\.b, p0/m, z0\.b, z2\.b -+** | -+** movprfx z0, z2 -+** mul z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_s8_x_untied, svint8_t, -+ z0 = svmul_s8_x (p0, z1, z2), -+ z0 = svmul_x (p0, z1, z2)) -+ -+/* -+** mul_w0_s8_x_tied1: -+** mov (z[0-9]+\.b), w0 -+** mul z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_w0_s8_x_tied1, svint8_t, int8_t, -+ z0 = svmul_n_s8_x (p0, z0, x0), -+ z0 = svmul_x (p0, z0, x0)) -+ -+/* -+** mul_w0_s8_x_untied: -+** mov z0\.b, w0 -+** mul z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_w0_s8_x_untied, svint8_t, int8_t, -+ z0 = svmul_n_s8_x (p0, z1, x0), -+ z0 = svmul_x (p0, z1, x0)) -+ -+/* -+** mul_2_s8_x_tied1: -+** mul z0\.b, z0\.b, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_s8_x_tied1, svint8_t, -+ z0 = svmul_n_s8_x (p0, z0, 2), -+ z0 = svmul_x (p0, z0, 2)) -+ -+/* -+** mul_2_s8_x_untied: -+** movprfx z0, z1 -+** mul z0\.b, z0\.b, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_s8_x_untied, svint8_t, -+ z0 = svmul_n_s8_x (p0, z1, 2), -+ z0 = svmul_x (p0, z1, 2)) -+ -+/* -+** mul_127_s8_x: -+** mul z0\.b, z0\.b, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_127_s8_x, svint8_t, -+ z0 = svmul_n_s8_x (p0, z0, 127), -+ z0 = svmul_x (p0, z0, 127)) -+ -+/* -+** mul_128_s8_x: -+** mul z0\.b, z0\.b, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z 
(mul_128_s8_x, svint8_t, -+ z0 = svmul_n_s8_x (p0, z0, 128), -+ z0 = svmul_x (p0, z0, 128)) -+ -+/* -+** mul_255_s8_x: -+** mul z0\.b, z0\.b, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_255_s8_x, svint8_t, -+ z0 = svmul_n_s8_x (p0, z0, 255), -+ z0 = svmul_x (p0, z0, 255)) -+ -+/* -+** mul_m1_s8_x: -+** mul z0\.b, z0\.b, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_m1_s8_x, svint8_t, -+ z0 = svmul_n_s8_x (p0, z0, -1), -+ z0 = svmul_x (p0, z0, -1)) -+ -+/* -+** mul_m127_s8_x: -+** mul z0\.b, z0\.b, #-127 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_m127_s8_x, svint8_t, -+ z0 = svmul_n_s8_x (p0, z0, -127), -+ z0 = svmul_x (p0, z0, -127)) -+ -+/* -+** mul_m128_s8_x: -+** mul z0\.b, z0\.b, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_m128_s8_x, svint8_t, -+ z0 = svmul_n_s8_x (p0, z0, -128), -+ z0 = svmul_x (p0, z0, -128)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u16.c -new file mode 100644 -index 000000000..300987eb6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u16.c -@@ -0,0 +1,302 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mul_u16_m_tied1: -+** mul z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mul_u16_m_tied1, svuint16_t, -+ z0 = svmul_u16_m (p0, z0, z1), -+ z0 = svmul_m (p0, z0, z1)) -+ -+/* -+** mul_u16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mul z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mul_u16_m_tied2, svuint16_t, -+ z0 = svmul_u16_m (p0, z1, z0), -+ z0 = svmul_m (p0, z1, z0)) -+ -+/* -+** mul_u16_m_untied: -+** movprfx z0, z1 -+** mul z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mul_u16_m_untied, svuint16_t, -+ z0 = svmul_u16_m (p0, z1, z2), -+ z0 = svmul_m (p0, z1, z2)) -+ -+/* -+** mul_w0_u16_m_tied1: -+** mov (z[0-9]+\.h), w0 -+** mul z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_w0_u16_m_tied1, svuint16_t, uint16_t, -+ z0 = svmul_n_u16_m (p0, z0, x0), -+ z0 = svmul_m (p0, z0, x0)) -+ -+/* -+** mul_w0_u16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** mul z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_w0_u16_m_untied, svuint16_t, uint16_t, -+ z0 = svmul_n_u16_m (p0, z1, x0), -+ z0 = svmul_m (p0, z1, x0)) -+ -+/* -+** mul_2_u16_m_tied1: -+** mov (z[0-9]+\.h), #2 -+** mul z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_u16_m_tied1, svuint16_t, -+ z0 = svmul_n_u16_m (p0, z0, 2), -+ z0 = svmul_m (p0, z0, 2)) -+ -+/* -+** mul_2_u16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), #2 -+** movprfx z0, z1 -+** mul z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_u16_m_untied, svuint16_t, -+ z0 = svmul_n_u16_m (p0, z1, 2), -+ z0 = svmul_m (p0, z1, 2)) -+ -+/* -+** mul_m1_u16_m: -+** mov (z[0-9]+)\.b, #-1 -+** mul z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mul_m1_u16_m, svuint16_t, -+ z0 = svmul_n_u16_m (p0, z0, -1), -+ z0 = svmul_m (p0, z0, -1)) -+ -+/* -+** mul_u16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** mul z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mul_u16_z_tied1, svuint16_t, -+ z0 = svmul_u16_z (p0, z0, z1), -+ z0 = svmul_z (p0, z0, z1)) -+ -+/* -+** mul_u16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** mul z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mul_u16_z_tied2, svuint16_t, -+ z0 = svmul_u16_z (p0, z1, z0), -+ z0 = svmul_z (p0, z1, z0)) -+ -+/* -+** mul_u16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** mul z0\.h, p0/m, z0\.h, 
z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** mul z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_u16_z_untied, svuint16_t, -+ z0 = svmul_u16_z (p0, z1, z2), -+ z0 = svmul_z (p0, z1, z2)) -+ -+/* -+** mul_w0_u16_z_tied1: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** mul z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_w0_u16_z_tied1, svuint16_t, uint16_t, -+ z0 = svmul_n_u16_z (p0, z0, x0), -+ z0 = svmul_z (p0, z0, x0)) -+ -+/* -+** mul_w0_u16_z_untied: -+** mov (z[0-9]+\.h), w0 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** mul z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** mul z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_w0_u16_z_untied, svuint16_t, uint16_t, -+ z0 = svmul_n_u16_z (p0, z1, x0), -+ z0 = svmul_z (p0, z1, x0)) -+ -+/* -+** mul_2_u16_z_tied1: -+** mov (z[0-9]+\.h), #2 -+** movprfx z0\.h, p0/z, z0\.h -+** mul z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_u16_z_tied1, svuint16_t, -+ z0 = svmul_n_u16_z (p0, z0, 2), -+ z0 = svmul_z (p0, z0, 2)) -+ -+/* -+** mul_2_u16_z_untied: -+** mov (z[0-9]+\.h), #2 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** mul z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** mul z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_u16_z_untied, svuint16_t, -+ z0 = svmul_n_u16_z (p0, z1, 2), -+ z0 = svmul_z (p0, z1, 2)) -+ -+/* -+** mul_u16_x_tied1: -+** mul z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mul_u16_x_tied1, svuint16_t, -+ z0 = svmul_u16_x (p0, z0, z1), -+ z0 = svmul_x (p0, z0, z1)) -+ -+/* -+** mul_u16_x_tied2: -+** mul z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mul_u16_x_tied2, svuint16_t, -+ z0 = svmul_u16_x (p0, z1, z0), -+ z0 = svmul_x (p0, z1, z0)) -+ -+/* -+** mul_u16_x_untied: -+** ( -+** movprfx z0, z1 -+** mul z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0, z2 -+** mul z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_u16_x_untied, svuint16_t, -+ z0 = svmul_u16_x (p0, z1, z2), -+ z0 = svmul_x (p0, z1, z2)) -+ -+/* -+** mul_w0_u16_x_tied1: -+** mov (z[0-9]+\.h), w0 -+** mul z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_w0_u16_x_tied1, svuint16_t, uint16_t, -+ z0 = svmul_n_u16_x (p0, z0, x0), -+ z0 = svmul_x (p0, z0, x0)) -+ -+/* -+** mul_w0_u16_x_untied: -+** mov z0\.h, w0 -+** mul z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_w0_u16_x_untied, svuint16_t, uint16_t, -+ z0 = svmul_n_u16_x (p0, z1, x0), -+ z0 = svmul_x (p0, z1, x0)) -+ -+/* -+** mul_2_u16_x_tied1: -+** mul z0\.h, z0\.h, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_u16_x_tied1, svuint16_t, -+ z0 = svmul_n_u16_x (p0, z0, 2), -+ z0 = svmul_x (p0, z0, 2)) -+ -+/* -+** mul_2_u16_x_untied: -+** movprfx z0, z1 -+** mul z0\.h, z0\.h, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_u16_x_untied, svuint16_t, -+ z0 = svmul_n_u16_x (p0, z1, 2), -+ z0 = svmul_x (p0, z1, 2)) -+ -+/* -+** mul_127_u16_x: -+** mul z0\.h, z0\.h, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_127_u16_x, svuint16_t, -+ z0 = svmul_n_u16_x (p0, z0, 127), -+ z0 = svmul_x (p0, z0, 127)) -+ -+/* -+** mul_128_u16_x: -+** mov (z[0-9]+\.h), #128 -+** mul z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_128_u16_x, svuint16_t, -+ z0 = svmul_n_u16_x (p0, z0, 128), -+ z0 = svmul_x (p0, z0, 128)) -+ -+/* -+** mul_255_u16_x: -+** mov (z[0-9]+\.h), #255 -+** mul z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_255_u16_x, svuint16_t, -+ z0 = svmul_n_u16_x (p0, z0, 255), -+ z0 = svmul_x (p0, z0, 255)) -+ 
-+/* -+** mul_m1_u16_x: -+** mul z0\.h, z0\.h, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_m1_u16_x, svuint16_t, -+ z0 = svmul_n_u16_x (p0, z0, -1), -+ z0 = svmul_x (p0, z0, -1)) -+ -+/* -+** mul_m127_u16_x: -+** mul z0\.h, z0\.h, #-127 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_m127_u16_x, svuint16_t, -+ z0 = svmul_n_u16_x (p0, z0, -127), -+ z0 = svmul_x (p0, z0, -127)) -+ -+/* -+** mul_m128_u16_x: -+** mul z0\.h, z0\.h, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_m128_u16_x, svuint16_t, -+ z0 = svmul_n_u16_x (p0, z0, -128), -+ z0 = svmul_x (p0, z0, -128)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u32.c -new file mode 100644 -index 000000000..288d17b16 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u32.c -@@ -0,0 +1,302 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mul_u32_m_tied1: -+** mul z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mul_u32_m_tied1, svuint32_t, -+ z0 = svmul_u32_m (p0, z0, z1), -+ z0 = svmul_m (p0, z0, z1)) -+ -+/* -+** mul_u32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mul z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mul_u32_m_tied2, svuint32_t, -+ z0 = svmul_u32_m (p0, z1, z0), -+ z0 = svmul_m (p0, z1, z0)) -+ -+/* -+** mul_u32_m_untied: -+** movprfx z0, z1 -+** mul z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mul_u32_m_untied, svuint32_t, -+ z0 = svmul_u32_m (p0, z1, z2), -+ z0 = svmul_m (p0, z1, z2)) -+ -+/* -+** mul_w0_u32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** mul z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_w0_u32_m_tied1, svuint32_t, uint32_t, -+ z0 = svmul_n_u32_m (p0, z0, x0), -+ z0 = svmul_m (p0, z0, x0)) -+ -+/* -+** mul_w0_u32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** mul z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_w0_u32_m_untied, svuint32_t, uint32_t, -+ z0 = svmul_n_u32_m (p0, z1, x0), -+ z0 = svmul_m (p0, z1, x0)) -+ -+/* -+** mul_2_u32_m_tied1: -+** mov (z[0-9]+\.s), #2 -+** mul z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_u32_m_tied1, svuint32_t, -+ z0 = svmul_n_u32_m (p0, z0, 2), -+ z0 = svmul_m (p0, z0, 2)) -+ -+/* -+** mul_2_u32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #2 -+** movprfx z0, z1 -+** mul z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_u32_m_untied, svuint32_t, -+ z0 = svmul_n_u32_m (p0, z1, 2), -+ z0 = svmul_m (p0, z1, 2)) -+ -+/* -+** mul_m1_u32_m: -+** mov (z[0-9]+)\.b, #-1 -+** mul z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mul_m1_u32_m, svuint32_t, -+ z0 = svmul_n_u32_m (p0, z0, -1), -+ z0 = svmul_m (p0, z0, -1)) -+ -+/* -+** mul_u32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** mul z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mul_u32_z_tied1, svuint32_t, -+ z0 = svmul_u32_z (p0, z0, z1), -+ z0 = svmul_z (p0, z0, z1)) -+ -+/* -+** mul_u32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** mul z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mul_u32_z_tied2, svuint32_t, -+ z0 = svmul_u32_z (p0, z1, z0), -+ z0 = svmul_z (p0, z1, z0)) -+ -+/* -+** mul_u32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** mul z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** mul z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_u32_z_untied, svuint32_t, -+ z0 = svmul_u32_z (p0, z1, z2), -+ z0 = svmul_z (p0, z1, z2)) -+ -+/* -+** mul_w0_u32_z_tied1: -+** mov (z[0-9]+\.s), w0 
-+** movprfx z0\.s, p0/z, z0\.s -+** mul z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_w0_u32_z_tied1, svuint32_t, uint32_t, -+ z0 = svmul_n_u32_z (p0, z0, x0), -+ z0 = svmul_z (p0, z0, x0)) -+ -+/* -+** mul_w0_u32_z_untied: -+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** mul z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** mul z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_w0_u32_z_untied, svuint32_t, uint32_t, -+ z0 = svmul_n_u32_z (p0, z1, x0), -+ z0 = svmul_z (p0, z1, x0)) -+ -+/* -+** mul_2_u32_z_tied1: -+** mov (z[0-9]+\.s), #2 -+** movprfx z0\.s, p0/z, z0\.s -+** mul z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_u32_z_tied1, svuint32_t, -+ z0 = svmul_n_u32_z (p0, z0, 2), -+ z0 = svmul_z (p0, z0, 2)) -+ -+/* -+** mul_2_u32_z_untied: -+** mov (z[0-9]+\.s), #2 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** mul z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** mul z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_u32_z_untied, svuint32_t, -+ z0 = svmul_n_u32_z (p0, z1, 2), -+ z0 = svmul_z (p0, z1, 2)) -+ -+/* -+** mul_u32_x_tied1: -+** mul z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mul_u32_x_tied1, svuint32_t, -+ z0 = svmul_u32_x (p0, z0, z1), -+ z0 = svmul_x (p0, z0, z1)) -+ -+/* -+** mul_u32_x_tied2: -+** mul z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mul_u32_x_tied2, svuint32_t, -+ z0 = svmul_u32_x (p0, z1, z0), -+ z0 = svmul_x (p0, z1, z0)) -+ -+/* -+** mul_u32_x_untied: -+** ( -+** movprfx z0, z1 -+** mul z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0, z2 -+** mul z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_u32_x_untied, svuint32_t, -+ z0 = svmul_u32_x (p0, z1, z2), -+ z0 = svmul_x (p0, z1, z2)) -+ -+/* -+** mul_w0_u32_x_tied1: -+** mov (z[0-9]+\.s), w0 -+** mul z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_w0_u32_x_tied1, svuint32_t, uint32_t, -+ z0 = svmul_n_u32_x (p0, z0, x0), -+ z0 = svmul_x (p0, z0, x0)) -+ -+/* -+** mul_w0_u32_x_untied: -+** mov z0\.s, w0 -+** mul z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_w0_u32_x_untied, svuint32_t, uint32_t, -+ z0 = svmul_n_u32_x (p0, z1, x0), -+ z0 = svmul_x (p0, z1, x0)) -+ -+/* -+** mul_2_u32_x_tied1: -+** mul z0\.s, z0\.s, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_u32_x_tied1, svuint32_t, -+ z0 = svmul_n_u32_x (p0, z0, 2), -+ z0 = svmul_x (p0, z0, 2)) -+ -+/* -+** mul_2_u32_x_untied: -+** movprfx z0, z1 -+** mul z0\.s, z0\.s, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_u32_x_untied, svuint32_t, -+ z0 = svmul_n_u32_x (p0, z1, 2), -+ z0 = svmul_x (p0, z1, 2)) -+ -+/* -+** mul_127_u32_x: -+** mul z0\.s, z0\.s, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_127_u32_x, svuint32_t, -+ z0 = svmul_n_u32_x (p0, z0, 127), -+ z0 = svmul_x (p0, z0, 127)) -+ -+/* -+** mul_128_u32_x: -+** mov (z[0-9]+\.s), #128 -+** mul z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_128_u32_x, svuint32_t, -+ z0 = svmul_n_u32_x (p0, z0, 128), -+ z0 = svmul_x (p0, z0, 128)) -+ -+/* -+** mul_255_u32_x: -+** mov (z[0-9]+\.s), #255 -+** mul z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_255_u32_x, svuint32_t, -+ z0 = svmul_n_u32_x (p0, z0, 255), -+ z0 = svmul_x (p0, z0, 255)) -+ -+/* -+** mul_m1_u32_x: -+** mul z0\.s, z0\.s, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_m1_u32_x, svuint32_t, -+ z0 = svmul_n_u32_x (p0, z0, -1), -+ z0 = svmul_x (p0, z0, -1)) -+ -+/* -+** mul_m127_u32_x: -+** mul z0\.s, z0\.s, #-127 -+** ret -+*/ -+TEST_UNIFORM_Z 
(mul_m127_u32_x, svuint32_t, -+ z0 = svmul_n_u32_x (p0, z0, -127), -+ z0 = svmul_x (p0, z0, -127)) -+ -+/* -+** mul_m128_u32_x: -+** mul z0\.s, z0\.s, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_m128_u32_x, svuint32_t, -+ z0 = svmul_n_u32_x (p0, z0, -128), -+ z0 = svmul_x (p0, z0, -128)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u64.c -new file mode 100644 -index 000000000..f6959dbc7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u64.c -@@ -0,0 +1,302 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mul_u64_m_tied1: -+** mul z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mul_u64_m_tied1, svuint64_t, -+ z0 = svmul_u64_m (p0, z0, z1), -+ z0 = svmul_m (p0, z0, z1)) -+ -+/* -+** mul_u64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** mul z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_u64_m_tied2, svuint64_t, -+ z0 = svmul_u64_m (p0, z1, z0), -+ z0 = svmul_m (p0, z1, z0)) -+ -+/* -+** mul_u64_m_untied: -+** movprfx z0, z1 -+** mul z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mul_u64_m_untied, svuint64_t, -+ z0 = svmul_u64_m (p0, z1, z2), -+ z0 = svmul_m (p0, z1, z2)) -+ -+/* -+** mul_x0_u64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** mul z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_x0_u64_m_tied1, svuint64_t, uint64_t, -+ z0 = svmul_n_u64_m (p0, z0, x0), -+ z0 = svmul_m (p0, z0, x0)) -+ -+/* -+** mul_x0_u64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** mul z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_x0_u64_m_untied, svuint64_t, uint64_t, -+ z0 = svmul_n_u64_m (p0, z1, x0), -+ z0 = svmul_m (p0, z1, x0)) -+ -+/* -+** mul_2_u64_m_tied1: -+** mov (z[0-9]+\.d), #2 -+** mul z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_u64_m_tied1, svuint64_t, -+ z0 = svmul_n_u64_m (p0, z0, 2), -+ z0 = svmul_m (p0, z0, 2)) -+ -+/* -+** mul_2_u64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #2 -+** movprfx z0, z1 -+** mul z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_u64_m_untied, svuint64_t, -+ z0 = svmul_n_u64_m (p0, z1, 2), -+ z0 = svmul_m (p0, z1, 2)) -+ -+/* -+** mul_m1_u64_m: -+** mov (z[0-9]+)\.b, #-1 -+** mul z0\.d, p0/m, z0\.d, \1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mul_m1_u64_m, svuint64_t, -+ z0 = svmul_n_u64_m (p0, z0, -1), -+ z0 = svmul_m (p0, z0, -1)) -+ -+/* -+** mul_u64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** mul z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mul_u64_z_tied1, svuint64_t, -+ z0 = svmul_u64_z (p0, z0, z1), -+ z0 = svmul_z (p0, z0, z1)) -+ -+/* -+** mul_u64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** mul z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mul_u64_z_tied2, svuint64_t, -+ z0 = svmul_u64_z (p0, z1, z0), -+ z0 = svmul_z (p0, z1, z0)) -+ -+/* -+** mul_u64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** mul z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** mul z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_u64_z_untied, svuint64_t, -+ z0 = svmul_u64_z (p0, z1, z2), -+ z0 = svmul_z (p0, z1, z2)) -+ -+/* -+** mul_x0_u64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** mul z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_x0_u64_z_tied1, svuint64_t, uint64_t, -+ z0 = svmul_n_u64_z (p0, z0, x0), -+ z0 = svmul_z (p0, z0, x0)) -+ -+/* -+** mul_x0_u64_z_untied: -+** mov (z[0-9]+\.d), x0 
-+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** mul z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** mul z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_x0_u64_z_untied, svuint64_t, uint64_t, -+ z0 = svmul_n_u64_z (p0, z1, x0), -+ z0 = svmul_z (p0, z1, x0)) -+ -+/* -+** mul_2_u64_z_tied1: -+** mov (z[0-9]+\.d), #2 -+** movprfx z0\.d, p0/z, z0\.d -+** mul z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_u64_z_tied1, svuint64_t, -+ z0 = svmul_n_u64_z (p0, z0, 2), -+ z0 = svmul_z (p0, z0, 2)) -+ -+/* -+** mul_2_u64_z_untied: -+** mov (z[0-9]+\.d), #2 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** mul z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** mul z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_u64_z_untied, svuint64_t, -+ z0 = svmul_n_u64_z (p0, z1, 2), -+ z0 = svmul_z (p0, z1, 2)) -+ -+/* -+** mul_u64_x_tied1: -+** mul z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mul_u64_x_tied1, svuint64_t, -+ z0 = svmul_u64_x (p0, z0, z1), -+ z0 = svmul_x (p0, z0, z1)) -+ -+/* -+** mul_u64_x_tied2: -+** mul z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mul_u64_x_tied2, svuint64_t, -+ z0 = svmul_u64_x (p0, z1, z0), -+ z0 = svmul_x (p0, z1, z0)) -+ -+/* -+** mul_u64_x_untied: -+** ( -+** movprfx z0, z1 -+** mul z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0, z2 -+** mul z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_u64_x_untied, svuint64_t, -+ z0 = svmul_u64_x (p0, z1, z2), -+ z0 = svmul_x (p0, z1, z2)) -+ -+/* -+** mul_x0_u64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** mul z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_x0_u64_x_tied1, svuint64_t, uint64_t, -+ z0 = svmul_n_u64_x (p0, z0, x0), -+ z0 = svmul_x (p0, z0, x0)) -+ -+/* -+** mul_x0_u64_x_untied: -+** mov z0\.d, x0 -+** mul z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_x0_u64_x_untied, svuint64_t, uint64_t, -+ z0 = svmul_n_u64_x (p0, z1, x0), -+ z0 = svmul_x (p0, z1, x0)) -+ -+/* -+** mul_2_u64_x_tied1: -+** mul z0\.d, z0\.d, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_u64_x_tied1, svuint64_t, -+ z0 = svmul_n_u64_x (p0, z0, 2), -+ z0 = svmul_x (p0, z0, 2)) -+ -+/* -+** mul_2_u64_x_untied: -+** movprfx z0, z1 -+** mul z0\.d, z0\.d, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_u64_x_untied, svuint64_t, -+ z0 = svmul_n_u64_x (p0, z1, 2), -+ z0 = svmul_x (p0, z1, 2)) -+ -+/* -+** mul_127_u64_x: -+** mul z0\.d, z0\.d, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_127_u64_x, svuint64_t, -+ z0 = svmul_n_u64_x (p0, z0, 127), -+ z0 = svmul_x (p0, z0, 127)) -+ -+/* -+** mul_128_u64_x: -+** mov (z[0-9]+\.d), #128 -+** mul z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_128_u64_x, svuint64_t, -+ z0 = svmul_n_u64_x (p0, z0, 128), -+ z0 = svmul_x (p0, z0, 128)) -+ -+/* -+** mul_255_u64_x: -+** mov (z[0-9]+\.d), #255 -+** mul z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_255_u64_x, svuint64_t, -+ z0 = svmul_n_u64_x (p0, z0, 255), -+ z0 = svmul_x (p0, z0, 255)) -+ -+/* -+** mul_m1_u64_x: -+** mul z0\.d, z0\.d, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_m1_u64_x, svuint64_t, -+ z0 = svmul_n_u64_x (p0, z0, -1), -+ z0 = svmul_x (p0, z0, -1)) -+ -+/* -+** mul_m127_u64_x: -+** mul z0\.d, z0\.d, #-127 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_m127_u64_x, svuint64_t, -+ z0 = svmul_n_u64_x (p0, z0, -127), -+ z0 = svmul_x (p0, z0, -127)) -+ -+/* -+** mul_m128_u64_x: -+** mul z0\.d, z0\.d, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_m128_u64_x, svuint64_t, -+ z0 = svmul_n_u64_x (p0, z0, -128), -+ z0 = svmul_x (p0, 
z0, -128)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u8.c -new file mode 100644 -index 000000000..b2745a48f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mul_u8.c -@@ -0,0 +1,300 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mul_u8_m_tied1: -+** mul z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mul_u8_m_tied1, svuint8_t, -+ z0 = svmul_u8_m (p0, z0, z1), -+ z0 = svmul_m (p0, z0, z1)) -+ -+/* -+** mul_u8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** mul z0\.b, p0/m, z0\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mul_u8_m_tied2, svuint8_t, -+ z0 = svmul_u8_m (p0, z1, z0), -+ z0 = svmul_m (p0, z1, z0)) -+ -+/* -+** mul_u8_m_untied: -+** movprfx z0, z1 -+** mul z0\.b, p0/m, z0\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mul_u8_m_untied, svuint8_t, -+ z0 = svmul_u8_m (p0, z1, z2), -+ z0 = svmul_m (p0, z1, z2)) -+ -+/* -+** mul_w0_u8_m_tied1: -+** mov (z[0-9]+\.b), w0 -+** mul z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_w0_u8_m_tied1, svuint8_t, uint8_t, -+ z0 = svmul_n_u8_m (p0, z0, x0), -+ z0 = svmul_m (p0, z0, x0)) -+ -+/* -+** mul_w0_u8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** mul z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_w0_u8_m_untied, svuint8_t, uint8_t, -+ z0 = svmul_n_u8_m (p0, z1, x0), -+ z0 = svmul_m (p0, z1, x0)) -+ -+/* -+** mul_2_u8_m_tied1: -+** mov (z[0-9]+\.b), #2 -+** mul z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_u8_m_tied1, svuint8_t, -+ z0 = svmul_n_u8_m (p0, z0, 2), -+ z0 = svmul_m (p0, z0, 2)) -+ -+/* -+** mul_2_u8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), #2 -+** movprfx z0, z1 -+** mul z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_u8_m_untied, svuint8_t, -+ z0 = svmul_n_u8_m (p0, z1, 2), -+ z0 = svmul_m (p0, z1, 2)) -+ -+/* -+** mul_m1_u8_m: -+** mov (z[0-9]+\.b), #-1 -+** mul z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_m1_u8_m, svuint8_t, -+ z0 = svmul_n_u8_m (p0, z0, -1), -+ z0 = svmul_m (p0, z0, -1)) -+ -+/* -+** mul_u8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** mul z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mul_u8_z_tied1, svuint8_t, -+ z0 = svmul_u8_z (p0, z0, z1), -+ z0 = svmul_z (p0, z0, z1)) -+ -+/* -+** mul_u8_z_tied2: -+** movprfx z0\.b, p0/z, z0\.b -+** mul z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mul_u8_z_tied2, svuint8_t, -+ z0 = svmul_u8_z (p0, z1, z0), -+ z0 = svmul_z (p0, z1, z0)) -+ -+/* -+** mul_u8_z_untied: -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** mul z0\.b, p0/m, z0\.b, z2\.b -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** mul z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_u8_z_untied, svuint8_t, -+ z0 = svmul_u8_z (p0, z1, z2), -+ z0 = svmul_z (p0, z1, z2)) -+ -+/* -+** mul_w0_u8_z_tied1: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** mul z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_w0_u8_z_tied1, svuint8_t, uint8_t, -+ z0 = svmul_n_u8_z (p0, z0, x0), -+ z0 = svmul_z (p0, z0, x0)) -+ -+/* -+** mul_w0_u8_z_untied: -+** mov (z[0-9]+\.b), w0 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** mul z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** mul z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_w0_u8_z_untied, svuint8_t, uint8_t, -+ z0 = svmul_n_u8_z (p0, z1, x0), -+ z0 = svmul_z (p0, z1, x0)) -+ -+/* -+** mul_2_u8_z_tied1: 
-+** mov (z[0-9]+\.b), #2 -+** movprfx z0\.b, p0/z, z0\.b -+** mul z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_u8_z_tied1, svuint8_t, -+ z0 = svmul_n_u8_z (p0, z0, 2), -+ z0 = svmul_z (p0, z0, 2)) -+ -+/* -+** mul_2_u8_z_untied: -+** mov (z[0-9]+\.b), #2 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** mul z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** mul z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_u8_z_untied, svuint8_t, -+ z0 = svmul_n_u8_z (p0, z1, 2), -+ z0 = svmul_z (p0, z1, 2)) -+ -+/* -+** mul_u8_x_tied1: -+** mul z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mul_u8_x_tied1, svuint8_t, -+ z0 = svmul_u8_x (p0, z0, z1), -+ z0 = svmul_x (p0, z0, z1)) -+ -+/* -+** mul_u8_x_tied2: -+** mul z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mul_u8_x_tied2, svuint8_t, -+ z0 = svmul_u8_x (p0, z1, z0), -+ z0 = svmul_x (p0, z1, z0)) -+ -+/* -+** mul_u8_x_untied: -+** ( -+** movprfx z0, z1 -+** mul z0\.b, p0/m, z0\.b, z2\.b -+** | -+** movprfx z0, z2 -+** mul z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mul_u8_x_untied, svuint8_t, -+ z0 = svmul_u8_x (p0, z1, z2), -+ z0 = svmul_x (p0, z1, z2)) -+ -+/* -+** mul_w0_u8_x_tied1: -+** mov (z[0-9]+\.b), w0 -+** mul z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_w0_u8_x_tied1, svuint8_t, uint8_t, -+ z0 = svmul_n_u8_x (p0, z0, x0), -+ z0 = svmul_x (p0, z0, x0)) -+ -+/* -+** mul_w0_u8_x_untied: -+** mov z0\.b, w0 -+** mul z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_ZX (mul_w0_u8_x_untied, svuint8_t, uint8_t, -+ z0 = svmul_n_u8_x (p0, z1, x0), -+ z0 = svmul_x (p0, z1, x0)) -+ -+/* -+** mul_2_u8_x_tied1: -+** mul z0\.b, z0\.b, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_u8_x_tied1, svuint8_t, -+ z0 = svmul_n_u8_x (p0, z0, 2), -+ z0 = svmul_x (p0, z0, 2)) -+ -+/* -+** mul_2_u8_x_untied: -+** movprfx z0, z1 -+** mul z0\.b, z0\.b, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_2_u8_x_untied, svuint8_t, -+ z0 = svmul_n_u8_x (p0, z1, 2), -+ z0 = svmul_x (p0, z1, 2)) -+ -+/* -+** mul_127_u8_x: -+** mul z0\.b, z0\.b, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_127_u8_x, svuint8_t, -+ z0 = svmul_n_u8_x (p0, z0, 127), -+ z0 = svmul_x (p0, z0, 127)) -+ -+/* -+** mul_128_u8_x: -+** mul z0\.b, z0\.b, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_128_u8_x, svuint8_t, -+ z0 = svmul_n_u8_x (p0, z0, 128), -+ z0 = svmul_x (p0, z0, 128)) -+ -+/* -+** mul_255_u8_x: -+** mul z0\.b, z0\.b, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_255_u8_x, svuint8_t, -+ z0 = svmul_n_u8_x (p0, z0, 255), -+ z0 = svmul_x (p0, z0, 255)) -+ -+/* -+** mul_m1_u8_x: -+** mul z0\.b, z0\.b, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_m1_u8_x, svuint8_t, -+ z0 = svmul_n_u8_x (p0, z0, -1), -+ z0 = svmul_x (p0, z0, -1)) -+ -+/* -+** mul_m127_u8_x: -+** mul z0\.b, z0\.b, #-127 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_m127_u8_x, svuint8_t, -+ z0 = svmul_n_u8_x (p0, z0, -127), -+ z0 = svmul_x (p0, z0, -127)) -+ -+/* -+** mul_m128_u8_x: -+** mul z0\.b, z0\.b, #-128 -+** ret -+*/ -+TEST_UNIFORM_Z (mul_m128_u8_x, svuint8_t, -+ z0 = svmul_n_u8_x (p0, z0, -128), -+ z0 = svmul_x (p0, z0, -128)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_s16.c -new file mode 100644 -index 000000000..a81532f5d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_s16.c -@@ -0,0 +1,237 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mulh_s16_m_tied1: -+** smulh z0\.h, p0/m, 
z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_s16_m_tied1, svint16_t, -+ z0 = svmulh_s16_m (p0, z0, z1), -+ z0 = svmulh_m (p0, z0, z1)) -+ -+/* -+** mulh_s16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** smulh z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_s16_m_tied2, svint16_t, -+ z0 = svmulh_s16_m (p0, z1, z0), -+ z0 = svmulh_m (p0, z1, z0)) -+ -+/* -+** mulh_s16_m_untied: -+** movprfx z0, z1 -+** smulh z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_s16_m_untied, svint16_t, -+ z0 = svmulh_s16_m (p0, z1, z2), -+ z0 = svmulh_m (p0, z1, z2)) -+ -+/* -+** mulh_w0_s16_m_tied1: -+** mov (z[0-9]+\.h), w0 -+** smulh z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_w0_s16_m_tied1, svint16_t, int16_t, -+ z0 = svmulh_n_s16_m (p0, z0, x0), -+ z0 = svmulh_m (p0, z0, x0)) -+ -+/* -+** mulh_w0_s16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** smulh z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_w0_s16_m_untied, svint16_t, int16_t, -+ z0 = svmulh_n_s16_m (p0, z1, x0), -+ z0 = svmulh_m (p0, z1, x0)) -+ -+/* -+** mulh_11_s16_m_tied1: -+** mov (z[0-9]+\.h), #11 -+** smulh z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_s16_m_tied1, svint16_t, -+ z0 = svmulh_n_s16_m (p0, z0, 11), -+ z0 = svmulh_m (p0, z0, 11)) -+ -+/* -+** mulh_11_s16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), #11 -+** movprfx z0, z1 -+** smulh z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_s16_m_untied, svint16_t, -+ z0 = svmulh_n_s16_m (p0, z1, 11), -+ z0 = svmulh_m (p0, z1, 11)) -+ -+/* -+** mulh_s16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** smulh z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_s16_z_tied1, svint16_t, -+ z0 = svmulh_s16_z (p0, z0, z1), -+ z0 = svmulh_z (p0, z0, z1)) -+ -+/* -+** mulh_s16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** smulh z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_s16_z_tied2, svint16_t, -+ z0 = svmulh_s16_z (p0, z1, z0), -+ z0 = svmulh_z (p0, z1, z0)) -+ -+/* -+** mulh_s16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** smulh z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** smulh z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_s16_z_untied, svint16_t, -+ z0 = svmulh_s16_z (p0, z1, z2), -+ z0 = svmulh_z (p0, z1, z2)) -+ -+/* -+** mulh_w0_s16_z_tied1: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** smulh z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_w0_s16_z_tied1, svint16_t, int16_t, -+ z0 = svmulh_n_s16_z (p0, z0, x0), -+ z0 = svmulh_z (p0, z0, x0)) -+ -+/* -+** mulh_w0_s16_z_untied: -+** mov (z[0-9]+\.h), w0 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** smulh z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** smulh z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_w0_s16_z_untied, svint16_t, int16_t, -+ z0 = svmulh_n_s16_z (p0, z1, x0), -+ z0 = svmulh_z (p0, z1, x0)) -+ -+/* -+** mulh_11_s16_z_tied1: -+** mov (z[0-9]+\.h), #11 -+** movprfx z0\.h, p0/z, z0\.h -+** smulh z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_s16_z_tied1, svint16_t, -+ z0 = svmulh_n_s16_z (p0, z0, 11), -+ z0 = svmulh_z (p0, z0, 11)) -+ -+/* -+** mulh_11_s16_z_untied: -+** mov (z[0-9]+\.h), #11 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** smulh z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** smulh z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_s16_z_untied, svint16_t, -+ z0 = 
svmulh_n_s16_z (p0, z1, 11), -+ z0 = svmulh_z (p0, z1, 11)) -+ -+/* -+** mulh_s16_x_tied1: -+** smulh z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_s16_x_tied1, svint16_t, -+ z0 = svmulh_s16_x (p0, z0, z1), -+ z0 = svmulh_x (p0, z0, z1)) -+ -+/* -+** mulh_s16_x_tied2: -+** smulh z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_s16_x_tied2, svint16_t, -+ z0 = svmulh_s16_x (p0, z1, z0), -+ z0 = svmulh_x (p0, z1, z0)) -+ -+/* -+** mulh_s16_x_untied: -+** ( -+** movprfx z0, z1 -+** smulh z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0, z2 -+** smulh z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_s16_x_untied, svint16_t, -+ z0 = svmulh_s16_x (p0, z1, z2), -+ z0 = svmulh_x (p0, z1, z2)) -+ -+/* -+** mulh_w0_s16_x_tied1: -+** mov (z[0-9]+\.h), w0 -+** smulh z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_w0_s16_x_tied1, svint16_t, int16_t, -+ z0 = svmulh_n_s16_x (p0, z0, x0), -+ z0 = svmulh_x (p0, z0, x0)) -+ -+/* -+** mulh_w0_s16_x_untied: -+** mov z0\.h, w0 -+** smulh z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_w0_s16_x_untied, svint16_t, int16_t, -+ z0 = svmulh_n_s16_x (p0, z1, x0), -+ z0 = svmulh_x (p0, z1, x0)) -+ -+/* -+** mulh_11_s16_x_tied1: -+** mov (z[0-9]+\.h), #11 -+** smulh z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_s16_x_tied1, svint16_t, -+ z0 = svmulh_n_s16_x (p0, z0, 11), -+ z0 = svmulh_x (p0, z0, 11)) -+ -+/* -+** mulh_11_s16_x_untied: -+** mov z0\.h, #11 -+** smulh z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_s16_x_untied, svint16_t, -+ z0 = svmulh_n_s16_x (p0, z1, 11), -+ z0 = svmulh_x (p0, z1, 11)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_s32.c -new file mode 100644 -index 000000000..078feeb6a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_s32.c -@@ -0,0 +1,237 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mulh_s32_m_tied1: -+** smulh z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_s32_m_tied1, svint32_t, -+ z0 = svmulh_s32_m (p0, z0, z1), -+ z0 = svmulh_m (p0, z0, z1)) -+ -+/* -+** mulh_s32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** smulh z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_s32_m_tied2, svint32_t, -+ z0 = svmulh_s32_m (p0, z1, z0), -+ z0 = svmulh_m (p0, z1, z0)) -+ -+/* -+** mulh_s32_m_untied: -+** movprfx z0, z1 -+** smulh z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_s32_m_untied, svint32_t, -+ z0 = svmulh_s32_m (p0, z1, z2), -+ z0 = svmulh_m (p0, z1, z2)) -+ -+/* -+** mulh_w0_s32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** smulh z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_w0_s32_m_tied1, svint32_t, int32_t, -+ z0 = svmulh_n_s32_m (p0, z0, x0), -+ z0 = svmulh_m (p0, z0, x0)) -+ -+/* -+** mulh_w0_s32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** smulh z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_w0_s32_m_untied, svint32_t, int32_t, -+ z0 = svmulh_n_s32_m (p0, z1, x0), -+ z0 = svmulh_m (p0, z1, x0)) -+ -+/* -+** mulh_11_s32_m_tied1: -+** mov (z[0-9]+\.s), #11 -+** smulh z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_s32_m_tied1, svint32_t, -+ z0 = svmulh_n_s32_m (p0, z0, 11), -+ z0 = svmulh_m (p0, z0, 11)) -+ -+/* -+** mulh_11_s32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #11 -+** movprfx z0, z1 -+** smulh z0\.s, p0/m, z0\.s, \1 
-+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_s32_m_untied, svint32_t, -+ z0 = svmulh_n_s32_m (p0, z1, 11), -+ z0 = svmulh_m (p0, z1, 11)) -+ -+/* -+** mulh_s32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** smulh z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_s32_z_tied1, svint32_t, -+ z0 = svmulh_s32_z (p0, z0, z1), -+ z0 = svmulh_z (p0, z0, z1)) -+ -+/* -+** mulh_s32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** smulh z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_s32_z_tied2, svint32_t, -+ z0 = svmulh_s32_z (p0, z1, z0), -+ z0 = svmulh_z (p0, z1, z0)) -+ -+/* -+** mulh_s32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** smulh z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** smulh z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_s32_z_untied, svint32_t, -+ z0 = svmulh_s32_z (p0, z1, z2), -+ z0 = svmulh_z (p0, z1, z2)) -+ -+/* -+** mulh_w0_s32_z_tied1: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** smulh z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_w0_s32_z_tied1, svint32_t, int32_t, -+ z0 = svmulh_n_s32_z (p0, z0, x0), -+ z0 = svmulh_z (p0, z0, x0)) -+ -+/* -+** mulh_w0_s32_z_untied: -+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** smulh z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** smulh z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_w0_s32_z_untied, svint32_t, int32_t, -+ z0 = svmulh_n_s32_z (p0, z1, x0), -+ z0 = svmulh_z (p0, z1, x0)) -+ -+/* -+** mulh_11_s32_z_tied1: -+** mov (z[0-9]+\.s), #11 -+** movprfx z0\.s, p0/z, z0\.s -+** smulh z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_s32_z_tied1, svint32_t, -+ z0 = svmulh_n_s32_z (p0, z0, 11), -+ z0 = svmulh_z (p0, z0, 11)) -+ -+/* -+** mulh_11_s32_z_untied: -+** mov (z[0-9]+\.s), #11 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** smulh z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** smulh z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_s32_z_untied, svint32_t, -+ z0 = svmulh_n_s32_z (p0, z1, 11), -+ z0 = svmulh_z (p0, z1, 11)) -+ -+/* -+** mulh_s32_x_tied1: -+** smulh z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_s32_x_tied1, svint32_t, -+ z0 = svmulh_s32_x (p0, z0, z1), -+ z0 = svmulh_x (p0, z0, z1)) -+ -+/* -+** mulh_s32_x_tied2: -+** smulh z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_s32_x_tied2, svint32_t, -+ z0 = svmulh_s32_x (p0, z1, z0), -+ z0 = svmulh_x (p0, z1, z0)) -+ -+/* -+** mulh_s32_x_untied: -+** ( -+** movprfx z0, z1 -+** smulh z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0, z2 -+** smulh z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_s32_x_untied, svint32_t, -+ z0 = svmulh_s32_x (p0, z1, z2), -+ z0 = svmulh_x (p0, z1, z2)) -+ -+/* -+** mulh_w0_s32_x_tied1: -+** mov (z[0-9]+\.s), w0 -+** smulh z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_w0_s32_x_tied1, svint32_t, int32_t, -+ z0 = svmulh_n_s32_x (p0, z0, x0), -+ z0 = svmulh_x (p0, z0, x0)) -+ -+/* -+** mulh_w0_s32_x_untied: -+** mov z0\.s, w0 -+** smulh z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_w0_s32_x_untied, svint32_t, int32_t, -+ z0 = svmulh_n_s32_x (p0, z1, x0), -+ z0 = svmulh_x (p0, z1, x0)) -+ -+/* -+** mulh_11_s32_x_tied1: -+** mov (z[0-9]+\.s), #11 -+** smulh z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_s32_x_tied1, svint32_t, -+ z0 = svmulh_n_s32_x (p0, z0, 11), -+ z0 = svmulh_x (p0, z0, 11)) -+ -+/* -+** mulh_11_s32_x_untied: 
-+** mov z0\.s, #11 -+** smulh z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_s32_x_untied, svint32_t, -+ z0 = svmulh_n_s32_x (p0, z1, 11), -+ z0 = svmulh_x (p0, z1, 11)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_s64.c -new file mode 100644 -index 000000000..a87d4d5ce ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_s64.c -@@ -0,0 +1,237 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mulh_s64_m_tied1: -+** smulh z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_s64_m_tied1, svint64_t, -+ z0 = svmulh_s64_m (p0, z0, z1), -+ z0 = svmulh_m (p0, z0, z1)) -+ -+/* -+** mulh_s64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** smulh z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_s64_m_tied2, svint64_t, -+ z0 = svmulh_s64_m (p0, z1, z0), -+ z0 = svmulh_m (p0, z1, z0)) -+ -+/* -+** mulh_s64_m_untied: -+** movprfx z0, z1 -+** smulh z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_s64_m_untied, svint64_t, -+ z0 = svmulh_s64_m (p0, z1, z2), -+ z0 = svmulh_m (p0, z1, z2)) -+ -+/* -+** mulh_x0_s64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** smulh z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_x0_s64_m_tied1, svint64_t, int64_t, -+ z0 = svmulh_n_s64_m (p0, z0, x0), -+ z0 = svmulh_m (p0, z0, x0)) -+ -+/* -+** mulh_x0_s64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** smulh z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_x0_s64_m_untied, svint64_t, int64_t, -+ z0 = svmulh_n_s64_m (p0, z1, x0), -+ z0 = svmulh_m (p0, z1, x0)) -+ -+/* -+** mulh_11_s64_m_tied1: -+** mov (z[0-9]+\.d), #11 -+** smulh z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_s64_m_tied1, svint64_t, -+ z0 = svmulh_n_s64_m (p0, z0, 11), -+ z0 = svmulh_m (p0, z0, 11)) -+ -+/* -+** mulh_11_s64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #11 -+** movprfx z0, z1 -+** smulh z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_s64_m_untied, svint64_t, -+ z0 = svmulh_n_s64_m (p0, z1, 11), -+ z0 = svmulh_m (p0, z1, 11)) -+ -+/* -+** mulh_s64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** smulh z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_s64_z_tied1, svint64_t, -+ z0 = svmulh_s64_z (p0, z0, z1), -+ z0 = svmulh_z (p0, z0, z1)) -+ -+/* -+** mulh_s64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** smulh z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_s64_z_tied2, svint64_t, -+ z0 = svmulh_s64_z (p0, z1, z0), -+ z0 = svmulh_z (p0, z1, z0)) -+ -+/* -+** mulh_s64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** smulh z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** smulh z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_s64_z_untied, svint64_t, -+ z0 = svmulh_s64_z (p0, z1, z2), -+ z0 = svmulh_z (p0, z1, z2)) -+ -+/* -+** mulh_x0_s64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** smulh z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_x0_s64_z_tied1, svint64_t, int64_t, -+ z0 = svmulh_n_s64_z (p0, z0, x0), -+ z0 = svmulh_z (p0, z0, x0)) -+ -+/* -+** mulh_x0_s64_z_untied: -+** mov (z[0-9]+\.d), x0 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** smulh z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** smulh z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_x0_s64_z_untied, svint64_t, int64_t, -+ z0 = 
svmulh_n_s64_z (p0, z1, x0), -+ z0 = svmulh_z (p0, z1, x0)) -+ -+/* -+** mulh_11_s64_z_tied1: -+** mov (z[0-9]+\.d), #11 -+** movprfx z0\.d, p0/z, z0\.d -+** smulh z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_s64_z_tied1, svint64_t, -+ z0 = svmulh_n_s64_z (p0, z0, 11), -+ z0 = svmulh_z (p0, z0, 11)) -+ -+/* -+** mulh_11_s64_z_untied: -+** mov (z[0-9]+\.d), #11 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** smulh z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** smulh z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_s64_z_untied, svint64_t, -+ z0 = svmulh_n_s64_z (p0, z1, 11), -+ z0 = svmulh_z (p0, z1, 11)) -+ -+/* -+** mulh_s64_x_tied1: -+** smulh z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_s64_x_tied1, svint64_t, -+ z0 = svmulh_s64_x (p0, z0, z1), -+ z0 = svmulh_x (p0, z0, z1)) -+ -+/* -+** mulh_s64_x_tied2: -+** smulh z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_s64_x_tied2, svint64_t, -+ z0 = svmulh_s64_x (p0, z1, z0), -+ z0 = svmulh_x (p0, z1, z0)) -+ -+/* -+** mulh_s64_x_untied: -+** ( -+** movprfx z0, z1 -+** smulh z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0, z2 -+** smulh z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_s64_x_untied, svint64_t, -+ z0 = svmulh_s64_x (p0, z1, z2), -+ z0 = svmulh_x (p0, z1, z2)) -+ -+/* -+** mulh_x0_s64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** smulh z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_x0_s64_x_tied1, svint64_t, int64_t, -+ z0 = svmulh_n_s64_x (p0, z0, x0), -+ z0 = svmulh_x (p0, z0, x0)) -+ -+/* -+** mulh_x0_s64_x_untied: -+** mov z0\.d, x0 -+** smulh z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_x0_s64_x_untied, svint64_t, int64_t, -+ z0 = svmulh_n_s64_x (p0, z1, x0), -+ z0 = svmulh_x (p0, z1, x0)) -+ -+/* -+** mulh_11_s64_x_tied1: -+** mov (z[0-9]+\.d), #11 -+** smulh z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_s64_x_tied1, svint64_t, -+ z0 = svmulh_n_s64_x (p0, z0, 11), -+ z0 = svmulh_x (p0, z0, 11)) -+ -+/* -+** mulh_11_s64_x_untied: -+** mov z0\.d, #11 -+** smulh z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_s64_x_untied, svint64_t, -+ z0 = svmulh_n_s64_x (p0, z1, 11), -+ z0 = svmulh_x (p0, z1, 11)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_s8.c -new file mode 100644 -index 000000000..f9cd01afd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_s8.c -@@ -0,0 +1,237 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mulh_s8_m_tied1: -+** smulh z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_s8_m_tied1, svint8_t, -+ z0 = svmulh_s8_m (p0, z0, z1), -+ z0 = svmulh_m (p0, z0, z1)) -+ -+/* -+** mulh_s8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** smulh z0\.b, p0/m, z0\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_s8_m_tied2, svint8_t, -+ z0 = svmulh_s8_m (p0, z1, z0), -+ z0 = svmulh_m (p0, z1, z0)) -+ -+/* -+** mulh_s8_m_untied: -+** movprfx z0, z1 -+** smulh z0\.b, p0/m, z0\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_s8_m_untied, svint8_t, -+ z0 = svmulh_s8_m (p0, z1, z2), -+ z0 = svmulh_m (p0, z1, z2)) -+ -+/* -+** mulh_w0_s8_m_tied1: -+** mov (z[0-9]+\.b), w0 -+** smulh z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_w0_s8_m_tied1, svint8_t, int8_t, -+ z0 = svmulh_n_s8_m (p0, z0, x0), -+ z0 = svmulh_m (p0, z0, x0)) -+ -+/* -+** 
mulh_w0_s8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** smulh z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_w0_s8_m_untied, svint8_t, int8_t, -+ z0 = svmulh_n_s8_m (p0, z1, x0), -+ z0 = svmulh_m (p0, z1, x0)) -+ -+/* -+** mulh_11_s8_m_tied1: -+** mov (z[0-9]+\.b), #11 -+** smulh z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_s8_m_tied1, svint8_t, -+ z0 = svmulh_n_s8_m (p0, z0, 11), -+ z0 = svmulh_m (p0, z0, 11)) -+ -+/* -+** mulh_11_s8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), #11 -+** movprfx z0, z1 -+** smulh z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_s8_m_untied, svint8_t, -+ z0 = svmulh_n_s8_m (p0, z1, 11), -+ z0 = svmulh_m (p0, z1, 11)) -+ -+/* -+** mulh_s8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** smulh z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_s8_z_tied1, svint8_t, -+ z0 = svmulh_s8_z (p0, z0, z1), -+ z0 = svmulh_z (p0, z0, z1)) -+ -+/* -+** mulh_s8_z_tied2: -+** movprfx z0\.b, p0/z, z0\.b -+** smulh z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_s8_z_tied2, svint8_t, -+ z0 = svmulh_s8_z (p0, z1, z0), -+ z0 = svmulh_z (p0, z1, z0)) -+ -+/* -+** mulh_s8_z_untied: -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** smulh z0\.b, p0/m, z0\.b, z2\.b -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** smulh z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_s8_z_untied, svint8_t, -+ z0 = svmulh_s8_z (p0, z1, z2), -+ z0 = svmulh_z (p0, z1, z2)) -+ -+/* -+** mulh_w0_s8_z_tied1: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** smulh z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_w0_s8_z_tied1, svint8_t, int8_t, -+ z0 = svmulh_n_s8_z (p0, z0, x0), -+ z0 = svmulh_z (p0, z0, x0)) -+ -+/* -+** mulh_w0_s8_z_untied: -+** mov (z[0-9]+\.b), w0 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** smulh z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** smulh z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_w0_s8_z_untied, svint8_t, int8_t, -+ z0 = svmulh_n_s8_z (p0, z1, x0), -+ z0 = svmulh_z (p0, z1, x0)) -+ -+/* -+** mulh_11_s8_z_tied1: -+** mov (z[0-9]+\.b), #11 -+** movprfx z0\.b, p0/z, z0\.b -+** smulh z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_s8_z_tied1, svint8_t, -+ z0 = svmulh_n_s8_z (p0, z0, 11), -+ z0 = svmulh_z (p0, z0, 11)) -+ -+/* -+** mulh_11_s8_z_untied: -+** mov (z[0-9]+\.b), #11 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** smulh z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** smulh z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_s8_z_untied, svint8_t, -+ z0 = svmulh_n_s8_z (p0, z1, 11), -+ z0 = svmulh_z (p0, z1, 11)) -+ -+/* -+** mulh_s8_x_tied1: -+** smulh z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_s8_x_tied1, svint8_t, -+ z0 = svmulh_s8_x (p0, z0, z1), -+ z0 = svmulh_x (p0, z0, z1)) -+ -+/* -+** mulh_s8_x_tied2: -+** smulh z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_s8_x_tied2, svint8_t, -+ z0 = svmulh_s8_x (p0, z1, z0), -+ z0 = svmulh_x (p0, z1, z0)) -+ -+/* -+** mulh_s8_x_untied: -+** ( -+** movprfx z0, z1 -+** smulh z0\.b, p0/m, z0\.b, z2\.b -+** | -+** movprfx z0, z2 -+** smulh z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_s8_x_untied, svint8_t, -+ z0 = svmulh_s8_x (p0, z1, z2), -+ z0 = svmulh_x (p0, z1, z2)) -+ -+/* -+** mulh_w0_s8_x_tied1: -+** mov (z[0-9]+\.b), w0 -+** smulh z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_w0_s8_x_tied1, svint8_t, int8_t, -+ 
z0 = svmulh_n_s8_x (p0, z0, x0), -+ z0 = svmulh_x (p0, z0, x0)) -+ -+/* -+** mulh_w0_s8_x_untied: -+** mov z0\.b, w0 -+** smulh z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_w0_s8_x_untied, svint8_t, int8_t, -+ z0 = svmulh_n_s8_x (p0, z1, x0), -+ z0 = svmulh_x (p0, z1, x0)) -+ -+/* -+** mulh_11_s8_x_tied1: -+** mov (z[0-9]+\.b), #11 -+** smulh z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_s8_x_tied1, svint8_t, -+ z0 = svmulh_n_s8_x (p0, z0, 11), -+ z0 = svmulh_x (p0, z0, 11)) -+ -+/* -+** mulh_11_s8_x_untied: -+** mov z0\.b, #11 -+** smulh z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_s8_x_untied, svint8_t, -+ z0 = svmulh_n_s8_x (p0, z1, 11), -+ z0 = svmulh_x (p0, z1, 11)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_u16.c -new file mode 100644 -index 000000000..e9173eb24 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_u16.c -@@ -0,0 +1,237 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mulh_u16_m_tied1: -+** umulh z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_u16_m_tied1, svuint16_t, -+ z0 = svmulh_u16_m (p0, z0, z1), -+ z0 = svmulh_m (p0, z0, z1)) -+ -+/* -+** mulh_u16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** umulh z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_u16_m_tied2, svuint16_t, -+ z0 = svmulh_u16_m (p0, z1, z0), -+ z0 = svmulh_m (p0, z1, z0)) -+ -+/* -+** mulh_u16_m_untied: -+** movprfx z0, z1 -+** umulh z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_u16_m_untied, svuint16_t, -+ z0 = svmulh_u16_m (p0, z1, z2), -+ z0 = svmulh_m (p0, z1, z2)) -+ -+/* -+** mulh_w0_u16_m_tied1: -+** mov (z[0-9]+\.h), w0 -+** umulh z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_w0_u16_m_tied1, svuint16_t, uint16_t, -+ z0 = svmulh_n_u16_m (p0, z0, x0), -+ z0 = svmulh_m (p0, z0, x0)) -+ -+/* -+** mulh_w0_u16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** umulh z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_w0_u16_m_untied, svuint16_t, uint16_t, -+ z0 = svmulh_n_u16_m (p0, z1, x0), -+ z0 = svmulh_m (p0, z1, x0)) -+ -+/* -+** mulh_11_u16_m_tied1: -+** mov (z[0-9]+\.h), #11 -+** umulh z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_u16_m_tied1, svuint16_t, -+ z0 = svmulh_n_u16_m (p0, z0, 11), -+ z0 = svmulh_m (p0, z0, 11)) -+ -+/* -+** mulh_11_u16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), #11 -+** movprfx z0, z1 -+** umulh z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_u16_m_untied, svuint16_t, -+ z0 = svmulh_n_u16_m (p0, z1, 11), -+ z0 = svmulh_m (p0, z1, 11)) -+ -+/* -+** mulh_u16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** umulh z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_u16_z_tied1, svuint16_t, -+ z0 = svmulh_u16_z (p0, z0, z1), -+ z0 = svmulh_z (p0, z0, z1)) -+ -+/* -+** mulh_u16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** umulh z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_u16_z_tied2, svuint16_t, -+ z0 = svmulh_u16_z (p0, z1, z0), -+ z0 = svmulh_z (p0, z1, z0)) -+ -+/* -+** mulh_u16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** umulh z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** umulh z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_u16_z_untied, svuint16_t, -+ z0 = svmulh_u16_z (p0, z1, z2), -+ z0 = svmulh_z (p0, z1, 
z2)) -+ -+/* -+** mulh_w0_u16_z_tied1: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** umulh z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_w0_u16_z_tied1, svuint16_t, uint16_t, -+ z0 = svmulh_n_u16_z (p0, z0, x0), -+ z0 = svmulh_z (p0, z0, x0)) -+ -+/* -+** mulh_w0_u16_z_untied: -+** mov (z[0-9]+\.h), w0 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** umulh z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** umulh z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_w0_u16_z_untied, svuint16_t, uint16_t, -+ z0 = svmulh_n_u16_z (p0, z1, x0), -+ z0 = svmulh_z (p0, z1, x0)) -+ -+/* -+** mulh_11_u16_z_tied1: -+** mov (z[0-9]+\.h), #11 -+** movprfx z0\.h, p0/z, z0\.h -+** umulh z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_u16_z_tied1, svuint16_t, -+ z0 = svmulh_n_u16_z (p0, z0, 11), -+ z0 = svmulh_z (p0, z0, 11)) -+ -+/* -+** mulh_11_u16_z_untied: -+** mov (z[0-9]+\.h), #11 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** umulh z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** umulh z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_u16_z_untied, svuint16_t, -+ z0 = svmulh_n_u16_z (p0, z1, 11), -+ z0 = svmulh_z (p0, z1, 11)) -+ -+/* -+** mulh_u16_x_tied1: -+** umulh z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_u16_x_tied1, svuint16_t, -+ z0 = svmulh_u16_x (p0, z0, z1), -+ z0 = svmulh_x (p0, z0, z1)) -+ -+/* -+** mulh_u16_x_tied2: -+** umulh z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_u16_x_tied2, svuint16_t, -+ z0 = svmulh_u16_x (p0, z1, z0), -+ z0 = svmulh_x (p0, z1, z0)) -+ -+/* -+** mulh_u16_x_untied: -+** ( -+** movprfx z0, z1 -+** umulh z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0, z2 -+** umulh z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_u16_x_untied, svuint16_t, -+ z0 = svmulh_u16_x (p0, z1, z2), -+ z0 = svmulh_x (p0, z1, z2)) -+ -+/* -+** mulh_w0_u16_x_tied1: -+** mov (z[0-9]+\.h), w0 -+** umulh z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_w0_u16_x_tied1, svuint16_t, uint16_t, -+ z0 = svmulh_n_u16_x (p0, z0, x0), -+ z0 = svmulh_x (p0, z0, x0)) -+ -+/* -+** mulh_w0_u16_x_untied: -+** mov z0\.h, w0 -+** umulh z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_w0_u16_x_untied, svuint16_t, uint16_t, -+ z0 = svmulh_n_u16_x (p0, z1, x0), -+ z0 = svmulh_x (p0, z1, x0)) -+ -+/* -+** mulh_11_u16_x_tied1: -+** mov (z[0-9]+\.h), #11 -+** umulh z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_u16_x_tied1, svuint16_t, -+ z0 = svmulh_n_u16_x (p0, z0, 11), -+ z0 = svmulh_x (p0, z0, 11)) -+ -+/* -+** mulh_11_u16_x_untied: -+** mov z0\.h, #11 -+** umulh z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_u16_x_untied, svuint16_t, -+ z0 = svmulh_n_u16_x (p0, z1, 11), -+ z0 = svmulh_x (p0, z1, 11)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_u32.c -new file mode 100644 -index 000000000..de1f24f09 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_u32.c -@@ -0,0 +1,237 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mulh_u32_m_tied1: -+** umulh z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_u32_m_tied1, svuint32_t, -+ z0 = svmulh_u32_m (p0, z0, z1), -+ z0 = svmulh_m (p0, z0, z1)) -+ -+/* -+** mulh_u32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** umulh z0\.s, p0/m, z0\.s, \1\.s 
-+** ret -+*/ -+TEST_UNIFORM_Z (mulh_u32_m_tied2, svuint32_t, -+ z0 = svmulh_u32_m (p0, z1, z0), -+ z0 = svmulh_m (p0, z1, z0)) -+ -+/* -+** mulh_u32_m_untied: -+** movprfx z0, z1 -+** umulh z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_u32_m_untied, svuint32_t, -+ z0 = svmulh_u32_m (p0, z1, z2), -+ z0 = svmulh_m (p0, z1, z2)) -+ -+/* -+** mulh_w0_u32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** umulh z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_w0_u32_m_tied1, svuint32_t, uint32_t, -+ z0 = svmulh_n_u32_m (p0, z0, x0), -+ z0 = svmulh_m (p0, z0, x0)) -+ -+/* -+** mulh_w0_u32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** umulh z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_w0_u32_m_untied, svuint32_t, uint32_t, -+ z0 = svmulh_n_u32_m (p0, z1, x0), -+ z0 = svmulh_m (p0, z1, x0)) -+ -+/* -+** mulh_11_u32_m_tied1: -+** mov (z[0-9]+\.s), #11 -+** umulh z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_u32_m_tied1, svuint32_t, -+ z0 = svmulh_n_u32_m (p0, z0, 11), -+ z0 = svmulh_m (p0, z0, 11)) -+ -+/* -+** mulh_11_u32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #11 -+** movprfx z0, z1 -+** umulh z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_u32_m_untied, svuint32_t, -+ z0 = svmulh_n_u32_m (p0, z1, 11), -+ z0 = svmulh_m (p0, z1, 11)) -+ -+/* -+** mulh_u32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** umulh z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_u32_z_tied1, svuint32_t, -+ z0 = svmulh_u32_z (p0, z0, z1), -+ z0 = svmulh_z (p0, z0, z1)) -+ -+/* -+** mulh_u32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** umulh z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_u32_z_tied2, svuint32_t, -+ z0 = svmulh_u32_z (p0, z1, z0), -+ z0 = svmulh_z (p0, z1, z0)) -+ -+/* -+** mulh_u32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** umulh z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** umulh z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_u32_z_untied, svuint32_t, -+ z0 = svmulh_u32_z (p0, z1, z2), -+ z0 = svmulh_z (p0, z1, z2)) -+ -+/* -+** mulh_w0_u32_z_tied1: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** umulh z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_w0_u32_z_tied1, svuint32_t, uint32_t, -+ z0 = svmulh_n_u32_z (p0, z0, x0), -+ z0 = svmulh_z (p0, z0, x0)) -+ -+/* -+** mulh_w0_u32_z_untied: -+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** umulh z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** umulh z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_w0_u32_z_untied, svuint32_t, uint32_t, -+ z0 = svmulh_n_u32_z (p0, z1, x0), -+ z0 = svmulh_z (p0, z1, x0)) -+ -+/* -+** mulh_11_u32_z_tied1: -+** mov (z[0-9]+\.s), #11 -+** movprfx z0\.s, p0/z, z0\.s -+** umulh z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_u32_z_tied1, svuint32_t, -+ z0 = svmulh_n_u32_z (p0, z0, 11), -+ z0 = svmulh_z (p0, z0, 11)) -+ -+/* -+** mulh_11_u32_z_untied: -+** mov (z[0-9]+\.s), #11 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** umulh z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** umulh z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_u32_z_untied, svuint32_t, -+ z0 = svmulh_n_u32_z (p0, z1, 11), -+ z0 = svmulh_z (p0, z1, 11)) -+ -+/* -+** mulh_u32_x_tied1: -+** umulh z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_u32_x_tied1, svuint32_t, -+ z0 = svmulh_u32_x (p0, z0, z1), -+ z0 = svmulh_x (p0, z0, z1)) -+ 
-+/* -+** mulh_u32_x_tied2: -+** umulh z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_u32_x_tied2, svuint32_t, -+ z0 = svmulh_u32_x (p0, z1, z0), -+ z0 = svmulh_x (p0, z1, z0)) -+ -+/* -+** mulh_u32_x_untied: -+** ( -+** movprfx z0, z1 -+** umulh z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0, z2 -+** umulh z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_u32_x_untied, svuint32_t, -+ z0 = svmulh_u32_x (p0, z1, z2), -+ z0 = svmulh_x (p0, z1, z2)) -+ -+/* -+** mulh_w0_u32_x_tied1: -+** mov (z[0-9]+\.s), w0 -+** umulh z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_w0_u32_x_tied1, svuint32_t, uint32_t, -+ z0 = svmulh_n_u32_x (p0, z0, x0), -+ z0 = svmulh_x (p0, z0, x0)) -+ -+/* -+** mulh_w0_u32_x_untied: -+** mov z0\.s, w0 -+** umulh z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_w0_u32_x_untied, svuint32_t, uint32_t, -+ z0 = svmulh_n_u32_x (p0, z1, x0), -+ z0 = svmulh_x (p0, z1, x0)) -+ -+/* -+** mulh_11_u32_x_tied1: -+** mov (z[0-9]+\.s), #11 -+** umulh z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_u32_x_tied1, svuint32_t, -+ z0 = svmulh_n_u32_x (p0, z0, 11), -+ z0 = svmulh_x (p0, z0, 11)) -+ -+/* -+** mulh_11_u32_x_untied: -+** mov z0\.s, #11 -+** umulh z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_u32_x_untied, svuint32_t, -+ z0 = svmulh_n_u32_x (p0, z1, 11), -+ z0 = svmulh_x (p0, z1, 11)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_u64.c -new file mode 100644 -index 000000000..0d7e12a7c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_u64.c -@@ -0,0 +1,237 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mulh_u64_m_tied1: -+** umulh z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_u64_m_tied1, svuint64_t, -+ z0 = svmulh_u64_m (p0, z0, z1), -+ z0 = svmulh_m (p0, z0, z1)) -+ -+/* -+** mulh_u64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** umulh z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_u64_m_tied2, svuint64_t, -+ z0 = svmulh_u64_m (p0, z1, z0), -+ z0 = svmulh_m (p0, z1, z0)) -+ -+/* -+** mulh_u64_m_untied: -+** movprfx z0, z1 -+** umulh z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_u64_m_untied, svuint64_t, -+ z0 = svmulh_u64_m (p0, z1, z2), -+ z0 = svmulh_m (p0, z1, z2)) -+ -+/* -+** mulh_x0_u64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** umulh z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_x0_u64_m_tied1, svuint64_t, uint64_t, -+ z0 = svmulh_n_u64_m (p0, z0, x0), -+ z0 = svmulh_m (p0, z0, x0)) -+ -+/* -+** mulh_x0_u64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** umulh z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_x0_u64_m_untied, svuint64_t, uint64_t, -+ z0 = svmulh_n_u64_m (p0, z1, x0), -+ z0 = svmulh_m (p0, z1, x0)) -+ -+/* -+** mulh_11_u64_m_tied1: -+** mov (z[0-9]+\.d), #11 -+** umulh z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_u64_m_tied1, svuint64_t, -+ z0 = svmulh_n_u64_m (p0, z0, 11), -+ z0 = svmulh_m (p0, z0, 11)) -+ -+/* -+** mulh_11_u64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #11 -+** movprfx z0, z1 -+** umulh z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_u64_m_untied, svuint64_t, -+ z0 = svmulh_n_u64_m (p0, z1, 11), -+ z0 = svmulh_m (p0, z1, 11)) -+ -+/* -+** mulh_u64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** umulh z0\.d, p0/m, z0\.d, z1\.d -+** ret 
-+*/ -+TEST_UNIFORM_Z (mulh_u64_z_tied1, svuint64_t, -+ z0 = svmulh_u64_z (p0, z0, z1), -+ z0 = svmulh_z (p0, z0, z1)) -+ -+/* -+** mulh_u64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** umulh z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_u64_z_tied2, svuint64_t, -+ z0 = svmulh_u64_z (p0, z1, z0), -+ z0 = svmulh_z (p0, z1, z0)) -+ -+/* -+** mulh_u64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** umulh z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** umulh z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_u64_z_untied, svuint64_t, -+ z0 = svmulh_u64_z (p0, z1, z2), -+ z0 = svmulh_z (p0, z1, z2)) -+ -+/* -+** mulh_x0_u64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** umulh z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_x0_u64_z_tied1, svuint64_t, uint64_t, -+ z0 = svmulh_n_u64_z (p0, z0, x0), -+ z0 = svmulh_z (p0, z0, x0)) -+ -+/* -+** mulh_x0_u64_z_untied: -+** mov (z[0-9]+\.d), x0 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** umulh z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** umulh z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_x0_u64_z_untied, svuint64_t, uint64_t, -+ z0 = svmulh_n_u64_z (p0, z1, x0), -+ z0 = svmulh_z (p0, z1, x0)) -+ -+/* -+** mulh_11_u64_z_tied1: -+** mov (z[0-9]+\.d), #11 -+** movprfx z0\.d, p0/z, z0\.d -+** umulh z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_u64_z_tied1, svuint64_t, -+ z0 = svmulh_n_u64_z (p0, z0, 11), -+ z0 = svmulh_z (p0, z0, 11)) -+ -+/* -+** mulh_11_u64_z_untied: -+** mov (z[0-9]+\.d), #11 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** umulh z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** umulh z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_u64_z_untied, svuint64_t, -+ z0 = svmulh_n_u64_z (p0, z1, 11), -+ z0 = svmulh_z (p0, z1, 11)) -+ -+/* -+** mulh_u64_x_tied1: -+** umulh z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_u64_x_tied1, svuint64_t, -+ z0 = svmulh_u64_x (p0, z0, z1), -+ z0 = svmulh_x (p0, z0, z1)) -+ -+/* -+** mulh_u64_x_tied2: -+** umulh z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_u64_x_tied2, svuint64_t, -+ z0 = svmulh_u64_x (p0, z1, z0), -+ z0 = svmulh_x (p0, z1, z0)) -+ -+/* -+** mulh_u64_x_untied: -+** ( -+** movprfx z0, z1 -+** umulh z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0, z2 -+** umulh z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_u64_x_untied, svuint64_t, -+ z0 = svmulh_u64_x (p0, z1, z2), -+ z0 = svmulh_x (p0, z1, z2)) -+ -+/* -+** mulh_x0_u64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** umulh z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_x0_u64_x_tied1, svuint64_t, uint64_t, -+ z0 = svmulh_n_u64_x (p0, z0, x0), -+ z0 = svmulh_x (p0, z0, x0)) -+ -+/* -+** mulh_x0_u64_x_untied: -+** mov z0\.d, x0 -+** umulh z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_x0_u64_x_untied, svuint64_t, uint64_t, -+ z0 = svmulh_n_u64_x (p0, z1, x0), -+ z0 = svmulh_x (p0, z1, x0)) -+ -+/* -+** mulh_11_u64_x_tied1: -+** mov (z[0-9]+\.d), #11 -+** umulh z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_u64_x_tied1, svuint64_t, -+ z0 = svmulh_n_u64_x (p0, z0, 11), -+ z0 = svmulh_x (p0, z0, 11)) -+ -+/* -+** mulh_11_u64_x_untied: -+** mov z0\.d, #11 -+** umulh z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_u64_x_untied, svuint64_t, -+ z0 = svmulh_n_u64_x (p0, z1, 11), -+ z0 = svmulh_x (p0, z1, 11)) -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_u8.c -new file mode 100644 -index 000000000..db7b1be1b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulh_u8.c -@@ -0,0 +1,237 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mulh_u8_m_tied1: -+** umulh z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_u8_m_tied1, svuint8_t, -+ z0 = svmulh_u8_m (p0, z0, z1), -+ z0 = svmulh_m (p0, z0, z1)) -+ -+/* -+** mulh_u8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** umulh z0\.b, p0/m, z0\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_u8_m_tied2, svuint8_t, -+ z0 = svmulh_u8_m (p0, z1, z0), -+ z0 = svmulh_m (p0, z1, z0)) -+ -+/* -+** mulh_u8_m_untied: -+** movprfx z0, z1 -+** umulh z0\.b, p0/m, z0\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_u8_m_untied, svuint8_t, -+ z0 = svmulh_u8_m (p0, z1, z2), -+ z0 = svmulh_m (p0, z1, z2)) -+ -+/* -+** mulh_w0_u8_m_tied1: -+** mov (z[0-9]+\.b), w0 -+** umulh z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_w0_u8_m_tied1, svuint8_t, uint8_t, -+ z0 = svmulh_n_u8_m (p0, z0, x0), -+ z0 = svmulh_m (p0, z0, x0)) -+ -+/* -+** mulh_w0_u8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** umulh z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_w0_u8_m_untied, svuint8_t, uint8_t, -+ z0 = svmulh_n_u8_m (p0, z1, x0), -+ z0 = svmulh_m (p0, z1, x0)) -+ -+/* -+** mulh_11_u8_m_tied1: -+** mov (z[0-9]+\.b), #11 -+** umulh z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_u8_m_tied1, svuint8_t, -+ z0 = svmulh_n_u8_m (p0, z0, 11), -+ z0 = svmulh_m (p0, z0, 11)) -+ -+/* -+** mulh_11_u8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), #11 -+** movprfx z0, z1 -+** umulh z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_u8_m_untied, svuint8_t, -+ z0 = svmulh_n_u8_m (p0, z1, 11), -+ z0 = svmulh_m (p0, z1, 11)) -+ -+/* -+** mulh_u8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** umulh z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_u8_z_tied1, svuint8_t, -+ z0 = svmulh_u8_z (p0, z0, z1), -+ z0 = svmulh_z (p0, z0, z1)) -+ -+/* -+** mulh_u8_z_tied2: -+** movprfx z0\.b, p0/z, z0\.b -+** umulh z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_u8_z_tied2, svuint8_t, -+ z0 = svmulh_u8_z (p0, z1, z0), -+ z0 = svmulh_z (p0, z1, z0)) -+ -+/* -+** mulh_u8_z_untied: -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** umulh z0\.b, p0/m, z0\.b, z2\.b -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** umulh z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_u8_z_untied, svuint8_t, -+ z0 = svmulh_u8_z (p0, z1, z2), -+ z0 = svmulh_z (p0, z1, z2)) -+ -+/* -+** mulh_w0_u8_z_tied1: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** umulh z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_w0_u8_z_tied1, svuint8_t, uint8_t, -+ z0 = svmulh_n_u8_z (p0, z0, x0), -+ z0 = svmulh_z (p0, z0, x0)) -+ -+/* -+** mulh_w0_u8_z_untied: -+** mov (z[0-9]+\.b), w0 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** umulh z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** umulh z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_w0_u8_z_untied, svuint8_t, uint8_t, -+ z0 = svmulh_n_u8_z (p0, z1, x0), -+ z0 = svmulh_z (p0, z1, x0)) -+ -+/* -+** mulh_11_u8_z_tied1: -+** mov (z[0-9]+\.b), #11 -+** movprfx z0\.b, p0/z, z0\.b -+** umulh z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z 
(mulh_11_u8_z_tied1, svuint8_t, -+ z0 = svmulh_n_u8_z (p0, z0, 11), -+ z0 = svmulh_z (p0, z0, 11)) -+ -+/* -+** mulh_11_u8_z_untied: -+** mov (z[0-9]+\.b), #11 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** umulh z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** umulh z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_u8_z_untied, svuint8_t, -+ z0 = svmulh_n_u8_z (p0, z1, 11), -+ z0 = svmulh_z (p0, z1, 11)) -+ -+/* -+** mulh_u8_x_tied1: -+** umulh z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_u8_x_tied1, svuint8_t, -+ z0 = svmulh_u8_x (p0, z0, z1), -+ z0 = svmulh_x (p0, z0, z1)) -+ -+/* -+** mulh_u8_x_tied2: -+** umulh z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_u8_x_tied2, svuint8_t, -+ z0 = svmulh_u8_x (p0, z1, z0), -+ z0 = svmulh_x (p0, z1, z0)) -+ -+/* -+** mulh_u8_x_untied: -+** ( -+** movprfx z0, z1 -+** umulh z0\.b, p0/m, z0\.b, z2\.b -+** | -+** movprfx z0, z2 -+** umulh z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_u8_x_untied, svuint8_t, -+ z0 = svmulh_u8_x (p0, z1, z2), -+ z0 = svmulh_x (p0, z1, z2)) -+ -+/* -+** mulh_w0_u8_x_tied1: -+** mov (z[0-9]+\.b), w0 -+** umulh z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_w0_u8_x_tied1, svuint8_t, uint8_t, -+ z0 = svmulh_n_u8_x (p0, z0, x0), -+ z0 = svmulh_x (p0, z0, x0)) -+ -+/* -+** mulh_w0_u8_x_untied: -+** mov z0\.b, w0 -+** umulh z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_ZX (mulh_w0_u8_x_untied, svuint8_t, uint8_t, -+ z0 = svmulh_n_u8_x (p0, z1, x0), -+ z0 = svmulh_x (p0, z1, x0)) -+ -+/* -+** mulh_11_u8_x_tied1: -+** mov (z[0-9]+\.b), #11 -+** umulh z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_u8_x_tied1, svuint8_t, -+ z0 = svmulh_n_u8_x (p0, z0, 11), -+ z0 = svmulh_x (p0, z0, 11)) -+ -+/* -+** mulh_11_u8_x_untied: -+** mov z0\.b, #11 -+** umulh z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (mulh_11_u8_x_untied, svuint8_t, -+ z0 = svmulh_n_u8_x (p0, z1, 11), -+ z0 = svmulh_x (p0, z1, 11)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulx_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulx_f16.c -new file mode 100644 -index 000000000..ce02c3caa ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulx_f16.c -@@ -0,0 +1,472 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mulx_f16_m_tied1: -+** fmulx z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_f16_m_tied1, svfloat16_t, -+ z0 = svmulx_f16_m (p0, z0, z1), -+ z0 = svmulx_m (p0, z0, z1)) -+ -+/* -+** mulx_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fmulx z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_f16_m_tied2, svfloat16_t, -+ z0 = svmulx_f16_m (p0, z1, z0), -+ z0 = svmulx_m (p0, z1, z0)) -+ -+/* -+** mulx_f16_m_untied: -+** movprfx z0, z1 -+** fmulx z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_f16_m_untied, svfloat16_t, -+ z0 = svmulx_f16_m (p0, z1, z2), -+ z0 = svmulx_m (p0, z1, z2)) -+ -+/* -+** mulx_h4_f16_m_tied1: -+** mov (z[0-9]+\.h), h4 -+** fmulx z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mulx_h4_f16_m_tied1, svfloat16_t, __fp16, -+ z0 = svmulx_n_f16_m (p0, z0, d4), -+ z0 = svmulx_m (p0, z0, d4)) -+ -+/* -+** mulx_h4_f16_m_untied: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0, z1 -+** fmulx z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mulx_h4_f16_m_untied, svfloat16_t, __fp16, -+ z0 = svmulx_n_f16_m (p0, z1, d4), -+ z0 = svmulx_m 
(p0, z1, d4)) -+ -+/* -+** mulx_1_f16_m_tied1: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? -+** fmulx z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_1_f16_m_tied1, svfloat16_t, -+ z0 = svmulx_n_f16_m (p0, z0, 1), -+ z0 = svmulx_m (p0, z0, 1)) -+ -+/* -+** mulx_1_f16_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? -+** movprfx z0, z1 -+** fmulx z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_1_f16_m_untied, svfloat16_t, -+ z0 = svmulx_n_f16_m (p0, z1, 1), -+ z0 = svmulx_m (p0, z1, 1)) -+ -+/* -+** mulx_0p5_f16_m_tied1: -+** fmov (z[0-9]+\.h), #(?:0\.5|5\.0e-1) -+** fmulx z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_0p5_f16_m_tied1, svfloat16_t, -+ z0 = svmulx_n_f16_m (p0, z0, 0.5), -+ z0 = svmulx_m (p0, z0, 0.5)) -+ -+/* -+** mulx_0p5_f16_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.h), #(?:0\.5|5\.0e-1) -+** movprfx z0, z1 -+** fmulx z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_0p5_f16_m_untied, svfloat16_t, -+ z0 = svmulx_n_f16_m (p0, z1, 0.5), -+ z0 = svmulx_m (p0, z1, 0.5)) -+ -+/* -+** mulx_2_f16_m_tied1: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fmulx z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_2_f16_m_tied1, svfloat16_t, -+ z0 = svmulx_n_f16_m (p0, z0, 2), -+ z0 = svmulx_m (p0, z0, 2)) -+ -+/* -+** mulx_2_f16_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** movprfx z0, z1 -+** fmulx z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_2_f16_m_untied, svfloat16_t, -+ z0 = svmulx_n_f16_m (p0, z1, 2), -+ z0 = svmulx_m (p0, z1, 2)) -+ -+/* -+** mulx_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fmulx z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_f16_z_tied1, svfloat16_t, -+ z0 = svmulx_f16_z (p0, z0, z1), -+ z0 = svmulx_z (p0, z0, z1)) -+ -+/* -+** mulx_f16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** fmulx z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_f16_z_tied2, svfloat16_t, -+ z0 = svmulx_f16_z (p0, z1, z0), -+ z0 = svmulx_z (p0, z1, z0)) -+ -+/* -+** mulx_f16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fmulx z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fmulx z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_f16_z_untied, svfloat16_t, -+ z0 = svmulx_f16_z (p0, z1, z2), -+ z0 = svmulx_z (p0, z1, z2)) -+ -+/* -+** mulx_h4_f16_z_tied1: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0\.h, p0/z, z0\.h -+** fmulx z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mulx_h4_f16_z_tied1, svfloat16_t, __fp16, -+ z0 = svmulx_n_f16_z (p0, z0, d4), -+ z0 = svmulx_z (p0, z0, d4)) -+ -+/* -+** mulx_h4_f16_z_untied: -+** mov (z[0-9]+\.h), h4 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fmulx z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** fmulx z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (mulx_h4_f16_z_untied, svfloat16_t, __fp16, -+ z0 = svmulx_n_f16_z (p0, z1, d4), -+ z0 = svmulx_z (p0, z1, d4)) -+ -+/* -+** mulx_1_f16_z_tied1: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? -+** movprfx z0\.h, p0/z, z0\.h -+** fmulx z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_1_f16_z_tied1, svfloat16_t, -+ z0 = svmulx_n_f16_z (p0, z0, 1), -+ z0 = svmulx_z (p0, z0, 1)) -+ -+/* -+** mulx_1_f16_z_untied: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fmulx z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** fmulx z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_1_f16_z_untied, svfloat16_t, -+ z0 = svmulx_n_f16_z (p0, z1, 1), -+ z0 = svmulx_z (p0, z1, 1)) -+ -+/* -+** mulx_0p5_f16_z_tied1: -+** fmov (z[0-9]+\.h), #(?:0\.5|5\.0e-1) -+** movprfx z0\.h, p0/z, z0\.h -+** fmulx z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_0p5_f16_z_tied1, svfloat16_t, -+ z0 = svmulx_n_f16_z (p0, z0, 0.5), -+ z0 = svmulx_z (p0, z0, 0.5)) -+ -+/* -+** mulx_0p5_f16_z_untied: -+** fmov (z[0-9]+\.h), #(?:0\.5|5\.0e-1) -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fmulx z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** fmulx z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_0p5_f16_z_untied, svfloat16_t, -+ z0 = svmulx_n_f16_z (p0, z1, 0.5), -+ z0 = svmulx_z (p0, z1, 0.5)) -+ -+/* -+** mulx_2_f16_z_tied1: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** movprfx z0\.h, p0/z, z0\.h -+** fmulx z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_2_f16_z_tied1, svfloat16_t, -+ z0 = svmulx_n_f16_z (p0, z0, 2), -+ z0 = svmulx_z (p0, z0, 2)) -+ -+/* -+** mulx_2_f16_z_untied: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fmulx z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** fmulx z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_2_f16_z_untied, svfloat16_t, -+ z0 = svmulx_n_f16_z (p0, z1, 2), -+ z0 = svmulx_z (p0, z1, 2)) -+ -+/* -+** mulx_f16_x_tied1: -+** fmulx z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_f16_x_tied1, svfloat16_t, -+ z0 = svmulx_f16_x (p0, z0, z1), -+ z0 = svmulx_x (p0, z0, z1)) -+ -+/* -+** mulx_f16_x_tied2: -+** fmulx z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_f16_x_tied2, svfloat16_t, -+ z0 = svmulx_f16_x (p0, z1, z0), -+ z0 = svmulx_x (p0, z1, z0)) -+ -+/* -+** mulx_f16_x_untied: -+** ( -+** movprfx z0, z1 -+** fmulx z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0, z2 -+** fmulx z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_f16_x_untied, svfloat16_t, -+ z0 = svmulx_f16_x (p0, z1, z2), -+ z0 = svmulx_x (p0, z1, z2)) -+ -+/* -+** mulx_h4_f16_x_tied1: -+** mov (z[0-9]+\.h), h4 -+** fmulx z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mulx_h4_f16_x_tied1, svfloat16_t, __fp16, -+ z0 = svmulx_n_f16_x (p0, z0, d4), -+ z0 = svmulx_x (p0, z0, d4)) -+ -+/* -+** mulx_h4_f16_x_untied: { xfail *-*-* } -+** mov z0\.h, h4 -+** fmulx z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZD (mulx_h4_f16_x_untied, svfloat16_t, __fp16, -+ z0 = svmulx_n_f16_x (p0, z1, d4), -+ z0 = svmulx_x (p0, z1, d4)) -+ -+/* -+** mulx_1_f16_x_tied1: -+** fmov (z[0-9]+\.h), #1\.0(?:e\+0)? -+** fmulx z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_1_f16_x_tied1, svfloat16_t, -+ z0 = svmulx_n_f16_x (p0, z0, 1), -+ z0 = svmulx_x (p0, z0, 1)) -+ -+/* -+** mulx_1_f16_x_untied: -+** fmov z0\.h, #1\.0(?:e\+0)? 
-+** fmulx z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_1_f16_x_untied, svfloat16_t, -+ z0 = svmulx_n_f16_x (p0, z1, 1), -+ z0 = svmulx_x (p0, z1, 1)) -+ -+/* -+** mulx_0p5_f16_x_tied1: -+** fmov (z[0-9]+\.h), #(?:0\.5|5\.0e-1) -+** fmulx z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_0p5_f16_x_tied1, svfloat16_t, -+ z0 = svmulx_n_f16_x (p0, z0, 0.5), -+ z0 = svmulx_x (p0, z0, 0.5)) -+ -+/* -+** mulx_0p5_f16_x_untied: -+** fmov z0\.h, #(?:0\.5|5\.0e-1) -+** fmulx z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_0p5_f16_x_untied, svfloat16_t, -+ z0 = svmulx_n_f16_x (p0, z1, 0.5), -+ z0 = svmulx_x (p0, z1, 0.5)) -+ -+/* -+** mulx_2_f16_x_tied1: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fmulx z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_2_f16_x_tied1, svfloat16_t, -+ z0 = svmulx_n_f16_x (p0, z0, 2), -+ z0 = svmulx_x (p0, z0, 2)) -+ -+/* -+** mulx_2_f16_x_untied: -+** fmov z0\.h, #2\.0(?:e\+0)? -+** fmulx z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_2_f16_x_untied, svfloat16_t, -+ z0 = svmulx_n_f16_x (p0, z1, 2), -+ z0 = svmulx_x (p0, z1, 2)) -+ -+/* -+** ptrue_mulx_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mulx_f16_x_tied1, svfloat16_t, -+ z0 = svmulx_f16_x (svptrue_b16 (), z0, z1), -+ z0 = svmulx_x (svptrue_b16 (), z0, z1)) -+ -+/* -+** ptrue_mulx_f16_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mulx_f16_x_tied2, svfloat16_t, -+ z0 = svmulx_f16_x (svptrue_b16 (), z1, z0), -+ z0 = svmulx_x (svptrue_b16 (), z1, z0)) -+ -+/* -+** ptrue_mulx_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mulx_f16_x_untied, svfloat16_t, -+ z0 = svmulx_f16_x (svptrue_b16 (), z1, z2), -+ z0 = svmulx_x (svptrue_b16 (), z1, z2)) -+ -+/* -+** ptrue_mulx_1_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mulx_1_f16_x_tied1, svfloat16_t, -+ z0 = svmulx_n_f16_x (svptrue_b16 (), z0, 1), -+ z0 = svmulx_x (svptrue_b16 (), z0, 1)) -+ -+/* -+** ptrue_mulx_1_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mulx_1_f16_x_untied, svfloat16_t, -+ z0 = svmulx_n_f16_x (svptrue_b16 (), z1, 1), -+ z0 = svmulx_x (svptrue_b16 (), z1, 1)) -+ -+/* -+** ptrue_mulx_0p5_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mulx_0p5_f16_x_tied1, svfloat16_t, -+ z0 = svmulx_n_f16_x (svptrue_b16 (), z0, 0.5), -+ z0 = svmulx_x (svptrue_b16 (), z0, 0.5)) -+ -+/* -+** ptrue_mulx_0p5_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mulx_0p5_f16_x_untied, svfloat16_t, -+ z0 = svmulx_n_f16_x (svptrue_b16 (), z1, 0.5), -+ z0 = svmulx_x (svptrue_b16 (), z1, 0.5)) -+ -+/* -+** ptrue_mulx_2_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mulx_2_f16_x_tied1, svfloat16_t, -+ z0 = svmulx_n_f16_x (svptrue_b16 (), z0, 2), -+ z0 = svmulx_x (svptrue_b16 (), z0, 2)) -+ -+/* -+** ptrue_mulx_2_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mulx_2_f16_x_untied, svfloat16_t, -+ z0 = svmulx_n_f16_x (svptrue_b16 (), z1, 2), -+ z0 = svmulx_x (svptrue_b16 (), z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulx_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulx_f32.c -new file mode 100644 -index 000000000..e0d369593 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulx_f32.c -@@ -0,0 +1,472 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mulx_f32_m_tied1: -+** fmulx z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_f32_m_tied1, svfloat32_t, -+ z0 = svmulx_f32_m (p0, z0, z1), -+ z0 = svmulx_m (p0, z0, z1)) -+ -+/* -+** mulx_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fmulx z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_f32_m_tied2, svfloat32_t, -+ z0 = svmulx_f32_m (p0, z1, z0), -+ z0 = svmulx_m (p0, z1, z0)) -+ -+/* -+** mulx_f32_m_untied: -+** movprfx z0, z1 -+** fmulx z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_f32_m_untied, svfloat32_t, -+ z0 = svmulx_f32_m (p0, z1, z2), -+ z0 = svmulx_m (p0, z1, z2)) -+ -+/* -+** mulx_s4_f32_m_tied1: -+** mov (z[0-9]+\.s), s4 -+** fmulx z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mulx_s4_f32_m_tied1, svfloat32_t, float, -+ z0 = svmulx_n_f32_m (p0, z0, d4), -+ z0 = svmulx_m (p0, z0, d4)) -+ -+/* -+** mulx_s4_f32_m_untied: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0, z1 -+** fmulx z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mulx_s4_f32_m_untied, svfloat32_t, float, -+ z0 = svmulx_n_f32_m (p0, z1, d4), -+ z0 = svmulx_m (p0, z1, d4)) -+ -+/* -+** mulx_1_f32_m_tied1: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? -+** fmulx z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_1_f32_m_tied1, svfloat32_t, -+ z0 = svmulx_n_f32_m (p0, z0, 1), -+ z0 = svmulx_m (p0, z0, 1)) -+ -+/* -+** mulx_1_f32_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? -+** movprfx z0, z1 -+** fmulx z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_1_f32_m_untied, svfloat32_t, -+ z0 = svmulx_n_f32_m (p0, z1, 1), -+ z0 = svmulx_m (p0, z1, 1)) -+ -+/* -+** mulx_0p5_f32_m_tied1: -+** fmov (z[0-9]+\.s), #(?:0\.5|5\.0e-1) -+** fmulx z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_0p5_f32_m_tied1, svfloat32_t, -+ z0 = svmulx_n_f32_m (p0, z0, 0.5), -+ z0 = svmulx_m (p0, z0, 0.5)) -+ -+/* -+** mulx_0p5_f32_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.s), #(?:0\.5|5\.0e-1) -+** movprfx z0, z1 -+** fmulx z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_0p5_f32_m_untied, svfloat32_t, -+ z0 = svmulx_n_f32_m (p0, z1, 0.5), -+ z0 = svmulx_m (p0, z1, 0.5)) -+ -+/* -+** mulx_2_f32_m_tied1: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fmulx z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_2_f32_m_tied1, svfloat32_t, -+ z0 = svmulx_n_f32_m (p0, z0, 2), -+ z0 = svmulx_m (p0, z0, 2)) -+ -+/* -+** mulx_2_f32_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fmulx z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_2_f32_m_untied, svfloat32_t, -+ z0 = svmulx_n_f32_m (p0, z1, 2), -+ z0 = svmulx_m (p0, z1, 2)) -+ -+/* -+** mulx_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fmulx z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_f32_z_tied1, svfloat32_t, -+ z0 = svmulx_f32_z (p0, z0, z1), -+ z0 = svmulx_z (p0, z0, z1)) -+ -+/* -+** mulx_f32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** fmulx z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_f32_z_tied2, svfloat32_t, -+ z0 = svmulx_f32_z (p0, z1, z0), -+ z0 = svmulx_z (p0, z1, z0)) -+ -+/* -+** mulx_f32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fmulx z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fmulx z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_f32_z_untied, svfloat32_t, -+ z0 = svmulx_f32_z (p0, z1, z2), -+ z0 = svmulx_z (p0, z1, z2)) -+ -+/* -+** mulx_s4_f32_z_tied1: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0\.s, p0/z, z0\.s -+** fmulx z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mulx_s4_f32_z_tied1, svfloat32_t, float, -+ z0 = svmulx_n_f32_z (p0, z0, d4), -+ z0 = svmulx_z (p0, z0, d4)) -+ -+/* -+** mulx_s4_f32_z_untied: -+** mov (z[0-9]+\.s), s4 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fmulx z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** fmulx z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (mulx_s4_f32_z_untied, svfloat32_t, float, -+ z0 = svmulx_n_f32_z (p0, z1, d4), -+ z0 = svmulx_z (p0, z1, d4)) -+ -+/* -+** mulx_1_f32_z_tied1: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? -+** movprfx z0\.s, p0/z, z0\.s -+** fmulx z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_1_f32_z_tied1, svfloat32_t, -+ z0 = svmulx_n_f32_z (p0, z0, 1), -+ z0 = svmulx_z (p0, z0, 1)) -+ -+/* -+** mulx_1_f32_z_untied: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fmulx z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** fmulx z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_1_f32_z_untied, svfloat32_t, -+ z0 = svmulx_n_f32_z (p0, z1, 1), -+ z0 = svmulx_z (p0, z1, 1)) -+ -+/* -+** mulx_0p5_f32_z_tied1: -+** fmov (z[0-9]+\.s), #(?:0\.5|5\.0e-1) -+** movprfx z0\.s, p0/z, z0\.s -+** fmulx z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_0p5_f32_z_tied1, svfloat32_t, -+ z0 = svmulx_n_f32_z (p0, z0, 0.5), -+ z0 = svmulx_z (p0, z0, 0.5)) -+ -+/* -+** mulx_0p5_f32_z_untied: -+** fmov (z[0-9]+\.s), #(?:0\.5|5\.0e-1) -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fmulx z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** fmulx z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_0p5_f32_z_untied, svfloat32_t, -+ z0 = svmulx_n_f32_z (p0, z1, 0.5), -+ z0 = svmulx_z (p0, z1, 0.5)) -+ -+/* -+** mulx_2_f32_z_tied1: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** movprfx z0\.s, p0/z, z0\.s -+** fmulx z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_2_f32_z_tied1, svfloat32_t, -+ z0 = svmulx_n_f32_z (p0, z0, 2), -+ z0 = svmulx_z (p0, z0, 2)) -+ -+/* -+** mulx_2_f32_z_untied: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fmulx z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** fmulx z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_2_f32_z_untied, svfloat32_t, -+ z0 = svmulx_n_f32_z (p0, z1, 2), -+ z0 = svmulx_z (p0, z1, 2)) -+ -+/* -+** mulx_f32_x_tied1: -+** fmulx z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_f32_x_tied1, svfloat32_t, -+ z0 = svmulx_f32_x (p0, z0, z1), -+ z0 = svmulx_x (p0, z0, z1)) -+ -+/* -+** mulx_f32_x_tied2: -+** fmulx z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_f32_x_tied2, svfloat32_t, -+ z0 = svmulx_f32_x (p0, z1, z0), -+ z0 = svmulx_x (p0, z1, z0)) -+ -+/* -+** mulx_f32_x_untied: -+** ( -+** movprfx z0, z1 -+** fmulx z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0, z2 -+** fmulx z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_f32_x_untied, svfloat32_t, -+ z0 = svmulx_f32_x (p0, z1, z2), -+ z0 = svmulx_x (p0, z1, z2)) -+ -+/* -+** mulx_s4_f32_x_tied1: -+** mov (z[0-9]+\.s), s4 -+** fmulx z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mulx_s4_f32_x_tied1, svfloat32_t, float, -+ z0 = svmulx_n_f32_x (p0, z0, d4), -+ z0 = svmulx_x (p0, z0, d4)) -+ -+/* -+** mulx_s4_f32_x_untied: { xfail *-*-* } -+** mov z0\.s, s4 -+** fmulx z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZD (mulx_s4_f32_x_untied, svfloat32_t, float, -+ z0 = svmulx_n_f32_x (p0, z1, d4), -+ z0 = svmulx_x (p0, z1, d4)) -+ -+/* -+** mulx_1_f32_x_tied1: -+** fmov (z[0-9]+\.s), #1\.0(?:e\+0)? -+** fmulx z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_1_f32_x_tied1, svfloat32_t, -+ z0 = svmulx_n_f32_x (p0, z0, 1), -+ z0 = svmulx_x (p0, z0, 1)) -+ -+/* -+** mulx_1_f32_x_untied: -+** fmov z0\.s, #1\.0(?:e\+0)? -+** fmulx z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_1_f32_x_untied, svfloat32_t, -+ z0 = svmulx_n_f32_x (p0, z1, 1), -+ z0 = svmulx_x (p0, z1, 1)) -+ -+/* -+** mulx_0p5_f32_x_tied1: -+** fmov (z[0-9]+\.s), #(?:0\.5|5\.0e-1) -+** fmulx z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_0p5_f32_x_tied1, svfloat32_t, -+ z0 = svmulx_n_f32_x (p0, z0, 0.5), -+ z0 = svmulx_x (p0, z0, 0.5)) -+ -+/* -+** mulx_0p5_f32_x_untied: -+** fmov z0\.s, #(?:0\.5|5\.0e-1) -+** fmulx z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_0p5_f32_x_untied, svfloat32_t, -+ z0 = svmulx_n_f32_x (p0, z1, 0.5), -+ z0 = svmulx_x (p0, z1, 0.5)) -+ -+/* -+** mulx_2_f32_x_tied1: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fmulx z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_2_f32_x_tied1, svfloat32_t, -+ z0 = svmulx_n_f32_x (p0, z0, 2), -+ z0 = svmulx_x (p0, z0, 2)) -+ -+/* -+** mulx_2_f32_x_untied: -+** fmov z0\.s, #2\.0(?:e\+0)? -+** fmulx z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_2_f32_x_untied, svfloat32_t, -+ z0 = svmulx_n_f32_x (p0, z1, 2), -+ z0 = svmulx_x (p0, z1, 2)) -+ -+/* -+** ptrue_mulx_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mulx_f32_x_tied1, svfloat32_t, -+ z0 = svmulx_f32_x (svptrue_b32 (), z0, z1), -+ z0 = svmulx_x (svptrue_b32 (), z0, z1)) -+ -+/* -+** ptrue_mulx_f32_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mulx_f32_x_tied2, svfloat32_t, -+ z0 = svmulx_f32_x (svptrue_b32 (), z1, z0), -+ z0 = svmulx_x (svptrue_b32 (), z1, z0)) -+ -+/* -+** ptrue_mulx_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mulx_f32_x_untied, svfloat32_t, -+ z0 = svmulx_f32_x (svptrue_b32 (), z1, z2), -+ z0 = svmulx_x (svptrue_b32 (), z1, z2)) -+ -+/* -+** ptrue_mulx_1_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mulx_1_f32_x_tied1, svfloat32_t, -+ z0 = svmulx_n_f32_x (svptrue_b32 (), z0, 1), -+ z0 = svmulx_x (svptrue_b32 (), z0, 1)) -+ -+/* -+** ptrue_mulx_1_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mulx_1_f32_x_untied, svfloat32_t, -+ z0 = svmulx_n_f32_x (svptrue_b32 (), z1, 1), -+ z0 = svmulx_x (svptrue_b32 (), z1, 1)) -+ -+/* -+** ptrue_mulx_0p5_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mulx_0p5_f32_x_tied1, svfloat32_t, -+ z0 = svmulx_n_f32_x (svptrue_b32 (), z0, 0.5), -+ z0 = svmulx_x (svptrue_b32 (), z0, 0.5)) -+ -+/* -+** ptrue_mulx_0p5_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mulx_0p5_f32_x_untied, svfloat32_t, -+ z0 = svmulx_n_f32_x (svptrue_b32 (), z1, 0.5), -+ z0 = svmulx_x (svptrue_b32 (), z1, 0.5)) -+ -+/* -+** ptrue_mulx_2_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mulx_2_f32_x_tied1, svfloat32_t, -+ z0 = svmulx_n_f32_x (svptrue_b32 (), z0, 2), -+ z0 = svmulx_x (svptrue_b32 (), z0, 2)) -+ -+/* -+** ptrue_mulx_2_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mulx_2_f32_x_untied, svfloat32_t, -+ z0 = svmulx_n_f32_x (svptrue_b32 (), z1, 2), -+ z0 = svmulx_x (svptrue_b32 (), z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulx_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulx_f64.c -new file mode 100644 -index 000000000..6af5703ff ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/mulx_f64.c -@@ -0,0 +1,472 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** mulx_f64_m_tied1: -+** fmulx z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_f64_m_tied1, svfloat64_t, -+ z0 = svmulx_f64_m (p0, z0, z1), -+ z0 = svmulx_m (p0, z0, z1)) -+ -+/* -+** mulx_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fmulx z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_f64_m_tied2, svfloat64_t, -+ z0 = svmulx_f64_m (p0, z1, z0), -+ z0 = svmulx_m (p0, z1, z0)) -+ -+/* -+** mulx_f64_m_untied: -+** movprfx z0, z1 -+** fmulx z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_f64_m_untied, svfloat64_t, -+ z0 = svmulx_f64_m (p0, z1, z2), -+ z0 = svmulx_m (p0, z1, z2)) -+ -+/* -+** mulx_d4_f64_m_tied1: -+** mov (z[0-9]+\.d), d4 -+** fmulx z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mulx_d4_f64_m_tied1, svfloat64_t, double, -+ z0 = svmulx_n_f64_m (p0, z0, d4), -+ z0 = svmulx_m (p0, z0, d4)) -+ -+/* -+** mulx_d4_f64_m_untied: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0, z1 -+** fmulx z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mulx_d4_f64_m_untied, svfloat64_t, double, -+ z0 = svmulx_n_f64_m (p0, z1, d4), -+ z0 = svmulx_m (p0, z1, d4)) -+ -+/* -+** mulx_1_f64_m_tied1: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? -+** fmulx z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_1_f64_m_tied1, svfloat64_t, -+ z0 = svmulx_n_f64_m (p0, z0, 1), -+ z0 = svmulx_m (p0, z0, 1)) -+ -+/* -+** mulx_1_f64_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fmulx z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_1_f64_m_untied, svfloat64_t, -+ z0 = svmulx_n_f64_m (p0, z1, 1), -+ z0 = svmulx_m (p0, z1, 1)) -+ -+/* -+** mulx_0p5_f64_m_tied1: -+** fmov (z[0-9]+\.d), #(?:0\.5|5\.0e-1) -+** fmulx z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_0p5_f64_m_tied1, svfloat64_t, -+ z0 = svmulx_n_f64_m (p0, z0, 0.5), -+ z0 = svmulx_m (p0, z0, 0.5)) -+ -+/* -+** mulx_0p5_f64_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.d), #(?:0\.5|5\.0e-1) -+** movprfx z0, z1 -+** fmulx z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_0p5_f64_m_untied, svfloat64_t, -+ z0 = svmulx_n_f64_m (p0, z1, 0.5), -+ z0 = svmulx_m (p0, z1, 0.5)) -+ -+/* -+** mulx_2_f64_m_tied1: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fmulx z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_2_f64_m_tied1, svfloat64_t, -+ z0 = svmulx_n_f64_m (p0, z0, 2), -+ z0 = svmulx_m (p0, z0, 2)) -+ -+/* -+** mulx_2_f64_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** movprfx z0, z1 -+** fmulx z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_2_f64_m_untied, svfloat64_t, -+ z0 = svmulx_n_f64_m (p0, z1, 2), -+ z0 = svmulx_m (p0, z1, 2)) -+ -+/* -+** mulx_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fmulx z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_f64_z_tied1, svfloat64_t, -+ z0 = svmulx_f64_z (p0, z0, z1), -+ z0 = svmulx_z (p0, z0, z1)) -+ -+/* -+** mulx_f64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** fmulx z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_f64_z_tied2, svfloat64_t, -+ z0 = svmulx_f64_z (p0, z1, z0), -+ z0 = svmulx_z (p0, z1, z0)) -+ -+/* -+** mulx_f64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fmulx z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fmulx z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_f64_z_untied, svfloat64_t, -+ z0 = svmulx_f64_z (p0, z1, z2), -+ z0 = svmulx_z (p0, z1, z2)) -+ -+/* -+** mulx_d4_f64_z_tied1: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0\.d, p0/z, z0\.d -+** fmulx z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mulx_d4_f64_z_tied1, svfloat64_t, double, -+ z0 = svmulx_n_f64_z (p0, z0, d4), -+ z0 = svmulx_z (p0, z0, d4)) -+ -+/* -+** mulx_d4_f64_z_untied: -+** mov (z[0-9]+\.d), d4 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fmulx z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** fmulx z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (mulx_d4_f64_z_untied, svfloat64_t, double, -+ z0 = svmulx_n_f64_z (p0, z1, d4), -+ z0 = svmulx_z (p0, z1, d4)) -+ -+/* -+** mulx_1_f64_z_tied1: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? -+** movprfx z0\.d, p0/z, z0\.d -+** fmulx z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_1_f64_z_tied1, svfloat64_t, -+ z0 = svmulx_n_f64_z (p0, z0, 1), -+ z0 = svmulx_z (p0, z0, 1)) -+ -+/* -+** mulx_1_f64_z_untied: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fmulx z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** fmulx z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_1_f64_z_untied, svfloat64_t, -+ z0 = svmulx_n_f64_z (p0, z1, 1), -+ z0 = svmulx_z (p0, z1, 1)) -+ -+/* -+** mulx_0p5_f64_z_tied1: -+** fmov (z[0-9]+\.d), #(?:0\.5|5\.0e-1) -+** movprfx z0\.d, p0/z, z0\.d -+** fmulx z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_0p5_f64_z_tied1, svfloat64_t, -+ z0 = svmulx_n_f64_z (p0, z0, 0.5), -+ z0 = svmulx_z (p0, z0, 0.5)) -+ -+/* -+** mulx_0p5_f64_z_untied: -+** fmov (z[0-9]+\.d), #(?:0\.5|5\.0e-1) -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fmulx z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** fmulx z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_0p5_f64_z_untied, svfloat64_t, -+ z0 = svmulx_n_f64_z (p0, z1, 0.5), -+ z0 = svmulx_z (p0, z1, 0.5)) -+ -+/* -+** mulx_2_f64_z_tied1: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** movprfx z0\.d, p0/z, z0\.d -+** fmulx z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_2_f64_z_tied1, svfloat64_t, -+ z0 = svmulx_n_f64_z (p0, z0, 2), -+ z0 = svmulx_z (p0, z0, 2)) -+ -+/* -+** mulx_2_f64_z_untied: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fmulx z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** fmulx z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_2_f64_z_untied, svfloat64_t, -+ z0 = svmulx_n_f64_z (p0, z1, 2), -+ z0 = svmulx_z (p0, z1, 2)) -+ -+/* -+** mulx_f64_x_tied1: -+** fmulx z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_f64_x_tied1, svfloat64_t, -+ z0 = svmulx_f64_x (p0, z0, z1), -+ z0 = svmulx_x (p0, z0, z1)) -+ -+/* -+** mulx_f64_x_tied2: -+** fmulx z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_f64_x_tied2, svfloat64_t, -+ z0 = svmulx_f64_x (p0, z1, z0), -+ z0 = svmulx_x (p0, z1, z0)) -+ -+/* -+** mulx_f64_x_untied: -+** ( -+** movprfx z0, z1 -+** fmulx z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0, z2 -+** fmulx z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_f64_x_untied, svfloat64_t, -+ z0 = svmulx_f64_x (p0, z1, z2), -+ z0 = svmulx_x (p0, z1, z2)) -+ -+/* -+** mulx_d4_f64_x_tied1: -+** mov (z[0-9]+\.d), d4 -+** fmulx z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (mulx_d4_f64_x_tied1, svfloat64_t, double, -+ z0 = svmulx_n_f64_x (p0, z0, d4), -+ z0 = svmulx_x (p0, z0, d4)) -+ -+/* -+** mulx_d4_f64_x_untied: { xfail *-*-* } -+** mov z0\.d, d4 -+** fmulx z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZD (mulx_d4_f64_x_untied, svfloat64_t, double, -+ z0 = svmulx_n_f64_x (p0, z1, d4), -+ z0 = svmulx_x (p0, z1, d4)) -+ -+/* -+** mulx_1_f64_x_tied1: -+** fmov (z[0-9]+\.d), #1\.0(?:e\+0)? -+** fmulx z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_1_f64_x_tied1, svfloat64_t, -+ z0 = svmulx_n_f64_x (p0, z0, 1), -+ z0 = svmulx_x (p0, z0, 1)) -+ -+/* -+** mulx_1_f64_x_untied: -+** fmov z0\.d, #1\.0(?:e\+0)? 
-+** fmulx z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_1_f64_x_untied, svfloat64_t, -+ z0 = svmulx_n_f64_x (p0, z1, 1), -+ z0 = svmulx_x (p0, z1, 1)) -+ -+/* -+** mulx_0p5_f64_x_tied1: -+** fmov (z[0-9]+\.d), #(?:0\.5|5\.0e-1) -+** fmulx z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_0p5_f64_x_tied1, svfloat64_t, -+ z0 = svmulx_n_f64_x (p0, z0, 0.5), -+ z0 = svmulx_x (p0, z0, 0.5)) -+ -+/* -+** mulx_0p5_f64_x_untied: -+** fmov z0\.d, #(?:0\.5|5\.0e-1) -+** fmulx z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_0p5_f64_x_untied, svfloat64_t, -+ z0 = svmulx_n_f64_x (p0, z1, 0.5), -+ z0 = svmulx_x (p0, z1, 0.5)) -+ -+/* -+** mulx_2_f64_x_tied1: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fmulx z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_2_f64_x_tied1, svfloat64_t, -+ z0 = svmulx_n_f64_x (p0, z0, 2), -+ z0 = svmulx_x (p0, z0, 2)) -+ -+/* -+** mulx_2_f64_x_untied: -+** fmov z0\.d, #2\.0(?:e\+0)? -+** fmulx z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (mulx_2_f64_x_untied, svfloat64_t, -+ z0 = svmulx_n_f64_x (p0, z1, 2), -+ z0 = svmulx_x (p0, z1, 2)) -+ -+/* -+** ptrue_mulx_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mulx_f64_x_tied1, svfloat64_t, -+ z0 = svmulx_f64_x (svptrue_b64 (), z0, z1), -+ z0 = svmulx_x (svptrue_b64 (), z0, z1)) -+ -+/* -+** ptrue_mulx_f64_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mulx_f64_x_tied2, svfloat64_t, -+ z0 = svmulx_f64_x (svptrue_b64 (), z1, z0), -+ z0 = svmulx_x (svptrue_b64 (), z1, z0)) -+ -+/* -+** ptrue_mulx_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mulx_f64_x_untied, svfloat64_t, -+ z0 = svmulx_f64_x (svptrue_b64 (), z1, z2), -+ z0 = svmulx_x (svptrue_b64 (), z1, z2)) -+ -+/* -+** ptrue_mulx_1_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mulx_1_f64_x_tied1, svfloat64_t, -+ z0 = svmulx_n_f64_x (svptrue_b64 (), z0, 1), -+ z0 = svmulx_x (svptrue_b64 (), z0, 1)) -+ -+/* -+** ptrue_mulx_1_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mulx_1_f64_x_untied, svfloat64_t, -+ z0 = svmulx_n_f64_x (svptrue_b64 (), z1, 1), -+ z0 = svmulx_x (svptrue_b64 (), z1, 1)) -+ -+/* -+** ptrue_mulx_0p5_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mulx_0p5_f64_x_tied1, svfloat64_t, -+ z0 = svmulx_n_f64_x (svptrue_b64 (), z0, 0.5), -+ z0 = svmulx_x (svptrue_b64 (), z0, 0.5)) -+ -+/* -+** ptrue_mulx_0p5_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mulx_0p5_f64_x_untied, svfloat64_t, -+ z0 = svmulx_n_f64_x (svptrue_b64 (), z1, 0.5), -+ z0 = svmulx_x (svptrue_b64 (), z1, 0.5)) -+ -+/* -+** ptrue_mulx_2_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mulx_2_f64_x_tied1, svfloat64_t, -+ z0 = svmulx_n_f64_x (svptrue_b64 (), z0, 2), -+ z0 = svmulx_x (svptrue_b64 (), z0, 2)) -+ -+/* -+** ptrue_mulx_2_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_mulx_2_f64_x_untied, svfloat64_t, -+ z0 = svmulx_n_f64_x (svptrue_b64 (), z1, 2), -+ z0 = svmulx_x (svptrue_b64 (), z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nand_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nand_b.c -new file mode 100644 -index 000000000..c306b80c7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nand_b.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** nand_b_z_tied1: -+** nand p0\.b, p3/z, p0\.b, p1\.b -+** ret -+*/ -+TEST_UNIFORM_P (nand_b_z_tied1, -+ p0 = svnand_b_z (p3, p0, p1), -+ p0 = svnand_z (p3, p0, p1)) -+ -+/* -+** nand_b_z_tied2: -+** nand p0\.b, p3/z, p1\.b, p0\.b -+** ret -+*/ -+TEST_UNIFORM_P (nand_b_z_tied2, -+ p0 = svnand_b_z (p3, p1, p0), -+ p0 = svnand_z (p3, p1, p0)) -+ -+/* -+** nand_b_z_untied: -+** nand p0\.b, p3/z, p1\.b, p2\.b -+** ret -+*/ -+TEST_UNIFORM_P (nand_b_z_untied, -+ p0 = svnand_b_z (p3, p1, p2), -+ p0 = svnand_z (p3, p1, p2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_f16.c -new file mode 100644 -index 000000000..c31eba922 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_f16.c -@@ -0,0 +1,103 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** neg_f16_m_tied12: -+** fneg z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (neg_f16_m_tied12, svfloat16_t, -+ z0 = svneg_f16_m (z0, p0, z0), -+ z0 = svneg_m (z0, p0, z0)) -+ -+/* -+** neg_f16_m_tied1: -+** fneg z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (neg_f16_m_tied1, svfloat16_t, -+ z0 = svneg_f16_m (z0, p0, z1), -+ z0 = svneg_m (z0, p0, z1)) -+ -+/* -+** neg_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fneg z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (neg_f16_m_tied2, svfloat16_t, -+ z0 = svneg_f16_m (z1, p0, z0), -+ z0 = svneg_m (z1, p0, z0)) -+ -+/* -+** neg_f16_m_untied: -+** movprfx z0, z2 -+** fneg z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (neg_f16_m_untied, svfloat16_t, -+ z0 = svneg_f16_m (z2, p0, z1), -+ z0 = svneg_m (z2, p0, z1)) -+ -+/* -+** neg_f16_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, \1\.h -+** fneg z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (neg_f16_z_tied1, svfloat16_t, -+ z0 = svneg_f16_z (p0, z0), -+ z0 = svneg_z (p0, z0)) -+ -+/* -+** neg_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fneg z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (neg_f16_z_untied, svfloat16_t, -+ z0 = svneg_f16_z (p0, z1), -+ z0 = svneg_z (p0, z1)) -+ -+/* -+** neg_f16_x_tied1: -+** fneg z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (neg_f16_x_tied1, svfloat16_t, -+ z0 = svneg_f16_x (p0, z0), -+ z0 = svneg_x (p0, z0)) -+ -+/* -+** neg_f16_x_untied: -+** fneg z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (neg_f16_x_untied, svfloat16_t, -+ z0 = svneg_f16_x (p0, z1), -+ z0 = svneg_x (p0, z1)) -+ -+/* -+** ptrue_neg_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_neg_f16_x_tied1, svfloat16_t, -+ z0 = svneg_f16_x (svptrue_b16 (), z0), -+ z0 = svneg_x (svptrue_b16 (), z0)) -+ -+/* -+** ptrue_neg_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_neg_f16_x_untied, svfloat16_t, -+ z0 = svneg_f16_x (svptrue_b16 (), z1), -+ z0 = svneg_x (svptrue_b16 (), z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_f32.c -new file mode 100644 -index 000000000..a57d264ad ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_f32.c -@@ -0,0 +1,103 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** neg_f32_m_tied12: -+** fneg z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (neg_f32_m_tied12, svfloat32_t, -+ z0 = svneg_f32_m (z0, p0, z0), -+ z0 = svneg_m (z0, p0, z0)) -+ -+/* -+** neg_f32_m_tied1: -+** fneg z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (neg_f32_m_tied1, svfloat32_t, -+ z0 = svneg_f32_m (z0, p0, z1), -+ z0 = svneg_m (z0, p0, z1)) -+ -+/* -+** neg_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fneg z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (neg_f32_m_tied2, svfloat32_t, -+ z0 = svneg_f32_m (z1, p0, z0), -+ z0 = svneg_m (z1, p0, z0)) -+ -+/* -+** neg_f32_m_untied: -+** movprfx z0, z2 -+** fneg z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (neg_f32_m_untied, svfloat32_t, -+ z0 = svneg_f32_m (z2, p0, z1), -+ z0 = svneg_m (z2, p0, z1)) -+ -+/* -+** neg_f32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** fneg z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (neg_f32_z_tied1, svfloat32_t, -+ z0 = svneg_f32_z (p0, z0), -+ z0 = svneg_z (p0, z0)) -+ -+/* -+** neg_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fneg z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (neg_f32_z_untied, svfloat32_t, -+ z0 = svneg_f32_z (p0, z1), -+ z0 = svneg_z (p0, z1)) -+ -+/* -+** neg_f32_x_tied1: -+** fneg z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (neg_f32_x_tied1, svfloat32_t, -+ z0 = svneg_f32_x (p0, z0), -+ z0 = svneg_x (p0, z0)) -+ -+/* -+** neg_f32_x_untied: -+** fneg z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (neg_f32_x_untied, svfloat32_t, -+ z0 = svneg_f32_x (p0, z1), -+ z0 = svneg_x (p0, z1)) -+ -+/* -+** ptrue_neg_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_neg_f32_x_tied1, svfloat32_t, -+ z0 = svneg_f32_x (svptrue_b32 (), z0), -+ z0 = svneg_x (svptrue_b32 (), z0)) -+ -+/* -+** ptrue_neg_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_neg_f32_x_untied, svfloat32_t, -+ z0 = svneg_f32_x (svptrue_b32 (), z1), -+ z0 = svneg_x (svptrue_b32 (), z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_f64.c -new file mode 100644 -index 000000000..90cadd4f9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_f64.c -@@ -0,0 +1,103 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** neg_f64_m_tied12: -+** fneg z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (neg_f64_m_tied12, svfloat64_t, -+ z0 = svneg_f64_m (z0, p0, z0), -+ z0 = svneg_m (z0, p0, z0)) -+ -+/* -+** neg_f64_m_tied1: -+** fneg z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (neg_f64_m_tied1, svfloat64_t, -+ z0 = svneg_f64_m (z0, p0, z1), -+ z0 = svneg_m (z0, p0, z1)) -+ -+/* -+** neg_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fneg z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (neg_f64_m_tied2, svfloat64_t, -+ z0 = svneg_f64_m (z1, p0, z0), -+ z0 = svneg_m (z1, p0, z0)) -+ -+/* -+** neg_f64_m_untied: -+** movprfx z0, z2 -+** fneg z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (neg_f64_m_untied, svfloat64_t, -+ z0 = svneg_f64_m (z2, p0, z1), -+ z0 = svneg_m (z2, p0, z1)) -+ -+/* -+** neg_f64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** fneg z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (neg_f64_z_tied1, svfloat64_t, -+ z0 = svneg_f64_z (p0, z0), -+ z0 = svneg_z (p0, z0)) -+ -+/* -+** neg_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fneg z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (neg_f64_z_untied, svfloat64_t, -+ z0 = svneg_f64_z (p0, z1), -+ z0 = svneg_z (p0, z1)) -+ -+/* -+** neg_f64_x_tied1: -+** fneg z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (neg_f64_x_tied1, svfloat64_t, -+ z0 = svneg_f64_x (p0, z0), -+ z0 = svneg_x (p0, z0)) -+ -+/* -+** neg_f64_x_untied: -+** fneg z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (neg_f64_x_untied, svfloat64_t, -+ z0 = svneg_f64_x (p0, z1), -+ z0 = svneg_x (p0, z1)) -+ -+/* -+** ptrue_neg_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_neg_f64_x_tied1, svfloat64_t, -+ z0 = svneg_f64_x (svptrue_b64 (), z0), -+ z0 = svneg_x (svptrue_b64 (), z0)) -+ -+/* -+** ptrue_neg_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_neg_f64_x_untied, svfloat64_t, -+ z0 = svneg_f64_x (svptrue_b64 (), z1), -+ z0 = svneg_x (svptrue_b64 (), z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_s16.c -new file mode 100644 -index 000000000..80b2ee0f7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_s16.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** neg_s16_m_tied12: -+** neg z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (neg_s16_m_tied12, svint16_t, -+ z0 = svneg_s16_m (z0, p0, z0), -+ z0 = svneg_m (z0, p0, z0)) -+ -+/* -+** neg_s16_m_tied1: -+** neg z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (neg_s16_m_tied1, svint16_t, -+ z0 = svneg_s16_m (z0, p0, z1), -+ z0 = svneg_m (z0, p0, z1)) -+ -+/* -+** neg_s16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** neg z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (neg_s16_m_tied2, svint16_t, -+ z0 = svneg_s16_m (z1, p0, z0), -+ z0 = svneg_m (z1, p0, z0)) -+ -+/* -+** neg_s16_m_untied: -+** movprfx z0, z2 -+** neg z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (neg_s16_m_untied, svint16_t, -+ z0 = svneg_s16_m (z2, p0, z1), -+ z0 = svneg_m (z2, p0, z1)) -+ -+/* -+** neg_s16_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, \1\.h -+** neg z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (neg_s16_z_tied1, svint16_t, -+ z0 = svneg_s16_z (p0, z0), -+ z0 = svneg_z (p0, z0)) -+ -+/* -+** neg_s16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** neg z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (neg_s16_z_untied, svint16_t, -+ z0 = svneg_s16_z (p0, z1), -+ z0 = svneg_z (p0, z1)) -+ -+/* -+** neg_s16_x_tied1: -+** neg z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (neg_s16_x_tied1, svint16_t, -+ z0 = svneg_s16_x (p0, z0), -+ z0 = svneg_x (p0, z0)) -+ -+/* -+** neg_s16_x_untied: -+** neg z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (neg_s16_x_untied, svint16_t, -+ z0 = svneg_s16_x (p0, z1), -+ z0 = svneg_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_s32.c -new file mode 100644 -index 000000000..b8805034e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_s32.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** neg_s32_m_tied12: -+** neg z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (neg_s32_m_tied12, svint32_t, -+ z0 = svneg_s32_m (z0, p0, z0), -+ z0 = svneg_m (z0, p0, z0)) -+ -+/* -+** neg_s32_m_tied1: -+** neg z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (neg_s32_m_tied1, svint32_t, -+ z0 = svneg_s32_m (z0, p0, z1), -+ z0 = svneg_m (z0, p0, z1)) -+ -+/* -+** neg_s32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** neg z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (neg_s32_m_tied2, svint32_t, -+ z0 = svneg_s32_m (z1, p0, z0), -+ z0 = svneg_m (z1, p0, z0)) -+ -+/* -+** neg_s32_m_untied: -+** movprfx z0, z2 -+** neg z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (neg_s32_m_untied, svint32_t, -+ z0 = svneg_s32_m (z2, p0, z1), -+ z0 = svneg_m (z2, p0, z1)) -+ -+/* -+** neg_s32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** neg z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (neg_s32_z_tied1, svint32_t, -+ z0 = svneg_s32_z (p0, z0), -+ z0 = svneg_z (p0, z0)) -+ -+/* -+** 
neg_s32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** neg z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (neg_s32_z_untied, svint32_t, -+ z0 = svneg_s32_z (p0, z1), -+ z0 = svneg_z (p0, z1)) -+ -+/* -+** neg_s32_x_tied1: -+** neg z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (neg_s32_x_tied1, svint32_t, -+ z0 = svneg_s32_x (p0, z0), -+ z0 = svneg_x (p0, z0)) -+ -+/* -+** neg_s32_x_untied: -+** neg z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (neg_s32_x_untied, svint32_t, -+ z0 = svneg_s32_x (p0, z1), -+ z0 = svneg_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_s64.c -new file mode 100644 -index 000000000..82abe6723 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_s64.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** neg_s64_m_tied12: -+** neg z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (neg_s64_m_tied12, svint64_t, -+ z0 = svneg_s64_m (z0, p0, z0), -+ z0 = svneg_m (z0, p0, z0)) -+ -+/* -+** neg_s64_m_tied1: -+** neg z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (neg_s64_m_tied1, svint64_t, -+ z0 = svneg_s64_m (z0, p0, z1), -+ z0 = svneg_m (z0, p0, z1)) -+ -+/* -+** neg_s64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** neg z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (neg_s64_m_tied2, svint64_t, -+ z0 = svneg_s64_m (z1, p0, z0), -+ z0 = svneg_m (z1, p0, z0)) -+ -+/* -+** neg_s64_m_untied: -+** movprfx z0, z2 -+** neg z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (neg_s64_m_untied, svint64_t, -+ z0 = svneg_s64_m (z2, p0, z1), -+ z0 = svneg_m (z2, p0, z1)) -+ -+/* -+** neg_s64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** neg z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (neg_s64_z_tied1, svint64_t, -+ z0 = svneg_s64_z (p0, z0), -+ z0 = svneg_z (p0, z0)) -+ -+/* -+** neg_s64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** neg z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (neg_s64_z_untied, svint64_t, -+ z0 = svneg_s64_z (p0, z1), -+ z0 = svneg_z (p0, z1)) -+ -+/* -+** neg_s64_x_tied1: -+** neg z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (neg_s64_x_tied1, svint64_t, -+ z0 = svneg_s64_x (p0, z0), -+ z0 = svneg_x (p0, z0)) -+ -+/* -+** neg_s64_x_untied: -+** neg z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (neg_s64_x_untied, svint64_t, -+ z0 = svneg_s64_x (p0, z1), -+ z0 = svneg_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_s8.c -new file mode 100644 -index 000000000..b7c9949ad ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/neg_s8.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** neg_s8_m_tied12: -+** neg z0\.b, p0/m, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (neg_s8_m_tied12, svint8_t, -+ z0 = svneg_s8_m (z0, p0, z0), -+ z0 = svneg_m (z0, p0, z0)) -+ -+/* -+** neg_s8_m_tied1: -+** neg z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (neg_s8_m_tied1, svint8_t, -+ z0 = svneg_s8_m (z0, p0, z1), -+ z0 = svneg_m (z0, p0, z1)) -+ -+/* -+** neg_s8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** neg z0\.b, p0/m, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (neg_s8_m_tied2, svint8_t, -+ z0 = svneg_s8_m (z1, p0, z0), -+ z0 = svneg_m (z1, p0, z0)) -+ -+/* -+** neg_s8_m_untied: -+** movprfx z0, z2 -+** neg z0\.b, p0/m, z1\.b -+** ret -+*/ 
-+TEST_UNIFORM_Z (neg_s8_m_untied, svint8_t, -+ z0 = svneg_s8_m (z2, p0, z1), -+ z0 = svneg_m (z2, p0, z1)) -+ -+/* -+** neg_s8_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.b, p0/z, \1\.b -+** neg z0\.b, p0/m, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (neg_s8_z_tied1, svint8_t, -+ z0 = svneg_s8_z (p0, z0), -+ z0 = svneg_z (p0, z0)) -+ -+/* -+** neg_s8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** neg z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (neg_s8_z_untied, svint8_t, -+ z0 = svneg_s8_z (p0, z1), -+ z0 = svneg_z (p0, z1)) -+ -+/* -+** neg_s8_x_tied1: -+** neg z0\.b, p0/m, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (neg_s8_x_tied1, svint8_t, -+ z0 = svneg_s8_x (p0, z0), -+ z0 = svneg_x (p0, z0)) -+ -+/* -+** neg_s8_x_untied: -+** neg z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (neg_s8_x_untied, svint8_t, -+ z0 = svneg_s8_x (p0, z1), -+ z0 = svneg_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmad_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmad_f16.c -new file mode 100644 -index 000000000..abfe0a0c0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmad_f16.c -@@ -0,0 +1,398 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** nmad_f16_m_tied1: -+** fnmad z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_f16_m_tied1, svfloat16_t, -+ z0 = svnmad_f16_m (p0, z0, z1, z2), -+ z0 = svnmad_m (p0, z0, z1, z2)) -+ -+/* -+** nmad_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fnmad z0\.h, p0/m, \1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_f16_m_tied2, svfloat16_t, -+ z0 = svnmad_f16_m (p0, z1, z0, z2), -+ z0 = svnmad_m (p0, z1, z0, z2)) -+ -+/* -+** nmad_f16_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fnmad z0\.h, p0/m, z2\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_f16_m_tied3, svfloat16_t, -+ z0 = svnmad_f16_m (p0, z1, z2, z0), -+ z0 = svnmad_m (p0, z1, z2, z0)) -+ -+/* -+** nmad_f16_m_untied: -+** movprfx z0, z1 -+** fnmad z0\.h, p0/m, z2\.h, z3\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_f16_m_untied, svfloat16_t, -+ z0 = svnmad_f16_m (p0, z1, z2, z3), -+ z0 = svnmad_m (p0, z1, z2, z3)) -+ -+/* -+** nmad_h4_f16_m_tied1: -+** mov (z[0-9]+\.h), h4 -+** fnmad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmad_h4_f16_m_tied1, svfloat16_t, __fp16, -+ z0 = svnmad_n_f16_m (p0, z0, z1, d4), -+ z0 = svnmad_m (p0, z0, z1, d4)) -+ -+/* -+** nmad_h4_f16_m_untied: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0, z1 -+** fnmad z0\.h, p0/m, z2\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmad_h4_f16_m_untied, svfloat16_t, __fp16, -+ z0 = svnmad_n_f16_m (p0, z1, z2, d4), -+ z0 = svnmad_m (p0, z1, z2, d4)) -+ -+/* -+** nmad_2_f16_m_tied1: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fnmad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_2_f16_m_tied1, svfloat16_t, -+ z0 = svnmad_n_f16_m (p0, z0, z1, 2), -+ z0 = svnmad_m (p0, z0, z1, 2)) -+ -+/* -+** nmad_2_f16_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fnmad z0\.h, p0/m, z2\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_2_f16_m_untied, svfloat16_t, -+ z0 = svnmad_n_f16_m (p0, z1, z2, 2), -+ z0 = svnmad_m (p0, z1, z2, 2)) -+ -+/* -+** nmad_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fnmad z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_f16_z_tied1, svfloat16_t, -+ z0 = svnmad_f16_z (p0, z0, z1, z2), -+ z0 = svnmad_z (p0, z0, z1, z2)) -+ -+/* -+** nmad_f16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** fnmad z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_f16_z_tied2, svfloat16_t, -+ z0 = svnmad_f16_z (p0, z1, z0, z2), -+ z0 = svnmad_z (p0, z1, z0, z2)) -+ -+/* -+** nmad_f16_z_tied3: -+** movprfx z0\.h, p0/z, z0\.h -+** fnmla z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_f16_z_tied3, svfloat16_t, -+ z0 = svnmad_f16_z (p0, z1, z2, z0), -+ z0 = svnmad_z (p0, z1, z2, z0)) -+ -+/* -+** nmad_f16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fnmad z0\.h, p0/m, z2\.h, z3\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fnmad z0\.h, p0/m, z1\.h, z3\.h -+** | -+** movprfx z0\.h, p0/z, z3\.h -+** fnmla z0\.h, p0/m, z1\.h, z2\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_f16_z_untied, svfloat16_t, -+ z0 = svnmad_f16_z (p0, z1, z2, z3), -+ z0 = svnmad_z (p0, z1, z2, z3)) -+ -+/* -+** nmad_h4_f16_z_tied1: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0\.h, p0/z, z0\.h -+** fnmad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmad_h4_f16_z_tied1, svfloat16_t, __fp16, -+ z0 = svnmad_n_f16_z (p0, z0, z1, d4), -+ z0 = svnmad_z (p0, z0, z1, d4)) -+ -+/* -+** nmad_h4_f16_z_tied2: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0\.h, p0/z, z0\.h -+** fnmad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmad_h4_f16_z_tied2, svfloat16_t, __fp16, -+ z0 = svnmad_n_f16_z (p0, z1, z0, d4), -+ z0 = svnmad_z (p0, z1, z0, d4)) -+ -+/* -+** nmad_h4_f16_z_untied: -+** mov (z[0-9]+\.h), h4 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fnmad z0\.h, p0/m, z2\.h, \1 -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fnmad z0\.h, p0/m, z1\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** fnmla z0\.h, p0/m, z1\.h, z2\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (nmad_h4_f16_z_untied, svfloat16_t, __fp16, -+ z0 = svnmad_n_f16_z (p0, z1, z2, d4), -+ z0 = svnmad_z (p0, z1, z2, d4)) -+ -+/* -+** nmad_2_f16_z_tied1: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** movprfx z0\.h, p0/z, z0\.h -+** fnmad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_2_f16_z_tied1, svfloat16_t, -+ z0 = svnmad_n_f16_z (p0, z0, z1, 2), -+ z0 = svnmad_z (p0, z0, z1, 2)) -+ -+/* -+** nmad_2_f16_z_tied2: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** movprfx z0\.h, p0/z, z0\.h -+** fnmad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_2_f16_z_tied2, svfloat16_t, -+ z0 = svnmad_n_f16_z (p0, z1, z0, 2), -+ z0 = svnmad_z (p0, z1, z0, 2)) -+ -+/* -+** nmad_2_f16_z_untied: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fnmad z0\.h, p0/m, z2\.h, \1 -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fnmad z0\.h, p0/m, z1\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** fnmla z0\.h, p0/m, z1\.h, z2\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_2_f16_z_untied, svfloat16_t, -+ z0 = svnmad_n_f16_z (p0, z1, z2, 2), -+ z0 = svnmad_z (p0, z1, z2, 2)) -+ -+/* -+** nmad_f16_x_tied1: -+** fnmad z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_f16_x_tied1, svfloat16_t, -+ z0 = svnmad_f16_x (p0, z0, z1, z2), -+ z0 = svnmad_x (p0, z0, z1, z2)) -+ -+/* -+** nmad_f16_x_tied2: -+** fnmad z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_f16_x_tied2, svfloat16_t, -+ z0 = svnmad_f16_x (p0, z1, z0, z2), -+ z0 = svnmad_x (p0, z1, z0, z2)) -+ -+/* -+** nmad_f16_x_tied3: -+** fnmla z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_f16_x_tied3, svfloat16_t, -+ z0 = svnmad_f16_x (p0, z1, z2, z0), -+ z0 = svnmad_x (p0, z1, z2, z0)) -+ -+/* -+** nmad_f16_x_untied: -+** ( -+** movprfx z0, z1 -+** fnmad z0\.h, p0/m, z2\.h, z3\.h -+** | -+** movprfx z0, z2 -+** fnmad z0\.h, p0/m, z1\.h, z3\.h -+** | -+** movprfx z0, z3 -+** fnmla z0\.h, p0/m, z1\.h, z2\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_f16_x_untied, svfloat16_t, -+ z0 = svnmad_f16_x (p0, z1, z2, z3), -+ z0 = svnmad_x (p0, z1, z2, z3)) -+ -+/* -+** nmad_h4_f16_x_tied1: -+** mov (z[0-9]+\.h), h4 -+** fnmad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmad_h4_f16_x_tied1, svfloat16_t, __fp16, -+ z0 = svnmad_n_f16_x (p0, z0, z1, d4), -+ z0 = svnmad_x (p0, z0, z1, d4)) -+ -+/* -+** nmad_h4_f16_x_tied2: -+** mov (z[0-9]+\.h), h4 -+** fnmad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmad_h4_f16_x_tied2, svfloat16_t, __fp16, -+ z0 = svnmad_n_f16_x (p0, z1, z0, d4), -+ z0 = svnmad_x (p0, z1, z0, d4)) -+ -+/* -+** nmad_h4_f16_x_untied: { xfail *-*-* } -+** mov z0\.h, h4 -+** fnmla z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_ZD (nmad_h4_f16_x_untied, svfloat16_t, __fp16, -+ z0 = svnmad_n_f16_x (p0, z1, z2, d4), -+ z0 = svnmad_x (p0, z1, z2, d4)) -+ -+/* -+** nmad_2_f16_x_tied1: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fnmad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_2_f16_x_tied1, svfloat16_t, -+ z0 = svnmad_n_f16_x (p0, z0, z1, 2), -+ z0 = svnmad_x (p0, z0, z1, 2)) -+ -+/* -+** nmad_2_f16_x_tied2: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fnmad z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_2_f16_x_tied2, svfloat16_t, -+ z0 = svnmad_n_f16_x (p0, z1, z0, 2), -+ z0 = svnmad_x (p0, z1, z0, 2)) -+ -+/* -+** nmad_2_f16_x_untied: -+** fmov z0\.h, #2\.0(?:e\+0)? -+** fnmla z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_2_f16_x_untied, svfloat16_t, -+ z0 = svnmad_n_f16_x (p0, z1, z2, 2), -+ z0 = svnmad_x (p0, z1, z2, 2)) -+ -+/* -+** ptrue_nmad_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmad_f16_x_tied1, svfloat16_t, -+ z0 = svnmad_f16_x (svptrue_b16 (), z0, z1, z2), -+ z0 = svnmad_x (svptrue_b16 (), z0, z1, z2)) -+ -+/* -+** ptrue_nmad_f16_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmad_f16_x_tied2, svfloat16_t, -+ z0 = svnmad_f16_x (svptrue_b16 (), z1, z0, z2), -+ z0 = svnmad_x (svptrue_b16 (), z1, z0, z2)) -+ -+/* -+** ptrue_nmad_f16_x_tied3: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmad_f16_x_tied3, svfloat16_t, -+ z0 = svnmad_f16_x (svptrue_b16 (), z1, z2, z0), -+ z0 = svnmad_x (svptrue_b16 (), z1, z2, z0)) -+ -+/* -+** ptrue_nmad_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmad_f16_x_untied, svfloat16_t, -+ z0 = svnmad_f16_x (svptrue_b16 (), z1, z2, z3), -+ z0 = svnmad_x (svptrue_b16 (), z1, z2, z3)) -+ -+/* -+** ptrue_nmad_2_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmad_2_f16_x_tied1, svfloat16_t, -+ z0 = svnmad_n_f16_x (svptrue_b16 (), z0, z1, 2), -+ z0 = svnmad_x (svptrue_b16 (), z0, z1, 2)) -+ -+/* -+** ptrue_nmad_2_f16_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmad_2_f16_x_tied2, svfloat16_t, -+ z0 = svnmad_n_f16_x (svptrue_b16 (), z1, z0, 2), -+ z0 = svnmad_x (svptrue_b16 (), z1, z0, 2)) -+ -+/* -+** ptrue_nmad_2_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmad_2_f16_x_untied, svfloat16_t, -+ z0 = svnmad_n_f16_x (svptrue_b16 (), z1, z2, 2), -+ z0 = svnmad_x (svptrue_b16 (), z1, z2, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmad_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmad_f32.c -new file mode 100644 -index 000000000..ab86385c3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmad_f32.c -@@ -0,0 +1,398 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** nmad_f32_m_tied1: -+** fnmad z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_f32_m_tied1, svfloat32_t, -+ z0 = svnmad_f32_m (p0, z0, z1, z2), -+ z0 = svnmad_m (p0, z0, z1, z2)) -+ -+/* -+** nmad_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fnmad z0\.s, p0/m, \1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_f32_m_tied2, svfloat32_t, -+ z0 = svnmad_f32_m (p0, z1, z0, z2), -+ z0 = svnmad_m (p0, z1, z0, z2)) -+ -+/* -+** nmad_f32_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fnmad z0\.s, p0/m, z2\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_f32_m_tied3, svfloat32_t, -+ z0 = svnmad_f32_m (p0, z1, z2, z0), -+ z0 = svnmad_m (p0, z1, z2, z0)) -+ -+/* -+** nmad_f32_m_untied: -+** movprfx z0, z1 -+** fnmad z0\.s, p0/m, z2\.s, z3\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_f32_m_untied, svfloat32_t, -+ z0 = svnmad_f32_m (p0, z1, z2, z3), -+ z0 = svnmad_m (p0, z1, z2, z3)) -+ -+/* -+** nmad_s4_f32_m_tied1: -+** mov (z[0-9]+\.s), s4 -+** fnmad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmad_s4_f32_m_tied1, svfloat32_t, float, -+ z0 = svnmad_n_f32_m (p0, z0, z1, d4), -+ z0 = svnmad_m (p0, z0, z1, d4)) -+ -+/* -+** nmad_s4_f32_m_untied: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0, z1 -+** fnmad z0\.s, p0/m, z2\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmad_s4_f32_m_untied, svfloat32_t, float, -+ z0 = svnmad_n_f32_m (p0, z1, z2, d4), -+ z0 = svnmad_m (p0, z1, z2, d4)) -+ -+/* -+** nmad_2_f32_m_tied1: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fnmad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_2_f32_m_tied1, svfloat32_t, -+ z0 = svnmad_n_f32_m (p0, z0, z1, 2), -+ z0 = svnmad_m (p0, z0, z1, 2)) -+ -+/* -+** nmad_2_f32_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fnmad z0\.s, p0/m, z2\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_2_f32_m_untied, svfloat32_t, -+ z0 = svnmad_n_f32_m (p0, z1, z2, 2), -+ z0 = svnmad_m (p0, z1, z2, 2)) -+ -+/* -+** nmad_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fnmad z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_f32_z_tied1, svfloat32_t, -+ z0 = svnmad_f32_z (p0, z0, z1, z2), -+ z0 = svnmad_z (p0, z0, z1, z2)) -+ -+/* -+** nmad_f32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** fnmad z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_f32_z_tied2, svfloat32_t, -+ z0 = svnmad_f32_z (p0, z1, z0, z2), -+ z0 = svnmad_z (p0, z1, z0, z2)) -+ -+/* -+** nmad_f32_z_tied3: -+** movprfx z0\.s, p0/z, z0\.s -+** fnmla z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_f32_z_tied3, svfloat32_t, -+ z0 = svnmad_f32_z (p0, z1, z2, z0), -+ z0 = svnmad_z (p0, z1, z2, z0)) -+ -+/* -+** nmad_f32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fnmad z0\.s, p0/m, z2\.s, z3\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fnmad z0\.s, p0/m, z1\.s, z3\.s -+** | -+** movprfx z0\.s, p0/z, z3\.s -+** fnmla z0\.s, p0/m, z1\.s, z2\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_f32_z_untied, svfloat32_t, -+ z0 = svnmad_f32_z (p0, z1, z2, z3), -+ z0 = svnmad_z (p0, z1, z2, z3)) -+ -+/* -+** nmad_s4_f32_z_tied1: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0\.s, p0/z, z0\.s -+** fnmad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmad_s4_f32_z_tied1, svfloat32_t, float, -+ z0 = svnmad_n_f32_z (p0, z0, z1, d4), -+ z0 = svnmad_z (p0, z0, z1, d4)) -+ -+/* -+** nmad_s4_f32_z_tied2: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0\.s, p0/z, z0\.s -+** fnmad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmad_s4_f32_z_tied2, svfloat32_t, float, -+ z0 = svnmad_n_f32_z (p0, z1, z0, d4), -+ z0 = svnmad_z (p0, z1, z0, d4)) -+ -+/* -+** nmad_s4_f32_z_untied: -+** mov (z[0-9]+\.s), s4 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fnmad z0\.s, p0/m, z2\.s, \1 -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fnmad z0\.s, p0/m, z1\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** fnmla z0\.s, p0/m, z1\.s, z2\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (nmad_s4_f32_z_untied, svfloat32_t, float, -+ z0 = svnmad_n_f32_z (p0, z1, z2, d4), -+ z0 = svnmad_z (p0, z1, z2, d4)) -+ -+/* -+** nmad_2_f32_z_tied1: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** movprfx z0\.s, p0/z, z0\.s -+** fnmad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_2_f32_z_tied1, svfloat32_t, -+ z0 = svnmad_n_f32_z (p0, z0, z1, 2), -+ z0 = svnmad_z (p0, z0, z1, 2)) -+ -+/* -+** nmad_2_f32_z_tied2: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** movprfx z0\.s, p0/z, z0\.s -+** fnmad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_2_f32_z_tied2, svfloat32_t, -+ z0 = svnmad_n_f32_z (p0, z1, z0, 2), -+ z0 = svnmad_z (p0, z1, z0, 2)) -+ -+/* -+** nmad_2_f32_z_untied: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fnmad z0\.s, p0/m, z2\.s, \1 -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fnmad z0\.s, p0/m, z1\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** fnmla z0\.s, p0/m, z1\.s, z2\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_2_f32_z_untied, svfloat32_t, -+ z0 = svnmad_n_f32_z (p0, z1, z2, 2), -+ z0 = svnmad_z (p0, z1, z2, 2)) -+ -+/* -+** nmad_f32_x_tied1: -+** fnmad z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_f32_x_tied1, svfloat32_t, -+ z0 = svnmad_f32_x (p0, z0, z1, z2), -+ z0 = svnmad_x (p0, z0, z1, z2)) -+ -+/* -+** nmad_f32_x_tied2: -+** fnmad z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_f32_x_tied2, svfloat32_t, -+ z0 = svnmad_f32_x (p0, z1, z0, z2), -+ z0 = svnmad_x (p0, z1, z0, z2)) -+ -+/* -+** nmad_f32_x_tied3: -+** fnmla z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_f32_x_tied3, svfloat32_t, -+ z0 = svnmad_f32_x (p0, z1, z2, z0), -+ z0 = svnmad_x (p0, z1, z2, z0)) -+ -+/* -+** nmad_f32_x_untied: -+** ( -+** movprfx z0, z1 -+** fnmad z0\.s, p0/m, z2\.s, z3\.s -+** | -+** movprfx z0, z2 -+** fnmad z0\.s, p0/m, z1\.s, z3\.s -+** | -+** movprfx z0, z3 -+** fnmla z0\.s, p0/m, z1\.s, z2\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_f32_x_untied, svfloat32_t, -+ z0 = svnmad_f32_x (p0, z1, z2, z3), -+ z0 = svnmad_x (p0, z1, z2, z3)) -+ -+/* -+** nmad_s4_f32_x_tied1: -+** mov (z[0-9]+\.s), s4 -+** fnmad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmad_s4_f32_x_tied1, svfloat32_t, float, -+ z0 = svnmad_n_f32_x (p0, z0, z1, d4), -+ z0 = svnmad_x (p0, z0, z1, d4)) -+ -+/* -+** nmad_s4_f32_x_tied2: -+** mov (z[0-9]+\.s), s4 -+** fnmad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmad_s4_f32_x_tied2, svfloat32_t, float, -+ z0 = svnmad_n_f32_x (p0, z1, z0, d4), -+ z0 = svnmad_x (p0, z1, z0, d4)) -+ -+/* -+** nmad_s4_f32_x_untied: { xfail *-*-* } -+** mov z0\.s, s4 -+** fnmla z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_ZD (nmad_s4_f32_x_untied, svfloat32_t, float, -+ z0 = svnmad_n_f32_x (p0, z1, z2, d4), -+ z0 = svnmad_x (p0, z1, z2, d4)) -+ -+/* -+** nmad_2_f32_x_tied1: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fnmad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_2_f32_x_tied1, svfloat32_t, -+ z0 = svnmad_n_f32_x (p0, z0, z1, 2), -+ z0 = svnmad_x (p0, z0, z1, 2)) -+ -+/* -+** nmad_2_f32_x_tied2: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fnmad z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_2_f32_x_tied2, svfloat32_t, -+ z0 = svnmad_n_f32_x (p0, z1, z0, 2), -+ z0 = svnmad_x (p0, z1, z0, 2)) -+ -+/* -+** nmad_2_f32_x_untied: -+** fmov z0\.s, #2\.0(?:e\+0)? -+** fnmla z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_2_f32_x_untied, svfloat32_t, -+ z0 = svnmad_n_f32_x (p0, z1, z2, 2), -+ z0 = svnmad_x (p0, z1, z2, 2)) -+ -+/* -+** ptrue_nmad_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmad_f32_x_tied1, svfloat32_t, -+ z0 = svnmad_f32_x (svptrue_b32 (), z0, z1, z2), -+ z0 = svnmad_x (svptrue_b32 (), z0, z1, z2)) -+ -+/* -+** ptrue_nmad_f32_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmad_f32_x_tied2, svfloat32_t, -+ z0 = svnmad_f32_x (svptrue_b32 (), z1, z0, z2), -+ z0 = svnmad_x (svptrue_b32 (), z1, z0, z2)) -+ -+/* -+** ptrue_nmad_f32_x_tied3: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmad_f32_x_tied3, svfloat32_t, -+ z0 = svnmad_f32_x (svptrue_b32 (), z1, z2, z0), -+ z0 = svnmad_x (svptrue_b32 (), z1, z2, z0)) -+ -+/* -+** ptrue_nmad_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmad_f32_x_untied, svfloat32_t, -+ z0 = svnmad_f32_x (svptrue_b32 (), z1, z2, z3), -+ z0 = svnmad_x (svptrue_b32 (), z1, z2, z3)) -+ -+/* -+** ptrue_nmad_2_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmad_2_f32_x_tied1, svfloat32_t, -+ z0 = svnmad_n_f32_x (svptrue_b32 (), z0, z1, 2), -+ z0 = svnmad_x (svptrue_b32 (), z0, z1, 2)) -+ -+/* -+** ptrue_nmad_2_f32_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmad_2_f32_x_tied2, svfloat32_t, -+ z0 = svnmad_n_f32_x (svptrue_b32 (), z1, z0, 2), -+ z0 = svnmad_x (svptrue_b32 (), z1, z0, 2)) -+ -+/* -+** ptrue_nmad_2_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmad_2_f32_x_untied, svfloat32_t, -+ z0 = svnmad_n_f32_x (svptrue_b32 (), z1, z2, 2), -+ z0 = svnmad_x (svptrue_b32 (), z1, z2, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmad_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmad_f64.c -new file mode 100644 -index 000000000..c236ff5a1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmad_f64.c -@@ -0,0 +1,398 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** nmad_f64_m_tied1: -+** fnmad z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_f64_m_tied1, svfloat64_t, -+ z0 = svnmad_f64_m (p0, z0, z1, z2), -+ z0 = svnmad_m (p0, z0, z1, z2)) -+ -+/* -+** nmad_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fnmad z0\.d, p0/m, \1, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_f64_m_tied2, svfloat64_t, -+ z0 = svnmad_f64_m (p0, z1, z0, z2), -+ z0 = svnmad_m (p0, z1, z0, z2)) -+ -+/* -+** nmad_f64_m_tied3: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fnmad z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_f64_m_tied3, svfloat64_t, -+ z0 = svnmad_f64_m (p0, z1, z2, z0), -+ z0 = svnmad_m (p0, z1, z2, z0)) -+ -+/* -+** nmad_f64_m_untied: -+** movprfx z0, z1 -+** fnmad z0\.d, p0/m, z2\.d, z3\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_f64_m_untied, svfloat64_t, -+ z0 = svnmad_f64_m (p0, z1, z2, z3), -+ z0 = svnmad_m (p0, z1, z2, z3)) -+ -+/* -+** nmad_d4_f64_m_tied1: -+** mov (z[0-9]+\.d), d4 -+** fnmad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmad_d4_f64_m_tied1, svfloat64_t, double, -+ z0 = svnmad_n_f64_m (p0, z0, z1, d4), -+ z0 = svnmad_m (p0, z0, z1, d4)) -+ -+/* -+** nmad_d4_f64_m_untied: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0, z1 -+** fnmad z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmad_d4_f64_m_untied, svfloat64_t, double, -+ z0 = svnmad_n_f64_m (p0, z1, z2, d4), -+ z0 = svnmad_m (p0, z1, z2, d4)) -+ -+/* -+** nmad_2_f64_m_tied1: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fnmad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_2_f64_m_tied1, svfloat64_t, -+ z0 = svnmad_n_f64_m (p0, z0, z1, 2), -+ z0 = svnmad_m (p0, z0, z1, 2)) -+ -+/* -+** nmad_2_f64_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fnmad z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_2_f64_m_untied, svfloat64_t, -+ z0 = svnmad_n_f64_m (p0, z1, z2, 2), -+ z0 = svnmad_m (p0, z1, z2, 2)) -+ -+/* -+** nmad_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fnmad z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_f64_z_tied1, svfloat64_t, -+ z0 = svnmad_f64_z (p0, z0, z1, z2), -+ z0 = svnmad_z (p0, z0, z1, z2)) -+ -+/* -+** nmad_f64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** fnmad z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_f64_z_tied2, svfloat64_t, -+ z0 = svnmad_f64_z (p0, z1, z0, z2), -+ z0 = svnmad_z (p0, z1, z0, z2)) -+ -+/* -+** nmad_f64_z_tied3: -+** movprfx z0\.d, p0/z, z0\.d -+** fnmla z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_f64_z_tied3, svfloat64_t, -+ z0 = svnmad_f64_z (p0, z1, z2, z0), -+ z0 = svnmad_z (p0, z1, z2, z0)) -+ -+/* -+** nmad_f64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fnmad z0\.d, p0/m, z2\.d, z3\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fnmad z0\.d, p0/m, z1\.d, z3\.d -+** | -+** movprfx z0\.d, p0/z, z3\.d -+** fnmla z0\.d, p0/m, z1\.d, z2\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_f64_z_untied, svfloat64_t, -+ z0 = svnmad_f64_z (p0, z1, z2, z3), -+ z0 = svnmad_z (p0, z1, z2, z3)) -+ -+/* -+** nmad_d4_f64_z_tied1: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0\.d, p0/z, z0\.d -+** fnmad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmad_d4_f64_z_tied1, svfloat64_t, double, -+ z0 = svnmad_n_f64_z (p0, z0, z1, d4), -+ z0 = svnmad_z (p0, z0, z1, d4)) -+ -+/* -+** nmad_d4_f64_z_tied2: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0\.d, p0/z, z0\.d -+** fnmad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmad_d4_f64_z_tied2, svfloat64_t, double, -+ z0 = svnmad_n_f64_z (p0, z1, z0, d4), -+ z0 = svnmad_z (p0, z1, z0, d4)) -+ -+/* -+** nmad_d4_f64_z_untied: -+** mov (z[0-9]+\.d), d4 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fnmad z0\.d, p0/m, z2\.d, \1 -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fnmad z0\.d, p0/m, z1\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** fnmla z0\.d, p0/m, z1\.d, z2\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (nmad_d4_f64_z_untied, svfloat64_t, double, -+ z0 = svnmad_n_f64_z (p0, z1, z2, d4), -+ z0 = svnmad_z (p0, z1, z2, d4)) -+ -+/* -+** nmad_2_f64_z_tied1: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** movprfx z0\.d, p0/z, z0\.d -+** fnmad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_2_f64_z_tied1, svfloat64_t, -+ z0 = svnmad_n_f64_z (p0, z0, z1, 2), -+ z0 = svnmad_z (p0, z0, z1, 2)) -+ -+/* -+** nmad_2_f64_z_tied2: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** movprfx z0\.d, p0/z, z0\.d -+** fnmad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_2_f64_z_tied2, svfloat64_t, -+ z0 = svnmad_n_f64_z (p0, z1, z0, 2), -+ z0 = svnmad_z (p0, z1, z0, 2)) -+ -+/* -+** nmad_2_f64_z_untied: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fnmad z0\.d, p0/m, z2\.d, \1 -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fnmad z0\.d, p0/m, z1\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** fnmla z0\.d, p0/m, z1\.d, z2\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_2_f64_z_untied, svfloat64_t, -+ z0 = svnmad_n_f64_z (p0, z1, z2, 2), -+ z0 = svnmad_z (p0, z1, z2, 2)) -+ -+/* -+** nmad_f64_x_tied1: -+** fnmad z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_f64_x_tied1, svfloat64_t, -+ z0 = svnmad_f64_x (p0, z0, z1, z2), -+ z0 = svnmad_x (p0, z0, z1, z2)) -+ -+/* -+** nmad_f64_x_tied2: -+** fnmad z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_f64_x_tied2, svfloat64_t, -+ z0 = svnmad_f64_x (p0, z1, z0, z2), -+ z0 = svnmad_x (p0, z1, z0, z2)) -+ -+/* -+** nmad_f64_x_tied3: -+** fnmla z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_f64_x_tied3, svfloat64_t, -+ z0 = svnmad_f64_x (p0, z1, z2, z0), -+ z0 = svnmad_x (p0, z1, z2, z0)) -+ -+/* -+** nmad_f64_x_untied: -+** ( -+** movprfx z0, z1 -+** fnmad z0\.d, p0/m, z2\.d, z3\.d -+** | -+** movprfx z0, z2 -+** fnmad z0\.d, p0/m, z1\.d, z3\.d -+** | -+** movprfx z0, z3 -+** fnmla z0\.d, p0/m, z1\.d, z2\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_f64_x_untied, svfloat64_t, -+ z0 = svnmad_f64_x (p0, z1, z2, z3), -+ z0 = svnmad_x (p0, z1, z2, z3)) -+ -+/* -+** nmad_d4_f64_x_tied1: -+** mov (z[0-9]+\.d), d4 -+** fnmad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmad_d4_f64_x_tied1, svfloat64_t, double, -+ z0 = svnmad_n_f64_x (p0, z0, z1, d4), -+ z0 = svnmad_x (p0, z0, z1, d4)) -+ -+/* -+** nmad_d4_f64_x_tied2: -+** mov (z[0-9]+\.d), d4 -+** fnmad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmad_d4_f64_x_tied2, svfloat64_t, double, -+ z0 = svnmad_n_f64_x (p0, z1, z0, d4), -+ z0 = svnmad_x (p0, z1, z0, d4)) -+ -+/* -+** nmad_d4_f64_x_untied: { xfail *-*-* } -+** mov z0\.d, d4 -+** fnmla z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_ZD (nmad_d4_f64_x_untied, svfloat64_t, double, -+ z0 = svnmad_n_f64_x (p0, z1, z2, d4), -+ z0 = svnmad_x (p0, z1, z2, d4)) -+ -+/* -+** nmad_2_f64_x_tied1: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fnmad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_2_f64_x_tied1, svfloat64_t, -+ z0 = svnmad_n_f64_x (p0, z0, z1, 2), -+ z0 = svnmad_x (p0, z0, z1, 2)) -+ -+/* -+** nmad_2_f64_x_tied2: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fnmad z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_2_f64_x_tied2, svfloat64_t, -+ z0 = svnmad_n_f64_x (p0, z1, z0, 2), -+ z0 = svnmad_x (p0, z1, z0, 2)) -+ -+/* -+** nmad_2_f64_x_untied: -+** fmov z0\.d, #2\.0(?:e\+0)? -+** fnmla z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmad_2_f64_x_untied, svfloat64_t, -+ z0 = svnmad_n_f64_x (p0, z1, z2, 2), -+ z0 = svnmad_x (p0, z1, z2, 2)) -+ -+/* -+** ptrue_nmad_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmad_f64_x_tied1, svfloat64_t, -+ z0 = svnmad_f64_x (svptrue_b64 (), z0, z1, z2), -+ z0 = svnmad_x (svptrue_b64 (), z0, z1, z2)) -+ -+/* -+** ptrue_nmad_f64_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmad_f64_x_tied2, svfloat64_t, -+ z0 = svnmad_f64_x (svptrue_b64 (), z1, z0, z2), -+ z0 = svnmad_x (svptrue_b64 (), z1, z0, z2)) -+ -+/* -+** ptrue_nmad_f64_x_tied3: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmad_f64_x_tied3, svfloat64_t, -+ z0 = svnmad_f64_x (svptrue_b64 (), z1, z2, z0), -+ z0 = svnmad_x (svptrue_b64 (), z1, z2, z0)) -+ -+/* -+** ptrue_nmad_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmad_f64_x_untied, svfloat64_t, -+ z0 = svnmad_f64_x (svptrue_b64 (), z1, z2, z3), -+ z0 = svnmad_x (svptrue_b64 (), z1, z2, z3)) -+ -+/* -+** ptrue_nmad_2_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmad_2_f64_x_tied1, svfloat64_t, -+ z0 = svnmad_n_f64_x (svptrue_b64 (), z0, z1, 2), -+ z0 = svnmad_x (svptrue_b64 (), z0, z1, 2)) -+ -+/* -+** ptrue_nmad_2_f64_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmad_2_f64_x_tied2, svfloat64_t, -+ z0 = svnmad_n_f64_x (svptrue_b64 (), z1, z0, 2), -+ z0 = svnmad_x (svptrue_b64 (), z1, z0, 2)) -+ -+/* -+** ptrue_nmad_2_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmad_2_f64_x_untied, svfloat64_t, -+ z0 = svnmad_n_f64_x (svptrue_b64 (), z1, z2, 2), -+ z0 = svnmad_x (svptrue_b64 (), z1, z2, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmla_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmla_f16.c -new file mode 100644 -index 000000000..f7ac377fd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmla_f16.c -@@ -0,0 +1,398 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** nmla_f16_m_tied1: -+** fnmla z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_f16_m_tied1, svfloat16_t, -+ z0 = svnmla_f16_m (p0, z0, z1, z2), -+ z0 = svnmla_m (p0, z0, z1, z2)) -+ -+/* -+** nmla_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fnmla z0\.h, p0/m, \1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_f16_m_tied2, svfloat16_t, -+ z0 = svnmla_f16_m (p0, z1, z0, z2), -+ z0 = svnmla_m (p0, z1, z0, z2)) -+ -+/* -+** nmla_f16_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fnmla z0\.h, p0/m, z2\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_f16_m_tied3, svfloat16_t, -+ z0 = svnmla_f16_m (p0, z1, z2, z0), -+ z0 = svnmla_m (p0, z1, z2, z0)) -+ -+/* -+** nmla_f16_m_untied: -+** movprfx z0, z1 -+** fnmla z0\.h, p0/m, z2\.h, z3\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_f16_m_untied, svfloat16_t, -+ z0 = svnmla_f16_m (p0, z1, z2, z3), -+ z0 = svnmla_m (p0, z1, z2, z3)) -+ -+/* -+** nmla_h4_f16_m_tied1: -+** mov (z[0-9]+\.h), h4 -+** fnmla z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmla_h4_f16_m_tied1, svfloat16_t, __fp16, -+ z0 = svnmla_n_f16_m (p0, z0, z1, d4), -+ z0 = svnmla_m (p0, z0, z1, d4)) -+ -+/* -+** nmla_h4_f16_m_untied: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0, z1 -+** fnmla z0\.h, p0/m, z2\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmla_h4_f16_m_untied, svfloat16_t, __fp16, -+ z0 = svnmla_n_f16_m (p0, z1, z2, d4), -+ z0 = svnmla_m (p0, z1, z2, d4)) -+ -+/* -+** nmla_2_f16_m_tied1: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fnmla z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_2_f16_m_tied1, svfloat16_t, -+ z0 = svnmla_n_f16_m (p0, z0, z1, 2), -+ z0 = svnmla_m (p0, z0, z1, 2)) -+ -+/* -+** nmla_2_f16_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fnmla z0\.h, p0/m, z2\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_2_f16_m_untied, svfloat16_t, -+ z0 = svnmla_n_f16_m (p0, z1, z2, 2), -+ z0 = svnmla_m (p0, z1, z2, 2)) -+ -+/* -+** nmla_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fnmla z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_f16_z_tied1, svfloat16_t, -+ z0 = svnmla_f16_z (p0, z0, z1, z2), -+ z0 = svnmla_z (p0, z0, z1, z2)) -+ -+/* -+** nmla_f16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** fnmad z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_f16_z_tied2, svfloat16_t, -+ z0 = svnmla_f16_z (p0, z1, z0, z2), -+ z0 = svnmla_z (p0, z1, z0, z2)) -+ -+/* -+** nmla_f16_z_tied3: -+** movprfx z0\.h, p0/z, z0\.h -+** fnmad z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_f16_z_tied3, svfloat16_t, -+ z0 = svnmla_f16_z (p0, z1, z2, z0), -+ z0 = svnmla_z (p0, z1, z2, z0)) -+ -+/* -+** nmla_f16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fnmla z0\.h, p0/m, z2\.h, z3\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fnmad z0\.h, p0/m, z3\.h, z1\.h -+** | -+** movprfx z0\.h, p0/z, z3\.h -+** fnmad z0\.h, p0/m, z2\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_f16_z_untied, svfloat16_t, -+ z0 = svnmla_f16_z (p0, z1, z2, z3), -+ z0 = svnmla_z (p0, z1, z2, z3)) -+ -+/* -+** nmla_h4_f16_z_tied1: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0\.h, p0/z, z0\.h -+** fnmla z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmla_h4_f16_z_tied1, svfloat16_t, __fp16, -+ z0 = svnmla_n_f16_z (p0, z0, z1, d4), -+ z0 = svnmla_z (p0, z0, z1, d4)) -+ -+/* -+** nmla_h4_f16_z_tied2: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0\.h, p0/z, z0\.h -+** fnmad z0\.h, p0/m, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZD (nmla_h4_f16_z_tied2, svfloat16_t, __fp16, -+ z0 = svnmla_n_f16_z (p0, z1, z0, d4), -+ z0 = svnmla_z (p0, z1, z0, d4)) -+ -+/* -+** nmla_h4_f16_z_untied: -+** mov (z[0-9]+\.h), h4 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fnmla z0\.h, p0/m, z2\.h, \1 -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fnmad z0\.h, p0/m, \1, z1\.h -+** | -+** movprfx z0\.h, p0/z, \1 -+** fnmad z0\.h, p0/m, z2\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (nmla_h4_f16_z_untied, svfloat16_t, __fp16, -+ z0 = svnmla_n_f16_z (p0, z1, z2, d4), -+ z0 = svnmla_z (p0, z1, z2, d4)) -+ -+/* -+** nmla_2_f16_z_tied1: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** movprfx z0\.h, p0/z, z0\.h -+** fnmla z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_2_f16_z_tied1, svfloat16_t, -+ z0 = svnmla_n_f16_z (p0, z0, z1, 2), -+ z0 = svnmla_z (p0, z0, z1, 2)) -+ -+/* -+** nmla_2_f16_z_tied2: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** movprfx z0\.h, p0/z, z0\.h -+** fnmad z0\.h, p0/m, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_2_f16_z_tied2, svfloat16_t, -+ z0 = svnmla_n_f16_z (p0, z1, z0, 2), -+ z0 = svnmla_z (p0, z1, z0, 2)) -+ -+/* -+** nmla_2_f16_z_untied: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fnmla z0\.h, p0/m, z2\.h, \1 -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fnmad z0\.h, p0/m, \1, z1\.h -+** | -+** movprfx z0\.h, p0/z, \1 -+** fnmad z0\.h, p0/m, z2\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_2_f16_z_untied, svfloat16_t, -+ z0 = svnmla_n_f16_z (p0, z1, z2, 2), -+ z0 = svnmla_z (p0, z1, z2, 2)) -+ -+/* -+** nmla_f16_x_tied1: -+** fnmla z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_f16_x_tied1, svfloat16_t, -+ z0 = svnmla_f16_x (p0, z0, z1, z2), -+ z0 = svnmla_x (p0, z0, z1, z2)) -+ -+/* -+** nmla_f16_x_tied2: -+** fnmad z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_f16_x_tied2, svfloat16_t, -+ z0 = svnmla_f16_x (p0, z1, z0, z2), -+ z0 = svnmla_x (p0, z1, z0, z2)) -+ -+/* -+** nmla_f16_x_tied3: -+** fnmad z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_f16_x_tied3, svfloat16_t, -+ z0 = svnmla_f16_x (p0, z1, z2, z0), -+ z0 = svnmla_x (p0, z1, z2, z0)) -+ -+/* -+** nmla_f16_x_untied: -+** ( -+** movprfx z0, z1 -+** fnmla z0\.h, p0/m, z2\.h, z3\.h -+** | -+** movprfx z0, z2 -+** fnmad z0\.h, p0/m, z3\.h, z1\.h -+** | -+** movprfx z0, z3 -+** fnmad z0\.h, p0/m, z2\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_f16_x_untied, svfloat16_t, -+ z0 = svnmla_f16_x (p0, z1, z2, z3), -+ z0 = svnmla_x (p0, z1, z2, z3)) -+ -+/* -+** nmla_h4_f16_x_tied1: -+** mov (z[0-9]+\.h), h4 -+** fnmla z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmla_h4_f16_x_tied1, svfloat16_t, __fp16, -+ z0 = svnmla_n_f16_x (p0, z0, z1, d4), -+ z0 = svnmla_x (p0, z0, z1, d4)) -+ -+/* -+** nmla_h4_f16_x_tied2: -+** mov (z[0-9]+\.h), h4 -+** fnmad z0\.h, p0/m, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZD (nmla_h4_f16_x_tied2, svfloat16_t, __fp16, -+ z0 = svnmla_n_f16_x (p0, z1, z0, d4), -+ z0 = svnmla_x (p0, z1, z0, d4)) -+ -+/* -+** nmla_h4_f16_x_untied: { xfail *-*-* } -+** mov z0\.h, h4 -+** fnmad z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZD (nmla_h4_f16_x_untied, svfloat16_t, __fp16, -+ z0 = svnmla_n_f16_x (p0, z1, z2, d4), -+ z0 = svnmla_x (p0, z1, z2, d4)) -+ -+/* -+** nmla_2_f16_x_tied1: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fnmla z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_2_f16_x_tied1, svfloat16_t, -+ z0 = svnmla_n_f16_x (p0, z0, z1, 2), -+ z0 = svnmla_x (p0, z0, z1, 2)) -+ -+/* -+** nmla_2_f16_x_tied2: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fnmad z0\.h, p0/m, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_2_f16_x_tied2, svfloat16_t, -+ z0 = svnmla_n_f16_x (p0, z1, z0, 2), -+ z0 = svnmla_x (p0, z1, z0, 2)) -+ -+/* -+** nmla_2_f16_x_untied: -+** fmov z0\.h, #2\.0(?:e\+0)? -+** fnmad z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_2_f16_x_untied, svfloat16_t, -+ z0 = svnmla_n_f16_x (p0, z1, z2, 2), -+ z0 = svnmla_x (p0, z1, z2, 2)) -+ -+/* -+** ptrue_nmla_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmla_f16_x_tied1, svfloat16_t, -+ z0 = svnmla_f16_x (svptrue_b16 (), z0, z1, z2), -+ z0 = svnmla_x (svptrue_b16 (), z0, z1, z2)) -+ -+/* -+** ptrue_nmla_f16_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmla_f16_x_tied2, svfloat16_t, -+ z0 = svnmla_f16_x (svptrue_b16 (), z1, z0, z2), -+ z0 = svnmla_x (svptrue_b16 (), z1, z0, z2)) -+ -+/* -+** ptrue_nmla_f16_x_tied3: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmla_f16_x_tied3, svfloat16_t, -+ z0 = svnmla_f16_x (svptrue_b16 (), z1, z2, z0), -+ z0 = svnmla_x (svptrue_b16 (), z1, z2, z0)) -+ -+/* -+** ptrue_nmla_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmla_f16_x_untied, svfloat16_t, -+ z0 = svnmla_f16_x (svptrue_b16 (), z1, z2, z3), -+ z0 = svnmla_x (svptrue_b16 (), z1, z2, z3)) -+ -+/* -+** ptrue_nmla_2_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmla_2_f16_x_tied1, svfloat16_t, -+ z0 = svnmla_n_f16_x (svptrue_b16 (), z0, z1, 2), -+ z0 = svnmla_x (svptrue_b16 (), z0, z1, 2)) -+ -+/* -+** ptrue_nmla_2_f16_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmla_2_f16_x_tied2, svfloat16_t, -+ z0 = svnmla_n_f16_x (svptrue_b16 (), z1, z0, 2), -+ z0 = svnmla_x (svptrue_b16 (), z1, z0, 2)) -+ -+/* -+** ptrue_nmla_2_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmla_2_f16_x_untied, svfloat16_t, -+ z0 = svnmla_n_f16_x (svptrue_b16 (), z1, z2, 2), -+ z0 = svnmla_x (svptrue_b16 (), z1, z2, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmla_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmla_f32.c -new file mode 100644 -index 000000000..ef9542d74 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmla_f32.c -@@ -0,0 +1,398 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** nmla_f32_m_tied1: -+** fnmla z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_f32_m_tied1, svfloat32_t, -+ z0 = svnmla_f32_m (p0, z0, z1, z2), -+ z0 = svnmla_m (p0, z0, z1, z2)) -+ -+/* -+** nmla_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fnmla z0\.s, p0/m, \1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_f32_m_tied2, svfloat32_t, -+ z0 = svnmla_f32_m (p0, z1, z0, z2), -+ z0 = svnmla_m (p0, z1, z0, z2)) -+ -+/* -+** nmla_f32_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fnmla z0\.s, p0/m, z2\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_f32_m_tied3, svfloat32_t, -+ z0 = svnmla_f32_m (p0, z1, z2, z0), -+ z0 = svnmla_m (p0, z1, z2, z0)) -+ -+/* -+** nmla_f32_m_untied: -+** movprfx z0, z1 -+** fnmla z0\.s, p0/m, z2\.s, z3\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_f32_m_untied, svfloat32_t, -+ z0 = svnmla_f32_m (p0, z1, z2, z3), -+ z0 = svnmla_m (p0, z1, z2, z3)) -+ -+/* -+** nmla_s4_f32_m_tied1: -+** mov (z[0-9]+\.s), s4 -+** fnmla z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmla_s4_f32_m_tied1, svfloat32_t, float, -+ z0 = svnmla_n_f32_m (p0, z0, z1, d4), -+ z0 = svnmla_m (p0, z0, z1, d4)) -+ -+/* -+** nmla_s4_f32_m_untied: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0, z1 -+** fnmla z0\.s, p0/m, z2\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmla_s4_f32_m_untied, svfloat32_t, float, -+ z0 = svnmla_n_f32_m (p0, z1, z2, d4), -+ z0 = svnmla_m (p0, z1, z2, d4)) -+ -+/* -+** nmla_2_f32_m_tied1: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fnmla z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_2_f32_m_tied1, svfloat32_t, -+ z0 = svnmla_n_f32_m (p0, z0, z1, 2), -+ z0 = svnmla_m (p0, z0, z1, 2)) -+ -+/* -+** nmla_2_f32_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fnmla z0\.s, p0/m, z2\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_2_f32_m_untied, svfloat32_t, -+ z0 = svnmla_n_f32_m (p0, z1, z2, 2), -+ z0 = svnmla_m (p0, z1, z2, 2)) -+ -+/* -+** nmla_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fnmla z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_f32_z_tied1, svfloat32_t, -+ z0 = svnmla_f32_z (p0, z0, z1, z2), -+ z0 = svnmla_z (p0, z0, z1, z2)) -+ -+/* -+** nmla_f32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** fnmad z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_f32_z_tied2, svfloat32_t, -+ z0 = svnmla_f32_z (p0, z1, z0, z2), -+ z0 = svnmla_z (p0, z1, z0, z2)) -+ -+/* -+** nmla_f32_z_tied3: -+** movprfx z0\.s, p0/z, z0\.s -+** fnmad z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_f32_z_tied3, svfloat32_t, -+ z0 = svnmla_f32_z (p0, z1, z2, z0), -+ z0 = svnmla_z (p0, z1, z2, z0)) -+ -+/* -+** nmla_f32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fnmla z0\.s, p0/m, z2\.s, z3\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fnmad z0\.s, p0/m, z3\.s, z1\.s -+** | -+** movprfx z0\.s, p0/z, z3\.s -+** fnmad z0\.s, p0/m, z2\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_f32_z_untied, svfloat32_t, -+ z0 = svnmla_f32_z (p0, z1, z2, z3), -+ z0 = svnmla_z (p0, z1, z2, z3)) -+ -+/* -+** nmla_s4_f32_z_tied1: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0\.s, p0/z, z0\.s -+** fnmla z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmla_s4_f32_z_tied1, svfloat32_t, float, -+ z0 = svnmla_n_f32_z (p0, z0, z1, d4), -+ z0 = svnmla_z (p0, z0, z1, d4)) -+ -+/* -+** nmla_s4_f32_z_tied2: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0\.s, p0/z, z0\.s -+** fnmad z0\.s, p0/m, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZD (nmla_s4_f32_z_tied2, svfloat32_t, float, -+ z0 = svnmla_n_f32_z (p0, z1, z0, d4), -+ z0 = svnmla_z (p0, z1, z0, d4)) -+ -+/* -+** nmla_s4_f32_z_untied: -+** mov (z[0-9]+\.s), s4 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fnmla z0\.s, p0/m, z2\.s, \1 -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fnmad z0\.s, p0/m, \1, z1\.s -+** | -+** movprfx z0\.s, p0/z, \1 -+** fnmad z0\.s, p0/m, z2\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (nmla_s4_f32_z_untied, svfloat32_t, float, -+ z0 = svnmla_n_f32_z (p0, z1, z2, d4), -+ z0 = svnmla_z (p0, z1, z2, d4)) -+ -+/* -+** nmla_2_f32_z_tied1: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** movprfx z0\.s, p0/z, z0\.s -+** fnmla z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_2_f32_z_tied1, svfloat32_t, -+ z0 = svnmla_n_f32_z (p0, z0, z1, 2), -+ z0 = svnmla_z (p0, z0, z1, 2)) -+ -+/* -+** nmla_2_f32_z_tied2: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** movprfx z0\.s, p0/z, z0\.s -+** fnmad z0\.s, p0/m, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_2_f32_z_tied2, svfloat32_t, -+ z0 = svnmla_n_f32_z (p0, z1, z0, 2), -+ z0 = svnmla_z (p0, z1, z0, 2)) -+ -+/* -+** nmla_2_f32_z_untied: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fnmla z0\.s, p0/m, z2\.s, \1 -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fnmad z0\.s, p0/m, \1, z1\.s -+** | -+** movprfx z0\.s, p0/z, \1 -+** fnmad z0\.s, p0/m, z2\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_2_f32_z_untied, svfloat32_t, -+ z0 = svnmla_n_f32_z (p0, z1, z2, 2), -+ z0 = svnmla_z (p0, z1, z2, 2)) -+ -+/* -+** nmla_f32_x_tied1: -+** fnmla z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_f32_x_tied1, svfloat32_t, -+ z0 = svnmla_f32_x (p0, z0, z1, z2), -+ z0 = svnmla_x (p0, z0, z1, z2)) -+ -+/* -+** nmla_f32_x_tied2: -+** fnmad z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_f32_x_tied2, svfloat32_t, -+ z0 = svnmla_f32_x (p0, z1, z0, z2), -+ z0 = svnmla_x (p0, z1, z0, z2)) -+ -+/* -+** nmla_f32_x_tied3: -+** fnmad z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_f32_x_tied3, svfloat32_t, -+ z0 = svnmla_f32_x (p0, z1, z2, z0), -+ z0 = svnmla_x (p0, z1, z2, z0)) -+ -+/* -+** nmla_f32_x_untied: -+** ( -+** movprfx z0, z1 -+** fnmla z0\.s, p0/m, z2\.s, z3\.s -+** | -+** movprfx z0, z2 -+** fnmad z0\.s, p0/m, z3\.s, z1\.s -+** | -+** movprfx z0, z3 -+** fnmad z0\.s, p0/m, z2\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_f32_x_untied, svfloat32_t, -+ z0 = svnmla_f32_x (p0, z1, z2, z3), -+ z0 = svnmla_x (p0, z1, z2, z3)) -+ -+/* -+** nmla_s4_f32_x_tied1: -+** mov (z[0-9]+\.s), s4 -+** fnmla z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmla_s4_f32_x_tied1, svfloat32_t, float, -+ z0 = svnmla_n_f32_x (p0, z0, z1, d4), -+ z0 = svnmla_x (p0, z0, z1, d4)) -+ -+/* -+** nmla_s4_f32_x_tied2: -+** mov (z[0-9]+\.s), s4 -+** fnmad z0\.s, p0/m, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZD (nmla_s4_f32_x_tied2, svfloat32_t, float, -+ z0 = svnmla_n_f32_x (p0, z1, z0, d4), -+ z0 = svnmla_x (p0, z1, z0, d4)) -+ -+/* -+** nmla_s4_f32_x_untied: { xfail *-*-* } -+** mov z0\.s, s4 -+** fnmad z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZD (nmla_s4_f32_x_untied, svfloat32_t, float, -+ z0 = svnmla_n_f32_x (p0, z1, z2, d4), -+ z0 = svnmla_x (p0, z1, z2, d4)) -+ -+/* -+** nmla_2_f32_x_tied1: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fnmla z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_2_f32_x_tied1, svfloat32_t, -+ z0 = svnmla_n_f32_x (p0, z0, z1, 2), -+ z0 = svnmla_x (p0, z0, z1, 2)) -+ -+/* -+** nmla_2_f32_x_tied2: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fnmad z0\.s, p0/m, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_2_f32_x_tied2, svfloat32_t, -+ z0 = svnmla_n_f32_x (p0, z1, z0, 2), -+ z0 = svnmla_x (p0, z1, z0, 2)) -+ -+/* -+** nmla_2_f32_x_untied: -+** fmov z0\.s, #2\.0(?:e\+0)? -+** fnmad z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_2_f32_x_untied, svfloat32_t, -+ z0 = svnmla_n_f32_x (p0, z1, z2, 2), -+ z0 = svnmla_x (p0, z1, z2, 2)) -+ -+/* -+** ptrue_nmla_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmla_f32_x_tied1, svfloat32_t, -+ z0 = svnmla_f32_x (svptrue_b32 (), z0, z1, z2), -+ z0 = svnmla_x (svptrue_b32 (), z0, z1, z2)) -+ -+/* -+** ptrue_nmla_f32_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmla_f32_x_tied2, svfloat32_t, -+ z0 = svnmla_f32_x (svptrue_b32 (), z1, z0, z2), -+ z0 = svnmla_x (svptrue_b32 (), z1, z0, z2)) -+ -+/* -+** ptrue_nmla_f32_x_tied3: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmla_f32_x_tied3, svfloat32_t, -+ z0 = svnmla_f32_x (svptrue_b32 (), z1, z2, z0), -+ z0 = svnmla_x (svptrue_b32 (), z1, z2, z0)) -+ -+/* -+** ptrue_nmla_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmla_f32_x_untied, svfloat32_t, -+ z0 = svnmla_f32_x (svptrue_b32 (), z1, z2, z3), -+ z0 = svnmla_x (svptrue_b32 (), z1, z2, z3)) -+ -+/* -+** ptrue_nmla_2_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmla_2_f32_x_tied1, svfloat32_t, -+ z0 = svnmla_n_f32_x (svptrue_b32 (), z0, z1, 2), -+ z0 = svnmla_x (svptrue_b32 (), z0, z1, 2)) -+ -+/* -+** ptrue_nmla_2_f32_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmla_2_f32_x_tied2, svfloat32_t, -+ z0 = svnmla_n_f32_x (svptrue_b32 (), z1, z0, 2), -+ z0 = svnmla_x (svptrue_b32 (), z1, z0, 2)) -+ -+/* -+** ptrue_nmla_2_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmla_2_f32_x_untied, svfloat32_t, -+ z0 = svnmla_n_f32_x (svptrue_b32 (), z1, z2, 2), -+ z0 = svnmla_x (svptrue_b32 (), z1, z2, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmla_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmla_f64.c -new file mode 100644 -index 000000000..441821f60 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmla_f64.c -@@ -0,0 +1,398 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** nmla_f64_m_tied1: -+** fnmla z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_f64_m_tied1, svfloat64_t, -+ z0 = svnmla_f64_m (p0, z0, z1, z2), -+ z0 = svnmla_m (p0, z0, z1, z2)) -+ -+/* -+** nmla_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fnmla z0\.d, p0/m, \1, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_f64_m_tied2, svfloat64_t, -+ z0 = svnmla_f64_m (p0, z1, z0, z2), -+ z0 = svnmla_m (p0, z1, z0, z2)) -+ -+/* -+** nmla_f64_m_tied3: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fnmla z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_f64_m_tied3, svfloat64_t, -+ z0 = svnmla_f64_m (p0, z1, z2, z0), -+ z0 = svnmla_m (p0, z1, z2, z0)) -+ -+/* -+** nmla_f64_m_untied: -+** movprfx z0, z1 -+** fnmla z0\.d, p0/m, z2\.d, z3\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_f64_m_untied, svfloat64_t, -+ z0 = svnmla_f64_m (p0, z1, z2, z3), -+ z0 = svnmla_m (p0, z1, z2, z3)) -+ -+/* -+** nmla_d4_f64_m_tied1: -+** mov (z[0-9]+\.d), d4 -+** fnmla z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmla_d4_f64_m_tied1, svfloat64_t, double, -+ z0 = svnmla_n_f64_m (p0, z0, z1, d4), -+ z0 = svnmla_m (p0, z0, z1, d4)) -+ -+/* -+** nmla_d4_f64_m_untied: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0, z1 -+** fnmla z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmla_d4_f64_m_untied, svfloat64_t, double, -+ z0 = svnmla_n_f64_m (p0, z1, z2, d4), -+ z0 = svnmla_m (p0, z1, z2, d4)) -+ -+/* -+** nmla_2_f64_m_tied1: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fnmla z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_2_f64_m_tied1, svfloat64_t, -+ z0 = svnmla_n_f64_m (p0, z0, z1, 2), -+ z0 = svnmla_m (p0, z0, z1, 2)) -+ -+/* -+** nmla_2_f64_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fnmla z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_2_f64_m_untied, svfloat64_t, -+ z0 = svnmla_n_f64_m (p0, z1, z2, 2), -+ z0 = svnmla_m (p0, z1, z2, 2)) -+ -+/* -+** nmla_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fnmla z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_f64_z_tied1, svfloat64_t, -+ z0 = svnmla_f64_z (p0, z0, z1, z2), -+ z0 = svnmla_z (p0, z0, z1, z2)) -+ -+/* -+** nmla_f64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** fnmad z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_f64_z_tied2, svfloat64_t, -+ z0 = svnmla_f64_z (p0, z1, z0, z2), -+ z0 = svnmla_z (p0, z1, z0, z2)) -+ -+/* -+** nmla_f64_z_tied3: -+** movprfx z0\.d, p0/z, z0\.d -+** fnmad z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_f64_z_tied3, svfloat64_t, -+ z0 = svnmla_f64_z (p0, z1, z2, z0), -+ z0 = svnmla_z (p0, z1, z2, z0)) -+ -+/* -+** nmla_f64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fnmla z0\.d, p0/m, z2\.d, z3\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fnmad z0\.d, p0/m, z3\.d, z1\.d -+** | -+** movprfx z0\.d, p0/z, z3\.d -+** fnmad z0\.d, p0/m, z2\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_f64_z_untied, svfloat64_t, -+ z0 = svnmla_f64_z (p0, z1, z2, z3), -+ z0 = svnmla_z (p0, z1, z2, z3)) -+ -+/* -+** nmla_d4_f64_z_tied1: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0\.d, p0/z, z0\.d -+** fnmla z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmla_d4_f64_z_tied1, svfloat64_t, double, -+ z0 = svnmla_n_f64_z (p0, z0, z1, d4), -+ z0 = svnmla_z (p0, z0, z1, d4)) -+ -+/* -+** nmla_d4_f64_z_tied2: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0\.d, p0/z, z0\.d -+** fnmad z0\.d, p0/m, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZD (nmla_d4_f64_z_tied2, svfloat64_t, double, -+ z0 = svnmla_n_f64_z (p0, z1, z0, d4), -+ z0 = svnmla_z (p0, z1, z0, d4)) -+ -+/* -+** nmla_d4_f64_z_untied: -+** mov (z[0-9]+\.d), d4 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fnmla z0\.d, p0/m, z2\.d, \1 -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fnmad z0\.d, p0/m, \1, z1\.d -+** | -+** movprfx z0\.d, p0/z, \1 -+** fnmad z0\.d, p0/m, z2\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (nmla_d4_f64_z_untied, svfloat64_t, double, -+ z0 = svnmla_n_f64_z (p0, z1, z2, d4), -+ z0 = svnmla_z (p0, z1, z2, d4)) -+ -+/* -+** nmla_2_f64_z_tied1: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** movprfx z0\.d, p0/z, z0\.d -+** fnmla z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_2_f64_z_tied1, svfloat64_t, -+ z0 = svnmla_n_f64_z (p0, z0, z1, 2), -+ z0 = svnmla_z (p0, z0, z1, 2)) -+ -+/* -+** nmla_2_f64_z_tied2: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** movprfx z0\.d, p0/z, z0\.d -+** fnmad z0\.d, p0/m, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_2_f64_z_tied2, svfloat64_t, -+ z0 = svnmla_n_f64_z (p0, z1, z0, 2), -+ z0 = svnmla_z (p0, z1, z0, 2)) -+ -+/* -+** nmla_2_f64_z_untied: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fnmla z0\.d, p0/m, z2\.d, \1 -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fnmad z0\.d, p0/m, \1, z1\.d -+** | -+** movprfx z0\.d, p0/z, \1 -+** fnmad z0\.d, p0/m, z2\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_2_f64_z_untied, svfloat64_t, -+ z0 = svnmla_n_f64_z (p0, z1, z2, 2), -+ z0 = svnmla_z (p0, z1, z2, 2)) -+ -+/* -+** nmla_f64_x_tied1: -+** fnmla z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_f64_x_tied1, svfloat64_t, -+ z0 = svnmla_f64_x (p0, z0, z1, z2), -+ z0 = svnmla_x (p0, z0, z1, z2)) -+ -+/* -+** nmla_f64_x_tied2: -+** fnmad z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_f64_x_tied2, svfloat64_t, -+ z0 = svnmla_f64_x (p0, z1, z0, z2), -+ z0 = svnmla_x (p0, z1, z0, z2)) -+ -+/* -+** nmla_f64_x_tied3: -+** fnmad z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_f64_x_tied3, svfloat64_t, -+ z0 = svnmla_f64_x (p0, z1, z2, z0), -+ z0 = svnmla_x (p0, z1, z2, z0)) -+ -+/* -+** nmla_f64_x_untied: -+** ( -+** movprfx z0, z1 -+** fnmla z0\.d, p0/m, z2\.d, z3\.d -+** | -+** movprfx z0, z2 -+** fnmad z0\.d, p0/m, z3\.d, z1\.d -+** | -+** movprfx z0, z3 -+** fnmad z0\.d, p0/m, z2\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_f64_x_untied, svfloat64_t, -+ z0 = svnmla_f64_x (p0, z1, z2, z3), -+ z0 = svnmla_x (p0, z1, z2, z3)) -+ -+/* -+** nmla_d4_f64_x_tied1: -+** mov (z[0-9]+\.d), d4 -+** fnmla z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmla_d4_f64_x_tied1, svfloat64_t, double, -+ z0 = svnmla_n_f64_x (p0, z0, z1, d4), -+ z0 = svnmla_x (p0, z0, z1, d4)) -+ -+/* -+** nmla_d4_f64_x_tied2: -+** mov (z[0-9]+\.d), d4 -+** fnmad z0\.d, p0/m, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZD (nmla_d4_f64_x_tied2, svfloat64_t, double, -+ z0 = svnmla_n_f64_x (p0, z1, z0, d4), -+ z0 = svnmla_x (p0, z1, z0, d4)) -+ -+/* -+** nmla_d4_f64_x_untied: { xfail *-*-* } -+** mov z0\.d, d4 -+** fnmad z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZD (nmla_d4_f64_x_untied, svfloat64_t, double, -+ z0 = svnmla_n_f64_x (p0, z1, z2, d4), -+ z0 = svnmla_x (p0, z1, z2, d4)) -+ -+/* -+** nmla_2_f64_x_tied1: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fnmla z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_2_f64_x_tied1, svfloat64_t, -+ z0 = svnmla_n_f64_x (p0, z0, z1, 2), -+ z0 = svnmla_x (p0, z0, z1, 2)) -+ -+/* -+** nmla_2_f64_x_tied2: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fnmad z0\.d, p0/m, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_2_f64_x_tied2, svfloat64_t, -+ z0 = svnmla_n_f64_x (p0, z1, z0, 2), -+ z0 = svnmla_x (p0, z1, z0, 2)) -+ -+/* -+** nmla_2_f64_x_untied: -+** fmov z0\.d, #2\.0(?:e\+0)? -+** fnmad z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmla_2_f64_x_untied, svfloat64_t, -+ z0 = svnmla_n_f64_x (p0, z1, z2, 2), -+ z0 = svnmla_x (p0, z1, z2, 2)) -+ -+/* -+** ptrue_nmla_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmla_f64_x_tied1, svfloat64_t, -+ z0 = svnmla_f64_x (svptrue_b64 (), z0, z1, z2), -+ z0 = svnmla_x (svptrue_b64 (), z0, z1, z2)) -+ -+/* -+** ptrue_nmla_f64_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmla_f64_x_tied2, svfloat64_t, -+ z0 = svnmla_f64_x (svptrue_b64 (), z1, z0, z2), -+ z0 = svnmla_x (svptrue_b64 (), z1, z0, z2)) -+ -+/* -+** ptrue_nmla_f64_x_tied3: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmla_f64_x_tied3, svfloat64_t, -+ z0 = svnmla_f64_x (svptrue_b64 (), z1, z2, z0), -+ z0 = svnmla_x (svptrue_b64 (), z1, z2, z0)) -+ -+/* -+** ptrue_nmla_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmla_f64_x_untied, svfloat64_t, -+ z0 = svnmla_f64_x (svptrue_b64 (), z1, z2, z3), -+ z0 = svnmla_x (svptrue_b64 (), z1, z2, z3)) -+ -+/* -+** ptrue_nmla_2_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmla_2_f64_x_tied1, svfloat64_t, -+ z0 = svnmla_n_f64_x (svptrue_b64 (), z0, z1, 2), -+ z0 = svnmla_x (svptrue_b64 (), z0, z1, 2)) -+ -+/* -+** ptrue_nmla_2_f64_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmla_2_f64_x_tied2, svfloat64_t, -+ z0 = svnmla_n_f64_x (svptrue_b64 (), z1, z0, 2), -+ z0 = svnmla_x (svptrue_b64 (), z1, z0, 2)) -+ -+/* -+** ptrue_nmla_2_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmla_2_f64_x_untied, svfloat64_t, -+ z0 = svnmla_n_f64_x (svptrue_b64 (), z1, z2, 2), -+ z0 = svnmla_x (svptrue_b64 (), z1, z2, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmls_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmls_f16.c -new file mode 100644 -index 000000000..8aa6c7509 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmls_f16.c -@@ -0,0 +1,398 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** nmls_f16_m_tied1: -+** fnmls z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_f16_m_tied1, svfloat16_t, -+ z0 = svnmls_f16_m (p0, z0, z1, z2), -+ z0 = svnmls_m (p0, z0, z1, z2)) -+ -+/* -+** nmls_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fnmls z0\.h, p0/m, \1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_f16_m_tied2, svfloat16_t, -+ z0 = svnmls_f16_m (p0, z1, z0, z2), -+ z0 = svnmls_m (p0, z1, z0, z2)) -+ -+/* -+** nmls_f16_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fnmls z0\.h, p0/m, z2\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_f16_m_tied3, svfloat16_t, -+ z0 = svnmls_f16_m (p0, z1, z2, z0), -+ z0 = svnmls_m (p0, z1, z2, z0)) -+ -+/* -+** nmls_f16_m_untied: -+** movprfx z0, z1 -+** fnmls z0\.h, p0/m, z2\.h, z3\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_f16_m_untied, svfloat16_t, -+ z0 = svnmls_f16_m (p0, z1, z2, z3), -+ z0 = svnmls_m (p0, z1, z2, z3)) -+ -+/* -+** nmls_h4_f16_m_tied1: -+** mov (z[0-9]+\.h), h4 -+** fnmls z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmls_h4_f16_m_tied1, svfloat16_t, __fp16, -+ z0 = svnmls_n_f16_m (p0, z0, z1, d4), -+ z0 = svnmls_m (p0, z0, z1, d4)) -+ -+/* -+** nmls_h4_f16_m_untied: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0, z1 -+** fnmls z0\.h, p0/m, z2\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmls_h4_f16_m_untied, svfloat16_t, __fp16, -+ z0 = svnmls_n_f16_m (p0, z1, z2, d4), -+ z0 = svnmls_m (p0, z1, z2, d4)) -+ -+/* -+** nmls_2_f16_m_tied1: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fnmls z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_2_f16_m_tied1, svfloat16_t, -+ z0 = svnmls_n_f16_m (p0, z0, z1, 2), -+ z0 = svnmls_m (p0, z0, z1, 2)) -+ -+/* -+** nmls_2_f16_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fnmls z0\.h, p0/m, z2\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_2_f16_m_untied, svfloat16_t, -+ z0 = svnmls_n_f16_m (p0, z1, z2, 2), -+ z0 = svnmls_m (p0, z1, z2, 2)) -+ -+/* -+** nmls_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fnmls z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_f16_z_tied1, svfloat16_t, -+ z0 = svnmls_f16_z (p0, z0, z1, z2), -+ z0 = svnmls_z (p0, z0, z1, z2)) -+ -+/* -+** nmls_f16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** fnmsb z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_f16_z_tied2, svfloat16_t, -+ z0 = svnmls_f16_z (p0, z1, z0, z2), -+ z0 = svnmls_z (p0, z1, z0, z2)) -+ -+/* -+** nmls_f16_z_tied3: -+** movprfx z0\.h, p0/z, z0\.h -+** fnmsb z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_f16_z_tied3, svfloat16_t, -+ z0 = svnmls_f16_z (p0, z1, z2, z0), -+ z0 = svnmls_z (p0, z1, z2, z0)) -+ -+/* -+** nmls_f16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fnmls z0\.h, p0/m, z2\.h, z3\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fnmsb z0\.h, p0/m, z3\.h, z1\.h -+** | -+** movprfx z0\.h, p0/z, z3\.h -+** fnmsb z0\.h, p0/m, z2\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_f16_z_untied, svfloat16_t, -+ z0 = svnmls_f16_z (p0, z1, z2, z3), -+ z0 = svnmls_z (p0, z1, z2, z3)) -+ -+/* -+** nmls_h4_f16_z_tied1: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0\.h, p0/z, z0\.h -+** fnmls z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmls_h4_f16_z_tied1, svfloat16_t, __fp16, -+ z0 = svnmls_n_f16_z (p0, z0, z1, d4), -+ z0 = svnmls_z (p0, z0, z1, d4)) -+ -+/* -+** nmls_h4_f16_z_tied2: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0\.h, p0/z, z0\.h -+** fnmsb z0\.h, p0/m, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZD (nmls_h4_f16_z_tied2, svfloat16_t, __fp16, -+ z0 = svnmls_n_f16_z (p0, z1, z0, d4), -+ z0 = svnmls_z (p0, z1, z0, d4)) -+ -+/* -+** nmls_h4_f16_z_untied: -+** mov (z[0-9]+\.h), h4 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fnmls z0\.h, p0/m, z2\.h, \1 -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fnmsb z0\.h, p0/m, \1, z1\.h -+** | -+** movprfx z0\.h, p0/z, \1 -+** fnmsb z0\.h, p0/m, z2\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (nmls_h4_f16_z_untied, svfloat16_t, __fp16, -+ z0 = svnmls_n_f16_z (p0, z1, z2, d4), -+ z0 = svnmls_z (p0, z1, z2, d4)) -+ -+/* -+** nmls_2_f16_z_tied1: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** movprfx z0\.h, p0/z, z0\.h -+** fnmls z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_2_f16_z_tied1, svfloat16_t, -+ z0 = svnmls_n_f16_z (p0, z0, z1, 2), -+ z0 = svnmls_z (p0, z0, z1, 2)) -+ -+/* -+** nmls_2_f16_z_tied2: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** movprfx z0\.h, p0/z, z0\.h -+** fnmsb z0\.h, p0/m, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_2_f16_z_tied2, svfloat16_t, -+ z0 = svnmls_n_f16_z (p0, z1, z0, 2), -+ z0 = svnmls_z (p0, z1, z0, 2)) -+ -+/* -+** nmls_2_f16_z_untied: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fnmls z0\.h, p0/m, z2\.h, \1 -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fnmsb z0\.h, p0/m, \1, z1\.h -+** | -+** movprfx z0\.h, p0/z, \1 -+** fnmsb z0\.h, p0/m, z2\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_2_f16_z_untied, svfloat16_t, -+ z0 = svnmls_n_f16_z (p0, z1, z2, 2), -+ z0 = svnmls_z (p0, z1, z2, 2)) -+ -+/* -+** nmls_f16_x_tied1: -+** fnmls z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_f16_x_tied1, svfloat16_t, -+ z0 = svnmls_f16_x (p0, z0, z1, z2), -+ z0 = svnmls_x (p0, z0, z1, z2)) -+ -+/* -+** nmls_f16_x_tied2: -+** fnmsb z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_f16_x_tied2, svfloat16_t, -+ z0 = svnmls_f16_x (p0, z1, z0, z2), -+ z0 = svnmls_x (p0, z1, z0, z2)) -+ -+/* -+** nmls_f16_x_tied3: -+** fnmsb z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_f16_x_tied3, svfloat16_t, -+ z0 = svnmls_f16_x (p0, z1, z2, z0), -+ z0 = svnmls_x (p0, z1, z2, z0)) -+ -+/* -+** nmls_f16_x_untied: -+** ( -+** movprfx z0, z1 -+** fnmls z0\.h, p0/m, z2\.h, z3\.h -+** | -+** movprfx z0, z2 -+** fnmsb z0\.h, p0/m, z3\.h, z1\.h -+** | -+** movprfx z0, z3 -+** fnmsb z0\.h, p0/m, z2\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_f16_x_untied, svfloat16_t, -+ z0 = svnmls_f16_x (p0, z1, z2, z3), -+ z0 = svnmls_x (p0, z1, z2, z3)) -+ -+/* -+** nmls_h4_f16_x_tied1: -+** mov (z[0-9]+\.h), h4 -+** fnmls z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmls_h4_f16_x_tied1, svfloat16_t, __fp16, -+ z0 = svnmls_n_f16_x (p0, z0, z1, d4), -+ z0 = svnmls_x (p0, z0, z1, d4)) -+ -+/* -+** nmls_h4_f16_x_tied2: -+** mov (z[0-9]+\.h), h4 -+** fnmsb z0\.h, p0/m, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZD (nmls_h4_f16_x_tied2, svfloat16_t, __fp16, -+ z0 = svnmls_n_f16_x (p0, z1, z0, d4), -+ z0 = svnmls_x (p0, z1, z0, d4)) -+ -+/* -+** nmls_h4_f16_x_untied: { xfail *-*-* } -+** mov z0\.h, h4 -+** fnmsb z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZD (nmls_h4_f16_x_untied, svfloat16_t, __fp16, -+ z0 = svnmls_n_f16_x (p0, z1, z2, d4), -+ z0 = svnmls_x (p0, z1, z2, d4)) -+ -+/* -+** nmls_2_f16_x_tied1: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fnmls z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_2_f16_x_tied1, svfloat16_t, -+ z0 = svnmls_n_f16_x (p0, z0, z1, 2), -+ z0 = svnmls_x (p0, z0, z1, 2)) -+ -+/* -+** nmls_2_f16_x_tied2: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fnmsb z0\.h, p0/m, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_2_f16_x_tied2, svfloat16_t, -+ z0 = svnmls_n_f16_x (p0, z1, z0, 2), -+ z0 = svnmls_x (p0, z1, z0, 2)) -+ -+/* -+** nmls_2_f16_x_untied: -+** fmov z0\.h, #2\.0(?:e\+0)? -+** fnmsb z0\.h, p0/m, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_2_f16_x_untied, svfloat16_t, -+ z0 = svnmls_n_f16_x (p0, z1, z2, 2), -+ z0 = svnmls_x (p0, z1, z2, 2)) -+ -+/* -+** ptrue_nmls_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmls_f16_x_tied1, svfloat16_t, -+ z0 = svnmls_f16_x (svptrue_b16 (), z0, z1, z2), -+ z0 = svnmls_x (svptrue_b16 (), z0, z1, z2)) -+ -+/* -+** ptrue_nmls_f16_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmls_f16_x_tied2, svfloat16_t, -+ z0 = svnmls_f16_x (svptrue_b16 (), z1, z0, z2), -+ z0 = svnmls_x (svptrue_b16 (), z1, z0, z2)) -+ -+/* -+** ptrue_nmls_f16_x_tied3: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmls_f16_x_tied3, svfloat16_t, -+ z0 = svnmls_f16_x (svptrue_b16 (), z1, z2, z0), -+ z0 = svnmls_x (svptrue_b16 (), z1, z2, z0)) -+ -+/* -+** ptrue_nmls_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmls_f16_x_untied, svfloat16_t, -+ z0 = svnmls_f16_x (svptrue_b16 (), z1, z2, z3), -+ z0 = svnmls_x (svptrue_b16 (), z1, z2, z3)) -+ -+/* -+** ptrue_nmls_2_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmls_2_f16_x_tied1, svfloat16_t, -+ z0 = svnmls_n_f16_x (svptrue_b16 (), z0, z1, 2), -+ z0 = svnmls_x (svptrue_b16 (), z0, z1, 2)) -+ -+/* -+** ptrue_nmls_2_f16_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmls_2_f16_x_tied2, svfloat16_t, -+ z0 = svnmls_n_f16_x (svptrue_b16 (), z1, z0, 2), -+ z0 = svnmls_x (svptrue_b16 (), z1, z0, 2)) -+ -+/* -+** ptrue_nmls_2_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmls_2_f16_x_untied, svfloat16_t, -+ z0 = svnmls_n_f16_x (svptrue_b16 (), z1, z2, 2), -+ z0 = svnmls_x (svptrue_b16 (), z1, z2, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmls_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmls_f32.c -new file mode 100644 -index 000000000..42ea13fac ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmls_f32.c -@@ -0,0 +1,398 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** nmls_f32_m_tied1: -+** fnmls z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_f32_m_tied1, svfloat32_t, -+ z0 = svnmls_f32_m (p0, z0, z1, z2), -+ z0 = svnmls_m (p0, z0, z1, z2)) -+ -+/* -+** nmls_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fnmls z0\.s, p0/m, \1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_f32_m_tied2, svfloat32_t, -+ z0 = svnmls_f32_m (p0, z1, z0, z2), -+ z0 = svnmls_m (p0, z1, z0, z2)) -+ -+/* -+** nmls_f32_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fnmls z0\.s, p0/m, z2\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_f32_m_tied3, svfloat32_t, -+ z0 = svnmls_f32_m (p0, z1, z2, z0), -+ z0 = svnmls_m (p0, z1, z2, z0)) -+ -+/* -+** nmls_f32_m_untied: -+** movprfx z0, z1 -+** fnmls z0\.s, p0/m, z2\.s, z3\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_f32_m_untied, svfloat32_t, -+ z0 = svnmls_f32_m (p0, z1, z2, z3), -+ z0 = svnmls_m (p0, z1, z2, z3)) -+ -+/* -+** nmls_s4_f32_m_tied1: -+** mov (z[0-9]+\.s), s4 -+** fnmls z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmls_s4_f32_m_tied1, svfloat32_t, float, -+ z0 = svnmls_n_f32_m (p0, z0, z1, d4), -+ z0 = svnmls_m (p0, z0, z1, d4)) -+ -+/* -+** nmls_s4_f32_m_untied: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0, z1 -+** fnmls z0\.s, p0/m, z2\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmls_s4_f32_m_untied, svfloat32_t, float, -+ z0 = svnmls_n_f32_m (p0, z1, z2, d4), -+ z0 = svnmls_m (p0, z1, z2, d4)) -+ -+/* -+** nmls_2_f32_m_tied1: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fnmls z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_2_f32_m_tied1, svfloat32_t, -+ z0 = svnmls_n_f32_m (p0, z0, z1, 2), -+ z0 = svnmls_m (p0, z0, z1, 2)) -+ -+/* -+** nmls_2_f32_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fnmls z0\.s, p0/m, z2\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_2_f32_m_untied, svfloat32_t, -+ z0 = svnmls_n_f32_m (p0, z1, z2, 2), -+ z0 = svnmls_m (p0, z1, z2, 2)) -+ -+/* -+** nmls_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fnmls z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_f32_z_tied1, svfloat32_t, -+ z0 = svnmls_f32_z (p0, z0, z1, z2), -+ z0 = svnmls_z (p0, z0, z1, z2)) -+ -+/* -+** nmls_f32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** fnmsb z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_f32_z_tied2, svfloat32_t, -+ z0 = svnmls_f32_z (p0, z1, z0, z2), -+ z0 = svnmls_z (p0, z1, z0, z2)) -+ -+/* -+** nmls_f32_z_tied3: -+** movprfx z0\.s, p0/z, z0\.s -+** fnmsb z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_f32_z_tied3, svfloat32_t, -+ z0 = svnmls_f32_z (p0, z1, z2, z0), -+ z0 = svnmls_z (p0, z1, z2, z0)) -+ -+/* -+** nmls_f32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fnmls z0\.s, p0/m, z2\.s, z3\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fnmsb z0\.s, p0/m, z3\.s, z1\.s -+** | -+** movprfx z0\.s, p0/z, z3\.s -+** fnmsb z0\.s, p0/m, z2\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_f32_z_untied, svfloat32_t, -+ z0 = svnmls_f32_z (p0, z1, z2, z3), -+ z0 = svnmls_z (p0, z1, z2, z3)) -+ -+/* -+** nmls_s4_f32_z_tied1: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0\.s, p0/z, z0\.s -+** fnmls z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmls_s4_f32_z_tied1, svfloat32_t, float, -+ z0 = svnmls_n_f32_z (p0, z0, z1, d4), -+ z0 = svnmls_z (p0, z0, z1, d4)) -+ -+/* -+** nmls_s4_f32_z_tied2: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0\.s, p0/z, z0\.s -+** fnmsb z0\.s, p0/m, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZD (nmls_s4_f32_z_tied2, svfloat32_t, float, -+ z0 = svnmls_n_f32_z (p0, z1, z0, d4), -+ z0 = svnmls_z (p0, z1, z0, d4)) -+ -+/* -+** nmls_s4_f32_z_untied: -+** mov (z[0-9]+\.s), s4 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fnmls z0\.s, p0/m, z2\.s, \1 -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fnmsb z0\.s, p0/m, \1, z1\.s -+** | -+** movprfx z0\.s, p0/z, \1 -+** fnmsb z0\.s, p0/m, z2\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (nmls_s4_f32_z_untied, svfloat32_t, float, -+ z0 = svnmls_n_f32_z (p0, z1, z2, d4), -+ z0 = svnmls_z (p0, z1, z2, d4)) -+ -+/* -+** nmls_2_f32_z_tied1: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** movprfx z0\.s, p0/z, z0\.s -+** fnmls z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_2_f32_z_tied1, svfloat32_t, -+ z0 = svnmls_n_f32_z (p0, z0, z1, 2), -+ z0 = svnmls_z (p0, z0, z1, 2)) -+ -+/* -+** nmls_2_f32_z_tied2: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** movprfx z0\.s, p0/z, z0\.s -+** fnmsb z0\.s, p0/m, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_2_f32_z_tied2, svfloat32_t, -+ z0 = svnmls_n_f32_z (p0, z1, z0, 2), -+ z0 = svnmls_z (p0, z1, z0, 2)) -+ -+/* -+** nmls_2_f32_z_untied: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fnmls z0\.s, p0/m, z2\.s, \1 -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fnmsb z0\.s, p0/m, \1, z1\.s -+** | -+** movprfx z0\.s, p0/z, \1 -+** fnmsb z0\.s, p0/m, z2\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_2_f32_z_untied, svfloat32_t, -+ z0 = svnmls_n_f32_z (p0, z1, z2, 2), -+ z0 = svnmls_z (p0, z1, z2, 2)) -+ -+/* -+** nmls_f32_x_tied1: -+** fnmls z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_f32_x_tied1, svfloat32_t, -+ z0 = svnmls_f32_x (p0, z0, z1, z2), -+ z0 = svnmls_x (p0, z0, z1, z2)) -+ -+/* -+** nmls_f32_x_tied2: -+** fnmsb z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_f32_x_tied2, svfloat32_t, -+ z0 = svnmls_f32_x (p0, z1, z0, z2), -+ z0 = svnmls_x (p0, z1, z0, z2)) -+ -+/* -+** nmls_f32_x_tied3: -+** fnmsb z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_f32_x_tied3, svfloat32_t, -+ z0 = svnmls_f32_x (p0, z1, z2, z0), -+ z0 = svnmls_x (p0, z1, z2, z0)) -+ -+/* -+** nmls_f32_x_untied: -+** ( -+** movprfx z0, z1 -+** fnmls z0\.s, p0/m, z2\.s, z3\.s -+** | -+** movprfx z0, z2 -+** fnmsb z0\.s, p0/m, z3\.s, z1\.s -+** | -+** movprfx z0, z3 -+** fnmsb z0\.s, p0/m, z2\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_f32_x_untied, svfloat32_t, -+ z0 = svnmls_f32_x (p0, z1, z2, z3), -+ z0 = svnmls_x (p0, z1, z2, z3)) -+ -+/* -+** nmls_s4_f32_x_tied1: -+** mov (z[0-9]+\.s), s4 -+** fnmls z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmls_s4_f32_x_tied1, svfloat32_t, float, -+ z0 = svnmls_n_f32_x (p0, z0, z1, d4), -+ z0 = svnmls_x (p0, z0, z1, d4)) -+ -+/* -+** nmls_s4_f32_x_tied2: -+** mov (z[0-9]+\.s), s4 -+** fnmsb z0\.s, p0/m, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZD (nmls_s4_f32_x_tied2, svfloat32_t, float, -+ z0 = svnmls_n_f32_x (p0, z1, z0, d4), -+ z0 = svnmls_x (p0, z1, z0, d4)) -+ -+/* -+** nmls_s4_f32_x_untied: { xfail *-*-* } -+** mov z0\.s, s4 -+** fnmsb z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZD (nmls_s4_f32_x_untied, svfloat32_t, float, -+ z0 = svnmls_n_f32_x (p0, z1, z2, d4), -+ z0 = svnmls_x (p0, z1, z2, d4)) -+ -+/* -+** nmls_2_f32_x_tied1: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fnmls z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_2_f32_x_tied1, svfloat32_t, -+ z0 = svnmls_n_f32_x (p0, z0, z1, 2), -+ z0 = svnmls_x (p0, z0, z1, 2)) -+ -+/* -+** nmls_2_f32_x_tied2: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fnmsb z0\.s, p0/m, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_2_f32_x_tied2, svfloat32_t, -+ z0 = svnmls_n_f32_x (p0, z1, z0, 2), -+ z0 = svnmls_x (p0, z1, z0, 2)) -+ -+/* -+** nmls_2_f32_x_untied: -+** fmov z0\.s, #2\.0(?:e\+0)? -+** fnmsb z0\.s, p0/m, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_2_f32_x_untied, svfloat32_t, -+ z0 = svnmls_n_f32_x (p0, z1, z2, 2), -+ z0 = svnmls_x (p0, z1, z2, 2)) -+ -+/* -+** ptrue_nmls_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmls_f32_x_tied1, svfloat32_t, -+ z0 = svnmls_f32_x (svptrue_b32 (), z0, z1, z2), -+ z0 = svnmls_x (svptrue_b32 (), z0, z1, z2)) -+ -+/* -+** ptrue_nmls_f32_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmls_f32_x_tied2, svfloat32_t, -+ z0 = svnmls_f32_x (svptrue_b32 (), z1, z0, z2), -+ z0 = svnmls_x (svptrue_b32 (), z1, z0, z2)) -+ -+/* -+** ptrue_nmls_f32_x_tied3: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmls_f32_x_tied3, svfloat32_t, -+ z0 = svnmls_f32_x (svptrue_b32 (), z1, z2, z0), -+ z0 = svnmls_x (svptrue_b32 (), z1, z2, z0)) -+ -+/* -+** ptrue_nmls_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmls_f32_x_untied, svfloat32_t, -+ z0 = svnmls_f32_x (svptrue_b32 (), z1, z2, z3), -+ z0 = svnmls_x (svptrue_b32 (), z1, z2, z3)) -+ -+/* -+** ptrue_nmls_2_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmls_2_f32_x_tied1, svfloat32_t, -+ z0 = svnmls_n_f32_x (svptrue_b32 (), z0, z1, 2), -+ z0 = svnmls_x (svptrue_b32 (), z0, z1, 2)) -+ -+/* -+** ptrue_nmls_2_f32_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmls_2_f32_x_tied2, svfloat32_t, -+ z0 = svnmls_n_f32_x (svptrue_b32 (), z1, z0, 2), -+ z0 = svnmls_x (svptrue_b32 (), z1, z0, 2)) -+ -+/* -+** ptrue_nmls_2_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmls_2_f32_x_untied, svfloat32_t, -+ z0 = svnmls_n_f32_x (svptrue_b32 (), z1, z2, 2), -+ z0 = svnmls_x (svptrue_b32 (), z1, z2, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmls_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmls_f64.c -new file mode 100644 -index 000000000..994c2a74e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmls_f64.c -@@ -0,0 +1,398 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** nmls_f64_m_tied1: -+** fnmls z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_f64_m_tied1, svfloat64_t, -+ z0 = svnmls_f64_m (p0, z0, z1, z2), -+ z0 = svnmls_m (p0, z0, z1, z2)) -+ -+/* -+** nmls_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fnmls z0\.d, p0/m, \1, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_f64_m_tied2, svfloat64_t, -+ z0 = svnmls_f64_m (p0, z1, z0, z2), -+ z0 = svnmls_m (p0, z1, z0, z2)) -+ -+/* -+** nmls_f64_m_tied3: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fnmls z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_f64_m_tied3, svfloat64_t, -+ z0 = svnmls_f64_m (p0, z1, z2, z0), -+ z0 = svnmls_m (p0, z1, z2, z0)) -+ -+/* -+** nmls_f64_m_untied: -+** movprfx z0, z1 -+** fnmls z0\.d, p0/m, z2\.d, z3\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_f64_m_untied, svfloat64_t, -+ z0 = svnmls_f64_m (p0, z1, z2, z3), -+ z0 = svnmls_m (p0, z1, z2, z3)) -+ -+/* -+** nmls_d4_f64_m_tied1: -+** mov (z[0-9]+\.d), d4 -+** fnmls z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmls_d4_f64_m_tied1, svfloat64_t, double, -+ z0 = svnmls_n_f64_m (p0, z0, z1, d4), -+ z0 = svnmls_m (p0, z0, z1, d4)) -+ -+/* -+** nmls_d4_f64_m_untied: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0, z1 -+** fnmls z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmls_d4_f64_m_untied, svfloat64_t, double, -+ z0 = svnmls_n_f64_m (p0, z1, z2, d4), -+ z0 = svnmls_m (p0, z1, z2, d4)) -+ -+/* -+** nmls_2_f64_m_tied1: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fnmls z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_2_f64_m_tied1, svfloat64_t, -+ z0 = svnmls_n_f64_m (p0, z0, z1, 2), -+ z0 = svnmls_m (p0, z0, z1, 2)) -+ -+/* -+** nmls_2_f64_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fnmls z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_2_f64_m_untied, svfloat64_t, -+ z0 = svnmls_n_f64_m (p0, z1, z2, 2), -+ z0 = svnmls_m (p0, z1, z2, 2)) -+ -+/* -+** nmls_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fnmls z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_f64_z_tied1, svfloat64_t, -+ z0 = svnmls_f64_z (p0, z0, z1, z2), -+ z0 = svnmls_z (p0, z0, z1, z2)) -+ -+/* -+** nmls_f64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** fnmsb z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_f64_z_tied2, svfloat64_t, -+ z0 = svnmls_f64_z (p0, z1, z0, z2), -+ z0 = svnmls_z (p0, z1, z0, z2)) -+ -+/* -+** nmls_f64_z_tied3: -+** movprfx z0\.d, p0/z, z0\.d -+** fnmsb z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_f64_z_tied3, svfloat64_t, -+ z0 = svnmls_f64_z (p0, z1, z2, z0), -+ z0 = svnmls_z (p0, z1, z2, z0)) -+ -+/* -+** nmls_f64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fnmls z0\.d, p0/m, z2\.d, z3\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fnmsb z0\.d, p0/m, z3\.d, z1\.d -+** | -+** movprfx z0\.d, p0/z, z3\.d -+** fnmsb z0\.d, p0/m, z2\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_f64_z_untied, svfloat64_t, -+ z0 = svnmls_f64_z (p0, z1, z2, z3), -+ z0 = svnmls_z (p0, z1, z2, z3)) -+ -+/* -+** nmls_d4_f64_z_tied1: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0\.d, p0/z, z0\.d -+** fnmls z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmls_d4_f64_z_tied1, svfloat64_t, double, -+ z0 = svnmls_n_f64_z (p0, z0, z1, d4), -+ z0 = svnmls_z (p0, z0, z1, d4)) -+ -+/* -+** nmls_d4_f64_z_tied2: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0\.d, p0/z, z0\.d -+** fnmsb z0\.d, p0/m, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZD (nmls_d4_f64_z_tied2, svfloat64_t, double, -+ z0 = svnmls_n_f64_z (p0, z1, z0, d4), -+ z0 = svnmls_z (p0, z1, z0, d4)) -+ -+/* -+** nmls_d4_f64_z_untied: -+** mov (z[0-9]+\.d), d4 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fnmls z0\.d, p0/m, z2\.d, \1 -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fnmsb z0\.d, p0/m, \1, z1\.d -+** | -+** movprfx z0\.d, p0/z, \1 -+** fnmsb z0\.d, p0/m, z2\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (nmls_d4_f64_z_untied, svfloat64_t, double, -+ z0 = svnmls_n_f64_z (p0, z1, z2, d4), -+ z0 = svnmls_z (p0, z1, z2, d4)) -+ -+/* -+** nmls_2_f64_z_tied1: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** movprfx z0\.d, p0/z, z0\.d -+** fnmls z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_2_f64_z_tied1, svfloat64_t, -+ z0 = svnmls_n_f64_z (p0, z0, z1, 2), -+ z0 = svnmls_z (p0, z0, z1, 2)) -+ -+/* -+** nmls_2_f64_z_tied2: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** movprfx z0\.d, p0/z, z0\.d -+** fnmsb z0\.d, p0/m, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_2_f64_z_tied2, svfloat64_t, -+ z0 = svnmls_n_f64_z (p0, z1, z0, 2), -+ z0 = svnmls_z (p0, z1, z0, 2)) -+ -+/* -+** nmls_2_f64_z_untied: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fnmls z0\.d, p0/m, z2\.d, \1 -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fnmsb z0\.d, p0/m, \1, z1\.d -+** | -+** movprfx z0\.d, p0/z, \1 -+** fnmsb z0\.d, p0/m, z2\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_2_f64_z_untied, svfloat64_t, -+ z0 = svnmls_n_f64_z (p0, z1, z2, 2), -+ z0 = svnmls_z (p0, z1, z2, 2)) -+ -+/* -+** nmls_f64_x_tied1: -+** fnmls z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_f64_x_tied1, svfloat64_t, -+ z0 = svnmls_f64_x (p0, z0, z1, z2), -+ z0 = svnmls_x (p0, z0, z1, z2)) -+ -+/* -+** nmls_f64_x_tied2: -+** fnmsb z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_f64_x_tied2, svfloat64_t, -+ z0 = svnmls_f64_x (p0, z1, z0, z2), -+ z0 = svnmls_x (p0, z1, z0, z2)) -+ -+/* -+** nmls_f64_x_tied3: -+** fnmsb z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_f64_x_tied3, svfloat64_t, -+ z0 = svnmls_f64_x (p0, z1, z2, z0), -+ z0 = svnmls_x (p0, z1, z2, z0)) -+ -+/* -+** nmls_f64_x_untied: -+** ( -+** movprfx z0, z1 -+** fnmls z0\.d, p0/m, z2\.d, z3\.d -+** | -+** movprfx z0, z2 -+** fnmsb z0\.d, p0/m, z3\.d, z1\.d -+** | -+** movprfx z0, z3 -+** fnmsb z0\.d, p0/m, z2\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_f64_x_untied, svfloat64_t, -+ z0 = svnmls_f64_x (p0, z1, z2, z3), -+ z0 = svnmls_x (p0, z1, z2, z3)) -+ -+/* -+** nmls_d4_f64_x_tied1: -+** mov (z[0-9]+\.d), d4 -+** fnmls z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmls_d4_f64_x_tied1, svfloat64_t, double, -+ z0 = svnmls_n_f64_x (p0, z0, z1, d4), -+ z0 = svnmls_x (p0, z0, z1, d4)) -+ -+/* -+** nmls_d4_f64_x_tied2: -+** mov (z[0-9]+\.d), d4 -+** fnmsb z0\.d, p0/m, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZD (nmls_d4_f64_x_tied2, svfloat64_t, double, -+ z0 = svnmls_n_f64_x (p0, z1, z0, d4), -+ z0 = svnmls_x (p0, z1, z0, d4)) -+ -+/* -+** nmls_d4_f64_x_untied: { xfail *-*-* } -+** mov z0\.d, d4 -+** fnmsb z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZD (nmls_d4_f64_x_untied, svfloat64_t, double, -+ z0 = svnmls_n_f64_x (p0, z1, z2, d4), -+ z0 = svnmls_x (p0, z1, z2, d4)) -+ -+/* -+** nmls_2_f64_x_tied1: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fnmls z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_2_f64_x_tied1, svfloat64_t, -+ z0 = svnmls_n_f64_x (p0, z0, z1, 2), -+ z0 = svnmls_x (p0, z0, z1, 2)) -+ -+/* -+** nmls_2_f64_x_tied2: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fnmsb z0\.d, p0/m, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_2_f64_x_tied2, svfloat64_t, -+ z0 = svnmls_n_f64_x (p0, z1, z0, 2), -+ z0 = svnmls_x (p0, z1, z0, 2)) -+ -+/* -+** nmls_2_f64_x_untied: -+** fmov z0\.d, #2\.0(?:e\+0)? -+** fnmsb z0\.d, p0/m, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmls_2_f64_x_untied, svfloat64_t, -+ z0 = svnmls_n_f64_x (p0, z1, z2, 2), -+ z0 = svnmls_x (p0, z1, z2, 2)) -+ -+/* -+** ptrue_nmls_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmls_f64_x_tied1, svfloat64_t, -+ z0 = svnmls_f64_x (svptrue_b64 (), z0, z1, z2), -+ z0 = svnmls_x (svptrue_b64 (), z0, z1, z2)) -+ -+/* -+** ptrue_nmls_f64_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmls_f64_x_tied2, svfloat64_t, -+ z0 = svnmls_f64_x (svptrue_b64 (), z1, z0, z2), -+ z0 = svnmls_x (svptrue_b64 (), z1, z0, z2)) -+ -+/* -+** ptrue_nmls_f64_x_tied3: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmls_f64_x_tied3, svfloat64_t, -+ z0 = svnmls_f64_x (svptrue_b64 (), z1, z2, z0), -+ z0 = svnmls_x (svptrue_b64 (), z1, z2, z0)) -+ -+/* -+** ptrue_nmls_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmls_f64_x_untied, svfloat64_t, -+ z0 = svnmls_f64_x (svptrue_b64 (), z1, z2, z3), -+ z0 = svnmls_x (svptrue_b64 (), z1, z2, z3)) -+ -+/* -+** ptrue_nmls_2_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmls_2_f64_x_tied1, svfloat64_t, -+ z0 = svnmls_n_f64_x (svptrue_b64 (), z0, z1, 2), -+ z0 = svnmls_x (svptrue_b64 (), z0, z1, 2)) -+ -+/* -+** ptrue_nmls_2_f64_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmls_2_f64_x_tied2, svfloat64_t, -+ z0 = svnmls_n_f64_x (svptrue_b64 (), z1, z0, 2), -+ z0 = svnmls_x (svptrue_b64 (), z1, z0, 2)) -+ -+/* -+** ptrue_nmls_2_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmls_2_f64_x_untied, svfloat64_t, -+ z0 = svnmls_n_f64_x (svptrue_b64 (), z1, z2, 2), -+ z0 = svnmls_x (svptrue_b64 (), z1, z2, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmsb_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmsb_f16.c -new file mode 100644 -index 000000000..c11401485 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmsb_f16.c -@@ -0,0 +1,398 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** nmsb_f16_m_tied1: -+** fnmsb z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_f16_m_tied1, svfloat16_t, -+ z0 = svnmsb_f16_m (p0, z0, z1, z2), -+ z0 = svnmsb_m (p0, z0, z1, z2)) -+ -+/* -+** nmsb_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fnmsb z0\.h, p0/m, \1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_f16_m_tied2, svfloat16_t, -+ z0 = svnmsb_f16_m (p0, z1, z0, z2), -+ z0 = svnmsb_m (p0, z1, z0, z2)) -+ -+/* -+** nmsb_f16_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fnmsb z0\.h, p0/m, z2\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_f16_m_tied3, svfloat16_t, -+ z0 = svnmsb_f16_m (p0, z1, z2, z0), -+ z0 = svnmsb_m (p0, z1, z2, z0)) -+ -+/* -+** nmsb_f16_m_untied: -+** movprfx z0, z1 -+** fnmsb z0\.h, p0/m, z2\.h, z3\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_f16_m_untied, svfloat16_t, -+ z0 = svnmsb_f16_m (p0, z1, z2, z3), -+ z0 = svnmsb_m (p0, z1, z2, z3)) -+ -+/* -+** nmsb_h4_f16_m_tied1: -+** mov (z[0-9]+\.h), h4 -+** fnmsb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmsb_h4_f16_m_tied1, svfloat16_t, __fp16, -+ z0 = svnmsb_n_f16_m (p0, z0, z1, d4), -+ z0 = svnmsb_m (p0, z0, z1, d4)) -+ -+/* -+** nmsb_h4_f16_m_untied: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0, z1 -+** fnmsb z0\.h, p0/m, z2\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmsb_h4_f16_m_untied, svfloat16_t, __fp16, -+ z0 = svnmsb_n_f16_m (p0, z1, z2, d4), -+ z0 = svnmsb_m (p0, z1, z2, d4)) -+ -+/* -+** nmsb_2_f16_m_tied1: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fnmsb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_2_f16_m_tied1, svfloat16_t, -+ z0 = svnmsb_n_f16_m (p0, z0, z1, 2), -+ z0 = svnmsb_m (p0, z0, z1, 2)) -+ -+/* -+** nmsb_2_f16_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fnmsb z0\.h, p0/m, z2\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_2_f16_m_untied, svfloat16_t, -+ z0 = svnmsb_n_f16_m (p0, z1, z2, 2), -+ z0 = svnmsb_m (p0, z1, z2, 2)) -+ -+/* -+** nmsb_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fnmsb z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_f16_z_tied1, svfloat16_t, -+ z0 = svnmsb_f16_z (p0, z0, z1, z2), -+ z0 = svnmsb_z (p0, z0, z1, z2)) -+ -+/* -+** nmsb_f16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** fnmsb z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_f16_z_tied2, svfloat16_t, -+ z0 = svnmsb_f16_z (p0, z1, z0, z2), -+ z0 = svnmsb_z (p0, z1, z0, z2)) -+ -+/* -+** nmsb_f16_z_tied3: -+** movprfx z0\.h, p0/z, z0\.h -+** fnmls z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_f16_z_tied3, svfloat16_t, -+ z0 = svnmsb_f16_z (p0, z1, z2, z0), -+ z0 = svnmsb_z (p0, z1, z2, z0)) -+ -+/* -+** nmsb_f16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fnmsb z0\.h, p0/m, z2\.h, z3\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fnmsb z0\.h, p0/m, z1\.h, z3\.h -+** | -+** movprfx z0\.h, p0/z, z3\.h -+** fnmls z0\.h, p0/m, z1\.h, z2\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_f16_z_untied, svfloat16_t, -+ z0 = svnmsb_f16_z (p0, z1, z2, z3), -+ z0 = svnmsb_z (p0, z1, z2, z3)) -+ -+/* -+** nmsb_h4_f16_z_tied1: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0\.h, p0/z, z0\.h -+** fnmsb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmsb_h4_f16_z_tied1, svfloat16_t, __fp16, -+ z0 = svnmsb_n_f16_z (p0, z0, z1, d4), -+ z0 = svnmsb_z (p0, z0, z1, d4)) -+ -+/* -+** nmsb_h4_f16_z_tied2: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0\.h, p0/z, z0\.h -+** fnmsb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmsb_h4_f16_z_tied2, svfloat16_t, __fp16, -+ z0 = svnmsb_n_f16_z (p0, z1, z0, d4), -+ z0 = svnmsb_z (p0, z1, z0, d4)) -+ -+/* -+** nmsb_h4_f16_z_untied: -+** mov (z[0-9]+\.h), h4 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fnmsb z0\.h, p0/m, z2\.h, \1 -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fnmsb z0\.h, p0/m, z1\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** fnmls z0\.h, p0/m, z1\.h, z2\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (nmsb_h4_f16_z_untied, svfloat16_t, __fp16, -+ z0 = svnmsb_n_f16_z (p0, z1, z2, d4), -+ z0 = svnmsb_z (p0, z1, z2, d4)) -+ -+/* -+** nmsb_2_f16_z_tied1: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** movprfx z0\.h, p0/z, z0\.h -+** fnmsb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_2_f16_z_tied1, svfloat16_t, -+ z0 = svnmsb_n_f16_z (p0, z0, z1, 2), -+ z0 = svnmsb_z (p0, z0, z1, 2)) -+ -+/* -+** nmsb_2_f16_z_tied2: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** movprfx z0\.h, p0/z, z0\.h -+** fnmsb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_2_f16_z_tied2, svfloat16_t, -+ z0 = svnmsb_n_f16_z (p0, z1, z0, 2), -+ z0 = svnmsb_z (p0, z1, z0, 2)) -+ -+/* -+** nmsb_2_f16_z_untied: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fnmsb z0\.h, p0/m, z2\.h, \1 -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fnmsb z0\.h, p0/m, z1\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** fnmls z0\.h, p0/m, z1\.h, z2\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_2_f16_z_untied, svfloat16_t, -+ z0 = svnmsb_n_f16_z (p0, z1, z2, 2), -+ z0 = svnmsb_z (p0, z1, z2, 2)) -+ -+/* -+** nmsb_f16_x_tied1: -+** fnmsb z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_f16_x_tied1, svfloat16_t, -+ z0 = svnmsb_f16_x (p0, z0, z1, z2), -+ z0 = svnmsb_x (p0, z0, z1, z2)) -+ -+/* -+** nmsb_f16_x_tied2: -+** fnmsb z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_f16_x_tied2, svfloat16_t, -+ z0 = svnmsb_f16_x (p0, z1, z0, z2), -+ z0 = svnmsb_x (p0, z1, z0, z2)) -+ -+/* -+** nmsb_f16_x_tied3: -+** fnmls z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_f16_x_tied3, svfloat16_t, -+ z0 = svnmsb_f16_x (p0, z1, z2, z0), -+ z0 = svnmsb_x (p0, z1, z2, z0)) -+ -+/* -+** nmsb_f16_x_untied: -+** ( -+** movprfx z0, z1 -+** fnmsb z0\.h, p0/m, z2\.h, z3\.h -+** | -+** movprfx z0, z2 -+** fnmsb z0\.h, p0/m, z1\.h, z3\.h -+** | -+** movprfx z0, z3 -+** fnmls z0\.h, p0/m, z1\.h, z2\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_f16_x_untied, svfloat16_t, -+ z0 = svnmsb_f16_x (p0, z1, z2, z3), -+ z0 = svnmsb_x (p0, z1, z2, z3)) -+ -+/* -+** nmsb_h4_f16_x_tied1: -+** mov (z[0-9]+\.h), h4 -+** fnmsb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmsb_h4_f16_x_tied1, svfloat16_t, __fp16, -+ z0 = svnmsb_n_f16_x (p0, z0, z1, d4), -+ z0 = svnmsb_x (p0, z0, z1, d4)) -+ -+/* -+** nmsb_h4_f16_x_tied2: -+** mov (z[0-9]+\.h), h4 -+** fnmsb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmsb_h4_f16_x_tied2, svfloat16_t, __fp16, -+ z0 = svnmsb_n_f16_x (p0, z1, z0, d4), -+ z0 = svnmsb_x (p0, z1, z0, d4)) -+ -+/* -+** nmsb_h4_f16_x_untied: { xfail *-*-* } -+** mov z0\.h, h4 -+** fnmls z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_ZD (nmsb_h4_f16_x_untied, svfloat16_t, __fp16, -+ z0 = svnmsb_n_f16_x (p0, z1, z2, d4), -+ z0 = svnmsb_x (p0, z1, z2, d4)) -+ -+/* -+** nmsb_2_f16_x_tied1: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fnmsb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_2_f16_x_tied1, svfloat16_t, -+ z0 = svnmsb_n_f16_x (p0, z0, z1, 2), -+ z0 = svnmsb_x (p0, z0, z1, 2)) -+ -+/* -+** nmsb_2_f16_x_tied2: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fnmsb z0\.h, p0/m, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_2_f16_x_tied2, svfloat16_t, -+ z0 = svnmsb_n_f16_x (p0, z1, z0, 2), -+ z0 = svnmsb_x (p0, z1, z0, 2)) -+ -+/* -+** nmsb_2_f16_x_untied: -+** fmov z0\.h, #2\.0(?:e\+0)? -+** fnmls z0\.h, p0/m, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_2_f16_x_untied, svfloat16_t, -+ z0 = svnmsb_n_f16_x (p0, z1, z2, 2), -+ z0 = svnmsb_x (p0, z1, z2, 2)) -+ -+/* -+** ptrue_nmsb_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmsb_f16_x_tied1, svfloat16_t, -+ z0 = svnmsb_f16_x (svptrue_b16 (), z0, z1, z2), -+ z0 = svnmsb_x (svptrue_b16 (), z0, z1, z2)) -+ -+/* -+** ptrue_nmsb_f16_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmsb_f16_x_tied2, svfloat16_t, -+ z0 = svnmsb_f16_x (svptrue_b16 (), z1, z0, z2), -+ z0 = svnmsb_x (svptrue_b16 (), z1, z0, z2)) -+ -+/* -+** ptrue_nmsb_f16_x_tied3: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmsb_f16_x_tied3, svfloat16_t, -+ z0 = svnmsb_f16_x (svptrue_b16 (), z1, z2, z0), -+ z0 = svnmsb_x (svptrue_b16 (), z1, z2, z0)) -+ -+/* -+** ptrue_nmsb_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmsb_f16_x_untied, svfloat16_t, -+ z0 = svnmsb_f16_x (svptrue_b16 (), z1, z2, z3), -+ z0 = svnmsb_x (svptrue_b16 (), z1, z2, z3)) -+ -+/* -+** ptrue_nmsb_2_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmsb_2_f16_x_tied1, svfloat16_t, -+ z0 = svnmsb_n_f16_x (svptrue_b16 (), z0, z1, 2), -+ z0 = svnmsb_x (svptrue_b16 (), z0, z1, 2)) -+ -+/* -+** ptrue_nmsb_2_f16_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmsb_2_f16_x_tied2, svfloat16_t, -+ z0 = svnmsb_n_f16_x (svptrue_b16 (), z1, z0, 2), -+ z0 = svnmsb_x (svptrue_b16 (), z1, z0, 2)) -+ -+/* -+** ptrue_nmsb_2_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmsb_2_f16_x_untied, svfloat16_t, -+ z0 = svnmsb_n_f16_x (svptrue_b16 (), z1, z2, 2), -+ z0 = svnmsb_x (svptrue_b16 (), z1, z2, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmsb_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmsb_f32.c -new file mode 100644 -index 000000000..c2204e040 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmsb_f32.c -@@ -0,0 +1,398 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** nmsb_f32_m_tied1: -+** fnmsb z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_f32_m_tied1, svfloat32_t, -+ z0 = svnmsb_f32_m (p0, z0, z1, z2), -+ z0 = svnmsb_m (p0, z0, z1, z2)) -+ -+/* -+** nmsb_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fnmsb z0\.s, p0/m, \1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_f32_m_tied2, svfloat32_t, -+ z0 = svnmsb_f32_m (p0, z1, z0, z2), -+ z0 = svnmsb_m (p0, z1, z0, z2)) -+ -+/* -+** nmsb_f32_m_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fnmsb z0\.s, p0/m, z2\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_f32_m_tied3, svfloat32_t, -+ z0 = svnmsb_f32_m (p0, z1, z2, z0), -+ z0 = svnmsb_m (p0, z1, z2, z0)) -+ -+/* -+** nmsb_f32_m_untied: -+** movprfx z0, z1 -+** fnmsb z0\.s, p0/m, z2\.s, z3\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_f32_m_untied, svfloat32_t, -+ z0 = svnmsb_f32_m (p0, z1, z2, z3), -+ z0 = svnmsb_m (p0, z1, z2, z3)) -+ -+/* -+** nmsb_s4_f32_m_tied1: -+** mov (z[0-9]+\.s), s4 -+** fnmsb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmsb_s4_f32_m_tied1, svfloat32_t, float, -+ z0 = svnmsb_n_f32_m (p0, z0, z1, d4), -+ z0 = svnmsb_m (p0, z0, z1, d4)) -+ -+/* -+** nmsb_s4_f32_m_untied: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0, z1 -+** fnmsb z0\.s, p0/m, z2\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmsb_s4_f32_m_untied, svfloat32_t, float, -+ z0 = svnmsb_n_f32_m (p0, z1, z2, d4), -+ z0 = svnmsb_m (p0, z1, z2, d4)) -+ -+/* -+** nmsb_2_f32_m_tied1: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fnmsb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_2_f32_m_tied1, svfloat32_t, -+ z0 = svnmsb_n_f32_m (p0, z0, z1, 2), -+ z0 = svnmsb_m (p0, z0, z1, 2)) -+ -+/* -+** nmsb_2_f32_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fnmsb z0\.s, p0/m, z2\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_2_f32_m_untied, svfloat32_t, -+ z0 = svnmsb_n_f32_m (p0, z1, z2, 2), -+ z0 = svnmsb_m (p0, z1, z2, 2)) -+ -+/* -+** nmsb_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fnmsb z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_f32_z_tied1, svfloat32_t, -+ z0 = svnmsb_f32_z (p0, z0, z1, z2), -+ z0 = svnmsb_z (p0, z0, z1, z2)) -+ -+/* -+** nmsb_f32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** fnmsb z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_f32_z_tied2, svfloat32_t, -+ z0 = svnmsb_f32_z (p0, z1, z0, z2), -+ z0 = svnmsb_z (p0, z1, z0, z2)) -+ -+/* -+** nmsb_f32_z_tied3: -+** movprfx z0\.s, p0/z, z0\.s -+** fnmls z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_f32_z_tied3, svfloat32_t, -+ z0 = svnmsb_f32_z (p0, z1, z2, z0), -+ z0 = svnmsb_z (p0, z1, z2, z0)) -+ -+/* -+** nmsb_f32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fnmsb z0\.s, p0/m, z2\.s, z3\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fnmsb z0\.s, p0/m, z1\.s, z3\.s -+** | -+** movprfx z0\.s, p0/z, z3\.s -+** fnmls z0\.s, p0/m, z1\.s, z2\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_f32_z_untied, svfloat32_t, -+ z0 = svnmsb_f32_z (p0, z1, z2, z3), -+ z0 = svnmsb_z (p0, z1, z2, z3)) -+ -+/* -+** nmsb_s4_f32_z_tied1: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0\.s, p0/z, z0\.s -+** fnmsb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmsb_s4_f32_z_tied1, svfloat32_t, float, -+ z0 = svnmsb_n_f32_z (p0, z0, z1, d4), -+ z0 = svnmsb_z (p0, z0, z1, d4)) -+ -+/* -+** nmsb_s4_f32_z_tied2: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0\.s, p0/z, z0\.s -+** fnmsb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmsb_s4_f32_z_tied2, svfloat32_t, float, -+ z0 = svnmsb_n_f32_z (p0, z1, z0, d4), -+ z0 = svnmsb_z (p0, z1, z0, d4)) -+ -+/* -+** nmsb_s4_f32_z_untied: -+** mov (z[0-9]+\.s), s4 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fnmsb z0\.s, p0/m, z2\.s, \1 -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fnmsb z0\.s, p0/m, z1\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** fnmls z0\.s, p0/m, z1\.s, z2\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (nmsb_s4_f32_z_untied, svfloat32_t, float, -+ z0 = svnmsb_n_f32_z (p0, z1, z2, d4), -+ z0 = svnmsb_z (p0, z1, z2, d4)) -+ -+/* -+** nmsb_2_f32_z_tied1: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** movprfx z0\.s, p0/z, z0\.s -+** fnmsb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_2_f32_z_tied1, svfloat32_t, -+ z0 = svnmsb_n_f32_z (p0, z0, z1, 2), -+ z0 = svnmsb_z (p0, z0, z1, 2)) -+ -+/* -+** nmsb_2_f32_z_tied2: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** movprfx z0\.s, p0/z, z0\.s -+** fnmsb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_2_f32_z_tied2, svfloat32_t, -+ z0 = svnmsb_n_f32_z (p0, z1, z0, 2), -+ z0 = svnmsb_z (p0, z1, z0, 2)) -+ -+/* -+** nmsb_2_f32_z_untied: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fnmsb z0\.s, p0/m, z2\.s, \1 -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fnmsb z0\.s, p0/m, z1\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** fnmls z0\.s, p0/m, z1\.s, z2\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_2_f32_z_untied, svfloat32_t, -+ z0 = svnmsb_n_f32_z (p0, z1, z2, 2), -+ z0 = svnmsb_z (p0, z1, z2, 2)) -+ -+/* -+** nmsb_f32_x_tied1: -+** fnmsb z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_f32_x_tied1, svfloat32_t, -+ z0 = svnmsb_f32_x (p0, z0, z1, z2), -+ z0 = svnmsb_x (p0, z0, z1, z2)) -+ -+/* -+** nmsb_f32_x_tied2: -+** fnmsb z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_f32_x_tied2, svfloat32_t, -+ z0 = svnmsb_f32_x (p0, z1, z0, z2), -+ z0 = svnmsb_x (p0, z1, z0, z2)) -+ -+/* -+** nmsb_f32_x_tied3: -+** fnmls z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_f32_x_tied3, svfloat32_t, -+ z0 = svnmsb_f32_x (p0, z1, z2, z0), -+ z0 = svnmsb_x (p0, z1, z2, z0)) -+ -+/* -+** nmsb_f32_x_untied: -+** ( -+** movprfx z0, z1 -+** fnmsb z0\.s, p0/m, z2\.s, z3\.s -+** | -+** movprfx z0, z2 -+** fnmsb z0\.s, p0/m, z1\.s, z3\.s -+** | -+** movprfx z0, z3 -+** fnmls z0\.s, p0/m, z1\.s, z2\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_f32_x_untied, svfloat32_t, -+ z0 = svnmsb_f32_x (p0, z1, z2, z3), -+ z0 = svnmsb_x (p0, z1, z2, z3)) -+ -+/* -+** nmsb_s4_f32_x_tied1: -+** mov (z[0-9]+\.s), s4 -+** fnmsb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmsb_s4_f32_x_tied1, svfloat32_t, float, -+ z0 = svnmsb_n_f32_x (p0, z0, z1, d4), -+ z0 = svnmsb_x (p0, z0, z1, d4)) -+ -+/* -+** nmsb_s4_f32_x_tied2: -+** mov (z[0-9]+\.s), s4 -+** fnmsb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmsb_s4_f32_x_tied2, svfloat32_t, float, -+ z0 = svnmsb_n_f32_x (p0, z1, z0, d4), -+ z0 = svnmsb_x (p0, z1, z0, d4)) -+ -+/* -+** nmsb_s4_f32_x_untied: { xfail *-*-* } -+** mov z0\.s, s4 -+** fnmls z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_ZD (nmsb_s4_f32_x_untied, svfloat32_t, float, -+ z0 = svnmsb_n_f32_x (p0, z1, z2, d4), -+ z0 = svnmsb_x (p0, z1, z2, d4)) -+ -+/* -+** nmsb_2_f32_x_tied1: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fnmsb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_2_f32_x_tied1, svfloat32_t, -+ z0 = svnmsb_n_f32_x (p0, z0, z1, 2), -+ z0 = svnmsb_x (p0, z0, z1, 2)) -+ -+/* -+** nmsb_2_f32_x_tied2: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? -+** fnmsb z0\.s, p0/m, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_2_f32_x_tied2, svfloat32_t, -+ z0 = svnmsb_n_f32_x (p0, z1, z0, 2), -+ z0 = svnmsb_x (p0, z1, z0, 2)) -+ -+/* -+** nmsb_2_f32_x_untied: -+** fmov z0\.s, #2\.0(?:e\+0)? -+** fnmls z0\.s, p0/m, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_2_f32_x_untied, svfloat32_t, -+ z0 = svnmsb_n_f32_x (p0, z1, z2, 2), -+ z0 = svnmsb_x (p0, z1, z2, 2)) -+ -+/* -+** ptrue_nmsb_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmsb_f32_x_tied1, svfloat32_t, -+ z0 = svnmsb_f32_x (svptrue_b32 (), z0, z1, z2), -+ z0 = svnmsb_x (svptrue_b32 (), z0, z1, z2)) -+ -+/* -+** ptrue_nmsb_f32_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmsb_f32_x_tied2, svfloat32_t, -+ z0 = svnmsb_f32_x (svptrue_b32 (), z1, z0, z2), -+ z0 = svnmsb_x (svptrue_b32 (), z1, z0, z2)) -+ -+/* -+** ptrue_nmsb_f32_x_tied3: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmsb_f32_x_tied3, svfloat32_t, -+ z0 = svnmsb_f32_x (svptrue_b32 (), z1, z2, z0), -+ z0 = svnmsb_x (svptrue_b32 (), z1, z2, z0)) -+ -+/* -+** ptrue_nmsb_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmsb_f32_x_untied, svfloat32_t, -+ z0 = svnmsb_f32_x (svptrue_b32 (), z1, z2, z3), -+ z0 = svnmsb_x (svptrue_b32 (), z1, z2, z3)) -+ -+/* -+** ptrue_nmsb_2_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmsb_2_f32_x_tied1, svfloat32_t, -+ z0 = svnmsb_n_f32_x (svptrue_b32 (), z0, z1, 2), -+ z0 = svnmsb_x (svptrue_b32 (), z0, z1, 2)) -+ -+/* -+** ptrue_nmsb_2_f32_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmsb_2_f32_x_tied2, svfloat32_t, -+ z0 = svnmsb_n_f32_x (svptrue_b32 (), z1, z0, 2), -+ z0 = svnmsb_x (svptrue_b32 (), z1, z0, 2)) -+ -+/* -+** ptrue_nmsb_2_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmsb_2_f32_x_untied, svfloat32_t, -+ z0 = svnmsb_n_f32_x (svptrue_b32 (), z1, z2, 2), -+ z0 = svnmsb_x (svptrue_b32 (), z1, z2, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmsb_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmsb_f64.c -new file mode 100644 -index 000000000..56592d3ae ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nmsb_f64.c -@@ -0,0 +1,398 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** nmsb_f64_m_tied1: -+** fnmsb z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_f64_m_tied1, svfloat64_t, -+ z0 = svnmsb_f64_m (p0, z0, z1, z2), -+ z0 = svnmsb_m (p0, z0, z1, z2)) -+ -+/* -+** nmsb_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fnmsb z0\.d, p0/m, \1, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_f64_m_tied2, svfloat64_t, -+ z0 = svnmsb_f64_m (p0, z1, z0, z2), -+ z0 = svnmsb_m (p0, z1, z0, z2)) -+ -+/* -+** nmsb_f64_m_tied3: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fnmsb z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_f64_m_tied3, svfloat64_t, -+ z0 = svnmsb_f64_m (p0, z1, z2, z0), -+ z0 = svnmsb_m (p0, z1, z2, z0)) -+ -+/* -+** nmsb_f64_m_untied: -+** movprfx z0, z1 -+** fnmsb z0\.d, p0/m, z2\.d, z3\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_f64_m_untied, svfloat64_t, -+ z0 = svnmsb_f64_m (p0, z1, z2, z3), -+ z0 = svnmsb_m (p0, z1, z2, z3)) -+ -+/* -+** nmsb_d4_f64_m_tied1: -+** mov (z[0-9]+\.d), d4 -+** fnmsb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmsb_d4_f64_m_tied1, svfloat64_t, double, -+ z0 = svnmsb_n_f64_m (p0, z0, z1, d4), -+ z0 = svnmsb_m (p0, z0, z1, d4)) -+ -+/* -+** nmsb_d4_f64_m_untied: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0, z1 -+** fnmsb z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmsb_d4_f64_m_untied, svfloat64_t, double, -+ z0 = svnmsb_n_f64_m (p0, z1, z2, d4), -+ z0 = svnmsb_m (p0, z1, z2, d4)) -+ -+/* -+** nmsb_2_f64_m_tied1: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fnmsb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_2_f64_m_tied1, svfloat64_t, -+ z0 = svnmsb_n_f64_m (p0, z0, z1, 2), -+ z0 = svnmsb_m (p0, z0, z1, 2)) -+ -+/* -+** nmsb_2_f64_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fnmsb z0\.d, p0/m, z2\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_2_f64_m_untied, svfloat64_t, -+ z0 = svnmsb_n_f64_m (p0, z1, z2, 2), -+ z0 = svnmsb_m (p0, z1, z2, 2)) -+ -+/* -+** nmsb_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fnmsb z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_f64_z_tied1, svfloat64_t, -+ z0 = svnmsb_f64_z (p0, z0, z1, z2), -+ z0 = svnmsb_z (p0, z0, z1, z2)) -+ -+/* -+** nmsb_f64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** fnmsb z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_f64_z_tied2, svfloat64_t, -+ z0 = svnmsb_f64_z (p0, z1, z0, z2), -+ z0 = svnmsb_z (p0, z1, z0, z2)) -+ -+/* -+** nmsb_f64_z_tied3: -+** movprfx z0\.d, p0/z, z0\.d -+** fnmls z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_f64_z_tied3, svfloat64_t, -+ z0 = svnmsb_f64_z (p0, z1, z2, z0), -+ z0 = svnmsb_z (p0, z1, z2, z0)) -+ -+/* -+** nmsb_f64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fnmsb z0\.d, p0/m, z2\.d, z3\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fnmsb z0\.d, p0/m, z1\.d, z3\.d -+** | -+** movprfx z0\.d, p0/z, z3\.d -+** fnmls z0\.d, p0/m, z1\.d, z2\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_f64_z_untied, svfloat64_t, -+ z0 = svnmsb_f64_z (p0, z1, z2, z3), -+ z0 = svnmsb_z (p0, z1, z2, z3)) -+ -+/* -+** nmsb_d4_f64_z_tied1: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0\.d, p0/z, z0\.d -+** fnmsb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmsb_d4_f64_z_tied1, svfloat64_t, double, -+ z0 = svnmsb_n_f64_z (p0, z0, z1, d4), -+ z0 = svnmsb_z (p0, z0, z1, d4)) -+ -+/* -+** nmsb_d4_f64_z_tied2: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0\.d, p0/z, z0\.d -+** fnmsb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmsb_d4_f64_z_tied2, svfloat64_t, double, -+ z0 = svnmsb_n_f64_z (p0, z1, z0, d4), -+ z0 = svnmsb_z (p0, z1, z0, d4)) -+ -+/* -+** nmsb_d4_f64_z_untied: -+** mov (z[0-9]+\.d), d4 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fnmsb z0\.d, p0/m, z2\.d, \1 -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fnmsb z0\.d, p0/m, z1\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** fnmls z0\.d, p0/m, z1\.d, z2\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (nmsb_d4_f64_z_untied, svfloat64_t, double, -+ z0 = svnmsb_n_f64_z (p0, z1, z2, d4), -+ z0 = svnmsb_z (p0, z1, z2, d4)) -+ -+/* -+** nmsb_2_f64_z_tied1: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** movprfx z0\.d, p0/z, z0\.d -+** fnmsb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_2_f64_z_tied1, svfloat64_t, -+ z0 = svnmsb_n_f64_z (p0, z0, z1, 2), -+ z0 = svnmsb_z (p0, z0, z1, 2)) -+ -+/* -+** nmsb_2_f64_z_tied2: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** movprfx z0\.d, p0/z, z0\.d -+** fnmsb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_2_f64_z_tied2, svfloat64_t, -+ z0 = svnmsb_n_f64_z (p0, z1, z0, 2), -+ z0 = svnmsb_z (p0, z1, z0, 2)) -+ -+/* -+** nmsb_2_f64_z_untied: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fnmsb z0\.d, p0/m, z2\.d, \1 -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fnmsb z0\.d, p0/m, z1\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** fnmls z0\.d, p0/m, z1\.d, z2\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_2_f64_z_untied, svfloat64_t, -+ z0 = svnmsb_n_f64_z (p0, z1, z2, 2), -+ z0 = svnmsb_z (p0, z1, z2, 2)) -+ -+/* -+** nmsb_f64_x_tied1: -+** fnmsb z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_f64_x_tied1, svfloat64_t, -+ z0 = svnmsb_f64_x (p0, z0, z1, z2), -+ z0 = svnmsb_x (p0, z0, z1, z2)) -+ -+/* -+** nmsb_f64_x_tied2: -+** fnmsb z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_f64_x_tied2, svfloat64_t, -+ z0 = svnmsb_f64_x (p0, z1, z0, z2), -+ z0 = svnmsb_x (p0, z1, z0, z2)) -+ -+/* -+** nmsb_f64_x_tied3: -+** fnmls z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_f64_x_tied3, svfloat64_t, -+ z0 = svnmsb_f64_x (p0, z1, z2, z0), -+ z0 = svnmsb_x (p0, z1, z2, z0)) -+ -+/* -+** nmsb_f64_x_untied: -+** ( -+** movprfx z0, z1 -+** fnmsb z0\.d, p0/m, z2\.d, z3\.d -+** | -+** movprfx z0, z2 -+** fnmsb z0\.d, p0/m, z1\.d, z3\.d -+** | -+** movprfx z0, z3 -+** fnmls z0\.d, p0/m, z1\.d, z2\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_f64_x_untied, svfloat64_t, -+ z0 = svnmsb_f64_x (p0, z1, z2, z3), -+ z0 = svnmsb_x (p0, z1, z2, z3)) -+ -+/* -+** nmsb_d4_f64_x_tied1: -+** mov (z[0-9]+\.d), d4 -+** fnmsb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmsb_d4_f64_x_tied1, svfloat64_t, double, -+ z0 = svnmsb_n_f64_x (p0, z0, z1, d4), -+ z0 = svnmsb_x (p0, z0, z1, d4)) -+ -+/* -+** nmsb_d4_f64_x_tied2: -+** mov (z[0-9]+\.d), d4 -+** fnmsb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (nmsb_d4_f64_x_tied2, svfloat64_t, double, -+ z0 = svnmsb_n_f64_x (p0, z1, z0, d4), -+ z0 = svnmsb_x (p0, z1, z0, d4)) -+ -+/* -+** nmsb_d4_f64_x_untied: { xfail *-*-* } -+** mov z0\.d, d4 -+** fnmls z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_ZD (nmsb_d4_f64_x_untied, svfloat64_t, double, -+ z0 = svnmsb_n_f64_x (p0, z1, z2, d4), -+ z0 = svnmsb_x (p0, z1, z2, d4)) -+ -+/* -+** nmsb_2_f64_x_tied1: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fnmsb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_2_f64_x_tied1, svfloat64_t, -+ z0 = svnmsb_n_f64_x (p0, z0, z1, 2), -+ z0 = svnmsb_x (p0, z0, z1, 2)) -+ -+/* -+** nmsb_2_f64_x_tied2: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? -+** fnmsb z0\.d, p0/m, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_2_f64_x_tied2, svfloat64_t, -+ z0 = svnmsb_n_f64_x (p0, z1, z0, 2), -+ z0 = svnmsb_x (p0, z1, z0, 2)) -+ -+/* -+** nmsb_2_f64_x_untied: -+** fmov z0\.d, #2\.0(?:e\+0)? -+** fnmls z0\.d, p0/m, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (nmsb_2_f64_x_untied, svfloat64_t, -+ z0 = svnmsb_n_f64_x (p0, z1, z2, 2), -+ z0 = svnmsb_x (p0, z1, z2, 2)) -+ -+/* -+** ptrue_nmsb_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmsb_f64_x_tied1, svfloat64_t, -+ z0 = svnmsb_f64_x (svptrue_b64 (), z0, z1, z2), -+ z0 = svnmsb_x (svptrue_b64 (), z0, z1, z2)) -+ -+/* -+** ptrue_nmsb_f64_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmsb_f64_x_tied2, svfloat64_t, -+ z0 = svnmsb_f64_x (svptrue_b64 (), z1, z0, z2), -+ z0 = svnmsb_x (svptrue_b64 (), z1, z0, z2)) -+ -+/* -+** ptrue_nmsb_f64_x_tied3: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmsb_f64_x_tied3, svfloat64_t, -+ z0 = svnmsb_f64_x (svptrue_b64 (), z1, z2, z0), -+ z0 = svnmsb_x (svptrue_b64 (), z1, z2, z0)) -+ -+/* -+** ptrue_nmsb_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmsb_f64_x_untied, svfloat64_t, -+ z0 = svnmsb_f64_x (svptrue_b64 (), z1, z2, z3), -+ z0 = svnmsb_x (svptrue_b64 (), z1, z2, z3)) -+ -+/* -+** ptrue_nmsb_2_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmsb_2_f64_x_tied1, svfloat64_t, -+ z0 = svnmsb_n_f64_x (svptrue_b64 (), z0, z1, 2), -+ z0 = svnmsb_x (svptrue_b64 (), z0, z1, 2)) -+ -+/* -+** ptrue_nmsb_2_f64_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmsb_2_f64_x_tied2, svfloat64_t, -+ z0 = svnmsb_n_f64_x (svptrue_b64 (), z1, z0, 2), -+ z0 = svnmsb_x (svptrue_b64 (), z1, z0, 2)) -+ -+/* -+** ptrue_nmsb_2_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_nmsb_2_f64_x_untied, svfloat64_t, -+ z0 = svnmsb_n_f64_x (svptrue_b64 (), z1, z2, 2), -+ z0 = svnmsb_x (svptrue_b64 (), z1, z2, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nor_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nor_b.c -new file mode 100644 -index 000000000..997e34537 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/nor_b.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** nor_b_z_tied1: -+** nor p0\.b, p3/z, p0\.b, p1\.b -+** ret -+*/ -+TEST_UNIFORM_P (nor_b_z_tied1, -+ p0 = svnor_b_z (p3, p0, p1), -+ p0 = svnor_z (p3, p0, p1)) -+ -+/* -+** nor_b_z_tied2: -+** nor p0\.b, p3/z, p1\.b, p0\.b -+** ret -+*/ -+TEST_UNIFORM_P (nor_b_z_tied2, -+ p0 = svnor_b_z (p3, p1, p0), -+ p0 = svnor_z (p3, p1, p0)) -+ -+/* -+** nor_b_z_untied: -+** nor p0\.b, p3/z, p1\.b, p2\.b -+** ret -+*/ -+TEST_UNIFORM_P (nor_b_z_untied, -+ p0 = svnor_b_z (p3, p1, p2), -+ p0 = svnor_z (p3, p1, p2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_b.c -new file mode 100644 -index 000000000..23a3a6aae ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_b.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** not_b_z_tied1: -+** not p0\.b, p3/z, p0\.b -+** ret -+*/ -+TEST_UNIFORM_P (not_b_z_tied1, -+ p0 = svnot_b_z (p3, p0), -+ p0 = svnot_z (p3, p0)) -+ -+/* -+** not_b_z_untied: -+** not p0\.b, p3/z, p1\.b -+** ret -+*/ -+TEST_UNIFORM_P (not_b_z_untied, -+ p0 = svnot_b_z (p3, p1), -+ p0 = svnot_z (p3, p1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_s16.c -new file mode 100644 -index 000000000..bacd6b12c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_s16.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** not_s16_m_tied12: -+** not z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (not_s16_m_tied12, svint16_t, -+ z0 = svnot_s16_m (z0, p0, z0), -+ z0 = svnot_m (z0, p0, z0)) -+ -+/* -+** not_s16_m_tied1: -+** not z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (not_s16_m_tied1, svint16_t, -+ z0 = svnot_s16_m (z0, p0, z1), -+ z0 = svnot_m (z0, p0, z1)) -+ -+/* -+** not_s16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d 
-+** movprfx z0, z1 -+** not z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (not_s16_m_tied2, svint16_t, -+ z0 = svnot_s16_m (z1, p0, z0), -+ z0 = svnot_m (z1, p0, z0)) -+ -+/* -+** not_s16_m_untied: -+** movprfx z0, z2 -+** not z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (not_s16_m_untied, svint16_t, -+ z0 = svnot_s16_m (z2, p0, z1), -+ z0 = svnot_m (z2, p0, z1)) -+ -+/* -+** not_s16_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, \1\.h -+** not z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (not_s16_z_tied1, svint16_t, -+ z0 = svnot_s16_z (p0, z0), -+ z0 = svnot_z (p0, z0)) -+ -+/* -+** not_s16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** not z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (not_s16_z_untied, svint16_t, -+ z0 = svnot_s16_z (p0, z1), -+ z0 = svnot_z (p0, z1)) -+ -+/* -+** not_s16_x_tied1: -+** not z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (not_s16_x_tied1, svint16_t, -+ z0 = svnot_s16_x (p0, z0), -+ z0 = svnot_x (p0, z0)) -+ -+/* -+** not_s16_x_untied: -+** not z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (not_s16_x_untied, svint16_t, -+ z0 = svnot_s16_x (p0, z1), -+ z0 = svnot_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_s32.c -new file mode 100644 -index 000000000..8b15d6e91 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_s32.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** not_s32_m_tied12: -+** not z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (not_s32_m_tied12, svint32_t, -+ z0 = svnot_s32_m (z0, p0, z0), -+ z0 = svnot_m (z0, p0, z0)) -+ -+/* -+** not_s32_m_tied1: -+** not z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (not_s32_m_tied1, svint32_t, -+ z0 = svnot_s32_m (z0, p0, z1), -+ z0 = svnot_m (z0, p0, z1)) -+ -+/* -+** not_s32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** not z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (not_s32_m_tied2, svint32_t, -+ z0 = svnot_s32_m (z1, p0, z0), -+ z0 = svnot_m (z1, p0, z0)) -+ -+/* -+** not_s32_m_untied: -+** movprfx z0, z2 -+** not z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (not_s32_m_untied, svint32_t, -+ z0 = svnot_s32_m (z2, p0, z1), -+ z0 = svnot_m (z2, p0, z1)) -+ -+/* -+** not_s32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** not z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (not_s32_z_tied1, svint32_t, -+ z0 = svnot_s32_z (p0, z0), -+ z0 = svnot_z (p0, z0)) -+ -+/* -+** not_s32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** not z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (not_s32_z_untied, svint32_t, -+ z0 = svnot_s32_z (p0, z1), -+ z0 = svnot_z (p0, z1)) -+ -+/* -+** not_s32_x_tied1: -+** not z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (not_s32_x_tied1, svint32_t, -+ z0 = svnot_s32_x (p0, z0), -+ z0 = svnot_x (p0, z0)) -+ -+/* -+** not_s32_x_untied: -+** not z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (not_s32_x_untied, svint32_t, -+ z0 = svnot_s32_x (p0, z1), -+ z0 = svnot_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_s64.c -new file mode 100644 -index 000000000..8e7f7b9e8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_s64.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** not_s64_m_tied12: -+** not 
z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (not_s64_m_tied12, svint64_t, -+ z0 = svnot_s64_m (z0, p0, z0), -+ z0 = svnot_m (z0, p0, z0)) -+ -+/* -+** not_s64_m_tied1: -+** not z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (not_s64_m_tied1, svint64_t, -+ z0 = svnot_s64_m (z0, p0, z1), -+ z0 = svnot_m (z0, p0, z1)) -+ -+/* -+** not_s64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** not z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (not_s64_m_tied2, svint64_t, -+ z0 = svnot_s64_m (z1, p0, z0), -+ z0 = svnot_m (z1, p0, z0)) -+ -+/* -+** not_s64_m_untied: -+** movprfx z0, z2 -+** not z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (not_s64_m_untied, svint64_t, -+ z0 = svnot_s64_m (z2, p0, z1), -+ z0 = svnot_m (z2, p0, z1)) -+ -+/* -+** not_s64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** not z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (not_s64_z_tied1, svint64_t, -+ z0 = svnot_s64_z (p0, z0), -+ z0 = svnot_z (p0, z0)) -+ -+/* -+** not_s64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** not z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (not_s64_z_untied, svint64_t, -+ z0 = svnot_s64_z (p0, z1), -+ z0 = svnot_z (p0, z1)) -+ -+/* -+** not_s64_x_tied1: -+** not z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (not_s64_x_tied1, svint64_t, -+ z0 = svnot_s64_x (p0, z0), -+ z0 = svnot_x (p0, z0)) -+ -+/* -+** not_s64_x_untied: -+** not z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (not_s64_x_untied, svint64_t, -+ z0 = svnot_s64_x (p0, z1), -+ z0 = svnot_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_s8.c -new file mode 100644 -index 000000000..e807f08f8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_s8.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** not_s8_m_tied12: -+** not z0\.b, p0/m, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (not_s8_m_tied12, svint8_t, -+ z0 = svnot_s8_m (z0, p0, z0), -+ z0 = svnot_m (z0, p0, z0)) -+ -+/* -+** not_s8_m_tied1: -+** not z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (not_s8_m_tied1, svint8_t, -+ z0 = svnot_s8_m (z0, p0, z1), -+ z0 = svnot_m (z0, p0, z1)) -+ -+/* -+** not_s8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** not z0\.b, p0/m, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (not_s8_m_tied2, svint8_t, -+ z0 = svnot_s8_m (z1, p0, z0), -+ z0 = svnot_m (z1, p0, z0)) -+ -+/* -+** not_s8_m_untied: -+** movprfx z0, z2 -+** not z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (not_s8_m_untied, svint8_t, -+ z0 = svnot_s8_m (z2, p0, z1), -+ z0 = svnot_m (z2, p0, z1)) -+ -+/* -+** not_s8_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.b, p0/z, \1\.b -+** not z0\.b, p0/m, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (not_s8_z_tied1, svint8_t, -+ z0 = svnot_s8_z (p0, z0), -+ z0 = svnot_z (p0, z0)) -+ -+/* -+** not_s8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** not z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (not_s8_z_untied, svint8_t, -+ z0 = svnot_s8_z (p0, z1), -+ z0 = svnot_z (p0, z1)) -+ -+/* -+** not_s8_x_tied1: -+** not z0\.b, p0/m, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (not_s8_x_tied1, svint8_t, -+ z0 = svnot_s8_x (p0, z0), -+ z0 = svnot_x (p0, z0)) -+ -+/* -+** not_s8_x_untied: -+** not z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (not_s8_x_untied, svint8_t, -+ z0 = svnot_s8_x (p0, z1), -+ z0 = svnot_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_u16.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_u16.c -new file mode 100644 -index 000000000..c812005f1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_u16.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** not_u16_m_tied12: -+** not z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (not_u16_m_tied12, svuint16_t, -+ z0 = svnot_u16_m (z0, p0, z0), -+ z0 = svnot_m (z0, p0, z0)) -+ -+/* -+** not_u16_m_tied1: -+** not z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (not_u16_m_tied1, svuint16_t, -+ z0 = svnot_u16_m (z0, p0, z1), -+ z0 = svnot_m (z0, p0, z1)) -+ -+/* -+** not_u16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** not z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (not_u16_m_tied2, svuint16_t, -+ z0 = svnot_u16_m (z1, p0, z0), -+ z0 = svnot_m (z1, p0, z0)) -+ -+/* -+** not_u16_m_untied: -+** movprfx z0, z2 -+** not z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (not_u16_m_untied, svuint16_t, -+ z0 = svnot_u16_m (z2, p0, z1), -+ z0 = svnot_m (z2, p0, z1)) -+ -+/* -+** not_u16_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, \1\.h -+** not z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (not_u16_z_tied1, svuint16_t, -+ z0 = svnot_u16_z (p0, z0), -+ z0 = svnot_z (p0, z0)) -+ -+/* -+** not_u16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** not z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (not_u16_z_untied, svuint16_t, -+ z0 = svnot_u16_z (p0, z1), -+ z0 = svnot_z (p0, z1)) -+ -+/* -+** not_u16_x_tied1: -+** not z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (not_u16_x_tied1, svuint16_t, -+ z0 = svnot_u16_x (p0, z0), -+ z0 = svnot_x (p0, z0)) -+ -+/* -+** not_u16_x_untied: -+** not z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (not_u16_x_untied, svuint16_t, -+ z0 = svnot_u16_x (p0, z1), -+ z0 = svnot_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_u32.c -new file mode 100644 -index 000000000..7b7e9ca21 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_u32.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** not_u32_m_tied12: -+** not z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (not_u32_m_tied12, svuint32_t, -+ z0 = svnot_u32_m (z0, p0, z0), -+ z0 = svnot_m (z0, p0, z0)) -+ -+/* -+** not_u32_m_tied1: -+** not z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (not_u32_m_tied1, svuint32_t, -+ z0 = svnot_u32_m (z0, p0, z1), -+ z0 = svnot_m (z0, p0, z1)) -+ -+/* -+** not_u32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** not z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (not_u32_m_tied2, svuint32_t, -+ z0 = svnot_u32_m (z1, p0, z0), -+ z0 = svnot_m (z1, p0, z0)) -+ -+/* -+** not_u32_m_untied: -+** movprfx z0, z2 -+** not z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (not_u32_m_untied, svuint32_t, -+ z0 = svnot_u32_m (z2, p0, z1), -+ z0 = svnot_m (z2, p0, z1)) -+ -+/* -+** not_u32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** not z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (not_u32_z_tied1, svuint32_t, -+ z0 = svnot_u32_z (p0, z0), -+ z0 = svnot_z (p0, z0)) -+ -+/* -+** not_u32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** not z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (not_u32_z_untied, svuint32_t, -+ z0 = svnot_u32_z (p0, z1), -+ z0 = svnot_z (p0, z1)) -+ -+/* -+** 
not_u32_x_tied1: -+** not z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (not_u32_x_tied1, svuint32_t, -+ z0 = svnot_u32_x (p0, z0), -+ z0 = svnot_x (p0, z0)) -+ -+/* -+** not_u32_x_untied: -+** not z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (not_u32_x_untied, svuint32_t, -+ z0 = svnot_u32_x (p0, z1), -+ z0 = svnot_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_u64.c -new file mode 100644 -index 000000000..27b92ad84 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_u64.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** not_u64_m_tied12: -+** not z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (not_u64_m_tied12, svuint64_t, -+ z0 = svnot_u64_m (z0, p0, z0), -+ z0 = svnot_m (z0, p0, z0)) -+ -+/* -+** not_u64_m_tied1: -+** not z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (not_u64_m_tied1, svuint64_t, -+ z0 = svnot_u64_m (z0, p0, z1), -+ z0 = svnot_m (z0, p0, z1)) -+ -+/* -+** not_u64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** not z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (not_u64_m_tied2, svuint64_t, -+ z0 = svnot_u64_m (z1, p0, z0), -+ z0 = svnot_m (z1, p0, z0)) -+ -+/* -+** not_u64_m_untied: -+** movprfx z0, z2 -+** not z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (not_u64_m_untied, svuint64_t, -+ z0 = svnot_u64_m (z2, p0, z1), -+ z0 = svnot_m (z2, p0, z1)) -+ -+/* -+** not_u64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** not z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (not_u64_z_tied1, svuint64_t, -+ z0 = svnot_u64_z (p0, z0), -+ z0 = svnot_z (p0, z0)) -+ -+/* -+** not_u64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** not z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (not_u64_z_untied, svuint64_t, -+ z0 = svnot_u64_z (p0, z1), -+ z0 = svnot_z (p0, z1)) -+ -+/* -+** not_u64_x_tied1: -+** not z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (not_u64_x_tied1, svuint64_t, -+ z0 = svnot_u64_x (p0, z0), -+ z0 = svnot_x (p0, z0)) -+ -+/* -+** not_u64_x_untied: -+** not z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (not_u64_x_untied, svuint64_t, -+ z0 = svnot_u64_x (p0, z1), -+ z0 = svnot_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_u8.c -new file mode 100644 -index 000000000..bd2f36cad ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/not_u8.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** not_u8_m_tied12: -+** not z0\.b, p0/m, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (not_u8_m_tied12, svuint8_t, -+ z0 = svnot_u8_m (z0, p0, z0), -+ z0 = svnot_m (z0, p0, z0)) -+ -+/* -+** not_u8_m_tied1: -+** not z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (not_u8_m_tied1, svuint8_t, -+ z0 = svnot_u8_m (z0, p0, z1), -+ z0 = svnot_m (z0, p0, z1)) -+ -+/* -+** not_u8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** not z0\.b, p0/m, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (not_u8_m_tied2, svuint8_t, -+ z0 = svnot_u8_m (z1, p0, z0), -+ z0 = svnot_m (z1, p0, z0)) -+ -+/* -+** not_u8_m_untied: -+** movprfx z0, z2 -+** not z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (not_u8_m_untied, svuint8_t, -+ z0 = svnot_u8_m (z2, p0, z1), -+ z0 = svnot_m (z2, p0, z1)) -+ -+/* -+** not_u8_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.b, p0/z, 
\1\.b -+** not z0\.b, p0/m, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (not_u8_z_tied1, svuint8_t, -+ z0 = svnot_u8_z (p0, z0), -+ z0 = svnot_z (p0, z0)) -+ -+/* -+** not_u8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** not z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (not_u8_z_untied, svuint8_t, -+ z0 = svnot_u8_z (p0, z1), -+ z0 = svnot_z (p0, z1)) -+ -+/* -+** not_u8_x_tied1: -+** not z0\.b, p0/m, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (not_u8_x_tied1, svuint8_t, -+ z0 = svnot_u8_x (p0, z0), -+ z0 = svnot_x (p0, z0)) -+ -+/* -+** not_u8_x_untied: -+** not z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (not_u8_x_untied, svuint8_t, -+ z0 = svnot_u8_x (p0, z1), -+ z0 = svnot_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orn_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orn_b.c -new file mode 100644 -index 000000000..423a18bc7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orn_b.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** orn_b_z_tied1: -+** orn p0\.b, p3/z, p0\.b, p1\.b -+** ret -+*/ -+TEST_UNIFORM_P (orn_b_z_tied1, -+ p0 = svorn_b_z (p3, p0, p1), -+ p0 = svorn_z (p3, p0, p1)) -+ -+/* -+** orn_b_z_tied2: -+** orn p0\.b, p3/z, p1\.b, p0\.b -+** ret -+*/ -+TEST_UNIFORM_P (orn_b_z_tied2, -+ p0 = svorn_b_z (p3, p1, p0), -+ p0 = svorn_z (p3, p1, p0)) -+ -+/* -+** orn_b_z_untied: -+** orn p0\.b, p3/z, p1\.b, p2\.b -+** ret -+*/ -+TEST_UNIFORM_P (orn_b_z_untied, -+ p0 = svorn_b_z (p3, p1, p2), -+ p0 = svorn_z (p3, p1, p2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_b.c -new file mode 100644 -index 000000000..fba9ba7df ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_b.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** orr_b_z_tied1: -+** orr p0\.b, p3/z, (p0\.b, p1\.b|p1\.b, p0\.b) -+** ret -+*/ -+TEST_UNIFORM_P (orr_b_z_tied1, -+ p0 = svorr_b_z (p3, p0, p1), -+ p0 = svorr_z (p3, p0, p1)) -+ -+/* -+** orr_b_z_tied2: -+** orr p0\.b, p3/z, (p0\.b, p1\.b|p1\.b, p0\.b) -+** ret -+*/ -+TEST_UNIFORM_P (orr_b_z_tied2, -+ p0 = svorr_b_z (p3, p1, p0), -+ p0 = svorr_z (p3, p1, p0)) -+ -+/* -+** orr_b_z_untied: -+** orr p0\.b, p3/z, (p1\.b, p2\.b|p2\.b, p1\.b) -+** ret -+*/ -+TEST_UNIFORM_P (orr_b_z_untied, -+ p0 = svorr_b_z (p3, p1, p2), -+ p0 = svorr_z (p3, p1, p2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_s16.c -new file mode 100644 -index 000000000..62b707a9c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_s16.c -@@ -0,0 +1,376 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** orr_s16_m_tied1: -+** orr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (orr_s16_m_tied1, svint16_t, -+ z0 = svorr_s16_m (p0, z0, z1), -+ z0 = svorr_m (p0, z0, z1)) -+ -+/* -+** orr_s16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** orr z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (orr_s16_m_tied2, svint16_t, -+ z0 = svorr_s16_m (p0, z1, z0), -+ z0 = svorr_m (p0, z1, z0)) -+ -+/* -+** orr_s16_m_untied: -+** movprfx z0, z1 -+** orr z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (orr_s16_m_untied, svint16_t, -+ z0 = svorr_s16_m (p0, z1, z2), -+ z0 = svorr_m (p0, z1, z2)) -+ -+/* -+** 
orr_w0_s16_m_tied1: -+** mov (z[0-9]+\.h), w0 -+** orr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_w0_s16_m_tied1, svint16_t, int16_t, -+ z0 = svorr_n_s16_m (p0, z0, x0), -+ z0 = svorr_m (p0, z0, x0)) -+ -+/* -+** orr_w0_s16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** orr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_w0_s16_m_untied, svint16_t, int16_t, -+ z0 = svorr_n_s16_m (p0, z1, x0), -+ z0 = svorr_m (p0, z1, x0)) -+ -+/* -+** orr_1_s16_m_tied1: -+** mov (z[0-9]+\.h), #1 -+** orr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_s16_m_tied1, svint16_t, -+ z0 = svorr_n_s16_m (p0, z0, 1), -+ z0 = svorr_m (p0, z0, 1)) -+ -+/* -+** orr_1_s16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), #1 -+** movprfx z0, z1 -+** orr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_s16_m_untied, svint16_t, -+ z0 = svorr_n_s16_m (p0, z1, 1), -+ z0 = svorr_m (p0, z1, 1)) -+ -+/* -+** orr_m2_s16_m: -+** mov (z[0-9]+\.h), #-2 -+** orr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m2_s16_m, svint16_t, -+ z0 = svorr_n_s16_m (p0, z0, -2), -+ z0 = svorr_m (p0, z0, -2)) -+ -+/* -+** orr_s16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** orr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (orr_s16_z_tied1, svint16_t, -+ z0 = svorr_s16_z (p0, z0, z1), -+ z0 = svorr_z (p0, z0, z1)) -+ -+/* -+** orr_s16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** orr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (orr_s16_z_tied2, svint16_t, -+ z0 = svorr_s16_z (p0, z1, z0), -+ z0 = svorr_z (p0, z1, z0)) -+ -+/* -+** orr_s16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** orr z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** orr z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_s16_z_untied, svint16_t, -+ z0 = svorr_s16_z (p0, z1, z2), -+ z0 = svorr_z (p0, z1, z2)) -+ -+/* -+** orr_w0_s16_z_tied1: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** orr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_w0_s16_z_tied1, svint16_t, int16_t, -+ z0 = svorr_n_s16_z (p0, z0, x0), -+ z0 = svorr_z (p0, z0, x0)) -+ -+/* -+** orr_w0_s16_z_untied: -+** mov (z[0-9]+\.h), w0 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** orr z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** orr z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_w0_s16_z_untied, svint16_t, int16_t, -+ z0 = svorr_n_s16_z (p0, z1, x0), -+ z0 = svorr_z (p0, z1, x0)) -+ -+/* -+** orr_1_s16_z_tied1: -+** mov (z[0-9]+\.h), #1 -+** movprfx z0\.h, p0/z, z0\.h -+** orr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_s16_z_tied1, svint16_t, -+ z0 = svorr_n_s16_z (p0, z0, 1), -+ z0 = svorr_z (p0, z0, 1)) -+ -+/* -+** orr_1_s16_z_untied: -+** mov (z[0-9]+\.h), #1 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** orr z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** orr z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_s16_z_untied, svint16_t, -+ z0 = svorr_n_s16_z (p0, z1, 1), -+ z0 = svorr_z (p0, z1, 1)) -+ -+/* -+** orr_s16_x_tied1: -+** orr z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_s16_x_tied1, svint16_t, -+ z0 = svorr_s16_x (p0, z0, z1), -+ z0 = svorr_x (p0, z0, z1)) -+ -+/* -+** orr_s16_x_tied2: -+** orr z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_s16_x_tied2, svint16_t, -+ z0 = svorr_s16_x (p0, z1, z0), -+ z0 = svorr_x (p0, z1, z0)) -+ -+/* -+** orr_s16_x_untied: -+** orr z0\.d, 
(z1\.d, z2\.d|z2\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_s16_x_untied, svint16_t, -+ z0 = svorr_s16_x (p0, z1, z2), -+ z0 = svorr_x (p0, z1, z2)) -+ -+/* -+** orr_w0_s16_x_tied1: -+** mov (z[0-9]+)\.h, w0 -+** orr z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_w0_s16_x_tied1, svint16_t, int16_t, -+ z0 = svorr_n_s16_x (p0, z0, x0), -+ z0 = svorr_x (p0, z0, x0)) -+ -+/* -+** orr_w0_s16_x_untied: -+** mov (z[0-9]+)\.h, w0 -+** orr z0\.d, (z1\.d, \1\.d|\1\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_w0_s16_x_untied, svint16_t, int16_t, -+ z0 = svorr_n_s16_x (p0, z1, x0), -+ z0 = svorr_x (p0, z1, x0)) -+ -+/* -+** orr_1_s16_x_tied1: -+** orr z0\.h, z0\.h, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_s16_x_tied1, svint16_t, -+ z0 = svorr_n_s16_x (p0, z0, 1), -+ z0 = svorr_x (p0, z0, 1)) -+ -+/* -+** orr_1_s16_x_untied: -+** movprfx z0, z1 -+** orr z0\.h, z0\.h, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_s16_x_untied, svint16_t, -+ z0 = svorr_n_s16_x (p0, z1, 1), -+ z0 = svorr_x (p0, z1, 1)) -+ -+/* -+** orr_127_s16_x: -+** orr z0\.h, z0\.h, #0x7f -+** ret -+*/ -+TEST_UNIFORM_Z (orr_127_s16_x, svint16_t, -+ z0 = svorr_n_s16_x (p0, z0, 127), -+ z0 = svorr_x (p0, z0, 127)) -+ -+/* -+** orr_128_s16_x: -+** orr z0\.h, z0\.h, #0x80 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_128_s16_x, svint16_t, -+ z0 = svorr_n_s16_x (p0, z0, 128), -+ z0 = svorr_x (p0, z0, 128)) -+ -+/* -+** orr_255_s16_x: -+** orr z0\.h, z0\.h, #0xff -+** ret -+*/ -+TEST_UNIFORM_Z (orr_255_s16_x, svint16_t, -+ z0 = svorr_n_s16_x (p0, z0, 255), -+ z0 = svorr_x (p0, z0, 255)) -+ -+/* -+** orr_256_s16_x: -+** orr z0\.h, z0\.h, #0x100 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_256_s16_x, svint16_t, -+ z0 = svorr_n_s16_x (p0, z0, 256), -+ z0 = svorr_x (p0, z0, 256)) -+ -+/* -+** orr_257_s16_x: -+** orr z0\.h, z0\.h, #0x101 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_257_s16_x, svint16_t, -+ z0 = svorr_n_s16_x (p0, z0, 257), -+ z0 = svorr_x (p0, z0, 257)) -+ -+/* -+** orr_512_s16_x: -+** orr z0\.h, z0\.h, #0x200 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_512_s16_x, svint16_t, -+ z0 = svorr_n_s16_x (p0, z0, 512), -+ z0 = svorr_x (p0, z0, 512)) -+ -+/* -+** orr_65280_s16_x: -+** orr z0\.h, z0\.h, #0xff00 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_65280_s16_x, svint16_t, -+ z0 = svorr_n_s16_x (p0, z0, 0xff00), -+ z0 = svorr_x (p0, z0, 0xff00)) -+ -+/* -+** orr_m127_s16_x: -+** orr z0\.h, z0\.h, #0xff81 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m127_s16_x, svint16_t, -+ z0 = svorr_n_s16_x (p0, z0, -127), -+ z0 = svorr_x (p0, z0, -127)) -+ -+/* -+** orr_m128_s16_x: -+** orr z0\.h, z0\.h, #0xff80 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m128_s16_x, svint16_t, -+ z0 = svorr_n_s16_x (p0, z0, -128), -+ z0 = svorr_x (p0, z0, -128)) -+ -+/* -+** orr_m255_s16_x: -+** orr z0\.h, z0\.h, #0xff01 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m255_s16_x, svint16_t, -+ z0 = svorr_n_s16_x (p0, z0, -255), -+ z0 = svorr_x (p0, z0, -255)) -+ -+/* -+** orr_m256_s16_x: -+** orr z0\.h, z0\.h, #0xff00 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m256_s16_x, svint16_t, -+ z0 = svorr_n_s16_x (p0, z0, -256), -+ z0 = svorr_x (p0, z0, -256)) -+ -+/* -+** orr_m257_s16_x: -+** orr z0\.h, z0\.h, #0xfeff -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m257_s16_x, svint16_t, -+ z0 = svorr_n_s16_x (p0, z0, -257), -+ z0 = svorr_x (p0, z0, -257)) -+ -+/* -+** orr_m512_s16_x: -+** orr z0\.h, z0\.h, #0xfe00 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m512_s16_x, svint16_t, -+ z0 = svorr_n_s16_x (p0, z0, -512), -+ z0 = svorr_x (p0, z0, -512)) -+ -+/* -+** orr_m32768_s16_x: -+** orr z0\.h, z0\.h, #0x8000 -+** ret -+*/ 
-+TEST_UNIFORM_Z (orr_m32768_s16_x, svint16_t, -+ z0 = svorr_n_s16_x (p0, z0, -0x8000), -+ z0 = svorr_x (p0, z0, -0x8000)) -+ -+/* -+** orr_5_s16_x: -+** mov (z[0-9]+)\.h, #5 -+** orr z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_5_s16_x, svint16_t, -+ z0 = svorr_n_s16_x (p0, z0, 5), -+ z0 = svorr_x (p0, z0, 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_s32.c -new file mode 100644 -index 000000000..2e0e1e888 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_s32.c -@@ -0,0 +1,372 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** orr_s32_m_tied1: -+** orr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (orr_s32_m_tied1, svint32_t, -+ z0 = svorr_s32_m (p0, z0, z1), -+ z0 = svorr_m (p0, z0, z1)) -+ -+/* -+** orr_s32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** orr z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (orr_s32_m_tied2, svint32_t, -+ z0 = svorr_s32_m (p0, z1, z0), -+ z0 = svorr_m (p0, z1, z0)) -+ -+/* -+** orr_s32_m_untied: -+** movprfx z0, z1 -+** orr z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (orr_s32_m_untied, svint32_t, -+ z0 = svorr_s32_m (p0, z1, z2), -+ z0 = svorr_m (p0, z1, z2)) -+ -+/* -+** orr_w0_s32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** orr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_w0_s32_m_tied1, svint32_t, int32_t, -+ z0 = svorr_n_s32_m (p0, z0, x0), -+ z0 = svorr_m (p0, z0, x0)) -+ -+/* -+** orr_w0_s32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** orr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_w0_s32_m_untied, svint32_t, int32_t, -+ z0 = svorr_n_s32_m (p0, z1, x0), -+ z0 = svorr_m (p0, z1, x0)) -+ -+/* -+** orr_1_s32_m_tied1: -+** mov (z[0-9]+\.s), #1 -+** orr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_s32_m_tied1, svint32_t, -+ z0 = svorr_n_s32_m (p0, z0, 1), -+ z0 = svorr_m (p0, z0, 1)) -+ -+/* -+** orr_1_s32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #1 -+** movprfx z0, z1 -+** orr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_s32_m_untied, svint32_t, -+ z0 = svorr_n_s32_m (p0, z1, 1), -+ z0 = svorr_m (p0, z1, 1)) -+ -+/* -+** orr_m2_s32_m: -+** mov (z[0-9]+\.s), #-2 -+** orr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m2_s32_m, svint32_t, -+ z0 = svorr_n_s32_m (p0, z0, -2), -+ z0 = svorr_m (p0, z0, -2)) -+ -+/* -+** orr_s32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** orr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (orr_s32_z_tied1, svint32_t, -+ z0 = svorr_s32_z (p0, z0, z1), -+ z0 = svorr_z (p0, z0, z1)) -+ -+/* -+** orr_s32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** orr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (orr_s32_z_tied2, svint32_t, -+ z0 = svorr_s32_z (p0, z1, z0), -+ z0 = svorr_z (p0, z1, z0)) -+ -+/* -+** orr_s32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** orr z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** orr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_s32_z_untied, svint32_t, -+ z0 = svorr_s32_z (p0, z1, z2), -+ z0 = svorr_z (p0, z1, z2)) -+ -+/* -+** orr_w0_s32_z_tied1: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** orr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_w0_s32_z_tied1, svint32_t, int32_t, -+ z0 = svorr_n_s32_z (p0, z0, x0), -+ z0 = svorr_z (p0, z0, x0)) -+ -+/* -+** 
orr_w0_s32_z_untied: -+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** orr z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** orr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_w0_s32_z_untied, svint32_t, int32_t, -+ z0 = svorr_n_s32_z (p0, z1, x0), -+ z0 = svorr_z (p0, z1, x0)) -+ -+/* -+** orr_1_s32_z_tied1: -+** mov (z[0-9]+\.s), #1 -+** movprfx z0\.s, p0/z, z0\.s -+** orr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_s32_z_tied1, svint32_t, -+ z0 = svorr_n_s32_z (p0, z0, 1), -+ z0 = svorr_z (p0, z0, 1)) -+ -+/* -+** orr_1_s32_z_untied: -+** mov (z[0-9]+\.s), #1 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** orr z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** orr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_s32_z_untied, svint32_t, -+ z0 = svorr_n_s32_z (p0, z1, 1), -+ z0 = svorr_z (p0, z1, 1)) -+ -+/* -+** orr_s32_x_tied1: -+** orr z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_s32_x_tied1, svint32_t, -+ z0 = svorr_s32_x (p0, z0, z1), -+ z0 = svorr_x (p0, z0, z1)) -+ -+/* -+** orr_s32_x_tied2: -+** orr z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_s32_x_tied2, svint32_t, -+ z0 = svorr_s32_x (p0, z1, z0), -+ z0 = svorr_x (p0, z1, z0)) -+ -+/* -+** orr_s32_x_untied: -+** orr z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_s32_x_untied, svint32_t, -+ z0 = svorr_s32_x (p0, z1, z2), -+ z0 = svorr_x (p0, z1, z2)) -+ -+/* -+** orr_w0_s32_x_tied1: -+** mov (z[0-9]+)\.s, w0 -+** orr z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_w0_s32_x_tied1, svint32_t, int32_t, -+ z0 = svorr_n_s32_x (p0, z0, x0), -+ z0 = svorr_x (p0, z0, x0)) -+ -+/* -+** orr_w0_s32_x_untied: -+** mov (z[0-9]+)\.s, w0 -+** orr z0\.d, (z1\.d, \1\.d|\1\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_w0_s32_x_untied, svint32_t, int32_t, -+ z0 = svorr_n_s32_x (p0, z1, x0), -+ z0 = svorr_x (p0, z1, x0)) -+ -+/* -+** orr_1_s32_x_tied1: -+** orr z0\.s, z0\.s, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_s32_x_tied1, svint32_t, -+ z0 = svorr_n_s32_x (p0, z0, 1), -+ z0 = svorr_x (p0, z0, 1)) -+ -+/* -+** orr_1_s32_x_untied: -+** movprfx z0, z1 -+** orr z0\.s, z0\.s, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_s32_x_untied, svint32_t, -+ z0 = svorr_n_s32_x (p0, z1, 1), -+ z0 = svorr_x (p0, z1, 1)) -+ -+/* -+** orr_127_s32_x: -+** orr z0\.s, z0\.s, #0x7f -+** ret -+*/ -+TEST_UNIFORM_Z (orr_127_s32_x, svint32_t, -+ z0 = svorr_n_s32_x (p0, z0, 127), -+ z0 = svorr_x (p0, z0, 127)) -+ -+/* -+** orr_128_s32_x: -+** orr z0\.s, z0\.s, #0x80 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_128_s32_x, svint32_t, -+ z0 = svorr_n_s32_x (p0, z0, 128), -+ z0 = svorr_x (p0, z0, 128)) -+ -+/* -+** orr_255_s32_x: -+** orr z0\.s, z0\.s, #0xff -+** ret -+*/ -+TEST_UNIFORM_Z (orr_255_s32_x, svint32_t, -+ z0 = svorr_n_s32_x (p0, z0, 255), -+ z0 = svorr_x (p0, z0, 255)) -+ -+/* -+** orr_256_s32_x: -+** orr z0\.s, z0\.s, #0x100 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_256_s32_x, svint32_t, -+ z0 = svorr_n_s32_x (p0, z0, 256), -+ z0 = svorr_x (p0, z0, 256)) -+ -+/* TODO: Bad code and needs fixing. 
*/ -+TEST_UNIFORM_Z (orr_257_s32_x, svint32_t, -+ z0 = svorr_n_s32_x (p0, z0, 257), -+ z0 = svorr_x (p0, z0, 257)) -+ -+/* -+** orr_512_s32_x: -+** orr z0\.s, z0\.s, #0x200 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_512_s32_x, svint32_t, -+ z0 = svorr_n_s32_x (p0, z0, 512), -+ z0 = svorr_x (p0, z0, 512)) -+ -+/* -+** orr_65280_s32_x: -+** orr z0\.s, z0\.s, #0xff00 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_65280_s32_x, svint32_t, -+ z0 = svorr_n_s32_x (p0, z0, 0xff00), -+ z0 = svorr_x (p0, z0, 0xff00)) -+ -+/* -+** orr_m127_s32_x: -+** orr z0\.s, z0\.s, #0xffffff81 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m127_s32_x, svint32_t, -+ z0 = svorr_n_s32_x (p0, z0, -127), -+ z0 = svorr_x (p0, z0, -127)) -+ -+/* -+** orr_m128_s32_x: -+** orr z0\.s, z0\.s, #0xffffff80 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m128_s32_x, svint32_t, -+ z0 = svorr_n_s32_x (p0, z0, -128), -+ z0 = svorr_x (p0, z0, -128)) -+ -+/* -+** orr_m255_s32_x: -+** orr z0\.s, z0\.s, #0xffffff01 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m255_s32_x, svint32_t, -+ z0 = svorr_n_s32_x (p0, z0, -255), -+ z0 = svorr_x (p0, z0, -255)) -+ -+/* -+** orr_m256_s32_x: -+** orr z0\.s, z0\.s, #0xffffff00 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m256_s32_x, svint32_t, -+ z0 = svorr_n_s32_x (p0, z0, -256), -+ z0 = svorr_x (p0, z0, -256)) -+ -+/* -+** orr_m257_s32_x: -+** orr z0\.s, z0\.s, #0xfffffeff -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m257_s32_x, svint32_t, -+ z0 = svorr_n_s32_x (p0, z0, -257), -+ z0 = svorr_x (p0, z0, -257)) -+ -+/* -+** orr_m512_s32_x: -+** orr z0\.s, z0\.s, #0xfffffe00 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m512_s32_x, svint32_t, -+ z0 = svorr_n_s32_x (p0, z0, -512), -+ z0 = svorr_x (p0, z0, -512)) -+ -+/* -+** orr_m32768_s32_x: -+** orr z0\.s, z0\.s, #0xffff8000 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m32768_s32_x, svint32_t, -+ z0 = svorr_n_s32_x (p0, z0, -0x8000), -+ z0 = svorr_x (p0, z0, -0x8000)) -+ -+/* -+** orr_5_s32_x: -+** mov (z[0-9]+)\.s, #5 -+** orr z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_5_s32_x, svint32_t, -+ z0 = svorr_n_s32_x (p0, z0, 5), -+ z0 = svorr_x (p0, z0, 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_s64.c -new file mode 100644 -index 000000000..1538fdd14 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_s64.c -@@ -0,0 +1,372 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** orr_s64_m_tied1: -+** orr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (orr_s64_m_tied1, svint64_t, -+ z0 = svorr_s64_m (p0, z0, z1), -+ z0 = svorr_m (p0, z0, z1)) -+ -+/* -+** orr_s64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** orr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_s64_m_tied2, svint64_t, -+ z0 = svorr_s64_m (p0, z1, z0), -+ z0 = svorr_m (p0, z1, z0)) -+ -+/* -+** orr_s64_m_untied: -+** movprfx z0, z1 -+** orr z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (orr_s64_m_untied, svint64_t, -+ z0 = svorr_s64_m (p0, z1, z2), -+ z0 = svorr_m (p0, z1, z2)) -+ -+/* -+** orr_x0_s64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** orr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_x0_s64_m_tied1, svint64_t, int64_t, -+ z0 = svorr_n_s64_m (p0, z0, x0), -+ z0 = svorr_m (p0, z0, x0)) -+ -+/* -+** orr_x0_s64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** orr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_x0_s64_m_untied, svint64_t, int64_t, -+ z0 = svorr_n_s64_m (p0, z1, x0), -+ z0 = svorr_m (p0, 
z1, x0)) -+ -+/* -+** orr_1_s64_m_tied1: -+** mov (z[0-9]+\.d), #1 -+** orr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_s64_m_tied1, svint64_t, -+ z0 = svorr_n_s64_m (p0, z0, 1), -+ z0 = svorr_m (p0, z0, 1)) -+ -+/* -+** orr_1_s64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #1 -+** movprfx z0, z1 -+** orr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_s64_m_untied, svint64_t, -+ z0 = svorr_n_s64_m (p0, z1, 1), -+ z0 = svorr_m (p0, z1, 1)) -+ -+/* -+** orr_m2_s64_m: -+** mov (z[0-9]+\.d), #-2 -+** orr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m2_s64_m, svint64_t, -+ z0 = svorr_n_s64_m (p0, z0, -2), -+ z0 = svorr_m (p0, z0, -2)) -+ -+/* -+** orr_s64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** orr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (orr_s64_z_tied1, svint64_t, -+ z0 = svorr_s64_z (p0, z0, z1), -+ z0 = svorr_z (p0, z0, z1)) -+ -+/* -+** orr_s64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** orr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (orr_s64_z_tied2, svint64_t, -+ z0 = svorr_s64_z (p0, z1, z0), -+ z0 = svorr_z (p0, z1, z0)) -+ -+/* -+** orr_s64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** orr z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** orr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_s64_z_untied, svint64_t, -+ z0 = svorr_s64_z (p0, z1, z2), -+ z0 = svorr_z (p0, z1, z2)) -+ -+/* -+** orr_x0_s64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** orr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_x0_s64_z_tied1, svint64_t, int64_t, -+ z0 = svorr_n_s64_z (p0, z0, x0), -+ z0 = svorr_z (p0, z0, x0)) -+ -+/* -+** orr_x0_s64_z_untied: -+** mov (z[0-9]+\.d), x0 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** orr z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** orr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_x0_s64_z_untied, svint64_t, int64_t, -+ z0 = svorr_n_s64_z (p0, z1, x0), -+ z0 = svorr_z (p0, z1, x0)) -+ -+/* -+** orr_1_s64_z_tied1: -+** mov (z[0-9]+\.d), #1 -+** movprfx z0\.d, p0/z, z0\.d -+** orr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_s64_z_tied1, svint64_t, -+ z0 = svorr_n_s64_z (p0, z0, 1), -+ z0 = svorr_z (p0, z0, 1)) -+ -+/* -+** orr_1_s64_z_untied: -+** mov (z[0-9]+\.d), #1 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** orr z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** orr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_s64_z_untied, svint64_t, -+ z0 = svorr_n_s64_z (p0, z1, 1), -+ z0 = svorr_z (p0, z1, 1)) -+ -+/* -+** orr_s64_x_tied1: -+** orr z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_s64_x_tied1, svint64_t, -+ z0 = svorr_s64_x (p0, z0, z1), -+ z0 = svorr_x (p0, z0, z1)) -+ -+/* -+** orr_s64_x_tied2: -+** orr z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_s64_x_tied2, svint64_t, -+ z0 = svorr_s64_x (p0, z1, z0), -+ z0 = svorr_x (p0, z1, z0)) -+ -+/* -+** orr_s64_x_untied: -+** orr z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_s64_x_untied, svint64_t, -+ z0 = svorr_s64_x (p0, z1, z2), -+ z0 = svorr_x (p0, z1, z2)) -+ -+/* -+** orr_x0_s64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** orr z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_x0_s64_x_tied1, svint64_t, int64_t, -+ z0 = svorr_n_s64_x (p0, z0, x0), -+ z0 = svorr_x (p0, z0, x0)) -+ -+/* -+** orr_x0_s64_x_untied: -+** mov (z[0-9]+\.d), x0 -+** orr z0\.d, (z1\.d, 
\1|\1, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_x0_s64_x_untied, svint64_t, int64_t, -+ z0 = svorr_n_s64_x (p0, z1, x0), -+ z0 = svorr_x (p0, z1, x0)) -+ -+/* -+** orr_1_s64_x_tied1: -+** orr z0\.d, z0\.d, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_s64_x_tied1, svint64_t, -+ z0 = svorr_n_s64_x (p0, z0, 1), -+ z0 = svorr_x (p0, z0, 1)) -+ -+/* -+** orr_1_s64_x_untied: -+** movprfx z0, z1 -+** orr z0\.d, z0\.d, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_s64_x_untied, svint64_t, -+ z0 = svorr_n_s64_x (p0, z1, 1), -+ z0 = svorr_x (p0, z1, 1)) -+ -+/* -+** orr_127_s64_x: -+** orr z0\.d, z0\.d, #0x7f -+** ret -+*/ -+TEST_UNIFORM_Z (orr_127_s64_x, svint64_t, -+ z0 = svorr_n_s64_x (p0, z0, 127), -+ z0 = svorr_x (p0, z0, 127)) -+ -+/* -+** orr_128_s64_x: -+** orr z0\.d, z0\.d, #0x80 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_128_s64_x, svint64_t, -+ z0 = svorr_n_s64_x (p0, z0, 128), -+ z0 = svorr_x (p0, z0, 128)) -+ -+/* -+** orr_255_s64_x: -+** orr z0\.d, z0\.d, #0xff -+** ret -+*/ -+TEST_UNIFORM_Z (orr_255_s64_x, svint64_t, -+ z0 = svorr_n_s64_x (p0, z0, 255), -+ z0 = svorr_x (p0, z0, 255)) -+ -+/* -+** orr_256_s64_x: -+** orr z0\.d, z0\.d, #0x100 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_256_s64_x, svint64_t, -+ z0 = svorr_n_s64_x (p0, z0, 256), -+ z0 = svorr_x (p0, z0, 256)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (orr_257_s64_x, svint64_t, -+ z0 = svorr_n_s64_x (p0, z0, 257), -+ z0 = svorr_x (p0, z0, 257)) -+ -+/* -+** orr_512_s64_x: -+** orr z0\.d, z0\.d, #0x200 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_512_s64_x, svint64_t, -+ z0 = svorr_n_s64_x (p0, z0, 512), -+ z0 = svorr_x (p0, z0, 512)) -+ -+/* -+** orr_65280_s64_x: -+** orr z0\.d, z0\.d, #0xff00 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_65280_s64_x, svint64_t, -+ z0 = svorr_n_s64_x (p0, z0, 0xff00), -+ z0 = svorr_x (p0, z0, 0xff00)) -+ -+/* -+** orr_m127_s64_x: -+** orr z0\.d, z0\.d, #0xffffffffffffff81 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m127_s64_x, svint64_t, -+ z0 = svorr_n_s64_x (p0, z0, -127), -+ z0 = svorr_x (p0, z0, -127)) -+ -+/* -+** orr_m128_s64_x: -+** orr z0\.d, z0\.d, #0xffffffffffffff80 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m128_s64_x, svint64_t, -+ z0 = svorr_n_s64_x (p0, z0, -128), -+ z0 = svorr_x (p0, z0, -128)) -+ -+/* -+** orr_m255_s64_x: -+** orr z0\.d, z0\.d, #0xffffffffffffff01 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m255_s64_x, svint64_t, -+ z0 = svorr_n_s64_x (p0, z0, -255), -+ z0 = svorr_x (p0, z0, -255)) -+ -+/* -+** orr_m256_s64_x: -+** orr z0\.d, z0\.d, #0xffffffffffffff00 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m256_s64_x, svint64_t, -+ z0 = svorr_n_s64_x (p0, z0, -256), -+ z0 = svorr_x (p0, z0, -256)) -+ -+/* -+** orr_m257_s64_x: -+** orr z0\.d, z0\.d, #0xfffffffffffffeff -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m257_s64_x, svint64_t, -+ z0 = svorr_n_s64_x (p0, z0, -257), -+ z0 = svorr_x (p0, z0, -257)) -+ -+/* -+** orr_m512_s64_x: -+** orr z0\.d, z0\.d, #0xfffffffffffffe00 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m512_s64_x, svint64_t, -+ z0 = svorr_n_s64_x (p0, z0, -512), -+ z0 = svorr_x (p0, z0, -512)) -+ -+/* -+** orr_m32768_s64_x: -+** orr z0\.d, z0\.d, #0xffffffffffff8000 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m32768_s64_x, svint64_t, -+ z0 = svorr_n_s64_x (p0, z0, -0x8000), -+ z0 = svorr_x (p0, z0, -0x8000)) -+ -+/* -+** orr_5_s64_x: -+** mov (z[0-9]+\.d), #5 -+** orr z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_5_s64_x, svint64_t, -+ z0 = svorr_n_s64_x (p0, z0, 5), -+ z0 = svorr_x (p0, z0, 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_s8.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_s8.c -new file mode 100644 -index 000000000..b6483b6e7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_s8.c -@@ -0,0 +1,295 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** orr_s8_m_tied1: -+** orr z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (orr_s8_m_tied1, svint8_t, -+ z0 = svorr_s8_m (p0, z0, z1), -+ z0 = svorr_m (p0, z0, z1)) -+ -+/* -+** orr_s8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** orr z0\.b, p0/m, z0\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (orr_s8_m_tied2, svint8_t, -+ z0 = svorr_s8_m (p0, z1, z0), -+ z0 = svorr_m (p0, z1, z0)) -+ -+/* -+** orr_s8_m_untied: -+** movprfx z0, z1 -+** orr z0\.b, p0/m, z0\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (orr_s8_m_untied, svint8_t, -+ z0 = svorr_s8_m (p0, z1, z2), -+ z0 = svorr_m (p0, z1, z2)) -+ -+/* -+** orr_w0_s8_m_tied1: -+** mov (z[0-9]+\.b), w0 -+** orr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_w0_s8_m_tied1, svint8_t, int8_t, -+ z0 = svorr_n_s8_m (p0, z0, x0), -+ z0 = svorr_m (p0, z0, x0)) -+ -+/* -+** orr_w0_s8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** orr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_w0_s8_m_untied, svint8_t, int8_t, -+ z0 = svorr_n_s8_m (p0, z1, x0), -+ z0 = svorr_m (p0, z1, x0)) -+ -+/* -+** orr_1_s8_m_tied1: -+** mov (z[0-9]+\.b), #1 -+** orr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_s8_m_tied1, svint8_t, -+ z0 = svorr_n_s8_m (p0, z0, 1), -+ z0 = svorr_m (p0, z0, 1)) -+ -+/* -+** orr_1_s8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), #1 -+** movprfx z0, z1 -+** orr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_s8_m_untied, svint8_t, -+ z0 = svorr_n_s8_m (p0, z1, 1), -+ z0 = svorr_m (p0, z1, 1)) -+ -+/* -+** orr_m2_s8_m: -+** mov (z[0-9]+\.b), #-2 -+** orr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m2_s8_m, svint8_t, -+ z0 = svorr_n_s8_m (p0, z0, -2), -+ z0 = svorr_m (p0, z0, -2)) -+ -+/* -+** orr_s8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** orr z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (orr_s8_z_tied1, svint8_t, -+ z0 = svorr_s8_z (p0, z0, z1), -+ z0 = svorr_z (p0, z0, z1)) -+ -+/* -+** orr_s8_z_tied2: -+** movprfx z0\.b, p0/z, z0\.b -+** orr z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (orr_s8_z_tied2, svint8_t, -+ z0 = svorr_s8_z (p0, z1, z0), -+ z0 = svorr_z (p0, z1, z0)) -+ -+/* -+** orr_s8_z_untied: -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** orr z0\.b, p0/m, z0\.b, z2\.b -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** orr z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_s8_z_untied, svint8_t, -+ z0 = svorr_s8_z (p0, z1, z2), -+ z0 = svorr_z (p0, z1, z2)) -+ -+/* -+** orr_w0_s8_z_tied1: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** orr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_w0_s8_z_tied1, svint8_t, int8_t, -+ z0 = svorr_n_s8_z (p0, z0, x0), -+ z0 = svorr_z (p0, z0, x0)) -+ -+/* -+** orr_w0_s8_z_untied: -+** mov (z[0-9]+\.b), w0 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** orr z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** orr z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_w0_s8_z_untied, svint8_t, int8_t, -+ z0 = svorr_n_s8_z (p0, z1, x0), -+ z0 = svorr_z (p0, z1, x0)) -+ -+/* -+** orr_1_s8_z_tied1: -+** mov (z[0-9]+\.b), #1 -+** movprfx z0\.b, p0/z, z0\.b -+** orr z0\.b, p0/m, z0\.b, \1 -+** 
ret -+*/ -+TEST_UNIFORM_Z (orr_1_s8_z_tied1, svint8_t, -+ z0 = svorr_n_s8_z (p0, z0, 1), -+ z0 = svorr_z (p0, z0, 1)) -+ -+/* -+** orr_1_s8_z_untied: -+** mov (z[0-9]+\.b), #1 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** orr z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** orr z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_s8_z_untied, svint8_t, -+ z0 = svorr_n_s8_z (p0, z1, 1), -+ z0 = svorr_z (p0, z1, 1)) -+ -+/* -+** orr_s8_x_tied1: -+** orr z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_s8_x_tied1, svint8_t, -+ z0 = svorr_s8_x (p0, z0, z1), -+ z0 = svorr_x (p0, z0, z1)) -+ -+/* -+** orr_s8_x_tied2: -+** orr z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_s8_x_tied2, svint8_t, -+ z0 = svorr_s8_x (p0, z1, z0), -+ z0 = svorr_x (p0, z1, z0)) -+ -+/* -+** orr_s8_x_untied: -+** orr z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_s8_x_untied, svint8_t, -+ z0 = svorr_s8_x (p0, z1, z2), -+ z0 = svorr_x (p0, z1, z2)) -+ -+/* -+** orr_w0_s8_x_tied1: -+** mov (z[0-9]+)\.b, w0 -+** orr z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_w0_s8_x_tied1, svint8_t, int8_t, -+ z0 = svorr_n_s8_x (p0, z0, x0), -+ z0 = svorr_x (p0, z0, x0)) -+ -+/* -+** orr_w0_s8_x_untied: -+** mov (z[0-9]+)\.b, w0 -+** orr z0\.d, (z1\.d, \1\.d|\1\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_w0_s8_x_untied, svint8_t, int8_t, -+ z0 = svorr_n_s8_x (p0, z1, x0), -+ z0 = svorr_x (p0, z1, x0)) -+ -+/* -+** orr_1_s8_x_tied1: -+** orr z0\.b, z0\.b, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_s8_x_tied1, svint8_t, -+ z0 = svorr_n_s8_x (p0, z0, 1), -+ z0 = svorr_x (p0, z0, 1)) -+ -+/* -+** orr_1_s8_x_untied: -+** movprfx z0, z1 -+** orr z0\.b, z0\.b, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_s8_x_untied, svint8_t, -+ z0 = svorr_n_s8_x (p0, z1, 1), -+ z0 = svorr_x (p0, z1, 1)) -+ -+/* -+** orr_127_s8_x: -+** orr z0\.b, z0\.b, #0x7f -+** ret -+*/ -+TEST_UNIFORM_Z (orr_127_s8_x, svint8_t, -+ z0 = svorr_n_s8_x (p0, z0, 127), -+ z0 = svorr_x (p0, z0, 127)) -+ -+/* -+** orr_128_s8_x: -+** orr z0\.b, z0\.b, #0x80 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_128_s8_x, svint8_t, -+ z0 = svorr_n_s8_x (p0, z0, 128), -+ z0 = svorr_x (p0, z0, 128)) -+ -+/* -+** orr_255_s8_x: -+** mov z0\.b, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_255_s8_x, svint8_t, -+ z0 = svorr_n_s8_x (p0, z0, 255), -+ z0 = svorr_x (p0, z0, 255)) -+ -+/* -+** orr_m127_s8_x: -+** orr z0\.b, z0\.b, #0x81 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m127_s8_x, svint8_t, -+ z0 = svorr_n_s8_x (p0, z0, -127), -+ z0 = svorr_x (p0, z0, -127)) -+ -+/* -+** orr_m128_s8_x: -+** orr z0\.b, z0\.b, #0x80 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m128_s8_x, svint8_t, -+ z0 = svorr_n_s8_x (p0, z0, -128), -+ z0 = svorr_x (p0, z0, -128)) -+ -+/* -+** orr_5_s8_x: -+** mov (z[0-9]+)\.b, #5 -+** orr z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_5_s8_x, svint8_t, -+ z0 = svorr_n_s8_x (p0, z0, 5), -+ z0 = svorr_x (p0, z0, 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_u16.c -new file mode 100644 -index 000000000..000a0444c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_u16.c -@@ -0,0 +1,376 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** orr_u16_m_tied1: -+** orr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (orr_u16_m_tied1, svuint16_t, -+ z0 = svorr_u16_m (p0, z0, z1), -+ z0 = svorr_m 
(p0, z0, z1)) -+ -+/* -+** orr_u16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** orr z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (orr_u16_m_tied2, svuint16_t, -+ z0 = svorr_u16_m (p0, z1, z0), -+ z0 = svorr_m (p0, z1, z0)) -+ -+/* -+** orr_u16_m_untied: -+** movprfx z0, z1 -+** orr z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (orr_u16_m_untied, svuint16_t, -+ z0 = svorr_u16_m (p0, z1, z2), -+ z0 = svorr_m (p0, z1, z2)) -+ -+/* -+** orr_w0_u16_m_tied1: -+** mov (z[0-9]+\.h), w0 -+** orr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_w0_u16_m_tied1, svuint16_t, uint16_t, -+ z0 = svorr_n_u16_m (p0, z0, x0), -+ z0 = svorr_m (p0, z0, x0)) -+ -+/* -+** orr_w0_u16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** orr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_w0_u16_m_untied, svuint16_t, uint16_t, -+ z0 = svorr_n_u16_m (p0, z1, x0), -+ z0 = svorr_m (p0, z1, x0)) -+ -+/* -+** orr_1_u16_m_tied1: -+** mov (z[0-9]+\.h), #1 -+** orr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_u16_m_tied1, svuint16_t, -+ z0 = svorr_n_u16_m (p0, z0, 1), -+ z0 = svorr_m (p0, z0, 1)) -+ -+/* -+** orr_1_u16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), #1 -+** movprfx z0, z1 -+** orr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_u16_m_untied, svuint16_t, -+ z0 = svorr_n_u16_m (p0, z1, 1), -+ z0 = svorr_m (p0, z1, 1)) -+ -+/* -+** orr_m2_u16_m: -+** mov (z[0-9]+\.h), #-2 -+** orr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m2_u16_m, svuint16_t, -+ z0 = svorr_n_u16_m (p0, z0, -2), -+ z0 = svorr_m (p0, z0, -2)) -+ -+/* -+** orr_u16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** orr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (orr_u16_z_tied1, svuint16_t, -+ z0 = svorr_u16_z (p0, z0, z1), -+ z0 = svorr_z (p0, z0, z1)) -+ -+/* -+** orr_u16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** orr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (orr_u16_z_tied2, svuint16_t, -+ z0 = svorr_u16_z (p0, z1, z0), -+ z0 = svorr_z (p0, z1, z0)) -+ -+/* -+** orr_u16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** orr z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** orr z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_u16_z_untied, svuint16_t, -+ z0 = svorr_u16_z (p0, z1, z2), -+ z0 = svorr_z (p0, z1, z2)) -+ -+/* -+** orr_w0_u16_z_tied1: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** orr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_w0_u16_z_tied1, svuint16_t, uint16_t, -+ z0 = svorr_n_u16_z (p0, z0, x0), -+ z0 = svorr_z (p0, z0, x0)) -+ -+/* -+** orr_w0_u16_z_untied: -+** mov (z[0-9]+\.h), w0 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** orr z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** orr z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_w0_u16_z_untied, svuint16_t, uint16_t, -+ z0 = svorr_n_u16_z (p0, z1, x0), -+ z0 = svorr_z (p0, z1, x0)) -+ -+/* -+** orr_1_u16_z_tied1: -+** mov (z[0-9]+\.h), #1 -+** movprfx z0\.h, p0/z, z0\.h -+** orr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_u16_z_tied1, svuint16_t, -+ z0 = svorr_n_u16_z (p0, z0, 1), -+ z0 = svorr_z (p0, z0, 1)) -+ -+/* -+** orr_1_u16_z_untied: -+** mov (z[0-9]+\.h), #1 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** orr z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** orr z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_u16_z_untied, svuint16_t, -+ z0 = 
svorr_n_u16_z (p0, z1, 1), -+ z0 = svorr_z (p0, z1, 1)) -+ -+/* -+** orr_u16_x_tied1: -+** orr z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_u16_x_tied1, svuint16_t, -+ z0 = svorr_u16_x (p0, z0, z1), -+ z0 = svorr_x (p0, z0, z1)) -+ -+/* -+** orr_u16_x_tied2: -+** orr z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_u16_x_tied2, svuint16_t, -+ z0 = svorr_u16_x (p0, z1, z0), -+ z0 = svorr_x (p0, z1, z0)) -+ -+/* -+** orr_u16_x_untied: -+** orr z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_u16_x_untied, svuint16_t, -+ z0 = svorr_u16_x (p0, z1, z2), -+ z0 = svorr_x (p0, z1, z2)) -+ -+/* -+** orr_w0_u16_x_tied1: -+** mov (z[0-9]+)\.h, w0 -+** orr z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_w0_u16_x_tied1, svuint16_t, uint16_t, -+ z0 = svorr_n_u16_x (p0, z0, x0), -+ z0 = svorr_x (p0, z0, x0)) -+ -+/* -+** orr_w0_u16_x_untied: -+** mov (z[0-9]+)\.h, w0 -+** orr z0\.d, (z1\.d, \1\.d|\1\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_w0_u16_x_untied, svuint16_t, uint16_t, -+ z0 = svorr_n_u16_x (p0, z1, x0), -+ z0 = svorr_x (p0, z1, x0)) -+ -+/* -+** orr_1_u16_x_tied1: -+** orr z0\.h, z0\.h, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_u16_x_tied1, svuint16_t, -+ z0 = svorr_n_u16_x (p0, z0, 1), -+ z0 = svorr_x (p0, z0, 1)) -+ -+/* -+** orr_1_u16_x_untied: -+** movprfx z0, z1 -+** orr z0\.h, z0\.h, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_u16_x_untied, svuint16_t, -+ z0 = svorr_n_u16_x (p0, z1, 1), -+ z0 = svorr_x (p0, z1, 1)) -+ -+/* -+** orr_127_u16_x: -+** orr z0\.h, z0\.h, #0x7f -+** ret -+*/ -+TEST_UNIFORM_Z (orr_127_u16_x, svuint16_t, -+ z0 = svorr_n_u16_x (p0, z0, 127), -+ z0 = svorr_x (p0, z0, 127)) -+ -+/* -+** orr_128_u16_x: -+** orr z0\.h, z0\.h, #0x80 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_128_u16_x, svuint16_t, -+ z0 = svorr_n_u16_x (p0, z0, 128), -+ z0 = svorr_x (p0, z0, 128)) -+ -+/* -+** orr_255_u16_x: -+** orr z0\.h, z0\.h, #0xff -+** ret -+*/ -+TEST_UNIFORM_Z (orr_255_u16_x, svuint16_t, -+ z0 = svorr_n_u16_x (p0, z0, 255), -+ z0 = svorr_x (p0, z0, 255)) -+ -+/* -+** orr_256_u16_x: -+** orr z0\.h, z0\.h, #0x100 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_256_u16_x, svuint16_t, -+ z0 = svorr_n_u16_x (p0, z0, 256), -+ z0 = svorr_x (p0, z0, 256)) -+ -+/* -+** orr_257_u16_x: -+** orr z0\.h, z0\.h, #0x101 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_257_u16_x, svuint16_t, -+ z0 = svorr_n_u16_x (p0, z0, 257), -+ z0 = svorr_x (p0, z0, 257)) -+ -+/* -+** orr_512_u16_x: -+** orr z0\.h, z0\.h, #0x200 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_512_u16_x, svuint16_t, -+ z0 = svorr_n_u16_x (p0, z0, 512), -+ z0 = svorr_x (p0, z0, 512)) -+ -+/* -+** orr_65280_u16_x: -+** orr z0\.h, z0\.h, #0xff00 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_65280_u16_x, svuint16_t, -+ z0 = svorr_n_u16_x (p0, z0, 0xff00), -+ z0 = svorr_x (p0, z0, 0xff00)) -+ -+/* -+** orr_m127_u16_x: -+** orr z0\.h, z0\.h, #0xff81 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m127_u16_x, svuint16_t, -+ z0 = svorr_n_u16_x (p0, z0, -127), -+ z0 = svorr_x (p0, z0, -127)) -+ -+/* -+** orr_m128_u16_x: -+** orr z0\.h, z0\.h, #0xff80 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m128_u16_x, svuint16_t, -+ z0 = svorr_n_u16_x (p0, z0, -128), -+ z0 = svorr_x (p0, z0, -128)) -+ -+/* -+** orr_m255_u16_x: -+** orr z0\.h, z0\.h, #0xff01 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m255_u16_x, svuint16_t, -+ z0 = svorr_n_u16_x (p0, z0, -255), -+ z0 = svorr_x (p0, z0, -255)) -+ -+/* -+** orr_m256_u16_x: -+** orr z0\.h, z0\.h, #0xff00 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m256_u16_x, svuint16_t, -+ z0 = 
svorr_n_u16_x (p0, z0, -256), -+ z0 = svorr_x (p0, z0, -256)) -+ -+/* -+** orr_m257_u16_x: -+** orr z0\.h, z0\.h, #0xfeff -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m257_u16_x, svuint16_t, -+ z0 = svorr_n_u16_x (p0, z0, -257), -+ z0 = svorr_x (p0, z0, -257)) -+ -+/* -+** orr_m512_u16_x: -+** orr z0\.h, z0\.h, #0xfe00 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m512_u16_x, svuint16_t, -+ z0 = svorr_n_u16_x (p0, z0, -512), -+ z0 = svorr_x (p0, z0, -512)) -+ -+/* -+** orr_m32768_u16_x: -+** orr z0\.h, z0\.h, #0x8000 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m32768_u16_x, svuint16_t, -+ z0 = svorr_n_u16_x (p0, z0, -0x8000), -+ z0 = svorr_x (p0, z0, -0x8000)) -+ -+/* -+** orr_5_u16_x: -+** mov (z[0-9]+)\.h, #5 -+** orr z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_5_u16_x, svuint16_t, -+ z0 = svorr_n_u16_x (p0, z0, 5), -+ z0 = svorr_x (p0, z0, 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_u32.c -new file mode 100644 -index 000000000..8e2351d16 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_u32.c -@@ -0,0 +1,372 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** orr_u32_m_tied1: -+** orr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (orr_u32_m_tied1, svuint32_t, -+ z0 = svorr_u32_m (p0, z0, z1), -+ z0 = svorr_m (p0, z0, z1)) -+ -+/* -+** orr_u32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** orr z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (orr_u32_m_tied2, svuint32_t, -+ z0 = svorr_u32_m (p0, z1, z0), -+ z0 = svorr_m (p0, z1, z0)) -+ -+/* -+** orr_u32_m_untied: -+** movprfx z0, z1 -+** orr z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (orr_u32_m_untied, svuint32_t, -+ z0 = svorr_u32_m (p0, z1, z2), -+ z0 = svorr_m (p0, z1, z2)) -+ -+/* -+** orr_w0_u32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** orr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_w0_u32_m_tied1, svuint32_t, uint32_t, -+ z0 = svorr_n_u32_m (p0, z0, x0), -+ z0 = svorr_m (p0, z0, x0)) -+ -+/* -+** orr_w0_u32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** orr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_w0_u32_m_untied, svuint32_t, uint32_t, -+ z0 = svorr_n_u32_m (p0, z1, x0), -+ z0 = svorr_m (p0, z1, x0)) -+ -+/* -+** orr_1_u32_m_tied1: -+** mov (z[0-9]+\.s), #1 -+** orr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_u32_m_tied1, svuint32_t, -+ z0 = svorr_n_u32_m (p0, z0, 1), -+ z0 = svorr_m (p0, z0, 1)) -+ -+/* -+** orr_1_u32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #1 -+** movprfx z0, z1 -+** orr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_u32_m_untied, svuint32_t, -+ z0 = svorr_n_u32_m (p0, z1, 1), -+ z0 = svorr_m (p0, z1, 1)) -+ -+/* -+** orr_m2_u32_m: -+** mov (z[0-9]+\.s), #-2 -+** orr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m2_u32_m, svuint32_t, -+ z0 = svorr_n_u32_m (p0, z0, -2), -+ z0 = svorr_m (p0, z0, -2)) -+ -+/* -+** orr_u32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** orr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (orr_u32_z_tied1, svuint32_t, -+ z0 = svorr_u32_z (p0, z0, z1), -+ z0 = svorr_z (p0, z0, z1)) -+ -+/* -+** orr_u32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** orr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (orr_u32_z_tied2, svuint32_t, -+ z0 = svorr_u32_z (p0, z1, z0), -+ z0 = svorr_z (p0, z1, z0)) -+ -+/* -+** orr_u32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, 
z1\.s -+** orr z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** orr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_u32_z_untied, svuint32_t, -+ z0 = svorr_u32_z (p0, z1, z2), -+ z0 = svorr_z (p0, z1, z2)) -+ -+/* -+** orr_w0_u32_z_tied1: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** orr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_w0_u32_z_tied1, svuint32_t, uint32_t, -+ z0 = svorr_n_u32_z (p0, z0, x0), -+ z0 = svorr_z (p0, z0, x0)) -+ -+/* -+** orr_w0_u32_z_untied: -+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** orr z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** orr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_w0_u32_z_untied, svuint32_t, uint32_t, -+ z0 = svorr_n_u32_z (p0, z1, x0), -+ z0 = svorr_z (p0, z1, x0)) -+ -+/* -+** orr_1_u32_z_tied1: -+** mov (z[0-9]+\.s), #1 -+** movprfx z0\.s, p0/z, z0\.s -+** orr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_u32_z_tied1, svuint32_t, -+ z0 = svorr_n_u32_z (p0, z0, 1), -+ z0 = svorr_z (p0, z0, 1)) -+ -+/* -+** orr_1_u32_z_untied: -+** mov (z[0-9]+\.s), #1 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** orr z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** orr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_u32_z_untied, svuint32_t, -+ z0 = svorr_n_u32_z (p0, z1, 1), -+ z0 = svorr_z (p0, z1, 1)) -+ -+/* -+** orr_u32_x_tied1: -+** orr z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_u32_x_tied1, svuint32_t, -+ z0 = svorr_u32_x (p0, z0, z1), -+ z0 = svorr_x (p0, z0, z1)) -+ -+/* -+** orr_u32_x_tied2: -+** orr z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_u32_x_tied2, svuint32_t, -+ z0 = svorr_u32_x (p0, z1, z0), -+ z0 = svorr_x (p0, z1, z0)) -+ -+/* -+** orr_u32_x_untied: -+** orr z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_u32_x_untied, svuint32_t, -+ z0 = svorr_u32_x (p0, z1, z2), -+ z0 = svorr_x (p0, z1, z2)) -+ -+/* -+** orr_w0_u32_x_tied1: -+** mov (z[0-9]+)\.s, w0 -+** orr z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_w0_u32_x_tied1, svuint32_t, uint32_t, -+ z0 = svorr_n_u32_x (p0, z0, x0), -+ z0 = svorr_x (p0, z0, x0)) -+ -+/* -+** orr_w0_u32_x_untied: -+** mov (z[0-9]+)\.s, w0 -+** orr z0\.d, (z1\.d, \1\.d|\1\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_w0_u32_x_untied, svuint32_t, uint32_t, -+ z0 = svorr_n_u32_x (p0, z1, x0), -+ z0 = svorr_x (p0, z1, x0)) -+ -+/* -+** orr_1_u32_x_tied1: -+** orr z0\.s, z0\.s, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_u32_x_tied1, svuint32_t, -+ z0 = svorr_n_u32_x (p0, z0, 1), -+ z0 = svorr_x (p0, z0, 1)) -+ -+/* -+** orr_1_u32_x_untied: -+** movprfx z0, z1 -+** orr z0\.s, z0\.s, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_u32_x_untied, svuint32_t, -+ z0 = svorr_n_u32_x (p0, z1, 1), -+ z0 = svorr_x (p0, z1, 1)) -+ -+/* -+** orr_127_u32_x: -+** orr z0\.s, z0\.s, #0x7f -+** ret -+*/ -+TEST_UNIFORM_Z (orr_127_u32_x, svuint32_t, -+ z0 = svorr_n_u32_x (p0, z0, 127), -+ z0 = svorr_x (p0, z0, 127)) -+ -+/* -+** orr_128_u32_x: -+** orr z0\.s, z0\.s, #0x80 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_128_u32_x, svuint32_t, -+ z0 = svorr_n_u32_x (p0, z0, 128), -+ z0 = svorr_x (p0, z0, 128)) -+ -+/* -+** orr_255_u32_x: -+** orr z0\.s, z0\.s, #0xff -+** ret -+*/ -+TEST_UNIFORM_Z (orr_255_u32_x, svuint32_t, -+ z0 = svorr_n_u32_x (p0, z0, 255), -+ z0 = svorr_x (p0, z0, 255)) -+ -+/* -+** orr_256_u32_x: -+** orr z0\.s, z0\.s, #0x100 -+** ret 
-+*/ -+TEST_UNIFORM_Z (orr_256_u32_x, svuint32_t, -+ z0 = svorr_n_u32_x (p0, z0, 256), -+ z0 = svorr_x (p0, z0, 256)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (orr_257_u32_x, svuint32_t, -+ z0 = svorr_n_u32_x (p0, z0, 257), -+ z0 = svorr_x (p0, z0, 257)) -+ -+/* -+** orr_512_u32_x: -+** orr z0\.s, z0\.s, #0x200 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_512_u32_x, svuint32_t, -+ z0 = svorr_n_u32_x (p0, z0, 512), -+ z0 = svorr_x (p0, z0, 512)) -+ -+/* -+** orr_65280_u32_x: -+** orr z0\.s, z0\.s, #0xff00 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_65280_u32_x, svuint32_t, -+ z0 = svorr_n_u32_x (p0, z0, 0xff00), -+ z0 = svorr_x (p0, z0, 0xff00)) -+ -+/* -+** orr_m127_u32_x: -+** orr z0\.s, z0\.s, #0xffffff81 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m127_u32_x, svuint32_t, -+ z0 = svorr_n_u32_x (p0, z0, -127), -+ z0 = svorr_x (p0, z0, -127)) -+ -+/* -+** orr_m128_u32_x: -+** orr z0\.s, z0\.s, #0xffffff80 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m128_u32_x, svuint32_t, -+ z0 = svorr_n_u32_x (p0, z0, -128), -+ z0 = svorr_x (p0, z0, -128)) -+ -+/* -+** orr_m255_u32_x: -+** orr z0\.s, z0\.s, #0xffffff01 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m255_u32_x, svuint32_t, -+ z0 = svorr_n_u32_x (p0, z0, -255), -+ z0 = svorr_x (p0, z0, -255)) -+ -+/* -+** orr_m256_u32_x: -+** orr z0\.s, z0\.s, #0xffffff00 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m256_u32_x, svuint32_t, -+ z0 = svorr_n_u32_x (p0, z0, -256), -+ z0 = svorr_x (p0, z0, -256)) -+ -+/* -+** orr_m257_u32_x: -+** orr z0\.s, z0\.s, #0xfffffeff -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m257_u32_x, svuint32_t, -+ z0 = svorr_n_u32_x (p0, z0, -257), -+ z0 = svorr_x (p0, z0, -257)) -+ -+/* -+** orr_m512_u32_x: -+** orr z0\.s, z0\.s, #0xfffffe00 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m512_u32_x, svuint32_t, -+ z0 = svorr_n_u32_x (p0, z0, -512), -+ z0 = svorr_x (p0, z0, -512)) -+ -+/* -+** orr_m32768_u32_x: -+** orr z0\.s, z0\.s, #0xffff8000 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m32768_u32_x, svuint32_t, -+ z0 = svorr_n_u32_x (p0, z0, -0x8000), -+ z0 = svorr_x (p0, z0, -0x8000)) -+ -+/* -+** orr_5_u32_x: -+** mov (z[0-9]+)\.s, #5 -+** orr z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_5_u32_x, svuint32_t, -+ z0 = svorr_n_u32_x (p0, z0, 5), -+ z0 = svorr_x (p0, z0, 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_u64.c -new file mode 100644 -index 000000000..323e2101e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_u64.c -@@ -0,0 +1,372 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** orr_u64_m_tied1: -+** orr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (orr_u64_m_tied1, svuint64_t, -+ z0 = svorr_u64_m (p0, z0, z1), -+ z0 = svorr_m (p0, z0, z1)) -+ -+/* -+** orr_u64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** orr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_u64_m_tied2, svuint64_t, -+ z0 = svorr_u64_m (p0, z1, z0), -+ z0 = svorr_m (p0, z1, z0)) -+ -+/* -+** orr_u64_m_untied: -+** movprfx z0, z1 -+** orr z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (orr_u64_m_untied, svuint64_t, -+ z0 = svorr_u64_m (p0, z1, z2), -+ z0 = svorr_m (p0, z1, z2)) -+ -+/* -+** orr_x0_u64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** orr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_x0_u64_m_tied1, svuint64_t, uint64_t, -+ z0 = svorr_n_u64_m (p0, z0, x0), -+ z0 = svorr_m (p0, z0, x0)) -+ -+/* -+** orr_x0_u64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** 
movprfx z0, z1 -+** orr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_x0_u64_m_untied, svuint64_t, uint64_t, -+ z0 = svorr_n_u64_m (p0, z1, x0), -+ z0 = svorr_m (p0, z1, x0)) -+ -+/* -+** orr_1_u64_m_tied1: -+** mov (z[0-9]+\.d), #1 -+** orr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_u64_m_tied1, svuint64_t, -+ z0 = svorr_n_u64_m (p0, z0, 1), -+ z0 = svorr_m (p0, z0, 1)) -+ -+/* -+** orr_1_u64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #1 -+** movprfx z0, z1 -+** orr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_u64_m_untied, svuint64_t, -+ z0 = svorr_n_u64_m (p0, z1, 1), -+ z0 = svorr_m (p0, z1, 1)) -+ -+/* -+** orr_m2_u64_m: -+** mov (z[0-9]+\.d), #-2 -+** orr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m2_u64_m, svuint64_t, -+ z0 = svorr_n_u64_m (p0, z0, -2), -+ z0 = svorr_m (p0, z0, -2)) -+ -+/* -+** orr_u64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** orr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (orr_u64_z_tied1, svuint64_t, -+ z0 = svorr_u64_z (p0, z0, z1), -+ z0 = svorr_z (p0, z0, z1)) -+ -+/* -+** orr_u64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** orr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (orr_u64_z_tied2, svuint64_t, -+ z0 = svorr_u64_z (p0, z1, z0), -+ z0 = svorr_z (p0, z1, z0)) -+ -+/* -+** orr_u64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** orr z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** orr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_u64_z_untied, svuint64_t, -+ z0 = svorr_u64_z (p0, z1, z2), -+ z0 = svorr_z (p0, z1, z2)) -+ -+/* -+** orr_x0_u64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** orr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_x0_u64_z_tied1, svuint64_t, uint64_t, -+ z0 = svorr_n_u64_z (p0, z0, x0), -+ z0 = svorr_z (p0, z0, x0)) -+ -+/* -+** orr_x0_u64_z_untied: -+** mov (z[0-9]+\.d), x0 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** orr z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** orr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_x0_u64_z_untied, svuint64_t, uint64_t, -+ z0 = svorr_n_u64_z (p0, z1, x0), -+ z0 = svorr_z (p0, z1, x0)) -+ -+/* -+** orr_1_u64_z_tied1: -+** mov (z[0-9]+\.d), #1 -+** movprfx z0\.d, p0/z, z0\.d -+** orr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_u64_z_tied1, svuint64_t, -+ z0 = svorr_n_u64_z (p0, z0, 1), -+ z0 = svorr_z (p0, z0, 1)) -+ -+/* -+** orr_1_u64_z_untied: -+** mov (z[0-9]+\.d), #1 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** orr z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** orr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_u64_z_untied, svuint64_t, -+ z0 = svorr_n_u64_z (p0, z1, 1), -+ z0 = svorr_z (p0, z1, 1)) -+ -+/* -+** orr_u64_x_tied1: -+** orr z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_u64_x_tied1, svuint64_t, -+ z0 = svorr_u64_x (p0, z0, z1), -+ z0 = svorr_x (p0, z0, z1)) -+ -+/* -+** orr_u64_x_tied2: -+** orr z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_u64_x_tied2, svuint64_t, -+ z0 = svorr_u64_x (p0, z1, z0), -+ z0 = svorr_x (p0, z1, z0)) -+ -+/* -+** orr_u64_x_untied: -+** orr z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_u64_x_untied, svuint64_t, -+ z0 = svorr_u64_x (p0, z1, z2), -+ z0 = svorr_x (p0, z1, z2)) -+ -+/* -+** orr_x0_u64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** orr z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX 
(orr_x0_u64_x_tied1, svuint64_t, uint64_t, -+ z0 = svorr_n_u64_x (p0, z0, x0), -+ z0 = svorr_x (p0, z0, x0)) -+ -+/* -+** orr_x0_u64_x_untied: -+** mov (z[0-9]+\.d), x0 -+** orr z0\.d, (z1\.d, \1|\1, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_x0_u64_x_untied, svuint64_t, uint64_t, -+ z0 = svorr_n_u64_x (p0, z1, x0), -+ z0 = svorr_x (p0, z1, x0)) -+ -+/* -+** orr_1_u64_x_tied1: -+** orr z0\.d, z0\.d, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_u64_x_tied1, svuint64_t, -+ z0 = svorr_n_u64_x (p0, z0, 1), -+ z0 = svorr_x (p0, z0, 1)) -+ -+/* -+** orr_1_u64_x_untied: -+** movprfx z0, z1 -+** orr z0\.d, z0\.d, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_u64_x_untied, svuint64_t, -+ z0 = svorr_n_u64_x (p0, z1, 1), -+ z0 = svorr_x (p0, z1, 1)) -+ -+/* -+** orr_127_u64_x: -+** orr z0\.d, z0\.d, #0x7f -+** ret -+*/ -+TEST_UNIFORM_Z (orr_127_u64_x, svuint64_t, -+ z0 = svorr_n_u64_x (p0, z0, 127), -+ z0 = svorr_x (p0, z0, 127)) -+ -+/* -+** orr_128_u64_x: -+** orr z0\.d, z0\.d, #0x80 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_128_u64_x, svuint64_t, -+ z0 = svorr_n_u64_x (p0, z0, 128), -+ z0 = svorr_x (p0, z0, 128)) -+ -+/* -+** orr_255_u64_x: -+** orr z0\.d, z0\.d, #0xff -+** ret -+*/ -+TEST_UNIFORM_Z (orr_255_u64_x, svuint64_t, -+ z0 = svorr_n_u64_x (p0, z0, 255), -+ z0 = svorr_x (p0, z0, 255)) -+ -+/* -+** orr_256_u64_x: -+** orr z0\.d, z0\.d, #0x100 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_256_u64_x, svuint64_t, -+ z0 = svorr_n_u64_x (p0, z0, 256), -+ z0 = svorr_x (p0, z0, 256)) -+ -+/* TODO: Bad code and needs fixing. */ -+TEST_UNIFORM_Z (orr_257_u64_x, svuint64_t, -+ z0 = svorr_n_u64_x (p0, z0, 257), -+ z0 = svorr_x (p0, z0, 257)) -+ -+/* -+** orr_512_u64_x: -+** orr z0\.d, z0\.d, #0x200 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_512_u64_x, svuint64_t, -+ z0 = svorr_n_u64_x (p0, z0, 512), -+ z0 = svorr_x (p0, z0, 512)) -+ -+/* -+** orr_65280_u64_x: -+** orr z0\.d, z0\.d, #0xff00 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_65280_u64_x, svuint64_t, -+ z0 = svorr_n_u64_x (p0, z0, 0xff00), -+ z0 = svorr_x (p0, z0, 0xff00)) -+ -+/* -+** orr_m127_u64_x: -+** orr z0\.d, z0\.d, #0xffffffffffffff81 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m127_u64_x, svuint64_t, -+ z0 = svorr_n_u64_x (p0, z0, -127), -+ z0 = svorr_x (p0, z0, -127)) -+ -+/* -+** orr_m128_u64_x: -+** orr z0\.d, z0\.d, #0xffffffffffffff80 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m128_u64_x, svuint64_t, -+ z0 = svorr_n_u64_x (p0, z0, -128), -+ z0 = svorr_x (p0, z0, -128)) -+ -+/* -+** orr_m255_u64_x: -+** orr z0\.d, z0\.d, #0xffffffffffffff01 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m255_u64_x, svuint64_t, -+ z0 = svorr_n_u64_x (p0, z0, -255), -+ z0 = svorr_x (p0, z0, -255)) -+ -+/* -+** orr_m256_u64_x: -+** orr z0\.d, z0\.d, #0xffffffffffffff00 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m256_u64_x, svuint64_t, -+ z0 = svorr_n_u64_x (p0, z0, -256), -+ z0 = svorr_x (p0, z0, -256)) -+ -+/* -+** orr_m257_u64_x: -+** orr z0\.d, z0\.d, #0xfffffffffffffeff -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m257_u64_x, svuint64_t, -+ z0 = svorr_n_u64_x (p0, z0, -257), -+ z0 = svorr_x (p0, z0, -257)) -+ -+/* -+** orr_m512_u64_x: -+** orr z0\.d, z0\.d, #0xfffffffffffffe00 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m512_u64_x, svuint64_t, -+ z0 = svorr_n_u64_x (p0, z0, -512), -+ z0 = svorr_x (p0, z0, -512)) -+ -+/* -+** orr_m32768_u64_x: -+** orr z0\.d, z0\.d, #0xffffffffffff8000 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m32768_u64_x, svuint64_t, -+ z0 = svorr_n_u64_x (p0, z0, -0x8000), -+ z0 = svorr_x (p0, z0, -0x8000)) -+ -+/* -+** orr_5_u64_x: -+** mov (z[0-9]+\.d), #5 -+** orr z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ 
-+TEST_UNIFORM_Z (orr_5_u64_x, svuint64_t, -+ z0 = svorr_n_u64_x (p0, z0, 5), -+ z0 = svorr_x (p0, z0, 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_u8.c -new file mode 100644 -index 000000000..efe5591b4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orr_u8.c -@@ -0,0 +1,295 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** orr_u8_m_tied1: -+** orr z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (orr_u8_m_tied1, svuint8_t, -+ z0 = svorr_u8_m (p0, z0, z1), -+ z0 = svorr_m (p0, z0, z1)) -+ -+/* -+** orr_u8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** orr z0\.b, p0/m, z0\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (orr_u8_m_tied2, svuint8_t, -+ z0 = svorr_u8_m (p0, z1, z0), -+ z0 = svorr_m (p0, z1, z0)) -+ -+/* -+** orr_u8_m_untied: -+** movprfx z0, z1 -+** orr z0\.b, p0/m, z0\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (orr_u8_m_untied, svuint8_t, -+ z0 = svorr_u8_m (p0, z1, z2), -+ z0 = svorr_m (p0, z1, z2)) -+ -+/* -+** orr_w0_u8_m_tied1: -+** mov (z[0-9]+\.b), w0 -+** orr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_w0_u8_m_tied1, svuint8_t, uint8_t, -+ z0 = svorr_n_u8_m (p0, z0, x0), -+ z0 = svorr_m (p0, z0, x0)) -+ -+/* -+** orr_w0_u8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** orr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_w0_u8_m_untied, svuint8_t, uint8_t, -+ z0 = svorr_n_u8_m (p0, z1, x0), -+ z0 = svorr_m (p0, z1, x0)) -+ -+/* -+** orr_1_u8_m_tied1: -+** mov (z[0-9]+\.b), #1 -+** orr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_u8_m_tied1, svuint8_t, -+ z0 = svorr_n_u8_m (p0, z0, 1), -+ z0 = svorr_m (p0, z0, 1)) -+ -+/* -+** orr_1_u8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), #1 -+** movprfx z0, z1 -+** orr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_u8_m_untied, svuint8_t, -+ z0 = svorr_n_u8_m (p0, z1, 1), -+ z0 = svorr_m (p0, z1, 1)) -+ -+/* -+** orr_m2_u8_m: -+** mov (z[0-9]+\.b), #-2 -+** orr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m2_u8_m, svuint8_t, -+ z0 = svorr_n_u8_m (p0, z0, -2), -+ z0 = svorr_m (p0, z0, -2)) -+ -+/* -+** orr_u8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** orr z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (orr_u8_z_tied1, svuint8_t, -+ z0 = svorr_u8_z (p0, z0, z1), -+ z0 = svorr_z (p0, z0, z1)) -+ -+/* -+** orr_u8_z_tied2: -+** movprfx z0\.b, p0/z, z0\.b -+** orr z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (orr_u8_z_tied2, svuint8_t, -+ z0 = svorr_u8_z (p0, z1, z0), -+ z0 = svorr_z (p0, z1, z0)) -+ -+/* -+** orr_u8_z_untied: -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** orr z0\.b, p0/m, z0\.b, z2\.b -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** orr z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_u8_z_untied, svuint8_t, -+ z0 = svorr_u8_z (p0, z1, z2), -+ z0 = svorr_z (p0, z1, z2)) -+ -+/* -+** orr_w0_u8_z_tied1: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** orr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_w0_u8_z_tied1, svuint8_t, uint8_t, -+ z0 = svorr_n_u8_z (p0, z0, x0), -+ z0 = svorr_z (p0, z0, x0)) -+ -+/* -+** orr_w0_u8_z_untied: -+** mov (z[0-9]+\.b), w0 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** orr z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** orr z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_w0_u8_z_untied, svuint8_t, uint8_t, 
-+ z0 = svorr_n_u8_z (p0, z1, x0), -+ z0 = svorr_z (p0, z1, x0)) -+ -+/* -+** orr_1_u8_z_tied1: -+** mov (z[0-9]+\.b), #1 -+** movprfx z0\.b, p0/z, z0\.b -+** orr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_u8_z_tied1, svuint8_t, -+ z0 = svorr_n_u8_z (p0, z0, 1), -+ z0 = svorr_z (p0, z0, 1)) -+ -+/* -+** orr_1_u8_z_untied: -+** mov (z[0-9]+\.b), #1 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** orr z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** orr z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_u8_z_untied, svuint8_t, -+ z0 = svorr_n_u8_z (p0, z1, 1), -+ z0 = svorr_z (p0, z1, 1)) -+ -+/* -+** orr_u8_x_tied1: -+** orr z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_u8_x_tied1, svuint8_t, -+ z0 = svorr_u8_x (p0, z0, z1), -+ z0 = svorr_x (p0, z0, z1)) -+ -+/* -+** orr_u8_x_tied2: -+** orr z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_u8_x_tied2, svuint8_t, -+ z0 = svorr_u8_x (p0, z1, z0), -+ z0 = svorr_x (p0, z1, z0)) -+ -+/* -+** orr_u8_x_untied: -+** orr z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_u8_x_untied, svuint8_t, -+ z0 = svorr_u8_x (p0, z1, z2), -+ z0 = svorr_x (p0, z1, z2)) -+ -+/* -+** orr_w0_u8_x_tied1: -+** mov (z[0-9]+)\.b, w0 -+** orr z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_w0_u8_x_tied1, svuint8_t, uint8_t, -+ z0 = svorr_n_u8_x (p0, z0, x0), -+ z0 = svorr_x (p0, z0, x0)) -+ -+/* -+** orr_w0_u8_x_untied: -+** mov (z[0-9]+)\.b, w0 -+** orr z0\.d, (z1\.d, \1\.d|\1\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (orr_w0_u8_x_untied, svuint8_t, uint8_t, -+ z0 = svorr_n_u8_x (p0, z1, x0), -+ z0 = svorr_x (p0, z1, x0)) -+ -+/* -+** orr_1_u8_x_tied1: -+** orr z0\.b, z0\.b, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_u8_x_tied1, svuint8_t, -+ z0 = svorr_n_u8_x (p0, z0, 1), -+ z0 = svorr_x (p0, z0, 1)) -+ -+/* -+** orr_1_u8_x_untied: -+** movprfx z0, z1 -+** orr z0\.b, z0\.b, #0x1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_1_u8_x_untied, svuint8_t, -+ z0 = svorr_n_u8_x (p0, z1, 1), -+ z0 = svorr_x (p0, z1, 1)) -+ -+/* -+** orr_127_u8_x: -+** orr z0\.b, z0\.b, #0x7f -+** ret -+*/ -+TEST_UNIFORM_Z (orr_127_u8_x, svuint8_t, -+ z0 = svorr_n_u8_x (p0, z0, 127), -+ z0 = svorr_x (p0, z0, 127)) -+ -+/* -+** orr_128_u8_x: -+** orr z0\.b, z0\.b, #0x80 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_128_u8_x, svuint8_t, -+ z0 = svorr_n_u8_x (p0, z0, 128), -+ z0 = svorr_x (p0, z0, 128)) -+ -+/* -+** orr_255_u8_x: -+** mov z0\.b, #-1 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_255_u8_x, svuint8_t, -+ z0 = svorr_n_u8_x (p0, z0, 255), -+ z0 = svorr_x (p0, z0, 255)) -+ -+/* -+** orr_m127_u8_x: -+** orr z0\.b, z0\.b, #0x81 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m127_u8_x, svuint8_t, -+ z0 = svorr_n_u8_x (p0, z0, -127), -+ z0 = svorr_x (p0, z0, -127)) -+ -+/* -+** orr_m128_u8_x: -+** orr z0\.b, z0\.b, #0x80 -+** ret -+*/ -+TEST_UNIFORM_Z (orr_m128_u8_x, svuint8_t, -+ z0 = svorr_n_u8_x (p0, z0, -128), -+ z0 = svorr_x (p0, z0, -128)) -+ -+/* -+** orr_5_u8_x: -+** mov (z[0-9]+)\.b, #5 -+** orr z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (orr_5_u8_x, svuint8_t, -+ z0 = svorr_n_u8_x (p0, z0, 5), -+ z0 = svorr_x (p0, z0, 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_s16.c -new file mode 100644 -index 000000000..c9b268d3d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_s16.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ 
-+#include "test_sve_acle.h" -+ -+/* -+** orv_x0_s16: -+** orv h([0-9]+), p0, z0\.h -+** umov w0, v\1\.h\[0\] -+** ret -+*/ -+TEST_REDUCTION_X (orv_x0_s16, int16_t, svint16_t, -+ x0 = svorv_s16 (p0, z0), -+ x0 = svorv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_s32.c -new file mode 100644 -index 000000000..df4025f54 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_s32.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** orv_x0_s32: -+** orv (s[0-9]+), p0, z0\.s -+** fmov w0, \1 -+** ret -+*/ -+TEST_REDUCTION_X (orv_x0_s32, int32_t, svint32_t, -+ x0 = svorv_s32 (p0, z0), -+ x0 = svorv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_s64.c -new file mode 100644 -index 000000000..76a835ce3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_s64.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** orv_x0_s64: -+** orv (d[0-9]+), p0, z0\.d -+** fmov x0, \1 -+** ret -+*/ -+TEST_REDUCTION_X (orv_x0_s64, int64_t, svint64_t, -+ x0 = svorv_s64 (p0, z0), -+ x0 = svorv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_s8.c -new file mode 100644 -index 000000000..3f2031d9c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_s8.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** orv_x0_s8: -+** orv b([0-9]+), p0, z0\.b -+** umov w0, v\1\.b\[0\] -+** ret -+*/ -+TEST_REDUCTION_X (orv_x0_s8, int8_t, svint8_t, -+ x0 = svorv_s8 (p0, z0), -+ x0 = svorv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_u16.c -new file mode 100644 -index 000000000..28bfbecb0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_u16.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** orv_x0_u16: -+** orv h([0-9]+), p0, z0\.h -+** umov w0, v\1\.h\[0\] -+** ret -+*/ -+TEST_REDUCTION_X (orv_x0_u16, uint16_t, svuint16_t, -+ x0 = svorv_u16 (p0, z0), -+ x0 = svorv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_u32.c -new file mode 100644 -index 000000000..1988d5623 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_u32.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** orv_x0_u32: -+** orv (s[0-9]+), p0, z0\.s -+** fmov w0, \1 -+** ret -+*/ -+TEST_REDUCTION_X (orv_x0_u32, uint32_t, svuint32_t, -+ x0 = svorv_u32 (p0, z0), -+ x0 = svorv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_u64.c -new file mode 100644 -index 000000000..c8a8429a7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_u64.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** orv_x0_u64: -+** orv (d[0-9]+), p0, z0\.d -+** fmov x0, \1 -+** ret -+*/ -+TEST_REDUCTION_X 
(orv_x0_u64, uint64_t, svuint64_t, -+ x0 = svorv_u64 (p0, z0), -+ x0 = svorv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_u8.c -new file mode 100644 -index 000000000..bcab32d8b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/orv_u8.c -@@ -0,0 +1,13 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** orv_x0_u8: -+** orv b([0-9]+), p0, z0\.b -+** umov w0, v\1\.b\[0\] -+** ret -+*/ -+TEST_REDUCTION_X (orv_x0_u8, uint8_t, svuint8_t, -+ x0 = svorv_u8 (p0, z0), -+ x0 = svorv (p0, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pfalse.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pfalse.c -new file mode 100644 -index 000000000..a74a59283 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pfalse.c -@@ -0,0 +1,13 @@ -+/* { dg-additional-options "-msve-vector-bits=scalable" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** pfalse_b: -+** pfalse p0\.b -+** ret -+*/ -+TEST_P (pfalse_b, -+ p0 = svpfalse_b (), -+ p0 = svpfalse ()); -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pfirst_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pfirst_b.c -new file mode 100644 -index 000000000..a32099656 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pfirst_b.c -@@ -0,0 +1,22 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** pfirst_b_tied1: -+** pfirst p0\.b, p3, p0\.b -+** ret -+*/ -+TEST_UNIFORM_P (pfirst_b_tied1, -+ p0 = svpfirst_b (p3, p0), -+ p0 = svpfirst (p3, p0)) -+ -+/* -+** pfirst_b_untied: -+** mov p0\.b, p1\.b -+** pfirst p0\.b, p3, p0\.b -+** ret -+*/ -+TEST_UNIFORM_P (pfirst_b_untied, -+ p0 = svpfirst_b (p3, p1), -+ p0 = svpfirst (p3, p1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b16.c -new file mode 100644 -index 000000000..ad0efe5e7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b16.c -@@ -0,0 +1,22 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** pnext_b16_tied1: -+** pnext p0\.h, p3, p0\.h -+** ret -+*/ -+TEST_UNIFORM_P (pnext_b16_tied1, -+ p0 = svpnext_b16 (p3, p0), -+ p0 = svpnext_b16 (p3, p0)) -+ -+/* -+** pnext_b16_untied: -+** mov p0\.b, p1\.b -+** pnext p0\.h, p3, p0\.h -+** ret -+*/ -+TEST_UNIFORM_P (pnext_b16_untied, -+ p0 = svpnext_b16 (p3, p1), -+ p0 = svpnext_b16 (p3, p1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b32.c -new file mode 100644 -index 000000000..a0030fae1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b32.c -@@ -0,0 +1,22 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** pnext_b32_tied1: -+** pnext p0\.s, p3, p0\.s -+** ret -+*/ -+TEST_UNIFORM_P (pnext_b32_tied1, -+ p0 = svpnext_b32 (p3, p0), -+ p0 = svpnext_b32 (p3, p0)) -+ -+/* -+** pnext_b32_untied: -+** mov p0\.b, p1\.b -+** pnext p0\.s, p3, p0\.s -+** ret -+*/ -+TEST_UNIFORM_P (pnext_b32_untied, -+ p0 = svpnext_b32 (p3, p1), -+ p0 = svpnext_b32 (p3, p1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b64.c -new 
file mode 100644 -index 000000000..59db2f04f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b64.c -@@ -0,0 +1,22 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** pnext_b64_tied1: -+** pnext p0\.d, p3, p0\.d -+** ret -+*/ -+TEST_UNIFORM_P (pnext_b64_tied1, -+ p0 = svpnext_b64 (p3, p0), -+ p0 = svpnext_b64 (p3, p0)) -+ -+/* -+** pnext_b64_untied: -+** mov p0\.b, p1\.b -+** pnext p0\.d, p3, p0\.d -+** ret -+*/ -+TEST_UNIFORM_P (pnext_b64_untied, -+ p0 = svpnext_b64 (p3, p1), -+ p0 = svpnext_b64 (p3, p1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b8.c -new file mode 100644 -index 000000000..cfc2e907c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/pnext_b8.c -@@ -0,0 +1,22 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** pnext_b8_tied1: -+** pnext p0\.b, p3, p0\.b -+** ret -+*/ -+TEST_UNIFORM_P (pnext_b8_tied1, -+ p0 = svpnext_b8 (p3, p0), -+ p0 = svpnext_b8 (p3, p0)) -+ -+/* -+** pnext_b8_untied: -+** mov p0\.b, p1\.b -+** pnext p0\.b, p3, p0\.b -+** ret -+*/ -+TEST_UNIFORM_P (pnext_b8_untied, -+ p0 = svpnext_b8 (p3, p1), -+ p0 = svpnext_b8 (p3, p1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb.c -new file mode 100644 -index 000000000..d2b2777e6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb.c -@@ -0,0 +1,245 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** prfb_base: -+** prfb pldl1keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfb_base, uint8_t, -+ svprfb (p0, x0, SV_PLDL1KEEP), -+ svprfb (p0, x0, SV_PLDL1KEEP)) -+ -+/* -+** prfb_u8_index: -+** prfb pldl1keep, p0, \[x0, x1\] -+** ret -+*/ -+TEST_PREFETCH (prfb_u8_index, uint8_t, -+ svprfb (p0, x0 + x1, SV_PLDL1KEEP), -+ svprfb (p0, x0 + x1, SV_PLDL1KEEP)) -+ -+/* -+** prfb_u8_1: -+** add (x[0-9+]), x0, #?1 -+** prfb pldl1keep, p0, \[\1\] -+** ret -+*/ -+TEST_PREFETCH (prfb_u8_1, uint8_t, -+ svprfb (p0, x0 + 1, SV_PLDL1KEEP), -+ svprfb (p0, x0 + 1, SV_PLDL1KEEP)) -+ -+/* -+** prfb_u16_index: -+** add (x[0-9+]), x0, x1, lsl #?1 -+** prfb pldl1keep, p0, \[\1\] -+** ret -+*/ -+TEST_PREFETCH (prfb_u16_index, uint16_t, -+ svprfb (p0, x0 + x1, SV_PLDL1KEEP), -+ svprfb (p0, x0 + x1, SV_PLDL1KEEP)) -+ -+/* -+** prfb_u16_1: -+** add (x[0-9+]), x0, #?2 -+** prfb pldl1keep, p0, \[\1\] -+** ret -+*/ -+TEST_PREFETCH (prfb_u16_1, uint16_t, -+ svprfb (p0, x0 + 1, SV_PLDL1KEEP), -+ svprfb (p0, x0 + 1, SV_PLDL1KEEP)) -+ -+/* -+** prfb_u32_index: -+** add (x[0-9+]), x0, x1, lsl #?2 -+** prfb pldl1keep, p0, \[\1\] -+** ret -+*/ -+TEST_PREFETCH (prfb_u32_index, uint32_t, -+ svprfb (p0, x0 + x1, SV_PLDL1KEEP), -+ svprfb (p0, x0 + x1, SV_PLDL1KEEP)) -+ -+/* -+** prfb_u32_1: -+** add (x[0-9+]), x0, #?4 -+** prfb pldl1keep, p0, \[\1\] -+** ret -+*/ -+TEST_PREFETCH (prfb_u32_1, uint32_t, -+ svprfb (p0, x0 + 1, SV_PLDL1KEEP), -+ svprfb (p0, x0 + 1, SV_PLDL1KEEP)) -+ -+/* -+** prfb_u64_index: -+** add (x[0-9+]), x0, x1, lsl #?3 -+** prfb pldl1keep, p0, \[\1\] -+** ret -+*/ -+TEST_PREFETCH (prfb_u64_index, uint64_t, -+ svprfb (p0, x0 + x1, SV_PLDL1KEEP), -+ svprfb (p0, x0 + x1, SV_PLDL1KEEP)) -+ -+/* -+** prfb_u64_1: -+** add (x[0-9+]), x0, #?8 -+** prfb pldl1keep, p0, \[\1\] -+** ret -+*/ -+TEST_PREFETCH (prfb_u64_1, 
uint64_t, -+ svprfb (p0, x0 + 1, SV_PLDL1KEEP), -+ svprfb (p0, x0 + 1, SV_PLDL1KEEP)) -+ -+/* -+** prfb_pldl1strm: -+** prfb pldl1strm, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfb_pldl1strm, uint8_t, -+ svprfb (p0, x0, SV_PLDL1STRM), -+ svprfb (p0, x0, SV_PLDL1STRM)) -+ -+/* -+** prfb_pldl2keep: -+** prfb pldl2keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfb_pldl2keep, uint8_t, -+ svprfb (p0, x0, SV_PLDL2KEEP), -+ svprfb (p0, x0, SV_PLDL2KEEP)) -+ -+/* -+** prfb_pldl2strm: -+** prfb pldl2strm, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfb_pldl2strm, uint8_t, -+ svprfb (p0, x0, SV_PLDL2STRM), -+ svprfb (p0, x0, SV_PLDL2STRM)) -+ -+/* -+** prfb_pldl3keep: -+** prfb pldl3keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfb_pldl3keep, uint8_t, -+ svprfb (p0, x0, SV_PLDL3KEEP), -+ svprfb (p0, x0, SV_PLDL3KEEP)) -+ -+/* -+** prfb_pldl3strm: -+** prfb pldl3strm, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfb_pldl3strm, uint8_t, -+ svprfb (p0, x0, SV_PLDL3STRM), -+ svprfb (p0, x0, SV_PLDL3STRM)) -+ -+/* -+** prfb_pstl1keep: -+** prfb pstl1keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfb_pstl1keep, uint8_t, -+ svprfb (p0, x0, SV_PSTL1KEEP), -+ svprfb (p0, x0, SV_PSTL1KEEP)) -+ -+/* -+** prfb_pstl1strm: -+** prfb pstl1strm, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfb_pstl1strm, uint8_t, -+ svprfb (p0, x0, SV_PSTL1STRM), -+ svprfb (p0, x0, SV_PSTL1STRM)) -+ -+/* -+** prfb_pstl2keep: -+** prfb pstl2keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfb_pstl2keep, uint8_t, -+ svprfb (p0, x0, SV_PSTL2KEEP), -+ svprfb (p0, x0, SV_PSTL2KEEP)) -+ -+/* -+** prfb_pstl2strm: -+** prfb pstl2strm, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfb_pstl2strm, uint8_t, -+ svprfb (p0, x0, SV_PSTL2STRM), -+ svprfb (p0, x0, SV_PSTL2STRM)) -+ -+/* -+** prfb_pstl3keep: -+** prfb pstl3keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfb_pstl3keep, uint8_t, -+ svprfb (p0, x0, SV_PSTL3KEEP), -+ svprfb (p0, x0, SV_PSTL3KEEP)) -+ -+/* -+** prfb_pstl3strm: -+** prfb pstl3strm, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfb_pstl3strm, uint8_t, -+ svprfb (p0, x0, SV_PSTL3STRM), -+ svprfb (p0, x0, SV_PSTL3STRM)) -+ -+/* -+** prfb_vnum_0: -+** prfb pldl1keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfb_vnum_0, uint8_t, -+ svprfb_vnum (p0, x0, 0, SV_PLDL1KEEP), -+ svprfb_vnum (p0, x0, 0, SV_PLDL1KEEP)) -+ -+/* -+** prfb_vnum_1: -+** incb x0 -+** prfb pldl1keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfb_vnum_1, uint16_t, -+ svprfb_vnum (p0, x0, 1, SV_PLDL1KEEP), -+ svprfb_vnum (p0, x0, 1, SV_PLDL1KEEP)) -+ -+/* -+** prfb_vnum_2: -+** incb x0, all, mul #2 -+** prfb pldl1keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfb_vnum_2, uint32_t, -+ svprfb_vnum (p0, x0, 2, SV_PLDL1KEEP), -+ svprfb_vnum (p0, x0, 2, SV_PLDL1KEEP)) -+ -+/* -+** prfb_vnum_3: -+** incb x0, all, mul #3 -+** prfb pldl1keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfb_vnum_3, uint64_t, -+ svprfb_vnum (p0, x0, 3, SV_PLDL1KEEP), -+ svprfb_vnum (p0, x0, 3, SV_PLDL1KEEP)) -+ -+/* -+** prfb_vnum_x1: -+** cntb (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** prfb pldl1keep, p0, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** prfb zldl1keep, p0, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_PREFETCH (prfb_vnum_x1, uint64_t, -+ svprfb_vnum (p0, x0, x1, SV_PLDL1KEEP), -+ svprfb_vnum (p0, x0, x1, SV_PLDL1KEEP)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb_gather.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb_gather.c -new file mode 100644 -index 000000000..c4bfbbbf7 ---- /dev/null -+++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb_gather.c -@@ -0,0 +1,223 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** prfb_gather_u32base: -+** prfb pldl1keep, p0, \[z0\.s\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfb_gather_u32base, svuint32_t, -+ svprfb_gather_u32base (p0, z0, SV_PLDL1KEEP), -+ svprfb_gather (p0, z0, SV_PLDL1KEEP)) -+ -+/* -+** prfb_gather_u64base: -+** prfb pldl1strm, p0, \[z0\.d\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfb_gather_u64base, svuint64_t, -+ svprfb_gather_u64base (p0, z0, SV_PLDL1STRM), -+ svprfb_gather (p0, z0, SV_PLDL1STRM)) -+ -+/* -+** prfb_gather_x0_u32base_offset: -+** prfb pldl2keep, p0, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfb_gather_x0_u32base_offset, svuint32_t, -+ svprfb_gather_u32base_offset (p0, z0, x0, SV_PLDL2KEEP), -+ svprfb_gather_offset (p0, z0, x0, SV_PLDL2KEEP)) -+ -+/* -+** prfb_gather_m1_u32base_offset: -+** mov (x[0-9]+), #?-1 -+** prfb pldl2strm, p0, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfb_gather_m1_u32base_offset, svuint32_t, -+ svprfb_gather_u32base_offset (p0, z0, -1, SV_PLDL2STRM), -+ svprfb_gather_offset (p0, z0, -1, SV_PLDL2STRM)) -+ -+/* -+** prfb_gather_0_u32base_offset: -+** prfb pldl3keep, p0, \[z0\.s\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfb_gather_0_u32base_offset, svuint32_t, -+ svprfb_gather_u32base_offset (p0, z0, 0, SV_PLDL3KEEP), -+ svprfb_gather_offset (p0, z0, 0, SV_PLDL3KEEP)) -+ -+/* -+** prfb_gather_5_u32base_offset: -+** prfb pldl3strm, p0, \[z0\.s, #5\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfb_gather_5_u32base_offset, svuint32_t, -+ svprfb_gather_u32base_offset (p0, z0, 5, SV_PLDL3STRM), -+ svprfb_gather_offset (p0, z0, 5, SV_PLDL3STRM)) -+ -+/* -+** prfb_gather_31_u32base_offset: -+** prfb pstl1keep, p0, \[z0\.s, #31\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfb_gather_31_u32base_offset, svuint32_t, -+ svprfb_gather_u32base_offset (p0, z0, 31, SV_PSTL1KEEP), -+ svprfb_gather_offset (p0, z0, 31, SV_PSTL1KEEP)) -+ -+/* -+** prfb_gather_32_u32base_offset: -+** mov (x[0-9]+), #?32 -+** prfb pstl1strm, p0, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfb_gather_32_u32base_offset, svuint32_t, -+ svprfb_gather_u32base_offset (p0, z0, 32, SV_PSTL1STRM), -+ svprfb_gather_offset (p0, z0, 32, SV_PSTL1STRM)) -+ -+/* -+** prfb_gather_x0_u64base_offset: -+** prfb pstl2keep, p0, \[x0, z0\.d\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfb_gather_x0_u64base_offset, svuint64_t, -+ svprfb_gather_u64base_offset (p0, z0, x0, SV_PSTL2KEEP), -+ svprfb_gather_offset (p0, z0, x0, SV_PSTL2KEEP)) -+ -+/* -+** prfb_gather_m1_u64base_offset: -+** mov (x[0-9]+), #?-1 -+** prfb pstl2strm, p0, \[\1, z0\.d\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfb_gather_m1_u64base_offset, svuint64_t, -+ svprfb_gather_u64base_offset (p0, z0, -1, SV_PSTL2STRM), -+ svprfb_gather_offset (p0, z0, -1, SV_PSTL2STRM)) -+ -+/* -+** prfb_gather_0_u64base_offset: -+** prfb pstl3keep, p0, \[z0\.d\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfb_gather_0_u64base_offset, svuint64_t, -+ svprfb_gather_u64base_offset (p0, z0, 0, SV_PSTL3KEEP), -+ svprfb_gather_offset (p0, z0, 0, SV_PSTL3KEEP)) -+ -+/* -+** prfb_gather_5_u64base_offset: -+** prfb pstl3strm, p0, \[z0\.d, #5\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfb_gather_5_u64base_offset, svuint64_t, -+ svprfb_gather_u64base_offset (p0, z0, 5, SV_PSTL3STRM), -+ svprfb_gather_offset (p0, z0, 5, SV_PSTL3STRM)) -+ -+/* -+** 
prfb_gather_31_u64base_offset: -+** prfb pldl1keep, p0, \[z0\.d, #31\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfb_gather_31_u64base_offset, svuint64_t, -+ svprfb_gather_u64base_offset (p0, z0, 31, SV_PLDL1KEEP), -+ svprfb_gather_offset (p0, z0, 31, SV_PLDL1KEEP)) -+ -+/* -+** prfb_gather_32_u64base_offset: -+** mov (x[0-9]+), #?32 -+** prfb pldl1strm, p0, \[\1, z0\.d\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfb_gather_32_u64base_offset, svuint64_t, -+ svprfb_gather_u64base_offset (p0, z0, 32, SV_PLDL1STRM), -+ svprfb_gather_offset (p0, z0, 32, SV_PLDL1STRM)) -+ -+/* -+** prfb_gather_x0_s32offset: -+** prfb pldl2keep, p0, \[x0, z0\.s, sxtw\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfb_gather_x0_s32offset, svint32_t, -+ svprfb_gather_s32offset (p0, x0, z0, SV_PLDL2KEEP), -+ svprfb_gather_offset (p0, x0, z0, SV_PLDL2KEEP)) -+ -+/* -+** prfb_gather_s32offset: -+** prfb pldl2strm, p0, \[x0, z1\.s, sxtw\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfb_gather_s32offset, svint32_t, -+ svprfb_gather_s32offset (p0, x0, z1, SV_PLDL2STRM), -+ svprfb_gather_offset (p0, x0, z1, SV_PLDL2STRM)) -+ -+/* -+** prfb_gather_x0_u32offset: -+** prfb pldl3keep, p0, \[x0, z0\.s, uxtw\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfb_gather_x0_u32offset, svuint32_t, -+ svprfb_gather_u32offset (p0, x0, z0, SV_PLDL3KEEP), -+ svprfb_gather_offset (p0, x0, z0, SV_PLDL3KEEP)) -+ -+/* -+** prfb_gather_u32offset: -+** prfb pldl3strm, p0, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfb_gather_u32offset, svuint32_t, -+ svprfb_gather_u32offset (p0, x0, z1, SV_PLDL3STRM), -+ svprfb_gather_offset (p0, x0, z1, SV_PLDL3STRM)) -+ -+/* -+** prfb_gather_x0_s64offset: -+** prfb pstl1keep, p0, \[x0, z0\.d\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfb_gather_x0_s64offset, svint64_t, -+ svprfb_gather_s64offset (p0, x0, z0, SV_PSTL1KEEP), -+ svprfb_gather_offset (p0, x0, z0, SV_PSTL1KEEP)) -+ -+/* -+** prfb_gather_s64offset: -+** prfb pstl1strm, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfb_gather_s64offset, svint64_t, -+ svprfb_gather_s64offset (p0, x0, z1, SV_PSTL1STRM), -+ svprfb_gather_offset (p0, x0, z1, SV_PSTL1STRM)) -+ -+/* -+** prfb_gather_ext_s64offset: -+** prfb pstl1strm, p0, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfb_gather_ext_s64offset, svint64_t, -+ svprfb_gather_s64offset (p0, x0, svextw_s64_x (p0, z1), SV_PSTL1STRM), -+ svprfb_gather_offset (p0, x0, svextw_x (p0, z1), SV_PSTL1STRM)) -+ -+/* -+** prfb_gather_x0_u64offset: -+** prfb pstl2keep, p0, \[x0, z0\.d\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfb_gather_x0_u64offset, svuint64_t, -+ svprfb_gather_u64offset (p0, x0, z0, SV_PSTL2KEEP), -+ svprfb_gather_offset (p0, x0, z0, SV_PSTL2KEEP)) -+ -+/* -+** prfb_gather_u64offset: -+** prfb pstl2strm, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfb_gather_u64offset, svuint64_t, -+ svprfb_gather_u64offset (p0, x0, z1, SV_PSTL2STRM), -+ svprfb_gather_offset (p0, x0, z1, SV_PSTL2STRM)) -+ -+/* -+** prfb_gather_ext_u64offset: -+** prfb pstl2strm, p0, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfb_gather_ext_u64offset, svuint64_t, -+ svprfb_gather_u64offset (p0, x0, svextw_u64_x (p0, z1), SV_PSTL2STRM), -+ svprfb_gather_offset (p0, x0, svextw_x (p0, z1), SV_PSTL2STRM)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd.c -new file mode 100644 -index 000000000..72b2e6415 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd.c -@@ -0,0 +1,245 
@@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** prfd_base: -+** prfd pldl1keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfd_base, uint8_t, -+ svprfd (p0, x0, SV_PLDL1KEEP), -+ svprfd (p0, x0, SV_PLDL1KEEP)) -+ -+/* -+** prfd_u8_index: -+** add (x[0-9+]), (x0, x1|x1, x0) -+** prfd pldl1keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfd_u8_index, uint8_t, -+ svprfd (p0, x0 + x1, SV_PLDL1KEEP), -+ svprfd (p0, x0 + x1, SV_PLDL1KEEP)) -+ -+/* -+** prfd_u8_1: -+** add (x[0-9+]), x0, #?1 -+** prfd pldl1keep, p0, \[\1\] -+** ret -+*/ -+TEST_PREFETCH (prfd_u8_1, uint8_t, -+ svprfd (p0, x0 + 1, SV_PLDL1KEEP), -+ svprfd (p0, x0 + 1, SV_PLDL1KEEP)) -+ -+/* -+** prfd_u16_index: -+** add (x[0-9+]), x0, x1, lsl #?1 -+** prfd pldl1keep, p0, \[\1\] -+** ret -+*/ -+TEST_PREFETCH (prfd_u16_index, uint16_t, -+ svprfd (p0, x0 + x1, SV_PLDL1KEEP), -+ svprfd (p0, x0 + x1, SV_PLDL1KEEP)) -+ -+/* -+** prfd_u16_1: -+** add (x[0-9+]), x0, #?2 -+** prfd pldl1keep, p0, \[\1\] -+** ret -+*/ -+TEST_PREFETCH (prfd_u16_1, uint16_t, -+ svprfd (p0, x0 + 1, SV_PLDL1KEEP), -+ svprfd (p0, x0 + 1, SV_PLDL1KEEP)) -+ -+/* -+** prfd_u32_index: -+** add (x[0-9+]), x0, x1, lsl #?2 -+** prfd pldl1keep, p0, \[\1\] -+** ret -+*/ -+TEST_PREFETCH (prfd_u32_index, uint32_t, -+ svprfd (p0, x0 + x1, SV_PLDL1KEEP), -+ svprfd (p0, x0 + x1, SV_PLDL1KEEP)) -+ -+/* -+** prfd_u32_1: -+** add (x[0-9+]), x0, #?4 -+** prfd pldl1keep, p0, \[\1\] -+** ret -+*/ -+TEST_PREFETCH (prfd_u32_1, uint32_t, -+ svprfd (p0, x0 + 1, SV_PLDL1KEEP), -+ svprfd (p0, x0 + 1, SV_PLDL1KEEP)) -+ -+/* -+** prfd_u64_index: -+** prfd pldl1keep, p0, \[x0, x1, lsl #?3\] -+** ret -+*/ -+TEST_PREFETCH (prfd_u64_index, uint64_t, -+ svprfd (p0, x0 + x1, SV_PLDL1KEEP), -+ svprfd (p0, x0 + x1, SV_PLDL1KEEP)) -+ -+/* -+** prfd_u64_1: -+** add (x[0-9+]), x0, #?8 -+** prfd pldl1keep, p0, \[\1\] -+** ret -+*/ -+TEST_PREFETCH (prfd_u64_1, uint64_t, -+ svprfd (p0, x0 + 1, SV_PLDL1KEEP), -+ svprfd (p0, x0 + 1, SV_PLDL1KEEP)) -+ -+/* -+** prfd_pldl1strm: -+** prfd pldl1strm, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfd_pldl1strm, uint8_t, -+ svprfd (p0, x0, SV_PLDL1STRM), -+ svprfd (p0, x0, SV_PLDL1STRM)) -+ -+/* -+** prfd_pldl2keep: -+** prfd pldl2keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfd_pldl2keep, uint8_t, -+ svprfd (p0, x0, SV_PLDL2KEEP), -+ svprfd (p0, x0, SV_PLDL2KEEP)) -+ -+/* -+** prfd_pldl2strm: -+** prfd pldl2strm, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfd_pldl2strm, uint8_t, -+ svprfd (p0, x0, SV_PLDL2STRM), -+ svprfd (p0, x0, SV_PLDL2STRM)) -+ -+/* -+** prfd_pldl3keep: -+** prfd pldl3keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfd_pldl3keep, uint8_t, -+ svprfd (p0, x0, SV_PLDL3KEEP), -+ svprfd (p0, x0, SV_PLDL3KEEP)) -+ -+/* -+** prfd_pldl3strm: -+** prfd pldl3strm, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfd_pldl3strm, uint8_t, -+ svprfd (p0, x0, SV_PLDL3STRM), -+ svprfd (p0, x0, SV_PLDL3STRM)) -+ -+/* -+** prfd_pstl1keep: -+** prfd pstl1keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfd_pstl1keep, uint8_t, -+ svprfd (p0, x0, SV_PSTL1KEEP), -+ svprfd (p0, x0, SV_PSTL1KEEP)) -+ -+/* -+** prfd_pstl1strm: -+** prfd pstl1strm, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfd_pstl1strm, uint8_t, -+ svprfd (p0, x0, SV_PSTL1STRM), -+ svprfd (p0, x0, SV_PSTL1STRM)) -+ -+/* -+** prfd_pstl2keep: -+** prfd pstl2keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfd_pstl2keep, uint8_t, -+ svprfd (p0, x0, SV_PSTL2KEEP), -+ svprfd (p0, x0, SV_PSTL2KEEP)) -+ -+/* -+** 
prfd_pstl2strm: -+** prfd pstl2strm, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfd_pstl2strm, uint8_t, -+ svprfd (p0, x0, SV_PSTL2STRM), -+ svprfd (p0, x0, SV_PSTL2STRM)) -+ -+/* -+** prfd_pstl3keep: -+** prfd pstl3keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfd_pstl3keep, uint8_t, -+ svprfd (p0, x0, SV_PSTL3KEEP), -+ svprfd (p0, x0, SV_PSTL3KEEP)) -+ -+/* -+** prfd_pstl3strm: -+** prfd pstl3strm, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfd_pstl3strm, uint8_t, -+ svprfd (p0, x0, SV_PSTL3STRM), -+ svprfd (p0, x0, SV_PSTL3STRM)) -+ -+/* -+** prfd_vnum_0: -+** prfd pldl1keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfd_vnum_0, uint8_t, -+ svprfd_vnum (p0, x0, 0, SV_PLDL1KEEP), -+ svprfd_vnum (p0, x0, 0, SV_PLDL1KEEP)) -+ -+/* -+** prfd_vnum_1: -+** incb x0 -+** prfd pldl1keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfd_vnum_1, uint16_t, -+ svprfd_vnum (p0, x0, 1, SV_PLDL1KEEP), -+ svprfd_vnum (p0, x0, 1, SV_PLDL1KEEP)) -+ -+/* -+** prfd_vnum_2: -+** incb x0, all, mul #2 -+** prfd pldl1keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfd_vnum_2, uint32_t, -+ svprfd_vnum (p0, x0, 2, SV_PLDL1KEEP), -+ svprfd_vnum (p0, x0, 2, SV_PLDL1KEEP)) -+ -+/* -+** prfd_vnum_3: -+** incb x0, all, mul #3 -+** prfd pldl1keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfd_vnum_3, uint64_t, -+ svprfd_vnum (p0, x0, 3, SV_PLDL1KEEP), -+ svprfd_vnum (p0, x0, 3, SV_PLDL1KEEP)) -+ -+/* -+** prfd_vnum_x1: -+** cntb (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** prfd pldl1keep, p0, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** prfd zldl1keep, p0, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_PREFETCH (prfd_vnum_x1, uint64_t, -+ svprfd_vnum (p0, x0, x1, SV_PLDL1KEEP), -+ svprfd_vnum (p0, x0, x1, SV_PLDL1KEEP)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd_gather.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd_gather.c -new file mode 100644 -index 000000000..a84acb1a1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd_gather.c -@@ -0,0 +1,225 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** prfd_gather_u32base: -+** prfd pldl1keep, p0, \[z0\.s\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfd_gather_u32base, svuint32_t, -+ svprfd_gather_u32base (p0, z0, SV_PLDL1KEEP), -+ svprfd_gather (p0, z0, SV_PLDL1KEEP)) -+ -+/* -+** prfd_gather_u64base: -+** prfd pldl1strm, p0, \[z0\.d\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfd_gather_u64base, svuint64_t, -+ svprfd_gather_u64base (p0, z0, SV_PLDL1STRM), -+ svprfd_gather (p0, z0, SV_PLDL1STRM)) -+ -+/* -+** prfd_gather_x0_u32base_index: -+** lsl (x[0-9]+), x0, #?3 -+** prfb pldl2keep, p0, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfd_gather_x0_u32base_index, svuint32_t, -+ svprfd_gather_u32base_index (p0, z0, x0, SV_PLDL2KEEP), -+ svprfd_gather_index (p0, z0, x0, SV_PLDL2KEEP)) -+ -+/* -+** prfd_gather_m1_u32base_index: -+** mov (x[0-9]+), #?-8 -+** prfb pldl2strm, p0, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfd_gather_m1_u32base_index, svuint32_t, -+ svprfd_gather_u32base_index (p0, z0, -1, SV_PLDL2STRM), -+ svprfd_gather_index (p0, z0, -1, SV_PLDL2STRM)) -+ -+/* -+** prfd_gather_0_u32base_index: -+** prfd pldl3keep, p0, \[z0\.s\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfd_gather_0_u32base_index, svuint32_t, -+ svprfd_gather_u32base_index (p0, z0, 0, SV_PLDL3KEEP), -+ svprfd_gather_index (p0, z0, 0, SV_PLDL3KEEP)) -+ -+/* -+** prfd_gather_5_u32base_index: -+** prfd pldl3strm, p0, \[z0\.s, #40\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfd_gather_5_u32base_index, svuint32_t, -+ svprfd_gather_u32base_index (p0, z0, 5, SV_PLDL3STRM), -+ svprfd_gather_index (p0, z0, 5, SV_PLDL3STRM)) -+ -+/* -+** prfd_gather_31_u32base_index: -+** prfd pstl1keep, p0, \[z0\.s, #248\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfd_gather_31_u32base_index, svuint32_t, -+ svprfd_gather_u32base_index (p0, z0, 31, SV_PSTL1KEEP), -+ svprfd_gather_index (p0, z0, 31, SV_PSTL1KEEP)) -+ -+/* -+** prfd_gather_32_u32base_index: -+** mov (x[0-9]+), #?256 -+** prfb pstl1strm, p0, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfd_gather_32_u32base_index, svuint32_t, -+ svprfd_gather_u32base_index (p0, z0, 32, SV_PSTL1STRM), -+ svprfd_gather_index (p0, z0, 32, SV_PSTL1STRM)) -+ -+/* -+** prfd_gather_x0_u64base_index: -+** lsl (x[0-9]+), x0, #?3 -+** prfb pstl2keep, p0, \[\1, z0\.d\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfd_gather_x0_u64base_index, svuint64_t, -+ svprfd_gather_u64base_index (p0, z0, x0, SV_PSTL2KEEP), -+ svprfd_gather_index (p0, z0, x0, SV_PSTL2KEEP)) -+ -+/* -+** prfd_gather_m1_u64base_index: -+** mov (x[0-9]+), #?-8 -+** prfb pstl2strm, p0, \[\1, z0\.d\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfd_gather_m1_u64base_index, svuint64_t, -+ svprfd_gather_u64base_index (p0, z0, -1, SV_PSTL2STRM), -+ svprfd_gather_index (p0, z0, -1, SV_PSTL2STRM)) -+ -+/* -+** prfd_gather_0_u64base_index: -+** prfd pstl3keep, p0, \[z0\.d\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfd_gather_0_u64base_index, svuint64_t, -+ svprfd_gather_u64base_index (p0, z0, 0, SV_PSTL3KEEP), -+ svprfd_gather_index (p0, z0, 0, SV_PSTL3KEEP)) -+ -+/* -+** prfd_gather_5_u64base_index: -+** prfd pstl3strm, p0, \[z0\.d, #40\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfd_gather_5_u64base_index, svuint64_t, -+ svprfd_gather_u64base_index (p0, z0, 5, SV_PSTL3STRM), -+ svprfd_gather_index (p0, z0, 5, SV_PSTL3STRM)) -+ -+/* -+** prfd_gather_31_u64base_index: -+** prfd pldl1keep, p0, \[z0\.d, #248\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfd_gather_31_u64base_index, 
svuint64_t, -+ svprfd_gather_u64base_index (p0, z0, 31, SV_PLDL1KEEP), -+ svprfd_gather_index (p0, z0, 31, SV_PLDL1KEEP)) -+ -+/* -+** prfd_gather_32_u64base_index: -+** mov (x[0-9]+), #?256 -+** prfb pldl1strm, p0, \[\1, z0\.d\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfd_gather_32_u64base_index, svuint64_t, -+ svprfd_gather_u64base_index (p0, z0, 32, SV_PLDL1STRM), -+ svprfd_gather_index (p0, z0, 32, SV_PLDL1STRM)) -+ -+/* -+** prfd_gather_x0_s32index: -+** prfd pldl2keep, p0, \[x0, z0\.s, sxtw 3\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfd_gather_x0_s32index, svint32_t, -+ svprfd_gather_s32index (p0, x0, z0, SV_PLDL2KEEP), -+ svprfd_gather_index (p0, x0, z0, SV_PLDL2KEEP)) -+ -+/* -+** prfd_gather_s32index: -+** prfd pldl2strm, p0, \[x0, z1\.s, sxtw 3\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfd_gather_s32index, svint32_t, -+ svprfd_gather_s32index (p0, x0, z1, SV_PLDL2STRM), -+ svprfd_gather_index (p0, x0, z1, SV_PLDL2STRM)) -+ -+/* -+** prfd_gather_x0_u32index: -+** prfd pldl3keep, p0, \[x0, z0\.s, uxtw 3\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfd_gather_x0_u32index, svuint32_t, -+ svprfd_gather_u32index (p0, x0, z0, SV_PLDL3KEEP), -+ svprfd_gather_index (p0, x0, z0, SV_PLDL3KEEP)) -+ -+/* -+** prfd_gather_u32index: -+** prfd pldl3strm, p0, \[x0, z1\.s, uxtw 3\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfd_gather_u32index, svuint32_t, -+ svprfd_gather_u32index (p0, x0, z1, SV_PLDL3STRM), -+ svprfd_gather_index (p0, x0, z1, SV_PLDL3STRM)) -+ -+/* -+** prfd_gather_x0_s64index: -+** prfd pstl1keep, p0, \[x0, z0\.d, lsl 3\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfd_gather_x0_s64index, svint64_t, -+ svprfd_gather_s64index (p0, x0, z0, SV_PSTL1KEEP), -+ svprfd_gather_index (p0, x0, z0, SV_PSTL1KEEP)) -+ -+/* -+** prfd_gather_s64index: -+** prfd pstl1strm, p0, \[x0, z1\.d, lsl 3\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfd_gather_s64index, svint64_t, -+ svprfd_gather_s64index (p0, x0, z1, SV_PSTL1STRM), -+ svprfd_gather_index (p0, x0, z1, SV_PSTL1STRM)) -+ -+/* -+** prfd_gather_ext_s64index: -+** prfd pstl1strm, p0, \[x0, z1\.d, sxtw 3\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfd_gather_ext_s64index, svint64_t, -+ svprfd_gather_s64index (p0, x0, svextw_s64_x (p0, z1), SV_PSTL1STRM), -+ svprfd_gather_index (p0, x0, svextw_x (p0, z1), SV_PSTL1STRM)) -+ -+/* -+** prfd_gather_x0_u64index: -+** prfd pstl2keep, p0, \[x0, z0\.d, lsl 3\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfd_gather_x0_u64index, svuint64_t, -+ svprfd_gather_u64index (p0, x0, z0, SV_PSTL2KEEP), -+ svprfd_gather_index (p0, x0, z0, SV_PSTL2KEEP)) -+ -+/* -+** prfd_gather_u64index: -+** prfd pstl2strm, p0, \[x0, z1\.d, lsl 3\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfd_gather_u64index, svuint64_t, -+ svprfd_gather_u64index (p0, x0, z1, SV_PSTL2STRM), -+ svprfd_gather_index (p0, x0, z1, SV_PSTL2STRM)) -+ -+/* -+** prfd_gather_ext_u64index: -+** prfd pstl2strm, p0, \[x0, z1\.d, uxtw 3\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfd_gather_ext_u64index, svuint64_t, -+ svprfd_gather_u64index (p0, x0, svextw_u64_x (p0, z1), SV_PSTL2STRM), -+ svprfd_gather_index (p0, x0, svextw_x (p0, z1), SV_PSTL2STRM)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh.c -new file mode 100644 -index 000000000..89069f9b7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh.c -@@ -0,0 +1,245 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** prfh_base: -+** prfh pldl1keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfh_base, uint8_t, -+ svprfh (p0, x0, SV_PLDL1KEEP), -+ svprfh (p0, x0, SV_PLDL1KEEP)) -+ -+/* -+** prfh_u8_index: -+** add (x[0-9+]), (x0, x1|x1, x0) -+** prfh pldl1keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfh_u8_index, uint8_t, -+ svprfh (p0, x0 + x1, SV_PLDL1KEEP), -+ svprfh (p0, x0 + x1, SV_PLDL1KEEP)) -+ -+/* -+** prfh_u8_1: -+** add (x[0-9+]), x0, #?1 -+** prfh pldl1keep, p0, \[\1\] -+** ret -+*/ -+TEST_PREFETCH (prfh_u8_1, uint8_t, -+ svprfh (p0, x0 + 1, SV_PLDL1KEEP), -+ svprfh (p0, x0 + 1, SV_PLDL1KEEP)) -+ -+/* -+** prfh_u16_index: -+** prfh pldl1keep, p0, \[x0, x1, lsl #?1\] -+** ret -+*/ -+TEST_PREFETCH (prfh_u16_index, uint16_t, -+ svprfh (p0, x0 + x1, SV_PLDL1KEEP), -+ svprfh (p0, x0 + x1, SV_PLDL1KEEP)) -+ -+/* -+** prfh_u16_1: -+** add (x[0-9+]), x0, #?2 -+** prfh pldl1keep, p0, \[\1\] -+** ret -+*/ -+TEST_PREFETCH (prfh_u16_1, uint16_t, -+ svprfh (p0, x0 + 1, SV_PLDL1KEEP), -+ svprfh (p0, x0 + 1, SV_PLDL1KEEP)) -+ -+/* -+** prfh_u32_index: -+** add (x[0-9+]), x0, x1, lsl #?2 -+** prfh pldl1keep, p0, \[\1\] -+** ret -+*/ -+TEST_PREFETCH (prfh_u32_index, uint32_t, -+ svprfh (p0, x0 + x1, SV_PLDL1KEEP), -+ svprfh (p0, x0 + x1, SV_PLDL1KEEP)) -+ -+/* -+** prfh_u32_1: -+** add (x[0-9+]), x0, #?4 -+** prfh pldl1keep, p0, \[\1\] -+** ret -+*/ -+TEST_PREFETCH (prfh_u32_1, uint32_t, -+ svprfh (p0, x0 + 1, SV_PLDL1KEEP), -+ svprfh (p0, x0 + 1, SV_PLDL1KEEP)) -+ -+/* -+** prfh_u64_index: -+** add (x[0-9+]), x0, x1, lsl #?3 -+** prfh pldl1keep, p0, \[\1\] -+** ret -+*/ -+TEST_PREFETCH (prfh_u64_index, uint64_t, -+ svprfh (p0, x0 + x1, SV_PLDL1KEEP), -+ svprfh (p0, x0 + x1, SV_PLDL1KEEP)) -+ -+/* -+** prfh_u64_1: -+** add (x[0-9+]), x0, #?8 -+** prfh pldl1keep, p0, \[\1\] -+** ret -+*/ -+TEST_PREFETCH (prfh_u64_1, uint64_t, -+ svprfh (p0, x0 + 1, SV_PLDL1KEEP), -+ svprfh (p0, x0 + 1, SV_PLDL1KEEP)) -+ -+/* -+** prfh_pldl1strm: -+** prfh pldl1strm, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfh_pldl1strm, uint8_t, -+ svprfh (p0, x0, SV_PLDL1STRM), -+ svprfh (p0, x0, SV_PLDL1STRM)) -+ -+/* -+** prfh_pldl2keep: -+** prfh pldl2keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfh_pldl2keep, uint8_t, -+ svprfh (p0, x0, SV_PLDL2KEEP), -+ svprfh (p0, x0, SV_PLDL2KEEP)) -+ -+/* -+** prfh_pldl2strm: -+** prfh pldl2strm, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfh_pldl2strm, uint8_t, -+ svprfh (p0, x0, SV_PLDL2STRM), -+ svprfh (p0, x0, SV_PLDL2STRM)) -+ -+/* -+** prfh_pldl3keep: -+** prfh pldl3keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfh_pldl3keep, uint8_t, -+ svprfh (p0, x0, SV_PLDL3KEEP), -+ svprfh (p0, x0, SV_PLDL3KEEP)) -+ -+/* -+** prfh_pldl3strm: -+** prfh pldl3strm, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfh_pldl3strm, uint8_t, -+ svprfh (p0, x0, SV_PLDL3STRM), -+ svprfh (p0, x0, SV_PLDL3STRM)) -+ -+/* -+** prfh_pstl1keep: -+** prfh pstl1keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfh_pstl1keep, uint8_t, -+ svprfh (p0, x0, SV_PSTL1KEEP), -+ svprfh (p0, x0, SV_PSTL1KEEP)) -+ -+/* -+** prfh_pstl1strm: -+** prfh pstl1strm, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfh_pstl1strm, uint8_t, -+ svprfh (p0, x0, SV_PSTL1STRM), -+ svprfh (p0, x0, SV_PSTL1STRM)) -+ -+/* -+** prfh_pstl2keep: -+** prfh pstl2keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfh_pstl2keep, uint8_t, -+ svprfh (p0, x0, SV_PSTL2KEEP), -+ svprfh (p0, x0, SV_PSTL2KEEP)) -+ -+/* -+** prfh_pstl2strm: -+** prfh pstl2strm, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH 
(prfh_pstl2strm, uint8_t, -+ svprfh (p0, x0, SV_PSTL2STRM), -+ svprfh (p0, x0, SV_PSTL2STRM)) -+ -+/* -+** prfh_pstl3keep: -+** prfh pstl3keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfh_pstl3keep, uint8_t, -+ svprfh (p0, x0, SV_PSTL3KEEP), -+ svprfh (p0, x0, SV_PSTL3KEEP)) -+ -+/* -+** prfh_pstl3strm: -+** prfh pstl3strm, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfh_pstl3strm, uint8_t, -+ svprfh (p0, x0, SV_PSTL3STRM), -+ svprfh (p0, x0, SV_PSTL3STRM)) -+ -+/* -+** prfh_vnum_0: -+** prfh pldl1keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfh_vnum_0, uint8_t, -+ svprfh_vnum (p0, x0, 0, SV_PLDL1KEEP), -+ svprfh_vnum (p0, x0, 0, SV_PLDL1KEEP)) -+ -+/* -+** prfh_vnum_1: -+** incb x0 -+** prfh pldl1keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfh_vnum_1, uint16_t, -+ svprfh_vnum (p0, x0, 1, SV_PLDL1KEEP), -+ svprfh_vnum (p0, x0, 1, SV_PLDL1KEEP)) -+ -+/* -+** prfh_vnum_2: -+** incb x0, all, mul #2 -+** prfh pldl1keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfh_vnum_2, uint32_t, -+ svprfh_vnum (p0, x0, 2, SV_PLDL1KEEP), -+ svprfh_vnum (p0, x0, 2, SV_PLDL1KEEP)) -+ -+/* -+** prfh_vnum_3: -+** incb x0, all, mul #3 -+** prfh pldl1keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfh_vnum_3, uint64_t, -+ svprfh_vnum (p0, x0, 3, SV_PLDL1KEEP), -+ svprfh_vnum (p0, x0, 3, SV_PLDL1KEEP)) -+ -+/* -+** prfh_vnum_x1: -+** cntb (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** prfh pldl1keep, p0, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** prfh zldl1keep, p0, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_PREFETCH (prfh_vnum_x1, uint64_t, -+ svprfh_vnum (p0, x0, x1, SV_PLDL1KEEP), -+ svprfh_vnum (p0, x0, x1, SV_PLDL1KEEP)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh_gather.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh_gather.c -new file mode 100644 -index 000000000..04b7a1575 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh_gather.c -@@ -0,0 +1,225 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** prfh_gather_u32base: -+** prfh pldl1keep, p0, \[z0\.s\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfh_gather_u32base, svuint32_t, -+ svprfh_gather_u32base (p0, z0, SV_PLDL1KEEP), -+ svprfh_gather (p0, z0, SV_PLDL1KEEP)) -+ -+/* -+** prfh_gather_u64base: -+** prfh pldl1strm, p0, \[z0\.d\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfh_gather_u64base, svuint64_t, -+ svprfh_gather_u64base (p0, z0, SV_PLDL1STRM), -+ svprfh_gather (p0, z0, SV_PLDL1STRM)) -+ -+/* -+** prfh_gather_x0_u32base_index: -+** lsl (x[0-9]+), x0, #?1 -+** prfb pldl2keep, p0, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfh_gather_x0_u32base_index, svuint32_t, -+ svprfh_gather_u32base_index (p0, z0, x0, SV_PLDL2KEEP), -+ svprfh_gather_index (p0, z0, x0, SV_PLDL2KEEP)) -+ -+/* -+** prfh_gather_m1_u32base_index: -+** mov (x[0-9]+), #?-2 -+** prfb pldl2strm, p0, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfh_gather_m1_u32base_index, svuint32_t, -+ svprfh_gather_u32base_index (p0, z0, -1, SV_PLDL2STRM), -+ svprfh_gather_index (p0, z0, -1, SV_PLDL2STRM)) -+ -+/* -+** prfh_gather_0_u32base_index: -+** prfh pldl3keep, p0, \[z0\.s\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfh_gather_0_u32base_index, svuint32_t, -+ svprfh_gather_u32base_index (p0, z0, 0, SV_PLDL3KEEP), -+ svprfh_gather_index (p0, z0, 0, SV_PLDL3KEEP)) -+ -+/* -+** prfh_gather_5_u32base_index: -+** prfh pldl3strm, p0, \[z0\.s, #10\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfh_gather_5_u32base_index, svuint32_t, -+ svprfh_gather_u32base_index (p0, z0, 5, SV_PLDL3STRM), -+ svprfh_gather_index (p0, z0, 5, SV_PLDL3STRM)) -+ -+/* -+** prfh_gather_31_u32base_index: -+** prfh pstl1keep, p0, \[z0\.s, #62\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfh_gather_31_u32base_index, svuint32_t, -+ svprfh_gather_u32base_index (p0, z0, 31, SV_PSTL1KEEP), -+ svprfh_gather_index (p0, z0, 31, SV_PSTL1KEEP)) -+ -+/* -+** prfh_gather_32_u32base_index: -+** mov (x[0-9]+), #?64 -+** prfb pstl1strm, p0, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfh_gather_32_u32base_index, svuint32_t, -+ svprfh_gather_u32base_index (p0, z0, 32, SV_PSTL1STRM), -+ svprfh_gather_index (p0, z0, 32, SV_PSTL1STRM)) -+ -+/* -+** prfh_gather_x0_u64base_index: -+** lsl (x[0-9]+), x0, #?1 -+** prfb pstl2keep, p0, \[\1, z0\.d\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfh_gather_x0_u64base_index, svuint64_t, -+ svprfh_gather_u64base_index (p0, z0, x0, SV_PSTL2KEEP), -+ svprfh_gather_index (p0, z0, x0, SV_PSTL2KEEP)) -+ -+/* -+** prfh_gather_m1_u64base_index: -+** mov (x[0-9]+), #?-2 -+** prfb pstl2strm, p0, \[\1, z0\.d\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfh_gather_m1_u64base_index, svuint64_t, -+ svprfh_gather_u64base_index (p0, z0, -1, SV_PSTL2STRM), -+ svprfh_gather_index (p0, z0, -1, SV_PSTL2STRM)) -+ -+/* -+** prfh_gather_0_u64base_index: -+** prfh pstl3keep, p0, \[z0\.d\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfh_gather_0_u64base_index, svuint64_t, -+ svprfh_gather_u64base_index (p0, z0, 0, SV_PSTL3KEEP), -+ svprfh_gather_index (p0, z0, 0, SV_PSTL3KEEP)) -+ -+/* -+** prfh_gather_5_u64base_index: -+** prfh pstl3strm, p0, \[z0\.d, #10\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfh_gather_5_u64base_index, svuint64_t, -+ svprfh_gather_u64base_index (p0, z0, 5, SV_PSTL3STRM), -+ svprfh_gather_index (p0, z0, 5, SV_PSTL3STRM)) -+ -+/* -+** prfh_gather_31_u64base_index: -+** prfh pldl1keep, p0, \[z0\.d, #62\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfh_gather_31_u64base_index, 
svuint64_t, -+ svprfh_gather_u64base_index (p0, z0, 31, SV_PLDL1KEEP), -+ svprfh_gather_index (p0, z0, 31, SV_PLDL1KEEP)) -+ -+/* -+** prfh_gather_32_u64base_index: -+** mov (x[0-9]+), #?64 -+** prfb pldl1strm, p0, \[\1, z0\.d\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfh_gather_32_u64base_index, svuint64_t, -+ svprfh_gather_u64base_index (p0, z0, 32, SV_PLDL1STRM), -+ svprfh_gather_index (p0, z0, 32, SV_PLDL1STRM)) -+ -+/* -+** prfh_gather_x0_s32index: -+** prfh pldl2keep, p0, \[x0, z0\.s, sxtw 1\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfh_gather_x0_s32index, svint32_t, -+ svprfh_gather_s32index (p0, x0, z0, SV_PLDL2KEEP), -+ svprfh_gather_index (p0, x0, z0, SV_PLDL2KEEP)) -+ -+/* -+** prfh_gather_s32index: -+** prfh pldl2strm, p0, \[x0, z1\.s, sxtw 1\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfh_gather_s32index, svint32_t, -+ svprfh_gather_s32index (p0, x0, z1, SV_PLDL2STRM), -+ svprfh_gather_index (p0, x0, z1, SV_PLDL2STRM)) -+ -+/* -+** prfh_gather_x0_u32index: -+** prfh pldl3keep, p0, \[x0, z0\.s, uxtw 1\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfh_gather_x0_u32index, svuint32_t, -+ svprfh_gather_u32index (p0, x0, z0, SV_PLDL3KEEP), -+ svprfh_gather_index (p0, x0, z0, SV_PLDL3KEEP)) -+ -+/* -+** prfh_gather_u32index: -+** prfh pldl3strm, p0, \[x0, z1\.s, uxtw 1\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfh_gather_u32index, svuint32_t, -+ svprfh_gather_u32index (p0, x0, z1, SV_PLDL3STRM), -+ svprfh_gather_index (p0, x0, z1, SV_PLDL3STRM)) -+ -+/* -+** prfh_gather_x0_s64index: -+** prfh pstl1keep, p0, \[x0, z0\.d, lsl 1\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfh_gather_x0_s64index, svint64_t, -+ svprfh_gather_s64index (p0, x0, z0, SV_PSTL1KEEP), -+ svprfh_gather_index (p0, x0, z0, SV_PSTL1KEEP)) -+ -+/* -+** prfh_gather_s64index: -+** prfh pstl1strm, p0, \[x0, z1\.d, lsl 1\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfh_gather_s64index, svint64_t, -+ svprfh_gather_s64index (p0, x0, z1, SV_PSTL1STRM), -+ svprfh_gather_index (p0, x0, z1, SV_PSTL1STRM)) -+ -+/* -+** prfh_gather_ext_s64index: -+** prfh pstl1strm, p0, \[x0, z1\.d, sxtw 1\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfh_gather_ext_s64index, svint64_t, -+ svprfh_gather_s64index (p0, x0, svextw_s64_x (p0, z1), SV_PSTL1STRM), -+ svprfh_gather_index (p0, x0, svextw_x (p0, z1), SV_PSTL1STRM)) -+ -+/* -+** prfh_gather_x0_u64index: -+** prfh pstl2keep, p0, \[x0, z0\.d, lsl 1\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfh_gather_x0_u64index, svuint64_t, -+ svprfh_gather_u64index (p0, x0, z0, SV_PSTL2KEEP), -+ svprfh_gather_index (p0, x0, z0, SV_PSTL2KEEP)) -+ -+/* -+** prfh_gather_u64index: -+** prfh pstl2strm, p0, \[x0, z1\.d, lsl 1\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfh_gather_u64index, svuint64_t, -+ svprfh_gather_u64index (p0, x0, z1, SV_PSTL2STRM), -+ svprfh_gather_index (p0, x0, z1, SV_PSTL2STRM)) -+ -+/* -+** prfh_gather_ext_u64index: -+** prfh pstl2strm, p0, \[x0, z1\.d, uxtw 1\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfh_gather_ext_u64index, svuint64_t, -+ svprfh_gather_u64index (p0, x0, svextw_u64_x (p0, z1), SV_PSTL2STRM), -+ svprfh_gather_index (p0, x0, svextw_x (p0, z1), SV_PSTL2STRM)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw.c -new file mode 100644 -index 000000000..bbf6a45c9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw.c -@@ -0,0 +1,245 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** prfw_base: -+** prfw pldl1keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfw_base, uint8_t, -+ svprfw (p0, x0, SV_PLDL1KEEP), -+ svprfw (p0, x0, SV_PLDL1KEEP)) -+ -+/* -+** prfw_u8_index: -+** add (x[0-9+]), (x0, x1|x1, x0) -+** prfw pldl1keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfw_u8_index, uint8_t, -+ svprfw (p0, x0 + x1, SV_PLDL1KEEP), -+ svprfw (p0, x0 + x1, SV_PLDL1KEEP)) -+ -+/* -+** prfw_u8_1: -+** add (x[0-9+]), x0, #?1 -+** prfw pldl1keep, p0, \[\1\] -+** ret -+*/ -+TEST_PREFETCH (prfw_u8_1, uint8_t, -+ svprfw (p0, x0 + 1, SV_PLDL1KEEP), -+ svprfw (p0, x0 + 1, SV_PLDL1KEEP)) -+ -+/* -+** prfw_u16_index: -+** add (x[0-9+]), x0, x1, lsl #?1 -+** prfw pldl1keep, p0, \[\1\] -+** ret -+*/ -+TEST_PREFETCH (prfw_u16_index, uint16_t, -+ svprfw (p0, x0 + x1, SV_PLDL1KEEP), -+ svprfw (p0, x0 + x1, SV_PLDL1KEEP)) -+ -+/* -+** prfw_u16_1: -+** add (x[0-9+]), x0, #?2 -+** prfw pldl1keep, p0, \[\1\] -+** ret -+*/ -+TEST_PREFETCH (prfw_u16_1, uint16_t, -+ svprfw (p0, x0 + 1, SV_PLDL1KEEP), -+ svprfw (p0, x0 + 1, SV_PLDL1KEEP)) -+ -+/* -+** prfw_u32_index: -+** prfw pldl1keep, p0, \[x0, x1, lsl #?2\] -+** ret -+*/ -+TEST_PREFETCH (prfw_u32_index, uint32_t, -+ svprfw (p0, x0 + x1, SV_PLDL1KEEP), -+ svprfw (p0, x0 + x1, SV_PLDL1KEEP)) -+ -+/* -+** prfw_u32_1: -+** add (x[0-9+]), x0, #?4 -+** prfw pldl1keep, p0, \[\1\] -+** ret -+*/ -+TEST_PREFETCH (prfw_u32_1, uint32_t, -+ svprfw (p0, x0 + 1, SV_PLDL1KEEP), -+ svprfw (p0, x0 + 1, SV_PLDL1KEEP)) -+ -+/* -+** prfw_u64_index: -+** add (x[0-9+]), x0, x1, lsl #?3 -+** prfw pldl1keep, p0, \[\1\] -+** ret -+*/ -+TEST_PREFETCH (prfw_u64_index, uint64_t, -+ svprfw (p0, x0 + x1, SV_PLDL1KEEP), -+ svprfw (p0, x0 + x1, SV_PLDL1KEEP)) -+ -+/* -+** prfw_u64_1: -+** add (x[0-9+]), x0, #?8 -+** prfw pldl1keep, p0, \[\1\] -+** ret -+*/ -+TEST_PREFETCH (prfw_u64_1, uint64_t, -+ svprfw (p0, x0 + 1, SV_PLDL1KEEP), -+ svprfw (p0, x0 + 1, SV_PLDL1KEEP)) -+ -+/* -+** prfw_pldl1strm: -+** prfw pldl1strm, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfw_pldl1strm, uint8_t, -+ svprfw (p0, x0, SV_PLDL1STRM), -+ svprfw (p0, x0, SV_PLDL1STRM)) -+ -+/* -+** prfw_pldl2keep: -+** prfw pldl2keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfw_pldl2keep, uint8_t, -+ svprfw (p0, x0, SV_PLDL2KEEP), -+ svprfw (p0, x0, SV_PLDL2KEEP)) -+ -+/* -+** prfw_pldl2strm: -+** prfw pldl2strm, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfw_pldl2strm, uint8_t, -+ svprfw (p0, x0, SV_PLDL2STRM), -+ svprfw (p0, x0, SV_PLDL2STRM)) -+ -+/* -+** prfw_pldl3keep: -+** prfw pldl3keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfw_pldl3keep, uint8_t, -+ svprfw (p0, x0, SV_PLDL3KEEP), -+ svprfw (p0, x0, SV_PLDL3KEEP)) -+ -+/* -+** prfw_pldl3strm: -+** prfw pldl3strm, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfw_pldl3strm, uint8_t, -+ svprfw (p0, x0, SV_PLDL3STRM), -+ svprfw (p0, x0, SV_PLDL3STRM)) -+ -+/* -+** prfw_pstl1keep: -+** prfw pstl1keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfw_pstl1keep, uint8_t, -+ svprfw (p0, x0, SV_PSTL1KEEP), -+ svprfw (p0, x0, SV_PSTL1KEEP)) -+ -+/* -+** prfw_pstl1strm: -+** prfw pstl1strm, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfw_pstl1strm, uint8_t, -+ svprfw (p0, x0, SV_PSTL1STRM), -+ svprfw (p0, x0, SV_PSTL1STRM)) -+ -+/* -+** prfw_pstl2keep: -+** prfw pstl2keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfw_pstl2keep, uint8_t, -+ svprfw (p0, x0, SV_PSTL2KEEP), -+ svprfw (p0, x0, SV_PSTL2KEEP)) -+ -+/* -+** prfw_pstl2strm: -+** prfw pstl2strm, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH 
(prfw_pstl2strm, uint8_t, -+ svprfw (p0, x0, SV_PSTL2STRM), -+ svprfw (p0, x0, SV_PSTL2STRM)) -+ -+/* -+** prfw_pstl3keep: -+** prfw pstl3keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfw_pstl3keep, uint8_t, -+ svprfw (p0, x0, SV_PSTL3KEEP), -+ svprfw (p0, x0, SV_PSTL3KEEP)) -+ -+/* -+** prfw_pstl3strm: -+** prfw pstl3strm, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfw_pstl3strm, uint8_t, -+ svprfw (p0, x0, SV_PSTL3STRM), -+ svprfw (p0, x0, SV_PSTL3STRM)) -+ -+/* -+** prfw_vnum_0: -+** prfw pldl1keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfw_vnum_0, uint8_t, -+ svprfw_vnum (p0, x0, 0, SV_PLDL1KEEP), -+ svprfw_vnum (p0, x0, 0, SV_PLDL1KEEP)) -+ -+/* -+** prfw_vnum_1: -+** incb x0 -+** prfw pldl1keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfw_vnum_1, uint16_t, -+ svprfw_vnum (p0, x0, 1, SV_PLDL1KEEP), -+ svprfw_vnum (p0, x0, 1, SV_PLDL1KEEP)) -+ -+/* -+** prfw_vnum_2: -+** incb x0, all, mul #2 -+** prfw pldl1keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfw_vnum_2, uint32_t, -+ svprfw_vnum (p0, x0, 2, SV_PLDL1KEEP), -+ svprfw_vnum (p0, x0, 2, SV_PLDL1KEEP)) -+ -+/* -+** prfw_vnum_3: -+** incb x0, all, mul #3 -+** prfw pldl1keep, p0, \[x0\] -+** ret -+*/ -+TEST_PREFETCH (prfw_vnum_3, uint64_t, -+ svprfw_vnum (p0, x0, 3, SV_PLDL1KEEP), -+ svprfw_vnum (p0, x0, 3, SV_PLDL1KEEP)) -+ -+/* -+** prfw_vnum_x1: -+** cntb (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** prfw pldl1keep, p0, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** prfw zldl1keep, p0, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_PREFETCH (prfw_vnum_x1, uint64_t, -+ svprfw_vnum (p0, x0, x1, SV_PLDL1KEEP), -+ svprfw_vnum (p0, x0, x1, SV_PLDL1KEEP)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw_gather.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw_gather.c -new file mode 100644 -index 000000000..2bbae1b9e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw_gather.c -@@ -0,0 +1,225 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** prfw_gather_u32base: -+** prfw pldl1keep, p0, \[z0\.s\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfw_gather_u32base, svuint32_t, -+ svprfw_gather_u32base (p0, z0, SV_PLDL1KEEP), -+ svprfw_gather (p0, z0, SV_PLDL1KEEP)) -+ -+/* -+** prfw_gather_u64base: -+** prfw pldl1strm, p0, \[z0\.d\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfw_gather_u64base, svuint64_t, -+ svprfw_gather_u64base (p0, z0, SV_PLDL1STRM), -+ svprfw_gather (p0, z0, SV_PLDL1STRM)) -+ -+/* -+** prfw_gather_x0_u32base_index: -+** lsl (x[0-9]+), x0, #?2 -+** prfb pldl2keep, p0, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfw_gather_x0_u32base_index, svuint32_t, -+ svprfw_gather_u32base_index (p0, z0, x0, SV_PLDL2KEEP), -+ svprfw_gather_index (p0, z0, x0, SV_PLDL2KEEP)) -+ -+/* -+** prfw_gather_m1_u32base_index: -+** mov (x[0-9]+), #?-4 -+** prfb pldl2strm, p0, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfw_gather_m1_u32base_index, svuint32_t, -+ svprfw_gather_u32base_index (p0, z0, -1, SV_PLDL2STRM), -+ svprfw_gather_index (p0, z0, -1, SV_PLDL2STRM)) -+ -+/* -+** prfw_gather_0_u32base_index: -+** prfw pldl3keep, p0, \[z0\.s\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfw_gather_0_u32base_index, svuint32_t, -+ svprfw_gather_u32base_index (p0, z0, 0, SV_PLDL3KEEP), -+ svprfw_gather_index (p0, z0, 0, SV_PLDL3KEEP)) -+ -+/* -+** prfw_gather_5_u32base_index: -+** prfw pldl3strm, p0, \[z0\.s, #20\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfw_gather_5_u32base_index, svuint32_t, -+ svprfw_gather_u32base_index (p0, z0, 5, SV_PLDL3STRM), -+ svprfw_gather_index (p0, z0, 5, SV_PLDL3STRM)) -+ -+/* -+** prfw_gather_31_u32base_index: -+** prfw pstl1keep, p0, \[z0\.s, #124\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfw_gather_31_u32base_index, svuint32_t, -+ svprfw_gather_u32base_index (p0, z0, 31, SV_PSTL1KEEP), -+ svprfw_gather_index (p0, z0, 31, SV_PSTL1KEEP)) -+ -+/* -+** prfw_gather_32_u32base_index: -+** mov (x[0-9]+), #?128 -+** prfb pstl1strm, p0, \[\1, z0\.s, uxtw\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfw_gather_32_u32base_index, svuint32_t, -+ svprfw_gather_u32base_index (p0, z0, 32, SV_PSTL1STRM), -+ svprfw_gather_index (p0, z0, 32, SV_PSTL1STRM)) -+ -+/* -+** prfw_gather_x0_u64base_index: -+** lsl (x[0-9]+), x0, #?2 -+** prfb pstl2keep, p0, \[\1, z0\.d\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfw_gather_x0_u64base_index, svuint64_t, -+ svprfw_gather_u64base_index (p0, z0, x0, SV_PSTL2KEEP), -+ svprfw_gather_index (p0, z0, x0, SV_PSTL2KEEP)) -+ -+/* -+** prfw_gather_m1_u64base_index: -+** mov (x[0-9]+), #?-4 -+** prfb pstl2strm, p0, \[\1, z0\.d\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfw_gather_m1_u64base_index, svuint64_t, -+ svprfw_gather_u64base_index (p0, z0, -1, SV_PSTL2STRM), -+ svprfw_gather_index (p0, z0, -1, SV_PSTL2STRM)) -+ -+/* -+** prfw_gather_0_u64base_index: -+** prfw pstl3keep, p0, \[z0\.d\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfw_gather_0_u64base_index, svuint64_t, -+ svprfw_gather_u64base_index (p0, z0, 0, SV_PSTL3KEEP), -+ svprfw_gather_index (p0, z0, 0, SV_PSTL3KEEP)) -+ -+/* -+** prfw_gather_5_u64base_index: -+** prfw pstl3strm, p0, \[z0\.d, #20\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfw_gather_5_u64base_index, svuint64_t, -+ svprfw_gather_u64base_index (p0, z0, 5, SV_PSTL3STRM), -+ svprfw_gather_index (p0, z0, 5, SV_PSTL3STRM)) -+ -+/* -+** prfw_gather_31_u64base_index: -+** prfw pldl1keep, p0, \[z0\.d, #124\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfw_gather_31_u64base_index, 
svuint64_t, -+ svprfw_gather_u64base_index (p0, z0, 31, SV_PLDL1KEEP), -+ svprfw_gather_index (p0, z0, 31, SV_PLDL1KEEP)) -+ -+/* -+** prfw_gather_32_u64base_index: -+** mov (x[0-9]+), #?128 -+** prfb pldl1strm, p0, \[\1, z0\.d\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_ZS (prfw_gather_32_u64base_index, svuint64_t, -+ svprfw_gather_u64base_index (p0, z0, 32, SV_PLDL1STRM), -+ svprfw_gather_index (p0, z0, 32, SV_PLDL1STRM)) -+ -+/* -+** prfw_gather_x0_s32index: -+** prfw pldl2keep, p0, \[x0, z0\.s, sxtw 2\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfw_gather_x0_s32index, svint32_t, -+ svprfw_gather_s32index (p0, x0, z0, SV_PLDL2KEEP), -+ svprfw_gather_index (p0, x0, z0, SV_PLDL2KEEP)) -+ -+/* -+** prfw_gather_s32index: -+** prfw pldl2strm, p0, \[x0, z1\.s, sxtw 2\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfw_gather_s32index, svint32_t, -+ svprfw_gather_s32index (p0, x0, z1, SV_PLDL2STRM), -+ svprfw_gather_index (p0, x0, z1, SV_PLDL2STRM)) -+ -+/* -+** prfw_gather_x0_u32index: -+** prfw pldl3keep, p0, \[x0, z0\.s, uxtw 2\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfw_gather_x0_u32index, svuint32_t, -+ svprfw_gather_u32index (p0, x0, z0, SV_PLDL3KEEP), -+ svprfw_gather_index (p0, x0, z0, SV_PLDL3KEEP)) -+ -+/* -+** prfw_gather_u32index: -+** prfw pldl3strm, p0, \[x0, z1\.s, uxtw 2\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfw_gather_u32index, svuint32_t, -+ svprfw_gather_u32index (p0, x0, z1, SV_PLDL3STRM), -+ svprfw_gather_index (p0, x0, z1, SV_PLDL3STRM)) -+ -+/* -+** prfw_gather_x0_s64index: -+** prfw pstl1keep, p0, \[x0, z0\.d, lsl 2\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfw_gather_x0_s64index, svint64_t, -+ svprfw_gather_s64index (p0, x0, z0, SV_PSTL1KEEP), -+ svprfw_gather_index (p0, x0, z0, SV_PSTL1KEEP)) -+ -+/* -+** prfw_gather_s64index: -+** prfw pstl1strm, p0, \[x0, z1\.d, lsl 2\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfw_gather_s64index, svint64_t, -+ svprfw_gather_s64index (p0, x0, z1, SV_PSTL1STRM), -+ svprfw_gather_index (p0, x0, z1, SV_PSTL1STRM)) -+ -+/* -+** prfw_gather_ext_s64index: -+** prfw pstl1strm, p0, \[x0, z1\.d, sxtw 2\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfw_gather_ext_s64index, svint64_t, -+ svprfw_gather_s64index (p0, x0, svextw_s64_x (p0, z1), SV_PSTL1STRM), -+ svprfw_gather_index (p0, x0, svextw_x (p0, z1), SV_PSTL1STRM)) -+ -+/* -+** prfw_gather_x0_u64index: -+** prfw pstl2keep, p0, \[x0, z0\.d, lsl 2\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfw_gather_x0_u64index, svuint64_t, -+ svprfw_gather_u64index (p0, x0, z0, SV_PSTL2KEEP), -+ svprfw_gather_index (p0, x0, z0, SV_PSTL2KEEP)) -+ -+/* -+** prfw_gather_u64index: -+** prfw pstl2strm, p0, \[x0, z1\.d, lsl 2\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfw_gather_u64index, svuint64_t, -+ svprfw_gather_u64index (p0, x0, z1, SV_PSTL2STRM), -+ svprfw_gather_index (p0, x0, z1, SV_PSTL2STRM)) -+ -+/* -+** prfw_gather_ext_u64index: -+** prfw pstl2strm, p0, \[x0, z1\.d, uxtw 2\] -+** ret -+*/ -+TEST_PREFETCH_GATHER_SZ (prfw_gather_ext_u64index, svuint64_t, -+ svprfw_gather_u64index (p0, x0, svextw_u64_x (p0, z1), SV_PSTL2STRM), -+ svprfw_gather_index (p0, x0, svextw_x (p0, z1), SV_PSTL2STRM)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptest_any.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptest_any.c -new file mode 100644 -index 000000000..33280d388 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptest_any.c -@@ -0,0 +1,77 @@ -+/* { dg-additional-options "-msve-vector-bits=scalable" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ 
-+ -+#include "test_sve_acle.h" -+#include -+ -+/* -+** test_bool_any: -+** ptest p0, p1\.b -+** cset [wx]0, any -+** ret -+*/ -+TEST_PTEST (test_bool_any, bool, -+ x0 = svptest_any (p0, p1)); -+ -+/* -+** test_bool_none: -+** ptest p0, p1\.b -+** cset [wx]0, none -+** ret -+*/ -+TEST_PTEST (test_bool_none, bool, -+ x0 = !svptest_any (p0, p1)); -+ -+/* -+** test_int_any: -+** ptest p0, p1\.b -+** cset [wx]0, any -+** ret -+*/ -+TEST_PTEST (test_int_any, int, -+ x0 = svptest_any (p0, p1)); -+ -+/* -+** test_int_none: -+** ptest p0, p1\.b -+** cset [wx]0, none -+** ret -+*/ -+TEST_PTEST (test_int_none, int, -+ x0 = !svptest_any (p0, p1)); -+ -+/* -+** test_int64_t_any: -+** ptest p0, p1\.b -+** cset [wx]0, any -+** ret -+*/ -+TEST_PTEST (test_int64_t_any, int64_t, -+ x0 = svptest_any (p0, p1)); -+ -+/* -+** test_int64_t_none: -+** ptest p0, p1\.b -+** cset [wx]0, none -+** ret -+*/ -+TEST_PTEST (test_int64_t_none, int64_t, -+ x0 = !svptest_any (p0, p1)); -+ -+/* -+** sel_any: -+** ptest p0, p1\.b -+** csel x0, (x0, x1, any|x1, x0, none) -+** ret -+*/ -+TEST_PTEST (sel_any, int64_t, -+ x0 = svptest_any (p0, p1) ? x0 : x1); -+ -+/* -+** sel_none: -+** ptest p0, p1\.b -+** csel x0, (x0, x1, none|x1, x0, any) -+** ret -+*/ -+TEST_PTEST (sel_none, int64_t, -+ x0 = !svptest_any (p0, p1) ? x0 : x1); -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptest_first.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptest_first.c -new file mode 100644 -index 000000000..991dabd3d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptest_first.c -@@ -0,0 +1,77 @@ -+/* { dg-additional-options "-msve-vector-bits=scalable" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+#include -+ -+/* -+** test_bool_first: -+** ptest p0, p1\.b -+** cset [wx]0, first -+** ret -+*/ -+TEST_PTEST (test_bool_first, bool, -+ x0 = svptest_first (p0, p1)); -+ -+/* -+** test_bool_nfrst: -+** ptest p0, p1\.b -+** cset [wx]0, nfrst -+** ret -+*/ -+TEST_PTEST (test_bool_nfrst, bool, -+ x0 = !svptest_first (p0, p1)); -+ -+/* -+** test_int_first: -+** ptest p0, p1\.b -+** cset [wx]0, first -+** ret -+*/ -+TEST_PTEST (test_int_first, int, -+ x0 = svptest_first (p0, p1)); -+ -+/* -+** test_int_nfrst: -+** ptest p0, p1\.b -+** cset [wx]0, nfrst -+** ret -+*/ -+TEST_PTEST (test_int_nfrst, int, -+ x0 = !svptest_first (p0, p1)); -+ -+/* -+** test_int64_t_first: -+** ptest p0, p1\.b -+** cset [wx]0, first -+** ret -+*/ -+TEST_PTEST (test_int64_t_first, int64_t, -+ x0 = svptest_first (p0, p1)); -+ -+/* -+** test_int64_t_nfrst: -+** ptest p0, p1\.b -+** cset [wx]0, nfrst -+** ret -+*/ -+TEST_PTEST (test_int64_t_nfrst, int64_t, -+ x0 = !svptest_first (p0, p1)); -+ -+/* -+** sel_first: -+** ptest p0, p1\.b -+** csel x0, (x0, x1, first|x1, x0, nfrst) -+** ret -+*/ -+TEST_PTEST (sel_first, int64_t, -+ x0 = svptest_first (p0, p1) ? x0 : x1); -+ -+/* -+** sel_nfrst: -+** ptest p0, p1\.b -+** csel x0, (x0, x1, nfrst|x1, x0, first) -+** ret -+*/ -+TEST_PTEST (sel_nfrst, int64_t, -+ x0 = !svptest_first (p0, p1) ? 
x0 : x1); -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptest_last.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptest_last.c -new file mode 100644 -index 000000000..b952a4149 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptest_last.c -@@ -0,0 +1,77 @@ -+/* { dg-additional-options "-msve-vector-bits=scalable" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+#include -+ -+/* -+** test_bool_last: -+** ptest p0, p1\.b -+** cset [wx]0, last -+** ret -+*/ -+TEST_PTEST (test_bool_last, bool, -+ x0 = svptest_last (p0, p1)); -+ -+/* -+** test_bool_nlast: -+** ptest p0, p1\.b -+** cset [wx]0, nlast -+** ret -+*/ -+TEST_PTEST (test_bool_nlast, bool, -+ x0 = !svptest_last (p0, p1)); -+ -+/* -+** test_int_last: -+** ptest p0, p1\.b -+** cset [wx]0, last -+** ret -+*/ -+TEST_PTEST (test_int_last, int, -+ x0 = svptest_last (p0, p1)); -+ -+/* -+** test_int_nlast: -+** ptest p0, p1\.b -+** cset [wx]0, nlast -+** ret -+*/ -+TEST_PTEST (test_int_nlast, int, -+ x0 = !svptest_last (p0, p1)); -+ -+/* -+** test_int64_t_last: -+** ptest p0, p1\.b -+** cset [wx]0, last -+** ret -+*/ -+TEST_PTEST (test_int64_t_last, int64_t, -+ x0 = svptest_last (p0, p1)); -+ -+/* -+** test_int64_t_nlast: -+** ptest p0, p1\.b -+** cset [wx]0, nlast -+** ret -+*/ -+TEST_PTEST (test_int64_t_nlast, int64_t, -+ x0 = !svptest_last (p0, p1)); -+ -+/* -+** sel_last: -+** ptest p0, p1\.b -+** csel x0, (x0, x1, last|x1, x0, nlast) -+** ret -+*/ -+TEST_PTEST (sel_last, int64_t, -+ x0 = svptest_last (p0, p1) ? x0 : x1); -+ -+/* -+** sel_nlast: -+** ptest p0, p1\.b -+** csel x0, (x0, x1, nlast|x1, x0, last) -+** ret -+*/ -+TEST_PTEST (sel_nlast, int64_t, -+ x0 = !svptest_last (p0, p1) ? x0 : x1); -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptrue.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptrue.c -new file mode 100644 -index 000000000..9c86170cb ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptrue.c -@@ -0,0 +1,40 @@ -+/* { dg-additional-options "-msve-vector-bits=scalable" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ptrue_b8: -+** ptrue p0\.b, all -+** ret -+*/ -+TEST_P (ptrue_b8, -+ p0 = svptrue_b8 (), -+ p0 = svptrue_b8 ()); -+ -+/* -+** ptrue_b16: -+** ptrue p0\.h, all -+** ret -+*/ -+TEST_P (ptrue_b16, -+ p0 = svptrue_b16 (), -+ p0 = svptrue_b16 ()); -+ -+/* -+** ptrue_b32: -+** ptrue p0\.s, all -+** ret -+*/ -+TEST_P (ptrue_b32, -+ p0 = svptrue_b32 (), -+ p0 = svptrue_b32 ()); -+ -+/* -+** ptrue_b64: -+** ptrue p0\.d, all -+** ret -+*/ -+TEST_P (ptrue_b64, -+ p0 = svptrue_b64 (), -+ p0 = svptrue_b64 ()); -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptrue_pat_b16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptrue_pat_b16.c -new file mode 100644 -index 000000000..d7f83f5c6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptrue_pat_b16.c -@@ -0,0 +1,156 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ptrue_pat_pow2_b16: -+** ptrue p0\.h, pow2 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_pow2_b16, -+ p0 = svptrue_pat_b16 (SV_POW2), -+ p0 = svptrue_pat_b16 (SV_POW2)) -+ -+/* -+** ptrue_pat_vl1_b16: -+** ptrue p0\.[bhsd], vl1 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl1_b16, -+ p0 = svptrue_pat_b16 (SV_VL1), -+ p0 = svptrue_pat_b16 (SV_VL1)) -+ -+/* -+** ptrue_pat_vl2_b16: -+** ptrue p0\.h, vl2 -+** ret -+*/ -+TEST_UNIFORM_P 
(ptrue_pat_vl2_b16, -+ p0 = svptrue_pat_b16 (SV_VL2), -+ p0 = svptrue_pat_b16 (SV_VL2)) -+ -+/* -+** ptrue_pat_vl3_b16: -+** ptrue p0\.h, vl3 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl3_b16, -+ p0 = svptrue_pat_b16 (SV_VL3), -+ p0 = svptrue_pat_b16 (SV_VL3)) -+ -+/* -+** ptrue_pat_vl4_b16: -+** ptrue p0\.h, vl4 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl4_b16, -+ p0 = svptrue_pat_b16 (SV_VL4), -+ p0 = svptrue_pat_b16 (SV_VL4)) -+ -+/* -+** ptrue_pat_vl5_b16: -+** ptrue p0\.h, vl5 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl5_b16, -+ p0 = svptrue_pat_b16 (SV_VL5), -+ p0 = svptrue_pat_b16 (SV_VL5)) -+ -+/* -+** ptrue_pat_vl6_b16: -+** ptrue p0\.h, vl6 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl6_b16, -+ p0 = svptrue_pat_b16 (SV_VL6), -+ p0 = svptrue_pat_b16 (SV_VL6)) -+ -+/* -+** ptrue_pat_vl7_b16: -+** ptrue p0\.h, vl7 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl7_b16, -+ p0 = svptrue_pat_b16 (SV_VL7), -+ p0 = svptrue_pat_b16 (SV_VL7)) -+ -+/* -+** ptrue_pat_vl8_b16: -+** ptrue p0\.h, vl8 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl8_b16, -+ p0 = svptrue_pat_b16 (SV_VL8), -+ p0 = svptrue_pat_b16 (SV_VL8)) -+ -+/* -+** ptrue_pat_vl16_b16: -+** ptrue p0\.[bhsd], vl16 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl16_b16, -+ p0 = svptrue_pat_b16 (SV_VL16), -+ p0 = svptrue_pat_b16 (SV_VL16)) -+ -+/* -+** ptrue_pat_vl32_b16: -+** ptrue p0\.h, vl32 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl32_b16, -+ p0 = svptrue_pat_b16 (SV_VL32), -+ p0 = svptrue_pat_b16 (SV_VL32)) -+ -+/* -+** ptrue_pat_vl64_b16: -+** ptrue p0\.h, vl64 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl64_b16, -+ p0 = svptrue_pat_b16 (SV_VL64), -+ p0 = svptrue_pat_b16 (SV_VL64)) -+ -+/* -+** ptrue_pat_vl128_b16: -+** ptrue p0\.[bhsd], vl128 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl128_b16, -+ p0 = svptrue_pat_b16 (SV_VL128), -+ p0 = svptrue_pat_b16 (SV_VL128)) -+ -+/* -+** ptrue_pat_vl256_b16: -+** ptrue p0\.h, vl256 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl256_b16, -+ p0 = svptrue_pat_b16 (SV_VL256), -+ p0 = svptrue_pat_b16 (SV_VL256)) -+ -+/* -+** ptrue_pat_mul4_b16: -+** ptrue p0\.h, mul4 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_mul4_b16, -+ p0 = svptrue_pat_b16 (SV_MUL4), -+ p0 = svptrue_pat_b16 (SV_MUL4)) -+ -+/* -+** ptrue_pat_mul3_b16: -+** ptrue p0\.h, mul3 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_mul3_b16, -+ p0 = svptrue_pat_b16 (SV_MUL3), -+ p0 = svptrue_pat_b16 (SV_MUL3)) -+ -+/* -+** ptrue_pat_all_b16: -+** ptrue p0\.h[^\n]* -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_all_b16, -+ p0 = svptrue_pat_b16 (SV_ALL), -+ p0 = svptrue_pat_b16 (SV_ALL)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptrue_pat_b32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptrue_pat_b32.c -new file mode 100644 -index 000000000..11cf5aebb ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptrue_pat_b32.c -@@ -0,0 +1,156 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ptrue_pat_pow2_b32: -+** ptrue p0\.s, pow2 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_pow2_b32, -+ p0 = svptrue_pat_b32 (SV_POW2), -+ p0 = svptrue_pat_b32 (SV_POW2)) -+ -+/* -+** ptrue_pat_vl1_b32: -+** ptrue p0\.[bhsd], vl1 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl1_b32, -+ p0 = svptrue_pat_b32 (SV_VL1), -+ p0 = svptrue_pat_b32 (SV_VL1)) -+ -+/* -+** ptrue_pat_vl2_b32: -+** ptrue p0\.s, vl2 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl2_b32, -+ p0 = svptrue_pat_b32 (SV_VL2), -+ p0 = svptrue_pat_b32 (SV_VL2)) -+ -+/* -+** ptrue_pat_vl3_b32: -+** ptrue p0\.s, vl3 -+** ret 
-+*/ -+TEST_UNIFORM_P (ptrue_pat_vl3_b32, -+ p0 = svptrue_pat_b32 (SV_VL3), -+ p0 = svptrue_pat_b32 (SV_VL3)) -+ -+/* -+** ptrue_pat_vl4_b32: -+** ptrue p0\.s, vl4 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl4_b32, -+ p0 = svptrue_pat_b32 (SV_VL4), -+ p0 = svptrue_pat_b32 (SV_VL4)) -+ -+/* -+** ptrue_pat_vl5_b32: -+** ptrue p0\.s, vl5 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl5_b32, -+ p0 = svptrue_pat_b32 (SV_VL5), -+ p0 = svptrue_pat_b32 (SV_VL5)) -+ -+/* -+** ptrue_pat_vl6_b32: -+** ptrue p0\.s, vl6 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl6_b32, -+ p0 = svptrue_pat_b32 (SV_VL6), -+ p0 = svptrue_pat_b32 (SV_VL6)) -+ -+/* -+** ptrue_pat_vl7_b32: -+** ptrue p0\.s, vl7 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl7_b32, -+ p0 = svptrue_pat_b32 (SV_VL7), -+ p0 = svptrue_pat_b32 (SV_VL7)) -+ -+/* -+** ptrue_pat_vl8_b32: -+** ptrue p0\.s, vl8 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl8_b32, -+ p0 = svptrue_pat_b32 (SV_VL8), -+ p0 = svptrue_pat_b32 (SV_VL8)) -+ -+/* -+** ptrue_pat_vl16_b32: -+** ptrue p0\.[bhsd], vl16 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl16_b32, -+ p0 = svptrue_pat_b32 (SV_VL16), -+ p0 = svptrue_pat_b32 (SV_VL16)) -+ -+/* -+** ptrue_pat_vl32_b32: -+** ptrue p0\.s, vl32 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl32_b32, -+ p0 = svptrue_pat_b32 (SV_VL32), -+ p0 = svptrue_pat_b32 (SV_VL32)) -+ -+/* -+** ptrue_pat_vl64_b32: -+** ptrue p0\.s, vl64 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl64_b32, -+ p0 = svptrue_pat_b32 (SV_VL64), -+ p0 = svptrue_pat_b32 (SV_VL64)) -+ -+/* -+** ptrue_pat_vl128_b32: -+** ptrue p0\.[bhsd], vl128 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl128_b32, -+ p0 = svptrue_pat_b32 (SV_VL128), -+ p0 = svptrue_pat_b32 (SV_VL128)) -+ -+/* -+** ptrue_pat_vl256_b32: -+** ptrue p0\.s, vl256 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl256_b32, -+ p0 = svptrue_pat_b32 (SV_VL256), -+ p0 = svptrue_pat_b32 (SV_VL256)) -+ -+/* -+** ptrue_pat_mul4_b32: -+** ptrue p0\.s, mul4 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_mul4_b32, -+ p0 = svptrue_pat_b32 (SV_MUL4), -+ p0 = svptrue_pat_b32 (SV_MUL4)) -+ -+/* -+** ptrue_pat_mul3_b32: -+** ptrue p0\.s, mul3 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_mul3_b32, -+ p0 = svptrue_pat_b32 (SV_MUL3), -+ p0 = svptrue_pat_b32 (SV_MUL3)) -+ -+/* -+** ptrue_pat_all_b32: -+** ptrue p0\.s[^\n]* -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_all_b32, -+ p0 = svptrue_pat_b32 (SV_ALL), -+ p0 = svptrue_pat_b32 (SV_ALL)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptrue_pat_b64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptrue_pat_b64.c -new file mode 100644 -index 000000000..4c4202bb3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptrue_pat_b64.c -@@ -0,0 +1,156 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ptrue_pat_pow2_b64: -+** ptrue p0\.d, pow2 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_pow2_b64, -+ p0 = svptrue_pat_b64 (SV_POW2), -+ p0 = svptrue_pat_b64 (SV_POW2)) -+ -+/* -+** ptrue_pat_vl1_b64: -+** ptrue p0\.[bhsd], vl1 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl1_b64, -+ p0 = svptrue_pat_b64 (SV_VL1), -+ p0 = svptrue_pat_b64 (SV_VL1)) -+ -+/* -+** ptrue_pat_vl2_b64: -+** ptrue p0\.d, vl2 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl2_b64, -+ p0 = svptrue_pat_b64 (SV_VL2), -+ p0 = svptrue_pat_b64 (SV_VL2)) -+ -+/* -+** ptrue_pat_vl3_b64: -+** ptrue p0\.d, vl3 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl3_b64, -+ p0 = svptrue_pat_b64 (SV_VL3), -+ p0 = svptrue_pat_b64 (SV_VL3)) -+ -+/* -+** ptrue_pat_vl4_b64: -+** ptrue 
p0\.d, vl4 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl4_b64, -+ p0 = svptrue_pat_b64 (SV_VL4), -+ p0 = svptrue_pat_b64 (SV_VL4)) -+ -+/* -+** ptrue_pat_vl5_b64: -+** ptrue p0\.d, vl5 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl5_b64, -+ p0 = svptrue_pat_b64 (SV_VL5), -+ p0 = svptrue_pat_b64 (SV_VL5)) -+ -+/* -+** ptrue_pat_vl6_b64: -+** ptrue p0\.d, vl6 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl6_b64, -+ p0 = svptrue_pat_b64 (SV_VL6), -+ p0 = svptrue_pat_b64 (SV_VL6)) -+ -+/* -+** ptrue_pat_vl7_b64: -+** ptrue p0\.d, vl7 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl7_b64, -+ p0 = svptrue_pat_b64 (SV_VL7), -+ p0 = svptrue_pat_b64 (SV_VL7)) -+ -+/* -+** ptrue_pat_vl8_b64: -+** ptrue p0\.d, vl8 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl8_b64, -+ p0 = svptrue_pat_b64 (SV_VL8), -+ p0 = svptrue_pat_b64 (SV_VL8)) -+ -+/* -+** ptrue_pat_vl16_b64: -+** ptrue p0\.[bhsd], vl16 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl16_b64, -+ p0 = svptrue_pat_b64 (SV_VL16), -+ p0 = svptrue_pat_b64 (SV_VL16)) -+ -+/* -+** ptrue_pat_vl32_b64: -+** ptrue p0\.d, vl32 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl32_b64, -+ p0 = svptrue_pat_b64 (SV_VL32), -+ p0 = svptrue_pat_b64 (SV_VL32)) -+ -+/* -+** ptrue_pat_vl64_b64: -+** ptrue p0\.d, vl64 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl64_b64, -+ p0 = svptrue_pat_b64 (SV_VL64), -+ p0 = svptrue_pat_b64 (SV_VL64)) -+ -+/* -+** ptrue_pat_vl128_b64: -+** ptrue p0\.[bhsd], vl128 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl128_b64, -+ p0 = svptrue_pat_b64 (SV_VL128), -+ p0 = svptrue_pat_b64 (SV_VL128)) -+ -+/* -+** ptrue_pat_vl256_b64: -+** ptrue p0\.d, vl256 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl256_b64, -+ p0 = svptrue_pat_b64 (SV_VL256), -+ p0 = svptrue_pat_b64 (SV_VL256)) -+ -+/* -+** ptrue_pat_mul4_b64: -+** ptrue p0\.d, mul4 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_mul4_b64, -+ p0 = svptrue_pat_b64 (SV_MUL4), -+ p0 = svptrue_pat_b64 (SV_MUL4)) -+ -+/* -+** ptrue_pat_mul3_b64: -+** ptrue p0\.d, mul3 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_mul3_b64, -+ p0 = svptrue_pat_b64 (SV_MUL3), -+ p0 = svptrue_pat_b64 (SV_MUL3)) -+ -+/* -+** ptrue_pat_all_b64: -+** ptrue p0\.d[^\n]* -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_all_b64, -+ p0 = svptrue_pat_b64 (SV_ALL), -+ p0 = svptrue_pat_b64 (SV_ALL)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptrue_pat_b8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptrue_pat_b8.c -new file mode 100644 -index 000000000..49fb8c555 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/ptrue_pat_b8.c -@@ -0,0 +1,156 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** ptrue_pat_pow2_b8: -+** ptrue p0\.b, pow2 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_pow2_b8, -+ p0 = svptrue_pat_b8 (SV_POW2), -+ p0 = svptrue_pat_b8 (SV_POW2)) -+ -+/* -+** ptrue_pat_vl1_b8: -+** ptrue p0\.[bhsd], vl1 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl1_b8, -+ p0 = svptrue_pat_b8 (SV_VL1), -+ p0 = svptrue_pat_b8 (SV_VL1)) -+ -+/* -+** ptrue_pat_vl2_b8: -+** ptrue p0\.b, vl2 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl2_b8, -+ p0 = svptrue_pat_b8 (SV_VL2), -+ p0 = svptrue_pat_b8 (SV_VL2)) -+ -+/* -+** ptrue_pat_vl3_b8: -+** ptrue p0\.b, vl3 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl3_b8, -+ p0 = svptrue_pat_b8 (SV_VL3), -+ p0 = svptrue_pat_b8 (SV_VL3)) -+ -+/* -+** ptrue_pat_vl4_b8: -+** ptrue p0\.b, vl4 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl4_b8, -+ p0 = svptrue_pat_b8 (SV_VL4), -+ p0 = svptrue_pat_b8 (SV_VL4)) -+ -+/* -+** ptrue_pat_vl5_b8: -+** ptrue 
p0\.b, vl5 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl5_b8, -+ p0 = svptrue_pat_b8 (SV_VL5), -+ p0 = svptrue_pat_b8 (SV_VL5)) -+ -+/* -+** ptrue_pat_vl6_b8: -+** ptrue p0\.b, vl6 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl6_b8, -+ p0 = svptrue_pat_b8 (SV_VL6), -+ p0 = svptrue_pat_b8 (SV_VL6)) -+ -+/* -+** ptrue_pat_vl7_b8: -+** ptrue p0\.b, vl7 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl7_b8, -+ p0 = svptrue_pat_b8 (SV_VL7), -+ p0 = svptrue_pat_b8 (SV_VL7)) -+ -+/* -+** ptrue_pat_vl8_b8: -+** ptrue p0\.b, vl8 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl8_b8, -+ p0 = svptrue_pat_b8 (SV_VL8), -+ p0 = svptrue_pat_b8 (SV_VL8)) -+ -+/* -+** ptrue_pat_vl16_b8: -+** ptrue p0\.[bhsd], vl16 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl16_b8, -+ p0 = svptrue_pat_b8 (SV_VL16), -+ p0 = svptrue_pat_b8 (SV_VL16)) -+ -+/* -+** ptrue_pat_vl32_b8: -+** ptrue p0\.b, vl32 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl32_b8, -+ p0 = svptrue_pat_b8 (SV_VL32), -+ p0 = svptrue_pat_b8 (SV_VL32)) -+ -+/* -+** ptrue_pat_vl64_b8: -+** ptrue p0\.b, vl64 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl64_b8, -+ p0 = svptrue_pat_b8 (SV_VL64), -+ p0 = svptrue_pat_b8 (SV_VL64)) -+ -+/* -+** ptrue_pat_vl128_b8: -+** ptrue p0\.[bhsd], vl128 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl128_b8, -+ p0 = svptrue_pat_b8 (SV_VL128), -+ p0 = svptrue_pat_b8 (SV_VL128)) -+ -+/* -+** ptrue_pat_vl256_b8: -+** ptrue p0\.b, vl256 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_vl256_b8, -+ p0 = svptrue_pat_b8 (SV_VL256), -+ p0 = svptrue_pat_b8 (SV_VL256)) -+ -+/* -+** ptrue_pat_mul4_b8: -+** ptrue p0\.b, mul4 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_mul4_b8, -+ p0 = svptrue_pat_b8 (SV_MUL4), -+ p0 = svptrue_pat_b8 (SV_MUL4)) -+ -+/* -+** ptrue_pat_mul3_b8: -+** ptrue p0\.b, mul3 -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_mul3_b8, -+ p0 = svptrue_pat_b8 (SV_MUL3), -+ p0 = svptrue_pat_b8 (SV_MUL3)) -+ -+/* -+** ptrue_pat_all_b8: -+** ptrue p0\.b[^\n]* -+** ret -+*/ -+TEST_UNIFORM_P (ptrue_pat_all_b8, -+ p0 = svptrue_pat_b8 (SV_ALL), -+ p0 = svptrue_pat_b8 (SV_ALL)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_s16.c -new file mode 100644 -index 000000000..03255c41c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_s16.c -@@ -0,0 +1,123 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qadd_s16_tied1: -+** sqadd z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_s16_tied1, svint16_t, -+ z0 = svqadd_s16 (z0, z1), -+ z0 = svqadd (z0, z1)) -+ -+/* -+** qadd_s16_tied2: -+** sqadd z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_s16_tied2, svint16_t, -+ z0 = svqadd_s16 (z1, z0), -+ z0 = svqadd (z1, z0)) -+ -+/* -+** qadd_s16_untied: -+** sqadd z0\.h, (z1\.h, z2\.h|z2\.h, z1\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_s16_untied, svint16_t, -+ z0 = svqadd_s16 (z1, z2), -+ z0 = svqadd (z1, z2)) -+ -+/* -+** qadd_w0_s16_tied1: -+** mov (z[0-9]+\.h), w0 -+** sqadd z0\.h, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_ZX (qadd_w0_s16_tied1, svint16_t, int16_t, -+ z0 = svqadd_n_s16 (z0, x0), -+ z0 = svqadd (z0, x0)) -+ -+/* -+** qadd_w0_s16_untied: -+** mov (z[0-9]+\.h), w0 -+** sqadd z0\.h, (z1\.h, \1|\1, z1\.h) -+** ret -+*/ -+TEST_UNIFORM_ZX (qadd_w0_s16_untied, svint16_t, int16_t, -+ z0 = svqadd_n_s16 (z1, x0), -+ z0 = svqadd (z1, x0)) -+ -+/* -+** qadd_1_s16_tied1: -+** sqadd z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_1_s16_tied1, 
svint16_t, -+ z0 = svqadd_n_s16 (z0, 1), -+ z0 = svqadd (z0, 1)) -+ -+/* -+** qadd_1_s16_untied: -+** movprfx z0, z1 -+** sqadd z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_1_s16_untied, svint16_t, -+ z0 = svqadd_n_s16 (z1, 1), -+ z0 = svqadd (z1, 1)) -+ -+/* -+** qadd_127_s16: -+** sqadd z0\.h, z0\.h, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_127_s16, svint16_t, -+ z0 = svqadd_n_s16 (z0, 127), -+ z0 = svqadd (z0, 127)) -+ -+/* -+** qadd_128_s16: -+** sqadd z0\.h, z0\.h, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_128_s16, svint16_t, -+ z0 = svqadd_n_s16 (z0, 128), -+ z0 = svqadd (z0, 128)) -+ -+/* -+** qadd_255_s16: -+** sqadd z0\.h, z0\.h, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_255_s16, svint16_t, -+ z0 = svqadd_n_s16 (z0, 255), -+ z0 = svqadd (z0, 255)) -+ -+/* -+** qadd_m1_s16: -+** sqsub z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_m1_s16, svint16_t, -+ z0 = svqadd_n_s16 (z0, -1), -+ z0 = svqadd (z0, -1)) -+ -+/* -+** qadd_m127_s16: -+** sqsub z0\.h, z0\.h, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_m127_s16, svint16_t, -+ z0 = svqadd_n_s16 (z0, -127), -+ z0 = svqadd (z0, -127)) -+ -+/* -+** qadd_m128_s16: -+** sqsub z0\.h, z0\.h, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_m128_s16, svint16_t, -+ z0 = svqadd_n_s16 (z0, -128), -+ z0 = svqadd (z0, -128)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_s32.c -new file mode 100644 -index 000000000..197cc3840 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_s32.c -@@ -0,0 +1,123 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qadd_s32_tied1: -+** sqadd z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_s32_tied1, svint32_t, -+ z0 = svqadd_s32 (z0, z1), -+ z0 = svqadd (z0, z1)) -+ -+/* -+** qadd_s32_tied2: -+** sqadd z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_s32_tied2, svint32_t, -+ z0 = svqadd_s32 (z1, z0), -+ z0 = svqadd (z1, z0)) -+ -+/* -+** qadd_s32_untied: -+** sqadd z0\.s, (z1\.s, z2\.s|z2\.s, z1\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_s32_untied, svint32_t, -+ z0 = svqadd_s32 (z1, z2), -+ z0 = svqadd (z1, z2)) -+ -+/* -+** qadd_w0_s32_tied1: -+** mov (z[0-9]+\.s), w0 -+** sqadd z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_ZX (qadd_w0_s32_tied1, svint32_t, int32_t, -+ z0 = svqadd_n_s32 (z0, x0), -+ z0 = svqadd (z0, x0)) -+ -+/* -+** qadd_w0_s32_untied: -+** mov (z[0-9]+\.s), w0 -+** sqadd z0\.s, (z1\.s, \1|\1, z1\.s) -+** ret -+*/ -+TEST_UNIFORM_ZX (qadd_w0_s32_untied, svint32_t, int32_t, -+ z0 = svqadd_n_s32 (z1, x0), -+ z0 = svqadd (z1, x0)) -+ -+/* -+** qadd_1_s32_tied1: -+** sqadd z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_1_s32_tied1, svint32_t, -+ z0 = svqadd_n_s32 (z0, 1), -+ z0 = svqadd (z0, 1)) -+ -+/* -+** qadd_1_s32_untied: -+** movprfx z0, z1 -+** sqadd z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_1_s32_untied, svint32_t, -+ z0 = svqadd_n_s32 (z1, 1), -+ z0 = svqadd (z1, 1)) -+ -+/* -+** qadd_127_s32: -+** sqadd z0\.s, z0\.s, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_127_s32, svint32_t, -+ z0 = svqadd_n_s32 (z0, 127), -+ z0 = svqadd (z0, 127)) -+ -+/* -+** qadd_128_s32: -+** sqadd z0\.s, z0\.s, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_128_s32, svint32_t, -+ z0 = svqadd_n_s32 (z0, 128), -+ z0 = svqadd (z0, 128)) -+ -+/* -+** qadd_255_s32: -+** sqadd z0\.s, z0\.s, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_255_s32, svint32_t, -+ z0 = svqadd_n_s32 (z0, 255), 
-+ z0 = svqadd (z0, 255)) -+ -+/* -+** qadd_m1_s32: -+** sqsub z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_m1_s32, svint32_t, -+ z0 = svqadd_n_s32 (z0, -1), -+ z0 = svqadd (z0, -1)) -+ -+/* -+** qadd_m127_s32: -+** sqsub z0\.s, z0\.s, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_m127_s32, svint32_t, -+ z0 = svqadd_n_s32 (z0, -127), -+ z0 = svqadd (z0, -127)) -+ -+/* -+** qadd_m128_s32: -+** sqsub z0\.s, z0\.s, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_m128_s32, svint32_t, -+ z0 = svqadd_n_s32 (z0, -128), -+ z0 = svqadd (z0, -128)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_s64.c -new file mode 100644 -index 000000000..0218866ea ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_s64.c -@@ -0,0 +1,123 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qadd_s64_tied1: -+** sqadd z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_s64_tied1, svint64_t, -+ z0 = svqadd_s64 (z0, z1), -+ z0 = svqadd (z0, z1)) -+ -+/* -+** qadd_s64_tied2: -+** sqadd z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_s64_tied2, svint64_t, -+ z0 = svqadd_s64 (z1, z0), -+ z0 = svqadd (z1, z0)) -+ -+/* -+** qadd_s64_untied: -+** sqadd z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_s64_untied, svint64_t, -+ z0 = svqadd_s64 (z1, z2), -+ z0 = svqadd (z1, z2)) -+ -+/* -+** qadd_x0_s64_tied1: -+** mov (z[0-9]+\.d), x0 -+** sqadd z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (qadd_x0_s64_tied1, svint64_t, int64_t, -+ z0 = svqadd_n_s64 (z0, x0), -+ z0 = svqadd (z0, x0)) -+ -+/* -+** qadd_x0_s64_untied: -+** mov (z[0-9]+\.d), x0 -+** sqadd z0\.d, (z1\.d, \1|\1, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (qadd_x0_s64_untied, svint64_t, int64_t, -+ z0 = svqadd_n_s64 (z1, x0), -+ z0 = svqadd (z1, x0)) -+ -+/* -+** qadd_1_s64_tied1: -+** sqadd z0\.d, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_1_s64_tied1, svint64_t, -+ z0 = svqadd_n_s64 (z0, 1), -+ z0 = svqadd (z0, 1)) -+ -+/* -+** qadd_1_s64_untied: -+** movprfx z0, z1 -+** sqadd z0\.d, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_1_s64_untied, svint64_t, -+ z0 = svqadd_n_s64 (z1, 1), -+ z0 = svqadd (z1, 1)) -+ -+/* -+** qadd_127_s64: -+** sqadd z0\.d, z0\.d, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_127_s64, svint64_t, -+ z0 = svqadd_n_s64 (z0, 127), -+ z0 = svqadd (z0, 127)) -+ -+/* -+** qadd_128_s64: -+** sqadd z0\.d, z0\.d, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_128_s64, svint64_t, -+ z0 = svqadd_n_s64 (z0, 128), -+ z0 = svqadd (z0, 128)) -+ -+/* -+** qadd_255_s64: -+** sqadd z0\.d, z0\.d, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_255_s64, svint64_t, -+ z0 = svqadd_n_s64 (z0, 255), -+ z0 = svqadd (z0, 255)) -+ -+/* -+** qadd_m1_s64: -+** sqsub z0\.d, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_m1_s64, svint64_t, -+ z0 = svqadd_n_s64 (z0, -1), -+ z0 = svqadd (z0, -1)) -+ -+/* -+** qadd_m127_s64: -+** sqsub z0\.d, z0\.d, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_m127_s64, svint64_t, -+ z0 = svqadd_n_s64 (z0, -127), -+ z0 = svqadd (z0, -127)) -+ -+/* -+** qadd_m128_s64: -+** sqsub z0\.d, z0\.d, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_m128_s64, svint64_t, -+ z0 = svqadd_n_s64 (z0, -128), -+ z0 = svqadd (z0, -128)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_s8.c -new file mode 100644 -index 000000000..c8b88fa82 ---- /dev/null -+++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_s8.c -@@ -0,0 +1,123 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qadd_s8_tied1: -+** sqadd z0\.b, (z0\.b, z1\.b|z1\.b, z0\.b) -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_s8_tied1, svint8_t, -+ z0 = svqadd_s8 (z0, z1), -+ z0 = svqadd (z0, z1)) -+ -+/* -+** qadd_s8_tied2: -+** sqadd z0\.b, (z0\.b, z1\.b|z1\.b, z0\.b) -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_s8_tied2, svint8_t, -+ z0 = svqadd_s8 (z1, z0), -+ z0 = svqadd (z1, z0)) -+ -+/* -+** qadd_s8_untied: -+** sqadd z0\.b, (z1\.b, z2\.b|z2\.b, z1\.b) -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_s8_untied, svint8_t, -+ z0 = svqadd_s8 (z1, z2), -+ z0 = svqadd (z1, z2)) -+ -+/* -+** qadd_w0_s8_tied1: -+** mov (z[0-9]+\.b), w0 -+** sqadd z0\.b, (z0\.b, \1|\1, z0\.b) -+** ret -+*/ -+TEST_UNIFORM_ZX (qadd_w0_s8_tied1, svint8_t, int8_t, -+ z0 = svqadd_n_s8 (z0, x0), -+ z0 = svqadd (z0, x0)) -+ -+/* -+** qadd_w0_s8_untied: -+** mov (z[0-9]+\.b), w0 -+** sqadd z0\.b, (z1\.b, \1|\1, z1\.b) -+** ret -+*/ -+TEST_UNIFORM_ZX (qadd_w0_s8_untied, svint8_t, int8_t, -+ z0 = svqadd_n_s8 (z1, x0), -+ z0 = svqadd (z1, x0)) -+ -+/* -+** qadd_1_s8_tied1: -+** sqadd z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_1_s8_tied1, svint8_t, -+ z0 = svqadd_n_s8 (z0, 1), -+ z0 = svqadd (z0, 1)) -+ -+/* -+** qadd_1_s8_untied: -+** movprfx z0, z1 -+** sqadd z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_1_s8_untied, svint8_t, -+ z0 = svqadd_n_s8 (z1, 1), -+ z0 = svqadd (z1, 1)) -+ -+/* -+** qadd_127_s8: -+** sqadd z0\.b, z0\.b, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_127_s8, svint8_t, -+ z0 = svqadd_n_s8 (z0, 127), -+ z0 = svqadd (z0, 127)) -+ -+/* -+** qadd_128_s8: -+** sqsub z0\.b, z0\.b, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_128_s8, svint8_t, -+ z0 = svqadd_n_s8 (z0, 128), -+ z0 = svqadd (z0, 128)) -+ -+/* -+** qadd_255_s8: -+** sqsub z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_255_s8, svint8_t, -+ z0 = svqadd_n_s8 (z0, 255), -+ z0 = svqadd (z0, 255)) -+ -+/* -+** qadd_m1_s8: -+** sqsub z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_m1_s8, svint8_t, -+ z0 = svqadd_n_s8 (z0, -1), -+ z0 = svqadd (z0, -1)) -+ -+/* -+** qadd_m127_s8: -+** sqsub z0\.b, z0\.b, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_m127_s8, svint8_t, -+ z0 = svqadd_n_s8 (z0, -127), -+ z0 = svqadd (z0, -127)) -+ -+/* -+** qadd_m128_s8: -+** sqsub z0\.b, z0\.b, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_m128_s8, svint8_t, -+ z0 = svqadd_n_s8 (z0, -128), -+ z0 = svqadd (z0, -128)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_u16.c -new file mode 100644 -index 000000000..dd7bc5b6a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_u16.c -@@ -0,0 +1,126 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qadd_u16_tied1: -+** uqadd z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_u16_tied1, svuint16_t, -+ z0 = svqadd_u16 (z0, z1), -+ z0 = svqadd (z0, z1)) -+ -+/* -+** qadd_u16_tied2: -+** uqadd z0\.h, (z0\.h, z1\.h|z1\.h, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_u16_tied2, svuint16_t, -+ z0 = svqadd_u16 (z1, z0), -+ z0 = svqadd (z1, z0)) -+ -+/* -+** qadd_u16_untied: -+** uqadd z0\.h, (z1\.h, z2\.h|z2\.h, z1\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_u16_untied, svuint16_t, -+ z0 = svqadd_u16 (z1, z2), -+ z0 = svqadd (z1, z2)) -+ -+/* -+** qadd_w0_u16_tied1: -+** mov (z[0-9]+\.h), w0 
-+** uqadd z0\.h, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_ZX (qadd_w0_u16_tied1, svuint16_t, uint16_t, -+ z0 = svqadd_n_u16 (z0, x0), -+ z0 = svqadd (z0, x0)) -+ -+/* -+** qadd_w0_u16_untied: -+** mov (z[0-9]+\.h), w0 -+** uqadd z0\.h, (z1\.h, \1|\1, z1\.h) -+** ret -+*/ -+TEST_UNIFORM_ZX (qadd_w0_u16_untied, svuint16_t, uint16_t, -+ z0 = svqadd_n_u16 (z1, x0), -+ z0 = svqadd (z1, x0)) -+ -+/* -+** qadd_1_u16_tied1: -+** uqadd z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_1_u16_tied1, svuint16_t, -+ z0 = svqadd_n_u16 (z0, 1), -+ z0 = svqadd (z0, 1)) -+ -+/* -+** qadd_1_u16_untied: -+** movprfx z0, z1 -+** uqadd z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_1_u16_untied, svuint16_t, -+ z0 = svqadd_n_u16 (z1, 1), -+ z0 = svqadd (z1, 1)) -+ -+/* -+** qadd_127_u16: -+** uqadd z0\.h, z0\.h, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_127_u16, svuint16_t, -+ z0 = svqadd_n_u16 (z0, 127), -+ z0 = svqadd (z0, 127)) -+ -+/* -+** qadd_128_u16: -+** uqadd z0\.h, z0\.h, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_128_u16, svuint16_t, -+ z0 = svqadd_n_u16 (z0, 128), -+ z0 = svqadd (z0, 128)) -+ -+/* -+** qadd_255_u16: -+** uqadd z0\.h, z0\.h, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_255_u16, svuint16_t, -+ z0 = svqadd_n_u16 (z0, 255), -+ z0 = svqadd (z0, 255)) -+ -+/* -+** qadd_m1_u16: -+** mov (z[0-9]+)\.b, #-1 -+** uqadd z0\.h, (z0\.h, \1\.h|\1\.h, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_m1_u16, svuint16_t, -+ z0 = svqadd_n_u16 (z0, -1), -+ z0 = svqadd (z0, -1)) -+ -+/* -+** qadd_m127_u16: -+** mov (z[0-9]+\.h), #-127 -+** uqadd z0\.h, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_m127_u16, svuint16_t, -+ z0 = svqadd_n_u16 (z0, -127), -+ z0 = svqadd (z0, -127)) -+ -+/* -+** qadd_m128_u16: -+** mov (z[0-9]+\.h), #-128 -+** uqadd z0\.h, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_m128_u16, svuint16_t, -+ z0 = svqadd_n_u16 (z0, -128), -+ z0 = svqadd (z0, -128)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_u32.c -new file mode 100644 -index 000000000..0f846e44e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_u32.c -@@ -0,0 +1,126 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qadd_u32_tied1: -+** uqadd z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_u32_tied1, svuint32_t, -+ z0 = svqadd_u32 (z0, z1), -+ z0 = svqadd (z0, z1)) -+ -+/* -+** qadd_u32_tied2: -+** uqadd z0\.s, (z0\.s, z1\.s|z1\.s, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_u32_tied2, svuint32_t, -+ z0 = svqadd_u32 (z1, z0), -+ z0 = svqadd (z1, z0)) -+ -+/* -+** qadd_u32_untied: -+** uqadd z0\.s, (z1\.s, z2\.s|z2\.s, z1\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_u32_untied, svuint32_t, -+ z0 = svqadd_u32 (z1, z2), -+ z0 = svqadd (z1, z2)) -+ -+/* -+** qadd_w0_u32_tied1: -+** mov (z[0-9]+\.s), w0 -+** uqadd z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_ZX (qadd_w0_u32_tied1, svuint32_t, uint32_t, -+ z0 = svqadd_n_u32 (z0, x0), -+ z0 = svqadd (z0, x0)) -+ -+/* -+** qadd_w0_u32_untied: -+** mov (z[0-9]+\.s), w0 -+** uqadd z0\.s, (z1\.s, \1|\1, z1\.s) -+** ret -+*/ -+TEST_UNIFORM_ZX (qadd_w0_u32_untied, svuint32_t, uint32_t, -+ z0 = svqadd_n_u32 (z1, x0), -+ z0 = svqadd (z1, x0)) -+ -+/* -+** qadd_1_u32_tied1: -+** uqadd z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_1_u32_tied1, svuint32_t, -+ z0 = svqadd_n_u32 (z0, 1), -+ z0 = svqadd (z0, 1)) -+ -+/* -+** qadd_1_u32_untied: -+** 
movprfx z0, z1 -+** uqadd z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_1_u32_untied, svuint32_t, -+ z0 = svqadd_n_u32 (z1, 1), -+ z0 = svqadd (z1, 1)) -+ -+/* -+** qadd_127_u32: -+** uqadd z0\.s, z0\.s, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_127_u32, svuint32_t, -+ z0 = svqadd_n_u32 (z0, 127), -+ z0 = svqadd (z0, 127)) -+ -+/* -+** qadd_128_u32: -+** uqadd z0\.s, z0\.s, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_128_u32, svuint32_t, -+ z0 = svqadd_n_u32 (z0, 128), -+ z0 = svqadd (z0, 128)) -+ -+/* -+** qadd_255_u32: -+** uqadd z0\.s, z0\.s, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_255_u32, svuint32_t, -+ z0 = svqadd_n_u32 (z0, 255), -+ z0 = svqadd (z0, 255)) -+ -+/* -+** qadd_m1_u32: -+** mov (z[0-9]+)\.b, #-1 -+** uqadd z0\.s, (z0\.s, \1\.s|\1\.s, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_m1_u32, svuint32_t, -+ z0 = svqadd_n_u32 (z0, -1), -+ z0 = svqadd (z0, -1)) -+ -+/* -+** qadd_m127_u32: -+** mov (z[0-9]+\.s), #-127 -+** uqadd z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_m127_u32, svuint32_t, -+ z0 = svqadd_n_u32 (z0, -127), -+ z0 = svqadd (z0, -127)) -+ -+/* -+** qadd_m128_u32: -+** mov (z[0-9]+\.s), #-128 -+** uqadd z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_m128_u32, svuint32_t, -+ z0 = svqadd_n_u32 (z0, -128), -+ z0 = svqadd (z0, -128)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_u64.c -new file mode 100644 -index 000000000..454fb1d63 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_u64.c -@@ -0,0 +1,126 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qadd_u64_tied1: -+** uqadd z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_u64_tied1, svuint64_t, -+ z0 = svqadd_u64 (z0, z1), -+ z0 = svqadd (z0, z1)) -+ -+/* -+** qadd_u64_tied2: -+** uqadd z0\.d, (z0\.d, z1\.d|z1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_u64_tied2, svuint64_t, -+ z0 = svqadd_u64 (z1, z0), -+ z0 = svqadd (z1, z0)) -+ -+/* -+** qadd_u64_untied: -+** uqadd z0\.d, (z1\.d, z2\.d|z2\.d, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_u64_untied, svuint64_t, -+ z0 = svqadd_u64 (z1, z2), -+ z0 = svqadd (z1, z2)) -+ -+/* -+** qadd_x0_u64_tied1: -+** mov (z[0-9]+\.d), x0 -+** uqadd z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (qadd_x0_u64_tied1, svuint64_t, uint64_t, -+ z0 = svqadd_n_u64 (z0, x0), -+ z0 = svqadd (z0, x0)) -+ -+/* -+** qadd_x0_u64_untied: -+** mov (z[0-9]+\.d), x0 -+** uqadd z0\.d, (z1\.d, \1|\1, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_ZX (qadd_x0_u64_untied, svuint64_t, uint64_t, -+ z0 = svqadd_n_u64 (z1, x0), -+ z0 = svqadd (z1, x0)) -+ -+/* -+** qadd_1_u64_tied1: -+** uqadd z0\.d, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_1_u64_tied1, svuint64_t, -+ z0 = svqadd_n_u64 (z0, 1), -+ z0 = svqadd (z0, 1)) -+ -+/* -+** qadd_1_u64_untied: -+** movprfx z0, z1 -+** uqadd z0\.d, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_1_u64_untied, svuint64_t, -+ z0 = svqadd_n_u64 (z1, 1), -+ z0 = svqadd (z1, 1)) -+ -+/* -+** qadd_127_u64: -+** uqadd z0\.d, z0\.d, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_127_u64, svuint64_t, -+ z0 = svqadd_n_u64 (z0, 127), -+ z0 = svqadd (z0, 127)) -+ -+/* -+** qadd_128_u64: -+** uqadd z0\.d, z0\.d, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_128_u64, svuint64_t, -+ z0 = svqadd_n_u64 (z0, 128), -+ z0 = svqadd (z0, 128)) -+ -+/* -+** qadd_255_u64: -+** uqadd z0\.d, z0\.d, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_255_u64, 
svuint64_t, -+ z0 = svqadd_n_u64 (z0, 255), -+ z0 = svqadd (z0, 255)) -+ -+/* -+** qadd_m1_u64: -+** mov (z[0-9]+)\.b, #-1 -+** uqadd z0\.d, (z0\.d, \1\.d|\1\.d, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_m1_u64, svuint64_t, -+ z0 = svqadd_n_u64 (z0, -1), -+ z0 = svqadd (z0, -1)) -+ -+/* -+** qadd_m127_u64: -+** mov (z[0-9]+\.d), #-127 -+** uqadd z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_m127_u64, svuint64_t, -+ z0 = svqadd_n_u64 (z0, -127), -+ z0 = svqadd (z0, -127)) -+ -+/* -+** qadd_m128_u64: -+** mov (z[0-9]+\.d), #-128 -+** uqadd z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_m128_u64, svuint64_t, -+ z0 = svqadd_n_u64 (z0, -128), -+ z0 = svqadd (z0, -128)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_u8.c -new file mode 100644 -index 000000000..e86b8988c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qadd_u8.c -@@ -0,0 +1,123 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qadd_u8_tied1: -+** uqadd z0\.b, (z0\.b, z1\.b|z1\.b, z0\.b) -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_u8_tied1, svuint8_t, -+ z0 = svqadd_u8 (z0, z1), -+ z0 = svqadd (z0, z1)) -+ -+/* -+** qadd_u8_tied2: -+** uqadd z0\.b, (z0\.b, z1\.b|z1\.b, z0\.b) -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_u8_tied2, svuint8_t, -+ z0 = svqadd_u8 (z1, z0), -+ z0 = svqadd (z1, z0)) -+ -+/* -+** qadd_u8_untied: -+** uqadd z0\.b, (z1\.b, z2\.b|z2\.b, z1\.b) -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_u8_untied, svuint8_t, -+ z0 = svqadd_u8 (z1, z2), -+ z0 = svqadd (z1, z2)) -+ -+/* -+** qadd_w0_u8_tied1: -+** mov (z[0-9]+\.b), w0 -+** uqadd z0\.b, (z0\.b, \1|\1, z0\.b) -+** ret -+*/ -+TEST_UNIFORM_ZX (qadd_w0_u8_tied1, svuint8_t, uint8_t, -+ z0 = svqadd_n_u8 (z0, x0), -+ z0 = svqadd (z0, x0)) -+ -+/* -+** qadd_w0_u8_untied: -+** mov (z[0-9]+\.b), w0 -+** uqadd z0\.b, (z1\.b, \1|\1, z1\.b) -+** ret -+*/ -+TEST_UNIFORM_ZX (qadd_w0_u8_untied, svuint8_t, uint8_t, -+ z0 = svqadd_n_u8 (z1, x0), -+ z0 = svqadd (z1, x0)) -+ -+/* -+** qadd_1_u8_tied1: -+** uqadd z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_1_u8_tied1, svuint8_t, -+ z0 = svqadd_n_u8 (z0, 1), -+ z0 = svqadd (z0, 1)) -+ -+/* -+** qadd_1_u8_untied: -+** movprfx z0, z1 -+** uqadd z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_1_u8_untied, svuint8_t, -+ z0 = svqadd_n_u8 (z1, 1), -+ z0 = svqadd (z1, 1)) -+ -+/* -+** qadd_127_u8: -+** uqadd z0\.b, z0\.b, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_127_u8, svuint8_t, -+ z0 = svqadd_n_u8 (z0, 127), -+ z0 = svqadd (z0, 127)) -+ -+/* -+** qadd_128_u8: -+** uqadd z0\.b, z0\.b, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_128_u8, svuint8_t, -+ z0 = svqadd_n_u8 (z0, 128), -+ z0 = svqadd (z0, 128)) -+ -+/* -+** qadd_255_u8: -+** uqadd z0\.b, z0\.b, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_255_u8, svuint8_t, -+ z0 = svqadd_n_u8 (z0, 255), -+ z0 = svqadd (z0, 255)) -+ -+/* -+** qadd_m1_u8: -+** uqadd z0\.b, z0\.b, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_m1_u8, svuint8_t, -+ z0 = svqadd_n_u8 (z0, -1), -+ z0 = svqadd (z0, -1)) -+ -+/* -+** qadd_m127_u8: -+** uqadd z0\.b, z0\.b, #129 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_m127_u8, svuint8_t, -+ z0 = svqadd_n_u8 (z0, -127), -+ z0 = svqadd (z0, -127)) -+ -+/* -+** qadd_m128_u8: -+** uqadd z0\.b, z0\.b, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (qadd_m128_u8, svuint8_t, -+ z0 = svqadd_n_u8 (z0, -128), -+ z0 = svqadd (z0, -128)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_pat_s32.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_pat_s32.c -new file mode 100644 -index 000000000..22b3afef7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_pat_s32.c -@@ -0,0 +1,202 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdecb_pat_n_1_s32_tied: -+** sqdecb x0, w0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_1_s32_tied, int32_t, -+ x0 = svqdecb_pat_n_s32 (x0, SV_POW2, 1), -+ x0 = svqdecb_pat (x0, SV_POW2, 1)) -+ -+/* -+** qdecb_pat_n_1_s32_untied: -+** mov w0, w1 -+** sqdecb x0, w0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_1_s32_untied, int32_t, -+ x0 = svqdecb_pat_n_s32 (x1, SV_POW2, 1), -+ x0 = svqdecb_pat (x1, SV_POW2, 1)) -+ -+/* -+** qdecb_pat_n_2_s32: -+** sqdecb x0, w0, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_2_s32, int32_t, -+ x0 = svqdecb_pat_n_s32 (x0, SV_POW2, 2), -+ x0 = svqdecb_pat (x0, SV_POW2, 2)) -+ -+/* -+** qdecb_pat_n_7_s32: -+** sqdecb x0, w0, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_7_s32, int32_t, -+ x0 = svqdecb_pat_n_s32 (x0, SV_POW2, 7), -+ x0 = svqdecb_pat (x0, SV_POW2, 7)) -+ -+/* -+** qdecb_pat_n_15_s32: -+** sqdecb x0, w0, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_15_s32, int32_t, -+ x0 = svqdecb_pat_n_s32 (x0, SV_POW2, 15), -+ x0 = svqdecb_pat (x0, SV_POW2, 15)) -+ -+/* -+** qdecb_pat_n_16_s32: -+** sqdecb x0, w0, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_16_s32, int32_t, -+ x0 = svqdecb_pat_n_s32 (x0, SV_POW2, 16), -+ x0 = svqdecb_pat (x0, SV_POW2, 16)) -+ -+/* -+** qdecb_pat_n_vl1_s32: -+** sqdecb x0, w0, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl1_s32, int32_t, -+ x0 = svqdecb_pat_n_s32 (x0, SV_VL1, 16), -+ x0 = svqdecb_pat (x0, SV_VL1, 16)) -+ -+/* -+** qdecb_pat_n_vl2_s32: -+** sqdecb x0, w0, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl2_s32, int32_t, -+ x0 = svqdecb_pat_n_s32 (x0, SV_VL2, 16), -+ x0 = svqdecb_pat (x0, SV_VL2, 16)) -+ -+/* -+** qdecb_pat_n_vl3_s32: -+** sqdecb x0, w0, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl3_s32, int32_t, -+ x0 = svqdecb_pat_n_s32 (x0, SV_VL3, 16), -+ x0 = svqdecb_pat (x0, SV_VL3, 16)) -+ -+/* -+** qdecb_pat_n_vl4_s32: -+** sqdecb x0, w0, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl4_s32, int32_t, -+ x0 = svqdecb_pat_n_s32 (x0, SV_VL4, 16), -+ x0 = svqdecb_pat (x0, SV_VL4, 16)) -+ -+/* -+** qdecb_pat_n_vl5_s32: -+** sqdecb x0, w0, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl5_s32, int32_t, -+ x0 = svqdecb_pat_n_s32 (x0, SV_VL5, 16), -+ x0 = svqdecb_pat (x0, SV_VL5, 16)) -+ -+/* -+** qdecb_pat_n_vl6_s32: -+** sqdecb x0, w0, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl6_s32, int32_t, -+ x0 = svqdecb_pat_n_s32 (x0, SV_VL6, 16), -+ x0 = svqdecb_pat (x0, SV_VL6, 16)) -+ -+/* -+** qdecb_pat_n_vl7_s32: -+** sqdecb x0, w0, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl7_s32, int32_t, -+ x0 = svqdecb_pat_n_s32 (x0, SV_VL7, 16), -+ x0 = svqdecb_pat (x0, SV_VL7, 16)) -+ -+/* -+** qdecb_pat_n_vl8_s32: -+** sqdecb x0, w0, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl8_s32, int32_t, -+ x0 = svqdecb_pat_n_s32 (x0, SV_VL8, 16), -+ x0 = svqdecb_pat (x0, SV_VL8, 16)) -+ -+/* -+** qdecb_pat_n_vl16_s32: -+** sqdecb x0, w0, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl16_s32, int32_t, -+ x0 = svqdecb_pat_n_s32 (x0, SV_VL16, 16), -+ x0 = svqdecb_pat (x0, SV_VL16, 16)) -+ -+/* -+** qdecb_pat_n_vl32_s32: -+** sqdecb x0, w0, 
vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl32_s32, int32_t, -+ x0 = svqdecb_pat_n_s32 (x0, SV_VL32, 16), -+ x0 = svqdecb_pat (x0, SV_VL32, 16)) -+ -+/* -+** qdecb_pat_n_vl64_s32: -+** sqdecb x0, w0, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl64_s32, int32_t, -+ x0 = svqdecb_pat_n_s32 (x0, SV_VL64, 16), -+ x0 = svqdecb_pat (x0, SV_VL64, 16)) -+ -+/* -+** qdecb_pat_n_vl128_s32: -+** sqdecb x0, w0, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl128_s32, int32_t, -+ x0 = svqdecb_pat_n_s32 (x0, SV_VL128, 16), -+ x0 = svqdecb_pat (x0, SV_VL128, 16)) -+ -+/* -+** qdecb_pat_n_vl256_s32: -+** sqdecb x0, w0, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl256_s32, int32_t, -+ x0 = svqdecb_pat_n_s32 (x0, SV_VL256, 16), -+ x0 = svqdecb_pat (x0, SV_VL256, 16)) -+ -+/* -+** qdecb_pat_n_mul4_s32: -+** sqdecb x0, w0, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_mul4_s32, int32_t, -+ x0 = svqdecb_pat_n_s32 (x0, SV_MUL4, 16), -+ x0 = svqdecb_pat (x0, SV_MUL4, 16)) -+ -+/* -+** qdecb_pat_n_mul3_s32: -+** sqdecb x0, w0, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_mul3_s32, int32_t, -+ x0 = svqdecb_pat_n_s32 (x0, SV_MUL3, 16), -+ x0 = svqdecb_pat (x0, SV_MUL3, 16)) -+ -+/* -+** qdecb_pat_n_all_s32: -+** sqdecb x0, w0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_all_s32, int32_t, -+ x0 = svqdecb_pat_n_s32 (x0, SV_ALL, 16), -+ x0 = svqdecb_pat (x0, SV_ALL, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_pat_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_pat_s64.c -new file mode 100644 -index 000000000..1380e6c8e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_pat_s64.c -@@ -0,0 +1,202 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdecb_pat_n_1_s64_tied: -+** sqdecb x0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_1_s64_tied, int64_t, -+ x0 = svqdecb_pat_n_s64 (x0, SV_POW2, 1), -+ x0 = svqdecb_pat (x0, SV_POW2, 1)) -+ -+/* -+** qdecb_pat_n_1_s64_untied: -+** mov x0, x1 -+** sqdecb x0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_1_s64_untied, int64_t, -+ x0 = svqdecb_pat_n_s64 (x1, SV_POW2, 1), -+ x0 = svqdecb_pat (x1, SV_POW2, 1)) -+ -+/* -+** qdecb_pat_n_2_s64: -+** sqdecb x0, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_2_s64, int64_t, -+ x0 = svqdecb_pat_n_s64 (x0, SV_POW2, 2), -+ x0 = svqdecb_pat (x0, SV_POW2, 2)) -+ -+/* -+** qdecb_pat_n_7_s64: -+** sqdecb x0, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_7_s64, int64_t, -+ x0 = svqdecb_pat_n_s64 (x0, SV_POW2, 7), -+ x0 = svqdecb_pat (x0, SV_POW2, 7)) -+ -+/* -+** qdecb_pat_n_15_s64: -+** sqdecb x0, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_15_s64, int64_t, -+ x0 = svqdecb_pat_n_s64 (x0, SV_POW2, 15), -+ x0 = svqdecb_pat (x0, SV_POW2, 15)) -+ -+/* -+** qdecb_pat_n_16_s64: -+** sqdecb x0, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_16_s64, int64_t, -+ x0 = svqdecb_pat_n_s64 (x0, SV_POW2, 16), -+ x0 = svqdecb_pat (x0, SV_POW2, 16)) -+ -+/* -+** qdecb_pat_n_vl1_s64: -+** sqdecb x0, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl1_s64, int64_t, -+ x0 = svqdecb_pat_n_s64 (x0, SV_VL1, 16), -+ x0 = svqdecb_pat (x0, SV_VL1, 16)) -+ -+/* -+** qdecb_pat_n_vl2_s64: -+** sqdecb x0, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl2_s64, int64_t, -+ x0 = svqdecb_pat_n_s64 (x0, SV_VL2, 16), -+ x0 = svqdecb_pat (x0, SV_VL2, 16)) -+ -+/* -+** 
qdecb_pat_n_vl3_s64: -+** sqdecb x0, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl3_s64, int64_t, -+ x0 = svqdecb_pat_n_s64 (x0, SV_VL3, 16), -+ x0 = svqdecb_pat (x0, SV_VL3, 16)) -+ -+/* -+** qdecb_pat_n_vl4_s64: -+** sqdecb x0, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl4_s64, int64_t, -+ x0 = svqdecb_pat_n_s64 (x0, SV_VL4, 16), -+ x0 = svqdecb_pat (x0, SV_VL4, 16)) -+ -+/* -+** qdecb_pat_n_vl5_s64: -+** sqdecb x0, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl5_s64, int64_t, -+ x0 = svqdecb_pat_n_s64 (x0, SV_VL5, 16), -+ x0 = svqdecb_pat (x0, SV_VL5, 16)) -+ -+/* -+** qdecb_pat_n_vl6_s64: -+** sqdecb x0, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl6_s64, int64_t, -+ x0 = svqdecb_pat_n_s64 (x0, SV_VL6, 16), -+ x0 = svqdecb_pat (x0, SV_VL6, 16)) -+ -+/* -+** qdecb_pat_n_vl7_s64: -+** sqdecb x0, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl7_s64, int64_t, -+ x0 = svqdecb_pat_n_s64 (x0, SV_VL7, 16), -+ x0 = svqdecb_pat (x0, SV_VL7, 16)) -+ -+/* -+** qdecb_pat_n_vl8_s64: -+** sqdecb x0, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl8_s64, int64_t, -+ x0 = svqdecb_pat_n_s64 (x0, SV_VL8, 16), -+ x0 = svqdecb_pat (x0, SV_VL8, 16)) -+ -+/* -+** qdecb_pat_n_vl16_s64: -+** sqdecb x0, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl16_s64, int64_t, -+ x0 = svqdecb_pat_n_s64 (x0, SV_VL16, 16), -+ x0 = svqdecb_pat (x0, SV_VL16, 16)) -+ -+/* -+** qdecb_pat_n_vl32_s64: -+** sqdecb x0, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl32_s64, int64_t, -+ x0 = svqdecb_pat_n_s64 (x0, SV_VL32, 16), -+ x0 = svqdecb_pat (x0, SV_VL32, 16)) -+ -+/* -+** qdecb_pat_n_vl64_s64: -+** sqdecb x0, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl64_s64, int64_t, -+ x0 = svqdecb_pat_n_s64 (x0, SV_VL64, 16), -+ x0 = svqdecb_pat (x0, SV_VL64, 16)) -+ -+/* -+** qdecb_pat_n_vl128_s64: -+** sqdecb x0, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl128_s64, int64_t, -+ x0 = svqdecb_pat_n_s64 (x0, SV_VL128, 16), -+ x0 = svqdecb_pat (x0, SV_VL128, 16)) -+ -+/* -+** qdecb_pat_n_vl256_s64: -+** sqdecb x0, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl256_s64, int64_t, -+ x0 = svqdecb_pat_n_s64 (x0, SV_VL256, 16), -+ x0 = svqdecb_pat (x0, SV_VL256, 16)) -+ -+/* -+** qdecb_pat_n_mul4_s64: -+** sqdecb x0, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_mul4_s64, int64_t, -+ x0 = svqdecb_pat_n_s64 (x0, SV_MUL4, 16), -+ x0 = svqdecb_pat (x0, SV_MUL4, 16)) -+ -+/* -+** qdecb_pat_n_mul3_s64: -+** sqdecb x0, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_mul3_s64, int64_t, -+ x0 = svqdecb_pat_n_s64 (x0, SV_MUL3, 16), -+ x0 = svqdecb_pat (x0, SV_MUL3, 16)) -+ -+/* -+** qdecb_pat_n_all_s64: -+** sqdecb x0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_all_s64, int64_t, -+ x0 = svqdecb_pat_n_s64 (x0, SV_ALL, 16), -+ x0 = svqdecb_pat (x0, SV_ALL, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_pat_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_pat_u32.c -new file mode 100644 -index 000000000..3db3da866 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_pat_u32.c -@@ -0,0 +1,202 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdecb_pat_n_1_u32_tied: -+** uqdecb w0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_1_u32_tied, uint32_t, -+ x0 = svqdecb_pat_n_u32 (x0, SV_POW2, 1), -+ x0 = svqdecb_pat (x0, SV_POW2, 1)) -+ -+/* -+** 
qdecb_pat_n_1_u32_untied: -+** mov w0, w1 -+** uqdecb w0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_1_u32_untied, uint32_t, -+ x0 = svqdecb_pat_n_u32 (x1, SV_POW2, 1), -+ x0 = svqdecb_pat (x1, SV_POW2, 1)) -+ -+/* -+** qdecb_pat_n_2_u32: -+** uqdecb w0, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_2_u32, uint32_t, -+ x0 = svqdecb_pat_n_u32 (x0, SV_POW2, 2), -+ x0 = svqdecb_pat (x0, SV_POW2, 2)) -+ -+/* -+** qdecb_pat_n_7_u32: -+** uqdecb w0, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_7_u32, uint32_t, -+ x0 = svqdecb_pat_n_u32 (x0, SV_POW2, 7), -+ x0 = svqdecb_pat (x0, SV_POW2, 7)) -+ -+/* -+** qdecb_pat_n_15_u32: -+** uqdecb w0, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_15_u32, uint32_t, -+ x0 = svqdecb_pat_n_u32 (x0, SV_POW2, 15), -+ x0 = svqdecb_pat (x0, SV_POW2, 15)) -+ -+/* -+** qdecb_pat_n_16_u32: -+** uqdecb w0, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_16_u32, uint32_t, -+ x0 = svqdecb_pat_n_u32 (x0, SV_POW2, 16), -+ x0 = svqdecb_pat (x0, SV_POW2, 16)) -+ -+/* -+** qdecb_pat_n_vl1_u32: -+** uqdecb w0, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl1_u32, uint32_t, -+ x0 = svqdecb_pat_n_u32 (x0, SV_VL1, 16), -+ x0 = svqdecb_pat (x0, SV_VL1, 16)) -+ -+/* -+** qdecb_pat_n_vl2_u32: -+** uqdecb w0, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl2_u32, uint32_t, -+ x0 = svqdecb_pat_n_u32 (x0, SV_VL2, 16), -+ x0 = svqdecb_pat (x0, SV_VL2, 16)) -+ -+/* -+** qdecb_pat_n_vl3_u32: -+** uqdecb w0, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl3_u32, uint32_t, -+ x0 = svqdecb_pat_n_u32 (x0, SV_VL3, 16), -+ x0 = svqdecb_pat (x0, SV_VL3, 16)) -+ -+/* -+** qdecb_pat_n_vl4_u32: -+** uqdecb w0, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl4_u32, uint32_t, -+ x0 = svqdecb_pat_n_u32 (x0, SV_VL4, 16), -+ x0 = svqdecb_pat (x0, SV_VL4, 16)) -+ -+/* -+** qdecb_pat_n_vl5_u32: -+** uqdecb w0, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl5_u32, uint32_t, -+ x0 = svqdecb_pat_n_u32 (x0, SV_VL5, 16), -+ x0 = svqdecb_pat (x0, SV_VL5, 16)) -+ -+/* -+** qdecb_pat_n_vl6_u32: -+** uqdecb w0, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl6_u32, uint32_t, -+ x0 = svqdecb_pat_n_u32 (x0, SV_VL6, 16), -+ x0 = svqdecb_pat (x0, SV_VL6, 16)) -+ -+/* -+** qdecb_pat_n_vl7_u32: -+** uqdecb w0, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl7_u32, uint32_t, -+ x0 = svqdecb_pat_n_u32 (x0, SV_VL7, 16), -+ x0 = svqdecb_pat (x0, SV_VL7, 16)) -+ -+/* -+** qdecb_pat_n_vl8_u32: -+** uqdecb w0, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl8_u32, uint32_t, -+ x0 = svqdecb_pat_n_u32 (x0, SV_VL8, 16), -+ x0 = svqdecb_pat (x0, SV_VL8, 16)) -+ -+/* -+** qdecb_pat_n_vl16_u32: -+** uqdecb w0, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl16_u32, uint32_t, -+ x0 = svqdecb_pat_n_u32 (x0, SV_VL16, 16), -+ x0 = svqdecb_pat (x0, SV_VL16, 16)) -+ -+/* -+** qdecb_pat_n_vl32_u32: -+** uqdecb w0, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl32_u32, uint32_t, -+ x0 = svqdecb_pat_n_u32 (x0, SV_VL32, 16), -+ x0 = svqdecb_pat (x0, SV_VL32, 16)) -+ -+/* -+** qdecb_pat_n_vl64_u32: -+** uqdecb w0, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl64_u32, uint32_t, -+ x0 = svqdecb_pat_n_u32 (x0, SV_VL64, 16), -+ x0 = svqdecb_pat (x0, SV_VL64, 16)) -+ -+/* -+** qdecb_pat_n_vl128_u32: -+** uqdecb w0, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl128_u32, uint32_t, -+ x0 = svqdecb_pat_n_u32 (x0, SV_VL128, 16), -+ x0 = svqdecb_pat (x0, SV_VL128, 
16)) -+ -+/* -+** qdecb_pat_n_vl256_u32: -+** uqdecb w0, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl256_u32, uint32_t, -+ x0 = svqdecb_pat_n_u32 (x0, SV_VL256, 16), -+ x0 = svqdecb_pat (x0, SV_VL256, 16)) -+ -+/* -+** qdecb_pat_n_mul4_u32: -+** uqdecb w0, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_mul4_u32, uint32_t, -+ x0 = svqdecb_pat_n_u32 (x0, SV_MUL4, 16), -+ x0 = svqdecb_pat (x0, SV_MUL4, 16)) -+ -+/* -+** qdecb_pat_n_mul3_u32: -+** uqdecb w0, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_mul3_u32, uint32_t, -+ x0 = svqdecb_pat_n_u32 (x0, SV_MUL3, 16), -+ x0 = svqdecb_pat (x0, SV_MUL3, 16)) -+ -+/* -+** qdecb_pat_n_all_u32: -+** uqdecb w0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_all_u32, uint32_t, -+ x0 = svqdecb_pat_n_u32 (x0, SV_ALL, 16), -+ x0 = svqdecb_pat (x0, SV_ALL, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_pat_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_pat_u64.c -new file mode 100644 -index 000000000..2f4c3c7aa ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_pat_u64.c -@@ -0,0 +1,202 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdecb_pat_n_1_u64_tied: -+** uqdecb x0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_1_u64_tied, uint64_t, -+ x0 = svqdecb_pat_n_u64 (x0, SV_POW2, 1), -+ x0 = svqdecb_pat (x0, SV_POW2, 1)) -+ -+/* -+** qdecb_pat_n_1_u64_untied: -+** mov x0, x1 -+** uqdecb x0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_1_u64_untied, uint64_t, -+ x0 = svqdecb_pat_n_u64 (x1, SV_POW2, 1), -+ x0 = svqdecb_pat (x1, SV_POW2, 1)) -+ -+/* -+** qdecb_pat_n_2_u64: -+** uqdecb x0, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_2_u64, uint64_t, -+ x0 = svqdecb_pat_n_u64 (x0, SV_POW2, 2), -+ x0 = svqdecb_pat (x0, SV_POW2, 2)) -+ -+/* -+** qdecb_pat_n_7_u64: -+** uqdecb x0, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_7_u64, uint64_t, -+ x0 = svqdecb_pat_n_u64 (x0, SV_POW2, 7), -+ x0 = svqdecb_pat (x0, SV_POW2, 7)) -+ -+/* -+** qdecb_pat_n_15_u64: -+** uqdecb x0, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_15_u64, uint64_t, -+ x0 = svqdecb_pat_n_u64 (x0, SV_POW2, 15), -+ x0 = svqdecb_pat (x0, SV_POW2, 15)) -+ -+/* -+** qdecb_pat_n_16_u64: -+** uqdecb x0, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_16_u64, uint64_t, -+ x0 = svqdecb_pat_n_u64 (x0, SV_POW2, 16), -+ x0 = svqdecb_pat (x0, SV_POW2, 16)) -+ -+/* -+** qdecb_pat_n_vl1_u64: -+** uqdecb x0, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl1_u64, uint64_t, -+ x0 = svqdecb_pat_n_u64 (x0, SV_VL1, 16), -+ x0 = svqdecb_pat (x0, SV_VL1, 16)) -+ -+/* -+** qdecb_pat_n_vl2_u64: -+** uqdecb x0, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl2_u64, uint64_t, -+ x0 = svqdecb_pat_n_u64 (x0, SV_VL2, 16), -+ x0 = svqdecb_pat (x0, SV_VL2, 16)) -+ -+/* -+** qdecb_pat_n_vl3_u64: -+** uqdecb x0, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl3_u64, uint64_t, -+ x0 = svqdecb_pat_n_u64 (x0, SV_VL3, 16), -+ x0 = svqdecb_pat (x0, SV_VL3, 16)) -+ -+/* -+** qdecb_pat_n_vl4_u64: -+** uqdecb x0, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl4_u64, uint64_t, -+ x0 = svqdecb_pat_n_u64 (x0, SV_VL4, 16), -+ x0 = svqdecb_pat (x0, SV_VL4, 16)) -+ -+/* -+** qdecb_pat_n_vl5_u64: -+** uqdecb x0, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl5_u64, uint64_t, -+ x0 = svqdecb_pat_n_u64 (x0, SV_VL5, 16), -+ x0 = svqdecb_pat (x0, SV_VL5, 16)) -+ 
-+/* -+** qdecb_pat_n_vl6_u64: -+** uqdecb x0, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl6_u64, uint64_t, -+ x0 = svqdecb_pat_n_u64 (x0, SV_VL6, 16), -+ x0 = svqdecb_pat (x0, SV_VL6, 16)) -+ -+/* -+** qdecb_pat_n_vl7_u64: -+** uqdecb x0, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl7_u64, uint64_t, -+ x0 = svqdecb_pat_n_u64 (x0, SV_VL7, 16), -+ x0 = svqdecb_pat (x0, SV_VL7, 16)) -+ -+/* -+** qdecb_pat_n_vl8_u64: -+** uqdecb x0, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl8_u64, uint64_t, -+ x0 = svqdecb_pat_n_u64 (x0, SV_VL8, 16), -+ x0 = svqdecb_pat (x0, SV_VL8, 16)) -+ -+/* -+** qdecb_pat_n_vl16_u64: -+** uqdecb x0, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl16_u64, uint64_t, -+ x0 = svqdecb_pat_n_u64 (x0, SV_VL16, 16), -+ x0 = svqdecb_pat (x0, SV_VL16, 16)) -+ -+/* -+** qdecb_pat_n_vl32_u64: -+** uqdecb x0, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl32_u64, uint64_t, -+ x0 = svqdecb_pat_n_u64 (x0, SV_VL32, 16), -+ x0 = svqdecb_pat (x0, SV_VL32, 16)) -+ -+/* -+** qdecb_pat_n_vl64_u64: -+** uqdecb x0, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl64_u64, uint64_t, -+ x0 = svqdecb_pat_n_u64 (x0, SV_VL64, 16), -+ x0 = svqdecb_pat (x0, SV_VL64, 16)) -+ -+/* -+** qdecb_pat_n_vl128_u64: -+** uqdecb x0, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl128_u64, uint64_t, -+ x0 = svqdecb_pat_n_u64 (x0, SV_VL128, 16), -+ x0 = svqdecb_pat (x0, SV_VL128, 16)) -+ -+/* -+** qdecb_pat_n_vl256_u64: -+** uqdecb x0, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_vl256_u64, uint64_t, -+ x0 = svqdecb_pat_n_u64 (x0, SV_VL256, 16), -+ x0 = svqdecb_pat (x0, SV_VL256, 16)) -+ -+/* -+** qdecb_pat_n_mul4_u64: -+** uqdecb x0, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_mul4_u64, uint64_t, -+ x0 = svqdecb_pat_n_u64 (x0, SV_MUL4, 16), -+ x0 = svqdecb_pat (x0, SV_MUL4, 16)) -+ -+/* -+** qdecb_pat_n_mul3_u64: -+** uqdecb x0, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_mul3_u64, uint64_t, -+ x0 = svqdecb_pat_n_u64 (x0, SV_MUL3, 16), -+ x0 = svqdecb_pat (x0, SV_MUL3, 16)) -+ -+/* -+** qdecb_pat_n_all_u64: -+** uqdecb x0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_pat_n_all_u64, uint64_t, -+ x0 = svqdecb_pat_n_u64 (x0, SV_ALL, 16), -+ x0 = svqdecb_pat (x0, SV_ALL, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_s32.c -new file mode 100644 -index 000000000..11180654e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_s32.c -@@ -0,0 +1,58 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdecb_n_1_s32_tied: -+** sqdecb x0, w0 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_n_1_s32_tied, int32_t, -+ x0 = svqdecb_n_s32 (x0, 1), -+ x0 = svqdecb (x0, 1)) -+ -+/* -+** qdecb_n_1_s32_untied: -+** mov w0, w1 -+** sqdecb x0, w0 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_n_1_s32_untied, int32_t, -+ x0 = svqdecb_n_s32 (x1, 1), -+ x0 = svqdecb (x1, 1)) -+ -+/* -+** qdecb_n_2_s32: -+** sqdecb x0, w0, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_n_2_s32, int32_t, -+ x0 = svqdecb_n_s32 (x0, 2), -+ x0 = svqdecb (x0, 2)) -+ -+/* -+** qdecb_n_7_s32: -+** sqdecb x0, w0, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_n_7_s32, int32_t, -+ x0 = svqdecb_n_s32 (x0, 7), -+ x0 = svqdecb (x0, 7)) -+ -+/* -+** qdecb_n_15_s32: -+** sqdecb x0, w0, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_n_15_s32, int32_t, -+ x0 = svqdecb_n_s32 
(x0, 15), -+ x0 = svqdecb (x0, 15)) -+ -+/* -+** qdecb_n_16_s32: -+** sqdecb x0, w0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_n_16_s32, int32_t, -+ x0 = svqdecb_n_s32 (x0, 16), -+ x0 = svqdecb (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_s64.c -new file mode 100644 -index 000000000..17b765655 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_s64.c -@@ -0,0 +1,58 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdecb_n_1_s64_tied: -+** sqdecb x0 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_n_1_s64_tied, int64_t, -+ x0 = svqdecb_n_s64 (x0, 1), -+ x0 = svqdecb (x0, 1)) -+ -+/* -+** qdecb_n_1_s64_untied: -+** mov x0, x1 -+** sqdecb x0 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_n_1_s64_untied, int64_t, -+ x0 = svqdecb_n_s64 (x1, 1), -+ x0 = svqdecb (x1, 1)) -+ -+/* -+** qdecb_n_2_s64: -+** sqdecb x0, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_n_2_s64, int64_t, -+ x0 = svqdecb_n_s64 (x0, 2), -+ x0 = svqdecb (x0, 2)) -+ -+/* -+** qdecb_n_7_s64: -+** sqdecb x0, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_n_7_s64, int64_t, -+ x0 = svqdecb_n_s64 (x0, 7), -+ x0 = svqdecb (x0, 7)) -+ -+/* -+** qdecb_n_15_s64: -+** sqdecb x0, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_n_15_s64, int64_t, -+ x0 = svqdecb_n_s64 (x0, 15), -+ x0 = svqdecb (x0, 15)) -+ -+/* -+** qdecb_n_16_s64: -+** sqdecb x0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_n_16_s64, int64_t, -+ x0 = svqdecb_n_s64 (x0, 16), -+ x0 = svqdecb (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_u32.c -new file mode 100644 -index 000000000..b31e04de5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_u32.c -@@ -0,0 +1,58 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdecb_n_1_u32_tied: -+** uqdecb w0 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_n_1_u32_tied, uint32_t, -+ x0 = svqdecb_n_u32 (x0, 1), -+ x0 = svqdecb (x0, 1)) -+ -+/* -+** qdecb_n_1_u32_untied: -+** mov w0, w1 -+** uqdecb w0 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_n_1_u32_untied, uint32_t, -+ x0 = svqdecb_n_u32 (x1, 1), -+ x0 = svqdecb (x1, 1)) -+ -+/* -+** qdecb_n_2_u32: -+** uqdecb w0, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_n_2_u32, uint32_t, -+ x0 = svqdecb_n_u32 (x0, 2), -+ x0 = svqdecb (x0, 2)) -+ -+/* -+** qdecb_n_7_u32: -+** uqdecb w0, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_n_7_u32, uint32_t, -+ x0 = svqdecb_n_u32 (x0, 7), -+ x0 = svqdecb (x0, 7)) -+ -+/* -+** qdecb_n_15_u32: -+** uqdecb w0, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_n_15_u32, uint32_t, -+ x0 = svqdecb_n_u32 (x0, 15), -+ x0 = svqdecb (x0, 15)) -+ -+/* -+** qdecb_n_16_u32: -+** uqdecb w0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_n_16_u32, uint32_t, -+ x0 = svqdecb_n_u32 (x0, 16), -+ x0 = svqdecb (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_u64.c -new file mode 100644 -index 000000000..aab6faba9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecb_u64.c -@@ -0,0 +1,58 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdecb_n_1_u64_tied: -+** uqdecb x0 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_n_1_u64_tied, uint64_t, -+ x0 = 
svqdecb_n_u64 (x0, 1), -+ x0 = svqdecb (x0, 1)) -+ -+/* -+** qdecb_n_1_u64_untied: -+** mov x0, x1 -+** uqdecb x0 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_n_1_u64_untied, uint64_t, -+ x0 = svqdecb_n_u64 (x1, 1), -+ x0 = svqdecb (x1, 1)) -+ -+/* -+** qdecb_n_2_u64: -+** uqdecb x0, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_n_2_u64, uint64_t, -+ x0 = svqdecb_n_u64 (x0, 2), -+ x0 = svqdecb (x0, 2)) -+ -+/* -+** qdecb_n_7_u64: -+** uqdecb x0, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_n_7_u64, uint64_t, -+ x0 = svqdecb_n_u64 (x0, 7), -+ x0 = svqdecb (x0, 7)) -+ -+/* -+** qdecb_n_15_u64: -+** uqdecb x0, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_n_15_u64, uint64_t, -+ x0 = svqdecb_n_u64 (x0, 15), -+ x0 = svqdecb (x0, 15)) -+ -+/* -+** qdecb_n_16_u64: -+** uqdecb x0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecb_n_16_u64, uint64_t, -+ x0 = svqdecb_n_u64 (x0, 16), -+ x0 = svqdecb (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_pat_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_pat_s32.c -new file mode 100644 -index 000000000..bc491d397 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_pat_s32.c -@@ -0,0 +1,202 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdecd_pat_n_1_s32_tied: -+** sqdecd x0, w0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_1_s32_tied, int32_t, -+ x0 = svqdecd_pat_n_s32 (x0, SV_POW2, 1), -+ x0 = svqdecd_pat (x0, SV_POW2, 1)) -+ -+/* -+** qdecd_pat_n_1_s32_untied: -+** mov w0, w1 -+** sqdecd x0, w0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_1_s32_untied, int32_t, -+ x0 = svqdecd_pat_n_s32 (x1, SV_POW2, 1), -+ x0 = svqdecd_pat (x1, SV_POW2, 1)) -+ -+/* -+** qdecd_pat_n_2_s32: -+** sqdecd x0, w0, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_2_s32, int32_t, -+ x0 = svqdecd_pat_n_s32 (x0, SV_POW2, 2), -+ x0 = svqdecd_pat (x0, SV_POW2, 2)) -+ -+/* -+** qdecd_pat_n_7_s32: -+** sqdecd x0, w0, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_7_s32, int32_t, -+ x0 = svqdecd_pat_n_s32 (x0, SV_POW2, 7), -+ x0 = svqdecd_pat (x0, SV_POW2, 7)) -+ -+/* -+** qdecd_pat_n_15_s32: -+** sqdecd x0, w0, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_15_s32, int32_t, -+ x0 = svqdecd_pat_n_s32 (x0, SV_POW2, 15), -+ x0 = svqdecd_pat (x0, SV_POW2, 15)) -+ -+/* -+** qdecd_pat_n_16_s32: -+** sqdecd x0, w0, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_16_s32, int32_t, -+ x0 = svqdecd_pat_n_s32 (x0, SV_POW2, 16), -+ x0 = svqdecd_pat (x0, SV_POW2, 16)) -+ -+/* -+** qdecd_pat_n_vl1_s32: -+** sqdecd x0, w0, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl1_s32, int32_t, -+ x0 = svqdecd_pat_n_s32 (x0, SV_VL1, 16), -+ x0 = svqdecd_pat (x0, SV_VL1, 16)) -+ -+/* -+** qdecd_pat_n_vl2_s32: -+** sqdecd x0, w0, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl2_s32, int32_t, -+ x0 = svqdecd_pat_n_s32 (x0, SV_VL2, 16), -+ x0 = svqdecd_pat (x0, SV_VL2, 16)) -+ -+/* -+** qdecd_pat_n_vl3_s32: -+** sqdecd x0, w0, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl3_s32, int32_t, -+ x0 = svqdecd_pat_n_s32 (x0, SV_VL3, 16), -+ x0 = svqdecd_pat (x0, SV_VL3, 16)) -+ -+/* -+** qdecd_pat_n_vl4_s32: -+** sqdecd x0, w0, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl4_s32, int32_t, -+ x0 = svqdecd_pat_n_s32 (x0, SV_VL4, 16), -+ x0 = svqdecd_pat (x0, SV_VL4, 16)) -+ -+/* -+** qdecd_pat_n_vl5_s32: -+** sqdecd x0, w0, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl5_s32, 
int32_t, -+ x0 = svqdecd_pat_n_s32 (x0, SV_VL5, 16), -+ x0 = svqdecd_pat (x0, SV_VL5, 16)) -+ -+/* -+** qdecd_pat_n_vl6_s32: -+** sqdecd x0, w0, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl6_s32, int32_t, -+ x0 = svqdecd_pat_n_s32 (x0, SV_VL6, 16), -+ x0 = svqdecd_pat (x0, SV_VL6, 16)) -+ -+/* -+** qdecd_pat_n_vl7_s32: -+** sqdecd x0, w0, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl7_s32, int32_t, -+ x0 = svqdecd_pat_n_s32 (x0, SV_VL7, 16), -+ x0 = svqdecd_pat (x0, SV_VL7, 16)) -+ -+/* -+** qdecd_pat_n_vl8_s32: -+** sqdecd x0, w0, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl8_s32, int32_t, -+ x0 = svqdecd_pat_n_s32 (x0, SV_VL8, 16), -+ x0 = svqdecd_pat (x0, SV_VL8, 16)) -+ -+/* -+** qdecd_pat_n_vl16_s32: -+** sqdecd x0, w0, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl16_s32, int32_t, -+ x0 = svqdecd_pat_n_s32 (x0, SV_VL16, 16), -+ x0 = svqdecd_pat (x0, SV_VL16, 16)) -+ -+/* -+** qdecd_pat_n_vl32_s32: -+** sqdecd x0, w0, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl32_s32, int32_t, -+ x0 = svqdecd_pat_n_s32 (x0, SV_VL32, 16), -+ x0 = svqdecd_pat (x0, SV_VL32, 16)) -+ -+/* -+** qdecd_pat_n_vl64_s32: -+** sqdecd x0, w0, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl64_s32, int32_t, -+ x0 = svqdecd_pat_n_s32 (x0, SV_VL64, 16), -+ x0 = svqdecd_pat (x0, SV_VL64, 16)) -+ -+/* -+** qdecd_pat_n_vl128_s32: -+** sqdecd x0, w0, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl128_s32, int32_t, -+ x0 = svqdecd_pat_n_s32 (x0, SV_VL128, 16), -+ x0 = svqdecd_pat (x0, SV_VL128, 16)) -+ -+/* -+** qdecd_pat_n_vl256_s32: -+** sqdecd x0, w0, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl256_s32, int32_t, -+ x0 = svqdecd_pat_n_s32 (x0, SV_VL256, 16), -+ x0 = svqdecd_pat (x0, SV_VL256, 16)) -+ -+/* -+** qdecd_pat_n_mul4_s32: -+** sqdecd x0, w0, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_mul4_s32, int32_t, -+ x0 = svqdecd_pat_n_s32 (x0, SV_MUL4, 16), -+ x0 = svqdecd_pat (x0, SV_MUL4, 16)) -+ -+/* -+** qdecd_pat_n_mul3_s32: -+** sqdecd x0, w0, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_mul3_s32, int32_t, -+ x0 = svqdecd_pat_n_s32 (x0, SV_MUL3, 16), -+ x0 = svqdecd_pat (x0, SV_MUL3, 16)) -+ -+/* -+** qdecd_pat_n_all_s32: -+** sqdecd x0, w0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_all_s32, int32_t, -+ x0 = svqdecd_pat_n_s32 (x0, SV_ALL, 16), -+ x0 = svqdecd_pat (x0, SV_ALL, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_pat_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_pat_s64.c -new file mode 100644 -index 000000000..3970ff058 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_pat_s64.c -@@ -0,0 +1,401 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdecd_pat_1_s64_tied: -+** sqdecd z0\.d, pow2 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_1_s64_tied, svint64_t, -+ z0 = svqdecd_pat_s64 (z0, SV_POW2, 1), -+ z0 = svqdecd_pat (z0, SV_POW2, 1)) -+ -+/* -+** qdecd_pat_1_s64_untied: -+** movprfx z0, z1 -+** sqdecd z0\.d, pow2 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_1_s64_untied, svint64_t, -+ z0 = svqdecd_pat_s64 (z1, SV_POW2, 1), -+ z0 = svqdecd_pat (z1, SV_POW2, 1)) -+ -+/* -+** qdecd_pat_2_s64: -+** sqdecd z0\.d, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_2_s64, svint64_t, -+ z0 = svqdecd_pat_s64 (z0, SV_POW2, 2), -+ z0 = svqdecd_pat (z0, SV_POW2, 2)) -+ -+/* -+** qdecd_pat_7_s64: -+** sqdecd z0\.d, pow2, mul #7 -+** 
ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_7_s64, svint64_t, -+ z0 = svqdecd_pat_s64 (z0, SV_POW2, 7), -+ z0 = svqdecd_pat (z0, SV_POW2, 7)) -+ -+/* -+** qdecd_pat_15_s64: -+** sqdecd z0\.d, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_15_s64, svint64_t, -+ z0 = svqdecd_pat_s64 (z0, SV_POW2, 15), -+ z0 = svqdecd_pat (z0, SV_POW2, 15)) -+ -+/* -+** qdecd_pat_16_s64: -+** sqdecd z0\.d, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_16_s64, svint64_t, -+ z0 = svqdecd_pat_s64 (z0, SV_POW2, 16), -+ z0 = svqdecd_pat (z0, SV_POW2, 16)) -+ -+/* -+** qdecd_pat_vl1_s64: -+** sqdecd z0\.d, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_vl1_s64, svint64_t, -+ z0 = svqdecd_pat_s64 (z0, SV_VL1, 16), -+ z0 = svqdecd_pat (z0, SV_VL1, 16)) -+ -+/* -+** qdecd_pat_vl2_s64: -+** sqdecd z0\.d, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_vl2_s64, svint64_t, -+ z0 = svqdecd_pat_s64 (z0, SV_VL2, 16), -+ z0 = svqdecd_pat (z0, SV_VL2, 16)) -+ -+/* -+** qdecd_pat_vl3_s64: -+** sqdecd z0\.d, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_vl3_s64, svint64_t, -+ z0 = svqdecd_pat_s64 (z0, SV_VL3, 16), -+ z0 = svqdecd_pat (z0, SV_VL3, 16)) -+ -+/* -+** qdecd_pat_vl4_s64: -+** sqdecd z0\.d, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_vl4_s64, svint64_t, -+ z0 = svqdecd_pat_s64 (z0, SV_VL4, 16), -+ z0 = svqdecd_pat (z0, SV_VL4, 16)) -+ -+/* -+** qdecd_pat_vl5_s64: -+** sqdecd z0\.d, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_vl5_s64, svint64_t, -+ z0 = svqdecd_pat_s64 (z0, SV_VL5, 16), -+ z0 = svqdecd_pat (z0, SV_VL5, 16)) -+ -+/* -+** qdecd_pat_vl6_s64: -+** sqdecd z0\.d, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_vl6_s64, svint64_t, -+ z0 = svqdecd_pat_s64 (z0, SV_VL6, 16), -+ z0 = svqdecd_pat (z0, SV_VL6, 16)) -+ -+/* -+** qdecd_pat_vl7_s64: -+** sqdecd z0\.d, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_vl7_s64, svint64_t, -+ z0 = svqdecd_pat_s64 (z0, SV_VL7, 16), -+ z0 = svqdecd_pat (z0, SV_VL7, 16)) -+ -+/* -+** qdecd_pat_vl8_s64: -+** sqdecd z0\.d, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_vl8_s64, svint64_t, -+ z0 = svqdecd_pat_s64 (z0, SV_VL8, 16), -+ z0 = svqdecd_pat (z0, SV_VL8, 16)) -+ -+/* -+** qdecd_pat_vl16_s64: -+** sqdecd z0\.d, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_vl16_s64, svint64_t, -+ z0 = svqdecd_pat_s64 (z0, SV_VL16, 16), -+ z0 = svqdecd_pat (z0, SV_VL16, 16)) -+ -+/* -+** qdecd_pat_vl32_s64: -+** sqdecd z0\.d, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_vl32_s64, svint64_t, -+ z0 = svqdecd_pat_s64 (z0, SV_VL32, 16), -+ z0 = svqdecd_pat (z0, SV_VL32, 16)) -+ -+/* -+** qdecd_pat_vl64_s64: -+** sqdecd z0\.d, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_vl64_s64, svint64_t, -+ z0 = svqdecd_pat_s64 (z0, SV_VL64, 16), -+ z0 = svqdecd_pat (z0, SV_VL64, 16)) -+ -+/* -+** qdecd_pat_vl128_s64: -+** sqdecd z0\.d, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_vl128_s64, svint64_t, -+ z0 = svqdecd_pat_s64 (z0, SV_VL128, 16), -+ z0 = svqdecd_pat (z0, SV_VL128, 16)) -+ -+/* -+** qdecd_pat_vl256_s64: -+** sqdecd z0\.d, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_vl256_s64, svint64_t, -+ z0 = svqdecd_pat_s64 (z0, SV_VL256, 16), -+ z0 = svqdecd_pat (z0, SV_VL256, 16)) -+ -+/* -+** qdecd_pat_mul4_s64: -+** sqdecd z0\.d, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_mul4_s64, svint64_t, -+ z0 = svqdecd_pat_s64 (z0, SV_MUL4, 16), -+ z0 = svqdecd_pat (z0, SV_MUL4, 16)) -+ -+/* -+** qdecd_pat_mul3_s64: -+** sqdecd z0\.d, mul3, mul #16 -+** ret -+*/ 
-+TEST_UNIFORM_Z (qdecd_pat_mul3_s64, svint64_t, -+ z0 = svqdecd_pat_s64 (z0, SV_MUL3, 16), -+ z0 = svqdecd_pat (z0, SV_MUL3, 16)) -+ -+/* -+** qdecd_pat_all_s64: -+** sqdecd z0\.d, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_all_s64, svint64_t, -+ z0 = svqdecd_pat_s64 (z0, SV_ALL, 16), -+ z0 = svqdecd_pat (z0, SV_ALL, 16)) -+ -+/* -+** qdecd_pat_n_1_s64_tied: -+** sqdecd x0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_1_s64_tied, int64_t, -+ x0 = svqdecd_pat_n_s64 (x0, SV_POW2, 1), -+ x0 = svqdecd_pat (x0, SV_POW2, 1)) -+ -+/* -+** qdecd_pat_n_1_s64_untied: -+** mov x0, x1 -+** sqdecd x0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_1_s64_untied, int64_t, -+ x0 = svqdecd_pat_n_s64 (x1, SV_POW2, 1), -+ x0 = svqdecd_pat (x1, SV_POW2, 1)) -+ -+/* -+** qdecd_pat_n_2_s64: -+** sqdecd x0, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_2_s64, int64_t, -+ x0 = svqdecd_pat_n_s64 (x0, SV_POW2, 2), -+ x0 = svqdecd_pat (x0, SV_POW2, 2)) -+ -+/* -+** qdecd_pat_n_7_s64: -+** sqdecd x0, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_7_s64, int64_t, -+ x0 = svqdecd_pat_n_s64 (x0, SV_POW2, 7), -+ x0 = svqdecd_pat (x0, SV_POW2, 7)) -+ -+/* -+** qdecd_pat_n_15_s64: -+** sqdecd x0, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_15_s64, int64_t, -+ x0 = svqdecd_pat_n_s64 (x0, SV_POW2, 15), -+ x0 = svqdecd_pat (x0, SV_POW2, 15)) -+ -+/* -+** qdecd_pat_n_16_s64: -+** sqdecd x0, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_16_s64, int64_t, -+ x0 = svqdecd_pat_n_s64 (x0, SV_POW2, 16), -+ x0 = svqdecd_pat (x0, SV_POW2, 16)) -+ -+/* -+** qdecd_pat_n_vl1_s64: -+** sqdecd x0, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl1_s64, int64_t, -+ x0 = svqdecd_pat_n_s64 (x0, SV_VL1, 16), -+ x0 = svqdecd_pat (x0, SV_VL1, 16)) -+ -+/* -+** qdecd_pat_n_vl2_s64: -+** sqdecd x0, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl2_s64, int64_t, -+ x0 = svqdecd_pat_n_s64 (x0, SV_VL2, 16), -+ x0 = svqdecd_pat (x0, SV_VL2, 16)) -+ -+/* -+** qdecd_pat_n_vl3_s64: -+** sqdecd x0, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl3_s64, int64_t, -+ x0 = svqdecd_pat_n_s64 (x0, SV_VL3, 16), -+ x0 = svqdecd_pat (x0, SV_VL3, 16)) -+ -+/* -+** qdecd_pat_n_vl4_s64: -+** sqdecd x0, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl4_s64, int64_t, -+ x0 = svqdecd_pat_n_s64 (x0, SV_VL4, 16), -+ x0 = svqdecd_pat (x0, SV_VL4, 16)) -+ -+/* -+** qdecd_pat_n_vl5_s64: -+** sqdecd x0, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl5_s64, int64_t, -+ x0 = svqdecd_pat_n_s64 (x0, SV_VL5, 16), -+ x0 = svqdecd_pat (x0, SV_VL5, 16)) -+ -+/* -+** qdecd_pat_n_vl6_s64: -+** sqdecd x0, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl6_s64, int64_t, -+ x0 = svqdecd_pat_n_s64 (x0, SV_VL6, 16), -+ x0 = svqdecd_pat (x0, SV_VL6, 16)) -+ -+/* -+** qdecd_pat_n_vl7_s64: -+** sqdecd x0, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl7_s64, int64_t, -+ x0 = svqdecd_pat_n_s64 (x0, SV_VL7, 16), -+ x0 = svqdecd_pat (x0, SV_VL7, 16)) -+ -+/* -+** qdecd_pat_n_vl8_s64: -+** sqdecd x0, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl8_s64, int64_t, -+ x0 = svqdecd_pat_n_s64 (x0, SV_VL8, 16), -+ x0 = svqdecd_pat (x0, SV_VL8, 16)) -+ -+/* -+** qdecd_pat_n_vl16_s64: -+** sqdecd x0, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl16_s64, int64_t, -+ x0 = svqdecd_pat_n_s64 (x0, SV_VL16, 16), -+ x0 = svqdecd_pat (x0, SV_VL16, 16)) -+ -+/* -+** qdecd_pat_n_vl32_s64: -+** sqdecd x0, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S 
(qdecd_pat_n_vl32_s64, int64_t, -+ x0 = svqdecd_pat_n_s64 (x0, SV_VL32, 16), -+ x0 = svqdecd_pat (x0, SV_VL32, 16)) -+ -+/* -+** qdecd_pat_n_vl64_s64: -+** sqdecd x0, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl64_s64, int64_t, -+ x0 = svqdecd_pat_n_s64 (x0, SV_VL64, 16), -+ x0 = svqdecd_pat (x0, SV_VL64, 16)) -+ -+/* -+** qdecd_pat_n_vl128_s64: -+** sqdecd x0, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl128_s64, int64_t, -+ x0 = svqdecd_pat_n_s64 (x0, SV_VL128, 16), -+ x0 = svqdecd_pat (x0, SV_VL128, 16)) -+ -+/* -+** qdecd_pat_n_vl256_s64: -+** sqdecd x0, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl256_s64, int64_t, -+ x0 = svqdecd_pat_n_s64 (x0, SV_VL256, 16), -+ x0 = svqdecd_pat (x0, SV_VL256, 16)) -+ -+/* -+** qdecd_pat_n_mul4_s64: -+** sqdecd x0, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_mul4_s64, int64_t, -+ x0 = svqdecd_pat_n_s64 (x0, SV_MUL4, 16), -+ x0 = svqdecd_pat (x0, SV_MUL4, 16)) -+ -+/* -+** qdecd_pat_n_mul3_s64: -+** sqdecd x0, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_mul3_s64, int64_t, -+ x0 = svqdecd_pat_n_s64 (x0, SV_MUL3, 16), -+ x0 = svqdecd_pat (x0, SV_MUL3, 16)) -+ -+/* -+** qdecd_pat_n_all_s64: -+** sqdecd x0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_all_s64, int64_t, -+ x0 = svqdecd_pat_n_s64 (x0, SV_ALL, 16), -+ x0 = svqdecd_pat (x0, SV_ALL, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_pat_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_pat_u32.c -new file mode 100644 -index 000000000..b33e402f2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_pat_u32.c -@@ -0,0 +1,202 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdecd_pat_n_1_u32_tied: -+** uqdecd w0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_1_u32_tied, uint32_t, -+ x0 = svqdecd_pat_n_u32 (x0, SV_POW2, 1), -+ x0 = svqdecd_pat (x0, SV_POW2, 1)) -+ -+/* -+** qdecd_pat_n_1_u32_untied: -+** mov w0, w1 -+** uqdecd w0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_1_u32_untied, uint32_t, -+ x0 = svqdecd_pat_n_u32 (x1, SV_POW2, 1), -+ x0 = svqdecd_pat (x1, SV_POW2, 1)) -+ -+/* -+** qdecd_pat_n_2_u32: -+** uqdecd w0, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_2_u32, uint32_t, -+ x0 = svqdecd_pat_n_u32 (x0, SV_POW2, 2), -+ x0 = svqdecd_pat (x0, SV_POW2, 2)) -+ -+/* -+** qdecd_pat_n_7_u32: -+** uqdecd w0, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_7_u32, uint32_t, -+ x0 = svqdecd_pat_n_u32 (x0, SV_POW2, 7), -+ x0 = svqdecd_pat (x0, SV_POW2, 7)) -+ -+/* -+** qdecd_pat_n_15_u32: -+** uqdecd w0, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_15_u32, uint32_t, -+ x0 = svqdecd_pat_n_u32 (x0, SV_POW2, 15), -+ x0 = svqdecd_pat (x0, SV_POW2, 15)) -+ -+/* -+** qdecd_pat_n_16_u32: -+** uqdecd w0, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_16_u32, uint32_t, -+ x0 = svqdecd_pat_n_u32 (x0, SV_POW2, 16), -+ x0 = svqdecd_pat (x0, SV_POW2, 16)) -+ -+/* -+** qdecd_pat_n_vl1_u32: -+** uqdecd w0, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl1_u32, uint32_t, -+ x0 = svqdecd_pat_n_u32 (x0, SV_VL1, 16), -+ x0 = svqdecd_pat (x0, SV_VL1, 16)) -+ -+/* -+** qdecd_pat_n_vl2_u32: -+** uqdecd w0, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl2_u32, uint32_t, -+ x0 = svqdecd_pat_n_u32 (x0, SV_VL2, 16), -+ x0 = svqdecd_pat (x0, SV_VL2, 16)) -+ -+/* -+** qdecd_pat_n_vl3_u32: -+** uqdecd w0, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S 
(qdecd_pat_n_vl3_u32, uint32_t, -+ x0 = svqdecd_pat_n_u32 (x0, SV_VL3, 16), -+ x0 = svqdecd_pat (x0, SV_VL3, 16)) -+ -+/* -+** qdecd_pat_n_vl4_u32: -+** uqdecd w0, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl4_u32, uint32_t, -+ x0 = svqdecd_pat_n_u32 (x0, SV_VL4, 16), -+ x0 = svqdecd_pat (x0, SV_VL4, 16)) -+ -+/* -+** qdecd_pat_n_vl5_u32: -+** uqdecd w0, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl5_u32, uint32_t, -+ x0 = svqdecd_pat_n_u32 (x0, SV_VL5, 16), -+ x0 = svqdecd_pat (x0, SV_VL5, 16)) -+ -+/* -+** qdecd_pat_n_vl6_u32: -+** uqdecd w0, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl6_u32, uint32_t, -+ x0 = svqdecd_pat_n_u32 (x0, SV_VL6, 16), -+ x0 = svqdecd_pat (x0, SV_VL6, 16)) -+ -+/* -+** qdecd_pat_n_vl7_u32: -+** uqdecd w0, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl7_u32, uint32_t, -+ x0 = svqdecd_pat_n_u32 (x0, SV_VL7, 16), -+ x0 = svqdecd_pat (x0, SV_VL7, 16)) -+ -+/* -+** qdecd_pat_n_vl8_u32: -+** uqdecd w0, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl8_u32, uint32_t, -+ x0 = svqdecd_pat_n_u32 (x0, SV_VL8, 16), -+ x0 = svqdecd_pat (x0, SV_VL8, 16)) -+ -+/* -+** qdecd_pat_n_vl16_u32: -+** uqdecd w0, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl16_u32, uint32_t, -+ x0 = svqdecd_pat_n_u32 (x0, SV_VL16, 16), -+ x0 = svqdecd_pat (x0, SV_VL16, 16)) -+ -+/* -+** qdecd_pat_n_vl32_u32: -+** uqdecd w0, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl32_u32, uint32_t, -+ x0 = svqdecd_pat_n_u32 (x0, SV_VL32, 16), -+ x0 = svqdecd_pat (x0, SV_VL32, 16)) -+ -+/* -+** qdecd_pat_n_vl64_u32: -+** uqdecd w0, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl64_u32, uint32_t, -+ x0 = svqdecd_pat_n_u32 (x0, SV_VL64, 16), -+ x0 = svqdecd_pat (x0, SV_VL64, 16)) -+ -+/* -+** qdecd_pat_n_vl128_u32: -+** uqdecd w0, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl128_u32, uint32_t, -+ x0 = svqdecd_pat_n_u32 (x0, SV_VL128, 16), -+ x0 = svqdecd_pat (x0, SV_VL128, 16)) -+ -+/* -+** qdecd_pat_n_vl256_u32: -+** uqdecd w0, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl256_u32, uint32_t, -+ x0 = svqdecd_pat_n_u32 (x0, SV_VL256, 16), -+ x0 = svqdecd_pat (x0, SV_VL256, 16)) -+ -+/* -+** qdecd_pat_n_mul4_u32: -+** uqdecd w0, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_mul4_u32, uint32_t, -+ x0 = svqdecd_pat_n_u32 (x0, SV_MUL4, 16), -+ x0 = svqdecd_pat (x0, SV_MUL4, 16)) -+ -+/* -+** qdecd_pat_n_mul3_u32: -+** uqdecd w0, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_mul3_u32, uint32_t, -+ x0 = svqdecd_pat_n_u32 (x0, SV_MUL3, 16), -+ x0 = svqdecd_pat (x0, SV_MUL3, 16)) -+ -+/* -+** qdecd_pat_n_all_u32: -+** uqdecd w0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_all_u32, uint32_t, -+ x0 = svqdecd_pat_n_u32 (x0, SV_ALL, 16), -+ x0 = svqdecd_pat (x0, SV_ALL, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_pat_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_pat_u64.c -new file mode 100644 -index 000000000..f0d1bd357 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_pat_u64.c -@@ -0,0 +1,401 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdecd_pat_1_u64_tied: -+** uqdecd z0\.d, pow2 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_1_u64_tied, svuint64_t, -+ z0 = svqdecd_pat_u64 (z0, SV_POW2, 1), -+ z0 = svqdecd_pat (z0, SV_POW2, 1)) -+ -+/* -+** qdecd_pat_1_u64_untied: -+** movprfx z0, z1 -+** uqdecd z0\.d, pow2 -+** ret 
-+*/ -+TEST_UNIFORM_Z (qdecd_pat_1_u64_untied, svuint64_t, -+ z0 = svqdecd_pat_u64 (z1, SV_POW2, 1), -+ z0 = svqdecd_pat (z1, SV_POW2, 1)) -+ -+/* -+** qdecd_pat_2_u64: -+** uqdecd z0\.d, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_2_u64, svuint64_t, -+ z0 = svqdecd_pat_u64 (z0, SV_POW2, 2), -+ z0 = svqdecd_pat (z0, SV_POW2, 2)) -+ -+/* -+** qdecd_pat_7_u64: -+** uqdecd z0\.d, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_7_u64, svuint64_t, -+ z0 = svqdecd_pat_u64 (z0, SV_POW2, 7), -+ z0 = svqdecd_pat (z0, SV_POW2, 7)) -+ -+/* -+** qdecd_pat_15_u64: -+** uqdecd z0\.d, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_15_u64, svuint64_t, -+ z0 = svqdecd_pat_u64 (z0, SV_POW2, 15), -+ z0 = svqdecd_pat (z0, SV_POW2, 15)) -+ -+/* -+** qdecd_pat_16_u64: -+** uqdecd z0\.d, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_16_u64, svuint64_t, -+ z0 = svqdecd_pat_u64 (z0, SV_POW2, 16), -+ z0 = svqdecd_pat (z0, SV_POW2, 16)) -+ -+/* -+** qdecd_pat_vl1_u64: -+** uqdecd z0\.d, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_vl1_u64, svuint64_t, -+ z0 = svqdecd_pat_u64 (z0, SV_VL1, 16), -+ z0 = svqdecd_pat (z0, SV_VL1, 16)) -+ -+/* -+** qdecd_pat_vl2_u64: -+** uqdecd z0\.d, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_vl2_u64, svuint64_t, -+ z0 = svqdecd_pat_u64 (z0, SV_VL2, 16), -+ z0 = svqdecd_pat (z0, SV_VL2, 16)) -+ -+/* -+** qdecd_pat_vl3_u64: -+** uqdecd z0\.d, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_vl3_u64, svuint64_t, -+ z0 = svqdecd_pat_u64 (z0, SV_VL3, 16), -+ z0 = svqdecd_pat (z0, SV_VL3, 16)) -+ -+/* -+** qdecd_pat_vl4_u64: -+** uqdecd z0\.d, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_vl4_u64, svuint64_t, -+ z0 = svqdecd_pat_u64 (z0, SV_VL4, 16), -+ z0 = svqdecd_pat (z0, SV_VL4, 16)) -+ -+/* -+** qdecd_pat_vl5_u64: -+** uqdecd z0\.d, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_vl5_u64, svuint64_t, -+ z0 = svqdecd_pat_u64 (z0, SV_VL5, 16), -+ z0 = svqdecd_pat (z0, SV_VL5, 16)) -+ -+/* -+** qdecd_pat_vl6_u64: -+** uqdecd z0\.d, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_vl6_u64, svuint64_t, -+ z0 = svqdecd_pat_u64 (z0, SV_VL6, 16), -+ z0 = svqdecd_pat (z0, SV_VL6, 16)) -+ -+/* -+** qdecd_pat_vl7_u64: -+** uqdecd z0\.d, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_vl7_u64, svuint64_t, -+ z0 = svqdecd_pat_u64 (z0, SV_VL7, 16), -+ z0 = svqdecd_pat (z0, SV_VL7, 16)) -+ -+/* -+** qdecd_pat_vl8_u64: -+** uqdecd z0\.d, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_vl8_u64, svuint64_t, -+ z0 = svqdecd_pat_u64 (z0, SV_VL8, 16), -+ z0 = svqdecd_pat (z0, SV_VL8, 16)) -+ -+/* -+** qdecd_pat_vl16_u64: -+** uqdecd z0\.d, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_vl16_u64, svuint64_t, -+ z0 = svqdecd_pat_u64 (z0, SV_VL16, 16), -+ z0 = svqdecd_pat (z0, SV_VL16, 16)) -+ -+/* -+** qdecd_pat_vl32_u64: -+** uqdecd z0\.d, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_vl32_u64, svuint64_t, -+ z0 = svqdecd_pat_u64 (z0, SV_VL32, 16), -+ z0 = svqdecd_pat (z0, SV_VL32, 16)) -+ -+/* -+** qdecd_pat_vl64_u64: -+** uqdecd z0\.d, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_vl64_u64, svuint64_t, -+ z0 = svqdecd_pat_u64 (z0, SV_VL64, 16), -+ z0 = svqdecd_pat (z0, SV_VL64, 16)) -+ -+/* -+** qdecd_pat_vl128_u64: -+** uqdecd z0\.d, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_vl128_u64, svuint64_t, -+ z0 = svqdecd_pat_u64 (z0, SV_VL128, 16), -+ z0 = svqdecd_pat (z0, SV_VL128, 16)) -+ -+/* -+** qdecd_pat_vl256_u64: -+** uqdecd z0\.d, vl256, mul #16 -+** ret -+*/ 
-+TEST_UNIFORM_Z (qdecd_pat_vl256_u64, svuint64_t, -+ z0 = svqdecd_pat_u64 (z0, SV_VL256, 16), -+ z0 = svqdecd_pat (z0, SV_VL256, 16)) -+ -+/* -+** qdecd_pat_mul4_u64: -+** uqdecd z0\.d, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_mul4_u64, svuint64_t, -+ z0 = svqdecd_pat_u64 (z0, SV_MUL4, 16), -+ z0 = svqdecd_pat (z0, SV_MUL4, 16)) -+ -+/* -+** qdecd_pat_mul3_u64: -+** uqdecd z0\.d, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_mul3_u64, svuint64_t, -+ z0 = svqdecd_pat_u64 (z0, SV_MUL3, 16), -+ z0 = svqdecd_pat (z0, SV_MUL3, 16)) -+ -+/* -+** qdecd_pat_all_u64: -+** uqdecd z0\.d, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_pat_all_u64, svuint64_t, -+ z0 = svqdecd_pat_u64 (z0, SV_ALL, 16), -+ z0 = svqdecd_pat (z0, SV_ALL, 16)) -+ -+/* -+** qdecd_pat_n_1_u64_tied: -+** uqdecd x0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_1_u64_tied, uint64_t, -+ x0 = svqdecd_pat_n_u64 (x0, SV_POW2, 1), -+ x0 = svqdecd_pat (x0, SV_POW2, 1)) -+ -+/* -+** qdecd_pat_n_1_u64_untied: -+** mov x0, x1 -+** uqdecd x0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_1_u64_untied, uint64_t, -+ x0 = svqdecd_pat_n_u64 (x1, SV_POW2, 1), -+ x0 = svqdecd_pat (x1, SV_POW2, 1)) -+ -+/* -+** qdecd_pat_n_2_u64: -+** uqdecd x0, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_2_u64, uint64_t, -+ x0 = svqdecd_pat_n_u64 (x0, SV_POW2, 2), -+ x0 = svqdecd_pat (x0, SV_POW2, 2)) -+ -+/* -+** qdecd_pat_n_7_u64: -+** uqdecd x0, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_7_u64, uint64_t, -+ x0 = svqdecd_pat_n_u64 (x0, SV_POW2, 7), -+ x0 = svqdecd_pat (x0, SV_POW2, 7)) -+ -+/* -+** qdecd_pat_n_15_u64: -+** uqdecd x0, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_15_u64, uint64_t, -+ x0 = svqdecd_pat_n_u64 (x0, SV_POW2, 15), -+ x0 = svqdecd_pat (x0, SV_POW2, 15)) -+ -+/* -+** qdecd_pat_n_16_u64: -+** uqdecd x0, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_16_u64, uint64_t, -+ x0 = svqdecd_pat_n_u64 (x0, SV_POW2, 16), -+ x0 = svqdecd_pat (x0, SV_POW2, 16)) -+ -+/* -+** qdecd_pat_n_vl1_u64: -+** uqdecd x0, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl1_u64, uint64_t, -+ x0 = svqdecd_pat_n_u64 (x0, SV_VL1, 16), -+ x0 = svqdecd_pat (x0, SV_VL1, 16)) -+ -+/* -+** qdecd_pat_n_vl2_u64: -+** uqdecd x0, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl2_u64, uint64_t, -+ x0 = svqdecd_pat_n_u64 (x0, SV_VL2, 16), -+ x0 = svqdecd_pat (x0, SV_VL2, 16)) -+ -+/* -+** qdecd_pat_n_vl3_u64: -+** uqdecd x0, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl3_u64, uint64_t, -+ x0 = svqdecd_pat_n_u64 (x0, SV_VL3, 16), -+ x0 = svqdecd_pat (x0, SV_VL3, 16)) -+ -+/* -+** qdecd_pat_n_vl4_u64: -+** uqdecd x0, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl4_u64, uint64_t, -+ x0 = svqdecd_pat_n_u64 (x0, SV_VL4, 16), -+ x0 = svqdecd_pat (x0, SV_VL4, 16)) -+ -+/* -+** qdecd_pat_n_vl5_u64: -+** uqdecd x0, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl5_u64, uint64_t, -+ x0 = svqdecd_pat_n_u64 (x0, SV_VL5, 16), -+ x0 = svqdecd_pat (x0, SV_VL5, 16)) -+ -+/* -+** qdecd_pat_n_vl6_u64: -+** uqdecd x0, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl6_u64, uint64_t, -+ x0 = svqdecd_pat_n_u64 (x0, SV_VL6, 16), -+ x0 = svqdecd_pat (x0, SV_VL6, 16)) -+ -+/* -+** qdecd_pat_n_vl7_u64: -+** uqdecd x0, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl7_u64, uint64_t, -+ x0 = svqdecd_pat_n_u64 (x0, SV_VL7, 16), -+ x0 = svqdecd_pat (x0, SV_VL7, 16)) -+ -+/* -+** qdecd_pat_n_vl8_u64: -+** uqdecd x0, vl8, mul #16 -+** ret 
-+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl8_u64, uint64_t, -+ x0 = svqdecd_pat_n_u64 (x0, SV_VL8, 16), -+ x0 = svqdecd_pat (x0, SV_VL8, 16)) -+ -+/* -+** qdecd_pat_n_vl16_u64: -+** uqdecd x0, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl16_u64, uint64_t, -+ x0 = svqdecd_pat_n_u64 (x0, SV_VL16, 16), -+ x0 = svqdecd_pat (x0, SV_VL16, 16)) -+ -+/* -+** qdecd_pat_n_vl32_u64: -+** uqdecd x0, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl32_u64, uint64_t, -+ x0 = svqdecd_pat_n_u64 (x0, SV_VL32, 16), -+ x0 = svqdecd_pat (x0, SV_VL32, 16)) -+ -+/* -+** qdecd_pat_n_vl64_u64: -+** uqdecd x0, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl64_u64, uint64_t, -+ x0 = svqdecd_pat_n_u64 (x0, SV_VL64, 16), -+ x0 = svqdecd_pat (x0, SV_VL64, 16)) -+ -+/* -+** qdecd_pat_n_vl128_u64: -+** uqdecd x0, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl128_u64, uint64_t, -+ x0 = svqdecd_pat_n_u64 (x0, SV_VL128, 16), -+ x0 = svqdecd_pat (x0, SV_VL128, 16)) -+ -+/* -+** qdecd_pat_n_vl256_u64: -+** uqdecd x0, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_vl256_u64, uint64_t, -+ x0 = svqdecd_pat_n_u64 (x0, SV_VL256, 16), -+ x0 = svqdecd_pat (x0, SV_VL256, 16)) -+ -+/* -+** qdecd_pat_n_mul4_u64: -+** uqdecd x0, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_mul4_u64, uint64_t, -+ x0 = svqdecd_pat_n_u64 (x0, SV_MUL4, 16), -+ x0 = svqdecd_pat (x0, SV_MUL4, 16)) -+ -+/* -+** qdecd_pat_n_mul3_u64: -+** uqdecd x0, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_mul3_u64, uint64_t, -+ x0 = svqdecd_pat_n_u64 (x0, SV_MUL3, 16), -+ x0 = svqdecd_pat (x0, SV_MUL3, 16)) -+ -+/* -+** qdecd_pat_n_all_u64: -+** uqdecd x0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_pat_n_all_u64, uint64_t, -+ x0 = svqdecd_pat_n_u64 (x0, SV_ALL, 16), -+ x0 = svqdecd_pat (x0, SV_ALL, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_s32.c -new file mode 100644 -index 000000000..1912ed53f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_s32.c -@@ -0,0 +1,58 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdecd_n_1_s32_tied: -+** sqdecd x0, w0 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_n_1_s32_tied, int32_t, -+ x0 = svqdecd_n_s32 (x0, 1), -+ x0 = svqdecd (x0, 1)) -+ -+/* -+** qdecd_n_1_s32_untied: -+** mov w0, w1 -+** sqdecd x0, w0 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_n_1_s32_untied, int32_t, -+ x0 = svqdecd_n_s32 (x1, 1), -+ x0 = svqdecd (x1, 1)) -+ -+/* -+** qdecd_n_2_s32: -+** sqdecd x0, w0, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_n_2_s32, int32_t, -+ x0 = svqdecd_n_s32 (x0, 2), -+ x0 = svqdecd (x0, 2)) -+ -+/* -+** qdecd_n_7_s32: -+** sqdecd x0, w0, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_n_7_s32, int32_t, -+ x0 = svqdecd_n_s32 (x0, 7), -+ x0 = svqdecd (x0, 7)) -+ -+/* -+** qdecd_n_15_s32: -+** sqdecd x0, w0, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_n_15_s32, int32_t, -+ x0 = svqdecd_n_s32 (x0, 15), -+ x0 = svqdecd (x0, 15)) -+ -+/* -+** qdecd_n_16_s32: -+** sqdecd x0, w0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_n_16_s32, int32_t, -+ x0 = svqdecd_n_s32 (x0, 16), -+ x0 = svqdecd (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_s64.c -new file mode 100644 -index 000000000..bd113fc66 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_s64.c -@@ -0,0 
+1,113 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdecd_1_s64_tied: -+** sqdecd z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_1_s64_tied, svint64_t, -+ z0 = svqdecd_s64 (z0, 1), -+ z0 = svqdecd (z0, 1)) -+ -+/* -+** qdecd_1_s64_untied: -+** movprfx z0, z1 -+** sqdecd z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_1_s64_untied, svint64_t, -+ z0 = svqdecd_s64 (z1, 1), -+ z0 = svqdecd (z1, 1)) -+ -+/* -+** qdecd_2_s64: -+** sqdecd z0\.d, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_2_s64, svint64_t, -+ z0 = svqdecd_s64 (z0, 2), -+ z0 = svqdecd (z0, 2)) -+ -+/* -+** qdecd_7_s64: -+** sqdecd z0\.d, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_7_s64, svint64_t, -+ z0 = svqdecd_s64 (z0, 7), -+ z0 = svqdecd (z0, 7)) -+ -+/* -+** qdecd_15_s64: -+** sqdecd z0\.d, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_15_s64, svint64_t, -+ z0 = svqdecd_s64 (z0, 15), -+ z0 = svqdecd (z0, 15)) -+ -+/* -+** qdecd_16_s64: -+** sqdecd z0\.d, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_16_s64, svint64_t, -+ z0 = svqdecd_s64 (z0, 16), -+ z0 = svqdecd (z0, 16)) -+ -+/* -+** qdecd_n_1_s64_tied: -+** sqdecd x0 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_n_1_s64_tied, int64_t, -+ x0 = svqdecd_n_s64 (x0, 1), -+ x0 = svqdecd (x0, 1)) -+ -+/* -+** qdecd_n_1_s64_untied: -+** mov x0, x1 -+** sqdecd x0 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_n_1_s64_untied, int64_t, -+ x0 = svqdecd_n_s64 (x1, 1), -+ x0 = svqdecd (x1, 1)) -+ -+/* -+** qdecd_n_2_s64: -+** sqdecd x0, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_n_2_s64, int64_t, -+ x0 = svqdecd_n_s64 (x0, 2), -+ x0 = svqdecd (x0, 2)) -+ -+/* -+** qdecd_n_7_s64: -+** sqdecd x0, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_n_7_s64, int64_t, -+ x0 = svqdecd_n_s64 (x0, 7), -+ x0 = svqdecd (x0, 7)) -+ -+/* -+** qdecd_n_15_s64: -+** sqdecd x0, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_n_15_s64, int64_t, -+ x0 = svqdecd_n_s64 (x0, 15), -+ x0 = svqdecd (x0, 15)) -+ -+/* -+** qdecd_n_16_s64: -+** sqdecd x0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_n_16_s64, int64_t, -+ x0 = svqdecd_n_s64 (x0, 16), -+ x0 = svqdecd (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_u32.c -new file mode 100644 -index 000000000..a672dc215 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_u32.c -@@ -0,0 +1,58 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdecd_n_1_u32_tied: -+** uqdecd w0 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_n_1_u32_tied, uint32_t, -+ x0 = svqdecd_n_u32 (x0, 1), -+ x0 = svqdecd (x0, 1)) -+ -+/* -+** qdecd_n_1_u32_untied: -+** mov w0, w1 -+** uqdecd w0 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_n_1_u32_untied, uint32_t, -+ x0 = svqdecd_n_u32 (x1, 1), -+ x0 = svqdecd (x1, 1)) -+ -+/* -+** qdecd_n_2_u32: -+** uqdecd w0, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_n_2_u32, uint32_t, -+ x0 = svqdecd_n_u32 (x0, 2), -+ x0 = svqdecd (x0, 2)) -+ -+/* -+** qdecd_n_7_u32: -+** uqdecd w0, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_n_7_u32, uint32_t, -+ x0 = svqdecd_n_u32 (x0, 7), -+ x0 = svqdecd (x0, 7)) -+ -+/* -+** qdecd_n_15_u32: -+** uqdecd w0, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_n_15_u32, uint32_t, -+ x0 = svqdecd_n_u32 (x0, 15), -+ x0 = svqdecd (x0, 15)) -+ -+/* -+** qdecd_n_16_u32: -+** uqdecd w0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_n_16_u32, uint32_t, -+ x0 = 
svqdecd_n_u32 (x0, 16), -+ x0 = svqdecd (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_u64.c -new file mode 100644 -index 000000000..fca8868f3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecd_u64.c -@@ -0,0 +1,113 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdecd_1_u64_tied: -+** uqdecd z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_1_u64_tied, svuint64_t, -+ z0 = svqdecd_u64 (z0, 1), -+ z0 = svqdecd (z0, 1)) -+ -+/* -+** qdecd_1_u64_untied: -+** movprfx z0, z1 -+** uqdecd z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_1_u64_untied, svuint64_t, -+ z0 = svqdecd_u64 (z1, 1), -+ z0 = svqdecd (z1, 1)) -+ -+/* -+** qdecd_2_u64: -+** uqdecd z0\.d, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_2_u64, svuint64_t, -+ z0 = svqdecd_u64 (z0, 2), -+ z0 = svqdecd (z0, 2)) -+ -+/* -+** qdecd_7_u64: -+** uqdecd z0\.d, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_7_u64, svuint64_t, -+ z0 = svqdecd_u64 (z0, 7), -+ z0 = svqdecd (z0, 7)) -+ -+/* -+** qdecd_15_u64: -+** uqdecd z0\.d, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_15_u64, svuint64_t, -+ z0 = svqdecd_u64 (z0, 15), -+ z0 = svqdecd (z0, 15)) -+ -+/* -+** qdecd_16_u64: -+** uqdecd z0\.d, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecd_16_u64, svuint64_t, -+ z0 = svqdecd_u64 (z0, 16), -+ z0 = svqdecd (z0, 16)) -+ -+/* -+** qdecd_n_1_u64_tied: -+** uqdecd x0 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_n_1_u64_tied, uint64_t, -+ x0 = svqdecd_n_u64 (x0, 1), -+ x0 = svqdecd (x0, 1)) -+ -+/* -+** qdecd_n_1_u64_untied: -+** mov x0, x1 -+** uqdecd x0 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_n_1_u64_untied, uint64_t, -+ x0 = svqdecd_n_u64 (x1, 1), -+ x0 = svqdecd (x1, 1)) -+ -+/* -+** qdecd_n_2_u64: -+** uqdecd x0, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_n_2_u64, uint64_t, -+ x0 = svqdecd_n_u64 (x0, 2), -+ x0 = svqdecd (x0, 2)) -+ -+/* -+** qdecd_n_7_u64: -+** uqdecd x0, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_n_7_u64, uint64_t, -+ x0 = svqdecd_n_u64 (x0, 7), -+ x0 = svqdecd (x0, 7)) -+ -+/* -+** qdecd_n_15_u64: -+** uqdecd x0, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_n_15_u64, uint64_t, -+ x0 = svqdecd_n_u64 (x0, 15), -+ x0 = svqdecd (x0, 15)) -+ -+/* -+** qdecd_n_16_u64: -+** uqdecd x0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecd_n_16_u64, uint64_t, -+ x0 = svqdecd_n_u64 (x0, 16), -+ x0 = svqdecd (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_pat_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_pat_s16.c -new file mode 100644 -index 000000000..c084043f1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_pat_s16.c -@@ -0,0 +1,202 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdech_pat_1_s16_tied: -+** sqdech z0\.h, pow2 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_1_s16_tied, svint16_t, -+ z0 = svqdech_pat_s16 (z0, SV_POW2, 1), -+ z0 = svqdech_pat (z0, SV_POW2, 1)) -+ -+/* -+** qdech_pat_1_s16_untied: -+** movprfx z0, z1 -+** sqdech z0\.h, pow2 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_1_s16_untied, svint16_t, -+ z0 = svqdech_pat_s16 (z1, SV_POW2, 1), -+ z0 = svqdech_pat (z1, SV_POW2, 1)) -+ -+/* -+** qdech_pat_2_s16: -+** sqdech z0\.h, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_2_s16, svint16_t, -+ z0 = svqdech_pat_s16 (z0, SV_POW2, 2), -+ z0 = svqdech_pat (z0, SV_POW2, 2)) -+ -+/* -+** 
qdech_pat_7_s16: -+** sqdech z0\.h, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_7_s16, svint16_t, -+ z0 = svqdech_pat_s16 (z0, SV_POW2, 7), -+ z0 = svqdech_pat (z0, SV_POW2, 7)) -+ -+/* -+** qdech_pat_15_s16: -+** sqdech z0\.h, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_15_s16, svint16_t, -+ z0 = svqdech_pat_s16 (z0, SV_POW2, 15), -+ z0 = svqdech_pat (z0, SV_POW2, 15)) -+ -+/* -+** qdech_pat_16_s16: -+** sqdech z0\.h, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_16_s16, svint16_t, -+ z0 = svqdech_pat_s16 (z0, SV_POW2, 16), -+ z0 = svqdech_pat (z0, SV_POW2, 16)) -+ -+/* -+** qdech_pat_vl1_s16: -+** sqdech z0\.h, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_vl1_s16, svint16_t, -+ z0 = svqdech_pat_s16 (z0, SV_VL1, 16), -+ z0 = svqdech_pat (z0, SV_VL1, 16)) -+ -+/* -+** qdech_pat_vl2_s16: -+** sqdech z0\.h, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_vl2_s16, svint16_t, -+ z0 = svqdech_pat_s16 (z0, SV_VL2, 16), -+ z0 = svqdech_pat (z0, SV_VL2, 16)) -+ -+/* -+** qdech_pat_vl3_s16: -+** sqdech z0\.h, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_vl3_s16, svint16_t, -+ z0 = svqdech_pat_s16 (z0, SV_VL3, 16), -+ z0 = svqdech_pat (z0, SV_VL3, 16)) -+ -+/* -+** qdech_pat_vl4_s16: -+** sqdech z0\.h, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_vl4_s16, svint16_t, -+ z0 = svqdech_pat_s16 (z0, SV_VL4, 16), -+ z0 = svqdech_pat (z0, SV_VL4, 16)) -+ -+/* -+** qdech_pat_vl5_s16: -+** sqdech z0\.h, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_vl5_s16, svint16_t, -+ z0 = svqdech_pat_s16 (z0, SV_VL5, 16), -+ z0 = svqdech_pat (z0, SV_VL5, 16)) -+ -+/* -+** qdech_pat_vl6_s16: -+** sqdech z0\.h, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_vl6_s16, svint16_t, -+ z0 = svqdech_pat_s16 (z0, SV_VL6, 16), -+ z0 = svqdech_pat (z0, SV_VL6, 16)) -+ -+/* -+** qdech_pat_vl7_s16: -+** sqdech z0\.h, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_vl7_s16, svint16_t, -+ z0 = svqdech_pat_s16 (z0, SV_VL7, 16), -+ z0 = svqdech_pat (z0, SV_VL7, 16)) -+ -+/* -+** qdech_pat_vl8_s16: -+** sqdech z0\.h, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_vl8_s16, svint16_t, -+ z0 = svqdech_pat_s16 (z0, SV_VL8, 16), -+ z0 = svqdech_pat (z0, SV_VL8, 16)) -+ -+/* -+** qdech_pat_vl16_s16: -+** sqdech z0\.h, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_vl16_s16, svint16_t, -+ z0 = svqdech_pat_s16 (z0, SV_VL16, 16), -+ z0 = svqdech_pat (z0, SV_VL16, 16)) -+ -+/* -+** qdech_pat_vl32_s16: -+** sqdech z0\.h, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_vl32_s16, svint16_t, -+ z0 = svqdech_pat_s16 (z0, SV_VL32, 16), -+ z0 = svqdech_pat (z0, SV_VL32, 16)) -+ -+/* -+** qdech_pat_vl64_s16: -+** sqdech z0\.h, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_vl64_s16, svint16_t, -+ z0 = svqdech_pat_s16 (z0, SV_VL64, 16), -+ z0 = svqdech_pat (z0, SV_VL64, 16)) -+ -+/* -+** qdech_pat_vl128_s16: -+** sqdech z0\.h, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_vl128_s16, svint16_t, -+ z0 = svqdech_pat_s16 (z0, SV_VL128, 16), -+ z0 = svqdech_pat (z0, SV_VL128, 16)) -+ -+/* -+** qdech_pat_vl256_s16: -+** sqdech z0\.h, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_vl256_s16, svint16_t, -+ z0 = svqdech_pat_s16 (z0, SV_VL256, 16), -+ z0 = svqdech_pat (z0, SV_VL256, 16)) -+ -+/* -+** qdech_pat_mul4_s16: -+** sqdech z0\.h, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_mul4_s16, svint16_t, -+ z0 = svqdech_pat_s16 (z0, SV_MUL4, 16), -+ z0 = svqdech_pat (z0, SV_MUL4, 16)) -+ -+/* -+** 
qdech_pat_mul3_s16: -+** sqdech z0\.h, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_mul3_s16, svint16_t, -+ z0 = svqdech_pat_s16 (z0, SV_MUL3, 16), -+ z0 = svqdech_pat (z0, SV_MUL3, 16)) -+ -+/* -+** qdech_pat_all_s16: -+** sqdech z0\.h, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_all_s16, svint16_t, -+ z0 = svqdech_pat_s16 (z0, SV_ALL, 16), -+ z0 = svqdech_pat (z0, SV_ALL, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_pat_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_pat_s32.c -new file mode 100644 -index 000000000..b56306db7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_pat_s32.c -@@ -0,0 +1,202 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdech_pat_n_1_s32_tied: -+** sqdech x0, w0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_1_s32_tied, int32_t, -+ x0 = svqdech_pat_n_s32 (x0, SV_POW2, 1), -+ x0 = svqdech_pat (x0, SV_POW2, 1)) -+ -+/* -+** qdech_pat_n_1_s32_untied: -+** mov w0, w1 -+** sqdech x0, w0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_1_s32_untied, int32_t, -+ x0 = svqdech_pat_n_s32 (x1, SV_POW2, 1), -+ x0 = svqdech_pat (x1, SV_POW2, 1)) -+ -+/* -+** qdech_pat_n_2_s32: -+** sqdech x0, w0, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_2_s32, int32_t, -+ x0 = svqdech_pat_n_s32 (x0, SV_POW2, 2), -+ x0 = svqdech_pat (x0, SV_POW2, 2)) -+ -+/* -+** qdech_pat_n_7_s32: -+** sqdech x0, w0, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_7_s32, int32_t, -+ x0 = svqdech_pat_n_s32 (x0, SV_POW2, 7), -+ x0 = svqdech_pat (x0, SV_POW2, 7)) -+ -+/* -+** qdech_pat_n_15_s32: -+** sqdech x0, w0, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_15_s32, int32_t, -+ x0 = svqdech_pat_n_s32 (x0, SV_POW2, 15), -+ x0 = svqdech_pat (x0, SV_POW2, 15)) -+ -+/* -+** qdech_pat_n_16_s32: -+** sqdech x0, w0, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_16_s32, int32_t, -+ x0 = svqdech_pat_n_s32 (x0, SV_POW2, 16), -+ x0 = svqdech_pat (x0, SV_POW2, 16)) -+ -+/* -+** qdech_pat_n_vl1_s32: -+** sqdech x0, w0, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl1_s32, int32_t, -+ x0 = svqdech_pat_n_s32 (x0, SV_VL1, 16), -+ x0 = svqdech_pat (x0, SV_VL1, 16)) -+ -+/* -+** qdech_pat_n_vl2_s32: -+** sqdech x0, w0, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl2_s32, int32_t, -+ x0 = svqdech_pat_n_s32 (x0, SV_VL2, 16), -+ x0 = svqdech_pat (x0, SV_VL2, 16)) -+ -+/* -+** qdech_pat_n_vl3_s32: -+** sqdech x0, w0, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl3_s32, int32_t, -+ x0 = svqdech_pat_n_s32 (x0, SV_VL3, 16), -+ x0 = svqdech_pat (x0, SV_VL3, 16)) -+ -+/* -+** qdech_pat_n_vl4_s32: -+** sqdech x0, w0, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl4_s32, int32_t, -+ x0 = svqdech_pat_n_s32 (x0, SV_VL4, 16), -+ x0 = svqdech_pat (x0, SV_VL4, 16)) -+ -+/* -+** qdech_pat_n_vl5_s32: -+** sqdech x0, w0, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl5_s32, int32_t, -+ x0 = svqdech_pat_n_s32 (x0, SV_VL5, 16), -+ x0 = svqdech_pat (x0, SV_VL5, 16)) -+ -+/* -+** qdech_pat_n_vl6_s32: -+** sqdech x0, w0, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl6_s32, int32_t, -+ x0 = svqdech_pat_n_s32 (x0, SV_VL6, 16), -+ x0 = svqdech_pat (x0, SV_VL6, 16)) -+ -+/* -+** qdech_pat_n_vl7_s32: -+** sqdech x0, w0, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl7_s32, int32_t, -+ x0 = svqdech_pat_n_s32 (x0, SV_VL7, 16), -+ x0 = svqdech_pat (x0, SV_VL7, 16)) -+ 
-+/* -+** qdech_pat_n_vl8_s32: -+** sqdech x0, w0, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl8_s32, int32_t, -+ x0 = svqdech_pat_n_s32 (x0, SV_VL8, 16), -+ x0 = svqdech_pat (x0, SV_VL8, 16)) -+ -+/* -+** qdech_pat_n_vl16_s32: -+** sqdech x0, w0, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl16_s32, int32_t, -+ x0 = svqdech_pat_n_s32 (x0, SV_VL16, 16), -+ x0 = svqdech_pat (x0, SV_VL16, 16)) -+ -+/* -+** qdech_pat_n_vl32_s32: -+** sqdech x0, w0, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl32_s32, int32_t, -+ x0 = svqdech_pat_n_s32 (x0, SV_VL32, 16), -+ x0 = svqdech_pat (x0, SV_VL32, 16)) -+ -+/* -+** qdech_pat_n_vl64_s32: -+** sqdech x0, w0, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl64_s32, int32_t, -+ x0 = svqdech_pat_n_s32 (x0, SV_VL64, 16), -+ x0 = svqdech_pat (x0, SV_VL64, 16)) -+ -+/* -+** qdech_pat_n_vl128_s32: -+** sqdech x0, w0, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl128_s32, int32_t, -+ x0 = svqdech_pat_n_s32 (x0, SV_VL128, 16), -+ x0 = svqdech_pat (x0, SV_VL128, 16)) -+ -+/* -+** qdech_pat_n_vl256_s32: -+** sqdech x0, w0, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl256_s32, int32_t, -+ x0 = svqdech_pat_n_s32 (x0, SV_VL256, 16), -+ x0 = svqdech_pat (x0, SV_VL256, 16)) -+ -+/* -+** qdech_pat_n_mul4_s32: -+** sqdech x0, w0, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_mul4_s32, int32_t, -+ x0 = svqdech_pat_n_s32 (x0, SV_MUL4, 16), -+ x0 = svqdech_pat (x0, SV_MUL4, 16)) -+ -+/* -+** qdech_pat_n_mul3_s32: -+** sqdech x0, w0, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_mul3_s32, int32_t, -+ x0 = svqdech_pat_n_s32 (x0, SV_MUL3, 16), -+ x0 = svqdech_pat (x0, SV_MUL3, 16)) -+ -+/* -+** qdech_pat_n_all_s32: -+** sqdech x0, w0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_all_s32, int32_t, -+ x0 = svqdech_pat_n_s32 (x0, SV_ALL, 16), -+ x0 = svqdech_pat (x0, SV_ALL, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_pat_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_pat_s64.c -new file mode 100644 -index 000000000..591658f54 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_pat_s64.c -@@ -0,0 +1,202 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdech_pat_n_1_s64_tied: -+** sqdech x0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_1_s64_tied, int64_t, -+ x0 = svqdech_pat_n_s64 (x0, SV_POW2, 1), -+ x0 = svqdech_pat (x0, SV_POW2, 1)) -+ -+/* -+** qdech_pat_n_1_s64_untied: -+** mov x0, x1 -+** sqdech x0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_1_s64_untied, int64_t, -+ x0 = svqdech_pat_n_s64 (x1, SV_POW2, 1), -+ x0 = svqdech_pat (x1, SV_POW2, 1)) -+ -+/* -+** qdech_pat_n_2_s64: -+** sqdech x0, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_2_s64, int64_t, -+ x0 = svqdech_pat_n_s64 (x0, SV_POW2, 2), -+ x0 = svqdech_pat (x0, SV_POW2, 2)) -+ -+/* -+** qdech_pat_n_7_s64: -+** sqdech x0, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_7_s64, int64_t, -+ x0 = svqdech_pat_n_s64 (x0, SV_POW2, 7), -+ x0 = svqdech_pat (x0, SV_POW2, 7)) -+ -+/* -+** qdech_pat_n_15_s64: -+** sqdech x0, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_15_s64, int64_t, -+ x0 = svqdech_pat_n_s64 (x0, SV_POW2, 15), -+ x0 = svqdech_pat (x0, SV_POW2, 15)) -+ -+/* -+** qdech_pat_n_16_s64: -+** sqdech x0, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_16_s64, int64_t, -+ x0 = svqdech_pat_n_s64 (x0, SV_POW2, 16), -+ x0 
= svqdech_pat (x0, SV_POW2, 16)) -+ -+/* -+** qdech_pat_n_vl1_s64: -+** sqdech x0, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl1_s64, int64_t, -+ x0 = svqdech_pat_n_s64 (x0, SV_VL1, 16), -+ x0 = svqdech_pat (x0, SV_VL1, 16)) -+ -+/* -+** qdech_pat_n_vl2_s64: -+** sqdech x0, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl2_s64, int64_t, -+ x0 = svqdech_pat_n_s64 (x0, SV_VL2, 16), -+ x0 = svqdech_pat (x0, SV_VL2, 16)) -+ -+/* -+** qdech_pat_n_vl3_s64: -+** sqdech x0, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl3_s64, int64_t, -+ x0 = svqdech_pat_n_s64 (x0, SV_VL3, 16), -+ x0 = svqdech_pat (x0, SV_VL3, 16)) -+ -+/* -+** qdech_pat_n_vl4_s64: -+** sqdech x0, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl4_s64, int64_t, -+ x0 = svqdech_pat_n_s64 (x0, SV_VL4, 16), -+ x0 = svqdech_pat (x0, SV_VL4, 16)) -+ -+/* -+** qdech_pat_n_vl5_s64: -+** sqdech x0, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl5_s64, int64_t, -+ x0 = svqdech_pat_n_s64 (x0, SV_VL5, 16), -+ x0 = svqdech_pat (x0, SV_VL5, 16)) -+ -+/* -+** qdech_pat_n_vl6_s64: -+** sqdech x0, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl6_s64, int64_t, -+ x0 = svqdech_pat_n_s64 (x0, SV_VL6, 16), -+ x0 = svqdech_pat (x0, SV_VL6, 16)) -+ -+/* -+** qdech_pat_n_vl7_s64: -+** sqdech x0, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl7_s64, int64_t, -+ x0 = svqdech_pat_n_s64 (x0, SV_VL7, 16), -+ x0 = svqdech_pat (x0, SV_VL7, 16)) -+ -+/* -+** qdech_pat_n_vl8_s64: -+** sqdech x0, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl8_s64, int64_t, -+ x0 = svqdech_pat_n_s64 (x0, SV_VL8, 16), -+ x0 = svqdech_pat (x0, SV_VL8, 16)) -+ -+/* -+** qdech_pat_n_vl16_s64: -+** sqdech x0, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl16_s64, int64_t, -+ x0 = svqdech_pat_n_s64 (x0, SV_VL16, 16), -+ x0 = svqdech_pat (x0, SV_VL16, 16)) -+ -+/* -+** qdech_pat_n_vl32_s64: -+** sqdech x0, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl32_s64, int64_t, -+ x0 = svqdech_pat_n_s64 (x0, SV_VL32, 16), -+ x0 = svqdech_pat (x0, SV_VL32, 16)) -+ -+/* -+** qdech_pat_n_vl64_s64: -+** sqdech x0, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl64_s64, int64_t, -+ x0 = svqdech_pat_n_s64 (x0, SV_VL64, 16), -+ x0 = svqdech_pat (x0, SV_VL64, 16)) -+ -+/* -+** qdech_pat_n_vl128_s64: -+** sqdech x0, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl128_s64, int64_t, -+ x0 = svqdech_pat_n_s64 (x0, SV_VL128, 16), -+ x0 = svqdech_pat (x0, SV_VL128, 16)) -+ -+/* -+** qdech_pat_n_vl256_s64: -+** sqdech x0, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl256_s64, int64_t, -+ x0 = svqdech_pat_n_s64 (x0, SV_VL256, 16), -+ x0 = svqdech_pat (x0, SV_VL256, 16)) -+ -+/* -+** qdech_pat_n_mul4_s64: -+** sqdech x0, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_mul4_s64, int64_t, -+ x0 = svqdech_pat_n_s64 (x0, SV_MUL4, 16), -+ x0 = svqdech_pat (x0, SV_MUL4, 16)) -+ -+/* -+** qdech_pat_n_mul3_s64: -+** sqdech x0, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_mul3_s64, int64_t, -+ x0 = svqdech_pat_n_s64 (x0, SV_MUL3, 16), -+ x0 = svqdech_pat (x0, SV_MUL3, 16)) -+ -+/* -+** qdech_pat_n_all_s64: -+** sqdech x0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_all_s64, int64_t, -+ x0 = svqdech_pat_n_s64 (x0, SV_ALL, 16), -+ x0 = svqdech_pat (x0, SV_ALL, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_pat_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_pat_u16.c -new file mode 100644 
-index 000000000..ce0b5f3e8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_pat_u16.c -@@ -0,0 +1,202 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdech_pat_1_u16_tied: -+** uqdech z0\.h, pow2 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_1_u16_tied, svuint16_t, -+ z0 = svqdech_pat_u16 (z0, SV_POW2, 1), -+ z0 = svqdech_pat (z0, SV_POW2, 1)) -+ -+/* -+** qdech_pat_1_u16_untied: -+** movprfx z0, z1 -+** uqdech z0\.h, pow2 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_1_u16_untied, svuint16_t, -+ z0 = svqdech_pat_u16 (z1, SV_POW2, 1), -+ z0 = svqdech_pat (z1, SV_POW2, 1)) -+ -+/* -+** qdech_pat_2_u16: -+** uqdech z0\.h, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_2_u16, svuint16_t, -+ z0 = svqdech_pat_u16 (z0, SV_POW2, 2), -+ z0 = svqdech_pat (z0, SV_POW2, 2)) -+ -+/* -+** qdech_pat_7_u16: -+** uqdech z0\.h, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_7_u16, svuint16_t, -+ z0 = svqdech_pat_u16 (z0, SV_POW2, 7), -+ z0 = svqdech_pat (z0, SV_POW2, 7)) -+ -+/* -+** qdech_pat_15_u16: -+** uqdech z0\.h, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_15_u16, svuint16_t, -+ z0 = svqdech_pat_u16 (z0, SV_POW2, 15), -+ z0 = svqdech_pat (z0, SV_POW2, 15)) -+ -+/* -+** qdech_pat_16_u16: -+** uqdech z0\.h, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_16_u16, svuint16_t, -+ z0 = svqdech_pat_u16 (z0, SV_POW2, 16), -+ z0 = svqdech_pat (z0, SV_POW2, 16)) -+ -+/* -+** qdech_pat_vl1_u16: -+** uqdech z0\.h, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_vl1_u16, svuint16_t, -+ z0 = svqdech_pat_u16 (z0, SV_VL1, 16), -+ z0 = svqdech_pat (z0, SV_VL1, 16)) -+ -+/* -+** qdech_pat_vl2_u16: -+** uqdech z0\.h, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_vl2_u16, svuint16_t, -+ z0 = svqdech_pat_u16 (z0, SV_VL2, 16), -+ z0 = svqdech_pat (z0, SV_VL2, 16)) -+ -+/* -+** qdech_pat_vl3_u16: -+** uqdech z0\.h, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_vl3_u16, svuint16_t, -+ z0 = svqdech_pat_u16 (z0, SV_VL3, 16), -+ z0 = svqdech_pat (z0, SV_VL3, 16)) -+ -+/* -+** qdech_pat_vl4_u16: -+** uqdech z0\.h, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_vl4_u16, svuint16_t, -+ z0 = svqdech_pat_u16 (z0, SV_VL4, 16), -+ z0 = svqdech_pat (z0, SV_VL4, 16)) -+ -+/* -+** qdech_pat_vl5_u16: -+** uqdech z0\.h, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_vl5_u16, svuint16_t, -+ z0 = svqdech_pat_u16 (z0, SV_VL5, 16), -+ z0 = svqdech_pat (z0, SV_VL5, 16)) -+ -+/* -+** qdech_pat_vl6_u16: -+** uqdech z0\.h, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_vl6_u16, svuint16_t, -+ z0 = svqdech_pat_u16 (z0, SV_VL6, 16), -+ z0 = svqdech_pat (z0, SV_VL6, 16)) -+ -+/* -+** qdech_pat_vl7_u16: -+** uqdech z0\.h, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_vl7_u16, svuint16_t, -+ z0 = svqdech_pat_u16 (z0, SV_VL7, 16), -+ z0 = svqdech_pat (z0, SV_VL7, 16)) -+ -+/* -+** qdech_pat_vl8_u16: -+** uqdech z0\.h, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_vl8_u16, svuint16_t, -+ z0 = svqdech_pat_u16 (z0, SV_VL8, 16), -+ z0 = svqdech_pat (z0, SV_VL8, 16)) -+ -+/* -+** qdech_pat_vl16_u16: -+** uqdech z0\.h, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_vl16_u16, svuint16_t, -+ z0 = svqdech_pat_u16 (z0, SV_VL16, 16), -+ z0 = svqdech_pat (z0, SV_VL16, 16)) -+ -+/* -+** qdech_pat_vl32_u16: -+** uqdech z0\.h, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_vl32_u16, svuint16_t, -+ z0 = svqdech_pat_u16 (z0, SV_VL32, 16), -+ z0 = svqdech_pat (z0, 
SV_VL32, 16)) -+ -+/* -+** qdech_pat_vl64_u16: -+** uqdech z0\.h, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_vl64_u16, svuint16_t, -+ z0 = svqdech_pat_u16 (z0, SV_VL64, 16), -+ z0 = svqdech_pat (z0, SV_VL64, 16)) -+ -+/* -+** qdech_pat_vl128_u16: -+** uqdech z0\.h, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_vl128_u16, svuint16_t, -+ z0 = svqdech_pat_u16 (z0, SV_VL128, 16), -+ z0 = svqdech_pat (z0, SV_VL128, 16)) -+ -+/* -+** qdech_pat_vl256_u16: -+** uqdech z0\.h, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_vl256_u16, svuint16_t, -+ z0 = svqdech_pat_u16 (z0, SV_VL256, 16), -+ z0 = svqdech_pat (z0, SV_VL256, 16)) -+ -+/* -+** qdech_pat_mul4_u16: -+** uqdech z0\.h, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_mul4_u16, svuint16_t, -+ z0 = svqdech_pat_u16 (z0, SV_MUL4, 16), -+ z0 = svqdech_pat (z0, SV_MUL4, 16)) -+ -+/* -+** qdech_pat_mul3_u16: -+** uqdech z0\.h, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_mul3_u16, svuint16_t, -+ z0 = svqdech_pat_u16 (z0, SV_MUL3, 16), -+ z0 = svqdech_pat (z0, SV_MUL3, 16)) -+ -+/* -+** qdech_pat_all_u16: -+** uqdech z0\.h, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_pat_all_u16, svuint16_t, -+ z0 = svqdech_pat_u16 (z0, SV_ALL, 16), -+ z0 = svqdech_pat (z0, SV_ALL, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_pat_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_pat_u32.c -new file mode 100644 -index 000000000..177f32ec7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_pat_u32.c -@@ -0,0 +1,202 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdech_pat_n_1_u32_tied: -+** uqdech w0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_1_u32_tied, uint32_t, -+ x0 = svqdech_pat_n_u32 (x0, SV_POW2, 1), -+ x0 = svqdech_pat (x0, SV_POW2, 1)) -+ -+/* -+** qdech_pat_n_1_u32_untied: -+** mov w0, w1 -+** uqdech w0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_1_u32_untied, uint32_t, -+ x0 = svqdech_pat_n_u32 (x1, SV_POW2, 1), -+ x0 = svqdech_pat (x1, SV_POW2, 1)) -+ -+/* -+** qdech_pat_n_2_u32: -+** uqdech w0, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_2_u32, uint32_t, -+ x0 = svqdech_pat_n_u32 (x0, SV_POW2, 2), -+ x0 = svqdech_pat (x0, SV_POW2, 2)) -+ -+/* -+** qdech_pat_n_7_u32: -+** uqdech w0, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_7_u32, uint32_t, -+ x0 = svqdech_pat_n_u32 (x0, SV_POW2, 7), -+ x0 = svqdech_pat (x0, SV_POW2, 7)) -+ -+/* -+** qdech_pat_n_15_u32: -+** uqdech w0, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_15_u32, uint32_t, -+ x0 = svqdech_pat_n_u32 (x0, SV_POW2, 15), -+ x0 = svqdech_pat (x0, SV_POW2, 15)) -+ -+/* -+** qdech_pat_n_16_u32: -+** uqdech w0, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_16_u32, uint32_t, -+ x0 = svqdech_pat_n_u32 (x0, SV_POW2, 16), -+ x0 = svqdech_pat (x0, SV_POW2, 16)) -+ -+/* -+** qdech_pat_n_vl1_u32: -+** uqdech w0, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl1_u32, uint32_t, -+ x0 = svqdech_pat_n_u32 (x0, SV_VL1, 16), -+ x0 = svqdech_pat (x0, SV_VL1, 16)) -+ -+/* -+** qdech_pat_n_vl2_u32: -+** uqdech w0, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl2_u32, uint32_t, -+ x0 = svqdech_pat_n_u32 (x0, SV_VL2, 16), -+ x0 = svqdech_pat (x0, SV_VL2, 16)) -+ -+/* -+** qdech_pat_n_vl3_u32: -+** uqdech w0, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl3_u32, uint32_t, -+ x0 = svqdech_pat_n_u32 (x0, SV_VL3, 16), -+ x0 = svqdech_pat (x0, 
SV_VL3, 16)) -+ -+/* -+** qdech_pat_n_vl4_u32: -+** uqdech w0, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl4_u32, uint32_t, -+ x0 = svqdech_pat_n_u32 (x0, SV_VL4, 16), -+ x0 = svqdech_pat (x0, SV_VL4, 16)) -+ -+/* -+** qdech_pat_n_vl5_u32: -+** uqdech w0, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl5_u32, uint32_t, -+ x0 = svqdech_pat_n_u32 (x0, SV_VL5, 16), -+ x0 = svqdech_pat (x0, SV_VL5, 16)) -+ -+/* -+** qdech_pat_n_vl6_u32: -+** uqdech w0, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl6_u32, uint32_t, -+ x0 = svqdech_pat_n_u32 (x0, SV_VL6, 16), -+ x0 = svqdech_pat (x0, SV_VL6, 16)) -+ -+/* -+** qdech_pat_n_vl7_u32: -+** uqdech w0, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl7_u32, uint32_t, -+ x0 = svqdech_pat_n_u32 (x0, SV_VL7, 16), -+ x0 = svqdech_pat (x0, SV_VL7, 16)) -+ -+/* -+** qdech_pat_n_vl8_u32: -+** uqdech w0, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl8_u32, uint32_t, -+ x0 = svqdech_pat_n_u32 (x0, SV_VL8, 16), -+ x0 = svqdech_pat (x0, SV_VL8, 16)) -+ -+/* -+** qdech_pat_n_vl16_u32: -+** uqdech w0, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl16_u32, uint32_t, -+ x0 = svqdech_pat_n_u32 (x0, SV_VL16, 16), -+ x0 = svqdech_pat (x0, SV_VL16, 16)) -+ -+/* -+** qdech_pat_n_vl32_u32: -+** uqdech w0, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl32_u32, uint32_t, -+ x0 = svqdech_pat_n_u32 (x0, SV_VL32, 16), -+ x0 = svqdech_pat (x0, SV_VL32, 16)) -+ -+/* -+** qdech_pat_n_vl64_u32: -+** uqdech w0, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl64_u32, uint32_t, -+ x0 = svqdech_pat_n_u32 (x0, SV_VL64, 16), -+ x0 = svqdech_pat (x0, SV_VL64, 16)) -+ -+/* -+** qdech_pat_n_vl128_u32: -+** uqdech w0, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl128_u32, uint32_t, -+ x0 = svqdech_pat_n_u32 (x0, SV_VL128, 16), -+ x0 = svqdech_pat (x0, SV_VL128, 16)) -+ -+/* -+** qdech_pat_n_vl256_u32: -+** uqdech w0, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl256_u32, uint32_t, -+ x0 = svqdech_pat_n_u32 (x0, SV_VL256, 16), -+ x0 = svqdech_pat (x0, SV_VL256, 16)) -+ -+/* -+** qdech_pat_n_mul4_u32: -+** uqdech w0, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_mul4_u32, uint32_t, -+ x0 = svqdech_pat_n_u32 (x0, SV_MUL4, 16), -+ x0 = svqdech_pat (x0, SV_MUL4, 16)) -+ -+/* -+** qdech_pat_n_mul3_u32: -+** uqdech w0, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_mul3_u32, uint32_t, -+ x0 = svqdech_pat_n_u32 (x0, SV_MUL3, 16), -+ x0 = svqdech_pat (x0, SV_MUL3, 16)) -+ -+/* -+** qdech_pat_n_all_u32: -+** uqdech w0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_all_u32, uint32_t, -+ x0 = svqdech_pat_n_u32 (x0, SV_ALL, 16), -+ x0 = svqdech_pat (x0, SV_ALL, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_pat_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_pat_u64.c -new file mode 100644 -index 000000000..7092127f2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_pat_u64.c -@@ -0,0 +1,202 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdech_pat_n_1_u64_tied: -+** uqdech x0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_1_u64_tied, uint64_t, -+ x0 = svqdech_pat_n_u64 (x0, SV_POW2, 1), -+ x0 = svqdech_pat (x0, SV_POW2, 1)) -+ -+/* -+** qdech_pat_n_1_u64_untied: -+** mov x0, x1 -+** uqdech x0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_1_u64_untied, uint64_t, -+ x0 = svqdech_pat_n_u64 (x1, SV_POW2, 1), -+ 
x0 = svqdech_pat (x1, SV_POW2, 1)) -+ -+/* -+** qdech_pat_n_2_u64: -+** uqdech x0, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_2_u64, uint64_t, -+ x0 = svqdech_pat_n_u64 (x0, SV_POW2, 2), -+ x0 = svqdech_pat (x0, SV_POW2, 2)) -+ -+/* -+** qdech_pat_n_7_u64: -+** uqdech x0, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_7_u64, uint64_t, -+ x0 = svqdech_pat_n_u64 (x0, SV_POW2, 7), -+ x0 = svqdech_pat (x0, SV_POW2, 7)) -+ -+/* -+** qdech_pat_n_15_u64: -+** uqdech x0, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_15_u64, uint64_t, -+ x0 = svqdech_pat_n_u64 (x0, SV_POW2, 15), -+ x0 = svqdech_pat (x0, SV_POW2, 15)) -+ -+/* -+** qdech_pat_n_16_u64: -+** uqdech x0, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_16_u64, uint64_t, -+ x0 = svqdech_pat_n_u64 (x0, SV_POW2, 16), -+ x0 = svqdech_pat (x0, SV_POW2, 16)) -+ -+/* -+** qdech_pat_n_vl1_u64: -+** uqdech x0, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl1_u64, uint64_t, -+ x0 = svqdech_pat_n_u64 (x0, SV_VL1, 16), -+ x0 = svqdech_pat (x0, SV_VL1, 16)) -+ -+/* -+** qdech_pat_n_vl2_u64: -+** uqdech x0, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl2_u64, uint64_t, -+ x0 = svqdech_pat_n_u64 (x0, SV_VL2, 16), -+ x0 = svqdech_pat (x0, SV_VL2, 16)) -+ -+/* -+** qdech_pat_n_vl3_u64: -+** uqdech x0, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl3_u64, uint64_t, -+ x0 = svqdech_pat_n_u64 (x0, SV_VL3, 16), -+ x0 = svqdech_pat (x0, SV_VL3, 16)) -+ -+/* -+** qdech_pat_n_vl4_u64: -+** uqdech x0, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl4_u64, uint64_t, -+ x0 = svqdech_pat_n_u64 (x0, SV_VL4, 16), -+ x0 = svqdech_pat (x0, SV_VL4, 16)) -+ -+/* -+** qdech_pat_n_vl5_u64: -+** uqdech x0, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl5_u64, uint64_t, -+ x0 = svqdech_pat_n_u64 (x0, SV_VL5, 16), -+ x0 = svqdech_pat (x0, SV_VL5, 16)) -+ -+/* -+** qdech_pat_n_vl6_u64: -+** uqdech x0, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl6_u64, uint64_t, -+ x0 = svqdech_pat_n_u64 (x0, SV_VL6, 16), -+ x0 = svqdech_pat (x0, SV_VL6, 16)) -+ -+/* -+** qdech_pat_n_vl7_u64: -+** uqdech x0, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl7_u64, uint64_t, -+ x0 = svqdech_pat_n_u64 (x0, SV_VL7, 16), -+ x0 = svqdech_pat (x0, SV_VL7, 16)) -+ -+/* -+** qdech_pat_n_vl8_u64: -+** uqdech x0, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl8_u64, uint64_t, -+ x0 = svqdech_pat_n_u64 (x0, SV_VL8, 16), -+ x0 = svqdech_pat (x0, SV_VL8, 16)) -+ -+/* -+** qdech_pat_n_vl16_u64: -+** uqdech x0, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl16_u64, uint64_t, -+ x0 = svqdech_pat_n_u64 (x0, SV_VL16, 16), -+ x0 = svqdech_pat (x0, SV_VL16, 16)) -+ -+/* -+** qdech_pat_n_vl32_u64: -+** uqdech x0, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl32_u64, uint64_t, -+ x0 = svqdech_pat_n_u64 (x0, SV_VL32, 16), -+ x0 = svqdech_pat (x0, SV_VL32, 16)) -+ -+/* -+** qdech_pat_n_vl64_u64: -+** uqdech x0, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl64_u64, uint64_t, -+ x0 = svqdech_pat_n_u64 (x0, SV_VL64, 16), -+ x0 = svqdech_pat (x0, SV_VL64, 16)) -+ -+/* -+** qdech_pat_n_vl128_u64: -+** uqdech x0, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl128_u64, uint64_t, -+ x0 = svqdech_pat_n_u64 (x0, SV_VL128, 16), -+ x0 = svqdech_pat (x0, SV_VL128, 16)) -+ -+/* -+** qdech_pat_n_vl256_u64: -+** uqdech x0, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_vl256_u64, uint64_t, -+ x0 = svqdech_pat_n_u64 (x0, SV_VL256, 
16), -+ x0 = svqdech_pat (x0, SV_VL256, 16)) -+ -+/* -+** qdech_pat_n_mul4_u64: -+** uqdech x0, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_mul4_u64, uint64_t, -+ x0 = svqdech_pat_n_u64 (x0, SV_MUL4, 16), -+ x0 = svqdech_pat (x0, SV_MUL4, 16)) -+ -+/* -+** qdech_pat_n_mul3_u64: -+** uqdech x0, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_mul3_u64, uint64_t, -+ x0 = svqdech_pat_n_u64 (x0, SV_MUL3, 16), -+ x0 = svqdech_pat (x0, SV_MUL3, 16)) -+ -+/* -+** qdech_pat_n_all_u64: -+** uqdech x0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_pat_n_all_u64, uint64_t, -+ x0 = svqdech_pat_n_u64 (x0, SV_ALL, 16), -+ x0 = svqdech_pat (x0, SV_ALL, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_s16.c -new file mode 100644 -index 000000000..2a7a8f7a6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_s16.c -@@ -0,0 +1,58 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdech_1_s16_tied: -+** sqdech z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_1_s16_tied, svint16_t, -+ z0 = svqdech_s16 (z0, 1), -+ z0 = svqdech (z0, 1)) -+ -+/* -+** qdech_1_s16_untied: -+** movprfx z0, z1 -+** sqdech z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_1_s16_untied, svint16_t, -+ z0 = svqdech_s16 (z1, 1), -+ z0 = svqdech (z1, 1)) -+ -+/* -+** qdech_2_s16: -+** sqdech z0\.h, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_2_s16, svint16_t, -+ z0 = svqdech_s16 (z0, 2), -+ z0 = svqdech (z0, 2)) -+ -+/* -+** qdech_7_s16: -+** sqdech z0\.h, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_7_s16, svint16_t, -+ z0 = svqdech_s16 (z0, 7), -+ z0 = svqdech (z0, 7)) -+ -+/* -+** qdech_15_s16: -+** sqdech z0\.h, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_15_s16, svint16_t, -+ z0 = svqdech_s16 (z0, 15), -+ z0 = svqdech (z0, 15)) -+ -+/* -+** qdech_16_s16: -+** sqdech z0\.h, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_16_s16, svint16_t, -+ z0 = svqdech_s16 (z0, 16), -+ z0 = svqdech (z0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_s32.c -new file mode 100644 -index 000000000..7fd57d85a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_s32.c -@@ -0,0 +1,58 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdech_n_1_s32_tied: -+** sqdech x0, w0 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_n_1_s32_tied, int32_t, -+ x0 = svqdech_n_s32 (x0, 1), -+ x0 = svqdech (x0, 1)) -+ -+/* -+** qdech_n_1_s32_untied: -+** mov w0, w1 -+** sqdech x0, w0 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_n_1_s32_untied, int32_t, -+ x0 = svqdech_n_s32 (x1, 1), -+ x0 = svqdech (x1, 1)) -+ -+/* -+** qdech_n_2_s32: -+** sqdech x0, w0, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_n_2_s32, int32_t, -+ x0 = svqdech_n_s32 (x0, 2), -+ x0 = svqdech (x0, 2)) -+ -+/* -+** qdech_n_7_s32: -+** sqdech x0, w0, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_n_7_s32, int32_t, -+ x0 = svqdech_n_s32 (x0, 7), -+ x0 = svqdech (x0, 7)) -+ -+/* -+** qdech_n_15_s32: -+** sqdech x0, w0, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_n_15_s32, int32_t, -+ x0 = svqdech_n_s32 (x0, 15), -+ x0 = svqdech (x0, 15)) -+ -+/* -+** qdech_n_16_s32: -+** sqdech x0, w0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_n_16_s32, int32_t, -+ x0 = svqdech_n_s32 (x0, 16), -+ x0 = svqdech (x0, 16)) -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_s64.c -new file mode 100644 -index 000000000..61989f8d6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_s64.c -@@ -0,0 +1,58 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdech_n_1_s64_tied: -+** sqdech x0 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_n_1_s64_tied, int64_t, -+ x0 = svqdech_n_s64 (x0, 1), -+ x0 = svqdech (x0, 1)) -+ -+/* -+** qdech_n_1_s64_untied: -+** mov x0, x1 -+** sqdech x0 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_n_1_s64_untied, int64_t, -+ x0 = svqdech_n_s64 (x1, 1), -+ x0 = svqdech (x1, 1)) -+ -+/* -+** qdech_n_2_s64: -+** sqdech x0, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_n_2_s64, int64_t, -+ x0 = svqdech_n_s64 (x0, 2), -+ x0 = svqdech (x0, 2)) -+ -+/* -+** qdech_n_7_s64: -+** sqdech x0, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_n_7_s64, int64_t, -+ x0 = svqdech_n_s64 (x0, 7), -+ x0 = svqdech (x0, 7)) -+ -+/* -+** qdech_n_15_s64: -+** sqdech x0, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_n_15_s64, int64_t, -+ x0 = svqdech_n_s64 (x0, 15), -+ x0 = svqdech (x0, 15)) -+ -+/* -+** qdech_n_16_s64: -+** sqdech x0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_n_16_s64, int64_t, -+ x0 = svqdech_n_s64 (x0, 16), -+ x0 = svqdech (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_u16.c -new file mode 100644 -index 000000000..0d6587851 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_u16.c -@@ -0,0 +1,58 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdech_1_u16_tied: -+** uqdech z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_1_u16_tied, svuint16_t, -+ z0 = svqdech_u16 (z0, 1), -+ z0 = svqdech (z0, 1)) -+ -+/* -+** qdech_1_u16_untied: -+** movprfx z0, z1 -+** uqdech z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_1_u16_untied, svuint16_t, -+ z0 = svqdech_u16 (z1, 1), -+ z0 = svqdech (z1, 1)) -+ -+/* -+** qdech_2_u16: -+** uqdech z0\.h, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_2_u16, svuint16_t, -+ z0 = svqdech_u16 (z0, 2), -+ z0 = svqdech (z0, 2)) -+ -+/* -+** qdech_7_u16: -+** uqdech z0\.h, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_7_u16, svuint16_t, -+ z0 = svqdech_u16 (z0, 7), -+ z0 = svqdech (z0, 7)) -+ -+/* -+** qdech_15_u16: -+** uqdech z0\.h, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_15_u16, svuint16_t, -+ z0 = svqdech_u16 (z0, 15), -+ z0 = svqdech (z0, 15)) -+ -+/* -+** qdech_16_u16: -+** uqdech z0\.h, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdech_16_u16, svuint16_t, -+ z0 = svqdech_u16 (z0, 16), -+ z0 = svqdech (z0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_u32.c -new file mode 100644 -index 000000000..179d67953 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_u32.c -@@ -0,0 +1,58 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdech_n_1_u32_tied: -+** uqdech w0 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_n_1_u32_tied, uint32_t, -+ x0 = svqdech_n_u32 (x0, 1), -+ x0 = svqdech (x0, 1)) -+ -+/* -+** qdech_n_1_u32_untied: -+** mov w0, w1 -+** uqdech w0 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_n_1_u32_untied, uint32_t, -+ x0 = svqdech_n_u32 (x1, 1), -+ x0 = svqdech (x1, 1)) -+ 
-+/* -+** qdech_n_2_u32: -+** uqdech w0, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_n_2_u32, uint32_t, -+ x0 = svqdech_n_u32 (x0, 2), -+ x0 = svqdech (x0, 2)) -+ -+/* -+** qdech_n_7_u32: -+** uqdech w0, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_n_7_u32, uint32_t, -+ x0 = svqdech_n_u32 (x0, 7), -+ x0 = svqdech (x0, 7)) -+ -+/* -+** qdech_n_15_u32: -+** uqdech w0, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_n_15_u32, uint32_t, -+ x0 = svqdech_n_u32 (x0, 15), -+ x0 = svqdech (x0, 15)) -+ -+/* -+** qdech_n_16_u32: -+** uqdech w0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_n_16_u32, uint32_t, -+ x0 = svqdech_n_u32 (x0, 16), -+ x0 = svqdech (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_u64.c -new file mode 100644 -index 000000000..da2f051af ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdech_u64.c -@@ -0,0 +1,58 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdech_n_1_u64_tied: -+** uqdech x0 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_n_1_u64_tied, uint64_t, -+ x0 = svqdech_n_u64 (x0, 1), -+ x0 = svqdech (x0, 1)) -+ -+/* -+** qdech_n_1_u64_untied: -+** mov x0, x1 -+** uqdech x0 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_n_1_u64_untied, uint64_t, -+ x0 = svqdech_n_u64 (x1, 1), -+ x0 = svqdech (x1, 1)) -+ -+/* -+** qdech_n_2_u64: -+** uqdech x0, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_n_2_u64, uint64_t, -+ x0 = svqdech_n_u64 (x0, 2), -+ x0 = svqdech (x0, 2)) -+ -+/* -+** qdech_n_7_u64: -+** uqdech x0, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_n_7_u64, uint64_t, -+ x0 = svqdech_n_u64 (x0, 7), -+ x0 = svqdech (x0, 7)) -+ -+/* -+** qdech_n_15_u64: -+** uqdech x0, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_n_15_u64, uint64_t, -+ x0 = svqdech_n_u64 (x0, 15), -+ x0 = svqdech (x0, 15)) -+ -+/* -+** qdech_n_16_u64: -+** uqdech x0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdech_n_16_u64, uint64_t, -+ x0 = svqdech_n_u64 (x0, 16), -+ x0 = svqdech (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecp_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecp_s16.c -new file mode 100644 -index 000000000..71b40c152 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecp_s16.c -@@ -0,0 +1,22 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdecp_s16_tied: -+** sqdecp z0\.h, p0 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecp_s16_tied, svint16_t, -+ z0 = svqdecp_s16 (z0, p0), -+ z0 = svqdecp (z0, p0)) -+ -+/* -+** qdecp_s16_untied: -+** movprfx z0, z1 -+** sqdecp z0\.h, p0 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecp_s16_untied, svint16_t, -+ z0 = svqdecp_s16 (z1, p0), -+ z0 = svqdecp (z1, p0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecp_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecp_s32.c -new file mode 100644 -index 000000000..55e4067d1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecp_s32.c -@@ -0,0 +1,98 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdecp_s32_tied: -+** sqdecp z0\.s, p0 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecp_s32_tied, svint32_t, -+ z0 = svqdecp_s32 (z0, p0), -+ z0 = svqdecp (z0, p0)) -+ -+/* -+** qdecp_s32_untied: -+** movprfx z0, z1 -+** sqdecp z0\.s, p0 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecp_s32_untied, svint32_t, -+ z0 = svqdecp_s32 (z1, p0), -+ 
z0 = svqdecp (z1, p0)) -+ -+/* -+** qdecp_n_s32_b8_tied: -+** sqdecp x0, p0\.b, w0 -+** ret -+*/ -+TEST_UNIFORM_S (qdecp_n_s32_b8_tied, int32_t, -+ x0 = svqdecp_n_s32_b8 (x0, p0), -+ x0 = svqdecp_b8 (x0, p0)) -+ -+/* -+** qdecp_n_s32_b8_untied: -+** mov w0, w1 -+** sqdecp x0, p0\.b, w0 -+** ret -+*/ -+TEST_UNIFORM_S (qdecp_n_s32_b8_untied, int32_t, -+ x0 = svqdecp_n_s32_b8 (x1, p0), -+ x0 = svqdecp_b8 (x1, p0)) -+ -+/* -+** qdecp_n_s32_b16_tied: -+** sqdecp x0, p0\.h, w0 -+** ret -+*/ -+TEST_UNIFORM_S (qdecp_n_s32_b16_tied, int32_t, -+ x0 = svqdecp_n_s32_b16 (x0, p0), -+ x0 = svqdecp_b16 (x0, p0)) -+ -+/* -+** qdecp_n_s32_b16_untied: -+** mov w0, w1 -+** sqdecp x0, p0\.h, w0 -+** ret -+*/ -+TEST_UNIFORM_S (qdecp_n_s32_b16_untied, int32_t, -+ x0 = svqdecp_n_s32_b16 (x1, p0), -+ x0 = svqdecp_b16 (x1, p0)) -+ -+/* -+** qdecp_n_s32_b32_tied: -+** sqdecp x0, p0\.s, w0 -+** ret -+*/ -+TEST_UNIFORM_S (qdecp_n_s32_b32_tied, int32_t, -+ x0 = svqdecp_n_s32_b32 (x0, p0), -+ x0 = svqdecp_b32 (x0, p0)) -+ -+/* -+** qdecp_n_s32_b32_untied: -+** mov w0, w1 -+** sqdecp x0, p0\.s, w0 -+** ret -+*/ -+TEST_UNIFORM_S (qdecp_n_s32_b32_untied, int32_t, -+ x0 = svqdecp_n_s32_b32 (x1, p0), -+ x0 = svqdecp_b32 (x1, p0)) -+ -+/* -+** qdecp_n_s32_b64_tied: -+** sqdecp x0, p0\.d, w0 -+** ret -+*/ -+TEST_UNIFORM_S (qdecp_n_s32_b64_tied, int32_t, -+ x0 = svqdecp_n_s32_b64 (x0, p0), -+ x0 = svqdecp_b64 (x0, p0)) -+ -+/* -+** qdecp_n_s32_b64_untied: -+** mov w0, w1 -+** sqdecp x0, p0\.d, w0 -+** ret -+*/ -+TEST_UNIFORM_S (qdecp_n_s32_b64_untied, int32_t, -+ x0 = svqdecp_n_s32_b64 (x1, p0), -+ x0 = svqdecp_b64 (x1, p0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecp_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecp_s64.c -new file mode 100644 -index 000000000..9527999c8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecp_s64.c -@@ -0,0 +1,98 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdecp_s64_tied: -+** sqdecp z0\.d, p0 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecp_s64_tied, svint64_t, -+ z0 = svqdecp_s64 (z0, p0), -+ z0 = svqdecp (z0, p0)) -+ -+/* -+** qdecp_s64_untied: -+** movprfx z0, z1 -+** sqdecp z0\.d, p0 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecp_s64_untied, svint64_t, -+ z0 = svqdecp_s64 (z1, p0), -+ z0 = svqdecp (z1, p0)) -+ -+/* -+** qdecp_n_s64_b8_tied: -+** sqdecp x0, p0\.b -+** ret -+*/ -+TEST_UNIFORM_S (qdecp_n_s64_b8_tied, int64_t, -+ x0 = svqdecp_n_s64_b8 (x0, p0), -+ x0 = svqdecp_b8 (x0, p0)) -+ -+/* -+** qdecp_n_s64_b8_untied: -+** mov x0, x1 -+** sqdecp x0, p0\.b -+** ret -+*/ -+TEST_UNIFORM_S (qdecp_n_s64_b8_untied, int64_t, -+ x0 = svqdecp_n_s64_b8 (x1, p0), -+ x0 = svqdecp_b8 (x1, p0)) -+ -+/* -+** qdecp_n_s64_b16_tied: -+** sqdecp x0, p0\.h -+** ret -+*/ -+TEST_UNIFORM_S (qdecp_n_s64_b16_tied, int64_t, -+ x0 = svqdecp_n_s64_b16 (x0, p0), -+ x0 = svqdecp_b16 (x0, p0)) -+ -+/* -+** qdecp_n_s64_b16_untied: -+** mov x0, x1 -+** sqdecp x0, p0\.h -+** ret -+*/ -+TEST_UNIFORM_S (qdecp_n_s64_b16_untied, int64_t, -+ x0 = svqdecp_n_s64_b16 (x1, p0), -+ x0 = svqdecp_b16 (x1, p0)) -+ -+/* -+** qdecp_n_s64_b32_tied: -+** sqdecp x0, p0\.s -+** ret -+*/ -+TEST_UNIFORM_S (qdecp_n_s64_b32_tied, int64_t, -+ x0 = svqdecp_n_s64_b32 (x0, p0), -+ x0 = svqdecp_b32 (x0, p0)) -+ -+/* -+** qdecp_n_s64_b32_untied: -+** mov x0, x1 -+** sqdecp x0, p0\.s -+** ret -+*/ -+TEST_UNIFORM_S (qdecp_n_s64_b32_untied, int64_t, -+ x0 = svqdecp_n_s64_b32 (x1, p0), -+ x0 = svqdecp_b32 (x1, p0)) -+ -+/* -+** qdecp_n_s64_b64_tied: 
-+** sqdecp x0, p0\.d -+** ret -+*/ -+TEST_UNIFORM_S (qdecp_n_s64_b64_tied, int64_t, -+ x0 = svqdecp_n_s64_b64 (x0, p0), -+ x0 = svqdecp_b64 (x0, p0)) -+ -+/* -+** qdecp_n_s64_b64_untied: -+** mov x0, x1 -+** sqdecp x0, p0\.d -+** ret -+*/ -+TEST_UNIFORM_S (qdecp_n_s64_b64_untied, int64_t, -+ x0 = svqdecp_n_s64_b64 (x1, p0), -+ x0 = svqdecp_b64 (x1, p0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecp_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecp_u16.c -new file mode 100644 -index 000000000..33357ada4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecp_u16.c -@@ -0,0 +1,22 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdecp_u16_tied: -+** uqdecp z0\.h, p0 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecp_u16_tied, svuint16_t, -+ z0 = svqdecp_u16 (z0, p0), -+ z0 = svqdecp (z0, p0)) -+ -+/* -+** qdecp_u16_untied: -+** movprfx z0, z1 -+** uqdecp z0\.h, p0 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecp_u16_untied, svuint16_t, -+ z0 = svqdecp_u16 (z1, p0), -+ z0 = svqdecp (z1, p0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecp_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecp_u32.c -new file mode 100644 -index 000000000..58e9a642e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecp_u32.c -@@ -0,0 +1,98 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdecp_u32_tied: -+** uqdecp z0\.s, p0 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecp_u32_tied, svuint32_t, -+ z0 = svqdecp_u32 (z0, p0), -+ z0 = svqdecp (z0, p0)) -+ -+/* -+** qdecp_u32_untied: -+** movprfx z0, z1 -+** uqdecp z0\.s, p0 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecp_u32_untied, svuint32_t, -+ z0 = svqdecp_u32 (z1, p0), -+ z0 = svqdecp (z1, p0)) -+ -+/* -+** qdecp_n_u32_b8_tied: -+** uqdecp w0, p0\.b -+** ret -+*/ -+TEST_UNIFORM_S (qdecp_n_u32_b8_tied, uint32_t, -+ x0 = svqdecp_n_u32_b8 (x0, p0), -+ x0 = svqdecp_b8 (x0, p0)) -+ -+/* -+** qdecp_n_u32_b8_untied: -+** mov w0, w1 -+** uqdecp w0, p0\.b -+** ret -+*/ -+TEST_UNIFORM_S (qdecp_n_u32_b8_untied, uint32_t, -+ x0 = svqdecp_n_u32_b8 (x1, p0), -+ x0 = svqdecp_b8 (x1, p0)) -+ -+/* -+** qdecp_n_u32_b16_tied: -+** uqdecp w0, p0\.h -+** ret -+*/ -+TEST_UNIFORM_S (qdecp_n_u32_b16_tied, uint32_t, -+ x0 = svqdecp_n_u32_b16 (x0, p0), -+ x0 = svqdecp_b16 (x0, p0)) -+ -+/* -+** qdecp_n_u32_b16_untied: -+** mov w0, w1 -+** uqdecp w0, p0\.h -+** ret -+*/ -+TEST_UNIFORM_S (qdecp_n_u32_b16_untied, uint32_t, -+ x0 = svqdecp_n_u32_b16 (x1, p0), -+ x0 = svqdecp_b16 (x1, p0)) -+ -+/* -+** qdecp_n_u32_b32_tied: -+** uqdecp w0, p0\.s -+** ret -+*/ -+TEST_UNIFORM_S (qdecp_n_u32_b32_tied, uint32_t, -+ x0 = svqdecp_n_u32_b32 (x0, p0), -+ x0 = svqdecp_b32 (x0, p0)) -+ -+/* -+** qdecp_n_u32_b32_untied: -+** mov w0, w1 -+** uqdecp w0, p0\.s -+** ret -+*/ -+TEST_UNIFORM_S (qdecp_n_u32_b32_untied, uint32_t, -+ x0 = svqdecp_n_u32_b32 (x1, p0), -+ x0 = svqdecp_b32 (x1, p0)) -+ -+/* -+** qdecp_n_u32_b64_tied: -+** uqdecp w0, p0\.d -+** ret -+*/ -+TEST_UNIFORM_S (qdecp_n_u32_b64_tied, uint32_t, -+ x0 = svqdecp_n_u32_b64 (x0, p0), -+ x0 = svqdecp_b64 (x0, p0)) -+ -+/* -+** qdecp_n_u32_b64_untied: -+** mov w0, w1 -+** uqdecp w0, p0\.d -+** ret -+*/ -+TEST_UNIFORM_S (qdecp_n_u32_b64_untied, uint32_t, -+ x0 = svqdecp_n_u32_b64 (x1, p0), -+ x0 = svqdecp_b64 (x1, p0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecp_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecp_u64.c -new file mode 
100644 -index 000000000..e2091d8ae ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecp_u64.c -@@ -0,0 +1,98 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdecp_u64_tied: -+** uqdecp z0\.d, p0 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecp_u64_tied, svuint64_t, -+ z0 = svqdecp_u64 (z0, p0), -+ z0 = svqdecp (z0, p0)) -+ -+/* -+** qdecp_u64_untied: -+** movprfx z0, z1 -+** uqdecp z0\.d, p0 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecp_u64_untied, svuint64_t, -+ z0 = svqdecp_u64 (z1, p0), -+ z0 = svqdecp (z1, p0)) -+ -+/* -+** qdecp_n_u64_b8_tied: -+** uqdecp x0, p0\.b -+** ret -+*/ -+TEST_UNIFORM_S (qdecp_n_u64_b8_tied, uint64_t, -+ x0 = svqdecp_n_u64_b8 (x0, p0), -+ x0 = svqdecp_b8 (x0, p0)) -+ -+/* -+** qdecp_n_u64_b8_untied: -+** mov x0, x1 -+** uqdecp x0, p0\.b -+** ret -+*/ -+TEST_UNIFORM_S (qdecp_n_u64_b8_untied, uint64_t, -+ x0 = svqdecp_n_u64_b8 (x1, p0), -+ x0 = svqdecp_b8 (x1, p0)) -+ -+/* -+** qdecp_n_u64_b16_tied: -+** uqdecp x0, p0\.h -+** ret -+*/ -+TEST_UNIFORM_S (qdecp_n_u64_b16_tied, uint64_t, -+ x0 = svqdecp_n_u64_b16 (x0, p0), -+ x0 = svqdecp_b16 (x0, p0)) -+ -+/* -+** qdecp_n_u64_b16_untied: -+** mov x0, x1 -+** uqdecp x0, p0\.h -+** ret -+*/ -+TEST_UNIFORM_S (qdecp_n_u64_b16_untied, uint64_t, -+ x0 = svqdecp_n_u64_b16 (x1, p0), -+ x0 = svqdecp_b16 (x1, p0)) -+ -+/* -+** qdecp_n_u64_b32_tied: -+** uqdecp x0, p0\.s -+** ret -+*/ -+TEST_UNIFORM_S (qdecp_n_u64_b32_tied, uint64_t, -+ x0 = svqdecp_n_u64_b32 (x0, p0), -+ x0 = svqdecp_b32 (x0, p0)) -+ -+/* -+** qdecp_n_u64_b32_untied: -+** mov x0, x1 -+** uqdecp x0, p0\.s -+** ret -+*/ -+TEST_UNIFORM_S (qdecp_n_u64_b32_untied, uint64_t, -+ x0 = svqdecp_n_u64_b32 (x1, p0), -+ x0 = svqdecp_b32 (x1, p0)) -+ -+/* -+** qdecp_n_u64_b64_tied: -+** uqdecp x0, p0\.d -+** ret -+*/ -+TEST_UNIFORM_S (qdecp_n_u64_b64_tied, uint64_t, -+ x0 = svqdecp_n_u64_b64 (x0, p0), -+ x0 = svqdecp_b64 (x0, p0)) -+ -+/* -+** qdecp_n_u64_b64_untied: -+** mov x0, x1 -+** uqdecp x0, p0\.d -+** ret -+*/ -+TEST_UNIFORM_S (qdecp_n_u64_b64_untied, uint64_t, -+ x0 = svqdecp_n_u64_b64 (x1, p0), -+ x0 = svqdecp_b64 (x1, p0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_pat_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_pat_s32.c -new file mode 100644 -index 000000000..d80f7be4d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_pat_s32.c -@@ -0,0 +1,401 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdecw_pat_1_s32_tied: -+** sqdecw z0\.s, pow2 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_1_s32_tied, svint32_t, -+ z0 = svqdecw_pat_s32 (z0, SV_POW2, 1), -+ z0 = svqdecw_pat (z0, SV_POW2, 1)) -+ -+/* -+** qdecw_pat_1_s32_untied: -+** movprfx z0, z1 -+** sqdecw z0\.s, pow2 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_1_s32_untied, svint32_t, -+ z0 = svqdecw_pat_s32 (z1, SV_POW2, 1), -+ z0 = svqdecw_pat (z1, SV_POW2, 1)) -+ -+/* -+** qdecw_pat_2_s32: -+** sqdecw z0\.s, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_2_s32, svint32_t, -+ z0 = svqdecw_pat_s32 (z0, SV_POW2, 2), -+ z0 = svqdecw_pat (z0, SV_POW2, 2)) -+ -+/* -+** qdecw_pat_7_s32: -+** sqdecw z0\.s, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_7_s32, svint32_t, -+ z0 = svqdecw_pat_s32 (z0, SV_POW2, 7), -+ z0 = svqdecw_pat (z0, SV_POW2, 7)) -+ -+/* -+** qdecw_pat_15_s32: -+** sqdecw z0\.s, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_15_s32, svint32_t, -+ z0 = svqdecw_pat_s32 (z0, SV_POW2, 
15), -+ z0 = svqdecw_pat (z0, SV_POW2, 15)) -+ -+/* -+** qdecw_pat_16_s32: -+** sqdecw z0\.s, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_16_s32, svint32_t, -+ z0 = svqdecw_pat_s32 (z0, SV_POW2, 16), -+ z0 = svqdecw_pat (z0, SV_POW2, 16)) -+ -+/* -+** qdecw_pat_vl1_s32: -+** sqdecw z0\.s, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_vl1_s32, svint32_t, -+ z0 = svqdecw_pat_s32 (z0, SV_VL1, 16), -+ z0 = svqdecw_pat (z0, SV_VL1, 16)) -+ -+/* -+** qdecw_pat_vl2_s32: -+** sqdecw z0\.s, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_vl2_s32, svint32_t, -+ z0 = svqdecw_pat_s32 (z0, SV_VL2, 16), -+ z0 = svqdecw_pat (z0, SV_VL2, 16)) -+ -+/* -+** qdecw_pat_vl3_s32: -+** sqdecw z0\.s, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_vl3_s32, svint32_t, -+ z0 = svqdecw_pat_s32 (z0, SV_VL3, 16), -+ z0 = svqdecw_pat (z0, SV_VL3, 16)) -+ -+/* -+** qdecw_pat_vl4_s32: -+** sqdecw z0\.s, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_vl4_s32, svint32_t, -+ z0 = svqdecw_pat_s32 (z0, SV_VL4, 16), -+ z0 = svqdecw_pat (z0, SV_VL4, 16)) -+ -+/* -+** qdecw_pat_vl5_s32: -+** sqdecw z0\.s, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_vl5_s32, svint32_t, -+ z0 = svqdecw_pat_s32 (z0, SV_VL5, 16), -+ z0 = svqdecw_pat (z0, SV_VL5, 16)) -+ -+/* -+** qdecw_pat_vl6_s32: -+** sqdecw z0\.s, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_vl6_s32, svint32_t, -+ z0 = svqdecw_pat_s32 (z0, SV_VL6, 16), -+ z0 = svqdecw_pat (z0, SV_VL6, 16)) -+ -+/* -+** qdecw_pat_vl7_s32: -+** sqdecw z0\.s, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_vl7_s32, svint32_t, -+ z0 = svqdecw_pat_s32 (z0, SV_VL7, 16), -+ z0 = svqdecw_pat (z0, SV_VL7, 16)) -+ -+/* -+** qdecw_pat_vl8_s32: -+** sqdecw z0\.s, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_vl8_s32, svint32_t, -+ z0 = svqdecw_pat_s32 (z0, SV_VL8, 16), -+ z0 = svqdecw_pat (z0, SV_VL8, 16)) -+ -+/* -+** qdecw_pat_vl16_s32: -+** sqdecw z0\.s, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_vl16_s32, svint32_t, -+ z0 = svqdecw_pat_s32 (z0, SV_VL16, 16), -+ z0 = svqdecw_pat (z0, SV_VL16, 16)) -+ -+/* -+** qdecw_pat_vl32_s32: -+** sqdecw z0\.s, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_vl32_s32, svint32_t, -+ z0 = svqdecw_pat_s32 (z0, SV_VL32, 16), -+ z0 = svqdecw_pat (z0, SV_VL32, 16)) -+ -+/* -+** qdecw_pat_vl64_s32: -+** sqdecw z0\.s, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_vl64_s32, svint32_t, -+ z0 = svqdecw_pat_s32 (z0, SV_VL64, 16), -+ z0 = svqdecw_pat (z0, SV_VL64, 16)) -+ -+/* -+** qdecw_pat_vl128_s32: -+** sqdecw z0\.s, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_vl128_s32, svint32_t, -+ z0 = svqdecw_pat_s32 (z0, SV_VL128, 16), -+ z0 = svqdecw_pat (z0, SV_VL128, 16)) -+ -+/* -+** qdecw_pat_vl256_s32: -+** sqdecw z0\.s, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_vl256_s32, svint32_t, -+ z0 = svqdecw_pat_s32 (z0, SV_VL256, 16), -+ z0 = svqdecw_pat (z0, SV_VL256, 16)) -+ -+/* -+** qdecw_pat_mul4_s32: -+** sqdecw z0\.s, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_mul4_s32, svint32_t, -+ z0 = svqdecw_pat_s32 (z0, SV_MUL4, 16), -+ z0 = svqdecw_pat (z0, SV_MUL4, 16)) -+ -+/* -+** qdecw_pat_mul3_s32: -+** sqdecw z0\.s, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_mul3_s32, svint32_t, -+ z0 = svqdecw_pat_s32 (z0, SV_MUL3, 16), -+ z0 = svqdecw_pat (z0, SV_MUL3, 16)) -+ -+/* -+** qdecw_pat_all_s32: -+** sqdecw z0\.s, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_all_s32, svint32_t, -+ z0 = svqdecw_pat_s32 (z0, SV_ALL, 16), 
-+ z0 = svqdecw_pat (z0, SV_ALL, 16)) -+ -+/* -+** qdecw_pat_n_1_s32_tied: -+** sqdecw x0, w0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_1_s32_tied, int32_t, -+ x0 = svqdecw_pat_n_s32 (x0, SV_POW2, 1), -+ x0 = svqdecw_pat (x0, SV_POW2, 1)) -+ -+/* -+** qdecw_pat_n_1_s32_untied: -+** mov w0, w1 -+** sqdecw x0, w0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_1_s32_untied, int32_t, -+ x0 = svqdecw_pat_n_s32 (x1, SV_POW2, 1), -+ x0 = svqdecw_pat (x1, SV_POW2, 1)) -+ -+/* -+** qdecw_pat_n_2_s32: -+** sqdecw x0, w0, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_2_s32, int32_t, -+ x0 = svqdecw_pat_n_s32 (x0, SV_POW2, 2), -+ x0 = svqdecw_pat (x0, SV_POW2, 2)) -+ -+/* -+** qdecw_pat_n_7_s32: -+** sqdecw x0, w0, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_7_s32, int32_t, -+ x0 = svqdecw_pat_n_s32 (x0, SV_POW2, 7), -+ x0 = svqdecw_pat (x0, SV_POW2, 7)) -+ -+/* -+** qdecw_pat_n_15_s32: -+** sqdecw x0, w0, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_15_s32, int32_t, -+ x0 = svqdecw_pat_n_s32 (x0, SV_POW2, 15), -+ x0 = svqdecw_pat (x0, SV_POW2, 15)) -+ -+/* -+** qdecw_pat_n_16_s32: -+** sqdecw x0, w0, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_16_s32, int32_t, -+ x0 = svqdecw_pat_n_s32 (x0, SV_POW2, 16), -+ x0 = svqdecw_pat (x0, SV_POW2, 16)) -+ -+/* -+** qdecw_pat_n_vl1_s32: -+** sqdecw x0, w0, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl1_s32, int32_t, -+ x0 = svqdecw_pat_n_s32 (x0, SV_VL1, 16), -+ x0 = svqdecw_pat (x0, SV_VL1, 16)) -+ -+/* -+** qdecw_pat_n_vl2_s32: -+** sqdecw x0, w0, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl2_s32, int32_t, -+ x0 = svqdecw_pat_n_s32 (x0, SV_VL2, 16), -+ x0 = svqdecw_pat (x0, SV_VL2, 16)) -+ -+/* -+** qdecw_pat_n_vl3_s32: -+** sqdecw x0, w0, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl3_s32, int32_t, -+ x0 = svqdecw_pat_n_s32 (x0, SV_VL3, 16), -+ x0 = svqdecw_pat (x0, SV_VL3, 16)) -+ -+/* -+** qdecw_pat_n_vl4_s32: -+** sqdecw x0, w0, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl4_s32, int32_t, -+ x0 = svqdecw_pat_n_s32 (x0, SV_VL4, 16), -+ x0 = svqdecw_pat (x0, SV_VL4, 16)) -+ -+/* -+** qdecw_pat_n_vl5_s32: -+** sqdecw x0, w0, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl5_s32, int32_t, -+ x0 = svqdecw_pat_n_s32 (x0, SV_VL5, 16), -+ x0 = svqdecw_pat (x0, SV_VL5, 16)) -+ -+/* -+** qdecw_pat_n_vl6_s32: -+** sqdecw x0, w0, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl6_s32, int32_t, -+ x0 = svqdecw_pat_n_s32 (x0, SV_VL6, 16), -+ x0 = svqdecw_pat (x0, SV_VL6, 16)) -+ -+/* -+** qdecw_pat_n_vl7_s32: -+** sqdecw x0, w0, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl7_s32, int32_t, -+ x0 = svqdecw_pat_n_s32 (x0, SV_VL7, 16), -+ x0 = svqdecw_pat (x0, SV_VL7, 16)) -+ -+/* -+** qdecw_pat_n_vl8_s32: -+** sqdecw x0, w0, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl8_s32, int32_t, -+ x0 = svqdecw_pat_n_s32 (x0, SV_VL8, 16), -+ x0 = svqdecw_pat (x0, SV_VL8, 16)) -+ -+/* -+** qdecw_pat_n_vl16_s32: -+** sqdecw x0, w0, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl16_s32, int32_t, -+ x0 = svqdecw_pat_n_s32 (x0, SV_VL16, 16), -+ x0 = svqdecw_pat (x0, SV_VL16, 16)) -+ -+/* -+** qdecw_pat_n_vl32_s32: -+** sqdecw x0, w0, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl32_s32, int32_t, -+ x0 = svqdecw_pat_n_s32 (x0, SV_VL32, 16), -+ x0 = svqdecw_pat (x0, SV_VL32, 16)) -+ -+/* -+** qdecw_pat_n_vl64_s32: -+** sqdecw x0, w0, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S 
(qdecw_pat_n_vl64_s32, int32_t, -+ x0 = svqdecw_pat_n_s32 (x0, SV_VL64, 16), -+ x0 = svqdecw_pat (x0, SV_VL64, 16)) -+ -+/* -+** qdecw_pat_n_vl128_s32: -+** sqdecw x0, w0, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl128_s32, int32_t, -+ x0 = svqdecw_pat_n_s32 (x0, SV_VL128, 16), -+ x0 = svqdecw_pat (x0, SV_VL128, 16)) -+ -+/* -+** qdecw_pat_n_vl256_s32: -+** sqdecw x0, w0, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl256_s32, int32_t, -+ x0 = svqdecw_pat_n_s32 (x0, SV_VL256, 16), -+ x0 = svqdecw_pat (x0, SV_VL256, 16)) -+ -+/* -+** qdecw_pat_n_mul4_s32: -+** sqdecw x0, w0, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_mul4_s32, int32_t, -+ x0 = svqdecw_pat_n_s32 (x0, SV_MUL4, 16), -+ x0 = svqdecw_pat (x0, SV_MUL4, 16)) -+ -+/* -+** qdecw_pat_n_mul3_s32: -+** sqdecw x0, w0, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_mul3_s32, int32_t, -+ x0 = svqdecw_pat_n_s32 (x0, SV_MUL3, 16), -+ x0 = svqdecw_pat (x0, SV_MUL3, 16)) -+ -+/* -+** qdecw_pat_n_all_s32: -+** sqdecw x0, w0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_all_s32, int32_t, -+ x0 = svqdecw_pat_n_s32 (x0, SV_ALL, 16), -+ x0 = svqdecw_pat (x0, SV_ALL, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_pat_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_pat_s64.c -new file mode 100644 -index 000000000..9c684a7c7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_pat_s64.c -@@ -0,0 +1,202 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdecw_pat_n_1_s64_tied: -+** sqdecw x0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_1_s64_tied, int64_t, -+ x0 = svqdecw_pat_n_s64 (x0, SV_POW2, 1), -+ x0 = svqdecw_pat (x0, SV_POW2, 1)) -+ -+/* -+** qdecw_pat_n_1_s64_untied: -+** mov x0, x1 -+** sqdecw x0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_1_s64_untied, int64_t, -+ x0 = svqdecw_pat_n_s64 (x1, SV_POW2, 1), -+ x0 = svqdecw_pat (x1, SV_POW2, 1)) -+ -+/* -+** qdecw_pat_n_2_s64: -+** sqdecw x0, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_2_s64, int64_t, -+ x0 = svqdecw_pat_n_s64 (x0, SV_POW2, 2), -+ x0 = svqdecw_pat (x0, SV_POW2, 2)) -+ -+/* -+** qdecw_pat_n_7_s64: -+** sqdecw x0, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_7_s64, int64_t, -+ x0 = svqdecw_pat_n_s64 (x0, SV_POW2, 7), -+ x0 = svqdecw_pat (x0, SV_POW2, 7)) -+ -+/* -+** qdecw_pat_n_15_s64: -+** sqdecw x0, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_15_s64, int64_t, -+ x0 = svqdecw_pat_n_s64 (x0, SV_POW2, 15), -+ x0 = svqdecw_pat (x0, SV_POW2, 15)) -+ -+/* -+** qdecw_pat_n_16_s64: -+** sqdecw x0, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_16_s64, int64_t, -+ x0 = svqdecw_pat_n_s64 (x0, SV_POW2, 16), -+ x0 = svqdecw_pat (x0, SV_POW2, 16)) -+ -+/* -+** qdecw_pat_n_vl1_s64: -+** sqdecw x0, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl1_s64, int64_t, -+ x0 = svqdecw_pat_n_s64 (x0, SV_VL1, 16), -+ x0 = svqdecw_pat (x0, SV_VL1, 16)) -+ -+/* -+** qdecw_pat_n_vl2_s64: -+** sqdecw x0, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl2_s64, int64_t, -+ x0 = svqdecw_pat_n_s64 (x0, SV_VL2, 16), -+ x0 = svqdecw_pat (x0, SV_VL2, 16)) -+ -+/* -+** qdecw_pat_n_vl3_s64: -+** sqdecw x0, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl3_s64, int64_t, -+ x0 = svqdecw_pat_n_s64 (x0, SV_VL3, 16), -+ x0 = svqdecw_pat (x0, SV_VL3, 16)) -+ -+/* -+** qdecw_pat_n_vl4_s64: -+** sqdecw x0, vl4, mul #16 -+** ret -+*/ 
-+TEST_UNIFORM_S (qdecw_pat_n_vl4_s64, int64_t, -+ x0 = svqdecw_pat_n_s64 (x0, SV_VL4, 16), -+ x0 = svqdecw_pat (x0, SV_VL4, 16)) -+ -+/* -+** qdecw_pat_n_vl5_s64: -+** sqdecw x0, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl5_s64, int64_t, -+ x0 = svqdecw_pat_n_s64 (x0, SV_VL5, 16), -+ x0 = svqdecw_pat (x0, SV_VL5, 16)) -+ -+/* -+** qdecw_pat_n_vl6_s64: -+** sqdecw x0, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl6_s64, int64_t, -+ x0 = svqdecw_pat_n_s64 (x0, SV_VL6, 16), -+ x0 = svqdecw_pat (x0, SV_VL6, 16)) -+ -+/* -+** qdecw_pat_n_vl7_s64: -+** sqdecw x0, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl7_s64, int64_t, -+ x0 = svqdecw_pat_n_s64 (x0, SV_VL7, 16), -+ x0 = svqdecw_pat (x0, SV_VL7, 16)) -+ -+/* -+** qdecw_pat_n_vl8_s64: -+** sqdecw x0, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl8_s64, int64_t, -+ x0 = svqdecw_pat_n_s64 (x0, SV_VL8, 16), -+ x0 = svqdecw_pat (x0, SV_VL8, 16)) -+ -+/* -+** qdecw_pat_n_vl16_s64: -+** sqdecw x0, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl16_s64, int64_t, -+ x0 = svqdecw_pat_n_s64 (x0, SV_VL16, 16), -+ x0 = svqdecw_pat (x0, SV_VL16, 16)) -+ -+/* -+** qdecw_pat_n_vl32_s64: -+** sqdecw x0, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl32_s64, int64_t, -+ x0 = svqdecw_pat_n_s64 (x0, SV_VL32, 16), -+ x0 = svqdecw_pat (x0, SV_VL32, 16)) -+ -+/* -+** qdecw_pat_n_vl64_s64: -+** sqdecw x0, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl64_s64, int64_t, -+ x0 = svqdecw_pat_n_s64 (x0, SV_VL64, 16), -+ x0 = svqdecw_pat (x0, SV_VL64, 16)) -+ -+/* -+** qdecw_pat_n_vl128_s64: -+** sqdecw x0, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl128_s64, int64_t, -+ x0 = svqdecw_pat_n_s64 (x0, SV_VL128, 16), -+ x0 = svqdecw_pat (x0, SV_VL128, 16)) -+ -+/* -+** qdecw_pat_n_vl256_s64: -+** sqdecw x0, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl256_s64, int64_t, -+ x0 = svqdecw_pat_n_s64 (x0, SV_VL256, 16), -+ x0 = svqdecw_pat (x0, SV_VL256, 16)) -+ -+/* -+** qdecw_pat_n_mul4_s64: -+** sqdecw x0, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_mul4_s64, int64_t, -+ x0 = svqdecw_pat_n_s64 (x0, SV_MUL4, 16), -+ x0 = svqdecw_pat (x0, SV_MUL4, 16)) -+ -+/* -+** qdecw_pat_n_mul3_s64: -+** sqdecw x0, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_mul3_s64, int64_t, -+ x0 = svqdecw_pat_n_s64 (x0, SV_MUL3, 16), -+ x0 = svqdecw_pat (x0, SV_MUL3, 16)) -+ -+/* -+** qdecw_pat_n_all_s64: -+** sqdecw x0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_all_s64, int64_t, -+ x0 = svqdecw_pat_n_s64 (x0, SV_ALL, 16), -+ x0 = svqdecw_pat (x0, SV_ALL, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_pat_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_pat_u32.c -new file mode 100644 -index 000000000..8d3fcb473 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_pat_u32.c -@@ -0,0 +1,401 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdecw_pat_1_u32_tied: -+** uqdecw z0\.s, pow2 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_1_u32_tied, svuint32_t, -+ z0 = svqdecw_pat_u32 (z0, SV_POW2, 1), -+ z0 = svqdecw_pat (z0, SV_POW2, 1)) -+ -+/* -+** qdecw_pat_1_u32_untied: -+** movprfx z0, z1 -+** uqdecw z0\.s, pow2 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_1_u32_untied, svuint32_t, -+ z0 = svqdecw_pat_u32 (z1, SV_POW2, 1), -+ z0 = svqdecw_pat (z1, SV_POW2, 1)) -+ -+/* -+** qdecw_pat_2_u32: -+** uqdecw z0\.s, pow2, mul #2 -+** 
ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_2_u32, svuint32_t, -+ z0 = svqdecw_pat_u32 (z0, SV_POW2, 2), -+ z0 = svqdecw_pat (z0, SV_POW2, 2)) -+ -+/* -+** qdecw_pat_7_u32: -+** uqdecw z0\.s, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_7_u32, svuint32_t, -+ z0 = svqdecw_pat_u32 (z0, SV_POW2, 7), -+ z0 = svqdecw_pat (z0, SV_POW2, 7)) -+ -+/* -+** qdecw_pat_15_u32: -+** uqdecw z0\.s, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_15_u32, svuint32_t, -+ z0 = svqdecw_pat_u32 (z0, SV_POW2, 15), -+ z0 = svqdecw_pat (z0, SV_POW2, 15)) -+ -+/* -+** qdecw_pat_16_u32: -+** uqdecw z0\.s, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_16_u32, svuint32_t, -+ z0 = svqdecw_pat_u32 (z0, SV_POW2, 16), -+ z0 = svqdecw_pat (z0, SV_POW2, 16)) -+ -+/* -+** qdecw_pat_vl1_u32: -+** uqdecw z0\.s, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_vl1_u32, svuint32_t, -+ z0 = svqdecw_pat_u32 (z0, SV_VL1, 16), -+ z0 = svqdecw_pat (z0, SV_VL1, 16)) -+ -+/* -+** qdecw_pat_vl2_u32: -+** uqdecw z0\.s, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_vl2_u32, svuint32_t, -+ z0 = svqdecw_pat_u32 (z0, SV_VL2, 16), -+ z0 = svqdecw_pat (z0, SV_VL2, 16)) -+ -+/* -+** qdecw_pat_vl3_u32: -+** uqdecw z0\.s, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_vl3_u32, svuint32_t, -+ z0 = svqdecw_pat_u32 (z0, SV_VL3, 16), -+ z0 = svqdecw_pat (z0, SV_VL3, 16)) -+ -+/* -+** qdecw_pat_vl4_u32: -+** uqdecw z0\.s, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_vl4_u32, svuint32_t, -+ z0 = svqdecw_pat_u32 (z0, SV_VL4, 16), -+ z0 = svqdecw_pat (z0, SV_VL4, 16)) -+ -+/* -+** qdecw_pat_vl5_u32: -+** uqdecw z0\.s, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_vl5_u32, svuint32_t, -+ z0 = svqdecw_pat_u32 (z0, SV_VL5, 16), -+ z0 = svqdecw_pat (z0, SV_VL5, 16)) -+ -+/* -+** qdecw_pat_vl6_u32: -+** uqdecw z0\.s, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_vl6_u32, svuint32_t, -+ z0 = svqdecw_pat_u32 (z0, SV_VL6, 16), -+ z0 = svqdecw_pat (z0, SV_VL6, 16)) -+ -+/* -+** qdecw_pat_vl7_u32: -+** uqdecw z0\.s, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_vl7_u32, svuint32_t, -+ z0 = svqdecw_pat_u32 (z0, SV_VL7, 16), -+ z0 = svqdecw_pat (z0, SV_VL7, 16)) -+ -+/* -+** qdecw_pat_vl8_u32: -+** uqdecw z0\.s, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_vl8_u32, svuint32_t, -+ z0 = svqdecw_pat_u32 (z0, SV_VL8, 16), -+ z0 = svqdecw_pat (z0, SV_VL8, 16)) -+ -+/* -+** qdecw_pat_vl16_u32: -+** uqdecw z0\.s, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_vl16_u32, svuint32_t, -+ z0 = svqdecw_pat_u32 (z0, SV_VL16, 16), -+ z0 = svqdecw_pat (z0, SV_VL16, 16)) -+ -+/* -+** qdecw_pat_vl32_u32: -+** uqdecw z0\.s, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_vl32_u32, svuint32_t, -+ z0 = svqdecw_pat_u32 (z0, SV_VL32, 16), -+ z0 = svqdecw_pat (z0, SV_VL32, 16)) -+ -+/* -+** qdecw_pat_vl64_u32: -+** uqdecw z0\.s, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_vl64_u32, svuint32_t, -+ z0 = svqdecw_pat_u32 (z0, SV_VL64, 16), -+ z0 = svqdecw_pat (z0, SV_VL64, 16)) -+ -+/* -+** qdecw_pat_vl128_u32: -+** uqdecw z0\.s, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_vl128_u32, svuint32_t, -+ z0 = svqdecw_pat_u32 (z0, SV_VL128, 16), -+ z0 = svqdecw_pat (z0, SV_VL128, 16)) -+ -+/* -+** qdecw_pat_vl256_u32: -+** uqdecw z0\.s, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_vl256_u32, svuint32_t, -+ z0 = svqdecw_pat_u32 (z0, SV_VL256, 16), -+ z0 = svqdecw_pat (z0, SV_VL256, 16)) -+ -+/* -+** qdecw_pat_mul4_u32: -+** uqdecw z0\.s, mul4, mul #16 -+** 
ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_mul4_u32, svuint32_t, -+ z0 = svqdecw_pat_u32 (z0, SV_MUL4, 16), -+ z0 = svqdecw_pat (z0, SV_MUL4, 16)) -+ -+/* -+** qdecw_pat_mul3_u32: -+** uqdecw z0\.s, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_mul3_u32, svuint32_t, -+ z0 = svqdecw_pat_u32 (z0, SV_MUL3, 16), -+ z0 = svqdecw_pat (z0, SV_MUL3, 16)) -+ -+/* -+** qdecw_pat_all_u32: -+** uqdecw z0\.s, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_pat_all_u32, svuint32_t, -+ z0 = svqdecw_pat_u32 (z0, SV_ALL, 16), -+ z0 = svqdecw_pat (z0, SV_ALL, 16)) -+ -+/* -+** qdecw_pat_n_1_u32_tied: -+** uqdecw w0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_1_u32_tied, uint32_t, -+ x0 = svqdecw_pat_n_u32 (x0, SV_POW2, 1), -+ x0 = svqdecw_pat (x0, SV_POW2, 1)) -+ -+/* -+** qdecw_pat_n_1_u32_untied: -+** mov w0, w1 -+** uqdecw w0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_1_u32_untied, uint32_t, -+ x0 = svqdecw_pat_n_u32 (x1, SV_POW2, 1), -+ x0 = svqdecw_pat (x1, SV_POW2, 1)) -+ -+/* -+** qdecw_pat_n_2_u32: -+** uqdecw w0, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_2_u32, uint32_t, -+ x0 = svqdecw_pat_n_u32 (x0, SV_POW2, 2), -+ x0 = svqdecw_pat (x0, SV_POW2, 2)) -+ -+/* -+** qdecw_pat_n_7_u32: -+** uqdecw w0, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_7_u32, uint32_t, -+ x0 = svqdecw_pat_n_u32 (x0, SV_POW2, 7), -+ x0 = svqdecw_pat (x0, SV_POW2, 7)) -+ -+/* -+** qdecw_pat_n_15_u32: -+** uqdecw w0, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_15_u32, uint32_t, -+ x0 = svqdecw_pat_n_u32 (x0, SV_POW2, 15), -+ x0 = svqdecw_pat (x0, SV_POW2, 15)) -+ -+/* -+** qdecw_pat_n_16_u32: -+** uqdecw w0, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_16_u32, uint32_t, -+ x0 = svqdecw_pat_n_u32 (x0, SV_POW2, 16), -+ x0 = svqdecw_pat (x0, SV_POW2, 16)) -+ -+/* -+** qdecw_pat_n_vl1_u32: -+** uqdecw w0, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl1_u32, uint32_t, -+ x0 = svqdecw_pat_n_u32 (x0, SV_VL1, 16), -+ x0 = svqdecw_pat (x0, SV_VL1, 16)) -+ -+/* -+** qdecw_pat_n_vl2_u32: -+** uqdecw w0, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl2_u32, uint32_t, -+ x0 = svqdecw_pat_n_u32 (x0, SV_VL2, 16), -+ x0 = svqdecw_pat (x0, SV_VL2, 16)) -+ -+/* -+** qdecw_pat_n_vl3_u32: -+** uqdecw w0, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl3_u32, uint32_t, -+ x0 = svqdecw_pat_n_u32 (x0, SV_VL3, 16), -+ x0 = svqdecw_pat (x0, SV_VL3, 16)) -+ -+/* -+** qdecw_pat_n_vl4_u32: -+** uqdecw w0, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl4_u32, uint32_t, -+ x0 = svqdecw_pat_n_u32 (x0, SV_VL4, 16), -+ x0 = svqdecw_pat (x0, SV_VL4, 16)) -+ -+/* -+** qdecw_pat_n_vl5_u32: -+** uqdecw w0, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl5_u32, uint32_t, -+ x0 = svqdecw_pat_n_u32 (x0, SV_VL5, 16), -+ x0 = svqdecw_pat (x0, SV_VL5, 16)) -+ -+/* -+** qdecw_pat_n_vl6_u32: -+** uqdecw w0, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl6_u32, uint32_t, -+ x0 = svqdecw_pat_n_u32 (x0, SV_VL6, 16), -+ x0 = svqdecw_pat (x0, SV_VL6, 16)) -+ -+/* -+** qdecw_pat_n_vl7_u32: -+** uqdecw w0, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl7_u32, uint32_t, -+ x0 = svqdecw_pat_n_u32 (x0, SV_VL7, 16), -+ x0 = svqdecw_pat (x0, SV_VL7, 16)) -+ -+/* -+** qdecw_pat_n_vl8_u32: -+** uqdecw w0, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl8_u32, uint32_t, -+ x0 = svqdecw_pat_n_u32 (x0, SV_VL8, 16), -+ x0 = svqdecw_pat (x0, SV_VL8, 16)) -+ -+/* -+** qdecw_pat_n_vl16_u32: -+** uqdecw w0, vl16, mul #16 -+** 
ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl16_u32, uint32_t, -+ x0 = svqdecw_pat_n_u32 (x0, SV_VL16, 16), -+ x0 = svqdecw_pat (x0, SV_VL16, 16)) -+ -+/* -+** qdecw_pat_n_vl32_u32: -+** uqdecw w0, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl32_u32, uint32_t, -+ x0 = svqdecw_pat_n_u32 (x0, SV_VL32, 16), -+ x0 = svqdecw_pat (x0, SV_VL32, 16)) -+ -+/* -+** qdecw_pat_n_vl64_u32: -+** uqdecw w0, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl64_u32, uint32_t, -+ x0 = svqdecw_pat_n_u32 (x0, SV_VL64, 16), -+ x0 = svqdecw_pat (x0, SV_VL64, 16)) -+ -+/* -+** qdecw_pat_n_vl128_u32: -+** uqdecw w0, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl128_u32, uint32_t, -+ x0 = svqdecw_pat_n_u32 (x0, SV_VL128, 16), -+ x0 = svqdecw_pat (x0, SV_VL128, 16)) -+ -+/* -+** qdecw_pat_n_vl256_u32: -+** uqdecw w0, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl256_u32, uint32_t, -+ x0 = svqdecw_pat_n_u32 (x0, SV_VL256, 16), -+ x0 = svqdecw_pat (x0, SV_VL256, 16)) -+ -+/* -+** qdecw_pat_n_mul4_u32: -+** uqdecw w0, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_mul4_u32, uint32_t, -+ x0 = svqdecw_pat_n_u32 (x0, SV_MUL4, 16), -+ x0 = svqdecw_pat (x0, SV_MUL4, 16)) -+ -+/* -+** qdecw_pat_n_mul3_u32: -+** uqdecw w0, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_mul3_u32, uint32_t, -+ x0 = svqdecw_pat_n_u32 (x0, SV_MUL3, 16), -+ x0 = svqdecw_pat (x0, SV_MUL3, 16)) -+ -+/* -+** qdecw_pat_n_all_u32: -+** uqdecw w0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_all_u32, uint32_t, -+ x0 = svqdecw_pat_n_u32 (x0, SV_ALL, 16), -+ x0 = svqdecw_pat (x0, SV_ALL, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_pat_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_pat_u64.c -new file mode 100644 -index 000000000..015775b17 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_pat_u64.c -@@ -0,0 +1,202 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdecw_pat_n_1_u64_tied: -+** uqdecw x0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_1_u64_tied, uint64_t, -+ x0 = svqdecw_pat_n_u64 (x0, SV_POW2, 1), -+ x0 = svqdecw_pat (x0, SV_POW2, 1)) -+ -+/* -+** qdecw_pat_n_1_u64_untied: -+** mov x0, x1 -+** uqdecw x0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_1_u64_untied, uint64_t, -+ x0 = svqdecw_pat_n_u64 (x1, SV_POW2, 1), -+ x0 = svqdecw_pat (x1, SV_POW2, 1)) -+ -+/* -+** qdecw_pat_n_2_u64: -+** uqdecw x0, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_2_u64, uint64_t, -+ x0 = svqdecw_pat_n_u64 (x0, SV_POW2, 2), -+ x0 = svqdecw_pat (x0, SV_POW2, 2)) -+ -+/* -+** qdecw_pat_n_7_u64: -+** uqdecw x0, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_7_u64, uint64_t, -+ x0 = svqdecw_pat_n_u64 (x0, SV_POW2, 7), -+ x0 = svqdecw_pat (x0, SV_POW2, 7)) -+ -+/* -+** qdecw_pat_n_15_u64: -+** uqdecw x0, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_15_u64, uint64_t, -+ x0 = svqdecw_pat_n_u64 (x0, SV_POW2, 15), -+ x0 = svqdecw_pat (x0, SV_POW2, 15)) -+ -+/* -+** qdecw_pat_n_16_u64: -+** uqdecw x0, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_16_u64, uint64_t, -+ x0 = svqdecw_pat_n_u64 (x0, SV_POW2, 16), -+ x0 = svqdecw_pat (x0, SV_POW2, 16)) -+ -+/* -+** qdecw_pat_n_vl1_u64: -+** uqdecw x0, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl1_u64, uint64_t, -+ x0 = svqdecw_pat_n_u64 (x0, SV_VL1, 16), -+ x0 = svqdecw_pat (x0, SV_VL1, 16)) -+ -+/* -+** qdecw_pat_n_vl2_u64: -+** uqdecw x0, vl2, 
mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl2_u64, uint64_t, -+ x0 = svqdecw_pat_n_u64 (x0, SV_VL2, 16), -+ x0 = svqdecw_pat (x0, SV_VL2, 16)) -+ -+/* -+** qdecw_pat_n_vl3_u64: -+** uqdecw x0, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl3_u64, uint64_t, -+ x0 = svqdecw_pat_n_u64 (x0, SV_VL3, 16), -+ x0 = svqdecw_pat (x0, SV_VL3, 16)) -+ -+/* -+** qdecw_pat_n_vl4_u64: -+** uqdecw x0, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl4_u64, uint64_t, -+ x0 = svqdecw_pat_n_u64 (x0, SV_VL4, 16), -+ x0 = svqdecw_pat (x0, SV_VL4, 16)) -+ -+/* -+** qdecw_pat_n_vl5_u64: -+** uqdecw x0, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl5_u64, uint64_t, -+ x0 = svqdecw_pat_n_u64 (x0, SV_VL5, 16), -+ x0 = svqdecw_pat (x0, SV_VL5, 16)) -+ -+/* -+** qdecw_pat_n_vl6_u64: -+** uqdecw x0, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl6_u64, uint64_t, -+ x0 = svqdecw_pat_n_u64 (x0, SV_VL6, 16), -+ x0 = svqdecw_pat (x0, SV_VL6, 16)) -+ -+/* -+** qdecw_pat_n_vl7_u64: -+** uqdecw x0, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl7_u64, uint64_t, -+ x0 = svqdecw_pat_n_u64 (x0, SV_VL7, 16), -+ x0 = svqdecw_pat (x0, SV_VL7, 16)) -+ -+/* -+** qdecw_pat_n_vl8_u64: -+** uqdecw x0, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl8_u64, uint64_t, -+ x0 = svqdecw_pat_n_u64 (x0, SV_VL8, 16), -+ x0 = svqdecw_pat (x0, SV_VL8, 16)) -+ -+/* -+** qdecw_pat_n_vl16_u64: -+** uqdecw x0, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl16_u64, uint64_t, -+ x0 = svqdecw_pat_n_u64 (x0, SV_VL16, 16), -+ x0 = svqdecw_pat (x0, SV_VL16, 16)) -+ -+/* -+** qdecw_pat_n_vl32_u64: -+** uqdecw x0, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl32_u64, uint64_t, -+ x0 = svqdecw_pat_n_u64 (x0, SV_VL32, 16), -+ x0 = svqdecw_pat (x0, SV_VL32, 16)) -+ -+/* -+** qdecw_pat_n_vl64_u64: -+** uqdecw x0, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl64_u64, uint64_t, -+ x0 = svqdecw_pat_n_u64 (x0, SV_VL64, 16), -+ x0 = svqdecw_pat (x0, SV_VL64, 16)) -+ -+/* -+** qdecw_pat_n_vl128_u64: -+** uqdecw x0, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl128_u64, uint64_t, -+ x0 = svqdecw_pat_n_u64 (x0, SV_VL128, 16), -+ x0 = svqdecw_pat (x0, SV_VL128, 16)) -+ -+/* -+** qdecw_pat_n_vl256_u64: -+** uqdecw x0, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_vl256_u64, uint64_t, -+ x0 = svqdecw_pat_n_u64 (x0, SV_VL256, 16), -+ x0 = svqdecw_pat (x0, SV_VL256, 16)) -+ -+/* -+** qdecw_pat_n_mul4_u64: -+** uqdecw x0, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_mul4_u64, uint64_t, -+ x0 = svqdecw_pat_n_u64 (x0, SV_MUL4, 16), -+ x0 = svqdecw_pat (x0, SV_MUL4, 16)) -+ -+/* -+** qdecw_pat_n_mul3_u64: -+** uqdecw x0, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_mul3_u64, uint64_t, -+ x0 = svqdecw_pat_n_u64 (x0, SV_MUL3, 16), -+ x0 = svqdecw_pat (x0, SV_MUL3, 16)) -+ -+/* -+** qdecw_pat_n_all_u64: -+** uqdecw x0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_pat_n_all_u64, uint64_t, -+ x0 = svqdecw_pat_n_u64 (x0, SV_ALL, 16), -+ x0 = svqdecw_pat (x0, SV_ALL, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_s32.c -new file mode 100644 -index 000000000..8dfe8a177 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_s32.c -@@ -0,0 +1,113 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdecw_1_s32_tied: -+** sqdecw z0\.s -+** ret -+*/ 
-+TEST_UNIFORM_Z (qdecw_1_s32_tied, svint32_t, -+ z0 = svqdecw_s32 (z0, 1), -+ z0 = svqdecw (z0, 1)) -+ -+/* -+** qdecw_1_s32_untied: -+** movprfx z0, z1 -+** sqdecw z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_1_s32_untied, svint32_t, -+ z0 = svqdecw_s32 (z1, 1), -+ z0 = svqdecw (z1, 1)) -+ -+/* -+** qdecw_2_s32: -+** sqdecw z0\.s, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_2_s32, svint32_t, -+ z0 = svqdecw_s32 (z0, 2), -+ z0 = svqdecw (z0, 2)) -+ -+/* -+** qdecw_7_s32: -+** sqdecw z0\.s, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_7_s32, svint32_t, -+ z0 = svqdecw_s32 (z0, 7), -+ z0 = svqdecw (z0, 7)) -+ -+/* -+** qdecw_15_s32: -+** sqdecw z0\.s, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_15_s32, svint32_t, -+ z0 = svqdecw_s32 (z0, 15), -+ z0 = svqdecw (z0, 15)) -+ -+/* -+** qdecw_16_s32: -+** sqdecw z0\.s, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_16_s32, svint32_t, -+ z0 = svqdecw_s32 (z0, 16), -+ z0 = svqdecw (z0, 16)) -+ -+/* -+** qdecw_n_1_s32_tied: -+** sqdecw x0, w0 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_n_1_s32_tied, int32_t, -+ x0 = svqdecw_n_s32 (x0, 1), -+ x0 = svqdecw (x0, 1)) -+ -+/* -+** qdecw_n_1_s32_untied: -+** mov w0, w1 -+** sqdecw x0, w0 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_n_1_s32_untied, int32_t, -+ x0 = svqdecw_n_s32 (x1, 1), -+ x0 = svqdecw (x1, 1)) -+ -+/* -+** qdecw_n_2_s32: -+** sqdecw x0, w0, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_n_2_s32, int32_t, -+ x0 = svqdecw_n_s32 (x0, 2), -+ x0 = svqdecw (x0, 2)) -+ -+/* -+** qdecw_n_7_s32: -+** sqdecw x0, w0, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_n_7_s32, int32_t, -+ x0 = svqdecw_n_s32 (x0, 7), -+ x0 = svqdecw (x0, 7)) -+ -+/* -+** qdecw_n_15_s32: -+** sqdecw x0, w0, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_n_15_s32, int32_t, -+ x0 = svqdecw_n_s32 (x0, 15), -+ x0 = svqdecw (x0, 15)) -+ -+/* -+** qdecw_n_16_s32: -+** sqdecw x0, w0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_n_16_s32, int32_t, -+ x0 = svqdecw_n_s32 (x0, 16), -+ x0 = svqdecw (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_s64.c -new file mode 100644 -index 000000000..b0841a8b2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_s64.c -@@ -0,0 +1,58 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdecw_n_1_s64_tied: -+** sqdecw x0 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_n_1_s64_tied, int64_t, -+ x0 = svqdecw_n_s64 (x0, 1), -+ x0 = svqdecw (x0, 1)) -+ -+/* -+** qdecw_n_1_s64_untied: -+** mov x0, x1 -+** sqdecw x0 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_n_1_s64_untied, int64_t, -+ x0 = svqdecw_n_s64 (x1, 1), -+ x0 = svqdecw (x1, 1)) -+ -+/* -+** qdecw_n_2_s64: -+** sqdecw x0, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_n_2_s64, int64_t, -+ x0 = svqdecw_n_s64 (x0, 2), -+ x0 = svqdecw (x0, 2)) -+ -+/* -+** qdecw_n_7_s64: -+** sqdecw x0, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_n_7_s64, int64_t, -+ x0 = svqdecw_n_s64 (x0, 7), -+ x0 = svqdecw (x0, 7)) -+ -+/* -+** qdecw_n_15_s64: -+** sqdecw x0, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_n_15_s64, int64_t, -+ x0 = svqdecw_n_s64 (x0, 15), -+ x0 = svqdecw (x0, 15)) -+ -+/* -+** qdecw_n_16_s64: -+** sqdecw x0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_n_16_s64, int64_t, -+ x0 = svqdecw_n_s64 (x0, 16), -+ x0 = svqdecw (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_u32.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_u32.c -new file mode 100644 -index 000000000..22e8a8d69 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_u32.c -@@ -0,0 +1,113 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdecw_1_u32_tied: -+** uqdecw z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_1_u32_tied, svuint32_t, -+ z0 = svqdecw_u32 (z0, 1), -+ z0 = svqdecw (z0, 1)) -+ -+/* -+** qdecw_1_u32_untied: -+** movprfx z0, z1 -+** uqdecw z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_1_u32_untied, svuint32_t, -+ z0 = svqdecw_u32 (z1, 1), -+ z0 = svqdecw (z1, 1)) -+ -+/* -+** qdecw_2_u32: -+** uqdecw z0\.s, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_2_u32, svuint32_t, -+ z0 = svqdecw_u32 (z0, 2), -+ z0 = svqdecw (z0, 2)) -+ -+/* -+** qdecw_7_u32: -+** uqdecw z0\.s, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_7_u32, svuint32_t, -+ z0 = svqdecw_u32 (z0, 7), -+ z0 = svqdecw (z0, 7)) -+ -+/* -+** qdecw_15_u32: -+** uqdecw z0\.s, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_15_u32, svuint32_t, -+ z0 = svqdecw_u32 (z0, 15), -+ z0 = svqdecw (z0, 15)) -+ -+/* -+** qdecw_16_u32: -+** uqdecw z0\.s, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qdecw_16_u32, svuint32_t, -+ z0 = svqdecw_u32 (z0, 16), -+ z0 = svqdecw (z0, 16)) -+ -+/* -+** qdecw_n_1_u32_tied: -+** uqdecw w0 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_n_1_u32_tied, uint32_t, -+ x0 = svqdecw_n_u32 (x0, 1), -+ x0 = svqdecw (x0, 1)) -+ -+/* -+** qdecw_n_1_u32_untied: -+** mov w0, w1 -+** uqdecw w0 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_n_1_u32_untied, uint32_t, -+ x0 = svqdecw_n_u32 (x1, 1), -+ x0 = svqdecw (x1, 1)) -+ -+/* -+** qdecw_n_2_u32: -+** uqdecw w0, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_n_2_u32, uint32_t, -+ x0 = svqdecw_n_u32 (x0, 2), -+ x0 = svqdecw (x0, 2)) -+ -+/* -+** qdecw_n_7_u32: -+** uqdecw w0, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_n_7_u32, uint32_t, -+ x0 = svqdecw_n_u32 (x0, 7), -+ x0 = svqdecw (x0, 7)) -+ -+/* -+** qdecw_n_15_u32: -+** uqdecw w0, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_n_15_u32, uint32_t, -+ x0 = svqdecw_n_u32 (x0, 15), -+ x0 = svqdecw (x0, 15)) -+ -+/* -+** qdecw_n_16_u32: -+** uqdecw w0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_n_16_u32, uint32_t, -+ x0 = svqdecw_n_u32 (x0, 16), -+ x0 = svqdecw (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_u64.c -new file mode 100644 -index 000000000..88c484e8b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qdecw_u64.c -@@ -0,0 +1,58 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qdecw_n_1_u64_tied: -+** uqdecw x0 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_n_1_u64_tied, uint64_t, -+ x0 = svqdecw_n_u64 (x0, 1), -+ x0 = svqdecw (x0, 1)) -+ -+/* -+** qdecw_n_1_u64_untied: -+** mov x0, x1 -+** uqdecw x0 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_n_1_u64_untied, uint64_t, -+ x0 = svqdecw_n_u64 (x1, 1), -+ x0 = svqdecw (x1, 1)) -+ -+/* -+** qdecw_n_2_u64: -+** uqdecw x0, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_n_2_u64, uint64_t, -+ x0 = svqdecw_n_u64 (x0, 2), -+ x0 = svqdecw (x0, 2)) -+ -+/* -+** qdecw_n_7_u64: -+** uqdecw x0, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_n_7_u64, uint64_t, -+ x0 = svqdecw_n_u64 (x0, 7), -+ x0 = svqdecw (x0, 7)) -+ -+/* -+** qdecw_n_15_u64: -+** uqdecw x0, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S 
(qdecw_n_15_u64, uint64_t, -+ x0 = svqdecw_n_u64 (x0, 15), -+ x0 = svqdecw (x0, 15)) -+ -+/* -+** qdecw_n_16_u64: -+** uqdecw x0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qdecw_n_16_u64, uint64_t, -+ x0 = svqdecw_n_u64 (x0, 16), -+ x0 = svqdecw (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_pat_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_pat_s32.c -new file mode 100644 -index 000000000..16a8d8e9a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_pat_s32.c -@@ -0,0 +1,202 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qincb_pat_n_1_s32_tied: -+** sqincb x0, w0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_1_s32_tied, int32_t, -+ x0 = svqincb_pat_n_s32 (x0, SV_POW2, 1), -+ x0 = svqincb_pat (x0, SV_POW2, 1)) -+ -+/* -+** qincb_pat_n_1_s32_untied: -+** mov w0, w1 -+** sqincb x0, w0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_1_s32_untied, int32_t, -+ x0 = svqincb_pat_n_s32 (x1, SV_POW2, 1), -+ x0 = svqincb_pat (x1, SV_POW2, 1)) -+ -+/* -+** qincb_pat_n_2_s32: -+** sqincb x0, w0, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_2_s32, int32_t, -+ x0 = svqincb_pat_n_s32 (x0, SV_POW2, 2), -+ x0 = svqincb_pat (x0, SV_POW2, 2)) -+ -+/* -+** qincb_pat_n_7_s32: -+** sqincb x0, w0, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_7_s32, int32_t, -+ x0 = svqincb_pat_n_s32 (x0, SV_POW2, 7), -+ x0 = svqincb_pat (x0, SV_POW2, 7)) -+ -+/* -+** qincb_pat_n_15_s32: -+** sqincb x0, w0, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_15_s32, int32_t, -+ x0 = svqincb_pat_n_s32 (x0, SV_POW2, 15), -+ x0 = svqincb_pat (x0, SV_POW2, 15)) -+ -+/* -+** qincb_pat_n_16_s32: -+** sqincb x0, w0, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_16_s32, int32_t, -+ x0 = svqincb_pat_n_s32 (x0, SV_POW2, 16), -+ x0 = svqincb_pat (x0, SV_POW2, 16)) -+ -+/* -+** qincb_pat_n_vl1_s32: -+** sqincb x0, w0, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl1_s32, int32_t, -+ x0 = svqincb_pat_n_s32 (x0, SV_VL1, 16), -+ x0 = svqincb_pat (x0, SV_VL1, 16)) -+ -+/* -+** qincb_pat_n_vl2_s32: -+** sqincb x0, w0, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl2_s32, int32_t, -+ x0 = svqincb_pat_n_s32 (x0, SV_VL2, 16), -+ x0 = svqincb_pat (x0, SV_VL2, 16)) -+ -+/* -+** qincb_pat_n_vl3_s32: -+** sqincb x0, w0, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl3_s32, int32_t, -+ x0 = svqincb_pat_n_s32 (x0, SV_VL3, 16), -+ x0 = svqincb_pat (x0, SV_VL3, 16)) -+ -+/* -+** qincb_pat_n_vl4_s32: -+** sqincb x0, w0, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl4_s32, int32_t, -+ x0 = svqincb_pat_n_s32 (x0, SV_VL4, 16), -+ x0 = svqincb_pat (x0, SV_VL4, 16)) -+ -+/* -+** qincb_pat_n_vl5_s32: -+** sqincb x0, w0, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl5_s32, int32_t, -+ x0 = svqincb_pat_n_s32 (x0, SV_VL5, 16), -+ x0 = svqincb_pat (x0, SV_VL5, 16)) -+ -+/* -+** qincb_pat_n_vl6_s32: -+** sqincb x0, w0, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl6_s32, int32_t, -+ x0 = svqincb_pat_n_s32 (x0, SV_VL6, 16), -+ x0 = svqincb_pat (x0, SV_VL6, 16)) -+ -+/* -+** qincb_pat_n_vl7_s32: -+** sqincb x0, w0, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl7_s32, int32_t, -+ x0 = svqincb_pat_n_s32 (x0, SV_VL7, 16), -+ x0 = svqincb_pat (x0, SV_VL7, 16)) -+ -+/* -+** qincb_pat_n_vl8_s32: -+** sqincb x0, w0, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl8_s32, int32_t, -+ x0 = 
svqincb_pat_n_s32 (x0, SV_VL8, 16), -+ x0 = svqincb_pat (x0, SV_VL8, 16)) -+ -+/* -+** qincb_pat_n_vl16_s32: -+** sqincb x0, w0, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl16_s32, int32_t, -+ x0 = svqincb_pat_n_s32 (x0, SV_VL16, 16), -+ x0 = svqincb_pat (x0, SV_VL16, 16)) -+ -+/* -+** qincb_pat_n_vl32_s32: -+** sqincb x0, w0, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl32_s32, int32_t, -+ x0 = svqincb_pat_n_s32 (x0, SV_VL32, 16), -+ x0 = svqincb_pat (x0, SV_VL32, 16)) -+ -+/* -+** qincb_pat_n_vl64_s32: -+** sqincb x0, w0, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl64_s32, int32_t, -+ x0 = svqincb_pat_n_s32 (x0, SV_VL64, 16), -+ x0 = svqincb_pat (x0, SV_VL64, 16)) -+ -+/* -+** qincb_pat_n_vl128_s32: -+** sqincb x0, w0, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl128_s32, int32_t, -+ x0 = svqincb_pat_n_s32 (x0, SV_VL128, 16), -+ x0 = svqincb_pat (x0, SV_VL128, 16)) -+ -+/* -+** qincb_pat_n_vl256_s32: -+** sqincb x0, w0, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl256_s32, int32_t, -+ x0 = svqincb_pat_n_s32 (x0, SV_VL256, 16), -+ x0 = svqincb_pat (x0, SV_VL256, 16)) -+ -+/* -+** qincb_pat_n_mul4_s32: -+** sqincb x0, w0, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_mul4_s32, int32_t, -+ x0 = svqincb_pat_n_s32 (x0, SV_MUL4, 16), -+ x0 = svqincb_pat (x0, SV_MUL4, 16)) -+ -+/* -+** qincb_pat_n_mul3_s32: -+** sqincb x0, w0, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_mul3_s32, int32_t, -+ x0 = svqincb_pat_n_s32 (x0, SV_MUL3, 16), -+ x0 = svqincb_pat (x0, SV_MUL3, 16)) -+ -+/* -+** qincb_pat_n_all_s32: -+** sqincb x0, w0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_all_s32, int32_t, -+ x0 = svqincb_pat_n_s32 (x0, SV_ALL, 16), -+ x0 = svqincb_pat (x0, SV_ALL, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_pat_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_pat_s64.c -new file mode 100644 -index 000000000..79ed73ba7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_pat_s64.c -@@ -0,0 +1,202 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qincb_pat_n_1_s64_tied: -+** sqincb x0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_1_s64_tied, int64_t, -+ x0 = svqincb_pat_n_s64 (x0, SV_POW2, 1), -+ x0 = svqincb_pat (x0, SV_POW2, 1)) -+ -+/* -+** qincb_pat_n_1_s64_untied: -+** mov x0, x1 -+** sqincb x0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_1_s64_untied, int64_t, -+ x0 = svqincb_pat_n_s64 (x1, SV_POW2, 1), -+ x0 = svqincb_pat (x1, SV_POW2, 1)) -+ -+/* -+** qincb_pat_n_2_s64: -+** sqincb x0, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_2_s64, int64_t, -+ x0 = svqincb_pat_n_s64 (x0, SV_POW2, 2), -+ x0 = svqincb_pat (x0, SV_POW2, 2)) -+ -+/* -+** qincb_pat_n_7_s64: -+** sqincb x0, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_7_s64, int64_t, -+ x0 = svqincb_pat_n_s64 (x0, SV_POW2, 7), -+ x0 = svqincb_pat (x0, SV_POW2, 7)) -+ -+/* -+** qincb_pat_n_15_s64: -+** sqincb x0, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_15_s64, int64_t, -+ x0 = svqincb_pat_n_s64 (x0, SV_POW2, 15), -+ x0 = svqincb_pat (x0, SV_POW2, 15)) -+ -+/* -+** qincb_pat_n_16_s64: -+** sqincb x0, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_16_s64, int64_t, -+ x0 = svqincb_pat_n_s64 (x0, SV_POW2, 16), -+ x0 = svqincb_pat (x0, SV_POW2, 16)) -+ -+/* -+** qincb_pat_n_vl1_s64: -+** sqincb x0, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S 
(qincb_pat_n_vl1_s64, int64_t, -+ x0 = svqincb_pat_n_s64 (x0, SV_VL1, 16), -+ x0 = svqincb_pat (x0, SV_VL1, 16)) -+ -+/* -+** qincb_pat_n_vl2_s64: -+** sqincb x0, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl2_s64, int64_t, -+ x0 = svqincb_pat_n_s64 (x0, SV_VL2, 16), -+ x0 = svqincb_pat (x0, SV_VL2, 16)) -+ -+/* -+** qincb_pat_n_vl3_s64: -+** sqincb x0, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl3_s64, int64_t, -+ x0 = svqincb_pat_n_s64 (x0, SV_VL3, 16), -+ x0 = svqincb_pat (x0, SV_VL3, 16)) -+ -+/* -+** qincb_pat_n_vl4_s64: -+** sqincb x0, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl4_s64, int64_t, -+ x0 = svqincb_pat_n_s64 (x0, SV_VL4, 16), -+ x0 = svqincb_pat (x0, SV_VL4, 16)) -+ -+/* -+** qincb_pat_n_vl5_s64: -+** sqincb x0, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl5_s64, int64_t, -+ x0 = svqincb_pat_n_s64 (x0, SV_VL5, 16), -+ x0 = svqincb_pat (x0, SV_VL5, 16)) -+ -+/* -+** qincb_pat_n_vl6_s64: -+** sqincb x0, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl6_s64, int64_t, -+ x0 = svqincb_pat_n_s64 (x0, SV_VL6, 16), -+ x0 = svqincb_pat (x0, SV_VL6, 16)) -+ -+/* -+** qincb_pat_n_vl7_s64: -+** sqincb x0, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl7_s64, int64_t, -+ x0 = svqincb_pat_n_s64 (x0, SV_VL7, 16), -+ x0 = svqincb_pat (x0, SV_VL7, 16)) -+ -+/* -+** qincb_pat_n_vl8_s64: -+** sqincb x0, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl8_s64, int64_t, -+ x0 = svqincb_pat_n_s64 (x0, SV_VL8, 16), -+ x0 = svqincb_pat (x0, SV_VL8, 16)) -+ -+/* -+** qincb_pat_n_vl16_s64: -+** sqincb x0, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl16_s64, int64_t, -+ x0 = svqincb_pat_n_s64 (x0, SV_VL16, 16), -+ x0 = svqincb_pat (x0, SV_VL16, 16)) -+ -+/* -+** qincb_pat_n_vl32_s64: -+** sqincb x0, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl32_s64, int64_t, -+ x0 = svqincb_pat_n_s64 (x0, SV_VL32, 16), -+ x0 = svqincb_pat (x0, SV_VL32, 16)) -+ -+/* -+** qincb_pat_n_vl64_s64: -+** sqincb x0, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl64_s64, int64_t, -+ x0 = svqincb_pat_n_s64 (x0, SV_VL64, 16), -+ x0 = svqincb_pat (x0, SV_VL64, 16)) -+ -+/* -+** qincb_pat_n_vl128_s64: -+** sqincb x0, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl128_s64, int64_t, -+ x0 = svqincb_pat_n_s64 (x0, SV_VL128, 16), -+ x0 = svqincb_pat (x0, SV_VL128, 16)) -+ -+/* -+** qincb_pat_n_vl256_s64: -+** sqincb x0, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl256_s64, int64_t, -+ x0 = svqincb_pat_n_s64 (x0, SV_VL256, 16), -+ x0 = svqincb_pat (x0, SV_VL256, 16)) -+ -+/* -+** qincb_pat_n_mul4_s64: -+** sqincb x0, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_mul4_s64, int64_t, -+ x0 = svqincb_pat_n_s64 (x0, SV_MUL4, 16), -+ x0 = svqincb_pat (x0, SV_MUL4, 16)) -+ -+/* -+** qincb_pat_n_mul3_s64: -+** sqincb x0, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_mul3_s64, int64_t, -+ x0 = svqincb_pat_n_s64 (x0, SV_MUL3, 16), -+ x0 = svqincb_pat (x0, SV_MUL3, 16)) -+ -+/* -+** qincb_pat_n_all_s64: -+** sqincb x0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_all_s64, int64_t, -+ x0 = svqincb_pat_n_s64 (x0, SV_ALL, 16), -+ x0 = svqincb_pat (x0, SV_ALL, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_pat_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_pat_u32.c -new file mode 100644 -index 000000000..30e5f28ee ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_pat_u32.c -@@ -0,0 +1,202 
@@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qincb_pat_n_1_u32_tied: -+** uqincb w0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_1_u32_tied, uint32_t, -+ x0 = svqincb_pat_n_u32 (x0, SV_POW2, 1), -+ x0 = svqincb_pat (x0, SV_POW2, 1)) -+ -+/* -+** qincb_pat_n_1_u32_untied: -+** mov w0, w1 -+** uqincb w0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_1_u32_untied, uint32_t, -+ x0 = svqincb_pat_n_u32 (x1, SV_POW2, 1), -+ x0 = svqincb_pat (x1, SV_POW2, 1)) -+ -+/* -+** qincb_pat_n_2_u32: -+** uqincb w0, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_2_u32, uint32_t, -+ x0 = svqincb_pat_n_u32 (x0, SV_POW2, 2), -+ x0 = svqincb_pat (x0, SV_POW2, 2)) -+ -+/* -+** qincb_pat_n_7_u32: -+** uqincb w0, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_7_u32, uint32_t, -+ x0 = svqincb_pat_n_u32 (x0, SV_POW2, 7), -+ x0 = svqincb_pat (x0, SV_POW2, 7)) -+ -+/* -+** qincb_pat_n_15_u32: -+** uqincb w0, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_15_u32, uint32_t, -+ x0 = svqincb_pat_n_u32 (x0, SV_POW2, 15), -+ x0 = svqincb_pat (x0, SV_POW2, 15)) -+ -+/* -+** qincb_pat_n_16_u32: -+** uqincb w0, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_16_u32, uint32_t, -+ x0 = svqincb_pat_n_u32 (x0, SV_POW2, 16), -+ x0 = svqincb_pat (x0, SV_POW2, 16)) -+ -+/* -+** qincb_pat_n_vl1_u32: -+** uqincb w0, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl1_u32, uint32_t, -+ x0 = svqincb_pat_n_u32 (x0, SV_VL1, 16), -+ x0 = svqincb_pat (x0, SV_VL1, 16)) -+ -+/* -+** qincb_pat_n_vl2_u32: -+** uqincb w0, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl2_u32, uint32_t, -+ x0 = svqincb_pat_n_u32 (x0, SV_VL2, 16), -+ x0 = svqincb_pat (x0, SV_VL2, 16)) -+ -+/* -+** qincb_pat_n_vl3_u32: -+** uqincb w0, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl3_u32, uint32_t, -+ x0 = svqincb_pat_n_u32 (x0, SV_VL3, 16), -+ x0 = svqincb_pat (x0, SV_VL3, 16)) -+ -+/* -+** qincb_pat_n_vl4_u32: -+** uqincb w0, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl4_u32, uint32_t, -+ x0 = svqincb_pat_n_u32 (x0, SV_VL4, 16), -+ x0 = svqincb_pat (x0, SV_VL4, 16)) -+ -+/* -+** qincb_pat_n_vl5_u32: -+** uqincb w0, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl5_u32, uint32_t, -+ x0 = svqincb_pat_n_u32 (x0, SV_VL5, 16), -+ x0 = svqincb_pat (x0, SV_VL5, 16)) -+ -+/* -+** qincb_pat_n_vl6_u32: -+** uqincb w0, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl6_u32, uint32_t, -+ x0 = svqincb_pat_n_u32 (x0, SV_VL6, 16), -+ x0 = svqincb_pat (x0, SV_VL6, 16)) -+ -+/* -+** qincb_pat_n_vl7_u32: -+** uqincb w0, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl7_u32, uint32_t, -+ x0 = svqincb_pat_n_u32 (x0, SV_VL7, 16), -+ x0 = svqincb_pat (x0, SV_VL7, 16)) -+ -+/* -+** qincb_pat_n_vl8_u32: -+** uqincb w0, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl8_u32, uint32_t, -+ x0 = svqincb_pat_n_u32 (x0, SV_VL8, 16), -+ x0 = svqincb_pat (x0, SV_VL8, 16)) -+ -+/* -+** qincb_pat_n_vl16_u32: -+** uqincb w0, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl16_u32, uint32_t, -+ x0 = svqincb_pat_n_u32 (x0, SV_VL16, 16), -+ x0 = svqincb_pat (x0, SV_VL16, 16)) -+ -+/* -+** qincb_pat_n_vl32_u32: -+** uqincb w0, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl32_u32, uint32_t, -+ x0 = svqincb_pat_n_u32 (x0, SV_VL32, 16), -+ x0 = svqincb_pat (x0, SV_VL32, 16)) -+ -+/* -+** qincb_pat_n_vl64_u32: -+** uqincb w0, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S 
(qincb_pat_n_vl64_u32, uint32_t, -+ x0 = svqincb_pat_n_u32 (x0, SV_VL64, 16), -+ x0 = svqincb_pat (x0, SV_VL64, 16)) -+ -+/* -+** qincb_pat_n_vl128_u32: -+** uqincb w0, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl128_u32, uint32_t, -+ x0 = svqincb_pat_n_u32 (x0, SV_VL128, 16), -+ x0 = svqincb_pat (x0, SV_VL128, 16)) -+ -+/* -+** qincb_pat_n_vl256_u32: -+** uqincb w0, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl256_u32, uint32_t, -+ x0 = svqincb_pat_n_u32 (x0, SV_VL256, 16), -+ x0 = svqincb_pat (x0, SV_VL256, 16)) -+ -+/* -+** qincb_pat_n_mul4_u32: -+** uqincb w0, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_mul4_u32, uint32_t, -+ x0 = svqincb_pat_n_u32 (x0, SV_MUL4, 16), -+ x0 = svqincb_pat (x0, SV_MUL4, 16)) -+ -+/* -+** qincb_pat_n_mul3_u32: -+** uqincb w0, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_mul3_u32, uint32_t, -+ x0 = svqincb_pat_n_u32 (x0, SV_MUL3, 16), -+ x0 = svqincb_pat (x0, SV_MUL3, 16)) -+ -+/* -+** qincb_pat_n_all_u32: -+** uqincb w0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_all_u32, uint32_t, -+ x0 = svqincb_pat_n_u32 (x0, SV_ALL, 16), -+ x0 = svqincb_pat (x0, SV_ALL, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_pat_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_pat_u64.c -new file mode 100644 -index 000000000..038b1edb6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_pat_u64.c -@@ -0,0 +1,202 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qincb_pat_n_1_u64_tied: -+** uqincb x0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_1_u64_tied, uint64_t, -+ x0 = svqincb_pat_n_u64 (x0, SV_POW2, 1), -+ x0 = svqincb_pat (x0, SV_POW2, 1)) -+ -+/* -+** qincb_pat_n_1_u64_untied: -+** mov x0, x1 -+** uqincb x0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_1_u64_untied, uint64_t, -+ x0 = svqincb_pat_n_u64 (x1, SV_POW2, 1), -+ x0 = svqincb_pat (x1, SV_POW2, 1)) -+ -+/* -+** qincb_pat_n_2_u64: -+** uqincb x0, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_2_u64, uint64_t, -+ x0 = svqincb_pat_n_u64 (x0, SV_POW2, 2), -+ x0 = svqincb_pat (x0, SV_POW2, 2)) -+ -+/* -+** qincb_pat_n_7_u64: -+** uqincb x0, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_7_u64, uint64_t, -+ x0 = svqincb_pat_n_u64 (x0, SV_POW2, 7), -+ x0 = svqincb_pat (x0, SV_POW2, 7)) -+ -+/* -+** qincb_pat_n_15_u64: -+** uqincb x0, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_15_u64, uint64_t, -+ x0 = svqincb_pat_n_u64 (x0, SV_POW2, 15), -+ x0 = svqincb_pat (x0, SV_POW2, 15)) -+ -+/* -+** qincb_pat_n_16_u64: -+** uqincb x0, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_16_u64, uint64_t, -+ x0 = svqincb_pat_n_u64 (x0, SV_POW2, 16), -+ x0 = svqincb_pat (x0, SV_POW2, 16)) -+ -+/* -+** qincb_pat_n_vl1_u64: -+** uqincb x0, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl1_u64, uint64_t, -+ x0 = svqincb_pat_n_u64 (x0, SV_VL1, 16), -+ x0 = svqincb_pat (x0, SV_VL1, 16)) -+ -+/* -+** qincb_pat_n_vl2_u64: -+** uqincb x0, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl2_u64, uint64_t, -+ x0 = svqincb_pat_n_u64 (x0, SV_VL2, 16), -+ x0 = svqincb_pat (x0, SV_VL2, 16)) -+ -+/* -+** qincb_pat_n_vl3_u64: -+** uqincb x0, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl3_u64, uint64_t, -+ x0 = svqincb_pat_n_u64 (x0, SV_VL3, 16), -+ x0 = svqincb_pat (x0, SV_VL3, 16)) -+ -+/* -+** qincb_pat_n_vl4_u64: -+** uqincb x0, vl4, mul #16 -+** ret -+*/ 
-+TEST_UNIFORM_S (qincb_pat_n_vl4_u64, uint64_t, -+ x0 = svqincb_pat_n_u64 (x0, SV_VL4, 16), -+ x0 = svqincb_pat (x0, SV_VL4, 16)) -+ -+/* -+** qincb_pat_n_vl5_u64: -+** uqincb x0, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl5_u64, uint64_t, -+ x0 = svqincb_pat_n_u64 (x0, SV_VL5, 16), -+ x0 = svqincb_pat (x0, SV_VL5, 16)) -+ -+/* -+** qincb_pat_n_vl6_u64: -+** uqincb x0, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl6_u64, uint64_t, -+ x0 = svqincb_pat_n_u64 (x0, SV_VL6, 16), -+ x0 = svqincb_pat (x0, SV_VL6, 16)) -+ -+/* -+** qincb_pat_n_vl7_u64: -+** uqincb x0, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl7_u64, uint64_t, -+ x0 = svqincb_pat_n_u64 (x0, SV_VL7, 16), -+ x0 = svqincb_pat (x0, SV_VL7, 16)) -+ -+/* -+** qincb_pat_n_vl8_u64: -+** uqincb x0, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl8_u64, uint64_t, -+ x0 = svqincb_pat_n_u64 (x0, SV_VL8, 16), -+ x0 = svqincb_pat (x0, SV_VL8, 16)) -+ -+/* -+** qincb_pat_n_vl16_u64: -+** uqincb x0, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl16_u64, uint64_t, -+ x0 = svqincb_pat_n_u64 (x0, SV_VL16, 16), -+ x0 = svqincb_pat (x0, SV_VL16, 16)) -+ -+/* -+** qincb_pat_n_vl32_u64: -+** uqincb x0, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl32_u64, uint64_t, -+ x0 = svqincb_pat_n_u64 (x0, SV_VL32, 16), -+ x0 = svqincb_pat (x0, SV_VL32, 16)) -+ -+/* -+** qincb_pat_n_vl64_u64: -+** uqincb x0, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl64_u64, uint64_t, -+ x0 = svqincb_pat_n_u64 (x0, SV_VL64, 16), -+ x0 = svqincb_pat (x0, SV_VL64, 16)) -+ -+/* -+** qincb_pat_n_vl128_u64: -+** uqincb x0, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl128_u64, uint64_t, -+ x0 = svqincb_pat_n_u64 (x0, SV_VL128, 16), -+ x0 = svqincb_pat (x0, SV_VL128, 16)) -+ -+/* -+** qincb_pat_n_vl256_u64: -+** uqincb x0, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_vl256_u64, uint64_t, -+ x0 = svqincb_pat_n_u64 (x0, SV_VL256, 16), -+ x0 = svqincb_pat (x0, SV_VL256, 16)) -+ -+/* -+** qincb_pat_n_mul4_u64: -+** uqincb x0, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_mul4_u64, uint64_t, -+ x0 = svqincb_pat_n_u64 (x0, SV_MUL4, 16), -+ x0 = svqincb_pat (x0, SV_MUL4, 16)) -+ -+/* -+** qincb_pat_n_mul3_u64: -+** uqincb x0, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_mul3_u64, uint64_t, -+ x0 = svqincb_pat_n_u64 (x0, SV_MUL3, 16), -+ x0 = svqincb_pat (x0, SV_MUL3, 16)) -+ -+/* -+** qincb_pat_n_all_u64: -+** uqincb x0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_pat_n_all_u64, uint64_t, -+ x0 = svqincb_pat_n_u64 (x0, SV_ALL, 16), -+ x0 = svqincb_pat (x0, SV_ALL, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_s32.c -new file mode 100644 -index 000000000..8e74073de ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_s32.c -@@ -0,0 +1,58 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qincb_n_1_s32_tied: -+** sqincb x0, w0 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_n_1_s32_tied, int32_t, -+ x0 = svqincb_n_s32 (x0, 1), -+ x0 = svqincb (x0, 1)) -+ -+/* -+** qincb_n_1_s32_untied: -+** mov w0, w1 -+** sqincb x0, w0 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_n_1_s32_untied, int32_t, -+ x0 = svqincb_n_s32 (x1, 1), -+ x0 = svqincb (x1, 1)) -+ -+/* -+** qincb_n_2_s32: -+** sqincb x0, w0, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_n_2_s32, int32_t, -+ x0 = svqincb_n_s32 (x0, 
2), -+ x0 = svqincb (x0, 2)) -+ -+/* -+** qincb_n_7_s32: -+** sqincb x0, w0, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_n_7_s32, int32_t, -+ x0 = svqincb_n_s32 (x0, 7), -+ x0 = svqincb (x0, 7)) -+ -+/* -+** qincb_n_15_s32: -+** sqincb x0, w0, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_n_15_s32, int32_t, -+ x0 = svqincb_n_s32 (x0, 15), -+ x0 = svqincb (x0, 15)) -+ -+/* -+** qincb_n_16_s32: -+** sqincb x0, w0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_n_16_s32, int32_t, -+ x0 = svqincb_n_s32 (x0, 16), -+ x0 = svqincb (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_s64.c -new file mode 100644 -index 000000000..b064c1264 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_s64.c -@@ -0,0 +1,58 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qincb_n_1_s64_tied: -+** sqincb x0 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_n_1_s64_tied, int64_t, -+ x0 = svqincb_n_s64 (x0, 1), -+ x0 = svqincb (x0, 1)) -+ -+/* -+** qincb_n_1_s64_untied: -+** mov x0, x1 -+** sqincb x0 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_n_1_s64_untied, int64_t, -+ x0 = svqincb_n_s64 (x1, 1), -+ x0 = svqincb (x1, 1)) -+ -+/* -+** qincb_n_2_s64: -+** sqincb x0, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_n_2_s64, int64_t, -+ x0 = svqincb_n_s64 (x0, 2), -+ x0 = svqincb (x0, 2)) -+ -+/* -+** qincb_n_7_s64: -+** sqincb x0, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_n_7_s64, int64_t, -+ x0 = svqincb_n_s64 (x0, 7), -+ x0 = svqincb (x0, 7)) -+ -+/* -+** qincb_n_15_s64: -+** sqincb x0, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_n_15_s64, int64_t, -+ x0 = svqincb_n_s64 (x0, 15), -+ x0 = svqincb (x0, 15)) -+ -+/* -+** qincb_n_16_s64: -+** sqincb x0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_n_16_s64, int64_t, -+ x0 = svqincb_n_s64 (x0, 16), -+ x0 = svqincb (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_u32.c -new file mode 100644 -index 000000000..df3add73e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_u32.c -@@ -0,0 +1,58 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qincb_n_1_u32_tied: -+** uqincb w0 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_n_1_u32_tied, uint32_t, -+ x0 = svqincb_n_u32 (x0, 1), -+ x0 = svqincb (x0, 1)) -+ -+/* -+** qincb_n_1_u32_untied: -+** mov w0, w1 -+** uqincb w0 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_n_1_u32_untied, uint32_t, -+ x0 = svqincb_n_u32 (x1, 1), -+ x0 = svqincb (x1, 1)) -+ -+/* -+** qincb_n_2_u32: -+** uqincb w0, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_n_2_u32, uint32_t, -+ x0 = svqincb_n_u32 (x0, 2), -+ x0 = svqincb (x0, 2)) -+ -+/* -+** qincb_n_7_u32: -+** uqincb w0, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_n_7_u32, uint32_t, -+ x0 = svqincb_n_u32 (x0, 7), -+ x0 = svqincb (x0, 7)) -+ -+/* -+** qincb_n_15_u32: -+** uqincb w0, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_n_15_u32, uint32_t, -+ x0 = svqincb_n_u32 (x0, 15), -+ x0 = svqincb (x0, 15)) -+ -+/* -+** qincb_n_16_u32: -+** uqincb w0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_n_16_u32, uint32_t, -+ x0 = svqincb_n_u32 (x0, 16), -+ x0 = svqincb (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_u64.c -new file mode 100644 -index 
000000000..d9a08c865 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincb_u64.c -@@ -0,0 +1,58 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qincb_n_1_u64_tied: -+** uqincb x0 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_n_1_u64_tied, uint64_t, -+ x0 = svqincb_n_u64 (x0, 1), -+ x0 = svqincb (x0, 1)) -+ -+/* -+** qincb_n_1_u64_untied: -+** mov x0, x1 -+** uqincb x0 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_n_1_u64_untied, uint64_t, -+ x0 = svqincb_n_u64 (x1, 1), -+ x0 = svqincb (x1, 1)) -+ -+/* -+** qincb_n_2_u64: -+** uqincb x0, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_n_2_u64, uint64_t, -+ x0 = svqincb_n_u64 (x0, 2), -+ x0 = svqincb (x0, 2)) -+ -+/* -+** qincb_n_7_u64: -+** uqincb x0, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_n_7_u64, uint64_t, -+ x0 = svqincb_n_u64 (x0, 7), -+ x0 = svqincb (x0, 7)) -+ -+/* -+** qincb_n_15_u64: -+** uqincb x0, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_n_15_u64, uint64_t, -+ x0 = svqincb_n_u64 (x0, 15), -+ x0 = svqincb (x0, 15)) -+ -+/* -+** qincb_n_16_u64: -+** uqincb x0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincb_n_16_u64, uint64_t, -+ x0 = svqincb_n_u64 (x0, 16), -+ x0 = svqincb (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_pat_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_pat_s32.c -new file mode 100644 -index 000000000..061f88314 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_pat_s32.c -@@ -0,0 +1,202 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qincd_pat_n_1_s32_tied: -+** sqincd x0, w0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_1_s32_tied, int32_t, -+ x0 = svqincd_pat_n_s32 (x0, SV_POW2, 1), -+ x0 = svqincd_pat (x0, SV_POW2, 1)) -+ -+/* -+** qincd_pat_n_1_s32_untied: -+** mov w0, w1 -+** sqincd x0, w0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_1_s32_untied, int32_t, -+ x0 = svqincd_pat_n_s32 (x1, SV_POW2, 1), -+ x0 = svqincd_pat (x1, SV_POW2, 1)) -+ -+/* -+** qincd_pat_n_2_s32: -+** sqincd x0, w0, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_2_s32, int32_t, -+ x0 = svqincd_pat_n_s32 (x0, SV_POW2, 2), -+ x0 = svqincd_pat (x0, SV_POW2, 2)) -+ -+/* -+** qincd_pat_n_7_s32: -+** sqincd x0, w0, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_7_s32, int32_t, -+ x0 = svqincd_pat_n_s32 (x0, SV_POW2, 7), -+ x0 = svqincd_pat (x0, SV_POW2, 7)) -+ -+/* -+** qincd_pat_n_15_s32: -+** sqincd x0, w0, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_15_s32, int32_t, -+ x0 = svqincd_pat_n_s32 (x0, SV_POW2, 15), -+ x0 = svqincd_pat (x0, SV_POW2, 15)) -+ -+/* -+** qincd_pat_n_16_s32: -+** sqincd x0, w0, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_16_s32, int32_t, -+ x0 = svqincd_pat_n_s32 (x0, SV_POW2, 16), -+ x0 = svqincd_pat (x0, SV_POW2, 16)) -+ -+/* -+** qincd_pat_n_vl1_s32: -+** sqincd x0, w0, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl1_s32, int32_t, -+ x0 = svqincd_pat_n_s32 (x0, SV_VL1, 16), -+ x0 = svqincd_pat (x0, SV_VL1, 16)) -+ -+/* -+** qincd_pat_n_vl2_s32: -+** sqincd x0, w0, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl2_s32, int32_t, -+ x0 = svqincd_pat_n_s32 (x0, SV_VL2, 16), -+ x0 = svqincd_pat (x0, SV_VL2, 16)) -+ -+/* -+** qincd_pat_n_vl3_s32: -+** sqincd x0, w0, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl3_s32, int32_t, -+ x0 = svqincd_pat_n_s32 (x0, SV_VL3, 16), -+ x0 = svqincd_pat (x0, 
SV_VL3, 16)) -+ -+/* -+** qincd_pat_n_vl4_s32: -+** sqincd x0, w0, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl4_s32, int32_t, -+ x0 = svqincd_pat_n_s32 (x0, SV_VL4, 16), -+ x0 = svqincd_pat (x0, SV_VL4, 16)) -+ -+/* -+** qincd_pat_n_vl5_s32: -+** sqincd x0, w0, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl5_s32, int32_t, -+ x0 = svqincd_pat_n_s32 (x0, SV_VL5, 16), -+ x0 = svqincd_pat (x0, SV_VL5, 16)) -+ -+/* -+** qincd_pat_n_vl6_s32: -+** sqincd x0, w0, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl6_s32, int32_t, -+ x0 = svqincd_pat_n_s32 (x0, SV_VL6, 16), -+ x0 = svqincd_pat (x0, SV_VL6, 16)) -+ -+/* -+** qincd_pat_n_vl7_s32: -+** sqincd x0, w0, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl7_s32, int32_t, -+ x0 = svqincd_pat_n_s32 (x0, SV_VL7, 16), -+ x0 = svqincd_pat (x0, SV_VL7, 16)) -+ -+/* -+** qincd_pat_n_vl8_s32: -+** sqincd x0, w0, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl8_s32, int32_t, -+ x0 = svqincd_pat_n_s32 (x0, SV_VL8, 16), -+ x0 = svqincd_pat (x0, SV_VL8, 16)) -+ -+/* -+** qincd_pat_n_vl16_s32: -+** sqincd x0, w0, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl16_s32, int32_t, -+ x0 = svqincd_pat_n_s32 (x0, SV_VL16, 16), -+ x0 = svqincd_pat (x0, SV_VL16, 16)) -+ -+/* -+** qincd_pat_n_vl32_s32: -+** sqincd x0, w0, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl32_s32, int32_t, -+ x0 = svqincd_pat_n_s32 (x0, SV_VL32, 16), -+ x0 = svqincd_pat (x0, SV_VL32, 16)) -+ -+/* -+** qincd_pat_n_vl64_s32: -+** sqincd x0, w0, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl64_s32, int32_t, -+ x0 = svqincd_pat_n_s32 (x0, SV_VL64, 16), -+ x0 = svqincd_pat (x0, SV_VL64, 16)) -+ -+/* -+** qincd_pat_n_vl128_s32: -+** sqincd x0, w0, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl128_s32, int32_t, -+ x0 = svqincd_pat_n_s32 (x0, SV_VL128, 16), -+ x0 = svqincd_pat (x0, SV_VL128, 16)) -+ -+/* -+** qincd_pat_n_vl256_s32: -+** sqincd x0, w0, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl256_s32, int32_t, -+ x0 = svqincd_pat_n_s32 (x0, SV_VL256, 16), -+ x0 = svqincd_pat (x0, SV_VL256, 16)) -+ -+/* -+** qincd_pat_n_mul4_s32: -+** sqincd x0, w0, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_mul4_s32, int32_t, -+ x0 = svqincd_pat_n_s32 (x0, SV_MUL4, 16), -+ x0 = svqincd_pat (x0, SV_MUL4, 16)) -+ -+/* -+** qincd_pat_n_mul3_s32: -+** sqincd x0, w0, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_mul3_s32, int32_t, -+ x0 = svqincd_pat_n_s32 (x0, SV_MUL3, 16), -+ x0 = svqincd_pat (x0, SV_MUL3, 16)) -+ -+/* -+** qincd_pat_n_all_s32: -+** sqincd x0, w0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_all_s32, int32_t, -+ x0 = svqincd_pat_n_s32 (x0, SV_ALL, 16), -+ x0 = svqincd_pat (x0, SV_ALL, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_pat_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_pat_s64.c -new file mode 100644 -index 000000000..02b53e1bc ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_pat_s64.c -@@ -0,0 +1,401 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qincd_pat_1_s64_tied: -+** sqincd z0\.d, pow2 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_1_s64_tied, svint64_t, -+ z0 = svqincd_pat_s64 (z0, SV_POW2, 1), -+ z0 = svqincd_pat (z0, SV_POW2, 1)) -+ -+/* -+** qincd_pat_1_s64_untied: -+** movprfx z0, z1 -+** sqincd z0\.d, pow2 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_1_s64_untied, svint64_t, -+ z0 = 
svqincd_pat_s64 (z1, SV_POW2, 1), -+ z0 = svqincd_pat (z1, SV_POW2, 1)) -+ -+/* -+** qincd_pat_2_s64: -+** sqincd z0\.d, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_2_s64, svint64_t, -+ z0 = svqincd_pat_s64 (z0, SV_POW2, 2), -+ z0 = svqincd_pat (z0, SV_POW2, 2)) -+ -+/* -+** qincd_pat_7_s64: -+** sqincd z0\.d, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_7_s64, svint64_t, -+ z0 = svqincd_pat_s64 (z0, SV_POW2, 7), -+ z0 = svqincd_pat (z0, SV_POW2, 7)) -+ -+/* -+** qincd_pat_15_s64: -+** sqincd z0\.d, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_15_s64, svint64_t, -+ z0 = svqincd_pat_s64 (z0, SV_POW2, 15), -+ z0 = svqincd_pat (z0, SV_POW2, 15)) -+ -+/* -+** qincd_pat_16_s64: -+** sqincd z0\.d, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_16_s64, svint64_t, -+ z0 = svqincd_pat_s64 (z0, SV_POW2, 16), -+ z0 = svqincd_pat (z0, SV_POW2, 16)) -+ -+/* -+** qincd_pat_vl1_s64: -+** sqincd z0\.d, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_vl1_s64, svint64_t, -+ z0 = svqincd_pat_s64 (z0, SV_VL1, 16), -+ z0 = svqincd_pat (z0, SV_VL1, 16)) -+ -+/* -+** qincd_pat_vl2_s64: -+** sqincd z0\.d, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_vl2_s64, svint64_t, -+ z0 = svqincd_pat_s64 (z0, SV_VL2, 16), -+ z0 = svqincd_pat (z0, SV_VL2, 16)) -+ -+/* -+** qincd_pat_vl3_s64: -+** sqincd z0\.d, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_vl3_s64, svint64_t, -+ z0 = svqincd_pat_s64 (z0, SV_VL3, 16), -+ z0 = svqincd_pat (z0, SV_VL3, 16)) -+ -+/* -+** qincd_pat_vl4_s64: -+** sqincd z0\.d, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_vl4_s64, svint64_t, -+ z0 = svqincd_pat_s64 (z0, SV_VL4, 16), -+ z0 = svqincd_pat (z0, SV_VL4, 16)) -+ -+/* -+** qincd_pat_vl5_s64: -+** sqincd z0\.d, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_vl5_s64, svint64_t, -+ z0 = svqincd_pat_s64 (z0, SV_VL5, 16), -+ z0 = svqincd_pat (z0, SV_VL5, 16)) -+ -+/* -+** qincd_pat_vl6_s64: -+** sqincd z0\.d, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_vl6_s64, svint64_t, -+ z0 = svqincd_pat_s64 (z0, SV_VL6, 16), -+ z0 = svqincd_pat (z0, SV_VL6, 16)) -+ -+/* -+** qincd_pat_vl7_s64: -+** sqincd z0\.d, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_vl7_s64, svint64_t, -+ z0 = svqincd_pat_s64 (z0, SV_VL7, 16), -+ z0 = svqincd_pat (z0, SV_VL7, 16)) -+ -+/* -+** qincd_pat_vl8_s64: -+** sqincd z0\.d, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_vl8_s64, svint64_t, -+ z0 = svqincd_pat_s64 (z0, SV_VL8, 16), -+ z0 = svqincd_pat (z0, SV_VL8, 16)) -+ -+/* -+** qincd_pat_vl16_s64: -+** sqincd z0\.d, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_vl16_s64, svint64_t, -+ z0 = svqincd_pat_s64 (z0, SV_VL16, 16), -+ z0 = svqincd_pat (z0, SV_VL16, 16)) -+ -+/* -+** qincd_pat_vl32_s64: -+** sqincd z0\.d, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_vl32_s64, svint64_t, -+ z0 = svqincd_pat_s64 (z0, SV_VL32, 16), -+ z0 = svqincd_pat (z0, SV_VL32, 16)) -+ -+/* -+** qincd_pat_vl64_s64: -+** sqincd z0\.d, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_vl64_s64, svint64_t, -+ z0 = svqincd_pat_s64 (z0, SV_VL64, 16), -+ z0 = svqincd_pat (z0, SV_VL64, 16)) -+ -+/* -+** qincd_pat_vl128_s64: -+** sqincd z0\.d, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_vl128_s64, svint64_t, -+ z0 = svqincd_pat_s64 (z0, SV_VL128, 16), -+ z0 = svqincd_pat (z0, SV_VL128, 16)) -+ -+/* -+** qincd_pat_vl256_s64: -+** sqincd z0\.d, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_vl256_s64, svint64_t, -+ z0 = svqincd_pat_s64 (z0, 
SV_VL256, 16), -+ z0 = svqincd_pat (z0, SV_VL256, 16)) -+ -+/* -+** qincd_pat_mul4_s64: -+** sqincd z0\.d, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_mul4_s64, svint64_t, -+ z0 = svqincd_pat_s64 (z0, SV_MUL4, 16), -+ z0 = svqincd_pat (z0, SV_MUL4, 16)) -+ -+/* -+** qincd_pat_mul3_s64: -+** sqincd z0\.d, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_mul3_s64, svint64_t, -+ z0 = svqincd_pat_s64 (z0, SV_MUL3, 16), -+ z0 = svqincd_pat (z0, SV_MUL3, 16)) -+ -+/* -+** qincd_pat_all_s64: -+** sqincd z0\.d, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_all_s64, svint64_t, -+ z0 = svqincd_pat_s64 (z0, SV_ALL, 16), -+ z0 = svqincd_pat (z0, SV_ALL, 16)) -+ -+/* -+** qincd_pat_n_1_s64_tied: -+** sqincd x0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_1_s64_tied, int64_t, -+ x0 = svqincd_pat_n_s64 (x0, SV_POW2, 1), -+ x0 = svqincd_pat (x0, SV_POW2, 1)) -+ -+/* -+** qincd_pat_n_1_s64_untied: -+** mov x0, x1 -+** sqincd x0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_1_s64_untied, int64_t, -+ x0 = svqincd_pat_n_s64 (x1, SV_POW2, 1), -+ x0 = svqincd_pat (x1, SV_POW2, 1)) -+ -+/* -+** qincd_pat_n_2_s64: -+** sqincd x0, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_2_s64, int64_t, -+ x0 = svqincd_pat_n_s64 (x0, SV_POW2, 2), -+ x0 = svqincd_pat (x0, SV_POW2, 2)) -+ -+/* -+** qincd_pat_n_7_s64: -+** sqincd x0, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_7_s64, int64_t, -+ x0 = svqincd_pat_n_s64 (x0, SV_POW2, 7), -+ x0 = svqincd_pat (x0, SV_POW2, 7)) -+ -+/* -+** qincd_pat_n_15_s64: -+** sqincd x0, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_15_s64, int64_t, -+ x0 = svqincd_pat_n_s64 (x0, SV_POW2, 15), -+ x0 = svqincd_pat (x0, SV_POW2, 15)) -+ -+/* -+** qincd_pat_n_16_s64: -+** sqincd x0, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_16_s64, int64_t, -+ x0 = svqincd_pat_n_s64 (x0, SV_POW2, 16), -+ x0 = svqincd_pat (x0, SV_POW2, 16)) -+ -+/* -+** qincd_pat_n_vl1_s64: -+** sqincd x0, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl1_s64, int64_t, -+ x0 = svqincd_pat_n_s64 (x0, SV_VL1, 16), -+ x0 = svqincd_pat (x0, SV_VL1, 16)) -+ -+/* -+** qincd_pat_n_vl2_s64: -+** sqincd x0, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl2_s64, int64_t, -+ x0 = svqincd_pat_n_s64 (x0, SV_VL2, 16), -+ x0 = svqincd_pat (x0, SV_VL2, 16)) -+ -+/* -+** qincd_pat_n_vl3_s64: -+** sqincd x0, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl3_s64, int64_t, -+ x0 = svqincd_pat_n_s64 (x0, SV_VL3, 16), -+ x0 = svqincd_pat (x0, SV_VL3, 16)) -+ -+/* -+** qincd_pat_n_vl4_s64: -+** sqincd x0, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl4_s64, int64_t, -+ x0 = svqincd_pat_n_s64 (x0, SV_VL4, 16), -+ x0 = svqincd_pat (x0, SV_VL4, 16)) -+ -+/* -+** qincd_pat_n_vl5_s64: -+** sqincd x0, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl5_s64, int64_t, -+ x0 = svqincd_pat_n_s64 (x0, SV_VL5, 16), -+ x0 = svqincd_pat (x0, SV_VL5, 16)) -+ -+/* -+** qincd_pat_n_vl6_s64: -+** sqincd x0, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl6_s64, int64_t, -+ x0 = svqincd_pat_n_s64 (x0, SV_VL6, 16), -+ x0 = svqincd_pat (x0, SV_VL6, 16)) -+ -+/* -+** qincd_pat_n_vl7_s64: -+** sqincd x0, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl7_s64, int64_t, -+ x0 = svqincd_pat_n_s64 (x0, SV_VL7, 16), -+ x0 = svqincd_pat (x0, SV_VL7, 16)) -+ -+/* -+** qincd_pat_n_vl8_s64: -+** sqincd x0, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl8_s64, int64_t, -+ x0 = svqincd_pat_n_s64 (x0, SV_VL8, 16), 
-+ x0 = svqincd_pat (x0, SV_VL8, 16)) -+ -+/* -+** qincd_pat_n_vl16_s64: -+** sqincd x0, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl16_s64, int64_t, -+ x0 = svqincd_pat_n_s64 (x0, SV_VL16, 16), -+ x0 = svqincd_pat (x0, SV_VL16, 16)) -+ -+/* -+** qincd_pat_n_vl32_s64: -+** sqincd x0, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl32_s64, int64_t, -+ x0 = svqincd_pat_n_s64 (x0, SV_VL32, 16), -+ x0 = svqincd_pat (x0, SV_VL32, 16)) -+ -+/* -+** qincd_pat_n_vl64_s64: -+** sqincd x0, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl64_s64, int64_t, -+ x0 = svqincd_pat_n_s64 (x0, SV_VL64, 16), -+ x0 = svqincd_pat (x0, SV_VL64, 16)) -+ -+/* -+** qincd_pat_n_vl128_s64: -+** sqincd x0, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl128_s64, int64_t, -+ x0 = svqincd_pat_n_s64 (x0, SV_VL128, 16), -+ x0 = svqincd_pat (x0, SV_VL128, 16)) -+ -+/* -+** qincd_pat_n_vl256_s64: -+** sqincd x0, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl256_s64, int64_t, -+ x0 = svqincd_pat_n_s64 (x0, SV_VL256, 16), -+ x0 = svqincd_pat (x0, SV_VL256, 16)) -+ -+/* -+** qincd_pat_n_mul4_s64: -+** sqincd x0, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_mul4_s64, int64_t, -+ x0 = svqincd_pat_n_s64 (x0, SV_MUL4, 16), -+ x0 = svqincd_pat (x0, SV_MUL4, 16)) -+ -+/* -+** qincd_pat_n_mul3_s64: -+** sqincd x0, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_mul3_s64, int64_t, -+ x0 = svqincd_pat_n_s64 (x0, SV_MUL3, 16), -+ x0 = svqincd_pat (x0, SV_MUL3, 16)) -+ -+/* -+** qincd_pat_n_all_s64: -+** sqincd x0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_all_s64, int64_t, -+ x0 = svqincd_pat_n_s64 (x0, SV_ALL, 16), -+ x0 = svqincd_pat (x0, SV_ALL, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_pat_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_pat_u32.c -new file mode 100644 -index 000000000..0e3cbdb54 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_pat_u32.c -@@ -0,0 +1,202 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qincd_pat_n_1_u32_tied: -+** uqincd w0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_1_u32_tied, uint32_t, -+ x0 = svqincd_pat_n_u32 (x0, SV_POW2, 1), -+ x0 = svqincd_pat (x0, SV_POW2, 1)) -+ -+/* -+** qincd_pat_n_1_u32_untied: -+** mov w0, w1 -+** uqincd w0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_1_u32_untied, uint32_t, -+ x0 = svqincd_pat_n_u32 (x1, SV_POW2, 1), -+ x0 = svqincd_pat (x1, SV_POW2, 1)) -+ -+/* -+** qincd_pat_n_2_u32: -+** uqincd w0, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_2_u32, uint32_t, -+ x0 = svqincd_pat_n_u32 (x0, SV_POW2, 2), -+ x0 = svqincd_pat (x0, SV_POW2, 2)) -+ -+/* -+** qincd_pat_n_7_u32: -+** uqincd w0, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_7_u32, uint32_t, -+ x0 = svqincd_pat_n_u32 (x0, SV_POW2, 7), -+ x0 = svqincd_pat (x0, SV_POW2, 7)) -+ -+/* -+** qincd_pat_n_15_u32: -+** uqincd w0, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_15_u32, uint32_t, -+ x0 = svqincd_pat_n_u32 (x0, SV_POW2, 15), -+ x0 = svqincd_pat (x0, SV_POW2, 15)) -+ -+/* -+** qincd_pat_n_16_u32: -+** uqincd w0, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_16_u32, uint32_t, -+ x0 = svqincd_pat_n_u32 (x0, SV_POW2, 16), -+ x0 = svqincd_pat (x0, SV_POW2, 16)) -+ -+/* -+** qincd_pat_n_vl1_u32: -+** uqincd w0, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl1_u32, uint32_t, -+ x0 = svqincd_pat_n_u32 (x0, 
SV_VL1, 16), -+ x0 = svqincd_pat (x0, SV_VL1, 16)) -+ -+/* -+** qincd_pat_n_vl2_u32: -+** uqincd w0, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl2_u32, uint32_t, -+ x0 = svqincd_pat_n_u32 (x0, SV_VL2, 16), -+ x0 = svqincd_pat (x0, SV_VL2, 16)) -+ -+/* -+** qincd_pat_n_vl3_u32: -+** uqincd w0, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl3_u32, uint32_t, -+ x0 = svqincd_pat_n_u32 (x0, SV_VL3, 16), -+ x0 = svqincd_pat (x0, SV_VL3, 16)) -+ -+/* -+** qincd_pat_n_vl4_u32: -+** uqincd w0, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl4_u32, uint32_t, -+ x0 = svqincd_pat_n_u32 (x0, SV_VL4, 16), -+ x0 = svqincd_pat (x0, SV_VL4, 16)) -+ -+/* -+** qincd_pat_n_vl5_u32: -+** uqincd w0, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl5_u32, uint32_t, -+ x0 = svqincd_pat_n_u32 (x0, SV_VL5, 16), -+ x0 = svqincd_pat (x0, SV_VL5, 16)) -+ -+/* -+** qincd_pat_n_vl6_u32: -+** uqincd w0, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl6_u32, uint32_t, -+ x0 = svqincd_pat_n_u32 (x0, SV_VL6, 16), -+ x0 = svqincd_pat (x0, SV_VL6, 16)) -+ -+/* -+** qincd_pat_n_vl7_u32: -+** uqincd w0, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl7_u32, uint32_t, -+ x0 = svqincd_pat_n_u32 (x0, SV_VL7, 16), -+ x0 = svqincd_pat (x0, SV_VL7, 16)) -+ -+/* -+** qincd_pat_n_vl8_u32: -+** uqincd w0, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl8_u32, uint32_t, -+ x0 = svqincd_pat_n_u32 (x0, SV_VL8, 16), -+ x0 = svqincd_pat (x0, SV_VL8, 16)) -+ -+/* -+** qincd_pat_n_vl16_u32: -+** uqincd w0, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl16_u32, uint32_t, -+ x0 = svqincd_pat_n_u32 (x0, SV_VL16, 16), -+ x0 = svqincd_pat (x0, SV_VL16, 16)) -+ -+/* -+** qincd_pat_n_vl32_u32: -+** uqincd w0, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl32_u32, uint32_t, -+ x0 = svqincd_pat_n_u32 (x0, SV_VL32, 16), -+ x0 = svqincd_pat (x0, SV_VL32, 16)) -+ -+/* -+** qincd_pat_n_vl64_u32: -+** uqincd w0, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl64_u32, uint32_t, -+ x0 = svqincd_pat_n_u32 (x0, SV_VL64, 16), -+ x0 = svqincd_pat (x0, SV_VL64, 16)) -+ -+/* -+** qincd_pat_n_vl128_u32: -+** uqincd w0, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl128_u32, uint32_t, -+ x0 = svqincd_pat_n_u32 (x0, SV_VL128, 16), -+ x0 = svqincd_pat (x0, SV_VL128, 16)) -+ -+/* -+** qincd_pat_n_vl256_u32: -+** uqincd w0, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl256_u32, uint32_t, -+ x0 = svqincd_pat_n_u32 (x0, SV_VL256, 16), -+ x0 = svqincd_pat (x0, SV_VL256, 16)) -+ -+/* -+** qincd_pat_n_mul4_u32: -+** uqincd w0, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_mul4_u32, uint32_t, -+ x0 = svqincd_pat_n_u32 (x0, SV_MUL4, 16), -+ x0 = svqincd_pat (x0, SV_MUL4, 16)) -+ -+/* -+** qincd_pat_n_mul3_u32: -+** uqincd w0, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_mul3_u32, uint32_t, -+ x0 = svqincd_pat_n_u32 (x0, SV_MUL3, 16), -+ x0 = svqincd_pat (x0, SV_MUL3, 16)) -+ -+/* -+** qincd_pat_n_all_u32: -+** uqincd w0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_all_u32, uint32_t, -+ x0 = svqincd_pat_n_u32 (x0, SV_ALL, 16), -+ x0 = svqincd_pat (x0, SV_ALL, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_pat_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_pat_u64.c -new file mode 100644 -index 000000000..49dc350df ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_pat_u64.c -@@ -0,0 +1,401 @@ -+/* { dg-final { check-function-bodies "**" 
"" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qincd_pat_1_u64_tied: -+** uqincd z0\.d, pow2 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_1_u64_tied, svuint64_t, -+ z0 = svqincd_pat_u64 (z0, SV_POW2, 1), -+ z0 = svqincd_pat (z0, SV_POW2, 1)) -+ -+/* -+** qincd_pat_1_u64_untied: -+** movprfx z0, z1 -+** uqincd z0\.d, pow2 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_1_u64_untied, svuint64_t, -+ z0 = svqincd_pat_u64 (z1, SV_POW2, 1), -+ z0 = svqincd_pat (z1, SV_POW2, 1)) -+ -+/* -+** qincd_pat_2_u64: -+** uqincd z0\.d, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_2_u64, svuint64_t, -+ z0 = svqincd_pat_u64 (z0, SV_POW2, 2), -+ z0 = svqincd_pat (z0, SV_POW2, 2)) -+ -+/* -+** qincd_pat_7_u64: -+** uqincd z0\.d, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_7_u64, svuint64_t, -+ z0 = svqincd_pat_u64 (z0, SV_POW2, 7), -+ z0 = svqincd_pat (z0, SV_POW2, 7)) -+ -+/* -+** qincd_pat_15_u64: -+** uqincd z0\.d, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_15_u64, svuint64_t, -+ z0 = svqincd_pat_u64 (z0, SV_POW2, 15), -+ z0 = svqincd_pat (z0, SV_POW2, 15)) -+ -+/* -+** qincd_pat_16_u64: -+** uqincd z0\.d, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_16_u64, svuint64_t, -+ z0 = svqincd_pat_u64 (z0, SV_POW2, 16), -+ z0 = svqincd_pat (z0, SV_POW2, 16)) -+ -+/* -+** qincd_pat_vl1_u64: -+** uqincd z0\.d, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_vl1_u64, svuint64_t, -+ z0 = svqincd_pat_u64 (z0, SV_VL1, 16), -+ z0 = svqincd_pat (z0, SV_VL1, 16)) -+ -+/* -+** qincd_pat_vl2_u64: -+** uqincd z0\.d, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_vl2_u64, svuint64_t, -+ z0 = svqincd_pat_u64 (z0, SV_VL2, 16), -+ z0 = svqincd_pat (z0, SV_VL2, 16)) -+ -+/* -+** qincd_pat_vl3_u64: -+** uqincd z0\.d, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_vl3_u64, svuint64_t, -+ z0 = svqincd_pat_u64 (z0, SV_VL3, 16), -+ z0 = svqincd_pat (z0, SV_VL3, 16)) -+ -+/* -+** qincd_pat_vl4_u64: -+** uqincd z0\.d, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_vl4_u64, svuint64_t, -+ z0 = svqincd_pat_u64 (z0, SV_VL4, 16), -+ z0 = svqincd_pat (z0, SV_VL4, 16)) -+ -+/* -+** qincd_pat_vl5_u64: -+** uqincd z0\.d, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_vl5_u64, svuint64_t, -+ z0 = svqincd_pat_u64 (z0, SV_VL5, 16), -+ z0 = svqincd_pat (z0, SV_VL5, 16)) -+ -+/* -+** qincd_pat_vl6_u64: -+** uqincd z0\.d, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_vl6_u64, svuint64_t, -+ z0 = svqincd_pat_u64 (z0, SV_VL6, 16), -+ z0 = svqincd_pat (z0, SV_VL6, 16)) -+ -+/* -+** qincd_pat_vl7_u64: -+** uqincd z0\.d, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_vl7_u64, svuint64_t, -+ z0 = svqincd_pat_u64 (z0, SV_VL7, 16), -+ z0 = svqincd_pat (z0, SV_VL7, 16)) -+ -+/* -+** qincd_pat_vl8_u64: -+** uqincd z0\.d, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_vl8_u64, svuint64_t, -+ z0 = svqincd_pat_u64 (z0, SV_VL8, 16), -+ z0 = svqincd_pat (z0, SV_VL8, 16)) -+ -+/* -+** qincd_pat_vl16_u64: -+** uqincd z0\.d, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_vl16_u64, svuint64_t, -+ z0 = svqincd_pat_u64 (z0, SV_VL16, 16), -+ z0 = svqincd_pat (z0, SV_VL16, 16)) -+ -+/* -+** qincd_pat_vl32_u64: -+** uqincd z0\.d, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_vl32_u64, svuint64_t, -+ z0 = svqincd_pat_u64 (z0, SV_VL32, 16), -+ z0 = svqincd_pat (z0, SV_VL32, 16)) -+ -+/* -+** qincd_pat_vl64_u64: -+** uqincd z0\.d, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_vl64_u64, svuint64_t, -+ z0 = svqincd_pat_u64 (z0, 
SV_VL64, 16), -+ z0 = svqincd_pat (z0, SV_VL64, 16)) -+ -+/* -+** qincd_pat_vl128_u64: -+** uqincd z0\.d, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_vl128_u64, svuint64_t, -+ z0 = svqincd_pat_u64 (z0, SV_VL128, 16), -+ z0 = svqincd_pat (z0, SV_VL128, 16)) -+ -+/* -+** qincd_pat_vl256_u64: -+** uqincd z0\.d, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_vl256_u64, svuint64_t, -+ z0 = svqincd_pat_u64 (z0, SV_VL256, 16), -+ z0 = svqincd_pat (z0, SV_VL256, 16)) -+ -+/* -+** qincd_pat_mul4_u64: -+** uqincd z0\.d, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_mul4_u64, svuint64_t, -+ z0 = svqincd_pat_u64 (z0, SV_MUL4, 16), -+ z0 = svqincd_pat (z0, SV_MUL4, 16)) -+ -+/* -+** qincd_pat_mul3_u64: -+** uqincd z0\.d, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_mul3_u64, svuint64_t, -+ z0 = svqincd_pat_u64 (z0, SV_MUL3, 16), -+ z0 = svqincd_pat (z0, SV_MUL3, 16)) -+ -+/* -+** qincd_pat_all_u64: -+** uqincd z0\.d, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_pat_all_u64, svuint64_t, -+ z0 = svqincd_pat_u64 (z0, SV_ALL, 16), -+ z0 = svqincd_pat (z0, SV_ALL, 16)) -+ -+/* -+** qincd_pat_n_1_u64_tied: -+** uqincd x0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_1_u64_tied, uint64_t, -+ x0 = svqincd_pat_n_u64 (x0, SV_POW2, 1), -+ x0 = svqincd_pat (x0, SV_POW2, 1)) -+ -+/* -+** qincd_pat_n_1_u64_untied: -+** mov x0, x1 -+** uqincd x0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_1_u64_untied, uint64_t, -+ x0 = svqincd_pat_n_u64 (x1, SV_POW2, 1), -+ x0 = svqincd_pat (x1, SV_POW2, 1)) -+ -+/* -+** qincd_pat_n_2_u64: -+** uqincd x0, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_2_u64, uint64_t, -+ x0 = svqincd_pat_n_u64 (x0, SV_POW2, 2), -+ x0 = svqincd_pat (x0, SV_POW2, 2)) -+ -+/* -+** qincd_pat_n_7_u64: -+** uqincd x0, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_7_u64, uint64_t, -+ x0 = svqincd_pat_n_u64 (x0, SV_POW2, 7), -+ x0 = svqincd_pat (x0, SV_POW2, 7)) -+ -+/* -+** qincd_pat_n_15_u64: -+** uqincd x0, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_15_u64, uint64_t, -+ x0 = svqincd_pat_n_u64 (x0, SV_POW2, 15), -+ x0 = svqincd_pat (x0, SV_POW2, 15)) -+ -+/* -+** qincd_pat_n_16_u64: -+** uqincd x0, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_16_u64, uint64_t, -+ x0 = svqincd_pat_n_u64 (x0, SV_POW2, 16), -+ x0 = svqincd_pat (x0, SV_POW2, 16)) -+ -+/* -+** qincd_pat_n_vl1_u64: -+** uqincd x0, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl1_u64, uint64_t, -+ x0 = svqincd_pat_n_u64 (x0, SV_VL1, 16), -+ x0 = svqincd_pat (x0, SV_VL1, 16)) -+ -+/* -+** qincd_pat_n_vl2_u64: -+** uqincd x0, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl2_u64, uint64_t, -+ x0 = svqincd_pat_n_u64 (x0, SV_VL2, 16), -+ x0 = svqincd_pat (x0, SV_VL2, 16)) -+ -+/* -+** qincd_pat_n_vl3_u64: -+** uqincd x0, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl3_u64, uint64_t, -+ x0 = svqincd_pat_n_u64 (x0, SV_VL3, 16), -+ x0 = svqincd_pat (x0, SV_VL3, 16)) -+ -+/* -+** qincd_pat_n_vl4_u64: -+** uqincd x0, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl4_u64, uint64_t, -+ x0 = svqincd_pat_n_u64 (x0, SV_VL4, 16), -+ x0 = svqincd_pat (x0, SV_VL4, 16)) -+ -+/* -+** qincd_pat_n_vl5_u64: -+** uqincd x0, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl5_u64, uint64_t, -+ x0 = svqincd_pat_n_u64 (x0, SV_VL5, 16), -+ x0 = svqincd_pat (x0, SV_VL5, 16)) -+ -+/* -+** qincd_pat_n_vl6_u64: -+** uqincd x0, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl6_u64, uint64_t, -+ x0 = 
svqincd_pat_n_u64 (x0, SV_VL6, 16), -+ x0 = svqincd_pat (x0, SV_VL6, 16)) -+ -+/* -+** qincd_pat_n_vl7_u64: -+** uqincd x0, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl7_u64, uint64_t, -+ x0 = svqincd_pat_n_u64 (x0, SV_VL7, 16), -+ x0 = svqincd_pat (x0, SV_VL7, 16)) -+ -+/* -+** qincd_pat_n_vl8_u64: -+** uqincd x0, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl8_u64, uint64_t, -+ x0 = svqincd_pat_n_u64 (x0, SV_VL8, 16), -+ x0 = svqincd_pat (x0, SV_VL8, 16)) -+ -+/* -+** qincd_pat_n_vl16_u64: -+** uqincd x0, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl16_u64, uint64_t, -+ x0 = svqincd_pat_n_u64 (x0, SV_VL16, 16), -+ x0 = svqincd_pat (x0, SV_VL16, 16)) -+ -+/* -+** qincd_pat_n_vl32_u64: -+** uqincd x0, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl32_u64, uint64_t, -+ x0 = svqincd_pat_n_u64 (x0, SV_VL32, 16), -+ x0 = svqincd_pat (x0, SV_VL32, 16)) -+ -+/* -+** qincd_pat_n_vl64_u64: -+** uqincd x0, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl64_u64, uint64_t, -+ x0 = svqincd_pat_n_u64 (x0, SV_VL64, 16), -+ x0 = svqincd_pat (x0, SV_VL64, 16)) -+ -+/* -+** qincd_pat_n_vl128_u64: -+** uqincd x0, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl128_u64, uint64_t, -+ x0 = svqincd_pat_n_u64 (x0, SV_VL128, 16), -+ x0 = svqincd_pat (x0, SV_VL128, 16)) -+ -+/* -+** qincd_pat_n_vl256_u64: -+** uqincd x0, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_vl256_u64, uint64_t, -+ x0 = svqincd_pat_n_u64 (x0, SV_VL256, 16), -+ x0 = svqincd_pat (x0, SV_VL256, 16)) -+ -+/* -+** qincd_pat_n_mul4_u64: -+** uqincd x0, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_mul4_u64, uint64_t, -+ x0 = svqincd_pat_n_u64 (x0, SV_MUL4, 16), -+ x0 = svqincd_pat (x0, SV_MUL4, 16)) -+ -+/* -+** qincd_pat_n_mul3_u64: -+** uqincd x0, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_mul3_u64, uint64_t, -+ x0 = svqincd_pat_n_u64 (x0, SV_MUL3, 16), -+ x0 = svqincd_pat (x0, SV_MUL3, 16)) -+ -+/* -+** qincd_pat_n_all_u64: -+** uqincd x0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_pat_n_all_u64, uint64_t, -+ x0 = svqincd_pat_n_u64 (x0, SV_ALL, 16), -+ x0 = svqincd_pat (x0, SV_ALL, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_s32.c -new file mode 100644 -index 000000000..2fa0438a3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_s32.c -@@ -0,0 +1,58 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qincd_n_1_s32_tied: -+** sqincd x0, w0 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_n_1_s32_tied, int32_t, -+ x0 = svqincd_n_s32 (x0, 1), -+ x0 = svqincd (x0, 1)) -+ -+/* -+** qincd_n_1_s32_untied: -+** mov w0, w1 -+** sqincd x0, w0 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_n_1_s32_untied, int32_t, -+ x0 = svqincd_n_s32 (x1, 1), -+ x0 = svqincd (x1, 1)) -+ -+/* -+** qincd_n_2_s32: -+** sqincd x0, w0, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_n_2_s32, int32_t, -+ x0 = svqincd_n_s32 (x0, 2), -+ x0 = svqincd (x0, 2)) -+ -+/* -+** qincd_n_7_s32: -+** sqincd x0, w0, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_n_7_s32, int32_t, -+ x0 = svqincd_n_s32 (x0, 7), -+ x0 = svqincd (x0, 7)) -+ -+/* -+** qincd_n_15_s32: -+** sqincd x0, w0, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_n_15_s32, int32_t, -+ x0 = svqincd_n_s32 (x0, 15), -+ x0 = svqincd (x0, 15)) -+ -+/* -+** qincd_n_16_s32: -+** sqincd x0, w0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S 
(qincd_n_16_s32, int32_t, -+ x0 = svqincd_n_s32 (x0, 16), -+ x0 = svqincd (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_s64.c -new file mode 100644 -index 000000000..0920ac2ec ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_s64.c -@@ -0,0 +1,113 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qincd_1_s64_tied: -+** sqincd z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_1_s64_tied, svint64_t, -+ z0 = svqincd_s64 (z0, 1), -+ z0 = svqincd (z0, 1)) -+ -+/* -+** qincd_1_s64_untied: -+** movprfx z0, z1 -+** sqincd z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_1_s64_untied, svint64_t, -+ z0 = svqincd_s64 (z1, 1), -+ z0 = svqincd (z1, 1)) -+ -+/* -+** qincd_2_s64: -+** sqincd z0\.d, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_2_s64, svint64_t, -+ z0 = svqincd_s64 (z0, 2), -+ z0 = svqincd (z0, 2)) -+ -+/* -+** qincd_7_s64: -+** sqincd z0\.d, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_7_s64, svint64_t, -+ z0 = svqincd_s64 (z0, 7), -+ z0 = svqincd (z0, 7)) -+ -+/* -+** qincd_15_s64: -+** sqincd z0\.d, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_15_s64, svint64_t, -+ z0 = svqincd_s64 (z0, 15), -+ z0 = svqincd (z0, 15)) -+ -+/* -+** qincd_16_s64: -+** sqincd z0\.d, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_16_s64, svint64_t, -+ z0 = svqincd_s64 (z0, 16), -+ z0 = svqincd (z0, 16)) -+ -+/* -+** qincd_n_1_s64_tied: -+** sqincd x0 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_n_1_s64_tied, int64_t, -+ x0 = svqincd_n_s64 (x0, 1), -+ x0 = svqincd (x0, 1)) -+ -+/* -+** qincd_n_1_s64_untied: -+** mov x0, x1 -+** sqincd x0 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_n_1_s64_untied, int64_t, -+ x0 = svqincd_n_s64 (x1, 1), -+ x0 = svqincd (x1, 1)) -+ -+/* -+** qincd_n_2_s64: -+** sqincd x0, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_n_2_s64, int64_t, -+ x0 = svqincd_n_s64 (x0, 2), -+ x0 = svqincd (x0, 2)) -+ -+/* -+** qincd_n_7_s64: -+** sqincd x0, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_n_7_s64, int64_t, -+ x0 = svqincd_n_s64 (x0, 7), -+ x0 = svqincd (x0, 7)) -+ -+/* -+** qincd_n_15_s64: -+** sqincd x0, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_n_15_s64, int64_t, -+ x0 = svqincd_n_s64 (x0, 15), -+ x0 = svqincd (x0, 15)) -+ -+/* -+** qincd_n_16_s64: -+** sqincd x0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_n_16_s64, int64_t, -+ x0 = svqincd_n_s64 (x0, 16), -+ x0 = svqincd (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_u32.c -new file mode 100644 -index 000000000..33dc12cb1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_u32.c -@@ -0,0 +1,58 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qincd_n_1_u32_tied: -+** uqincd w0 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_n_1_u32_tied, uint32_t, -+ x0 = svqincd_n_u32 (x0, 1), -+ x0 = svqincd (x0, 1)) -+ -+/* -+** qincd_n_1_u32_untied: -+** mov w0, w1 -+** uqincd w0 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_n_1_u32_untied, uint32_t, -+ x0 = svqincd_n_u32 (x1, 1), -+ x0 = svqincd (x1, 1)) -+ -+/* -+** qincd_n_2_u32: -+** uqincd w0, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_n_2_u32, uint32_t, -+ x0 = svqincd_n_u32 (x0, 2), -+ x0 = svqincd (x0, 2)) -+ -+/* -+** qincd_n_7_u32: -+** uqincd w0, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_n_7_u32, uint32_t, -+ x0 = 
svqincd_n_u32 (x0, 7), -+ x0 = svqincd (x0, 7)) -+ -+/* -+** qincd_n_15_u32: -+** uqincd w0, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_n_15_u32, uint32_t, -+ x0 = svqincd_n_u32 (x0, 15), -+ x0 = svqincd (x0, 15)) -+ -+/* -+** qincd_n_16_u32: -+** uqincd w0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_n_16_u32, uint32_t, -+ x0 = svqincd_n_u32 (x0, 16), -+ x0 = svqincd (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_u64.c -new file mode 100644 -index 000000000..28c611a8f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincd_u64.c -@@ -0,0 +1,113 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qincd_1_u64_tied: -+** uqincd z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_1_u64_tied, svuint64_t, -+ z0 = svqincd_u64 (z0, 1), -+ z0 = svqincd (z0, 1)) -+ -+/* -+** qincd_1_u64_untied: -+** movprfx z0, z1 -+** uqincd z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_1_u64_untied, svuint64_t, -+ z0 = svqincd_u64 (z1, 1), -+ z0 = svqincd (z1, 1)) -+ -+/* -+** qincd_2_u64: -+** uqincd z0\.d, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_2_u64, svuint64_t, -+ z0 = svqincd_u64 (z0, 2), -+ z0 = svqincd (z0, 2)) -+ -+/* -+** qincd_7_u64: -+** uqincd z0\.d, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_7_u64, svuint64_t, -+ z0 = svqincd_u64 (z0, 7), -+ z0 = svqincd (z0, 7)) -+ -+/* -+** qincd_15_u64: -+** uqincd z0\.d, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_15_u64, svuint64_t, -+ z0 = svqincd_u64 (z0, 15), -+ z0 = svqincd (z0, 15)) -+ -+/* -+** qincd_16_u64: -+** uqincd z0\.d, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincd_16_u64, svuint64_t, -+ z0 = svqincd_u64 (z0, 16), -+ z0 = svqincd (z0, 16)) -+ -+/* -+** qincd_n_1_u64_tied: -+** uqincd x0 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_n_1_u64_tied, uint64_t, -+ x0 = svqincd_n_u64 (x0, 1), -+ x0 = svqincd (x0, 1)) -+ -+/* -+** qincd_n_1_u64_untied: -+** mov x0, x1 -+** uqincd x0 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_n_1_u64_untied, uint64_t, -+ x0 = svqincd_n_u64 (x1, 1), -+ x0 = svqincd (x1, 1)) -+ -+/* -+** qincd_n_2_u64: -+** uqincd x0, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_n_2_u64, uint64_t, -+ x0 = svqincd_n_u64 (x0, 2), -+ x0 = svqincd (x0, 2)) -+ -+/* -+** qincd_n_7_u64: -+** uqincd x0, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_n_7_u64, uint64_t, -+ x0 = svqincd_n_u64 (x0, 7), -+ x0 = svqincd (x0, 7)) -+ -+/* -+** qincd_n_15_u64: -+** uqincd x0, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_n_15_u64, uint64_t, -+ x0 = svqincd_n_u64 (x0, 15), -+ x0 = svqincd (x0, 15)) -+ -+/* -+** qincd_n_16_u64: -+** uqincd x0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincd_n_16_u64, uint64_t, -+ x0 = svqincd_n_u64 (x0, 16), -+ x0 = svqincd (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_pat_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_pat_s16.c -new file mode 100644 -index 000000000..708d635c5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_pat_s16.c -@@ -0,0 +1,202 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qinch_pat_1_s16_tied: -+** sqinch z0\.h, pow2 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_1_s16_tied, svint16_t, -+ z0 = svqinch_pat_s16 (z0, SV_POW2, 1), -+ z0 = svqinch_pat (z0, SV_POW2, 1)) -+ -+/* -+** qinch_pat_1_s16_untied: -+** movprfx z0, z1 -+** sqinch z0\.h, pow2 -+** ret -+*/ 
-+TEST_UNIFORM_Z (qinch_pat_1_s16_untied, svint16_t, -+ z0 = svqinch_pat_s16 (z1, SV_POW2, 1), -+ z0 = svqinch_pat (z1, SV_POW2, 1)) -+ -+/* -+** qinch_pat_2_s16: -+** sqinch z0\.h, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_2_s16, svint16_t, -+ z0 = svqinch_pat_s16 (z0, SV_POW2, 2), -+ z0 = svqinch_pat (z0, SV_POW2, 2)) -+ -+/* -+** qinch_pat_7_s16: -+** sqinch z0\.h, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_7_s16, svint16_t, -+ z0 = svqinch_pat_s16 (z0, SV_POW2, 7), -+ z0 = svqinch_pat (z0, SV_POW2, 7)) -+ -+/* -+** qinch_pat_15_s16: -+** sqinch z0\.h, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_15_s16, svint16_t, -+ z0 = svqinch_pat_s16 (z0, SV_POW2, 15), -+ z0 = svqinch_pat (z0, SV_POW2, 15)) -+ -+/* -+** qinch_pat_16_s16: -+** sqinch z0\.h, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_16_s16, svint16_t, -+ z0 = svqinch_pat_s16 (z0, SV_POW2, 16), -+ z0 = svqinch_pat (z0, SV_POW2, 16)) -+ -+/* -+** qinch_pat_vl1_s16: -+** sqinch z0\.h, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_vl1_s16, svint16_t, -+ z0 = svqinch_pat_s16 (z0, SV_VL1, 16), -+ z0 = svqinch_pat (z0, SV_VL1, 16)) -+ -+/* -+** qinch_pat_vl2_s16: -+** sqinch z0\.h, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_vl2_s16, svint16_t, -+ z0 = svqinch_pat_s16 (z0, SV_VL2, 16), -+ z0 = svqinch_pat (z0, SV_VL2, 16)) -+ -+/* -+** qinch_pat_vl3_s16: -+** sqinch z0\.h, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_vl3_s16, svint16_t, -+ z0 = svqinch_pat_s16 (z0, SV_VL3, 16), -+ z0 = svqinch_pat (z0, SV_VL3, 16)) -+ -+/* -+** qinch_pat_vl4_s16: -+** sqinch z0\.h, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_vl4_s16, svint16_t, -+ z0 = svqinch_pat_s16 (z0, SV_VL4, 16), -+ z0 = svqinch_pat (z0, SV_VL4, 16)) -+ -+/* -+** qinch_pat_vl5_s16: -+** sqinch z0\.h, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_vl5_s16, svint16_t, -+ z0 = svqinch_pat_s16 (z0, SV_VL5, 16), -+ z0 = svqinch_pat (z0, SV_VL5, 16)) -+ -+/* -+** qinch_pat_vl6_s16: -+** sqinch z0\.h, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_vl6_s16, svint16_t, -+ z0 = svqinch_pat_s16 (z0, SV_VL6, 16), -+ z0 = svqinch_pat (z0, SV_VL6, 16)) -+ -+/* -+** qinch_pat_vl7_s16: -+** sqinch z0\.h, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_vl7_s16, svint16_t, -+ z0 = svqinch_pat_s16 (z0, SV_VL7, 16), -+ z0 = svqinch_pat (z0, SV_VL7, 16)) -+ -+/* -+** qinch_pat_vl8_s16: -+** sqinch z0\.h, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_vl8_s16, svint16_t, -+ z0 = svqinch_pat_s16 (z0, SV_VL8, 16), -+ z0 = svqinch_pat (z0, SV_VL8, 16)) -+ -+/* -+** qinch_pat_vl16_s16: -+** sqinch z0\.h, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_vl16_s16, svint16_t, -+ z0 = svqinch_pat_s16 (z0, SV_VL16, 16), -+ z0 = svqinch_pat (z0, SV_VL16, 16)) -+ -+/* -+** qinch_pat_vl32_s16: -+** sqinch z0\.h, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_vl32_s16, svint16_t, -+ z0 = svqinch_pat_s16 (z0, SV_VL32, 16), -+ z0 = svqinch_pat (z0, SV_VL32, 16)) -+ -+/* -+** qinch_pat_vl64_s16: -+** sqinch z0\.h, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_vl64_s16, svint16_t, -+ z0 = svqinch_pat_s16 (z0, SV_VL64, 16), -+ z0 = svqinch_pat (z0, SV_VL64, 16)) -+ -+/* -+** qinch_pat_vl128_s16: -+** sqinch z0\.h, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_vl128_s16, svint16_t, -+ z0 = svqinch_pat_s16 (z0, SV_VL128, 16), -+ z0 = svqinch_pat (z0, SV_VL128, 16)) -+ -+/* -+** qinch_pat_vl256_s16: -+** sqinch z0\.h, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z 
(qinch_pat_vl256_s16, svint16_t, -+ z0 = svqinch_pat_s16 (z0, SV_VL256, 16), -+ z0 = svqinch_pat (z0, SV_VL256, 16)) -+ -+/* -+** qinch_pat_mul4_s16: -+** sqinch z0\.h, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_mul4_s16, svint16_t, -+ z0 = svqinch_pat_s16 (z0, SV_MUL4, 16), -+ z0 = svqinch_pat (z0, SV_MUL4, 16)) -+ -+/* -+** qinch_pat_mul3_s16: -+** sqinch z0\.h, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_mul3_s16, svint16_t, -+ z0 = svqinch_pat_s16 (z0, SV_MUL3, 16), -+ z0 = svqinch_pat (z0, SV_MUL3, 16)) -+ -+/* -+** qinch_pat_all_s16: -+** sqinch z0\.h, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_all_s16, svint16_t, -+ z0 = svqinch_pat_s16 (z0, SV_ALL, 16), -+ z0 = svqinch_pat (z0, SV_ALL, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_pat_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_pat_s32.c -new file mode 100644 -index 000000000..7c91c6202 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_pat_s32.c -@@ -0,0 +1,202 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qinch_pat_n_1_s32_tied: -+** sqinch x0, w0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_1_s32_tied, int32_t, -+ x0 = svqinch_pat_n_s32 (x0, SV_POW2, 1), -+ x0 = svqinch_pat (x0, SV_POW2, 1)) -+ -+/* -+** qinch_pat_n_1_s32_untied: -+** mov w0, w1 -+** sqinch x0, w0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_1_s32_untied, int32_t, -+ x0 = svqinch_pat_n_s32 (x1, SV_POW2, 1), -+ x0 = svqinch_pat (x1, SV_POW2, 1)) -+ -+/* -+** qinch_pat_n_2_s32: -+** sqinch x0, w0, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_2_s32, int32_t, -+ x0 = svqinch_pat_n_s32 (x0, SV_POW2, 2), -+ x0 = svqinch_pat (x0, SV_POW2, 2)) -+ -+/* -+** qinch_pat_n_7_s32: -+** sqinch x0, w0, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_7_s32, int32_t, -+ x0 = svqinch_pat_n_s32 (x0, SV_POW2, 7), -+ x0 = svqinch_pat (x0, SV_POW2, 7)) -+ -+/* -+** qinch_pat_n_15_s32: -+** sqinch x0, w0, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_15_s32, int32_t, -+ x0 = svqinch_pat_n_s32 (x0, SV_POW2, 15), -+ x0 = svqinch_pat (x0, SV_POW2, 15)) -+ -+/* -+** qinch_pat_n_16_s32: -+** sqinch x0, w0, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_16_s32, int32_t, -+ x0 = svqinch_pat_n_s32 (x0, SV_POW2, 16), -+ x0 = svqinch_pat (x0, SV_POW2, 16)) -+ -+/* -+** qinch_pat_n_vl1_s32: -+** sqinch x0, w0, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl1_s32, int32_t, -+ x0 = svqinch_pat_n_s32 (x0, SV_VL1, 16), -+ x0 = svqinch_pat (x0, SV_VL1, 16)) -+ -+/* -+** qinch_pat_n_vl2_s32: -+** sqinch x0, w0, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl2_s32, int32_t, -+ x0 = svqinch_pat_n_s32 (x0, SV_VL2, 16), -+ x0 = svqinch_pat (x0, SV_VL2, 16)) -+ -+/* -+** qinch_pat_n_vl3_s32: -+** sqinch x0, w0, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl3_s32, int32_t, -+ x0 = svqinch_pat_n_s32 (x0, SV_VL3, 16), -+ x0 = svqinch_pat (x0, SV_VL3, 16)) -+ -+/* -+** qinch_pat_n_vl4_s32: -+** sqinch x0, w0, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl4_s32, int32_t, -+ x0 = svqinch_pat_n_s32 (x0, SV_VL4, 16), -+ x0 = svqinch_pat (x0, SV_VL4, 16)) -+ -+/* -+** qinch_pat_n_vl5_s32: -+** sqinch x0, w0, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl5_s32, int32_t, -+ x0 = svqinch_pat_n_s32 (x0, SV_VL5, 16), -+ x0 = svqinch_pat (x0, SV_VL5, 16)) -+ -+/* -+** qinch_pat_n_vl6_s32: -+** sqinch x0, w0, vl6, mul #16 -+** ret -+*/ 
-+TEST_UNIFORM_S (qinch_pat_n_vl6_s32, int32_t, -+ x0 = svqinch_pat_n_s32 (x0, SV_VL6, 16), -+ x0 = svqinch_pat (x0, SV_VL6, 16)) -+ -+/* -+** qinch_pat_n_vl7_s32: -+** sqinch x0, w0, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl7_s32, int32_t, -+ x0 = svqinch_pat_n_s32 (x0, SV_VL7, 16), -+ x0 = svqinch_pat (x0, SV_VL7, 16)) -+ -+/* -+** qinch_pat_n_vl8_s32: -+** sqinch x0, w0, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl8_s32, int32_t, -+ x0 = svqinch_pat_n_s32 (x0, SV_VL8, 16), -+ x0 = svqinch_pat (x0, SV_VL8, 16)) -+ -+/* -+** qinch_pat_n_vl16_s32: -+** sqinch x0, w0, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl16_s32, int32_t, -+ x0 = svqinch_pat_n_s32 (x0, SV_VL16, 16), -+ x0 = svqinch_pat (x0, SV_VL16, 16)) -+ -+/* -+** qinch_pat_n_vl32_s32: -+** sqinch x0, w0, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl32_s32, int32_t, -+ x0 = svqinch_pat_n_s32 (x0, SV_VL32, 16), -+ x0 = svqinch_pat (x0, SV_VL32, 16)) -+ -+/* -+** qinch_pat_n_vl64_s32: -+** sqinch x0, w0, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl64_s32, int32_t, -+ x0 = svqinch_pat_n_s32 (x0, SV_VL64, 16), -+ x0 = svqinch_pat (x0, SV_VL64, 16)) -+ -+/* -+** qinch_pat_n_vl128_s32: -+** sqinch x0, w0, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl128_s32, int32_t, -+ x0 = svqinch_pat_n_s32 (x0, SV_VL128, 16), -+ x0 = svqinch_pat (x0, SV_VL128, 16)) -+ -+/* -+** qinch_pat_n_vl256_s32: -+** sqinch x0, w0, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl256_s32, int32_t, -+ x0 = svqinch_pat_n_s32 (x0, SV_VL256, 16), -+ x0 = svqinch_pat (x0, SV_VL256, 16)) -+ -+/* -+** qinch_pat_n_mul4_s32: -+** sqinch x0, w0, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_mul4_s32, int32_t, -+ x0 = svqinch_pat_n_s32 (x0, SV_MUL4, 16), -+ x0 = svqinch_pat (x0, SV_MUL4, 16)) -+ -+/* -+** qinch_pat_n_mul3_s32: -+** sqinch x0, w0, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_mul3_s32, int32_t, -+ x0 = svqinch_pat_n_s32 (x0, SV_MUL3, 16), -+ x0 = svqinch_pat (x0, SV_MUL3, 16)) -+ -+/* -+** qinch_pat_n_all_s32: -+** sqinch x0, w0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_all_s32, int32_t, -+ x0 = svqinch_pat_n_s32 (x0, SV_ALL, 16), -+ x0 = svqinch_pat (x0, SV_ALL, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_pat_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_pat_s64.c -new file mode 100644 -index 000000000..2cde6482f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_pat_s64.c -@@ -0,0 +1,202 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qinch_pat_n_1_s64_tied: -+** sqinch x0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_1_s64_tied, int64_t, -+ x0 = svqinch_pat_n_s64 (x0, SV_POW2, 1), -+ x0 = svqinch_pat (x0, SV_POW2, 1)) -+ -+/* -+** qinch_pat_n_1_s64_untied: -+** mov x0, x1 -+** sqinch x0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_1_s64_untied, int64_t, -+ x0 = svqinch_pat_n_s64 (x1, SV_POW2, 1), -+ x0 = svqinch_pat (x1, SV_POW2, 1)) -+ -+/* -+** qinch_pat_n_2_s64: -+** sqinch x0, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_2_s64, int64_t, -+ x0 = svqinch_pat_n_s64 (x0, SV_POW2, 2), -+ x0 = svqinch_pat (x0, SV_POW2, 2)) -+ -+/* -+** qinch_pat_n_7_s64: -+** sqinch x0, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_7_s64, int64_t, -+ x0 = svqinch_pat_n_s64 (x0, SV_POW2, 7), -+ x0 = svqinch_pat (x0, SV_POW2, 7)) -+ -+/* -+** qinch_pat_n_15_s64: -+** 
sqinch x0, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_15_s64, int64_t, -+ x0 = svqinch_pat_n_s64 (x0, SV_POW2, 15), -+ x0 = svqinch_pat (x0, SV_POW2, 15)) -+ -+/* -+** qinch_pat_n_16_s64: -+** sqinch x0, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_16_s64, int64_t, -+ x0 = svqinch_pat_n_s64 (x0, SV_POW2, 16), -+ x0 = svqinch_pat (x0, SV_POW2, 16)) -+ -+/* -+** qinch_pat_n_vl1_s64: -+** sqinch x0, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl1_s64, int64_t, -+ x0 = svqinch_pat_n_s64 (x0, SV_VL1, 16), -+ x0 = svqinch_pat (x0, SV_VL1, 16)) -+ -+/* -+** qinch_pat_n_vl2_s64: -+** sqinch x0, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl2_s64, int64_t, -+ x0 = svqinch_pat_n_s64 (x0, SV_VL2, 16), -+ x0 = svqinch_pat (x0, SV_VL2, 16)) -+ -+/* -+** qinch_pat_n_vl3_s64: -+** sqinch x0, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl3_s64, int64_t, -+ x0 = svqinch_pat_n_s64 (x0, SV_VL3, 16), -+ x0 = svqinch_pat (x0, SV_VL3, 16)) -+ -+/* -+** qinch_pat_n_vl4_s64: -+** sqinch x0, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl4_s64, int64_t, -+ x0 = svqinch_pat_n_s64 (x0, SV_VL4, 16), -+ x0 = svqinch_pat (x0, SV_VL4, 16)) -+ -+/* -+** qinch_pat_n_vl5_s64: -+** sqinch x0, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl5_s64, int64_t, -+ x0 = svqinch_pat_n_s64 (x0, SV_VL5, 16), -+ x0 = svqinch_pat (x0, SV_VL5, 16)) -+ -+/* -+** qinch_pat_n_vl6_s64: -+** sqinch x0, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl6_s64, int64_t, -+ x0 = svqinch_pat_n_s64 (x0, SV_VL6, 16), -+ x0 = svqinch_pat (x0, SV_VL6, 16)) -+ -+/* -+** qinch_pat_n_vl7_s64: -+** sqinch x0, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl7_s64, int64_t, -+ x0 = svqinch_pat_n_s64 (x0, SV_VL7, 16), -+ x0 = svqinch_pat (x0, SV_VL7, 16)) -+ -+/* -+** qinch_pat_n_vl8_s64: -+** sqinch x0, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl8_s64, int64_t, -+ x0 = svqinch_pat_n_s64 (x0, SV_VL8, 16), -+ x0 = svqinch_pat (x0, SV_VL8, 16)) -+ -+/* -+** qinch_pat_n_vl16_s64: -+** sqinch x0, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl16_s64, int64_t, -+ x0 = svqinch_pat_n_s64 (x0, SV_VL16, 16), -+ x0 = svqinch_pat (x0, SV_VL16, 16)) -+ -+/* -+** qinch_pat_n_vl32_s64: -+** sqinch x0, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl32_s64, int64_t, -+ x0 = svqinch_pat_n_s64 (x0, SV_VL32, 16), -+ x0 = svqinch_pat (x0, SV_VL32, 16)) -+ -+/* -+** qinch_pat_n_vl64_s64: -+** sqinch x0, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl64_s64, int64_t, -+ x0 = svqinch_pat_n_s64 (x0, SV_VL64, 16), -+ x0 = svqinch_pat (x0, SV_VL64, 16)) -+ -+/* -+** qinch_pat_n_vl128_s64: -+** sqinch x0, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl128_s64, int64_t, -+ x0 = svqinch_pat_n_s64 (x0, SV_VL128, 16), -+ x0 = svqinch_pat (x0, SV_VL128, 16)) -+ -+/* -+** qinch_pat_n_vl256_s64: -+** sqinch x0, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl256_s64, int64_t, -+ x0 = svqinch_pat_n_s64 (x0, SV_VL256, 16), -+ x0 = svqinch_pat (x0, SV_VL256, 16)) -+ -+/* -+** qinch_pat_n_mul4_s64: -+** sqinch x0, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_mul4_s64, int64_t, -+ x0 = svqinch_pat_n_s64 (x0, SV_MUL4, 16), -+ x0 = svqinch_pat (x0, SV_MUL4, 16)) -+ -+/* -+** qinch_pat_n_mul3_s64: -+** sqinch x0, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_mul3_s64, int64_t, -+ x0 = svqinch_pat_n_s64 (x0, SV_MUL3, 16), -+ x0 = svqinch_pat (x0, SV_MUL3, 16)) -+ -+/* -+** 
qinch_pat_n_all_s64: -+** sqinch x0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_all_s64, int64_t, -+ x0 = svqinch_pat_n_s64 (x0, SV_ALL, 16), -+ x0 = svqinch_pat (x0, SV_ALL, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_pat_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_pat_u16.c -new file mode 100644 -index 000000000..5a1a846a0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_pat_u16.c -@@ -0,0 +1,202 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qinch_pat_1_u16_tied: -+** uqinch z0\.h, pow2 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_1_u16_tied, svuint16_t, -+ z0 = svqinch_pat_u16 (z0, SV_POW2, 1), -+ z0 = svqinch_pat (z0, SV_POW2, 1)) -+ -+/* -+** qinch_pat_1_u16_untied: -+** movprfx z0, z1 -+** uqinch z0\.h, pow2 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_1_u16_untied, svuint16_t, -+ z0 = svqinch_pat_u16 (z1, SV_POW2, 1), -+ z0 = svqinch_pat (z1, SV_POW2, 1)) -+ -+/* -+** qinch_pat_2_u16: -+** uqinch z0\.h, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_2_u16, svuint16_t, -+ z0 = svqinch_pat_u16 (z0, SV_POW2, 2), -+ z0 = svqinch_pat (z0, SV_POW2, 2)) -+ -+/* -+** qinch_pat_7_u16: -+** uqinch z0\.h, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_7_u16, svuint16_t, -+ z0 = svqinch_pat_u16 (z0, SV_POW2, 7), -+ z0 = svqinch_pat (z0, SV_POW2, 7)) -+ -+/* -+** qinch_pat_15_u16: -+** uqinch z0\.h, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_15_u16, svuint16_t, -+ z0 = svqinch_pat_u16 (z0, SV_POW2, 15), -+ z0 = svqinch_pat (z0, SV_POW2, 15)) -+ -+/* -+** qinch_pat_16_u16: -+** uqinch z0\.h, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_16_u16, svuint16_t, -+ z0 = svqinch_pat_u16 (z0, SV_POW2, 16), -+ z0 = svqinch_pat (z0, SV_POW2, 16)) -+ -+/* -+** qinch_pat_vl1_u16: -+** uqinch z0\.h, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_vl1_u16, svuint16_t, -+ z0 = svqinch_pat_u16 (z0, SV_VL1, 16), -+ z0 = svqinch_pat (z0, SV_VL1, 16)) -+ -+/* -+** qinch_pat_vl2_u16: -+** uqinch z0\.h, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_vl2_u16, svuint16_t, -+ z0 = svqinch_pat_u16 (z0, SV_VL2, 16), -+ z0 = svqinch_pat (z0, SV_VL2, 16)) -+ -+/* -+** qinch_pat_vl3_u16: -+** uqinch z0\.h, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_vl3_u16, svuint16_t, -+ z0 = svqinch_pat_u16 (z0, SV_VL3, 16), -+ z0 = svqinch_pat (z0, SV_VL3, 16)) -+ -+/* -+** qinch_pat_vl4_u16: -+** uqinch z0\.h, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_vl4_u16, svuint16_t, -+ z0 = svqinch_pat_u16 (z0, SV_VL4, 16), -+ z0 = svqinch_pat (z0, SV_VL4, 16)) -+ -+/* -+** qinch_pat_vl5_u16: -+** uqinch z0\.h, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_vl5_u16, svuint16_t, -+ z0 = svqinch_pat_u16 (z0, SV_VL5, 16), -+ z0 = svqinch_pat (z0, SV_VL5, 16)) -+ -+/* -+** qinch_pat_vl6_u16: -+** uqinch z0\.h, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_vl6_u16, svuint16_t, -+ z0 = svqinch_pat_u16 (z0, SV_VL6, 16), -+ z0 = svqinch_pat (z0, SV_VL6, 16)) -+ -+/* -+** qinch_pat_vl7_u16: -+** uqinch z0\.h, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_vl7_u16, svuint16_t, -+ z0 = svqinch_pat_u16 (z0, SV_VL7, 16), -+ z0 = svqinch_pat (z0, SV_VL7, 16)) -+ -+/* -+** qinch_pat_vl8_u16: -+** uqinch z0\.h, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_vl8_u16, svuint16_t, -+ z0 = svqinch_pat_u16 (z0, SV_VL8, 16), -+ z0 = svqinch_pat (z0, SV_VL8, 16)) -+ -+/* -+** qinch_pat_vl16_u16: -+** uqinch z0\.h, 
vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_vl16_u16, svuint16_t, -+ z0 = svqinch_pat_u16 (z0, SV_VL16, 16), -+ z0 = svqinch_pat (z0, SV_VL16, 16)) -+ -+/* -+** qinch_pat_vl32_u16: -+** uqinch z0\.h, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_vl32_u16, svuint16_t, -+ z0 = svqinch_pat_u16 (z0, SV_VL32, 16), -+ z0 = svqinch_pat (z0, SV_VL32, 16)) -+ -+/* -+** qinch_pat_vl64_u16: -+** uqinch z0\.h, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_vl64_u16, svuint16_t, -+ z0 = svqinch_pat_u16 (z0, SV_VL64, 16), -+ z0 = svqinch_pat (z0, SV_VL64, 16)) -+ -+/* -+** qinch_pat_vl128_u16: -+** uqinch z0\.h, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_vl128_u16, svuint16_t, -+ z0 = svqinch_pat_u16 (z0, SV_VL128, 16), -+ z0 = svqinch_pat (z0, SV_VL128, 16)) -+ -+/* -+** qinch_pat_vl256_u16: -+** uqinch z0\.h, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_vl256_u16, svuint16_t, -+ z0 = svqinch_pat_u16 (z0, SV_VL256, 16), -+ z0 = svqinch_pat (z0, SV_VL256, 16)) -+ -+/* -+** qinch_pat_mul4_u16: -+** uqinch z0\.h, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_mul4_u16, svuint16_t, -+ z0 = svqinch_pat_u16 (z0, SV_MUL4, 16), -+ z0 = svqinch_pat (z0, SV_MUL4, 16)) -+ -+/* -+** qinch_pat_mul3_u16: -+** uqinch z0\.h, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_mul3_u16, svuint16_t, -+ z0 = svqinch_pat_u16 (z0, SV_MUL3, 16), -+ z0 = svqinch_pat (z0, SV_MUL3, 16)) -+ -+/* -+** qinch_pat_all_u16: -+** uqinch z0\.h, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_pat_all_u16, svuint16_t, -+ z0 = svqinch_pat_u16 (z0, SV_ALL, 16), -+ z0 = svqinch_pat (z0, SV_ALL, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_pat_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_pat_u32.c -new file mode 100644 -index 000000000..8398c5689 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_pat_u32.c -@@ -0,0 +1,202 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qinch_pat_n_1_u32_tied: -+** uqinch w0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_1_u32_tied, uint32_t, -+ x0 = svqinch_pat_n_u32 (x0, SV_POW2, 1), -+ x0 = svqinch_pat (x0, SV_POW2, 1)) -+ -+/* -+** qinch_pat_n_1_u32_untied: -+** mov w0, w1 -+** uqinch w0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_1_u32_untied, uint32_t, -+ x0 = svqinch_pat_n_u32 (x1, SV_POW2, 1), -+ x0 = svqinch_pat (x1, SV_POW2, 1)) -+ -+/* -+** qinch_pat_n_2_u32: -+** uqinch w0, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_2_u32, uint32_t, -+ x0 = svqinch_pat_n_u32 (x0, SV_POW2, 2), -+ x0 = svqinch_pat (x0, SV_POW2, 2)) -+ -+/* -+** qinch_pat_n_7_u32: -+** uqinch w0, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_7_u32, uint32_t, -+ x0 = svqinch_pat_n_u32 (x0, SV_POW2, 7), -+ x0 = svqinch_pat (x0, SV_POW2, 7)) -+ -+/* -+** qinch_pat_n_15_u32: -+** uqinch w0, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_15_u32, uint32_t, -+ x0 = svqinch_pat_n_u32 (x0, SV_POW2, 15), -+ x0 = svqinch_pat (x0, SV_POW2, 15)) -+ -+/* -+** qinch_pat_n_16_u32: -+** uqinch w0, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_16_u32, uint32_t, -+ x0 = svqinch_pat_n_u32 (x0, SV_POW2, 16), -+ x0 = svqinch_pat (x0, SV_POW2, 16)) -+ -+/* -+** qinch_pat_n_vl1_u32: -+** uqinch w0, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl1_u32, uint32_t, -+ x0 = svqinch_pat_n_u32 (x0, SV_VL1, 16), -+ x0 = svqinch_pat (x0, SV_VL1, 16)) -+ -+/* -+** qinch_pat_n_vl2_u32: -+** uqinch 
w0, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl2_u32, uint32_t, -+ x0 = svqinch_pat_n_u32 (x0, SV_VL2, 16), -+ x0 = svqinch_pat (x0, SV_VL2, 16)) -+ -+/* -+** qinch_pat_n_vl3_u32: -+** uqinch w0, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl3_u32, uint32_t, -+ x0 = svqinch_pat_n_u32 (x0, SV_VL3, 16), -+ x0 = svqinch_pat (x0, SV_VL3, 16)) -+ -+/* -+** qinch_pat_n_vl4_u32: -+** uqinch w0, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl4_u32, uint32_t, -+ x0 = svqinch_pat_n_u32 (x0, SV_VL4, 16), -+ x0 = svqinch_pat (x0, SV_VL4, 16)) -+ -+/* -+** qinch_pat_n_vl5_u32: -+** uqinch w0, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl5_u32, uint32_t, -+ x0 = svqinch_pat_n_u32 (x0, SV_VL5, 16), -+ x0 = svqinch_pat (x0, SV_VL5, 16)) -+ -+/* -+** qinch_pat_n_vl6_u32: -+** uqinch w0, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl6_u32, uint32_t, -+ x0 = svqinch_pat_n_u32 (x0, SV_VL6, 16), -+ x0 = svqinch_pat (x0, SV_VL6, 16)) -+ -+/* -+** qinch_pat_n_vl7_u32: -+** uqinch w0, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl7_u32, uint32_t, -+ x0 = svqinch_pat_n_u32 (x0, SV_VL7, 16), -+ x0 = svqinch_pat (x0, SV_VL7, 16)) -+ -+/* -+** qinch_pat_n_vl8_u32: -+** uqinch w0, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl8_u32, uint32_t, -+ x0 = svqinch_pat_n_u32 (x0, SV_VL8, 16), -+ x0 = svqinch_pat (x0, SV_VL8, 16)) -+ -+/* -+** qinch_pat_n_vl16_u32: -+** uqinch w0, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl16_u32, uint32_t, -+ x0 = svqinch_pat_n_u32 (x0, SV_VL16, 16), -+ x0 = svqinch_pat (x0, SV_VL16, 16)) -+ -+/* -+** qinch_pat_n_vl32_u32: -+** uqinch w0, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl32_u32, uint32_t, -+ x0 = svqinch_pat_n_u32 (x0, SV_VL32, 16), -+ x0 = svqinch_pat (x0, SV_VL32, 16)) -+ -+/* -+** qinch_pat_n_vl64_u32: -+** uqinch w0, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl64_u32, uint32_t, -+ x0 = svqinch_pat_n_u32 (x0, SV_VL64, 16), -+ x0 = svqinch_pat (x0, SV_VL64, 16)) -+ -+/* -+** qinch_pat_n_vl128_u32: -+** uqinch w0, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl128_u32, uint32_t, -+ x0 = svqinch_pat_n_u32 (x0, SV_VL128, 16), -+ x0 = svqinch_pat (x0, SV_VL128, 16)) -+ -+/* -+** qinch_pat_n_vl256_u32: -+** uqinch w0, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl256_u32, uint32_t, -+ x0 = svqinch_pat_n_u32 (x0, SV_VL256, 16), -+ x0 = svqinch_pat (x0, SV_VL256, 16)) -+ -+/* -+** qinch_pat_n_mul4_u32: -+** uqinch w0, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_mul4_u32, uint32_t, -+ x0 = svqinch_pat_n_u32 (x0, SV_MUL4, 16), -+ x0 = svqinch_pat (x0, SV_MUL4, 16)) -+ -+/* -+** qinch_pat_n_mul3_u32: -+** uqinch w0, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_mul3_u32, uint32_t, -+ x0 = svqinch_pat_n_u32 (x0, SV_MUL3, 16), -+ x0 = svqinch_pat (x0, SV_MUL3, 16)) -+ -+/* -+** qinch_pat_n_all_u32: -+** uqinch w0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_all_u32, uint32_t, -+ x0 = svqinch_pat_n_u32 (x0, SV_ALL, 16), -+ x0 = svqinch_pat (x0, SV_ALL, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_pat_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_pat_u64.c -new file mode 100644 -index 000000000..51722646d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_pat_u64.c -@@ -0,0 +1,202 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qinch_pat_n_1_u64_tied: -+** 
uqinch x0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_1_u64_tied, uint64_t, -+ x0 = svqinch_pat_n_u64 (x0, SV_POW2, 1), -+ x0 = svqinch_pat (x0, SV_POW2, 1)) -+ -+/* -+** qinch_pat_n_1_u64_untied: -+** mov x0, x1 -+** uqinch x0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_1_u64_untied, uint64_t, -+ x0 = svqinch_pat_n_u64 (x1, SV_POW2, 1), -+ x0 = svqinch_pat (x1, SV_POW2, 1)) -+ -+/* -+** qinch_pat_n_2_u64: -+** uqinch x0, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_2_u64, uint64_t, -+ x0 = svqinch_pat_n_u64 (x0, SV_POW2, 2), -+ x0 = svqinch_pat (x0, SV_POW2, 2)) -+ -+/* -+** qinch_pat_n_7_u64: -+** uqinch x0, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_7_u64, uint64_t, -+ x0 = svqinch_pat_n_u64 (x0, SV_POW2, 7), -+ x0 = svqinch_pat (x0, SV_POW2, 7)) -+ -+/* -+** qinch_pat_n_15_u64: -+** uqinch x0, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_15_u64, uint64_t, -+ x0 = svqinch_pat_n_u64 (x0, SV_POW2, 15), -+ x0 = svqinch_pat (x0, SV_POW2, 15)) -+ -+/* -+** qinch_pat_n_16_u64: -+** uqinch x0, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_16_u64, uint64_t, -+ x0 = svqinch_pat_n_u64 (x0, SV_POW2, 16), -+ x0 = svqinch_pat (x0, SV_POW2, 16)) -+ -+/* -+** qinch_pat_n_vl1_u64: -+** uqinch x0, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl1_u64, uint64_t, -+ x0 = svqinch_pat_n_u64 (x0, SV_VL1, 16), -+ x0 = svqinch_pat (x0, SV_VL1, 16)) -+ -+/* -+** qinch_pat_n_vl2_u64: -+** uqinch x0, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl2_u64, uint64_t, -+ x0 = svqinch_pat_n_u64 (x0, SV_VL2, 16), -+ x0 = svqinch_pat (x0, SV_VL2, 16)) -+ -+/* -+** qinch_pat_n_vl3_u64: -+** uqinch x0, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl3_u64, uint64_t, -+ x0 = svqinch_pat_n_u64 (x0, SV_VL3, 16), -+ x0 = svqinch_pat (x0, SV_VL3, 16)) -+ -+/* -+** qinch_pat_n_vl4_u64: -+** uqinch x0, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl4_u64, uint64_t, -+ x0 = svqinch_pat_n_u64 (x0, SV_VL4, 16), -+ x0 = svqinch_pat (x0, SV_VL4, 16)) -+ -+/* -+** qinch_pat_n_vl5_u64: -+** uqinch x0, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl5_u64, uint64_t, -+ x0 = svqinch_pat_n_u64 (x0, SV_VL5, 16), -+ x0 = svqinch_pat (x0, SV_VL5, 16)) -+ -+/* -+** qinch_pat_n_vl6_u64: -+** uqinch x0, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl6_u64, uint64_t, -+ x0 = svqinch_pat_n_u64 (x0, SV_VL6, 16), -+ x0 = svqinch_pat (x0, SV_VL6, 16)) -+ -+/* -+** qinch_pat_n_vl7_u64: -+** uqinch x0, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl7_u64, uint64_t, -+ x0 = svqinch_pat_n_u64 (x0, SV_VL7, 16), -+ x0 = svqinch_pat (x0, SV_VL7, 16)) -+ -+/* -+** qinch_pat_n_vl8_u64: -+** uqinch x0, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl8_u64, uint64_t, -+ x0 = svqinch_pat_n_u64 (x0, SV_VL8, 16), -+ x0 = svqinch_pat (x0, SV_VL8, 16)) -+ -+/* -+** qinch_pat_n_vl16_u64: -+** uqinch x0, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl16_u64, uint64_t, -+ x0 = svqinch_pat_n_u64 (x0, SV_VL16, 16), -+ x0 = svqinch_pat (x0, SV_VL16, 16)) -+ -+/* -+** qinch_pat_n_vl32_u64: -+** uqinch x0, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl32_u64, uint64_t, -+ x0 = svqinch_pat_n_u64 (x0, SV_VL32, 16), -+ x0 = svqinch_pat (x0, SV_VL32, 16)) -+ -+/* -+** qinch_pat_n_vl64_u64: -+** uqinch x0, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl64_u64, uint64_t, -+ x0 = svqinch_pat_n_u64 (x0, SV_VL64, 16), -+ x0 = svqinch_pat (x0, SV_VL64, 16)) -+ -+/* -+** qinch_pat_n_vl128_u64: 
-+** uqinch x0, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl128_u64, uint64_t, -+ x0 = svqinch_pat_n_u64 (x0, SV_VL128, 16), -+ x0 = svqinch_pat (x0, SV_VL128, 16)) -+ -+/* -+** qinch_pat_n_vl256_u64: -+** uqinch x0, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_vl256_u64, uint64_t, -+ x0 = svqinch_pat_n_u64 (x0, SV_VL256, 16), -+ x0 = svqinch_pat (x0, SV_VL256, 16)) -+ -+/* -+** qinch_pat_n_mul4_u64: -+** uqinch x0, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_mul4_u64, uint64_t, -+ x0 = svqinch_pat_n_u64 (x0, SV_MUL4, 16), -+ x0 = svqinch_pat (x0, SV_MUL4, 16)) -+ -+/* -+** qinch_pat_n_mul3_u64: -+** uqinch x0, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_mul3_u64, uint64_t, -+ x0 = svqinch_pat_n_u64 (x0, SV_MUL3, 16), -+ x0 = svqinch_pat (x0, SV_MUL3, 16)) -+ -+/* -+** qinch_pat_n_all_u64: -+** uqinch x0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_pat_n_all_u64, uint64_t, -+ x0 = svqinch_pat_n_u64 (x0, SV_ALL, 16), -+ x0 = svqinch_pat (x0, SV_ALL, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_s16.c -new file mode 100644 -index 000000000..1f460db8e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_s16.c -@@ -0,0 +1,58 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qinch_1_s16_tied: -+** sqinch z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_1_s16_tied, svint16_t, -+ z0 = svqinch_s16 (z0, 1), -+ z0 = svqinch (z0, 1)) -+ -+/* -+** qinch_1_s16_untied: -+** movprfx z0, z1 -+** sqinch z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_1_s16_untied, svint16_t, -+ z0 = svqinch_s16 (z1, 1), -+ z0 = svqinch (z1, 1)) -+ -+/* -+** qinch_2_s16: -+** sqinch z0\.h, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_2_s16, svint16_t, -+ z0 = svqinch_s16 (z0, 2), -+ z0 = svqinch (z0, 2)) -+ -+/* -+** qinch_7_s16: -+** sqinch z0\.h, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_7_s16, svint16_t, -+ z0 = svqinch_s16 (z0, 7), -+ z0 = svqinch (z0, 7)) -+ -+/* -+** qinch_15_s16: -+** sqinch z0\.h, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_15_s16, svint16_t, -+ z0 = svqinch_s16 (z0, 15), -+ z0 = svqinch (z0, 15)) -+ -+/* -+** qinch_16_s16: -+** sqinch z0\.h, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_16_s16, svint16_t, -+ z0 = svqinch_s16 (z0, 16), -+ z0 = svqinch (z0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_s32.c -new file mode 100644 -index 000000000..a7b1aac80 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_s32.c -@@ -0,0 +1,58 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qinch_n_1_s32_tied: -+** sqinch x0, w0 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_n_1_s32_tied, int32_t, -+ x0 = svqinch_n_s32 (x0, 1), -+ x0 = svqinch (x0, 1)) -+ -+/* -+** qinch_n_1_s32_untied: -+** mov w0, w1 -+** sqinch x0, w0 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_n_1_s32_untied, int32_t, -+ x0 = svqinch_n_s32 (x1, 1), -+ x0 = svqinch (x1, 1)) -+ -+/* -+** qinch_n_2_s32: -+** sqinch x0, w0, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_n_2_s32, int32_t, -+ x0 = svqinch_n_s32 (x0, 2), -+ x0 = svqinch (x0, 2)) -+ -+/* -+** qinch_n_7_s32: -+** sqinch x0, w0, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_n_7_s32, int32_t, -+ x0 = svqinch_n_s32 (x0, 7), -+ x0 = svqinch (x0, 7)) -+ -+/* -+** 
qinch_n_15_s32: -+** sqinch x0, w0, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_n_15_s32, int32_t, -+ x0 = svqinch_n_s32 (x0, 15), -+ x0 = svqinch (x0, 15)) -+ -+/* -+** qinch_n_16_s32: -+** sqinch x0, w0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_n_16_s32, int32_t, -+ x0 = svqinch_n_s32 (x0, 16), -+ x0 = svqinch (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_s64.c -new file mode 100644 -index 000000000..74ac6a3df ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_s64.c -@@ -0,0 +1,58 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qinch_n_1_s64_tied: -+** sqinch x0 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_n_1_s64_tied, int64_t, -+ x0 = svqinch_n_s64 (x0, 1), -+ x0 = svqinch (x0, 1)) -+ -+/* -+** qinch_n_1_s64_untied: -+** mov x0, x1 -+** sqinch x0 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_n_1_s64_untied, int64_t, -+ x0 = svqinch_n_s64 (x1, 1), -+ x0 = svqinch (x1, 1)) -+ -+/* -+** qinch_n_2_s64: -+** sqinch x0, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_n_2_s64, int64_t, -+ x0 = svqinch_n_s64 (x0, 2), -+ x0 = svqinch (x0, 2)) -+ -+/* -+** qinch_n_7_s64: -+** sqinch x0, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_n_7_s64, int64_t, -+ x0 = svqinch_n_s64 (x0, 7), -+ x0 = svqinch (x0, 7)) -+ -+/* -+** qinch_n_15_s64: -+** sqinch x0, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_n_15_s64, int64_t, -+ x0 = svqinch_n_s64 (x0, 15), -+ x0 = svqinch (x0, 15)) -+ -+/* -+** qinch_n_16_s64: -+** sqinch x0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_n_16_s64, int64_t, -+ x0 = svqinch_n_s64 (x0, 16), -+ x0 = svqinch (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_u16.c -new file mode 100644 -index 000000000..aa9905897 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_u16.c -@@ -0,0 +1,58 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qinch_1_u16_tied: -+** uqinch z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_1_u16_tied, svuint16_t, -+ z0 = svqinch_u16 (z0, 1), -+ z0 = svqinch (z0, 1)) -+ -+/* -+** qinch_1_u16_untied: -+** movprfx z0, z1 -+** uqinch z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_1_u16_untied, svuint16_t, -+ z0 = svqinch_u16 (z1, 1), -+ z0 = svqinch (z1, 1)) -+ -+/* -+** qinch_2_u16: -+** uqinch z0\.h, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_2_u16, svuint16_t, -+ z0 = svqinch_u16 (z0, 2), -+ z0 = svqinch (z0, 2)) -+ -+/* -+** qinch_7_u16: -+** uqinch z0\.h, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_7_u16, svuint16_t, -+ z0 = svqinch_u16 (z0, 7), -+ z0 = svqinch (z0, 7)) -+ -+/* -+** qinch_15_u16: -+** uqinch z0\.h, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_15_u16, svuint16_t, -+ z0 = svqinch_u16 (z0, 15), -+ z0 = svqinch (z0, 15)) -+ -+/* -+** qinch_16_u16: -+** uqinch z0\.h, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qinch_16_u16, svuint16_t, -+ z0 = svqinch_u16 (z0, 16), -+ z0 = svqinch (z0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_u32.c -new file mode 100644 -index 000000000..396f95b2a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_u32.c -@@ -0,0 +1,58 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ 
-+/* -+** qinch_n_1_u32_tied: -+** uqinch w0 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_n_1_u32_tied, uint32_t, -+ x0 = svqinch_n_u32 (x0, 1), -+ x0 = svqinch (x0, 1)) -+ -+/* -+** qinch_n_1_u32_untied: -+** mov w0, w1 -+** uqinch w0 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_n_1_u32_untied, uint32_t, -+ x0 = svqinch_n_u32 (x1, 1), -+ x0 = svqinch (x1, 1)) -+ -+/* -+** qinch_n_2_u32: -+** uqinch w0, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_n_2_u32, uint32_t, -+ x0 = svqinch_n_u32 (x0, 2), -+ x0 = svqinch (x0, 2)) -+ -+/* -+** qinch_n_7_u32: -+** uqinch w0, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_n_7_u32, uint32_t, -+ x0 = svqinch_n_u32 (x0, 7), -+ x0 = svqinch (x0, 7)) -+ -+/* -+** qinch_n_15_u32: -+** uqinch w0, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_n_15_u32, uint32_t, -+ x0 = svqinch_n_u32 (x0, 15), -+ x0 = svqinch (x0, 15)) -+ -+/* -+** qinch_n_16_u32: -+** uqinch w0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_n_16_u32, uint32_t, -+ x0 = svqinch_n_u32 (x0, 16), -+ x0 = svqinch (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_u64.c -new file mode 100644 -index 000000000..5a9231722 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qinch_u64.c -@@ -0,0 +1,58 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qinch_n_1_u64_tied: -+** uqinch x0 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_n_1_u64_tied, uint64_t, -+ x0 = svqinch_n_u64 (x0, 1), -+ x0 = svqinch (x0, 1)) -+ -+/* -+** qinch_n_1_u64_untied: -+** mov x0, x1 -+** uqinch x0 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_n_1_u64_untied, uint64_t, -+ x0 = svqinch_n_u64 (x1, 1), -+ x0 = svqinch (x1, 1)) -+ -+/* -+** qinch_n_2_u64: -+** uqinch x0, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_n_2_u64, uint64_t, -+ x0 = svqinch_n_u64 (x0, 2), -+ x0 = svqinch (x0, 2)) -+ -+/* -+** qinch_n_7_u64: -+** uqinch x0, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_n_7_u64, uint64_t, -+ x0 = svqinch_n_u64 (x0, 7), -+ x0 = svqinch (x0, 7)) -+ -+/* -+** qinch_n_15_u64: -+** uqinch x0, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_n_15_u64, uint64_t, -+ x0 = svqinch_n_u64 (x0, 15), -+ x0 = svqinch (x0, 15)) -+ -+/* -+** qinch_n_16_u64: -+** uqinch x0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qinch_n_16_u64, uint64_t, -+ x0 = svqinch_n_u64 (x0, 16), -+ x0 = svqinch (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincp_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincp_s16.c -new file mode 100644 -index 000000000..979b57476 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincp_s16.c -@@ -0,0 +1,22 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qincp_s16_tied: -+** sqincp z0\.h, p0 -+** ret -+*/ -+TEST_UNIFORM_Z (qincp_s16_tied, svint16_t, -+ z0 = svqincp_s16 (z0, p0), -+ z0 = svqincp (z0, p0)) -+ -+/* -+** qincp_s16_untied: -+** movprfx z0, z1 -+** sqincp z0\.h, p0 -+** ret -+*/ -+TEST_UNIFORM_Z (qincp_s16_untied, svint16_t, -+ z0 = svqincp_s16 (z1, p0), -+ z0 = svqincp (z1, p0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincp_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincp_s32.c -new file mode 100644 -index 000000000..46ad51b01 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincp_s32.c -@@ -0,0 +1,98 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include 
"test_sve_acle.h" -+ -+/* -+** qincp_s32_tied: -+** sqincp z0\.s, p0 -+** ret -+*/ -+TEST_UNIFORM_Z (qincp_s32_tied, svint32_t, -+ z0 = svqincp_s32 (z0, p0), -+ z0 = svqincp (z0, p0)) -+ -+/* -+** qincp_s32_untied: -+** movprfx z0, z1 -+** sqincp z0\.s, p0 -+** ret -+*/ -+TEST_UNIFORM_Z (qincp_s32_untied, svint32_t, -+ z0 = svqincp_s32 (z1, p0), -+ z0 = svqincp (z1, p0)) -+ -+/* -+** qincp_n_s32_b8_tied: -+** sqincp x0, p0\.b, w0 -+** ret -+*/ -+TEST_UNIFORM_S (qincp_n_s32_b8_tied, int32_t, -+ x0 = svqincp_n_s32_b8 (x0, p0), -+ x0 = svqincp_b8 (x0, p0)) -+ -+/* -+** qincp_n_s32_b8_untied: -+** mov w0, w1 -+** sqincp x0, p0\.b, w0 -+** ret -+*/ -+TEST_UNIFORM_S (qincp_n_s32_b8_untied, int32_t, -+ x0 = svqincp_n_s32_b8 (x1, p0), -+ x0 = svqincp_b8 (x1, p0)) -+ -+/* -+** qincp_n_s32_b16_tied: -+** sqincp x0, p0\.h, w0 -+** ret -+*/ -+TEST_UNIFORM_S (qincp_n_s32_b16_tied, int32_t, -+ x0 = svqincp_n_s32_b16 (x0, p0), -+ x0 = svqincp_b16 (x0, p0)) -+ -+/* -+** qincp_n_s32_b16_untied: -+** mov w0, w1 -+** sqincp x0, p0\.h, w0 -+** ret -+*/ -+TEST_UNIFORM_S (qincp_n_s32_b16_untied, int32_t, -+ x0 = svqincp_n_s32_b16 (x1, p0), -+ x0 = svqincp_b16 (x1, p0)) -+ -+/* -+** qincp_n_s32_b32_tied: -+** sqincp x0, p0\.s, w0 -+** ret -+*/ -+TEST_UNIFORM_S (qincp_n_s32_b32_tied, int32_t, -+ x0 = svqincp_n_s32_b32 (x0, p0), -+ x0 = svqincp_b32 (x0, p0)) -+ -+/* -+** qincp_n_s32_b32_untied: -+** mov w0, w1 -+** sqincp x0, p0\.s, w0 -+** ret -+*/ -+TEST_UNIFORM_S (qincp_n_s32_b32_untied, int32_t, -+ x0 = svqincp_n_s32_b32 (x1, p0), -+ x0 = svqincp_b32 (x1, p0)) -+ -+/* -+** qincp_n_s32_b64_tied: -+** sqincp x0, p0\.d, w0 -+** ret -+*/ -+TEST_UNIFORM_S (qincp_n_s32_b64_tied, int32_t, -+ x0 = svqincp_n_s32_b64 (x0, p0), -+ x0 = svqincp_b64 (x0, p0)) -+ -+/* -+** qincp_n_s32_b64_untied: -+** mov w0, w1 -+** sqincp x0, p0\.d, w0 -+** ret -+*/ -+TEST_UNIFORM_S (qincp_n_s32_b64_untied, int32_t, -+ x0 = svqincp_n_s32_b64 (x1, p0), -+ x0 = svqincp_b64 (x1, p0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincp_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincp_s64.c -new file mode 100644 -index 000000000..226502328 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincp_s64.c -@@ -0,0 +1,98 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qincp_s64_tied: -+** sqincp z0\.d, p0 -+** ret -+*/ -+TEST_UNIFORM_Z (qincp_s64_tied, svint64_t, -+ z0 = svqincp_s64 (z0, p0), -+ z0 = svqincp (z0, p0)) -+ -+/* -+** qincp_s64_untied: -+** movprfx z0, z1 -+** sqincp z0\.d, p0 -+** ret -+*/ -+TEST_UNIFORM_Z (qincp_s64_untied, svint64_t, -+ z0 = svqincp_s64 (z1, p0), -+ z0 = svqincp (z1, p0)) -+ -+/* -+** qincp_n_s64_b8_tied: -+** sqincp x0, p0\.b -+** ret -+*/ -+TEST_UNIFORM_S (qincp_n_s64_b8_tied, int64_t, -+ x0 = svqincp_n_s64_b8 (x0, p0), -+ x0 = svqincp_b8 (x0, p0)) -+ -+/* -+** qincp_n_s64_b8_untied: -+** mov x0, x1 -+** sqincp x0, p0\.b -+** ret -+*/ -+TEST_UNIFORM_S (qincp_n_s64_b8_untied, int64_t, -+ x0 = svqincp_n_s64_b8 (x1, p0), -+ x0 = svqincp_b8 (x1, p0)) -+ -+/* -+** qincp_n_s64_b16_tied: -+** sqincp x0, p0\.h -+** ret -+*/ -+TEST_UNIFORM_S (qincp_n_s64_b16_tied, int64_t, -+ x0 = svqincp_n_s64_b16 (x0, p0), -+ x0 = svqincp_b16 (x0, p0)) -+ -+/* -+** qincp_n_s64_b16_untied: -+** mov x0, x1 -+** sqincp x0, p0\.h -+** ret -+*/ -+TEST_UNIFORM_S (qincp_n_s64_b16_untied, int64_t, -+ x0 = svqincp_n_s64_b16 (x1, p0), -+ x0 = svqincp_b16 (x1, p0)) -+ -+/* -+** qincp_n_s64_b32_tied: -+** sqincp x0, p0\.s -+** ret -+*/ 
-+TEST_UNIFORM_S (qincp_n_s64_b32_tied, int64_t, -+ x0 = svqincp_n_s64_b32 (x0, p0), -+ x0 = svqincp_b32 (x0, p0)) -+ -+/* -+** qincp_n_s64_b32_untied: -+** mov x0, x1 -+** sqincp x0, p0\.s -+** ret -+*/ -+TEST_UNIFORM_S (qincp_n_s64_b32_untied, int64_t, -+ x0 = svqincp_n_s64_b32 (x1, p0), -+ x0 = svqincp_b32 (x1, p0)) -+ -+/* -+** qincp_n_s64_b64_tied: -+** sqincp x0, p0\.d -+** ret -+*/ -+TEST_UNIFORM_S (qincp_n_s64_b64_tied, int64_t, -+ x0 = svqincp_n_s64_b64 (x0, p0), -+ x0 = svqincp_b64 (x0, p0)) -+ -+/* -+** qincp_n_s64_b64_untied: -+** mov x0, x1 -+** sqincp x0, p0\.d -+** ret -+*/ -+TEST_UNIFORM_S (qincp_n_s64_b64_untied, int64_t, -+ x0 = svqincp_n_s64_b64 (x1, p0), -+ x0 = svqincp_b64 (x1, p0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincp_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincp_u16.c -new file mode 100644 -index 000000000..ecd84470c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincp_u16.c -@@ -0,0 +1,22 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qincp_u16_tied: -+** uqincp z0\.h, p0 -+** ret -+*/ -+TEST_UNIFORM_Z (qincp_u16_tied, svuint16_t, -+ z0 = svqincp_u16 (z0, p0), -+ z0 = svqincp (z0, p0)) -+ -+/* -+** qincp_u16_untied: -+** movprfx z0, z1 -+** uqincp z0\.h, p0 -+** ret -+*/ -+TEST_UNIFORM_Z (qincp_u16_untied, svuint16_t, -+ z0 = svqincp_u16 (z1, p0), -+ z0 = svqincp (z1, p0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincp_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincp_u32.c -new file mode 100644 -index 000000000..011a26253 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincp_u32.c -@@ -0,0 +1,98 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qincp_u32_tied: -+** uqincp z0\.s, p0 -+** ret -+*/ -+TEST_UNIFORM_Z (qincp_u32_tied, svuint32_t, -+ z0 = svqincp_u32 (z0, p0), -+ z0 = svqincp (z0, p0)) -+ -+/* -+** qincp_u32_untied: -+** movprfx z0, z1 -+** uqincp z0\.s, p0 -+** ret -+*/ -+TEST_UNIFORM_Z (qincp_u32_untied, svuint32_t, -+ z0 = svqincp_u32 (z1, p0), -+ z0 = svqincp (z1, p0)) -+ -+/* -+** qincp_n_u32_b8_tied: -+** uqincp w0, p0\.b -+** ret -+*/ -+TEST_UNIFORM_S (qincp_n_u32_b8_tied, uint32_t, -+ x0 = svqincp_n_u32_b8 (x0, p0), -+ x0 = svqincp_b8 (x0, p0)) -+ -+/* -+** qincp_n_u32_b8_untied: -+** mov w0, w1 -+** uqincp w0, p0\.b -+** ret -+*/ -+TEST_UNIFORM_S (qincp_n_u32_b8_untied, uint32_t, -+ x0 = svqincp_n_u32_b8 (x1, p0), -+ x0 = svqincp_b8 (x1, p0)) -+ -+/* -+** qincp_n_u32_b16_tied: -+** uqincp w0, p0\.h -+** ret -+*/ -+TEST_UNIFORM_S (qincp_n_u32_b16_tied, uint32_t, -+ x0 = svqincp_n_u32_b16 (x0, p0), -+ x0 = svqincp_b16 (x0, p0)) -+ -+/* -+** qincp_n_u32_b16_untied: -+** mov w0, w1 -+** uqincp w0, p0\.h -+** ret -+*/ -+TEST_UNIFORM_S (qincp_n_u32_b16_untied, uint32_t, -+ x0 = svqincp_n_u32_b16 (x1, p0), -+ x0 = svqincp_b16 (x1, p0)) -+ -+/* -+** qincp_n_u32_b32_tied: -+** uqincp w0, p0\.s -+** ret -+*/ -+TEST_UNIFORM_S (qincp_n_u32_b32_tied, uint32_t, -+ x0 = svqincp_n_u32_b32 (x0, p0), -+ x0 = svqincp_b32 (x0, p0)) -+ -+/* -+** qincp_n_u32_b32_untied: -+** mov w0, w1 -+** uqincp w0, p0\.s -+** ret -+*/ -+TEST_UNIFORM_S (qincp_n_u32_b32_untied, uint32_t, -+ x0 = svqincp_n_u32_b32 (x1, p0), -+ x0 = svqincp_b32 (x1, p0)) -+ -+/* -+** qincp_n_u32_b64_tied: -+** uqincp w0, p0\.d -+** ret -+*/ -+TEST_UNIFORM_S (qincp_n_u32_b64_tied, uint32_t, -+ x0 = svqincp_n_u32_b64 (x0, p0), -+ x0 = svqincp_b64 (x0, p0)) -+ 
-+/* -+** qincp_n_u32_b64_untied: -+** mov w0, w1 -+** uqincp w0, p0\.d -+** ret -+*/ -+TEST_UNIFORM_S (qincp_n_u32_b64_untied, uint32_t, -+ x0 = svqincp_n_u32_b64 (x1, p0), -+ x0 = svqincp_b64 (x1, p0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincp_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincp_u64.c -new file mode 100644 -index 000000000..761ac553a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincp_u64.c -@@ -0,0 +1,98 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qincp_u64_tied: -+** uqincp z0\.d, p0 -+** ret -+*/ -+TEST_UNIFORM_Z (qincp_u64_tied, svuint64_t, -+ z0 = svqincp_u64 (z0, p0), -+ z0 = svqincp (z0, p0)) -+ -+/* -+** qincp_u64_untied: -+** movprfx z0, z1 -+** uqincp z0\.d, p0 -+** ret -+*/ -+TEST_UNIFORM_Z (qincp_u64_untied, svuint64_t, -+ z0 = svqincp_u64 (z1, p0), -+ z0 = svqincp (z1, p0)) -+ -+/* -+** qincp_n_u64_b8_tied: -+** uqincp x0, p0\.b -+** ret -+*/ -+TEST_UNIFORM_S (qincp_n_u64_b8_tied, uint64_t, -+ x0 = svqincp_n_u64_b8 (x0, p0), -+ x0 = svqincp_b8 (x0, p0)) -+ -+/* -+** qincp_n_u64_b8_untied: -+** mov x0, x1 -+** uqincp x0, p0\.b -+** ret -+*/ -+TEST_UNIFORM_S (qincp_n_u64_b8_untied, uint64_t, -+ x0 = svqincp_n_u64_b8 (x1, p0), -+ x0 = svqincp_b8 (x1, p0)) -+ -+/* -+** qincp_n_u64_b16_tied: -+** uqincp x0, p0\.h -+** ret -+*/ -+TEST_UNIFORM_S (qincp_n_u64_b16_tied, uint64_t, -+ x0 = svqincp_n_u64_b16 (x0, p0), -+ x0 = svqincp_b16 (x0, p0)) -+ -+/* -+** qincp_n_u64_b16_untied: -+** mov x0, x1 -+** uqincp x0, p0\.h -+** ret -+*/ -+TEST_UNIFORM_S (qincp_n_u64_b16_untied, uint64_t, -+ x0 = svqincp_n_u64_b16 (x1, p0), -+ x0 = svqincp_b16 (x1, p0)) -+ -+/* -+** qincp_n_u64_b32_tied: -+** uqincp x0, p0\.s -+** ret -+*/ -+TEST_UNIFORM_S (qincp_n_u64_b32_tied, uint64_t, -+ x0 = svqincp_n_u64_b32 (x0, p0), -+ x0 = svqincp_b32 (x0, p0)) -+ -+/* -+** qincp_n_u64_b32_untied: -+** mov x0, x1 -+** uqincp x0, p0\.s -+** ret -+*/ -+TEST_UNIFORM_S (qincp_n_u64_b32_untied, uint64_t, -+ x0 = svqincp_n_u64_b32 (x1, p0), -+ x0 = svqincp_b32 (x1, p0)) -+ -+/* -+** qincp_n_u64_b64_tied: -+** uqincp x0, p0\.d -+** ret -+*/ -+TEST_UNIFORM_S (qincp_n_u64_b64_tied, uint64_t, -+ x0 = svqincp_n_u64_b64 (x0, p0), -+ x0 = svqincp_b64 (x0, p0)) -+ -+/* -+** qincp_n_u64_b64_untied: -+** mov x0, x1 -+** uqincp x0, p0\.d -+** ret -+*/ -+TEST_UNIFORM_S (qincp_n_u64_b64_untied, uint64_t, -+ x0 = svqincp_n_u64_b64 (x1, p0), -+ x0 = svqincp_b64 (x1, p0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_pat_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_pat_s32.c -new file mode 100644 -index 000000000..6ceb003ab ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_pat_s32.c -@@ -0,0 +1,401 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qincw_pat_1_s32_tied: -+** sqincw z0\.s, pow2 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_1_s32_tied, svint32_t, -+ z0 = svqincw_pat_s32 (z0, SV_POW2, 1), -+ z0 = svqincw_pat (z0, SV_POW2, 1)) -+ -+/* -+** qincw_pat_1_s32_untied: -+** movprfx z0, z1 -+** sqincw z0\.s, pow2 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_1_s32_untied, svint32_t, -+ z0 = svqincw_pat_s32 (z1, SV_POW2, 1), -+ z0 = svqincw_pat (z1, SV_POW2, 1)) -+ -+/* -+** qincw_pat_2_s32: -+** sqincw z0\.s, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_2_s32, svint32_t, -+ z0 = svqincw_pat_s32 (z0, SV_POW2, 2), -+ z0 = svqincw_pat (z0, SV_POW2, 2)) -+ -+/* -+** 
qincw_pat_7_s32: -+** sqincw z0\.s, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_7_s32, svint32_t, -+ z0 = svqincw_pat_s32 (z0, SV_POW2, 7), -+ z0 = svqincw_pat (z0, SV_POW2, 7)) -+ -+/* -+** qincw_pat_15_s32: -+** sqincw z0\.s, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_15_s32, svint32_t, -+ z0 = svqincw_pat_s32 (z0, SV_POW2, 15), -+ z0 = svqincw_pat (z0, SV_POW2, 15)) -+ -+/* -+** qincw_pat_16_s32: -+** sqincw z0\.s, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_16_s32, svint32_t, -+ z0 = svqincw_pat_s32 (z0, SV_POW2, 16), -+ z0 = svqincw_pat (z0, SV_POW2, 16)) -+ -+/* -+** qincw_pat_vl1_s32: -+** sqincw z0\.s, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_vl1_s32, svint32_t, -+ z0 = svqincw_pat_s32 (z0, SV_VL1, 16), -+ z0 = svqincw_pat (z0, SV_VL1, 16)) -+ -+/* -+** qincw_pat_vl2_s32: -+** sqincw z0\.s, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_vl2_s32, svint32_t, -+ z0 = svqincw_pat_s32 (z0, SV_VL2, 16), -+ z0 = svqincw_pat (z0, SV_VL2, 16)) -+ -+/* -+** qincw_pat_vl3_s32: -+** sqincw z0\.s, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_vl3_s32, svint32_t, -+ z0 = svqincw_pat_s32 (z0, SV_VL3, 16), -+ z0 = svqincw_pat (z0, SV_VL3, 16)) -+ -+/* -+** qincw_pat_vl4_s32: -+** sqincw z0\.s, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_vl4_s32, svint32_t, -+ z0 = svqincw_pat_s32 (z0, SV_VL4, 16), -+ z0 = svqincw_pat (z0, SV_VL4, 16)) -+ -+/* -+** qincw_pat_vl5_s32: -+** sqincw z0\.s, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_vl5_s32, svint32_t, -+ z0 = svqincw_pat_s32 (z0, SV_VL5, 16), -+ z0 = svqincw_pat (z0, SV_VL5, 16)) -+ -+/* -+** qincw_pat_vl6_s32: -+** sqincw z0\.s, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_vl6_s32, svint32_t, -+ z0 = svqincw_pat_s32 (z0, SV_VL6, 16), -+ z0 = svqincw_pat (z0, SV_VL6, 16)) -+ -+/* -+** qincw_pat_vl7_s32: -+** sqincw z0\.s, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_vl7_s32, svint32_t, -+ z0 = svqincw_pat_s32 (z0, SV_VL7, 16), -+ z0 = svqincw_pat (z0, SV_VL7, 16)) -+ -+/* -+** qincw_pat_vl8_s32: -+** sqincw z0\.s, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_vl8_s32, svint32_t, -+ z0 = svqincw_pat_s32 (z0, SV_VL8, 16), -+ z0 = svqincw_pat (z0, SV_VL8, 16)) -+ -+/* -+** qincw_pat_vl16_s32: -+** sqincw z0\.s, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_vl16_s32, svint32_t, -+ z0 = svqincw_pat_s32 (z0, SV_VL16, 16), -+ z0 = svqincw_pat (z0, SV_VL16, 16)) -+ -+/* -+** qincw_pat_vl32_s32: -+** sqincw z0\.s, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_vl32_s32, svint32_t, -+ z0 = svqincw_pat_s32 (z0, SV_VL32, 16), -+ z0 = svqincw_pat (z0, SV_VL32, 16)) -+ -+/* -+** qincw_pat_vl64_s32: -+** sqincw z0\.s, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_vl64_s32, svint32_t, -+ z0 = svqincw_pat_s32 (z0, SV_VL64, 16), -+ z0 = svqincw_pat (z0, SV_VL64, 16)) -+ -+/* -+** qincw_pat_vl128_s32: -+** sqincw z0\.s, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_vl128_s32, svint32_t, -+ z0 = svqincw_pat_s32 (z0, SV_VL128, 16), -+ z0 = svqincw_pat (z0, SV_VL128, 16)) -+ -+/* -+** qincw_pat_vl256_s32: -+** sqincw z0\.s, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_vl256_s32, svint32_t, -+ z0 = svqincw_pat_s32 (z0, SV_VL256, 16), -+ z0 = svqincw_pat (z0, SV_VL256, 16)) -+ -+/* -+** qincw_pat_mul4_s32: -+** sqincw z0\.s, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_mul4_s32, svint32_t, -+ z0 = svqincw_pat_s32 (z0, SV_MUL4, 16), -+ z0 = svqincw_pat (z0, SV_MUL4, 16)) -+ -+/* -+** 
qincw_pat_mul3_s32: -+** sqincw z0\.s, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_mul3_s32, svint32_t, -+ z0 = svqincw_pat_s32 (z0, SV_MUL3, 16), -+ z0 = svqincw_pat (z0, SV_MUL3, 16)) -+ -+/* -+** qincw_pat_all_s32: -+** sqincw z0\.s, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_all_s32, svint32_t, -+ z0 = svqincw_pat_s32 (z0, SV_ALL, 16), -+ z0 = svqincw_pat (z0, SV_ALL, 16)) -+ -+/* -+** qincw_pat_n_1_s32_tied: -+** sqincw x0, w0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_1_s32_tied, int32_t, -+ x0 = svqincw_pat_n_s32 (x0, SV_POW2, 1), -+ x0 = svqincw_pat (x0, SV_POW2, 1)) -+ -+/* -+** qincw_pat_n_1_s32_untied: -+** mov w0, w1 -+** sqincw x0, w0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_1_s32_untied, int32_t, -+ x0 = svqincw_pat_n_s32 (x1, SV_POW2, 1), -+ x0 = svqincw_pat (x1, SV_POW2, 1)) -+ -+/* -+** qincw_pat_n_2_s32: -+** sqincw x0, w0, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_2_s32, int32_t, -+ x0 = svqincw_pat_n_s32 (x0, SV_POW2, 2), -+ x0 = svqincw_pat (x0, SV_POW2, 2)) -+ -+/* -+** qincw_pat_n_7_s32: -+** sqincw x0, w0, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_7_s32, int32_t, -+ x0 = svqincw_pat_n_s32 (x0, SV_POW2, 7), -+ x0 = svqincw_pat (x0, SV_POW2, 7)) -+ -+/* -+** qincw_pat_n_15_s32: -+** sqincw x0, w0, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_15_s32, int32_t, -+ x0 = svqincw_pat_n_s32 (x0, SV_POW2, 15), -+ x0 = svqincw_pat (x0, SV_POW2, 15)) -+ -+/* -+** qincw_pat_n_16_s32: -+** sqincw x0, w0, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_16_s32, int32_t, -+ x0 = svqincw_pat_n_s32 (x0, SV_POW2, 16), -+ x0 = svqincw_pat (x0, SV_POW2, 16)) -+ -+/* -+** qincw_pat_n_vl1_s32: -+** sqincw x0, w0, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl1_s32, int32_t, -+ x0 = svqincw_pat_n_s32 (x0, SV_VL1, 16), -+ x0 = svqincw_pat (x0, SV_VL1, 16)) -+ -+/* -+** qincw_pat_n_vl2_s32: -+** sqincw x0, w0, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl2_s32, int32_t, -+ x0 = svqincw_pat_n_s32 (x0, SV_VL2, 16), -+ x0 = svqincw_pat (x0, SV_VL2, 16)) -+ -+/* -+** qincw_pat_n_vl3_s32: -+** sqincw x0, w0, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl3_s32, int32_t, -+ x0 = svqincw_pat_n_s32 (x0, SV_VL3, 16), -+ x0 = svqincw_pat (x0, SV_VL3, 16)) -+ -+/* -+** qincw_pat_n_vl4_s32: -+** sqincw x0, w0, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl4_s32, int32_t, -+ x0 = svqincw_pat_n_s32 (x0, SV_VL4, 16), -+ x0 = svqincw_pat (x0, SV_VL4, 16)) -+ -+/* -+** qincw_pat_n_vl5_s32: -+** sqincw x0, w0, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl5_s32, int32_t, -+ x0 = svqincw_pat_n_s32 (x0, SV_VL5, 16), -+ x0 = svqincw_pat (x0, SV_VL5, 16)) -+ -+/* -+** qincw_pat_n_vl6_s32: -+** sqincw x0, w0, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl6_s32, int32_t, -+ x0 = svqincw_pat_n_s32 (x0, SV_VL6, 16), -+ x0 = svqincw_pat (x0, SV_VL6, 16)) -+ -+/* -+** qincw_pat_n_vl7_s32: -+** sqincw x0, w0, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl7_s32, int32_t, -+ x0 = svqincw_pat_n_s32 (x0, SV_VL7, 16), -+ x0 = svqincw_pat (x0, SV_VL7, 16)) -+ -+/* -+** qincw_pat_n_vl8_s32: -+** sqincw x0, w0, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl8_s32, int32_t, -+ x0 = svqincw_pat_n_s32 (x0, SV_VL8, 16), -+ x0 = svqincw_pat (x0, SV_VL8, 16)) -+ -+/* -+** qincw_pat_n_vl16_s32: -+** sqincw x0, w0, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl16_s32, int32_t, -+ x0 = svqincw_pat_n_s32 (x0, SV_VL16, 16), -+ x0 = 
svqincw_pat (x0, SV_VL16, 16)) -+ -+/* -+** qincw_pat_n_vl32_s32: -+** sqincw x0, w0, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl32_s32, int32_t, -+ x0 = svqincw_pat_n_s32 (x0, SV_VL32, 16), -+ x0 = svqincw_pat (x0, SV_VL32, 16)) -+ -+/* -+** qincw_pat_n_vl64_s32: -+** sqincw x0, w0, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl64_s32, int32_t, -+ x0 = svqincw_pat_n_s32 (x0, SV_VL64, 16), -+ x0 = svqincw_pat (x0, SV_VL64, 16)) -+ -+/* -+** qincw_pat_n_vl128_s32: -+** sqincw x0, w0, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl128_s32, int32_t, -+ x0 = svqincw_pat_n_s32 (x0, SV_VL128, 16), -+ x0 = svqincw_pat (x0, SV_VL128, 16)) -+ -+/* -+** qincw_pat_n_vl256_s32: -+** sqincw x0, w0, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl256_s32, int32_t, -+ x0 = svqincw_pat_n_s32 (x0, SV_VL256, 16), -+ x0 = svqincw_pat (x0, SV_VL256, 16)) -+ -+/* -+** qincw_pat_n_mul4_s32: -+** sqincw x0, w0, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_mul4_s32, int32_t, -+ x0 = svqincw_pat_n_s32 (x0, SV_MUL4, 16), -+ x0 = svqincw_pat (x0, SV_MUL4, 16)) -+ -+/* -+** qincw_pat_n_mul3_s32: -+** sqincw x0, w0, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_mul3_s32, int32_t, -+ x0 = svqincw_pat_n_s32 (x0, SV_MUL3, 16), -+ x0 = svqincw_pat (x0, SV_MUL3, 16)) -+ -+/* -+** qincw_pat_n_all_s32: -+** sqincw x0, w0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_all_s32, int32_t, -+ x0 = svqincw_pat_n_s32 (x0, SV_ALL, 16), -+ x0 = svqincw_pat (x0, SV_ALL, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_pat_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_pat_s64.c -new file mode 100644 -index 000000000..feebc25cc ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_pat_s64.c -@@ -0,0 +1,202 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qincw_pat_n_1_s64_tied: -+** sqincw x0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_1_s64_tied, int64_t, -+ x0 = svqincw_pat_n_s64 (x0, SV_POW2, 1), -+ x0 = svqincw_pat (x0, SV_POW2, 1)) -+ -+/* -+** qincw_pat_n_1_s64_untied: -+** mov x0, x1 -+** sqincw x0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_1_s64_untied, int64_t, -+ x0 = svqincw_pat_n_s64 (x1, SV_POW2, 1), -+ x0 = svqincw_pat (x1, SV_POW2, 1)) -+ -+/* -+** qincw_pat_n_2_s64: -+** sqincw x0, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_2_s64, int64_t, -+ x0 = svqincw_pat_n_s64 (x0, SV_POW2, 2), -+ x0 = svqincw_pat (x0, SV_POW2, 2)) -+ -+/* -+** qincw_pat_n_7_s64: -+** sqincw x0, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_7_s64, int64_t, -+ x0 = svqincw_pat_n_s64 (x0, SV_POW2, 7), -+ x0 = svqincw_pat (x0, SV_POW2, 7)) -+ -+/* -+** qincw_pat_n_15_s64: -+** sqincw x0, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_15_s64, int64_t, -+ x0 = svqincw_pat_n_s64 (x0, SV_POW2, 15), -+ x0 = svqincw_pat (x0, SV_POW2, 15)) -+ -+/* -+** qincw_pat_n_16_s64: -+** sqincw x0, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_16_s64, int64_t, -+ x0 = svqincw_pat_n_s64 (x0, SV_POW2, 16), -+ x0 = svqincw_pat (x0, SV_POW2, 16)) -+ -+/* -+** qincw_pat_n_vl1_s64: -+** sqincw x0, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl1_s64, int64_t, -+ x0 = svqincw_pat_n_s64 (x0, SV_VL1, 16), -+ x0 = svqincw_pat (x0, SV_VL1, 16)) -+ -+/* -+** qincw_pat_n_vl2_s64: -+** sqincw x0, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl2_s64, int64_t, -+ x0 = svqincw_pat_n_s64 
(x0, SV_VL2, 16), -+ x0 = svqincw_pat (x0, SV_VL2, 16)) -+ -+/* -+** qincw_pat_n_vl3_s64: -+** sqincw x0, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl3_s64, int64_t, -+ x0 = svqincw_pat_n_s64 (x0, SV_VL3, 16), -+ x0 = svqincw_pat (x0, SV_VL3, 16)) -+ -+/* -+** qincw_pat_n_vl4_s64: -+** sqincw x0, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl4_s64, int64_t, -+ x0 = svqincw_pat_n_s64 (x0, SV_VL4, 16), -+ x0 = svqincw_pat (x0, SV_VL4, 16)) -+ -+/* -+** qincw_pat_n_vl5_s64: -+** sqincw x0, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl5_s64, int64_t, -+ x0 = svqincw_pat_n_s64 (x0, SV_VL5, 16), -+ x0 = svqincw_pat (x0, SV_VL5, 16)) -+ -+/* -+** qincw_pat_n_vl6_s64: -+** sqincw x0, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl6_s64, int64_t, -+ x0 = svqincw_pat_n_s64 (x0, SV_VL6, 16), -+ x0 = svqincw_pat (x0, SV_VL6, 16)) -+ -+/* -+** qincw_pat_n_vl7_s64: -+** sqincw x0, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl7_s64, int64_t, -+ x0 = svqincw_pat_n_s64 (x0, SV_VL7, 16), -+ x0 = svqincw_pat (x0, SV_VL7, 16)) -+ -+/* -+** qincw_pat_n_vl8_s64: -+** sqincw x0, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl8_s64, int64_t, -+ x0 = svqincw_pat_n_s64 (x0, SV_VL8, 16), -+ x0 = svqincw_pat (x0, SV_VL8, 16)) -+ -+/* -+** qincw_pat_n_vl16_s64: -+** sqincw x0, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl16_s64, int64_t, -+ x0 = svqincw_pat_n_s64 (x0, SV_VL16, 16), -+ x0 = svqincw_pat (x0, SV_VL16, 16)) -+ -+/* -+** qincw_pat_n_vl32_s64: -+** sqincw x0, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl32_s64, int64_t, -+ x0 = svqincw_pat_n_s64 (x0, SV_VL32, 16), -+ x0 = svqincw_pat (x0, SV_VL32, 16)) -+ -+/* -+** qincw_pat_n_vl64_s64: -+** sqincw x0, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl64_s64, int64_t, -+ x0 = svqincw_pat_n_s64 (x0, SV_VL64, 16), -+ x0 = svqincw_pat (x0, SV_VL64, 16)) -+ -+/* -+** qincw_pat_n_vl128_s64: -+** sqincw x0, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl128_s64, int64_t, -+ x0 = svqincw_pat_n_s64 (x0, SV_VL128, 16), -+ x0 = svqincw_pat (x0, SV_VL128, 16)) -+ -+/* -+** qincw_pat_n_vl256_s64: -+** sqincw x0, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl256_s64, int64_t, -+ x0 = svqincw_pat_n_s64 (x0, SV_VL256, 16), -+ x0 = svqincw_pat (x0, SV_VL256, 16)) -+ -+/* -+** qincw_pat_n_mul4_s64: -+** sqincw x0, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_mul4_s64, int64_t, -+ x0 = svqincw_pat_n_s64 (x0, SV_MUL4, 16), -+ x0 = svqincw_pat (x0, SV_MUL4, 16)) -+ -+/* -+** qincw_pat_n_mul3_s64: -+** sqincw x0, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_mul3_s64, int64_t, -+ x0 = svqincw_pat_n_s64 (x0, SV_MUL3, 16), -+ x0 = svqincw_pat (x0, SV_MUL3, 16)) -+ -+/* -+** qincw_pat_n_all_s64: -+** sqincw x0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_all_s64, int64_t, -+ x0 = svqincw_pat_n_s64 (x0, SV_ALL, 16), -+ x0 = svqincw_pat (x0, SV_ALL, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_pat_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_pat_u32.c -new file mode 100644 -index 000000000..e08e91d09 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_pat_u32.c -@@ -0,0 +1,401 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qincw_pat_1_u32_tied: -+** uqincw z0\.s, pow2 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_1_u32_tied, svuint32_t, -+ z0 = svqincw_pat_u32 (z0, 
SV_POW2, 1), -+ z0 = svqincw_pat (z0, SV_POW2, 1)) -+ -+/* -+** qincw_pat_1_u32_untied: -+** movprfx z0, z1 -+** uqincw z0\.s, pow2 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_1_u32_untied, svuint32_t, -+ z0 = svqincw_pat_u32 (z1, SV_POW2, 1), -+ z0 = svqincw_pat (z1, SV_POW2, 1)) -+ -+/* -+** qincw_pat_2_u32: -+** uqincw z0\.s, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_2_u32, svuint32_t, -+ z0 = svqincw_pat_u32 (z0, SV_POW2, 2), -+ z0 = svqincw_pat (z0, SV_POW2, 2)) -+ -+/* -+** qincw_pat_7_u32: -+** uqincw z0\.s, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_7_u32, svuint32_t, -+ z0 = svqincw_pat_u32 (z0, SV_POW2, 7), -+ z0 = svqincw_pat (z0, SV_POW2, 7)) -+ -+/* -+** qincw_pat_15_u32: -+** uqincw z0\.s, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_15_u32, svuint32_t, -+ z0 = svqincw_pat_u32 (z0, SV_POW2, 15), -+ z0 = svqincw_pat (z0, SV_POW2, 15)) -+ -+/* -+** qincw_pat_16_u32: -+** uqincw z0\.s, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_16_u32, svuint32_t, -+ z0 = svqincw_pat_u32 (z0, SV_POW2, 16), -+ z0 = svqincw_pat (z0, SV_POW2, 16)) -+ -+/* -+** qincw_pat_vl1_u32: -+** uqincw z0\.s, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_vl1_u32, svuint32_t, -+ z0 = svqincw_pat_u32 (z0, SV_VL1, 16), -+ z0 = svqincw_pat (z0, SV_VL1, 16)) -+ -+/* -+** qincw_pat_vl2_u32: -+** uqincw z0\.s, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_vl2_u32, svuint32_t, -+ z0 = svqincw_pat_u32 (z0, SV_VL2, 16), -+ z0 = svqincw_pat (z0, SV_VL2, 16)) -+ -+/* -+** qincw_pat_vl3_u32: -+** uqincw z0\.s, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_vl3_u32, svuint32_t, -+ z0 = svqincw_pat_u32 (z0, SV_VL3, 16), -+ z0 = svqincw_pat (z0, SV_VL3, 16)) -+ -+/* -+** qincw_pat_vl4_u32: -+** uqincw z0\.s, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_vl4_u32, svuint32_t, -+ z0 = svqincw_pat_u32 (z0, SV_VL4, 16), -+ z0 = svqincw_pat (z0, SV_VL4, 16)) -+ -+/* -+** qincw_pat_vl5_u32: -+** uqincw z0\.s, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_vl5_u32, svuint32_t, -+ z0 = svqincw_pat_u32 (z0, SV_VL5, 16), -+ z0 = svqincw_pat (z0, SV_VL5, 16)) -+ -+/* -+** qincw_pat_vl6_u32: -+** uqincw z0\.s, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_vl6_u32, svuint32_t, -+ z0 = svqincw_pat_u32 (z0, SV_VL6, 16), -+ z0 = svqincw_pat (z0, SV_VL6, 16)) -+ -+/* -+** qincw_pat_vl7_u32: -+** uqincw z0\.s, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_vl7_u32, svuint32_t, -+ z0 = svqincw_pat_u32 (z0, SV_VL7, 16), -+ z0 = svqincw_pat (z0, SV_VL7, 16)) -+ -+/* -+** qincw_pat_vl8_u32: -+** uqincw z0\.s, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_vl8_u32, svuint32_t, -+ z0 = svqincw_pat_u32 (z0, SV_VL8, 16), -+ z0 = svqincw_pat (z0, SV_VL8, 16)) -+ -+/* -+** qincw_pat_vl16_u32: -+** uqincw z0\.s, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_vl16_u32, svuint32_t, -+ z0 = svqincw_pat_u32 (z0, SV_VL16, 16), -+ z0 = svqincw_pat (z0, SV_VL16, 16)) -+ -+/* -+** qincw_pat_vl32_u32: -+** uqincw z0\.s, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_vl32_u32, svuint32_t, -+ z0 = svqincw_pat_u32 (z0, SV_VL32, 16), -+ z0 = svqincw_pat (z0, SV_VL32, 16)) -+ -+/* -+** qincw_pat_vl64_u32: -+** uqincw z0\.s, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_vl64_u32, svuint32_t, -+ z0 = svqincw_pat_u32 (z0, SV_VL64, 16), -+ z0 = svqincw_pat (z0, SV_VL64, 16)) -+ -+/* -+** qincw_pat_vl128_u32: -+** uqincw z0\.s, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_vl128_u32, svuint32_t, -+ z0 = svqincw_pat_u32 
(z0, SV_VL128, 16), -+ z0 = svqincw_pat (z0, SV_VL128, 16)) -+ -+/* -+** qincw_pat_vl256_u32: -+** uqincw z0\.s, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_vl256_u32, svuint32_t, -+ z0 = svqincw_pat_u32 (z0, SV_VL256, 16), -+ z0 = svqincw_pat (z0, SV_VL256, 16)) -+ -+/* -+** qincw_pat_mul4_u32: -+** uqincw z0\.s, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_mul4_u32, svuint32_t, -+ z0 = svqincw_pat_u32 (z0, SV_MUL4, 16), -+ z0 = svqincw_pat (z0, SV_MUL4, 16)) -+ -+/* -+** qincw_pat_mul3_u32: -+** uqincw z0\.s, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_mul3_u32, svuint32_t, -+ z0 = svqincw_pat_u32 (z0, SV_MUL3, 16), -+ z0 = svqincw_pat (z0, SV_MUL3, 16)) -+ -+/* -+** qincw_pat_all_u32: -+** uqincw z0\.s, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_pat_all_u32, svuint32_t, -+ z0 = svqincw_pat_u32 (z0, SV_ALL, 16), -+ z0 = svqincw_pat (z0, SV_ALL, 16)) -+ -+/* -+** qincw_pat_n_1_u32_tied: -+** uqincw w0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_1_u32_tied, uint32_t, -+ x0 = svqincw_pat_n_u32 (x0, SV_POW2, 1), -+ x0 = svqincw_pat (x0, SV_POW2, 1)) -+ -+/* -+** qincw_pat_n_1_u32_untied: -+** mov w0, w1 -+** uqincw w0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_1_u32_untied, uint32_t, -+ x0 = svqincw_pat_n_u32 (x1, SV_POW2, 1), -+ x0 = svqincw_pat (x1, SV_POW2, 1)) -+ -+/* -+** qincw_pat_n_2_u32: -+** uqincw w0, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_2_u32, uint32_t, -+ x0 = svqincw_pat_n_u32 (x0, SV_POW2, 2), -+ x0 = svqincw_pat (x0, SV_POW2, 2)) -+ -+/* -+** qincw_pat_n_7_u32: -+** uqincw w0, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_7_u32, uint32_t, -+ x0 = svqincw_pat_n_u32 (x0, SV_POW2, 7), -+ x0 = svqincw_pat (x0, SV_POW2, 7)) -+ -+/* -+** qincw_pat_n_15_u32: -+** uqincw w0, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_15_u32, uint32_t, -+ x0 = svqincw_pat_n_u32 (x0, SV_POW2, 15), -+ x0 = svqincw_pat (x0, SV_POW2, 15)) -+ -+/* -+** qincw_pat_n_16_u32: -+** uqincw w0, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_16_u32, uint32_t, -+ x0 = svqincw_pat_n_u32 (x0, SV_POW2, 16), -+ x0 = svqincw_pat (x0, SV_POW2, 16)) -+ -+/* -+** qincw_pat_n_vl1_u32: -+** uqincw w0, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl1_u32, uint32_t, -+ x0 = svqincw_pat_n_u32 (x0, SV_VL1, 16), -+ x0 = svqincw_pat (x0, SV_VL1, 16)) -+ -+/* -+** qincw_pat_n_vl2_u32: -+** uqincw w0, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl2_u32, uint32_t, -+ x0 = svqincw_pat_n_u32 (x0, SV_VL2, 16), -+ x0 = svqincw_pat (x0, SV_VL2, 16)) -+ -+/* -+** qincw_pat_n_vl3_u32: -+** uqincw w0, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl3_u32, uint32_t, -+ x0 = svqincw_pat_n_u32 (x0, SV_VL3, 16), -+ x0 = svqincw_pat (x0, SV_VL3, 16)) -+ -+/* -+** qincw_pat_n_vl4_u32: -+** uqincw w0, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl4_u32, uint32_t, -+ x0 = svqincw_pat_n_u32 (x0, SV_VL4, 16), -+ x0 = svqincw_pat (x0, SV_VL4, 16)) -+ -+/* -+** qincw_pat_n_vl5_u32: -+** uqincw w0, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl5_u32, uint32_t, -+ x0 = svqincw_pat_n_u32 (x0, SV_VL5, 16), -+ x0 = svqincw_pat (x0, SV_VL5, 16)) -+ -+/* -+** qincw_pat_n_vl6_u32: -+** uqincw w0, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl6_u32, uint32_t, -+ x0 = svqincw_pat_n_u32 (x0, SV_VL6, 16), -+ x0 = svqincw_pat (x0, SV_VL6, 16)) -+ -+/* -+** qincw_pat_n_vl7_u32: -+** uqincw w0, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl7_u32, uint32_t, -+ x0 = 
svqincw_pat_n_u32 (x0, SV_VL7, 16), -+ x0 = svqincw_pat (x0, SV_VL7, 16)) -+ -+/* -+** qincw_pat_n_vl8_u32: -+** uqincw w0, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl8_u32, uint32_t, -+ x0 = svqincw_pat_n_u32 (x0, SV_VL8, 16), -+ x0 = svqincw_pat (x0, SV_VL8, 16)) -+ -+/* -+** qincw_pat_n_vl16_u32: -+** uqincw w0, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl16_u32, uint32_t, -+ x0 = svqincw_pat_n_u32 (x0, SV_VL16, 16), -+ x0 = svqincw_pat (x0, SV_VL16, 16)) -+ -+/* -+** qincw_pat_n_vl32_u32: -+** uqincw w0, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl32_u32, uint32_t, -+ x0 = svqincw_pat_n_u32 (x0, SV_VL32, 16), -+ x0 = svqincw_pat (x0, SV_VL32, 16)) -+ -+/* -+** qincw_pat_n_vl64_u32: -+** uqincw w0, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl64_u32, uint32_t, -+ x0 = svqincw_pat_n_u32 (x0, SV_VL64, 16), -+ x0 = svqincw_pat (x0, SV_VL64, 16)) -+ -+/* -+** qincw_pat_n_vl128_u32: -+** uqincw w0, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl128_u32, uint32_t, -+ x0 = svqincw_pat_n_u32 (x0, SV_VL128, 16), -+ x0 = svqincw_pat (x0, SV_VL128, 16)) -+ -+/* -+** qincw_pat_n_vl256_u32: -+** uqincw w0, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl256_u32, uint32_t, -+ x0 = svqincw_pat_n_u32 (x0, SV_VL256, 16), -+ x0 = svqincw_pat (x0, SV_VL256, 16)) -+ -+/* -+** qincw_pat_n_mul4_u32: -+** uqincw w0, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_mul4_u32, uint32_t, -+ x0 = svqincw_pat_n_u32 (x0, SV_MUL4, 16), -+ x0 = svqincw_pat (x0, SV_MUL4, 16)) -+ -+/* -+** qincw_pat_n_mul3_u32: -+** uqincw w0, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_mul3_u32, uint32_t, -+ x0 = svqincw_pat_n_u32 (x0, SV_MUL3, 16), -+ x0 = svqincw_pat (x0, SV_MUL3, 16)) -+ -+/* -+** qincw_pat_n_all_u32: -+** uqincw w0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_all_u32, uint32_t, -+ x0 = svqincw_pat_n_u32 (x0, SV_ALL, 16), -+ x0 = svqincw_pat (x0, SV_ALL, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_pat_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_pat_u64.c -new file mode 100644 -index 000000000..a2ac9ee72 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_pat_u64.c -@@ -0,0 +1,202 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qincw_pat_n_1_u64_tied: -+** uqincw x0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_1_u64_tied, uint64_t, -+ x0 = svqincw_pat_n_u64 (x0, SV_POW2, 1), -+ x0 = svqincw_pat (x0, SV_POW2, 1)) -+ -+/* -+** qincw_pat_n_1_u64_untied: -+** mov x0, x1 -+** uqincw x0, pow2 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_1_u64_untied, uint64_t, -+ x0 = svqincw_pat_n_u64 (x1, SV_POW2, 1), -+ x0 = svqincw_pat (x1, SV_POW2, 1)) -+ -+/* -+** qincw_pat_n_2_u64: -+** uqincw x0, pow2, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_2_u64, uint64_t, -+ x0 = svqincw_pat_n_u64 (x0, SV_POW2, 2), -+ x0 = svqincw_pat (x0, SV_POW2, 2)) -+ -+/* -+** qincw_pat_n_7_u64: -+** uqincw x0, pow2, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_7_u64, uint64_t, -+ x0 = svqincw_pat_n_u64 (x0, SV_POW2, 7), -+ x0 = svqincw_pat (x0, SV_POW2, 7)) -+ -+/* -+** qincw_pat_n_15_u64: -+** uqincw x0, pow2, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_15_u64, uint64_t, -+ x0 = svqincw_pat_n_u64 (x0, SV_POW2, 15), -+ x0 = svqincw_pat (x0, SV_POW2, 15)) -+ -+/* -+** qincw_pat_n_16_u64: -+** uqincw x0, pow2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_16_u64, 
uint64_t, -+ x0 = svqincw_pat_n_u64 (x0, SV_POW2, 16), -+ x0 = svqincw_pat (x0, SV_POW2, 16)) -+ -+/* -+** qincw_pat_n_vl1_u64: -+** uqincw x0, vl1, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl1_u64, uint64_t, -+ x0 = svqincw_pat_n_u64 (x0, SV_VL1, 16), -+ x0 = svqincw_pat (x0, SV_VL1, 16)) -+ -+/* -+** qincw_pat_n_vl2_u64: -+** uqincw x0, vl2, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl2_u64, uint64_t, -+ x0 = svqincw_pat_n_u64 (x0, SV_VL2, 16), -+ x0 = svqincw_pat (x0, SV_VL2, 16)) -+ -+/* -+** qincw_pat_n_vl3_u64: -+** uqincw x0, vl3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl3_u64, uint64_t, -+ x0 = svqincw_pat_n_u64 (x0, SV_VL3, 16), -+ x0 = svqincw_pat (x0, SV_VL3, 16)) -+ -+/* -+** qincw_pat_n_vl4_u64: -+** uqincw x0, vl4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl4_u64, uint64_t, -+ x0 = svqincw_pat_n_u64 (x0, SV_VL4, 16), -+ x0 = svqincw_pat (x0, SV_VL4, 16)) -+ -+/* -+** qincw_pat_n_vl5_u64: -+** uqincw x0, vl5, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl5_u64, uint64_t, -+ x0 = svqincw_pat_n_u64 (x0, SV_VL5, 16), -+ x0 = svqincw_pat (x0, SV_VL5, 16)) -+ -+/* -+** qincw_pat_n_vl6_u64: -+** uqincw x0, vl6, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl6_u64, uint64_t, -+ x0 = svqincw_pat_n_u64 (x0, SV_VL6, 16), -+ x0 = svqincw_pat (x0, SV_VL6, 16)) -+ -+/* -+** qincw_pat_n_vl7_u64: -+** uqincw x0, vl7, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl7_u64, uint64_t, -+ x0 = svqincw_pat_n_u64 (x0, SV_VL7, 16), -+ x0 = svqincw_pat (x0, SV_VL7, 16)) -+ -+/* -+** qincw_pat_n_vl8_u64: -+** uqincw x0, vl8, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl8_u64, uint64_t, -+ x0 = svqincw_pat_n_u64 (x0, SV_VL8, 16), -+ x0 = svqincw_pat (x0, SV_VL8, 16)) -+ -+/* -+** qincw_pat_n_vl16_u64: -+** uqincw x0, vl16, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl16_u64, uint64_t, -+ x0 = svqincw_pat_n_u64 (x0, SV_VL16, 16), -+ x0 = svqincw_pat (x0, SV_VL16, 16)) -+ -+/* -+** qincw_pat_n_vl32_u64: -+** uqincw x0, vl32, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl32_u64, uint64_t, -+ x0 = svqincw_pat_n_u64 (x0, SV_VL32, 16), -+ x0 = svqincw_pat (x0, SV_VL32, 16)) -+ -+/* -+** qincw_pat_n_vl64_u64: -+** uqincw x0, vl64, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl64_u64, uint64_t, -+ x0 = svqincw_pat_n_u64 (x0, SV_VL64, 16), -+ x0 = svqincw_pat (x0, SV_VL64, 16)) -+ -+/* -+** qincw_pat_n_vl128_u64: -+** uqincw x0, vl128, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl128_u64, uint64_t, -+ x0 = svqincw_pat_n_u64 (x0, SV_VL128, 16), -+ x0 = svqincw_pat (x0, SV_VL128, 16)) -+ -+/* -+** qincw_pat_n_vl256_u64: -+** uqincw x0, vl256, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_vl256_u64, uint64_t, -+ x0 = svqincw_pat_n_u64 (x0, SV_VL256, 16), -+ x0 = svqincw_pat (x0, SV_VL256, 16)) -+ -+/* -+** qincw_pat_n_mul4_u64: -+** uqincw x0, mul4, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_mul4_u64, uint64_t, -+ x0 = svqincw_pat_n_u64 (x0, SV_MUL4, 16), -+ x0 = svqincw_pat (x0, SV_MUL4, 16)) -+ -+/* -+** qincw_pat_n_mul3_u64: -+** uqincw x0, mul3, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_mul3_u64, uint64_t, -+ x0 = svqincw_pat_n_u64 (x0, SV_MUL3, 16), -+ x0 = svqincw_pat (x0, SV_MUL3, 16)) -+ -+/* -+** qincw_pat_n_all_u64: -+** uqincw x0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_pat_n_all_u64, uint64_t, -+ x0 = svqincw_pat_n_u64 (x0, SV_ALL, 16), -+ x0 = svqincw_pat (x0, SV_ALL, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_s32.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_s32.c -new file mode 100644 -index 000000000..031824acf ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_s32.c -@@ -0,0 +1,113 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qincw_1_s32_tied: -+** sqincw z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_1_s32_tied, svint32_t, -+ z0 = svqincw_s32 (z0, 1), -+ z0 = svqincw (z0, 1)) -+ -+/* -+** qincw_1_s32_untied: -+** movprfx z0, z1 -+** sqincw z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_1_s32_untied, svint32_t, -+ z0 = svqincw_s32 (z1, 1), -+ z0 = svqincw (z1, 1)) -+ -+/* -+** qincw_2_s32: -+** sqincw z0\.s, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_2_s32, svint32_t, -+ z0 = svqincw_s32 (z0, 2), -+ z0 = svqincw (z0, 2)) -+ -+/* -+** qincw_7_s32: -+** sqincw z0\.s, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_7_s32, svint32_t, -+ z0 = svqincw_s32 (z0, 7), -+ z0 = svqincw (z0, 7)) -+ -+/* -+** qincw_15_s32: -+** sqincw z0\.s, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_15_s32, svint32_t, -+ z0 = svqincw_s32 (z0, 15), -+ z0 = svqincw (z0, 15)) -+ -+/* -+** qincw_16_s32: -+** sqincw z0\.s, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_16_s32, svint32_t, -+ z0 = svqincw_s32 (z0, 16), -+ z0 = svqincw (z0, 16)) -+ -+/* -+** qincw_n_1_s32_tied: -+** sqincw x0, w0 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_n_1_s32_tied, int32_t, -+ x0 = svqincw_n_s32 (x0, 1), -+ x0 = svqincw (x0, 1)) -+ -+/* -+** qincw_n_1_s32_untied: -+** mov w0, w1 -+** sqincw x0, w0 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_n_1_s32_untied, int32_t, -+ x0 = svqincw_n_s32 (x1, 1), -+ x0 = svqincw (x1, 1)) -+ -+/* -+** qincw_n_2_s32: -+** sqincw x0, w0, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_n_2_s32, int32_t, -+ x0 = svqincw_n_s32 (x0, 2), -+ x0 = svqincw (x0, 2)) -+ -+/* -+** qincw_n_7_s32: -+** sqincw x0, w0, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_n_7_s32, int32_t, -+ x0 = svqincw_n_s32 (x0, 7), -+ x0 = svqincw (x0, 7)) -+ -+/* -+** qincw_n_15_s32: -+** sqincw x0, w0, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_n_15_s32, int32_t, -+ x0 = svqincw_n_s32 (x0, 15), -+ x0 = svqincw (x0, 15)) -+ -+/* -+** qincw_n_16_s32: -+** sqincw x0, w0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_n_16_s32, int32_t, -+ x0 = svqincw_n_s32 (x0, 16), -+ x0 = svqincw (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_s64.c -new file mode 100644 -index 000000000..df61f909f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_s64.c -@@ -0,0 +1,58 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qincw_n_1_s64_tied: -+** sqincw x0 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_n_1_s64_tied, int64_t, -+ x0 = svqincw_n_s64 (x0, 1), -+ x0 = svqincw (x0, 1)) -+ -+/* -+** qincw_n_1_s64_untied: -+** mov x0, x1 -+** sqincw x0 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_n_1_s64_untied, int64_t, -+ x0 = svqincw_n_s64 (x1, 1), -+ x0 = svqincw (x1, 1)) -+ -+/* -+** qincw_n_2_s64: -+** sqincw x0, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_n_2_s64, int64_t, -+ x0 = svqincw_n_s64 (x0, 2), -+ x0 = svqincw (x0, 2)) -+ -+/* -+** qincw_n_7_s64: -+** sqincw x0, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_n_7_s64, int64_t, -+ x0 = svqincw_n_s64 (x0, 7), -+ x0 = svqincw (x0, 7)) -+ -+/* -+** qincw_n_15_s64: -+** sqincw x0, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S 
(qincw_n_15_s64, int64_t, -+ x0 = svqincw_n_s64 (x0, 15), -+ x0 = svqincw (x0, 15)) -+ -+/* -+** qincw_n_16_s64: -+** sqincw x0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_n_16_s64, int64_t, -+ x0 = svqincw_n_s64 (x0, 16), -+ x0 = svqincw (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_u32.c -new file mode 100644 -index 000000000..65a446ab6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_u32.c -@@ -0,0 +1,113 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qincw_1_u32_tied: -+** uqincw z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_1_u32_tied, svuint32_t, -+ z0 = svqincw_u32 (z0, 1), -+ z0 = svqincw (z0, 1)) -+ -+/* -+** qincw_1_u32_untied: -+** movprfx z0, z1 -+** uqincw z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_1_u32_untied, svuint32_t, -+ z0 = svqincw_u32 (z1, 1), -+ z0 = svqincw (z1, 1)) -+ -+/* -+** qincw_2_u32: -+** uqincw z0\.s, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_2_u32, svuint32_t, -+ z0 = svqincw_u32 (z0, 2), -+ z0 = svqincw (z0, 2)) -+ -+/* -+** qincw_7_u32: -+** uqincw z0\.s, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_7_u32, svuint32_t, -+ z0 = svqincw_u32 (z0, 7), -+ z0 = svqincw (z0, 7)) -+ -+/* -+** qincw_15_u32: -+** uqincw z0\.s, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_15_u32, svuint32_t, -+ z0 = svqincw_u32 (z0, 15), -+ z0 = svqincw (z0, 15)) -+ -+/* -+** qincw_16_u32: -+** uqincw z0\.s, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_Z (qincw_16_u32, svuint32_t, -+ z0 = svqincw_u32 (z0, 16), -+ z0 = svqincw (z0, 16)) -+ -+/* -+** qincw_n_1_u32_tied: -+** uqincw w0 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_n_1_u32_tied, uint32_t, -+ x0 = svqincw_n_u32 (x0, 1), -+ x0 = svqincw (x0, 1)) -+ -+/* -+** qincw_n_1_u32_untied: -+** mov w0, w1 -+** uqincw w0 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_n_1_u32_untied, uint32_t, -+ x0 = svqincw_n_u32 (x1, 1), -+ x0 = svqincw (x1, 1)) -+ -+/* -+** qincw_n_2_u32: -+** uqincw w0, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_n_2_u32, uint32_t, -+ x0 = svqincw_n_u32 (x0, 2), -+ x0 = svqincw (x0, 2)) -+ -+/* -+** qincw_n_7_u32: -+** uqincw w0, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_n_7_u32, uint32_t, -+ x0 = svqincw_n_u32 (x0, 7), -+ x0 = svqincw (x0, 7)) -+ -+/* -+** qincw_n_15_u32: -+** uqincw w0, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_n_15_u32, uint32_t, -+ x0 = svqincw_n_u32 (x0, 15), -+ x0 = svqincw (x0, 15)) -+ -+/* -+** qincw_n_16_u32: -+** uqincw w0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_n_16_u32, uint32_t, -+ x0 = svqincw_n_u32 (x0, 16), -+ x0 = svqincw (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_u64.c -new file mode 100644 -index 000000000..806a79945 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qincw_u64.c -@@ -0,0 +1,58 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qincw_n_1_u64_tied: -+** uqincw x0 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_n_1_u64_tied, uint64_t, -+ x0 = svqincw_n_u64 (x0, 1), -+ x0 = svqincw (x0, 1)) -+ -+/* -+** qincw_n_1_u64_untied: -+** mov x0, x1 -+** uqincw x0 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_n_1_u64_untied, uint64_t, -+ x0 = svqincw_n_u64 (x1, 1), -+ x0 = svqincw (x1, 1)) -+ -+/* -+** qincw_n_2_u64: -+** uqincw x0, all, mul #2 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_n_2_u64, 
uint64_t, -+ x0 = svqincw_n_u64 (x0, 2), -+ x0 = svqincw (x0, 2)) -+ -+/* -+** qincw_n_7_u64: -+** uqincw x0, all, mul #7 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_n_7_u64, uint64_t, -+ x0 = svqincw_n_u64 (x0, 7), -+ x0 = svqincw (x0, 7)) -+ -+/* -+** qincw_n_15_u64: -+** uqincw x0, all, mul #15 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_n_15_u64, uint64_t, -+ x0 = svqincw_n_u64 (x0, 15), -+ x0 = svqincw (x0, 15)) -+ -+/* -+** qincw_n_16_u64: -+** uqincw x0, all, mul #16 -+** ret -+*/ -+TEST_UNIFORM_S (qincw_n_16_u64, uint64_t, -+ x0 = svqincw_n_u64 (x0, 16), -+ x0 = svqincw (x0, 16)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_s16.c -new file mode 100644 -index 000000000..8dd8381dc ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_s16.c -@@ -0,0 +1,123 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qsub_s16_tied1: -+** sqsub z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_s16_tied1, svint16_t, -+ z0 = svqsub_s16 (z0, z1), -+ z0 = svqsub (z0, z1)) -+ -+/* -+** qsub_s16_tied2: -+** sqsub z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_s16_tied2, svint16_t, -+ z0 = svqsub_s16 (z1, z0), -+ z0 = svqsub (z1, z0)) -+ -+/* -+** qsub_s16_untied: -+** sqsub z0\.h, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_s16_untied, svint16_t, -+ z0 = svqsub_s16 (z1, z2), -+ z0 = svqsub (z1, z2)) -+ -+/* -+** qsub_w0_s16_tied1: -+** mov (z[0-9]+\.h), w0 -+** sqsub z0\.h, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (qsub_w0_s16_tied1, svint16_t, int16_t, -+ z0 = svqsub_n_s16 (z0, x0), -+ z0 = svqsub (z0, x0)) -+ -+/* -+** qsub_w0_s16_untied: -+** mov (z[0-9]+\.h), w0 -+** sqsub z0\.h, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (qsub_w0_s16_untied, svint16_t, int16_t, -+ z0 = svqsub_n_s16 (z1, x0), -+ z0 = svqsub (z1, x0)) -+ -+/* -+** qsub_1_s16_tied1: -+** sqsub z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_1_s16_tied1, svint16_t, -+ z0 = svqsub_n_s16 (z0, 1), -+ z0 = svqsub (z0, 1)) -+ -+/* -+** qsub_1_s16_untied: -+** movprfx z0, z1 -+** sqsub z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_1_s16_untied, svint16_t, -+ z0 = svqsub_n_s16 (z1, 1), -+ z0 = svqsub (z1, 1)) -+ -+/* -+** qsub_127_s16: -+** sqsub z0\.h, z0\.h, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_127_s16, svint16_t, -+ z0 = svqsub_n_s16 (z0, 127), -+ z0 = svqsub (z0, 127)) -+ -+/* -+** qsub_128_s16: -+** sqsub z0\.h, z0\.h, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_128_s16, svint16_t, -+ z0 = svqsub_n_s16 (z0, 128), -+ z0 = svqsub (z0, 128)) -+ -+/* -+** qsub_255_s16: -+** sqsub z0\.h, z0\.h, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_255_s16, svint16_t, -+ z0 = svqsub_n_s16 (z0, 255), -+ z0 = svqsub (z0, 255)) -+ -+/* -+** qsub_m1_s16: -+** sqadd z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_m1_s16, svint16_t, -+ z0 = svqsub_n_s16 (z0, -1), -+ z0 = svqsub (z0, -1)) -+ -+/* -+** qsub_m127_s16: -+** sqadd z0\.h, z0\.h, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_m127_s16, svint16_t, -+ z0 = svqsub_n_s16 (z0, -127), -+ z0 = svqsub (z0, -127)) -+ -+/* -+** qsub_m128_s16: -+** sqadd z0\.h, z0\.h, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_m128_s16, svint16_t, -+ z0 = svqsub_n_s16 (z0, -128), -+ z0 = svqsub (z0, -128)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_s32.c -new file mode 100644 -index 000000000..920736aec ---- /dev/null -+++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_s32.c -@@ -0,0 +1,123 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qsub_s32_tied1: -+** sqsub z0\.s, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_s32_tied1, svint32_t, -+ z0 = svqsub_s32 (z0, z1), -+ z0 = svqsub (z0, z1)) -+ -+/* -+** qsub_s32_tied2: -+** sqsub z0\.s, z1\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_s32_tied2, svint32_t, -+ z0 = svqsub_s32 (z1, z0), -+ z0 = svqsub (z1, z0)) -+ -+/* -+** qsub_s32_untied: -+** sqsub z0\.s, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_s32_untied, svint32_t, -+ z0 = svqsub_s32 (z1, z2), -+ z0 = svqsub (z1, z2)) -+ -+/* -+** qsub_w0_s32_tied1: -+** mov (z[0-9]+\.s), w0 -+** sqsub z0\.s, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (qsub_w0_s32_tied1, svint32_t, int32_t, -+ z0 = svqsub_n_s32 (z0, x0), -+ z0 = svqsub (z0, x0)) -+ -+/* -+** qsub_w0_s32_untied: -+** mov (z[0-9]+\.s), w0 -+** sqsub z0\.s, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (qsub_w0_s32_untied, svint32_t, int32_t, -+ z0 = svqsub_n_s32 (z1, x0), -+ z0 = svqsub (z1, x0)) -+ -+/* -+** qsub_1_s32_tied1: -+** sqsub z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_1_s32_tied1, svint32_t, -+ z0 = svqsub_n_s32 (z0, 1), -+ z0 = svqsub (z0, 1)) -+ -+/* -+** qsub_1_s32_untied: -+** movprfx z0, z1 -+** sqsub z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_1_s32_untied, svint32_t, -+ z0 = svqsub_n_s32 (z1, 1), -+ z0 = svqsub (z1, 1)) -+ -+/* -+** qsub_127_s32: -+** sqsub z0\.s, z0\.s, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_127_s32, svint32_t, -+ z0 = svqsub_n_s32 (z0, 127), -+ z0 = svqsub (z0, 127)) -+ -+/* -+** qsub_128_s32: -+** sqsub z0\.s, z0\.s, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_128_s32, svint32_t, -+ z0 = svqsub_n_s32 (z0, 128), -+ z0 = svqsub (z0, 128)) -+ -+/* -+** qsub_255_s32: -+** sqsub z0\.s, z0\.s, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_255_s32, svint32_t, -+ z0 = svqsub_n_s32 (z0, 255), -+ z0 = svqsub (z0, 255)) -+ -+/* -+** qsub_m1_s32: -+** sqadd z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_m1_s32, svint32_t, -+ z0 = svqsub_n_s32 (z0, -1), -+ z0 = svqsub (z0, -1)) -+ -+/* -+** qsub_m127_s32: -+** sqadd z0\.s, z0\.s, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_m127_s32, svint32_t, -+ z0 = svqsub_n_s32 (z0, -127), -+ z0 = svqsub (z0, -127)) -+ -+/* -+** qsub_m128_s32: -+** sqadd z0\.s, z0\.s, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_m128_s32, svint32_t, -+ z0 = svqsub_n_s32 (z0, -128), -+ z0 = svqsub (z0, -128)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_s64.c -new file mode 100644 -index 000000000..3d0fc2bcc ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_s64.c -@@ -0,0 +1,123 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qsub_s64_tied1: -+** sqsub z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_s64_tied1, svint64_t, -+ z0 = svqsub_s64 (z0, z1), -+ z0 = svqsub (z0, z1)) -+ -+/* -+** qsub_s64_tied2: -+** sqsub z0\.d, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_s64_tied2, svint64_t, -+ z0 = svqsub_s64 (z1, z0), -+ z0 = svqsub (z1, z0)) -+ -+/* -+** qsub_s64_untied: -+** sqsub z0\.d, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_s64_untied, svint64_t, -+ z0 = svqsub_s64 (z1, z2), -+ z0 = svqsub (z1, z2)) -+ -+/* -+** qsub_x0_s64_tied1: -+** mov (z[0-9]+\.d), x0 -+** sqsub z0\.d, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX 
(qsub_x0_s64_tied1, svint64_t, int64_t, -+ z0 = svqsub_n_s64 (z0, x0), -+ z0 = svqsub (z0, x0)) -+ -+/* -+** qsub_x0_s64_untied: -+** mov (z[0-9]+\.d), x0 -+** sqsub z0\.d, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (qsub_x0_s64_untied, svint64_t, int64_t, -+ z0 = svqsub_n_s64 (z1, x0), -+ z0 = svqsub (z1, x0)) -+ -+/* -+** qsub_1_s64_tied1: -+** sqsub z0\.d, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_1_s64_tied1, svint64_t, -+ z0 = svqsub_n_s64 (z0, 1), -+ z0 = svqsub (z0, 1)) -+ -+/* -+** qsub_1_s64_untied: -+** movprfx z0, z1 -+** sqsub z0\.d, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_1_s64_untied, svint64_t, -+ z0 = svqsub_n_s64 (z1, 1), -+ z0 = svqsub (z1, 1)) -+ -+/* -+** qsub_127_s64: -+** sqsub z0\.d, z0\.d, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_127_s64, svint64_t, -+ z0 = svqsub_n_s64 (z0, 127), -+ z0 = svqsub (z0, 127)) -+ -+/* -+** qsub_128_s64: -+** sqsub z0\.d, z0\.d, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_128_s64, svint64_t, -+ z0 = svqsub_n_s64 (z0, 128), -+ z0 = svqsub (z0, 128)) -+ -+/* -+** qsub_255_s64: -+** sqsub z0\.d, z0\.d, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_255_s64, svint64_t, -+ z0 = svqsub_n_s64 (z0, 255), -+ z0 = svqsub (z0, 255)) -+ -+/* -+** qsub_m1_s64: -+** sqadd z0\.d, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_m1_s64, svint64_t, -+ z0 = svqsub_n_s64 (z0, -1), -+ z0 = svqsub (z0, -1)) -+ -+/* -+** qsub_m127_s64: -+** sqadd z0\.d, z0\.d, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_m127_s64, svint64_t, -+ z0 = svqsub_n_s64 (z0, -127), -+ z0 = svqsub (z0, -127)) -+ -+/* -+** qsub_m128_s64: -+** sqadd z0\.d, z0\.d, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_m128_s64, svint64_t, -+ z0 = svqsub_n_s64 (z0, -128), -+ z0 = svqsub (z0, -128)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_s8.c -new file mode 100644 -index 000000000..3e7e84c77 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_s8.c -@@ -0,0 +1,123 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qsub_s8_tied1: -+** sqsub z0\.b, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_s8_tied1, svint8_t, -+ z0 = svqsub_s8 (z0, z1), -+ z0 = svqsub (z0, z1)) -+ -+/* -+** qsub_s8_tied2: -+** sqsub z0\.b, z1\.b, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_s8_tied2, svint8_t, -+ z0 = svqsub_s8 (z1, z0), -+ z0 = svqsub (z1, z0)) -+ -+/* -+** qsub_s8_untied: -+** sqsub z0\.b, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_s8_untied, svint8_t, -+ z0 = svqsub_s8 (z1, z2), -+ z0 = svqsub (z1, z2)) -+ -+/* -+** qsub_w0_s8_tied1: -+** mov (z[0-9]+\.b), w0 -+** sqsub z0\.b, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (qsub_w0_s8_tied1, svint8_t, int8_t, -+ z0 = svqsub_n_s8 (z0, x0), -+ z0 = svqsub (z0, x0)) -+ -+/* -+** qsub_w0_s8_untied: -+** mov (z[0-9]+\.b), w0 -+** sqsub z0\.b, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (qsub_w0_s8_untied, svint8_t, int8_t, -+ z0 = svqsub_n_s8 (z1, x0), -+ z0 = svqsub (z1, x0)) -+ -+/* -+** qsub_1_s8_tied1: -+** sqsub z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_1_s8_tied1, svint8_t, -+ z0 = svqsub_n_s8 (z0, 1), -+ z0 = svqsub (z0, 1)) -+ -+/* -+** qsub_1_s8_untied: -+** movprfx z0, z1 -+** sqsub z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_1_s8_untied, svint8_t, -+ z0 = svqsub_n_s8 (z1, 1), -+ z0 = svqsub (z1, 1)) -+ -+/* -+** qsub_127_s8: -+** sqsub z0\.b, z0\.b, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_127_s8, svint8_t, -+ z0 = svqsub_n_s8 (z0, 127), -+ z0 = svqsub (z0, 127)) -+ 
-+/* -+** qsub_128_s8: -+** sqadd z0\.b, z0\.b, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_128_s8, svint8_t, -+ z0 = svqsub_n_s8 (z0, 128), -+ z0 = svqsub (z0, 128)) -+ -+/* -+** qsub_255_s8: -+** sqadd z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_255_s8, svint8_t, -+ z0 = svqsub_n_s8 (z0, 255), -+ z0 = svqsub (z0, 255)) -+ -+/* -+** qsub_m1_s8: -+** sqadd z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_m1_s8, svint8_t, -+ z0 = svqsub_n_s8 (z0, -1), -+ z0 = svqsub (z0, -1)) -+ -+/* -+** qsub_m127_s8: -+** sqadd z0\.b, z0\.b, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_m127_s8, svint8_t, -+ z0 = svqsub_n_s8 (z0, -127), -+ z0 = svqsub (z0, -127)) -+ -+/* -+** qsub_m128_s8: -+** sqadd z0\.b, z0\.b, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_m128_s8, svint8_t, -+ z0 = svqsub_n_s8 (z0, -128), -+ z0 = svqsub (z0, -128)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_u16.c -new file mode 100644 -index 000000000..6d4d68e20 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_u16.c -@@ -0,0 +1,126 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qsub_u16_tied1: -+** uqsub z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_u16_tied1, svuint16_t, -+ z0 = svqsub_u16 (z0, z1), -+ z0 = svqsub (z0, z1)) -+ -+/* -+** qsub_u16_tied2: -+** uqsub z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_u16_tied2, svuint16_t, -+ z0 = svqsub_u16 (z1, z0), -+ z0 = svqsub (z1, z0)) -+ -+/* -+** qsub_u16_untied: -+** uqsub z0\.h, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_u16_untied, svuint16_t, -+ z0 = svqsub_u16 (z1, z2), -+ z0 = svqsub (z1, z2)) -+ -+/* -+** qsub_w0_u16_tied1: -+** mov (z[0-9]+\.h), w0 -+** uqsub z0\.h, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (qsub_w0_u16_tied1, svuint16_t, uint16_t, -+ z0 = svqsub_n_u16 (z0, x0), -+ z0 = svqsub (z0, x0)) -+ -+/* -+** qsub_w0_u16_untied: -+** mov (z[0-9]+\.h), w0 -+** uqsub z0\.h, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (qsub_w0_u16_untied, svuint16_t, uint16_t, -+ z0 = svqsub_n_u16 (z1, x0), -+ z0 = svqsub (z1, x0)) -+ -+/* -+** qsub_1_u16_tied1: -+** uqsub z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_1_u16_tied1, svuint16_t, -+ z0 = svqsub_n_u16 (z0, 1), -+ z0 = svqsub (z0, 1)) -+ -+/* -+** qsub_1_u16_untied: -+** movprfx z0, z1 -+** uqsub z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_1_u16_untied, svuint16_t, -+ z0 = svqsub_n_u16 (z1, 1), -+ z0 = svqsub (z1, 1)) -+ -+/* -+** qsub_127_u16: -+** uqsub z0\.h, z0\.h, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_127_u16, svuint16_t, -+ z0 = svqsub_n_u16 (z0, 127), -+ z0 = svqsub (z0, 127)) -+ -+/* -+** qsub_128_u16: -+** uqsub z0\.h, z0\.h, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_128_u16, svuint16_t, -+ z0 = svqsub_n_u16 (z0, 128), -+ z0 = svqsub (z0, 128)) -+ -+/* -+** qsub_255_u16: -+** uqsub z0\.h, z0\.h, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_255_u16, svuint16_t, -+ z0 = svqsub_n_u16 (z0, 255), -+ z0 = svqsub (z0, 255)) -+ -+/* -+** qsub_m1_u16: -+** mov (z[0-9]+)\.b, #-1 -+** uqsub z0\.h, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_m1_u16, svuint16_t, -+ z0 = svqsub_n_u16 (z0, -1), -+ z0 = svqsub (z0, -1)) -+ -+/* -+** qsub_m127_u16: -+** mov (z[0-9]+\.h), #-127 -+** uqsub z0\.h, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_m127_u16, svuint16_t, -+ z0 = svqsub_n_u16 (z0, -127), -+ z0 = svqsub (z0, -127)) -+ -+/* -+** qsub_m128_u16: -+** mov (z[0-9]+\.h), #-128 -+** uqsub z0\.h, z0\.h, \1 
-+** ret -+*/ -+TEST_UNIFORM_Z (qsub_m128_u16, svuint16_t, -+ z0 = svqsub_n_u16 (z0, -128), -+ z0 = svqsub (z0, -128)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_u32.c -new file mode 100644 -index 000000000..9c93cfc45 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_u32.c -@@ -0,0 +1,126 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qsub_u32_tied1: -+** uqsub z0\.s, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_u32_tied1, svuint32_t, -+ z0 = svqsub_u32 (z0, z1), -+ z0 = svqsub (z0, z1)) -+ -+/* -+** qsub_u32_tied2: -+** uqsub z0\.s, z1\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_u32_tied2, svuint32_t, -+ z0 = svqsub_u32 (z1, z0), -+ z0 = svqsub (z1, z0)) -+ -+/* -+** qsub_u32_untied: -+** uqsub z0\.s, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_u32_untied, svuint32_t, -+ z0 = svqsub_u32 (z1, z2), -+ z0 = svqsub (z1, z2)) -+ -+/* -+** qsub_w0_u32_tied1: -+** mov (z[0-9]+\.s), w0 -+** uqsub z0\.s, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (qsub_w0_u32_tied1, svuint32_t, uint32_t, -+ z0 = svqsub_n_u32 (z0, x0), -+ z0 = svqsub (z0, x0)) -+ -+/* -+** qsub_w0_u32_untied: -+** mov (z[0-9]+\.s), w0 -+** uqsub z0\.s, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (qsub_w0_u32_untied, svuint32_t, uint32_t, -+ z0 = svqsub_n_u32 (z1, x0), -+ z0 = svqsub (z1, x0)) -+ -+/* -+** qsub_1_u32_tied1: -+** uqsub z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_1_u32_tied1, svuint32_t, -+ z0 = svqsub_n_u32 (z0, 1), -+ z0 = svqsub (z0, 1)) -+ -+/* -+** qsub_1_u32_untied: -+** movprfx z0, z1 -+** uqsub z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_1_u32_untied, svuint32_t, -+ z0 = svqsub_n_u32 (z1, 1), -+ z0 = svqsub (z1, 1)) -+ -+/* -+** qsub_127_u32: -+** uqsub z0\.s, z0\.s, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_127_u32, svuint32_t, -+ z0 = svqsub_n_u32 (z0, 127), -+ z0 = svqsub (z0, 127)) -+ -+/* -+** qsub_128_u32: -+** uqsub z0\.s, z0\.s, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_128_u32, svuint32_t, -+ z0 = svqsub_n_u32 (z0, 128), -+ z0 = svqsub (z0, 128)) -+ -+/* -+** qsub_255_u32: -+** uqsub z0\.s, z0\.s, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_255_u32, svuint32_t, -+ z0 = svqsub_n_u32 (z0, 255), -+ z0 = svqsub (z0, 255)) -+ -+/* -+** qsub_m1_u32: -+** mov (z[0-9]+)\.b, #-1 -+** uqsub z0\.s, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_m1_u32, svuint32_t, -+ z0 = svqsub_n_u32 (z0, -1), -+ z0 = svqsub (z0, -1)) -+ -+/* -+** qsub_m127_u32: -+** mov (z[0-9]+\.s), #-127 -+** uqsub z0\.s, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_m127_u32, svuint32_t, -+ z0 = svqsub_n_u32 (z0, -127), -+ z0 = svqsub (z0, -127)) -+ -+/* -+** qsub_m128_u32: -+** mov (z[0-9]+\.s), #-128 -+** uqsub z0\.s, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_m128_u32, svuint32_t, -+ z0 = svqsub_n_u32 (z0, -128), -+ z0 = svqsub (z0, -128)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_u64.c -new file mode 100644 -index 000000000..6109b5f29 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_u64.c -@@ -0,0 +1,126 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qsub_u64_tied1: -+** uqsub z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_u64_tied1, svuint64_t, -+ z0 = svqsub_u64 (z0, z1), -+ z0 = svqsub (z0, z1)) -+ -+/* -+** qsub_u64_tied2: -+** uqsub z0\.d, 
z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_u64_tied2, svuint64_t, -+ z0 = svqsub_u64 (z1, z0), -+ z0 = svqsub (z1, z0)) -+ -+/* -+** qsub_u64_untied: -+** uqsub z0\.d, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_u64_untied, svuint64_t, -+ z0 = svqsub_u64 (z1, z2), -+ z0 = svqsub (z1, z2)) -+ -+/* -+** qsub_x0_u64_tied1: -+** mov (z[0-9]+\.d), x0 -+** uqsub z0\.d, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (qsub_x0_u64_tied1, svuint64_t, uint64_t, -+ z0 = svqsub_n_u64 (z0, x0), -+ z0 = svqsub (z0, x0)) -+ -+/* -+** qsub_x0_u64_untied: -+** mov (z[0-9]+\.d), x0 -+** uqsub z0\.d, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (qsub_x0_u64_untied, svuint64_t, uint64_t, -+ z0 = svqsub_n_u64 (z1, x0), -+ z0 = svqsub (z1, x0)) -+ -+/* -+** qsub_1_u64_tied1: -+** uqsub z0\.d, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_1_u64_tied1, svuint64_t, -+ z0 = svqsub_n_u64 (z0, 1), -+ z0 = svqsub (z0, 1)) -+ -+/* -+** qsub_1_u64_untied: -+** movprfx z0, z1 -+** uqsub z0\.d, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_1_u64_untied, svuint64_t, -+ z0 = svqsub_n_u64 (z1, 1), -+ z0 = svqsub (z1, 1)) -+ -+/* -+** qsub_127_u64: -+** uqsub z0\.d, z0\.d, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_127_u64, svuint64_t, -+ z0 = svqsub_n_u64 (z0, 127), -+ z0 = svqsub (z0, 127)) -+ -+/* -+** qsub_128_u64: -+** uqsub z0\.d, z0\.d, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_128_u64, svuint64_t, -+ z0 = svqsub_n_u64 (z0, 128), -+ z0 = svqsub (z0, 128)) -+ -+/* -+** qsub_255_u64: -+** uqsub z0\.d, z0\.d, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_255_u64, svuint64_t, -+ z0 = svqsub_n_u64 (z0, 255), -+ z0 = svqsub (z0, 255)) -+ -+/* -+** qsub_m1_u64: -+** mov (z[0-9]+)\.b, #-1 -+** uqsub z0\.d, z0\.d, \1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_m1_u64, svuint64_t, -+ z0 = svqsub_n_u64 (z0, -1), -+ z0 = svqsub (z0, -1)) -+ -+/* -+** qsub_m127_u64: -+** mov (z[0-9]+\.d), #-127 -+** uqsub z0\.d, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_m127_u64, svuint64_t, -+ z0 = svqsub_n_u64 (z0, -127), -+ z0 = svqsub (z0, -127)) -+ -+/* -+** qsub_m128_u64: -+** mov (z[0-9]+\.d), #-128 -+** uqsub z0\.d, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_m128_u64, svuint64_t, -+ z0 = svqsub_n_u64 (z0, -128), -+ z0 = svqsub (z0, -128)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_u8.c -new file mode 100644 -index 000000000..40aa74e8d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/qsub_u8.c -@@ -0,0 +1,123 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** qsub_u8_tied1: -+** uqsub z0\.b, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_u8_tied1, svuint8_t, -+ z0 = svqsub_u8 (z0, z1), -+ z0 = svqsub (z0, z1)) -+ -+/* -+** qsub_u8_tied2: -+** uqsub z0\.b, z1\.b, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_u8_tied2, svuint8_t, -+ z0 = svqsub_u8 (z1, z0), -+ z0 = svqsub (z1, z0)) -+ -+/* -+** qsub_u8_untied: -+** uqsub z0\.b, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_u8_untied, svuint8_t, -+ z0 = svqsub_u8 (z1, z2), -+ z0 = svqsub (z1, z2)) -+ -+/* -+** qsub_w0_u8_tied1: -+** mov (z[0-9]+\.b), w0 -+** uqsub z0\.b, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (qsub_w0_u8_tied1, svuint8_t, uint8_t, -+ z0 = svqsub_n_u8 (z0, x0), -+ z0 = svqsub (z0, x0)) -+ -+/* -+** qsub_w0_u8_untied: -+** mov (z[0-9]+\.b), w0 -+** uqsub z0\.b, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (qsub_w0_u8_untied, svuint8_t, uint8_t, -+ z0 = svqsub_n_u8 (z1, x0), -+ z0 = svqsub (z1, x0)) -+ -+/* 
-+** qsub_1_u8_tied1: -+** uqsub z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_1_u8_tied1, svuint8_t, -+ z0 = svqsub_n_u8 (z0, 1), -+ z0 = svqsub (z0, 1)) -+ -+/* -+** qsub_1_u8_untied: -+** movprfx z0, z1 -+** uqsub z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_1_u8_untied, svuint8_t, -+ z0 = svqsub_n_u8 (z1, 1), -+ z0 = svqsub (z1, 1)) -+ -+/* -+** qsub_127_u8: -+** uqsub z0\.b, z0\.b, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_127_u8, svuint8_t, -+ z0 = svqsub_n_u8 (z0, 127), -+ z0 = svqsub (z0, 127)) -+ -+/* -+** qsub_128_u8: -+** uqsub z0\.b, z0\.b, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_128_u8, svuint8_t, -+ z0 = svqsub_n_u8 (z0, 128), -+ z0 = svqsub (z0, 128)) -+ -+/* -+** qsub_255_u8: -+** uqsub z0\.b, z0\.b, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_255_u8, svuint8_t, -+ z0 = svqsub_n_u8 (z0, 255), -+ z0 = svqsub (z0, 255)) -+ -+/* -+** qsub_m1_u8: -+** uqsub z0\.b, z0\.b, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_m1_u8, svuint8_t, -+ z0 = svqsub_n_u8 (z0, -1), -+ z0 = svqsub (z0, -1)) -+ -+/* -+** qsub_m127_u8: -+** uqsub z0\.b, z0\.b, #129 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_m127_u8, svuint8_t, -+ z0 = svqsub_n_u8 (z0, -127), -+ z0 = svqsub (z0, -127)) -+ -+/* -+** qsub_m128_u8: -+** uqsub z0\.b, z0\.b, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (qsub_m128_u8, svuint8_t, -+ z0 = svqsub_n_u8 (z0, -128), -+ z0 = svqsub (z0, -128)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_s16.c -new file mode 100644 -index 000000000..4f794f600 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_s16.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rbit_s16_m_tied12: -+** rbit z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_s16_m_tied12, svint16_t, -+ z0 = svrbit_s16_m (z0, p0, z0), -+ z0 = svrbit_m (z0, p0, z0)) -+ -+/* -+** rbit_s16_m_tied1: -+** rbit z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_s16_m_tied1, svint16_t, -+ z0 = svrbit_s16_m (z0, p0, z1), -+ z0 = svrbit_m (z0, p0, z1)) -+ -+/* -+** rbit_s16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** rbit z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_s16_m_tied2, svint16_t, -+ z0 = svrbit_s16_m (z1, p0, z0), -+ z0 = svrbit_m (z1, p0, z0)) -+ -+/* -+** rbit_s16_m_untied: -+** movprfx z0, z2 -+** rbit z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_s16_m_untied, svint16_t, -+ z0 = svrbit_s16_m (z2, p0, z1), -+ z0 = svrbit_m (z2, p0, z1)) -+ -+/* -+** rbit_s16_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, \1\.h -+** rbit z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_s16_z_tied1, svint16_t, -+ z0 = svrbit_s16_z (p0, z0), -+ z0 = svrbit_z (p0, z0)) -+ -+/* -+** rbit_s16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** rbit z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_s16_z_untied, svint16_t, -+ z0 = svrbit_s16_z (p0, z1), -+ z0 = svrbit_z (p0, z1)) -+ -+/* -+** rbit_s16_x_tied1: -+** rbit z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_s16_x_tied1, svint16_t, -+ z0 = svrbit_s16_x (p0, z0), -+ z0 = svrbit_x (p0, z0)) -+ -+/* -+** rbit_s16_x_untied: -+** rbit z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_s16_x_untied, svint16_t, -+ z0 = svrbit_s16_x (p0, z1), -+ z0 = svrbit_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_s32.c -new file mode 100644 -index 
000000000..8b5e1a463 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_s32.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rbit_s32_m_tied12: -+** rbit z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_s32_m_tied12, svint32_t, -+ z0 = svrbit_s32_m (z0, p0, z0), -+ z0 = svrbit_m (z0, p0, z0)) -+ -+/* -+** rbit_s32_m_tied1: -+** rbit z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_s32_m_tied1, svint32_t, -+ z0 = svrbit_s32_m (z0, p0, z1), -+ z0 = svrbit_m (z0, p0, z1)) -+ -+/* -+** rbit_s32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** rbit z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_s32_m_tied2, svint32_t, -+ z0 = svrbit_s32_m (z1, p0, z0), -+ z0 = svrbit_m (z1, p0, z0)) -+ -+/* -+** rbit_s32_m_untied: -+** movprfx z0, z2 -+** rbit z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_s32_m_untied, svint32_t, -+ z0 = svrbit_s32_m (z2, p0, z1), -+ z0 = svrbit_m (z2, p0, z1)) -+ -+/* -+** rbit_s32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** rbit z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_s32_z_tied1, svint32_t, -+ z0 = svrbit_s32_z (p0, z0), -+ z0 = svrbit_z (p0, z0)) -+ -+/* -+** rbit_s32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** rbit z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_s32_z_untied, svint32_t, -+ z0 = svrbit_s32_z (p0, z1), -+ z0 = svrbit_z (p0, z1)) -+ -+/* -+** rbit_s32_x_tied1: -+** rbit z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_s32_x_tied1, svint32_t, -+ z0 = svrbit_s32_x (p0, z0), -+ z0 = svrbit_x (p0, z0)) -+ -+/* -+** rbit_s32_x_untied: -+** rbit z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_s32_x_untied, svint32_t, -+ z0 = svrbit_s32_x (p0, z1), -+ z0 = svrbit_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_s64.c -new file mode 100644 -index 000000000..cec27a421 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_s64.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rbit_s64_m_tied12: -+** rbit z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_s64_m_tied12, svint64_t, -+ z0 = svrbit_s64_m (z0, p0, z0), -+ z0 = svrbit_m (z0, p0, z0)) -+ -+/* -+** rbit_s64_m_tied1: -+** rbit z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_s64_m_tied1, svint64_t, -+ z0 = svrbit_s64_m (z0, p0, z1), -+ z0 = svrbit_m (z0, p0, z1)) -+ -+/* -+** rbit_s64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** rbit z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_s64_m_tied2, svint64_t, -+ z0 = svrbit_s64_m (z1, p0, z0), -+ z0 = svrbit_m (z1, p0, z0)) -+ -+/* -+** rbit_s64_m_untied: -+** movprfx z0, z2 -+** rbit z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_s64_m_untied, svint64_t, -+ z0 = svrbit_s64_m (z2, p0, z1), -+ z0 = svrbit_m (z2, p0, z1)) -+ -+/* -+** rbit_s64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** rbit z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_s64_z_tied1, svint64_t, -+ z0 = svrbit_s64_z (p0, z0), -+ z0 = svrbit_z (p0, z0)) -+ -+/* -+** rbit_s64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** rbit z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_s64_z_untied, svint64_t, -+ z0 = svrbit_s64_z (p0, z1), -+ z0 = svrbit_z (p0, z1)) -+ -+/* -+** rbit_s64_x_tied1: -+** rbit z0\.d, p0/m, 
z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_s64_x_tied1, svint64_t, -+ z0 = svrbit_s64_x (p0, z0), -+ z0 = svrbit_x (p0, z0)) -+ -+/* -+** rbit_s64_x_untied: -+** rbit z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_s64_x_untied, svint64_t, -+ z0 = svrbit_s64_x (p0, z1), -+ z0 = svrbit_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_s8.c -new file mode 100644 -index 000000000..9c152116a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_s8.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rbit_s8_m_tied12: -+** rbit z0\.b, p0/m, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_s8_m_tied12, svint8_t, -+ z0 = svrbit_s8_m (z0, p0, z0), -+ z0 = svrbit_m (z0, p0, z0)) -+ -+/* -+** rbit_s8_m_tied1: -+** rbit z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_s8_m_tied1, svint8_t, -+ z0 = svrbit_s8_m (z0, p0, z1), -+ z0 = svrbit_m (z0, p0, z1)) -+ -+/* -+** rbit_s8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** rbit z0\.b, p0/m, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_s8_m_tied2, svint8_t, -+ z0 = svrbit_s8_m (z1, p0, z0), -+ z0 = svrbit_m (z1, p0, z0)) -+ -+/* -+** rbit_s8_m_untied: -+** movprfx z0, z2 -+** rbit z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_s8_m_untied, svint8_t, -+ z0 = svrbit_s8_m (z2, p0, z1), -+ z0 = svrbit_m (z2, p0, z1)) -+ -+/* -+** rbit_s8_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.b, p0/z, \1\.b -+** rbit z0\.b, p0/m, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_s8_z_tied1, svint8_t, -+ z0 = svrbit_s8_z (p0, z0), -+ z0 = svrbit_z (p0, z0)) -+ -+/* -+** rbit_s8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** rbit z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_s8_z_untied, svint8_t, -+ z0 = svrbit_s8_z (p0, z1), -+ z0 = svrbit_z (p0, z1)) -+ -+/* -+** rbit_s8_x_tied1: -+** rbit z0\.b, p0/m, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_s8_x_tied1, svint8_t, -+ z0 = svrbit_s8_x (p0, z0), -+ z0 = svrbit_x (p0, z0)) -+ -+/* -+** rbit_s8_x_untied: -+** rbit z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_s8_x_untied, svint8_t, -+ z0 = svrbit_s8_x (p0, z1), -+ z0 = svrbit_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_u16.c -new file mode 100644 -index 000000000..001ef2bf0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_u16.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rbit_u16_m_tied12: -+** rbit z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_u16_m_tied12, svuint16_t, -+ z0 = svrbit_u16_m (z0, p0, z0), -+ z0 = svrbit_m (z0, p0, z0)) -+ -+/* -+** rbit_u16_m_tied1: -+** rbit z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_u16_m_tied1, svuint16_t, -+ z0 = svrbit_u16_m (z0, p0, z1), -+ z0 = svrbit_m (z0, p0, z1)) -+ -+/* -+** rbit_u16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** rbit z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_u16_m_tied2, svuint16_t, -+ z0 = svrbit_u16_m (z1, p0, z0), -+ z0 = svrbit_m (z1, p0, z0)) -+ -+/* -+** rbit_u16_m_untied: -+** movprfx z0, z2 -+** rbit z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_u16_m_untied, svuint16_t, -+ z0 = svrbit_u16_m (z2, p0, z1), -+ z0 = svrbit_m (z2, p0, z1)) -+ -+/* -+** rbit_u16_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** 
movprfx z0\.h, p0/z, \1\.h -+** rbit z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_u16_z_tied1, svuint16_t, -+ z0 = svrbit_u16_z (p0, z0), -+ z0 = svrbit_z (p0, z0)) -+ -+/* -+** rbit_u16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** rbit z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_u16_z_untied, svuint16_t, -+ z0 = svrbit_u16_z (p0, z1), -+ z0 = svrbit_z (p0, z1)) -+ -+/* -+** rbit_u16_x_tied1: -+** rbit z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_u16_x_tied1, svuint16_t, -+ z0 = svrbit_u16_x (p0, z0), -+ z0 = svrbit_x (p0, z0)) -+ -+/* -+** rbit_u16_x_untied: -+** rbit z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_u16_x_untied, svuint16_t, -+ z0 = svrbit_u16_x (p0, z1), -+ z0 = svrbit_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_u32.c -new file mode 100644 -index 000000000..4d91e954d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_u32.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rbit_u32_m_tied12: -+** rbit z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_u32_m_tied12, svuint32_t, -+ z0 = svrbit_u32_m (z0, p0, z0), -+ z0 = svrbit_m (z0, p0, z0)) -+ -+/* -+** rbit_u32_m_tied1: -+** rbit z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_u32_m_tied1, svuint32_t, -+ z0 = svrbit_u32_m (z0, p0, z1), -+ z0 = svrbit_m (z0, p0, z1)) -+ -+/* -+** rbit_u32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** rbit z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_u32_m_tied2, svuint32_t, -+ z0 = svrbit_u32_m (z1, p0, z0), -+ z0 = svrbit_m (z1, p0, z0)) -+ -+/* -+** rbit_u32_m_untied: -+** movprfx z0, z2 -+** rbit z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_u32_m_untied, svuint32_t, -+ z0 = svrbit_u32_m (z2, p0, z1), -+ z0 = svrbit_m (z2, p0, z1)) -+ -+/* -+** rbit_u32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** rbit z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_u32_z_tied1, svuint32_t, -+ z0 = svrbit_u32_z (p0, z0), -+ z0 = svrbit_z (p0, z0)) -+ -+/* -+** rbit_u32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** rbit z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_u32_z_untied, svuint32_t, -+ z0 = svrbit_u32_z (p0, z1), -+ z0 = svrbit_z (p0, z1)) -+ -+/* -+** rbit_u32_x_tied1: -+** rbit z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_u32_x_tied1, svuint32_t, -+ z0 = svrbit_u32_x (p0, z0), -+ z0 = svrbit_x (p0, z0)) -+ -+/* -+** rbit_u32_x_untied: -+** rbit z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_u32_x_untied, svuint32_t, -+ z0 = svrbit_u32_x (p0, z1), -+ z0 = svrbit_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_u64.c -new file mode 100644 -index 000000000..77f88d116 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_u64.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rbit_u64_m_tied12: -+** rbit z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_u64_m_tied12, svuint64_t, -+ z0 = svrbit_u64_m (z0, p0, z0), -+ z0 = svrbit_m (z0, p0, z0)) -+ -+/* -+** rbit_u64_m_tied1: -+** rbit z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_u64_m_tied1, svuint64_t, -+ z0 = svrbit_u64_m (z0, p0, z1), -+ z0 = svrbit_m (z0, p0, z1)) -+ -+/* -+** 
rbit_u64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** rbit z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_u64_m_tied2, svuint64_t, -+ z0 = svrbit_u64_m (z1, p0, z0), -+ z0 = svrbit_m (z1, p0, z0)) -+ -+/* -+** rbit_u64_m_untied: -+** movprfx z0, z2 -+** rbit z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_u64_m_untied, svuint64_t, -+ z0 = svrbit_u64_m (z2, p0, z1), -+ z0 = svrbit_m (z2, p0, z1)) -+ -+/* -+** rbit_u64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** rbit z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_u64_z_tied1, svuint64_t, -+ z0 = svrbit_u64_z (p0, z0), -+ z0 = svrbit_z (p0, z0)) -+ -+/* -+** rbit_u64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** rbit z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_u64_z_untied, svuint64_t, -+ z0 = svrbit_u64_z (p0, z1), -+ z0 = svrbit_z (p0, z1)) -+ -+/* -+** rbit_u64_x_tied1: -+** rbit z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_u64_x_tied1, svuint64_t, -+ z0 = svrbit_u64_x (p0, z0), -+ z0 = svrbit_x (p0, z0)) -+ -+/* -+** rbit_u64_x_untied: -+** rbit z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_u64_x_untied, svuint64_t, -+ z0 = svrbit_u64_x (p0, z1), -+ z0 = svrbit_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_u8.c -new file mode 100644 -index 000000000..fa347e4c7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rbit_u8.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rbit_u8_m_tied12: -+** rbit z0\.b, p0/m, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_u8_m_tied12, svuint8_t, -+ z0 = svrbit_u8_m (z0, p0, z0), -+ z0 = svrbit_m (z0, p0, z0)) -+ -+/* -+** rbit_u8_m_tied1: -+** rbit z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_u8_m_tied1, svuint8_t, -+ z0 = svrbit_u8_m (z0, p0, z1), -+ z0 = svrbit_m (z0, p0, z1)) -+ -+/* -+** rbit_u8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** rbit z0\.b, p0/m, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_u8_m_tied2, svuint8_t, -+ z0 = svrbit_u8_m (z1, p0, z0), -+ z0 = svrbit_m (z1, p0, z0)) -+ -+/* -+** rbit_u8_m_untied: -+** movprfx z0, z2 -+** rbit z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_u8_m_untied, svuint8_t, -+ z0 = svrbit_u8_m (z2, p0, z1), -+ z0 = svrbit_m (z2, p0, z1)) -+ -+/* -+** rbit_u8_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.b, p0/z, \1\.b -+** rbit z0\.b, p0/m, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_u8_z_tied1, svuint8_t, -+ z0 = svrbit_u8_z (p0, z0), -+ z0 = svrbit_z (p0, z0)) -+ -+/* -+** rbit_u8_z_untied: -+** movprfx z0\.b, p0/z, z1\.b -+** rbit z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_u8_z_untied, svuint8_t, -+ z0 = svrbit_u8_z (p0, z1), -+ z0 = svrbit_z (p0, z1)) -+ -+/* -+** rbit_u8_x_tied1: -+** rbit z0\.b, p0/m, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_u8_x_tied1, svuint8_t, -+ z0 = svrbit_u8_x (p0, z0), -+ z0 = svrbit_x (p0, z0)) -+ -+/* -+** rbit_u8_x_untied: -+** rbit z0\.b, p0/m, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (rbit_u8_x_untied, svuint8_t, -+ z0 = svrbit_u8_x (p0, z1), -+ z0 = svrbit_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rdffr_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rdffr_1.c -new file mode 100644 -index 000000000..5564e967f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rdffr_1.c -@@ -0,0 +1,59 @@ -+/* { dg-final { check-function-bodies "**" "" 
"-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** setffr_rdffr_1: -+** ptrue p0\.b, all -+** ret -+*/ -+TEST_UNIFORM_P_SINGLE (setffr_rdffr_1, -+ svsetffr (); -+ p0 = svrdffr ()); -+ -+/* -+** setffr_rdffr_2: -+** ret -+*/ -+TEST_UNIFORM_P_SINGLE (setffr_rdffr_2, -+ svsetffr (); -+ svrdffr ()); -+ -+/* -+** setffr_rdffr_3: -+** ptrue p0\.b, all -+** ret -+*/ -+TEST_UNIFORM_P_SINGLE (setffr_rdffr_3, -+ svsetffr (); -+ svsetffr (); -+ svrdffr (); -+ p0 = svrdffr ()); -+ -+/* -+** wrffr_rdffr_1: -+** mov p0\.b, p1\.b -+** ret -+*/ -+TEST_UNIFORM_P_SINGLE (wrffr_rdffr_1, -+ svwrffr (p1); -+ p0 = svrdffr ()); -+ -+/* -+** wrffr_rdffr_2: -+** ret -+*/ -+TEST_UNIFORM_P_SINGLE (wrffr_rdffr_2, -+ svwrffr (p1); -+ svrdffr ()); -+ -+/* -+** wrffr_rdffr_3: -+** mov p0\.b, p2\.b -+** ret -+*/ -+TEST_UNIFORM_P_SINGLE (wrffr_rdffr_3, -+ svwrffr (p1); -+ svwrffr (p2); -+ svrdffr (); -+ p0 = svrdffr ()); -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpe_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpe_f16.c -new file mode 100644 -index 000000000..d0cd8281a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpe_f16.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** recpe_f16_tied1: -+** frecpe z0\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (recpe_f16_tied1, svfloat16_t, -+ z0 = svrecpe_f16 (z0), -+ z0 = svrecpe (z0)) -+ -+/* -+** recpe_f16_untied: -+** frecpe z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (recpe_f16_untied, svfloat16_t, -+ z0 = svrecpe_f16 (z1), -+ z0 = svrecpe (z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpe_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpe_f32.c -new file mode 100644 -index 000000000..013ed8c43 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpe_f32.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** recpe_f32_tied1: -+** frecpe z0\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (recpe_f32_tied1, svfloat32_t, -+ z0 = svrecpe_f32 (z0), -+ z0 = svrecpe (z0)) -+ -+/* -+** recpe_f32_untied: -+** frecpe z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (recpe_f32_untied, svfloat32_t, -+ z0 = svrecpe_f32 (z1), -+ z0 = svrecpe (z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpe_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpe_f64.c -new file mode 100644 -index 000000000..40b3df292 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpe_f64.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** recpe_f64_tied1: -+** frecpe z0\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (recpe_f64_tied1, svfloat64_t, -+ z0 = svrecpe_f64 (z0), -+ z0 = svrecpe (z0)) -+ -+/* -+** recpe_f64_untied: -+** frecpe z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (recpe_f64_untied, svfloat64_t, -+ z0 = svrecpe_f64 (z1), -+ z0 = svrecpe (z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recps_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recps_f16.c -new file mode 100644 -index 000000000..e35c5c545 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recps_f16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** recps_f16_tied1: -+** frecps z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (recps_f16_tied1, 
svfloat16_t, -+ z0 = svrecps_f16 (z0, z1), -+ z0 = svrecps (z0, z1)) -+ -+/* -+** recps_f16_tied2: -+** frecps z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (recps_f16_tied2, svfloat16_t, -+ z0 = svrecps_f16 (z1, z0), -+ z0 = svrecps (z1, z0)) -+ -+/* -+** recps_f16_untied: -+** frecps z0\.h, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (recps_f16_untied, svfloat16_t, -+ z0 = svrecps_f16 (z1, z2), -+ z0 = svrecps (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recps_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recps_f32.c -new file mode 100644 -index 000000000..3f3aa203e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recps_f32.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** recps_f32_tied1: -+** frecps z0\.s, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (recps_f32_tied1, svfloat32_t, -+ z0 = svrecps_f32 (z0, z1), -+ z0 = svrecps (z0, z1)) -+ -+/* -+** recps_f32_tied2: -+** frecps z0\.s, z1\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (recps_f32_tied2, svfloat32_t, -+ z0 = svrecps_f32 (z1, z0), -+ z0 = svrecps (z1, z0)) -+ -+/* -+** recps_f32_untied: -+** frecps z0\.s, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (recps_f32_untied, svfloat32_t, -+ z0 = svrecps_f32 (z1, z2), -+ z0 = svrecps (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recps_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recps_f64.c -new file mode 100644 -index 000000000..eca421d5e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recps_f64.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** recps_f64_tied1: -+** frecps z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (recps_f64_tied1, svfloat64_t, -+ z0 = svrecps_f64 (z0, z1), -+ z0 = svrecps (z0, z1)) -+ -+/* -+** recps_f64_tied2: -+** frecps z0\.d, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (recps_f64_tied2, svfloat64_t, -+ z0 = svrecps_f64 (z1, z0), -+ z0 = svrecps (z1, z0)) -+ -+/* -+** recps_f64_untied: -+** frecps z0\.d, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (recps_f64_untied, svfloat64_t, -+ z0 = svrecps_f64 (z1, z2), -+ z0 = svrecps (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpx_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpx_f16.c -new file mode 100644 -index 000000000..2dd7ada2c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpx_f16.c -@@ -0,0 +1,103 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** recpx_f16_m_tied12: -+** frecpx z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (recpx_f16_m_tied12, svfloat16_t, -+ z0 = svrecpx_f16_m (z0, p0, z0), -+ z0 = svrecpx_m (z0, p0, z0)) -+ -+/* -+** recpx_f16_m_tied1: -+** frecpx z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (recpx_f16_m_tied1, svfloat16_t, -+ z0 = svrecpx_f16_m (z0, p0, z1), -+ z0 = svrecpx_m (z0, p0, z1)) -+ -+/* -+** recpx_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** frecpx z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (recpx_f16_m_tied2, svfloat16_t, -+ z0 = svrecpx_f16_m (z1, p0, z0), -+ z0 = svrecpx_m (z1, p0, z0)) -+ -+/* -+** recpx_f16_m_untied: -+** movprfx z0, z2 -+** frecpx z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (recpx_f16_m_untied, svfloat16_t, -+ z0 = svrecpx_f16_m (z2, p0, z1), -+ z0 = svrecpx_m (z2, p0, z1)) -+ -+/* -+** recpx_f16_z_tied1: -+** mov 
(z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, \1\.h -+** frecpx z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (recpx_f16_z_tied1, svfloat16_t, -+ z0 = svrecpx_f16_z (p0, z0), -+ z0 = svrecpx_z (p0, z0)) -+ -+/* -+** recpx_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** frecpx z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (recpx_f16_z_untied, svfloat16_t, -+ z0 = svrecpx_f16_z (p0, z1), -+ z0 = svrecpx_z (p0, z1)) -+ -+/* -+** recpx_f16_x_tied1: -+** frecpx z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (recpx_f16_x_tied1, svfloat16_t, -+ z0 = svrecpx_f16_x (p0, z0), -+ z0 = svrecpx_x (p0, z0)) -+ -+/* -+** recpx_f16_x_untied: -+** frecpx z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (recpx_f16_x_untied, svfloat16_t, -+ z0 = svrecpx_f16_x (p0, z1), -+ z0 = svrecpx_x (p0, z1)) -+ -+/* -+** ptrue_recpx_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_recpx_f16_x_tied1, svfloat16_t, -+ z0 = svrecpx_f16_x (svptrue_b16 (), z0), -+ z0 = svrecpx_x (svptrue_b16 (), z0)) -+ -+/* -+** ptrue_recpx_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_recpx_f16_x_untied, svfloat16_t, -+ z0 = svrecpx_f16_x (svptrue_b16 (), z1), -+ z0 = svrecpx_x (svptrue_b16 (), z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpx_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpx_f32.c -new file mode 100644 -index 000000000..6364fb83b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpx_f32.c -@@ -0,0 +1,103 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** recpx_f32_m_tied12: -+** frecpx z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (recpx_f32_m_tied12, svfloat32_t, -+ z0 = svrecpx_f32_m (z0, p0, z0), -+ z0 = svrecpx_m (z0, p0, z0)) -+ -+/* -+** recpx_f32_m_tied1: -+** frecpx z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (recpx_f32_m_tied1, svfloat32_t, -+ z0 = svrecpx_f32_m (z0, p0, z1), -+ z0 = svrecpx_m (z0, p0, z1)) -+ -+/* -+** recpx_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** frecpx z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (recpx_f32_m_tied2, svfloat32_t, -+ z0 = svrecpx_f32_m (z1, p0, z0), -+ z0 = svrecpx_m (z1, p0, z0)) -+ -+/* -+** recpx_f32_m_untied: -+** movprfx z0, z2 -+** frecpx z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (recpx_f32_m_untied, svfloat32_t, -+ z0 = svrecpx_f32_m (z2, p0, z1), -+ z0 = svrecpx_m (z2, p0, z1)) -+ -+/* -+** recpx_f32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** frecpx z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (recpx_f32_z_tied1, svfloat32_t, -+ z0 = svrecpx_f32_z (p0, z0), -+ z0 = svrecpx_z (p0, z0)) -+ -+/* -+** recpx_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** frecpx z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (recpx_f32_z_untied, svfloat32_t, -+ z0 = svrecpx_f32_z (p0, z1), -+ z0 = svrecpx_z (p0, z1)) -+ -+/* -+** recpx_f32_x_tied1: -+** frecpx z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (recpx_f32_x_tied1, svfloat32_t, -+ z0 = svrecpx_f32_x (p0, z0), -+ z0 = svrecpx_x (p0, z0)) -+ -+/* -+** recpx_f32_x_untied: -+** frecpx z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (recpx_f32_x_untied, svfloat32_t, -+ z0 = svrecpx_f32_x (p0, z1), -+ z0 = svrecpx_x (p0, z1)) -+ -+/* -+** ptrue_recpx_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_recpx_f32_x_tied1, svfloat32_t, -+ z0 = svrecpx_f32_x (svptrue_b32 (), z0), -+ z0 = svrecpx_x (svptrue_b32 (), z0)) -+ -+/* -+** ptrue_recpx_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_recpx_f32_x_untied, svfloat32_t, -+ z0 = svrecpx_f32_x (svptrue_b32 (), z1), -+ z0 = svrecpx_x (svptrue_b32 (), z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpx_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpx_f64.c -new file mode 100644 -index 000000000..ca5232331 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/recpx_f64.c -@@ -0,0 +1,103 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** recpx_f64_m_tied12: -+** frecpx z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (recpx_f64_m_tied12, svfloat64_t, -+ z0 = svrecpx_f64_m (z0, p0, z0), -+ z0 = svrecpx_m (z0, p0, z0)) -+ -+/* -+** recpx_f64_m_tied1: -+** frecpx z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (recpx_f64_m_tied1, svfloat64_t, -+ z0 = svrecpx_f64_m (z0, p0, z1), -+ z0 = svrecpx_m (z0, p0, z1)) -+ -+/* -+** recpx_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** frecpx z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (recpx_f64_m_tied2, svfloat64_t, -+ z0 = svrecpx_f64_m (z1, p0, z0), -+ z0 = svrecpx_m (z1, p0, z0)) -+ -+/* -+** recpx_f64_m_untied: -+** movprfx z0, z2 -+** frecpx z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (recpx_f64_m_untied, svfloat64_t, -+ z0 = svrecpx_f64_m (z2, p0, z1), -+ z0 = svrecpx_m (z2, p0, z1)) -+ -+/* -+** recpx_f64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** frecpx z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (recpx_f64_z_tied1, svfloat64_t, -+ z0 = svrecpx_f64_z (p0, z0), -+ z0 = svrecpx_z (p0, z0)) -+ -+/* -+** recpx_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** frecpx z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (recpx_f64_z_untied, svfloat64_t, -+ z0 = svrecpx_f64_z (p0, z1), -+ z0 = svrecpx_z (p0, z1)) -+ -+/* -+** recpx_f64_x_tied1: -+** frecpx z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (recpx_f64_x_tied1, svfloat64_t, -+ z0 = svrecpx_f64_x (p0, z0), -+ z0 = svrecpx_x (p0, z0)) -+ -+/* -+** recpx_f64_x_untied: -+** frecpx z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (recpx_f64_x_untied, svfloat64_t, -+ z0 = svrecpx_f64_x (p0, z1), -+ z0 = svrecpx_x (p0, z1)) -+ -+/* -+** ptrue_recpx_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_recpx_f64_x_tied1, svfloat64_t, -+ z0 = svrecpx_f64_x (svptrue_b64 (), z0), -+ z0 = svrecpx_x (svptrue_b64 (), z0)) -+ -+/* -+** ptrue_recpx_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_recpx_f64_x_untied, svfloat64_t, -+ z0 = svrecpx_f64_x (svptrue_b64 (), z1), -+ z0 = svrecpx_x (svptrue_b64 (), z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_bf16.c -new file mode 100644 -index 000000000..2d2c2a714 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_bf16.c -@@ -0,0 +1,207 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** reinterpret_bf16_bf16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_bf16_bf16_tied1, svbfloat16_t, svbfloat16_t, -+ z0_res = svreinterpret_bf16_bf16 (z0), -+ z0_res = svreinterpret_bf16 (z0)) -+ -+/* -+** reinterpret_bf16_bf16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_bf16_bf16_untied, svbfloat16_t, svbfloat16_t, -+ z0 = svreinterpret_bf16_bf16 (z4), -+ z0 = svreinterpret_bf16 (z4)) -+ -+/* -+** reinterpret_bf16_f16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_bf16_f16_tied1, svbfloat16_t, svfloat16_t, -+ z0_res = svreinterpret_bf16_f16 (z0), -+ z0_res = svreinterpret_bf16 (z0)) -+ -+/* -+** reinterpret_bf16_f16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_bf16_f16_untied, svbfloat16_t, svfloat16_t, -+ z0 = svreinterpret_bf16_f16 (z4), -+ z0 = svreinterpret_bf16 (z4)) -+ -+/* -+** reinterpret_bf16_f32_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_bf16_f32_tied1, svbfloat16_t, svfloat32_t, -+ z0_res = svreinterpret_bf16_f32 (z0), -+ z0_res = svreinterpret_bf16 (z0)) -+ -+/* -+** reinterpret_bf16_f32_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_bf16_f32_untied, svbfloat16_t, svfloat32_t, -+ z0 = svreinterpret_bf16_f32 (z4), -+ z0 = svreinterpret_bf16 (z4)) -+ -+/* -+** reinterpret_bf16_f64_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_bf16_f64_tied1, svbfloat16_t, svfloat64_t, -+ z0_res = svreinterpret_bf16_f64 (z0), -+ z0_res = svreinterpret_bf16 (z0)) -+ -+/* -+** reinterpret_bf16_f64_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_bf16_f64_untied, svbfloat16_t, svfloat64_t, -+ z0 = svreinterpret_bf16_f64 (z4), -+ z0 = svreinterpret_bf16 (z4)) -+ -+/* -+** reinterpret_bf16_s8_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_bf16_s8_tied1, svbfloat16_t, svint8_t, -+ z0_res = svreinterpret_bf16_s8 (z0), -+ z0_res = svreinterpret_bf16 (z0)) -+ -+/* -+** reinterpret_bf16_s8_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_bf16_s8_untied, svbfloat16_t, svint8_t, -+ z0 = svreinterpret_bf16_s8 (z4), -+ z0 = svreinterpret_bf16 (z4)) -+ -+/* -+** reinterpret_bf16_s16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_bf16_s16_tied1, svbfloat16_t, svint16_t, -+ z0_res = svreinterpret_bf16_s16 (z0), -+ z0_res = svreinterpret_bf16 (z0)) -+ -+/* -+** reinterpret_bf16_s16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_bf16_s16_untied, svbfloat16_t, svint16_t, -+ z0 = svreinterpret_bf16_s16 (z4), -+ z0 = svreinterpret_bf16 (z4)) -+ -+/* -+** reinterpret_bf16_s32_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_bf16_s32_tied1, svbfloat16_t, svint32_t, -+ z0_res = svreinterpret_bf16_s32 (z0), -+ z0_res = svreinterpret_bf16 (z0)) -+ -+/* -+** reinterpret_bf16_s32_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_bf16_s32_untied, svbfloat16_t, svint32_t, -+ z0 = svreinterpret_bf16_s32 (z4), -+ z0 = svreinterpret_bf16 (z4)) -+ -+/* -+** 
reinterpret_bf16_s64_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_bf16_s64_tied1, svbfloat16_t, svint64_t, -+ z0_res = svreinterpret_bf16_s64 (z0), -+ z0_res = svreinterpret_bf16 (z0)) -+ -+/* -+** reinterpret_bf16_s64_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_bf16_s64_untied, svbfloat16_t, svint64_t, -+ z0 = svreinterpret_bf16_s64 (z4), -+ z0 = svreinterpret_bf16 (z4)) -+ -+/* -+** reinterpret_bf16_u8_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_bf16_u8_tied1, svbfloat16_t, svuint8_t, -+ z0_res = svreinterpret_bf16_u8 (z0), -+ z0_res = svreinterpret_bf16 (z0)) -+ -+/* -+** reinterpret_bf16_u8_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_bf16_u8_untied, svbfloat16_t, svuint8_t, -+ z0 = svreinterpret_bf16_u8 (z4), -+ z0 = svreinterpret_bf16 (z4)) -+ -+/* -+** reinterpret_bf16_u16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_bf16_u16_tied1, svbfloat16_t, svuint16_t, -+ z0_res = svreinterpret_bf16_u16 (z0), -+ z0_res = svreinterpret_bf16 (z0)) -+ -+/* -+** reinterpret_bf16_u16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_bf16_u16_untied, svbfloat16_t, svuint16_t, -+ z0 = svreinterpret_bf16_u16 (z4), -+ z0 = svreinterpret_bf16 (z4)) -+ -+/* -+** reinterpret_bf16_u32_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_bf16_u32_tied1, svbfloat16_t, svuint32_t, -+ z0_res = svreinterpret_bf16_u32 (z0), -+ z0_res = svreinterpret_bf16 (z0)) -+ -+/* -+** reinterpret_bf16_u32_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_bf16_u32_untied, svbfloat16_t, svuint32_t, -+ z0 = svreinterpret_bf16_u32 (z4), -+ z0 = svreinterpret_bf16 (z4)) -+ -+/* -+** reinterpret_bf16_u64_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_bf16_u64_tied1, svbfloat16_t, svuint64_t, -+ z0_res = svreinterpret_bf16_u64 (z0), -+ z0_res = svreinterpret_bf16 (z0)) -+ -+/* -+** reinterpret_bf16_u64_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_bf16_u64_untied, svbfloat16_t, svuint64_t, -+ z0 = svreinterpret_bf16_u64 (z4), -+ z0 = svreinterpret_bf16 (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f16.c -new file mode 100644 -index 000000000..60705e628 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f16.c -@@ -0,0 +1,207 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** reinterpret_f16_bf16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f16_bf16_tied1, svfloat16_t, svbfloat16_t, -+ z0_res = svreinterpret_f16_bf16 (z0), -+ z0_res = svreinterpret_f16 (z0)) -+ -+/* -+** reinterpret_f16_bf16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_f16_bf16_untied, svfloat16_t, svbfloat16_t, -+ z0 = svreinterpret_f16_bf16 (z4), -+ z0 = svreinterpret_f16 (z4)) -+ -+/* -+** reinterpret_f16_f16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f16_f16_tied1, svfloat16_t, svfloat16_t, -+ z0_res = svreinterpret_f16_f16 (z0), -+ z0_res = svreinterpret_f16 (z0)) -+ -+/* -+** reinterpret_f16_f16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_f16_f16_untied, svfloat16_t, svfloat16_t, -+ z0 = svreinterpret_f16_f16 (z4), -+ z0 = svreinterpret_f16 (z4)) -+ -+/* -+** reinterpret_f16_f32_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f16_f32_tied1, svfloat16_t, svfloat32_t, -+ z0_res = svreinterpret_f16_f32 (z0), -+ z0_res = svreinterpret_f16 (z0)) -+ -+/* -+** 
reinterpret_f16_f32_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_f16_f32_untied, svfloat16_t, svfloat32_t, -+ z0 = svreinterpret_f16_f32 (z4), -+ z0 = svreinterpret_f16 (z4)) -+ -+/* -+** reinterpret_f16_f64_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f16_f64_tied1, svfloat16_t, svfloat64_t, -+ z0_res = svreinterpret_f16_f64 (z0), -+ z0_res = svreinterpret_f16 (z0)) -+ -+/* -+** reinterpret_f16_f64_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_f16_f64_untied, svfloat16_t, svfloat64_t, -+ z0 = svreinterpret_f16_f64 (z4), -+ z0 = svreinterpret_f16 (z4)) -+ -+/* -+** reinterpret_f16_s8_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f16_s8_tied1, svfloat16_t, svint8_t, -+ z0_res = svreinterpret_f16_s8 (z0), -+ z0_res = svreinterpret_f16 (z0)) -+ -+/* -+** reinterpret_f16_s8_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_f16_s8_untied, svfloat16_t, svint8_t, -+ z0 = svreinterpret_f16_s8 (z4), -+ z0 = svreinterpret_f16 (z4)) -+ -+/* -+** reinterpret_f16_s16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f16_s16_tied1, svfloat16_t, svint16_t, -+ z0_res = svreinterpret_f16_s16 (z0), -+ z0_res = svreinterpret_f16 (z0)) -+ -+/* -+** reinterpret_f16_s16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_f16_s16_untied, svfloat16_t, svint16_t, -+ z0 = svreinterpret_f16_s16 (z4), -+ z0 = svreinterpret_f16 (z4)) -+ -+/* -+** reinterpret_f16_s32_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f16_s32_tied1, svfloat16_t, svint32_t, -+ z0_res = svreinterpret_f16_s32 (z0), -+ z0_res = svreinterpret_f16 (z0)) -+ -+/* -+** reinterpret_f16_s32_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_f16_s32_untied, svfloat16_t, svint32_t, -+ z0 = svreinterpret_f16_s32 (z4), -+ z0 = svreinterpret_f16 (z4)) -+ -+/* -+** reinterpret_f16_s64_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f16_s64_tied1, svfloat16_t, svint64_t, -+ z0_res = svreinterpret_f16_s64 (z0), -+ z0_res = svreinterpret_f16 (z0)) -+ -+/* -+** reinterpret_f16_s64_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_f16_s64_untied, svfloat16_t, svint64_t, -+ z0 = svreinterpret_f16_s64 (z4), -+ z0 = svreinterpret_f16 (z4)) -+ -+/* -+** reinterpret_f16_u8_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f16_u8_tied1, svfloat16_t, svuint8_t, -+ z0_res = svreinterpret_f16_u8 (z0), -+ z0_res = svreinterpret_f16 (z0)) -+ -+/* -+** reinterpret_f16_u8_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_f16_u8_untied, svfloat16_t, svuint8_t, -+ z0 = svreinterpret_f16_u8 (z4), -+ z0 = svreinterpret_f16 (z4)) -+ -+/* -+** reinterpret_f16_u16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f16_u16_tied1, svfloat16_t, svuint16_t, -+ z0_res = svreinterpret_f16_u16 (z0), -+ z0_res = svreinterpret_f16 (z0)) -+ -+/* -+** reinterpret_f16_u16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_f16_u16_untied, svfloat16_t, svuint16_t, -+ z0 = svreinterpret_f16_u16 (z4), -+ z0 = svreinterpret_f16 (z4)) -+ -+/* -+** reinterpret_f16_u32_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f16_u32_tied1, svfloat16_t, svuint32_t, -+ z0_res = svreinterpret_f16_u32 (z0), -+ z0_res = svreinterpret_f16 (z0)) -+ -+/* -+** reinterpret_f16_u32_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_f16_u32_untied, svfloat16_t, svuint32_t, -+ z0 = svreinterpret_f16_u32 (z4), -+ z0 = svreinterpret_f16 (z4)) -+ -+/* -+** reinterpret_f16_u64_tied1: -+** ret -+*/ 
-+TEST_DUAL_Z_REV (reinterpret_f16_u64_tied1, svfloat16_t, svuint64_t, -+ z0_res = svreinterpret_f16_u64 (z0), -+ z0_res = svreinterpret_f16 (z0)) -+ -+/* -+** reinterpret_f16_u64_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_f16_u64_untied, svfloat16_t, svuint64_t, -+ z0 = svreinterpret_f16_u64 (z4), -+ z0 = svreinterpret_f16 (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f32.c -new file mode 100644 -index 000000000..06fc46f25 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f32.c -@@ -0,0 +1,207 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** reinterpret_f32_bf16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f32_bf16_tied1, svfloat32_t, svbfloat16_t, -+ z0_res = svreinterpret_f32_bf16 (z0), -+ z0_res = svreinterpret_f32 (z0)) -+ -+/* -+** reinterpret_f32_bf16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_f32_bf16_untied, svfloat32_t, svbfloat16_t, -+ z0 = svreinterpret_f32_bf16 (z4), -+ z0 = svreinterpret_f32 (z4)) -+ -+/* -+** reinterpret_f32_f16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f32_f16_tied1, svfloat32_t, svfloat16_t, -+ z0_res = svreinterpret_f32_f16 (z0), -+ z0_res = svreinterpret_f32 (z0)) -+ -+/* -+** reinterpret_f32_f16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_f32_f16_untied, svfloat32_t, svfloat16_t, -+ z0 = svreinterpret_f32_f16 (z4), -+ z0 = svreinterpret_f32 (z4)) -+ -+/* -+** reinterpret_f32_f32_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f32_f32_tied1, svfloat32_t, svfloat32_t, -+ z0_res = svreinterpret_f32_f32 (z0), -+ z0_res = svreinterpret_f32 (z0)) -+ -+/* -+** reinterpret_f32_f32_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_f32_f32_untied, svfloat32_t, svfloat32_t, -+ z0 = svreinterpret_f32_f32 (z4), -+ z0 = svreinterpret_f32 (z4)) -+ -+/* -+** reinterpret_f32_f64_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f32_f64_tied1, svfloat32_t, svfloat64_t, -+ z0_res = svreinterpret_f32_f64 (z0), -+ z0_res = svreinterpret_f32 (z0)) -+ -+/* -+** reinterpret_f32_f64_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_f32_f64_untied, svfloat32_t, svfloat64_t, -+ z0 = svreinterpret_f32_f64 (z4), -+ z0 = svreinterpret_f32 (z4)) -+ -+/* -+** reinterpret_f32_s8_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f32_s8_tied1, svfloat32_t, svint8_t, -+ z0_res = svreinterpret_f32_s8 (z0), -+ z0_res = svreinterpret_f32 (z0)) -+ -+/* -+** reinterpret_f32_s8_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_f32_s8_untied, svfloat32_t, svint8_t, -+ z0 = svreinterpret_f32_s8 (z4), -+ z0 = svreinterpret_f32 (z4)) -+ -+/* -+** reinterpret_f32_s16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f32_s16_tied1, svfloat32_t, svint16_t, -+ z0_res = svreinterpret_f32_s16 (z0), -+ z0_res = svreinterpret_f32 (z0)) -+ -+/* -+** reinterpret_f32_s16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_f32_s16_untied, svfloat32_t, svint16_t, -+ z0 = svreinterpret_f32_s16 (z4), -+ z0 = svreinterpret_f32 (z4)) -+ -+/* -+** reinterpret_f32_s32_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f32_s32_tied1, svfloat32_t, svint32_t, -+ z0_res = svreinterpret_f32_s32 (z0), -+ z0_res = svreinterpret_f32 (z0)) -+ -+/* -+** reinterpret_f32_s32_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z 
(reinterpret_f32_s32_untied, svfloat32_t, svint32_t, -+ z0 = svreinterpret_f32_s32 (z4), -+ z0 = svreinterpret_f32 (z4)) -+ -+/* -+** reinterpret_f32_s64_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f32_s64_tied1, svfloat32_t, svint64_t, -+ z0_res = svreinterpret_f32_s64 (z0), -+ z0_res = svreinterpret_f32 (z0)) -+ -+/* -+** reinterpret_f32_s64_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_f32_s64_untied, svfloat32_t, svint64_t, -+ z0 = svreinterpret_f32_s64 (z4), -+ z0 = svreinterpret_f32 (z4)) -+ -+/* -+** reinterpret_f32_u8_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f32_u8_tied1, svfloat32_t, svuint8_t, -+ z0_res = svreinterpret_f32_u8 (z0), -+ z0_res = svreinterpret_f32 (z0)) -+ -+/* -+** reinterpret_f32_u8_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_f32_u8_untied, svfloat32_t, svuint8_t, -+ z0 = svreinterpret_f32_u8 (z4), -+ z0 = svreinterpret_f32 (z4)) -+ -+/* -+** reinterpret_f32_u16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f32_u16_tied1, svfloat32_t, svuint16_t, -+ z0_res = svreinterpret_f32_u16 (z0), -+ z0_res = svreinterpret_f32 (z0)) -+ -+/* -+** reinterpret_f32_u16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_f32_u16_untied, svfloat32_t, svuint16_t, -+ z0 = svreinterpret_f32_u16 (z4), -+ z0 = svreinterpret_f32 (z4)) -+ -+/* -+** reinterpret_f32_u32_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f32_u32_tied1, svfloat32_t, svuint32_t, -+ z0_res = svreinterpret_f32_u32 (z0), -+ z0_res = svreinterpret_f32 (z0)) -+ -+/* -+** reinterpret_f32_u32_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_f32_u32_untied, svfloat32_t, svuint32_t, -+ z0 = svreinterpret_f32_u32 (z4), -+ z0 = svreinterpret_f32 (z4)) -+ -+/* -+** reinterpret_f32_u64_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f32_u64_tied1, svfloat32_t, svuint64_t, -+ z0_res = svreinterpret_f32_u64 (z0), -+ z0_res = svreinterpret_f32 (z0)) -+ -+/* -+** reinterpret_f32_u64_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_f32_u64_untied, svfloat32_t, svuint64_t, -+ z0 = svreinterpret_f32_u64 (z4), -+ z0 = svreinterpret_f32 (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f64.c -new file mode 100644 -index 000000000..003ee3fe2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_f64.c -@@ -0,0 +1,207 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** reinterpret_f64_bf16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f64_bf16_tied1, svfloat64_t, svbfloat16_t, -+ z0_res = svreinterpret_f64_bf16 (z0), -+ z0_res = svreinterpret_f64 (z0)) -+ -+/* -+** reinterpret_f64_bf16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_f64_bf16_untied, svfloat64_t, svbfloat16_t, -+ z0 = svreinterpret_f64_bf16 (z4), -+ z0 = svreinterpret_f64 (z4)) -+ -+/* -+** reinterpret_f64_f16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f64_f16_tied1, svfloat64_t, svfloat16_t, -+ z0_res = svreinterpret_f64_f16 (z0), -+ z0_res = svreinterpret_f64 (z0)) -+ -+/* -+** reinterpret_f64_f16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_f64_f16_untied, svfloat64_t, svfloat16_t, -+ z0 = svreinterpret_f64_f16 (z4), -+ z0 = svreinterpret_f64 (z4)) -+ -+/* -+** reinterpret_f64_f32_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f64_f32_tied1, svfloat64_t, svfloat32_t, -+ 
z0_res = svreinterpret_f64_f32 (z0), -+ z0_res = svreinterpret_f64 (z0)) -+ -+/* -+** reinterpret_f64_f32_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_f64_f32_untied, svfloat64_t, svfloat32_t, -+ z0 = svreinterpret_f64_f32 (z4), -+ z0 = svreinterpret_f64 (z4)) -+ -+/* -+** reinterpret_f64_f64_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f64_f64_tied1, svfloat64_t, svfloat64_t, -+ z0_res = svreinterpret_f64_f64 (z0), -+ z0_res = svreinterpret_f64 (z0)) -+ -+/* -+** reinterpret_f64_f64_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_f64_f64_untied, svfloat64_t, svfloat64_t, -+ z0 = svreinterpret_f64_f64 (z4), -+ z0 = svreinterpret_f64 (z4)) -+ -+/* -+** reinterpret_f64_s8_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f64_s8_tied1, svfloat64_t, svint8_t, -+ z0_res = svreinterpret_f64_s8 (z0), -+ z0_res = svreinterpret_f64 (z0)) -+ -+/* -+** reinterpret_f64_s8_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_f64_s8_untied, svfloat64_t, svint8_t, -+ z0 = svreinterpret_f64_s8 (z4), -+ z0 = svreinterpret_f64 (z4)) -+ -+/* -+** reinterpret_f64_s16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f64_s16_tied1, svfloat64_t, svint16_t, -+ z0_res = svreinterpret_f64_s16 (z0), -+ z0_res = svreinterpret_f64 (z0)) -+ -+/* -+** reinterpret_f64_s16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_f64_s16_untied, svfloat64_t, svint16_t, -+ z0 = svreinterpret_f64_s16 (z4), -+ z0 = svreinterpret_f64 (z4)) -+ -+/* -+** reinterpret_f64_s32_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f64_s32_tied1, svfloat64_t, svint32_t, -+ z0_res = svreinterpret_f64_s32 (z0), -+ z0_res = svreinterpret_f64 (z0)) -+ -+/* -+** reinterpret_f64_s32_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_f64_s32_untied, svfloat64_t, svint32_t, -+ z0 = svreinterpret_f64_s32 (z4), -+ z0 = svreinterpret_f64 (z4)) -+ -+/* -+** reinterpret_f64_s64_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f64_s64_tied1, svfloat64_t, svint64_t, -+ z0_res = svreinterpret_f64_s64 (z0), -+ z0_res = svreinterpret_f64 (z0)) -+ -+/* -+** reinterpret_f64_s64_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_f64_s64_untied, svfloat64_t, svint64_t, -+ z0 = svreinterpret_f64_s64 (z4), -+ z0 = svreinterpret_f64 (z4)) -+ -+/* -+** reinterpret_f64_u8_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f64_u8_tied1, svfloat64_t, svuint8_t, -+ z0_res = svreinterpret_f64_u8 (z0), -+ z0_res = svreinterpret_f64 (z0)) -+ -+/* -+** reinterpret_f64_u8_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_f64_u8_untied, svfloat64_t, svuint8_t, -+ z0 = svreinterpret_f64_u8 (z4), -+ z0 = svreinterpret_f64 (z4)) -+ -+/* -+** reinterpret_f64_u16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f64_u16_tied1, svfloat64_t, svuint16_t, -+ z0_res = svreinterpret_f64_u16 (z0), -+ z0_res = svreinterpret_f64 (z0)) -+ -+/* -+** reinterpret_f64_u16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_f64_u16_untied, svfloat64_t, svuint16_t, -+ z0 = svreinterpret_f64_u16 (z4), -+ z0 = svreinterpret_f64 (z4)) -+ -+/* -+** reinterpret_f64_u32_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f64_u32_tied1, svfloat64_t, svuint32_t, -+ z0_res = svreinterpret_f64_u32 (z0), -+ z0_res = svreinterpret_f64 (z0)) -+ -+/* -+** reinterpret_f64_u32_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_f64_u32_untied, svfloat64_t, svuint32_t, -+ z0 = svreinterpret_f64_u32 (z4), -+ z0 = 
svreinterpret_f64 (z4)) -+ -+/* -+** reinterpret_f64_u64_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_f64_u64_tied1, svfloat64_t, svuint64_t, -+ z0_res = svreinterpret_f64_u64 (z0), -+ z0_res = svreinterpret_f64 (z0)) -+ -+/* -+** reinterpret_f64_u64_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_f64_u64_untied, svfloat64_t, svuint64_t, -+ z0 = svreinterpret_f64_u64 (z4), -+ z0 = svreinterpret_f64 (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s16.c -new file mode 100644 -index 000000000..d62817c2c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s16.c -@@ -0,0 +1,207 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** reinterpret_s16_bf16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s16_bf16_tied1, svint16_t, svbfloat16_t, -+ z0_res = svreinterpret_s16_bf16 (z0), -+ z0_res = svreinterpret_s16 (z0)) -+ -+/* -+** reinterpret_s16_bf16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s16_bf16_untied, svint16_t, svbfloat16_t, -+ z0 = svreinterpret_s16_bf16 (z4), -+ z0 = svreinterpret_s16 (z4)) -+ -+/* -+** reinterpret_s16_f16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s16_f16_tied1, svint16_t, svfloat16_t, -+ z0_res = svreinterpret_s16_f16 (z0), -+ z0_res = svreinterpret_s16 (z0)) -+ -+/* -+** reinterpret_s16_f16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s16_f16_untied, svint16_t, svfloat16_t, -+ z0 = svreinterpret_s16_f16 (z4), -+ z0 = svreinterpret_s16 (z4)) -+ -+/* -+** reinterpret_s16_f32_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s16_f32_tied1, svint16_t, svfloat32_t, -+ z0_res = svreinterpret_s16_f32 (z0), -+ z0_res = svreinterpret_s16 (z0)) -+ -+/* -+** reinterpret_s16_f32_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s16_f32_untied, svint16_t, svfloat32_t, -+ z0 = svreinterpret_s16_f32 (z4), -+ z0 = svreinterpret_s16 (z4)) -+ -+/* -+** reinterpret_s16_f64_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s16_f64_tied1, svint16_t, svfloat64_t, -+ z0_res = svreinterpret_s16_f64 (z0), -+ z0_res = svreinterpret_s16 (z0)) -+ -+/* -+** reinterpret_s16_f64_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s16_f64_untied, svint16_t, svfloat64_t, -+ z0 = svreinterpret_s16_f64 (z4), -+ z0 = svreinterpret_s16 (z4)) -+ -+/* -+** reinterpret_s16_s8_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s16_s8_tied1, svint16_t, svint8_t, -+ z0_res = svreinterpret_s16_s8 (z0), -+ z0_res = svreinterpret_s16 (z0)) -+ -+/* -+** reinterpret_s16_s8_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s16_s8_untied, svint16_t, svint8_t, -+ z0 = svreinterpret_s16_s8 (z4), -+ z0 = svreinterpret_s16 (z4)) -+ -+/* -+** reinterpret_s16_s16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s16_s16_tied1, svint16_t, svint16_t, -+ z0_res = svreinterpret_s16_s16 (z0), -+ z0_res = svreinterpret_s16 (z0)) -+ -+/* -+** reinterpret_s16_s16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s16_s16_untied, svint16_t, svint16_t, -+ z0 = svreinterpret_s16_s16 (z4), -+ z0 = svreinterpret_s16 (z4)) -+ -+/* -+** reinterpret_s16_s32_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s16_s32_tied1, svint16_t, svint32_t, -+ z0_res = svreinterpret_s16_s32 (z0), -+ z0_res = svreinterpret_s16 (z0)) -+ -+/* -+** reinterpret_s16_s32_untied: -+** mov 
z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s16_s32_untied, svint16_t, svint32_t, -+ z0 = svreinterpret_s16_s32 (z4), -+ z0 = svreinterpret_s16 (z4)) -+ -+/* -+** reinterpret_s16_s64_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s16_s64_tied1, svint16_t, svint64_t, -+ z0_res = svreinterpret_s16_s64 (z0), -+ z0_res = svreinterpret_s16 (z0)) -+ -+/* -+** reinterpret_s16_s64_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s16_s64_untied, svint16_t, svint64_t, -+ z0 = svreinterpret_s16_s64 (z4), -+ z0 = svreinterpret_s16 (z4)) -+ -+/* -+** reinterpret_s16_u8_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s16_u8_tied1, svint16_t, svuint8_t, -+ z0_res = svreinterpret_s16_u8 (z0), -+ z0_res = svreinterpret_s16 (z0)) -+ -+/* -+** reinterpret_s16_u8_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s16_u8_untied, svint16_t, svuint8_t, -+ z0 = svreinterpret_s16_u8 (z4), -+ z0 = svreinterpret_s16 (z4)) -+ -+/* -+** reinterpret_s16_u16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s16_u16_tied1, svint16_t, svuint16_t, -+ z0_res = svreinterpret_s16_u16 (z0), -+ z0_res = svreinterpret_s16 (z0)) -+ -+/* -+** reinterpret_s16_u16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s16_u16_untied, svint16_t, svuint16_t, -+ z0 = svreinterpret_s16_u16 (z4), -+ z0 = svreinterpret_s16 (z4)) -+ -+/* -+** reinterpret_s16_u32_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s16_u32_tied1, svint16_t, svuint32_t, -+ z0_res = svreinterpret_s16_u32 (z0), -+ z0_res = svreinterpret_s16 (z0)) -+ -+/* -+** reinterpret_s16_u32_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s16_u32_untied, svint16_t, svuint32_t, -+ z0 = svreinterpret_s16_u32 (z4), -+ z0 = svreinterpret_s16 (z4)) -+ -+/* -+** reinterpret_s16_u64_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s16_u64_tied1, svint16_t, svuint64_t, -+ z0_res = svreinterpret_s16_u64 (z0), -+ z0_res = svreinterpret_s16 (z0)) -+ -+/* -+** reinterpret_s16_u64_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s16_u64_untied, svint16_t, svuint64_t, -+ z0 = svreinterpret_s16_u64 (z4), -+ z0 = svreinterpret_s16 (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s32.c -new file mode 100644 -index 000000000..e1068f244 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s32.c -@@ -0,0 +1,207 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** reinterpret_s32_bf16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s32_bf16_tied1, svint32_t, svbfloat16_t, -+ z0_res = svreinterpret_s32_bf16 (z0), -+ z0_res = svreinterpret_s32 (z0)) -+ -+/* -+** reinterpret_s32_bf16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s32_bf16_untied, svint32_t, svbfloat16_t, -+ z0 = svreinterpret_s32_bf16 (z4), -+ z0 = svreinterpret_s32 (z4)) -+ -+/* -+** reinterpret_s32_f16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s32_f16_tied1, svint32_t, svfloat16_t, -+ z0_res = svreinterpret_s32_f16 (z0), -+ z0_res = svreinterpret_s32 (z0)) -+ -+/* -+** reinterpret_s32_f16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s32_f16_untied, svint32_t, svfloat16_t, -+ z0 = svreinterpret_s32_f16 (z4), -+ z0 = svreinterpret_s32 (z4)) -+ -+/* -+** reinterpret_s32_f32_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s32_f32_tied1, svint32_t, 
svfloat32_t, -+ z0_res = svreinterpret_s32_f32 (z0), -+ z0_res = svreinterpret_s32 (z0)) -+ -+/* -+** reinterpret_s32_f32_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s32_f32_untied, svint32_t, svfloat32_t, -+ z0 = svreinterpret_s32_f32 (z4), -+ z0 = svreinterpret_s32 (z4)) -+ -+/* -+** reinterpret_s32_f64_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s32_f64_tied1, svint32_t, svfloat64_t, -+ z0_res = svreinterpret_s32_f64 (z0), -+ z0_res = svreinterpret_s32 (z0)) -+ -+/* -+** reinterpret_s32_f64_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s32_f64_untied, svint32_t, svfloat64_t, -+ z0 = svreinterpret_s32_f64 (z4), -+ z0 = svreinterpret_s32 (z4)) -+ -+/* -+** reinterpret_s32_s8_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s32_s8_tied1, svint32_t, svint8_t, -+ z0_res = svreinterpret_s32_s8 (z0), -+ z0_res = svreinterpret_s32 (z0)) -+ -+/* -+** reinterpret_s32_s8_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s32_s8_untied, svint32_t, svint8_t, -+ z0 = svreinterpret_s32_s8 (z4), -+ z0 = svreinterpret_s32 (z4)) -+ -+/* -+** reinterpret_s32_s16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s32_s16_tied1, svint32_t, svint16_t, -+ z0_res = svreinterpret_s32_s16 (z0), -+ z0_res = svreinterpret_s32 (z0)) -+ -+/* -+** reinterpret_s32_s16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s32_s16_untied, svint32_t, svint16_t, -+ z0 = svreinterpret_s32_s16 (z4), -+ z0 = svreinterpret_s32 (z4)) -+ -+/* -+** reinterpret_s32_s32_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s32_s32_tied1, svint32_t, svint32_t, -+ z0_res = svreinterpret_s32_s32 (z0), -+ z0_res = svreinterpret_s32 (z0)) -+ -+/* -+** reinterpret_s32_s32_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s32_s32_untied, svint32_t, svint32_t, -+ z0 = svreinterpret_s32_s32 (z4), -+ z0 = svreinterpret_s32 (z4)) -+ -+/* -+** reinterpret_s32_s64_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s32_s64_tied1, svint32_t, svint64_t, -+ z0_res = svreinterpret_s32_s64 (z0), -+ z0_res = svreinterpret_s32 (z0)) -+ -+/* -+** reinterpret_s32_s64_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s32_s64_untied, svint32_t, svint64_t, -+ z0 = svreinterpret_s32_s64 (z4), -+ z0 = svreinterpret_s32 (z4)) -+ -+/* -+** reinterpret_s32_u8_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s32_u8_tied1, svint32_t, svuint8_t, -+ z0_res = svreinterpret_s32_u8 (z0), -+ z0_res = svreinterpret_s32 (z0)) -+ -+/* -+** reinterpret_s32_u8_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s32_u8_untied, svint32_t, svuint8_t, -+ z0 = svreinterpret_s32_u8 (z4), -+ z0 = svreinterpret_s32 (z4)) -+ -+/* -+** reinterpret_s32_u16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s32_u16_tied1, svint32_t, svuint16_t, -+ z0_res = svreinterpret_s32_u16 (z0), -+ z0_res = svreinterpret_s32 (z0)) -+ -+/* -+** reinterpret_s32_u16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s32_u16_untied, svint32_t, svuint16_t, -+ z0 = svreinterpret_s32_u16 (z4), -+ z0 = svreinterpret_s32 (z4)) -+ -+/* -+** reinterpret_s32_u32_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s32_u32_tied1, svint32_t, svuint32_t, -+ z0_res = svreinterpret_s32_u32 (z0), -+ z0_res = svreinterpret_s32 (z0)) -+ -+/* -+** reinterpret_s32_u32_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s32_u32_untied, svint32_t, svuint32_t, -+ z0 = svreinterpret_s32_u32 (z4), -+ z0 = svreinterpret_s32 
(z4)) -+ -+/* -+** reinterpret_s32_u64_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s32_u64_tied1, svint32_t, svuint64_t, -+ z0_res = svreinterpret_s32_u64 (z0), -+ z0_res = svreinterpret_s32 (z0)) -+ -+/* -+** reinterpret_s32_u64_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s32_u64_untied, svint32_t, svuint64_t, -+ z0 = svreinterpret_s32_u64 (z4), -+ z0 = svreinterpret_s32 (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s64.c -new file mode 100644 -index 000000000..cada7533c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s64.c -@@ -0,0 +1,207 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** reinterpret_s64_bf16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s64_bf16_tied1, svint64_t, svbfloat16_t, -+ z0_res = svreinterpret_s64_bf16 (z0), -+ z0_res = svreinterpret_s64 (z0)) -+ -+/* -+** reinterpret_s64_bf16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s64_bf16_untied, svint64_t, svbfloat16_t, -+ z0 = svreinterpret_s64_bf16 (z4), -+ z0 = svreinterpret_s64 (z4)) -+ -+/* -+** reinterpret_s64_f16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s64_f16_tied1, svint64_t, svfloat16_t, -+ z0_res = svreinterpret_s64_f16 (z0), -+ z0_res = svreinterpret_s64 (z0)) -+ -+/* -+** reinterpret_s64_f16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s64_f16_untied, svint64_t, svfloat16_t, -+ z0 = svreinterpret_s64_f16 (z4), -+ z0 = svreinterpret_s64 (z4)) -+ -+/* -+** reinterpret_s64_f32_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s64_f32_tied1, svint64_t, svfloat32_t, -+ z0_res = svreinterpret_s64_f32 (z0), -+ z0_res = svreinterpret_s64 (z0)) -+ -+/* -+** reinterpret_s64_f32_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s64_f32_untied, svint64_t, svfloat32_t, -+ z0 = svreinterpret_s64_f32 (z4), -+ z0 = svreinterpret_s64 (z4)) -+ -+/* -+** reinterpret_s64_f64_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s64_f64_tied1, svint64_t, svfloat64_t, -+ z0_res = svreinterpret_s64_f64 (z0), -+ z0_res = svreinterpret_s64 (z0)) -+ -+/* -+** reinterpret_s64_f64_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s64_f64_untied, svint64_t, svfloat64_t, -+ z0 = svreinterpret_s64_f64 (z4), -+ z0 = svreinterpret_s64 (z4)) -+ -+/* -+** reinterpret_s64_s8_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s64_s8_tied1, svint64_t, svint8_t, -+ z0_res = svreinterpret_s64_s8 (z0), -+ z0_res = svreinterpret_s64 (z0)) -+ -+/* -+** reinterpret_s64_s8_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s64_s8_untied, svint64_t, svint8_t, -+ z0 = svreinterpret_s64_s8 (z4), -+ z0 = svreinterpret_s64 (z4)) -+ -+/* -+** reinterpret_s64_s16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s64_s16_tied1, svint64_t, svint16_t, -+ z0_res = svreinterpret_s64_s16 (z0), -+ z0_res = svreinterpret_s64 (z0)) -+ -+/* -+** reinterpret_s64_s16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s64_s16_untied, svint64_t, svint16_t, -+ z0 = svreinterpret_s64_s16 (z4), -+ z0 = svreinterpret_s64 (z4)) -+ -+/* -+** reinterpret_s64_s32_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s64_s32_tied1, svint64_t, svint32_t, -+ z0_res = svreinterpret_s64_s32 (z0), -+ z0_res = svreinterpret_s64 (z0)) -+ -+/* -+** reinterpret_s64_s32_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ 
-+TEST_DUAL_Z (reinterpret_s64_s32_untied, svint64_t, svint32_t, -+ z0 = svreinterpret_s64_s32 (z4), -+ z0 = svreinterpret_s64 (z4)) -+ -+/* -+** reinterpret_s64_s64_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s64_s64_tied1, svint64_t, svint64_t, -+ z0_res = svreinterpret_s64_s64 (z0), -+ z0_res = svreinterpret_s64 (z0)) -+ -+/* -+** reinterpret_s64_s64_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s64_s64_untied, svint64_t, svint64_t, -+ z0 = svreinterpret_s64_s64 (z4), -+ z0 = svreinterpret_s64 (z4)) -+ -+/* -+** reinterpret_s64_u8_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s64_u8_tied1, svint64_t, svuint8_t, -+ z0_res = svreinterpret_s64_u8 (z0), -+ z0_res = svreinterpret_s64 (z0)) -+ -+/* -+** reinterpret_s64_u8_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s64_u8_untied, svint64_t, svuint8_t, -+ z0 = svreinterpret_s64_u8 (z4), -+ z0 = svreinterpret_s64 (z4)) -+ -+/* -+** reinterpret_s64_u16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s64_u16_tied1, svint64_t, svuint16_t, -+ z0_res = svreinterpret_s64_u16 (z0), -+ z0_res = svreinterpret_s64 (z0)) -+ -+/* -+** reinterpret_s64_u16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s64_u16_untied, svint64_t, svuint16_t, -+ z0 = svreinterpret_s64_u16 (z4), -+ z0 = svreinterpret_s64 (z4)) -+ -+/* -+** reinterpret_s64_u32_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s64_u32_tied1, svint64_t, svuint32_t, -+ z0_res = svreinterpret_s64_u32 (z0), -+ z0_res = svreinterpret_s64 (z0)) -+ -+/* -+** reinterpret_s64_u32_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s64_u32_untied, svint64_t, svuint32_t, -+ z0 = svreinterpret_s64_u32 (z4), -+ z0 = svreinterpret_s64 (z4)) -+ -+/* -+** reinterpret_s64_u64_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s64_u64_tied1, svint64_t, svuint64_t, -+ z0_res = svreinterpret_s64_u64 (z0), -+ z0_res = svreinterpret_s64 (z0)) -+ -+/* -+** reinterpret_s64_u64_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s64_u64_untied, svint64_t, svuint64_t, -+ z0 = svreinterpret_s64_u64 (z4), -+ z0 = svreinterpret_s64 (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s8.c -new file mode 100644 -index 000000000..23a40d0ba ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_s8.c -@@ -0,0 +1,207 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** reinterpret_s8_bf16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s8_bf16_tied1, svint8_t, svbfloat16_t, -+ z0_res = svreinterpret_s8_bf16 (z0), -+ z0_res = svreinterpret_s8 (z0)) -+ -+/* -+** reinterpret_s8_bf16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s8_bf16_untied, svint8_t, svbfloat16_t, -+ z0 = svreinterpret_s8_bf16 (z4), -+ z0 = svreinterpret_s8 (z4)) -+ -+/* -+** reinterpret_s8_f16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s8_f16_tied1, svint8_t, svfloat16_t, -+ z0_res = svreinterpret_s8_f16 (z0), -+ z0_res = svreinterpret_s8 (z0)) -+ -+/* -+** reinterpret_s8_f16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s8_f16_untied, svint8_t, svfloat16_t, -+ z0 = svreinterpret_s8_f16 (z4), -+ z0 = svreinterpret_s8 (z4)) -+ -+/* -+** reinterpret_s8_f32_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s8_f32_tied1, svint8_t, svfloat32_t, -+ z0_res = svreinterpret_s8_f32 (z0), -+ z0_res = 
svreinterpret_s8 (z0)) -+ -+/* -+** reinterpret_s8_f32_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s8_f32_untied, svint8_t, svfloat32_t, -+ z0 = svreinterpret_s8_f32 (z4), -+ z0 = svreinterpret_s8 (z4)) -+ -+/* -+** reinterpret_s8_f64_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s8_f64_tied1, svint8_t, svfloat64_t, -+ z0_res = svreinterpret_s8_f64 (z0), -+ z0_res = svreinterpret_s8 (z0)) -+ -+/* -+** reinterpret_s8_f64_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s8_f64_untied, svint8_t, svfloat64_t, -+ z0 = svreinterpret_s8_f64 (z4), -+ z0 = svreinterpret_s8 (z4)) -+ -+/* -+** reinterpret_s8_s8_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s8_s8_tied1, svint8_t, svint8_t, -+ z0_res = svreinterpret_s8_s8 (z0), -+ z0_res = svreinterpret_s8 (z0)) -+ -+/* -+** reinterpret_s8_s8_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s8_s8_untied, svint8_t, svint8_t, -+ z0 = svreinterpret_s8_s8 (z4), -+ z0 = svreinterpret_s8 (z4)) -+ -+/* -+** reinterpret_s8_s16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s8_s16_tied1, svint8_t, svint16_t, -+ z0_res = svreinterpret_s8_s16 (z0), -+ z0_res = svreinterpret_s8 (z0)) -+ -+/* -+** reinterpret_s8_s16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s8_s16_untied, svint8_t, svint16_t, -+ z0 = svreinterpret_s8_s16 (z4), -+ z0 = svreinterpret_s8 (z4)) -+ -+/* -+** reinterpret_s8_s32_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s8_s32_tied1, svint8_t, svint32_t, -+ z0_res = svreinterpret_s8_s32 (z0), -+ z0_res = svreinterpret_s8 (z0)) -+ -+/* -+** reinterpret_s8_s32_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s8_s32_untied, svint8_t, svint32_t, -+ z0 = svreinterpret_s8_s32 (z4), -+ z0 = svreinterpret_s8 (z4)) -+ -+/* -+** reinterpret_s8_s64_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s8_s64_tied1, svint8_t, svint64_t, -+ z0_res = svreinterpret_s8_s64 (z0), -+ z0_res = svreinterpret_s8 (z0)) -+ -+/* -+** reinterpret_s8_s64_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s8_s64_untied, svint8_t, svint64_t, -+ z0 = svreinterpret_s8_s64 (z4), -+ z0 = svreinterpret_s8 (z4)) -+ -+/* -+** reinterpret_s8_u8_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s8_u8_tied1, svint8_t, svuint8_t, -+ z0_res = svreinterpret_s8_u8 (z0), -+ z0_res = svreinterpret_s8 (z0)) -+ -+/* -+** reinterpret_s8_u8_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s8_u8_untied, svint8_t, svuint8_t, -+ z0 = svreinterpret_s8_u8 (z4), -+ z0 = svreinterpret_s8 (z4)) -+ -+/* -+** reinterpret_s8_u16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s8_u16_tied1, svint8_t, svuint16_t, -+ z0_res = svreinterpret_s8_u16 (z0), -+ z0_res = svreinterpret_s8 (z0)) -+ -+/* -+** reinterpret_s8_u16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s8_u16_untied, svint8_t, svuint16_t, -+ z0 = svreinterpret_s8_u16 (z4), -+ z0 = svreinterpret_s8 (z4)) -+ -+/* -+** reinterpret_s8_u32_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s8_u32_tied1, svint8_t, svuint32_t, -+ z0_res = svreinterpret_s8_u32 (z0), -+ z0_res = svreinterpret_s8 (z0)) -+ -+/* -+** reinterpret_s8_u32_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s8_u32_untied, svint8_t, svuint32_t, -+ z0 = svreinterpret_s8_u32 (z4), -+ z0 = svreinterpret_s8 (z4)) -+ -+/* -+** reinterpret_s8_u64_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_s8_u64_tied1, svint8_t, svuint64_t, -+ z0_res = 
svreinterpret_s8_u64 (z0), -+ z0_res = svreinterpret_s8 (z0)) -+ -+/* -+** reinterpret_s8_u64_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_s8_u64_untied, svint8_t, svuint64_t, -+ z0 = svreinterpret_s8_u64 (z4), -+ z0 = svreinterpret_s8 (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u16.c -new file mode 100644 -index 000000000..48e8ecaff ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u16.c -@@ -0,0 +1,207 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** reinterpret_u16_bf16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u16_bf16_tied1, svuint16_t, svbfloat16_t, -+ z0_res = svreinterpret_u16_bf16 (z0), -+ z0_res = svreinterpret_u16 (z0)) -+ -+/* -+** reinterpret_u16_bf16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u16_bf16_untied, svuint16_t, svbfloat16_t, -+ z0 = svreinterpret_u16_bf16 (z4), -+ z0 = svreinterpret_u16 (z4)) -+ -+/* -+** reinterpret_u16_f16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u16_f16_tied1, svuint16_t, svfloat16_t, -+ z0_res = svreinterpret_u16_f16 (z0), -+ z0_res = svreinterpret_u16 (z0)) -+ -+/* -+** reinterpret_u16_f16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u16_f16_untied, svuint16_t, svfloat16_t, -+ z0 = svreinterpret_u16_f16 (z4), -+ z0 = svreinterpret_u16 (z4)) -+ -+/* -+** reinterpret_u16_f32_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u16_f32_tied1, svuint16_t, svfloat32_t, -+ z0_res = svreinterpret_u16_f32 (z0), -+ z0_res = svreinterpret_u16 (z0)) -+ -+/* -+** reinterpret_u16_f32_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u16_f32_untied, svuint16_t, svfloat32_t, -+ z0 = svreinterpret_u16_f32 (z4), -+ z0 = svreinterpret_u16 (z4)) -+ -+/* -+** reinterpret_u16_f64_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u16_f64_tied1, svuint16_t, svfloat64_t, -+ z0_res = svreinterpret_u16_f64 (z0), -+ z0_res = svreinterpret_u16 (z0)) -+ -+/* -+** reinterpret_u16_f64_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u16_f64_untied, svuint16_t, svfloat64_t, -+ z0 = svreinterpret_u16_f64 (z4), -+ z0 = svreinterpret_u16 (z4)) -+ -+/* -+** reinterpret_u16_s8_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u16_s8_tied1, svuint16_t, svint8_t, -+ z0_res = svreinterpret_u16_s8 (z0), -+ z0_res = svreinterpret_u16 (z0)) -+ -+/* -+** reinterpret_u16_s8_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u16_s8_untied, svuint16_t, svint8_t, -+ z0 = svreinterpret_u16_s8 (z4), -+ z0 = svreinterpret_u16 (z4)) -+ -+/* -+** reinterpret_u16_s16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u16_s16_tied1, svuint16_t, svint16_t, -+ z0_res = svreinterpret_u16_s16 (z0), -+ z0_res = svreinterpret_u16 (z0)) -+ -+/* -+** reinterpret_u16_s16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u16_s16_untied, svuint16_t, svint16_t, -+ z0 = svreinterpret_u16_s16 (z4), -+ z0 = svreinterpret_u16 (z4)) -+ -+/* -+** reinterpret_u16_s32_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u16_s32_tied1, svuint16_t, svint32_t, -+ z0_res = svreinterpret_u16_s32 (z0), -+ z0_res = svreinterpret_u16 (z0)) -+ -+/* -+** reinterpret_u16_s32_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u16_s32_untied, svuint16_t, svint32_t, -+ z0 = svreinterpret_u16_s32 (z4), -+ z0 = svreinterpret_u16 (z4)) 
-+ -+/* -+** reinterpret_u16_s64_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u16_s64_tied1, svuint16_t, svint64_t, -+ z0_res = svreinterpret_u16_s64 (z0), -+ z0_res = svreinterpret_u16 (z0)) -+ -+/* -+** reinterpret_u16_s64_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u16_s64_untied, svuint16_t, svint64_t, -+ z0 = svreinterpret_u16_s64 (z4), -+ z0 = svreinterpret_u16 (z4)) -+ -+/* -+** reinterpret_u16_u8_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u16_u8_tied1, svuint16_t, svuint8_t, -+ z0_res = svreinterpret_u16_u8 (z0), -+ z0_res = svreinterpret_u16 (z0)) -+ -+/* -+** reinterpret_u16_u8_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u16_u8_untied, svuint16_t, svuint8_t, -+ z0 = svreinterpret_u16_u8 (z4), -+ z0 = svreinterpret_u16 (z4)) -+ -+/* -+** reinterpret_u16_u16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u16_u16_tied1, svuint16_t, svuint16_t, -+ z0_res = svreinterpret_u16_u16 (z0), -+ z0_res = svreinterpret_u16 (z0)) -+ -+/* -+** reinterpret_u16_u16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u16_u16_untied, svuint16_t, svuint16_t, -+ z0 = svreinterpret_u16_u16 (z4), -+ z0 = svreinterpret_u16 (z4)) -+ -+/* -+** reinterpret_u16_u32_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u16_u32_tied1, svuint16_t, svuint32_t, -+ z0_res = svreinterpret_u16_u32 (z0), -+ z0_res = svreinterpret_u16 (z0)) -+ -+/* -+** reinterpret_u16_u32_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u16_u32_untied, svuint16_t, svuint32_t, -+ z0 = svreinterpret_u16_u32 (z4), -+ z0 = svreinterpret_u16 (z4)) -+ -+/* -+** reinterpret_u16_u64_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u16_u64_tied1, svuint16_t, svuint64_t, -+ z0_res = svreinterpret_u16_u64 (z0), -+ z0_res = svreinterpret_u16 (z0)) -+ -+/* -+** reinterpret_u16_u64_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u16_u64_untied, svuint16_t, svuint64_t, -+ z0 = svreinterpret_u16_u64 (z4), -+ z0 = svreinterpret_u16 (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u32.c -new file mode 100644 -index 000000000..1d4e85712 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u32.c -@@ -0,0 +1,207 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** reinterpret_u32_bf16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u32_bf16_tied1, svuint32_t, svbfloat16_t, -+ z0_res = svreinterpret_u32_bf16 (z0), -+ z0_res = svreinterpret_u32 (z0)) -+ -+/* -+** reinterpret_u32_bf16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u32_bf16_untied, svuint32_t, svbfloat16_t, -+ z0 = svreinterpret_u32_bf16 (z4), -+ z0 = svreinterpret_u32 (z4)) -+ -+/* -+** reinterpret_u32_f16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u32_f16_tied1, svuint32_t, svfloat16_t, -+ z0_res = svreinterpret_u32_f16 (z0), -+ z0_res = svreinterpret_u32 (z0)) -+ -+/* -+** reinterpret_u32_f16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u32_f16_untied, svuint32_t, svfloat16_t, -+ z0 = svreinterpret_u32_f16 (z4), -+ z0 = svreinterpret_u32 (z4)) -+ -+/* -+** reinterpret_u32_f32_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u32_f32_tied1, svuint32_t, svfloat32_t, -+ z0_res = svreinterpret_u32_f32 (z0), -+ z0_res = svreinterpret_u32 (z0)) -+ -+/* -+** reinterpret_u32_f32_untied: -+** mov z0\.d, z4\.d -+** 
ret -+*/ -+TEST_DUAL_Z (reinterpret_u32_f32_untied, svuint32_t, svfloat32_t, -+ z0 = svreinterpret_u32_f32 (z4), -+ z0 = svreinterpret_u32 (z4)) -+ -+/* -+** reinterpret_u32_f64_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u32_f64_tied1, svuint32_t, svfloat64_t, -+ z0_res = svreinterpret_u32_f64 (z0), -+ z0_res = svreinterpret_u32 (z0)) -+ -+/* -+** reinterpret_u32_f64_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u32_f64_untied, svuint32_t, svfloat64_t, -+ z0 = svreinterpret_u32_f64 (z4), -+ z0 = svreinterpret_u32 (z4)) -+ -+/* -+** reinterpret_u32_s8_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u32_s8_tied1, svuint32_t, svint8_t, -+ z0_res = svreinterpret_u32_s8 (z0), -+ z0_res = svreinterpret_u32 (z0)) -+ -+/* -+** reinterpret_u32_s8_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u32_s8_untied, svuint32_t, svint8_t, -+ z0 = svreinterpret_u32_s8 (z4), -+ z0 = svreinterpret_u32 (z4)) -+ -+/* -+** reinterpret_u32_s16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u32_s16_tied1, svuint32_t, svint16_t, -+ z0_res = svreinterpret_u32_s16 (z0), -+ z0_res = svreinterpret_u32 (z0)) -+ -+/* -+** reinterpret_u32_s16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u32_s16_untied, svuint32_t, svint16_t, -+ z0 = svreinterpret_u32_s16 (z4), -+ z0 = svreinterpret_u32 (z4)) -+ -+/* -+** reinterpret_u32_s32_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u32_s32_tied1, svuint32_t, svint32_t, -+ z0_res = svreinterpret_u32_s32 (z0), -+ z0_res = svreinterpret_u32 (z0)) -+ -+/* -+** reinterpret_u32_s32_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u32_s32_untied, svuint32_t, svint32_t, -+ z0 = svreinterpret_u32_s32 (z4), -+ z0 = svreinterpret_u32 (z4)) -+ -+/* -+** reinterpret_u32_s64_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u32_s64_tied1, svuint32_t, svint64_t, -+ z0_res = svreinterpret_u32_s64 (z0), -+ z0_res = svreinterpret_u32 (z0)) -+ -+/* -+** reinterpret_u32_s64_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u32_s64_untied, svuint32_t, svint64_t, -+ z0 = svreinterpret_u32_s64 (z4), -+ z0 = svreinterpret_u32 (z4)) -+ -+/* -+** reinterpret_u32_u8_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u32_u8_tied1, svuint32_t, svuint8_t, -+ z0_res = svreinterpret_u32_u8 (z0), -+ z0_res = svreinterpret_u32 (z0)) -+ -+/* -+** reinterpret_u32_u8_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u32_u8_untied, svuint32_t, svuint8_t, -+ z0 = svreinterpret_u32_u8 (z4), -+ z0 = svreinterpret_u32 (z4)) -+ -+/* -+** reinterpret_u32_u16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u32_u16_tied1, svuint32_t, svuint16_t, -+ z0_res = svreinterpret_u32_u16 (z0), -+ z0_res = svreinterpret_u32 (z0)) -+ -+/* -+** reinterpret_u32_u16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u32_u16_untied, svuint32_t, svuint16_t, -+ z0 = svreinterpret_u32_u16 (z4), -+ z0 = svreinterpret_u32 (z4)) -+ -+/* -+** reinterpret_u32_u32_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u32_u32_tied1, svuint32_t, svuint32_t, -+ z0_res = svreinterpret_u32_u32 (z0), -+ z0_res = svreinterpret_u32 (z0)) -+ -+/* -+** reinterpret_u32_u32_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u32_u32_untied, svuint32_t, svuint32_t, -+ z0 = svreinterpret_u32_u32 (z4), -+ z0 = svreinterpret_u32 (z4)) -+ -+/* -+** reinterpret_u32_u64_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u32_u64_tied1, svuint32_t, svuint64_t, -+ z0_res = 
svreinterpret_u32_u64 (z0), -+ z0_res = svreinterpret_u32 (z0)) -+ -+/* -+** reinterpret_u32_u64_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u32_u64_untied, svuint32_t, svuint64_t, -+ z0 = svreinterpret_u32_u64 (z4), -+ z0 = svreinterpret_u32 (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u64.c -new file mode 100644 -index 000000000..07af69dce ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u64.c -@@ -0,0 +1,207 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** reinterpret_u64_bf16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u64_bf16_tied1, svuint64_t, svbfloat16_t, -+ z0_res = svreinterpret_u64_bf16 (z0), -+ z0_res = svreinterpret_u64 (z0)) -+ -+/* -+** reinterpret_u64_bf16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u64_bf16_untied, svuint64_t, svbfloat16_t, -+ z0 = svreinterpret_u64_bf16 (z4), -+ z0 = svreinterpret_u64 (z4)) -+ -+/* -+** reinterpret_u64_f16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u64_f16_tied1, svuint64_t, svfloat16_t, -+ z0_res = svreinterpret_u64_f16 (z0), -+ z0_res = svreinterpret_u64 (z0)) -+ -+/* -+** reinterpret_u64_f16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u64_f16_untied, svuint64_t, svfloat16_t, -+ z0 = svreinterpret_u64_f16 (z4), -+ z0 = svreinterpret_u64 (z4)) -+ -+/* -+** reinterpret_u64_f32_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u64_f32_tied1, svuint64_t, svfloat32_t, -+ z0_res = svreinterpret_u64_f32 (z0), -+ z0_res = svreinterpret_u64 (z0)) -+ -+/* -+** reinterpret_u64_f32_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u64_f32_untied, svuint64_t, svfloat32_t, -+ z0 = svreinterpret_u64_f32 (z4), -+ z0 = svreinterpret_u64 (z4)) -+ -+/* -+** reinterpret_u64_f64_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u64_f64_tied1, svuint64_t, svfloat64_t, -+ z0_res = svreinterpret_u64_f64 (z0), -+ z0_res = svreinterpret_u64 (z0)) -+ -+/* -+** reinterpret_u64_f64_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u64_f64_untied, svuint64_t, svfloat64_t, -+ z0 = svreinterpret_u64_f64 (z4), -+ z0 = svreinterpret_u64 (z4)) -+ -+/* -+** reinterpret_u64_s8_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u64_s8_tied1, svuint64_t, svint8_t, -+ z0_res = svreinterpret_u64_s8 (z0), -+ z0_res = svreinterpret_u64 (z0)) -+ -+/* -+** reinterpret_u64_s8_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u64_s8_untied, svuint64_t, svint8_t, -+ z0 = svreinterpret_u64_s8 (z4), -+ z0 = svreinterpret_u64 (z4)) -+ -+/* -+** reinterpret_u64_s16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u64_s16_tied1, svuint64_t, svint16_t, -+ z0_res = svreinterpret_u64_s16 (z0), -+ z0_res = svreinterpret_u64 (z0)) -+ -+/* -+** reinterpret_u64_s16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u64_s16_untied, svuint64_t, svint16_t, -+ z0 = svreinterpret_u64_s16 (z4), -+ z0 = svreinterpret_u64 (z4)) -+ -+/* -+** reinterpret_u64_s32_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u64_s32_tied1, svuint64_t, svint32_t, -+ z0_res = svreinterpret_u64_s32 (z0), -+ z0_res = svreinterpret_u64 (z0)) -+ -+/* -+** reinterpret_u64_s32_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u64_s32_untied, svuint64_t, svint32_t, -+ z0 = svreinterpret_u64_s32 (z4), -+ z0 = 
svreinterpret_u64 (z4)) -+ -+/* -+** reinterpret_u64_s64_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u64_s64_tied1, svuint64_t, svint64_t, -+ z0_res = svreinterpret_u64_s64 (z0), -+ z0_res = svreinterpret_u64 (z0)) -+ -+/* -+** reinterpret_u64_s64_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u64_s64_untied, svuint64_t, svint64_t, -+ z0 = svreinterpret_u64_s64 (z4), -+ z0 = svreinterpret_u64 (z4)) -+ -+/* -+** reinterpret_u64_u8_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u64_u8_tied1, svuint64_t, svuint8_t, -+ z0_res = svreinterpret_u64_u8 (z0), -+ z0_res = svreinterpret_u64 (z0)) -+ -+/* -+** reinterpret_u64_u8_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u64_u8_untied, svuint64_t, svuint8_t, -+ z0 = svreinterpret_u64_u8 (z4), -+ z0 = svreinterpret_u64 (z4)) -+ -+/* -+** reinterpret_u64_u16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u64_u16_tied1, svuint64_t, svuint16_t, -+ z0_res = svreinterpret_u64_u16 (z0), -+ z0_res = svreinterpret_u64 (z0)) -+ -+/* -+** reinterpret_u64_u16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u64_u16_untied, svuint64_t, svuint16_t, -+ z0 = svreinterpret_u64_u16 (z4), -+ z0 = svreinterpret_u64 (z4)) -+ -+/* -+** reinterpret_u64_u32_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u64_u32_tied1, svuint64_t, svuint32_t, -+ z0_res = svreinterpret_u64_u32 (z0), -+ z0_res = svreinterpret_u64 (z0)) -+ -+/* -+** reinterpret_u64_u32_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u64_u32_untied, svuint64_t, svuint32_t, -+ z0 = svreinterpret_u64_u32 (z4), -+ z0 = svreinterpret_u64 (z4)) -+ -+/* -+** reinterpret_u64_u64_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u64_u64_tied1, svuint64_t, svuint64_t, -+ z0_res = svreinterpret_u64_u64 (z0), -+ z0_res = svreinterpret_u64 (z0)) -+ -+/* -+** reinterpret_u64_u64_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u64_u64_untied, svuint64_t, svuint64_t, -+ z0 = svreinterpret_u64_u64 (z4), -+ z0 = svreinterpret_u64 (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u8.c -new file mode 100644 -index 000000000..a4c7f4c8d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/reinterpret_u8.c -@@ -0,0 +1,207 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** reinterpret_u8_bf16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u8_bf16_tied1, svuint8_t, svbfloat16_t, -+ z0_res = svreinterpret_u8_bf16 (z0), -+ z0_res = svreinterpret_u8 (z0)) -+ -+/* -+** reinterpret_u8_bf16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u8_bf16_untied, svuint8_t, svbfloat16_t, -+ z0 = svreinterpret_u8_bf16 (z4), -+ z0 = svreinterpret_u8 (z4)) -+ -+/* -+** reinterpret_u8_f16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u8_f16_tied1, svuint8_t, svfloat16_t, -+ z0_res = svreinterpret_u8_f16 (z0), -+ z0_res = svreinterpret_u8 (z0)) -+ -+/* -+** reinterpret_u8_f16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u8_f16_untied, svuint8_t, svfloat16_t, -+ z0 = svreinterpret_u8_f16 (z4), -+ z0 = svreinterpret_u8 (z4)) -+ -+/* -+** reinterpret_u8_f32_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u8_f32_tied1, svuint8_t, svfloat32_t, -+ z0_res = svreinterpret_u8_f32 (z0), -+ z0_res = svreinterpret_u8 (z0)) -+ -+/* -+** reinterpret_u8_f32_untied: -+** mov z0\.d, z4\.d -+** ret 
-+*/ -+TEST_DUAL_Z (reinterpret_u8_f32_untied, svuint8_t, svfloat32_t, -+ z0 = svreinterpret_u8_f32 (z4), -+ z0 = svreinterpret_u8 (z4)) -+ -+/* -+** reinterpret_u8_f64_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u8_f64_tied1, svuint8_t, svfloat64_t, -+ z0_res = svreinterpret_u8_f64 (z0), -+ z0_res = svreinterpret_u8 (z0)) -+ -+/* -+** reinterpret_u8_f64_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u8_f64_untied, svuint8_t, svfloat64_t, -+ z0 = svreinterpret_u8_f64 (z4), -+ z0 = svreinterpret_u8 (z4)) -+ -+/* -+** reinterpret_u8_s8_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u8_s8_tied1, svuint8_t, svint8_t, -+ z0_res = svreinterpret_u8_s8 (z0), -+ z0_res = svreinterpret_u8 (z0)) -+ -+/* -+** reinterpret_u8_s8_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u8_s8_untied, svuint8_t, svint8_t, -+ z0 = svreinterpret_u8_s8 (z4), -+ z0 = svreinterpret_u8 (z4)) -+ -+/* -+** reinterpret_u8_s16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u8_s16_tied1, svuint8_t, svint16_t, -+ z0_res = svreinterpret_u8_s16 (z0), -+ z0_res = svreinterpret_u8 (z0)) -+ -+/* -+** reinterpret_u8_s16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u8_s16_untied, svuint8_t, svint16_t, -+ z0 = svreinterpret_u8_s16 (z4), -+ z0 = svreinterpret_u8 (z4)) -+ -+/* -+** reinterpret_u8_s32_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u8_s32_tied1, svuint8_t, svint32_t, -+ z0_res = svreinterpret_u8_s32 (z0), -+ z0_res = svreinterpret_u8 (z0)) -+ -+/* -+** reinterpret_u8_s32_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u8_s32_untied, svuint8_t, svint32_t, -+ z0 = svreinterpret_u8_s32 (z4), -+ z0 = svreinterpret_u8 (z4)) -+ -+/* -+** reinterpret_u8_s64_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u8_s64_tied1, svuint8_t, svint64_t, -+ z0_res = svreinterpret_u8_s64 (z0), -+ z0_res = svreinterpret_u8 (z0)) -+ -+/* -+** reinterpret_u8_s64_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u8_s64_untied, svuint8_t, svint64_t, -+ z0 = svreinterpret_u8_s64 (z4), -+ z0 = svreinterpret_u8 (z4)) -+ -+/* -+** reinterpret_u8_u8_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u8_u8_tied1, svuint8_t, svuint8_t, -+ z0_res = svreinterpret_u8_u8 (z0), -+ z0_res = svreinterpret_u8 (z0)) -+ -+/* -+** reinterpret_u8_u8_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u8_u8_untied, svuint8_t, svuint8_t, -+ z0 = svreinterpret_u8_u8 (z4), -+ z0 = svreinterpret_u8 (z4)) -+ -+/* -+** reinterpret_u8_u16_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u8_u16_tied1, svuint8_t, svuint16_t, -+ z0_res = svreinterpret_u8_u16 (z0), -+ z0_res = svreinterpret_u8 (z0)) -+ -+/* -+** reinterpret_u8_u16_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u8_u16_untied, svuint8_t, svuint16_t, -+ z0 = svreinterpret_u8_u16 (z4), -+ z0 = svreinterpret_u8 (z4)) -+ -+/* -+** reinterpret_u8_u32_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u8_u32_tied1, svuint8_t, svuint32_t, -+ z0_res = svreinterpret_u8_u32 (z0), -+ z0_res = svreinterpret_u8 (z0)) -+ -+/* -+** reinterpret_u8_u32_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u8_u32_untied, svuint8_t, svuint32_t, -+ z0 = svreinterpret_u8_u32 (z4), -+ z0 = svreinterpret_u8 (z4)) -+ -+/* -+** reinterpret_u8_u64_tied1: -+** ret -+*/ -+TEST_DUAL_Z_REV (reinterpret_u8_u64_tied1, svuint8_t, svuint64_t, -+ z0_res = svreinterpret_u8_u64 (z0), -+ z0_res = svreinterpret_u8 (z0)) -+ -+/* -+** 
reinterpret_u8_u64_untied: -+** mov z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (reinterpret_u8_u64_untied, svuint8_t, svuint64_t, -+ z0 = svreinterpret_u8_u64 (z4), -+ z0 = svreinterpret_u8 (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_b16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_b16.c -new file mode 100644 -index 000000000..7d5c67d5c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_b16.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rev_b16_tied1: -+** rev p0\.h, p0\.h -+** ret -+*/ -+TEST_UNIFORM_P (rev_b16_tied1, -+ p0 = svrev_b16 (p0), -+ p0 = svrev_b16 (p0)) -+ -+/* -+** rev_b16_untied: -+** rev p0\.h, p1\.h -+** ret -+*/ -+TEST_UNIFORM_P (rev_b16_untied, -+ p0 = svrev_b16 (p1), -+ p0 = svrev_b16 (p1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_b32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_b32.c -new file mode 100644 -index 000000000..3f8c810c0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_b32.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rev_b32_tied1: -+** rev p0\.s, p0\.s -+** ret -+*/ -+TEST_UNIFORM_P (rev_b32_tied1, -+ p0 = svrev_b32 (p0), -+ p0 = svrev_b32 (p0)) -+ -+/* -+** rev_b32_untied: -+** rev p0\.s, p1\.s -+** ret -+*/ -+TEST_UNIFORM_P (rev_b32_untied, -+ p0 = svrev_b32 (p1), -+ p0 = svrev_b32 (p1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_b64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_b64.c -new file mode 100644 -index 000000000..fe937ecc6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_b64.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rev_b64_tied1: -+** rev p0\.d, p0\.d -+** ret -+*/ -+TEST_UNIFORM_P (rev_b64_tied1, -+ p0 = svrev_b64 (p0), -+ p0 = svrev_b64 (p0)) -+ -+/* -+** rev_b64_untied: -+** rev p0\.d, p1\.d -+** ret -+*/ -+TEST_UNIFORM_P (rev_b64_untied, -+ p0 = svrev_b64 (p1), -+ p0 = svrev_b64 (p1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_b8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_b8.c -new file mode 100644 -index 000000000..d23e50407 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_b8.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rev_b8_tied1: -+** rev p0\.b, p0\.b -+** ret -+*/ -+TEST_UNIFORM_P (rev_b8_tied1, -+ p0 = svrev_b8 (p0), -+ p0 = svrev_b8 (p0)) -+ -+/* -+** rev_b8_untied: -+** rev p0\.b, p1\.b -+** ret -+*/ -+TEST_UNIFORM_P (rev_b8_untied, -+ p0 = svrev_b8 (p1), -+ p0 = svrev_b8 (p1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_bf16.c -new file mode 100644 -index 000000000..fe587d42c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_bf16.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rev_bf16_tied1: -+** rev z0\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rev_bf16_tied1, svbfloat16_t, -+ z0 = svrev_bf16 (z0), -+ z0 = svrev (z0)) -+ -+/* -+** rev_bf16_untied: -+** rev z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rev_bf16_untied, svbfloat16_t, -+ z0 = svrev_bf16 (z1), -+ z0 = svrev (z1)) -diff 
--git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_f16.c -new file mode 100644 -index 000000000..321e2f900 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_f16.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rev_f16_tied1: -+** rev z0\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rev_f16_tied1, svfloat16_t, -+ z0 = svrev_f16 (z0), -+ z0 = svrev (z0)) -+ -+/* -+** rev_f16_untied: -+** rev z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rev_f16_untied, svfloat16_t, -+ z0 = svrev_f16 (z1), -+ z0 = svrev (z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_f32.c -new file mode 100644 -index 000000000..6f31928b5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_f32.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rev_f32_tied1: -+** rev z0\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rev_f32_tied1, svfloat32_t, -+ z0 = svrev_f32 (z0), -+ z0 = svrev (z0)) -+ -+/* -+** rev_f32_untied: -+** rev z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rev_f32_untied, svfloat32_t, -+ z0 = svrev_f32 (z1), -+ z0 = svrev (z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_f64.c -new file mode 100644 -index 000000000..6f14078a7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_f64.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rev_f64_tied1: -+** rev z0\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rev_f64_tied1, svfloat64_t, -+ z0 = svrev_f64 (z0), -+ z0 = svrev (z0)) -+ -+/* -+** rev_f64_untied: -+** rev z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rev_f64_untied, svfloat64_t, -+ z0 = svrev_f64 (z1), -+ z0 = svrev (z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_s16.c -new file mode 100644 -index 000000000..63f6ea73c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_s16.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rev_s16_tied1: -+** rev z0\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rev_s16_tied1, svint16_t, -+ z0 = svrev_s16 (z0), -+ z0 = svrev (z0)) -+ -+/* -+** rev_s16_untied: -+** rev z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rev_s16_untied, svint16_t, -+ z0 = svrev_s16 (z1), -+ z0 = svrev (z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_s32.c -new file mode 100644 -index 000000000..38240b7ec ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_s32.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rev_s32_tied1: -+** rev z0\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rev_s32_tied1, svint32_t, -+ z0 = svrev_s32 (z0), -+ z0 = svrev (z0)) -+ -+/* -+** rev_s32_untied: -+** rev z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rev_s32_untied, svint32_t, -+ z0 = svrev_s32 (z1), -+ z0 = svrev (z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_s64.c -new file mode 
100644 -index 000000000..0004e4586 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_s64.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rev_s64_tied1: -+** rev z0\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rev_s64_tied1, svint64_t, -+ z0 = svrev_s64 (z0), -+ z0 = svrev (z0)) -+ -+/* -+** rev_s64_untied: -+** rev z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rev_s64_untied, svint64_t, -+ z0 = svrev_s64 (z1), -+ z0 = svrev (z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_s8.c -new file mode 100644 -index 000000000..44b874c92 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_s8.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rev_s8_tied1: -+** rev z0\.b, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (rev_s8_tied1, svint8_t, -+ z0 = svrev_s8 (z0), -+ z0 = svrev (z0)) -+ -+/* -+** rev_s8_untied: -+** rev z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (rev_s8_untied, svint8_t, -+ z0 = svrev_s8 (z1), -+ z0 = svrev (z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_u16.c -new file mode 100644 -index 000000000..2b4c88854 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_u16.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rev_u16_tied1: -+** rev z0\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rev_u16_tied1, svuint16_t, -+ z0 = svrev_u16 (z0), -+ z0 = svrev (z0)) -+ -+/* -+** rev_u16_untied: -+** rev z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rev_u16_untied, svuint16_t, -+ z0 = svrev_u16 (z1), -+ z0 = svrev (z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_u32.c -new file mode 100644 -index 000000000..e14351f30 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_u32.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rev_u32_tied1: -+** rev z0\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rev_u32_tied1, svuint32_t, -+ z0 = svrev_u32 (z0), -+ z0 = svrev (z0)) -+ -+/* -+** rev_u32_untied: -+** rev z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rev_u32_untied, svuint32_t, -+ z0 = svrev_u32 (z1), -+ z0 = svrev (z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_u64.c -new file mode 100644 -index 000000000..5fc987475 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_u64.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rev_u64_tied1: -+** rev z0\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rev_u64_tied1, svuint64_t, -+ z0 = svrev_u64 (z0), -+ z0 = svrev (z0)) -+ -+/* -+** rev_u64_untied: -+** rev z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rev_u64_untied, svuint64_t, -+ z0 = svrev_u64 (z1), -+ z0 = svrev (z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_u8.c -new file mode 100644 -index 000000000..9dd4f440b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rev_u8.c -@@ -0,0 +1,21 @@ -+/* { dg-final { 
check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rev_u8_tied1: -+** rev z0\.b, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (rev_u8_tied1, svuint8_t, -+ z0 = svrev_u8 (z0), -+ z0 = svrev (z0)) -+ -+/* -+** rev_u8_untied: -+** rev z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (rev_u8_untied, svuint8_t, -+ z0 = svrev_u8 (z1), -+ z0 = svrev (z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_s16.c -new file mode 100644 -index 000000000..ecfabe668 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_s16.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** revb_s16_m_tied12: -+** revb z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (revb_s16_m_tied12, svint16_t, -+ z0 = svrevb_s16_m (z0, p0, z0), -+ z0 = svrevb_m (z0, p0, z0)) -+ -+/* -+** revb_s16_m_tied1: -+** revb z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (revb_s16_m_tied1, svint16_t, -+ z0 = svrevb_s16_m (z0, p0, z1), -+ z0 = svrevb_m (z0, p0, z1)) -+ -+/* -+** revb_s16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** revb z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (revb_s16_m_tied2, svint16_t, -+ z0 = svrevb_s16_m (z1, p0, z0), -+ z0 = svrevb_m (z1, p0, z0)) -+ -+/* -+** revb_s16_m_untied: -+** movprfx z0, z2 -+** revb z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (revb_s16_m_untied, svint16_t, -+ z0 = svrevb_s16_m (z2, p0, z1), -+ z0 = svrevb_m (z2, p0, z1)) -+ -+/* -+** revb_s16_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, \1\.h -+** revb z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (revb_s16_z_tied1, svint16_t, -+ z0 = svrevb_s16_z (p0, z0), -+ z0 = svrevb_z (p0, z0)) -+ -+/* -+** revb_s16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** revb z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (revb_s16_z_untied, svint16_t, -+ z0 = svrevb_s16_z (p0, z1), -+ z0 = svrevb_z (p0, z1)) -+ -+/* -+** revb_s16_x_tied1: -+** revb z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (revb_s16_x_tied1, svint16_t, -+ z0 = svrevb_s16_x (p0, z0), -+ z0 = svrevb_x (p0, z0)) -+ -+/* -+** revb_s16_x_untied: -+** revb z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (revb_s16_x_untied, svint16_t, -+ z0 = svrevb_s16_x (p0, z1), -+ z0 = svrevb_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_s32.c -new file mode 100644 -index 000000000..a46a81973 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_s32.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** revb_s32_m_tied12: -+** revb z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (revb_s32_m_tied12, svint32_t, -+ z0 = svrevb_s32_m (z0, p0, z0), -+ z0 = svrevb_m (z0, p0, z0)) -+ -+/* -+** revb_s32_m_tied1: -+** revb z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (revb_s32_m_tied1, svint32_t, -+ z0 = svrevb_s32_m (z0, p0, z1), -+ z0 = svrevb_m (z0, p0, z1)) -+ -+/* -+** revb_s32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** revb z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (revb_s32_m_tied2, svint32_t, -+ z0 = svrevb_s32_m (z1, p0, z0), -+ z0 = svrevb_m (z1, p0, z0)) -+ -+/* -+** revb_s32_m_untied: -+** movprfx z0, z2 -+** revb z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (revb_s32_m_untied, svint32_t, -+ z0 = svrevb_s32_m (z2, 
p0, z1), -+ z0 = svrevb_m (z2, p0, z1)) -+ -+/* -+** revb_s32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** revb z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (revb_s32_z_tied1, svint32_t, -+ z0 = svrevb_s32_z (p0, z0), -+ z0 = svrevb_z (p0, z0)) -+ -+/* -+** revb_s32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** revb z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (revb_s32_z_untied, svint32_t, -+ z0 = svrevb_s32_z (p0, z1), -+ z0 = svrevb_z (p0, z1)) -+ -+/* -+** revb_s32_x_tied1: -+** revb z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (revb_s32_x_tied1, svint32_t, -+ z0 = svrevb_s32_x (p0, z0), -+ z0 = svrevb_x (p0, z0)) -+ -+/* -+** revb_s32_x_untied: -+** revb z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (revb_s32_x_untied, svint32_t, -+ z0 = svrevb_s32_x (p0, z1), -+ z0 = svrevb_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_s64.c -new file mode 100644 -index 000000000..21547238c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_s64.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** revb_s64_m_tied12: -+** revb z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (revb_s64_m_tied12, svint64_t, -+ z0 = svrevb_s64_m (z0, p0, z0), -+ z0 = svrevb_m (z0, p0, z0)) -+ -+/* -+** revb_s64_m_tied1: -+** revb z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (revb_s64_m_tied1, svint64_t, -+ z0 = svrevb_s64_m (z0, p0, z1), -+ z0 = svrevb_m (z0, p0, z1)) -+ -+/* -+** revb_s64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** revb z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (revb_s64_m_tied2, svint64_t, -+ z0 = svrevb_s64_m (z1, p0, z0), -+ z0 = svrevb_m (z1, p0, z0)) -+ -+/* -+** revb_s64_m_untied: -+** movprfx z0, z2 -+** revb z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (revb_s64_m_untied, svint64_t, -+ z0 = svrevb_s64_m (z2, p0, z1), -+ z0 = svrevb_m (z2, p0, z1)) -+ -+/* -+** revb_s64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** revb z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (revb_s64_z_tied1, svint64_t, -+ z0 = svrevb_s64_z (p0, z0), -+ z0 = svrevb_z (p0, z0)) -+ -+/* -+** revb_s64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** revb z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (revb_s64_z_untied, svint64_t, -+ z0 = svrevb_s64_z (p0, z1), -+ z0 = svrevb_z (p0, z1)) -+ -+/* -+** revb_s64_x_tied1: -+** revb z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (revb_s64_x_tied1, svint64_t, -+ z0 = svrevb_s64_x (p0, z0), -+ z0 = svrevb_x (p0, z0)) -+ -+/* -+** revb_s64_x_untied: -+** revb z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (revb_s64_x_untied, svint64_t, -+ z0 = svrevb_s64_x (p0, z1), -+ z0 = svrevb_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_u16.c -new file mode 100644 -index 000000000..d58bd3d74 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_u16.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** revb_u16_m_tied12: -+** revb z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (revb_u16_m_tied12, svuint16_t, -+ z0 = svrevb_u16_m (z0, p0, z0), -+ z0 = svrevb_m (z0, p0, z0)) -+ -+/* -+** revb_u16_m_tied1: -+** revb z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (revb_u16_m_tied1, svuint16_t, -+ z0 = 
svrevb_u16_m (z0, p0, z1), -+ z0 = svrevb_m (z0, p0, z1)) -+ -+/* -+** revb_u16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** revb z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (revb_u16_m_tied2, svuint16_t, -+ z0 = svrevb_u16_m (z1, p0, z0), -+ z0 = svrevb_m (z1, p0, z0)) -+ -+/* -+** revb_u16_m_untied: -+** movprfx z0, z2 -+** revb z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (revb_u16_m_untied, svuint16_t, -+ z0 = svrevb_u16_m (z2, p0, z1), -+ z0 = svrevb_m (z2, p0, z1)) -+ -+/* -+** revb_u16_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, \1\.h -+** revb z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (revb_u16_z_tied1, svuint16_t, -+ z0 = svrevb_u16_z (p0, z0), -+ z0 = svrevb_z (p0, z0)) -+ -+/* -+** revb_u16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** revb z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (revb_u16_z_untied, svuint16_t, -+ z0 = svrevb_u16_z (p0, z1), -+ z0 = svrevb_z (p0, z1)) -+ -+/* -+** revb_u16_x_tied1: -+** revb z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (revb_u16_x_tied1, svuint16_t, -+ z0 = svrevb_u16_x (p0, z0), -+ z0 = svrevb_x (p0, z0)) -+ -+/* -+** revb_u16_x_untied: -+** revb z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (revb_u16_x_untied, svuint16_t, -+ z0 = svrevb_u16_x (p0, z1), -+ z0 = svrevb_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_u32.c -new file mode 100644 -index 000000000..33df990d5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_u32.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** revb_u32_m_tied12: -+** revb z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (revb_u32_m_tied12, svuint32_t, -+ z0 = svrevb_u32_m (z0, p0, z0), -+ z0 = svrevb_m (z0, p0, z0)) -+ -+/* -+** revb_u32_m_tied1: -+** revb z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (revb_u32_m_tied1, svuint32_t, -+ z0 = svrevb_u32_m (z0, p0, z1), -+ z0 = svrevb_m (z0, p0, z1)) -+ -+/* -+** revb_u32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** revb z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (revb_u32_m_tied2, svuint32_t, -+ z0 = svrevb_u32_m (z1, p0, z0), -+ z0 = svrevb_m (z1, p0, z0)) -+ -+/* -+** revb_u32_m_untied: -+** movprfx z0, z2 -+** revb z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (revb_u32_m_untied, svuint32_t, -+ z0 = svrevb_u32_m (z2, p0, z1), -+ z0 = svrevb_m (z2, p0, z1)) -+ -+/* -+** revb_u32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** revb z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (revb_u32_z_tied1, svuint32_t, -+ z0 = svrevb_u32_z (p0, z0), -+ z0 = svrevb_z (p0, z0)) -+ -+/* -+** revb_u32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** revb z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (revb_u32_z_untied, svuint32_t, -+ z0 = svrevb_u32_z (p0, z1), -+ z0 = svrevb_z (p0, z1)) -+ -+/* -+** revb_u32_x_tied1: -+** revb z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (revb_u32_x_tied1, svuint32_t, -+ z0 = svrevb_u32_x (p0, z0), -+ z0 = svrevb_x (p0, z0)) -+ -+/* -+** revb_u32_x_untied: -+** revb z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (revb_u32_x_untied, svuint32_t, -+ z0 = svrevb_u32_x (p0, z1), -+ z0 = svrevb_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_u64.c -new file mode 100644 -index 000000000..50ad618cc ---- /dev/null -+++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revb_u64.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** revb_u64_m_tied12: -+** revb z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (revb_u64_m_tied12, svuint64_t, -+ z0 = svrevb_u64_m (z0, p0, z0), -+ z0 = svrevb_m (z0, p0, z0)) -+ -+/* -+** revb_u64_m_tied1: -+** revb z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (revb_u64_m_tied1, svuint64_t, -+ z0 = svrevb_u64_m (z0, p0, z1), -+ z0 = svrevb_m (z0, p0, z1)) -+ -+/* -+** revb_u64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** revb z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (revb_u64_m_tied2, svuint64_t, -+ z0 = svrevb_u64_m (z1, p0, z0), -+ z0 = svrevb_m (z1, p0, z0)) -+ -+/* -+** revb_u64_m_untied: -+** movprfx z0, z2 -+** revb z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (revb_u64_m_untied, svuint64_t, -+ z0 = svrevb_u64_m (z2, p0, z1), -+ z0 = svrevb_m (z2, p0, z1)) -+ -+/* -+** revb_u64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** revb z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (revb_u64_z_tied1, svuint64_t, -+ z0 = svrevb_u64_z (p0, z0), -+ z0 = svrevb_z (p0, z0)) -+ -+/* -+** revb_u64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** revb z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (revb_u64_z_untied, svuint64_t, -+ z0 = svrevb_u64_z (p0, z1), -+ z0 = svrevb_z (p0, z1)) -+ -+/* -+** revb_u64_x_tied1: -+** revb z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (revb_u64_x_tied1, svuint64_t, -+ z0 = svrevb_u64_x (p0, z0), -+ z0 = svrevb_x (p0, z0)) -+ -+/* -+** revb_u64_x_untied: -+** revb z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (revb_u64_x_untied, svuint64_t, -+ z0 = svrevb_u64_x (p0, z1), -+ z0 = svrevb_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revh_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revh_s32.c -new file mode 100644 -index 000000000..07d512ddb ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revh_s32.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** revh_s32_m_tied12: -+** revh z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (revh_s32_m_tied12, svint32_t, -+ z0 = svrevh_s32_m (z0, p0, z0), -+ z0 = svrevh_m (z0, p0, z0)) -+ -+/* -+** revh_s32_m_tied1: -+** revh z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (revh_s32_m_tied1, svint32_t, -+ z0 = svrevh_s32_m (z0, p0, z1), -+ z0 = svrevh_m (z0, p0, z1)) -+ -+/* -+** revh_s32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** revh z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (revh_s32_m_tied2, svint32_t, -+ z0 = svrevh_s32_m (z1, p0, z0), -+ z0 = svrevh_m (z1, p0, z0)) -+ -+/* -+** revh_s32_m_untied: -+** movprfx z0, z2 -+** revh z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (revh_s32_m_untied, svint32_t, -+ z0 = svrevh_s32_m (z2, p0, z1), -+ z0 = svrevh_m (z2, p0, z1)) -+ -+/* -+** revh_s32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** revh z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (revh_s32_z_tied1, svint32_t, -+ z0 = svrevh_s32_z (p0, z0), -+ z0 = svrevh_z (p0, z0)) -+ -+/* -+** revh_s32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** revh z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (revh_s32_z_untied, svint32_t, -+ z0 = svrevh_s32_z (p0, z1), -+ z0 = svrevh_z (p0, z1)) -+ -+/* -+** revh_s32_x_tied1: -+** revh z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z 
(revh_s32_x_tied1, svint32_t, -+ z0 = svrevh_s32_x (p0, z0), -+ z0 = svrevh_x (p0, z0)) -+ -+/* -+** revh_s32_x_untied: -+** revh z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (revh_s32_x_untied, svint32_t, -+ z0 = svrevh_s32_x (p0, z1), -+ z0 = svrevh_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revh_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revh_s64.c -new file mode 100644 -index 000000000..b1446347c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revh_s64.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** revh_s64_m_tied12: -+** revh z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (revh_s64_m_tied12, svint64_t, -+ z0 = svrevh_s64_m (z0, p0, z0), -+ z0 = svrevh_m (z0, p0, z0)) -+ -+/* -+** revh_s64_m_tied1: -+** revh z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (revh_s64_m_tied1, svint64_t, -+ z0 = svrevh_s64_m (z0, p0, z1), -+ z0 = svrevh_m (z0, p0, z1)) -+ -+/* -+** revh_s64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** revh z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (revh_s64_m_tied2, svint64_t, -+ z0 = svrevh_s64_m (z1, p0, z0), -+ z0 = svrevh_m (z1, p0, z0)) -+ -+/* -+** revh_s64_m_untied: -+** movprfx z0, z2 -+** revh z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (revh_s64_m_untied, svint64_t, -+ z0 = svrevh_s64_m (z2, p0, z1), -+ z0 = svrevh_m (z2, p0, z1)) -+ -+/* -+** revh_s64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** revh z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (revh_s64_z_tied1, svint64_t, -+ z0 = svrevh_s64_z (p0, z0), -+ z0 = svrevh_z (p0, z0)) -+ -+/* -+** revh_s64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** revh z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (revh_s64_z_untied, svint64_t, -+ z0 = svrevh_s64_z (p0, z1), -+ z0 = svrevh_z (p0, z1)) -+ -+/* -+** revh_s64_x_tied1: -+** revh z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (revh_s64_x_tied1, svint64_t, -+ z0 = svrevh_s64_x (p0, z0), -+ z0 = svrevh_x (p0, z0)) -+ -+/* -+** revh_s64_x_untied: -+** revh z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (revh_s64_x_untied, svint64_t, -+ z0 = svrevh_s64_x (p0, z1), -+ z0 = svrevh_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revh_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revh_u32.c -new file mode 100644 -index 000000000..9ea51884d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revh_u32.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** revh_u32_m_tied12: -+** revh z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (revh_u32_m_tied12, svuint32_t, -+ z0 = svrevh_u32_m (z0, p0, z0), -+ z0 = svrevh_m (z0, p0, z0)) -+ -+/* -+** revh_u32_m_tied1: -+** revh z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (revh_u32_m_tied1, svuint32_t, -+ z0 = svrevh_u32_m (z0, p0, z1), -+ z0 = svrevh_m (z0, p0, z1)) -+ -+/* -+** revh_u32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** revh z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (revh_u32_m_tied2, svuint32_t, -+ z0 = svrevh_u32_m (z1, p0, z0), -+ z0 = svrevh_m (z1, p0, z0)) -+ -+/* -+** revh_u32_m_untied: -+** movprfx z0, z2 -+** revh z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (revh_u32_m_untied, svuint32_t, -+ z0 = svrevh_u32_m (z2, p0, z1), -+ z0 = svrevh_m (z2, p0, z1)) -+ -+/* -+** revh_u32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, 
p0/z, \1\.s -+** revh z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (revh_u32_z_tied1, svuint32_t, -+ z0 = svrevh_u32_z (p0, z0), -+ z0 = svrevh_z (p0, z0)) -+ -+/* -+** revh_u32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** revh z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (revh_u32_z_untied, svuint32_t, -+ z0 = svrevh_u32_z (p0, z1), -+ z0 = svrevh_z (p0, z1)) -+ -+/* -+** revh_u32_x_tied1: -+** revh z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (revh_u32_x_tied1, svuint32_t, -+ z0 = svrevh_u32_x (p0, z0), -+ z0 = svrevh_x (p0, z0)) -+ -+/* -+** revh_u32_x_untied: -+** revh z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (revh_u32_x_untied, svuint32_t, -+ z0 = svrevh_u32_x (p0, z1), -+ z0 = svrevh_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revh_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revh_u64.c -new file mode 100644 -index 000000000..7b2da2701 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revh_u64.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** revh_u64_m_tied12: -+** revh z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (revh_u64_m_tied12, svuint64_t, -+ z0 = svrevh_u64_m (z0, p0, z0), -+ z0 = svrevh_m (z0, p0, z0)) -+ -+/* -+** revh_u64_m_tied1: -+** revh z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (revh_u64_m_tied1, svuint64_t, -+ z0 = svrevh_u64_m (z0, p0, z1), -+ z0 = svrevh_m (z0, p0, z1)) -+ -+/* -+** revh_u64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** revh z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (revh_u64_m_tied2, svuint64_t, -+ z0 = svrevh_u64_m (z1, p0, z0), -+ z0 = svrevh_m (z1, p0, z0)) -+ -+/* -+** revh_u64_m_untied: -+** movprfx z0, z2 -+** revh z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (revh_u64_m_untied, svuint64_t, -+ z0 = svrevh_u64_m (z2, p0, z1), -+ z0 = svrevh_m (z2, p0, z1)) -+ -+/* -+** revh_u64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** revh z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (revh_u64_z_tied1, svuint64_t, -+ z0 = svrevh_u64_z (p0, z0), -+ z0 = svrevh_z (p0, z0)) -+ -+/* -+** revh_u64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** revh z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (revh_u64_z_untied, svuint64_t, -+ z0 = svrevh_u64_z (p0, z1), -+ z0 = svrevh_z (p0, z1)) -+ -+/* -+** revh_u64_x_tied1: -+** revh z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (revh_u64_x_tied1, svuint64_t, -+ z0 = svrevh_u64_x (p0, z0), -+ z0 = svrevh_x (p0, z0)) -+ -+/* -+** revh_u64_x_untied: -+** revh z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (revh_u64_x_untied, svuint64_t, -+ z0 = svrevh_u64_x (p0, z1), -+ z0 = svrevh_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revw_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revw_s64.c -new file mode 100644 -index 000000000..26ca0f0bd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revw_s64.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** revw_s64_m_tied12: -+** revw z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (revw_s64_m_tied12, svint64_t, -+ z0 = svrevw_s64_m (z0, p0, z0), -+ z0 = svrevw_m (z0, p0, z0)) -+ -+/* -+** revw_s64_m_tied1: -+** revw z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (revw_s64_m_tied1, svint64_t, -+ z0 = svrevw_s64_m (z0, p0, z1), -+ z0 = svrevw_m (z0, p0, z1)) -+ -+/* -+** revw_s64_m_tied2: -+** mov 
(z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** revw z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (revw_s64_m_tied2, svint64_t, -+ z0 = svrevw_s64_m (z1, p0, z0), -+ z0 = svrevw_m (z1, p0, z0)) -+ -+/* -+** revw_s64_m_untied: -+** movprfx z0, z2 -+** revw z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (revw_s64_m_untied, svint64_t, -+ z0 = svrevw_s64_m (z2, p0, z1), -+ z0 = svrevw_m (z2, p0, z1)) -+ -+/* -+** revw_s64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** revw z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (revw_s64_z_tied1, svint64_t, -+ z0 = svrevw_s64_z (p0, z0), -+ z0 = svrevw_z (p0, z0)) -+ -+/* -+** revw_s64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** revw z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (revw_s64_z_untied, svint64_t, -+ z0 = svrevw_s64_z (p0, z1), -+ z0 = svrevw_z (p0, z1)) -+ -+/* -+** revw_s64_x_tied1: -+** revw z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (revw_s64_x_tied1, svint64_t, -+ z0 = svrevw_s64_x (p0, z0), -+ z0 = svrevw_x (p0, z0)) -+ -+/* -+** revw_s64_x_untied: -+** revw z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (revw_s64_x_untied, svint64_t, -+ z0 = svrevw_s64_x (p0, z1), -+ z0 = svrevw_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revw_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revw_u64.c -new file mode 100644 -index 000000000..c70cdb428 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/revw_u64.c -@@ -0,0 +1,81 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** revw_u64_m_tied12: -+** revw z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (revw_u64_m_tied12, svuint64_t, -+ z0 = svrevw_u64_m (z0, p0, z0), -+ z0 = svrevw_m (z0, p0, z0)) -+ -+/* -+** revw_u64_m_tied1: -+** revw z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (revw_u64_m_tied1, svuint64_t, -+ z0 = svrevw_u64_m (z0, p0, z1), -+ z0 = svrevw_m (z0, p0, z1)) -+ -+/* -+** revw_u64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** revw z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (revw_u64_m_tied2, svuint64_t, -+ z0 = svrevw_u64_m (z1, p0, z0), -+ z0 = svrevw_m (z1, p0, z0)) -+ -+/* -+** revw_u64_m_untied: -+** movprfx z0, z2 -+** revw z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (revw_u64_m_untied, svuint64_t, -+ z0 = svrevw_u64_m (z2, p0, z1), -+ z0 = svrevw_m (z2, p0, z1)) -+ -+/* -+** revw_u64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** revw z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (revw_u64_z_tied1, svuint64_t, -+ z0 = svrevw_u64_z (p0, z0), -+ z0 = svrevw_z (p0, z0)) -+ -+/* -+** revw_u64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** revw z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (revw_u64_z_untied, svuint64_t, -+ z0 = svrevw_u64_z (p0, z1), -+ z0 = svrevw_z (p0, z1)) -+ -+/* -+** revw_u64_x_tied1: -+** revw z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (revw_u64_x_tied1, svuint64_t, -+ z0 = svrevw_u64_x (p0, z0), -+ z0 = svrevw_x (p0, z0)) -+ -+/* -+** revw_u64_x_untied: -+** revw z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (revw_u64_x_untied, svuint64_t, -+ z0 = svrevw_u64_x (p0, z1), -+ z0 = svrevw_x (p0, z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinta_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinta_f16.c -new file mode 100644 -index 000000000..99a604209 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinta_f16.c -@@ -0,0 +1,103 @@ -+/* { dg-final { check-function-bodies "**" "" 
"-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rinta_f16_m_tied12: -+** frinta z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rinta_f16_m_tied12, svfloat16_t, -+ z0 = svrinta_f16_m (z0, p0, z0), -+ z0 = svrinta_m (z0, p0, z0)) -+ -+/* -+** rinta_f16_m_tied1: -+** frinta z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rinta_f16_m_tied1, svfloat16_t, -+ z0 = svrinta_f16_m (z0, p0, z1), -+ z0 = svrinta_m (z0, p0, z1)) -+ -+/* -+** rinta_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** frinta z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rinta_f16_m_tied2, svfloat16_t, -+ z0 = svrinta_f16_m (z1, p0, z0), -+ z0 = svrinta_m (z1, p0, z0)) -+ -+/* -+** rinta_f16_m_untied: -+** movprfx z0, z2 -+** frinta z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rinta_f16_m_untied, svfloat16_t, -+ z0 = svrinta_f16_m (z2, p0, z1), -+ z0 = svrinta_m (z2, p0, z1)) -+ -+/* -+** rinta_f16_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, \1\.h -+** frinta z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rinta_f16_z_tied1, svfloat16_t, -+ z0 = svrinta_f16_z (p0, z0), -+ z0 = svrinta_z (p0, z0)) -+ -+/* -+** rinta_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** frinta z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rinta_f16_z_untied, svfloat16_t, -+ z0 = svrinta_f16_z (p0, z1), -+ z0 = svrinta_z (p0, z1)) -+ -+/* -+** rinta_f16_x_tied1: -+** frinta z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rinta_f16_x_tied1, svfloat16_t, -+ z0 = svrinta_f16_x (p0, z0), -+ z0 = svrinta_x (p0, z0)) -+ -+/* -+** rinta_f16_x_untied: -+** frinta z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rinta_f16_x_untied, svfloat16_t, -+ z0 = svrinta_f16_x (p0, z1), -+ z0 = svrinta_x (p0, z1)) -+ -+/* -+** ptrue_rinta_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rinta_f16_x_tied1, svfloat16_t, -+ z0 = svrinta_f16_x (svptrue_b16 (), z0), -+ z0 = svrinta_x (svptrue_b16 (), z0)) -+ -+/* -+** ptrue_rinta_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rinta_f16_x_untied, svfloat16_t, -+ z0 = svrinta_f16_x (svptrue_b16 (), z1), -+ z0 = svrinta_x (svptrue_b16 (), z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinta_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinta_f32.c -new file mode 100644 -index 000000000..b4e3714bc ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinta_f32.c -@@ -0,0 +1,103 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rinta_f32_m_tied12: -+** frinta z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rinta_f32_m_tied12, svfloat32_t, -+ z0 = svrinta_f32_m (z0, p0, z0), -+ z0 = svrinta_m (z0, p0, z0)) -+ -+/* -+** rinta_f32_m_tied1: -+** frinta z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rinta_f32_m_tied1, svfloat32_t, -+ z0 = svrinta_f32_m (z0, p0, z1), -+ z0 = svrinta_m (z0, p0, z1)) -+ -+/* -+** rinta_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** frinta z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rinta_f32_m_tied2, svfloat32_t, -+ z0 = svrinta_f32_m (z1, p0, z0), -+ z0 = svrinta_m (z1, p0, z0)) -+ -+/* -+** rinta_f32_m_untied: -+** movprfx z0, z2 -+** frinta z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rinta_f32_m_untied, svfloat32_t, -+ z0 = svrinta_f32_m (z2, p0, z1), -+ z0 = svrinta_m (z2, p0, z1)) -+ -+/* -+** rinta_f32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** frinta z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rinta_f32_z_tied1, svfloat32_t, -+ z0 = svrinta_f32_z (p0, z0), -+ z0 = svrinta_z (p0, z0)) -+ -+/* -+** rinta_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** frinta z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rinta_f32_z_untied, svfloat32_t, -+ z0 = svrinta_f32_z (p0, z1), -+ z0 = svrinta_z (p0, z1)) -+ -+/* -+** rinta_f32_x_tied1: -+** frinta z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rinta_f32_x_tied1, svfloat32_t, -+ z0 = svrinta_f32_x (p0, z0), -+ z0 = svrinta_x (p0, z0)) -+ -+/* -+** rinta_f32_x_untied: -+** frinta z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rinta_f32_x_untied, svfloat32_t, -+ z0 = svrinta_f32_x (p0, z1), -+ z0 = svrinta_x (p0, z1)) -+ -+/* -+** ptrue_rinta_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rinta_f32_x_tied1, svfloat32_t, -+ z0 = svrinta_f32_x (svptrue_b32 (), z0), -+ z0 = svrinta_x (svptrue_b32 (), z0)) -+ -+/* -+** ptrue_rinta_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rinta_f32_x_untied, svfloat32_t, -+ z0 = svrinta_f32_x (svptrue_b32 (), z1), -+ z0 = svrinta_x (svptrue_b32 (), z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinta_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinta_f64.c -new file mode 100644 -index 000000000..24d6b7dc8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinta_f64.c -@@ -0,0 +1,103 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rinta_f64_m_tied12: -+** frinta z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rinta_f64_m_tied12, svfloat64_t, -+ z0 = svrinta_f64_m (z0, p0, z0), -+ z0 = svrinta_m (z0, p0, z0)) -+ -+/* -+** rinta_f64_m_tied1: -+** frinta z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rinta_f64_m_tied1, svfloat64_t, -+ z0 = svrinta_f64_m (z0, p0, z1), -+ z0 = svrinta_m (z0, p0, z1)) -+ -+/* -+** rinta_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** frinta z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (rinta_f64_m_tied2, svfloat64_t, -+ z0 = svrinta_f64_m (z1, p0, z0), -+ z0 = svrinta_m (z1, p0, z0)) -+ -+/* -+** rinta_f64_m_untied: -+** movprfx z0, z2 -+** frinta z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rinta_f64_m_untied, svfloat64_t, -+ z0 = svrinta_f64_m (z2, p0, z1), -+ z0 = svrinta_m (z2, p0, z1)) -+ -+/* -+** rinta_f64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** frinta z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (rinta_f64_z_tied1, svfloat64_t, -+ z0 = svrinta_f64_z (p0, z0), -+ z0 = svrinta_z (p0, z0)) -+ -+/* -+** rinta_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** frinta z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rinta_f64_z_untied, svfloat64_t, -+ z0 = svrinta_f64_z (p0, z1), -+ z0 = svrinta_z (p0, z1)) -+ -+/* -+** rinta_f64_x_tied1: -+** frinta z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rinta_f64_x_tied1, svfloat64_t, -+ z0 = svrinta_f64_x (p0, z0), -+ z0 = svrinta_x (p0, z0)) -+ -+/* -+** rinta_f64_x_untied: -+** frinta z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rinta_f64_x_untied, svfloat64_t, -+ z0 = svrinta_f64_x (p0, z1), -+ z0 = svrinta_x (p0, z1)) -+ -+/* -+** ptrue_rinta_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rinta_f64_x_tied1, svfloat64_t, -+ z0 = svrinta_f64_x (svptrue_b64 (), z0), -+ z0 = svrinta_x (svptrue_b64 (), z0)) -+ -+/* -+** ptrue_rinta_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rinta_f64_x_untied, svfloat64_t, -+ z0 = svrinta_f64_x (svptrue_b64 (), z1), -+ z0 = svrinta_x (svptrue_b64 (), z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinti_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinti_f16.c -new file mode 100644 -index 000000000..1f0ac85e3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinti_f16.c -@@ -0,0 +1,103 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rinti_f16_m_tied12: -+** frinti z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rinti_f16_m_tied12, svfloat16_t, -+ z0 = svrinti_f16_m (z0, p0, z0), -+ z0 = svrinti_m (z0, p0, z0)) -+ -+/* -+** rinti_f16_m_tied1: -+** frinti z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rinti_f16_m_tied1, svfloat16_t, -+ z0 = svrinti_f16_m (z0, p0, z1), -+ z0 = svrinti_m (z0, p0, z1)) -+ -+/* -+** rinti_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** frinti z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rinti_f16_m_tied2, svfloat16_t, -+ z0 = svrinti_f16_m (z1, p0, z0), -+ z0 = svrinti_m (z1, p0, z0)) -+ -+/* -+** rinti_f16_m_untied: -+** movprfx z0, z2 -+** frinti z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rinti_f16_m_untied, svfloat16_t, -+ z0 = svrinti_f16_m (z2, p0, z1), -+ z0 = svrinti_m (z2, p0, z1)) -+ -+/* -+** rinti_f16_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, \1\.h -+** frinti z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rinti_f16_z_tied1, svfloat16_t, -+ z0 = svrinti_f16_z (p0, z0), -+ z0 = svrinti_z (p0, z0)) -+ -+/* -+** rinti_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** frinti z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rinti_f16_z_untied, svfloat16_t, -+ z0 = svrinti_f16_z (p0, z1), -+ z0 = svrinti_z (p0, z1)) -+ -+/* -+** rinti_f16_x_tied1: -+** frinti z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rinti_f16_x_tied1, svfloat16_t, -+ z0 = svrinti_f16_x (p0, z0), -+ z0 = svrinti_x (p0, z0)) -+ -+/* -+** rinti_f16_x_untied: -+** frinti z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rinti_f16_x_untied, svfloat16_t, -+ z0 = svrinti_f16_x (p0, z1), -+ z0 = svrinti_x (p0, z1)) -+ -+/* -+** ptrue_rinti_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rinti_f16_x_tied1, svfloat16_t, -+ z0 = svrinti_f16_x (svptrue_b16 (), z0), -+ z0 = svrinti_x (svptrue_b16 (), z0)) -+ -+/* -+** ptrue_rinti_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rinti_f16_x_untied, svfloat16_t, -+ z0 = svrinti_f16_x (svptrue_b16 (), z1), -+ z0 = svrinti_x (svptrue_b16 (), z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinti_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinti_f32.c -new file mode 100644 -index 000000000..cf54fde5c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinti_f32.c -@@ -0,0 +1,103 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rinti_f32_m_tied12: -+** frinti z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rinti_f32_m_tied12, svfloat32_t, -+ z0 = svrinti_f32_m (z0, p0, z0), -+ z0 = svrinti_m (z0, p0, z0)) -+ -+/* -+** rinti_f32_m_tied1: -+** frinti z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rinti_f32_m_tied1, svfloat32_t, -+ z0 = svrinti_f32_m (z0, p0, z1), -+ z0 = svrinti_m (z0, p0, z1)) -+ -+/* -+** rinti_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** frinti z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rinti_f32_m_tied2, svfloat32_t, -+ z0 = svrinti_f32_m (z1, p0, z0), -+ z0 = svrinti_m (z1, p0, z0)) -+ -+/* -+** rinti_f32_m_untied: -+** movprfx z0, z2 -+** frinti z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rinti_f32_m_untied, svfloat32_t, -+ z0 = svrinti_f32_m (z2, p0, z1), -+ z0 = svrinti_m (z2, p0, z1)) -+ -+/* -+** rinti_f32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** frinti z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rinti_f32_z_tied1, svfloat32_t, -+ z0 = svrinti_f32_z (p0, z0), -+ z0 = svrinti_z (p0, z0)) -+ -+/* -+** rinti_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** frinti z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rinti_f32_z_untied, svfloat32_t, -+ z0 = svrinti_f32_z (p0, z1), -+ z0 = svrinti_z (p0, z1)) -+ -+/* -+** rinti_f32_x_tied1: -+** frinti z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rinti_f32_x_tied1, svfloat32_t, -+ z0 = svrinti_f32_x (p0, z0), -+ z0 = svrinti_x (p0, z0)) -+ -+/* -+** rinti_f32_x_untied: -+** frinti z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rinti_f32_x_untied, svfloat32_t, -+ z0 = svrinti_f32_x (p0, z1), -+ z0 = svrinti_x (p0, z1)) -+ -+/* -+** ptrue_rinti_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rinti_f32_x_tied1, svfloat32_t, -+ z0 = svrinti_f32_x (svptrue_b32 (), z0), -+ z0 = svrinti_x (svptrue_b32 (), z0)) -+ -+/* -+** ptrue_rinti_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rinti_f32_x_untied, svfloat32_t, -+ z0 = svrinti_f32_x (svptrue_b32 (), z1), -+ z0 = svrinti_x (svptrue_b32 (), z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinti_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinti_f64.c -new file mode 100644 -index 000000000..08b861caa ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rinti_f64.c -@@ -0,0 +1,103 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rinti_f64_m_tied12: -+** frinti z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rinti_f64_m_tied12, svfloat64_t, -+ z0 = svrinti_f64_m (z0, p0, z0), -+ z0 = svrinti_m (z0, p0, z0)) -+ -+/* -+** rinti_f64_m_tied1: -+** frinti z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rinti_f64_m_tied1, svfloat64_t, -+ z0 = svrinti_f64_m (z0, p0, z1), -+ z0 = svrinti_m (z0, p0, z1)) -+ -+/* -+** rinti_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** frinti z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (rinti_f64_m_tied2, svfloat64_t, -+ z0 = svrinti_f64_m (z1, p0, z0), -+ z0 = svrinti_m (z1, p0, z0)) -+ -+/* -+** rinti_f64_m_untied: -+** movprfx z0, z2 -+** frinti z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rinti_f64_m_untied, svfloat64_t, -+ z0 = svrinti_f64_m (z2, p0, z1), -+ z0 = svrinti_m (z2, p0, z1)) -+ -+/* -+** rinti_f64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** frinti z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (rinti_f64_z_tied1, svfloat64_t, -+ z0 = svrinti_f64_z (p0, z0), -+ z0 = svrinti_z (p0, z0)) -+ -+/* -+** rinti_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** frinti z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rinti_f64_z_untied, svfloat64_t, -+ z0 = svrinti_f64_z (p0, z1), -+ z0 = svrinti_z (p0, z1)) -+ -+/* -+** rinti_f64_x_tied1: -+** frinti z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rinti_f64_x_tied1, svfloat64_t, -+ z0 = svrinti_f64_x (p0, z0), -+ z0 = svrinti_x (p0, z0)) -+ -+/* -+** rinti_f64_x_untied: -+** frinti z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rinti_f64_x_untied, svfloat64_t, -+ z0 = svrinti_f64_x (p0, z1), -+ z0 = svrinti_x (p0, z1)) -+ -+/* -+** ptrue_rinti_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rinti_f64_x_tied1, svfloat64_t, -+ z0 = svrinti_f64_x (svptrue_b64 (), z0), -+ z0 = svrinti_x (svptrue_b64 (), z0)) -+ -+/* -+** ptrue_rinti_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rinti_f64_x_untied, svfloat64_t, -+ z0 = svrinti_f64_x (svptrue_b64 (), z1), -+ z0 = svrinti_x (svptrue_b64 (), z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintm_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintm_f16.c -new file mode 100644 -index 000000000..194d01cbd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintm_f16.c -@@ -0,0 +1,103 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rintm_f16_m_tied12: -+** frintm z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintm_f16_m_tied12, svfloat16_t, -+ z0 = svrintm_f16_m (z0, p0, z0), -+ z0 = svrintm_m (z0, p0, z0)) -+ -+/* -+** rintm_f16_m_tied1: -+** frintm z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintm_f16_m_tied1, svfloat16_t, -+ z0 = svrintm_f16_m (z0, p0, z1), -+ z0 = svrintm_m (z0, p0, z1)) -+ -+/* -+** rintm_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** frintm z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintm_f16_m_tied2, svfloat16_t, -+ z0 = svrintm_f16_m (z1, p0, z0), -+ z0 = svrintm_m (z1, p0, z0)) -+ -+/* -+** rintm_f16_m_untied: -+** movprfx z0, z2 -+** frintm z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintm_f16_m_untied, svfloat16_t, -+ z0 = svrintm_f16_m (z2, p0, z1), -+ z0 = svrintm_m (z2, p0, z1)) -+ -+/* -+** rintm_f16_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, \1\.h -+** frintm z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintm_f16_z_tied1, svfloat16_t, -+ z0 = svrintm_f16_z (p0, z0), -+ z0 = svrintm_z (p0, z0)) -+ -+/* -+** rintm_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** frintm z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintm_f16_z_untied, svfloat16_t, -+ z0 = svrintm_f16_z (p0, z1), -+ z0 = svrintm_z (p0, z1)) -+ -+/* -+** rintm_f16_x_tied1: -+** frintm z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintm_f16_x_tied1, svfloat16_t, -+ z0 = svrintm_f16_x (p0, z0), -+ z0 = svrintm_x (p0, z0)) -+ -+/* -+** rintm_f16_x_untied: -+** frintm z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintm_f16_x_untied, svfloat16_t, -+ z0 = svrintm_f16_x (p0, z1), -+ z0 = svrintm_x (p0, z1)) -+ -+/* -+** ptrue_rintm_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rintm_f16_x_tied1, svfloat16_t, -+ z0 = svrintm_f16_x (svptrue_b16 (), z0), -+ z0 = svrintm_x (svptrue_b16 (), z0)) -+ -+/* -+** ptrue_rintm_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rintm_f16_x_untied, svfloat16_t, -+ z0 = svrintm_f16_x (svptrue_b16 (), z1), -+ z0 = svrintm_x (svptrue_b16 (), z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintm_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintm_f32.c -new file mode 100644 -index 000000000..6c3297aa1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintm_f32.c -@@ -0,0 +1,103 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rintm_f32_m_tied12: -+** frintm z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintm_f32_m_tied12, svfloat32_t, -+ z0 = svrintm_f32_m (z0, p0, z0), -+ z0 = svrintm_m (z0, p0, z0)) -+ -+/* -+** rintm_f32_m_tied1: -+** frintm z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintm_f32_m_tied1, svfloat32_t, -+ z0 = svrintm_f32_m (z0, p0, z1), -+ z0 = svrintm_m (z0, p0, z1)) -+ -+/* -+** rintm_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** frintm z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintm_f32_m_tied2, svfloat32_t, -+ z0 = svrintm_f32_m (z1, p0, z0), -+ z0 = svrintm_m (z1, p0, z0)) -+ -+/* -+** rintm_f32_m_untied: -+** movprfx z0, z2 -+** frintm z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintm_f32_m_untied, svfloat32_t, -+ z0 = svrintm_f32_m (z2, p0, z1), -+ z0 = svrintm_m (z2, p0, z1)) -+ -+/* -+** rintm_f32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** frintm z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintm_f32_z_tied1, svfloat32_t, -+ z0 = svrintm_f32_z (p0, z0), -+ z0 = svrintm_z (p0, z0)) -+ -+/* -+** rintm_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** frintm z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintm_f32_z_untied, svfloat32_t, -+ z0 = svrintm_f32_z (p0, z1), -+ z0 = svrintm_z (p0, z1)) -+ -+/* -+** rintm_f32_x_tied1: -+** frintm z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintm_f32_x_tied1, svfloat32_t, -+ z0 = svrintm_f32_x (p0, z0), -+ z0 = svrintm_x (p0, z0)) -+ -+/* -+** rintm_f32_x_untied: -+** frintm z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintm_f32_x_untied, svfloat32_t, -+ z0 = svrintm_f32_x (p0, z1), -+ z0 = svrintm_x (p0, z1)) -+ -+/* -+** ptrue_rintm_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rintm_f32_x_tied1, svfloat32_t, -+ z0 = svrintm_f32_x (svptrue_b32 (), z0), -+ z0 = svrintm_x (svptrue_b32 (), z0)) -+ -+/* -+** ptrue_rintm_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rintm_f32_x_untied, svfloat32_t, -+ z0 = svrintm_f32_x (svptrue_b32 (), z1), -+ z0 = svrintm_x (svptrue_b32 (), z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintm_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintm_f64.c -new file mode 100644 -index 000000000..ecbb24447 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintm_f64.c -@@ -0,0 +1,103 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rintm_f64_m_tied12: -+** frintm z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rintm_f64_m_tied12, svfloat64_t, -+ z0 = svrintm_f64_m (z0, p0, z0), -+ z0 = svrintm_m (z0, p0, z0)) -+ -+/* -+** rintm_f64_m_tied1: -+** frintm z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rintm_f64_m_tied1, svfloat64_t, -+ z0 = svrintm_f64_m (z0, p0, z1), -+ z0 = svrintm_m (z0, p0, z1)) -+ -+/* -+** rintm_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** frintm z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (rintm_f64_m_tied2, svfloat64_t, -+ z0 = svrintm_f64_m (z1, p0, z0), -+ z0 = svrintm_m (z1, p0, z0)) -+ -+/* -+** rintm_f64_m_untied: -+** movprfx z0, z2 -+** frintm z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rintm_f64_m_untied, svfloat64_t, -+ z0 = svrintm_f64_m (z2, p0, z1), -+ z0 = svrintm_m (z2, p0, z1)) -+ -+/* -+** rintm_f64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** frintm z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (rintm_f64_z_tied1, svfloat64_t, -+ z0 = svrintm_f64_z (p0, z0), -+ z0 = svrintm_z (p0, z0)) -+ -+/* -+** rintm_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** frintm z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rintm_f64_z_untied, svfloat64_t, -+ z0 = svrintm_f64_z (p0, z1), -+ z0 = svrintm_z (p0, z1)) -+ -+/* -+** rintm_f64_x_tied1: -+** frintm z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rintm_f64_x_tied1, svfloat64_t, -+ z0 = svrintm_f64_x (p0, z0), -+ z0 = svrintm_x (p0, z0)) -+ -+/* -+** rintm_f64_x_untied: -+** frintm z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rintm_f64_x_untied, svfloat64_t, -+ z0 = svrintm_f64_x (p0, z1), -+ z0 = svrintm_x (p0, z1)) -+ -+/* -+** ptrue_rintm_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rintm_f64_x_tied1, svfloat64_t, -+ z0 = svrintm_f64_x (svptrue_b64 (), z0), -+ z0 = svrintm_x (svptrue_b64 (), z0)) -+ -+/* -+** ptrue_rintm_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rintm_f64_x_untied, svfloat64_t, -+ z0 = svrintm_f64_x (svptrue_b64 (), z1), -+ z0 = svrintm_x (svptrue_b64 (), z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintn_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintn_f16.c -new file mode 100644 -index 000000000..273307ef1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintn_f16.c -@@ -0,0 +1,103 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rintn_f16_m_tied12: -+** frintn z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintn_f16_m_tied12, svfloat16_t, -+ z0 = svrintn_f16_m (z0, p0, z0), -+ z0 = svrintn_m (z0, p0, z0)) -+ -+/* -+** rintn_f16_m_tied1: -+** frintn z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintn_f16_m_tied1, svfloat16_t, -+ z0 = svrintn_f16_m (z0, p0, z1), -+ z0 = svrintn_m (z0, p0, z1)) -+ -+/* -+** rintn_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** frintn z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintn_f16_m_tied2, svfloat16_t, -+ z0 = svrintn_f16_m (z1, p0, z0), -+ z0 = svrintn_m (z1, p0, z0)) -+ -+/* -+** rintn_f16_m_untied: -+** movprfx z0, z2 -+** frintn z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintn_f16_m_untied, svfloat16_t, -+ z0 = svrintn_f16_m (z2, p0, z1), -+ z0 = svrintn_m (z2, p0, z1)) -+ -+/* -+** rintn_f16_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, \1\.h -+** frintn z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintn_f16_z_tied1, svfloat16_t, -+ z0 = svrintn_f16_z (p0, z0), -+ z0 = svrintn_z (p0, z0)) -+ -+/* -+** rintn_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** frintn z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintn_f16_z_untied, svfloat16_t, -+ z0 = svrintn_f16_z (p0, z1), -+ z0 = svrintn_z (p0, z1)) -+ -+/* -+** rintn_f16_x_tied1: -+** frintn z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintn_f16_x_tied1, svfloat16_t, -+ z0 = svrintn_f16_x (p0, z0), -+ z0 = svrintn_x (p0, z0)) -+ -+/* -+** rintn_f16_x_untied: -+** frintn z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintn_f16_x_untied, svfloat16_t, -+ z0 = svrintn_f16_x (p0, z1), -+ z0 = svrintn_x (p0, z1)) -+ -+/* -+** ptrue_rintn_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rintn_f16_x_tied1, svfloat16_t, -+ z0 = svrintn_f16_x (svptrue_b16 (), z0), -+ z0 = svrintn_x (svptrue_b16 (), z0)) -+ -+/* -+** ptrue_rintn_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rintn_f16_x_untied, svfloat16_t, -+ z0 = svrintn_f16_x (svptrue_b16 (), z1), -+ z0 = svrintn_x (svptrue_b16 (), z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintn_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintn_f32.c -new file mode 100644 -index 000000000..bafd43106 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintn_f32.c -@@ -0,0 +1,103 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rintn_f32_m_tied12: -+** frintn z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintn_f32_m_tied12, svfloat32_t, -+ z0 = svrintn_f32_m (z0, p0, z0), -+ z0 = svrintn_m (z0, p0, z0)) -+ -+/* -+** rintn_f32_m_tied1: -+** frintn z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintn_f32_m_tied1, svfloat32_t, -+ z0 = svrintn_f32_m (z0, p0, z1), -+ z0 = svrintn_m (z0, p0, z1)) -+ -+/* -+** rintn_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** frintn z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintn_f32_m_tied2, svfloat32_t, -+ z0 = svrintn_f32_m (z1, p0, z0), -+ z0 = svrintn_m (z1, p0, z0)) -+ -+/* -+** rintn_f32_m_untied: -+** movprfx z0, z2 -+** frintn z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintn_f32_m_untied, svfloat32_t, -+ z0 = svrintn_f32_m (z2, p0, z1), -+ z0 = svrintn_m (z2, p0, z1)) -+ -+/* -+** rintn_f32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** frintn z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintn_f32_z_tied1, svfloat32_t, -+ z0 = svrintn_f32_z (p0, z0), -+ z0 = svrintn_z (p0, z0)) -+ -+/* -+** rintn_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** frintn z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintn_f32_z_untied, svfloat32_t, -+ z0 = svrintn_f32_z (p0, z1), -+ z0 = svrintn_z (p0, z1)) -+ -+/* -+** rintn_f32_x_tied1: -+** frintn z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintn_f32_x_tied1, svfloat32_t, -+ z0 = svrintn_f32_x (p0, z0), -+ z0 = svrintn_x (p0, z0)) -+ -+/* -+** rintn_f32_x_untied: -+** frintn z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintn_f32_x_untied, svfloat32_t, -+ z0 = svrintn_f32_x (p0, z1), -+ z0 = svrintn_x (p0, z1)) -+ -+/* -+** ptrue_rintn_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rintn_f32_x_tied1, svfloat32_t, -+ z0 = svrintn_f32_x (svptrue_b32 (), z0), -+ z0 = svrintn_x (svptrue_b32 (), z0)) -+ -+/* -+** ptrue_rintn_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rintn_f32_x_untied, svfloat32_t, -+ z0 = svrintn_f32_x (svptrue_b32 (), z1), -+ z0 = svrintn_x (svptrue_b32 (), z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintn_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintn_f64.c -new file mode 100644 -index 000000000..0142315e6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintn_f64.c -@@ -0,0 +1,103 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rintn_f64_m_tied12: -+** frintn z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rintn_f64_m_tied12, svfloat64_t, -+ z0 = svrintn_f64_m (z0, p0, z0), -+ z0 = svrintn_m (z0, p0, z0)) -+ -+/* -+** rintn_f64_m_tied1: -+** frintn z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rintn_f64_m_tied1, svfloat64_t, -+ z0 = svrintn_f64_m (z0, p0, z1), -+ z0 = svrintn_m (z0, p0, z1)) -+ -+/* -+** rintn_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** frintn z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (rintn_f64_m_tied2, svfloat64_t, -+ z0 = svrintn_f64_m (z1, p0, z0), -+ z0 = svrintn_m (z1, p0, z0)) -+ -+/* -+** rintn_f64_m_untied: -+** movprfx z0, z2 -+** frintn z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rintn_f64_m_untied, svfloat64_t, -+ z0 = svrintn_f64_m (z2, p0, z1), -+ z0 = svrintn_m (z2, p0, z1)) -+ -+/* -+** rintn_f64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** frintn z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (rintn_f64_z_tied1, svfloat64_t, -+ z0 = svrintn_f64_z (p0, z0), -+ z0 = svrintn_z (p0, z0)) -+ -+/* -+** rintn_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** frintn z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rintn_f64_z_untied, svfloat64_t, -+ z0 = svrintn_f64_z (p0, z1), -+ z0 = svrintn_z (p0, z1)) -+ -+/* -+** rintn_f64_x_tied1: -+** frintn z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rintn_f64_x_tied1, svfloat64_t, -+ z0 = svrintn_f64_x (p0, z0), -+ z0 = svrintn_x (p0, z0)) -+ -+/* -+** rintn_f64_x_untied: -+** frintn z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rintn_f64_x_untied, svfloat64_t, -+ z0 = svrintn_f64_x (p0, z1), -+ z0 = svrintn_x (p0, z1)) -+ -+/* -+** ptrue_rintn_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rintn_f64_x_tied1, svfloat64_t, -+ z0 = svrintn_f64_x (svptrue_b64 (), z0), -+ z0 = svrintn_x (svptrue_b64 (), z0)) -+ -+/* -+** ptrue_rintn_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rintn_f64_x_untied, svfloat64_t, -+ z0 = svrintn_f64_x (svptrue_b64 (), z1), -+ z0 = svrintn_x (svptrue_b64 (), z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintp_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintp_f16.c -new file mode 100644 -index 000000000..0e85c3448 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintp_f16.c -@@ -0,0 +1,103 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rintp_f16_m_tied12: -+** frintp z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintp_f16_m_tied12, svfloat16_t, -+ z0 = svrintp_f16_m (z0, p0, z0), -+ z0 = svrintp_m (z0, p0, z0)) -+ -+/* -+** rintp_f16_m_tied1: -+** frintp z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintp_f16_m_tied1, svfloat16_t, -+ z0 = svrintp_f16_m (z0, p0, z1), -+ z0 = svrintp_m (z0, p0, z1)) -+ -+/* -+** rintp_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** frintp z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintp_f16_m_tied2, svfloat16_t, -+ z0 = svrintp_f16_m (z1, p0, z0), -+ z0 = svrintp_m (z1, p0, z0)) -+ -+/* -+** rintp_f16_m_untied: -+** movprfx z0, z2 -+** frintp z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintp_f16_m_untied, svfloat16_t, -+ z0 = svrintp_f16_m (z2, p0, z1), -+ z0 = svrintp_m (z2, p0, z1)) -+ -+/* -+** rintp_f16_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, \1\.h -+** frintp z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintp_f16_z_tied1, svfloat16_t, -+ z0 = svrintp_f16_z (p0, z0), -+ z0 = svrintp_z (p0, z0)) -+ -+/* -+** rintp_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** frintp z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintp_f16_z_untied, svfloat16_t, -+ z0 = svrintp_f16_z (p0, z1), -+ z0 = svrintp_z (p0, z1)) -+ -+/* -+** rintp_f16_x_tied1: -+** frintp z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintp_f16_x_tied1, svfloat16_t, -+ z0 = svrintp_f16_x (p0, z0), -+ z0 = svrintp_x (p0, z0)) -+ -+/* -+** rintp_f16_x_untied: -+** frintp z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintp_f16_x_untied, svfloat16_t, -+ z0 = svrintp_f16_x (p0, z1), -+ z0 = svrintp_x (p0, z1)) -+ -+/* -+** ptrue_rintp_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rintp_f16_x_tied1, svfloat16_t, -+ z0 = svrintp_f16_x (svptrue_b16 (), z0), -+ z0 = svrintp_x (svptrue_b16 (), z0)) -+ -+/* -+** ptrue_rintp_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rintp_f16_x_untied, svfloat16_t, -+ z0 = svrintp_f16_x (svptrue_b16 (), z1), -+ z0 = svrintp_x (svptrue_b16 (), z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintp_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintp_f32.c -new file mode 100644 -index 000000000..cec360d7c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintp_f32.c -@@ -0,0 +1,103 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rintp_f32_m_tied12: -+** frintp z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintp_f32_m_tied12, svfloat32_t, -+ z0 = svrintp_f32_m (z0, p0, z0), -+ z0 = svrintp_m (z0, p0, z0)) -+ -+/* -+** rintp_f32_m_tied1: -+** frintp z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintp_f32_m_tied1, svfloat32_t, -+ z0 = svrintp_f32_m (z0, p0, z1), -+ z0 = svrintp_m (z0, p0, z1)) -+ -+/* -+** rintp_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** frintp z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintp_f32_m_tied2, svfloat32_t, -+ z0 = svrintp_f32_m (z1, p0, z0), -+ z0 = svrintp_m (z1, p0, z0)) -+ -+/* -+** rintp_f32_m_untied: -+** movprfx z0, z2 -+** frintp z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintp_f32_m_untied, svfloat32_t, -+ z0 = svrintp_f32_m (z2, p0, z1), -+ z0 = svrintp_m (z2, p0, z1)) -+ -+/* -+** rintp_f32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** frintp z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintp_f32_z_tied1, svfloat32_t, -+ z0 = svrintp_f32_z (p0, z0), -+ z0 = svrintp_z (p0, z0)) -+ -+/* -+** rintp_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** frintp z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintp_f32_z_untied, svfloat32_t, -+ z0 = svrintp_f32_z (p0, z1), -+ z0 = svrintp_z (p0, z1)) -+ -+/* -+** rintp_f32_x_tied1: -+** frintp z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintp_f32_x_tied1, svfloat32_t, -+ z0 = svrintp_f32_x (p0, z0), -+ z0 = svrintp_x (p0, z0)) -+ -+/* -+** rintp_f32_x_untied: -+** frintp z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintp_f32_x_untied, svfloat32_t, -+ z0 = svrintp_f32_x (p0, z1), -+ z0 = svrintp_x (p0, z1)) -+ -+/* -+** ptrue_rintp_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rintp_f32_x_tied1, svfloat32_t, -+ z0 = svrintp_f32_x (svptrue_b32 (), z0), -+ z0 = svrintp_x (svptrue_b32 (), z0)) -+ -+/* -+** ptrue_rintp_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rintp_f32_x_untied, svfloat32_t, -+ z0 = svrintp_f32_x (svptrue_b32 (), z1), -+ z0 = svrintp_x (svptrue_b32 (), z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintp_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintp_f64.c -new file mode 100644 -index 000000000..1305fb682 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintp_f64.c -@@ -0,0 +1,103 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rintp_f64_m_tied12: -+** frintp z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rintp_f64_m_tied12, svfloat64_t, -+ z0 = svrintp_f64_m (z0, p0, z0), -+ z0 = svrintp_m (z0, p0, z0)) -+ -+/* -+** rintp_f64_m_tied1: -+** frintp z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rintp_f64_m_tied1, svfloat64_t, -+ z0 = svrintp_f64_m (z0, p0, z1), -+ z0 = svrintp_m (z0, p0, z1)) -+ -+/* -+** rintp_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** frintp z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (rintp_f64_m_tied2, svfloat64_t, -+ z0 = svrintp_f64_m (z1, p0, z0), -+ z0 = svrintp_m (z1, p0, z0)) -+ -+/* -+** rintp_f64_m_untied: -+** movprfx z0, z2 -+** frintp z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rintp_f64_m_untied, svfloat64_t, -+ z0 = svrintp_f64_m (z2, p0, z1), -+ z0 = svrintp_m (z2, p0, z1)) -+ -+/* -+** rintp_f64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** frintp z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (rintp_f64_z_tied1, svfloat64_t, -+ z0 = svrintp_f64_z (p0, z0), -+ z0 = svrintp_z (p0, z0)) -+ -+/* -+** rintp_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** frintp z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rintp_f64_z_untied, svfloat64_t, -+ z0 = svrintp_f64_z (p0, z1), -+ z0 = svrintp_z (p0, z1)) -+ -+/* -+** rintp_f64_x_tied1: -+** frintp z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rintp_f64_x_tied1, svfloat64_t, -+ z0 = svrintp_f64_x (p0, z0), -+ z0 = svrintp_x (p0, z0)) -+ -+/* -+** rintp_f64_x_untied: -+** frintp z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rintp_f64_x_untied, svfloat64_t, -+ z0 = svrintp_f64_x (p0, z1), -+ z0 = svrintp_x (p0, z1)) -+ -+/* -+** ptrue_rintp_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rintp_f64_x_tied1, svfloat64_t, -+ z0 = svrintp_f64_x (svptrue_b64 (), z0), -+ z0 = svrintp_x (svptrue_b64 (), z0)) -+ -+/* -+** ptrue_rintp_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rintp_f64_x_untied, svfloat64_t, -+ z0 = svrintp_f64_x (svptrue_b64 (), z1), -+ z0 = svrintp_x (svptrue_b64 (), z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintx_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintx_f16.c -new file mode 100644 -index 000000000..96f7f2c72 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintx_f16.c -@@ -0,0 +1,103 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rintx_f16_m_tied12: -+** frintx z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintx_f16_m_tied12, svfloat16_t, -+ z0 = svrintx_f16_m (z0, p0, z0), -+ z0 = svrintx_m (z0, p0, z0)) -+ -+/* -+** rintx_f16_m_tied1: -+** frintx z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintx_f16_m_tied1, svfloat16_t, -+ z0 = svrintx_f16_m (z0, p0, z1), -+ z0 = svrintx_m (z0, p0, z1)) -+ -+/* -+** rintx_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** frintx z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintx_f16_m_tied2, svfloat16_t, -+ z0 = svrintx_f16_m (z1, p0, z0), -+ z0 = svrintx_m (z1, p0, z0)) -+ -+/* -+** rintx_f16_m_untied: -+** movprfx z0, z2 -+** frintx z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintx_f16_m_untied, svfloat16_t, -+ z0 = svrintx_f16_m (z2, p0, z1), -+ z0 = svrintx_m (z2, p0, z1)) -+ -+/* -+** rintx_f16_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, \1\.h -+** frintx z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintx_f16_z_tied1, svfloat16_t, -+ z0 = svrintx_f16_z (p0, z0), -+ z0 = svrintx_z (p0, z0)) -+ -+/* -+** rintx_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** frintx z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintx_f16_z_untied, svfloat16_t, -+ z0 = svrintx_f16_z (p0, z1), -+ z0 = svrintx_z (p0, z1)) -+ -+/* -+** rintx_f16_x_tied1: -+** frintx z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintx_f16_x_tied1, svfloat16_t, -+ z0 = svrintx_f16_x (p0, z0), -+ z0 = svrintx_x (p0, z0)) -+ -+/* -+** rintx_f16_x_untied: -+** frintx z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintx_f16_x_untied, svfloat16_t, -+ z0 = svrintx_f16_x (p0, z1), -+ z0 = svrintx_x (p0, z1)) -+ -+/* -+** ptrue_rintx_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rintx_f16_x_tied1, svfloat16_t, -+ z0 = svrintx_f16_x (svptrue_b16 (), z0), -+ z0 = svrintx_x (svptrue_b16 (), z0)) -+ -+/* -+** ptrue_rintx_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rintx_f16_x_untied, svfloat16_t, -+ z0 = svrintx_f16_x (svptrue_b16 (), z1), -+ z0 = svrintx_x (svptrue_b16 (), z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintx_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintx_f32.c -new file mode 100644 -index 000000000..1c42d2a94 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintx_f32.c -@@ -0,0 +1,103 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rintx_f32_m_tied12: -+** frintx z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintx_f32_m_tied12, svfloat32_t, -+ z0 = svrintx_f32_m (z0, p0, z0), -+ z0 = svrintx_m (z0, p0, z0)) -+ -+/* -+** rintx_f32_m_tied1: -+** frintx z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintx_f32_m_tied1, svfloat32_t, -+ z0 = svrintx_f32_m (z0, p0, z1), -+ z0 = svrintx_m (z0, p0, z1)) -+ -+/* -+** rintx_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** frintx z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintx_f32_m_tied2, svfloat32_t, -+ z0 = svrintx_f32_m (z1, p0, z0), -+ z0 = svrintx_m (z1, p0, z0)) -+ -+/* -+** rintx_f32_m_untied: -+** movprfx z0, z2 -+** frintx z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintx_f32_m_untied, svfloat32_t, -+ z0 = svrintx_f32_m (z2, p0, z1), -+ z0 = svrintx_m (z2, p0, z1)) -+ -+/* -+** rintx_f32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** frintx z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintx_f32_z_tied1, svfloat32_t, -+ z0 = svrintx_f32_z (p0, z0), -+ z0 = svrintx_z (p0, z0)) -+ -+/* -+** rintx_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** frintx z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintx_f32_z_untied, svfloat32_t, -+ z0 = svrintx_f32_z (p0, z1), -+ z0 = svrintx_z (p0, z1)) -+ -+/* -+** rintx_f32_x_tied1: -+** frintx z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintx_f32_x_tied1, svfloat32_t, -+ z0 = svrintx_f32_x (p0, z0), -+ z0 = svrintx_x (p0, z0)) -+ -+/* -+** rintx_f32_x_untied: -+** frintx z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintx_f32_x_untied, svfloat32_t, -+ z0 = svrintx_f32_x (p0, z1), -+ z0 = svrintx_x (p0, z1)) -+ -+/* -+** ptrue_rintx_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rintx_f32_x_tied1, svfloat32_t, -+ z0 = svrintx_f32_x (svptrue_b32 (), z0), -+ z0 = svrintx_x (svptrue_b32 (), z0)) -+ -+/* -+** ptrue_rintx_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rintx_f32_x_untied, svfloat32_t, -+ z0 = svrintx_f32_x (svptrue_b32 (), z1), -+ z0 = svrintx_x (svptrue_b32 (), z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintx_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintx_f64.c -new file mode 100644 -index 000000000..bee806b3b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintx_f64.c -@@ -0,0 +1,103 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rintx_f64_m_tied12: -+** frintx z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rintx_f64_m_tied12, svfloat64_t, -+ z0 = svrintx_f64_m (z0, p0, z0), -+ z0 = svrintx_m (z0, p0, z0)) -+ -+/* -+** rintx_f64_m_tied1: -+** frintx z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rintx_f64_m_tied1, svfloat64_t, -+ z0 = svrintx_f64_m (z0, p0, z1), -+ z0 = svrintx_m (z0, p0, z1)) -+ -+/* -+** rintx_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** frintx z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (rintx_f64_m_tied2, svfloat64_t, -+ z0 = svrintx_f64_m (z1, p0, z0), -+ z0 = svrintx_m (z1, p0, z0)) -+ -+/* -+** rintx_f64_m_untied: -+** movprfx z0, z2 -+** frintx z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rintx_f64_m_untied, svfloat64_t, -+ z0 = svrintx_f64_m (z2, p0, z1), -+ z0 = svrintx_m (z2, p0, z1)) -+ -+/* -+** rintx_f64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** frintx z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (rintx_f64_z_tied1, svfloat64_t, -+ z0 = svrintx_f64_z (p0, z0), -+ z0 = svrintx_z (p0, z0)) -+ -+/* -+** rintx_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** frintx z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rintx_f64_z_untied, svfloat64_t, -+ z0 = svrintx_f64_z (p0, z1), -+ z0 = svrintx_z (p0, z1)) -+ -+/* -+** rintx_f64_x_tied1: -+** frintx z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rintx_f64_x_tied1, svfloat64_t, -+ z0 = svrintx_f64_x (p0, z0), -+ z0 = svrintx_x (p0, z0)) -+ -+/* -+** rintx_f64_x_untied: -+** frintx z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rintx_f64_x_untied, svfloat64_t, -+ z0 = svrintx_f64_x (p0, z1), -+ z0 = svrintx_x (p0, z1)) -+ -+/* -+** ptrue_rintx_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rintx_f64_x_tied1, svfloat64_t, -+ z0 = svrintx_f64_x (svptrue_b64 (), z0), -+ z0 = svrintx_x (svptrue_b64 (), z0)) -+ -+/* -+** ptrue_rintx_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rintx_f64_x_untied, svfloat64_t, -+ z0 = svrintx_f64_x (svptrue_b64 (), z1), -+ z0 = svrintx_x (svptrue_b64 (), z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintz_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintz_f16.c -new file mode 100644 -index 000000000..be13d82b4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintz_f16.c -@@ -0,0 +1,103 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rintz_f16_m_tied12: -+** frintz z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintz_f16_m_tied12, svfloat16_t, -+ z0 = svrintz_f16_m (z0, p0, z0), -+ z0 = svrintz_m (z0, p0, z0)) -+ -+/* -+** rintz_f16_m_tied1: -+** frintz z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintz_f16_m_tied1, svfloat16_t, -+ z0 = svrintz_f16_m (z0, p0, z1), -+ z0 = svrintz_m (z0, p0, z1)) -+ -+/* -+** rintz_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** frintz z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintz_f16_m_tied2, svfloat16_t, -+ z0 = svrintz_f16_m (z1, p0, z0), -+ z0 = svrintz_m (z1, p0, z0)) -+ -+/* -+** rintz_f16_m_untied: -+** movprfx z0, z2 -+** frintz z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintz_f16_m_untied, svfloat16_t, -+ z0 = svrintz_f16_m (z2, p0, z1), -+ z0 = svrintz_m (z2, p0, z1)) -+ -+/* -+** rintz_f16_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, \1\.h -+** frintz z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintz_f16_z_tied1, svfloat16_t, -+ z0 = svrintz_f16_z (p0, z0), -+ z0 = svrintz_z (p0, z0)) -+ -+/* -+** rintz_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** frintz z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintz_f16_z_untied, svfloat16_t, -+ z0 = svrintz_f16_z (p0, z1), -+ z0 = svrintz_z (p0, z1)) -+ -+/* -+** rintz_f16_x_tied1: -+** frintz z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintz_f16_x_tied1, svfloat16_t, -+ z0 = svrintz_f16_x (p0, z0), -+ z0 = svrintz_x (p0, z0)) -+ -+/* -+** rintz_f16_x_untied: -+** frintz z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rintz_f16_x_untied, svfloat16_t, -+ z0 = svrintz_f16_x (p0, z1), -+ z0 = svrintz_x (p0, z1)) -+ -+/* -+** ptrue_rintz_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rintz_f16_x_tied1, svfloat16_t, -+ z0 = svrintz_f16_x (svptrue_b16 (), z0), -+ z0 = svrintz_x (svptrue_b16 (), z0)) -+ -+/* -+** ptrue_rintz_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rintz_f16_x_untied, svfloat16_t, -+ z0 = svrintz_f16_x (svptrue_b16 (), z1), -+ z0 = svrintz_x (svptrue_b16 (), z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintz_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintz_f32.c -new file mode 100644 -index 000000000..873c0d468 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintz_f32.c -@@ -0,0 +1,103 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rintz_f32_m_tied12: -+** frintz z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintz_f32_m_tied12, svfloat32_t, -+ z0 = svrintz_f32_m (z0, p0, z0), -+ z0 = svrintz_m (z0, p0, z0)) -+ -+/* -+** rintz_f32_m_tied1: -+** frintz z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintz_f32_m_tied1, svfloat32_t, -+ z0 = svrintz_f32_m (z0, p0, z1), -+ z0 = svrintz_m (z0, p0, z1)) -+ -+/* -+** rintz_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** frintz z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintz_f32_m_tied2, svfloat32_t, -+ z0 = svrintz_f32_m (z1, p0, z0), -+ z0 = svrintz_m (z1, p0, z0)) -+ -+/* -+** rintz_f32_m_untied: -+** movprfx z0, z2 -+** frintz z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintz_f32_m_untied, svfloat32_t, -+ z0 = svrintz_f32_m (z2, p0, z1), -+ z0 = svrintz_m (z2, p0, z1)) -+ -+/* -+** rintz_f32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** frintz z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintz_f32_z_tied1, svfloat32_t, -+ z0 = svrintz_f32_z (p0, z0), -+ z0 = svrintz_z (p0, z0)) -+ -+/* -+** rintz_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** frintz z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintz_f32_z_untied, svfloat32_t, -+ z0 = svrintz_f32_z (p0, z1), -+ z0 = svrintz_z (p0, z1)) -+ -+/* -+** rintz_f32_x_tied1: -+** frintz z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintz_f32_x_tied1, svfloat32_t, -+ z0 = svrintz_f32_x (p0, z0), -+ z0 = svrintz_x (p0, z0)) -+ -+/* -+** rintz_f32_x_untied: -+** frintz z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rintz_f32_x_untied, svfloat32_t, -+ z0 = svrintz_f32_x (p0, z1), -+ z0 = svrintz_x (p0, z1)) -+ -+/* -+** ptrue_rintz_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rintz_f32_x_tied1, svfloat32_t, -+ z0 = svrintz_f32_x (svptrue_b32 (), z0), -+ z0 = svrintz_x (svptrue_b32 (), z0)) -+ -+/* -+** ptrue_rintz_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rintz_f32_x_untied, svfloat32_t, -+ z0 = svrintz_f32_x (svptrue_b32 (), z1), -+ z0 = svrintz_x (svptrue_b32 (), z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintz_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintz_f64.c -new file mode 100644 -index 000000000..e6c9d1fc8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rintz_f64.c -@@ -0,0 +1,103 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rintz_f64_m_tied12: -+** frintz z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rintz_f64_m_tied12, svfloat64_t, -+ z0 = svrintz_f64_m (z0, p0, z0), -+ z0 = svrintz_m (z0, p0, z0)) -+ -+/* -+** rintz_f64_m_tied1: -+** frintz z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rintz_f64_m_tied1, svfloat64_t, -+ z0 = svrintz_f64_m (z0, p0, z1), -+ z0 = svrintz_m (z0, p0, z1)) -+ -+/* -+** rintz_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** frintz z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (rintz_f64_m_tied2, svfloat64_t, -+ z0 = svrintz_f64_m (z1, p0, z0), -+ z0 = svrintz_m (z1, p0, z0)) -+ -+/* -+** rintz_f64_m_untied: -+** movprfx z0, z2 -+** frintz z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rintz_f64_m_untied, svfloat64_t, -+ z0 = svrintz_f64_m (z2, p0, z1), -+ z0 = svrintz_m (z2, p0, z1)) -+ -+/* -+** rintz_f64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** frintz z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (rintz_f64_z_tied1, svfloat64_t, -+ z0 = svrintz_f64_z (p0, z0), -+ z0 = svrintz_z (p0, z0)) -+ -+/* -+** rintz_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** frintz z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rintz_f64_z_untied, svfloat64_t, -+ z0 = svrintz_f64_z (p0, z1), -+ z0 = svrintz_z (p0, z1)) -+ -+/* -+** rintz_f64_x_tied1: -+** frintz z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rintz_f64_x_tied1, svfloat64_t, -+ z0 = svrintz_f64_x (p0, z0), -+ z0 = svrintz_x (p0, z0)) -+ -+/* -+** rintz_f64_x_untied: -+** frintz z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rintz_f64_x_untied, svfloat64_t, -+ z0 = svrintz_f64_x (p0, z1), -+ z0 = svrintz_x (p0, z1)) -+ -+/* -+** ptrue_rintz_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rintz_f64_x_tied1, svfloat64_t, -+ z0 = svrintz_f64_x (svptrue_b64 (), z0), -+ z0 = svrintz_x (svptrue_b64 (), z0)) -+ -+/* -+** ptrue_rintz_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_rintz_f64_x_untied, svfloat64_t, -+ z0 = svrintz_f64_x (svptrue_b64 (), z1), -+ z0 = svrintz_x (svptrue_b64 (), z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rsqrte_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rsqrte_f16.c -new file mode 100644 -index 000000000..adfdc2b9c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rsqrte_f16.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rsqrte_f16_tied1: -+** frsqrte z0\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rsqrte_f16_tied1, svfloat16_t, -+ z0 = svrsqrte_f16 (z0), -+ z0 = svrsqrte (z0)) -+ -+/* -+** rsqrte_f16_untied: -+** frsqrte z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rsqrte_f16_untied, svfloat16_t, -+ z0 = svrsqrte_f16 (z1), -+ z0 = svrsqrte (z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rsqrte_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rsqrte_f32.c -new file mode 100644 -index 000000000..fd938ebdf ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rsqrte_f32.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rsqrte_f32_tied1: -+** frsqrte z0\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rsqrte_f32_tied1, svfloat32_t, -+ z0 = svrsqrte_f32 (z0), -+ z0 = svrsqrte (z0)) -+ -+/* -+** rsqrte_f32_untied: -+** frsqrte z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rsqrte_f32_untied, svfloat32_t, -+ z0 = svrsqrte_f32 (z1), -+ z0 = svrsqrte (z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rsqrte_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rsqrte_f64.c -new file mode 100644 -index 000000000..3ac0f4053 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rsqrte_f64.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rsqrte_f64_tied1: -+** frsqrte z0\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rsqrte_f64_tied1, svfloat64_t, -+ z0 = svrsqrte_f64 (z0), -+ z0 = svrsqrte (z0)) -+ -+/* -+** rsqrte_f64_untied: -+** frsqrte z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rsqrte_f64_untied, svfloat64_t, -+ z0 = svrsqrte_f64 (z1), -+ z0 = svrsqrte (z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rsqrts_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rsqrts_f16.c -new file mode 100644 -index 000000000..2d88be3d6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rsqrts_f16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rsqrts_f16_tied1: -+** frsqrts z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rsqrts_f16_tied1, svfloat16_t, -+ z0 = svrsqrts_f16 (z0, z1), -+ z0 = svrsqrts (z0, z1)) -+ -+/* -+** rsqrts_f16_tied2: -+** frsqrts z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rsqrts_f16_tied2, svfloat16_t, -+ z0 = svrsqrts_f16 (z1, z0), -+ z0 = svrsqrts (z1, z0)) -+ -+/* -+** rsqrts_f16_untied: -+** frsqrts z0\.h, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (rsqrts_f16_untied, svfloat16_t, -+ z0 = svrsqrts_f16 (z1, z2), -+ z0 = svrsqrts (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rsqrts_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rsqrts_f32.c -new file mode 100644 -index 000000000..cd76aef4d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rsqrts_f32.c -@@ -0,0 +1,30 
@@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rsqrts_f32_tied1: -+** frsqrts z0\.s, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rsqrts_f32_tied1, svfloat32_t, -+ z0 = svrsqrts_f32 (z0, z1), -+ z0 = svrsqrts (z0, z1)) -+ -+/* -+** rsqrts_f32_tied2: -+** frsqrts z0\.s, z1\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rsqrts_f32_tied2, svfloat32_t, -+ z0 = svrsqrts_f32 (z1, z0), -+ z0 = svrsqrts (z1, z0)) -+ -+/* -+** rsqrts_f32_untied: -+** frsqrts z0\.s, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (rsqrts_f32_untied, svfloat32_t, -+ z0 = svrsqrts_f32 (z1, z2), -+ z0 = svrsqrts (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rsqrts_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rsqrts_f64.c -new file mode 100644 -index 000000000..e72a82fcb ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/rsqrts_f64.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** rsqrts_f64_tied1: -+** frsqrts z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rsqrts_f64_tied1, svfloat64_t, -+ z0 = svrsqrts_f64 (z0, z1), -+ z0 = svrsqrts (z0, z1)) -+ -+/* -+** rsqrts_f64_tied2: -+** frsqrts z0\.d, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rsqrts_f64_tied2, svfloat64_t, -+ z0 = svrsqrts_f64 (z1, z0), -+ z0 = svrsqrts (z1, z0)) -+ -+/* -+** rsqrts_f64_untied: -+** frsqrts z0\.d, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (rsqrts_f64_untied, svfloat64_t, -+ z0 = svrsqrts_f64 (z1, z2), -+ z0 = svrsqrts (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/scale_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/scale_f16.c -new file mode 100644 -index 000000000..9c554255b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/scale_f16.c -@@ -0,0 +1,330 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** scale_f16_m_tied1: -+** fscale z0\.h, p0/m, z0\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (scale_f16_m_tied1, svfloat16_t, svint16_t, -+ z0 = svscale_f16_m (p0, z0, z4), -+ z0 = svscale_m (p0, z0, z4)) -+ -+/* -+** scale_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** fscale z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (scale_f16_m_tied2, svfloat16_t, svint16_t, -+ z0_res = svscale_f16_m (p0, z4, z0), -+ z0_res = svscale_m (p0, z4, z0)) -+ -+/* -+** scale_f16_m_untied: -+** movprfx z0, z1 -+** fscale z0\.h, p0/m, z0\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (scale_f16_m_untied, svfloat16_t, svint16_t, -+ z0 = svscale_f16_m (p0, z1, z4), -+ z0 = svscale_m (p0, z1, z4)) -+ -+/* -+** scale_w0_f16_m_tied1: -+** mov (z[0-9]+\.h), w0 -+** fscale z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (scale_w0_f16_m_tied1, svfloat16_t, int16_t, -+ z0 = svscale_n_f16_m (p0, z0, x0), -+ z0 = svscale_m (p0, z0, x0)) -+ -+/* -+** scale_w0_f16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** fscale z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (scale_w0_f16_m_untied, svfloat16_t, int16_t, -+ z0 = svscale_n_f16_m (p0, z1, x0), -+ z0 = svscale_m (p0, z1, x0)) -+ -+/* -+** scale_3_f16_m_tied1: -+** mov (z[0-9]+\.h), #3 -+** fscale z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (scale_3_f16_m_tied1, svfloat16_t, -+ z0 = svscale_n_f16_m (p0, z0, 3), -+ z0 = svscale_m (p0, z0, 3)) -+ -+/* -+** scale_3_f16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), #3 -+** movprfx z0, z1 -+** 
fscale z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (scale_3_f16_m_untied, svfloat16_t, -+ z0 = svscale_n_f16_m (p0, z1, 3), -+ z0 = svscale_m (p0, z1, 3)) -+ -+/* -+** scale_m3_f16_m: -+** mov (z[0-9]+\.h), #-3 -+** fscale z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (scale_m3_f16_m, svfloat16_t, -+ z0 = svscale_n_f16_m (p0, z0, -3), -+ z0 = svscale_m (p0, z0, -3)) -+ -+/* -+** scale_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fscale z0\.h, p0/m, z0\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (scale_f16_z_tied1, svfloat16_t, svint16_t, -+ z0 = svscale_f16_z (p0, z0, z4), -+ z0 = svscale_z (p0, z0, z4)) -+ -+/* -+** scale_f16_z_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, z4\.h -+** fscale z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (scale_f16_z_tied2, svfloat16_t, svint16_t, -+ z0_res = svscale_f16_z (p0, z4, z0), -+ z0_res = svscale_z (p0, z4, z0)) -+ -+/* -+** scale_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fscale z0\.h, p0/m, z0\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (scale_f16_z_untied, svfloat16_t, svint16_t, -+ z0 = svscale_f16_z (p0, z1, z4), -+ z0 = svscale_z (p0, z1, z4)) -+ -+/* -+** scale_w0_f16_z_tied1: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** fscale z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (scale_w0_f16_z_tied1, svfloat16_t, int16_t, -+ z0 = svscale_n_f16_z (p0, z0, x0), -+ z0 = svscale_z (p0, z0, x0)) -+ -+/* -+** scale_w0_f16_z_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z1\.h -+** fscale z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (scale_w0_f16_z_untied, svfloat16_t, int16_t, -+ z0 = svscale_n_f16_z (p0, z1, x0), -+ z0 = svscale_z (p0, z1, x0)) -+ -+/* -+** scale_3_f16_z_tied1: -+** mov (z[0-9]+\.h), #3 -+** movprfx z0\.h, p0/z, z0\.h -+** fscale z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (scale_3_f16_z_tied1, svfloat16_t, -+ z0 = svscale_n_f16_z (p0, z0, 3), -+ z0 = svscale_z (p0, z0, 3)) -+ -+/* -+** scale_3_f16_z_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), #3 -+** movprfx z0\.h, p0/z, z1\.h -+** fscale z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (scale_3_f16_z_untied, svfloat16_t, -+ z0 = svscale_n_f16_z (p0, z1, 3), -+ z0 = svscale_z (p0, z1, 3)) -+ -+/* -+** scale_m3_f16_z: -+** mov (z[0-9]+\.h), #-3 -+** movprfx z0\.h, p0/z, z0\.h -+** fscale z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (scale_m3_f16_z, svfloat16_t, -+ z0 = svscale_n_f16_z (p0, z0, -3), -+ z0 = svscale_z (p0, z0, -3)) -+ -+/* -+** scale_f16_x_tied1: -+** fscale z0\.h, p0/m, z0\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (scale_f16_x_tied1, svfloat16_t, svint16_t, -+ z0 = svscale_f16_x (p0, z0, z4), -+ z0 = svscale_x (p0, z0, z4)) -+ -+/* -+** scale_f16_x_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** fscale z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (scale_f16_x_tied2, svfloat16_t, svint16_t, -+ z0_res = svscale_f16_x (p0, z4, z0), -+ z0_res = svscale_x (p0, z4, z0)) -+ -+/* -+** scale_f16_x_untied: -+** movprfx z0, z1 -+** fscale z0\.h, p0/m, z0\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (scale_f16_x_untied, svfloat16_t, svint16_t, -+ z0 = svscale_f16_x (p0, z1, z4), -+ z0 = svscale_x (p0, z1, z4)) -+ -+/* -+** scale_w0_f16_x_tied1: -+** mov (z[0-9]+\.h), w0 -+** fscale z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (scale_w0_f16_x_tied1, svfloat16_t, int16_t, -+ z0 = svscale_n_f16_x (p0, z0, x0), -+ z0 = svscale_x (p0, z0, x0)) -+ -+/* -+** scale_w0_f16_x_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 
-+** movprfx z0, z1 -+** fscale z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (scale_w0_f16_x_untied, svfloat16_t, int16_t, -+ z0 = svscale_n_f16_x (p0, z1, x0), -+ z0 = svscale_x (p0, z1, x0)) -+ -+/* -+** scale_3_f16_x_tied1: -+** mov (z[0-9]+\.h), #3 -+** fscale z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (scale_3_f16_x_tied1, svfloat16_t, -+ z0 = svscale_n_f16_x (p0, z0, 3), -+ z0 = svscale_x (p0, z0, 3)) -+ -+/* -+** scale_3_f16_x_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), #3 -+** movprfx z0, z1 -+** fscale z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (scale_3_f16_x_untied, svfloat16_t, -+ z0 = svscale_n_f16_x (p0, z1, 3), -+ z0 = svscale_x (p0, z1, 3)) -+ -+/* -+** scale_m3_f16_x: -+** mov (z[0-9]+\.h), #-3 -+** fscale z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (scale_m3_f16_x, svfloat16_t, -+ z0 = svscale_n_f16_x (p0, z0, -3), -+ z0 = svscale_x (p0, z0, -3)) -+ -+/* -+** ptrue_scale_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z (ptrue_scale_f16_x_tied1, svfloat16_t, svint16_t, -+ z0 = svscale_f16_x (svptrue_b16 (), z0, z4), -+ z0 = svscale_x (svptrue_b16 (), z0, z4)) -+ -+/* -+** ptrue_scale_f16_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z_REV (ptrue_scale_f16_x_tied2, svfloat16_t, svint16_t, -+ z0_res = svscale_f16_x (svptrue_b16 (), z4, z0), -+ z0_res = svscale_x (svptrue_b16 (), z4, z0)) -+ -+/* -+** ptrue_scale_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z (ptrue_scale_f16_x_untied, svfloat16_t, svint16_t, -+ z0 = svscale_f16_x (svptrue_b16 (), z1, z4), -+ z0 = svscale_x (svptrue_b16 (), z1, z4)) -+ -+/* -+** ptrue_scale_3_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_scale_3_f16_x_tied1, svfloat16_t, -+ z0 = svscale_n_f16_x (svptrue_b16 (), z0, 3), -+ z0 = svscale_x (svptrue_b16 (), z0, 3)) -+ -+/* -+** ptrue_scale_3_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_scale_3_f16_x_untied, svfloat16_t, -+ z0 = svscale_n_f16_x (svptrue_b16 (), z1, 3), -+ z0 = svscale_x (svptrue_b16 (), z1, 3)) -+ -+/* -+** ptrue_scale_m3_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_scale_m3_f16_x_tied1, svfloat16_t, -+ z0 = svscale_n_f16_x (svptrue_b16 (), z0, -3), -+ z0 = svscale_x (svptrue_b16 (), z0, -3)) -+ -+/* -+** ptrue_scale_m3_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_scale_m3_f16_x_untied, svfloat16_t, -+ z0 = svscale_n_f16_x (svptrue_b16 (), z1, -3), -+ z0 = svscale_x (svptrue_b16 (), z1, -3)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/scale_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/scale_f32.c -new file mode 100644 -index 000000000..747f8a639 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/scale_f32.c -@@ -0,0 +1,330 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** scale_f32_m_tied1: -+** fscale z0\.s, p0/m, z0\.s, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (scale_f32_m_tied1, svfloat32_t, svint32_t, -+ z0 = svscale_f32_m (p0, z0, z4), -+ z0 = svscale_m (p0, z0, z4)) -+ -+/* -+** scale_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** fscale z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (scale_f32_m_tied2, svfloat32_t, svint32_t, -+ z0_res = svscale_f32_m (p0, z4, z0), -+ z0_res = svscale_m (p0, z4, z0)) -+ -+/* -+** scale_f32_m_untied: -+** movprfx z0, z1 -+** fscale z0\.s, p0/m, z0\.s, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (scale_f32_m_untied, svfloat32_t, svint32_t, -+ z0 = svscale_f32_m (p0, z1, z4), -+ z0 = svscale_m (p0, z1, z4)) -+ -+/* -+** scale_w0_f32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** fscale z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (scale_w0_f32_m_tied1, svfloat32_t, int32_t, -+ z0 = svscale_n_f32_m (p0, z0, x0), -+ z0 = svscale_m (p0, z0, x0)) -+ -+/* -+** scale_w0_f32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** fscale z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (scale_w0_f32_m_untied, svfloat32_t, int32_t, -+ z0 = svscale_n_f32_m (p0, z1, x0), -+ z0 = svscale_m (p0, z1, x0)) -+ -+/* -+** scale_3_f32_m_tied1: -+** mov (z[0-9]+\.s), #3 -+** fscale z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (scale_3_f32_m_tied1, svfloat32_t, -+ z0 = svscale_n_f32_m (p0, z0, 3), -+ z0 = svscale_m (p0, z0, 3)) -+ -+/* -+** scale_3_f32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #3 -+** movprfx z0, z1 -+** fscale z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (scale_3_f32_m_untied, svfloat32_t, -+ z0 = svscale_n_f32_m (p0, z1, 3), -+ z0 = svscale_m (p0, z1, 3)) -+ -+/* -+** scale_m3_f32_m: -+** mov (z[0-9]+\.s), #-3 -+** fscale z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (scale_m3_f32_m, svfloat32_t, -+ z0 = svscale_n_f32_m (p0, z0, -3), -+ z0 = svscale_m (p0, z0, -3)) -+ -+/* -+** scale_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fscale z0\.s, p0/m, z0\.s, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (scale_f32_z_tied1, svfloat32_t, svint32_t, -+ z0 = svscale_f32_z (p0, z0, z4), -+ z0 = svscale_z (p0, z0, z4)) -+ -+/* -+** scale_f32_z_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, z4\.s -+** fscale z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (scale_f32_z_tied2, svfloat32_t, svint32_t, -+ z0_res = svscale_f32_z (p0, z4, z0), -+ z0_res = svscale_z (p0, z4, z0)) -+ -+/* -+** scale_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fscale z0\.s, p0/m, z0\.s, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (scale_f32_z_untied, svfloat32_t, svint32_t, -+ z0 = svscale_f32_z (p0, z1, z4), -+ z0 = svscale_z (p0, z1, z4)) -+ -+/* -+** scale_w0_f32_z_tied1: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** fscale z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (scale_w0_f32_z_tied1, svfloat32_t, int32_t, -+ z0 = svscale_n_f32_z (p0, z0, x0), -+ z0 = svscale_z (p0, z0, x0)) -+ -+/* -+** 
scale_w0_f32_z_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z1\.s -+** fscale z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (scale_w0_f32_z_untied, svfloat32_t, int32_t, -+ z0 = svscale_n_f32_z (p0, z1, x0), -+ z0 = svscale_z (p0, z1, x0)) -+ -+/* -+** scale_3_f32_z_tied1: -+** mov (z[0-9]+\.s), #3 -+** movprfx z0\.s, p0/z, z0\.s -+** fscale z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (scale_3_f32_z_tied1, svfloat32_t, -+ z0 = svscale_n_f32_z (p0, z0, 3), -+ z0 = svscale_z (p0, z0, 3)) -+ -+/* -+** scale_3_f32_z_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #3 -+** movprfx z0\.s, p0/z, z1\.s -+** fscale z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (scale_3_f32_z_untied, svfloat32_t, -+ z0 = svscale_n_f32_z (p0, z1, 3), -+ z0 = svscale_z (p0, z1, 3)) -+ -+/* -+** scale_m3_f32_z: -+** mov (z[0-9]+\.s), #-3 -+** movprfx z0\.s, p0/z, z0\.s -+** fscale z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (scale_m3_f32_z, svfloat32_t, -+ z0 = svscale_n_f32_z (p0, z0, -3), -+ z0 = svscale_z (p0, z0, -3)) -+ -+/* -+** scale_f32_x_tied1: -+** fscale z0\.s, p0/m, z0\.s, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (scale_f32_x_tied1, svfloat32_t, svint32_t, -+ z0 = svscale_f32_x (p0, z0, z4), -+ z0 = svscale_x (p0, z0, z4)) -+ -+/* -+** scale_f32_x_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** fscale z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (scale_f32_x_tied2, svfloat32_t, svint32_t, -+ z0_res = svscale_f32_x (p0, z4, z0), -+ z0_res = svscale_x (p0, z4, z0)) -+ -+/* -+** scale_f32_x_untied: -+** movprfx z0, z1 -+** fscale z0\.s, p0/m, z0\.s, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (scale_f32_x_untied, svfloat32_t, svint32_t, -+ z0 = svscale_f32_x (p0, z1, z4), -+ z0 = svscale_x (p0, z1, z4)) -+ -+/* -+** scale_w0_f32_x_tied1: -+** mov (z[0-9]+\.s), w0 -+** fscale z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (scale_w0_f32_x_tied1, svfloat32_t, int32_t, -+ z0 = svscale_n_f32_x (p0, z0, x0), -+ z0 = svscale_x (p0, z0, x0)) -+ -+/* -+** scale_w0_f32_x_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** fscale z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (scale_w0_f32_x_untied, svfloat32_t, int32_t, -+ z0 = svscale_n_f32_x (p0, z1, x0), -+ z0 = svscale_x (p0, z1, x0)) -+ -+/* -+** scale_3_f32_x_tied1: -+** mov (z[0-9]+\.s), #3 -+** fscale z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (scale_3_f32_x_tied1, svfloat32_t, -+ z0 = svscale_n_f32_x (p0, z0, 3), -+ z0 = svscale_x (p0, z0, 3)) -+ -+/* -+** scale_3_f32_x_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #3 -+** movprfx z0, z1 -+** fscale z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (scale_3_f32_x_untied, svfloat32_t, -+ z0 = svscale_n_f32_x (p0, z1, 3), -+ z0 = svscale_x (p0, z1, 3)) -+ -+/* -+** scale_m3_f32_x: -+** mov (z[0-9]+\.s), #-3 -+** fscale z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (scale_m3_f32_x, svfloat32_t, -+ z0 = svscale_n_f32_x (p0, z0, -3), -+ z0 = svscale_x (p0, z0, -3)) -+ -+/* -+** ptrue_scale_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z (ptrue_scale_f32_x_tied1, svfloat32_t, svint32_t, -+ z0 = svscale_f32_x (svptrue_b32 (), z0, z4), -+ z0 = svscale_x (svptrue_b32 (), z0, z4)) -+ -+/* -+** ptrue_scale_f32_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_DUAL_Z_REV (ptrue_scale_f32_x_tied2, svfloat32_t, svint32_t, -+ z0_res = svscale_f32_x (svptrue_b32 (), z4, z0), -+ z0_res = svscale_x (svptrue_b32 (), z4, z0)) -+ -+/* -+** ptrue_scale_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z (ptrue_scale_f32_x_untied, svfloat32_t, svint32_t, -+ z0 = svscale_f32_x (svptrue_b32 (), z1, z4), -+ z0 = svscale_x (svptrue_b32 (), z1, z4)) -+ -+/* -+** ptrue_scale_3_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_scale_3_f32_x_tied1, svfloat32_t, -+ z0 = svscale_n_f32_x (svptrue_b32 (), z0, 3), -+ z0 = svscale_x (svptrue_b32 (), z0, 3)) -+ -+/* -+** ptrue_scale_3_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_scale_3_f32_x_untied, svfloat32_t, -+ z0 = svscale_n_f32_x (svptrue_b32 (), z1, 3), -+ z0 = svscale_x (svptrue_b32 (), z1, 3)) -+ -+/* -+** ptrue_scale_m3_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_scale_m3_f32_x_tied1, svfloat32_t, -+ z0 = svscale_n_f32_x (svptrue_b32 (), z0, -3), -+ z0 = svscale_x (svptrue_b32 (), z0, -3)) -+ -+/* -+** ptrue_scale_m3_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_scale_m3_f32_x_untied, svfloat32_t, -+ z0 = svscale_n_f32_x (svptrue_b32 (), z1, -3), -+ z0 = svscale_x (svptrue_b32 (), z1, -3)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/scale_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/scale_f64.c -new file mode 100644 -index 000000000..004cbfa3e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/scale_f64.c -@@ -0,0 +1,330 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** scale_f64_m_tied1: -+** fscale z0\.d, p0/m, z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (scale_f64_m_tied1, svfloat64_t, svint64_t, -+ z0 = svscale_f64_m (p0, z0, z4), -+ z0 = svscale_m (p0, z0, z4)) -+ -+/* -+** scale_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z4 -+** fscale z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (scale_f64_m_tied2, svfloat64_t, svint64_t, -+ z0_res = svscale_f64_m (p0, z4, z0), -+ z0_res = svscale_m (p0, z4, z0)) -+ -+/* -+** scale_f64_m_untied: -+** movprfx z0, z1 -+** fscale z0\.d, p0/m, z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (scale_f64_m_untied, svfloat64_t, svint64_t, -+ z0 = svscale_f64_m (p0, z1, z4), -+ z0 = svscale_m (p0, z1, z4)) -+ -+/* -+** scale_x0_f64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** fscale z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (scale_x0_f64_m_tied1, svfloat64_t, int64_t, -+ z0 = svscale_n_f64_m (p0, z0, x0), -+ z0 = svscale_m (p0, z0, x0)) -+ -+/* -+** scale_x0_f64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** fscale z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (scale_x0_f64_m_untied, svfloat64_t, int64_t, -+ z0 = svscale_n_f64_m (p0, z1, x0), -+ z0 = svscale_m (p0, z1, x0)) -+ -+/* -+** scale_3_f64_m_tied1: -+** mov (z[0-9]+\.d), #3 -+** fscale z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (scale_3_f64_m_tied1, svfloat64_t, -+ z0 = svscale_n_f64_m (p0, z0, 3), -+ z0 = svscale_m (p0, z0, 3)) -+ -+/* -+** scale_3_f64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #3 -+** movprfx z0, z1 -+** fscale z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (scale_3_f64_m_untied, svfloat64_t, -+ z0 = svscale_n_f64_m (p0, z1, 3), -+ z0 = svscale_m (p0, z1, 3)) -+ -+/* 
-+** scale_m3_f64_m: -+** mov (z[0-9]+\.d), #-3 -+** fscale z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (scale_m3_f64_m, svfloat64_t, -+ z0 = svscale_n_f64_m (p0, z0, -3), -+ z0 = svscale_m (p0, z0, -3)) -+ -+/* -+** scale_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fscale z0\.d, p0/m, z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (scale_f64_z_tied1, svfloat64_t, svint64_t, -+ z0 = svscale_f64_z (p0, z0, z4), -+ z0 = svscale_z (p0, z0, z4)) -+ -+/* -+** scale_f64_z_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, z4\.d -+** fscale z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (scale_f64_z_tied2, svfloat64_t, svint64_t, -+ z0_res = svscale_f64_z (p0, z4, z0), -+ z0_res = svscale_z (p0, z4, z0)) -+ -+/* -+** scale_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fscale z0\.d, p0/m, z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (scale_f64_z_untied, svfloat64_t, svint64_t, -+ z0 = svscale_f64_z (p0, z1, z4), -+ z0 = svscale_z (p0, z1, z4)) -+ -+/* -+** scale_x0_f64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** fscale z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (scale_x0_f64_z_tied1, svfloat64_t, int64_t, -+ z0 = svscale_n_f64_z (p0, z0, x0), -+ z0 = svscale_z (p0, z0, x0)) -+ -+/* -+** scale_x0_f64_z_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z1\.d -+** fscale z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (scale_x0_f64_z_untied, svfloat64_t, int64_t, -+ z0 = svscale_n_f64_z (p0, z1, x0), -+ z0 = svscale_z (p0, z1, x0)) -+ -+/* -+** scale_3_f64_z_tied1: -+** mov (z[0-9]+\.d), #3 -+** movprfx z0\.d, p0/z, z0\.d -+** fscale z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (scale_3_f64_z_tied1, svfloat64_t, -+ z0 = svscale_n_f64_z (p0, z0, 3), -+ z0 = svscale_z (p0, z0, 3)) -+ -+/* -+** scale_3_f64_z_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #3 -+** movprfx z0\.d, p0/z, z1\.d -+** fscale z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (scale_3_f64_z_untied, svfloat64_t, -+ z0 = svscale_n_f64_z (p0, z1, 3), -+ z0 = svscale_z (p0, z1, 3)) -+ -+/* -+** scale_m3_f64_z: -+** mov (z[0-9]+\.d), #-3 -+** movprfx z0\.d, p0/z, z0\.d -+** fscale z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (scale_m3_f64_z, svfloat64_t, -+ z0 = svscale_n_f64_z (p0, z0, -3), -+ z0 = svscale_z (p0, z0, -3)) -+ -+/* -+** scale_f64_x_tied1: -+** fscale z0\.d, p0/m, z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (scale_f64_x_tied1, svfloat64_t, svint64_t, -+ z0 = svscale_f64_x (p0, z0, z4), -+ z0 = svscale_x (p0, z0, z4)) -+ -+/* -+** scale_f64_x_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z4 -+** fscale z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_DUAL_Z_REV (scale_f64_x_tied2, svfloat64_t, svint64_t, -+ z0_res = svscale_f64_x (p0, z4, z0), -+ z0_res = svscale_x (p0, z4, z0)) -+ -+/* -+** scale_f64_x_untied: -+** movprfx z0, z1 -+** fscale z0\.d, p0/m, z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (scale_f64_x_untied, svfloat64_t, svint64_t, -+ z0 = svscale_f64_x (p0, z1, z4), -+ z0 = svscale_x (p0, z1, z4)) -+ -+/* -+** scale_x0_f64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** fscale z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (scale_x0_f64_x_tied1, svfloat64_t, int64_t, -+ z0 = svscale_n_f64_x (p0, z0, x0), -+ z0 = svscale_x (p0, z0, x0)) -+ -+/* -+** scale_x0_f64_x_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** fscale z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (scale_x0_f64_x_untied, svfloat64_t, int64_t, -+ z0 = svscale_n_f64_x (p0, z1, x0), -+ z0 = svscale_x (p0, z1, 
x0)) -+ -+/* -+** scale_3_f64_x_tied1: -+** mov (z[0-9]+\.d), #3 -+** fscale z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (scale_3_f64_x_tied1, svfloat64_t, -+ z0 = svscale_n_f64_x (p0, z0, 3), -+ z0 = svscale_x (p0, z0, 3)) -+ -+/* -+** scale_3_f64_x_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #3 -+** movprfx z0, z1 -+** fscale z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (scale_3_f64_x_untied, svfloat64_t, -+ z0 = svscale_n_f64_x (p0, z1, 3), -+ z0 = svscale_x (p0, z1, 3)) -+ -+/* -+** scale_m3_f64_x: -+** mov (z[0-9]+\.d), #-3 -+** fscale z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (scale_m3_f64_x, svfloat64_t, -+ z0 = svscale_n_f64_x (p0, z0, -3), -+ z0 = svscale_x (p0, z0, -3)) -+ -+/* -+** ptrue_scale_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z (ptrue_scale_f64_x_tied1, svfloat64_t, svint64_t, -+ z0 = svscale_f64_x (svptrue_b64 (), z0, z4), -+ z0 = svscale_x (svptrue_b64 (), z0, z4)) -+ -+/* -+** ptrue_scale_f64_x_tied2: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z_REV (ptrue_scale_f64_x_tied2, svfloat64_t, svint64_t, -+ z0_res = svscale_f64_x (svptrue_b64 (), z4, z0), -+ z0_res = svscale_x (svptrue_b64 (), z4, z0)) -+ -+/* -+** ptrue_scale_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_DUAL_Z (ptrue_scale_f64_x_untied, svfloat64_t, svint64_t, -+ z0 = svscale_f64_x (svptrue_b64 (), z1, z4), -+ z0 = svscale_x (svptrue_b64 (), z1, z4)) -+ -+/* -+** ptrue_scale_3_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_scale_3_f64_x_tied1, svfloat64_t, -+ z0 = svscale_n_f64_x (svptrue_b64 (), z0, 3), -+ z0 = svscale_x (svptrue_b64 (), z0, 3)) -+ -+/* -+** ptrue_scale_3_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_scale_3_f64_x_untied, svfloat64_t, -+ z0 = svscale_n_f64_x (svptrue_b64 (), z1, 3), -+ z0 = svscale_x (svptrue_b64 (), z1, 3)) -+ -+/* -+** ptrue_scale_m3_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_scale_m3_f64_x_tied1, svfloat64_t, -+ z0 = svscale_n_f64_x (svptrue_b64 (), z0, -3), -+ z0 = svscale_x (svptrue_b64 (), z0, -3)) -+ -+/* -+** ptrue_scale_m3_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_scale_m3_f64_x_untied, svfloat64_t, -+ z0 = svscale_n_f64_x (svptrue_b64 (), z1, -3), -+ z0 = svscale_x (svptrue_b64 (), z1, -3)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_b.c -new file mode 100644 -index 000000000..a135e9c99 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_b.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** sel_b_tied1: -+** sel p0\.b, p3, p0\.b, p1\.b -+** ret -+*/ -+TEST_UNIFORM_P (sel_b_tied1, -+ p0 = svsel_b (p3, p0, p1), -+ p0 = svsel (p3, p0, p1)) -+ -+/* -+** sel_b_tied2: -+** sel p0\.b, p3, p1\.b, p0\.b -+** ret -+*/ -+TEST_UNIFORM_P (sel_b_tied2, -+ p0 = svsel_b (p3, p1, p0), -+ p0 = svsel (p3, p1, p0)) -+ -+/* -+** sel_b_untied: -+** sel p0\.b, p3, p1\.b, p2\.b -+** ret -+*/ -+TEST_UNIFORM_P (sel_b_untied, -+ p0 = svsel_b (p3, p1, p2), -+ p0 = svsel (p3, p1, p2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_bf16.c -new file mode 100644 -index 000000000..44636d8f8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_bf16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** sel_bf16_tied1: -+** sel z0\.h, p0, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sel_bf16_tied1, svbfloat16_t, -+ z0 = svsel_bf16 (p0, z0, z1), -+ z0 = svsel (p0, z0, z1)) -+ -+/* -+** sel_bf16_tied2: -+** sel z0\.h, p0, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sel_bf16_tied2, svbfloat16_t, -+ z0 = svsel_bf16 (p0, z1, z0), -+ z0 = svsel (p0, z1, z0)) -+ -+/* -+** sel_bf16_untied: -+** sel z0\.h, p0, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sel_bf16_untied, svbfloat16_t, -+ z0 = svsel_bf16 (p0, z1, z2), -+ z0 = svsel (p0, z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_f16.c -new file mode 100644 -index 000000000..35750ea81 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_f16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** sel_f16_tied1: -+** sel z0\.h, p0, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sel_f16_tied1, svfloat16_t, -+ z0 = svsel_f16 (p0, z0, z1), -+ z0 = svsel (p0, z0, z1)) -+ -+/* -+** sel_f16_tied2: -+** sel z0\.h, p0, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sel_f16_tied2, svfloat16_t, -+ z0 = svsel_f16 (p0, z1, z0), -+ z0 = svsel (p0, z1, z0)) -+ -+/* -+** sel_f16_untied: -+** sel z0\.h, p0, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sel_f16_untied, svfloat16_t, -+ z0 = svsel_f16 (p0, z1, z2), -+ z0 = svsel (p0, z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_f32.c -new file mode 100644 -index 000000000..639a84724 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_f32.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** sel_f32_tied1: -+** sel z0\.s, p0, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sel_f32_tied1, svfloat32_t, -+ z0 = svsel_f32 (p0, z0, z1), -+ z0 = svsel (p0, z0, z1)) -+ -+/* -+** sel_f32_tied2: -+** sel z0\.s, p0, z1\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sel_f32_tied2, 
svfloat32_t, -+ z0 = svsel_f32 (p0, z1, z0), -+ z0 = svsel (p0, z1, z0)) -+ -+/* -+** sel_f32_untied: -+** sel z0\.s, p0, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sel_f32_untied, svfloat32_t, -+ z0 = svsel_f32 (p0, z1, z2), -+ z0 = svsel (p0, z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_f64.c -new file mode 100644 -index 000000000..048d6e52a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_f64.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** sel_f64_tied1: -+** sel z0\.d, p0, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sel_f64_tied1, svfloat64_t, -+ z0 = svsel_f64 (p0, z0, z1), -+ z0 = svsel (p0, z0, z1)) -+ -+/* -+** sel_f64_tied2: -+** sel z0\.d, p0, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sel_f64_tied2, svfloat64_t, -+ z0 = svsel_f64 (p0, z1, z0), -+ z0 = svsel (p0, z1, z0)) -+ -+/* -+** sel_f64_untied: -+** sel z0\.d, p0, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sel_f64_untied, svfloat64_t, -+ z0 = svsel_f64 (p0, z1, z2), -+ z0 = svsel (p0, z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_s16.c -new file mode 100644 -index 000000000..e162da499 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_s16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** sel_s16_tied1: -+** sel z0\.h, p0, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sel_s16_tied1, svint16_t, -+ z0 = svsel_s16 (p0, z0, z1), -+ z0 = svsel (p0, z0, z1)) -+ -+/* -+** sel_s16_tied2: -+** sel z0\.h, p0, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sel_s16_tied2, svint16_t, -+ z0 = svsel_s16 (p0, z1, z0), -+ z0 = svsel (p0, z1, z0)) -+ -+/* -+** sel_s16_untied: -+** sel z0\.h, p0, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sel_s16_untied, svint16_t, -+ z0 = svsel_s16 (p0, z1, z2), -+ z0 = svsel (p0, z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_s32.c -new file mode 100644 -index 000000000..80839d803 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_s32.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** sel_s32_tied1: -+** sel z0\.s, p0, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sel_s32_tied1, svint32_t, -+ z0 = svsel_s32 (p0, z0, z1), -+ z0 = svsel (p0, z0, z1)) -+ -+/* -+** sel_s32_tied2: -+** sel z0\.s, p0, z1\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sel_s32_tied2, svint32_t, -+ z0 = svsel_s32 (p0, z1, z0), -+ z0 = svsel (p0, z1, z0)) -+ -+/* -+** sel_s32_untied: -+** sel z0\.s, p0, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sel_s32_untied, svint32_t, -+ z0 = svsel_s32 (p0, z1, z2), -+ z0 = svsel (p0, z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_s64.c -new file mode 100644 -index 000000000..85a77eafb ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_s64.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** sel_s64_tied1: -+** sel z0\.d, p0, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sel_s64_tied1, svint64_t, -+ z0 = svsel_s64 (p0, z0, z1), -+ z0 = svsel (p0, z0, 
z1)) -+ -+/* -+** sel_s64_tied2: -+** sel z0\.d, p0, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sel_s64_tied2, svint64_t, -+ z0 = svsel_s64 (p0, z1, z0), -+ z0 = svsel (p0, z1, z0)) -+ -+/* -+** sel_s64_untied: -+** sel z0\.d, p0, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sel_s64_untied, svint64_t, -+ z0 = svsel_s64 (p0, z1, z2), -+ z0 = svsel (p0, z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_s8.c -new file mode 100644 -index 000000000..28c43f627 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_s8.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** sel_s8_tied1: -+** sel z0\.b, p0, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (sel_s8_tied1, svint8_t, -+ z0 = svsel_s8 (p0, z0, z1), -+ z0 = svsel (p0, z0, z1)) -+ -+/* -+** sel_s8_tied2: -+** sel z0\.b, p0, z1\.b, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (sel_s8_tied2, svint8_t, -+ z0 = svsel_s8 (p0, z1, z0), -+ z0 = svsel (p0, z1, z0)) -+ -+/* -+** sel_s8_untied: -+** sel z0\.b, p0, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (sel_s8_untied, svint8_t, -+ z0 = svsel_s8 (p0, z1, z2), -+ z0 = svsel (p0, z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_u16.c -new file mode 100644 -index 000000000..b85ede803 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_u16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** sel_u16_tied1: -+** sel z0\.h, p0, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sel_u16_tied1, svuint16_t, -+ z0 = svsel_u16 (p0, z0, z1), -+ z0 = svsel (p0, z0, z1)) -+ -+/* -+** sel_u16_tied2: -+** sel z0\.h, p0, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sel_u16_tied2, svuint16_t, -+ z0 = svsel_u16 (p0, z1, z0), -+ z0 = svsel (p0, z1, z0)) -+ -+/* -+** sel_u16_untied: -+** sel z0\.h, p0, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sel_u16_untied, svuint16_t, -+ z0 = svsel_u16 (p0, z1, z2), -+ z0 = svsel (p0, z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_u32.c -new file mode 100644 -index 000000000..636cf8790 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_u32.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** sel_u32_tied1: -+** sel z0\.s, p0, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sel_u32_tied1, svuint32_t, -+ z0 = svsel_u32 (p0, z0, z1), -+ z0 = svsel (p0, z0, z1)) -+ -+/* -+** sel_u32_tied2: -+** sel z0\.s, p0, z1\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sel_u32_tied2, svuint32_t, -+ z0 = svsel_u32 (p0, z1, z0), -+ z0 = svsel (p0, z1, z0)) -+ -+/* -+** sel_u32_untied: -+** sel z0\.s, p0, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sel_u32_untied, svuint32_t, -+ z0 = svsel_u32 (p0, z1, z2), -+ z0 = svsel (p0, z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_u64.c -new file mode 100644 -index 000000000..6325ca56f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_u64.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** sel_u64_tied1: -+** sel z0\.d, p0, z0\.d, z1\.d -+** ret -+*/ 
-+TEST_UNIFORM_Z (sel_u64_tied1, svuint64_t, -+ z0 = svsel_u64 (p0, z0, z1), -+ z0 = svsel (p0, z0, z1)) -+ -+/* -+** sel_u64_tied2: -+** sel z0\.d, p0, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sel_u64_tied2, svuint64_t, -+ z0 = svsel_u64 (p0, z1, z0), -+ z0 = svsel (p0, z1, z0)) -+ -+/* -+** sel_u64_untied: -+** sel z0\.d, p0, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sel_u64_untied, svuint64_t, -+ z0 = svsel_u64 (p0, z1, z2), -+ z0 = svsel (p0, z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_u8.c -new file mode 100644 -index 000000000..5af53dccd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sel_u8.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** sel_u8_tied1: -+** sel z0\.b, p0, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (sel_u8_tied1, svuint8_t, -+ z0 = svsel_u8 (p0, z0, z1), -+ z0 = svsel (p0, z0, z1)) -+ -+/* -+** sel_u8_tied2: -+** sel z0\.b, p0, z1\.b, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (sel_u8_tied2, svuint8_t, -+ z0 = svsel_u8 (p0, z1, z0), -+ z0 = svsel (p0, z1, z0)) -+ -+/* -+** sel_u8_untied: -+** sel z0\.b, p0, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (sel_u8_untied, svuint8_t, -+ z0 = svsel_u8 (p0, z1, z2), -+ z0 = svsel (p0, z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_bf16.c -new file mode 100644 -index 000000000..b160a2517 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_bf16.c -@@ -0,0 +1,41 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** set2_bf16_z24_0: -+** mov z25\.d, z5\.d -+** mov z24\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_bf16_z24_0, svbfloat16x2_t, svbfloat16_t, -+ z24 = svset2_bf16 (z4, 0, z0), -+ z24 = svset2 (z4, 0, z0)) -+ -+/* -+** set2_bf16_z24_1: -+** mov z24\.d, z4\.d -+** mov z25\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_bf16_z24_1, svbfloat16x2_t, svbfloat16_t, -+ z24 = svset2_bf16 (z4, 1, z0), -+ z24 = svset2 (z4, 1, z0)) -+ -+/* -+** set2_bf16_z4_0: -+** mov z4\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_bf16_z4_0, svbfloat16x2_t, svbfloat16_t, -+ z4 = svset2_bf16 (z4, 0, z0), -+ z4 = svset2 (z4, 0, z0)) -+ -+/* -+** set2_bf16_z4_1: -+** mov z5\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_bf16_z4_1, svbfloat16x2_t, svbfloat16_t, -+ z4 = svset2_bf16 (z4, 1, z0), -+ z4 = svset2 (z4, 1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_f16.c -new file mode 100644 -index 000000000..859600698 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_f16.c -@@ -0,0 +1,41 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** set2_f16_z24_0: -+** mov z25\.d, z5\.d -+** mov z24\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_f16_z24_0, svfloat16x2_t, svfloat16_t, -+ z24 = svset2_f16 (z4, 0, z0), -+ z24 = svset2 (z4, 0, z0)) -+ -+/* -+** set2_f16_z24_1: -+** mov z24\.d, z4\.d -+** mov z25\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_f16_z24_1, svfloat16x2_t, svfloat16_t, -+ z24 = svset2_f16 (z4, 1, z0), -+ z24 = svset2 (z4, 1, z0)) -+ -+/* -+** set2_f16_z4_0: -+** mov z4\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_f16_z4_0, svfloat16x2_t, svfloat16_t, -+ z4 = svset2_f16 (z4, 0, z0), -+ z4 = svset2 (z4, 0, z0)) -+ -+/* -+** set2_f16_z4_1: -+** mov 
z5\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_f16_z4_1, svfloat16x2_t, svfloat16_t, -+ z4 = svset2_f16 (z4, 1, z0), -+ z4 = svset2 (z4, 1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_f32.c -new file mode 100644 -index 000000000..a95ff2fc5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_f32.c -@@ -0,0 +1,41 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** set2_f32_z24_0: -+** mov z25\.d, z5\.d -+** mov z24\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_f32_z24_0, svfloat32x2_t, svfloat32_t, -+ z24 = svset2_f32 (z4, 0, z0), -+ z24 = svset2 (z4, 0, z0)) -+ -+/* -+** set2_f32_z24_1: -+** mov z24\.d, z4\.d -+** mov z25\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_f32_z24_1, svfloat32x2_t, svfloat32_t, -+ z24 = svset2_f32 (z4, 1, z0), -+ z24 = svset2 (z4, 1, z0)) -+ -+/* -+** set2_f32_z4_0: -+** mov z4\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_f32_z4_0, svfloat32x2_t, svfloat32_t, -+ z4 = svset2_f32 (z4, 0, z0), -+ z4 = svset2 (z4, 0, z0)) -+ -+/* -+** set2_f32_z4_1: -+** mov z5\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_f32_z4_1, svfloat32x2_t, svfloat32_t, -+ z4 = svset2_f32 (z4, 1, z0), -+ z4 = svset2 (z4, 1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_f64.c -new file mode 100644 -index 000000000..77837b7d8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_f64.c -@@ -0,0 +1,41 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** set2_f64_z24_0: -+** mov z25\.d, z5\.d -+** mov z24\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_f64_z24_0, svfloat64x2_t, svfloat64_t, -+ z24 = svset2_f64 (z4, 0, z0), -+ z24 = svset2 (z4, 0, z0)) -+ -+/* -+** set2_f64_z24_1: -+** mov z24\.d, z4\.d -+** mov z25\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_f64_z24_1, svfloat64x2_t, svfloat64_t, -+ z24 = svset2_f64 (z4, 1, z0), -+ z24 = svset2 (z4, 1, z0)) -+ -+/* -+** set2_f64_z4_0: -+** mov z4\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_f64_z4_0, svfloat64x2_t, svfloat64_t, -+ z4 = svset2_f64 (z4, 0, z0), -+ z4 = svset2 (z4, 0, z0)) -+ -+/* -+** set2_f64_z4_1: -+** mov z5\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_f64_z4_1, svfloat64x2_t, svfloat64_t, -+ z4 = svset2_f64 (z4, 1, z0), -+ z4 = svset2 (z4, 1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_s16.c -new file mode 100644 -index 000000000..aa2e70fd1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_s16.c -@@ -0,0 +1,41 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** set2_s16_z24_0: -+** mov z25\.d, z5\.d -+** mov z24\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_s16_z24_0, svint16x2_t, svint16_t, -+ z24 = svset2_s16 (z4, 0, z0), -+ z24 = svset2 (z4, 0, z0)) -+ -+/* -+** set2_s16_z24_1: -+** mov z24\.d, z4\.d -+** mov z25\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_s16_z24_1, svint16x2_t, svint16_t, -+ z24 = svset2_s16 (z4, 1, z0), -+ z24 = svset2 (z4, 1, z0)) -+ -+/* -+** set2_s16_z4_0: -+** mov z4\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_s16_z4_0, svint16x2_t, svint16_t, -+ z4 = svset2_s16 (z4, 0, z0), -+ z4 = svset2 (z4, 0, z0)) -+ -+/* -+** set2_s16_z4_1: -+** mov z5\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_s16_z4_1, svint16x2_t, svint16_t, -+ z4 = svset2_s16 (z4, 1, z0), -+ z4 
= svset2 (z4, 1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_s32.c -new file mode 100644 -index 000000000..3a7c289aa ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_s32.c -@@ -0,0 +1,41 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** set2_s32_z24_0: -+** mov z25\.d, z5\.d -+** mov z24\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_s32_z24_0, svint32x2_t, svint32_t, -+ z24 = svset2_s32 (z4, 0, z0), -+ z24 = svset2 (z4, 0, z0)) -+ -+/* -+** set2_s32_z24_1: -+** mov z24\.d, z4\.d -+** mov z25\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_s32_z24_1, svint32x2_t, svint32_t, -+ z24 = svset2_s32 (z4, 1, z0), -+ z24 = svset2 (z4, 1, z0)) -+ -+/* -+** set2_s32_z4_0: -+** mov z4\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_s32_z4_0, svint32x2_t, svint32_t, -+ z4 = svset2_s32 (z4, 0, z0), -+ z4 = svset2 (z4, 0, z0)) -+ -+/* -+** set2_s32_z4_1: -+** mov z5\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_s32_z4_1, svint32x2_t, svint32_t, -+ z4 = svset2_s32 (z4, 1, z0), -+ z4 = svset2 (z4, 1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_s64.c -new file mode 100644 -index 000000000..ca6df54d9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_s64.c -@@ -0,0 +1,41 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** set2_s64_z24_0: -+** mov z25\.d, z5\.d -+** mov z24\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_s64_z24_0, svint64x2_t, svint64_t, -+ z24 = svset2_s64 (z4, 0, z0), -+ z24 = svset2 (z4, 0, z0)) -+ -+/* -+** set2_s64_z24_1: -+** mov z24\.d, z4\.d -+** mov z25\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_s64_z24_1, svint64x2_t, svint64_t, -+ z24 = svset2_s64 (z4, 1, z0), -+ z24 = svset2 (z4, 1, z0)) -+ -+/* -+** set2_s64_z4_0: -+** mov z4\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_s64_z4_0, svint64x2_t, svint64_t, -+ z4 = svset2_s64 (z4, 0, z0), -+ z4 = svset2 (z4, 0, z0)) -+ -+/* -+** set2_s64_z4_1: -+** mov z5\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_s64_z4_1, svint64x2_t, svint64_t, -+ z4 = svset2_s64 (z4, 1, z0), -+ z4 = svset2 (z4, 1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_s8.c -new file mode 100644 -index 000000000..e143128a4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_s8.c -@@ -0,0 +1,41 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** set2_s8_z24_0: -+** mov z25\.d, z5\.d -+** mov z24\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_s8_z24_0, svint8x2_t, svint8_t, -+ z24 = svset2_s8 (z4, 0, z0), -+ z24 = svset2 (z4, 0, z0)) -+ -+/* -+** set2_s8_z24_1: -+** mov z24\.d, z4\.d -+** mov z25\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_s8_z24_1, svint8x2_t, svint8_t, -+ z24 = svset2_s8 (z4, 1, z0), -+ z24 = svset2 (z4, 1, z0)) -+ -+/* -+** set2_s8_z4_0: -+** mov z4\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_s8_z4_0, svint8x2_t, svint8_t, -+ z4 = svset2_s8 (z4, 0, z0), -+ z4 = svset2 (z4, 0, z0)) -+ -+/* -+** set2_s8_z4_1: -+** mov z5\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_s8_z4_1, svint8x2_t, svint8_t, -+ z4 = svset2_s8 (z4, 1, z0), -+ z4 = svset2 (z4, 1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_u16.c -new file mode 100644 
-index 000000000..53da08398 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_u16.c -@@ -0,0 +1,41 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** set2_u16_z24_0: -+** mov z25\.d, z5\.d -+** mov z24\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_u16_z24_0, svuint16x2_t, svuint16_t, -+ z24 = svset2_u16 (z4, 0, z0), -+ z24 = svset2 (z4, 0, z0)) -+ -+/* -+** set2_u16_z24_1: -+** mov z24\.d, z4\.d -+** mov z25\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_u16_z24_1, svuint16x2_t, svuint16_t, -+ z24 = svset2_u16 (z4, 1, z0), -+ z24 = svset2 (z4, 1, z0)) -+ -+/* -+** set2_u16_z4_0: -+** mov z4\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_u16_z4_0, svuint16x2_t, svuint16_t, -+ z4 = svset2_u16 (z4, 0, z0), -+ z4 = svset2 (z4, 0, z0)) -+ -+/* -+** set2_u16_z4_1: -+** mov z5\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_u16_z4_1, svuint16x2_t, svuint16_t, -+ z4 = svset2_u16 (z4, 1, z0), -+ z4 = svset2 (z4, 1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_u32.c -new file mode 100644 -index 000000000..5266a62d8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_u32.c -@@ -0,0 +1,41 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** set2_u32_z24_0: -+** mov z25\.d, z5\.d -+** mov z24\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_u32_z24_0, svuint32x2_t, svuint32_t, -+ z24 = svset2_u32 (z4, 0, z0), -+ z24 = svset2 (z4, 0, z0)) -+ -+/* -+** set2_u32_z24_1: -+** mov z24\.d, z4\.d -+** mov z25\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_u32_z24_1, svuint32x2_t, svuint32_t, -+ z24 = svset2_u32 (z4, 1, z0), -+ z24 = svset2 (z4, 1, z0)) -+ -+/* -+** set2_u32_z4_0: -+** mov z4\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_u32_z4_0, svuint32x2_t, svuint32_t, -+ z4 = svset2_u32 (z4, 0, z0), -+ z4 = svset2 (z4, 0, z0)) -+ -+/* -+** set2_u32_z4_1: -+** mov z5\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_u32_z4_1, svuint32x2_t, svuint32_t, -+ z4 = svset2_u32 (z4, 1, z0), -+ z4 = svset2 (z4, 1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_u64.c -new file mode 100644 -index 000000000..f7d2a1807 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_u64.c -@@ -0,0 +1,41 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** set2_u64_z24_0: -+** mov z25\.d, z5\.d -+** mov z24\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_u64_z24_0, svuint64x2_t, svuint64_t, -+ z24 = svset2_u64 (z4, 0, z0), -+ z24 = svset2 (z4, 0, z0)) -+ -+/* -+** set2_u64_z24_1: -+** mov z24\.d, z4\.d -+** mov z25\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_u64_z24_1, svuint64x2_t, svuint64_t, -+ z24 = svset2_u64 (z4, 1, z0), -+ z24 = svset2 (z4, 1, z0)) -+ -+/* -+** set2_u64_z4_0: -+** mov z4\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_u64_z4_0, svuint64x2_t, svuint64_t, -+ z4 = svset2_u64 (z4, 0, z0), -+ z4 = svset2 (z4, 0, z0)) -+ -+/* -+** set2_u64_z4_1: -+** mov z5\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_u64_z4_1, svuint64x2_t, svuint64_t, -+ z4 = svset2_u64 (z4, 1, z0), -+ z4 = svset2 (z4, 1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_u8.c -new file mode 100644 -index 000000000..9494a0e54 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set2_u8.c -@@ -0,0 +1,41 @@ -+/* { 
dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** set2_u8_z24_0: -+** mov z25\.d, z5\.d -+** mov z24\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_u8_z24_0, svuint8x2_t, svuint8_t, -+ z24 = svset2_u8 (z4, 0, z0), -+ z24 = svset2 (z4, 0, z0)) -+ -+/* -+** set2_u8_z24_1: -+** mov z24\.d, z4\.d -+** mov z25\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_u8_z24_1, svuint8x2_t, svuint8_t, -+ z24 = svset2_u8 (z4, 1, z0), -+ z24 = svset2 (z4, 1, z0)) -+ -+/* -+** set2_u8_z4_0: -+** mov z4\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_u8_z4_0, svuint8x2_t, svuint8_t, -+ z4 = svset2_u8 (z4, 0, z0), -+ z4 = svset2 (z4, 0, z0)) -+ -+/* -+** set2_u8_z4_1: -+** mov z5\.d, z0\.d -+** ret -+*/ -+TEST_SET (set2_u8_z4_1, svuint8x2_t, svuint8_t, -+ z4 = svset2_u8 (z4, 1, z0), -+ z4 = svset2 (z4, 1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_bf16.c -new file mode 100644 -index 000000000..4e0707d09 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_bf16.c -@@ -0,0 +1,63 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** set3_bf16_z24_0: -+** mov z25\.d, z5\.d -+** mov z26\.d, z6\.d -+** mov z24\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_bf16_z24_0, svbfloat16x3_t, svbfloat16_t, -+ z24 = svset3_bf16 (z4, 0, z0), -+ z24 = svset3 (z4, 0, z0)) -+ -+/* -+** set3_bf16_z24_1: -+** mov z24\.d, z4\.d -+** mov z26\.d, z6\.d -+** mov z25\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_bf16_z24_1, svbfloat16x3_t, svbfloat16_t, -+ z24 = svset3_bf16 (z4, 1, z0), -+ z24 = svset3 (z4, 1, z0)) -+ -+/* -+** set3_bf16_z24_2: -+** mov z24\.d, z4\.d -+** mov z25\.d, z5\.d -+** mov z26\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_bf16_z24_2, svbfloat16x3_t, svbfloat16_t, -+ z24 = svset3_bf16 (z4, 2, z0), -+ z24 = svset3 (z4, 2, z0)) -+ -+/* -+** set3_bf16_z4_0: -+** mov z4\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_bf16_z4_0, svbfloat16x3_t, svbfloat16_t, -+ z4 = svset3_bf16 (z4, 0, z0), -+ z4 = svset3 (z4, 0, z0)) -+ -+/* -+** set3_bf16_z4_1: -+** mov z5\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_bf16_z4_1, svbfloat16x3_t, svbfloat16_t, -+ z4 = svset3_bf16 (z4, 1, z0), -+ z4 = svset3 (z4, 1, z0)) -+ -+/* -+** set3_bf16_z4_2: -+** mov z6\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_bf16_z4_2, svbfloat16x3_t, svbfloat16_t, -+ z4 = svset3_bf16 (z4, 2, z0), -+ z4 = svset3 (z4, 2, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_f16.c -new file mode 100644 -index 000000000..b6bb3a2bf ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_f16.c -@@ -0,0 +1,63 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** set3_f16_z24_0: -+** mov z25\.d, z5\.d -+** mov z26\.d, z6\.d -+** mov z24\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_f16_z24_0, svfloat16x3_t, svfloat16_t, -+ z24 = svset3_f16 (z4, 0, z0), -+ z24 = svset3 (z4, 0, z0)) -+ -+/* -+** set3_f16_z24_1: -+** mov z24\.d, z4\.d -+** mov z26\.d, z6\.d -+** mov z25\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_f16_z24_1, svfloat16x3_t, svfloat16_t, -+ z24 = svset3_f16 (z4, 1, z0), -+ z24 = svset3 (z4, 1, z0)) -+ -+/* -+** set3_f16_z24_2: -+** mov z24\.d, z4\.d -+** mov z25\.d, z5\.d -+** mov z26\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_f16_z24_2, svfloat16x3_t, svfloat16_t, -+ z24 = svset3_f16 (z4, 2, z0), -+ z24 = svset3 (z4, 2, z0)) -+ -+/* 
-+** set3_f16_z4_0: -+** mov z4\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_f16_z4_0, svfloat16x3_t, svfloat16_t, -+ z4 = svset3_f16 (z4, 0, z0), -+ z4 = svset3 (z4, 0, z0)) -+ -+/* -+** set3_f16_z4_1: -+** mov z5\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_f16_z4_1, svfloat16x3_t, svfloat16_t, -+ z4 = svset3_f16 (z4, 1, z0), -+ z4 = svset3 (z4, 1, z0)) -+ -+/* -+** set3_f16_z4_2: -+** mov z6\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_f16_z4_2, svfloat16x3_t, svfloat16_t, -+ z4 = svset3_f16 (z4, 2, z0), -+ z4 = svset3 (z4, 2, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_f32.c -new file mode 100644 -index 000000000..659bc713f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_f32.c -@@ -0,0 +1,63 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** set3_f32_z24_0: -+** mov z25\.d, z5\.d -+** mov z26\.d, z6\.d -+** mov z24\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_f32_z24_0, svfloat32x3_t, svfloat32_t, -+ z24 = svset3_f32 (z4, 0, z0), -+ z24 = svset3 (z4, 0, z0)) -+ -+/* -+** set3_f32_z24_1: -+** mov z24\.d, z4\.d -+** mov z26\.d, z6\.d -+** mov z25\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_f32_z24_1, svfloat32x3_t, svfloat32_t, -+ z24 = svset3_f32 (z4, 1, z0), -+ z24 = svset3 (z4, 1, z0)) -+ -+/* -+** set3_f32_z24_2: -+** mov z24\.d, z4\.d -+** mov z25\.d, z5\.d -+** mov z26\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_f32_z24_2, svfloat32x3_t, svfloat32_t, -+ z24 = svset3_f32 (z4, 2, z0), -+ z24 = svset3 (z4, 2, z0)) -+ -+/* -+** set3_f32_z4_0: -+** mov z4\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_f32_z4_0, svfloat32x3_t, svfloat32_t, -+ z4 = svset3_f32 (z4, 0, z0), -+ z4 = svset3 (z4, 0, z0)) -+ -+/* -+** set3_f32_z4_1: -+** mov z5\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_f32_z4_1, svfloat32x3_t, svfloat32_t, -+ z4 = svset3_f32 (z4, 1, z0), -+ z4 = svset3 (z4, 1, z0)) -+ -+/* -+** set3_f32_z4_2: -+** mov z6\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_f32_z4_2, svfloat32x3_t, svfloat32_t, -+ z4 = svset3_f32 (z4, 2, z0), -+ z4 = svset3 (z4, 2, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_f64.c -new file mode 100644 -index 000000000..2cf3b6015 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_f64.c -@@ -0,0 +1,63 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** set3_f64_z24_0: -+** mov z25\.d, z5\.d -+** mov z26\.d, z6\.d -+** mov z24\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_f64_z24_0, svfloat64x3_t, svfloat64_t, -+ z24 = svset3_f64 (z4, 0, z0), -+ z24 = svset3 (z4, 0, z0)) -+ -+/* -+** set3_f64_z24_1: -+** mov z24\.d, z4\.d -+** mov z26\.d, z6\.d -+** mov z25\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_f64_z24_1, svfloat64x3_t, svfloat64_t, -+ z24 = svset3_f64 (z4, 1, z0), -+ z24 = svset3 (z4, 1, z0)) -+ -+/* -+** set3_f64_z24_2: -+** mov z24\.d, z4\.d -+** mov z25\.d, z5\.d -+** mov z26\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_f64_z24_2, svfloat64x3_t, svfloat64_t, -+ z24 = svset3_f64 (z4, 2, z0), -+ z24 = svset3 (z4, 2, z0)) -+ -+/* -+** set3_f64_z4_0: -+** mov z4\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_f64_z4_0, svfloat64x3_t, svfloat64_t, -+ z4 = svset3_f64 (z4, 0, z0), -+ z4 = svset3 (z4, 0, z0)) -+ -+/* -+** set3_f64_z4_1: -+** mov z5\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_f64_z4_1, svfloat64x3_t, svfloat64_t, -+ z4 = svset3_f64 (z4, 1, z0), -+ z4 = svset3 (z4, 1, 
z0)) -+ -+/* -+** set3_f64_z4_2: -+** mov z6\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_f64_z4_2, svfloat64x3_t, svfloat64_t, -+ z4 = svset3_f64 (z4, 2, z0), -+ z4 = svset3 (z4, 2, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_s16.c -new file mode 100644 -index 000000000..907ae9894 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_s16.c -@@ -0,0 +1,63 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** set3_s16_z24_0: -+** mov z25\.d, z5\.d -+** mov z26\.d, z6\.d -+** mov z24\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_s16_z24_0, svint16x3_t, svint16_t, -+ z24 = svset3_s16 (z4, 0, z0), -+ z24 = svset3 (z4, 0, z0)) -+ -+/* -+** set3_s16_z24_1: -+** mov z24\.d, z4\.d -+** mov z26\.d, z6\.d -+** mov z25\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_s16_z24_1, svint16x3_t, svint16_t, -+ z24 = svset3_s16 (z4, 1, z0), -+ z24 = svset3 (z4, 1, z0)) -+ -+/* -+** set3_s16_z24_2: -+** mov z24\.d, z4\.d -+** mov z25\.d, z5\.d -+** mov z26\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_s16_z24_2, svint16x3_t, svint16_t, -+ z24 = svset3_s16 (z4, 2, z0), -+ z24 = svset3 (z4, 2, z0)) -+ -+/* -+** set3_s16_z4_0: -+** mov z4\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_s16_z4_0, svint16x3_t, svint16_t, -+ z4 = svset3_s16 (z4, 0, z0), -+ z4 = svset3 (z4, 0, z0)) -+ -+/* -+** set3_s16_z4_1: -+** mov z5\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_s16_z4_1, svint16x3_t, svint16_t, -+ z4 = svset3_s16 (z4, 1, z0), -+ z4 = svset3 (z4, 1, z0)) -+ -+/* -+** set3_s16_z4_2: -+** mov z6\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_s16_z4_2, svint16x3_t, svint16_t, -+ z4 = svset3_s16 (z4, 2, z0), -+ z4 = svset3 (z4, 2, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_s32.c -new file mode 100644 -index 000000000..0baa33c3a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_s32.c -@@ -0,0 +1,63 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** set3_s32_z24_0: -+** mov z25\.d, z5\.d -+** mov z26\.d, z6\.d -+** mov z24\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_s32_z24_0, svint32x3_t, svint32_t, -+ z24 = svset3_s32 (z4, 0, z0), -+ z24 = svset3 (z4, 0, z0)) -+ -+/* -+** set3_s32_z24_1: -+** mov z24\.d, z4\.d -+** mov z26\.d, z6\.d -+** mov z25\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_s32_z24_1, svint32x3_t, svint32_t, -+ z24 = svset3_s32 (z4, 1, z0), -+ z24 = svset3 (z4, 1, z0)) -+ -+/* -+** set3_s32_z24_2: -+** mov z24\.d, z4\.d -+** mov z25\.d, z5\.d -+** mov z26\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_s32_z24_2, svint32x3_t, svint32_t, -+ z24 = svset3_s32 (z4, 2, z0), -+ z24 = svset3 (z4, 2, z0)) -+ -+/* -+** set3_s32_z4_0: -+** mov z4\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_s32_z4_0, svint32x3_t, svint32_t, -+ z4 = svset3_s32 (z4, 0, z0), -+ z4 = svset3 (z4, 0, z0)) -+ -+/* -+** set3_s32_z4_1: -+** mov z5\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_s32_z4_1, svint32x3_t, svint32_t, -+ z4 = svset3_s32 (z4, 1, z0), -+ z4 = svset3 (z4, 1, z0)) -+ -+/* -+** set3_s32_z4_2: -+** mov z6\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_s32_z4_2, svint32x3_t, svint32_t, -+ z4 = svset3_s32 (z4, 2, z0), -+ z4 = svset3 (z4, 2, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_s64.c -new file mode 100644 -index 000000000..d1d142c71 ---- /dev/null -+++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_s64.c -@@ -0,0 +1,63 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** set3_s64_z24_0: -+** mov z25\.d, z5\.d -+** mov z26\.d, z6\.d -+** mov z24\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_s64_z24_0, svint64x3_t, svint64_t, -+ z24 = svset3_s64 (z4, 0, z0), -+ z24 = svset3 (z4, 0, z0)) -+ -+/* -+** set3_s64_z24_1: -+** mov z24\.d, z4\.d -+** mov z26\.d, z6\.d -+** mov z25\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_s64_z24_1, svint64x3_t, svint64_t, -+ z24 = svset3_s64 (z4, 1, z0), -+ z24 = svset3 (z4, 1, z0)) -+ -+/* -+** set3_s64_z24_2: -+** mov z24\.d, z4\.d -+** mov z25\.d, z5\.d -+** mov z26\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_s64_z24_2, svint64x3_t, svint64_t, -+ z24 = svset3_s64 (z4, 2, z0), -+ z24 = svset3 (z4, 2, z0)) -+ -+/* -+** set3_s64_z4_0: -+** mov z4\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_s64_z4_0, svint64x3_t, svint64_t, -+ z4 = svset3_s64 (z4, 0, z0), -+ z4 = svset3 (z4, 0, z0)) -+ -+/* -+** set3_s64_z4_1: -+** mov z5\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_s64_z4_1, svint64x3_t, svint64_t, -+ z4 = svset3_s64 (z4, 1, z0), -+ z4 = svset3 (z4, 1, z0)) -+ -+/* -+** set3_s64_z4_2: -+** mov z6\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_s64_z4_2, svint64x3_t, svint64_t, -+ z4 = svset3_s64 (z4, 2, z0), -+ z4 = svset3 (z4, 2, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_s8.c -new file mode 100644 -index 000000000..8badf4b1d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_s8.c -@@ -0,0 +1,63 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** set3_s8_z24_0: -+** mov z25\.d, z5\.d -+** mov z26\.d, z6\.d -+** mov z24\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_s8_z24_0, svint8x3_t, svint8_t, -+ z24 = svset3_s8 (z4, 0, z0), -+ z24 = svset3 (z4, 0, z0)) -+ -+/* -+** set3_s8_z24_1: -+** mov z24\.d, z4\.d -+** mov z26\.d, z6\.d -+** mov z25\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_s8_z24_1, svint8x3_t, svint8_t, -+ z24 = svset3_s8 (z4, 1, z0), -+ z24 = svset3 (z4, 1, z0)) -+ -+/* -+** set3_s8_z24_2: -+** mov z24\.d, z4\.d -+** mov z25\.d, z5\.d -+** mov z26\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_s8_z24_2, svint8x3_t, svint8_t, -+ z24 = svset3_s8 (z4, 2, z0), -+ z24 = svset3 (z4, 2, z0)) -+ -+/* -+** set3_s8_z4_0: -+** mov z4\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_s8_z4_0, svint8x3_t, svint8_t, -+ z4 = svset3_s8 (z4, 0, z0), -+ z4 = svset3 (z4, 0, z0)) -+ -+/* -+** set3_s8_z4_1: -+** mov z5\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_s8_z4_1, svint8x3_t, svint8_t, -+ z4 = svset3_s8 (z4, 1, z0), -+ z4 = svset3 (z4, 1, z0)) -+ -+/* -+** set3_s8_z4_2: -+** mov z6\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_s8_z4_2, svint8x3_t, svint8_t, -+ z4 = svset3_s8 (z4, 2, z0), -+ z4 = svset3 (z4, 2, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_u16.c -new file mode 100644 -index 000000000..df7ce88d8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_u16.c -@@ -0,0 +1,63 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** set3_u16_z24_0: -+** mov z25\.d, z5\.d -+** mov z26\.d, z6\.d -+** mov z24\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_u16_z24_0, svuint16x3_t, svuint16_t, -+ z24 = svset3_u16 (z4, 0, z0), -+ z24 = svset3 (z4, 0, z0)) -+ -+/* -+** 
set3_u16_z24_1: -+** mov z24\.d, z4\.d -+** mov z26\.d, z6\.d -+** mov z25\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_u16_z24_1, svuint16x3_t, svuint16_t, -+ z24 = svset3_u16 (z4, 1, z0), -+ z24 = svset3 (z4, 1, z0)) -+ -+/* -+** set3_u16_z24_2: -+** mov z24\.d, z4\.d -+** mov z25\.d, z5\.d -+** mov z26\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_u16_z24_2, svuint16x3_t, svuint16_t, -+ z24 = svset3_u16 (z4, 2, z0), -+ z24 = svset3 (z4, 2, z0)) -+ -+/* -+** set3_u16_z4_0: -+** mov z4\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_u16_z4_0, svuint16x3_t, svuint16_t, -+ z4 = svset3_u16 (z4, 0, z0), -+ z4 = svset3 (z4, 0, z0)) -+ -+/* -+** set3_u16_z4_1: -+** mov z5\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_u16_z4_1, svuint16x3_t, svuint16_t, -+ z4 = svset3_u16 (z4, 1, z0), -+ z4 = svset3 (z4, 1, z0)) -+ -+/* -+** set3_u16_z4_2: -+** mov z6\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_u16_z4_2, svuint16x3_t, svuint16_t, -+ z4 = svset3_u16 (z4, 2, z0), -+ z4 = svset3 (z4, 2, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_u32.c -new file mode 100644 -index 000000000..703a68f5c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_u32.c -@@ -0,0 +1,63 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** set3_u32_z24_0: -+** mov z25\.d, z5\.d -+** mov z26\.d, z6\.d -+** mov z24\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_u32_z24_0, svuint32x3_t, svuint32_t, -+ z24 = svset3_u32 (z4, 0, z0), -+ z24 = svset3 (z4, 0, z0)) -+ -+/* -+** set3_u32_z24_1: -+** mov z24\.d, z4\.d -+** mov z26\.d, z6\.d -+** mov z25\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_u32_z24_1, svuint32x3_t, svuint32_t, -+ z24 = svset3_u32 (z4, 1, z0), -+ z24 = svset3 (z4, 1, z0)) -+ -+/* -+** set3_u32_z24_2: -+** mov z24\.d, z4\.d -+** mov z25\.d, z5\.d -+** mov z26\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_u32_z24_2, svuint32x3_t, svuint32_t, -+ z24 = svset3_u32 (z4, 2, z0), -+ z24 = svset3 (z4, 2, z0)) -+ -+/* -+** set3_u32_z4_0: -+** mov z4\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_u32_z4_0, svuint32x3_t, svuint32_t, -+ z4 = svset3_u32 (z4, 0, z0), -+ z4 = svset3 (z4, 0, z0)) -+ -+/* -+** set3_u32_z4_1: -+** mov z5\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_u32_z4_1, svuint32x3_t, svuint32_t, -+ z4 = svset3_u32 (z4, 1, z0), -+ z4 = svset3 (z4, 1, z0)) -+ -+/* -+** set3_u32_z4_2: -+** mov z6\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_u32_z4_2, svuint32x3_t, svuint32_t, -+ z4 = svset3_u32 (z4, 2, z0), -+ z4 = svset3 (z4, 2, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_u64.c -new file mode 100644 -index 000000000..bff5b3539 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_u64.c -@@ -0,0 +1,63 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** set3_u64_z24_0: -+** mov z25\.d, z5\.d -+** mov z26\.d, z6\.d -+** mov z24\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_u64_z24_0, svuint64x3_t, svuint64_t, -+ z24 = svset3_u64 (z4, 0, z0), -+ z24 = svset3 (z4, 0, z0)) -+ -+/* -+** set3_u64_z24_1: -+** mov z24\.d, z4\.d -+** mov z26\.d, z6\.d -+** mov z25\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_u64_z24_1, svuint64x3_t, svuint64_t, -+ z24 = svset3_u64 (z4, 1, z0), -+ z24 = svset3 (z4, 1, z0)) -+ -+/* -+** set3_u64_z24_2: -+** mov z24\.d, z4\.d -+** mov z25\.d, z5\.d -+** mov z26\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_u64_z24_2, svuint64x3_t, 
svuint64_t, -+ z24 = svset3_u64 (z4, 2, z0), -+ z24 = svset3 (z4, 2, z0)) -+ -+/* -+** set3_u64_z4_0: -+** mov z4\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_u64_z4_0, svuint64x3_t, svuint64_t, -+ z4 = svset3_u64 (z4, 0, z0), -+ z4 = svset3 (z4, 0, z0)) -+ -+/* -+** set3_u64_z4_1: -+** mov z5\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_u64_z4_1, svuint64x3_t, svuint64_t, -+ z4 = svset3_u64 (z4, 1, z0), -+ z4 = svset3 (z4, 1, z0)) -+ -+/* -+** set3_u64_z4_2: -+** mov z6\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_u64_z4_2, svuint64x3_t, svuint64_t, -+ z4 = svset3_u64 (z4, 2, z0), -+ z4 = svset3 (z4, 2, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_u8.c -new file mode 100644 -index 000000000..9f40001c4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set3_u8.c -@@ -0,0 +1,63 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** set3_u8_z24_0: -+** mov z25\.d, z5\.d -+** mov z26\.d, z6\.d -+** mov z24\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_u8_z24_0, svuint8x3_t, svuint8_t, -+ z24 = svset3_u8 (z4, 0, z0), -+ z24 = svset3 (z4, 0, z0)) -+ -+/* -+** set3_u8_z24_1: -+** mov z24\.d, z4\.d -+** mov z26\.d, z6\.d -+** mov z25\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_u8_z24_1, svuint8x3_t, svuint8_t, -+ z24 = svset3_u8 (z4, 1, z0), -+ z24 = svset3 (z4, 1, z0)) -+ -+/* -+** set3_u8_z24_2: -+** mov z24\.d, z4\.d -+** mov z25\.d, z5\.d -+** mov z26\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_u8_z24_2, svuint8x3_t, svuint8_t, -+ z24 = svset3_u8 (z4, 2, z0), -+ z24 = svset3 (z4, 2, z0)) -+ -+/* -+** set3_u8_z4_0: -+** mov z4\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_u8_z4_0, svuint8x3_t, svuint8_t, -+ z4 = svset3_u8 (z4, 0, z0), -+ z4 = svset3 (z4, 0, z0)) -+ -+/* -+** set3_u8_z4_1: -+** mov z5\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_u8_z4_1, svuint8x3_t, svuint8_t, -+ z4 = svset3_u8 (z4, 1, z0), -+ z4 = svset3 (z4, 1, z0)) -+ -+/* -+** set3_u8_z4_2: -+** mov z6\.d, z0\.d -+** ret -+*/ -+TEST_SET (set3_u8_z4_2, svuint8x3_t, svuint8_t, -+ z4 = svset3_u8 (z4, 2, z0), -+ z4 = svset3 (z4, 2, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_bf16.c -new file mode 100644 -index 000000000..4e26c1117 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_bf16.c -@@ -0,0 +1,87 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** set4_bf16_z24_0: -+** mov z25\.d, z5\.d -+** mov z26\.d, z6\.d -+** mov z27\.d, z7\.d -+** mov z24\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_bf16_z24_0, svbfloat16x4_t, svbfloat16_t, -+ z24 = svset4_bf16 (z4, 0, z0), -+ z24 = svset4 (z4, 0, z0)) -+ -+/* -+** set4_bf16_z24_1: -+** mov z24\.d, z4\.d -+** mov z26\.d, z6\.d -+** mov z27\.d, z7\.d -+** mov z25\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_bf16_z24_1, svbfloat16x4_t, svbfloat16_t, -+ z24 = svset4_bf16 (z4, 1, z0), -+ z24 = svset4 (z4, 1, z0)) -+ -+/* -+** set4_bf16_z24_2: -+** mov z24\.d, z4\.d -+** mov z25\.d, z5\.d -+** mov z27\.d, z7\.d -+** mov z26\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_bf16_z24_2, svbfloat16x4_t, svbfloat16_t, -+ z24 = svset4_bf16 (z4, 2, z0), -+ z24 = svset4 (z4, 2, z0)) -+ -+/* -+** set4_bf16_z24_3: -+** mov z24\.d, z4\.d -+** mov z25\.d, z5\.d -+** mov z26\.d, z6\.d -+** mov z27\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_bf16_z24_3, svbfloat16x4_t, svbfloat16_t, -+ z24 = svset4_bf16 (z4, 3, z0), -+ 
z24 = svset4 (z4, 3, z0)) -+ -+/* -+** set4_bf16_z4_0: -+** mov z4\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_bf16_z4_0, svbfloat16x4_t, svbfloat16_t, -+ z4 = svset4_bf16 (z4, 0, z0), -+ z4 = svset4 (z4, 0, z0)) -+ -+/* -+** set4_bf16_z4_1: -+** mov z5\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_bf16_z4_1, svbfloat16x4_t, svbfloat16_t, -+ z4 = svset4_bf16 (z4, 1, z0), -+ z4 = svset4 (z4, 1, z0)) -+ -+/* -+** set4_bf16_z4_2: -+** mov z6\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_bf16_z4_2, svbfloat16x4_t, svbfloat16_t, -+ z4 = svset4_bf16 (z4, 2, z0), -+ z4 = svset4 (z4, 2, z0)) -+ -+/* -+** set4_bf16_z4_3: -+** mov z7\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_bf16_z4_3, svbfloat16x4_t, svbfloat16_t, -+ z4 = svset4_bf16 (z4, 3, z0), -+ z4 = svset4 (z4, 3, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_f16.c -new file mode 100644 -index 000000000..a28ff9ca6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_f16.c -@@ -0,0 +1,87 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** set4_f16_z24_0: -+** mov z25\.d, z5\.d -+** mov z26\.d, z6\.d -+** mov z27\.d, z7\.d -+** mov z24\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_f16_z24_0, svfloat16x4_t, svfloat16_t, -+ z24 = svset4_f16 (z4, 0, z0), -+ z24 = svset4 (z4, 0, z0)) -+ -+/* -+** set4_f16_z24_1: -+** mov z24\.d, z4\.d -+** mov z26\.d, z6\.d -+** mov z27\.d, z7\.d -+** mov z25\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_f16_z24_1, svfloat16x4_t, svfloat16_t, -+ z24 = svset4_f16 (z4, 1, z0), -+ z24 = svset4 (z4, 1, z0)) -+ -+/* -+** set4_f16_z24_2: -+** mov z24\.d, z4\.d -+** mov z25\.d, z5\.d -+** mov z27\.d, z7\.d -+** mov z26\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_f16_z24_2, svfloat16x4_t, svfloat16_t, -+ z24 = svset4_f16 (z4, 2, z0), -+ z24 = svset4 (z4, 2, z0)) -+ -+/* -+** set4_f16_z24_3: -+** mov z24\.d, z4\.d -+** mov z25\.d, z5\.d -+** mov z26\.d, z6\.d -+** mov z27\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_f16_z24_3, svfloat16x4_t, svfloat16_t, -+ z24 = svset4_f16 (z4, 3, z0), -+ z24 = svset4 (z4, 3, z0)) -+ -+/* -+** set4_f16_z4_0: -+** mov z4\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_f16_z4_0, svfloat16x4_t, svfloat16_t, -+ z4 = svset4_f16 (z4, 0, z0), -+ z4 = svset4 (z4, 0, z0)) -+ -+/* -+** set4_f16_z4_1: -+** mov z5\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_f16_z4_1, svfloat16x4_t, svfloat16_t, -+ z4 = svset4_f16 (z4, 1, z0), -+ z4 = svset4 (z4, 1, z0)) -+ -+/* -+** set4_f16_z4_2: -+** mov z6\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_f16_z4_2, svfloat16x4_t, svfloat16_t, -+ z4 = svset4_f16 (z4, 2, z0), -+ z4 = svset4 (z4, 2, z0)) -+ -+/* -+** set4_f16_z4_3: -+** mov z7\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_f16_z4_3, svfloat16x4_t, svfloat16_t, -+ z4 = svset4_f16 (z4, 3, z0), -+ z4 = svset4 (z4, 3, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_f32.c -new file mode 100644 -index 000000000..e6e3f5ebd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_f32.c -@@ -0,0 +1,87 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** set4_f32_z24_0: -+** mov z25\.d, z5\.d -+** mov z26\.d, z6\.d -+** mov z27\.d, z7\.d -+** mov z24\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_f32_z24_0, svfloat32x4_t, svfloat32_t, -+ z24 = svset4_f32 (z4, 0, z0), -+ z24 = svset4 (z4, 0, z0)) -+ -+/* -+** set4_f32_z24_1: -+** mov z24\.d, z4\.d 
-+** mov z26\.d, z6\.d -+** mov z27\.d, z7\.d -+** mov z25\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_f32_z24_1, svfloat32x4_t, svfloat32_t, -+ z24 = svset4_f32 (z4, 1, z0), -+ z24 = svset4 (z4, 1, z0)) -+ -+/* -+** set4_f32_z24_2: -+** mov z24\.d, z4\.d -+** mov z25\.d, z5\.d -+** mov z27\.d, z7\.d -+** mov z26\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_f32_z24_2, svfloat32x4_t, svfloat32_t, -+ z24 = svset4_f32 (z4, 2, z0), -+ z24 = svset4 (z4, 2, z0)) -+ -+/* -+** set4_f32_z24_3: -+** mov z24\.d, z4\.d -+** mov z25\.d, z5\.d -+** mov z26\.d, z6\.d -+** mov z27\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_f32_z24_3, svfloat32x4_t, svfloat32_t, -+ z24 = svset4_f32 (z4, 3, z0), -+ z24 = svset4 (z4, 3, z0)) -+ -+/* -+** set4_f32_z4_0: -+** mov z4\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_f32_z4_0, svfloat32x4_t, svfloat32_t, -+ z4 = svset4_f32 (z4, 0, z0), -+ z4 = svset4 (z4, 0, z0)) -+ -+/* -+** set4_f32_z4_1: -+** mov z5\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_f32_z4_1, svfloat32x4_t, svfloat32_t, -+ z4 = svset4_f32 (z4, 1, z0), -+ z4 = svset4 (z4, 1, z0)) -+ -+/* -+** set4_f32_z4_2: -+** mov z6\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_f32_z4_2, svfloat32x4_t, svfloat32_t, -+ z4 = svset4_f32 (z4, 2, z0), -+ z4 = svset4 (z4, 2, z0)) -+ -+/* -+** set4_f32_z4_3: -+** mov z7\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_f32_z4_3, svfloat32x4_t, svfloat32_t, -+ z4 = svset4_f32 (z4, 3, z0), -+ z4 = svset4 (z4, 3, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_f64.c -new file mode 100644 -index 000000000..3ceaa459a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_f64.c -@@ -0,0 +1,87 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** set4_f64_z24_0: -+** mov z25\.d, z5\.d -+** mov z26\.d, z6\.d -+** mov z27\.d, z7\.d -+** mov z24\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_f64_z24_0, svfloat64x4_t, svfloat64_t, -+ z24 = svset4_f64 (z4, 0, z0), -+ z24 = svset4 (z4, 0, z0)) -+ -+/* -+** set4_f64_z24_1: -+** mov z24\.d, z4\.d -+** mov z26\.d, z6\.d -+** mov z27\.d, z7\.d -+** mov z25\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_f64_z24_1, svfloat64x4_t, svfloat64_t, -+ z24 = svset4_f64 (z4, 1, z0), -+ z24 = svset4 (z4, 1, z0)) -+ -+/* -+** set4_f64_z24_2: -+** mov z24\.d, z4\.d -+** mov z25\.d, z5\.d -+** mov z27\.d, z7\.d -+** mov z26\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_f64_z24_2, svfloat64x4_t, svfloat64_t, -+ z24 = svset4_f64 (z4, 2, z0), -+ z24 = svset4 (z4, 2, z0)) -+ -+/* -+** set4_f64_z24_3: -+** mov z24\.d, z4\.d -+** mov z25\.d, z5\.d -+** mov z26\.d, z6\.d -+** mov z27\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_f64_z24_3, svfloat64x4_t, svfloat64_t, -+ z24 = svset4_f64 (z4, 3, z0), -+ z24 = svset4 (z4, 3, z0)) -+ -+/* -+** set4_f64_z4_0: -+** mov z4\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_f64_z4_0, svfloat64x4_t, svfloat64_t, -+ z4 = svset4_f64 (z4, 0, z0), -+ z4 = svset4 (z4, 0, z0)) -+ -+/* -+** set4_f64_z4_1: -+** mov z5\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_f64_z4_1, svfloat64x4_t, svfloat64_t, -+ z4 = svset4_f64 (z4, 1, z0), -+ z4 = svset4 (z4, 1, z0)) -+ -+/* -+** set4_f64_z4_2: -+** mov z6\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_f64_z4_2, svfloat64x4_t, svfloat64_t, -+ z4 = svset4_f64 (z4, 2, z0), -+ z4 = svset4 (z4, 2, z0)) -+ -+/* -+** set4_f64_z4_3: -+** mov z7\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_f64_z4_3, svfloat64x4_t, svfloat64_t, -+ z4 = svset4_f64 (z4, 3, z0), -+ z4 = svset4 (z4, 3, z0)) -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_s16.c -new file mode 100644 -index 000000000..3cef6ebe8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_s16.c -@@ -0,0 +1,87 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** set4_s16_z24_0: -+** mov z25\.d, z5\.d -+** mov z26\.d, z6\.d -+** mov z27\.d, z7\.d -+** mov z24\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_s16_z24_0, svint16x4_t, svint16_t, -+ z24 = svset4_s16 (z4, 0, z0), -+ z24 = svset4 (z4, 0, z0)) -+ -+/* -+** set4_s16_z24_1: -+** mov z24\.d, z4\.d -+** mov z26\.d, z6\.d -+** mov z27\.d, z7\.d -+** mov z25\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_s16_z24_1, svint16x4_t, svint16_t, -+ z24 = svset4_s16 (z4, 1, z0), -+ z24 = svset4 (z4, 1, z0)) -+ -+/* -+** set4_s16_z24_2: -+** mov z24\.d, z4\.d -+** mov z25\.d, z5\.d -+** mov z27\.d, z7\.d -+** mov z26\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_s16_z24_2, svint16x4_t, svint16_t, -+ z24 = svset4_s16 (z4, 2, z0), -+ z24 = svset4 (z4, 2, z0)) -+ -+/* -+** set4_s16_z24_3: -+** mov z24\.d, z4\.d -+** mov z25\.d, z5\.d -+** mov z26\.d, z6\.d -+** mov z27\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_s16_z24_3, svint16x4_t, svint16_t, -+ z24 = svset4_s16 (z4, 3, z0), -+ z24 = svset4 (z4, 3, z0)) -+ -+/* -+** set4_s16_z4_0: -+** mov z4\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_s16_z4_0, svint16x4_t, svint16_t, -+ z4 = svset4_s16 (z4, 0, z0), -+ z4 = svset4 (z4, 0, z0)) -+ -+/* -+** set4_s16_z4_1: -+** mov z5\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_s16_z4_1, svint16x4_t, svint16_t, -+ z4 = svset4_s16 (z4, 1, z0), -+ z4 = svset4 (z4, 1, z0)) -+ -+/* -+** set4_s16_z4_2: -+** mov z6\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_s16_z4_2, svint16x4_t, svint16_t, -+ z4 = svset4_s16 (z4, 2, z0), -+ z4 = svset4 (z4, 2, z0)) -+ -+/* -+** set4_s16_z4_3: -+** mov z7\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_s16_z4_3, svint16x4_t, svint16_t, -+ z4 = svset4_s16 (z4, 3, z0), -+ z4 = svset4 (z4, 3, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_s32.c -new file mode 100644 -index 000000000..49f646e8d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_s32.c -@@ -0,0 +1,87 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** set4_s32_z24_0: -+** mov z25\.d, z5\.d -+** mov z26\.d, z6\.d -+** mov z27\.d, z7\.d -+** mov z24\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_s32_z24_0, svint32x4_t, svint32_t, -+ z24 = svset4_s32 (z4, 0, z0), -+ z24 = svset4 (z4, 0, z0)) -+ -+/* -+** set4_s32_z24_1: -+** mov z24\.d, z4\.d -+** mov z26\.d, z6\.d -+** mov z27\.d, z7\.d -+** mov z25\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_s32_z24_1, svint32x4_t, svint32_t, -+ z24 = svset4_s32 (z4, 1, z0), -+ z24 = svset4 (z4, 1, z0)) -+ -+/* -+** set4_s32_z24_2: -+** mov z24\.d, z4\.d -+** mov z25\.d, z5\.d -+** mov z27\.d, z7\.d -+** mov z26\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_s32_z24_2, svint32x4_t, svint32_t, -+ z24 = svset4_s32 (z4, 2, z0), -+ z24 = svset4 (z4, 2, z0)) -+ -+/* -+** set4_s32_z24_3: -+** mov z24\.d, z4\.d -+** mov z25\.d, z5\.d -+** mov z26\.d, z6\.d -+** mov z27\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_s32_z24_3, svint32x4_t, svint32_t, -+ z24 = svset4_s32 (z4, 3, z0), -+ z24 = svset4 (z4, 3, z0)) -+ -+/* -+** set4_s32_z4_0: -+** mov z4\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_s32_z4_0, svint32x4_t, svint32_t, -+ 
z4 = svset4_s32 (z4, 0, z0), -+ z4 = svset4 (z4, 0, z0)) -+ -+/* -+** set4_s32_z4_1: -+** mov z5\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_s32_z4_1, svint32x4_t, svint32_t, -+ z4 = svset4_s32 (z4, 1, z0), -+ z4 = svset4 (z4, 1, z0)) -+ -+/* -+** set4_s32_z4_2: -+** mov z6\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_s32_z4_2, svint32x4_t, svint32_t, -+ z4 = svset4_s32 (z4, 2, z0), -+ z4 = svset4 (z4, 2, z0)) -+ -+/* -+** set4_s32_z4_3: -+** mov z7\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_s32_z4_3, svint32x4_t, svint32_t, -+ z4 = svset4_s32 (z4, 3, z0), -+ z4 = svset4 (z4, 3, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_s64.c -new file mode 100644 -index 000000000..7544e25a2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_s64.c -@@ -0,0 +1,87 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** set4_s64_z24_0: -+** mov z25\.d, z5\.d -+** mov z26\.d, z6\.d -+** mov z27\.d, z7\.d -+** mov z24\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_s64_z24_0, svint64x4_t, svint64_t, -+ z24 = svset4_s64 (z4, 0, z0), -+ z24 = svset4 (z4, 0, z0)) -+ -+/* -+** set4_s64_z24_1: -+** mov z24\.d, z4\.d -+** mov z26\.d, z6\.d -+** mov z27\.d, z7\.d -+** mov z25\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_s64_z24_1, svint64x4_t, svint64_t, -+ z24 = svset4_s64 (z4, 1, z0), -+ z24 = svset4 (z4, 1, z0)) -+ -+/* -+** set4_s64_z24_2: -+** mov z24\.d, z4\.d -+** mov z25\.d, z5\.d -+** mov z27\.d, z7\.d -+** mov z26\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_s64_z24_2, svint64x4_t, svint64_t, -+ z24 = svset4_s64 (z4, 2, z0), -+ z24 = svset4 (z4, 2, z0)) -+ -+/* -+** set4_s64_z24_3: -+** mov z24\.d, z4\.d -+** mov z25\.d, z5\.d -+** mov z26\.d, z6\.d -+** mov z27\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_s64_z24_3, svint64x4_t, svint64_t, -+ z24 = svset4_s64 (z4, 3, z0), -+ z24 = svset4 (z4, 3, z0)) -+ -+/* -+** set4_s64_z4_0: -+** mov z4\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_s64_z4_0, svint64x4_t, svint64_t, -+ z4 = svset4_s64 (z4, 0, z0), -+ z4 = svset4 (z4, 0, z0)) -+ -+/* -+** set4_s64_z4_1: -+** mov z5\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_s64_z4_1, svint64x4_t, svint64_t, -+ z4 = svset4_s64 (z4, 1, z0), -+ z4 = svset4 (z4, 1, z0)) -+ -+/* -+** set4_s64_z4_2: -+** mov z6\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_s64_z4_2, svint64x4_t, svint64_t, -+ z4 = svset4_s64 (z4, 2, z0), -+ z4 = svset4 (z4, 2, z0)) -+ -+/* -+** set4_s64_z4_3: -+** mov z7\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_s64_z4_3, svint64x4_t, svint64_t, -+ z4 = svset4_s64 (z4, 3, z0), -+ z4 = svset4 (z4, 3, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_s8.c -new file mode 100644 -index 000000000..2ec9ff059 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_s8.c -@@ -0,0 +1,87 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** set4_s8_z24_0: -+** mov z25\.d, z5\.d -+** mov z26\.d, z6\.d -+** mov z27\.d, z7\.d -+** mov z24\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_s8_z24_0, svint8x4_t, svint8_t, -+ z24 = svset4_s8 (z4, 0, z0), -+ z24 = svset4 (z4, 0, z0)) -+ -+/* -+** set4_s8_z24_1: -+** mov z24\.d, z4\.d -+** mov z26\.d, z6\.d -+** mov z27\.d, z7\.d -+** mov z25\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_s8_z24_1, svint8x4_t, svint8_t, -+ z24 = svset4_s8 (z4, 1, z0), -+ z24 = svset4 (z4, 1, z0)) -+ -+/* -+** set4_s8_z24_2: -+** 
mov z24\.d, z4\.d -+** mov z25\.d, z5\.d -+** mov z27\.d, z7\.d -+** mov z26\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_s8_z24_2, svint8x4_t, svint8_t, -+ z24 = svset4_s8 (z4, 2, z0), -+ z24 = svset4 (z4, 2, z0)) -+ -+/* -+** set4_s8_z24_3: -+** mov z24\.d, z4\.d -+** mov z25\.d, z5\.d -+** mov z26\.d, z6\.d -+** mov z27\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_s8_z24_3, svint8x4_t, svint8_t, -+ z24 = svset4_s8 (z4, 3, z0), -+ z24 = svset4 (z4, 3, z0)) -+ -+/* -+** set4_s8_z4_0: -+** mov z4\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_s8_z4_0, svint8x4_t, svint8_t, -+ z4 = svset4_s8 (z4, 0, z0), -+ z4 = svset4 (z4, 0, z0)) -+ -+/* -+** set4_s8_z4_1: -+** mov z5\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_s8_z4_1, svint8x4_t, svint8_t, -+ z4 = svset4_s8 (z4, 1, z0), -+ z4 = svset4 (z4, 1, z0)) -+ -+/* -+** set4_s8_z4_2: -+** mov z6\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_s8_z4_2, svint8x4_t, svint8_t, -+ z4 = svset4_s8 (z4, 2, z0), -+ z4 = svset4 (z4, 2, z0)) -+ -+/* -+** set4_s8_z4_3: -+** mov z7\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_s8_z4_3, svint8x4_t, svint8_t, -+ z4 = svset4_s8 (z4, 3, z0), -+ z4 = svset4 (z4, 3, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_u16.c -new file mode 100644 -index 000000000..c9499b044 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_u16.c -@@ -0,0 +1,87 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** set4_u16_z24_0: -+** mov z25\.d, z5\.d -+** mov z26\.d, z6\.d -+** mov z27\.d, z7\.d -+** mov z24\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_u16_z24_0, svuint16x4_t, svuint16_t, -+ z24 = svset4_u16 (z4, 0, z0), -+ z24 = svset4 (z4, 0, z0)) -+ -+/* -+** set4_u16_z24_1: -+** mov z24\.d, z4\.d -+** mov z26\.d, z6\.d -+** mov z27\.d, z7\.d -+** mov z25\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_u16_z24_1, svuint16x4_t, svuint16_t, -+ z24 = svset4_u16 (z4, 1, z0), -+ z24 = svset4 (z4, 1, z0)) -+ -+/* -+** set4_u16_z24_2: -+** mov z24\.d, z4\.d -+** mov z25\.d, z5\.d -+** mov z27\.d, z7\.d -+** mov z26\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_u16_z24_2, svuint16x4_t, svuint16_t, -+ z24 = svset4_u16 (z4, 2, z0), -+ z24 = svset4 (z4, 2, z0)) -+ -+/* -+** set4_u16_z24_3: -+** mov z24\.d, z4\.d -+** mov z25\.d, z5\.d -+** mov z26\.d, z6\.d -+** mov z27\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_u16_z24_3, svuint16x4_t, svuint16_t, -+ z24 = svset4_u16 (z4, 3, z0), -+ z24 = svset4 (z4, 3, z0)) -+ -+/* -+** set4_u16_z4_0: -+** mov z4\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_u16_z4_0, svuint16x4_t, svuint16_t, -+ z4 = svset4_u16 (z4, 0, z0), -+ z4 = svset4 (z4, 0, z0)) -+ -+/* -+** set4_u16_z4_1: -+** mov z5\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_u16_z4_1, svuint16x4_t, svuint16_t, -+ z4 = svset4_u16 (z4, 1, z0), -+ z4 = svset4 (z4, 1, z0)) -+ -+/* -+** set4_u16_z4_2: -+** mov z6\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_u16_z4_2, svuint16x4_t, svuint16_t, -+ z4 = svset4_u16 (z4, 2, z0), -+ z4 = svset4 (z4, 2, z0)) -+ -+/* -+** set4_u16_z4_3: -+** mov z7\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_u16_z4_3, svuint16x4_t, svuint16_t, -+ z4 = svset4_u16 (z4, 3, z0), -+ z4 = svset4 (z4, 3, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_u32.c -new file mode 100644 -index 000000000..00b3dc513 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_u32.c -@@ -0,0 +1,87 @@ -+/* { dg-final { check-function-bodies "**" "" 
"-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** set4_u32_z24_0: -+** mov z25\.d, z5\.d -+** mov z26\.d, z6\.d -+** mov z27\.d, z7\.d -+** mov z24\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_u32_z24_0, svuint32x4_t, svuint32_t, -+ z24 = svset4_u32 (z4, 0, z0), -+ z24 = svset4 (z4, 0, z0)) -+ -+/* -+** set4_u32_z24_1: -+** mov z24\.d, z4\.d -+** mov z26\.d, z6\.d -+** mov z27\.d, z7\.d -+** mov z25\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_u32_z24_1, svuint32x4_t, svuint32_t, -+ z24 = svset4_u32 (z4, 1, z0), -+ z24 = svset4 (z4, 1, z0)) -+ -+/* -+** set4_u32_z24_2: -+** mov z24\.d, z4\.d -+** mov z25\.d, z5\.d -+** mov z27\.d, z7\.d -+** mov z26\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_u32_z24_2, svuint32x4_t, svuint32_t, -+ z24 = svset4_u32 (z4, 2, z0), -+ z24 = svset4 (z4, 2, z0)) -+ -+/* -+** set4_u32_z24_3: -+** mov z24\.d, z4\.d -+** mov z25\.d, z5\.d -+** mov z26\.d, z6\.d -+** mov z27\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_u32_z24_3, svuint32x4_t, svuint32_t, -+ z24 = svset4_u32 (z4, 3, z0), -+ z24 = svset4 (z4, 3, z0)) -+ -+/* -+** set4_u32_z4_0: -+** mov z4\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_u32_z4_0, svuint32x4_t, svuint32_t, -+ z4 = svset4_u32 (z4, 0, z0), -+ z4 = svset4 (z4, 0, z0)) -+ -+/* -+** set4_u32_z4_1: -+** mov z5\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_u32_z4_1, svuint32x4_t, svuint32_t, -+ z4 = svset4_u32 (z4, 1, z0), -+ z4 = svset4 (z4, 1, z0)) -+ -+/* -+** set4_u32_z4_2: -+** mov z6\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_u32_z4_2, svuint32x4_t, svuint32_t, -+ z4 = svset4_u32 (z4, 2, z0), -+ z4 = svset4 (z4, 2, z0)) -+ -+/* -+** set4_u32_z4_3: -+** mov z7\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_u32_z4_3, svuint32x4_t, svuint32_t, -+ z4 = svset4_u32 (z4, 3, z0), -+ z4 = svset4 (z4, 3, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_u64.c -new file mode 100644 -index 000000000..d2f048b82 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_u64.c -@@ -0,0 +1,87 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** set4_u64_z24_0: -+** mov z25\.d, z5\.d -+** mov z26\.d, z6\.d -+** mov z27\.d, z7\.d -+** mov z24\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_u64_z24_0, svuint64x4_t, svuint64_t, -+ z24 = svset4_u64 (z4, 0, z0), -+ z24 = svset4 (z4, 0, z0)) -+ -+/* -+** set4_u64_z24_1: -+** mov z24\.d, z4\.d -+** mov z26\.d, z6\.d -+** mov z27\.d, z7\.d -+** mov z25\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_u64_z24_1, svuint64x4_t, svuint64_t, -+ z24 = svset4_u64 (z4, 1, z0), -+ z24 = svset4 (z4, 1, z0)) -+ -+/* -+** set4_u64_z24_2: -+** mov z24\.d, z4\.d -+** mov z25\.d, z5\.d -+** mov z27\.d, z7\.d -+** mov z26\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_u64_z24_2, svuint64x4_t, svuint64_t, -+ z24 = svset4_u64 (z4, 2, z0), -+ z24 = svset4 (z4, 2, z0)) -+ -+/* -+** set4_u64_z24_3: -+** mov z24\.d, z4\.d -+** mov z25\.d, z5\.d -+** mov z26\.d, z6\.d -+** mov z27\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_u64_z24_3, svuint64x4_t, svuint64_t, -+ z24 = svset4_u64 (z4, 3, z0), -+ z24 = svset4 (z4, 3, z0)) -+ -+/* -+** set4_u64_z4_0: -+** mov z4\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_u64_z4_0, svuint64x4_t, svuint64_t, -+ z4 = svset4_u64 (z4, 0, z0), -+ z4 = svset4 (z4, 0, z0)) -+ -+/* -+** set4_u64_z4_1: -+** mov z5\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_u64_z4_1, svuint64x4_t, svuint64_t, -+ z4 = svset4_u64 (z4, 1, z0), -+ z4 = svset4 (z4, 1, z0)) -+ -+/* -+** set4_u64_z4_2: -+** mov z6\.d, z0\.d 
-+** ret -+*/ -+TEST_SET (set4_u64_z4_2, svuint64x4_t, svuint64_t, -+ z4 = svset4_u64 (z4, 2, z0), -+ z4 = svset4 (z4, 2, z0)) -+ -+/* -+** set4_u64_z4_3: -+** mov z7\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_u64_z4_3, svuint64x4_t, svuint64_t, -+ z4 = svset4_u64 (z4, 3, z0), -+ z4 = svset4 (z4, 3, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_u8.c -new file mode 100644 -index 000000000..b4f27c6f1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_u8.c -@@ -0,0 +1,87 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** set4_u8_z24_0: -+** mov z25\.d, z5\.d -+** mov z26\.d, z6\.d -+** mov z27\.d, z7\.d -+** mov z24\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_u8_z24_0, svuint8x4_t, svuint8_t, -+ z24 = svset4_u8 (z4, 0, z0), -+ z24 = svset4 (z4, 0, z0)) -+ -+/* -+** set4_u8_z24_1: -+** mov z24\.d, z4\.d -+** mov z26\.d, z6\.d -+** mov z27\.d, z7\.d -+** mov z25\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_u8_z24_1, svuint8x4_t, svuint8_t, -+ z24 = svset4_u8 (z4, 1, z0), -+ z24 = svset4 (z4, 1, z0)) -+ -+/* -+** set4_u8_z24_2: -+** mov z24\.d, z4\.d -+** mov z25\.d, z5\.d -+** mov z27\.d, z7\.d -+** mov z26\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_u8_z24_2, svuint8x4_t, svuint8_t, -+ z24 = svset4_u8 (z4, 2, z0), -+ z24 = svset4 (z4, 2, z0)) -+ -+/* -+** set4_u8_z24_3: -+** mov z24\.d, z4\.d -+** mov z25\.d, z5\.d -+** mov z26\.d, z6\.d -+** mov z27\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_u8_z24_3, svuint8x4_t, svuint8_t, -+ z24 = svset4_u8 (z4, 3, z0), -+ z24 = svset4 (z4, 3, z0)) -+ -+/* -+** set4_u8_z4_0: -+** mov z4\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_u8_z4_0, svuint8x4_t, svuint8_t, -+ z4 = svset4_u8 (z4, 0, z0), -+ z4 = svset4 (z4, 0, z0)) -+ -+/* -+** set4_u8_z4_1: -+** mov z5\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_u8_z4_1, svuint8x4_t, svuint8_t, -+ z4 = svset4_u8 (z4, 1, z0), -+ z4 = svset4 (z4, 1, z0)) -+ -+/* -+** set4_u8_z4_2: -+** mov z6\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_u8_z4_2, svuint8x4_t, svuint8_t, -+ z4 = svset4_u8 (z4, 2, z0), -+ z4 = svset4 (z4, 2, z0)) -+ -+/* -+** set4_u8_z4_3: -+** mov z7\.d, z0\.d -+** ret -+*/ -+TEST_SET (set4_u8_z4_3, svuint8x4_t, svuint8_t, -+ z4 = svset4_u8 (z4, 3, z0), -+ z4 = svset4 (z4, 3, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_bf16.c -new file mode 100644 -index 000000000..3d2dbf20d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_bf16.c -@@ -0,0 +1,33 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** splice_bf16_tied1: -+** splice z0\.h, p0, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (splice_bf16_tied1, svbfloat16_t, -+ z0 = svsplice_bf16 (p0, z0, z1), -+ z0 = svsplice (p0, z0, z1)) -+ -+/* -+** splice_bf16_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** splice z0\.h, p0, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (splice_bf16_tied2, svbfloat16_t, -+ z0 = svsplice_bf16 (p0, z1, z0), -+ z0 = svsplice (p0, z1, z0)) -+ -+/* -+** splice_bf16_untied: -+** movprfx z0, z1 -+** splice z0\.h, p0, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (splice_bf16_untied, svbfloat16_t, -+ z0 = svsplice_bf16 (p0, z1, z2), -+ z0 = svsplice (p0, z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_f16.c -new file 
mode 100644 -index 000000000..b796eaf3d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_f16.c -@@ -0,0 +1,33 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** splice_f16_tied1: -+** splice z0\.h, p0, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (splice_f16_tied1, svfloat16_t, -+ z0 = svsplice_f16 (p0, z0, z1), -+ z0 = svsplice (p0, z0, z1)) -+ -+/* -+** splice_f16_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** splice z0\.h, p0, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (splice_f16_tied2, svfloat16_t, -+ z0 = svsplice_f16 (p0, z1, z0), -+ z0 = svsplice (p0, z1, z0)) -+ -+/* -+** splice_f16_untied: -+** movprfx z0, z1 -+** splice z0\.h, p0, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (splice_f16_untied, svfloat16_t, -+ z0 = svsplice_f16 (p0, z1, z2), -+ z0 = svsplice (p0, z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_f32.c -new file mode 100644 -index 000000000..1fc552bc3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_f32.c -@@ -0,0 +1,33 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** splice_f32_tied1: -+** splice z0\.s, p0, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (splice_f32_tied1, svfloat32_t, -+ z0 = svsplice_f32 (p0, z0, z1), -+ z0 = svsplice (p0, z0, z1)) -+ -+/* -+** splice_f32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** splice z0\.s, p0, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (splice_f32_tied2, svfloat32_t, -+ z0 = svsplice_f32 (p0, z1, z0), -+ z0 = svsplice (p0, z1, z0)) -+ -+/* -+** splice_f32_untied: -+** movprfx z0, z1 -+** splice z0\.s, p0, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (splice_f32_untied, svfloat32_t, -+ z0 = svsplice_f32 (p0, z1, z2), -+ z0 = svsplice (p0, z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_f64.c -new file mode 100644 -index 000000000..26b523520 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_f64.c -@@ -0,0 +1,33 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** splice_f64_tied1: -+** splice z0\.d, p0, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (splice_f64_tied1, svfloat64_t, -+ z0 = svsplice_f64 (p0, z0, z1), -+ z0 = svsplice (p0, z0, z1)) -+ -+/* -+** splice_f64_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** splice z0\.d, p0, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (splice_f64_tied2, svfloat64_t, -+ z0 = svsplice_f64 (p0, z1, z0), -+ z0 = svsplice (p0, z1, z0)) -+ -+/* -+** splice_f64_untied: -+** movprfx z0, z1 -+** splice z0\.d, p0, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (splice_f64_untied, svfloat64_t, -+ z0 = svsplice_f64 (p0, z1, z2), -+ z0 = svsplice (p0, z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_s16.c -new file mode 100644 -index 000000000..8796c6ecd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_s16.c -@@ -0,0 +1,33 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** splice_s16_tied1: -+** splice z0\.h, p0, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (splice_s16_tied1, svint16_t, -+ z0 = svsplice_s16 (p0, z0, z1), -+ z0 = svsplice (p0, z0, 
z1)) -+ -+/* -+** splice_s16_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** splice z0\.h, p0, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (splice_s16_tied2, svint16_t, -+ z0 = svsplice_s16 (p0, z1, z0), -+ z0 = svsplice (p0, z1, z0)) -+ -+/* -+** splice_s16_untied: -+** movprfx z0, z1 -+** splice z0\.h, p0, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (splice_s16_untied, svint16_t, -+ z0 = svsplice_s16 (p0, z1, z2), -+ z0 = svsplice (p0, z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_s32.c -new file mode 100644 -index 000000000..5f2798e06 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_s32.c -@@ -0,0 +1,33 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** splice_s32_tied1: -+** splice z0\.s, p0, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (splice_s32_tied1, svint32_t, -+ z0 = svsplice_s32 (p0, z0, z1), -+ z0 = svsplice (p0, z0, z1)) -+ -+/* -+** splice_s32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** splice z0\.s, p0, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (splice_s32_tied2, svint32_t, -+ z0 = svsplice_s32 (p0, z1, z0), -+ z0 = svsplice (p0, z1, z0)) -+ -+/* -+** splice_s32_untied: -+** movprfx z0, z1 -+** splice z0\.s, p0, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (splice_s32_untied, svint32_t, -+ z0 = svsplice_s32 (p0, z1, z2), -+ z0 = svsplice (p0, z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_s64.c -new file mode 100644 -index 000000000..024bfa479 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_s64.c -@@ -0,0 +1,33 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** splice_s64_tied1: -+** splice z0\.d, p0, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (splice_s64_tied1, svint64_t, -+ z0 = svsplice_s64 (p0, z0, z1), -+ z0 = svsplice (p0, z0, z1)) -+ -+/* -+** splice_s64_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** splice z0\.d, p0, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (splice_s64_tied2, svint64_t, -+ z0 = svsplice_s64 (p0, z1, z0), -+ z0 = svsplice (p0, z1, z0)) -+ -+/* -+** splice_s64_untied: -+** movprfx z0, z1 -+** splice z0\.d, p0, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (splice_s64_untied, svint64_t, -+ z0 = svsplice_s64 (p0, z1, z2), -+ z0 = svsplice (p0, z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_s8.c -new file mode 100644 -index 000000000..cd91ee245 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_s8.c -@@ -0,0 +1,33 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** splice_s8_tied1: -+** splice z0\.b, p0, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (splice_s8_tied1, svint8_t, -+ z0 = svsplice_s8 (p0, z0, z1), -+ z0 = svsplice (p0, z0, z1)) -+ -+/* -+** splice_s8_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** splice z0\.b, p0, z0\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (splice_s8_tied2, svint8_t, -+ z0 = svsplice_s8 (p0, z1, z0), -+ z0 = svsplice (p0, z1, z0)) -+ -+/* -+** splice_s8_untied: -+** movprfx z0, z1 -+** splice z0\.b, p0, z0\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (splice_s8_untied, svint8_t, -+ z0 = svsplice_s8 (p0, z1, z2), -+ z0 = svsplice (p0, z1, 
z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_u16.c -new file mode 100644 -index 000000000..821ebaee6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_u16.c -@@ -0,0 +1,33 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** splice_u16_tied1: -+** splice z0\.h, p0, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (splice_u16_tied1, svuint16_t, -+ z0 = svsplice_u16 (p0, z0, z1), -+ z0 = svsplice (p0, z0, z1)) -+ -+/* -+** splice_u16_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** splice z0\.h, p0, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (splice_u16_tied2, svuint16_t, -+ z0 = svsplice_u16 (p0, z1, z0), -+ z0 = svsplice (p0, z1, z0)) -+ -+/* -+** splice_u16_untied: -+** movprfx z0, z1 -+** splice z0\.h, p0, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (splice_u16_untied, svuint16_t, -+ z0 = svsplice_u16 (p0, z1, z2), -+ z0 = svsplice (p0, z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_u32.c -new file mode 100644 -index 000000000..200364f20 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_u32.c -@@ -0,0 +1,33 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** splice_u32_tied1: -+** splice z0\.s, p0, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (splice_u32_tied1, svuint32_t, -+ z0 = svsplice_u32 (p0, z0, z1), -+ z0 = svsplice (p0, z0, z1)) -+ -+/* -+** splice_u32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** splice z0\.s, p0, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (splice_u32_tied2, svuint32_t, -+ z0 = svsplice_u32 (p0, z1, z0), -+ z0 = svsplice (p0, z1, z0)) -+ -+/* -+** splice_u32_untied: -+** movprfx z0, z1 -+** splice z0\.s, p0, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (splice_u32_untied, svuint32_t, -+ z0 = svsplice_u32 (p0, z1, z2), -+ z0 = svsplice (p0, z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_u64.c -new file mode 100644 -index 000000000..352bcdeed ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_u64.c -@@ -0,0 +1,33 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** splice_u64_tied1: -+** splice z0\.d, p0, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (splice_u64_tied1, svuint64_t, -+ z0 = svsplice_u64 (p0, z0, z1), -+ z0 = svsplice (p0, z0, z1)) -+ -+/* -+** splice_u64_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** splice z0\.d, p0, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (splice_u64_tied2, svuint64_t, -+ z0 = svsplice_u64 (p0, z1, z0), -+ z0 = svsplice (p0, z1, z0)) -+ -+/* -+** splice_u64_untied: -+** movprfx z0, z1 -+** splice z0\.d, p0, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (splice_u64_untied, svuint64_t, -+ z0 = svsplice_u64 (p0, z1, z2), -+ z0 = svsplice (p0, z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_u8.c -new file mode 100644 -index 000000000..6c24fe64d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/splice_u8.c -@@ -0,0 +1,33 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** splice_u8_tied1: -+** splice z0\.b, 
p0, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (splice_u8_tied1, svuint8_t, -+ z0 = svsplice_u8 (p0, z0, z1), -+ z0 = svsplice (p0, z0, z1)) -+ -+/* -+** splice_u8_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** splice z0\.b, p0, z0\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (splice_u8_tied2, svuint8_t, -+ z0 = svsplice_u8 (p0, z1, z0), -+ z0 = svsplice (p0, z1, z0)) -+ -+/* -+** splice_u8_untied: -+** movprfx z0, z1 -+** splice z0\.b, p0, z0\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (splice_u8_untied, svuint8_t, -+ z0 = svsplice_u8 (p0, z1, z2), -+ z0 = svsplice (p0, z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sqrt_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sqrt_f16.c -new file mode 100644 -index 000000000..6dc5940fb ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sqrt_f16.c -@@ -0,0 +1,103 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** sqrt_f16_m_tied12: -+** fsqrt z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sqrt_f16_m_tied12, svfloat16_t, -+ z0 = svsqrt_f16_m (z0, p0, z0), -+ z0 = svsqrt_m (z0, p0, z0)) -+ -+/* -+** sqrt_f16_m_tied1: -+** fsqrt z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sqrt_f16_m_tied1, svfloat16_t, -+ z0 = svsqrt_f16_m (z0, p0, z1), -+ z0 = svsqrt_m (z0, p0, z1)) -+ -+/* -+** sqrt_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fsqrt z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sqrt_f16_m_tied2, svfloat16_t, -+ z0 = svsqrt_f16_m (z1, p0, z0), -+ z0 = svsqrt_m (z1, p0, z0)) -+ -+/* -+** sqrt_f16_m_untied: -+** movprfx z0, z2 -+** fsqrt z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sqrt_f16_m_untied, svfloat16_t, -+ z0 = svsqrt_f16_m (z2, p0, z1), -+ z0 = svsqrt_m (z2, p0, z1)) -+ -+/* -+** sqrt_f16_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.h, p0/z, \1\.h -+** fsqrt z0\.h, p0/m, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sqrt_f16_z_tied1, svfloat16_t, -+ z0 = svsqrt_f16_z (p0, z0), -+ z0 = svsqrt_z (p0, z0)) -+ -+/* -+** sqrt_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fsqrt z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sqrt_f16_z_untied, svfloat16_t, -+ z0 = svsqrt_f16_z (p0, z1), -+ z0 = svsqrt_z (p0, z1)) -+ -+/* -+** sqrt_f16_x_tied1: -+** fsqrt z0\.h, p0/m, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sqrt_f16_x_tied1, svfloat16_t, -+ z0 = svsqrt_f16_x (p0, z0), -+ z0 = svsqrt_x (p0, z0)) -+ -+/* -+** sqrt_f16_x_untied: -+** fsqrt z0\.h, p0/m, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sqrt_f16_x_untied, svfloat16_t, -+ z0 = svsqrt_f16_x (p0, z1), -+ z0 = svsqrt_x (p0, z1)) -+ -+/* -+** ptrue_sqrt_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sqrt_f16_x_tied1, svfloat16_t, -+ z0 = svsqrt_f16_x (svptrue_b16 (), z0), -+ z0 = svsqrt_x (svptrue_b16 (), z0)) -+ -+/* -+** ptrue_sqrt_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sqrt_f16_x_untied, svfloat16_t, -+ z0 = svsqrt_f16_x (svptrue_b16 (), z1), -+ z0 = svsqrt_x (svptrue_b16 (), z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sqrt_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sqrt_f32.c -new file mode 100644 -index 000000000..71d1f8f74 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sqrt_f32.c -@@ -0,0 +1,103 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** sqrt_f32_m_tied12: -+** fsqrt z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sqrt_f32_m_tied12, svfloat32_t, -+ z0 = svsqrt_f32_m (z0, p0, z0), -+ z0 = svsqrt_m (z0, p0, z0)) -+ -+/* -+** sqrt_f32_m_tied1: -+** fsqrt z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sqrt_f32_m_tied1, svfloat32_t, -+ z0 = svsqrt_f32_m (z0, p0, z1), -+ z0 = svsqrt_m (z0, p0, z1)) -+ -+/* -+** sqrt_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fsqrt z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sqrt_f32_m_tied2, svfloat32_t, -+ z0 = svsqrt_f32_m (z1, p0, z0), -+ z0 = svsqrt_m (z1, p0, z0)) -+ -+/* -+** sqrt_f32_m_untied: -+** movprfx z0, z2 -+** fsqrt z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sqrt_f32_m_untied, svfloat32_t, -+ z0 = svsqrt_f32_m (z2, p0, z1), -+ z0 = svsqrt_m (z2, p0, z1)) -+ -+/* -+** sqrt_f32_z_tied1: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0\.s, p0/z, \1\.s -+** fsqrt z0\.s, p0/m, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sqrt_f32_z_tied1, svfloat32_t, -+ z0 = svsqrt_f32_z (p0, z0), -+ z0 = svsqrt_z (p0, z0)) -+ -+/* -+** sqrt_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fsqrt z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sqrt_f32_z_untied, svfloat32_t, -+ z0 = svsqrt_f32_z (p0, z1), -+ z0 = svsqrt_z (p0, z1)) -+ -+/* -+** sqrt_f32_x_tied1: -+** fsqrt z0\.s, p0/m, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sqrt_f32_x_tied1, svfloat32_t, -+ z0 = svsqrt_f32_x (p0, z0), -+ z0 = svsqrt_x (p0, z0)) -+ -+/* -+** sqrt_f32_x_untied: -+** fsqrt z0\.s, p0/m, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sqrt_f32_x_untied, svfloat32_t, -+ z0 = svsqrt_f32_x (p0, z1), -+ z0 = svsqrt_x (p0, z1)) -+ -+/* -+** ptrue_sqrt_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sqrt_f32_x_tied1, svfloat32_t, -+ z0 = svsqrt_f32_x (svptrue_b32 (), z0), -+ z0 = svsqrt_x (svptrue_b32 (), z0)) -+ -+/* -+** ptrue_sqrt_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sqrt_f32_x_untied, svfloat32_t, -+ z0 = svsqrt_f32_x (svptrue_b32 (), z1), -+ z0 = svsqrt_x (svptrue_b32 (), z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sqrt_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sqrt_f64.c -new file mode 100644 -index 000000000..7771df545 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sqrt_f64.c -@@ -0,0 +1,103 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** sqrt_f64_m_tied12: -+** fsqrt z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sqrt_f64_m_tied12, svfloat64_t, -+ z0 = svsqrt_f64_m (z0, p0, z0), -+ z0 = svsqrt_m (z0, p0, z0)) -+ -+/* -+** sqrt_f64_m_tied1: -+** fsqrt z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sqrt_f64_m_tied1, svfloat64_t, -+ z0 = svsqrt_f64_m (z0, p0, z1), -+ z0 = svsqrt_m (z0, p0, z1)) -+ -+/* -+** sqrt_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fsqrt z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (sqrt_f64_m_tied2, svfloat64_t, -+ z0 = svsqrt_f64_m (z1, p0, z0), -+ z0 = svsqrt_m (z1, p0, z0)) -+ -+/* -+** sqrt_f64_m_untied: -+** movprfx z0, z2 -+** fsqrt z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sqrt_f64_m_untied, svfloat64_t, -+ z0 = svsqrt_f64_m (z2, p0, z1), -+ z0 = svsqrt_m (z2, p0, z1)) -+ -+/* -+** sqrt_f64_z_tied1: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0\.d, p0/z, \1 -+** fsqrt z0\.d, p0/m, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (sqrt_f64_z_tied1, svfloat64_t, -+ z0 = svsqrt_f64_z (p0, z0), -+ z0 = svsqrt_z (p0, z0)) -+ -+/* -+** sqrt_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fsqrt z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sqrt_f64_z_untied, svfloat64_t, -+ z0 = svsqrt_f64_z (p0, z1), -+ z0 = svsqrt_z (p0, z1)) -+ -+/* -+** sqrt_f64_x_tied1: -+** fsqrt z0\.d, p0/m, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sqrt_f64_x_tied1, svfloat64_t, -+ z0 = svsqrt_f64_x (p0, z0), -+ z0 = svsqrt_x (p0, z0)) -+ -+/* -+** sqrt_f64_x_untied: -+** fsqrt z0\.d, p0/m, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sqrt_f64_x_untied, svfloat64_t, -+ z0 = svsqrt_f64_x (p0, z1), -+ z0 = svsqrt_x (p0, z1)) -+ -+/* -+** ptrue_sqrt_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sqrt_f64_x_tied1, svfloat64_t, -+ z0 = svsqrt_f64_x (svptrue_b64 (), z0), -+ z0 = svsqrt_x (svptrue_b64 (), z0)) -+ -+/* -+** ptrue_sqrt_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sqrt_f64_x_untied, svfloat64_t, -+ z0 = svsqrt_f64_x (svptrue_b64 (), z1), -+ z0 = svsqrt_x (svptrue_b64 (), z1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_bf16.c -new file mode 100644 -index 000000000..ec3dbe318 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_bf16.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1_bf16_base: -+** st1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_bf16_base, svbfloat16_t, bfloat16_t, -+ svst1_bf16 (p0, x0, z0), -+ svst1 (p0, x0, z0)) -+ -+/* -+** st1_bf16_index: -+** st1h z0\.h, p0, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_STORE (st1_bf16_index, svbfloat16_t, bfloat16_t, -+ svst1_bf16 (p0, x0 + x1, z0), -+ svst1 (p0, x0 + x1, z0)) -+ -+/* -+** st1_bf16_1: -+** st1h z0\.h, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_bf16_1, svbfloat16_t, bfloat16_t, -+ svst1_bf16 (p0, x0 + svcnth (), z0), -+ svst1 (p0, x0 + svcnth (), z0)) -+ -+/* -+** st1_bf16_7: -+** st1h z0\.h, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_bf16_7, svbfloat16_t, bfloat16_t, -+ svst1_bf16 (p0, x0 + svcnth () * 7, z0), -+ svst1 (p0, x0 + svcnth () * 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_bf16_8: -+** incb x0, all, mul #8 -+** st1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_bf16_8, svbfloat16_t, bfloat16_t, -+ svst1_bf16 (p0, x0 + svcnth () * 8, z0), -+ svst1 (p0, x0 + svcnth () * 8, z0)) -+ -+/* -+** st1_bf16_m1: -+** st1h z0\.h, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_bf16_m1, svbfloat16_t, bfloat16_t, -+ svst1_bf16 (p0, x0 - svcnth (), z0), -+ svst1 (p0, x0 - svcnth (), z0)) -+ -+/* -+** st1_bf16_m8: -+** st1h z0\.h, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_bf16_m8, svbfloat16_t, bfloat16_t, -+ svst1_bf16 (p0, x0 - svcnth () * 8, z0), -+ svst1 (p0, x0 - svcnth () * 8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_bf16_m9: -+** decb x0, all, mul #9 -+** st1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_bf16_m9, svbfloat16_t, bfloat16_t, -+ svst1_bf16 (p0, x0 - svcnth () * 9, z0), -+ svst1 (p0, x0 - svcnth () * 9, z0)) -+ -+/* -+** st1_vnum_bf16_0: -+** st1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_vnum_bf16_0, svbfloat16_t, bfloat16_t, -+ svst1_vnum_bf16 (p0, x0, 0, z0), -+ svst1_vnum (p0, x0, 0, z0)) -+ -+/* -+** st1_vnum_bf16_1: -+** st1h z0\.h, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_bf16_1, svbfloat16_t, bfloat16_t, -+ svst1_vnum_bf16 (p0, x0, 1, z0), -+ svst1_vnum (p0, x0, 1, z0)) -+ -+/* -+** st1_vnum_bf16_7: -+** st1h z0\.h, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_bf16_7, svbfloat16_t, bfloat16_t, -+ svst1_vnum_bf16 (p0, x0, 7, z0), -+ svst1_vnum (p0, x0, 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_vnum_bf16_8: -+** incb x0, all, mul #8 -+** st1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_vnum_bf16_8, svbfloat16_t, bfloat16_t, -+ svst1_vnum_bf16 (p0, x0, 8, z0), -+ svst1_vnum (p0, x0, 8, z0)) -+ -+/* -+** st1_vnum_bf16_m1: -+** st1h z0\.h, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_bf16_m1, svbfloat16_t, bfloat16_t, -+ svst1_vnum_bf16 (p0, x0, -1, z0), -+ svst1_vnum (p0, x0, -1, z0)) -+ -+/* -+** st1_vnum_bf16_m8: -+** st1h z0\.h, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_bf16_m8, svbfloat16_t, bfloat16_t, -+ svst1_vnum_bf16 (p0, x0, -8, z0), -+ svst1_vnum (p0, x0, -8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_vnum_bf16_m9: -+** decb x0, all, mul #9 -+** st1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_vnum_bf16_m9, svbfloat16_t, bfloat16_t, -+ svst1_vnum_bf16 (p0, x0, -9, z0), -+ svst1_vnum (p0, x0, -9, z0)) -+ -+/* Using MUL to calculate an index would also be OK. 
*/ -+/* -+** st1_vnum_bf16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st1h z0\.h, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st1_vnum_bf16_x1, svbfloat16_t, bfloat16_t, -+ svst1_vnum_bf16 (p0, x0, x1, z0), -+ svst1_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_f16.c -new file mode 100644 -index 000000000..2406cfd97 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_f16.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1_f16_base: -+** st1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_f16_base, svfloat16_t, float16_t, -+ svst1_f16 (p0, x0, z0), -+ svst1 (p0, x0, z0)) -+ -+/* -+** st1_f16_index: -+** st1h z0\.h, p0, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_STORE (st1_f16_index, svfloat16_t, float16_t, -+ svst1_f16 (p0, x0 + x1, z0), -+ svst1 (p0, x0 + x1, z0)) -+ -+/* -+** st1_f16_1: -+** st1h z0\.h, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_f16_1, svfloat16_t, float16_t, -+ svst1_f16 (p0, x0 + svcnth (), z0), -+ svst1 (p0, x0 + svcnth (), z0)) -+ -+/* -+** st1_f16_7: -+** st1h z0\.h, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_f16_7, svfloat16_t, float16_t, -+ svst1_f16 (p0, x0 + svcnth () * 7, z0), -+ svst1 (p0, x0 + svcnth () * 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_f16_8: -+** incb x0, all, mul #8 -+** st1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_f16_8, svfloat16_t, float16_t, -+ svst1_f16 (p0, x0 + svcnth () * 8, z0), -+ svst1 (p0, x0 + svcnth () * 8, z0)) -+ -+/* -+** st1_f16_m1: -+** st1h z0\.h, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_f16_m1, svfloat16_t, float16_t, -+ svst1_f16 (p0, x0 - svcnth (), z0), -+ svst1 (p0, x0 - svcnth (), z0)) -+ -+/* -+** st1_f16_m8: -+** st1h z0\.h, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_f16_m8, svfloat16_t, float16_t, -+ svst1_f16 (p0, x0 - svcnth () * 8, z0), -+ svst1 (p0, x0 - svcnth () * 8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_f16_m9: -+** decb x0, all, mul #9 -+** st1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_f16_m9, svfloat16_t, float16_t, -+ svst1_f16 (p0, x0 - svcnth () * 9, z0), -+ svst1 (p0, x0 - svcnth () * 9, z0)) -+ -+/* -+** st1_vnum_f16_0: -+** st1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_vnum_f16_0, svfloat16_t, float16_t, -+ svst1_vnum_f16 (p0, x0, 0, z0), -+ svst1_vnum (p0, x0, 0, z0)) -+ -+/* -+** st1_vnum_f16_1: -+** st1h z0\.h, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_f16_1, svfloat16_t, float16_t, -+ svst1_vnum_f16 (p0, x0, 1, z0), -+ svst1_vnum (p0, x0, 1, z0)) -+ -+/* -+** st1_vnum_f16_7: -+** st1h z0\.h, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_f16_7, svfloat16_t, float16_t, -+ svst1_vnum_f16 (p0, x0, 7, z0), -+ svst1_vnum (p0, x0, 7, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st1_vnum_f16_8: -+** incb x0, all, mul #8 -+** st1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_vnum_f16_8, svfloat16_t, float16_t, -+ svst1_vnum_f16 (p0, x0, 8, z0), -+ svst1_vnum (p0, x0, 8, z0)) -+ -+/* -+** st1_vnum_f16_m1: -+** st1h z0\.h, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_f16_m1, svfloat16_t, float16_t, -+ svst1_vnum_f16 (p0, x0, -1, z0), -+ svst1_vnum (p0, x0, -1, z0)) -+ -+/* -+** st1_vnum_f16_m8: -+** st1h z0\.h, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_f16_m8, svfloat16_t, float16_t, -+ svst1_vnum_f16 (p0, x0, -8, z0), -+ svst1_vnum (p0, x0, -8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_vnum_f16_m9: -+** decb x0, all, mul #9 -+** st1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_vnum_f16_m9, svfloat16_t, float16_t, -+ svst1_vnum_f16 (p0, x0, -9, z0), -+ svst1_vnum (p0, x0, -9, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** st1_vnum_f16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st1h z0\.h, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st1_vnum_f16_x1, svfloat16_t, float16_t, -+ svst1_vnum_f16 (p0, x0, x1, z0), -+ svst1_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_f32.c -new file mode 100644 -index 000000000..5fad7f06f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_f32.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1_f32_base: -+** st1w z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_f32_base, svfloat32_t, float32_t, -+ svst1_f32 (p0, x0, z0), -+ svst1 (p0, x0, z0)) -+ -+/* -+** st1_f32_index: -+** st1w z0\.s, p0, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_STORE (st1_f32_index, svfloat32_t, float32_t, -+ svst1_f32 (p0, x0 + x1, z0), -+ svst1 (p0, x0 + x1, z0)) -+ -+/* -+** st1_f32_1: -+** st1w z0\.s, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_f32_1, svfloat32_t, float32_t, -+ svst1_f32 (p0, x0 + svcntw (), z0), -+ svst1 (p0, x0 + svcntw (), z0)) -+ -+/* -+** st1_f32_7: -+** st1w z0\.s, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_f32_7, svfloat32_t, float32_t, -+ svst1_f32 (p0, x0 + svcntw () * 7, z0), -+ svst1 (p0, x0 + svcntw () * 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_f32_8: -+** incb x0, all, mul #8 -+** st1w z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_f32_8, svfloat32_t, float32_t, -+ svst1_f32 (p0, x0 + svcntw () * 8, z0), -+ svst1 (p0, x0 + svcntw () * 8, z0)) -+ -+/* -+** st1_f32_m1: -+** st1w z0\.s, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_f32_m1, svfloat32_t, float32_t, -+ svst1_f32 (p0, x0 - svcntw (), z0), -+ svst1 (p0, x0 - svcntw (), z0)) -+ -+/* -+** st1_f32_m8: -+** st1w z0\.s, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_f32_m8, svfloat32_t, float32_t, -+ svst1_f32 (p0, x0 - svcntw () * 8, z0), -+ svst1 (p0, x0 - svcntw () * 8, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st1_f32_m9: -+** decb x0, all, mul #9 -+** st1w z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_f32_m9, svfloat32_t, float32_t, -+ svst1_f32 (p0, x0 - svcntw () * 9, z0), -+ svst1 (p0, x0 - svcntw () * 9, z0)) -+ -+/* -+** st1_vnum_f32_0: -+** st1w z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_vnum_f32_0, svfloat32_t, float32_t, -+ svst1_vnum_f32 (p0, x0, 0, z0), -+ svst1_vnum (p0, x0, 0, z0)) -+ -+/* -+** st1_vnum_f32_1: -+** st1w z0\.s, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_f32_1, svfloat32_t, float32_t, -+ svst1_vnum_f32 (p0, x0, 1, z0), -+ svst1_vnum (p0, x0, 1, z0)) -+ -+/* -+** st1_vnum_f32_7: -+** st1w z0\.s, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_f32_7, svfloat32_t, float32_t, -+ svst1_vnum_f32 (p0, x0, 7, z0), -+ svst1_vnum (p0, x0, 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_vnum_f32_8: -+** incb x0, all, mul #8 -+** st1w z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_vnum_f32_8, svfloat32_t, float32_t, -+ svst1_vnum_f32 (p0, x0, 8, z0), -+ svst1_vnum (p0, x0, 8, z0)) -+ -+/* -+** st1_vnum_f32_m1: -+** st1w z0\.s, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_f32_m1, svfloat32_t, float32_t, -+ svst1_vnum_f32 (p0, x0, -1, z0), -+ svst1_vnum (p0, x0, -1, z0)) -+ -+/* -+** st1_vnum_f32_m8: -+** st1w z0\.s, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_f32_m8, svfloat32_t, float32_t, -+ svst1_vnum_f32 (p0, x0, -8, z0), -+ svst1_vnum (p0, x0, -8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_vnum_f32_m9: -+** decb x0, all, mul #9 -+** st1w z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_vnum_f32_m9, svfloat32_t, float32_t, -+ svst1_vnum_f32 (p0, x0, -9, z0), -+ svst1_vnum (p0, x0, -9, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** st1_vnum_f32_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st1w z0\.s, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st1_vnum_f32_x1, svfloat32_t, float32_t, -+ svst1_vnum_f32 (p0, x0, x1, z0), -+ svst1_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_f64.c -new file mode 100644 -index 000000000..486f92beb ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_f64.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1_f64_base: -+** st1d z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_f64_base, svfloat64_t, float64_t, -+ svst1_f64 (p0, x0, z0), -+ svst1 (p0, x0, z0)) -+ -+/* -+** st1_f64_index: -+** st1d z0\.d, p0, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_STORE (st1_f64_index, svfloat64_t, float64_t, -+ svst1_f64 (p0, x0 + x1, z0), -+ svst1 (p0, x0 + x1, z0)) -+ -+/* -+** st1_f64_1: -+** st1d z0\.d, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_f64_1, svfloat64_t, float64_t, -+ svst1_f64 (p0, x0 + svcntd (), z0), -+ svst1 (p0, x0 + svcntd (), z0)) -+ -+/* -+** st1_f64_7: -+** st1d z0\.d, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_f64_7, svfloat64_t, float64_t, -+ svst1_f64 (p0, x0 + svcntd () * 7, z0), -+ svst1 (p0, x0 + svcntd () * 7, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st1_f64_8: -+** incb x0, all, mul #8 -+** st1d z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_f64_8, svfloat64_t, float64_t, -+ svst1_f64 (p0, x0 + svcntd () * 8, z0), -+ svst1 (p0, x0 + svcntd () * 8, z0)) -+ -+/* -+** st1_f64_m1: -+** st1d z0\.d, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_f64_m1, svfloat64_t, float64_t, -+ svst1_f64 (p0, x0 - svcntd (), z0), -+ svst1 (p0, x0 - svcntd (), z0)) -+ -+/* -+** st1_f64_m8: -+** st1d z0\.d, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_f64_m8, svfloat64_t, float64_t, -+ svst1_f64 (p0, x0 - svcntd () * 8, z0), -+ svst1 (p0, x0 - svcntd () * 8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_f64_m9: -+** decb x0, all, mul #9 -+** st1d z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_f64_m9, svfloat64_t, float64_t, -+ svst1_f64 (p0, x0 - svcntd () * 9, z0), -+ svst1 (p0, x0 - svcntd () * 9, z0)) -+ -+/* -+** st1_vnum_f64_0: -+** st1d z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_vnum_f64_0, svfloat64_t, float64_t, -+ svst1_vnum_f64 (p0, x0, 0, z0), -+ svst1_vnum (p0, x0, 0, z0)) -+ -+/* -+** st1_vnum_f64_1: -+** st1d z0\.d, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_f64_1, svfloat64_t, float64_t, -+ svst1_vnum_f64 (p0, x0, 1, z0), -+ svst1_vnum (p0, x0, 1, z0)) -+ -+/* -+** st1_vnum_f64_7: -+** st1d z0\.d, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_f64_7, svfloat64_t, float64_t, -+ svst1_vnum_f64 (p0, x0, 7, z0), -+ svst1_vnum (p0, x0, 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_vnum_f64_8: -+** incb x0, all, mul #8 -+** st1d z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_vnum_f64_8, svfloat64_t, float64_t, -+ svst1_vnum_f64 (p0, x0, 8, z0), -+ svst1_vnum (p0, x0, 8, z0)) -+ -+/* -+** st1_vnum_f64_m1: -+** st1d z0\.d, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_f64_m1, svfloat64_t, float64_t, -+ svst1_vnum_f64 (p0, x0, -1, z0), -+ svst1_vnum (p0, x0, -1, z0)) -+ -+/* -+** st1_vnum_f64_m8: -+** st1d z0\.d, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_f64_m8, svfloat64_t, float64_t, -+ svst1_vnum_f64 (p0, x0, -8, z0), -+ svst1_vnum (p0, x0, -8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_vnum_f64_m9: -+** decb x0, all, mul #9 -+** st1d z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_vnum_f64_m9, svfloat64_t, float64_t, -+ svst1_vnum_f64 (p0, x0, -9, z0), -+ svst1_vnum (p0, x0, -9, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** st1_vnum_f64_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st1d z0\.d, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st1_vnum_f64_x1, svfloat64_t, float64_t, -+ svst1_vnum_f64 (p0, x0, x1, z0), -+ svst1_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_s16.c -new file mode 100644 -index 000000000..7d4ac25d2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_s16.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1_s16_base: -+** st1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_s16_base, svint16_t, int16_t, -+ svst1_s16 (p0, x0, z0), -+ svst1 (p0, x0, z0)) -+ -+/* -+** st1_s16_index: -+** st1h z0\.h, p0, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_STORE (st1_s16_index, svint16_t, int16_t, -+ svst1_s16 (p0, x0 + x1, z0), -+ svst1 (p0, x0 + x1, z0)) -+ -+/* -+** st1_s16_1: -+** st1h z0\.h, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_s16_1, svint16_t, int16_t, -+ svst1_s16 (p0, x0 + svcnth (), z0), -+ svst1 (p0, x0 + svcnth (), z0)) -+ -+/* -+** st1_s16_7: -+** st1h z0\.h, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_s16_7, svint16_t, int16_t, -+ svst1_s16 (p0, x0 + svcnth () * 7, z0), -+ svst1 (p0, x0 + svcnth () * 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_s16_8: -+** incb x0, all, mul #8 -+** st1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_s16_8, svint16_t, int16_t, -+ svst1_s16 (p0, x0 + svcnth () * 8, z0), -+ svst1 (p0, x0 + svcnth () * 8, z0)) -+ -+/* -+** st1_s16_m1: -+** st1h z0\.h, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_s16_m1, svint16_t, int16_t, -+ svst1_s16 (p0, x0 - svcnth (), z0), -+ svst1 (p0, x0 - svcnth (), z0)) -+ -+/* -+** st1_s16_m8: -+** st1h z0\.h, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_s16_m8, svint16_t, int16_t, -+ svst1_s16 (p0, x0 - svcnth () * 8, z0), -+ svst1 (p0, x0 - svcnth () * 8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_s16_m9: -+** decb x0, all, mul #9 -+** st1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_s16_m9, svint16_t, int16_t, -+ svst1_s16 (p0, x0 - svcnth () * 9, z0), -+ svst1 (p0, x0 - svcnth () * 9, z0)) -+ -+/* -+** st1_vnum_s16_0: -+** st1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_vnum_s16_0, svint16_t, int16_t, -+ svst1_vnum_s16 (p0, x0, 0, z0), -+ svst1_vnum (p0, x0, 0, z0)) -+ -+/* -+** st1_vnum_s16_1: -+** st1h z0\.h, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_s16_1, svint16_t, int16_t, -+ svst1_vnum_s16 (p0, x0, 1, z0), -+ svst1_vnum (p0, x0, 1, z0)) -+ -+/* -+** st1_vnum_s16_7: -+** st1h z0\.h, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_s16_7, svint16_t, int16_t, -+ svst1_vnum_s16 (p0, x0, 7, z0), -+ svst1_vnum (p0, x0, 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_vnum_s16_8: -+** incb x0, all, mul #8 -+** st1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_vnum_s16_8, svint16_t, int16_t, -+ svst1_vnum_s16 (p0, x0, 8, z0), -+ svst1_vnum (p0, x0, 8, z0)) -+ -+/* -+** st1_vnum_s16_m1: -+** st1h z0\.h, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_s16_m1, svint16_t, int16_t, -+ svst1_vnum_s16 (p0, x0, -1, z0), -+ svst1_vnum (p0, x0, -1, z0)) -+ -+/* -+** st1_vnum_s16_m8: -+** st1h z0\.h, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_s16_m8, svint16_t, int16_t, -+ svst1_vnum_s16 (p0, x0, -8, z0), -+ svst1_vnum (p0, x0, -8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_vnum_s16_m9: -+** decb x0, all, mul #9 -+** st1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_vnum_s16_m9, svint16_t, int16_t, -+ svst1_vnum_s16 (p0, x0, -9, z0), -+ svst1_vnum (p0, x0, -9, z0)) -+ -+/* Using MUL to calculate an index would also be OK. 
*/ -+/* -+** st1_vnum_s16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st1h z0\.h, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st1_vnum_s16_x1, svint16_t, int16_t, -+ svst1_vnum_s16 (p0, x0, x1, z0), -+ svst1_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_s32.c -new file mode 100644 -index 000000000..e2bcc3403 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_s32.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1_s32_base: -+** st1w z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_s32_base, svint32_t, int32_t, -+ svst1_s32 (p0, x0, z0), -+ svst1 (p0, x0, z0)) -+ -+/* -+** st1_s32_index: -+** st1w z0\.s, p0, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_STORE (st1_s32_index, svint32_t, int32_t, -+ svst1_s32 (p0, x0 + x1, z0), -+ svst1 (p0, x0 + x1, z0)) -+ -+/* -+** st1_s32_1: -+** st1w z0\.s, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_s32_1, svint32_t, int32_t, -+ svst1_s32 (p0, x0 + svcntw (), z0), -+ svst1 (p0, x0 + svcntw (), z0)) -+ -+/* -+** st1_s32_7: -+** st1w z0\.s, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_s32_7, svint32_t, int32_t, -+ svst1_s32 (p0, x0 + svcntw () * 7, z0), -+ svst1 (p0, x0 + svcntw () * 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_s32_8: -+** incb x0, all, mul #8 -+** st1w z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_s32_8, svint32_t, int32_t, -+ svst1_s32 (p0, x0 + svcntw () * 8, z0), -+ svst1 (p0, x0 + svcntw () * 8, z0)) -+ -+/* -+** st1_s32_m1: -+** st1w z0\.s, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_s32_m1, svint32_t, int32_t, -+ svst1_s32 (p0, x0 - svcntw (), z0), -+ svst1 (p0, x0 - svcntw (), z0)) -+ -+/* -+** st1_s32_m8: -+** st1w z0\.s, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_s32_m8, svint32_t, int32_t, -+ svst1_s32 (p0, x0 - svcntw () * 8, z0), -+ svst1 (p0, x0 - svcntw () * 8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_s32_m9: -+** decb x0, all, mul #9 -+** st1w z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_s32_m9, svint32_t, int32_t, -+ svst1_s32 (p0, x0 - svcntw () * 9, z0), -+ svst1 (p0, x0 - svcntw () * 9, z0)) -+ -+/* -+** st1_vnum_s32_0: -+** st1w z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_vnum_s32_0, svint32_t, int32_t, -+ svst1_vnum_s32 (p0, x0, 0, z0), -+ svst1_vnum (p0, x0, 0, z0)) -+ -+/* -+** st1_vnum_s32_1: -+** st1w z0\.s, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_s32_1, svint32_t, int32_t, -+ svst1_vnum_s32 (p0, x0, 1, z0), -+ svst1_vnum (p0, x0, 1, z0)) -+ -+/* -+** st1_vnum_s32_7: -+** st1w z0\.s, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_s32_7, svint32_t, int32_t, -+ svst1_vnum_s32 (p0, x0, 7, z0), -+ svst1_vnum (p0, x0, 7, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st1_vnum_s32_8: -+** incb x0, all, mul #8 -+** st1w z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_vnum_s32_8, svint32_t, int32_t, -+ svst1_vnum_s32 (p0, x0, 8, z0), -+ svst1_vnum (p0, x0, 8, z0)) -+ -+/* -+** st1_vnum_s32_m1: -+** st1w z0\.s, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_s32_m1, svint32_t, int32_t, -+ svst1_vnum_s32 (p0, x0, -1, z0), -+ svst1_vnum (p0, x0, -1, z0)) -+ -+/* -+** st1_vnum_s32_m8: -+** st1w z0\.s, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_s32_m8, svint32_t, int32_t, -+ svst1_vnum_s32 (p0, x0, -8, z0), -+ svst1_vnum (p0, x0, -8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_vnum_s32_m9: -+** decb x0, all, mul #9 -+** st1w z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_vnum_s32_m9, svint32_t, int32_t, -+ svst1_vnum_s32 (p0, x0, -9, z0), -+ svst1_vnum (p0, x0, -9, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** st1_vnum_s32_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st1w z0\.s, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st1_vnum_s32_x1, svint32_t, int32_t, -+ svst1_vnum_s32 (p0, x0, x1, z0), -+ svst1_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_s64.c -new file mode 100644 -index 000000000..8e0b69f73 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_s64.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1_s64_base: -+** st1d z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_s64_base, svint64_t, int64_t, -+ svst1_s64 (p0, x0, z0), -+ svst1 (p0, x0, z0)) -+ -+/* -+** st1_s64_index: -+** st1d z0\.d, p0, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_STORE (st1_s64_index, svint64_t, int64_t, -+ svst1_s64 (p0, x0 + x1, z0), -+ svst1 (p0, x0 + x1, z0)) -+ -+/* -+** st1_s64_1: -+** st1d z0\.d, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_s64_1, svint64_t, int64_t, -+ svst1_s64 (p0, x0 + svcntd (), z0), -+ svst1 (p0, x0 + svcntd (), z0)) -+ -+/* -+** st1_s64_7: -+** st1d z0\.d, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_s64_7, svint64_t, int64_t, -+ svst1_s64 (p0, x0 + svcntd () * 7, z0), -+ svst1 (p0, x0 + svcntd () * 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_s64_8: -+** incb x0, all, mul #8 -+** st1d z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_s64_8, svint64_t, int64_t, -+ svst1_s64 (p0, x0 + svcntd () * 8, z0), -+ svst1 (p0, x0 + svcntd () * 8, z0)) -+ -+/* -+** st1_s64_m1: -+** st1d z0\.d, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_s64_m1, svint64_t, int64_t, -+ svst1_s64 (p0, x0 - svcntd (), z0), -+ svst1 (p0, x0 - svcntd (), z0)) -+ -+/* -+** st1_s64_m8: -+** st1d z0\.d, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_s64_m8, svint64_t, int64_t, -+ svst1_s64 (p0, x0 - svcntd () * 8, z0), -+ svst1 (p0, x0 - svcntd () * 8, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st1_s64_m9: -+** decb x0, all, mul #9 -+** st1d z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_s64_m9, svint64_t, int64_t, -+ svst1_s64 (p0, x0 - svcntd () * 9, z0), -+ svst1 (p0, x0 - svcntd () * 9, z0)) -+ -+/* -+** st1_vnum_s64_0: -+** st1d z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_vnum_s64_0, svint64_t, int64_t, -+ svst1_vnum_s64 (p0, x0, 0, z0), -+ svst1_vnum (p0, x0, 0, z0)) -+ -+/* -+** st1_vnum_s64_1: -+** st1d z0\.d, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_s64_1, svint64_t, int64_t, -+ svst1_vnum_s64 (p0, x0, 1, z0), -+ svst1_vnum (p0, x0, 1, z0)) -+ -+/* -+** st1_vnum_s64_7: -+** st1d z0\.d, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_s64_7, svint64_t, int64_t, -+ svst1_vnum_s64 (p0, x0, 7, z0), -+ svst1_vnum (p0, x0, 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_vnum_s64_8: -+** incb x0, all, mul #8 -+** st1d z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_vnum_s64_8, svint64_t, int64_t, -+ svst1_vnum_s64 (p0, x0, 8, z0), -+ svst1_vnum (p0, x0, 8, z0)) -+ -+/* -+** st1_vnum_s64_m1: -+** st1d z0\.d, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_s64_m1, svint64_t, int64_t, -+ svst1_vnum_s64 (p0, x0, -1, z0), -+ svst1_vnum (p0, x0, -1, z0)) -+ -+/* -+** st1_vnum_s64_m8: -+** st1d z0\.d, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_s64_m8, svint64_t, int64_t, -+ svst1_vnum_s64 (p0, x0, -8, z0), -+ svst1_vnum (p0, x0, -8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_vnum_s64_m9: -+** decb x0, all, mul #9 -+** st1d z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_vnum_s64_m9, svint64_t, int64_t, -+ svst1_vnum_s64 (p0, x0, -9, z0), -+ svst1_vnum (p0, x0, -9, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** st1_vnum_s64_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st1d z0\.d, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st1_vnum_s64_x1, svint64_t, int64_t, -+ svst1_vnum_s64 (p0, x0, x1, z0), -+ svst1_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_s8.c -new file mode 100644 -index 000000000..4155683ab ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_s8.c -@@ -0,0 +1,162 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1_s8_base: -+** st1b z0\.b, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_s8_base, svint8_t, int8_t, -+ svst1_s8 (p0, x0, z0), -+ svst1 (p0, x0, z0)) -+ -+/* -+** st1_s8_index: -+** st1b z0\.b, p0, \[x0, x1\] -+** ret -+*/ -+TEST_STORE (st1_s8_index, svint8_t, int8_t, -+ svst1_s8 (p0, x0 + x1, z0), -+ svst1 (p0, x0 + x1, z0)) -+ -+/* -+** st1_s8_1: -+** st1b z0\.b, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_s8_1, svint8_t, int8_t, -+ svst1_s8 (p0, x0 + svcntb (), z0), -+ svst1 (p0, x0 + svcntb (), z0)) -+ -+/* -+** st1_s8_7: -+** st1b z0\.b, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_s8_7, svint8_t, int8_t, -+ svst1_s8 (p0, x0 + svcntb () * 7, z0), -+ svst1 (p0, x0 + svcntb () * 7, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st1_s8_8: -+** incb x0, all, mul #8 -+** st1b z0\.b, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_s8_8, svint8_t, int8_t, -+ svst1_s8 (p0, x0 + svcntb () * 8, z0), -+ svst1 (p0, x0 + svcntb () * 8, z0)) -+ -+/* -+** st1_s8_m1: -+** st1b z0\.b, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_s8_m1, svint8_t, int8_t, -+ svst1_s8 (p0, x0 - svcntb (), z0), -+ svst1 (p0, x0 - svcntb (), z0)) -+ -+/* -+** st1_s8_m8: -+** st1b z0\.b, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_s8_m8, svint8_t, int8_t, -+ svst1_s8 (p0, x0 - svcntb () * 8, z0), -+ svst1 (p0, x0 - svcntb () * 8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_s8_m9: -+** decb x0, all, mul #9 -+** st1b z0\.b, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_s8_m9, svint8_t, int8_t, -+ svst1_s8 (p0, x0 - svcntb () * 9, z0), -+ svst1 (p0, x0 - svcntb () * 9, z0)) -+ -+/* -+** st1_vnum_s8_0: -+** st1b z0\.b, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_vnum_s8_0, svint8_t, int8_t, -+ svst1_vnum_s8 (p0, x0, 0, z0), -+ svst1_vnum (p0, x0, 0, z0)) -+ -+/* -+** st1_vnum_s8_1: -+** st1b z0\.b, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_s8_1, svint8_t, int8_t, -+ svst1_vnum_s8 (p0, x0, 1, z0), -+ svst1_vnum (p0, x0, 1, z0)) -+ -+/* -+** st1_vnum_s8_7: -+** st1b z0\.b, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_s8_7, svint8_t, int8_t, -+ svst1_vnum_s8 (p0, x0, 7, z0), -+ svst1_vnum (p0, x0, 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_vnum_s8_8: -+** incb x0, all, mul #8 -+** st1b z0\.b, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_vnum_s8_8, svint8_t, int8_t, -+ svst1_vnum_s8 (p0, x0, 8, z0), -+ svst1_vnum (p0, x0, 8, z0)) -+ -+/* -+** st1_vnum_s8_m1: -+** st1b z0\.b, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_s8_m1, svint8_t, int8_t, -+ svst1_vnum_s8 (p0, x0, -1, z0), -+ svst1_vnum (p0, x0, -1, z0)) -+ -+/* -+** st1_vnum_s8_m8: -+** st1b z0\.b, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_s8_m8, svint8_t, int8_t, -+ svst1_vnum_s8 (p0, x0, -8, z0), -+ svst1_vnum (p0, x0, -8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_vnum_s8_m9: -+** decb x0, all, mul #9 -+** st1b z0\.b, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_vnum_s8_m9, svint8_t, int8_t, -+ svst1_vnum_s8 (p0, x0, -9, z0), -+ svst1_vnum (p0, x0, -9, z0)) -+ -+/* -+** st1_vnum_s8_x1: -+** cntb (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** st1b z0\.b, p0, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** st1b z0\.b, p0, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_STORE (st1_vnum_s8_x1, svint8_t, int8_t, -+ svst1_vnum_s8 (p0, x0, x1, z0), -+ svst1_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_f32.c -new file mode 100644 -index 000000000..cb6774ad0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_f32.c -@@ -0,0 +1,227 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1_scatter_f32: -+** st1w z0\.s, p0, \[z1\.s\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_f32, svfloat32_t, svuint32_t, -+ svst1_scatter_u32base_f32 (p0, z1, z0), -+ svst1_scatter (p0, z1, z0)) -+ -+/* -+** st1_scatter_x0_f32_offset: -+** st1w z0\.s, p0, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_x0_f32_offset, svfloat32_t, svuint32_t, -+ svst1_scatter_u32base_offset_f32 (p0, z1, x0, z0), -+ svst1_scatter_offset (p0, z1, x0, z0)) -+ -+/* -+** st1_scatter_m4_f32_offset: -+** mov (x[0-9]+), #?-4 -+** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_m4_f32_offset, svfloat32_t, svuint32_t, -+ svst1_scatter_u32base_offset_f32 (p0, z1, -4, z0), -+ svst1_scatter_offset (p0, z1, -4, z0)) -+ -+/* -+** st1_scatter_0_f32_offset: -+** st1w z0\.s, p0, \[z1\.s\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_0_f32_offset, svfloat32_t, svuint32_t, -+ svst1_scatter_u32base_offset_f32 (p0, z1, 0, z0), -+ svst1_scatter_offset (p0, z1, 0, z0)) -+ -+/* -+** st1_scatter_5_f32_offset: -+** mov (x[0-9]+), #?5 -+** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_5_f32_offset, svfloat32_t, svuint32_t, -+ svst1_scatter_u32base_offset_f32 (p0, z1, 5, z0), -+ svst1_scatter_offset (p0, z1, 5, z0)) -+ -+/* -+** st1_scatter_6_f32_offset: -+** mov (x[0-9]+), #?6 -+** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_6_f32_offset, svfloat32_t, svuint32_t, -+ svst1_scatter_u32base_offset_f32 (p0, z1, 6, z0), -+ svst1_scatter_offset (p0, z1, 6, z0)) -+ -+/* -+** st1_scatter_7_f32_offset: -+** mov (x[0-9]+), #?7 -+** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_7_f32_offset, svfloat32_t, svuint32_t, -+ svst1_scatter_u32base_offset_f32 (p0, z1, 7, z0), -+ svst1_scatter_offset (p0, z1, 7, z0)) -+ -+/* -+** st1_scatter_8_f32_offset: -+** st1w z0\.s, p0, \[z1\.s, #8\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_8_f32_offset, svfloat32_t, svuint32_t, -+ svst1_scatter_u32base_offset_f32 (p0, z1, 8, z0), -+ svst1_scatter_offset (p0, z1, 8, z0)) -+ -+/* -+** st1_scatter_124_f32_offset: -+** st1w z0\.s, p0, \[z1\.s, #124\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_124_f32_offset, svfloat32_t, svuint32_t, -+ svst1_scatter_u32base_offset_f32 (p0, z1, 124, z0), -+ svst1_scatter_offset (p0, z1, 124, z0)) -+ -+/* -+** st1_scatter_128_f32_offset: -+** mov (x[0-9]+), #?128 -+** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_128_f32_offset, svfloat32_t, svuint32_t, -+ svst1_scatter_u32base_offset_f32 (p0, z1, 128, z0), -+ svst1_scatter_offset (p0, z1, 128, z0)) -+ -+/* -+** st1_scatter_x0_f32_index: -+** lsl (x[0-9]+), x0, #?2 -+** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_x0_f32_index, svfloat32_t, svuint32_t, -+ svst1_scatter_u32base_index_f32 (p0, z1, x0, z0), -+ svst1_scatter_index (p0, z1, x0, z0)) -+ -+/* -+** st1_scatter_m1_f32_index: -+** mov (x[0-9]+), #?-4 -+** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_m1_f32_index, svfloat32_t, svuint32_t, -+ svst1_scatter_u32base_index_f32 (p0, z1, -1, z0), -+ svst1_scatter_index (p0, z1, -1, z0)) -+ -+/* -+** st1_scatter_0_f32_index: -+** st1w z0\.s, p0, \[z1\.s\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_0_f32_index, svfloat32_t, svuint32_t, -+ svst1_scatter_u32base_index_f32 (p0, z1, 0, 
z0), -+ svst1_scatter_index (p0, z1, 0, z0)) -+ -+/* -+** st1_scatter_5_f32_index: -+** st1w z0\.s, p0, \[z1\.s, #20\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_5_f32_index, svfloat32_t, svuint32_t, -+ svst1_scatter_u32base_index_f32 (p0, z1, 5, z0), -+ svst1_scatter_index (p0, z1, 5, z0)) -+ -+/* -+** st1_scatter_31_f32_index: -+** st1w z0\.s, p0, \[z1\.s, #124\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_31_f32_index, svfloat32_t, svuint32_t, -+ svst1_scatter_u32base_index_f32 (p0, z1, 31, z0), -+ svst1_scatter_index (p0, z1, 31, z0)) -+ -+/* -+** st1_scatter_32_f32_index: -+** mov (x[0-9]+), #?128 -+** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_32_f32_index, svfloat32_t, svuint32_t, -+ svst1_scatter_u32base_index_f32 (p0, z1, 32, z0), -+ svst1_scatter_index (p0, z1, 32, z0)) -+ -+/* -+** st1_scatter_x0_f32_s32offset: -+** st1w z0\.s, p0, \[x0, z1\.s, sxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_x0_f32_s32offset, svfloat32_t, float32_t, svint32_t, -+ svst1_scatter_s32offset_f32 (p0, x0, z1, z0), -+ svst1_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_f32_s32offset: -+** st1w z0\.s, p0, \[x0, z1\.s, sxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_f32_s32offset, svfloat32_t, float32_t, svint32_t, -+ svst1_scatter_s32offset_f32 (p0, x0, z1, z0), -+ svst1_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_x0_f32_u32offset: -+** st1w z0\.s, p0, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_x0_f32_u32offset, svfloat32_t, float32_t, svuint32_t, -+ svst1_scatter_u32offset_f32 (p0, x0, z1, z0), -+ svst1_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_f32_u32offset: -+** st1w z0\.s, p0, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_f32_u32offset, svfloat32_t, float32_t, svuint32_t, -+ svst1_scatter_u32offset_f32 (p0, x0, z1, z0), -+ svst1_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_x0_f32_s32index: -+** st1w z0\.s, p0, \[x0, z1\.s, sxtw 2\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_x0_f32_s32index, svfloat32_t, float32_t, svint32_t, -+ svst1_scatter_s32index_f32 (p0, x0, z1, z0), -+ svst1_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_f32_s32index: -+** st1w z0\.s, p0, \[x0, z1\.s, sxtw 2\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_f32_s32index, svfloat32_t, float32_t, svint32_t, -+ svst1_scatter_s32index_f32 (p0, x0, z1, z0), -+ svst1_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_x0_f32_u32index: -+** st1w z0\.s, p0, \[x0, z1\.s, uxtw 2\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_x0_f32_u32index, svfloat32_t, float32_t, svuint32_t, -+ svst1_scatter_u32index_f32 (p0, x0, z1, z0), -+ svst1_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_f32_u32index: -+** st1w z0\.s, p0, \[x0, z1\.s, uxtw 2\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_f32_u32index, svfloat32_t, float32_t, svuint32_t, -+ svst1_scatter_u32index_f32 (p0, x0, z1, z0), -+ svst1_scatter_index (p0, x0, z1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_f64.c -new file mode 100644 -index 000000000..fe978bbe5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_f64.c -@@ -0,0 +1,303 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1_scatter_f64: -+** st1d z0\.d, p0, \[z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_f64, svfloat64_t, svuint64_t, -+ svst1_scatter_u64base_f64 (p0, z1, z0), -+ svst1_scatter (p0, z1, z0)) -+ -+/* -+** st1_scatter_x0_f64_offset: -+** st1d z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_x0_f64_offset, svfloat64_t, svuint64_t, -+ svst1_scatter_u64base_offset_f64 (p0, z1, x0, z0), -+ svst1_scatter_offset (p0, z1, x0, z0)) -+ -+/* -+** st1_scatter_m8_f64_offset: -+** mov (x[0-9]+), #?-8 -+** st1d z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_m8_f64_offset, svfloat64_t, svuint64_t, -+ svst1_scatter_u64base_offset_f64 (p0, z1, -8, z0), -+ svst1_scatter_offset (p0, z1, -8, z0)) -+ -+/* -+** st1_scatter_0_f64_offset: -+** st1d z0\.d, p0, \[z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_0_f64_offset, svfloat64_t, svuint64_t, -+ svst1_scatter_u64base_offset_f64 (p0, z1, 0, z0), -+ svst1_scatter_offset (p0, z1, 0, z0)) -+ -+/* -+** st1_scatter_9_f64_offset: -+** mov (x[0-9]+), #?9 -+** st1d z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_9_f64_offset, svfloat64_t, svuint64_t, -+ svst1_scatter_u64base_offset_f64 (p0, z1, 9, z0), -+ svst1_scatter_offset (p0, z1, 9, z0)) -+ -+/* -+** st1_scatter_10_f64_offset: -+** mov (x[0-9]+), #?10 -+** st1d z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_10_f64_offset, svfloat64_t, svuint64_t, -+ svst1_scatter_u64base_offset_f64 (p0, z1, 10, z0), -+ svst1_scatter_offset (p0, z1, 10, z0)) -+ -+/* -+** st1_scatter_11_f64_offset: -+** mov (x[0-9]+), #?11 -+** st1d z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_11_f64_offset, svfloat64_t, svuint64_t, -+ svst1_scatter_u64base_offset_f64 (p0, z1, 11, z0), -+ svst1_scatter_offset (p0, z1, 11, z0)) -+ -+/* -+** st1_scatter_12_f64_offset: -+** mov (x[0-9]+), #?12 -+** st1d z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_12_f64_offset, svfloat64_t, svuint64_t, -+ svst1_scatter_u64base_offset_f64 (p0, z1, 12, z0), -+ svst1_scatter_offset (p0, z1, 12, z0)) -+ -+/* -+** st1_scatter_13_f64_offset: -+** mov (x[0-9]+), #?13 -+** st1d z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_13_f64_offset, svfloat64_t, svuint64_t, -+ svst1_scatter_u64base_offset_f64 (p0, z1, 13, z0), -+ svst1_scatter_offset (p0, z1, 13, z0)) -+ -+/* -+** st1_scatter_14_f64_offset: -+** mov (x[0-9]+), #?14 -+** st1d z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_14_f64_offset, svfloat64_t, svuint64_t, -+ svst1_scatter_u64base_offset_f64 (p0, z1, 14, z0), -+ svst1_scatter_offset (p0, z1, 14, z0)) -+ -+/* -+** st1_scatter_15_f64_offset: -+** mov (x[0-9]+), #?15 -+** st1d z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_15_f64_offset, svfloat64_t, svuint64_t, -+ svst1_scatter_u64base_offset_f64 (p0, z1, 15, z0), -+ svst1_scatter_offset (p0, z1, 15, z0)) -+ -+/* -+** st1_scatter_16_f64_offset: -+** st1d z0\.d, p0, \[z1\.d, #16\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_16_f64_offset, svfloat64_t, svuint64_t, -+ svst1_scatter_u64base_offset_f64 (p0, z1, 16, z0), -+ svst1_scatter_offset (p0, z1, 16, z0)) -+ -+/* -+** st1_scatter_248_f64_offset: -+** st1d z0\.d, p0, \[z1\.d, #248\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_248_f64_offset, svfloat64_t, svuint64_t, -+ svst1_scatter_u64base_offset_f64 (p0, z1, 248, 
z0), -+ svst1_scatter_offset (p0, z1, 248, z0)) -+ -+/* -+** st1_scatter_256_f64_offset: -+** mov (x[0-9]+), #?256 -+** st1d z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_256_f64_offset, svfloat64_t, svuint64_t, -+ svst1_scatter_u64base_offset_f64 (p0, z1, 256, z0), -+ svst1_scatter_offset (p0, z1, 256, z0)) -+ -+/* -+** st1_scatter_x0_f64_index: -+** lsl (x[0-9]+), x0, #?3 -+** st1d z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_x0_f64_index, svfloat64_t, svuint64_t, -+ svst1_scatter_u64base_index_f64 (p0, z1, x0, z0), -+ svst1_scatter_index (p0, z1, x0, z0)) -+ -+/* -+** st1_scatter_m1_f64_index: -+** mov (x[0-9]+), #?-8 -+** st1d z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_m1_f64_index, svfloat64_t, svuint64_t, -+ svst1_scatter_u64base_index_f64 (p0, z1, -1, z0), -+ svst1_scatter_index (p0, z1, -1, z0)) -+ -+/* -+** st1_scatter_0_f64_index: -+** st1d z0\.d, p0, \[z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_0_f64_index, svfloat64_t, svuint64_t, -+ svst1_scatter_u64base_index_f64 (p0, z1, 0, z0), -+ svst1_scatter_index (p0, z1, 0, z0)) -+ -+/* -+** st1_scatter_5_f64_index: -+** st1d z0\.d, p0, \[z1\.d, #40\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_5_f64_index, svfloat64_t, svuint64_t, -+ svst1_scatter_u64base_index_f64 (p0, z1, 5, z0), -+ svst1_scatter_index (p0, z1, 5, z0)) -+ -+/* -+** st1_scatter_31_f64_index: -+** st1d z0\.d, p0, \[z1\.d, #248\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_31_f64_index, svfloat64_t, svuint64_t, -+ svst1_scatter_u64base_index_f64 (p0, z1, 31, z0), -+ svst1_scatter_index (p0, z1, 31, z0)) -+ -+/* -+** st1_scatter_32_f64_index: -+** mov (x[0-9]+), #?256 -+** st1d z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_32_f64_index, svfloat64_t, svuint64_t, -+ svst1_scatter_u64base_index_f64 (p0, z1, 32, z0), -+ svst1_scatter_index (p0, z1, 32, z0)) -+ -+/* -+** st1_scatter_x0_f64_s64offset: -+** st1d z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_x0_f64_s64offset, svfloat64_t, float64_t, svint64_t, -+ svst1_scatter_s64offset_f64 (p0, x0, z1, z0), -+ svst1_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_f64_s64offset: -+** st1d z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_f64_s64offset, svfloat64_t, float64_t, svint64_t, -+ svst1_scatter_s64offset_f64 (p0, x0, z1, z0), -+ svst1_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_ext_f64_s64offset: -+** st1d z0\.d, p0, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_ext_f64_s64offset, svfloat64_t, float64_t, svint64_t, -+ svst1_scatter_s64offset_f64 (p0, x0, svextw_s64_x (p0, z1), z0), -+ svst1_scatter_offset (p0, x0, svextw_x (p0, z1), z0)) -+ -+/* -+** st1_scatter_x0_f64_u64offset: -+** st1d z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_x0_f64_u64offset, svfloat64_t, float64_t, svuint64_t, -+ svst1_scatter_u64offset_f64 (p0, x0, z1, z0), -+ svst1_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_f64_u64offset: -+** st1d z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_f64_u64offset, svfloat64_t, float64_t, svuint64_t, -+ svst1_scatter_u64offset_f64 (p0, x0, z1, z0), -+ svst1_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_ext_f64_u64offset: -+** st1d z0\.d, p0, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_ext_f64_u64offset, svfloat64_t, float64_t, svuint64_t, -+ 
svst1_scatter_u64offset_f64 (p0, x0, svextw_u64_x (p0, z1), z0), -+ svst1_scatter_offset (p0, x0, svextw_x (p0, z1), z0)) -+ -+/* -+** st1_scatter_x0_f64_s64index: -+** st1d z0\.d, p0, \[x0, z1\.d, lsl 3\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_x0_f64_s64index, svfloat64_t, float64_t, svint64_t, -+ svst1_scatter_s64index_f64 (p0, x0, z1, z0), -+ svst1_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_f64_s64index: -+** st1d z0\.d, p0, \[x0, z1\.d, lsl 3\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_f64_s64index, svfloat64_t, float64_t, svint64_t, -+ svst1_scatter_s64index_f64 (p0, x0, z1, z0), -+ svst1_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_ext_f64_s64index: -+** st1d z0\.d, p0, \[x0, z1\.d, sxtw 3\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_ext_f64_s64index, svfloat64_t, float64_t, svint64_t, -+ svst1_scatter_s64index_f64 (p0, x0, svextw_s64_x (p0, z1), z0), -+ svst1_scatter_index (p0, x0, svextw_x (p0, z1), z0)) -+ -+/* -+** st1_scatter_x0_f64_u64index: -+** st1d z0\.d, p0, \[x0, z1\.d, lsl 3\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_x0_f64_u64index, svfloat64_t, float64_t, svuint64_t, -+ svst1_scatter_u64index_f64 (p0, x0, z1, z0), -+ svst1_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_f64_u64index: -+** st1d z0\.d, p0, \[x0, z1\.d, lsl 3\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_f64_u64index, svfloat64_t, float64_t, svuint64_t, -+ svst1_scatter_u64index_f64 (p0, x0, z1, z0), -+ svst1_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_ext_f64_u64index: -+** st1d z0\.d, p0, \[x0, z1\.d, uxtw 3\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_ext_f64_u64index, svfloat64_t, float64_t, svuint64_t, -+ svst1_scatter_u64index_f64 (p0, x0, svextw_u64_x (p0, z1), z0), -+ svst1_scatter_index (p0, x0, svextw_x (p0, z1), z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_s32.c -new file mode 100644 -index 000000000..d244e701a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_s32.c -@@ -0,0 +1,227 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1_scatter_s32: -+** st1w z0\.s, p0, \[z1\.s\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_s32, svint32_t, svuint32_t, -+ svst1_scatter_u32base_s32 (p0, z1, z0), -+ svst1_scatter (p0, z1, z0)) -+ -+/* -+** st1_scatter_x0_s32_offset: -+** st1w z0\.s, p0, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_x0_s32_offset, svint32_t, svuint32_t, -+ svst1_scatter_u32base_offset_s32 (p0, z1, x0, z0), -+ svst1_scatter_offset (p0, z1, x0, z0)) -+ -+/* -+** st1_scatter_m4_s32_offset: -+** mov (x[0-9]+), #?-4 -+** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_m4_s32_offset, svint32_t, svuint32_t, -+ svst1_scatter_u32base_offset_s32 (p0, z1, -4, z0), -+ svst1_scatter_offset (p0, z1, -4, z0)) -+ -+/* -+** st1_scatter_0_s32_offset: -+** st1w z0\.s, p0, \[z1\.s\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_0_s32_offset, svint32_t, svuint32_t, -+ svst1_scatter_u32base_offset_s32 (p0, z1, 0, z0), -+ svst1_scatter_offset (p0, z1, 0, z0)) -+ -+/* -+** st1_scatter_5_s32_offset: -+** mov (x[0-9]+), #?5 -+** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_5_s32_offset, svint32_t, svuint32_t, -+ svst1_scatter_u32base_offset_s32 (p0, z1, 5, z0), -+ svst1_scatter_offset (p0, z1, 5, z0)) -+ -+/* -+** st1_scatter_6_s32_offset: -+** mov (x[0-9]+), #?6 -+** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_6_s32_offset, svint32_t, svuint32_t, -+ svst1_scatter_u32base_offset_s32 (p0, z1, 6, z0), -+ svst1_scatter_offset (p0, z1, 6, z0)) -+ -+/* -+** st1_scatter_7_s32_offset: -+** mov (x[0-9]+), #?7 -+** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_7_s32_offset, svint32_t, svuint32_t, -+ svst1_scatter_u32base_offset_s32 (p0, z1, 7, z0), -+ svst1_scatter_offset (p0, z1, 7, z0)) -+ -+/* -+** st1_scatter_8_s32_offset: -+** st1w z0\.s, p0, \[z1\.s, #8\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_8_s32_offset, svint32_t, svuint32_t, -+ svst1_scatter_u32base_offset_s32 (p0, z1, 8, z0), -+ svst1_scatter_offset (p0, z1, 8, z0)) -+ -+/* -+** st1_scatter_124_s32_offset: -+** st1w z0\.s, p0, \[z1\.s, #124\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_124_s32_offset, svint32_t, svuint32_t, -+ svst1_scatter_u32base_offset_s32 (p0, z1, 124, z0), -+ svst1_scatter_offset (p0, z1, 124, z0)) -+ -+/* -+** st1_scatter_128_s32_offset: -+** mov (x[0-9]+), #?128 -+** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_128_s32_offset, svint32_t, svuint32_t, -+ svst1_scatter_u32base_offset_s32 (p0, z1, 128, z0), -+ svst1_scatter_offset (p0, z1, 128, z0)) -+ -+/* -+** st1_scatter_x0_s32_index: -+** lsl (x[0-9]+), x0, #?2 -+** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_x0_s32_index, svint32_t, svuint32_t, -+ svst1_scatter_u32base_index_s32 (p0, z1, x0, z0), -+ svst1_scatter_index (p0, z1, x0, z0)) -+ -+/* -+** st1_scatter_m1_s32_index: -+** mov (x[0-9]+), #?-4 -+** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_m1_s32_index, svint32_t, svuint32_t, -+ svst1_scatter_u32base_index_s32 (p0, z1, -1, z0), -+ svst1_scatter_index (p0, z1, -1, z0)) -+ -+/* -+** st1_scatter_0_s32_index: -+** st1w z0\.s, p0, \[z1\.s\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_0_s32_index, svint32_t, svuint32_t, -+ svst1_scatter_u32base_index_s32 (p0, z1, 0, z0), -+ 
svst1_scatter_index (p0, z1, 0, z0)) -+ -+/* -+** st1_scatter_5_s32_index: -+** st1w z0\.s, p0, \[z1\.s, #20\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_5_s32_index, svint32_t, svuint32_t, -+ svst1_scatter_u32base_index_s32 (p0, z1, 5, z0), -+ svst1_scatter_index (p0, z1, 5, z0)) -+ -+/* -+** st1_scatter_31_s32_index: -+** st1w z0\.s, p0, \[z1\.s, #124\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_31_s32_index, svint32_t, svuint32_t, -+ svst1_scatter_u32base_index_s32 (p0, z1, 31, z0), -+ svst1_scatter_index (p0, z1, 31, z0)) -+ -+/* -+** st1_scatter_32_s32_index: -+** mov (x[0-9]+), #?128 -+** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_32_s32_index, svint32_t, svuint32_t, -+ svst1_scatter_u32base_index_s32 (p0, z1, 32, z0), -+ svst1_scatter_index (p0, z1, 32, z0)) -+ -+/* -+** st1_scatter_x0_s32_s32offset: -+** st1w z0\.s, p0, \[x0, z1\.s, sxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_x0_s32_s32offset, svint32_t, int32_t, svint32_t, -+ svst1_scatter_s32offset_s32 (p0, x0, z1, z0), -+ svst1_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_s32_s32offset: -+** st1w z0\.s, p0, \[x0, z1\.s, sxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_s32_s32offset, svint32_t, int32_t, svint32_t, -+ svst1_scatter_s32offset_s32 (p0, x0, z1, z0), -+ svst1_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_x0_s32_u32offset: -+** st1w z0\.s, p0, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_x0_s32_u32offset, svint32_t, int32_t, svuint32_t, -+ svst1_scatter_u32offset_s32 (p0, x0, z1, z0), -+ svst1_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_s32_u32offset: -+** st1w z0\.s, p0, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_s32_u32offset, svint32_t, int32_t, svuint32_t, -+ svst1_scatter_u32offset_s32 (p0, x0, z1, z0), -+ svst1_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_x0_s32_s32index: -+** st1w z0\.s, p0, \[x0, z1\.s, sxtw 2\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_x0_s32_s32index, svint32_t, int32_t, svint32_t, -+ svst1_scatter_s32index_s32 (p0, x0, z1, z0), -+ svst1_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_s32_s32index: -+** st1w z0\.s, p0, \[x0, z1\.s, sxtw 2\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_s32_s32index, svint32_t, int32_t, svint32_t, -+ svst1_scatter_s32index_s32 (p0, x0, z1, z0), -+ svst1_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_x0_s32_u32index: -+** st1w z0\.s, p0, \[x0, z1\.s, uxtw 2\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_x0_s32_u32index, svint32_t, int32_t, svuint32_t, -+ svst1_scatter_u32index_s32 (p0, x0, z1, z0), -+ svst1_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_s32_u32index: -+** st1w z0\.s, p0, \[x0, z1\.s, uxtw 2\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_s32_u32index, svint32_t, int32_t, svuint32_t, -+ svst1_scatter_u32index_s32 (p0, x0, z1, z0), -+ svst1_scatter_index (p0, x0, z1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_s64.c -new file mode 100644 -index 000000000..5c4ebf440 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_s64.c -@@ -0,0 +1,303 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1_scatter_s64: -+** st1d z0\.d, p0, \[z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_s64, svint64_t, svuint64_t, -+ svst1_scatter_u64base_s64 (p0, z1, z0), -+ svst1_scatter (p0, z1, z0)) -+ -+/* -+** st1_scatter_x0_s64_offset: -+** st1d z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_x0_s64_offset, svint64_t, svuint64_t, -+ svst1_scatter_u64base_offset_s64 (p0, z1, x0, z0), -+ svst1_scatter_offset (p0, z1, x0, z0)) -+ -+/* -+** st1_scatter_m8_s64_offset: -+** mov (x[0-9]+), #?-8 -+** st1d z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_m8_s64_offset, svint64_t, svuint64_t, -+ svst1_scatter_u64base_offset_s64 (p0, z1, -8, z0), -+ svst1_scatter_offset (p0, z1, -8, z0)) -+ -+/* -+** st1_scatter_0_s64_offset: -+** st1d z0\.d, p0, \[z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_0_s64_offset, svint64_t, svuint64_t, -+ svst1_scatter_u64base_offset_s64 (p0, z1, 0, z0), -+ svst1_scatter_offset (p0, z1, 0, z0)) -+ -+/* -+** st1_scatter_9_s64_offset: -+** mov (x[0-9]+), #?9 -+** st1d z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_9_s64_offset, svint64_t, svuint64_t, -+ svst1_scatter_u64base_offset_s64 (p0, z1, 9, z0), -+ svst1_scatter_offset (p0, z1, 9, z0)) -+ -+/* -+** st1_scatter_10_s64_offset: -+** mov (x[0-9]+), #?10 -+** st1d z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_10_s64_offset, svint64_t, svuint64_t, -+ svst1_scatter_u64base_offset_s64 (p0, z1, 10, z0), -+ svst1_scatter_offset (p0, z1, 10, z0)) -+ -+/* -+** st1_scatter_11_s64_offset: -+** mov (x[0-9]+), #?11 -+** st1d z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_11_s64_offset, svint64_t, svuint64_t, -+ svst1_scatter_u64base_offset_s64 (p0, z1, 11, z0), -+ svst1_scatter_offset (p0, z1, 11, z0)) -+ -+/* -+** st1_scatter_12_s64_offset: -+** mov (x[0-9]+), #?12 -+** st1d z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_12_s64_offset, svint64_t, svuint64_t, -+ svst1_scatter_u64base_offset_s64 (p0, z1, 12, z0), -+ svst1_scatter_offset (p0, z1, 12, z0)) -+ -+/* -+** st1_scatter_13_s64_offset: -+** mov (x[0-9]+), #?13 -+** st1d z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_13_s64_offset, svint64_t, svuint64_t, -+ svst1_scatter_u64base_offset_s64 (p0, z1, 13, z0), -+ svst1_scatter_offset (p0, z1, 13, z0)) -+ -+/* -+** st1_scatter_14_s64_offset: -+** mov (x[0-9]+), #?14 -+** st1d z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_14_s64_offset, svint64_t, svuint64_t, -+ svst1_scatter_u64base_offset_s64 (p0, z1, 14, z0), -+ svst1_scatter_offset (p0, z1, 14, z0)) -+ -+/* -+** st1_scatter_15_s64_offset: -+** mov (x[0-9]+), #?15 -+** st1d z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_15_s64_offset, svint64_t, svuint64_t, -+ svst1_scatter_u64base_offset_s64 (p0, z1, 15, z0), -+ svst1_scatter_offset (p0, z1, 15, z0)) -+ -+/* -+** st1_scatter_16_s64_offset: -+** st1d z0\.d, p0, \[z1\.d, #16\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_16_s64_offset, svint64_t, svuint64_t, -+ svst1_scatter_u64base_offset_s64 (p0, z1, 16, z0), -+ svst1_scatter_offset (p0, z1, 16, z0)) -+ -+/* -+** st1_scatter_248_s64_offset: -+** st1d z0\.d, p0, \[z1\.d, #248\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_248_s64_offset, svint64_t, svuint64_t, -+ svst1_scatter_u64base_offset_s64 (p0, z1, 248, z0), -+ 
svst1_scatter_offset (p0, z1, 248, z0)) -+ -+/* -+** st1_scatter_256_s64_offset: -+** mov (x[0-9]+), #?256 -+** st1d z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_256_s64_offset, svint64_t, svuint64_t, -+ svst1_scatter_u64base_offset_s64 (p0, z1, 256, z0), -+ svst1_scatter_offset (p0, z1, 256, z0)) -+ -+/* -+** st1_scatter_x0_s64_index: -+** lsl (x[0-9]+), x0, #?3 -+** st1d z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_x0_s64_index, svint64_t, svuint64_t, -+ svst1_scatter_u64base_index_s64 (p0, z1, x0, z0), -+ svst1_scatter_index (p0, z1, x0, z0)) -+ -+/* -+** st1_scatter_m1_s64_index: -+** mov (x[0-9]+), #?-8 -+** st1d z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_m1_s64_index, svint64_t, svuint64_t, -+ svst1_scatter_u64base_index_s64 (p0, z1, -1, z0), -+ svst1_scatter_index (p0, z1, -1, z0)) -+ -+/* -+** st1_scatter_0_s64_index: -+** st1d z0\.d, p0, \[z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_0_s64_index, svint64_t, svuint64_t, -+ svst1_scatter_u64base_index_s64 (p0, z1, 0, z0), -+ svst1_scatter_index (p0, z1, 0, z0)) -+ -+/* -+** st1_scatter_5_s64_index: -+** st1d z0\.d, p0, \[z1\.d, #40\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_5_s64_index, svint64_t, svuint64_t, -+ svst1_scatter_u64base_index_s64 (p0, z1, 5, z0), -+ svst1_scatter_index (p0, z1, 5, z0)) -+ -+/* -+** st1_scatter_31_s64_index: -+** st1d z0\.d, p0, \[z1\.d, #248\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_31_s64_index, svint64_t, svuint64_t, -+ svst1_scatter_u64base_index_s64 (p0, z1, 31, z0), -+ svst1_scatter_index (p0, z1, 31, z0)) -+ -+/* -+** st1_scatter_32_s64_index: -+** mov (x[0-9]+), #?256 -+** st1d z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_32_s64_index, svint64_t, svuint64_t, -+ svst1_scatter_u64base_index_s64 (p0, z1, 32, z0), -+ svst1_scatter_index (p0, z1, 32, z0)) -+ -+/* -+** st1_scatter_x0_s64_s64offset: -+** st1d z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_x0_s64_s64offset, svint64_t, int64_t, svint64_t, -+ svst1_scatter_s64offset_s64 (p0, x0, z1, z0), -+ svst1_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_s64_s64offset: -+** st1d z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_s64_s64offset, svint64_t, int64_t, svint64_t, -+ svst1_scatter_s64offset_s64 (p0, x0, z1, z0), -+ svst1_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_ext_s64_s64offset: -+** st1d z0\.d, p0, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_ext_s64_s64offset, svint64_t, int64_t, svint64_t, -+ svst1_scatter_s64offset_s64 (p0, x0, svextw_s64_x (p0, z1), z0), -+ svst1_scatter_offset (p0, x0, svextw_x (p0, z1), z0)) -+ -+/* -+** st1_scatter_x0_s64_u64offset: -+** st1d z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_x0_s64_u64offset, svint64_t, int64_t, svuint64_t, -+ svst1_scatter_u64offset_s64 (p0, x0, z1, z0), -+ svst1_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_s64_u64offset: -+** st1d z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_s64_u64offset, svint64_t, int64_t, svuint64_t, -+ svst1_scatter_u64offset_s64 (p0, x0, z1, z0), -+ svst1_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_ext_s64_u64offset: -+** st1d z0\.d, p0, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_ext_s64_u64offset, svint64_t, int64_t, svuint64_t, -+ svst1_scatter_u64offset_s64 (p0, x0, svextw_u64_x 
(p0, z1), z0), -+ svst1_scatter_offset (p0, x0, svextw_x (p0, z1), z0)) -+ -+/* -+** st1_scatter_x0_s64_s64index: -+** st1d z0\.d, p0, \[x0, z1\.d, lsl 3\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_x0_s64_s64index, svint64_t, int64_t, svint64_t, -+ svst1_scatter_s64index_s64 (p0, x0, z1, z0), -+ svst1_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_s64_s64index: -+** st1d z0\.d, p0, \[x0, z1\.d, lsl 3\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_s64_s64index, svint64_t, int64_t, svint64_t, -+ svst1_scatter_s64index_s64 (p0, x0, z1, z0), -+ svst1_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_ext_s64_s64index: -+** st1d z0\.d, p0, \[x0, z1\.d, sxtw 3\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_ext_s64_s64index, svint64_t, int64_t, svint64_t, -+ svst1_scatter_s64index_s64 (p0, x0, svextw_s64_x (p0, z1), z0), -+ svst1_scatter_index (p0, x0, svextw_x (p0, z1), z0)) -+ -+/* -+** st1_scatter_x0_s64_u64index: -+** st1d z0\.d, p0, \[x0, z1\.d, lsl 3\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_x0_s64_u64index, svint64_t, int64_t, svuint64_t, -+ svst1_scatter_u64index_s64 (p0, x0, z1, z0), -+ svst1_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_s64_u64index: -+** st1d z0\.d, p0, \[x0, z1\.d, lsl 3\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_s64_u64index, svint64_t, int64_t, svuint64_t, -+ svst1_scatter_u64index_s64 (p0, x0, z1, z0), -+ svst1_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_ext_s64_u64index: -+** st1d z0\.d, p0, \[x0, z1\.d, uxtw 3\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_ext_s64_u64index, svint64_t, int64_t, svuint64_t, -+ svst1_scatter_u64index_s64 (p0, x0, svextw_u64_x (p0, z1), z0), -+ svst1_scatter_index (p0, x0, svextw_x (p0, z1), z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_u32.c -new file mode 100644 -index 000000000..fe3f7259f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_u32.c -@@ -0,0 +1,227 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1_scatter_u32: -+** st1w z0\.s, p0, \[z1\.s\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_u32, svuint32_t, svuint32_t, -+ svst1_scatter_u32base_u32 (p0, z1, z0), -+ svst1_scatter (p0, z1, z0)) -+ -+/* -+** st1_scatter_x0_u32_offset: -+** st1w z0\.s, p0, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_x0_u32_offset, svuint32_t, svuint32_t, -+ svst1_scatter_u32base_offset_u32 (p0, z1, x0, z0), -+ svst1_scatter_offset (p0, z1, x0, z0)) -+ -+/* -+** st1_scatter_m4_u32_offset: -+** mov (x[0-9]+), #?-4 -+** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_m4_u32_offset, svuint32_t, svuint32_t, -+ svst1_scatter_u32base_offset_u32 (p0, z1, -4, z0), -+ svst1_scatter_offset (p0, z1, -4, z0)) -+ -+/* -+** st1_scatter_0_u32_offset: -+** st1w z0\.s, p0, \[z1\.s\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_0_u32_offset, svuint32_t, svuint32_t, -+ svst1_scatter_u32base_offset_u32 (p0, z1, 0, z0), -+ svst1_scatter_offset (p0, z1, 0, z0)) -+ -+/* -+** st1_scatter_5_u32_offset: -+** mov (x[0-9]+), #?5 -+** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_5_u32_offset, svuint32_t, svuint32_t, -+ svst1_scatter_u32base_offset_u32 (p0, z1, 5, z0), -+ svst1_scatter_offset (p0, z1, 5, z0)) -+ -+/* -+** st1_scatter_6_u32_offset: -+** mov (x[0-9]+), #?6 -+** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_6_u32_offset, svuint32_t, svuint32_t, -+ svst1_scatter_u32base_offset_u32 (p0, z1, 6, z0), -+ svst1_scatter_offset (p0, z1, 6, z0)) -+ -+/* -+** st1_scatter_7_u32_offset: -+** mov (x[0-9]+), #?7 -+** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_7_u32_offset, svuint32_t, svuint32_t, -+ svst1_scatter_u32base_offset_u32 (p0, z1, 7, z0), -+ svst1_scatter_offset (p0, z1, 7, z0)) -+ -+/* -+** st1_scatter_8_u32_offset: -+** st1w z0\.s, p0, \[z1\.s, #8\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_8_u32_offset, svuint32_t, svuint32_t, -+ svst1_scatter_u32base_offset_u32 (p0, z1, 8, z0), -+ svst1_scatter_offset (p0, z1, 8, z0)) -+ -+/* -+** st1_scatter_124_u32_offset: -+** st1w z0\.s, p0, \[z1\.s, #124\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_124_u32_offset, svuint32_t, svuint32_t, -+ svst1_scatter_u32base_offset_u32 (p0, z1, 124, z0), -+ svst1_scatter_offset (p0, z1, 124, z0)) -+ -+/* -+** st1_scatter_128_u32_offset: -+** mov (x[0-9]+), #?128 -+** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_128_u32_offset, svuint32_t, svuint32_t, -+ svst1_scatter_u32base_offset_u32 (p0, z1, 128, z0), -+ svst1_scatter_offset (p0, z1, 128, z0)) -+ -+/* -+** st1_scatter_x0_u32_index: -+** lsl (x[0-9]+), x0, #?2 -+** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_x0_u32_index, svuint32_t, svuint32_t, -+ svst1_scatter_u32base_index_u32 (p0, z1, x0, z0), -+ svst1_scatter_index (p0, z1, x0, z0)) -+ -+/* -+** st1_scatter_m1_u32_index: -+** mov (x[0-9]+), #?-4 -+** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_m1_u32_index, svuint32_t, svuint32_t, -+ svst1_scatter_u32base_index_u32 (p0, z1, -1, z0), -+ svst1_scatter_index (p0, z1, -1, z0)) -+ -+/* -+** st1_scatter_0_u32_index: -+** st1w z0\.s, p0, \[z1\.s\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_0_u32_index, svuint32_t, svuint32_t, -+ svst1_scatter_u32base_index_u32 (p0, z1, 0, z0), -+ 
svst1_scatter_index (p0, z1, 0, z0)) -+ -+/* -+** st1_scatter_5_u32_index: -+** st1w z0\.s, p0, \[z1\.s, #20\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_5_u32_index, svuint32_t, svuint32_t, -+ svst1_scatter_u32base_index_u32 (p0, z1, 5, z0), -+ svst1_scatter_index (p0, z1, 5, z0)) -+ -+/* -+** st1_scatter_31_u32_index: -+** st1w z0\.s, p0, \[z1\.s, #124\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_31_u32_index, svuint32_t, svuint32_t, -+ svst1_scatter_u32base_index_u32 (p0, z1, 31, z0), -+ svst1_scatter_index (p0, z1, 31, z0)) -+ -+/* -+** st1_scatter_32_u32_index: -+** mov (x[0-9]+), #?128 -+** st1w z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_32_u32_index, svuint32_t, svuint32_t, -+ svst1_scatter_u32base_index_u32 (p0, z1, 32, z0), -+ svst1_scatter_index (p0, z1, 32, z0)) -+ -+/* -+** st1_scatter_x0_u32_s32offset: -+** st1w z0\.s, p0, \[x0, z1\.s, sxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_x0_u32_s32offset, svuint32_t, uint32_t, svint32_t, -+ svst1_scatter_s32offset_u32 (p0, x0, z1, z0), -+ svst1_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_u32_s32offset: -+** st1w z0\.s, p0, \[x0, z1\.s, sxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_u32_s32offset, svuint32_t, uint32_t, svint32_t, -+ svst1_scatter_s32offset_u32 (p0, x0, z1, z0), -+ svst1_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_x0_u32_u32offset: -+** st1w z0\.s, p0, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_x0_u32_u32offset, svuint32_t, uint32_t, svuint32_t, -+ svst1_scatter_u32offset_u32 (p0, x0, z1, z0), -+ svst1_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_u32_u32offset: -+** st1w z0\.s, p0, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_u32_u32offset, svuint32_t, uint32_t, svuint32_t, -+ svst1_scatter_u32offset_u32 (p0, x0, z1, z0), -+ svst1_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_x0_u32_s32index: -+** st1w z0\.s, p0, \[x0, z1\.s, sxtw 2\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_x0_u32_s32index, svuint32_t, uint32_t, svint32_t, -+ svst1_scatter_s32index_u32 (p0, x0, z1, z0), -+ svst1_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_u32_s32index: -+** st1w z0\.s, p0, \[x0, z1\.s, sxtw 2\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_u32_s32index, svuint32_t, uint32_t, svint32_t, -+ svst1_scatter_s32index_u32 (p0, x0, z1, z0), -+ svst1_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_x0_u32_u32index: -+** st1w z0\.s, p0, \[x0, z1\.s, uxtw 2\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_x0_u32_u32index, svuint32_t, uint32_t, svuint32_t, -+ svst1_scatter_u32index_u32 (p0, x0, z1, z0), -+ svst1_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_u32_u32index: -+** st1w z0\.s, p0, \[x0, z1\.s, uxtw 2\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_u32_u32index, svuint32_t, uint32_t, svuint32_t, -+ svst1_scatter_u32index_u32 (p0, x0, z1, z0), -+ svst1_scatter_index (p0, x0, z1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_u64.c -new file mode 100644 -index 000000000..232123566 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_scatter_u64.c -@@ -0,0 +1,303 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1_scatter_u64: -+** st1d z0\.d, p0, \[z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_u64, svuint64_t, svuint64_t, -+ svst1_scatter_u64base_u64 (p0, z1, z0), -+ svst1_scatter (p0, z1, z0)) -+ -+/* -+** st1_scatter_x0_u64_offset: -+** st1d z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_x0_u64_offset, svuint64_t, svuint64_t, -+ svst1_scatter_u64base_offset_u64 (p0, z1, x0, z0), -+ svst1_scatter_offset (p0, z1, x0, z0)) -+ -+/* -+** st1_scatter_m8_u64_offset: -+** mov (x[0-9]+), #?-8 -+** st1d z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_m8_u64_offset, svuint64_t, svuint64_t, -+ svst1_scatter_u64base_offset_u64 (p0, z1, -8, z0), -+ svst1_scatter_offset (p0, z1, -8, z0)) -+ -+/* -+** st1_scatter_0_u64_offset: -+** st1d z0\.d, p0, \[z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_0_u64_offset, svuint64_t, svuint64_t, -+ svst1_scatter_u64base_offset_u64 (p0, z1, 0, z0), -+ svst1_scatter_offset (p0, z1, 0, z0)) -+ -+/* -+** st1_scatter_9_u64_offset: -+** mov (x[0-9]+), #?9 -+** st1d z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_9_u64_offset, svuint64_t, svuint64_t, -+ svst1_scatter_u64base_offset_u64 (p0, z1, 9, z0), -+ svst1_scatter_offset (p0, z1, 9, z0)) -+ -+/* -+** st1_scatter_10_u64_offset: -+** mov (x[0-9]+), #?10 -+** st1d z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_10_u64_offset, svuint64_t, svuint64_t, -+ svst1_scatter_u64base_offset_u64 (p0, z1, 10, z0), -+ svst1_scatter_offset (p0, z1, 10, z0)) -+ -+/* -+** st1_scatter_11_u64_offset: -+** mov (x[0-9]+), #?11 -+** st1d z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_11_u64_offset, svuint64_t, svuint64_t, -+ svst1_scatter_u64base_offset_u64 (p0, z1, 11, z0), -+ svst1_scatter_offset (p0, z1, 11, z0)) -+ -+/* -+** st1_scatter_12_u64_offset: -+** mov (x[0-9]+), #?12 -+** st1d z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_12_u64_offset, svuint64_t, svuint64_t, -+ svst1_scatter_u64base_offset_u64 (p0, z1, 12, z0), -+ svst1_scatter_offset (p0, z1, 12, z0)) -+ -+/* -+** st1_scatter_13_u64_offset: -+** mov (x[0-9]+), #?13 -+** st1d z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_13_u64_offset, svuint64_t, svuint64_t, -+ svst1_scatter_u64base_offset_u64 (p0, z1, 13, z0), -+ svst1_scatter_offset (p0, z1, 13, z0)) -+ -+/* -+** st1_scatter_14_u64_offset: -+** mov (x[0-9]+), #?14 -+** st1d z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_14_u64_offset, svuint64_t, svuint64_t, -+ svst1_scatter_u64base_offset_u64 (p0, z1, 14, z0), -+ svst1_scatter_offset (p0, z1, 14, z0)) -+ -+/* -+** st1_scatter_15_u64_offset: -+** mov (x[0-9]+), #?15 -+** st1d z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_15_u64_offset, svuint64_t, svuint64_t, -+ svst1_scatter_u64base_offset_u64 (p0, z1, 15, z0), -+ svst1_scatter_offset (p0, z1, 15, z0)) -+ -+/* -+** st1_scatter_16_u64_offset: -+** st1d z0\.d, p0, \[z1\.d, #16\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_16_u64_offset, svuint64_t, svuint64_t, -+ svst1_scatter_u64base_offset_u64 (p0, z1, 16, z0), -+ svst1_scatter_offset (p0, z1, 16, z0)) -+ -+/* -+** st1_scatter_248_u64_offset: -+** st1d z0\.d, p0, \[z1\.d, #248\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_248_u64_offset, svuint64_t, svuint64_t, -+ svst1_scatter_u64base_offset_u64 (p0, z1, 248, z0), -+ 
svst1_scatter_offset (p0, z1, 248, z0)) -+ -+/* -+** st1_scatter_256_u64_offset: -+** mov (x[0-9]+), #?256 -+** st1d z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_256_u64_offset, svuint64_t, svuint64_t, -+ svst1_scatter_u64base_offset_u64 (p0, z1, 256, z0), -+ svst1_scatter_offset (p0, z1, 256, z0)) -+ -+/* -+** st1_scatter_x0_u64_index: -+** lsl (x[0-9]+), x0, #?3 -+** st1d z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_x0_u64_index, svuint64_t, svuint64_t, -+ svst1_scatter_u64base_index_u64 (p0, z1, x0, z0), -+ svst1_scatter_index (p0, z1, x0, z0)) -+ -+/* -+** st1_scatter_m1_u64_index: -+** mov (x[0-9]+), #?-8 -+** st1d z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_m1_u64_index, svuint64_t, svuint64_t, -+ svst1_scatter_u64base_index_u64 (p0, z1, -1, z0), -+ svst1_scatter_index (p0, z1, -1, z0)) -+ -+/* -+** st1_scatter_0_u64_index: -+** st1d z0\.d, p0, \[z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_0_u64_index, svuint64_t, svuint64_t, -+ svst1_scatter_u64base_index_u64 (p0, z1, 0, z0), -+ svst1_scatter_index (p0, z1, 0, z0)) -+ -+/* -+** st1_scatter_5_u64_index: -+** st1d z0\.d, p0, \[z1\.d, #40\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_5_u64_index, svuint64_t, svuint64_t, -+ svst1_scatter_u64base_index_u64 (p0, z1, 5, z0), -+ svst1_scatter_index (p0, z1, 5, z0)) -+ -+/* -+** st1_scatter_31_u64_index: -+** st1d z0\.d, p0, \[z1\.d, #248\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_31_u64_index, svuint64_t, svuint64_t, -+ svst1_scatter_u64base_index_u64 (p0, z1, 31, z0), -+ svst1_scatter_index (p0, z1, 31, z0)) -+ -+/* -+** st1_scatter_32_u64_index: -+** mov (x[0-9]+), #?256 -+** st1d z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1_scatter_32_u64_index, svuint64_t, svuint64_t, -+ svst1_scatter_u64base_index_u64 (p0, z1, 32, z0), -+ svst1_scatter_index (p0, z1, 32, z0)) -+ -+/* -+** st1_scatter_x0_u64_s64offset: -+** st1d z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_x0_u64_s64offset, svuint64_t, uint64_t, svint64_t, -+ svst1_scatter_s64offset_u64 (p0, x0, z1, z0), -+ svst1_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_u64_s64offset: -+** st1d z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_u64_s64offset, svuint64_t, uint64_t, svint64_t, -+ svst1_scatter_s64offset_u64 (p0, x0, z1, z0), -+ svst1_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_ext_u64_s64offset: -+** st1d z0\.d, p0, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_ext_u64_s64offset, svuint64_t, uint64_t, svint64_t, -+ svst1_scatter_s64offset_u64 (p0, x0, svextw_s64_x (p0, z1), z0), -+ svst1_scatter_offset (p0, x0, svextw_x (p0, z1), z0)) -+ -+/* -+** st1_scatter_x0_u64_u64offset: -+** st1d z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_x0_u64_u64offset, svuint64_t, uint64_t, svuint64_t, -+ svst1_scatter_u64offset_u64 (p0, x0, z1, z0), -+ svst1_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_u64_u64offset: -+** st1d z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_u64_u64offset, svuint64_t, uint64_t, svuint64_t, -+ svst1_scatter_u64offset_u64 (p0, x0, z1, z0), -+ svst1_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_ext_u64_u64offset: -+** st1d z0\.d, p0, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_ext_u64_u64offset, svuint64_t, uint64_t, svuint64_t, -+ svst1_scatter_u64offset_u64 (p0, 
x0, svextw_u64_x (p0, z1), z0), -+ svst1_scatter_offset (p0, x0, svextw_x (p0, z1), z0)) -+ -+/* -+** st1_scatter_x0_u64_s64index: -+** st1d z0\.d, p0, \[x0, z1\.d, lsl 3\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_x0_u64_s64index, svuint64_t, uint64_t, svint64_t, -+ svst1_scatter_s64index_u64 (p0, x0, z1, z0), -+ svst1_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_u64_s64index: -+** st1d z0\.d, p0, \[x0, z1\.d, lsl 3\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_u64_s64index, svuint64_t, uint64_t, svint64_t, -+ svst1_scatter_s64index_u64 (p0, x0, z1, z0), -+ svst1_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_ext_u64_s64index: -+** st1d z0\.d, p0, \[x0, z1\.d, sxtw 3\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_ext_u64_s64index, svuint64_t, uint64_t, svint64_t, -+ svst1_scatter_s64index_u64 (p0, x0, svextw_s64_x (p0, z1), z0), -+ svst1_scatter_index (p0, x0, svextw_x (p0, z1), z0)) -+ -+/* -+** st1_scatter_x0_u64_u64index: -+** st1d z0\.d, p0, \[x0, z1\.d, lsl 3\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_x0_u64_u64index, svuint64_t, uint64_t, svuint64_t, -+ svst1_scatter_u64index_u64 (p0, x0, z1, z0), -+ svst1_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_u64_u64index: -+** st1d z0\.d, p0, \[x0, z1\.d, lsl 3\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_u64_u64index, svuint64_t, uint64_t, svuint64_t, -+ svst1_scatter_u64index_u64 (p0, x0, z1, z0), -+ svst1_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1_scatter_ext_u64_u64index: -+** st1d z0\.d, p0, \[x0, z1\.d, uxtw 3\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1_scatter_ext_u64_u64index, svuint64_t, uint64_t, svuint64_t, -+ svst1_scatter_u64index_u64 (p0, x0, svextw_u64_x (p0, z1), z0), -+ svst1_scatter_index (p0, x0, svextw_x (p0, z1), z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_u16.c -new file mode 100644 -index 000000000..e9dc05219 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_u16.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1_u16_base: -+** st1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_u16_base, svuint16_t, uint16_t, -+ svst1_u16 (p0, x0, z0), -+ svst1 (p0, x0, z0)) -+ -+/* -+** st1_u16_index: -+** st1h z0\.h, p0, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_STORE (st1_u16_index, svuint16_t, uint16_t, -+ svst1_u16 (p0, x0 + x1, z0), -+ svst1 (p0, x0 + x1, z0)) -+ -+/* -+** st1_u16_1: -+** st1h z0\.h, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_u16_1, svuint16_t, uint16_t, -+ svst1_u16 (p0, x0 + svcnth (), z0), -+ svst1 (p0, x0 + svcnth (), z0)) -+ -+/* -+** st1_u16_7: -+** st1h z0\.h, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_u16_7, svuint16_t, uint16_t, -+ svst1_u16 (p0, x0 + svcnth () * 7, z0), -+ svst1 (p0, x0 + svcnth () * 7, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st1_u16_8: -+** incb x0, all, mul #8 -+** st1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_u16_8, svuint16_t, uint16_t, -+ svst1_u16 (p0, x0 + svcnth () * 8, z0), -+ svst1 (p0, x0 + svcnth () * 8, z0)) -+ -+/* -+** st1_u16_m1: -+** st1h z0\.h, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_u16_m1, svuint16_t, uint16_t, -+ svst1_u16 (p0, x0 - svcnth (), z0), -+ svst1 (p0, x0 - svcnth (), z0)) -+ -+/* -+** st1_u16_m8: -+** st1h z0\.h, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_u16_m8, svuint16_t, uint16_t, -+ svst1_u16 (p0, x0 - svcnth () * 8, z0), -+ svst1 (p0, x0 - svcnth () * 8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_u16_m9: -+** decb x0, all, mul #9 -+** st1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_u16_m9, svuint16_t, uint16_t, -+ svst1_u16 (p0, x0 - svcnth () * 9, z0), -+ svst1 (p0, x0 - svcnth () * 9, z0)) -+ -+/* -+** st1_vnum_u16_0: -+** st1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_vnum_u16_0, svuint16_t, uint16_t, -+ svst1_vnum_u16 (p0, x0, 0, z0), -+ svst1_vnum (p0, x0, 0, z0)) -+ -+/* -+** st1_vnum_u16_1: -+** st1h z0\.h, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_u16_1, svuint16_t, uint16_t, -+ svst1_vnum_u16 (p0, x0, 1, z0), -+ svst1_vnum (p0, x0, 1, z0)) -+ -+/* -+** st1_vnum_u16_7: -+** st1h z0\.h, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_u16_7, svuint16_t, uint16_t, -+ svst1_vnum_u16 (p0, x0, 7, z0), -+ svst1_vnum (p0, x0, 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_vnum_u16_8: -+** incb x0, all, mul #8 -+** st1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_vnum_u16_8, svuint16_t, uint16_t, -+ svst1_vnum_u16 (p0, x0, 8, z0), -+ svst1_vnum (p0, x0, 8, z0)) -+ -+/* -+** st1_vnum_u16_m1: -+** st1h z0\.h, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_u16_m1, svuint16_t, uint16_t, -+ svst1_vnum_u16 (p0, x0, -1, z0), -+ svst1_vnum (p0, x0, -1, z0)) -+ -+/* -+** st1_vnum_u16_m8: -+** st1h z0\.h, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_u16_m8, svuint16_t, uint16_t, -+ svst1_vnum_u16 (p0, x0, -8, z0), -+ svst1_vnum (p0, x0, -8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_vnum_u16_m9: -+** decb x0, all, mul #9 -+** st1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_vnum_u16_m9, svuint16_t, uint16_t, -+ svst1_vnum_u16 (p0, x0, -9, z0), -+ svst1_vnum (p0, x0, -9, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** st1_vnum_u16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st1h z0\.h, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st1_vnum_u16_x1, svuint16_t, uint16_t, -+ svst1_vnum_u16 (p0, x0, x1, z0), -+ svst1_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_u32.c -new file mode 100644 -index 000000000..8610ae4c8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_u32.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1_u32_base: -+** st1w z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_u32_base, svuint32_t, uint32_t, -+ svst1_u32 (p0, x0, z0), -+ svst1 (p0, x0, z0)) -+ -+/* -+** st1_u32_index: -+** st1w z0\.s, p0, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_STORE (st1_u32_index, svuint32_t, uint32_t, -+ svst1_u32 (p0, x0 + x1, z0), -+ svst1 (p0, x0 + x1, z0)) -+ -+/* -+** st1_u32_1: -+** st1w z0\.s, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_u32_1, svuint32_t, uint32_t, -+ svst1_u32 (p0, x0 + svcntw (), z0), -+ svst1 (p0, x0 + svcntw (), z0)) -+ -+/* -+** st1_u32_7: -+** st1w z0\.s, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_u32_7, svuint32_t, uint32_t, -+ svst1_u32 (p0, x0 + svcntw () * 7, z0), -+ svst1 (p0, x0 + svcntw () * 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_u32_8: -+** incb x0, all, mul #8 -+** st1w z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_u32_8, svuint32_t, uint32_t, -+ svst1_u32 (p0, x0 + svcntw () * 8, z0), -+ svst1 (p0, x0 + svcntw () * 8, z0)) -+ -+/* -+** st1_u32_m1: -+** st1w z0\.s, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_u32_m1, svuint32_t, uint32_t, -+ svst1_u32 (p0, x0 - svcntw (), z0), -+ svst1 (p0, x0 - svcntw (), z0)) -+ -+/* -+** st1_u32_m8: -+** st1w z0\.s, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_u32_m8, svuint32_t, uint32_t, -+ svst1_u32 (p0, x0 - svcntw () * 8, z0), -+ svst1 (p0, x0 - svcntw () * 8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_u32_m9: -+** decb x0, all, mul #9 -+** st1w z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_u32_m9, svuint32_t, uint32_t, -+ svst1_u32 (p0, x0 - svcntw () * 9, z0), -+ svst1 (p0, x0 - svcntw () * 9, z0)) -+ -+/* -+** st1_vnum_u32_0: -+** st1w z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_vnum_u32_0, svuint32_t, uint32_t, -+ svst1_vnum_u32 (p0, x0, 0, z0), -+ svst1_vnum (p0, x0, 0, z0)) -+ -+/* -+** st1_vnum_u32_1: -+** st1w z0\.s, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_u32_1, svuint32_t, uint32_t, -+ svst1_vnum_u32 (p0, x0, 1, z0), -+ svst1_vnum (p0, x0, 1, z0)) -+ -+/* -+** st1_vnum_u32_7: -+** st1w z0\.s, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_u32_7, svuint32_t, uint32_t, -+ svst1_vnum_u32 (p0, x0, 7, z0), -+ svst1_vnum (p0, x0, 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_vnum_u32_8: -+** incb x0, all, mul #8 -+** st1w z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_vnum_u32_8, svuint32_t, uint32_t, -+ svst1_vnum_u32 (p0, x0, 8, z0), -+ svst1_vnum (p0, x0, 8, z0)) -+ -+/* -+** st1_vnum_u32_m1: -+** st1w z0\.s, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_u32_m1, svuint32_t, uint32_t, -+ svst1_vnum_u32 (p0, x0, -1, z0), -+ svst1_vnum (p0, x0, -1, z0)) -+ -+/* -+** st1_vnum_u32_m8: -+** st1w z0\.s, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_u32_m8, svuint32_t, uint32_t, -+ svst1_vnum_u32 (p0, x0, -8, z0), -+ svst1_vnum (p0, x0, -8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_vnum_u32_m9: -+** decb x0, all, mul #9 -+** st1w z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_vnum_u32_m9, svuint32_t, uint32_t, -+ svst1_vnum_u32 (p0, x0, -9, z0), -+ svst1_vnum (p0, x0, -9, z0)) -+ -+/* Using MUL to calculate an index would also be OK. 
*/ -+/* -+** st1_vnum_u32_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st1w z0\.s, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st1_vnum_u32_x1, svuint32_t, uint32_t, -+ svst1_vnum_u32 (p0, x0, x1, z0), -+ svst1_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_u64.c -new file mode 100644 -index 000000000..5d4fae932 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_u64.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1_u64_base: -+** st1d z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_u64_base, svuint64_t, uint64_t, -+ svst1_u64 (p0, x0, z0), -+ svst1 (p0, x0, z0)) -+ -+/* -+** st1_u64_index: -+** st1d z0\.d, p0, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_STORE (st1_u64_index, svuint64_t, uint64_t, -+ svst1_u64 (p0, x0 + x1, z0), -+ svst1 (p0, x0 + x1, z0)) -+ -+/* -+** st1_u64_1: -+** st1d z0\.d, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_u64_1, svuint64_t, uint64_t, -+ svst1_u64 (p0, x0 + svcntd (), z0), -+ svst1 (p0, x0 + svcntd (), z0)) -+ -+/* -+** st1_u64_7: -+** st1d z0\.d, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_u64_7, svuint64_t, uint64_t, -+ svst1_u64 (p0, x0 + svcntd () * 7, z0), -+ svst1 (p0, x0 + svcntd () * 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_u64_8: -+** incb x0, all, mul #8 -+** st1d z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_u64_8, svuint64_t, uint64_t, -+ svst1_u64 (p0, x0 + svcntd () * 8, z0), -+ svst1 (p0, x0 + svcntd () * 8, z0)) -+ -+/* -+** st1_u64_m1: -+** st1d z0\.d, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_u64_m1, svuint64_t, uint64_t, -+ svst1_u64 (p0, x0 - svcntd (), z0), -+ svst1 (p0, x0 - svcntd (), z0)) -+ -+/* -+** st1_u64_m8: -+** st1d z0\.d, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_u64_m8, svuint64_t, uint64_t, -+ svst1_u64 (p0, x0 - svcntd () * 8, z0), -+ svst1 (p0, x0 - svcntd () * 8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_u64_m9: -+** decb x0, all, mul #9 -+** st1d z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_u64_m9, svuint64_t, uint64_t, -+ svst1_u64 (p0, x0 - svcntd () * 9, z0), -+ svst1 (p0, x0 - svcntd () * 9, z0)) -+ -+/* -+** st1_vnum_u64_0: -+** st1d z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_vnum_u64_0, svuint64_t, uint64_t, -+ svst1_vnum_u64 (p0, x0, 0, z0), -+ svst1_vnum (p0, x0, 0, z0)) -+ -+/* -+** st1_vnum_u64_1: -+** st1d z0\.d, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_u64_1, svuint64_t, uint64_t, -+ svst1_vnum_u64 (p0, x0, 1, z0), -+ svst1_vnum (p0, x0, 1, z0)) -+ -+/* -+** st1_vnum_u64_7: -+** st1d z0\.d, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_u64_7, svuint64_t, uint64_t, -+ svst1_vnum_u64 (p0, x0, 7, z0), -+ svst1_vnum (p0, x0, 7, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st1_vnum_u64_8: -+** incb x0, all, mul #8 -+** st1d z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_vnum_u64_8, svuint64_t, uint64_t, -+ svst1_vnum_u64 (p0, x0, 8, z0), -+ svst1_vnum (p0, x0, 8, z0)) -+ -+/* -+** st1_vnum_u64_m1: -+** st1d z0\.d, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_u64_m1, svuint64_t, uint64_t, -+ svst1_vnum_u64 (p0, x0, -1, z0), -+ svst1_vnum (p0, x0, -1, z0)) -+ -+/* -+** st1_vnum_u64_m8: -+** st1d z0\.d, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_u64_m8, svuint64_t, uint64_t, -+ svst1_vnum_u64 (p0, x0, -8, z0), -+ svst1_vnum (p0, x0, -8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_vnum_u64_m9: -+** decb x0, all, mul #9 -+** st1d z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_vnum_u64_m9, svuint64_t, uint64_t, -+ svst1_vnum_u64 (p0, x0, -9, z0), -+ svst1_vnum (p0, x0, -9, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** st1_vnum_u64_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st1d z0\.d, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st1_vnum_u64_x1, svuint64_t, uint64_t, -+ svst1_vnum_u64 (p0, x0, x1, z0), -+ svst1_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_u8.c -new file mode 100644 -index 000000000..52c79d0e0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1_u8.c -@@ -0,0 +1,162 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1_u8_base: -+** st1b z0\.b, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_u8_base, svuint8_t, uint8_t, -+ svst1_u8 (p0, x0, z0), -+ svst1 (p0, x0, z0)) -+ -+/* -+** st1_u8_index: -+** st1b z0\.b, p0, \[x0, x1\] -+** ret -+*/ -+TEST_STORE (st1_u8_index, svuint8_t, uint8_t, -+ svst1_u8 (p0, x0 + x1, z0), -+ svst1 (p0, x0 + x1, z0)) -+ -+/* -+** st1_u8_1: -+** st1b z0\.b, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_u8_1, svuint8_t, uint8_t, -+ svst1_u8 (p0, x0 + svcntb (), z0), -+ svst1 (p0, x0 + svcntb (), z0)) -+ -+/* -+** st1_u8_7: -+** st1b z0\.b, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_u8_7, svuint8_t, uint8_t, -+ svst1_u8 (p0, x0 + svcntb () * 7, z0), -+ svst1 (p0, x0 + svcntb () * 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_u8_8: -+** incb x0, all, mul #8 -+** st1b z0\.b, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_u8_8, svuint8_t, uint8_t, -+ svst1_u8 (p0, x0 + svcntb () * 8, z0), -+ svst1 (p0, x0 + svcntb () * 8, z0)) -+ -+/* -+** st1_u8_m1: -+** st1b z0\.b, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_u8_m1, svuint8_t, uint8_t, -+ svst1_u8 (p0, x0 - svcntb (), z0), -+ svst1 (p0, x0 - svcntb (), z0)) -+ -+/* -+** st1_u8_m8: -+** st1b z0\.b, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_u8_m8, svuint8_t, uint8_t, -+ svst1_u8 (p0, x0 - svcntb () * 8, z0), -+ svst1 (p0, x0 - svcntb () * 8, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st1_u8_m9: -+** decb x0, all, mul #9 -+** st1b z0\.b, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_u8_m9, svuint8_t, uint8_t, -+ svst1_u8 (p0, x0 - svcntb () * 9, z0), -+ svst1 (p0, x0 - svcntb () * 9, z0)) -+ -+/* -+** st1_vnum_u8_0: -+** st1b z0\.b, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_vnum_u8_0, svuint8_t, uint8_t, -+ svst1_vnum_u8 (p0, x0, 0, z0), -+ svst1_vnum (p0, x0, 0, z0)) -+ -+/* -+** st1_vnum_u8_1: -+** st1b z0\.b, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_u8_1, svuint8_t, uint8_t, -+ svst1_vnum_u8 (p0, x0, 1, z0), -+ svst1_vnum (p0, x0, 1, z0)) -+ -+/* -+** st1_vnum_u8_7: -+** st1b z0\.b, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_u8_7, svuint8_t, uint8_t, -+ svst1_vnum_u8 (p0, x0, 7, z0), -+ svst1_vnum (p0, x0, 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_vnum_u8_8: -+** incb x0, all, mul #8 -+** st1b z0\.b, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_vnum_u8_8, svuint8_t, uint8_t, -+ svst1_vnum_u8 (p0, x0, 8, z0), -+ svst1_vnum (p0, x0, 8, z0)) -+ -+/* -+** st1_vnum_u8_m1: -+** st1b z0\.b, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_u8_m1, svuint8_t, uint8_t, -+ svst1_vnum_u8 (p0, x0, -1, z0), -+ svst1_vnum (p0, x0, -1, z0)) -+ -+/* -+** st1_vnum_u8_m8: -+** st1b z0\.b, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1_vnum_u8_m8, svuint8_t, uint8_t, -+ svst1_vnum_u8 (p0, x0, -8, z0), -+ svst1_vnum (p0, x0, -8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1_vnum_u8_m9: -+** decb x0, all, mul #9 -+** st1b z0\.b, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1_vnum_u8_m9, svuint8_t, uint8_t, -+ svst1_vnum_u8 (p0, x0, -9, z0), -+ svst1_vnum (p0, x0, -9, z0)) -+ -+/* -+** st1_vnum_u8_x1: -+** cntb (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** st1b z0\.b, p0, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** st1b z0\.b, p0, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_STORE (st1_vnum_u8_x1, svuint8_t, uint8_t, -+ svst1_vnum_u8 (p0, x0, x1, z0), -+ svst1_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_s16.c -new file mode 100644 -index 000000000..770fb61e6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_s16.c -@@ -0,0 +1,162 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1b_s16_base: -+** st1b z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1b_s16_base, svint16_t, int8_t, -+ svst1b_s16 (p0, x0, z0), -+ svst1b (p0, x0, z0)) -+ -+/* -+** st1b_s16_index: -+** st1b z0\.h, p0, \[x0, x1\] -+** ret -+*/ -+TEST_STORE (st1b_s16_index, svint16_t, int8_t, -+ svst1b_s16 (p0, x0 + x1, z0), -+ svst1b (p0, x0 + x1, z0)) -+ -+/* -+** st1b_s16_1: -+** st1b z0\.h, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_s16_1, svint16_t, int8_t, -+ svst1b_s16 (p0, x0 + svcnth (), z0), -+ svst1b (p0, x0 + svcnth (), z0)) -+ -+/* -+** st1b_s16_7: -+** st1b z0\.h, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_s16_7, svint16_t, int8_t, -+ svst1b_s16 (p0, x0 + svcnth () * 7, z0), -+ svst1b (p0, x0 + svcnth () * 7, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st1b_s16_8: -+** incb x0, all, mul #4 -+** st1b z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1b_s16_8, svint16_t, int8_t, -+ svst1b_s16 (p0, x0 + svcnth () * 8, z0), -+ svst1b (p0, x0 + svcnth () * 8, z0)) -+ -+/* -+** st1b_s16_m1: -+** st1b z0\.h, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_s16_m1, svint16_t, int8_t, -+ svst1b_s16 (p0, x0 - svcnth (), z0), -+ svst1b (p0, x0 - svcnth (), z0)) -+ -+/* -+** st1b_s16_m8: -+** st1b z0\.h, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_s16_m8, svint16_t, int8_t, -+ svst1b_s16 (p0, x0 - svcnth () * 8, z0), -+ svst1b (p0, x0 - svcnth () * 8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1b_s16_m9: -+** dech x0, all, mul #9 -+** st1b z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1b_s16_m9, svint16_t, int8_t, -+ svst1b_s16 (p0, x0 - svcnth () * 9, z0), -+ svst1b (p0, x0 - svcnth () * 9, z0)) -+ -+/* -+** st1b_vnum_s16_0: -+** st1b z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_s16_0, svint16_t, int8_t, -+ svst1b_vnum_s16 (p0, x0, 0, z0), -+ svst1b_vnum (p0, x0, 0, z0)) -+ -+/* -+** st1b_vnum_s16_1: -+** st1b z0\.h, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_s16_1, svint16_t, int8_t, -+ svst1b_vnum_s16 (p0, x0, 1, z0), -+ svst1b_vnum (p0, x0, 1, z0)) -+ -+/* -+** st1b_vnum_s16_7: -+** st1b z0\.h, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_s16_7, svint16_t, int8_t, -+ svst1b_vnum_s16 (p0, x0, 7, z0), -+ svst1b_vnum (p0, x0, 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1b_vnum_s16_8: -+** incb x0, all, mul #4 -+** st1b z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_s16_8, svint16_t, int8_t, -+ svst1b_vnum_s16 (p0, x0, 8, z0), -+ svst1b_vnum (p0, x0, 8, z0)) -+ -+/* -+** st1b_vnum_s16_m1: -+** st1b z0\.h, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_s16_m1, svint16_t, int8_t, -+ svst1b_vnum_s16 (p0, x0, -1, z0), -+ svst1b_vnum (p0, x0, -1, z0)) -+ -+/* -+** st1b_vnum_s16_m8: -+** st1b z0\.h, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_s16_m8, svint16_t, int8_t, -+ svst1b_vnum_s16 (p0, x0, -8, z0), -+ svst1b_vnum (p0, x0, -8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1b_vnum_s16_m9: -+** dech x0, all, mul #9 -+** st1b z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_s16_m9, svint16_t, int8_t, -+ svst1b_vnum_s16 (p0, x0, -9, z0), -+ svst1b_vnum (p0, x0, -9, z0)) -+ -+/* -+** st1b_vnum_s16_x1: -+** cnth (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** st1b z0\.h, p0, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** st1b z0\.h, p0, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_STORE (st1b_vnum_s16_x1, svint16_t, int8_t, -+ svst1b_vnum_s16 (p0, x0, x1, z0), -+ svst1b_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_s32.c -new file mode 100644 -index 000000000..85333aea9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_s32.c -@@ -0,0 +1,162 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1b_s32_base: -+** st1b z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1b_s32_base, svint32_t, int8_t, -+ svst1b_s32 (p0, x0, z0), -+ svst1b (p0, x0, z0)) -+ -+/* -+** st1b_s32_index: -+** st1b z0\.s, p0, \[x0, x1\] -+** ret -+*/ -+TEST_STORE (st1b_s32_index, svint32_t, int8_t, -+ svst1b_s32 (p0, x0 + x1, z0), -+ svst1b (p0, x0 + x1, z0)) -+ -+/* -+** st1b_s32_1: -+** st1b z0\.s, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_s32_1, svint32_t, int8_t, -+ svst1b_s32 (p0, x0 + svcntw (), z0), -+ svst1b (p0, x0 + svcntw (), z0)) -+ -+/* -+** st1b_s32_7: -+** st1b z0\.s, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_s32_7, svint32_t, int8_t, -+ svst1b_s32 (p0, x0 + svcntw () * 7, z0), -+ svst1b (p0, x0 + svcntw () * 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1b_s32_8: -+** incb x0, all, mul #2 -+** st1b z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1b_s32_8, svint32_t, int8_t, -+ svst1b_s32 (p0, x0 + svcntw () * 8, z0), -+ svst1b (p0, x0 + svcntw () * 8, z0)) -+ -+/* -+** st1b_s32_m1: -+** st1b z0\.s, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_s32_m1, svint32_t, int8_t, -+ svst1b_s32 (p0, x0 - svcntw (), z0), -+ svst1b (p0, x0 - svcntw (), z0)) -+ -+/* -+** st1b_s32_m8: -+** st1b z0\.s, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_s32_m8, svint32_t, int8_t, -+ svst1b_s32 (p0, x0 - svcntw () * 8, z0), -+ svst1b (p0, x0 - svcntw () * 8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1b_s32_m9: -+** decw x0, all, mul #9 -+** st1b z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1b_s32_m9, svint32_t, int8_t, -+ svst1b_s32 (p0, x0 - svcntw () * 9, z0), -+ svst1b (p0, x0 - svcntw () * 9, z0)) -+ -+/* -+** st1b_vnum_s32_0: -+** st1b z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_s32_0, svint32_t, int8_t, -+ svst1b_vnum_s32 (p0, x0, 0, z0), -+ svst1b_vnum (p0, x0, 0, z0)) -+ -+/* -+** st1b_vnum_s32_1: -+** st1b z0\.s, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_s32_1, svint32_t, int8_t, -+ svst1b_vnum_s32 (p0, x0, 1, z0), -+ svst1b_vnum (p0, x0, 1, z0)) -+ -+/* -+** st1b_vnum_s32_7: -+** st1b z0\.s, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_s32_7, svint32_t, int8_t, -+ svst1b_vnum_s32 (p0, x0, 7, z0), -+ svst1b_vnum (p0, x0, 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1b_vnum_s32_8: -+** incb x0, all, mul #2 -+** st1b z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_s32_8, svint32_t, int8_t, -+ svst1b_vnum_s32 (p0, x0, 8, z0), -+ svst1b_vnum (p0, x0, 8, z0)) -+ -+/* -+** st1b_vnum_s32_m1: -+** st1b z0\.s, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_s32_m1, svint32_t, int8_t, -+ svst1b_vnum_s32 (p0, x0, -1, z0), -+ svst1b_vnum (p0, x0, -1, z0)) -+ -+/* -+** st1b_vnum_s32_m8: -+** st1b z0\.s, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_s32_m8, svint32_t, int8_t, -+ svst1b_vnum_s32 (p0, x0, -8, z0), -+ svst1b_vnum (p0, x0, -8, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st1b_vnum_s32_m9: -+** decw x0, all, mul #9 -+** st1b z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_s32_m9, svint32_t, int8_t, -+ svst1b_vnum_s32 (p0, x0, -9, z0), -+ svst1b_vnum (p0, x0, -9, z0)) -+ -+/* -+** st1b_vnum_s32_x1: -+** cntw (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** st1b z0\.s, p0, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** st1b z0\.s, p0, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_STORE (st1b_vnum_s32_x1, svint32_t, int8_t, -+ svst1b_vnum_s32 (p0, x0, x1, z0), -+ svst1b_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_s64.c -new file mode 100644 -index 000000000..321f168d9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_s64.c -@@ -0,0 +1,162 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1b_s64_base: -+** st1b z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1b_s64_base, svint64_t, int8_t, -+ svst1b_s64 (p0, x0, z0), -+ svst1b (p0, x0, z0)) -+ -+/* -+** st1b_s64_index: -+** st1b z0\.d, p0, \[x0, x1\] -+** ret -+*/ -+TEST_STORE (st1b_s64_index, svint64_t, int8_t, -+ svst1b_s64 (p0, x0 + x1, z0), -+ svst1b (p0, x0 + x1, z0)) -+ -+/* -+** st1b_s64_1: -+** st1b z0\.d, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_s64_1, svint64_t, int8_t, -+ svst1b_s64 (p0, x0 + svcntd (), z0), -+ svst1b (p0, x0 + svcntd (), z0)) -+ -+/* -+** st1b_s64_7: -+** st1b z0\.d, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_s64_7, svint64_t, int8_t, -+ svst1b_s64 (p0, x0 + svcntd () * 7, z0), -+ svst1b (p0, x0 + svcntd () * 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1b_s64_8: -+** incb x0 -+** st1b z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1b_s64_8, svint64_t, int8_t, -+ svst1b_s64 (p0, x0 + svcntd () * 8, z0), -+ svst1b (p0, x0 + svcntd () * 8, z0)) -+ -+/* -+** st1b_s64_m1: -+** st1b z0\.d, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_s64_m1, svint64_t, int8_t, -+ svst1b_s64 (p0, x0 - svcntd (), z0), -+ svst1b (p0, x0 - svcntd (), z0)) -+ -+/* -+** st1b_s64_m8: -+** st1b z0\.d, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_s64_m8, svint64_t, int8_t, -+ svst1b_s64 (p0, x0 - svcntd () * 8, z0), -+ svst1b (p0, x0 - svcntd () * 8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1b_s64_m9: -+** decd x0, all, mul #9 -+** st1b z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1b_s64_m9, svint64_t, int8_t, -+ svst1b_s64 (p0, x0 - svcntd () * 9, z0), -+ svst1b (p0, x0 - svcntd () * 9, z0)) -+ -+/* -+** st1b_vnum_s64_0: -+** st1b z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_s64_0, svint64_t, int8_t, -+ svst1b_vnum_s64 (p0, x0, 0, z0), -+ svst1b_vnum (p0, x0, 0, z0)) -+ -+/* -+** st1b_vnum_s64_1: -+** st1b z0\.d, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_s64_1, svint64_t, int8_t, -+ svst1b_vnum_s64 (p0, x0, 1, z0), -+ svst1b_vnum (p0, x0, 1, z0)) -+ -+/* -+** st1b_vnum_s64_7: -+** st1b z0\.d, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_s64_7, svint64_t, int8_t, -+ svst1b_vnum_s64 (p0, x0, 7, z0), -+ svst1b_vnum (p0, x0, 7, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st1b_vnum_s64_8: -+** incb x0 -+** st1b z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_s64_8, svint64_t, int8_t, -+ svst1b_vnum_s64 (p0, x0, 8, z0), -+ svst1b_vnum (p0, x0, 8, z0)) -+ -+/* -+** st1b_vnum_s64_m1: -+** st1b z0\.d, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_s64_m1, svint64_t, int8_t, -+ svst1b_vnum_s64 (p0, x0, -1, z0), -+ svst1b_vnum (p0, x0, -1, z0)) -+ -+/* -+** st1b_vnum_s64_m8: -+** st1b z0\.d, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_s64_m8, svint64_t, int8_t, -+ svst1b_vnum_s64 (p0, x0, -8, z0), -+ svst1b_vnum (p0, x0, -8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1b_vnum_s64_m9: -+** decd x0, all, mul #9 -+** st1b z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_s64_m9, svint64_t, int8_t, -+ svst1b_vnum_s64 (p0, x0, -9, z0), -+ svst1b_vnum (p0, x0, -9, z0)) -+ -+/* -+** st1b_vnum_s64_x1: -+** cntd (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** st1b z0\.d, p0, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** st1b z0\.d, p0, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_STORE (st1b_vnum_s64_x1, svint64_t, int8_t, -+ svst1b_vnum_s64 (p0, x0, x1, z0), -+ svst1b_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_s32.c -new file mode 100644 -index 000000000..d59033356 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_s32.c -@@ -0,0 +1,104 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1b_scatter_s32: -+** st1b z0\.s, p0, \[z1\.s\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1b_scatter_s32, svint32_t, svuint32_t, -+ svst1b_scatter_u32base_s32 (p0, z1, z0), -+ svst1b_scatter (p0, z1, z0)) -+ -+/* -+** st1b_scatter_x0_s32_offset: -+** st1b z0\.s, p0, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1b_scatter_x0_s32_offset, svint32_t, svuint32_t, -+ svst1b_scatter_u32base_offset_s32 (p0, z1, x0, z0), -+ svst1b_scatter_offset (p0, z1, x0, z0)) -+ -+/* -+** st1b_scatter_m1_s32_offset: -+** mov (x[0-9]+), #?-1 -+** st1b z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1b_scatter_m1_s32_offset, svint32_t, svuint32_t, -+ svst1b_scatter_u32base_offset_s32 (p0, z1, -1, z0), -+ svst1b_scatter_offset (p0, z1, -1, z0)) -+ -+/* -+** st1b_scatter_0_s32_offset: -+** st1b z0\.s, p0, \[z1\.s\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1b_scatter_0_s32_offset, svint32_t, svuint32_t, -+ svst1b_scatter_u32base_offset_s32 (p0, z1, 0, z0), -+ svst1b_scatter_offset (p0, z1, 0, z0)) -+ -+/* -+** st1b_scatter_5_s32_offset: -+** st1b z0\.s, p0, \[z1\.s, #5\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1b_scatter_5_s32_offset, svint32_t, svuint32_t, -+ svst1b_scatter_u32base_offset_s32 (p0, z1, 5, z0), -+ svst1b_scatter_offset (p0, z1, 5, z0)) -+ -+/* -+** st1b_scatter_31_s32_offset: -+** st1b z0\.s, p0, \[z1\.s, #31\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1b_scatter_31_s32_offset, svint32_t, svuint32_t, -+ svst1b_scatter_u32base_offset_s32 (p0, z1, 31, z0), -+ svst1b_scatter_offset (p0, z1, 31, z0)) -+ -+/* -+** st1b_scatter_32_s32_offset: -+** mov (x[0-9]+), #?32 -+** st1b z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1b_scatter_32_s32_offset, svint32_t, svuint32_t, -+ svst1b_scatter_u32base_offset_s32 (p0, z1, 32, z0), -+ svst1b_scatter_offset (p0, z1, 32, z0)) -+ -+/* -+** 
st1b_scatter_x0_s32_s32offset: -+** st1b z0\.s, p0, \[x0, z1\.s, sxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1b_scatter_x0_s32_s32offset, svint32_t, int8_t, svint32_t, -+ svst1b_scatter_s32offset_s32 (p0, x0, z1, z0), -+ svst1b_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1b_scatter_s32_s32offset: -+** st1b z0\.s, p0, \[x0, z1\.s, sxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1b_scatter_s32_s32offset, svint32_t, int8_t, svint32_t, -+ svst1b_scatter_s32offset_s32 (p0, x0, z1, z0), -+ svst1b_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1b_scatter_x0_s32_u32offset: -+** st1b z0\.s, p0, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1b_scatter_x0_s32_u32offset, svint32_t, int8_t, svuint32_t, -+ svst1b_scatter_u32offset_s32 (p0, x0, z1, z0), -+ svst1b_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1b_scatter_s32_u32offset: -+** st1b z0\.s, p0, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1b_scatter_s32_u32offset, svint32_t, int8_t, svuint32_t, -+ svst1b_scatter_u32offset_s32 (p0, x0, z1, z0), -+ svst1b_scatter_offset (p0, x0, z1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_s64.c -new file mode 100644 -index 000000000..c7a35f1b4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_s64.c -@@ -0,0 +1,122 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1b_scatter_s64: -+** st1b z0\.d, p0, \[z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1b_scatter_s64, svint64_t, svuint64_t, -+ svst1b_scatter_u64base_s64 (p0, z1, z0), -+ svst1b_scatter (p0, z1, z0)) -+ -+/* -+** st1b_scatter_x0_s64_offset: -+** st1b z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1b_scatter_x0_s64_offset, svint64_t, svuint64_t, -+ svst1b_scatter_u64base_offset_s64 (p0, z1, x0, z0), -+ svst1b_scatter_offset (p0, z1, x0, z0)) -+ -+/* -+** st1b_scatter_m1_s64_offset: -+** mov (x[0-9]+), #?-1 -+** st1b z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1b_scatter_m1_s64_offset, svint64_t, svuint64_t, -+ svst1b_scatter_u64base_offset_s64 (p0, z1, -1, z0), -+ svst1b_scatter_offset (p0, z1, -1, z0)) -+ -+/* -+** st1b_scatter_0_s64_offset: -+** st1b z0\.d, p0, \[z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1b_scatter_0_s64_offset, svint64_t, svuint64_t, -+ svst1b_scatter_u64base_offset_s64 (p0, z1, 0, z0), -+ svst1b_scatter_offset (p0, z1, 0, z0)) -+ -+/* -+** st1b_scatter_5_s64_offset: -+** st1b z0\.d, p0, \[z1\.d, #5\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1b_scatter_5_s64_offset, svint64_t, svuint64_t, -+ svst1b_scatter_u64base_offset_s64 (p0, z1, 5, z0), -+ svst1b_scatter_offset (p0, z1, 5, z0)) -+ -+/* -+** st1b_scatter_31_s64_offset: -+** st1b z0\.d, p0, \[z1\.d, #31\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1b_scatter_31_s64_offset, svint64_t, svuint64_t, -+ svst1b_scatter_u64base_offset_s64 (p0, z1, 31, z0), -+ svst1b_scatter_offset (p0, z1, 31, z0)) -+ -+/* -+** st1b_scatter_32_s64_offset: -+** mov (x[0-9]+), #?32 -+** st1b z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1b_scatter_32_s64_offset, svint64_t, svuint64_t, -+ svst1b_scatter_u64base_offset_s64 (p0, z1, 32, z0), -+ svst1b_scatter_offset (p0, z1, 32, z0)) -+ -+/* -+** st1b_scatter_x0_s64_s64offset: -+** st1b z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1b_scatter_x0_s64_s64offset, svint64_t, int8_t, svint64_t, -+ 
svst1b_scatter_s64offset_s64 (p0, x0, z1, z0), -+ svst1b_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1b_scatter_s64_s64offset: -+** st1b z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1b_scatter_s64_s64offset, svint64_t, int8_t, svint64_t, -+ svst1b_scatter_s64offset_s64 (p0, x0, z1, z0), -+ svst1b_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1b_scatter_ext_s64_s64offset: -+** st1b z0\.d, p0, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1b_scatter_ext_s64_s64offset, svint64_t, int8_t, svint64_t, -+ svst1b_scatter_s64offset_s64 (p0, x0, svextw_s64_x (p0, z1), z0), -+ svst1b_scatter_offset (p0, x0, svextw_x (p0, z1), z0)) -+ -+/* -+** st1b_scatter_x0_s64_u64offset: -+** st1b z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1b_scatter_x0_s64_u64offset, svint64_t, int8_t, svuint64_t, -+ svst1b_scatter_u64offset_s64 (p0, x0, z1, z0), -+ svst1b_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1b_scatter_s64_u64offset: -+** st1b z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1b_scatter_s64_u64offset, svint64_t, int8_t, svuint64_t, -+ svst1b_scatter_u64offset_s64 (p0, x0, z1, z0), -+ svst1b_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1b_scatter_ext_s64_u64offset: -+** st1b z0\.d, p0, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1b_scatter_ext_s64_u64offset, svint64_t, int8_t, svuint64_t, -+ svst1b_scatter_u64offset_s64 (p0, x0, svextw_u64_x (p0, z1), z0), -+ svst1b_scatter_offset (p0, x0, svextw_x (p0, z1), z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_u32.c -new file mode 100644 -index 000000000..e098cb9b7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_u32.c -@@ -0,0 +1,104 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1b_scatter_u32: -+** st1b z0\.s, p0, \[z1\.s\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1b_scatter_u32, svuint32_t, svuint32_t, -+ svst1b_scatter_u32base_u32 (p0, z1, z0), -+ svst1b_scatter (p0, z1, z0)) -+ -+/* -+** st1b_scatter_x0_u32_offset: -+** st1b z0\.s, p0, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1b_scatter_x0_u32_offset, svuint32_t, svuint32_t, -+ svst1b_scatter_u32base_offset_u32 (p0, z1, x0, z0), -+ svst1b_scatter_offset (p0, z1, x0, z0)) -+ -+/* -+** st1b_scatter_m1_u32_offset: -+** mov (x[0-9]+), #?-1 -+** st1b z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1b_scatter_m1_u32_offset, svuint32_t, svuint32_t, -+ svst1b_scatter_u32base_offset_u32 (p0, z1, -1, z0), -+ svst1b_scatter_offset (p0, z1, -1, z0)) -+ -+/* -+** st1b_scatter_0_u32_offset: -+** st1b z0\.s, p0, \[z1\.s\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1b_scatter_0_u32_offset, svuint32_t, svuint32_t, -+ svst1b_scatter_u32base_offset_u32 (p0, z1, 0, z0), -+ svst1b_scatter_offset (p0, z1, 0, z0)) -+ -+/* -+** st1b_scatter_5_u32_offset: -+** st1b z0\.s, p0, \[z1\.s, #5\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1b_scatter_5_u32_offset, svuint32_t, svuint32_t, -+ svst1b_scatter_u32base_offset_u32 (p0, z1, 5, z0), -+ svst1b_scatter_offset (p0, z1, 5, z0)) -+ -+/* -+** st1b_scatter_31_u32_offset: -+** st1b z0\.s, p0, \[z1\.s, #31\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1b_scatter_31_u32_offset, svuint32_t, svuint32_t, -+ svst1b_scatter_u32base_offset_u32 (p0, z1, 31, z0), -+ svst1b_scatter_offset (p0, z1, 31, z0)) -+ -+/* -+** st1b_scatter_32_u32_offset: -+** mov (x[0-9]+), #?32 -+** st1b z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1b_scatter_32_u32_offset, svuint32_t, svuint32_t, -+ svst1b_scatter_u32base_offset_u32 (p0, z1, 32, z0), -+ svst1b_scatter_offset (p0, z1, 32, z0)) -+ -+/* -+** st1b_scatter_x0_u32_s32offset: -+** st1b z0\.s, p0, \[x0, z1\.s, sxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1b_scatter_x0_u32_s32offset, svuint32_t, uint8_t, svint32_t, -+ svst1b_scatter_s32offset_u32 (p0, x0, z1, z0), -+ svst1b_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1b_scatter_u32_s32offset: -+** st1b z0\.s, p0, \[x0, z1\.s, sxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1b_scatter_u32_s32offset, svuint32_t, uint8_t, svint32_t, -+ svst1b_scatter_s32offset_u32 (p0, x0, z1, z0), -+ svst1b_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1b_scatter_x0_u32_u32offset: -+** st1b z0\.s, p0, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1b_scatter_x0_u32_u32offset, svuint32_t, uint8_t, svuint32_t, -+ svst1b_scatter_u32offset_u32 (p0, x0, z1, z0), -+ svst1b_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1b_scatter_u32_u32offset: -+** st1b z0\.s, p0, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1b_scatter_u32_u32offset, svuint32_t, uint8_t, svuint32_t, -+ svst1b_scatter_u32offset_u32 (p0, x0, z1, z0), -+ svst1b_scatter_offset (p0, x0, z1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_u64.c -new file mode 100644 -index 000000000..058d1313f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_scatter_u64.c -@@ -0,0 +1,122 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1b_scatter_u64: -+** st1b z0\.d, p0, \[z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1b_scatter_u64, svuint64_t, svuint64_t, -+ svst1b_scatter_u64base_u64 (p0, z1, z0), -+ svst1b_scatter (p0, z1, z0)) -+ -+/* -+** st1b_scatter_x0_u64_offset: -+** st1b z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1b_scatter_x0_u64_offset, svuint64_t, svuint64_t, -+ svst1b_scatter_u64base_offset_u64 (p0, z1, x0, z0), -+ svst1b_scatter_offset (p0, z1, x0, z0)) -+ -+/* -+** st1b_scatter_m1_u64_offset: -+** mov (x[0-9]+), #?-1 -+** st1b z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1b_scatter_m1_u64_offset, svuint64_t, svuint64_t, -+ svst1b_scatter_u64base_offset_u64 (p0, z1, -1, z0), -+ svst1b_scatter_offset (p0, z1, -1, z0)) -+ -+/* -+** st1b_scatter_0_u64_offset: -+** st1b z0\.d, p0, \[z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1b_scatter_0_u64_offset, svuint64_t, svuint64_t, -+ svst1b_scatter_u64base_offset_u64 (p0, z1, 0, z0), -+ svst1b_scatter_offset (p0, z1, 0, z0)) -+ -+/* -+** st1b_scatter_5_u64_offset: -+** st1b z0\.d, p0, \[z1\.d, #5\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1b_scatter_5_u64_offset, svuint64_t, svuint64_t, -+ svst1b_scatter_u64base_offset_u64 (p0, z1, 5, z0), -+ svst1b_scatter_offset (p0, z1, 5, z0)) -+ -+/* -+** st1b_scatter_31_u64_offset: -+** st1b z0\.d, p0, \[z1\.d, #31\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1b_scatter_31_u64_offset, svuint64_t, svuint64_t, -+ svst1b_scatter_u64base_offset_u64 (p0, z1, 31, z0), -+ svst1b_scatter_offset (p0, z1, 31, z0)) -+ -+/* -+** st1b_scatter_32_u64_offset: -+** mov (x[0-9]+), #?32 -+** st1b z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1b_scatter_32_u64_offset, svuint64_t, svuint64_t, -+ svst1b_scatter_u64base_offset_u64 (p0, z1, 32, z0), -+ svst1b_scatter_offset (p0, z1, 32, z0)) -+ -+/* -+** st1b_scatter_x0_u64_s64offset: -+** st1b z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1b_scatter_x0_u64_s64offset, svuint64_t, uint8_t, svint64_t, -+ svst1b_scatter_s64offset_u64 (p0, x0, z1, z0), -+ svst1b_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1b_scatter_u64_s64offset: -+** st1b z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1b_scatter_u64_s64offset, svuint64_t, uint8_t, svint64_t, -+ svst1b_scatter_s64offset_u64 (p0, x0, z1, z0), -+ svst1b_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1b_scatter_ext_u64_s64offset: -+** st1b z0\.d, p0, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1b_scatter_ext_u64_s64offset, svuint64_t, uint8_t, svint64_t, -+ svst1b_scatter_s64offset_u64 (p0, x0, svextw_s64_x (p0, z1), z0), -+ svst1b_scatter_offset (p0, x0, svextw_x (p0, z1), z0)) -+ -+/* -+** st1b_scatter_x0_u64_u64offset: -+** st1b z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1b_scatter_x0_u64_u64offset, svuint64_t, uint8_t, svuint64_t, -+ svst1b_scatter_u64offset_u64 (p0, x0, z1, z0), -+ svst1b_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1b_scatter_u64_u64offset: -+** st1b z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1b_scatter_u64_u64offset, svuint64_t, uint8_t, svuint64_t, -+ svst1b_scatter_u64offset_u64 (p0, x0, z1, z0), -+ svst1b_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1b_scatter_ext_u64_u64offset: -+** st1b z0\.d, p0, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1b_scatter_ext_u64_u64offset, svuint64_t, uint8_t, svuint64_t, -+ svst1b_scatter_u64offset_u64 (p0, x0, svextw_u64_x (p0, z1), 
z0), -+ svst1b_scatter_offset (p0, x0, svextw_x (p0, z1), z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_u16.c -new file mode 100644 -index 000000000..025a2212a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_u16.c -@@ -0,0 +1,162 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1b_u16_base: -+** st1b z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1b_u16_base, svuint16_t, uint8_t, -+ svst1b_u16 (p0, x0, z0), -+ svst1b (p0, x0, z0)) -+ -+/* -+** st1b_u16_index: -+** st1b z0\.h, p0, \[x0, x1\] -+** ret -+*/ -+TEST_STORE (st1b_u16_index, svuint16_t, uint8_t, -+ svst1b_u16 (p0, x0 + x1, z0), -+ svst1b (p0, x0 + x1, z0)) -+ -+/* -+** st1b_u16_1: -+** st1b z0\.h, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_u16_1, svuint16_t, uint8_t, -+ svst1b_u16 (p0, x0 + svcnth (), z0), -+ svst1b (p0, x0 + svcnth (), z0)) -+ -+/* -+** st1b_u16_7: -+** st1b z0\.h, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_u16_7, svuint16_t, uint8_t, -+ svst1b_u16 (p0, x0 + svcnth () * 7, z0), -+ svst1b (p0, x0 + svcnth () * 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1b_u16_8: -+** incb x0, all, mul #4 -+** st1b z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1b_u16_8, svuint16_t, uint8_t, -+ svst1b_u16 (p0, x0 + svcnth () * 8, z0), -+ svst1b (p0, x0 + svcnth () * 8, z0)) -+ -+/* -+** st1b_u16_m1: -+** st1b z0\.h, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_u16_m1, svuint16_t, uint8_t, -+ svst1b_u16 (p0, x0 - svcnth (), z0), -+ svst1b (p0, x0 - svcnth (), z0)) -+ -+/* -+** st1b_u16_m8: -+** st1b z0\.h, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_u16_m8, svuint16_t, uint8_t, -+ svst1b_u16 (p0, x0 - svcnth () * 8, z0), -+ svst1b (p0, x0 - svcnth () * 8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1b_u16_m9: -+** dech x0, all, mul #9 -+** st1b z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1b_u16_m9, svuint16_t, uint8_t, -+ svst1b_u16 (p0, x0 - svcnth () * 9, z0), -+ svst1b (p0, x0 - svcnth () * 9, z0)) -+ -+/* -+** st1b_vnum_u16_0: -+** st1b z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_u16_0, svuint16_t, uint8_t, -+ svst1b_vnum_u16 (p0, x0, 0, z0), -+ svst1b_vnum (p0, x0, 0, z0)) -+ -+/* -+** st1b_vnum_u16_1: -+** st1b z0\.h, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_u16_1, svuint16_t, uint8_t, -+ svst1b_vnum_u16 (p0, x0, 1, z0), -+ svst1b_vnum (p0, x0, 1, z0)) -+ -+/* -+** st1b_vnum_u16_7: -+** st1b z0\.h, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_u16_7, svuint16_t, uint8_t, -+ svst1b_vnum_u16 (p0, x0, 7, z0), -+ svst1b_vnum (p0, x0, 7, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st1b_vnum_u16_8: -+** incb x0, all, mul #4 -+** st1b z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_u16_8, svuint16_t, uint8_t, -+ svst1b_vnum_u16 (p0, x0, 8, z0), -+ svst1b_vnum (p0, x0, 8, z0)) -+ -+/* -+** st1b_vnum_u16_m1: -+** st1b z0\.h, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_u16_m1, svuint16_t, uint8_t, -+ svst1b_vnum_u16 (p0, x0, -1, z0), -+ svst1b_vnum (p0, x0, -1, z0)) -+ -+/* -+** st1b_vnum_u16_m8: -+** st1b z0\.h, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_u16_m8, svuint16_t, uint8_t, -+ svst1b_vnum_u16 (p0, x0, -8, z0), -+ svst1b_vnum (p0, x0, -8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1b_vnum_u16_m9: -+** dech x0, all, mul #9 -+** st1b z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_u16_m9, svuint16_t, uint8_t, -+ svst1b_vnum_u16 (p0, x0, -9, z0), -+ svst1b_vnum (p0, x0, -9, z0)) -+ -+/* -+** st1b_vnum_u16_x1: -+** cnth (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** st1b z0\.h, p0, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** st1b z0\.h, p0, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_STORE (st1b_vnum_u16_x1, svuint16_t, uint8_t, -+ svst1b_vnum_u16 (p0, x0, x1, z0), -+ svst1b_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_u32.c -new file mode 100644 -index 000000000..5833cb44b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_u32.c -@@ -0,0 +1,162 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1b_u32_base: -+** st1b z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1b_u32_base, svuint32_t, uint8_t, -+ svst1b_u32 (p0, x0, z0), -+ svst1b (p0, x0, z0)) -+ -+/* -+** st1b_u32_index: -+** st1b z0\.s, p0, \[x0, x1\] -+** ret -+*/ -+TEST_STORE (st1b_u32_index, svuint32_t, uint8_t, -+ svst1b_u32 (p0, x0 + x1, z0), -+ svst1b (p0, x0 + x1, z0)) -+ -+/* -+** st1b_u32_1: -+** st1b z0\.s, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_u32_1, svuint32_t, uint8_t, -+ svst1b_u32 (p0, x0 + svcntw (), z0), -+ svst1b (p0, x0 + svcntw (), z0)) -+ -+/* -+** st1b_u32_7: -+** st1b z0\.s, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_u32_7, svuint32_t, uint8_t, -+ svst1b_u32 (p0, x0 + svcntw () * 7, z0), -+ svst1b (p0, x0 + svcntw () * 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1b_u32_8: -+** incb x0, all, mul #2 -+** st1b z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1b_u32_8, svuint32_t, uint8_t, -+ svst1b_u32 (p0, x0 + svcntw () * 8, z0), -+ svst1b (p0, x0 + svcntw () * 8, z0)) -+ -+/* -+** st1b_u32_m1: -+** st1b z0\.s, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_u32_m1, svuint32_t, uint8_t, -+ svst1b_u32 (p0, x0 - svcntw (), z0), -+ svst1b (p0, x0 - svcntw (), z0)) -+ -+/* -+** st1b_u32_m8: -+** st1b z0\.s, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_u32_m8, svuint32_t, uint8_t, -+ svst1b_u32 (p0, x0 - svcntw () * 8, z0), -+ svst1b (p0, x0 - svcntw () * 8, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st1b_u32_m9: -+** decw x0, all, mul #9 -+** st1b z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1b_u32_m9, svuint32_t, uint8_t, -+ svst1b_u32 (p0, x0 - svcntw () * 9, z0), -+ svst1b (p0, x0 - svcntw () * 9, z0)) -+ -+/* -+** st1b_vnum_u32_0: -+** st1b z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_u32_0, svuint32_t, uint8_t, -+ svst1b_vnum_u32 (p0, x0, 0, z0), -+ svst1b_vnum (p0, x0, 0, z0)) -+ -+/* -+** st1b_vnum_u32_1: -+** st1b z0\.s, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_u32_1, svuint32_t, uint8_t, -+ svst1b_vnum_u32 (p0, x0, 1, z0), -+ svst1b_vnum (p0, x0, 1, z0)) -+ -+/* -+** st1b_vnum_u32_7: -+** st1b z0\.s, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_u32_7, svuint32_t, uint8_t, -+ svst1b_vnum_u32 (p0, x0, 7, z0), -+ svst1b_vnum (p0, x0, 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1b_vnum_u32_8: -+** incb x0, all, mul #2 -+** st1b z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_u32_8, svuint32_t, uint8_t, -+ svst1b_vnum_u32 (p0, x0, 8, z0), -+ svst1b_vnum (p0, x0, 8, z0)) -+ -+/* -+** st1b_vnum_u32_m1: -+** st1b z0\.s, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_u32_m1, svuint32_t, uint8_t, -+ svst1b_vnum_u32 (p0, x0, -1, z0), -+ svst1b_vnum (p0, x0, -1, z0)) -+ -+/* -+** st1b_vnum_u32_m8: -+** st1b z0\.s, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_u32_m8, svuint32_t, uint8_t, -+ svst1b_vnum_u32 (p0, x0, -8, z0), -+ svst1b_vnum (p0, x0, -8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1b_vnum_u32_m9: -+** decw x0, all, mul #9 -+** st1b z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_u32_m9, svuint32_t, uint8_t, -+ svst1b_vnum_u32 (p0, x0, -9, z0), -+ svst1b_vnum (p0, x0, -9, z0)) -+ -+/* -+** st1b_vnum_u32_x1: -+** cntw (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** st1b z0\.s, p0, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** st1b z0\.s, p0, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_STORE (st1b_vnum_u32_x1, svuint32_t, uint8_t, -+ svst1b_vnum_u32 (p0, x0, x1, z0), -+ svst1b_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_u64.c -new file mode 100644 -index 000000000..e96f4c486 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1b_u64.c -@@ -0,0 +1,162 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1b_u64_base: -+** st1b z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1b_u64_base, svuint64_t, uint8_t, -+ svst1b_u64 (p0, x0, z0), -+ svst1b (p0, x0, z0)) -+ -+/* -+** st1b_u64_index: -+** st1b z0\.d, p0, \[x0, x1\] -+** ret -+*/ -+TEST_STORE (st1b_u64_index, svuint64_t, uint8_t, -+ svst1b_u64 (p0, x0 + x1, z0), -+ svst1b (p0, x0 + x1, z0)) -+ -+/* -+** st1b_u64_1: -+** st1b z0\.d, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_u64_1, svuint64_t, uint8_t, -+ svst1b_u64 (p0, x0 + svcntd (), z0), -+ svst1b (p0, x0 + svcntd (), z0)) -+ -+/* -+** st1b_u64_7: -+** st1b z0\.d, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_u64_7, svuint64_t, uint8_t, -+ svst1b_u64 (p0, x0 + svcntd () * 7, z0), -+ svst1b (p0, x0 + svcntd () * 7, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st1b_u64_8: -+** incb x0 -+** st1b z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1b_u64_8, svuint64_t, uint8_t, -+ svst1b_u64 (p0, x0 + svcntd () * 8, z0), -+ svst1b (p0, x0 + svcntd () * 8, z0)) -+ -+/* -+** st1b_u64_m1: -+** st1b z0\.d, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_u64_m1, svuint64_t, uint8_t, -+ svst1b_u64 (p0, x0 - svcntd (), z0), -+ svst1b (p0, x0 - svcntd (), z0)) -+ -+/* -+** st1b_u64_m8: -+** st1b z0\.d, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_u64_m8, svuint64_t, uint8_t, -+ svst1b_u64 (p0, x0 - svcntd () * 8, z0), -+ svst1b (p0, x0 - svcntd () * 8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1b_u64_m9: -+** decd x0, all, mul #9 -+** st1b z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1b_u64_m9, svuint64_t, uint8_t, -+ svst1b_u64 (p0, x0 - svcntd () * 9, z0), -+ svst1b (p0, x0 - svcntd () * 9, z0)) -+ -+/* -+** st1b_vnum_u64_0: -+** st1b z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_u64_0, svuint64_t, uint8_t, -+ svst1b_vnum_u64 (p0, x0, 0, z0), -+ svst1b_vnum (p0, x0, 0, z0)) -+ -+/* -+** st1b_vnum_u64_1: -+** st1b z0\.d, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_u64_1, svuint64_t, uint8_t, -+ svst1b_vnum_u64 (p0, x0, 1, z0), -+ svst1b_vnum (p0, x0, 1, z0)) -+ -+/* -+** st1b_vnum_u64_7: -+** st1b z0\.d, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_u64_7, svuint64_t, uint8_t, -+ svst1b_vnum_u64 (p0, x0, 7, z0), -+ svst1b_vnum (p0, x0, 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1b_vnum_u64_8: -+** incb x0 -+** st1b z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_u64_8, svuint64_t, uint8_t, -+ svst1b_vnum_u64 (p0, x0, 8, z0), -+ svst1b_vnum (p0, x0, 8, z0)) -+ -+/* -+** st1b_vnum_u64_m1: -+** st1b z0\.d, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_u64_m1, svuint64_t, uint8_t, -+ svst1b_vnum_u64 (p0, x0, -1, z0), -+ svst1b_vnum (p0, x0, -1, z0)) -+ -+/* -+** st1b_vnum_u64_m8: -+** st1b z0\.d, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_u64_m8, svuint64_t, uint8_t, -+ svst1b_vnum_u64 (p0, x0, -8, z0), -+ svst1b_vnum (p0, x0, -8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1b_vnum_u64_m9: -+** decd x0, all, mul #9 -+** st1b z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1b_vnum_u64_m9, svuint64_t, uint8_t, -+ svst1b_vnum_u64 (p0, x0, -9, z0), -+ svst1b_vnum (p0, x0, -9, z0)) -+ -+/* -+** st1b_vnum_u64_x1: -+** cntd (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** st1b z0\.d, p0, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** st1b z0\.d, p0, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_STORE (st1b_vnum_u64_x1, svuint64_t, uint8_t, -+ svst1b_vnum_u64 (p0, x0, x1, z0), -+ svst1b_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_s32.c -new file mode 100644 -index 000000000..3466e3293 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_s32.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1h_s32_base: -+** st1h z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1h_s32_base, svint32_t, int16_t, -+ svst1h_s32 (p0, x0, z0), -+ svst1h (p0, x0, z0)) -+ -+/* -+** st1h_s32_index: -+** st1h z0\.s, p0, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_STORE (st1h_s32_index, svint32_t, int16_t, -+ svst1h_s32 (p0, x0 + x1, z0), -+ svst1h (p0, x0 + x1, z0)) -+ -+/* -+** st1h_s32_1: -+** st1h z0\.s, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1h_s32_1, svint32_t, int16_t, -+ svst1h_s32 (p0, x0 + svcntw (), z0), -+ svst1h (p0, x0 + svcntw (), z0)) -+ -+/* -+** st1h_s32_7: -+** st1h z0\.s, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1h_s32_7, svint32_t, int16_t, -+ svst1h_s32 (p0, x0 + svcntw () * 7, z0), -+ svst1h (p0, x0 + svcntw () * 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1h_s32_8: -+** incb x0, all, mul #4 -+** st1h z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1h_s32_8, svint32_t, int16_t, -+ svst1h_s32 (p0, x0 + svcntw () * 8, z0), -+ svst1h (p0, x0 + svcntw () * 8, z0)) -+ -+/* -+** st1h_s32_m1: -+** st1h z0\.s, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1h_s32_m1, svint32_t, int16_t, -+ svst1h_s32 (p0, x0 - svcntw (), z0), -+ svst1h (p0, x0 - svcntw (), z0)) -+ -+/* -+** st1h_s32_m8: -+** st1h z0\.s, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1h_s32_m8, svint32_t, int16_t, -+ svst1h_s32 (p0, x0 - svcntw () * 8, z0), -+ svst1h (p0, x0 - svcntw () * 8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1h_s32_m9: -+** dech x0, all, mul #9 -+** st1h z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1h_s32_m9, svint32_t, int16_t, -+ svst1h_s32 (p0, x0 - svcntw () * 9, z0), -+ svst1h (p0, x0 - svcntw () * 9, z0)) -+ -+/* -+** st1h_vnum_s32_0: -+** st1h z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1h_vnum_s32_0, svint32_t, int16_t, -+ svst1h_vnum_s32 (p0, x0, 0, z0), -+ svst1h_vnum (p0, x0, 0, z0)) -+ -+/* -+** st1h_vnum_s32_1: -+** st1h z0\.s, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1h_vnum_s32_1, svint32_t, int16_t, -+ svst1h_vnum_s32 (p0, x0, 1, z0), -+ svst1h_vnum (p0, x0, 1, z0)) -+ -+/* -+** st1h_vnum_s32_7: -+** st1h z0\.s, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1h_vnum_s32_7, svint32_t, int16_t, -+ svst1h_vnum_s32 (p0, x0, 7, z0), -+ svst1h_vnum (p0, x0, 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1h_vnum_s32_8: -+** incb x0, all, mul #4 -+** st1h z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1h_vnum_s32_8, svint32_t, int16_t, -+ svst1h_vnum_s32 (p0, x0, 8, z0), -+ svst1h_vnum (p0, x0, 8, z0)) -+ -+/* -+** st1h_vnum_s32_m1: -+** st1h z0\.s, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1h_vnum_s32_m1, svint32_t, int16_t, -+ svst1h_vnum_s32 (p0, x0, -1, z0), -+ svst1h_vnum (p0, x0, -1, z0)) -+ -+/* -+** st1h_vnum_s32_m8: -+** st1h z0\.s, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1h_vnum_s32_m8, svint32_t, int16_t, -+ svst1h_vnum_s32 (p0, x0, -8, z0), -+ svst1h_vnum (p0, x0, -8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1h_vnum_s32_m9: -+** dech x0, all, mul #9 -+** st1h z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1h_vnum_s32_m9, svint32_t, int16_t, -+ svst1h_vnum_s32 (p0, x0, -9, z0), -+ svst1h_vnum (p0, x0, -9, z0)) -+ -+/* Using MUL to calculate an index would also be OK. 
*/ -+/* -+** st1h_vnum_s32_x1: -+** cnth (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st1h z0\.s, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st1h_vnum_s32_x1, svint32_t, int16_t, -+ svst1h_vnum_s32 (p0, x0, x1, z0), -+ svst1h_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_s64.c -new file mode 100644 -index 000000000..c5df3b0c3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_s64.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1h_s64_base: -+** st1h z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1h_s64_base, svint64_t, int16_t, -+ svst1h_s64 (p0, x0, z0), -+ svst1h (p0, x0, z0)) -+ -+/* -+** st1h_s64_index: -+** st1h z0\.d, p0, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_STORE (st1h_s64_index, svint64_t, int16_t, -+ svst1h_s64 (p0, x0 + x1, z0), -+ svst1h (p0, x0 + x1, z0)) -+ -+/* -+** st1h_s64_1: -+** st1h z0\.d, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1h_s64_1, svint64_t, int16_t, -+ svst1h_s64 (p0, x0 + svcntd (), z0), -+ svst1h (p0, x0 + svcntd (), z0)) -+ -+/* -+** st1h_s64_7: -+** st1h z0\.d, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1h_s64_7, svint64_t, int16_t, -+ svst1h_s64 (p0, x0 + svcntd () * 7, z0), -+ svst1h (p0, x0 + svcntd () * 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1h_s64_8: -+** incb x0, all, mul #2 -+** st1h z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1h_s64_8, svint64_t, int16_t, -+ svst1h_s64 (p0, x0 + svcntd () * 8, z0), -+ svst1h (p0, x0 + svcntd () * 8, z0)) -+ -+/* -+** st1h_s64_m1: -+** st1h z0\.d, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1h_s64_m1, svint64_t, int16_t, -+ svst1h_s64 (p0, x0 - svcntd (), z0), -+ svst1h (p0, x0 - svcntd (), z0)) -+ -+/* -+** st1h_s64_m8: -+** st1h z0\.d, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1h_s64_m8, svint64_t, int16_t, -+ svst1h_s64 (p0, x0 - svcntd () * 8, z0), -+ svst1h (p0, x0 - svcntd () * 8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1h_s64_m9: -+** decw x0, all, mul #9 -+** st1h z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1h_s64_m9, svint64_t, int16_t, -+ svst1h_s64 (p0, x0 - svcntd () * 9, z0), -+ svst1h (p0, x0 - svcntd () * 9, z0)) -+ -+/* -+** st1h_vnum_s64_0: -+** st1h z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1h_vnum_s64_0, svint64_t, int16_t, -+ svst1h_vnum_s64 (p0, x0, 0, z0), -+ svst1h_vnum (p0, x0, 0, z0)) -+ -+/* -+** st1h_vnum_s64_1: -+** st1h z0\.d, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1h_vnum_s64_1, svint64_t, int16_t, -+ svst1h_vnum_s64 (p0, x0, 1, z0), -+ svst1h_vnum (p0, x0, 1, z0)) -+ -+/* -+** st1h_vnum_s64_7: -+** st1h z0\.d, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1h_vnum_s64_7, svint64_t, int16_t, -+ svst1h_vnum_s64 (p0, x0, 7, z0), -+ svst1h_vnum (p0, x0, 7, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st1h_vnum_s64_8: -+** incb x0, all, mul #2 -+** st1h z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1h_vnum_s64_8, svint64_t, int16_t, -+ svst1h_vnum_s64 (p0, x0, 8, z0), -+ svst1h_vnum (p0, x0, 8, z0)) -+ -+/* -+** st1h_vnum_s64_m1: -+** st1h z0\.d, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1h_vnum_s64_m1, svint64_t, int16_t, -+ svst1h_vnum_s64 (p0, x0, -1, z0), -+ svst1h_vnum (p0, x0, -1, z0)) -+ -+/* -+** st1h_vnum_s64_m8: -+** st1h z0\.d, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1h_vnum_s64_m8, svint64_t, int16_t, -+ svst1h_vnum_s64 (p0, x0, -8, z0), -+ svst1h_vnum (p0, x0, -8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1h_vnum_s64_m9: -+** decw x0, all, mul #9 -+** st1h z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1h_vnum_s64_m9, svint64_t, int16_t, -+ svst1h_vnum_s64 (p0, x0, -9, z0), -+ svst1h_vnum (p0, x0, -9, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** st1h_vnum_s64_x1: -+** cntw (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st1h z0\.d, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st1h_vnum_s64_x1, svint64_t, int16_t, -+ svst1h_vnum_s64 (p0, x0, x1, z0), -+ svst1h_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_s32.c -new file mode 100644 -index 000000000..2a23d41f3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_s32.c -@@ -0,0 +1,207 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1h_scatter_s32: -+** st1h z0\.s, p0, \[z1\.s\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_s32, svint32_t, svuint32_t, -+ svst1h_scatter_u32base_s32 (p0, z1, z0), -+ svst1h_scatter (p0, z1, z0)) -+ -+/* -+** st1h_scatter_x0_s32_offset: -+** st1h z0\.s, p0, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_x0_s32_offset, svint32_t, svuint32_t, -+ svst1h_scatter_u32base_offset_s32 (p0, z1, x0, z0), -+ svst1h_scatter_offset (p0, z1, x0, z0)) -+ -+/* -+** st1h_scatter_m2_s32_offset: -+** mov (x[0-9]+), #?-2 -+** st1h z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_m2_s32_offset, svint32_t, svuint32_t, -+ svst1h_scatter_u32base_offset_s32 (p0, z1, -2, z0), -+ svst1h_scatter_offset (p0, z1, -2, z0)) -+ -+/* -+** st1h_scatter_0_s32_offset: -+** st1h z0\.s, p0, \[z1\.s\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_0_s32_offset, svint32_t, svuint32_t, -+ svst1h_scatter_u32base_offset_s32 (p0, z1, 0, z0), -+ svst1h_scatter_offset (p0, z1, 0, z0)) -+ -+/* -+** st1h_scatter_5_s32_offset: -+** mov (x[0-9]+), #?5 -+** st1h z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_5_s32_offset, svint32_t, svuint32_t, -+ svst1h_scatter_u32base_offset_s32 (p0, z1, 5, z0), -+ svst1h_scatter_offset (p0, z1, 5, z0)) -+ -+/* -+** st1h_scatter_6_s32_offset: -+** st1h z0\.s, p0, \[z1\.s, #6\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_6_s32_offset, svint32_t, svuint32_t, -+ svst1h_scatter_u32base_offset_s32 (p0, z1, 6, z0), -+ svst1h_scatter_offset (p0, z1, 6, z0)) -+ -+/* -+** st1h_scatter_62_s32_offset: -+** st1h z0\.s, p0, \[z1\.s, #62\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_62_s32_offset, svint32_t, svuint32_t, -+ svst1h_scatter_u32base_offset_s32 (p0, z1, 62, z0), -+ svst1h_scatter_offset (p0, z1, 62, z0)) -+ -+/* -+** st1h_scatter_64_s32_offset: 
-+** mov (x[0-9]+), #?64 -+** st1h z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_64_s32_offset, svint32_t, svuint32_t, -+ svst1h_scatter_u32base_offset_s32 (p0, z1, 64, z0), -+ svst1h_scatter_offset (p0, z1, 64, z0)) -+ -+/* -+** st1h_scatter_x0_s32_index: -+** lsl (x[0-9]+), x0, #?1 -+** st1h z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_x0_s32_index, svint32_t, svuint32_t, -+ svst1h_scatter_u32base_index_s32 (p0, z1, x0, z0), -+ svst1h_scatter_index (p0, z1, x0, z0)) -+ -+/* -+** st1h_scatter_m1_s32_index: -+** mov (x[0-9]+), #?-2 -+** st1h z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_m1_s32_index, svint32_t, svuint32_t, -+ svst1h_scatter_u32base_index_s32 (p0, z1, -1, z0), -+ svst1h_scatter_index (p0, z1, -1, z0)) -+ -+/* -+** st1h_scatter_0_s32_index: -+** st1h z0\.s, p0, \[z1\.s\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_0_s32_index, svint32_t, svuint32_t, -+ svst1h_scatter_u32base_index_s32 (p0, z1, 0, z0), -+ svst1h_scatter_index (p0, z1, 0, z0)) -+ -+/* -+** st1h_scatter_5_s32_index: -+** st1h z0\.s, p0, \[z1\.s, #10\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_5_s32_index, svint32_t, svuint32_t, -+ svst1h_scatter_u32base_index_s32 (p0, z1, 5, z0), -+ svst1h_scatter_index (p0, z1, 5, z0)) -+ -+/* -+** st1h_scatter_31_s32_index: -+** st1h z0\.s, p0, \[z1\.s, #62\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_31_s32_index, svint32_t, svuint32_t, -+ svst1h_scatter_u32base_index_s32 (p0, z1, 31, z0), -+ svst1h_scatter_index (p0, z1, 31, z0)) -+ -+/* -+** st1h_scatter_32_s32_index: -+** mov (x[0-9]+), #?64 -+** st1h z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_32_s32_index, svint32_t, svuint32_t, -+ svst1h_scatter_u32base_index_s32 (p0, z1, 32, z0), -+ svst1h_scatter_index (p0, z1, 32, z0)) -+ -+/* -+** st1h_scatter_x0_s32_s32offset: -+** st1h z0\.s, p0, \[x0, z1\.s, sxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_x0_s32_s32offset, svint32_t, int16_t, svint32_t, -+ svst1h_scatter_s32offset_s32 (p0, x0, z1, z0), -+ svst1h_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1h_scatter_s32_s32offset: -+** st1h z0\.s, p0, \[x0, z1\.s, sxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_s32_s32offset, svint32_t, int16_t, svint32_t, -+ svst1h_scatter_s32offset_s32 (p0, x0, z1, z0), -+ svst1h_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1h_scatter_x0_s32_u32offset: -+** st1h z0\.s, p0, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_x0_s32_u32offset, svint32_t, int16_t, svuint32_t, -+ svst1h_scatter_u32offset_s32 (p0, x0, z1, z0), -+ svst1h_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1h_scatter_s32_u32offset: -+** st1h z0\.s, p0, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_s32_u32offset, svint32_t, int16_t, svuint32_t, -+ svst1h_scatter_u32offset_s32 (p0, x0, z1, z0), -+ svst1h_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1h_scatter_x0_s32_s32index: -+** st1h z0\.s, p0, \[x0, z1\.s, sxtw 1\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_x0_s32_s32index, svint32_t, int16_t, svint32_t, -+ svst1h_scatter_s32index_s32 (p0, x0, z1, z0), -+ svst1h_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1h_scatter_s32_s32index: -+** st1h z0\.s, p0, \[x0, z1\.s, sxtw 1\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_s32_s32index, svint32_t, int16_t, svint32_t, -+ svst1h_scatter_s32index_s32 (p0, x0, z1, z0), -+ svst1h_scatter_index (p0, x0, z1, z0)) 
-+ -+/* -+** st1h_scatter_x0_s32_u32index: -+** st1h z0\.s, p0, \[x0, z1\.s, uxtw 1\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_x0_s32_u32index, svint32_t, int16_t, svuint32_t, -+ svst1h_scatter_u32index_s32 (p0, x0, z1, z0), -+ svst1h_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1h_scatter_s32_u32index: -+** st1h z0\.s, p0, \[x0, z1\.s, uxtw 1\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_s32_u32index, svint32_t, int16_t, svuint32_t, -+ svst1h_scatter_u32index_s32 (p0, x0, z1, z0), -+ svst1h_scatter_index (p0, x0, z1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_s64.c -new file mode 100644 -index 000000000..6a1adb056 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_s64.c -@@ -0,0 +1,243 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1h_scatter_s64: -+** st1h z0\.d, p0, \[z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_s64, svint64_t, svuint64_t, -+ svst1h_scatter_u64base_s64 (p0, z1, z0), -+ svst1h_scatter (p0, z1, z0)) -+ -+/* -+** st1h_scatter_x0_s64_offset: -+** st1h z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_x0_s64_offset, svint64_t, svuint64_t, -+ svst1h_scatter_u64base_offset_s64 (p0, z1, x0, z0), -+ svst1h_scatter_offset (p0, z1, x0, z0)) -+ -+/* -+** st1h_scatter_m2_s64_offset: -+** mov (x[0-9]+), #?-2 -+** st1h z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_m2_s64_offset, svint64_t, svuint64_t, -+ svst1h_scatter_u64base_offset_s64 (p0, z1, -2, z0), -+ svst1h_scatter_offset (p0, z1, -2, z0)) -+ -+/* -+** st1h_scatter_0_s64_offset: -+** st1h z0\.d, p0, \[z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_0_s64_offset, svint64_t, svuint64_t, -+ svst1h_scatter_u64base_offset_s64 (p0, z1, 0, z0), -+ svst1h_scatter_offset (p0, z1, 0, z0)) -+ -+/* -+** st1h_scatter_5_s64_offset: -+** mov (x[0-9]+), #?5 -+** st1h z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_5_s64_offset, svint64_t, svuint64_t, -+ svst1h_scatter_u64base_offset_s64 (p0, z1, 5, z0), -+ svst1h_scatter_offset (p0, z1, 5, z0)) -+ -+/* -+** st1h_scatter_6_s64_offset: -+** st1h z0\.d, p0, \[z1\.d, #6\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_6_s64_offset, svint64_t, svuint64_t, -+ svst1h_scatter_u64base_offset_s64 (p0, z1, 6, z0), -+ svst1h_scatter_offset (p0, z1, 6, z0)) -+ -+/* -+** st1h_scatter_62_s64_offset: -+** st1h z0\.d, p0, \[z1\.d, #62\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_62_s64_offset, svint64_t, svuint64_t, -+ svst1h_scatter_u64base_offset_s64 (p0, z1, 62, z0), -+ svst1h_scatter_offset (p0, z1, 62, z0)) -+ -+/* -+** st1h_scatter_64_s64_offset: -+** mov (x[0-9]+), #?64 -+** st1h z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_64_s64_offset, svint64_t, svuint64_t, -+ svst1h_scatter_u64base_offset_s64 (p0, z1, 64, z0), -+ svst1h_scatter_offset (p0, z1, 64, z0)) -+ -+/* -+** st1h_scatter_x0_s64_index: -+** lsl (x[0-9]+), x0, #?1 -+** st1h z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_x0_s64_index, svint64_t, svuint64_t, -+ svst1h_scatter_u64base_index_s64 (p0, z1, x0, z0), -+ svst1h_scatter_index (p0, z1, x0, z0)) -+ -+/* -+** st1h_scatter_m1_s64_index: -+** mov (x[0-9]+), #?-2 -+** st1h z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_m1_s64_index, 
svint64_t, svuint64_t, -+ svst1h_scatter_u64base_index_s64 (p0, z1, -1, z0), -+ svst1h_scatter_index (p0, z1, -1, z0)) -+ -+/* -+** st1h_scatter_0_s64_index: -+** st1h z0\.d, p0, \[z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_0_s64_index, svint64_t, svuint64_t, -+ svst1h_scatter_u64base_index_s64 (p0, z1, 0, z0), -+ svst1h_scatter_index (p0, z1, 0, z0)) -+ -+/* -+** st1h_scatter_5_s64_index: -+** st1h z0\.d, p0, \[z1\.d, #10\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_5_s64_index, svint64_t, svuint64_t, -+ svst1h_scatter_u64base_index_s64 (p0, z1, 5, z0), -+ svst1h_scatter_index (p0, z1, 5, z0)) -+ -+/* -+** st1h_scatter_31_s64_index: -+** st1h z0\.d, p0, \[z1\.d, #62\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_31_s64_index, svint64_t, svuint64_t, -+ svst1h_scatter_u64base_index_s64 (p0, z1, 31, z0), -+ svst1h_scatter_index (p0, z1, 31, z0)) -+ -+/* -+** st1h_scatter_32_s64_index: -+** mov (x[0-9]+), #?64 -+** st1h z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_32_s64_index, svint64_t, svuint64_t, -+ svst1h_scatter_u64base_index_s64 (p0, z1, 32, z0), -+ svst1h_scatter_index (p0, z1, 32, z0)) -+ -+/* -+** st1h_scatter_x0_s64_s64offset: -+** st1h z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_x0_s64_s64offset, svint64_t, int16_t, svint64_t, -+ svst1h_scatter_s64offset_s64 (p0, x0, z1, z0), -+ svst1h_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1h_scatter_s64_s64offset: -+** st1h z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_s64_s64offset, svint64_t, int16_t, svint64_t, -+ svst1h_scatter_s64offset_s64 (p0, x0, z1, z0), -+ svst1h_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1h_scatter_ext_s64_s64offset: -+** st1h z0\.d, p0, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_ext_s64_s64offset, svint64_t, int16_t, svint64_t, -+ svst1h_scatter_s64offset_s64 (p0, x0, svextw_s64_x (p0, z1), z0), -+ svst1h_scatter_offset (p0, x0, svextw_x (p0, z1), z0)) -+ -+/* -+** st1h_scatter_x0_s64_u64offset: -+** st1h z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_x0_s64_u64offset, svint64_t, int16_t, svuint64_t, -+ svst1h_scatter_u64offset_s64 (p0, x0, z1, z0), -+ svst1h_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1h_scatter_s64_u64offset: -+** st1h z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_s64_u64offset, svint64_t, int16_t, svuint64_t, -+ svst1h_scatter_u64offset_s64 (p0, x0, z1, z0), -+ svst1h_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1h_scatter_ext_s64_u64offset: -+** st1h z0\.d, p0, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_ext_s64_u64offset, svint64_t, int16_t, svuint64_t, -+ svst1h_scatter_u64offset_s64 (p0, x0, svextw_u64_x (p0, z1), z0), -+ svst1h_scatter_offset (p0, x0, svextw_x (p0, z1), z0)) -+ -+/* -+** st1h_scatter_x0_s64_s64index: -+** st1h z0\.d, p0, \[x0, z1\.d, lsl 1\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_x0_s64_s64index, svint64_t, int16_t, svint64_t, -+ svst1h_scatter_s64index_s64 (p0, x0, z1, z0), -+ svst1h_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1h_scatter_s64_s64index: -+** st1h z0\.d, p0, \[x0, z1\.d, lsl 1\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_s64_s64index, svint64_t, int16_t, svint64_t, -+ svst1h_scatter_s64index_s64 (p0, x0, z1, z0), -+ svst1h_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1h_scatter_ext_s64_s64index: -+** st1h z0\.d, p0, \[x0, z1\.d, sxtw 1\] -+** ret -+*/ 
-+TEST_STORE_SCATTER_SZ (st1h_scatter_ext_s64_s64index, svint64_t, int16_t, svint64_t, -+ svst1h_scatter_s64index_s64 (p0, x0, svextw_s64_x (p0, z1), z0), -+ svst1h_scatter_index (p0, x0, svextw_x (p0, z1), z0)) -+ -+/* -+** st1h_scatter_x0_s64_u64index: -+** st1h z0\.d, p0, \[x0, z1\.d, lsl 1\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_x0_s64_u64index, svint64_t, int16_t, svuint64_t, -+ svst1h_scatter_u64index_s64 (p0, x0, z1, z0), -+ svst1h_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1h_scatter_s64_u64index: -+** st1h z0\.d, p0, \[x0, z1\.d, lsl 1\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_s64_u64index, svint64_t, int16_t, svuint64_t, -+ svst1h_scatter_u64index_s64 (p0, x0, z1, z0), -+ svst1h_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1h_scatter_ext_s64_u64index: -+** st1h z0\.d, p0, \[x0, z1\.d, uxtw 1\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_ext_s64_u64index, svint64_t, int16_t, svuint64_t, -+ svst1h_scatter_u64index_s64 (p0, x0, svextw_u64_x (p0, z1), z0), -+ svst1h_scatter_index (p0, x0, svextw_x (p0, z1), z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_u32.c -new file mode 100644 -index 000000000..12197315d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_u32.c -@@ -0,0 +1,207 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1h_scatter_u32: -+** st1h z0\.s, p0, \[z1\.s\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_u32, svuint32_t, svuint32_t, -+ svst1h_scatter_u32base_u32 (p0, z1, z0), -+ svst1h_scatter (p0, z1, z0)) -+ -+/* -+** st1h_scatter_x0_u32_offset: -+** st1h z0\.s, p0, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_x0_u32_offset, svuint32_t, svuint32_t, -+ svst1h_scatter_u32base_offset_u32 (p0, z1, x0, z0), -+ svst1h_scatter_offset (p0, z1, x0, z0)) -+ -+/* -+** st1h_scatter_m2_u32_offset: -+** mov (x[0-9]+), #?-2 -+** st1h z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_m2_u32_offset, svuint32_t, svuint32_t, -+ svst1h_scatter_u32base_offset_u32 (p0, z1, -2, z0), -+ svst1h_scatter_offset (p0, z1, -2, z0)) -+ -+/* -+** st1h_scatter_0_u32_offset: -+** st1h z0\.s, p0, \[z1\.s\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_0_u32_offset, svuint32_t, svuint32_t, -+ svst1h_scatter_u32base_offset_u32 (p0, z1, 0, z0), -+ svst1h_scatter_offset (p0, z1, 0, z0)) -+ -+/* -+** st1h_scatter_5_u32_offset: -+** mov (x[0-9]+), #?5 -+** st1h z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_5_u32_offset, svuint32_t, svuint32_t, -+ svst1h_scatter_u32base_offset_u32 (p0, z1, 5, z0), -+ svst1h_scatter_offset (p0, z1, 5, z0)) -+ -+/* -+** st1h_scatter_6_u32_offset: -+** st1h z0\.s, p0, \[z1\.s, #6\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_6_u32_offset, svuint32_t, svuint32_t, -+ svst1h_scatter_u32base_offset_u32 (p0, z1, 6, z0), -+ svst1h_scatter_offset (p0, z1, 6, z0)) -+ -+/* -+** st1h_scatter_62_u32_offset: -+** st1h z0\.s, p0, \[z1\.s, #62\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_62_u32_offset, svuint32_t, svuint32_t, -+ svst1h_scatter_u32base_offset_u32 (p0, z1, 62, z0), -+ svst1h_scatter_offset (p0, z1, 62, z0)) -+ -+/* -+** st1h_scatter_64_u32_offset: -+** mov (x[0-9]+), #?64 -+** st1h z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_64_u32_offset, svuint32_t, 
svuint32_t, -+ svst1h_scatter_u32base_offset_u32 (p0, z1, 64, z0), -+ svst1h_scatter_offset (p0, z1, 64, z0)) -+ -+/* -+** st1h_scatter_x0_u32_index: -+** lsl (x[0-9]+), x0, #?1 -+** st1h z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_x0_u32_index, svuint32_t, svuint32_t, -+ svst1h_scatter_u32base_index_u32 (p0, z1, x0, z0), -+ svst1h_scatter_index (p0, z1, x0, z0)) -+ -+/* -+** st1h_scatter_m1_u32_index: -+** mov (x[0-9]+), #?-2 -+** st1h z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_m1_u32_index, svuint32_t, svuint32_t, -+ svst1h_scatter_u32base_index_u32 (p0, z1, -1, z0), -+ svst1h_scatter_index (p0, z1, -1, z0)) -+ -+/* -+** st1h_scatter_0_u32_index: -+** st1h z0\.s, p0, \[z1\.s\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_0_u32_index, svuint32_t, svuint32_t, -+ svst1h_scatter_u32base_index_u32 (p0, z1, 0, z0), -+ svst1h_scatter_index (p0, z1, 0, z0)) -+ -+/* -+** st1h_scatter_5_u32_index: -+** st1h z0\.s, p0, \[z1\.s, #10\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_5_u32_index, svuint32_t, svuint32_t, -+ svst1h_scatter_u32base_index_u32 (p0, z1, 5, z0), -+ svst1h_scatter_index (p0, z1, 5, z0)) -+ -+/* -+** st1h_scatter_31_u32_index: -+** st1h z0\.s, p0, \[z1\.s, #62\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_31_u32_index, svuint32_t, svuint32_t, -+ svst1h_scatter_u32base_index_u32 (p0, z1, 31, z0), -+ svst1h_scatter_index (p0, z1, 31, z0)) -+ -+/* -+** st1h_scatter_32_u32_index: -+** mov (x[0-9]+), #?64 -+** st1h z0\.s, p0, \[\1, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_32_u32_index, svuint32_t, svuint32_t, -+ svst1h_scatter_u32base_index_u32 (p0, z1, 32, z0), -+ svst1h_scatter_index (p0, z1, 32, z0)) -+ -+/* -+** st1h_scatter_x0_u32_s32offset: -+** st1h z0\.s, p0, \[x0, z1\.s, sxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_x0_u32_s32offset, svuint32_t, uint16_t, svint32_t, -+ svst1h_scatter_s32offset_u32 (p0, x0, z1, z0), -+ svst1h_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1h_scatter_u32_s32offset: -+** st1h z0\.s, p0, \[x0, z1\.s, sxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_u32_s32offset, svuint32_t, uint16_t, svint32_t, -+ svst1h_scatter_s32offset_u32 (p0, x0, z1, z0), -+ svst1h_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1h_scatter_x0_u32_u32offset: -+** st1h z0\.s, p0, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_x0_u32_u32offset, svuint32_t, uint16_t, svuint32_t, -+ svst1h_scatter_u32offset_u32 (p0, x0, z1, z0), -+ svst1h_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1h_scatter_u32_u32offset: -+** st1h z0\.s, p0, \[x0, z1\.s, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_u32_u32offset, svuint32_t, uint16_t, svuint32_t, -+ svst1h_scatter_u32offset_u32 (p0, x0, z1, z0), -+ svst1h_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1h_scatter_x0_u32_s32index: -+** st1h z0\.s, p0, \[x0, z1\.s, sxtw 1\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_x0_u32_s32index, svuint32_t, uint16_t, svint32_t, -+ svst1h_scatter_s32index_u32 (p0, x0, z1, z0), -+ svst1h_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1h_scatter_u32_s32index: -+** st1h z0\.s, p0, \[x0, z1\.s, sxtw 1\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_u32_s32index, svuint32_t, uint16_t, svint32_t, -+ svst1h_scatter_s32index_u32 (p0, x0, z1, z0), -+ svst1h_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1h_scatter_x0_u32_u32index: -+** st1h z0\.s, p0, \[x0, z1\.s, uxtw 1\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ 
(st1h_scatter_x0_u32_u32index, svuint32_t, uint16_t, svuint32_t, -+ svst1h_scatter_u32index_u32 (p0, x0, z1, z0), -+ svst1h_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1h_scatter_u32_u32index: -+** st1h z0\.s, p0, \[x0, z1\.s, uxtw 1\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_u32_u32index, svuint32_t, uint16_t, svuint32_t, -+ svst1h_scatter_u32index_u32 (p0, x0, z1, z0), -+ svst1h_scatter_index (p0, x0, z1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_u64.c -new file mode 100644 -index 000000000..7021ea68f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_scatter_u64.c -@@ -0,0 +1,243 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1h_scatter_u64: -+** st1h z0\.d, p0, \[z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_u64, svuint64_t, svuint64_t, -+ svst1h_scatter_u64base_u64 (p0, z1, z0), -+ svst1h_scatter (p0, z1, z0)) -+ -+/* -+** st1h_scatter_x0_u64_offset: -+** st1h z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_x0_u64_offset, svuint64_t, svuint64_t, -+ svst1h_scatter_u64base_offset_u64 (p0, z1, x0, z0), -+ svst1h_scatter_offset (p0, z1, x0, z0)) -+ -+/* -+** st1h_scatter_m2_u64_offset: -+** mov (x[0-9]+), #?-2 -+** st1h z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_m2_u64_offset, svuint64_t, svuint64_t, -+ svst1h_scatter_u64base_offset_u64 (p0, z1, -2, z0), -+ svst1h_scatter_offset (p0, z1, -2, z0)) -+ -+/* -+** st1h_scatter_0_u64_offset: -+** st1h z0\.d, p0, \[z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_0_u64_offset, svuint64_t, svuint64_t, -+ svst1h_scatter_u64base_offset_u64 (p0, z1, 0, z0), -+ svst1h_scatter_offset (p0, z1, 0, z0)) -+ -+/* -+** st1h_scatter_5_u64_offset: -+** mov (x[0-9]+), #?5 -+** st1h z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_5_u64_offset, svuint64_t, svuint64_t, -+ svst1h_scatter_u64base_offset_u64 (p0, z1, 5, z0), -+ svst1h_scatter_offset (p0, z1, 5, z0)) -+ -+/* -+** st1h_scatter_6_u64_offset: -+** st1h z0\.d, p0, \[z1\.d, #6\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_6_u64_offset, svuint64_t, svuint64_t, -+ svst1h_scatter_u64base_offset_u64 (p0, z1, 6, z0), -+ svst1h_scatter_offset (p0, z1, 6, z0)) -+ -+/* -+** st1h_scatter_62_u64_offset: -+** st1h z0\.d, p0, \[z1\.d, #62\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_62_u64_offset, svuint64_t, svuint64_t, -+ svst1h_scatter_u64base_offset_u64 (p0, z1, 62, z0), -+ svst1h_scatter_offset (p0, z1, 62, z0)) -+ -+/* -+** st1h_scatter_64_u64_offset: -+** mov (x[0-9]+), #?64 -+** st1h z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_64_u64_offset, svuint64_t, svuint64_t, -+ svst1h_scatter_u64base_offset_u64 (p0, z1, 64, z0), -+ svst1h_scatter_offset (p0, z1, 64, z0)) -+ -+/* -+** st1h_scatter_x0_u64_index: -+** lsl (x[0-9]+), x0, #?1 -+** st1h z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_x0_u64_index, svuint64_t, svuint64_t, -+ svst1h_scatter_u64base_index_u64 (p0, z1, x0, z0), -+ svst1h_scatter_index (p0, z1, x0, z0)) -+ -+/* -+** st1h_scatter_m1_u64_index: -+** mov (x[0-9]+), #?-2 -+** st1h z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_m1_u64_index, svuint64_t, svuint64_t, -+ svst1h_scatter_u64base_index_u64 (p0, z1, -1, z0), -+ svst1h_scatter_index (p0, z1, 
-1, z0)) -+ -+/* -+** st1h_scatter_0_u64_index: -+** st1h z0\.d, p0, \[z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_0_u64_index, svuint64_t, svuint64_t, -+ svst1h_scatter_u64base_index_u64 (p0, z1, 0, z0), -+ svst1h_scatter_index (p0, z1, 0, z0)) -+ -+/* -+** st1h_scatter_5_u64_index: -+** st1h z0\.d, p0, \[z1\.d, #10\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_5_u64_index, svuint64_t, svuint64_t, -+ svst1h_scatter_u64base_index_u64 (p0, z1, 5, z0), -+ svst1h_scatter_index (p0, z1, 5, z0)) -+ -+/* -+** st1h_scatter_31_u64_index: -+** st1h z0\.d, p0, \[z1\.d, #62\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_31_u64_index, svuint64_t, svuint64_t, -+ svst1h_scatter_u64base_index_u64 (p0, z1, 31, z0), -+ svst1h_scatter_index (p0, z1, 31, z0)) -+ -+/* -+** st1h_scatter_32_u64_index: -+** mov (x[0-9]+), #?64 -+** st1h z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1h_scatter_32_u64_index, svuint64_t, svuint64_t, -+ svst1h_scatter_u64base_index_u64 (p0, z1, 32, z0), -+ svst1h_scatter_index (p0, z1, 32, z0)) -+ -+/* -+** st1h_scatter_x0_u64_s64offset: -+** st1h z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_x0_u64_s64offset, svuint64_t, uint16_t, svint64_t, -+ svst1h_scatter_s64offset_u64 (p0, x0, z1, z0), -+ svst1h_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1h_scatter_u64_s64offset: -+** st1h z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_u64_s64offset, svuint64_t, uint16_t, svint64_t, -+ svst1h_scatter_s64offset_u64 (p0, x0, z1, z0), -+ svst1h_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1h_scatter_ext_u64_s64offset: -+** st1h z0\.d, p0, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_ext_u64_s64offset, svuint64_t, uint16_t, svint64_t, -+ svst1h_scatter_s64offset_u64 (p0, x0, svextw_s64_x (p0, z1), z0), -+ svst1h_scatter_offset (p0, x0, svextw_x (p0, z1), z0)) -+ -+/* -+** st1h_scatter_x0_u64_u64offset: -+** st1h z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_x0_u64_u64offset, svuint64_t, uint16_t, svuint64_t, -+ svst1h_scatter_u64offset_u64 (p0, x0, z1, z0), -+ svst1h_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1h_scatter_u64_u64offset: -+** st1h z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_u64_u64offset, svuint64_t, uint16_t, svuint64_t, -+ svst1h_scatter_u64offset_u64 (p0, x0, z1, z0), -+ svst1h_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1h_scatter_ext_u64_u64offset: -+** st1h z0\.d, p0, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_ext_u64_u64offset, svuint64_t, uint16_t, svuint64_t, -+ svst1h_scatter_u64offset_u64 (p0, x0, svextw_u64_x (p0, z1), z0), -+ svst1h_scatter_offset (p0, x0, svextw_x (p0, z1), z0)) -+ -+/* -+** st1h_scatter_x0_u64_s64index: -+** st1h z0\.d, p0, \[x0, z1\.d, lsl 1\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_x0_u64_s64index, svuint64_t, uint16_t, svint64_t, -+ svst1h_scatter_s64index_u64 (p0, x0, z1, z0), -+ svst1h_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1h_scatter_u64_s64index: -+** st1h z0\.d, p0, \[x0, z1\.d, lsl 1\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_u64_s64index, svuint64_t, uint16_t, svint64_t, -+ svst1h_scatter_s64index_u64 (p0, x0, z1, z0), -+ svst1h_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1h_scatter_ext_u64_s64index: -+** st1h z0\.d, p0, \[x0, z1\.d, sxtw 1\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_ext_u64_s64index, svuint64_t, uint16_t, svint64_t, -+ 
svst1h_scatter_s64index_u64 (p0, x0, svextw_s64_x (p0, z1), z0), -+ svst1h_scatter_index (p0, x0, svextw_x (p0, z1), z0)) -+ -+/* -+** st1h_scatter_x0_u64_u64index: -+** st1h z0\.d, p0, \[x0, z1\.d, lsl 1\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_x0_u64_u64index, svuint64_t, uint16_t, svuint64_t, -+ svst1h_scatter_u64index_u64 (p0, x0, z1, z0), -+ svst1h_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1h_scatter_u64_u64index: -+** st1h z0\.d, p0, \[x0, z1\.d, lsl 1\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_u64_u64index, svuint64_t, uint16_t, svuint64_t, -+ svst1h_scatter_u64index_u64 (p0, x0, z1, z0), -+ svst1h_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1h_scatter_ext_u64_u64index: -+** st1h z0\.d, p0, \[x0, z1\.d, uxtw 1\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1h_scatter_ext_u64_u64index, svuint64_t, uint16_t, svuint64_t, -+ svst1h_scatter_u64index_u64 (p0, x0, svextw_u64_x (p0, z1), z0), -+ svst1h_scatter_index (p0, x0, svextw_x (p0, z1), z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_u32.c -new file mode 100644 -index 000000000..49111043b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_u32.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1h_u32_base: -+** st1h z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1h_u32_base, svuint32_t, uint16_t, -+ svst1h_u32 (p0, x0, z0), -+ svst1h (p0, x0, z0)) -+ -+/* -+** st1h_u32_index: -+** st1h z0\.s, p0, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_STORE (st1h_u32_index, svuint32_t, uint16_t, -+ svst1h_u32 (p0, x0 + x1, z0), -+ svst1h (p0, x0 + x1, z0)) -+ -+/* -+** st1h_u32_1: -+** st1h z0\.s, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1h_u32_1, svuint32_t, uint16_t, -+ svst1h_u32 (p0, x0 + svcntw (), z0), -+ svst1h (p0, x0 + svcntw (), z0)) -+ -+/* -+** st1h_u32_7: -+** st1h z0\.s, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1h_u32_7, svuint32_t, uint16_t, -+ svst1h_u32 (p0, x0 + svcntw () * 7, z0), -+ svst1h (p0, x0 + svcntw () * 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1h_u32_8: -+** incb x0, all, mul #4 -+** st1h z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1h_u32_8, svuint32_t, uint16_t, -+ svst1h_u32 (p0, x0 + svcntw () * 8, z0), -+ svst1h (p0, x0 + svcntw () * 8, z0)) -+ -+/* -+** st1h_u32_m1: -+** st1h z0\.s, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1h_u32_m1, svuint32_t, uint16_t, -+ svst1h_u32 (p0, x0 - svcntw (), z0), -+ svst1h (p0, x0 - svcntw (), z0)) -+ -+/* -+** st1h_u32_m8: -+** st1h z0\.s, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1h_u32_m8, svuint32_t, uint16_t, -+ svst1h_u32 (p0, x0 - svcntw () * 8, z0), -+ svst1h (p0, x0 - svcntw () * 8, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st1h_u32_m9: -+** dech x0, all, mul #9 -+** st1h z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1h_u32_m9, svuint32_t, uint16_t, -+ svst1h_u32 (p0, x0 - svcntw () * 9, z0), -+ svst1h (p0, x0 - svcntw () * 9, z0)) -+ -+/* -+** st1h_vnum_u32_0: -+** st1h z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1h_vnum_u32_0, svuint32_t, uint16_t, -+ svst1h_vnum_u32 (p0, x0, 0, z0), -+ svst1h_vnum (p0, x0, 0, z0)) -+ -+/* -+** st1h_vnum_u32_1: -+** st1h z0\.s, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1h_vnum_u32_1, svuint32_t, uint16_t, -+ svst1h_vnum_u32 (p0, x0, 1, z0), -+ svst1h_vnum (p0, x0, 1, z0)) -+ -+/* -+** st1h_vnum_u32_7: -+** st1h z0\.s, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1h_vnum_u32_7, svuint32_t, uint16_t, -+ svst1h_vnum_u32 (p0, x0, 7, z0), -+ svst1h_vnum (p0, x0, 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1h_vnum_u32_8: -+** incb x0, all, mul #4 -+** st1h z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1h_vnum_u32_8, svuint32_t, uint16_t, -+ svst1h_vnum_u32 (p0, x0, 8, z0), -+ svst1h_vnum (p0, x0, 8, z0)) -+ -+/* -+** st1h_vnum_u32_m1: -+** st1h z0\.s, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1h_vnum_u32_m1, svuint32_t, uint16_t, -+ svst1h_vnum_u32 (p0, x0, -1, z0), -+ svst1h_vnum (p0, x0, -1, z0)) -+ -+/* -+** st1h_vnum_u32_m8: -+** st1h z0\.s, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1h_vnum_u32_m8, svuint32_t, uint16_t, -+ svst1h_vnum_u32 (p0, x0, -8, z0), -+ svst1h_vnum (p0, x0, -8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1h_vnum_u32_m9: -+** dech x0, all, mul #9 -+** st1h z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1h_vnum_u32_m9, svuint32_t, uint16_t, -+ svst1h_vnum_u32 (p0, x0, -9, z0), -+ svst1h_vnum (p0, x0, -9, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** st1h_vnum_u32_x1: -+** cnth (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st1h z0\.s, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st1h_vnum_u32_x1, svuint32_t, uint16_t, -+ svst1h_vnum_u32 (p0, x0, x1, z0), -+ svst1h_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_u64.c -new file mode 100644 -index 000000000..448cadb49 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1h_u64.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1h_u64_base: -+** st1h z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1h_u64_base, svuint64_t, uint16_t, -+ svst1h_u64 (p0, x0, z0), -+ svst1h (p0, x0, z0)) -+ -+/* -+** st1h_u64_index: -+** st1h z0\.d, p0, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_STORE (st1h_u64_index, svuint64_t, uint16_t, -+ svst1h_u64 (p0, x0 + x1, z0), -+ svst1h (p0, x0 + x1, z0)) -+ -+/* -+** st1h_u64_1: -+** st1h z0\.d, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1h_u64_1, svuint64_t, uint16_t, -+ svst1h_u64 (p0, x0 + svcntd (), z0), -+ svst1h (p0, x0 + svcntd (), z0)) -+ -+/* -+** st1h_u64_7: -+** st1h z0\.d, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1h_u64_7, svuint64_t, uint16_t, -+ svst1h_u64 (p0, x0 + svcntd () * 7, z0), -+ svst1h (p0, x0 + svcntd () * 7, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st1h_u64_8: -+** incb x0, all, mul #2 -+** st1h z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1h_u64_8, svuint64_t, uint16_t, -+ svst1h_u64 (p0, x0 + svcntd () * 8, z0), -+ svst1h (p0, x0 + svcntd () * 8, z0)) -+ -+/* -+** st1h_u64_m1: -+** st1h z0\.d, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1h_u64_m1, svuint64_t, uint16_t, -+ svst1h_u64 (p0, x0 - svcntd (), z0), -+ svst1h (p0, x0 - svcntd (), z0)) -+ -+/* -+** st1h_u64_m8: -+** st1h z0\.d, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1h_u64_m8, svuint64_t, uint16_t, -+ svst1h_u64 (p0, x0 - svcntd () * 8, z0), -+ svst1h (p0, x0 - svcntd () * 8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1h_u64_m9: -+** decw x0, all, mul #9 -+** st1h z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1h_u64_m9, svuint64_t, uint16_t, -+ svst1h_u64 (p0, x0 - svcntd () * 9, z0), -+ svst1h (p0, x0 - svcntd () * 9, z0)) -+ -+/* -+** st1h_vnum_u64_0: -+** st1h z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1h_vnum_u64_0, svuint64_t, uint16_t, -+ svst1h_vnum_u64 (p0, x0, 0, z0), -+ svst1h_vnum (p0, x0, 0, z0)) -+ -+/* -+** st1h_vnum_u64_1: -+** st1h z0\.d, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1h_vnum_u64_1, svuint64_t, uint16_t, -+ svst1h_vnum_u64 (p0, x0, 1, z0), -+ svst1h_vnum (p0, x0, 1, z0)) -+ -+/* -+** st1h_vnum_u64_7: -+** st1h z0\.d, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1h_vnum_u64_7, svuint64_t, uint16_t, -+ svst1h_vnum_u64 (p0, x0, 7, z0), -+ svst1h_vnum (p0, x0, 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1h_vnum_u64_8: -+** incb x0, all, mul #2 -+** st1h z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1h_vnum_u64_8, svuint64_t, uint16_t, -+ svst1h_vnum_u64 (p0, x0, 8, z0), -+ svst1h_vnum (p0, x0, 8, z0)) -+ -+/* -+** st1h_vnum_u64_m1: -+** st1h z0\.d, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1h_vnum_u64_m1, svuint64_t, uint16_t, -+ svst1h_vnum_u64 (p0, x0, -1, z0), -+ svst1h_vnum (p0, x0, -1, z0)) -+ -+/* -+** st1h_vnum_u64_m8: -+** st1h z0\.d, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1h_vnum_u64_m8, svuint64_t, uint16_t, -+ svst1h_vnum_u64 (p0, x0, -8, z0), -+ svst1h_vnum (p0, x0, -8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1h_vnum_u64_m9: -+** decw x0, all, mul #9 -+** st1h z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1h_vnum_u64_m9, svuint64_t, uint16_t, -+ svst1h_vnum_u64 (p0, x0, -9, z0), -+ svst1h_vnum (p0, x0, -9, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** st1h_vnum_u64_x1: -+** cntw (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st1h z0\.d, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st1h_vnum_u64_x1, svuint64_t, uint16_t, -+ svst1h_vnum_u64 (p0, x0, x1, z0), -+ svst1h_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_s64.c -new file mode 100644 -index 000000000..0893ce926 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_s64.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1w_s64_base: -+** st1w z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1w_s64_base, svint64_t, int32_t, -+ svst1w_s64 (p0, x0, z0), -+ svst1w (p0, x0, z0)) -+ -+/* -+** st1w_s64_index: -+** st1w z0\.d, p0, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_STORE (st1w_s64_index, svint64_t, int32_t, -+ svst1w_s64 (p0, x0 + x1, z0), -+ svst1w (p0, x0 + x1, z0)) -+ -+/* -+** st1w_s64_1: -+** st1w z0\.d, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1w_s64_1, svint64_t, int32_t, -+ svst1w_s64 (p0, x0 + svcntd (), z0), -+ svst1w (p0, x0 + svcntd (), z0)) -+ -+/* -+** st1w_s64_7: -+** st1w z0\.d, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1w_s64_7, svint64_t, int32_t, -+ svst1w_s64 (p0, x0 + svcntd () * 7, z0), -+ svst1w (p0, x0 + svcntd () * 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1w_s64_8: -+** incb x0, all, mul #4 -+** st1w z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1w_s64_8, svint64_t, int32_t, -+ svst1w_s64 (p0, x0 + svcntd () * 8, z0), -+ svst1w (p0, x0 + svcntd () * 8, z0)) -+ -+/* -+** st1w_s64_m1: -+** st1w z0\.d, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1w_s64_m1, svint64_t, int32_t, -+ svst1w_s64 (p0, x0 - svcntd (), z0), -+ svst1w (p0, x0 - svcntd (), z0)) -+ -+/* -+** st1w_s64_m8: -+** st1w z0\.d, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1w_s64_m8, svint64_t, int32_t, -+ svst1w_s64 (p0, x0 - svcntd () * 8, z0), -+ svst1w (p0, x0 - svcntd () * 8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1w_s64_m9: -+** dech x0, all, mul #9 -+** st1w z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1w_s64_m9, svint64_t, int32_t, -+ svst1w_s64 (p0, x0 - svcntd () * 9, z0), -+ svst1w (p0, x0 - svcntd () * 9, z0)) -+ -+/* -+** st1w_vnum_s64_0: -+** st1w z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1w_vnum_s64_0, svint64_t, int32_t, -+ svst1w_vnum_s64 (p0, x0, 0, z0), -+ svst1w_vnum (p0, x0, 0, z0)) -+ -+/* -+** st1w_vnum_s64_1: -+** st1w z0\.d, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1w_vnum_s64_1, svint64_t, int32_t, -+ svst1w_vnum_s64 (p0, x0, 1, z0), -+ svst1w_vnum (p0, x0, 1, z0)) -+ -+/* -+** st1w_vnum_s64_7: -+** st1w z0\.d, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1w_vnum_s64_7, svint64_t, int32_t, -+ svst1w_vnum_s64 (p0, x0, 7, z0), -+ svst1w_vnum (p0, x0, 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1w_vnum_s64_8: -+** incb x0, all, mul #4 -+** st1w z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1w_vnum_s64_8, svint64_t, int32_t, -+ svst1w_vnum_s64 (p0, x0, 8, z0), -+ svst1w_vnum (p0, x0, 8, z0)) -+ -+/* -+** st1w_vnum_s64_m1: -+** st1w z0\.d, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1w_vnum_s64_m1, svint64_t, int32_t, -+ svst1w_vnum_s64 (p0, x0, -1, z0), -+ svst1w_vnum (p0, x0, -1, z0)) -+ -+/* -+** st1w_vnum_s64_m8: -+** st1w z0\.d, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1w_vnum_s64_m8, svint64_t, int32_t, -+ svst1w_vnum_s64 (p0, x0, -8, z0), -+ svst1w_vnum (p0, x0, -8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1w_vnum_s64_m9: -+** dech x0, all, mul #9 -+** st1w z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1w_vnum_s64_m9, svint64_t, int32_t, -+ svst1w_vnum_s64 (p0, x0, -9, z0), -+ svst1w_vnum (p0, x0, -9, z0)) -+ -+/* Using MUL to calculate an index would also be OK. 
*/ -+/* -+** st1w_vnum_s64_x1: -+** cnth (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st1w z0\.d, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st1w_vnum_s64_x1, svint64_t, int32_t, -+ svst1w_vnum_s64 (p0, x0, x1, z0), -+ svst1w_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_scatter_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_scatter_s64.c -new file mode 100644 -index 000000000..2363f592b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_scatter_s64.c -@@ -0,0 +1,263 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1w_scatter_s64: -+** st1w z0\.d, p0, \[z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1w_scatter_s64, svint64_t, svuint64_t, -+ svst1w_scatter_u64base_s64 (p0, z1, z0), -+ svst1w_scatter (p0, z1, z0)) -+ -+/* -+** st1w_scatter_x0_s64_offset: -+** st1w z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1w_scatter_x0_s64_offset, svint64_t, svuint64_t, -+ svst1w_scatter_u64base_offset_s64 (p0, z1, x0, z0), -+ svst1w_scatter_offset (p0, z1, x0, z0)) -+ -+/* -+** st1w_scatter_m4_s64_offset: -+** mov (x[0-9]+), #?-4 -+** st1w z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1w_scatter_m4_s64_offset, svint64_t, svuint64_t, -+ svst1w_scatter_u64base_offset_s64 (p0, z1, -4, z0), -+ svst1w_scatter_offset (p0, z1, -4, z0)) -+ -+/* -+** st1w_scatter_0_s64_offset: -+** st1w z0\.d, p0, \[z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1w_scatter_0_s64_offset, svint64_t, svuint64_t, -+ svst1w_scatter_u64base_offset_s64 (p0, z1, 0, z0), -+ svst1w_scatter_offset (p0, z1, 0, z0)) -+ -+/* -+** st1w_scatter_5_s64_offset: -+** mov (x[0-9]+), #?5 -+** st1w z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1w_scatter_5_s64_offset, svint64_t, svuint64_t, -+ svst1w_scatter_u64base_offset_s64 (p0, z1, 5, z0), -+ svst1w_scatter_offset (p0, z1, 5, z0)) -+ -+/* -+** st1w_scatter_6_s64_offset: -+** mov (x[0-9]+), #?6 -+** st1w z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1w_scatter_6_s64_offset, svint64_t, svuint64_t, -+ svst1w_scatter_u64base_offset_s64 (p0, z1, 6, z0), -+ svst1w_scatter_offset (p0, z1, 6, z0)) -+ -+/* -+** st1w_scatter_7_s64_offset: -+** mov (x[0-9]+), #?7 -+** st1w z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1w_scatter_7_s64_offset, svint64_t, svuint64_t, -+ svst1w_scatter_u64base_offset_s64 (p0, z1, 7, z0), -+ svst1w_scatter_offset (p0, z1, 7, z0)) -+ -+/* -+** st1w_scatter_8_s64_offset: -+** st1w z0\.d, p0, \[z1\.d, #8\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1w_scatter_8_s64_offset, svint64_t, svuint64_t, -+ svst1w_scatter_u64base_offset_s64 (p0, z1, 8, z0), -+ svst1w_scatter_offset (p0, z1, 8, z0)) -+ -+/* -+** st1w_scatter_124_s64_offset: -+** st1w z0\.d, p0, \[z1\.d, #124\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1w_scatter_124_s64_offset, svint64_t, svuint64_t, -+ svst1w_scatter_u64base_offset_s64 (p0, z1, 124, z0), -+ svst1w_scatter_offset (p0, z1, 124, z0)) -+ -+/* -+** st1w_scatter_128_s64_offset: -+** mov (x[0-9]+), #?128 -+** st1w z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1w_scatter_128_s64_offset, svint64_t, svuint64_t, -+ svst1w_scatter_u64base_offset_s64 (p0, z1, 128, z0), -+ svst1w_scatter_offset (p0, z1, 128, z0)) -+ -+/* -+** st1w_scatter_x0_s64_index: -+** lsl (x[0-9]+), x0, #?2 -+** st1w z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1w_scatter_x0_s64_index, 
svint64_t, svuint64_t, -+ svst1w_scatter_u64base_index_s64 (p0, z1, x0, z0), -+ svst1w_scatter_index (p0, z1, x0, z0)) -+ -+/* -+** st1w_scatter_m1_s64_index: -+** mov (x[0-9]+), #?-4 -+** st1w z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1w_scatter_m1_s64_index, svint64_t, svuint64_t, -+ svst1w_scatter_u64base_index_s64 (p0, z1, -1, z0), -+ svst1w_scatter_index (p0, z1, -1, z0)) -+ -+/* -+** st1w_scatter_0_s64_index: -+** st1w z0\.d, p0, \[z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1w_scatter_0_s64_index, svint64_t, svuint64_t, -+ svst1w_scatter_u64base_index_s64 (p0, z1, 0, z0), -+ svst1w_scatter_index (p0, z1, 0, z0)) -+ -+/* -+** st1w_scatter_5_s64_index: -+** st1w z0\.d, p0, \[z1\.d, #20\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1w_scatter_5_s64_index, svint64_t, svuint64_t, -+ svst1w_scatter_u64base_index_s64 (p0, z1, 5, z0), -+ svst1w_scatter_index (p0, z1, 5, z0)) -+ -+/* -+** st1w_scatter_31_s64_index: -+** st1w z0\.d, p0, \[z1\.d, #124\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1w_scatter_31_s64_index, svint64_t, svuint64_t, -+ svst1w_scatter_u64base_index_s64 (p0, z1, 31, z0), -+ svst1w_scatter_index (p0, z1, 31, z0)) -+ -+/* -+** st1w_scatter_32_s64_index: -+** mov (x[0-9]+), #?128 -+** st1w z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1w_scatter_32_s64_index, svint64_t, svuint64_t, -+ svst1w_scatter_u64base_index_s64 (p0, z1, 32, z0), -+ svst1w_scatter_index (p0, z1, 32, z0)) -+ -+/* -+** st1w_scatter_x0_s64_s64offset: -+** st1w z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1w_scatter_x0_s64_s64offset, svint64_t, int32_t, svint64_t, -+ svst1w_scatter_s64offset_s64 (p0, x0, z1, z0), -+ svst1w_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1w_scatter_s64_s64offset: -+** st1w z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1w_scatter_s64_s64offset, svint64_t, int32_t, svint64_t, -+ svst1w_scatter_s64offset_s64 (p0, x0, z1, z0), -+ svst1w_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1w_scatter_ext_s64_s64offset: -+** st1w z0\.d, p0, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1w_scatter_ext_s64_s64offset, svint64_t, int32_t, svint64_t, -+ svst1w_scatter_s64offset_s64 (p0, x0, svextw_s64_x (p0, z1), z0), -+ svst1w_scatter_offset (p0, x0, svextw_x (p0, z1), z0)) -+ -+/* -+** st1w_scatter_x0_s64_u64offset: -+** st1w z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1w_scatter_x0_s64_u64offset, svint64_t, int32_t, svuint64_t, -+ svst1w_scatter_u64offset_s64 (p0, x0, z1, z0), -+ svst1w_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1w_scatter_s64_u64offset: -+** st1w z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1w_scatter_s64_u64offset, svint64_t, int32_t, svuint64_t, -+ svst1w_scatter_u64offset_s64 (p0, x0, z1, z0), -+ svst1w_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1w_scatter_ext_s64_u64offset: -+** st1w z0\.d, p0, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1w_scatter_ext_s64_u64offset, svint64_t, int32_t, svuint64_t, -+ svst1w_scatter_u64offset_s64 (p0, x0, svextw_u64_x (p0, z1), z0), -+ svst1w_scatter_offset (p0, x0, svextw_x (p0, z1), z0)) -+ -+/* -+** st1w_scatter_x0_s64_s64index: -+** st1w z0\.d, p0, \[x0, z1\.d, lsl 2\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1w_scatter_x0_s64_s64index, svint64_t, int32_t, svint64_t, -+ svst1w_scatter_s64index_s64 (p0, x0, z1, z0), -+ svst1w_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1w_scatter_s64_s64index: -+** st1w z0\.d, p0, \[x0, z1\.d, lsl 2\] -+** ret -+*/ 
-+TEST_STORE_SCATTER_SZ (st1w_scatter_s64_s64index, svint64_t, int32_t, svint64_t, -+ svst1w_scatter_s64index_s64 (p0, x0, z1, z0), -+ svst1w_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1w_scatter_ext_s64_s64index: -+** st1w z0\.d, p0, \[x0, z1\.d, sxtw 2\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1w_scatter_ext_s64_s64index, svint64_t, int32_t, svint64_t, -+ svst1w_scatter_s64index_s64 (p0, x0, svextw_s64_x (p0, z1), z0), -+ svst1w_scatter_index (p0, x0, svextw_x (p0, z1), z0)) -+ -+/* -+** st1w_scatter_x0_s64_u64index: -+** st1w z0\.d, p0, \[x0, z1\.d, lsl 2\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1w_scatter_x0_s64_u64index, svint64_t, int32_t, svuint64_t, -+ svst1w_scatter_u64index_s64 (p0, x0, z1, z0), -+ svst1w_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1w_scatter_s64_u64index: -+** st1w z0\.d, p0, \[x0, z1\.d, lsl 2\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1w_scatter_s64_u64index, svint64_t, int32_t, svuint64_t, -+ svst1w_scatter_u64index_s64 (p0, x0, z1, z0), -+ svst1w_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1w_scatter_ext_s64_u64index: -+** st1w z0\.d, p0, \[x0, z1\.d, uxtw 2\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1w_scatter_ext_s64_u64index, svint64_t, int32_t, svuint64_t, -+ svst1w_scatter_u64index_s64 (p0, x0, svextw_u64_x (p0, z1), z0), -+ svst1w_scatter_index (p0, x0, svextw_x (p0, z1), z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_scatter_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_scatter_u64.c -new file mode 100644 -index 000000000..767c009b4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_scatter_u64.c -@@ -0,0 +1,263 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1w_scatter_u64: -+** st1w z0\.d, p0, \[z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1w_scatter_u64, svuint64_t, svuint64_t, -+ svst1w_scatter_u64base_u64 (p0, z1, z0), -+ svst1w_scatter (p0, z1, z0)) -+ -+/* -+** st1w_scatter_x0_u64_offset: -+** st1w z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1w_scatter_x0_u64_offset, svuint64_t, svuint64_t, -+ svst1w_scatter_u64base_offset_u64 (p0, z1, x0, z0), -+ svst1w_scatter_offset (p0, z1, x0, z0)) -+ -+/* -+** st1w_scatter_m4_u64_offset: -+** mov (x[0-9]+), #?-4 -+** st1w z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1w_scatter_m4_u64_offset, svuint64_t, svuint64_t, -+ svst1w_scatter_u64base_offset_u64 (p0, z1, -4, z0), -+ svst1w_scatter_offset (p0, z1, -4, z0)) -+ -+/* -+** st1w_scatter_0_u64_offset: -+** st1w z0\.d, p0, \[z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1w_scatter_0_u64_offset, svuint64_t, svuint64_t, -+ svst1w_scatter_u64base_offset_u64 (p0, z1, 0, z0), -+ svst1w_scatter_offset (p0, z1, 0, z0)) -+ -+/* -+** st1w_scatter_5_u64_offset: -+** mov (x[0-9]+), #?5 -+** st1w z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1w_scatter_5_u64_offset, svuint64_t, svuint64_t, -+ svst1w_scatter_u64base_offset_u64 (p0, z1, 5, z0), -+ svst1w_scatter_offset (p0, z1, 5, z0)) -+ -+/* -+** st1w_scatter_6_u64_offset: -+** mov (x[0-9]+), #?6 -+** st1w z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1w_scatter_6_u64_offset, svuint64_t, svuint64_t, -+ svst1w_scatter_u64base_offset_u64 (p0, z1, 6, z0), -+ svst1w_scatter_offset (p0, z1, 6, z0)) -+ -+/* -+** st1w_scatter_7_u64_offset: -+** mov (x[0-9]+), #?7 -+** st1w z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1w_scatter_7_u64_offset, svuint64_t, 
svuint64_t, -+ svst1w_scatter_u64base_offset_u64 (p0, z1, 7, z0), -+ svst1w_scatter_offset (p0, z1, 7, z0)) -+ -+/* -+** st1w_scatter_8_u64_offset: -+** st1w z0\.d, p0, \[z1\.d, #8\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1w_scatter_8_u64_offset, svuint64_t, svuint64_t, -+ svst1w_scatter_u64base_offset_u64 (p0, z1, 8, z0), -+ svst1w_scatter_offset (p0, z1, 8, z0)) -+ -+/* -+** st1w_scatter_124_u64_offset: -+** st1w z0\.d, p0, \[z1\.d, #124\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1w_scatter_124_u64_offset, svuint64_t, svuint64_t, -+ svst1w_scatter_u64base_offset_u64 (p0, z1, 124, z0), -+ svst1w_scatter_offset (p0, z1, 124, z0)) -+ -+/* -+** st1w_scatter_128_u64_offset: -+** mov (x[0-9]+), #?128 -+** st1w z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1w_scatter_128_u64_offset, svuint64_t, svuint64_t, -+ svst1w_scatter_u64base_offset_u64 (p0, z1, 128, z0), -+ svst1w_scatter_offset (p0, z1, 128, z0)) -+ -+/* -+** st1w_scatter_x0_u64_index: -+** lsl (x[0-9]+), x0, #?2 -+** st1w z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1w_scatter_x0_u64_index, svuint64_t, svuint64_t, -+ svst1w_scatter_u64base_index_u64 (p0, z1, x0, z0), -+ svst1w_scatter_index (p0, z1, x0, z0)) -+ -+/* -+** st1w_scatter_m1_u64_index: -+** mov (x[0-9]+), #?-4 -+** st1w z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1w_scatter_m1_u64_index, svuint64_t, svuint64_t, -+ svst1w_scatter_u64base_index_u64 (p0, z1, -1, z0), -+ svst1w_scatter_index (p0, z1, -1, z0)) -+ -+/* -+** st1w_scatter_0_u64_index: -+** st1w z0\.d, p0, \[z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1w_scatter_0_u64_index, svuint64_t, svuint64_t, -+ svst1w_scatter_u64base_index_u64 (p0, z1, 0, z0), -+ svst1w_scatter_index (p0, z1, 0, z0)) -+ -+/* -+** st1w_scatter_5_u64_index: -+** st1w z0\.d, p0, \[z1\.d, #20\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1w_scatter_5_u64_index, svuint64_t, svuint64_t, -+ svst1w_scatter_u64base_index_u64 (p0, z1, 5, z0), -+ svst1w_scatter_index (p0, z1, 5, z0)) -+ -+/* -+** st1w_scatter_31_u64_index: -+** st1w z0\.d, p0, \[z1\.d, #124\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1w_scatter_31_u64_index, svuint64_t, svuint64_t, -+ svst1w_scatter_u64base_index_u64 (p0, z1, 31, z0), -+ svst1w_scatter_index (p0, z1, 31, z0)) -+ -+/* -+** st1w_scatter_32_u64_index: -+** mov (x[0-9]+), #?128 -+** st1w z0\.d, p0, \[\1, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_ZS (st1w_scatter_32_u64_index, svuint64_t, svuint64_t, -+ svst1w_scatter_u64base_index_u64 (p0, z1, 32, z0), -+ svst1w_scatter_index (p0, z1, 32, z0)) -+ -+/* -+** st1w_scatter_x0_u64_s64offset: -+** st1w z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1w_scatter_x0_u64_s64offset, svuint64_t, uint32_t, svint64_t, -+ svst1w_scatter_s64offset_u64 (p0, x0, z1, z0), -+ svst1w_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1w_scatter_u64_s64offset: -+** st1w z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1w_scatter_u64_s64offset, svuint64_t, uint32_t, svint64_t, -+ svst1w_scatter_s64offset_u64 (p0, x0, z1, z0), -+ svst1w_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1w_scatter_ext_u64_s64offset: -+** st1w z0\.d, p0, \[x0, z1\.d, sxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1w_scatter_ext_u64_s64offset, svuint64_t, uint32_t, svint64_t, -+ svst1w_scatter_s64offset_u64 (p0, x0, svextw_s64_x (p0, z1), z0), -+ svst1w_scatter_offset (p0, x0, svextw_x (p0, z1), z0)) -+ -+/* -+** st1w_scatter_x0_u64_u64offset: -+** st1w z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ 
(st1w_scatter_x0_u64_u64offset, svuint64_t, uint32_t, svuint64_t, -+ svst1w_scatter_u64offset_u64 (p0, x0, z1, z0), -+ svst1w_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1w_scatter_u64_u64offset: -+** st1w z0\.d, p0, \[x0, z1\.d\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1w_scatter_u64_u64offset, svuint64_t, uint32_t, svuint64_t, -+ svst1w_scatter_u64offset_u64 (p0, x0, z1, z0), -+ svst1w_scatter_offset (p0, x0, z1, z0)) -+ -+/* -+** st1w_scatter_ext_u64_u64offset: -+** st1w z0\.d, p0, \[x0, z1\.d, uxtw\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1w_scatter_ext_u64_u64offset, svuint64_t, uint32_t, svuint64_t, -+ svst1w_scatter_u64offset_u64 (p0, x0, svextw_u64_x (p0, z1), z0), -+ svst1w_scatter_offset (p0, x0, svextw_x (p0, z1), z0)) -+ -+/* -+** st1w_scatter_x0_u64_s64index: -+** st1w z0\.d, p0, \[x0, z1\.d, lsl 2\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1w_scatter_x0_u64_s64index, svuint64_t, uint32_t, svint64_t, -+ svst1w_scatter_s64index_u64 (p0, x0, z1, z0), -+ svst1w_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1w_scatter_u64_s64index: -+** st1w z0\.d, p0, \[x0, z1\.d, lsl 2\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1w_scatter_u64_s64index, svuint64_t, uint32_t, svint64_t, -+ svst1w_scatter_s64index_u64 (p0, x0, z1, z0), -+ svst1w_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1w_scatter_ext_u64_s64index: -+** st1w z0\.d, p0, \[x0, z1\.d, sxtw 2\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1w_scatter_ext_u64_s64index, svuint64_t, uint32_t, svint64_t, -+ svst1w_scatter_s64index_u64 (p0, x0, svextw_s64_x (p0, z1), z0), -+ svst1w_scatter_index (p0, x0, svextw_x (p0, z1), z0)) -+ -+/* -+** st1w_scatter_x0_u64_u64index: -+** st1w z0\.d, p0, \[x0, z1\.d, lsl 2\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1w_scatter_x0_u64_u64index, svuint64_t, uint32_t, svuint64_t, -+ svst1w_scatter_u64index_u64 (p0, x0, z1, z0), -+ svst1w_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1w_scatter_u64_u64index: -+** st1w z0\.d, p0, \[x0, z1\.d, lsl 2\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1w_scatter_u64_u64index, svuint64_t, uint32_t, svuint64_t, -+ svst1w_scatter_u64index_u64 (p0, x0, z1, z0), -+ svst1w_scatter_index (p0, x0, z1, z0)) -+ -+/* -+** st1w_scatter_ext_u64_u64index: -+** st1w z0\.d, p0, \[x0, z1\.d, uxtw 2\] -+** ret -+*/ -+TEST_STORE_SCATTER_SZ (st1w_scatter_ext_u64_u64index, svuint64_t, uint32_t, svuint64_t, -+ svst1w_scatter_u64index_u64 (p0, x0, svextw_u64_x (p0, z1), z0), -+ svst1w_scatter_index (p0, x0, svextw_x (p0, z1), z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_u64.c -new file mode 100644 -index 000000000..882abebbb ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st1w_u64.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st1w_u64_base: -+** st1w z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1w_u64_base, svuint64_t, uint32_t, -+ svst1w_u64 (p0, x0, z0), -+ svst1w (p0, x0, z0)) -+ -+/* -+** st1w_u64_index: -+** st1w z0\.d, p0, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_STORE (st1w_u64_index, svuint64_t, uint32_t, -+ svst1w_u64 (p0, x0 + x1, z0), -+ svst1w (p0, x0 + x1, z0)) -+ -+/* -+** st1w_u64_1: -+** st1w z0\.d, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1w_u64_1, svuint64_t, uint32_t, -+ svst1w_u64 (p0, x0 + svcntd (), z0), -+ svst1w (p0, x0 + svcntd (), z0)) -+ -+/* -+** st1w_u64_7: -+** st1w z0\.d, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1w_u64_7, svuint64_t, uint32_t, -+ svst1w_u64 (p0, x0 + svcntd () * 7, z0), -+ svst1w (p0, x0 + svcntd () * 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1w_u64_8: -+** incb x0, all, mul #4 -+** st1w z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1w_u64_8, svuint64_t, uint32_t, -+ svst1w_u64 (p0, x0 + svcntd () * 8, z0), -+ svst1w (p0, x0 + svcntd () * 8, z0)) -+ -+/* -+** st1w_u64_m1: -+** st1w z0\.d, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1w_u64_m1, svuint64_t, uint32_t, -+ svst1w_u64 (p0, x0 - svcntd (), z0), -+ svst1w (p0, x0 - svcntd (), z0)) -+ -+/* -+** st1w_u64_m8: -+** st1w z0\.d, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1w_u64_m8, svuint64_t, uint32_t, -+ svst1w_u64 (p0, x0 - svcntd () * 8, z0), -+ svst1w (p0, x0 - svcntd () * 8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1w_u64_m9: -+** dech x0, all, mul #9 -+** st1w z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1w_u64_m9, svuint64_t, uint32_t, -+ svst1w_u64 (p0, x0 - svcntd () * 9, z0), -+ svst1w (p0, x0 - svcntd () * 9, z0)) -+ -+/* -+** st1w_vnum_u64_0: -+** st1w z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1w_vnum_u64_0, svuint64_t, uint32_t, -+ svst1w_vnum_u64 (p0, x0, 0, z0), -+ svst1w_vnum (p0, x0, 0, z0)) -+ -+/* -+** st1w_vnum_u64_1: -+** st1w z0\.d, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1w_vnum_u64_1, svuint64_t, uint32_t, -+ svst1w_vnum_u64 (p0, x0, 1, z0), -+ svst1w_vnum (p0, x0, 1, z0)) -+ -+/* -+** st1w_vnum_u64_7: -+** st1w z0\.d, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (st1w_vnum_u64_7, svuint64_t, uint32_t, -+ svst1w_vnum_u64 (p0, x0, 7, z0), -+ svst1w_vnum (p0, x0, 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1w_vnum_u64_8: -+** incb x0, all, mul #4 -+** st1w z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1w_vnum_u64_8, svuint64_t, uint32_t, -+ svst1w_vnum_u64 (p0, x0, 8, z0), -+ svst1w_vnum (p0, x0, 8, z0)) -+ -+/* -+** st1w_vnum_u64_m1: -+** st1w z0\.d, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (st1w_vnum_u64_m1, svuint64_t, uint32_t, -+ svst1w_vnum_u64 (p0, x0, -1, z0), -+ svst1w_vnum (p0, x0, -1, z0)) -+ -+/* -+** st1w_vnum_u64_m8: -+** st1w z0\.d, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (st1w_vnum_u64_m8, svuint64_t, uint32_t, -+ svst1w_vnum_u64 (p0, x0, -8, z0), -+ svst1w_vnum (p0, x0, -8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st1w_vnum_u64_m9: -+** dech x0, all, mul #9 -+** st1w z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st1w_vnum_u64_m9, svuint64_t, uint32_t, -+ svst1w_vnum_u64 (p0, x0, -9, z0), -+ svst1w_vnum (p0, x0, -9, z0)) -+ -+/* Using MUL to calculate an index would also be OK. 
*/ -+/* -+** st1w_vnum_u64_x1: -+** cnth (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st1w z0\.d, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st1w_vnum_u64_x1, svuint64_t, uint32_t, -+ svst1w_vnum_u64 (p0, x0, x1, z0), -+ svst1w_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_bf16.c -new file mode 100644 -index 000000000..a4a57af08 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_bf16.c -@@ -0,0 +1,200 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st2_bf16_base: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_bf16_base, svbfloat16x2_t, bfloat16_t, -+ svst2_bf16 (p0, x0, z0), -+ svst2 (p0, x0, z0)) -+ -+/* -+** st2_bf16_index: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_STORE (st2_bf16_index, svbfloat16x2_t, bfloat16_t, -+ svst2_bf16 (p0, x0 + x1, z0), -+ svst2 (p0, x0 + x1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_bf16_1: -+** incb x0 -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_bf16_1, svbfloat16x2_t, bfloat16_t, -+ svst2_bf16 (p0, x0 + svcnth (), z0), -+ svst2 (p0, x0 + svcnth (), z0)) -+ -+/* -+** st2_bf16_2: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_bf16_2, svbfloat16x2_t, bfloat16_t, -+ svst2_bf16 (p0, x0 + svcnth () * 2, z0), -+ svst2 (p0, x0 + svcnth () * 2, z0)) -+ -+/* -+** st2_bf16_14: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_bf16_14, svbfloat16x2_t, bfloat16_t, -+ svst2_bf16 (p0, x0 + svcnth () * 14, z0), -+ svst2 (p0, x0 + svcnth () * 14, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_bf16_16: -+** incb x0, all, mul #16 -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_bf16_16, svbfloat16x2_t, bfloat16_t, -+ svst2_bf16 (p0, x0 + svcnth () * 16, z0), -+ svst2 (p0, x0 + svcnth () * 16, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_bf16_m1: -+** decb x0 -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_bf16_m1, svbfloat16x2_t, bfloat16_t, -+ svst2_bf16 (p0, x0 - svcnth (), z0), -+ svst2 (p0, x0 - svcnth (), z0)) -+ -+/* -+** st2_bf16_m2: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_bf16_m2, svbfloat16x2_t, bfloat16_t, -+ svst2_bf16 (p0, x0 - svcnth () * 2, z0), -+ svst2 (p0, x0 - svcnth () * 2, z0)) -+ -+/* -+** st2_bf16_m16: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_bf16_m16, svbfloat16x2_t, bfloat16_t, -+ svst2_bf16 (p0, x0 - svcnth () * 16, z0), -+ svst2 (p0, x0 - svcnth () * 16, z0)) -+ -+/* -+** st2_bf16_m18: -+** addvl (x[0-9]+), x0, #-18 -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st2_bf16_m18, svbfloat16x2_t, bfloat16_t, -+ svst2_bf16 (p0, x0 - svcnth () * 18, z0), -+ svst2 (p0, x0 - svcnth () * 18, z0)) -+ -+/* -+** st2_vnum_bf16_0: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_bf16_0, svbfloat16x2_t, bfloat16_t, -+ svst2_vnum_bf16 (p0, x0, 0, z0), -+ svst2_vnum (p0, x0, 0, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st2_vnum_bf16_1: -+** incb x0 -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_bf16_1, svbfloat16x2_t, bfloat16_t, -+ svst2_vnum_bf16 (p0, x0, 1, z0), -+ svst2_vnum (p0, x0, 1, z0)) -+ -+/* -+** st2_vnum_bf16_2: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_bf16_2, svbfloat16x2_t, bfloat16_t, -+ svst2_vnum_bf16 (p0, x0, 2, z0), -+ svst2_vnum (p0, x0, 2, z0)) -+ -+/* -+** st2_vnum_bf16_14: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_bf16_14, svbfloat16x2_t, bfloat16_t, -+ svst2_vnum_bf16 (p0, x0, 14, z0), -+ svst2_vnum (p0, x0, 14, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_vnum_bf16_16: -+** incb x0, all, mul #16 -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_bf16_16, svbfloat16x2_t, bfloat16_t, -+ svst2_vnum_bf16 (p0, x0, 16, z0), -+ svst2_vnum (p0, x0, 16, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_vnum_bf16_m1: -+** decb x0 -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_bf16_m1, svbfloat16x2_t, bfloat16_t, -+ svst2_vnum_bf16 (p0, x0, -1, z0), -+ svst2_vnum (p0, x0, -1, z0)) -+ -+/* -+** st2_vnum_bf16_m2: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_bf16_m2, svbfloat16x2_t, bfloat16_t, -+ svst2_vnum_bf16 (p0, x0, -2, z0), -+ svst2_vnum (p0, x0, -2, z0)) -+ -+/* -+** st2_vnum_bf16_m16: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_bf16_m16, svbfloat16x2_t, bfloat16_t, -+ svst2_vnum_bf16 (p0, x0, -16, z0), -+ svst2_vnum (p0, x0, -16, z0)) -+ -+/* -+** st2_vnum_bf16_m18: -+** addvl (x[0-9]+), x0, #-18 -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st2_vnum_bf16_m18, svbfloat16x2_t, bfloat16_t, -+ svst2_vnum_bf16 (p0, x0, -18, z0), -+ svst2_vnum (p0, x0, -18, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** st2_vnum_bf16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st2_vnum_bf16_x1, svbfloat16x2_t, bfloat16_t, -+ svst2_vnum_bf16 (p0, x0, x1, z0), -+ svst2_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_f16.c -new file mode 100644 -index 000000000..014203be6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_f16.c -@@ -0,0 +1,200 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st2_f16_base: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_f16_base, svfloat16x2_t, float16_t, -+ svst2_f16 (p0, x0, z0), -+ svst2 (p0, x0, z0)) -+ -+/* -+** st2_f16_index: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_STORE (st2_f16_index, svfloat16x2_t, float16_t, -+ svst2_f16 (p0, x0 + x1, z0), -+ svst2 (p0, x0 + x1, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st2_f16_1: -+** incb x0 -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_f16_1, svfloat16x2_t, float16_t, -+ svst2_f16 (p0, x0 + svcnth (), z0), -+ svst2 (p0, x0 + svcnth (), z0)) -+ -+/* -+** st2_f16_2: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_f16_2, svfloat16x2_t, float16_t, -+ svst2_f16 (p0, x0 + svcnth () * 2, z0), -+ svst2 (p0, x0 + svcnth () * 2, z0)) -+ -+/* -+** st2_f16_14: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_f16_14, svfloat16x2_t, float16_t, -+ svst2_f16 (p0, x0 + svcnth () * 14, z0), -+ svst2 (p0, x0 + svcnth () * 14, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_f16_16: -+** incb x0, all, mul #16 -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_f16_16, svfloat16x2_t, float16_t, -+ svst2_f16 (p0, x0 + svcnth () * 16, z0), -+ svst2 (p0, x0 + svcnth () * 16, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_f16_m1: -+** decb x0 -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_f16_m1, svfloat16x2_t, float16_t, -+ svst2_f16 (p0, x0 - svcnth (), z0), -+ svst2 (p0, x0 - svcnth (), z0)) -+ -+/* -+** st2_f16_m2: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_f16_m2, svfloat16x2_t, float16_t, -+ svst2_f16 (p0, x0 - svcnth () * 2, z0), -+ svst2 (p0, x0 - svcnth () * 2, z0)) -+ -+/* -+** st2_f16_m16: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_f16_m16, svfloat16x2_t, float16_t, -+ svst2_f16 (p0, x0 - svcnth () * 16, z0), -+ svst2 (p0, x0 - svcnth () * 16, z0)) -+ -+/* -+** st2_f16_m18: -+** addvl (x[0-9]+), x0, #-18 -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st2_f16_m18, svfloat16x2_t, float16_t, -+ svst2_f16 (p0, x0 - svcnth () * 18, z0), -+ svst2 (p0, x0 - svcnth () * 18, z0)) -+ -+/* -+** st2_vnum_f16_0: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_f16_0, svfloat16x2_t, float16_t, -+ svst2_vnum_f16 (p0, x0, 0, z0), -+ svst2_vnum (p0, x0, 0, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_vnum_f16_1: -+** incb x0 -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_f16_1, svfloat16x2_t, float16_t, -+ svst2_vnum_f16 (p0, x0, 1, z0), -+ svst2_vnum (p0, x0, 1, z0)) -+ -+/* -+** st2_vnum_f16_2: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_f16_2, svfloat16x2_t, float16_t, -+ svst2_vnum_f16 (p0, x0, 2, z0), -+ svst2_vnum (p0, x0, 2, z0)) -+ -+/* -+** st2_vnum_f16_14: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_f16_14, svfloat16x2_t, float16_t, -+ svst2_vnum_f16 (p0, x0, 14, z0), -+ svst2_vnum (p0, x0, 14, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_vnum_f16_16: -+** incb x0, all, mul #16 -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_f16_16, svfloat16x2_t, float16_t, -+ svst2_vnum_f16 (p0, x0, 16, z0), -+ svst2_vnum (p0, x0, 16, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st2_vnum_f16_m1: -+** decb x0 -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_f16_m1, svfloat16x2_t, float16_t, -+ svst2_vnum_f16 (p0, x0, -1, z0), -+ svst2_vnum (p0, x0, -1, z0)) -+ -+/* -+** st2_vnum_f16_m2: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_f16_m2, svfloat16x2_t, float16_t, -+ svst2_vnum_f16 (p0, x0, -2, z0), -+ svst2_vnum (p0, x0, -2, z0)) -+ -+/* -+** st2_vnum_f16_m16: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_f16_m16, svfloat16x2_t, float16_t, -+ svst2_vnum_f16 (p0, x0, -16, z0), -+ svst2_vnum (p0, x0, -16, z0)) -+ -+/* -+** st2_vnum_f16_m18: -+** addvl (x[0-9]+), x0, #-18 -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st2_vnum_f16_m18, svfloat16x2_t, float16_t, -+ svst2_vnum_f16 (p0, x0, -18, z0), -+ svst2_vnum (p0, x0, -18, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** st2_vnum_f16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st2_vnum_f16_x1, svfloat16x2_t, float16_t, -+ svst2_vnum_f16 (p0, x0, x1, z0), -+ svst2_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_f32.c -new file mode 100644 -index 000000000..ba271882e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_f32.c -@@ -0,0 +1,200 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st2_f32_base: -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_f32_base, svfloat32x2_t, float32_t, -+ svst2_f32 (p0, x0, z0), -+ svst2 (p0, x0, z0)) -+ -+/* -+** st2_f32_index: -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_STORE (st2_f32_index, svfloat32x2_t, float32_t, -+ svst2_f32 (p0, x0 + x1, z0), -+ svst2 (p0, x0 + x1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_f32_1: -+** incb x0 -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_f32_1, svfloat32x2_t, float32_t, -+ svst2_f32 (p0, x0 + svcntw (), z0), -+ svst2 (p0, x0 + svcntw (), z0)) -+ -+/* -+** st2_f32_2: -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_f32_2, svfloat32x2_t, float32_t, -+ svst2_f32 (p0, x0 + svcntw () * 2, z0), -+ svst2 (p0, x0 + svcntw () * 2, z0)) -+ -+/* -+** st2_f32_14: -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_f32_14, svfloat32x2_t, float32_t, -+ svst2_f32 (p0, x0 + svcntw () * 14, z0), -+ svst2 (p0, x0 + svcntw () * 14, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_f32_16: -+** incb x0, all, mul #16 -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_f32_16, svfloat32x2_t, float32_t, -+ svst2_f32 (p0, x0 + svcntw () * 16, z0), -+ svst2 (p0, x0 + svcntw () * 16, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st2_f32_m1: -+** decb x0 -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_f32_m1, svfloat32x2_t, float32_t, -+ svst2_f32 (p0, x0 - svcntw (), z0), -+ svst2 (p0, x0 - svcntw (), z0)) -+ -+/* -+** st2_f32_m2: -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_f32_m2, svfloat32x2_t, float32_t, -+ svst2_f32 (p0, x0 - svcntw () * 2, z0), -+ svst2 (p0, x0 - svcntw () * 2, z0)) -+ -+/* -+** st2_f32_m16: -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_f32_m16, svfloat32x2_t, float32_t, -+ svst2_f32 (p0, x0 - svcntw () * 16, z0), -+ svst2 (p0, x0 - svcntw () * 16, z0)) -+ -+/* -+** st2_f32_m18: -+** addvl (x[0-9]+), x0, #-18 -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st2_f32_m18, svfloat32x2_t, float32_t, -+ svst2_f32 (p0, x0 - svcntw () * 18, z0), -+ svst2 (p0, x0 - svcntw () * 18, z0)) -+ -+/* -+** st2_vnum_f32_0: -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_f32_0, svfloat32x2_t, float32_t, -+ svst2_vnum_f32 (p0, x0, 0, z0), -+ svst2_vnum (p0, x0, 0, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_vnum_f32_1: -+** incb x0 -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_f32_1, svfloat32x2_t, float32_t, -+ svst2_vnum_f32 (p0, x0, 1, z0), -+ svst2_vnum (p0, x0, 1, z0)) -+ -+/* -+** st2_vnum_f32_2: -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_f32_2, svfloat32x2_t, float32_t, -+ svst2_vnum_f32 (p0, x0, 2, z0), -+ svst2_vnum (p0, x0, 2, z0)) -+ -+/* -+** st2_vnum_f32_14: -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_f32_14, svfloat32x2_t, float32_t, -+ svst2_vnum_f32 (p0, x0, 14, z0), -+ svst2_vnum (p0, x0, 14, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_vnum_f32_16: -+** incb x0, all, mul #16 -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_f32_16, svfloat32x2_t, float32_t, -+ svst2_vnum_f32 (p0, x0, 16, z0), -+ svst2_vnum (p0, x0, 16, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_vnum_f32_m1: -+** decb x0 -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_f32_m1, svfloat32x2_t, float32_t, -+ svst2_vnum_f32 (p0, x0, -1, z0), -+ svst2_vnum (p0, x0, -1, z0)) -+ -+/* -+** st2_vnum_f32_m2: -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_f32_m2, svfloat32x2_t, float32_t, -+ svst2_vnum_f32 (p0, x0, -2, z0), -+ svst2_vnum (p0, x0, -2, z0)) -+ -+/* -+** st2_vnum_f32_m16: -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_f32_m16, svfloat32x2_t, float32_t, -+ svst2_vnum_f32 (p0, x0, -16, z0), -+ svst2_vnum (p0, x0, -16, z0)) -+ -+/* -+** st2_vnum_f32_m18: -+** addvl (x[0-9]+), x0, #-18 -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st2_vnum_f32_m18, svfloat32x2_t, float32_t, -+ svst2_vnum_f32 (p0, x0, -18, z0), -+ svst2_vnum (p0, x0, -18, z0)) -+ -+/* Using MUL to calculate an index would also be OK. 
*/ -+/* -+** st2_vnum_f32_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st2_vnum_f32_x1, svfloat32x2_t, float32_t, -+ svst2_vnum_f32 (p0, x0, x1, z0), -+ svst2_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_f64.c -new file mode 100644 -index 000000000..c499ba0fe ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_f64.c -@@ -0,0 +1,200 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st2_f64_base: -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_f64_base, svfloat64x2_t, float64_t, -+ svst2_f64 (p0, x0, z0), -+ svst2 (p0, x0, z0)) -+ -+/* -+** st2_f64_index: -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_STORE (st2_f64_index, svfloat64x2_t, float64_t, -+ svst2_f64 (p0, x0 + x1, z0), -+ svst2 (p0, x0 + x1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_f64_1: -+** incb x0 -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_f64_1, svfloat64x2_t, float64_t, -+ svst2_f64 (p0, x0 + svcntd (), z0), -+ svst2 (p0, x0 + svcntd (), z0)) -+ -+/* -+** st2_f64_2: -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_f64_2, svfloat64x2_t, float64_t, -+ svst2_f64 (p0, x0 + svcntd () * 2, z0), -+ svst2 (p0, x0 + svcntd () * 2, z0)) -+ -+/* -+** st2_f64_14: -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_f64_14, svfloat64x2_t, float64_t, -+ svst2_f64 (p0, x0 + svcntd () * 14, z0), -+ svst2 (p0, x0 + svcntd () * 14, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_f64_16: -+** incb x0, all, mul #16 -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_f64_16, svfloat64x2_t, float64_t, -+ svst2_f64 (p0, x0 + svcntd () * 16, z0), -+ svst2 (p0, x0 + svcntd () * 16, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_f64_m1: -+** decb x0 -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_f64_m1, svfloat64x2_t, float64_t, -+ svst2_f64 (p0, x0 - svcntd (), z0), -+ svst2 (p0, x0 - svcntd (), z0)) -+ -+/* -+** st2_f64_m2: -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_f64_m2, svfloat64x2_t, float64_t, -+ svst2_f64 (p0, x0 - svcntd () * 2, z0), -+ svst2 (p0, x0 - svcntd () * 2, z0)) -+ -+/* -+** st2_f64_m16: -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_f64_m16, svfloat64x2_t, float64_t, -+ svst2_f64 (p0, x0 - svcntd () * 16, z0), -+ svst2 (p0, x0 - svcntd () * 16, z0)) -+ -+/* -+** st2_f64_m18: -+** addvl (x[0-9]+), x0, #-18 -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st2_f64_m18, svfloat64x2_t, float64_t, -+ svst2_f64 (p0, x0 - svcntd () * 18, z0), -+ svst2 (p0, x0 - svcntd () * 18, z0)) -+ -+/* -+** st2_vnum_f64_0: -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_f64_0, svfloat64x2_t, float64_t, -+ svst2_vnum_f64 (p0, x0, 0, z0), -+ svst2_vnum (p0, x0, 0, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st2_vnum_f64_1: -+** incb x0 -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_f64_1, svfloat64x2_t, float64_t, -+ svst2_vnum_f64 (p0, x0, 1, z0), -+ svst2_vnum (p0, x0, 1, z0)) -+ -+/* -+** st2_vnum_f64_2: -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_f64_2, svfloat64x2_t, float64_t, -+ svst2_vnum_f64 (p0, x0, 2, z0), -+ svst2_vnum (p0, x0, 2, z0)) -+ -+/* -+** st2_vnum_f64_14: -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_f64_14, svfloat64x2_t, float64_t, -+ svst2_vnum_f64 (p0, x0, 14, z0), -+ svst2_vnum (p0, x0, 14, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_vnum_f64_16: -+** incb x0, all, mul #16 -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_f64_16, svfloat64x2_t, float64_t, -+ svst2_vnum_f64 (p0, x0, 16, z0), -+ svst2_vnum (p0, x0, 16, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_vnum_f64_m1: -+** decb x0 -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_f64_m1, svfloat64x2_t, float64_t, -+ svst2_vnum_f64 (p0, x0, -1, z0), -+ svst2_vnum (p0, x0, -1, z0)) -+ -+/* -+** st2_vnum_f64_m2: -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_f64_m2, svfloat64x2_t, float64_t, -+ svst2_vnum_f64 (p0, x0, -2, z0), -+ svst2_vnum (p0, x0, -2, z0)) -+ -+/* -+** st2_vnum_f64_m16: -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_f64_m16, svfloat64x2_t, float64_t, -+ svst2_vnum_f64 (p0, x0, -16, z0), -+ svst2_vnum (p0, x0, -16, z0)) -+ -+/* -+** st2_vnum_f64_m18: -+** addvl (x[0-9]+), x0, #-18 -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st2_vnum_f64_m18, svfloat64x2_t, float64_t, -+ svst2_vnum_f64 (p0, x0, -18, z0), -+ svst2_vnum (p0, x0, -18, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** st2_vnum_f64_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st2_vnum_f64_x1, svfloat64x2_t, float64_t, -+ svst2_vnum_f64 (p0, x0, x1, z0), -+ svst2_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_s16.c -new file mode 100644 -index 000000000..860b45eac ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_s16.c -@@ -0,0 +1,200 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st2_s16_base: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_s16_base, svint16x2_t, int16_t, -+ svst2_s16 (p0, x0, z0), -+ svst2 (p0, x0, z0)) -+ -+/* -+** st2_s16_index: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_STORE (st2_s16_index, svint16x2_t, int16_t, -+ svst2_s16 (p0, x0 + x1, z0), -+ svst2 (p0, x0 + x1, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st2_s16_1: -+** incb x0 -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_s16_1, svint16x2_t, int16_t, -+ svst2_s16 (p0, x0 + svcnth (), z0), -+ svst2 (p0, x0 + svcnth (), z0)) -+ -+/* -+** st2_s16_2: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_s16_2, svint16x2_t, int16_t, -+ svst2_s16 (p0, x0 + svcnth () * 2, z0), -+ svst2 (p0, x0 + svcnth () * 2, z0)) -+ -+/* -+** st2_s16_14: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_s16_14, svint16x2_t, int16_t, -+ svst2_s16 (p0, x0 + svcnth () * 14, z0), -+ svst2 (p0, x0 + svcnth () * 14, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_s16_16: -+** incb x0, all, mul #16 -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_s16_16, svint16x2_t, int16_t, -+ svst2_s16 (p0, x0 + svcnth () * 16, z0), -+ svst2 (p0, x0 + svcnth () * 16, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_s16_m1: -+** decb x0 -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_s16_m1, svint16x2_t, int16_t, -+ svst2_s16 (p0, x0 - svcnth (), z0), -+ svst2 (p0, x0 - svcnth (), z0)) -+ -+/* -+** st2_s16_m2: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_s16_m2, svint16x2_t, int16_t, -+ svst2_s16 (p0, x0 - svcnth () * 2, z0), -+ svst2 (p0, x0 - svcnth () * 2, z0)) -+ -+/* -+** st2_s16_m16: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_s16_m16, svint16x2_t, int16_t, -+ svst2_s16 (p0, x0 - svcnth () * 16, z0), -+ svst2 (p0, x0 - svcnth () * 16, z0)) -+ -+/* -+** st2_s16_m18: -+** addvl (x[0-9]+), x0, #-18 -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st2_s16_m18, svint16x2_t, int16_t, -+ svst2_s16 (p0, x0 - svcnth () * 18, z0), -+ svst2 (p0, x0 - svcnth () * 18, z0)) -+ -+/* -+** st2_vnum_s16_0: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s16_0, svint16x2_t, int16_t, -+ svst2_vnum_s16 (p0, x0, 0, z0), -+ svst2_vnum (p0, x0, 0, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_vnum_s16_1: -+** incb x0 -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s16_1, svint16x2_t, int16_t, -+ svst2_vnum_s16 (p0, x0, 1, z0), -+ svst2_vnum (p0, x0, 1, z0)) -+ -+/* -+** st2_vnum_s16_2: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s16_2, svint16x2_t, int16_t, -+ svst2_vnum_s16 (p0, x0, 2, z0), -+ svst2_vnum (p0, x0, 2, z0)) -+ -+/* -+** st2_vnum_s16_14: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s16_14, svint16x2_t, int16_t, -+ svst2_vnum_s16 (p0, x0, 14, z0), -+ svst2_vnum (p0, x0, 14, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_vnum_s16_16: -+** incb x0, all, mul #16 -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s16_16, svint16x2_t, int16_t, -+ svst2_vnum_s16 (p0, x0, 16, z0), -+ svst2_vnum (p0, x0, 16, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st2_vnum_s16_m1: -+** decb x0 -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s16_m1, svint16x2_t, int16_t, -+ svst2_vnum_s16 (p0, x0, -1, z0), -+ svst2_vnum (p0, x0, -1, z0)) -+ -+/* -+** st2_vnum_s16_m2: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s16_m2, svint16x2_t, int16_t, -+ svst2_vnum_s16 (p0, x0, -2, z0), -+ svst2_vnum (p0, x0, -2, z0)) -+ -+/* -+** st2_vnum_s16_m16: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s16_m16, svint16x2_t, int16_t, -+ svst2_vnum_s16 (p0, x0, -16, z0), -+ svst2_vnum (p0, x0, -16, z0)) -+ -+/* -+** st2_vnum_s16_m18: -+** addvl (x[0-9]+), x0, #-18 -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s16_m18, svint16x2_t, int16_t, -+ svst2_vnum_s16 (p0, x0, -18, z0), -+ svst2_vnum (p0, x0, -18, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** st2_vnum_s16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s16_x1, svint16x2_t, int16_t, -+ svst2_vnum_s16 (p0, x0, x1, z0), -+ svst2_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_s32.c -new file mode 100644 -index 000000000..16b674992 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_s32.c -@@ -0,0 +1,200 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st2_s32_base: -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_s32_base, svint32x2_t, int32_t, -+ svst2_s32 (p0, x0, z0), -+ svst2 (p0, x0, z0)) -+ -+/* -+** st2_s32_index: -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_STORE (st2_s32_index, svint32x2_t, int32_t, -+ svst2_s32 (p0, x0 + x1, z0), -+ svst2 (p0, x0 + x1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_s32_1: -+** incb x0 -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_s32_1, svint32x2_t, int32_t, -+ svst2_s32 (p0, x0 + svcntw (), z0), -+ svst2 (p0, x0 + svcntw (), z0)) -+ -+/* -+** st2_s32_2: -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_s32_2, svint32x2_t, int32_t, -+ svst2_s32 (p0, x0 + svcntw () * 2, z0), -+ svst2 (p0, x0 + svcntw () * 2, z0)) -+ -+/* -+** st2_s32_14: -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_s32_14, svint32x2_t, int32_t, -+ svst2_s32 (p0, x0 + svcntw () * 14, z0), -+ svst2 (p0, x0 + svcntw () * 14, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_s32_16: -+** incb x0, all, mul #16 -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_s32_16, svint32x2_t, int32_t, -+ svst2_s32 (p0, x0 + svcntw () * 16, z0), -+ svst2 (p0, x0 + svcntw () * 16, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st2_s32_m1: -+** decb x0 -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_s32_m1, svint32x2_t, int32_t, -+ svst2_s32 (p0, x0 - svcntw (), z0), -+ svst2 (p0, x0 - svcntw (), z0)) -+ -+/* -+** st2_s32_m2: -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_s32_m2, svint32x2_t, int32_t, -+ svst2_s32 (p0, x0 - svcntw () * 2, z0), -+ svst2 (p0, x0 - svcntw () * 2, z0)) -+ -+/* -+** st2_s32_m16: -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_s32_m16, svint32x2_t, int32_t, -+ svst2_s32 (p0, x0 - svcntw () * 16, z0), -+ svst2 (p0, x0 - svcntw () * 16, z0)) -+ -+/* -+** st2_s32_m18: -+** addvl (x[0-9]+), x0, #-18 -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st2_s32_m18, svint32x2_t, int32_t, -+ svst2_s32 (p0, x0 - svcntw () * 18, z0), -+ svst2 (p0, x0 - svcntw () * 18, z0)) -+ -+/* -+** st2_vnum_s32_0: -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s32_0, svint32x2_t, int32_t, -+ svst2_vnum_s32 (p0, x0, 0, z0), -+ svst2_vnum (p0, x0, 0, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_vnum_s32_1: -+** incb x0 -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s32_1, svint32x2_t, int32_t, -+ svst2_vnum_s32 (p0, x0, 1, z0), -+ svst2_vnum (p0, x0, 1, z0)) -+ -+/* -+** st2_vnum_s32_2: -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s32_2, svint32x2_t, int32_t, -+ svst2_vnum_s32 (p0, x0, 2, z0), -+ svst2_vnum (p0, x0, 2, z0)) -+ -+/* -+** st2_vnum_s32_14: -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s32_14, svint32x2_t, int32_t, -+ svst2_vnum_s32 (p0, x0, 14, z0), -+ svst2_vnum (p0, x0, 14, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_vnum_s32_16: -+** incb x0, all, mul #16 -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s32_16, svint32x2_t, int32_t, -+ svst2_vnum_s32 (p0, x0, 16, z0), -+ svst2_vnum (p0, x0, 16, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_vnum_s32_m1: -+** decb x0 -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s32_m1, svint32x2_t, int32_t, -+ svst2_vnum_s32 (p0, x0, -1, z0), -+ svst2_vnum (p0, x0, -1, z0)) -+ -+/* -+** st2_vnum_s32_m2: -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s32_m2, svint32x2_t, int32_t, -+ svst2_vnum_s32 (p0, x0, -2, z0), -+ svst2_vnum (p0, x0, -2, z0)) -+ -+/* -+** st2_vnum_s32_m16: -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s32_m16, svint32x2_t, int32_t, -+ svst2_vnum_s32 (p0, x0, -16, z0), -+ svst2_vnum (p0, x0, -16, z0)) -+ -+/* -+** st2_vnum_s32_m18: -+** addvl (x[0-9]+), x0, #-18 -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s32_m18, svint32x2_t, int32_t, -+ svst2_vnum_s32 (p0, x0, -18, z0), -+ svst2_vnum (p0, x0, -18, z0)) -+ -+/* Using MUL to calculate an index would also be OK. 
*/ -+/* -+** st2_vnum_s32_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s32_x1, svint32x2_t, int32_t, -+ svst2_vnum_s32 (p0, x0, x1, z0), -+ svst2_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_s64.c -new file mode 100644 -index 000000000..1421333cb ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_s64.c -@@ -0,0 +1,200 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st2_s64_base: -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_s64_base, svint64x2_t, int64_t, -+ svst2_s64 (p0, x0, z0), -+ svst2 (p0, x0, z0)) -+ -+/* -+** st2_s64_index: -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_STORE (st2_s64_index, svint64x2_t, int64_t, -+ svst2_s64 (p0, x0 + x1, z0), -+ svst2 (p0, x0 + x1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_s64_1: -+** incb x0 -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_s64_1, svint64x2_t, int64_t, -+ svst2_s64 (p0, x0 + svcntd (), z0), -+ svst2 (p0, x0 + svcntd (), z0)) -+ -+/* -+** st2_s64_2: -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_s64_2, svint64x2_t, int64_t, -+ svst2_s64 (p0, x0 + svcntd () * 2, z0), -+ svst2 (p0, x0 + svcntd () * 2, z0)) -+ -+/* -+** st2_s64_14: -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_s64_14, svint64x2_t, int64_t, -+ svst2_s64 (p0, x0 + svcntd () * 14, z0), -+ svst2 (p0, x0 + svcntd () * 14, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_s64_16: -+** incb x0, all, mul #16 -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_s64_16, svint64x2_t, int64_t, -+ svst2_s64 (p0, x0 + svcntd () * 16, z0), -+ svst2 (p0, x0 + svcntd () * 16, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_s64_m1: -+** decb x0 -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_s64_m1, svint64x2_t, int64_t, -+ svst2_s64 (p0, x0 - svcntd (), z0), -+ svst2 (p0, x0 - svcntd (), z0)) -+ -+/* -+** st2_s64_m2: -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_s64_m2, svint64x2_t, int64_t, -+ svst2_s64 (p0, x0 - svcntd () * 2, z0), -+ svst2 (p0, x0 - svcntd () * 2, z0)) -+ -+/* -+** st2_s64_m16: -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_s64_m16, svint64x2_t, int64_t, -+ svst2_s64 (p0, x0 - svcntd () * 16, z0), -+ svst2 (p0, x0 - svcntd () * 16, z0)) -+ -+/* -+** st2_s64_m18: -+** addvl (x[0-9]+), x0, #-18 -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st2_s64_m18, svint64x2_t, int64_t, -+ svst2_s64 (p0, x0 - svcntd () * 18, z0), -+ svst2 (p0, x0 - svcntd () * 18, z0)) -+ -+/* -+** st2_vnum_s64_0: -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s64_0, svint64x2_t, int64_t, -+ svst2_vnum_s64 (p0, x0, 0, z0), -+ svst2_vnum (p0, x0, 0, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st2_vnum_s64_1: -+** incb x0 -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s64_1, svint64x2_t, int64_t, -+ svst2_vnum_s64 (p0, x0, 1, z0), -+ svst2_vnum (p0, x0, 1, z0)) -+ -+/* -+** st2_vnum_s64_2: -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s64_2, svint64x2_t, int64_t, -+ svst2_vnum_s64 (p0, x0, 2, z0), -+ svst2_vnum (p0, x0, 2, z0)) -+ -+/* -+** st2_vnum_s64_14: -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s64_14, svint64x2_t, int64_t, -+ svst2_vnum_s64 (p0, x0, 14, z0), -+ svst2_vnum (p0, x0, 14, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_vnum_s64_16: -+** incb x0, all, mul #16 -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s64_16, svint64x2_t, int64_t, -+ svst2_vnum_s64 (p0, x0, 16, z0), -+ svst2_vnum (p0, x0, 16, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_vnum_s64_m1: -+** decb x0 -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s64_m1, svint64x2_t, int64_t, -+ svst2_vnum_s64 (p0, x0, -1, z0), -+ svst2_vnum (p0, x0, -1, z0)) -+ -+/* -+** st2_vnum_s64_m2: -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s64_m2, svint64x2_t, int64_t, -+ svst2_vnum_s64 (p0, x0, -2, z0), -+ svst2_vnum (p0, x0, -2, z0)) -+ -+/* -+** st2_vnum_s64_m16: -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s64_m16, svint64x2_t, int64_t, -+ svst2_vnum_s64 (p0, x0, -16, z0), -+ svst2_vnum (p0, x0, -16, z0)) -+ -+/* -+** st2_vnum_s64_m18: -+** addvl (x[0-9]+), x0, #-18 -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s64_m18, svint64x2_t, int64_t, -+ svst2_vnum_s64 (p0, x0, -18, z0), -+ svst2_vnum (p0, x0, -18, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** st2_vnum_s64_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s64_x1, svint64x2_t, int64_t, -+ svst2_vnum_s64 (p0, x0, x1, z0), -+ svst2_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_s8.c -new file mode 100644 -index 000000000..f0b7df3c5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_s8.c -@@ -0,0 +1,204 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st2_s8_base: -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_s8_base, svint8x2_t, int8_t, -+ svst2_s8 (p0, x0, z0), -+ svst2 (p0, x0, z0)) -+ -+/* -+** st2_s8_index: -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, x1\] -+** ret -+*/ -+TEST_STORE (st2_s8_index, svint8x2_t, int8_t, -+ svst2_s8 (p0, x0 + x1, z0), -+ svst2 (p0, x0 + x1, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st2_s8_1: -+** incb x0 -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_s8_1, svint8x2_t, int8_t, -+ svst2_s8 (p0, x0 + svcntb (), z0), -+ svst2 (p0, x0 + svcntb (), z0)) -+ -+/* -+** st2_s8_2: -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_s8_2, svint8x2_t, int8_t, -+ svst2_s8 (p0, x0 + svcntb () * 2, z0), -+ svst2 (p0, x0 + svcntb () * 2, z0)) -+ -+/* -+** st2_s8_14: -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_s8_14, svint8x2_t, int8_t, -+ svst2_s8 (p0, x0 + svcntb () * 14, z0), -+ svst2 (p0, x0 + svcntb () * 14, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_s8_16: -+** incb x0, all, mul #16 -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_s8_16, svint8x2_t, int8_t, -+ svst2_s8 (p0, x0 + svcntb () * 16, z0), -+ svst2 (p0, x0 + svcntb () * 16, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_s8_m1: -+** decb x0 -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_s8_m1, svint8x2_t, int8_t, -+ svst2_s8 (p0, x0 - svcntb (), z0), -+ svst2 (p0, x0 - svcntb (), z0)) -+ -+/* -+** st2_s8_m2: -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_s8_m2, svint8x2_t, int8_t, -+ svst2_s8 (p0, x0 - svcntb () * 2, z0), -+ svst2 (p0, x0 - svcntb () * 2, z0)) -+ -+/* -+** st2_s8_m16: -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_s8_m16, svint8x2_t, int8_t, -+ svst2_s8 (p0, x0 - svcntb () * 16, z0), -+ svst2 (p0, x0 - svcntb () * 16, z0)) -+ -+/* -+** st2_s8_m18: -+** addvl (x[0-9]+), x0, #-18 -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st2_s8_m18, svint8x2_t, int8_t, -+ svst2_s8 (p0, x0 - svcntb () * 18, z0), -+ svst2 (p0, x0 - svcntb () * 18, z0)) -+ -+/* -+** st2_vnum_s8_0: -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s8_0, svint8x2_t, int8_t, -+ svst2_vnum_s8 (p0, x0, 0, z0), -+ svst2_vnum (p0, x0, 0, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_vnum_s8_1: -+** incb x0 -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s8_1, svint8x2_t, int8_t, -+ svst2_vnum_s8 (p0, x0, 1, z0), -+ svst2_vnum (p0, x0, 1, z0)) -+ -+/* -+** st2_vnum_s8_2: -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s8_2, svint8x2_t, int8_t, -+ svst2_vnum_s8 (p0, x0, 2, z0), -+ svst2_vnum (p0, x0, 2, z0)) -+ -+/* -+** st2_vnum_s8_14: -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s8_14, svint8x2_t, int8_t, -+ svst2_vnum_s8 (p0, x0, 14, z0), -+ svst2_vnum (p0, x0, 14, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_vnum_s8_16: -+** incb x0, all, mul #16 -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s8_16, svint8x2_t, int8_t, -+ svst2_vnum_s8 (p0, x0, 16, z0), -+ svst2_vnum (p0, x0, 16, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st2_vnum_s8_m1: -+** decb x0 -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s8_m1, svint8x2_t, int8_t, -+ svst2_vnum_s8 (p0, x0, -1, z0), -+ svst2_vnum (p0, x0, -1, z0)) -+ -+/* -+** st2_vnum_s8_m2: -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s8_m2, svint8x2_t, int8_t, -+ svst2_vnum_s8 (p0, x0, -2, z0), -+ svst2_vnum (p0, x0, -2, z0)) -+ -+/* -+** st2_vnum_s8_m16: -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s8_m16, svint8x2_t, int8_t, -+ svst2_vnum_s8 (p0, x0, -16, z0), -+ svst2_vnum (p0, x0, -16, z0)) -+ -+/* -+** st2_vnum_s8_m18: -+** addvl (x[0-9]+), x0, #-18 -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st2_vnum_s8_m18, svint8x2_t, int8_t, -+ svst2_vnum_s8 (p0, x0, -18, z0), -+ svst2_vnum (p0, x0, -18, z0)) -+ -+/* -+** st2_vnum_s8_x1: -+** cntb (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_STORE (st2_vnum_s8_x1, svint8x2_t, int8_t, -+ svst2_vnum_s8 (p0, x0, x1, z0), -+ svst2_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_u16.c -new file mode 100644 -index 000000000..edd32d81e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_u16.c -@@ -0,0 +1,200 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st2_u16_base: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_u16_base, svuint16x2_t, uint16_t, -+ svst2_u16 (p0, x0, z0), -+ svst2 (p0, x0, z0)) -+ -+/* -+** st2_u16_index: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_STORE (st2_u16_index, svuint16x2_t, uint16_t, -+ svst2_u16 (p0, x0 + x1, z0), -+ svst2 (p0, x0 + x1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_u16_1: -+** incb x0 -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_u16_1, svuint16x2_t, uint16_t, -+ svst2_u16 (p0, x0 + svcnth (), z0), -+ svst2 (p0, x0 + svcnth (), z0)) -+ -+/* -+** st2_u16_2: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_u16_2, svuint16x2_t, uint16_t, -+ svst2_u16 (p0, x0 + svcnth () * 2, z0), -+ svst2 (p0, x0 + svcnth () * 2, z0)) -+ -+/* -+** st2_u16_14: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_u16_14, svuint16x2_t, uint16_t, -+ svst2_u16 (p0, x0 + svcnth () * 14, z0), -+ svst2 (p0, x0 + svcnth () * 14, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_u16_16: -+** incb x0, all, mul #16 -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_u16_16, svuint16x2_t, uint16_t, -+ svst2_u16 (p0, x0 + svcnth () * 16, z0), -+ svst2 (p0, x0 + svcnth () * 16, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st2_u16_m1: -+** decb x0 -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_u16_m1, svuint16x2_t, uint16_t, -+ svst2_u16 (p0, x0 - svcnth (), z0), -+ svst2 (p0, x0 - svcnth (), z0)) -+ -+/* -+** st2_u16_m2: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_u16_m2, svuint16x2_t, uint16_t, -+ svst2_u16 (p0, x0 - svcnth () * 2, z0), -+ svst2 (p0, x0 - svcnth () * 2, z0)) -+ -+/* -+** st2_u16_m16: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_u16_m16, svuint16x2_t, uint16_t, -+ svst2_u16 (p0, x0 - svcnth () * 16, z0), -+ svst2 (p0, x0 - svcnth () * 16, z0)) -+ -+/* -+** st2_u16_m18: -+** addvl (x[0-9]+), x0, #-18 -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st2_u16_m18, svuint16x2_t, uint16_t, -+ svst2_u16 (p0, x0 - svcnth () * 18, z0), -+ svst2 (p0, x0 - svcnth () * 18, z0)) -+ -+/* -+** st2_vnum_u16_0: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u16_0, svuint16x2_t, uint16_t, -+ svst2_vnum_u16 (p0, x0, 0, z0), -+ svst2_vnum (p0, x0, 0, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_vnum_u16_1: -+** incb x0 -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u16_1, svuint16x2_t, uint16_t, -+ svst2_vnum_u16 (p0, x0, 1, z0), -+ svst2_vnum (p0, x0, 1, z0)) -+ -+/* -+** st2_vnum_u16_2: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u16_2, svuint16x2_t, uint16_t, -+ svst2_vnum_u16 (p0, x0, 2, z0), -+ svst2_vnum (p0, x0, 2, z0)) -+ -+/* -+** st2_vnum_u16_14: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u16_14, svuint16x2_t, uint16_t, -+ svst2_vnum_u16 (p0, x0, 14, z0), -+ svst2_vnum (p0, x0, 14, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_vnum_u16_16: -+** incb x0, all, mul #16 -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u16_16, svuint16x2_t, uint16_t, -+ svst2_vnum_u16 (p0, x0, 16, z0), -+ svst2_vnum (p0, x0, 16, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_vnum_u16_m1: -+** decb x0 -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u16_m1, svuint16x2_t, uint16_t, -+ svst2_vnum_u16 (p0, x0, -1, z0), -+ svst2_vnum (p0, x0, -1, z0)) -+ -+/* -+** st2_vnum_u16_m2: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u16_m2, svuint16x2_t, uint16_t, -+ svst2_vnum_u16 (p0, x0, -2, z0), -+ svst2_vnum (p0, x0, -2, z0)) -+ -+/* -+** st2_vnum_u16_m16: -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u16_m16, svuint16x2_t, uint16_t, -+ svst2_vnum_u16 (p0, x0, -16, z0), -+ svst2_vnum (p0, x0, -16, z0)) -+ -+/* -+** st2_vnum_u16_m18: -+** addvl (x[0-9]+), x0, #-18 -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u16_m18, svuint16x2_t, uint16_t, -+ svst2_vnum_u16 (p0, x0, -18, z0), -+ svst2_vnum (p0, x0, -18, z0)) -+ -+/* Using MUL to calculate an index would also be OK. 
*/ -+/* -+** st2_vnum_u16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st2h {z0\.h(?: - |, )z1\.h}, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u16_x1, svuint16x2_t, uint16_t, -+ svst2_vnum_u16 (p0, x0, x1, z0), -+ svst2_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_u32.c -new file mode 100644 -index 000000000..46f1b5ca7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_u32.c -@@ -0,0 +1,200 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st2_u32_base: -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_u32_base, svuint32x2_t, uint32_t, -+ svst2_u32 (p0, x0, z0), -+ svst2 (p0, x0, z0)) -+ -+/* -+** st2_u32_index: -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_STORE (st2_u32_index, svuint32x2_t, uint32_t, -+ svst2_u32 (p0, x0 + x1, z0), -+ svst2 (p0, x0 + x1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_u32_1: -+** incb x0 -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_u32_1, svuint32x2_t, uint32_t, -+ svst2_u32 (p0, x0 + svcntw (), z0), -+ svst2 (p0, x0 + svcntw (), z0)) -+ -+/* -+** st2_u32_2: -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_u32_2, svuint32x2_t, uint32_t, -+ svst2_u32 (p0, x0 + svcntw () * 2, z0), -+ svst2 (p0, x0 + svcntw () * 2, z0)) -+ -+/* -+** st2_u32_14: -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_u32_14, svuint32x2_t, uint32_t, -+ svst2_u32 (p0, x0 + svcntw () * 14, z0), -+ svst2 (p0, x0 + svcntw () * 14, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_u32_16: -+** incb x0, all, mul #16 -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_u32_16, svuint32x2_t, uint32_t, -+ svst2_u32 (p0, x0 + svcntw () * 16, z0), -+ svst2 (p0, x0 + svcntw () * 16, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_u32_m1: -+** decb x0 -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_u32_m1, svuint32x2_t, uint32_t, -+ svst2_u32 (p0, x0 - svcntw (), z0), -+ svst2 (p0, x0 - svcntw (), z0)) -+ -+/* -+** st2_u32_m2: -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_u32_m2, svuint32x2_t, uint32_t, -+ svst2_u32 (p0, x0 - svcntw () * 2, z0), -+ svst2 (p0, x0 - svcntw () * 2, z0)) -+ -+/* -+** st2_u32_m16: -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_u32_m16, svuint32x2_t, uint32_t, -+ svst2_u32 (p0, x0 - svcntw () * 16, z0), -+ svst2 (p0, x0 - svcntw () * 16, z0)) -+ -+/* -+** st2_u32_m18: -+** addvl (x[0-9]+), x0, #-18 -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st2_u32_m18, svuint32x2_t, uint32_t, -+ svst2_u32 (p0, x0 - svcntw () * 18, z0), -+ svst2 (p0, x0 - svcntw () * 18, z0)) -+ -+/* -+** st2_vnum_u32_0: -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u32_0, svuint32x2_t, uint32_t, -+ svst2_vnum_u32 (p0, x0, 0, z0), -+ svst2_vnum (p0, x0, 0, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st2_vnum_u32_1: -+** incb x0 -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u32_1, svuint32x2_t, uint32_t, -+ svst2_vnum_u32 (p0, x0, 1, z0), -+ svst2_vnum (p0, x0, 1, z0)) -+ -+/* -+** st2_vnum_u32_2: -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u32_2, svuint32x2_t, uint32_t, -+ svst2_vnum_u32 (p0, x0, 2, z0), -+ svst2_vnum (p0, x0, 2, z0)) -+ -+/* -+** st2_vnum_u32_14: -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u32_14, svuint32x2_t, uint32_t, -+ svst2_vnum_u32 (p0, x0, 14, z0), -+ svst2_vnum (p0, x0, 14, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_vnum_u32_16: -+** incb x0, all, mul #16 -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u32_16, svuint32x2_t, uint32_t, -+ svst2_vnum_u32 (p0, x0, 16, z0), -+ svst2_vnum (p0, x0, 16, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_vnum_u32_m1: -+** decb x0 -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u32_m1, svuint32x2_t, uint32_t, -+ svst2_vnum_u32 (p0, x0, -1, z0), -+ svst2_vnum (p0, x0, -1, z0)) -+ -+/* -+** st2_vnum_u32_m2: -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u32_m2, svuint32x2_t, uint32_t, -+ svst2_vnum_u32 (p0, x0, -2, z0), -+ svst2_vnum (p0, x0, -2, z0)) -+ -+/* -+** st2_vnum_u32_m16: -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u32_m16, svuint32x2_t, uint32_t, -+ svst2_vnum_u32 (p0, x0, -16, z0), -+ svst2_vnum (p0, x0, -16, z0)) -+ -+/* -+** st2_vnum_u32_m18: -+** addvl (x[0-9]+), x0, #-18 -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u32_m18, svuint32x2_t, uint32_t, -+ svst2_vnum_u32 (p0, x0, -18, z0), -+ svst2_vnum (p0, x0, -18, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** st2_vnum_u32_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st2w {z0\.s(?: - |, )z1\.s}, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u32_x1, svuint32x2_t, uint32_t, -+ svst2_vnum_u32 (p0, x0, x1, z0), -+ svst2_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_u64.c -new file mode 100644 -index 000000000..0d9202b72 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_u64.c -@@ -0,0 +1,200 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st2_u64_base: -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_u64_base, svuint64x2_t, uint64_t, -+ svst2_u64 (p0, x0, z0), -+ svst2 (p0, x0, z0)) -+ -+/* -+** st2_u64_index: -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_STORE (st2_u64_index, svuint64x2_t, uint64_t, -+ svst2_u64 (p0, x0 + x1, z0), -+ svst2 (p0, x0 + x1, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st2_u64_1: -+** incb x0 -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_u64_1, svuint64x2_t, uint64_t, -+ svst2_u64 (p0, x0 + svcntd (), z0), -+ svst2 (p0, x0 + svcntd (), z0)) -+ -+/* -+** st2_u64_2: -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_u64_2, svuint64x2_t, uint64_t, -+ svst2_u64 (p0, x0 + svcntd () * 2, z0), -+ svst2 (p0, x0 + svcntd () * 2, z0)) -+ -+/* -+** st2_u64_14: -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_u64_14, svuint64x2_t, uint64_t, -+ svst2_u64 (p0, x0 + svcntd () * 14, z0), -+ svst2 (p0, x0 + svcntd () * 14, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_u64_16: -+** incb x0, all, mul #16 -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_u64_16, svuint64x2_t, uint64_t, -+ svst2_u64 (p0, x0 + svcntd () * 16, z0), -+ svst2 (p0, x0 + svcntd () * 16, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_u64_m1: -+** decb x0 -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_u64_m1, svuint64x2_t, uint64_t, -+ svst2_u64 (p0, x0 - svcntd (), z0), -+ svst2 (p0, x0 - svcntd (), z0)) -+ -+/* -+** st2_u64_m2: -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_u64_m2, svuint64x2_t, uint64_t, -+ svst2_u64 (p0, x0 - svcntd () * 2, z0), -+ svst2 (p0, x0 - svcntd () * 2, z0)) -+ -+/* -+** st2_u64_m16: -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_u64_m16, svuint64x2_t, uint64_t, -+ svst2_u64 (p0, x0 - svcntd () * 16, z0), -+ svst2 (p0, x0 - svcntd () * 16, z0)) -+ -+/* -+** st2_u64_m18: -+** addvl (x[0-9]+), x0, #-18 -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st2_u64_m18, svuint64x2_t, uint64_t, -+ svst2_u64 (p0, x0 - svcntd () * 18, z0), -+ svst2 (p0, x0 - svcntd () * 18, z0)) -+ -+/* -+** st2_vnum_u64_0: -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u64_0, svuint64x2_t, uint64_t, -+ svst2_vnum_u64 (p0, x0, 0, z0), -+ svst2_vnum (p0, x0, 0, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_vnum_u64_1: -+** incb x0 -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u64_1, svuint64x2_t, uint64_t, -+ svst2_vnum_u64 (p0, x0, 1, z0), -+ svst2_vnum (p0, x0, 1, z0)) -+ -+/* -+** st2_vnum_u64_2: -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u64_2, svuint64x2_t, uint64_t, -+ svst2_vnum_u64 (p0, x0, 2, z0), -+ svst2_vnum (p0, x0, 2, z0)) -+ -+/* -+** st2_vnum_u64_14: -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u64_14, svuint64x2_t, uint64_t, -+ svst2_vnum_u64 (p0, x0, 14, z0), -+ svst2_vnum (p0, x0, 14, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_vnum_u64_16: -+** incb x0, all, mul #16 -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u64_16, svuint64x2_t, uint64_t, -+ svst2_vnum_u64 (p0, x0, 16, z0), -+ svst2_vnum (p0, x0, 16, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st2_vnum_u64_m1: -+** decb x0 -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u64_m1, svuint64x2_t, uint64_t, -+ svst2_vnum_u64 (p0, x0, -1, z0), -+ svst2_vnum (p0, x0, -1, z0)) -+ -+/* -+** st2_vnum_u64_m2: -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u64_m2, svuint64x2_t, uint64_t, -+ svst2_vnum_u64 (p0, x0, -2, z0), -+ svst2_vnum (p0, x0, -2, z0)) -+ -+/* -+** st2_vnum_u64_m16: -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u64_m16, svuint64x2_t, uint64_t, -+ svst2_vnum_u64 (p0, x0, -16, z0), -+ svst2_vnum (p0, x0, -16, z0)) -+ -+/* -+** st2_vnum_u64_m18: -+** addvl (x[0-9]+), x0, #-18 -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u64_m18, svuint64x2_t, uint64_t, -+ svst2_vnum_u64 (p0, x0, -18, z0), -+ svst2_vnum (p0, x0, -18, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** st2_vnum_u64_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st2d {z0\.d(?: - |, )z1\.d}, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u64_x1, svuint64x2_t, uint64_t, -+ svst2_vnum_u64 (p0, x0, x1, z0), -+ svst2_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_u8.c -new file mode 100644 -index 000000000..e7ea977a2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st2_u8.c -@@ -0,0 +1,204 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st2_u8_base: -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_u8_base, svuint8x2_t, uint8_t, -+ svst2_u8 (p0, x0, z0), -+ svst2 (p0, x0, z0)) -+ -+/* -+** st2_u8_index: -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, x1\] -+** ret -+*/ -+TEST_STORE (st2_u8_index, svuint8x2_t, uint8_t, -+ svst2_u8 (p0, x0 + x1, z0), -+ svst2 (p0, x0 + x1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_u8_1: -+** incb x0 -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_u8_1, svuint8x2_t, uint8_t, -+ svst2_u8 (p0, x0 + svcntb (), z0), -+ svst2 (p0, x0 + svcntb (), z0)) -+ -+/* -+** st2_u8_2: -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_u8_2, svuint8x2_t, uint8_t, -+ svst2_u8 (p0, x0 + svcntb () * 2, z0), -+ svst2 (p0, x0 + svcntb () * 2, z0)) -+ -+/* -+** st2_u8_14: -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_u8_14, svuint8x2_t, uint8_t, -+ svst2_u8 (p0, x0 + svcntb () * 14, z0), -+ svst2 (p0, x0 + svcntb () * 14, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_u8_16: -+** incb x0, all, mul #16 -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_u8_16, svuint8x2_t, uint8_t, -+ svst2_u8 (p0, x0 + svcntb () * 16, z0), -+ svst2 (p0, x0 + svcntb () * 16, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st2_u8_m1: -+** decb x0 -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_u8_m1, svuint8x2_t, uint8_t, -+ svst2_u8 (p0, x0 - svcntb (), z0), -+ svst2 (p0, x0 - svcntb (), z0)) -+ -+/* -+** st2_u8_m2: -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_u8_m2, svuint8x2_t, uint8_t, -+ svst2_u8 (p0, x0 - svcntb () * 2, z0), -+ svst2 (p0, x0 - svcntb () * 2, z0)) -+ -+/* -+** st2_u8_m16: -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_u8_m16, svuint8x2_t, uint8_t, -+ svst2_u8 (p0, x0 - svcntb () * 16, z0), -+ svst2 (p0, x0 - svcntb () * 16, z0)) -+ -+/* -+** st2_u8_m18: -+** addvl (x[0-9]+), x0, #-18 -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st2_u8_m18, svuint8x2_t, uint8_t, -+ svst2_u8 (p0, x0 - svcntb () * 18, z0), -+ svst2 (p0, x0 - svcntb () * 18, z0)) -+ -+/* -+** st2_vnum_u8_0: -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u8_0, svuint8x2_t, uint8_t, -+ svst2_vnum_u8 (p0, x0, 0, z0), -+ svst2_vnum (p0, x0, 0, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_vnum_u8_1: -+** incb x0 -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u8_1, svuint8x2_t, uint8_t, -+ svst2_vnum_u8 (p0, x0, 1, z0), -+ svst2_vnum (p0, x0, 1, z0)) -+ -+/* -+** st2_vnum_u8_2: -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u8_2, svuint8x2_t, uint8_t, -+ svst2_vnum_u8 (p0, x0, 2, z0), -+ svst2_vnum (p0, x0, 2, z0)) -+ -+/* -+** st2_vnum_u8_14: -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #14, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u8_14, svuint8x2_t, uint8_t, -+ svst2_vnum_u8 (p0, x0, 14, z0), -+ svst2_vnum (p0, x0, 14, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st2_vnum_u8_16: -+** incb x0, all, mul #16 -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u8_16, svuint8x2_t, uint8_t, -+ svst2_vnum_u8 (p0, x0, 16, z0), -+ svst2_vnum (p0, x0, 16, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st2_vnum_u8_m1: -+** decb x0 -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u8_m1, svuint8x2_t, uint8_t, -+ svst2_vnum_u8 (p0, x0, -1, z0), -+ svst2_vnum (p0, x0, -1, z0)) -+ -+/* -+** st2_vnum_u8_m2: -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #-2, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u8_m2, svuint8x2_t, uint8_t, -+ svst2_vnum_u8 (p0, x0, -2, z0), -+ svst2_vnum (p0, x0, -2, z0)) -+ -+/* -+** st2_vnum_u8_m16: -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, #-16, mul vl\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u8_m16, svuint8x2_t, uint8_t, -+ svst2_vnum_u8 (p0, x0, -16, z0), -+ svst2_vnum (p0, x0, -16, z0)) -+ -+/* -+** st2_vnum_u8_m18: -+** addvl (x[0-9]+), x0, #-18 -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st2_vnum_u8_m18, svuint8x2_t, uint8_t, -+ svst2_vnum_u8 (p0, x0, -18, z0), -+ svst2_vnum (p0, x0, -18, z0)) -+ -+/* -+** st2_vnum_u8_x1: -+** cntb (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** st2b {z0\.b(?: - |, )z1\.b}, p0, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_STORE (st2_vnum_u8_x1, svuint8x2_t, uint8_t, -+ svst2_vnum_u8 (p0, x0, x1, z0), -+ svst2_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_bf16.c -new file mode 100644 -index 000000000..2f921687c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_bf16.c -@@ -0,0 +1,242 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st3_bf16_base: -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_bf16_base, svbfloat16x3_t, bfloat16_t, -+ svst3_bf16 (p0, x0, z0), -+ svst3 (p0, x0, z0)) -+ -+/* -+** st3_bf16_index: -+** st3h {z0\.h - z2\.h}, p0, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_STORE (st3_bf16_index, svbfloat16x3_t, bfloat16_t, -+ svst3_bf16 (p0, x0 + x1, z0), -+ svst3 (p0, x0 + x1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_bf16_1: -+** incb x0 -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_bf16_1, svbfloat16x3_t, bfloat16_t, -+ svst3_bf16 (p0, x0 + svcnth (), z0), -+ svst3 (p0, x0 + svcnth (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_bf16_2: -+** incb x0, all, mul #2 -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_bf16_2, svbfloat16x3_t, bfloat16_t, -+ svst3_bf16 (p0, x0 + svcnth () * 2, z0), -+ svst3 (p0, x0 + svcnth () * 2, z0)) -+ -+/* -+** st3_bf16_3: -+** st3h {z0\.h - z2\.h}, p0, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_bf16_3, svbfloat16x3_t, bfloat16_t, -+ svst3_bf16 (p0, x0 + svcnth () * 3, z0), -+ svst3 (p0, x0 + svcnth () * 3, z0)) -+ -+/* -+** st3_bf16_21: -+** st3h {z0\.h - z2\.h}, p0, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_bf16_21, svbfloat16x3_t, bfloat16_t, -+ svst3_bf16 (p0, x0 + svcnth () * 21, z0), -+ svst3 (p0, x0 + svcnth () * 21, z0)) -+ -+/* -+** st3_bf16_24: -+** addvl (x[0-9]+), x0, #24 -+** st3h {z0\.h - z2\.h}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_bf16_24, svbfloat16x3_t, bfloat16_t, -+ svst3_bf16 (p0, x0 + svcnth () * 24, z0), -+ svst3 (p0, x0 + svcnth () * 24, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st3_bf16_m1: -+** decb x0 -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_bf16_m1, svbfloat16x3_t, bfloat16_t, -+ svst3_bf16 (p0, x0 - svcnth (), z0), -+ svst3 (p0, x0 - svcnth (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_bf16_m2: -+** decb x0, all, mul #2 -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_bf16_m2, svbfloat16x3_t, bfloat16_t, -+ svst3_bf16 (p0, x0 - svcnth () * 2, z0), -+ svst3 (p0, x0 - svcnth () * 2, z0)) -+ -+/* -+** st3_bf16_m3: -+** st3h {z0\.h - z2\.h}, p0, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_bf16_m3, svbfloat16x3_t, bfloat16_t, -+ svst3_bf16 (p0, x0 - svcnth () * 3, z0), -+ svst3 (p0, x0 - svcnth () * 3, z0)) -+ -+/* -+** st3_bf16_m24: -+** st3h {z0\.h - z2\.h}, p0, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_bf16_m24, svbfloat16x3_t, bfloat16_t, -+ svst3_bf16 (p0, x0 - svcnth () * 24, z0), -+ svst3 (p0, x0 - svcnth () * 24, z0)) -+ -+/* -+** st3_bf16_m27: -+** addvl (x[0-9]+), x0, #-27 -+** st3h {z0\.h - z2\.h}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_bf16_m27, svbfloat16x3_t, bfloat16_t, -+ svst3_bf16 (p0, x0 - svcnth () * 27, z0), -+ svst3 (p0, x0 - svcnth () * 27, z0)) -+ -+/* -+** st3_vnum_bf16_0: -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_bf16_0, svbfloat16x3_t, bfloat16_t, -+ svst3_vnum_bf16 (p0, x0, 0, z0), -+ svst3_vnum (p0, x0, 0, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_vnum_bf16_1: -+** incb x0 -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_bf16_1, svbfloat16x3_t, bfloat16_t, -+ svst3_vnum_bf16 (p0, x0, 1, z0), -+ svst3_vnum (p0, x0, 1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_vnum_bf16_2: -+** incb x0, all, mul #2 -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_bf16_2, svbfloat16x3_t, bfloat16_t, -+ svst3_vnum_bf16 (p0, x0, 2, z0), -+ svst3_vnum (p0, x0, 2, z0)) -+ -+/* -+** st3_vnum_bf16_3: -+** st3h {z0\.h - z2\.h}, p0, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_bf16_3, svbfloat16x3_t, bfloat16_t, -+ svst3_vnum_bf16 (p0, x0, 3, z0), -+ svst3_vnum (p0, x0, 3, z0)) -+ -+/* -+** st3_vnum_bf16_21: -+** st3h {z0\.h - z2\.h}, p0, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_bf16_21, svbfloat16x3_t, bfloat16_t, -+ svst3_vnum_bf16 (p0, x0, 21, z0), -+ svst3_vnum (p0, x0, 21, z0)) -+ -+/* -+** st3_vnum_bf16_24: -+** addvl (x[0-9]+), x0, #24 -+** st3h {z0\.h - z2\.h}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_vnum_bf16_24, svbfloat16x3_t, bfloat16_t, -+ svst3_vnum_bf16 (p0, x0, 24, z0), -+ svst3_vnum (p0, x0, 24, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_vnum_bf16_m1: -+** decb x0 -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_bf16_m1, svbfloat16x3_t, bfloat16_t, -+ svst3_vnum_bf16 (p0, x0, -1, z0), -+ svst3_vnum (p0, x0, -1, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st3_vnum_bf16_m2: -+** decb x0, all, mul #2 -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_bf16_m2, svbfloat16x3_t, bfloat16_t, -+ svst3_vnum_bf16 (p0, x0, -2, z0), -+ svst3_vnum (p0, x0, -2, z0)) -+ -+/* -+** st3_vnum_bf16_m3: -+** st3h {z0\.h - z2\.h}, p0, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_bf16_m3, svbfloat16x3_t, bfloat16_t, -+ svst3_vnum_bf16 (p0, x0, -3, z0), -+ svst3_vnum (p0, x0, -3, z0)) -+ -+/* -+** st3_vnum_bf16_m24: -+** st3h {z0\.h - z2\.h}, p0, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_bf16_m24, svbfloat16x3_t, bfloat16_t, -+ svst3_vnum_bf16 (p0, x0, -24, z0), -+ svst3_vnum (p0, x0, -24, z0)) -+ -+/* -+** st3_vnum_bf16_m27: -+** addvl (x[0-9]+), x0, #-27 -+** st3h {z0\.h - z2\.h}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_vnum_bf16_m27, svbfloat16x3_t, bfloat16_t, -+ svst3_vnum_bf16 (p0, x0, -27, z0), -+ svst3_vnum (p0, x0, -27, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** st3_vnum_bf16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st3h {z0\.h - z2\.h}, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st3_vnum_bf16_x1, svbfloat16x3_t, bfloat16_t, -+ svst3_vnum_bf16 (p0, x0, x1, z0), -+ svst3_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_f16.c -new file mode 100644 -index 000000000..388eb3708 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_f16.c -@@ -0,0 +1,242 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st3_f16_base: -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_f16_base, svfloat16x3_t, float16_t, -+ svst3_f16 (p0, x0, z0), -+ svst3 (p0, x0, z0)) -+ -+/* -+** st3_f16_index: -+** st3h {z0\.h - z2\.h}, p0, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_STORE (st3_f16_index, svfloat16x3_t, float16_t, -+ svst3_f16 (p0, x0 + x1, z0), -+ svst3 (p0, x0 + x1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_f16_1: -+** incb x0 -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_f16_1, svfloat16x3_t, float16_t, -+ svst3_f16 (p0, x0 + svcnth (), z0), -+ svst3 (p0, x0 + svcnth (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_f16_2: -+** incb x0, all, mul #2 -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_f16_2, svfloat16x3_t, float16_t, -+ svst3_f16 (p0, x0 + svcnth () * 2, z0), -+ svst3 (p0, x0 + svcnth () * 2, z0)) -+ -+/* -+** st3_f16_3: -+** st3h {z0\.h - z2\.h}, p0, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_f16_3, svfloat16x3_t, float16_t, -+ svst3_f16 (p0, x0 + svcnth () * 3, z0), -+ svst3 (p0, x0 + svcnth () * 3, z0)) -+ -+/* -+** st3_f16_21: -+** st3h {z0\.h - z2\.h}, p0, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_f16_21, svfloat16x3_t, float16_t, -+ svst3_f16 (p0, x0 + svcnth () * 21, z0), -+ svst3 (p0, x0 + svcnth () * 21, z0)) -+ -+/* -+** st3_f16_24: -+** addvl (x[0-9]+), x0, #24 -+** st3h {z0\.h - z2\.h}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_f16_24, svfloat16x3_t, float16_t, -+ svst3_f16 (p0, x0 + svcnth () * 24, z0), -+ svst3 (p0, x0 + svcnth () * 24, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st3_f16_m1: -+** decb x0 -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_f16_m1, svfloat16x3_t, float16_t, -+ svst3_f16 (p0, x0 - svcnth (), z0), -+ svst3 (p0, x0 - svcnth (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_f16_m2: -+** decb x0, all, mul #2 -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_f16_m2, svfloat16x3_t, float16_t, -+ svst3_f16 (p0, x0 - svcnth () * 2, z0), -+ svst3 (p0, x0 - svcnth () * 2, z0)) -+ -+/* -+** st3_f16_m3: -+** st3h {z0\.h - z2\.h}, p0, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_f16_m3, svfloat16x3_t, float16_t, -+ svst3_f16 (p0, x0 - svcnth () * 3, z0), -+ svst3 (p0, x0 - svcnth () * 3, z0)) -+ -+/* -+** st3_f16_m24: -+** st3h {z0\.h - z2\.h}, p0, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_f16_m24, svfloat16x3_t, float16_t, -+ svst3_f16 (p0, x0 - svcnth () * 24, z0), -+ svst3 (p0, x0 - svcnth () * 24, z0)) -+ -+/* -+** st3_f16_m27: -+** addvl (x[0-9]+), x0, #-27 -+** st3h {z0\.h - z2\.h}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_f16_m27, svfloat16x3_t, float16_t, -+ svst3_f16 (p0, x0 - svcnth () * 27, z0), -+ svst3 (p0, x0 - svcnth () * 27, z0)) -+ -+/* -+** st3_vnum_f16_0: -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_f16_0, svfloat16x3_t, float16_t, -+ svst3_vnum_f16 (p0, x0, 0, z0), -+ svst3_vnum (p0, x0, 0, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_vnum_f16_1: -+** incb x0 -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_f16_1, svfloat16x3_t, float16_t, -+ svst3_vnum_f16 (p0, x0, 1, z0), -+ svst3_vnum (p0, x0, 1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_vnum_f16_2: -+** incb x0, all, mul #2 -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_f16_2, svfloat16x3_t, float16_t, -+ svst3_vnum_f16 (p0, x0, 2, z0), -+ svst3_vnum (p0, x0, 2, z0)) -+ -+/* -+** st3_vnum_f16_3: -+** st3h {z0\.h - z2\.h}, p0, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_f16_3, svfloat16x3_t, float16_t, -+ svst3_vnum_f16 (p0, x0, 3, z0), -+ svst3_vnum (p0, x0, 3, z0)) -+ -+/* -+** st3_vnum_f16_21: -+** st3h {z0\.h - z2\.h}, p0, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_f16_21, svfloat16x3_t, float16_t, -+ svst3_vnum_f16 (p0, x0, 21, z0), -+ svst3_vnum (p0, x0, 21, z0)) -+ -+/* -+** st3_vnum_f16_24: -+** addvl (x[0-9]+), x0, #24 -+** st3h {z0\.h - z2\.h}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_vnum_f16_24, svfloat16x3_t, float16_t, -+ svst3_vnum_f16 (p0, x0, 24, z0), -+ svst3_vnum (p0, x0, 24, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_vnum_f16_m1: -+** decb x0 -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_f16_m1, svfloat16x3_t, float16_t, -+ svst3_vnum_f16 (p0, x0, -1, z0), -+ svst3_vnum (p0, x0, -1, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st3_vnum_f16_m2: -+** decb x0, all, mul #2 -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_f16_m2, svfloat16x3_t, float16_t, -+ svst3_vnum_f16 (p0, x0, -2, z0), -+ svst3_vnum (p0, x0, -2, z0)) -+ -+/* -+** st3_vnum_f16_m3: -+** st3h {z0\.h - z2\.h}, p0, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_f16_m3, svfloat16x3_t, float16_t, -+ svst3_vnum_f16 (p0, x0, -3, z0), -+ svst3_vnum (p0, x0, -3, z0)) -+ -+/* -+** st3_vnum_f16_m24: -+** st3h {z0\.h - z2\.h}, p0, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_f16_m24, svfloat16x3_t, float16_t, -+ svst3_vnum_f16 (p0, x0, -24, z0), -+ svst3_vnum (p0, x0, -24, z0)) -+ -+/* -+** st3_vnum_f16_m27: -+** addvl (x[0-9]+), x0, #-27 -+** st3h {z0\.h - z2\.h}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_vnum_f16_m27, svfloat16x3_t, float16_t, -+ svst3_vnum_f16 (p0, x0, -27, z0), -+ svst3_vnum (p0, x0, -27, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** st3_vnum_f16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st3h {z0\.h - z2\.h}, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st3_vnum_f16_x1, svfloat16x3_t, float16_t, -+ svst3_vnum_f16 (p0, x0, x1, z0), -+ svst3_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_f32.c -new file mode 100644 -index 000000000..a5e3bdb45 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_f32.c -@@ -0,0 +1,242 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st3_f32_base: -+** st3w {z0\.s - z2\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_f32_base, svfloat32x3_t, float32_t, -+ svst3_f32 (p0, x0, z0), -+ svst3 (p0, x0, z0)) -+ -+/* -+** st3_f32_index: -+** st3w {z0\.s - z2\.s}, p0, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_STORE (st3_f32_index, svfloat32x3_t, float32_t, -+ svst3_f32 (p0, x0 + x1, z0), -+ svst3 (p0, x0 + x1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_f32_1: -+** incb x0 -+** st3w {z0\.s - z2\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_f32_1, svfloat32x3_t, float32_t, -+ svst3_f32 (p0, x0 + svcntw (), z0), -+ svst3 (p0, x0 + svcntw (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_f32_2: -+** incb x0, all, mul #2 -+** st3w {z0\.s - z2\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_f32_2, svfloat32x3_t, float32_t, -+ svst3_f32 (p0, x0 + svcntw () * 2, z0), -+ svst3 (p0, x0 + svcntw () * 2, z0)) -+ -+/* -+** st3_f32_3: -+** st3w {z0\.s - z2\.s}, p0, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_f32_3, svfloat32x3_t, float32_t, -+ svst3_f32 (p0, x0 + svcntw () * 3, z0), -+ svst3 (p0, x0 + svcntw () * 3, z0)) -+ -+/* -+** st3_f32_21: -+** st3w {z0\.s - z2\.s}, p0, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_f32_21, svfloat32x3_t, float32_t, -+ svst3_f32 (p0, x0 + svcntw () * 21, z0), -+ svst3 (p0, x0 + svcntw () * 21, z0)) -+ -+/* -+** st3_f32_24: -+** addvl (x[0-9]+), x0, #24 -+** st3w {z0\.s - z2\.s}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_f32_24, svfloat32x3_t, float32_t, -+ svst3_f32 (p0, x0 + svcntw () * 24, z0), -+ svst3 (p0, x0 + svcntw () * 24, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st3_f32_m1: -+** decb x0 -+** st3w {z0\.s - z2\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_f32_m1, svfloat32x3_t, float32_t, -+ svst3_f32 (p0, x0 - svcntw (), z0), -+ svst3 (p0, x0 - svcntw (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_f32_m2: -+** decb x0, all, mul #2 -+** st3w {z0\.s - z2\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_f32_m2, svfloat32x3_t, float32_t, -+ svst3_f32 (p0, x0 - svcntw () * 2, z0), -+ svst3 (p0, x0 - svcntw () * 2, z0)) -+ -+/* -+** st3_f32_m3: -+** st3w {z0\.s - z2\.s}, p0, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_f32_m3, svfloat32x3_t, float32_t, -+ svst3_f32 (p0, x0 - svcntw () * 3, z0), -+ svst3 (p0, x0 - svcntw () * 3, z0)) -+ -+/* -+** st3_f32_m24: -+** st3w {z0\.s - z2\.s}, p0, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_f32_m24, svfloat32x3_t, float32_t, -+ svst3_f32 (p0, x0 - svcntw () * 24, z0), -+ svst3 (p0, x0 - svcntw () * 24, z0)) -+ -+/* -+** st3_f32_m27: -+** addvl (x[0-9]+), x0, #-27 -+** st3w {z0\.s - z2\.s}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_f32_m27, svfloat32x3_t, float32_t, -+ svst3_f32 (p0, x0 - svcntw () * 27, z0), -+ svst3 (p0, x0 - svcntw () * 27, z0)) -+ -+/* -+** st3_vnum_f32_0: -+** st3w {z0\.s - z2\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_f32_0, svfloat32x3_t, float32_t, -+ svst3_vnum_f32 (p0, x0, 0, z0), -+ svst3_vnum (p0, x0, 0, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_vnum_f32_1: -+** incb x0 -+** st3w {z0\.s - z2\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_f32_1, svfloat32x3_t, float32_t, -+ svst3_vnum_f32 (p0, x0, 1, z0), -+ svst3_vnum (p0, x0, 1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_vnum_f32_2: -+** incb x0, all, mul #2 -+** st3w {z0\.s - z2\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_f32_2, svfloat32x3_t, float32_t, -+ svst3_vnum_f32 (p0, x0, 2, z0), -+ svst3_vnum (p0, x0, 2, z0)) -+ -+/* -+** st3_vnum_f32_3: -+** st3w {z0\.s - z2\.s}, p0, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_f32_3, svfloat32x3_t, float32_t, -+ svst3_vnum_f32 (p0, x0, 3, z0), -+ svst3_vnum (p0, x0, 3, z0)) -+ -+/* -+** st3_vnum_f32_21: -+** st3w {z0\.s - z2\.s}, p0, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_f32_21, svfloat32x3_t, float32_t, -+ svst3_vnum_f32 (p0, x0, 21, z0), -+ svst3_vnum (p0, x0, 21, z0)) -+ -+/* -+** st3_vnum_f32_24: -+** addvl (x[0-9]+), x0, #24 -+** st3w {z0\.s - z2\.s}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_vnum_f32_24, svfloat32x3_t, float32_t, -+ svst3_vnum_f32 (p0, x0, 24, z0), -+ svst3_vnum (p0, x0, 24, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_vnum_f32_m1: -+** decb x0 -+** st3w {z0\.s - z2\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_f32_m1, svfloat32x3_t, float32_t, -+ svst3_vnum_f32 (p0, x0, -1, z0), -+ svst3_vnum (p0, x0, -1, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st3_vnum_f32_m2: -+** decb x0, all, mul #2 -+** st3w {z0\.s - z2\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_f32_m2, svfloat32x3_t, float32_t, -+ svst3_vnum_f32 (p0, x0, -2, z0), -+ svst3_vnum (p0, x0, -2, z0)) -+ -+/* -+** st3_vnum_f32_m3: -+** st3w {z0\.s - z2\.s}, p0, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_f32_m3, svfloat32x3_t, float32_t, -+ svst3_vnum_f32 (p0, x0, -3, z0), -+ svst3_vnum (p0, x0, -3, z0)) -+ -+/* -+** st3_vnum_f32_m24: -+** st3w {z0\.s - z2\.s}, p0, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_f32_m24, svfloat32x3_t, float32_t, -+ svst3_vnum_f32 (p0, x0, -24, z0), -+ svst3_vnum (p0, x0, -24, z0)) -+ -+/* -+** st3_vnum_f32_m27: -+** addvl (x[0-9]+), x0, #-27 -+** st3w {z0\.s - z2\.s}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_vnum_f32_m27, svfloat32x3_t, float32_t, -+ svst3_vnum_f32 (p0, x0, -27, z0), -+ svst3_vnum (p0, x0, -27, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** st3_vnum_f32_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st3w {z0\.s - z2\.s}, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st3_vnum_f32_x1, svfloat32x3_t, float32_t, -+ svst3_vnum_f32 (p0, x0, x1, z0), -+ svst3_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_f64.c -new file mode 100644 -index 000000000..30407da8a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_f64.c -@@ -0,0 +1,242 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st3_f64_base: -+** st3d {z0\.d - z2\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_f64_base, svfloat64x3_t, float64_t, -+ svst3_f64 (p0, x0, z0), -+ svst3 (p0, x0, z0)) -+ -+/* -+** st3_f64_index: -+** st3d {z0\.d - z2\.d}, p0, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_STORE (st3_f64_index, svfloat64x3_t, float64_t, -+ svst3_f64 (p0, x0 + x1, z0), -+ svst3 (p0, x0 + x1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_f64_1: -+** incb x0 -+** st3d {z0\.d - z2\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_f64_1, svfloat64x3_t, float64_t, -+ svst3_f64 (p0, x0 + svcntd (), z0), -+ svst3 (p0, x0 + svcntd (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_f64_2: -+** incb x0, all, mul #2 -+** st3d {z0\.d - z2\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_f64_2, svfloat64x3_t, float64_t, -+ svst3_f64 (p0, x0 + svcntd () * 2, z0), -+ svst3 (p0, x0 + svcntd () * 2, z0)) -+ -+/* -+** st3_f64_3: -+** st3d {z0\.d - z2\.d}, p0, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_f64_3, svfloat64x3_t, float64_t, -+ svst3_f64 (p0, x0 + svcntd () * 3, z0), -+ svst3 (p0, x0 + svcntd () * 3, z0)) -+ -+/* -+** st3_f64_21: -+** st3d {z0\.d - z2\.d}, p0, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_f64_21, svfloat64x3_t, float64_t, -+ svst3_f64 (p0, x0 + svcntd () * 21, z0), -+ svst3 (p0, x0 + svcntd () * 21, z0)) -+ -+/* -+** st3_f64_24: -+** addvl (x[0-9]+), x0, #24 -+** st3d {z0\.d - z2\.d}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_f64_24, svfloat64x3_t, float64_t, -+ svst3_f64 (p0, x0 + svcntd () * 24, z0), -+ svst3 (p0, x0 + svcntd () * 24, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st3_f64_m1: -+** decb x0 -+** st3d {z0\.d - z2\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_f64_m1, svfloat64x3_t, float64_t, -+ svst3_f64 (p0, x0 - svcntd (), z0), -+ svst3 (p0, x0 - svcntd (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_f64_m2: -+** decb x0, all, mul #2 -+** st3d {z0\.d - z2\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_f64_m2, svfloat64x3_t, float64_t, -+ svst3_f64 (p0, x0 - svcntd () * 2, z0), -+ svst3 (p0, x0 - svcntd () * 2, z0)) -+ -+/* -+** st3_f64_m3: -+** st3d {z0\.d - z2\.d}, p0, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_f64_m3, svfloat64x3_t, float64_t, -+ svst3_f64 (p0, x0 - svcntd () * 3, z0), -+ svst3 (p0, x0 - svcntd () * 3, z0)) -+ -+/* -+** st3_f64_m24: -+** st3d {z0\.d - z2\.d}, p0, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_f64_m24, svfloat64x3_t, float64_t, -+ svst3_f64 (p0, x0 - svcntd () * 24, z0), -+ svst3 (p0, x0 - svcntd () * 24, z0)) -+ -+/* -+** st3_f64_m27: -+** addvl (x[0-9]+), x0, #-27 -+** st3d {z0\.d - z2\.d}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_f64_m27, svfloat64x3_t, float64_t, -+ svst3_f64 (p0, x0 - svcntd () * 27, z0), -+ svst3 (p0, x0 - svcntd () * 27, z0)) -+ -+/* -+** st3_vnum_f64_0: -+** st3d {z0\.d - z2\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_f64_0, svfloat64x3_t, float64_t, -+ svst3_vnum_f64 (p0, x0, 0, z0), -+ svst3_vnum (p0, x0, 0, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_vnum_f64_1: -+** incb x0 -+** st3d {z0\.d - z2\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_f64_1, svfloat64x3_t, float64_t, -+ svst3_vnum_f64 (p0, x0, 1, z0), -+ svst3_vnum (p0, x0, 1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_vnum_f64_2: -+** incb x0, all, mul #2 -+** st3d {z0\.d - z2\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_f64_2, svfloat64x3_t, float64_t, -+ svst3_vnum_f64 (p0, x0, 2, z0), -+ svst3_vnum (p0, x0, 2, z0)) -+ -+/* -+** st3_vnum_f64_3: -+** st3d {z0\.d - z2\.d}, p0, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_f64_3, svfloat64x3_t, float64_t, -+ svst3_vnum_f64 (p0, x0, 3, z0), -+ svst3_vnum (p0, x0, 3, z0)) -+ -+/* -+** st3_vnum_f64_21: -+** st3d {z0\.d - z2\.d}, p0, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_f64_21, svfloat64x3_t, float64_t, -+ svst3_vnum_f64 (p0, x0, 21, z0), -+ svst3_vnum (p0, x0, 21, z0)) -+ -+/* -+** st3_vnum_f64_24: -+** addvl (x[0-9]+), x0, #24 -+** st3d {z0\.d - z2\.d}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_vnum_f64_24, svfloat64x3_t, float64_t, -+ svst3_vnum_f64 (p0, x0, 24, z0), -+ svst3_vnum (p0, x0, 24, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_vnum_f64_m1: -+** decb x0 -+** st3d {z0\.d - z2\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_f64_m1, svfloat64x3_t, float64_t, -+ svst3_vnum_f64 (p0, x0, -1, z0), -+ svst3_vnum (p0, x0, -1, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st3_vnum_f64_m2: -+** decb x0, all, mul #2 -+** st3d {z0\.d - z2\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_f64_m2, svfloat64x3_t, float64_t, -+ svst3_vnum_f64 (p0, x0, -2, z0), -+ svst3_vnum (p0, x0, -2, z0)) -+ -+/* -+** st3_vnum_f64_m3: -+** st3d {z0\.d - z2\.d}, p0, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_f64_m3, svfloat64x3_t, float64_t, -+ svst3_vnum_f64 (p0, x0, -3, z0), -+ svst3_vnum (p0, x0, -3, z0)) -+ -+/* -+** st3_vnum_f64_m24: -+** st3d {z0\.d - z2\.d}, p0, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_f64_m24, svfloat64x3_t, float64_t, -+ svst3_vnum_f64 (p0, x0, -24, z0), -+ svst3_vnum (p0, x0, -24, z0)) -+ -+/* -+** st3_vnum_f64_m27: -+** addvl (x[0-9]+), x0, #-27 -+** st3d {z0\.d - z2\.d}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_vnum_f64_m27, svfloat64x3_t, float64_t, -+ svst3_vnum_f64 (p0, x0, -27, z0), -+ svst3_vnum (p0, x0, -27, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** st3_vnum_f64_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st3d {z0\.d - z2\.d}, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st3_vnum_f64_x1, svfloat64x3_t, float64_t, -+ svst3_vnum_f64 (p0, x0, x1, z0), -+ svst3_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_s16.c -new file mode 100644 -index 000000000..a4a1109c5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_s16.c -@@ -0,0 +1,242 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st3_s16_base: -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_s16_base, svint16x3_t, int16_t, -+ svst3_s16 (p0, x0, z0), -+ svst3 (p0, x0, z0)) -+ -+/* -+** st3_s16_index: -+** st3h {z0\.h - z2\.h}, p0, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_STORE (st3_s16_index, svint16x3_t, int16_t, -+ svst3_s16 (p0, x0 + x1, z0), -+ svst3 (p0, x0 + x1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_s16_1: -+** incb x0 -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_s16_1, svint16x3_t, int16_t, -+ svst3_s16 (p0, x0 + svcnth (), z0), -+ svst3 (p0, x0 + svcnth (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_s16_2: -+** incb x0, all, mul #2 -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_s16_2, svint16x3_t, int16_t, -+ svst3_s16 (p0, x0 + svcnth () * 2, z0), -+ svst3 (p0, x0 + svcnth () * 2, z0)) -+ -+/* -+** st3_s16_3: -+** st3h {z0\.h - z2\.h}, p0, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_s16_3, svint16x3_t, int16_t, -+ svst3_s16 (p0, x0 + svcnth () * 3, z0), -+ svst3 (p0, x0 + svcnth () * 3, z0)) -+ -+/* -+** st3_s16_21: -+** st3h {z0\.h - z2\.h}, p0, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_s16_21, svint16x3_t, int16_t, -+ svst3_s16 (p0, x0 + svcnth () * 21, z0), -+ svst3 (p0, x0 + svcnth () * 21, z0)) -+ -+/* -+** st3_s16_24: -+** addvl (x[0-9]+), x0, #24 -+** st3h {z0\.h - z2\.h}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_s16_24, svint16x3_t, int16_t, -+ svst3_s16 (p0, x0 + svcnth () * 24, z0), -+ svst3 (p0, x0 + svcnth () * 24, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st3_s16_m1: -+** decb x0 -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_s16_m1, svint16x3_t, int16_t, -+ svst3_s16 (p0, x0 - svcnth (), z0), -+ svst3 (p0, x0 - svcnth (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_s16_m2: -+** decb x0, all, mul #2 -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_s16_m2, svint16x3_t, int16_t, -+ svst3_s16 (p0, x0 - svcnth () * 2, z0), -+ svst3 (p0, x0 - svcnth () * 2, z0)) -+ -+/* -+** st3_s16_m3: -+** st3h {z0\.h - z2\.h}, p0, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_s16_m3, svint16x3_t, int16_t, -+ svst3_s16 (p0, x0 - svcnth () * 3, z0), -+ svst3 (p0, x0 - svcnth () * 3, z0)) -+ -+/* -+** st3_s16_m24: -+** st3h {z0\.h - z2\.h}, p0, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_s16_m24, svint16x3_t, int16_t, -+ svst3_s16 (p0, x0 - svcnth () * 24, z0), -+ svst3 (p0, x0 - svcnth () * 24, z0)) -+ -+/* -+** st3_s16_m27: -+** addvl (x[0-9]+), x0, #-27 -+** st3h {z0\.h - z2\.h}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_s16_m27, svint16x3_t, int16_t, -+ svst3_s16 (p0, x0 - svcnth () * 27, z0), -+ svst3 (p0, x0 - svcnth () * 27, z0)) -+ -+/* -+** st3_vnum_s16_0: -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s16_0, svint16x3_t, int16_t, -+ svst3_vnum_s16 (p0, x0, 0, z0), -+ svst3_vnum (p0, x0, 0, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_vnum_s16_1: -+** incb x0 -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s16_1, svint16x3_t, int16_t, -+ svst3_vnum_s16 (p0, x0, 1, z0), -+ svst3_vnum (p0, x0, 1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_vnum_s16_2: -+** incb x0, all, mul #2 -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s16_2, svint16x3_t, int16_t, -+ svst3_vnum_s16 (p0, x0, 2, z0), -+ svst3_vnum (p0, x0, 2, z0)) -+ -+/* -+** st3_vnum_s16_3: -+** st3h {z0\.h - z2\.h}, p0, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s16_3, svint16x3_t, int16_t, -+ svst3_vnum_s16 (p0, x0, 3, z0), -+ svst3_vnum (p0, x0, 3, z0)) -+ -+/* -+** st3_vnum_s16_21: -+** st3h {z0\.h - z2\.h}, p0, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s16_21, svint16x3_t, int16_t, -+ svst3_vnum_s16 (p0, x0, 21, z0), -+ svst3_vnum (p0, x0, 21, z0)) -+ -+/* -+** st3_vnum_s16_24: -+** addvl (x[0-9]+), x0, #24 -+** st3h {z0\.h - z2\.h}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s16_24, svint16x3_t, int16_t, -+ svst3_vnum_s16 (p0, x0, 24, z0), -+ svst3_vnum (p0, x0, 24, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_vnum_s16_m1: -+** decb x0 -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s16_m1, svint16x3_t, int16_t, -+ svst3_vnum_s16 (p0, x0, -1, z0), -+ svst3_vnum (p0, x0, -1, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st3_vnum_s16_m2: -+** decb x0, all, mul #2 -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s16_m2, svint16x3_t, int16_t, -+ svst3_vnum_s16 (p0, x0, -2, z0), -+ svst3_vnum (p0, x0, -2, z0)) -+ -+/* -+** st3_vnum_s16_m3: -+** st3h {z0\.h - z2\.h}, p0, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s16_m3, svint16x3_t, int16_t, -+ svst3_vnum_s16 (p0, x0, -3, z0), -+ svst3_vnum (p0, x0, -3, z0)) -+ -+/* -+** st3_vnum_s16_m24: -+** st3h {z0\.h - z2\.h}, p0, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s16_m24, svint16x3_t, int16_t, -+ svst3_vnum_s16 (p0, x0, -24, z0), -+ svst3_vnum (p0, x0, -24, z0)) -+ -+/* -+** st3_vnum_s16_m27: -+** addvl (x[0-9]+), x0, #-27 -+** st3h {z0\.h - z2\.h}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s16_m27, svint16x3_t, int16_t, -+ svst3_vnum_s16 (p0, x0, -27, z0), -+ svst3_vnum (p0, x0, -27, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** st3_vnum_s16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st3h {z0\.h - z2\.h}, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s16_x1, svint16x3_t, int16_t, -+ svst3_vnum_s16 (p0, x0, x1, z0), -+ svst3_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_s32.c -new file mode 100644 -index 000000000..2442d9b28 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_s32.c -@@ -0,0 +1,242 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st3_s32_base: -+** st3w {z0\.s - z2\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_s32_base, svint32x3_t, int32_t, -+ svst3_s32 (p0, x0, z0), -+ svst3 (p0, x0, z0)) -+ -+/* -+** st3_s32_index: -+** st3w {z0\.s - z2\.s}, p0, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_STORE (st3_s32_index, svint32x3_t, int32_t, -+ svst3_s32 (p0, x0 + x1, z0), -+ svst3 (p0, x0 + x1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_s32_1: -+** incb x0 -+** st3w {z0\.s - z2\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_s32_1, svint32x3_t, int32_t, -+ svst3_s32 (p0, x0 + svcntw (), z0), -+ svst3 (p0, x0 + svcntw (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_s32_2: -+** incb x0, all, mul #2 -+** st3w {z0\.s - z2\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_s32_2, svint32x3_t, int32_t, -+ svst3_s32 (p0, x0 + svcntw () * 2, z0), -+ svst3 (p0, x0 + svcntw () * 2, z0)) -+ -+/* -+** st3_s32_3: -+** st3w {z0\.s - z2\.s}, p0, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_s32_3, svint32x3_t, int32_t, -+ svst3_s32 (p0, x0 + svcntw () * 3, z0), -+ svst3 (p0, x0 + svcntw () * 3, z0)) -+ -+/* -+** st3_s32_21: -+** st3w {z0\.s - z2\.s}, p0, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_s32_21, svint32x3_t, int32_t, -+ svst3_s32 (p0, x0 + svcntw () * 21, z0), -+ svst3 (p0, x0 + svcntw () * 21, z0)) -+ -+/* -+** st3_s32_24: -+** addvl (x[0-9]+), x0, #24 -+** st3w {z0\.s - z2\.s}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_s32_24, svint32x3_t, int32_t, -+ svst3_s32 (p0, x0 + svcntw () * 24, z0), -+ svst3 (p0, x0 + svcntw () * 24, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st3_s32_m1: -+** decb x0 -+** st3w {z0\.s - z2\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_s32_m1, svint32x3_t, int32_t, -+ svst3_s32 (p0, x0 - svcntw (), z0), -+ svst3 (p0, x0 - svcntw (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_s32_m2: -+** decb x0, all, mul #2 -+** st3w {z0\.s - z2\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_s32_m2, svint32x3_t, int32_t, -+ svst3_s32 (p0, x0 - svcntw () * 2, z0), -+ svst3 (p0, x0 - svcntw () * 2, z0)) -+ -+/* -+** st3_s32_m3: -+** st3w {z0\.s - z2\.s}, p0, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_s32_m3, svint32x3_t, int32_t, -+ svst3_s32 (p0, x0 - svcntw () * 3, z0), -+ svst3 (p0, x0 - svcntw () * 3, z0)) -+ -+/* -+** st3_s32_m24: -+** st3w {z0\.s - z2\.s}, p0, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_s32_m24, svint32x3_t, int32_t, -+ svst3_s32 (p0, x0 - svcntw () * 24, z0), -+ svst3 (p0, x0 - svcntw () * 24, z0)) -+ -+/* -+** st3_s32_m27: -+** addvl (x[0-9]+), x0, #-27 -+** st3w {z0\.s - z2\.s}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_s32_m27, svint32x3_t, int32_t, -+ svst3_s32 (p0, x0 - svcntw () * 27, z0), -+ svst3 (p0, x0 - svcntw () * 27, z0)) -+ -+/* -+** st3_vnum_s32_0: -+** st3w {z0\.s - z2\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s32_0, svint32x3_t, int32_t, -+ svst3_vnum_s32 (p0, x0, 0, z0), -+ svst3_vnum (p0, x0, 0, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_vnum_s32_1: -+** incb x0 -+** st3w {z0\.s - z2\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s32_1, svint32x3_t, int32_t, -+ svst3_vnum_s32 (p0, x0, 1, z0), -+ svst3_vnum (p0, x0, 1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_vnum_s32_2: -+** incb x0, all, mul #2 -+** st3w {z0\.s - z2\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s32_2, svint32x3_t, int32_t, -+ svst3_vnum_s32 (p0, x0, 2, z0), -+ svst3_vnum (p0, x0, 2, z0)) -+ -+/* -+** st3_vnum_s32_3: -+** st3w {z0\.s - z2\.s}, p0, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s32_3, svint32x3_t, int32_t, -+ svst3_vnum_s32 (p0, x0, 3, z0), -+ svst3_vnum (p0, x0, 3, z0)) -+ -+/* -+** st3_vnum_s32_21: -+** st3w {z0\.s - z2\.s}, p0, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s32_21, svint32x3_t, int32_t, -+ svst3_vnum_s32 (p0, x0, 21, z0), -+ svst3_vnum (p0, x0, 21, z0)) -+ -+/* -+** st3_vnum_s32_24: -+** addvl (x[0-9]+), x0, #24 -+** st3w {z0\.s - z2\.s}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s32_24, svint32x3_t, int32_t, -+ svst3_vnum_s32 (p0, x0, 24, z0), -+ svst3_vnum (p0, x0, 24, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_vnum_s32_m1: -+** decb x0 -+** st3w {z0\.s - z2\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s32_m1, svint32x3_t, int32_t, -+ svst3_vnum_s32 (p0, x0, -1, z0), -+ svst3_vnum (p0, x0, -1, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st3_vnum_s32_m2: -+** decb x0, all, mul #2 -+** st3w {z0\.s - z2\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s32_m2, svint32x3_t, int32_t, -+ svst3_vnum_s32 (p0, x0, -2, z0), -+ svst3_vnum (p0, x0, -2, z0)) -+ -+/* -+** st3_vnum_s32_m3: -+** st3w {z0\.s - z2\.s}, p0, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s32_m3, svint32x3_t, int32_t, -+ svst3_vnum_s32 (p0, x0, -3, z0), -+ svst3_vnum (p0, x0, -3, z0)) -+ -+/* -+** st3_vnum_s32_m24: -+** st3w {z0\.s - z2\.s}, p0, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s32_m24, svint32x3_t, int32_t, -+ svst3_vnum_s32 (p0, x0, -24, z0), -+ svst3_vnum (p0, x0, -24, z0)) -+ -+/* -+** st3_vnum_s32_m27: -+** addvl (x[0-9]+), x0, #-27 -+** st3w {z0\.s - z2\.s}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s32_m27, svint32x3_t, int32_t, -+ svst3_vnum_s32 (p0, x0, -27, z0), -+ svst3_vnum (p0, x0, -27, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** st3_vnum_s32_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st3w {z0\.s - z2\.s}, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s32_x1, svint32x3_t, int32_t, -+ svst3_vnum_s32 (p0, x0, x1, z0), -+ svst3_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_s64.c -new file mode 100644 -index 000000000..eca6a7cea ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_s64.c -@@ -0,0 +1,242 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st3_s64_base: -+** st3d {z0\.d - z2\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_s64_base, svint64x3_t, int64_t, -+ svst3_s64 (p0, x0, z0), -+ svst3 (p0, x0, z0)) -+ -+/* -+** st3_s64_index: -+** st3d {z0\.d - z2\.d}, p0, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_STORE (st3_s64_index, svint64x3_t, int64_t, -+ svst3_s64 (p0, x0 + x1, z0), -+ svst3 (p0, x0 + x1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_s64_1: -+** incb x0 -+** st3d {z0\.d - z2\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_s64_1, svint64x3_t, int64_t, -+ svst3_s64 (p0, x0 + svcntd (), z0), -+ svst3 (p0, x0 + svcntd (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_s64_2: -+** incb x0, all, mul #2 -+** st3d {z0\.d - z2\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_s64_2, svint64x3_t, int64_t, -+ svst3_s64 (p0, x0 + svcntd () * 2, z0), -+ svst3 (p0, x0 + svcntd () * 2, z0)) -+ -+/* -+** st3_s64_3: -+** st3d {z0\.d - z2\.d}, p0, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_s64_3, svint64x3_t, int64_t, -+ svst3_s64 (p0, x0 + svcntd () * 3, z0), -+ svst3 (p0, x0 + svcntd () * 3, z0)) -+ -+/* -+** st3_s64_21: -+** st3d {z0\.d - z2\.d}, p0, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_s64_21, svint64x3_t, int64_t, -+ svst3_s64 (p0, x0 + svcntd () * 21, z0), -+ svst3 (p0, x0 + svcntd () * 21, z0)) -+ -+/* -+** st3_s64_24: -+** addvl (x[0-9]+), x0, #24 -+** st3d {z0\.d - z2\.d}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_s64_24, svint64x3_t, int64_t, -+ svst3_s64 (p0, x0 + svcntd () * 24, z0), -+ svst3 (p0, x0 + svcntd () * 24, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st3_s64_m1: -+** decb x0 -+** st3d {z0\.d - z2\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_s64_m1, svint64x3_t, int64_t, -+ svst3_s64 (p0, x0 - svcntd (), z0), -+ svst3 (p0, x0 - svcntd (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_s64_m2: -+** decb x0, all, mul #2 -+** st3d {z0\.d - z2\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_s64_m2, svint64x3_t, int64_t, -+ svst3_s64 (p0, x0 - svcntd () * 2, z0), -+ svst3 (p0, x0 - svcntd () * 2, z0)) -+ -+/* -+** st3_s64_m3: -+** st3d {z0\.d - z2\.d}, p0, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_s64_m3, svint64x3_t, int64_t, -+ svst3_s64 (p0, x0 - svcntd () * 3, z0), -+ svst3 (p0, x0 - svcntd () * 3, z0)) -+ -+/* -+** st3_s64_m24: -+** st3d {z0\.d - z2\.d}, p0, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_s64_m24, svint64x3_t, int64_t, -+ svst3_s64 (p0, x0 - svcntd () * 24, z0), -+ svst3 (p0, x0 - svcntd () * 24, z0)) -+ -+/* -+** st3_s64_m27: -+** addvl (x[0-9]+), x0, #-27 -+** st3d {z0\.d - z2\.d}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_s64_m27, svint64x3_t, int64_t, -+ svst3_s64 (p0, x0 - svcntd () * 27, z0), -+ svst3 (p0, x0 - svcntd () * 27, z0)) -+ -+/* -+** st3_vnum_s64_0: -+** st3d {z0\.d - z2\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s64_0, svint64x3_t, int64_t, -+ svst3_vnum_s64 (p0, x0, 0, z0), -+ svst3_vnum (p0, x0, 0, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_vnum_s64_1: -+** incb x0 -+** st3d {z0\.d - z2\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s64_1, svint64x3_t, int64_t, -+ svst3_vnum_s64 (p0, x0, 1, z0), -+ svst3_vnum (p0, x0, 1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_vnum_s64_2: -+** incb x0, all, mul #2 -+** st3d {z0\.d - z2\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s64_2, svint64x3_t, int64_t, -+ svst3_vnum_s64 (p0, x0, 2, z0), -+ svst3_vnum (p0, x0, 2, z0)) -+ -+/* -+** st3_vnum_s64_3: -+** st3d {z0\.d - z2\.d}, p0, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s64_3, svint64x3_t, int64_t, -+ svst3_vnum_s64 (p0, x0, 3, z0), -+ svst3_vnum (p0, x0, 3, z0)) -+ -+/* -+** st3_vnum_s64_21: -+** st3d {z0\.d - z2\.d}, p0, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s64_21, svint64x3_t, int64_t, -+ svst3_vnum_s64 (p0, x0, 21, z0), -+ svst3_vnum (p0, x0, 21, z0)) -+ -+/* -+** st3_vnum_s64_24: -+** addvl (x[0-9]+), x0, #24 -+** st3d {z0\.d - z2\.d}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s64_24, svint64x3_t, int64_t, -+ svst3_vnum_s64 (p0, x0, 24, z0), -+ svst3_vnum (p0, x0, 24, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_vnum_s64_m1: -+** decb x0 -+** st3d {z0\.d - z2\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s64_m1, svint64x3_t, int64_t, -+ svst3_vnum_s64 (p0, x0, -1, z0), -+ svst3_vnum (p0, x0, -1, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st3_vnum_s64_m2: -+** decb x0, all, mul #2 -+** st3d {z0\.d - z2\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s64_m2, svint64x3_t, int64_t, -+ svst3_vnum_s64 (p0, x0, -2, z0), -+ svst3_vnum (p0, x0, -2, z0)) -+ -+/* -+** st3_vnum_s64_m3: -+** st3d {z0\.d - z2\.d}, p0, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s64_m3, svint64x3_t, int64_t, -+ svst3_vnum_s64 (p0, x0, -3, z0), -+ svst3_vnum (p0, x0, -3, z0)) -+ -+/* -+** st3_vnum_s64_m24: -+** st3d {z0\.d - z2\.d}, p0, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s64_m24, svint64x3_t, int64_t, -+ svst3_vnum_s64 (p0, x0, -24, z0), -+ svst3_vnum (p0, x0, -24, z0)) -+ -+/* -+** st3_vnum_s64_m27: -+** addvl (x[0-9]+), x0, #-27 -+** st3d {z0\.d - z2\.d}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s64_m27, svint64x3_t, int64_t, -+ svst3_vnum_s64 (p0, x0, -27, z0), -+ svst3_vnum (p0, x0, -27, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** st3_vnum_s64_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st3d {z0\.d - z2\.d}, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s64_x1, svint64x3_t, int64_t, -+ svst3_vnum_s64 (p0, x0, x1, z0), -+ svst3_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_s8.c -new file mode 100644 -index 000000000..a54ff4b74 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_s8.c -@@ -0,0 +1,246 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st3_s8_base: -+** st3b {z0\.b - z2\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_s8_base, svint8x3_t, int8_t, -+ svst3_s8 (p0, x0, z0), -+ svst3 (p0, x0, z0)) -+ -+/* -+** st3_s8_index: -+** st3b {z0\.b - z2\.b}, p0, \[x0, x1\] -+** ret -+*/ -+TEST_STORE (st3_s8_index, svint8x3_t, int8_t, -+ svst3_s8 (p0, x0 + x1, z0), -+ svst3 (p0, x0 + x1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_s8_1: -+** incb x0 -+** st3b {z0\.b - z2\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_s8_1, svint8x3_t, int8_t, -+ svst3_s8 (p0, x0 + svcntb (), z0), -+ svst3 (p0, x0 + svcntb (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_s8_2: -+** incb x0, all, mul #2 -+** st3b {z0\.b - z2\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_s8_2, svint8x3_t, int8_t, -+ svst3_s8 (p0, x0 + svcntb () * 2, z0), -+ svst3 (p0, x0 + svcntb () * 2, z0)) -+ -+/* -+** st3_s8_3: -+** st3b {z0\.b - z2\.b}, p0, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_s8_3, svint8x3_t, int8_t, -+ svst3_s8 (p0, x0 + svcntb () * 3, z0), -+ svst3 (p0, x0 + svcntb () * 3, z0)) -+ -+/* -+** st3_s8_21: -+** st3b {z0\.b - z2\.b}, p0, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_s8_21, svint8x3_t, int8_t, -+ svst3_s8 (p0, x0 + svcntb () * 21, z0), -+ svst3 (p0, x0 + svcntb () * 21, z0)) -+ -+/* -+** st3_s8_24: -+** addvl (x[0-9]+), x0, #24 -+** st3b {z0\.b - z2\.b}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_s8_24, svint8x3_t, int8_t, -+ svst3_s8 (p0, x0 + svcntb () * 24, z0), -+ svst3 (p0, x0 + svcntb () * 24, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_s8_m1: -+** decb x0 -+** st3b {z0\.b - z2\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_s8_m1, svint8x3_t, int8_t, -+ svst3_s8 (p0, x0 - svcntb (), z0), -+ svst3 (p0, x0 - svcntb (), z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st3_s8_m2: -+** decb x0, all, mul #2 -+** st3b {z0\.b - z2\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_s8_m2, svint8x3_t, int8_t, -+ svst3_s8 (p0, x0 - svcntb () * 2, z0), -+ svst3 (p0, x0 - svcntb () * 2, z0)) -+ -+/* -+** st3_s8_m3: -+** st3b {z0\.b - z2\.b}, p0, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_s8_m3, svint8x3_t, int8_t, -+ svst3_s8 (p0, x0 - svcntb () * 3, z0), -+ svst3 (p0, x0 - svcntb () * 3, z0)) -+ -+/* -+** st3_s8_m24: -+** st3b {z0\.b - z2\.b}, p0, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_s8_m24, svint8x3_t, int8_t, -+ svst3_s8 (p0, x0 - svcntb () * 24, z0), -+ svst3 (p0, x0 - svcntb () * 24, z0)) -+ -+/* -+** st3_s8_m27: -+** addvl (x[0-9]+), x0, #-27 -+** st3b {z0\.b - z2\.b}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_s8_m27, svint8x3_t, int8_t, -+ svst3_s8 (p0, x0 - svcntb () * 27, z0), -+ svst3 (p0, x0 - svcntb () * 27, z0)) -+ -+/* -+** st3_vnum_s8_0: -+** st3b {z0\.b - z2\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s8_0, svint8x3_t, int8_t, -+ svst3_vnum_s8 (p0, x0, 0, z0), -+ svst3_vnum (p0, x0, 0, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_vnum_s8_1: -+** incb x0 -+** st3b {z0\.b - z2\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s8_1, svint8x3_t, int8_t, -+ svst3_vnum_s8 (p0, x0, 1, z0), -+ svst3_vnum (p0, x0, 1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_vnum_s8_2: -+** incb x0, all, mul #2 -+** st3b {z0\.b - z2\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s8_2, svint8x3_t, int8_t, -+ svst3_vnum_s8 (p0, x0, 2, z0), -+ svst3_vnum (p0, x0, 2, z0)) -+ -+/* -+** st3_vnum_s8_3: -+** st3b {z0\.b - z2\.b}, p0, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s8_3, svint8x3_t, int8_t, -+ svst3_vnum_s8 (p0, x0, 3, z0), -+ svst3_vnum (p0, x0, 3, z0)) -+ -+/* -+** st3_vnum_s8_21: -+** st3b {z0\.b - z2\.b}, p0, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s8_21, svint8x3_t, int8_t, -+ svst3_vnum_s8 (p0, x0, 21, z0), -+ svst3_vnum (p0, x0, 21, z0)) -+ -+/* -+** st3_vnum_s8_24: -+** addvl (x[0-9]+), x0, #24 -+** st3b {z0\.b - z2\.b}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s8_24, svint8x3_t, int8_t, -+ svst3_vnum_s8 (p0, x0, 24, z0), -+ svst3_vnum (p0, x0, 24, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_vnum_s8_m1: -+** decb x0 -+** st3b {z0\.b - z2\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s8_m1, svint8x3_t, int8_t, -+ svst3_vnum_s8 (p0, x0, -1, z0), -+ svst3_vnum (p0, x0, -1, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st3_vnum_s8_m2: -+** decb x0, all, mul #2 -+** st3b {z0\.b - z2\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s8_m2, svint8x3_t, int8_t, -+ svst3_vnum_s8 (p0, x0, -2, z0), -+ svst3_vnum (p0, x0, -2, z0)) -+ -+/* -+** st3_vnum_s8_m3: -+** st3b {z0\.b - z2\.b}, p0, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s8_m3, svint8x3_t, int8_t, -+ svst3_vnum_s8 (p0, x0, -3, z0), -+ svst3_vnum (p0, x0, -3, z0)) -+ -+/* -+** st3_vnum_s8_m24: -+** st3b {z0\.b - z2\.b}, p0, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s8_m24, svint8x3_t, int8_t, -+ svst3_vnum_s8 (p0, x0, -24, z0), -+ svst3_vnum (p0, x0, -24, z0)) -+ -+/* -+** st3_vnum_s8_m27: -+** addvl (x[0-9]+), x0, #-27 -+** st3b {z0\.b - z2\.b}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_vnum_s8_m27, svint8x3_t, int8_t, -+ svst3_vnum_s8 (p0, x0, -27, z0), -+ svst3_vnum (p0, x0, -27, z0)) -+ -+/* -+** st3_vnum_s8_x1: -+** cntb (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** st3b {z0\.b - z2\.b}, p0, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** st3b {z0\.b - z2\.b}, p0, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_STORE (st3_vnum_s8_x1, svint8x3_t, int8_t, -+ svst3_vnum_s8 (p0, x0, x1, z0), -+ svst3_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_u16.c -new file mode 100644 -index 000000000..d4e8efca3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_u16.c -@@ -0,0 +1,242 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st3_u16_base: -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_u16_base, svuint16x3_t, uint16_t, -+ svst3_u16 (p0, x0, z0), -+ svst3 (p0, x0, z0)) -+ -+/* -+** st3_u16_index: -+** st3h {z0\.h - z2\.h}, p0, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_STORE (st3_u16_index, svuint16x3_t, uint16_t, -+ svst3_u16 (p0, x0 + x1, z0), -+ svst3 (p0, x0 + x1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_u16_1: -+** incb x0 -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_u16_1, svuint16x3_t, uint16_t, -+ svst3_u16 (p0, x0 + svcnth (), z0), -+ svst3 (p0, x0 + svcnth (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_u16_2: -+** incb x0, all, mul #2 -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_u16_2, svuint16x3_t, uint16_t, -+ svst3_u16 (p0, x0 + svcnth () * 2, z0), -+ svst3 (p0, x0 + svcnth () * 2, z0)) -+ -+/* -+** st3_u16_3: -+** st3h {z0\.h - z2\.h}, p0, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_u16_3, svuint16x3_t, uint16_t, -+ svst3_u16 (p0, x0 + svcnth () * 3, z0), -+ svst3 (p0, x0 + svcnth () * 3, z0)) -+ -+/* -+** st3_u16_21: -+** st3h {z0\.h - z2\.h}, p0, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_u16_21, svuint16x3_t, uint16_t, -+ svst3_u16 (p0, x0 + svcnth () * 21, z0), -+ svst3 (p0, x0 + svcnth () * 21, z0)) -+ -+/* -+** st3_u16_24: -+** addvl (x[0-9]+), x0, #24 -+** st3h {z0\.h - z2\.h}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_u16_24, svuint16x3_t, uint16_t, -+ svst3_u16 (p0, x0 + svcnth () * 24, z0), -+ svst3 (p0, x0 + svcnth () * 24, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st3_u16_m1: -+** decb x0 -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_u16_m1, svuint16x3_t, uint16_t, -+ svst3_u16 (p0, x0 - svcnth (), z0), -+ svst3 (p0, x0 - svcnth (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_u16_m2: -+** decb x0, all, mul #2 -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_u16_m2, svuint16x3_t, uint16_t, -+ svst3_u16 (p0, x0 - svcnth () * 2, z0), -+ svst3 (p0, x0 - svcnth () * 2, z0)) -+ -+/* -+** st3_u16_m3: -+** st3h {z0\.h - z2\.h}, p0, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_u16_m3, svuint16x3_t, uint16_t, -+ svst3_u16 (p0, x0 - svcnth () * 3, z0), -+ svst3 (p0, x0 - svcnth () * 3, z0)) -+ -+/* -+** st3_u16_m24: -+** st3h {z0\.h - z2\.h}, p0, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_u16_m24, svuint16x3_t, uint16_t, -+ svst3_u16 (p0, x0 - svcnth () * 24, z0), -+ svst3 (p0, x0 - svcnth () * 24, z0)) -+ -+/* -+** st3_u16_m27: -+** addvl (x[0-9]+), x0, #-27 -+** st3h {z0\.h - z2\.h}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_u16_m27, svuint16x3_t, uint16_t, -+ svst3_u16 (p0, x0 - svcnth () * 27, z0), -+ svst3 (p0, x0 - svcnth () * 27, z0)) -+ -+/* -+** st3_vnum_u16_0: -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u16_0, svuint16x3_t, uint16_t, -+ svst3_vnum_u16 (p0, x0, 0, z0), -+ svst3_vnum (p0, x0, 0, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_vnum_u16_1: -+** incb x0 -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u16_1, svuint16x3_t, uint16_t, -+ svst3_vnum_u16 (p0, x0, 1, z0), -+ svst3_vnum (p0, x0, 1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_vnum_u16_2: -+** incb x0, all, mul #2 -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u16_2, svuint16x3_t, uint16_t, -+ svst3_vnum_u16 (p0, x0, 2, z0), -+ svst3_vnum (p0, x0, 2, z0)) -+ -+/* -+** st3_vnum_u16_3: -+** st3h {z0\.h - z2\.h}, p0, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u16_3, svuint16x3_t, uint16_t, -+ svst3_vnum_u16 (p0, x0, 3, z0), -+ svst3_vnum (p0, x0, 3, z0)) -+ -+/* -+** st3_vnum_u16_21: -+** st3h {z0\.h - z2\.h}, p0, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u16_21, svuint16x3_t, uint16_t, -+ svst3_vnum_u16 (p0, x0, 21, z0), -+ svst3_vnum (p0, x0, 21, z0)) -+ -+/* -+** st3_vnum_u16_24: -+** addvl (x[0-9]+), x0, #24 -+** st3h {z0\.h - z2\.h}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u16_24, svuint16x3_t, uint16_t, -+ svst3_vnum_u16 (p0, x0, 24, z0), -+ svst3_vnum (p0, x0, 24, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_vnum_u16_m1: -+** decb x0 -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u16_m1, svuint16x3_t, uint16_t, -+ svst3_vnum_u16 (p0, x0, -1, z0), -+ svst3_vnum (p0, x0, -1, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st3_vnum_u16_m2: -+** decb x0, all, mul #2 -+** st3h {z0\.h - z2\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u16_m2, svuint16x3_t, uint16_t, -+ svst3_vnum_u16 (p0, x0, -2, z0), -+ svst3_vnum (p0, x0, -2, z0)) -+ -+/* -+** st3_vnum_u16_m3: -+** st3h {z0\.h - z2\.h}, p0, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u16_m3, svuint16x3_t, uint16_t, -+ svst3_vnum_u16 (p0, x0, -3, z0), -+ svst3_vnum (p0, x0, -3, z0)) -+ -+/* -+** st3_vnum_u16_m24: -+** st3h {z0\.h - z2\.h}, p0, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u16_m24, svuint16x3_t, uint16_t, -+ svst3_vnum_u16 (p0, x0, -24, z0), -+ svst3_vnum (p0, x0, -24, z0)) -+ -+/* -+** st3_vnum_u16_m27: -+** addvl (x[0-9]+), x0, #-27 -+** st3h {z0\.h - z2\.h}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u16_m27, svuint16x3_t, uint16_t, -+ svst3_vnum_u16 (p0, x0, -27, z0), -+ svst3_vnum (p0, x0, -27, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** st3_vnum_u16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st3h {z0\.h - z2\.h}, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u16_x1, svuint16x3_t, uint16_t, -+ svst3_vnum_u16 (p0, x0, x1, z0), -+ svst3_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_u32.c -new file mode 100644 -index 000000000..8be3aa957 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_u32.c -@@ -0,0 +1,242 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st3_u32_base: -+** st3w {z0\.s - z2\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_u32_base, svuint32x3_t, uint32_t, -+ svst3_u32 (p0, x0, z0), -+ svst3 (p0, x0, z0)) -+ -+/* -+** st3_u32_index: -+** st3w {z0\.s - z2\.s}, p0, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_STORE (st3_u32_index, svuint32x3_t, uint32_t, -+ svst3_u32 (p0, x0 + x1, z0), -+ svst3 (p0, x0 + x1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_u32_1: -+** incb x0 -+** st3w {z0\.s - z2\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_u32_1, svuint32x3_t, uint32_t, -+ svst3_u32 (p0, x0 + svcntw (), z0), -+ svst3 (p0, x0 + svcntw (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_u32_2: -+** incb x0, all, mul #2 -+** st3w {z0\.s - z2\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_u32_2, svuint32x3_t, uint32_t, -+ svst3_u32 (p0, x0 + svcntw () * 2, z0), -+ svst3 (p0, x0 + svcntw () * 2, z0)) -+ -+/* -+** st3_u32_3: -+** st3w {z0\.s - z2\.s}, p0, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_u32_3, svuint32x3_t, uint32_t, -+ svst3_u32 (p0, x0 + svcntw () * 3, z0), -+ svst3 (p0, x0 + svcntw () * 3, z0)) -+ -+/* -+** st3_u32_21: -+** st3w {z0\.s - z2\.s}, p0, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_u32_21, svuint32x3_t, uint32_t, -+ svst3_u32 (p0, x0 + svcntw () * 21, z0), -+ svst3 (p0, x0 + svcntw () * 21, z0)) -+ -+/* -+** st3_u32_24: -+** addvl (x[0-9]+), x0, #24 -+** st3w {z0\.s - z2\.s}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_u32_24, svuint32x3_t, uint32_t, -+ svst3_u32 (p0, x0 + svcntw () * 24, z0), -+ svst3 (p0, x0 + svcntw () * 24, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st3_u32_m1: -+** decb x0 -+** st3w {z0\.s - z2\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_u32_m1, svuint32x3_t, uint32_t, -+ svst3_u32 (p0, x0 - svcntw (), z0), -+ svst3 (p0, x0 - svcntw (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_u32_m2: -+** decb x0, all, mul #2 -+** st3w {z0\.s - z2\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_u32_m2, svuint32x3_t, uint32_t, -+ svst3_u32 (p0, x0 - svcntw () * 2, z0), -+ svst3 (p0, x0 - svcntw () * 2, z0)) -+ -+/* -+** st3_u32_m3: -+** st3w {z0\.s - z2\.s}, p0, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_u32_m3, svuint32x3_t, uint32_t, -+ svst3_u32 (p0, x0 - svcntw () * 3, z0), -+ svst3 (p0, x0 - svcntw () * 3, z0)) -+ -+/* -+** st3_u32_m24: -+** st3w {z0\.s - z2\.s}, p0, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_u32_m24, svuint32x3_t, uint32_t, -+ svst3_u32 (p0, x0 - svcntw () * 24, z0), -+ svst3 (p0, x0 - svcntw () * 24, z0)) -+ -+/* -+** st3_u32_m27: -+** addvl (x[0-9]+), x0, #-27 -+** st3w {z0\.s - z2\.s}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_u32_m27, svuint32x3_t, uint32_t, -+ svst3_u32 (p0, x0 - svcntw () * 27, z0), -+ svst3 (p0, x0 - svcntw () * 27, z0)) -+ -+/* -+** st3_vnum_u32_0: -+** st3w {z0\.s - z2\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u32_0, svuint32x3_t, uint32_t, -+ svst3_vnum_u32 (p0, x0, 0, z0), -+ svst3_vnum (p0, x0, 0, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_vnum_u32_1: -+** incb x0 -+** st3w {z0\.s - z2\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u32_1, svuint32x3_t, uint32_t, -+ svst3_vnum_u32 (p0, x0, 1, z0), -+ svst3_vnum (p0, x0, 1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_vnum_u32_2: -+** incb x0, all, mul #2 -+** st3w {z0\.s - z2\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u32_2, svuint32x3_t, uint32_t, -+ svst3_vnum_u32 (p0, x0, 2, z0), -+ svst3_vnum (p0, x0, 2, z0)) -+ -+/* -+** st3_vnum_u32_3: -+** st3w {z0\.s - z2\.s}, p0, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u32_3, svuint32x3_t, uint32_t, -+ svst3_vnum_u32 (p0, x0, 3, z0), -+ svst3_vnum (p0, x0, 3, z0)) -+ -+/* -+** st3_vnum_u32_21: -+** st3w {z0\.s - z2\.s}, p0, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u32_21, svuint32x3_t, uint32_t, -+ svst3_vnum_u32 (p0, x0, 21, z0), -+ svst3_vnum (p0, x0, 21, z0)) -+ -+/* -+** st3_vnum_u32_24: -+** addvl (x[0-9]+), x0, #24 -+** st3w {z0\.s - z2\.s}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u32_24, svuint32x3_t, uint32_t, -+ svst3_vnum_u32 (p0, x0, 24, z0), -+ svst3_vnum (p0, x0, 24, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_vnum_u32_m1: -+** decb x0 -+** st3w {z0\.s - z2\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u32_m1, svuint32x3_t, uint32_t, -+ svst3_vnum_u32 (p0, x0, -1, z0), -+ svst3_vnum (p0, x0, -1, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st3_vnum_u32_m2: -+** decb x0, all, mul #2 -+** st3w {z0\.s - z2\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u32_m2, svuint32x3_t, uint32_t, -+ svst3_vnum_u32 (p0, x0, -2, z0), -+ svst3_vnum (p0, x0, -2, z0)) -+ -+/* -+** st3_vnum_u32_m3: -+** st3w {z0\.s - z2\.s}, p0, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u32_m3, svuint32x3_t, uint32_t, -+ svst3_vnum_u32 (p0, x0, -3, z0), -+ svst3_vnum (p0, x0, -3, z0)) -+ -+/* -+** st3_vnum_u32_m24: -+** st3w {z0\.s - z2\.s}, p0, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u32_m24, svuint32x3_t, uint32_t, -+ svst3_vnum_u32 (p0, x0, -24, z0), -+ svst3_vnum (p0, x0, -24, z0)) -+ -+/* -+** st3_vnum_u32_m27: -+** addvl (x[0-9]+), x0, #-27 -+** st3w {z0\.s - z2\.s}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u32_m27, svuint32x3_t, uint32_t, -+ svst3_vnum_u32 (p0, x0, -27, z0), -+ svst3_vnum (p0, x0, -27, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** st3_vnum_u32_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st3w {z0\.s - z2\.s}, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u32_x1, svuint32x3_t, uint32_t, -+ svst3_vnum_u32 (p0, x0, x1, z0), -+ svst3_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_u64.c -new file mode 100644 -index 000000000..31cb304ea ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_u64.c -@@ -0,0 +1,242 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st3_u64_base: -+** st3d {z0\.d - z2\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_u64_base, svuint64x3_t, uint64_t, -+ svst3_u64 (p0, x0, z0), -+ svst3 (p0, x0, z0)) -+ -+/* -+** st3_u64_index: -+** st3d {z0\.d - z2\.d}, p0, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_STORE (st3_u64_index, svuint64x3_t, uint64_t, -+ svst3_u64 (p0, x0 + x1, z0), -+ svst3 (p0, x0 + x1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_u64_1: -+** incb x0 -+** st3d {z0\.d - z2\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_u64_1, svuint64x3_t, uint64_t, -+ svst3_u64 (p0, x0 + svcntd (), z0), -+ svst3 (p0, x0 + svcntd (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_u64_2: -+** incb x0, all, mul #2 -+** st3d {z0\.d - z2\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_u64_2, svuint64x3_t, uint64_t, -+ svst3_u64 (p0, x0 + svcntd () * 2, z0), -+ svst3 (p0, x0 + svcntd () * 2, z0)) -+ -+/* -+** st3_u64_3: -+** st3d {z0\.d - z2\.d}, p0, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_u64_3, svuint64x3_t, uint64_t, -+ svst3_u64 (p0, x0 + svcntd () * 3, z0), -+ svst3 (p0, x0 + svcntd () * 3, z0)) -+ -+/* -+** st3_u64_21: -+** st3d {z0\.d - z2\.d}, p0, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_u64_21, svuint64x3_t, uint64_t, -+ svst3_u64 (p0, x0 + svcntd () * 21, z0), -+ svst3 (p0, x0 + svcntd () * 21, z0)) -+ -+/* -+** st3_u64_24: -+** addvl (x[0-9]+), x0, #24 -+** st3d {z0\.d - z2\.d}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_u64_24, svuint64x3_t, uint64_t, -+ svst3_u64 (p0, x0 + svcntd () * 24, z0), -+ svst3 (p0, x0 + svcntd () * 24, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st3_u64_m1: -+** decb x0 -+** st3d {z0\.d - z2\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_u64_m1, svuint64x3_t, uint64_t, -+ svst3_u64 (p0, x0 - svcntd (), z0), -+ svst3 (p0, x0 - svcntd (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_u64_m2: -+** decb x0, all, mul #2 -+** st3d {z0\.d - z2\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_u64_m2, svuint64x3_t, uint64_t, -+ svst3_u64 (p0, x0 - svcntd () * 2, z0), -+ svst3 (p0, x0 - svcntd () * 2, z0)) -+ -+/* -+** st3_u64_m3: -+** st3d {z0\.d - z2\.d}, p0, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_u64_m3, svuint64x3_t, uint64_t, -+ svst3_u64 (p0, x0 - svcntd () * 3, z0), -+ svst3 (p0, x0 - svcntd () * 3, z0)) -+ -+/* -+** st3_u64_m24: -+** st3d {z0\.d - z2\.d}, p0, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_u64_m24, svuint64x3_t, uint64_t, -+ svst3_u64 (p0, x0 - svcntd () * 24, z0), -+ svst3 (p0, x0 - svcntd () * 24, z0)) -+ -+/* -+** st3_u64_m27: -+** addvl (x[0-9]+), x0, #-27 -+** st3d {z0\.d - z2\.d}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_u64_m27, svuint64x3_t, uint64_t, -+ svst3_u64 (p0, x0 - svcntd () * 27, z0), -+ svst3 (p0, x0 - svcntd () * 27, z0)) -+ -+/* -+** st3_vnum_u64_0: -+** st3d {z0\.d - z2\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u64_0, svuint64x3_t, uint64_t, -+ svst3_vnum_u64 (p0, x0, 0, z0), -+ svst3_vnum (p0, x0, 0, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_vnum_u64_1: -+** incb x0 -+** st3d {z0\.d - z2\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u64_1, svuint64x3_t, uint64_t, -+ svst3_vnum_u64 (p0, x0, 1, z0), -+ svst3_vnum (p0, x0, 1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_vnum_u64_2: -+** incb x0, all, mul #2 -+** st3d {z0\.d - z2\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u64_2, svuint64x3_t, uint64_t, -+ svst3_vnum_u64 (p0, x0, 2, z0), -+ svst3_vnum (p0, x0, 2, z0)) -+ -+/* -+** st3_vnum_u64_3: -+** st3d {z0\.d - z2\.d}, p0, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u64_3, svuint64x3_t, uint64_t, -+ svst3_vnum_u64 (p0, x0, 3, z0), -+ svst3_vnum (p0, x0, 3, z0)) -+ -+/* -+** st3_vnum_u64_21: -+** st3d {z0\.d - z2\.d}, p0, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u64_21, svuint64x3_t, uint64_t, -+ svst3_vnum_u64 (p0, x0, 21, z0), -+ svst3_vnum (p0, x0, 21, z0)) -+ -+/* -+** st3_vnum_u64_24: -+** addvl (x[0-9]+), x0, #24 -+** st3d {z0\.d - z2\.d}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u64_24, svuint64x3_t, uint64_t, -+ svst3_vnum_u64 (p0, x0, 24, z0), -+ svst3_vnum (p0, x0, 24, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_vnum_u64_m1: -+** decb x0 -+** st3d {z0\.d - z2\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u64_m1, svuint64x3_t, uint64_t, -+ svst3_vnum_u64 (p0, x0, -1, z0), -+ svst3_vnum (p0, x0, -1, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st3_vnum_u64_m2: -+** decb x0, all, mul #2 -+** st3d {z0\.d - z2\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u64_m2, svuint64x3_t, uint64_t, -+ svst3_vnum_u64 (p0, x0, -2, z0), -+ svst3_vnum (p0, x0, -2, z0)) -+ -+/* -+** st3_vnum_u64_m3: -+** st3d {z0\.d - z2\.d}, p0, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u64_m3, svuint64x3_t, uint64_t, -+ svst3_vnum_u64 (p0, x0, -3, z0), -+ svst3_vnum (p0, x0, -3, z0)) -+ -+/* -+** st3_vnum_u64_m24: -+** st3d {z0\.d - z2\.d}, p0, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u64_m24, svuint64x3_t, uint64_t, -+ svst3_vnum_u64 (p0, x0, -24, z0), -+ svst3_vnum (p0, x0, -24, z0)) -+ -+/* -+** st3_vnum_u64_m27: -+** addvl (x[0-9]+), x0, #-27 -+** st3d {z0\.d - z2\.d}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u64_m27, svuint64x3_t, uint64_t, -+ svst3_vnum_u64 (p0, x0, -27, z0), -+ svst3_vnum (p0, x0, -27, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** st3_vnum_u64_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st3d {z0\.d - z2\.d}, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u64_x1, svuint64x3_t, uint64_t, -+ svst3_vnum_u64 (p0, x0, x1, z0), -+ svst3_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_u8.c -new file mode 100644 -index 000000000..e2d5a19ad ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st3_u8.c -@@ -0,0 +1,246 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st3_u8_base: -+** st3b {z0\.b - z2\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_u8_base, svuint8x3_t, uint8_t, -+ svst3_u8 (p0, x0, z0), -+ svst3 (p0, x0, z0)) -+ -+/* -+** st3_u8_index: -+** st3b {z0\.b - z2\.b}, p0, \[x0, x1\] -+** ret -+*/ -+TEST_STORE (st3_u8_index, svuint8x3_t, uint8_t, -+ svst3_u8 (p0, x0 + x1, z0), -+ svst3 (p0, x0 + x1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_u8_1: -+** incb x0 -+** st3b {z0\.b - z2\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_u8_1, svuint8x3_t, uint8_t, -+ svst3_u8 (p0, x0 + svcntb (), z0), -+ svst3 (p0, x0 + svcntb (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_u8_2: -+** incb x0, all, mul #2 -+** st3b {z0\.b - z2\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_u8_2, svuint8x3_t, uint8_t, -+ svst3_u8 (p0, x0 + svcntb () * 2, z0), -+ svst3 (p0, x0 + svcntb () * 2, z0)) -+ -+/* -+** st3_u8_3: -+** st3b {z0\.b - z2\.b}, p0, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_u8_3, svuint8x3_t, uint8_t, -+ svst3_u8 (p0, x0 + svcntb () * 3, z0), -+ svst3 (p0, x0 + svcntb () * 3, z0)) -+ -+/* -+** st3_u8_21: -+** st3b {z0\.b - z2\.b}, p0, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_u8_21, svuint8x3_t, uint8_t, -+ svst3_u8 (p0, x0 + svcntb () * 21, z0), -+ svst3 (p0, x0 + svcntb () * 21, z0)) -+ -+/* -+** st3_u8_24: -+** addvl (x[0-9]+), x0, #24 -+** st3b {z0\.b - z2\.b}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_u8_24, svuint8x3_t, uint8_t, -+ svst3_u8 (p0, x0 + svcntb () * 24, z0), -+ svst3 (p0, x0 + svcntb () * 24, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st3_u8_m1: -+** decb x0 -+** st3b {z0\.b - z2\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_u8_m1, svuint8x3_t, uint8_t, -+ svst3_u8 (p0, x0 - svcntb (), z0), -+ svst3 (p0, x0 - svcntb (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_u8_m2: -+** decb x0, all, mul #2 -+** st3b {z0\.b - z2\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_u8_m2, svuint8x3_t, uint8_t, -+ svst3_u8 (p0, x0 - svcntb () * 2, z0), -+ svst3 (p0, x0 - svcntb () * 2, z0)) -+ -+/* -+** st3_u8_m3: -+** st3b {z0\.b - z2\.b}, p0, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_u8_m3, svuint8x3_t, uint8_t, -+ svst3_u8 (p0, x0 - svcntb () * 3, z0), -+ svst3 (p0, x0 - svcntb () * 3, z0)) -+ -+/* -+** st3_u8_m24: -+** st3b {z0\.b - z2\.b}, p0, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_u8_m24, svuint8x3_t, uint8_t, -+ svst3_u8 (p0, x0 - svcntb () * 24, z0), -+ svst3 (p0, x0 - svcntb () * 24, z0)) -+ -+/* -+** st3_u8_m27: -+** addvl (x[0-9]+), x0, #-27 -+** st3b {z0\.b - z2\.b}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_u8_m27, svuint8x3_t, uint8_t, -+ svst3_u8 (p0, x0 - svcntb () * 27, z0), -+ svst3 (p0, x0 - svcntb () * 27, z0)) -+ -+/* -+** st3_vnum_u8_0: -+** st3b {z0\.b - z2\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u8_0, svuint8x3_t, uint8_t, -+ svst3_vnum_u8 (p0, x0, 0, z0), -+ svst3_vnum (p0, x0, 0, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_vnum_u8_1: -+** incb x0 -+** st3b {z0\.b - z2\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u8_1, svuint8x3_t, uint8_t, -+ svst3_vnum_u8 (p0, x0, 1, z0), -+ svst3_vnum (p0, x0, 1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_vnum_u8_2: -+** incb x0, all, mul #2 -+** st3b {z0\.b - z2\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u8_2, svuint8x3_t, uint8_t, -+ svst3_vnum_u8 (p0, x0, 2, z0), -+ svst3_vnum (p0, x0, 2, z0)) -+ -+/* -+** st3_vnum_u8_3: -+** st3b {z0\.b - z2\.b}, p0, \[x0, #3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u8_3, svuint8x3_t, uint8_t, -+ svst3_vnum_u8 (p0, x0, 3, z0), -+ svst3_vnum (p0, x0, 3, z0)) -+ -+/* -+** st3_vnum_u8_21: -+** st3b {z0\.b - z2\.b}, p0, \[x0, #21, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u8_21, svuint8x3_t, uint8_t, -+ svst3_vnum_u8 (p0, x0, 21, z0), -+ svst3_vnum (p0, x0, 21, z0)) -+ -+/* -+** st3_vnum_u8_24: -+** addvl (x[0-9]+), x0, #24 -+** st3b {z0\.b - z2\.b}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u8_24, svuint8x3_t, uint8_t, -+ svst3_vnum_u8 (p0, x0, 24, z0), -+ svst3_vnum (p0, x0, 24, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st3_vnum_u8_m1: -+** decb x0 -+** st3b {z0\.b - z2\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u8_m1, svuint8x3_t, uint8_t, -+ svst3_vnum_u8 (p0, x0, -1, z0), -+ svst3_vnum (p0, x0, -1, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st3_vnum_u8_m2: -+** decb x0, all, mul #2 -+** st3b {z0\.b - z2\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u8_m2, svuint8x3_t, uint8_t, -+ svst3_vnum_u8 (p0, x0, -2, z0), -+ svst3_vnum (p0, x0, -2, z0)) -+ -+/* -+** st3_vnum_u8_m3: -+** st3b {z0\.b - z2\.b}, p0, \[x0, #-3, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u8_m3, svuint8x3_t, uint8_t, -+ svst3_vnum_u8 (p0, x0, -3, z0), -+ svst3_vnum (p0, x0, -3, z0)) -+ -+/* -+** st3_vnum_u8_m24: -+** st3b {z0\.b - z2\.b}, p0, \[x0, #-24, mul vl\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u8_m24, svuint8x3_t, uint8_t, -+ svst3_vnum_u8 (p0, x0, -24, z0), -+ svst3_vnum (p0, x0, -24, z0)) -+ -+/* -+** st3_vnum_u8_m27: -+** addvl (x[0-9]+), x0, #-27 -+** st3b {z0\.b - z2\.b}, p0, \[\1\] -+** ret -+*/ -+TEST_STORE (st3_vnum_u8_m27, svuint8x3_t, uint8_t, -+ svst3_vnum_u8 (p0, x0, -27, z0), -+ svst3_vnum (p0, x0, -27, z0)) -+ -+/* -+** st3_vnum_u8_x1: -+** cntb (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** st3b {z0\.b - z2\.b}, p0, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** st3b {z0\.b - z2\.b}, p0, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_STORE (st3_vnum_u8_x1, svuint8x3_t, uint8_t, -+ svst3_vnum_u8 (p0, x0, x1, z0), -+ svst3_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_bf16.c -new file mode 100644 -index 000000000..b8d9f4afa ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_bf16.c -@@ -0,0 +1,286 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st4_bf16_base: -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_bf16_base, svbfloat16x4_t, bfloat16_t, -+ svst4_bf16 (p0, x0, z0), -+ svst4 (p0, x0, z0)) -+ -+/* -+** st4_bf16_index: -+** st4h {z0\.h - z3\.h}, p0, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_STORE (st4_bf16_index, svbfloat16x4_t, bfloat16_t, -+ svst4_bf16 (p0, x0 + x1, z0), -+ svst4 (p0, x0 + x1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_bf16_1: -+** incb x0 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_bf16_1, svbfloat16x4_t, bfloat16_t, -+ svst4_bf16 (p0, x0 + svcnth (), z0), -+ svst4 (p0, x0 + svcnth (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_bf16_2: -+** incb x0, all, mul #2 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_bf16_2, svbfloat16x4_t, bfloat16_t, -+ svst4_bf16 (p0, x0 + svcnth () * 2, z0), -+ svst4 (p0, x0 + svcnth () * 2, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st4_bf16_3: -+** incb x0, all, mul #3 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_bf16_3, svbfloat16x4_t, bfloat16_t, -+ svst4_bf16 (p0, x0 + svcnth () * 3, z0), -+ svst4 (p0, x0 + svcnth () * 3, z0)) -+ -+/* -+** st4_bf16_4: -+** st4h {z0\.h - z3\.h}, p0, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_bf16_4, svbfloat16x4_t, bfloat16_t, -+ svst4_bf16 (p0, x0 + svcnth () * 4, z0), -+ svst4 (p0, x0 + svcnth () * 4, z0)) -+ -+/* -+** st4_bf16_28: -+** st4h {z0\.h - z3\.h}, p0, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_bf16_28, svbfloat16x4_t, bfloat16_t, -+ svst4_bf16 (p0, x0 + svcnth () * 28, z0), -+ svst4 (p0, x0 + svcnth () * 28, z0)) -+ -+/* -+** st4_bf16_32: -+** [^{]* -+** st4h {z0\.h - z3\.h}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_bf16_32, svbfloat16x4_t, bfloat16_t, -+ svst4_bf16 (p0, x0 + svcnth () * 32, z0), -+ svst4 (p0, x0 + svcnth () * 32, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_bf16_m1: -+** decb x0 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_bf16_m1, svbfloat16x4_t, bfloat16_t, -+ svst4_bf16 (p0, x0 - svcnth (), z0), -+ svst4 (p0, x0 - svcnth (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_bf16_m2: -+** decb x0, all, mul #2 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_bf16_m2, svbfloat16x4_t, bfloat16_t, -+ svst4_bf16 (p0, x0 - svcnth () * 2, z0), -+ svst4 (p0, x0 - svcnth () * 2, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_bf16_m3: -+** decb x0, all, mul #3 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_bf16_m3, svbfloat16x4_t, bfloat16_t, -+ svst4_bf16 (p0, x0 - svcnth () * 3, z0), -+ svst4 (p0, x0 - svcnth () * 3, z0)) -+ -+/* -+** st4_bf16_m4: -+** st4h {z0\.h - z3\.h}, p0, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_bf16_m4, svbfloat16x4_t, bfloat16_t, -+ svst4_bf16 (p0, x0 - svcnth () * 4, z0), -+ svst4 (p0, x0 - svcnth () * 4, z0)) -+ -+/* -+** st4_bf16_m32: -+** st4h {z0\.h - z3\.h}, p0, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_bf16_m32, svbfloat16x4_t, bfloat16_t, -+ svst4_bf16 (p0, x0 - svcnth () * 32, z0), -+ svst4 (p0, x0 - svcnth () * 32, z0)) -+ -+/* -+** st4_bf16_m36: -+** [^{]* -+** st4h {z0\.h - z3\.h}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_bf16_m36, svbfloat16x4_t, bfloat16_t, -+ svst4_bf16 (p0, x0 - svcnth () * 36, z0), -+ svst4 (p0, x0 - svcnth () * 36, z0)) -+ -+/* -+** st4_vnum_bf16_0: -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_bf16_0, svbfloat16x4_t, bfloat16_t, -+ svst4_vnum_bf16 (p0, x0, 0, z0), -+ svst4_vnum (p0, x0, 0, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_bf16_1: -+** incb x0 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_bf16_1, svbfloat16x4_t, bfloat16_t, -+ svst4_vnum_bf16 (p0, x0, 1, z0), -+ svst4_vnum (p0, x0, 1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_bf16_2: -+** incb x0, all, mul #2 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_bf16_2, svbfloat16x4_t, bfloat16_t, -+ svst4_vnum_bf16 (p0, x0, 2, z0), -+ svst4_vnum (p0, x0, 2, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st4_vnum_bf16_3: -+** incb x0, all, mul #3 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_bf16_3, svbfloat16x4_t, bfloat16_t, -+ svst4_vnum_bf16 (p0, x0, 3, z0), -+ svst4_vnum (p0, x0, 3, z0)) -+ -+/* -+** st4_vnum_bf16_4: -+** st4h {z0\.h - z3\.h}, p0, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_bf16_4, svbfloat16x4_t, bfloat16_t, -+ svst4_vnum_bf16 (p0, x0, 4, z0), -+ svst4_vnum (p0, x0, 4, z0)) -+ -+/* -+** st4_vnum_bf16_28: -+** st4h {z0\.h - z3\.h}, p0, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_bf16_28, svbfloat16x4_t, bfloat16_t, -+ svst4_vnum_bf16 (p0, x0, 28, z0), -+ svst4_vnum (p0, x0, 28, z0)) -+ -+/* -+** st4_vnum_bf16_32: -+** [^{]* -+** st4h {z0\.h - z3\.h}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_vnum_bf16_32, svbfloat16x4_t, bfloat16_t, -+ svst4_vnum_bf16 (p0, x0, 32, z0), -+ svst4_vnum (p0, x0, 32, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_bf16_m1: -+** decb x0 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_bf16_m1, svbfloat16x4_t, bfloat16_t, -+ svst4_vnum_bf16 (p0, x0, -1, z0), -+ svst4_vnum (p0, x0, -1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_bf16_m2: -+** decb x0, all, mul #2 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_bf16_m2, svbfloat16x4_t, bfloat16_t, -+ svst4_vnum_bf16 (p0, x0, -2, z0), -+ svst4_vnum (p0, x0, -2, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_bf16_m3: -+** decb x0, all, mul #3 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_bf16_m3, svbfloat16x4_t, bfloat16_t, -+ svst4_vnum_bf16 (p0, x0, -3, z0), -+ svst4_vnum (p0, x0, -3, z0)) -+ -+/* -+** st4_vnum_bf16_m4: -+** st4h {z0\.h - z3\.h}, p0, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_bf16_m4, svbfloat16x4_t, bfloat16_t, -+ svst4_vnum_bf16 (p0, x0, -4, z0), -+ svst4_vnum (p0, x0, -4, z0)) -+ -+/* -+** st4_vnum_bf16_m32: -+** st4h {z0\.h - z3\.h}, p0, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_bf16_m32, svbfloat16x4_t, bfloat16_t, -+ svst4_vnum_bf16 (p0, x0, -32, z0), -+ svst4_vnum (p0, x0, -32, z0)) -+ -+/* -+** st4_vnum_bf16_m36: -+** [^{]* -+** st4h {z0\.h - z3\.h}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_vnum_bf16_m36, svbfloat16x4_t, bfloat16_t, -+ svst4_vnum_bf16 (p0, x0, -36, z0), -+ svst4_vnum (p0, x0, -36, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** st4_vnum_bf16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st4h {z0\.h - z3\.h}, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st4_vnum_bf16_x1, svbfloat16x4_t, bfloat16_t, -+ svst4_vnum_bf16 (p0, x0, x1, z0), -+ svst4_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_f16.c -new file mode 100644 -index 000000000..296bdb4a6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_f16.c -@@ -0,0 +1,286 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st4_f16_base: -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_f16_base, svfloat16x4_t, float16_t, -+ svst4_f16 (p0, x0, z0), -+ svst4 (p0, x0, z0)) -+ -+/* -+** st4_f16_index: -+** st4h {z0\.h - z3\.h}, p0, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_STORE (st4_f16_index, svfloat16x4_t, float16_t, -+ svst4_f16 (p0, x0 + x1, z0), -+ svst4 (p0, x0 + x1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_f16_1: -+** incb x0 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_f16_1, svfloat16x4_t, float16_t, -+ svst4_f16 (p0, x0 + svcnth (), z0), -+ svst4 (p0, x0 + svcnth (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_f16_2: -+** incb x0, all, mul #2 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_f16_2, svfloat16x4_t, float16_t, -+ svst4_f16 (p0, x0 + svcnth () * 2, z0), -+ svst4 (p0, x0 + svcnth () * 2, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_f16_3: -+** incb x0, all, mul #3 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_f16_3, svfloat16x4_t, float16_t, -+ svst4_f16 (p0, x0 + svcnth () * 3, z0), -+ svst4 (p0, x0 + svcnth () * 3, z0)) -+ -+/* -+** st4_f16_4: -+** st4h {z0\.h - z3\.h}, p0, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_f16_4, svfloat16x4_t, float16_t, -+ svst4_f16 (p0, x0 + svcnth () * 4, z0), -+ svst4 (p0, x0 + svcnth () * 4, z0)) -+ -+/* -+** st4_f16_28: -+** st4h {z0\.h - z3\.h}, p0, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_f16_28, svfloat16x4_t, float16_t, -+ svst4_f16 (p0, x0 + svcnth () * 28, z0), -+ svst4 (p0, x0 + svcnth () * 28, z0)) -+ -+/* -+** st4_f16_32: -+** [^{]* -+** st4h {z0\.h - z3\.h}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_f16_32, svfloat16x4_t, float16_t, -+ svst4_f16 (p0, x0 + svcnth () * 32, z0), -+ svst4 (p0, x0 + svcnth () * 32, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_f16_m1: -+** decb x0 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_f16_m1, svfloat16x4_t, float16_t, -+ svst4_f16 (p0, x0 - svcnth (), z0), -+ svst4 (p0, x0 - svcnth (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_f16_m2: -+** decb x0, all, mul #2 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_f16_m2, svfloat16x4_t, float16_t, -+ svst4_f16 (p0, x0 - svcnth () * 2, z0), -+ svst4 (p0, x0 - svcnth () * 2, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st4_f16_m3: -+** decb x0, all, mul #3 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_f16_m3, svfloat16x4_t, float16_t, -+ svst4_f16 (p0, x0 - svcnth () * 3, z0), -+ svst4 (p0, x0 - svcnth () * 3, z0)) -+ -+/* -+** st4_f16_m4: -+** st4h {z0\.h - z3\.h}, p0, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_f16_m4, svfloat16x4_t, float16_t, -+ svst4_f16 (p0, x0 - svcnth () * 4, z0), -+ svst4 (p0, x0 - svcnth () * 4, z0)) -+ -+/* -+** st4_f16_m32: -+** st4h {z0\.h - z3\.h}, p0, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_f16_m32, svfloat16x4_t, float16_t, -+ svst4_f16 (p0, x0 - svcnth () * 32, z0), -+ svst4 (p0, x0 - svcnth () * 32, z0)) -+ -+/* -+** st4_f16_m36: -+** [^{]* -+** st4h {z0\.h - z3\.h}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_f16_m36, svfloat16x4_t, float16_t, -+ svst4_f16 (p0, x0 - svcnth () * 36, z0), -+ svst4 (p0, x0 - svcnth () * 36, z0)) -+ -+/* -+** st4_vnum_f16_0: -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f16_0, svfloat16x4_t, float16_t, -+ svst4_vnum_f16 (p0, x0, 0, z0), -+ svst4_vnum (p0, x0, 0, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_f16_1: -+** incb x0 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f16_1, svfloat16x4_t, float16_t, -+ svst4_vnum_f16 (p0, x0, 1, z0), -+ svst4_vnum (p0, x0, 1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_f16_2: -+** incb x0, all, mul #2 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f16_2, svfloat16x4_t, float16_t, -+ svst4_vnum_f16 (p0, x0, 2, z0), -+ svst4_vnum (p0, x0, 2, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_f16_3: -+** incb x0, all, mul #3 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f16_3, svfloat16x4_t, float16_t, -+ svst4_vnum_f16 (p0, x0, 3, z0), -+ svst4_vnum (p0, x0, 3, z0)) -+ -+/* -+** st4_vnum_f16_4: -+** st4h {z0\.h - z3\.h}, p0, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f16_4, svfloat16x4_t, float16_t, -+ svst4_vnum_f16 (p0, x0, 4, z0), -+ svst4_vnum (p0, x0, 4, z0)) -+ -+/* -+** st4_vnum_f16_28: -+** st4h {z0\.h - z3\.h}, p0, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f16_28, svfloat16x4_t, float16_t, -+ svst4_vnum_f16 (p0, x0, 28, z0), -+ svst4_vnum (p0, x0, 28, z0)) -+ -+/* -+** st4_vnum_f16_32: -+** [^{]* -+** st4h {z0\.h - z3\.h}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f16_32, svfloat16x4_t, float16_t, -+ svst4_vnum_f16 (p0, x0, 32, z0), -+ svst4_vnum (p0, x0, 32, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_f16_m1: -+** decb x0 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f16_m1, svfloat16x4_t, float16_t, -+ svst4_vnum_f16 (p0, x0, -1, z0), -+ svst4_vnum (p0, x0, -1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_f16_m2: -+** decb x0, all, mul #2 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f16_m2, svfloat16x4_t, float16_t, -+ svst4_vnum_f16 (p0, x0, -2, z0), -+ svst4_vnum (p0, x0, -2, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st4_vnum_f16_m3: -+** decb x0, all, mul #3 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f16_m3, svfloat16x4_t, float16_t, -+ svst4_vnum_f16 (p0, x0, -3, z0), -+ svst4_vnum (p0, x0, -3, z0)) -+ -+/* -+** st4_vnum_f16_m4: -+** st4h {z0\.h - z3\.h}, p0, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f16_m4, svfloat16x4_t, float16_t, -+ svst4_vnum_f16 (p0, x0, -4, z0), -+ svst4_vnum (p0, x0, -4, z0)) -+ -+/* -+** st4_vnum_f16_m32: -+** st4h {z0\.h - z3\.h}, p0, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f16_m32, svfloat16x4_t, float16_t, -+ svst4_vnum_f16 (p0, x0, -32, z0), -+ svst4_vnum (p0, x0, -32, z0)) -+ -+/* -+** st4_vnum_f16_m36: -+** [^{]* -+** st4h {z0\.h - z3\.h}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f16_m36, svfloat16x4_t, float16_t, -+ svst4_vnum_f16 (p0, x0, -36, z0), -+ svst4_vnum (p0, x0, -36, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** st4_vnum_f16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st4h {z0\.h - z3\.h}, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f16_x1, svfloat16x4_t, float16_t, -+ svst4_vnum_f16 (p0, x0, x1, z0), -+ svst4_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_f32.c -new file mode 100644 -index 000000000..313ed7bc0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_f32.c -@@ -0,0 +1,286 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st4_f32_base: -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_f32_base, svfloat32x4_t, float32_t, -+ svst4_f32 (p0, x0, z0), -+ svst4 (p0, x0, z0)) -+ -+/* -+** st4_f32_index: -+** st4w {z0\.s - z3\.s}, p0, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_STORE (st4_f32_index, svfloat32x4_t, float32_t, -+ svst4_f32 (p0, x0 + x1, z0), -+ svst4 (p0, x0 + x1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_f32_1: -+** incb x0 -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_f32_1, svfloat32x4_t, float32_t, -+ svst4_f32 (p0, x0 + svcntw (), z0), -+ svst4 (p0, x0 + svcntw (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_f32_2: -+** incb x0, all, mul #2 -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_f32_2, svfloat32x4_t, float32_t, -+ svst4_f32 (p0, x0 + svcntw () * 2, z0), -+ svst4 (p0, x0 + svcntw () * 2, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st4_f32_3: -+** incb x0, all, mul #3 -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_f32_3, svfloat32x4_t, float32_t, -+ svst4_f32 (p0, x0 + svcntw () * 3, z0), -+ svst4 (p0, x0 + svcntw () * 3, z0)) -+ -+/* -+** st4_f32_4: -+** st4w {z0\.s - z3\.s}, p0, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_f32_4, svfloat32x4_t, float32_t, -+ svst4_f32 (p0, x0 + svcntw () * 4, z0), -+ svst4 (p0, x0 + svcntw () * 4, z0)) -+ -+/* -+** st4_f32_28: -+** st4w {z0\.s - z3\.s}, p0, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_f32_28, svfloat32x4_t, float32_t, -+ svst4_f32 (p0, x0 + svcntw () * 28, z0), -+ svst4 (p0, x0 + svcntw () * 28, z0)) -+ -+/* -+** st4_f32_32: -+** [^{]* -+** st4w {z0\.s - z3\.s}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_f32_32, svfloat32x4_t, float32_t, -+ svst4_f32 (p0, x0 + svcntw () * 32, z0), -+ svst4 (p0, x0 + svcntw () * 32, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_f32_m1: -+** decb x0 -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_f32_m1, svfloat32x4_t, float32_t, -+ svst4_f32 (p0, x0 - svcntw (), z0), -+ svst4 (p0, x0 - svcntw (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_f32_m2: -+** decb x0, all, mul #2 -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_f32_m2, svfloat32x4_t, float32_t, -+ svst4_f32 (p0, x0 - svcntw () * 2, z0), -+ svst4 (p0, x0 - svcntw () * 2, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_f32_m3: -+** decb x0, all, mul #3 -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_f32_m3, svfloat32x4_t, float32_t, -+ svst4_f32 (p0, x0 - svcntw () * 3, z0), -+ svst4 (p0, x0 - svcntw () * 3, z0)) -+ -+/* -+** st4_f32_m4: -+** st4w {z0\.s - z3\.s}, p0, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_f32_m4, svfloat32x4_t, float32_t, -+ svst4_f32 (p0, x0 - svcntw () * 4, z0), -+ svst4 (p0, x0 - svcntw () * 4, z0)) -+ -+/* -+** st4_f32_m32: -+** st4w {z0\.s - z3\.s}, p0, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_f32_m32, svfloat32x4_t, float32_t, -+ svst4_f32 (p0, x0 - svcntw () * 32, z0), -+ svst4 (p0, x0 - svcntw () * 32, z0)) -+ -+/* -+** st4_f32_m36: -+** [^{]* -+** st4w {z0\.s - z3\.s}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_f32_m36, svfloat32x4_t, float32_t, -+ svst4_f32 (p0, x0 - svcntw () * 36, z0), -+ svst4 (p0, x0 - svcntw () * 36, z0)) -+ -+/* -+** st4_vnum_f32_0: -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f32_0, svfloat32x4_t, float32_t, -+ svst4_vnum_f32 (p0, x0, 0, z0), -+ svst4_vnum (p0, x0, 0, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_f32_1: -+** incb x0 -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f32_1, svfloat32x4_t, float32_t, -+ svst4_vnum_f32 (p0, x0, 1, z0), -+ svst4_vnum (p0, x0, 1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_f32_2: -+** incb x0, all, mul #2 -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f32_2, svfloat32x4_t, float32_t, -+ svst4_vnum_f32 (p0, x0, 2, z0), -+ svst4_vnum (p0, x0, 2, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st4_vnum_f32_3: -+** incb x0, all, mul #3 -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f32_3, svfloat32x4_t, float32_t, -+ svst4_vnum_f32 (p0, x0, 3, z0), -+ svst4_vnum (p0, x0, 3, z0)) -+ -+/* -+** st4_vnum_f32_4: -+** st4w {z0\.s - z3\.s}, p0, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f32_4, svfloat32x4_t, float32_t, -+ svst4_vnum_f32 (p0, x0, 4, z0), -+ svst4_vnum (p0, x0, 4, z0)) -+ -+/* -+** st4_vnum_f32_28: -+** st4w {z0\.s - z3\.s}, p0, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f32_28, svfloat32x4_t, float32_t, -+ svst4_vnum_f32 (p0, x0, 28, z0), -+ svst4_vnum (p0, x0, 28, z0)) -+ -+/* -+** st4_vnum_f32_32: -+** [^{]* -+** st4w {z0\.s - z3\.s}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f32_32, svfloat32x4_t, float32_t, -+ svst4_vnum_f32 (p0, x0, 32, z0), -+ svst4_vnum (p0, x0, 32, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_f32_m1: -+** decb x0 -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f32_m1, svfloat32x4_t, float32_t, -+ svst4_vnum_f32 (p0, x0, -1, z0), -+ svst4_vnum (p0, x0, -1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_f32_m2: -+** decb x0, all, mul #2 -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f32_m2, svfloat32x4_t, float32_t, -+ svst4_vnum_f32 (p0, x0, -2, z0), -+ svst4_vnum (p0, x0, -2, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_f32_m3: -+** decb x0, all, mul #3 -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f32_m3, svfloat32x4_t, float32_t, -+ svst4_vnum_f32 (p0, x0, -3, z0), -+ svst4_vnum (p0, x0, -3, z0)) -+ -+/* -+** st4_vnum_f32_m4: -+** st4w {z0\.s - z3\.s}, p0, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f32_m4, svfloat32x4_t, float32_t, -+ svst4_vnum_f32 (p0, x0, -4, z0), -+ svst4_vnum (p0, x0, -4, z0)) -+ -+/* -+** st4_vnum_f32_m32: -+** st4w {z0\.s - z3\.s}, p0, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f32_m32, svfloat32x4_t, float32_t, -+ svst4_vnum_f32 (p0, x0, -32, z0), -+ svst4_vnum (p0, x0, -32, z0)) -+ -+/* -+** st4_vnum_f32_m36: -+** [^{]* -+** st4w {z0\.s - z3\.s}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f32_m36, svfloat32x4_t, float32_t, -+ svst4_vnum_f32 (p0, x0, -36, z0), -+ svst4_vnum (p0, x0, -36, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** st4_vnum_f32_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st4w {z0\.s - z3\.s}, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f32_x1, svfloat32x4_t, float32_t, -+ svst4_vnum_f32 (p0, x0, x1, z0), -+ svst4_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_f64.c -new file mode 100644 -index 000000000..6c65ef016 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_f64.c -@@ -0,0 +1,286 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st4_f64_base: -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_f64_base, svfloat64x4_t, float64_t, -+ svst4_f64 (p0, x0, z0), -+ svst4 (p0, x0, z0)) -+ -+/* -+** st4_f64_index: -+** st4d {z0\.d - z3\.d}, p0, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_STORE (st4_f64_index, svfloat64x4_t, float64_t, -+ svst4_f64 (p0, x0 + x1, z0), -+ svst4 (p0, x0 + x1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_f64_1: -+** incb x0 -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_f64_1, svfloat64x4_t, float64_t, -+ svst4_f64 (p0, x0 + svcntd (), z0), -+ svst4 (p0, x0 + svcntd (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_f64_2: -+** incb x0, all, mul #2 -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_f64_2, svfloat64x4_t, float64_t, -+ svst4_f64 (p0, x0 + svcntd () * 2, z0), -+ svst4 (p0, x0 + svcntd () * 2, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_f64_3: -+** incb x0, all, mul #3 -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_f64_3, svfloat64x4_t, float64_t, -+ svst4_f64 (p0, x0 + svcntd () * 3, z0), -+ svst4 (p0, x0 + svcntd () * 3, z0)) -+ -+/* -+** st4_f64_4: -+** st4d {z0\.d - z3\.d}, p0, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_f64_4, svfloat64x4_t, float64_t, -+ svst4_f64 (p0, x0 + svcntd () * 4, z0), -+ svst4 (p0, x0 + svcntd () * 4, z0)) -+ -+/* -+** st4_f64_28: -+** st4d {z0\.d - z3\.d}, p0, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_f64_28, svfloat64x4_t, float64_t, -+ svst4_f64 (p0, x0 + svcntd () * 28, z0), -+ svst4 (p0, x0 + svcntd () * 28, z0)) -+ -+/* -+** st4_f64_32: -+** [^{]* -+** st4d {z0\.d - z3\.d}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_f64_32, svfloat64x4_t, float64_t, -+ svst4_f64 (p0, x0 + svcntd () * 32, z0), -+ svst4 (p0, x0 + svcntd () * 32, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_f64_m1: -+** decb x0 -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_f64_m1, svfloat64x4_t, float64_t, -+ svst4_f64 (p0, x0 - svcntd (), z0), -+ svst4 (p0, x0 - svcntd (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_f64_m2: -+** decb x0, all, mul #2 -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_f64_m2, svfloat64x4_t, float64_t, -+ svst4_f64 (p0, x0 - svcntd () * 2, z0), -+ svst4 (p0, x0 - svcntd () * 2, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st4_f64_m3: -+** decb x0, all, mul #3 -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_f64_m3, svfloat64x4_t, float64_t, -+ svst4_f64 (p0, x0 - svcntd () * 3, z0), -+ svst4 (p0, x0 - svcntd () * 3, z0)) -+ -+/* -+** st4_f64_m4: -+** st4d {z0\.d - z3\.d}, p0, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_f64_m4, svfloat64x4_t, float64_t, -+ svst4_f64 (p0, x0 - svcntd () * 4, z0), -+ svst4 (p0, x0 - svcntd () * 4, z0)) -+ -+/* -+** st4_f64_m32: -+** st4d {z0\.d - z3\.d}, p0, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_f64_m32, svfloat64x4_t, float64_t, -+ svst4_f64 (p0, x0 - svcntd () * 32, z0), -+ svst4 (p0, x0 - svcntd () * 32, z0)) -+ -+/* -+** st4_f64_m36: -+** [^{]* -+** st4d {z0\.d - z3\.d}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_f64_m36, svfloat64x4_t, float64_t, -+ svst4_f64 (p0, x0 - svcntd () * 36, z0), -+ svst4 (p0, x0 - svcntd () * 36, z0)) -+ -+/* -+** st4_vnum_f64_0: -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f64_0, svfloat64x4_t, float64_t, -+ svst4_vnum_f64 (p0, x0, 0, z0), -+ svst4_vnum (p0, x0, 0, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_f64_1: -+** incb x0 -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f64_1, svfloat64x4_t, float64_t, -+ svst4_vnum_f64 (p0, x0, 1, z0), -+ svst4_vnum (p0, x0, 1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_f64_2: -+** incb x0, all, mul #2 -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f64_2, svfloat64x4_t, float64_t, -+ svst4_vnum_f64 (p0, x0, 2, z0), -+ svst4_vnum (p0, x0, 2, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_f64_3: -+** incb x0, all, mul #3 -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f64_3, svfloat64x4_t, float64_t, -+ svst4_vnum_f64 (p0, x0, 3, z0), -+ svst4_vnum (p0, x0, 3, z0)) -+ -+/* -+** st4_vnum_f64_4: -+** st4d {z0\.d - z3\.d}, p0, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f64_4, svfloat64x4_t, float64_t, -+ svst4_vnum_f64 (p0, x0, 4, z0), -+ svst4_vnum (p0, x0, 4, z0)) -+ -+/* -+** st4_vnum_f64_28: -+** st4d {z0\.d - z3\.d}, p0, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f64_28, svfloat64x4_t, float64_t, -+ svst4_vnum_f64 (p0, x0, 28, z0), -+ svst4_vnum (p0, x0, 28, z0)) -+ -+/* -+** st4_vnum_f64_32: -+** [^{]* -+** st4d {z0\.d - z3\.d}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f64_32, svfloat64x4_t, float64_t, -+ svst4_vnum_f64 (p0, x0, 32, z0), -+ svst4_vnum (p0, x0, 32, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_f64_m1: -+** decb x0 -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f64_m1, svfloat64x4_t, float64_t, -+ svst4_vnum_f64 (p0, x0, -1, z0), -+ svst4_vnum (p0, x0, -1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_f64_m2: -+** decb x0, all, mul #2 -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f64_m2, svfloat64x4_t, float64_t, -+ svst4_vnum_f64 (p0, x0, -2, z0), -+ svst4_vnum (p0, x0, -2, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st4_vnum_f64_m3: -+** decb x0, all, mul #3 -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f64_m3, svfloat64x4_t, float64_t, -+ svst4_vnum_f64 (p0, x0, -3, z0), -+ svst4_vnum (p0, x0, -3, z0)) -+ -+/* -+** st4_vnum_f64_m4: -+** st4d {z0\.d - z3\.d}, p0, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f64_m4, svfloat64x4_t, float64_t, -+ svst4_vnum_f64 (p0, x0, -4, z0), -+ svst4_vnum (p0, x0, -4, z0)) -+ -+/* -+** st4_vnum_f64_m32: -+** st4d {z0\.d - z3\.d}, p0, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f64_m32, svfloat64x4_t, float64_t, -+ svst4_vnum_f64 (p0, x0, -32, z0), -+ svst4_vnum (p0, x0, -32, z0)) -+ -+/* -+** st4_vnum_f64_m36: -+** [^{]* -+** st4d {z0\.d - z3\.d}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f64_m36, svfloat64x4_t, float64_t, -+ svst4_vnum_f64 (p0, x0, -36, z0), -+ svst4_vnum (p0, x0, -36, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** st4_vnum_f64_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st4d {z0\.d - z3\.d}, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st4_vnum_f64_x1, svfloat64x4_t, float64_t, -+ svst4_vnum_f64 (p0, x0, x1, z0), -+ svst4_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_s16.c -new file mode 100644 -index 000000000..35ac5f803 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_s16.c -@@ -0,0 +1,286 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st4_s16_base: -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_s16_base, svint16x4_t, int16_t, -+ svst4_s16 (p0, x0, z0), -+ svst4 (p0, x0, z0)) -+ -+/* -+** st4_s16_index: -+** st4h {z0\.h - z3\.h}, p0, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_STORE (st4_s16_index, svint16x4_t, int16_t, -+ svst4_s16 (p0, x0 + x1, z0), -+ svst4 (p0, x0 + x1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_s16_1: -+** incb x0 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_s16_1, svint16x4_t, int16_t, -+ svst4_s16 (p0, x0 + svcnth (), z0), -+ svst4 (p0, x0 + svcnth (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_s16_2: -+** incb x0, all, mul #2 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_s16_2, svint16x4_t, int16_t, -+ svst4_s16 (p0, x0 + svcnth () * 2, z0), -+ svst4 (p0, x0 + svcnth () * 2, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st4_s16_3: -+** incb x0, all, mul #3 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_s16_3, svint16x4_t, int16_t, -+ svst4_s16 (p0, x0 + svcnth () * 3, z0), -+ svst4 (p0, x0 + svcnth () * 3, z0)) -+ -+/* -+** st4_s16_4: -+** st4h {z0\.h - z3\.h}, p0, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_s16_4, svint16x4_t, int16_t, -+ svst4_s16 (p0, x0 + svcnth () * 4, z0), -+ svst4 (p0, x0 + svcnth () * 4, z0)) -+ -+/* -+** st4_s16_28: -+** st4h {z0\.h - z3\.h}, p0, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_s16_28, svint16x4_t, int16_t, -+ svst4_s16 (p0, x0 + svcnth () * 28, z0), -+ svst4 (p0, x0 + svcnth () * 28, z0)) -+ -+/* -+** st4_s16_32: -+** [^{]* -+** st4h {z0\.h - z3\.h}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_s16_32, svint16x4_t, int16_t, -+ svst4_s16 (p0, x0 + svcnth () * 32, z0), -+ svst4 (p0, x0 + svcnth () * 32, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_s16_m1: -+** decb x0 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_s16_m1, svint16x4_t, int16_t, -+ svst4_s16 (p0, x0 - svcnth (), z0), -+ svst4 (p0, x0 - svcnth (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_s16_m2: -+** decb x0, all, mul #2 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_s16_m2, svint16x4_t, int16_t, -+ svst4_s16 (p0, x0 - svcnth () * 2, z0), -+ svst4 (p0, x0 - svcnth () * 2, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_s16_m3: -+** decb x0, all, mul #3 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_s16_m3, svint16x4_t, int16_t, -+ svst4_s16 (p0, x0 - svcnth () * 3, z0), -+ svst4 (p0, x0 - svcnth () * 3, z0)) -+ -+/* -+** st4_s16_m4: -+** st4h {z0\.h - z3\.h}, p0, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_s16_m4, svint16x4_t, int16_t, -+ svst4_s16 (p0, x0 - svcnth () * 4, z0), -+ svst4 (p0, x0 - svcnth () * 4, z0)) -+ -+/* -+** st4_s16_m32: -+** st4h {z0\.h - z3\.h}, p0, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_s16_m32, svint16x4_t, int16_t, -+ svst4_s16 (p0, x0 - svcnth () * 32, z0), -+ svst4 (p0, x0 - svcnth () * 32, z0)) -+ -+/* -+** st4_s16_m36: -+** [^{]* -+** st4h {z0\.h - z3\.h}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_s16_m36, svint16x4_t, int16_t, -+ svst4_s16 (p0, x0 - svcnth () * 36, z0), -+ svst4 (p0, x0 - svcnth () * 36, z0)) -+ -+/* -+** st4_vnum_s16_0: -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s16_0, svint16x4_t, int16_t, -+ svst4_vnum_s16 (p0, x0, 0, z0), -+ svst4_vnum (p0, x0, 0, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_s16_1: -+** incb x0 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s16_1, svint16x4_t, int16_t, -+ svst4_vnum_s16 (p0, x0, 1, z0), -+ svst4_vnum (p0, x0, 1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_s16_2: -+** incb x0, all, mul #2 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s16_2, svint16x4_t, int16_t, -+ svst4_vnum_s16 (p0, x0, 2, z0), -+ svst4_vnum (p0, x0, 2, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st4_vnum_s16_3: -+** incb x0, all, mul #3 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s16_3, svint16x4_t, int16_t, -+ svst4_vnum_s16 (p0, x0, 3, z0), -+ svst4_vnum (p0, x0, 3, z0)) -+ -+/* -+** st4_vnum_s16_4: -+** st4h {z0\.h - z3\.h}, p0, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s16_4, svint16x4_t, int16_t, -+ svst4_vnum_s16 (p0, x0, 4, z0), -+ svst4_vnum (p0, x0, 4, z0)) -+ -+/* -+** st4_vnum_s16_28: -+** st4h {z0\.h - z3\.h}, p0, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s16_28, svint16x4_t, int16_t, -+ svst4_vnum_s16 (p0, x0, 28, z0), -+ svst4_vnum (p0, x0, 28, z0)) -+ -+/* -+** st4_vnum_s16_32: -+** [^{]* -+** st4h {z0\.h - z3\.h}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s16_32, svint16x4_t, int16_t, -+ svst4_vnum_s16 (p0, x0, 32, z0), -+ svst4_vnum (p0, x0, 32, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_s16_m1: -+** decb x0 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s16_m1, svint16x4_t, int16_t, -+ svst4_vnum_s16 (p0, x0, -1, z0), -+ svst4_vnum (p0, x0, -1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_s16_m2: -+** decb x0, all, mul #2 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s16_m2, svint16x4_t, int16_t, -+ svst4_vnum_s16 (p0, x0, -2, z0), -+ svst4_vnum (p0, x0, -2, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_s16_m3: -+** decb x0, all, mul #3 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s16_m3, svint16x4_t, int16_t, -+ svst4_vnum_s16 (p0, x0, -3, z0), -+ svst4_vnum (p0, x0, -3, z0)) -+ -+/* -+** st4_vnum_s16_m4: -+** st4h {z0\.h - z3\.h}, p0, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s16_m4, svint16x4_t, int16_t, -+ svst4_vnum_s16 (p0, x0, -4, z0), -+ svst4_vnum (p0, x0, -4, z0)) -+ -+/* -+** st4_vnum_s16_m32: -+** st4h {z0\.h - z3\.h}, p0, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s16_m32, svint16x4_t, int16_t, -+ svst4_vnum_s16 (p0, x0, -32, z0), -+ svst4_vnum (p0, x0, -32, z0)) -+ -+/* -+** st4_vnum_s16_m36: -+** [^{]* -+** st4h {z0\.h - z3\.h}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s16_m36, svint16x4_t, int16_t, -+ svst4_vnum_s16 (p0, x0, -36, z0), -+ svst4_vnum (p0, x0, -36, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** st4_vnum_s16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st4h {z0\.h - z3\.h}, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s16_x1, svint16x4_t, int16_t, -+ svst4_vnum_s16 (p0, x0, x1, z0), -+ svst4_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_s32.c -new file mode 100644 -index 000000000..b8302f10d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_s32.c -@@ -0,0 +1,286 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st4_s32_base: -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_s32_base, svint32x4_t, int32_t, -+ svst4_s32 (p0, x0, z0), -+ svst4 (p0, x0, z0)) -+ -+/* -+** st4_s32_index: -+** st4w {z0\.s - z3\.s}, p0, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_STORE (st4_s32_index, svint32x4_t, int32_t, -+ svst4_s32 (p0, x0 + x1, z0), -+ svst4 (p0, x0 + x1, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st4_s32_1: -+** incb x0 -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_s32_1, svint32x4_t, int32_t, -+ svst4_s32 (p0, x0 + svcntw (), z0), -+ svst4 (p0, x0 + svcntw (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_s32_2: -+** incb x0, all, mul #2 -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_s32_2, svint32x4_t, int32_t, -+ svst4_s32 (p0, x0 + svcntw () * 2, z0), -+ svst4 (p0, x0 + svcntw () * 2, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_s32_3: -+** incb x0, all, mul #3 -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_s32_3, svint32x4_t, int32_t, -+ svst4_s32 (p0, x0 + svcntw () * 3, z0), -+ svst4 (p0, x0 + svcntw () * 3, z0)) -+ -+/* -+** st4_s32_4: -+** st4w {z0\.s - z3\.s}, p0, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_s32_4, svint32x4_t, int32_t, -+ svst4_s32 (p0, x0 + svcntw () * 4, z0), -+ svst4 (p0, x0 + svcntw () * 4, z0)) -+ -+/* -+** st4_s32_28: -+** st4w {z0\.s - z3\.s}, p0, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_s32_28, svint32x4_t, int32_t, -+ svst4_s32 (p0, x0 + svcntw () * 28, z0), -+ svst4 (p0, x0 + svcntw () * 28, z0)) -+ -+/* -+** st4_s32_32: -+** [^{]* -+** st4w {z0\.s - z3\.s}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_s32_32, svint32x4_t, int32_t, -+ svst4_s32 (p0, x0 + svcntw () * 32, z0), -+ svst4 (p0, x0 + svcntw () * 32, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_s32_m1: -+** decb x0 -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_s32_m1, svint32x4_t, int32_t, -+ svst4_s32 (p0, x0 - svcntw (), z0), -+ svst4 (p0, x0 - svcntw (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_s32_m2: -+** decb x0, all, mul #2 -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_s32_m2, svint32x4_t, int32_t, -+ svst4_s32 (p0, x0 - svcntw () * 2, z0), -+ svst4 (p0, x0 - svcntw () * 2, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_s32_m3: -+** decb x0, all, mul #3 -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_s32_m3, svint32x4_t, int32_t, -+ svst4_s32 (p0, x0 - svcntw () * 3, z0), -+ svst4 (p0, x0 - svcntw () * 3, z0)) -+ -+/* -+** st4_s32_m4: -+** st4w {z0\.s - z3\.s}, p0, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_s32_m4, svint32x4_t, int32_t, -+ svst4_s32 (p0, x0 - svcntw () * 4, z0), -+ svst4 (p0, x0 - svcntw () * 4, z0)) -+ -+/* -+** st4_s32_m32: -+** st4w {z0\.s - z3\.s}, p0, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_s32_m32, svint32x4_t, int32_t, -+ svst4_s32 (p0, x0 - svcntw () * 32, z0), -+ svst4 (p0, x0 - svcntw () * 32, z0)) -+ -+/* -+** st4_s32_m36: -+** [^{]* -+** st4w {z0\.s - z3\.s}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_s32_m36, svint32x4_t, int32_t, -+ svst4_s32 (p0, x0 - svcntw () * 36, z0), -+ svst4 (p0, x0 - svcntw () * 36, z0)) -+ -+/* -+** st4_vnum_s32_0: -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s32_0, svint32x4_t, int32_t, -+ svst4_vnum_s32 (p0, x0, 0, z0), -+ svst4_vnum (p0, x0, 0, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_s32_1: -+** incb x0 -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s32_1, svint32x4_t, int32_t, -+ svst4_vnum_s32 (p0, x0, 1, z0), -+ svst4_vnum (p0, x0, 1, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st4_vnum_s32_2: -+** incb x0, all, mul #2 -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s32_2, svint32x4_t, int32_t, -+ svst4_vnum_s32 (p0, x0, 2, z0), -+ svst4_vnum (p0, x0, 2, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_s32_3: -+** incb x0, all, mul #3 -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s32_3, svint32x4_t, int32_t, -+ svst4_vnum_s32 (p0, x0, 3, z0), -+ svst4_vnum (p0, x0, 3, z0)) -+ -+/* -+** st4_vnum_s32_4: -+** st4w {z0\.s - z3\.s}, p0, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s32_4, svint32x4_t, int32_t, -+ svst4_vnum_s32 (p0, x0, 4, z0), -+ svst4_vnum (p0, x0, 4, z0)) -+ -+/* -+** st4_vnum_s32_28: -+** st4w {z0\.s - z3\.s}, p0, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s32_28, svint32x4_t, int32_t, -+ svst4_vnum_s32 (p0, x0, 28, z0), -+ svst4_vnum (p0, x0, 28, z0)) -+ -+/* -+** st4_vnum_s32_32: -+** [^{]* -+** st4w {z0\.s - z3\.s}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s32_32, svint32x4_t, int32_t, -+ svst4_vnum_s32 (p0, x0, 32, z0), -+ svst4_vnum (p0, x0, 32, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_s32_m1: -+** decb x0 -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s32_m1, svint32x4_t, int32_t, -+ svst4_vnum_s32 (p0, x0, -1, z0), -+ svst4_vnum (p0, x0, -1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_s32_m2: -+** decb x0, all, mul #2 -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s32_m2, svint32x4_t, int32_t, -+ svst4_vnum_s32 (p0, x0, -2, z0), -+ svst4_vnum (p0, x0, -2, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_s32_m3: -+** decb x0, all, mul #3 -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s32_m3, svint32x4_t, int32_t, -+ svst4_vnum_s32 (p0, x0, -3, z0), -+ svst4_vnum (p0, x0, -3, z0)) -+ -+/* -+** st4_vnum_s32_m4: -+** st4w {z0\.s - z3\.s}, p0, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s32_m4, svint32x4_t, int32_t, -+ svst4_vnum_s32 (p0, x0, -4, z0), -+ svst4_vnum (p0, x0, -4, z0)) -+ -+/* -+** st4_vnum_s32_m32: -+** st4w {z0\.s - z3\.s}, p0, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s32_m32, svint32x4_t, int32_t, -+ svst4_vnum_s32 (p0, x0, -32, z0), -+ svst4_vnum (p0, x0, -32, z0)) -+ -+/* -+** st4_vnum_s32_m36: -+** [^{]* -+** st4w {z0\.s - z3\.s}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s32_m36, svint32x4_t, int32_t, -+ svst4_vnum_s32 (p0, x0, -36, z0), -+ svst4_vnum (p0, x0, -36, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** st4_vnum_s32_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st4w {z0\.s - z3\.s}, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s32_x1, svint32x4_t, int32_t, -+ svst4_vnum_s32 (p0, x0, x1, z0), -+ svst4_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_s64.c -new file mode 100644 -index 000000000..bf9cdf5e0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_s64.c -@@ -0,0 +1,286 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st4_s64_base: -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_s64_base, svint64x4_t, int64_t, -+ svst4_s64 (p0, x0, z0), -+ svst4 (p0, x0, z0)) -+ -+/* -+** st4_s64_index: -+** st4d {z0\.d - z3\.d}, p0, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_STORE (st4_s64_index, svint64x4_t, int64_t, -+ svst4_s64 (p0, x0 + x1, z0), -+ svst4 (p0, x0 + x1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_s64_1: -+** incb x0 -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_s64_1, svint64x4_t, int64_t, -+ svst4_s64 (p0, x0 + svcntd (), z0), -+ svst4 (p0, x0 + svcntd (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_s64_2: -+** incb x0, all, mul #2 -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_s64_2, svint64x4_t, int64_t, -+ svst4_s64 (p0, x0 + svcntd () * 2, z0), -+ svst4 (p0, x0 + svcntd () * 2, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_s64_3: -+** incb x0, all, mul #3 -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_s64_3, svint64x4_t, int64_t, -+ svst4_s64 (p0, x0 + svcntd () * 3, z0), -+ svst4 (p0, x0 + svcntd () * 3, z0)) -+ -+/* -+** st4_s64_4: -+** st4d {z0\.d - z3\.d}, p0, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_s64_4, svint64x4_t, int64_t, -+ svst4_s64 (p0, x0 + svcntd () * 4, z0), -+ svst4 (p0, x0 + svcntd () * 4, z0)) -+ -+/* -+** st4_s64_28: -+** st4d {z0\.d - z3\.d}, p0, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_s64_28, svint64x4_t, int64_t, -+ svst4_s64 (p0, x0 + svcntd () * 28, z0), -+ svst4 (p0, x0 + svcntd () * 28, z0)) -+ -+/* -+** st4_s64_32: -+** [^{]* -+** st4d {z0\.d - z3\.d}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_s64_32, svint64x4_t, int64_t, -+ svst4_s64 (p0, x0 + svcntd () * 32, z0), -+ svst4 (p0, x0 + svcntd () * 32, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_s64_m1: -+** decb x0 -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_s64_m1, svint64x4_t, int64_t, -+ svst4_s64 (p0, x0 - svcntd (), z0), -+ svst4 (p0, x0 - svcntd (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_s64_m2: -+** decb x0, all, mul #2 -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_s64_m2, svint64x4_t, int64_t, -+ svst4_s64 (p0, x0 - svcntd () * 2, z0), -+ svst4 (p0, x0 - svcntd () * 2, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st4_s64_m3: -+** decb x0, all, mul #3 -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_s64_m3, svint64x4_t, int64_t, -+ svst4_s64 (p0, x0 - svcntd () * 3, z0), -+ svst4 (p0, x0 - svcntd () * 3, z0)) -+ -+/* -+** st4_s64_m4: -+** st4d {z0\.d - z3\.d}, p0, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_s64_m4, svint64x4_t, int64_t, -+ svst4_s64 (p0, x0 - svcntd () * 4, z0), -+ svst4 (p0, x0 - svcntd () * 4, z0)) -+ -+/* -+** st4_s64_m32: -+** st4d {z0\.d - z3\.d}, p0, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_s64_m32, svint64x4_t, int64_t, -+ svst4_s64 (p0, x0 - svcntd () * 32, z0), -+ svst4 (p0, x0 - svcntd () * 32, z0)) -+ -+/* -+** st4_s64_m36: -+** [^{]* -+** st4d {z0\.d - z3\.d}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_s64_m36, svint64x4_t, int64_t, -+ svst4_s64 (p0, x0 - svcntd () * 36, z0), -+ svst4 (p0, x0 - svcntd () * 36, z0)) -+ -+/* -+** st4_vnum_s64_0: -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s64_0, svint64x4_t, int64_t, -+ svst4_vnum_s64 (p0, x0, 0, z0), -+ svst4_vnum (p0, x0, 0, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_s64_1: -+** incb x0 -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s64_1, svint64x4_t, int64_t, -+ svst4_vnum_s64 (p0, x0, 1, z0), -+ svst4_vnum (p0, x0, 1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_s64_2: -+** incb x0, all, mul #2 -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s64_2, svint64x4_t, int64_t, -+ svst4_vnum_s64 (p0, x0, 2, z0), -+ svst4_vnum (p0, x0, 2, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_s64_3: -+** incb x0, all, mul #3 -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s64_3, svint64x4_t, int64_t, -+ svst4_vnum_s64 (p0, x0, 3, z0), -+ svst4_vnum (p0, x0, 3, z0)) -+ -+/* -+** st4_vnum_s64_4: -+** st4d {z0\.d - z3\.d}, p0, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s64_4, svint64x4_t, int64_t, -+ svst4_vnum_s64 (p0, x0, 4, z0), -+ svst4_vnum (p0, x0, 4, z0)) -+ -+/* -+** st4_vnum_s64_28: -+** st4d {z0\.d - z3\.d}, p0, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s64_28, svint64x4_t, int64_t, -+ svst4_vnum_s64 (p0, x0, 28, z0), -+ svst4_vnum (p0, x0, 28, z0)) -+ -+/* -+** st4_vnum_s64_32: -+** [^{]* -+** st4d {z0\.d - z3\.d}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s64_32, svint64x4_t, int64_t, -+ svst4_vnum_s64 (p0, x0, 32, z0), -+ svst4_vnum (p0, x0, 32, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_s64_m1: -+** decb x0 -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s64_m1, svint64x4_t, int64_t, -+ svst4_vnum_s64 (p0, x0, -1, z0), -+ svst4_vnum (p0, x0, -1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_s64_m2: -+** decb x0, all, mul #2 -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s64_m2, svint64x4_t, int64_t, -+ svst4_vnum_s64 (p0, x0, -2, z0), -+ svst4_vnum (p0, x0, -2, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st4_vnum_s64_m3: -+** decb x0, all, mul #3 -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s64_m3, svint64x4_t, int64_t, -+ svst4_vnum_s64 (p0, x0, -3, z0), -+ svst4_vnum (p0, x0, -3, z0)) -+ -+/* -+** st4_vnum_s64_m4: -+** st4d {z0\.d - z3\.d}, p0, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s64_m4, svint64x4_t, int64_t, -+ svst4_vnum_s64 (p0, x0, -4, z0), -+ svst4_vnum (p0, x0, -4, z0)) -+ -+/* -+** st4_vnum_s64_m32: -+** st4d {z0\.d - z3\.d}, p0, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s64_m32, svint64x4_t, int64_t, -+ svst4_vnum_s64 (p0, x0, -32, z0), -+ svst4_vnum (p0, x0, -32, z0)) -+ -+/* -+** st4_vnum_s64_m36: -+** [^{]* -+** st4d {z0\.d - z3\.d}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s64_m36, svint64x4_t, int64_t, -+ svst4_vnum_s64 (p0, x0, -36, z0), -+ svst4_vnum (p0, x0, -36, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** st4_vnum_s64_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st4d {z0\.d - z3\.d}, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s64_x1, svint64x4_t, int64_t, -+ svst4_vnum_s64 (p0, x0, x1, z0), -+ svst4_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_s8.c -new file mode 100644 -index 000000000..1eb0bf131 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_s8.c -@@ -0,0 +1,290 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st4_s8_base: -+** st4b {z0\.b - z3\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_s8_base, svint8x4_t, int8_t, -+ svst4_s8 (p0, x0, z0), -+ svst4 (p0, x0, z0)) -+ -+/* -+** st4_s8_index: -+** st4b {z0\.b - z3\.b}, p0, \[x0, x1\] -+** ret -+*/ -+TEST_STORE (st4_s8_index, svint8x4_t, int8_t, -+ svst4_s8 (p0, x0 + x1, z0), -+ svst4 (p0, x0 + x1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_s8_1: -+** incb x0 -+** st4b {z0\.b - z3\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_s8_1, svint8x4_t, int8_t, -+ svst4_s8 (p0, x0 + svcntb (), z0), -+ svst4 (p0, x0 + svcntb (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_s8_2: -+** incb x0, all, mul #2 -+** st4b {z0\.b - z3\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_s8_2, svint8x4_t, int8_t, -+ svst4_s8 (p0, x0 + svcntb () * 2, z0), -+ svst4 (p0, x0 + svcntb () * 2, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_s8_3: -+** incb x0, all, mul #3 -+** st4b {z0\.b - z3\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_s8_3, svint8x4_t, int8_t, -+ svst4_s8 (p0, x0 + svcntb () * 3, z0), -+ svst4 (p0, x0 + svcntb () * 3, z0)) -+ -+/* -+** st4_s8_4: -+** st4b {z0\.b - z3\.b}, p0, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_s8_4, svint8x4_t, int8_t, -+ svst4_s8 (p0, x0 + svcntb () * 4, z0), -+ svst4 (p0, x0 + svcntb () * 4, z0)) -+ -+/* -+** st4_s8_28: -+** st4b {z0\.b - z3\.b}, p0, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_s8_28, svint8x4_t, int8_t, -+ svst4_s8 (p0, x0 + svcntb () * 28, z0), -+ svst4 (p0, x0 + svcntb () * 28, z0)) -+ -+/* -+** st4_s8_32: -+** [^{]* -+** st4b {z0\.b - z3\.b}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_s8_32, svint8x4_t, int8_t, -+ svst4_s8 (p0, x0 + svcntb () * 32, z0), -+ svst4 (p0, x0 + svcntb () * 32, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st4_s8_m1: -+** decb x0 -+** st4b {z0\.b - z3\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_s8_m1, svint8x4_t, int8_t, -+ svst4_s8 (p0, x0 - svcntb (), z0), -+ svst4 (p0, x0 - svcntb (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_s8_m2: -+** decb x0, all, mul #2 -+** st4b {z0\.b - z3\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_s8_m2, svint8x4_t, int8_t, -+ svst4_s8 (p0, x0 - svcntb () * 2, z0), -+ svst4 (p0, x0 - svcntb () * 2, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_s8_m3: -+** decb x0, all, mul #3 -+** st4b {z0\.b - z3\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_s8_m3, svint8x4_t, int8_t, -+ svst4_s8 (p0, x0 - svcntb () * 3, z0), -+ svst4 (p0, x0 - svcntb () * 3, z0)) -+ -+/* -+** st4_s8_m4: -+** st4b {z0\.b - z3\.b}, p0, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_s8_m4, svint8x4_t, int8_t, -+ svst4_s8 (p0, x0 - svcntb () * 4, z0), -+ svst4 (p0, x0 - svcntb () * 4, z0)) -+ -+/* -+** st4_s8_m32: -+** st4b {z0\.b - z3\.b}, p0, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_s8_m32, svint8x4_t, int8_t, -+ svst4_s8 (p0, x0 - svcntb () * 32, z0), -+ svst4 (p0, x0 - svcntb () * 32, z0)) -+ -+/* -+** st4_s8_m36: -+** [^{]* -+** st4b {z0\.b - z3\.b}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_s8_m36, svint8x4_t, int8_t, -+ svst4_s8 (p0, x0 - svcntb () * 36, z0), -+ svst4 (p0, x0 - svcntb () * 36, z0)) -+ -+/* -+** st4_vnum_s8_0: -+** st4b {z0\.b - z3\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s8_0, svint8x4_t, int8_t, -+ svst4_vnum_s8 (p0, x0, 0, z0), -+ svst4_vnum (p0, x0, 0, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_s8_1: -+** incb x0 -+** st4b {z0\.b - z3\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s8_1, svint8x4_t, int8_t, -+ svst4_vnum_s8 (p0, x0, 1, z0), -+ svst4_vnum (p0, x0, 1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_s8_2: -+** incb x0, all, mul #2 -+** st4b {z0\.b - z3\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s8_2, svint8x4_t, int8_t, -+ svst4_vnum_s8 (p0, x0, 2, z0), -+ svst4_vnum (p0, x0, 2, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_s8_3: -+** incb x0, all, mul #3 -+** st4b {z0\.b - z3\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s8_3, svint8x4_t, int8_t, -+ svst4_vnum_s8 (p0, x0, 3, z0), -+ svst4_vnum (p0, x0, 3, z0)) -+ -+/* -+** st4_vnum_s8_4: -+** st4b {z0\.b - z3\.b}, p0, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s8_4, svint8x4_t, int8_t, -+ svst4_vnum_s8 (p0, x0, 4, z0), -+ svst4_vnum (p0, x0, 4, z0)) -+ -+/* -+** st4_vnum_s8_28: -+** st4b {z0\.b - z3\.b}, p0, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s8_28, svint8x4_t, int8_t, -+ svst4_vnum_s8 (p0, x0, 28, z0), -+ svst4_vnum (p0, x0, 28, z0)) -+ -+/* -+** st4_vnum_s8_32: -+** [^{]* -+** st4b {z0\.b - z3\.b}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s8_32, svint8x4_t, int8_t, -+ svst4_vnum_s8 (p0, x0, 32, z0), -+ svst4_vnum (p0, x0, 32, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_s8_m1: -+** decb x0 -+** st4b {z0\.b - z3\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s8_m1, svint8x4_t, int8_t, -+ svst4_vnum_s8 (p0, x0, -1, z0), -+ svst4_vnum (p0, x0, -1, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st4_vnum_s8_m2: -+** decb x0, all, mul #2 -+** st4b {z0\.b - z3\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s8_m2, svint8x4_t, int8_t, -+ svst4_vnum_s8 (p0, x0, -2, z0), -+ svst4_vnum (p0, x0, -2, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_s8_m3: -+** decb x0, all, mul #3 -+** st4b {z0\.b - z3\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s8_m3, svint8x4_t, int8_t, -+ svst4_vnum_s8 (p0, x0, -3, z0), -+ svst4_vnum (p0, x0, -3, z0)) -+ -+/* -+** st4_vnum_s8_m4: -+** st4b {z0\.b - z3\.b}, p0, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s8_m4, svint8x4_t, int8_t, -+ svst4_vnum_s8 (p0, x0, -4, z0), -+ svst4_vnum (p0, x0, -4, z0)) -+ -+/* -+** st4_vnum_s8_m32: -+** st4b {z0\.b - z3\.b}, p0, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s8_m32, svint8x4_t, int8_t, -+ svst4_vnum_s8 (p0, x0, -32, z0), -+ svst4_vnum (p0, x0, -32, z0)) -+ -+/* -+** st4_vnum_s8_m36: -+** [^{]* -+** st4b {z0\.b - z3\.b}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_vnum_s8_m36, svint8x4_t, int8_t, -+ svst4_vnum_s8 (p0, x0, -36, z0), -+ svst4_vnum (p0, x0, -36, z0)) -+ -+/* -+** st4_vnum_s8_x1: -+** cntb (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** st4b {z0\.b - z3\.b}, p0, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** st4b {z0\.b - z3\.b}, p0, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_STORE (st4_vnum_s8_x1, svint8x4_t, int8_t, -+ svst4_vnum_s8 (p0, x0, x1, z0), -+ svst4_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_u16.c -new file mode 100644 -index 000000000..5272c7f61 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_u16.c -@@ -0,0 +1,286 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st4_u16_base: -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_u16_base, svuint16x4_t, uint16_t, -+ svst4_u16 (p0, x0, z0), -+ svst4 (p0, x0, z0)) -+ -+/* -+** st4_u16_index: -+** st4h {z0\.h - z3\.h}, p0, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_STORE (st4_u16_index, svuint16x4_t, uint16_t, -+ svst4_u16 (p0, x0 + x1, z0), -+ svst4 (p0, x0 + x1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_u16_1: -+** incb x0 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_u16_1, svuint16x4_t, uint16_t, -+ svst4_u16 (p0, x0 + svcnth (), z0), -+ svst4 (p0, x0 + svcnth (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_u16_2: -+** incb x0, all, mul #2 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_u16_2, svuint16x4_t, uint16_t, -+ svst4_u16 (p0, x0 + svcnth () * 2, z0), -+ svst4 (p0, x0 + svcnth () * 2, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st4_u16_3: -+** incb x0, all, mul #3 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_u16_3, svuint16x4_t, uint16_t, -+ svst4_u16 (p0, x0 + svcnth () * 3, z0), -+ svst4 (p0, x0 + svcnth () * 3, z0)) -+ -+/* -+** st4_u16_4: -+** st4h {z0\.h - z3\.h}, p0, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_u16_4, svuint16x4_t, uint16_t, -+ svst4_u16 (p0, x0 + svcnth () * 4, z0), -+ svst4 (p0, x0 + svcnth () * 4, z0)) -+ -+/* -+** st4_u16_28: -+** st4h {z0\.h - z3\.h}, p0, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_u16_28, svuint16x4_t, uint16_t, -+ svst4_u16 (p0, x0 + svcnth () * 28, z0), -+ svst4 (p0, x0 + svcnth () * 28, z0)) -+ -+/* -+** st4_u16_32: -+** [^{]* -+** st4h {z0\.h - z3\.h}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_u16_32, svuint16x4_t, uint16_t, -+ svst4_u16 (p0, x0 + svcnth () * 32, z0), -+ svst4 (p0, x0 + svcnth () * 32, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_u16_m1: -+** decb x0 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_u16_m1, svuint16x4_t, uint16_t, -+ svst4_u16 (p0, x0 - svcnth (), z0), -+ svst4 (p0, x0 - svcnth (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_u16_m2: -+** decb x0, all, mul #2 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_u16_m2, svuint16x4_t, uint16_t, -+ svst4_u16 (p0, x0 - svcnth () * 2, z0), -+ svst4 (p0, x0 - svcnth () * 2, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_u16_m3: -+** decb x0, all, mul #3 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_u16_m3, svuint16x4_t, uint16_t, -+ svst4_u16 (p0, x0 - svcnth () * 3, z0), -+ svst4 (p0, x0 - svcnth () * 3, z0)) -+ -+/* -+** st4_u16_m4: -+** st4h {z0\.h - z3\.h}, p0, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_u16_m4, svuint16x4_t, uint16_t, -+ svst4_u16 (p0, x0 - svcnth () * 4, z0), -+ svst4 (p0, x0 - svcnth () * 4, z0)) -+ -+/* -+** st4_u16_m32: -+** st4h {z0\.h - z3\.h}, p0, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_u16_m32, svuint16x4_t, uint16_t, -+ svst4_u16 (p0, x0 - svcnth () * 32, z0), -+ svst4 (p0, x0 - svcnth () * 32, z0)) -+ -+/* -+** st4_u16_m36: -+** [^{]* -+** st4h {z0\.h - z3\.h}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_u16_m36, svuint16x4_t, uint16_t, -+ svst4_u16 (p0, x0 - svcnth () * 36, z0), -+ svst4 (p0, x0 - svcnth () * 36, z0)) -+ -+/* -+** st4_vnum_u16_0: -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u16_0, svuint16x4_t, uint16_t, -+ svst4_vnum_u16 (p0, x0, 0, z0), -+ svst4_vnum (p0, x0, 0, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_u16_1: -+** incb x0 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u16_1, svuint16x4_t, uint16_t, -+ svst4_vnum_u16 (p0, x0, 1, z0), -+ svst4_vnum (p0, x0, 1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_u16_2: -+** incb x0, all, mul #2 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u16_2, svuint16x4_t, uint16_t, -+ svst4_vnum_u16 (p0, x0, 2, z0), -+ svst4_vnum (p0, x0, 2, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st4_vnum_u16_3: -+** incb x0, all, mul #3 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u16_3, svuint16x4_t, uint16_t, -+ svst4_vnum_u16 (p0, x0, 3, z0), -+ svst4_vnum (p0, x0, 3, z0)) -+ -+/* -+** st4_vnum_u16_4: -+** st4h {z0\.h - z3\.h}, p0, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u16_4, svuint16x4_t, uint16_t, -+ svst4_vnum_u16 (p0, x0, 4, z0), -+ svst4_vnum (p0, x0, 4, z0)) -+ -+/* -+** st4_vnum_u16_28: -+** st4h {z0\.h - z3\.h}, p0, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u16_28, svuint16x4_t, uint16_t, -+ svst4_vnum_u16 (p0, x0, 28, z0), -+ svst4_vnum (p0, x0, 28, z0)) -+ -+/* -+** st4_vnum_u16_32: -+** [^{]* -+** st4h {z0\.h - z3\.h}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u16_32, svuint16x4_t, uint16_t, -+ svst4_vnum_u16 (p0, x0, 32, z0), -+ svst4_vnum (p0, x0, 32, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_u16_m1: -+** decb x0 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u16_m1, svuint16x4_t, uint16_t, -+ svst4_vnum_u16 (p0, x0, -1, z0), -+ svst4_vnum (p0, x0, -1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_u16_m2: -+** decb x0, all, mul #2 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u16_m2, svuint16x4_t, uint16_t, -+ svst4_vnum_u16 (p0, x0, -2, z0), -+ svst4_vnum (p0, x0, -2, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_u16_m3: -+** decb x0, all, mul #3 -+** st4h {z0\.h - z3\.h}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u16_m3, svuint16x4_t, uint16_t, -+ svst4_vnum_u16 (p0, x0, -3, z0), -+ svst4_vnum (p0, x0, -3, z0)) -+ -+/* -+** st4_vnum_u16_m4: -+** st4h {z0\.h - z3\.h}, p0, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u16_m4, svuint16x4_t, uint16_t, -+ svst4_vnum_u16 (p0, x0, -4, z0), -+ svst4_vnum (p0, x0, -4, z0)) -+ -+/* -+** st4_vnum_u16_m32: -+** st4h {z0\.h - z3\.h}, p0, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u16_m32, svuint16x4_t, uint16_t, -+ svst4_vnum_u16 (p0, x0, -32, z0), -+ svst4_vnum (p0, x0, -32, z0)) -+ -+/* -+** st4_vnum_u16_m36: -+** [^{]* -+** st4h {z0\.h - z3\.h}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u16_m36, svuint16x4_t, uint16_t, -+ svst4_vnum_u16 (p0, x0, -36, z0), -+ svst4_vnum (p0, x0, -36, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** st4_vnum_u16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st4h {z0\.h - z3\.h}, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u16_x1, svuint16x4_t, uint16_t, -+ svst4_vnum_u16 (p0, x0, x1, z0), -+ svst4_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_u32.c -new file mode 100644 -index 000000000..8b9b322e5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_u32.c -@@ -0,0 +1,286 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st4_u32_base: -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_u32_base, svuint32x4_t, uint32_t, -+ svst4_u32 (p0, x0, z0), -+ svst4 (p0, x0, z0)) -+ -+/* -+** st4_u32_index: -+** st4w {z0\.s - z3\.s}, p0, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_STORE (st4_u32_index, svuint32x4_t, uint32_t, -+ svst4_u32 (p0, x0 + x1, z0), -+ svst4 (p0, x0 + x1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_u32_1: -+** incb x0 -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_u32_1, svuint32x4_t, uint32_t, -+ svst4_u32 (p0, x0 + svcntw (), z0), -+ svst4 (p0, x0 + svcntw (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_u32_2: -+** incb x0, all, mul #2 -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_u32_2, svuint32x4_t, uint32_t, -+ svst4_u32 (p0, x0 + svcntw () * 2, z0), -+ svst4 (p0, x0 + svcntw () * 2, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_u32_3: -+** incb x0, all, mul #3 -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_u32_3, svuint32x4_t, uint32_t, -+ svst4_u32 (p0, x0 + svcntw () * 3, z0), -+ svst4 (p0, x0 + svcntw () * 3, z0)) -+ -+/* -+** st4_u32_4: -+** st4w {z0\.s - z3\.s}, p0, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_u32_4, svuint32x4_t, uint32_t, -+ svst4_u32 (p0, x0 + svcntw () * 4, z0), -+ svst4 (p0, x0 + svcntw () * 4, z0)) -+ -+/* -+** st4_u32_28: -+** st4w {z0\.s - z3\.s}, p0, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_u32_28, svuint32x4_t, uint32_t, -+ svst4_u32 (p0, x0 + svcntw () * 28, z0), -+ svst4 (p0, x0 + svcntw () * 28, z0)) -+ -+/* -+** st4_u32_32: -+** [^{]* -+** st4w {z0\.s - z3\.s}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_u32_32, svuint32x4_t, uint32_t, -+ svst4_u32 (p0, x0 + svcntw () * 32, z0), -+ svst4 (p0, x0 + svcntw () * 32, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_u32_m1: -+** decb x0 -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_u32_m1, svuint32x4_t, uint32_t, -+ svst4_u32 (p0, x0 - svcntw (), z0), -+ svst4 (p0, x0 - svcntw (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_u32_m2: -+** decb x0, all, mul #2 -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_u32_m2, svuint32x4_t, uint32_t, -+ svst4_u32 (p0, x0 - svcntw () * 2, z0), -+ svst4 (p0, x0 - svcntw () * 2, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st4_u32_m3: -+** decb x0, all, mul #3 -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_u32_m3, svuint32x4_t, uint32_t, -+ svst4_u32 (p0, x0 - svcntw () * 3, z0), -+ svst4 (p0, x0 - svcntw () * 3, z0)) -+ -+/* -+** st4_u32_m4: -+** st4w {z0\.s - z3\.s}, p0, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_u32_m4, svuint32x4_t, uint32_t, -+ svst4_u32 (p0, x0 - svcntw () * 4, z0), -+ svst4 (p0, x0 - svcntw () * 4, z0)) -+ -+/* -+** st4_u32_m32: -+** st4w {z0\.s - z3\.s}, p0, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_u32_m32, svuint32x4_t, uint32_t, -+ svst4_u32 (p0, x0 - svcntw () * 32, z0), -+ svst4 (p0, x0 - svcntw () * 32, z0)) -+ -+/* -+** st4_u32_m36: -+** [^{]* -+** st4w {z0\.s - z3\.s}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_u32_m36, svuint32x4_t, uint32_t, -+ svst4_u32 (p0, x0 - svcntw () * 36, z0), -+ svst4 (p0, x0 - svcntw () * 36, z0)) -+ -+/* -+** st4_vnum_u32_0: -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u32_0, svuint32x4_t, uint32_t, -+ svst4_vnum_u32 (p0, x0, 0, z0), -+ svst4_vnum (p0, x0, 0, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_u32_1: -+** incb x0 -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u32_1, svuint32x4_t, uint32_t, -+ svst4_vnum_u32 (p0, x0, 1, z0), -+ svst4_vnum (p0, x0, 1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_u32_2: -+** incb x0, all, mul #2 -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u32_2, svuint32x4_t, uint32_t, -+ svst4_vnum_u32 (p0, x0, 2, z0), -+ svst4_vnum (p0, x0, 2, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_u32_3: -+** incb x0, all, mul #3 -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u32_3, svuint32x4_t, uint32_t, -+ svst4_vnum_u32 (p0, x0, 3, z0), -+ svst4_vnum (p0, x0, 3, z0)) -+ -+/* -+** st4_vnum_u32_4: -+** st4w {z0\.s - z3\.s}, p0, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u32_4, svuint32x4_t, uint32_t, -+ svst4_vnum_u32 (p0, x0, 4, z0), -+ svst4_vnum (p0, x0, 4, z0)) -+ -+/* -+** st4_vnum_u32_28: -+** st4w {z0\.s - z3\.s}, p0, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u32_28, svuint32x4_t, uint32_t, -+ svst4_vnum_u32 (p0, x0, 28, z0), -+ svst4_vnum (p0, x0, 28, z0)) -+ -+/* -+** st4_vnum_u32_32: -+** [^{]* -+** st4w {z0\.s - z3\.s}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u32_32, svuint32x4_t, uint32_t, -+ svst4_vnum_u32 (p0, x0, 32, z0), -+ svst4_vnum (p0, x0, 32, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_u32_m1: -+** decb x0 -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u32_m1, svuint32x4_t, uint32_t, -+ svst4_vnum_u32 (p0, x0, -1, z0), -+ svst4_vnum (p0, x0, -1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_u32_m2: -+** decb x0, all, mul #2 -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u32_m2, svuint32x4_t, uint32_t, -+ svst4_vnum_u32 (p0, x0, -2, z0), -+ svst4_vnum (p0, x0, -2, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st4_vnum_u32_m3: -+** decb x0, all, mul #3 -+** st4w {z0\.s - z3\.s}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u32_m3, svuint32x4_t, uint32_t, -+ svst4_vnum_u32 (p0, x0, -3, z0), -+ svst4_vnum (p0, x0, -3, z0)) -+ -+/* -+** st4_vnum_u32_m4: -+** st4w {z0\.s - z3\.s}, p0, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u32_m4, svuint32x4_t, uint32_t, -+ svst4_vnum_u32 (p0, x0, -4, z0), -+ svst4_vnum (p0, x0, -4, z0)) -+ -+/* -+** st4_vnum_u32_m32: -+** st4w {z0\.s - z3\.s}, p0, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u32_m32, svuint32x4_t, uint32_t, -+ svst4_vnum_u32 (p0, x0, -32, z0), -+ svst4_vnum (p0, x0, -32, z0)) -+ -+/* -+** st4_vnum_u32_m36: -+** [^{]* -+** st4w {z0\.s - z3\.s}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u32_m36, svuint32x4_t, uint32_t, -+ svst4_vnum_u32 (p0, x0, -36, z0), -+ svst4_vnum (p0, x0, -36, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** st4_vnum_u32_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st4w {z0\.s - z3\.s}, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u32_x1, svuint32x4_t, uint32_t, -+ svst4_vnum_u32 (p0, x0, x1, z0), -+ svst4_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_u64.c -new file mode 100644 -index 000000000..53b78f5ba ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_u64.c -@@ -0,0 +1,286 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st4_u64_base: -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_u64_base, svuint64x4_t, uint64_t, -+ svst4_u64 (p0, x0, z0), -+ svst4 (p0, x0, z0)) -+ -+/* -+** st4_u64_index: -+** st4d {z0\.d - z3\.d}, p0, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_STORE (st4_u64_index, svuint64x4_t, uint64_t, -+ svst4_u64 (p0, x0 + x1, z0), -+ svst4 (p0, x0 + x1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_u64_1: -+** incb x0 -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_u64_1, svuint64x4_t, uint64_t, -+ svst4_u64 (p0, x0 + svcntd (), z0), -+ svst4 (p0, x0 + svcntd (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_u64_2: -+** incb x0, all, mul #2 -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_u64_2, svuint64x4_t, uint64_t, -+ svst4_u64 (p0, x0 + svcntd () * 2, z0), -+ svst4 (p0, x0 + svcntd () * 2, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st4_u64_3: -+** incb x0, all, mul #3 -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_u64_3, svuint64x4_t, uint64_t, -+ svst4_u64 (p0, x0 + svcntd () * 3, z0), -+ svst4 (p0, x0 + svcntd () * 3, z0)) -+ -+/* -+** st4_u64_4: -+** st4d {z0\.d - z3\.d}, p0, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_u64_4, svuint64x4_t, uint64_t, -+ svst4_u64 (p0, x0 + svcntd () * 4, z0), -+ svst4 (p0, x0 + svcntd () * 4, z0)) -+ -+/* -+** st4_u64_28: -+** st4d {z0\.d - z3\.d}, p0, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_u64_28, svuint64x4_t, uint64_t, -+ svst4_u64 (p0, x0 + svcntd () * 28, z0), -+ svst4 (p0, x0 + svcntd () * 28, z0)) -+ -+/* -+** st4_u64_32: -+** [^{]* -+** st4d {z0\.d - z3\.d}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_u64_32, svuint64x4_t, uint64_t, -+ svst4_u64 (p0, x0 + svcntd () * 32, z0), -+ svst4 (p0, x0 + svcntd () * 32, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_u64_m1: -+** decb x0 -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_u64_m1, svuint64x4_t, uint64_t, -+ svst4_u64 (p0, x0 - svcntd (), z0), -+ svst4 (p0, x0 - svcntd (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_u64_m2: -+** decb x0, all, mul #2 -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_u64_m2, svuint64x4_t, uint64_t, -+ svst4_u64 (p0, x0 - svcntd () * 2, z0), -+ svst4 (p0, x0 - svcntd () * 2, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_u64_m3: -+** decb x0, all, mul #3 -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_u64_m3, svuint64x4_t, uint64_t, -+ svst4_u64 (p0, x0 - svcntd () * 3, z0), -+ svst4 (p0, x0 - svcntd () * 3, z0)) -+ -+/* -+** st4_u64_m4: -+** st4d {z0\.d - z3\.d}, p0, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_u64_m4, svuint64x4_t, uint64_t, -+ svst4_u64 (p0, x0 - svcntd () * 4, z0), -+ svst4 (p0, x0 - svcntd () * 4, z0)) -+ -+/* -+** st4_u64_m32: -+** st4d {z0\.d - z3\.d}, p0, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_u64_m32, svuint64x4_t, uint64_t, -+ svst4_u64 (p0, x0 - svcntd () * 32, z0), -+ svst4 (p0, x0 - svcntd () * 32, z0)) -+ -+/* -+** st4_u64_m36: -+** [^{]* -+** st4d {z0\.d - z3\.d}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_u64_m36, svuint64x4_t, uint64_t, -+ svst4_u64 (p0, x0 - svcntd () * 36, z0), -+ svst4 (p0, x0 - svcntd () * 36, z0)) -+ -+/* -+** st4_vnum_u64_0: -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u64_0, svuint64x4_t, uint64_t, -+ svst4_vnum_u64 (p0, x0, 0, z0), -+ svst4_vnum (p0, x0, 0, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_u64_1: -+** incb x0 -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u64_1, svuint64x4_t, uint64_t, -+ svst4_vnum_u64 (p0, x0, 1, z0), -+ svst4_vnum (p0, x0, 1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_u64_2: -+** incb x0, all, mul #2 -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u64_2, svuint64x4_t, uint64_t, -+ svst4_vnum_u64 (p0, x0, 2, z0), -+ svst4_vnum (p0, x0, 2, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st4_vnum_u64_3: -+** incb x0, all, mul #3 -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u64_3, svuint64x4_t, uint64_t, -+ svst4_vnum_u64 (p0, x0, 3, z0), -+ svst4_vnum (p0, x0, 3, z0)) -+ -+/* -+** st4_vnum_u64_4: -+** st4d {z0\.d - z3\.d}, p0, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u64_4, svuint64x4_t, uint64_t, -+ svst4_vnum_u64 (p0, x0, 4, z0), -+ svst4_vnum (p0, x0, 4, z0)) -+ -+/* -+** st4_vnum_u64_28: -+** st4d {z0\.d - z3\.d}, p0, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u64_28, svuint64x4_t, uint64_t, -+ svst4_vnum_u64 (p0, x0, 28, z0), -+ svst4_vnum (p0, x0, 28, z0)) -+ -+/* -+** st4_vnum_u64_32: -+** [^{]* -+** st4d {z0\.d - z3\.d}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u64_32, svuint64x4_t, uint64_t, -+ svst4_vnum_u64 (p0, x0, 32, z0), -+ svst4_vnum (p0, x0, 32, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_u64_m1: -+** decb x0 -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u64_m1, svuint64x4_t, uint64_t, -+ svst4_vnum_u64 (p0, x0, -1, z0), -+ svst4_vnum (p0, x0, -1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_u64_m2: -+** decb x0, all, mul #2 -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u64_m2, svuint64x4_t, uint64_t, -+ svst4_vnum_u64 (p0, x0, -2, z0), -+ svst4_vnum (p0, x0, -2, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_u64_m3: -+** decb x0, all, mul #3 -+** st4d {z0\.d - z3\.d}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u64_m3, svuint64x4_t, uint64_t, -+ svst4_vnum_u64 (p0, x0, -3, z0), -+ svst4_vnum (p0, x0, -3, z0)) -+ -+/* -+** st4_vnum_u64_m4: -+** st4d {z0\.d - z3\.d}, p0, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u64_m4, svuint64x4_t, uint64_t, -+ svst4_vnum_u64 (p0, x0, -4, z0), -+ svst4_vnum (p0, x0, -4, z0)) -+ -+/* -+** st4_vnum_u64_m32: -+** st4d {z0\.d - z3\.d}, p0, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u64_m32, svuint64x4_t, uint64_t, -+ svst4_vnum_u64 (p0, x0, -32, z0), -+ svst4_vnum (p0, x0, -32, z0)) -+ -+/* -+** st4_vnum_u64_m36: -+** [^{]* -+** st4d {z0\.d - z3\.d}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u64_m36, svuint64x4_t, uint64_t, -+ svst4_vnum_u64 (p0, x0, -36, z0), -+ svst4_vnum (p0, x0, -36, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** st4_vnum_u64_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** st4d {z0\.d - z3\.d}, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u64_x1, svuint64x4_t, uint64_t, -+ svst4_vnum_u64 (p0, x0, x1, z0), -+ svst4_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_u8.c -new file mode 100644 -index 000000000..e7c2e7d76 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/st4_u8.c -@@ -0,0 +1,290 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** st4_u8_base: -+** st4b {z0\.b - z3\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_u8_base, svuint8x4_t, uint8_t, -+ svst4_u8 (p0, x0, z0), -+ svst4 (p0, x0, z0)) -+ -+/* -+** st4_u8_index: -+** st4b {z0\.b - z3\.b}, p0, \[x0, x1\] -+** ret -+*/ -+TEST_STORE (st4_u8_index, svuint8x4_t, uint8_t, -+ svst4_u8 (p0, x0 + x1, z0), -+ svst4 (p0, x0 + x1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_u8_1: -+** incb x0 -+** st4b {z0\.b - z3\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_u8_1, svuint8x4_t, uint8_t, -+ svst4_u8 (p0, x0 + svcntb (), z0), -+ svst4 (p0, x0 + svcntb (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_u8_2: -+** incb x0, all, mul #2 -+** st4b {z0\.b - z3\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_u8_2, svuint8x4_t, uint8_t, -+ svst4_u8 (p0, x0 + svcntb () * 2, z0), -+ svst4 (p0, x0 + svcntb () * 2, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_u8_3: -+** incb x0, all, mul #3 -+** st4b {z0\.b - z3\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_u8_3, svuint8x4_t, uint8_t, -+ svst4_u8 (p0, x0 + svcntb () * 3, z0), -+ svst4 (p0, x0 + svcntb () * 3, z0)) -+ -+/* -+** st4_u8_4: -+** st4b {z0\.b - z3\.b}, p0, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_u8_4, svuint8x4_t, uint8_t, -+ svst4_u8 (p0, x0 + svcntb () * 4, z0), -+ svst4 (p0, x0 + svcntb () * 4, z0)) -+ -+/* -+** st4_u8_28: -+** st4b {z0\.b - z3\.b}, p0, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_u8_28, svuint8x4_t, uint8_t, -+ svst4_u8 (p0, x0 + svcntb () * 28, z0), -+ svst4 (p0, x0 + svcntb () * 28, z0)) -+ -+/* -+** st4_u8_32: -+** [^{]* -+** st4b {z0\.b - z3\.b}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_u8_32, svuint8x4_t, uint8_t, -+ svst4_u8 (p0, x0 + svcntb () * 32, z0), -+ svst4 (p0, x0 + svcntb () * 32, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_u8_m1: -+** decb x0 -+** st4b {z0\.b - z3\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_u8_m1, svuint8x4_t, uint8_t, -+ svst4_u8 (p0, x0 - svcntb (), z0), -+ svst4 (p0, x0 - svcntb (), z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_u8_m2: -+** decb x0, all, mul #2 -+** st4b {z0\.b - z3\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_u8_m2, svuint8x4_t, uint8_t, -+ svst4_u8 (p0, x0 - svcntb () * 2, z0), -+ svst4 (p0, x0 - svcntb () * 2, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st4_u8_m3: -+** decb x0, all, mul #3 -+** st4b {z0\.b - z3\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_u8_m3, svuint8x4_t, uint8_t, -+ svst4_u8 (p0, x0 - svcntb () * 3, z0), -+ svst4 (p0, x0 - svcntb () * 3, z0)) -+ -+/* -+** st4_u8_m4: -+** st4b {z0\.b - z3\.b}, p0, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_u8_m4, svuint8x4_t, uint8_t, -+ svst4_u8 (p0, x0 - svcntb () * 4, z0), -+ svst4 (p0, x0 - svcntb () * 4, z0)) -+ -+/* -+** st4_u8_m32: -+** st4b {z0\.b - z3\.b}, p0, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_u8_m32, svuint8x4_t, uint8_t, -+ svst4_u8 (p0, x0 - svcntb () * 32, z0), -+ svst4 (p0, x0 - svcntb () * 32, z0)) -+ -+/* -+** st4_u8_m36: -+** [^{]* -+** st4b {z0\.b - z3\.b}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_u8_m36, svuint8x4_t, uint8_t, -+ svst4_u8 (p0, x0 - svcntb () * 36, z0), -+ svst4 (p0, x0 - svcntb () * 36, z0)) -+ -+/* -+** st4_vnum_u8_0: -+** st4b {z0\.b - z3\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u8_0, svuint8x4_t, uint8_t, -+ svst4_vnum_u8 (p0, x0, 0, z0), -+ svst4_vnum (p0, x0, 0, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_u8_1: -+** incb x0 -+** st4b {z0\.b - z3\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u8_1, svuint8x4_t, uint8_t, -+ svst4_vnum_u8 (p0, x0, 1, z0), -+ svst4_vnum (p0, x0, 1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_u8_2: -+** incb x0, all, mul #2 -+** st4b {z0\.b - z3\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u8_2, svuint8x4_t, uint8_t, -+ svst4_vnum_u8 (p0, x0, 2, z0), -+ svst4_vnum (p0, x0, 2, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_u8_3: -+** incb x0, all, mul #3 -+** st4b {z0\.b - z3\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u8_3, svuint8x4_t, uint8_t, -+ svst4_vnum_u8 (p0, x0, 3, z0), -+ svst4_vnum (p0, x0, 3, z0)) -+ -+/* -+** st4_vnum_u8_4: -+** st4b {z0\.b - z3\.b}, p0, \[x0, #4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u8_4, svuint8x4_t, uint8_t, -+ svst4_vnum_u8 (p0, x0, 4, z0), -+ svst4_vnum (p0, x0, 4, z0)) -+ -+/* -+** st4_vnum_u8_28: -+** st4b {z0\.b - z3\.b}, p0, \[x0, #28, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u8_28, svuint8x4_t, uint8_t, -+ svst4_vnum_u8 (p0, x0, 28, z0), -+ svst4_vnum (p0, x0, 28, z0)) -+ -+/* -+** st4_vnum_u8_32: -+** [^{]* -+** st4b {z0\.b - z3\.b}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u8_32, svuint8x4_t, uint8_t, -+ svst4_vnum_u8 (p0, x0, 32, z0), -+ svst4_vnum (p0, x0, 32, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_u8_m1: -+** decb x0 -+** st4b {z0\.b - z3\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u8_m1, svuint8x4_t, uint8_t, -+ svst4_vnum_u8 (p0, x0, -1, z0), -+ svst4_vnum (p0, x0, -1, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** st4_vnum_u8_m2: -+** decb x0, all, mul #2 -+** st4b {z0\.b - z3\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u8_m2, svuint8x4_t, uint8_t, -+ svst4_vnum_u8 (p0, x0, -2, z0), -+ svst4_vnum (p0, x0, -2, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** st4_vnum_u8_m3: -+** decb x0, all, mul #3 -+** st4b {z0\.b - z3\.b}, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u8_m3, svuint8x4_t, uint8_t, -+ svst4_vnum_u8 (p0, x0, -3, z0), -+ svst4_vnum (p0, x0, -3, z0)) -+ -+/* -+** st4_vnum_u8_m4: -+** st4b {z0\.b - z3\.b}, p0, \[x0, #-4, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u8_m4, svuint8x4_t, uint8_t, -+ svst4_vnum_u8 (p0, x0, -4, z0), -+ svst4_vnum (p0, x0, -4, z0)) -+ -+/* -+** st4_vnum_u8_m32: -+** st4b {z0\.b - z3\.b}, p0, \[x0, #-32, mul vl\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u8_m32, svuint8x4_t, uint8_t, -+ svst4_vnum_u8 (p0, x0, -32, z0), -+ svst4_vnum (p0, x0, -32, z0)) -+ -+/* -+** st4_vnum_u8_m36: -+** [^{]* -+** st4b {z0\.b - z3\.b}, p0, \[x[0-9]+\] -+** ret -+*/ -+TEST_STORE (st4_vnum_u8_m36, svuint8x4_t, uint8_t, -+ svst4_vnum_u8 (p0, x0, -36, z0), -+ svst4_vnum (p0, x0, -36, z0)) -+ -+/* -+** st4_vnum_u8_x1: -+** cntb (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** st4b {z0\.b - z3\.b}, p0, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** st4b {z0\.b - z3\.b}, p0, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_STORE (st4_vnum_u8_x1, svuint8x4_t, uint8_t, -+ svst4_vnum_u8 (p0, x0, x1, z0), -+ svst4_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_bf16.c -new file mode 100644 -index 000000000..3c4d21f27 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_bf16.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** stnt1_bf16_base: -+** stnt1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_bf16_base, svbfloat16_t, bfloat16_t, -+ svstnt1_bf16 (p0, x0, z0), -+ svstnt1 (p0, x0, z0)) -+ -+/* -+** stnt1_bf16_index: -+** stnt1h z0\.h, p0, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_STORE (stnt1_bf16_index, svbfloat16_t, bfloat16_t, -+ svstnt1_bf16 (p0, x0 + x1, z0), -+ svstnt1 (p0, x0 + x1, z0)) -+ -+/* -+** stnt1_bf16_1: -+** stnt1h z0\.h, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_bf16_1, svbfloat16_t, bfloat16_t, -+ svstnt1_bf16 (p0, x0 + svcnth (), z0), -+ svstnt1 (p0, x0 + svcnth (), z0)) -+ -+/* -+** stnt1_bf16_7: -+** stnt1h z0\.h, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_bf16_7, svbfloat16_t, bfloat16_t, -+ svstnt1_bf16 (p0, x0 + svcnth () * 7, z0), -+ svstnt1 (p0, x0 + svcnth () * 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** stnt1_bf16_8: -+** incb x0, all, mul #8 -+** stnt1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_bf16_8, svbfloat16_t, bfloat16_t, -+ svstnt1_bf16 (p0, x0 + svcnth () * 8, z0), -+ svstnt1 (p0, x0 + svcnth () * 8, z0)) -+ -+/* -+** stnt1_bf16_m1: -+** stnt1h z0\.h, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_bf16_m1, svbfloat16_t, bfloat16_t, -+ svstnt1_bf16 (p0, x0 - svcnth (), z0), -+ svstnt1 (p0, x0 - svcnth (), z0)) -+ -+/* -+** stnt1_bf16_m8: -+** stnt1h z0\.h, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_bf16_m8, svbfloat16_t, bfloat16_t, -+ svstnt1_bf16 (p0, x0 - svcnth () * 8, z0), -+ svstnt1 (p0, x0 - svcnth () * 8, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** stnt1_bf16_m9: -+** decb x0, all, mul #9 -+** stnt1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_bf16_m9, svbfloat16_t, bfloat16_t, -+ svstnt1_bf16 (p0, x0 - svcnth () * 9, z0), -+ svstnt1 (p0, x0 - svcnth () * 9, z0)) -+ -+/* -+** stnt1_vnum_bf16_0: -+** stnt1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_bf16_0, svbfloat16_t, bfloat16_t, -+ svstnt1_vnum_bf16 (p0, x0, 0, z0), -+ svstnt1_vnum (p0, x0, 0, z0)) -+ -+/* -+** stnt1_vnum_bf16_1: -+** stnt1h z0\.h, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_bf16_1, svbfloat16_t, bfloat16_t, -+ svstnt1_vnum_bf16 (p0, x0, 1, z0), -+ svstnt1_vnum (p0, x0, 1, z0)) -+ -+/* -+** stnt1_vnum_bf16_7: -+** stnt1h z0\.h, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_bf16_7, svbfloat16_t, bfloat16_t, -+ svstnt1_vnum_bf16 (p0, x0, 7, z0), -+ svstnt1_vnum (p0, x0, 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** stnt1_vnum_bf16_8: -+** incb x0, all, mul #8 -+** stnt1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_bf16_8, svbfloat16_t, bfloat16_t, -+ svstnt1_vnum_bf16 (p0, x0, 8, z0), -+ svstnt1_vnum (p0, x0, 8, z0)) -+ -+/* -+** stnt1_vnum_bf16_m1: -+** stnt1h z0\.h, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_bf16_m1, svbfloat16_t, bfloat16_t, -+ svstnt1_vnum_bf16 (p0, x0, -1, z0), -+ svstnt1_vnum (p0, x0, -1, z0)) -+ -+/* -+** stnt1_vnum_bf16_m8: -+** stnt1h z0\.h, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_bf16_m8, svbfloat16_t, bfloat16_t, -+ svstnt1_vnum_bf16 (p0, x0, -8, z0), -+ svstnt1_vnum (p0, x0, -8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** stnt1_vnum_bf16_m9: -+** decb x0, all, mul #9 -+** stnt1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_bf16_m9, svbfloat16_t, bfloat16_t, -+ svstnt1_vnum_bf16 (p0, x0, -9, z0), -+ svstnt1_vnum (p0, x0, -9, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** stnt1_vnum_bf16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** stnt1h z0\.h, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_bf16_x1, svbfloat16_t, bfloat16_t, -+ svstnt1_vnum_bf16 (p0, x0, x1, z0), -+ svstnt1_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_f16.c -new file mode 100644 -index 000000000..a3d89caf1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_f16.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** stnt1_f16_base: -+** stnt1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_f16_base, svfloat16_t, float16_t, -+ svstnt1_f16 (p0, x0, z0), -+ svstnt1 (p0, x0, z0)) -+ -+/* -+** stnt1_f16_index: -+** stnt1h z0\.h, p0, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_STORE (stnt1_f16_index, svfloat16_t, float16_t, -+ svstnt1_f16 (p0, x0 + x1, z0), -+ svstnt1 (p0, x0 + x1, z0)) -+ -+/* -+** stnt1_f16_1: -+** stnt1h z0\.h, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_f16_1, svfloat16_t, float16_t, -+ svstnt1_f16 (p0, x0 + svcnth (), z0), -+ svstnt1 (p0, x0 + svcnth (), z0)) -+ -+/* -+** stnt1_f16_7: -+** stnt1h z0\.h, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_f16_7, svfloat16_t, float16_t, -+ svstnt1_f16 (p0, x0 + svcnth () * 7, z0), -+ svstnt1 (p0, x0 + svcnth () * 7, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** stnt1_f16_8: -+** incb x0, all, mul #8 -+** stnt1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_f16_8, svfloat16_t, float16_t, -+ svstnt1_f16 (p0, x0 + svcnth () * 8, z0), -+ svstnt1 (p0, x0 + svcnth () * 8, z0)) -+ -+/* -+** stnt1_f16_m1: -+** stnt1h z0\.h, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_f16_m1, svfloat16_t, float16_t, -+ svstnt1_f16 (p0, x0 - svcnth (), z0), -+ svstnt1 (p0, x0 - svcnth (), z0)) -+ -+/* -+** stnt1_f16_m8: -+** stnt1h z0\.h, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_f16_m8, svfloat16_t, float16_t, -+ svstnt1_f16 (p0, x0 - svcnth () * 8, z0), -+ svstnt1 (p0, x0 - svcnth () * 8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** stnt1_f16_m9: -+** decb x0, all, mul #9 -+** stnt1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_f16_m9, svfloat16_t, float16_t, -+ svstnt1_f16 (p0, x0 - svcnth () * 9, z0), -+ svstnt1 (p0, x0 - svcnth () * 9, z0)) -+ -+/* -+** stnt1_vnum_f16_0: -+** stnt1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_f16_0, svfloat16_t, float16_t, -+ svstnt1_vnum_f16 (p0, x0, 0, z0), -+ svstnt1_vnum (p0, x0, 0, z0)) -+ -+/* -+** stnt1_vnum_f16_1: -+** stnt1h z0\.h, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_f16_1, svfloat16_t, float16_t, -+ svstnt1_vnum_f16 (p0, x0, 1, z0), -+ svstnt1_vnum (p0, x0, 1, z0)) -+ -+/* -+** stnt1_vnum_f16_7: -+** stnt1h z0\.h, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_f16_7, svfloat16_t, float16_t, -+ svstnt1_vnum_f16 (p0, x0, 7, z0), -+ svstnt1_vnum (p0, x0, 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** stnt1_vnum_f16_8: -+** incb x0, all, mul #8 -+** stnt1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_f16_8, svfloat16_t, float16_t, -+ svstnt1_vnum_f16 (p0, x0, 8, z0), -+ svstnt1_vnum (p0, x0, 8, z0)) -+ -+/* -+** stnt1_vnum_f16_m1: -+** stnt1h z0\.h, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_f16_m1, svfloat16_t, float16_t, -+ svstnt1_vnum_f16 (p0, x0, -1, z0), -+ svstnt1_vnum (p0, x0, -1, z0)) -+ -+/* -+** stnt1_vnum_f16_m8: -+** stnt1h z0\.h, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_f16_m8, svfloat16_t, float16_t, -+ svstnt1_vnum_f16 (p0, x0, -8, z0), -+ svstnt1_vnum (p0, x0, -8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** stnt1_vnum_f16_m9: -+** decb x0, all, mul #9 -+** stnt1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_f16_m9, svfloat16_t, float16_t, -+ svstnt1_vnum_f16 (p0, x0, -9, z0), -+ svstnt1_vnum (p0, x0, -9, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** stnt1_vnum_f16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** stnt1h z0\.h, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_f16_x1, svfloat16_t, float16_t, -+ svstnt1_vnum_f16 (p0, x0, x1, z0), -+ svstnt1_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_f32.c -new file mode 100644 -index 000000000..24e890512 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_f32.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** stnt1_f32_base: -+** stnt1w z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_f32_base, svfloat32_t, float32_t, -+ svstnt1_f32 (p0, x0, z0), -+ svstnt1 (p0, x0, z0)) -+ -+/* -+** stnt1_f32_index: -+** stnt1w z0\.s, p0, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_STORE (stnt1_f32_index, svfloat32_t, float32_t, -+ svstnt1_f32 (p0, x0 + x1, z0), -+ svstnt1 (p0, x0 + x1, z0)) -+ -+/* -+** stnt1_f32_1: -+** stnt1w z0\.s, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_f32_1, svfloat32_t, float32_t, -+ svstnt1_f32 (p0, x0 + svcntw (), z0), -+ svstnt1 (p0, x0 + svcntw (), z0)) -+ -+/* -+** stnt1_f32_7: -+** stnt1w z0\.s, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_f32_7, svfloat32_t, float32_t, -+ svstnt1_f32 (p0, x0 + svcntw () * 7, z0), -+ svstnt1 (p0, x0 + svcntw () * 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** stnt1_f32_8: -+** incb x0, all, mul #8 -+** stnt1w z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_f32_8, svfloat32_t, float32_t, -+ svstnt1_f32 (p0, x0 + svcntw () * 8, z0), -+ svstnt1 (p0, x0 + svcntw () * 8, z0)) -+ -+/* -+** stnt1_f32_m1: -+** stnt1w z0\.s, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_f32_m1, svfloat32_t, float32_t, -+ svstnt1_f32 (p0, x0 - svcntw (), z0), -+ svstnt1 (p0, x0 - svcntw (), z0)) -+ -+/* -+** stnt1_f32_m8: -+** stnt1w z0\.s, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_f32_m8, svfloat32_t, float32_t, -+ svstnt1_f32 (p0, x0 - svcntw () * 8, z0), -+ svstnt1 (p0, x0 - svcntw () * 8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** stnt1_f32_m9: -+** decb x0, all, mul #9 -+** stnt1w z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_f32_m9, svfloat32_t, float32_t, -+ svstnt1_f32 (p0, x0 - svcntw () * 9, z0), -+ svstnt1 (p0, x0 - svcntw () * 9, z0)) -+ -+/* -+** stnt1_vnum_f32_0: -+** stnt1w z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_f32_0, svfloat32_t, float32_t, -+ svstnt1_vnum_f32 (p0, x0, 0, z0), -+ svstnt1_vnum (p0, x0, 0, z0)) -+ -+/* -+** stnt1_vnum_f32_1: -+** stnt1w z0\.s, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_f32_1, svfloat32_t, float32_t, -+ svstnt1_vnum_f32 (p0, x0, 1, z0), -+ svstnt1_vnum (p0, x0, 1, z0)) -+ -+/* -+** stnt1_vnum_f32_7: -+** stnt1w z0\.s, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_f32_7, svfloat32_t, float32_t, -+ svstnt1_vnum_f32 (p0, x0, 7, z0), -+ svstnt1_vnum (p0, x0, 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** stnt1_vnum_f32_8: -+** incb x0, all, mul #8 -+** stnt1w z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_f32_8, svfloat32_t, float32_t, -+ svstnt1_vnum_f32 (p0, x0, 8, z0), -+ svstnt1_vnum (p0, x0, 8, z0)) -+ -+/* -+** stnt1_vnum_f32_m1: -+** stnt1w z0\.s, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_f32_m1, svfloat32_t, float32_t, -+ svstnt1_vnum_f32 (p0, x0, -1, z0), -+ svstnt1_vnum (p0, x0, -1, z0)) -+ -+/* -+** stnt1_vnum_f32_m8: -+** stnt1w z0\.s, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_f32_m8, svfloat32_t, float32_t, -+ svstnt1_vnum_f32 (p0, x0, -8, z0), -+ svstnt1_vnum (p0, x0, -8, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** stnt1_vnum_f32_m9: -+** decb x0, all, mul #9 -+** stnt1w z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_f32_m9, svfloat32_t, float32_t, -+ svstnt1_vnum_f32 (p0, x0, -9, z0), -+ svstnt1_vnum (p0, x0, -9, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** stnt1_vnum_f32_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** stnt1w z0\.s, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_f32_x1, svfloat32_t, float32_t, -+ svstnt1_vnum_f32 (p0, x0, x1, z0), -+ svstnt1_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_f64.c -new file mode 100644 -index 000000000..9555a1faf ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_f64.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** stnt1_f64_base: -+** stnt1d z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_f64_base, svfloat64_t, float64_t, -+ svstnt1_f64 (p0, x0, z0), -+ svstnt1 (p0, x0, z0)) -+ -+/* -+** stnt1_f64_index: -+** stnt1d z0\.d, p0, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_STORE (stnt1_f64_index, svfloat64_t, float64_t, -+ svstnt1_f64 (p0, x0 + x1, z0), -+ svstnt1 (p0, x0 + x1, z0)) -+ -+/* -+** stnt1_f64_1: -+** stnt1d z0\.d, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_f64_1, svfloat64_t, float64_t, -+ svstnt1_f64 (p0, x0 + svcntd (), z0), -+ svstnt1 (p0, x0 + svcntd (), z0)) -+ -+/* -+** stnt1_f64_7: -+** stnt1d z0\.d, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_f64_7, svfloat64_t, float64_t, -+ svstnt1_f64 (p0, x0 + svcntd () * 7, z0), -+ svstnt1 (p0, x0 + svcntd () * 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** stnt1_f64_8: -+** incb x0, all, mul #8 -+** stnt1d z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_f64_8, svfloat64_t, float64_t, -+ svstnt1_f64 (p0, x0 + svcntd () * 8, z0), -+ svstnt1 (p0, x0 + svcntd () * 8, z0)) -+ -+/* -+** stnt1_f64_m1: -+** stnt1d z0\.d, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_f64_m1, svfloat64_t, float64_t, -+ svstnt1_f64 (p0, x0 - svcntd (), z0), -+ svstnt1 (p0, x0 - svcntd (), z0)) -+ -+/* -+** stnt1_f64_m8: -+** stnt1d z0\.d, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_f64_m8, svfloat64_t, float64_t, -+ svstnt1_f64 (p0, x0 - svcntd () * 8, z0), -+ svstnt1 (p0, x0 - svcntd () * 8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** stnt1_f64_m9: -+** decb x0, all, mul #9 -+** stnt1d z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_f64_m9, svfloat64_t, float64_t, -+ svstnt1_f64 (p0, x0 - svcntd () * 9, z0), -+ svstnt1 (p0, x0 - svcntd () * 9, z0)) -+ -+/* -+** stnt1_vnum_f64_0: -+** stnt1d z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_f64_0, svfloat64_t, float64_t, -+ svstnt1_vnum_f64 (p0, x0, 0, z0), -+ svstnt1_vnum (p0, x0, 0, z0)) -+ -+/* -+** stnt1_vnum_f64_1: -+** stnt1d z0\.d, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_f64_1, svfloat64_t, float64_t, -+ svstnt1_vnum_f64 (p0, x0, 1, z0), -+ svstnt1_vnum (p0, x0, 1, z0)) -+ -+/* -+** stnt1_vnum_f64_7: -+** stnt1d z0\.d, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_f64_7, svfloat64_t, float64_t, -+ svstnt1_vnum_f64 (p0, x0, 7, z0), -+ svstnt1_vnum (p0, x0, 7, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** stnt1_vnum_f64_8: -+** incb x0, all, mul #8 -+** stnt1d z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_f64_8, svfloat64_t, float64_t, -+ svstnt1_vnum_f64 (p0, x0, 8, z0), -+ svstnt1_vnum (p0, x0, 8, z0)) -+ -+/* -+** stnt1_vnum_f64_m1: -+** stnt1d z0\.d, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_f64_m1, svfloat64_t, float64_t, -+ svstnt1_vnum_f64 (p0, x0, -1, z0), -+ svstnt1_vnum (p0, x0, -1, z0)) -+ -+/* -+** stnt1_vnum_f64_m8: -+** stnt1d z0\.d, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_f64_m8, svfloat64_t, float64_t, -+ svstnt1_vnum_f64 (p0, x0, -8, z0), -+ svstnt1_vnum (p0, x0, -8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** stnt1_vnum_f64_m9: -+** decb x0, all, mul #9 -+** stnt1d z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_f64_m9, svfloat64_t, float64_t, -+ svstnt1_vnum_f64 (p0, x0, -9, z0), -+ svstnt1_vnum (p0, x0, -9, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** stnt1_vnum_f64_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** stnt1d z0\.d, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_f64_x1, svfloat64_t, float64_t, -+ svstnt1_vnum_f64 (p0, x0, x1, z0), -+ svstnt1_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_s16.c -new file mode 100644 -index 000000000..62e31450d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_s16.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** stnt1_s16_base: -+** stnt1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_s16_base, svint16_t, int16_t, -+ svstnt1_s16 (p0, x0, z0), -+ svstnt1 (p0, x0, z0)) -+ -+/* -+** stnt1_s16_index: -+** stnt1h z0\.h, p0, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_STORE (stnt1_s16_index, svint16_t, int16_t, -+ svstnt1_s16 (p0, x0 + x1, z0), -+ svstnt1 (p0, x0 + x1, z0)) -+ -+/* -+** stnt1_s16_1: -+** stnt1h z0\.h, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_s16_1, svint16_t, int16_t, -+ svstnt1_s16 (p0, x0 + svcnth (), z0), -+ svstnt1 (p0, x0 + svcnth (), z0)) -+ -+/* -+** stnt1_s16_7: -+** stnt1h z0\.h, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_s16_7, svint16_t, int16_t, -+ svstnt1_s16 (p0, x0 + svcnth () * 7, z0), -+ svstnt1 (p0, x0 + svcnth () * 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** stnt1_s16_8: -+** incb x0, all, mul #8 -+** stnt1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_s16_8, svint16_t, int16_t, -+ svstnt1_s16 (p0, x0 + svcnth () * 8, z0), -+ svstnt1 (p0, x0 + svcnth () * 8, z0)) -+ -+/* -+** stnt1_s16_m1: -+** stnt1h z0\.h, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_s16_m1, svint16_t, int16_t, -+ svstnt1_s16 (p0, x0 - svcnth (), z0), -+ svstnt1 (p0, x0 - svcnth (), z0)) -+ -+/* -+** stnt1_s16_m8: -+** stnt1h z0\.h, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_s16_m8, svint16_t, int16_t, -+ svstnt1_s16 (p0, x0 - svcnth () * 8, z0), -+ svstnt1 (p0, x0 - svcnth () * 8, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** stnt1_s16_m9: -+** decb x0, all, mul #9 -+** stnt1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_s16_m9, svint16_t, int16_t, -+ svstnt1_s16 (p0, x0 - svcnth () * 9, z0), -+ svstnt1 (p0, x0 - svcnth () * 9, z0)) -+ -+/* -+** stnt1_vnum_s16_0: -+** stnt1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_s16_0, svint16_t, int16_t, -+ svstnt1_vnum_s16 (p0, x0, 0, z0), -+ svstnt1_vnum (p0, x0, 0, z0)) -+ -+/* -+** stnt1_vnum_s16_1: -+** stnt1h z0\.h, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_s16_1, svint16_t, int16_t, -+ svstnt1_vnum_s16 (p0, x0, 1, z0), -+ svstnt1_vnum (p0, x0, 1, z0)) -+ -+/* -+** stnt1_vnum_s16_7: -+** stnt1h z0\.h, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_s16_7, svint16_t, int16_t, -+ svstnt1_vnum_s16 (p0, x0, 7, z0), -+ svstnt1_vnum (p0, x0, 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** stnt1_vnum_s16_8: -+** incb x0, all, mul #8 -+** stnt1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_s16_8, svint16_t, int16_t, -+ svstnt1_vnum_s16 (p0, x0, 8, z0), -+ svstnt1_vnum (p0, x0, 8, z0)) -+ -+/* -+** stnt1_vnum_s16_m1: -+** stnt1h z0\.h, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_s16_m1, svint16_t, int16_t, -+ svstnt1_vnum_s16 (p0, x0, -1, z0), -+ svstnt1_vnum (p0, x0, -1, z0)) -+ -+/* -+** stnt1_vnum_s16_m8: -+** stnt1h z0\.h, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_s16_m8, svint16_t, int16_t, -+ svstnt1_vnum_s16 (p0, x0, -8, z0), -+ svstnt1_vnum (p0, x0, -8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** stnt1_vnum_s16_m9: -+** decb x0, all, mul #9 -+** stnt1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_s16_m9, svint16_t, int16_t, -+ svstnt1_vnum_s16 (p0, x0, -9, z0), -+ svstnt1_vnum (p0, x0, -9, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** stnt1_vnum_s16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** stnt1h z0\.h, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_s16_x1, svint16_t, int16_t, -+ svstnt1_vnum_s16 (p0, x0, x1, z0), -+ svstnt1_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_s32.c -new file mode 100644 -index 000000000..ff1f27c05 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_s32.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** stnt1_s32_base: -+** stnt1w z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_s32_base, svint32_t, int32_t, -+ svstnt1_s32 (p0, x0, z0), -+ svstnt1 (p0, x0, z0)) -+ -+/* -+** stnt1_s32_index: -+** stnt1w z0\.s, p0, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_STORE (stnt1_s32_index, svint32_t, int32_t, -+ svstnt1_s32 (p0, x0 + x1, z0), -+ svstnt1 (p0, x0 + x1, z0)) -+ -+/* -+** stnt1_s32_1: -+** stnt1w z0\.s, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_s32_1, svint32_t, int32_t, -+ svstnt1_s32 (p0, x0 + svcntw (), z0), -+ svstnt1 (p0, x0 + svcntw (), z0)) -+ -+/* -+** stnt1_s32_7: -+** stnt1w z0\.s, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_s32_7, svint32_t, int32_t, -+ svstnt1_s32 (p0, x0 + svcntw () * 7, z0), -+ svstnt1 (p0, x0 + svcntw () * 7, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** stnt1_s32_8: -+** incb x0, all, mul #8 -+** stnt1w z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_s32_8, svint32_t, int32_t, -+ svstnt1_s32 (p0, x0 + svcntw () * 8, z0), -+ svstnt1 (p0, x0 + svcntw () * 8, z0)) -+ -+/* -+** stnt1_s32_m1: -+** stnt1w z0\.s, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_s32_m1, svint32_t, int32_t, -+ svstnt1_s32 (p0, x0 - svcntw (), z0), -+ svstnt1 (p0, x0 - svcntw (), z0)) -+ -+/* -+** stnt1_s32_m8: -+** stnt1w z0\.s, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_s32_m8, svint32_t, int32_t, -+ svstnt1_s32 (p0, x0 - svcntw () * 8, z0), -+ svstnt1 (p0, x0 - svcntw () * 8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** stnt1_s32_m9: -+** decb x0, all, mul #9 -+** stnt1w z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_s32_m9, svint32_t, int32_t, -+ svstnt1_s32 (p0, x0 - svcntw () * 9, z0), -+ svstnt1 (p0, x0 - svcntw () * 9, z0)) -+ -+/* -+** stnt1_vnum_s32_0: -+** stnt1w z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_s32_0, svint32_t, int32_t, -+ svstnt1_vnum_s32 (p0, x0, 0, z0), -+ svstnt1_vnum (p0, x0, 0, z0)) -+ -+/* -+** stnt1_vnum_s32_1: -+** stnt1w z0\.s, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_s32_1, svint32_t, int32_t, -+ svstnt1_vnum_s32 (p0, x0, 1, z0), -+ svstnt1_vnum (p0, x0, 1, z0)) -+ -+/* -+** stnt1_vnum_s32_7: -+** stnt1w z0\.s, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_s32_7, svint32_t, int32_t, -+ svstnt1_vnum_s32 (p0, x0, 7, z0), -+ svstnt1_vnum (p0, x0, 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** stnt1_vnum_s32_8: -+** incb x0, all, mul #8 -+** stnt1w z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_s32_8, svint32_t, int32_t, -+ svstnt1_vnum_s32 (p0, x0, 8, z0), -+ svstnt1_vnum (p0, x0, 8, z0)) -+ -+/* -+** stnt1_vnum_s32_m1: -+** stnt1w z0\.s, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_s32_m1, svint32_t, int32_t, -+ svstnt1_vnum_s32 (p0, x0, -1, z0), -+ svstnt1_vnum (p0, x0, -1, z0)) -+ -+/* -+** stnt1_vnum_s32_m8: -+** stnt1w z0\.s, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_s32_m8, svint32_t, int32_t, -+ svstnt1_vnum_s32 (p0, x0, -8, z0), -+ svstnt1_vnum (p0, x0, -8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** stnt1_vnum_s32_m9: -+** decb x0, all, mul #9 -+** stnt1w z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_s32_m9, svint32_t, int32_t, -+ svstnt1_vnum_s32 (p0, x0, -9, z0), -+ svstnt1_vnum (p0, x0, -9, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** stnt1_vnum_s32_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** stnt1w z0\.s, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_s32_x1, svint32_t, int32_t, -+ svstnt1_vnum_s32 (p0, x0, x1, z0), -+ svstnt1_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_s64.c -new file mode 100644 -index 000000000..7d548f8f2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_s64.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** stnt1_s64_base: -+** stnt1d z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_s64_base, svint64_t, int64_t, -+ svstnt1_s64 (p0, x0, z0), -+ svstnt1 (p0, x0, z0)) -+ -+/* -+** stnt1_s64_index: -+** stnt1d z0\.d, p0, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_STORE (stnt1_s64_index, svint64_t, int64_t, -+ svstnt1_s64 (p0, x0 + x1, z0), -+ svstnt1 (p0, x0 + x1, z0)) -+ -+/* -+** stnt1_s64_1: -+** stnt1d z0\.d, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_s64_1, svint64_t, int64_t, -+ svstnt1_s64 (p0, x0 + svcntd (), z0), -+ svstnt1 (p0, x0 + svcntd (), z0)) -+ -+/* -+** stnt1_s64_7: -+** stnt1d z0\.d, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_s64_7, svint64_t, int64_t, -+ svstnt1_s64 (p0, x0 + svcntd () * 7, z0), -+ svstnt1 (p0, x0 + svcntd () * 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** stnt1_s64_8: -+** incb x0, all, mul #8 -+** stnt1d z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_s64_8, svint64_t, int64_t, -+ svstnt1_s64 (p0, x0 + svcntd () * 8, z0), -+ svstnt1 (p0, x0 + svcntd () * 8, z0)) -+ -+/* -+** stnt1_s64_m1: -+** stnt1d z0\.d, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_s64_m1, svint64_t, int64_t, -+ svstnt1_s64 (p0, x0 - svcntd (), z0), -+ svstnt1 (p0, x0 - svcntd (), z0)) -+ -+/* -+** stnt1_s64_m8: -+** stnt1d z0\.d, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_s64_m8, svint64_t, int64_t, -+ svstnt1_s64 (p0, x0 - svcntd () * 8, z0), -+ svstnt1 (p0, x0 - svcntd () * 8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** stnt1_s64_m9: -+** decb x0, all, mul #9 -+** stnt1d z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_s64_m9, svint64_t, int64_t, -+ svstnt1_s64 (p0, x0 - svcntd () * 9, z0), -+ svstnt1 (p0, x0 - svcntd () * 9, z0)) -+ -+/* -+** stnt1_vnum_s64_0: -+** stnt1d z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_s64_0, svint64_t, int64_t, -+ svstnt1_vnum_s64 (p0, x0, 0, z0), -+ svstnt1_vnum (p0, x0, 0, z0)) -+ -+/* -+** stnt1_vnum_s64_1: -+** stnt1d z0\.d, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_s64_1, svint64_t, int64_t, -+ svstnt1_vnum_s64 (p0, x0, 1, z0), -+ svstnt1_vnum (p0, x0, 1, z0)) -+ -+/* -+** stnt1_vnum_s64_7: -+** stnt1d z0\.d, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_s64_7, svint64_t, int64_t, -+ svstnt1_vnum_s64 (p0, x0, 7, z0), -+ svstnt1_vnum (p0, x0, 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** stnt1_vnum_s64_8: -+** incb x0, all, mul #8 -+** stnt1d z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_s64_8, svint64_t, int64_t, -+ svstnt1_vnum_s64 (p0, x0, 8, z0), -+ svstnt1_vnum (p0, x0, 8, z0)) -+ -+/* -+** stnt1_vnum_s64_m1: -+** stnt1d z0\.d, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_s64_m1, svint64_t, int64_t, -+ svstnt1_vnum_s64 (p0, x0, -1, z0), -+ svstnt1_vnum (p0, x0, -1, z0)) -+ -+/* -+** stnt1_vnum_s64_m8: -+** stnt1d z0\.d, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_s64_m8, svint64_t, int64_t, -+ svstnt1_vnum_s64 (p0, x0, -8, z0), -+ svstnt1_vnum (p0, x0, -8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** stnt1_vnum_s64_m9: -+** decb x0, all, mul #9 -+** stnt1d z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_s64_m9, svint64_t, int64_t, -+ svstnt1_vnum_s64 (p0, x0, -9, z0), -+ svstnt1_vnum (p0, x0, -9, z0)) -+ -+/* Using MUL to calculate an index would also be OK. 
*/ -+/* -+** stnt1_vnum_s64_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** stnt1d z0\.d, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_s64_x1, svint64_t, int64_t, -+ svstnt1_vnum_s64 (p0, x0, x1, z0), -+ svstnt1_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_s8.c -new file mode 100644 -index 000000000..87c88035d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_s8.c -@@ -0,0 +1,162 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** stnt1_s8_base: -+** stnt1b z0\.b, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_s8_base, svint8_t, int8_t, -+ svstnt1_s8 (p0, x0, z0), -+ svstnt1 (p0, x0, z0)) -+ -+/* -+** stnt1_s8_index: -+** stnt1b z0\.b, p0, \[x0, x1\] -+** ret -+*/ -+TEST_STORE (stnt1_s8_index, svint8_t, int8_t, -+ svstnt1_s8 (p0, x0 + x1, z0), -+ svstnt1 (p0, x0 + x1, z0)) -+ -+/* -+** stnt1_s8_1: -+** stnt1b z0\.b, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_s8_1, svint8_t, int8_t, -+ svstnt1_s8 (p0, x0 + svcntb (), z0), -+ svstnt1 (p0, x0 + svcntb (), z0)) -+ -+/* -+** stnt1_s8_7: -+** stnt1b z0\.b, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_s8_7, svint8_t, int8_t, -+ svstnt1_s8 (p0, x0 + svcntb () * 7, z0), -+ svstnt1 (p0, x0 + svcntb () * 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** stnt1_s8_8: -+** incb x0, all, mul #8 -+** stnt1b z0\.b, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_s8_8, svint8_t, int8_t, -+ svstnt1_s8 (p0, x0 + svcntb () * 8, z0), -+ svstnt1 (p0, x0 + svcntb () * 8, z0)) -+ -+/* -+** stnt1_s8_m1: -+** stnt1b z0\.b, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_s8_m1, svint8_t, int8_t, -+ svstnt1_s8 (p0, x0 - svcntb (), z0), -+ svstnt1 (p0, x0 - svcntb (), z0)) -+ -+/* -+** stnt1_s8_m8: -+** stnt1b z0\.b, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_s8_m8, svint8_t, int8_t, -+ svstnt1_s8 (p0, x0 - svcntb () * 8, z0), -+ svstnt1 (p0, x0 - svcntb () * 8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** stnt1_s8_m9: -+** decb x0, all, mul #9 -+** stnt1b z0\.b, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_s8_m9, svint8_t, int8_t, -+ svstnt1_s8 (p0, x0 - svcntb () * 9, z0), -+ svstnt1 (p0, x0 - svcntb () * 9, z0)) -+ -+/* -+** stnt1_vnum_s8_0: -+** stnt1b z0\.b, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_s8_0, svint8_t, int8_t, -+ svstnt1_vnum_s8 (p0, x0, 0, z0), -+ svstnt1_vnum (p0, x0, 0, z0)) -+ -+/* -+** stnt1_vnum_s8_1: -+** stnt1b z0\.b, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_s8_1, svint8_t, int8_t, -+ svstnt1_vnum_s8 (p0, x0, 1, z0), -+ svstnt1_vnum (p0, x0, 1, z0)) -+ -+/* -+** stnt1_vnum_s8_7: -+** stnt1b z0\.b, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_s8_7, svint8_t, int8_t, -+ svstnt1_vnum_s8 (p0, x0, 7, z0), -+ svstnt1_vnum (p0, x0, 7, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** stnt1_vnum_s8_8: -+** incb x0, all, mul #8 -+** stnt1b z0\.b, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_s8_8, svint8_t, int8_t, -+ svstnt1_vnum_s8 (p0, x0, 8, z0), -+ svstnt1_vnum (p0, x0, 8, z0)) -+ -+/* -+** stnt1_vnum_s8_m1: -+** stnt1b z0\.b, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_s8_m1, svint8_t, int8_t, -+ svstnt1_vnum_s8 (p0, x0, -1, z0), -+ svstnt1_vnum (p0, x0, -1, z0)) -+ -+/* -+** stnt1_vnum_s8_m8: -+** stnt1b z0\.b, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_s8_m8, svint8_t, int8_t, -+ svstnt1_vnum_s8 (p0, x0, -8, z0), -+ svstnt1_vnum (p0, x0, -8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** stnt1_vnum_s8_m9: -+** decb x0, all, mul #9 -+** stnt1b z0\.b, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_s8_m9, svint8_t, int8_t, -+ svstnt1_vnum_s8 (p0, x0, -9, z0), -+ svstnt1_vnum (p0, x0, -9, z0)) -+ -+/* -+** stnt1_vnum_s8_x1: -+** cntb (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** stnt1b z0\.b, p0, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** stnt1b z0\.b, p0, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_STORE (stnt1_vnum_s8_x1, svint8_t, int8_t, -+ svstnt1_vnum_s8 (p0, x0, x1, z0), -+ svstnt1_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_u16.c -new file mode 100644 -index 000000000..7d32df362 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_u16.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** stnt1_u16_base: -+** stnt1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_u16_base, svuint16_t, uint16_t, -+ svstnt1_u16 (p0, x0, z0), -+ svstnt1 (p0, x0, z0)) -+ -+/* -+** stnt1_u16_index: -+** stnt1h z0\.h, p0, \[x0, x1, lsl 1\] -+** ret -+*/ -+TEST_STORE (stnt1_u16_index, svuint16_t, uint16_t, -+ svstnt1_u16 (p0, x0 + x1, z0), -+ svstnt1 (p0, x0 + x1, z0)) -+ -+/* -+** stnt1_u16_1: -+** stnt1h z0\.h, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_u16_1, svuint16_t, uint16_t, -+ svstnt1_u16 (p0, x0 + svcnth (), z0), -+ svstnt1 (p0, x0 + svcnth (), z0)) -+ -+/* -+** stnt1_u16_7: -+** stnt1h z0\.h, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_u16_7, svuint16_t, uint16_t, -+ svstnt1_u16 (p0, x0 + svcnth () * 7, z0), -+ svstnt1 (p0, x0 + svcnth () * 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** stnt1_u16_8: -+** incb x0, all, mul #8 -+** stnt1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_u16_8, svuint16_t, uint16_t, -+ svstnt1_u16 (p0, x0 + svcnth () * 8, z0), -+ svstnt1 (p0, x0 + svcnth () * 8, z0)) -+ -+/* -+** stnt1_u16_m1: -+** stnt1h z0\.h, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_u16_m1, svuint16_t, uint16_t, -+ svstnt1_u16 (p0, x0 - svcnth (), z0), -+ svstnt1 (p0, x0 - svcnth (), z0)) -+ -+/* -+** stnt1_u16_m8: -+** stnt1h z0\.h, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_u16_m8, svuint16_t, uint16_t, -+ svstnt1_u16 (p0, x0 - svcnth () * 8, z0), -+ svstnt1 (p0, x0 - svcnth () * 8, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** stnt1_u16_m9: -+** decb x0, all, mul #9 -+** stnt1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_u16_m9, svuint16_t, uint16_t, -+ svstnt1_u16 (p0, x0 - svcnth () * 9, z0), -+ svstnt1 (p0, x0 - svcnth () * 9, z0)) -+ -+/* -+** stnt1_vnum_u16_0: -+** stnt1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_u16_0, svuint16_t, uint16_t, -+ svstnt1_vnum_u16 (p0, x0, 0, z0), -+ svstnt1_vnum (p0, x0, 0, z0)) -+ -+/* -+** stnt1_vnum_u16_1: -+** stnt1h z0\.h, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_u16_1, svuint16_t, uint16_t, -+ svstnt1_vnum_u16 (p0, x0, 1, z0), -+ svstnt1_vnum (p0, x0, 1, z0)) -+ -+/* -+** stnt1_vnum_u16_7: -+** stnt1h z0\.h, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_u16_7, svuint16_t, uint16_t, -+ svstnt1_vnum_u16 (p0, x0, 7, z0), -+ svstnt1_vnum (p0, x0, 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** stnt1_vnum_u16_8: -+** incb x0, all, mul #8 -+** stnt1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_u16_8, svuint16_t, uint16_t, -+ svstnt1_vnum_u16 (p0, x0, 8, z0), -+ svstnt1_vnum (p0, x0, 8, z0)) -+ -+/* -+** stnt1_vnum_u16_m1: -+** stnt1h z0\.h, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_u16_m1, svuint16_t, uint16_t, -+ svstnt1_vnum_u16 (p0, x0, -1, z0), -+ svstnt1_vnum (p0, x0, -1, z0)) -+ -+/* -+** stnt1_vnum_u16_m8: -+** stnt1h z0\.h, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_u16_m8, svuint16_t, uint16_t, -+ svstnt1_vnum_u16 (p0, x0, -8, z0), -+ svstnt1_vnum (p0, x0, -8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** stnt1_vnum_u16_m9: -+** decb x0, all, mul #9 -+** stnt1h z0\.h, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_u16_m9, svuint16_t, uint16_t, -+ svstnt1_vnum_u16 (p0, x0, -9, z0), -+ svstnt1_vnum (p0, x0, -9, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** stnt1_vnum_u16_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** stnt1h z0\.h, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_u16_x1, svuint16_t, uint16_t, -+ svstnt1_vnum_u16 (p0, x0, x1, z0), -+ svstnt1_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_u32.c -new file mode 100644 -index 000000000..cd4ccaba9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_u32.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** stnt1_u32_base: -+** stnt1w z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_u32_base, svuint32_t, uint32_t, -+ svstnt1_u32 (p0, x0, z0), -+ svstnt1 (p0, x0, z0)) -+ -+/* -+** stnt1_u32_index: -+** stnt1w z0\.s, p0, \[x0, x1, lsl 2\] -+** ret -+*/ -+TEST_STORE (stnt1_u32_index, svuint32_t, uint32_t, -+ svstnt1_u32 (p0, x0 + x1, z0), -+ svstnt1 (p0, x0 + x1, z0)) -+ -+/* -+** stnt1_u32_1: -+** stnt1w z0\.s, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_u32_1, svuint32_t, uint32_t, -+ svstnt1_u32 (p0, x0 + svcntw (), z0), -+ svstnt1 (p0, x0 + svcntw (), z0)) -+ -+/* -+** stnt1_u32_7: -+** stnt1w z0\.s, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_u32_7, svuint32_t, uint32_t, -+ svstnt1_u32 (p0, x0 + svcntw () * 7, z0), -+ svstnt1 (p0, x0 + svcntw () * 7, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** stnt1_u32_8: -+** incb x0, all, mul #8 -+** stnt1w z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_u32_8, svuint32_t, uint32_t, -+ svstnt1_u32 (p0, x0 + svcntw () * 8, z0), -+ svstnt1 (p0, x0 + svcntw () * 8, z0)) -+ -+/* -+** stnt1_u32_m1: -+** stnt1w z0\.s, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_u32_m1, svuint32_t, uint32_t, -+ svstnt1_u32 (p0, x0 - svcntw (), z0), -+ svstnt1 (p0, x0 - svcntw (), z0)) -+ -+/* -+** stnt1_u32_m8: -+** stnt1w z0\.s, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_u32_m8, svuint32_t, uint32_t, -+ svstnt1_u32 (p0, x0 - svcntw () * 8, z0), -+ svstnt1 (p0, x0 - svcntw () * 8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** stnt1_u32_m9: -+** decb x0, all, mul #9 -+** stnt1w z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_u32_m9, svuint32_t, uint32_t, -+ svstnt1_u32 (p0, x0 - svcntw () * 9, z0), -+ svstnt1 (p0, x0 - svcntw () * 9, z0)) -+ -+/* -+** stnt1_vnum_u32_0: -+** stnt1w z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_u32_0, svuint32_t, uint32_t, -+ svstnt1_vnum_u32 (p0, x0, 0, z0), -+ svstnt1_vnum (p0, x0, 0, z0)) -+ -+/* -+** stnt1_vnum_u32_1: -+** stnt1w z0\.s, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_u32_1, svuint32_t, uint32_t, -+ svstnt1_vnum_u32 (p0, x0, 1, z0), -+ svstnt1_vnum (p0, x0, 1, z0)) -+ -+/* -+** stnt1_vnum_u32_7: -+** stnt1w z0\.s, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_u32_7, svuint32_t, uint32_t, -+ svstnt1_vnum_u32 (p0, x0, 7, z0), -+ svstnt1_vnum (p0, x0, 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** stnt1_vnum_u32_8: -+** incb x0, all, mul #8 -+** stnt1w z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_u32_8, svuint32_t, uint32_t, -+ svstnt1_vnum_u32 (p0, x0, 8, z0), -+ svstnt1_vnum (p0, x0, 8, z0)) -+ -+/* -+** stnt1_vnum_u32_m1: -+** stnt1w z0\.s, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_u32_m1, svuint32_t, uint32_t, -+ svstnt1_vnum_u32 (p0, x0, -1, z0), -+ svstnt1_vnum (p0, x0, -1, z0)) -+ -+/* -+** stnt1_vnum_u32_m8: -+** stnt1w z0\.s, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_u32_m8, svuint32_t, uint32_t, -+ svstnt1_vnum_u32 (p0, x0, -8, z0), -+ svstnt1_vnum (p0, x0, -8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** stnt1_vnum_u32_m9: -+** decb x0, all, mul #9 -+** stnt1w z0\.s, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_u32_m9, svuint32_t, uint32_t, -+ svstnt1_vnum_u32 (p0, x0, -9, z0), -+ svstnt1_vnum (p0, x0, -9, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** stnt1_vnum_u32_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** stnt1w z0\.s, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_u32_x1, svuint32_t, uint32_t, -+ svstnt1_vnum_u32 (p0, x0, x1, z0), -+ svstnt1_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_u64.c -new file mode 100644 -index 000000000..c8145f65c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_u64.c -@@ -0,0 +1,158 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! 
ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** stnt1_u64_base: -+** stnt1d z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_u64_base, svuint64_t, uint64_t, -+ svstnt1_u64 (p0, x0, z0), -+ svstnt1 (p0, x0, z0)) -+ -+/* -+** stnt1_u64_index: -+** stnt1d z0\.d, p0, \[x0, x1, lsl 3\] -+** ret -+*/ -+TEST_STORE (stnt1_u64_index, svuint64_t, uint64_t, -+ svstnt1_u64 (p0, x0 + x1, z0), -+ svstnt1 (p0, x0 + x1, z0)) -+ -+/* -+** stnt1_u64_1: -+** stnt1d z0\.d, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_u64_1, svuint64_t, uint64_t, -+ svstnt1_u64 (p0, x0 + svcntd (), z0), -+ svstnt1 (p0, x0 + svcntd (), z0)) -+ -+/* -+** stnt1_u64_7: -+** stnt1d z0\.d, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_u64_7, svuint64_t, uint64_t, -+ svstnt1_u64 (p0, x0 + svcntd () * 7, z0), -+ svstnt1 (p0, x0 + svcntd () * 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** stnt1_u64_8: -+** incb x0, all, mul #8 -+** stnt1d z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_u64_8, svuint64_t, uint64_t, -+ svstnt1_u64 (p0, x0 + svcntd () * 8, z0), -+ svstnt1 (p0, x0 + svcntd () * 8, z0)) -+ -+/* -+** stnt1_u64_m1: -+** stnt1d z0\.d, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_u64_m1, svuint64_t, uint64_t, -+ svstnt1_u64 (p0, x0 - svcntd (), z0), -+ svstnt1 (p0, x0 - svcntd (), z0)) -+ -+/* -+** stnt1_u64_m8: -+** stnt1d z0\.d, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_u64_m8, svuint64_t, uint64_t, -+ svstnt1_u64 (p0, x0 - svcntd () * 8, z0), -+ svstnt1 (p0, x0 - svcntd () * 8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** stnt1_u64_m9: -+** decb x0, all, mul #9 -+** stnt1d z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_u64_m9, svuint64_t, uint64_t, -+ svstnt1_u64 (p0, x0 - svcntd () * 9, z0), -+ svstnt1 (p0, x0 - svcntd () * 9, z0)) -+ -+/* -+** stnt1_vnum_u64_0: -+** stnt1d z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_u64_0, svuint64_t, uint64_t, -+ svstnt1_vnum_u64 (p0, x0, 0, z0), -+ svstnt1_vnum (p0, x0, 0, z0)) -+ -+/* -+** stnt1_vnum_u64_1: -+** stnt1d z0\.d, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_u64_1, svuint64_t, uint64_t, -+ svstnt1_vnum_u64 (p0, x0, 1, z0), -+ svstnt1_vnum (p0, x0, 1, z0)) -+ -+/* -+** stnt1_vnum_u64_7: -+** stnt1d z0\.d, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_u64_7, svuint64_t, uint64_t, -+ svstnt1_vnum_u64 (p0, x0, 7, z0), -+ svstnt1_vnum (p0, x0, 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** stnt1_vnum_u64_8: -+** incb x0, all, mul #8 -+** stnt1d z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_u64_8, svuint64_t, uint64_t, -+ svstnt1_vnum_u64 (p0, x0, 8, z0), -+ svstnt1_vnum (p0, x0, 8, z0)) -+ -+/* -+** stnt1_vnum_u64_m1: -+** stnt1d z0\.d, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_u64_m1, svuint64_t, uint64_t, -+ svstnt1_vnum_u64 (p0, x0, -1, z0), -+ svstnt1_vnum (p0, x0, -1, z0)) -+ -+/* -+** stnt1_vnum_u64_m8: -+** stnt1d z0\.d, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_u64_m8, svuint64_t, uint64_t, -+ svstnt1_vnum_u64 (p0, x0, -8, z0), -+ svstnt1_vnum (p0, x0, -8, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** stnt1_vnum_u64_m9: -+** decb x0, all, mul #9 -+** stnt1d z0\.d, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_u64_m9, svuint64_t, uint64_t, -+ svstnt1_vnum_u64 (p0, x0, -9, z0), -+ svstnt1_vnum (p0, x0, -9, z0)) -+ -+/* Using MUL to calculate an index would also be OK. */ -+/* -+** stnt1_vnum_u64_x1: -+** cntb (x[0-9]+) -+** madd (x[0-9]+), (x1, \1|\1, x1), x0 -+** stnt1d z0\.d, p0, \[\2\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_u64_x1, svuint64_t, uint64_t, -+ svstnt1_vnum_u64 (p0, x0, x1, z0), -+ svstnt1_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_u8.c -new file mode 100644 -index 000000000..11c68f555 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/stnt1_u8.c -@@ -0,0 +1,162 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" { target { ! ilp32 } } } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** stnt1_u8_base: -+** stnt1b z0\.b, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_u8_base, svuint8_t, uint8_t, -+ svstnt1_u8 (p0, x0, z0), -+ svstnt1 (p0, x0, z0)) -+ -+/* -+** stnt1_u8_index: -+** stnt1b z0\.b, p0, \[x0, x1\] -+** ret -+*/ -+TEST_STORE (stnt1_u8_index, svuint8_t, uint8_t, -+ svstnt1_u8 (p0, x0 + x1, z0), -+ svstnt1 (p0, x0 + x1, z0)) -+ -+/* -+** stnt1_u8_1: -+** stnt1b z0\.b, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_u8_1, svuint8_t, uint8_t, -+ svstnt1_u8 (p0, x0 + svcntb (), z0), -+ svstnt1 (p0, x0 + svcntb (), z0)) -+ -+/* -+** stnt1_u8_7: -+** stnt1b z0\.b, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_u8_7, svuint8_t, uint8_t, -+ svstnt1_u8 (p0, x0 + svcntb () * 7, z0), -+ svstnt1 (p0, x0 + svcntb () * 7, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** stnt1_u8_8: -+** incb x0, all, mul #8 -+** stnt1b z0\.b, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_u8_8, svuint8_t, uint8_t, -+ svstnt1_u8 (p0, x0 + svcntb () * 8, z0), -+ svstnt1 (p0, x0 + svcntb () * 8, z0)) -+ -+/* -+** stnt1_u8_m1: -+** stnt1b z0\.b, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_u8_m1, svuint8_t, uint8_t, -+ svstnt1_u8 (p0, x0 - svcntb (), z0), -+ svstnt1 (p0, x0 - svcntb (), z0)) -+ -+/* -+** stnt1_u8_m8: -+** stnt1b z0\.b, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_u8_m8, svuint8_t, uint8_t, -+ svstnt1_u8 (p0, x0 - svcntb () * 8, z0), -+ svstnt1 (p0, x0 - svcntb () * 8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** stnt1_u8_m9: -+** decb x0, all, mul #9 -+** stnt1b z0\.b, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_u8_m9, svuint8_t, uint8_t, -+ svstnt1_u8 (p0, x0 - svcntb () * 9, z0), -+ svstnt1 (p0, x0 - svcntb () * 9, z0)) -+ -+/* -+** stnt1_vnum_u8_0: -+** stnt1b z0\.b, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_u8_0, svuint8_t, uint8_t, -+ svstnt1_vnum_u8 (p0, x0, 0, z0), -+ svstnt1_vnum (p0, x0, 0, z0)) -+ -+/* -+** stnt1_vnum_u8_1: -+** stnt1b z0\.b, p0, \[x0, #1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_u8_1, svuint8_t, uint8_t, -+ svstnt1_vnum_u8 (p0, x0, 1, z0), -+ svstnt1_vnum (p0, x0, 1, z0)) -+ -+/* -+** stnt1_vnum_u8_7: -+** stnt1b z0\.b, p0, \[x0, #7, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_u8_7, svuint8_t, uint8_t, -+ svstnt1_vnum_u8 (p0, x0, 7, z0), -+ svstnt1_vnum (p0, x0, 7, z0)) -+ -+/* Moving the constant into a register would also be OK. 
*/ -+/* -+** stnt1_vnum_u8_8: -+** incb x0, all, mul #8 -+** stnt1b z0\.b, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_u8_8, svuint8_t, uint8_t, -+ svstnt1_vnum_u8 (p0, x0, 8, z0), -+ svstnt1_vnum (p0, x0, 8, z0)) -+ -+/* -+** stnt1_vnum_u8_m1: -+** stnt1b z0\.b, p0, \[x0, #-1, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_u8_m1, svuint8_t, uint8_t, -+ svstnt1_vnum_u8 (p0, x0, -1, z0), -+ svstnt1_vnum (p0, x0, -1, z0)) -+ -+/* -+** stnt1_vnum_u8_m8: -+** stnt1b z0\.b, p0, \[x0, #-8, mul vl\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_u8_m8, svuint8_t, uint8_t, -+ svstnt1_vnum_u8 (p0, x0, -8, z0), -+ svstnt1_vnum (p0, x0, -8, z0)) -+ -+/* Moving the constant into a register would also be OK. */ -+/* -+** stnt1_vnum_u8_m9: -+** decb x0, all, mul #9 -+** stnt1b z0\.b, p0, \[x0\] -+** ret -+*/ -+TEST_STORE (stnt1_vnum_u8_m9, svuint8_t, uint8_t, -+ svstnt1_vnum_u8 (p0, x0, -9, z0), -+ svstnt1_vnum (p0, x0, -9, z0)) -+ -+/* -+** stnt1_vnum_u8_x1: -+** cntb (x[0-9]+) -+** ( -+** madd (x[0-9]+), (?:x1, \1|\1, x1), x0 -+** stnt1b z0\.b, p0, \[\2\] -+** | -+** mul (x[0-9]+), (?:x1, \1|\1, x1) -+** stnt1b z0\.b, p0, \[x0, \3\] -+** ) -+** ret -+*/ -+TEST_STORE (stnt1_vnum_u8_x1, svuint8_t, uint8_t, -+ svstnt1_vnum_u8 (p0, x0, x1, z0), -+ svstnt1_vnum (p0, x0, x1, z0)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_f16.c -new file mode 100644 -index 000000000..bf4a0ab1e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_f16.c -@@ -0,0 +1,577 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** sub_f16_m_tied1: -+** fsub z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f16_m_tied1, svfloat16_t, -+ z0 = svsub_f16_m (p0, z0, z1), -+ z0 = svsub_m (p0, z0, z1)) -+ -+/* -+** sub_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fsub z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f16_m_tied2, svfloat16_t, -+ z0 = svsub_f16_m (p0, z1, z0), -+ z0 = svsub_m (p0, z1, z0)) -+ -+/* -+** sub_f16_m_untied: -+** movprfx z0, z1 -+** fsub z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f16_m_untied, svfloat16_t, -+ z0 = svsub_f16_m (p0, z1, z2), -+ z0 = svsub_m (p0, z1, z2)) -+ -+/* -+** sub_h4_f16_m_tied1: -+** mov (z[0-9]+\.h), h4 -+** fsub z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (sub_h4_f16_m_tied1, svfloat16_t, __fp16, -+ z0 = svsub_n_f16_m (p0, z0, d4), -+ z0 = svsub_m (p0, z0, d4)) -+ -+/* -+** sub_h4_f16_m_untied: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0, z1 -+** fsub z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (sub_h4_f16_m_untied, svfloat16_t, __fp16, -+ z0 = svsub_n_f16_m (p0, z1, d4), -+ z0 = svsub_m (p0, z1, d4)) -+ -+/* -+** sub_1_f16_m_tied1: -+** fsub z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_f16_m_tied1, svfloat16_t, -+ z0 = svsub_n_f16_m (p0, z0, 1), -+ z0 = svsub_m (p0, z0, 1)) -+ -+/* -+** sub_1_f16_m_untied: -+** movprfx z0, z1 -+** fsub z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_f16_m_untied, svfloat16_t, -+ z0 = svsub_n_f16_m (p0, z1, 1), -+ z0 = svsub_m (p0, z1, 1)) -+ -+/* -+** sub_0p5_f16_m_tied1: -+** fsub z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_0p5_f16_m_tied1, svfloat16_t, -+ z0 = svsub_n_f16_m (p0, z0, 0.5), -+ z0 = svsub_m (p0, z0, 0.5)) -+ -+/* -+** sub_0p5_f16_m_untied: -+** movprfx z0, z1 -+** fsub z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_0p5_f16_m_untied, 
svfloat16_t, -+ z0 = svsub_n_f16_m (p0, z1, 0.5), -+ z0 = svsub_m (p0, z1, 0.5)) -+ -+/* -+** sub_m1_f16_m_tied1: -+** fadd z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_f16_m_tied1, svfloat16_t, -+ z0 = svsub_n_f16_m (p0, z0, -1), -+ z0 = svsub_m (p0, z0, -1)) -+ -+/* -+** sub_m1_f16_m_untied: -+** movprfx z0, z1 -+** fadd z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_f16_m_untied, svfloat16_t, -+ z0 = svsub_n_f16_m (p0, z1, -1), -+ z0 = svsub_m (p0, z1, -1)) -+ -+/* -+** sub_m0p5_f16_m_tied1: -+** fadd z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m0p5_f16_m_tied1, svfloat16_t, -+ z0 = svsub_n_f16_m (p0, z0, -0.5), -+ z0 = svsub_m (p0, z0, -0.5)) -+ -+/* -+** sub_m0p5_f16_m_untied: -+** movprfx z0, z1 -+** fadd z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m0p5_f16_m_untied, svfloat16_t, -+ z0 = svsub_n_f16_m (p0, z1, -0.5), -+ z0 = svsub_m (p0, z1, -0.5)) -+ -+/* -+** sub_m2_f16_m: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fadd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m2_f16_m, svfloat16_t, -+ z0 = svsub_n_f16_m (p0, z0, -2), -+ z0 = svsub_m (p0, z0, -2)) -+ -+/* -+** sub_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fsub z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f16_z_tied1, svfloat16_t, -+ z0 = svsub_f16_z (p0, z0, z1), -+ z0 = svsub_z (p0, z0, z1)) -+ -+/* -+** sub_f16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** fsubr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f16_z_tied2, svfloat16_t, -+ z0 = svsub_f16_z (p0, z1, z0), -+ z0 = svsub_z (p0, z1, z0)) -+ -+/* -+** sub_f16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fsub z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fsubr z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f16_z_untied, svfloat16_t, -+ z0 = svsub_f16_z (p0, z1, z2), -+ z0 = svsub_z (p0, z1, z2)) -+ -+/* -+** sub_h4_f16_z_tied1: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0\.h, p0/z, z0\.h -+** fsub z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (sub_h4_f16_z_tied1, svfloat16_t, __fp16, -+ z0 = svsub_n_f16_z (p0, z0, d4), -+ z0 = svsub_z (p0, z0, d4)) -+ -+/* -+** sub_h4_f16_z_untied: -+** mov (z[0-9]+\.h), h4 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fsub z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** fsubr z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (sub_h4_f16_z_untied, svfloat16_t, __fp16, -+ z0 = svsub_n_f16_z (p0, z1, d4), -+ z0 = svsub_z (p0, z1, d4)) -+ -+/* -+** sub_1_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fsub z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_f16_z_tied1, svfloat16_t, -+ z0 = svsub_n_f16_z (p0, z0, 1), -+ z0 = svsub_z (p0, z0, 1)) -+ -+/* -+** sub_1_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fsub z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_f16_z_untied, svfloat16_t, -+ z0 = svsub_n_f16_z (p0, z1, 1), -+ z0 = svsub_z (p0, z1, 1)) -+ -+/* -+** sub_0p5_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fsub z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_0p5_f16_z_tied1, svfloat16_t, -+ z0 = svsub_n_f16_z (p0, z0, 0.5), -+ z0 = svsub_z (p0, z0, 0.5)) -+ -+/* -+** sub_0p5_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fsub z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_0p5_f16_z_untied, svfloat16_t, -+ z0 = svsub_n_f16_z (p0, z1, 0.5), -+ z0 = svsub_z (p0, z1, 0.5)) -+ -+/* -+** sub_m1_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fadd 
z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_f16_z_tied1, svfloat16_t, -+ z0 = svsub_n_f16_z (p0, z0, -1), -+ z0 = svsub_z (p0, z0, -1)) -+ -+/* -+** sub_m1_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fadd z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_f16_z_untied, svfloat16_t, -+ z0 = svsub_n_f16_z (p0, z1, -1), -+ z0 = svsub_z (p0, z1, -1)) -+ -+/* -+** sub_m0p5_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fadd z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m0p5_f16_z_tied1, svfloat16_t, -+ z0 = svsub_n_f16_z (p0, z0, -0.5), -+ z0 = svsub_z (p0, z0, -0.5)) -+ -+/* -+** sub_m0p5_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fadd z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m0p5_f16_z_untied, svfloat16_t, -+ z0 = svsub_n_f16_z (p0, z1, -0.5), -+ z0 = svsub_z (p0, z1, -0.5)) -+ -+/* -+** sub_m2_f16_z: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** movprfx z0\.h, p0/z, z0\.h -+** fadd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m2_f16_z, svfloat16_t, -+ z0 = svsub_n_f16_z (p0, z0, -2), -+ z0 = svsub_z (p0, z0, -2)) -+ -+/* -+** sub_f16_x_tied1: -+** fsub z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f16_x_tied1, svfloat16_t, -+ z0 = svsub_f16_x (p0, z0, z1), -+ z0 = svsub_x (p0, z0, z1)) -+ -+/* -+** sub_f16_x_tied2: -+** fsubr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f16_x_tied2, svfloat16_t, -+ z0 = svsub_f16_x (p0, z1, z0), -+ z0 = svsub_x (p0, z1, z0)) -+ -+/* -+** sub_f16_x_untied: -+** ( -+** movprfx z0, z1 -+** fsub z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0, z2 -+** fsubr z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f16_x_untied, svfloat16_t, -+ z0 = svsub_f16_x (p0, z1, z2), -+ z0 = svsub_x (p0, z1, z2)) -+ -+/* -+** sub_h4_f16_x_tied1: -+** mov (z[0-9]+\.h), h4 -+** fsub z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (sub_h4_f16_x_tied1, svfloat16_t, __fp16, -+ z0 = svsub_n_f16_x (p0, z0, d4), -+ z0 = svsub_x (p0, z0, d4)) -+ -+/* -+** sub_h4_f16_x_untied: { xfail *-*-* } -+** mov z0\.h, h4 -+** fsubr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZD (sub_h4_f16_x_untied, svfloat16_t, __fp16, -+ z0 = svsub_n_f16_x (p0, z1, d4), -+ z0 = svsub_x (p0, z1, d4)) -+ -+/* -+** sub_1_f16_x_tied1: -+** fsub z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_f16_x_tied1, svfloat16_t, -+ z0 = svsub_n_f16_x (p0, z0, 1), -+ z0 = svsub_x (p0, z0, 1)) -+ -+/* -+** sub_1_f16_x_untied: -+** movprfx z0, z1 -+** fsub z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_f16_x_untied, svfloat16_t, -+ z0 = svsub_n_f16_x (p0, z1, 1), -+ z0 = svsub_x (p0, z1, 1)) -+ -+/* -+** sub_0p5_f16_x_tied1: -+** fsub z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_0p5_f16_x_tied1, svfloat16_t, -+ z0 = svsub_n_f16_x (p0, z0, 0.5), -+ z0 = svsub_x (p0, z0, 0.5)) -+ -+/* -+** sub_0p5_f16_x_untied: -+** movprfx z0, z1 -+** fsub z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_0p5_f16_x_untied, svfloat16_t, -+ z0 = svsub_n_f16_x (p0, z1, 0.5), -+ z0 = svsub_x (p0, z1, 0.5)) -+ -+/* -+** sub_m1_f16_x_tied1: -+** fadd z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_f16_x_tied1, svfloat16_t, -+ z0 = svsub_n_f16_x (p0, z0, -1), -+ z0 = svsub_x (p0, z0, -1)) -+ -+/* -+** sub_m1_f16_x_untied: -+** movprfx z0, z1 -+** fadd z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_f16_x_untied, svfloat16_t, -+ z0 = svsub_n_f16_x (p0, z1, -1), -+ z0 = svsub_x (p0, z1, -1)) -+ 
-+/* -+** sub_m0p5_f16_x_tied1: -+** fadd z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m0p5_f16_x_tied1, svfloat16_t, -+ z0 = svsub_n_f16_x (p0, z0, -0.5), -+ z0 = svsub_x (p0, z0, -0.5)) -+ -+/* -+** sub_m0p5_f16_x_untied: -+** movprfx z0, z1 -+** fadd z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m0p5_f16_x_untied, svfloat16_t, -+ z0 = svsub_n_f16_x (p0, z1, -0.5), -+ z0 = svsub_x (p0, z1, -0.5)) -+ -+/* -+** sub_2_f16_x_tied1: -+** fmov (z[0-9]+\.h), #-2\.0(?:e\+0)? -+** fadd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_2_f16_x_tied1, svfloat16_t, -+ z0 = svsub_n_f16_x (p0, z0, 2), -+ z0 = svsub_x (p0, z0, 2)) -+ -+/* -+** sub_2_f16_x_untied: -+** fmov z0\.h, #-2\.0(?:e\+0)? -+** fadd z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_2_f16_x_untied, svfloat16_t, -+ z0 = svsub_n_f16_x (p0, z1, 2), -+ z0 = svsub_x (p0, z1, 2)) -+ -+/* -+** ptrue_sub_f16_x_tied1: -+** fsub z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_f16_x_tied1, svfloat16_t, -+ z0 = svsub_f16_x (svptrue_b16 (), z0, z1), -+ z0 = svsub_x (svptrue_b16 (), z0, z1)) -+ -+/* -+** ptrue_sub_f16_x_tied2: -+** fsub z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_f16_x_tied2, svfloat16_t, -+ z0 = svsub_f16_x (svptrue_b16 (), z1, z0), -+ z0 = svsub_x (svptrue_b16 (), z1, z0)) -+ -+/* -+** ptrue_sub_f16_x_untied: -+** fsub z0\.h, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_f16_x_untied, svfloat16_t, -+ z0 = svsub_f16_x (svptrue_b16 (), z1, z2), -+ z0 = svsub_x (svptrue_b16 (), z1, z2)) -+ -+/* -+** ptrue_sub_1_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_1_f16_x_tied1, svfloat16_t, -+ z0 = svsub_n_f16_x (svptrue_b16 (), z0, 1), -+ z0 = svsub_x (svptrue_b16 (), z0, 1)) -+ -+/* -+** ptrue_sub_1_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_1_f16_x_untied, svfloat16_t, -+ z0 = svsub_n_f16_x (svptrue_b16 (), z1, 1), -+ z0 = svsub_x (svptrue_b16 (), z1, 1)) -+ -+/* -+** ptrue_sub_0p5_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_0p5_f16_x_tied1, svfloat16_t, -+ z0 = svsub_n_f16_x (svptrue_b16 (), z0, 0.5), -+ z0 = svsub_x (svptrue_b16 (), z0, 0.5)) -+ -+/* -+** ptrue_sub_0p5_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_0p5_f16_x_untied, svfloat16_t, -+ z0 = svsub_n_f16_x (svptrue_b16 (), z1, 0.5), -+ z0 = svsub_x (svptrue_b16 (), z1, 0.5)) -+ -+/* -+** ptrue_sub_m1_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_m1_f16_x_tied1, svfloat16_t, -+ z0 = svsub_n_f16_x (svptrue_b16 (), z0, -1), -+ z0 = svsub_x (svptrue_b16 (), z0, -1)) -+ -+/* -+** ptrue_sub_m1_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_m1_f16_x_untied, svfloat16_t, -+ z0 = svsub_n_f16_x (svptrue_b16 (), z1, -1), -+ z0 = svsub_x (svptrue_b16 (), z1, -1)) -+ -+/* -+** ptrue_sub_m0p5_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_m0p5_f16_x_tied1, svfloat16_t, -+ z0 = svsub_n_f16_x (svptrue_b16 (), z0, -0.5), -+ z0 = svsub_x (svptrue_b16 (), z0, -0.5)) -+ -+/* -+** ptrue_sub_m0p5_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... 
-+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_m0p5_f16_x_untied, svfloat16_t, -+ z0 = svsub_n_f16_x (svptrue_b16 (), z1, -0.5), -+ z0 = svsub_x (svptrue_b16 (), z1, -0.5)) -+ -+/* -+** ptrue_sub_2_f16_x_tied1: -+** fmov (z[0-9]+\.h), #-2\.0(?:e\+0)? -+** fadd z0\.h, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_2_f16_x_tied1, svfloat16_t, -+ z0 = svsub_n_f16_x (svptrue_b16 (), z0, 2), -+ z0 = svsub_x (svptrue_b16 (), z0, 2)) -+ -+/* -+** ptrue_sub_2_f16_x_untied: -+** fmov (z[0-9]+\.h), #-2\.0(?:e\+0)? -+** fadd z0\.h, (z1\.h, \1|\1, z1\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_2_f16_x_untied, svfloat16_t, -+ z0 = svsub_n_f16_x (svptrue_b16 (), z1, 2), -+ z0 = svsub_x (svptrue_b16 (), z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_f16_notrap.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_f16_notrap.c -new file mode 100644 -index 000000000..e45098944 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_f16_notrap.c -@@ -0,0 +1,572 @@ -+/* { dg-additional-options "-fno-trapping-math" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** sub_f16_m_tied1: -+** fsub z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f16_m_tied1, svfloat16_t, -+ z0 = svsub_f16_m (p0, z0, z1), -+ z0 = svsub_m (p0, z0, z1)) -+ -+/* -+** sub_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fsub z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f16_m_tied2, svfloat16_t, -+ z0 = svsub_f16_m (p0, z1, z0), -+ z0 = svsub_m (p0, z1, z0)) -+ -+/* -+** sub_f16_m_untied: -+** movprfx z0, z1 -+** fsub z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f16_m_untied, svfloat16_t, -+ z0 = svsub_f16_m (p0, z1, z2), -+ z0 = svsub_m (p0, z1, z2)) -+ -+/* -+** sub_h4_f16_m_tied1: -+** mov (z[0-9]+\.h), h4 -+** fsub z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (sub_h4_f16_m_tied1, svfloat16_t, __fp16, -+ z0 = svsub_n_f16_m (p0, z0, d4), -+ z0 = svsub_m (p0, z0, d4)) -+ -+/* -+** sub_h4_f16_m_untied: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0, z1 -+** fsub z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (sub_h4_f16_m_untied, svfloat16_t, __fp16, -+ z0 = svsub_n_f16_m (p0, z1, d4), -+ z0 = svsub_m (p0, z1, d4)) -+ -+/* -+** sub_1_f16_m_tied1: -+** fsub z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_f16_m_tied1, svfloat16_t, -+ z0 = svsub_n_f16_m (p0, z0, 1), -+ z0 = svsub_m (p0, z0, 1)) -+ -+/* -+** sub_1_f16_m_untied: -+** movprfx z0, z1 -+** fsub z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_f16_m_untied, svfloat16_t, -+ z0 = svsub_n_f16_m (p0, z1, 1), -+ z0 = svsub_m (p0, z1, 1)) -+ -+/* -+** sub_0p5_f16_m_tied1: -+** fsub z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_0p5_f16_m_tied1, svfloat16_t, -+ z0 = svsub_n_f16_m (p0, z0, 0.5), -+ z0 = svsub_m (p0, z0, 0.5)) -+ -+/* -+** sub_0p5_f16_m_untied: -+** movprfx z0, z1 -+** fsub z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_0p5_f16_m_untied, svfloat16_t, -+ z0 = svsub_n_f16_m (p0, z1, 0.5), -+ z0 = svsub_m (p0, z1, 0.5)) -+ -+/* -+** sub_m1_f16_m_tied1: -+** fadd z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_f16_m_tied1, svfloat16_t, -+ z0 = svsub_n_f16_m (p0, z0, -1), -+ z0 = svsub_m (p0, z0, -1)) -+ -+/* -+** sub_m1_f16_m_untied: -+** movprfx z0, z1 -+** fadd z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_f16_m_untied, svfloat16_t, -+ z0 = svsub_n_f16_m (p0, z1, -1), -+ z0 = 
svsub_m (p0, z1, -1)) -+ -+/* -+** sub_m0p5_f16_m_tied1: -+** fadd z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m0p5_f16_m_tied1, svfloat16_t, -+ z0 = svsub_n_f16_m (p0, z0, -0.5), -+ z0 = svsub_m (p0, z0, -0.5)) -+ -+/* -+** sub_m0p5_f16_m_untied: -+** movprfx z0, z1 -+** fadd z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m0p5_f16_m_untied, svfloat16_t, -+ z0 = svsub_n_f16_m (p0, z1, -0.5), -+ z0 = svsub_m (p0, z1, -0.5)) -+ -+/* -+** sub_m2_f16_m: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** fadd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m2_f16_m, svfloat16_t, -+ z0 = svsub_n_f16_m (p0, z0, -2), -+ z0 = svsub_m (p0, z0, -2)) -+ -+/* -+** sub_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fsub z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f16_z_tied1, svfloat16_t, -+ z0 = svsub_f16_z (p0, z0, z1), -+ z0 = svsub_z (p0, z0, z1)) -+ -+/* -+** sub_f16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** fsubr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f16_z_tied2, svfloat16_t, -+ z0 = svsub_f16_z (p0, z1, z0), -+ z0 = svsub_z (p0, z1, z0)) -+ -+/* -+** sub_f16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fsub z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fsubr z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f16_z_untied, svfloat16_t, -+ z0 = svsub_f16_z (p0, z1, z2), -+ z0 = svsub_z (p0, z1, z2)) -+ -+/* -+** sub_h4_f16_z_tied1: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0\.h, p0/z, z0\.h -+** fsub z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (sub_h4_f16_z_tied1, svfloat16_t, __fp16, -+ z0 = svsub_n_f16_z (p0, z0, d4), -+ z0 = svsub_z (p0, z0, d4)) -+ -+/* -+** sub_h4_f16_z_untied: -+** mov (z[0-9]+\.h), h4 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fsub z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** fsubr z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (sub_h4_f16_z_untied, svfloat16_t, __fp16, -+ z0 = svsub_n_f16_z (p0, z1, d4), -+ z0 = svsub_z (p0, z1, d4)) -+ -+/* -+** sub_1_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fsub z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_f16_z_tied1, svfloat16_t, -+ z0 = svsub_n_f16_z (p0, z0, 1), -+ z0 = svsub_z (p0, z0, 1)) -+ -+/* -+** sub_1_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fsub z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_f16_z_untied, svfloat16_t, -+ z0 = svsub_n_f16_z (p0, z1, 1), -+ z0 = svsub_z (p0, z1, 1)) -+ -+/* -+** sub_0p5_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fsub z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_0p5_f16_z_tied1, svfloat16_t, -+ z0 = svsub_n_f16_z (p0, z0, 0.5), -+ z0 = svsub_z (p0, z0, 0.5)) -+ -+/* -+** sub_0p5_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fsub z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_0p5_f16_z_untied, svfloat16_t, -+ z0 = svsub_n_f16_z (p0, z1, 0.5), -+ z0 = svsub_z (p0, z1, 0.5)) -+ -+/* -+** sub_m1_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fadd z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_f16_z_tied1, svfloat16_t, -+ z0 = svsub_n_f16_z (p0, z0, -1), -+ z0 = svsub_z (p0, z0, -1)) -+ -+/* -+** sub_m1_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fadd z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_f16_z_untied, svfloat16_t, -+ z0 = svsub_n_f16_z (p0, z1, -1), -+ z0 = svsub_z (p0, z1, -1)) -+ -+/* -+** sub_m0p5_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fadd z0\.h, p0/m, 
z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m0p5_f16_z_tied1, svfloat16_t, -+ z0 = svsub_n_f16_z (p0, z0, -0.5), -+ z0 = svsub_z (p0, z0, -0.5)) -+ -+/* -+** sub_m0p5_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fadd z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m0p5_f16_z_untied, svfloat16_t, -+ z0 = svsub_n_f16_z (p0, z1, -0.5), -+ z0 = svsub_z (p0, z1, -0.5)) -+ -+/* -+** sub_m2_f16_z: -+** fmov (z[0-9]+\.h), #2\.0(?:e\+0)? -+** movprfx z0\.h, p0/z, z0\.h -+** fadd z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m2_f16_z, svfloat16_t, -+ z0 = svsub_n_f16_z (p0, z0, -2), -+ z0 = svsub_z (p0, z0, -2)) -+ -+/* -+** sub_f16_x_tied1: -+** fsub z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f16_x_tied1, svfloat16_t, -+ z0 = svsub_f16_x (p0, z0, z1), -+ z0 = svsub_x (p0, z0, z1)) -+ -+/* -+** sub_f16_x_tied2: -+** fsub z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f16_x_tied2, svfloat16_t, -+ z0 = svsub_f16_x (p0, z1, z0), -+ z0 = svsub_x (p0, z1, z0)) -+ -+/* -+** sub_f16_x_untied: -+** fsub z0\.h, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f16_x_untied, svfloat16_t, -+ z0 = svsub_f16_x (p0, z1, z2), -+ z0 = svsub_x (p0, z1, z2)) -+ -+/* -+** sub_h4_f16_x_tied1: -+** mov (z[0-9]+\.h), h4 -+** fsub z0\.h, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (sub_h4_f16_x_tied1, svfloat16_t, __fp16, -+ z0 = svsub_n_f16_x (p0, z0, d4), -+ z0 = svsub_x (p0, z0, d4)) -+ -+/* -+** sub_h4_f16_x_untied: -+** mov (z[0-9]+\.h), h4 -+** fsub z0\.h, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (sub_h4_f16_x_untied, svfloat16_t, __fp16, -+ z0 = svsub_n_f16_x (p0, z1, d4), -+ z0 = svsub_x (p0, z1, d4)) -+ -+/* -+** sub_1_f16_x_tied1: -+** fsub z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_f16_x_tied1, svfloat16_t, -+ z0 = svsub_n_f16_x (p0, z0, 1), -+ z0 = svsub_x (p0, z0, 1)) -+ -+/* -+** sub_1_f16_x_untied: -+** movprfx z0, z1 -+** fsub z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_f16_x_untied, svfloat16_t, -+ z0 = svsub_n_f16_x (p0, z1, 1), -+ z0 = svsub_x (p0, z1, 1)) -+ -+/* -+** sub_0p5_f16_x_tied1: -+** fsub z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_0p5_f16_x_tied1, svfloat16_t, -+ z0 = svsub_n_f16_x (p0, z0, 0.5), -+ z0 = svsub_x (p0, z0, 0.5)) -+ -+/* -+** sub_0p5_f16_x_untied: -+** movprfx z0, z1 -+** fsub z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_0p5_f16_x_untied, svfloat16_t, -+ z0 = svsub_n_f16_x (p0, z1, 0.5), -+ z0 = svsub_x (p0, z1, 0.5)) -+ -+/* -+** sub_m1_f16_x_tied1: -+** fadd z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_f16_x_tied1, svfloat16_t, -+ z0 = svsub_n_f16_x (p0, z0, -1), -+ z0 = svsub_x (p0, z0, -1)) -+ -+/* -+** sub_m1_f16_x_untied: -+** movprfx z0, z1 -+** fadd z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_f16_x_untied, svfloat16_t, -+ z0 = svsub_n_f16_x (p0, z1, -1), -+ z0 = svsub_x (p0, z1, -1)) -+ -+/* -+** sub_m0p5_f16_x_tied1: -+** fadd z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m0p5_f16_x_tied1, svfloat16_t, -+ z0 = svsub_n_f16_x (p0, z0, -0.5), -+ z0 = svsub_x (p0, z0, -0.5)) -+ -+/* -+** sub_m0p5_f16_x_untied: -+** movprfx z0, z1 -+** fadd z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m0p5_f16_x_untied, svfloat16_t, -+ z0 = svsub_n_f16_x (p0, z1, -0.5), -+ z0 = svsub_x (p0, z1, -0.5)) -+ -+/* -+** sub_2_f16_x_tied1: -+** fmov (z[0-9]+\.h), #-2\.0(?:e\+0)? 
-+** fadd z0\.h, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_2_f16_x_tied1, svfloat16_t, -+ z0 = svsub_n_f16_x (p0, z0, 2), -+ z0 = svsub_x (p0, z0, 2)) -+ -+/* -+** sub_2_f16_x_untied: -+** fmov (z[0-9]+\.h), #-2\.0(?:e\+0)? -+** fadd z0\.h, (z1\.h, \1|\1, z1\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_2_f16_x_untied, svfloat16_t, -+ z0 = svsub_n_f16_x (p0, z1, 2), -+ z0 = svsub_x (p0, z1, 2)) -+ -+/* -+** ptrue_sub_f16_x_tied1: -+** fsub z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_f16_x_tied1, svfloat16_t, -+ z0 = svsub_f16_x (svptrue_b16 (), z0, z1), -+ z0 = svsub_x (svptrue_b16 (), z0, z1)) -+ -+/* -+** ptrue_sub_f16_x_tied2: -+** fsub z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_f16_x_tied2, svfloat16_t, -+ z0 = svsub_f16_x (svptrue_b16 (), z1, z0), -+ z0 = svsub_x (svptrue_b16 (), z1, z0)) -+ -+/* -+** ptrue_sub_f16_x_untied: -+** fsub z0\.h, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_f16_x_untied, svfloat16_t, -+ z0 = svsub_f16_x (svptrue_b16 (), z1, z2), -+ z0 = svsub_x (svptrue_b16 (), z1, z2)) -+ -+/* -+** ptrue_sub_1_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_1_f16_x_tied1, svfloat16_t, -+ z0 = svsub_n_f16_x (svptrue_b16 (), z0, 1), -+ z0 = svsub_x (svptrue_b16 (), z0, 1)) -+ -+/* -+** ptrue_sub_1_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_1_f16_x_untied, svfloat16_t, -+ z0 = svsub_n_f16_x (svptrue_b16 (), z1, 1), -+ z0 = svsub_x (svptrue_b16 (), z1, 1)) -+ -+/* -+** ptrue_sub_0p5_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_0p5_f16_x_tied1, svfloat16_t, -+ z0 = svsub_n_f16_x (svptrue_b16 (), z0, 0.5), -+ z0 = svsub_x (svptrue_b16 (), z0, 0.5)) -+ -+/* -+** ptrue_sub_0p5_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_0p5_f16_x_untied, svfloat16_t, -+ z0 = svsub_n_f16_x (svptrue_b16 (), z1, 0.5), -+ z0 = svsub_x (svptrue_b16 (), z1, 0.5)) -+ -+/* -+** ptrue_sub_m1_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_m1_f16_x_tied1, svfloat16_t, -+ z0 = svsub_n_f16_x (svptrue_b16 (), z0, -1), -+ z0 = svsub_x (svptrue_b16 (), z0, -1)) -+ -+/* -+** ptrue_sub_m1_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_m1_f16_x_untied, svfloat16_t, -+ z0 = svsub_n_f16_x (svptrue_b16 (), z1, -1), -+ z0 = svsub_x (svptrue_b16 (), z1, -1)) -+ -+/* -+** ptrue_sub_m0p5_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_m0p5_f16_x_tied1, svfloat16_t, -+ z0 = svsub_n_f16_x (svptrue_b16 (), z0, -0.5), -+ z0 = svsub_x (svptrue_b16 (), z0, -0.5)) -+ -+/* -+** ptrue_sub_m0p5_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_m0p5_f16_x_untied, svfloat16_t, -+ z0 = svsub_n_f16_x (svptrue_b16 (), z1, -0.5), -+ z0 = svsub_x (svptrue_b16 (), z1, -0.5)) -+ -+/* -+** ptrue_sub_2_f16_x_tied1: -+** fmov (z[0-9]+\.h), #-2\.0(?:e\+0)? -+** fadd z0\.h, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_2_f16_x_tied1, svfloat16_t, -+ z0 = svsub_n_f16_x (svptrue_b16 (), z0, 2), -+ z0 = svsub_x (svptrue_b16 (), z0, 2)) -+ -+/* -+** ptrue_sub_2_f16_x_untied: -+** fmov (z[0-9]+\.h), #-2\.0(?:e\+0)? 
-+** fadd z0\.h, (z1\.h, \1|\1, z1\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_2_f16_x_untied, svfloat16_t, -+ z0 = svsub_n_f16_x (svptrue_b16 (), z1, 2), -+ z0 = svsub_x (svptrue_b16 (), z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_f32.c -new file mode 100644 -index 000000000..05be52bad ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_f32.c -@@ -0,0 +1,577 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** sub_f32_m_tied1: -+** fsub z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f32_m_tied1, svfloat32_t, -+ z0 = svsub_f32_m (p0, z0, z1), -+ z0 = svsub_m (p0, z0, z1)) -+ -+/* -+** sub_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fsub z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f32_m_tied2, svfloat32_t, -+ z0 = svsub_f32_m (p0, z1, z0), -+ z0 = svsub_m (p0, z1, z0)) -+ -+/* -+** sub_f32_m_untied: -+** movprfx z0, z1 -+** fsub z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f32_m_untied, svfloat32_t, -+ z0 = svsub_f32_m (p0, z1, z2), -+ z0 = svsub_m (p0, z1, z2)) -+ -+/* -+** sub_s4_f32_m_tied1: -+** mov (z[0-9]+\.s), s4 -+** fsub z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (sub_s4_f32_m_tied1, svfloat32_t, float, -+ z0 = svsub_n_f32_m (p0, z0, d4), -+ z0 = svsub_m (p0, z0, d4)) -+ -+/* -+** sub_s4_f32_m_untied: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0, z1 -+** fsub z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (sub_s4_f32_m_untied, svfloat32_t, float, -+ z0 = svsub_n_f32_m (p0, z1, d4), -+ z0 = svsub_m (p0, z1, d4)) -+ -+/* -+** sub_1_f32_m_tied1: -+** fsub z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_f32_m_tied1, svfloat32_t, -+ z0 = svsub_n_f32_m (p0, z0, 1), -+ z0 = svsub_m (p0, z0, 1)) -+ -+/* -+** sub_1_f32_m_untied: -+** movprfx z0, z1 -+** fsub z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_f32_m_untied, svfloat32_t, -+ z0 = svsub_n_f32_m (p0, z1, 1), -+ z0 = svsub_m (p0, z1, 1)) -+ -+/* -+** sub_0p5_f32_m_tied1: -+** fsub z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_0p5_f32_m_tied1, svfloat32_t, -+ z0 = svsub_n_f32_m (p0, z0, 0.5), -+ z0 = svsub_m (p0, z0, 0.5)) -+ -+/* -+** sub_0p5_f32_m_untied: -+** movprfx z0, z1 -+** fsub z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_0p5_f32_m_untied, svfloat32_t, -+ z0 = svsub_n_f32_m (p0, z1, 0.5), -+ z0 = svsub_m (p0, z1, 0.5)) -+ -+/* -+** sub_m1_f32_m_tied1: -+** fadd z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_f32_m_tied1, svfloat32_t, -+ z0 = svsub_n_f32_m (p0, z0, -1), -+ z0 = svsub_m (p0, z0, -1)) -+ -+/* -+** sub_m1_f32_m_untied: -+** movprfx z0, z1 -+** fadd z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_f32_m_untied, svfloat32_t, -+ z0 = svsub_n_f32_m (p0, z1, -1), -+ z0 = svsub_m (p0, z1, -1)) -+ -+/* -+** sub_m0p5_f32_m_tied1: -+** fadd z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m0p5_f32_m_tied1, svfloat32_t, -+ z0 = svsub_n_f32_m (p0, z0, -0.5), -+ z0 = svsub_m (p0, z0, -0.5)) -+ -+/* -+** sub_m0p5_f32_m_untied: -+** movprfx z0, z1 -+** fadd z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m0p5_f32_m_untied, svfloat32_t, -+ z0 = svsub_n_f32_m (p0, z1, -0.5), -+ z0 = svsub_m (p0, z1, -0.5)) -+ -+/* -+** sub_m2_f32_m: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
-+** fadd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m2_f32_m, svfloat32_t, -+ z0 = svsub_n_f32_m (p0, z0, -2), -+ z0 = svsub_m (p0, z0, -2)) -+ -+/* -+** sub_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fsub z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f32_z_tied1, svfloat32_t, -+ z0 = svsub_f32_z (p0, z0, z1), -+ z0 = svsub_z (p0, z0, z1)) -+ -+/* -+** sub_f32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** fsubr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f32_z_tied2, svfloat32_t, -+ z0 = svsub_f32_z (p0, z1, z0), -+ z0 = svsub_z (p0, z1, z0)) -+ -+/* -+** sub_f32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fsub z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fsubr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f32_z_untied, svfloat32_t, -+ z0 = svsub_f32_z (p0, z1, z2), -+ z0 = svsub_z (p0, z1, z2)) -+ -+/* -+** sub_s4_f32_z_tied1: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0\.s, p0/z, z0\.s -+** fsub z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (sub_s4_f32_z_tied1, svfloat32_t, float, -+ z0 = svsub_n_f32_z (p0, z0, d4), -+ z0 = svsub_z (p0, z0, d4)) -+ -+/* -+** sub_s4_f32_z_untied: -+** mov (z[0-9]+\.s), s4 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fsub z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** fsubr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (sub_s4_f32_z_untied, svfloat32_t, float, -+ z0 = svsub_n_f32_z (p0, z1, d4), -+ z0 = svsub_z (p0, z1, d4)) -+ -+/* -+** sub_1_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fsub z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_f32_z_tied1, svfloat32_t, -+ z0 = svsub_n_f32_z (p0, z0, 1), -+ z0 = svsub_z (p0, z0, 1)) -+ -+/* -+** sub_1_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fsub z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_f32_z_untied, svfloat32_t, -+ z0 = svsub_n_f32_z (p0, z1, 1), -+ z0 = svsub_z (p0, z1, 1)) -+ -+/* -+** sub_0p5_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fsub z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_0p5_f32_z_tied1, svfloat32_t, -+ z0 = svsub_n_f32_z (p0, z0, 0.5), -+ z0 = svsub_z (p0, z0, 0.5)) -+ -+/* -+** sub_0p5_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fsub z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_0p5_f32_z_untied, svfloat32_t, -+ z0 = svsub_n_f32_z (p0, z1, 0.5), -+ z0 = svsub_z (p0, z1, 0.5)) -+ -+/* -+** sub_m1_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fadd z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_f32_z_tied1, svfloat32_t, -+ z0 = svsub_n_f32_z (p0, z0, -1), -+ z0 = svsub_z (p0, z0, -1)) -+ -+/* -+** sub_m1_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fadd z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_f32_z_untied, svfloat32_t, -+ z0 = svsub_n_f32_z (p0, z1, -1), -+ z0 = svsub_z (p0, z1, -1)) -+ -+/* -+** sub_m0p5_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fadd z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m0p5_f32_z_tied1, svfloat32_t, -+ z0 = svsub_n_f32_z (p0, z0, -0.5), -+ z0 = svsub_z (p0, z0, -0.5)) -+ -+/* -+** sub_m0p5_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fadd z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m0p5_f32_z_untied, svfloat32_t, -+ z0 = svsub_n_f32_z (p0, z1, -0.5), -+ z0 = svsub_z (p0, z1, -0.5)) -+ -+/* -+** sub_m2_f32_z: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
-+** movprfx z0\.s, p0/z, z0\.s -+** fadd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m2_f32_z, svfloat32_t, -+ z0 = svsub_n_f32_z (p0, z0, -2), -+ z0 = svsub_z (p0, z0, -2)) -+ -+/* -+** sub_f32_x_tied1: -+** fsub z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f32_x_tied1, svfloat32_t, -+ z0 = svsub_f32_x (p0, z0, z1), -+ z0 = svsub_x (p0, z0, z1)) -+ -+/* -+** sub_f32_x_tied2: -+** fsubr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f32_x_tied2, svfloat32_t, -+ z0 = svsub_f32_x (p0, z1, z0), -+ z0 = svsub_x (p0, z1, z0)) -+ -+/* -+** sub_f32_x_untied: -+** ( -+** movprfx z0, z1 -+** fsub z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0, z2 -+** fsubr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f32_x_untied, svfloat32_t, -+ z0 = svsub_f32_x (p0, z1, z2), -+ z0 = svsub_x (p0, z1, z2)) -+ -+/* -+** sub_s4_f32_x_tied1: -+** mov (z[0-9]+\.s), s4 -+** fsub z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (sub_s4_f32_x_tied1, svfloat32_t, float, -+ z0 = svsub_n_f32_x (p0, z0, d4), -+ z0 = svsub_x (p0, z0, d4)) -+ -+/* -+** sub_s4_f32_x_untied: { xfail *-*-* } -+** mov z0\.s, s4 -+** fsubr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZD (sub_s4_f32_x_untied, svfloat32_t, float, -+ z0 = svsub_n_f32_x (p0, z1, d4), -+ z0 = svsub_x (p0, z1, d4)) -+ -+/* -+** sub_1_f32_x_tied1: -+** fsub z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_f32_x_tied1, svfloat32_t, -+ z0 = svsub_n_f32_x (p0, z0, 1), -+ z0 = svsub_x (p0, z0, 1)) -+ -+/* -+** sub_1_f32_x_untied: -+** movprfx z0, z1 -+** fsub z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_f32_x_untied, svfloat32_t, -+ z0 = svsub_n_f32_x (p0, z1, 1), -+ z0 = svsub_x (p0, z1, 1)) -+ -+/* -+** sub_0p5_f32_x_tied1: -+** fsub z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_0p5_f32_x_tied1, svfloat32_t, -+ z0 = svsub_n_f32_x (p0, z0, 0.5), -+ z0 = svsub_x (p0, z0, 0.5)) -+ -+/* -+** sub_0p5_f32_x_untied: -+** movprfx z0, z1 -+** fsub z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_0p5_f32_x_untied, svfloat32_t, -+ z0 = svsub_n_f32_x (p0, z1, 0.5), -+ z0 = svsub_x (p0, z1, 0.5)) -+ -+/* -+** sub_m1_f32_x_tied1: -+** fadd z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_f32_x_tied1, svfloat32_t, -+ z0 = svsub_n_f32_x (p0, z0, -1), -+ z0 = svsub_x (p0, z0, -1)) -+ -+/* -+** sub_m1_f32_x_untied: -+** movprfx z0, z1 -+** fadd z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_f32_x_untied, svfloat32_t, -+ z0 = svsub_n_f32_x (p0, z1, -1), -+ z0 = svsub_x (p0, z1, -1)) -+ -+/* -+** sub_m0p5_f32_x_tied1: -+** fadd z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m0p5_f32_x_tied1, svfloat32_t, -+ z0 = svsub_n_f32_x (p0, z0, -0.5), -+ z0 = svsub_x (p0, z0, -0.5)) -+ -+/* -+** sub_m0p5_f32_x_untied: -+** movprfx z0, z1 -+** fadd z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m0p5_f32_x_untied, svfloat32_t, -+ z0 = svsub_n_f32_x (p0, z1, -0.5), -+ z0 = svsub_x (p0, z1, -0.5)) -+ -+/* -+** sub_2_f32_x_tied1: -+** fmov (z[0-9]+\.s), #-2\.0(?:e\+0)? -+** fadd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_2_f32_x_tied1, svfloat32_t, -+ z0 = svsub_n_f32_x (p0, z0, 2), -+ z0 = svsub_x (p0, z0, 2)) -+ -+/* -+** sub_2_f32_x_untied: -+** fmov z0\.s, #-2\.0(?:e\+0)? 
-+** fadd z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_2_f32_x_untied, svfloat32_t, -+ z0 = svsub_n_f32_x (p0, z1, 2), -+ z0 = svsub_x (p0, z1, 2)) -+ -+/* -+** ptrue_sub_f32_x_tied1: -+** fsub z0\.s, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_f32_x_tied1, svfloat32_t, -+ z0 = svsub_f32_x (svptrue_b32 (), z0, z1), -+ z0 = svsub_x (svptrue_b32 (), z0, z1)) -+ -+/* -+** ptrue_sub_f32_x_tied2: -+** fsub z0\.s, z1\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_f32_x_tied2, svfloat32_t, -+ z0 = svsub_f32_x (svptrue_b32 (), z1, z0), -+ z0 = svsub_x (svptrue_b32 (), z1, z0)) -+ -+/* -+** ptrue_sub_f32_x_untied: -+** fsub z0\.s, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_f32_x_untied, svfloat32_t, -+ z0 = svsub_f32_x (svptrue_b32 (), z1, z2), -+ z0 = svsub_x (svptrue_b32 (), z1, z2)) -+ -+/* -+** ptrue_sub_1_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_1_f32_x_tied1, svfloat32_t, -+ z0 = svsub_n_f32_x (svptrue_b32 (), z0, 1), -+ z0 = svsub_x (svptrue_b32 (), z0, 1)) -+ -+/* -+** ptrue_sub_1_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_1_f32_x_untied, svfloat32_t, -+ z0 = svsub_n_f32_x (svptrue_b32 (), z1, 1), -+ z0 = svsub_x (svptrue_b32 (), z1, 1)) -+ -+/* -+** ptrue_sub_0p5_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_0p5_f32_x_tied1, svfloat32_t, -+ z0 = svsub_n_f32_x (svptrue_b32 (), z0, 0.5), -+ z0 = svsub_x (svptrue_b32 (), z0, 0.5)) -+ -+/* -+** ptrue_sub_0p5_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_0p5_f32_x_untied, svfloat32_t, -+ z0 = svsub_n_f32_x (svptrue_b32 (), z1, 0.5), -+ z0 = svsub_x (svptrue_b32 (), z1, 0.5)) -+ -+/* -+** ptrue_sub_m1_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_m1_f32_x_tied1, svfloat32_t, -+ z0 = svsub_n_f32_x (svptrue_b32 (), z0, -1), -+ z0 = svsub_x (svptrue_b32 (), z0, -1)) -+ -+/* -+** ptrue_sub_m1_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_m1_f32_x_untied, svfloat32_t, -+ z0 = svsub_n_f32_x (svptrue_b32 (), z1, -1), -+ z0 = svsub_x (svptrue_b32 (), z1, -1)) -+ -+/* -+** ptrue_sub_m0p5_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_m0p5_f32_x_tied1, svfloat32_t, -+ z0 = svsub_n_f32_x (svptrue_b32 (), z0, -0.5), -+ z0 = svsub_x (svptrue_b32 (), z0, -0.5)) -+ -+/* -+** ptrue_sub_m0p5_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_m0p5_f32_x_untied, svfloat32_t, -+ z0 = svsub_n_f32_x (svptrue_b32 (), z1, -0.5), -+ z0 = svsub_x (svptrue_b32 (), z1, -0.5)) -+ -+/* -+** ptrue_sub_2_f32_x_tied1: -+** fmov (z[0-9]+\.s), #-2\.0(?:e\+0)? -+** fadd z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_2_f32_x_tied1, svfloat32_t, -+ z0 = svsub_n_f32_x (svptrue_b32 (), z0, 2), -+ z0 = svsub_x (svptrue_b32 (), z0, 2)) -+ -+/* -+** ptrue_sub_2_f32_x_untied: -+** fmov (z[0-9]+\.s), #-2\.0(?:e\+0)? 
-+** fadd z0\.s, (z1\.s, \1|\1, z1\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_2_f32_x_untied, svfloat32_t, -+ z0 = svsub_n_f32_x (svptrue_b32 (), z1, 2), -+ z0 = svsub_x (svptrue_b32 (), z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_f32_notrap.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_f32_notrap.c -new file mode 100644 -index 000000000..eb79a253a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_f32_notrap.c -@@ -0,0 +1,572 @@ -+/* { dg-additional-options "-fno-trapping-math" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** sub_f32_m_tied1: -+** fsub z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f32_m_tied1, svfloat32_t, -+ z0 = svsub_f32_m (p0, z0, z1), -+ z0 = svsub_m (p0, z0, z1)) -+ -+/* -+** sub_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fsub z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f32_m_tied2, svfloat32_t, -+ z0 = svsub_f32_m (p0, z1, z0), -+ z0 = svsub_m (p0, z1, z0)) -+ -+/* -+** sub_f32_m_untied: -+** movprfx z0, z1 -+** fsub z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f32_m_untied, svfloat32_t, -+ z0 = svsub_f32_m (p0, z1, z2), -+ z0 = svsub_m (p0, z1, z2)) -+ -+/* -+** sub_s4_f32_m_tied1: -+** mov (z[0-9]+\.s), s4 -+** fsub z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (sub_s4_f32_m_tied1, svfloat32_t, float, -+ z0 = svsub_n_f32_m (p0, z0, d4), -+ z0 = svsub_m (p0, z0, d4)) -+ -+/* -+** sub_s4_f32_m_untied: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0, z1 -+** fsub z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (sub_s4_f32_m_untied, svfloat32_t, float, -+ z0 = svsub_n_f32_m (p0, z1, d4), -+ z0 = svsub_m (p0, z1, d4)) -+ -+/* -+** sub_1_f32_m_tied1: -+** fsub z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_f32_m_tied1, svfloat32_t, -+ z0 = svsub_n_f32_m (p0, z0, 1), -+ z0 = svsub_m (p0, z0, 1)) -+ -+/* -+** sub_1_f32_m_untied: -+** movprfx z0, z1 -+** fsub z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_f32_m_untied, svfloat32_t, -+ z0 = svsub_n_f32_m (p0, z1, 1), -+ z0 = svsub_m (p0, z1, 1)) -+ -+/* -+** sub_0p5_f32_m_tied1: -+** fsub z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_0p5_f32_m_tied1, svfloat32_t, -+ z0 = svsub_n_f32_m (p0, z0, 0.5), -+ z0 = svsub_m (p0, z0, 0.5)) -+ -+/* -+** sub_0p5_f32_m_untied: -+** movprfx z0, z1 -+** fsub z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_0p5_f32_m_untied, svfloat32_t, -+ z0 = svsub_n_f32_m (p0, z1, 0.5), -+ z0 = svsub_m (p0, z1, 0.5)) -+ -+/* -+** sub_m1_f32_m_tied1: -+** fadd z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_f32_m_tied1, svfloat32_t, -+ z0 = svsub_n_f32_m (p0, z0, -1), -+ z0 = svsub_m (p0, z0, -1)) -+ -+/* -+** sub_m1_f32_m_untied: -+** movprfx z0, z1 -+** fadd z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_f32_m_untied, svfloat32_t, -+ z0 = svsub_n_f32_m (p0, z1, -1), -+ z0 = svsub_m (p0, z1, -1)) -+ -+/* -+** sub_m0p5_f32_m_tied1: -+** fadd z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m0p5_f32_m_tied1, svfloat32_t, -+ z0 = svsub_n_f32_m (p0, z0, -0.5), -+ z0 = svsub_m (p0, z0, -0.5)) -+ -+/* -+** sub_m0p5_f32_m_untied: -+** movprfx z0, z1 -+** fadd z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m0p5_f32_m_untied, svfloat32_t, -+ z0 = svsub_n_f32_m (p0, z1, -0.5), -+ z0 = svsub_m (p0, z1, -0.5)) -+ -+/* -+** sub_m2_f32_m: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
-+** fadd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m2_f32_m, svfloat32_t, -+ z0 = svsub_n_f32_m (p0, z0, -2), -+ z0 = svsub_m (p0, z0, -2)) -+ -+/* -+** sub_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fsub z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f32_z_tied1, svfloat32_t, -+ z0 = svsub_f32_z (p0, z0, z1), -+ z0 = svsub_z (p0, z0, z1)) -+ -+/* -+** sub_f32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** fsubr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f32_z_tied2, svfloat32_t, -+ z0 = svsub_f32_z (p0, z1, z0), -+ z0 = svsub_z (p0, z1, z0)) -+ -+/* -+** sub_f32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fsub z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fsubr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f32_z_untied, svfloat32_t, -+ z0 = svsub_f32_z (p0, z1, z2), -+ z0 = svsub_z (p0, z1, z2)) -+ -+/* -+** sub_s4_f32_z_tied1: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0\.s, p0/z, z0\.s -+** fsub z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (sub_s4_f32_z_tied1, svfloat32_t, float, -+ z0 = svsub_n_f32_z (p0, z0, d4), -+ z0 = svsub_z (p0, z0, d4)) -+ -+/* -+** sub_s4_f32_z_untied: -+** mov (z[0-9]+\.s), s4 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fsub z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** fsubr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (sub_s4_f32_z_untied, svfloat32_t, float, -+ z0 = svsub_n_f32_z (p0, z1, d4), -+ z0 = svsub_z (p0, z1, d4)) -+ -+/* -+** sub_1_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fsub z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_f32_z_tied1, svfloat32_t, -+ z0 = svsub_n_f32_z (p0, z0, 1), -+ z0 = svsub_z (p0, z0, 1)) -+ -+/* -+** sub_1_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fsub z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_f32_z_untied, svfloat32_t, -+ z0 = svsub_n_f32_z (p0, z1, 1), -+ z0 = svsub_z (p0, z1, 1)) -+ -+/* -+** sub_0p5_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fsub z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_0p5_f32_z_tied1, svfloat32_t, -+ z0 = svsub_n_f32_z (p0, z0, 0.5), -+ z0 = svsub_z (p0, z0, 0.5)) -+ -+/* -+** sub_0p5_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fsub z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_0p5_f32_z_untied, svfloat32_t, -+ z0 = svsub_n_f32_z (p0, z1, 0.5), -+ z0 = svsub_z (p0, z1, 0.5)) -+ -+/* -+** sub_m1_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fadd z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_f32_z_tied1, svfloat32_t, -+ z0 = svsub_n_f32_z (p0, z0, -1), -+ z0 = svsub_z (p0, z0, -1)) -+ -+/* -+** sub_m1_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fadd z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_f32_z_untied, svfloat32_t, -+ z0 = svsub_n_f32_z (p0, z1, -1), -+ z0 = svsub_z (p0, z1, -1)) -+ -+/* -+** sub_m0p5_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fadd z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m0p5_f32_z_tied1, svfloat32_t, -+ z0 = svsub_n_f32_z (p0, z0, -0.5), -+ z0 = svsub_z (p0, z0, -0.5)) -+ -+/* -+** sub_m0p5_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fadd z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m0p5_f32_z_untied, svfloat32_t, -+ z0 = svsub_n_f32_z (p0, z1, -0.5), -+ z0 = svsub_z (p0, z1, -0.5)) -+ -+/* -+** sub_m2_f32_z: -+** fmov (z[0-9]+\.s), #2\.0(?:e\+0)? 
-+** movprfx z0\.s, p0/z, z0\.s -+** fadd z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m2_f32_z, svfloat32_t, -+ z0 = svsub_n_f32_z (p0, z0, -2), -+ z0 = svsub_z (p0, z0, -2)) -+ -+/* -+** sub_f32_x_tied1: -+** fsub z0\.s, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f32_x_tied1, svfloat32_t, -+ z0 = svsub_f32_x (p0, z0, z1), -+ z0 = svsub_x (p0, z0, z1)) -+ -+/* -+** sub_f32_x_tied2: -+** fsub z0\.s, z1\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f32_x_tied2, svfloat32_t, -+ z0 = svsub_f32_x (p0, z1, z0), -+ z0 = svsub_x (p0, z1, z0)) -+ -+/* -+** sub_f32_x_untied: -+** fsub z0\.s, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f32_x_untied, svfloat32_t, -+ z0 = svsub_f32_x (p0, z1, z2), -+ z0 = svsub_x (p0, z1, z2)) -+ -+/* -+** sub_s4_f32_x_tied1: -+** mov (z[0-9]+\.s), s4 -+** fsub z0\.s, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (sub_s4_f32_x_tied1, svfloat32_t, float, -+ z0 = svsub_n_f32_x (p0, z0, d4), -+ z0 = svsub_x (p0, z0, d4)) -+ -+/* -+** sub_s4_f32_x_untied: -+** mov (z[0-9]+\.s), s4 -+** fsub z0\.s, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (sub_s4_f32_x_untied, svfloat32_t, float, -+ z0 = svsub_n_f32_x (p0, z1, d4), -+ z0 = svsub_x (p0, z1, d4)) -+ -+/* -+** sub_1_f32_x_tied1: -+** fsub z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_f32_x_tied1, svfloat32_t, -+ z0 = svsub_n_f32_x (p0, z0, 1), -+ z0 = svsub_x (p0, z0, 1)) -+ -+/* -+** sub_1_f32_x_untied: -+** movprfx z0, z1 -+** fsub z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_f32_x_untied, svfloat32_t, -+ z0 = svsub_n_f32_x (p0, z1, 1), -+ z0 = svsub_x (p0, z1, 1)) -+ -+/* -+** sub_0p5_f32_x_tied1: -+** fsub z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_0p5_f32_x_tied1, svfloat32_t, -+ z0 = svsub_n_f32_x (p0, z0, 0.5), -+ z0 = svsub_x (p0, z0, 0.5)) -+ -+/* -+** sub_0p5_f32_x_untied: -+** movprfx z0, z1 -+** fsub z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_0p5_f32_x_untied, svfloat32_t, -+ z0 = svsub_n_f32_x (p0, z1, 0.5), -+ z0 = svsub_x (p0, z1, 0.5)) -+ -+/* -+** sub_m1_f32_x_tied1: -+** fadd z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_f32_x_tied1, svfloat32_t, -+ z0 = svsub_n_f32_x (p0, z0, -1), -+ z0 = svsub_x (p0, z0, -1)) -+ -+/* -+** sub_m1_f32_x_untied: -+** movprfx z0, z1 -+** fadd z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_f32_x_untied, svfloat32_t, -+ z0 = svsub_n_f32_x (p0, z1, -1), -+ z0 = svsub_x (p0, z1, -1)) -+ -+/* -+** sub_m0p5_f32_x_tied1: -+** fadd z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m0p5_f32_x_tied1, svfloat32_t, -+ z0 = svsub_n_f32_x (p0, z0, -0.5), -+ z0 = svsub_x (p0, z0, -0.5)) -+ -+/* -+** sub_m0p5_f32_x_untied: -+** movprfx z0, z1 -+** fadd z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m0p5_f32_x_untied, svfloat32_t, -+ z0 = svsub_n_f32_x (p0, z1, -0.5), -+ z0 = svsub_x (p0, z1, -0.5)) -+ -+/* -+** sub_2_f32_x_tied1: -+** fmov (z[0-9]+\.s), #-2\.0(?:e\+0)? -+** fadd z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_2_f32_x_tied1, svfloat32_t, -+ z0 = svsub_n_f32_x (p0, z0, 2), -+ z0 = svsub_x (p0, z0, 2)) -+ -+/* -+** sub_2_f32_x_untied: -+** fmov (z[0-9]+\.s), #-2\.0(?:e\+0)? 
-+** fadd z0\.s, (z1\.s, \1|\1, z1\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_2_f32_x_untied, svfloat32_t, -+ z0 = svsub_n_f32_x (p0, z1, 2), -+ z0 = svsub_x (p0, z1, 2)) -+ -+/* -+** ptrue_sub_f32_x_tied1: -+** fsub z0\.s, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_f32_x_tied1, svfloat32_t, -+ z0 = svsub_f32_x (svptrue_b32 (), z0, z1), -+ z0 = svsub_x (svptrue_b32 (), z0, z1)) -+ -+/* -+** ptrue_sub_f32_x_tied2: -+** fsub z0\.s, z1\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_f32_x_tied2, svfloat32_t, -+ z0 = svsub_f32_x (svptrue_b32 (), z1, z0), -+ z0 = svsub_x (svptrue_b32 (), z1, z0)) -+ -+/* -+** ptrue_sub_f32_x_untied: -+** fsub z0\.s, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_f32_x_untied, svfloat32_t, -+ z0 = svsub_f32_x (svptrue_b32 (), z1, z2), -+ z0 = svsub_x (svptrue_b32 (), z1, z2)) -+ -+/* -+** ptrue_sub_1_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_1_f32_x_tied1, svfloat32_t, -+ z0 = svsub_n_f32_x (svptrue_b32 (), z0, 1), -+ z0 = svsub_x (svptrue_b32 (), z0, 1)) -+ -+/* -+** ptrue_sub_1_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_1_f32_x_untied, svfloat32_t, -+ z0 = svsub_n_f32_x (svptrue_b32 (), z1, 1), -+ z0 = svsub_x (svptrue_b32 (), z1, 1)) -+ -+/* -+** ptrue_sub_0p5_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_0p5_f32_x_tied1, svfloat32_t, -+ z0 = svsub_n_f32_x (svptrue_b32 (), z0, 0.5), -+ z0 = svsub_x (svptrue_b32 (), z0, 0.5)) -+ -+/* -+** ptrue_sub_0p5_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_0p5_f32_x_untied, svfloat32_t, -+ z0 = svsub_n_f32_x (svptrue_b32 (), z1, 0.5), -+ z0 = svsub_x (svptrue_b32 (), z1, 0.5)) -+ -+/* -+** ptrue_sub_m1_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_m1_f32_x_tied1, svfloat32_t, -+ z0 = svsub_n_f32_x (svptrue_b32 (), z0, -1), -+ z0 = svsub_x (svptrue_b32 (), z0, -1)) -+ -+/* -+** ptrue_sub_m1_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_m1_f32_x_untied, svfloat32_t, -+ z0 = svsub_n_f32_x (svptrue_b32 (), z1, -1), -+ z0 = svsub_x (svptrue_b32 (), z1, -1)) -+ -+/* -+** ptrue_sub_m0p5_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_m0p5_f32_x_tied1, svfloat32_t, -+ z0 = svsub_n_f32_x (svptrue_b32 (), z0, -0.5), -+ z0 = svsub_x (svptrue_b32 (), z0, -0.5)) -+ -+/* -+** ptrue_sub_m0p5_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_m0p5_f32_x_untied, svfloat32_t, -+ z0 = svsub_n_f32_x (svptrue_b32 (), z1, -0.5), -+ z0 = svsub_x (svptrue_b32 (), z1, -0.5)) -+ -+/* -+** ptrue_sub_2_f32_x_tied1: -+** fmov (z[0-9]+\.s), #-2\.0(?:e\+0)? -+** fadd z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_2_f32_x_tied1, svfloat32_t, -+ z0 = svsub_n_f32_x (svptrue_b32 (), z0, 2), -+ z0 = svsub_x (svptrue_b32 (), z0, 2)) -+ -+/* -+** ptrue_sub_2_f32_x_untied: -+** fmov (z[0-9]+\.s), #-2\.0(?:e\+0)? 
-+** fadd z0\.s, (z1\.s, \1|\1, z1\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_2_f32_x_untied, svfloat32_t, -+ z0 = svsub_n_f32_x (svptrue_b32 (), z1, 2), -+ z0 = svsub_x (svptrue_b32 (), z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_f64.c -new file mode 100644 -index 000000000..2179382c3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_f64.c -@@ -0,0 +1,577 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** sub_f64_m_tied1: -+** fsub z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f64_m_tied1, svfloat64_t, -+ z0 = svsub_f64_m (p0, z0, z1), -+ z0 = svsub_m (p0, z0, z1)) -+ -+/* -+** sub_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fsub z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f64_m_tied2, svfloat64_t, -+ z0 = svsub_f64_m (p0, z1, z0), -+ z0 = svsub_m (p0, z1, z0)) -+ -+/* -+** sub_f64_m_untied: -+** movprfx z0, z1 -+** fsub z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f64_m_untied, svfloat64_t, -+ z0 = svsub_f64_m (p0, z1, z2), -+ z0 = svsub_m (p0, z1, z2)) -+ -+/* -+** sub_d4_f64_m_tied1: -+** mov (z[0-9]+\.d), d4 -+** fsub z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (sub_d4_f64_m_tied1, svfloat64_t, double, -+ z0 = svsub_n_f64_m (p0, z0, d4), -+ z0 = svsub_m (p0, z0, d4)) -+ -+/* -+** sub_d4_f64_m_untied: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0, z1 -+** fsub z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (sub_d4_f64_m_untied, svfloat64_t, double, -+ z0 = svsub_n_f64_m (p0, z1, d4), -+ z0 = svsub_m (p0, z1, d4)) -+ -+/* -+** sub_1_f64_m_tied1: -+** fsub z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_f64_m_tied1, svfloat64_t, -+ z0 = svsub_n_f64_m (p0, z0, 1), -+ z0 = svsub_m (p0, z0, 1)) -+ -+/* -+** sub_1_f64_m_untied: -+** movprfx z0, z1 -+** fsub z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_f64_m_untied, svfloat64_t, -+ z0 = svsub_n_f64_m (p0, z1, 1), -+ z0 = svsub_m (p0, z1, 1)) -+ -+/* -+** sub_0p5_f64_m_tied1: -+** fsub z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_0p5_f64_m_tied1, svfloat64_t, -+ z0 = svsub_n_f64_m (p0, z0, 0.5), -+ z0 = svsub_m (p0, z0, 0.5)) -+ -+/* -+** sub_0p5_f64_m_untied: -+** movprfx z0, z1 -+** fsub z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_0p5_f64_m_untied, svfloat64_t, -+ z0 = svsub_n_f64_m (p0, z1, 0.5), -+ z0 = svsub_m (p0, z1, 0.5)) -+ -+/* -+** sub_m1_f64_m_tied1: -+** fadd z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_f64_m_tied1, svfloat64_t, -+ z0 = svsub_n_f64_m (p0, z0, -1), -+ z0 = svsub_m (p0, z0, -1)) -+ -+/* -+** sub_m1_f64_m_untied: -+** movprfx z0, z1 -+** fadd z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_f64_m_untied, svfloat64_t, -+ z0 = svsub_n_f64_m (p0, z1, -1), -+ z0 = svsub_m (p0, z1, -1)) -+ -+/* -+** sub_m0p5_f64_m_tied1: -+** fadd z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m0p5_f64_m_tied1, svfloat64_t, -+ z0 = svsub_n_f64_m (p0, z0, -0.5), -+ z0 = svsub_m (p0, z0, -0.5)) -+ -+/* -+** sub_m0p5_f64_m_untied: -+** movprfx z0, z1 -+** fadd z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m0p5_f64_m_untied, svfloat64_t, -+ z0 = svsub_n_f64_m (p0, z1, -0.5), -+ z0 = svsub_m (p0, z1, -0.5)) -+ -+/* -+** sub_m2_f64_m: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
-+** fadd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m2_f64_m, svfloat64_t, -+ z0 = svsub_n_f64_m (p0, z0, -2), -+ z0 = svsub_m (p0, z0, -2)) -+ -+/* -+** sub_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fsub z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f64_z_tied1, svfloat64_t, -+ z0 = svsub_f64_z (p0, z0, z1), -+ z0 = svsub_z (p0, z0, z1)) -+ -+/* -+** sub_f64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** fsubr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f64_z_tied2, svfloat64_t, -+ z0 = svsub_f64_z (p0, z1, z0), -+ z0 = svsub_z (p0, z1, z0)) -+ -+/* -+** sub_f64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fsub z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fsubr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f64_z_untied, svfloat64_t, -+ z0 = svsub_f64_z (p0, z1, z2), -+ z0 = svsub_z (p0, z1, z2)) -+ -+/* -+** sub_d4_f64_z_tied1: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0\.d, p0/z, z0\.d -+** fsub z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (sub_d4_f64_z_tied1, svfloat64_t, double, -+ z0 = svsub_n_f64_z (p0, z0, d4), -+ z0 = svsub_z (p0, z0, d4)) -+ -+/* -+** sub_d4_f64_z_untied: -+** mov (z[0-9]+\.d), d4 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fsub z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** fsubr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (sub_d4_f64_z_untied, svfloat64_t, double, -+ z0 = svsub_n_f64_z (p0, z1, d4), -+ z0 = svsub_z (p0, z1, d4)) -+ -+/* -+** sub_1_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fsub z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_f64_z_tied1, svfloat64_t, -+ z0 = svsub_n_f64_z (p0, z0, 1), -+ z0 = svsub_z (p0, z0, 1)) -+ -+/* -+** sub_1_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fsub z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_f64_z_untied, svfloat64_t, -+ z0 = svsub_n_f64_z (p0, z1, 1), -+ z0 = svsub_z (p0, z1, 1)) -+ -+/* -+** sub_0p5_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fsub z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_0p5_f64_z_tied1, svfloat64_t, -+ z0 = svsub_n_f64_z (p0, z0, 0.5), -+ z0 = svsub_z (p0, z0, 0.5)) -+ -+/* -+** sub_0p5_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fsub z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_0p5_f64_z_untied, svfloat64_t, -+ z0 = svsub_n_f64_z (p0, z1, 0.5), -+ z0 = svsub_z (p0, z1, 0.5)) -+ -+/* -+** sub_m1_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fadd z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_f64_z_tied1, svfloat64_t, -+ z0 = svsub_n_f64_z (p0, z0, -1), -+ z0 = svsub_z (p0, z0, -1)) -+ -+/* -+** sub_m1_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fadd z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_f64_z_untied, svfloat64_t, -+ z0 = svsub_n_f64_z (p0, z1, -1), -+ z0 = svsub_z (p0, z1, -1)) -+ -+/* -+** sub_m0p5_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fadd z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m0p5_f64_z_tied1, svfloat64_t, -+ z0 = svsub_n_f64_z (p0, z0, -0.5), -+ z0 = svsub_z (p0, z0, -0.5)) -+ -+/* -+** sub_m0p5_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fadd z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m0p5_f64_z_untied, svfloat64_t, -+ z0 = svsub_n_f64_z (p0, z1, -0.5), -+ z0 = svsub_z (p0, z1, -0.5)) -+ -+/* -+** sub_m2_f64_z: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
-+** movprfx z0\.d, p0/z, z0\.d -+** fadd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m2_f64_z, svfloat64_t, -+ z0 = svsub_n_f64_z (p0, z0, -2), -+ z0 = svsub_z (p0, z0, -2)) -+ -+/* -+** sub_f64_x_tied1: -+** fsub z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f64_x_tied1, svfloat64_t, -+ z0 = svsub_f64_x (p0, z0, z1), -+ z0 = svsub_x (p0, z0, z1)) -+ -+/* -+** sub_f64_x_tied2: -+** fsubr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f64_x_tied2, svfloat64_t, -+ z0 = svsub_f64_x (p0, z1, z0), -+ z0 = svsub_x (p0, z1, z0)) -+ -+/* -+** sub_f64_x_untied: -+** ( -+** movprfx z0, z1 -+** fsub z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0, z2 -+** fsubr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f64_x_untied, svfloat64_t, -+ z0 = svsub_f64_x (p0, z1, z2), -+ z0 = svsub_x (p0, z1, z2)) -+ -+/* -+** sub_d4_f64_x_tied1: -+** mov (z[0-9]+\.d), d4 -+** fsub z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (sub_d4_f64_x_tied1, svfloat64_t, double, -+ z0 = svsub_n_f64_x (p0, z0, d4), -+ z0 = svsub_x (p0, z0, d4)) -+ -+/* -+** sub_d4_f64_x_untied: { xfail *-*-* } -+** mov z0\.d, d4 -+** fsubr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZD (sub_d4_f64_x_untied, svfloat64_t, double, -+ z0 = svsub_n_f64_x (p0, z1, d4), -+ z0 = svsub_x (p0, z1, d4)) -+ -+/* -+** sub_1_f64_x_tied1: -+** fsub z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_f64_x_tied1, svfloat64_t, -+ z0 = svsub_n_f64_x (p0, z0, 1), -+ z0 = svsub_x (p0, z0, 1)) -+ -+/* -+** sub_1_f64_x_untied: -+** movprfx z0, z1 -+** fsub z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_f64_x_untied, svfloat64_t, -+ z0 = svsub_n_f64_x (p0, z1, 1), -+ z0 = svsub_x (p0, z1, 1)) -+ -+/* -+** sub_0p5_f64_x_tied1: -+** fsub z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_0p5_f64_x_tied1, svfloat64_t, -+ z0 = svsub_n_f64_x (p0, z0, 0.5), -+ z0 = svsub_x (p0, z0, 0.5)) -+ -+/* -+** sub_0p5_f64_x_untied: -+** movprfx z0, z1 -+** fsub z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_0p5_f64_x_untied, svfloat64_t, -+ z0 = svsub_n_f64_x (p0, z1, 0.5), -+ z0 = svsub_x (p0, z1, 0.5)) -+ -+/* -+** sub_m1_f64_x_tied1: -+** fadd z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_f64_x_tied1, svfloat64_t, -+ z0 = svsub_n_f64_x (p0, z0, -1), -+ z0 = svsub_x (p0, z0, -1)) -+ -+/* -+** sub_m1_f64_x_untied: -+** movprfx z0, z1 -+** fadd z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_f64_x_untied, svfloat64_t, -+ z0 = svsub_n_f64_x (p0, z1, -1), -+ z0 = svsub_x (p0, z1, -1)) -+ -+/* -+** sub_m0p5_f64_x_tied1: -+** fadd z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m0p5_f64_x_tied1, svfloat64_t, -+ z0 = svsub_n_f64_x (p0, z0, -0.5), -+ z0 = svsub_x (p0, z0, -0.5)) -+ -+/* -+** sub_m0p5_f64_x_untied: -+** movprfx z0, z1 -+** fadd z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m0p5_f64_x_untied, svfloat64_t, -+ z0 = svsub_n_f64_x (p0, z1, -0.5), -+ z0 = svsub_x (p0, z1, -0.5)) -+ -+/* -+** sub_2_f64_x_tied1: -+** fmov (z[0-9]+\.d), #-2\.0(?:e\+0)? -+** fadd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_2_f64_x_tied1, svfloat64_t, -+ z0 = svsub_n_f64_x (p0, z0, 2), -+ z0 = svsub_x (p0, z0, 2)) -+ -+/* -+** sub_2_f64_x_untied: -+** fmov z0\.d, #-2\.0(?:e\+0)? 
-+** fadd z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sub_2_f64_x_untied, svfloat64_t, -+ z0 = svsub_n_f64_x (p0, z1, 2), -+ z0 = svsub_x (p0, z1, 2)) -+ -+/* -+** ptrue_sub_f64_x_tied1: -+** fsub z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_f64_x_tied1, svfloat64_t, -+ z0 = svsub_f64_x (svptrue_b64 (), z0, z1), -+ z0 = svsub_x (svptrue_b64 (), z0, z1)) -+ -+/* -+** ptrue_sub_f64_x_tied2: -+** fsub z0\.d, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_f64_x_tied2, svfloat64_t, -+ z0 = svsub_f64_x (svptrue_b64 (), z1, z0), -+ z0 = svsub_x (svptrue_b64 (), z1, z0)) -+ -+/* -+** ptrue_sub_f64_x_untied: -+** fsub z0\.d, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_f64_x_untied, svfloat64_t, -+ z0 = svsub_f64_x (svptrue_b64 (), z1, z2), -+ z0 = svsub_x (svptrue_b64 (), z1, z2)) -+ -+/* -+** ptrue_sub_1_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_1_f64_x_tied1, svfloat64_t, -+ z0 = svsub_n_f64_x (svptrue_b64 (), z0, 1), -+ z0 = svsub_x (svptrue_b64 (), z0, 1)) -+ -+/* -+** ptrue_sub_1_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_1_f64_x_untied, svfloat64_t, -+ z0 = svsub_n_f64_x (svptrue_b64 (), z1, 1), -+ z0 = svsub_x (svptrue_b64 (), z1, 1)) -+ -+/* -+** ptrue_sub_0p5_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_0p5_f64_x_tied1, svfloat64_t, -+ z0 = svsub_n_f64_x (svptrue_b64 (), z0, 0.5), -+ z0 = svsub_x (svptrue_b64 (), z0, 0.5)) -+ -+/* -+** ptrue_sub_0p5_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_0p5_f64_x_untied, svfloat64_t, -+ z0 = svsub_n_f64_x (svptrue_b64 (), z1, 0.5), -+ z0 = svsub_x (svptrue_b64 (), z1, 0.5)) -+ -+/* -+** ptrue_sub_m1_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_m1_f64_x_tied1, svfloat64_t, -+ z0 = svsub_n_f64_x (svptrue_b64 (), z0, -1), -+ z0 = svsub_x (svptrue_b64 (), z0, -1)) -+ -+/* -+** ptrue_sub_m1_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_m1_f64_x_untied, svfloat64_t, -+ z0 = svsub_n_f64_x (svptrue_b64 (), z1, -1), -+ z0 = svsub_x (svptrue_b64 (), z1, -1)) -+ -+/* -+** ptrue_sub_m0p5_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_m0p5_f64_x_tied1, svfloat64_t, -+ z0 = svsub_n_f64_x (svptrue_b64 (), z0, -0.5), -+ z0 = svsub_x (svptrue_b64 (), z0, -0.5)) -+ -+/* -+** ptrue_sub_m0p5_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_m0p5_f64_x_untied, svfloat64_t, -+ z0 = svsub_n_f64_x (svptrue_b64 (), z1, -0.5), -+ z0 = svsub_x (svptrue_b64 (), z1, -0.5)) -+ -+/* -+** ptrue_sub_2_f64_x_tied1: -+** fmov (z[0-9]+\.d), #-2\.0(?:e\+0)? -+** fadd z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_2_f64_x_tied1, svfloat64_t, -+ z0 = svsub_n_f64_x (svptrue_b64 (), z0, 2), -+ z0 = svsub_x (svptrue_b64 (), z0, 2)) -+ -+/* -+** ptrue_sub_2_f64_x_untied: -+** fmov (z[0-9]+\.d), #-2\.0(?:e\+0)? 
-+** fadd z0\.d, (z1\.d, \1|\1, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_2_f64_x_untied, svfloat64_t, -+ z0 = svsub_n_f64_x (svptrue_b64 (), z1, 2), -+ z0 = svsub_x (svptrue_b64 (), z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_f64_notrap.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_f64_notrap.c -new file mode 100644 -index 000000000..bd89f44b4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_f64_notrap.c -@@ -0,0 +1,572 @@ -+/* { dg-additional-options "-fno-trapping-math" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** sub_f64_m_tied1: -+** fsub z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f64_m_tied1, svfloat64_t, -+ z0 = svsub_f64_m (p0, z0, z1), -+ z0 = svsub_m (p0, z0, z1)) -+ -+/* -+** sub_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fsub z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f64_m_tied2, svfloat64_t, -+ z0 = svsub_f64_m (p0, z1, z0), -+ z0 = svsub_m (p0, z1, z0)) -+ -+/* -+** sub_f64_m_untied: -+** movprfx z0, z1 -+** fsub z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f64_m_untied, svfloat64_t, -+ z0 = svsub_f64_m (p0, z1, z2), -+ z0 = svsub_m (p0, z1, z2)) -+ -+/* -+** sub_d4_f64_m_tied1: -+** mov (z[0-9]+\.d), d4 -+** fsub z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (sub_d4_f64_m_tied1, svfloat64_t, double, -+ z0 = svsub_n_f64_m (p0, z0, d4), -+ z0 = svsub_m (p0, z0, d4)) -+ -+/* -+** sub_d4_f64_m_untied: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0, z1 -+** fsub z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (sub_d4_f64_m_untied, svfloat64_t, double, -+ z0 = svsub_n_f64_m (p0, z1, d4), -+ z0 = svsub_m (p0, z1, d4)) -+ -+/* -+** sub_1_f64_m_tied1: -+** fsub z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_f64_m_tied1, svfloat64_t, -+ z0 = svsub_n_f64_m (p0, z0, 1), -+ z0 = svsub_m (p0, z0, 1)) -+ -+/* -+** sub_1_f64_m_untied: -+** movprfx z0, z1 -+** fsub z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_f64_m_untied, svfloat64_t, -+ z0 = svsub_n_f64_m (p0, z1, 1), -+ z0 = svsub_m (p0, z1, 1)) -+ -+/* -+** sub_0p5_f64_m_tied1: -+** fsub z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_0p5_f64_m_tied1, svfloat64_t, -+ z0 = svsub_n_f64_m (p0, z0, 0.5), -+ z0 = svsub_m (p0, z0, 0.5)) -+ -+/* -+** sub_0p5_f64_m_untied: -+** movprfx z0, z1 -+** fsub z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_0p5_f64_m_untied, svfloat64_t, -+ z0 = svsub_n_f64_m (p0, z1, 0.5), -+ z0 = svsub_m (p0, z1, 0.5)) -+ -+/* -+** sub_m1_f64_m_tied1: -+** fadd z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_f64_m_tied1, svfloat64_t, -+ z0 = svsub_n_f64_m (p0, z0, -1), -+ z0 = svsub_m (p0, z0, -1)) -+ -+/* -+** sub_m1_f64_m_untied: -+** movprfx z0, z1 -+** fadd z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_f64_m_untied, svfloat64_t, -+ z0 = svsub_n_f64_m (p0, z1, -1), -+ z0 = svsub_m (p0, z1, -1)) -+ -+/* -+** sub_m0p5_f64_m_tied1: -+** fadd z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m0p5_f64_m_tied1, svfloat64_t, -+ z0 = svsub_n_f64_m (p0, z0, -0.5), -+ z0 = svsub_m (p0, z0, -0.5)) -+ -+/* -+** sub_m0p5_f64_m_untied: -+** movprfx z0, z1 -+** fadd z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m0p5_f64_m_untied, svfloat64_t, -+ z0 = svsub_n_f64_m (p0, z1, -0.5), -+ z0 = svsub_m (p0, z1, -0.5)) -+ -+/* -+** sub_m2_f64_m: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
-+** fadd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m2_f64_m, svfloat64_t, -+ z0 = svsub_n_f64_m (p0, z0, -2), -+ z0 = svsub_m (p0, z0, -2)) -+ -+/* -+** sub_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fsub z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f64_z_tied1, svfloat64_t, -+ z0 = svsub_f64_z (p0, z0, z1), -+ z0 = svsub_z (p0, z0, z1)) -+ -+/* -+** sub_f64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** fsubr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f64_z_tied2, svfloat64_t, -+ z0 = svsub_f64_z (p0, z1, z0), -+ z0 = svsub_z (p0, z1, z0)) -+ -+/* -+** sub_f64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fsub z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fsubr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f64_z_untied, svfloat64_t, -+ z0 = svsub_f64_z (p0, z1, z2), -+ z0 = svsub_z (p0, z1, z2)) -+ -+/* -+** sub_d4_f64_z_tied1: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0\.d, p0/z, z0\.d -+** fsub z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (sub_d4_f64_z_tied1, svfloat64_t, double, -+ z0 = svsub_n_f64_z (p0, z0, d4), -+ z0 = svsub_z (p0, z0, d4)) -+ -+/* -+** sub_d4_f64_z_untied: -+** mov (z[0-9]+\.d), d4 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fsub z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** fsubr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (sub_d4_f64_z_untied, svfloat64_t, double, -+ z0 = svsub_n_f64_z (p0, z1, d4), -+ z0 = svsub_z (p0, z1, d4)) -+ -+/* -+** sub_1_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fsub z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_f64_z_tied1, svfloat64_t, -+ z0 = svsub_n_f64_z (p0, z0, 1), -+ z0 = svsub_z (p0, z0, 1)) -+ -+/* -+** sub_1_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fsub z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_f64_z_untied, svfloat64_t, -+ z0 = svsub_n_f64_z (p0, z1, 1), -+ z0 = svsub_z (p0, z1, 1)) -+ -+/* -+** sub_0p5_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fsub z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_0p5_f64_z_tied1, svfloat64_t, -+ z0 = svsub_n_f64_z (p0, z0, 0.5), -+ z0 = svsub_z (p0, z0, 0.5)) -+ -+/* -+** sub_0p5_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fsub z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_0p5_f64_z_untied, svfloat64_t, -+ z0 = svsub_n_f64_z (p0, z1, 0.5), -+ z0 = svsub_z (p0, z1, 0.5)) -+ -+/* -+** sub_m1_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fadd z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_f64_z_tied1, svfloat64_t, -+ z0 = svsub_n_f64_z (p0, z0, -1), -+ z0 = svsub_z (p0, z0, -1)) -+ -+/* -+** sub_m1_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fadd z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_f64_z_untied, svfloat64_t, -+ z0 = svsub_n_f64_z (p0, z1, -1), -+ z0 = svsub_z (p0, z1, -1)) -+ -+/* -+** sub_m0p5_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fadd z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m0p5_f64_z_tied1, svfloat64_t, -+ z0 = svsub_n_f64_z (p0, z0, -0.5), -+ z0 = svsub_z (p0, z0, -0.5)) -+ -+/* -+** sub_m0p5_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fadd z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m0p5_f64_z_untied, svfloat64_t, -+ z0 = svsub_n_f64_z (p0, z1, -0.5), -+ z0 = svsub_z (p0, z1, -0.5)) -+ -+/* -+** sub_m2_f64_z: -+** fmov (z[0-9]+\.d), #2\.0(?:e\+0)? 
-+** movprfx z0\.d, p0/z, z0\.d -+** fadd z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m2_f64_z, svfloat64_t, -+ z0 = svsub_n_f64_z (p0, z0, -2), -+ z0 = svsub_z (p0, z0, -2)) -+ -+/* -+** sub_f64_x_tied1: -+** fsub z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f64_x_tied1, svfloat64_t, -+ z0 = svsub_f64_x (p0, z0, z1), -+ z0 = svsub_x (p0, z0, z1)) -+ -+/* -+** sub_f64_x_tied2: -+** fsub z0\.d, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f64_x_tied2, svfloat64_t, -+ z0 = svsub_f64_x (p0, z1, z0), -+ z0 = svsub_x (p0, z1, z0)) -+ -+/* -+** sub_f64_x_untied: -+** fsub z0\.d, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sub_f64_x_untied, svfloat64_t, -+ z0 = svsub_f64_x (p0, z1, z2), -+ z0 = svsub_x (p0, z1, z2)) -+ -+/* -+** sub_d4_f64_x_tied1: -+** mov (z[0-9]+\.d), d4 -+** fsub z0\.d, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (sub_d4_f64_x_tied1, svfloat64_t, double, -+ z0 = svsub_n_f64_x (p0, z0, d4), -+ z0 = svsub_x (p0, z0, d4)) -+ -+/* -+** sub_d4_f64_x_untied: -+** mov (z[0-9]+\.d), d4 -+** fsub z0\.d, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (sub_d4_f64_x_untied, svfloat64_t, double, -+ z0 = svsub_n_f64_x (p0, z1, d4), -+ z0 = svsub_x (p0, z1, d4)) -+ -+/* -+** sub_1_f64_x_tied1: -+** fsub z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_f64_x_tied1, svfloat64_t, -+ z0 = svsub_n_f64_x (p0, z0, 1), -+ z0 = svsub_x (p0, z0, 1)) -+ -+/* -+** sub_1_f64_x_untied: -+** movprfx z0, z1 -+** fsub z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_f64_x_untied, svfloat64_t, -+ z0 = svsub_n_f64_x (p0, z1, 1), -+ z0 = svsub_x (p0, z1, 1)) -+ -+/* -+** sub_0p5_f64_x_tied1: -+** fsub z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_0p5_f64_x_tied1, svfloat64_t, -+ z0 = svsub_n_f64_x (p0, z0, 0.5), -+ z0 = svsub_x (p0, z0, 0.5)) -+ -+/* -+** sub_0p5_f64_x_untied: -+** movprfx z0, z1 -+** fsub z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_0p5_f64_x_untied, svfloat64_t, -+ z0 = svsub_n_f64_x (p0, z1, 0.5), -+ z0 = svsub_x (p0, z1, 0.5)) -+ -+/* -+** sub_m1_f64_x_tied1: -+** fadd z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_f64_x_tied1, svfloat64_t, -+ z0 = svsub_n_f64_x (p0, z0, -1), -+ z0 = svsub_x (p0, z0, -1)) -+ -+/* -+** sub_m1_f64_x_untied: -+** movprfx z0, z1 -+** fadd z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_f64_x_untied, svfloat64_t, -+ z0 = svsub_n_f64_x (p0, z1, -1), -+ z0 = svsub_x (p0, z1, -1)) -+ -+/* -+** sub_m0p5_f64_x_tied1: -+** fadd z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m0p5_f64_x_tied1, svfloat64_t, -+ z0 = svsub_n_f64_x (p0, z0, -0.5), -+ z0 = svsub_x (p0, z0, -0.5)) -+ -+/* -+** sub_m0p5_f64_x_untied: -+** movprfx z0, z1 -+** fadd z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m0p5_f64_x_untied, svfloat64_t, -+ z0 = svsub_n_f64_x (p0, z1, -0.5), -+ z0 = svsub_x (p0, z1, -0.5)) -+ -+/* -+** sub_2_f64_x_tied1: -+** fmov (z[0-9]+\.d), #-2\.0(?:e\+0)? -+** fadd z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_2_f64_x_tied1, svfloat64_t, -+ z0 = svsub_n_f64_x (p0, z0, 2), -+ z0 = svsub_x (p0, z0, 2)) -+ -+/* -+** sub_2_f64_x_untied: -+** fmov (z[0-9]+\.d), #-2\.0(?:e\+0)? 
-+** fadd z0\.d, (z1\.d, \1|\1, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_2_f64_x_untied, svfloat64_t, -+ z0 = svsub_n_f64_x (p0, z1, 2), -+ z0 = svsub_x (p0, z1, 2)) -+ -+/* -+** ptrue_sub_f64_x_tied1: -+** fsub z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_f64_x_tied1, svfloat64_t, -+ z0 = svsub_f64_x (svptrue_b64 (), z0, z1), -+ z0 = svsub_x (svptrue_b64 (), z0, z1)) -+ -+/* -+** ptrue_sub_f64_x_tied2: -+** fsub z0\.d, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_f64_x_tied2, svfloat64_t, -+ z0 = svsub_f64_x (svptrue_b64 (), z1, z0), -+ z0 = svsub_x (svptrue_b64 (), z1, z0)) -+ -+/* -+** ptrue_sub_f64_x_untied: -+** fsub z0\.d, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_f64_x_untied, svfloat64_t, -+ z0 = svsub_f64_x (svptrue_b64 (), z1, z2), -+ z0 = svsub_x (svptrue_b64 (), z1, z2)) -+ -+/* -+** ptrue_sub_1_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_1_f64_x_tied1, svfloat64_t, -+ z0 = svsub_n_f64_x (svptrue_b64 (), z0, 1), -+ z0 = svsub_x (svptrue_b64 (), z0, 1)) -+ -+/* -+** ptrue_sub_1_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_1_f64_x_untied, svfloat64_t, -+ z0 = svsub_n_f64_x (svptrue_b64 (), z1, 1), -+ z0 = svsub_x (svptrue_b64 (), z1, 1)) -+ -+/* -+** ptrue_sub_0p5_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_0p5_f64_x_tied1, svfloat64_t, -+ z0 = svsub_n_f64_x (svptrue_b64 (), z0, 0.5), -+ z0 = svsub_x (svptrue_b64 (), z0, 0.5)) -+ -+/* -+** ptrue_sub_0p5_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_0p5_f64_x_untied, svfloat64_t, -+ z0 = svsub_n_f64_x (svptrue_b64 (), z1, 0.5), -+ z0 = svsub_x (svptrue_b64 (), z1, 0.5)) -+ -+/* -+** ptrue_sub_m1_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_m1_f64_x_tied1, svfloat64_t, -+ z0 = svsub_n_f64_x (svptrue_b64 (), z0, -1), -+ z0 = svsub_x (svptrue_b64 (), z0, -1)) -+ -+/* -+** ptrue_sub_m1_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_m1_f64_x_untied, svfloat64_t, -+ z0 = svsub_n_f64_x (svptrue_b64 (), z1, -1), -+ z0 = svsub_x (svptrue_b64 (), z1, -1)) -+ -+/* -+** ptrue_sub_m0p5_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_m0p5_f64_x_tied1, svfloat64_t, -+ z0 = svsub_n_f64_x (svptrue_b64 (), z0, -0.5), -+ z0 = svsub_x (svptrue_b64 (), z0, -0.5)) -+ -+/* -+** ptrue_sub_m0p5_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_m0p5_f64_x_untied, svfloat64_t, -+ z0 = svsub_n_f64_x (svptrue_b64 (), z1, -0.5), -+ z0 = svsub_x (svptrue_b64 (), z1, -0.5)) -+ -+/* -+** ptrue_sub_2_f64_x_tied1: -+** fmov (z[0-9]+\.d), #-2\.0(?:e\+0)? -+** fadd z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_2_f64_x_tied1, svfloat64_t, -+ z0 = svsub_n_f64_x (svptrue_b64 (), z0, 2), -+ z0 = svsub_x (svptrue_b64 (), z0, 2)) -+ -+/* -+** ptrue_sub_2_f64_x_untied: -+** fmov (z[0-9]+\.d), #-2\.0(?:e\+0)? 
-+** fadd z0\.d, (z1\.d, \1|\1, z1\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_sub_2_f64_x_untied, svfloat64_t, -+ z0 = svsub_n_f64_x (svptrue_b64 (), z1, 2), -+ z0 = svsub_x (svptrue_b64 (), z1, 2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_s16.c -new file mode 100644 -index 000000000..aea8ea2b4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_s16.c -@@ -0,0 +1,377 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** sub_s16_m_tied1: -+** sub z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_s16_m_tied1, svint16_t, -+ z0 = svsub_s16_m (p0, z0, z1), -+ z0 = svsub_m (p0, z0, z1)) -+ -+/* -+** sub_s16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** sub z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_s16_m_tied2, svint16_t, -+ z0 = svsub_s16_m (p0, z1, z0), -+ z0 = svsub_m (p0, z1, z0)) -+ -+/* -+** sub_s16_m_untied: -+** movprfx z0, z1 -+** sub z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_s16_m_untied, svint16_t, -+ z0 = svsub_s16_m (p0, z1, z2), -+ z0 = svsub_m (p0, z1, z2)) -+ -+/* -+** sub_w0_s16_m_tied1: -+** mov (z[0-9]+\.h), w0 -+** sub z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_w0_s16_m_tied1, svint16_t, int16_t, -+ z0 = svsub_n_s16_m (p0, z0, x0), -+ z0 = svsub_m (p0, z0, x0)) -+ -+/* -+** sub_w0_s16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** sub z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_w0_s16_m_untied, svint16_t, int16_t, -+ z0 = svsub_n_s16_m (p0, z1, x0), -+ z0 = svsub_m (p0, z1, x0)) -+ -+/* -+** sub_1_s16_m_tied1: -+** mov (z[0-9]+)\.b, #-1 -+** add z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_s16_m_tied1, svint16_t, -+ z0 = svsub_n_s16_m (p0, z0, 1), -+ z0 = svsub_m (p0, z0, 1)) -+ -+/* -+** sub_1_s16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+)\.b, #-1 -+** movprfx z0, z1 -+** add z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_s16_m_untied, svint16_t, -+ z0 = svsub_n_s16_m (p0, z1, 1), -+ z0 = svsub_m (p0, z1, 1)) -+ -+/* -+** sub_m2_s16_m: -+** mov (z[0-9]+\.h), #2 -+** add z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m2_s16_m, svint16_t, -+ z0 = svsub_n_s16_m (p0, z0, -2), -+ z0 = svsub_m (p0, z0, -2)) -+ -+/* -+** sub_s16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** sub z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_s16_z_tied1, svint16_t, -+ z0 = svsub_s16_z (p0, z0, z1), -+ z0 = svsub_z (p0, z0, z1)) -+ -+/* -+** sub_s16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** subr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_s16_z_tied2, svint16_t, -+ z0 = svsub_s16_z (p0, z1, z0), -+ z0 = svsub_z (p0, z1, z0)) -+ -+/* -+** sub_s16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** sub z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** subr z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_s16_z_untied, svint16_t, -+ z0 = svsub_s16_z (p0, z1, z2), -+ z0 = svsub_z (p0, z1, z2)) -+ -+/* -+** sub_w0_s16_z_tied1: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** sub z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_w0_s16_z_tied1, svint16_t, int16_t, -+ z0 = svsub_n_s16_z (p0, z0, x0), -+ z0 = svsub_z (p0, z0, x0)) -+ -+/* -+** sub_w0_s16_z_untied: -+** mov (z[0-9]+\.h), w0 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** sub z0\.h, p0/m, z0\.h, \1 -+** | 
-+** movprfx z0\.h, p0/z, \1 -+** subr z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_w0_s16_z_untied, svint16_t, int16_t, -+ z0 = svsub_n_s16_z (p0, z1, x0), -+ z0 = svsub_z (p0, z1, x0)) -+ -+/* -+** sub_1_s16_z_tied1: -+** mov (z[0-9]+)\.b, #-1 -+** movprfx z0\.h, p0/z, z0\.h -+** add z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_s16_z_tied1, svint16_t, -+ z0 = svsub_n_s16_z (p0, z0, 1), -+ z0 = svsub_z (p0, z0, 1)) -+ -+/* -+** sub_1_s16_z_untied: -+** mov (z[0-9]+)\.b, #-1 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** add z0\.h, p0/m, z0\.h, \1\.h -+** | -+** movprfx z0\.h, p0/z, \1\.h -+** add z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_s16_z_untied, svint16_t, -+ z0 = svsub_n_s16_z (p0, z1, 1), -+ z0 = svsub_z (p0, z1, 1)) -+ -+/* -+** sub_s16_x_tied1: -+** sub z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_s16_x_tied1, svint16_t, -+ z0 = svsub_s16_x (p0, z0, z1), -+ z0 = svsub_x (p0, z0, z1)) -+ -+/* -+** sub_s16_x_tied2: -+** sub z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_s16_x_tied2, svint16_t, -+ z0 = svsub_s16_x (p0, z1, z0), -+ z0 = svsub_x (p0, z1, z0)) -+ -+/* -+** sub_s16_x_untied: -+** sub z0\.h, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_s16_x_untied, svint16_t, -+ z0 = svsub_s16_x (p0, z1, z2), -+ z0 = svsub_x (p0, z1, z2)) -+ -+/* -+** sub_w0_s16_x_tied1: -+** mov (z[0-9]+\.h), w0 -+** sub z0\.h, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_w0_s16_x_tied1, svint16_t, int16_t, -+ z0 = svsub_n_s16_x (p0, z0, x0), -+ z0 = svsub_x (p0, z0, x0)) -+ -+/* -+** sub_w0_s16_x_untied: -+** mov (z[0-9]+\.h), w0 -+** sub z0\.h, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_w0_s16_x_untied, svint16_t, int16_t, -+ z0 = svsub_n_s16_x (p0, z1, x0), -+ z0 = svsub_x (p0, z1, x0)) -+ -+/* -+** sub_1_s16_x_tied1: -+** sub z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_s16_x_tied1, svint16_t, -+ z0 = svsub_n_s16_x (p0, z0, 1), -+ z0 = svsub_x (p0, z0, 1)) -+ -+/* -+** sub_1_s16_x_untied: -+** movprfx z0, z1 -+** sub z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_s16_x_untied, svint16_t, -+ z0 = svsub_n_s16_x (p0, z1, 1), -+ z0 = svsub_x (p0, z1, 1)) -+ -+/* -+** sub_127_s16_x: -+** sub z0\.h, z0\.h, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_127_s16_x, svint16_t, -+ z0 = svsub_n_s16_x (p0, z0, 127), -+ z0 = svsub_x (p0, z0, 127)) -+ -+/* -+** sub_128_s16_x: -+** sub z0\.h, z0\.h, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_128_s16_x, svint16_t, -+ z0 = svsub_n_s16_x (p0, z0, 128), -+ z0 = svsub_x (p0, z0, 128)) -+ -+/* -+** sub_255_s16_x: -+** sub z0\.h, z0\.h, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_255_s16_x, svint16_t, -+ z0 = svsub_n_s16_x (p0, z0, 255), -+ z0 = svsub_x (p0, z0, 255)) -+ -+/* -+** sub_256_s16_x: -+** add z0\.h, z0\.h, #65280 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_256_s16_x, svint16_t, -+ z0 = svsub_n_s16_x (p0, z0, 256), -+ z0 = svsub_x (p0, z0, 256)) -+ -+/* -+** sub_257_s16_x: -+** mov (z[0-9]+\.h), #-257 -+** add z0\.h, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_257_s16_x, svint16_t, -+ z0 = svsub_n_s16_x (p0, z0, 257), -+ z0 = svsub_x (p0, z0, 257)) -+ -+/* -+** sub_512_s16_x: -+** add z0\.h, z0\.h, #65024 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_512_s16_x, svint16_t, -+ z0 = svsub_n_s16_x (p0, z0, 512), -+ z0 = svsub_x (p0, z0, 512)) -+ -+/* -+** sub_65280_s16_x: -+** add z0\.h, z0\.h, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_65280_s16_x, svint16_t, -+ z0 = svsub_n_s16_x (p0, z0, 0xff00), -+ z0 = svsub_x (p0, z0, 0xff00)) -+ -+/* -+** sub_m1_s16_x: 
-+** add z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_s16_x, svint16_t, -+ z0 = svsub_n_s16_x (p0, z0, -1), -+ z0 = svsub_x (p0, z0, -1)) -+ -+/* -+** sub_m127_s16_x: -+** add z0\.h, z0\.h, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m127_s16_x, svint16_t, -+ z0 = svsub_n_s16_x (p0, z0, -127), -+ z0 = svsub_x (p0, z0, -127)) -+ -+/* -+** sub_m128_s16_x: -+** add z0\.h, z0\.h, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m128_s16_x, svint16_t, -+ z0 = svsub_n_s16_x (p0, z0, -128), -+ z0 = svsub_x (p0, z0, -128)) -+ -+/* -+** sub_m255_s16_x: -+** add z0\.h, z0\.h, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m255_s16_x, svint16_t, -+ z0 = svsub_n_s16_x (p0, z0, -255), -+ z0 = svsub_x (p0, z0, -255)) -+ -+/* -+** sub_m256_s16_x: -+** add z0\.h, z0\.h, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m256_s16_x, svint16_t, -+ z0 = svsub_n_s16_x (p0, z0, -256), -+ z0 = svsub_x (p0, z0, -256)) -+ -+/* -+** sub_m257_s16_x: -+** mov (z[0-9]+)\.b, #1 -+** add z0\.h, (z0\.h, \1\.h|\1\.h, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m257_s16_x, svint16_t, -+ z0 = svsub_n_s16_x (p0, z0, -257), -+ z0 = svsub_x (p0, z0, -257)) -+ -+/* -+** sub_m512_s16_x: -+** add z0\.h, z0\.h, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m512_s16_x, svint16_t, -+ z0 = svsub_n_s16_x (p0, z0, -512), -+ z0 = svsub_x (p0, z0, -512)) -+ -+/* -+** sub_m32768_s16_x: -+** add z0\.h, z0\.h, #32768 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m32768_s16_x, svint16_t, -+ z0 = svsub_n_s16_x (p0, z0, -0x8000), -+ z0 = svsub_x (p0, z0, -0x8000)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_s32.c -new file mode 100644 -index 000000000..db6f3df90 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_s32.c -@@ -0,0 +1,426 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** sub_s32_m_tied1: -+** sub z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_s32_m_tied1, svint32_t, -+ z0 = svsub_s32_m (p0, z0, z1), -+ z0 = svsub_m (p0, z0, z1)) -+ -+/* -+** sub_s32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** sub z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_s32_m_tied2, svint32_t, -+ z0 = svsub_s32_m (p0, z1, z0), -+ z0 = svsub_m (p0, z1, z0)) -+ -+/* -+** sub_s32_m_untied: -+** movprfx z0, z1 -+** sub z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_s32_m_untied, svint32_t, -+ z0 = svsub_s32_m (p0, z1, z2), -+ z0 = svsub_m (p0, z1, z2)) -+ -+/* -+** sub_w0_s32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** sub z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_w0_s32_m_tied1, svint32_t, int32_t, -+ z0 = svsub_n_s32_m (p0, z0, x0), -+ z0 = svsub_m (p0, z0, x0)) -+ -+/* -+** sub_w0_s32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** sub z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_w0_s32_m_untied, svint32_t, int32_t, -+ z0 = svsub_n_s32_m (p0, z1, x0), -+ z0 = svsub_m (p0, z1, x0)) -+ -+/* -+** sub_1_s32_m_tied1: -+** mov (z[0-9]+)\.b, #-1 -+** add z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_s32_m_tied1, svint32_t, -+ z0 = svsub_n_s32_m (p0, z0, 1), -+ z0 = svsub_m (p0, z0, 1)) -+ -+/* -+** sub_1_s32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+)\.b, #-1 -+** movprfx z0, z1 -+** add z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_s32_m_untied, svint32_t, -+ z0 = svsub_n_s32_m (p0, z1, 1), -+ z0 = svsub_m (p0, z1, 1)) -+ -+/* -+** sub_m2_s32_m: -+** mov (z[0-9]+\.s), #2 -+** add z0\.s, p0/m, 
z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m2_s32_m, svint32_t, -+ z0 = svsub_n_s32_m (p0, z0, -2), -+ z0 = svsub_m (p0, z0, -2)) -+ -+/* -+** sub_s32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** sub z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_s32_z_tied1, svint32_t, -+ z0 = svsub_s32_z (p0, z0, z1), -+ z0 = svsub_z (p0, z0, z1)) -+ -+/* -+** sub_s32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** subr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_s32_z_tied2, svint32_t, -+ z0 = svsub_s32_z (p0, z1, z0), -+ z0 = svsub_z (p0, z1, z0)) -+ -+/* -+** sub_s32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** sub z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** subr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_s32_z_untied, svint32_t, -+ z0 = svsub_s32_z (p0, z1, z2), -+ z0 = svsub_z (p0, z1, z2)) -+ -+/* -+** sub_w0_s32_z_tied1: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** sub z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_w0_s32_z_tied1, svint32_t, int32_t, -+ z0 = svsub_n_s32_z (p0, z0, x0), -+ z0 = svsub_z (p0, z0, x0)) -+ -+/* -+** sub_w0_s32_z_untied: -+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** sub z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** subr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_w0_s32_z_untied, svint32_t, int32_t, -+ z0 = svsub_n_s32_z (p0, z1, x0), -+ z0 = svsub_z (p0, z1, x0)) -+ -+/* -+** sub_1_s32_z_tied1: -+** mov (z[0-9]+)\.b, #-1 -+** movprfx z0\.s, p0/z, z0\.s -+** add z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_s32_z_tied1, svint32_t, -+ z0 = svsub_n_s32_z (p0, z0, 1), -+ z0 = svsub_z (p0, z0, 1)) -+ -+/* -+** sub_1_s32_z_untied: -+** mov (z[0-9]+)\.b, #-1 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** add z0\.s, p0/m, z0\.s, \1\.s -+** | -+** movprfx z0\.s, p0/z, \1\.s -+** add z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_s32_z_untied, svint32_t, -+ z0 = svsub_n_s32_z (p0, z1, 1), -+ z0 = svsub_z (p0, z1, 1)) -+ -+/* -+** sub_s32_x_tied1: -+** sub z0\.s, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_s32_x_tied1, svint32_t, -+ z0 = svsub_s32_x (p0, z0, z1), -+ z0 = svsub_x (p0, z0, z1)) -+ -+/* -+** sub_s32_x_tied2: -+** sub z0\.s, z1\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_s32_x_tied2, svint32_t, -+ z0 = svsub_s32_x (p0, z1, z0), -+ z0 = svsub_x (p0, z1, z0)) -+ -+/* -+** sub_s32_x_untied: -+** sub z0\.s, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_s32_x_untied, svint32_t, -+ z0 = svsub_s32_x (p0, z1, z2), -+ z0 = svsub_x (p0, z1, z2)) -+ -+/* -+** sub_w0_s32_x_tied1: -+** mov (z[0-9]+\.s), w0 -+** sub z0\.s, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_w0_s32_x_tied1, svint32_t, int32_t, -+ z0 = svsub_n_s32_x (p0, z0, x0), -+ z0 = svsub_x (p0, z0, x0)) -+ -+/* -+** sub_w0_s32_x_untied: -+** mov (z[0-9]+\.s), w0 -+** sub z0\.s, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_w0_s32_x_untied, svint32_t, int32_t, -+ z0 = svsub_n_s32_x (p0, z1, x0), -+ z0 = svsub_x (p0, z1, x0)) -+ -+/* -+** sub_1_s32_x_tied1: -+** sub z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_s32_x_tied1, svint32_t, -+ z0 = svsub_n_s32_x (p0, z0, 1), -+ z0 = svsub_x (p0, z0, 1)) -+ -+/* -+** sub_1_s32_x_untied: -+** movprfx z0, z1 -+** sub z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_s32_x_untied, svint32_t, -+ z0 = svsub_n_s32_x (p0, z1, 1), -+ z0 = svsub_x (p0, z1, 1)) -+ -+/* -+** sub_127_s32_x: -+** sub z0\.s, z0\.s, #127 -+** ret -+*/ 
-+TEST_UNIFORM_Z (sub_127_s32_x, svint32_t, -+ z0 = svsub_n_s32_x (p0, z0, 127), -+ z0 = svsub_x (p0, z0, 127)) -+ -+/* -+** sub_128_s32_x: -+** sub z0\.s, z0\.s, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_128_s32_x, svint32_t, -+ z0 = svsub_n_s32_x (p0, z0, 128), -+ z0 = svsub_x (p0, z0, 128)) -+ -+/* -+** sub_255_s32_x: -+** sub z0\.s, z0\.s, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_255_s32_x, svint32_t, -+ z0 = svsub_n_s32_x (p0, z0, 255), -+ z0 = svsub_x (p0, z0, 255)) -+ -+/* -+** sub_256_s32_x: -+** sub z0\.s, z0\.s, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_256_s32_x, svint32_t, -+ z0 = svsub_n_s32_x (p0, z0, 256), -+ z0 = svsub_x (p0, z0, 256)) -+ -+/* -+** sub_511_s32_x: -+** mov (z[0-9]+\.s), #-511 -+** add z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_511_s32_x, svint32_t, -+ z0 = svsub_n_s32_x (p0, z0, 511), -+ z0 = svsub_x (p0, z0, 511)) -+ -+/* -+** sub_512_s32_x: -+** sub z0\.s, z0\.s, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_512_s32_x, svint32_t, -+ z0 = svsub_n_s32_x (p0, z0, 512), -+ z0 = svsub_x (p0, z0, 512)) -+ -+/* -+** sub_65280_s32_x: -+** sub z0\.s, z0\.s, #65280 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_65280_s32_x, svint32_t, -+ z0 = svsub_n_s32_x (p0, z0, 0xff00), -+ z0 = svsub_x (p0, z0, 0xff00)) -+ -+/* -+** sub_65535_s32_x: -+** mov (z[0-9]+\.s), #-65535 -+** add z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_65535_s32_x, svint32_t, -+ z0 = svsub_n_s32_x (p0, z0, 65535), -+ z0 = svsub_x (p0, z0, 65535)) -+ -+/* -+** sub_65536_s32_x: -+** mov (z[0-9]+\.s), #-65536 -+** add z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_65536_s32_x, svint32_t, -+ z0 = svsub_n_s32_x (p0, z0, 65536), -+ z0 = svsub_x (p0, z0, 65536)) -+ -+/* -+** sub_m1_s32_x: -+** add z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_s32_x, svint32_t, -+ z0 = svsub_n_s32_x (p0, z0, -1), -+ z0 = svsub_x (p0, z0, -1)) -+ -+/* -+** sub_m127_s32_x: -+** add z0\.s, z0\.s, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m127_s32_x, svint32_t, -+ z0 = svsub_n_s32_x (p0, z0, -127), -+ z0 = svsub_x (p0, z0, -127)) -+ -+/* -+** sub_m128_s32_x: -+** add z0\.s, z0\.s, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m128_s32_x, svint32_t, -+ z0 = svsub_n_s32_x (p0, z0, -128), -+ z0 = svsub_x (p0, z0, -128)) -+ -+/* -+** sub_m255_s32_x: -+** add z0\.s, z0\.s, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m255_s32_x, svint32_t, -+ z0 = svsub_n_s32_x (p0, z0, -255), -+ z0 = svsub_x (p0, z0, -255)) -+ -+/* -+** sub_m256_s32_x: -+** add z0\.s, z0\.s, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m256_s32_x, svint32_t, -+ z0 = svsub_n_s32_x (p0, z0, -256), -+ z0 = svsub_x (p0, z0, -256)) -+ -+/* -+** sub_m511_s32_x: -+** mov (z[0-9]+\.s), #511 -+** add z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m511_s32_x, svint32_t, -+ z0 = svsub_n_s32_x (p0, z0, -511), -+ z0 = svsub_x (p0, z0, -511)) -+ -+/* -+** sub_m512_s32_x: -+** add z0\.s, z0\.s, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m512_s32_x, svint32_t, -+ z0 = svsub_n_s32_x (p0, z0, -512), -+ z0 = svsub_x (p0, z0, -512)) -+ -+/* -+** sub_m32768_s32_x: -+** add z0\.s, z0\.s, #32768 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m32768_s32_x, svint32_t, -+ z0 = svsub_n_s32_x (p0, z0, -0x8000), -+ z0 = svsub_x (p0, z0, -0x8000)) -+ -+/* -+** sub_m65280_s32_x: -+** add z0\.s, z0\.s, #65280 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m65280_s32_x, svint32_t, -+ z0 = svsub_n_s32_x (p0, z0, -0xff00), -+ z0 = svsub_x (p0, z0, -0xff00)) -+ -+/* -+** sub_m65535_s32_x: -+** mov (z[0-9]+\.s), #65535 -+** add z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret 
-+*/ -+TEST_UNIFORM_Z (sub_m65535_s32_x, svint32_t, -+ z0 = svsub_n_s32_x (p0, z0, -65535), -+ z0 = svsub_x (p0, z0, -65535)) -+ -+/* -+** sub_m65536_s32_x: -+** mov (z[0-9]+\.s), #65536 -+** add z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m65536_s32_x, svint32_t, -+ z0 = svsub_n_s32_x (p0, z0, -65536), -+ z0 = svsub_x (p0, z0, -65536)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_s64.c -new file mode 100644 -index 000000000..b9184c3a8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_s64.c -@@ -0,0 +1,426 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** sub_s64_m_tied1: -+** sub z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sub_s64_m_tied1, svint64_t, -+ z0 = svsub_s64_m (p0, z0, z1), -+ z0 = svsub_m (p0, z0, z1)) -+ -+/* -+** sub_s64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** sub z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_s64_m_tied2, svint64_t, -+ z0 = svsub_s64_m (p0, z1, z0), -+ z0 = svsub_m (p0, z1, z0)) -+ -+/* -+** sub_s64_m_untied: -+** movprfx z0, z1 -+** sub z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sub_s64_m_untied, svint64_t, -+ z0 = svsub_s64_m (p0, z1, z2), -+ z0 = svsub_m (p0, z1, z2)) -+ -+/* -+** sub_x0_s64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** sub z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_x0_s64_m_tied1, svint64_t, int64_t, -+ z0 = svsub_n_s64_m (p0, z0, x0), -+ z0 = svsub_m (p0, z0, x0)) -+ -+/* -+** sub_x0_s64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** sub z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_x0_s64_m_untied, svint64_t, int64_t, -+ z0 = svsub_n_s64_m (p0, z1, x0), -+ z0 = svsub_m (p0, z1, x0)) -+ -+/* -+** sub_1_s64_m_tied1: -+** mov (z[0-9]+)\.b, #-1 -+** add z0\.d, p0/m, z0\.d, \1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_s64_m_tied1, svint64_t, -+ z0 = svsub_n_s64_m (p0, z0, 1), -+ z0 = svsub_m (p0, z0, 1)) -+ -+/* -+** sub_1_s64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+)\.b, #-1 -+** movprfx z0, z1 -+** add z0\.d, p0/m, z0\.d, \1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_s64_m_untied, svint64_t, -+ z0 = svsub_n_s64_m (p0, z1, 1), -+ z0 = svsub_m (p0, z1, 1)) -+ -+/* -+** sub_m2_s64_m: -+** mov (z[0-9]+\.d), #2 -+** add z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m2_s64_m, svint64_t, -+ z0 = svsub_n_s64_m (p0, z0, -2), -+ z0 = svsub_m (p0, z0, -2)) -+ -+/* -+** sub_s64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** sub z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sub_s64_z_tied1, svint64_t, -+ z0 = svsub_s64_z (p0, z0, z1), -+ z0 = svsub_z (p0, z0, z1)) -+ -+/* -+** sub_s64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** subr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sub_s64_z_tied2, svint64_t, -+ z0 = svsub_s64_z (p0, z1, z0), -+ z0 = svsub_z (p0, z1, z0)) -+ -+/* -+** sub_s64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** sub z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** subr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_s64_z_untied, svint64_t, -+ z0 = svsub_s64_z (p0, z1, z2), -+ z0 = svsub_z (p0, z1, z2)) -+ -+/* -+** sub_x0_s64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** sub z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_x0_s64_z_tied1, svint64_t, int64_t, -+ z0 = svsub_n_s64_z (p0, z0, x0), -+ z0 = svsub_z (p0, z0, 
x0)) -+ -+/* -+** sub_x0_s64_z_untied: -+** mov (z[0-9]+\.d), x0 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** sub z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** subr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_x0_s64_z_untied, svint64_t, int64_t, -+ z0 = svsub_n_s64_z (p0, z1, x0), -+ z0 = svsub_z (p0, z1, x0)) -+ -+/* -+** sub_1_s64_z_tied1: -+** mov (z[0-9]+)\.b, #-1 -+** movprfx z0\.d, p0/z, z0\.d -+** add z0\.d, p0/m, z0\.d, \1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_s64_z_tied1, svint64_t, -+ z0 = svsub_n_s64_z (p0, z0, 1), -+ z0 = svsub_z (p0, z0, 1)) -+ -+/* -+** sub_1_s64_z_untied: -+** mov (z[0-9]+)\.b, #-1 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** add z0\.d, p0/m, z0\.d, \1\.d -+** | -+** movprfx z0\.d, p0/z, \1\.d -+** add z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_s64_z_untied, svint64_t, -+ z0 = svsub_n_s64_z (p0, z1, 1), -+ z0 = svsub_z (p0, z1, 1)) -+ -+/* -+** sub_s64_x_tied1: -+** sub z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sub_s64_x_tied1, svint64_t, -+ z0 = svsub_s64_x (p0, z0, z1), -+ z0 = svsub_x (p0, z0, z1)) -+ -+/* -+** sub_s64_x_tied2: -+** sub z0\.d, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sub_s64_x_tied2, svint64_t, -+ z0 = svsub_s64_x (p0, z1, z0), -+ z0 = svsub_x (p0, z1, z0)) -+ -+/* -+** sub_s64_x_untied: -+** sub z0\.d, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sub_s64_x_untied, svint64_t, -+ z0 = svsub_s64_x (p0, z1, z2), -+ z0 = svsub_x (p0, z1, z2)) -+ -+/* -+** sub_x0_s64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** sub z0\.d, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_x0_s64_x_tied1, svint64_t, int64_t, -+ z0 = svsub_n_s64_x (p0, z0, x0), -+ z0 = svsub_x (p0, z0, x0)) -+ -+/* -+** sub_x0_s64_x_untied: -+** mov (z[0-9]+\.d), x0 -+** sub z0\.d, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_x0_s64_x_untied, svint64_t, int64_t, -+ z0 = svsub_n_s64_x (p0, z1, x0), -+ z0 = svsub_x (p0, z1, x0)) -+ -+/* -+** sub_1_s64_x_tied1: -+** sub z0\.d, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_s64_x_tied1, svint64_t, -+ z0 = svsub_n_s64_x (p0, z0, 1), -+ z0 = svsub_x (p0, z0, 1)) -+ -+/* -+** sub_1_s64_x_untied: -+** movprfx z0, z1 -+** sub z0\.d, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_s64_x_untied, svint64_t, -+ z0 = svsub_n_s64_x (p0, z1, 1), -+ z0 = svsub_x (p0, z1, 1)) -+ -+/* -+** sub_127_s64_x: -+** sub z0\.d, z0\.d, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_127_s64_x, svint64_t, -+ z0 = svsub_n_s64_x (p0, z0, 127), -+ z0 = svsub_x (p0, z0, 127)) -+ -+/* -+** sub_128_s64_x: -+** sub z0\.d, z0\.d, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_128_s64_x, svint64_t, -+ z0 = svsub_n_s64_x (p0, z0, 128), -+ z0 = svsub_x (p0, z0, 128)) -+ -+/* -+** sub_255_s64_x: -+** sub z0\.d, z0\.d, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_255_s64_x, svint64_t, -+ z0 = svsub_n_s64_x (p0, z0, 255), -+ z0 = svsub_x (p0, z0, 255)) -+ -+/* -+** sub_256_s64_x: -+** sub z0\.d, z0\.d, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_256_s64_x, svint64_t, -+ z0 = svsub_n_s64_x (p0, z0, 256), -+ z0 = svsub_x (p0, z0, 256)) -+ -+/* -+** sub_511_s64_x: -+** mov (z[0-9]+\.d), #-511 -+** add z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_511_s64_x, svint64_t, -+ z0 = svsub_n_s64_x (p0, z0, 511), -+ z0 = svsub_x (p0, z0, 511)) -+ -+/* -+** sub_512_s64_x: -+** sub z0\.d, z0\.d, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_512_s64_x, svint64_t, -+ z0 = svsub_n_s64_x (p0, z0, 512), -+ z0 = svsub_x (p0, z0, 512)) -+ -+/* -+** sub_65280_s64_x: -+** sub z0\.d, z0\.d, #65280 -+** ret -+*/ 
-+TEST_UNIFORM_Z (sub_65280_s64_x, svint64_t, -+ z0 = svsub_n_s64_x (p0, z0, 0xff00), -+ z0 = svsub_x (p0, z0, 0xff00)) -+ -+/* -+** sub_65535_s64_x: -+** mov (z[0-9]+\.d), #-65535 -+** add z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_65535_s64_x, svint64_t, -+ z0 = svsub_n_s64_x (p0, z0, 65535), -+ z0 = svsub_x (p0, z0, 65535)) -+ -+/* -+** sub_65536_s64_x: -+** mov (z[0-9]+\.d), #-65536 -+** add z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_65536_s64_x, svint64_t, -+ z0 = svsub_n_s64_x (p0, z0, 65536), -+ z0 = svsub_x (p0, z0, 65536)) -+ -+/* -+** sub_m1_s64_x: -+** add z0\.d, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_s64_x, svint64_t, -+ z0 = svsub_n_s64_x (p0, z0, -1), -+ z0 = svsub_x (p0, z0, -1)) -+ -+/* -+** sub_m127_s64_x: -+** add z0\.d, z0\.d, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m127_s64_x, svint64_t, -+ z0 = svsub_n_s64_x (p0, z0, -127), -+ z0 = svsub_x (p0, z0, -127)) -+ -+/* -+** sub_m128_s64_x: -+** add z0\.d, z0\.d, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m128_s64_x, svint64_t, -+ z0 = svsub_n_s64_x (p0, z0, -128), -+ z0 = svsub_x (p0, z0, -128)) -+ -+/* -+** sub_m255_s64_x: -+** add z0\.d, z0\.d, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m255_s64_x, svint64_t, -+ z0 = svsub_n_s64_x (p0, z0, -255), -+ z0 = svsub_x (p0, z0, -255)) -+ -+/* -+** sub_m256_s64_x: -+** add z0\.d, z0\.d, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m256_s64_x, svint64_t, -+ z0 = svsub_n_s64_x (p0, z0, -256), -+ z0 = svsub_x (p0, z0, -256)) -+ -+/* -+** sub_m511_s64_x: -+** mov (z[0-9]+\.d), #511 -+** add z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m511_s64_x, svint64_t, -+ z0 = svsub_n_s64_x (p0, z0, -511), -+ z0 = svsub_x (p0, z0, -511)) -+ -+/* -+** sub_m512_s64_x: -+** add z0\.d, z0\.d, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m512_s64_x, svint64_t, -+ z0 = svsub_n_s64_x (p0, z0, -512), -+ z0 = svsub_x (p0, z0, -512)) -+ -+/* -+** sub_m32768_s64_x: -+** add z0\.d, z0\.d, #32768 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m32768_s64_x, svint64_t, -+ z0 = svsub_n_s64_x (p0, z0, -0x8000), -+ z0 = svsub_x (p0, z0, -0x8000)) -+ -+/* -+** sub_m65280_s64_x: -+** add z0\.d, z0\.d, #65280 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m65280_s64_x, svint64_t, -+ z0 = svsub_n_s64_x (p0, z0, -0xff00), -+ z0 = svsub_x (p0, z0, -0xff00)) -+ -+/* -+** sub_m65535_s64_x: -+** mov (z[0-9]+\.d), #65535 -+** add z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m65535_s64_x, svint64_t, -+ z0 = svsub_n_s64_x (p0, z0, -65535), -+ z0 = svsub_x (p0, z0, -65535)) -+ -+/* -+** sub_m65536_s64_x: -+** mov (z[0-9]+\.d), #65536 -+** add z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m65536_s64_x, svint64_t, -+ z0 = svsub_n_s64_x (p0, z0, -65536), -+ z0 = svsub_x (p0, z0, -65536)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_s8.c -new file mode 100644 -index 000000000..0d7ba99aa ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_s8.c -@@ -0,0 +1,294 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** sub_s8_m_tied1: -+** sub z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (sub_s8_m_tied1, svint8_t, -+ z0 = svsub_s8_m (p0, z0, z1), -+ z0 = svsub_m (p0, z0, z1)) -+ -+/* -+** sub_s8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** sub z0\.b, p0/m, z0\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (sub_s8_m_tied2, svint8_t, -+ z0 = svsub_s8_m (p0, z1, z0), -+ z0 = svsub_m 
(p0, z1, z0)) -+ -+/* -+** sub_s8_m_untied: -+** movprfx z0, z1 -+** sub z0\.b, p0/m, z0\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (sub_s8_m_untied, svint8_t, -+ z0 = svsub_s8_m (p0, z1, z2), -+ z0 = svsub_m (p0, z1, z2)) -+ -+/* -+** sub_w0_s8_m_tied1: -+** mov (z[0-9]+\.b), w0 -+** sub z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_w0_s8_m_tied1, svint8_t, int8_t, -+ z0 = svsub_n_s8_m (p0, z0, x0), -+ z0 = svsub_m (p0, z0, x0)) -+ -+/* -+** sub_w0_s8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** sub z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_w0_s8_m_untied, svint8_t, int8_t, -+ z0 = svsub_n_s8_m (p0, z1, x0), -+ z0 = svsub_m (p0, z1, x0)) -+ -+/* -+** sub_1_s8_m_tied1: -+** mov (z[0-9]+\.b), #-1 -+** add z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_s8_m_tied1, svint8_t, -+ z0 = svsub_n_s8_m (p0, z0, 1), -+ z0 = svsub_m (p0, z0, 1)) -+ -+/* -+** sub_1_s8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), #-1 -+** movprfx z0, z1 -+** add z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_s8_m_untied, svint8_t, -+ z0 = svsub_n_s8_m (p0, z1, 1), -+ z0 = svsub_m (p0, z1, 1)) -+ -+/* -+** sub_m1_s8_m: -+** mov (z[0-9]+\.b), #1 -+** add z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_s8_m, svint8_t, -+ z0 = svsub_n_s8_m (p0, z0, -1), -+ z0 = svsub_m (p0, z0, -1)) -+ -+/* -+** sub_s8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** sub z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (sub_s8_z_tied1, svint8_t, -+ z0 = svsub_s8_z (p0, z0, z1), -+ z0 = svsub_z (p0, z0, z1)) -+ -+/* -+** sub_s8_z_tied2: -+** movprfx z0\.b, p0/z, z0\.b -+** subr z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (sub_s8_z_tied2, svint8_t, -+ z0 = svsub_s8_z (p0, z1, z0), -+ z0 = svsub_z (p0, z1, z0)) -+ -+/* -+** sub_s8_z_untied: -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** sub z0\.b, p0/m, z0\.b, z2\.b -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** subr z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_s8_z_untied, svint8_t, -+ z0 = svsub_s8_z (p0, z1, z2), -+ z0 = svsub_z (p0, z1, z2)) -+ -+/* -+** sub_w0_s8_z_tied1: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** sub z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_w0_s8_z_tied1, svint8_t, int8_t, -+ z0 = svsub_n_s8_z (p0, z0, x0), -+ z0 = svsub_z (p0, z0, x0)) -+ -+/* -+** sub_w0_s8_z_untied: -+** mov (z[0-9]+\.b), w0 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** sub z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** subr z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_w0_s8_z_untied, svint8_t, int8_t, -+ z0 = svsub_n_s8_z (p0, z1, x0), -+ z0 = svsub_z (p0, z1, x0)) -+ -+/* -+** sub_1_s8_z_tied1: -+** mov (z[0-9]+\.b), #-1 -+** movprfx z0\.b, p0/z, z0\.b -+** add z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_s8_z_tied1, svint8_t, -+ z0 = svsub_n_s8_z (p0, z0, 1), -+ z0 = svsub_z (p0, z0, 1)) -+ -+/* -+** sub_1_s8_z_untied: -+** mov (z[0-9]+\.b), #-1 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** add z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** add z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_s8_z_untied, svint8_t, -+ z0 = svsub_n_s8_z (p0, z1, 1), -+ z0 = svsub_z (p0, z1, 1)) -+ -+/* -+** sub_s8_x_tied1: -+** sub z0\.b, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (sub_s8_x_tied1, svint8_t, -+ z0 = svsub_s8_x (p0, z0, z1), -+ z0 = svsub_x (p0, z0, z1)) -+ -+/* -+** sub_s8_x_tied2: -+** sub z0\.b, z1\.b, z0\.b -+** ret -+*/ 
-+TEST_UNIFORM_Z (sub_s8_x_tied2, svint8_t, -+ z0 = svsub_s8_x (p0, z1, z0), -+ z0 = svsub_x (p0, z1, z0)) -+ -+/* -+** sub_s8_x_untied: -+** sub z0\.b, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (sub_s8_x_untied, svint8_t, -+ z0 = svsub_s8_x (p0, z1, z2), -+ z0 = svsub_x (p0, z1, z2)) -+ -+/* -+** sub_w0_s8_x_tied1: -+** mov (z[0-9]+\.b), w0 -+** sub z0\.b, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_w0_s8_x_tied1, svint8_t, int8_t, -+ z0 = svsub_n_s8_x (p0, z0, x0), -+ z0 = svsub_x (p0, z0, x0)) -+ -+/* -+** sub_w0_s8_x_untied: -+** mov (z[0-9]+\.b), w0 -+** sub z0\.b, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_w0_s8_x_untied, svint8_t, int8_t, -+ z0 = svsub_n_s8_x (p0, z1, x0), -+ z0 = svsub_x (p0, z1, x0)) -+ -+/* -+** sub_1_s8_x_tied1: -+** add z0\.b, z0\.b, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_s8_x_tied1, svint8_t, -+ z0 = svsub_n_s8_x (p0, z0, 1), -+ z0 = svsub_x (p0, z0, 1)) -+ -+/* -+** sub_1_s8_x_untied: -+** movprfx z0, z1 -+** add z0\.b, z0\.b, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_s8_x_untied, svint8_t, -+ z0 = svsub_n_s8_x (p0, z1, 1), -+ z0 = svsub_x (p0, z1, 1)) -+ -+/* -+** sub_127_s8_x: -+** add z0\.b, z0\.b, #129 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_127_s8_x, svint8_t, -+ z0 = svsub_n_s8_x (p0, z0, 127), -+ z0 = svsub_x (p0, z0, 127)) -+ -+/* -+** sub_128_s8_x: -+** add z0\.b, z0\.b, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_128_s8_x, svint8_t, -+ z0 = svsub_n_s8_x (p0, z0, 128), -+ z0 = svsub_x (p0, z0, 128)) -+ -+/* -+** sub_255_s8_x: -+** add z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_255_s8_x, svint8_t, -+ z0 = svsub_n_s8_x (p0, z0, 255), -+ z0 = svsub_x (p0, z0, 255)) -+ -+/* -+** sub_m1_s8_x: -+** add z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_s8_x, svint8_t, -+ z0 = svsub_n_s8_x (p0, z0, -1), -+ z0 = svsub_x (p0, z0, -1)) -+ -+/* -+** sub_m127_s8_x: -+** add z0\.b, z0\.b, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m127_s8_x, svint8_t, -+ z0 = svsub_n_s8_x (p0, z0, -127), -+ z0 = svsub_x (p0, z0, -127)) -+ -+/* -+** sub_m128_s8_x: -+** add z0\.b, z0\.b, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m128_s8_x, svint8_t, -+ z0 = svsub_n_s8_x (p0, z0, -128), -+ z0 = svsub_x (p0, z0, -128)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_u16.c -new file mode 100644 -index 000000000..89620e159 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_u16.c -@@ -0,0 +1,377 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** sub_u16_m_tied1: -+** sub z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_u16_m_tied1, svuint16_t, -+ z0 = svsub_u16_m (p0, z0, z1), -+ z0 = svsub_m (p0, z0, z1)) -+ -+/* -+** sub_u16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** sub z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_u16_m_tied2, svuint16_t, -+ z0 = svsub_u16_m (p0, z1, z0), -+ z0 = svsub_m (p0, z1, z0)) -+ -+/* -+** sub_u16_m_untied: -+** movprfx z0, z1 -+** sub z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_u16_m_untied, svuint16_t, -+ z0 = svsub_u16_m (p0, z1, z2), -+ z0 = svsub_m (p0, z1, z2)) -+ -+/* -+** sub_w0_u16_m_tied1: -+** mov (z[0-9]+\.h), w0 -+** sub z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_w0_u16_m_tied1, svuint16_t, uint16_t, -+ z0 = svsub_n_u16_m (p0, z0, x0), -+ z0 = svsub_m (p0, z0, x0)) -+ -+/* -+** sub_w0_u16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** sub z0\.h, p0/m, 
z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_w0_u16_m_untied, svuint16_t, uint16_t, -+ z0 = svsub_n_u16_m (p0, z1, x0), -+ z0 = svsub_m (p0, z1, x0)) -+ -+/* -+** sub_1_u16_m_tied1: -+** mov (z[0-9]+)\.b, #-1 -+** add z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_u16_m_tied1, svuint16_t, -+ z0 = svsub_n_u16_m (p0, z0, 1), -+ z0 = svsub_m (p0, z0, 1)) -+ -+/* -+** sub_1_u16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+)\.b, #-1 -+** movprfx z0, z1 -+** add z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_u16_m_untied, svuint16_t, -+ z0 = svsub_n_u16_m (p0, z1, 1), -+ z0 = svsub_m (p0, z1, 1)) -+ -+/* -+** sub_m2_u16_m: -+** mov (z[0-9]+\.h), #2 -+** add z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m2_u16_m, svuint16_t, -+ z0 = svsub_n_u16_m (p0, z0, -2), -+ z0 = svsub_m (p0, z0, -2)) -+ -+/* -+** sub_u16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** sub z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_u16_z_tied1, svuint16_t, -+ z0 = svsub_u16_z (p0, z0, z1), -+ z0 = svsub_z (p0, z0, z1)) -+ -+/* -+** sub_u16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** subr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_u16_z_tied2, svuint16_t, -+ z0 = svsub_u16_z (p0, z1, z0), -+ z0 = svsub_z (p0, z1, z0)) -+ -+/* -+** sub_u16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** sub z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** subr z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_u16_z_untied, svuint16_t, -+ z0 = svsub_u16_z (p0, z1, z2), -+ z0 = svsub_z (p0, z1, z2)) -+ -+/* -+** sub_w0_u16_z_tied1: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** sub z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_w0_u16_z_tied1, svuint16_t, uint16_t, -+ z0 = svsub_n_u16_z (p0, z0, x0), -+ z0 = svsub_z (p0, z0, x0)) -+ -+/* -+** sub_w0_u16_z_untied: -+** mov (z[0-9]+\.h), w0 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** sub z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** subr z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_w0_u16_z_untied, svuint16_t, uint16_t, -+ z0 = svsub_n_u16_z (p0, z1, x0), -+ z0 = svsub_z (p0, z1, x0)) -+ -+/* -+** sub_1_u16_z_tied1: -+** mov (z[0-9]+)\.b, #-1 -+** movprfx z0\.h, p0/z, z0\.h -+** add z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_u16_z_tied1, svuint16_t, -+ z0 = svsub_n_u16_z (p0, z0, 1), -+ z0 = svsub_z (p0, z0, 1)) -+ -+/* -+** sub_1_u16_z_untied: -+** mov (z[0-9]+)\.b, #-1 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** add z0\.h, p0/m, z0\.h, \1\.h -+** | -+** movprfx z0\.h, p0/z, \1\.h -+** add z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_u16_z_untied, svuint16_t, -+ z0 = svsub_n_u16_z (p0, z1, 1), -+ z0 = svsub_z (p0, z1, 1)) -+ -+/* -+** sub_u16_x_tied1: -+** sub z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_u16_x_tied1, svuint16_t, -+ z0 = svsub_u16_x (p0, z0, z1), -+ z0 = svsub_x (p0, z0, z1)) -+ -+/* -+** sub_u16_x_tied2: -+** sub z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_u16_x_tied2, svuint16_t, -+ z0 = svsub_u16_x (p0, z1, z0), -+ z0 = svsub_x (p0, z1, z0)) -+ -+/* -+** sub_u16_x_untied: -+** sub z0\.h, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (sub_u16_x_untied, svuint16_t, -+ z0 = svsub_u16_x (p0, z1, z2), -+ z0 = svsub_x (p0, z1, z2)) -+ -+/* -+** sub_w0_u16_x_tied1: -+** mov (z[0-9]+\.h), w0 -+** sub z0\.h, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_w0_u16_x_tied1, svuint16_t, uint16_t, -+ z0 = svsub_n_u16_x (p0, 
z0, x0), -+ z0 = svsub_x (p0, z0, x0)) -+ -+/* -+** sub_w0_u16_x_untied: -+** mov (z[0-9]+\.h), w0 -+** sub z0\.h, z1\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_w0_u16_x_untied, svuint16_t, uint16_t, -+ z0 = svsub_n_u16_x (p0, z1, x0), -+ z0 = svsub_x (p0, z1, x0)) -+ -+/* -+** sub_1_u16_x_tied1: -+** sub z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_u16_x_tied1, svuint16_t, -+ z0 = svsub_n_u16_x (p0, z0, 1), -+ z0 = svsub_x (p0, z0, 1)) -+ -+/* -+** sub_1_u16_x_untied: -+** movprfx z0, z1 -+** sub z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_u16_x_untied, svuint16_t, -+ z0 = svsub_n_u16_x (p0, z1, 1), -+ z0 = svsub_x (p0, z1, 1)) -+ -+/* -+** sub_127_u16_x: -+** sub z0\.h, z0\.h, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_127_u16_x, svuint16_t, -+ z0 = svsub_n_u16_x (p0, z0, 127), -+ z0 = svsub_x (p0, z0, 127)) -+ -+/* -+** sub_128_u16_x: -+** sub z0\.h, z0\.h, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_128_u16_x, svuint16_t, -+ z0 = svsub_n_u16_x (p0, z0, 128), -+ z0 = svsub_x (p0, z0, 128)) -+ -+/* -+** sub_255_u16_x: -+** sub z0\.h, z0\.h, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_255_u16_x, svuint16_t, -+ z0 = svsub_n_u16_x (p0, z0, 255), -+ z0 = svsub_x (p0, z0, 255)) -+ -+/* -+** sub_256_u16_x: -+** add z0\.h, z0\.h, #65280 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_256_u16_x, svuint16_t, -+ z0 = svsub_n_u16_x (p0, z0, 256), -+ z0 = svsub_x (p0, z0, 256)) -+ -+/* -+** sub_257_u16_x: -+** mov (z[0-9]+\.h), #-257 -+** add z0\.h, (z0\.h, \1|\1, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_257_u16_x, svuint16_t, -+ z0 = svsub_n_u16_x (p0, z0, 257), -+ z0 = svsub_x (p0, z0, 257)) -+ -+/* -+** sub_512_u16_x: -+** add z0\.h, z0\.h, #65024 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_512_u16_x, svuint16_t, -+ z0 = svsub_n_u16_x (p0, z0, 512), -+ z0 = svsub_x (p0, z0, 512)) -+ -+/* -+** sub_65280_u16_x: -+** add z0\.h, z0\.h, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_65280_u16_x, svuint16_t, -+ z0 = svsub_n_u16_x (p0, z0, 0xff00), -+ z0 = svsub_x (p0, z0, 0xff00)) -+ -+/* -+** sub_m1_u16_x: -+** add z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_u16_x, svuint16_t, -+ z0 = svsub_n_u16_x (p0, z0, -1), -+ z0 = svsub_x (p0, z0, -1)) -+ -+/* -+** sub_m127_u16_x: -+** add z0\.h, z0\.h, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m127_u16_x, svuint16_t, -+ z0 = svsub_n_u16_x (p0, z0, -127), -+ z0 = svsub_x (p0, z0, -127)) -+ -+/* -+** sub_m128_u16_x: -+** add z0\.h, z0\.h, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m128_u16_x, svuint16_t, -+ z0 = svsub_n_u16_x (p0, z0, -128), -+ z0 = svsub_x (p0, z0, -128)) -+ -+/* -+** sub_m255_u16_x: -+** add z0\.h, z0\.h, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m255_u16_x, svuint16_t, -+ z0 = svsub_n_u16_x (p0, z0, -255), -+ z0 = svsub_x (p0, z0, -255)) -+ -+/* -+** sub_m256_u16_x: -+** add z0\.h, z0\.h, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m256_u16_x, svuint16_t, -+ z0 = svsub_n_u16_x (p0, z0, -256), -+ z0 = svsub_x (p0, z0, -256)) -+ -+/* -+** sub_m257_u16_x: -+** mov (z[0-9]+)\.b, #1 -+** add z0\.h, (z0\.h, \1\.h|\1\.h, z0\.h) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m257_u16_x, svuint16_t, -+ z0 = svsub_n_u16_x (p0, z0, -257), -+ z0 = svsub_x (p0, z0, -257)) -+ -+/* -+** sub_m512_u16_x: -+** add z0\.h, z0\.h, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m512_u16_x, svuint16_t, -+ z0 = svsub_n_u16_x (p0, z0, -512), -+ z0 = svsub_x (p0, z0, -512)) -+ -+/* -+** sub_m32768_u16_x: -+** add z0\.h, z0\.h, #32768 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m32768_u16_x, svuint16_t, -+ z0 = svsub_n_u16_x (p0, z0, -0x8000), -+ z0 = svsub_x (p0, z0, -0x8000)) -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_u32.c -new file mode 100644 -index 000000000..c4b405d4d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_u32.c -@@ -0,0 +1,426 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** sub_u32_m_tied1: -+** sub z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_u32_m_tied1, svuint32_t, -+ z0 = svsub_u32_m (p0, z0, z1), -+ z0 = svsub_m (p0, z0, z1)) -+ -+/* -+** sub_u32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** sub z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_u32_m_tied2, svuint32_t, -+ z0 = svsub_u32_m (p0, z1, z0), -+ z0 = svsub_m (p0, z1, z0)) -+ -+/* -+** sub_u32_m_untied: -+** movprfx z0, z1 -+** sub z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_u32_m_untied, svuint32_t, -+ z0 = svsub_u32_m (p0, z1, z2), -+ z0 = svsub_m (p0, z1, z2)) -+ -+/* -+** sub_w0_u32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** sub z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_w0_u32_m_tied1, svuint32_t, uint32_t, -+ z0 = svsub_n_u32_m (p0, z0, x0), -+ z0 = svsub_m (p0, z0, x0)) -+ -+/* -+** sub_w0_u32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** sub z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_w0_u32_m_untied, svuint32_t, uint32_t, -+ z0 = svsub_n_u32_m (p0, z1, x0), -+ z0 = svsub_m (p0, z1, x0)) -+ -+/* -+** sub_1_u32_m_tied1: -+** mov (z[0-9]+)\.b, #-1 -+** add z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_u32_m_tied1, svuint32_t, -+ z0 = svsub_n_u32_m (p0, z0, 1), -+ z0 = svsub_m (p0, z0, 1)) -+ -+/* -+** sub_1_u32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+)\.b, #-1 -+** movprfx z0, z1 -+** add z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_u32_m_untied, svuint32_t, -+ z0 = svsub_n_u32_m (p0, z1, 1), -+ z0 = svsub_m (p0, z1, 1)) -+ -+/* -+** sub_m2_u32_m: -+** mov (z[0-9]+\.s), #2 -+** add z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m2_u32_m, svuint32_t, -+ z0 = svsub_n_u32_m (p0, z0, -2), -+ z0 = svsub_m (p0, z0, -2)) -+ -+/* -+** sub_u32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** sub z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_u32_z_tied1, svuint32_t, -+ z0 = svsub_u32_z (p0, z0, z1), -+ z0 = svsub_z (p0, z0, z1)) -+ -+/* -+** sub_u32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** subr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_u32_z_tied2, svuint32_t, -+ z0 = svsub_u32_z (p0, z1, z0), -+ z0 = svsub_z (p0, z1, z0)) -+ -+/* -+** sub_u32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** sub z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** subr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_u32_z_untied, svuint32_t, -+ z0 = svsub_u32_z (p0, z1, z2), -+ z0 = svsub_z (p0, z1, z2)) -+ -+/* -+** sub_w0_u32_z_tied1: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** sub z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_w0_u32_z_tied1, svuint32_t, uint32_t, -+ z0 = svsub_n_u32_z (p0, z0, x0), -+ z0 = svsub_z (p0, z0, x0)) -+ -+/* -+** sub_w0_u32_z_untied: -+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** sub z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** subr z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_w0_u32_z_untied, svuint32_t, uint32_t, -+ z0 = svsub_n_u32_z (p0, z1, x0), -+ z0 = svsub_z (p0, z1, x0)) 
-+ -+/* -+** sub_1_u32_z_tied1: -+** mov (z[0-9]+)\.b, #-1 -+** movprfx z0\.s, p0/z, z0\.s -+** add z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_u32_z_tied1, svuint32_t, -+ z0 = svsub_n_u32_z (p0, z0, 1), -+ z0 = svsub_z (p0, z0, 1)) -+ -+/* -+** sub_1_u32_z_untied: -+** mov (z[0-9]+)\.b, #-1 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** add z0\.s, p0/m, z0\.s, \1\.s -+** | -+** movprfx z0\.s, p0/z, \1\.s -+** add z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_u32_z_untied, svuint32_t, -+ z0 = svsub_n_u32_z (p0, z1, 1), -+ z0 = svsub_z (p0, z1, 1)) -+ -+/* -+** sub_u32_x_tied1: -+** sub z0\.s, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_u32_x_tied1, svuint32_t, -+ z0 = svsub_u32_x (p0, z0, z1), -+ z0 = svsub_x (p0, z0, z1)) -+ -+/* -+** sub_u32_x_tied2: -+** sub z0\.s, z1\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_u32_x_tied2, svuint32_t, -+ z0 = svsub_u32_x (p0, z1, z0), -+ z0 = svsub_x (p0, z1, z0)) -+ -+/* -+** sub_u32_x_untied: -+** sub z0\.s, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (sub_u32_x_untied, svuint32_t, -+ z0 = svsub_u32_x (p0, z1, z2), -+ z0 = svsub_x (p0, z1, z2)) -+ -+/* -+** sub_w0_u32_x_tied1: -+** mov (z[0-9]+\.s), w0 -+** sub z0\.s, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_w0_u32_x_tied1, svuint32_t, uint32_t, -+ z0 = svsub_n_u32_x (p0, z0, x0), -+ z0 = svsub_x (p0, z0, x0)) -+ -+/* -+** sub_w0_u32_x_untied: -+** mov (z[0-9]+\.s), w0 -+** sub z0\.s, z1\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_w0_u32_x_untied, svuint32_t, uint32_t, -+ z0 = svsub_n_u32_x (p0, z1, x0), -+ z0 = svsub_x (p0, z1, x0)) -+ -+/* -+** sub_1_u32_x_tied1: -+** sub z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_u32_x_tied1, svuint32_t, -+ z0 = svsub_n_u32_x (p0, z0, 1), -+ z0 = svsub_x (p0, z0, 1)) -+ -+/* -+** sub_1_u32_x_untied: -+** movprfx z0, z1 -+** sub z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_u32_x_untied, svuint32_t, -+ z0 = svsub_n_u32_x (p0, z1, 1), -+ z0 = svsub_x (p0, z1, 1)) -+ -+/* -+** sub_127_u32_x: -+** sub z0\.s, z0\.s, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_127_u32_x, svuint32_t, -+ z0 = svsub_n_u32_x (p0, z0, 127), -+ z0 = svsub_x (p0, z0, 127)) -+ -+/* -+** sub_128_u32_x: -+** sub z0\.s, z0\.s, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_128_u32_x, svuint32_t, -+ z0 = svsub_n_u32_x (p0, z0, 128), -+ z0 = svsub_x (p0, z0, 128)) -+ -+/* -+** sub_255_u32_x: -+** sub z0\.s, z0\.s, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_255_u32_x, svuint32_t, -+ z0 = svsub_n_u32_x (p0, z0, 255), -+ z0 = svsub_x (p0, z0, 255)) -+ -+/* -+** sub_256_u32_x: -+** sub z0\.s, z0\.s, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_256_u32_x, svuint32_t, -+ z0 = svsub_n_u32_x (p0, z0, 256), -+ z0 = svsub_x (p0, z0, 256)) -+ -+/* -+** sub_511_u32_x: -+** mov (z[0-9]+\.s), #-511 -+** add z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_511_u32_x, svuint32_t, -+ z0 = svsub_n_u32_x (p0, z0, 511), -+ z0 = svsub_x (p0, z0, 511)) -+ -+/* -+** sub_512_u32_x: -+** sub z0\.s, z0\.s, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_512_u32_x, svuint32_t, -+ z0 = svsub_n_u32_x (p0, z0, 512), -+ z0 = svsub_x (p0, z0, 512)) -+ -+/* -+** sub_65280_u32_x: -+** sub z0\.s, z0\.s, #65280 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_65280_u32_x, svuint32_t, -+ z0 = svsub_n_u32_x (p0, z0, 0xff00), -+ z0 = svsub_x (p0, z0, 0xff00)) -+ -+/* -+** sub_65535_u32_x: -+** mov (z[0-9]+\.s), #-65535 -+** add z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_65535_u32_x, svuint32_t, -+ z0 = svsub_n_u32_x (p0, z0, 65535), -+ z0 = svsub_x (p0, z0, 
65535)) -+ -+/* -+** sub_65536_u32_x: -+** mov (z[0-9]+\.s), #-65536 -+** add z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_65536_u32_x, svuint32_t, -+ z0 = svsub_n_u32_x (p0, z0, 65536), -+ z0 = svsub_x (p0, z0, 65536)) -+ -+/* -+** sub_m1_u32_x: -+** add z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_u32_x, svuint32_t, -+ z0 = svsub_n_u32_x (p0, z0, -1), -+ z0 = svsub_x (p0, z0, -1)) -+ -+/* -+** sub_m127_u32_x: -+** add z0\.s, z0\.s, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m127_u32_x, svuint32_t, -+ z0 = svsub_n_u32_x (p0, z0, -127), -+ z0 = svsub_x (p0, z0, -127)) -+ -+/* -+** sub_m128_u32_x: -+** add z0\.s, z0\.s, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m128_u32_x, svuint32_t, -+ z0 = svsub_n_u32_x (p0, z0, -128), -+ z0 = svsub_x (p0, z0, -128)) -+ -+/* -+** sub_m255_u32_x: -+** add z0\.s, z0\.s, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m255_u32_x, svuint32_t, -+ z0 = svsub_n_u32_x (p0, z0, -255), -+ z0 = svsub_x (p0, z0, -255)) -+ -+/* -+** sub_m256_u32_x: -+** add z0\.s, z0\.s, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m256_u32_x, svuint32_t, -+ z0 = svsub_n_u32_x (p0, z0, -256), -+ z0 = svsub_x (p0, z0, -256)) -+ -+/* -+** sub_m511_u32_x: -+** mov (z[0-9]+\.s), #511 -+** add z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m511_u32_x, svuint32_t, -+ z0 = svsub_n_u32_x (p0, z0, -511), -+ z0 = svsub_x (p0, z0, -511)) -+ -+/* -+** sub_m512_u32_x: -+** add z0\.s, z0\.s, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m512_u32_x, svuint32_t, -+ z0 = svsub_n_u32_x (p0, z0, -512), -+ z0 = svsub_x (p0, z0, -512)) -+ -+/* -+** sub_m32768_u32_x: -+** add z0\.s, z0\.s, #32768 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m32768_u32_x, svuint32_t, -+ z0 = svsub_n_u32_x (p0, z0, -0x8000), -+ z0 = svsub_x (p0, z0, -0x8000)) -+ -+/* -+** sub_m65280_u32_x: -+** add z0\.s, z0\.s, #65280 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m65280_u32_x, svuint32_t, -+ z0 = svsub_n_u32_x (p0, z0, -0xff00), -+ z0 = svsub_x (p0, z0, -0xff00)) -+ -+/* -+** sub_m65535_u32_x: -+** mov (z[0-9]+\.s), #65535 -+** add z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m65535_u32_x, svuint32_t, -+ z0 = svsub_n_u32_x (p0, z0, -65535), -+ z0 = svsub_x (p0, z0, -65535)) -+ -+/* -+** sub_m65536_u32_x: -+** mov (z[0-9]+\.s), #65536 -+** add z0\.s, (z0\.s, \1|\1, z0\.s) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m65536_u32_x, svuint32_t, -+ z0 = svsub_n_u32_x (p0, z0, -65536), -+ z0 = svsub_x (p0, z0, -65536)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_u64.c -new file mode 100644 -index 000000000..fb7f7173a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_u64.c -@@ -0,0 +1,426 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** sub_u64_m_tied1: -+** sub z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sub_u64_m_tied1, svuint64_t, -+ z0 = svsub_u64_m (p0, z0, z1), -+ z0 = svsub_m (p0, z0, z1)) -+ -+/* -+** sub_u64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** sub z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_u64_m_tied2, svuint64_t, -+ z0 = svsub_u64_m (p0, z1, z0), -+ z0 = svsub_m (p0, z1, z0)) -+ -+/* -+** sub_u64_m_untied: -+** movprfx z0, z1 -+** sub z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sub_u64_m_untied, svuint64_t, -+ z0 = svsub_u64_m (p0, z1, z2), -+ z0 = svsub_m (p0, z1, z2)) -+ -+/* -+** sub_x0_u64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** sub z0\.d, p0/m, z0\.d, \1 -+** 
ret -+*/ -+TEST_UNIFORM_ZX (sub_x0_u64_m_tied1, svuint64_t, uint64_t, -+ z0 = svsub_n_u64_m (p0, z0, x0), -+ z0 = svsub_m (p0, z0, x0)) -+ -+/* -+** sub_x0_u64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** sub z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_x0_u64_m_untied, svuint64_t, uint64_t, -+ z0 = svsub_n_u64_m (p0, z1, x0), -+ z0 = svsub_m (p0, z1, x0)) -+ -+/* -+** sub_1_u64_m_tied1: -+** mov (z[0-9]+)\.b, #-1 -+** add z0\.d, p0/m, z0\.d, \1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_u64_m_tied1, svuint64_t, -+ z0 = svsub_n_u64_m (p0, z0, 1), -+ z0 = svsub_m (p0, z0, 1)) -+ -+/* -+** sub_1_u64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+)\.b, #-1 -+** movprfx z0, z1 -+** add z0\.d, p0/m, z0\.d, \1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_u64_m_untied, svuint64_t, -+ z0 = svsub_n_u64_m (p0, z1, 1), -+ z0 = svsub_m (p0, z1, 1)) -+ -+/* -+** sub_m2_u64_m: -+** mov (z[0-9]+\.d), #2 -+** add z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m2_u64_m, svuint64_t, -+ z0 = svsub_n_u64_m (p0, z0, -2), -+ z0 = svsub_m (p0, z0, -2)) -+ -+/* -+** sub_u64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** sub z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sub_u64_z_tied1, svuint64_t, -+ z0 = svsub_u64_z (p0, z0, z1), -+ z0 = svsub_z (p0, z0, z1)) -+ -+/* -+** sub_u64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** subr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sub_u64_z_tied2, svuint64_t, -+ z0 = svsub_u64_z (p0, z1, z0), -+ z0 = svsub_z (p0, z1, z0)) -+ -+/* -+** sub_u64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** sub z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** subr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_u64_z_untied, svuint64_t, -+ z0 = svsub_u64_z (p0, z1, z2), -+ z0 = svsub_z (p0, z1, z2)) -+ -+/* -+** sub_x0_u64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** sub z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_x0_u64_z_tied1, svuint64_t, uint64_t, -+ z0 = svsub_n_u64_z (p0, z0, x0), -+ z0 = svsub_z (p0, z0, x0)) -+ -+/* -+** sub_x0_u64_z_untied: -+** mov (z[0-9]+\.d), x0 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** sub z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** subr z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_x0_u64_z_untied, svuint64_t, uint64_t, -+ z0 = svsub_n_u64_z (p0, z1, x0), -+ z0 = svsub_z (p0, z1, x0)) -+ -+/* -+** sub_1_u64_z_tied1: -+** mov (z[0-9]+)\.b, #-1 -+** movprfx z0\.d, p0/z, z0\.d -+** add z0\.d, p0/m, z0\.d, \1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_u64_z_tied1, svuint64_t, -+ z0 = svsub_n_u64_z (p0, z0, 1), -+ z0 = svsub_z (p0, z0, 1)) -+ -+/* -+** sub_1_u64_z_untied: -+** mov (z[0-9]+)\.b, #-1 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** add z0\.d, p0/m, z0\.d, \1\.d -+** | -+** movprfx z0\.d, p0/z, \1\.d -+** add z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_u64_z_untied, svuint64_t, -+ z0 = svsub_n_u64_z (p0, z1, 1), -+ z0 = svsub_z (p0, z1, 1)) -+ -+/* -+** sub_u64_x_tied1: -+** sub z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sub_u64_x_tied1, svuint64_t, -+ z0 = svsub_u64_x (p0, z0, z1), -+ z0 = svsub_x (p0, z0, z1)) -+ -+/* -+** sub_u64_x_tied2: -+** sub z0\.d, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sub_u64_x_tied2, svuint64_t, -+ z0 = svsub_u64_x (p0, z1, z0), -+ z0 = svsub_x (p0, z1, z0)) -+ -+/* -+** sub_u64_x_untied: -+** sub z0\.d, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (sub_u64_x_untied, svuint64_t, -+ z0 = 
svsub_u64_x (p0, z1, z2), -+ z0 = svsub_x (p0, z1, z2)) -+ -+/* -+** sub_x0_u64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** sub z0\.d, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_x0_u64_x_tied1, svuint64_t, uint64_t, -+ z0 = svsub_n_u64_x (p0, z0, x0), -+ z0 = svsub_x (p0, z0, x0)) -+ -+/* -+** sub_x0_u64_x_untied: -+** mov (z[0-9]+\.d), x0 -+** sub z0\.d, z1\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_x0_u64_x_untied, svuint64_t, uint64_t, -+ z0 = svsub_n_u64_x (p0, z1, x0), -+ z0 = svsub_x (p0, z1, x0)) -+ -+/* -+** sub_1_u64_x_tied1: -+** sub z0\.d, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_u64_x_tied1, svuint64_t, -+ z0 = svsub_n_u64_x (p0, z0, 1), -+ z0 = svsub_x (p0, z0, 1)) -+ -+/* -+** sub_1_u64_x_untied: -+** movprfx z0, z1 -+** sub z0\.d, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_u64_x_untied, svuint64_t, -+ z0 = svsub_n_u64_x (p0, z1, 1), -+ z0 = svsub_x (p0, z1, 1)) -+ -+/* -+** sub_127_u64_x: -+** sub z0\.d, z0\.d, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_127_u64_x, svuint64_t, -+ z0 = svsub_n_u64_x (p0, z0, 127), -+ z0 = svsub_x (p0, z0, 127)) -+ -+/* -+** sub_128_u64_x: -+** sub z0\.d, z0\.d, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_128_u64_x, svuint64_t, -+ z0 = svsub_n_u64_x (p0, z0, 128), -+ z0 = svsub_x (p0, z0, 128)) -+ -+/* -+** sub_255_u64_x: -+** sub z0\.d, z0\.d, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_255_u64_x, svuint64_t, -+ z0 = svsub_n_u64_x (p0, z0, 255), -+ z0 = svsub_x (p0, z0, 255)) -+ -+/* -+** sub_256_u64_x: -+** sub z0\.d, z0\.d, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_256_u64_x, svuint64_t, -+ z0 = svsub_n_u64_x (p0, z0, 256), -+ z0 = svsub_x (p0, z0, 256)) -+ -+/* -+** sub_511_u64_x: -+** mov (z[0-9]+\.d), #-511 -+** add z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_511_u64_x, svuint64_t, -+ z0 = svsub_n_u64_x (p0, z0, 511), -+ z0 = svsub_x (p0, z0, 511)) -+ -+/* -+** sub_512_u64_x: -+** sub z0\.d, z0\.d, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_512_u64_x, svuint64_t, -+ z0 = svsub_n_u64_x (p0, z0, 512), -+ z0 = svsub_x (p0, z0, 512)) -+ -+/* -+** sub_65280_u64_x: -+** sub z0\.d, z0\.d, #65280 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_65280_u64_x, svuint64_t, -+ z0 = svsub_n_u64_x (p0, z0, 0xff00), -+ z0 = svsub_x (p0, z0, 0xff00)) -+ -+/* -+** sub_65535_u64_x: -+** mov (z[0-9]+\.d), #-65535 -+** add z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_65535_u64_x, svuint64_t, -+ z0 = svsub_n_u64_x (p0, z0, 65535), -+ z0 = svsub_x (p0, z0, 65535)) -+ -+/* -+** sub_65536_u64_x: -+** mov (z[0-9]+\.d), #-65536 -+** add z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_65536_u64_x, svuint64_t, -+ z0 = svsub_n_u64_x (p0, z0, 65536), -+ z0 = svsub_x (p0, z0, 65536)) -+ -+/* -+** sub_m1_u64_x: -+** add z0\.d, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_u64_x, svuint64_t, -+ z0 = svsub_n_u64_x (p0, z0, -1), -+ z0 = svsub_x (p0, z0, -1)) -+ -+/* -+** sub_m127_u64_x: -+** add z0\.d, z0\.d, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m127_u64_x, svuint64_t, -+ z0 = svsub_n_u64_x (p0, z0, -127), -+ z0 = svsub_x (p0, z0, -127)) -+ -+/* -+** sub_m128_u64_x: -+** add z0\.d, z0\.d, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m128_u64_x, svuint64_t, -+ z0 = svsub_n_u64_x (p0, z0, -128), -+ z0 = svsub_x (p0, z0, -128)) -+ -+/* -+** sub_m255_u64_x: -+** add z0\.d, z0\.d, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m255_u64_x, svuint64_t, -+ z0 = svsub_n_u64_x (p0, z0, -255), -+ z0 = svsub_x (p0, z0, -255)) -+ -+/* -+** sub_m256_u64_x: -+** add z0\.d, z0\.d, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m256_u64_x, svuint64_t, 
-+ z0 = svsub_n_u64_x (p0, z0, -256), -+ z0 = svsub_x (p0, z0, -256)) -+ -+/* -+** sub_m511_u64_x: -+** mov (z[0-9]+\.d), #511 -+** add z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m511_u64_x, svuint64_t, -+ z0 = svsub_n_u64_x (p0, z0, -511), -+ z0 = svsub_x (p0, z0, -511)) -+ -+/* -+** sub_m512_u64_x: -+** add z0\.d, z0\.d, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m512_u64_x, svuint64_t, -+ z0 = svsub_n_u64_x (p0, z0, -512), -+ z0 = svsub_x (p0, z0, -512)) -+ -+/* -+** sub_m32768_u64_x: -+** add z0\.d, z0\.d, #32768 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m32768_u64_x, svuint64_t, -+ z0 = svsub_n_u64_x (p0, z0, -0x8000), -+ z0 = svsub_x (p0, z0, -0x8000)) -+ -+/* -+** sub_m65280_u64_x: -+** add z0\.d, z0\.d, #65280 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m65280_u64_x, svuint64_t, -+ z0 = svsub_n_u64_x (p0, z0, -0xff00), -+ z0 = svsub_x (p0, z0, -0xff00)) -+ -+/* -+** sub_m65535_u64_x: -+** mov (z[0-9]+\.d), #65535 -+** add z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m65535_u64_x, svuint64_t, -+ z0 = svsub_n_u64_x (p0, z0, -65535), -+ z0 = svsub_x (p0, z0, -65535)) -+ -+/* -+** sub_m65536_u64_x: -+** mov (z[0-9]+\.d), #65536 -+** add z0\.d, (z0\.d, \1|\1, z0\.d) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m65536_u64_x, svuint64_t, -+ z0 = svsub_n_u64_x (p0, z0, -65536), -+ z0 = svsub_x (p0, z0, -65536)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_u8.c -new file mode 100644 -index 000000000..455204191 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sub_u8.c -@@ -0,0 +1,294 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** sub_u8_m_tied1: -+** sub z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (sub_u8_m_tied1, svuint8_t, -+ z0 = svsub_u8_m (p0, z0, z1), -+ z0 = svsub_m (p0, z0, z1)) -+ -+/* -+** sub_u8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** sub z0\.b, p0/m, z0\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (sub_u8_m_tied2, svuint8_t, -+ z0 = svsub_u8_m (p0, z1, z0), -+ z0 = svsub_m (p0, z1, z0)) -+ -+/* -+** sub_u8_m_untied: -+** movprfx z0, z1 -+** sub z0\.b, p0/m, z0\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (sub_u8_m_untied, svuint8_t, -+ z0 = svsub_u8_m (p0, z1, z2), -+ z0 = svsub_m (p0, z1, z2)) -+ -+/* -+** sub_w0_u8_m_tied1: -+** mov (z[0-9]+\.b), w0 -+** sub z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_w0_u8_m_tied1, svuint8_t, uint8_t, -+ z0 = svsub_n_u8_m (p0, z0, x0), -+ z0 = svsub_m (p0, z0, x0)) -+ -+/* -+** sub_w0_u8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** sub z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_w0_u8_m_untied, svuint8_t, uint8_t, -+ z0 = svsub_n_u8_m (p0, z1, x0), -+ z0 = svsub_m (p0, z1, x0)) -+ -+/* -+** sub_1_u8_m_tied1: -+** mov (z[0-9]+\.b), #-1 -+** add z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_u8_m_tied1, svuint8_t, -+ z0 = svsub_n_u8_m (p0, z0, 1), -+ z0 = svsub_m (p0, z0, 1)) -+ -+/* -+** sub_1_u8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), #-1 -+** movprfx z0, z1 -+** add z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_u8_m_untied, svuint8_t, -+ z0 = svsub_n_u8_m (p0, z1, 1), -+ z0 = svsub_m (p0, z1, 1)) -+ -+/* -+** sub_m1_u8_m: -+** mov (z[0-9]+\.b), #1 -+** add z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_u8_m, svuint8_t, -+ z0 = svsub_n_u8_m (p0, z0, -1), -+ z0 = svsub_m (p0, z0, -1)) -+ -+/* -+** sub_u8_z_tied1: -+** 
movprfx z0\.b, p0/z, z0\.b -+** sub z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (sub_u8_z_tied1, svuint8_t, -+ z0 = svsub_u8_z (p0, z0, z1), -+ z0 = svsub_z (p0, z0, z1)) -+ -+/* -+** sub_u8_z_tied2: -+** movprfx z0\.b, p0/z, z0\.b -+** subr z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (sub_u8_z_tied2, svuint8_t, -+ z0 = svsub_u8_z (p0, z1, z0), -+ z0 = svsub_z (p0, z1, z0)) -+ -+/* -+** sub_u8_z_untied: -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** sub z0\.b, p0/m, z0\.b, z2\.b -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** subr z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_u8_z_untied, svuint8_t, -+ z0 = svsub_u8_z (p0, z1, z2), -+ z0 = svsub_z (p0, z1, z2)) -+ -+/* -+** sub_w0_u8_z_tied1: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** sub z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_w0_u8_z_tied1, svuint8_t, uint8_t, -+ z0 = svsub_n_u8_z (p0, z0, x0), -+ z0 = svsub_z (p0, z0, x0)) -+ -+/* -+** sub_w0_u8_z_untied: -+** mov (z[0-9]+\.b), w0 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** sub z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** subr z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_w0_u8_z_untied, svuint8_t, uint8_t, -+ z0 = svsub_n_u8_z (p0, z1, x0), -+ z0 = svsub_z (p0, z1, x0)) -+ -+/* -+** sub_1_u8_z_tied1: -+** mov (z[0-9]+\.b), #-1 -+** movprfx z0\.b, p0/z, z0\.b -+** add z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_u8_z_tied1, svuint8_t, -+ z0 = svsub_n_u8_z (p0, z0, 1), -+ z0 = svsub_z (p0, z0, 1)) -+ -+/* -+** sub_1_u8_z_untied: -+** mov (z[0-9]+\.b), #-1 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** add z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** add z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_u8_z_untied, svuint8_t, -+ z0 = svsub_n_u8_z (p0, z1, 1), -+ z0 = svsub_z (p0, z1, 1)) -+ -+/* -+** sub_u8_x_tied1: -+** sub z0\.b, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (sub_u8_x_tied1, svuint8_t, -+ z0 = svsub_u8_x (p0, z0, z1), -+ z0 = svsub_x (p0, z0, z1)) -+ -+/* -+** sub_u8_x_tied2: -+** sub z0\.b, z1\.b, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (sub_u8_x_tied2, svuint8_t, -+ z0 = svsub_u8_x (p0, z1, z0), -+ z0 = svsub_x (p0, z1, z0)) -+ -+/* -+** sub_u8_x_untied: -+** sub z0\.b, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (sub_u8_x_untied, svuint8_t, -+ z0 = svsub_u8_x (p0, z1, z2), -+ z0 = svsub_x (p0, z1, z2)) -+ -+/* -+** sub_w0_u8_x_tied1: -+** mov (z[0-9]+\.b), w0 -+** sub z0\.b, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_w0_u8_x_tied1, svuint8_t, uint8_t, -+ z0 = svsub_n_u8_x (p0, z0, x0), -+ z0 = svsub_x (p0, z0, x0)) -+ -+/* -+** sub_w0_u8_x_untied: -+** mov (z[0-9]+\.b), w0 -+** sub z0\.b, z1\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (sub_w0_u8_x_untied, svuint8_t, uint8_t, -+ z0 = svsub_n_u8_x (p0, z1, x0), -+ z0 = svsub_x (p0, z1, x0)) -+ -+/* -+** sub_1_u8_x_tied1: -+** add z0\.b, z0\.b, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_u8_x_tied1, svuint8_t, -+ z0 = svsub_n_u8_x (p0, z0, 1), -+ z0 = svsub_x (p0, z0, 1)) -+ -+/* -+** sub_1_u8_x_untied: -+** movprfx z0, z1 -+** add z0\.b, z0\.b, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_1_u8_x_untied, svuint8_t, -+ z0 = svsub_n_u8_x (p0, z1, 1), -+ z0 = svsub_x (p0, z1, 1)) -+ -+/* -+** sub_127_u8_x: -+** add z0\.b, z0\.b, #129 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_127_u8_x, svuint8_t, -+ z0 = svsub_n_u8_x (p0, z0, 127), -+ z0 = svsub_x (p0, z0, 127)) -+ -+/* -+** sub_128_u8_x: -+** add z0\.b, z0\.b, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_128_u8_x, 
svuint8_t, -+ z0 = svsub_n_u8_x (p0, z0, 128), -+ z0 = svsub_x (p0, z0, 128)) -+ -+/* -+** sub_255_u8_x: -+** add z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_255_u8_x, svuint8_t, -+ z0 = svsub_n_u8_x (p0, z0, 255), -+ z0 = svsub_x (p0, z0, 255)) -+ -+/* -+** sub_m1_u8_x: -+** add z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m1_u8_x, svuint8_t, -+ z0 = svsub_n_u8_x (p0, z0, -1), -+ z0 = svsub_x (p0, z0, -1)) -+ -+/* -+** sub_m127_u8_x: -+** add z0\.b, z0\.b, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m127_u8_x, svuint8_t, -+ z0 = svsub_n_u8_x (p0, z0, -127), -+ z0 = svsub_x (p0, z0, -127)) -+ -+/* -+** sub_m128_u8_x: -+** add z0\.b, z0\.b, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (sub_m128_u8_x, svuint8_t, -+ z0 = svsub_n_u8_x (p0, z0, -128), -+ z0 = svsub_x (p0, z0, -128)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_f16.c -new file mode 100644 -index 000000000..e14357db2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_f16.c -@@ -0,0 +1,444 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** subr_f16_m_tied1: -+** fsubr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f16_m_tied1, svfloat16_t, -+ z0 = svsubr_f16_m (p0, z0, z1), -+ z0 = svsubr_m (p0, z0, z1)) -+ -+/* -+** subr_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fsubr z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f16_m_tied2, svfloat16_t, -+ z0 = svsubr_f16_m (p0, z1, z0), -+ z0 = svsubr_m (p0, z1, z0)) -+ -+/* -+** subr_f16_m_untied: -+** movprfx z0, z1 -+** fsubr z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f16_m_untied, svfloat16_t, -+ z0 = svsubr_f16_m (p0, z1, z2), -+ z0 = svsubr_m (p0, z1, z2)) -+ -+/* -+** subr_h4_f16_m_tied1: -+** mov (z[0-9]+\.h), h4 -+** fsubr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (subr_h4_f16_m_tied1, svfloat16_t, __fp16, -+ z0 = svsubr_n_f16_m (p0, z0, d4), -+ z0 = svsubr_m (p0, z0, d4)) -+ -+/* -+** subr_h4_f16_m_untied: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0, z1 -+** fsubr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (subr_h4_f16_m_untied, svfloat16_t, __fp16, -+ z0 = svsubr_n_f16_m (p0, z1, d4), -+ z0 = svsubr_m (p0, z1, d4)) -+ -+/* -+** subr_1_f16_m_tied1: -+** fsubr z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_f16_m_tied1, svfloat16_t, -+ z0 = svsubr_n_f16_m (p0, z0, 1), -+ z0 = svsubr_m (p0, z0, 1)) -+ -+/* -+** subr_1_f16_m_untied: -+** movprfx z0, z1 -+** fsubr z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_f16_m_untied, svfloat16_t, -+ z0 = svsubr_n_f16_m (p0, z1, 1), -+ z0 = svsubr_m (p0, z1, 1)) -+ -+/* -+** subr_0p5_f16_m_tied1: -+** fsubr z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_0p5_f16_m_tied1, svfloat16_t, -+ z0 = svsubr_n_f16_m (p0, z0, 0.5), -+ z0 = svsubr_m (p0, z0, 0.5)) -+ -+/* -+** subr_0p5_f16_m_untied: -+** movprfx z0, z1 -+** fsubr z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_0p5_f16_m_untied, svfloat16_t, -+ z0 = svsubr_n_f16_m (p0, z1, 0.5), -+ z0 = svsubr_m (p0, z1, 0.5)) -+ -+/* -+** subr_m1_f16_m_tied1: -+** fmov (z[0-9]+\.h), #-1\.0(?:e\+0)? -+** fsubr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_f16_m_tied1, svfloat16_t, -+ z0 = svsubr_n_f16_m (p0, z0, -1), -+ z0 = svsubr_m (p0, z0, -1)) -+ -+/* -+** subr_m1_f16_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.h), #-1\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fsubr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_f16_m_untied, svfloat16_t, -+ z0 = svsubr_n_f16_m (p0, z1, -1), -+ z0 = svsubr_m (p0, z1, -1)) -+ -+/* -+** subr_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fsubr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f16_z_tied1, svfloat16_t, -+ z0 = svsubr_f16_z (p0, z0, z1), -+ z0 = svsubr_z (p0, z0, z1)) -+ -+/* -+** subr_f16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** fsub z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f16_z_tied2, svfloat16_t, -+ z0 = svsubr_f16_z (p0, z1, z0), -+ z0 = svsubr_z (p0, z1, z0)) -+ -+/* -+** subr_f16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fsubr z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fsub z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f16_z_untied, svfloat16_t, -+ z0 = svsubr_f16_z (p0, z1, z2), -+ z0 = svsubr_z (p0, z1, z2)) -+ -+/* -+** subr_h4_f16_z_tied1: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0\.h, p0/z, z0\.h -+** fsubr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (subr_h4_f16_z_tied1, svfloat16_t, __fp16, -+ z0 = svsubr_n_f16_z (p0, z0, d4), -+ z0 = svsubr_z (p0, z0, d4)) -+ -+/* -+** subr_h4_f16_z_untied: -+** mov (z[0-9]+\.h), h4 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fsubr z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** fsub z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (subr_h4_f16_z_untied, svfloat16_t, __fp16, -+ z0 = svsubr_n_f16_z (p0, z1, d4), -+ z0 = svsubr_z (p0, z1, d4)) -+ -+/* -+** subr_1_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fsubr z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_f16_z_tied1, svfloat16_t, -+ z0 = svsubr_n_f16_z (p0, z0, 1), -+ z0 = svsubr_z (p0, z0, 1)) -+ -+/* -+** subr_1_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fsubr z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_f16_z_untied, svfloat16_t, -+ z0 = svsubr_n_f16_z (p0, z1, 1), -+ z0 = svsubr_z (p0, z1, 1)) -+ -+/* -+** subr_0p5_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fsubr z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_0p5_f16_z_tied1, svfloat16_t, -+ z0 = svsubr_n_f16_z (p0, z0, 0.5), -+ z0 = svsubr_z (p0, z0, 0.5)) -+ -+/* -+** subr_0p5_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fsubr z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_0p5_f16_z_untied, svfloat16_t, -+ z0 = svsubr_n_f16_z (p0, z1, 0.5), -+ z0 = svsubr_z (p0, z1, 0.5)) -+ -+/* -+** subr_m1_f16_z_tied1: -+** fmov (z[0-9]+\.h), #-1\.0(?:e\+0)? -+** movprfx z0\.h, p0/z, z0\.h -+** fsubr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_f16_z_tied1, svfloat16_t, -+ z0 = svsubr_n_f16_z (p0, z0, -1), -+ z0 = svsubr_z (p0, z0, -1)) -+ -+/* -+** subr_m1_f16_z_untied: -+** fmov (z[0-9]+\.h), #-1\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fsubr z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** fsub z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_f16_z_untied, svfloat16_t, -+ z0 = svsubr_n_f16_z (p0, z1, -1), -+ z0 = svsubr_z (p0, z1, -1)) -+ -+/* -+** subr_f16_x_tied1: -+** fsubr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f16_x_tied1, svfloat16_t, -+ z0 = svsubr_f16_x (p0, z0, z1), -+ z0 = svsubr_x (p0, z0, z1)) -+ -+/* -+** subr_f16_x_tied2: -+** fsub z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f16_x_tied2, svfloat16_t, -+ z0 = svsubr_f16_x (p0, z1, z0), -+ z0 = svsubr_x (p0, z1, z0)) -+ -+/* -+** subr_f16_x_untied: -+** ( -+** movprfx z0, z1 -+** fsubr z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0, z2 -+** fsub z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f16_x_untied, svfloat16_t, -+ z0 = svsubr_f16_x (p0, z1, z2), -+ z0 = svsubr_x (p0, z1, z2)) -+ -+/* -+** subr_h4_f16_x_tied1: -+** mov (z[0-9]+\.h), h4 -+** fsubr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (subr_h4_f16_x_tied1, svfloat16_t, __fp16, -+ z0 = svsubr_n_f16_x (p0, z0, d4), -+ z0 = svsubr_x (p0, z0, d4)) -+ -+/* -+** subr_h4_f16_x_untied: { xfail *-*-* } -+** mov z0\.h, h4 -+** fsub z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZD (subr_h4_f16_x_untied, svfloat16_t, __fp16, -+ z0 = svsubr_n_f16_x (p0, z1, d4), -+ z0 = svsubr_x (p0, z1, d4)) -+ -+/* -+** subr_1_f16_x_tied1: -+** fsubr z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_f16_x_tied1, svfloat16_t, -+ z0 = svsubr_n_f16_x (p0, z0, 1), -+ z0 = svsubr_x (p0, z0, 1)) -+ -+/* -+** subr_1_f16_x_untied: -+** movprfx z0, z1 -+** fsubr z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_f16_x_untied, svfloat16_t, -+ z0 = svsubr_n_f16_x (p0, z1, 1), -+ z0 = svsubr_x (p0, z1, 1)) -+ -+/* -+** subr_0p5_f16_x_tied1: -+** fsubr z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_0p5_f16_x_tied1, svfloat16_t, -+ z0 = svsubr_n_f16_x (p0, z0, 0.5), -+ z0 = svsubr_x (p0, z0, 0.5)) -+ -+/* -+** subr_0p5_f16_x_untied: -+** movprfx z0, z1 -+** fsubr z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_0p5_f16_x_untied, svfloat16_t, -+ z0 = svsubr_n_f16_x (p0, z1, 0.5), -+ z0 = svsubr_x (p0, z1, 0.5)) -+ -+/* -+** subr_m1_f16_x_tied1: -+** fmov (z[0-9]+\.h), #-1\.0(?:e\+0)? -+** fsubr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_f16_x_tied1, svfloat16_t, -+ z0 = svsubr_n_f16_x (p0, z0, -1), -+ z0 = svsubr_x (p0, z0, -1)) -+ -+/* -+** subr_m1_f16_x_untied: -+** fmov z0\.h, #-1\.0(?:e\+0)? -+** fsub z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_f16_x_untied, svfloat16_t, -+ z0 = svsubr_n_f16_x (p0, z1, -1), -+ z0 = svsubr_x (p0, z1, -1)) -+ -+/* -+** ptrue_subr_f16_x_tied1: -+** fsub z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_f16_x_tied1, svfloat16_t, -+ z0 = svsubr_f16_x (svptrue_b16 (), z0, z1), -+ z0 = svsubr_x (svptrue_b16 (), z0, z1)) -+ -+/* -+** ptrue_subr_f16_x_tied2: -+** fsub z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_f16_x_tied2, svfloat16_t, -+ z0 = svsubr_f16_x (svptrue_b16 (), z1, z0), -+ z0 = svsubr_x (svptrue_b16 (), z1, z0)) -+ -+/* -+** ptrue_subr_f16_x_untied: -+** fsub z0\.h, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_f16_x_untied, svfloat16_t, -+ z0 = svsubr_f16_x (svptrue_b16 (), z1, z2), -+ z0 = svsubr_x (svptrue_b16 (), z1, z2)) -+ -+/* -+** ptrue_subr_1_f16_x_tied1: -+** ... 
-+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_1_f16_x_tied1, svfloat16_t, -+ z0 = svsubr_n_f16_x (svptrue_b16 (), z0, 1), -+ z0 = svsubr_x (svptrue_b16 (), z0, 1)) -+ -+/* -+** ptrue_subr_1_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_1_f16_x_untied, svfloat16_t, -+ z0 = svsubr_n_f16_x (svptrue_b16 (), z1, 1), -+ z0 = svsubr_x (svptrue_b16 (), z1, 1)) -+ -+/* -+** ptrue_subr_0p5_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_0p5_f16_x_tied1, svfloat16_t, -+ z0 = svsubr_n_f16_x (svptrue_b16 (), z0, 0.5), -+ z0 = svsubr_x (svptrue_b16 (), z0, 0.5)) -+ -+/* -+** ptrue_subr_0p5_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_0p5_f16_x_untied, svfloat16_t, -+ z0 = svsubr_n_f16_x (svptrue_b16 (), z1, 0.5), -+ z0 = svsubr_x (svptrue_b16 (), z1, 0.5)) -+ -+/* -+** ptrue_subr_m1_f16_x_tied1: -+** fmov (z[0-9]+\.h), #-1\.0(?:e\+0)? -+** fsub z0\.h, \1, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_m1_f16_x_tied1, svfloat16_t, -+ z0 = svsubr_n_f16_x (svptrue_b16 (), z0, -1), -+ z0 = svsubr_x (svptrue_b16 (), z0, -1)) -+ -+/* -+** ptrue_subr_m1_f16_x_untied: -+** fmov (z[0-9]+\.h), #-1\.0(?:e\+0)? -+** fsub z0\.h, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_m1_f16_x_untied, svfloat16_t, -+ z0 = svsubr_n_f16_x (svptrue_b16 (), z1, -1), -+ z0 = svsubr_x (svptrue_b16 (), z1, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_f16_notrap.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_f16_notrap.c -new file mode 100644 -index 000000000..a31ebd2ef ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_f16_notrap.c -@@ -0,0 +1,439 @@ -+/* { dg-additional-options "-fno-trapping-math" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** subr_f16_m_tied1: -+** fsubr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f16_m_tied1, svfloat16_t, -+ z0 = svsubr_f16_m (p0, z0, z1), -+ z0 = svsubr_m (p0, z0, z1)) -+ -+/* -+** subr_f16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fsubr z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f16_m_tied2, svfloat16_t, -+ z0 = svsubr_f16_m (p0, z1, z0), -+ z0 = svsubr_m (p0, z1, z0)) -+ -+/* -+** subr_f16_m_untied: -+** movprfx z0, z1 -+** fsubr z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f16_m_untied, svfloat16_t, -+ z0 = svsubr_f16_m (p0, z1, z2), -+ z0 = svsubr_m (p0, z1, z2)) -+ -+/* -+** subr_h4_f16_m_tied1: -+** mov (z[0-9]+\.h), h4 -+** fsubr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (subr_h4_f16_m_tied1, svfloat16_t, __fp16, -+ z0 = svsubr_n_f16_m (p0, z0, d4), -+ z0 = svsubr_m (p0, z0, d4)) -+ -+/* -+** subr_h4_f16_m_untied: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0, z1 -+** fsubr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (subr_h4_f16_m_untied, svfloat16_t, __fp16, -+ z0 = svsubr_n_f16_m (p0, z1, d4), -+ z0 = svsubr_m (p0, z1, d4)) -+ -+/* -+** subr_1_f16_m_tied1: -+** fsubr z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_f16_m_tied1, svfloat16_t, -+ z0 = svsubr_n_f16_m (p0, z0, 1), -+ z0 = svsubr_m (p0, z0, 1)) -+ -+/* -+** subr_1_f16_m_untied: -+** movprfx z0, z1 -+** fsubr z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_f16_m_untied, svfloat16_t, -+ z0 = svsubr_n_f16_m (p0, z1, 1), -+ z0 = svsubr_m (p0, z1, 1)) -+ -+/* -+** 
subr_0p5_f16_m_tied1: -+** fsubr z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_0p5_f16_m_tied1, svfloat16_t, -+ z0 = svsubr_n_f16_m (p0, z0, 0.5), -+ z0 = svsubr_m (p0, z0, 0.5)) -+ -+/* -+** subr_0p5_f16_m_untied: -+** movprfx z0, z1 -+** fsubr z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_0p5_f16_m_untied, svfloat16_t, -+ z0 = svsubr_n_f16_m (p0, z1, 0.5), -+ z0 = svsubr_m (p0, z1, 0.5)) -+ -+/* -+** subr_m1_f16_m_tied1: -+** fmov (z[0-9]+\.h), #-1\.0(?:e\+0)? -+** fsubr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_f16_m_tied1, svfloat16_t, -+ z0 = svsubr_n_f16_m (p0, z0, -1), -+ z0 = svsubr_m (p0, z0, -1)) -+ -+/* -+** subr_m1_f16_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.h), #-1\.0(?:e\+0)? -+** movprfx z0, z1 -+** fsubr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_f16_m_untied, svfloat16_t, -+ z0 = svsubr_n_f16_m (p0, z1, -1), -+ z0 = svsubr_m (p0, z1, -1)) -+ -+/* -+** subr_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fsubr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f16_z_tied1, svfloat16_t, -+ z0 = svsubr_f16_z (p0, z0, z1), -+ z0 = svsubr_z (p0, z0, z1)) -+ -+/* -+** subr_f16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** fsub z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f16_z_tied2, svfloat16_t, -+ z0 = svsubr_f16_z (p0, z1, z0), -+ z0 = svsubr_z (p0, z1, z0)) -+ -+/* -+** subr_f16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fsubr z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** fsub z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f16_z_untied, svfloat16_t, -+ z0 = svsubr_f16_z (p0, z1, z2), -+ z0 = svsubr_z (p0, z1, z2)) -+ -+/* -+** subr_h4_f16_z_tied1: -+** mov (z[0-9]+\.h), h4 -+** movprfx z0\.h, p0/z, z0\.h -+** fsubr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (subr_h4_f16_z_tied1, svfloat16_t, __fp16, -+ z0 = svsubr_n_f16_z (p0, z0, d4), -+ z0 = svsubr_z (p0, z0, d4)) -+ -+/* -+** subr_h4_f16_z_untied: -+** mov (z[0-9]+\.h), h4 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fsubr z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** fsub z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (subr_h4_f16_z_untied, svfloat16_t, __fp16, -+ z0 = svsubr_n_f16_z (p0, z1, d4), -+ z0 = svsubr_z (p0, z1, d4)) -+ -+/* -+** subr_1_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fsubr z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_f16_z_tied1, svfloat16_t, -+ z0 = svsubr_n_f16_z (p0, z0, 1), -+ z0 = svsubr_z (p0, z0, 1)) -+ -+/* -+** subr_1_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fsubr z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_f16_z_untied, svfloat16_t, -+ z0 = svsubr_n_f16_z (p0, z1, 1), -+ z0 = svsubr_z (p0, z1, 1)) -+ -+/* -+** subr_0p5_f16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** fsubr z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_0p5_f16_z_tied1, svfloat16_t, -+ z0 = svsubr_n_f16_z (p0, z0, 0.5), -+ z0 = svsubr_z (p0, z0, 0.5)) -+ -+/* -+** subr_0p5_f16_z_untied: -+** movprfx z0\.h, p0/z, z1\.h -+** fsubr z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_0p5_f16_z_untied, svfloat16_t, -+ z0 = svsubr_n_f16_z (p0, z1, 0.5), -+ z0 = svsubr_z (p0, z1, 0.5)) -+ -+/* -+** subr_m1_f16_z_tied1: -+** fmov (z[0-9]+\.h), #-1\.0(?:e\+0)? 
-+** movprfx z0\.h, p0/z, z0\.h -+** fsubr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_f16_z_tied1, svfloat16_t, -+ z0 = svsubr_n_f16_z (p0, z0, -1), -+ z0 = svsubr_z (p0, z0, -1)) -+ -+/* -+** subr_m1_f16_z_untied: -+** fmov (z[0-9]+\.h), #-1\.0(?:e\+0)? -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** fsubr z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** fsub z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_f16_z_untied, svfloat16_t, -+ z0 = svsubr_n_f16_z (p0, z1, -1), -+ z0 = svsubr_z (p0, z1, -1)) -+ -+/* -+** subr_f16_x_tied1: -+** fsub z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f16_x_tied1, svfloat16_t, -+ z0 = svsubr_f16_x (p0, z0, z1), -+ z0 = svsubr_x (p0, z0, z1)) -+ -+/* -+** subr_f16_x_tied2: -+** fsub z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f16_x_tied2, svfloat16_t, -+ z0 = svsubr_f16_x (p0, z1, z0), -+ z0 = svsubr_x (p0, z1, z0)) -+ -+/* -+** subr_f16_x_untied: -+** fsub z0\.h, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f16_x_untied, svfloat16_t, -+ z0 = svsubr_f16_x (p0, z1, z2), -+ z0 = svsubr_x (p0, z1, z2)) -+ -+/* -+** subr_h4_f16_x_tied1: -+** mov (z[0-9]+\.h), h4 -+** fsub z0\.h, \1, z0\.h -+** ret -+*/ -+TEST_UNIFORM_ZD (subr_h4_f16_x_tied1, svfloat16_t, __fp16, -+ z0 = svsubr_n_f16_x (p0, z0, d4), -+ z0 = svsubr_x (p0, z0, d4)) -+ -+/* -+** subr_h4_f16_x_untied: -+** mov (z[0-9]+\.h), h4 -+** fsub z0\.h, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZD (subr_h4_f16_x_untied, svfloat16_t, __fp16, -+ z0 = svsubr_n_f16_x (p0, z1, d4), -+ z0 = svsubr_x (p0, z1, d4)) -+ -+/* -+** subr_1_f16_x_tied1: -+** fsubr z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_f16_x_tied1, svfloat16_t, -+ z0 = svsubr_n_f16_x (p0, z0, 1), -+ z0 = svsubr_x (p0, z0, 1)) -+ -+/* -+** subr_1_f16_x_untied: -+** movprfx z0, z1 -+** fsubr z0\.h, p0/m, z0\.h, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_f16_x_untied, svfloat16_t, -+ z0 = svsubr_n_f16_x (p0, z1, 1), -+ z0 = svsubr_x (p0, z1, 1)) -+ -+/* -+** subr_0p5_f16_x_tied1: -+** fsubr z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_0p5_f16_x_tied1, svfloat16_t, -+ z0 = svsubr_n_f16_x (p0, z0, 0.5), -+ z0 = svsubr_x (p0, z0, 0.5)) -+ -+/* -+** subr_0p5_f16_x_untied: -+** movprfx z0, z1 -+** fsubr z0\.h, p0/m, z0\.h, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_0p5_f16_x_untied, svfloat16_t, -+ z0 = svsubr_n_f16_x (p0, z1, 0.5), -+ z0 = svsubr_x (p0, z1, 0.5)) -+ -+/* -+** subr_m1_f16_x_tied1: -+** fmov (z[0-9]+\.h), #-1\.0(?:e\+0)? -+** fsub z0\.h, \1, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_f16_x_tied1, svfloat16_t, -+ z0 = svsubr_n_f16_x (p0, z0, -1), -+ z0 = svsubr_x (p0, z0, -1)) -+ -+/* -+** subr_m1_f16_x_untied: -+** fmov (z[0-9]+\.h), #-1\.0(?:e\+0)? 
-+** fsub z0\.h, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_f16_x_untied, svfloat16_t, -+ z0 = svsubr_n_f16_x (p0, z1, -1), -+ z0 = svsubr_x (p0, z1, -1)) -+ -+/* -+** ptrue_subr_f16_x_tied1: -+** fsub z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_f16_x_tied1, svfloat16_t, -+ z0 = svsubr_f16_x (svptrue_b16 (), z0, z1), -+ z0 = svsubr_x (svptrue_b16 (), z0, z1)) -+ -+/* -+** ptrue_subr_f16_x_tied2: -+** fsub z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_f16_x_tied2, svfloat16_t, -+ z0 = svsubr_f16_x (svptrue_b16 (), z1, z0), -+ z0 = svsubr_x (svptrue_b16 (), z1, z0)) -+ -+/* -+** ptrue_subr_f16_x_untied: -+** fsub z0\.h, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_f16_x_untied, svfloat16_t, -+ z0 = svsubr_f16_x (svptrue_b16 (), z1, z2), -+ z0 = svsubr_x (svptrue_b16 (), z1, z2)) -+ -+/* -+** ptrue_subr_1_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_1_f16_x_tied1, svfloat16_t, -+ z0 = svsubr_n_f16_x (svptrue_b16 (), z0, 1), -+ z0 = svsubr_x (svptrue_b16 (), z0, 1)) -+ -+/* -+** ptrue_subr_1_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_1_f16_x_untied, svfloat16_t, -+ z0 = svsubr_n_f16_x (svptrue_b16 (), z1, 1), -+ z0 = svsubr_x (svptrue_b16 (), z1, 1)) -+ -+/* -+** ptrue_subr_0p5_f16_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_0p5_f16_x_tied1, svfloat16_t, -+ z0 = svsubr_n_f16_x (svptrue_b16 (), z0, 0.5), -+ z0 = svsubr_x (svptrue_b16 (), z0, 0.5)) -+ -+/* -+** ptrue_subr_0p5_f16_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_0p5_f16_x_untied, svfloat16_t, -+ z0 = svsubr_n_f16_x (svptrue_b16 (), z1, 0.5), -+ z0 = svsubr_x (svptrue_b16 (), z1, 0.5)) -+ -+/* -+** ptrue_subr_m1_f16_x_tied1: -+** fmov (z[0-9]+\.h), #-1\.0(?:e\+0)? -+** fsub z0\.h, \1, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_m1_f16_x_tied1, svfloat16_t, -+ z0 = svsubr_n_f16_x (svptrue_b16 (), z0, -1), -+ z0 = svsubr_x (svptrue_b16 (), z0, -1)) -+ -+/* -+** ptrue_subr_m1_f16_x_untied: -+** fmov (z[0-9]+\.h), #-1\.0(?:e\+0)? 
-+** fsub z0\.h, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_m1_f16_x_untied, svfloat16_t, -+ z0 = svsubr_n_f16_x (svptrue_b16 (), z1, -1), -+ z0 = svsubr_x (svptrue_b16 (), z1, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_f32.c -new file mode 100644 -index 000000000..98dc7ad2b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_f32.c -@@ -0,0 +1,444 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** subr_f32_m_tied1: -+** fsubr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f32_m_tied1, svfloat32_t, -+ z0 = svsubr_f32_m (p0, z0, z1), -+ z0 = svsubr_m (p0, z0, z1)) -+ -+/* -+** subr_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fsubr z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f32_m_tied2, svfloat32_t, -+ z0 = svsubr_f32_m (p0, z1, z0), -+ z0 = svsubr_m (p0, z1, z0)) -+ -+/* -+** subr_f32_m_untied: -+** movprfx z0, z1 -+** fsubr z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f32_m_untied, svfloat32_t, -+ z0 = svsubr_f32_m (p0, z1, z2), -+ z0 = svsubr_m (p0, z1, z2)) -+ -+/* -+** subr_s4_f32_m_tied1: -+** mov (z[0-9]+\.s), s4 -+** fsubr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (subr_s4_f32_m_tied1, svfloat32_t, float, -+ z0 = svsubr_n_f32_m (p0, z0, d4), -+ z0 = svsubr_m (p0, z0, d4)) -+ -+/* -+** subr_s4_f32_m_untied: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0, z1 -+** fsubr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (subr_s4_f32_m_untied, svfloat32_t, float, -+ z0 = svsubr_n_f32_m (p0, z1, d4), -+ z0 = svsubr_m (p0, z1, d4)) -+ -+/* -+** subr_1_f32_m_tied1: -+** fsubr z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_f32_m_tied1, svfloat32_t, -+ z0 = svsubr_n_f32_m (p0, z0, 1), -+ z0 = svsubr_m (p0, z0, 1)) -+ -+/* -+** subr_1_f32_m_untied: -+** movprfx z0, z1 -+** fsubr z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_f32_m_untied, svfloat32_t, -+ z0 = svsubr_n_f32_m (p0, z1, 1), -+ z0 = svsubr_m (p0, z1, 1)) -+ -+/* -+** subr_0p5_f32_m_tied1: -+** fsubr z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_0p5_f32_m_tied1, svfloat32_t, -+ z0 = svsubr_n_f32_m (p0, z0, 0.5), -+ z0 = svsubr_m (p0, z0, 0.5)) -+ -+/* -+** subr_0p5_f32_m_untied: -+** movprfx z0, z1 -+** fsubr z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_0p5_f32_m_untied, svfloat32_t, -+ z0 = svsubr_n_f32_m (p0, z1, 0.5), -+ z0 = svsubr_m (p0, z1, 0.5)) -+ -+/* -+** subr_m1_f32_m_tied1: -+** fmov (z[0-9]+\.s), #-1\.0(?:e\+0)? -+** fsubr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_f32_m_tied1, svfloat32_t, -+ z0 = svsubr_n_f32_m (p0, z0, -1), -+ z0 = svsubr_m (p0, z0, -1)) -+ -+/* -+** subr_m1_f32_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.s), #-1\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fsubr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_f32_m_untied, svfloat32_t, -+ z0 = svsubr_n_f32_m (p0, z1, -1), -+ z0 = svsubr_m (p0, z1, -1)) -+ -+/* -+** subr_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fsubr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f32_z_tied1, svfloat32_t, -+ z0 = svsubr_f32_z (p0, z0, z1), -+ z0 = svsubr_z (p0, z0, z1)) -+ -+/* -+** subr_f32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** fsub z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f32_z_tied2, svfloat32_t, -+ z0 = svsubr_f32_z (p0, z1, z0), -+ z0 = svsubr_z (p0, z1, z0)) -+ -+/* -+** subr_f32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fsubr z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fsub z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f32_z_untied, svfloat32_t, -+ z0 = svsubr_f32_z (p0, z1, z2), -+ z0 = svsubr_z (p0, z1, z2)) -+ -+/* -+** subr_s4_f32_z_tied1: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0\.s, p0/z, z0\.s -+** fsubr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (subr_s4_f32_z_tied1, svfloat32_t, float, -+ z0 = svsubr_n_f32_z (p0, z0, d4), -+ z0 = svsubr_z (p0, z0, d4)) -+ -+/* -+** subr_s4_f32_z_untied: -+** mov (z[0-9]+\.s), s4 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fsubr z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** fsub z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (subr_s4_f32_z_untied, svfloat32_t, float, -+ z0 = svsubr_n_f32_z (p0, z1, d4), -+ z0 = svsubr_z (p0, z1, d4)) -+ -+/* -+** subr_1_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fsubr z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_f32_z_tied1, svfloat32_t, -+ z0 = svsubr_n_f32_z (p0, z0, 1), -+ z0 = svsubr_z (p0, z0, 1)) -+ -+/* -+** subr_1_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fsubr z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_f32_z_untied, svfloat32_t, -+ z0 = svsubr_n_f32_z (p0, z1, 1), -+ z0 = svsubr_z (p0, z1, 1)) -+ -+/* -+** subr_0p5_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fsubr z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_0p5_f32_z_tied1, svfloat32_t, -+ z0 = svsubr_n_f32_z (p0, z0, 0.5), -+ z0 = svsubr_z (p0, z0, 0.5)) -+ -+/* -+** subr_0p5_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fsubr z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_0p5_f32_z_untied, svfloat32_t, -+ z0 = svsubr_n_f32_z (p0, z1, 0.5), -+ z0 = svsubr_z (p0, z1, 0.5)) -+ -+/* -+** subr_m1_f32_z_tied1: -+** fmov (z[0-9]+\.s), #-1\.0(?:e\+0)? -+** movprfx z0\.s, p0/z, z0\.s -+** fsubr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_f32_z_tied1, svfloat32_t, -+ z0 = svsubr_n_f32_z (p0, z0, -1), -+ z0 = svsubr_z (p0, z0, -1)) -+ -+/* -+** subr_m1_f32_z_untied: -+** fmov (z[0-9]+\.s), #-1\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fsubr z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** fsub z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_f32_z_untied, svfloat32_t, -+ z0 = svsubr_n_f32_z (p0, z1, -1), -+ z0 = svsubr_z (p0, z1, -1)) -+ -+/* -+** subr_f32_x_tied1: -+** fsubr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f32_x_tied1, svfloat32_t, -+ z0 = svsubr_f32_x (p0, z0, z1), -+ z0 = svsubr_x (p0, z0, z1)) -+ -+/* -+** subr_f32_x_tied2: -+** fsub z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f32_x_tied2, svfloat32_t, -+ z0 = svsubr_f32_x (p0, z1, z0), -+ z0 = svsubr_x (p0, z1, z0)) -+ -+/* -+** subr_f32_x_untied: -+** ( -+** movprfx z0, z1 -+** fsubr z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0, z2 -+** fsub z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f32_x_untied, svfloat32_t, -+ z0 = svsubr_f32_x (p0, z1, z2), -+ z0 = svsubr_x (p0, z1, z2)) -+ -+/* -+** subr_s4_f32_x_tied1: -+** mov (z[0-9]+\.s), s4 -+** fsubr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (subr_s4_f32_x_tied1, svfloat32_t, float, -+ z0 = svsubr_n_f32_x (p0, z0, d4), -+ z0 = svsubr_x (p0, z0, d4)) -+ -+/* -+** subr_s4_f32_x_untied: { xfail *-*-* } -+** mov z0\.s, s4 -+** fsub z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZD (subr_s4_f32_x_untied, svfloat32_t, float, -+ z0 = svsubr_n_f32_x (p0, z1, d4), -+ z0 = svsubr_x (p0, z1, d4)) -+ -+/* -+** subr_1_f32_x_tied1: -+** fsubr z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_f32_x_tied1, svfloat32_t, -+ z0 = svsubr_n_f32_x (p0, z0, 1), -+ z0 = svsubr_x (p0, z0, 1)) -+ -+/* -+** subr_1_f32_x_untied: -+** movprfx z0, z1 -+** fsubr z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_f32_x_untied, svfloat32_t, -+ z0 = svsubr_n_f32_x (p0, z1, 1), -+ z0 = svsubr_x (p0, z1, 1)) -+ -+/* -+** subr_0p5_f32_x_tied1: -+** fsubr z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_0p5_f32_x_tied1, svfloat32_t, -+ z0 = svsubr_n_f32_x (p0, z0, 0.5), -+ z0 = svsubr_x (p0, z0, 0.5)) -+ -+/* -+** subr_0p5_f32_x_untied: -+** movprfx z0, z1 -+** fsubr z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_0p5_f32_x_untied, svfloat32_t, -+ z0 = svsubr_n_f32_x (p0, z1, 0.5), -+ z0 = svsubr_x (p0, z1, 0.5)) -+ -+/* -+** subr_m1_f32_x_tied1: -+** fmov (z[0-9]+\.s), #-1\.0(?:e\+0)? -+** fsubr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_f32_x_tied1, svfloat32_t, -+ z0 = svsubr_n_f32_x (p0, z0, -1), -+ z0 = svsubr_x (p0, z0, -1)) -+ -+/* -+** subr_m1_f32_x_untied: -+** fmov z0\.s, #-1\.0(?:e\+0)? -+** fsub z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_f32_x_untied, svfloat32_t, -+ z0 = svsubr_n_f32_x (p0, z1, -1), -+ z0 = svsubr_x (p0, z1, -1)) -+ -+/* -+** ptrue_subr_f32_x_tied1: -+** fsub z0\.s, z1\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_f32_x_tied1, svfloat32_t, -+ z0 = svsubr_f32_x (svptrue_b32 (), z0, z1), -+ z0 = svsubr_x (svptrue_b32 (), z0, z1)) -+ -+/* -+** ptrue_subr_f32_x_tied2: -+** fsub z0\.s, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_f32_x_tied2, svfloat32_t, -+ z0 = svsubr_f32_x (svptrue_b32 (), z1, z0), -+ z0 = svsubr_x (svptrue_b32 (), z1, z0)) -+ -+/* -+** ptrue_subr_f32_x_untied: -+** fsub z0\.s, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_f32_x_untied, svfloat32_t, -+ z0 = svsubr_f32_x (svptrue_b32 (), z1, z2), -+ z0 = svsubr_x (svptrue_b32 (), z1, z2)) -+ -+/* -+** ptrue_subr_1_f32_x_tied1: -+** ... 
-+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_1_f32_x_tied1, svfloat32_t, -+ z0 = svsubr_n_f32_x (svptrue_b32 (), z0, 1), -+ z0 = svsubr_x (svptrue_b32 (), z0, 1)) -+ -+/* -+** ptrue_subr_1_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_1_f32_x_untied, svfloat32_t, -+ z0 = svsubr_n_f32_x (svptrue_b32 (), z1, 1), -+ z0 = svsubr_x (svptrue_b32 (), z1, 1)) -+ -+/* -+** ptrue_subr_0p5_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_0p5_f32_x_tied1, svfloat32_t, -+ z0 = svsubr_n_f32_x (svptrue_b32 (), z0, 0.5), -+ z0 = svsubr_x (svptrue_b32 (), z0, 0.5)) -+ -+/* -+** ptrue_subr_0p5_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_0p5_f32_x_untied, svfloat32_t, -+ z0 = svsubr_n_f32_x (svptrue_b32 (), z1, 0.5), -+ z0 = svsubr_x (svptrue_b32 (), z1, 0.5)) -+ -+/* -+** ptrue_subr_m1_f32_x_tied1: -+** fmov (z[0-9]+\.s), #-1\.0(?:e\+0)? -+** fsub z0\.s, \1, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_m1_f32_x_tied1, svfloat32_t, -+ z0 = svsubr_n_f32_x (svptrue_b32 (), z0, -1), -+ z0 = svsubr_x (svptrue_b32 (), z0, -1)) -+ -+/* -+** ptrue_subr_m1_f32_x_untied: -+** fmov (z[0-9]+\.s), #-1\.0(?:e\+0)? -+** fsub z0\.s, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_m1_f32_x_untied, svfloat32_t, -+ z0 = svsubr_n_f32_x (svptrue_b32 (), z1, -1), -+ z0 = svsubr_x (svptrue_b32 (), z1, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_f32_notrap.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_f32_notrap.c -new file mode 100644 -index 000000000..75ae0dc61 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_f32_notrap.c -@@ -0,0 +1,439 @@ -+/* { dg-additional-options "-fno-trapping-math" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** subr_f32_m_tied1: -+** fsubr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f32_m_tied1, svfloat32_t, -+ z0 = svsubr_f32_m (p0, z0, z1), -+ z0 = svsubr_m (p0, z0, z1)) -+ -+/* -+** subr_f32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** fsubr z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f32_m_tied2, svfloat32_t, -+ z0 = svsubr_f32_m (p0, z1, z0), -+ z0 = svsubr_m (p0, z1, z0)) -+ -+/* -+** subr_f32_m_untied: -+** movprfx z0, z1 -+** fsubr z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f32_m_untied, svfloat32_t, -+ z0 = svsubr_f32_m (p0, z1, z2), -+ z0 = svsubr_m (p0, z1, z2)) -+ -+/* -+** subr_s4_f32_m_tied1: -+** mov (z[0-9]+\.s), s4 -+** fsubr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (subr_s4_f32_m_tied1, svfloat32_t, float, -+ z0 = svsubr_n_f32_m (p0, z0, d4), -+ z0 = svsubr_m (p0, z0, d4)) -+ -+/* -+** subr_s4_f32_m_untied: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0, z1 -+** fsubr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (subr_s4_f32_m_untied, svfloat32_t, float, -+ z0 = svsubr_n_f32_m (p0, z1, d4), -+ z0 = svsubr_m (p0, z1, d4)) -+ -+/* -+** subr_1_f32_m_tied1: -+** fsubr z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_f32_m_tied1, svfloat32_t, -+ z0 = svsubr_n_f32_m (p0, z0, 1), -+ z0 = svsubr_m (p0, z0, 1)) -+ -+/* -+** subr_1_f32_m_untied: -+** movprfx z0, z1 -+** fsubr z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_f32_m_untied, svfloat32_t, -+ z0 = svsubr_n_f32_m (p0, z1, 1), -+ z0 = svsubr_m (p0, z1, 1)) -+ -+/* -+** 
subr_0p5_f32_m_tied1: -+** fsubr z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_0p5_f32_m_tied1, svfloat32_t, -+ z0 = svsubr_n_f32_m (p0, z0, 0.5), -+ z0 = svsubr_m (p0, z0, 0.5)) -+ -+/* -+** subr_0p5_f32_m_untied: -+** movprfx z0, z1 -+** fsubr z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_0p5_f32_m_untied, svfloat32_t, -+ z0 = svsubr_n_f32_m (p0, z1, 0.5), -+ z0 = svsubr_m (p0, z1, 0.5)) -+ -+/* -+** subr_m1_f32_m_tied1: -+** fmov (z[0-9]+\.s), #-1\.0(?:e\+0)? -+** fsubr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_f32_m_tied1, svfloat32_t, -+ z0 = svsubr_n_f32_m (p0, z0, -1), -+ z0 = svsubr_m (p0, z0, -1)) -+ -+/* -+** subr_m1_f32_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.s), #-1\.0(?:e\+0)? -+** movprfx z0, z1 -+** fsubr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_f32_m_untied, svfloat32_t, -+ z0 = svsubr_n_f32_m (p0, z1, -1), -+ z0 = svsubr_m (p0, z1, -1)) -+ -+/* -+** subr_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fsubr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f32_z_tied1, svfloat32_t, -+ z0 = svsubr_f32_z (p0, z0, z1), -+ z0 = svsubr_z (p0, z0, z1)) -+ -+/* -+** subr_f32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** fsub z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f32_z_tied2, svfloat32_t, -+ z0 = svsubr_f32_z (p0, z1, z0), -+ z0 = svsubr_z (p0, z1, z0)) -+ -+/* -+** subr_f32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fsubr z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** fsub z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f32_z_untied, svfloat32_t, -+ z0 = svsubr_f32_z (p0, z1, z2), -+ z0 = svsubr_z (p0, z1, z2)) -+ -+/* -+** subr_s4_f32_z_tied1: -+** mov (z[0-9]+\.s), s4 -+** movprfx z0\.s, p0/z, z0\.s -+** fsubr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (subr_s4_f32_z_tied1, svfloat32_t, float, -+ z0 = svsubr_n_f32_z (p0, z0, d4), -+ z0 = svsubr_z (p0, z0, d4)) -+ -+/* -+** subr_s4_f32_z_untied: -+** mov (z[0-9]+\.s), s4 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fsubr z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** fsub z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (subr_s4_f32_z_untied, svfloat32_t, float, -+ z0 = svsubr_n_f32_z (p0, z1, d4), -+ z0 = svsubr_z (p0, z1, d4)) -+ -+/* -+** subr_1_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fsubr z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_f32_z_tied1, svfloat32_t, -+ z0 = svsubr_n_f32_z (p0, z0, 1), -+ z0 = svsubr_z (p0, z0, 1)) -+ -+/* -+** subr_1_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fsubr z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_f32_z_untied, svfloat32_t, -+ z0 = svsubr_n_f32_z (p0, z1, 1), -+ z0 = svsubr_z (p0, z1, 1)) -+ -+/* -+** subr_0p5_f32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** fsubr z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_0p5_f32_z_tied1, svfloat32_t, -+ z0 = svsubr_n_f32_z (p0, z0, 0.5), -+ z0 = svsubr_z (p0, z0, 0.5)) -+ -+/* -+** subr_0p5_f32_z_untied: -+** movprfx z0\.s, p0/z, z1\.s -+** fsubr z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_0p5_f32_z_untied, svfloat32_t, -+ z0 = svsubr_n_f32_z (p0, z1, 0.5), -+ z0 = svsubr_z (p0, z1, 0.5)) -+ -+/* -+** subr_m1_f32_z_tied1: -+** fmov (z[0-9]+\.s), #-1\.0(?:e\+0)? 
-+** movprfx z0\.s, p0/z, z0\.s -+** fsubr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_f32_z_tied1, svfloat32_t, -+ z0 = svsubr_n_f32_z (p0, z0, -1), -+ z0 = svsubr_z (p0, z0, -1)) -+ -+/* -+** subr_m1_f32_z_untied: -+** fmov (z[0-9]+\.s), #-1\.0(?:e\+0)? -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** fsubr z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** fsub z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_f32_z_untied, svfloat32_t, -+ z0 = svsubr_n_f32_z (p0, z1, -1), -+ z0 = svsubr_z (p0, z1, -1)) -+ -+/* -+** subr_f32_x_tied1: -+** fsub z0\.s, z1\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f32_x_tied1, svfloat32_t, -+ z0 = svsubr_f32_x (p0, z0, z1), -+ z0 = svsubr_x (p0, z0, z1)) -+ -+/* -+** subr_f32_x_tied2: -+** fsub z0\.s, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f32_x_tied2, svfloat32_t, -+ z0 = svsubr_f32_x (p0, z1, z0), -+ z0 = svsubr_x (p0, z1, z0)) -+ -+/* -+** subr_f32_x_untied: -+** fsub z0\.s, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f32_x_untied, svfloat32_t, -+ z0 = svsubr_f32_x (p0, z1, z2), -+ z0 = svsubr_x (p0, z1, z2)) -+ -+/* -+** subr_s4_f32_x_tied1: -+** mov (z[0-9]+\.s), s4 -+** fsub z0\.s, \1, z0\.s -+** ret -+*/ -+TEST_UNIFORM_ZD (subr_s4_f32_x_tied1, svfloat32_t, float, -+ z0 = svsubr_n_f32_x (p0, z0, d4), -+ z0 = svsubr_x (p0, z0, d4)) -+ -+/* -+** subr_s4_f32_x_untied: -+** mov (z[0-9]+\.s), s4 -+** fsub z0\.s, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZD (subr_s4_f32_x_untied, svfloat32_t, float, -+ z0 = svsubr_n_f32_x (p0, z1, d4), -+ z0 = svsubr_x (p0, z1, d4)) -+ -+/* -+** subr_1_f32_x_tied1: -+** fsubr z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_f32_x_tied1, svfloat32_t, -+ z0 = svsubr_n_f32_x (p0, z0, 1), -+ z0 = svsubr_x (p0, z0, 1)) -+ -+/* -+** subr_1_f32_x_untied: -+** movprfx z0, z1 -+** fsubr z0\.s, p0/m, z0\.s, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_f32_x_untied, svfloat32_t, -+ z0 = svsubr_n_f32_x (p0, z1, 1), -+ z0 = svsubr_x (p0, z1, 1)) -+ -+/* -+** subr_0p5_f32_x_tied1: -+** fsubr z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_0p5_f32_x_tied1, svfloat32_t, -+ z0 = svsubr_n_f32_x (p0, z0, 0.5), -+ z0 = svsubr_x (p0, z0, 0.5)) -+ -+/* -+** subr_0p5_f32_x_untied: -+** movprfx z0, z1 -+** fsubr z0\.s, p0/m, z0\.s, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_0p5_f32_x_untied, svfloat32_t, -+ z0 = svsubr_n_f32_x (p0, z1, 0.5), -+ z0 = svsubr_x (p0, z1, 0.5)) -+ -+/* -+** subr_m1_f32_x_tied1: -+** fmov (z[0-9]+\.s), #-1\.0(?:e\+0)? -+** fsub z0\.s, \1, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_f32_x_tied1, svfloat32_t, -+ z0 = svsubr_n_f32_x (p0, z0, -1), -+ z0 = svsubr_x (p0, z0, -1)) -+ -+/* -+** subr_m1_f32_x_untied: -+** fmov (z[0-9]+\.s), #-1\.0(?:e\+0)? 
-+** fsub z0\.s, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_f32_x_untied, svfloat32_t, -+ z0 = svsubr_n_f32_x (p0, z1, -1), -+ z0 = svsubr_x (p0, z1, -1)) -+ -+/* -+** ptrue_subr_f32_x_tied1: -+** fsub z0\.s, z1\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_f32_x_tied1, svfloat32_t, -+ z0 = svsubr_f32_x (svptrue_b32 (), z0, z1), -+ z0 = svsubr_x (svptrue_b32 (), z0, z1)) -+ -+/* -+** ptrue_subr_f32_x_tied2: -+** fsub z0\.s, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_f32_x_tied2, svfloat32_t, -+ z0 = svsubr_f32_x (svptrue_b32 (), z1, z0), -+ z0 = svsubr_x (svptrue_b32 (), z1, z0)) -+ -+/* -+** ptrue_subr_f32_x_untied: -+** fsub z0\.s, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_f32_x_untied, svfloat32_t, -+ z0 = svsubr_f32_x (svptrue_b32 (), z1, z2), -+ z0 = svsubr_x (svptrue_b32 (), z1, z2)) -+ -+/* -+** ptrue_subr_1_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_1_f32_x_tied1, svfloat32_t, -+ z0 = svsubr_n_f32_x (svptrue_b32 (), z0, 1), -+ z0 = svsubr_x (svptrue_b32 (), z0, 1)) -+ -+/* -+** ptrue_subr_1_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_1_f32_x_untied, svfloat32_t, -+ z0 = svsubr_n_f32_x (svptrue_b32 (), z1, 1), -+ z0 = svsubr_x (svptrue_b32 (), z1, 1)) -+ -+/* -+** ptrue_subr_0p5_f32_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_0p5_f32_x_tied1, svfloat32_t, -+ z0 = svsubr_n_f32_x (svptrue_b32 (), z0, 0.5), -+ z0 = svsubr_x (svptrue_b32 (), z0, 0.5)) -+ -+/* -+** ptrue_subr_0p5_f32_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_0p5_f32_x_untied, svfloat32_t, -+ z0 = svsubr_n_f32_x (svptrue_b32 (), z1, 0.5), -+ z0 = svsubr_x (svptrue_b32 (), z1, 0.5)) -+ -+/* -+** ptrue_subr_m1_f32_x_tied1: -+** fmov (z[0-9]+\.s), #-1\.0(?:e\+0)? -+** fsub z0\.s, \1, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_m1_f32_x_tied1, svfloat32_t, -+ z0 = svsubr_n_f32_x (svptrue_b32 (), z0, -1), -+ z0 = svsubr_x (svptrue_b32 (), z0, -1)) -+ -+/* -+** ptrue_subr_m1_f32_x_untied: -+** fmov (z[0-9]+\.s), #-1\.0(?:e\+0)? 
-+** fsub z0\.s, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_m1_f32_x_untied, svfloat32_t, -+ z0 = svsubr_n_f32_x (svptrue_b32 (), z1, -1), -+ z0 = svsubr_x (svptrue_b32 (), z1, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_f64.c -new file mode 100644 -index 000000000..81f1112d7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_f64.c -@@ -0,0 +1,444 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** subr_f64_m_tied1: -+** fsubr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f64_m_tied1, svfloat64_t, -+ z0 = svsubr_f64_m (p0, z0, z1), -+ z0 = svsubr_m (p0, z0, z1)) -+ -+/* -+** subr_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fsubr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f64_m_tied2, svfloat64_t, -+ z0 = svsubr_f64_m (p0, z1, z0), -+ z0 = svsubr_m (p0, z1, z0)) -+ -+/* -+** subr_f64_m_untied: -+** movprfx z0, z1 -+** fsubr z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f64_m_untied, svfloat64_t, -+ z0 = svsubr_f64_m (p0, z1, z2), -+ z0 = svsubr_m (p0, z1, z2)) -+ -+/* -+** subr_d4_f64_m_tied1: -+** mov (z[0-9]+\.d), d4 -+** fsubr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (subr_d4_f64_m_tied1, svfloat64_t, double, -+ z0 = svsubr_n_f64_m (p0, z0, d4), -+ z0 = svsubr_m (p0, z0, d4)) -+ -+/* -+** subr_d4_f64_m_untied: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0, z1 -+** fsubr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (subr_d4_f64_m_untied, svfloat64_t, double, -+ z0 = svsubr_n_f64_m (p0, z1, d4), -+ z0 = svsubr_m (p0, z1, d4)) -+ -+/* -+** subr_1_f64_m_tied1: -+** fsubr z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_f64_m_tied1, svfloat64_t, -+ z0 = svsubr_n_f64_m (p0, z0, 1), -+ z0 = svsubr_m (p0, z0, 1)) -+ -+/* -+** subr_1_f64_m_untied: -+** movprfx z0, z1 -+** fsubr z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_f64_m_untied, svfloat64_t, -+ z0 = svsubr_n_f64_m (p0, z1, 1), -+ z0 = svsubr_m (p0, z1, 1)) -+ -+/* -+** subr_0p5_f64_m_tied1: -+** fsubr z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_0p5_f64_m_tied1, svfloat64_t, -+ z0 = svsubr_n_f64_m (p0, z0, 0.5), -+ z0 = svsubr_m (p0, z0, 0.5)) -+ -+/* -+** subr_0p5_f64_m_untied: -+** movprfx z0, z1 -+** fsubr z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_0p5_f64_m_untied, svfloat64_t, -+ z0 = svsubr_n_f64_m (p0, z1, 0.5), -+ z0 = svsubr_m (p0, z1, 0.5)) -+ -+/* -+** subr_m1_f64_m_tied1: -+** fmov (z[0-9]+\.d), #-1\.0(?:e\+0)? -+** fsubr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_f64_m_tied1, svfloat64_t, -+ z0 = svsubr_n_f64_m (p0, z0, -1), -+ z0 = svsubr_m (p0, z0, -1)) -+ -+/* -+** subr_m1_f64_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.d), #-1\.0(?:e\+0)? 
-+** movprfx z0, z1 -+** fsubr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_f64_m_untied, svfloat64_t, -+ z0 = svsubr_n_f64_m (p0, z1, -1), -+ z0 = svsubr_m (p0, z1, -1)) -+ -+/* -+** subr_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fsubr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f64_z_tied1, svfloat64_t, -+ z0 = svsubr_f64_z (p0, z0, z1), -+ z0 = svsubr_z (p0, z0, z1)) -+ -+/* -+** subr_f64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** fsub z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f64_z_tied2, svfloat64_t, -+ z0 = svsubr_f64_z (p0, z1, z0), -+ z0 = svsubr_z (p0, z1, z0)) -+ -+/* -+** subr_f64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fsubr z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fsub z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f64_z_untied, svfloat64_t, -+ z0 = svsubr_f64_z (p0, z1, z2), -+ z0 = svsubr_z (p0, z1, z2)) -+ -+/* -+** subr_d4_f64_z_tied1: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0\.d, p0/z, z0\.d -+** fsubr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (subr_d4_f64_z_tied1, svfloat64_t, double, -+ z0 = svsubr_n_f64_z (p0, z0, d4), -+ z0 = svsubr_z (p0, z0, d4)) -+ -+/* -+** subr_d4_f64_z_untied: -+** mov (z[0-9]+\.d), d4 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fsubr z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** fsub z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (subr_d4_f64_z_untied, svfloat64_t, double, -+ z0 = svsubr_n_f64_z (p0, z1, d4), -+ z0 = svsubr_z (p0, z1, d4)) -+ -+/* -+** subr_1_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fsubr z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_f64_z_tied1, svfloat64_t, -+ z0 = svsubr_n_f64_z (p0, z0, 1), -+ z0 = svsubr_z (p0, z0, 1)) -+ -+/* -+** subr_1_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fsubr z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_f64_z_untied, svfloat64_t, -+ z0 = svsubr_n_f64_z (p0, z1, 1), -+ z0 = svsubr_z (p0, z1, 1)) -+ -+/* -+** subr_0p5_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fsubr z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_0p5_f64_z_tied1, svfloat64_t, -+ z0 = svsubr_n_f64_z (p0, z0, 0.5), -+ z0 = svsubr_z (p0, z0, 0.5)) -+ -+/* -+** subr_0p5_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fsubr z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_0p5_f64_z_untied, svfloat64_t, -+ z0 = svsubr_n_f64_z (p0, z1, 0.5), -+ z0 = svsubr_z (p0, z1, 0.5)) -+ -+/* -+** subr_m1_f64_z_tied1: -+** fmov (z[0-9]+\.d), #-1\.0(?:e\+0)? -+** movprfx z0\.d, p0/z, z0\.d -+** fsubr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_f64_z_tied1, svfloat64_t, -+ z0 = svsubr_n_f64_z (p0, z0, -1), -+ z0 = svsubr_z (p0, z0, -1)) -+ -+/* -+** subr_m1_f64_z_untied: -+** fmov (z[0-9]+\.d), #-1\.0(?:e\+0)? 
-+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fsubr z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** fsub z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_f64_z_untied, svfloat64_t, -+ z0 = svsubr_n_f64_z (p0, z1, -1), -+ z0 = svsubr_z (p0, z1, -1)) -+ -+/* -+** subr_f64_x_tied1: -+** fsubr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f64_x_tied1, svfloat64_t, -+ z0 = svsubr_f64_x (p0, z0, z1), -+ z0 = svsubr_x (p0, z0, z1)) -+ -+/* -+** subr_f64_x_tied2: -+** fsub z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f64_x_tied2, svfloat64_t, -+ z0 = svsubr_f64_x (p0, z1, z0), -+ z0 = svsubr_x (p0, z1, z0)) -+ -+/* -+** subr_f64_x_untied: -+** ( -+** movprfx z0, z1 -+** fsubr z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0, z2 -+** fsub z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f64_x_untied, svfloat64_t, -+ z0 = svsubr_f64_x (p0, z1, z2), -+ z0 = svsubr_x (p0, z1, z2)) -+ -+/* -+** subr_d4_f64_x_tied1: -+** mov (z[0-9]+\.d), d4 -+** fsubr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (subr_d4_f64_x_tied1, svfloat64_t, double, -+ z0 = svsubr_n_f64_x (p0, z0, d4), -+ z0 = svsubr_x (p0, z0, d4)) -+ -+/* -+** subr_d4_f64_x_untied: { xfail *-*-* } -+** mov z0\.d, d4 -+** fsub z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZD (subr_d4_f64_x_untied, svfloat64_t, double, -+ z0 = svsubr_n_f64_x (p0, z1, d4), -+ z0 = svsubr_x (p0, z1, d4)) -+ -+/* -+** subr_1_f64_x_tied1: -+** fsubr z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_f64_x_tied1, svfloat64_t, -+ z0 = svsubr_n_f64_x (p0, z0, 1), -+ z0 = svsubr_x (p0, z0, 1)) -+ -+/* -+** subr_1_f64_x_untied: -+** movprfx z0, z1 -+** fsubr z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_f64_x_untied, svfloat64_t, -+ z0 = svsubr_n_f64_x (p0, z1, 1), -+ z0 = svsubr_x (p0, z1, 1)) -+ -+/* -+** subr_0p5_f64_x_tied1: -+** fsubr z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_0p5_f64_x_tied1, svfloat64_t, -+ z0 = svsubr_n_f64_x (p0, z0, 0.5), -+ z0 = svsubr_x (p0, z0, 0.5)) -+ -+/* -+** subr_0p5_f64_x_untied: -+** movprfx z0, z1 -+** fsubr z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_0p5_f64_x_untied, svfloat64_t, -+ z0 = svsubr_n_f64_x (p0, z1, 0.5), -+ z0 = svsubr_x (p0, z1, 0.5)) -+ -+/* -+** subr_m1_f64_x_tied1: -+** fmov (z[0-9]+\.d), #-1\.0(?:e\+0)? -+** fsubr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_f64_x_tied1, svfloat64_t, -+ z0 = svsubr_n_f64_x (p0, z0, -1), -+ z0 = svsubr_x (p0, z0, -1)) -+ -+/* -+** subr_m1_f64_x_untied: -+** fmov z0\.d, #-1\.0(?:e\+0)? -+** fsub z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_f64_x_untied, svfloat64_t, -+ z0 = svsubr_n_f64_x (p0, z1, -1), -+ z0 = svsubr_x (p0, z1, -1)) -+ -+/* -+** ptrue_subr_f64_x_tied1: -+** fsub z0\.d, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_f64_x_tied1, svfloat64_t, -+ z0 = svsubr_f64_x (svptrue_b64 (), z0, z1), -+ z0 = svsubr_x (svptrue_b64 (), z0, z1)) -+ -+/* -+** ptrue_subr_f64_x_tied2: -+** fsub z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_f64_x_tied2, svfloat64_t, -+ z0 = svsubr_f64_x (svptrue_b64 (), z1, z0), -+ z0 = svsubr_x (svptrue_b64 (), z1, z0)) -+ -+/* -+** ptrue_subr_f64_x_untied: -+** fsub z0\.d, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_f64_x_untied, svfloat64_t, -+ z0 = svsubr_f64_x (svptrue_b64 (), z1, z2), -+ z0 = svsubr_x (svptrue_b64 (), z1, z2)) -+ -+/* -+** ptrue_subr_1_f64_x_tied1: -+** ... 
-+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_1_f64_x_tied1, svfloat64_t, -+ z0 = svsubr_n_f64_x (svptrue_b64 (), z0, 1), -+ z0 = svsubr_x (svptrue_b64 (), z0, 1)) -+ -+/* -+** ptrue_subr_1_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_1_f64_x_untied, svfloat64_t, -+ z0 = svsubr_n_f64_x (svptrue_b64 (), z1, 1), -+ z0 = svsubr_x (svptrue_b64 (), z1, 1)) -+ -+/* -+** ptrue_subr_0p5_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_0p5_f64_x_tied1, svfloat64_t, -+ z0 = svsubr_n_f64_x (svptrue_b64 (), z0, 0.5), -+ z0 = svsubr_x (svptrue_b64 (), z0, 0.5)) -+ -+/* -+** ptrue_subr_0p5_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_0p5_f64_x_untied, svfloat64_t, -+ z0 = svsubr_n_f64_x (svptrue_b64 (), z1, 0.5), -+ z0 = svsubr_x (svptrue_b64 (), z1, 0.5)) -+ -+/* -+** ptrue_subr_m1_f64_x_tied1: -+** fmov (z[0-9]+\.d), #-1\.0(?:e\+0)? -+** fsub z0\.d, \1, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_m1_f64_x_tied1, svfloat64_t, -+ z0 = svsubr_n_f64_x (svptrue_b64 (), z0, -1), -+ z0 = svsubr_x (svptrue_b64 (), z0, -1)) -+ -+/* -+** ptrue_subr_m1_f64_x_untied: -+** fmov (z[0-9]+\.d), #-1\.0(?:e\+0)? -+** fsub z0\.d, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_m1_f64_x_untied, svfloat64_t, -+ z0 = svsubr_n_f64_x (svptrue_b64 (), z1, -1), -+ z0 = svsubr_x (svptrue_b64 (), z1, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_f64_notrap.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_f64_notrap.c -new file mode 100644 -index 000000000..98598dd77 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_f64_notrap.c -@@ -0,0 +1,439 @@ -+/* { dg-additional-options "-fno-trapping-math" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** subr_f64_m_tied1: -+** fsubr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f64_m_tied1, svfloat64_t, -+ z0 = svsubr_f64_m (p0, z0, z1), -+ z0 = svsubr_m (p0, z0, z1)) -+ -+/* -+** subr_f64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** fsubr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f64_m_tied2, svfloat64_t, -+ z0 = svsubr_f64_m (p0, z1, z0), -+ z0 = svsubr_m (p0, z1, z0)) -+ -+/* -+** subr_f64_m_untied: -+** movprfx z0, z1 -+** fsubr z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f64_m_untied, svfloat64_t, -+ z0 = svsubr_f64_m (p0, z1, z2), -+ z0 = svsubr_m (p0, z1, z2)) -+ -+/* -+** subr_d4_f64_m_tied1: -+** mov (z[0-9]+\.d), d4 -+** fsubr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (subr_d4_f64_m_tied1, svfloat64_t, double, -+ z0 = svsubr_n_f64_m (p0, z0, d4), -+ z0 = svsubr_m (p0, z0, d4)) -+ -+/* -+** subr_d4_f64_m_untied: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0, z1 -+** fsubr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (subr_d4_f64_m_untied, svfloat64_t, double, -+ z0 = svsubr_n_f64_m (p0, z1, d4), -+ z0 = svsubr_m (p0, z1, d4)) -+ -+/* -+** subr_1_f64_m_tied1: -+** fsubr z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_f64_m_tied1, svfloat64_t, -+ z0 = svsubr_n_f64_m (p0, z0, 1), -+ z0 = svsubr_m (p0, z0, 1)) -+ -+/* -+** subr_1_f64_m_untied: -+** movprfx z0, z1 -+** fsubr z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_f64_m_untied, svfloat64_t, -+ z0 = svsubr_n_f64_m (p0, z1, 1), -+ z0 = svsubr_m (p0, z1, 1)) -+ -+/* -+** 
subr_0p5_f64_m_tied1: -+** fsubr z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_0p5_f64_m_tied1, svfloat64_t, -+ z0 = svsubr_n_f64_m (p0, z0, 0.5), -+ z0 = svsubr_m (p0, z0, 0.5)) -+ -+/* -+** subr_0p5_f64_m_untied: -+** movprfx z0, z1 -+** fsubr z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_0p5_f64_m_untied, svfloat64_t, -+ z0 = svsubr_n_f64_m (p0, z1, 0.5), -+ z0 = svsubr_m (p0, z1, 0.5)) -+ -+/* -+** subr_m1_f64_m_tied1: -+** fmov (z[0-9]+\.d), #-1\.0(?:e\+0)? -+** fsubr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_f64_m_tied1, svfloat64_t, -+ z0 = svsubr_n_f64_m (p0, z0, -1), -+ z0 = svsubr_m (p0, z0, -1)) -+ -+/* -+** subr_m1_f64_m_untied: { xfail *-*-* } -+** fmov (z[0-9]+\.d), #-1\.0(?:e\+0)? -+** movprfx z0, z1 -+** fsubr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_f64_m_untied, svfloat64_t, -+ z0 = svsubr_n_f64_m (p0, z1, -1), -+ z0 = svsubr_m (p0, z1, -1)) -+ -+/* -+** subr_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fsubr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f64_z_tied1, svfloat64_t, -+ z0 = svsubr_f64_z (p0, z0, z1), -+ z0 = svsubr_z (p0, z0, z1)) -+ -+/* -+** subr_f64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** fsub z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f64_z_tied2, svfloat64_t, -+ z0 = svsubr_f64_z (p0, z1, z0), -+ z0 = svsubr_z (p0, z1, z0)) -+ -+/* -+** subr_f64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fsubr z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** fsub z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f64_z_untied, svfloat64_t, -+ z0 = svsubr_f64_z (p0, z1, z2), -+ z0 = svsubr_z (p0, z1, z2)) -+ -+/* -+** subr_d4_f64_z_tied1: -+** mov (z[0-9]+\.d), d4 -+** movprfx z0\.d, p0/z, z0\.d -+** fsubr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZD (subr_d4_f64_z_tied1, svfloat64_t, double, -+ z0 = svsubr_n_f64_z (p0, z0, d4), -+ z0 = svsubr_z (p0, z0, d4)) -+ -+/* -+** subr_d4_f64_z_untied: -+** mov (z[0-9]+\.d), d4 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fsubr z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** fsub z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZD (subr_d4_f64_z_untied, svfloat64_t, double, -+ z0 = svsubr_n_f64_z (p0, z1, d4), -+ z0 = svsubr_z (p0, z1, d4)) -+ -+/* -+** subr_1_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fsubr z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_f64_z_tied1, svfloat64_t, -+ z0 = svsubr_n_f64_z (p0, z0, 1), -+ z0 = svsubr_z (p0, z0, 1)) -+ -+/* -+** subr_1_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fsubr z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_f64_z_untied, svfloat64_t, -+ z0 = svsubr_n_f64_z (p0, z1, 1), -+ z0 = svsubr_z (p0, z1, 1)) -+ -+/* -+** subr_0p5_f64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** fsubr z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_0p5_f64_z_tied1, svfloat64_t, -+ z0 = svsubr_n_f64_z (p0, z0, 0.5), -+ z0 = svsubr_z (p0, z0, 0.5)) -+ -+/* -+** subr_0p5_f64_z_untied: -+** movprfx z0\.d, p0/z, z1\.d -+** fsubr z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_0p5_f64_z_untied, svfloat64_t, -+ z0 = svsubr_n_f64_z (p0, z1, 0.5), -+ z0 = svsubr_z (p0, z1, 0.5)) -+ -+/* -+** subr_m1_f64_z_tied1: -+** fmov (z[0-9]+\.d), #-1\.0(?:e\+0)? 
-+** movprfx z0\.d, p0/z, z0\.d -+** fsubr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_f64_z_tied1, svfloat64_t, -+ z0 = svsubr_n_f64_z (p0, z0, -1), -+ z0 = svsubr_z (p0, z0, -1)) -+ -+/* -+** subr_m1_f64_z_untied: -+** fmov (z[0-9]+\.d), #-1\.0(?:e\+0)? -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** fsubr z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** fsub z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_f64_z_untied, svfloat64_t, -+ z0 = svsubr_n_f64_z (p0, z1, -1), -+ z0 = svsubr_z (p0, z1, -1)) -+ -+/* -+** subr_f64_x_tied1: -+** fsub z0\.d, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f64_x_tied1, svfloat64_t, -+ z0 = svsubr_f64_x (p0, z0, z1), -+ z0 = svsubr_x (p0, z0, z1)) -+ -+/* -+** subr_f64_x_tied2: -+** fsub z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f64_x_tied2, svfloat64_t, -+ z0 = svsubr_f64_x (p0, z1, z0), -+ z0 = svsubr_x (p0, z1, z0)) -+ -+/* -+** subr_f64_x_untied: -+** fsub z0\.d, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_f64_x_untied, svfloat64_t, -+ z0 = svsubr_f64_x (p0, z1, z2), -+ z0 = svsubr_x (p0, z1, z2)) -+ -+/* -+** subr_d4_f64_x_tied1: -+** mov (z[0-9]+\.d), d4 -+** fsub z0\.d, \1, z0\.d -+** ret -+*/ -+TEST_UNIFORM_ZD (subr_d4_f64_x_tied1, svfloat64_t, double, -+ z0 = svsubr_n_f64_x (p0, z0, d4), -+ z0 = svsubr_x (p0, z0, d4)) -+ -+/* -+** subr_d4_f64_x_untied: -+** mov (z[0-9]+\.d), d4 -+** fsub z0\.d, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZD (subr_d4_f64_x_untied, svfloat64_t, double, -+ z0 = svsubr_n_f64_x (p0, z1, d4), -+ z0 = svsubr_x (p0, z1, d4)) -+ -+/* -+** subr_1_f64_x_tied1: -+** fsubr z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_f64_x_tied1, svfloat64_t, -+ z0 = svsubr_n_f64_x (p0, z0, 1), -+ z0 = svsubr_x (p0, z0, 1)) -+ -+/* -+** subr_1_f64_x_untied: -+** movprfx z0, z1 -+** fsubr z0\.d, p0/m, z0\.d, #1\.0 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_f64_x_untied, svfloat64_t, -+ z0 = svsubr_n_f64_x (p0, z1, 1), -+ z0 = svsubr_x (p0, z1, 1)) -+ -+/* -+** subr_0p5_f64_x_tied1: -+** fsubr z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_0p5_f64_x_tied1, svfloat64_t, -+ z0 = svsubr_n_f64_x (p0, z0, 0.5), -+ z0 = svsubr_x (p0, z0, 0.5)) -+ -+/* -+** subr_0p5_f64_x_untied: -+** movprfx z0, z1 -+** fsubr z0\.d, p0/m, z0\.d, #0\.5 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_0p5_f64_x_untied, svfloat64_t, -+ z0 = svsubr_n_f64_x (p0, z1, 0.5), -+ z0 = svsubr_x (p0, z1, 0.5)) -+ -+/* -+** subr_m1_f64_x_tied1: -+** fmov (z[0-9]+\.d), #-1\.0(?:e\+0)? -+** fsub z0\.d, \1, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_f64_x_tied1, svfloat64_t, -+ z0 = svsubr_n_f64_x (p0, z0, -1), -+ z0 = svsubr_x (p0, z0, -1)) -+ -+/* -+** subr_m1_f64_x_untied: -+** fmov (z[0-9]+\.d), #-1\.0(?:e\+0)? 
-+** fsub z0\.d, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_f64_x_untied, svfloat64_t, -+ z0 = svsubr_n_f64_x (p0, z1, -1), -+ z0 = svsubr_x (p0, z1, -1)) -+ -+/* -+** ptrue_subr_f64_x_tied1: -+** fsub z0\.d, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_f64_x_tied1, svfloat64_t, -+ z0 = svsubr_f64_x (svptrue_b64 (), z0, z1), -+ z0 = svsubr_x (svptrue_b64 (), z0, z1)) -+ -+/* -+** ptrue_subr_f64_x_tied2: -+** fsub z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_f64_x_tied2, svfloat64_t, -+ z0 = svsubr_f64_x (svptrue_b64 (), z1, z0), -+ z0 = svsubr_x (svptrue_b64 (), z1, z0)) -+ -+/* -+** ptrue_subr_f64_x_untied: -+** fsub z0\.d, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_f64_x_untied, svfloat64_t, -+ z0 = svsubr_f64_x (svptrue_b64 (), z1, z2), -+ z0 = svsubr_x (svptrue_b64 (), z1, z2)) -+ -+/* -+** ptrue_subr_1_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_1_f64_x_tied1, svfloat64_t, -+ z0 = svsubr_n_f64_x (svptrue_b64 (), z0, 1), -+ z0 = svsubr_x (svptrue_b64 (), z0, 1)) -+ -+/* -+** ptrue_subr_1_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_1_f64_x_untied, svfloat64_t, -+ z0 = svsubr_n_f64_x (svptrue_b64 (), z1, 1), -+ z0 = svsubr_x (svptrue_b64 (), z1, 1)) -+ -+/* -+** ptrue_subr_0p5_f64_x_tied1: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_0p5_f64_x_tied1, svfloat64_t, -+ z0 = svsubr_n_f64_x (svptrue_b64 (), z0, 0.5), -+ z0 = svsubr_x (svptrue_b64 (), z0, 0.5)) -+ -+/* -+** ptrue_subr_0p5_f64_x_untied: -+** ... -+** ptrue p[0-9]+\.b[^\n]* -+** ... -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_0p5_f64_x_untied, svfloat64_t, -+ z0 = svsubr_n_f64_x (svptrue_b64 (), z1, 0.5), -+ z0 = svsubr_x (svptrue_b64 (), z1, 0.5)) -+ -+/* -+** ptrue_subr_m1_f64_x_tied1: -+** fmov (z[0-9]+\.d), #-1\.0(?:e\+0)? -+** fsub z0\.d, \1, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_m1_f64_x_tied1, svfloat64_t, -+ z0 = svsubr_n_f64_x (svptrue_b64 (), z0, -1), -+ z0 = svsubr_x (svptrue_b64 (), z0, -1)) -+ -+/* -+** ptrue_subr_m1_f64_x_untied: -+** fmov (z[0-9]+\.d), #-1\.0(?:e\+0)? 
-+** fsub z0\.d, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (ptrue_subr_m1_f64_x_untied, svfloat64_t, -+ z0 = svsubr_n_f64_x (svptrue_b64 (), z1, -1), -+ z0 = svsubr_x (svptrue_b64 (), z1, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_s16.c -new file mode 100644 -index 000000000..d3dad62da ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_s16.c -@@ -0,0 +1,324 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** subr_s16_m_tied1: -+** subr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_s16_m_tied1, svint16_t, -+ z0 = svsubr_s16_m (p0, z0, z1), -+ z0 = svsubr_m (p0, z0, z1)) -+ -+/* -+** subr_s16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** subr z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_s16_m_tied2, svint16_t, -+ z0 = svsubr_s16_m (p0, z1, z0), -+ z0 = svsubr_m (p0, z1, z0)) -+ -+/* -+** subr_s16_m_untied: -+** movprfx z0, z1 -+** subr z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_s16_m_untied, svint16_t, -+ z0 = svsubr_s16_m (p0, z1, z2), -+ z0 = svsubr_m (p0, z1, z2)) -+ -+/* -+** subr_w0_s16_m_tied1: -+** mov (z[0-9]+\.h), w0 -+** subr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_w0_s16_m_tied1, svint16_t, int16_t, -+ z0 = svsubr_n_s16_m (p0, z0, x0), -+ z0 = svsubr_m (p0, z0, x0)) -+ -+/* -+** subr_w0_s16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** subr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_w0_s16_m_untied, svint16_t, int16_t, -+ z0 = svsubr_n_s16_m (p0, z1, x0), -+ z0 = svsubr_m (p0, z1, x0)) -+ -+/* -+** subr_1_s16_m_tied1: -+** mov (z[0-9]+\.h), #1 -+** subr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_s16_m_tied1, svint16_t, -+ z0 = svsubr_n_s16_m (p0, z0, 1), -+ z0 = svsubr_m (p0, z0, 1)) -+ -+/* -+** subr_1_s16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), #1 -+** movprfx z0, z1 -+** subr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_s16_m_untied, svint16_t, -+ z0 = svsubr_n_s16_m (p0, z1, 1), -+ z0 = svsubr_m (p0, z1, 1)) -+ -+/* -+** subr_m2_s16_m: -+** mov (z[0-9]+\.h), #-2 -+** subr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m2_s16_m, svint16_t, -+ z0 = svsubr_n_s16_m (p0, z0, -2), -+ z0 = svsubr_m (p0, z0, -2)) -+ -+/* -+** subr_s16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** subr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_s16_z_tied1, svint16_t, -+ z0 = svsubr_s16_z (p0, z0, z1), -+ z0 = svsubr_z (p0, z0, z1)) -+ -+/* -+** subr_s16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** sub z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_s16_z_tied2, svint16_t, -+ z0 = svsubr_s16_z (p0, z1, z0), -+ z0 = svsubr_z (p0, z1, z0)) -+ -+/* -+** subr_s16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** subr z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** sub z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (subr_s16_z_untied, svint16_t, -+ z0 = svsubr_s16_z (p0, z1, z2), -+ z0 = svsubr_z (p0, z1, z2)) -+ -+/* -+** subr_w0_s16_z_tied1: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** subr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_w0_s16_z_tied1, svint16_t, int16_t, -+ z0 = svsubr_n_s16_z (p0, z0, x0), -+ z0 = svsubr_z (p0, z0, x0)) -+ -+/* -+** subr_w0_s16_z_untied: -+** mov (z[0-9]+\.h), w0 -+** ( -+** movprfx z0\.h, p0/z, 
z1\.h -+** subr z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** sub z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_w0_s16_z_untied, svint16_t, int16_t, -+ z0 = svsubr_n_s16_z (p0, z1, x0), -+ z0 = svsubr_z (p0, z1, x0)) -+ -+/* -+** subr_1_s16_z_tied1: -+** mov (z[0-9]+\.h), #1 -+** movprfx z0\.h, p0/z, z0\.h -+** subr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_s16_z_tied1, svint16_t, -+ z0 = svsubr_n_s16_z (p0, z0, 1), -+ z0 = svsubr_z (p0, z0, 1)) -+ -+/* -+** subr_1_s16_z_untied: -+** mov (z[0-9]+\.h), #1 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** subr z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** sub z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_s16_z_untied, svint16_t, -+ z0 = svsubr_n_s16_z (p0, z1, 1), -+ z0 = svsubr_z (p0, z1, 1)) -+ -+/* -+** subr_s16_x_tied1: -+** sub z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_s16_x_tied1, svint16_t, -+ z0 = svsubr_s16_x (p0, z0, z1), -+ z0 = svsubr_x (p0, z0, z1)) -+ -+/* -+** subr_s16_x_tied2: -+** sub z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_s16_x_tied2, svint16_t, -+ z0 = svsubr_s16_x (p0, z1, z0), -+ z0 = svsubr_x (p0, z1, z0)) -+ -+/* -+** subr_s16_x_untied: -+** sub z0\.h, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_s16_x_untied, svint16_t, -+ z0 = svsubr_s16_x (p0, z1, z2), -+ z0 = svsubr_x (p0, z1, z2)) -+ -+/* -+** subr_w0_s16_x_tied1: -+** mov (z[0-9]+\.h), w0 -+** sub z0\.h, \1, z0\.h -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_w0_s16_x_tied1, svint16_t, int16_t, -+ z0 = svsubr_n_s16_x (p0, z0, x0), -+ z0 = svsubr_x (p0, z0, x0)) -+ -+/* -+** subr_w0_s16_x_untied: -+** mov (z[0-9]+\.h), w0 -+** sub z0\.h, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_w0_s16_x_untied, svint16_t, int16_t, -+ z0 = svsubr_n_s16_x (p0, z1, x0), -+ z0 = svsubr_x (p0, z1, x0)) -+ -+/* -+** subr_1_s16_x_tied1: -+** subr z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_s16_x_tied1, svint16_t, -+ z0 = svsubr_n_s16_x (p0, z0, 1), -+ z0 = svsubr_x (p0, z0, 1)) -+ -+/* -+** subr_1_s16_x_untied: -+** movprfx z0, z1 -+** subr z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_s16_x_untied, svint16_t, -+ z0 = svsubr_n_s16_x (p0, z1, 1), -+ z0 = svsubr_x (p0, z1, 1)) -+ -+/* -+** subr_127_s16_x: -+** subr z0\.h, z0\.h, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_127_s16_x, svint16_t, -+ z0 = svsubr_n_s16_x (p0, z0, 127), -+ z0 = svsubr_x (p0, z0, 127)) -+ -+/* -+** subr_128_s16_x: -+** subr z0\.h, z0\.h, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_128_s16_x, svint16_t, -+ z0 = svsubr_n_s16_x (p0, z0, 128), -+ z0 = svsubr_x (p0, z0, 128)) -+ -+/* -+** subr_255_s16_x: -+** subr z0\.h, z0\.h, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_255_s16_x, svint16_t, -+ z0 = svsubr_n_s16_x (p0, z0, 255), -+ z0 = svsubr_x (p0, z0, 255)) -+ -+/* -+** subr_256_s16_x: -+** subr z0\.h, z0\.h, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_256_s16_x, svint16_t, -+ z0 = svsubr_n_s16_x (p0, z0, 256), -+ z0 = svsubr_x (p0, z0, 256)) -+ -+/* -+** subr_257_s16_x: -+** mov (z[0-9]+)\.b, #1 -+** sub z0\.h, \1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_257_s16_x, svint16_t, -+ z0 = svsubr_n_s16_x (p0, z0, 257), -+ z0 = svsubr_x (p0, z0, 257)) -+ -+/* -+** subr_512_s16_x: -+** subr z0\.h, z0\.h, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_512_s16_x, svint16_t, -+ z0 = svsubr_n_s16_x (p0, z0, 512), -+ z0 = svsubr_x (p0, z0, 512)) -+ -+/* -+** subr_65280_s16_x: -+** subr z0\.h, z0\.h, #65280 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_65280_s16_x, svint16_t, -+ z0 = 
svsubr_n_s16_x (p0, z0, 0xff00), -+ z0 = svsubr_x (p0, z0, 0xff00)) -+ -+/* -+** subr_m1_s16_x_tied1: -+** mov (z[0-9]+)\.b, #-1 -+** sub z0\.h, \1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_s16_x_tied1, svint16_t, -+ z0 = svsubr_n_s16_x (p0, z0, -1), -+ z0 = svsubr_x (p0, z0, -1)) -+ -+/* -+** subr_m1_s16_x_untied: -+** mov (z[0-9]+)\.b, #-1 -+** sub z0\.h, \1\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_s16_x_untied, svint16_t, -+ z0 = svsubr_n_s16_x (p0, z1, -1), -+ z0 = svsubr_x (p0, z1, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_s32.c -new file mode 100644 -index 000000000..ce62e2f21 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_s32.c -@@ -0,0 +1,344 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** subr_s32_m_tied1: -+** subr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_s32_m_tied1, svint32_t, -+ z0 = svsubr_s32_m (p0, z0, z1), -+ z0 = svsubr_m (p0, z0, z1)) -+ -+/* -+** subr_s32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** subr z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_s32_m_tied2, svint32_t, -+ z0 = svsubr_s32_m (p0, z1, z0), -+ z0 = svsubr_m (p0, z1, z0)) -+ -+/* -+** subr_s32_m_untied: -+** movprfx z0, z1 -+** subr z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_s32_m_untied, svint32_t, -+ z0 = svsubr_s32_m (p0, z1, z2), -+ z0 = svsubr_m (p0, z1, z2)) -+ -+/* -+** subr_w0_s32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** subr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_w0_s32_m_tied1, svint32_t, int32_t, -+ z0 = svsubr_n_s32_m (p0, z0, x0), -+ z0 = svsubr_m (p0, z0, x0)) -+ -+/* -+** subr_w0_s32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** subr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_w0_s32_m_untied, svint32_t, int32_t, -+ z0 = svsubr_n_s32_m (p0, z1, x0), -+ z0 = svsubr_m (p0, z1, x0)) -+ -+/* -+** subr_1_s32_m_tied1: -+** mov (z[0-9]+\.s), #1 -+** subr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_s32_m_tied1, svint32_t, -+ z0 = svsubr_n_s32_m (p0, z0, 1), -+ z0 = svsubr_m (p0, z0, 1)) -+ -+/* -+** subr_1_s32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #1 -+** movprfx z0, z1 -+** subr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_s32_m_untied, svint32_t, -+ z0 = svsubr_n_s32_m (p0, z1, 1), -+ z0 = svsubr_m (p0, z1, 1)) -+ -+/* -+** subr_m2_s32_m: -+** mov (z[0-9]+\.s), #-2 -+** subr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m2_s32_m, svint32_t, -+ z0 = svsubr_n_s32_m (p0, z0, -2), -+ z0 = svsubr_m (p0, z0, -2)) -+ -+/* -+** subr_s32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** subr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_s32_z_tied1, svint32_t, -+ z0 = svsubr_s32_z (p0, z0, z1), -+ z0 = svsubr_z (p0, z0, z1)) -+ -+/* -+** subr_s32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** sub z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_s32_z_tied2, svint32_t, -+ z0 = svsubr_s32_z (p0, z1, z0), -+ z0 = svsubr_z (p0, z1, z0)) -+ -+/* -+** subr_s32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** subr z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** sub z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (subr_s32_z_untied, svint32_t, -+ z0 = svsubr_s32_z (p0, z1, z2), -+ z0 = svsubr_z (p0, z1, z2)) -+ -+/* -+** subr_w0_s32_z_tied1: -+** mov 
(z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** subr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_w0_s32_z_tied1, svint32_t, int32_t, -+ z0 = svsubr_n_s32_z (p0, z0, x0), -+ z0 = svsubr_z (p0, z0, x0)) -+ -+/* -+** subr_w0_s32_z_untied: -+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** subr z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** sub z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_w0_s32_z_untied, svint32_t, int32_t, -+ z0 = svsubr_n_s32_z (p0, z1, x0), -+ z0 = svsubr_z (p0, z1, x0)) -+ -+/* -+** subr_1_s32_z_tied1: -+** mov (z[0-9]+\.s), #1 -+** movprfx z0\.s, p0/z, z0\.s -+** subr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_s32_z_tied1, svint32_t, -+ z0 = svsubr_n_s32_z (p0, z0, 1), -+ z0 = svsubr_z (p0, z0, 1)) -+ -+/* -+** subr_1_s32_z_untied: -+** mov (z[0-9]+\.s), #1 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** subr z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** sub z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_s32_z_untied, svint32_t, -+ z0 = svsubr_n_s32_z (p0, z1, 1), -+ z0 = svsubr_z (p0, z1, 1)) -+ -+/* -+** subr_s32_x_tied1: -+** sub z0\.s, z1\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_s32_x_tied1, svint32_t, -+ z0 = svsubr_s32_x (p0, z0, z1), -+ z0 = svsubr_x (p0, z0, z1)) -+ -+/* -+** subr_s32_x_tied2: -+** sub z0\.s, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_s32_x_tied2, svint32_t, -+ z0 = svsubr_s32_x (p0, z1, z0), -+ z0 = svsubr_x (p0, z1, z0)) -+ -+/* -+** subr_s32_x_untied: -+** sub z0\.s, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_s32_x_untied, svint32_t, -+ z0 = svsubr_s32_x (p0, z1, z2), -+ z0 = svsubr_x (p0, z1, z2)) -+ -+/* -+** subr_w0_s32_x_tied1: -+** mov (z[0-9]+\.s), w0 -+** sub z0\.s, \1, z0\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_w0_s32_x_tied1, svint32_t, int32_t, -+ z0 = svsubr_n_s32_x (p0, z0, x0), -+ z0 = svsubr_x (p0, z0, x0)) -+ -+/* -+** subr_w0_s32_x_untied: -+** mov (z[0-9]+\.s), w0 -+** sub z0\.s, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_w0_s32_x_untied, svint32_t, int32_t, -+ z0 = svsubr_n_s32_x (p0, z1, x0), -+ z0 = svsubr_x (p0, z1, x0)) -+ -+/* -+** subr_1_s32_x_tied1: -+** subr z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_s32_x_tied1, svint32_t, -+ z0 = svsubr_n_s32_x (p0, z0, 1), -+ z0 = svsubr_x (p0, z0, 1)) -+ -+/* -+** subr_1_s32_x_untied: -+** movprfx z0, z1 -+** subr z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_s32_x_untied, svint32_t, -+ z0 = svsubr_n_s32_x (p0, z1, 1), -+ z0 = svsubr_x (p0, z1, 1)) -+ -+/* -+** subr_127_s32_x: -+** subr z0\.s, z0\.s, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_127_s32_x, svint32_t, -+ z0 = svsubr_n_s32_x (p0, z0, 127), -+ z0 = svsubr_x (p0, z0, 127)) -+ -+/* -+** subr_128_s32_x: -+** subr z0\.s, z0\.s, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_128_s32_x, svint32_t, -+ z0 = svsubr_n_s32_x (p0, z0, 128), -+ z0 = svsubr_x (p0, z0, 128)) -+ -+/* -+** subr_255_s32_x: -+** subr z0\.s, z0\.s, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_255_s32_x, svint32_t, -+ z0 = svsubr_n_s32_x (p0, z0, 255), -+ z0 = svsubr_x (p0, z0, 255)) -+ -+/* -+** subr_256_s32_x: -+** subr z0\.s, z0\.s, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_256_s32_x, svint32_t, -+ z0 = svsubr_n_s32_x (p0, z0, 256), -+ z0 = svsubr_x (p0, z0, 256)) -+ -+/* -+** subr_511_s32_x: -+** mov (z[0-9]+\.s), #511 -+** sub z0\.s, \1, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_511_s32_x, svint32_t, -+ z0 = svsubr_n_s32_x (p0, z0, 511), -+ z0 = svsubr_x (p0, z0, 511)) -+ 
-+/* -+** subr_512_s32_x: -+** subr z0\.s, z0\.s, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_512_s32_x, svint32_t, -+ z0 = svsubr_n_s32_x (p0, z0, 512), -+ z0 = svsubr_x (p0, z0, 512)) -+ -+/* -+** subr_65280_s32_x: -+** subr z0\.s, z0\.s, #65280 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_65280_s32_x, svint32_t, -+ z0 = svsubr_n_s32_x (p0, z0, 0xff00), -+ z0 = svsubr_x (p0, z0, 0xff00)) -+ -+/* -+** subr_65535_s32_x: -+** mov (z[0-9]+\.s), #65535 -+** sub z0\.s, \1, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_65535_s32_x, svint32_t, -+ z0 = svsubr_n_s32_x (p0, z0, 65535), -+ z0 = svsubr_x (p0, z0, 65535)) -+ -+/* -+** subr_65536_s32_x: -+** mov (z[0-9]+\.s), #65536 -+** sub z0\.s, \1, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_65536_s32_x, svint32_t, -+ z0 = svsubr_n_s32_x (p0, z0, 65536), -+ z0 = svsubr_x (p0, z0, 65536)) -+ -+/* -+** subr_m1_s32_x_tied1: -+** mov (z[0-9]+)\.b, #-1 -+** sub z0\.s, \1\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_s32_x_tied1, svint32_t, -+ z0 = svsubr_n_s32_x (p0, z0, -1), -+ z0 = svsubr_x (p0, z0, -1)) -+ -+/* -+** subr_m1_s32_x_untied: -+** mov (z[0-9]+)\.b, #-1 -+** sub z0\.s, \1\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_s32_x_untied, svint32_t, -+ z0 = svsubr_n_s32_x (p0, z1, -1), -+ z0 = svsubr_x (p0, z1, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_s64.c -new file mode 100644 -index 000000000..ada9e977c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_s64.c -@@ -0,0 +1,344 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** subr_s64_m_tied1: -+** subr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_s64_m_tied1, svint64_t, -+ z0 = svsubr_s64_m (p0, z0, z1), -+ z0 = svsubr_m (p0, z0, z1)) -+ -+/* -+** subr_s64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** subr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_s64_m_tied2, svint64_t, -+ z0 = svsubr_s64_m (p0, z1, z0), -+ z0 = svsubr_m (p0, z1, z0)) -+ -+/* -+** subr_s64_m_untied: -+** movprfx z0, z1 -+** subr z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_s64_m_untied, svint64_t, -+ z0 = svsubr_s64_m (p0, z1, z2), -+ z0 = svsubr_m (p0, z1, z2)) -+ -+/* -+** subr_x0_s64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** subr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_x0_s64_m_tied1, svint64_t, int64_t, -+ z0 = svsubr_n_s64_m (p0, z0, x0), -+ z0 = svsubr_m (p0, z0, x0)) -+ -+/* -+** subr_x0_s64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** subr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_x0_s64_m_untied, svint64_t, int64_t, -+ z0 = svsubr_n_s64_m (p0, z1, x0), -+ z0 = svsubr_m (p0, z1, x0)) -+ -+/* -+** subr_1_s64_m_tied1: -+** mov (z[0-9]+\.d), #1 -+** subr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_s64_m_tied1, svint64_t, -+ z0 = svsubr_n_s64_m (p0, z0, 1), -+ z0 = svsubr_m (p0, z0, 1)) -+ -+/* -+** subr_1_s64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #1 -+** movprfx z0, z1 -+** subr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_s64_m_untied, svint64_t, -+ z0 = svsubr_n_s64_m (p0, z1, 1), -+ z0 = svsubr_m (p0, z1, 1)) -+ -+/* -+** subr_m2_s64_m: -+** mov (z[0-9]+\.d), #-2 -+** subr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m2_s64_m, svint64_t, -+ z0 = svsubr_n_s64_m (p0, z0, -2), -+ z0 = svsubr_m (p0, z0, -2)) -+ -+/* -+** subr_s64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d 
-+** subr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_s64_z_tied1, svint64_t, -+ z0 = svsubr_s64_z (p0, z0, z1), -+ z0 = svsubr_z (p0, z0, z1)) -+ -+/* -+** subr_s64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** sub z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_s64_z_tied2, svint64_t, -+ z0 = svsubr_s64_z (p0, z1, z0), -+ z0 = svsubr_z (p0, z1, z0)) -+ -+/* -+** subr_s64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** subr z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** sub z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (subr_s64_z_untied, svint64_t, -+ z0 = svsubr_s64_z (p0, z1, z2), -+ z0 = svsubr_z (p0, z1, z2)) -+ -+/* -+** subr_x0_s64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** subr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_x0_s64_z_tied1, svint64_t, int64_t, -+ z0 = svsubr_n_s64_z (p0, z0, x0), -+ z0 = svsubr_z (p0, z0, x0)) -+ -+/* -+** subr_x0_s64_z_untied: -+** mov (z[0-9]+\.d), x0 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** subr z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** sub z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_x0_s64_z_untied, svint64_t, int64_t, -+ z0 = svsubr_n_s64_z (p0, z1, x0), -+ z0 = svsubr_z (p0, z1, x0)) -+ -+/* -+** subr_1_s64_z_tied1: -+** mov (z[0-9]+\.d), #1 -+** movprfx z0\.d, p0/z, z0\.d -+** subr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_s64_z_tied1, svint64_t, -+ z0 = svsubr_n_s64_z (p0, z0, 1), -+ z0 = svsubr_z (p0, z0, 1)) -+ -+/* -+** subr_1_s64_z_untied: -+** mov (z[0-9]+\.d), #1 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** subr z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** sub z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_s64_z_untied, svint64_t, -+ z0 = svsubr_n_s64_z (p0, z1, 1), -+ z0 = svsubr_z (p0, z1, 1)) -+ -+/* -+** subr_s64_x_tied1: -+** sub z0\.d, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_s64_x_tied1, svint64_t, -+ z0 = svsubr_s64_x (p0, z0, z1), -+ z0 = svsubr_x (p0, z0, z1)) -+ -+/* -+** subr_s64_x_tied2: -+** sub z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_s64_x_tied2, svint64_t, -+ z0 = svsubr_s64_x (p0, z1, z0), -+ z0 = svsubr_x (p0, z1, z0)) -+ -+/* -+** subr_s64_x_untied: -+** sub z0\.d, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_s64_x_untied, svint64_t, -+ z0 = svsubr_s64_x (p0, z1, z2), -+ z0 = svsubr_x (p0, z1, z2)) -+ -+/* -+** subr_x0_s64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** sub z0\.d, \1, z0\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_x0_s64_x_tied1, svint64_t, int64_t, -+ z0 = svsubr_n_s64_x (p0, z0, x0), -+ z0 = svsubr_x (p0, z0, x0)) -+ -+/* -+** subr_x0_s64_x_untied: -+** mov (z[0-9]+\.d), x0 -+** sub z0\.d, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_x0_s64_x_untied, svint64_t, int64_t, -+ z0 = svsubr_n_s64_x (p0, z1, x0), -+ z0 = svsubr_x (p0, z1, x0)) -+ -+/* -+** subr_1_s64_x_tied1: -+** subr z0\.d, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_s64_x_tied1, svint64_t, -+ z0 = svsubr_n_s64_x (p0, z0, 1), -+ z0 = svsubr_x (p0, z0, 1)) -+ -+/* -+** subr_1_s64_x_untied: -+** movprfx z0, z1 -+** subr z0\.d, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_s64_x_untied, svint64_t, -+ z0 = svsubr_n_s64_x (p0, z1, 1), -+ z0 = svsubr_x (p0, z1, 1)) -+ -+/* -+** subr_127_s64_x: -+** subr z0\.d, z0\.d, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_127_s64_x, svint64_t, -+ z0 = svsubr_n_s64_x (p0, z0, 127), -+ z0 = svsubr_x (p0, z0, 127)) -+ -+/* -+** subr_128_s64_x: 
-+** subr z0\.d, z0\.d, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_128_s64_x, svint64_t, -+ z0 = svsubr_n_s64_x (p0, z0, 128), -+ z0 = svsubr_x (p0, z0, 128)) -+ -+/* -+** subr_255_s64_x: -+** subr z0\.d, z0\.d, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_255_s64_x, svint64_t, -+ z0 = svsubr_n_s64_x (p0, z0, 255), -+ z0 = svsubr_x (p0, z0, 255)) -+ -+/* -+** subr_256_s64_x: -+** subr z0\.d, z0\.d, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_256_s64_x, svint64_t, -+ z0 = svsubr_n_s64_x (p0, z0, 256), -+ z0 = svsubr_x (p0, z0, 256)) -+ -+/* -+** subr_511_s64_x: -+** mov (z[0-9]+\.d), #511 -+** sub z0\.d, \1, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_511_s64_x, svint64_t, -+ z0 = svsubr_n_s64_x (p0, z0, 511), -+ z0 = svsubr_x (p0, z0, 511)) -+ -+/* -+** subr_512_s64_x: -+** subr z0\.d, z0\.d, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_512_s64_x, svint64_t, -+ z0 = svsubr_n_s64_x (p0, z0, 512), -+ z0 = svsubr_x (p0, z0, 512)) -+ -+/* -+** subr_65280_s64_x: -+** subr z0\.d, z0\.d, #65280 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_65280_s64_x, svint64_t, -+ z0 = svsubr_n_s64_x (p0, z0, 0xff00), -+ z0 = svsubr_x (p0, z0, 0xff00)) -+ -+/* -+** subr_65535_s64_x: -+** mov (z[0-9]+\.d), #65535 -+** sub z0\.d, \1, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_65535_s64_x, svint64_t, -+ z0 = svsubr_n_s64_x (p0, z0, 65535), -+ z0 = svsubr_x (p0, z0, 65535)) -+ -+/* -+** subr_65536_s64_x: -+** mov (z[0-9]+\.d), #65536 -+** sub z0\.d, \1, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_65536_s64_x, svint64_t, -+ z0 = svsubr_n_s64_x (p0, z0, 65536), -+ z0 = svsubr_x (p0, z0, 65536)) -+ -+/* -+** subr_m1_s64_x_tied1: -+** mov (z[0-9]+)\.b, #-1 -+** sub z0\.d, \1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_s64_x_tied1, svint64_t, -+ z0 = svsubr_n_s64_x (p0, z0, -1), -+ z0 = svsubr_x (p0, z0, -1)) -+ -+/* -+** subr_m1_s64_x_untied: -+** mov (z[0-9]+)\.b, #-1 -+** sub z0\.d, \1\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_s64_x_untied, svint64_t, -+ z0 = svsubr_n_s64_x (p0, z1, -1), -+ z0 = svsubr_x (p0, z1, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_s8.c -new file mode 100644 -index 000000000..90d2a6de9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_s8.c -@@ -0,0 +1,294 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** subr_s8_m_tied1: -+** subr z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (subr_s8_m_tied1, svint8_t, -+ z0 = svsubr_s8_m (p0, z0, z1), -+ z0 = svsubr_m (p0, z0, z1)) -+ -+/* -+** subr_s8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** subr z0\.b, p0/m, z0\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (subr_s8_m_tied2, svint8_t, -+ z0 = svsubr_s8_m (p0, z1, z0), -+ z0 = svsubr_m (p0, z1, z0)) -+ -+/* -+** subr_s8_m_untied: -+** movprfx z0, z1 -+** subr z0\.b, p0/m, z0\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (subr_s8_m_untied, svint8_t, -+ z0 = svsubr_s8_m (p0, z1, z2), -+ z0 = svsubr_m (p0, z1, z2)) -+ -+/* -+** subr_w0_s8_m_tied1: -+** mov (z[0-9]+\.b), w0 -+** subr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_w0_s8_m_tied1, svint8_t, int8_t, -+ z0 = svsubr_n_s8_m (p0, z0, x0), -+ z0 = svsubr_m (p0, z0, x0)) -+ -+/* -+** subr_w0_s8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** subr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_w0_s8_m_untied, svint8_t, int8_t, -+ z0 = svsubr_n_s8_m (p0, z1, x0), -+ z0 = svsubr_m (p0, z1, x0)) -+ -+/* -+** 
subr_1_s8_m_tied1: -+** mov (z[0-9]+\.b), #1 -+** subr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_s8_m_tied1, svint8_t, -+ z0 = svsubr_n_s8_m (p0, z0, 1), -+ z0 = svsubr_m (p0, z0, 1)) -+ -+/* -+** subr_1_s8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), #1 -+** movprfx z0, z1 -+** subr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_s8_m_untied, svint8_t, -+ z0 = svsubr_n_s8_m (p0, z1, 1), -+ z0 = svsubr_m (p0, z1, 1)) -+ -+/* -+** subr_m1_s8_m: -+** mov (z[0-9]+\.b), #-1 -+** subr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_s8_m, svint8_t, -+ z0 = svsubr_n_s8_m (p0, z0, -1), -+ z0 = svsubr_m (p0, z0, -1)) -+ -+/* -+** subr_s8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** subr z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (subr_s8_z_tied1, svint8_t, -+ z0 = svsubr_s8_z (p0, z0, z1), -+ z0 = svsubr_z (p0, z0, z1)) -+ -+/* -+** subr_s8_z_tied2: -+** movprfx z0\.b, p0/z, z0\.b -+** sub z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (subr_s8_z_tied2, svint8_t, -+ z0 = svsubr_s8_z (p0, z1, z0), -+ z0 = svsubr_z (p0, z1, z0)) -+ -+/* -+** subr_s8_z_untied: -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** subr z0\.b, p0/m, z0\.b, z2\.b -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** sub z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (subr_s8_z_untied, svint8_t, -+ z0 = svsubr_s8_z (p0, z1, z2), -+ z0 = svsubr_z (p0, z1, z2)) -+ -+/* -+** subr_w0_s8_z_tied1: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** subr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_w0_s8_z_tied1, svint8_t, int8_t, -+ z0 = svsubr_n_s8_z (p0, z0, x0), -+ z0 = svsubr_z (p0, z0, x0)) -+ -+/* -+** subr_w0_s8_z_untied: -+** mov (z[0-9]+\.b), w0 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** subr z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** sub z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_w0_s8_z_untied, svint8_t, int8_t, -+ z0 = svsubr_n_s8_z (p0, z1, x0), -+ z0 = svsubr_z (p0, z1, x0)) -+ -+/* -+** subr_1_s8_z_tied1: -+** mov (z[0-9]+\.b), #1 -+** movprfx z0\.b, p0/z, z0\.b -+** subr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_s8_z_tied1, svint8_t, -+ z0 = svsubr_n_s8_z (p0, z0, 1), -+ z0 = svsubr_z (p0, z0, 1)) -+ -+/* -+** subr_1_s8_z_untied: -+** mov (z[0-9]+\.b), #1 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** subr z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** sub z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_s8_z_untied, svint8_t, -+ z0 = svsubr_n_s8_z (p0, z1, 1), -+ z0 = svsubr_z (p0, z1, 1)) -+ -+/* -+** subr_s8_x_tied1: -+** sub z0\.b, z1\.b, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (subr_s8_x_tied1, svint8_t, -+ z0 = svsubr_s8_x (p0, z0, z1), -+ z0 = svsubr_x (p0, z0, z1)) -+ -+/* -+** subr_s8_x_tied2: -+** sub z0\.b, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (subr_s8_x_tied2, svint8_t, -+ z0 = svsubr_s8_x (p0, z1, z0), -+ z0 = svsubr_x (p0, z1, z0)) -+ -+/* -+** subr_s8_x_untied: -+** sub z0\.b, z2\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (subr_s8_x_untied, svint8_t, -+ z0 = svsubr_s8_x (p0, z1, z2), -+ z0 = svsubr_x (p0, z1, z2)) -+ -+/* -+** subr_w0_s8_x_tied1: -+** mov (z[0-9]+\.b), w0 -+** sub z0\.b, \1, z0\.b -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_w0_s8_x_tied1, svint8_t, int8_t, -+ z0 = svsubr_n_s8_x (p0, z0, x0), -+ z0 = svsubr_x (p0, z0, x0)) -+ -+/* -+** subr_w0_s8_x_untied: -+** mov (z[0-9]+\.b), w0 -+** sub z0\.b, \1, z1\.b -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_w0_s8_x_untied, svint8_t, int8_t, -+ 
z0 = svsubr_n_s8_x (p0, z1, x0), -+ z0 = svsubr_x (p0, z1, x0)) -+ -+/* -+** subr_1_s8_x_tied1: -+** subr z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_s8_x_tied1, svint8_t, -+ z0 = svsubr_n_s8_x (p0, z0, 1), -+ z0 = svsubr_x (p0, z0, 1)) -+ -+/* -+** subr_1_s8_x_untied: -+** movprfx z0, z1 -+** subr z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_s8_x_untied, svint8_t, -+ z0 = svsubr_n_s8_x (p0, z1, 1), -+ z0 = svsubr_x (p0, z1, 1)) -+ -+/* -+** subr_127_s8_x: -+** subr z0\.b, z0\.b, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_127_s8_x, svint8_t, -+ z0 = svsubr_n_s8_x (p0, z0, 127), -+ z0 = svsubr_x (p0, z0, 127)) -+ -+/* -+** subr_128_s8_x: -+** subr z0\.b, z0\.b, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_128_s8_x, svint8_t, -+ z0 = svsubr_n_s8_x (p0, z0, 128), -+ z0 = svsubr_x (p0, z0, 128)) -+ -+/* -+** subr_255_s8_x: -+** subr z0\.b, z0\.b, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_255_s8_x, svint8_t, -+ z0 = svsubr_n_s8_x (p0, z0, 255), -+ z0 = svsubr_x (p0, z0, 255)) -+ -+/* -+** subr_m1_s8_x: -+** subr z0\.b, z0\.b, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_s8_x, svint8_t, -+ z0 = svsubr_n_s8_x (p0, z0, -1), -+ z0 = svsubr_x (p0, z0, -1)) -+ -+/* -+** subr_m127_s8_x: -+** subr z0\.b, z0\.b, #129 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m127_s8_x, svint8_t, -+ z0 = svsubr_n_s8_x (p0, z0, -127), -+ z0 = svsubr_x (p0, z0, -127)) -+ -+/* -+** subr_m128_s8_x: -+** subr z0\.b, z0\.b, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m128_s8_x, svint8_t, -+ z0 = svsubr_n_s8_x (p0, z0, -128), -+ z0 = svsubr_x (p0, z0, -128)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_u16.c -new file mode 100644 -index 000000000..379a80fb1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_u16.c -@@ -0,0 +1,324 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** subr_u16_m_tied1: -+** subr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_u16_m_tied1, svuint16_t, -+ z0 = svsubr_u16_m (p0, z0, z1), -+ z0 = svsubr_m (p0, z0, z1)) -+ -+/* -+** subr_u16_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** subr z0\.h, p0/m, z0\.h, \1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_u16_m_tied2, svuint16_t, -+ z0 = svsubr_u16_m (p0, z1, z0), -+ z0 = svsubr_m (p0, z1, z0)) -+ -+/* -+** subr_u16_m_untied: -+** movprfx z0, z1 -+** subr z0\.h, p0/m, z0\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_u16_m_untied, svuint16_t, -+ z0 = svsubr_u16_m (p0, z1, z2), -+ z0 = svsubr_m (p0, z1, z2)) -+ -+/* -+** subr_w0_u16_m_tied1: -+** mov (z[0-9]+\.h), w0 -+** subr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_w0_u16_m_tied1, svuint16_t, uint16_t, -+ z0 = svsubr_n_u16_m (p0, z0, x0), -+ z0 = svsubr_m (p0, z0, x0)) -+ -+/* -+** subr_w0_u16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), w0 -+** movprfx z0, z1 -+** subr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_w0_u16_m_untied, svuint16_t, uint16_t, -+ z0 = svsubr_n_u16_m (p0, z1, x0), -+ z0 = svsubr_m (p0, z1, x0)) -+ -+/* -+** subr_1_u16_m_tied1: -+** mov (z[0-9]+\.h), #1 -+** subr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_u16_m_tied1, svuint16_t, -+ z0 = svsubr_n_u16_m (p0, z0, 1), -+ z0 = svsubr_m (p0, z0, 1)) -+ -+/* -+** subr_1_u16_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.h), #1 -+** movprfx z0, z1 -+** subr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_u16_m_untied, svuint16_t, -+ z0 = svsubr_n_u16_m (p0, z1, 
1), -+ z0 = svsubr_m (p0, z1, 1)) -+ -+/* -+** subr_m2_u16_m: -+** mov (z[0-9]+\.h), #-2 -+** subr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m2_u16_m, svuint16_t, -+ z0 = svsubr_n_u16_m (p0, z0, -2), -+ z0 = svsubr_m (p0, z0, -2)) -+ -+/* -+** subr_u16_z_tied1: -+** movprfx z0\.h, p0/z, z0\.h -+** subr z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_u16_z_tied1, svuint16_t, -+ z0 = svsubr_u16_z (p0, z0, z1), -+ z0 = svsubr_z (p0, z0, z1)) -+ -+/* -+** subr_u16_z_tied2: -+** movprfx z0\.h, p0/z, z0\.h -+** sub z0\.h, p0/m, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_u16_z_tied2, svuint16_t, -+ z0 = svsubr_u16_z (p0, z1, z0), -+ z0 = svsubr_z (p0, z1, z0)) -+ -+/* -+** subr_u16_z_untied: -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** subr z0\.h, p0/m, z0\.h, z2\.h -+** | -+** movprfx z0\.h, p0/z, z2\.h -+** sub z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (subr_u16_z_untied, svuint16_t, -+ z0 = svsubr_u16_z (p0, z1, z2), -+ z0 = svsubr_z (p0, z1, z2)) -+ -+/* -+** subr_w0_u16_z_tied1: -+** mov (z[0-9]+\.h), w0 -+** movprfx z0\.h, p0/z, z0\.h -+** subr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_w0_u16_z_tied1, svuint16_t, uint16_t, -+ z0 = svsubr_n_u16_z (p0, z0, x0), -+ z0 = svsubr_z (p0, z0, x0)) -+ -+/* -+** subr_w0_u16_z_untied: -+** mov (z[0-9]+\.h), w0 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** subr z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** sub z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_w0_u16_z_untied, svuint16_t, uint16_t, -+ z0 = svsubr_n_u16_z (p0, z1, x0), -+ z0 = svsubr_z (p0, z1, x0)) -+ -+/* -+** subr_1_u16_z_tied1: -+** mov (z[0-9]+\.h), #1 -+** movprfx z0\.h, p0/z, z0\.h -+** subr z0\.h, p0/m, z0\.h, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_u16_z_tied1, svuint16_t, -+ z0 = svsubr_n_u16_z (p0, z0, 1), -+ z0 = svsubr_z (p0, z0, 1)) -+ -+/* -+** subr_1_u16_z_untied: -+** mov (z[0-9]+\.h), #1 -+** ( -+** movprfx z0\.h, p0/z, z1\.h -+** subr z0\.h, p0/m, z0\.h, \1 -+** | -+** movprfx z0\.h, p0/z, \1 -+** sub z0\.h, p0/m, z0\.h, z1\.h -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_u16_z_untied, svuint16_t, -+ z0 = svsubr_n_u16_z (p0, z1, 1), -+ z0 = svsubr_z (p0, z1, 1)) -+ -+/* -+** subr_u16_x_tied1: -+** sub z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_u16_x_tied1, svuint16_t, -+ z0 = svsubr_u16_x (p0, z0, z1), -+ z0 = svsubr_x (p0, z0, z1)) -+ -+/* -+** subr_u16_x_tied2: -+** sub z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_u16_x_tied2, svuint16_t, -+ z0 = svsubr_u16_x (p0, z1, z0), -+ z0 = svsubr_x (p0, z1, z0)) -+ -+/* -+** subr_u16_x_untied: -+** sub z0\.h, z2\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_u16_x_untied, svuint16_t, -+ z0 = svsubr_u16_x (p0, z1, z2), -+ z0 = svsubr_x (p0, z1, z2)) -+ -+/* -+** subr_w0_u16_x_tied1: -+** mov (z[0-9]+\.h), w0 -+** sub z0\.h, \1, z0\.h -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_w0_u16_x_tied1, svuint16_t, uint16_t, -+ z0 = svsubr_n_u16_x (p0, z0, x0), -+ z0 = svsubr_x (p0, z0, x0)) -+ -+/* -+** subr_w0_u16_x_untied: -+** mov (z[0-9]+\.h), w0 -+** sub z0\.h, \1, z1\.h -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_w0_u16_x_untied, svuint16_t, uint16_t, -+ z0 = svsubr_n_u16_x (p0, z1, x0), -+ z0 = svsubr_x (p0, z1, x0)) -+ -+/* -+** subr_1_u16_x_tied1: -+** subr z0\.h, z0\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_u16_x_tied1, svuint16_t, -+ z0 = svsubr_n_u16_x (p0, z0, 1), -+ z0 = svsubr_x (p0, z0, 1)) -+ -+/* -+** subr_1_u16_x_untied: -+** movprfx z0, z1 -+** subr z0\.h, z0\.h, #1 -+** ret -+*/ 
-+TEST_UNIFORM_Z (subr_1_u16_x_untied, svuint16_t, -+ z0 = svsubr_n_u16_x (p0, z1, 1), -+ z0 = svsubr_x (p0, z1, 1)) -+ -+/* -+** subr_127_u16_x: -+** subr z0\.h, z0\.h, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_127_u16_x, svuint16_t, -+ z0 = svsubr_n_u16_x (p0, z0, 127), -+ z0 = svsubr_x (p0, z0, 127)) -+ -+/* -+** subr_128_u16_x: -+** subr z0\.h, z0\.h, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_128_u16_x, svuint16_t, -+ z0 = svsubr_n_u16_x (p0, z0, 128), -+ z0 = svsubr_x (p0, z0, 128)) -+ -+/* -+** subr_255_u16_x: -+** subr z0\.h, z0\.h, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_255_u16_x, svuint16_t, -+ z0 = svsubr_n_u16_x (p0, z0, 255), -+ z0 = svsubr_x (p0, z0, 255)) -+ -+/* -+** subr_256_u16_x: -+** subr z0\.h, z0\.h, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_256_u16_x, svuint16_t, -+ z0 = svsubr_n_u16_x (p0, z0, 256), -+ z0 = svsubr_x (p0, z0, 256)) -+ -+/* -+** subr_257_u16_x: -+** mov (z[0-9]+)\.b, #1 -+** sub z0\.h, \1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_257_u16_x, svuint16_t, -+ z0 = svsubr_n_u16_x (p0, z0, 257), -+ z0 = svsubr_x (p0, z0, 257)) -+ -+/* -+** subr_512_u16_x: -+** subr z0\.h, z0\.h, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_512_u16_x, svuint16_t, -+ z0 = svsubr_n_u16_x (p0, z0, 512), -+ z0 = svsubr_x (p0, z0, 512)) -+ -+/* -+** subr_65280_u16_x: -+** subr z0\.h, z0\.h, #65280 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_65280_u16_x, svuint16_t, -+ z0 = svsubr_n_u16_x (p0, z0, 0xff00), -+ z0 = svsubr_x (p0, z0, 0xff00)) -+ -+/* -+** subr_m1_u16_x_tied1: -+** mov (z[0-9]+)\.b, #-1 -+** sub z0\.h, \1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_u16_x_tied1, svuint16_t, -+ z0 = svsubr_n_u16_x (p0, z0, -1), -+ z0 = svsubr_x (p0, z0, -1)) -+ -+/* -+** subr_m1_u16_x_untied: -+** mov (z[0-9]+)\.b, #-1 -+** sub z0\.h, \1\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_u16_x_untied, svuint16_t, -+ z0 = svsubr_n_u16_x (p0, z1, -1), -+ z0 = svsubr_x (p0, z1, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_u32.c -new file mode 100644 -index 000000000..215f8b449 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_u32.c -@@ -0,0 +1,344 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** subr_u32_m_tied1: -+** subr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_u32_m_tied1, svuint32_t, -+ z0 = svsubr_u32_m (p0, z0, z1), -+ z0 = svsubr_m (p0, z0, z1)) -+ -+/* -+** subr_u32_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** subr z0\.s, p0/m, z0\.s, \1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_u32_m_tied2, svuint32_t, -+ z0 = svsubr_u32_m (p0, z1, z0), -+ z0 = svsubr_m (p0, z1, z0)) -+ -+/* -+** subr_u32_m_untied: -+** movprfx z0, z1 -+** subr z0\.s, p0/m, z0\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_u32_m_untied, svuint32_t, -+ z0 = svsubr_u32_m (p0, z1, z2), -+ z0 = svsubr_m (p0, z1, z2)) -+ -+/* -+** subr_w0_u32_m_tied1: -+** mov (z[0-9]+\.s), w0 -+** subr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_w0_u32_m_tied1, svuint32_t, uint32_t, -+ z0 = svsubr_n_u32_m (p0, z0, x0), -+ z0 = svsubr_m (p0, z0, x0)) -+ -+/* -+** subr_w0_u32_m_untied: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0, z1 -+** subr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_w0_u32_m_untied, svuint32_t, uint32_t, -+ z0 = svsubr_n_u32_m (p0, z1, x0), -+ z0 = svsubr_m (p0, z1, x0)) -+ -+/* -+** subr_1_u32_m_tied1: -+** mov (z[0-9]+\.s), #1 -+** subr z0\.s, p0/m, z0\.s, \1 -+** ret 
-+*/ -+TEST_UNIFORM_Z (subr_1_u32_m_tied1, svuint32_t, -+ z0 = svsubr_n_u32_m (p0, z0, 1), -+ z0 = svsubr_m (p0, z0, 1)) -+ -+/* -+** subr_1_u32_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.s), #1 -+** movprfx z0, z1 -+** subr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_u32_m_untied, svuint32_t, -+ z0 = svsubr_n_u32_m (p0, z1, 1), -+ z0 = svsubr_m (p0, z1, 1)) -+ -+/* -+** subr_m2_u32_m: -+** mov (z[0-9]+\.s), #-2 -+** subr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m2_u32_m, svuint32_t, -+ z0 = svsubr_n_u32_m (p0, z0, -2), -+ z0 = svsubr_m (p0, z0, -2)) -+ -+/* -+** subr_u32_z_tied1: -+** movprfx z0\.s, p0/z, z0\.s -+** subr z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_u32_z_tied1, svuint32_t, -+ z0 = svsubr_u32_z (p0, z0, z1), -+ z0 = svsubr_z (p0, z0, z1)) -+ -+/* -+** subr_u32_z_tied2: -+** movprfx z0\.s, p0/z, z0\.s -+** sub z0\.s, p0/m, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_u32_z_tied2, svuint32_t, -+ z0 = svsubr_u32_z (p0, z1, z0), -+ z0 = svsubr_z (p0, z1, z0)) -+ -+/* -+** subr_u32_z_untied: -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** subr z0\.s, p0/m, z0\.s, z2\.s -+** | -+** movprfx z0\.s, p0/z, z2\.s -+** sub z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (subr_u32_z_untied, svuint32_t, -+ z0 = svsubr_u32_z (p0, z1, z2), -+ z0 = svsubr_z (p0, z1, z2)) -+ -+/* -+** subr_w0_u32_z_tied1: -+** mov (z[0-9]+\.s), w0 -+** movprfx z0\.s, p0/z, z0\.s -+** subr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_w0_u32_z_tied1, svuint32_t, uint32_t, -+ z0 = svsubr_n_u32_z (p0, z0, x0), -+ z0 = svsubr_z (p0, z0, x0)) -+ -+/* -+** subr_w0_u32_z_untied: -+** mov (z[0-9]+\.s), w0 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** subr z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** sub z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_w0_u32_z_untied, svuint32_t, uint32_t, -+ z0 = svsubr_n_u32_z (p0, z1, x0), -+ z0 = svsubr_z (p0, z1, x0)) -+ -+/* -+** subr_1_u32_z_tied1: -+** mov (z[0-9]+\.s), #1 -+** movprfx z0\.s, p0/z, z0\.s -+** subr z0\.s, p0/m, z0\.s, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_u32_z_tied1, svuint32_t, -+ z0 = svsubr_n_u32_z (p0, z0, 1), -+ z0 = svsubr_z (p0, z0, 1)) -+ -+/* -+** subr_1_u32_z_untied: -+** mov (z[0-9]+\.s), #1 -+** ( -+** movprfx z0\.s, p0/z, z1\.s -+** subr z0\.s, p0/m, z0\.s, \1 -+** | -+** movprfx z0\.s, p0/z, \1 -+** sub z0\.s, p0/m, z0\.s, z1\.s -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_u32_z_untied, svuint32_t, -+ z0 = svsubr_n_u32_z (p0, z1, 1), -+ z0 = svsubr_z (p0, z1, 1)) -+ -+/* -+** subr_u32_x_tied1: -+** sub z0\.s, z1\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_u32_x_tied1, svuint32_t, -+ z0 = svsubr_u32_x (p0, z0, z1), -+ z0 = svsubr_x (p0, z0, z1)) -+ -+/* -+** subr_u32_x_tied2: -+** sub z0\.s, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_u32_x_tied2, svuint32_t, -+ z0 = svsubr_u32_x (p0, z1, z0), -+ z0 = svsubr_x (p0, z1, z0)) -+ -+/* -+** subr_u32_x_untied: -+** sub z0\.s, z2\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_u32_x_untied, svuint32_t, -+ z0 = svsubr_u32_x (p0, z1, z2), -+ z0 = svsubr_x (p0, z1, z2)) -+ -+/* -+** subr_w0_u32_x_tied1: -+** mov (z[0-9]+\.s), w0 -+** sub z0\.s, \1, z0\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_w0_u32_x_tied1, svuint32_t, uint32_t, -+ z0 = svsubr_n_u32_x (p0, z0, x0), -+ z0 = svsubr_x (p0, z0, x0)) -+ -+/* -+** subr_w0_u32_x_untied: -+** mov (z[0-9]+\.s), w0 -+** sub z0\.s, \1, z1\.s -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_w0_u32_x_untied, svuint32_t, uint32_t, -+ z0 = 
svsubr_n_u32_x (p0, z1, x0), -+ z0 = svsubr_x (p0, z1, x0)) -+ -+/* -+** subr_1_u32_x_tied1: -+** subr z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_u32_x_tied1, svuint32_t, -+ z0 = svsubr_n_u32_x (p0, z0, 1), -+ z0 = svsubr_x (p0, z0, 1)) -+ -+/* -+** subr_1_u32_x_untied: -+** movprfx z0, z1 -+** subr z0\.s, z0\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_u32_x_untied, svuint32_t, -+ z0 = svsubr_n_u32_x (p0, z1, 1), -+ z0 = svsubr_x (p0, z1, 1)) -+ -+/* -+** subr_127_u32_x: -+** subr z0\.s, z0\.s, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_127_u32_x, svuint32_t, -+ z0 = svsubr_n_u32_x (p0, z0, 127), -+ z0 = svsubr_x (p0, z0, 127)) -+ -+/* -+** subr_128_u32_x: -+** subr z0\.s, z0\.s, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_128_u32_x, svuint32_t, -+ z0 = svsubr_n_u32_x (p0, z0, 128), -+ z0 = svsubr_x (p0, z0, 128)) -+ -+/* -+** subr_255_u32_x: -+** subr z0\.s, z0\.s, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_255_u32_x, svuint32_t, -+ z0 = svsubr_n_u32_x (p0, z0, 255), -+ z0 = svsubr_x (p0, z0, 255)) -+ -+/* -+** subr_256_u32_x: -+** subr z0\.s, z0\.s, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_256_u32_x, svuint32_t, -+ z0 = svsubr_n_u32_x (p0, z0, 256), -+ z0 = svsubr_x (p0, z0, 256)) -+ -+/* -+** subr_511_u32_x: -+** mov (z[0-9]+\.s), #511 -+** sub z0\.s, \1, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_511_u32_x, svuint32_t, -+ z0 = svsubr_n_u32_x (p0, z0, 511), -+ z0 = svsubr_x (p0, z0, 511)) -+ -+/* -+** subr_512_u32_x: -+** subr z0\.s, z0\.s, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_512_u32_x, svuint32_t, -+ z0 = svsubr_n_u32_x (p0, z0, 512), -+ z0 = svsubr_x (p0, z0, 512)) -+ -+/* -+** subr_65280_u32_x: -+** subr z0\.s, z0\.s, #65280 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_65280_u32_x, svuint32_t, -+ z0 = svsubr_n_u32_x (p0, z0, 0xff00), -+ z0 = svsubr_x (p0, z0, 0xff00)) -+ -+/* -+** subr_65535_u32_x: -+** mov (z[0-9]+\.s), #65535 -+** sub z0\.s, \1, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_65535_u32_x, svuint32_t, -+ z0 = svsubr_n_u32_x (p0, z0, 65535), -+ z0 = svsubr_x (p0, z0, 65535)) -+ -+/* -+** subr_65536_u32_x: -+** mov (z[0-9]+\.s), #65536 -+** sub z0\.s, \1, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_65536_u32_x, svuint32_t, -+ z0 = svsubr_n_u32_x (p0, z0, 65536), -+ z0 = svsubr_x (p0, z0, 65536)) -+ -+/* -+** subr_m1_u32_x_tied1: -+** mov (z[0-9]+)\.b, #-1 -+** sub z0\.s, \1\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_u32_x_tied1, svuint32_t, -+ z0 = svsubr_n_u32_x (p0, z0, -1), -+ z0 = svsubr_x (p0, z0, -1)) -+ -+/* -+** subr_m1_u32_x_untied: -+** mov (z[0-9]+)\.b, #-1 -+** sub z0\.s, \1\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_u32_x_untied, svuint32_t, -+ z0 = svsubr_n_u32_x (p0, z1, -1), -+ z0 = svsubr_x (p0, z1, -1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_u64.c -new file mode 100644 -index 000000000..78d94515b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_u64.c -@@ -0,0 +1,344 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** subr_u64_m_tied1: -+** subr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_u64_m_tied1, svuint64_t, -+ z0 = svsubr_u64_m (p0, z0, z1), -+ z0 = svsubr_m (p0, z0, z1)) -+ -+/* -+** subr_u64_m_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** subr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_u64_m_tied2, svuint64_t, -+ z0 = svsubr_u64_m (p0, z1, z0), -+ z0 = svsubr_m (p0, z1, z0)) -+ -+/* -+** 
subr_u64_m_untied: -+** movprfx z0, z1 -+** subr z0\.d, p0/m, z0\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_u64_m_untied, svuint64_t, -+ z0 = svsubr_u64_m (p0, z1, z2), -+ z0 = svsubr_m (p0, z1, z2)) -+ -+/* -+** subr_x0_u64_m_tied1: -+** mov (z[0-9]+\.d), x0 -+** subr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_x0_u64_m_tied1, svuint64_t, uint64_t, -+ z0 = svsubr_n_u64_m (p0, z0, x0), -+ z0 = svsubr_m (p0, z0, x0)) -+ -+/* -+** subr_x0_u64_m_untied: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0, z1 -+** subr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_x0_u64_m_untied, svuint64_t, uint64_t, -+ z0 = svsubr_n_u64_m (p0, z1, x0), -+ z0 = svsubr_m (p0, z1, x0)) -+ -+/* -+** subr_1_u64_m_tied1: -+** mov (z[0-9]+\.d), #1 -+** subr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_u64_m_tied1, svuint64_t, -+ z0 = svsubr_n_u64_m (p0, z0, 1), -+ z0 = svsubr_m (p0, z0, 1)) -+ -+/* -+** subr_1_u64_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.d), #1 -+** movprfx z0, z1 -+** subr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_u64_m_untied, svuint64_t, -+ z0 = svsubr_n_u64_m (p0, z1, 1), -+ z0 = svsubr_m (p0, z1, 1)) -+ -+/* -+** subr_m2_u64_m: -+** mov (z[0-9]+\.d), #-2 -+** subr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m2_u64_m, svuint64_t, -+ z0 = svsubr_n_u64_m (p0, z0, -2), -+ z0 = svsubr_m (p0, z0, -2)) -+ -+/* -+** subr_u64_z_tied1: -+** movprfx z0\.d, p0/z, z0\.d -+** subr z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_u64_z_tied1, svuint64_t, -+ z0 = svsubr_u64_z (p0, z0, z1), -+ z0 = svsubr_z (p0, z0, z1)) -+ -+/* -+** subr_u64_z_tied2: -+** movprfx z0\.d, p0/z, z0\.d -+** sub z0\.d, p0/m, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_u64_z_tied2, svuint64_t, -+ z0 = svsubr_u64_z (p0, z1, z0), -+ z0 = svsubr_z (p0, z1, z0)) -+ -+/* -+** subr_u64_z_untied: -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** subr z0\.d, p0/m, z0\.d, z2\.d -+** | -+** movprfx z0\.d, p0/z, z2\.d -+** sub z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (subr_u64_z_untied, svuint64_t, -+ z0 = svsubr_u64_z (p0, z1, z2), -+ z0 = svsubr_z (p0, z1, z2)) -+ -+/* -+** subr_x0_u64_z_tied1: -+** mov (z[0-9]+\.d), x0 -+** movprfx z0\.d, p0/z, z0\.d -+** subr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_x0_u64_z_tied1, svuint64_t, uint64_t, -+ z0 = svsubr_n_u64_z (p0, z0, x0), -+ z0 = svsubr_z (p0, z0, x0)) -+ -+/* -+** subr_x0_u64_z_untied: -+** mov (z[0-9]+\.d), x0 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** subr z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** sub z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_x0_u64_z_untied, svuint64_t, uint64_t, -+ z0 = svsubr_n_u64_z (p0, z1, x0), -+ z0 = svsubr_z (p0, z1, x0)) -+ -+/* -+** subr_1_u64_z_tied1: -+** mov (z[0-9]+\.d), #1 -+** movprfx z0\.d, p0/z, z0\.d -+** subr z0\.d, p0/m, z0\.d, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_u64_z_tied1, svuint64_t, -+ z0 = svsubr_n_u64_z (p0, z0, 1), -+ z0 = svsubr_z (p0, z0, 1)) -+ -+/* -+** subr_1_u64_z_untied: -+** mov (z[0-9]+\.d), #1 -+** ( -+** movprfx z0\.d, p0/z, z1\.d -+** subr z0\.d, p0/m, z0\.d, \1 -+** | -+** movprfx z0\.d, p0/z, \1 -+** sub z0\.d, p0/m, z0\.d, z1\.d -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_u64_z_untied, svuint64_t, -+ z0 = svsubr_n_u64_z (p0, z1, 1), -+ z0 = svsubr_z (p0, z1, 1)) -+ -+/* -+** subr_u64_x_tied1: -+** sub z0\.d, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_u64_x_tied1, svuint64_t, -+ z0 = svsubr_u64_x (p0, z0, z1), -+ z0 = 
svsubr_x (p0, z0, z1)) -+ -+/* -+** subr_u64_x_tied2: -+** sub z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_u64_x_tied2, svuint64_t, -+ z0 = svsubr_u64_x (p0, z1, z0), -+ z0 = svsubr_x (p0, z1, z0)) -+ -+/* -+** subr_u64_x_untied: -+** sub z0\.d, z2\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_u64_x_untied, svuint64_t, -+ z0 = svsubr_u64_x (p0, z1, z2), -+ z0 = svsubr_x (p0, z1, z2)) -+ -+/* -+** subr_x0_u64_x_tied1: -+** mov (z[0-9]+\.d), x0 -+** sub z0\.d, \1, z0\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_x0_u64_x_tied1, svuint64_t, uint64_t, -+ z0 = svsubr_n_u64_x (p0, z0, x0), -+ z0 = svsubr_x (p0, z0, x0)) -+ -+/* -+** subr_x0_u64_x_untied: -+** mov (z[0-9]+\.d), x0 -+** sub z0\.d, \1, z1\.d -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_x0_u64_x_untied, svuint64_t, uint64_t, -+ z0 = svsubr_n_u64_x (p0, z1, x0), -+ z0 = svsubr_x (p0, z1, x0)) -+ -+/* -+** subr_1_u64_x_tied1: -+** subr z0\.d, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_u64_x_tied1, svuint64_t, -+ z0 = svsubr_n_u64_x (p0, z0, 1), -+ z0 = svsubr_x (p0, z0, 1)) -+ -+/* -+** subr_1_u64_x_untied: -+** movprfx z0, z1 -+** subr z0\.d, z0\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_u64_x_untied, svuint64_t, -+ z0 = svsubr_n_u64_x (p0, z1, 1), -+ z0 = svsubr_x (p0, z1, 1)) -+ -+/* -+** subr_127_u64_x: -+** subr z0\.d, z0\.d, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_127_u64_x, svuint64_t, -+ z0 = svsubr_n_u64_x (p0, z0, 127), -+ z0 = svsubr_x (p0, z0, 127)) -+ -+/* -+** subr_128_u64_x: -+** subr z0\.d, z0\.d, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_128_u64_x, svuint64_t, -+ z0 = svsubr_n_u64_x (p0, z0, 128), -+ z0 = svsubr_x (p0, z0, 128)) -+ -+/* -+** subr_255_u64_x: -+** subr z0\.d, z0\.d, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_255_u64_x, svuint64_t, -+ z0 = svsubr_n_u64_x (p0, z0, 255), -+ z0 = svsubr_x (p0, z0, 255)) -+ -+/* -+** subr_256_u64_x: -+** subr z0\.d, z0\.d, #256 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_256_u64_x, svuint64_t, -+ z0 = svsubr_n_u64_x (p0, z0, 256), -+ z0 = svsubr_x (p0, z0, 256)) -+ -+/* -+** subr_511_u64_x: -+** mov (z[0-9]+\.d), #511 -+** sub z0\.d, \1, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_511_u64_x, svuint64_t, -+ z0 = svsubr_n_u64_x (p0, z0, 511), -+ z0 = svsubr_x (p0, z0, 511)) -+ -+/* -+** subr_512_u64_x: -+** subr z0\.d, z0\.d, #512 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_512_u64_x, svuint64_t, -+ z0 = svsubr_n_u64_x (p0, z0, 512), -+ z0 = svsubr_x (p0, z0, 512)) -+ -+/* -+** subr_65280_u64_x: -+** subr z0\.d, z0\.d, #65280 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_65280_u64_x, svuint64_t, -+ z0 = svsubr_n_u64_x (p0, z0, 0xff00), -+ z0 = svsubr_x (p0, z0, 0xff00)) -+ -+/* -+** subr_65535_u64_x: -+** mov (z[0-9]+\.d), #65535 -+** sub z0\.d, \1, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_65535_u64_x, svuint64_t, -+ z0 = svsubr_n_u64_x (p0, z0, 65535), -+ z0 = svsubr_x (p0, z0, 65535)) -+ -+/* -+** subr_65536_u64_x: -+** mov (z[0-9]+\.d), #65536 -+** sub z0\.d, \1, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_65536_u64_x, svuint64_t, -+ z0 = svsubr_n_u64_x (p0, z0, 65536), -+ z0 = svsubr_x (p0, z0, 65536)) -+ -+/* -+** subr_m1_u64_x_tied1: -+** mov (z[0-9]+)\.b, #-1 -+** sub z0\.d, \1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_u64_x_tied1, svuint64_t, -+ z0 = svsubr_n_u64_x (p0, z0, -1), -+ z0 = svsubr_x (p0, z0, -1)) -+ -+/* -+** subr_m1_u64_x_untied: -+** mov (z[0-9]+)\.b, #-1 -+** sub z0\.d, \1\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_u64_x_untied, svuint64_t, -+ z0 = svsubr_n_u64_x (p0, z1, -1), -+ z0 = svsubr_x (p0, z1, -1)) -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_u8.c -new file mode 100644 -index 000000000..fe5f96da8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/subr_u8.c -@@ -0,0 +1,294 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** subr_u8_m_tied1: -+** subr z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (subr_u8_m_tied1, svuint8_t, -+ z0 = svsubr_u8_m (p0, z0, z1), -+ z0 = svsubr_m (p0, z0, z1)) -+ -+/* -+** subr_u8_m_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** subr z0\.b, p0/m, z0\.b, \1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (subr_u8_m_tied2, svuint8_t, -+ z0 = svsubr_u8_m (p0, z1, z0), -+ z0 = svsubr_m (p0, z1, z0)) -+ -+/* -+** subr_u8_m_untied: -+** movprfx z0, z1 -+** subr z0\.b, p0/m, z0\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (subr_u8_m_untied, svuint8_t, -+ z0 = svsubr_u8_m (p0, z1, z2), -+ z0 = svsubr_m (p0, z1, z2)) -+ -+/* -+** subr_w0_u8_m_tied1: -+** mov (z[0-9]+\.b), w0 -+** subr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_w0_u8_m_tied1, svuint8_t, uint8_t, -+ z0 = svsubr_n_u8_m (p0, z0, x0), -+ z0 = svsubr_m (p0, z0, x0)) -+ -+/* -+** subr_w0_u8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), w0 -+** movprfx z0, z1 -+** subr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_w0_u8_m_untied, svuint8_t, uint8_t, -+ z0 = svsubr_n_u8_m (p0, z1, x0), -+ z0 = svsubr_m (p0, z1, x0)) -+ -+/* -+** subr_1_u8_m_tied1: -+** mov (z[0-9]+\.b), #1 -+** subr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_u8_m_tied1, svuint8_t, -+ z0 = svsubr_n_u8_m (p0, z0, 1), -+ z0 = svsubr_m (p0, z0, 1)) -+ -+/* -+** subr_1_u8_m_untied: { xfail *-*-* } -+** mov (z[0-9]+\.b), #1 -+** movprfx z0, z1 -+** subr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_u8_m_untied, svuint8_t, -+ z0 = svsubr_n_u8_m (p0, z1, 1), -+ z0 = svsubr_m (p0, z1, 1)) -+ -+/* -+** subr_m1_u8_m: -+** mov (z[0-9]+\.b), #-1 -+** subr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_u8_m, svuint8_t, -+ z0 = svsubr_n_u8_m (p0, z0, -1), -+ z0 = svsubr_m (p0, z0, -1)) -+ -+/* -+** subr_u8_z_tied1: -+** movprfx z0\.b, p0/z, z0\.b -+** subr z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (subr_u8_z_tied1, svuint8_t, -+ z0 = svsubr_u8_z (p0, z0, z1), -+ z0 = svsubr_z (p0, z0, z1)) -+ -+/* -+** subr_u8_z_tied2: -+** movprfx z0\.b, p0/z, z0\.b -+** sub z0\.b, p0/m, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (subr_u8_z_tied2, svuint8_t, -+ z0 = svsubr_u8_z (p0, z1, z0), -+ z0 = svsubr_z (p0, z1, z0)) -+ -+/* -+** subr_u8_z_untied: -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** subr z0\.b, p0/m, z0\.b, z2\.b -+** | -+** movprfx z0\.b, p0/z, z2\.b -+** sub z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (subr_u8_z_untied, svuint8_t, -+ z0 = svsubr_u8_z (p0, z1, z2), -+ z0 = svsubr_z (p0, z1, z2)) -+ -+/* -+** subr_w0_u8_z_tied1: -+** mov (z[0-9]+\.b), w0 -+** movprfx z0\.b, p0/z, z0\.b -+** subr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_w0_u8_z_tied1, svuint8_t, uint8_t, -+ z0 = svsubr_n_u8_z (p0, z0, x0), -+ z0 = svsubr_z (p0, z0, x0)) -+ -+/* -+** subr_w0_u8_z_untied: -+** mov (z[0-9]+\.b), w0 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** subr z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** sub z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_w0_u8_z_untied, svuint8_t, uint8_t, -+ z0 = svsubr_n_u8_z (p0, z1, x0), -+ z0 = svsubr_z 
(p0, z1, x0)) -+ -+/* -+** subr_1_u8_z_tied1: -+** mov (z[0-9]+\.b), #1 -+** movprfx z0\.b, p0/z, z0\.b -+** subr z0\.b, p0/m, z0\.b, \1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_u8_z_tied1, svuint8_t, -+ z0 = svsubr_n_u8_z (p0, z0, 1), -+ z0 = svsubr_z (p0, z0, 1)) -+ -+/* -+** subr_1_u8_z_untied: -+** mov (z[0-9]+\.b), #1 -+** ( -+** movprfx z0\.b, p0/z, z1\.b -+** subr z0\.b, p0/m, z0\.b, \1 -+** | -+** movprfx z0\.b, p0/z, \1 -+** sub z0\.b, p0/m, z0\.b, z1\.b -+** ) -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_u8_z_untied, svuint8_t, -+ z0 = svsubr_n_u8_z (p0, z1, 1), -+ z0 = svsubr_z (p0, z1, 1)) -+ -+/* -+** subr_u8_x_tied1: -+** sub z0\.b, z1\.b, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (subr_u8_x_tied1, svuint8_t, -+ z0 = svsubr_u8_x (p0, z0, z1), -+ z0 = svsubr_x (p0, z0, z1)) -+ -+/* -+** subr_u8_x_tied2: -+** sub z0\.b, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (subr_u8_x_tied2, svuint8_t, -+ z0 = svsubr_u8_x (p0, z1, z0), -+ z0 = svsubr_x (p0, z1, z0)) -+ -+/* -+** subr_u8_x_untied: -+** sub z0\.b, z2\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (subr_u8_x_untied, svuint8_t, -+ z0 = svsubr_u8_x (p0, z1, z2), -+ z0 = svsubr_x (p0, z1, z2)) -+ -+/* -+** subr_w0_u8_x_tied1: -+** mov (z[0-9]+\.b), w0 -+** sub z0\.b, \1, z0\.b -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_w0_u8_x_tied1, svuint8_t, uint8_t, -+ z0 = svsubr_n_u8_x (p0, z0, x0), -+ z0 = svsubr_x (p0, z0, x0)) -+ -+/* -+** subr_w0_u8_x_untied: -+** mov (z[0-9]+\.b), w0 -+** sub z0\.b, \1, z1\.b -+** ret -+*/ -+TEST_UNIFORM_ZX (subr_w0_u8_x_untied, svuint8_t, uint8_t, -+ z0 = svsubr_n_u8_x (p0, z1, x0), -+ z0 = svsubr_x (p0, z1, x0)) -+ -+/* -+** subr_1_u8_x_tied1: -+** subr z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_u8_x_tied1, svuint8_t, -+ z0 = svsubr_n_u8_x (p0, z0, 1), -+ z0 = svsubr_x (p0, z0, 1)) -+ -+/* -+** subr_1_u8_x_untied: -+** movprfx z0, z1 -+** subr z0\.b, z0\.b, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_1_u8_x_untied, svuint8_t, -+ z0 = svsubr_n_u8_x (p0, z1, 1), -+ z0 = svsubr_x (p0, z1, 1)) -+ -+/* -+** subr_127_u8_x: -+** subr z0\.b, z0\.b, #127 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_127_u8_x, svuint8_t, -+ z0 = svsubr_n_u8_x (p0, z0, 127), -+ z0 = svsubr_x (p0, z0, 127)) -+ -+/* -+** subr_128_u8_x: -+** subr z0\.b, z0\.b, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_128_u8_x, svuint8_t, -+ z0 = svsubr_n_u8_x (p0, z0, 128), -+ z0 = svsubr_x (p0, z0, 128)) -+ -+/* -+** subr_255_u8_x: -+** subr z0\.b, z0\.b, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_255_u8_x, svuint8_t, -+ z0 = svsubr_n_u8_x (p0, z0, 255), -+ z0 = svsubr_x (p0, z0, 255)) -+ -+/* -+** subr_m1_u8_x: -+** subr z0\.b, z0\.b, #255 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m1_u8_x, svuint8_t, -+ z0 = svsubr_n_u8_x (p0, z0, -1), -+ z0 = svsubr_x (p0, z0, -1)) -+ -+/* -+** subr_m127_u8_x: -+** subr z0\.b, z0\.b, #129 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m127_u8_x, svuint8_t, -+ z0 = svsubr_n_u8_x (p0, z0, -127), -+ z0 = svsubr_x (p0, z0, -127)) -+ -+/* -+** subr_m128_u8_x: -+** subr z0\.b, z0\.b, #128 -+** ret -+*/ -+TEST_UNIFORM_Z (subr_m128_u8_x, svuint8_t, -+ z0 = svsubr_n_u8_x (p0, z0, -128), -+ z0 = svsubr_x (p0, z0, -128)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sudot_lane_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sudot_lane_s32.c -new file mode 100644 -index 000000000..c6d74a4af ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sudot_lane_s32.c -@@ -0,0 +1,97 @@ -+/* { dg-require-effective-target aarch64_asm_i8mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+sve+i8mm" } */ -+/* { dg-final { 
check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** sudot_lane_0_s32_tied1: -+** sudot z0\.s, z2\.b, z4\.b\[0\] -+** ret -+*/ -+TEST_TRIPLE_Z (sudot_lane_0_s32_tied1, svint32_t, svint8_t, svuint8_t, -+ z0 = svsudot_lane_s32 (z0, z2, z4, 0), -+ z0 = svsudot_lane (z0, z2, z4, 0)) -+ -+/* -+** sudot_lane_0_s32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z2 -+** sudot z0\.s, \1\.b, z4\.b\[0\] -+** ret -+*/ -+TEST_TRIPLE_Z_REV2 (sudot_lane_0_s32_tied2, svint32_t, svint8_t, svuint8_t, -+ z0_res = svsudot_lane_s32 (z2, z0, z4, 0), -+ z0_res = svsudot_lane (z2, z0, z4, 0)) -+ -+/* -+** sudot_lane_0_s32_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** sudot z0\.s, z2\.b, \1\.b\[0\] -+** ret -+*/ -+TEST_TRIPLE_Z_REV (sudot_lane_0_s32_tied3, svint32_t, svint8_t, svuint8_t, -+ z0_res = svsudot_lane_s32 (z4, z2, z0, 0), -+ z0_res = svsudot_lane (z4, z2, z0, 0)) -+ -+/* -+** sudot_lane_0_s32_untied: -+** movprfx z0, z1 -+** sudot z0\.s, z2\.b, z4\.b\[0\] -+** ret -+*/ -+TEST_TRIPLE_Z (sudot_lane_0_s32_untied, svint32_t, svint8_t, svuint8_t, -+ z0 = svsudot_lane_s32 (z1, z2, z4, 0), -+ z0 = svsudot_lane (z1, z2, z4, 0)) -+ -+/* -+** sudot_lane_1_s32: -+** sudot z0\.s, z2\.b, z5\.b\[1\] -+** ret -+*/ -+TEST_TRIPLE_Z (sudot_lane_1_s32, svint32_t, svint8_t, svuint8_t, -+ z0 = svsudot_lane_s32 (z0, z2, z5, 1), -+ z0 = svsudot_lane (z0, z2, z5, 1)) -+ -+/* -+** sudot_lane_2_s32: -+** sudot z0\.s, z2\.b, z5\.b\[2\] -+** ret -+*/ -+TEST_TRIPLE_Z (sudot_lane_2_s32, svint32_t, svint8_t, svuint8_t, -+ z0 = svsudot_lane_s32 (z0, z2, z5, 2), -+ z0 = svsudot_lane (z0, z2, z5, 2)) -+ -+/* -+** sudot_lane_3_s32: -+** sudot z0\.s, z2\.b, z5\.b\[3\] -+** ret -+*/ -+TEST_TRIPLE_Z (sudot_lane_3_s32, svint32_t, svint8_t, svuint8_t, -+ z0 = svsudot_lane_s32 (z0, z2, z5, 3), -+ z0 = svsudot_lane (z0, z2, z5, 3)) -+ -+/* -+** sudot_lane_z8_s32: -+** str d8, \[sp, -16\]! 
-+** mov (z[0-7])\.d, z8\.d -+** sudot z0\.s, z1\.b, \1\.b\[1\] -+** ldr d8, \[sp\], 16 -+** ret -+*/ -+TEST_TRIPLE_LANE_REG (sudot_lane_z8_s32, svint32_t, svint8_t, svuint8_t, -+ z8, -+ z0 = svsudot_lane_s32 (z0, z1, z8, 1), -+ z0 = svsudot_lane (z0, z1, z8, 1)) -+ -+/* -+** sudot_lane_z16_s32: -+** mov (z[0-7])\.d, z16\.d -+** sudot z0\.s, z1\.b, \1\.b\[1\] -+** ret -+*/ -+TEST_TRIPLE_LANE_REG (sudot_lane_z16_s32, svint32_t, svint8_t, svuint8_t, -+ z16, -+ z0 = svsudot_lane_s32 (z0, z1, z16, 1), -+ z0 = svsudot_lane (z0, z1, z16, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sudot_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sudot_s32.c -new file mode 100644 -index 000000000..4b452619e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/sudot_s32.c -@@ -0,0 +1,45 @@ -+/* { dg-require-effective-target aarch64_asm_i8mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+sve+i8mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** sudot_s32_tied1: -+** usdot z0\.s, z2\.b, z4\.b -+** ret -+*/ -+TEST_TRIPLE_Z (sudot_s32_tied1, svint32_t, svint8_t, svuint8_t, -+ z0 = svsudot_s32 (z0, z2, z4), -+ z0 = svsudot (z0, z2, z4)) -+ -+/* -+** sudot_s32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** usdot z0\.s, z2\.b, \1\.b -+** ret -+*/ -+TEST_TRIPLE_Z_REV (sudot_s32_tied2, svint32_t, svint8_t, svuint8_t, -+ z0_res = svsudot_s32 (z4, z2, z0), -+ z0_res = svsudot (z4, z2, z0)) -+ -+/* -+** sudot_w0_s32_tied: -+** mov (z[0-9]+\.b), w0 -+** usdot z0\.s, z2\.b, \1 -+** ret -+*/ -+TEST_TRIPLE_ZX (sudot_w0_s32_tied, svint32_t, svint8_t, uint8_t, -+ z0 = svsudot_n_s32 (z0, z2, x0), -+ z0 = svsudot (z0, z2, x0)) -+ -+/* -+** sudot_9_s32_tied: -+** mov (z[0-9]+\.b), #9 -+** usdot z0\.s, z2\.b, \1 -+** ret -+*/ -+TEST_TRIPLE_Z (sudot_9_s32_tied, svint32_t, svint8_t, uint8_t, -+ z0 = svsudot_n_s32 (z0, z2, 9), -+ z0 = svsudot (z0, z2, 9)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_bf16.c -new file mode 100644 -index 000000000..8c077d118 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_bf16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** tbl_bf16_tied1: -+** tbl z0\.h, z0\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (tbl_bf16_tied1, svbfloat16_t, svuint16_t, -+ z0 = svtbl_bf16 (z0, z4), -+ z0 = svtbl (z0, z4)) -+ -+/* -+** tbl_bf16_tied2: -+** tbl z0\.h, z4\.h, z0\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (tbl_bf16_tied2, svbfloat16_t, svuint16_t, -+ z0_res = svtbl_bf16 (z4, z0), -+ z0_res = svtbl (z4, z0)) -+ -+/* -+** tbl_bf16_untied: -+** tbl z0\.h, z1\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (tbl_bf16_untied, svbfloat16_t, svuint16_t, -+ z0 = svtbl_bf16 (z1, z4), -+ z0 = svtbl (z1, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_f16.c -new file mode 100644 -index 000000000..94b610412 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_f16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** tbl_f16_tied1: -+** tbl z0\.h, z0\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (tbl_f16_tied1, svfloat16_t, svuint16_t, -+ z0 = svtbl_f16 (z0, z4), -+ z0 = svtbl (z0, z4)) -+ -+/* -+** tbl_f16_tied2: -+** tbl z0\.h, z4\.h, z0\.h -+** ret -+*/ 
-+TEST_DUAL_Z_REV (tbl_f16_tied2, svfloat16_t, svuint16_t, -+ z0_res = svtbl_f16 (z4, z0), -+ z0_res = svtbl (z4, z0)) -+ -+/* -+** tbl_f16_untied: -+** tbl z0\.h, z1\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (tbl_f16_untied, svfloat16_t, svuint16_t, -+ z0 = svtbl_f16 (z1, z4), -+ z0 = svtbl (z1, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_f32.c -new file mode 100644 -index 000000000..741d3bdcf ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_f32.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** tbl_f32_tied1: -+** tbl z0\.s, z0\.s, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (tbl_f32_tied1, svfloat32_t, svuint32_t, -+ z0 = svtbl_f32 (z0, z4), -+ z0 = svtbl (z0, z4)) -+ -+/* -+** tbl_f32_tied2: -+** tbl z0\.s, z4\.s, z0\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (tbl_f32_tied2, svfloat32_t, svuint32_t, -+ z0_res = svtbl_f32 (z4, z0), -+ z0_res = svtbl (z4, z0)) -+ -+/* -+** tbl_f32_untied: -+** tbl z0\.s, z1\.s, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (tbl_f32_untied, svfloat32_t, svuint32_t, -+ z0 = svtbl_f32 (z1, z4), -+ z0 = svtbl (z1, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_f64.c -new file mode 100644 -index 000000000..3c24e9a59 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_f64.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** tbl_f64_tied1: -+** tbl z0\.d, z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (tbl_f64_tied1, svfloat64_t, svuint64_t, -+ z0 = svtbl_f64 (z0, z4), -+ z0 = svtbl (z0, z4)) -+ -+/* -+** tbl_f64_tied2: -+** tbl z0\.d, z4\.d, z0\.d -+** ret -+*/ -+TEST_DUAL_Z_REV (tbl_f64_tied2, svfloat64_t, svuint64_t, -+ z0_res = svtbl_f64 (z4, z0), -+ z0_res = svtbl (z4, z0)) -+ -+/* -+** tbl_f64_untied: -+** tbl z0\.d, z1\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (tbl_f64_untied, svfloat64_t, svuint64_t, -+ z0 = svtbl_f64 (z1, z4), -+ z0 = svtbl (z1, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_s16.c -new file mode 100644 -index 000000000..2ec9c389a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_s16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** tbl_s16_tied1: -+** tbl z0\.h, z0\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (tbl_s16_tied1, svint16_t, svuint16_t, -+ z0 = svtbl_s16 (z0, z4), -+ z0 = svtbl (z0, z4)) -+ -+/* -+** tbl_s16_tied2: -+** tbl z0\.h, z4\.h, z0\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (tbl_s16_tied2, svint16_t, svuint16_t, -+ z0_res = svtbl_s16 (z4, z0), -+ z0_res = svtbl (z4, z0)) -+ -+/* -+** tbl_s16_untied: -+** tbl z0\.h, z1\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (tbl_s16_untied, svint16_t, svuint16_t, -+ z0 = svtbl_s16 (z1, z4), -+ z0 = svtbl (z1, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_s32.c -new file mode 100644 -index 000000000..98b2d8d8b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_s32.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** tbl_s32_tied1: -+** tbl z0\.s, z0\.s, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (tbl_s32_tied1, svint32_t, 
svuint32_t, -+ z0 = svtbl_s32 (z0, z4), -+ z0 = svtbl (z0, z4)) -+ -+/* -+** tbl_s32_tied2: -+** tbl z0\.s, z4\.s, z0\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (tbl_s32_tied2, svint32_t, svuint32_t, -+ z0_res = svtbl_s32 (z4, z0), -+ z0_res = svtbl (z4, z0)) -+ -+/* -+** tbl_s32_untied: -+** tbl z0\.s, z1\.s, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (tbl_s32_untied, svint32_t, svuint32_t, -+ z0 = svtbl_s32 (z1, z4), -+ z0 = svtbl (z1, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_s64.c -new file mode 100644 -index 000000000..0138a80d2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_s64.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** tbl_s64_tied1: -+** tbl z0\.d, z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (tbl_s64_tied1, svint64_t, svuint64_t, -+ z0 = svtbl_s64 (z0, z4), -+ z0 = svtbl (z0, z4)) -+ -+/* -+** tbl_s64_tied2: -+** tbl z0\.d, z4\.d, z0\.d -+** ret -+*/ -+TEST_DUAL_Z_REV (tbl_s64_tied2, svint64_t, svuint64_t, -+ z0_res = svtbl_s64 (z4, z0), -+ z0_res = svtbl (z4, z0)) -+ -+/* -+** tbl_s64_untied: -+** tbl z0\.d, z1\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (tbl_s64_untied, svint64_t, svuint64_t, -+ z0 = svtbl_s64 (z1, z4), -+ z0 = svtbl (z1, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_s8.c -new file mode 100644 -index 000000000..7818d1b6d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_s8.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** tbl_s8_tied1: -+** tbl z0\.b, z0\.b, z4\.b -+** ret -+*/ -+TEST_DUAL_Z (tbl_s8_tied1, svint8_t, svuint8_t, -+ z0 = svtbl_s8 (z0, z4), -+ z0 = svtbl (z0, z4)) -+ -+/* -+** tbl_s8_tied2: -+** tbl z0\.b, z4\.b, z0\.b -+** ret -+*/ -+TEST_DUAL_Z_REV (tbl_s8_tied2, svint8_t, svuint8_t, -+ z0_res = svtbl_s8 (z4, z0), -+ z0_res = svtbl (z4, z0)) -+ -+/* -+** tbl_s8_untied: -+** tbl z0\.b, z1\.b, z4\.b -+** ret -+*/ -+TEST_DUAL_Z (tbl_s8_untied, svint8_t, svuint8_t, -+ z0 = svtbl_s8 (z1, z4), -+ z0 = svtbl (z1, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_u16.c -new file mode 100644 -index 000000000..f15da9211 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_u16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** tbl_u16_tied1: -+** tbl z0\.h, z0\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (tbl_u16_tied1, svuint16_t, svuint16_t, -+ z0 = svtbl_u16 (z0, z4), -+ z0 = svtbl (z0, z4)) -+ -+/* -+** tbl_u16_tied2: -+** tbl z0\.h, z4\.h, z0\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (tbl_u16_tied2, svuint16_t, svuint16_t, -+ z0_res = svtbl_u16 (z4, z0), -+ z0_res = svtbl (z4, z0)) -+ -+/* -+** tbl_u16_untied: -+** tbl z0\.h, z1\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (tbl_u16_untied, svuint16_t, svuint16_t, -+ z0 = svtbl_u16 (z1, z4), -+ z0 = svtbl (z1, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_u32.c -new file mode 100644 -index 000000000..494300436 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_u32.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** 
tbl_u32_tied1: -+** tbl z0\.s, z0\.s, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (tbl_u32_tied1, svuint32_t, svuint32_t, -+ z0 = svtbl_u32 (z0, z4), -+ z0 = svtbl (z0, z4)) -+ -+/* -+** tbl_u32_tied2: -+** tbl z0\.s, z4\.s, z0\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (tbl_u32_tied2, svuint32_t, svuint32_t, -+ z0_res = svtbl_u32 (z4, z0), -+ z0_res = svtbl (z4, z0)) -+ -+/* -+** tbl_u32_untied: -+** tbl z0\.s, z1\.s, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (tbl_u32_untied, svuint32_t, svuint32_t, -+ z0 = svtbl_u32 (z1, z4), -+ z0 = svtbl (z1, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_u64.c -new file mode 100644 -index 000000000..158990e12 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_u64.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** tbl_u64_tied1: -+** tbl z0\.d, z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (tbl_u64_tied1, svuint64_t, svuint64_t, -+ z0 = svtbl_u64 (z0, z4), -+ z0 = svtbl (z0, z4)) -+ -+/* -+** tbl_u64_tied2: -+** tbl z0\.d, z4\.d, z0\.d -+** ret -+*/ -+TEST_DUAL_Z_REV (tbl_u64_tied2, svuint64_t, svuint64_t, -+ z0_res = svtbl_u64 (z4, z0), -+ z0_res = svtbl (z4, z0)) -+ -+/* -+** tbl_u64_untied: -+** tbl z0\.d, z1\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (tbl_u64_untied, svuint64_t, svuint64_t, -+ z0 = svtbl_u64 (z1, z4), -+ z0 = svtbl (z1, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_u8.c -new file mode 100644 -index 000000000..a46309a95 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tbl_u8.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** tbl_u8_tied1: -+** tbl z0\.b, z0\.b, z4\.b -+** ret -+*/ -+TEST_DUAL_Z (tbl_u8_tied1, svuint8_t, svuint8_t, -+ z0 = svtbl_u8 (z0, z4), -+ z0 = svtbl (z0, z4)) -+ -+/* -+** tbl_u8_tied2: -+** tbl z0\.b, z4\.b, z0\.b -+** ret -+*/ -+TEST_DUAL_Z_REV (tbl_u8_tied2, svuint8_t, svuint8_t, -+ z0_res = svtbl_u8 (z4, z0), -+ z0_res = svtbl (z4, z0)) -+ -+/* -+** tbl_u8_untied: -+** tbl z0\.b, z1\.b, z4\.b -+** ret -+*/ -+TEST_DUAL_Z (tbl_u8_untied, svuint8_t, svuint8_t, -+ z0 = svtbl_u8 (z1, z4), -+ z0 = svtbl (z1, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h -new file mode 100644 -index 000000000..d1f8fdb13 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h -@@ -0,0 +1,424 @@ -+#ifndef TEST_SVE_ACLE_H -+#define TEST_SVE_ACLE_H 1 -+ -+#include -+ -+#if defined (TEST_OVERLOADS) -+#define INVOKE(CODE1, CODE2) CODE2 -+#elif defined (TEST_FULL) -+#define INVOKE(CODE1, CODE2) CODE1 -+#else -+#error "Please define -DTEST_OVERLOADS or -DTEST_FULL" -+#endif -+ -+#ifdef __cplusplus -+#define PROTO(NAME, RET, ARGS) extern "C" RET NAME ARGS; RET NAME ARGS -+#else -+#define PROTO(NAME, RET, ARGS) RET NAME ARGS -+#endif -+ -+#define TEST_UNIFORM_Z(NAME, TYPE, CODE1, CODE2) \ -+ PROTO (NAME, TYPE, (TYPE z0, TYPE z1, TYPE z2, TYPE z3, \ -+ svbool_t p0, svbool_t p1)) \ -+ { \ -+ INVOKE (CODE1, CODE2); \ -+ return z0; \ -+ } -+ -+#define TEST_UNIFORM_P(NAME, CODE1, CODE2) \ -+ PROTO (NAME, svbool_t, (svbool_t p0, svbool_t p1, \ -+ svbool_t p2, svbool_t p3)) \ -+ { \ -+ INVOKE (CODE1, CODE2); \ -+ return p0; \ -+ } -+ -+#define TEST_UNIFORM_P_SINGLE(NAME, CODE) \ -+ PROTO (NAME, 
svbool_t, (svbool_t p0, svbool_t p1, \ -+ svbool_t p2, svbool_t p3)) \ -+ { \ -+ CODE; \ -+ return p0; \ -+ } -+ -+#define TEST_UNIFORM_S(NAME, TYPE, CODE1, CODE2) \ -+ PROTO (NAME, TYPE, (TYPE x0, TYPE x1, TYPE x2, TYPE x3, \ -+ svbool_t p0, svbool_t p1)) \ -+ { \ -+ INVOKE (CODE1, CODE2); \ -+ return x0; \ -+ } -+ -+#define TEST_DUAL_Z(NAME, TYPE1, TYPE2, CODE1, CODE2) \ -+ PROTO (NAME, TYPE1, (TYPE1 z0, TYPE1 z1, TYPE1 z2, TYPE1 z3, \ -+ TYPE2 z4, TYPE2 z5, TYPE2 z6, TYPE2 z7, \ -+ svbool_t p0, svbool_t p1)) \ -+ { \ -+ INVOKE (CODE1, CODE2); \ -+ return z0; \ -+ } -+ -+#define TEST_DUAL_Z_REV(NAME, TYPE1, TYPE2, CODE1, CODE2) \ -+ PROTO (NAME, TYPE1, (TYPE2 z0, TYPE2 z1, TYPE2 z2, TYPE2 z3, \ -+ TYPE1 z4, TYPE1 z5, TYPE1 z6, TYPE1 z7, \ -+ svbool_t p0, svbool_t p1)) \ -+ { \ -+ TYPE1 z0_res; \ -+ INVOKE (CODE1, CODE2); \ -+ return z0_res; \ -+ } -+ -+#define TEST_TRIPLE_Z(NAME, TYPE1, TYPE2, TYPE3, CODE1, CODE2) \ -+ PROTO (NAME, TYPE1, (TYPE1 z0, TYPE1 z1, TYPE2 z2, TYPE2 z3, \ -+ TYPE3 z4, TYPE3 z5, \ -+ svbool_t p0, svbool_t p1)) \ -+ { \ -+ INVOKE (CODE1, CODE2); \ -+ return z0; \ -+ } -+ -+#define TEST_TRIPLE_Z_REV2(NAME, TYPE1, TYPE2, TYPE3, CODE1, CODE2)\ -+ PROTO (NAME, TYPE1, (TYPE2 z0, TYPE2 z1, TYPE1 z2, TYPE1 z3, \ -+ TYPE3 z4, TYPE3 z5, \ -+ svbool_t p0, svbool_t p1)) \ -+ { \ -+ TYPE1 z0_res; \ -+ INVOKE (CODE1, CODE2); \ -+ return z0_res; \ -+ } -+ -+#define TEST_TRIPLE_Z_REV(NAME, TYPE1, TYPE2, TYPE3, CODE1, CODE2)\ -+ PROTO (NAME, TYPE1, (TYPE3 z0, TYPE3 z1, TYPE2 z2, TYPE2 z3, \ -+ TYPE1 z4, TYPE1 z5, \ -+ svbool_t p0, svbool_t p1)) \ -+ { \ -+ TYPE1 z0_res; \ -+ INVOKE (CODE1, CODE2); \ -+ return z0_res; \ -+ } -+ -+#define TEST_DUAL_LANE_REG(NAME, ZTYPE1, ZTYPE2, REG, CODE1, CODE2) \ -+ PROTO (NAME, void, (void)) \ -+ { \ -+ register ZTYPE1 z0 __asm ("z0"); \ -+ register ZTYPE2 z1 __asm ("z1"); \ -+ register ZTYPE2 REG __asm (#REG); \ -+ __asm volatile ("" : "=w" (z0), "=w" (z1), "=w" (REG)); \ -+ INVOKE (CODE1, CODE2); \ -+ __asm volatile ("" :: "w" (z0)); \ -+ } -+ -+#define TEST_TYPE_CHANGE_Z(NAME, TYPE1, TYPE2, CODE1, CODE2) \ -+ PROTO (NAME, TYPE1, (TYPE2 z0, TYPE2 z1, TYPE2 z2, TYPE2 z3, \ -+ svbool_t p0, svbool_t p1)) \ -+ { \ -+ TYPE1 z0_res; \ -+ INVOKE (CODE1, CODE2); \ -+ return z0_res; \ -+ } -+ -+#define TEST_TRIPLE_LANE_REG(NAME, ZTYPE1, ZTYPE2, ZTYPE3, REG, CODE1, CODE2) \ -+ PROTO (NAME, void, (void)) \ -+ { \ -+ register ZTYPE1 z0 __asm ("z0"); \ -+ register ZTYPE2 z1 __asm ("z1"); \ -+ register ZTYPE3 REG __asm (#REG); \ -+ __asm volatile ("" : "=w" (z0), "=w" (z1), "=w" (REG)); \ -+ INVOKE (CODE1, CODE2); \ -+ __asm volatile ("" :: "w" (z0)); \ -+ } -+ -+#define TEST_TRIPLE_ZX(NAME, TYPE1, TYPE2, TYPE3, CODE1, CODE2) \ -+ PROTO (NAME, TYPE1, (TYPE1 z0, TYPE1 z1, TYPE2 z2, TYPE2 z3, \ -+ TYPE3 x0, TYPE3 x1, \ -+ svbool_t p0, svbool_t p1)) \ -+ { \ -+ INVOKE (CODE1, CODE2); \ -+ return z0; \ -+ } -+ -+#define TEST_UNIFORM_ZX(NAME, ZTYPE, STYPE, CODE1, CODE2) \ -+ PROTO (NAME, ZTYPE, (ZTYPE z0, ZTYPE z1, ZTYPE z2, ZTYPE z3, \ -+ svbool_t p0, STYPE x0)) \ -+ { \ -+ INVOKE (CODE1, CODE2); \ -+ return z0; \ -+ } -+ -+#define TEST_UNIFORM_ZD(NAME, ZTYPE, STYPE, CODE1, CODE2) \ -+ PROTO (NAME, ZTYPE, (ZTYPE z0, ZTYPE z1, ZTYPE z2, ZTYPE z3, \ -+ svbool_t p0, STYPE d4)) \ -+ { \ -+ INVOKE (CODE1, CODE2); \ -+ return z0; \ -+ } -+ -+#define TEST_UNIFORM_PS(NAME, CODE1, CODE2) \ -+ PROTO (NAME, svbool_t, (svbool_t p0, svbool_t p1, \ -+ svbool_t p2, svbool_t p3, bool x0)) \ -+ { \ -+ INVOKE (CODE1, CODE2); \ -+ return p0; \ -+ } -+ -+#define 
TEST_DUAL_ZD(NAME, ZTYPE1, ZTYPE2, STYPE, CODE1, CODE2) \ -+ PROTO (NAME, ZTYPE1, (ZTYPE1 z0, ZTYPE1 z1, ZTYPE1 z2, \ -+ ZTYPE1 z3, ZTYPE2 z4, ZTYPE2 z5, \ -+ ZTYPE2 z6, STYPE d7, svbool_t p0, \ -+ svbool_t p1)) \ -+ { \ -+ INVOKE (CODE1, CODE2); \ -+ return z0; \ -+ } -+ -+#define TEST_DUAL_ZX(NAME, ZTYPE1, ZTYPE2, STYPE, CODE1, CODE2) \ -+ PROTO (NAME, ZTYPE1, (ZTYPE1 z0, ZTYPE1 z1, ZTYPE1 z2, \ -+ ZTYPE1 z3, ZTYPE2 z4, ZTYPE2 z5, \ -+ ZTYPE2 z6, ZTYPE2 z7, svbool_t p0, \ -+ svbool_t p1, STYPE x0)) \ -+ { \ -+ INVOKE (CODE1, CODE2); \ -+ return z0; \ -+ } -+ -+#define TEST_TYPE_CHANGE_ZX(NAME, ZTYPE1, ZTYPE2, STYPE, CODE1, CODE2) \ -+ PROTO (NAME, ZTYPE1, (ZTYPE2 z0, ZTYPE2 z1, ZTYPE2 z2, \ -+ ZTYPE2 z3, svbool_t p0, svbool_t p1, \ -+ STYPE x0)) \ -+ { \ -+ ZTYPE1 z0_res; \ -+ INVOKE (CODE1, CODE2); \ -+ return z0_res; \ -+ } -+ -+#define TEST_LOAD(NAME, ZTYPE, STYPE, CODE1, CODE2) \ -+ PROTO (NAME, ZTYPE, (svbool_t p0, const STYPE *x0, \ -+ intptr_t x1)) \ -+ { \ -+ ZTYPE z0; \ -+ INVOKE (CODE1, CODE2); \ -+ return z0; \ -+ } -+ -+#define TEST_LOAD_GATHER_SZ(NAME, RES_TYPE, STYPE, ZTYPE, CODE1, CODE2) \ -+ PROTO (NAME, RES_TYPE, (ZTYPE z0, ZTYPE z1, svbool_t p0, \ -+ const STYPE *x0)) \ -+ { \ -+ RES_TYPE z0_res; \ -+ INVOKE (CODE1, CODE2); \ -+ return z0_res; \ -+ } -+ -+#define TEST_LOAD_GATHER_ZS(NAME, RES_TYPE, ZTYPE, CODE1, CODE2) \ -+ PROTO (NAME, RES_TYPE, (ZTYPE z0, ZTYPE z1, svbool_t p0, \ -+ int64_t x0)) \ -+ { \ -+ RES_TYPE z0_res; \ -+ INVOKE (CODE1, CODE2); \ -+ return z0_res; \ -+ } -+ -+#define TEST_PREFETCH(NAME, STYPE, CODE1, CODE2) \ -+ PROTO (NAME, void, (svbool_t p0, const STYPE *x0, \ -+ intptr_t x1)) \ -+ { \ -+ INVOKE (CODE1, CODE2); \ -+ } -+ -+#define TEST_PREFETCH_GATHER_SZ(NAME, ZTYPE, CODE1, CODE2) \ -+ PROTO (NAME, void, (ZTYPE z0, ZTYPE z1, svbool_t p0, \ -+ const void *x0)) \ -+ { \ -+ INVOKE (CODE1, CODE2); \ -+ } -+ -+#define TEST_PREFETCH_GATHER_ZS(NAME, ZTYPE, CODE1, CODE2) \ -+ PROTO (NAME, void, (ZTYPE z0, ZTYPE z1, svbool_t p0, \ -+ int64_t x0)) \ -+ { \ -+ INVOKE (CODE1, CODE2); \ -+ } -+ -+#define TEST_STORE(NAME, ZTYPE, STYPE, CODE1, CODE2) \ -+ PROTO (NAME, void, (ZTYPE z0, svbool_t p0, STYPE *x0, \ -+ intptr_t x1)) \ -+ { \ -+ INVOKE (CODE1, CODE2); \ -+ } -+ -+#define TEST_STORE_SCATTER_SZ(NAME, DATA_TYPE, STYPE, ZTYPE, CODE1, CODE2) \ -+ PROTO (NAME, void, (DATA_TYPE z0, ZTYPE z1, svbool_t p0, \ -+ STYPE *x0)) \ -+ { \ -+ INVOKE (CODE1, CODE2); \ -+ } -+ -+#define TEST_STORE_SCATTER_ZS(NAME, DATA_TYPE, ZTYPE, CODE1, CODE2) \ -+ PROTO (NAME, void, (DATA_TYPE z0, ZTYPE z1, svbool_t p0, \ -+ int64_t x0)) \ -+ { \ -+ INVOKE (CODE1, CODE2); \ -+ } -+ -+#define TEST_P(NAME, CODE1, CODE2) \ -+ PROTO (NAME, svbool_t, (void)) \ -+ { \ -+ svbool_t p0; \ -+ INVOKE (CODE1, CODE2); \ -+ return p0; \ -+ } -+ -+#define TEST_PTEST(NAME, TYPE, CODE) \ -+ PROTO (NAME, TYPE, (svbool_t p0, svbool_t p1, svbool_t p2, \ -+ svbool_t p3, TYPE x0, TYPE x1)) \ -+ { \ -+ INVOKE (CODE, CODE); \ -+ return x0; \ -+ } -+ -+#define TEST_COMPARE_S(NAME, TYPE, CODE1, CODE2) \ -+ PROTO (NAME, svbool_t, (TYPE x0, TYPE x1)) \ -+ { \ -+ svbool_t p0; \ -+ INVOKE (CODE1, CODE2); \ -+ return p0; \ -+ } -+ -+#define TEST_COMPARE_Z(NAME, TYPE, CODE1, CODE2) \ -+ PROTO (NAME, svbool_t, (TYPE z0, TYPE z1, \ -+ svbool_t p0, svbool_t p1)) \ -+ { \ -+ INVOKE (CODE1, CODE2); \ -+ return p0; \ -+ } -+ -+#define TEST_COMPARE_ZX(NAME, ZTYPE, STYPE, CODE1, CODE2) \ -+ PROTO (NAME, svbool_t, (ZTYPE z0, ZTYPE z1, svbool_t p0, \ -+ svbool_t p1, STYPE x0)) \ -+ { \ -+ INVOKE (CODE1, CODE2); \ -+ 
return p0; \ -+ } -+ -+#define TEST_COMPARE_ZD(NAME, ZTYPE, STYPE, CODE1, CODE2) \ -+ PROTO (NAME, svbool_t, (ZTYPE z0, ZTYPE z1, ZTYPE z2, \ -+ ZTYPE z3, svbool_t p0, svbool_t p1, \ -+ STYPE d4)) \ -+ { \ -+ INVOKE (CODE1, CODE2); \ -+ return p0; \ -+ } -+ -+#define TEST_COMPARE_DUAL_Z(NAME, TYPE1, TYPE2, CODE1, CODE2) \ -+ PROTO (NAME, svbool_t, (TYPE1 z0, TYPE2 z1, \ -+ svbool_t p0, svbool_t p1)) \ -+ { \ -+ INVOKE (CODE1, CODE2); \ -+ return p0; \ -+ } -+ -+#define TEST_REDUCTION_X(NAME, STYPE, ZTYPE, CODE1, CODE2) \ -+ PROTO (NAME, STYPE, (ZTYPE z0, ZTYPE z1, svbool_t p0)) \ -+ { \ -+ STYPE x0; \ -+ INVOKE (CODE1, CODE2); \ -+ return x0; \ -+ } -+ -+#define TEST_REDUCTION_D(NAME, STYPE, ZTYPE, CODE1, CODE2) \ -+ PROTO (NAME, STYPE, (ZTYPE z0, ZTYPE z1, svbool_t p0)) \ -+ { \ -+ STYPE d0; \ -+ INVOKE (CODE1, CODE2); \ -+ return d0; \ -+ } -+ -+#define TEST_FOLD_LEFT_D(NAME, STYPE, ZTYPE, CODE1, CODE2) \ -+ PROTO (NAME, STYPE, (STYPE d0, STYPE d1, ZTYPE z2, \ -+ svbool_t p0)) \ -+ { \ -+ INVOKE (CODE1, CODE2); \ -+ return d0; \ -+ } -+ -+#define TEST_FOLD_LEFT_X(NAME, STYPE, ZTYPE, CODE1, CODE2) \ -+ PROTO (NAME, STYPE, (STYPE x0, STYPE x1, ZTYPE z0, \ -+ svbool_t p0)) \ -+ { \ -+ INVOKE (CODE1, CODE2); \ -+ return x0; \ -+ } -+ -+#define TEST_S(NAME, ZTYPE, STYPE, CODE) \ -+ PROTO (NAME, ZTYPE, (STYPE x0, STYPE x1)) \ -+ { \ -+ ZTYPE z0; \ -+ CODE; \ -+ return z0; \ -+ } -+ -+#define TEST_ADR(NAME, TYPE1, TYPE2, CODE1, CODE2) \ -+ PROTO (NAME, TYPE1, (TYPE1 z0, TYPE2 z1)) \ -+ { \ -+ INVOKE (CODE1, CODE2); \ -+ return z0; \ -+ } -+ -+#define TEST_UNDEF(NAME, TYPE, CODE) \ -+ PROTO (NAME, TYPE, (void)) \ -+ { \ -+ TYPE z0; \ -+ CODE; \ -+ return z0; \ -+ } -+ -+#define TEST_CREATE(NAME, TTYPE, ZTYPE, CODE1, CODE2) \ -+ PROTO (NAME, TTYPE, (ZTYPE unused0, ZTYPE unused1, \ -+ ZTYPE unused2, ZTYPE unused3, \ -+ ZTYPE z4, ZTYPE z5, ZTYPE z6, ZTYPE z7)) \ -+ { \ -+ TTYPE z0; \ -+ INVOKE (CODE1, CODE2); \ -+ return z0; \ -+ } -+ -+#define TEST_GET(NAME, TTYPE, ZTYPE, CODE1, CODE2) \ -+ PROTO (NAME, void, (ZTYPE unused0, ZTYPE unused1, \ -+ ZTYPE unused2, ZTYPE unused3, TTYPE z4)) \ -+ { \ -+ register ZTYPE z0 __asm ("z0"); \ -+ register ZTYPE z4_res __asm ("z4"); \ -+ register ZTYPE z5_res __asm ("z5"); \ -+ register ZTYPE z6_res __asm ("z6"); \ -+ register ZTYPE z7_res __asm ("z7"); \ -+ INVOKE (CODE1, CODE2); \ -+ __asm volatile ("" :: "w" (z0), "w" (z4_res), "w" (z5_res), \ -+ "w" (z6_res), "w" (z7_res)); \ -+ } -+ -+#define TEST_SET(NAME, TTYPE, ZTYPE, CODE1, CODE2) \ -+ PROTO (NAME, void, (ZTYPE z0, ZTYPE z1, ZTYPE z2, ZTYPE z3, \ -+ TTYPE z4)) \ -+ { \ -+ register TTYPE z24 __asm ("z24"); \ -+ INVOKE (CODE1, CODE2); \ -+ __asm volatile ("" :: "w" (z4), "w" (z24)); \ -+ } -+ -+#define TEST_TBL2(NAME, TTYPE, ZTYPE, UTYPE, CODE1, CODE2) \ -+ PROTO (NAME, ZTYPE, (TTYPE z0, TTYPE z2, UTYPE z4)) \ -+ { \ -+ register ZTYPE z0_res __asm ("z0"); \ -+ INVOKE (CODE1, CODE2); \ -+ return z0_res; \ -+ } -+ -+#define TEST_TBL2_REV(NAME, TTYPE, ZTYPE, UTYPE, CODE1, CODE2) \ -+ PROTO (NAME, ZTYPE, (UTYPE z0, TTYPE z1, TTYPE z3)) \ -+ { \ -+ register ZTYPE z0_res __asm ("z0"); \ -+ INVOKE (CODE1, CODE2); \ -+ return z0_res; \ -+ } -+ -+#endif -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f16.c -new file mode 100644 -index 000000000..3a00716e3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f16.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include 
"test_sve_acle.h" -+ -+/* -+** tmad_0_f16_tied1: -+** ftmad z0\.h, z0\.h, z1\.h, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (tmad_0_f16_tied1, svfloat16_t, -+ z0 = svtmad_f16 (z0, z1, 0), -+ z0 = svtmad (z0, z1, 0)) -+ -+/* -+** tmad_0_f16_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** ftmad z0\.h, z0\.h, \1\.h, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (tmad_0_f16_tied2, svfloat16_t, -+ z0 = svtmad_f16 (z1, z0, 0), -+ z0 = svtmad (z1, z0, 0)) -+ -+/* -+** tmad_0_f16_untied: -+** movprfx z0, z1 -+** ftmad z0\.h, z0\.h, z2\.h, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (tmad_0_f16_untied, svfloat16_t, -+ z0 = svtmad_f16 (z1, z2, 0), -+ z0 = svtmad (z1, z2, 0)) -+ -+/* -+** tmad_1_f16: -+** ftmad z0\.h, z0\.h, z1\.h, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (tmad_1_f16, svfloat16_t, -+ z0 = svtmad_f16 (z0, z1, 1), -+ z0 = svtmad (z0, z1, 1)) -+ -+/* -+** tmad_2_f16: -+** ftmad z0\.h, z0\.h, z1\.h, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (tmad_2_f16, svfloat16_t, -+ z0 = svtmad_f16 (z0, z1, 2), -+ z0 = svtmad (z0, z1, 2)) -+ -+/* -+** tmad_3_f16: -+** ftmad z0\.h, z0\.h, z1\.h, #3 -+** ret -+*/ -+TEST_UNIFORM_Z (tmad_3_f16, svfloat16_t, -+ z0 = svtmad_f16 (z0, z1, 3), -+ z0 = svtmad (z0, z1, 3)) -+ -+/* -+** tmad_4_f16: -+** ftmad z0\.h, z0\.h, z1\.h, #4 -+** ret -+*/ -+TEST_UNIFORM_Z (tmad_4_f16, svfloat16_t, -+ z0 = svtmad_f16 (z0, z1, 4), -+ z0 = svtmad (z0, z1, 4)) -+ -+/* -+** tmad_5_f16: -+** ftmad z0\.h, z0\.h, z1\.h, #5 -+** ret -+*/ -+TEST_UNIFORM_Z (tmad_5_f16, svfloat16_t, -+ z0 = svtmad_f16 (z0, z1, 5), -+ z0 = svtmad (z0, z1, 5)) -+ -+/* -+** tmad_6_f16: -+** ftmad z0\.h, z0\.h, z1\.h, #6 -+** ret -+*/ -+TEST_UNIFORM_Z (tmad_6_f16, svfloat16_t, -+ z0 = svtmad_f16 (z0, z1, 6), -+ z0 = svtmad (z0, z1, 6)) -+ -+/* -+** tmad_7_f16: -+** ftmad z0\.h, z0\.h, z1\.h, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (tmad_7_f16, svfloat16_t, -+ z0 = svtmad_f16 (z0, z1, 7), -+ z0 = svtmad (z0, z1, 7)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f32.c -new file mode 100644 -index 000000000..b73d420fb ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f32.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** tmad_0_f32_tied1: -+** ftmad z0\.s, z0\.s, z1\.s, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (tmad_0_f32_tied1, svfloat32_t, -+ z0 = svtmad_f32 (z0, z1, 0), -+ z0 = svtmad (z0, z1, 0)) -+ -+/* -+** tmad_0_f32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z1 -+** ftmad z0\.s, z0\.s, \1\.s, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (tmad_0_f32_tied2, svfloat32_t, -+ z0 = svtmad_f32 (z1, z0, 0), -+ z0 = svtmad (z1, z0, 0)) -+ -+/* -+** tmad_0_f32_untied: -+** movprfx z0, z1 -+** ftmad z0\.s, z0\.s, z2\.s, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (tmad_0_f32_untied, svfloat32_t, -+ z0 = svtmad_f32 (z1, z2, 0), -+ z0 = svtmad (z1, z2, 0)) -+ -+/* -+** tmad_1_f32: -+** ftmad z0\.s, z0\.s, z1\.s, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (tmad_1_f32, svfloat32_t, -+ z0 = svtmad_f32 (z0, z1, 1), -+ z0 = svtmad (z0, z1, 1)) -+ -+/* -+** tmad_2_f32: -+** ftmad z0\.s, z0\.s, z1\.s, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (tmad_2_f32, svfloat32_t, -+ z0 = svtmad_f32 (z0, z1, 2), -+ z0 = svtmad (z0, z1, 2)) -+ -+/* -+** tmad_3_f32: -+** ftmad z0\.s, z0\.s, z1\.s, #3 -+** ret -+*/ -+TEST_UNIFORM_Z (tmad_3_f32, svfloat32_t, -+ z0 = svtmad_f32 (z0, z1, 3), -+ z0 = svtmad (z0, z1, 3)) -+ -+/* -+** tmad_4_f32: -+** ftmad z0\.s, z0\.s, z1\.s, #4 -+** ret -+*/ -+TEST_UNIFORM_Z (tmad_4_f32, 
svfloat32_t, -+ z0 = svtmad_f32 (z0, z1, 4), -+ z0 = svtmad (z0, z1, 4)) -+ -+/* -+** tmad_5_f32: -+** ftmad z0\.s, z0\.s, z1\.s, #5 -+** ret -+*/ -+TEST_UNIFORM_Z (tmad_5_f32, svfloat32_t, -+ z0 = svtmad_f32 (z0, z1, 5), -+ z0 = svtmad (z0, z1, 5)) -+ -+/* -+** tmad_6_f32: -+** ftmad z0\.s, z0\.s, z1\.s, #6 -+** ret -+*/ -+TEST_UNIFORM_Z (tmad_6_f32, svfloat32_t, -+ z0 = svtmad_f32 (z0, z1, 6), -+ z0 = svtmad (z0, z1, 6)) -+ -+/* -+** tmad_7_f32: -+** ftmad z0\.s, z0\.s, z1\.s, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (tmad_7_f32, svfloat32_t, -+ z0 = svtmad_f32 (z0, z1, 7), -+ z0 = svtmad (z0, z1, 7)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f64.c -new file mode 100644 -index 000000000..fc31928a6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tmad_f64.c -@@ -0,0 +1,96 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** tmad_0_f64_tied1: -+** ftmad z0\.d, z0\.d, z1\.d, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (tmad_0_f64_tied1, svfloat64_t, -+ z0 = svtmad_f64 (z0, z1, 0), -+ z0 = svtmad (z0, z1, 0)) -+ -+/* -+** tmad_0_f64_tied2: -+** mov (z[0-9]+\.d), z0\.d -+** movprfx z0, z1 -+** ftmad z0\.d, z0\.d, \1, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (tmad_0_f64_tied2, svfloat64_t, -+ z0 = svtmad_f64 (z1, z0, 0), -+ z0 = svtmad (z1, z0, 0)) -+ -+/* -+** tmad_0_f64_untied: -+** movprfx z0, z1 -+** ftmad z0\.d, z0\.d, z2\.d, #0 -+** ret -+*/ -+TEST_UNIFORM_Z (tmad_0_f64_untied, svfloat64_t, -+ z0 = svtmad_f64 (z1, z2, 0), -+ z0 = svtmad (z1, z2, 0)) -+ -+/* -+** tmad_1_f64: -+** ftmad z0\.d, z0\.d, z1\.d, #1 -+** ret -+*/ -+TEST_UNIFORM_Z (tmad_1_f64, svfloat64_t, -+ z0 = svtmad_f64 (z0, z1, 1), -+ z0 = svtmad (z0, z1, 1)) -+ -+/* -+** tmad_2_f64: -+** ftmad z0\.d, z0\.d, z1\.d, #2 -+** ret -+*/ -+TEST_UNIFORM_Z (tmad_2_f64, svfloat64_t, -+ z0 = svtmad_f64 (z0, z1, 2), -+ z0 = svtmad (z0, z1, 2)) -+ -+/* -+** tmad_3_f64: -+** ftmad z0\.d, z0\.d, z1\.d, #3 -+** ret -+*/ -+TEST_UNIFORM_Z (tmad_3_f64, svfloat64_t, -+ z0 = svtmad_f64 (z0, z1, 3), -+ z0 = svtmad (z0, z1, 3)) -+ -+/* -+** tmad_4_f64: -+** ftmad z0\.d, z0\.d, z1\.d, #4 -+** ret -+*/ -+TEST_UNIFORM_Z (tmad_4_f64, svfloat64_t, -+ z0 = svtmad_f64 (z0, z1, 4), -+ z0 = svtmad (z0, z1, 4)) -+ -+/* -+** tmad_5_f64: -+** ftmad z0\.d, z0\.d, z1\.d, #5 -+** ret -+*/ -+TEST_UNIFORM_Z (tmad_5_f64, svfloat64_t, -+ z0 = svtmad_f64 (z0, z1, 5), -+ z0 = svtmad (z0, z1, 5)) -+ -+/* -+** tmad_6_f64: -+** ftmad z0\.d, z0\.d, z1\.d, #6 -+** ret -+*/ -+TEST_UNIFORM_Z (tmad_6_f64, svfloat64_t, -+ z0 = svtmad_f64 (z0, z1, 6), -+ z0 = svtmad (z0, z1, 6)) -+ -+/* -+** tmad_7_f64: -+** ftmad z0\.d, z0\.d, z1\.d, #7 -+** ret -+*/ -+TEST_UNIFORM_Z (tmad_7_f64, svfloat64_t, -+ z0 = svtmad_f64 (z0, z1, 7), -+ z0 = svtmad (z0, z1, 7)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_b16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_b16.c -new file mode 100644 -index 000000000..902f8c397 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_b16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn1_b16_tied1: -+** trn1 p0\.h, p0\.h, p1\.h -+** ret -+*/ -+TEST_UNIFORM_P (trn1_b16_tied1, -+ p0 = svtrn1_b16 (p0, p1), -+ p0 = svtrn1_b16 (p0, p1)) -+ -+/* -+** trn1_b16_tied2: -+** trn1 p0\.h, p1\.h, p0\.h -+** ret -+*/ -+TEST_UNIFORM_P (trn1_b16_tied2, -+ p0 = svtrn1_b16 (p1, p0), -+ p0 = svtrn1_b16 
(p1, p0)) -+ -+/* -+** trn1_b16_untied: -+** trn1 p0\.h, p1\.h, p2\.h -+** ret -+*/ -+TEST_UNIFORM_P (trn1_b16_untied, -+ p0 = svtrn1_b16 (p1, p2), -+ p0 = svtrn1_b16 (p1, p2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_b32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_b32.c -new file mode 100644 -index 000000000..8c9ed5152 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_b32.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn1_b32_tied1: -+** trn1 p0\.s, p0\.s, p1\.s -+** ret -+*/ -+TEST_UNIFORM_P (trn1_b32_tied1, -+ p0 = svtrn1_b32 (p0, p1), -+ p0 = svtrn1_b32 (p0, p1)) -+ -+/* -+** trn1_b32_tied2: -+** trn1 p0\.s, p1\.s, p0\.s -+** ret -+*/ -+TEST_UNIFORM_P (trn1_b32_tied2, -+ p0 = svtrn1_b32 (p1, p0), -+ p0 = svtrn1_b32 (p1, p0)) -+ -+/* -+** trn1_b32_untied: -+** trn1 p0\.s, p1\.s, p2\.s -+** ret -+*/ -+TEST_UNIFORM_P (trn1_b32_untied, -+ p0 = svtrn1_b32 (p1, p2), -+ p0 = svtrn1_b32 (p1, p2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_b64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_b64.c -new file mode 100644 -index 000000000..55b00571d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_b64.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn1_b64_tied1: -+** trn1 p0\.d, p0\.d, p1\.d -+** ret -+*/ -+TEST_UNIFORM_P (trn1_b64_tied1, -+ p0 = svtrn1_b64 (p0, p1), -+ p0 = svtrn1_b64 (p0, p1)) -+ -+/* -+** trn1_b64_tied2: -+** trn1 p0\.d, p1\.d, p0\.d -+** ret -+*/ -+TEST_UNIFORM_P (trn1_b64_tied2, -+ p0 = svtrn1_b64 (p1, p0), -+ p0 = svtrn1_b64 (p1, p0)) -+ -+/* -+** trn1_b64_untied: -+** trn1 p0\.d, p1\.d, p2\.d -+** ret -+*/ -+TEST_UNIFORM_P (trn1_b64_untied, -+ p0 = svtrn1_b64 (p1, p2), -+ p0 = svtrn1_b64 (p1, p2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_b8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_b8.c -new file mode 100644 -index 000000000..4b5e80fbe ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_b8.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn1_b8_tied1: -+** trn1 p0\.b, p0\.b, p1\.b -+** ret -+*/ -+TEST_UNIFORM_P (trn1_b8_tied1, -+ p0 = svtrn1_b8 (p0, p1), -+ p0 = svtrn1_b8 (p0, p1)) -+ -+/* -+** trn1_b8_tied2: -+** trn1 p0\.b, p1\.b, p0\.b -+** ret -+*/ -+TEST_UNIFORM_P (trn1_b8_tied2, -+ p0 = svtrn1_b8 (p1, p0), -+ p0 = svtrn1_b8 (p1, p0)) -+ -+/* -+** trn1_b8_untied: -+** trn1 p0\.b, p1\.b, p2\.b -+** ret -+*/ -+TEST_UNIFORM_P (trn1_b8_untied, -+ p0 = svtrn1_b8 (p1, p2), -+ p0 = svtrn1_b8 (p1, p2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_bf16.c -new file mode 100644 -index 000000000..b04c7da4f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_bf16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn1_bf16_tied1: -+** trn1 z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (trn1_bf16_tied1, svbfloat16_t, -+ z0 = svtrn1_bf16 (z0, z1), -+ z0 = svtrn1 (z0, z1)) -+ -+/* -+** trn1_bf16_tied2: -+** trn1 z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (trn1_bf16_tied2, svbfloat16_t, -+ z0 = svtrn1_bf16 (z1, z0), -+ z0 = svtrn1 (z1, z0)) -+ -+/* -+** trn1_bf16_untied: 
-+** trn1 z0\.h, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (trn1_bf16_untied, svbfloat16_t, -+ z0 = svtrn1_bf16 (z1, z2), -+ z0 = svtrn1 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_f16.c -new file mode 100644 -index 000000000..373eb9dd9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_f16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn1_f16_tied1: -+** trn1 z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (trn1_f16_tied1, svfloat16_t, -+ z0 = svtrn1_f16 (z0, z1), -+ z0 = svtrn1 (z0, z1)) -+ -+/* -+** trn1_f16_tied2: -+** trn1 z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (trn1_f16_tied2, svfloat16_t, -+ z0 = svtrn1_f16 (z1, z0), -+ z0 = svtrn1 (z1, z0)) -+ -+/* -+** trn1_f16_untied: -+** trn1 z0\.h, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (trn1_f16_untied, svfloat16_t, -+ z0 = svtrn1_f16 (z1, z2), -+ z0 = svtrn1 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_f32.c -new file mode 100644 -index 000000000..ccd84d94e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_f32.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn1_f32_tied1: -+** trn1 z0\.s, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (trn1_f32_tied1, svfloat32_t, -+ z0 = svtrn1_f32 (z0, z1), -+ z0 = svtrn1 (z0, z1)) -+ -+/* -+** trn1_f32_tied2: -+** trn1 z0\.s, z1\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (trn1_f32_tied2, svfloat32_t, -+ z0 = svtrn1_f32 (z1, z0), -+ z0 = svtrn1 (z1, z0)) -+ -+/* -+** trn1_f32_untied: -+** trn1 z0\.s, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (trn1_f32_untied, svfloat32_t, -+ z0 = svtrn1_f32 (z1, z2), -+ z0 = svtrn1 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_f64.c -new file mode 100644 -index 000000000..d3cc51948 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_f64.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn1_f64_tied1: -+** trn1 z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (trn1_f64_tied1, svfloat64_t, -+ z0 = svtrn1_f64 (z0, z1), -+ z0 = svtrn1 (z0, z1)) -+ -+/* -+** trn1_f64_tied2: -+** trn1 z0\.d, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (trn1_f64_tied2, svfloat64_t, -+ z0 = svtrn1_f64 (z1, z0), -+ z0 = svtrn1 (z1, z0)) -+ -+/* -+** trn1_f64_untied: -+** trn1 z0\.d, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (trn1_f64_untied, svfloat64_t, -+ z0 = svtrn1_f64 (z1, z2), -+ z0 = svtrn1 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_s16.c -new file mode 100644 -index 000000000..466bb8c02 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_s16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn1_s16_tied1: -+** trn1 z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (trn1_s16_tied1, svint16_t, -+ z0 = svtrn1_s16 (z0, z1), -+ z0 = svtrn1 (z0, z1)) -+ -+/* -+** trn1_s16_tied2: -+** trn1 z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (trn1_s16_tied2, svint16_t, -+ z0 = svtrn1_s16 (z1, z0), -+ 
z0 = svtrn1 (z1, z0)) -+ -+/* -+** trn1_s16_untied: -+** trn1 z0\.h, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (trn1_s16_untied, svint16_t, -+ z0 = svtrn1_s16 (z1, z2), -+ z0 = svtrn1 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_s32.c -new file mode 100644 -index 000000000..24655e622 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_s32.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn1_s32_tied1: -+** trn1 z0\.s, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (trn1_s32_tied1, svint32_t, -+ z0 = svtrn1_s32 (z0, z1), -+ z0 = svtrn1 (z0, z1)) -+ -+/* -+** trn1_s32_tied2: -+** trn1 z0\.s, z1\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (trn1_s32_tied2, svint32_t, -+ z0 = svtrn1_s32 (z1, z0), -+ z0 = svtrn1 (z1, z0)) -+ -+/* -+** trn1_s32_untied: -+** trn1 z0\.s, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (trn1_s32_untied, svint32_t, -+ z0 = svtrn1_s32 (z1, z2), -+ z0 = svtrn1 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_s64.c -new file mode 100644 -index 000000000..553fb610b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_s64.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn1_s64_tied1: -+** trn1 z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (trn1_s64_tied1, svint64_t, -+ z0 = svtrn1_s64 (z0, z1), -+ z0 = svtrn1 (z0, z1)) -+ -+/* -+** trn1_s64_tied2: -+** trn1 z0\.d, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (trn1_s64_tied2, svint64_t, -+ z0 = svtrn1_s64 (z1, z0), -+ z0 = svtrn1 (z1, z0)) -+ -+/* -+** trn1_s64_untied: -+** trn1 z0\.d, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (trn1_s64_untied, svint64_t, -+ z0 = svtrn1_s64 (z1, z2), -+ z0 = svtrn1 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_s8.c -new file mode 100644 -index 000000000..1fa150792 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_s8.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn1_s8_tied1: -+** trn1 z0\.b, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (trn1_s8_tied1, svint8_t, -+ z0 = svtrn1_s8 (z0, z1), -+ z0 = svtrn1 (z0, z1)) -+ -+/* -+** trn1_s8_tied2: -+** trn1 z0\.b, z1\.b, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (trn1_s8_tied2, svint8_t, -+ z0 = svtrn1_s8 (z1, z0), -+ z0 = svtrn1 (z1, z0)) -+ -+/* -+** trn1_s8_untied: -+** trn1 z0\.b, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (trn1_s8_untied, svint8_t, -+ z0 = svtrn1_s8 (z1, z2), -+ z0 = svtrn1 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_u16.c -new file mode 100644 -index 000000000..a3ce936f3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_u16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn1_u16_tied1: -+** trn1 z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (trn1_u16_tied1, svuint16_t, -+ z0 = svtrn1_u16 (z0, z1), -+ z0 = svtrn1 (z0, z1)) -+ -+/* -+** trn1_u16_tied2: -+** trn1 z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (trn1_u16_tied2, svuint16_t, -+ z0 = 
svtrn1_u16 (z1, z0), -+ z0 = svtrn1 (z1, z0)) -+ -+/* -+** trn1_u16_untied: -+** trn1 z0\.h, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (trn1_u16_untied, svuint16_t, -+ z0 = svtrn1_u16 (z1, z2), -+ z0 = svtrn1 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_u32.c -new file mode 100644 -index 000000000..b14d7a67a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_u32.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn1_u32_tied1: -+** trn1 z0\.s, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (trn1_u32_tied1, svuint32_t, -+ z0 = svtrn1_u32 (z0, z1), -+ z0 = svtrn1 (z0, z1)) -+ -+/* -+** trn1_u32_tied2: -+** trn1 z0\.s, z1\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (trn1_u32_tied2, svuint32_t, -+ z0 = svtrn1_u32 (z1, z0), -+ z0 = svtrn1 (z1, z0)) -+ -+/* -+** trn1_u32_untied: -+** trn1 z0\.s, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (trn1_u32_untied, svuint32_t, -+ z0 = svtrn1_u32 (z1, z2), -+ z0 = svtrn1 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_u64.c -new file mode 100644 -index 000000000..2ccda1d72 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_u64.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn1_u64_tied1: -+** trn1 z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (trn1_u64_tied1, svuint64_t, -+ z0 = svtrn1_u64 (z0, z1), -+ z0 = svtrn1 (z0, z1)) -+ -+/* -+** trn1_u64_tied2: -+** trn1 z0\.d, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (trn1_u64_tied2, svuint64_t, -+ z0 = svtrn1_u64 (z1, z0), -+ z0 = svtrn1 (z1, z0)) -+ -+/* -+** trn1_u64_untied: -+** trn1 z0\.d, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (trn1_u64_untied, svuint64_t, -+ z0 = svtrn1_u64 (z1, z2), -+ z0 = svtrn1 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_u8.c -new file mode 100644 -index 000000000..84f8d31e8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1_u8.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn1_u8_tied1: -+** trn1 z0\.b, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (trn1_u8_tied1, svuint8_t, -+ z0 = svtrn1_u8 (z0, z1), -+ z0 = svtrn1 (z0, z1)) -+ -+/* -+** trn1_u8_tied2: -+** trn1 z0\.b, z1\.b, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (trn1_u8_tied2, svuint8_t, -+ z0 = svtrn1_u8 (z1, z0), -+ z0 = svtrn1 (z1, z0)) -+ -+/* -+** trn1_u8_untied: -+** trn1 z0\.b, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (trn1_u8_untied, svuint8_t, -+ z0 = svtrn1_u8 (z1, z2), -+ z0 = svtrn1 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_bf16.c -new file mode 100644 -index 000000000..f1810da9e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_bf16.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn1q_bf16_tied1: -+** trn1 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn1q_bf16_tied1, svbfloat16_t, -+ z0 = svtrn1q_bf16 
(z0, z1), -+ z0 = svtrn1q (z0, z1)) -+ -+/* -+** trn1q_bf16_tied2: -+** trn1 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn1q_bf16_tied2, svbfloat16_t, -+ z0 = svtrn1q_bf16 (z1, z0), -+ z0 = svtrn1q (z1, z0)) -+ -+/* -+** trn1q_bf16_untied: -+** trn1 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn1q_bf16_untied, svbfloat16_t, -+ z0 = svtrn1q_bf16 (z1, z2), -+ z0 = svtrn1q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_f16.c -new file mode 100644 -index 000000000..6420d0f0a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_f16.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn1q_f16_tied1: -+** trn1 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn1q_f16_tied1, svfloat16_t, -+ z0 = svtrn1q_f16 (z0, z1), -+ z0 = svtrn1q (z0, z1)) -+ -+/* -+** trn1q_f16_tied2: -+** trn1 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn1q_f16_tied2, svfloat16_t, -+ z0 = svtrn1q_f16 (z1, z0), -+ z0 = svtrn1q (z1, z0)) -+ -+/* -+** trn1q_f16_untied: -+** trn1 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn1q_f16_untied, svfloat16_t, -+ z0 = svtrn1q_f16 (z1, z2), -+ z0 = svtrn1q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_f32.c -new file mode 100644 -index 000000000..6fb2eecf5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_f32.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn1q_f32_tied1: -+** trn1 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn1q_f32_tied1, svfloat32_t, -+ z0 = svtrn1q_f32 (z0, z1), -+ z0 = svtrn1q (z0, z1)) -+ -+/* -+** trn1q_f32_tied2: -+** trn1 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn1q_f32_tied2, svfloat32_t, -+ z0 = svtrn1q_f32 (z1, z0), -+ z0 = svtrn1q (z1, z0)) -+ -+/* -+** trn1q_f32_untied: -+** trn1 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn1q_f32_untied, svfloat32_t, -+ z0 = svtrn1q_f32 (z1, z2), -+ z0 = svtrn1q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_f64.c -new file mode 100644 -index 000000000..e786a8d04 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_f64.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn1q_f64_tied1: -+** trn1 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn1q_f64_tied1, svfloat64_t, -+ z0 = svtrn1q_f64 (z0, z1), -+ z0 = svtrn1q (z0, z1)) -+ -+/* -+** trn1q_f64_tied2: -+** trn1 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn1q_f64_tied2, svfloat64_t, -+ z0 = svtrn1q_f64 (z1, z0), -+ z0 = svtrn1q (z1, z0)) -+ -+/* -+** trn1q_f64_untied: -+** trn1 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn1q_f64_untied, svfloat64_t, -+ z0 = svtrn1q_f64 (z1, z2), -+ z0 = svtrn1q (z1, z2)) -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_s16.c -new file mode 100644 -index 000000000..548360719 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_s16.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn1q_s16_tied1: -+** trn1 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn1q_s16_tied1, svint16_t, -+ z0 = svtrn1q_s16 (z0, z1), -+ z0 = svtrn1q (z0, z1)) -+ -+/* -+** trn1q_s16_tied2: -+** trn1 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn1q_s16_tied2, svint16_t, -+ z0 = svtrn1q_s16 (z1, z0), -+ z0 = svtrn1q (z1, z0)) -+ -+/* -+** trn1q_s16_untied: -+** trn1 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn1q_s16_untied, svint16_t, -+ z0 = svtrn1q_s16 (z1, z2), -+ z0 = svtrn1q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_s32.c -new file mode 100644 -index 000000000..ccb8319f7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_s32.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn1q_s32_tied1: -+** trn1 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn1q_s32_tied1, svint32_t, -+ z0 = svtrn1q_s32 (z0, z1), -+ z0 = svtrn1q (z0, z1)) -+ -+/* -+** trn1q_s32_tied2: -+** trn1 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn1q_s32_tied2, svint32_t, -+ z0 = svtrn1q_s32 (z1, z0), -+ z0 = svtrn1q (z1, z0)) -+ -+/* -+** trn1q_s32_untied: -+** trn1 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn1q_s32_untied, svint32_t, -+ z0 = svtrn1q_s32 (z1, z2), -+ z0 = svtrn1q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_s64.c -new file mode 100644 -index 000000000..fe8125a8a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_s64.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn1q_s64_tied1: -+** trn1 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn1q_s64_tied1, svint64_t, -+ z0 = svtrn1q_s64 (z0, z1), -+ z0 = svtrn1q (z0, z1)) -+ -+/* -+** trn1q_s64_tied2: -+** trn1 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn1q_s64_tied2, svint64_t, -+ z0 = svtrn1q_s64 (z1, z0), -+ z0 = svtrn1q (z1, z0)) -+ -+/* -+** trn1q_s64_untied: -+** trn1 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn1q_s64_untied, svint64_t, -+ z0 = svtrn1q_s64 (z1, z2), -+ z0 = svtrn1q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_s8.c -new file mode 100644 -index 000000000..48040c1ad ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_s8.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ 
-+#include "test_sve_acle.h" -+ -+/* -+** trn1q_s8_tied1: -+** trn1 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn1q_s8_tied1, svint8_t, -+ z0 = svtrn1q_s8 (z0, z1), -+ z0 = svtrn1q (z0, z1)) -+ -+/* -+** trn1q_s8_tied2: -+** trn1 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn1q_s8_tied2, svint8_t, -+ z0 = svtrn1q_s8 (z1, z0), -+ z0 = svtrn1q (z1, z0)) -+ -+/* -+** trn1q_s8_untied: -+** trn1 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn1q_s8_untied, svint8_t, -+ z0 = svtrn1q_s8 (z1, z2), -+ z0 = svtrn1q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_u16.c -new file mode 100644 -index 000000000..3657f919e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_u16.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn1q_u16_tied1: -+** trn1 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn1q_u16_tied1, svuint16_t, -+ z0 = svtrn1q_u16 (z0, z1), -+ z0 = svtrn1q (z0, z1)) -+ -+/* -+** trn1q_u16_tied2: -+** trn1 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn1q_u16_tied2, svuint16_t, -+ z0 = svtrn1q_u16 (z1, z0), -+ z0 = svtrn1q (z1, z0)) -+ -+/* -+** trn1q_u16_untied: -+** trn1 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn1q_u16_untied, svuint16_t, -+ z0 = svtrn1q_u16 (z1, z2), -+ z0 = svtrn1q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_u32.c -new file mode 100644 -index 000000000..cc5ea2878 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_u32.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn1q_u32_tied1: -+** trn1 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn1q_u32_tied1, svuint32_t, -+ z0 = svtrn1q_u32 (z0, z1), -+ z0 = svtrn1q (z0, z1)) -+ -+/* -+** trn1q_u32_tied2: -+** trn1 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn1q_u32_tied2, svuint32_t, -+ z0 = svtrn1q_u32 (z1, z0), -+ z0 = svtrn1q (z1, z0)) -+ -+/* -+** trn1q_u32_untied: -+** trn1 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn1q_u32_untied, svuint32_t, -+ z0 = svtrn1q_u32 (z1, z2), -+ z0 = svtrn1q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_u64.c -new file mode 100644 -index 000000000..4435b53d0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_u64.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn1q_u64_tied1: -+** trn1 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn1q_u64_tied1, svuint64_t, -+ z0 = svtrn1q_u64 (z0, z1), -+ z0 = svtrn1q (z0, z1)) -+ -+/* -+** trn1q_u64_tied2: -+** trn1 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn1q_u64_tied2, svuint64_t, -+ z0 = svtrn1q_u64 (z1, z0), -+ z0 = svtrn1q (z1, z0)) -+ -+/* -+** trn1q_u64_untied: -+** trn1 z0\.q, z1\.q, z2\.q -+** ret -+*/ 
-+TEST_UNIFORM_Z (trn1q_u64_untied, svuint64_t, -+ z0 = svtrn1q_u64 (z1, z2), -+ z0 = svtrn1q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_u8.c -new file mode 100644 -index 000000000..4ebfedbea ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn1q_u8.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn1q_u8_tied1: -+** trn1 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn1q_u8_tied1, svuint8_t, -+ z0 = svtrn1q_u8 (z0, z1), -+ z0 = svtrn1q (z0, z1)) -+ -+/* -+** trn1q_u8_tied2: -+** trn1 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn1q_u8_tied2, svuint8_t, -+ z0 = svtrn1q_u8 (z1, z0), -+ z0 = svtrn1q (z1, z0)) -+ -+/* -+** trn1q_u8_untied: -+** trn1 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn1q_u8_untied, svuint8_t, -+ z0 = svtrn1q_u8 (z1, z2), -+ z0 = svtrn1q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_b16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_b16.c -new file mode 100644 -index 000000000..54b593afe ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_b16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn2_b16_tied1: -+** trn2 p0\.h, p0\.h, p1\.h -+** ret -+*/ -+TEST_UNIFORM_P (trn2_b16_tied1, -+ p0 = svtrn2_b16 (p0, p1), -+ p0 = svtrn2_b16 (p0, p1)) -+ -+/* -+** trn2_b16_tied2: -+** trn2 p0\.h, p1\.h, p0\.h -+** ret -+*/ -+TEST_UNIFORM_P (trn2_b16_tied2, -+ p0 = svtrn2_b16 (p1, p0), -+ p0 = svtrn2_b16 (p1, p0)) -+ -+/* -+** trn2_b16_untied: -+** trn2 p0\.h, p1\.h, p2\.h -+** ret -+*/ -+TEST_UNIFORM_P (trn2_b16_untied, -+ p0 = svtrn2_b16 (p1, p2), -+ p0 = svtrn2_b16 (p1, p2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_b32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_b32.c -new file mode 100644 -index 000000000..ead3d85cf ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_b32.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn2_b32_tied1: -+** trn2 p0\.s, p0\.s, p1\.s -+** ret -+*/ -+TEST_UNIFORM_P (trn2_b32_tied1, -+ p0 = svtrn2_b32 (p0, p1), -+ p0 = svtrn2_b32 (p0, p1)) -+ -+/* -+** trn2_b32_tied2: -+** trn2 p0\.s, p1\.s, p0\.s -+** ret -+*/ -+TEST_UNIFORM_P (trn2_b32_tied2, -+ p0 = svtrn2_b32 (p1, p0), -+ p0 = svtrn2_b32 (p1, p0)) -+ -+/* -+** trn2_b32_untied: -+** trn2 p0\.s, p1\.s, p2\.s -+** ret -+*/ -+TEST_UNIFORM_P (trn2_b32_untied, -+ p0 = svtrn2_b32 (p1, p2), -+ p0 = svtrn2_b32 (p1, p2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_b64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_b64.c -new file mode 100644 -index 000000000..ccca03557 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_b64.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn2_b64_tied1: -+** trn2 p0\.d, p0\.d, p1\.d -+** ret -+*/ -+TEST_UNIFORM_P (trn2_b64_tied1, -+ p0 = svtrn2_b64 (p0, p1), -+ p0 = svtrn2_b64 (p0, p1)) -+ -+/* -+** trn2_b64_tied2: -+** trn2 p0\.d, p1\.d, p0\.d -+** ret -+*/ -+TEST_UNIFORM_P (trn2_b64_tied2, -+ p0 = svtrn2_b64 (p1, p0), -+ 
p0 = svtrn2_b64 (p1, p0)) -+ -+/* -+** trn2_b64_untied: -+** trn2 p0\.d, p1\.d, p2\.d -+** ret -+*/ -+TEST_UNIFORM_P (trn2_b64_untied, -+ p0 = svtrn2_b64 (p1, p2), -+ p0 = svtrn2_b64 (p1, p2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_b8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_b8.c -new file mode 100644 -index 000000000..7b0803e79 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_b8.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn2_b8_tied1: -+** trn2 p0\.b, p0\.b, p1\.b -+** ret -+*/ -+TEST_UNIFORM_P (trn2_b8_tied1, -+ p0 = svtrn2_b8 (p0, p1), -+ p0 = svtrn2_b8 (p0, p1)) -+ -+/* -+** trn2_b8_tied2: -+** trn2 p0\.b, p1\.b, p0\.b -+** ret -+*/ -+TEST_UNIFORM_P (trn2_b8_tied2, -+ p0 = svtrn2_b8 (p1, p0), -+ p0 = svtrn2_b8 (p1, p0)) -+ -+/* -+** trn2_b8_untied: -+** trn2 p0\.b, p1\.b, p2\.b -+** ret -+*/ -+TEST_UNIFORM_P (trn2_b8_untied, -+ p0 = svtrn2_b8 (p1, p2), -+ p0 = svtrn2_b8 (p1, p2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_bf16.c -new file mode 100644 -index 000000000..12028b0f6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_bf16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn2_bf16_tied1: -+** trn2 z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (trn2_bf16_tied1, svbfloat16_t, -+ z0 = svtrn2_bf16 (z0, z1), -+ z0 = svtrn2 (z0, z1)) -+ -+/* -+** trn2_bf16_tied2: -+** trn2 z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (trn2_bf16_tied2, svbfloat16_t, -+ z0 = svtrn2_bf16 (z1, z0), -+ z0 = svtrn2 (z1, z0)) -+ -+/* -+** trn2_bf16_untied: -+** trn2 z0\.h, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (trn2_bf16_untied, svbfloat16_t, -+ z0 = svtrn2_bf16 (z1, z2), -+ z0 = svtrn2 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_f16.c -new file mode 100644 -index 000000000..112567725 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_f16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn2_f16_tied1: -+** trn2 z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (trn2_f16_tied1, svfloat16_t, -+ z0 = svtrn2_f16 (z0, z1), -+ z0 = svtrn2 (z0, z1)) -+ -+/* -+** trn2_f16_tied2: -+** trn2 z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (trn2_f16_tied2, svfloat16_t, -+ z0 = svtrn2_f16 (z1, z0), -+ z0 = svtrn2 (z1, z0)) -+ -+/* -+** trn2_f16_untied: -+** trn2 z0\.h, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (trn2_f16_untied, svfloat16_t, -+ z0 = svtrn2_f16 (z1, z2), -+ z0 = svtrn2 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_f32.c -new file mode 100644 -index 000000000..daee566cc ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_f32.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn2_f32_tied1: -+** trn2 z0\.s, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (trn2_f32_tied1, svfloat32_t, -+ z0 = svtrn2_f32 (z0, z1), -+ z0 = svtrn2 (z0, z1)) -+ -+/* -+** trn2_f32_tied2: -+** trn2 z0\.s, z1\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (trn2_f32_tied2, svfloat32_t, -+ z0 = 
svtrn2_f32 (z1, z0), -+ z0 = svtrn2 (z1, z0)) -+ -+/* -+** trn2_f32_untied: -+** trn2 z0\.s, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (trn2_f32_untied, svfloat32_t, -+ z0 = svtrn2_f32 (z1, z2), -+ z0 = svtrn2 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_f64.c -new file mode 100644 -index 000000000..338fee49f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_f64.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn2_f64_tied1: -+** trn2 z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (trn2_f64_tied1, svfloat64_t, -+ z0 = svtrn2_f64 (z0, z1), -+ z0 = svtrn2 (z0, z1)) -+ -+/* -+** trn2_f64_tied2: -+** trn2 z0\.d, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (trn2_f64_tied2, svfloat64_t, -+ z0 = svtrn2_f64 (z1, z0), -+ z0 = svtrn2 (z1, z0)) -+ -+/* -+** trn2_f64_untied: -+** trn2 z0\.d, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (trn2_f64_untied, svfloat64_t, -+ z0 = svtrn2_f64 (z1, z2), -+ z0 = svtrn2 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_s16.c -new file mode 100644 -index 000000000..93f63de5e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_s16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn2_s16_tied1: -+** trn2 z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (trn2_s16_tied1, svint16_t, -+ z0 = svtrn2_s16 (z0, z1), -+ z0 = svtrn2 (z0, z1)) -+ -+/* -+** trn2_s16_tied2: -+** trn2 z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (trn2_s16_tied2, svint16_t, -+ z0 = svtrn2_s16 (z1, z0), -+ z0 = svtrn2 (z1, z0)) -+ -+/* -+** trn2_s16_untied: -+** trn2 z0\.h, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (trn2_s16_untied, svint16_t, -+ z0 = svtrn2_s16 (z1, z2), -+ z0 = svtrn2 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_s32.c -new file mode 100644 -index 000000000..82edd72f7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_s32.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn2_s32_tied1: -+** trn2 z0\.s, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (trn2_s32_tied1, svint32_t, -+ z0 = svtrn2_s32 (z0, z1), -+ z0 = svtrn2 (z0, z1)) -+ -+/* -+** trn2_s32_tied2: -+** trn2 z0\.s, z1\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (trn2_s32_tied2, svint32_t, -+ z0 = svtrn2_s32 (z1, z0), -+ z0 = svtrn2 (z1, z0)) -+ -+/* -+** trn2_s32_untied: -+** trn2 z0\.s, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (trn2_s32_untied, svint32_t, -+ z0 = svtrn2_s32 (z1, z2), -+ z0 = svtrn2 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_s64.c -new file mode 100644 -index 000000000..5f43441d5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_s64.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn2_s64_tied1: -+** trn2 z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (trn2_s64_tied1, svint64_t, -+ z0 = svtrn2_s64 (z0, z1), -+ z0 = svtrn2 (z0, z1)) -+ -+/* -+** trn2_s64_tied2: -+** trn2 z0\.d, z1\.d, z0\.d -+** ret -+*/ 
-+TEST_UNIFORM_Z (trn2_s64_tied2, svint64_t, -+ z0 = svtrn2_s64 (z1, z0), -+ z0 = svtrn2 (z1, z0)) -+ -+/* -+** trn2_s64_untied: -+** trn2 z0\.d, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (trn2_s64_untied, svint64_t, -+ z0 = svtrn2_s64 (z1, z2), -+ z0 = svtrn2 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_s8.c -new file mode 100644 -index 000000000..716538119 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_s8.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn2_s8_tied1: -+** trn2 z0\.b, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (trn2_s8_tied1, svint8_t, -+ z0 = svtrn2_s8 (z0, z1), -+ z0 = svtrn2 (z0, z1)) -+ -+/* -+** trn2_s8_tied2: -+** trn2 z0\.b, z1\.b, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (trn2_s8_tied2, svint8_t, -+ z0 = svtrn2_s8 (z1, z0), -+ z0 = svtrn2 (z1, z0)) -+ -+/* -+** trn2_s8_untied: -+** trn2 z0\.b, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (trn2_s8_untied, svint8_t, -+ z0 = svtrn2_s8 (z1, z2), -+ z0 = svtrn2 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_u16.c -new file mode 100644 -index 000000000..e68d233b8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_u16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn2_u16_tied1: -+** trn2 z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (trn2_u16_tied1, svuint16_t, -+ z0 = svtrn2_u16 (z0, z1), -+ z0 = svtrn2 (z0, z1)) -+ -+/* -+** trn2_u16_tied2: -+** trn2 z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (trn2_u16_tied2, svuint16_t, -+ z0 = svtrn2_u16 (z1, z0), -+ z0 = svtrn2 (z1, z0)) -+ -+/* -+** trn2_u16_untied: -+** trn2 z0\.h, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (trn2_u16_untied, svuint16_t, -+ z0 = svtrn2_u16 (z1, z2), -+ z0 = svtrn2 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_u32.c -new file mode 100644 -index 000000000..e48aad179 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_u32.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn2_u32_tied1: -+** trn2 z0\.s, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (trn2_u32_tied1, svuint32_t, -+ z0 = svtrn2_u32 (z0, z1), -+ z0 = svtrn2 (z0, z1)) -+ -+/* -+** trn2_u32_tied2: -+** trn2 z0\.s, z1\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (trn2_u32_tied2, svuint32_t, -+ z0 = svtrn2_u32 (z1, z0), -+ z0 = svtrn2 (z1, z0)) -+ -+/* -+** trn2_u32_untied: -+** trn2 z0\.s, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (trn2_u32_untied, svuint32_t, -+ z0 = svtrn2_u32 (z1, z2), -+ z0 = svtrn2 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_u64.c -new file mode 100644 -index 000000000..aa452275b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_u64.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn2_u64_tied1: -+** trn2 z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (trn2_u64_tied1, svuint64_t, -+ z0 = svtrn2_u64 (z0, z1), -+ z0 = svtrn2 (z0, z1)) -+ -+/* -+** trn2_u64_tied2: -+** trn2 z0\.d, 
z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (trn2_u64_tied2, svuint64_t, -+ z0 = svtrn2_u64 (z1, z0), -+ z0 = svtrn2 (z1, z0)) -+ -+/* -+** trn2_u64_untied: -+** trn2 z0\.d, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (trn2_u64_untied, svuint64_t, -+ z0 = svtrn2_u64 (z1, z2), -+ z0 = svtrn2 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_u8.c -new file mode 100644 -index 000000000..cb26b2338 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2_u8.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn2_u8_tied1: -+** trn2 z0\.b, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (trn2_u8_tied1, svuint8_t, -+ z0 = svtrn2_u8 (z0, z1), -+ z0 = svtrn2 (z0, z1)) -+ -+/* -+** trn2_u8_tied2: -+** trn2 z0\.b, z1\.b, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (trn2_u8_tied2, svuint8_t, -+ z0 = svtrn2_u8 (z1, z0), -+ z0 = svtrn2 (z1, z0)) -+ -+/* -+** trn2_u8_untied: -+** trn2 z0\.b, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (trn2_u8_untied, svuint8_t, -+ z0 = svtrn2_u8 (z1, z2), -+ z0 = svtrn2 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_bf16.c -new file mode 100644 -index 000000000..5623b54f0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_bf16.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn2q_bf16_tied1: -+** trn2 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn2q_bf16_tied1, svbfloat16_t, -+ z0 = svtrn2q_bf16 (z0, z1), -+ z0 = svtrn2q (z0, z1)) -+ -+/* -+** trn2q_bf16_tied2: -+** trn2 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn2q_bf16_tied2, svbfloat16_t, -+ z0 = svtrn2q_bf16 (z1, z0), -+ z0 = svtrn2q (z1, z0)) -+ -+/* -+** trn2q_bf16_untied: -+** trn2 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn2q_bf16_untied, svbfloat16_t, -+ z0 = svtrn2q_bf16 (z1, z2), -+ z0 = svtrn2q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_f16.c -new file mode 100644 -index 000000000..db2190929 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_f16.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn2q_f16_tied1: -+** trn2 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn2q_f16_tied1, svfloat16_t, -+ z0 = svtrn2q_f16 (z0, z1), -+ z0 = svtrn2q (z0, z1)) -+ -+/* -+** trn2q_f16_tied2: -+** trn2 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn2q_f16_tied2, svfloat16_t, -+ z0 = svtrn2q_f16 (z1, z0), -+ z0 = svtrn2q (z1, z0)) -+ -+/* -+** trn2q_f16_untied: -+** trn2 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn2q_f16_untied, svfloat16_t, -+ z0 = svtrn2q_f16 (z1, z2), -+ z0 = svtrn2q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_f32.c -new file mode 100644 -index 000000000..1367a1e06 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_f32.c -@@ -0,0 +1,32 @@ 
-+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn2q_f32_tied1: -+** trn2 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn2q_f32_tied1, svfloat32_t, -+ z0 = svtrn2q_f32 (z0, z1), -+ z0 = svtrn2q (z0, z1)) -+ -+/* -+** trn2q_f32_tied2: -+** trn2 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn2q_f32_tied2, svfloat32_t, -+ z0 = svtrn2q_f32 (z1, z0), -+ z0 = svtrn2q (z1, z0)) -+ -+/* -+** trn2q_f32_untied: -+** trn2 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn2q_f32_untied, svfloat32_t, -+ z0 = svtrn2q_f32 (z1, z2), -+ z0 = svtrn2q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_f64.c -new file mode 100644 -index 000000000..54325e705 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_f64.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn2q_f64_tied1: -+** trn2 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn2q_f64_tied1, svfloat64_t, -+ z0 = svtrn2q_f64 (z0, z1), -+ z0 = svtrn2q (z0, z1)) -+ -+/* -+** trn2q_f64_tied2: -+** trn2 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn2q_f64_tied2, svfloat64_t, -+ z0 = svtrn2q_f64 (z1, z0), -+ z0 = svtrn2q (z1, z0)) -+ -+/* -+** trn2q_f64_untied: -+** trn2 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn2q_f64_untied, svfloat64_t, -+ z0 = svtrn2q_f64 (z1, z2), -+ z0 = svtrn2q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_s16.c -new file mode 100644 -index 000000000..a0b641278 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_s16.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn2q_s16_tied1: -+** trn2 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn2q_s16_tied1, svint16_t, -+ z0 = svtrn2q_s16 (z0, z1), -+ z0 = svtrn2q (z0, z1)) -+ -+/* -+** trn2q_s16_tied2: -+** trn2 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn2q_s16_tied2, svint16_t, -+ z0 = svtrn2q_s16 (z1, z0), -+ z0 = svtrn2q (z1, z0)) -+ -+/* -+** trn2q_s16_untied: -+** trn2 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn2q_s16_untied, svint16_t, -+ z0 = svtrn2q_s16 (z1, z2), -+ z0 = svtrn2q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_s32.c -new file mode 100644 -index 000000000..7c128c6ef ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_s32.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn2q_s32_tied1: -+** trn2 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn2q_s32_tied1, svint32_t, -+ z0 = svtrn2q_s32 (z0, z1), -+ z0 = svtrn2q (z0, z1)) -+ -+/* -+** trn2q_s32_tied2: -+** trn2 z0\.q, 
z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn2q_s32_tied2, svint32_t, -+ z0 = svtrn2q_s32 (z1, z0), -+ z0 = svtrn2q (z1, z0)) -+ -+/* -+** trn2q_s32_untied: -+** trn2 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn2q_s32_untied, svint32_t, -+ z0 = svtrn2q_s32 (z1, z2), -+ z0 = svtrn2q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_s64.c -new file mode 100644 -index 000000000..f22222525 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_s64.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn2q_s64_tied1: -+** trn2 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn2q_s64_tied1, svint64_t, -+ z0 = svtrn2q_s64 (z0, z1), -+ z0 = svtrn2q (z0, z1)) -+ -+/* -+** trn2q_s64_tied2: -+** trn2 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn2q_s64_tied2, svint64_t, -+ z0 = svtrn2q_s64 (z1, z0), -+ z0 = svtrn2q (z1, z0)) -+ -+/* -+** trn2q_s64_untied: -+** trn2 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn2q_s64_untied, svint64_t, -+ z0 = svtrn2q_s64 (z1, z2), -+ z0 = svtrn2q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_s8.c -new file mode 100644 -index 000000000..bd5243f35 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_s8.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn2q_s8_tied1: -+** trn2 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn2q_s8_tied1, svint8_t, -+ z0 = svtrn2q_s8 (z0, z1), -+ z0 = svtrn2q (z0, z1)) -+ -+/* -+** trn2q_s8_tied2: -+** trn2 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn2q_s8_tied2, svint8_t, -+ z0 = svtrn2q_s8 (z1, z0), -+ z0 = svtrn2q (z1, z0)) -+ -+/* -+** trn2q_s8_untied: -+** trn2 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn2q_s8_untied, svint8_t, -+ z0 = svtrn2q_s8 (z1, z2), -+ z0 = svtrn2q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_u16.c -new file mode 100644 -index 000000000..8da8563b2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_u16.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn2q_u16_tied1: -+** trn2 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn2q_u16_tied1, svuint16_t, -+ z0 = svtrn2q_u16 (z0, z1), -+ z0 = svtrn2q (z0, z1)) -+ -+/* -+** trn2q_u16_tied2: -+** trn2 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn2q_u16_tied2, svuint16_t, -+ z0 = svtrn2q_u16 (z1, z0), -+ z0 = svtrn2q (z1, z0)) -+ -+/* -+** trn2q_u16_untied: -+** trn2 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn2q_u16_untied, svuint16_t, -+ z0 = svtrn2q_u16 (z1, z2), -+ z0 = svtrn2q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_u32.c -new file mode 100644 -index 
000000000..6c0af02da ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_u32.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn2q_u32_tied1: -+** trn2 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn2q_u32_tied1, svuint32_t, -+ z0 = svtrn2q_u32 (z0, z1), -+ z0 = svtrn2q (z0, z1)) -+ -+/* -+** trn2q_u32_tied2: -+** trn2 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn2q_u32_tied2, svuint32_t, -+ z0 = svtrn2q_u32 (z1, z0), -+ z0 = svtrn2q (z1, z0)) -+ -+/* -+** trn2q_u32_untied: -+** trn2 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn2q_u32_untied, svuint32_t, -+ z0 = svtrn2q_u32 (z1, z2), -+ z0 = svtrn2q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_u64.c -new file mode 100644 -index 000000000..857595cbb ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_u64.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn2q_u64_tied1: -+** trn2 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn2q_u64_tied1, svuint64_t, -+ z0 = svtrn2q_u64 (z0, z1), -+ z0 = svtrn2q (z0, z1)) -+ -+/* -+** trn2q_u64_tied2: -+** trn2 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn2q_u64_tied2, svuint64_t, -+ z0 = svtrn2q_u64 (z1, z0), -+ z0 = svtrn2q (z1, z0)) -+ -+/* -+** trn2q_u64_untied: -+** trn2 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn2q_u64_untied, svuint64_t, -+ z0 = svtrn2q_u64 (z1, z2), -+ z0 = svtrn2q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_u8.c -new file mode 100644 -index 000000000..1fb85b249 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/trn2q_u8.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** trn2q_u8_tied1: -+** trn2 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn2q_u8_tied1, svuint8_t, -+ z0 = svtrn2q_u8 (z0, z1), -+ z0 = svtrn2q (z0, z1)) -+ -+/* -+** trn2q_u8_tied2: -+** trn2 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn2q_u8_tied2, svuint8_t, -+ z0 = svtrn2q_u8 (z1, z0), -+ z0 = svtrn2q (z1, z0)) -+ -+/* -+** trn2q_u8_untied: -+** trn2 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (trn2q_u8_untied, svuint8_t, -+ z0 = svtrn2q_u8 (z1, z2), -+ z0 = svtrn2q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f16.c -new file mode 100644 -index 000000000..94bc696eb ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** tsmul_f16_tied1: -+** ftsmul z0\.h, z0\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (tsmul_f16_tied1, svfloat16_t, svuint16_t, -+ z0 = svtsmul_f16 (z0, z4), -+ z0 = svtsmul (z0, z4)) -+ -+/* -+** tsmul_f16_tied2: -+** ftsmul z0\.h, z4\.h, 
z0\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (tsmul_f16_tied2, svfloat16_t, svuint16_t, -+ z0_res = svtsmul_f16 (z4, z0), -+ z0_res = svtsmul (z4, z0)) -+ -+/* -+** tsmul_f16_untied: -+** ftsmul z0\.h, z1\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (tsmul_f16_untied, svfloat16_t, svuint16_t, -+ z0 = svtsmul_f16 (z1, z4), -+ z0 = svtsmul (z1, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f32.c -new file mode 100644 -index 000000000..d0ec91882 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f32.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** tsmul_f32_tied1: -+** ftsmul z0\.s, z0\.s, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (tsmul_f32_tied1, svfloat32_t, svuint32_t, -+ z0 = svtsmul_f32 (z0, z4), -+ z0 = svtsmul (z0, z4)) -+ -+/* -+** tsmul_f32_tied2: -+** ftsmul z0\.s, z4\.s, z0\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (tsmul_f32_tied2, svfloat32_t, svuint32_t, -+ z0_res = svtsmul_f32 (z4, z0), -+ z0_res = svtsmul (z4, z0)) -+ -+/* -+** tsmul_f32_untied: -+** ftsmul z0\.s, z1\.s, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (tsmul_f32_untied, svfloat32_t, svuint32_t, -+ z0 = svtsmul_f32 (z1, z4), -+ z0 = svtsmul (z1, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f64.c -new file mode 100644 -index 000000000..23e0da3f7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tsmul_f64.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** tsmul_f64_tied1: -+** ftsmul z0\.d, z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (tsmul_f64_tied1, svfloat64_t, svuint64_t, -+ z0 = svtsmul_f64 (z0, z4), -+ z0 = svtsmul (z0, z4)) -+ -+/* -+** tsmul_f64_tied2: -+** ftsmul z0\.d, z4\.d, z0\.d -+** ret -+*/ -+TEST_DUAL_Z_REV (tsmul_f64_tied2, svfloat64_t, svuint64_t, -+ z0_res = svtsmul_f64 (z4, z0), -+ z0_res = svtsmul (z4, z0)) -+ -+/* -+** tsmul_f64_untied: -+** ftsmul z0\.d, z1\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (tsmul_f64_untied, svfloat64_t, svuint64_t, -+ z0 = svtsmul_f64 (z1, z4), -+ z0 = svtsmul (z1, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f16.c -new file mode 100644 -index 000000000..e7c3ea03b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** tssel_f16_tied1: -+** ftssel z0\.h, z0\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (tssel_f16_tied1, svfloat16_t, svuint16_t, -+ z0 = svtssel_f16 (z0, z4), -+ z0 = svtssel (z0, z4)) -+ -+/* -+** tssel_f16_tied2: -+** ftssel z0\.h, z4\.h, z0\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (tssel_f16_tied2, svfloat16_t, svuint16_t, -+ z0_res = svtssel_f16 (z4, z0), -+ z0_res = svtssel (z4, z0)) -+ -+/* -+** tssel_f16_untied: -+** ftssel z0\.h, z1\.h, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (tssel_f16_untied, svfloat16_t, svuint16_t, -+ z0 = svtssel_f16 (z1, z4), -+ z0 = svtssel (z1, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f32.c -new file mode 100644 -index 000000000..022573a19 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f32.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" 
"-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** tssel_f32_tied1: -+** ftssel z0\.s, z0\.s, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (tssel_f32_tied1, svfloat32_t, svuint32_t, -+ z0 = svtssel_f32 (z0, z4), -+ z0 = svtssel (z0, z4)) -+ -+/* -+** tssel_f32_tied2: -+** ftssel z0\.s, z4\.s, z0\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (tssel_f32_tied2, svfloat32_t, svuint32_t, -+ z0_res = svtssel_f32 (z4, z0), -+ z0_res = svtssel (z4, z0)) -+ -+/* -+** tssel_f32_untied: -+** ftssel z0\.s, z1\.s, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (tssel_f32_untied, svfloat32_t, svuint32_t, -+ z0 = svtssel_f32 (z1, z4), -+ z0 = svtssel (z1, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f64.c -new file mode 100644 -index 000000000..ffcdf4224 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/tssel_f64.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** tssel_f64_tied1: -+** ftssel z0\.d, z0\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (tssel_f64_tied1, svfloat64_t, svuint64_t, -+ z0 = svtssel_f64 (z0, z4), -+ z0 = svtssel (z0, z4)) -+ -+/* -+** tssel_f64_tied2: -+** ftssel z0\.d, z4\.d, z0\.d -+** ret -+*/ -+TEST_DUAL_Z_REV (tssel_f64_tied2, svfloat64_t, svuint64_t, -+ z0_res = svtssel_f64 (z4, z0), -+ z0_res = svtssel (z4, z0)) -+ -+/* -+** tssel_f64_untied: -+** ftssel z0\.d, z1\.d, z4\.d -+** ret -+*/ -+TEST_DUAL_Z (tssel_f64_untied, svfloat64_t, svuint64_t, -+ z0 = svtssel_f64 (z1, z4), -+ z0 = svtssel (z1, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/undef2_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/undef2_1.c -new file mode 100644 -index 000000000..fe6c4c7c7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/undef2_1.c -@@ -0,0 +1,87 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** int8: -+** ret -+*/ -+TEST_UNDEF (int8, svint8x2_t, -+ z0 = svundef2_s8 ()) -+ -+/* -+** uint8: -+** ret -+*/ -+TEST_UNDEF (uint8, svuint8x2_t, -+ z0 = svundef2_u8 ()) -+ -+/* -+** int16: -+** ret -+*/ -+TEST_UNDEF (int16, svint16x2_t, -+ z0 = svundef2_s16 ()) -+ -+/* -+** uint16: -+** ret -+*/ -+TEST_UNDEF (uint16, svuint16x2_t, -+ z0 = svundef2_u16 ()) -+ -+/* -+** float16: -+** ret -+*/ -+TEST_UNDEF (float16, svfloat16x2_t, -+ z0 = svundef2_f16 ()) -+ -+/* -+** bfloat16: -+** ret -+*/ -+TEST_UNDEF (bfloat16, svbfloat16x2_t, -+ z0 = svundef2_bf16 ()) -+ -+/* -+** int32: -+** ret -+*/ -+TEST_UNDEF (int32, svint32x2_t, -+ z0 = svundef2_s32 ()) -+ -+/* -+** uint32: -+** ret -+*/ -+TEST_UNDEF (uint32, svuint32x2_t, -+ z0 = svundef2_u32 ()) -+ -+/* -+** float32: -+** ret -+*/ -+TEST_UNDEF (float32, svfloat32x2_t, -+ z0 = svundef2_f32 ()) -+ -+/* -+** int64: -+** ret -+*/ -+TEST_UNDEF (int64, svint64x2_t, -+ z0 = svundef2_s64 ()) -+ -+/* -+** uint64: -+** ret -+*/ -+TEST_UNDEF (uint64, svuint64x2_t, -+ z0 = svundef2_u64 ()) -+ -+/* -+** float64: -+** ret -+*/ -+TEST_UNDEF (float64, svfloat64x2_t, -+ z0 = svundef2_f64 ()) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/undef3_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/undef3_1.c -new file mode 100644 -index 000000000..5c18c6317 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/undef3_1.c -@@ -0,0 +1,87 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** int8: -+** ret -+*/ -+TEST_UNDEF (int8, 
svint8x3_t, -+ z0 = svundef3_s8 ()) -+ -+/* -+** uint8: -+** ret -+*/ -+TEST_UNDEF (uint8, svuint8x3_t, -+ z0 = svundef3_u8 ()) -+ -+/* -+** int16: -+** ret -+*/ -+TEST_UNDEF (int16, svint16x3_t, -+ z0 = svundef3_s16 ()) -+ -+/* -+** uint16: -+** ret -+*/ -+TEST_UNDEF (uint16, svuint16x3_t, -+ z0 = svundef3_u16 ()) -+ -+/* -+** float16: -+** ret -+*/ -+TEST_UNDEF (float16, svfloat16x3_t, -+ z0 = svundef3_f16 ()) -+ -+/* -+** bfloat16: -+** ret -+*/ -+TEST_UNDEF (bfloat16, svbfloat16x3_t, -+ z0 = svundef3_bf16 ()) -+ -+/* -+** int32: -+** ret -+*/ -+TEST_UNDEF (int32, svint32x3_t, -+ z0 = svundef3_s32 ()) -+ -+/* -+** uint32: -+** ret -+*/ -+TEST_UNDEF (uint32, svuint32x3_t, -+ z0 = svundef3_u32 ()) -+ -+/* -+** float32: -+** ret -+*/ -+TEST_UNDEF (float32, svfloat32x3_t, -+ z0 = svundef3_f32 ()) -+ -+/* -+** int64: -+** ret -+*/ -+TEST_UNDEF (int64, svint64x3_t, -+ z0 = svundef3_s64 ()) -+ -+/* -+** uint64: -+** ret -+*/ -+TEST_UNDEF (uint64, svuint64x3_t, -+ z0 = svundef3_u64 ()) -+ -+/* -+** float64: -+** ret -+*/ -+TEST_UNDEF (float64, svfloat64x3_t, -+ z0 = svundef3_f64 ()) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/undef4_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/undef4_1.c -new file mode 100644 -index 000000000..4d6b86b04 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/undef4_1.c -@@ -0,0 +1,87 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** int8: -+** ret -+*/ -+TEST_UNDEF (int8, svint8x4_t, -+ z0 = svundef4_s8 ()) -+ -+/* -+** uint8: -+** ret -+*/ -+TEST_UNDEF (uint8, svuint8x4_t, -+ z0 = svundef4_u8 ()) -+ -+/* -+** int16: -+** ret -+*/ -+TEST_UNDEF (int16, svint16x4_t, -+ z0 = svundef4_s16 ()) -+ -+/* -+** uint16: -+** ret -+*/ -+TEST_UNDEF (uint16, svuint16x4_t, -+ z0 = svundef4_u16 ()) -+ -+/* -+** float16: -+** ret -+*/ -+TEST_UNDEF (float16, svfloat16x4_t, -+ z0 = svundef4_f16 ()) -+ -+/* -+** bfloat16: -+** ret -+*/ -+TEST_UNDEF (bfloat16, svbfloat16x4_t, -+ z0 = svundef4_bf16 ()) -+ -+/* -+** int32: -+** ret -+*/ -+TEST_UNDEF (int32, svint32x4_t, -+ z0 = svundef4_s32 ()) -+ -+/* -+** uint32: -+** ret -+*/ -+TEST_UNDEF (uint32, svuint32x4_t, -+ z0 = svundef4_u32 ()) -+ -+/* -+** float32: -+** ret -+*/ -+TEST_UNDEF (float32, svfloat32x4_t, -+ z0 = svundef4_f32 ()) -+ -+/* -+** int64: -+** ret -+*/ -+TEST_UNDEF (int64, svint64x4_t, -+ z0 = svundef4_s64 ()) -+ -+/* -+** uint64: -+** ret -+*/ -+TEST_UNDEF (uint64, svuint64x4_t, -+ z0 = svundef4_u64 ()) -+ -+/* -+** float64: -+** ret -+*/ -+TEST_UNDEF (float64, svfloat64x4_t, -+ z0 = svundef4_f64 ()) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/undef_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/undef_1.c -new file mode 100644 -index 000000000..62873b6e1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/undef_1.c -@@ -0,0 +1,87 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** int8: -+** ret -+*/ -+TEST_UNDEF (int8, svint8_t, -+ z0 = svundef_s8 ()) -+ -+/* -+** uint8: -+** ret -+*/ -+TEST_UNDEF (uint8, svuint8_t, -+ z0 = svundef_u8 ()) -+ -+/* -+** int16: -+** ret -+*/ -+TEST_UNDEF (int16, svint16_t, -+ z0 = svundef_s16 ()) -+ -+/* -+** uint16: -+** ret -+*/ -+TEST_UNDEF (uint16, svuint16_t, -+ z0 = svundef_u16 ()) -+ -+/* -+** float16: -+** ret -+*/ -+TEST_UNDEF (float16, svfloat16_t, -+ z0 = svundef_f16 ()) -+ -+/* -+** bfloat16: -+** ret -+*/ -+TEST_UNDEF (bfloat16, svbfloat16_t, -+ z0 = svundef_bf16 ()) -+ 
-+/* -+** int32: -+** ret -+*/ -+TEST_UNDEF (int32, svint32_t, -+ z0 = svundef_s32 ()) -+ -+/* -+** uint32: -+** ret -+*/ -+TEST_UNDEF (uint32, svuint32_t, -+ z0 = svundef_u32 ()) -+ -+/* -+** float32: -+** ret -+*/ -+TEST_UNDEF (float32, svfloat32_t, -+ z0 = svundef_f32 ()) -+ -+/* -+** int64: -+** ret -+*/ -+TEST_UNDEF (int64, svint64_t, -+ z0 = svundef_s64 ()) -+ -+/* -+** uint64: -+** ret -+*/ -+TEST_UNDEF (uint64, svuint64_t, -+ z0 = svundef_u64 ()) -+ -+/* -+** float64: -+** ret -+*/ -+TEST_UNDEF (float64, svfloat64_t, -+ z0 = svundef_f64 ()) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_b.c -new file mode 100644 -index 000000000..ff1a84aac ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_b.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** unpkhi_b_tied1: -+** punpkhi p0\.h, p0\.b -+** ret -+*/ -+TEST_UNIFORM_P (unpkhi_b_tied1, -+ p0 = svunpkhi_b (p0), -+ p0 = svunpkhi (p0)) -+ -+/* -+** unpkhi_b_untied: -+** punpkhi p0\.h, p1\.b -+** ret -+*/ -+TEST_UNIFORM_P (unpkhi_b_untied, -+ p0 = svunpkhi_b (p1), -+ p0 = svunpkhi (p1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_s16.c -new file mode 100644 -index 000000000..3f79ac65f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_s16.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** unpkhi_s16_tied1: -+** sunpkhi z0\.h, z0\.b -+** ret -+*/ -+TEST_DUAL_Z_REV (unpkhi_s16_tied1, svint16_t, svint8_t, -+ z0_res = svunpkhi_s16 (z0), -+ z0_res = svunpkhi (z0)) -+ -+/* -+** unpkhi_s16_untied: -+** sunpkhi z0\.h, z4\.b -+** ret -+*/ -+TEST_DUAL_Z (unpkhi_s16_untied, svint16_t, svint8_t, -+ z0 = svunpkhi_s16 (z4), -+ z0 = svunpkhi (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_s32.c -new file mode 100644 -index 000000000..619fb0882 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_s32.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** unpkhi_s32_tied1: -+** sunpkhi z0\.s, z0\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (unpkhi_s32_tied1, svint32_t, svint16_t, -+ z0_res = svunpkhi_s32 (z0), -+ z0_res = svunpkhi (z0)) -+ -+/* -+** unpkhi_s32_untied: -+** sunpkhi z0\.s, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (unpkhi_s32_untied, svint32_t, svint16_t, -+ z0 = svunpkhi_s32 (z4), -+ z0 = svunpkhi (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_s64.c -new file mode 100644 -index 000000000..5d6da1768 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_s64.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** unpkhi_s64_tied1: -+** sunpkhi z0\.d, z0\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (unpkhi_s64_tied1, svint64_t, svint32_t, -+ z0_res = svunpkhi_s64 (z0), -+ z0_res = svunpkhi (z0)) -+ -+/* -+** unpkhi_s64_untied: -+** sunpkhi z0\.d, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (unpkhi_s64_untied, svint64_t, svint32_t, -+ z0 = svunpkhi_s64 (z4), -+ z0 = svunpkhi (z4)) -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_u16.c -new file mode 100644 -index 000000000..68f47a282 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_u16.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** unpkhi_u16_tied1: -+** uunpkhi z0\.h, z0\.b -+** ret -+*/ -+TEST_DUAL_Z_REV (unpkhi_u16_tied1, svuint16_t, svuint8_t, -+ z0_res = svunpkhi_u16 (z0), -+ z0_res = svunpkhi (z0)) -+ -+/* -+** unpkhi_u16_untied: -+** uunpkhi z0\.h, z4\.b -+** ret -+*/ -+TEST_DUAL_Z (unpkhi_u16_untied, svuint16_t, svuint8_t, -+ z0 = svunpkhi_u16 (z4), -+ z0 = svunpkhi (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_u32.c -new file mode 100644 -index 000000000..3c4b161e4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_u32.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** unpkhi_u32_tied1: -+** uunpkhi z0\.s, z0\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (unpkhi_u32_tied1, svuint32_t, svuint16_t, -+ z0_res = svunpkhi_u32 (z0), -+ z0_res = svunpkhi (z0)) -+ -+/* -+** unpkhi_u32_untied: -+** uunpkhi z0\.s, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (unpkhi_u32_untied, svuint32_t, svuint16_t, -+ z0 = svunpkhi_u32 (z4), -+ z0 = svunpkhi (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_u64.c -new file mode 100644 -index 000000000..94cfbd493 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpkhi_u64.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** unpkhi_u64_tied1: -+** uunpkhi z0\.d, z0\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (unpkhi_u64_tied1, svuint64_t, svuint32_t, -+ z0_res = svunpkhi_u64 (z0), -+ z0_res = svunpkhi (z0)) -+ -+/* -+** unpkhi_u64_untied: -+** uunpkhi z0\.d, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (unpkhi_u64_untied, svuint64_t, svuint32_t, -+ z0 = svunpkhi_u64 (z4), -+ z0 = svunpkhi (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_b.c -new file mode 100644 -index 000000000..476ec8bc3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_b.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** unpklo_b_tied1: -+** punpklo p0\.h, p0\.b -+** ret -+*/ -+TEST_UNIFORM_P (unpklo_b_tied1, -+ p0 = svunpklo_b (p0), -+ p0 = svunpklo (p0)) -+ -+/* -+** unpklo_b_untied: -+** punpklo p0\.h, p1\.b -+** ret -+*/ -+TEST_UNIFORM_P (unpklo_b_untied, -+ p0 = svunpklo_b (p1), -+ p0 = svunpklo (p1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_s16.c -new file mode 100644 -index 000000000..a0e83ff1b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_s16.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** unpklo_s16_tied1: -+** sunpklo z0\.h, z0\.b -+** ret -+*/ -+TEST_DUAL_Z_REV (unpklo_s16_tied1, svint16_t, svint8_t, -+ z0_res = svunpklo_s16 (z0), -+ z0_res = svunpklo (z0)) -+ -+/* -+** unpklo_s16_untied: -+** sunpklo z0\.h, 
z4\.b -+** ret -+*/ -+TEST_DUAL_Z (unpklo_s16_untied, svint16_t, svint8_t, -+ z0 = svunpklo_s16 (z4), -+ z0 = svunpklo (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_s32.c -new file mode 100644 -index 000000000..49a14fb7b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_s32.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** unpklo_s32_tied1: -+** sunpklo z0\.s, z0\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (unpklo_s32_tied1, svint32_t, svint16_t, -+ z0_res = svunpklo_s32 (z0), -+ z0_res = svunpklo (z0)) -+ -+/* -+** unpklo_s32_untied: -+** sunpklo z0\.s, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (unpklo_s32_untied, svint32_t, svint16_t, -+ z0 = svunpklo_s32 (z4), -+ z0 = svunpklo (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_s64.c -new file mode 100644 -index 000000000..c430047e1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_s64.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** unpklo_s64_tied1: -+** sunpklo z0\.d, z0\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (unpklo_s64_tied1, svint64_t, svint32_t, -+ z0_res = svunpklo_s64 (z0), -+ z0_res = svunpklo (z0)) -+ -+/* -+** unpklo_s64_untied: -+** sunpklo z0\.d, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (unpklo_s64_untied, svint64_t, svint32_t, -+ z0 = svunpklo_s64 (z4), -+ z0 = svunpklo (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_u16.c -new file mode 100644 -index 000000000..6feee4427 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_u16.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** unpklo_u16_tied1: -+** uunpklo z0\.h, z0\.b -+** ret -+*/ -+TEST_DUAL_Z_REV (unpklo_u16_tied1, svuint16_t, svuint8_t, -+ z0_res = svunpklo_u16 (z0), -+ z0_res = svunpklo (z0)) -+ -+/* -+** unpklo_u16_untied: -+** uunpklo z0\.h, z4\.b -+** ret -+*/ -+TEST_DUAL_Z (unpklo_u16_untied, svuint16_t, svuint8_t, -+ z0 = svunpklo_u16 (z4), -+ z0 = svunpklo (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_u32.c -new file mode 100644 -index 000000000..c4d4efc86 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_u32.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** unpklo_u32_tied1: -+** uunpklo z0\.s, z0\.h -+** ret -+*/ -+TEST_DUAL_Z_REV (unpklo_u32_tied1, svuint32_t, svuint16_t, -+ z0_res = svunpklo_u32 (z0), -+ z0_res = svunpklo (z0)) -+ -+/* -+** unpklo_u32_untied: -+** uunpklo z0\.s, z4\.h -+** ret -+*/ -+TEST_DUAL_Z (unpklo_u32_untied, svuint32_t, svuint16_t, -+ z0 = svunpklo_u32 (z4), -+ z0 = svunpklo (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_u64.c -new file mode 100644 -index 000000000..2845e37a5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/unpklo_u64.c -@@ -0,0 +1,21 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** unpklo_u64_tied1: -+** 
uunpklo z0\.d, z0\.s -+** ret -+*/ -+TEST_DUAL_Z_REV (unpklo_u64_tied1, svuint64_t, svuint32_t, -+ z0_res = svunpklo_u64 (z0), -+ z0_res = svunpklo (z0)) -+ -+/* -+** unpklo_u64_untied: -+** uunpklo z0\.d, z4\.s -+** ret -+*/ -+TEST_DUAL_Z (unpklo_u64_untied, svuint64_t, svuint32_t, -+ z0 = svunpklo_u64 (z4), -+ z0 = svunpklo (z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/usdot_lane_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/usdot_lane_s32.c -new file mode 100644 -index 000000000..8fd255687 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/usdot_lane_s32.c -@@ -0,0 +1,97 @@ -+/* { dg-require-effective-target aarch64_asm_i8mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+sve+i8mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** usdot_lane_0_s32_tied1: -+** usdot z0\.s, z2\.b, z4\.b\[0\] -+** ret -+*/ -+TEST_TRIPLE_Z (usdot_lane_0_s32_tied1, svint32_t, svuint8_t, svint8_t, -+ z0 = svusdot_lane_s32 (z0, z2, z4, 0), -+ z0 = svusdot_lane (z0, z2, z4, 0)) -+ -+/* -+** usdot_lane_0_s32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z2 -+** usdot z0\.s, \1\.b, z4\.b\[0\] -+** ret -+*/ -+TEST_TRIPLE_Z_REV2 (usdot_lane_0_s32_tied2, svint32_t, svuint8_t, svint8_t, -+ z0_res = svusdot_lane_s32 (z2, z0, z4, 0), -+ z0_res = svusdot_lane (z2, z0, z4, 0)) -+ -+/* -+** usdot_lane_0_s32_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** usdot z0\.s, z2\.b, \1\.b\[0\] -+** ret -+*/ -+TEST_TRIPLE_Z_REV (usdot_lane_0_s32_tied3, svint32_t, svuint8_t, svint8_t, -+ z0_res = svusdot_lane_s32 (z4, z2, z0, 0), -+ z0_res = svusdot_lane (z4, z2, z0, 0)) -+ -+/* -+** usdot_lane_0_s32_untied: -+** movprfx z0, z1 -+** usdot z0\.s, z2\.b, z4\.b\[0\] -+** ret -+*/ -+TEST_TRIPLE_Z (usdot_lane_0_s32_untied, svint32_t, svuint8_t, svint8_t, -+ z0 = svusdot_lane_s32 (z1, z2, z4, 0), -+ z0 = svusdot_lane (z1, z2, z4, 0)) -+ -+/* -+** usdot_lane_1_s32: -+** usdot z0\.s, z2\.b, z5\.b\[1\] -+** ret -+*/ -+TEST_TRIPLE_Z (usdot_lane_1_s32, svint32_t, svuint8_t, svint8_t, -+ z0 = svusdot_lane_s32 (z0, z2, z5, 1), -+ z0 = svusdot_lane (z0, z2, z5, 1)) -+ -+/* -+** usdot_lane_2_s32: -+** usdot z0\.s, z2\.b, z5\.b\[2\] -+** ret -+*/ -+TEST_TRIPLE_Z (usdot_lane_2_s32, svint32_t, svuint8_t, svint8_t, -+ z0 = svusdot_lane_s32 (z0, z2, z5, 2), -+ z0 = svusdot_lane (z0, z2, z5, 2)) -+ -+/* -+** usdot_lane_3_s32: -+** usdot z0\.s, z2\.b, z5\.b\[3\] -+** ret -+*/ -+TEST_TRIPLE_Z (usdot_lane_3_s32, svint32_t, svuint8_t, svint8_t, -+ z0 = svusdot_lane_s32 (z0, z2, z5, 3), -+ z0 = svusdot_lane (z0, z2, z5, 3)) -+ -+/* -+** usdot_lane_z8_s32: -+** str d8, \[sp, -16\]! 
-+** mov (z[0-7])\.d, z8\.d -+** usdot z0\.s, z1\.b, \1\.b\[1\] -+** ldr d8, \[sp\], 16 -+** ret -+*/ -+TEST_TRIPLE_LANE_REG (usdot_lane_z8_s32, svint32_t, svuint8_t, svint8_t, -+ z8, -+ z0 = svusdot_lane_s32 (z0, z1, z8, 1), -+ z0 = svusdot_lane (z0, z1, z8, 1)) -+ -+/* -+** usdot_lane_z16_s32: -+** mov (z[0-7])\.d, z16\.d -+** usdot z0\.s, z1\.b, \1\.b\[1\] -+** ret -+*/ -+TEST_TRIPLE_LANE_REG (usdot_lane_z16_s32, svint32_t, svuint8_t, svint8_t, -+ z16, -+ z0 = svusdot_lane_s32 (z0, z1, z16, 1), -+ z0 = svusdot_lane (z0, z1, z16, 1)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/usdot_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/usdot_s32.c -new file mode 100644 -index 000000000..ccac5cae5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/usdot_s32.c -@@ -0,0 +1,46 @@ -+/* { dg-require-effective-target aarch64_asm_i8mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+sve+i8mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** usdot_s32_tied1: -+** usdot z0\.s, z2\.b, z4\.b -+** ret -+*/ -+TEST_TRIPLE_Z (usdot_s32_tied1, svint32_t, svuint8_t, svint8_t, -+ z0 = svusdot_s32 (z0, z2, z4), -+ z0 = svusdot (z0, z2, z4)) -+ -+/* -+** usdot_s32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** usdot z0\.s, z2\.b, \1\.b -+** ret -+*/ -+TEST_TRIPLE_Z_REV (usdot_s32_tied2, svint32_t, svuint8_t, svint8_t, -+ z0_res = svusdot_s32 (z4, z2, z0), -+ z0_res = svusdot (z4, z2, z0)) -+ -+/* -+** usdot_w0_s32_tied: -+** mov (z[0-9]+\.b), w0 -+** usdot z0\.s, z2\.b, \1 -+** ret -+*/ -+TEST_TRIPLE_ZX (usdot_w0_s32_tied, svint32_t, svuint8_t, int8_t, -+ z0 = svusdot_n_s32 (z0, z2, x0), -+ z0 = svusdot (z0, z2, x0)) -+ -+/* -+** usdot_9_s32_tied: -+** mov (z[0-9]+\.b), #9 -+** usdot z0\.s, z2\.b, \1 -+** ret -+*/ -+TEST_TRIPLE_Z (usdot_9_s32_tied, svint32_t, svuint8_t, int8_t, -+ z0 = svusdot_n_s32 (z0, z2, 9), -+ z0 = svusdot (z0, z2, 9)) -+ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/usmmla_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/usmmla_s32.c -new file mode 100644 -index 000000000..9440f3fd9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/usmmla_s32.c -@@ -0,0 +1,46 @@ -+/* { dg-require-effective-target aarch64_asm_i8mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+sve+i8mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** usmmla_s32_tied1: -+** usmmla z0\.s, z2\.b, z4\.b -+** ret -+*/ -+TEST_TRIPLE_Z (usmmla_s32_tied1, svint32_t, svuint8_t, svint8_t, -+ z0 = svusmmla_s32 (z0, z2, z4), -+ z0 = svusmmla (z0, z2, z4)) -+ -+/* -+** usmmla_s32_tied2: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z2 -+** usmmla z0\.s, \1\.b, z4\.b -+** ret -+*/ -+TEST_TRIPLE_Z_REV2 (usmmla_s32_tied2, svint32_t, svuint8_t, svint8_t, -+ z0_res = svusmmla_s32 (z2, z0, z4), -+ z0_res = svusmmla (z2, z0, z4)) -+ -+/* -+** usmmla_s32_tied3: -+** mov (z[0-9]+)\.d, z0\.d -+** movprfx z0, z4 -+** usmmla z0\.s, z2\.b, \1\.b -+** ret -+*/ -+TEST_TRIPLE_Z_REV (usmmla_s32_tied3, svint32_t, svuint8_t, svint8_t, -+ z0_res = svusmmla_s32 (z4, z2, z0), -+ z0_res = svusmmla (z4, z2, z0)) -+ -+/* -+** usmmla_s32_untied: -+** movprfx z0, z1 -+** usmmla z0\.s, z2\.b, z4\.b -+** ret -+*/ -+TEST_TRIPLE_Z (usmmla_s32_untied, svint32_t, svuint8_t, svint8_t, -+ z0 = svusmmla_s32 (z1, z2, z4), -+ z0 = svusmmla (z1, z2, z4)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_b16.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_b16.c -new file mode 100644 -index 000000000..245e401aa ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_b16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp1_b16_tied1: -+** uzp1 p0\.h, p0\.h, p1\.h -+** ret -+*/ -+TEST_UNIFORM_P (uzp1_b16_tied1, -+ p0 = svuzp1_b16 (p0, p1), -+ p0 = svuzp1_b16 (p0, p1)) -+ -+/* -+** uzp1_b16_tied2: -+** uzp1 p0\.h, p1\.h, p0\.h -+** ret -+*/ -+TEST_UNIFORM_P (uzp1_b16_tied2, -+ p0 = svuzp1_b16 (p1, p0), -+ p0 = svuzp1_b16 (p1, p0)) -+ -+/* -+** uzp1_b16_untied: -+** uzp1 p0\.h, p1\.h, p2\.h -+** ret -+*/ -+TEST_UNIFORM_P (uzp1_b16_untied, -+ p0 = svuzp1_b16 (p1, p2), -+ p0 = svuzp1_b16 (p1, p2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_b32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_b32.c -new file mode 100644 -index 000000000..c88034492 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_b32.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp1_b32_tied1: -+** uzp1 p0\.s, p0\.s, p1\.s -+** ret -+*/ -+TEST_UNIFORM_P (uzp1_b32_tied1, -+ p0 = svuzp1_b32 (p0, p1), -+ p0 = svuzp1_b32 (p0, p1)) -+ -+/* -+** uzp1_b32_tied2: -+** uzp1 p0\.s, p1\.s, p0\.s -+** ret -+*/ -+TEST_UNIFORM_P (uzp1_b32_tied2, -+ p0 = svuzp1_b32 (p1, p0), -+ p0 = svuzp1_b32 (p1, p0)) -+ -+/* -+** uzp1_b32_untied: -+** uzp1 p0\.s, p1\.s, p2\.s -+** ret -+*/ -+TEST_UNIFORM_P (uzp1_b32_untied, -+ p0 = svuzp1_b32 (p1, p2), -+ p0 = svuzp1_b32 (p1, p2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_b64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_b64.c -new file mode 100644 -index 000000000..71ac5c150 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_b64.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp1_b64_tied1: -+** uzp1 p0\.d, p0\.d, p1\.d -+** ret -+*/ -+TEST_UNIFORM_P (uzp1_b64_tied1, -+ p0 = svuzp1_b64 (p0, p1), -+ p0 = svuzp1_b64 (p0, p1)) -+ -+/* -+** uzp1_b64_tied2: -+** uzp1 p0\.d, p1\.d, p0\.d -+** ret -+*/ -+TEST_UNIFORM_P (uzp1_b64_tied2, -+ p0 = svuzp1_b64 (p1, p0), -+ p0 = svuzp1_b64 (p1, p0)) -+ -+/* -+** uzp1_b64_untied: -+** uzp1 p0\.d, p1\.d, p2\.d -+** ret -+*/ -+TEST_UNIFORM_P (uzp1_b64_untied, -+ p0 = svuzp1_b64 (p1, p2), -+ p0 = svuzp1_b64 (p1, p2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_b8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_b8.c -new file mode 100644 -index 000000000..250054bb6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_b8.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp1_b8_tied1: -+** uzp1 p0\.b, p0\.b, p1\.b -+** ret -+*/ -+TEST_UNIFORM_P (uzp1_b8_tied1, -+ p0 = svuzp1_b8 (p0, p1), -+ p0 = svuzp1_b8 (p0, p1)) -+ -+/* -+** uzp1_b8_tied2: -+** uzp1 p0\.b, p1\.b, p0\.b -+** ret -+*/ -+TEST_UNIFORM_P (uzp1_b8_tied2, -+ p0 = svuzp1_b8 (p1, p0), -+ p0 = svuzp1_b8 (p1, p0)) -+ -+/* -+** uzp1_b8_untied: -+** uzp1 p0\.b, p1\.b, p2\.b -+** ret -+*/ -+TEST_UNIFORM_P (uzp1_b8_untied, -+ p0 = svuzp1_b8 (p1, p2), -+ p0 = svuzp1_b8 (p1, p2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_bf16.c -new 
file mode 100644 -index 000000000..19d43ed11 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_bf16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp1_bf16_tied1: -+** uzp1 z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1_bf16_tied1, svbfloat16_t, -+ z0 = svuzp1_bf16 (z0, z1), -+ z0 = svuzp1 (z0, z1)) -+ -+/* -+** uzp1_bf16_tied2: -+** uzp1 z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1_bf16_tied2, svbfloat16_t, -+ z0 = svuzp1_bf16 (z1, z0), -+ z0 = svuzp1 (z1, z0)) -+ -+/* -+** uzp1_bf16_untied: -+** uzp1 z0\.h, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1_bf16_untied, svbfloat16_t, -+ z0 = svuzp1_bf16 (z1, z2), -+ z0 = svuzp1 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_f16.c -new file mode 100644 -index 000000000..313673e9d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_f16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp1_f16_tied1: -+** uzp1 z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1_f16_tied1, svfloat16_t, -+ z0 = svuzp1_f16 (z0, z1), -+ z0 = svuzp1 (z0, z1)) -+ -+/* -+** uzp1_f16_tied2: -+** uzp1 z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1_f16_tied2, svfloat16_t, -+ z0 = svuzp1_f16 (z1, z0), -+ z0 = svuzp1 (z1, z0)) -+ -+/* -+** uzp1_f16_untied: -+** uzp1 z0\.h, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1_f16_untied, svfloat16_t, -+ z0 = svuzp1_f16 (z1, z2), -+ z0 = svuzp1 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_f32.c -new file mode 100644 -index 000000000..5bbac2c60 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_f32.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp1_f32_tied1: -+** uzp1 z0\.s, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1_f32_tied1, svfloat32_t, -+ z0 = svuzp1_f32 (z0, z1), -+ z0 = svuzp1 (z0, z1)) -+ -+/* -+** uzp1_f32_tied2: -+** uzp1 z0\.s, z1\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1_f32_tied2, svfloat32_t, -+ z0 = svuzp1_f32 (z1, z0), -+ z0 = svuzp1 (z1, z0)) -+ -+/* -+** uzp1_f32_untied: -+** uzp1 z0\.s, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1_f32_untied, svfloat32_t, -+ z0 = svuzp1_f32 (z1, z2), -+ z0 = svuzp1 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_f64.c -new file mode 100644 -index 000000000..ef97b1765 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_f64.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp1_f64_tied1: -+** uzp1 z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1_f64_tied1, svfloat64_t, -+ z0 = svuzp1_f64 (z0, z1), -+ z0 = svuzp1 (z0, z1)) -+ -+/* -+** uzp1_f64_tied2: -+** uzp1 z0\.d, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1_f64_tied2, svfloat64_t, -+ z0 = svuzp1_f64 (z1, z0), -+ z0 = svuzp1 (z1, z0)) -+ -+/* -+** uzp1_f64_untied: -+** uzp1 z0\.d, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1_f64_untied, svfloat64_t, -+ z0 = svuzp1_f64 (z1, z2), -+ z0 = svuzp1 (z1, z2)) -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_s16.c -new file mode 100644 -index 000000000..b77832b07 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_s16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp1_s16_tied1: -+** uzp1 z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1_s16_tied1, svint16_t, -+ z0 = svuzp1_s16 (z0, z1), -+ z0 = svuzp1 (z0, z1)) -+ -+/* -+** uzp1_s16_tied2: -+** uzp1 z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1_s16_tied2, svint16_t, -+ z0 = svuzp1_s16 (z1, z0), -+ z0 = svuzp1 (z1, z0)) -+ -+/* -+** uzp1_s16_untied: -+** uzp1 z0\.h, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1_s16_untied, svint16_t, -+ z0 = svuzp1_s16 (z1, z2), -+ z0 = svuzp1 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_s32.c -new file mode 100644 -index 000000000..64291afbe ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_s32.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp1_s32_tied1: -+** uzp1 z0\.s, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1_s32_tied1, svint32_t, -+ z0 = svuzp1_s32 (z0, z1), -+ z0 = svuzp1 (z0, z1)) -+ -+/* -+** uzp1_s32_tied2: -+** uzp1 z0\.s, z1\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1_s32_tied2, svint32_t, -+ z0 = svuzp1_s32 (z1, z0), -+ z0 = svuzp1 (z1, z0)) -+ -+/* -+** uzp1_s32_untied: -+** uzp1 z0\.s, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1_s32_untied, svint32_t, -+ z0 = svuzp1_s32 (z1, z2), -+ z0 = svuzp1 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_s64.c -new file mode 100644 -index 000000000..e8f7799f6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_s64.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp1_s64_tied1: -+** uzp1 z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1_s64_tied1, svint64_t, -+ z0 = svuzp1_s64 (z0, z1), -+ z0 = svuzp1 (z0, z1)) -+ -+/* -+** uzp1_s64_tied2: -+** uzp1 z0\.d, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1_s64_tied2, svint64_t, -+ z0 = svuzp1_s64 (z1, z0), -+ z0 = svuzp1 (z1, z0)) -+ -+/* -+** uzp1_s64_untied: -+** uzp1 z0\.d, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1_s64_untied, svint64_t, -+ z0 = svuzp1_s64 (z1, z2), -+ z0 = svuzp1 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_s8.c -new file mode 100644 -index 000000000..98464b790 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_s8.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp1_s8_tied1: -+** uzp1 z0\.b, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1_s8_tied1, svint8_t, -+ z0 = svuzp1_s8 (z0, z1), -+ z0 = svuzp1 (z0, z1)) -+ -+/* -+** uzp1_s8_tied2: -+** uzp1 z0\.b, z1\.b, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1_s8_tied2, svint8_t, -+ z0 = svuzp1_s8 (z1, z0), -+ z0 = svuzp1 (z1, z0)) -+ -+/* -+** uzp1_s8_untied: -+** uzp1 z0\.b, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1_s8_untied, svint8_t, -+ z0 = svuzp1_s8 (z1, z2), -+ z0 = svuzp1 (z1, 
z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_u16.c -new file mode 100644 -index 000000000..da95171fe ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_u16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp1_u16_tied1: -+** uzp1 z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1_u16_tied1, svuint16_t, -+ z0 = svuzp1_u16 (z0, z1), -+ z0 = svuzp1 (z0, z1)) -+ -+/* -+** uzp1_u16_tied2: -+** uzp1 z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1_u16_tied2, svuint16_t, -+ z0 = svuzp1_u16 (z1, z0), -+ z0 = svuzp1 (z1, z0)) -+ -+/* -+** uzp1_u16_untied: -+** uzp1 z0\.h, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1_u16_untied, svuint16_t, -+ z0 = svuzp1_u16 (z1, z2), -+ z0 = svuzp1 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_u32.c -new file mode 100644 -index 000000000..a57cdcc06 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_u32.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp1_u32_tied1: -+** uzp1 z0\.s, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1_u32_tied1, svuint32_t, -+ z0 = svuzp1_u32 (z0, z1), -+ z0 = svuzp1 (z0, z1)) -+ -+/* -+** uzp1_u32_tied2: -+** uzp1 z0\.s, z1\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1_u32_tied2, svuint32_t, -+ z0 = svuzp1_u32 (z1, z0), -+ z0 = svuzp1 (z1, z0)) -+ -+/* -+** uzp1_u32_untied: -+** uzp1 z0\.s, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1_u32_untied, svuint32_t, -+ z0 = svuzp1_u32 (z1, z2), -+ z0 = svuzp1 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_u64.c -new file mode 100644 -index 000000000..24d820359 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_u64.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp1_u64_tied1: -+** uzp1 z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1_u64_tied1, svuint64_t, -+ z0 = svuzp1_u64 (z0, z1), -+ z0 = svuzp1 (z0, z1)) -+ -+/* -+** uzp1_u64_tied2: -+** uzp1 z0\.d, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1_u64_tied2, svuint64_t, -+ z0 = svuzp1_u64 (z1, z0), -+ z0 = svuzp1 (z1, z0)) -+ -+/* -+** uzp1_u64_untied: -+** uzp1 z0\.d, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1_u64_untied, svuint64_t, -+ z0 = svuzp1_u64 (z1, z2), -+ z0 = svuzp1 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_u8.c -new file mode 100644 -index 000000000..359d4c5f8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1_u8.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp1_u8_tied1: -+** uzp1 z0\.b, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1_u8_tied1, svuint8_t, -+ z0 = svuzp1_u8 (z0, z1), -+ z0 = svuzp1 (z0, z1)) -+ -+/* -+** uzp1_u8_tied2: -+** uzp1 z0\.b, z1\.b, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1_u8_tied2, svuint8_t, -+ z0 = svuzp1_u8 (z1, z0), -+ z0 = svuzp1 (z1, z0)) -+ -+/* -+** uzp1_u8_untied: -+** uzp1 z0\.b, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1_u8_untied, svuint8_t, -+ z0 = svuzp1_u8 
(z1, z2), -+ z0 = svuzp1 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_bf16.c -new file mode 100644 -index 000000000..30a199241 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_bf16.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp1q_bf16_tied1: -+** uzp1 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1q_bf16_tied1, svbfloat16_t, -+ z0 = svuzp1q_bf16 (z0, z1), -+ z0 = svuzp1q (z0, z1)) -+ -+/* -+** uzp1q_bf16_tied2: -+** uzp1 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1q_bf16_tied2, svbfloat16_t, -+ z0 = svuzp1q_bf16 (z1, z0), -+ z0 = svuzp1q (z1, z0)) -+ -+/* -+** uzp1q_bf16_untied: -+** uzp1 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1q_bf16_untied, svbfloat16_t, -+ z0 = svuzp1q_bf16 (z1, z2), -+ z0 = svuzp1q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_f16.c -new file mode 100644 -index 000000000..c11e5bdc4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_f16.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp1q_f16_tied1: -+** uzp1 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1q_f16_tied1, svfloat16_t, -+ z0 = svuzp1q_f16 (z0, z1), -+ z0 = svuzp1q (z0, z1)) -+ -+/* -+** uzp1q_f16_tied2: -+** uzp1 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1q_f16_tied2, svfloat16_t, -+ z0 = svuzp1q_f16 (z1, z0), -+ z0 = svuzp1q (z1, z0)) -+ -+/* -+** uzp1q_f16_untied: -+** uzp1 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1q_f16_untied, svfloat16_t, -+ z0 = svuzp1q_f16 (z1, z2), -+ z0 = svuzp1q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_f32.c -new file mode 100644 -index 000000000..d0ac94543 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_f32.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp1q_f32_tied1: -+** uzp1 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1q_f32_tied1, svfloat32_t, -+ z0 = svuzp1q_f32 (z0, z1), -+ z0 = svuzp1q (z0, z1)) -+ -+/* -+** uzp1q_f32_tied2: -+** uzp1 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1q_f32_tied2, svfloat32_t, -+ z0 = svuzp1q_f32 (z1, z0), -+ z0 = svuzp1q (z1, z0)) -+ -+/* -+** uzp1q_f32_untied: -+** uzp1 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1q_f32_untied, svfloat32_t, -+ z0 = svuzp1q_f32 (z1, z2), -+ z0 = svuzp1q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_f64.c -new file mode 100644 -index 000000000..ac2e5c5cf ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_f64.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options 
"-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp1q_f64_tied1: -+** uzp1 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1q_f64_tied1, svfloat64_t, -+ z0 = svuzp1q_f64 (z0, z1), -+ z0 = svuzp1q (z0, z1)) -+ -+/* -+** uzp1q_f64_tied2: -+** uzp1 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1q_f64_tied2, svfloat64_t, -+ z0 = svuzp1q_f64 (z1, z0), -+ z0 = svuzp1q (z1, z0)) -+ -+/* -+** uzp1q_f64_untied: -+** uzp1 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1q_f64_untied, svfloat64_t, -+ z0 = svuzp1q_f64 (z1, z2), -+ z0 = svuzp1q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_s16.c -new file mode 100644 -index 000000000..aa200b24e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_s16.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp1q_s16_tied1: -+** uzp1 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1q_s16_tied1, svint16_t, -+ z0 = svuzp1q_s16 (z0, z1), -+ z0 = svuzp1q (z0, z1)) -+ -+/* -+** uzp1q_s16_tied2: -+** uzp1 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1q_s16_tied2, svint16_t, -+ z0 = svuzp1q_s16 (z1, z0), -+ z0 = svuzp1q (z1, z0)) -+ -+/* -+** uzp1q_s16_untied: -+** uzp1 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1q_s16_untied, svint16_t, -+ z0 = svuzp1q_s16 (z1, z2), -+ z0 = svuzp1q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_s32.c -new file mode 100644 -index 000000000..eb849df74 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_s32.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp1q_s32_tied1: -+** uzp1 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1q_s32_tied1, svint32_t, -+ z0 = svuzp1q_s32 (z0, z1), -+ z0 = svuzp1q (z0, z1)) -+ -+/* -+** uzp1q_s32_tied2: -+** uzp1 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1q_s32_tied2, svint32_t, -+ z0 = svuzp1q_s32 (z1, z0), -+ z0 = svuzp1q (z1, z0)) -+ -+/* -+** uzp1q_s32_untied: -+** uzp1 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1q_s32_untied, svint32_t, -+ z0 = svuzp1q_s32 (z1, z2), -+ z0 = svuzp1q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_s64.c -new file mode 100644 -index 000000000..e1049761c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_s64.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp1q_s64_tied1: -+** uzp1 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1q_s64_tied1, svint64_t, -+ z0 = svuzp1q_s64 (z0, z1), -+ z0 = svuzp1q (z0, z1)) -+ -+/* -+** uzp1q_s64_tied2: -+** uzp1 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1q_s64_tied2, svint64_t, -+ z0 = svuzp1q_s64 (z1, 
z0), -+ z0 = svuzp1q (z1, z0)) -+ -+/* -+** uzp1q_s64_untied: -+** uzp1 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1q_s64_untied, svint64_t, -+ z0 = svuzp1q_s64 (z1, z2), -+ z0 = svuzp1q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_s8.c -new file mode 100644 -index 000000000..8aa592199 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_s8.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp1q_s8_tied1: -+** uzp1 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1q_s8_tied1, svint8_t, -+ z0 = svuzp1q_s8 (z0, z1), -+ z0 = svuzp1q (z0, z1)) -+ -+/* -+** uzp1q_s8_tied2: -+** uzp1 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1q_s8_tied2, svint8_t, -+ z0 = svuzp1q_s8 (z1, z0), -+ z0 = svuzp1q (z1, z0)) -+ -+/* -+** uzp1q_s8_untied: -+** uzp1 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1q_s8_untied, svint8_t, -+ z0 = svuzp1q_s8 (z1, z2), -+ z0 = svuzp1q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_u16.c -new file mode 100644 -index 000000000..00ffaab06 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_u16.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp1q_u16_tied1: -+** uzp1 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1q_u16_tied1, svuint16_t, -+ z0 = svuzp1q_u16 (z0, z1), -+ z0 = svuzp1q (z0, z1)) -+ -+/* -+** uzp1q_u16_tied2: -+** uzp1 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1q_u16_tied2, svuint16_t, -+ z0 = svuzp1q_u16 (z1, z0), -+ z0 = svuzp1q (z1, z0)) -+ -+/* -+** uzp1q_u16_untied: -+** uzp1 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1q_u16_untied, svuint16_t, -+ z0 = svuzp1q_u16 (z1, z2), -+ z0 = svuzp1q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_u32.c -new file mode 100644 -index 000000000..cd2e4db26 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_u32.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp1q_u32_tied1: -+** uzp1 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1q_u32_tied1, svuint32_t, -+ z0 = svuzp1q_u32 (z0, z1), -+ z0 = svuzp1q (z0, z1)) -+ -+/* -+** uzp1q_u32_tied2: -+** uzp1 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1q_u32_tied2, svuint32_t, -+ z0 = svuzp1q_u32 (z1, z0), -+ z0 = svuzp1q (z1, z0)) -+ -+/* -+** uzp1q_u32_untied: -+** uzp1 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1q_u32_untied, svuint32_t, -+ z0 = svuzp1q_u32 (z1, z2), -+ z0 = svuzp1q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_u64.c -new file mode 100644 -index 000000000..7d8823329 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_u64.c 
-@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp1q_u64_tied1: -+** uzp1 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1q_u64_tied1, svuint64_t, -+ z0 = svuzp1q_u64 (z0, z1), -+ z0 = svuzp1q (z0, z1)) -+ -+/* -+** uzp1q_u64_tied2: -+** uzp1 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1q_u64_tied2, svuint64_t, -+ z0 = svuzp1q_u64 (z1, z0), -+ z0 = svuzp1q (z1, z0)) -+ -+/* -+** uzp1q_u64_untied: -+** uzp1 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1q_u64_untied, svuint64_t, -+ z0 = svuzp1q_u64 (z1, z2), -+ z0 = svuzp1q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_u8.c -new file mode 100644 -index 000000000..701a1d575 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp1q_u8.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp1q_u8_tied1: -+** uzp1 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1q_u8_tied1, svuint8_t, -+ z0 = svuzp1q_u8 (z0, z1), -+ z0 = svuzp1q (z0, z1)) -+ -+/* -+** uzp1q_u8_tied2: -+** uzp1 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1q_u8_tied2, svuint8_t, -+ z0 = svuzp1q_u8 (z1, z0), -+ z0 = svuzp1q (z1, z0)) -+ -+/* -+** uzp1q_u8_untied: -+** uzp1 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp1q_u8_untied, svuint8_t, -+ z0 = svuzp1q_u8 (z1, z2), -+ z0 = svuzp1q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_b16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_b16.c -new file mode 100644 -index 000000000..c3a91e7fc ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_b16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp2_b16_tied1: -+** uzp2 p0\.h, p0\.h, p1\.h -+** ret -+*/ -+TEST_UNIFORM_P (uzp2_b16_tied1, -+ p0 = svuzp2_b16 (p0, p1), -+ p0 = svuzp2_b16 (p0, p1)) -+ -+/* -+** uzp2_b16_tied2: -+** uzp2 p0\.h, p1\.h, p0\.h -+** ret -+*/ -+TEST_UNIFORM_P (uzp2_b16_tied2, -+ p0 = svuzp2_b16 (p1, p0), -+ p0 = svuzp2_b16 (p1, p0)) -+ -+/* -+** uzp2_b16_untied: -+** uzp2 p0\.h, p1\.h, p2\.h -+** ret -+*/ -+TEST_UNIFORM_P (uzp2_b16_untied, -+ p0 = svuzp2_b16 (p1, p2), -+ p0 = svuzp2_b16 (p1, p2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_b32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_b32.c -new file mode 100644 -index 000000000..e3294a6f3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_b32.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp2_b32_tied1: -+** uzp2 p0\.s, p0\.s, p1\.s -+** ret -+*/ -+TEST_UNIFORM_P (uzp2_b32_tied1, -+ p0 = svuzp2_b32 (p0, p1), -+ p0 = svuzp2_b32 (p0, p1)) -+ -+/* -+** uzp2_b32_tied2: -+** uzp2 p0\.s, p1\.s, p0\.s -+** ret -+*/ -+TEST_UNIFORM_P (uzp2_b32_tied2, -+ p0 = svuzp2_b32 (p1, p0), -+ p0 = svuzp2_b32 (p1, p0)) -+ -+/* -+** uzp2_b32_untied: -+** uzp2 p0\.s, p1\.s, p2\.s -+** ret -+*/ -+TEST_UNIFORM_P (uzp2_b32_untied, -+ p0 = svuzp2_b32 (p1, p2), -+ p0 = svuzp2_b32 (p1, p2)) -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_b64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_b64.c -new file mode 100644 -index 000000000..3ae72e10c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_b64.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp2_b64_tied1: -+** uzp2 p0\.d, p0\.d, p1\.d -+** ret -+*/ -+TEST_UNIFORM_P (uzp2_b64_tied1, -+ p0 = svuzp2_b64 (p0, p1), -+ p0 = svuzp2_b64 (p0, p1)) -+ -+/* -+** uzp2_b64_tied2: -+** uzp2 p0\.d, p1\.d, p0\.d -+** ret -+*/ -+TEST_UNIFORM_P (uzp2_b64_tied2, -+ p0 = svuzp2_b64 (p1, p0), -+ p0 = svuzp2_b64 (p1, p0)) -+ -+/* -+** uzp2_b64_untied: -+** uzp2 p0\.d, p1\.d, p2\.d -+** ret -+*/ -+TEST_UNIFORM_P (uzp2_b64_untied, -+ p0 = svuzp2_b64 (p1, p2), -+ p0 = svuzp2_b64 (p1, p2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_b8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_b8.c -new file mode 100644 -index 000000000..726a9a079 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_b8.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp2_b8_tied1: -+** uzp2 p0\.b, p0\.b, p1\.b -+** ret -+*/ -+TEST_UNIFORM_P (uzp2_b8_tied1, -+ p0 = svuzp2_b8 (p0, p1), -+ p0 = svuzp2_b8 (p0, p1)) -+ -+/* -+** uzp2_b8_tied2: -+** uzp2 p0\.b, p1\.b, p0\.b -+** ret -+*/ -+TEST_UNIFORM_P (uzp2_b8_tied2, -+ p0 = svuzp2_b8 (p1, p0), -+ p0 = svuzp2_b8 (p1, p0)) -+ -+/* -+** uzp2_b8_untied: -+** uzp2 p0\.b, p1\.b, p2\.b -+** ret -+*/ -+TEST_UNIFORM_P (uzp2_b8_untied, -+ p0 = svuzp2_b8 (p1, p2), -+ p0 = svuzp2_b8 (p1, p2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_bf16.c -new file mode 100644 -index 000000000..b5566bfdf ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_bf16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp2_bf16_tied1: -+** uzp2 z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2_bf16_tied1, svbfloat16_t, -+ z0 = svuzp2_bf16 (z0, z1), -+ z0 = svuzp2 (z0, z1)) -+ -+/* -+** uzp2_bf16_tied2: -+** uzp2 z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2_bf16_tied2, svbfloat16_t, -+ z0 = svuzp2_bf16 (z1, z0), -+ z0 = svuzp2 (z1, z0)) -+ -+/* -+** uzp2_bf16_untied: -+** uzp2 z0\.h, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2_bf16_untied, svbfloat16_t, -+ z0 = svuzp2_bf16 (z1, z2), -+ z0 = svuzp2 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_f16.c -new file mode 100644 -index 000000000..d4847ef37 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_f16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp2_f16_tied1: -+** uzp2 z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2_f16_tied1, svfloat16_t, -+ z0 = svuzp2_f16 (z0, z1), -+ z0 = svuzp2 (z0, z1)) -+ -+/* -+** uzp2_f16_tied2: -+** uzp2 z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2_f16_tied2, svfloat16_t, -+ z0 = svuzp2_f16 (z1, z0), -+ z0 = svuzp2 (z1, z0)) -+ -+/* -+** uzp2_f16_untied: -+** uzp2 z0\.h, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2_f16_untied, svfloat16_t, -+ z0 = svuzp2_f16 (z1, z2), -+ z0 = svuzp2 (z1, z2)) -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_f32.c -new file mode 100644 -index 000000000..c1699fc9c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_f32.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp2_f32_tied1: -+** uzp2 z0\.s, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2_f32_tied1, svfloat32_t, -+ z0 = svuzp2_f32 (z0, z1), -+ z0 = svuzp2 (z0, z1)) -+ -+/* -+** uzp2_f32_tied2: -+** uzp2 z0\.s, z1\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2_f32_tied2, svfloat32_t, -+ z0 = svuzp2_f32 (z1, z0), -+ z0 = svuzp2 (z1, z0)) -+ -+/* -+** uzp2_f32_untied: -+** uzp2 z0\.s, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2_f32_untied, svfloat32_t, -+ z0 = svuzp2_f32 (z1, z2), -+ z0 = svuzp2 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_f64.c -new file mode 100644 -index 000000000..afbf5c11a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_f64.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp2_f64_tied1: -+** uzp2 z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2_f64_tied1, svfloat64_t, -+ z0 = svuzp2_f64 (z0, z1), -+ z0 = svuzp2 (z0, z1)) -+ -+/* -+** uzp2_f64_tied2: -+** uzp2 z0\.d, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2_f64_tied2, svfloat64_t, -+ z0 = svuzp2_f64 (z1, z0), -+ z0 = svuzp2 (z1, z0)) -+ -+/* -+** uzp2_f64_untied: -+** uzp2 z0\.d, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2_f64_untied, svfloat64_t, -+ z0 = svuzp2_f64 (z1, z2), -+ z0 = svuzp2 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_s16.c -new file mode 100644 -index 000000000..e88df8734 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_s16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp2_s16_tied1: -+** uzp2 z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2_s16_tied1, svint16_t, -+ z0 = svuzp2_s16 (z0, z1), -+ z0 = svuzp2 (z0, z1)) -+ -+/* -+** uzp2_s16_tied2: -+** uzp2 z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2_s16_tied2, svint16_t, -+ z0 = svuzp2_s16 (z1, z0), -+ z0 = svuzp2 (z1, z0)) -+ -+/* -+** uzp2_s16_untied: -+** uzp2 z0\.h, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2_s16_untied, svint16_t, -+ z0 = svuzp2_s16 (z1, z2), -+ z0 = svuzp2 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_s32.c -new file mode 100644 -index 000000000..2e9a73d1f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_s32.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp2_s32_tied1: -+** uzp2 z0\.s, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2_s32_tied1, svint32_t, -+ z0 = svuzp2_s32 (z0, z1), -+ z0 = svuzp2 (z0, z1)) -+ -+/* -+** uzp2_s32_tied2: -+** uzp2 z0\.s, z1\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2_s32_tied2, svint32_t, -+ z0 = svuzp2_s32 (z1, z0), -+ z0 = svuzp2 (z1, z0)) -+ -+/* -+** uzp2_s32_untied: -+** uzp2 z0\.s, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2_s32_untied, svint32_t, -+ z0 = svuzp2_s32 (z1, 
z2), -+ z0 = svuzp2 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_s64.c -new file mode 100644 -index 000000000..ffec78ccc ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_s64.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp2_s64_tied1: -+** uzp2 z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2_s64_tied1, svint64_t, -+ z0 = svuzp2_s64 (z0, z1), -+ z0 = svuzp2 (z0, z1)) -+ -+/* -+** uzp2_s64_tied2: -+** uzp2 z0\.d, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2_s64_tied2, svint64_t, -+ z0 = svuzp2_s64 (z1, z0), -+ z0 = svuzp2 (z1, z0)) -+ -+/* -+** uzp2_s64_untied: -+** uzp2 z0\.d, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2_s64_untied, svint64_t, -+ z0 = svuzp2_s64 (z1, z2), -+ z0 = svuzp2 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_s8.c -new file mode 100644 -index 000000000..72037a088 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_s8.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp2_s8_tied1: -+** uzp2 z0\.b, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2_s8_tied1, svint8_t, -+ z0 = svuzp2_s8 (z0, z1), -+ z0 = svuzp2 (z0, z1)) -+ -+/* -+** uzp2_s8_tied2: -+** uzp2 z0\.b, z1\.b, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2_s8_tied2, svint8_t, -+ z0 = svuzp2_s8 (z1, z0), -+ z0 = svuzp2 (z1, z0)) -+ -+/* -+** uzp2_s8_untied: -+** uzp2 z0\.b, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2_s8_untied, svint8_t, -+ z0 = svuzp2_s8 (z1, z2), -+ z0 = svuzp2 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_u16.c -new file mode 100644 -index 000000000..d84f8c9ed ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_u16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp2_u16_tied1: -+** uzp2 z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2_u16_tied1, svuint16_t, -+ z0 = svuzp2_u16 (z0, z1), -+ z0 = svuzp2 (z0, z1)) -+ -+/* -+** uzp2_u16_tied2: -+** uzp2 z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2_u16_tied2, svuint16_t, -+ z0 = svuzp2_u16 (z1, z0), -+ z0 = svuzp2 (z1, z0)) -+ -+/* -+** uzp2_u16_untied: -+** uzp2 z0\.h, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2_u16_untied, svuint16_t, -+ z0 = svuzp2_u16 (z1, z2), -+ z0 = svuzp2 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_u32.c -new file mode 100644 -index 000000000..0285ff91f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_u32.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp2_u32_tied1: -+** uzp2 z0\.s, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2_u32_tied1, svuint32_t, -+ z0 = svuzp2_u32 (z0, z1), -+ z0 = svuzp2 (z0, z1)) -+ -+/* -+** uzp2_u32_tied2: -+** uzp2 z0\.s, z1\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2_u32_tied2, svuint32_t, -+ z0 = svuzp2_u32 (z1, z0), -+ z0 = svuzp2 (z1, z0)) -+ -+/* -+** uzp2_u32_untied: -+** uzp2 z0\.s, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2_u32_untied, svuint32_t, -+ 
z0 = svuzp2_u32 (z1, z2), -+ z0 = svuzp2 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_u64.c -new file mode 100644 -index 000000000..1b51baf90 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_u64.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp2_u64_tied1: -+** uzp2 z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2_u64_tied1, svuint64_t, -+ z0 = svuzp2_u64 (z0, z1), -+ z0 = svuzp2 (z0, z1)) -+ -+/* -+** uzp2_u64_tied2: -+** uzp2 z0\.d, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2_u64_tied2, svuint64_t, -+ z0 = svuzp2_u64 (z1, z0), -+ z0 = svuzp2 (z1, z0)) -+ -+/* -+** uzp2_u64_untied: -+** uzp2 z0\.d, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2_u64_untied, svuint64_t, -+ z0 = svuzp2_u64 (z1, z2), -+ z0 = svuzp2 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_u8.c -new file mode 100644 -index 000000000..662e0b818 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2_u8.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp2_u8_tied1: -+** uzp2 z0\.b, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2_u8_tied1, svuint8_t, -+ z0 = svuzp2_u8 (z0, z1), -+ z0 = svuzp2 (z0, z1)) -+ -+/* -+** uzp2_u8_tied2: -+** uzp2 z0\.b, z1\.b, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2_u8_tied2, svuint8_t, -+ z0 = svuzp2_u8 (z1, z0), -+ z0 = svuzp2 (z1, z0)) -+ -+/* -+** uzp2_u8_untied: -+** uzp2 z0\.b, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2_u8_untied, svuint8_t, -+ z0 = svuzp2_u8 (z1, z2), -+ z0 = svuzp2 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_bf16.c -new file mode 100644 -index 000000000..bbac53a7a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_bf16.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp2q_bf16_tied1: -+** uzp2 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2q_bf16_tied1, svbfloat16_t, -+ z0 = svuzp2q_bf16 (z0, z1), -+ z0 = svuzp2q (z0, z1)) -+ -+/* -+** uzp2q_bf16_tied2: -+** uzp2 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2q_bf16_tied2, svbfloat16_t, -+ z0 = svuzp2q_bf16 (z1, z0), -+ z0 = svuzp2q (z1, z0)) -+ -+/* -+** uzp2q_bf16_untied: -+** uzp2 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2q_bf16_untied, svbfloat16_t, -+ z0 = svuzp2q_bf16 (z1, z2), -+ z0 = svuzp2q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_f16.c -new file mode 100644 -index 000000000..e19d118fb ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_f16.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp2q_f16_tied1: -+** uzp2 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2q_f16_tied1, svfloat16_t, -+ z0 = svuzp2q_f16 (z0, z1), -+ z0 = svuzp2q 
(z0, z1)) -+ -+/* -+** uzp2q_f16_tied2: -+** uzp2 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2q_f16_tied2, svfloat16_t, -+ z0 = svuzp2q_f16 (z1, z0), -+ z0 = svuzp2q (z1, z0)) -+ -+/* -+** uzp2q_f16_untied: -+** uzp2 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2q_f16_untied, svfloat16_t, -+ z0 = svuzp2q_f16 (z1, z2), -+ z0 = svuzp2q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_f32.c -new file mode 100644 -index 000000000..af7112b15 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_f32.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp2q_f32_tied1: -+** uzp2 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2q_f32_tied1, svfloat32_t, -+ z0 = svuzp2q_f32 (z0, z1), -+ z0 = svuzp2q (z0, z1)) -+ -+/* -+** uzp2q_f32_tied2: -+** uzp2 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2q_f32_tied2, svfloat32_t, -+ z0 = svuzp2q_f32 (z1, z0), -+ z0 = svuzp2q (z1, z0)) -+ -+/* -+** uzp2q_f32_untied: -+** uzp2 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2q_f32_untied, svfloat32_t, -+ z0 = svuzp2q_f32 (z1, z2), -+ z0 = svuzp2q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_f64.c -new file mode 100644 -index 000000000..4109b843c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_f64.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp2q_f64_tied1: -+** uzp2 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2q_f64_tied1, svfloat64_t, -+ z0 = svuzp2q_f64 (z0, z1), -+ z0 = svuzp2q (z0, z1)) -+ -+/* -+** uzp2q_f64_tied2: -+** uzp2 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2q_f64_tied2, svfloat64_t, -+ z0 = svuzp2q_f64 (z1, z0), -+ z0 = svuzp2q (z1, z0)) -+ -+/* -+** uzp2q_f64_untied: -+** uzp2 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2q_f64_untied, svfloat64_t, -+ z0 = svuzp2q_f64 (z1, z2), -+ z0 = svuzp2q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_s16.c -new file mode 100644 -index 000000000..0c6ab25cf ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_s16.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp2q_s16_tied1: -+** uzp2 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2q_s16_tied1, svint16_t, -+ z0 = svuzp2q_s16 (z0, z1), -+ z0 = svuzp2q (z0, z1)) -+ -+/* -+** uzp2q_s16_tied2: -+** uzp2 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2q_s16_tied2, svint16_t, -+ z0 = svuzp2q_s16 (z1, z0), -+ z0 = svuzp2q (z1, z0)) -+ -+/* -+** uzp2q_s16_untied: -+** uzp2 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2q_s16_untied, svint16_t, -+ z0 = svuzp2q_s16 (z1, z2), -+ z0 = svuzp2q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_s32.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_s32.c -new file mode 100644 -index 000000000..9b914e704 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_s32.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp2q_s32_tied1: -+** uzp2 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2q_s32_tied1, svint32_t, -+ z0 = svuzp2q_s32 (z0, z1), -+ z0 = svuzp2q (z0, z1)) -+ -+/* -+** uzp2q_s32_tied2: -+** uzp2 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2q_s32_tied2, svint32_t, -+ z0 = svuzp2q_s32 (z1, z0), -+ z0 = svuzp2q (z1, z0)) -+ -+/* -+** uzp2q_s32_untied: -+** uzp2 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2q_s32_untied, svint32_t, -+ z0 = svuzp2q_s32 (z1, z2), -+ z0 = svuzp2q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_s64.c -new file mode 100644 -index 000000000..697e37d78 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_s64.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp2q_s64_tied1: -+** uzp2 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2q_s64_tied1, svint64_t, -+ z0 = svuzp2q_s64 (z0, z1), -+ z0 = svuzp2q (z0, z1)) -+ -+/* -+** uzp2q_s64_tied2: -+** uzp2 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2q_s64_tied2, svint64_t, -+ z0 = svuzp2q_s64 (z1, z0), -+ z0 = svuzp2q (z1, z0)) -+ -+/* -+** uzp2q_s64_untied: -+** uzp2 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2q_s64_untied, svint64_t, -+ z0 = svuzp2q_s64 (z1, z2), -+ z0 = svuzp2q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_s8.c -new file mode 100644 -index 000000000..576262c5d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_s8.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp2q_s8_tied1: -+** uzp2 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2q_s8_tied1, svint8_t, -+ z0 = svuzp2q_s8 (z0, z1), -+ z0 = svuzp2q (z0, z1)) -+ -+/* -+** uzp2q_s8_tied2: -+** uzp2 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2q_s8_tied2, svint8_t, -+ z0 = svuzp2q_s8 (z1, z0), -+ z0 = svuzp2q (z1, z0)) -+ -+/* -+** uzp2q_s8_untied: -+** uzp2 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2q_s8_untied, svint8_t, -+ z0 = svuzp2q_s8 (z1, z2), -+ z0 = svuzp2q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_u16.c -new file mode 100644 -index 000000000..f2debc28f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_u16.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp2q_u16_tied1: -+** uzp2 z0\.q, 
z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2q_u16_tied1, svuint16_t, -+ z0 = svuzp2q_u16 (z0, z1), -+ z0 = svuzp2q (z0, z1)) -+ -+/* -+** uzp2q_u16_tied2: -+** uzp2 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2q_u16_tied2, svuint16_t, -+ z0 = svuzp2q_u16 (z1, z0), -+ z0 = svuzp2q (z1, z0)) -+ -+/* -+** uzp2q_u16_untied: -+** uzp2 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2q_u16_untied, svuint16_t, -+ z0 = svuzp2q_u16 (z1, z2), -+ z0 = svuzp2q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_u32.c -new file mode 100644 -index 000000000..ad6a4bcc0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_u32.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp2q_u32_tied1: -+** uzp2 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2q_u32_tied1, svuint32_t, -+ z0 = svuzp2q_u32 (z0, z1), -+ z0 = svuzp2q (z0, z1)) -+ -+/* -+** uzp2q_u32_tied2: -+** uzp2 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2q_u32_tied2, svuint32_t, -+ z0 = svuzp2q_u32 (z1, z0), -+ z0 = svuzp2q (z1, z0)) -+ -+/* -+** uzp2q_u32_untied: -+** uzp2 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2q_u32_untied, svuint32_t, -+ z0 = svuzp2q_u32 (z1, z2), -+ z0 = svuzp2q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_u64.c -new file mode 100644 -index 000000000..a846aa295 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_u64.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp2q_u64_tied1: -+** uzp2 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2q_u64_tied1, svuint64_t, -+ z0 = svuzp2q_u64 (z0, z1), -+ z0 = svuzp2q (z0, z1)) -+ -+/* -+** uzp2q_u64_tied2: -+** uzp2 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2q_u64_tied2, svuint64_t, -+ z0 = svuzp2q_u64 (z1, z0), -+ z0 = svuzp2q (z1, z0)) -+ -+/* -+** uzp2q_u64_untied: -+** uzp2 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2q_u64_untied, svuint64_t, -+ z0 = svuzp2q_u64 (z1, z2), -+ z0 = svuzp2q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_u8.c -new file mode 100644 -index 000000000..163c22659 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/uzp2q_u8.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** uzp2q_u8_tied1: -+** uzp2 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2q_u8_tied1, svuint8_t, -+ z0 = svuzp2q_u8 (z0, z1), -+ z0 = svuzp2q (z0, z1)) -+ -+/* -+** uzp2q_u8_tied2: -+** uzp2 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2q_u8_tied2, svuint8_t, -+ z0 = svuzp2q_u8 (z1, z0), -+ z0 = svuzp2q (z1, z0)) -+ -+/* -+** uzp2q_u8_untied: -+** uzp2 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (uzp2q_u8_untied, svuint8_t, -+ z0 = svuzp2q_u8 (z1, z2), -+ z0 = 
svuzp2q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilele_b16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilele_b16.c -new file mode 100644 -index 000000000..c285a7a73 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilele_b16.c -@@ -0,0 +1,173 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** whilele_rr_b16_s32: -+** whilele p0\.h, w0, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_rr_b16_s32, int32_t, -+ p0 = svwhilele_b16_s32 (x0, x1), -+ p0 = svwhilele_b16 (x0, x1)) -+ -+/* -+** whilele_0r_b16_s32: -+** whilele p0\.h, wzr, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_0r_b16_s32, int32_t, -+ p0 = svwhilele_b16_s32 (0, x1), -+ p0 = svwhilele_b16 (0, x1)) -+ -+/* -+** whilele_5r_b16_s32: -+** mov (w[0-9]+), #?5 -+** whilele p0\.h, \1, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_5r_b16_s32, int32_t, -+ p0 = svwhilele_b16_s32 (5, x1), -+ p0 = svwhilele_b16 (5, x1)) -+ -+/* -+** whilele_r0_b16_s32: -+** whilele p0\.h, w0, wzr -+** ret -+*/ -+TEST_COMPARE_S (whilele_r0_b16_s32, int32_t, -+ p0 = svwhilele_b16_s32 (x0, 0), -+ p0 = svwhilele_b16 (x0, 0)) -+ -+/* -+** whilele_r5_b16_s32: -+** mov (w[0-9]+), #?5 -+** whilele p0\.h, w0, \1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_r5_b16_s32, int32_t, -+ p0 = svwhilele_b16_s32 (x0, 5), -+ p0 = svwhilele_b16 (x0, 5)) -+ -+/* -+** whilele_rr_b16_s64: -+** whilele p0\.h, x0, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_rr_b16_s64, int64_t, -+ p0 = svwhilele_b16_s64 (x0, x1), -+ p0 = svwhilele_b16 (x0, x1)) -+ -+/* -+** whilele_0r_b16_s64: -+** whilele p0\.h, xzr, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_0r_b16_s64, int64_t, -+ p0 = svwhilele_b16_s64 (0, x1), -+ p0 = svwhilele_b16 ((int64_t) 0, x1)) -+ -+/* -+** whilele_5r_b16_s64: -+** mov (x[0-9]+), #?5 -+** whilele p0\.h, \1, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_5r_b16_s64, int64_t, -+ p0 = svwhilele_b16_s64 (5, x1), -+ p0 = svwhilele_b16 ((int64_t) 5, x1)) -+ -+/* -+** whilele_r0_b16_s64: -+** whilele p0\.h, x0, xzr -+** ret -+*/ -+TEST_COMPARE_S (whilele_r0_b16_s64, int64_t, -+ p0 = svwhilele_b16_s64 (x0, 0), -+ p0 = svwhilele_b16 (x0, (int64_t) 0)) -+ -+/* -+** whilele_r5_b16_s64: -+** mov (x[0-9]+), #?5 -+** whilele p0\.h, x0, \1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_r5_b16_s64, int64_t, -+ p0 = svwhilele_b16_s64 (x0, 5), -+ p0 = svwhilele_b16 (x0, (int64_t) 5)) -+ -+/* -+** whilele_rr_b16_u32: -+** whilels p0\.h, w0, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_rr_b16_u32, uint32_t, -+ p0 = svwhilele_b16_u32 (x0, x1), -+ p0 = svwhilele_b16 (x0, x1)) -+ -+/* -+** whilele_0r_b16_u32: -+** whilels p0\.h, wzr, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_0r_b16_u32, uint32_t, -+ p0 = svwhilele_b16_u32 (0, x1), -+ p0 = svwhilele_b16 ((uint32_t) 0, x1)) -+ -+/* -+** whilele_5r_b16_u32: -+** mov (w[0-9]+), #?5 -+** whilels p0\.h, \1, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_5r_b16_u32, uint32_t, -+ p0 = svwhilele_b16_u32 (5, x1), -+ p0 = svwhilele_b16 ((uint32_t) 5, x1)) -+ -+/* -+** whilele_r5_b16_u32: -+** mov (w[0-9]+), #?5 -+** whilels p0\.h, w0, \1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_r5_b16_u32, uint32_t, -+ p0 = svwhilele_b16_u32 (x0, 5), -+ p0 = svwhilele_b16 (x0, (uint32_t) 5)) -+ -+/* -+** whilele_rr_b16_u64: -+** whilels p0\.h, x0, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_rr_b16_u64, uint64_t, -+ p0 = svwhilele_b16_u64 (x0, x1), -+ p0 = svwhilele_b16 (x0, x1)) -+ -+/* -+** whilele_0r_b16_u64: -+** whilels p0\.h, xzr, x1 -+** ret -+*/ -+TEST_COMPARE_S 
(whilele_0r_b16_u64, uint64_t, -+ p0 = svwhilele_b16_u64 (0, x1), -+ p0 = svwhilele_b16 ((uint64_t) 0, x1)) -+ -+/* -+** whilele_5r_b16_u64: -+** mov (x[0-9]+), #?5 -+** whilels p0\.h, \1, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_5r_b16_u64, uint64_t, -+ p0 = svwhilele_b16_u64 (5, x1), -+ p0 = svwhilele_b16 ((uint64_t) 5, x1)) -+ -+/* -+** whilele_r5_b16_u64: -+** mov (x[0-9]+), #?5 -+** whilels p0\.h, x0, \1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_r5_b16_u64, uint64_t, -+ p0 = svwhilele_b16_u64 (x0, 5), -+ p0 = svwhilele_b16 (x0, (uint64_t) 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilele_b32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilele_b32.c -new file mode 100644 -index 000000000..d369ccfa3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilele_b32.c -@@ -0,0 +1,173 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** whilele_rr_b32_s32: -+** whilele p0\.s, w0, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_rr_b32_s32, int32_t, -+ p0 = svwhilele_b32_s32 (x0, x1), -+ p0 = svwhilele_b32 (x0, x1)) -+ -+/* -+** whilele_0r_b32_s32: -+** whilele p0\.s, wzr, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_0r_b32_s32, int32_t, -+ p0 = svwhilele_b32_s32 (0, x1), -+ p0 = svwhilele_b32 (0, x1)) -+ -+/* -+** whilele_5r_b32_s32: -+** mov (w[0-9]+), #?5 -+** whilele p0\.s, \1, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_5r_b32_s32, int32_t, -+ p0 = svwhilele_b32_s32 (5, x1), -+ p0 = svwhilele_b32 (5, x1)) -+ -+/* -+** whilele_r0_b32_s32: -+** whilele p0\.s, w0, wzr -+** ret -+*/ -+TEST_COMPARE_S (whilele_r0_b32_s32, int32_t, -+ p0 = svwhilele_b32_s32 (x0, 0), -+ p0 = svwhilele_b32 (x0, 0)) -+ -+/* -+** whilele_r5_b32_s32: -+** mov (w[0-9]+), #?5 -+** whilele p0\.s, w0, \1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_r5_b32_s32, int32_t, -+ p0 = svwhilele_b32_s32 (x0, 5), -+ p0 = svwhilele_b32 (x0, 5)) -+ -+/* -+** whilele_rr_b32_s64: -+** whilele p0\.s, x0, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_rr_b32_s64, int64_t, -+ p0 = svwhilele_b32_s64 (x0, x1), -+ p0 = svwhilele_b32 (x0, x1)) -+ -+/* -+** whilele_0r_b32_s64: -+** whilele p0\.s, xzr, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_0r_b32_s64, int64_t, -+ p0 = svwhilele_b32_s64 (0, x1), -+ p0 = svwhilele_b32 ((int64_t) 0, x1)) -+ -+/* -+** whilele_5r_b32_s64: -+** mov (x[0-9]+), #?5 -+** whilele p0\.s, \1, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_5r_b32_s64, int64_t, -+ p0 = svwhilele_b32_s64 (5, x1), -+ p0 = svwhilele_b32 ((int64_t) 5, x1)) -+ -+/* -+** whilele_r0_b32_s64: -+** whilele p0\.s, x0, xzr -+** ret -+*/ -+TEST_COMPARE_S (whilele_r0_b32_s64, int64_t, -+ p0 = svwhilele_b32_s64 (x0, 0), -+ p0 = svwhilele_b32 (x0, (int64_t) 0)) -+ -+/* -+** whilele_r5_b32_s64: -+** mov (x[0-9]+), #?5 -+** whilele p0\.s, x0, \1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_r5_b32_s64, int64_t, -+ p0 = svwhilele_b32_s64 (x0, 5), -+ p0 = svwhilele_b32 (x0, (int64_t) 5)) -+ -+/* -+** whilele_rr_b32_u32: -+** whilels p0\.s, w0, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_rr_b32_u32, uint32_t, -+ p0 = svwhilele_b32_u32 (x0, x1), -+ p0 = svwhilele_b32 (x0, x1)) -+ -+/* -+** whilele_0r_b32_u32: -+** whilels p0\.s, wzr, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_0r_b32_u32, uint32_t, -+ p0 = svwhilele_b32_u32 (0, x1), -+ p0 = svwhilele_b32 ((uint32_t) 0, x1)) -+ -+/* -+** whilele_5r_b32_u32: -+** mov (w[0-9]+), #?5 -+** whilels p0\.s, \1, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_5r_b32_u32, uint32_t, -+ p0 = svwhilele_b32_u32 (5, x1), -+ p0 = svwhilele_b32 
((uint32_t) 5, x1)) -+ -+/* -+** whilele_r5_b32_u32: -+** mov (w[0-9]+), #?5 -+** whilels p0\.s, w0, \1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_r5_b32_u32, uint32_t, -+ p0 = svwhilele_b32_u32 (x0, 5), -+ p0 = svwhilele_b32 (x0, (uint32_t) 5)) -+ -+/* -+** whilele_rr_b32_u64: -+** whilels p0\.s, x0, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_rr_b32_u64, uint64_t, -+ p0 = svwhilele_b32_u64 (x0, x1), -+ p0 = svwhilele_b32 (x0, x1)) -+ -+/* -+** whilele_0r_b32_u64: -+** whilels p0\.s, xzr, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_0r_b32_u64, uint64_t, -+ p0 = svwhilele_b32_u64 (0, x1), -+ p0 = svwhilele_b32 ((uint64_t) 0, x1)) -+ -+/* -+** whilele_5r_b32_u64: -+** mov (x[0-9]+), #?5 -+** whilels p0\.s, \1, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_5r_b32_u64, uint64_t, -+ p0 = svwhilele_b32_u64 (5, x1), -+ p0 = svwhilele_b32 ((uint64_t) 5, x1)) -+ -+/* -+** whilele_r5_b32_u64: -+** mov (x[0-9]+), #?5 -+** whilels p0\.s, x0, \1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_r5_b32_u64, uint64_t, -+ p0 = svwhilele_b32_u64 (x0, 5), -+ p0 = svwhilele_b32 (x0, (uint64_t) 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilele_b64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilele_b64.c -new file mode 100644 -index 000000000..394f51f44 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilele_b64.c -@@ -0,0 +1,173 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** whilele_rr_b64_s32: -+** whilele p0\.d, w0, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_rr_b64_s32, int32_t, -+ p0 = svwhilele_b64_s32 (x0, x1), -+ p0 = svwhilele_b64 (x0, x1)) -+ -+/* -+** whilele_0r_b64_s32: -+** whilele p0\.d, wzr, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_0r_b64_s32, int32_t, -+ p0 = svwhilele_b64_s32 (0, x1), -+ p0 = svwhilele_b64 (0, x1)) -+ -+/* -+** whilele_5r_b64_s32: -+** mov (w[0-9]+), #?5 -+** whilele p0\.d, \1, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_5r_b64_s32, int32_t, -+ p0 = svwhilele_b64_s32 (5, x1), -+ p0 = svwhilele_b64 (5, x1)) -+ -+/* -+** whilele_r0_b64_s32: -+** whilele p0\.d, w0, wzr -+** ret -+*/ -+TEST_COMPARE_S (whilele_r0_b64_s32, int32_t, -+ p0 = svwhilele_b64_s32 (x0, 0), -+ p0 = svwhilele_b64 (x0, 0)) -+ -+/* -+** whilele_r5_b64_s32: -+** mov (w[0-9]+), #?5 -+** whilele p0\.d, w0, \1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_r5_b64_s32, int32_t, -+ p0 = svwhilele_b64_s32 (x0, 5), -+ p0 = svwhilele_b64 (x0, 5)) -+ -+/* -+** whilele_rr_b64_s64: -+** whilele p0\.d, x0, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_rr_b64_s64, int64_t, -+ p0 = svwhilele_b64_s64 (x0, x1), -+ p0 = svwhilele_b64 (x0, x1)) -+ -+/* -+** whilele_0r_b64_s64: -+** whilele p0\.d, xzr, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_0r_b64_s64, int64_t, -+ p0 = svwhilele_b64_s64 (0, x1), -+ p0 = svwhilele_b64 ((int64_t) 0, x1)) -+ -+/* -+** whilele_5r_b64_s64: -+** mov (x[0-9]+), #?5 -+** whilele p0\.d, \1, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_5r_b64_s64, int64_t, -+ p0 = svwhilele_b64_s64 (5, x1), -+ p0 = svwhilele_b64 ((int64_t) 5, x1)) -+ -+/* -+** whilele_r0_b64_s64: -+** whilele p0\.d, x0, xzr -+** ret -+*/ -+TEST_COMPARE_S (whilele_r0_b64_s64, int64_t, -+ p0 = svwhilele_b64_s64 (x0, 0), -+ p0 = svwhilele_b64 (x0, (int64_t) 0)) -+ -+/* -+** whilele_r5_b64_s64: -+** mov (x[0-9]+), #?5 -+** whilele p0\.d, x0, \1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_r5_b64_s64, int64_t, -+ p0 = svwhilele_b64_s64 (x0, 5), -+ p0 = svwhilele_b64 (x0, (int64_t) 5)) -+ -+/* -+** whilele_rr_b64_u32: -+** whilels p0\.d, w0, w1 -+** ret -+*/ 
-+TEST_COMPARE_S (whilele_rr_b64_u32, uint32_t, -+ p0 = svwhilele_b64_u32 (x0, x1), -+ p0 = svwhilele_b64 (x0, x1)) -+ -+/* -+** whilele_0r_b64_u32: -+** whilels p0\.d, wzr, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_0r_b64_u32, uint32_t, -+ p0 = svwhilele_b64_u32 (0, x1), -+ p0 = svwhilele_b64 ((uint32_t) 0, x1)) -+ -+/* -+** whilele_5r_b64_u32: -+** mov (w[0-9]+), #?5 -+** whilels p0\.d, \1, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_5r_b64_u32, uint32_t, -+ p0 = svwhilele_b64_u32 (5, x1), -+ p0 = svwhilele_b64 ((uint32_t) 5, x1)) -+ -+/* -+** whilele_r5_b64_u32: -+** mov (w[0-9]+), #?5 -+** whilels p0\.d, w0, \1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_r5_b64_u32, uint32_t, -+ p0 = svwhilele_b64_u32 (x0, 5), -+ p0 = svwhilele_b64 (x0, (uint32_t) 5)) -+ -+/* -+** whilele_rr_b64_u64: -+** whilels p0\.d, x0, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_rr_b64_u64, uint64_t, -+ p0 = svwhilele_b64_u64 (x0, x1), -+ p0 = svwhilele_b64 (x0, x1)) -+ -+/* -+** whilele_0r_b64_u64: -+** whilels p0\.d, xzr, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_0r_b64_u64, uint64_t, -+ p0 = svwhilele_b64_u64 (0, x1), -+ p0 = svwhilele_b64 ((uint64_t) 0, x1)) -+ -+/* -+** whilele_5r_b64_u64: -+** mov (x[0-9]+), #?5 -+** whilels p0\.d, \1, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_5r_b64_u64, uint64_t, -+ p0 = svwhilele_b64_u64 (5, x1), -+ p0 = svwhilele_b64 ((uint64_t) 5, x1)) -+ -+/* -+** whilele_r5_b64_u64: -+** mov (x[0-9]+), #?5 -+** whilels p0\.d, x0, \1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_r5_b64_u64, uint64_t, -+ p0 = svwhilele_b64_u64 (x0, 5), -+ p0 = svwhilele_b64 (x0, (uint64_t) 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilele_b8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilele_b8.c -new file mode 100644 -index 000000000..2ec101473 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilele_b8.c -@@ -0,0 +1,173 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** whilele_rr_b8_s32: -+** whilele p0\.b, w0, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_rr_b8_s32, int32_t, -+ p0 = svwhilele_b8_s32 (x0, x1), -+ p0 = svwhilele_b8 (x0, x1)) -+ -+/* -+** whilele_0r_b8_s32: -+** whilele p0\.b, wzr, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_0r_b8_s32, int32_t, -+ p0 = svwhilele_b8_s32 (0, x1), -+ p0 = svwhilele_b8 (0, x1)) -+ -+/* -+** whilele_5r_b8_s32: -+** mov (w[0-9]+), #?5 -+** whilele p0\.b, \1, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_5r_b8_s32, int32_t, -+ p0 = svwhilele_b8_s32 (5, x1), -+ p0 = svwhilele_b8 (5, x1)) -+ -+/* -+** whilele_r0_b8_s32: -+** whilele p0\.b, w0, wzr -+** ret -+*/ -+TEST_COMPARE_S (whilele_r0_b8_s32, int32_t, -+ p0 = svwhilele_b8_s32 (x0, 0), -+ p0 = svwhilele_b8 (x0, 0)) -+ -+/* -+** whilele_r5_b8_s32: -+** mov (w[0-9]+), #?5 -+** whilele p0\.b, w0, \1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_r5_b8_s32, int32_t, -+ p0 = svwhilele_b8_s32 (x0, 5), -+ p0 = svwhilele_b8 (x0, 5)) -+ -+/* -+** whilele_rr_b8_s64: -+** whilele p0\.b, x0, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_rr_b8_s64, int64_t, -+ p0 = svwhilele_b8_s64 (x0, x1), -+ p0 = svwhilele_b8 (x0, x1)) -+ -+/* -+** whilele_0r_b8_s64: -+** whilele p0\.b, xzr, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_0r_b8_s64, int64_t, -+ p0 = svwhilele_b8_s64 (0, x1), -+ p0 = svwhilele_b8 ((int64_t) 0, x1)) -+ -+/* -+** whilele_5r_b8_s64: -+** mov (x[0-9]+), #?5 -+** whilele p0\.b, \1, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_5r_b8_s64, int64_t, -+ p0 = svwhilele_b8_s64 (5, x1), -+ p0 = svwhilele_b8 ((int64_t) 5, x1)) -+ -+/* 
-+** whilele_r0_b8_s64: -+** whilele p0\.b, x0, xzr -+** ret -+*/ -+TEST_COMPARE_S (whilele_r0_b8_s64, int64_t, -+ p0 = svwhilele_b8_s64 (x0, 0), -+ p0 = svwhilele_b8 (x0, (int64_t) 0)) -+ -+/* -+** whilele_r5_b8_s64: -+** mov (x[0-9]+), #?5 -+** whilele p0\.b, x0, \1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_r5_b8_s64, int64_t, -+ p0 = svwhilele_b8_s64 (x0, 5), -+ p0 = svwhilele_b8 (x0, (int64_t) 5)) -+ -+/* -+** whilele_rr_b8_u32: -+** whilels p0\.b, w0, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_rr_b8_u32, uint32_t, -+ p0 = svwhilele_b8_u32 (x0, x1), -+ p0 = svwhilele_b8 (x0, x1)) -+ -+/* -+** whilele_0r_b8_u32: -+** whilels p0\.b, wzr, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_0r_b8_u32, uint32_t, -+ p0 = svwhilele_b8_u32 (0, x1), -+ p0 = svwhilele_b8 ((uint32_t) 0, x1)) -+ -+/* -+** whilele_5r_b8_u32: -+** mov (w[0-9]+), #?5 -+** whilels p0\.b, \1, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_5r_b8_u32, uint32_t, -+ p0 = svwhilele_b8_u32 (5, x1), -+ p0 = svwhilele_b8 ((uint32_t) 5, x1)) -+ -+/* -+** whilele_r5_b8_u32: -+** mov (w[0-9]+), #?5 -+** whilels p0\.b, w0, \1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_r5_b8_u32, uint32_t, -+ p0 = svwhilele_b8_u32 (x0, 5), -+ p0 = svwhilele_b8 (x0, (uint32_t) 5)) -+ -+/* -+** whilele_rr_b8_u64: -+** whilels p0\.b, x0, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_rr_b8_u64, uint64_t, -+ p0 = svwhilele_b8_u64 (x0, x1), -+ p0 = svwhilele_b8 (x0, x1)) -+ -+/* -+** whilele_0r_b8_u64: -+** whilels p0\.b, xzr, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_0r_b8_u64, uint64_t, -+ p0 = svwhilele_b8_u64 (0, x1), -+ p0 = svwhilele_b8 ((uint64_t) 0, x1)) -+ -+/* -+** whilele_5r_b8_u64: -+** mov (x[0-9]+), #?5 -+** whilels p0\.b, \1, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_5r_b8_u64, uint64_t, -+ p0 = svwhilele_b8_u64 (5, x1), -+ p0 = svwhilele_b8 ((uint64_t) 5, x1)) -+ -+/* -+** whilele_r5_b8_u64: -+** mov (x[0-9]+), #?5 -+** whilels p0\.b, x0, \1 -+** ret -+*/ -+TEST_COMPARE_S (whilele_r5_b8_u64, uint64_t, -+ p0 = svwhilele_b8_u64 (x0, 5), -+ p0 = svwhilele_b8 (x0, (uint64_t) 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilelt_b16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilelt_b16.c -new file mode 100644 -index 000000000..14a60432b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilelt_b16.c -@@ -0,0 +1,173 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** whilelt_rr_b16_s32: -+** whilelt p0\.h, w0, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_rr_b16_s32, int32_t, -+ p0 = svwhilelt_b16_s32 (x0, x1), -+ p0 = svwhilelt_b16 (x0, x1)) -+ -+/* -+** whilelt_0r_b16_s32: -+** whilelt p0\.h, wzr, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_0r_b16_s32, int32_t, -+ p0 = svwhilelt_b16_s32 (0, x1), -+ p0 = svwhilelt_b16 (0, x1)) -+ -+/* -+** whilelt_5r_b16_s32: -+** mov (w[0-9]+), #?5 -+** whilelt p0\.h, \1, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_5r_b16_s32, int32_t, -+ p0 = svwhilelt_b16_s32 (5, x1), -+ p0 = svwhilelt_b16 (5, x1)) -+ -+/* -+** whilelt_r0_b16_s32: -+** whilelt p0\.h, w0, wzr -+** ret -+*/ -+TEST_COMPARE_S (whilelt_r0_b16_s32, int32_t, -+ p0 = svwhilelt_b16_s32 (x0, 0), -+ p0 = svwhilelt_b16 (x0, 0)) -+ -+/* -+** whilelt_r5_b16_s32: -+** mov (w[0-9]+), #?5 -+** whilelt p0\.h, w0, \1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_r5_b16_s32, int32_t, -+ p0 = svwhilelt_b16_s32 (x0, 5), -+ p0 = svwhilelt_b16 (x0, 5)) -+ -+/* -+** whilelt_rr_b16_s64: -+** whilelt p0\.h, x0, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_rr_b16_s64, int64_t, -+ p0 = 
svwhilelt_b16_s64 (x0, x1), -+ p0 = svwhilelt_b16 (x0, x1)) -+ -+/* -+** whilelt_0r_b16_s64: -+** whilelt p0\.h, xzr, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_0r_b16_s64, int64_t, -+ p0 = svwhilelt_b16_s64 (0, x1), -+ p0 = svwhilelt_b16 ((int64_t) 0, x1)) -+ -+/* -+** whilelt_5r_b16_s64: -+** mov (x[0-9]+), #?5 -+** whilelt p0\.h, \1, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_5r_b16_s64, int64_t, -+ p0 = svwhilelt_b16_s64 (5, x1), -+ p0 = svwhilelt_b16 ((int64_t) 5, x1)) -+ -+/* -+** whilelt_r0_b16_s64: -+** whilelt p0\.h, x0, xzr -+** ret -+*/ -+TEST_COMPARE_S (whilelt_r0_b16_s64, int64_t, -+ p0 = svwhilelt_b16_s64 (x0, 0), -+ p0 = svwhilelt_b16 (x0, (int64_t) 0)) -+ -+/* -+** whilelt_r5_b16_s64: -+** mov (x[0-9]+), #?5 -+** whilelt p0\.h, x0, \1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_r5_b16_s64, int64_t, -+ p0 = svwhilelt_b16_s64 (x0, 5), -+ p0 = svwhilelt_b16 (x0, (int64_t) 5)) -+ -+/* -+** whilelt_rr_b16_u32: -+** whilelo p0\.h, w0, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_rr_b16_u32, uint32_t, -+ p0 = svwhilelt_b16_u32 (x0, x1), -+ p0 = svwhilelt_b16 (x0, x1)) -+ -+/* -+** whilelt_0r_b16_u32: -+** whilelo p0\.h, wzr, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_0r_b16_u32, uint32_t, -+ p0 = svwhilelt_b16_u32 (0, x1), -+ p0 = svwhilelt_b16 ((uint32_t) 0, x1)) -+ -+/* -+** whilelt_5r_b16_u32: -+** mov (w[0-9]+), #?5 -+** whilelo p0\.h, \1, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_5r_b16_u32, uint32_t, -+ p0 = svwhilelt_b16_u32 (5, x1), -+ p0 = svwhilelt_b16 ((uint32_t) 5, x1)) -+ -+/* -+** whilelt_r5_b16_u32: -+** mov (w[0-9]+), #?5 -+** whilelo p0\.h, w0, \1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_r5_b16_u32, uint32_t, -+ p0 = svwhilelt_b16_u32 (x0, 5), -+ p0 = svwhilelt_b16 (x0, (uint32_t) 5)) -+ -+/* -+** whilelt_rr_b16_u64: -+** whilelo p0\.h, x0, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_rr_b16_u64, uint64_t, -+ p0 = svwhilelt_b16_u64 (x0, x1), -+ p0 = svwhilelt_b16 (x0, x1)) -+ -+/* -+** whilelt_0r_b16_u64: -+** whilelo p0\.h, xzr, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_0r_b16_u64, uint64_t, -+ p0 = svwhilelt_b16_u64 (0, x1), -+ p0 = svwhilelt_b16 ((uint64_t) 0, x1)) -+ -+/* -+** whilelt_5r_b16_u64: -+** mov (x[0-9]+), #?5 -+** whilelo p0\.h, \1, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_5r_b16_u64, uint64_t, -+ p0 = svwhilelt_b16_u64 (5, x1), -+ p0 = svwhilelt_b16 ((uint64_t) 5, x1)) -+ -+/* -+** whilelt_r5_b16_u64: -+** mov (x[0-9]+), #?5 -+** whilelo p0\.h, x0, \1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_r5_b16_u64, uint64_t, -+ p0 = svwhilelt_b16_u64 (x0, 5), -+ p0 = svwhilelt_b16 (x0, (uint64_t) 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilelt_b32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilelt_b32.c -new file mode 100644 -index 000000000..0e50bb07a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilelt_b32.c -@@ -0,0 +1,173 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** whilelt_rr_b32_s32: -+** whilelt p0\.s, w0, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_rr_b32_s32, int32_t, -+ p0 = svwhilelt_b32_s32 (x0, x1), -+ p0 = svwhilelt_b32 (x0, x1)) -+ -+/* -+** whilelt_0r_b32_s32: -+** whilelt p0\.s, wzr, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_0r_b32_s32, int32_t, -+ p0 = svwhilelt_b32_s32 (0, x1), -+ p0 = svwhilelt_b32 (0, x1)) -+ -+/* -+** whilelt_5r_b32_s32: -+** mov (w[0-9]+), #?5 -+** whilelt p0\.s, \1, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_5r_b32_s32, int32_t, -+ p0 = svwhilelt_b32_s32 (5, x1), -+ p0 = svwhilelt_b32 (5, x1)) -+ -+/* 
-+** whilelt_r0_b32_s32: -+** whilelt p0\.s, w0, wzr -+** ret -+*/ -+TEST_COMPARE_S (whilelt_r0_b32_s32, int32_t, -+ p0 = svwhilelt_b32_s32 (x0, 0), -+ p0 = svwhilelt_b32 (x0, 0)) -+ -+/* -+** whilelt_r5_b32_s32: -+** mov (w[0-9]+), #?5 -+** whilelt p0\.s, w0, \1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_r5_b32_s32, int32_t, -+ p0 = svwhilelt_b32_s32 (x0, 5), -+ p0 = svwhilelt_b32 (x0, 5)) -+ -+/* -+** whilelt_rr_b32_s64: -+** whilelt p0\.s, x0, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_rr_b32_s64, int64_t, -+ p0 = svwhilelt_b32_s64 (x0, x1), -+ p0 = svwhilelt_b32 (x0, x1)) -+ -+/* -+** whilelt_0r_b32_s64: -+** whilelt p0\.s, xzr, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_0r_b32_s64, int64_t, -+ p0 = svwhilelt_b32_s64 (0, x1), -+ p0 = svwhilelt_b32 ((int64_t) 0, x1)) -+ -+/* -+** whilelt_5r_b32_s64: -+** mov (x[0-9]+), #?5 -+** whilelt p0\.s, \1, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_5r_b32_s64, int64_t, -+ p0 = svwhilelt_b32_s64 (5, x1), -+ p0 = svwhilelt_b32 ((int64_t) 5, x1)) -+ -+/* -+** whilelt_r0_b32_s64: -+** whilelt p0\.s, x0, xzr -+** ret -+*/ -+TEST_COMPARE_S (whilelt_r0_b32_s64, int64_t, -+ p0 = svwhilelt_b32_s64 (x0, 0), -+ p0 = svwhilelt_b32 (x0, (int64_t) 0)) -+ -+/* -+** whilelt_r5_b32_s64: -+** mov (x[0-9]+), #?5 -+** whilelt p0\.s, x0, \1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_r5_b32_s64, int64_t, -+ p0 = svwhilelt_b32_s64 (x0, 5), -+ p0 = svwhilelt_b32 (x0, (int64_t) 5)) -+ -+/* -+** whilelt_rr_b32_u32: -+** whilelo p0\.s, w0, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_rr_b32_u32, uint32_t, -+ p0 = svwhilelt_b32_u32 (x0, x1), -+ p0 = svwhilelt_b32 (x0, x1)) -+ -+/* -+** whilelt_0r_b32_u32: -+** whilelo p0\.s, wzr, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_0r_b32_u32, uint32_t, -+ p0 = svwhilelt_b32_u32 (0, x1), -+ p0 = svwhilelt_b32 ((uint32_t) 0, x1)) -+ -+/* -+** whilelt_5r_b32_u32: -+** mov (w[0-9]+), #?5 -+** whilelo p0\.s, \1, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_5r_b32_u32, uint32_t, -+ p0 = svwhilelt_b32_u32 (5, x1), -+ p0 = svwhilelt_b32 ((uint32_t) 5, x1)) -+ -+/* -+** whilelt_r5_b32_u32: -+** mov (w[0-9]+), #?5 -+** whilelo p0\.s, w0, \1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_r5_b32_u32, uint32_t, -+ p0 = svwhilelt_b32_u32 (x0, 5), -+ p0 = svwhilelt_b32 (x0, (uint32_t) 5)) -+ -+/* -+** whilelt_rr_b32_u64: -+** whilelo p0\.s, x0, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_rr_b32_u64, uint64_t, -+ p0 = svwhilelt_b32_u64 (x0, x1), -+ p0 = svwhilelt_b32 (x0, x1)) -+ -+/* -+** whilelt_0r_b32_u64: -+** whilelo p0\.s, xzr, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_0r_b32_u64, uint64_t, -+ p0 = svwhilelt_b32_u64 (0, x1), -+ p0 = svwhilelt_b32 ((uint64_t) 0, x1)) -+ -+/* -+** whilelt_5r_b32_u64: -+** mov (x[0-9]+), #?5 -+** whilelo p0\.s, \1, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_5r_b32_u64, uint64_t, -+ p0 = svwhilelt_b32_u64 (5, x1), -+ p0 = svwhilelt_b32 ((uint64_t) 5, x1)) -+ -+/* -+** whilelt_r5_b32_u64: -+** mov (x[0-9]+), #?5 -+** whilelo p0\.s, x0, \1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_r5_b32_u64, uint64_t, -+ p0 = svwhilelt_b32_u64 (x0, 5), -+ p0 = svwhilelt_b32 (x0, (uint64_t) 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilelt_b64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilelt_b64.c -new file mode 100644 -index 000000000..539c93347 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilelt_b64.c -@@ -0,0 +1,173 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** whilelt_rr_b64_s32: -+** whilelt p0\.d, w0, w1 -+** ret -+*/ 
-+TEST_COMPARE_S (whilelt_rr_b64_s32, int32_t, -+ p0 = svwhilelt_b64_s32 (x0, x1), -+ p0 = svwhilelt_b64 (x0, x1)) -+ -+/* -+** whilelt_0r_b64_s32: -+** whilelt p0\.d, wzr, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_0r_b64_s32, int32_t, -+ p0 = svwhilelt_b64_s32 (0, x1), -+ p0 = svwhilelt_b64 (0, x1)) -+ -+/* -+** whilelt_5r_b64_s32: -+** mov (w[0-9]+), #?5 -+** whilelt p0\.d, \1, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_5r_b64_s32, int32_t, -+ p0 = svwhilelt_b64_s32 (5, x1), -+ p0 = svwhilelt_b64 (5, x1)) -+ -+/* -+** whilelt_r0_b64_s32: -+** whilelt p0\.d, w0, wzr -+** ret -+*/ -+TEST_COMPARE_S (whilelt_r0_b64_s32, int32_t, -+ p0 = svwhilelt_b64_s32 (x0, 0), -+ p0 = svwhilelt_b64 (x0, 0)) -+ -+/* -+** whilelt_r5_b64_s32: -+** mov (w[0-9]+), #?5 -+** whilelt p0\.d, w0, \1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_r5_b64_s32, int32_t, -+ p0 = svwhilelt_b64_s32 (x0, 5), -+ p0 = svwhilelt_b64 (x0, 5)) -+ -+/* -+** whilelt_rr_b64_s64: -+** whilelt p0\.d, x0, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_rr_b64_s64, int64_t, -+ p0 = svwhilelt_b64_s64 (x0, x1), -+ p0 = svwhilelt_b64 (x0, x1)) -+ -+/* -+** whilelt_0r_b64_s64: -+** whilelt p0\.d, xzr, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_0r_b64_s64, int64_t, -+ p0 = svwhilelt_b64_s64 (0, x1), -+ p0 = svwhilelt_b64 ((int64_t) 0, x1)) -+ -+/* -+** whilelt_5r_b64_s64: -+** mov (x[0-9]+), #?5 -+** whilelt p0\.d, \1, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_5r_b64_s64, int64_t, -+ p0 = svwhilelt_b64_s64 (5, x1), -+ p0 = svwhilelt_b64 ((int64_t) 5, x1)) -+ -+/* -+** whilelt_r0_b64_s64: -+** whilelt p0\.d, x0, xzr -+** ret -+*/ -+TEST_COMPARE_S (whilelt_r0_b64_s64, int64_t, -+ p0 = svwhilelt_b64_s64 (x0, 0), -+ p0 = svwhilelt_b64 (x0, (int64_t) 0)) -+ -+/* -+** whilelt_r5_b64_s64: -+** mov (x[0-9]+), #?5 -+** whilelt p0\.d, x0, \1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_r5_b64_s64, int64_t, -+ p0 = svwhilelt_b64_s64 (x0, 5), -+ p0 = svwhilelt_b64 (x0, (int64_t) 5)) -+ -+/* -+** whilelt_rr_b64_u32: -+** whilelo p0\.d, w0, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_rr_b64_u32, uint32_t, -+ p0 = svwhilelt_b64_u32 (x0, x1), -+ p0 = svwhilelt_b64 (x0, x1)) -+ -+/* -+** whilelt_0r_b64_u32: -+** whilelo p0\.d, wzr, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_0r_b64_u32, uint32_t, -+ p0 = svwhilelt_b64_u32 (0, x1), -+ p0 = svwhilelt_b64 ((uint32_t) 0, x1)) -+ -+/* -+** whilelt_5r_b64_u32: -+** mov (w[0-9]+), #?5 -+** whilelo p0\.d, \1, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_5r_b64_u32, uint32_t, -+ p0 = svwhilelt_b64_u32 (5, x1), -+ p0 = svwhilelt_b64 ((uint32_t) 5, x1)) -+ -+/* -+** whilelt_r5_b64_u32: -+** mov (w[0-9]+), #?5 -+** whilelo p0\.d, w0, \1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_r5_b64_u32, uint32_t, -+ p0 = svwhilelt_b64_u32 (x0, 5), -+ p0 = svwhilelt_b64 (x0, (uint32_t) 5)) -+ -+/* -+** whilelt_rr_b64_u64: -+** whilelo p0\.d, x0, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_rr_b64_u64, uint64_t, -+ p0 = svwhilelt_b64_u64 (x0, x1), -+ p0 = svwhilelt_b64 (x0, x1)) -+ -+/* -+** whilelt_0r_b64_u64: -+** whilelo p0\.d, xzr, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_0r_b64_u64, uint64_t, -+ p0 = svwhilelt_b64_u64 (0, x1), -+ p0 = svwhilelt_b64 ((uint64_t) 0, x1)) -+ -+/* -+** whilelt_5r_b64_u64: -+** mov (x[0-9]+), #?5 -+** whilelo p0\.d, \1, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_5r_b64_u64, uint64_t, -+ p0 = svwhilelt_b64_u64 (5, x1), -+ p0 = svwhilelt_b64 ((uint64_t) 5, x1)) -+ -+/* -+** whilelt_r5_b64_u64: -+** mov (x[0-9]+), #?5 -+** whilelo p0\.d, x0, \1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_r5_b64_u64, uint64_t, -+ p0 = 
svwhilelt_b64_u64 (x0, 5), -+ p0 = svwhilelt_b64 (x0, (uint64_t) 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilelt_b8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilelt_b8.c -new file mode 100644 -index 000000000..5b6a5c44d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/whilelt_b8.c -@@ -0,0 +1,173 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** whilelt_rr_b8_s32: -+** whilelt p0\.b, w0, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_rr_b8_s32, int32_t, -+ p0 = svwhilelt_b8_s32 (x0, x1), -+ p0 = svwhilelt_b8 (x0, x1)) -+ -+/* -+** whilelt_0r_b8_s32: -+** whilelt p0\.b, wzr, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_0r_b8_s32, int32_t, -+ p0 = svwhilelt_b8_s32 (0, x1), -+ p0 = svwhilelt_b8 (0, x1)) -+ -+/* -+** whilelt_5r_b8_s32: -+** mov (w[0-9]+), #?5 -+** whilelt p0\.b, \1, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_5r_b8_s32, int32_t, -+ p0 = svwhilelt_b8_s32 (5, x1), -+ p0 = svwhilelt_b8 (5, x1)) -+ -+/* -+** whilelt_r0_b8_s32: -+** whilelt p0\.b, w0, wzr -+** ret -+*/ -+TEST_COMPARE_S (whilelt_r0_b8_s32, int32_t, -+ p0 = svwhilelt_b8_s32 (x0, 0), -+ p0 = svwhilelt_b8 (x0, 0)) -+ -+/* -+** whilelt_r5_b8_s32: -+** mov (w[0-9]+), #?5 -+** whilelt p0\.b, w0, \1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_r5_b8_s32, int32_t, -+ p0 = svwhilelt_b8_s32 (x0, 5), -+ p0 = svwhilelt_b8 (x0, 5)) -+ -+/* -+** whilelt_rr_b8_s64: -+** whilelt p0\.b, x0, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_rr_b8_s64, int64_t, -+ p0 = svwhilelt_b8_s64 (x0, x1), -+ p0 = svwhilelt_b8 (x0, x1)) -+ -+/* -+** whilelt_0r_b8_s64: -+** whilelt p0\.b, xzr, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_0r_b8_s64, int64_t, -+ p0 = svwhilelt_b8_s64 (0, x1), -+ p0 = svwhilelt_b8 ((int64_t) 0, x1)) -+ -+/* -+** whilelt_5r_b8_s64: -+** mov (x[0-9]+), #?5 -+** whilelt p0\.b, \1, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_5r_b8_s64, int64_t, -+ p0 = svwhilelt_b8_s64 (5, x1), -+ p0 = svwhilelt_b8 ((int64_t) 5, x1)) -+ -+/* -+** whilelt_r0_b8_s64: -+** whilelt p0\.b, x0, xzr -+** ret -+*/ -+TEST_COMPARE_S (whilelt_r0_b8_s64, int64_t, -+ p0 = svwhilelt_b8_s64 (x0, 0), -+ p0 = svwhilelt_b8 (x0, (int64_t) 0)) -+ -+/* -+** whilelt_r5_b8_s64: -+** mov (x[0-9]+), #?5 -+** whilelt p0\.b, x0, \1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_r5_b8_s64, int64_t, -+ p0 = svwhilelt_b8_s64 (x0, 5), -+ p0 = svwhilelt_b8 (x0, (int64_t) 5)) -+ -+/* -+** whilelt_rr_b8_u32: -+** whilelo p0\.b, w0, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_rr_b8_u32, uint32_t, -+ p0 = svwhilelt_b8_u32 (x0, x1), -+ p0 = svwhilelt_b8 (x0, x1)) -+ -+/* -+** whilelt_0r_b8_u32: -+** whilelo p0\.b, wzr, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_0r_b8_u32, uint32_t, -+ p0 = svwhilelt_b8_u32 (0, x1), -+ p0 = svwhilelt_b8 ((uint32_t) 0, x1)) -+ -+/* -+** whilelt_5r_b8_u32: -+** mov (w[0-9]+), #?5 -+** whilelo p0\.b, \1, w1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_5r_b8_u32, uint32_t, -+ p0 = svwhilelt_b8_u32 (5, x1), -+ p0 = svwhilelt_b8 ((uint32_t) 5, x1)) -+ -+/* -+** whilelt_r5_b8_u32: -+** mov (w[0-9]+), #?5 -+** whilelo p0\.b, w0, \1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_r5_b8_u32, uint32_t, -+ p0 = svwhilelt_b8_u32 (x0, 5), -+ p0 = svwhilelt_b8 (x0, (uint32_t) 5)) -+ -+/* -+** whilelt_rr_b8_u64: -+** whilelo p0\.b, x0, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_rr_b8_u64, uint64_t, -+ p0 = svwhilelt_b8_u64 (x0, x1), -+ p0 = svwhilelt_b8 (x0, x1)) -+ -+/* -+** whilelt_0r_b8_u64: -+** whilelo p0\.b, xzr, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_0r_b8_u64, uint64_t, 
-+ p0 = svwhilelt_b8_u64 (0, x1), -+ p0 = svwhilelt_b8 ((uint64_t) 0, x1)) -+ -+/* -+** whilelt_5r_b8_u64: -+** mov (x[0-9]+), #?5 -+** whilelo p0\.b, \1, x1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_5r_b8_u64, uint64_t, -+ p0 = svwhilelt_b8_u64 (5, x1), -+ p0 = svwhilelt_b8 ((uint64_t) 5, x1)) -+ -+/* -+** whilelt_r5_b8_u64: -+** mov (x[0-9]+), #?5 -+** whilelo p0\.b, x0, \1 -+** ret -+*/ -+TEST_COMPARE_S (whilelt_r5_b8_u64, uint64_t, -+ p0 = svwhilelt_b8_u64 (x0, 5), -+ p0 = svwhilelt_b8 (x0, (uint64_t) 5)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_b16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_b16.c -new file mode 100644 -index 000000000..269260eb4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_b16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip1_b16_tied1: -+** zip1 p0\.h, p0\.h, p1\.h -+** ret -+*/ -+TEST_UNIFORM_P (zip1_b16_tied1, -+ p0 = svzip1_b16 (p0, p1), -+ p0 = svzip1_b16 (p0, p1)) -+ -+/* -+** zip1_b16_tied2: -+** zip1 p0\.h, p1\.h, p0\.h -+** ret -+*/ -+TEST_UNIFORM_P (zip1_b16_tied2, -+ p0 = svzip1_b16 (p1, p0), -+ p0 = svzip1_b16 (p1, p0)) -+ -+/* -+** zip1_b16_untied: -+** zip1 p0\.h, p1\.h, p2\.h -+** ret -+*/ -+TEST_UNIFORM_P (zip1_b16_untied, -+ p0 = svzip1_b16 (p1, p2), -+ p0 = svzip1_b16 (p1, p2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_b32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_b32.c -new file mode 100644 -index 000000000..027609a7d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_b32.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip1_b32_tied1: -+** zip1 p0\.s, p0\.s, p1\.s -+** ret -+*/ -+TEST_UNIFORM_P (zip1_b32_tied1, -+ p0 = svzip1_b32 (p0, p1), -+ p0 = svzip1_b32 (p0, p1)) -+ -+/* -+** zip1_b32_tied2: -+** zip1 p0\.s, p1\.s, p0\.s -+** ret -+*/ -+TEST_UNIFORM_P (zip1_b32_tied2, -+ p0 = svzip1_b32 (p1, p0), -+ p0 = svzip1_b32 (p1, p0)) -+ -+/* -+** zip1_b32_untied: -+** zip1 p0\.s, p1\.s, p2\.s -+** ret -+*/ -+TEST_UNIFORM_P (zip1_b32_untied, -+ p0 = svzip1_b32 (p1, p2), -+ p0 = svzip1_b32 (p1, p2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_b64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_b64.c -new file mode 100644 -index 000000000..8add16d8e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_b64.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip1_b64_tied1: -+** zip1 p0\.d, p0\.d, p1\.d -+** ret -+*/ -+TEST_UNIFORM_P (zip1_b64_tied1, -+ p0 = svzip1_b64 (p0, p1), -+ p0 = svzip1_b64 (p0, p1)) -+ -+/* -+** zip1_b64_tied2: -+** zip1 p0\.d, p1\.d, p0\.d -+** ret -+*/ -+TEST_UNIFORM_P (zip1_b64_tied2, -+ p0 = svzip1_b64 (p1, p0), -+ p0 = svzip1_b64 (p1, p0)) -+ -+/* -+** zip1_b64_untied: -+** zip1 p0\.d, p1\.d, p2\.d -+** ret -+*/ -+TEST_UNIFORM_P (zip1_b64_untied, -+ p0 = svzip1_b64 (p1, p2), -+ p0 = svzip1_b64 (p1, p2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_b8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_b8.c -new file mode 100644 -index 000000000..8648298ac ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_b8.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip1_b8_tied1: -+** zip1 
p0\.b, p0\.b, p1\.b -+** ret -+*/ -+TEST_UNIFORM_P (zip1_b8_tied1, -+ p0 = svzip1_b8 (p0, p1), -+ p0 = svzip1_b8 (p0, p1)) -+ -+/* -+** zip1_b8_tied2: -+** zip1 p0\.b, p1\.b, p0\.b -+** ret -+*/ -+TEST_UNIFORM_P (zip1_b8_tied2, -+ p0 = svzip1_b8 (p1, p0), -+ p0 = svzip1_b8 (p1, p0)) -+ -+/* -+** zip1_b8_untied: -+** zip1 p0\.b, p1\.b, p2\.b -+** ret -+*/ -+TEST_UNIFORM_P (zip1_b8_untied, -+ p0 = svzip1_b8 (p1, p2), -+ p0 = svzip1_b8 (p1, p2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_bf16.c -new file mode 100644 -index 000000000..6017cde41 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_bf16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip1_bf16_tied1: -+** zip1 z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (zip1_bf16_tied1, svbfloat16_t, -+ z0 = svzip1_bf16 (z0, z1), -+ z0 = svzip1 (z0, z1)) -+ -+/* -+** zip1_bf16_tied2: -+** zip1 z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (zip1_bf16_tied2, svbfloat16_t, -+ z0 = svzip1_bf16 (z1, z0), -+ z0 = svzip1 (z1, z0)) -+ -+/* -+** zip1_bf16_untied: -+** zip1 z0\.h, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (zip1_bf16_untied, svbfloat16_t, -+ z0 = svzip1_bf16 (z1, z2), -+ z0 = svzip1 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_f16.c -new file mode 100644 -index 000000000..1c6ce4e7d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_f16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip1_f16_tied1: -+** zip1 z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (zip1_f16_tied1, svfloat16_t, -+ z0 = svzip1_f16 (z0, z1), -+ z0 = svzip1 (z0, z1)) -+ -+/* -+** zip1_f16_tied2: -+** zip1 z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (zip1_f16_tied2, svfloat16_t, -+ z0 = svzip1_f16 (z1, z0), -+ z0 = svzip1 (z1, z0)) -+ -+/* -+** zip1_f16_untied: -+** zip1 z0\.h, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (zip1_f16_untied, svfloat16_t, -+ z0 = svzip1_f16 (z1, z2), -+ z0 = svzip1 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_f32.c -new file mode 100644 -index 000000000..288ceff3f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_f32.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip1_f32_tied1: -+** zip1 z0\.s, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (zip1_f32_tied1, svfloat32_t, -+ z0 = svzip1_f32 (z0, z1), -+ z0 = svzip1 (z0, z1)) -+ -+/* -+** zip1_f32_tied2: -+** zip1 z0\.s, z1\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (zip1_f32_tied2, svfloat32_t, -+ z0 = svzip1_f32 (z1, z0), -+ z0 = svzip1 (z1, z0)) -+ -+/* -+** zip1_f32_untied: -+** zip1 z0\.s, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (zip1_f32_untied, svfloat32_t, -+ z0 = svzip1_f32 (z1, z2), -+ z0 = svzip1 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_f64.c -new file mode 100644 -index 000000000..5abbea1cd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_f64.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* 
-+** zip1_f64_tied1: -+** zip1 z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (zip1_f64_tied1, svfloat64_t, -+ z0 = svzip1_f64 (z0, z1), -+ z0 = svzip1 (z0, z1)) -+ -+/* -+** zip1_f64_tied2: -+** zip1 z0\.d, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (zip1_f64_tied2, svfloat64_t, -+ z0 = svzip1_f64 (z1, z0), -+ z0 = svzip1 (z1, z0)) -+ -+/* -+** zip1_f64_untied: -+** zip1 z0\.d, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (zip1_f64_untied, svfloat64_t, -+ z0 = svzip1_f64 (z1, z2), -+ z0 = svzip1 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_s16.c -new file mode 100644 -index 000000000..8ecd20142 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_s16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip1_s16_tied1: -+** zip1 z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (zip1_s16_tied1, svint16_t, -+ z0 = svzip1_s16 (z0, z1), -+ z0 = svzip1 (z0, z1)) -+ -+/* -+** zip1_s16_tied2: -+** zip1 z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (zip1_s16_tied2, svint16_t, -+ z0 = svzip1_s16 (z1, z0), -+ z0 = svzip1 (z1, z0)) -+ -+/* -+** zip1_s16_untied: -+** zip1 z0\.h, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (zip1_s16_untied, svint16_t, -+ z0 = svzip1_s16 (z1, z2), -+ z0 = svzip1 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_s32.c -new file mode 100644 -index 000000000..c523885ea ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_s32.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip1_s32_tied1: -+** zip1 z0\.s, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (zip1_s32_tied1, svint32_t, -+ z0 = svzip1_s32 (z0, z1), -+ z0 = svzip1 (z0, z1)) -+ -+/* -+** zip1_s32_tied2: -+** zip1 z0\.s, z1\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (zip1_s32_tied2, svint32_t, -+ z0 = svzip1_s32 (z1, z0), -+ z0 = svzip1 (z1, z0)) -+ -+/* -+** zip1_s32_untied: -+** zip1 z0\.s, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (zip1_s32_untied, svint32_t, -+ z0 = svzip1_s32 (z1, z2), -+ z0 = svzip1 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_s64.c -new file mode 100644 -index 000000000..d1dca7ee9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_s64.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip1_s64_tied1: -+** zip1 z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (zip1_s64_tied1, svint64_t, -+ z0 = svzip1_s64 (z0, z1), -+ z0 = svzip1 (z0, z1)) -+ -+/* -+** zip1_s64_tied2: -+** zip1 z0\.d, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (zip1_s64_tied2, svint64_t, -+ z0 = svzip1_s64 (z1, z0), -+ z0 = svzip1 (z1, z0)) -+ -+/* -+** zip1_s64_untied: -+** zip1 z0\.d, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (zip1_s64_untied, svint64_t, -+ z0 = svzip1_s64 (z1, z2), -+ z0 = svzip1 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_s8.c -new file mode 100644 -index 000000000..1600ab586 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_s8.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ 
-+#include "test_sve_acle.h" -+ -+/* -+** zip1_s8_tied1: -+** zip1 z0\.b, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (zip1_s8_tied1, svint8_t, -+ z0 = svzip1_s8 (z0, z1), -+ z0 = svzip1 (z0, z1)) -+ -+/* -+** zip1_s8_tied2: -+** zip1 z0\.b, z1\.b, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (zip1_s8_tied2, svint8_t, -+ z0 = svzip1_s8 (z1, z0), -+ z0 = svzip1 (z1, z0)) -+ -+/* -+** zip1_s8_untied: -+** zip1 z0\.b, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (zip1_s8_untied, svint8_t, -+ z0 = svzip1_s8 (z1, z2), -+ z0 = svzip1 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_u16.c -new file mode 100644 -index 000000000..3773ed22f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_u16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip1_u16_tied1: -+** zip1 z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (zip1_u16_tied1, svuint16_t, -+ z0 = svzip1_u16 (z0, z1), -+ z0 = svzip1 (z0, z1)) -+ -+/* -+** zip1_u16_tied2: -+** zip1 z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (zip1_u16_tied2, svuint16_t, -+ z0 = svzip1_u16 (z1, z0), -+ z0 = svzip1 (z1, z0)) -+ -+/* -+** zip1_u16_untied: -+** zip1 z0\.h, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (zip1_u16_untied, svuint16_t, -+ z0 = svzip1_u16 (z1, z2), -+ z0 = svzip1 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_u32.c -new file mode 100644 -index 000000000..e67c121e5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_u32.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip1_u32_tied1: -+** zip1 z0\.s, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (zip1_u32_tied1, svuint32_t, -+ z0 = svzip1_u32 (z0, z1), -+ z0 = svzip1 (z0, z1)) -+ -+/* -+** zip1_u32_tied2: -+** zip1 z0\.s, z1\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (zip1_u32_tied2, svuint32_t, -+ z0 = svzip1_u32 (z1, z0), -+ z0 = svzip1 (z1, z0)) -+ -+/* -+** zip1_u32_untied: -+** zip1 z0\.s, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (zip1_u32_untied, svuint32_t, -+ z0 = svzip1_u32 (z1, z2), -+ z0 = svzip1 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_u64.c -new file mode 100644 -index 000000000..bb6380a6a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_u64.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip1_u64_tied1: -+** zip1 z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (zip1_u64_tied1, svuint64_t, -+ z0 = svzip1_u64 (z0, z1), -+ z0 = svzip1 (z0, z1)) -+ -+/* -+** zip1_u64_tied2: -+** zip1 z0\.d, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (zip1_u64_tied2, svuint64_t, -+ z0 = svzip1_u64 (z1, z0), -+ z0 = svzip1 (z1, z0)) -+ -+/* -+** zip1_u64_untied: -+** zip1 z0\.d, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (zip1_u64_untied, svuint64_t, -+ z0 = svzip1_u64 (z1, z2), -+ z0 = svzip1 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_u8.c -new file mode 100644 -index 000000000..01d89d4fe ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1_u8.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" 
"-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip1_u8_tied1: -+** zip1 z0\.b, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (zip1_u8_tied1, svuint8_t, -+ z0 = svzip1_u8 (z0, z1), -+ z0 = svzip1 (z0, z1)) -+ -+/* -+** zip1_u8_tied2: -+** zip1 z0\.b, z1\.b, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (zip1_u8_tied2, svuint8_t, -+ z0 = svzip1_u8 (z1, z0), -+ z0 = svzip1 (z1, z0)) -+ -+/* -+** zip1_u8_untied: -+** zip1 z0\.b, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (zip1_u8_untied, svuint8_t, -+ z0 = svzip1_u8 (z1, z2), -+ z0 = svzip1 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_bf16.c -new file mode 100644 -index 000000000..aabf7c0e1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_bf16.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip1q_bf16_tied1: -+** zip1 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip1q_bf16_tied1, svbfloat16_t, -+ z0 = svzip1q_bf16 (z0, z1), -+ z0 = svzip1q (z0, z1)) -+ -+/* -+** zip1q_bf16_tied2: -+** zip1 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip1q_bf16_tied2, svbfloat16_t, -+ z0 = svzip1q_bf16 (z1, z0), -+ z0 = svzip1q (z1, z0)) -+ -+/* -+** zip1q_bf16_untied: -+** zip1 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip1q_bf16_untied, svbfloat16_t, -+ z0 = svzip1q_bf16 (z1, z2), -+ z0 = svzip1q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_f16.c -new file mode 100644 -index 000000000..1170cc5e7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_f16.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip1q_f16_tied1: -+** zip1 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip1q_f16_tied1, svfloat16_t, -+ z0 = svzip1q_f16 (z0, z1), -+ z0 = svzip1q (z0, z1)) -+ -+/* -+** zip1q_f16_tied2: -+** zip1 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip1q_f16_tied2, svfloat16_t, -+ z0 = svzip1q_f16 (z1, z0), -+ z0 = svzip1q (z1, z0)) -+ -+/* -+** zip1q_f16_untied: -+** zip1 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip1q_f16_untied, svfloat16_t, -+ z0 = svzip1q_f16 (z1, z2), -+ z0 = svzip1q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_f32.c -new file mode 100644 -index 000000000..09666da1b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_f32.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip1q_f32_tied1: -+** zip1 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip1q_f32_tied1, svfloat32_t, -+ z0 = svzip1q_f32 (z0, z1), -+ z0 = svzip1q (z0, z1)) -+ -+/* -+** zip1q_f32_tied2: -+** zip1 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip1q_f32_tied2, svfloat32_t, -+ z0 = svzip1q_f32 (z1, z0), -+ z0 = svzip1q (z1, z0)) -+ -+/* -+** zip1q_f32_untied: -+** zip1 
z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip1q_f32_untied, svfloat32_t, -+ z0 = svzip1q_f32 (z1, z2), -+ z0 = svzip1q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_f64.c -new file mode 100644 -index 000000000..d77fb1c90 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_f64.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip1q_f64_tied1: -+** zip1 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip1q_f64_tied1, svfloat64_t, -+ z0 = svzip1q_f64 (z0, z1), -+ z0 = svzip1q (z0, z1)) -+ -+/* -+** zip1q_f64_tied2: -+** zip1 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip1q_f64_tied2, svfloat64_t, -+ z0 = svzip1q_f64 (z1, z0), -+ z0 = svzip1q (z1, z0)) -+ -+/* -+** zip1q_f64_untied: -+** zip1 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip1q_f64_untied, svfloat64_t, -+ z0 = svzip1q_f64 (z1, z2), -+ z0 = svzip1q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_s16.c -new file mode 100644 -index 000000000..92a6b5514 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_s16.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip1q_s16_tied1: -+** zip1 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip1q_s16_tied1, svint16_t, -+ z0 = svzip1q_s16 (z0, z1), -+ z0 = svzip1q (z0, z1)) -+ -+/* -+** zip1q_s16_tied2: -+** zip1 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip1q_s16_tied2, svint16_t, -+ z0 = svzip1q_s16 (z1, z0), -+ z0 = svzip1q (z1, z0)) -+ -+/* -+** zip1q_s16_untied: -+** zip1 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip1q_s16_untied, svint16_t, -+ z0 = svzip1q_s16 (z1, z2), -+ z0 = svzip1q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_s32.c -new file mode 100644 -index 000000000..a918d2d4c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_s32.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip1q_s32_tied1: -+** zip1 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip1q_s32_tied1, svint32_t, -+ z0 = svzip1q_s32 (z0, z1), -+ z0 = svzip1q (z0, z1)) -+ -+/* -+** zip1q_s32_tied2: -+** zip1 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip1q_s32_tied2, svint32_t, -+ z0 = svzip1q_s32 (z1, z0), -+ z0 = svzip1q (z1, z0)) -+ -+/* -+** zip1q_s32_untied: -+** zip1 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip1q_s32_untied, svint32_t, -+ z0 = svzip1q_s32 (z1, z2), -+ z0 = svzip1q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_s64.c -new file mode 100644 -index 000000000..be3524fd5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_s64.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target 
aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip1q_s64_tied1: -+** zip1 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip1q_s64_tied1, svint64_t, -+ z0 = svzip1q_s64 (z0, z1), -+ z0 = svzip1q (z0, z1)) -+ -+/* -+** zip1q_s64_tied2: -+** zip1 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip1q_s64_tied2, svint64_t, -+ z0 = svzip1q_s64 (z1, z0), -+ z0 = svzip1q (z1, z0)) -+ -+/* -+** zip1q_s64_untied: -+** zip1 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip1q_s64_untied, svint64_t, -+ z0 = svzip1q_s64 (z1, z2), -+ z0 = svzip1q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_s8.c -new file mode 100644 -index 000000000..24ea2399c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_s8.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip1q_s8_tied1: -+** zip1 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip1q_s8_tied1, svint8_t, -+ z0 = svzip1q_s8 (z0, z1), -+ z0 = svzip1q (z0, z1)) -+ -+/* -+** zip1q_s8_tied2: -+** zip1 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip1q_s8_tied2, svint8_t, -+ z0 = svzip1q_s8 (z1, z0), -+ z0 = svzip1q (z1, z0)) -+ -+/* -+** zip1q_s8_untied: -+** zip1 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip1q_s8_untied, svint8_t, -+ z0 = svzip1q_s8 (z1, z2), -+ z0 = svzip1q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_u16.c -new file mode 100644 -index 000000000..65caf9706 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_u16.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip1q_u16_tied1: -+** zip1 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip1q_u16_tied1, svuint16_t, -+ z0 = svzip1q_u16 (z0, z1), -+ z0 = svzip1q (z0, z1)) -+ -+/* -+** zip1q_u16_tied2: -+** zip1 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip1q_u16_tied2, svuint16_t, -+ z0 = svzip1q_u16 (z1, z0), -+ z0 = svzip1q (z1, z0)) -+ -+/* -+** zip1q_u16_untied: -+** zip1 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip1q_u16_untied, svuint16_t, -+ z0 = svzip1q_u16 (z1, z2), -+ z0 = svzip1q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_u32.c -new file mode 100644 -index 000000000..abd76b74f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_u32.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip1q_u32_tied1: -+** zip1 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip1q_u32_tied1, svuint32_t, -+ z0 = svzip1q_u32 (z0, z1), -+ z0 = svzip1q (z0, z1)) -+ -+/* -+** zip1q_u32_tied2: -+** zip1 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip1q_u32_tied2, 
svuint32_t, -+ z0 = svzip1q_u32 (z1, z0), -+ z0 = svzip1q (z1, z0)) -+ -+/* -+** zip1q_u32_untied: -+** zip1 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip1q_u32_untied, svuint32_t, -+ z0 = svzip1q_u32 (z1, z2), -+ z0 = svzip1q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_u64.c -new file mode 100644 -index 000000000..0e91929b7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_u64.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip1q_u64_tied1: -+** zip1 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip1q_u64_tied1, svuint64_t, -+ z0 = svzip1q_u64 (z0, z1), -+ z0 = svzip1q (z0, z1)) -+ -+/* -+** zip1q_u64_tied2: -+** zip1 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip1q_u64_tied2, svuint64_t, -+ z0 = svzip1q_u64 (z1, z0), -+ z0 = svzip1q (z1, z0)) -+ -+/* -+** zip1q_u64_untied: -+** zip1 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip1q_u64_untied, svuint64_t, -+ z0 = svzip1q_u64 (z1, z2), -+ z0 = svzip1q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_u8.c -new file mode 100644 -index 000000000..07d484b0b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip1q_u8.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip1q_u8_tied1: -+** zip1 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip1q_u8_tied1, svuint8_t, -+ z0 = svzip1q_u8 (z0, z1), -+ z0 = svzip1q (z0, z1)) -+ -+/* -+** zip1q_u8_tied2: -+** zip1 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip1q_u8_tied2, svuint8_t, -+ z0 = svzip1q_u8 (z1, z0), -+ z0 = svzip1q (z1, z0)) -+ -+/* -+** zip1q_u8_untied: -+** zip1 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip1q_u8_untied, svuint8_t, -+ z0 = svzip1q_u8 (z1, z2), -+ z0 = svzip1q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_b16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_b16.c -new file mode 100644 -index 000000000..5624c9815 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_b16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip2_b16_tied1: -+** zip2 p0\.h, p0\.h, p1\.h -+** ret -+*/ -+TEST_UNIFORM_P (zip2_b16_tied1, -+ p0 = svzip2_b16 (p0, p1), -+ p0 = svzip2_b16 (p0, p1)) -+ -+/* -+** zip2_b16_tied2: -+** zip2 p0\.h, p1\.h, p0\.h -+** ret -+*/ -+TEST_UNIFORM_P (zip2_b16_tied2, -+ p0 = svzip2_b16 (p1, p0), -+ p0 = svzip2_b16 (p1, p0)) -+ -+/* -+** zip2_b16_untied: -+** zip2 p0\.h, p1\.h, p2\.h -+** ret -+*/ -+TEST_UNIFORM_P (zip2_b16_untied, -+ p0 = svzip2_b16 (p1, p2), -+ p0 = svzip2_b16 (p1, p2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_b32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_b32.c -new file mode 100644 -index 000000000..b73d5b490 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_b32.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" 
-+ -+/* -+** zip2_b32_tied1: -+** zip2 p0\.s, p0\.s, p1\.s -+** ret -+*/ -+TEST_UNIFORM_P (zip2_b32_tied1, -+ p0 = svzip2_b32 (p0, p1), -+ p0 = svzip2_b32 (p0, p1)) -+ -+/* -+** zip2_b32_tied2: -+** zip2 p0\.s, p1\.s, p0\.s -+** ret -+*/ -+TEST_UNIFORM_P (zip2_b32_tied2, -+ p0 = svzip2_b32 (p1, p0), -+ p0 = svzip2_b32 (p1, p0)) -+ -+/* -+** zip2_b32_untied: -+** zip2 p0\.s, p1\.s, p2\.s -+** ret -+*/ -+TEST_UNIFORM_P (zip2_b32_untied, -+ p0 = svzip2_b32 (p1, p2), -+ p0 = svzip2_b32 (p1, p2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_b64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_b64.c -new file mode 100644 -index 000000000..9ebf050b8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_b64.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip2_b64_tied1: -+** zip2 p0\.d, p0\.d, p1\.d -+** ret -+*/ -+TEST_UNIFORM_P (zip2_b64_tied1, -+ p0 = svzip2_b64 (p0, p1), -+ p0 = svzip2_b64 (p0, p1)) -+ -+/* -+** zip2_b64_tied2: -+** zip2 p0\.d, p1\.d, p0\.d -+** ret -+*/ -+TEST_UNIFORM_P (zip2_b64_tied2, -+ p0 = svzip2_b64 (p1, p0), -+ p0 = svzip2_b64 (p1, p0)) -+ -+/* -+** zip2_b64_untied: -+** zip2 p0\.d, p1\.d, p2\.d -+** ret -+*/ -+TEST_UNIFORM_P (zip2_b64_untied, -+ p0 = svzip2_b64 (p1, p2), -+ p0 = svzip2_b64 (p1, p2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_b8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_b8.c -new file mode 100644 -index 000000000..223a22f99 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_b8.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip2_b8_tied1: -+** zip2 p0\.b, p0\.b, p1\.b -+** ret -+*/ -+TEST_UNIFORM_P (zip2_b8_tied1, -+ p0 = svzip2_b8 (p0, p1), -+ p0 = svzip2_b8 (p0, p1)) -+ -+/* -+** zip2_b8_tied2: -+** zip2 p0\.b, p1\.b, p0\.b -+** ret -+*/ -+TEST_UNIFORM_P (zip2_b8_tied2, -+ p0 = svzip2_b8 (p1, p0), -+ p0 = svzip2_b8 (p1, p0)) -+ -+/* -+** zip2_b8_untied: -+** zip2 p0\.b, p1\.b, p2\.b -+** ret -+*/ -+TEST_UNIFORM_P (zip2_b8_untied, -+ p0 = svzip2_b8 (p1, p2), -+ p0 = svzip2_b8 (p1, p2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_bf16.c -new file mode 100644 -index 000000000..a9e0cfc93 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_bf16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip2_bf16_tied1: -+** zip2 z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (zip2_bf16_tied1, svbfloat16_t, -+ z0 = svzip2_bf16 (z0, z1), -+ z0 = svzip2 (z0, z1)) -+ -+/* -+** zip2_bf16_tied2: -+** zip2 z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (zip2_bf16_tied2, svbfloat16_t, -+ z0 = svzip2_bf16 (z1, z0), -+ z0 = svzip2 (z1, z0)) -+ -+/* -+** zip2_bf16_untied: -+** zip2 z0\.h, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (zip2_bf16_untied, svbfloat16_t, -+ z0 = svzip2_bf16 (z1, z2), -+ z0 = svzip2 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_f16.c -new file mode 100644 -index 000000000..73d4272bc ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_f16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** 
zip2_f16_tied1: -+** zip2 z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (zip2_f16_tied1, svfloat16_t, -+ z0 = svzip2_f16 (z0, z1), -+ z0 = svzip2 (z0, z1)) -+ -+/* -+** zip2_f16_tied2: -+** zip2 z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (zip2_f16_tied2, svfloat16_t, -+ z0 = svzip2_f16 (z1, z0), -+ z0 = svzip2 (z1, z0)) -+ -+/* -+** zip2_f16_untied: -+** zip2 z0\.h, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (zip2_f16_untied, svfloat16_t, -+ z0 = svzip2_f16 (z1, z2), -+ z0 = svzip2 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_f32.c -new file mode 100644 -index 000000000..2ad8ff81d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_f32.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip2_f32_tied1: -+** zip2 z0\.s, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (zip2_f32_tied1, svfloat32_t, -+ z0 = svzip2_f32 (z0, z1), -+ z0 = svzip2 (z0, z1)) -+ -+/* -+** zip2_f32_tied2: -+** zip2 z0\.s, z1\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (zip2_f32_tied2, svfloat32_t, -+ z0 = svzip2_f32 (z1, z0), -+ z0 = svzip2 (z1, z0)) -+ -+/* -+** zip2_f32_untied: -+** zip2 z0\.s, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (zip2_f32_untied, svfloat32_t, -+ z0 = svzip2_f32 (z1, z2), -+ z0 = svzip2 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_f64.c -new file mode 100644 -index 000000000..de5c2646f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_f64.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip2_f64_tied1: -+** zip2 z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (zip2_f64_tied1, svfloat64_t, -+ z0 = svzip2_f64 (z0, z1), -+ z0 = svzip2 (z0, z1)) -+ -+/* -+** zip2_f64_tied2: -+** zip2 z0\.d, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (zip2_f64_tied2, svfloat64_t, -+ z0 = svzip2_f64 (z1, z0), -+ z0 = svzip2 (z1, z0)) -+ -+/* -+** zip2_f64_untied: -+** zip2 z0\.d, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (zip2_f64_untied, svfloat64_t, -+ z0 = svzip2_f64 (z1, z2), -+ z0 = svzip2 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_s16.c -new file mode 100644 -index 000000000..fc366c991 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_s16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip2_s16_tied1: -+** zip2 z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (zip2_s16_tied1, svint16_t, -+ z0 = svzip2_s16 (z0, z1), -+ z0 = svzip2 (z0, z1)) -+ -+/* -+** zip2_s16_tied2: -+** zip2 z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (zip2_s16_tied2, svint16_t, -+ z0 = svzip2_s16 (z1, z0), -+ z0 = svzip2 (z1, z0)) -+ -+/* -+** zip2_s16_untied: -+** zip2 z0\.h, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (zip2_s16_untied, svint16_t, -+ z0 = svzip2_s16 (z1, z2), -+ z0 = svzip2 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_s32.c -new file mode 100644 -index 000000000..e56934d26 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_s32.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } 
*/ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip2_s32_tied1: -+** zip2 z0\.s, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (zip2_s32_tied1, svint32_t, -+ z0 = svzip2_s32 (z0, z1), -+ z0 = svzip2 (z0, z1)) -+ -+/* -+** zip2_s32_tied2: -+** zip2 z0\.s, z1\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (zip2_s32_tied2, svint32_t, -+ z0 = svzip2_s32 (z1, z0), -+ z0 = svzip2 (z1, z0)) -+ -+/* -+** zip2_s32_untied: -+** zip2 z0\.s, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (zip2_s32_untied, svint32_t, -+ z0 = svzip2_s32 (z1, z2), -+ z0 = svzip2 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_s64.c -new file mode 100644 -index 000000000..cefc73b72 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_s64.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip2_s64_tied1: -+** zip2 z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (zip2_s64_tied1, svint64_t, -+ z0 = svzip2_s64 (z0, z1), -+ z0 = svzip2 (z0, z1)) -+ -+/* -+** zip2_s64_tied2: -+** zip2 z0\.d, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (zip2_s64_tied2, svint64_t, -+ z0 = svzip2_s64 (z1, z0), -+ z0 = svzip2 (z1, z0)) -+ -+/* -+** zip2_s64_untied: -+** zip2 z0\.d, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (zip2_s64_untied, svint64_t, -+ z0 = svzip2_s64 (z1, z2), -+ z0 = svzip2 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_s8.c -new file mode 100644 -index 000000000..452bbce26 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_s8.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip2_s8_tied1: -+** zip2 z0\.b, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (zip2_s8_tied1, svint8_t, -+ z0 = svzip2_s8 (z0, z1), -+ z0 = svzip2 (z0, z1)) -+ -+/* -+** zip2_s8_tied2: -+** zip2 z0\.b, z1\.b, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (zip2_s8_tied2, svint8_t, -+ z0 = svzip2_s8 (z1, z0), -+ z0 = svzip2 (z1, z0)) -+ -+/* -+** zip2_s8_untied: -+** zip2 z0\.b, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (zip2_s8_untied, svint8_t, -+ z0 = svzip2_s8 (z1, z2), -+ z0 = svzip2 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_u16.c -new file mode 100644 -index 000000000..9a20b4ed1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_u16.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip2_u16_tied1: -+** zip2 z0\.h, z0\.h, z1\.h -+** ret -+*/ -+TEST_UNIFORM_Z (zip2_u16_tied1, svuint16_t, -+ z0 = svzip2_u16 (z0, z1), -+ z0 = svzip2 (z0, z1)) -+ -+/* -+** zip2_u16_tied2: -+** zip2 z0\.h, z1\.h, z0\.h -+** ret -+*/ -+TEST_UNIFORM_Z (zip2_u16_tied2, svuint16_t, -+ z0 = svzip2_u16 (z1, z0), -+ z0 = svzip2 (z1, z0)) -+ -+/* -+** zip2_u16_untied: -+** zip2 z0\.h, z1\.h, z2\.h -+** ret -+*/ -+TEST_UNIFORM_Z (zip2_u16_untied, svuint16_t, -+ z0 = svzip2_u16 (z1, z2), -+ z0 = svzip2 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_u32.c -new file mode 100644 -index 000000000..70626c66e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_u32.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" 
"-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip2_u32_tied1: -+** zip2 z0\.s, z0\.s, z1\.s -+** ret -+*/ -+TEST_UNIFORM_Z (zip2_u32_tied1, svuint32_t, -+ z0 = svzip2_u32 (z0, z1), -+ z0 = svzip2 (z0, z1)) -+ -+/* -+** zip2_u32_tied2: -+** zip2 z0\.s, z1\.s, z0\.s -+** ret -+*/ -+TEST_UNIFORM_Z (zip2_u32_tied2, svuint32_t, -+ z0 = svzip2_u32 (z1, z0), -+ z0 = svzip2 (z1, z0)) -+ -+/* -+** zip2_u32_untied: -+** zip2 z0\.s, z1\.s, z2\.s -+** ret -+*/ -+TEST_UNIFORM_Z (zip2_u32_untied, svuint32_t, -+ z0 = svzip2_u32 (z1, z2), -+ z0 = svzip2 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_u64.c -new file mode 100644 -index 000000000..43a43ff7c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_u64.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip2_u64_tied1: -+** zip2 z0\.d, z0\.d, z1\.d -+** ret -+*/ -+TEST_UNIFORM_Z (zip2_u64_tied1, svuint64_t, -+ z0 = svzip2_u64 (z0, z1), -+ z0 = svzip2 (z0, z1)) -+ -+/* -+** zip2_u64_tied2: -+** zip2 z0\.d, z1\.d, z0\.d -+** ret -+*/ -+TEST_UNIFORM_Z (zip2_u64_tied2, svuint64_t, -+ z0 = svzip2_u64 (z1, z0), -+ z0 = svzip2 (z1, z0)) -+ -+/* -+** zip2_u64_untied: -+** zip2 z0\.d, z1\.d, z2\.d -+** ret -+*/ -+TEST_UNIFORM_Z (zip2_u64_untied, svuint64_t, -+ z0 = svzip2_u64 (z1, z2), -+ z0 = svzip2 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_u8.c -new file mode 100644 -index 000000000..015f1844b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2_u8.c -@@ -0,0 +1,30 @@ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip2_u8_tied1: -+** zip2 z0\.b, z0\.b, z1\.b -+** ret -+*/ -+TEST_UNIFORM_Z (zip2_u8_tied1, svuint8_t, -+ z0 = svzip2_u8 (z0, z1), -+ z0 = svzip2 (z0, z1)) -+ -+/* -+** zip2_u8_tied2: -+** zip2 z0\.b, z1\.b, z0\.b -+** ret -+*/ -+TEST_UNIFORM_Z (zip2_u8_tied2, svuint8_t, -+ z0 = svzip2_u8 (z1, z0), -+ z0 = svzip2 (z1, z0)) -+ -+/* -+** zip2_u8_untied: -+** zip2 z0\.b, z1\.b, z2\.b -+** ret -+*/ -+TEST_UNIFORM_Z (zip2_u8_untied, svuint8_t, -+ z0 = svzip2_u8 (z1, z2), -+ z0 = svzip2 (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_bf16.c -new file mode 100644 -index 000000000..6d79136cf ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_bf16.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip2q_bf16_tied1: -+** zip2 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip2q_bf16_tied1, svbfloat16_t, -+ z0 = svzip2q_bf16 (z0, z1), -+ z0 = svzip2q (z0, z1)) -+ -+/* -+** zip2q_bf16_tied2: -+** zip2 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip2q_bf16_tied2, svbfloat16_t, -+ z0 = svzip2q_bf16 (z1, z0), -+ z0 = svzip2q (z1, z0)) -+ -+/* -+** zip2q_bf16_untied: -+** zip2 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip2q_bf16_untied, svbfloat16_t, -+ z0 = svzip2q_bf16 (z1, z2), -+ z0 = svzip2q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_f16.c -new file 
mode 100644 -index 000000000..984240e19 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_f16.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip2q_f16_tied1: -+** zip2 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip2q_f16_tied1, svfloat16_t, -+ z0 = svzip2q_f16 (z0, z1), -+ z0 = svzip2q (z0, z1)) -+ -+/* -+** zip2q_f16_tied2: -+** zip2 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip2q_f16_tied2, svfloat16_t, -+ z0 = svzip2q_f16 (z1, z0), -+ z0 = svzip2q (z1, z0)) -+ -+/* -+** zip2q_f16_untied: -+** zip2 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip2q_f16_untied, svfloat16_t, -+ z0 = svzip2q_f16 (z1, z2), -+ z0 = svzip2q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_f32.c -new file mode 100644 -index 000000000..0f8ccd804 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_f32.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip2q_f32_tied1: -+** zip2 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip2q_f32_tied1, svfloat32_t, -+ z0 = svzip2q_f32 (z0, z1), -+ z0 = svzip2q (z0, z1)) -+ -+/* -+** zip2q_f32_tied2: -+** zip2 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip2q_f32_tied2, svfloat32_t, -+ z0 = svzip2q_f32 (z1, z0), -+ z0 = svzip2q (z1, z0)) -+ -+/* -+** zip2q_f32_untied: -+** zip2 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip2q_f32_untied, svfloat32_t, -+ z0 = svzip2q_f32 (z1, z2), -+ z0 = svzip2q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_f64.c -new file mode 100644 -index 000000000..b5411cff7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_f64.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip2q_f64_tied1: -+** zip2 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip2q_f64_tied1, svfloat64_t, -+ z0 = svzip2q_f64 (z0, z1), -+ z0 = svzip2q (z0, z1)) -+ -+/* -+** zip2q_f64_tied2: -+** zip2 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip2q_f64_tied2, svfloat64_t, -+ z0 = svzip2q_f64 (z1, z0), -+ z0 = svzip2q (z1, z0)) -+ -+/* -+** zip2q_f64_untied: -+** zip2 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip2q_f64_untied, svfloat64_t, -+ z0 = svzip2q_f64 (z1, z2), -+ z0 = svzip2q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_s16.c -new file mode 100644 -index 000000000..66751fc7f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_s16.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip2q_s16_tied1: -+** zip2 z0\.q, z0\.q, z1\.q -+** ret -+*/ 
-+TEST_UNIFORM_Z (zip2q_s16_tied1, svint16_t, -+ z0 = svzip2q_s16 (z0, z1), -+ z0 = svzip2q (z0, z1)) -+ -+/* -+** zip2q_s16_tied2: -+** zip2 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip2q_s16_tied2, svint16_t, -+ z0 = svzip2q_s16 (z1, z0), -+ z0 = svzip2q (z1, z0)) -+ -+/* -+** zip2q_s16_untied: -+** zip2 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip2q_s16_untied, svint16_t, -+ z0 = svzip2q_s16 (z1, z2), -+ z0 = svzip2q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_s32.c -new file mode 100644 -index 000000000..830de3311 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_s32.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip2q_s32_tied1: -+** zip2 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip2q_s32_tied1, svint32_t, -+ z0 = svzip2q_s32 (z0, z1), -+ z0 = svzip2q (z0, z1)) -+ -+/* -+** zip2q_s32_tied2: -+** zip2 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip2q_s32_tied2, svint32_t, -+ z0 = svzip2q_s32 (z1, z0), -+ z0 = svzip2q (z1, z0)) -+ -+/* -+** zip2q_s32_untied: -+** zip2 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip2q_s32_untied, svint32_t, -+ z0 = svzip2q_s32 (z1, z2), -+ z0 = svzip2q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_s64.c -new file mode 100644 -index 000000000..917be4f40 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_s64.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip2q_s64_tied1: -+** zip2 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip2q_s64_tied1, svint64_t, -+ z0 = svzip2q_s64 (z0, z1), -+ z0 = svzip2q (z0, z1)) -+ -+/* -+** zip2q_s64_tied2: -+** zip2 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip2q_s64_tied2, svint64_t, -+ z0 = svzip2q_s64 (z1, z0), -+ z0 = svzip2q (z1, z0)) -+ -+/* -+** zip2q_s64_untied: -+** zip2 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip2q_s64_untied, svint64_t, -+ z0 = svzip2q_s64 (z1, z2), -+ z0 = svzip2q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_s8.c -new file mode 100644 -index 000000000..dff6e2d7b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_s8.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip2q_s8_tied1: -+** zip2 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip2q_s8_tied1, svint8_t, -+ z0 = svzip2q_s8 (z0, z1), -+ z0 = svzip2q (z0, z1)) -+ -+/* -+** zip2q_s8_tied2: -+** zip2 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip2q_s8_tied2, svint8_t, -+ z0 = svzip2q_s8 (z1, z0), -+ z0 = svzip2q (z1, z0)) -+ -+/* -+** zip2q_s8_untied: -+** zip2 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip2q_s8_untied, svint8_t, -+ z0 = svzip2q_s8 (z1, z2), -+ z0 = svzip2q (z1, z2)) -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_u16.c -new file mode 100644 -index 000000000..9e194425c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_u16.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip2q_u16_tied1: -+** zip2 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip2q_u16_tied1, svuint16_t, -+ z0 = svzip2q_u16 (z0, z1), -+ z0 = svzip2q (z0, z1)) -+ -+/* -+** zip2q_u16_tied2: -+** zip2 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip2q_u16_tied2, svuint16_t, -+ z0 = svzip2q_u16 (z1, z0), -+ z0 = svzip2q (z1, z0)) -+ -+/* -+** zip2q_u16_untied: -+** zip2 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip2q_u16_untied, svuint16_t, -+ z0 = svzip2q_u16 (z1, z2), -+ z0 = svzip2q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_u32.c -new file mode 100644 -index 000000000..89de27f6b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_u32.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip2q_u32_tied1: -+** zip2 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip2q_u32_tied1, svuint32_t, -+ z0 = svzip2q_u32 (z0, z1), -+ z0 = svzip2q (z0, z1)) -+ -+/* -+** zip2q_u32_tied2: -+** zip2 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip2q_u32_tied2, svuint32_t, -+ z0 = svzip2q_u32 (z1, z0), -+ z0 = svzip2q (z1, z0)) -+ -+/* -+** zip2q_u32_untied: -+** zip2 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip2q_u32_untied, svuint32_t, -+ z0 = svzip2q_u32 (z1, z2), -+ z0 = svzip2q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_u64.c -new file mode 100644 -index 000000000..f2c9852ac ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_u64.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip2q_u64_tied1: -+** zip2 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip2q_u64_tied1, svuint64_t, -+ z0 = svzip2q_u64 (z0, z1), -+ z0 = svzip2q (z0, z1)) -+ -+/* -+** zip2q_u64_tied2: -+** zip2 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip2q_u64_tied2, svuint64_t, -+ z0 = svzip2q_u64 (z1, z0), -+ z0 = svzip2q (z1, z0)) -+ -+/* -+** zip2q_u64_untied: -+** zip2 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip2q_u64_untied, svuint64_t, -+ z0 = svzip2q_u64 (z1, z2), -+ z0 = svzip2q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_u8.c -new file mode 100644 -index 000000000..a12905586 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/zip2q_u8.c -@@ -0,0 +1,32 @@ -+/* { dg-require-effective-target aarch64_asm_f64mm_ok } */ -+/* { dg-additional-options "-march=armv8.2-a+f64mm" } */ -+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } 
*/ -+ -+#include "test_sve_acle.h" -+ -+/* -+** zip2q_u8_tied1: -+** zip2 z0\.q, z0\.q, z1\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip2q_u8_tied1, svuint8_t, -+ z0 = svzip2q_u8 (z0, z1), -+ z0 = svzip2q (z0, z1)) -+ -+/* -+** zip2q_u8_tied2: -+** zip2 z0\.q, z1\.q, z0\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip2q_u8_tied2, svuint8_t, -+ z0 = svzip2q_u8 (z1, z0), -+ z0 = svzip2q (z1, z0)) -+ -+/* -+** zip2q_u8_untied: -+** zip2 z0\.q, z1\.q, z2\.q -+** ret -+*/ -+TEST_UNIFORM_Z (zip2q_u8_untied, svuint8_t, -+ z0 = svzip2q_u8 (z1, z2), -+ z0 = svzip2q (z1, z2)) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/adr_index_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/adr_index_1.c -new file mode 100644 -index 000000000..714265ed1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/adr_index_1.c -@@ -0,0 +1,38 @@ -+/* { dg-do compile } */ -+/* { dg-options "-std=c99" } */ -+ -+#include -+ -+void -+f1 (svbool_t pg, uint32_t *u32_ptr, svuint8_t u8, svuint16_t u16, -+ svint32_t s32, svuint32_t u32, svfloat32_t f32, -+ svint64_t s64, svuint64_t u64, svfloat64_t f64) -+{ -+ svadrh_index (u32); /* { dg-error {too few arguments to function 'svadrh_index'} } */ -+ svadrh_index (u32, u32, u32); /* { dg-error {too many arguments to function 'svadrh_index'} } */ -+ svadrh_index (u32_ptr, s32); /* { dg-error {passing '[^']*\*'[^\n]* to argument 1 of 'svadrh_index', which expects an SVE vector type} } */ -+ svadrh_index (0, s32); /* { dg-error {passing 'int' to argument 1 of 'svadrh_index', which expects an SVE vector type} } */ -+ svadrh_index (u16, u16); /* { dg-error {passing 'svuint16_t' to argument 1 of 'svadrh_index', which expects 'svuint32_t' or 'svuint64_t'} } */ -+ svadrh_index (s32, s32); /* { dg-error {passing 'svint32_t' to argument 1 of 'svadrh_index', which expects 'svuint32_t' or 'svuint64_t'} } */ -+ svadrh_index (f32, s32); /* { dg-error {passing 'svfloat32_t' to argument 1 of 'svadrh_index', which expects 'svuint32_t' or 'svuint64_t'} } */ -+ svadrh_index (pg, s32); /* { dg-error {passing 'svbool_t' to argument 1 of 'svadrh_index', which expects 'svuint32_t' or 'svuint64_t'} } */ -+ -+ svadrh_index (u32, 0); /* { dg-error {passing 'int' to argument 2 of 'svadrh_index', which expects an SVE vector type} } */ -+ svadrh_index (u32, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svadrh_index', which expects a vector of 32-bit or 64-bit integers} } */ -+ svadrh_index (u32, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svadrh_index', which expects a vector of 32-bit or 64-bit integers} } */ -+ svadrh_index (u32, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svadrh_index', which expects a vector of integers} } */ -+ -+ svadrh_index (u32, s32); -+ svadrh_index (u32, u32); -+ svadrh_index (u32, f32); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svadrh_index', which expects a vector of integers} } */ -+ svadrh_index (u32, s64); /* { dg-error {cannot combine a base of type 'svuint32_t' with an index of type 'svint64_t'} } */ -+ svadrh_index (u32, u64); /* { dg-error {cannot combine a base of type 'svuint32_t' with an index of type 'svuint64_t'} } */ -+ svadrh_index (u32, f64); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svadrh_index', which expects a vector of integers} } */ -+ -+ svadrh_index (u64, s32); /* { dg-error {cannot combine a base of type 'svuint64_t' with an index of type 'svint32_t'} } */ -+ svadrh_index (u64, u32); /* { dg-error {cannot combine a base of type 'svuint64_t' with an index of 
type 'svuint32_t'} } */ -+ svadrh_index (u64, f32); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svadrh_index', which expects a vector of integers} } */ -+ svadrh_index (u64, s64); -+ svadrh_index (u64, u64); -+ svadrh_index (u64, f64); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svadrh_index', which expects a vector of integers} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/adr_offset_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/adr_offset_1.c -new file mode 100644 -index 000000000..528d7ac51 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/adr_offset_1.c -@@ -0,0 +1,38 @@ -+/* { dg-do compile } */ -+/* { dg-options "-std=c99" } */ -+ -+#include -+ -+void -+f1 (svbool_t pg, uint32_t *u32_ptr, svuint8_t u8, svuint16_t u16, -+ svint32_t s32, svuint32_t u32, svfloat32_t f32, -+ svint64_t s64, svuint64_t u64, svfloat64_t f64) -+{ -+ svadrb_offset (u32); /* { dg-error {too few arguments to function 'svadrb_offset'} } */ -+ svadrb_offset (u32, u32, u32); /* { dg-error {too many arguments to function 'svadrb_offset'} } */ -+ svadrb_offset (u32_ptr, s32); /* { dg-error {passing '[^']*\*'[^\n]* to argument 1 of 'svadrb_offset', which expects an SVE vector type} } */ -+ svadrb_offset (0, s32); /* { dg-error {passing 'int' to argument 1 of 'svadrb_offset', which expects an SVE vector type} } */ -+ svadrb_offset (u16, u16); /* { dg-error {passing 'svuint16_t' to argument 1 of 'svadrb_offset', which expects 'svuint32_t' or 'svuint64_t'} } */ -+ svadrb_offset (s32, s32); /* { dg-error {passing 'svint32_t' to argument 1 of 'svadrb_offset', which expects 'svuint32_t' or 'svuint64_t'} } */ -+ svadrb_offset (f32, s32); /* { dg-error {passing 'svfloat32_t' to argument 1 of 'svadrb_offset', which expects 'svuint32_t' or 'svuint64_t'} } */ -+ svadrb_offset (pg, s32); /* { dg-error {passing 'svbool_t' to argument 1 of 'svadrb_offset', which expects 'svuint32_t' or 'svuint64_t'} } */ -+ -+ svadrb_offset (u32, 0); /* { dg-error {passing 'int' to argument 2 of 'svadrb_offset', which expects an SVE vector type} } */ -+ svadrb_offset (u32, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svadrb_offset', which expects a vector of 32-bit or 64-bit integers} } */ -+ svadrb_offset (u32, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svadrb_offset', which expects a vector of 32-bit or 64-bit integers} } */ -+ svadrb_offset (u32, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svadrb_offset', which expects a vector of integers} } */ -+ -+ svadrb_offset (u32, s32); -+ svadrb_offset (u32, u32); -+ svadrb_offset (u32, f32); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svadrb_offset', which expects a vector of integers} } */ -+ svadrb_offset (u32, s64); /* { dg-error {cannot combine a base of type 'svuint32_t' with an offset of type 'svint64_t'} } */ -+ svadrb_offset (u32, u64); /* { dg-error {cannot combine a base of type 'svuint32_t' with an offset of type 'svuint64_t'} } */ -+ svadrb_offset (u32, f64); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svadrb_offset', which expects a vector of integers} } */ -+ -+ svadrb_offset (u64, s32); /* { dg-error {cannot combine a base of type 'svuint64_t' with an offset of type 'svint32_t'} } */ -+ svadrb_offset (u64, u32); /* { dg-error {cannot combine a base of type 'svuint64_t' with an offset of type 'svuint32_t'} } */ -+ svadrb_offset (u64, f32); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svadrb_offset', which expects a vector of 
integers} } */ -+ svadrb_offset (u64, s64); -+ svadrb_offset (u64, u64); -+ svadrb_offset (u64, f64); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svadrb_offset', which expects a vector of integers} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c -new file mode 100644 -index 000000000..8ce89fa10 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c -@@ -0,0 +1,14 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+svuint8_t -+f1 (svbool_t pg, svuint8_t u8, svint16_t s16) -+{ -+ svzip1 (pg); /* { dg-error {too few arguments to function 'svzip1'} } */ -+ svzip1 (pg, u8, u8); /* { dg-error {too many arguments to function 'svzip1'} } */ -+ svzip1 (pg, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svzip1', but previous arguments had type 'svbool_t'} } */ -+ svzip1 (u8, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svzip1', but previous arguments had type 'svuint8_t'} } */ -+ svzip1 (u8, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svzip1', but previous arguments had type 'svuint8_t'} } */ -+ svzip1 (u8, 0); /* { dg-error {passing 'int' to argument 2 of 'svzip1', which expects an SVE vector type} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_int_opt_n.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_int_opt_n.c -new file mode 100644 -index 000000000..965e9a13c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_int_opt_n.c -@@ -0,0 +1,28 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+void -+f1 (svbool_t pg, svfloat16_t f16, svint16_t s16, svuint16_t u16, -+ svfloat32_t f32, svint32_t s32, svuint32_t u32) -+{ -+ svscale_x (pg, f16); /* { dg-error {too few arguments to function 'svscale_x'} } */ -+ svscale_x (pg, f16, s16, s16); /* { dg-error {too many arguments to function 'svscale_x'} } */ -+ svscale_x (s32, f16, s32); /* { dg-error {passing 'svint32_t' to argument 1 of 'svscale_x', which expects 'svbool_t'} } */ -+ svscale_x (1, f16, s32); /* { dg-error {passing 'int' to argument 1 of 'svscale_x', which expects 'svbool_t'} } */ -+ svscale_x (pg, pg, s16); /* { dg-error {'svscale_x' has no form that takes 'svbool_t' arguments} } */ -+ svscale_x (pg, 1, s16); /* { dg-error {passing 'int' to argument 2 of 'svscale_x', which expects an SVE vector type} } */ -+ svscale_x (pg, f16, s16); -+ svscale_x (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svscale_x', which expects a vector of signed integers} } */ -+ svscale_x (pg, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svscale_x', which expects a vector of signed integers} } */ -+ svscale_x (pg, f16, s32); /* { dg-error {arguments 2 and 3 of 'svscale_x' must have the same element size, but the values passed here have type 'svfloat16_t' and 'svint32_t' respectively} } */ -+ svscale_x (pg, f16, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svscale_x', which expects a vector of signed integers} } */ -+ svscale_x (pg, f16, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svscale_x', which expects a vector of signed integers} } */ -+ svscale_x (pg, f16, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svscale_x', which expects a vector of signed integers} } */ -+ svscale_x (pg, f16, 0); -+ svscale_x (pg, s16, s16); /* { dg-error {'svscale_x' has no form that takes 'svint16_t' arguments} } */ -+ svscale_x (pg, s16, u16); /* { 
dg-error {passing 'svuint16_t' to argument 3 of 'svscale_x', which expects a vector of signed integers} } */ -+ svscale_x (pg, s16, s32); /* { dg-error {'svscale_x' has no form that takes 'svint16_t' arguments} } */ -+ svscale_x (pg, s16, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svscale_x', which expects a vector of signed integers} } */ -+ svscale_x (pg, u16, s16); /* { dg-error {'svscale_x' has no form that takes 'svuint16_t' arguments} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c -new file mode 100644 -index 000000000..f1879ca6e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c -@@ -0,0 +1,33 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+void -+f1 (svbool_t pg, svfloat16_t f16, svfloat32_t f32, svfloat64_t f64, -+ svint32_t s32, int i) -+{ -+ svmul_lane (f32, f32); /* { dg-error {too few arguments to function 'svmul_lane'} } */ -+ svmul_lane (f32, f32, 0, 0); /* { dg-error {too many arguments to function 'svmul_lane'} } */ -+ svmul_lane (pg, pg, 0); /* { dg-error {'svmul_lane' has no form that takes 'svbool_t' arguments} } */ -+ svmul_lane (s32, s32, 0); /* { dg-error {'svmul_lane' has no form that takes 'svint32_t' arguments} } */ -+ svmul_lane (1, f32, 0); /* { dg-error {passing 'int' to argument 1 of 'svmul_lane', which expects an SVE vector type} } */ -+ svmul_lane (f32, 1, 0); /* { dg-error {passing 'int' to argument 2 of 'svmul_lane', which expects an SVE vector type} } */ -+ svmul_lane (f32, f64, 0); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svmul_lane', but previous arguments had type 'svfloat32_t'} } */ -+ svmul_lane (f32, f32, s32); /* { dg-error {argument 3 of 'svmul_lane' must be an integer constant expression} } */ -+ svmul_lane (f32, f32, i); /* { dg-error {argument 3 of 'svmul_lane' must be an integer constant expression} } */ -+ -+ svmul_lane (f16, f16, 0); -+ svmul_lane (f16, f16, 7); -+ svmul_lane (f16, f16, 8); /* { dg-error {passing 8 to argument 3 of 'svmul_lane', which expects a value in the range \[0, 7\]} } */ -+ svmul_lane (f16, f16, -1); /* { dg-error {passing -1 to argument 3 of 'svmul_lane', which expects a value in the range \[0, 7\]} } */ -+ -+ svmul_lane (f32, f32, 0); -+ svmul_lane (f32, f32, 3); -+ svmul_lane (f32, f32, 4); /* { dg-error {passing 4 to argument 3 of 'svmul_lane', which expects a value in the range \[0, 3\]} } */ -+ svmul_lane (f32, f32, -1); /* { dg-error {passing -1 to argument 3 of 'svmul_lane', which expects a value in the range \[0, 3\]} } */ -+ -+ svmul_lane (f64, f64, 0); -+ svmul_lane (f64, f64, 1); -+ svmul_lane (f64, f64, 2); /* { dg-error {passing 2 to argument 3 of 'svmul_lane', which expects a value in the range \[0, 1\]} } */ -+ svmul_lane (f64, f64, -1); /* { dg-error {passing -1 to argument 3 of 'svmul_lane', which expects a value in the range \[0, 1\]} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_n_1.c -new file mode 100644 -index 000000000..0c69e66a1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_n_1.c -@@ -0,0 +1,19 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+void -+f1 (svbool_t pg, svuint8_t u8, svfloat16_t f16, int i, float f) -+{ -+ svinsr (u8); /* { dg-error {too few arguments to function 'svinsr'} } */ -+ svinsr (u8, 0, 0); /* { dg-error {too many arguments to function 'svinsr'} } */ -+ 
svinsr (0, 0); /* { dg-error {passing 'int' to argument 1 of 'svinsr', which expects an SVE vector type} } */ -+ svinsr (u8, 0); -+ svinsr (u8, -1); -+ svinsr (u8, i); -+ svinsr (u8, f); -+ svinsr (u8, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svinsr', which expects a scalar element} } */ -+ svinsr (pg, 0); /* { dg-error {'svinsr' has no form that takes 'svbool_t' arguments} } */ -+ svinsr (f16, f); -+ svinsr (f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svinsr', which expects a scalar element} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_1.c -new file mode 100644 -index 000000000..29615e5be ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_1.c -@@ -0,0 +1,12 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+svuint8_t -+f1 (svbool_t pg, svuint8_t u8, svint8_t s8) -+{ -+ svadd_u8_x (pg, u8, s8); /* { dg-error {incompatible type for argument 3 of 'svadd_u8_x'} } */ -+ svadd_u8_x (pg, u8); /* { dg-error {too few arguments to function 'svadd_u8_x'} } */ -+ svadd_u8_x (pg, u8, u8, u8); /* { dg-error {too many arguments to function 'svadd_u8_x'} } */ -+ return svadd_s8_x (pg, s8, s8); /* { dg-error {incompatible types when returning type 'svint8_t' but 'svuint8_t' was expected} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_2.c -new file mode 100644 -index 000000000..9fa83ca99 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_2.c -@@ -0,0 +1,26 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+void -+f1 (svbool_t pg, svint8_t s8, svuint8_t u8, -+ svint16_t s16, svuint16_t u16, svfloat16_t f16) -+{ -+ svadd_x (pg, u8); /* { dg-error {too few arguments to function 'svadd_x'} } */ -+ svadd_x (pg, u8, u8, u8); /* { dg-error {too many arguments to function 'svadd_x'} } */ -+ svadd_x (u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svadd_x', which expects 'svbool_t'} } */ -+ svadd_x (pg, pg, pg); /* { dg-error {'svadd_x' has no form that takes 'svbool_t' arguments} } */ -+ svadd_x (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svadd_x', which expects an SVE vector type} } */ -+ svadd_x (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svadd_x', but previous arguments had type 'svuint8_t'} } */ -+ svadd_x (pg, u8, u8); -+ svadd_x (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svadd_x', but previous arguments had type 'svuint8_t'} } */ -+ svadd_x (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svadd_x', but previous arguments had type 'svuint8_t'} } */ -+ svadd_x (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svadd_x', but previous arguments had type 'svuint8_t'} } */ -+ svadd_x (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svadd_x', but previous arguments had type 'svuint8_t'} } */ -+ svadd_x (pg, u8, 0); -+ -+ svadd_x (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svadd_x', but previous arguments had type 'svfloat16_t'} } */ -+ svadd_x (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svadd_x', but previous arguments had type 'svfloat16_t'} } */ -+ svadd_x (pg, f16, f16); -+ svadd_x (pg, f16, 1); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_3.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_3.c -new file mode 100644 -index 000000000..4d0b253e3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_3.c -@@ -0,0 +1,29 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+void -+f1 (svbool_t pg, svint8_t s8, svuint8_t u8, -+ svint16_t s16, svuint16_t u16, svfloat16_t f16) -+{ -+ svand_z (pg, u8); /* { dg-error {too few arguments to function 'svand_z'} } */ -+ svand_z (pg, u8, u8, u8); /* { dg-error {too many arguments to function 'svand_z'} } */ -+ svand_z (u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svand_z', which expects 'svbool_t'} } */ -+ svand_z (pg, pg, pg); -+ svand_z (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svand_z', which expects an SVE vector type} } */ -+ svand_z (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svand_z', but previous arguments had type 'svuint8_t'} } */ -+ svand_z (pg, u8, u8); -+ svand_z (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svand_z', but previous arguments had type 'svuint8_t'} } */ -+ svand_z (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svand_z', but previous arguments had type 'svuint8_t'} } */ -+ svand_z (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svand_z', but previous arguments had type 'svuint8_t'} } */ -+ svand_z (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svand_z', but previous arguments had type 'svuint8_t'} } */ -+ svand_z (pg, u8, 0); -+ -+ svand_z (pg, pg, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svand_z', but previous arguments had type 'svbool_t'} } */ -+ svand_z (pg, pg, 0); /* { dg-error {passing 'int' to argument 3 of 'svand_z', but its 'svbool_t' form does not accept scalars} } */ -+ -+ svand_z (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svand_z', but previous arguments had type 'svfloat16_t'} } */ -+ svand_z (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svand_z', but previous arguments had type 'svfloat16_t'} } */ -+ svand_z (pg, f16, f16); /* { dg-error {'svand_z' has no form that takes 'svfloat16_t' arguments} } */ -+ svand_z (pg, f16, 1); /* { dg-error {'svand_z' has no form that takes 'svfloat16_t' arguments} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_rotate_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_rotate_1.c -new file mode 100644 -index 000000000..8ffe91bce ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_rotate_1.c -@@ -0,0 +1,24 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+void -+f1 (svbool_t pg, svfloat32_t f32, svfloat64_t f64, svint32_t s32, int i) -+{ -+ svcadd_x (pg, f32, f32); /* { dg-error {too few arguments to function 'svcadd_x'} } */ -+ svcadd_x (pg, f32, f32, 90, 90); /* { dg-error {too many arguments to function 'svcadd_x'} } */ -+ svcadd_x (f32, f32, f32, 90); /* { dg-error {passing 'svfloat32_t' to argument 1 of 'svcadd_x', which expects 'svbool_t'} } */ -+ svcadd_x (pg, pg, pg, 90); /* { dg-error {'svcadd_x' has no form that takes 'svbool_t' arguments} } */ -+ svcadd_x (pg, s32, s32, 90); /* { dg-error {'svcadd_x' has no form that takes 'svint32_t' arguments} } */ -+ svcadd_x (pg, 1, f32, 90); /* { dg-error {passing 'int' to argument 2 of 'svcadd_x', which expects an SVE vector type} } */ -+ svcadd_x (pg, f32, 1, 90); /* { dg-error {passing 'int' to argument 3 of 'svcadd_x', which expects an SVE 
vector type} } */ -+ svcadd_x (pg, f32, f64, 90); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcadd_x', but previous arguments had type 'svfloat32_t'} } */ -+ svcadd_x (pg, f32, f32, s32); /* { dg-error {argument 4 of 'svcadd_x' must be an integer constant expression} } */ -+ svcadd_x (pg, f32, f32, i); /* { dg-error {argument 4 of 'svcadd_x' must be an integer constant expression} } */ -+ svcadd_x (pg, f32, f32, -90); /* { dg-error {passing -90 to argument 4 of 'svcadd_x', which expects either 90 or 270} } */ -+ svcadd_x (pg, f32, f32, 0); /* { dg-error {passing 0 to argument 4 of 'svcadd_x', which expects either 90 or 270} } */ -+ svcadd_x (pg, f32, f32, 1); /* { dg-error {passing 1 to argument 4 of 'svcadd_x', which expects either 90 or 270} } */ -+ svcadd_x (pg, f32, f32, 90); -+ svcadd_x (pg, f32, f32, 180); /* { dg-error {passing 180 to argument 4 of 'svcadd_x', which expects either 90 or 270} } */ -+ svcadd_x (pg, f32, f32, 270); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint64_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint64_n_1.c -new file mode 100644 -index 000000000..c8ca5f746 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint64_n_1.c -@@ -0,0 +1,17 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+void -+f1 (svbool_t pg, svuint8_t u8, int i, float f) -+{ -+ svdupq_lane (u8); /* { dg-error {too few arguments to function 'svdupq_lane'} } */ -+ svdupq_lane (u8, 0, 0); /* { dg-error {too many arguments to function 'svdupq_lane'} } */ -+ svdupq_lane (0, 0); /* { dg-error {passing 'int' to argument 1 of 'svdupq_lane', which expects an SVE vector type} } */ -+ svdupq_lane (u8, 0); -+ svdupq_lane (u8, -1); -+ svdupq_lane (u8, i); -+ svdupq_lane (u8, f); -+ svdupq_lane (u8, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svdupq_lane', which expects 'uint64_t'} } */ -+ svdupq_lane (pg, 0); /* { dg-error {'svdupq_lane' has no form that takes 'svbool_t' arguments} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint64_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint64_opt_n_1.c -new file mode 100644 -index 000000000..27726a80f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint64_opt_n_1.c -@@ -0,0 +1,12 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+svuint8_t -+f1 (svbool_t pg, svuint8_t u8, svint8_t s8, svuint64_t u64) -+{ -+ svlsl_wide_u8_x (pg, u8, u8); /* { dg-error {incompatible type for argument 3 of 'svlsl_wide_u8_x'} } */ -+ svlsl_wide_u8_x (pg, u8); /* { dg-error {too few arguments to function 'svlsl_wide_u8_x'} } */ -+ svlsl_wide_u8_x (pg, u8, u64, u8); /* { dg-error {too many arguments to function 'svlsl_wide_u8_x'} } */ -+ return svlsl_wide_s8_x (pg, s8, u64); /* { dg-error {incompatible types when returning type 'svint8_t' but 'svuint8_t' was expected} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint64_opt_n_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint64_opt_n_2.c -new file mode 100644 -index 000000000..be217394f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint64_opt_n_2.c -@@ -0,0 +1,14 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+void -+f1 (svbool_t pg, svuint8_t u8, svuint64_t u64) -+{ -+ svlsl_wide_x (pg, u8); /* { dg-error {too few arguments to function 'svlsl_wide_x'} } */ -+ svlsl_wide_x (pg, u8, u8, u8); /* { dg-error {too many arguments to function 
'svlsl_wide_x'} } */ -+ svlsl_wide_x (u8, u8, u64); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svlsl_wide_x', which expects 'svbool_t'} } */ -+ svlsl_wide_x (pg, 1, u64); /* { dg-error {passing 'int' to argument 2 of 'svlsl_wide_x', which expects an SVE vector type} } */ -+ svlsl_wide_x (pg, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svlsl_wide_x', which expects 'svuint64_t'} } */ -+ svlsl_wide_x (pg, u64, u64); /* { dg-error {'svlsl_wide_x' has no form that takes 'svuint64_t' arguments} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_1.c -new file mode 100644 -index 000000000..8f86c50b6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_1.c -@@ -0,0 +1,44 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+void -+f1 (svbool_t pg, svuint8_t u8, svint8_t s8, svuint16_t u16, svint16_t s16, -+ svfloat16_t f16) -+{ -+ svtbl (u8); /* { dg-error {too few arguments to function 'svtbl'} } */ -+ svtbl (u8, u8, u8); /* { dg-error {too many arguments to function 'svtbl'} } */ -+ svtbl (pg, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svtbl', which expects a vector of unsigned integers} } */ -+ svtbl (pg, u8); /* { dg-error {'svtbl' has no form that takes 'svbool_t' arguments} } */ -+ -+ svtbl (u8, 0); /* { dg-error {passing 'int' to argument 2 of 'svtbl', which expects an SVE vector type} } */ -+ svtbl (u8, u8); -+ svtbl (u8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svtbl', which expects a vector of unsigned integers} } */ -+ svtbl (u8, u16); /* { dg-error {arguments 1 and 2 of 'svtbl' must have the same element size, but the values passed here have type 'svuint8_t' and 'svuint16_t' respectively} } */ -+ svtbl (u8, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svtbl', which expects a vector of unsigned integers} } */ -+ svtbl (u8, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svtbl', which expects a vector of unsigned integers} } */ -+ -+ svtbl (s8, u8); -+ svtbl (s8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svtbl', which expects a vector of unsigned integers} } */ -+ svtbl (s8, u16); /* { dg-error {arguments 1 and 2 of 'svtbl' must have the same element size, but the values passed here have type 'svint8_t' and 'svuint16_t' respectively} } */ -+ svtbl (s8, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svtbl', which expects a vector of unsigned integers} } */ -+ svtbl (s8, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svtbl', which expects a vector of unsigned integers} } */ -+ -+ svtbl (u16, u8); /* { dg-error {arguments 1 and 2 of 'svtbl' must have the same element size, but the values passed here have type 'svuint16_t' and 'svuint8_t' respectively} } */ -+ svtbl (u16, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svtbl', which expects a vector of unsigned integers} } */ -+ svtbl (u16, u16); -+ svtbl (u16, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svtbl', which expects a vector of unsigned integers} } */ -+ svtbl (u16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svtbl', which expects a vector of unsigned integers} } */ -+ -+ svtbl (s16, u8); /* { dg-error {arguments 1 and 2 of 'svtbl' must have the same element size, but the values passed here have type 'svint16_t' and 'svuint8_t' respectively} } */ -+ svtbl (s16, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svtbl', which expects a 
vector of unsigned integers} } */ -+ svtbl (s16, u16); -+ svtbl (s16, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svtbl', which expects a vector of unsigned integers} } */ -+ svtbl (s16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svtbl', which expects a vector of unsigned integers} } */ -+ -+ svtbl (f16, u8); /* { dg-error {arguments 1 and 2 of 'svtbl' must have the same element size, but the values passed here have type 'svfloat16_t' and 'svuint8_t' respectively} } */ -+ svtbl (f16, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svtbl', which expects a vector of unsigned integers} } */ -+ svtbl (f16, u16); -+ svtbl (f16, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svtbl', which expects a vector of unsigned integers} } */ -+ svtbl (f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svtbl', which expects a vector of unsigned integers} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_n_1.c -new file mode 100644 -index 000000000..36a902e69 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_n_1.c -@@ -0,0 +1,17 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+void -+f1 (svbool_t pg, svuint8_t u8, int i, float f) -+{ -+ svdup_lane (u8); /* { dg-error {too few arguments to function 'svdup_lane'} } */ -+ svdup_lane (u8, 0, 0); /* { dg-error {too many arguments to function 'svdup_lane'} } */ -+ svdup_lane (0, 0); /* { dg-error {passing 'int' to argument 1 of 'svdup_lane', which expects an SVE vector type} } */ -+ svdup_lane (u8, 0); -+ svdup_lane (u8, -1); -+ svdup_lane (u8, i); -+ svdup_lane (u8, f); -+ svdup_lane (u8, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svdup_lane', which expects a scalar integer} } */ -+ svdup_lane (pg, 0); /* { dg-error {'svdup_lane' has no form that takes 'svbool_t' arguments} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_opt_n_1.c -new file mode 100644 -index 000000000..b162ab405 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_uint_opt_n_1.c -@@ -0,0 +1,27 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+void -+f1 (svbool_t pg, svfloat16_t f16, svint16_t s16, svuint16_t u16, -+ svfloat32_t f32, svint32_t s32, svuint32_t u32) -+{ -+ svlsl_x (pg, s16); /* { dg-error {too few arguments to function 'svlsl_x'} } */ -+ svlsl_x (pg, s16, u16, u16); /* { dg-error {too many arguments to function 'svlsl_x'} } */ -+ svlsl_x (s32, s32, u32); /* { dg-error {passing 'svint32_t' to argument 1 of 'svlsl_x', which expects 'svbool_t'} } */ -+ svlsl_x (1, s32, u32); /* { dg-error {passing 'int' to argument 1 of 'svlsl_x', which expects 'svbool_t'} } */ -+ svlsl_x (pg, pg, u16); /* { dg-error {'svlsl_x' has no form that takes 'svbool_t' arguments} } */ -+ svlsl_x (pg, 1, s16); /* { dg-error {passing 'int' to argument 2 of 'svlsl_x', which expects an SVE vector type} } */ -+ svlsl_x (pg, s16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svlsl_x', which expects a vector of unsigned integers} } */ -+ svlsl_x (pg, s16, u16); -+ svlsl_x (pg, s16, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svlsl_x', which expects a vector of unsigned integers} } */ -+ svlsl_x (pg, s16, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svlsl_x', which expects a vector of unsigned integers} } */ -+ 
svlsl_x (pg, s16, u32); /* { dg-error {arguments 2 and 3 of 'svlsl_x' must have the same element size, but the values passed here have type 'svint16_t' and 'svuint32_t' respectively} } */ -+ svlsl_x (pg, s16, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svlsl_x', which expects a vector of unsigned integers} } */ -+ svlsl_x (pg, s16, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svlsl_x', which expects a vector of unsigned integers} } */ -+ svlsl_x (pg, s16, 0); -+ svlsl_x (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svlsl_x', which expects a vector of unsigned integers} } */ -+ svlsl_x (pg, f16, u16); /* { dg-error {'svlsl_x' has no form that takes 'svfloat16_t' arguments} } */ -+ svlsl_x (pg, f16, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svlsl_x', which expects a vector of unsigned integers} } */ -+ svlsl_x (pg, f16, u32); /* { dg-error {'svlsl_x' has no form that takes 'svfloat16_t' arguments} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clast_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clast_1.c -new file mode 100644 -index 000000000..cb9ac946c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clast_1.c -@@ -0,0 +1,15 @@ -+#include -+ -+void -+test (svbool_t pg, svint32_t s32, svint64_t s64, int i) -+{ -+ svclasta (pg, 1); /* { dg-error {too few arguments to function 'svclasta'} } */ -+ svclasta (pg, 1, s32, 1); /* { dg-error {too many arguments to function 'svclasta'} } */ -+ svclasta (1, 1, s32); /* { dg-error {passing 'int' to argument 1 of 'svclasta', which expects 'svbool_t'} } */ -+ svclasta (pg, 1, 1); /* { dg-error {passing 'int' to argument 3 of 'svclasta', which expects an SVE vector type} } */ -+ svclasta (pg, 1, pg); /* { dg-error {'svclasta' has no form that takes 'svbool_t' arguments} } */ -+ svclasta (pg, i, s32); -+ svclasta (pg, s32, 1); /* { dg-error {passing 'int' to argument 3 of 'svclasta', which expects an SVE vector type} } */ -+ svclasta (pg, s32, s64); /* { dg-error {passing 'svint64_t' to argument 3 of 'svclasta', but previous arguments had type 'svint32_t'} } */ -+ svclasta (pg, pg, pg); /* { dg-error {'svclasta' has no form that takes 'svbool_t' arguments} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_opt_n_1.c -new file mode 100644 -index 000000000..71c8e86d5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_opt_n_1.c -@@ -0,0 +1,26 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+void -+f1 (svbool_t pg, svint8_t s8, svuint8_t u8, -+ svint16_t s16, svuint16_t u16, svfloat16_t f16) -+{ -+ svcmpeq (pg, u8); /* { dg-error {too few arguments to function 'svcmpeq'} } */ -+ svcmpeq (pg, u8, u8, u8); /* { dg-error {too many arguments to function 'svcmpeq'} } */ -+ svcmpeq (u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svcmpeq', which expects 'svbool_t'} } */ -+ svcmpeq (pg, pg, pg); /* { dg-error {'svcmpeq' has no form that takes 'svbool_t' arguments} } */ -+ svcmpeq (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svcmpeq', which expects an SVE vector type} } */ -+ svcmpeq (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svuint8_t'} } */ -+ svcmpeq (pg, u8, u8); -+ svcmpeq (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svuint8_t'} } */ -+ 
svcmpeq (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svuint8_t'} } */ -+ svcmpeq (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svuint8_t'} } */ -+ svcmpeq (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svuint8_t'} } */ -+ svcmpeq (pg, u8, 0); -+ -+ svcmpeq (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svfloat16_t'} } */ -+ svcmpeq (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svfloat16_t'} } */ -+ svcmpeq (pg, f16, f16); -+ svcmpeq (pg, f16, 1); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_scalar_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_scalar_1.c -new file mode 100644 -index 000000000..d5a60f841 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_scalar_1.c -@@ -0,0 +1,85 @@ -+/* { dg-do compile } */ -+ -+#include -+#include -+ -+enum signed_enum { SA = -1, SB }; -+enum unsigned_enum { UA, UB }; -+ -+void -+test (int8_t s8, int16_t s16, int32_t s32, int64_t s64, -+ uint8_t u8, uint16_t u16, uint32_t u32, uint64_t u64, -+ bool b, enum signed_enum se, enum unsigned_enum ue, -+ int *ptr, float f32, svbool_t pg, svint32_t vec) -+{ -+ svwhilele_b8 (s32); /* { dg-error {too few arguments to function 'svwhilele_b8'} } */ -+ svwhilele_b8 (s32, s32, s32); /* { dg-error {too many arguments to function 'svwhilele_b8'} } */ -+ -+ svwhilele_b8 (b, b); -+ svwhilele_b8 (se, se); -+ svwhilele_b8 (ue, ue); -+ svwhilele_b8 (s8, s8); -+ svwhilele_b8 (u8, u8); -+ svwhilele_b8 (s16, s16); -+ svwhilele_b8 (u16, u16); -+ svwhilele_b8 (ptr, ptr); /* { dg-error {passing 'int \*' to argument 1 of 'svwhilele_b8', which expects a 32-bit or 64-bit integer type} } */ -+ svwhilele_b8 (f32, f32); /* { dg-error {passing 'float' to argument 1 of 'svwhilele_b8', which expects a 32-bit or 64-bit integer type} } */ -+ svwhilele_b8 (pg, pg); /* { dg-error {passing 'svbool_t' to argument 1 of 'svwhilele_b8', which expects a 32-bit or 64-bit integer type} } */ -+ svwhilele_b8 (vec, vec); /* { dg-error {passing 'svint32_t' to argument 1 of 'svwhilele_b8', which expects a 32-bit or 64-bit integer type} } */ -+ -+ svwhilele_b8 (s32, b); -+ svwhilele_b8 (s32, se); -+ svwhilele_b8 (s32, ue); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'int32_t' but argument 2 has type 'uint32_t'} } */ -+ svwhilele_b8 (s32, s8); -+ svwhilele_b8 (s32, u8); -+ svwhilele_b8 (s32, s16); -+ svwhilele_b8 (s32, u16); -+ -+ svwhilele_b8 (u32, b); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'uint32_t' but argument 2 has type 'int32_t'} } */ -+ svwhilele_b8 (u32, se); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'uint32_t' but argument 2 has type 'int32_t'} } */ -+ svwhilele_b8 (u32, ue); -+ svwhilele_b8 (u32, s8); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'uint32_t' but argument 2 has type 'int32_t'} } */ -+ svwhilele_b8 (u32, u8); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'uint32_t' but argument 2 has type 'int32_t'} } */ -+ svwhilele_b8 (u32, s16); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'uint32_t' but argument 2 has type 'int32_t'} } */ -+ svwhilele_b8 (u32, u16); /* { 
dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'uint32_t' but argument 2 has type 'int32_t'} } */ -+ -+ svwhilele_b8 (s32, s32); -+ svwhilele_b8 (s32, u32); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'int32_t' but argument 2 has type 'uint32_t'} } */ -+ svwhilele_b8 (s32, s64); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'int32_t' but argument 2 has type 'int64_t'} } */ -+ svwhilele_b8 (s32, u64); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'int32_t' but argument 2 has type 'uint64_t'} } */ -+ -+ svwhilele_b8 (u32, s32); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'uint32_t' but argument 2 has type 'int32_t'} } */ -+ svwhilele_b8 (u32, u32); -+ svwhilele_b8 (u32, s64); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'uint32_t' but argument 2 has type 'int64_t'} } */ -+ svwhilele_b8 (u32, u64); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'uint32_t' but argument 2 has type 'uint64_t'} } */ -+ -+ svwhilele_b8 (s64, s32); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'int64_t' but argument 2 has type 'int32_t'} } */ -+ svwhilele_b8 (s64, u32); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'int64_t' but argument 2 has type 'uint32_t'} } */ -+ svwhilele_b8 (s64, s64); -+ svwhilele_b8 (s64, u64); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'int64_t' but argument 2 has type 'uint64_t'} } */ -+ -+ svwhilele_b8 (u64, s32); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'uint64_t' but argument 2 has type 'int32_t'} } */ -+ svwhilele_b8 (u64, u32); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'uint64_t' but argument 2 has type 'uint32_t'} } */ -+ svwhilele_b8 (u64, s64); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'uint64_t' but argument 2 has type 'int64_t'} } */ -+ svwhilele_b8 (u64, u64); -+ -+ svwhilele_b8 (0, s32); -+ svwhilele_b8 (0, u32); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'int32_t' but argument 2 has type 'uint32_t'} } */ -+ svwhilele_b8 (0, s64); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'int32_t' but argument 2 has type 'int64_t'} } */ -+ svwhilele_b8 (0, u64); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'int32_t' but argument 2 has type 'uint64_t'} } */ -+ -+ svwhilele_b8 (s32, 0); -+ svwhilele_b8 (u32, 0); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'uint32_t' but argument 2 has type 'int32_t'} } */ -+ svwhilele_b8 (s64, 0); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'int64_t' but argument 2 has type 'int32_t'} } */ -+ svwhilele_b8 (u64, 0); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'uint64_t' but argument 2 has type 'int32_t'} } */ -+ -+ svwhilele_b8 (0U, s32); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'uint32_t' but argument 2 has type 'int32_t'} } */ -+ svwhilele_b8 (0U, u32); -+ svwhilele_b8 (0U, s64); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'uint32_t' but argument 2 has type 'int64_t'} } */ -+ svwhilele_b8 (0U, u64); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'uint32_t' but argument 2 has type 'uint64_t'} } */ -+ -+ svwhilele_b8 (s32, 0U); /* { dg-error {call to 'svwhilele_b8' 
is ambiguous; argument 1 has type 'int32_t' but argument 2 has type 'uint32_t'} } */ -+ svwhilele_b8 (u32, 0U); -+ svwhilele_b8 (s64, 0U); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'int64_t' but argument 2 has type 'uint32_t'} } */ -+ svwhilele_b8 (u64, 0U); /* { dg-error {call to 'svwhilele_b8' is ambiguous; argument 1 has type 'uint64_t' but argument 2 has type 'uint32_t'} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_wide_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_wide_opt_n_1.c -new file mode 100644 -index 000000000..fc5e45663 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_wide_opt_n_1.c -@@ -0,0 +1,26 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+svuint8_t -+f1 (svbool_t pg, svuint8_t u8, svint8_t s8, svint64_t s64, svuint64_t u64, -+ svfloat32_t f32, svfloat64_t f64, unsigned int x) -+{ -+ svcmpeq_wide (pg, s8); /* { dg-error {too few arguments to function 'svcmpeq_wide'} } */ -+ svcmpeq_wide (pg, s8, s64, s8); /* { dg-error {too many arguments to function 'svcmpeq_wide'} } */ -+ svcmpeq_wide (s8, s8, s64); /* { dg-error {passing 'svint8_t' to argument 1 of 'svcmpeq_wide', which expects 'svbool_t'} } */ -+ svcmpeq_wide (pg, 0, s64); /* { dg-error {passing 'int' to argument 2 of 'svcmpeq_wide', which expects an SVE vector type} } */ -+ svcmpeq_wide (pg, s8, 0); -+ svcmpeq_wide (pg, s8, x); -+ svcmpeq_wide (pg, s8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svcmpeq_wide', which expects a vector of 64-bit elements} } */ -+ svcmpeq_wide (pg, s8, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svcmpeq_wide', which expects a vector of 64-bit elements} } */ -+ svcmpeq_wide (pg, s8, s64); -+ svcmpeq_wide (pg, s8, u64); /* { dg-error {arguments 2 and 3 of 'svcmpeq_wide' must have the same signedness, but the values passed here have type 'svint8_t' and 'svuint64_t' respectively} } */ -+ svcmpeq_wide (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svcmpeq_wide', which expects a vector of 64-bit elements} } */ -+ svcmpeq_wide (pg, u8, u64); /* { dg-error {'svcmpeq_wide' has no form that takes 'svuint8_t' arguments} } */ -+ svcmpeq_wide (pg, s64, s64); /* { dg-error {'svcmpeq_wide' has no form that takes 'svint64_t' arguments} } */ -+ svcmpeq_wide (pg, f32, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svcmpeq_wide', which expects a vector of 64-bit elements} } */ -+ svcmpeq_wide (pg, f32, f64); /* { dg-error {'svcmpeq_wide' has no form that takes 'svfloat32_t' arguments} } */ -+ svcmpeq_wide (pg, f64, f64); /* { dg-error {'svcmpeq_wide' has no form that takes 'svfloat64_t' arguments} } */ -+ svcmpeq_wide (pg, pg, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svcmpeq_wide', which expects a vector of 64-bit elements} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/count_pat_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/count_pat_1.c -new file mode 100644 -index 000000000..8dd76a553 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/count_pat_1.c -@@ -0,0 +1,42 @@ -+#include -+ -+void -+test (enum svpattern pat, int i) -+{ -+ svcntb_pat (pat); /* { dg-error {argument 1 of 'svcntb_pat' must be an integer constant expression} } */ -+ svcntb_pat (i); /* { dg-error {argument 1 of 'svcntb_pat' must be an integer constant expression} } */ -+ svcntb_pat ((enum svpattern) -1); /* { dg-error {passing 4294967295 to argument 1 of 'svcntb_pat', which 
expects a valid 'enum svpattern' value} } */ -+ svcntb_pat ((enum svpattern) 0); -+ svcntb_pat ((enum svpattern) 1); -+ svcntb_pat ((enum svpattern) 2); -+ svcntb_pat ((enum svpattern) 3); -+ svcntb_pat ((enum svpattern) 4); -+ svcntb_pat ((enum svpattern) 5); -+ svcntb_pat ((enum svpattern) 6); -+ svcntb_pat ((enum svpattern) 7); -+ svcntb_pat ((enum svpattern) 8); -+ svcntb_pat ((enum svpattern) 9); -+ svcntb_pat ((enum svpattern) 10); -+ svcntb_pat ((enum svpattern) 11); -+ svcntb_pat ((enum svpattern) 12); -+ svcntb_pat ((enum svpattern) 13); -+ svcntb_pat ((enum svpattern) 14); /* { dg-error {passing 14 to argument 1 of 'svcntb_pat', which expects a valid 'enum svpattern' value} } */ -+ svcntb_pat ((enum svpattern) 15); /* { dg-error {passing 15 to argument 1 of 'svcntb_pat', which expects a valid 'enum svpattern' value} } */ -+ svcntb_pat ((enum svpattern) 16); /* { dg-error {passing 16 to argument 1 of 'svcntb_pat', which expects a valid 'enum svpattern' value} } */ -+ svcntb_pat ((enum svpattern) 17); /* { dg-error {passing 17 to argument 1 of 'svcntb_pat', which expects a valid 'enum svpattern' value} } */ -+ svcntb_pat ((enum svpattern) 18); /* { dg-error {passing 18 to argument 1 of 'svcntb_pat', which expects a valid 'enum svpattern' value} } */ -+ svcntb_pat ((enum svpattern) 19); /* { dg-error {passing 19 to argument 1 of 'svcntb_pat', which expects a valid 'enum svpattern' value} } */ -+ svcntb_pat ((enum svpattern) 20); /* { dg-error {passing 20 to argument 1 of 'svcntb_pat', which expects a valid 'enum svpattern' value} } */ -+ svcntb_pat ((enum svpattern) 21); /* { dg-error {passing 21 to argument 1 of 'svcntb_pat', which expects a valid 'enum svpattern' value} } */ -+ svcntb_pat ((enum svpattern) 22); /* { dg-error {passing 22 to argument 1 of 'svcntb_pat', which expects a valid 'enum svpattern' value} } */ -+ svcntb_pat ((enum svpattern) 23); /* { dg-error {passing 23 to argument 1 of 'svcntb_pat', which expects a valid 'enum svpattern' value} } */ -+ svcntb_pat ((enum svpattern) 24); /* { dg-error {passing 24 to argument 1 of 'svcntb_pat', which expects a valid 'enum svpattern' value} } */ -+ svcntb_pat ((enum svpattern) 25); /* { dg-error {passing 25 to argument 1 of 'svcntb_pat', which expects a valid 'enum svpattern' value} } */ -+ svcntb_pat ((enum svpattern) 26); /* { dg-error {passing 26 to argument 1 of 'svcntb_pat', which expects a valid 'enum svpattern' value} } */ -+ svcntb_pat ((enum svpattern) 27); /* { dg-error {passing 27 to argument 1 of 'svcntb_pat', which expects a valid 'enum svpattern' value} } */ -+ svcntb_pat ((enum svpattern) 28); /* { dg-error {passing 28 to argument 1 of 'svcntb_pat', which expects a valid 'enum svpattern' value} } */ -+ svcntb_pat ((enum svpattern) 29); -+ svcntb_pat ((enum svpattern) 30); -+ svcntb_pat ((enum svpattern) 31); -+ svcntb_pat ((enum svpattern) 32); /* { dg-error {passing 32 to argument 1 of 'svcntb_pat', which expects a valid 'enum svpattern' value} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/count_vector_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/count_vector_1.c -new file mode 100644 -index 000000000..daf9e0d5b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/count_vector_1.c -@@ -0,0 +1,13 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+void -+f1 (svbool_t pg, svuint32_t u32, svuint32x2_t u32x2) -+{ -+ svlen (); /* { dg-error {too few arguments to function 'svlen'} } */ -+ svlen (u32, u32); /* { dg-error {too many arguments to function 
'svlen'} } */ -+ svlen (0); /* { dg-error {passing 'int' to argument 1 of 'svlen', which expects an SVE vector type} } */ -+ svlen (pg); /* { dg-error {'svlen' has no form that takes 'svbool_t' arguments} } */ -+ svlen (u32x2); /* { dg-error {passing 'svuint32x2_t' to argument 1 of 'svlen', which expects a single SVE vector rather than a tuple} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c -new file mode 100644 -index 000000000..31321a046 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c -@@ -0,0 +1,21 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-std=c99 -Wall -Wextra" } */ -+ -+#include -+ -+void -+f1 (svuint8x2_t *ptr, svbool_t pg, svuint8_t u8, svfloat64_t f64, -+ svuint8x2_t u8x2, int x) -+{ -+ *ptr = svcreate2 (u8); /* { dg-error {too few arguments to function 'svcreate2'} } */ -+ *ptr = svcreate2 (u8, u8, u8); /* { dg-error {too many arguments to function 'svcreate2'} } */ -+ *ptr = svcreate2 (u8x2, u8x2); /* { dg-error {passing 'svuint8x2_t' to argument 1 of 'svcreate2', which expects a single SVE vector rather than a tuple} } */ -+ *ptr = svcreate2 (u8, f64); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svcreate2', but previous arguments had type 'svuint8_t'} } */ -+ *ptr = svcreate2 (u8, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svcreate2', but previous arguments had type 'svuint8_t'} } */ -+ *ptr = svcreate2 (u8, x); /* { dg-error {passing 'int' to argument 2 of 'svcreate2', which expects an SVE vector type} } */ -+ *ptr = svcreate2 (x, u8); /* { dg-error {passing 'int' to argument 1 of 'svcreate2', which expects an SVE vector type} } */ -+ *ptr = svcreate2 (pg, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svcreate2', but previous arguments had type 'svbool_t'} } */ -+ *ptr = svcreate2 (pg, pg); /* { dg-error {'svcreate2' has no form that takes 'svbool_t' arguments} } */ -+ *ptr = svcreate2 (u8, u8); -+ *ptr = svcreate2 (f64, f64); /* { dg-error {incompatible types when assigning to type 'svuint8x2_t' from type 'svfloat64x2_t'} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_2.c -new file mode 100644 -index 000000000..28ad16c2d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_2.c -@@ -0,0 +1,23 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-std=c99 -Wall -Wextra" } */ -+ -+#include -+ -+void -+f1 (svuint8x2_t *ptr, svbool_t pg, svuint8_t u8, svfloat64_t f64, -+ svuint8x2_t u8x2, int x) -+{ -+ *ptr = svcreate2_u8 (u8); /* { dg-error {too few arguments to function 'svcreate2_u8'} } */ -+ *ptr = svcreate2_u8 (u8, u8, u8); /* { dg-error {too many arguments to function 'svcreate2_u8'} } */ -+ *ptr = svcreate2_u8 (u8x2, u8x2); /* { dg-error {incompatible type for argument 1 of 'svcreate2_u8'} } */ -+ /* { dg-error {incompatible type for argument 2 of 'svcreate2_u8'} "" { target *-*-* } .-1 } */ -+ *ptr = svcreate2_u8 (u8, f64); /* { dg-error {incompatible type for argument 2 of 'svcreate2_u8'} } */ -+ *ptr = svcreate2_u8 (u8, pg); /* { dg-error {incompatible type for argument 2 of 'svcreate2_u8'} } */ -+ *ptr = svcreate2_u8 (u8, x); /* { dg-error {incompatible type for argument 2 of 'svcreate2_u8'} } */ -+ *ptr = svcreate2_u8 (x, u8); /* { dg-error {incompatible type for argument 1 of 'svcreate2_u8'} } */ -+ *ptr = svcreate2_u8 (pg, u8); /* { dg-error 
{incompatible type for argument 1 of 'svcreate2_u8'} } */ -+ *ptr = svcreate2_u8 (pg, pg); /* { dg-error {incompatible type for argument 1 of 'svcreate2_u8'} } */ -+ /* { dg-error {incompatible type for argument 2 of 'svcreate2_u8'} "" { target *-*-* } .-1 } */ -+ *ptr = svcreate2_u8 (u8, u8); -+ *ptr = svcreate2_f64 (f64, f64); /* { dg-error {incompatible types when assigning to type 'svuint8x2_t' from type 'svfloat64x2_t'} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_3.c -new file mode 100644 -index 000000000..a88e56b31 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_3.c -@@ -0,0 +1,22 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-std=c99 -Wall -Wextra" } */ -+ -+#include -+ -+void -+f1 (svfloat16x3_t *ptr, svbool_t pg, svfloat16_t f16, svfloat64_t f64, -+ svfloat16x3_t f16x3, int x) -+{ -+ *ptr = svcreate3 (f16); /* { dg-error {too few arguments to function 'svcreate3'} } */ -+ *ptr = svcreate3 (f16, f16); /* { dg-error {too few arguments to function 'svcreate3'} } */ -+ *ptr = svcreate3 (f16, f16, f16, f16); /* { dg-error {too many arguments to function 'svcreate3'} } */ -+ *ptr = svcreate3 (f16x3, f16x3, f16x3); /* { dg-error {passing 'svfloat16x3_t' to argument 1 of 'svcreate3', which expects a single SVE vector rather than a tuple} } */ -+ *ptr = svcreate3 (f16, f16, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcreate3', but previous arguments had type 'svfloat16_t'} } */ -+ *ptr = svcreate3 (f16, pg, f16); /* { dg-error {passing 'svbool_t' to argument 2 of 'svcreate3', but previous arguments had type 'svfloat16_t'} } */ -+ *ptr = svcreate3 (f16, x, f16); /* { dg-error {passing 'int' to argument 2 of 'svcreate3', which expects an SVE vector type} } */ -+ *ptr = svcreate3 (x, f16, f16); /* { dg-error {passing 'int' to argument 1 of 'svcreate3', which expects an SVE vector type} } */ -+ *ptr = svcreate3 (pg, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svcreate3', but previous arguments had type 'svbool_t'} } */ -+ *ptr = svcreate3 (pg, pg, pg); /* { dg-error {'svcreate3' has no form that takes 'svbool_t' arguments} } */ -+ *ptr = svcreate3 (f16, f16, f16); -+ *ptr = svcreate3 (f64, f64, f64); /* { dg-error {incompatible types when assigning to type 'svfloat16x3_t' from type 'svfloat64x3_t'} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_4.c -new file mode 100644 -index 000000000..c111e9f29 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_4.c -@@ -0,0 +1,26 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-std=c99 -Wall -Wextra" } */ -+ -+#include -+ -+void -+f1 (svfloat16x3_t *ptr, svbool_t pg, svfloat16_t f16, svfloat64_t f64, -+ svfloat16x3_t f16x3, int x) -+{ -+ *ptr = svcreate3_f16 (f16); /* { dg-error {too few arguments to function 'svcreate3_f16'} } */ -+ *ptr = svcreate3_f16 (f16, f16); /* { dg-error {too few arguments to function 'svcreate3_f16'} } */ -+ *ptr = svcreate3_f16 (f16, f16, f16, f16); /* { dg-error {too many arguments to function 'svcreate3_f16'} } */ -+ *ptr = svcreate3_f16 (f16x3, f16x3, f16x3); /* { dg-error {incompatible type for argument 1 of 'svcreate3_f16'} } */ -+ /* { dg-error {incompatible type for argument 2 of 'svcreate3_f16'} "" { target *-*-* } .-1 } */ -+ /* { dg-error {incompatible type for argument 3 of 'svcreate3_f16'} "" 
{ target *-*-* } .-2 } */ -+ *ptr = svcreate3_f16 (f16, f16, f64); /* { dg-error {incompatible type for argument 3 of 'svcreate3_f16'} } */ -+ *ptr = svcreate3_f16 (f16, pg, f16); /* { dg-error {incompatible type for argument 2 of 'svcreate3_f16'} } */ -+ *ptr = svcreate3_f16 (f16, x, f16); /* { dg-error {incompatible type for argument 2 of 'svcreate3_f16'} } */ -+ *ptr = svcreate3_f16 (x, f16, f16); /* { dg-error {incompatible type for argument 1 of 'svcreate3_f16'} } */ -+ *ptr = svcreate3_f16 (pg, f16, f16); /* { dg-error {incompatible type for argument 1 of 'svcreate3_f16'} } */ -+ *ptr = svcreate3_f16 (pg, pg, pg); /* { dg-error {incompatible type for argument 1 of 'svcreate3_f16'} } */ -+ /* { dg-error {incompatible type for argument 2 of 'svcreate3_f16'} "" { target *-*-* } .-1 } */ -+ /* { dg-error {incompatible type for argument 3 of 'svcreate3_f16'} "" { target *-*-* } .-2 } */ -+ *ptr = svcreate3_f16 (f16, f16, f16); -+ *ptr = svcreate3_f64 (f64, f64, f64); /* { dg-error {incompatible types when assigning to type 'svfloat16x3_t' from type 'svfloat64x3_t'} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_5.c -new file mode 100644 -index 000000000..fed124506 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_5.c -@@ -0,0 +1,23 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-std=c99 -Wall -Wextra" } */ -+ -+#include -+ -+void -+f1 (svint32x4_t *ptr, svbool_t pg, svint32_t s32, svfloat64_t f64, -+ svint32x4_t s32x4, int x) -+{ -+ *ptr = svcreate4 (s32); /* { dg-error {too few arguments to function 'svcreate4'} } */ -+ *ptr = svcreate4 (s32, s32); /* { dg-error {too few arguments to function 'svcreate4'} } */ -+ *ptr = svcreate4 (s32, s32, s32); /* { dg-error {too few arguments to function 'svcreate4'} } */ -+ *ptr = svcreate4 (s32, s32, s32, s32, s32); /* { dg-error {too many arguments to function 'svcreate4'} } */ -+ *ptr = svcreate4 (s32x4, s32x4, s32x4, s32x4); /* { dg-error {passing 'svint32x4_t' to argument 1 of 'svcreate4', which expects a single SVE vector rather than a tuple} } */ -+ *ptr = svcreate4 (s32, s32, s32, f64); /* { dg-error {passing 'svfloat64_t' to argument 4 of 'svcreate4', but previous arguments had type 'svint32_t'} } */ -+ *ptr = svcreate4 (s32, s32, pg, s32); /* { dg-error {passing 'svbool_t' to argument 3 of 'svcreate4', but previous arguments had type 'svint32_t'} } */ -+ *ptr = svcreate4 (s32, x, s32, s32); /* { dg-error {passing 'int' to argument 2 of 'svcreate4', which expects an SVE vector type} } */ -+ *ptr = svcreate4 (x, s32, s32, s32); /* { dg-error {passing 'int' to argument 1 of 'svcreate4', which expects an SVE vector type} } */ -+ *ptr = svcreate4 (pg, s32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svcreate4', but previous arguments had type 'svbool_t'} } */ -+ *ptr = svcreate4 (pg, pg, pg, pg); /* { dg-error {'svcreate4' has no form that takes 'svbool_t' arguments} } */ -+ *ptr = svcreate4 (s32, s32, s32, s32); -+ *ptr = svcreate4 (f64, f64, f64, f64); /* { dg-error {incompatible types when assigning to type 'svint32x4_t' from type 'svfloat64x4_t'} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_6.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_6.c -new file mode 100644 -index 000000000..b9e298acf ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_6.c -@@ -0,0 +1,29 @@ -+/* { dg-do compile } */ -+/* { 
dg-additional-options "-std=c99 -Wall -Wextra" } */ -+ -+#include -+ -+void -+f1 (svint32x4_t *ptr, svbool_t pg, svint32_t s32, svfloat64_t f64, -+ svint32x4_t s32x4, int x) -+{ -+ *ptr = svcreate4_s32 (s32); /* { dg-error {too few arguments to function 'svcreate4_s32'} } */ -+ *ptr = svcreate4_s32 (s32, s32); /* { dg-error {too few arguments to function 'svcreate4_s32'} } */ -+ *ptr = svcreate4_s32 (s32, s32, s32); /* { dg-error {too few arguments to function 'svcreate4_s32'} } */ -+ *ptr = svcreate4_s32 (s32, s32, s32, s32, s32); /* { dg-error {too many arguments to function 'svcreate4_s32'} } */ -+ *ptr = svcreate4_s32 (s32x4, s32x4, s32x4, s32x4); /* { dg-error {incompatible type for argument 1 of 'svcreate4_s32'} } */ -+ /* { dg-error {incompatible type for argument 2 of 'svcreate4_s32'} "" { target *-*-* } .-1 } */ -+ /* { dg-error {incompatible type for argument 3 of 'svcreate4_s32'} "" { target *-*-* } .-2 } */ -+ /* { dg-error {incompatible type for argument 4 of 'svcreate4_s32'} "" { target *-*-* } .-3 } */ -+ *ptr = svcreate4_s32 (s32, s32, s32, f64); /* { dg-error {incompatible type for argument 4 of 'svcreate4_s32'} } */ -+ *ptr = svcreate4_s32 (s32, s32, pg, s32); /* { dg-error {incompatible type for argument 3 of 'svcreate4_s32'} } */ -+ *ptr = svcreate4_s32 (s32, x, s32, s32); /* { dg-error {incompatible type for argument 2 of 'svcreate4_s32'} } */ -+ *ptr = svcreate4_s32 (x, s32, s32, s32); /* { dg-error {incompatible type for argument 1 of 'svcreate4_s32'} } */ -+ *ptr = svcreate4_s32 (pg, s32, s32, s32); /* { dg-error {incompatible type for argument 1 of 'svcreate4_s32'} } */ -+ *ptr = svcreate4_s32 (pg, pg, pg, pg); /* { dg-error {incompatible type for argument 1 of 'svcreate4_s32'} } */ -+ /* { dg-error {incompatible type for argument 2 of 'svcreate4_s32'} "" { target *-*-* } .-1 } */ -+ /* { dg-error {incompatible type for argument 3 of 'svcreate4_s32'} "" { target *-*-* } .-2 } */ -+ /* { dg-error {incompatible type for argument 4 of 'svcreate4_s32'} "" { target *-*-* } .-3 } */ -+ *ptr = svcreate4_s32 (s32, s32, s32, s32); -+ *ptr = svcreate4_f64 (f64, f64, f64, f64); /* { dg-error {incompatible types when assigning to type 'svint32x4_t' from type 'svfloat64x4_t'} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ext_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ext_1.c -new file mode 100644 -index 000000000..bdce3926d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ext_1.c -@@ -0,0 +1,67 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+void -+f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16, -+ svfloat16_t f16, svint32_t s32, svuint32_t u32, svfloat32_t f32, -+ svint64_t s64, svuint64_t u64, svfloat64_t f64, int i) -+{ -+ svext (pg, pg, 0); /* { dg-error {'svext' has no form that takes 'svbool_t' arguments} } */ -+ svext (s8, s8, i); /* { dg-error {argument 3 of 'svext' must be an integer constant expression} } */ -+ -+ svext (s8, s8, -1); /* { dg-error {passing -1 to argument 3 of 'svext', which expects a value in the range \[0, 255\]} } */ -+ svext (s8, s8, 0); -+ svext (s8, s8, 255); -+ svext (s8, s8, 256); /* { dg-error {passing 256 to argument 3 of 'svext', which expects a value in the range \[0, 255\]} } */ -+ -+ svext (u8, u8, -1); /* { dg-error {passing -1 to argument 3 of 'svext', which expects a value in the range \[0, 255\]} } */ -+ svext (u8, u8, 0); -+ svext (u8, u8, 255); -+ svext (u8, u8, 256); /* { dg-error {passing 256 to argument 3 of 'svext', which expects a 
value in the range \[0, 255\]} } */ -+ -+ svext (s16, s16, -1); /* { dg-error {passing -1 to argument 3 of 'svext', which expects a value in the range \[0, 127\]} } */ -+ svext (s16, s16, 0); -+ svext (s16, s16, 127); -+ svext (s16, s16, 128); /* { dg-error {passing 128 to argument 3 of 'svext', which expects a value in the range \[0, 127\]} } */ -+ -+ svext (u16, u16, -1); /* { dg-error {passing -1 to argument 3 of 'svext', which expects a value in the range \[0, 127\]} } */ -+ svext (u16, u16, 0); -+ svext (u16, u16, 127); -+ svext (u16, u16, 128); /* { dg-error {passing 128 to argument 3 of 'svext', which expects a value in the range \[0, 127\]} } */ -+ -+ svext (f16, f16, -1); /* { dg-error {passing -1 to argument 3 of 'svext', which expects a value in the range \[0, 127\]} } */ -+ svext (f16, f16, 0); -+ svext (f16, f16, 127); -+ svext (f16, f16, 128); /* { dg-error {passing 128 to argument 3 of 'svext', which expects a value in the range \[0, 127\]} } */ -+ -+ svext (s32, s32, -1); /* { dg-error {passing -1 to argument 3 of 'svext', which expects a value in the range \[0, 63\]} } */ -+ svext (s32, s32, 0); -+ svext (s32, s32, 63); -+ svext (s32, s32, 64); /* { dg-error {passing 64 to argument 3 of 'svext', which expects a value in the range \[0, 63\]} } */ -+ -+ svext (u32, u32, -1); /* { dg-error {passing -1 to argument 3 of 'svext', which expects a value in the range \[0, 63\]} } */ -+ svext (u32, u32, 0); -+ svext (u32, u32, 63); -+ svext (u32, u32, 64); /* { dg-error {passing 64 to argument 3 of 'svext', which expects a value in the range \[0, 63\]} } */ -+ -+ svext (f32, f32, -1); /* { dg-error {passing -1 to argument 3 of 'svext', which expects a value in the range \[0, 63\]} } */ -+ svext (f32, f32, 0); -+ svext (f32, f32, 63); -+ svext (f32, f32, 64); /* { dg-error {passing 64 to argument 3 of 'svext', which expects a value in the range \[0, 63\]} } */ -+ -+ svext (s64, s64, -1); /* { dg-error {passing -1 to argument 3 of 'svext', which expects a value in the range \[0, 31\]} } */ -+ svext (s64, s64, 0); -+ svext (s64, s64, 31); -+ svext (s64, s64, 32); /* { dg-error {passing 32 to argument 3 of 'svext', which expects a value in the range \[0, 31\]} } */ -+ -+ svext (u64, u64, -1); /* { dg-error {passing -1 to argument 3 of 'svext', which expects a value in the range \[0, 31\]} } */ -+ svext (u64, u64, 0); -+ svext (u64, u64, 31); -+ svext (u64, u64, 32); /* { dg-error {passing 32 to argument 3 of 'svext', which expects a value in the range \[0, 31\]} } */ -+ -+ svext (f64, f64, -1); /* { dg-error {passing -1 to argument 3 of 'svext', which expects a value in the range \[0, 31\]} } */ -+ svext (f64, f64, 0); -+ svext (f64, f64, 31); -+ svext (f64, f64, 32); /* { dg-error {passing 32 to argument 3 of 'svext', which expects a value in the range \[0, 31\]} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/fold_left_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/fold_left_1.c -new file mode 100644 -index 000000000..1d292786d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/fold_left_1.c -@@ -0,0 +1,21 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+svuint8_t -+f1 (svbool_t pg, int i, float f, double d, void *ptr, svfloat32_t f32, -+ svint32_t i32) -+{ -+ svadda (pg, f); /* { dg-error {too few arguments to function 'svadda'} } */ -+ svadda (pg, f, f32, f32); /* { dg-error {too many arguments to function 'svadda'} } */ -+ svadda (f32, f, f32); /* { dg-error {passing 'svfloat32_t' to argument 1 of 'svadda', which expects 
'svbool_t'} } */
-+ svadda (pg, i, f32);
-+ svadda (pg, f, f32);
-+ svadda (pg, d, f32);
-+ svadda (pg, ptr, f32); /* { dg-error {incompatible type for argument 2 of 'svadda_f32'} } */
-+ svadda (pg, pg, f32); /* { dg-error {passing 'svbool_t' to argument 2 of 'svadda', which expects a scalar element} } */
-+ svadda (pg, f32, f32); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svadda', which expects a scalar element} } */
-+ svadda (pg, f, f); /* { dg-error {passing 'float' to argument 3 of 'svadda', which expects an SVE vector type} } */
-+ svadda (pg, i, i32); /* { dg-error {'svadda' has no form that takes 'svint32_t' arguments} } */
-+ svadda (pg, i, i); /* { dg-error {passing 'int' to argument 3 of 'svadda', which expects an SVE vector type} } */
-+}
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_1.c
-new file mode 100644
-index 000000000..e1b99fa36
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_1.c
-@@ -0,0 +1,5 @@
-+/* { dg-do compile } */
-+
-+int svadd_n_u8_x; /* { dg-message "note: previous declaration of 'svadd_n_u8_x' was here" } */
-+
-+#pragma GCC aarch64 "arm_sve.h" /* { dg-error {'svadd_n_u8_x' redeclared} } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_2.c
-new file mode 100644
-index 000000000..7f653f117
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_2.c
-@@ -0,0 +1,5 @@
-+/* { dg-do compile } */
-+
-+int svadd_n_u8_x = 1; /* { dg-message "note: previous definition of 'svadd_n_u8_x' was here" } */
-+
-+#pragma GCC aarch64 "arm_sve.h" /* { dg-error {'svadd_n_u8_x' redeclared} } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_3.c
-new file mode 100644
-index 000000000..d9ff15a6c
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_3.c
-@@ -0,0 +1,5 @@
-+/* { dg-do compile } */
-+
-+extern __SVInt8_t svadd_u8_x (__SVBool_t, __SVInt8_t, __SVInt8_t); /* { dg-message "note: previous declaration of 'svadd_u8_x' was here" } */
-+
-+#pragma GCC aarch64 "arm_sve.h" /* { dg-error {conflicting types for 'svadd_u8_x'} } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_4.c
-new file mode 100644
-index 000000000..9591e3d01
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_4.c
-@@ -0,0 +1,9 @@
-+/* { dg-do compile } */
-+
-+/* Although somewhat suspect, this isn't actively wrong, and doesn't need
-+ to be diagnosed. Any attempt to call the function before including
-+ arm_sve.h will lead to a link failure. (Same for taking its address,
-+ etc.) */
-+extern __SVUint8_t svadd_u8_x (__SVBool_t, __SVUint8_t, __SVUint8_t);
-+
-+#pragma GCC aarch64 "arm_sve.h"
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_5.c
-new file mode 100644
-index 000000000..85923611d
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_5.c
-@@ -0,0 +1,21 @@
-+/* { dg-do compile } */
-+
-+/* There's no requirement to diagnose this. In particular, arm_sve.h
-+ is allowed to use macros to implement the functions, and defining
-+ a macro that matches an existing symbol would not be diagnosed.
-+
-+ At the moment this works like other built-ins in the sense that the
-+ explicit definition "wins". This isn't supported behavior though. */
-+__SVUint8_t
-+svadd_u8_x (__SVBool_t pg, __SVUint8_t x, __SVUint8_t y)
-+{
-+ return x;
-+}
-+
-+#pragma GCC aarch64 "arm_sve.h"
-+
-+svuint8_t
-+f (svbool_t pg, svuint8_t x, svuint8_t y)
-+{
-+ return svadd_u8_x (pg, x, y);
-+}
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_6.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_6.c
-new file mode 100644
-index 000000000..1f04e4644
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/func_redef_6.c
-@@ -0,0 +1,5 @@
-+/* { dg-do compile } */
-+
-+typedef int svadd_u8_x; /* { dg-message "note: previous declaration of 'svadd_u8_x' was here" } */
-+
-+#pragma GCC aarch64 "arm_sve.h" /* { dg-error {'svadd_u8_x' redeclared} } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_1.c
-new file mode 100644
-index 000000000..a3ac08fa8
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_1.c
-@@ -0,0 +1,31 @@
-+/* { dg-do compile } */
-+/* { dg-additional-options "-std=c99 -Wall -Wextra" } */
-+
-+#include <arm_sve.h>
-+
-+svfloat64_t
-+f1 (svbool_t pg, svuint8_t u8, svuint8x2_t u8x2, svuint8x3_t u8x3, int x)
-+{
-+ const int one = 1;
-+ svfloat64_t f64;
-+
-+ u8 = svget2 (u8x2); /* { dg-error {too few arguments to function 'svget2'} } */
-+ u8 = svget2 (u8x2, 1, 2); /* { dg-error {too many arguments to function 'svget2'} } */
-+ u8 = svget2 (u8, 0); /* { dg-error {passing single vector 'svuint8_t' to argument 1 of 'svget2', which expects a tuple of 2 vectors} } */
-+ u8 = svget2 (u8x3, 0); /* { dg-error {passing 'svuint8x3_t' to argument 1 of 'svget2', which expects a tuple of 2 vectors} } */
-+ u8 = svget2 (pg, 0); /* { dg-error {passing 'svbool_t' to argument 1 of 'svget2', which expects a tuple of 2 vectors} } */
-+ u8 = svget2 (u8x2, x); /* { dg-error {argument 2 of 'svget2' must be an integer constant expression} } */
-+ u8 = svget2 (u8x2, 0);
-+ f64 = svget2 (u8x2, 0); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svuint8_t'} } */
-+ u8 = svget2 (u8x2, 1);
-+ u8 = svget2 (u8x2, 2); /* { dg-error {passing 2 to argument 2 of 'svget2', which expects a value in the range \[0, 1\]} } */
-+ u8 = svget2 (u8x2, 3); /* { dg-error {passing 3 to argument 2 of 'svget2', which expects a value in the range \[0, 1\]} } */
-+ u8 = svget2 (u8x2, 4); /* { dg-error {passing 4 to argument 2 of 'svget2', which expects a value in the range \[0, 1\]} } */
-+ u8 = svget2 (u8x2, 5); /* { dg-error {passing 5 to argument 2 of 'svget2', which expects a value in the range \[0, 1\]} } */
-+ u8 = svget2 (u8x2, ~0U); /* { dg-error {passing [^ ]* to argument 2 of 'svget2', which expects a value in the range \[0, 1\]} } */
-+ u8 = svget2 (u8x2, one); /* { dg-error {argument 2 of 'svget2' must be an integer constant expression} } */
-+ u8 = svget2 (u8x2, 3 - 2);
-+ u8 = svget2 (u8x2, 1.0);
-+
-+ return f64;
-+}
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_2.c
-new file mode 100644
-index 000000000..4eee2439e
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_2.c
-@@ -0,0
+1,33 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-std=c99 -Wall -Wextra" } */ -+ -+#include -+ -+svfloat64_t -+f1 (svbool_t pg, svuint8_t u8, svuint8x2_t u8x2, svint8x2_t s8x2, -+ svuint8x3_t u8x3, int x) -+{ -+ const int one = 1; -+ svfloat64_t f64; -+ -+ u8 = svget2_u8 (u8x2); /* { dg-error {too few arguments to function 'svget2_u8'} } */ -+ u8 = svget2_u8 (u8x2, 1, 2); /* { dg-error {too many arguments to function 'svget2_u8'} } */ -+ u8 = svget2_u8 (u8, 0); /* { dg-error {incompatible type for argument 1 of 'svget2_u8'} } */ -+ u8 = svget2_u8 (s8x2, 0); /* { dg-error {incompatible type for argument 1 of 'svget2_u8'} } */ -+ u8 = svget2_u8 (u8x3, 0); /* { dg-error {incompatible type for argument 1 of 'svget2_u8'} } */ -+ u8 = svget2_u8 (pg, 0); /* { dg-error {incompatible type for argument 1 of 'svget2_u8'} } */ -+ u8 = svget2_u8 (u8x2, x); /* { dg-error {argument 2 of 'svget2_u8' must be an integer constant expression} } */ -+ u8 = svget2_u8 (u8x2, 0); -+ f64 = svget2_u8 (u8x2, 0); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svuint8_t'} } */ -+ u8 = svget2_u8 (u8x2, 1); -+ u8 = svget2_u8 (u8x2, 2); /* { dg-error {passing 2 to argument 2 of 'svget2_u8', which expects a value in the range \[0, 1\]} } */ -+ u8 = svget2_u8 (u8x2, 3); /* { dg-error {passing 3 to argument 2 of 'svget2_u8', which expects a value in the range \[0, 1\]} } */ -+ u8 = svget2_u8 (u8x2, 4); /* { dg-error {passing 4 to argument 2 of 'svget2_u8', which expects a value in the range \[0, 1\]} } */ -+ u8 = svget2_u8 (u8x2, 5); /* { dg-error {passing 5 to argument 2 of 'svget2_u8', which expects a value in the range \[0, 1\]} } */ -+ u8 = svget2_u8 (u8x2, ~0U); /* { dg-error {passing [^ ]* to argument 2 of 'svget2_u8', which expects a value in the range \[0, 1\]} } */ -+ u8 = svget2_u8 (u8x2, one); /* { dg-error {argument 2 of 'svget2_u8' must be an integer constant expression} } */ -+ u8 = svget2_u8 (u8x2, 3 - 2); -+ u8 = svget2_u8 (u8x2, 1.0); -+ -+ return f64; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_3.c -new file mode 100644 -index 000000000..0e7b2e227 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_3.c -@@ -0,0 +1,32 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-std=c99 -Wall -Wextra" } */ -+ -+#include -+ -+svfloat64_t -+f1 (svbool_t pg, svfloat16_t f16, svfloat16x3_t f16x3, svfloat16x4_t f16x4, -+ int x) -+{ -+ const int one = 1; -+ svfloat64_t f64; -+ -+ f16 = svget3 (f16x3); /* { dg-error {too few arguments to function 'svget3'} } */ -+ f16 = svget3 (f16x3, 1, 2); /* { dg-error {too many arguments to function 'svget3'} } */ -+ f16 = svget3 (f16, 0); /* { dg-error {passing single vector 'svfloat16_t' to argument 1 of 'svget3', which expects a tuple of 3 vectors} } */ -+ f16 = svget3 (f16x4, 0); /* { dg-error {passing 'svfloat16x4_t' to argument 1 of 'svget3', which expects a tuple of 3 vectors} } */ -+ f16 = svget3 (pg, 0); /* { dg-error {passing 'svbool_t' to argument 1 of 'svget3', which expects a tuple of 3 vectors} } */ -+ f16 = svget3 (f16x3, x); /* { dg-error {argument 2 of 'svget3' must be an integer constant expression} } */ -+ f16 = svget3 (f16x3, 0); -+ f64 = svget3 (f16x3, 0); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svfloat16_t'} } */ -+ f16 = svget3 (f16x3, 1); -+ f16 = svget3 (f16x3, 2); -+ f16 = svget3 (f16x3, 3); /* { dg-error {passing 3 to argument 2 of 'svget3', 
which expects a value in the range \[0, 2\]} } */ -+ f16 = svget3 (f16x3, 4); /* { dg-error {passing 4 to argument 2 of 'svget3', which expects a value in the range \[0, 2\]} } */ -+ f16 = svget3 (f16x3, 5); /* { dg-error {passing 5 to argument 2 of 'svget3', which expects a value in the range \[0, 2\]} } */ -+ f16 = svget3 (f16x3, ~0U); /* { dg-error {passing [^ ]* to argument 2 of 'svget3', which expects a value in the range \[0, 2\]} } */ -+ f16 = svget3 (f16x3, one); /* { dg-error {argument 2 of 'svget3' must be an integer constant expression} } */ -+ f16 = svget3 (f16x3, 3 - 2); -+ f16 = svget3 (f16x3, 1.0); -+ -+ return f64; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_4.c -new file mode 100644 -index 000000000..72b4f82a6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_4.c -@@ -0,0 +1,33 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-std=c99 -Wall -Wextra" } */ -+ -+#include -+ -+svfloat64_t -+f1 (svbool_t pg, svfloat16_t f16, svfloat16x3_t f16x3, svfloat32x3_t f32x3, -+ svfloat16x4_t f16x4, int x) -+{ -+ const int one = 1; -+ svfloat64_t f64; -+ -+ f16 = svget3_f16 (f16x3); /* { dg-error {too few arguments to function 'svget3_f16'} } */ -+ f16 = svget3_f16 (f16x3, 1, 2); /* { dg-error {too many arguments to function 'svget3_f16'} } */ -+ f16 = svget3_f16 (f16, 0); /* { dg-error {incompatible type for argument 1 of 'svget3_f16'} } */ -+ f16 = svget3_f16 (f32x3, 0); /* { dg-error {incompatible type for argument 1 of 'svget3_f16'} } */ -+ f16 = svget3_f16 (f16x4, 0); /* { dg-error {incompatible type for argument 1 of 'svget3_f16'} } */ -+ f16 = svget3_f16 (pg, 0); /* { dg-error {incompatible type for argument 1 of 'svget3_f16'} } */ -+ f16 = svget3_f16 (f16x3, x); /* { dg-error {argument 2 of 'svget3_f16' must be an integer constant expression} } */ -+ f16 = svget3_f16 (f16x3, 0); -+ f64 = svget3_f16 (f16x3, 0); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svfloat16_t'} } */ -+ f16 = svget3_f16 (f16x3, 1); -+ f16 = svget3_f16 (f16x3, 2); -+ f16 = svget3_f16 (f16x3, 3); /* { dg-error {passing 3 to argument 2 of 'svget3_f16', which expects a value in the range \[0, 2\]} } */ -+ f16 = svget3_f16 (f16x3, 4); /* { dg-error {passing 4 to argument 2 of 'svget3_f16', which expects a value in the range \[0, 2\]} } */ -+ f16 = svget3_f16 (f16x3, 5); /* { dg-error {passing 5 to argument 2 of 'svget3_f16', which expects a value in the range \[0, 2\]} } */ -+ f16 = svget3_f16 (f16x3, ~0U); /* { dg-error {passing [^ ]* to argument 2 of 'svget3_f16', which expects a value in the range \[0, 2\]} } */ -+ f16 = svget3_f16 (f16x3, one); /* { dg-error {argument 2 of 'svget3_f16' must be an integer constant expression} } */ -+ f16 = svget3_f16 (f16x3, 3 - 2); -+ f16 = svget3_f16 (f16x3, 1.0); -+ -+ return f64; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_5.c -new file mode 100644 -index 000000000..b0b69b95e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_5.c -@@ -0,0 +1,31 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-std=c99 -Wall -Wextra" } */ -+ -+#include -+ -+svfloat64_t -+f1 (svbool_t pg, svint32_t s32, svint32x4_t s32x4, svint32x2_t s32x2, int x) -+{ -+ const int one = 1; -+ svfloat64_t f64; -+ -+ s32 = svget4 (s32x4); /* { dg-error {too few arguments to function 'svget4'} } */ -+ s32 = svget4 
(s32x4, 1, 2); /* { dg-error {too many arguments to function 'svget4'} } */ -+ s32 = svget4 (s32, 0); /* { dg-error {passing single vector 'svint32_t' to argument 1 of 'svget4', which expects a tuple of 4 vectors} } */ -+ s32 = svget4 (s32x2, 0); /* { dg-error {passing 'svint32x2_t' to argument 1 of 'svget4', which expects a tuple of 4 vectors} } */ -+ s32 = svget4 (pg, 0); /* { dg-error {passing 'svbool_t' to argument 1 of 'svget4', which expects a tuple of 4 vectors} } */ -+ s32 = svget4 (s32x4, x); /* { dg-error {argument 2 of 'svget4' must be an integer constant expression} } */ -+ s32 = svget4 (s32x4, 0); -+ f64 = svget4 (s32x4, 0); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svint32_t'} } */ -+ s32 = svget4 (s32x4, 1); -+ s32 = svget4 (s32x4, 2); -+ s32 = svget4 (s32x4, 3); -+ s32 = svget4 (s32x4, 4); /* { dg-error {passing 4 to argument 2 of 'svget4', which expects a value in the range \[0, 3\]} } */ -+ s32 = svget4 (s32x4, 5); /* { dg-error {passing 5 to argument 2 of 'svget4', which expects a value in the range \[0, 3\]} } */ -+ s32 = svget4 (s32x4, ~0U); /* { dg-error {passing [^ ]* to argument 2 of 'svget4', which expects a value in the range \[0, 3\]} } */ -+ s32 = svget4 (s32x4, one); /* { dg-error {argument 2 of 'svget4' must be an integer constant expression} } */ -+ s32 = svget4 (s32x4, 3 - 2); -+ s32 = svget4 (s32x4, 1.0); -+ -+ return f64; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_6.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_6.c -new file mode 100644 -index 000000000..3801c0c4e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/get_6.c -@@ -0,0 +1,33 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-std=c99 -Wall -Wextra" } */ -+ -+#include -+ -+svfloat64_t -+f1 (svbool_t pg, svint32_t s32, svint32x4_t s32x4, svfloat32x4_t f32x4, -+ svint32x2_t s32x2, int x) -+{ -+ const int one = 1; -+ svfloat64_t f64; -+ -+ s32 = svget4_s32 (s32x4); /* { dg-error {too few arguments to function 'svget4_s32'} } */ -+ s32 = svget4_s32 (s32x4, 1, 2); /* { dg-error {too many arguments to function 'svget4_s32'} } */ -+ s32 = svget4_s32 (s32, 0); /* { dg-error {incompatible type for argument 1 of 'svget4_s32'} } */ -+ s32 = svget4_s32 (f32x4, 0); /* { dg-error {incompatible type for argument 1 of 'svget4_s32'} } */ -+ s32 = svget4_s32 (s32x2, 0); /* { dg-error {incompatible type for argument 1 of 'svget4_s32'} } */ -+ s32 = svget4_s32 (pg, 0); /* { dg-error {incompatible type for argument 1 of 'svget4_s32'} } */ -+ s32 = svget4_s32 (s32x4, x); /* { dg-error {argument 2 of 'svget4_s32' must be an integer constant expression} } */ -+ s32 = svget4_s32 (s32x4, 0); -+ f64 = svget4_s32 (s32x4, 0); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svint32_t'} } */ -+ s32 = svget4_s32 (s32x4, 1); -+ s32 = svget4_s32 (s32x4, 2); -+ s32 = svget4_s32 (s32x4, 3); -+ s32 = svget4_s32 (s32x4, 4); /* { dg-error {passing 4 to argument 2 of 'svget4_s32', which expects a value in the range \[0, 3\]} } */ -+ s32 = svget4_s32 (s32x4, 5); /* { dg-error {passing 5 to argument 2 of 'svget4_s32', which expects a value in the range \[0, 3\]} } */ -+ s32 = svget4_s32 (s32x4, ~0U); /* { dg-error {passing [^ ]* to argument 2 of 'svget4_s32', which expects a value in the range \[0, 3\]} } */ -+ s32 = svget4_s32 (s32x4, one); /* { dg-error {argument 2 of 'svget4_s32' must be an integer constant expression} } */ -+ s32 = svget4_s32 (s32x4, 3 - 2); -+ s32 = svget4_s32 (s32x4, 
1.0);
-+
-+ return f64;
-+}
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_1.c
-new file mode 100644
-index 000000000..dcd291da6
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_1.c
-@@ -0,0 +1,37 @@
-+#include <arm_sve.h>
-+
-+void
-+test (svbool_t pg, svint8_t s8, svuint8_t u8,
-+ svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32,
-+ svint64_t s64, svuint64_t u64, int16_t sh, uint16_t uh,
-+ int32_t sw, uint32_t uw, int64_t sd, uint64_t ud,
-+ float f, int i)
-+{
-+ svqincb (sw); /* { dg-error {too few arguments to function 'svqincb'} } */
-+ svqincb (sw, 1, 1); /* { dg-error {too many arguments to function 'svqincb'} } */
-+
-+ svqincb (pg, 1); /* { dg-error {'svqincb' has no form that takes 'svbool_t' arguments} } */
-+ svqincb (s8, 1); /* { dg-error {'svqincb' has no form that takes 'svint8_t' arguments} } */
-+ svqincb (u8, 1); /* { dg-error {'svqincb' has no form that takes 'svuint8_t' arguments} } */
-+ svqincb (s16, 1); /* { dg-error {'svqincb' has no form that takes 'svint16_t' arguments} } */
-+ svqincb (u16, 1); /* { dg-error {'svqincb' has no form that takes 'svuint16_t' arguments} } */
-+ svqincb (s32, 1); /* { dg-error {'svqincb' has no form that takes 'svint32_t' arguments} } */
-+ svqincb (u32, 1); /* { dg-error {'svqincb' has no form that takes 'svuint32_t' arguments} } */
-+ svqincb (s64, 1); /* { dg-error {'svqincb' has no form that takes 'svint64_t' arguments} } */
-+ svqincb (u64, 1); /* { dg-error {'svqincb' has no form that takes 'svuint64_t' arguments} } */
-+ svqincb (sh, 1);
-+ svqincb (sw, 1);
-+ svqincb (sd, 1);
-+ svqincb (uh, 1);
-+ svqincb (uw, 1);
-+ svqincb (ud, 1);
-+ svqincb (f, 1); /* { dg-error {passing 'float' to argument 1 of 'svqincb', which expects a 32-bit or 64-bit integer type} } */
-+ svqincb (ud, i); /* { dg-error {argument 2 of 'svqincb' must be an integer constant expression} } */
-+
-+ svqincb (sw, -1); /* { dg-error {passing -1 to argument 2 of 'svqincb', which expects a value in the range \[1, 16\]} } */
-+ svqincb (sw, 0); /* { dg-error {passing 0 to argument 2 of 'svqincb', which expects a value in the range \[1, 16\]} } */
-+ svqincb (sw, 1);
-+ svqincb (sw, 2);
-+ svqincb (sw, 16);
-+ svqincb (sw, 17); /* { dg-error {passing 17 to argument 2 of 'svqincb', which expects a value in the range \[1, 16\]} } */
-+}
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_2.c
-new file mode 100644
-index 000000000..e5acad187
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_2.c
-@@ -0,0 +1,13 @@
-+#include <arm_sve.h>
-+
-+void
-+test (int32_t sw, int i)
-+{
-+ svqincb_n_s32 (sw, -1); /* { dg-error {passing -1 to argument 2 of 'svqincb_n_s32', which expects a value in the range \[1, 16\]} } */
-+ svqincb_n_s32 (sw, 0); /* { dg-error {passing 0 to argument 2 of 'svqincb_n_s32', which expects a value in the range \[1, 16\]} } */
-+ svqincb_n_s32 (sw, 1);
-+ svqincb_n_s32 (sw, 2);
-+ svqincb_n_s32 (sw, 16);
-+ svqincb_n_s32 (sw, 17); /* { dg-error {passing 17 to argument 2 of 'svqincb_n_s32', which expects a value in the range \[1, 16\]} } */
-+ svqincb_n_s32 (sw, i); /* { dg-error {argument 2 of 'svqincb_n_s32' must be an integer constant expression} } */
-+}
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_3.c
-new file mode
100644 -index 000000000..351e7757f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_3.c -@@ -0,0 +1,26 @@ -+#include -+ -+void -+test (svbool_t pg, svint8_t s8, svuint8_t u8, -+ svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32, -+ svint64_t s64, svuint64_t u64, int16_t sh, uint16_t uh, -+ int32_t sw, uint32_t uw, int64_t sd, uint64_t ud, -+ float f) -+{ -+ svqinch (pg, 1); /* { dg-error {'svqinch' has no form that takes 'svbool_t' arguments} } */ -+ svqinch (s8, 1); /* { dg-error {'svqinch' has no form that takes 'svint8_t' arguments} } */ -+ svqinch (u8, 1); /* { dg-error {'svqinch' has no form that takes 'svuint8_t' arguments} } */ -+ svqinch (s16, 1); -+ svqinch (u16, 1); -+ svqinch (s32, 1); /* { dg-error {'svqinch' has no form that takes 'svint32_t' arguments} } */ -+ svqinch (u32, 1); /* { dg-error {'svqinch' has no form that takes 'svuint32_t' arguments} } */ -+ svqinch (s64, 1); /* { dg-error {'svqinch' has no form that takes 'svint64_t' arguments} } */ -+ svqinch (u64, 1); /* { dg-error {'svqinch' has no form that takes 'svuint64_t' arguments} } */ -+ svqinch (sh, 1); -+ svqinch (sw, 1); -+ svqinch (sd, 1); -+ svqinch (uh, 1); -+ svqinch (uw, 1); -+ svqinch (ud, 1); -+ svqinch (f, 1); /* { dg-error {passing 'float' to argument 1 of 'svqinch', which expects a 32-bit or 64-bit integer type} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_4.c -new file mode 100644 -index 000000000..e071c0229 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_4.c -@@ -0,0 +1,26 @@ -+#include -+ -+void -+test (svbool_t pg, svint8_t s8, svuint8_t u8, -+ svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32, -+ svint64_t s64, svuint64_t u64, int16_t sh, uint16_t uh, -+ int32_t sw, uint32_t uw, int64_t sd, uint64_t ud, -+ float f) -+{ -+ svqincw (pg, 1); /* { dg-error {'svqincw' has no form that takes 'svbool_t' arguments} } */ -+ svqincw (s8, 1); /* { dg-error {'svqincw' has no form that takes 'svint8_t' arguments} } */ -+ svqincw (u8, 1); /* { dg-error {'svqincw' has no form that takes 'svuint8_t' arguments} } */ -+ svqincw (s16, 1); /* { dg-error {'svqincw' has no form that takes 'svint16_t' arguments} } */ -+ svqincw (u16, 1); /* { dg-error {'svqincw' has no form that takes 'svuint16_t' arguments} } */ -+ svqincw (s32, 1); -+ svqincw (u32, 1); -+ svqincw (s64, 1); /* { dg-error {'svqincw' has no form that takes 'svint64_t' arguments} } */ -+ svqincw (u64, 1); /* { dg-error {'svqincw' has no form that takes 'svuint64_t' arguments} } */ -+ svqincw (sh, 1); -+ svqincw (sw, 1); -+ svqincw (sd, 1); -+ svqincw (uh, 1); -+ svqincw (uw, 1); -+ svqincw (ud, 1); -+ svqincw (f, 1); /* { dg-error {passing 'float' to argument 1 of 'svqincw', which expects a 32-bit or 64-bit integer type} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_5.c -new file mode 100644 -index 000000000..be9c76928 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_5.c -@@ -0,0 +1,26 @@ -+#include -+ -+void -+test (svbool_t pg, svint8_t s8, svuint8_t u8, -+ svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32, -+ svint64_t s64, svuint64_t u64, int16_t sh, uint16_t uh, -+ int32_t sw, uint32_t uw, int64_t sd, uint64_t ud, -+ float f) -+{ -+ svqincd (pg, 1); /* { dg-error {'svqincd' has no form that takes 'svbool_t' arguments} 
} */ -+ svqincd (s8, 1); /* { dg-error {'svqincd' has no form that takes 'svint8_t' arguments} } */ -+ svqincd (u8, 1); /* { dg-error {'svqincd' has no form that takes 'svuint8_t' arguments} } */ -+ svqincd (s16, 1); /* { dg-error {'svqincd' has no form that takes 'svint16_t' arguments} } */ -+ svqincd (u16, 1); /* { dg-error {'svqincd' has no form that takes 'svuint16_t' arguments} } */ -+ svqincd (s32, 1); /* { dg-error {'svqincd' has no form that takes 'svint32_t' arguments} } */ -+ svqincd (u32, 1); /* { dg-error {'svqincd' has no form that takes 'svuint32_t' arguments} } */ -+ svqincd (s64, 1); -+ svqincd (u64, 1); -+ svqincd (sh, 1); -+ svqincd (sw, 1); -+ svqincd (sd, 1); -+ svqincd (uh, 1); -+ svqincd (uw, 1); -+ svqincd (ud, 1); -+ svqincd (f, 1); /* { dg-error {passing 'float' to argument 1 of 'svqincd', which expects a 32-bit or 64-bit integer type} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pat_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pat_1.c -new file mode 100644 -index 000000000..f2e5841d4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pat_1.c -@@ -0,0 +1,47 @@ -+#include -+ -+void -+test (enum svpattern pat, svbool_t pg, svint8_t s8, svuint8_t u8, -+ svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32, -+ svint64_t s64, svuint64_t u64, int16_t sh, uint16_t uh, -+ int32_t sw, uint32_t uw, int64_t sd, uint64_t ud, -+ float f, int i) -+{ -+ svqincb_pat (sw, pat); /* { dg-error {too few arguments to function 'svqincb_pat'} } */ -+ svqincb_pat (sw, pat, 1, 1); /* { dg-error {too many arguments to function 'svqincb_pat'} } */ -+ -+ svqincb_pat (pg, SV_ALL, 1); /* { dg-error {'svqincb_pat' has no form that takes 'svbool_t' arguments} } */ -+ svqincb_pat (s8, SV_ALL, 1); /* { dg-error {'svqincb_pat' has no form that takes 'svint8_t' arguments} } */ -+ svqincb_pat (u8, SV_ALL, 1); /* { dg-error {'svqincb_pat' has no form that takes 'svuint8_t' arguments} } */ -+ svqincb_pat (s16, SV_ALL, 1); /* { dg-error {'svqincb_pat' has no form that takes 'svint16_t' arguments} } */ -+ svqincb_pat (u16, SV_ALL, 1); /* { dg-error {'svqincb_pat' has no form that takes 'svuint16_t' arguments} } */ -+ svqincb_pat (s32, SV_ALL, 1); /* { dg-error {'svqincb_pat' has no form that takes 'svint32_t' arguments} } */ -+ svqincb_pat (u32, SV_ALL, 1); /* { dg-error {'svqincb_pat' has no form that takes 'svuint32_t' arguments} } */ -+ svqincb_pat (s64, SV_ALL, 1); /* { dg-error {'svqincb_pat' has no form that takes 'svint64_t' arguments} } */ -+ svqincb_pat (u64, SV_ALL, 1); /* { dg-error {'svqincb_pat' has no form that takes 'svuint64_t' arguments} } */ -+ svqincb_pat (sh, SV_ALL, 1); -+ svqincb_pat (sw, SV_ALL, 1); -+ svqincb_pat (sd, SV_ALL, 1); -+ svqincb_pat (uh, SV_ALL, 1); -+ svqincb_pat (uw, SV_ALL, 1); -+ svqincb_pat (ud, SV_ALL, 1); -+ svqincb_pat (f, SV_ALL, 1); /* { dg-error {passing 'float' to argument 1 of 'svqincb_pat', which expects a 32-bit or 64-bit integer type} } */ -+ -+ svqincb_pat (sw, pat, 1); /* { dg-error {argument 2 of 'svqincb_pat' must be an integer constant expression} } */ -+ svqincb_pat (sw, i, 1); /* { dg-error {argument 2 of 'svqincb_pat' must be an integer constant expression} } */ -+ svqincb_pat (sw, (enum svpattern) -1, 1); /* { dg-error {passing 4294967295 to argument 2 of 'svqincb_pat', which expects a valid 'enum svpattern' value} } */ -+ svqincb_pat (sw, (enum svpattern) 0, 1); -+ svqincb_pat (sw, (enum svpattern) 13, 1); -+ svqincb_pat (sw, (enum svpattern) 14, 
1); /* { dg-error {passing 14 to argument 2 of 'svqincb_pat', which expects a valid 'enum svpattern' value} } */ -+ svqincb_pat (sw, (enum svpattern) 28, 1); /* { dg-error {passing 28 to argument 2 of 'svqincb_pat', which expects a valid 'enum svpattern' value} } */ -+ svqincb_pat (sw, (enum svpattern) 29, 1); -+ svqincb_pat (sw, (enum svpattern) 31, 1); -+ svqincb_pat (sw, (enum svpattern) 32, 1); /* { dg-error {passing 32 to argument 2 of 'svqincb_pat', which expects a valid 'enum svpattern' value} } */ -+ -+ svqincb_pat (sw, SV_POW2, -1); /* { dg-error {passing -1 to argument 3 of 'svqincb_pat', which expects a value in the range \[1, 16\]} } */ -+ svqincb_pat (sw, SV_POW2, 0); /* { dg-error {passing 0 to argument 3 of 'svqincb_pat', which expects a value in the range \[1, 16\]} } */ -+ svqincb_pat (sw, SV_POW2, 1); -+ svqincb_pat (sw, SV_POW2, 2); -+ svqincb_pat (sw, SV_POW2, 16); -+ svqincb_pat (sw, SV_POW2, 17); /* { dg-error {passing 17 to argument 3 of 'svqincb_pat', which expects a value in the range \[1, 16\]} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pat_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pat_2.c -new file mode 100644 -index 000000000..c1c1ab9d9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pat_2.c -@@ -0,0 +1,23 @@ -+#include -+ -+void -+test (int32_t sw, enum svpattern pat, int i) -+{ -+ svqincb_pat_n_s32 (sw, pat, 1); /* { dg-error {argument 2 of 'svqincb_pat_n_s32' must be an integer constant expression} } */ -+ svqincb_pat_n_s32 (sw, i, 1); /* { dg-error {argument 2 of 'svqincb_pat_n_s32' must be an integer constant expression} } */ -+ svqincb_pat_n_s32 (sw, (enum svpattern) -1, 1); /* { dg-error {passing 4294967295 to argument 2 of 'svqincb_pat_n_s32', which expects a valid 'enum svpattern' value} } */ -+ svqincb_pat_n_s32 (sw, (enum svpattern) 0, 1); -+ svqincb_pat_n_s32 (sw, (enum svpattern) 13, 1); -+ svqincb_pat_n_s32 (sw, (enum svpattern) 14, 1); /* { dg-error {passing 14 to argument 2 of 'svqincb_pat_n_s32', which expects a valid 'enum svpattern' value} } */ -+ svqincb_pat_n_s32 (sw, (enum svpattern) 28, 1); /* { dg-error {passing 28 to argument 2 of 'svqincb_pat_n_s32', which expects a valid 'enum svpattern' value} } */ -+ svqincb_pat_n_s32 (sw, (enum svpattern) 29, 1); -+ svqincb_pat_n_s32 (sw, (enum svpattern) 31, 1); -+ svqincb_pat_n_s32 (sw, (enum svpattern) 32, 1); /* { dg-error {passing 32 to argument 2 of 'svqincb_pat_n_s32', which expects a valid 'enum svpattern' value} } */ -+ -+ svqincb_pat_n_s32 (sw, SV_POW2, -1); /* { dg-error {passing -1 to argument 3 of 'svqincb_pat_n_s32', which expects a value in the range \[1, 16\]} } */ -+ svqincb_pat_n_s32 (sw, SV_POW2, 0); /* { dg-error {passing 0 to argument 3 of 'svqincb_pat_n_s32', which expects a value in the range \[1, 16\]} } */ -+ svqincb_pat_n_s32 (sw, SV_POW2, 1); -+ svqincb_pat_n_s32 (sw, SV_POW2, 2); -+ svqincb_pat_n_s32 (sw, SV_POW2, 16); -+ svqincb_pat_n_s32 (sw, SV_POW2, 17); /* { dg-error {passing 17 to argument 3 of 'svqincb_pat_n_s32', which expects a value in the range \[1, 16\]} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pat_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pat_3.c -new file mode 100644 -index 000000000..4126b2461 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pat_3.c -@@ -0,0 +1,26 @@ -+#include -+ -+void -+test (svbool_t pg, svint8_t s8, svuint8_t u8, -+ svint16_t s16, svuint16_t 
u16, svint32_t s32, svuint32_t u32, -+ svint64_t s64, svuint64_t u64, int16_t sh, uint16_t uh, -+ int32_t sw, uint32_t uw, int64_t sd, uint64_t ud, -+ float f) -+{ -+ svqinch_pat (pg, SV_ALL, 1); /* { dg-error {'svqinch_pat' has no form that takes 'svbool_t' arguments} } */ -+ svqinch_pat (s8, SV_ALL, 1); /* { dg-error {'svqinch_pat' has no form that takes 'svint8_t' arguments} } */ -+ svqinch_pat (u8, SV_ALL, 1); /* { dg-error {'svqinch_pat' has no form that takes 'svuint8_t' arguments} } */ -+ svqinch_pat (s16, SV_ALL, 1); -+ svqinch_pat (u16, SV_ALL, 1); -+ svqinch_pat (s32, SV_ALL, 1); /* { dg-error {'svqinch_pat' has no form that takes 'svint32_t' arguments} } */ -+ svqinch_pat (u32, SV_ALL, 1); /* { dg-error {'svqinch_pat' has no form that takes 'svuint32_t' arguments} } */ -+ svqinch_pat (s64, SV_ALL, 1); /* { dg-error {'svqinch_pat' has no form that takes 'svint64_t' arguments} } */ -+ svqinch_pat (u64, SV_ALL, 1); /* { dg-error {'svqinch_pat' has no form that takes 'svuint64_t' arguments} } */ -+ svqinch_pat (sh, SV_ALL, 1); -+ svqinch_pat (sw, SV_ALL, 1); -+ svqinch_pat (sd, SV_ALL, 1); -+ svqinch_pat (uh, SV_ALL, 1); -+ svqinch_pat (uw, SV_ALL, 1); -+ svqinch_pat (ud, SV_ALL, 1); -+ svqinch_pat (f, SV_ALL, 1); /* { dg-error {passing 'float' to argument 1 of 'svqinch_pat', which expects a 32-bit or 64-bit integer type} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pat_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pat_4.c -new file mode 100644 -index 000000000..9aabbd714 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pat_4.c -@@ -0,0 +1,26 @@ -+#include -+ -+void -+test (svbool_t pg, svint8_t s8, svuint8_t u8, -+ svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32, -+ svint64_t s64, svuint64_t u64, int16_t sh, uint16_t uh, -+ int32_t sw, uint32_t uw, int64_t sd, uint64_t ud, -+ float f) -+{ -+ svqincw_pat (pg, SV_ALL, 1); /* { dg-error {'svqincw_pat' has no form that takes 'svbool_t' arguments} } */ -+ svqincw_pat (s8, SV_ALL, 1); /* { dg-error {'svqincw_pat' has no form that takes 'svint8_t' arguments} } */ -+ svqincw_pat (u8, SV_ALL, 1); /* { dg-error {'svqincw_pat' has no form that takes 'svuint8_t' arguments} } */ -+ svqincw_pat (s16, SV_ALL, 1); /* { dg-error {'svqincw_pat' has no form that takes 'svint16_t' arguments} } */ -+ svqincw_pat (u16, SV_ALL, 1); /* { dg-error {'svqincw_pat' has no form that takes 'svuint16_t' arguments} } */ -+ svqincw_pat (s32, SV_ALL, 1); -+ svqincw_pat (u32, SV_ALL, 1); -+ svqincw_pat (s64, SV_ALL, 1); /* { dg-error {'svqincw_pat' has no form that takes 'svint64_t' arguments} } */ -+ svqincw_pat (u64, SV_ALL, 1); /* { dg-error {'svqincw_pat' has no form that takes 'svuint64_t' arguments} } */ -+ svqincw_pat (sh, SV_ALL, 1); -+ svqincw_pat (sw, SV_ALL, 1); -+ svqincw_pat (sd, SV_ALL, 1); -+ svqincw_pat (uh, SV_ALL, 1); -+ svqincw_pat (uw, SV_ALL, 1); -+ svqincw_pat (ud, SV_ALL, 1); -+ svqincw_pat (f, SV_ALL, 1); /* { dg-error {passing 'float' to argument 1 of 'svqincw_pat', which expects a 32-bit or 64-bit integer type} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pat_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pat_5.c -new file mode 100644 -index 000000000..5df88c649 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pat_5.c -@@ -0,0 +1,26 @@ -+#include -+ -+void -+test (svbool_t pg, svint8_t s8, svuint8_t u8, -+ svint16_t s16, svuint16_t u16, svint32_t 
s32, svuint32_t u32, -+ svint64_t s64, svuint64_t u64, int16_t sh, uint16_t uh, -+ int32_t sw, uint32_t uw, int64_t sd, uint64_t ud, -+ float f) -+{ -+ svqincd_pat (pg, SV_ALL, 1); /* { dg-error {'svqincd_pat' has no form that takes 'svbool_t' arguments} } */ -+ svqincd_pat (s8, SV_ALL, 1); /* { dg-error {'svqincd_pat' has no form that takes 'svint8_t' arguments} } */ -+ svqincd_pat (u8, SV_ALL, 1); /* { dg-error {'svqincd_pat' has no form that takes 'svuint8_t' arguments} } */ -+ svqincd_pat (s16, SV_ALL, 1); /* { dg-error {'svqincd_pat' has no form that takes 'svint16_t' arguments} } */ -+ svqincd_pat (u16, SV_ALL, 1); /* { dg-error {'svqincd_pat' has no form that takes 'svuint16_t' arguments} } */ -+ svqincd_pat (s32, SV_ALL, 1); /* { dg-error {'svqincd_pat' has no form that takes 'svint32_t' arguments} } */ -+ svqincd_pat (u32, SV_ALL, 1); /* { dg-error {'svqincd_pat' has no form that takes 'svuint32_t' arguments} } */ -+ svqincd_pat (s64, SV_ALL, 1); -+ svqincd_pat (u64, SV_ALL, 1); -+ svqincd_pat (sh, SV_ALL, 1); -+ svqincd_pat (sw, SV_ALL, 1); -+ svqincd_pat (sd, SV_ALL, 1); -+ svqincd_pat (uh, SV_ALL, 1); -+ svqincd_pat (uw, SV_ALL, 1); -+ svqincd_pat (ud, SV_ALL, 1); -+ svqincd_pat (f, SV_ALL, 1); /* { dg-error {passing 'float' to argument 1 of 'svqincd_pat', which expects a 32-bit or 64-bit integer type} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pred_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pred_1.c -new file mode 100644 -index 000000000..a61afcd2d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pred_1.c -@@ -0,0 +1,22 @@ -+#include -+ -+void -+test (svbool_t pg, svint8_t s8, svuint8_t u8, -+ svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32, -+ svint64_t s64, svuint64_t u64, int i) -+{ -+ svqincp (s32); /* { dg-error {too few arguments to function 'svqincp'} } */ -+ svqincp (s32, pg, pg); /* { dg-error {too many arguments to function 'svqincp'} } */ -+ svqincp (i, pg); /* { dg-error {passing 'int' to argument 1 of 'svqincp', which expects an SVE vector type} } */ -+ svqincp (pg, pg); /* { dg-error {'svqincp' has no form that takes 'svbool_t' arguments} } */ -+ svqincp (s8, pg); /* { dg-error {'svqincp' has no form that takes 'svint8_t' arguments} } */ -+ svqincp (u8, pg); /* { dg-error {'svqincp' has no form that takes 'svuint8_t' arguments} } */ -+ svqincp (s16, pg); -+ svqincp (u16, pg); -+ svqincp (s32, pg); -+ svqincp (u32, pg); -+ svqincp (s64, pg); -+ svqincp (u64, pg); -+ svqincp (u64, 0); /* { dg-error {passing 'int' to argument 2 of 'svqincp', which expects 'svbool_t'} } */ -+ svqincp (u64, u64); /* { dg-error {passing 'svuint64_t' to argument 2 of 'svqincp', which expects 'svbool_t'} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pred_scalar_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pred_scalar_1.c -new file mode 100644 -index 000000000..94ebe7e7a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/inc_dec_pred_scalar_1.c -@@ -0,0 +1,19 @@ -+#include -+ -+void -+test (svbool_t pg, svint32_t s32, svuint64_t u64, int16_t sh, uint16_t uh, -+ int32_t sw, uint32_t uw, int64_t sd, uint64_t ud) -+{ -+ svqincp_b8 (s32); /* { dg-error {too few arguments to function 'svqincp_b8'} } */ -+ svqincp_b8 (s32, pg, pg); /* { dg-error {too many arguments to function 'svqincp_b8'} } */ -+ svqincp_b8 (pg, pg); /* { dg-error {passing 'svbool_t' to argument 1 of 'svqincp_b8', which expects a 32-bit or 
64-bit integer type} } */ -+ svqincp_b8 (s32, pg); /* { dg-error {passing 'svint32_t' to argument 1 of 'svqincp_b8', which expects a 32-bit or 64-bit integer type} } */ -+ svqincp_b8 (sh, pg); -+ svqincp_b8 (uh, pg); -+ svqincp_b8 (sw, pg); -+ svqincp_b8 (uw, pg); -+ svqincp_b8 (sd, pg); -+ svqincp_b8 (ud, pg); -+ svqincp_b8 (ud, 0); /* { dg-error {passing 'int' to argument 2 of 'svqincp_b8', which expects 'svbool_t'} } */ -+ svqincp_b8 (ud, u64); /* { dg-error {passing 'svuint64_t' to argument 2 of 'svqincp_b8', which expects 'svbool_t'} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ld1sh_gather_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ld1sh_gather_1.c -new file mode 100644 -index 000000000..91f37f6a5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ld1sh_gather_1.c -@@ -0,0 +1,35 @@ -+/* { dg-do compile } */ -+/* { dg-options "-std=c99 -Wpointer-sign" } */ -+ -+#include -+ -+struct s { int i; }; -+ -+void -+f1 (svbool_t pg, short *s16_ptr, unsigned short *u16_ptr, -+ svint8_t s8, svint16_t s16, -+ svint32_t s32, svuint32_t u32, svfloat32_t f32, -+ svint64_t s64, svuint64_t u64, svfloat64_t f64, struct s s) -+{ -+ svld1sh_gather_index (pg, s16_ptr, s32); /* { dg-warning {implicit declaration of function 'svld1sh_gather_index'; did you mean 'svld1_gather_index'} } */ -+ svld1sh_gather_index_u32 (pg, s16_ptr); /* { dg-error {too few arguments to function 'svld1sh_gather_index_u32'} } */ -+ svld1sh_gather_index_u32 (pg, s16_ptr, s32, 0); /* { dg-error {too many arguments to function 'svld1sh_gather_index_u32'} } */ -+ svld1sh_gather_index_u32 (pg, u16_ptr, s32); /* { dg-warning {pointer targets in passing argument 2 of 'svld1sh_gather_s32index_u32' differ in signedness} } */ -+ svld1sh_gather_index_u32 (pg, s16_ptr, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svld1sh_gather_index_u32', which expects a vector of 32-bit integers} } */ -+ svld1sh_gather_index_u32 (pg, s16_ptr, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svld1sh_gather_index_u32', which expects a vector of 32-bit integers} } */ -+ svld1sh_gather_index_u32 (pg, s16_ptr, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svld1sh_gather_index_u32', which expects a vector of 32-bit integers} } */ -+ svld1sh_gather_index_u32 (pg, s16_ptr, s32); -+ svld1sh_gather_index_u32 (pg, s16_ptr, u32); -+ svld1sh_gather_index_u32 (pg, s16_ptr, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svld1sh_gather_index_u32', which expects a vector of 32-bit integers} } */ -+ svld1sh_gather_index_u32 (pg, s16_ptr, s64); /* { dg-error {passing 'svint64_t' to argument 3 of 'svld1sh_gather_index_u32', which expects a vector of 32-bit integers} } */ -+ svld1sh_gather_index_u32 (pg, s16_ptr, u64); /* { dg-error {passing 'svuint64_t' to argument 3 of 'svld1sh_gather_index_u32', which expects a vector of 32-bit integers} } */ -+ svld1sh_gather_index_u32 (pg, s16_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1sh_gather_index_u32', which expects a vector of 32-bit integers} } */ -+ -+ svld1sh_gather_index_u32 (pg, 0, s32); -+ svld1sh_gather_index_u32 (pg, s, s32); /* { dg-error {'struct s' to argument 2 of 'svld1sh_gather_index_u32', which expects a vector or pointer base address} } */ -+ -+ svld1sh_gather_index_u32 (pg, pg, 0); /* { dg-error {passing 'svbool_t' to argument 2 of 'svld1sh_gather_index_u32', which expects 'svuint32_t'} } */ -+ svld1sh_gather_index_u32 (pg, s32, 0); /* { dg-error {passing 'svint32_t' to 
argument 2 of 'svld1sh_gather_index_u32', which expects 'svuint32_t'} } */ -+ svld1sh_gather_index_u32 (pg, u32, 0); -+ svld1sh_gather_index_u32 (pg, u64, 0); /* { dg-error {passing 'svuint64_t' to argument 2 of 'svld1sh_gather_index_u32', which expects 'svuint32_t'} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_1.c -new file mode 100644 -index 000000000..34f989bf8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_1.c -@@ -0,0 +1,23 @@ -+/* { dg-do compile } */ -+/* { dg-options "-std=c99" } */ -+ -+#include -+ -+struct s { signed char x; }; -+ -+svuint8_t -+f1 (svbool_t pg, signed char *s8_ptr, void *void_ptr, struct s *s_ptr, -+ float *f32_ptr, _Complex float *cf32_ptr, int **ptr_ptr) -+{ -+ svld1 (pg); /* { dg-error {too few arguments to function 'svld1'} } */ -+ svld1 (pg, s8_ptr, 0); /* { dg-error {too many arguments to function 'svld1'} } */ -+ svld1 (0, s8_ptr); /* { dg-error {passing 'int' to argument 1 of 'svld1', which expects 'svbool_t'} } */ -+ svld1 (pg, 0); /* { dg-error {passing 'int' to argument 2 of 'svld1', which expects a pointer type} } */ -+ svld1 (pg, (int *) 0); -+ svld1 (pg, void_ptr); /* { dg-error {passing 'void \*' to argument 2 of 'svld1', but 'void' is not a valid SVE element type} } */ -+ svld1 (pg, s_ptr); /* { dg-error {passing 'struct s \*' to argument 2 of 'svld1', but 'struct s' is not a valid SVE element type} } */ -+ svld1 (pg, f32_ptr); -+ svld1 (pg, cf32_ptr); /* { dg-error {passing '_Complex float \*' to argument 2 of 'svld1', but 'complex float' is not a valid SVE element type} } */ -+ svld1 (pg, ptr_ptr); /* { dg-error {passing 'int \*\*' to argument 2 of 'svld1', but 'int \*' is not a valid SVE element type} } */ -+ return svld1 (pg, s8_ptr); /* { dg-error {incompatible types when returning type 'svint8_t' but 'svuint8_t' was expected} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_2.c -new file mode 100644 -index 000000000..beb07f138 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_2.c -@@ -0,0 +1,22 @@ -+/* { dg-do compile } */ -+/* { dg-options "-std=c99" } */ -+ -+#include -+ -+struct s { signed char x; }; -+ -+svuint8_t -+f1 (svbool_t pg, signed char *s8_ptr, void *void_ptr, struct s *s_ptr, -+ float *f32_ptr, _Complex float *cf32_ptr) -+{ -+ svld1_s8 (pg); /* { dg-error {too few arguments to function 'svld1_s8'} } */ -+ svld1_s8 (pg, s8_ptr, 0); /* { dg-error {too many arguments to function 'svld1_s8'} } */ -+ svld1_s8 (0, 0); /* { dg-error {incompatible type for argument 1 of 'svld1_s8'} } */ -+ svld1_s8 (pg, 0); -+ svld1_s32 (pg, (int *) 0); -+ svld1_s8 (pg, void_ptr); -+ svld1_s8 (pg, s_ptr); /* { dg-warning {passing argument 2 of 'svld1_s8' from incompatible pointer type} } */ -+ svld1_f32 (pg, f32_ptr); -+ svld1_f32 (pg, cf32_ptr); /* { dg-warning {passing argument 2 of 'svld1_f32' from incompatible pointer type} } */ -+ return svld1_s8 (pg, s8_ptr); /* { dg-error {incompatible types when returning type 'svint8_t' but 'svuint8_t' was expected} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_3.c -new file mode 100644 -index 000000000..770203f64 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_3.c -@@ -0,0 +1,18 @@ -+/* { dg-do compile } */ -+/* { dg-options "-std=c99" 
} */ -+ -+#include -+ -+struct s { signed char x; }; -+ -+svuint8_t -+f1 (svbool_t pg, signed char *s8_ptr, svint8_t s8) -+{ -+ svld1_vnum (pg); /* { dg-error {too few arguments to function 'svld1_vnum'} } */ -+ svld1_vnum (pg, s8_ptr); /* { dg-error {too few arguments to function 'svld1_vnum'} } */ -+ svld1_vnum (pg, s8_ptr, 0, 0); /* { dg-error {too many arguments to function 'svld1_vnum'} } */ -+ svld1_vnum (0, s8_ptr, 0); /* { dg-error {passing 'int' to argument 1 of 'svld1_vnum', which expects 'svbool_t'} } */ -+ svld1_vnum (pg, 0, 0); /* { dg-error {passing 'int' to argument 2 of 'svld1_vnum', which expects a pointer type} } */ -+ svld1_vnum (pg, s8_ptr, s8_ptr); /* { dg-warning "passing argument 3 of 'svld1_vnum_s8' makes integer from pointer without a cast" } */ -+ svld1_vnum (pg, s8_ptr, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svld1_vnum', which expects 'int64_t'} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_ext_gather_index_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_ext_gather_index_1.c -new file mode 100644 -index 000000000..91f37f6a5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_ext_gather_index_1.c -@@ -0,0 +1,35 @@ -+/* { dg-do compile } */ -+/* { dg-options "-std=c99 -Wpointer-sign" } */ -+ -+#include -+ -+struct s { int i; }; -+ -+void -+f1 (svbool_t pg, short *s16_ptr, unsigned short *u16_ptr, -+ svint8_t s8, svint16_t s16, -+ svint32_t s32, svuint32_t u32, svfloat32_t f32, -+ svint64_t s64, svuint64_t u64, svfloat64_t f64, struct s s) -+{ -+ svld1sh_gather_index (pg, s16_ptr, s32); /* { dg-warning {implicit declaration of function 'svld1sh_gather_index'; did you mean 'svld1_gather_index'} } */ -+ svld1sh_gather_index_u32 (pg, s16_ptr); /* { dg-error {too few arguments to function 'svld1sh_gather_index_u32'} } */ -+ svld1sh_gather_index_u32 (pg, s16_ptr, s32, 0); /* { dg-error {too many arguments to function 'svld1sh_gather_index_u32'} } */ -+ svld1sh_gather_index_u32 (pg, u16_ptr, s32); /* { dg-warning {pointer targets in passing argument 2 of 'svld1sh_gather_s32index_u32' differ in signedness} } */ -+ svld1sh_gather_index_u32 (pg, s16_ptr, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svld1sh_gather_index_u32', which expects a vector of 32-bit integers} } */ -+ svld1sh_gather_index_u32 (pg, s16_ptr, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svld1sh_gather_index_u32', which expects a vector of 32-bit integers} } */ -+ svld1sh_gather_index_u32 (pg, s16_ptr, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svld1sh_gather_index_u32', which expects a vector of 32-bit integers} } */ -+ svld1sh_gather_index_u32 (pg, s16_ptr, s32); -+ svld1sh_gather_index_u32 (pg, s16_ptr, u32); -+ svld1sh_gather_index_u32 (pg, s16_ptr, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svld1sh_gather_index_u32', which expects a vector of 32-bit integers} } */ -+ svld1sh_gather_index_u32 (pg, s16_ptr, s64); /* { dg-error {passing 'svint64_t' to argument 3 of 'svld1sh_gather_index_u32', which expects a vector of 32-bit integers} } */ -+ svld1sh_gather_index_u32 (pg, s16_ptr, u64); /* { dg-error {passing 'svuint64_t' to argument 3 of 'svld1sh_gather_index_u32', which expects a vector of 32-bit integers} } */ -+ svld1sh_gather_index_u32 (pg, s16_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1sh_gather_index_u32', which expects a vector of 32-bit integers} } */ -+ -+ svld1sh_gather_index_u32 (pg, 0, s32); -+ 
svld1sh_gather_index_u32 (pg, s, s32); /* { dg-error {'struct s' to argument 2 of 'svld1sh_gather_index_u32', which expects a vector or pointer base address} } */ -+ -+ svld1sh_gather_index_u32 (pg, pg, 0); /* { dg-error {passing 'svbool_t' to argument 2 of 'svld1sh_gather_index_u32', which expects 'svuint32_t'} } */ -+ svld1sh_gather_index_u32 (pg, s32, 0); /* { dg-error {passing 'svint32_t' to argument 2 of 'svld1sh_gather_index_u32', which expects 'svuint32_t'} } */ -+ svld1sh_gather_index_u32 (pg, u32, 0); -+ svld1sh_gather_index_u32 (pg, u64, 0); /* { dg-error {passing 'svuint64_t' to argument 2 of 'svld1sh_gather_index_u32', which expects 'svuint32_t'} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_1.c -new file mode 100644 -index 000000000..dae4d0ce1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_1.c -@@ -0,0 +1,35 @@ -+/* { dg-do compile } */ -+/* { dg-options "-std=c99" } */ -+ -+#include <arm_sve.h> -+ -+struct s { int i; }; -+ -+void -+f1 (svbool_t pg, signed char *s8_ptr, short *s16_ptr, -+ svint8_t s8, svint16_t s16, -+ svint32_t s32, svuint32_t u32, svfloat32_t f32, -+ svint64_t s64, svuint64_t u64, svfloat64_t f64, struct s s) -+{ -+ svld1sb_gather_offset (pg, s8_ptr, s32); /* { dg-warning {implicit declaration of function 'svld1sb_gather_offset'; did you mean 'svld1_gather_offset'} } */ -+ svld1sb_gather_offset_s32 (pg, s8_ptr); /* { dg-error {too few arguments to function 'svld1sb_gather_offset_s32'} } */ -+ svld1sb_gather_offset_s32 (pg, s8_ptr, s32, 0); /* { dg-error {too many arguments to function 'svld1sb_gather_offset_s32'} } */ -+ svld1sb_gather_offset_s32 (pg, s16_ptr, s32); /* { dg-warning {passing argument 2 of 'svld1sb_gather_s32offset_s32' from incompatible pointer type} } */ -+ svld1sb_gather_offset_s32 (pg, s8_ptr, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svld1sb_gather_offset_s32', which expects a vector of 32-bit integers} } */ -+ svld1sb_gather_offset_s32 (pg, s8_ptr, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svld1sb_gather_offset_s32', which expects a vector of 32-bit integers} } */ -+ svld1sb_gather_offset_s32 (pg, s8_ptr, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svld1sb_gather_offset_s32', which expects a vector of 32-bit integers} } */ -+ svld1sb_gather_offset_s32 (pg, s8_ptr, s32); -+ svld1sb_gather_offset_s32 (pg, s8_ptr, u32); -+ svld1sb_gather_offset_s32 (pg, s8_ptr, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svld1sb_gather_offset_s32', which expects a vector of 32-bit integers} } */ -+ svld1sb_gather_offset_s32 (pg, s8_ptr, s64); /* { dg-error {passing 'svint64_t' to argument 3 of 'svld1sb_gather_offset_s32', which expects a vector of 32-bit integers} } */ -+ svld1sb_gather_offset_s32 (pg, s8_ptr, u64); /* { dg-error {passing 'svuint64_t' to argument 3 of 'svld1sb_gather_offset_s32', which expects a vector of 32-bit integers} } */ -+ svld1sb_gather_offset_s32 (pg, s8_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1sb_gather_offset_s32', which expects a vector of 32-bit integers} } */ -+ -+ svld1sb_gather_offset_s32 (pg, 0, s32); -+ svld1sb_gather_offset_s32 (pg, s, s32); /* { dg-error {'struct s' to argument 2 of 'svld1sb_gather_offset_s32', which expects a vector or pointer base address} } */ -+ -+ svld1sb_gather_offset_s32 (pg, pg, 0); /* { dg-error {passing 'svbool_t' to argument 2 of 
'svld1sb_gather_offset_s32', which expects 'svuint32_t'} } */ -+ svld1sb_gather_offset_s32 (pg, s32, 0); /* { dg-error {passing 'svint32_t' to argument 2 of 'svld1sb_gather_offset_s32', which expects 'svuint32_t'} } */ -+ svld1sb_gather_offset_s32 (pg, u32, 0); -+ svld1sb_gather_offset_s32 (pg, u64, 0); /* { dg-error {passing 'svuint64_t' to argument 2 of 'svld1sb_gather_offset_s32', which expects 'svuint32_t'} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_2.c -new file mode 100644 -index 000000000..1bc66977c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_2.c -@@ -0,0 +1,35 @@ -+/* { dg-do compile } */ -+/* { dg-options "-std=c99" } */ -+ -+#include <arm_sve.h> -+ -+struct s { int i; }; -+ -+void -+f1 (svbool_t pg, signed char *s8_ptr, short *s16_ptr, -+ svint8_t s8, svint16_t s16, -+ svint32_t s32, svuint32_t u32, svfloat32_t f32, -+ svint64_t s64, svuint64_t u64, svfloat64_t f64, struct s s) -+{ -+ svld1sb_gather_offset (pg, s8_ptr, s32); /* { dg-warning {implicit declaration of function 'svld1sb_gather_offset'; did you mean 'svld1_gather_offset'} } */ -+ svld1sb_gather_offset_u32 (pg, s8_ptr); /* { dg-error {too few arguments to function 'svld1sb_gather_offset_u32'} } */ -+ svld1sb_gather_offset_u32 (pg, s8_ptr, s32, 0); /* { dg-error {too many arguments to function 'svld1sb_gather_offset_u32'} } */ -+ svld1sb_gather_offset_u32 (pg, s16_ptr, s32); /* { dg-warning {passing argument 2 of 'svld1sb_gather_s32offset_u32' from incompatible pointer type} } */ -+ svld1sb_gather_offset_u32 (pg, s8_ptr, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svld1sb_gather_offset_u32', which expects a vector of 32-bit integers} } */ -+ svld1sb_gather_offset_u32 (pg, s8_ptr, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svld1sb_gather_offset_u32', which expects a vector of 32-bit integers} } */ -+ svld1sb_gather_offset_u32 (pg, s8_ptr, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svld1sb_gather_offset_u32', which expects a vector of 32-bit integers} } */ -+ svld1sb_gather_offset_u32 (pg, s8_ptr, s32); -+ svld1sb_gather_offset_u32 (pg, s8_ptr, u32); -+ svld1sb_gather_offset_u32 (pg, s8_ptr, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svld1sb_gather_offset_u32', which expects a vector of 32-bit integers} } */ -+ svld1sb_gather_offset_u32 (pg, s8_ptr, s64); /* { dg-error {passing 'svint64_t' to argument 3 of 'svld1sb_gather_offset_u32', which expects a vector of 32-bit integers} } */ -+ svld1sb_gather_offset_u32 (pg, s8_ptr, u64); /* { dg-error {passing 'svuint64_t' to argument 3 of 'svld1sb_gather_offset_u32', which expects a vector of 32-bit integers} } */ -+ svld1sb_gather_offset_u32 (pg, s8_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1sb_gather_offset_u32', which expects a vector of 32-bit integers} } */ -+ -+ svld1sb_gather_offset_u32 (pg, 0, s32); -+ svld1sb_gather_offset_u32 (pg, s, s32); /* { dg-error {'struct s' to argument 2 of 'svld1sb_gather_offset_u32', which expects a vector or pointer base address} } */ -+ -+ svld1sb_gather_offset_u32 (pg, pg, 0); /* { dg-error {passing 'svbool_t' to argument 2 of 'svld1sb_gather_offset_u32', which expects 'svuint32_t'} } */ -+ svld1sb_gather_offset_u32 (pg, s32, 0); /* { dg-error {passing 'svint32_t' to argument 2 of 'svld1sb_gather_offset_u32', which expects 'svuint32_t'} } */ -+ svld1sb_gather_offset_u32 (pg, u32, 
0); -+ svld1sb_gather_offset_u32 (pg, u64, 0); /* { dg-error {passing 'svuint64_t' to argument 2 of 'svld1sb_gather_offset_u32', which expects 'svuint32_t'} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_3.c -new file mode 100644 -index 000000000..6522889db ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_3.c -@@ -0,0 +1,35 @@ -+/* { dg-do compile } */ -+/* { dg-options "-std=c99" } */ -+ -+#include <arm_sve.h> -+ -+struct s { int i; }; -+ -+void -+f1 (svbool_t pg, signed char *s8_ptr, short *s16_ptr, -+ svint8_t s8, svint16_t s16, -+ svint32_t s32, svuint32_t u32, svfloat32_t f32, -+ svint64_t s64, svuint64_t u64, svfloat64_t f64, struct s s) -+{ -+ svld1sb_gather_offset (pg, s8_ptr, s64); /* { dg-warning {implicit declaration of function 'svld1sb_gather_offset'; did you mean 'svld1_gather_offset'} } */ -+ svld1sb_gather_offset_s64 (pg, s8_ptr); /* { dg-error {too few arguments to function 'svld1sb_gather_offset_s64'} } */ -+ svld1sb_gather_offset_s64 (pg, s8_ptr, s64, 0); /* { dg-error {too many arguments to function 'svld1sb_gather_offset_s64'} } */ -+ svld1sb_gather_offset_s64 (pg, s16_ptr, s64); /* { dg-warning {passing argument 2 of 'svld1sb_gather_s64offset_s64' from incompatible pointer type} } */ -+ svld1sb_gather_offset_s64 (pg, s8_ptr, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svld1sb_gather_offset_s64', which expects a vector of 64-bit integers} } */ -+ svld1sb_gather_offset_s64 (pg, s8_ptr, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svld1sb_gather_offset_s64', which expects a vector of 64-bit integers} } */ -+ svld1sb_gather_offset_s64 (pg, s8_ptr, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svld1sb_gather_offset_s64', which expects a vector of 64-bit integers} } */ -+ svld1sb_gather_offset_s64 (pg, s8_ptr, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svld1sb_gather_offset_s64', which expects a vector of 64-bit integers} } */ -+ svld1sb_gather_offset_s64 (pg, s8_ptr, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svld1sb_gather_offset_s64', which expects a vector of 64-bit integers} } */ -+ svld1sb_gather_offset_s64 (pg, s8_ptr, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svld1sb_gather_offset_s64', which expects a vector of 64-bit integers} } */ -+ svld1sb_gather_offset_s64 (pg, s8_ptr, s64); -+ svld1sb_gather_offset_s64 (pg, s8_ptr, u64); -+ svld1sb_gather_offset_s64 (pg, s8_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1sb_gather_offset_s64', which expects a vector of 64-bit integers} } */ -+ -+ svld1sb_gather_offset_s64 (pg, 0, s64); -+ svld1sb_gather_offset_s64 (pg, s, s64); /* { dg-error {'struct s' to argument 2 of 'svld1sb_gather_offset_s64', which expects a vector or pointer base address} } */ -+ -+ svld1sb_gather_offset_s64 (pg, pg, 0); /* { dg-error {passing 'svbool_t' to argument 2 of 'svld1sb_gather_offset_s64', which expects 'svuint64_t'} } */ -+ svld1sb_gather_offset_s64 (pg, s32, 0); /* { dg-error {passing 'svint32_t' to argument 2 of 'svld1sb_gather_offset_s64', which expects 'svuint64_t'} } */ -+ svld1sb_gather_offset_s64 (pg, u32, 0); /* { dg-error {passing 'svuint32_t' to argument 2 of 'svld1sb_gather_offset_s64', which expects 'svuint64_t'} } */ -+ svld1sb_gather_offset_s64 (pg, u64, 0); -+} -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_4.c -new file mode 100644 -index 000000000..025621989 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_4.c -@@ -0,0 +1,35 @@ -+/* { dg-do compile } */ -+/* { dg-options "-std=c99" } */ -+ -+#include <arm_sve.h> -+ -+struct s { int i; }; -+ -+void -+f1 (svbool_t pg, signed char *s8_ptr, short *s16_ptr, -+ svint8_t s8, svint16_t s16, -+ svint32_t s32, svuint32_t u32, svfloat32_t f32, -+ svint64_t s64, svuint64_t u64, svfloat64_t f64, struct s s) -+{ -+ svld1sb_gather_offset (pg, s8_ptr, s64); /* { dg-warning {implicit declaration of function 'svld1sb_gather_offset'; did you mean 'svld1_gather_offset'} } */ -+ svld1sb_gather_offset_u64 (pg, s8_ptr); /* { dg-error {too few arguments to function 'svld1sb_gather_offset_u64'} } */ -+ svld1sb_gather_offset_u64 (pg, s8_ptr, s64, 0); /* { dg-error {too many arguments to function 'svld1sb_gather_offset_u64'} } */ -+ svld1sb_gather_offset_u64 (pg, s16_ptr, s64); /* { dg-warning {passing argument 2 of 'svld1sb_gather_s64offset_u64' from incompatible pointer type} } */ -+ svld1sb_gather_offset_u64 (pg, s8_ptr, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svld1sb_gather_offset_u64', which expects a vector of 64-bit integers} } */ -+ svld1sb_gather_offset_u64 (pg, s8_ptr, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svld1sb_gather_offset_u64', which expects a vector of 64-bit integers} } */ -+ svld1sb_gather_offset_u64 (pg, s8_ptr, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svld1sb_gather_offset_u64', which expects a vector of 64-bit integers} } */ -+ svld1sb_gather_offset_u64 (pg, s8_ptr, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svld1sb_gather_offset_u64', which expects a vector of 64-bit integers} } */ -+ svld1sb_gather_offset_u64 (pg, s8_ptr, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svld1sb_gather_offset_u64', which expects a vector of 64-bit integers} } */ -+ svld1sb_gather_offset_u64 (pg, s8_ptr, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svld1sb_gather_offset_u64', which expects a vector of 64-bit integers} } */ -+ svld1sb_gather_offset_u64 (pg, s8_ptr, s64); -+ svld1sb_gather_offset_u64 (pg, s8_ptr, u64); -+ svld1sb_gather_offset_u64 (pg, s8_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1sb_gather_offset_u64', which expects a vector of 64-bit integers} } */ -+ -+ svld1sb_gather_offset_u64 (pg, 0, s64); -+ svld1sb_gather_offset_u64 (pg, s, s64); /* { dg-error {'struct s' to argument 2 of 'svld1sb_gather_offset_u64', which expects a vector or pointer base address} } */ -+ -+ svld1sb_gather_offset_u64 (pg, pg, 0); /* { dg-error {passing 'svbool_t' to argument 2 of 'svld1sb_gather_offset_u64', which expects 'svuint64_t'} } */ -+ svld1sb_gather_offset_u64 (pg, s32, 0); /* { dg-error {passing 'svint32_t' to argument 2 of 'svld1sb_gather_offset_u64', which expects 'svuint64_t'} } */ -+ svld1sb_gather_offset_u64 (pg, u32, 0); /* { dg-error {passing 'svuint32_t' to argument 2 of 'svld1sb_gather_offset_u64', which expects 'svuint64_t'} } */ -+ svld1sb_gather_offset_u64 (pg, u64, 0); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_5.c -new file mode 100644 -index 000000000..8d57aa020 ---- /dev/null -+++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_ext_gather_offset_5.c -@@ -0,0 +1,35 @@ -+/* { dg-do compile } */ -+/* { dg-options "-std=c99" } */ -+ -+#include <arm_sve.h> -+ -+struct s { int i; }; -+ -+void -+f1 (svbool_t pg, unsigned char *s8_ptr, unsigned short *s16_ptr, -+ svint8_t s8, svint16_t s16, -+ svint32_t s32, svuint32_t u32, svfloat32_t f32, -+ svint64_t s64, svuint64_t u64, svfloat64_t f64, struct s s) -+{ -+ svld1ub_gather_offset (pg, s8_ptr, s32); /* { dg-warning {implicit declaration of function 'svld1ub_gather_offset'; did you mean 'svld1_gather_offset'} } */ -+ svld1ub_gather_offset_s32 (pg, s8_ptr); /* { dg-error {too few arguments to function 'svld1ub_gather_offset_s32'} } */ -+ svld1ub_gather_offset_s32 (pg, s8_ptr, s32, 0); /* { dg-error {too many arguments to function 'svld1ub_gather_offset_s32'} } */ -+ svld1ub_gather_offset_s32 (pg, s16_ptr, s32); /* { dg-warning {passing argument 2 of 'svld1ub_gather_s32offset_s32' from incompatible pointer type} } */ -+ svld1ub_gather_offset_s32 (pg, s8_ptr, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svld1ub_gather_offset_s32', which expects a vector of 32-bit integers} } */ -+ svld1ub_gather_offset_s32 (pg, s8_ptr, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svld1ub_gather_offset_s32', which expects a vector of 32-bit integers} } */ -+ svld1ub_gather_offset_s32 (pg, s8_ptr, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svld1ub_gather_offset_s32', which expects a vector of 32-bit integers} } */ -+ svld1ub_gather_offset_s32 (pg, s8_ptr, s32); -+ svld1ub_gather_offset_s32 (pg, s8_ptr, u32); -+ svld1ub_gather_offset_s32 (pg, s8_ptr, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svld1ub_gather_offset_s32', which expects a vector of 32-bit integers} } */ -+ svld1ub_gather_offset_s32 (pg, s8_ptr, s64); /* { dg-error {passing 'svint64_t' to argument 3 of 'svld1ub_gather_offset_s32', which expects a vector of 32-bit integers} } */ -+ svld1ub_gather_offset_s32 (pg, s8_ptr, u64); /* { dg-error {passing 'svuint64_t' to argument 3 of 'svld1ub_gather_offset_s32', which expects a vector of 32-bit integers} } */ -+ svld1ub_gather_offset_s32 (pg, s8_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1ub_gather_offset_s32', which expects a vector of 32-bit integers} } */ -+ -+ svld1ub_gather_offset_s32 (pg, 0, s32); -+ svld1ub_gather_offset_s32 (pg, s, s32); /* { dg-error {'struct s' to argument 2 of 'svld1ub_gather_offset_s32', which expects a vector or pointer base address} } */ -+ -+ svld1ub_gather_offset_s32 (pg, pg, 0); /* { dg-error {passing 'svbool_t' to argument 2 of 'svld1ub_gather_offset_s32', which expects 'svuint32_t'} } */ -+ svld1ub_gather_offset_s32 (pg, s32, 0); /* { dg-error {passing 'svint32_t' to argument 2 of 'svld1ub_gather_offset_s32', which expects 'svuint32_t'} } */ -+ svld1ub_gather_offset_s32 (pg, u32, 0); -+ svld1ub_gather_offset_s32 (pg, u64, 0); /* { dg-error {passing 'svuint64_t' to argument 2 of 'svld1ub_gather_offset_s32', which expects 'svuint32_t'} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_gather_sv_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_gather_sv_1.c -new file mode 100644 -index 000000000..21566a9d9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_gather_sv_1.c -@@ -0,0 +1,80 @@ -+/* { dg-do compile } */ -+/* { dg-options "-std=c99" } */ -+ -+#include <arm_sve.h> -+ -+struct s { signed char x; }; -+ -+svuint32_t -+f1 (svbool_t pg, signed char *s8_ptr, short 
*s16_ptr, -+ int32_t *s32_ptr, uint32_t *u32_ptr, float *f32_ptr, -+ int64_t *s64_ptr, uint64_t *u64_ptr, double *f64_ptr, -+ void *void_ptr, struct s *s_ptr, _Complex float *cf32_ptr, int **ptr_ptr, -+ svint8_t s8, svint16_t s16, -+ svint32_t s32, svuint32_t u32, svfloat32_t f32, -+ svint64_t s64, svuint64_t u64, svfloat64_t f64) -+{ -+ svld1_gather_offset (pg, s32_ptr); /* { dg-error {too few arguments to function 'svld1_gather_offset'} } */ -+ svld1_gather_offset (pg, s32_ptr, s32, 0); /* { dg-error {too many arguments to function 'svld1_gather_offset'} } */ -+ svld1_gather_offset (0, s32_ptr, s32); /* { dg-error {passing 'int' to argument 1 of 'svld1_gather_offset', which expects 'svbool_t'} } */ -+ svld1_gather_offset (pg, 0, s32); /* { dg-error {passing 'int' to argument 2 of 'svld1_gather_offset', which expects a pointer type} } */ -+ svld1_gather_offset (pg, (int *) 0, s32); -+ svld1_gather_offset (pg, void_ptr, s32); /* { dg-error {passing 'void \*' to argument 2 of 'svld1_gather_offset', but 'void' is not a valid SVE element type} } */ -+ svld1_gather_offset (pg, s_ptr, s32); /* { dg-error {passing 'struct s \*' to argument 2 of 'svld1_gather_offset', but 'struct s' is not a valid SVE element type} } */ -+ svld1_gather_offset (pg, f32_ptr, s32); -+ svld1_gather_offset (pg, cf32_ptr, s32); /* { dg-error {passing '_Complex float \*' to argument 2 of 'svld1_gather_offset', but 'complex float' is not a valid SVE element type} } */ -+ svld1_gather_offset (pg, ptr_ptr, u64); /* { dg-error {passing 'int \*\*' to argument 2 of 'svld1_gather_offset', but 'int \*' is not a valid SVE element type} } */ -+ svld1_gather_offset (pg, u32, 0); /* { dg-error {passing 'svuint32_t' to argument 2 of 'svld1_gather_offset', which expects a pointer type} } */ -+ /* { dg-message {an explicit type suffix is needed when using a vector of base addresses} "" { target *-*-* } .-1 } */ -+ svld1_gather_offset (pg, u64, 0); /* { dg-error {passing 'svuint64_t' to argument 2 of 'svld1_gather_offset', which expects a pointer type} } */ -+ /* { dg-message {an explicit type suffix is needed when using a vector of base addresses} "" { target *-*-* } .-1 } */ -+ -+ svld1_gather_offset (pg, s8_ptr, s8); /* { dg-error {passing 'signed char \*' to argument 2 of 'svld1_gather_offset', which expects a pointer to 32-bit or 64-bit elements} } */ -+ svld1_gather_offset (pg, s8_ptr, s32); /* { dg-error {passing 'signed char \*' to argument 2 of 'svld1_gather_offset', which expects a pointer to 32-bit or 64-bit elements} } */ -+ svld1_gather_offset (pg, s16_ptr, s16); /* { dg-error {passing 'short( int)? \*' to argument 2 of 'svld1_gather_offset', which expects a pointer to 32-bit or 64-bit elements} } */ -+ svld1_gather_offset (pg, s16_ptr, s32); /* { dg-error {passing 'short( int)? 
\*' to argument 2 of 'svld1_gather_offset', which expects a pointer to 32-bit or 64-bit elements} } */ -+ -+ svld1_gather_offset (pg, s32_ptr, s32); -+ svld1_gather_offset (pg, s32_ptr, u32); -+ svld1_gather_offset (pg, s32_ptr, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svld1_gather_offset', which when loading 'svint32_t' expects a vector of 32-bit integers} } */ -+ svld1_gather_offset (pg, s32_ptr, s64); /* { dg-error {passing 'svint64_t' to argument 3 of 'svld1_gather_offset', which when loading 'svint32_t' expects a vector of 32-bit integers} } */ -+ svld1_gather_offset (pg, s32_ptr, u64); /* { dg-error {passing 'svuint64_t' to argument 3 of 'svld1_gather_offset', which when loading 'svint32_t' expects a vector of 32-bit integers} } */ -+ svld1_gather_offset (pg, s32_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1_gather_offset', which when loading 'svint32_t' expects a vector of 32-bit integers} } */ -+ -+ svld1_gather_offset (pg, u32_ptr, s32); -+ svld1_gather_offset (pg, u32_ptr, u32); -+ svld1_gather_offset (pg, u32_ptr, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svld1_gather_offset', which when loading 'svuint32_t' expects a vector of 32-bit integers} } */ -+ svld1_gather_offset (pg, u32_ptr, s64); /* { dg-error {passing 'svint64_t' to argument 3 of 'svld1_gather_offset', which when loading 'svuint32_t' expects a vector of 32-bit integers} } */ -+ svld1_gather_offset (pg, u32_ptr, u64); /* { dg-error {passing 'svuint64_t' to argument 3 of 'svld1_gather_offset', which when loading 'svuint32_t' expects a vector of 32-bit integers} } */ -+ svld1_gather_offset (pg, u32_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1_gather_offset', which when loading 'svuint32_t' expects a vector of 32-bit integers} } */ -+ -+ svld1_gather_offset (pg, f32_ptr, s32); -+ svld1_gather_offset (pg, f32_ptr, u32); -+ svld1_gather_offset (pg, f32_ptr, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svld1_gather_offset', which when loading 'svfloat32_t' expects a vector of 32-bit integers} } */ -+ svld1_gather_offset (pg, f32_ptr, s64); /* { dg-error {passing 'svint64_t' to argument 3 of 'svld1_gather_offset', which when loading 'svfloat32_t' expects a vector of 32-bit integers} } */ -+ svld1_gather_offset (pg, f32_ptr, u64); /* { dg-error {passing 'svuint64_t' to argument 3 of 'svld1_gather_offset', which when loading 'svfloat32_t' expects a vector of 32-bit integers} } */ -+ svld1_gather_offset (pg, f32_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1_gather_offset', which when loading 'svfloat32_t' expects a vector of 32-bit integers} } */ -+ -+ svld1_gather_offset (pg, s64_ptr, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svld1_gather_offset', which when loading 'svint64_t' expects a vector of 64-bit integers} } */ -+ svld1_gather_offset (pg, s64_ptr, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svld1_gather_offset', which when loading 'svint64_t' expects a vector of 64-bit integers} } */ -+ svld1_gather_offset (pg, s64_ptr, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svld1_gather_offset', which when loading 'svint64_t' expects a vector of 64-bit integers} } */ -+ svld1_gather_offset (pg, s64_ptr, s64); -+ svld1_gather_offset (pg, s64_ptr, u64); -+ svld1_gather_offset (pg, s64_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1_gather_offset', which when loading 'svint64_t' expects a vector of 64-bit integers} } */ -+ -+ 
svld1_gather_offset (pg, u64_ptr, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svld1_gather_offset', which when loading 'svuint64_t' expects a vector of 64-bit integers} } */ -+ svld1_gather_offset (pg, u64_ptr, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svld1_gather_offset', which when loading 'svuint64_t' expects a vector of 64-bit integers} } */ -+ svld1_gather_offset (pg, u64_ptr, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svld1_gather_offset', which when loading 'svuint64_t' expects a vector of 64-bit integers} } */ -+ svld1_gather_offset (pg, u64_ptr, s64); -+ svld1_gather_offset (pg, u64_ptr, u64); -+ svld1_gather_offset (pg, u64_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1_gather_offset', which when loading 'svuint64_t' expects a vector of 64-bit integers} } */ -+ -+ svld1_gather_offset (pg, f64_ptr, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svld1_gather_offset', which when loading 'svfloat64_t' expects a vector of 64-bit integers} } */ -+ svld1_gather_offset (pg, f64_ptr, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svld1_gather_offset', which when loading 'svfloat64_t' expects a vector of 64-bit integers} } */ -+ svld1_gather_offset (pg, f64_ptr, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svld1_gather_offset', which when loading 'svfloat64_t' expects a vector of 64-bit integers} } */ -+ svld1_gather_offset (pg, f64_ptr, s64); -+ svld1_gather_offset (pg, f64_ptr, u64); -+ svld1_gather_offset (pg, f64_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1_gather_offset', which when loading 'svfloat64_t' expects a vector of 64-bit integers} } */ -+ -+ return svld1_gather_offset (pg, s32_ptr, s32); /* { dg-error {incompatible types when returning type 'svint32_t' but 'svuint32_t' was expected} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_gather_sv_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_gather_sv_2.c -new file mode 100644 -index 000000000..4c15fc40c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_gather_sv_2.c -@@ -0,0 +1,80 @@ -+/* { dg-do compile } */ -+/* { dg-options "-std=c99" } */ -+ -+#include <arm_sve.h> -+ -+struct s { signed char x; }; -+ -+svuint32_t -+f1 (svbool_t pg, signed char *s8_ptr, short *s16_ptr, -+ int32_t *s32_ptr, uint32_t *u32_ptr, float *f32_ptr, -+ int64_t *s64_ptr, uint64_t *u64_ptr, double *f64_ptr, -+ void *void_ptr, struct s *s_ptr, _Complex float *cf32_ptr, int **ptr_ptr, -+ svint8_t s8, svint16_t s16, -+ svint32_t s32, svuint32_t u32, svfloat32_t f32, -+ svint64_t s64, svuint64_t u64, svfloat64_t f64) -+{ -+ svld1_gather_index (pg, s32_ptr); /* { dg-error {too few arguments to function 'svld1_gather_index'} } */ -+ svld1_gather_index (pg, s32_ptr, s32, 0); /* { dg-error {too many arguments to function 'svld1_gather_index'} } */ -+ svld1_gather_index (0, s32_ptr, s32); /* { dg-error {passing 'int' to argument 1 of 'svld1_gather_index', which expects 'svbool_t'} } */ -+ svld1_gather_index (pg, 0, s32); /* { dg-error {passing 'int' to argument 2 of 'svld1_gather_index', which expects a pointer type} } */ -+ svld1_gather_index (pg, (int *) 0, s32); -+ svld1_gather_index (pg, void_ptr, s32); /* { dg-error {passing 'void \*' to argument 2 of 'svld1_gather_index', but 'void' is not a valid SVE element type} } */ -+ svld1_gather_index (pg, s_ptr, s32); /* { dg-error {passing 'struct s \*' to argument 2 of 'svld1_gather_index', but 'struct s' is 
not a valid SVE element type} } */ -+ svld1_gather_index (pg, f32_ptr, s32); -+ svld1_gather_index (pg, cf32_ptr, s32); /* { dg-error {passing '_Complex float \*' to argument 2 of 'svld1_gather_index', but 'complex float' is not a valid SVE element type} } */ -+ svld1_gather_index (pg, ptr_ptr, u64); /* { dg-error {passing 'int \*\*' to argument 2 of 'svld1_gather_index', but 'int \*' is not a valid SVE element type} } */ -+ svld1_gather_index (pg, u32, 0); /* { dg-error {passing 'svuint32_t' to argument 2 of 'svld1_gather_index', which expects a pointer type} } */ -+ /* { dg-message {an explicit type suffix is needed when using a vector of base addresses} "" { target *-*-* } .-1 } */ -+ svld1_gather_index (pg, u64, 0); /* { dg-error {passing 'svuint64_t' to argument 2 of 'svld1_gather_index', which expects a pointer type} } */ -+ /* { dg-message {an explicit type suffix is needed when using a vector of base addresses} "" { target *-*-* } .-1 } */ -+ -+ svld1_gather_index (pg, s8_ptr, s8); /* { dg-error {passing 'signed char \*' to argument 2 of 'svld1_gather_index', which expects a pointer to 32-bit or 64-bit elements} } */ -+ svld1_gather_index (pg, s8_ptr, s32); /* { dg-error {passing 'signed char \*' to argument 2 of 'svld1_gather_index', which expects a pointer to 32-bit or 64-bit elements} } */ -+ svld1_gather_index (pg, s16_ptr, s16); /* { dg-error {passing 'short( int)? \*' to argument 2 of 'svld1_gather_index', which expects a pointer to 32-bit or 64-bit elements} } */ -+ svld1_gather_index (pg, s16_ptr, s32); /* { dg-error {passing 'short( int)? \*' to argument 2 of 'svld1_gather_index', which expects a pointer to 32-bit or 64-bit elements} } */ -+ -+ svld1_gather_index (pg, s32_ptr, s32); -+ svld1_gather_index (pg, s32_ptr, u32); -+ svld1_gather_index (pg, s32_ptr, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svld1_gather_index', which when loading 'svint32_t' expects a vector of 32-bit integers} } */ -+ svld1_gather_index (pg, s32_ptr, s64); /* { dg-error {passing 'svint64_t' to argument 3 of 'svld1_gather_index', which when loading 'svint32_t' expects a vector of 32-bit integers} } */ -+ svld1_gather_index (pg, s32_ptr, u64); /* { dg-error {passing 'svuint64_t' to argument 3 of 'svld1_gather_index', which when loading 'svint32_t' expects a vector of 32-bit integers} } */ -+ svld1_gather_index (pg, s32_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1_gather_index', which when loading 'svint32_t' expects a vector of 32-bit integers} } */ -+ -+ svld1_gather_index (pg, u32_ptr, s32); -+ svld1_gather_index (pg, u32_ptr, u32); -+ svld1_gather_index (pg, u32_ptr, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svld1_gather_index', which when loading 'svuint32_t' expects a vector of 32-bit integers} } */ -+ svld1_gather_index (pg, u32_ptr, s64); /* { dg-error {passing 'svint64_t' to argument 3 of 'svld1_gather_index', which when loading 'svuint32_t' expects a vector of 32-bit integers} } */ -+ svld1_gather_index (pg, u32_ptr, u64); /* { dg-error {passing 'svuint64_t' to argument 3 of 'svld1_gather_index', which when loading 'svuint32_t' expects a vector of 32-bit integers} } */ -+ svld1_gather_index (pg, u32_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1_gather_index', which when loading 'svuint32_t' expects a vector of 32-bit integers} } */ -+ -+ svld1_gather_index (pg, f32_ptr, s32); -+ svld1_gather_index (pg, f32_ptr, u32); -+ svld1_gather_index (pg, f32_ptr, f32); /* { dg-error {passing 'svfloat32_t' to 
argument 3 of 'svld1_gather_index', which when loading 'svfloat32_t' expects a vector of 32-bit integers} } */ -+ svld1_gather_index (pg, f32_ptr, s64); /* { dg-error {passing 'svint64_t' to argument 3 of 'svld1_gather_index', which when loading 'svfloat32_t' expects a vector of 32-bit integers} } */ -+ svld1_gather_index (pg, f32_ptr, u64); /* { dg-error {passing 'svuint64_t' to argument 3 of 'svld1_gather_index', which when loading 'svfloat32_t' expects a vector of 32-bit integers} } */ -+ svld1_gather_index (pg, f32_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1_gather_index', which when loading 'svfloat32_t' expects a vector of 32-bit integers} } */ -+ -+ svld1_gather_index (pg, s64_ptr, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svld1_gather_index', which when loading 'svint64_t' expects a vector of 64-bit integers} } */ -+ svld1_gather_index (pg, s64_ptr, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svld1_gather_index', which when loading 'svint64_t' expects a vector of 64-bit integers} } */ -+ svld1_gather_index (pg, s64_ptr, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svld1_gather_index', which when loading 'svint64_t' expects a vector of 64-bit integers} } */ -+ svld1_gather_index (pg, s64_ptr, s64); -+ svld1_gather_index (pg, s64_ptr, u64); -+ svld1_gather_index (pg, s64_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1_gather_index', which when loading 'svint64_t' expects a vector of 64-bit integers} } */ -+ -+ svld1_gather_index (pg, u64_ptr, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svld1_gather_index', which when loading 'svuint64_t' expects a vector of 64-bit integers} } */ -+ svld1_gather_index (pg, u64_ptr, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svld1_gather_index', which when loading 'svuint64_t' expects a vector of 64-bit integers} } */ -+ svld1_gather_index (pg, u64_ptr, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svld1_gather_index', which when loading 'svuint64_t' expects a vector of 64-bit integers} } */ -+ svld1_gather_index (pg, u64_ptr, s64); -+ svld1_gather_index (pg, u64_ptr, u64); -+ svld1_gather_index (pg, u64_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1_gather_index', which when loading 'svuint64_t' expects a vector of 64-bit integers} } */ -+ -+ svld1_gather_index (pg, f64_ptr, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svld1_gather_index', which when loading 'svfloat64_t' expects a vector of 64-bit integers} } */ -+ svld1_gather_index (pg, f64_ptr, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svld1_gather_index', which when loading 'svfloat64_t' expects a vector of 64-bit integers} } */ -+ svld1_gather_index (pg, f64_ptr, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svld1_gather_index', which when loading 'svfloat64_t' expects a vector of 64-bit integers} } */ -+ svld1_gather_index (pg, f64_ptr, s64); -+ svld1_gather_index (pg, f64_ptr, u64); -+ svld1_gather_index (pg, f64_ptr, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svld1_gather_index', which when loading 'svfloat64_t' expects a vector of 64-bit integers} } */ -+ -+ return svld1_gather_index (pg, s32_ptr, s32); /* { dg-error {incompatible types when returning type 'svint32_t' but 'svuint32_t' was expected} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_replicate_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_replicate_1.c -new file mode 100644 -index 000000000..d4ff76ea8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/load_replicate_1.c -@@ -0,0 +1,23 @@ -+/* { dg-do compile } */ -+/* { dg-options "-std=c99" } */ -+ -+#include <arm_sve.h> -+ -+struct s { signed char x; }; -+ -+svuint8_t -+f1 (svbool_t pg, signed char *s8_ptr, void *void_ptr, struct s *s_ptr, -+ float *f32_ptr, _Complex float *cf32_ptr, int **ptr_ptr) -+{ -+ svld1rq (pg); /* { dg-error {too few arguments to function 'svld1rq'} } */ -+ svld1rq (pg, s8_ptr, 0); /* { dg-error {too many arguments to function 'svld1rq'} } */ -+ svld1rq (0, s8_ptr); /* { dg-error {passing 'int' to argument 1 of 'svld1rq', which expects 'svbool_t'} } */ -+ svld1rq (pg, 0); /* { dg-error {passing 'int' to argument 2 of 'svld1rq', which expects a pointer type} } */ -+ svld1rq (pg, (int *) 0); -+ svld1rq (pg, void_ptr); /* { dg-error {passing 'void \*' to argument 2 of 'svld1rq', but 'void' is not a valid SVE element type} } */ -+ svld1rq (pg, s_ptr); /* { dg-error {passing 'struct s \*' to argument 2 of 'svld1rq', but 'struct s' is not a valid SVE element type} } */ -+ svld1rq (pg, f32_ptr); -+ svld1rq (pg, cf32_ptr); /* { dg-error {passing '_Complex float \*' to argument 2 of 'svld1rq', but 'complex float' is not a valid SVE element type} } */ -+ svld1rq (pg, ptr_ptr); /* { dg-error {passing 'int \*\*' to argument 2 of 'svld1rq', but 'int \*' is not a valid SVE element type} } */ -+ return svld1rq (pg, s8_ptr); /* { dg-error {incompatible types when returning type 'svint8_t' but 'svuint8_t' was expected} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_1.c -new file mode 100644 -index 000000000..5b0b00e96 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_1.c -@@ -0,0 +1,58 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-march=armv8.2-a+sve+i8mm+f32mm+f64mm" } */ -+ -+#include <arm_sve.h> -+ -+svuint32_t -+f1 (svint32_t s32, svuint8_t u8, svint8_t s8, svuint32_t u32) -+{ -+ svmmla_s32 (s32); /* { dg-error {too few arguments to function 'svmmla_s32'} } */ -+ svmmla_s32 (s32, s8, s8, u32); /* { dg-error {too many arguments to function 'svmmla_s32'} } */ -+ svmmla_s32 (s32, u32, s8); /* { dg-error {incompatible type for argument 2 of 'svmmla_s32'} } */ -+ svmmla_s32 (s32, u8, s8); /* { dg-error {incompatible type for argument 2 of 'svmmla_s32'} } */ -+ svmmla_s32 (s32, s8, u8); /* { dg-error {incompatible type for argument 3 of 'svmmla_s32'} } */ -+ svmmla_s32 (s32, s8, s32); /* { dg-error {incompatible type for argument 3 of 'svmmla_s32'} } */ -+ svmmla_s32 (s32, s8, 0); /* { dg-error {incompatible type for argument 3 of 'svmmla_s32'} } */ -+ svmmla_s32 (s32, s8, s8); -+ return svmmla_s32 (s32, s8, s8); /* { dg-error {incompatible types when returning type 'svint32_t' but 'svuint32_t' was expected} } */ -+} -+ -+void -+f2 (svbool_t pg, svint8_t s8, svuint8_t u8, svuint32_t u32, svint32_t s32, -+ svfloat16_t f16, svfloat32_t f32, svfloat64_t f64) -+{ -+ svmmla (s32, s8); /* { dg-error {too few arguments to function 'svmmla'} } */ -+ svmmla (s32, s8, s8, s8); /* { dg-error {too many arguments to function 'svmmla'} } */ -+ svmmla (0, s8, s8); /* { dg-error {passing 'int' to argument 1 of 'svmmla', which expects an SVE vector type} } */ -+ svmmla (pg, s8, s8); /* { dg-error {'svmmla' has no form that takes 'svbool_t' arguments} } */ -+ svmmla (u8, s8, s8); /* { 
dg-error {'svmmla' has no form that takes 'svuint8_t' arguments} } */ -+ -+ svmmla (s32, 0, s8); /* { dg-error {passing 'int' to argument 2 of 'svmmla', which expects an SVE vector type} } */ -+ svmmla (s32, u8, s8); /* { dg-error {arguments 1 and 2 of 'svmmla' must have the same signedness, but the values passed here have type 'svint32_t' and 'svuint8_t' respectively} } */ -+ svmmla (s32, s8, u8); /* { dg-error {arguments 1 and 3 of 'svmmla' must have the same signedness, but the values passed here have type 'svint32_t' and 'svuint8_t' respectively} } */ -+ svmmla (s32, s8, 0); /* { dg-error {passing 'int' to argument 3 of 'svmmla', which expects an SVE vector type} } */ -+ svmmla (s32, s8, s8); -+ svmmla (s32, s32, s32); /* { dg-error {passing 'svint32_t' instead of the expected 'svint8_t' to argument 2 of 'svmmla', after passing 'svint32_t' to argument 1} } */ -+ svmmla (s32, u32, u32); /* { dg-error {passing 'svuint32_t' instead of the expected 'svint8_t' to argument 2 of 'svmmla', after passing 'svint32_t' to argument 1} } */ -+ -+ svmmla (u32, 0, u8); /* { dg-error {passing 'int' to argument 2 of 'svmmla', which expects an SVE vector type} } */ -+ svmmla (u32, s8, u8); /* { dg-error {arguments 1 and 2 of 'svmmla' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint8_t' respectively} } */ -+ svmmla (u32, u8, s8); /* { dg-error {arguments 1 and 3 of 'svmmla' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint8_t' respectively} } */ -+ svmmla (u32, u8, 0); /* { dg-error {passing 'int' to argument 3 of 'svmmla', which expects an SVE vector type} } */ -+ svmmla (u32, u8, u8); -+ svmmla (u32, s32, s32); /* { dg-error {passing 'svint32_t' instead of the expected 'svuint8_t' to argument 2 of 'svmmla', after passing 'svuint32_t' to argument 1} } */ -+ svmmla (u32, u32, u32); /* { dg-error {passing 'svuint32_t' instead of the expected 'svuint8_t' to argument 2 of 'svmmla', after passing 'svuint32_t' to argument 1} } */ -+ -+ svmmla (f16, s8, s8); /* { dg-error {'svmmla' has no form that takes 'svfloat16_t' arguments} } */ -+ svmmla (f32, s8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svmmla', but previous arguments had type 'svfloat32_t'} } */ -+ svmmla (f32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svmmla', but previous arguments had type 'svfloat32_t'} } */ -+ svmmla (f32, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svmmla', but previous arguments had type 'svfloat32_t'} } */ -+ svmmla (f64, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svmmla', but previous arguments had type 'svfloat64_t'} } */ -+ svmmla (f32, f32, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmmla', but previous arguments had type 'svfloat32_t'} } */ -+ svmmla (f64, f32, f16); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svmmla', but previous arguments had type 'svfloat64_t'} } */ -+ svmmla (f64, f64, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmmla', but previous arguments had type 'svfloat64_t'} } */ -+ -+ svmmla (f16, f16, f16); /* { dg-error {'svmmla' has no form that takes 'svfloat16_t' arguments} } */ -+ svmmla (f32, f32, f32); -+ svmmla (f64, f64, f64); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_2.c -new file mode 100644 -index 000000000..b54725736 ---- /dev/null -+++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_2.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-march=armv8.2-a+sve" } */ -+ -+#include <arm_sve.h> -+ -+void -+f1 (svint32_t s32, svint8_t s8) -+{ -+ svmmla_s32 (s32, s8, s8); /* { dg-error {ACLE function 'svmmla_s32' requires ISA extension 'i8mm'} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_3.c -new file mode 100644 -index 000000000..d1c8297cc ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_3.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-march=armv8.2-a+sve" } */ -+ -+#include <arm_sve.h> -+ -+void -+f1 (svint32_t s32, svint8_t s8) -+{ -+ svmmla (s32, s8, s8); /* { dg-error {ACLE function 'svmmla_s32' requires ISA extension 'i8mm'} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_4.c -new file mode 100644 -index 000000000..e6c3f5f94 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_4.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-march=armv8.2-a+sve" } */ -+ -+#include <arm_sve.h> -+ -+void -+f1 (svfloat32_t f32) -+{ -+ svmmla_f32 (f32, f32, f32); /* { dg-error {ACLE function 'svmmla_f32' requires ISA extension 'f32mm'} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_5.c -new file mode 100644 -index 000000000..8f6f42366 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_5.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-march=armv8.2-a+sve" } */ -+ -+#include <arm_sve.h> -+ -+void -+f1 (svfloat32_t f32) -+{ -+ svmmla (f32, f32, f32); /* { dg-error {ACLE function 'svmmla_f32' requires ISA extension 'f32mm'} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_6.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_6.c -new file mode 100644 -index 000000000..7ebeb4981 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_6.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-march=armv8.2-a+sve" } */ -+ -+#include <arm_sve.h> -+ -+void -+f1 (svfloat64_t f64) -+{ -+ svmmla_f64 (f64, f64, f64); /* { dg-error {ACLE function 'svmmla_f64' requires ISA extension 'f64mm'} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_7.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_7.c -new file mode 100644 -index 000000000..e64ec1ea6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_7.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-march=armv8.2-a+sve" } */ -+ -+#include <arm_sve.h> -+ -+void -+f1 (svfloat64_t f64) -+{ -+ svmmla (f64, f64, f64); /* { dg-error {ACLE function 'svmmla_f64' requires ISA extension 'f64mm'} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/pattern_pred_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/pattern_pred_1.c -new file mode 100644 -index 000000000..99b61bdf1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/pattern_pred_1.c -@@ -0,0 +1,14 @@ -+#include <arm_sve.h> -+ -+void -+test () -+{ -+ svptrue_pat_b16 ((enum svpattern) -1); /* { dg-error {passing 4294967295 to argument 1 of 'svptrue_pat_b16', which expects a valid 'enum svpattern' value} } */ -+ svptrue_pat_b16 ((enum 
svpattern) 0); -+ svptrue_pat_b16 ((enum svpattern) 13); -+ svptrue_pat_b16 ((enum svpattern) 14); /* { dg-error {passing 14 to argument 1 of 'svptrue_pat_b16', which expects a valid 'enum svpattern' value} } */ -+ svptrue_pat_b16 ((enum svpattern) 28); /* { dg-error {passing 28 to argument 1 of 'svptrue_pat_b16', which expects a valid 'enum svpattern' value} } */ -+ svptrue_pat_b16 ((enum svpattern) 29); -+ svptrue_pat_b16 ((enum svpattern) 31); -+ svptrue_pat_b16 ((enum svpattern) 32); /* { dg-error {passing 32 to argument 1 of 'svptrue_pat_b16', which expects a valid 'enum svpattern' value} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_1.c -new file mode 100644 -index 000000000..316f77fc7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_1.c -@@ -0,0 +1,17 @@ -+/* { dg-do compile } */ -+/* { dg-options "-std=c99" } */ -+ -+#include <arm_sve.h> -+ -+void -+f1 (svbool_t pg, int32_t *s32_ptr, enum svprfop op) -+{ -+ svprfb (pg, s32_ptr, op); /* { dg-error {argument 3 of 'svprfb' must be an integer constant expression} } */ -+ svprfb (pg, s32_ptr, (enum svprfop) -1); /* { dg-error {passing 4294967295 to argument 3 of 'svprfb', which expects a valid 'enum svprfop' value} } */ -+ svprfb (pg, s32_ptr, (enum svprfop) 0); -+ svprfb (pg, s32_ptr, (enum svprfop) 5); -+ svprfb (pg, s32_ptr, (enum svprfop) 6); /* { dg-error {passing 6 to argument 3 of 'svprfb', which expects a valid 'enum svprfop' value} } */ -+ svprfb (pg, s32_ptr, (enum svprfop) 7); /* { dg-error {passing 7 to argument 3 of 'svprfb', which expects a valid 'enum svprfop' value} } */ -+ svprfb (pg, s32_ptr, (enum svprfop) 8); -+ svprfb (pg, s32_ptr, (enum svprfop) 14); /* { dg-error {passing 14 to argument 3 of 'svprfb', which expects a valid 'enum svprfop' value} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_index_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_index_1.c -new file mode 100644 -index 000000000..c33c95440 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_index_1.c -@@ -0,0 +1,53 @@ -+/* { dg-do compile } */ -+/* { dg-options "-std=c99" } */ -+ -+#include <arm_sve.h> -+ -+struct s { int i; }; -+ -+void -+f1 (svbool_t pg, int32_t *s32_ptr, void *void_ptr, void **ptr_ptr, -+ svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16, svfloat16_t f16, -+ svint32_t s32, svuint32_t u32, svfloat32_t f32, -+ svint64_t s64, svuint64_t u64, svfloat64_t f64, enum svprfop op, -+ struct s s) -+{ -+ svprfh_gather_index (pg, s32_ptr, s32); /* { dg-error {too few arguments to function 'svprfh_gather_index'} } */ -+ svprfh_gather_index (pg, s32_ptr, s32, SV_PLDL1KEEP, 0); /* { dg-error {too many arguments to function 'svprfh_gather_index'} } */ -+ svprfh_gather_index (0, s32_ptr, s32, SV_PLDL1KEEP); /* { dg-error {passing 'int' to argument 1 of 'svprfh_gather_index', which expects 'svbool_t'} } */ -+ svprfh_gather_index (pg, 0, s32, SV_PLDL1KEEP); -+ svprfh_gather_index (pg, (int *) 0, s32, SV_PLDL1KEEP); -+ svprfh_gather_index (pg, void_ptr, s32, SV_PLDL1KEEP); -+ svprfh_gather_index (pg, ptr_ptr, s32, SV_PLDL1KEEP); -+ svprfh_gather_index (pg, s, s32, SV_PLDL1KEEP); /* { dg-error {passing 'struct s' to argument 2 of 'svprfh_gather_index', which expects a vector or pointer base address} } */ -+ -+ svprfh_gather_index (pg, s32_ptr, s8, SV_PLDL1KEEP); /* { dg-error {passing 'svint8_t' to argument 3 of 
'svprfh_gather_index', which expects a vector of 32-bit or 64-bit integers} } */ -+ svprfh_gather_index (pg, s32_ptr, u8, SV_PLDL1KEEP); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svprfh_gather_index', which expects a vector of 32-bit or 64-bit integers} } */ -+ svprfh_gather_index (pg, s32_ptr, s16, SV_PLDL1KEEP); /* { dg-error {passing 'svint16_t' to argument 3 of 'svprfh_gather_index', which expects a vector of 32-bit or 64-bit integers} } */ -+ svprfh_gather_index (pg, s32_ptr, u16, SV_PLDL1KEEP); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svprfh_gather_index', which expects a vector of 32-bit or 64-bit integers} } */ -+ svprfh_gather_index (pg, s32_ptr, f16, SV_PLDL1KEEP); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svprfh_gather_index', which expects a vector of integers} } */ -+ svprfh_gather_index (pg, s32_ptr, s32, SV_PLDL1KEEP); -+ svprfh_gather_index (pg, s32_ptr, u32, SV_PLDL1KEEP); -+ svprfh_gather_index (pg, s32_ptr, f32, SV_PLDL1KEEP); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svprfh_gather_index', which expects a vector of integers} } */ -+ svprfh_gather_index (pg, s32_ptr, s64, SV_PLDL1KEEP); -+ svprfh_gather_index (pg, s32_ptr, u64, SV_PLDL1KEEP); -+ svprfh_gather_index (pg, s32_ptr, f64, SV_PLDL1KEEP); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svprfh_gather_index', which expects a vector of integers} } */ -+ -+ svprfh_gather_index (pg, u8, 0, SV_PLDL1KEEP); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svprfh_gather_index', which expects 'svuint32_t' or 'svuint64_t'} } */ -+ svprfh_gather_index (pg, u16, 0, SV_PLDL1KEEP); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svprfh_gather_index', which expects 'svuint32_t' or 'svuint64_t'} } */ -+ svprfh_gather_index (pg, s32, 0, SV_PLDL1KEEP); /* { dg-error {passing 'svint32_t' to argument 2 of 'svprfh_gather_index', which expects 'svuint32_t' or 'svuint64_t'} } */ -+ svprfh_gather_index (pg, u32, 0, SV_PLDL1KEEP); -+ svprfh_gather_index (pg, f32, 0, SV_PLDL1KEEP); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svprfh_gather_index', which expects 'svuint32_t' or 'svuint64_t'} } */ -+ svprfh_gather_index (pg, s64, 0, SV_PLDL1KEEP); /* { dg-error {passing 'svint64_t' to argument 2 of 'svprfh_gather_index', which expects 'svuint32_t' or 'svuint64_t'} } */ -+ svprfh_gather_index (pg, u64, 0, SV_PLDL1KEEP); -+ svprfh_gather_index (pg, f64, 0, SV_PLDL1KEEP); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svprfh_gather_index', which expects 'svuint32_t' or 'svuint64_t'} } */ -+ -+ svprfh_gather_index (pg, s32_ptr, s32, op); /* { dg-error {argument 4 of 'svprfh_gather_index' must be an integer constant expression} } */ -+ svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) -1); /* { dg-error {passing 4294967295 to argument 4 of 'svprfh_gather_index', which expects a valid 'enum svprfop' value} } */ -+ svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) 0); -+ svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) 5); -+ svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) 6); /* { dg-error {passing 6 to argument 4 of 'svprfh_gather_index', which expects a valid 'enum svprfop' value} } */ -+ svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) 7); /* { dg-error {passing 7 to argument 4 of 'svprfh_gather_index', which expects a valid 'enum svprfop' value} } */ -+ svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) 8); -+ svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) 14); /* { dg-error {passing 14 to argument 4 of 'svprfh_gather_index', 
which expects a valid 'enum svprfop' value} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_index_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_index_2.c -new file mode 100644 -index 000000000..3d7797305 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_index_2.c -@@ -0,0 +1,17 @@ -+/* { dg-do compile } */ -+/* { dg-options "-std=c99" } */ -+ -+#include <arm_sve.h> -+ -+void -+f1 (svbool_t pg, int32_t *s32_ptr, svint32_t s32, enum svprfop op) -+{ -+ svprfh_gather_s32index (pg, s32_ptr, s32, op); /* { dg-error {argument 4 of 'svprfh_gather_s32index' must be an integer constant expression} } */ -+ svprfh_gather_s32index (pg, s32_ptr, s32, (enum svprfop) -1); /* { dg-error {passing 4294967295 to argument 4 of 'svprfh_gather_s32index', which expects a valid 'enum svprfop' value} } */ -+ svprfh_gather_s32index (pg, s32_ptr, s32, (enum svprfop) 0); -+ svprfh_gather_s32index (pg, s32_ptr, s32, (enum svprfop) 5); -+ svprfh_gather_s32index (pg, s32_ptr, s32, (enum svprfop) 6); /* { dg-error {passing 6 to argument 4 of 'svprfh_gather_s32index', which expects a valid 'enum svprfop' value} } */ -+ svprfh_gather_s32index (pg, s32_ptr, s32, (enum svprfop) 7); /* { dg-error {passing 7 to argument 4 of 'svprfh_gather_s32index', which expects a valid 'enum svprfop' value} } */ -+ svprfh_gather_s32index (pg, s32_ptr, s32, (enum svprfop) 8); -+ svprfh_gather_s32index (pg, s32_ptr, s32, (enum svprfop) 14); /* { dg-error {passing 14 to argument 4 of 'svprfh_gather_s32index', which expects a valid 'enum svprfop' value} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_1.c -new file mode 100644 -index 000000000..cc61901cb ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_1.c -@@ -0,0 +1,53 @@ -+/* { dg-do compile } */ -+/* { dg-options "-std=c99" } */ -+ -+#include <arm_sve.h> -+ -+struct s { int i; }; -+ -+void -+f1 (svbool_t pg, int32_t *s32_ptr, void *void_ptr, void **ptr_ptr, -+ svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16, svfloat16_t f16, -+ svint32_t s32, svuint32_t u32, svfloat32_t f32, -+ svint64_t s64, svuint64_t u64, svfloat64_t f64, enum svprfop op, -+ struct s s) -+{ -+ svprfb_gather_offset (pg, s32_ptr, s32); /* { dg-error {too few arguments to function 'svprfb_gather_offset'} } */ -+ svprfb_gather_offset (pg, s32_ptr, s32, SV_PLDL1KEEP, 0); /* { dg-error {too many arguments to function 'svprfb_gather_offset'} } */ -+ svprfb_gather_offset (0, s32_ptr, s32, SV_PLDL1KEEP); /* { dg-error {passing 'int' to argument 1 of 'svprfb_gather_offset', which expects 'svbool_t'} } */ -+ svprfb_gather_offset (pg, 0, s32, SV_PLDL1KEEP); -+ svprfb_gather_offset (pg, (int *) 0, s32, SV_PLDL1KEEP); -+ svprfb_gather_offset (pg, void_ptr, s32, SV_PLDL1KEEP); -+ svprfb_gather_offset (pg, ptr_ptr, s32, SV_PLDL1KEEP); -+ svprfb_gather_offset (pg, s, s32, SV_PLDL1KEEP); /* { dg-error {passing 'struct s' to argument 2 of 'svprfb_gather_offset', which expects a vector or pointer base address} } */ -+ -+ svprfb_gather_offset (pg, s32_ptr, s8, SV_PLDL1KEEP); /* { dg-error {passing 'svint8_t' to argument 3 of 'svprfb_gather_offset', which expects a vector of 32-bit or 64-bit integers} } */ -+ svprfb_gather_offset (pg, s32_ptr, u8, SV_PLDL1KEEP); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svprfb_gather_offset', which expects a vector of 32-bit or 
64-bit integers} } */ -+ svprfb_gather_offset (pg, s32_ptr, s16, SV_PLDL1KEEP); /* { dg-error {passing 'svint16_t' to argument 3 of 'svprfb_gather_offset', which expects a vector of 32-bit or 64-bit integers} } */ -+ svprfb_gather_offset (pg, s32_ptr, u16, SV_PLDL1KEEP); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svprfb_gather_offset', which expects a vector of 32-bit or 64-bit integers} } */ -+ svprfb_gather_offset (pg, s32_ptr, f16, SV_PLDL1KEEP); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svprfb_gather_offset', which expects a vector of integers} } */ -+ svprfb_gather_offset (pg, s32_ptr, s32, SV_PLDL1KEEP); -+ svprfb_gather_offset (pg, s32_ptr, u32, SV_PLDL1KEEP); -+ svprfb_gather_offset (pg, s32_ptr, f32, SV_PLDL1KEEP); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svprfb_gather_offset', which expects a vector of integers} } */ -+ svprfb_gather_offset (pg, s32_ptr, s64, SV_PLDL1KEEP); -+ svprfb_gather_offset (pg, s32_ptr, u64, SV_PLDL1KEEP); -+ svprfb_gather_offset (pg, s32_ptr, f64, SV_PLDL1KEEP); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svprfb_gather_offset', which expects a vector of integers} } */ -+ -+ svprfb_gather_offset (pg, u8, 0, SV_PLDL1KEEP); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svprfb_gather_offset', which expects 'svuint32_t' or 'svuint64_t'} } */ -+ svprfb_gather_offset (pg, u16, 0, SV_PLDL1KEEP); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svprfb_gather_offset', which expects 'svuint32_t' or 'svuint64_t'} } */ -+ svprfb_gather_offset (pg, s32, 0, SV_PLDL1KEEP); /* { dg-error {passing 'svint32_t' to argument 2 of 'svprfb_gather_offset', which expects 'svuint32_t' or 'svuint64_t'} } */ -+ svprfb_gather_offset (pg, u32, 0, SV_PLDL1KEEP); -+ svprfb_gather_offset (pg, f32, 0, SV_PLDL1KEEP); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svprfb_gather_offset', which expects 'svuint32_t' or 'svuint64_t'} } */ -+ svprfb_gather_offset (pg, s64, 0, SV_PLDL1KEEP); /* { dg-error {passing 'svint64_t' to argument 2 of 'svprfb_gather_offset', which expects 'svuint32_t' or 'svuint64_t'} } */ -+ svprfb_gather_offset (pg, u64, 0, SV_PLDL1KEEP); -+ svprfb_gather_offset (pg, f64, 0, SV_PLDL1KEEP); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svprfb_gather_offset', which expects 'svuint32_t' or 'svuint64_t'} } */ -+ -+ svprfb_gather_offset (pg, s32_ptr, s32, op); /* { dg-error {argument 4 of 'svprfb_gather_offset' must be an integer constant expression} } */ -+ svprfb_gather_offset (pg, s32_ptr, s32, (enum svprfop) -1); /* { dg-error {passing 4294967295 to argument 4 of 'svprfb_gather_offset', which expects a valid 'enum svprfop' value} } */ -+ svprfb_gather_offset (pg, s32_ptr, s32, (enum svprfop) 0); -+ svprfb_gather_offset (pg, s32_ptr, s32, (enum svprfop) 5); -+ svprfb_gather_offset (pg, s32_ptr, s32, (enum svprfop) 6); /* { dg-error {passing 6 to argument 4 of 'svprfb_gather_offset', which expects a valid 'enum svprfop' value} } */ -+ svprfb_gather_offset (pg, s32_ptr, s32, (enum svprfop) 7); /* { dg-error {passing 7 to argument 4 of 'svprfb_gather_offset', which expects a valid 'enum svprfop' value} } */ -+ svprfb_gather_offset (pg, s32_ptr, s32, (enum svprfop) 8); -+ svprfb_gather_offset (pg, s32_ptr, s32, (enum svprfop) 14); /* { dg-error {passing 14 to argument 4 of 'svprfb_gather_offset', which expects a valid 'enum svprfop' value} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_2.c -new file mode 100644 -index 000000000..b74721fad ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_2.c -@@ -0,0 +1,37 @@ -+/* { dg-do compile } */ -+/* { dg-options "-std=c99" } */ -+ -+#include -+ -+void -+f1 (svbool_t pg, svint8_t s8, svuint8_t u8, -+ svint16_t s16, svuint16_t u16, svfloat16_t f16, -+ svint32_t s32, svuint32_t u32, svfloat32_t f32, -+ svint64_t s64, svuint64_t u64, svfloat64_t f64, enum svprfop op) -+{ -+ svprfb_gather (pg, u32); /* { dg-error {too few arguments to function 'svprfb_gather'} } */ -+ svprfb_gather (pg, u32, SV_PLDL1KEEP, 0); /* { dg-error {too many arguments to function 'svprfb_gather'} } */ -+ svprfb_gather (0, u32, SV_PLDL1KEEP); /* { dg-error {passing 'int' to argument 1 of 'svprfb_gather', which expects 'svbool_t'} } */ -+ svprfb_gather (pg, 0, SV_PLDL1KEEP); /* { dg-error {passing 'int' to argument 2 of 'svprfb_gather', which expects an SVE vector type} } */ -+ -+ svprfb_gather (pg, s8, SV_PLDL1KEEP); /* { dg-error {passing 'svint8_t' to argument 2 of 'svprfb_gather', which expects 'svuint32_t' or 'svuint64_t'} } */ -+ svprfb_gather (pg, u8, SV_PLDL1KEEP); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svprfb_gather', which expects 'svuint32_t' or 'svuint64_t'} } */ -+ svprfb_gather (pg, s16, SV_PLDL1KEEP); /* { dg-error {passing 'svint16_t' to argument 2 of 'svprfb_gather', which expects 'svuint32_t' or 'svuint64_t'} } */ -+ svprfb_gather (pg, u16, SV_PLDL1KEEP); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svprfb_gather', which expects 'svuint32_t' or 'svuint64_t'} } */ -+ svprfb_gather (pg, f16, SV_PLDL1KEEP); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svprfb_gather', which expects 'svuint32_t' or 'svuint64_t'} } */ -+ svprfb_gather (pg, s32, SV_PLDL1KEEP); /* { dg-error {passing 'svint32_t' to argument 2 of 'svprfb_gather', which expects 'svuint32_t' or 'svuint64_t'} } */ -+ svprfb_gather (pg, u32, SV_PLDL1KEEP); -+ svprfb_gather (pg, f32, SV_PLDL1KEEP); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svprfb_gather', which expects 'svuint32_t' or 'svuint64_t'} } */ -+ svprfb_gather (pg, s64, SV_PLDL1KEEP); /* { dg-error {passing 'svint64_t' to argument 2 of 'svprfb_gather', which expects 'svuint32_t' or 'svuint64_t'} } */ -+ svprfb_gather (pg, u64, SV_PLDL1KEEP); -+ svprfb_gather (pg, f64, SV_PLDL1KEEP); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svprfb_gather', which expects 'svuint32_t' or 'svuint64_t'} } */ -+ -+ svprfb_gather (pg, u32, op); /* { dg-error {argument 3 of 'svprfb_gather' must be an integer constant expression} } */ -+ svprfb_gather (pg, u32, (enum svprfop) -1); /* { dg-error {passing 4294967295 to argument 3 of 'svprfb_gather', which expects a valid 'enum svprfop' value} } */ -+ svprfb_gather (pg, u32, (enum svprfop) 0); -+ svprfb_gather (pg, u32, (enum svprfop) 5); -+ svprfb_gather (pg, u32, (enum svprfop) 6); /* { dg-error {passing 6 to argument 3 of 'svprfb_gather', which expects a valid 'enum svprfop' value} } */ -+ svprfb_gather (pg, u32, (enum svprfop) 7); /* { dg-error {passing 7 to argument 3 of 'svprfb_gather', which expects a valid 'enum svprfop' value} } */ -+ svprfb_gather (pg, u32, (enum svprfop) 8); -+ svprfb_gather (pg, u32, (enum svprfop) 14); /* { dg-error {passing 14 to argument 3 of 'svprfb_gather', which expects a valid 'enum svprfop' value} } */ -+} -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_3.c
-new file mode 100644
-index 000000000..24b4aa190
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_3.c
-@@ -0,0 +1,17 @@
-+/* { dg-do compile } */
-+/* { dg-options "-std=c99" } */
-+
-+#include <arm_sve.h>
-+
-+void
-+f1 (svbool_t pg, int32_t *s32_ptr, svint32_t s32, enum svprfop op)
-+{
-+ svprfb_gather_s32offset (pg, s32_ptr, s32, op); /* { dg-error {argument 4 of 'svprfb_gather_s32offset' must be an integer constant expression} } */
-+ svprfb_gather_s32offset (pg, s32_ptr, s32, (enum svprfop) -1); /* { dg-error {passing 4294967295 to argument 4 of 'svprfb_gather_s32offset', which expects a valid 'enum svprfop' value} } */
-+ svprfb_gather_s32offset (pg, s32_ptr, s32, (enum svprfop) 0);
-+ svprfb_gather_s32offset (pg, s32_ptr, s32, (enum svprfop) 5);
-+ svprfb_gather_s32offset (pg, s32_ptr, s32, (enum svprfop) 6); /* { dg-error {passing 6 to argument 4 of 'svprfb_gather_s32offset', which expects a valid 'enum svprfop' value} } */
-+ svprfb_gather_s32offset (pg, s32_ptr, s32, (enum svprfop) 7); /* { dg-error {passing 7 to argument 4 of 'svprfb_gather_s32offset', which expects a valid 'enum svprfop' value} } */
-+ svprfb_gather_s32offset (pg, s32_ptr, s32, (enum svprfop) 8);
-+ svprfb_gather_s32offset (pg, s32_ptr, s32, (enum svprfop) 14); /* { dg-error {passing 14 to argument 4 of 'svprfb_gather_s32offset', which expects a valid 'enum svprfop' value} } */
-+}
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_4.c
-new file mode 100644
-index 000000000..63ccdc5a4
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_4.c
-@@ -0,0 +1,17 @@
-+/* { dg-do compile } */
-+/* { dg-options "-std=c99" } */
-+
-+#include <arm_sve.h>
-+
-+void
-+f1 (svbool_t pg, svuint32_t u32, enum svprfop op)
-+{
-+ svprfb_gather_u32base (pg, u32, op); /* { dg-error {argument 3 of 'svprfb_gather_u32base' must be an integer constant expression} } */
-+ svprfb_gather_u32base (pg, u32, (enum svprfop) -1); /* { dg-error {passing 4294967295 to argument 3 of 'svprfb_gather_u32base', which expects a valid 'enum svprfop' value} } */
-+ svprfb_gather_u32base (pg, u32, (enum svprfop) 0);
-+ svprfb_gather_u32base (pg, u32, (enum svprfop) 5);
-+ svprfb_gather_u32base (pg, u32, (enum svprfop) 6); /* { dg-error {passing 6 to argument 3 of 'svprfb_gather_u32base', which expects a valid 'enum svprfop' value} } */
-+ svprfb_gather_u32base (pg, u32, (enum svprfop) 7); /* { dg-error {passing 7 to argument 3 of 'svprfb_gather_u32base', which expects a valid 'enum svprfop' value} } */
-+ svprfb_gather_u32base (pg, u32, (enum svprfop) 8);
-+ svprfb_gather_u32base (pg, u32, (enum svprfop) 14); /* { dg-error {passing 14 to argument 3 of 'svprfb_gather_u32base', which expects a valid 'enum svprfop' value} } */
-+}
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/reduction_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/reduction_1.c
-new file mode 100644
-index 000000000..ab0ef304a
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/reduction_1.c
-@@ -0,0 +1,19 @@
-+/* { dg-do compile } */
-+
-+#include <arm_sve.h>
-+
-+void
-+f1 (svbool_t pg, svint32_t s32, svuint32_t u32, svfloat32_t f32,
-+ svuint32x2_t u32x2)
-+{
-+ svorv (pg); /* { dg-error {too few 
arguments to function 'svorv'} } */ -+ svorv (pg, u32, u32); /* { dg-error {too many arguments to function 'svorv'} } */ -+ svorv (0, u32); /* { dg-error {passing 'int' to argument 1 of 'svorv', which expects 'svbool_t'} } */ -+ svorv (u32, u32); /* { dg-error {passing 'svuint32_t' to argument 1 of 'svorv', which expects 'svbool_t'} } */ -+ svorv (pg, 0); /* { dg-error {passing 'int' to argument 2 of 'svorv', which expects an SVE vector type} } */ -+ svorv (pg, pg); /* { dg-error {'svorv' has no form that takes 'svbool_t' arguments} } */ -+ svorv (pg, s32); -+ svorv (pg, u32); -+ svorv (pg, f32); /* { dg-error {'svorv' has no form that takes 'svfloat32_t' arguments} } */ -+ svorv (pg, u32x2); /* { dg-error {passing 'svuint32x2_t' to argument 2 of 'svorv', which expects a single SVE vector rather than a tuple} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/reduction_wide_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/reduction_wide_1.c -new file mode 100644 -index 000000000..f99a2887b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/reduction_wide_1.c -@@ -0,0 +1,19 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+void -+f1 (svbool_t pg, svint32_t s32, svuint32_t u32, svfloat32_t f32, -+ svuint32x2_t u32x2) -+{ -+ svaddv (pg); /* { dg-error {too few arguments to function 'svaddv'} } */ -+ svaddv (pg, u32, u32); /* { dg-error {too many arguments to function 'svaddv'} } */ -+ svaddv (0, u32); /* { dg-error {passing 'int' to argument 1 of 'svaddv', which expects 'svbool_t'} } */ -+ svaddv (u32, u32); /* { dg-error {passing 'svuint32_t' to argument 1 of 'svaddv', which expects 'svbool_t'} } */ -+ svaddv (pg, 0); /* { dg-error {passing 'int' to argument 2 of 'svaddv', which expects an SVE vector type} } */ -+ svaddv (pg, pg); /* { dg-error {'svaddv' has no form that takes 'svbool_t' arguments} } */ -+ svaddv (pg, s32); -+ svaddv (pg, u32); -+ svaddv (pg, f32); -+ svaddv (pg, u32x2); /* { dg-error {passing 'svuint32x2_t' to argument 2 of 'svaddv', which expects a single SVE vector rather than a tuple} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_1.c -new file mode 100644 -index 000000000..f07c76102 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_1.c -@@ -0,0 +1,35 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-std=c99 -Wall -Wextra" } */ -+ -+#include -+ -+svfloat64_t -+f1 (svbool_t pg, svuint8_t u8, svuint8x2_t u8x2, svuint8x3_t u8x3, int x) -+{ -+ const int one = 1; -+ svfloat64_t f64; -+ -+ u8x2 = svset2 (u8x2); /* { dg-error {too few arguments to function 'svset2'} } */ -+ u8x2 = svset2 (u8x2, 1); /* { dg-error {too few arguments to function 'svset2'} } */ -+ u8x2 = svset2 (u8x2, 1, u8, 3); /* { dg-error {too many arguments to function 'svset2'} } */ -+ u8x2 = svset2 (u8, 0, u8); /* { dg-error {passing single vector 'svuint8_t' to argument 1 of 'svset2', which expects a tuple of 2 vectors} } */ -+ u8x2 = svset2 (u8x3, 0, u8); /* { dg-error {passing 'svuint8x3_t' to argument 1 of 'svset2', which expects a tuple of 2 vectors} } */ -+ u8x2 = svset2 (pg, 0, u8); /* { dg-error {passing 'svbool_t' to argument 1 of 'svset2', which expects a tuple of 2 vectors} } */ -+ u8x2 = svset2 (u8x2, 0, u8x2); /* { dg-error {passing 'svuint8x2_t' to argument 3 of 'svset2', which expects a single SVE vector rather than a tuple} } */ -+ u8x2 = svset2 (u8x2, 0, f64); /* { dg-error {passing 'svfloat64_t' instead of 
the expected 'svuint8_t' to argument 3 of 'svset2', after passing 'svuint8x2_t' to argument 1} } */ -+ u8x2 = svset2 (u8x2, 0, pg); /* { dg-error {passing 'svbool_t' instead of the expected 'svuint8_t' to argument 3 of 'svset2', after passing 'svuint8x2_t' to argument 1} } */ -+ u8x2 = svset2 (u8x2, x, u8); /* { dg-error {argument 2 of 'svset2' must be an integer constant expression} } */ -+ u8x2 = svset2 (u8x2, 0, u8); -+ f64 = svset2 (u8x2, 0, u8); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svuint8x2_t'} } */ -+ u8x2 = svset2 (u8x2, 1, u8); -+ u8x2 = svset2 (u8x2, 2, u8); /* { dg-error {passing 2 to argument 2 of 'svset2', which expects a value in the range \[0, 1\]} } */ -+ u8x2 = svset2 (u8x2, 3, u8); /* { dg-error {passing 3 to argument 2 of 'svset2', which expects a value in the range \[0, 1\]} } */ -+ u8x2 = svset2 (u8x2, 4, u8); /* { dg-error {passing 4 to argument 2 of 'svset2', which expects a value in the range \[0, 1\]} } */ -+ u8x2 = svset2 (u8x2, 5, u8); /* { dg-error {passing 5 to argument 2 of 'svset2', which expects a value in the range \[0, 1\]} } */ -+ u8x2 = svset2 (u8x2, ~0U, u8); /* { dg-error {passing [^ ]* to argument 2 of 'svset2', which expects a value in the range \[0, 1\]} } */ -+ u8x2 = svset2 (u8x2, one, u8); /* { dg-error {argument 2 of 'svset2' must be an integer constant expression} } */ -+ u8x2 = svset2 (u8x2, 3 - 2, u8); -+ u8x2 = svset2 (u8x2, 1.0, u8); -+ -+ return f64; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_2.c -new file mode 100644 -index 000000000..ae277eafd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_2.c -@@ -0,0 +1,37 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-std=c99 -Wall -Wextra" } */ -+ -+#include -+ -+svfloat64_t -+f1 (svbool_t pg, svuint8_t u8, svuint8x2_t u8x2, svint8x2_t s8x2, -+ svuint8x3_t u8x3, int x) -+{ -+ const int one = 1; -+ svfloat64_t f64; -+ -+ u8x2 = svset2_u8 (u8x2); /* { dg-error {too few arguments to function 'svset2_u8'} } */ -+ u8x2 = svset2_u8 (u8x2, 1); /* { dg-error {too few arguments to function 'svset2_u8'} } */ -+ u8x2 = svset2_u8 (u8x2, 1, u8, 3); /* { dg-error {too many arguments to function 'svset2_u8'} } */ -+ u8x2 = svset2_u8 (u8, 0, u8); /* { dg-error {incompatible type for argument 1 of 'svset2_u8'} } */ -+ u8x2 = svset2_u8 (s8x2, 0, u8); /* { dg-error {incompatible type for argument 1 of 'svset2_u8'} } */ -+ u8x2 = svset2_u8 (u8x3, 0, u8); /* { dg-error {incompatible type for argument 1 of 'svset2_u8'} } */ -+ u8x2 = svset2_u8 (pg, 0, u8); /* { dg-error {incompatible type for argument 1 of 'svset2_u8'} } */ -+ u8x2 = svset2_u8 (u8x2, 0, u8x2); /* { dg-error {incompatible type for argument 3 of 'svset2_u8'} } */ -+ u8x2 = svset2_u8 (u8x2, 0, f64); /* { dg-error {incompatible type for argument 3 of 'svset2_u8'} } */ -+ u8x2 = svset2_u8 (u8x2, 0, pg); /* { dg-error {incompatible type for argument 3 of 'svset2_u8'} } */ -+ u8x2 = svset2_u8 (u8x2, x, u8); /* { dg-error {argument 2 of 'svset2_u8' must be an integer constant expression} } */ -+ u8x2 = svset2_u8 (u8x2, 0, u8); -+ f64 = svset2_u8 (u8x2, 0, u8); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svuint8x2_t'} } */ -+ u8x2 = svset2_u8 (u8x2, 1, u8); -+ u8x2 = svset2_u8 (u8x2, 2, u8); /* { dg-error {passing 2 to argument 2 of 'svset2_u8', which expects a value in the range \[0, 1\]} } */ -+ u8x2 = svset2_u8 (u8x2, 3, u8); /* { dg-error 
{passing 3 to argument 2 of 'svset2_u8', which expects a value in the range \[0, 1\]} } */ -+ u8x2 = svset2_u8 (u8x2, 4, u8); /* { dg-error {passing 4 to argument 2 of 'svset2_u8', which expects a value in the range \[0, 1\]} } */ -+ u8x2 = svset2_u8 (u8x2, 5, u8); /* { dg-error {passing 5 to argument 2 of 'svset2_u8', which expects a value in the range \[0, 1\]} } */ -+ u8x2 = svset2_u8 (u8x2, ~0U, u8); /* { dg-error {passing [^ ]* to argument 2 of 'svset2_u8', which expects a value in the range \[0, 1\]} } */ -+ u8x2 = svset2_u8 (u8x2, one, u8); /* { dg-error {argument 2 of 'svset2_u8' must be an integer constant expression} } */ -+ u8x2 = svset2_u8 (u8x2, 3 - 2, u8); -+ u8x2 = svset2_u8 (u8x2, 1.0, u8); -+ -+ return f64; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_3.c -new file mode 100644 -index 000000000..543a1bea8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_3.c -@@ -0,0 +1,36 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-std=c99 -Wall -Wextra" } */ -+ -+#include -+ -+svfloat64_t -+f1 (svbool_t pg, svfloat16_t f16, svfloat16x3_t f16x3, svfloat16x4_t f16x4, -+ int x) -+{ -+ const int one = 1; -+ svfloat64_t f64; -+ -+ f16x3 = svset3 (f16x3); /* { dg-error {too few arguments to function 'svset3'} } */ -+ f16x3 = svset3 (f16x3, 1); /* { dg-error {too few arguments to function 'svset3'} } */ -+ f16x3 = svset3 (f16x3, 1, f16, 3); /* { dg-error {too many arguments to function 'svset3'} } */ -+ f16x3 = svset3 (f16, 0, f16); /* { dg-error {passing single vector 'svfloat16_t' to argument 1 of 'svset3', which expects a tuple of 3 vectors} } */ -+ f16x3 = svset3 (f16x4, 0, f16); /* { dg-error {passing 'svfloat16x4_t' to argument 1 of 'svset3', which expects a tuple of 3 vectors} } */ -+ f16x3 = svset3 (pg, 0, f16); /* { dg-error {passing 'svbool_t' to argument 1 of 'svset3', which expects a tuple of 3 vectors} } */ -+ f16x3 = svset3 (f16x3, 0, f16x3); /* { dg-error {passing 'svfloat16x3_t' to argument 3 of 'svset3', which expects a single SVE vector rather than a tuple} } */ -+ f16x3 = svset3 (f16x3, 0, f64); /* { dg-error {passing 'svfloat64_t' instead of the expected 'svfloat16_t' to argument 3 of 'svset3', after passing 'svfloat16x3_t' to argument 1} } */ -+ f16x3 = svset3 (f16x3, 0, pg); /* { dg-error {passing 'svbool_t' instead of the expected 'svfloat16_t' to argument 3 of 'svset3', after passing 'svfloat16x3_t' to argument 1} } */ -+ f16x3 = svset3 (f16x3, x, f16); /* { dg-error {argument 2 of 'svset3' must be an integer constant expression} } */ -+ f16x3 = svset3 (f16x3, 0, f16); -+ f64 = svset3 (f16x3, 0, f16); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svfloat16x3_t'} } */ -+ f16x3 = svset3 (f16x3, 1, f16); -+ f16x3 = svset3 (f16x3, 2, f16); -+ f16x3 = svset3 (f16x3, 3, f16); /* { dg-error {passing 3 to argument 2 of 'svset3', which expects a value in the range \[0, 2\]} } */ -+ f16x3 = svset3 (f16x3, 4, f16); /* { dg-error {passing 4 to argument 2 of 'svset3', which expects a value in the range \[0, 2\]} } */ -+ f16x3 = svset3 (f16x3, 5, f16); /* { dg-error {passing 5 to argument 2 of 'svset3', which expects a value in the range \[0, 2\]} } */ -+ f16x3 = svset3 (f16x3, ~0U, f16); /* { dg-error {passing [^ ]* to argument 2 of 'svset3', which expects a value in the range \[0, 2\]} } */ -+ f16x3 = svset3 (f16x3, one, f16); /* { dg-error {argument 2 of 'svset3' must be an integer constant expression} } */ -+ 
f16x3 = svset3 (f16x3, 3 - 2, f16); -+ f16x3 = svset3 (f16x3, 1.0, f16); -+ -+ return f64; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_4.c -new file mode 100644 -index 000000000..198b03407 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_4.c -@@ -0,0 +1,37 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-std=c99 -Wall -Wextra" } */ -+ -+#include -+ -+svfloat64_t -+f1 (svbool_t pg, svfloat16_t f16, svfloat16x3_t f16x3, svuint16x3_t u16x3, -+ svfloat16x4_t f16x4, int x) -+{ -+ const int one = 1; -+ svfloat64_t f64; -+ -+ f16x3 = svset3_f16 (f16x3); /* { dg-error {too few arguments to function 'svset3_f16'} } */ -+ f16x3 = svset3_f16 (f16x3, 1); /* { dg-error {too few arguments to function 'svset3_f16'} } */ -+ f16x3 = svset3_f16 (f16x3, 1, f16, 3); /* { dg-error {too many arguments to function 'svset3_f16'} } */ -+ f16x3 = svset3_f16 (f16, 0, f16); /* { dg-error {incompatible type for argument 1 of 'svset3_f16'} } */ -+ f16x3 = svset3_f16 (u16x3, 0, f16); /* { dg-error {incompatible type for argument 1 of 'svset3_f16'} } */ -+ f16x3 = svset3_f16 (f16x4, 0, f16); /* { dg-error {incompatible type for argument 1 of 'svset3_f16'} } */ -+ f16x3 = svset3_f16 (pg, 0, f16); /* { dg-error {incompatible type for argument 1 of 'svset3_f16'} } */ -+ f16x3 = svset3_f16 (f16x3, 0, f16x3); /* { dg-error {incompatible type for argument 3 of 'svset3_f16'} } */ -+ f16x3 = svset3_f16 (f16x3, 0, f64); /* { dg-error {incompatible type for argument 3 of 'svset3_f16'} } */ -+ f16x3 = svset3_f16 (f16x3, 0, pg); /* { dg-error {incompatible type for argument 3 of 'svset3_f16'} } */ -+ f16x3 = svset3_f16 (f16x3, x, f16); /* { dg-error {argument 2 of 'svset3_f16' must be an integer constant expression} } */ -+ f16x3 = svset3_f16 (f16x3, 0, f16); -+ f64 = svset3_f16 (f16x3, 0, f16); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svfloat16x3_t'} } */ -+ f16x3 = svset3_f16 (f16x3, 1, f16); -+ f16x3 = svset3_f16 (f16x3, 2, f16); -+ f16x3 = svset3_f16 (f16x3, 3, f16); /* { dg-error {passing 3 to argument 2 of 'svset3_f16', which expects a value in the range \[0, 2\]} } */ -+ f16x3 = svset3_f16 (f16x3, 4, f16); /* { dg-error {passing 4 to argument 2 of 'svset3_f16', which expects a value in the range \[0, 2\]} } */ -+ f16x3 = svset3_f16 (f16x3, 5, f16); /* { dg-error {passing 5 to argument 2 of 'svset3_f16', which expects a value in the range \[0, 2\]} } */ -+ f16x3 = svset3_f16 (f16x3, ~0U, f16); /* { dg-error {passing [^ ]* to argument 2 of 'svset3_f16', which expects a value in the range \[0, 2\]} } */ -+ f16x3 = svset3_f16 (f16x3, one, f16); /* { dg-error {argument 2 of 'svset3_f16' must be an integer constant expression} } */ -+ f16x3 = svset3_f16 (f16x3, 3 - 2, f16); -+ f16x3 = svset3_f16 (f16x3, 1.0, f16); -+ -+ return f64; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_5.c -new file mode 100644 -index 000000000..be911a731 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_5.c -@@ -0,0 +1,35 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-std=c99 -Wall -Wextra" } */ -+ -+#include -+ -+svfloat64_t -+f1 (svbool_t pg, svint32_t s32, svint32x4_t s32x4, svint32x2_t s32x2, int x) -+{ -+ const int one = 1; -+ svfloat64_t f64; -+ -+ s32x4 = svset4 (s32x4); /* { dg-error {too few arguments to function 'svset4'} } */ -+ s32x4 = svset4 
(s32x4, 1); /* { dg-error {too few arguments to function 'svset4'} } */ -+ s32x4 = svset4 (s32x4, 1, s32, 3); /* { dg-error {too many arguments to function 'svset4'} } */ -+ s32x4 = svset4 (s32, 0, s32); /* { dg-error {passing single vector 'svint32_t' to argument 1 of 'svset4', which expects a tuple of 4 vectors} } */ -+ s32x4 = svset4 (s32x2, 0, s32); /* { dg-error {passing 'svint32x2_t' to argument 1 of 'svset4', which expects a tuple of 4 vectors} } */ -+ s32x4 = svset4 (pg, 0, s32); /* { dg-error {passing 'svbool_t' to argument 1 of 'svset4', which expects a tuple of 4 vectors} } */ -+ s32x4 = svset4 (s32x4, 0, s32x4); /* { dg-error {passing 'svint32x4_t' to argument 3 of 'svset4', which expects a single SVE vector rather than a tuple} } */ -+ s32x4 = svset4 (s32x4, 0, f64); /* { dg-error {passing 'svfloat64_t' instead of the expected 'svint32_t' to argument 3 of 'svset4', after passing 'svint32x4_t' to argument 1} } */ -+ s32x4 = svset4 (s32x4, 0, pg); /* { dg-error {passing 'svbool_t' instead of the expected 'svint32_t' to argument 3 of 'svset4', after passing 'svint32x4_t' to argument 1} } */ -+ s32x4 = svset4 (s32x4, x, s32); /* { dg-error {argument 2 of 'svset4' must be an integer constant expression} } */ -+ s32x4 = svset4 (s32x4, 0, s32); -+ f64 = svset4 (s32x4, 0, s32); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svint32x4_t'} } */ -+ s32x4 = svset4 (s32x4, 1, s32); -+ s32x4 = svset4 (s32x4, 2, s32); -+ s32x4 = svset4 (s32x4, 3, s32); -+ s32x4 = svset4 (s32x4, 4, s32); /* { dg-error {passing 4 to argument 2 of 'svset4', which expects a value in the range \[0, 3\]} } */ -+ s32x4 = svset4 (s32x4, 5, s32); /* { dg-error {passing 5 to argument 2 of 'svset4', which expects a value in the range \[0, 3\]} } */ -+ s32x4 = svset4 (s32x4, ~0U, s32); /* { dg-error {passing [^ ]* to argument 2 of 'svset4', which expects a value in the range \[0, 3\]} } */ -+ s32x4 = svset4 (s32x4, one, s32); /* { dg-error {argument 2 of 'svset4' must be an integer constant expression} } */ -+ s32x4 = svset4 (s32x4, 3 - 2, s32); -+ s32x4 = svset4 (s32x4, 1.0, s32); -+ -+ return f64; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_6.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_6.c -new file mode 100644 -index 000000000..cec435413 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_6.c -@@ -0,0 +1,37 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-std=c99 -Wall -Wextra" } */ -+ -+#include -+ -+svfloat64_t -+f1 (svbool_t pg, svint32_t s32, svint32x4_t s32x4, svfloat32x4_t f32x4, -+ svint32x2_t s32x2, int x) -+{ -+ const int one = 1; -+ svfloat64_t f64; -+ -+ s32x4 = svset4_s32 (s32x4); /* { dg-error {too few arguments to function 'svset4_s32'} } */ -+ s32x4 = svset4_s32 (s32x4, 1); /* { dg-error {too few arguments to function 'svset4_s32'} } */ -+ s32x4 = svset4_s32 (s32x4, 1, s32, 3); /* { dg-error {too many arguments to function 'svset4_s32'} } */ -+ s32x4 = svset4_s32 (s32, 0, s32); /* { dg-error {incompatible type for argument 1 of 'svset4_s32'} } */ -+ s32x4 = svset4_s32 (f32x4, 0, s32); /* { dg-error {incompatible type for argument 1 of 'svset4_s32'} } */ -+ s32x4 = svset4_s32 (s32x2, 0, s32); /* { dg-error {incompatible type for argument 1 of 'svset4_s32'} } */ -+ s32x4 = svset4_s32 (pg, 0, s32); /* { dg-error {incompatible type for argument 1 of 'svset4_s32'} } */ -+ s32x4 = svset4_s32 (s32x4, 0, s32x4); /* { dg-error {incompatible type for argument 3 of 'svset4_s32'} } */ -+ s32x4 = 
svset4_s32 (s32x4, 0, f64); /* { dg-error {incompatible type for argument 3 of 'svset4_s32'} } */ -+ s32x4 = svset4_s32 (s32x4, 0, pg); /* { dg-error {incompatible type for argument 3 of 'svset4_s32'} } */ -+ s32x4 = svset4_s32 (s32x4, x, s32); /* { dg-error {argument 2 of 'svset4_s32' must be an integer constant expression} } */ -+ s32x4 = svset4_s32 (s32x4, 0, s32); -+ f64 = svset4_s32 (s32x4, 0, s32); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svint32x4_t'} } */ -+ s32x4 = svset4_s32 (s32x4, 1, s32); -+ s32x4 = svset4_s32 (s32x4, 2, s32); -+ s32x4 = svset4_s32 (s32x4, 3, s32); -+ s32x4 = svset4_s32 (s32x4, 4, s32); /* { dg-error {passing 4 to argument 2 of 'svset4_s32', which expects a value in the range \[0, 3\]} } */ -+ s32x4 = svset4_s32 (s32x4, 5, s32); /* { dg-error {passing 5 to argument 2 of 'svset4_s32', which expects a value in the range \[0, 3\]} } */ -+ s32x4 = svset4_s32 (s32x4, ~0U, s32); /* { dg-error {passing [^ ]* to argument 2 of 'svset4_s32', which expects a value in the range \[0, 3\]} } */ -+ s32x4 = svset4_s32 (s32x4, one, s32); /* { dg-error {argument 2 of 'svset4_s32' must be an integer constant expression} } */ -+ s32x4 = svset4_s32 (s32x4, 3 - 2, s32); -+ s32x4 = svset4_s32 (s32x4, 1.0, s32); -+ -+ return f64; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_1.c -new file mode 100644 -index 000000000..4dd9a9c76 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_1.c -@@ -0,0 +1,34 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-std=c99 -Wall -Wextra" } */ -+ -+#include -+ -+void -+f1 (svbool_t pg, svuint8_t u8, svint8_t s8, svint16_t s16, -+ svint32_t s32, svint64_t s64, int x) -+{ -+ const int one = 1; -+ u8 = svasrd_x (pg, u8, 1); /* { dg-error {'svasrd_x' has no form that takes 'svuint8_t' arguments} } */ -+ s8 = svasrd_x (pg, s8, x); /* { dg-error {argument 3 of 'svasrd_x' must be an integer constant expression} } */ -+ s8 = svasrd_x (pg, s8, one); /* { dg-error {argument 3 of 'svasrd_x' must be an integer constant expression} } */ -+ s8 = svasrd_x (pg, s8, 0.4); /* { dg-error {passing 0 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 8\]} } */ -+ s8 = svasrd_x (pg, s8, 1.0); -+ s8 = svasrd_x (pg, s8, 0); /* { dg-error {passing 0 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 8\]} } */ -+ s8 = svasrd_x (pg, s8, 1); -+ s8 = svasrd_x (pg, s8, 1 + 1); -+ s8 = svasrd_x (pg, s8, 8); -+ s8 = svasrd_x (pg, s8, 9); /* { dg-error {passing 9 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 8\]} } */ -+ s8 = svasrd_x (pg, s8, (1ULL << 62) + 1); /* { dg-error {passing [^ ]* to argument 3 of 'svasrd_x', which expects a value in the range \[1, 8\]} } */ -+ s16 = svasrd_x (pg, s16, 0); /* { dg-error {passing 0 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 16\]} } */ -+ s16 = svasrd_x (pg, s16, 1); -+ s16 = svasrd_x (pg, s16, 16); -+ s16 = svasrd_x (pg, s16, 17); /* { dg-error {passing 17 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 16\]} } */ -+ s32 = svasrd_x (pg, s32, 0); /* { dg-error {passing 0 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 32\]} } */ -+ s32 = svasrd_x (pg, s32, 1); -+ s32 = svasrd_x (pg, s32, 32); -+ s32 = svasrd_x (pg, s32, 33); /* { dg-error {passing 33 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 
32\]} } */ -+ s64 = svasrd_x (pg, s64, 0); /* { dg-error {passing 0 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 64\]} } */ -+ s64 = svasrd_x (pg, s64, 1); -+ s64 = svasrd_x (pg, s64, 64); -+ s64 = svasrd_x (pg, s64, 65); /* { dg-error {passing 65 to argument 3 of 'svasrd_x', which expects a value in the range \[1, 64\]} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_2.c -new file mode 100644 -index 000000000..4970689e9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/shift_right_imm_2.c -@@ -0,0 +1,33 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-std=c99 -Wall -Wextra" } */ -+ -+#include -+ -+void -+f1 (svbool_t pg, svint8_t s8, svint16_t s16, svint32_t s32, svint64_t s64, -+ int x) -+{ -+ const int one = 1; -+ s8 = svasrd_n_s8_x (pg, s8, x); /* { dg-error {argument 3 of 'svasrd_n_s8_x' must be an integer constant expression} } */ -+ s8 = svasrd_n_s8_x (pg, s8, one); /* { dg-error {argument 3 of 'svasrd_n_s8_x' must be an integer constant expression} } */ -+ s8 = svasrd_n_s8_x (pg, s8, 0.4); /* { dg-error {passing 0 to argument 3 of 'svasrd_n_s8_x', which expects a value in the range \[1, 8\]} } */ -+ s8 = svasrd_n_s8_x (pg, s8, 1.0); -+ s8 = svasrd_n_s8_x (pg, s8, 0); /* { dg-error {passing 0 to argument 3 of 'svasrd_n_s8_x', which expects a value in the range \[1, 8\]} } */ -+ s8 = svasrd_n_s8_x (pg, s8, 1); -+ s8 = svasrd_n_s8_x (pg, s8, 1 + 1); -+ s8 = svasrd_n_s8_x (pg, s8, 8); -+ s8 = svasrd_n_s8_x (pg, s8, 9); /* { dg-error {passing 9 to argument 3 of 'svasrd_n_s8_x', which expects a value in the range \[1, 8\]} } */ -+ s8 = svasrd_n_s8_x (pg, s8, (1ULL << 62) + 1); /* { dg-error {passing [^ ]* to argument 3 of 'svasrd_n_s8_x', which expects a value in the range \[1, 8\]} } */ -+ s16 = svasrd_n_s16_x (pg, s16, 0); /* { dg-error {passing 0 to argument 3 of 'svasrd_n_s16_x', which expects a value in the range \[1, 16\]} } */ -+ s16 = svasrd_n_s16_x (pg, s16, 1); -+ s16 = svasrd_n_s16_x (pg, s16, 16); -+ s16 = svasrd_n_s16_x (pg, s16, 17); /* { dg-error {passing 17 to argument 3 of 'svasrd_n_s16_x', which expects a value in the range \[1, 16\]} } */ -+ s32 = svasrd_n_s32_x (pg, s32, 0); /* { dg-error {passing 0 to argument 3 of 'svasrd_n_s32_x', which expects a value in the range \[1, 32\]} } */ -+ s32 = svasrd_n_s32_x (pg, s32, 1); -+ s32 = svasrd_n_s32_x (pg, s32, 32); -+ s32 = svasrd_n_s32_x (pg, s32, 33); /* { dg-error {passing 33 to argument 3 of 'svasrd_n_s32_x', which expects a value in the range \[1, 32\]} } */ -+ s64 = svasrd_n_s64_x (pg, s64, 0); /* { dg-error {passing 0 to argument 3 of 'svasrd_n_s64_x', which expects a value in the range \[1, 64\]} } */ -+ s64 = svasrd_n_s64_x (pg, s64, 1); -+ s64 = svasrd_n_s64_x (pg, s64, 64); -+ s64 = svasrd_n_s64_x (pg, s64, 65); /* { dg-error {passing 65 to argument 3 of 'svasrd_n_s64_x', which expects a value in the range \[1, 64\]} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_1.c -new file mode 100644 -index 000000000..267db83f7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_1.c -@@ -0,0 +1,26 @@ -+/* { dg-do compile } */ -+/* { dg-options "-std=c99" } */ -+ -+#include -+ -+struct s { signed char x; }; -+ -+svuint8_t -+f1 (svbool_t pg, signed char *s8_ptr, void *void_ptr, struct s *s_ptr, -+ float *f32_ptr, _Complex float 
*cf32_ptr, svint8_t s8, svfloat32_t f32, -+ struct s s) -+{ -+ svst1 (pg, s8_ptr); /* { dg-error {too few arguments to function 'svst1'} } */ -+ svst1 (pg, s8_ptr, s8, 0); /* { dg-error {too many arguments to function 'svst1'} } */ -+ svst1 (0, s8_ptr, s8); /* { dg-error {passing 'int' to argument 1 of 'svst1', which expects 'svbool_t'} } */ -+ svst1 (pg, void_ptr, 0); /* { dg-error {passing 'int' to argument 3 of 'svst1', which expects an SVE vector type} } */ -+ svst1 (pg, void_ptr, pg); /* { dg-error {'svst1' has no form that takes 'svbool_t' arguments} } */ -+ svst1 (pg, 0, s8); -+ svst1 (pg, (int *) 0, s8); /* { dg-warning "passing argument 2 of 'svst1_s8' from incompatible pointer type" } */ -+ svst1 (pg, void_ptr, s8); -+ svst1 (pg, s_ptr, s8); /* { dg-warning "passing argument 2 of 'svst1_s8' from incompatible pointer type" } */ -+ svst1 (pg, f32_ptr, s8); /* { dg-warning "passing argument 2 of 'svst1_s8' from incompatible pointer type" } */ -+ svst1 (pg, f32_ptr, f32); -+ svst1 (pg, cf32_ptr, f32); /* { dg-warning "passing argument 2 of 'svst1_f32' from incompatible pointer type" } */ -+ svst1 (pg, s, s8); /* { dg-error {passing 'struct s' to argument 2 of 'svst1', which expects a scalar pointer} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_2.c -new file mode 100644 -index 000000000..4e4fb3c6d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_2.c -@@ -0,0 +1,27 @@ -+/* { dg-do compile } */ -+/* { dg-options "-std=c99" } */ -+ -+#include -+ -+struct s { signed char x; }; -+ -+svuint8_t -+f1 (svbool_t pg, signed char *s8_ptr, void *void_ptr, struct s *s_ptr, -+ float *f32_ptr, _Complex float *cf32_ptr, svint8_t s8, svfloat32_t f32) -+{ -+ svst1_vnum (pg, s8_ptr, 0); /* { dg-error {too few arguments to function 'svst1_vnum'} } */ -+ svst1_vnum (pg, s8_ptr, 0, s8, 0); /* { dg-error {too many arguments to function 'svst1_vnum'} } */ -+ svst1_vnum (0, s8_ptr, 0, s8); /* { dg-error {passing 'int' to argument 1 of 'svst1_vnum', which expects 'svbool_t'} } */ -+ svst1_vnum (pg, s8_ptr, pg, s8); /* { dg-error {passing 'svbool_t' to argument 3 of 'svst1_vnum', which expects 'int64_t'} } */ -+ svst1_vnum (pg, s8_ptr, s8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svst1_vnum', which expects 'int64_t'} } */ -+ svst1_vnum (pg, s8_ptr, void_ptr, s8); /* { dg-warning "passing argument 3 of 'svst1_vnum_s8' makes integer from pointer without a cast" } */ -+ svst1_vnum (pg, void_ptr, 0, 0); /* { dg-error {passing 'int' to argument 4 of 'svst1_vnum', which expects an SVE vector type} } */ -+ svst1_vnum (pg, void_ptr, 0, pg); /* { dg-error {'svst1_vnum' has no form that takes 'svbool_t' arguments} } */ -+ svst1_vnum (pg, 0, 0, s8); -+ svst1_vnum (pg, (int *) 0, 0, s8); /* { dg-warning "passing argument 2 of 'svst1_vnum_s8' from incompatible pointer type" } */ -+ svst1_vnum (pg, void_ptr, 0, s8); -+ svst1_vnum (pg, s_ptr, 0, s8); /* { dg-warning "passing argument 2 of 'svst1_vnum_s8' from incompatible pointer type" } */ -+ svst1_vnum (pg, f32_ptr, 0, s8); /* { dg-warning "passing argument 2 of 'svst1_vnum_s8' from incompatible pointer type" } */ -+ svst1_vnum (pg, f32_ptr, 0, f32); -+ svst1_vnum (pg, cf32_ptr, 0, f32); /* { dg-warning "passing argument 2 of 'svst1_vnum_f32' from incompatible pointer type" } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_scatter_index_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_scatter_index_1.c -new file mode 100644 -index 000000000..3209149b6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_scatter_index_1.c -@@ -0,0 +1,101 @@ -+/* { dg-do compile } */ -+/* { dg-options "-std=c99" } */ -+ -+#include -+ -+struct s { signed char x; }; -+ -+svuint32_t -+f1 (svbool_t pg, signed char *s8_ptr, short *s16_ptr, -+ int32_t *s32_ptr, uint32_t *u32_ptr, float *f32_ptr, -+ int64_t *s64_ptr, uint64_t *u64_ptr, double *f64_ptr, -+ void *void_ptr, struct s *s_ptr, _Complex float *cf32_ptr, -+ svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16, svfloat16_t f16, -+ svint32_t s32, svuint32_t u32, svfloat32_t f32, -+ svint64_t s64, svuint64_t u64, svfloat64_t f64, struct s s) -+{ -+ svst1_scatter_index (pg, s32_ptr, s32); /* { dg-error {too few arguments to function 'svst1_scatter_index'} } */ -+ svst1_scatter_index (pg, s32_ptr, s32, s32, 0); /* { dg-error {too many arguments to function 'svst1_scatter_index'} } */ -+ svst1_scatter_index (0, s32_ptr, s32, s32); /* { dg-error {passing 'int' to argument 1 of 'svst1_scatter_index', which expects 'svbool_t'} } */ -+ svst1_scatter_index (pg, 0, s32, s32); -+ svst1_scatter_index (pg, (int *) 0, s32, s32); -+ svst1_scatter_index (pg, void_ptr, s32, s32); -+ svst1_scatter_index (pg, s_ptr, s32, s32); /* { dg-warning "passing argument 2 of 'svst1_scatter_s32index_s32' from incompatible pointer type" } */ -+ svst1_scatter_index (pg, f32_ptr, s32, s32); /* { dg-warning "passing argument 2 of 'svst1_scatter_s32index_s32' from incompatible pointer type" } */ -+ svst1_scatter_index (pg, f32_ptr, s32, f32); -+ svst1_scatter_index (pg, cf32_ptr, s32, f32); /* { dg-warning "passing argument 2 of 'svst1_scatter_s32index_f32' from incompatible pointer type" } */ -+ svst1_scatter_index (pg, s, s32, s32); /* { dg-error {passing 'struct s' to argument 2 of 'svst1_scatter_index', which expects a vector or pointer base address} } */ -+ -+ svst1_scatter_index (pg, u32, void_ptr, s32); /* { dg-warning "passing argument 3 of 'svst1_scatter_u32base_index_s32' makes integer from pointer without a cast" } */ -+ svst1_scatter_index (pg, u32, pg, s32); /* { dg-error {passing 'svbool_t' to argument 3 of 'svst1_scatter_index', which expects 'int64_t'} } */ -+ svst1_scatter_index (pg, u32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svst1_scatter_index', which expects 'int64_t'} } */ -+ -+ svst1_scatter_index (pg, void_ptr, u32, pg); /* { dg-error {passing 'svbool_t' to argument 4 of 'svst1_scatter_index', which expects a vector of 32-bit or 64-bit elements} } */ -+ -+ svst1_scatter_index (pg, s8_ptr, u32, s8); /* { dg-error {passing 'svint8_t' to argument 4 of 'svst1_scatter_index', which expects a vector of 32-bit or 64-bit elements} } */ -+ svst1_scatter_index (pg, s8_ptr, u32, u8); /* { dg-error {passing 'svuint8_t' to argument 4 of 'svst1_scatter_index', which expects a vector of 32-bit or 64-bit elements} } */ -+ -+ svst1_scatter_index (pg, s16_ptr, u32, s16); /* { dg-error {passing 'svint16_t' to argument 4 of 'svst1_scatter_index', which expects a vector of 32-bit or 64-bit elements} } */ -+ svst1_scatter_index (pg, s16_ptr, u32, u16); /* { dg-error {passing 'svuint16_t' to argument 4 of 'svst1_scatter_index', which expects a vector of 32-bit or 64-bit elements} } */ -+ svst1_scatter_index (pg, s16_ptr, u32, f16); /* { dg-error {passing 'svfloat16_t' to argument 4 of 'svst1_scatter_index', which expects a vector of 32-bit or 64-bit 
elements} } */ -+ -+ svst1_scatter_index (pg, u32, 0, s32); -+ svst1_scatter_index (pg, s32, 0, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svst1_scatter_index', which expects 'svuint32_t'} } */ -+ -+ svst1_scatter_index (pg, u32, 0, u32); -+ svst1_scatter_index (pg, s32, 0, u32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svst1_scatter_index', which expects 'svuint32_t'} } */ -+ -+ svst1_scatter_index (pg, u32, 0, f32); -+ svst1_scatter_index (pg, s32, 0, f32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svst1_scatter_index', which expects 'svuint32_t'} } */ -+ -+ svst1_scatter_index (pg, u64, 0, s64); -+ svst1_scatter_index (pg, s64, 0, s64); /* { dg-error {passing 'svint64_t' to argument 2 of 'svst1_scatter_index', which expects 'svuint64_t'} } */ -+ -+ svst1_scatter_index (pg, u64, 0, u64); -+ svst1_scatter_index (pg, s64, 0, u64); /* { dg-error {passing 'svint64_t' to argument 2 of 'svst1_scatter_index', which expects 'svuint64_t'} } */ -+ -+ svst1_scatter_index (pg, u64, 0, f64); -+ svst1_scatter_index (pg, s64, 0, f64); /* { dg-error {passing 'svint64_t' to argument 2 of 'svst1_scatter_index', which expects 'svuint64_t'} } */ -+ -+ svst1_scatter_index (pg, s32_ptr, s32, s32); -+ svst1_scatter_index (pg, s32_ptr, u32, s32); -+ svst1_scatter_index (pg, s32_ptr, f32, s32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svst1_scatter_index', which when storing 'svint32_t' expects a vector of 32-bit integers} } */ -+ svst1_scatter_index (pg, s32_ptr, s64, s32); /* { dg-error {passing 'svint64_t' to argument 3 of 'svst1_scatter_index', which when storing 'svint32_t' expects a vector of 32-bit integers} } */ -+ svst1_scatter_index (pg, s32_ptr, u64, s32); /* { dg-error {passing 'svuint64_t' to argument 3 of 'svst1_scatter_index', which when storing 'svint32_t' expects a vector of 32-bit integers} } */ -+ svst1_scatter_index (pg, s32_ptr, f64, s32); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svst1_scatter_index', which when storing 'svint32_t' expects a vector of 32-bit integers} } */ -+ -+ svst1_scatter_index (pg, u32_ptr, s32, u32); -+ svst1_scatter_index (pg, u32_ptr, u32, u32); -+ svst1_scatter_index (pg, u32_ptr, f32, u32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svst1_scatter_index', which when storing 'svuint32_t' expects a vector of 32-bit integers} } */ -+ svst1_scatter_index (pg, u32_ptr, s64, u32); /* { dg-error {passing 'svint64_t' to argument 3 of 'svst1_scatter_index', which when storing 'svuint32_t' expects a vector of 32-bit integers} } */ -+ svst1_scatter_index (pg, u32_ptr, u64, u32); /* { dg-error {passing 'svuint64_t' to argument 3 of 'svst1_scatter_index', which when storing 'svuint32_t' expects a vector of 32-bit integers} } */ -+ svst1_scatter_index (pg, u32_ptr, f64, u32); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svst1_scatter_index', which when storing 'svuint32_t' expects a vector of 32-bit integers} } */ -+ -+ svst1_scatter_index (pg, f32_ptr, s32, f32); -+ svst1_scatter_index (pg, f32_ptr, u32, f32); -+ svst1_scatter_index (pg, f32_ptr, f32, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svst1_scatter_index', which when storing 'svfloat32_t' expects a vector of 32-bit integers} } */ -+ svst1_scatter_index (pg, f32_ptr, s64, f32); /* { dg-error {passing 'svint64_t' to argument 3 of 'svst1_scatter_index', which when storing 'svfloat32_t' expects a vector of 32-bit integers} } */ -+ svst1_scatter_index (pg, f32_ptr, u64, f32); /* { dg-error {passing 'svuint64_t' to 
argument 3 of 'svst1_scatter_index', which when storing 'svfloat32_t' expects a vector of 32-bit integers} } */ -+ svst1_scatter_index (pg, f32_ptr, f64, f32); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svst1_scatter_index', which when storing 'svfloat32_t' expects a vector of 32-bit integers} } */ -+ -+ svst1_scatter_index (pg, s64_ptr, s32, s64); /* { dg-error {passing 'svint32_t' to argument 3 of 'svst1_scatter_index', which when storing 'svint64_t' expects a vector of 64-bit integers} } */ -+ svst1_scatter_index (pg, s64_ptr, u32, s64); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svst1_scatter_index', which when storing 'svint64_t' expects a vector of 64-bit integers} } */ -+ svst1_scatter_index (pg, s64_ptr, f32, s64); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svst1_scatter_index', which when storing 'svint64_t' expects a vector of 64-bit integers} } */ -+ svst1_scatter_index (pg, s64_ptr, s64, s64); -+ svst1_scatter_index (pg, s64_ptr, u64, s64); -+ svst1_scatter_index (pg, s64_ptr, f64, s64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svst1_scatter_index', which when storing 'svint64_t' expects a vector of 64-bit integers} } */ -+ -+ svst1_scatter_index (pg, u64_ptr, s32, u64); /* { dg-error {passing 'svint32_t' to argument 3 of 'svst1_scatter_index', which when storing 'svuint64_t' expects a vector of 64-bit integers} } */ -+ svst1_scatter_index (pg, u64_ptr, u32, u64); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svst1_scatter_index', which when storing 'svuint64_t' expects a vector of 64-bit integers} } */ -+ svst1_scatter_index (pg, u64_ptr, f32, u64); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svst1_scatter_index', which when storing 'svuint64_t' expects a vector of 64-bit integers} } */ -+ svst1_scatter_index (pg, u64_ptr, s64, u64); -+ svst1_scatter_index (pg, u64_ptr, u64, u64); -+ svst1_scatter_index (pg, u64_ptr, f64, u64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svst1_scatter_index', which when storing 'svuint64_t' expects a vector of 64-bit integers} } */ -+ -+ svst1_scatter_index (pg, f64_ptr, s32, f64); /* { dg-error {passing 'svint32_t' to argument 3 of 'svst1_scatter_index', which when storing 'svfloat64_t' expects a vector of 64-bit integers} } */ -+ svst1_scatter_index (pg, f64_ptr, u32, f64); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svst1_scatter_index', which when storing 'svfloat64_t' expects a vector of 64-bit integers} } */ -+ svst1_scatter_index (pg, f64_ptr, f32, f64); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svst1_scatter_index', which when storing 'svfloat64_t' expects a vector of 64-bit integers} } */ -+ svst1_scatter_index (pg, f64_ptr, s64, f64); -+ svst1_scatter_index (pg, f64_ptr, u64, f64); -+ svst1_scatter_index (pg, f64_ptr, f64, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svst1_scatter_index', which when storing 'svfloat64_t' expects a vector of 64-bit integers} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_scatter_offset_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_scatter_offset_1.c -new file mode 100644 -index 000000000..10abf758c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_scatter_offset_1.c -@@ -0,0 +1,45 @@ -+/* { dg-do compile } */ -+/* { dg-options "-std=c99" } */ -+ -+#include -+ -+struct s { signed char x; }; -+ -+svuint32_t -+f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16, -+ svfloat16_t f16, 
svint32_t s32, svuint32_t u32, svfloat32_t f32, -+ svint64_t s64, svuint64_t u64, svfloat64_t f64) -+{ -+ svst1_scatter (pg, u32); /* { dg-error {too few arguments to function 'svst1_scatter'} } */ -+ svst1_scatter (pg, u32, u32, 0); /* { dg-error {too many arguments to function 'svst1_scatter'} } */ -+ svst1_scatter (0, u32, u32); /* { dg-error {passing 'int' to argument 1 of 'svst1_scatter', which expects 'svbool_t'} } */ -+ svst1_scatter (pg, 0, u32); /* { dg-error {passing 'int' to argument 2 of 'svst1_scatter', which expects an SVE vector type} } */ -+ svst1_scatter (pg, u32, 0); /* { dg-error {passing 'int' to argument 3 of 'svst1_scatter', which expects an SVE vector type} } */ -+ -+ svst1_scatter (pg, u32, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svst1_scatter', which expects a vector of 32-bit or 64-bit elements} } */ -+ -+ svst1_scatter (pg, u32, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svst1_scatter', which expects a vector of 32-bit or 64-bit elements} } */ -+ svst1_scatter (pg, u32, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svst1_scatter', which expects a vector of 32-bit or 64-bit elements} } */ -+ -+ svst1_scatter (pg, u32, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svst1_scatter', which expects a vector of 32-bit or 64-bit elements} } */ -+ svst1_scatter (pg, u32, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svst1_scatter', which expects a vector of 32-bit or 64-bit elements} } */ -+ svst1_scatter (pg, u32, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svst1_scatter', which expects a vector of 32-bit or 64-bit elements} } */ -+ -+ svst1_scatter (pg, u32, s32); -+ svst1_scatter (pg, s32, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svst1_scatter', which expects 'svuint32_t'} } */ -+ -+ svst1_scatter (pg, u32, u32); -+ svst1_scatter (pg, s32, u32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svst1_scatter', which expects 'svuint32_t'} } */ -+ -+ svst1_scatter (pg, u32, f32); -+ svst1_scatter (pg, s32, f32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svst1_scatter', which expects 'svuint32_t'} } */ -+ -+ svst1_scatter (pg, u64, s64); -+ svst1_scatter (pg, s64, s64); /* { dg-error {passing 'svint64_t' to argument 2 of 'svst1_scatter', which expects 'svuint64_t'} } */ -+ -+ svst1_scatter (pg, u64, u64); -+ svst1_scatter (pg, s64, u64); /* { dg-error {passing 'svint64_t' to argument 2 of 'svst1_scatter', which expects 'svuint64_t'} } */ -+ -+ svst1_scatter (pg, u64, f64); -+ svst1_scatter (pg, s64, f64); /* { dg-error {passing 'svint64_t' to argument 2 of 'svst1_scatter', which expects 'svuint64_t'} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_scatter_offset_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_scatter_offset_2.c -new file mode 100644 -index 000000000..8ee8129fa ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/store_scatter_offset_2.c -@@ -0,0 +1,101 @@ -+/* { dg-do compile } */ -+/* { dg-options "-std=c99" } */ -+ -+#include -+ -+struct s { signed char x; }; -+ -+svuint32_t -+f1 (svbool_t pg, signed char *s8_ptr, short *s16_ptr, -+ int32_t *s32_ptr, uint32_t *u32_ptr, float *f32_ptr, -+ int64_t *s64_ptr, uint64_t *u64_ptr, double *f64_ptr, -+ void *void_ptr, struct s *s_ptr, _Complex float *cf32_ptr, -+ svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16, svfloat16_t f16, -+ svint32_t s32, svuint32_t u32, svfloat32_t f32, -+ svint64_t s64, svuint64_t u64, 
svfloat64_t f64, struct s s) -+{ -+ svst1_scatter_offset (pg, s32_ptr, s32); /* { dg-error {too few arguments to function 'svst1_scatter_offset'} } */ -+ svst1_scatter_offset (pg, s32_ptr, s32, s32, 0); /* { dg-error {too many arguments to function 'svst1_scatter_offset'} } */ -+ svst1_scatter_offset (0, s32_ptr, s32, s32); /* { dg-error {passing 'int' to argument 1 of 'svst1_scatter_offset', which expects 'svbool_t'} } */ -+ svst1_scatter_offset (pg, 0, s32, s32); -+ svst1_scatter_offset (pg, (int *) 0, s32, s32); -+ svst1_scatter_offset (pg, void_ptr, s32, s32); -+ svst1_scatter_offset (pg, s_ptr, s32, s32); /* { dg-warning "passing argument 2 of 'svst1_scatter_s32offset_s32' from incompatible pointer type" } */ -+ svst1_scatter_offset (pg, f32_ptr, s32, s32); /* { dg-warning "passing argument 2 of 'svst1_scatter_s32offset_s32' from incompatible pointer type" } */ -+ svst1_scatter_offset (pg, f32_ptr, s32, f32); -+ svst1_scatter_offset (pg, cf32_ptr, s32, f32); /* { dg-warning "passing argument 2 of 'svst1_scatter_s32offset_f32' from incompatible pointer type" } */ -+ svst1_scatter_offset (pg, s, s32, s32); /* { dg-error {passing 'struct s' to argument 2 of 'svst1_scatter_offset', which expects a vector or pointer base address} } */ -+ -+ svst1_scatter_offset (pg, u32, void_ptr, s32); /* { dg-warning "passing argument 3 of 'svst1_scatter_u32base_offset_s32' makes integer from pointer without a cast" } */ -+ svst1_scatter_offset (pg, u32, pg, s32); /* { dg-error {passing 'svbool_t' to argument 3 of 'svst1_scatter_offset', which expects 'int64_t'} } */ -+ svst1_scatter_offset (pg, u32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svst1_scatter_offset', which expects 'int64_t'} } */ -+ -+ svst1_scatter_offset (pg, void_ptr, u32, pg); /* { dg-error {passing 'svbool_t' to argument 4 of 'svst1_scatter_offset', which expects a vector of 32-bit or 64-bit elements} } */ -+ -+ svst1_scatter_offset (pg, s8_ptr, u32, s8); /* { dg-error {passing 'svint8_t' to argument 4 of 'svst1_scatter_offset', which expects a vector of 32-bit or 64-bit elements} } */ -+ svst1_scatter_offset (pg, s8_ptr, u32, u8); /* { dg-error {passing 'svuint8_t' to argument 4 of 'svst1_scatter_offset', which expects a vector of 32-bit or 64-bit elements} } */ -+ -+ svst1_scatter_offset (pg, s16_ptr, u32, s16); /* { dg-error {passing 'svint16_t' to argument 4 of 'svst1_scatter_offset', which expects a vector of 32-bit or 64-bit elements} } */ -+ svst1_scatter_offset (pg, s16_ptr, u32, u16); /* { dg-error {passing 'svuint16_t' to argument 4 of 'svst1_scatter_offset', which expects a vector of 32-bit or 64-bit elements} } */ -+ svst1_scatter_offset (pg, s16_ptr, u32, f16); /* { dg-error {passing 'svfloat16_t' to argument 4 of 'svst1_scatter_offset', which expects a vector of 32-bit or 64-bit elements} } */ -+ -+ svst1_scatter_offset (pg, u32, 0, s32); -+ svst1_scatter_offset (pg, s32, 0, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svst1_scatter_offset', which expects 'svuint32_t'} } */ -+ -+ svst1_scatter_offset (pg, u32, 0, u32); -+ svst1_scatter_offset (pg, s32, 0, u32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svst1_scatter_offset', which expects 'svuint32_t'} } */ -+ -+ svst1_scatter_offset (pg, u32, 0, f32); -+ svst1_scatter_offset (pg, s32, 0, f32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svst1_scatter_offset', which expects 'svuint32_t'} } */ -+ -+ svst1_scatter_offset (pg, u64, 0, s64); -+ svst1_scatter_offset (pg, s64, 0, s64); /* { dg-error {passing 'svint64_t' to 
argument 2 of 'svst1_scatter_offset', which expects 'svuint64_t'} } */ -+ -+ svst1_scatter_offset (pg, u64, 0, u64); -+ svst1_scatter_offset (pg, s64, 0, u64); /* { dg-error {passing 'svint64_t' to argument 2 of 'svst1_scatter_offset', which expects 'svuint64_t'} } */ -+ -+ svst1_scatter_offset (pg, u64, 0, f64); -+ svst1_scatter_offset (pg, s64, 0, f64); /* { dg-error {passing 'svint64_t' to argument 2 of 'svst1_scatter_offset', which expects 'svuint64_t'} } */ -+ -+ svst1_scatter_offset (pg, s32_ptr, s32, s32); -+ svst1_scatter_offset (pg, s32_ptr, u32, s32); -+ svst1_scatter_offset (pg, s32_ptr, f32, s32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svint32_t' expects a vector of 32-bit integers} } */ -+ svst1_scatter_offset (pg, s32_ptr, s64, s32); /* { dg-error {passing 'svint64_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svint32_t' expects a vector of 32-bit integers} } */ -+ svst1_scatter_offset (pg, s32_ptr, u64, s32); /* { dg-error {passing 'svuint64_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svint32_t' expects a vector of 32-bit integers} } */ -+ svst1_scatter_offset (pg, s32_ptr, f64, s32); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svint32_t' expects a vector of 32-bit integers} } */ -+ -+ svst1_scatter_offset (pg, u32_ptr, s32, u32); -+ svst1_scatter_offset (pg, u32_ptr, u32, u32); -+ svst1_scatter_offset (pg, u32_ptr, f32, u32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svuint32_t' expects a vector of 32-bit integers} } */ -+ svst1_scatter_offset (pg, u32_ptr, s64, u32); /* { dg-error {passing 'svint64_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svuint32_t' expects a vector of 32-bit integers} } */ -+ svst1_scatter_offset (pg, u32_ptr, u64, u32); /* { dg-error {passing 'svuint64_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svuint32_t' expects a vector of 32-bit integers} } */ -+ svst1_scatter_offset (pg, u32_ptr, f64, u32); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svuint32_t' expects a vector of 32-bit integers} } */ -+ -+ svst1_scatter_offset (pg, f32_ptr, s32, f32); -+ svst1_scatter_offset (pg, f32_ptr, u32, f32); -+ svst1_scatter_offset (pg, f32_ptr, f32, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svfloat32_t' expects a vector of 32-bit integers} } */ -+ svst1_scatter_offset (pg, f32_ptr, s64, f32); /* { dg-error {passing 'svint64_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svfloat32_t' expects a vector of 32-bit integers} } */ -+ svst1_scatter_offset (pg, f32_ptr, u64, f32); /* { dg-error {passing 'svuint64_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svfloat32_t' expects a vector of 32-bit integers} } */ -+ svst1_scatter_offset (pg, f32_ptr, f64, f32); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svfloat32_t' expects a vector of 32-bit integers} } */ -+ -+ svst1_scatter_offset (pg, s64_ptr, s32, s64); /* { dg-error {passing 'svint32_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svint64_t' expects a vector of 64-bit integers} } */ -+ svst1_scatter_offset (pg, s64_ptr, u32, s64); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svint64_t' expects a vector of 
64-bit integers} } */ -+ svst1_scatter_offset (pg, s64_ptr, f32, s64); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svint64_t' expects a vector of 64-bit integers} } */ -+ svst1_scatter_offset (pg, s64_ptr, s64, s64); -+ svst1_scatter_offset (pg, s64_ptr, u64, s64); -+ svst1_scatter_offset (pg, s64_ptr, f64, s64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svint64_t' expects a vector of 64-bit integers} } */ -+ -+ svst1_scatter_offset (pg, u64_ptr, s32, u64); /* { dg-error {passing 'svint32_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svuint64_t' expects a vector of 64-bit integers} } */ -+ svst1_scatter_offset (pg, u64_ptr, u32, u64); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svuint64_t' expects a vector of 64-bit integers} } */ -+ svst1_scatter_offset (pg, u64_ptr, f32, u64); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svuint64_t' expects a vector of 64-bit integers} } */ -+ svst1_scatter_offset (pg, u64_ptr, s64, u64); -+ svst1_scatter_offset (pg, u64_ptr, u64, u64); -+ svst1_scatter_offset (pg, u64_ptr, f64, u64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svuint64_t' expects a vector of 64-bit integers} } */ -+ -+ svst1_scatter_offset (pg, f64_ptr, s32, f64); /* { dg-error {passing 'svint32_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svfloat64_t' expects a vector of 64-bit integers} } */ -+ svst1_scatter_offset (pg, f64_ptr, u32, f64); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svfloat64_t' expects a vector of 64-bit integers} } */ -+ svst1_scatter_offset (pg, f64_ptr, f32, f64); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svfloat64_t' expects a vector of 64-bit integers} } */ -+ svst1_scatter_offset (pg, f64_ptr, s64, f64); -+ svst1_scatter_offset (pg, f64_ptr, u64, f64); -+ svst1_scatter_offset (pg, f64_ptr, f64, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svst1_scatter_offset', which when storing 'svfloat64_t' expects a vector of 64-bit integers} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_1.c -new file mode 100644 -index 000000000..a9233324c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_1.c -@@ -0,0 +1,24 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+#pragma GCC target ("arch=armv8.2-a+sve+bf16") -+ -+void -+f1 (svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32, -+ svbfloat16_t bf16, svfloat32_t f32, svfloat64_t f64, bfloat16_t bf) -+{ -+ svbfmmla (f32, bf16); /* { dg-error {too few arguments to function 'svbfmmla'} } */ -+ svbfmmla (f32, bf16, bf16, 0); /* { dg-error {too many arguments to function 'svbfmmla'} } */ -+ svbfmmla (0, bf16, bf16); /* { dg-error {passing 'int' to argument 1 of 'svbfmmla', which expects an SVE vector type} } */ -+ svbfmmla (pg, bf16, bf16); /* { dg-error {'svbfmmla' has no form that takes 'svbool_t' arguments} } */ -+ svbfmmla (u8, bf16, bf16); /* { dg-error {'svbfmmla' has no form that takes 'svuint8_t' arguments} } */ -+ svbfmmla (u16, bf16, bf16); /* { dg-error {'svbfmmla' has no form that takes 'svuint16_t' arguments} } */ -+ svbfmmla (f64, bf16, 
bf16); /* { dg-error {'svbfmmla' has no form that takes 'svfloat64_t' arguments} } */ -+ svbfmmla (f32, bf16, bf16); -+ svbfmmla (f32, 0, bf16); /* { dg-error {passing 'int' to argument 2 of 'svbfmmla', which expects 'svbfloat16_t'} } */ -+ svbfmmla (f32, f32, bf16); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svbfmmla', which expects 'svbfloat16_t'} } */ -+ svbfmmla (f32, bf16, 0); /* { dg-error {passing 'int' to argument 3 of 'svbfmmla', which expects 'svbfloat16_t'} } */ -+ svbfmmla (f32, bf16, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svbfmmla', which expects 'svbfloat16_t'} } */ -+ svbfmmla (f32, bf16, bf); /* { dg-error {passing 'bfloat16_t'[^\n]* to argument 3 of 'svbfmmla', which expects 'svbfloat16_t'} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lane_1.c -new file mode 100644 -index 000000000..23f027f2d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lane_1.c -@@ -0,0 +1,30 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+#pragma GCC target ("arch=armv8.2-a+sve+bf16") -+ -+void -+f1 (svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32, -+ svbfloat16_t bf16, svfloat32_t f32, svfloat64_t f64, int i) -+{ -+ svbfmlalb_lane (f32, bf16, bf16); /* { dg-error {too few arguments to function 'svbfmlalb_lane'} } */ -+ svbfmlalb_lane (f32, bf16, bf16, 0, 0); /* { dg-error {too many arguments to function 'svbfmlalb_lane'} } */ -+ svbfmlalb_lane (0, bf16, bf16, 0); /* { dg-error {passing 'int' to argument 1 of 'svbfmlalb_lane', which expects an SVE vector type} } */ -+ svbfmlalb_lane (pg, bf16, bf16, 0); /* { dg-error {'svbfmlalb_lane' has no form that takes 'svbool_t' arguments} } */ -+ svbfmlalb_lane (u8, bf16, bf16, 0); /* { dg-error {'svbfmlalb_lane' has no form that takes 'svuint8_t' arguments} } */ -+ svbfmlalb_lane (u16, bf16, bf16, 0); /* { dg-error {'svbfmlalb_lane' has no form that takes 'svuint16_t' arguments} } */ -+ svbfmlalb_lane (f64, bf16, bf16, 0); /* { dg-error {'svbfmlalb_lane' has no form that takes 'svfloat64_t' arguments} } */ -+ svbfmlalb_lane (f32, bf16, bf16, 0); -+ svbfmlalb_lane (f32, 0, bf16, 0); /* { dg-error {passing 'int' to argument 2 of 'svbfmlalb_lane', which expects 'svbfloat16_t'} } */ -+ svbfmlalb_lane (f32, f32, bf16, 0); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svbfmlalb_lane', which expects 'svbfloat16_t'} } */ -+ svbfmlalb_lane (f32, bf16, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svbfmlalb_lane', which expects 'svbfloat16_t'} } */ -+ svbfmlalb_lane (f32, bf16, f32, 0); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svbfmlalb_lane', which expects 'svbfloat16_t'} } */ -+ svbfmlalb_lane (f32, bf16, bf16, s32); /* { dg-error {argument 4 of 'svbfmlalb_lane' must be an integer constant expression} } */ -+ svbfmlalb_lane (f32, bf16, bf16, i); /* { dg-error {argument 4 of 'svbfmlalb_lane' must be an integer constant expression} } */ -+ -+ svbfmlalb_lane (f32, bf16, bf16, 0); -+ svbfmlalb_lane (f32, bf16, bf16, 7); -+ svbfmlalb_lane (f32, bf16, bf16, 8); /* { dg-error {passing 8 to argument 4 of 'svbfmlalb_lane', which expects a value in the range \[0, 7\]} } */ -+ svbfmlalb_lane (f32, bf16, bf16, -1); /* { dg-error {passing -1 to argument 4 of 'svbfmlalb_lane', which expects a value in the range \[0, 7\]} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lanex2_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lanex2_1.c -new file mode 100644 -index 000000000..4755ca79a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_lanex2_1.c -@@ -0,0 +1,30 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+#pragma GCC target ("arch=armv8.2-a+sve+bf16") -+ -+void -+f1 (svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32, -+ svbfloat16_t bf16, svfloat32_t f32, svfloat64_t f64, int i) -+{ -+ svbfdot_lane (f32, bf16, bf16); /* { dg-error {too few arguments to function 'svbfdot_lane'} } */ -+ svbfdot_lane (f32, bf16, bf16, 0, 0); /* { dg-error {too many arguments to function 'svbfdot_lane'} } */ -+ svbfdot_lane (0, bf16, bf16, 0); /* { dg-error {passing 'int' to argument 1 of 'svbfdot_lane', which expects an SVE vector type} } */ -+ svbfdot_lane (pg, bf16, bf16, 0); /* { dg-error {'svbfdot_lane' has no form that takes 'svbool_t' arguments} } */ -+ svbfdot_lane (u8, bf16, bf16, 0); /* { dg-error {'svbfdot_lane' has no form that takes 'svuint8_t' arguments} } */ -+ svbfdot_lane (u16, bf16, bf16, 0); /* { dg-error {'svbfdot_lane' has no form that takes 'svuint16_t' arguments} } */ -+ svbfdot_lane (f64, bf16, bf16, 0); /* { dg-error {'svbfdot_lane' has no form that takes 'svfloat64_t' arguments} } */ -+ svbfdot_lane (f32, bf16, bf16, 0); -+ svbfdot_lane (f32, 0, bf16, 0); /* { dg-error {passing 'int' to argument 2 of 'svbfdot_lane', which expects 'svbfloat16_t'} } */ -+ svbfdot_lane (f32, f32, bf16, 0); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svbfdot_lane', which expects 'svbfloat16_t'} } */ -+ svbfdot_lane (f32, bf16, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svbfdot_lane', which expects 'svbfloat16_t'} } */ -+ svbfdot_lane (f32, bf16, f32, 0); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svbfdot_lane', which expects 'svbfloat16_t'} } */ -+ svbfdot_lane (f32, bf16, bf16, s32); /* { dg-error {argument 4 of 'svbfdot_lane' must be an integer constant expression} } */ -+ svbfdot_lane (f32, bf16, bf16, i); /* { dg-error {argument 4 of 'svbfdot_lane' must be an integer constant expression} } */ -+ -+ svbfdot_lane (f32, bf16, bf16, 0); -+ svbfdot_lane (f32, bf16, bf16, 3); -+ svbfdot_lane (f32, bf16, bf16, 4); /* { dg-error {passing 4 to argument 4 of 'svbfdot_lane', which expects a value in the range \[0, 3\]} } */ -+ svbfdot_lane (f32, bf16, bf16, -1); /* { dg-error {passing -1 to argument 4 of 'svbfdot_lane', which expects a value in the range \[0, 3\]} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c -new file mode 100644 -index 000000000..2d09a8eeb ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c -@@ -0,0 +1,24 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+#pragma GCC target ("arch=armv8.2-a+sve+bf16") -+ -+void -+f1 (svbool_t pg, svuint8_t u8, svuint16_t u16, svint32_t s32, -+ svbfloat16_t bf16, svfloat32_t f32, svfloat64_t f64, bfloat16_t bf) -+{ -+ svbfdot (f32, bf16); /* { dg-error {too few arguments to function 'svbfdot'} } */ -+ svbfdot (f32, bf16, bf16, 0); /* { dg-error {too many arguments to function 'svbfdot'} } */ -+ svbfdot (0, bf16, bf16); /* { dg-error {passing 'int' to argument 1 of 'svbfdot', which expects an SVE vector type} } */ -+ svbfdot (pg, bf16, bf16); /* { dg-error {'svbfdot' has no form that takes 'svbool_t' arguments} } */ -+ svbfdot (u8, bf16, bf16); /* { 
dg-error {'svbfdot' has no form that takes 'svuint8_t' arguments} } */ -+ svbfdot (u16, bf16, bf16); /* { dg-error {'svbfdot' has no form that takes 'svuint16_t' arguments} } */ -+ svbfdot (f64, bf16, bf16); /* { dg-error {'svbfdot' has no form that takes 'svfloat64_t' arguments} } */ -+ svbfdot (f32, bf16, bf16); -+ svbfdot (f32, 0, bf16); /* { dg-error {passing 'int' to argument 2 of 'svbfdot', which expects 'svbfloat16_t'} } */ -+ svbfdot (f32, f32, bf16); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svbfdot', which expects 'svbfloat16_t'} } */ -+ svbfdot (f32, bf16, 0); /* { dg-error {invalid conversion to type 'bfloat16_t'} } */ -+ svbfdot (f32, bf16, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svbfdot', which expects 'svbfloat16_t'} } */ -+ svbfdot (f32, bf16, bf); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_intq_uintq_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_intq_uintq_lane_1.c -new file mode 100644 -index 000000000..600be05a8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_intq_uintq_lane_1.c -@@ -0,0 +1,32 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-march=armv8.6-a+sve+i8mm" } */ -+ -+#include -+ -+void -+f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16, -+ svint32_t s32, svuint32_t u32, svint64_t s64, svuint64_t u64, -+ svfloat32_t f32, int i) -+{ -+ svsudot_lane (s32, s8, u8); /* { dg-error {too few arguments to function 'svsudot_lane'} } */ -+ svsudot_lane (s32, s8, u8, 0, 0); /* { dg-error {too many arguments to function 'svsudot_lane'} } */ -+ svsudot_lane (0, s8, u8, 0); /* { dg-error {passing 'int' to argument 1 of 'svsudot_lane', which expects an SVE vector type} } */ -+ svsudot_lane (pg, s8, u8, 0); /* { dg-error {'svsudot_lane' has no form that takes 'svbool_t' arguments} } */ -+ svsudot_lane (u8, s8, u8, 0); /* { dg-error {'svsudot_lane' has no form that takes 'svuint8_t' arguments} } */ -+ svsudot_lane (f32, s8, u8, 0); /* { dg-error {'svsudot_lane' has no form that takes 'svfloat32_t' arguments} } */ -+ svsudot_lane (u32, s8, u8, 0); /* { dg-error {'svsudot_lane' has no form that takes 'svuint32_t' arguments} } */ -+ svsudot_lane (s32, s8, u8, 0); -+ svsudot_lane (s32, 0, u8, 0); /* { dg-error {passing 'int' to argument 2 of 'svsudot_lane', which expects an SVE vector type} } */ -+ svsudot_lane (s32, s8, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svsudot_lane', which expects an SVE vector type} } */ -+ -+ svsudot_lane (s32, s8, u8, 0); -+ svsudot_lane (s32, u8, u8, 0); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svsudot_lane', which expects a vector of signed integers} } */ -+ svsudot_lane (s32, s8, s8, 0); /* { dg-error {passing 'svint8_t' to argument 3 of 'svsudot_lane', which expects a vector of unsigned integers} } */ -+ svsudot_lane (s32, s32, s32, 0); /* { dg-error {passing 'svint32_t' instead of the expected 'svint8_t' to argument 2 of 'svsudot_lane', after passing 'svint32_t' to argument 1} } */ -+ -+ svsudot_lane (s32, s8, u8, i); /* { dg-error {argument 4 of 'svsudot_lane' must be an integer constant expression} } */ -+ svsudot_lane (s32, s8, u8, 0); -+ svsudot_lane (s32, s8, u8, 3); -+ svsudot_lane (s32, s8, u8, 4); /* { dg-error {passing 4 to argument 4 of 'svsudot_lane', which expects a value in the range \[0, 3\]} } */ -+ svsudot_lane (s32, s8, u8, -1); /* { dg-error {passing -1 to argument 4 of 'svsudot_lane', which expects a value in the range \[0, 3\]} } */ -+} -diff 
--git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_intq_uintq_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_intq_uintq_opt_n_1.c -new file mode 100644 -index 000000000..f95ac582f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_intq_uintq_opt_n_1.c -@@ -0,0 +1,37 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-march=armv8.6-a+sve+i8mm" } */ -+ -+#include -+ -+svuint32_t -+f1 (svint32_t s32, svuint8_t u8, svint8_t s8, svuint32_t u32) -+{ -+ svsudot_s32 (s32); /* { dg-error {too few arguments to function 'svsudot_s32'} } */ -+ svsudot_s32 (s32, s8, u8, u32); /* { dg-error {too many arguments to function 'svsudot_s32'} } */ -+ svsudot_s32 (s32, s32, u8); /* { dg-error {incompatible type for argument 2 of 'svsudot_s32'} } */ -+ svsudot_s32 (s32, u8, u8); /* { dg-error {incompatible type for argument 2 of 'svsudot_s32'} } */ -+ svsudot_s32 (s32, s8, u32); /* { dg-error {incompatible type for argument 3 of 'svsudot_s32'} } */ -+ svsudot_s32 (s32, s8, s8); /* { dg-error {incompatible type for argument 3 of 'svsudot_s32'} } */ -+ svsudot_s32 (s32, s8, 0); /* { dg-error {incompatible type for argument 3 of 'svsudot_s32'} } */ -+ svsudot_s32 (s32, s8, u8); -+ return svsudot_s32 (s32, s8, u8); /* { dg-error {incompatible types when returning type 'svint32_t' but 'svuint32_t' was expected} } */ -+} -+ -+void -+f2 (svbool_t pg, svint8_t s8, svuint8_t u8, svuint32_t u32, -+ svint32_t s32, svfloat32_t f32) -+{ -+ svsudot (s32, s8); /* { dg-error {too few arguments to function 'svsudot'} } */ -+ svsudot (s32, s8, u8, u8); /* { dg-error {too many arguments to function 'svsudot'} } */ -+ svsudot (0, s8, u8); /* { dg-error {passing 'int' to argument 1 of 'svsudot', which expects an SVE vector type} } */ -+ svsudot (pg, s8, u8); /* { dg-error {'svsudot' has no form that takes 'svbool_t' arguments} } */ -+ svsudot (u8, s8, u8); /* { dg-error {'svsudot' has no form that takes 'svuint8_t' arguments} } */ -+ svsudot (f32, s8, u8); /* { dg-error {'svsudot' has no form that takes 'svfloat32_t' arguments} } */ -+ svsudot (s32, s8, u8); -+ svsudot (s32, 0, u8); /* { dg-error {passing 'int' to argument 2 of 'svsudot', which expects an SVE vector type} } */ -+ svsudot (s32, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svsudot', which expects a vector of signed integers} } */ -+ svsudot (s32, s8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svsudot', which expects a vector of unsigned integers} } */ -+ svsudot (s32, s8, 0); -+ svsudot (s32, s8, u8); -+ svsudot (s32, u32, u32); /* { dg-error {passing 'svuint32_t' to argument 2 of 'svsudot', which expects a vector of signed integers} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_1.c -new file mode 100644 -index 000000000..bbd1f91be ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_1.c -@@ -0,0 +1,35 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+void -+f1 (svbool_t pg, svfloat16_t f16, svfloat32_t f32, svfloat64_t f64, -+ svint32_t s32, int i) -+{ -+ svmla_lane (f32, f32, f32); /* { dg-error {too few arguments to function 'svmla_lane'} } */ -+ svmla_lane (f32, f32, f32, 0, 0); /* { dg-error {too many arguments to function 'svmla_lane'} } */ -+ svmla_lane (pg, pg, pg, 0); /* { dg-error {'svmla_lane' has no form that takes 'svbool_t' arguments} } */ -+ svmla_lane (s32, s32, s32, 0); /* { dg-error {'svmla_lane' 
has no form that takes 'svint32_t' arguments} } */ -+ svmla_lane (1, f32, f32, 0); /* { dg-error {passing 'int' to argument 1 of 'svmla_lane', which expects an SVE vector type} } */ -+ svmla_lane (f32, 1, f32, 0); /* { dg-error {passing 'int' to argument 2 of 'svmla_lane', which expects an SVE vector type} } */ -+ svmla_lane (f32, f32, 1, 0); /* { dg-error {passing 'int' to argument 3 of 'svmla_lane', which expects an SVE vector type} } */ -+ svmla_lane (f32, f64, f32, 0); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svmla_lane', but previous arguments had type 'svfloat32_t'} } */ -+ svmla_lane (f32, f32, f64, 0); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svmla_lane', but previous arguments had type 'svfloat32_t'} } */ -+ svmla_lane (f32, f32, f32, s32); /* { dg-error {argument 4 of 'svmla_lane' must be an integer constant expression} } */ -+ svmla_lane (f32, f32, f32, i); /* { dg-error {argument 4 of 'svmla_lane' must be an integer constant expression} } */ -+ -+ svmla_lane (f16, f16, f16, 0); -+ svmla_lane (f16, f16, f16, 7); -+ svmla_lane (f16, f16, f16, 8); /* { dg-error {passing 8 to argument 4 of 'svmla_lane', which expects a value in the range \[0, 7\]} } */ -+ svmla_lane (f16, f16, f16, -1); /* { dg-error {passing -1 to argument 4 of 'svmla_lane', which expects a value in the range \[0, 7\]} } */ -+ -+ svmla_lane (f32, f32, f32, 0); -+ svmla_lane (f32, f32, f32, 3); -+ svmla_lane (f32, f32, f32, 4); /* { dg-error {passing 4 to argument 4 of 'svmla_lane', which expects a value in the range \[0, 3\]} } */ -+ svmla_lane (f32, f32, f32, -1); /* { dg-error {passing -1 to argument 4 of 'svmla_lane', which expects a value in the range \[0, 3\]} } */ -+ -+ svmla_lane (f64, f64, f64, 0); -+ svmla_lane (f64, f64, f64, 1); -+ svmla_lane (f64, f64, f64, 2); /* { dg-error {passing 2 to argument 4 of 'svmla_lane', which expects a value in the range \[0, 1\]} } */ -+ svmla_lane (f64, f64, f64, -1); /* { dg-error {passing -1 to argument 4 of 'svmla_lane', which expects a value in the range \[0, 1\]} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_rotate_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_rotate_1.c -new file mode 100644 -index 000000000..bccc6c7e2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_rotate_1.c -@@ -0,0 +1,38 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+void -+f1 (svbool_t pg, svfloat16_t f16, svfloat32_t f32, svfloat64_t f64, -+ svint32_t s32, int i) -+{ -+ svcmla_lane (f32, f32, f32, 0); /* { dg-error {too few arguments to function 'svcmla_lane'} } */ -+ svcmla_lane (f32, f32, f32, 0, 90, 90); /* { dg-error {too many arguments to function 'svcmla_lane'} } */ -+ svcmla_lane (pg, pg, pg, 0, 90); /* { dg-error {'svcmla_lane' has no form that takes 'svbool_t' arguments} } */ -+ svcmla_lane (s32, s32, s32, 0, 90); /* { dg-error {'svcmla_lane' has no form that takes 'svint32_t' arguments} } */ -+ svcmla_lane (f64, f64, f64, 0, 90); /* { dg-error {'svcmla_lane' has no form that takes 'svfloat64_t' arguments} } */ -+ svcmla_lane (1, f32, f32, 0, 90); /* { dg-error {passing 'int' to argument 1 of 'svcmla_lane', which expects an SVE vector type} } */ -+ svcmla_lane (f32, 1, f32, 0, 90); /* { dg-error {passing 'int' to argument 2 of 'svcmla_lane', which expects an SVE vector type} } */ -+ svcmla_lane (f32, f32, 1, 0, 90); /* { dg-error {passing 'int' to argument 3 of 'svcmla_lane', which expects an SVE vector type} } */ -+ svcmla_lane (f32, f64, f32, 0, 
90); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svcmla_lane', but previous arguments had type 'svfloat32_t'} } */ -+ svcmla_lane (f32, f32, f64, 0, 90); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcmla_lane', but previous arguments had type 'svfloat32_t'} } */ -+ svcmla_lane (f32, f32, f32, s32, 0); /* { dg-error {argument 4 of 'svcmla_lane' must be an integer constant expression} } */ -+ svcmla_lane (f32, f32, f32, i, 0); /* { dg-error {argument 4 of 'svcmla_lane' must be an integer constant expression} } */ -+ -+ svcmla_lane (f16, f16, f16, 0, 0); -+ svcmla_lane (f16, f16, f16, 3, 0); -+ svcmla_lane (f16, f16, f16, 4, 0); /* { dg-error {passing 4 to argument 4 of 'svcmla_lane', which expects a value in the range \[0, 3\]} } */ -+ svcmla_lane (f16, f16, f16, -1, 0); /* { dg-error {passing -1 to argument 4 of 'svcmla_lane', which expects a value in the range \[0, 3\]} } */ -+ -+ svcmla_lane (f32, f32, f32, 0, 0); -+ svcmla_lane (f32, f32, f32, 1, 0); -+ svcmla_lane (f32, f32, f32, 2, 0); /* { dg-error {passing 2 to argument 4 of 'svcmla_lane', which expects a value in the range \[0, 1\]} } */ -+ svcmla_lane (f32, f32, f32, -1, 0); /* { dg-error {passing -1 to argument 4 of 'svcmla_lane', which expects a value in the range \[0, 1\]} } */ -+ -+ svcmla_lane (f32, f32, f32, 0, -90); /* { dg-error {passing -90 to argument 5 of 'svcmla_lane', which expects 0, 90, 180 or 270} } */ -+ svcmla_lane (f32, f32, f32, 0, 0); -+ svcmla_lane (f32, f32, f32, 0, 1); /* { dg-error {passing 1 to argument 5 of 'svcmla_lane', which expects 0, 90, 180 or 270} } */ -+ svcmla_lane (f32, f32, f32, 0, 90); -+ svcmla_lane (f32, f32, f32, 0, 180); -+ svcmla_lane (f32, f32, f32, 0, 270); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_opt_n_1.c -new file mode 100644 -index 000000000..c4a80e9da ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_opt_n_1.c -@@ -0,0 +1,34 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+void -+f1 (svbool_t pg, svint8_t s8, svuint8_t u8, -+ svint16_t s16, svuint16_t u16, svfloat16_t f16) -+{ -+ svmla_x (pg, u8, u8); /* { dg-error {too few arguments to function 'svmla_x'} } */ -+ svmla_x (pg, u8, u8, u8, u8); /* { dg-error {too many arguments to function 'svmla_x'} } */ -+ svmla_x (u8, u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svmla_x', which expects 'svbool_t'} } */ -+ svmla_x (pg, pg, pg, pg); /* { dg-error {'svmla_x' has no form that takes 'svbool_t' arguments} } */ -+ svmla_x (pg, 1, u8, u8); /* { dg-error {passing 'int' to argument 2 of 'svmla_x', which expects an SVE vector type} } */ -+ svmla_x (pg, u8, s8, u8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ -+ svmla_x (pg, u8, u8, u8); -+ svmla_x (pg, u8, s16, u8); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ -+ svmla_x (pg, u8, u16, u8); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ -+ svmla_x (pg, u8, f16, u8); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ -+ svmla_x (pg, u8, pg, u8); /* { dg-error {passing 'svbool_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ -+ svmla_x (pg, u8, 0, u8); /* { dg-error {passing 'int' to argument 3 of 'svmla_x', which 
expects an SVE vector type} } */ -+ svmla_x (pg, u8, u8, s8); /* { dg-error {passing 'svint8_t' to argument 4 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ -+ svmla_x (pg, u8, u8, s16); /* { dg-error {passing 'svint16_t' to argument 4 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ -+ svmla_x (pg, u8, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 4 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ -+ svmla_x (pg, u8, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 4 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ -+ svmla_x (pg, u8, u8, pg); /* { dg-error {passing 'svbool_t' to argument 4 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */ -+ svmla_x (pg, u8, u8, 0); -+ -+ svmla_x (pg, f16, s16, f16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmla_x', but previous arguments had type 'svfloat16_t'} } */ -+ svmla_x (pg, f16, u16, f16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmla_x', but previous arguments had type 'svfloat16_t'} } */ -+ svmla_x (pg, f16, f16, s16); /* { dg-error {passing 'svint16_t' to argument 4 of 'svmla_x', but previous arguments had type 'svfloat16_t'} } */ -+ svmla_x (pg, f16, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 4 of 'svmla_x', but previous arguments had type 'svfloat16_t'} } */ -+ svmla_x (pg, f16, f16, f16); -+ svmla_x (pg, f16, f16, 1); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_lane_1.c -new file mode 100644 -index 000000000..e81552b64 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_lane_1.c -@@ -0,0 +1,63 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+void -+f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16, -+ svint32_t s32, svuint32_t u32, svint64_t s64, svuint64_t u64, -+ svfloat32_t f32, int i) -+{ -+ svdot_lane (u32, u8, u8); /* { dg-error {too few arguments to function 'svdot_lane'} } */ -+ svdot_lane (u32, u8, u8, 0, 0); /* { dg-error {too many arguments to function 'svdot_lane'} } */ -+ svdot_lane (0, u8, u8, 0); /* { dg-error {passing 'int' to argument 1 of 'svdot_lane', which expects an SVE vector type} } */ -+ svdot_lane (pg, u8, u8, 0); /* { dg-error {'svdot_lane' has no form that takes 'svbool_t' arguments} } */ -+ svdot_lane (u8, u8, u8, 0); /* { dg-error {'svdot_lane' has no form that takes 'svuint8_t' arguments} } */ -+ svdot_lane (f32, u8, u8, 0); /* { dg-error {'svdot_lane' has no form that takes 'svfloat32_t' arguments} } */ -+ svdot_lane (u32, u8, u8, 0); -+ svdot_lane (u32, 0, u8, 0); /* { dg-error {passing 'int' to argument 2 of 'svdot_lane', which expects an SVE vector type} } */ -+ svdot_lane (u32, u8, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svdot_lane', which expects an SVE vector type} } */ -+ -+ svdot_lane (s32, s8, s8, 0); -+ svdot_lane (s32, u8, s8, 0); /* { dg-error {arguments 1 and 2 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svint32_t' and 'svuint8_t' respectively} } */ -+ svdot_lane (s32, s8, u8, 0); /* { dg-error {arguments 1 and 3 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svint32_t' and 'svuint8_t' respectively} } */ -+ svdot_lane (s32, s32, s32, 0); /* { dg-error {passing 'svint32_t' instead of the expected 'svint8_t' to argument 2 of 'svdot_lane', after passing 'svint32_t' to argument 1} } */ -+ -+ svdot_lane 
(u32, u8, u8, 0); -+ svdot_lane (u32, s8, u8, 0); /* { dg-error {arguments 1 and 2 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint8_t' respectively} } */ -+ svdot_lane (u32, u8, s8, 0); /* { dg-error {arguments 1 and 3 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint8_t' respectively} } */ -+ svdot_lane (u32, u32, u32, 0); /* { dg-error {passing 'svuint32_t' instead of the expected 'svuint8_t' to argument 2 of 'svdot_lane', after passing 'svuint32_t' to argument 1} } */ -+ -+ svdot_lane (s64, s16, s16, 0); -+ svdot_lane (s64, u16, s16, 0); /* { dg-error {arguments 1 and 2 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svint64_t' and 'svuint16_t' respectively} } */ -+ svdot_lane (s64, s16, u16, 0); /* { dg-error {arguments 1 and 3 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svint64_t' and 'svuint16_t' respectively} } */ -+ svdot_lane (s64, s64, s64, 0); /* { dg-error {passing 'svint64_t' instead of the expected 'svint16_t' to argument 2 of 'svdot_lane', after passing 'svint64_t' to argument 1} } */ -+ -+ svdot_lane (u64, u16, u16, 0); -+ svdot_lane (u64, s16, u16, 0); /* { dg-error {arguments 1 and 2 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svuint64_t' and 'svint16_t' respectively} } */ -+ svdot_lane (u64, u16, s16, 0); /* { dg-error {arguments 1 and 3 of 'svdot_lane' must have the same signedness, but the values passed here have type 'svuint64_t' and 'svint16_t' respectively} } */ -+ svdot_lane (u64, u64, u64, 0); /* { dg-error {passing 'svuint64_t' instead of the expected 'svuint16_t' to argument 2 of 'svdot_lane', after passing 'svuint64_t' to argument 1} } */ -+ -+ svdot_lane (s32, s8, s8, i); /* { dg-error {argument 4 of 'svdot_lane' must be an integer constant expression} } */ -+ svdot_lane (s32, s8, s8, 0); -+ svdot_lane (s32, s8, s8, 3); -+ svdot_lane (s32, s8, s8, 4); /* { dg-error {passing 4 to argument 4 of 'svdot_lane', which expects a value in the range \[0, 3\]} } */ -+ svdot_lane (s32, s8, s8, -1); /* { dg-error {passing -1 to argument 4 of 'svdot_lane', which expects a value in the range \[0, 3\]} } */ -+ -+ svdot_lane (u32, u8, u8, i); /* { dg-error {argument 4 of 'svdot_lane' must be an integer constant expression} } */ -+ svdot_lane (u32, u8, u8, 0); -+ svdot_lane (u32, u8, u8, 3); -+ svdot_lane (u32, u8, u8, 4); /* { dg-error {passing 4 to argument 4 of 'svdot_lane', which expects a value in the range \[0, 3\]} } */ -+ svdot_lane (u32, u8, u8, -1); /* { dg-error {passing -1 to argument 4 of 'svdot_lane', which expects a value in the range \[0, 3\]} } */ -+ -+ svdot_lane (s64, s16, s16, i); /* { dg-error {argument 4 of 'svdot_lane' must be an integer constant expression} } */ -+ svdot_lane (s64, s16, s16, 0); -+ svdot_lane (s64, s16, s16, 1); -+ svdot_lane (s64, s16, s16, 2); /* { dg-error {passing 2 to argument 4 of 'svdot_lane', which expects a value in the range \[0, 1\]} } */ -+ svdot_lane (s64, s16, s16, -1); /* { dg-error {passing -1 to argument 4 of 'svdot_lane', which expects a value in the range \[0, 1\]} } */ -+ -+ svdot_lane (u64, u16, u16, i); /* { dg-error {argument 4 of 'svdot_lane' must be an integer constant expression} } */ -+ svdot_lane (u64, u16, u16, 0); -+ svdot_lane (u64, u16, u16, 1); -+ svdot_lane (u64, u16, u16, 2); /* { dg-error {passing 2 to argument 4 of 'svdot_lane', which expects a value in the range \[0, 
1\]} } */ -+ svdot_lane (u64, u16, u16, -1); /* { dg-error {passing -1 to argument 4 of 'svdot_lane', which expects a value in the range \[0, 1\]} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_opt_n_1.c -new file mode 100644 -index 000000000..b41e6fcce ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_opt_n_1.c -@@ -0,0 +1,15 @@ -+/* { dg-do compile } */ -+ -+#include <arm_sve.h> -+ -+svint32_t -+f1 (svuint32_t u32, svuint8_t u8, svint8_t s8) -+{ -+ svdot_u32 (u32); /* { dg-error {too few arguments to function 'svdot_u32'} } */ -+ svdot_u32 (u32, u8, u8, u32); /* { dg-error {too many arguments to function 'svdot_u32'} } */ -+ svdot_u32 (u32, u32, u8); /* { dg-error {incompatible type for argument 2 of 'svdot_u32'} } */ -+ svdot_u32 (u32, s8, u8); /* { dg-error {incompatible type for argument 2 of 'svdot_u32'} } */ -+ svdot_u32 (u32, u8, u32); /* { dg-error {incompatible type for argument 3 of 'svdot_u32'} } */ -+ svdot_u32 (u32, u8, s8); /* { dg-error {incompatible type for argument 3 of 'svdot_u32'} } */ -+ return svdot_u32 (u32, u8, u8); /* { dg-error {incompatible types when returning type 'svuint32_t' but 'svint32_t' was expected} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_opt_n_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_opt_n_2.c -new file mode 100644 -index 000000000..fee4096fe ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_qq_opt_n_2.c -@@ -0,0 +1,21 @@ -+/* { dg-do compile } */ -+ -+#include <arm_sve.h> -+ -+void -+f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svuint32_t u32, -+ svfloat32_t f32) -+{ -+ svdot (u32, u8); /* { dg-error {too few arguments to function 'svdot'} } */ -+ svdot (u32, u8, u8, u8); /* { dg-error {too many arguments to function 'svdot'} } */ -+ svdot (0, u8, u8); /* { dg-error {passing 'int' to argument 1 of 'svdot', which expects an SVE vector type} } */ -+ svdot (pg, u8, u8); /* { dg-error {'svdot' has no form that takes 'svbool_t' arguments} } */ -+ svdot (u8, u8, u8); /* { dg-error {'svdot' has no form that takes 'svuint8_t' arguments} } */ -+ svdot (f32, u8, u8); /* { dg-error {'svdot' has no form that takes 'svfloat32_t' arguments} } */ -+ svdot (u32, u8, u8); -+ svdot (u32, 0, u8); /* { dg-error {passing 'int' to argument 2 of 'svdot', which expects an SVE vector type} } */ -+ svdot (u32, s8, u8); /* { dg-error {arguments 1 and 2 of 'svdot' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint8_t' respectively} } */ -+ svdot (u32, u8, 0); -+ svdot (u32, u8, s8); /* { dg-error {arguments 1 and 3 of 'svdot' must have the same signedness, but the values passed here have type 'svuint32_t' and 'svint8_t' respectively} } */ -+ svdot (u32, u32, u32); /* { dg-error {passing 'svuint32_t' instead of the expected 'svuint8_t' to argument 2 of 'svdot', after passing 'svuint32_t' to argument 1} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_rotate_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_rotate_1.c -new file mode 100644 -index 000000000..f340e3d1e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_rotate_1.c -@@ -0,0 +1,26 @@ -+/* { dg-do compile } */ -+ -+#include <arm_sve.h> -+ -+void -+f1 (svbool_t pg, svfloat32_t f32, svfloat64_t f64, svint32_t s32, int i) -+{ -+ svcmla_x (pg, f32, f32, f32); /* { dg-error {too few 
arguments to function 'svcmla_x'} } */ -+ svcmla_x (pg, f32, f32, f32, 90, 90); /* { dg-error {too many arguments to function 'svcmla_x'} } */ -+ svcmla_x (f32, f32, f32, f32, 90); /* { dg-error {passing 'svfloat32_t' to argument 1 of 'svcmla_x', which expects 'svbool_t'} } */ -+ svcmla_x (pg, pg, pg, pg, 90); /* { dg-error {'svcmla_x' has no form that takes 'svbool_t' arguments} } */ -+ svcmla_x (pg, s32, s32, s32, 90); /* { dg-error {'svcmla_x' has no form that takes 'svint32_t' arguments} } */ -+ svcmla_x (pg, 1, f32, f32, 90); /* { dg-error {passing 'int' to argument 2 of 'svcmla_x', which expects an SVE vector type} } */ -+ svcmla_x (pg, f32, 1, f32, 90); /* { dg-error {passing 'int' to argument 3 of 'svcmla_x', which expects an SVE vector type} } */ -+ svcmla_x (pg, f32, f32, 1, 90); /* { dg-error {passing 'int' to argument 4 of 'svcmla_x', which expects an SVE vector type} } */ -+ svcmla_x (pg, f32, f64, f32, 90); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcmla_x', but previous arguments had type 'svfloat32_t'} } */ -+ svcmla_x (pg, f32, f32, f64, 90); /* { dg-error {passing 'svfloat64_t' to argument 4 of 'svcmla_x', but previous arguments had type 'svfloat32_t'} } */ -+ svcmla_x (pg, f32, f32, f32, s32); /* { dg-error {argument 5 of 'svcmla_x' must be an integer constant expression} } */ -+ svcmla_x (pg, f32, f32, f32, i); /* { dg-error {argument 5 of 'svcmla_x' must be an integer constant expression} } */ -+ svcmla_x (pg, f32, f32, f32, -90); /* { dg-error {passing -90 to argument 5 of 'svcmla_x', which expects 0, 90, 180 or 270} } */ -+ svcmla_x (pg, f32, f32, f32, 0); -+ svcmla_x (pg, f32, f32, f32, 1); /* { dg-error {passing 1 to argument 5 of 'svcmla_x', which expects 0, 90, 180 or 270} } */ -+ svcmla_x (pg, f32, f32, f32, 90); -+ svcmla_x (pg, f32, f32, f32, 180); -+ svcmla_x (pg, f32, f32, f32, 270); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_1.c -new file mode 100644 -index 000000000..f52fb39bf ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_1.c -@@ -0,0 +1,37 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-march=armv8.6-a+sve+i8mm" } */ -+ -+#include -+ -+svuint32_t -+f1 (svint32_t s32, svuint8_t u8, svint8_t s8, svuint32_t u32) -+{ -+ svusmmla_s32 (s32); /* { dg-error {too few arguments to function 'svusmmla_s32'} } */ -+ svusmmla_s32 (s32, u8, s8, u32); /* { dg-error {too many arguments to function 'svusmmla_s32'} } */ -+ svusmmla_s32 (s32, u32, s8); /* { dg-error {incompatible type for argument 2 of 'svusmmla_s32'} } */ -+ svusmmla_s32 (s32, s8, s8); /* { dg-error {incompatible type for argument 2 of 'svusmmla_s32'} } */ -+ svusmmla_s32 (s32, u8, u8); /* { dg-error {incompatible type for argument 3 of 'svusmmla_s32'} } */ -+ svusmmla_s32 (s32, u8, s32); /* { dg-error {incompatible type for argument 3 of 'svusmmla_s32'} } */ -+ svusmmla_s32 (s32, u8, 0); /* { dg-error {incompatible type for argument 3 of 'svusmmla_s32'} } */ -+ svusmmla_s32 (s32, u8, s8); -+ return svusmmla_s32 (s32, u8, s8); /* { dg-error {incompatible types when returning type 'svint32_t' but 'svuint32_t' was expected} } */ -+} -+ -+void -+f2 (svbool_t pg, svint8_t s8, svuint8_t u8, svuint32_t u32, -+ svint32_t s32, svfloat32_t f32) -+{ -+ svusmmla (s32, u8); /* { dg-error {too few arguments to function 'svusmmla'} } */ -+ svusmmla (s32, u8, s8, u8); /* { dg-error {too many arguments to function 'svusmmla'} } */ 
-+ svusmmla (0, u8, s8); /* { dg-error {passing 'int' to argument 1 of 'svusmmla', which expects an SVE vector type} } */ -+ svusmmla (pg, u8, s8); /* { dg-error {'svusmmla' has no form that takes 'svbool_t' arguments} } */ -+ svusmmla (u8, u8, s8); /* { dg-error {'svusmmla' has no form that takes 'svuint8_t' arguments} } */ -+ svusmmla (f32, u8, s8); /* { dg-error {'svusmmla' has no form that takes 'svfloat32_t' arguments} } */ -+ svusmmla (s32, u8, s8); -+ svusmmla (s32, 0, s8); /* { dg-error {passing 'int' to argument 2 of 'svusmmla', which expects an SVE vector type} } */ -+ svusmmla (s32, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svusmmla', which expects a vector of signed integers} } */ -+ svusmmla (s32, s8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svusmmla', which expects a vector of unsigned integers} } */ -+ svusmmla (s32, u8, 0); /* { dg-error {passing 'int' to argument 3 of 'svusmmla', which expects an SVE vector type} } */ -+ svusmmla (s32, u8, s8); -+ svusmmla (s32, u32, u32); /* { dg-error {passing 'svuint32_t' instead of the expected 'svuint8_t' to argument 2 of 'svusmmla', after passing 'svint32_t' to argument 1} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_lane_1.c -new file mode 100644 -index 000000000..b40cfe9e8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_lane_1.c -@@ -0,0 +1,32 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-march=armv8.6-a+sve+i8mm" } */ -+ -+#include -+ -+void -+f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16, -+ svint32_t s32, svuint32_t u32, svint64_t s64, svuint64_t u64, -+ svfloat32_t f32, int i) -+{ -+ svusdot_lane (s32, u8, s8); /* { dg-error {too few arguments to function 'svusdot_lane'} } */ -+ svusdot_lane (s32, u8, s8, 0, 0); /* { dg-error {too many arguments to function 'svusdot_lane'} } */ -+ svusdot_lane (0, u8, s8, 0); /* { dg-error {passing 'int' to argument 1 of 'svusdot_lane', which expects an SVE vector type} } */ -+ svusdot_lane (pg, u8, s8, 0); /* { dg-error {'svusdot_lane' has no form that takes 'svbool_t' arguments} } */ -+ svusdot_lane (u8, u8, s8, 0); /* { dg-error {'svusdot_lane' has no form that takes 'svuint8_t' arguments} } */ -+ svusdot_lane (f32, u8, s8, 0); /* { dg-error {'svusdot_lane' has no form that takes 'svfloat32_t' arguments} } */ -+ svusdot_lane (u32, u8, s8, 0); /* { dg-error {'svusdot_lane' has no form that takes 'svuint32_t' arguments} } */ -+ svusdot_lane (s32, u8, s8, 0); -+ svusdot_lane (s32, 0, s8, 0); /* { dg-error {passing 'int' to argument 2 of 'svusdot_lane', which expects an SVE vector type} } */ -+ svusdot_lane (s32, u8, 0, 0); /* { dg-error {passing 'int' to argument 3 of 'svusdot_lane', which expects an SVE vector type} } */ -+ -+ svusdot_lane (s32, u8, s8, 0); -+ svusdot_lane (s32, s8, s8, 0); /* { dg-error {passing 'svint8_t' to argument 2 of 'svusdot_lane', which expects a vector of unsigned integers} } */ -+ svusdot_lane (s32, u8, u8, 0); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svusdot_lane', which expects a vector of signed integers} } */ -+ svusdot_lane (s32, s32, s32, 0); /* { dg-error {passing 'svint32_t' to argument 2 of 'svusdot_lane', which expects a vector of unsigned integers} } */ -+ -+ svusdot_lane (s32, u8, s8, i); /* { dg-error {argument 4 of 'svusdot_lane' must be an integer constant expression} } */ -+ svusdot_lane 
(s32, u8, s8, 0); -+ svusdot_lane (s32, u8, s8, 3); -+ svusdot_lane (s32, u8, s8, 4); /* { dg-error {passing 4 to argument 4 of 'svusdot_lane', which expects a value in the range \[0, 3\]} } */ -+ svusdot_lane (s32, u8, s8, -1); /* { dg-error {passing -1 to argument 4 of 'svusdot_lane', which expects a value in the range \[0, 3\]} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_opt_n_1.c -new file mode 100644 -index 000000000..896b80390 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uintq_intq_opt_n_1.c -@@ -0,0 +1,37 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-march=armv8.6-a+sve+i8mm" } */ -+ -+#include -+ -+svuint32_t -+f1 (svint32_t s32, svuint8_t u8, svint8_t s8, svuint32_t u32) -+{ -+ svusdot_s32 (s32); /* { dg-error {too few arguments to function 'svusdot_s32'} } */ -+ svusdot_s32 (s32, u8, s8, u32); /* { dg-error {too many arguments to function 'svusdot_s32'} } */ -+ svusdot_s32 (s32, u32, s8); /* { dg-error {incompatible type for argument 2 of 'svusdot_s32'} } */ -+ svusdot_s32 (s32, s8, s8); /* { dg-error {incompatible type for argument 2 of 'svusdot_s32'} } */ -+ svusdot_s32 (s32, u8, u8); /* { dg-error {incompatible type for argument 3 of 'svusdot_s32'} } */ -+ svusdot_s32 (s32, u8, s32); /* { dg-error {incompatible type for argument 3 of 'svusdot_s32'} } */ -+ svusdot_s32 (s32, u8, 0); /* { dg-error {incompatible type for argument 3 of 'svusdot_s32'} } */ -+ svusdot_s32 (s32, u8, s8); -+ return svusdot_s32 (s32, u8, s8); /* { dg-error {incompatible types when returning type 'svint32_t' but 'svuint32_t' was expected} } */ -+} -+ -+void -+f2 (svbool_t pg, svint8_t s8, svuint8_t u8, svuint32_t u32, -+ svint32_t s32, svfloat32_t f32) -+{ -+ svusdot (s32, u8); /* { dg-error {too few arguments to function 'svusdot'} } */ -+ svusdot (s32, u8, s8, u8); /* { dg-error {too many arguments to function 'svusdot'} } */ -+ svusdot (0, u8, s8); /* { dg-error {passing 'int' to argument 1 of 'svusdot', which expects an SVE vector type} } */ -+ svusdot (pg, u8, s8); /* { dg-error {'svusdot' has no form that takes 'svbool_t' arguments} } */ -+ svusdot (u8, u8, s8); /* { dg-error {'svusdot' has no form that takes 'svuint8_t' arguments} } */ -+ svusdot (f32, u8, s8); /* { dg-error {'svusdot' has no form that takes 'svfloat32_t' arguments} } */ -+ svusdot (s32, u8, s8); -+ svusdot (s32, 0, s8); /* { dg-error {passing 'int' to argument 2 of 'svusdot', which expects an SVE vector type} } */ -+ svusdot (s32, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svusdot', which expects a vector of signed integers} } */ -+ svusdot (s32, s8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svusdot', which expects a vector of unsigned integers} } */ -+ svusdot (s32, u8, 0); -+ svusdot (s32, u8, s8); -+ svusdot (s32, u32, u32); /* { dg-error {passing 'svuint32_t' instead of the expected 'svuint8_t' to argument 2 of 'svusdot', after passing 'svint32_t' to argument 1} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/tmad_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/tmad_1.c -new file mode 100644 -index 000000000..8b98fc24d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/tmad_1.c -@@ -0,0 +1,22 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+void -+f1 (svbool_t pg, svfloat32_t f32, svfloat64_t f64, svint32_t s32, int i) -+{ -+ svtmad (f32, f32); /* { 
dg-error {too few arguments to function 'svtmad'} } */ -+ svtmad (f32, f32, 0, 0); /* { dg-error {too many arguments to function 'svtmad'} } */ -+ svtmad (pg, pg, 0); /* { dg-error {'svtmad' has no form that takes 'svbool_t' arguments} } */ -+ svtmad (s32, s32, 0); /* { dg-error {'svtmad' has no form that takes 'svint32_t' arguments} } */ -+ svtmad (1, f32, 0); /* { dg-error {passing 'int' to argument 1 of 'svtmad', which expects an SVE vector type} } */ -+ svtmad (f32, 1, 0); /* { dg-error {passing 'int' to argument 2 of 'svtmad', which expects an SVE vector type} } */ -+ svtmad (f32, f64, 0); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svtmad', but previous arguments had type 'svfloat32_t'} } */ -+ svtmad (f32, f32, s32); /* { dg-error {argument 3 of 'svtmad' must be an integer constant expression} } */ -+ svtmad (f32, f32, i); /* { dg-error {argument 3 of 'svtmad' must be an integer constant expression} } */ -+ svtmad (f32, f32, -1); /* { dg-error {passing -1 to argument 3 of 'svtmad', which expects a value in the range \[0, 7\]} } */ -+ svtmad (f32, f32, 0); -+ svtmad (f32, f32, 1); -+ svtmad (f32, f32, 7); -+ svtmad (f32, f32, 8); /* { dg-error {passing 8 to argument 3 of 'svtmad', which expects a value in the range \[0, 7\]} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_1.c -new file mode 100644 -index 000000000..70b2d9dd1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_1.c -@@ -0,0 +1,5 @@ -+/* { dg-do compile } */ -+ -+int svbool_t; /* { dg-message "note: previous declaration of 'svbool_t' was here" } */ -+ -+#pragma GCC aarch64 "arm_sve.h" /* { dg-error {'svbool_t' redeclared} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_10.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_10.c -new file mode 100644 -index 000000000..8278c1cad ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_10.c -@@ -0,0 +1,5 @@ -+/* { dg-do compile } */ -+ -+typedef struct svint8x2_t svint8x2_t; /* { dg-message "note: previous declaration of 'svint8x2_t' was here" } */ -+ -+#pragma GCC aarch64 "arm_sve.h" /* { dg-error {conflicting types for 'svint8x2_t'} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_11.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_11.c -new file mode 100644 -index 000000000..2147df72c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_11.c -@@ -0,0 +1,12 @@ -+/* { dg-do compile } */ -+ -+/* This isn't explicitly allowed or disallowed, but mustn't ICE. */ -+struct svint8x2_t; -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+void -+f (svint8x2_t *a, struct svint8x2_t *b) -+{ -+ *a = *b; /* { dg-error {dereferencing pointer to incomplete type} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_12.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_12.c -new file mode 100644 -index 000000000..1a6ccbd05 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_12.c -@@ -0,0 +1,12 @@ -+/* { dg-do compile } */ -+ -+/* This isn't explicitly allowed or disallowed, but mustn't ICE. 
*/ -+struct svint8x2_t { int x; }; -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+void -+f (svint8x2_t *a, struct svint8x2_t *b) -+{ -+ *a = *b; /* { dg-error {incompatible types} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_13.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_13.c -new file mode 100644 -index 000000000..62bab1f84 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_13.c -@@ -0,0 +1,5 @@ -+/* { dg-do compile } */ -+ -+#pragma GCC aarch64 "arm_sve.h" /* { dg-message "note: previous declaration of 'svint8x2_t' was here" } */ -+ -+int svint8x2_t; /* { dg-error {'svint8x2_t' redeclared} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_14.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_14.c -new file mode 100644 -index 000000000..0f00db1fb ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_14.c -@@ -0,0 +1,5 @@ -+/* { dg-do compile } */ -+ -+enum svpattern { FOO }; /* { dg-message "note: originally defined here" } */ -+ -+#pragma GCC aarch64 "arm_sve.h" /* { dg-error {redeclaration of 'enum svpattern'} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_15.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_15.c -new file mode 100644 -index 000000000..ea9721749 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_15.c -@@ -0,0 +1,8 @@ -+/* { dg-do compile } */ -+ -+#pragma GCC aarch64 "arm_sve.h" /* { dg-message "note: originally defined here" } */ -+ -+enum svpattern { FOO }; /* { dg-error {redeclaration of 'enum svpattern'} } */ -+enum foo { SV_ALL }; /* { dg-error {redeclaration of enumerator 'SV_ALL'} } */ -+typedef int SV_POW2; /* { dg-error {'SV_POW2' redeclared as different kind of symbol} } */ -+int SV_VL3; /* { dg-error {'SV_VL3' redeclared as different kind of symbol} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_16.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_16.c -new file mode 100644 -index 000000000..a59dabc6c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_16.c -@@ -0,0 +1,5 @@ -+/* { dg-do compile } */ -+ -+struct svpattern { int x; }; -+ -+#pragma GCC aarch64 "arm_sve.h" /* { dg-error {'svpattern' defined as wrong kind of tag} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_17.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_17.c -new file mode 100644 -index 000000000..027fdb2b9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_17.c -@@ -0,0 +1,5 @@ -+/* { dg-do compile } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+struct svpattern { int x; }; /* { dg-error {'svpattern' defined as wrong kind of tag} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_18.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_18.c -new file mode 100644 -index 000000000..b6706150b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_18.c -@@ -0,0 +1,5 @@ -+/* { dg-do compile } */ -+ -+int svpattern; /* OK in C. 
*/ -+ -+#pragma GCC aarch64 "arm_sve.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_19.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_19.c -new file mode 100644 -index 000000000..c6379f762 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_19.c -@@ -0,0 +1,5 @@ -+/* { dg-do compile } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+int svpattern; /* OK in C. */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_2.c -new file mode 100644 -index 000000000..ffd86ae7b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_2.c -@@ -0,0 +1,5 @@ -+/* { dg-do compile } */ -+ -+int svint8_t; /* { dg-message "note: previous declaration of 'svint8_t' was here" } */ -+ -+#pragma GCC aarch64 "arm_sve.h" /* { dg-error {'svint8_t' redeclared} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_20.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_20.c -new file mode 100644 -index 000000000..3d770a956 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_20.c -@@ -0,0 +1,9 @@ -+/* { dg-do compile } */ -+ -+enum foo { SV_VL4 }; -+typedef int SV_POW2; -+int SV_ALL; -+ -+#pragma GCC aarch64 "arm_sve.h" /* { dg-error {redeclaration of enumerator 'SV_VL4'} } */ -+/* { dg-error {'SV_POW2' redeclared as different kind of symbol} "" { target *-*-* } .-1 } */ -+/* { dg-error {'SV_ALL' redeclared as different kind of symbol} "" { target *-*-* } .-2 } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_3.c -new file mode 100644 -index 000000000..f42dd9680 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_3.c -@@ -0,0 +1,5 @@ -+/* { dg-do compile } */ -+ -+int svuint16_t; /* { dg-message "note: previous declaration of 'svuint16_t' was here" } */ -+ -+#pragma GCC aarch64 "arm_sve.h" /* { dg-error {'svuint16_t' redeclared} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_4.c -new file mode 100644 -index 000000000..91c95a1f5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_4.c -@@ -0,0 +1,5 @@ -+/* { dg-do compile } */ -+ -+int svfloat32_t; /* { dg-message "note: previous declaration of 'svfloat32_t' was here" } */ -+ -+#pragma GCC aarch64 "arm_sve.h" /* { dg-error {'svfloat32_t' redeclared} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_5.c -new file mode 100644 -index 000000000..3cb6b8a1c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_5.c -@@ -0,0 +1,5 @@ -+/* { dg-do compile } */ -+ -+typedef int svbool_t; /* { dg-message "note: previous declaration of 'svbool_t' was here" } */ -+ -+#pragma GCC aarch64 "arm_sve.h" /* { dg-error {conflicting types for 'svbool_t'} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_6.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_6.c -new file mode 100644 -index 000000000..c051897b6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_6.c -@@ -0,0 +1,6 @@ -+/* { dg-do compile } */ -+ -+typedef __SVBool_t svbool_t; /* { 
dg-message "note: previous declaration of 'svbool_t' was here" } */ -+ -+#pragma GCC aarch64 "arm_sve.h" /* { dg-error {redefinition of typedef 'svbool_t'} } */ -+ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_7.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_7.c -new file mode 100644 -index 000000000..fd4063154 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_7.c -@@ -0,0 +1,7 @@ -+/* { dg-do compile } */ -+/* { dg-options "-std=gnu90" } */ -+ -+typedef __SVBool_t svbool_t; -+ -+/* Without -pedantic-errors this should compile. */ -+#pragma GCC aarch64 "arm_sve.h" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_8.c -new file mode 100644 -index 000000000..41614a304 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_8.c -@@ -0,0 +1,5 @@ -+/* { dg-do compile } */ -+ -+int svint8x2_t; /* { dg-message "note: previous declaration of 'svint8x2_t' was here" } */ -+ -+#pragma GCC aarch64 "arm_sve.h" /* { dg-error {'svint8x2_t' redeclared} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_9.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_9.c -new file mode 100644 -index 000000000..83b6855df ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/type_redef_9.c -@@ -0,0 +1,5 @@ -+/* { dg-do compile } */ -+ -+typedef int svint8x2_t; /* { dg-message "note: previous declaration of 'svint8x2_t' was here" } */ -+ -+#pragma GCC aarch64 "arm_sve.h" /* { dg-error {conflicting types for 'svint8x2_t'} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_1.c -new file mode 100644 -index 000000000..eef85a01d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_1.c -@@ -0,0 +1,21 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+void -+f1 (svbool_t pg, svint32_t s32, svuint32_t u32, svfloat32_t f32) -+{ -+ svabs_m (s32, pg); /* { dg-error {too few arguments to function 'svabs_m'} } */ -+ svabs_m (s32, pg, s32, s32); /* { dg-error {too many arguments to function 'svabs_m'} } */ -+ svabs_m (0, pg, s32); /* { dg-error {passing 'int' to argument 1 of 'svabs_m', which expects an SVE vector type} } */ -+ svabs_m (s32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svabs_m', which expects 'svbool_t'} } */ -+ svabs_m (s32, 0, s32); /* { dg-error {passing 'int' to argument 2 of 'svabs_m', which expects 'svbool_t'} } */ -+ svabs_m (s32, pg, s32); -+ svabs_m (u32, pg, u32); /* { dg-error {'svabs_m' has no form that takes 'svuint32_t' arguments} } */ -+ svabs_m (f32, pg, f32); -+ svabs_m (s32, pg, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svabs_m', but previous arguments had type 'svint32_t'} } */ -+ svabs_m (s32, pg, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svabs_m', but previous arguments had type 'svint32_t'} } */ -+ svabs_m (s32, pg, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svabs_m', but previous arguments had type 'svint32_t'} } */ -+ svabs_m (pg, pg, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svabs_m', but previous arguments had type 'svbool_t'} } */ -+ svabs_m (pg, pg, pg); /* { dg-error {'svabs_m' has no form that takes 'svbool_t' arguments} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_2.c -new file mode 100644 -index 000000000..e94673a66 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_2.c -@@ -0,0 +1,15 @@ -+/* { dg-do compile } */ -+ -+#include <arm_sve.h> -+ -+void -+f1 (svbool_t pg, svint8_t s8, svuint8_t u8) -+{ -+ svabs_x (pg); /* { dg-error {too few arguments to function 'svabs_x'} } */ -+ svabs_x (pg, s8, s8); /* { dg-error {too many arguments to function 'svabs_x'} } */ -+ svabs_x (s8, s8); /* { dg-error {passing 'svint8_t' to argument 1 of 'svabs_x', which expects 'svbool_t'} } */ -+ svabs_x (pg, pg); /* { dg-error {'svabs_x' has no form that takes 'svbool_t' arguments} } */ -+ svabs_x (pg, 1); /* { dg-error {passing 'int' to argument 2 of 'svabs_x', which expects an SVE vector type} } */ -+ svabs_x (pg, s8); -+ svabs_x (pg, u8); /* { dg-error {'svabs_x' has no form that takes 'svuint8_t' arguments} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_1.c -new file mode 100644 -index 000000000..caa4e623d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_1.c -@@ -0,0 +1,73 @@ -+#include <arm_sve.h> -+ -+void -+test (svbool_t pg, svint8_t s8, svuint8_t u8, -+ svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32, -+ svint64_t s64, svuint64_t u64, svfloat16_t f16, svfloat32_t f32, -+ svfloat64_t f64) -+{ -+ svcvt_f64_x (pg); /* { dg-error {too few arguments to function 'svcvt_f64_x'} } */ -+ svcvt_f64_x (pg, s32, 0); /* { dg-error {too many arguments to function 'svcvt_f64_x'} } */ -+ svcvt_f64_x (s32, s32); /* { dg-error {passing 'svint32_t' to argument 1 of 'svcvt_f64_x', which expects 'svbool_t'} } */ -+ svcvt_f64_x (pg, 0); /* { dg-error {passing 'int' to argument 2 of 'svcvt_f64_x', which expects an SVE vector type} } */ -+ -+ svcvt_f64_x (pg, s8); /* { dg-error {'svcvt_f64_x' has no form that takes 'svint8_t' arguments} } */ -+ svcvt_f64_x (pg, s16); /* { dg-error {'svcvt_f64_x' has no form that takes 'svint16_t' arguments} } */ -+ svcvt_f64_x (pg, s32); -+ svcvt_f64_x (pg, s64); -+ svcvt_f64_x (pg, u8); /* { dg-error {'svcvt_f64_x' has no form that takes 'svuint8_t' arguments} } */ -+ svcvt_f64_x (pg, u16); /* { dg-error {'svcvt_f64_x' has no form that takes 'svuint16_t' arguments} } */ -+ svcvt_f64_x (pg, u32); -+ svcvt_f64_x (pg, u64); -+ svcvt_f64_x (pg, f16); -+ svcvt_f64_x (pg, f32); -+ svcvt_f64_x (pg, f64); /* { dg-error {'svcvt_f64_x' has no form that takes 'svfloat64_t' arguments} } */ -+ -+ svcvt_f32_x (pg, s8); /* { dg-error {'svcvt_f32_x' has no form that takes 'svint8_t' arguments} } */ -+ svcvt_f32_x (pg, s16); /* { dg-error {'svcvt_f32_x' has no form that takes 'svint16_t' arguments} } */ -+ svcvt_f32_x (pg, s32); -+ svcvt_f32_x (pg, s64); -+ svcvt_f32_x (pg, u8); /* { dg-error {'svcvt_f32_x' has no form that takes 'svuint8_t' arguments} } */ -+ svcvt_f32_x (pg, u16); /* { dg-error {'svcvt_f32_x' has no form that takes 'svuint16_t' arguments} } */ -+ svcvt_f32_x (pg, u32); -+ svcvt_f32_x (pg, u64); -+ svcvt_f32_x (pg, f16); -+ svcvt_f32_x (pg, f32); /* { dg-error {'svcvt_f32_x' has no form that takes 'svfloat32_t' arguments} } */ -+ svcvt_f32_x (pg, f64); -+ -+ svcvt_f16_x (pg, s8); /* { dg-error {'svcvt_f16_x' has no form that takes 'svint8_t' arguments} } */ -+ svcvt_f16_x (pg, s16); -+ svcvt_f16_x (pg, s32); -+ svcvt_f16_x (pg, s64); -+ svcvt_f16_x (pg, u8); /* { dg-error {'svcvt_f16_x' has no form that takes 'svuint8_t' 
arguments} } */ -+ svcvt_f16_x (pg, u16); -+ svcvt_f16_x (pg, u32); -+ svcvt_f16_x (pg, u64); -+ svcvt_f16_x (pg, f16); /* { dg-error {'svcvt_f16_x' has no form that takes 'svfloat16_t' arguments} } */ -+ svcvt_f16_x (pg, f32); -+ svcvt_f16_x (pg, f64); -+ -+ svcvt_s64_x (pg, f16); -+ svcvt_s64_x (pg, f32); -+ svcvt_s64_x (pg, f64); -+ -+ svcvt_s32_x (pg, f16); -+ svcvt_s32_x (pg, f32); -+ svcvt_s32_x (pg, f64); -+ -+ svcvt_s16_x (pg, f16); -+ svcvt_s16_x (pg, f32); /* { dg-error {'svcvt_s16_x' has no form that takes 'svfloat32_t' arguments} } */ -+ svcvt_s16_x (pg, f64); /* { dg-error {'svcvt_s16_x' has no form that takes 'svfloat64_t' arguments} } */ -+ -+ svcvt_u64_x (pg, f16); -+ svcvt_u64_x (pg, f32); -+ svcvt_u64_x (pg, f64); -+ -+ svcvt_u32_x (pg, f16); -+ svcvt_u32_x (pg, f32); -+ svcvt_u32_x (pg, f64); -+ -+ svcvt_u16_x (pg, f16); -+ svcvt_u16_x (pg, f32); /* { dg-error {'svcvt_u16_x' has no form that takes 'svfloat32_t' arguments} } */ -+ svcvt_u16_x (pg, f64); /* { dg-error {'svcvt_u16_x' has no form that takes 'svfloat64_t' arguments} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_2.c -new file mode 100644 -index 000000000..ddbd93b69 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_convert_2.c -@@ -0,0 +1,76 @@ -+#include -+ -+void -+test (svbool_t pg, svint8_t s8, svuint8_t u8, -+ svint16_t s16, svuint16_t u16, svint32_t s32, svuint32_t u32, -+ svint64_t s64, svuint64_t u64, svfloat16_t f16, svfloat32_t f32, -+ svfloat64_t f64) -+{ -+ svcvt_f64_m (f64, pg); /* { dg-error {too few arguments to function 'svcvt_f64_m'} } */ -+ svcvt_f64_m (f64, pg, s32, 0); /* { dg-error {too many arguments to function 'svcvt_f64_m'} } */ -+ svcvt_f64_m (f32, pg, s32); /* { dg-error {passing 'svfloat32_t' to argument 1 of 'svcvt_f64_m', which expects 'svfloat64_t'} } */ -+ svcvt_f64_m (0, pg, s32); /* { dg-error {passing 'int' to argument 1 of 'svcvt_f64_m', which expects 'svfloat64_t'} } */ -+ svcvt_f64_m (pg, pg, s32); /* { dg-error {passing 'svbool_t' to argument 1 of 'svcvt_f64_m', which expects 'svfloat64_t'} } */ -+ svcvt_f64_m (f64, s32, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svcvt_f64_m', which expects 'svbool_t'} } */ -+ svcvt_f64_m (f64, pg, 0); /* { dg-error {passing 'int' to argument 3 of 'svcvt_f64_m', which expects an SVE vector type} } */ -+ -+ svcvt_f64_m (f64, pg, s8); /* { dg-error {'svcvt_f64_m' has no form that takes 'svint8_t' arguments} } */ -+ svcvt_f64_m (f64, pg, s16); /* { dg-error {'svcvt_f64_m' has no form that takes 'svint16_t' arguments} } */ -+ svcvt_f64_m (f64, pg, s32); -+ svcvt_f64_m (f64, pg, s64); -+ svcvt_f64_m (f64, pg, u8); /* { dg-error {'svcvt_f64_m' has no form that takes 'svuint8_t' arguments} } */ -+ svcvt_f64_m (f64, pg, u16); /* { dg-error {'svcvt_f64_m' has no form that takes 'svuint16_t' arguments} } */ -+ svcvt_f64_m (f64, pg, u32); -+ svcvt_f64_m (f64, pg, u64); -+ svcvt_f64_m (f64, pg, f16); -+ svcvt_f64_m (f64, pg, f32); -+ svcvt_f64_m (f64, pg, f64); /* { dg-error {'svcvt_f64_m' has no form that takes 'svfloat64_t' arguments} } */ -+ -+ svcvt_f32_m (f32, pg, s8); /* { dg-error {'svcvt_f32_m' has no form that takes 'svint8_t' arguments} } */ -+ svcvt_f32_m (f32, pg, s16); /* { dg-error {'svcvt_f32_m' has no form that takes 'svint16_t' arguments} } */ -+ svcvt_f32_m (f32, pg, s32); -+ svcvt_f32_m (f32, pg, s64); -+ svcvt_f32_m (f32, pg, u8); /* { dg-error {'svcvt_f32_m' has no form 
that takes 'svuint8_t' arguments} } */ -+ svcvt_f32_m (f32, pg, u16); /* { dg-error {'svcvt_f32_m' has no form that takes 'svuint16_t' arguments} } */ -+ svcvt_f32_m (f32, pg, u32); -+ svcvt_f32_m (f32, pg, u64); -+ svcvt_f32_m (f32, pg, f16); -+ svcvt_f32_m (f32, pg, f32); /* { dg-error {'svcvt_f32_m' has no form that takes 'svfloat32_t' arguments} } */ -+ svcvt_f32_m (f32, pg, f64); -+ -+ svcvt_f16_m (f16, pg, s8); /* { dg-error {'svcvt_f16_m' has no form that takes 'svint8_t' arguments} } */ -+ svcvt_f16_m (f16, pg, s16); -+ svcvt_f16_m (f16, pg, s32); -+ svcvt_f16_m (f16, pg, s64); -+ svcvt_f16_m (f16, pg, u8); /* { dg-error {'svcvt_f16_m' has no form that takes 'svuint8_t' arguments} } */ -+ svcvt_f16_m (f16, pg, u16); -+ svcvt_f16_m (f16, pg, u32); -+ svcvt_f16_m (f16, pg, u64); -+ svcvt_f16_m (f16, pg, f16); /* { dg-error {'svcvt_f16_m' has no form that takes 'svfloat16_t' arguments} } */ -+ svcvt_f16_m (f16, pg, f32); -+ svcvt_f16_m (f16, pg, f64); -+ -+ svcvt_s64_m (s64, pg, f16); -+ svcvt_s64_m (s64, pg, f32); -+ svcvt_s64_m (s64, pg, f64); -+ -+ svcvt_s32_m (s32, pg, f16); -+ svcvt_s32_m (s32, pg, f32); -+ svcvt_s32_m (s32, pg, f64); -+ -+ svcvt_s16_m (s16, pg, f16); -+ svcvt_s16_m (s16, pg, f32); /* { dg-error {'svcvt_s16_m' has no form that takes 'svfloat32_t' arguments} } */ -+ svcvt_s16_m (s16, pg, f64); /* { dg-error {'svcvt_s16_m' has no form that takes 'svfloat64_t' arguments} } */ -+ -+ svcvt_u64_m (u64, pg, f16); -+ svcvt_u64_m (u64, pg, f32); -+ svcvt_u64_m (u64, pg, f64); -+ -+ svcvt_u32_m (u32, pg, f16); -+ svcvt_u32_m (u32, pg, f32); -+ svcvt_u32_m (u32, pg, f64); -+ -+ svcvt_u16_m (u16, pg, f16); -+ svcvt_u16_m (u16, pg, f32); /* { dg-error {'svcvt_u16_m' has no form that takes 'svfloat32_t' arguments} } */ -+ svcvt_u16_m (u16, pg, f64); /* { dg-error {'svcvt_u16_m' has no form that takes 'svfloat64_t' arguments} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_1.c -new file mode 100644 -index 000000000..888b52513 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_1.c -@@ -0,0 +1,24 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+void -+f1 (svbool_t pg, svint32_t s32, svuint32_t u32, svfloat32_t f32, -+ svint64_t s64, svuint64_t u64) -+{ -+ svclz_m (u32, pg); /* { dg-error {too few arguments to function 'svclz_m'} } */ -+ svclz_m (u32, pg, s32, s32); /* { dg-error {too many arguments to function 'svclz_m'} } */ -+ svclz_m (0, pg, f32); /* { dg-error {passing 'int' to argument 1 of 'svclz_m', which expects an SVE vector type} } */ -+ svclz_m (u32, u32, f32); /* { dg-error {passing 'svuint32_t' to argument 2 of 'svclz_m', which expects 'svbool_t'} } */ -+ svclz_m (u32, 0, f32); /* { dg-error {passing 'int' to argument 2 of 'svclz_m', which expects 'svbool_t'} } */ -+ svclz_m (u32, pg, s32); -+ svclz_m (u32, pg, u32); -+ svclz_m (u32, pg, f32); /* { dg-error {'svclz_m' has no form that takes 'svfloat32_t' arguments} } */ -+ svclz_m (u32, pg, pg); /* { dg-error {'svclz_m' has no form that takes 'svbool_t' arguments} } */ -+ -+ svclz_m (pg, pg, s32); /* { dg-error {passing 'svbool_t' to argument 1 of 'svclz_m', which expects a vector of unsigned integers} } */ -+ svclz_m (s32, pg, s32); /* { dg-error {passing 'svint32_t' to argument 1 of 'svclz_m', which expects a vector of unsigned integers} } */ -+ svclz_m (f32, pg, s32); /* { dg-error {passing 'svfloat32_t' to argument 1 of 'svclz_m', which expects a vector of unsigned 
integers} } */ -+ svclz_m (s64, pg, s32); /* { dg-error {passing 'svint64_t' to argument 1 of 'svclz_m', which expects a vector of unsigned integers} } */ -+ svclz_m (u64, pg, s32); /* { dg-error {arguments 1 and 3 of 'svclz_m' must have the same element size, but the values passed here have type 'svuint64_t' and 'svint32_t' respectively} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_2.c -new file mode 100644 -index 000000000..233e847e9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_2.c -@@ -0,0 +1,25 @@ -+/* { dg-do compile } */ -+/* { dg-options "-flax-vector-conversions" } */ -+ -+#include -+ -+void -+f1 (svbool_t pg, svint32_t s32, svuint32_t u32, svfloat32_t f32, -+ svint64_t s64, svuint64_t u64) -+{ -+ svclz_m (u32, pg); /* { dg-error {too few arguments to function 'svclz_m'} } */ -+ svclz_m (u32, pg, s32, s32); /* { dg-error {too many arguments to function 'svclz_m'} } */ -+ svclz_m (0, pg, f32); /* { dg-error {passing 'int' to argument 1 of 'svclz_m', which expects an SVE vector type} } */ -+ svclz_m (u32, u32, f32); /* { dg-error {passing 'svuint32_t' to argument 2 of 'svclz_m', which expects 'svbool_t'} } */ -+ svclz_m (u32, 0, f32); /* { dg-error {passing 'int' to argument 2 of 'svclz_m', which expects 'svbool_t'} } */ -+ svclz_m (u32, pg, s32); -+ svclz_m (u32, pg, u32); -+ svclz_m (u32, pg, f32); /* { dg-error {'svclz_m' has no form that takes 'svfloat32_t' arguments} } */ -+ svclz_m (u32, pg, pg); /* { dg-error {'svclz_m' has no form that takes 'svbool_t' arguments} } */ -+ -+ svclz_m (pg, pg, s32); /* { dg-error {passing 'svbool_t' to argument 1 of 'svclz_m', which expects a vector of unsigned integers} } */ -+ svclz_m (s32, pg, s32); /* { dg-error {passing 'svint32_t' to argument 1 of 'svclz_m', which expects a vector of unsigned integers} } */ -+ svclz_m (f32, pg, s32); /* { dg-error {passing 'svfloat32_t' to argument 1 of 'svclz_m', which expects a vector of unsigned integers} } */ -+ svclz_m (s64, pg, s32); /* { dg-error {passing 'svint64_t' to argument 1 of 'svclz_m', which expects a vector of unsigned integers} } */ -+ svclz_m (u64, pg, s32); /* { dg-error {arguments 1 and 3 of 'svclz_m' must have the same element size, but the values passed here have type 'svuint64_t' and 'svint32_t' respectively} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_3.c -new file mode 100644 -index 000000000..da57b07ea ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_to_uint_3.c -@@ -0,0 +1,14 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+void -+f1 (svbool_t pg, svuint8_t u8) -+{ -+ svcnt_x (pg); /* { dg-error {too few arguments to function 'svcnt_x'} } */ -+ svcnt_x (pg, u8, u8); /* { dg-error {too many arguments to function 'svcnt_x'} } */ -+ svcnt_x (u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svcnt_x', which expects 'svbool_t'} } */ -+ svcnt_x (pg, pg); /* { dg-error {'svcnt_x' has no form that takes 'svbool_t' arguments} } */ -+ svcnt_x (pg, 1); /* { dg-error {passing 'int' to argument 2 of 'svcnt_x', which expects an SVE vector type} } */ -+ svcnt_x (pg, u8); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_uint_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_uint_1.c -new file mode 100644 -index 000000000..9c8acdf2d ---- 
/dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_uint_1.c -@@ -0,0 +1,19 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+void -+f1 (svbool_t pg, svint8_t s8, svuint8_t u8, -+ svint16_t s16, svuint16_t u16, svfloat16_t f16) -+{ -+ svexpa (); /* { dg-error {too few arguments to function 'svexpa'} } */ -+ svexpa (u16, u16); /* { dg-error {too many arguments to function 'svexpa'} } */ -+ svexpa (1); /* { dg-error {passing 'int' to argument 1 of 'svexpa', which expects an SVE vector type} } */ -+ svexpa (pg); /* { dg-error {passing 'svbool_t' to argument 1 of 'svexpa', which expects a vector of unsigned integers} } */ -+ svexpa (s8); /* { dg-error {passing 'svint8_t' to argument 1 of 'svexpa', which expects a vector of unsigned integers} } */ -+ svexpa (s16); /* { dg-error {passing 'svint16_t' to argument 1 of 'svexpa', which expects a vector of unsigned integers} } */ -+ svexpa (f16); /* { dg-error {passing 'svfloat16_t' to argument 1 of 'svexpa', which expects a vector of unsigned integers} } */ -+ -+ svexpa (u8); /* { dg-error {'svexpa' has no form that takes 'svuint8_t' arguments} } */ -+ svexpa (u16); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_widen_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_widen_1.c -new file mode 100644 -index 000000000..95a97a72e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_widen_1.c -@@ -0,0 +1,25 @@ -+#include -+ -+void -+test (svbool_t pg, svint8_t s8, svuint8_t u8, -+ svint16_t s16, svuint16_t u16, svfloat16_t f16, -+ svint32_t s32, svuint32_t u32, svfloat32_t f32, -+ svint64_t s64, svuint64_t u64, svfloat64_t f64, float f, int i) -+{ -+ svunpklo (); /* { dg-error {too few arguments to function 'svunpklo'} } */ -+ svunpklo (pg, s8); /* { dg-error {too many arguments to function 'svunpklo'} } */ -+ svunpklo (i); /* { dg-error {passing 'int' to argument 1 of 'svunpklo', which expects an SVE vector type} } */ -+ svunpklo (f); /* { dg-error {passing 'float' to argument 1 of 'svunpklo', which expects an SVE vector type} } */ -+ svunpklo (pg); -+ svunpklo (s8); -+ svunpklo (s16); -+ svunpklo (s32); -+ svunpklo (s64); /* { dg-error {'svunpklo' has no form that takes 'svint64_t' arguments} } */ -+ svunpklo (u8); -+ svunpklo (u16); -+ svunpklo (u32); -+ svunpklo (u64); /* { dg-error {'svunpklo' has no form that takes 'svuint64_t' arguments} } */ -+ svunpklo (f16); /* { dg-error {'svunpklo' has no form that takes 'svfloat16_t' arguments} } */ -+ svunpklo (f32); /* { dg-error {'svunpklo' has no form that takes 'svfloat32_t' arguments} } */ -+ svunpklo (f64); /* { dg-error {'svunpklo' has no form that takes 'svfloat64_t' arguments} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/undeclared_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/undeclared_1.c -new file mode 100644 -index 000000000..37524c2ed ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/undeclared_1.c -@@ -0,0 +1,17 @@ -+#include -+ -+void -+f (svint8_t s8, svuint16_t u16, svfloat32_t f32, -+ svint16x2_t s16x2, svuint32x3_t u32x3, svfloat64x4_t f64x4, -+ svbool_t pg) -+{ -+ s8 = no_ret_s8 (); /* { dg-error {incompatible types when assigning to type 'svint8_t' from type 'int'} } */ -+ u16 = no_ret_u16 (); /* { dg-error {incompatible types when assigning to type 'svuint16_t' from type 'int'} } */ -+ f32 = no_ret_f32 (); /* { dg-error {incompatible types when assigning to type 'svfloat32_t' from type 'int'} } */ -+ s16x2 = no_ret_s16x2 (); /* { 
dg-error {incompatible types when assigning to type 'svint16x2_t' from type 'int'} } */
-+ u32x3 = no_ret_u32x3 (); /* { dg-error {incompatible types when assigning to type 'svuint32x3_t' from type 'int'} } */
-+ f64x4 = no_ret_f64x4 (); /* { dg-error {incompatible types when assigning to type 'svfloat64x4_t' from type 'int'} } */
-+ pg = no_ret_pg (); /* { dg-error {incompatible types when assigning to type 'svbool_t' from type 'int'} } */
-+
-+ no_pass_args (pg, u16, f32, s16x2, u32x3, f64x4, pg);
-+}
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/undeclared_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/undeclared_2.c
-new file mode 100644
-index 000000000..7e869bda8
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/undeclared_2.c
-@@ -0,0 +1,15 @@
-+#include <arm_sve.h>
-+
-+void
-+f (svint8_t s8, svuint16_t u16, svfloat32_t f32,
-+ svint16x2_t s16x2, svuint32x3_t u32x3, svfloat64x4_t f64x4,
-+ svbool_t pg)
-+{
-+ s8 = svlsr_x (pg, s8, 1); /* { dg-error {'svlsr_x' has no form that takes 'svint8_t' arguments} } */
-+ u16 = svneg_x (pg, u16); /* { dg-error {'svneg_x' has no form that takes 'svuint16_t' arguments} } */
-+ f32 = svclz_x (pg, f32); /* { dg-error {'svclz_x' has no form that takes 'svfloat32_t' arguments} } */
-+ s16x2 = svcreate2 (s8); /* { dg-error {too few arguments to function 'svcreate2'} } */
-+ u32x3 = svcreate3 (u16, u16, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svcreate3', but previous arguments had type 'svuint16_t'} } */
-+ f64x4 = svcreate4 (f32, f32, f32, f32, f32); /* { dg-error {too many arguments to function 'svcreate4'} } */
-+ pg = svadd_x (pg, pg, pg); /* { dg-error {'svadd_x' has no form that takes 'svbool_t' arguments} } */
-+}
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/add_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/add_1.c
-new file mode 100644
-index 000000000..f5c6285f8
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/add_1.c
-@@ -0,0 +1,13 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O2 -fdump-tree-optimized" } */
-+
-+#include <arm_sve.h>
-+
-+void
-+foo (svint8_t *res1, svint8_t *res2, svbool_t pg, svint8_t a, svint8_t b)
-+{
-+ *res1 = svadd_m (pg, a, b);
-+ *res2 = svadd_m (pg, a, b);
-+}
-+
-+/* { dg-final { scan-tree-dump-times {svadd_s8_m|svadd_m} 1 "optimized" } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/and_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/and_1.c
-new file mode 100644
-index 000000000..59348cece
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/and_1.c
-@@ -0,0 +1,22 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O2" } */
-+
-+#include <arm_sve.h>
-+
-+void
-+test1 (svbool_t pg, svbool_t x, svbool_t y, int *any, svbool_t *ptr)
-+{
-+ svbool_t res = svand_z (pg, x, y);
-+ *any = svptest_any (pg, res);
-+ *ptr = res;
-+}
-+
-+int
-+test2 (svbool_t pg, svbool_t x, svbool_t y, int *any)
-+{
-+ svbool_t res = svand_z (pg, x, y);
-+ return svptest_any (pg, res);
-+}
-+
-+/* { dg-final { scan-assembler-times {\tands\t} 2 } } */
-+/* { dg-final { scan-assembler-not {\tand\t} } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/bic_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/bic_1.c
-new file mode 100644
-index 000000000..e1c484995
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/bic_1.c
-@@ -0,0 +1,22 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O2" } */
-+
-+#include <arm_sve.h>
-+
-+void
-+test1 (svbool_t pg, svbool_t x, svbool_t y, int *any, svbool_t *ptr)
-+{
-+ svbool_t res = svbic_z (pg, x, y);
-+ *any = svptest_any (pg, res);
-+ *ptr = res;
-+}
-+
-+int
-+test2 (svbool_t pg, svbool_t x, svbool_t y, int *any)
-+{
-+ svbool_t res = svbic_z (pg, x, y);
-+ return svptest_any (pg, res);
-+}
-+
-+/* { dg-final { scan-assembler-times {\tbics\t} 2 } } */
-+/* { dg-final { scan-assembler-not {\tbic\t} } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brka_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brka_1.c
-new file mode 100644
-index 000000000..24aa8f317
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brka_1.c
-@@ -0,0 +1,22 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O2" } */
-+
-+#include <arm_sve.h>
-+
-+void
-+test1 (svbool_t pg, svbool_t x, svbool_t y, int *any, svbool_t *ptr)
-+{
-+ svbool_t res = svbrka_m (x, pg, y);
-+ *any = svptest_any (pg, res);
-+ *ptr = res;
-+}
-+
-+int
-+test2 (svbool_t pg, svbool_t x, svbool_t y, int *any)
-+{
-+ svbool_t res = svbrka_m (x, pg, y);
-+ return svptest_any (pg, res);
-+}
-+
-+/* { dg-final { scan-assembler-times {\tbrkas\tp[0-9]+\.b, p[0-9]+/m,} 2 } } */
-+/* { dg-final { scan-assembler-not {\tbrka\t} } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brka_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brka_2.c
-new file mode 100644
-index 000000000..8aa338867
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brka_2.c
-@@ -0,0 +1,22 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O2" } */
-+
-+#include <arm_sve.h>
-+
-+void
-+test1 (svbool_t pg, svbool_t x, int *any, svbool_t *ptr)
-+{
-+ svbool_t res = svbrka_z (pg, x);
-+ *any = svptest_any (pg, res);
-+ *ptr = res;
-+}
-+
-+int
-+test2 (svbool_t pg, svbool_t x, int *any)
-+{
-+ svbool_t res = svbrka_z (pg, x);
-+ return svptest_any (pg, res);
-+}
-+
-+/* { dg-final { scan-assembler-times {\tbrkas\tp[0-9]+\.b, p[0-9]+/z,} 2 } } */
-+/* { dg-final { scan-assembler-not {\tbrka\t} } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brkb_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brkb_1.c
-new file mode 100644
-index 000000000..07e3622ed
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brkb_1.c
-@@ -0,0 +1,22 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O2" } */
-+
-+#include <arm_sve.h>
-+
-+void
-+test1 (svbool_t pg, svbool_t x, svbool_t y, int *any, svbool_t *ptr)
-+{
-+ svbool_t res = svbrkb_m (x, pg, y);
-+ *any = svptest_any (pg, res);
-+ *ptr = res;
-+}
-+
-+int
-+test2 (svbool_t pg, svbool_t x, svbool_t y, int *any)
-+{
-+ svbool_t res = svbrkb_m (x, pg, y);
-+ return svptest_any (pg, res);
-+}
-+
-+/* { dg-final { scan-assembler-times {\tbrkbs\tp[0-9]+\.b, p[0-9]+/m,} 2 } } */
-+/* { dg-final { scan-assembler-not {\tbrkb\t} } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brkb_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brkb_2.c
-new file mode 100644
-index 000000000..ee677cedd
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brkb_2.c
-@@ -0,0 +1,22 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O2" } */
-+
-+#include <arm_sve.h>
-+
-+void
-+test1 (svbool_t pg, svbool_t x, int *any, svbool_t *ptr)
-+{
-+ svbool_t res = svbrkb_z (pg, x);
-+ *any = svptest_any (pg, res);
-+ *ptr = res;
-+}
-+
-+int
-+test2 (svbool_t pg, svbool_t x, int *any)
-+{
-+ svbool_t res = svbrkb_z (pg, x);
-+ return svptest_any (pg, res);
-+}
-+
-+/* { dg-final { scan-assembler-times {\tbrkbs\tp[0-9]+\.b, p[0-9]+/z,} 2 } } */
-+/* { dg-final { scan-assembler-not {\tbrkb\t} } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brkn_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brkn_1.c
-new file mode 100644
-index 000000000..7fd9318c1
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brkn_1.c
-@@ -0,0 +1,22 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O2" } */
-+
-+#include <arm_sve.h>
-+
-+void
-+test1 (svbool_t pg, svbool_t x, svbool_t y, int *any, svbool_t *ptr)
-+{
-+ svbool_t res = svbrkn_z (pg, x, y);
-+ *any = svptest_any (pg, res);
-+ *ptr = res;
-+}
-+
-+int
-+test2 (svbool_t pg, svbool_t x, svbool_t y, int *any)
-+{
-+ svbool_t res = svbrkn_z (pg, x, y);
-+ return svptest_any (pg, res);
-+}
-+
-+/* { dg-final { scan-assembler-times {\tbrkns\t} 2 } } */
-+/* { dg-final { scan-assembler-not {\tbrkn\t} } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brkpa_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brkpa_1.c
-new file mode 100644
-index 000000000..18cca370c
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brkpa_1.c
-@@ -0,0 +1,22 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O2" } */
-+
-+#include <arm_sve.h>
-+
-+void
-+test1 (svbool_t pg, svbool_t x, svbool_t y, int *any, svbool_t *ptr)
-+{
-+ svbool_t res = svbrkpa_z (pg, x, y);
-+ *any = svptest_any (pg, res);
-+ *ptr = res;
-+}
-+
-+int
-+test2 (svbool_t pg, svbool_t x, svbool_t y, int *any)
-+{
-+ svbool_t res = svbrkpa_z (pg, x, y);
-+ return svptest_any (pg, res);
-+}
-+
-+/* { dg-final { scan-assembler-times {\tbrkpas\t} 2 } } */
-+/* { dg-final { scan-assembler-not {\tbrkpa\t} } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brkpb_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brkpb_1.c
-new file mode 100644
-index 000000000..73eb7094d
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/brkpb_1.c
-@@ -0,0 +1,22 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O2" } */
-+
-+#include <arm_sve.h>
-+
-+void
-+test1 (svbool_t pg, svbool_t x, svbool_t y, int *any, svbool_t *ptr)
-+{
-+ svbool_t res = svbrkpb_z (pg, x, y);
-+ *any = svptest_any (pg, res);
-+ *ptr = res;
-+}
-+
-+int
-+test2 (svbool_t pg, svbool_t x, svbool_t y, int *any)
-+{
-+ svbool_t res = svbrkpb_z (pg, x, y);
-+ return svptest_any (pg, res);
-+}
-+
-+/* { dg-final { scan-assembler-times {\tbrkpbs\t} 2 } } */
-+/* { dg-final { scan-assembler-not {\tbrkpb\t} } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cmpeq_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cmpeq_1.c
-new file mode 100644
-index 000000000..dd8f6c494
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cmpeq_1.c
-@@ -0,0 +1,22 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O2" } */
-+
-+#include <arm_sve.h>
-+
-+void
-+test1 (svbool_t pg, svint8_t x, svint64_t y, int *any, svbool_t *ptr)
-+{
-+ svbool_t res = svcmpeq_wide (pg, x, y);
-+ *any = svptest_any (pg, res);
-+ *ptr = res;
-+}
-+
-+int
-+test2 (svbool_t pg, svint8_t x, svint64_t y, int *any)
-+{
-+ svbool_t res = svcmpeq_wide (pg, x, y);
-+ return svptest_any (pg, res);
-+}
-+
-+/* { dg-final { scan-assembler-times {\tcmpeq\t} 2 } } */
-+/* { dg-final { scan-assembler-not {\tptest\t} } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cmpeq_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cmpeq_2.c
-new file mode 100644
-index 000000000..028d37516
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cmpeq_2.c
-@@ -0,0 +1,38 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O2" } */
-+
-+#include -+ -+void -+test1 (svbool_t pg, svint8_t x, svint8_t y, int *any, svbool_t *ptr) -+{ -+ svbool_t res = svcmpeq (pg, x, y); -+ *any = svptest_any (pg, res); -+ *ptr = res; -+} -+ -+int -+test2 (svbool_t pg, svint8_t x, svint8_t y, int *any) -+{ -+ svbool_t res = svcmpeq (pg, x, y); -+ return svptest_any (pg, res); -+} -+ -+void -+test3 (svbool_t pg, svint8_t x, int *any, svbool_t *ptr) -+{ -+ svbool_t res = svcmpeq (pg, x, 10); -+ *any = svptest_any (pg, res); -+ *ptr = res; -+} -+ -+int -+test4 (svbool_t pg, svint8_t x, int *any) -+{ -+ svbool_t res = svcmpeq (pg, x, 10); -+ return svptest_any (pg, res); -+} -+ -+/* { dg-final { scan-assembler-times {\tcmpeq\t} 4 } } */ -+/* { dg-final { scan-assembler-times {\tcmpeq\t[^\n]*, #10} 2 } } */ -+/* { dg-final { scan-assembler-not {\tptest\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cmpeq_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cmpeq_3.c -new file mode 100644 -index 000000000..115b26c8e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cmpeq_3.c -@@ -0,0 +1,38 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+void -+test1 (svbool_t pg, svfloat32_t x, svfloat32_t y, int *any, svbool_t *ptr) -+{ -+ svbool_t res = svcmpeq (pg, x, y); -+ *any = svptest_any (pg, res); -+ *ptr = res; -+} -+ -+int -+test2 (svbool_t pg, svfloat32_t x, svfloat32_t y, int *any) -+{ -+ svbool_t res = svcmpeq (pg, x, y); -+ return svptest_any (pg, res); -+} -+ -+void -+test3 (svbool_t pg, svfloat32_t x, int *any, svbool_t *ptr) -+{ -+ svbool_t res = svcmpeq (pg, x, 0.0); -+ *any = svptest_any (pg, res); -+ *ptr = res; -+} -+ -+int -+test4 (svbool_t pg, svfloat32_t x, int *any) -+{ -+ svbool_t res = svcmpeq (pg, x, 0.0); -+ return svptest_any (pg, res); -+} -+ -+/* { dg-final { scan-assembler-times {\tfcmeq\t} 4 } } */ -+/* { dg-final { scan-assembler-times {\tfcmeq\t[^\n]*, #0\.0} 2 } } */ -+/* { dg-final { scan-assembler-times {\tptest\t} 4 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cntb_pat_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cntb_pat_1.c -new file mode 100644 -index 000000000..d57a75c20 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cntb_pat_1.c -@@ -0,0 +1,132 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-O -msve-vector-bits=256" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#include -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/* -+** cntb_pow2: -+** mov x0, #?32 -+** ret -+*/ -+uint64_t cntb_pow2 () { return svcntb_pat (SV_POW2); } -+ -+/* -+** cntb_vl1: -+** mov x0, #?1 -+** ret -+*/ -+uint64_t cntb_vl1 () { return svcntb_pat (SV_VL1); } -+ -+/* -+** cntb_vl2: -+** mov x0, #?2 -+** ret -+*/ -+uint64_t cntb_vl2 () { return svcntb_pat (SV_VL2); } -+ -+/* -+** cntb_vl3: -+** mov x0, #?3 -+** ret -+*/ -+uint64_t cntb_vl3 () { return svcntb_pat (SV_VL3); } -+ -+/* -+** cntb_vl4: -+** mov x0, #?4 -+** ret -+*/ -+uint64_t cntb_vl4 () { return svcntb_pat (SV_VL4); } -+ -+/* -+** cntb_vl5: -+** mov x0, #?5 -+** ret -+*/ -+uint64_t cntb_vl5 () { return svcntb_pat (SV_VL5); } -+ -+/* -+** cntb_vl6: -+** mov x0, #?6 -+** ret -+*/ -+uint64_t cntb_vl6 () { return svcntb_pat (SV_VL6); } -+ -+/* -+** cntb_vl7: -+** mov x0, #?7 -+** ret -+*/ -+uint64_t cntb_vl7 () { return svcntb_pat (SV_VL7); } -+ -+/* -+** cntb_vl8: -+** mov x0, #?8 -+** ret -+*/ -+uint64_t cntb_vl8 () { return svcntb_pat (SV_VL8); } -+ -+/* -+** cntb_vl16: -+** mov x0, #?16 -+** ret -+*/ 
-+uint64_t cntb_vl16 () { return svcntb_pat (SV_VL16); } -+ -+/* -+** cntb_vl32: -+** mov x0, #?32 -+** ret -+*/ -+uint64_t cntb_vl32 () { return svcntb_pat (SV_VL32); } -+ -+/* -+** cntb_vl64: -+** mov x0, #?0 -+** ret -+*/ -+uint64_t cntb_vl64 () { return svcntb_pat (SV_VL64); } -+ -+/* -+** cntb_vl128: -+** mov x0, #?0 -+** ret -+*/ -+uint64_t cntb_vl128 () { return svcntb_pat (SV_VL128); } -+ -+/* -+** cntb_vl256: -+** mov x0, #?0 -+** ret -+*/ -+uint64_t cntb_vl256 () { return svcntb_pat (SV_VL256); } -+ -+/* -+** cntb_mul3: -+** mov x0, #?30 -+** ret -+*/ -+uint64_t cntb_mul3 () { return svcntb_pat (SV_MUL3); } -+ -+/* -+** cntb_mul4: -+** mov x0, #?32 -+** ret -+*/ -+uint64_t cntb_mul4 () { return svcntb_pat (SV_MUL4); } -+ -+/* -+** cntb_all: -+** mov x0, #?32 -+** ret -+*/ -+uint64_t cntb_all () { return svcntb_pat (SV_ALL); } -+ -+#ifdef __cplusplus -+} -+#endif -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cntd_pat_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cntd_pat_1.c -new file mode 100644 -index 000000000..d93a32054 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cntd_pat_1.c -@@ -0,0 +1,132 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-O -msve-vector-bits=256" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#include -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/* -+** cntd_pow2: -+** mov x0, #?4 -+** ret -+*/ -+uint64_t cntd_pow2 () { return svcntd_pat (SV_POW2); } -+ -+/* -+** cntd_vl1: -+** mov x0, #?1 -+** ret -+*/ -+uint64_t cntd_vl1 () { return svcntd_pat (SV_VL1); } -+ -+/* -+** cntd_vl2: -+** mov x0, #?2 -+** ret -+*/ -+uint64_t cntd_vl2 () { return svcntd_pat (SV_VL2); } -+ -+/* -+** cntd_vl3: -+** mov x0, #?3 -+** ret -+*/ -+uint64_t cntd_vl3 () { return svcntd_pat (SV_VL3); } -+ -+/* -+** cntd_vl4: -+** mov x0, #?4 -+** ret -+*/ -+uint64_t cntd_vl4 () { return svcntd_pat (SV_VL4); } -+ -+/* -+** cntd_vl5: -+** mov x0, #?0 -+** ret -+*/ -+uint64_t cntd_vl5 () { return svcntd_pat (SV_VL5); } -+ -+/* -+** cntd_vl6: -+** mov x0, #?0 -+** ret -+*/ -+uint64_t cntd_vl6 () { return svcntd_pat (SV_VL6); } -+ -+/* -+** cntd_vl7: -+** mov x0, #?0 -+** ret -+*/ -+uint64_t cntd_vl7 () { return svcntd_pat (SV_VL7); } -+ -+/* -+** cntd_vl8: -+** mov x0, #?0 -+** ret -+*/ -+uint64_t cntd_vl8 () { return svcntd_pat (SV_VL8); } -+ -+/* -+** cntd_vl16: -+** mov x0, #?0 -+** ret -+*/ -+uint64_t cntd_vl16 () { return svcntd_pat (SV_VL16); } -+ -+/* -+** cntd_vl32: -+** mov x0, #?0 -+** ret -+*/ -+uint64_t cntd_vl32 () { return svcntd_pat (SV_VL32); } -+ -+/* -+** cntd_vl64: -+** mov x0, #?0 -+** ret -+*/ -+uint64_t cntd_vl64 () { return svcntd_pat (SV_VL64); } -+ -+/* -+** cntd_vl128: -+** mov x0, #?0 -+** ret -+*/ -+uint64_t cntd_vl128 () { return svcntd_pat (SV_VL128); } -+ -+/* -+** cntd_vl256: -+** mov x0, #?0 -+** ret -+*/ -+uint64_t cntd_vl256 () { return svcntd_pat (SV_VL256); } -+ -+/* -+** cntd_mul3: -+** mov x0, #?3 -+** ret -+*/ -+uint64_t cntd_mul3 () { return svcntd_pat (SV_MUL3); } -+ -+/* -+** cntd_mul4: -+** mov x0, #?4 -+** ret -+*/ -+uint64_t cntd_mul4 () { return svcntd_pat (SV_MUL4); } -+ -+/* -+** cntd_all: -+** mov x0, #?4 -+** ret -+*/ -+uint64_t cntd_all () { return svcntd_pat (SV_ALL); } -+ -+#ifdef __cplusplus -+} -+#endif -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnth_pat_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnth_pat_1.c -new file mode 100644 -index 000000000..bd988f53d ---- /dev/null -+++ 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnth_pat_1.c -@@ -0,0 +1,132 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-O -msve-vector-bits=256" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#include -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/* -+** cnth_pow2: -+** mov x0, #?16 -+** ret -+*/ -+uint64_t cnth_pow2 () { return svcnth_pat (SV_POW2); } -+ -+/* -+** cnth_vl1: -+** mov x0, #?1 -+** ret -+*/ -+uint64_t cnth_vl1 () { return svcnth_pat (SV_VL1); } -+ -+/* -+** cnth_vl2: -+** mov x0, #?2 -+** ret -+*/ -+uint64_t cnth_vl2 () { return svcnth_pat (SV_VL2); } -+ -+/* -+** cnth_vl3: -+** mov x0, #?3 -+** ret -+*/ -+uint64_t cnth_vl3 () { return svcnth_pat (SV_VL3); } -+ -+/* -+** cnth_vl4: -+** mov x0, #?4 -+** ret -+*/ -+uint64_t cnth_vl4 () { return svcnth_pat (SV_VL4); } -+ -+/* -+** cnth_vl5: -+** mov x0, #?5 -+** ret -+*/ -+uint64_t cnth_vl5 () { return svcnth_pat (SV_VL5); } -+ -+/* -+** cnth_vl6: -+** mov x0, #?6 -+** ret -+*/ -+uint64_t cnth_vl6 () { return svcnth_pat (SV_VL6); } -+ -+/* -+** cnth_vl7: -+** mov x0, #?7 -+** ret -+*/ -+uint64_t cnth_vl7 () { return svcnth_pat (SV_VL7); } -+ -+/* -+** cnth_vl8: -+** mov x0, #?8 -+** ret -+*/ -+uint64_t cnth_vl8 () { return svcnth_pat (SV_VL8); } -+ -+/* -+** cnth_vl16: -+** mov x0, #?16 -+** ret -+*/ -+uint64_t cnth_vl16 () { return svcnth_pat (SV_VL16); } -+ -+/* -+** cnth_vl32: -+** mov x0, #?0 -+** ret -+*/ -+uint64_t cnth_vl32 () { return svcnth_pat (SV_VL32); } -+ -+/* -+** cnth_vl64: -+** mov x0, #?0 -+** ret -+*/ -+uint64_t cnth_vl64 () { return svcnth_pat (SV_VL64); } -+ -+/* -+** cnth_vl128: -+** mov x0, #?0 -+** ret -+*/ -+uint64_t cnth_vl128 () { return svcnth_pat (SV_VL128); } -+ -+/* -+** cnth_vl256: -+** mov x0, #?0 -+** ret -+*/ -+uint64_t cnth_vl256 () { return svcnth_pat (SV_VL256); } -+ -+/* -+** cnth_mul3: -+** mov x0, #?15 -+** ret -+*/ -+uint64_t cnth_mul3 () { return svcnth_pat (SV_MUL3); } -+ -+/* -+** cnth_mul4: -+** mov x0, #?16 -+** ret -+*/ -+uint64_t cnth_mul4 () { return svcnth_pat (SV_MUL4); } -+ -+/* -+** cnth_all: -+** mov x0, #?16 -+** ret -+*/ -+uint64_t cnth_all () { return svcnth_pat (SV_ALL); } -+ -+#ifdef __cplusplus -+} -+#endif -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cntw_pat_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cntw_pat_1.c -new file mode 100644 -index 000000000..53c8435b1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cntw_pat_1.c -@@ -0,0 +1,132 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-O -msve-vector-bits=256" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#include -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/* -+** cntw_pow2: -+** mov x0, #?8 -+** ret -+*/ -+uint64_t cntw_pow2 () { return svcntw_pat (SV_POW2); } -+ -+/* -+** cntw_vl1: -+** mov x0, #?1 -+** ret -+*/ -+uint64_t cntw_vl1 () { return svcntw_pat (SV_VL1); } -+ -+/* -+** cntw_vl2: -+** mov x0, #?2 -+** ret -+*/ -+uint64_t cntw_vl2 () { return svcntw_pat (SV_VL2); } -+ -+/* -+** cntw_vl3: -+** mov x0, #?3 -+** ret -+*/ -+uint64_t cntw_vl3 () { return svcntw_pat (SV_VL3); } -+ -+/* -+** cntw_vl4: -+** mov x0, #?4 -+** ret -+*/ -+uint64_t cntw_vl4 () { return svcntw_pat (SV_VL4); } -+ -+/* -+** cntw_vl5: -+** mov x0, #?5 -+** ret -+*/ -+uint64_t cntw_vl5 () { return svcntw_pat (SV_VL5); } -+ -+/* -+** cntw_vl6: -+** mov x0, #?6 -+** ret -+*/ -+uint64_t cntw_vl6 () { return svcntw_pat (SV_VL6); } -+ -+/* -+** cntw_vl7: -+** mov x0, #?7 -+** ret -+*/ -+uint64_t cntw_vl7 
() { return svcntw_pat (SV_VL7); } -+ -+/* -+** cntw_vl8: -+** mov x0, #?8 -+** ret -+*/ -+uint64_t cntw_vl8 () { return svcntw_pat (SV_VL8); } -+ -+/* -+** cntw_vl16: -+** mov x0, #?0 -+** ret -+*/ -+uint64_t cntw_vl16 () { return svcntw_pat (SV_VL16); } -+ -+/* -+** cntw_vl32: -+** mov x0, #?0 -+** ret -+*/ -+uint64_t cntw_vl32 () { return svcntw_pat (SV_VL32); } -+ -+/* -+** cntw_vl64: -+** mov x0, #?0 -+** ret -+*/ -+uint64_t cntw_vl64 () { return svcntw_pat (SV_VL64); } -+ -+/* -+** cntw_vl128: -+** mov x0, #?0 -+** ret -+*/ -+uint64_t cntw_vl128 () { return svcntw_pat (SV_VL128); } -+ -+/* -+** cntw_vl256: -+** mov x0, #?0 -+** ret -+*/ -+uint64_t cntw_vl256 () { return svcntw_pat (SV_VL256); } -+ -+/* -+** cntw_mul3: -+** mov x0, #?6 -+** ret -+*/ -+uint64_t cntw_mul3 () { return svcntw_pat (SV_MUL3); } -+ -+/* -+** cntw_mul4: -+** mov x0, #?8 -+** ret -+*/ -+uint64_t cntw_mul4 () { return svcntw_pat (SV_MUL4); } -+ -+/* -+** cntw_all: -+** mov x0, #?8 -+** ret -+*/ -+uint64_t cntw_all () { return svcntw_pat (SV_ALL); } -+ -+#ifdef __cplusplus -+} -+#endif -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/debug_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/debug_1.c -new file mode 100644 -index 000000000..0442efef3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/debug_1.c -@@ -0,0 +1,16 @@ -+/* { dg-options "-g" } */ -+ -+#include -+ -+svbool_t f_b (svbool_t x) { return x; } -+svint8_t f_s8 (svint8_t x) { return x; } -+svuint8_t f_u8 (svuint8_t x) { return x; } -+svint16_t f_s16 (svint16_t x) { return x; } -+svuint16_t f_u16 (svuint16_t x) { return x; } -+svfloat16_t f_f16 (svfloat16_t x) { return x; } -+svint32_t f_s32 (svint32_t x) { return x; } -+svuint32_t f_u32 (svuint32_t x) { return x; } -+svfloat32_t f_f32 (svfloat32_t x) { return x; } -+svint64_t f_s64 (svint64_t x) { return x; } -+svuint64_t f_u64 (svuint64_t x) { return x; } -+svfloat64_t f_f64 (svfloat64_t x) { return x; } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/debug_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/debug_2.c -new file mode 100644 -index 000000000..63a26d2e9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/debug_2.c -@@ -0,0 +1,16 @@ -+/* { dg-options "-g" } */ -+ -+#include -+ -+svbool_t f_b (svbool_t x) { return svptrue_b32 (); } -+svint8_t f_s8 (svint8_t x) { return svdup_s8 (0); } -+svuint8_t f_u8 (svuint8_t x) { return svdup_u8 (1); } -+svint16_t f_s16 (svint16_t x) { return svdup_s16 (2); } -+svuint16_t f_u16 (svuint16_t x) { return svdup_u16 (3); } -+svfloat16_t f_f16 (svfloat16_t x) { return svdup_f16 (4); } -+svint32_t f_s32 (svint32_t x) { return svdup_s32 (5); } -+svuint32_t f_u32 (svuint32_t x) { return svdup_u32 (6); } -+svfloat32_t f_f32 (svfloat32_t x) { return svdup_f32 (7); } -+svint64_t f_s64 (svint64_t x) { return svdup_s64 (8); } -+svuint64_t f_u64 (svuint64_t x) { return svdup_u64 (9); } -+svfloat64_t f_f64 (svfloat64_t x) { return svdup_f64 (10); } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/debug_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/debug_3.c -new file mode 100644 -index 000000000..ac151e465 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/debug_3.c -@@ -0,0 +1,39 @@ -+/* { dg-options "-g" } */ -+ -+#include -+ -+svint8x2_t f2_s8 (svint8x2_t x) { return x; } -+svuint8x2_t f2_u8 (svuint8x2_t x) { return x; } -+svint16x2_t f2_s16 (svint16x2_t x) { return x; } -+svuint16x2_t f2_u16 (svuint16x2_t x) { return x; } 
-+svfloat16x2_t f2_f16 (svfloat16x2_t x) { return x; } -+svint32x2_t f2_s32 (svint32x2_t x) { return x; } -+svuint32x2_t f2_u32 (svuint32x2_t x) { return x; } -+svfloat32x2_t f2_f32 (svfloat32x2_t x) { return x; } -+svint64x2_t f2_s64 (svint64x2_t x) { return x; } -+svuint64x2_t f2_u64 (svuint64x2_t x) { return x; } -+svfloat64x2_t f2_f64 (svfloat64x2_t x) { return x; } -+ -+svint8x3_t f3_s8 (svint8x3_t x) { return x; } -+svuint8x3_t f3_u8 (svuint8x3_t x) { return x; } -+svint16x3_t f3_s16 (svint16x3_t x) { return x; } -+svuint16x3_t f3_u16 (svuint16x3_t x) { return x; } -+svfloat16x3_t f3_f16 (svfloat16x3_t x) { return x; } -+svint32x3_t f3_s32 (svint32x3_t x) { return x; } -+svuint32x3_t f3_u32 (svuint32x3_t x) { return x; } -+svfloat32x3_t f3_f32 (svfloat32x3_t x) { return x; } -+svint64x3_t f3_s64 (svint64x3_t x) { return x; } -+svuint64x3_t f3_u64 (svuint64x3_t x) { return x; } -+svfloat64x3_t f3_f64 (svfloat64x3_t x) { return x; } -+ -+svint8x4_t f4_s8 (svint8x4_t x) { return x; } -+svuint8x4_t f4_u8 (svuint8x4_t x) { return x; } -+svint16x4_t f4_s16 (svint16x4_t x) { return x; } -+svuint16x4_t f4_u16 (svuint16x4_t x) { return x; } -+svfloat16x4_t f4_f16 (svfloat16x4_t x) { return x; } -+svint32x4_t f4_s32 (svint32x4_t x) { return x; } -+svuint32x4_t f4_u32 (svuint32x4_t x) { return x; } -+svfloat32x4_t f4_f42 (svfloat32x4_t x) { return x; } -+svint64x4_t f4_s64 (svint64x4_t x) { return x; } -+svuint64x4_t f4_u64 (svuint64x4_t x) { return x; } -+svfloat64x4_t f4_f64 (svfloat64x4_t x) { return x; } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/double_pragma_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/double_pragma_1.c -new file mode 100644 -index 000000000..9b3c3697c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/double_pragma_1.c -@@ -0,0 +1,7 @@ -+/* { dg-do compile } */ -+/* { dg-options "" } */ -+ -+/* It doesn't really matter if this produces errors about redefinitions, -+ but it mustn't trigger an ICE. 
*/ -+#pragma GCC aarch64 "arm_sve.h" -+#pragma GCC aarch64 "arm_sve.h" /* { dg-error "duplicate definition of 'arm_sve.h'" } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_1.c -new file mode 100644 -index 000000000..d71507baa ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_1.c -@@ -0,0 +1,15 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -mlittle-endian" } */ -+ -+#include -+ -+svint32_t -+dupq (int x) -+{ -+ return svdupq_s32 (x, 1, 2, 3); -+} -+ -+/* { dg-final { scan-assembler {\tldr\tq[0-9]+,} } } */ -+/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[0\], w0\n} } } */ -+/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */ -+/* { dg-final { scan-assembler {\t\.word\t1\n\t\.word\t2\n\t\.word\t3\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_10.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_10.c -new file mode 100644 -index 000000000..f8f797c97 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_10.c -@@ -0,0 +1,66 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+svbool_t __attribute__ ((noipa)) -+make_b8 (int8_t x0, int8_t x1, int8_t x2, int8_t x3, -+ int8_t x4, int8_t x5, int8_t x6, int8_t x7, -+ int8_t x8, int8_t x9, int8_t xa, int8_t xb, -+ int8_t xc, int8_t xd, int8_t xe, int8_t xf) -+{ -+ return svdupq_b8 (x0, x1, x2, x3, x4, x5, x6, x7, -+ x8, x9, xa, xb, xc, xd, xe, xf); -+} -+ -+svbool_t __attribute__ ((noipa)) -+make_b16 (int16_t x0, int16_t x1, int16_t x2, int16_t x3, -+ int16_t x4, int16_t x5, int16_t x6, int16_t x7) -+{ -+ return svdupq_b16 (x0, x1, x2, x3, x4, x5, x6, x7); -+} -+ -+svbool_t __attribute__ ((noipa)) -+make_b32 (int32_t x0, int32_t x1, int32_t x2, int32_t x3) -+{ -+ return svdupq_b32 (x0, x1, x2, x3); -+} -+ -+svbool_t __attribute__ ((noipa)) -+make_b64 (int64_t x0, int64_t x1) -+{ -+ return svdupq_b64 (x0, x1); -+} -+ -+int8_t a[16] = { 1, 0, 0, -3, 0, 9, 11, 0, 0, 1, 0, -4, 9, 9, 0, 0 }; -+ -+int -+main () -+{ -+ svbool_t pg = svptrue_pat_b8 (SV_VL16); -+ svbool_t b8 = make_b8 (a[0], a[1], a[2], a[3], -+ a[4], a[5], a[6], a[7], -+ a[8], a[9], a[10], a[11], -+ a[12], a[13], a[14], a[15]); -+ if (svptest_any (svptrue_b8 (), -+ sveor_z (pg, b8, svcmpne (pg, svld1 (pg, a), 0)))) -+ __builtin_abort (); -+ -+ svbool_t b16 = make_b16 (a[0], a[1], a[2], a[3], -+ a[4], a[5], a[6], a[7]); -+ if (svptest_any (svptrue_b16 (), -+ sveor_z (pg, b16, svcmpne (pg, svld1sb_u16 (pg, a), 0)))) -+ __builtin_abort (); -+ -+ svbool_t b32 = make_b32 (a[0], a[1], a[2], a[3]); -+ if (svptest_any (svptrue_b32 (), -+ sveor_z (pg, b32, svcmpne (pg, svld1sb_u32 (pg, a), 0)))) -+ __builtin_abort (); -+ -+ svbool_t b64 = make_b64 (a[0], a[1]); -+ if (svptest_any (svptrue_b64 (), -+ sveor_z (pg, b64, svcmpne (pg, svld1sb_u64 (pg, a), 0)))) -+ __builtin_abort (); -+ -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c -new file mode 100644 -index 000000000..d494943a2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_2.c -@@ -0,0 +1,16 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -mbig-endian" } */ -+ -+/* To avoid needing big-endian header files. 
*/ -+#pragma GCC aarch64 "arm_sve.h" -+ -+svint32_t -+dupq (int x) -+{ -+ return svdupq_s32 (x, 1, 2, 3); -+} -+ -+/* { dg-final { scan-assembler {\tldr\tq[0-9]+,} } } */ -+/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[0\], w0\n} } } */ -+/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */ -+/* { dg-final { scan-assembler {\t\.word\t3\n\t\.word\t2\n\t\.word\t1\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_3.c -new file mode 100644 -index 000000000..4bc8259df ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_3.c -@@ -0,0 +1,16 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -mlittle-endian" } */ -+ -+/* To avoid needing big-endian header files. */ -+#pragma GCC aarch64 "arm_sve.h" -+ -+svint32_t -+dupq (int x) -+{ -+ return svdupq_s32 (0, 1, x, 3); -+} -+ -+/* { dg-final { scan-assembler {\tldr\tq[0-9]+,} } } */ -+/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[2\], w0\n} } } */ -+/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */ -+/* { dg-final { scan-assembler {\t\.word\t0\n\t\.word\t1\n\t\.word\t[^\n]*\n\t\.word\t3\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c -new file mode 100644 -index 000000000..6f9f9f2f2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_4.c -@@ -0,0 +1,16 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -mbig-endian" } */ -+ -+/* To avoid needing big-endian header files. */ -+#pragma GCC aarch64 "arm_sve.h" -+ -+svint32_t -+dupq (int x) -+{ -+ return svdupq_s32 (0, 1, x, 3); -+} -+ -+/* { dg-final { scan-assembler {\tldr\tq[0-9]+,} } } */ -+/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[2\], w0\n} } } */ -+/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */ -+/* { dg-final { scan-assembler {\t\.word\t3\n\t\.word\t[^\n]*\n\t\.word\t1\n\t\.word\t0\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_5.c -new file mode 100644 -index 000000000..53426c9af ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_5.c -@@ -0,0 +1,17 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -mlittle-endian" } */ -+ -+#include -+ -+svint32_t -+dupq (int x1, int x2, int x3, int x4) -+{ -+ return svdupq_s32 (x1, x2, x3, x4); -+} -+ -+/* { dg-final { scan-assembler-not {\tldr\t} } } */ -+/* { dg-final { scan-assembler {, [wx]0\n} } } */ -+/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[1\], w1\n} } } */ -+/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[2\], w2\n} } } */ -+/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[3\], w3\n} } } */ -+/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_6.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_6.c -new file mode 100644 -index 000000000..dfce5e7a1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_6.c -@@ -0,0 +1,18 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -mbig-endian" } */ -+ -+/* To avoid needing big-endian header files. 
*/ -+#pragma GCC aarch64 "arm_sve.h" -+ -+svint32_t -+dupq (int x1, int x2, int x3, int x4) -+{ -+ return svdupq_s32 (x1, x2, x3, x4); -+} -+ -+/* { dg-final { scan-assembler-not {\tldr\t} } } */ -+/* { dg-final { scan-assembler {, [wx]0\n} } } */ -+/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[1\], w1\n} } } */ -+/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[2\], w2\n} } } */ -+/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[3\], w3\n} } } */ -+/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_7.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_7.c -new file mode 100644 -index 000000000..08decb5f9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_7.c -@@ -0,0 +1,66 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+svint8_t __attribute__ ((noipa)) -+make_s8 (int8_t x0, int8_t x1, int8_t x2, int8_t x3, -+ int8_t x4, int8_t x5, int8_t x6, int8_t x7, -+ int8_t x8, int8_t x9, int8_t xa, int8_t xb, -+ int8_t xc, int8_t xd, int8_t xe, int8_t xf) -+{ -+ return svdupq_s8 (x0, x1, x2, x3, x4, x5, x6, x7, -+ x8, x9, xa, xb, xc, xd, xe, xf); -+} -+ -+svint16_t __attribute__ ((noipa)) -+make_s16 (int16_t x0, int16_t x1, int16_t x2, int16_t x3, -+ int16_t x4, int16_t x5, int16_t x6, int16_t x7) -+{ -+ return svdupq_s16 (x0, x1, x2, x3, x4, x5, x6, x7); -+} -+ -+svint32_t __attribute__ ((noipa)) -+make_s32 (int32_t x0, int32_t x1, int32_t x2, int32_t x3) -+{ -+ return svdupq_s32 (x0, x1, x2, x3); -+} -+ -+svint64_t __attribute__ ((noipa)) -+make_s64 (int64_t x0, int64_t x1) -+{ -+ return svdupq_s64 (x0, x1); -+} -+ -+int8_t a[16] = { 1, -44, 91, -24, 101, -55, 77, 83, -+ -30, 69, 121, -128, -1, 13, 127, 26 }; -+int16_t b[8] = { -716, -10288, 30604, -19258, -9418, -10435, -16001, 7300 }; -+int32_t c[4] = { 1268374995, -1023602831, -891830021, -1793452959 }; -+int64_t d[2] = { 0x123456789abcdefLL, -0x123456789abcdefLL }; -+ -+int -+main () -+{ -+ svbool_t pg = svptrue_pat_b8 (SV_VL16); -+ svint8_t s8 = make_s8 (a[0], a[1], a[2], a[3], -+ a[4], a[5], a[6], a[7], -+ a[8], a[9], a[10], a[11], -+ a[12], a[13], a[14], a[15]); -+ if (svptest_any (svptrue_b8 (), svcmpne (pg, s8, svld1 (pg, a)))) -+ __builtin_abort (); -+ -+ svint16_t s16 = make_s16 (b[0], b[1], b[2], b[3], -+ b[4], b[5], b[6], b[7]); -+ if (svptest_any (svptrue_b8 (), svcmpne (pg, s16, svld1 (pg, b)))) -+ __builtin_abort (); -+ -+ svint32_t s32 = make_s32 (c[0], c[1], c[2], c[3]); -+ if (svptest_any (svptrue_b8 (), svcmpne (pg, s32, svld1 (pg, c)))) -+ __builtin_abort (); -+ -+ svint64_t s64 = make_s64 (d[0], d[1]); -+ if (svptest_any (svptrue_b8 (), svcmpne (pg, s64, svld1 (pg, d)))) -+ __builtin_abort (); -+ -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_8.c -new file mode 100644 -index 000000000..c20fb7324 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_8.c -@@ -0,0 +1,66 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+svuint8_t __attribute__ ((noipa)) -+make_u8 (uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3, -+ uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7, -+ uint8_t x8, uint8_t x9, uint8_t xa, uint8_t xb, -+ uint8_t xc, uint8_t xd, uint8_t xe, uint8_t xf) -+{ -+ return svdupq_u8 (x0, x1, x2, x3, x4, x5, x6, x7, -+ x8, x9, xa, xb, xc, xd, xe, xf); -+} -+ -+svuint16_t 
__attribute__ ((noipa)) -+make_u16 (uint16_t x0, uint16_t x1, uint16_t x2, uint16_t x3, -+ uint16_t x4, uint16_t x5, uint16_t x6, uint16_t x7) -+{ -+ return svdupq_u16 (x0, x1, x2, x3, x4, x5, x6, x7); -+} -+ -+svuint32_t __attribute__ ((noipa)) -+make_u32 (uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3) -+{ -+ return svdupq_u32 (x0, x1, x2, x3); -+} -+ -+svuint64_t __attribute__ ((noipa)) -+make_u64 (uint64_t x0, uint64_t x1) -+{ -+ return svdupq_u64 (x0, x1); -+} -+ -+uint8_t a[16] = { 1, 212, 91, 232, 101, 201, 77, 83, -+ 226, 69, 121, 128, 255, 13, 127, 26 }; -+uint16_t b[8] = { 64820, 55248, 30604, 46278, 56118, 55101, 49535, 7300 }; -+uint32_t c[4] = { 1268374995, 3271364465, 3403137275, 2501514337 }; -+uint64_t d[2] = { 0x123456789abcdefULL, 0xfedcba9876543210ULL }; -+ -+int -+main () -+{ -+ svbool_t pg = svptrue_pat_b8 (SV_VL16); -+ svuint8_t u8 = make_u8 (a[0], a[1], a[2], a[3], -+ a[4], a[5], a[6], a[7], -+ a[8], a[9], a[10], a[11], -+ a[12], a[13], a[14], a[15]); -+ if (svptest_any (svptrue_b8 (), svcmpne (pg, u8, svld1 (pg, a)))) -+ __builtin_abort (); -+ -+ svuint16_t u16 = make_u16 (b[0], b[1], b[2], b[3], -+ b[4], b[5], b[6], b[7]); -+ if (svptest_any (svptrue_b8 (), svcmpne (pg, u16, svld1 (pg, b)))) -+ __builtin_abort (); -+ -+ svuint32_t u32 = make_u32 (c[0], c[1], c[2], c[3]); -+ if (svptest_any (svptrue_b8 (), svcmpne (pg, u32, svld1 (pg, c)))) -+ __builtin_abort (); -+ -+ svuint64_t u64 = make_u64 (d[0], d[1]); -+ if (svptest_any (svptrue_b8 (), svcmpne (pg, u64, svld1 (pg, d)))) -+ __builtin_abort (); -+ -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_9.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_9.c -new file mode 100644 -index 000000000..b29aa9474 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_9.c -@@ -0,0 +1,47 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+svfloat16_t __attribute__ ((noipa)) -+make_f16 (float16_t x0, float16_t x1, float16_t x2, float16_t x3, -+ float16_t x4, float16_t x5, float16_t x6, float16_t x7) -+{ -+ return svdupq_f16 (x0, x1, x2, x3, x4, x5, x6, x7); -+} -+ -+svfloat32_t __attribute__ ((noipa)) -+make_f32 (float32_t x0, float32_t x1, float32_t x2, float32_t x3) -+{ -+ return svdupq_f32 (x0, x1, x2, x3); -+} -+ -+svfloat64_t __attribute__ ((noipa)) -+make_f64 (float64_t x0, float64_t x1) -+{ -+ return svdupq_f64 (x0, x1); -+} -+ -+float16_t a[8] = { 1.0, -4.25, 9.75, 6.5, -2.125, 5.5, -3.75, 7.625 }; -+float32_t b[4] = { 1.0, -90.25, -11.75, 141.5 }; -+float64_t c[2] = { 9221.5, -4491.25 }; -+ -+int -+main () -+{ -+ svbool_t pg = svptrue_pat_b8 (SV_VL16); -+ svfloat16_t f16 = make_f16 (a[0], a[1], a[2], a[3], -+ a[4], a[5], a[6], a[7]); -+ if (svptest_any (svptrue_b8 (), svcmpne (pg, f16, svld1 (pg, a)))) -+ __builtin_abort (); -+ -+ svfloat32_t f32 = make_f32 (b[0], b[1], b[2], b[3]); -+ if (svptest_any (svptrue_b8 (), svcmpne (pg, f32, svld1 (pg, b)))) -+ __builtin_abort (); -+ -+ svfloat64_t f64 = make_f64 (c[0], c[1]); -+ if (svptest_any (svptrue_b8 (), svcmpne (pg, f64, svld1 (pg, c)))) -+ __builtin_abort (); -+ -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_1.c -new file mode 100644 -index 000000000..32ccb08d6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_1.c -@@ -0,0 +1,87 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2" } */ -+ 
-+#include -+ -+#ifndef TYPE -+#define TYPE svint8_t -+#define DUPQ svdupq_lane_s8 -+#define INDEX svindex_s8 -+#define COUNT 16 -+#endif -+ -+#define BASE 42 -+ -+TYPE __attribute__ ((noipa)) -+dupq_var (TYPE x, uint64_t y) -+{ -+ return DUPQ (x, y); -+} -+ -+TYPE __attribute__ ((noipa)) -+dupq_0 (TYPE x) -+{ -+ return DUPQ (x, 0); -+} -+ -+TYPE __attribute__ ((noipa)) -+dupq_1 (TYPE x) -+{ -+ return DUPQ (x, 1); -+} -+ -+TYPE __attribute__ ((noipa)) -+dupq_2 (TYPE x) -+{ -+ return DUPQ (x, 2); -+} -+ -+TYPE __attribute__ ((noipa)) -+dupq_3 (TYPE x) -+{ -+ return DUPQ (x, 3); -+} -+ -+TYPE __attribute__ ((noipa)) -+dupq_4 (TYPE x) -+{ -+ return DUPQ (x, 4); -+} -+ -+void __attribute__ ((noipa)) -+check (TYPE x, uint64_t y) -+{ -+ svbool_t pg = svptrue_b8 (); -+ if (y * 2 >= svcntd ()) -+ { -+ if (svptest_any (pg, svcmpne (pg, x, 0))) -+ __builtin_abort (); -+ } -+ else -+ { -+ TYPE repeat = svand_x (pg, INDEX (0, 1), COUNT - 1); -+ TYPE expected = svadd_x (pg, repeat, BASE + y * COUNT); -+ if (svptest_any (pg, svcmpne (pg, x, expected))) -+ __builtin_abort (); -+ } -+} -+ -+int -+main () -+{ -+ TYPE x = INDEX (BASE, 1); -+ -+ check (dupq_0 (x), 0); -+ check (dupq_1 (x), 1); -+ check (dupq_2 (x), 2); -+ check (dupq_3 (x), 3); -+ check (dupq_4 (x), 4); -+ -+ for (int i = 0; i < 63; ++i) -+ { -+ check (dupq_var (x, i), i); -+ check (dupq_var (x, (uint64_t) 1 << i), (uint64_t) 1 << i); -+ } -+ -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_2.c -new file mode 100644 -index 000000000..40de1c7dc ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_2.c -@@ -0,0 +1,9 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2" } */ -+ -+#define TYPE svuint8_t -+#define DUPQ svdupq_lane_u8 -+#define INDEX svindex_u8 -+#define COUNT 16 -+ -+#include "dupq_lane_1.c" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_3.c -new file mode 100644 -index 000000000..4ebe89545 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_3.c -@@ -0,0 +1,9 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2" } */ -+ -+#define TYPE svint16_t -+#define DUPQ svdupq_lane_s16 -+#define INDEX svindex_s16 -+#define COUNT 8 -+ -+#include "dupq_lane_1.c" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_4.c -new file mode 100644 -index 000000000..1be20c8e1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_4.c -@@ -0,0 +1,9 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2" } */ -+ -+#define TYPE svuint16_t -+#define DUPQ svdupq_lane_u16 -+#define INDEX svindex_u16 -+#define COUNT 8 -+ -+#include "dupq_lane_1.c" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_5.c -new file mode 100644 -index 000000000..67554d06a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_5.c -@@ -0,0 +1,9 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2" } */ -+ -+#define TYPE svint32_t -+#define DUPQ svdupq_lane_s32 -+#define INDEX svindex_s32 -+#define COUNT 4 -+ -+#include "dupq_lane_1.c" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_6.c 
b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_6.c -new file mode 100644 -index 000000000..1914d2368 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_6.c -@@ -0,0 +1,9 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2" } */ -+ -+#define TYPE svuint32_t -+#define DUPQ svdupq_lane_u32 -+#define INDEX svindex_u32 -+#define COUNT 4 -+ -+#include "dupq_lane_1.c" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_7.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_7.c -new file mode 100644 -index 000000000..d7a8e52f8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_7.c -@@ -0,0 +1,9 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2" } */ -+ -+#define TYPE svint64_t -+#define DUPQ svdupq_lane_s64 -+#define INDEX svindex_s64 -+#define COUNT 2 -+ -+#include "dupq_lane_1.c" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_8.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_8.c -new file mode 100644 -index 000000000..68655fefa ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/dupq_lane_8.c -@@ -0,0 +1,9 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2" } */ -+ -+#define TYPE svuint64_t -+#define DUPQ svdupq_lane_u64 -+#define INDEX svindex_u64 -+#define COUNT 2 -+ -+#include "dupq_lane_1.c" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/eor_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/eor_1.c -new file mode 100644 -index 000000000..357b0bfb8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/eor_1.c -@@ -0,0 +1,22 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+void -+test1 (svbool_t pg, svbool_t x, svbool_t y, int *any, svbool_t *ptr) -+{ -+ svbool_t res = sveor_z (pg, x, y); -+ *any = svptest_any (pg, res); -+ *ptr = res; -+} -+ -+int -+test2 (svbool_t pg, svbool_t x, svbool_t y, int *any) -+{ -+ svbool_t res = sveor_z (pg, x, y); -+ return svptest_any (pg, res); -+} -+ -+/* { dg-final { scan-assembler-times {\teors\t} 2 } } */ -+/* { dg-final { scan-assembler-not {\teor\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ld1_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ld1_1.c -new file mode 100644 -index 000000000..c68a9ed99 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ld1_1.c -@@ -0,0 +1,25 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#include -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/* -+** nop1: -+** ret -+*/ -+void nop1 (int8_t *s) { svld1 (svptrue_b8 (), s); } -+ -+/* -+** nop2: -+** ret -+*/ -+void nop2 (svbool_t pg, int16_t *s) { svld1 (pg, s); } -+ -+#ifdef __cplusplus -+} -+#endif -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_1.c -new file mode 100644 -index 000000000..79f8bee1f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_1.c -@@ -0,0 +1,18 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+/* Make sure that SETFFR comes first, however high the priority of the -+ LDFF1 is. 
*/ -+svint8_t -+foo (svbool_t pg, int8_t *ptr) -+{ -+ svsetffr (); -+ svint8_t x = svldff1 (pg, ptr); -+ x = svadd_x (pg, x, x); -+ x = svmul_x (pg, x, x); -+ return x; -+} -+ -+/* { dg-final { scan-assembler {\tsetffr\n.*\tldff1b\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_2.c -new file mode 100644 -index 000000000..7c3c8d8b5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_2.c -@@ -0,0 +1,20 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+/* Make sure that RDFFR comes after the LDFF1 and that the RDFFRs can -+ be CSEd. */ -+svint8_t -+foo (svbool_t pg, int8_t *__restrict ptr, -+ svbool_t *__restrict *__restrict preds) -+{ -+ svsetffr (); -+ svint8_t x = svldff1 (pg, ptr); -+ *preds[0] = svrdffr (); -+ *preds[1] = svrdffr (); -+ return x; -+} -+ -+/* { dg-final { scan-assembler {\tsetffr\n.*\tldff1b\t.*\trdffr\t} } } */ -+/* { dg-final { scan-assembler-times {\trdffr\t} 1 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_3.c -new file mode 100644 -index 000000000..41ad0bcea ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_3.c -@@ -0,0 +1,21 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+/* Make sure that LDFF1s can be reordered. The load of x should come due -+ to its longer dependence chain. */ -+svint8_t -+foo (int8_t *ptr1, int8_t *ptr2) -+{ -+ svsetffr (); -+ svbool_t pg = svptrue_b8 (); -+ svint8_t y = svldff1 (pg, ptr2); -+ svint8_t x = svldff1 (pg, ptr1); -+ x = svadd_x (pg, x, x); -+ x = svmul_x (pg, x, x); -+ x = svadd_x (pg, x, y); -+ return x; -+} -+ -+/* { dg-final { scan-assembler {\tldff1b\tz[0-9]+\.b, p[0-7]/z, \[x0\]\n.*\tldff1b\tz[0-9]+\.b, p[0-7]/z, \[x1\]\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_4.c -new file mode 100644 -index 000000000..c27302139 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_4.c -@@ -0,0 +1,17 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+/* Make sure that we can use RDFFRS to test for a fault. */ -+svint8_t -+foo (svbool_t pg, int8_t *ptr, int *fault) -+{ -+ svsetffr (); -+ svint8_t x = svldff1 (pg, ptr); -+ *fault = svptest_any (pg, svrdffr_z (pg)); -+ return x; -+} -+ -+/* { dg-final { scan-assembler {\tsetffr\n.*\tldff1b\t.*\trdffrs\t} } } */ -+/* { dg-final { scan-assembler-not {\trdffr\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_5.c -new file mode 100644 -index 000000000..76e7ab8ba ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_5.c -@@ -0,0 +1,20 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+/* Make sure that we can use RDFFRS to read the FFR while testing for a -+ fault. 
*/ -+svint8_t -+foo (svbool_t pg, int8_t *ptr, svbool_t *pred, int *fault) -+{ -+ svsetffr (); -+ svint8_t x = svldff1 (pg, ptr); -+ svbool_t ffr = svrdffr_z (pg); -+ *fault = svptest_any (pg, ffr); -+ *pred = ffr; -+ return x; -+} -+ -+/* { dg-final { scan-assembler {\tsetffr\n.*\tldff1b\t.*\trdffrs\t} } } */ -+/* { dg-final { scan-assembler-not {\trdffr\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_6.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_6.c -new file mode 100644 -index 000000000..7110e5f1a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_6.c -@@ -0,0 +1,17 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+/* Make sure that we can use RDFFRS to test for a fault. */ -+svint8_t -+foo (svbool_t pg, int8_t *ptr, int *fault) -+{ -+ svsetffr (); -+ svint8_t x = svldff1 (pg, ptr); -+ *fault = svptest_any (svptrue_b8 (), svrdffr ()); -+ return x; -+} -+ -+/* { dg-final { scan-assembler {\tsetffr\n.*\tldff1b\t.*\trdffrs\t} } } */ -+/* { dg-final { scan-assembler-not {\trdffr\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_7.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_7.c -new file mode 100644 -index 000000000..355fe91f1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ldff1_7.c -@@ -0,0 +1,20 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+/* Make sure that we can use RDFFRS to read the FFR while testing for a -+ fault. */ -+svint8_t -+foo (svbool_t pg, int8_t *ptr, svbool_t *pred, int *fault) -+{ -+ svsetffr (); -+ svint8_t x = svldff1 (pg, ptr); -+ svbool_t ffr = svrdffr (); -+ *fault = svptest_any (svptrue_b8 (), ffr); -+ *pred = ffr; -+ return x; -+} -+ -+/* { dg-final { scan-assembler {\tsetffr\n.*\tldff1b\t.*\trdffrs\t} } } */ -+/* { dg-final { scan-assembler-not {\trdffr\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/nand_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/nand_1.c -new file mode 100644 -index 000000000..0bc54c049 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/nand_1.c -@@ -0,0 +1,22 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+void -+test1 (svbool_t pg, svbool_t x, svbool_t y, int *any, svbool_t *ptr) -+{ -+ svbool_t res = svnand_z (pg, x, y); -+ *any = svptest_any (pg, res); -+ *ptr = res; -+} -+ -+int -+test2 (svbool_t pg, svbool_t x, svbool_t y, int *any) -+{ -+ svbool_t res = svnand_z (pg, x, y); -+ return svptest_any (pg, res); -+} -+ -+/* { dg-final { scan-assembler-times {\tnands\t} 2 } } */ -+/* { dg-final { scan-assembler-not {\tnand\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/nor_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/nor_1.c -new file mode 100644 -index 000000000..7973294d1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/nor_1.c -@@ -0,0 +1,22 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+void -+test1 (svbool_t pg, svbool_t x, svbool_t y, int *any, svbool_t *ptr) -+{ -+ svbool_t res = svnor_z (pg, x, y); -+ *any = svptest_any (pg, res); -+ *ptr = res; -+} -+ -+int -+test2 (svbool_t pg, svbool_t x, svbool_t y, int *any) -+{ -+ svbool_t res = svnor_z (pg, x, y); -+ return svptest_any (pg, res); -+} -+ -+/* { dg-final { scan-assembler-times {\tnors\t} 2 } } */ -+/* { dg-final { scan-assembler-not {\tnor\t} } } */ -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/nosve_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/nosve_1.c -new file mode 100644 -index 000000000..09dfacd22 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/nosve_1.c -@@ -0,0 +1,17 @@ -+/* { dg-options "-march=armv8-a" } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+void -+f (svbool_t *x, svint8_t *y) -+{ -+ *x = svptrue_b8 (); /* { dg-error {ACLE function '(svbool_t svptrue_b8\(\)|svptrue_b8)' requires ISA extension 'sve'} } */ -+ /* { dg-message {note: you can enable 'sve' using the command-line option '-march', or by using the 'target' attribute or pragma} "" { target *-*-* } .-1 } */ -+ *x = svptrue_b8 (); -+ *x = svptrue_b8 (); -+ *x = svptrue_b8 (); -+ *x = svptrue_b8 (); -+ *x = svptrue_b8 (); -+ *x = svptrue_b8 (); -+ *y = svadd_m (*x, *y, 1); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/nosve_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/nosve_2.c -new file mode 100644 -index 000000000..594be1cf4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/nosve_2.c -@@ -0,0 +1,14 @@ -+/* { dg-options "-march=armv8-a" } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+#pragma GCC target "+sve" -+ -+void -+f (svbool_t *x, svint8_t *y) -+{ -+ *x = svptrue_b8 (); -+ *y = svadd_m (*x, *y, 1); -+} -+ -+/* { dg-final { scan-assembler {\tadd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/nosve_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/nosve_3.c -new file mode 100644 -index 000000000..85f4eb3c0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/nosve_3.c -@@ -0,0 +1,12 @@ -+/* { dg-options "-march=armv8-a" } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+void __attribute__ ((target("+sve"))) -+f (svbool_t *x, svint8_t *y) -+{ -+ *x = svptrue_b8 (); -+ *y = svadd_m (*x, *y, 1); -+} -+ -+/* { dg-final { scan-assembler {\tadd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/orn_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/orn_1.c -new file mode 100644 -index 000000000..c3ed1eb61 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/orn_1.c -@@ -0,0 +1,22 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+void -+test1 (svbool_t pg, svbool_t x, svbool_t y, int *any, svbool_t *ptr) -+{ -+ svbool_t res = svorn_z (pg, x, y); -+ *any = svptest_any (pg, res); -+ *ptr = res; -+} -+ -+int -+test2 (svbool_t pg, svbool_t x, svbool_t y, int *any) -+{ -+ svbool_t res = svorn_z (pg, x, y); -+ return svptest_any (pg, res); -+} -+ -+/* { dg-final { scan-assembler-times {\torns\t} 2 } } */ -+/* { dg-final { scan-assembler-not {\torn\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/orr_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/orr_1.c -new file mode 100644 -index 000000000..4456fa630 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/orr_1.c -@@ -0,0 +1,22 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+void -+test1 (svbool_t pg, svbool_t x, svbool_t y, int *any, svbool_t *ptr) -+{ -+ svbool_t res = svorr_z (pg, x, y); -+ *any = svptest_any (pg, res); -+ *ptr = res; -+} -+ -+int -+test2 (svbool_t pg, svbool_t x, svbool_t y, int *any) -+{ -+ svbool_t res = svorr_z (pg, x, y); -+ return svptest_any (pg, res); -+} -+ -+/* { dg-final { scan-assembler-times {\torrs\t} 2 } 
} */ -+/* { dg-final { scan-assembler-not {\torr\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pfirst_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pfirst_1.c -new file mode 100644 -index 000000000..de1ff691a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pfirst_1.c -@@ -0,0 +1,22 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+void -+test1 (svbool_t pg, int *last, svbool_t *ptr) -+{ -+ svbool_t res = svpfirst (pg, svpfalse ()); -+ *last = svptest_last (pg, res); -+ *ptr = res; -+} -+ -+int -+test2 (svbool_t pg) -+{ -+ svbool_t res = svpfirst (pg, svpfalse ()); -+ return svptest_last (pg, res); -+} -+ -+/* { dg-final { scan-assembler-times {\tpfirst\t} 2 } } */ -+/* { dg-final { scan-assembler-not {\tptest\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pnext_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pnext_1.c -new file mode 100644 -index 000000000..bf59cb963 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pnext_1.c -@@ -0,0 +1,22 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+void -+test1 (svbool_t pg, svbool_t prev, int *last, svbool_t *ptr) -+{ -+ svbool_t res = svpnext_b8 (pg, prev); -+ *last = svptest_last (pg, res); -+ *ptr = res; -+} -+ -+int -+test2 (svbool_t pg, svbool_t prev) -+{ -+ svbool_t res = svpnext_b8 (pg, prev); -+ return svptest_last (pg, res); -+} -+ -+/* { dg-final { scan-assembler-times {\tpnext\t} 2 } } */ -+/* { dg-final { scan-assembler-not {\tptest\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pnext_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pnext_2.c -new file mode 100644 -index 000000000..9926a2bee ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/pnext_2.c -@@ -0,0 +1,52 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+void -+test1 (svbool_t pg, svbool_t prev, int *last, svbool_t *ptr) -+{ -+ svbool_t res = svpnext_b16 (pg, prev); -+ *last = svptest_last (pg, res); -+ *ptr = res; -+} -+ -+int -+test2 (svbool_t pg, svbool_t prev) -+{ -+ svbool_t res = svpnext_b16 (pg, prev); -+ return svptest_last (pg, res); -+} -+ -+void -+test3 (svbool_t pg, svbool_t prev, int *last, svbool_t *ptr) -+{ -+ svbool_t res = svpnext_b32 (pg, prev); -+ *last = svptest_last (pg, res); -+ *ptr = res; -+} -+ -+int -+test4 (svbool_t pg, svbool_t prev) -+{ -+ svbool_t res = svpnext_b32 (pg, prev); -+ return svptest_last (pg, res); -+} -+ -+void -+test5 (svbool_t pg, svbool_t prev, int *last, svbool_t *ptr) -+{ -+ svbool_t res = svpnext_b64 (pg, prev); -+ *last = svptest_last (pg, res); -+ *ptr = res; -+} -+ -+int -+test6 (svbool_t pg, svbool_t prev) -+{ -+ svbool_t res = svpnext_b64 (pg, prev); -+ return svptest_last (pg, res); -+} -+ -+/* { dg-final { scan-assembler-times {\tpnext\t} 6 } } */ -+/* { dg-final { scan-assembler-times {\tptest\t} 6 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ptrue_pat_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ptrue_pat_1.c -new file mode 100644 -index 000000000..69bbb1ed0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ptrue_pat_1.c -@@ -0,0 +1,23 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+void -+test1 (int *last, svbool_t *ptr) -+{ -+ svbool_t res = svptrue_pat_b8 (SV_VL32); -+ *last = svptest_last (svptrue_b8 (), res); -+ *ptr = res; -+} -+ -+int -+test2 () -+{ -+ svbool_t res = 
svptrue_pat_b8 (SV_VL32); -+ return svptest_last (svptrue_b8 (), res); -+} -+ -+/* { dg-final { scan-assembler-times {\tptrues\tp[0-9]+\.b, vl32\n} 2 } } */ -+/* { dg-final { scan-assembler-not {\tptrue\t} { xfail *-*-* } } } */ -+/* { dg-final { scan-assembler-not {\tptest\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ptrue_pat_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ptrue_pat_2.c -new file mode 100644 -index 000000000..ede83405e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ptrue_pat_2.c -@@ -0,0 +1,23 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+void -+test1 (int *last, svbool_t *ptr) -+{ -+ svbool_t res = svptrue_pat_b16 (SV_VL16); -+ *last = svptest_last (svptrue_b16 (), res); -+ *ptr = res; -+} -+ -+int -+test2 () -+{ -+ svbool_t res = svptrue_pat_b16 (SV_VL16); -+ return svptest_last (svptrue_b16 (), res); -+} -+ -+/* { dg-final { scan-assembler-times {\tptrues\tp[0-9]+\.h, vl16\n} 2 } } */ -+/* { dg-final { scan-assembler-not {\tptrue\t} { xfail *-*-* } } } */ -+/* { dg-final { scan-assembler-not {\tptest\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ptrue_pat_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ptrue_pat_3.c -new file mode 100644 -index 000000000..d2eb3fc30 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ptrue_pat_3.c -@@ -0,0 +1,23 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+void -+test1 (int *last, svbool_t *ptr) -+{ -+ svbool_t res = svptrue_pat_b32 (SV_VL16); -+ *last = svptest_last (svptrue_b32 (), res); -+ *ptr = res; -+} -+ -+int -+test2 () -+{ -+ svbool_t res = svptrue_pat_b32 (SV_VL16); -+ return svptest_last (svptrue_b32 (), res); -+} -+ -+/* { dg-final { scan-assembler-times {\tptrues\tp[0-9]+\.s, vl16\n} 2 } } */ -+/* { dg-final { scan-assembler-not {\tptrue\t} { xfail *-*-* } } } */ -+/* { dg-final { scan-assembler-not {\tptest\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ptrue_pat_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ptrue_pat_4.c -new file mode 100644 -index 000000000..59a21da9e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ptrue_pat_4.c -@@ -0,0 +1,23 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+void -+test1 (int *any, svbool_t *ptr) -+{ -+ svbool_t res = svptrue_pat_b64 (SV_VL7); -+ *any = svptest_any (svptrue_b64 (), res); -+ *ptr = res; -+} -+ -+int -+test2 () -+{ -+ svbool_t res = svptrue_pat_b64 (SV_VL7); -+ return svptest_any (svptrue_b64 (), res); -+} -+ -+/* { dg-final { scan-assembler-times {\tptrues\tp[0-9]+\.d, vl7\n} 2 } } */ -+/* { dg-final { scan-assembler-not {\tptrue\t} { xfail *-*-* } } } */ -+/* { dg-final { scan-assembler-not {\tptest\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ptrue_pat_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ptrue_pat_5.c -new file mode 100644 -index 000000000..c8f6d8aca ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/ptrue_pat_5.c -@@ -0,0 +1,188 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+void -+b8_b16_1 (int *any, svbool_t *ptr) -+{ -+ svbool_t res = svptrue_pat_b8 (SV_VL64); -+ *any = svptest_any (svptrue_b16 (), res); -+ *ptr = res; -+} -+ -+int -+b8_b16_2 () -+{ -+ svbool_t res = svptrue_pat_b8 (SV_VL64); -+ return svptest_any (svptrue_b16 (), res); -+} -+ -+void -+b8_b32_1 (int *any, svbool_t *ptr) -+{ -+ 
svbool_t res = svptrue_pat_b8 (SV_VL32); -+ *any = svptest_any (svptrue_b32 (), res); -+ *ptr = res; -+} -+ -+int -+b8_b32_2 () -+{ -+ svbool_t res = svptrue_pat_b8 (SV_VL32); -+ return svptest_any (svptrue_b32 (), res); -+} -+ -+void -+b8_b64_1 (int *any, svbool_t *ptr) -+{ -+ svbool_t res = svptrue_pat_b8 (SV_VL128); -+ *any = svptest_any (svptrue_b64 (), res); -+ *ptr = res; -+} -+ -+int -+b8_b64_2 () -+{ -+ svbool_t res = svptrue_pat_b8 (SV_VL128); -+ return svptest_any (svptrue_b64 (), res); -+} -+ -+void -+b16_b8_1 (int *any, svbool_t *ptr) -+{ -+ svbool_t res = svptrue_pat_b16 (SV_VL32); -+ *any = svptest_any (svptrue_b8 (), res); -+ *ptr = res; -+} -+ -+int -+b16_b8_2 () -+{ -+ svbool_t res = svptrue_pat_b16 (SV_VL32); -+ return svptest_any (svptrue_b8 (), res); -+} -+ -+void -+b16_b32_1 (int *any, svbool_t *ptr) -+{ -+ svbool_t res = svptrue_pat_b16 (SV_VL16); -+ *any = svptest_any (svptrue_b32 (), res); -+ *ptr = res; -+} -+ -+int -+b16_b32_2 () -+{ -+ svbool_t res = svptrue_pat_b16 (SV_VL16); -+ return svptest_any (svptrue_b32 (), res); -+} -+ -+void -+b16_b64_1 (int *any, svbool_t *ptr) -+{ -+ svbool_t res = svptrue_pat_b16 (SV_VL64); -+ *any = svptest_any (svptrue_b64 (), res); -+ *ptr = res; -+} -+ -+int -+b16_b64_2 () -+{ -+ svbool_t res = svptrue_pat_b16 (SV_VL64); -+ return svptest_any (svptrue_b64 (), res); -+} -+ -+void -+b32_b8_1 (int *any, svbool_t *ptr) -+{ -+ svbool_t res = svptrue_pat_b32 (SV_VL16); -+ *any = svptest_any (svptrue_b8 (), res); -+ *ptr = res; -+} -+ -+int -+b32_b8_2 () -+{ -+ svbool_t res = svptrue_pat_b32 (SV_VL16); -+ return svptest_any (svptrue_b8 (), res); -+} -+ -+void -+b32_b16_1 (int *any, svbool_t *ptr) -+{ -+ svbool_t res = svptrue_pat_b32 (SV_VL6); -+ *any = svptest_any (svptrue_b16 (), res); -+ *ptr = res; -+} -+ -+int -+b32_b16_2 () -+{ -+ svbool_t res = svptrue_pat_b32 (SV_VL6); -+ return svptest_any (svptrue_b16 (), res); -+} -+ -+void -+b32_b64_1 (int *any, svbool_t *ptr) -+{ -+ svbool_t res = svptrue_pat_b32 (SV_VL32); -+ *any = svptest_any (svptrue_b64 (), res); -+ *ptr = res; -+} -+ -+int -+b32_b64_2 () -+{ -+ svbool_t res = svptrue_pat_b32 (SV_VL32); -+ return svptest_any (svptrue_b64 (), res); -+} -+ -+void -+b64_b8_1 (int *any, svbool_t *ptr) -+{ -+ svbool_t res = svptrue_pat_b64 (SV_VL7); -+ *any = svptest_any (svptrue_b8 (), res); -+ *ptr = res; -+} -+ -+int -+b64_b8_2 () -+{ -+ svbool_t res = svptrue_pat_b64 (SV_VL7); -+ return svptest_any (svptrue_b8 (), res); -+} -+ -+void -+b64_b16_1 (int *any, svbool_t *ptr) -+{ -+ svbool_t res = svptrue_pat_b64 (SV_VL16); -+ *any = svptest_any (svptrue_b16 (), res); -+ *ptr = res; -+} -+ -+int -+b64_b16_2 () -+{ -+ svbool_t res = svptrue_pat_b64 (SV_VL16); -+ return svptest_any (svptrue_b16 (), res); -+} -+ -+void -+b64_b32_1 (int *any, svbool_t *ptr) -+{ -+ svbool_t res = svptrue_pat_b64 (SV_VL32); -+ *any = svptest_any (svptrue_b32 (), res); -+ *ptr = res; -+} -+ -+int -+b64_b32_2 () -+{ -+ svbool_t res = svptrue_pat_b64 (SV_VL32); -+ return svptest_any (svptrue_b32 (), res); -+} -+ -+/* { dg-final { scan-assembler-not {\tptrues\n} } } */ -+/* { dg-final { scan-assembler-times {\tptrue\t} 48 } } */ -+/* { dg-final { scan-assembler-times {\tptest\t} 24 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/qincb_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/qincb_1.c -new file mode 100644 -index 000000000..ba512f406 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/qincb_1.c -@@ -0,0 +1,43 @@ -+/* { dg-do compile } */ -+/* { 
dg-additional-options "-O" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#include -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/* -+** qincb_s32_s: -+** sqincb x0, w0, all, mul #15 -+** ret -+*/ -+uint64_t qincb_s32_s (int32_t x) { return svqincb (x, 15); } -+ -+/* -+** qincb_s32_z: -+** sqincb x([0-9]+), w0, all, mul #15 -+** uxtw x0, w\1 -+** ret -+*/ -+uint64_t qincb_s32_z (int32_t x) { return (uint32_t) svqincb (x, 15); } -+ -+/* -+** qincb_u32_s: -+** uqincb (w[0-9]+), all, mul #15 -+** sxtw x0, \1 -+** ret -+*/ -+uint64_t qincb_u32_s (uint32_t x) { return (int32_t) svqincb (x, 15); } -+ -+/* -+** qincb_u32_z: -+** uqincb w0, all, mul #15 -+** ret -+*/ -+uint64_t qincb_u32_z (uint32_t x) { return svqincb (x, 15); } -+ -+#ifdef __cplusplus -+} -+#endif -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/struct_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/struct_1.c -new file mode 100644 -index 000000000..50892c85a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/struct_1.c -@@ -0,0 +1,16 @@ -+#include -+ -+void -+f (svint8x2_t *a, svint8x2_t *b) -+{ -+ svint8_t *ptr; -+ svint8x2_t x = *a; -+ *a = *b; -+ a = &x; -+ (void) (a == b); -+ (void) (a != b); -+ (void) (a < b); -+ (void) (a > b); -+ (void) (a <= b); -+ (void) (a >= b); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/temporaries_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/temporaries_1.c -new file mode 100644 -index 000000000..2543e1e62 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/temporaries_1.c -@@ -0,0 +1,70 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O" } */ -+/* { dg-final { check-function-bodies "**" "" "" { target { ! ilp32 } } } } */ -+ -+#include -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+/* -+** test_s8: -+** ptrue (p[0-7])\.b, all -+** ld1b (z[0-9]+\.b), \1/z, \[x0\] -+** add \2, \2, #1 -+** st1b \2, \1, \[x1\] -+** ret -+*/ -+void -+test_s8 (int8_t *x, int8_t *y) -+{ -+ int8_t tmp1[32], tmp2[32]; -+ -+ svbool_t pg = svptrue_b8 (); -+ svst1 (pg, tmp1, svld1 (pg, x)); -+ svst1 (pg, tmp2, svadd_x (pg, svld1 (pg, tmp1), 1)); -+ svst1 (pg, y, svld1 (pg, tmp2)); -+} -+ -+/* -+** test_s32_b8: -+** ptrue (p[0-7])\.b, all -+** ld1w (z[0-9]+\.s), \1/z, \[x0\] -+** add \2, \2, #1 -+** st1w \2, \1, \[x1\] -+** ret -+*/ -+void -+test_s32_b8 (int32_t *x, int32_t *y) -+{ -+ int32_t tmp1[8], tmp2[8]; -+ -+ svbool_t pg = svptrue_b8 (); -+ svst1 (pg, tmp1, svld1 (pg, x)); -+ svst1 (pg, tmp2, svadd_x (pg, svld1 (pg, tmp1), 1)); -+ svst1 (pg, y, svld1 (pg, tmp2)); -+} -+ -+/* -+** test_s32_b32: -+** ptrue (p[0-7])\.b, all -+** ld1w (z[0-9]+\.s), \1/z, \[x0\] -+** add \2, \2, #1 -+** st1w \2, \1, \[x1\] -+** ret -+*/ -+void -+test_s32_b32 (int32_t *x, int32_t *y) -+{ -+ int32_t tmp1[8], tmp2[8]; -+ -+ svbool_t pg = svptrue_b32 (); -+ svst1 (pg, tmp1, svld1 (pg, x)); -+ svst1 (pg, tmp2, svadd_x (pg, svld1 (pg, tmp1), 1)); -+ svst1 (pg, y, svld1 (pg, tmp2)); -+} -+ -+#ifdef __cplusplus -+} -+#endif -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_1.c -new file mode 100644 -index 000000000..1d5523e31 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_1.c -@@ -0,0 +1,23 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+void -+test1 (int32_t x, int32_t y, int *any, svbool_t *ptr) -+{ -+ svbool_t res = svwhilele_b8 (x, y); -+ *any = svptest_last (svptrue_b8 
(), res); -+ *ptr = res; -+} -+ -+int -+test2 (int32_t x, int32_t y) -+{ -+ svbool_t res = svwhilele_b8 (x, y); -+ return svptest_last (svptrue_b8 (), res); -+} -+ -+/* { dg-final { scan-assembler-times {\twhilele\t} 2 } } */ -+/* { dg-final { scan-assembler-not {\tptrue\t} } } */ -+/* { dg-final { scan-assembler-not {\tptest\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_10.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_10.c -new file mode 100644 -index 000000000..ca339c41c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_10.c -@@ -0,0 +1,28 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+/* { dg-final { scan-assembler-not {\twhilele\t} } } */ -+/* { dg-final { scan-assembler-not {\twhilelt\t} } } */ -+/* { dg-final { scan-assembler-not {\tptrue\t} } } */ -+ -+void -+test1 (svbool_t *ptr) -+{ -+ *ptr = svwhilele_b32_u32 (-1, 0); -+} -+ -+void -+test2 (svbool_t *ptr) -+{ -+ *ptr = svwhilele_b16_u64 (0x80000000, 0); -+} -+ -+void -+test3 (svbool_t *ptr) -+{ -+ *ptr = svwhilele_b8_u64 (0x8000000000000001ULL, 0x7ffffffffffffffeULL); -+} -+ -+/* { dg-final { scan-assembler-times {\tpfalse\tp[0-7]\.b\n} 3 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_2.c -new file mode 100644 -index 000000000..020846007 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_2.c -@@ -0,0 +1,23 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+void -+test1 (int32_t x, int32_t y, int *any, svbool_t *ptr) -+{ -+ svbool_t res = svwhilele_b16 (x, y); -+ *any = svptest_last (svptrue_b16 (), res); -+ *ptr = res; -+} -+ -+int -+test2 (int32_t x, int32_t y) -+{ -+ svbool_t res = svwhilele_b16 (x, y); -+ return svptest_last (svptrue_b16 (), res); -+} -+ -+/* { dg-final { scan-assembler-times {\twhilele\t} 2 } } */ -+/* { dg-final { scan-assembler-not {\tptrue\t} } } */ -+/* { dg-final { scan-assembler-not {\tptest\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_3.c -new file mode 100644 -index 000000000..4a1045cf6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_3.c -@@ -0,0 +1,23 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+void -+test1 (int32_t x, int32_t y, int *any, svbool_t *ptr) -+{ -+ svbool_t res = svwhilele_b32 (x, y); -+ *any = svptest_last (svptrue_b32 (), res); -+ *ptr = res; -+} -+ -+int -+test2 (int32_t x, int32_t y) -+{ -+ svbool_t res = svwhilele_b32 (x, y); -+ return svptest_last (svptrue_b32 (), res); -+} -+ -+/* { dg-final { scan-assembler-times {\twhilele\t} 2 } } */ -+/* { dg-final { scan-assembler-not {\tptrue\t} } } */ -+/* { dg-final { scan-assembler-not {\tptest\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_4.c -new file mode 100644 -index 000000000..f6fb0d099 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_4.c -@@ -0,0 +1,23 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+void -+test1 (int32_t x, int32_t y, int *any, svbool_t *ptr) -+{ -+ svbool_t res = svwhilele_b64 (x, y); -+ *any = svptest_last (svptrue_b64 (), res); -+ *ptr = res; -+} -+ -+int -+test2 (int32_t x, int32_t y) -+{ -+ svbool_t res = svwhilele_b64 (x, y); -+ 
return svptest_last (svptrue_b64 (), res); -+} -+ -+/* { dg-final { scan-assembler-times {\twhilele\t} 2 } } */ -+/* { dg-final { scan-assembler-not {\tptrue\t} } } */ -+/* { dg-final { scan-assembler-not {\tptest\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_5.c -new file mode 100644 -index 000000000..ada958b29 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_5.c -@@ -0,0 +1,47 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+/* { dg-final { scan-assembler-not {\twhilele\t} } } */ -+/* { dg-final { scan-assembler-not {\twhilelt\t} } } */ -+ -+void -+test1 (svbool_t *ptr) -+{ -+ *ptr = svwhilele_b32_s32 (-8, -8); -+} -+ -+/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.[bhsd], vl1\n} } } */ -+ -+void -+test2 (svbool_t *ptr) -+{ -+ *ptr = svwhilele_b16_s64 (-1, 1); -+} -+ -+/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.h, vl3\n} } } */ -+ -+void -+test3 (svbool_t *ptr) -+{ -+ *ptr = svwhilele_b16_s32 (0x7ffffffb, 0x7fffffff); -+} -+ -+/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.h, vl5\n} } } */ -+ -+void -+test4 (svbool_t *ptr) -+{ -+ *ptr = svwhilele_b8_s64 (svcntb (), svcntb () + 6); -+} -+ -+/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.b, vl7\n} } } */ -+ -+void -+test5 (svbool_t *ptr) -+{ -+ *ptr = svwhilele_b64_s64 (0, 1); -+} -+ -+/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.d, vl2\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_6.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_6.c -new file mode 100644 -index 000000000..00d92ba8a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_6.c -@@ -0,0 +1,40 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+/* { dg-final { scan-assembler-not {\twhilele\t} } } */ -+/* { dg-final { scan-assembler-not {\twhilelt\t} } } */ -+/* { dg-final { scan-assembler-not {\tptrue\t} } } */ -+ -+void -+test1 (svbool_t *ptr) -+{ -+ *ptr = svwhilele_b32_s32 (-8, -9); -+} -+ -+void -+test2 (svbool_t *ptr) -+{ -+ *ptr = svwhilele_b16_s64 (50, -1); -+} -+ -+void -+test3 (svbool_t *ptr) -+{ -+ *ptr = svwhilele_b16_s32 (0x7ffffffb, 0x80000000); -+} -+ -+void -+test4 (svbool_t *ptr) -+{ -+ *ptr = svwhilele_b8_s64 (svcntb (), 15); -+} -+ -+void -+test5 (svbool_t *ptr) -+{ -+ *ptr = svwhilele_b8_s64 (svcntb (), svcntw ()); -+} -+ -+/* { dg-final { scan-assembler-times {\tpfalse\tp[0-7]\.b\n} 5 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_7.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_7.c -new file mode 100644 -index 000000000..92488f597 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_7.c -@@ -0,0 +1,31 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+/* { dg-final { scan-assembler-not {\twhilel[et]\t} } } */ -+/* { dg-final { scan-assembler-not {\tpfalse\t} } } */ -+ -+void -+test1 (svbool_t *ptr) -+{ -+ *ptr = svwhilele_b8_s32 (-svcnth (), svcnth () - 1); -+} -+ -+/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.b, all\n} } } */ -+ -+void -+test2 (svbool_t *ptr) -+{ -+ *ptr = svwhilele_b16_s64 (1, svcntw () * 2); -+} -+ -+/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.h, all\n} } } */ -+ -+void -+test3 (svbool_t *ptr) -+{ -+ *ptr = svwhilele_b32_s32 (svcntd (), svcntw () + svcntd () - 1); -+} -+ -+/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.s, all\n} } } */ -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_9.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_9.c -new file mode 100644 -index 000000000..e7f81a86f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilele_9.c -@@ -0,0 +1,31 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+/* { dg-final { scan-assembler-not {\twhilele\t} } } */ -+/* { dg-final { scan-assembler-not {\twhilelt\t} } } */ -+ -+void -+test1 (svbool_t *ptr) -+{ -+ *ptr = svwhilele_b32_u32 (1, 3); -+} -+ -+/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.s, vl3\n} } } */ -+ -+void -+test2 (svbool_t *ptr) -+{ -+ *ptr = svwhilele_b16_u64 (svcntd (), svcntd () + 5); -+} -+ -+/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.h, vl6\n} } } */ -+ -+void -+test3 (svbool_t *ptr) -+{ -+ *ptr = svwhilele_b8_u32 (0x7ffffffb, 0x80000002); -+} -+ -+/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.b, vl8\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_1.c -new file mode 100644 -index 000000000..5c8f97e2f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_1.c -@@ -0,0 +1,47 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+/* { dg-final { scan-assembler-not {\twhilele\t} } } */ -+/* { dg-final { scan-assembler-not {\twhilelt\t} } } */ -+ -+void -+test1 (svbool_t *ptr) -+{ -+ *ptr = svwhilelt_b32_s32 (-8, -7); -+} -+ -+/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.[bhsd], vl1\n} } } */ -+ -+void -+test2 (svbool_t *ptr) -+{ -+ *ptr = svwhilelt_b16_s64 (-1, 2); -+} -+ -+/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.h, vl3\n} } } */ -+ -+void -+test3 (svbool_t *ptr) -+{ -+ *ptr = svwhilelt_b16_s32 (0x7ffffffa, 0x7fffffff); -+} -+ -+/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.h, vl5\n} } } */ -+ -+void -+test4 (svbool_t *ptr) -+{ -+ *ptr = svwhilelt_b8_s64 (svcntb (), svcntb () + 7); -+} -+ -+/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.b, vl7\n} } } */ -+ -+void -+test5 (svbool_t *ptr) -+{ -+ *ptr = svwhilelt_b64_s64 (0, 2); -+} -+ -+/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.d, vl2\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_2.c -new file mode 100644 -index 000000000..2be3a5b0c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_2.c -@@ -0,0 +1,40 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+/* { dg-final { scan-assembler-not {\twhilele\t} } } */ -+/* { dg-final { scan-assembler-not {\twhilelt\t} } } */ -+/* { dg-final { scan-assembler-not {\tptrue\t} } } */ -+ -+void -+test1 (svbool_t *ptr) -+{ -+ *ptr = svwhilelt_b32_s32 (0, 0); -+} -+ -+void -+test2 (svbool_t *ptr) -+{ -+ *ptr = svwhilelt_b16_s64 (50, -1); -+} -+ -+void -+test3 (svbool_t *ptr) -+{ -+ *ptr = svwhilelt_b16_s32 (0x7ffffffb, 0x80000000); -+} -+ -+void -+test4 (svbool_t *ptr) -+{ -+ *ptr = svwhilelt_b8_s64 (svcntb (), svcntb ()); -+} -+ -+void -+test5 (svbool_t *ptr) -+{ -+ *ptr = svwhilelt_b8_s64 (svcntb (), svcntw ()); -+} -+ -+/* { dg-final { scan-assembler-times {\tpfalse\tp[0-7]\.b\n} 5 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_3.c -new file mode 100644 -index 000000000..650b2652f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/whilelt_3.c -@@ 
-0,0 +1,31 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#include -+ -+/* { dg-final { scan-assembler-not {\twhilel[et]\t} } } */ -+/* { dg-final { scan-assembler-not {\tpfalse\t} } } */ -+ -+void -+test1 (svbool_t *ptr) -+{ -+ *ptr = svwhilelt_b8_s32 (-svcnth (), svcnth ()); -+} -+ -+/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.b, all\n} } } */ -+ -+void -+test2 (svbool_t *ptr) -+{ -+ *ptr = svwhilelt_b16_s64 (0, svcntw () * 2); -+} -+ -+/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.h, all\n} } } */ -+ -+void -+test3 (svbool_t *ptr) -+{ -+ *ptr = svwhilelt_b32_s32 (svcntd (), svcntw () + svcntd ()); -+} -+ -+/* { dg-final { scan-assembler {\tptrue\tp[0-7]\.s, all\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/adr_1.c b/gcc/testsuite/gcc.target/aarch64/sve/adr_1.c -new file mode 100644 -index 000000000..223351c2f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/adr_1.c -@@ -0,0 +1,46 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#ifndef FACTOR -+#define FACTOR 2 -+#endif -+ -+#define LOOP(TYPE) \ -+ __attribute__ ((noipa)) \ -+ void \ -+ test_##TYPE (TYPE *restrict dst, TYPE *restrict src, \ -+ int count) \ -+ { \ -+ for (int i = 0; i < count; ++i) \ -+ dst[i] += src[i] * FACTOR; \ -+ } -+ -+#define TEST_ALL(T) \ -+ T (int8_t) \ -+ T (int16_t) \ -+ T (int32_t) \ -+ T (int64_t) \ -+ T (uint8_t) \ -+ T (uint16_t) \ -+ T (uint32_t) \ -+ T (uint64_t) -+ -+TEST_ALL (LOOP) -+ -+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]\.b,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]\.b,} 2 } } */ -+/* { dg-final { scan-assembler-not {\tadr\tz[0-9]\.b,} } } */ -+ -+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]\.h,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]\.h,} 2 } } */ -+/* { dg-final { scan-assembler-not {\tadr\tz[0-9]\.h,} } } */ -+ -+/* { dg-final { scan-assembler-not {\tadd\tz[0-9]\.s,} } } */ -+/* { dg-final { scan-assembler-not {\tlsl\tz[0-9]\.s,} } } */ -+/* { dg-final { scan-assembler-times {\tadr\tz[0-9]\.s, \[z[0-9]\.s, z[0-9]\.s, lsl 1\]} 2 } } */ -+ -+/* { dg-final { scan-assembler-not {\tadd\tz[0-9]\.d,} } } */ -+/* { dg-final { scan-assembler-not {\tlsl\tz[0-9]\.d,} } } */ -+/* { dg-final { scan-assembler-times {\tadr\tz[0-9]\.d, \[z[0-9]\.d, z[0-9]\.d, lsl 1\]} 2 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/adr_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/adr_1_run.c -new file mode 100644 -index 000000000..383a90c24 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/adr_1_run.c -@@ -0,0 +1,31 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "adr_1.c" -+ -+#define N 131 -+ -+#define TEST_LOOP(TYPE) \ -+ { \ -+ TYPE a[N], b[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (TYPE) i * i + i % 5; \ -+ b[i] = (TYPE) i * 3 + i % 7; \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE (a, b, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected = ((TYPE) (i * i + i % 5) \ -+ + ((TYPE) i * 3 + i % 7) * FACTOR); \ -+ if (a[i] != expected) \ -+ __builtin_abort (); \ -+ } \ -+ } -+ -+int __attribute__ ((optimize (1))) -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/adr_2.c b/gcc/testsuite/gcc.target/aarch64/sve/adr_2.c -new file mode 100644 -index 000000000..dc20ddbad ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/adr_2.c -@@ -0,0 +1,21 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 
-ftree-vectorize" } */ -+ -+#define FACTOR 4 -+#include "adr_1.c" -+ -+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]\.b,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]\.b,} 2 } } */ -+/* { dg-final { scan-assembler-not {\tadr\tz[0-9]\.b,} } } */ -+ -+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]\.h,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]\.h,} 2 } } */ -+/* { dg-final { scan-assembler-not {\tadr\tz[0-9]\.h,} } } */ -+ -+/* { dg-final { scan-assembler-not {\tadd\tz[0-9]\.s,} } } */ -+/* { dg-final { scan-assembler-not {\tlsl\tz[0-9]\.s,} } } */ -+/* { dg-final { scan-assembler-times {\tadr\tz[0-9]\.s, \[z[0-9]\.s, z[0-9]\.s, lsl 2\]} 2 } } */ -+ -+/* { dg-final { scan-assembler-not {\tadd\tz[0-9]\.d,} } } */ -+/* { dg-final { scan-assembler-not {\tlsl\tz[0-9]\.d,} } } */ -+/* { dg-final { scan-assembler-times {\tadr\tz[0-9]\.d, \[z[0-9]\.d, z[0-9]\.d, lsl 2\]} 2 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/adr_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/adr_2_run.c -new file mode 100644 -index 000000000..e823d3d0a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/adr_2_run.c -@@ -0,0 +1,5 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#define FACTOR 4 -+#include "adr_1_run.c" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/adr_3.c b/gcc/testsuite/gcc.target/aarch64/sve/adr_3.c -new file mode 100644 -index 000000000..b0cb180dd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/adr_3.c -@@ -0,0 +1,21 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#define FACTOR 8 -+#include "adr_1.c" -+ -+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]\.b,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]\.b,} 2 } } */ -+/* { dg-final { scan-assembler-not {\tadr\tz[0-9]\.b,} } } */ -+ -+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]\.h,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]\.h,} 2 } } */ -+/* { dg-final { scan-assembler-not {\tadr\tz[0-9]\.h,} } } */ -+ -+/* { dg-final { scan-assembler-not {\tadd\tz[0-9]\.s,} } } */ -+/* { dg-final { scan-assembler-not {\tlsl\tz[0-9]\.s,} } } */ -+/* { dg-final { scan-assembler-times {\tadr\tz[0-9]\.s, \[z[0-9]\.s, z[0-9]\.s, lsl 3\]} 2 } } */ -+ -+/* { dg-final { scan-assembler-not {\tadd\tz[0-9]\.d,} } } */ -+/* { dg-final { scan-assembler-not {\tlsl\tz[0-9]\.d,} } } */ -+/* { dg-final { scan-assembler-times {\tadr\tz[0-9]\.d, \[z[0-9]\.d, z[0-9]\.d, lsl 3\]} 2 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/adr_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/adr_3_run.c -new file mode 100644 -index 000000000..721dd68ef ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/adr_3_run.c -@@ -0,0 +1,5 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#define FACTOR 8 -+#include "adr_1_run.c" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/adr_4.c b/gcc/testsuite/gcc.target/aarch64/sve/adr_4.c -new file mode 100644 -index 000000000..7c039ba13 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/adr_4.c -@@ -0,0 +1,9 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#define FACTOR 16 -+#include "adr_1.c" -+ -+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]\.[bhsd],} 8 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]\.[bhsd],} 8 } } */ -+/* { dg-final { scan-assembler-not {\tadr\tz[0-9]\.[bhsd],} } } */ -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/adr_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/adr_4_run.c -new file mode 100644 -index 000000000..3fb9099e1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/adr_4_run.c -@@ -0,0 +1,5 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#define FACTOR 16 -+#include "adr_1_run.c" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/adr_5.c b/gcc/testsuite/gcc.target/aarch64/sve/adr_5.c -new file mode 100644 -index 000000000..ce3991cb2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/adr_5.c -@@ -0,0 +1,27 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define LOOP(FACTOR) \ -+ __attribute__ ((noipa)) \ -+ void \ -+ test_##FACTOR (uint64_t *restrict dst, \ -+ uint64_t *restrict src, int count) \ -+ { \ -+ for (int i = 0; i < count; ++i) \ -+ dst[i] += (src[i] & 0xffffffff) * FACTOR; \ -+ } -+ -+#define TEST_ALL(T) T (1) T (2) T (4) T (8) -+ -+TEST_ALL (LOOP) -+ -+/* { dg-final { scan-assembler-not {\tadd\tz[0-9]\.d,} } } */ -+/* { dg-final { scan-assembler-not {\tlsl\tz[0-9]\.d,} } } */ -+/* { dg-final { scan-assembler-not {\tand\tz[0-9]\.d,} } } */ -+/* { dg-final { scan-assembler-not {\tuxtw\tz[0-9]\.d,} } } */ -+/* { dg-final { scan-assembler-times {\tadr\tz[0-9]\.d, \[z[0-9]\.d, z[0-9]\.d, uxtw\]} 1 } } */ -+/* { dg-final { scan-assembler-times {\tadr\tz[0-9]\.d, \[z[0-9]\.d, z[0-9]\.d, uxtw 1\]} 1 } } */ -+/* { dg-final { scan-assembler-times {\tadr\tz[0-9]\.d, \[z[0-9]\.d, z[0-9]\.d, uxtw 2\]} 1 } } */ -+/* { dg-final { scan-assembler-times {\tadr\tz[0-9]\.d, \[z[0-9]\.d, z[0-9]\.d, uxtw 3\]} 1 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/adr_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve/adr_5_run.c -new file mode 100644 -index 000000000..025c38d23 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/adr_5_run.c -@@ -0,0 +1,32 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "adr_5.c" -+ -+#define N 131 -+ -+#define TEST_LOOP(FACTOR) \ -+ { \ -+ uint64_t a[N], b[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (uint64_t) i * i + i % 5; \ -+ b[i] = (uint64_t) (i * 3) << ((i & 7) * 8); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##FACTOR (a, b, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ uint64_t expected = ((uint64_t) (i * i + i % 5) \ -+ + (((uint64_t) (i * 3) << ((i & 7) * 8)) \ -+ & 0xffffffff) * FACTOR); \ -+ if (a[i] != expected) \ -+ __builtin_abort (); \ -+ } \ -+ } -+ -+int __attribute__ ((optimize (1))) -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/asrdiv_1.c b/gcc/testsuite/gcc.target/aarch64/sve/asrdiv_1.c -new file mode 100644 -index 000000000..615d8b885 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/asrdiv_1.c -@@ -0,0 +1,51 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */ -+ -+#include -+ -+#define SIGNED(S) int##S##_t -+ -+#define DIV(x,y) ((x)/(y)) -+#define MOD(x,y) ((x)%(y)) -+ -+#define TEMPLATE(OP,SIZE) \ -+void __attribute__ ((noinline, noclone)) \ -+f_##OP##_##SIZE (SIGNED(SIZE) *restrict a, SIGNED(SIZE) *restrict b, \ -+ __INTPTR_TYPE__ n) \ -+{ \ -+ for (__INTPTR_TYPE__ i = 0; i < n; ++i) \ -+ a[i] = OP (b[i], ((SIGNED(SIZE))1 << ((SIZE)/2+1))); \ -+} -+#define DIVMOD(SIZE) \ -+TEMPLATE (DIV,SIZE); \ -+TEMPLATE (MOD,SIZE); -+ -+DIVMOD (8); -+DIVMOD (16); -+DIVMOD (32); -+DIVMOD 
(64); -+ -+/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 8 "vect" } } */ -+ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+, z[0-9]+\n} 4 } } */ -+ -+/* { dg-final { scan-assembler-times {\tasrd\tz[0-9]+\.b, p[0-9]+/m, z[0-9]+\.b, #5\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, z[0-9]+\.b, #5\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tasrd\tz[0-9]+\.h, p[0-9]+/m, z[0-9]+\.h, #9\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, z[0-9]+\.h, #9\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tasrd\tz[0-9]+\.s, p[0-9]+/m, z[0-9]+\.s, #17\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, z[0-9]+\.s, #17\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tasrd\tz[0-9]+\.d, p[0-9]+/m, z[0-9]+\.d, #33\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, z[0-9]+\.d, #33\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-not {\tasr\t%} } } */ -+/* { dg-final { scan-assembler-not {\tlsr\t%} } } */ -+/* { dg-final { scan-assembler-not {\tcmplt\t%} } } */ -+/* { dg-final { scan-assembler-not {\tand\t%} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clastb_8.c b/gcc/testsuite/gcc.target/aarch64/sve/clastb_8.c -new file mode 100644 -index 000000000..d86a428a7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/clastb_8.c -@@ -0,0 +1,25 @@ -+/* { dg-do assemble { target aarch64_asm_sve_ok } } */ -+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=256 --save-temps" } */ -+ -+#include -+ -+#define TEST_TYPE(TYPE) \ -+ void \ -+ test_##TYPE (TYPE *ptr, TYPE *a, TYPE *b, TYPE min_v) \ -+ { \ -+ TYPE last = *ptr; \ -+ for (int i = 0; i < 1024; i++) \ -+ if (a[i] < min_v) \ -+ last = b[i]; \ -+ *ptr = last; \ -+ } -+ -+TEST_TYPE (uint8_t); -+TEST_TYPE (uint16_t); -+TEST_TYPE (uint32_t); -+TEST_TYPE (uint64_t); -+ -+/* { dg-final { scan-assembler {\tclastb\t(b[0-9]+), p[0-7], \1, z[0-9]+\.b\n} } } */ -+/* { dg-final { scan-assembler {\tclastb\t(h[0-9]+), p[0-7], \1, z[0-9]+\.h\n} } } */ -+/* { dg-final { scan-assembler {\tclastb\t(s[0-9]+), p[0-7], \1, z[0-9]+\.s\n} } } */ -+/* { dg-final { scan-assembler {\tclastb\t(d[0-9]+), p[0-7], \1, z[0-9]+\.d\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c b/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c -new file mode 100644 -index 000000000..bdc9856fa ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c -@@ -0,0 +1,22 @@ -+/* { dg-do assemble { target aarch64_asm_sve_ok } } */ -+/* { dg-options "-O2 -ftree-vectorize --save-temps" } */ -+ -+#include -+ -+void __attribute__ ((noinline, noclone)) -+clrsb_32 (unsigned int *restrict dst, uint32_t *restrict src, int size) -+{ -+ for (int i = 0; i < size; ++i) -+ dst[i] = __builtin_clrsb (src[i]); -+} -+ -+void __attribute__ ((noinline, noclone)) -+clrsb_64 (unsigned int *restrict dst, uint64_t *restrict src, int size) -+{ -+ for (int i = 0; i < size; ++i) -+ dst[i] = __builtin_clrsbll (src[i]); -+} -+ -+/* { dg-final { scan-assembler-times {\tcls\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tcls\tz[0-9]+\.d, 
p[0-7]/m, z[0-9]+\.d\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1_run.c -new file mode 100644 -index 000000000..287630d7f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1_run.c -@@ -0,0 +1,50 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "clrsb_1.c" -+ -+extern void abort (void) __attribute__ ((noreturn)); -+ -+unsigned int data[] = { -+ 0xffffff80, 24, -+ 0xffffffff, 31, -+ 0x00000000, 31, -+ 0x80000000, 0, -+ 0x7fffffff, 0, -+ 0x000003ff, 21, -+ 0x1fffffff, 2, -+ 0x0000ffff, 15, -+ 0xffff0000, 15 -+}; -+ -+int __attribute__ ((optimize (1))) -+main (void) -+{ -+ unsigned int count = sizeof (data) / sizeof (data[0]) / 2; -+ -+ uint32_t in32[count]; -+ unsigned int out32[count]; -+ for (unsigned int i = 0; i < count; ++i) -+ { -+ in32[i] = data[i * 2]; -+ asm volatile ("" ::: "memory"); -+ } -+ clrsb_32 (out32, in32, count); -+ for (unsigned int i = 0; i < count; ++i) -+ if (out32[i] != data[i * 2 + 1]) -+ abort (); -+ -+ uint64_t in64[count]; -+ unsigned int out64[count]; -+ for (unsigned int i = 0; i < count; ++i) -+ { -+ in64[i] = (uint64_t) data[i * 2] << 32; -+ asm volatile ("" ::: "memory"); -+ } -+ clrsb_64 (out64, in64, count); -+ for (unsigned int i = 0; i < count; ++i) -+ if (out64[i] != (data[i * 2] ? data[i * 2 + 1] : 63)) -+ abort (); -+ -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clz_1.c b/gcc/testsuite/gcc.target/aarch64/sve/clz_1.c -new file mode 100644 -index 000000000..0c7a4e6d7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/clz_1.c -@@ -0,0 +1,22 @@ -+/* { dg-do assemble { target aarch64_asm_sve_ok } } */ -+/* { dg-options "-O2 -ftree-vectorize --save-temps" } */ -+ -+#include -+ -+void __attribute__ ((noinline, noclone)) -+clz_32 (unsigned int *restrict dst, uint32_t *restrict src, int size) -+{ -+ for (int i = 0; i < size; ++i) -+ dst[i] = __builtin_clz (src[i]); -+} -+ -+void __attribute__ ((noinline, noclone)) -+clz_64 (unsigned int *restrict dst, uint64_t *restrict src, int size) -+{ -+ for (int i = 0; i < size; ++i) -+ dst[i] = __builtin_clzll (src[i]); -+} -+ -+/* { dg-final { scan-assembler-times {\tclz\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tclz\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clz_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/clz_1_run.c -new file mode 100644 -index 000000000..12d9cf276 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/clz_1_run.c -@@ -0,0 +1,50 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "clz_1.c" -+ -+extern void abort (void) __attribute__ ((noreturn)); -+ -+unsigned int data[] = { -+ 0xffffff80, 0, -+ 0xffffffff, 0, -+ 0x00000000, 32, -+ 0x80000000, 0, -+ 0x7fffffff, 1, -+ 0x000003ff, 22, -+ 0x1fffffff, 3, -+ 0x0000ffff, 16, -+ 0xffff0000, 0 -+}; -+ -+int __attribute__ ((optimize (1))) -+main (void) -+{ -+ unsigned int count = sizeof (data) / sizeof (data[0]) / 2; -+ -+ uint32_t in32[count]; -+ unsigned int out32[count]; -+ for (unsigned int i = 0; i < count; ++i) -+ { -+ in32[i] = data[i * 2]; -+ asm volatile ("" ::: "memory"); -+ } -+ clz_32 (out32, in32, count); -+ for 
(unsigned int i = 0; i < count; ++i) -+ if (out32[i] != data[i * 2 + 1]) -+ abort (); -+ -+ uint64_t in64[count]; -+ unsigned int out64[count]; -+ for (unsigned int i = 0; i < count; ++i) -+ { -+ in64[i] = (uint64_t) data[i * 2] << 10; -+ asm volatile ("" ::: "memory"); -+ } -+ clz_64 (out64, in64, count); -+ for (unsigned int i = 0; i < count; ++i) -+ if (out64[i] != (data[i * 2] ? data[i * 2 + 1] + 22 : 64)) -+ abort (); -+ -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cnot_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cnot_1.c -new file mode 100644 -index 000000000..5fa33461c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cnot_1.c -@@ -0,0 +1,30 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE (TYPE *restrict r, TYPE *restrict a, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = !a[i]; \ -+ } -+ -+#define TEST_ALL(T) \ -+ T (int8_t) \ -+ T (int16_t) \ -+ T (int32_t) \ -+ T (int64_t) \ -+ T (uint8_t) \ -+ T (uint16_t) \ -+ T (uint32_t) \ -+ T (uint64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 2 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_1.c -new file mode 100644 -index 000000000..c02e8ae8e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_1.c -@@ -0,0 +1,42 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define abd(A, B) (((A) < (B) ? (B) : (A)) - ((A) < (B) ? (A) : (B))) -+ -+#define DEF_LOOP(TYPE) \ -+ void __attribute__ ((noinline, noclone)) \ -+ test_##TYPE (TYPE *__restrict r, TYPE *__restrict a, \ -+ TYPE *__restrict b, TYPE *__restrict c, \ -+ int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = a[i] < 20 ? 
abd (b[i], c[i]) : b[i]; \ -+ } -+ -+#define TEST_ALL(T) \ -+ T (int8_t) \ -+ T (uint8_t) \ -+ T (int16_t) \ -+ T (uint16_t) \ -+ T (int32_t) \ -+ T (uint32_t) \ -+ T (int64_t) \ -+ T (uint64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_1_run.c -new file mode 100644 -index 000000000..a45beefc2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_1_run.c -@@ -0,0 +1,33 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_abd_1.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE) \ -+ { \ -+ TYPE r[N], a[N], b[N], c[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i); \ -+ b[i] = (i >> 4) << (i & 15); \ -+ c[i] = ((i + 2) % 3) * (i + 1); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE (r, a, b, c, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected = a[i] < 20 ? abd (b[i], c[i]) : b[i]; \ -+ if (r[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_2.c -new file mode 100644 -index 000000000..97901b6f8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_2.c -@@ -0,0 +1,42 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define abd(A, B) (((A) < (B) ? (B) : (A)) - ((A) < (B) ? (A) : (B))) -+ -+#define DEF_LOOP(TYPE) \ -+ void __attribute__ ((noinline, noclone)) \ -+ test_##TYPE (TYPE *__restrict r, TYPE *__restrict a, \ -+ TYPE *__restrict b, TYPE *__restrict c, \ -+ int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = a[i] < 20 ? 
abd (b[i], c[i]) : c[i]; \ -+ } -+ -+#define TEST_ALL(T) \ -+ T (int8_t) \ -+ T (uint8_t) \ -+ T (int16_t) \ -+ T (uint16_t) \ -+ T (int32_t) \ -+ T (uint32_t) \ -+ T (int64_t) \ -+ T (uint64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_2_run.c -new file mode 100644 -index 000000000..474bc0f9a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_2_run.c -@@ -0,0 +1,33 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_abd_2.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE) \ -+ { \ -+ TYPE r[N], a[N], b[N], c[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i); \ -+ b[i] = (i >> 4) << (i & 15); \ -+ c[i] = ((i + 2) % 3) * (i + 1); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE (r, a, b, c, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected = a[i] < 20 ? abd (b[i], c[i]) : c[i]; \ -+ if (r[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_3.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_3.c -new file mode 100644 -index 000000000..dc8bc3cee ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_3.c -@@ -0,0 +1,46 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define abd(A, B) (((A) < (B) ? (B) : (A)) - ((A) < (B) ? (A) : (B))) -+ -+#define DEF_LOOP(TYPE) \ -+ void __attribute__ ((noinline, noclone)) \ -+ test_##TYPE (TYPE *__restrict r, TYPE *__restrict a, \ -+ TYPE *__restrict b, TYPE *__restrict c, \ -+ int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = a[i] < 20 ? 
abd (b[i], c[i]) : a[i]; \ -+ } -+ -+#define TEST_ALL(T) \ -+ T (int8_t) \ -+ T (uint8_t) \ -+ T (int16_t) \ -+ T (uint16_t) \ -+ T (int32_t) \ -+ T (uint32_t) \ -+ T (int64_t) \ -+ T (uint64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 2 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_3_run.c -new file mode 100644 -index 000000000..9f1ac2df8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_3_run.c -@@ -0,0 +1,33 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_abd_3.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE) \ -+ { \ -+ TYPE r[N], a[N], b[N], c[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i); \ -+ b[i] = (i >> 4) << (i & 15); \ -+ c[i] = ((i + 2) % 3) * (i + 1); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE (r, a, b, c, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected = a[i] < 20 ? abd (b[i], c[i]) : a[i]; \ -+ if (r[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_4.c -new file mode 100644 -index 000000000..5c65e59ed ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_4.c -@@ -0,0 +1,42 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define abd(A, B) (((A) < (B) ? (B) : (A)) - ((A) < (B) ? (A) : (B))) -+ -+#define DEF_LOOP(TYPE) \ -+ void __attribute__ ((noinline, noclone)) \ -+ test_##TYPE (TYPE *__restrict r, TYPE *__restrict a, \ -+ TYPE *__restrict b, TYPE *__restrict c, \ -+ int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = a[i] < 20 ? 
abd (b[i], c[i]) : 79; \ -+ } -+ -+#define TEST_ALL(T) \ -+ T (int8_t) \ -+ T (uint8_t) \ -+ T (int16_t) \ -+ T (uint16_t) \ -+ T (int32_t) \ -+ T (uint32_t) \ -+ T (int64_t) \ -+ T (uint64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -+/* { dg-final { scan-assembler-times {\tsel\t} 8 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_4_run.c -new file mode 100644 -index 000000000..47fd9e09f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_4_run.c -@@ -0,0 +1,33 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_abd_4.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE) \ -+ { \ -+ TYPE r[N], a[N], b[N], c[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i); \ -+ b[i] = (i >> 4) << (i & 15); \ -+ c[i] = ((i + 2) % 3) * (i + 1); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE (r, a, b, c, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected = a[i] < 20 ? abd (b[i], c[i]) : 79; \ -+ if (r[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_5.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_5.c -new file mode 100644 -index 000000000..f2c013158 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_5.c -@@ -0,0 +1,46 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define abd(A, B) (((A) < (B) ? (B) : (A)) - ((A) < (B) ? (A) : (B))) -+ -+#define DEF_LOOP(TYPE) \ -+ void __attribute__ ((noinline, noclone)) \ -+ test_##TYPE (TYPE *__restrict r, TYPE *__restrict a, \ -+ TYPE *__restrict b, TYPE *__restrict c, \ -+ int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = a[i] < 20 ? 
abd (b[i], c[i]) : 0; \ -+ } -+ -+#define TEST_ALL(T) \ -+ T (int8_t) \ -+ T (uint8_t) \ -+ T (int16_t) \ -+ T (uint16_t) \ -+ T (int32_t) \ -+ T (uint32_t) \ -+ T (int64_t) \ -+ T (uint64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/z, z[0-9]+\.b\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/z, z[0-9]+\.d\n} 2 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_5_run.c -new file mode 100644 -index 000000000..7cd44be38 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_abd_5_run.c -@@ -0,0 +1,33 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_abd_5.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE) \ -+ { \ -+ TYPE r[N], a[N], b[N], c[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i); \ -+ b[i] = (i >> 4) << (i & 15); \ -+ c[i] = ((i + 2) % 3) * (i + 1); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE (r, a, b, c, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected = a[i] < 20 ? abd (b[i], c[i]) : 0; \ -+ if (r[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_1.c -new file mode 100644 -index 000000000..bd8776637 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_1.c -@@ -0,0 +1,35 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE (TYPE *__restrict r, TYPE *__restrict a, \ -+ TYPE *__restrict b, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = a[i] == 0 ? 
!b[i] : b[i]; \ -+ } -+ -+#define TEST_ALL(T) \ -+ T (int8_t) \ -+ T (uint8_t) \ -+ T (int16_t) \ -+ T (uint16_t) \ -+ T (int32_t) \ -+ T (uint32_t) \ -+ T (int64_t) \ -+ T (uint64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz} } } */ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -+/* Currently we canonicalize the ?: so that !b[i] is the "false" value. */ -+/* { dg-final { scan-assembler-not {\tsel\t} { xfail *-*-* } } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_1_run.c -new file mode 100644 -index 000000000..802bcbb2e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_1_run.c -@@ -0,0 +1,32 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_cnot_1.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE) \ -+ { \ -+ TYPE r[N], a[N], b[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i % 3) < (i % 5); \ -+ b[i] = i % 7 < 3; \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE (r, a, b, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected = a[i] == 0 ? !b[i] : b[i]; \ -+ if (r[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_2.c -new file mode 100644 -index 000000000..3df2431be ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_2.c -@@ -0,0 +1,35 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE (TYPE *__restrict r, TYPE *__restrict a, \ -+ TYPE *__restrict b, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = a[i] == 0 ? !b[i] : a[i]; \ -+ } -+ -+#define TEST_ALL(T) \ -+ T (int8_t) \ -+ T (uint8_t) \ -+ T (int16_t) \ -+ T (uint16_t) \ -+ T (int32_t) \ -+ T (uint32_t) \ -+ T (int64_t) \ -+ T (uint64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz} } } */ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -+/* Currently we canonicalize the ?: so that !b[i] is the "false" value. 
*/ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_2_run.c -new file mode 100644 -index 000000000..6db8bf14e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_2_run.c -@@ -0,0 +1,32 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_cnot_2.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE) \ -+ { \ -+ TYPE r[N], a[N], b[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i % 3) < (i % 5); \ -+ b[i] = i % 7 < 3; \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE (r, a, b, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected = a[i] == 0 ? !b[i] : a[i]; \ -+ if (r[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_3.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_3.c -new file mode 100644 -index 000000000..806e51788 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_3.c -@@ -0,0 +1,35 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE (TYPE *__restrict r, TYPE *__restrict a, \ -+ TYPE *__restrict b, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = a[i] == 0 ? !b[i] : 127; \ -+ } -+ -+#define TEST_ALL(T) \ -+ T (int8_t) \ -+ T (uint8_t) \ -+ T (int16_t) \ -+ T (uint16_t) \ -+ T (int32_t) \ -+ T (uint32_t) \ -+ T (int64_t) \ -+ T (uint64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tcnot\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+, z[0-9]+\n} 8 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^\n]*z} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_3_run.c -new file mode 100644 -index 000000000..6e025e489 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_cnot_3_run.c -@@ -0,0 +1,32 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_cnot_3.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE) \ -+ { \ -+ TYPE r[N], a[N], b[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i % 3) < (i % 5); \ -+ b[i] = i % 7 < 3; \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE (r, a, b, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected = a[i] == 0 ? 
!b[i] : 127; \ -+ if (r[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_1.c -new file mode 100644 -index 000000000..86064ebfc ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_1.c -@@ -0,0 +1,39 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math" } */ -+ -+#include -+ -+#define DEF_LOOP(FLOAT_TYPE, INT_TYPE) \ -+ void __attribute__ ((noipa)) \ -+ test_##INT_TYPE (FLOAT_TYPE *__restrict r, \ -+ INT_TYPE *__restrict a, \ -+ FLOAT_TYPE *__restrict b, \ -+ INT_TYPE *__restrict pred, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ { \ -+ FLOAT_TYPE bi = b[i]; \ -+ r[i] = pred[i] ? (FLOAT_TYPE) a[i] : bi; \ -+ } \ -+ } -+ -+#define TEST_ALL(T) \ -+ T (_Float16, int16_t) \ -+ T (_Float16, uint16_t) \ -+ T (float, int32_t) \ -+ T (float, uint32_t) \ -+ T (double, int64_t) \ -+ T (double, uint64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tscvtf\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tucvtf\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tscvtf\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tucvtf\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tscvtf\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tucvtf\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz} } } */ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_1_run.c -new file mode 100644 -index 000000000..1f712b485 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_1_run.c -@@ -0,0 +1,29 @@ -+/* { dg-do run { target { aarch64_sve_hw } } } */ -+/* { dg-options "-O2 -ftree-vectorize -ftrapping-math" } */ -+ -+#include "cond_convert_1.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(FLOAT_TYPE, INT_TYPE) \ -+ { \ -+ FLOAT_TYPE r[N], b[N]; \ -+ INT_TYPE a[N], pred[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i) * (i % 3 == 0 ? 1 : -1); \ -+ b[i] = (i % 9) * (i % 7 + 1); \ -+ pred[i] = (i % 7 < 4); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##INT_TYPE (r, a, b, pred, N); \ -+ for (int i = 0; i < N; ++i) \ -+ if (r[i] != (pred[i] ? (FLOAT_TYPE) a[i] : b[i])) \ -+ __builtin_abort (); \ -+ } -+ -+int main () -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_2.c -new file mode 100644 -index 000000000..0e60b4381 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_2.c -@@ -0,0 +1,36 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math" } */ -+ -+#include -+ -+#define DEF_LOOP(FLOAT_TYPE, INT_TYPE) \ -+ void __attribute__ ((noipa)) \ -+ test_##INT_TYPE (FLOAT_TYPE *__restrict r, \ -+ INT_TYPE *__restrict a, \ -+ INT_TYPE *__restrict pred, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = pred[i] ? 
(FLOAT_TYPE) a[i] : 1.0; \ -+ } -+ -+#define TEST_ALL(T) \ -+ T (_Float16, int16_t) \ -+ T (_Float16, uint16_t) \ -+ T (float, int32_t) \ -+ T (float, uint32_t) \ -+ T (double, int64_t) \ -+ T (double, uint64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tscvtf\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tucvtf\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tscvtf\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tucvtf\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tscvtf\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tucvtf\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+, z[0-9]+\n} 6 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^\n]*z} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_2_run.c -new file mode 100644 -index 000000000..9a4834921 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_2_run.c -@@ -0,0 +1,28 @@ -+/* { dg-do run { target { aarch64_sve_hw } } } */ -+/* { dg-options "-O2 -ftree-vectorize -ftrapping-math" } */ -+ -+#include "cond_convert_2.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(FLOAT_TYPE, INT_TYPE) \ -+ { \ -+ FLOAT_TYPE r[N]; \ -+ INT_TYPE a[N], pred[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i) * (i % 3 == 0 ? 1 : -1); \ -+ pred[i] = (i % 7 < 4); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##INT_TYPE (r, a, pred, N); \ -+ for (int i = 0; i < N; ++i) \ -+ if (r[i] != (pred[i] ? (FLOAT_TYPE) a[i] : 1.0)) \ -+ __builtin_abort (); \ -+ } -+ -+int main () -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_3.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_3.c -new file mode 100644 -index 000000000..a294effd4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_3.c -@@ -0,0 +1,40 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math" } */ -+ -+#include -+ -+#define DEF_LOOP(FLOAT_TYPE, INT_TYPE) \ -+ void __attribute__ ((noipa)) \ -+ test_##INT_TYPE (FLOAT_TYPE *__restrict r, \ -+ INT_TYPE *__restrict a, \ -+ INT_TYPE *__restrict pred, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = pred[i] ? (FLOAT_TYPE) a[i] : 0.0; \ -+ } -+ -+#define TEST_ALL(T) \ -+ T (_Float16, int16_t) \ -+ T (_Float16, uint16_t) \ -+ T (float, int32_t) \ -+ T (float, uint32_t) \ -+ T (double, int64_t) \ -+ T (double, uint64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tscvtf\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tucvtf\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tscvtf\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tucvtf\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tscvtf\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tucvtf\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* Really we should be able to use MOVPRFX /z here, but at the moment -+ we're relying on combine to merge a SEL and an arithmetic operation, -+ and the SEL doesn't allow the "false" value to be zero when the "true" -+ value is a register. 
*/ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+, z[0-9]+\n} 6 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^\n]*z} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_3_run.c -new file mode 100644 -index 000000000..90021097c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_3_run.c -@@ -0,0 +1,28 @@ -+/* { dg-do run { target { aarch64_sve_hw } } } */ -+/* { dg-options "-O2 -ftree-vectorize -ftrapping-math" } */ -+ -+#include "cond_convert_3.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(FLOAT_TYPE, INT_TYPE) \ -+ { \ -+ FLOAT_TYPE r[N]; \ -+ INT_TYPE a[N], pred[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i) * (i % 3 == 0 ? 1 : -1); \ -+ pred[i] = (i % 7 < 4); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##INT_TYPE (r, a, pred, N); \ -+ for (int i = 0; i < N; ++i) \ -+ if (r[i] != (pred[i] ? (FLOAT_TYPE) a[i] : 0.0)) \ -+ __builtin_abort (); \ -+ } -+ -+int main () -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_4.c -new file mode 100644 -index 000000000..e3a947b26 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_4.c -@@ -0,0 +1,39 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math" } */ -+ -+#include -+ -+#define DEF_LOOP(FLOAT_TYPE, INT_TYPE) \ -+ void __attribute__ ((noipa)) \ -+ test_##INT_TYPE (INT_TYPE *__restrict r, \ -+ FLOAT_TYPE *__restrict a, \ -+ INT_TYPE *__restrict b, \ -+ INT_TYPE *__restrict pred, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ { \ -+ INT_TYPE bi = b[i]; \ -+ r[i] = pred[i] ? (INT_TYPE) a[i] : bi; \ -+ } \ -+ } -+ -+#define TEST_ALL(T) \ -+ T (_Float16, int16_t) \ -+ T (_Float16, uint16_t) \ -+ T (float, int32_t) \ -+ T (float, uint32_t) \ -+ T (double, int64_t) \ -+ T (double, uint64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tfcvtzs\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfcvtzu\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfcvtzs\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfcvtzu\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfcvtzs\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfcvtzu\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz} } } */ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_4_run.c -new file mode 100644 -index 000000000..eaadcb7d4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_4_run.c -@@ -0,0 +1,29 @@ -+/* { dg-do run { target { aarch64_sve_hw } } } */ -+/* { dg-options "-O2 -ftree-vectorize -ftrapping-math" } */ -+ -+#include "cond_convert_4.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(FLOAT_TYPE, INT_TYPE) \ -+ { \ -+ INT_TYPE r[N], b[N], pred[N]; \ -+ FLOAT_TYPE a[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i) * (i % 3 == 0 ? 
1 : -1); \ -+ b[i] = (i % 9) * (i % 7 + 1); \ -+ pred[i] = (i % 7 < 4); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##INT_TYPE (r, a, b, pred, N); \ -+ for (int i = 0; i < N; ++i) \ -+ if (r[i] != (pred[i] ? (INT_TYPE) a[i] : b[i])) \ -+ __builtin_abort (); \ -+ } -+ -+int main () -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_5.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_5.c -new file mode 100644 -index 000000000..5f3da83e6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_5.c -@@ -0,0 +1,36 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math" } */ -+ -+#include -+ -+#define DEF_LOOP(FLOAT_TYPE, INT_TYPE) \ -+ void __attribute__ ((noipa)) \ -+ test_##INT_TYPE (INT_TYPE *__restrict r, \ -+ FLOAT_TYPE *__restrict a, \ -+ INT_TYPE *__restrict pred, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = pred[i] ? (INT_TYPE) a[i] : 72; \ -+ } -+ -+#define TEST_ALL(T) \ -+ T (_Float16, int16_t) \ -+ T (_Float16, uint16_t) \ -+ T (float, int32_t) \ -+ T (float, uint32_t) \ -+ T (double, int64_t) \ -+ T (double, uint64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tfcvtzs\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfcvtzu\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfcvtzs\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfcvtzu\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfcvtzs\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfcvtzu\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+, z[0-9]+\n} 6 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^\n]*z} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_5_run.c -new file mode 100644 -index 000000000..a1f2d4977 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_5_run.c -@@ -0,0 +1,28 @@ -+/* { dg-do run { target { aarch64_sve_hw } } } */ -+/* { dg-options "-O2 -ftree-vectorize -ftrapping-math" } */ -+ -+#include "cond_convert_5.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(FLOAT_TYPE, INT_TYPE) \ -+ { \ -+ INT_TYPE r[N], pred[N]; \ -+ FLOAT_TYPE a[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i) * (i % 3 == 0 ? 1 : -1); \ -+ pred[i] = (i % 7 < 4); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##INT_TYPE (r, a, pred, N); \ -+ for (int i = 0; i < N; ++i) \ -+ if (r[i] != (pred[i] ? (INT_TYPE) a[i] : 72)) \ -+ __builtin_abort (); \ -+ } -+ -+int main () -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_6.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_6.c -new file mode 100644 -index 000000000..6541a2ea4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_6.c -@@ -0,0 +1,40 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math" } */ -+ -+#include -+ -+#define DEF_LOOP(FLOAT_TYPE, INT_TYPE) \ -+ void __attribute__ ((noipa)) \ -+ test_##INT_TYPE (INT_TYPE *__restrict r, \ -+ FLOAT_TYPE *__restrict a, \ -+ INT_TYPE *__restrict pred, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = pred[i] ? 
(INT_TYPE) a[i] : 0; \ -+ } -+ -+#define TEST_ALL(T) \ -+ T (_Float16, int16_t) \ -+ T (_Float16, uint16_t) \ -+ T (float, int32_t) \ -+ T (float, uint32_t) \ -+ T (double, int64_t) \ -+ T (double, uint64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tfcvtzs\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfcvtzu\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfcvtzs\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfcvtzu\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfcvtzs\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfcvtzu\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* Really we should be able to use MOVPRFX /z here, but at the moment -+ we're relying on combine to merge a SEL and an arithmetic operation, -+ and the SEL doesn't allow the "false" value to be zero when the "true" -+ value is a register. */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+, z[0-9]+\n} 6 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^\n]*z} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_6_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_6_run.c -new file mode 100644 -index 000000000..49a64b4fc ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_convert_6_run.c -@@ -0,0 +1,28 @@ -+/* { dg-do run { target { aarch64_sve_hw } } } */ -+/* { dg-options "-O2 -ftree-vectorize -ftrapping-math" } */ -+ -+#include "cond_convert_6.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(FLOAT_TYPE, INT_TYPE) \ -+ { \ -+ INT_TYPE r[N], pred[N]; \ -+ FLOAT_TYPE a[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i) * (i % 3 == 0 ? 1 : -1); \ -+ pred[i] = (i % 7 < 4); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##INT_TYPE (r, a, pred, N); \ -+ for (int i = 0; i < N; ++i) \ -+ if (r[i] != (pred[i] ? (INT_TYPE) a[i] : 0)) \ -+ __builtin_abort (); \ -+ } -+ -+int main () -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_1.c -new file mode 100644 -index 000000000..c1f54e391 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_1.c -@@ -0,0 +1,29 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE, ABS) \ -+ void __attribute__ ((noinline, noclone)) \ -+ test_##TYPE (TYPE *__restrict r, TYPE *__restrict a, \ -+ TYPE *__restrict b, TYPE *__restrict c, \ -+ int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = a[i] < 20 ? 
ABS (b[i] - c[i]) : b[i]; \ -+ } -+ -+#define TEST_ALL(T) \ -+ T (_Float16, __builtin_fabsf16) \ -+ T (float, __builtin_fabsf) \ -+ T (double, __builtin_fabs) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tfabd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfabd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfabd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_1_run.c -new file mode 100644 -index 000000000..a4d6972b9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_1_run.c -@@ -0,0 +1,33 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math" } */ -+ -+#include "cond_fabd_1.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE, ABS) \ -+ { \ -+ TYPE r[N], a[N], b[N], c[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i); \ -+ b[i] = (i >> 4) << (i & 15); \ -+ c[i] = ((i + 2) % 3) * (i + 1); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE (r, a, b, c, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected = a[i] < 20 ? ABS (b[i] - c[i]) : b[i]; \ -+ if (r[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_2.c -new file mode 100644 -index 000000000..dd6eecc17 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_2.c -@@ -0,0 +1,29 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE, ABS) \ -+ void __attribute__ ((noinline, noclone)) \ -+ test_##TYPE (TYPE *__restrict r, TYPE *__restrict a, \ -+ TYPE *__restrict b, TYPE *__restrict c, \ -+ int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = a[i] < 20 ? ABS (b[i] - c[i]) : c[i]; \ -+ } -+ -+#define TEST_ALL(T) \ -+ T (_Float16, __builtin_fabsf16) \ -+ T (float, __builtin_fabsf) \ -+ T (double, __builtin_fabs) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tfabd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfabd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfabd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_2_run.c -new file mode 100644 -index 000000000..28dc7d011 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_2_run.c -@@ -0,0 +1,33 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math" } */ -+ -+#include "cond_fabd_2.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE, ABS) \ -+ { \ -+ TYPE r[N], a[N], b[N], c[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? 
i : 3 * i); \ -+ b[i] = (i >> 4) << (i & 15); \ -+ c[i] = ((i + 2) % 3) * (i + 1); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE (r, a, b, c, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected = a[i] < 20 ? ABS (b[i] - c[i]) : c[i]; \ -+ if (r[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_3.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_3.c -new file mode 100644 -index 000000000..26fd7b265 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_3.c -@@ -0,0 +1,32 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE, ABS) \ -+ void __attribute__ ((noinline, noclone)) \ -+ test_##TYPE (TYPE *__restrict r, TYPE *__restrict a, \ -+ TYPE *__restrict b, TYPE *__restrict c, \ -+ int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = a[i] < 20 ? ABS (b[i] - c[i]) : a[i]; \ -+ } -+ -+#define TEST_ALL(T) \ -+ T (_Float16, __builtin_fabsf16) \ -+ T (float, __builtin_fabsf) \ -+ T (double, __builtin_fabs) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tfabd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfabd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfabd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_3_run.c -new file mode 100644 -index 000000000..be21b7f99 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_3_run.c -@@ -0,0 +1,33 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math" } */ -+ -+#include "cond_fabd_3.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE, ABS) \ -+ { \ -+ TYPE r[N], a[N], b[N], c[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i); \ -+ b[i] = (i >> 4) << (i & 15); \ -+ c[i] = ((i + 2) % 3) * (i + 1); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE (r, a, b, c, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected = a[i] < 20 ? ABS (b[i] - c[i]) : a[i]; \ -+ if (r[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_4.c -new file mode 100644 -index 000000000..78f1fd914 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_4.c -@@ -0,0 +1,29 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE, ABS) \ -+ void __attribute__ ((noinline, noclone)) \ -+ test_##TYPE (TYPE *__restrict r, TYPE *__restrict a, \ -+ TYPE *__restrict b, TYPE *__restrict c, \ -+ int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = a[i] < 20 ? 
ABS (b[i] - c[i]) : 8.0; \ -+ } -+ -+#define TEST_ALL(T) \ -+ T (_Float16, __builtin_fabsf16) \ -+ T (float, __builtin_fabsf) \ -+ T (double, __builtin_fabs) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tfabd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfabd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfabd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -+/* { dg-final { scan-assembler-times {\tsel\t} 3 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_4_run.c -new file mode 100644 -index 000000000..86bdab415 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_4_run.c -@@ -0,0 +1,33 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math" } */ -+ -+#include "cond_fabd_4.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE, ABS) \ -+ { \ -+ TYPE r[N], a[N], b[N], c[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i); \ -+ b[i] = (i >> 4) << (i & 15); \ -+ c[i] = ((i + 2) % 3) * (i + 1); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE (r, a, b, c, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected = a[i] < 20 ? ABS (b[i] - c[i]) : 8; \ -+ if (r[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_5.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_5.c -new file mode 100644 -index 000000000..e66477b3b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_5.c -@@ -0,0 +1,35 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE, ABS) \ -+ void __attribute__ ((noinline, noclone)) \ -+ test_##TYPE (TYPE *__restrict r, TYPE *__restrict a, \ -+ TYPE *__restrict b, TYPE *__restrict c, \ -+ int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = a[i] < 20 ? ABS (b[i] - c[i]) : 0.0; \ -+ } -+ -+#define TEST_ALL(T) \ -+ T (_Float16, __builtin_fabsf16) \ -+ T (float, __builtin_fabsf) \ -+ T (double, __builtin_fabs) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tfabd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfabd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfabd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* Really we should be able to use MOVPRFX /Z here, but at the moment -+ we're relying on combine to merge a SEL and an arithmetic operation, -+ and the SEL doesn't allow zero operands. 
*/ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 1 { xfail *-*-* } } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 1 { xfail *-*-* } } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/z, z[0-9]+\.d\n} 1 { xfail *-*-* } } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} { xfail *-*-* } } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_5_run.c -new file mode 100644 -index 000000000..9fb5fbb81 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fabd_5_run.c -@@ -0,0 +1,33 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize -fno-trapping-math" } */ -+ -+#include "cond_fabd_5.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE, ABS) \ -+ { \ -+ TYPE r[N], a[N], b[N], c[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i); \ -+ b[i] = (i >> 4) << (i & 15); \ -+ c[i] = ((i + 2) % 3) * (i + 1); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE (r, a, b, c, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected = a[i] < 20 ? ABS (b[i] - c[i]) : 0; \ -+ if (r[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_1.c -new file mode 100644 -index 000000000..d103e1f38 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_1.c -@@ -0,0 +1,62 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE, PRED_TYPE, NAME, CONST) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##NAME (TYPE *__restrict x, \ -+ TYPE *__restrict y, \ -+ PRED_TYPE *__restrict pred, \ -+ int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ x[i] = pred[i] != 1 ? 
y[i] + (TYPE) CONST : y[i]; \ -+ } -+ -+#define TEST_TYPE(T, TYPE, PRED_TYPE) \ -+ T (TYPE, PRED_TYPE, half, 0.5) \ -+ T (TYPE, PRED_TYPE, one, 1.0) \ -+ T (TYPE, PRED_TYPE, two, 2.0) \ -+ T (TYPE, PRED_TYPE, minus_half, -0.5) \ -+ T (TYPE, PRED_TYPE, minus_one, -1.0) \ -+ T (TYPE, PRED_TYPE, minus_two, -2.0) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, _Float16, int16_t) \ -+ TEST_TYPE (T, float, int32_t) \ -+ TEST_TYPE (T, double, int64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.5\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.5\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.5\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.5\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.5\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.5\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #-2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #-2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #-2\.0} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz} } } */ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_1_run.c -new file mode 100644 -index 000000000..956ae1435 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_1_run.c -@@ -0,0 +1,32 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_fadd_1.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE, PRED_TYPE, NAME, CONST) \ -+ { \ -+ TYPE x[N], y[N]; \ -+ PRED_TYPE pred[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ y[i] = i * i; \ -+ pred[i] = i % 3; \ -+ } \ -+ test_##TYPE##_##NAME (x, y, pred, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected = i % 3 != 1 ? 
y[i] + (TYPE) CONST : y[i]; \ -+ if (x[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_2.c -new file mode 100644 -index 000000000..b7d02f4ad ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_2.c -@@ -0,0 +1,56 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE, NAME, CONST) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##NAME (TYPE *__restrict x, \ -+ TYPE *__restrict y, \ -+ TYPE *__restrict z, \ -+ int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ x[i] = y[i] < 8 ? z[i] + (TYPE) CONST : y[i]; \ -+ } -+ -+#define TEST_TYPE(T, TYPE) \ -+ T (TYPE, half, 0.5) \ -+ T (TYPE, one, 1.0) \ -+ T (TYPE, two, 2.0) \ -+ T (TYPE, minus_half, -0.5) \ -+ T (TYPE, minus_one, -1.0) \ -+ T (TYPE, minus_two, -2.0) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, float) \ -+ TEST_TYPE (T, double) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.5\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.5\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.5\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.5\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #-2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #-2\.0} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 6 } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 6 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_2_run.c -new file mode 100644 -index 000000000..debf395cc ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_2_run.c -@@ -0,0 +1,31 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_fadd_2.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE, NAME, CONST) \ -+ { \ -+ TYPE x[N], y[N], z[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ y[i] = i % 13; \ -+ z[i] = i * i; \ -+ } \ -+ test_##TYPE##_##NAME (x, y, z, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected = y[i] < 8 ? 
z[i] + (TYPE) CONST : y[i]; \ -+ if (x[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_3.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_3.c -new file mode 100644 -index 000000000..aec0e5aca ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_3.c -@@ -0,0 +1,65 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE, PRED_TYPE, NAME, CONST) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##NAME (TYPE *__restrict x, \ -+ TYPE *__restrict y, \ -+ PRED_TYPE *__restrict pred, \ -+ int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ x[i] = pred[i] != 1 ? y[i] + (TYPE) CONST : 4; \ -+ } -+ -+#define TEST_TYPE(T, TYPE, PRED_TYPE) \ -+ T (TYPE, PRED_TYPE, half, 0.5) \ -+ T (TYPE, PRED_TYPE, one, 1.0) \ -+ T (TYPE, PRED_TYPE, two, 2.0) \ -+ T (TYPE, PRED_TYPE, minus_half, -0.5) \ -+ T (TYPE, PRED_TYPE, minus_one, -1.0) \ -+ T (TYPE, PRED_TYPE, minus_two, -2.0) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, _Float16, int16_t) \ -+ TEST_TYPE (T, float, int32_t) \ -+ TEST_TYPE (T, double, int64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.5\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.5\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.5\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.5\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.5\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.5\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #-2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #-2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #-2\.0} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], 
z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -+/* { dg-final { scan-assembler-not {\tmov\tz} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_3_run.c -new file mode 100644 -index 000000000..d5268c5ca ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_3_run.c -@@ -0,0 +1,32 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_fadd_3.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE, PRED_TYPE, NAME, CONST) \ -+ { \ -+ TYPE x[N], y[N]; \ -+ PRED_TYPE pred[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ y[i] = i * i; \ -+ pred[i] = i % 3; \ -+ } \ -+ test_##TYPE##_##NAME (x, y, pred, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected = i % 3 != 1 ? y[i] + (TYPE) CONST : 4; \ -+ if (x[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_4.c -new file mode 100644 -index 000000000..bb276c140 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_4.c -@@ -0,0 +1,64 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE, PRED_TYPE, NAME, CONST) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##NAME (TYPE *__restrict x, \ -+ TYPE *__restrict y, \ -+ PRED_TYPE *__restrict pred, \ -+ int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ x[i] = pred[i] != 1 ? y[i] + (TYPE) CONST : 0; \ -+ } -+ -+#define TEST_TYPE(T, TYPE, PRED_TYPE) \ -+ T (TYPE, PRED_TYPE, half, 0.5) \ -+ T (TYPE, PRED_TYPE, one, 1.0) \ -+ T (TYPE, PRED_TYPE, two, 2.0) \ -+ T (TYPE, PRED_TYPE, minus_half, -0.5) \ -+ T (TYPE, PRED_TYPE, minus_one, -1.0) \ -+ T (TYPE, PRED_TYPE, minus_two, -2.0) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, _Float16, int16_t) \ -+ TEST_TYPE (T, float, int32_t) \ -+ TEST_TYPE (T, double, int64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.5\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.5\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.5\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.5\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.5\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.5\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0} 1 } } */ -+/* { dg-final { 
scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #-2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #-2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #-2\.0} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 6 } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/z, z[0-9]+\.d\n} 6 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_4_run.c -new file mode 100644 -index 000000000..4ea8be661 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fadd_4_run.c -@@ -0,0 +1,32 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_fadd_4.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE, PRED_TYPE, NAME, CONST) \ -+ { \ -+ TYPE x[N], y[N]; \ -+ PRED_TYPE pred[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ y[i] = i * i; \ -+ pred[i] = i % 3; \ -+ } \ -+ test_##TYPE##_##NAME (x, y, pred, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected = i % 3 != 1 ? y[i] + (TYPE) CONST : 0; \ -+ if (x[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_1.c -new file mode 100644 -index 000000000..d0db0900e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_1.c -@@ -0,0 +1,55 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ -+ -+#include -+ -+#ifndef FN -+#define FN(X) __builtin_fmax##X -+#endif -+ -+#define DEF_LOOP(FN, TYPE, PRED_TYPE, NAME, CONST) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##NAME (TYPE *__restrict x, \ -+ TYPE *__restrict y, \ -+ PRED_TYPE *__restrict pred, \ -+ int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ x[i] = pred[i] != 1 ? 
FN (y[i], CONST) : y[i]; \ -+ } -+ -+#define TEST_TYPE(T, FN, TYPE, PRED_TYPE) \ -+ T (FN, TYPE, PRED_TYPE, zero, 0) \ -+ T (FN, TYPE, PRED_TYPE, one, 1) \ -+ T (FN, TYPE, PRED_TYPE, two, 2) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, FN (f16), _Float16, int16_t) \ -+ TEST_TYPE (T, FN (f32), float, int32_t) \ -+ TEST_TYPE (T, FN (f64), double, int64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.0\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz} } } */ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_1_run.c -new file mode 100644 -index 000000000..00a3c41f2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_1_run.c -@@ -0,0 +1,32 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ -+ -+#include "cond_fmaxnm_1.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(FN, TYPE, PRED_TYPE, NAME, CONST) \ -+ { \ -+ TYPE x[N], y[N]; \ -+ PRED_TYPE pred[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ y[i] = i * i; \ -+ pred[i] = i % 3; \ -+ } \ -+ test_##TYPE##_##NAME (x, y, pred, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected = i % 3 != 1 ? 
FN (y[i], CONST) : y[i]; \ -+ if (x[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_2.c -new file mode 100644 -index 000000000..0b535d15f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_2.c -@@ -0,0 +1,48 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ -+ -+#include -+ -+#ifndef FN -+#define FN(X) __builtin_fmax##X -+#endif -+ -+#define DEF_LOOP(FN, TYPE, NAME, CONST) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##NAME (TYPE *__restrict x, \ -+ TYPE *__restrict y, \ -+ TYPE *__restrict z, \ -+ int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ x[i] = y[i] < 8 ? FN (z[i], CONST) : y[i]; \ -+ } -+ -+#define TEST_TYPE(T, FN, TYPE) \ -+ T (FN, TYPE, zero, 0) \ -+ T (FN, TYPE, one, 1) \ -+ T (FN, TYPE, two, 2) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, FN (f32), float) \ -+ TEST_TYPE (T, FN (f64), double) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.0\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 3 } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 3 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_2_run.c -new file mode 100644 -index 000000000..9eb4d80fc ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_2_run.c -@@ -0,0 +1,31 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ -+ -+#include "cond_fmaxnm_2.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(FN, TYPE, NAME, CONST) \ -+ { \ -+ TYPE x[N], y[N], z[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ y[i] = i % 13; \ -+ z[i] = i * i; \ -+ } \ -+ test_##TYPE##_##NAME (x, y, z, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected = y[i] < 8 ? 
FN (z[i], CONST) : y[i]; \ -+ if (x[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_3.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_3.c -new file mode 100644 -index 000000000..741f8f6d0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_3.c -@@ -0,0 +1,54 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ -+ -+#include -+ -+#ifndef FN -+#define FN(X) __builtin_fmax##X -+#endif -+ -+#define DEF_LOOP(FN, TYPE, PRED_TYPE, NAME, CONST) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##NAME (TYPE *__restrict x, \ -+ TYPE *__restrict y, \ -+ PRED_TYPE *__restrict pred, \ -+ int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ x[i] = pred[i] != 1 ? FN (y[i], CONST) : 4; \ -+ } -+ -+#define TEST_TYPE(T, FN, TYPE, PRED_TYPE) \ -+ T (FN, TYPE, PRED_TYPE, zero, 0) \ -+ T (FN, TYPE, PRED_TYPE, one, 1) \ -+ T (FN, TYPE, PRED_TYPE, two, 2) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, FN (f16), _Float16, int16_t) \ -+ TEST_TYPE (T, FN (f32), float, int32_t) \ -+ TEST_TYPE (T, FN (f64), double, int64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.0\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -+/* { dg-final { scan-assembler-not {\tmov\tz} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_3_run.c -new file mode 100644 -index 000000000..4aac75f0e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_3_run.c -@@ -0,0 +1,32 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ -+ -+#include "cond_fmaxnm_3.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(FN, TYPE, PRED_TYPE, NAME, CONST) \ -+ { \ -+ TYPE x[N], y[N]; \ -+ PRED_TYPE pred[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ y[i] = i * i; \ -+ pred[i] = i % 3; \ -+ } \ -+ test_##TYPE##_##NAME (x, y, pred, N); \ -+ for (int 
i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected = i % 3 != 1 ? FN (y[i], CONST) : 4; \ -+ if (x[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_4.c -new file mode 100644 -index 000000000..83a53c7d4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_4.c -@@ -0,0 +1,53 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ -+ -+#include -+ -+#ifndef FN -+#define FN(X) __builtin_fmax##X -+#endif -+ -+#define DEF_LOOP(FN, TYPE, PRED_TYPE, NAME, CONST) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##NAME (TYPE *__restrict x, \ -+ TYPE *__restrict y, \ -+ PRED_TYPE *__restrict pred, \ -+ int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ x[i] = pred[i] != 1 ? FN (y[i], CONST) : 0; \ -+ } -+ -+#define TEST_TYPE(T, FN, TYPE, PRED_TYPE) \ -+ T (FN, TYPE, PRED_TYPE, zero, 0) \ -+ T (FN, TYPE, PRED_TYPE, one, 1) \ -+ T (FN, TYPE, PRED_TYPE, two, 2) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, FN (f16), _Float16, int16_t) \ -+ TEST_TYPE (T, FN (f32), float, int32_t) \ -+ TEST_TYPE (T, FN (f64), double, int64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.0\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 3 } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/z, z[0-9]+\.d\n} 3 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_4_run.c -new file mode 100644 -index 000000000..e1d904338 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmaxnm_4_run.c -@@ -0,0 +1,32 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ -+ -+#include "cond_fmaxnm_4.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(FN, TYPE, PRED_TYPE, NAME, CONST) \ -+ { \ -+ TYPE x[N], y[N]; \ -+ PRED_TYPE pred[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ y[i] = i * i; \ -+ pred[i] = i % 3; \ -+ } \ -+ test_##TYPE##_##NAME (x, y, pred, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected = i % 3 != 1 ? 
FN (y[i], CONST) : 0; \ -+ if (x[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_1.c -new file mode 100644 -index 000000000..d667b2088 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_1.c -@@ -0,0 +1,29 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ -+ -+#define FN(X) __builtin_fmin##X -+#include "cond_fmaxnm_1.c" -+ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.0\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz} } } */ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_1_run.c -new file mode 100644 -index 000000000..5df2ff84b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_1_run.c -@@ -0,0 +1,5 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ -+ -+#define FN(X) __builtin_fmin##X -+#include "cond_fmaxnm_1_run.c" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_2.c -new file mode 100644 -index 000000000..d66a84b01 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_2.c -@@ -0,0 +1,23 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ -+ -+#define FN(X) __builtin_fmin##X -+#include "cond_fmaxnm_2.c" -+ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.0\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ -+ -+/* { 
dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 3 } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 3 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_2_run.c -new file mode 100644 -index 000000000..79a98bb77 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_2_run.c -@@ -0,0 +1,5 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ -+ -+#define FN(X) __builtin_fmin##X -+#include "cond_fmaxnm_2_run.c" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_3.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_3.c -new file mode 100644 -index 000000000..d39dd1825 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_3.c -@@ -0,0 +1,28 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ -+ -+#define FN(X) __builtin_fmin##X -+#include "cond_fmaxnm_3.c" -+ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.0\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -+/* { dg-final { scan-assembler-not {\tmov\tz} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_3_run.c -new file mode 100644 -index 000000000..ca1a047da ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_3_run.c -@@ -0,0 +1,5 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ -+ -+#define FN(X) __builtin_fmin##X 
-+#include "cond_fmaxnm_3_run.c" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_4.c -new file mode 100644 -index 000000000..fff6fdd37 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_4.c -@@ -0,0 +1,27 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ -+ -+#define FN(X) __builtin_fmin##X -+#include "cond_fmaxnm_4.c" -+ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.0\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 3 } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/z, z[0-9]+\.d\n} 3 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_4_run.c -new file mode 100644 -index 000000000..b945d0470 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fminnm_4_run.c -@@ -0,0 +1,5 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */ -+ -+#define FN(X) __builtin_fmin##X -+#include "cond_fmaxnm_4_run.c" -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_1.c -new file mode 100644 -index 000000000..ce417ed85 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_1.c -@@ -0,0 +1,47 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE, PRED_TYPE, NAME, CONST) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##NAME (TYPE *__restrict x, \ -+ TYPE *__restrict y, \ -+ PRED_TYPE *__restrict pred, \ -+ int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ x[i] = pred[i] != 1 ? 
y[i] * (TYPE) CONST : y[i]; \ -+ } -+ -+#define TEST_TYPE(T, TYPE, PRED_TYPE) \ -+ T (TYPE, PRED_TYPE, half, 0.5) \ -+ T (TYPE, PRED_TYPE, two, 2.0) \ -+ T (TYPE, PRED_TYPE, four, 4.0) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, _Float16, int16_t) \ -+ TEST_TYPE (T, float, int32_t) \ -+ TEST_TYPE (T, double, int64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.5\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.5\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.5\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #2\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #2\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2\.0\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #4\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #4\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #4\.0} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz} } } */ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_1_run.c -new file mode 100644 -index 000000000..9ca5b5080 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_1_run.c -@@ -0,0 +1,32 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_fmul_1.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE, PRED_TYPE, NAME, CONST) \ -+ { \ -+ TYPE x[N], y[N]; \ -+ PRED_TYPE pred[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ y[i] = i * i; \ -+ pred[i] = i % 3; \ -+ } \ -+ test_##TYPE##_##NAME (x, y, pred, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected = i % 3 != 1 ? y[i] * (TYPE) CONST : y[i]; \ -+ if (x[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_2.c -new file mode 100644 -index 000000000..cbf9d13a5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_2.c -@@ -0,0 +1,44 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE, NAME, CONST) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##NAME (TYPE *__restrict x, \ -+ TYPE *__restrict y, \ -+ TYPE *__restrict z, \ -+ int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ x[i] = y[i] < 8 ? 
z[i] * (TYPE) CONST : y[i]; \ -+ } -+ -+#define TEST_TYPE(T, TYPE) \ -+ T (TYPE, half, 0.5) \ -+ T (TYPE, two, 2.0) \ -+ T (TYPE, four, 4.0) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, float) \ -+ TEST_TYPE (T, double) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.5\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.5\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #2\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2\.0\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #4\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #4\.0} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 3 } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 3 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_2_run.c -new file mode 100644 -index 000000000..44b283ba3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_2_run.c -@@ -0,0 +1,31 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_fmul_2.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE, NAME, CONST) \ -+ { \ -+ TYPE x[N], y[N], z[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ y[i] = i % 13; \ -+ z[i] = i * i; \ -+ } \ -+ test_##TYPE##_##NAME (x, y, z, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected = y[i] < 8 ? z[i] * (TYPE) CONST : y[i]; \ -+ if (x[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_3.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_3.c -new file mode 100644 -index 000000000..4da147e15 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_3.c -@@ -0,0 +1,50 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE, PRED_TYPE, NAME, CONST) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##NAME (TYPE *__restrict x, \ -+ TYPE *__restrict y, \ -+ PRED_TYPE *__restrict pred, \ -+ int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ x[i] = pred[i] != 1 ? 
y[i] * (TYPE) CONST : 8; \ -+ } -+ -+#define TEST_TYPE(T, TYPE, PRED_TYPE) \ -+ T (TYPE, PRED_TYPE, half, 0.5) \ -+ T (TYPE, PRED_TYPE, two, 2.0) \ -+ T (TYPE, PRED_TYPE, four, 4.0) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, _Float16, int16_t) \ -+ TEST_TYPE (T, float, int32_t) \ -+ TEST_TYPE (T, double, int64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.5\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.5\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.5\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #2\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #2\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2\.0\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #4\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #4\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #4\.0} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -+/* { dg-final { scan-assembler-not {\tmov\tz} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_3_run.c -new file mode 100644 -index 000000000..9b81d43c9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_3_run.c -@@ -0,0 +1,32 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_fmul_3.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE, PRED_TYPE, NAME, CONST) \ -+ { \ -+ TYPE x[N], y[N]; \ -+ PRED_TYPE pred[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ y[i] = i * i; \ -+ pred[i] = i % 3; \ -+ } \ -+ test_##TYPE##_##NAME (x, y, pred, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected = i % 3 != 1 ? y[i] * (TYPE) CONST : 8; \ -+ if (x[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_4.c -new file mode 100644 -index 000000000..c4fdb2b2b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_4.c -@@ -0,0 +1,49 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE, PRED_TYPE, NAME, CONST) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##NAME (TYPE *__restrict x, \ -+ TYPE *__restrict y, \ -+ PRED_TYPE *__restrict pred, \ -+ int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ x[i] = pred[i] != 1 ? 
y[i] * (TYPE) CONST : 0; \ -+ } -+ -+#define TEST_TYPE(T, TYPE, PRED_TYPE) \ -+ T (TYPE, PRED_TYPE, half, 0.5) \ -+ T (TYPE, PRED_TYPE, two, 2.0) \ -+ T (TYPE, PRED_TYPE, four, 4.0) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, _Float16, int16_t) \ -+ TEST_TYPE (T, float, int32_t) \ -+ TEST_TYPE (T, double, int64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.5\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.5\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.5\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #2\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #2\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #2\.0\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #4\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #4\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #4\.0} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 3 } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/z, z[0-9]+\.d\n} 3 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_4_run.c -new file mode 100644 -index 000000000..b93e031e5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fmul_4_run.c -@@ -0,0 +1,32 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_fmul_4.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE, PRED_TYPE, NAME, CONST) \ -+ { \ -+ TYPE x[N], y[N]; \ -+ PRED_TYPE pred[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ y[i] = i * i; \ -+ pred[i] = i % 3; \ -+ } \ -+ test_##TYPE##_##NAME (x, y, pred, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected = i % 3 != 1 ? y[i] * (TYPE) CONST : 0; \ -+ if (x[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_1.c -new file mode 100644 -index 000000000..8e7172af4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_1.c -@@ -0,0 +1,47 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE, PRED_TYPE, NAME, CONST) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##NAME (TYPE *__restrict x, \ -+ TYPE *__restrict y, \ -+ PRED_TYPE *__restrict pred, \ -+ int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ x[i] = pred[i] != 1 ? 
(TYPE) CONST - y[i] : y[i]; \ -+ } -+ -+#define TEST_TYPE(T, TYPE, PRED_TYPE) \ -+ T (TYPE, PRED_TYPE, half, 0.5) \ -+ T (TYPE, PRED_TYPE, one, 1.0) \ -+ T (TYPE, PRED_TYPE, two, 2.0) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, _Float16, int16_t) \ -+ TEST_TYPE (T, float, int32_t) \ -+ TEST_TYPE (T, double, int64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.5\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.5\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.5\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz} } } */ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_1_run.c -new file mode 100644 -index 000000000..61ffac429 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_1_run.c -@@ -0,0 +1,32 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_fsubr_1.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE, PRED_TYPE, NAME, CONST) \ -+ { \ -+ TYPE x[N], y[N]; \ -+ PRED_TYPE pred[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ y[i] = i * i; \ -+ pred[i] = i % 3; \ -+ } \ -+ test_##TYPE##_##NAME (x, y, pred, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected = i % 3 != 1 ? (TYPE) CONST - y[i] : y[i]; \ -+ if (x[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_2.c -new file mode 100644 -index 000000000..6d2efde94 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_2.c -@@ -0,0 +1,44 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE, NAME, CONST) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##NAME (TYPE *__restrict x, \ -+ TYPE *__restrict y, \ -+ TYPE *__restrict z, \ -+ int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ x[i] = y[i] < 8 ? 
(TYPE) CONST - z[i] : y[i]; \ -+ } -+ -+#define TEST_TYPE(T, TYPE) \ -+ T (TYPE, half, 0.5) \ -+ T (TYPE, one, 1.0) \ -+ T (TYPE, two, 2.0) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, float) \ -+ TEST_TYPE (T, double) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.5\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.5\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 3 } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 3 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_2_run.c -new file mode 100644 -index 000000000..1b25392b0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_2_run.c -@@ -0,0 +1,31 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_fsubr_2.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE, NAME, CONST) \ -+ { \ -+ TYPE x[N], y[N], z[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ y[i] = i % 13; \ -+ z[i] = i * i; \ -+ } \ -+ test_##TYPE##_##NAME (x, y, z, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected = y[i] < 8 ? (TYPE) CONST - z[i] : y[i]; \ -+ if (x[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_3.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_3.c -new file mode 100644 -index 000000000..328af5741 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_3.c -@@ -0,0 +1,50 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE, PRED_TYPE, NAME, CONST) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##NAME (TYPE *__restrict x, \ -+ TYPE *__restrict y, \ -+ PRED_TYPE *__restrict pred, \ -+ int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ x[i] = pred[i] != 1 ? 
(TYPE) CONST - y[i] : 4; \ -+ } -+ -+#define TEST_TYPE(T, TYPE, PRED_TYPE) \ -+ T (TYPE, PRED_TYPE, half, 0.5) \ -+ T (TYPE, PRED_TYPE, one, 1.0) \ -+ T (TYPE, PRED_TYPE, two, 2.0) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, _Float16, int16_t) \ -+ TEST_TYPE (T, float, int32_t) \ -+ TEST_TYPE (T, double, int64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.5\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.5\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.5\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -+/* { dg-final { scan-assembler-not {\tmov\tz} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_3_run.c -new file mode 100644 -index 000000000..8978287df ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_3_run.c -@@ -0,0 +1,32 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_fsubr_3.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE, PRED_TYPE, NAME, CONST) \ -+ { \ -+ TYPE x[N], y[N]; \ -+ PRED_TYPE pred[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ y[i] = i * i; \ -+ pred[i] = i % 3; \ -+ } \ -+ test_##TYPE##_##NAME (x, y, pred, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected = i % 3 != 1 ? (TYPE) CONST - y[i] : 4; \ -+ if (x[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_4.c -new file mode 100644 -index 000000000..1d420b104 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_4.c -@@ -0,0 +1,49 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE, PRED_TYPE, NAME, CONST) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##NAME (TYPE *__restrict x, \ -+ TYPE *__restrict y, \ -+ PRED_TYPE *__restrict pred, \ -+ int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ x[i] = pred[i] != 1 ? 
(TYPE) CONST - y[i] : 0; \ -+ } -+ -+#define TEST_TYPE(T, TYPE, PRED_TYPE) \ -+ T (TYPE, PRED_TYPE, half, 0.5) \ -+ T (TYPE, PRED_TYPE, one, 1.0) \ -+ T (TYPE, PRED_TYPE, two, 2.0) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, _Float16, int16_t) \ -+ TEST_TYPE (T, float, int32_t) \ -+ TEST_TYPE (T, double, int64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.5\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.5\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.5\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 3 } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/z, z[0-9]+\.d\n} 3 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_4_run.c -new file mode 100644 -index 000000000..2cb3409af ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_fsubr_4_run.c -@@ -0,0 +1,32 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_fsubr_4.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE, PRED_TYPE, NAME, CONST) \ -+ { \ -+ TYPE x[N], y[N]; \ -+ PRED_TYPE pred[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ y[i] = i * i; \ -+ pred[i] = i % 3; \ -+ } \ -+ test_##TYPE##_##NAME (x, y, pred, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected = i % 3 != 1 ? (TYPE) CONST - y[i] : 0; \ -+ if (x[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_1.c -new file mode 100644 -index 000000000..a1e80b8a9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_1.c -@@ -0,0 +1,62 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define bit_and(A, B) ((A) & (B)) -+#define bit_or(A, B) ((A) | (B)) -+#define bit_xor(A, B) ((A) ^ (B)) -+#define bit_bic(A, B) ((A) & ~(B)) -+ -+#define DEF_LOOP(TYPE, OP) \ -+ void __attribute__ ((noinline, noclone)) \ -+ test_##TYPE##_##OP (TYPE *__restrict r, \ -+ TYPE *__restrict a, \ -+ TYPE *__restrict b, \ -+ TYPE *__restrict c, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = a[i] < 20 ? 
OP (b[i], c[i]) : b[i]; \ -+ } -+ -+#define TEST_TYPE(T, TYPE) \ -+ T (TYPE, bit_and) \ -+ T (TYPE, bit_or) \ -+ T (TYPE, bit_xor) \ -+ T (TYPE, bit_bic) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, int8_t) \ -+ TEST_TYPE (T, uint8_t) \ -+ TEST_TYPE (T, int16_t) \ -+ TEST_TYPE (T, uint16_t) \ -+ TEST_TYPE (T, int32_t) \ -+ TEST_TYPE (T, uint32_t) \ -+ TEST_TYPE (T, int64_t) \ -+ TEST_TYPE (T, uint64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_1_run.c -new file mode 100644 -index 000000000..cb12e5609 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_1_run.c -@@ -0,0 +1,33 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_logical_1.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE, OP) \ -+ { \ -+ TYPE r[N], a[N], b[N], c[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i); \ -+ b[i] = (i >> 4) << (i & 15); \ -+ c[i] = ((i + 2) % 3) * (i + 1); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE##_##OP (r, a, b, c, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected = a[i] < 20 ? 
OP (b[i], c[i]) : b[i]; \ -+ if (r[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_2.c -new file mode 100644 -index 000000000..c476fe2ff ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_2.c -@@ -0,0 +1,63 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define bit_and(A, B) ((A) & (B)) -+#define bit_or(A, B) ((A) | (B)) -+#define bit_xor(A, B) ((A) ^ (B)) -+#define bit_bic(A, B) ((A) & ~(B)) -+ -+#define DEF_LOOP(TYPE, OP) \ -+ void __attribute__ ((noinline, noclone)) \ -+ test_##TYPE##_##OP (TYPE *__restrict r, \ -+ TYPE *__restrict a, \ -+ TYPE *__restrict b, \ -+ TYPE *__restrict c, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = a[i] < 20 ? OP (b[i], c[i]) : c[i]; \ -+ } -+ -+#define TEST_TYPE(T, TYPE) \ -+ T (TYPE, bit_and) \ -+ T (TYPE, bit_or) \ -+ T (TYPE, bit_xor) \ -+ T (TYPE, bit_bic) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, int8_t) \ -+ TEST_TYPE (T, uint8_t) \ -+ TEST_TYPE (T, int16_t) \ -+ TEST_TYPE (T, uint16_t) \ -+ TEST_TYPE (T, int32_t) \ -+ TEST_TYPE (T, uint32_t) \ -+ TEST_TYPE (T, int64_t) \ -+ TEST_TYPE (T, uint64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -+/* There's no BICR or equivalent, so the BIC functions need a select. */ -+/* { dg-final { scan-assembler-times {\tsel\t} 8 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_2_run.c -new file mode 100644 -index 000000000..9b9918cc8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_2_run.c -@@ -0,0 +1,33 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_logical_2.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE, OP) \ -+ { \ -+ TYPE r[N], a[N], b[N], c[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? 
i : 3 * i); \ -+ b[i] = (i >> 4) << (i & 15); \ -+ c[i] = ((i + 2) % 3) * (i + 1); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE##_##OP (r, a, b, c, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected = a[i] < 20 ? OP (b[i], c[i]) : c[i]; \ -+ if (r[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_3.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_3.c -new file mode 100644 -index 000000000..7ad2c4ea3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_3.c -@@ -0,0 +1,66 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define bit_and(A, B) ((A) & (B)) -+#define bit_or(A, B) ((A) | (B)) -+#define bit_xor(A, B) ((A) ^ (B)) -+#define bit_bic(A, B) ((A) & ~(B)) -+ -+#define DEF_LOOP(TYPE, OP) \ -+ void __attribute__ ((noinline, noclone)) \ -+ test_##TYPE##_##OP (TYPE *__restrict r, \ -+ TYPE *__restrict a, \ -+ TYPE *__restrict b, \ -+ TYPE *__restrict c, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = a[i] < 20 ? OP (b[i], c[i]) : a[i]; \ -+ } -+ -+#define TEST_TYPE(T, TYPE) \ -+ T (TYPE, bit_and) \ -+ T (TYPE, bit_or) \ -+ T (TYPE, bit_xor) \ -+ T (TYPE, bit_bic) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, int8_t) \ -+ TEST_TYPE (T, uint8_t) \ -+ TEST_TYPE (T, int16_t) \ -+ TEST_TYPE (T, uint16_t) \ -+ TEST_TYPE (T, int32_t) \ -+ TEST_TYPE (T, uint32_t) \ -+ TEST_TYPE (T, int64_t) \ -+ TEST_TYPE (T, uint64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b\n} 8 } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 8 } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 8 } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 8 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_3_run.c -new 
file mode 100644 -index 000000000..05dc78ab3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_3_run.c -@@ -0,0 +1,33 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_logical_3.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE, OP) \ -+ { \ -+ TYPE r[N], a[N], b[N], c[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i); \ -+ b[i] = (i >> 4) << (i & 15); \ -+ c[i] = ((i + 2) % 3) * (i + 1); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE##_##OP (r, a, b, c, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected = a[i] < 20 ? OP (b[i], c[i]) : a[i]; \ -+ if (r[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_4.c -new file mode 100644 -index 000000000..00217bffa ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_4.c -@@ -0,0 +1,62 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define bit_and(A, B) ((A) & (B)) -+#define bit_or(A, B) ((A) | (B)) -+#define bit_xor(A, B) ((A) ^ (B)) -+#define bit_bic(A, B) ((A) & ~(B)) -+ -+#define DEF_LOOP(TYPE, OP) \ -+ void __attribute__ ((noinline, noclone)) \ -+ test_##TYPE##_##OP (TYPE *__restrict r, \ -+ TYPE *__restrict a, \ -+ TYPE *__restrict b, \ -+ TYPE *__restrict c, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = a[i] < 20 ? OP (b[i], c[i]) : 42; \ -+ } -+ -+#define TEST_TYPE(T, TYPE) \ -+ T (TYPE, bit_and) \ -+ T (TYPE, bit_or) \ -+ T (TYPE, bit_xor) \ -+ T (TYPE, bit_bic) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, int8_t) \ -+ TEST_TYPE (T, uint8_t) \ -+ TEST_TYPE (T, int16_t) \ -+ TEST_TYPE (T, uint16_t) \ -+ TEST_TYPE (T, int32_t) \ -+ TEST_TYPE (T, uint32_t) \ -+ TEST_TYPE (T, int64_t) \ -+ TEST_TYPE (T, uint64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -+/* { dg-final { scan-assembler-times {\tsel\t} 32 } } */ -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_4_run.c -new file mode 100644 -index 000000000..46fb11594 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_4_run.c -@@ -0,0 +1,33 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_logical_4.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE, OP) \ -+ { \ -+ TYPE r[N], a[N], b[N], c[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i); \ -+ b[i] = (i >> 4) << (i & 15); \ -+ c[i] = ((i + 2) % 3) * (i + 1); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE##_##OP (r, a, b, c, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected = a[i] < 20 ? OP (b[i], c[i]) : 42; \ -+ if (r[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_5.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_5.c -new file mode 100644 -index 000000000..36b541f21 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_5.c -@@ -0,0 +1,66 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define bit_and(A, B) ((A) & (B)) -+#define bit_or(A, B) ((A) | (B)) -+#define bit_xor(A, B) ((A) ^ (B)) -+#define bit_bic(A, B) ((A) & ~(B)) -+ -+#define DEF_LOOP(TYPE, OP) \ -+ void __attribute__ ((noinline, noclone)) \ -+ test_##TYPE##_##OP (TYPE *__restrict r, \ -+ TYPE *__restrict a, \ -+ TYPE *__restrict b, \ -+ TYPE *__restrict c, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = a[i] < 20 ? OP (b[i], c[i]) : 0; \ -+ } -+ -+#define TEST_TYPE(T, TYPE) \ -+ T (TYPE, bit_and) \ -+ T (TYPE, bit_or) \ -+ T (TYPE, bit_xor) \ -+ T (TYPE, bit_bic) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, int8_t) \ -+ TEST_TYPE (T, uint8_t) \ -+ TEST_TYPE (T, int16_t) \ -+ TEST_TYPE (T, uint16_t) \ -+ TEST_TYPE (T, int32_t) \ -+ TEST_TYPE (T, uint32_t) \ -+ TEST_TYPE (T, int64_t) \ -+ TEST_TYPE (T, uint64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tbic\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/z, z[0-9]+\.b\n} 8 } } */ 
-+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 8 } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 8 } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/z, z[0-9]+\.d\n} 8 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_5_run.c -new file mode 100644 -index 000000000..e0da5fe58 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_logical_5_run.c -@@ -0,0 +1,33 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_logical_5.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE, OP) \ -+ { \ -+ TYPE r[N], a[N], b[N], c[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i); \ -+ b[i] = (i >> 4) << (i & 15); \ -+ c[i] = ((i + 2) % 3) * (i + 1); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE##_##OP (r, a, b, c, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected = a[i] < 20 ? OP (b[i], c[i]) : 0; \ -+ if (r[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_1.c -new file mode 100644 -index 000000000..cb01d50f3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_1.c -@@ -0,0 +1,52 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE, NAME, OP) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##NAME (TYPE *__restrict r, \ -+ TYPE *__restrict a, \ -+ TYPE *__restrict b, TYPE c, \ -+ TYPE *__restrict pred, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = pred[i] != 1 ? 
a[i] OP b[i] * c : b[i]; \ -+ } -+ -+#define TEST_TYPE(T, TYPE) \ -+ T (TYPE, add, +) \ -+ T (TYPE, sub, -) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, uint8_t) \ -+ TEST_TYPE (T, uint16_t) \ -+ TEST_TYPE (T, uint32_t) \ -+ TEST_TYPE (T, uint64_t) \ -+ TEST_TYPE (T, _Float16) \ -+ TEST_TYPE (T, float) \ -+ TEST_TYPE (T, double) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tmad\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmad\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmad\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmad\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_1_run.c -new file mode 100644 -index 000000000..bcfc62280 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_1_run.c -@@ -0,0 +1,35 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_mla_1.c" -+ -+#define FACTOR 17 -+#define N 99 -+ -+#define TEST_LOOP(TYPE, NAME, OP) \ -+ { \ -+ TYPE r[N], a[N], b[N], pred[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i); \ -+ b[i] = (i >> 4) << (i & 15); \ -+ pred[i] = i % 3 < i % 5; \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE##_##NAME (r, a, b, FACTOR, pred, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected \ -+ = pred[i] != 1 ? a[i] OP b[i] * (TYPE) FACTOR : b[i]; \ -+ if (r[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_2.c -new file mode 100644 -index 000000000..b6ea1a3e2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_2.c -@@ -0,0 +1,53 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE, NAME, OP) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##NAME (TYPE *__restrict r, \ -+ TYPE *__restrict a, \ -+ TYPE *__restrict b, TYPE c, \ -+ TYPE *__restrict pred, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = pred[i] != 1 ? 
a[i] OP b[i] * c : c; \ -+ } -+ -+#define TEST_TYPE(T, TYPE) \ -+ T (TYPE, add, +) \ -+ T (TYPE, sub, -) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, uint8_t) \ -+ TEST_TYPE (T, uint16_t) \ -+ TEST_TYPE (T, uint32_t) \ -+ TEST_TYPE (T, uint64_t) \ -+ TEST_TYPE (T, _Float16) \ -+ TEST_TYPE (T, float) \ -+ TEST_TYPE (T, double) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tmad\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmad\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmad\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmad\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmad\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmsb\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+, z[0-9]+\n} 14 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_2_run.c -new file mode 100644 -index 000000000..79998b84e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_2_run.c -@@ -0,0 +1,36 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_mla_2.c" -+ -+#define FACTOR 17 -+#define N 99 -+ -+#define TEST_LOOP(TYPE, NAME, OP) \ -+ { \ -+ TYPE r[N], a[N], b[N], pred[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i); \ -+ b[i] = (i >> 4) << (i & 15); \ -+ pred[i] = i % 3 < i % 5; \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE##_##NAME (r, a, b, FACTOR, pred, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected = (pred[i] != 1 \ -+ ? a[i] OP b[i] * (TYPE) FACTOR \ -+ : (TYPE) FACTOR); \ -+ if (r[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_3.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_3.c -new file mode 100644 -index 000000000..085fccf53 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_3.c -@@ -0,0 +1,52 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE, NAME, OP) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##NAME (TYPE *__restrict r, \ -+ TYPE *__restrict a, \ -+ TYPE *__restrict b, TYPE c, \ -+ TYPE *__restrict pred, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = pred[i] != 1 ? 
a[i] OP b[i] * c : a[i]; \ -+ } -+ -+#define TEST_TYPE(T, TYPE) \ -+ T (TYPE, add, +) \ -+ T (TYPE, sub, -) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, uint8_t) \ -+ TEST_TYPE (T, uint16_t) \ -+ TEST_TYPE (T, uint32_t) \ -+ TEST_TYPE (T, uint64_t) \ -+ TEST_TYPE (T, _Float16) \ -+ TEST_TYPE (T, float) \ -+ TEST_TYPE (T, double) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tmla\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmla\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmla\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmla\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tmls\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmls\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmls\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmls\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_3_run.c -new file mode 100644 -index 000000000..cbd1185b2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_3_run.c -@@ -0,0 +1,35 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_mla_3.c" -+ -+#define FACTOR 17 -+#define N 99 -+ -+#define TEST_LOOP(TYPE, NAME, OP) \ -+ { \ -+ TYPE r[N], a[N], b[N], pred[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i); \ -+ b[i] = (i >> 4) << (i & 15); \ -+ pred[i] = i % 3 < i % 5; \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE##_##NAME (r, a, b, FACTOR, pred, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected \ -+ = pred[i] != 1 ? a[i] OP b[i] * (TYPE) FACTOR : a[i]; \ -+ if (r[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_4.c -new file mode 100644 -index 000000000..ed9f73e9c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_4.c -@@ -0,0 +1,56 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE, NAME, OP) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##NAME (TYPE *__restrict r, \ -+ TYPE *__restrict a, \ -+ TYPE *__restrict b, TYPE c, \ -+ TYPE *__restrict pred, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = pred[i] == 1 ? 
a[i] OP b[i] * c : pred[i]; \ -+ } -+ -+#define TEST_TYPE(T, TYPE) \ -+ T (TYPE, add, +) \ -+ T (TYPE, sub, -) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, uint8_t) \ -+ TEST_TYPE (T, uint16_t) \ -+ TEST_TYPE (T, uint32_t) \ -+ TEST_TYPE (T, uint64_t) \ -+ TEST_TYPE (T, _Float16) \ -+ TEST_TYPE (T, float) \ -+ TEST_TYPE (T, double) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tmla\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmla\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmla\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmla\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tmls\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmls\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmls\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmls\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m,} 4 } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m,} 4 } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/m,} 4 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_4_run.c -new file mode 100644 -index 000000000..5e078594a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_4_run.c -@@ -0,0 +1,36 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_mla_4.c" -+ -+#define FACTOR 17 -+#define N 99 -+ -+#define TEST_LOOP(TYPE, NAME, OP) \ -+ { \ -+ TYPE r[N], a[N], b[N], pred[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i); \ -+ b[i] = (i >> 4) << (i & 15); \ -+ pred[i] = i % 3; \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE##_##NAME (r, a, b, FACTOR, pred, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected = (pred[i] == 1 \ -+ ? 
a[i] OP b[i] * (TYPE) FACTOR \ -+ : pred[i]); \ -+ if (r[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_5.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_5.c -new file mode 100644 -index 000000000..97e233579 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_5.c -@@ -0,0 +1,56 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE, NAME, OP) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##NAME (TYPE *__restrict r, \ -+ TYPE *__restrict a, \ -+ TYPE *__restrict b, TYPE c, \ -+ TYPE *__restrict pred, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = pred[i] ? a[i] OP b[i] * c : 0; \ -+ } -+ -+#define TEST_TYPE(T, TYPE) \ -+ T (TYPE, add, +) \ -+ T (TYPE, sub, -) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, uint8_t) \ -+ TEST_TYPE (T, uint16_t) \ -+ TEST_TYPE (T, uint32_t) \ -+ TEST_TYPE (T, uint64_t) \ -+ TEST_TYPE (T, _Float16) \ -+ TEST_TYPE (T, float) \ -+ TEST_TYPE (T, double) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\t(?:mla|mad)\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\t(?:mla|mad)\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\t(?:mla|mad)\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\t(?:mla|mad)\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\t(?:mls|msb)\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\t(?:mls|msb)\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\t(?:mls|msb)\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\t(?:mls|msb)\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\t(?:fmla|fmad)\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\t(?:fmla|fmad)\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\t(?:fmla|fmad)\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\t(?:fmls|fmsb)\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\t(?:fmls|fmsb)\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\t(?:fmls|fmsb)\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/z,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z,} 4 } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z,} 4 } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/z,} 4 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_5_run.c -new file mode 100644 -index 000000000..9de46e30f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_5_run.c -@@ -0,0 +1,35 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_mla_5.c" -+ -+#define FACTOR 17 -+#define N 99 -+ -+#define TEST_LOOP(TYPE, NAME, OP) \ -+ { \ -+ TYPE r[N], a[N], b[N], pred[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? 
i : 3 * i); \ -+ b[i] = (i >> 4) << (i & 15); \ -+ pred[i] = i % 3 < i % 5; \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE##_##NAME (r, a, b, FACTOR, pred, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected \ -+ = pred[i] ? a[i] OP b[i] * (TYPE) FACTOR : 0; \ -+ if (r[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_6.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_6.c -new file mode 100644 -index 000000000..832bdb3d8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_6.c -@@ -0,0 +1,53 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE, NAME, OP) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##NAME (TYPE *__restrict r, \ -+ TYPE *__restrict a, \ -+ TYPE *__restrict b, TYPE c, \ -+ TYPE *__restrict pred, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = pred[i] ? a[i] OP b[i] * c : 5; \ -+ } -+ -+#define TEST_TYPE(T, TYPE) \ -+ T (TYPE, add, +) \ -+ T (TYPE, sub, -) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, uint8_t) \ -+ TEST_TYPE (T, uint16_t) \ -+ TEST_TYPE (T, uint32_t) \ -+ TEST_TYPE (T, uint64_t) \ -+ TEST_TYPE (T, _Float16) \ -+ TEST_TYPE (T, float) \ -+ TEST_TYPE (T, double) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tmla\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmla\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmla\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmla\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tmls\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmls\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmls\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmls\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmls\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tsel\t} 14 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_6_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_6_run.c -new file mode 100644 -index 000000000..59f57a2db ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_6_run.c -@@ -0,0 +1,35 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_mla_6.c" -+ -+#define FACTOR 17 -+#define N 99 -+ -+#define TEST_LOOP(TYPE, NAME, OP) \ -+ { \ -+ TYPE r[N], a[N], b[N], pred[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i); \ -+ b[i] = (i >> 4) << (i & 15); \ -+ pred[i] = i % 3 < i % 5; \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE##_##NAME (r, a, b, FACTOR, pred, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected \ -+ = pred[i] ? 
a[i] OP b[i] * (TYPE) FACTOR : 5; \ -+ if (r[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_7.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_7.c -new file mode 100644 -index 000000000..5561f4219 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_7.c -@@ -0,0 +1,62 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE, NAME, OP, CONST) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##NAME##_##CONST (TYPE *__restrict r, \ -+ TYPE *__restrict a, \ -+ TYPE *__restrict b, \ -+ TYPE *__restrict pred, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = pred[i] != 1 ? a[i] OP b[i] * CONST : a[i]; \ -+ } -+ -+#define TEST_COUNT(T, TYPE, CONST) \ -+ T (TYPE, add, +, CONST) \ -+ T (TYPE, sub, -, CONST) -+ -+#define TEST_TYPE(T, TYPE, CONST) \ -+ TEST_COUNT (T, TYPE, 2) \ -+ TEST_COUNT (T, TYPE, 4) \ -+ TEST_COUNT (T, TYPE, CONST) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, uint8_t, 0x80) \ -+ TEST_TYPE (T, uint16_t, 0x8000) \ -+ TEST_TYPE (T, uint32_t, 0x80000000) \ -+ TEST_TYPE (T, uint64_t, 0x8000000000000000ULL) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, z[0-9]+\.b, #1\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, z[0-9]+\.b, #2\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, z[0-9]+\.b, #7\n} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, z[0-9]+\.h, #1\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, z[0-9]+\.h, #2\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, z[0-9]+\.h, #15\n} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, z[0-9]+\.s, #1\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, z[0-9]+\.s, #2\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, z[0-9]+\.s, #31\n} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, z[0-9]+\.d, #1\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, z[0-9]+\.d, #2\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, z[0-9]+\.d, #63\n} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 4 } } */ -+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ -+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */ -+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ -+ -+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_7_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_7_run.c -new file mode 100644 -index 000000000..b094f40a2 ---- 
/dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_7_run.c -@@ -0,0 +1,34 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_mla_7.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE, NAME, OP, CONST) \ -+ { \ -+ TYPE r[N], a[N], b[N], pred[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i); \ -+ b[i] = (i >> 4) << (i & 15); \ -+ pred[i] = i % 3 < i % 5; \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE##_##NAME##_##CONST (r, a, b, pred, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected \ -+ = pred[i] != 1 ? a[i] OP b[i] * CONST : a[i]; \ -+ if (r[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_8.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_8.c -new file mode 100644 -index 000000000..d5549272e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_8.c -@@ -0,0 +1,62 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE, NAME, OP, CONST) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##NAME##_##CONST (TYPE *__restrict r, \ -+ TYPE *__restrict a, \ -+ TYPE *__restrict b, \ -+ TYPE *__restrict pred, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = pred[i] != 1 ? a[i] OP b[i] * -CONST : a[i]; \ -+ } -+ -+#define TEST_COUNT(T, TYPE, CONST) \ -+ T (TYPE, add, +, CONST) \ -+ T (TYPE, sub, -, CONST) -+ -+#define TEST_TYPE(T, TYPE, CONST) \ -+ TEST_COUNT (T, TYPE, 2) \ -+ TEST_COUNT (T, TYPE, 4) \ -+ TEST_COUNT (T, TYPE, CONST) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, uint8_t, 0x80) \ -+ TEST_TYPE (T, uint16_t, 0x8000) \ -+ TEST_TYPE (T, uint32_t, 0x80000000) \ -+ TEST_TYPE (T, uint64_t, 0x8000000000000000ULL) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, z[0-9]+\.b, #1\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, z[0-9]+\.b, #2\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, z[0-9]+\.b, #7\n} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, z[0-9]+\.h, #1\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, z[0-9]+\.h, #2\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, z[0-9]+\.h, #15\n} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, z[0-9]+\.s, #1\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, z[0-9]+\.s, #2\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, z[0-9]+\.s, #31\n} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, z[0-9]+\.d, #1\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, z[0-9]+\.d, #2\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, z[0-9]+\.d, #63\n} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 4 } } */ -+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ -+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 4 } } */ -+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */ -+ -+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ -+/* { dg-final { 
scan-assembler-times {\tsub\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_8_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_8_run.c -new file mode 100644 -index 000000000..7fb58aa70 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_mla_8_run.c -@@ -0,0 +1,34 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_mla_8.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE, NAME, OP, CONST) \ -+ { \ -+ TYPE r[N], a[N], b[N], pred[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i); \ -+ b[i] = (i >> 4) << (i & 15); \ -+ pred[i] = i % 3 < i % 5; \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE##_##NAME##_##CONST (r, a, b, pred, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ TYPE expected \ -+ = pred[i] != 1 ? a[i] OP b[i] * -CONST : a[i]; \ -+ if (r[i] != expected) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_1.c -new file mode 100644 -index 000000000..f2c51b291 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_1.c -@@ -0,0 +1,48 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE, NAME, OP) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \ -+ TYPE *__restrict b, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = a[i] > 20 ? 
b[i] OP 3 : b[i]; \ -+ } -+ -+#define TEST_TYPE(T, TYPE) \ -+ T (TYPE, shl, <<) \ -+ T (TYPE, shr, >>) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, int8_t) \ -+ TEST_TYPE (T, uint8_t) \ -+ TEST_TYPE (T, int16_t) \ -+ TEST_TYPE (T, uint16_t) \ -+ TEST_TYPE (T, int32_t) \ -+ TEST_TYPE (T, uint32_t) \ -+ TEST_TYPE (T, int64_t) \ -+ TEST_TYPE (T, uint64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_1_run.c -new file mode 100644 -index 000000000..acc403ec8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_1_run.c -@@ -0,0 +1,27 @@ -+/* { dg-do run { target { aarch64_sve_hw } } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_shift_1.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE, NAME, OP) \ -+ { \ -+ TYPE r[N], a[N], b[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i); \ -+ b[i] = (i >> 4) << (i & 15); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE##_##NAME (r, a, b, N); \ -+ for (int i = 0; i < N; ++i) \ -+ if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP 3 : b[i])) \ -+ __builtin_abort (); \ -+ } -+ -+int main () -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_2.c -new file mode 100644 -index 000000000..c9082c9c8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_2.c -@@ -0,0 +1,52 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE, NAME, OP) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \ -+ TYPE *__restrict b, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = a[i] > 20 ? 
b[i] OP 3 : a[i]; \ -+ } -+ -+#define TEST_TYPE(T, TYPE) \ -+ T (TYPE, shl, <<) \ -+ T (TYPE, shr, >>) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, int8_t) \ -+ TEST_TYPE (T, uint8_t) \ -+ TEST_TYPE (T, int16_t) \ -+ TEST_TYPE (T, uint16_t) \ -+ TEST_TYPE (T, int32_t) \ -+ TEST_TYPE (T, uint32_t) \ -+ TEST_TYPE (T, int64_t) \ -+ TEST_TYPE (T, uint64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b\n} 4 } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h\n} 4 } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 4 } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 4 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_2_run.c -new file mode 100644 -index 000000000..4917d3af6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_2_run.c -@@ -0,0 +1,27 @@ -+/* { dg-do run { target { aarch64_sve_hw } } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_shift_2.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE, NAME, OP) \ -+ { \ -+ TYPE r[N], a[N], b[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i); \ -+ b[i] = (i >> 4) << (i & 15); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE##_##NAME (r, a, b, N); \ -+ for (int i = 0; i < N; ++i) \ -+ if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP 3 : a[i])) \ -+ __builtin_abort (); \ -+ } -+ -+int main () -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_3.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_3.c -new file mode 100644 -index 000000000..55e0de8aa ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_3.c -@@ -0,0 +1,48 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE, NAME, OP) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \ -+ TYPE *__restrict b, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = a[i] > 20 ? 
b[i] OP 3 : 72; \ -+ } -+ -+#define TEST_TYPE(T, TYPE) \ -+ T (TYPE, shl, <<) \ -+ T (TYPE, shr, >>) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, int8_t) \ -+ TEST_TYPE (T, uint8_t) \ -+ TEST_TYPE (T, int16_t) \ -+ TEST_TYPE (T, uint16_t) \ -+ TEST_TYPE (T, int32_t) \ -+ TEST_TYPE (T, uint32_t) \ -+ TEST_TYPE (T, int64_t) \ -+ TEST_TYPE (T, uint64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -+/* { dg-final { scan-assembler-times {\tsel\t} 16 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_3_run.c -new file mode 100644 -index 000000000..194c75b8d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_3_run.c -@@ -0,0 +1,27 @@ -+/* { dg-do run { target { aarch64_sve_hw } } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_shift_3.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE, NAME, OP) \ -+ { \ -+ TYPE r[N], a[N], b[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i); \ -+ b[i] = (i >> 4) << (i & 15); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE##_##NAME (r, a, b, N); \ -+ for (int i = 0; i < N; ++i) \ -+ if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP 3 : 72)) \ -+ __builtin_abort (); \ -+ } -+ -+int main () -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_4.c -new file mode 100644 -index 000000000..32dd68199 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_4.c -@@ -0,0 +1,52 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE, NAME, OP) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \ -+ TYPE *__restrict b, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = a[i] > 20 ? 
b[i] OP 3 : 0; \ -+ } -+ -+#define TEST_TYPE(T, TYPE) \ -+ T (TYPE, shl, <<) \ -+ T (TYPE, shr, >>) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, int8_t) \ -+ TEST_TYPE (T, uint8_t) \ -+ TEST_TYPE (T, int16_t) \ -+ TEST_TYPE (T, uint16_t) \ -+ TEST_TYPE (T, int32_t) \ -+ TEST_TYPE (T, uint32_t) \ -+ TEST_TYPE (T, int64_t) \ -+ TEST_TYPE (T, uint64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-7]/z, z[0-9]+\.b\n} 4 } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-7]/z, z[0-9]+\.h\n} 4 } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 4 } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/z, z[0-9]+\.d\n} 4 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_4_run.c -new file mode 100644 -index 000000000..ee263000d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_4_run.c -@@ -0,0 +1,27 @@ -+/* { dg-do run { target { aarch64_sve_hw } } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_shift_4.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE, NAME, OP) \ -+ { \ -+ TYPE r[N], a[N], b[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i); \ -+ b[i] = (i >> 4) << (i & 15); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE##_##NAME (r, a, b, N); \ -+ for (int i = 0; i < N; ++i) \ -+ if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP 3 : 0)) \ -+ __builtin_abort (); \ -+ } -+ -+int main () -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_5.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_5.c -new file mode 100644 -index 000000000..1d4491531 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_5.c -@@ -0,0 +1,38 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE, NAME, OP) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \ -+ TYPE *__restrict b, TYPE *__restrict c, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = a[i] > 20 ? 
b[i] OP c[i] : b[i]; \ -+ } -+ -+#define TEST_TYPE(T, TYPE) \ -+ T (TYPE, shl, <<) \ -+ T (TYPE, shr, >>) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, int32_t) \ -+ TEST_TYPE (T, uint32_t) \ -+ TEST_TYPE (T, int64_t) \ -+ TEST_TYPE (T, uint64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_5_run.c -new file mode 100644 -index 000000000..35bf1b871 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_5_run.c -@@ -0,0 +1,28 @@ -+/* { dg-do run { target { aarch64_sve_hw } } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_shift_5.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE, NAME, OP) \ -+ { \ -+ TYPE r[N], a[N], b[N], c[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i); \ -+ b[i] = (i >> 4) << (i & 15); \ -+ c[i] = ~i & 7; \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE##_##NAME (r, a, b, c, N); \ -+ for (int i = 0; i < N; ++i) \ -+ if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP c[i] : b[i])) \ -+ __builtin_abort (); \ -+ } -+ -+int main () -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_6.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_6.c -new file mode 100644 -index 000000000..35cb67677 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_6.c -@@ -0,0 +1,33 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE, NAME, OP) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \ -+ TYPE *__restrict b, TYPE *__restrict c, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = a[i] > 20 ? 
b[i] OP c[i] : c[i]; \ -+ } -+ -+#define TEST_TYPE(T, TYPE) \ -+ T (TYPE, shl, <<) \ -+ T (TYPE, shr, >>) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, int32_t) \ -+ TEST_TYPE (T, uint32_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tlslr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\tasrr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tlsrr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_6_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_6_run.c -new file mode 100644 -index 000000000..e601c6156 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_6_run.c -@@ -0,0 +1,28 @@ -+/* { dg-do run { target { aarch64_sve_hw } } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_shift_6.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE, NAME, OP) \ -+ { \ -+ TYPE r[N], a[N], b[N], c[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i); \ -+ b[i] = (i >> 4) << (i & 15); \ -+ c[i] = ~i & 7; \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE##_##NAME (r, a, b, c, N); \ -+ for (int i = 0; i < N; ++i) \ -+ if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP c[i] : c[i])) \ -+ __builtin_abort (); \ -+ } -+ -+int main () -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_7.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_7.c -new file mode 100644 -index 000000000..80154b25e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_7.c -@@ -0,0 +1,40 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE, NAME, OP) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \ -+ TYPE *__restrict b, TYPE *__restrict c, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = a[i] > 20 ? 
b[i] OP c[i] : a[i]; \ -+ } -+ -+#define TEST_TYPE(T, TYPE) \ -+ T (TYPE, shl, <<) \ -+ T (TYPE, shr, >>) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, int32_t) \ -+ TEST_TYPE (T, uint32_t) \ -+ TEST_TYPE (T, int64_t) \ -+ TEST_TYPE (T, uint64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s\n} 4 } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d\n} 4 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_7_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_7_run.c -new file mode 100644 -index 000000000..d23b0093d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_7_run.c -@@ -0,0 +1,28 @@ -+/* { dg-do run { target { aarch64_sve_hw } } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_shift_7.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE, NAME, OP) \ -+ { \ -+ TYPE r[N], a[N], b[N], c[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i); \ -+ b[i] = (i >> 4) << (i & 15); \ -+ c[i] = ~i & 7; \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE##_##NAME (r, a, b, c, N); \ -+ for (int i = 0; i < N; ++i) \ -+ if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP c[i] : a[i])) \ -+ __builtin_abort (); \ -+ } -+ -+int main () -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_8.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_8.c -new file mode 100644 -index 000000000..b478c0c4f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_8.c -@@ -0,0 +1,38 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE, NAME, OP) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \ -+ TYPE *__restrict b, TYPE *__restrict c, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = a[i] > 20 ? 
b[i] OP c[i] : 91; \ -+ } -+ -+#define TEST_TYPE(T, TYPE) \ -+ T (TYPE, shl, <<) \ -+ T (TYPE, shr, >>) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, int32_t) \ -+ TEST_TYPE (T, uint32_t) \ -+ TEST_TYPE (T, int64_t) \ -+ TEST_TYPE (T, uint64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -+/* { dg-final { scan-assembler-times {\tsel\t} 8 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_8_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_8_run.c -new file mode 100644 -index 000000000..72e5a7b59 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_8_run.c -@@ -0,0 +1,28 @@ -+/* { dg-do run { target { aarch64_sve_hw } } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_shift_8.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE, NAME, OP) \ -+ { \ -+ TYPE r[N], a[N], b[N], c[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i); \ -+ b[i] = (i >> 4) << (i & 15); \ -+ c[i] = ~i & 7; \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE##_##NAME (r, a, b, c, N); \ -+ for (int i = 0; i < N; ++i) \ -+ if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP c[i] : 91)) \ -+ __builtin_abort (); \ -+ } -+ -+int main () -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_9.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_9.c -new file mode 100644 -index 000000000..184e93ab8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_9.c -@@ -0,0 +1,40 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define DEF_LOOP(TYPE, NAME, OP) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##NAME (TYPE *__restrict r, TYPE *__restrict a, \ -+ TYPE *__restrict b, TYPE *__restrict c, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = a[i] > 20 ? 
b[i] OP c[i] : 0; \ -+ } -+ -+#define TEST_TYPE(T, TYPE) \ -+ T (TYPE, shl, <<) \ -+ T (TYPE, shr, >>) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, int32_t) \ -+ TEST_TYPE (T, uint32_t) \ -+ TEST_TYPE (T, int64_t) \ -+ TEST_TYPE (T, uint64_t) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tlslr?\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlslr?\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\tasrr?\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tasrr?\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tlsrr?\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tlsrr?\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-7]/z, z[0-9]+\.s\n} 4 } } */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-7]/z, z[0-9]+\.d\n} 4 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^,]*z} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_9_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_9_run.c -new file mode 100644 -index 000000000..6e41ac4da ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_shift_9_run.c -@@ -0,0 +1,28 @@ -+/* { dg-do run { target { aarch64_sve_hw } } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_shift_9.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE, NAME, OP) \ -+ { \ -+ TYPE r[N], a[N], b[N], c[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i); \ -+ b[i] = (i >> 4) << (i & 15); \ -+ c[i] = ~i & 7; \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE##_##NAME (r, a, b, c, N); \ -+ for (int i = 0; i < N; ++i) \ -+ if (r[i] != (TYPE) (a[i] > 20 ? b[i] OP c[i] : 0)) \ -+ __builtin_abort (); \ -+ } -+ -+int main () -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_1.c -new file mode 100644 -index 000000000..2b5f9c345 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_1.c -@@ -0,0 +1,59 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define abs(A) ((A) < 0 ? -(A) : (A)) -+#define neg(A) (-(A)) -+ -+#define DEF_LOOP(TYPE, OP) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##OP (TYPE *__restrict r, TYPE *__restrict a, \ -+ TYPE *__restrict pred, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = pred[i] ? 
OP (a[i]) : a[i]; \ -+ } -+ -+#define TEST_INT_TYPE(T, TYPE) \ -+ T (TYPE, abs) \ -+ T (TYPE, neg) -+ -+#define TEST_FLOAT_TYPE(T, TYPE, SUFFIX) \ -+ T (TYPE, __builtin_fabs##SUFFIX) \ -+ T (TYPE, neg) -+ -+#define TEST_ALL(T) \ -+ TEST_INT_TYPE (T, int8_t) \ -+ TEST_INT_TYPE (T, int16_t) \ -+ TEST_INT_TYPE (T, int32_t) \ -+ TEST_INT_TYPE (T, int64_t) \ -+ TEST_FLOAT_TYPE (T, _Float16, f16) \ -+ TEST_FLOAT_TYPE (T, float, f) \ -+ TEST_FLOAT_TYPE (T, double, ) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz} } } */ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -+/* XFAILed because the ?: gets canonicalized so that the operation is in -+ the false arm. */ -+/* { dg-final { scan-assembler-not {\tsel\t} { xfail *-*-* } } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_1_run.c -new file mode 100644 -index 000000000..a6c1a49dd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_1_run.c -@@ -0,0 +1,27 @@ -+/* { dg-do run { target { aarch64_sve_hw } } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_unary_1.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE, OP) \ -+ { \ -+ TYPE r[N], a[N], pred[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i) * (i % 3 == 0 ? 1 : -1); \ -+ pred[i] = (i % 7 < 4); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE##_##OP (r, a, pred, N); \ -+ for (int i = 0; i < N; ++i) \ -+ if (r[i] != (pred[i] ? OP (a[i]) : a[i])) \ -+ __builtin_abort (); \ -+ } -+ -+int main () -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_2.c -new file mode 100644 -index 000000000..97d1b8f5d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_2.c -@@ -0,0 +1,61 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define abs(A) ((A) < 0 ? -(A) : (A)) -+#define neg(A) (-(A)) -+ -+#define DEF_LOOP(TYPE, OP) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##OP (TYPE *__restrict r, TYPE *__restrict a, \ -+ TYPE *__restrict b, \ -+ TYPE *__restrict pred, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ { \ -+ TYPE bi = b[i]; \ -+ r[i] = pred[i] ? 
OP (a[i]) : bi; \ -+ } \ -+ } -+ -+#define TEST_INT_TYPE(T, TYPE) \ -+ T (TYPE, abs) \ -+ T (TYPE, neg) -+ -+#define TEST_FLOAT_TYPE(T, TYPE, SUFFIX) \ -+ T (TYPE, __builtin_fabs##SUFFIX) \ -+ T (TYPE, neg) -+ -+#define TEST_ALL(T) \ -+ TEST_INT_TYPE (T, int8_t) \ -+ TEST_INT_TYPE (T, int16_t) \ -+ TEST_INT_TYPE (T, int32_t) \ -+ TEST_INT_TYPE (T, int64_t) \ -+ TEST_FLOAT_TYPE (T, _Float16, f16) \ -+ TEST_FLOAT_TYPE (T, float, f) \ -+ TEST_FLOAT_TYPE (T, double, ) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz} } } */ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_2_run.c -new file mode 100644 -index 000000000..1a385c323 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_2_run.c -@@ -0,0 +1,28 @@ -+/* { dg-do run { target { aarch64_sve_hw } } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_unary_2.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE, OP) \ -+ { \ -+ TYPE r[N], a[N], b[N], pred[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i) * (i % 3 == 0 ? 1 : -1); \ -+ b[i] = (i % 9) * (i % 7 + 1); \ -+ pred[i] = (i % 7 < 4); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE##_##OP (r, a, b, pred, N); \ -+ for (int i = 0; i < N; ++i) \ -+ if (r[i] != (pred[i] ? OP (a[i]) : b[i])) \ -+ __builtin_abort (); \ -+ } -+ -+int main () -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_3.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_3.c -new file mode 100644 -index 000000000..dde0fdd92 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_3.c -@@ -0,0 +1,58 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define abs(A) ((A) < 0 ? -(A) : (A)) -+#define neg(A) (-(A)) -+ -+#define DEF_LOOP(TYPE, OP) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##OP (TYPE *__restrict r, TYPE *__restrict a, \ -+ TYPE *__restrict pred, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = pred[i] ? 
OP (a[i]) : 5; \ -+ } -+ -+#define TEST_INT_TYPE(T, TYPE) \ -+ T (TYPE, abs) \ -+ T (TYPE, neg) -+ -+#define TEST_FLOAT_TYPE(T, TYPE, SUFFIX) \ -+ T (TYPE, __builtin_fabs##SUFFIX) \ -+ T (TYPE, neg) -+ -+#define TEST_ALL(T) \ -+ TEST_INT_TYPE (T, int8_t) \ -+ TEST_INT_TYPE (T, int16_t) \ -+ TEST_INT_TYPE (T, int32_t) \ -+ TEST_INT_TYPE (T, int64_t) \ -+ TEST_FLOAT_TYPE (T, _Float16, f16) \ -+ TEST_FLOAT_TYPE (T, float, f) \ -+ TEST_FLOAT_TYPE (T, double, ) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+, z[0-9]+\n} 14 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^\n]*z} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_3_run.c -new file mode 100644 -index 000000000..3c72b239a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_3_run.c -@@ -0,0 +1,27 @@ -+/* { dg-do run { target { aarch64_sve_hw } } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_unary_3.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE, OP) \ -+ { \ -+ TYPE r[N], a[N], pred[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i) * (i % 3 == 0 ? 1 : -1); \ -+ pred[i] = (i % 7 < 4); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE##_##OP (r, a, pred, N); \ -+ for (int i = 0; i < N; ++i) \ -+ if (r[i] != (pred[i] ? OP (a[i]) : 5)) \ -+ __builtin_abort (); \ -+ } -+ -+int main () -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_4.c -new file mode 100644 -index 000000000..4604365fb ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_4.c -@@ -0,0 +1,62 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define abs(A) ((A) < 0 ? -(A) : (A)) -+#define neg(A) (-(A)) -+ -+#define DEF_LOOP(TYPE, OP) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##OP (TYPE *__restrict r, TYPE *__restrict a, \ -+ TYPE *__restrict pred, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = pred[i] ? 
OP (a[i]) : 0; \ -+ } -+ -+#define TEST_INT_TYPE(T, TYPE) \ -+ T (TYPE, abs) \ -+ T (TYPE, neg) -+ -+#define TEST_FLOAT_TYPE(T, TYPE, SUFFIX) \ -+ T (TYPE, __builtin_fabs##SUFFIX) \ -+ T (TYPE, neg) -+ -+#define TEST_ALL(T) \ -+ TEST_INT_TYPE (T, int8_t) \ -+ TEST_INT_TYPE (T, int16_t) \ -+ TEST_INT_TYPE (T, int32_t) \ -+ TEST_INT_TYPE (T, int64_t) \ -+ TEST_FLOAT_TYPE (T, _Float16, f16) \ -+ TEST_FLOAT_TYPE (T, float, f) \ -+ TEST_FLOAT_TYPE (T, double, ) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tabs\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ -+ -+/* Really we should be able to use MOVPRFX /z here, but at the moment -+ we're relying on combine to merge a SEL and an arithmetic operation, -+ and the SEL doesn't allow the "false" value to be zero when the "true" -+ value is a register. */ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+, z[0-9]+\n} 14 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^\n]*z} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_4_run.c -new file mode 100644 -index 000000000..48d254150 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_4_run.c -@@ -0,0 +1,27 @@ -+/* { dg-do run { target { aarch64_sve_hw } } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_unary_4.c" -+ -+#define N 99 -+ -+#define TEST_LOOP(TYPE, OP) \ -+ { \ -+ TYPE r[N], a[N], pred[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i) * (i % 3 == 0 ? 1 : -1); \ -+ pred[i] = (i % 7 < 4); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE##_##OP (r, a, pred, N); \ -+ for (int i = 0; i < N; ++i) \ -+ if (r[i] != (pred[i] ? 
OP (a[i]) : 0)) \ -+ __builtin_abort (); \ -+ } -+ -+int main () -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_1.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_1.c -new file mode 100644 -index 000000000..05641199e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_1.c -@@ -0,0 +1,40 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define NUM_ELEMS(TYPE) (320 / sizeof (TYPE)) -+ -+#define DEF_LOOP(TYPE, CONST) \ -+ void __attribute__ ((noipa)) \ -+ test_##CONST##_##TYPE (TYPE *restrict r, TYPE *restrict a, \ -+ TYPE *restrict b) \ -+ { \ -+ for (int i = 0; i < NUM_ELEMS (TYPE); ++i) \ -+ r[i] = a[i] > 20 ? b[i] & CONST : b[i]; \ -+ } -+ -+#define TEST_ALL(T) \ -+ T (uint16_t, 0xff) \ -+ \ -+ T (uint32_t, 0xff) \ -+ T (uint32_t, 0xffff) \ -+ \ -+ T (uint64_t, 0xff) \ -+ T (uint64_t, 0xffff) \ -+ T (uint64_t, 0xffffffff) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+\.h), p[0-7]/z, \[x2,[^L]*\tuxtb\t\1, p[0-7]/m, \1\n} } } */ -+ -+/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+\.s), p[0-7]/z, \[x2,[^L]*\tuxtb\t\1, p[0-7]/m, \1\n} } } */ -+/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+\.s), p[0-7]/z, \[x2,[^L]*\tuxth\t\1, p[0-7]/m, \1\n} } } */ -+ -+/* { dg-final { scan-assembler {\tld1d\t(z[0-9]+\.d), p[0-7]/z, \[x2,[^L]*\tuxtb\t\1, p[0-7]/m, \1\n} } } */ -+/* { dg-final { scan-assembler {\tld1d\t(z[0-9]+\.d), p[0-7]/z, \[x2,[^L]*\tuxth\t\1, p[0-7]/m, \1\n} } } */ -+/* { dg-final { scan-assembler {\tld1d\t(z[0-9]+\.d), p[0-7]/z, \[x2,[^L]*\tuxtw\t\1, p[0-7]/m, \1\n} } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz} } } */ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_1_run.c -new file mode 100644 -index 000000000..685f39478 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_1_run.c -@@ -0,0 +1,27 @@ -+/* { dg-do run { target { aarch64_sve_hw } } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_uxt_1.c" -+ -+#define TEST_LOOP(TYPE, CONST) \ -+ { \ -+ TYPE r[NUM_ELEMS (TYPE)]; \ -+ TYPE a[NUM_ELEMS (TYPE)]; \ -+ TYPE b[NUM_ELEMS (TYPE)]; \ -+ for (int i = 0; i < NUM_ELEMS (TYPE); ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i); \ -+ b[i] = (i >> 4) << (i & 15); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##CONST##_##TYPE (r, a, b); \ -+ for (int i = 0; i < NUM_ELEMS (TYPE); ++i) \ -+ if (r[i] != (a[i] > 20 ? b[i] & CONST : b[i])) \ -+ __builtin_abort (); \ -+ } -+ -+int main () -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_2.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_2.c -new file mode 100644 -index 000000000..c900498a0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_2.c -@@ -0,0 +1,40 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define NUM_ELEMS(TYPE) (320 / sizeof (TYPE)) -+ -+#define DEF_LOOP(TYPE, CONST) \ -+ void __attribute__ ((noipa)) \ -+ test_##CONST##_##TYPE (TYPE *restrict r, TYPE *restrict a, \ -+ TYPE *restrict b) \ -+ { \ -+ for (int i = 0; i < NUM_ELEMS (TYPE); ++i) \ -+ r[i] = a[i] > 20 ? 
b[i] & CONST : a[i]; \ -+ } -+ -+#define TEST_ALL(T) \ -+ T (uint16_t, 0xff) \ -+ \ -+ T (uint32_t, 0xff) \ -+ T (uint32_t, 0xffff) \ -+ \ -+ T (uint64_t, 0xff) \ -+ T (uint64_t, 0xffff) \ -+ T (uint64_t, 0xffffffff) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+\.h), p[0-7]/z, \[x1,[^L]*\tld1h\t(z[0-9]+\.h), p[0-7]/z, \[x2,[^L]*\tuxtb\t\1, p[0-7]/m, \2\n} } } */ -+ -+/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+\.s), p[0-7]/z, \[x1,[^L]*\tld1w\t(z[0-9]+\.s), p[0-7]/z, \[x2,[^L]*\tuxtb\t\1, p[0-7]/m, \2\n} } } */ -+/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+\.s), p[0-7]/z, \[x1,[^L]*\tld1w\t(z[0-9]+\.s), p[0-7]/z, \[x2,[^L]*\tuxth\t\1, p[0-7]/m, \2\n} } } */ -+ -+/* { dg-final { scan-assembler {\tld1d\t(z[0-9]+\.d), p[0-7]/z, \[x1,[^L]*\tld1d\t(z[0-9]+\.d), p[0-7]/z, \[x2,[^L]*\tuxtb\t\1, p[0-7]/m, \2\n} } } */ -+/* { dg-final { scan-assembler {\tld1d\t(z[0-9]+\.d), p[0-7]/z, \[x1,[^L]*\tld1d\t(z[0-9]+\.d), p[0-7]/z, \[x2,[^L]*\tuxth\t\1, p[0-7]/m, \2\n} } } */ -+/* { dg-final { scan-assembler {\tld1d\t(z[0-9]+\.d), p[0-7]/z, \[x1,[^L]*\tld1d\t(z[0-9]+\.d), p[0-7]/z, \[x2,[^L]*\tuxtw\t\1, p[0-7]/m, \2\n} } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz} } } */ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_2_run.c -new file mode 100644 -index 000000000..75679cdf9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_2_run.c -@@ -0,0 +1,27 @@ -+/* { dg-do run { target { aarch64_sve_hw } } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_uxt_2.c" -+ -+#define TEST_LOOP(TYPE, CONST) \ -+ { \ -+ TYPE r[NUM_ELEMS (TYPE)]; \ -+ TYPE a[NUM_ELEMS (TYPE)]; \ -+ TYPE b[NUM_ELEMS (TYPE)]; \ -+ for (int i = 0; i < NUM_ELEMS (TYPE); ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i); \ -+ b[i] = (i >> 4) << (i & 15); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##CONST##_##TYPE (r, a, b); \ -+ for (int i = 0; i < NUM_ELEMS (TYPE); ++i) \ -+ if (r[i] != (a[i] > 20 ? b[i] & CONST : a[i])) \ -+ __builtin_abort (); \ -+ } -+ -+int main () -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_3.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_3.c -new file mode 100644 -index 000000000..cf1fd0029 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_3.c -@@ -0,0 +1,39 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define NUM_ELEMS(TYPE) (320 / sizeof (TYPE)) -+ -+#define DEF_LOOP(TYPE, CONST) \ -+ void __attribute__ ((noipa)) \ -+ test_##CONST##_##TYPE (TYPE *restrict r, TYPE *restrict a, \ -+ TYPE *restrict b) \ -+ { \ -+ for (int i = 0; i < NUM_ELEMS (TYPE); ++i) \ -+ r[i] = a[i] > 20 ? 
b[i] & CONST : 127; \ -+ } -+ -+#define TEST_ALL(T) \ -+ T (uint16_t, 0xff) \ -+ \ -+ T (uint32_t, 0xff) \ -+ T (uint32_t, 0xffff) \ -+ \ -+ T (uint64_t, 0xff) \ -+ T (uint64_t, 0xffff) \ -+ T (uint64_t, 0xffffffff) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+), z[0-9]+\n\tuxtb\t\1\.h, p[0-7]/m, z[0-9]+\.h\n} } } */ -+ -+/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+), z[0-9]+\n\tuxtb\t\1\.s, p[0-7]/m, z[0-9]+\.s\n} } } */ -+/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+), z[0-9]+\n\tuxth\t\1\.s, p[0-7]/m, z[0-9]+\.s\n} } } */ -+ -+/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+), z[0-9]+\n\tuxtb\t\1\.d, p[0-7]/m, z[0-9]+\.d\n} } } */ -+/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+), z[0-9]+\n\tuxth\t\1\.d, p[0-7]/m, z[0-9]+\.d\n} } } */ -+/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+), z[0-9]+\n\tuxtw\t\1\.d, p[0-7]/m, z[0-9]+\.d\n} } } */ -+ -+/* { dg-final { scan-assembler-not {\tmov\tz[^\n]*z} } } */ -+/* { dg-final { scan-assembler-not {\tsel\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_3_run.c -new file mode 100644 -index 000000000..3d33d3a39 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_3_run.c -@@ -0,0 +1,27 @@ -+/* { dg-do run { target { aarch64_sve_hw } } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_uxt_3.c" -+ -+#define TEST_LOOP(TYPE, CONST) \ -+ { \ -+ TYPE r[NUM_ELEMS (TYPE)]; \ -+ TYPE a[NUM_ELEMS (TYPE)]; \ -+ TYPE b[NUM_ELEMS (TYPE)]; \ -+ for (int i = 0; i < NUM_ELEMS (TYPE); ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i); \ -+ b[i] = (i >> 4) << (i & 15); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##CONST##_##TYPE (r, a, b); \ -+ for (int i = 0; i < NUM_ELEMS (TYPE); ++i) \ -+ if (r[i] != (a[i] > 20 ? b[i] & CONST : 127)) \ -+ __builtin_abort (); \ -+ } -+ -+int main () -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_4.c -new file mode 100644 -index 000000000..25c664780 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_4.c -@@ -0,0 +1,36 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define NUM_ELEMS(TYPE) (320 / sizeof (TYPE)) -+ -+#define DEF_LOOP(TYPE, CONST) \ -+ void __attribute__ ((noipa)) \ -+ test_##CONST##_##TYPE (TYPE *restrict r, TYPE *restrict a, \ -+ TYPE *restrict b) \ -+ { \ -+ for (int i = 0; i < NUM_ELEMS (TYPE); ++i) \ -+ r[i] = a[i] > 20 ? 
b[i] & CONST : 0; \ -+ } -+ -+#define TEST_ALL(T) \ -+ T (uint16_t, 0xff) \ -+ \ -+ T (uint32_t, 0xff) \ -+ T (uint32_t, 0xffff) \ -+ \ -+ T (uint64_t, 0xff) \ -+ T (uint64_t, 0xffff) \ -+ T (uint64_t, 0xffffffff) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.h), (p[0-7])/z, z[0-9]+\.h\n\tuxtb\t\1, \2/m, z[0-9]+\.h\n} } } */ -+ -+/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.s), (p[0-7])/z, z[0-9]+\.s\n\tuxtb\t\1, \2/m, z[0-9]+\.s\n} } } */ -+/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.s), (p[0-7])/z, z[0-9]+\.s\n\tuxth\t\1, \2/m, z[0-9]+\.s\n} } } */ -+ -+/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.d), (p[0-7])/z, z[0-9]+\.d\n\tuxtb\t\1, \2/m, z[0-9]+\.d\n} } } */ -+/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.d), (p[0-7])/z, z[0-9]+\.d\n\tuxth\t\1, \2/m, z[0-9]+\.d\n} } } */ -+/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.d), (p[0-7])/z, z[0-9]+\.d\n\tuxtw\t\1, \2/m, z[0-9]+\.d\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_4_run.c -new file mode 100644 -index 000000000..f3c4374ba ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_uxt_4_run.c -@@ -0,0 +1,27 @@ -+/* { dg-do run { target { aarch64_sve_hw } } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "cond_uxt_4.c" -+ -+#define TEST_LOOP(TYPE, CONST) \ -+ { \ -+ TYPE r[NUM_ELEMS (TYPE)]; \ -+ TYPE a[NUM_ELEMS (TYPE)]; \ -+ TYPE b[NUM_ELEMS (TYPE)]; \ -+ for (int i = 0; i < NUM_ELEMS (TYPE); ++i) \ -+ { \ -+ a[i] = (i & 1 ? i : 3 * i); \ -+ b[i] = (i >> 4) << (i & 15); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##CONST##_##TYPE (r, a, b); \ -+ for (int i = 0; i < NUM_ELEMS (TYPE); ++i) \ -+ if (r[i] != (a[i] > 20 ? 
b[i] & CONST : 0)) \ -+ __builtin_abort (); \ -+ } -+ -+int main () -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/const_1.c b/gcc/testsuite/gcc.target/aarch64/sve/const_1.c -new file mode 100644 -index 000000000..ae25dcb73 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/const_1.c -@@ -0,0 +1,13 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O3" } */ -+ -+#include -+ -+void -+set (uint64_t *dst, int count) -+{ -+ for (int i = 0; i < count; ++i) -+ dst[i] = 0xffff00ff00ffff00ULL; -+} -+ -+/* { dg-final { scan-assembler {\tmovi\tv([0-9]+)\.2d, 0xffff00ff00ffff00\n.*\tdup\tz[0-9]+\.q, z\1\.q\[0\]\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/const_2.c b/gcc/testsuite/gcc.target/aarch64/sve/const_2.c -new file mode 100644 -index 000000000..7b2b5c2a1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/const_2.c -@@ -0,0 +1,20 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O3" } */ -+ -+#include -+ -+#define TEST(TYPE, CONST) \ -+ void \ -+ set_##TYPE (TYPE *dst, int count) \ -+ { \ -+ for (int i = 0; i < count; ++i) \ -+ dst[i] = CONST; \ -+ } -+ -+TEST (uint16_t, 129) -+TEST (uint32_t, 129) -+TEST (uint64_t, 129) -+ -+/* { dg-final { scan-assembler {\tmovi\tv([0-9]+)\.8h, 0x81\n[^:]*\tdup\tz[0-9]+\.q, z\1\.q\[0\]\n} } } */ -+/* { dg-final { scan-assembler {\tmovi\tv([0-9]+)\.4s, 0x81\n[^:]*\tdup\tz[0-9]+\.q, z\1\.q\[0\]\n} } } */ -+/* { dg-final { scan-assembler {\tmov\t(x[0-9]+), 129\n[^:]*\tmov\tz[0-9]+\.d, \1\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/const_3.c b/gcc/testsuite/gcc.target/aarch64/sve/const_3.c -new file mode 100644 -index 000000000..c18ceaedc ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/const_3.c -@@ -0,0 +1,20 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O3" } */ -+ -+#include -+ -+#define TEST(TYPE, CONST) \ -+ void \ -+ set_##TYPE (TYPE *dst, int count) \ -+ { \ -+ for (int i = 0; i < count; ++i) \ -+ dst[i] = CONST; \ -+ } -+ -+TEST (uint16_t, 0x1234) -+TEST (uint32_t, 0x1234) -+TEST (uint64_t, 0x1234) -+ -+/* { dg-final { scan-assembler {\tmov\t(w[0-9]+), 4660\n[^:]*\tmov\tz[0-9]+\.h, \1\n} } } */ -+/* { dg-final { scan-assembler {\tmov\t(w[0-9]+), 4660\n[^:]*\tmov\tz[0-9]+\.s, \1\n} } } */ -+/* { dg-final { scan-assembler {\tmov\t(x[0-9]+), 4660\n[^:]*\tmov\tz[0-9]+\.d, \1\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/ext_2.c b/gcc/testsuite/gcc.target/aarch64/sve/ext_2.c -index 0fe7e4c28..5593b070c 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/ext_2.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/ext_2.c -@@ -14,5 +14,4 @@ foo (void) - asm volatile ("" :: "w" (x)); - } - --/* { dg-final { scan-assembler {\tmov\tz0\.d, z1\.d\n} } } */ --/* { dg-final { scan-assembler {\text\tz0\.b, z0\.b, z[01]\.b, #4\n} } } */ -+/* { dg-final { scan-assembler {\tmovprfx\tz0, z1\n\text\tz0\.b, z0\.b, z1\.b, #4\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/ext_3.c b/gcc/testsuite/gcc.target/aarch64/sve/ext_3.c -new file mode 100644 -index 000000000..83c04c856 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/ext_3.c -@@ -0,0 +1,17 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -msve-vector-bits=1024" } */ -+ -+typedef int vnx4si __attribute__((vector_size (128))); -+ -+void -+foo (void) -+{ -+ register int x asm ("z0"); -+ register vnx4si y asm ("z1"); -+ -+ asm volatile ("" : "=w" (y)); -+ x = y[21]; -+ asm volatile ("" :: "w" (x)); -+} -+ -+/* { dg-final { scan-assembler {\tmovprfx\tz0, z1\n\text\tz0\.b, z0\.b, z1\.b, 
#84\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fabd_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fabd_1.c -new file mode 100644 -index 000000000..13ad83be2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/fabd_1.c -@@ -0,0 +1,35 @@ -+/* { dg-do assemble { target aarch64_asm_sve_ok } } */ -+/* { dg-options "-O3 --save-temps" } */ -+ -+#define N 16 -+ -+typedef float *__restrict__ vnx4sf; -+typedef double *__restrict__ vnx2df; -+typedef _Float16 *__restrict__ vnx8hf_a; -+typedef __fp16 *__restrict__ vnx8hf_b; -+ -+extern float fabsf (float); -+extern double fabs (double); -+ -+#define FABD(type, abs, n) \ -+ void fabd_##type (type res, type a, type b) \ -+ { \ -+ int i; \ -+ for (i = 0; i < n; i++) \ -+ res[i] = abs (a[i] - b[i]); \ -+ } -+ -+#define TEST_SVE_F_MODES(FUNC) \ -+ FUNC (vnx2df, fabs, N) \ -+ FUNC (vnx4sf, fabsf, N) \ -+ FUNC (vnx8hf_a, fabsf, N) \ -+ FUNC (vnx8hf_b, fabsf, N) \ -+ -+TEST_SVE_F_MODES (FABD) -+ -+/* { dg-final { scan-assembler "fabd" } } */ -+/* { dg-final { scan-assembler-not "fsub" } } */ -+/* { dg-final { scan-assembler-not "fabs" } } */ -+/* { dg-final { scan-assembler-times {\tfabd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfabd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfabd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fadda_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fadda_1.c -new file mode 100644 -index 000000000..158cd6c84 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/fadda_1.c -@@ -0,0 +1,20 @@ -+/* { dg-do assemble { target aarch64_asm_sve_ok } } */ -+/* { dg-options "-O2 -ftree-vectorize --save-temps" } */ -+ -+#define DO_OPS(TYPE) \ -+TYPE fold_##TYPE (TYPE *src, int count) \ -+{ \ -+ TYPE res = 0; \ -+ for (int i = 0; i < count; ++i) \ -+ res += src[i]; \ -+ return res; \ -+} -+ -+DO_OPS (_Float16) -+DO_OPS (float) -+DO_OPS (double) -+ -+/* { dg-final { scan-assembler-times {\tfadda\th[0-9]+, p[0-7], h[0-9]+, z[0-9]+\.h\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfadda\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d\n} 1 } } */ -+/* { dg-final { scan-assembler-not "sel" } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fmaxnm_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fmaxnm_1.c -new file mode 100644 -index 000000000..2f0d64bd4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/fmaxnm_1.c -@@ -0,0 +1,45 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#ifndef FN -+#define FN(X) __builtin_fmax##X -+#endif -+ -+#define DEF_LOOP(FN, TYPE, NAME, CONST) \ -+ void __attribute__ ((noipa)) \ -+ test_##TYPE##_##NAME (TYPE *__restrict x, \ -+ TYPE *__restrict y, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ x[i] = FN (y[i], CONST); \ -+ } -+ -+#define TEST_TYPE(T, FN, TYPE) \ -+ T (FN, TYPE, zero, 0) \ -+ T (FN, TYPE, one, 1) \ -+ T (FN, TYPE, two, 2) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, FN (f16), _Float16) \ -+ TEST_TYPE (T, FN (f32), float) \ -+ TEST_TYPE (T, FN (f64), double) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, 
z[0-9]+\.d, #0\.0\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fminnm_1.c b/gcc/testsuite/gcc.target/aarch64/sve/fminnm_1.c -new file mode 100644 -index 000000000..547772e29 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/fminnm_1.c -@@ -0,0 +1,21 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#define FN(X) __builtin_fmin##X -+#include "fmaxnm_1.c" -+ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #0\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #0\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #0\.0\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, #1\.0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, #1\.0\n} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.h, #2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #2\.0} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_1.c b/gcc/testsuite/gcc.target/aarch64/sve/init_1.c -new file mode 100644 -index 000000000..8e6004337 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/init_1.c -@@ -0,0 +1,22 @@ -+/* { dg-do assemble { target aarch64_asm_sve_ok } } */ -+/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+/* Case 1.1: Trailing constants with stepped sequence. */ -+ -+#include -+ -+typedef int32_t vnx4si __attribute__((vector_size (32))); -+ -+/* -+** foo: -+** index (z[0-9]+\.s), #1, #1 -+** insr \1, w1 -+** insr \1, w0 -+** ... 
-+*/ -+__attribute__((noipa)) -+vnx4si foo(int a, int b) -+{ -+ return (vnx4si) { a, b, 1, 2, 3, 4, 5, 6 }; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_10.c b/gcc/testsuite/gcc.target/aarch64/sve/init_10.c -new file mode 100644 -index 000000000..bee039415 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/init_10.c -@@ -0,0 +1,24 @@ -+/* { dg-do assemble { target aarch64_asm_sve_ok } } */ -+/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+/* Case 5.4: Interleaved repeating elements and non-repeating elements. */ -+ -+#include -+ -+typedef int32_t vnx4si __attribute__((vector_size (32))); -+ -+/* -+** foo: -+** mov (z[0-9]+\.s), w3 -+** mov (z[0-9]+\.s), w2 -+** insr \2, w1 -+** insr \2, w0 -+** zip1 \2, \2, \1 -+** ... -+*/ -+__attribute__((noipa)) -+vnx4si foo(int a, int b, int c, int f) -+{ -+ return (vnx4si) { a, f, b, f, c, f, c, f }; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_10_run.c b/gcc/testsuite/gcc.target/aarch64/sve/init_10_run.c -new file mode 100644 -index 000000000..9a6d8650e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/init_10_run.c -@@ -0,0 +1,21 @@ -+/* { dg-do run { target aarch64_sve256_hw } } */ -+/* { dg-options "-O2 -msve-vector-bits=256" } */ -+ -+#include "init_10.c" -+ -+int main() -+{ -+ int a = 10; -+ int b = 11; -+ int c = 12; -+ int f = 13; -+ -+ vnx4si v = foo (a, b, c, f); -+ int expected[] = { a, f, b, f, c, f, c, f }; -+ -+ for (int i = 0; i < 8; i++) -+ if (v[i] != expected[i]) -+ __builtin_abort (); -+ -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_11.c b/gcc/testsuite/gcc.target/aarch64/sve/init_11.c -new file mode 100644 -index 000000000..8a9496f34 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/init_11.c -@@ -0,0 +1,23 @@ -+/* { dg-do assemble { target aarch64_asm_sve_ok } } */ -+/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+/* Case 5.5: Interleaved repeating elements and trailing same elements. */ -+ -+#include -+ -+typedef int32_t vnx4si __attribute__((vector_size (32))); -+ -+/* -+** foo: -+** mov (z[0-9]+\.s), w1 -+** insr \1, w0 -+** mov (z[0-9]+\.s), w2 -+** zip1 \1, \1, \2 -+** ... 
-+*/ -+__attribute__((noipa)) -+vnx4si foo(int a, int b, int f) -+{ -+ return (vnx4si) { a, f, b, f, b, f, b, f }; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_11_run.c b/gcc/testsuite/gcc.target/aarch64/sve/init_11_run.c -new file mode 100644 -index 000000000..437155581 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/init_11_run.c -@@ -0,0 +1,20 @@ -+/* { dg-do run { target aarch64_sve256_hw } } */ -+/* { dg-options "-O2 -msve-vector-bits=256" } */ -+ -+#include "init_11.c" -+ -+int main() -+{ -+ int a = 10; -+ int b = 11; -+ int f = 12; -+ -+ vnx4si v = foo (a, b, f); -+ int expected[] = { a, f, b, f, b, f, b, f }; -+ -+ for (int i = 0; i < 8; i++) -+ if (v[i] != expected[i]) -+ __builtin_abort (); -+ -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_12.c b/gcc/testsuite/gcc.target/aarch64/sve/init_12.c -new file mode 100644 -index 000000000..bc698ddd3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/init_12.c -@@ -0,0 +1,26 @@ -+/* { dg-do assemble { target aarch64_asm_sve_ok } } */ -+/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+/* Case 5.5: Interleaved repeating elements and trailing same elements. */ -+ -+#include -+ -+typedef int32_t vnx4si __attribute__((vector_size (32))); -+ -+/* -+** foo: -+** fmov (s[0-9]+), w1 -+** mov (z[0-9]+\.s), w2 -+** mov (z[0-9]+\.s), w0 -+** insr \3, \1 -+** insr \3, \1 -+** insr \3, \1 -+** zip1 \3, \3, \2 -+** ... -+*/ -+__attribute__((noipa)) -+vnx4si foo(int a, int b, int f) -+{ -+ return (vnx4si) { b, f, b, f, b, f, a, f }; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_12_run.c b/gcc/testsuite/gcc.target/aarch64/sve/init_12_run.c -new file mode 100644 -index 000000000..5ce7edb1e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/init_12_run.c -@@ -0,0 +1,20 @@ -+/* { dg-do run { target aarch64_sve256_hw } } */ -+/* { dg-options "-O2 -msve-vector-bits=256" } */ -+ -+#include "init_12.c" -+ -+int main() -+{ -+ int a = 10; -+ int b = 11; -+ int f = 12; -+ -+ vnx4si v = foo (a, b, f); -+ int expected[] = { b, f, b, f, b, f, a, f }; -+ -+ for (int i = 0; i < 8; i++) -+ if (v[i] != expected[i]) -+ __builtin_abort (); -+ -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_13.c b/gcc/testsuite/gcc.target/aarch64/sve/init_13.c -new file mode 100644 -index 000000000..eea417063 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/init_13.c -@@ -0,0 +1,17 @@ -+/* { dg-do assemble { target aarch64_asm_sve_ok } } */ -+/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+typedef float vnx4sf __attribute__((vector_size (32))); -+ -+/* -+** foo: -+** mov (z[0-9]+\.s), s0 -+** insr \1, wzr -+** ... 
-+*/ -+vnx4sf -+foo (float a) -+{ -+ return (vnx4sf) { 0.0f, a, a, a, a, a, a, a }; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_1_run.c b/gcc/testsuite/gcc.target/aarch64/sve/init_1_run.c -new file mode 100644 -index 000000000..824a5cbea ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/init_1_run.c -@@ -0,0 +1,19 @@ -+/* { dg-do run { target aarch64_sve256_hw } } */ -+/* { dg-options "-O2 -msve-vector-bits=256" } */ -+ -+#include "init_1.c" -+ -+int main() -+{ -+ int a = 10; -+ int b = 11; -+ -+ vnx4si v = foo (a, b); -+ int expected[] = { a, b, 1, 2, 3, 4, 5, 6 }; -+ -+ for (int i = 0; i < 8; i++) -+ if (v[i] != expected[i]) -+ __builtin_abort (); -+ -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_2.c b/gcc/testsuite/gcc.target/aarch64/sve/init_2.c -new file mode 100644 -index 000000000..0a8aa8dec ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/init_2.c -@@ -0,0 +1,23 @@ -+/* { dg-do assemble { target aarch64_asm_sve_ok } } */ -+/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+/* Case 1.2: Trailing constants with repeating sequence. */ -+ -+#include -+ -+typedef int32_t vnx4si __attribute__((vector_size (32))); -+ -+/* -+** foo: -+** ... -+** ld1rd (z[0-9]+)\.d, p[0-9]+/z, \[x[0-9]+\] -+** insr \1\.s, w1 -+** insr \1\.s, w0 -+** ... -+*/ -+__attribute__((noipa)) -+vnx4si foo(int a, int b) -+{ -+ return (vnx4si) { a, b, 2, 3, 2, 3, 2, 3 }; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_2_run.c b/gcc/testsuite/gcc.target/aarch64/sve/init_2_run.c -new file mode 100644 -index 000000000..86c191c77 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/init_2_run.c -@@ -0,0 +1,19 @@ -+/* { dg-do run { target aarch64_sve256_hw } } */ -+/* { dg-options "-O2 -msve-vector-bits=256" } */ -+ -+#include "init_2.c" -+ -+int main() -+{ -+ int a = 10; -+ int b = 11; -+ -+ vnx4si v = foo (a, b); -+ int expected[] = { a, b, 2, 3, 2, 3, 2, 3 }; -+ -+ for (int i = 0; i < 8; i++) -+ if (v[i] != expected[i]) -+ __builtin_abort (); -+ -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_3.c b/gcc/testsuite/gcc.target/aarch64/sve/init_3.c -new file mode 100644 -index 000000000..4a418b633 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/init_3.c -@@ -0,0 +1,24 @@ -+/* { dg-do assemble { target aarch64_asm_sve_ok } } */ -+/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+/* Case 2.1: Leading constants with stepped sequence. */ -+ -+#include -+ -+typedef int32_t vnx4si __attribute__((vector_size (32))); -+ -+/* -+** foo: -+** index (z[0-9]+\.s), #6, #-1 -+** insr \1, w0 -+** insr \1, w1 -+** rev \1, \1 -+** ... 
-+*/ -+__attribute__((noipa)) -+vnx4si foo(int a, int b) -+{ -+ return (vnx4si) { 1, 2, 3, 4, 5, 6, a, b }; -+} -+ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_3_run.c b/gcc/testsuite/gcc.target/aarch64/sve/init_3_run.c -new file mode 100644 -index 000000000..ce4de6950 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/init_3_run.c -@@ -0,0 +1,19 @@ -+/* { dg-do run { target aarch64_sve256_hw } } */ -+/* { dg-options "-O2 -msve-vector-bits=256" } */ -+ -+#include "init_3.c" -+ -+int main() -+{ -+ int a = 10; -+ int b = 11; -+ -+ vnx4si v = foo (a, b); -+ int expected[] = { 1, 2, 3, 4, 5, 6, a, b }; -+ -+ for (int i = 0; i < 8; i++) -+ if (v[i] != expected[i]) -+ __builtin_abort (); -+ -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_4.c b/gcc/testsuite/gcc.target/aarch64/sve/init_4.c -new file mode 100644 -index 000000000..0fa99c151 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/init_4.c -@@ -0,0 +1,24 @@ -+/* { dg-do assemble { target aarch64_asm_sve_ok } } */ -+/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+/* Case 2.2: Leading constants with stepped sequence. */ -+ -+#include -+ -+typedef int32_t vnx4si __attribute__((vector_size (32))); -+ -+/* -+** foo: -+** ... -+** ld1rd (z[0-9]+)\.d, p[0-9]+/z, \[x[0-9]+\] -+** insr \1\.s, w1 -+** insr \1\.s, w0 -+** rev \1\.s, \1\.s -+** ... -+*/ -+__attribute__((noipa)) -+vnx4si foo(int a, int b) -+{ -+ return (vnx4si) { 3, 2, 3, 2, 3, 2, b, a }; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_4_run.c b/gcc/testsuite/gcc.target/aarch64/sve/init_4_run.c -new file mode 100644 -index 000000000..defee421f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/init_4_run.c -@@ -0,0 +1,19 @@ -+/* { dg-do run { target aarch64_sve256_hw } } */ -+/* { dg-options "-O2 -msve-vector-bits=256" } */ -+ -+#include "init_4.c" -+ -+int main() -+{ -+ int a = 10; -+ int b = 11; -+ -+ vnx4si v = foo (a, b); -+ int expected[] = { 3, 2, 3, 2, 3, 2, b, a }; -+ -+ for (int i = 0; i < 8; i++) -+ if (v[i] != expected[i]) -+ __builtin_abort (); -+ -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_5.c b/gcc/testsuite/gcc.target/aarch64/sve/init_5.c -new file mode 100644 -index 000000000..794e265c3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/init_5.c -@@ -0,0 +1,22 @@ -+/* { dg-do assemble { target aarch64_asm_sve_ok } } */ -+/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+/* Case 3: Trailing same element. */ -+ -+#include -+ -+typedef int32_t vnx4si __attribute__((vector_size (32))); -+ -+/* -+** foo: -+** mov (z[0-9]+\.s), w2 -+** insr \1, w1 -+** insr \1, w0 -+** ... 
-+*/ -+__attribute__((noipa)) -+vnx4si foo(int a, int b, int c) -+{ -+ return (vnx4si) { a, b, c, c, c, c, c, c }; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_5_run.c b/gcc/testsuite/gcc.target/aarch64/sve/init_5_run.c -new file mode 100644 -index 000000000..ba91d6fec ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/init_5_run.c -@@ -0,0 +1,20 @@ -+/* { dg-do run { target aarch64_sve256_hw } } */ -+/* { dg-options "-O2 -msve-vector-bits=256" } */ -+ -+#include "init_5.c" -+ -+int main() -+{ -+ int a = 10; -+ int b = 11; -+ int c = 12; -+ -+ vnx4si v = foo (a, b, c); -+ int expected[] = { a, b, c, c, c, c, c, c }; -+ -+ for (int i = 0; i < 8; i++) -+ if (v[i] != expected[i]) -+ __builtin_abort (); -+ -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_6.c b/gcc/testsuite/gcc.target/aarch64/sve/init_6.c -new file mode 100644 -index 000000000..8443fc000 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/init_6.c -@@ -0,0 +1,23 @@ -+/* { dg-do assemble { target aarch64_asm_sve_ok } } */ -+/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+/* Case 3: Trailing same element. */ -+ -+#include -+ -+typedef int32_t vnx4si __attribute__((vector_size (32))); -+ -+/* -+** foo: -+** mov (z[0-9]+\.s), w2 -+** insr \1, w1 -+** insr \1, w0 -+** rev \1, \1 -+** ... -+*/ -+__attribute__((noipa)) -+vnx4si foo(int a, int b, int c) -+{ -+ return (vnx4si) { c, c, c, c, c, c, b, a }; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_6_run.c b/gcc/testsuite/gcc.target/aarch64/sve/init_6_run.c -new file mode 100644 -index 000000000..802b28f98 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/init_6_run.c -@@ -0,0 +1,20 @@ -+/* { dg-do run { target aarch64_sve256_hw } } */ -+/* { dg-options "-O2 -msve-vector-bits=256" } */ -+ -+#include "init_6.c" -+ -+int main() -+{ -+ int a = 10; -+ int b = 11; -+ int c = 12; -+ -+ vnx4si v = foo (a, b, c); -+ int expected[] = { c, c, c, c, c, c, b, a }; -+ -+ for (int i = 0; i < 8; i++) -+ if (v[i] != expected[i]) -+ __builtin_abort (); -+ -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_7.c b/gcc/testsuite/gcc.target/aarch64/sve/init_7.c -new file mode 100644 -index 000000000..63dbbbe61 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/init_7.c -@@ -0,0 +1,27 @@ -+/* { dg-do assemble { target aarch64_asm_sve_ok } } */ -+/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+/* Case 5.1: All elements. */ -+ -+#include -+ -+typedef int32_t vnx4si __attribute__((vector_size (32))); -+ -+/* -+** foo: -+** mov (z[0-9]+\.s), w7 -+** insr \1, w6 -+** insr \1, w5 -+** insr \1, w4 -+** insr \1, w3 -+** insr \1, w2 -+** insr \1, w1 -+** insr \1, w0 -+** ... 
-+*/ -+__attribute__((noipa)) -+vnx4si foo(int a, int b, int c, int d, int e, int f, int g, int h) -+{ -+ return (vnx4si) { a, b, c, d, e, f, g, h }; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_7_run.c b/gcc/testsuite/gcc.target/aarch64/sve/init_7_run.c -new file mode 100644 -index 000000000..61fe28508 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/init_7_run.c -@@ -0,0 +1,25 @@ -+/* { dg-do run { target aarch64_sve256_hw } } */ -+/* { dg-options "-O2 -msve-vector-bits=256" } */ -+ -+#include "init_7.c" -+ -+int main() -+{ -+ int a = 10; -+ int b = 11; -+ int c = 12; -+ int d = 13; -+ int e = 14; -+ int f = 15; -+ int g = 16; -+ int h = 17; -+ -+ vnx4si v = foo (a, b, c, d, e, f, g, h); -+ int expected[] = { a, b, c, d, e, f, g, h }; -+ -+ for (int i = 0; i < 8; i++) -+ if (v[i] != expected[i]) -+ __builtin_abort (); -+ -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_8.c b/gcc/testsuite/gcc.target/aarch64/sve/init_8.c -new file mode 100644 -index 000000000..9c2456785 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/init_8.c -@@ -0,0 +1,26 @@ -+/* { dg-do assemble { target aarch64_asm_sve_ok } } */ -+/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+/* Case 5.2: Interleaved elements and constants. */ -+ -+#include -+ -+typedef int32_t vnx4si __attribute__((vector_size (32))); -+ -+/* -+** foo: -+** ... -+** ld1w (z[0-9]+\.s), p[0-9]+/z, \[x[0-9]+\] -+** mov (z[0-9]+\.s), w3 -+** insr \2, w2 -+** insr \2, w1 -+** insr \2, w0 -+** zip1 \2, \2, \1 -+** ... -+*/ -+__attribute__((noipa)) -+vnx4si foo(int a, int b, int c, int d) -+{ -+ return (vnx4si) { a, 1, b, 2, c, 3, d, 4 }; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_8_run.c b/gcc/testsuite/gcc.target/aarch64/sve/init_8_run.c -new file mode 100644 -index 000000000..24a0a6e06 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/init_8_run.c -@@ -0,0 +1,21 @@ -+/* { dg-do run { target aarch64_sve256_hw } } */ -+/* { dg-options "-O2 -msve-vector-bits=256" } */ -+ -+#include "init_8.c" -+ -+int main() -+{ -+ int a = 10; -+ int b = 11; -+ int c = 12; -+ int d = 13; -+ -+ vnx4si v = foo (a, b, c, d); -+ int expected[] = { a, 1, b, 2, c, 3, d, 4 }; -+ -+ for (int i = 0; i < 8; i++) -+ if (v[i] != expected[i]) -+ __builtin_abort (); -+ -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_9.c b/gcc/testsuite/gcc.target/aarch64/sve/init_9.c -new file mode 100644 -index 000000000..d22ab71e6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/init_9.c -@@ -0,0 +1,22 @@ -+/* { dg-do assemble { target aarch64_asm_sve_ok } } */ -+/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+/* Case 5.3: Repeated elements. */ -+ -+#include -+ -+typedef int32_t vnx4si __attribute__((vector_size (32))); -+ -+/* -+** foo: -+** mov (z[0-9]+\.s), w0 -+** mov (z[0-9]+\.s), w1 -+** zip1 \1, \1, \2 -+** ... 
-+*/ -+__attribute__((noipa)) -+vnx4si foo(int a, int b) -+{ -+ return (vnx4si) { a, b, a, b, a, b, a, b }; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_9_run.c b/gcc/testsuite/gcc.target/aarch64/sve/init_9_run.c -new file mode 100644 -index 000000000..636ae3b8b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/init_9_run.c -@@ -0,0 +1,19 @@ -+/* { dg-do run { target aarch64_sve256_hw } } */ -+/* { dg-options "-O2 -msve-vector-bits=256" } */ -+ -+#include "init_9.c" -+ -+int main() -+{ -+ int a = 10; -+ int b = 11; -+ -+ vnx4si v = foo (a, b); -+ int expected[] = { a, b, a, b, a, b, a, b }; -+ -+ for (int i = 0; i < 8; i++) -+ if (v[i] != expected[i]) -+ __builtin_abort (); -+ -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/ld1r_2.c b/gcc/testsuite/gcc.target/aarch64/sve/ld1r_2.c -index 2e6b59ab4..e0e0f4ee6 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/ld1r_2.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/ld1r_2.c -@@ -28,22 +28,6 @@ - T (int64_t) - - #define FOR_EACH_LOAD_BROADCAST_IMM(T) \ -- T (int16_t, 129, imm_129) \ -- T (int32_t, 129, imm_129) \ -- T (int64_t, 129, imm_129) \ -- \ -- T (int16_t, -130, imm_m130) \ -- T (int32_t, -130, imm_m130) \ -- T (int64_t, -130, imm_m130) \ -- \ -- T (int16_t, 0x1234, imm_0x1234) \ -- T (int32_t, 0x1234, imm_0x1234) \ -- T (int64_t, 0x1234, imm_0x1234) \ -- \ -- T (int16_t, 0xFEDC, imm_0xFEDC) \ -- T (int32_t, 0xFEDC, imm_0xFEDC) \ -- T (int64_t, 0xFEDC, imm_0xFEDC) \ -- \ - T (int32_t, 0x12345678, imm_0x12345678) \ - T (int64_t, 0x12345678, imm_0x12345678) \ - \ -@@ -56,6 +40,6 @@ FOR_EACH_LOAD_BROADCAST (DEF_LOAD_BROADCAST) - FOR_EACH_LOAD_BROADCAST_IMM (DEF_LOAD_BROADCAST_IMM) - - /* { dg-final { scan-assembler-times {\tld1rb\tz[0-9]+\.b, p[0-7]/z, } 1 } } */ --/* { dg-final { scan-assembler-times {\tld1rh\tz[0-9]+\.h, p[0-7]/z, } 5 } } */ --/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, p[0-7]/z, } 7 } } */ --/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, p[0-7]/z, } 8 } } */ -+/* { dg-final { scan-assembler-times {\tld1rh\tz[0-9]+\.h, p[0-7]/z, } 1 } } */ -+/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, p[0-7]/z, } 3 } } */ -+/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, p[0-7]/z, } 4 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/loop_add_4.c b/gcc/testsuite/gcc.target/aarch64/sve/loop_add_4.c -index 7f02497e8..9ead9c21b 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/loop_add_4.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/loop_add_4.c -@@ -68,7 +68,8 @@ TEST_ALL (LOOP) - /* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, w[0-9]+, w[0-9]+\n} 3 } } */ - /* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]+/z, \[x[0-9]+, x[0-9]+, lsl 2\]} 8 } } */ - /* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7]+, \[x[0-9]+, x[0-9]+, lsl 2\]} 8 } } */ --/* { dg-final { scan-assembler-times {\tincw\tx[0-9]+\n} 8 } } */ -+/* 2 for the calculations of -17 and 17. 
*/ -+/* { dg-final { scan-assembler-times {\tincw\tx[0-9]+\n} 10 } } */ - - /* { dg-final { scan-assembler-times {\tdecw\tz[0-9]+\.s, all, mul #16\n} 1 } } */ - /* { dg-final { scan-assembler-times {\tdecw\tz[0-9]+\.s, all, mul #15\n} 1 } } */ -@@ -85,7 +86,8 @@ TEST_ALL (LOOP) - /* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, x[0-9]+, x[0-9]+\n} 3 } } */ - /* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]+/z, \[x[0-9]+, x[0-9]+, lsl 3\]} 8 } } */ - /* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7]+, \[x[0-9]+, x[0-9]+, lsl 3\]} 8 } } */ --/* { dg-final { scan-assembler-times {\tincd\tx[0-9]+\n} 8 } } */ -+/* 2 for the calculations of -17 and 17. */ -+/* { dg-final { scan-assembler-times {\tincd\tx[0-9]+\n} 10 } } */ - - /* { dg-final { scan-assembler-times {\tdecd\tz[0-9]+\.d, all, mul #16\n} 1 } } */ - /* { dg-final { scan-assembler-times {\tdecd\tz[0-9]+\.d, all, mul #15\n} 1 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/aarch64-sve-pcs.exp b/gcc/testsuite/gcc.target/aarch64/sve/pcs/aarch64-sve-pcs.exp -new file mode 100644 -index 000000000..745887593 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/aarch64-sve-pcs.exp -@@ -0,0 +1,52 @@ -+# Specific regression driver for AArch64 SVE. -+# Copyright (C) 2009-2019 Free Software Foundation, Inc. -+# Contributed by ARM Ltd. -+# -+# This file is part of GCC. -+# -+# GCC is free software; you can redistribute it and/or modify it -+# under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 3, or (at your option) -+# any later version. -+# -+# GCC is distributed in the hope that it will be useful, but -+# WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+# General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with GCC; see the file COPYING3. If not see -+# . */ -+ -+# GCC testsuite that uses the `dg.exp' driver. -+ -+# Exit immediately if this isn't an AArch64 target. -+if {![istarget aarch64*-*-*] } then { -+ return -+} -+ -+# Load support procs. -+load_lib gcc-dg.exp -+ -+# If a testcase doesn't have special options, use these. -+global DEFAULT_CFLAGS -+if ![info exists DEFAULT_CFLAGS] then { -+ set DEFAULT_CFLAGS " -ansi -pedantic-errors" -+} -+ -+# Initialize `dg'. -+dg-init -+ -+# Force SVE if we're not testing it already. -+if { [check_effective_target_aarch64_sve] } { -+ set sve_flags "" -+} else { -+ set sve_flags "-march=armv8.2-a+sve" -+} -+ -+# Main loop. -+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.c]] \ -+ $sve_flags $DEFAULT_CFLAGS -+ -+# All done. 
-+dg-finish -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_1.c -new file mode 100644 -index 000000000..12ae76789 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_1.c -@@ -0,0 +1,112 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+svbool_t ret_b (void) { return svptrue_b8 (); } -+ -+svint8_t ret_s8 (void) { return svdup_s8 (0); } -+svint16_t ret_s16 (void) { return svdup_s16 (0); } -+svint32_t ret_s32 (void) { return svdup_s32 (0); } -+svint64_t ret_s64 (void) { return svdup_s64 (0); } -+svuint8_t ret_u8 (void) { return svdup_u8 (0); } -+svuint16_t ret_u16 (void) { return svdup_u16 (0); } -+svuint32_t ret_u32 (void) { return svdup_u32 (0); } -+svuint64_t ret_u64 (void) { return svdup_u64 (0); } -+svbfloat16_t ret_bf16 (void) { return svundef_bf16 (); } -+svfloat16_t ret_f16 (void) { return svdup_f16 (0); } -+svfloat32_t ret_f32 (void) { return svdup_f32 (0); } -+svfloat64_t ret_f64 (void) { return svdup_f64 (0); } -+ -+svint8x2_t ret_s8x2 (void) { return svundef2_s8 (); } -+svint16x2_t ret_s16x2 (void) { return svundef2_s16 (); } -+svint32x2_t ret_s32x2 (void) { return svundef2_s32 (); } -+svint64x2_t ret_s64x2 (void) { return svundef2_s64 (); } -+svuint8x2_t ret_u8x2 (void) { return svundef2_u8 (); } -+svuint16x2_t ret_u16x2 (void) { return svundef2_u16 (); } -+svuint32x2_t ret_u32x2 (void) { return svundef2_u32 (); } -+svuint64x2_t ret_u64x2 (void) { return svundef2_u64 (); } -+svbfloat16x2_t ret_bf16x2 (void) { return svundef2_bf16 (); } -+svfloat16x2_t ret_f16x2 (void) { return svundef2_f16 (); } -+svfloat32x2_t ret_f32x2 (void) { return svundef2_f32 (); } -+svfloat64x2_t ret_f64x2 (void) { return svundef2_f64 (); } -+ -+svint8x3_t ret_s8x3 (void) { return svundef3_s8 (); } -+svint16x3_t ret_s16x3 (void) { return svundef3_s16 (); } -+svint32x3_t ret_s32x3 (void) { return svundef3_s32 (); } -+svint64x3_t ret_s64x3 (void) { return svundef3_s64 (); } -+svuint8x3_t ret_u8x3 (void) { return svundef3_u8 (); } -+svuint16x3_t ret_u16x3 (void) { return svundef3_u16 (); } -+svuint32x3_t ret_u32x3 (void) { return svundef3_u32 (); } -+svuint64x3_t ret_u64x3 (void) { return svundef3_u64 (); } -+svbfloat16x3_t ret_bf16x3 (void) { return svundef3_bf16 (); } -+svfloat16x3_t ret_f16x3 (void) { return svundef3_f16 (); } -+svfloat32x3_t ret_f32x3 (void) { return svundef3_f32 (); } -+svfloat64x3_t ret_f64x3 (void) { return svundef3_f64 (); } -+ -+svint8x4_t ret_s8x4 (void) { return svundef4_s8 (); } -+svint16x4_t ret_s16x4 (void) { return svundef4_s16 (); } -+svint32x4_t ret_s32x4 (void) { return svundef4_s32 (); } -+svint64x4_t ret_s64x4 (void) { return svundef4_s64 (); } -+svuint8x4_t ret_u8x4 (void) { return svundef4_u8 (); } -+svuint16x4_t ret_u16x4 (void) { return svundef4_u16 (); } -+svuint32x4_t ret_u32x4 (void) { return svundef4_u32 (); } -+svuint64x4_t ret_u64x4 (void) { return svundef4_u64 (); } -+svbfloat16x4_t ret_bf16x4 (void) { return svundef4_bf16 (); } -+svfloat16x4_t ret_f16x4 (void) { return svundef4_f16 (); } -+svfloat32x4_t ret_f32x4 (void) { return svundef4_f32 (); } -+svfloat64x4_t ret_f64x4 (void) { return svundef4_f64 (); } -+ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_b\n} } } */ -+ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_s8\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_s16\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_s32\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_s64\n} } } */ -+/* { dg-final { 
scan-assembler {\t\.variant_pcs\tret_u8\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u16\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u32\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u64\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_bf16\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_f16\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_f32\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_f64\n} } } */ -+ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_s8x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_s16x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_s32x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_s64x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u8x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u16x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u32x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u64x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_bf16x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_f16x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_f32x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_f64x2\n} } } */ -+ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_s8x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_s16x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_s16x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_s32x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_s64x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u8x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u16x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u32x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u64x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_bf16x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_f16x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_f32x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_f64x3\n} } } */ -+ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_s8x4\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_s16x4\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_s32x4\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_s64x4\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u8x4\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u16x4\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u32x4\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_u64x4\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_bf16x4\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_f16x4\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_f32x4\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tret_f64x4\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_2.c -new file mode 100644 -index 000000000..9f0741e3c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_2.c -@@ -0,0 +1,111 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+void fn_b (svbool_t x) {} -+ -+void fn_s8 (svint8_t x) {} -+void fn_s16 
(svint16_t x) {} -+void fn_s32 (svint32_t x) {} -+void fn_s64 (svint64_t x) {} -+void fn_u8 (svuint8_t x) {} -+void fn_u16 (svuint16_t x) {} -+void fn_u32 (svuint32_t x) {} -+void fn_u64 (svuint64_t x) {} -+void fn_bf16 (svbfloat16_t x) {} -+void fn_f16 (svfloat16_t x) {} -+void fn_f32 (svfloat32_t x) {} -+void fn_f64 (svfloat64_t x) {} -+ -+void fn_s8x2 (svint8x2_t x) {} -+void fn_s16x2 (svint16x2_t x) {} -+void fn_s32x2 (svint32x2_t x) {} -+void fn_s64x2 (svint64x2_t x) {} -+void fn_u8x2 (svuint8x2_t x) {} -+void fn_u16x2 (svuint16x2_t x) {} -+void fn_u32x2 (svuint32x2_t x) {} -+void fn_u64x2 (svuint64x2_t x) {} -+void fn_bf16x2 (svbfloat16x2_t x) {} -+void fn_f16x2 (svfloat16x2_t x) {} -+void fn_f32x2 (svfloat32x2_t x) {} -+void fn_f64x2 (svfloat64x2_t x) {} -+ -+void fn_s8x3 (svint8x3_t x) {} -+void fn_s16x3 (svint16x3_t x) {} -+void fn_s32x3 (svint32x3_t x) {} -+void fn_s64x3 (svint64x3_t x) {} -+void fn_u8x3 (svuint8x3_t x) {} -+void fn_u16x3 (svuint16x3_t x) {} -+void fn_u32x3 (svuint32x3_t x) {} -+void fn_u64x3 (svuint64x3_t x) {} -+void fn_bf16x3 (svbfloat16x3_t x) {} -+void fn_f16x3 (svfloat16x3_t x) {} -+void fn_f32x3 (svfloat32x3_t x) {} -+void fn_f64x3 (svfloat64x3_t x) {} -+ -+void fn_s8x4 (svint8x4_t x) {} -+void fn_s16x4 (svint16x4_t x) {} -+void fn_s32x4 (svint32x4_t x) {} -+void fn_s64x4 (svint64x4_t x) {} -+void fn_u8x4 (svuint8x4_t x) {} -+void fn_u16x4 (svuint16x4_t x) {} -+void fn_u32x4 (svuint32x4_t x) {} -+void fn_u64x4 (svuint64x4_t x) {} -+void fn_bf16x4 (svbfloat16x4_t x) {} -+void fn_f16x4 (svfloat16x4_t x) {} -+void fn_f32x4 (svfloat32x4_t x) {} -+void fn_f64x4 (svfloat64x4_t x) {} -+ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_b\n} } } */ -+ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s8\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s16\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s32\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s64\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u8\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f64\n} } } */ -+ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s8x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s16x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s32x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s64x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u8x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f64x2\n} } } */ -+ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s8x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s16x3\n} } } */ -+/* { dg-final { scan-assembler 
{\t\.variant_pcs\tfn_s32x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s64x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u8x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f64x3\n} } } */ -+ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s8x4\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s16x4\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s32x4\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s64x4\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u8x4\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16x4\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32x4\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64x4\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16x4\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16x4\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32x4\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f64x4\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_3.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_3.c -new file mode 100644 -index 000000000..42e7860ff ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_3.c -@@ -0,0 +1,107 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+void fn_s8 (float d0, float d1, float d2, float d3, svint8_t x) {} -+void fn_s16 (float d0, float d1, float d2, float d3, svint16_t x) {} -+void fn_s32 (float d0, float d1, float d2, float d3, svint32_t x) {} -+void fn_s64 (float d0, float d1, float d2, float d3, svint64_t x) {} -+void fn_u8 (float d0, float d1, float d2, float d3, svuint8_t x) {} -+void fn_u16 (float d0, float d1, float d2, float d3, svuint16_t x) {} -+void fn_u32 (float d0, float d1, float d2, float d3, svuint32_t x) {} -+void fn_u64 (float d0, float d1, float d2, float d3, svuint64_t x) {} -+void fn_bf16 (float d0, float d1, float d2, float d3, svbfloat16_t x) {} -+void fn_f16 (float d0, float d1, float d2, float d3, svfloat16_t x) {} -+void fn_f32 (float d0, float d1, float d2, float d3, svfloat32_t x) {} -+void fn_f64 (float d0, float d1, float d2, float d3, svfloat64_t x) {} -+ -+void fn_s8x2 (float d0, float d1, float d2, float d3, svint8x2_t x) {} -+void fn_s16x2 (float d0, float d1, float d2, float d3, svint16x2_t x) {} -+void fn_s32x2 (float d0, float d1, float d2, float d3, svint32x2_t x) {} -+void fn_s64x2 (float d0, float d1, float d2, float d3, svint64x2_t x) {} -+void fn_u8x2 (float d0, float d1, float d2, float d3, svuint8x2_t x) {} -+void fn_u16x2 (float d0, float d1, float d2, float d3, svuint16x2_t x) {} -+void fn_u32x2 (float d0, float d1, float d2, float d3, svuint32x2_t x) {} -+void fn_u64x2 (float d0, float d1, float d2, float d3, svuint64x2_t x) {} -+void fn_bf16x2 (float d0, float d1, float d2, float d3, svbfloat16x2_t x) {} -+void fn_f16x2 (float d0, float d1, float d2, float d3, svfloat16x2_t x) {} -+void fn_f32x2 (float d0, float d1, float d2, float d3, svfloat32x2_t x) {} -+void fn_f64x2 (float d0, float 
d1, float d2, float d3, svfloat64x2_t x) {} -+ -+void fn_s8x3 (float d0, float d1, float d2, float d3, svint8x3_t x) {} -+void fn_s16x3 (float d0, float d1, float d2, float d3, svint16x3_t x) {} -+void fn_s32x3 (float d0, float d1, float d2, float d3, svint32x3_t x) {} -+void fn_s64x3 (float d0, float d1, float d2, float d3, svint64x3_t x) {} -+void fn_u8x3 (float d0, float d1, float d2, float d3, svuint8x3_t x) {} -+void fn_u16x3 (float d0, float d1, float d2, float d3, svuint16x3_t x) {} -+void fn_u32x3 (float d0, float d1, float d2, float d3, svuint32x3_t x) {} -+void fn_u64x3 (float d0, float d1, float d2, float d3, svuint64x3_t x) {} -+void fn_bf16x3 (float d0, float d1, float d2, float d3, svbfloat16x3_t x) {} -+void fn_f16x3 (float d0, float d1, float d2, float d3, svfloat16x3_t x) {} -+void fn_f32x3 (float d0, float d1, float d2, float d3, svfloat32x3_t x) {} -+void fn_f64x3 (float d0, float d1, float d2, float d3, svfloat64x3_t x) {} -+ -+void fn_s8x4 (float d0, float d1, float d2, float d3, svint8x4_t x) {} -+void fn_s16x4 (float d0, float d1, float d2, float d3, svint16x4_t x) {} -+void fn_s32x4 (float d0, float d1, float d2, float d3, svint32x4_t x) {} -+void fn_s64x4 (float d0, float d1, float d2, float d3, svint64x4_t x) {} -+void fn_u8x4 (float d0, float d1, float d2, float d3, svuint8x4_t x) {} -+void fn_u16x4 (float d0, float d1, float d2, float d3, svuint16x4_t x) {} -+void fn_u32x4 (float d0, float d1, float d2, float d3, svuint32x4_t x) {} -+void fn_u64x4 (float d0, float d1, float d2, float d3, svuint64x4_t x) {} -+void fn_bf16x4 (float d0, float d1, float d2, float d3, svbfloat16x4_t x) {} -+void fn_f16x4 (float d0, float d1, float d2, float d3, svfloat16x4_t x) {} -+void fn_f32x4 (float d0, float d1, float d2, float d3, svfloat32x4_t x) {} -+void fn_f64x4 (float d0, float d1, float d2, float d3, svfloat64x4_t x) {} -+ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s8\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s16\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s32\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s64\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u8\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f64\n} } } */ -+ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s8x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s16x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s32x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s64x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u8x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f64x2\n} } } */ -+ -+/* { dg-final { 
scan-assembler {\t\.variant_pcs\tfn_s8x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s16x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s32x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s64x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u8x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f64x3\n} } } */ -+ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s8x4\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s16x4\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s32x4\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s64x4\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u8x4\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16x4\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32x4\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64x4\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16x4\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16x4\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32x4\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f64x4\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_4.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_4.c -new file mode 100644 -index 000000000..7e4438ed4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_4.c -@@ -0,0 +1,155 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+void fn_s8 (float d0, float d1, float d2, float d3, -+ float d4, svint8_t x) {} -+void fn_s16 (float d0, float d1, float d2, float d3, -+ float d4, svint16_t x) {} -+void fn_s32 (float d0, float d1, float d2, float d3, -+ float d4, svint32_t x) {} -+void fn_s64 (float d0, float d1, float d2, float d3, -+ float d4, svint64_t x) {} -+void fn_u8 (float d0, float d1, float d2, float d3, -+ float d4, svuint8_t x) {} -+void fn_u16 (float d0, float d1, float d2, float d3, -+ float d4, svuint16_t x) {} -+void fn_u32 (float d0, float d1, float d2, float d3, -+ float d4, svuint32_t x) {} -+void fn_u64 (float d0, float d1, float d2, float d3, -+ float d4, svuint64_t x) {} -+void fn_bf16 (float d0, float d1, float d2, float d3, -+ float d4, svbfloat16_t x) {} -+void fn_f16 (float d0, float d1, float d2, float d3, -+ float d4, svfloat16_t x) {} -+void fn_f32 (float d0, float d1, float d2, float d3, -+ float d4, svfloat32_t x) {} -+void fn_f64 (float d0, float d1, float d2, float d3, -+ float d4, svfloat64_t x) {} -+ -+void fn_s8x2 (float d0, float d1, float d2, float d3, -+ float d4, svint8x2_t x) {} -+void fn_s16x2 (float d0, float d1, float d2, float d3, -+ float d4, svint16x2_t x) {} -+void fn_s32x2 (float d0, float d1, float d2, float d3, -+ float d4, svint32x2_t x) {} -+void fn_s64x2 (float d0, float d1, float d2, float d3, -+ float d4, svint64x2_t x) {} -+void fn_u8x2 (float d0, float d1, float d2, float d3, -+ float d4, svuint8x2_t x) {} -+void fn_u16x2 (float d0, float d1, float d2, float d3, -+ float d4, svuint16x2_t x) {} -+void fn_u32x2 (float d0, float 
d1, float d2, float d3, -+ float d4, svuint32x2_t x) {} -+void fn_u64x2 (float d0, float d1, float d2, float d3, -+ float d4, svuint64x2_t x) {} -+void fn_bf16x2 (float d0, float d1, float d2, float d3, -+ float d4, svbfloat16x2_t x) {} -+void fn_f16x2 (float d0, float d1, float d2, float d3, -+ float d4, svfloat16x2_t x) {} -+void fn_f32x2 (float d0, float d1, float d2, float d3, -+ float d4, svfloat32x2_t x) {} -+void fn_f64x2 (float d0, float d1, float d2, float d3, -+ float d4, svfloat64x2_t x) {} -+ -+void fn_s8x3 (float d0, float d1, float d2, float d3, -+ float d4, svint8x3_t x) {} -+void fn_s16x3 (float d0, float d1, float d2, float d3, -+ float d4, svint16x3_t x) {} -+void fn_s32x3 (float d0, float d1, float d2, float d3, -+ float d4, svint32x3_t x) {} -+void fn_s64x3 (float d0, float d1, float d2, float d3, -+ float d4, svint64x3_t x) {} -+void fn_u8x3 (float d0, float d1, float d2, float d3, -+ float d4, svuint8x3_t x) {} -+void fn_u16x3 (float d0, float d1, float d2, float d3, -+ float d4, svuint16x3_t x) {} -+void fn_u32x3 (float d0, float d1, float d2, float d3, -+ float d4, svuint32x3_t x) {} -+void fn_u64x3 (float d0, float d1, float d2, float d3, -+ float d4, svuint64x3_t x) {} -+void fn_bf16x3 (float d0, float d1, float d2, float d3, -+ float d4, svbfloat16x3_t x) {} -+void fn_f16x3 (float d0, float d1, float d2, float d3, -+ float d4, svfloat16x3_t x) {} -+void fn_f32x3 (float d0, float d1, float d2, float d3, -+ float d4, svfloat32x3_t x) {} -+void fn_f64x3 (float d0, float d1, float d2, float d3, -+ float d4, svfloat64x3_t x) {} -+ -+void fn_s8x4 (float d0, float d1, float d2, float d3, -+ float d4, svint8x4_t x) {} -+void fn_s16x4 (float d0, float d1, float d2, float d3, -+ float d4, svint16x4_t x) {} -+void fn_s32x4 (float d0, float d1, float d2, float d3, -+ float d4, svint32x4_t x) {} -+void fn_s64x4 (float d0, float d1, float d2, float d3, -+ float d4, svint64x4_t x) {} -+void fn_u8x4 (float d0, float d1, float d2, float d3, -+ float d4, svuint8x4_t x) {} -+void fn_u16x4 (float d0, float d1, float d2, float d3, -+ float d4, svuint16x4_t x) {} -+void fn_u32x4 (float d0, float d1, float d2, float d3, -+ float d4, svuint32x4_t x) {} -+void fn_u64x4 (float d0, float d1, float d2, float d3, -+ float d4, svuint64x4_t x) {} -+void fn_bf16x4 (float d0, float d1, float d2, float d3, -+ float d4, svbfloat16x4_t x) {} -+void fn_f16x4 (float d0, float d1, float d2, float d3, -+ float d4, svfloat16x4_t x) {} -+void fn_f32x4 (float d0, float d1, float d2, float d3, -+ float d4, svfloat32x4_t x) {} -+void fn_f64x4 (float d0, float d1, float d2, float d3, -+ float d4, svfloat64x4_t x) {} -+ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s8\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s16\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s32\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s64\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u8\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f64\n} } } */ -+ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s8x2\n} } } */ -+/* { dg-final { 
scan-assembler {\t\.variant_pcs\tfn_s16x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s32x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s64x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u8x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f64x2\n} } } */ -+ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s8x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s16x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s32x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s64x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u8x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32x3\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f64x3\n} } } */ -+ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s8x4\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s16x4\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s32x4\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s64x4\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u8x4\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u16x4\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u32x4\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u64x4\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_bf16x4\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f16x4\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f32x4\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f64x4\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_5.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_5.c -new file mode 100644 -index 000000000..6dadc0492 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_5.c -@@ -0,0 +1,155 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+void fn_s8 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svint8_t x) {} -+void fn_s16 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svint16_t x) {} -+void fn_s32 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svint32_t x) {} -+void fn_s64 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svint64_t x) {} -+void fn_u8 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svuint8_t x) {} -+void fn_u16 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svuint16_t x) {} -+void fn_u32 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svuint32_t x) {} -+void fn_u64 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svuint64_t x) {} -+void 
fn_bf16 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svbfloat16_t x) {} -+void fn_f16 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svfloat16_t x) {} -+void fn_f32 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svfloat32_t x) {} -+void fn_f64 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svfloat64_t x) {} -+ -+void fn_s8x2 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svint8x2_t x) {} -+void fn_s16x2 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svint16x2_t x) {} -+void fn_s32x2 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svint32x2_t x) {} -+void fn_s64x2 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svint64x2_t x) {} -+void fn_u8x2 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svuint8x2_t x) {} -+void fn_u16x2 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svuint16x2_t x) {} -+void fn_u32x2 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svuint32x2_t x) {} -+void fn_u64x2 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svuint64x2_t x) {} -+void fn_bf16x2 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svbfloat16x2_t x) {} -+void fn_f16x2 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svfloat16x2_t x) {} -+void fn_f32x2 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svfloat32x2_t x) {} -+void fn_f64x2 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svfloat64x2_t x) {} -+ -+void fn_s8x3 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svint8x3_t x) {} -+void fn_s16x3 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svint16x3_t x) {} -+void fn_s32x3 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svint32x3_t x) {} -+void fn_s64x3 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svint64x3_t x) {} -+void fn_u8x3 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svuint8x3_t x) {} -+void fn_u16x3 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svuint16x3_t x) {} -+void fn_u32x3 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svuint32x3_t x) {} -+void fn_u64x3 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svuint64x3_t x) {} -+void fn_bf16x3 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svbfloat16x3_t x) {} -+void fn_f16x3 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svfloat16x3_t x) {} -+void fn_f32x3 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svfloat32x3_t x) {} -+void fn_f64x3 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svfloat64x3_t x) {} -+ -+void fn_s8x4 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svint8x4_t x) {} -+void fn_s16x4 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svint16x4_t x) {} -+void fn_s32x4 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svint32x4_t x) {} -+void fn_s64x4 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svint64x4_t x) {} -+void fn_u8x4 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svuint8x4_t x) {} -+void fn_u16x4 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svuint16x4_t x) {} -+void fn_u32x4 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svuint32x4_t x) {} -+void fn_u64x4 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svuint64x4_t x) {} -+void 
fn_bf16x4 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svbfloat16x4_t x) {} -+void fn_f16x4 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svfloat16x4_t x) {} -+void fn_f32x4 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svfloat32x4_t x) {} -+void fn_f64x4 (float d0, float d1, float d2, float d3, -+ float d4, float d5, svfloat64x4_t x) {} -+ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s8\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s16\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s32\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s64\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u8\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f64\n} } } */ -+ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s8x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s16x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s32x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s64x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u8x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32x2\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f64x2\n} } } */ -+ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s8x3\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s16x3\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s32x3\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s64x3\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u8x3\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u16x3\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u32x3\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u64x3\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_bf16x3\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f16x3\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f32x3\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f64x3\n} } } */ -+ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s8x4\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s16x4\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s32x4\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s64x4\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u8x4\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u16x4\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u32x4\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u64x4\n} } } */ -+/* { dg-final 
{ scan-assembler-not {\t\.variant_pcs\tfn_bf16x4\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f16x4\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f32x4\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f64x4\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_6.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_6.c -new file mode 100644 -index 000000000..0ff73e259 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_6.c -@@ -0,0 +1,155 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+void fn_s8 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svint8_t x) {} -+void fn_s16 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svint16_t x) {} -+void fn_s32 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svint32_t x) {} -+void fn_s64 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svint64_t x) {} -+void fn_u8 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svuint8_t x) {} -+void fn_u16 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svuint16_t x) {} -+void fn_u32 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svuint32_t x) {} -+void fn_u64 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svuint64_t x) {} -+void fn_bf16 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svbfloat16_t x) {} -+void fn_f16 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svfloat16_t x) {} -+void fn_f32 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svfloat32_t x) {} -+void fn_f64 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svfloat64_t x) {} -+ -+void fn_s8x2 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svint8x2_t x) {} -+void fn_s16x2 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svint16x2_t x) {} -+void fn_s32x2 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svint32x2_t x) {} -+void fn_s64x2 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svint64x2_t x) {} -+void fn_u8x2 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svuint8x2_t x) {} -+void fn_u16x2 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svuint16x2_t x) {} -+void fn_u32x2 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svuint32x2_t x) {} -+void fn_u64x2 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svuint64x2_t x) {} -+void fn_bf16x2 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svbfloat16x2_t x) {} -+void fn_f16x2 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svfloat16x2_t x) {} -+void fn_f32x2 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svfloat32x2_t x) {} -+void fn_f64x2 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svfloat64x2_t x) {} -+ -+void fn_s8x3 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svint8x3_t x) {} -+void fn_s16x3 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svint16x3_t x) {} -+void fn_s32x3 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svint32x3_t x) {} -+void fn_s64x3 (float d0, float d1, float d2, float d3, -+ float d4, 
float d5, float d6, svint64x3_t x) {} -+void fn_u8x3 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svuint8x3_t x) {} -+void fn_u16x3 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svuint16x3_t x) {} -+void fn_u32x3 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svuint32x3_t x) {} -+void fn_u64x3 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svuint64x3_t x) {} -+void fn_bf16x3 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svbfloat16x3_t x) {} -+void fn_f16x3 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svfloat16x3_t x) {} -+void fn_f32x3 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svfloat32x3_t x) {} -+void fn_f64x3 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svfloat64x3_t x) {} -+ -+void fn_s8x4 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svint8x4_t x) {} -+void fn_s16x4 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svint16x4_t x) {} -+void fn_s32x4 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svint32x4_t x) {} -+void fn_s64x4 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svint64x4_t x) {} -+void fn_u8x4 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svuint8x4_t x) {} -+void fn_u16x4 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svuint16x4_t x) {} -+void fn_u32x4 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svuint32x4_t x) {} -+void fn_u64x4 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svuint64x4_t x) {} -+void fn_bf16x4 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svbfloat16x4_t x) {} -+void fn_f16x4 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svfloat16x4_t x) {} -+void fn_f32x4 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svfloat32x4_t x) {} -+void fn_f64x4 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, svfloat64x4_t x) {} -+ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s8\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s16\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s32\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_s64\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u8\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u16\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u32\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_u64\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_bf16\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f16\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f32\n} } } */ -+/* { dg-final { scan-assembler {\t\.variant_pcs\tfn_f64\n} } } */ -+ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s8x2\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s16x2\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s32x2\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s64x2\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u8x2\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u16x2\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u32x2\n} } 
} */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u64x2\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_bf16x2\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f16x2\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f32x2\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f64x2\n} } } */ -+ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s8x3\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s16x3\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s32x3\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s64x3\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u8x3\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u16x3\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u32x3\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u64x3\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_bf16x3\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f16x3\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f32x3\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f64x3\n} } } */ -+ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s8x4\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s16x4\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s32x4\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_s64x4\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u8x4\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u16x4\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u32x4\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_u64x4\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_bf16x4\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f16x4\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f32x4\n} } } */ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\tfn_f64x4\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_7.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_7.c -new file mode 100644 -index 000000000..4f3ff8107 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/annotate_7.c -@@ -0,0 +1,105 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+void fn_s8 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svint8_t x) {} -+void fn_s16 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svint16_t x) {} -+void fn_s32 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svint32_t x) {} -+void fn_s64 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svint64_t x) {} -+void fn_u8 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svuint8_t x) {} -+void fn_u16 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svuint16_t x) {} -+void fn_u32 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svuint32_t x) {} -+void fn_u64 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svuint64_t x) {} -+void fn_bf16 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svbfloat16_t x) {} -+void fn_f16 (float d0, float d1, float d2, float d3, 
-+ float d4, float d5, float d6, float d7, svfloat16_t x) {} -+void fn_f32 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svfloat32_t x) {} -+void fn_f64 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svfloat64_t x) {} -+ -+void fn_s8x2 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svint8x2_t x) {} -+void fn_s16x2 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svint16x2_t x) {} -+void fn_s32x2 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svint32x2_t x) {} -+void fn_s64x2 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svint64x2_t x) {} -+void fn_u8x2 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svuint8x2_t x) {} -+void fn_u16x2 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svuint16x2_t x) {} -+void fn_u32x2 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svuint32x2_t x) {} -+void fn_u64x2 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svuint64x2_t x) {} -+void fn_bf16x2 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svbfloat16x2_t x) {} -+void fn_f16x2 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svfloat16x2_t x) {} -+void fn_f32x2 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svfloat32x2_t x) {} -+void fn_f64x2 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svfloat64x2_t x) {} -+ -+void fn_s8x3 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svint8x3_t x) {} -+void fn_s16x3 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svint16x3_t x) {} -+void fn_s32x3 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svint32x3_t x) {} -+void fn_s64x3 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svint64x3_t x) {} -+void fn_u8x3 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svuint8x3_t x) {} -+void fn_u16x3 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svuint16x3_t x) {} -+void fn_u32x3 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svuint32x3_t x) {} -+void fn_u64x3 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svuint64x3_t x) {} -+void fn_bf16x3 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svbfloat16x3_t x) {} -+void fn_f16x3 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svfloat16x3_t x) {} -+void fn_f32x3 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svfloat32x3_t x) {} -+void fn_f64x3 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svfloat64x3_t x) {} -+ -+void fn_s8x4 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svint8x4_t x) {} -+void fn_s16x4 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svint16x4_t x) {} -+void fn_s32x4 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svint32x4_t x) {} -+void fn_s64x4 (float d0, float d1, float d2, 
float d3, -+ float d4, float d5, float d6, float d7, svint64x4_t x) {} -+void fn_u8x4 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svuint8x4_t x) {} -+void fn_u16x4 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svuint16x4_t x) {} -+void fn_u32x4 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svuint32x4_t x) {} -+void fn_u64x4 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svuint64x4_t x) {} -+void fn_bf16x4 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svbfloat16x4_t x) {} -+void fn_f16x4 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svfloat16x4_t x) {} -+void fn_f32x4 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svfloat32x4_t x) {} -+void fn_f64x4 (float d0, float d1, float d2, float d3, -+ float d4, float d5, float d6, float d7, svfloat64x4_t x) {} -+ -+/* { dg-final { scan-assembler-not {\t\.variant_pcs\t\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_1.c -new file mode 100644 -index 000000000..fd9932e2e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_1.c -@@ -0,0 +1,49 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+#include -+ -+/* -+** callee_pred: -+** ldr (p[0-9]+), \[x0\] -+** ldr (p[0-9]+), \[x1\] -+** brkpa (p[0-7])\.b, p0/z, p1\.b, p2\.b -+** brkpb (p[0-7])\.b, \3/z, p3\.b, \1\.b -+** brka p0\.b, \4/z, \2\.b -+** ret -+*/ -+__SVBool_t __attribute__((noipa)) -+callee_pred (__SVBool_t p0, __SVBool_t p1, __SVBool_t p2, __SVBool_t p3, -+ __SVBool_t mem0, __SVBool_t mem1) -+{ -+ p0 = svbrkpa_z (p0, p1, p2); -+ p0 = svbrkpb_z (p0, p3, mem0); -+ return svbrka_z (p0, mem1); -+} -+ -+/* -+** caller_pred: -+** ... -+** ptrue (p[0-9]+)\.b, vl5 -+** str \1, \[x0\] -+** ... -+** ptrue (p[0-9]+)\.h, vl6 -+** str \2, \[x1\] -+** ptrue p3\.d, vl4 -+** ptrue p2\.s, vl3 -+** ptrue p1\.h, vl2 -+** ptrue p0\.b, vl1 -+** bl callee_pred -+** ... 
-+*/ -+__SVBool_t __attribute__((noipa)) -+caller_pred (void) -+{ -+ return callee_pred (svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3), -+ svptrue_pat_b64 (SV_VL4), -+ svptrue_pat_b8 (SV_VL5), -+ svptrue_pat_b16 (SV_VL6)); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_10.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_10.c -new file mode 100644 -index 000000000..1bbcb770d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_10.c -@@ -0,0 +1,33 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#include -+ -+/* -+** callee: -+** fadd s0, (s0, s6|s6, s0) -+** ret -+*/ -+float __attribute__((noipa)) -+callee (float s0, double d1, svfloat32x4_t z2, svfloat64x4_t stack1, -+ float s6, double d7) -+{ -+ return s0 + s6; -+} -+ -+float __attribute__((noipa)) -+caller (float32_t *x0, float64_t *x1) -+{ -+ return callee (0.0f, 1.0, -+ svld4 (svptrue_b8 (), x0), -+ svld4 (svptrue_b8 (), x1), -+ 6.0f, 7.0); -+} -+ -+/* { dg-final { scan-assembler {\tld4w\t{z2\.s - z5\.s}, p[0-7]/z, \[x0\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4d\t{z[0-9]+\.d - z[0-9]+\.d}, p[0-7]/z, \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tmovi\tv0\.[24]s, #0\n} } } */ -+/* { dg-final { scan-assembler {\tfmov\td1, #?1\.0} } } */ -+/* { dg-final { scan-assembler {\tfmov\ts6, #?6\.0} } } */ -+/* { dg-final { scan-assembler {\tfmov\td7, #?7\.0} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_11_nosc.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_11_nosc.c -new file mode 100644 -index 000000000..0f62e0b08 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_11_nosc.c -@@ -0,0 +1,61 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O0 -g" } */ -+ -+#include -+ -+void __attribute__((noipa)) -+callee (svbool_t p, svint8_t s8, svuint16x4_t u16, svfloat32x3_t f32, -+ svint64x2_t s64) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ -+ if (svptest_any (pg, sveor_z (pg, p, svptrue_pat_b8 (SV_VL7)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, s8, svindex_s8 (1, 2)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, svget4 (u16, 0), svindex_u16 (2, 3)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, svget4 (u16, 1), svindex_u16 (3, 4)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, svget4 (u16, 2), svindex_u16 (4, 5)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, svget4 (u16, 3), svindex_u16 (5, 6)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, svget3 (f32, 0), svdup_f32 (1.0)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, svget3 (f32, 1), svdup_f32 (2.0)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, svget3 (f32, 2), svdup_f32 (3.0)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, svget2 (s64, 0), svindex_s64 (6, 7)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, svget2 (s64, 1), svindex_s64 (7, 8)))) -+ __builtin_abort (); -+} -+ -+int __attribute__((noipa)) -+main (void) -+{ -+ callee (svptrue_pat_b8 (SV_VL7), -+ svindex_s8 (1, 2), -+ svcreate4 (svindex_u16 (2, 3), -+ svindex_u16 (3, 4), -+ svindex_u16 (4, 5), -+ svindex_u16 (5, 6)), -+ svcreate3 (svdup_f32 (1.0), -+ svdup_f32 (2.0), -+ svdup_f32 (3.0)), -+ svcreate2 (svindex_s64 (6, 7), -+ svindex_s64 (7, 8))); -+} -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_11_sc.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_11_sc.c -new file mode 100644 -index 000000000..8a98d58ce ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_11_sc.c -@@ -0,0 +1,61 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O0 -fstack-clash-protection -g" } */ -+ -+#include -+ -+void __attribute__((noipa)) -+callee (svbool_t p, svint8_t s8, svuint16x4_t u16, svfloat32x3_t f32, -+ svint64x2_t s64) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ -+ if (svptest_any (pg, sveor_z (pg, p, svptrue_pat_b8 (SV_VL7)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, s8, svindex_s8 (1, 2)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, svget4 (u16, 0), svindex_u16 (2, 3)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, svget4 (u16, 1), svindex_u16 (3, 4)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, svget4 (u16, 2), svindex_u16 (4, 5)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, svget4 (u16, 3), svindex_u16 (5, 6)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, svget3 (f32, 0), svdup_f32 (1.0)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, svget3 (f32, 1), svdup_f32 (2.0)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, svget3 (f32, 2), svdup_f32 (3.0)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, svget2 (s64, 0), svindex_s64 (6, 7)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, svget2 (s64, 1), svindex_s64 (7, 8)))) -+ __builtin_abort (); -+} -+ -+int __attribute__((noipa)) -+main (void) -+{ -+ callee (svptrue_pat_b8 (SV_VL7), -+ svindex_s8 (1, 2), -+ svcreate4 (svindex_u16 (2, 3), -+ svindex_u16 (3, 4), -+ svindex_u16 (4, 5), -+ svindex_u16 (5, 6)), -+ svcreate3 (svdup_f32 (1.0), -+ svdup_f32 (2.0), -+ svdup_f32 (3.0)), -+ svcreate2 (svindex_s64 (6, 7), -+ svindex_s64 (7, 8))); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_2.c -new file mode 100644 -index 000000000..43a50887d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_2.c -@@ -0,0 +1,70 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+#include -+ -+/* -+** callee_int: -+** ptrue p3\.b, all -+** ld1b (z(?:2[4-9]|3[0-1]).b), p3/z, \[x4\] -+** st1b \1, p2, \[x0\] -+** st1b z4\.b, p1, \[x0\] -+** st1h z5\.h, p1, \[x1\] -+** st1w z6\.s, p1, \[x2\] -+** st1d z7\.d, p1, \[x3\] -+** st1b z0\.b, p0, \[x0\] -+** st1h z1\.h, p0, \[x1\] -+** st1w z2\.s, p0, \[x2\] -+** st1d z3\.d, p0, \[x3\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee_int (int8_t *x0, int16_t *x1, int32_t *x2, int64_t *x3, -+ svint8_t z0, svint16_t z1, svint32_t z2, svint64_t z3, -+ svint8_t z4, svint16_t z5, svint32_t z6, svint64_t z7, -+ svint8_t z8, -+ svbool_t p0, svbool_t p1, svbool_t p2) -+{ -+ svst1 (p2, x0, z8); -+ svst1 (p1, x0, z4); -+ svst1 (p1, x1, z5); -+ svst1 (p1, x2, z6); -+ svst1 (p1, x3, z7); -+ svst1 (p0, x0, z0); -+ svst1 (p0, x1, z1); -+ svst1 (p0, x2, z2); -+ svst1 (p0, x3, z3); -+} -+ -+void __attribute__((noipa)) -+caller_int (int8_t *x0, int16_t *x1, int32_t *x2, int64_t *x3) -+{ -+ callee_int (x0, x1, x2, x3, -+ svdup_s8 (0), -+ svdup_s16 (1), -+ svdup_s32 (2), -+ svdup_s64 (3), -+ svdup_s8 (4), -+ svdup_s16 (5), -+ svdup_s32 (6), -+ svdup_s64 (7), -+ svdup_s8 (8), -+ 
svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3)); -+} -+ -+/* { dg-final { scan-assembler {\tmov\tz0\.b, #0\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tz1\.h, #1\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tz2\.s, #2\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tz3\.d, #3\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tz4\.b, #4\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tz5\.h, #5\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tz6\.s, #6\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tz7\.d, #7\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tx4, sp\n} } } */ -+/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.b), #8\n.*\tst1b\t\1, p[0-7], \[x4\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_3.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_3.c -new file mode 100644 -index 000000000..49fdfc984 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_3.c -@@ -0,0 +1,70 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+#include -+ -+/* -+** callee_uint: -+** ptrue p3\.b, all -+** ld1b (z(?:2[4-9]|3[0-1]).b), p3/z, \[x4\] -+** st1b \1, p2, \[x0\] -+** st1b z4\.b, p1, \[x0\] -+** st1h z5\.h, p1, \[x1\] -+** st1w z6\.s, p1, \[x2\] -+** st1d z7\.d, p1, \[x3\] -+** st1b z0\.b, p0, \[x0\] -+** st1h z1\.h, p0, \[x1\] -+** st1w z2\.s, p0, \[x2\] -+** st1d z3\.d, p0, \[x3\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee_uint (uint8_t *x0, uint16_t *x1, uint32_t *x2, uint64_t *x3, -+ svuint8_t z0, svuint16_t z1, svuint32_t z2, svuint64_t z3, -+ svuint8_t z4, svuint16_t z5, svuint32_t z6, svuint64_t z7, -+ svuint8_t z8, -+ svbool_t p0, svbool_t p1, svbool_t p2) -+{ -+ svst1 (p2, x0, z8); -+ svst1 (p1, x0, z4); -+ svst1 (p1, x1, z5); -+ svst1 (p1, x2, z6); -+ svst1 (p1, x3, z7); -+ svst1 (p0, x0, z0); -+ svst1 (p0, x1, z1); -+ svst1 (p0, x2, z2); -+ svst1 (p0, x3, z3); -+} -+ -+void __attribute__((noipa)) -+caller_uint (uint8_t *x0, uint16_t *x1, uint32_t *x2, uint64_t *x3) -+{ -+ callee_uint (x0, x1, x2, x3, -+ svdup_u8 (0), -+ svdup_u16 (1), -+ svdup_u32 (2), -+ svdup_u64 (3), -+ svdup_u8 (4), -+ svdup_u16 (5), -+ svdup_u32 (6), -+ svdup_u64 (7), -+ svdup_u8 (8), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3)); -+} -+ -+/* { dg-final { scan-assembler {\tmov\tz0\.b, #0\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tz1\.h, #1\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tz2\.s, #2\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tz3\.d, #3\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tz4\.b, #4\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tz5\.h, #5\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tz6\.s, #6\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tz7\.d, #7\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tx4, sp\n} } } */ -+/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.b), #8\n.*\tst1b\t\1, p[0-7], \[x4\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_4.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_4.c 
-new file mode 100644 -index 000000000..4f15fdd50 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_4.c -@@ -0,0 +1,70 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+#include -+ -+/* -+** callee_float: -+** ptrue p3\.b, all -+** ld1h (z(?:2[4-9]|3[0-1]).h), p3/z, \[x4\] -+** st1h \1, p2, \[x0\] -+** st1h z4\.h, p1, \[x0\] -+** st1h z5\.h, p1, \[x1\] -+** st1w z6\.s, p1, \[x2\] -+** st1d z7\.d, p1, \[x3\] -+** st1h z0\.h, p0, \[x0\] -+** st1h z1\.h, p0, \[x1\] -+** st1w z2\.s, p0, \[x2\] -+** st1d z3\.d, p0, \[x3\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee_float (float16_t *x0, float16_t *x1, float32_t *x2, float64_t *x3, -+ svfloat16_t z0, svfloat16_t z1, svfloat32_t z2, svfloat64_t z3, -+ svfloat16_t z4, svfloat16_t z5, svfloat32_t z6, svfloat64_t z7, -+ svfloat16_t z8, -+ svbool_t p0, svbool_t p1, svbool_t p2) -+{ -+ svst1 (p2, x0, z8); -+ svst1 (p1, x0, z4); -+ svst1 (p1, x1, z5); -+ svst1 (p1, x2, z6); -+ svst1 (p1, x3, z7); -+ svst1 (p0, x0, z0); -+ svst1 (p0, x1, z1); -+ svst1 (p0, x2, z2); -+ svst1 (p0, x3, z3); -+} -+ -+void __attribute__((noipa)) -+caller_float (float16_t *x0, float16_t *x1, float32_t *x2, float64_t *x3) -+{ -+ callee_float (x0, x1, x2, x3, -+ svdup_f16 (0), -+ svdup_f16 (1), -+ svdup_f32 (2), -+ svdup_f64 (3), -+ svdup_f16 (4), -+ svdup_f16 (5), -+ svdup_f32 (6), -+ svdup_f64 (7), -+ svdup_f16 (8), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3)); -+} -+ -+/* { dg-final { scan-assembler {\tmov\tz0\.[bhsd], #0\n} } } */ -+/* { dg-final { scan-assembler {\tfmov\tz1\.h, #1\.0} } } */ -+/* { dg-final { scan-assembler {\tfmov\tz2\.s, #2\.0} } } */ -+/* { dg-final { scan-assembler {\tfmov\tz3\.d, #3\.0} } } */ -+/* { dg-final { scan-assembler {\tfmov\tz4\.h, #4\.0} } } */ -+/* { dg-final { scan-assembler {\tfmov\tz5\.h, #5\.0} } } */ -+/* { dg-final { scan-assembler {\tfmov\tz6\.s, #6\.0} } } */ -+/* { dg-final { scan-assembler {\tfmov\tz7\.d, #7\.0} } } */ -+/* { dg-final { scan-assembler {\tmov\tx4, sp\n} } } */ -+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.h), #8\.0.*\tst1h\t\1, p[0-7], \[x4\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_bf16.c -new file mode 100644 -index 000000000..e9b63a45d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_bf16.c -@@ -0,0 +1,63 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee: -+** addvl sp, sp, #-1 -+** str p4, \[sp\] -+** ptrue p4\.b, all -+** ( -+** ld1h (z[0-9]+\.h), p4/z, \[x1, #1, mul vl\] -+** ld1h (z[0-9]+\.h), p4/z, \[x1\] -+** st2h {\2 - \1}, p0, \[x0\] -+** | -+** ld1h (z[0-9]+\.h), p4/z, \[x1\] -+** ld1h (z[0-9]+\.h), p4/z, \[x1, #1, mul vl\] -+** st2h {\3 - \4}, p0, \[x0\] -+** ) -+** st4h {z0\.h - z3\.h}, p1, \[x0\] -+** st3h {z4\.h - z6\.h}, p2, \[x0\] -+** st1h z7\.h, p3, \[x0\] -+** ldr p4, \[sp\] -+** addvl sp, sp, #1 -+** ret -+*/ -+void __attribute__((noipa)) -+callee (void *x0, svbfloat16x4_t z0, svbfloat16x3_t z4, svbfloat16x2_t stack, -+ 
svbfloat16_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) -+{ -+ svst2 (p0, x0, stack); -+ svst4 (p1, x0, z0); -+ svst3 (p2, x0, z4); -+ svst1_bf16 (p3, x0, z7); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee (x0, -+ svld4_vnum_bf16 (pg, x0, -8), -+ svld3_vnum_bf16 (pg, x0, -3), -+ svld2_vnum_bf16 (pg, x0, 0), -+ svld1_vnum_bf16 (pg, x0, 2), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3), -+ svptrue_pat_b64 (SV_VL4)); -+} -+ -+/* { dg-final { scan-assembler {\tld4h\t{z0\.h - z3\.h}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3h\t{z4\.h - z6\.h}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1h\tz7\.h, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ -+/* { dg-final { scan-assembler {\tld2h\t{(z[0-9]+\.h) - z[0-9]+\.h}.*\tst1h\t\1, p[0-7], \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2h\t{z[0-9]+\.h - (z[0-9]+\.h)}.*\tst1h\t\1, p[0-7], \[x1, #1, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_f16.c -new file mode 100644 -index 000000000..4152f9125 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_f16.c -@@ -0,0 +1,63 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee: -+** addvl sp, sp, #-1 -+** str p4, \[sp\] -+** ptrue p4\.b, all -+** ( -+** ld1h (z[0-9]+\.h), p4/z, \[x1, #1, mul vl\] -+** ld1h (z[0-9]+\.h), p4/z, \[x1\] -+** st2h {\2 - \1}, p0, \[x0\] -+** | -+** ld1h (z[0-9]+\.h), p4/z, \[x1\] -+** ld1h (z[0-9]+\.h), p4/z, \[x1, #1, mul vl\] -+** st2h {\3 - \4}, p0, \[x0\] -+** ) -+** st4h {z0\.h - z3\.h}, p1, \[x0\] -+** st3h {z4\.h - z6\.h}, p2, \[x0\] -+** st1h z7\.h, p3, \[x0\] -+** ldr p4, \[sp\] -+** addvl sp, sp, #1 -+** ret -+*/ -+void __attribute__((noipa)) -+callee (void *x0, svfloat16x4_t z0, svfloat16x3_t z4, svfloat16x2_t stack, -+ svfloat16_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) -+{ -+ svst2 (p0, x0, stack); -+ svst4 (p1, x0, z0); -+ svst3 (p2, x0, z4); -+ svst1_f16 (p3, x0, z7); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee (x0, -+ svld4_vnum_f16 (pg, x0, -8), -+ svld3_vnum_f16 (pg, x0, -3), -+ svld2_vnum_f16 (pg, x0, 0), -+ svld1_vnum_f16 (pg, x0, 2), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3), -+ svptrue_pat_b64 (SV_VL4)); -+} -+ -+/* { dg-final { scan-assembler {\tld4h\t{z0\.h - z3\.h}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3h\t{z4\.h - z6\.h}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1h\tz7\.h, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ -+/* { dg-final { scan-assembler {\tld2h\t{(z[0-9]+\.h) - z[0-9]+\.h}.*\tst1h\t\1, p[0-7], \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2h\t{z[0-9]+\.h - (z[0-9]+\.h)}.*\tst1h\t\1, p[0-7], \[x1, #1, 
mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_f32.c -new file mode 100644 -index 000000000..0f78fac79 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_f32.c -@@ -0,0 +1,63 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee: -+** addvl sp, sp, #-1 -+** str p4, \[sp\] -+** ptrue p4\.b, all -+** ( -+** ld1w (z[0-9]+\.s), p4/z, \[x1, #1, mul vl\] -+** ld1w (z[0-9]+\.s), p4/z, \[x1\] -+** st2w {\2 - \1}, p0, \[x0\] -+** | -+** ld1w (z[0-9]+\.s), p4/z, \[x1\] -+** ld1w (z[0-9]+\.s), p4/z, \[x1, #1, mul vl\] -+** st2w {\3 - \4}, p0, \[x0\] -+** ) -+** st4w {z0\.s - z3\.s}, p1, \[x0\] -+** st3w {z4\.s - z6\.s}, p2, \[x0\] -+** st1w z7\.s, p3, \[x0\] -+** ldr p4, \[sp\] -+** addvl sp, sp, #1 -+** ret -+*/ -+void __attribute__((noipa)) -+callee (void *x0, svfloat32x4_t z0, svfloat32x3_t z4, svfloat32x2_t stack, -+ svfloat32_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) -+{ -+ svst2 (p0, x0, stack); -+ svst4 (p1, x0, z0); -+ svst3 (p2, x0, z4); -+ svst1_f32 (p3, x0, z7); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee (x0, -+ svld4_vnum_f32 (pg, x0, -8), -+ svld3_vnum_f32 (pg, x0, -3), -+ svld2_vnum_f32 (pg, x0, 0), -+ svld1_vnum_f32 (pg, x0, 2), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3), -+ svptrue_pat_b64 (SV_VL4)); -+} -+ -+/* { dg-final { scan-assembler {\tld4w\t{z0\.s - z3\.s}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3w\t{z4\.s - z6\.s}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1w\tz7\.s, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ -+/* { dg-final { scan-assembler {\tld2w\t{(z[0-9]+\.s) - z[0-9]+\.s}.*\tst1w\t\1, p[0-7], \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2w\t{z[0-9]+\.s - (z[0-9]+\.s)}.*\tst1w\t\1, p[0-7], \[x1, #1, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_f64.c -new file mode 100644 -index 000000000..fe832d0d0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_f64.c -@@ -0,0 +1,63 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee: -+** addvl sp, sp, #-1 -+** str p4, \[sp\] -+** ptrue p4\.b, all -+** ( -+** ld1d (z[0-9]+\.d), p4/z, \[x1, #1, mul vl\] -+** ld1d (z[0-9]+\.d), p4/z, \[x1\] -+** st2d {\2 - \1}, p0, \[x0\] -+** | -+** ld1d (z[0-9]+\.d), p4/z, \[x1\] -+** ld1d (z[0-9]+\.d), p4/z, \[x1, #1, mul vl\] -+** st2d {\3 - \4}, p0, \[x0\] 
-+** ) -+** st4d {z0\.d - z3\.d}, p1, \[x0\] -+** st3d {z4\.d - z6\.d}, p2, \[x0\] -+** st1d z7\.d, p3, \[x0\] -+** ldr p4, \[sp\] -+** addvl sp, sp, #1 -+** ret -+*/ -+void __attribute__((noipa)) -+callee (void *x0, svfloat64x4_t z0, svfloat64x3_t z4, svfloat64x2_t stack, -+ svfloat64_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) -+{ -+ svst2 (p0, x0, stack); -+ svst4 (p1, x0, z0); -+ svst3 (p2, x0, z4); -+ svst1_f64 (p3, x0, z7); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee (x0, -+ svld4_vnum_f64 (pg, x0, -8), -+ svld3_vnum_f64 (pg, x0, -3), -+ svld2_vnum_f64 (pg, x0, 0), -+ svld1_vnum_f64 (pg, x0, 2), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3), -+ svptrue_pat_b64 (SV_VL4)); -+} -+ -+/* { dg-final { scan-assembler {\tld4d\t{z0\.d - z3\.d}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3d\t{z4\.d - z6\.d}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1d\tz7\.d, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ -+/* { dg-final { scan-assembler {\tld2d\t{(z[0-9]+\.d) - z[0-9]+\.d}.*\tst1d\t\1, p[0-7], \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2d\t{z[0-9]+\.d - (z[0-9]+\.d)}.*\tst1d\t\1, p[0-7], \[x1, #1, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_s16.c -new file mode 100644 -index 000000000..3f708e0f0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_s16.c -@@ -0,0 +1,63 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee: -+** addvl sp, sp, #-1 -+** str p4, \[sp\] -+** ptrue p4\.b, all -+** ( -+** ld1h (z[0-9]+\.h), p4/z, \[x1, #1, mul vl\] -+** ld1h (z[0-9]+\.h), p4/z, \[x1\] -+** st2h {\2 - \1}, p0, \[x0\] -+** | -+** ld1h (z[0-9]+\.h), p4/z, \[x1\] -+** ld1h (z[0-9]+\.h), p4/z, \[x1, #1, mul vl\] -+** st2h {\3 - \4}, p0, \[x0\] -+** ) -+** st4h {z0\.h - z3\.h}, p1, \[x0\] -+** st3h {z4\.h - z6\.h}, p2, \[x0\] -+** st1h z7\.h, p3, \[x0\] -+** ldr p4, \[sp\] -+** addvl sp, sp, #1 -+** ret -+*/ -+void __attribute__((noipa)) -+callee (void *x0, svint16x4_t z0, svint16x3_t z4, svint16x2_t stack, -+ svint16_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) -+{ -+ svst2 (p0, x0, stack); -+ svst4 (p1, x0, z0); -+ svst3 (p2, x0, z4); -+ svst1_s16 (p3, x0, z7); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee (x0, -+ svld4_vnum_s16 (pg, x0, -8), -+ svld3_vnum_s16 (pg, x0, -3), -+ svld2_vnum_s16 (pg, x0, 0), -+ svld1_vnum_s16 (pg, x0, 2), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3), -+ svptrue_pat_b64 (SV_VL4)); -+} -+ -+/* { dg-final { scan-assembler {\tld4h\t{z0\.h - z3\.h}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3h\t{z4\.h - z6\.h}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1h\tz7\.h, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ -+/* 
{ dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ -+/* { dg-final { scan-assembler {\tld2h\t{(z[0-9]+\.h) - z[0-9]+\.h}.*\tst1h\t\1, p[0-7], \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2h\t{z[0-9]+\.h - (z[0-9]+\.h)}.*\tst1h\t\1, p[0-7], \[x1, #1, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_s32.c -new file mode 100644 -index 000000000..8c57190ea ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_s32.c -@@ -0,0 +1,63 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee: -+** addvl sp, sp, #-1 -+** str p4, \[sp\] -+** ptrue p4\.b, all -+** ( -+** ld1w (z[0-9]+\.s), p4/z, \[x1, #1, mul vl\] -+** ld1w (z[0-9]+\.s), p4/z, \[x1\] -+** st2w {\2 - \1}, p0, \[x0\] -+** | -+** ld1w (z[0-9]+\.s), p4/z, \[x1\] -+** ld1w (z[0-9]+\.s), p4/z, \[x1, #1, mul vl\] -+** st2w {\3 - \4}, p0, \[x0\] -+** ) -+** st4w {z0\.s - z3\.s}, p1, \[x0\] -+** st3w {z4\.s - z6\.s}, p2, \[x0\] -+** st1w z7\.s, p3, \[x0\] -+** ldr p4, \[sp\] -+** addvl sp, sp, #1 -+** ret -+*/ -+void __attribute__((noipa)) -+callee (void *x0, svint32x4_t z0, svint32x3_t z4, svint32x2_t stack, -+ svint32_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) -+{ -+ svst2 (p0, x0, stack); -+ svst4 (p1, x0, z0); -+ svst3 (p2, x0, z4); -+ svst1_s32 (p3, x0, z7); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee (x0, -+ svld4_vnum_s32 (pg, x0, -8), -+ svld3_vnum_s32 (pg, x0, -3), -+ svld2_vnum_s32 (pg, x0, 0), -+ svld1_vnum_s32 (pg, x0, 2), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3), -+ svptrue_pat_b64 (SV_VL4)); -+} -+ -+/* { dg-final { scan-assembler {\tld4w\t{z0\.s - z3\.s}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3w\t{z4\.s - z6\.s}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1w\tz7\.s, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ -+/* { dg-final { scan-assembler {\tld2w\t{(z[0-9]+\.s) - z[0-9]+\.s}.*\tst1w\t\1, p[0-7], \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2w\t{z[0-9]+\.s - (z[0-9]+\.s)}.*\tst1w\t\1, p[0-7], \[x1, #1, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_s64.c -new file mode 100644 -index 000000000..e60d049fb ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_s64.c -@@ -0,0 +1,63 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee: -+** addvl sp, sp, #-1 -+** str p4, \[sp\] -+** ptrue p4\.b, 
all -+** ( -+** ld1d (z[0-9]+\.d), p4/z, \[x1, #1, mul vl\] -+** ld1d (z[0-9]+\.d), p4/z, \[x1\] -+** st2d {\2 - \1}, p0, \[x0\] -+** | -+** ld1d (z[0-9]+\.d), p4/z, \[x1\] -+** ld1d (z[0-9]+\.d), p4/z, \[x1, #1, mul vl\] -+** st2d {\3 - \4}, p0, \[x0\] -+** ) -+** st4d {z0\.d - z3\.d}, p1, \[x0\] -+** st3d {z4\.d - z6\.d}, p2, \[x0\] -+** st1d z7\.d, p3, \[x0\] -+** ldr p4, \[sp\] -+** addvl sp, sp, #1 -+** ret -+*/ -+void __attribute__((noipa)) -+callee (void *x0, svint64x4_t z0, svint64x3_t z4, svint64x2_t stack, -+ svint64_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) -+{ -+ svst2 (p0, x0, stack); -+ svst4 (p1, x0, z0); -+ svst3 (p2, x0, z4); -+ svst1_s64 (p3, x0, z7); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee (x0, -+ svld4_vnum_s64 (pg, x0, -8), -+ svld3_vnum_s64 (pg, x0, -3), -+ svld2_vnum_s64 (pg, x0, 0), -+ svld1_vnum_s64 (pg, x0, 2), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3), -+ svptrue_pat_b64 (SV_VL4)); -+} -+ -+/* { dg-final { scan-assembler {\tld4d\t{z0\.d - z3\.d}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3d\t{z4\.d - z6\.d}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1d\tz7\.d, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ -+/* { dg-final { scan-assembler {\tld2d\t{(z[0-9]+\.d) - z[0-9]+\.d}.*\tst1d\t\1, p[0-7], \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2d\t{z[0-9]+\.d - (z[0-9]+\.d)}.*\tst1d\t\1, p[0-7], \[x1, #1, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_s8.c -new file mode 100644 -index 000000000..bc0058372 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_s8.c -@@ -0,0 +1,63 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee: -+** addvl sp, sp, #-1 -+** str p4, \[sp\] -+** ptrue p4\.b, all -+** ( -+** ld1b (z[0-9]+\.b), p4/z, \[x1, #1, mul vl\] -+** ld1b (z[0-9]+\.b), p4/z, \[x1\] -+** st2b {\2 - \1}, p0, \[x0\] -+** | -+** ld1b (z[0-9]+\.b), p4/z, \[x1\] -+** ld1b (z[0-9]+\.b), p4/z, \[x1, #1, mul vl\] -+** st2b {\3 - \4}, p0, \[x0\] -+** ) -+** st4b {z0\.b - z3\.b}, p1, \[x0\] -+** st3b {z4\.b - z6\.b}, p2, \[x0\] -+** st1b z7\.b, p3, \[x0\] -+** ldr p4, \[sp\] -+** addvl sp, sp, #1 -+** ret -+*/ -+void __attribute__((noipa)) -+callee (void *x0, svint8x4_t z0, svint8x3_t z4, svint8x2_t stack, -+ svint8_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) -+{ -+ svst2 (p0, x0, stack); -+ svst4 (p1, x0, z0); -+ svst3 (p2, x0, z4); -+ svst1_s8 (p3, x0, z7); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee (x0, -+ svld4_vnum_s8 (pg, x0, -8), -+ svld3_vnum_s8 (pg, x0, -3), -+ svld2_vnum_s8 (pg, x0, 0), -+ svld1_vnum_s8 (pg, x0, 2), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3), -+ svptrue_pat_b64 (SV_VL4)); -+} -+ -+/* { dg-final { scan-assembler {\tld4b\t{z0\.b - 
z3\.b}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3b\t{z4\.b - z6\.b}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1b\tz7\.b, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ -+/* { dg-final { scan-assembler {\tld2b\t{(z[0-9]+\.b) - z[0-9]+\.b}.*\tst1b\t\1, p[0-7], \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2b\t{z[0-9]+\.b - (z[0-9]+\.b)}.*\tst1b\t\1, p[0-7], \[x1, #1, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_u16.c -new file mode 100644 -index 000000000..8aa651a41 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_u16.c -@@ -0,0 +1,63 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee: -+** addvl sp, sp, #-1 -+** str p4, \[sp\] -+** ptrue p4\.b, all -+** ( -+** ld1h (z[0-9]+\.h), p4/z, \[x1, #1, mul vl\] -+** ld1h (z[0-9]+\.h), p4/z, \[x1\] -+** st2h {\2 - \1}, p0, \[x0\] -+** | -+** ld1h (z[0-9]+\.h), p4/z, \[x1\] -+** ld1h (z[0-9]+\.h), p4/z, \[x1, #1, mul vl\] -+** st2h {\3 - \4}, p0, \[x0\] -+** ) -+** st4h {z0\.h - z3\.h}, p1, \[x0\] -+** st3h {z4\.h - z6\.h}, p2, \[x0\] -+** st1h z7\.h, p3, \[x0\] -+** ldr p4, \[sp\] -+** addvl sp, sp, #1 -+** ret -+*/ -+void __attribute__((noipa)) -+callee (void *x0, svuint16x4_t z0, svuint16x3_t z4, svuint16x2_t stack, -+ svuint16_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) -+{ -+ svst2 (p0, x0, stack); -+ svst4 (p1, x0, z0); -+ svst3 (p2, x0, z4); -+ svst1_u16 (p3, x0, z7); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee (x0, -+ svld4_vnum_u16 (pg, x0, -8), -+ svld3_vnum_u16 (pg, x0, -3), -+ svld2_vnum_u16 (pg, x0, 0), -+ svld1_vnum_u16 (pg, x0, 2), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3), -+ svptrue_pat_b64 (SV_VL4)); -+} -+ -+/* { dg-final { scan-assembler {\tld4h\t{z0\.h - z3\.h}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3h\t{z4\.h - z6\.h}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1h\tz7\.h, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ -+/* { dg-final { scan-assembler {\tld2h\t{(z[0-9]+\.h) - z[0-9]+\.h}.*\tst1h\t\1, p[0-7], \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2h\t{z[0-9]+\.h - (z[0-9]+\.h)}.*\tst1h\t\1, p[0-7], \[x1, #1, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_u32.c -new file mode 100644 -index 000000000..9ea3066ed ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_u32.c -@@ -0,0 +1,63 @@ -+/* { dg-do compile { target lp64 } } */ 
-+/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee: -+** addvl sp, sp, #-1 -+** str p4, \[sp\] -+** ptrue p4\.b, all -+** ( -+** ld1w (z[0-9]+\.s), p4/z, \[x1, #1, mul vl\] -+** ld1w (z[0-9]+\.s), p4/z, \[x1\] -+** st2w {\2 - \1}, p0, \[x0\] -+** | -+** ld1w (z[0-9]+\.s), p4/z, \[x1\] -+** ld1w (z[0-9]+\.s), p4/z, \[x1, #1, mul vl\] -+** st2w {\3 - \4}, p0, \[x0\] -+** ) -+** st4w {z0\.s - z3\.s}, p1, \[x0\] -+** st3w {z4\.s - z6\.s}, p2, \[x0\] -+** st1w z7\.s, p3, \[x0\] -+** ldr p4, \[sp\] -+** addvl sp, sp, #1 -+** ret -+*/ -+void __attribute__((noipa)) -+callee (void *x0, svuint32x4_t z0, svuint32x3_t z4, svuint32x2_t stack, -+ svuint32_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) -+{ -+ svst2 (p0, x0, stack); -+ svst4 (p1, x0, z0); -+ svst3 (p2, x0, z4); -+ svst1_u32 (p3, x0, z7); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee (x0, -+ svld4_vnum_u32 (pg, x0, -8), -+ svld3_vnum_u32 (pg, x0, -3), -+ svld2_vnum_u32 (pg, x0, 0), -+ svld1_vnum_u32 (pg, x0, 2), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3), -+ svptrue_pat_b64 (SV_VL4)); -+} -+ -+/* { dg-final { scan-assembler {\tld4w\t{z0\.s - z3\.s}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3w\t{z4\.s - z6\.s}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1w\tz7\.s, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ -+/* { dg-final { scan-assembler {\tld2w\t{(z[0-9]+\.s) - z[0-9]+\.s}.*\tst1w\t\1, p[0-7], \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2w\t{z[0-9]+\.s - (z[0-9]+\.s)}.*\tst1w\t\1, p[0-7], \[x1, #1, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_u64.c -new file mode 100644 -index 000000000..b64f3b6d5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_u64.c -@@ -0,0 +1,63 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee: -+** addvl sp, sp, #-1 -+** str p4, \[sp\] -+** ptrue p4\.b, all -+** ( -+** ld1d (z[0-9]+\.d), p4/z, \[x1, #1, mul vl\] -+** ld1d (z[0-9]+\.d), p4/z, \[x1\] -+** st2d {\2 - \1}, p0, \[x0\] -+** | -+** ld1d (z[0-9]+\.d), p4/z, \[x1\] -+** ld1d (z[0-9]+\.d), p4/z, \[x1, #1, mul vl\] -+** st2d {\3 - \4}, p0, \[x0\] -+** ) -+** st4d {z0\.d - z3\.d}, p1, \[x0\] -+** st3d {z4\.d - z6\.d}, p2, \[x0\] -+** st1d z7\.d, p3, \[x0\] -+** ldr p4, \[sp\] -+** addvl sp, sp, #1 -+** ret -+*/ -+void __attribute__((noipa)) -+callee (void *x0, svuint64x4_t z0, svuint64x3_t z4, svuint64x2_t stack, -+ svuint64_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) -+{ -+ svst2 (p0, x0, stack); -+ svst4 (p1, x0, z0); -+ svst3 (p2, x0, z4); -+ svst1_u64 (p3, x0, z7); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee (x0, -+ svld4_vnum_u64 (pg, x0, -8), -+ 
svld3_vnum_u64 (pg, x0, -3), -+ svld2_vnum_u64 (pg, x0, 0), -+ svld1_vnum_u64 (pg, x0, 2), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3), -+ svptrue_pat_b64 (SV_VL4)); -+} -+ -+/* { dg-final { scan-assembler {\tld4d\t{z0\.d - z3\.d}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3d\t{z4\.d - z6\.d}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1d\tz7\.d, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ -+/* { dg-final { scan-assembler {\tld2d\t{(z[0-9]+\.d) - z[0-9]+\.d}.*\tst1d\t\1, p[0-7], \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2d\t{z[0-9]+\.d - (z[0-9]+\.d)}.*\tst1d\t\1, p[0-7], \[x1, #1, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_u8.c -new file mode 100644 -index 000000000..5575673ae ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_be_u8.c -@@ -0,0 +1,63 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee: -+** addvl sp, sp, #-1 -+** str p4, \[sp\] -+** ptrue p4\.b, all -+** ( -+** ld1b (z[0-9]+\.b), p4/z, \[x1, #1, mul vl\] -+** ld1b (z[0-9]+\.b), p4/z, \[x1\] -+** st2b {\2 - \1}, p0, \[x0\] -+** | -+** ld1b (z[0-9]+\.b), p4/z, \[x1\] -+** ld1b (z[0-9]+\.b), p4/z, \[x1, #1, mul vl\] -+** st2b {\3 - \4}, p0, \[x0\] -+** ) -+** st4b {z0\.b - z3\.b}, p1, \[x0\] -+** st3b {z4\.b - z6\.b}, p2, \[x0\] -+** st1b z7\.b, p3, \[x0\] -+** ldr p4, \[sp\] -+** addvl sp, sp, #1 -+** ret -+*/ -+void __attribute__((noipa)) -+callee (void *x0, svuint8x4_t z0, svuint8x3_t z4, svuint8x2_t stack, -+ svuint8_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) -+{ -+ svst2 (p0, x0, stack); -+ svst4 (p1, x0, z0); -+ svst3 (p2, x0, z4); -+ svst1_u8 (p3, x0, z7); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee (x0, -+ svld4_vnum_u8 (pg, x0, -8), -+ svld3_vnum_u8 (pg, x0, -3), -+ svld2_vnum_u8 (pg, x0, 0), -+ svld1_vnum_u8 (pg, x0, 2), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3), -+ svptrue_pat_b64 (SV_VL4)); -+} -+ -+/* { dg-final { scan-assembler {\tld4b\t{z0\.b - z3\.b}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3b\t{z4\.b - z6\.b}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1b\tz7\.b, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ -+/* { dg-final { scan-assembler {\tld2b\t{(z[0-9]+\.b) - z[0-9]+\.b}.*\tst1b\t\1, p[0-7], \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2b\t{z[0-9]+\.b - (z[0-9]+\.b)}.*\tst1b\t\1, p[0-7], \[x1, #1, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ -diff --git 
a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_bf16.c -new file mode 100644 -index 000000000..94d84df4a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_bf16.c -@@ -0,0 +1,58 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee: -+** ( -+** ldr (z[0-9]+), \[x1, #1, mul vl\] -+** ldr (z[0-9]+), \[x1\] -+** st2h {\2\.h - \1\.h}, p0, \[x0\] -+** | -+** ldr (z[0-9]+), \[x1\] -+** ldr (z[0-9]+), \[x1, #1, mul vl\] -+** st2h {\3\.h - \4\.h}, p0, \[x0\] -+** ) -+** st4h {z0\.h - z3\.h}, p1, \[x0\] -+** st3h {z4\.h - z6\.h}, p2, \[x0\] -+** st1h z7\.h, p3, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee (void *x0, svbfloat16x4_t z0, svbfloat16x3_t z4, svbfloat16x2_t stack, -+ svbfloat16_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) -+{ -+ svst2 (p0, x0, stack); -+ svst4 (p1, x0, z0); -+ svst3 (p2, x0, z4); -+ svst1_bf16 (p3, x0, z7); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee (x0, -+ svld4_vnum_bf16 (pg, x0, -8), -+ svld3_vnum_bf16 (pg, x0, -3), -+ svld2_vnum_bf16 (pg, x0, 0), -+ svld1_vnum_bf16 (pg, x0, 2), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3), -+ svptrue_pat_b64 (SV_VL4)); -+} -+ -+/* { dg-final { scan-assembler {\tld4h\t{z0\.h - z3\.h}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3h\t{z4\.h - z6\.h}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1h\tz7\.h, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ -+/* { dg-final { scan-assembler {\tld2h\t{(z[0-9]+)\.h - z[0-9]+\.h}.*\tstr\t\1, \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2h\t{z[0-9]+\.h - (z[0-9]+)\.h}.*\tstr\t\1, \[x1, #1, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_f16.c -new file mode 100644 -index 000000000..6271365c7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_f16.c -@@ -0,0 +1,58 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee: -+** ( -+** ldr (z[0-9]+), \[x1, #1, mul vl\] -+** ldr (z[0-9]+), \[x1\] -+** st2h {\2\.h - \1\.h}, p0, \[x0\] -+** | -+** ldr (z[0-9]+), \[x1\] -+** ldr (z[0-9]+), \[x1, #1, mul vl\] -+** st2h {\3\.h - \4\.h}, p0, \[x0\] -+** ) -+** st4h {z0\.h - z3\.h}, p1, \[x0\] -+** st3h {z4\.h - z6\.h}, p2, \[x0\] -+** st1h z7\.h, p3, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee (void *x0, svfloat16x4_t z0, svfloat16x3_t z4, svfloat16x2_t stack, -+ svfloat16_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) -+{ -+ svst2 (p0, x0, stack); -+ svst4 (p1, x0, z0); -+ svst3 (p2, x0, z4); -+ svst1_f16 (p3, x0, z7); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee 
(x0, -+ svld4_vnum_f16 (pg, x0, -8), -+ svld3_vnum_f16 (pg, x0, -3), -+ svld2_vnum_f16 (pg, x0, 0), -+ svld1_vnum_f16 (pg, x0, 2), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3), -+ svptrue_pat_b64 (SV_VL4)); -+} -+ -+/* { dg-final { scan-assembler {\tld4h\t{z0\.h - z3\.h}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3h\t{z4\.h - z6\.h}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1h\tz7\.h, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ -+/* { dg-final { scan-assembler {\tld2h\t{(z[0-9]+)\.h - z[0-9]+\.h}.*\tstr\t\1, \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2h\t{z[0-9]+\.h - (z[0-9]+)\.h}.*\tstr\t\1, \[x1, #1, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_f32.c -new file mode 100644 -index 000000000..ef89de216 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_f32.c -@@ -0,0 +1,58 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee: -+** ( -+** ldr (z[0-9]+), \[x1, #1, mul vl\] -+** ldr (z[0-9]+), \[x1\] -+** st2w {\2\.s - \1\.s}, p0, \[x0\] -+** | -+** ldr (z[0-9]+), \[x1\] -+** ldr (z[0-9]+), \[x1, #1, mul vl\] -+** st2w {\3\.s - \4\.s}, p0, \[x0\] -+** ) -+** st4w {z0\.s - z3\.s}, p1, \[x0\] -+** st3w {z4\.s - z6\.s}, p2, \[x0\] -+** st1w z7\.s, p3, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee (void *x0, svfloat32x4_t z0, svfloat32x3_t z4, svfloat32x2_t stack, -+ svfloat32_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) -+{ -+ svst2 (p0, x0, stack); -+ svst4 (p1, x0, z0); -+ svst3 (p2, x0, z4); -+ svst1_f32 (p3, x0, z7); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee (x0, -+ svld4_vnum_f32 (pg, x0, -8), -+ svld3_vnum_f32 (pg, x0, -3), -+ svld2_vnum_f32 (pg, x0, 0), -+ svld1_vnum_f32 (pg, x0, 2), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3), -+ svptrue_pat_b64 (SV_VL4)); -+} -+ -+/* { dg-final { scan-assembler {\tld4w\t{z0\.s - z3\.s}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3w\t{z4\.s - z6\.s}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1w\tz7\.s, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ -+/* { dg-final { scan-assembler {\tld2w\t{(z[0-9]+)\.s - z[0-9]+\.s}.*\tstr\t\1, \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2w\t{z[0-9]+\.s - (z[0-9]+)\.s}.*\tstr\t\1, \[x1, #1, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_f64.c -new file mode 100644 -index 000000000..4eddf2d1f 
---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_f64.c -@@ -0,0 +1,58 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee: -+** ( -+** ldr (z[0-9]+), \[x1, #1, mul vl\] -+** ldr (z[0-9]+), \[x1\] -+** st2d {\2\.d - \1\.d}, p0, \[x0\] -+** | -+** ldr (z[0-9]+), \[x1\] -+** ldr (z[0-9]+), \[x1, #1, mul vl\] -+** st2d {\3\.d - \4\.d}, p0, \[x0\] -+** ) -+** st4d {z0\.d - z3\.d}, p1, \[x0\] -+** st3d {z4\.d - z6\.d}, p2, \[x0\] -+** st1d z7\.d, p3, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee (void *x0, svfloat64x4_t z0, svfloat64x3_t z4, svfloat64x2_t stack, -+ svfloat64_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) -+{ -+ svst2 (p0, x0, stack); -+ svst4 (p1, x0, z0); -+ svst3 (p2, x0, z4); -+ svst1_f64 (p3, x0, z7); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee (x0, -+ svld4_vnum_f64 (pg, x0, -8), -+ svld3_vnum_f64 (pg, x0, -3), -+ svld2_vnum_f64 (pg, x0, 0), -+ svld1_vnum_f64 (pg, x0, 2), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3), -+ svptrue_pat_b64 (SV_VL4)); -+} -+ -+/* { dg-final { scan-assembler {\tld4d\t{z0\.d - z3\.d}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3d\t{z4\.d - z6\.d}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1d\tz7\.d, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ -+/* { dg-final { scan-assembler {\tld2d\t{(z[0-9]+)\.d - z[0-9]+\.d}.*\tstr\t\1, \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2d\t{z[0-9]+\.d - (z[0-9]+)\.d}.*\tstr\t\1, \[x1, #1, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_s16.c -new file mode 100644 -index 000000000..a4b6af071 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_s16.c -@@ -0,0 +1,58 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee: -+** ( -+** ldr (z[0-9]+), \[x1, #1, mul vl\] -+** ldr (z[0-9]+), \[x1\] -+** st2h {\2\.h - \1\.h}, p0, \[x0\] -+** | -+** ldr (z[0-9]+), \[x1\] -+** ldr (z[0-9]+), \[x1, #1, mul vl\] -+** st2h {\3\.h - \4\.h}, p0, \[x0\] -+** ) -+** st4h {z0\.h - z3\.h}, p1, \[x0\] -+** st3h {z4\.h - z6\.h}, p2, \[x0\] -+** st1h z7\.h, p3, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee (void *x0, svint16x4_t z0, svint16x3_t z4, svint16x2_t stack, -+ svint16_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) -+{ -+ svst2 (p0, x0, stack); -+ svst4 (p1, x0, z0); -+ svst3 (p2, x0, z4); -+ svst1_s16 (p3, x0, z7); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee (x0, -+ svld4_vnum_s16 (pg, x0, -8), -+ svld3_vnum_s16 (pg, x0, -3), -+ svld2_vnum_s16 (pg, x0, 0), -+ svld1_vnum_s16 (pg, x0, 2), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), 
-+ svptrue_pat_b32 (SV_VL3), -+ svptrue_pat_b64 (SV_VL4)); -+} -+ -+/* { dg-final { scan-assembler {\tld4h\t{z0\.h - z3\.h}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3h\t{z4\.h - z6\.h}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1h\tz7\.h, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ -+/* { dg-final { scan-assembler {\tld2h\t{(z[0-9]+)\.h - z[0-9]+\.h}.*\tstr\t\1, \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2h\t{z[0-9]+\.h - (z[0-9]+)\.h}.*\tstr\t\1, \[x1, #1, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_s32.c -new file mode 100644 -index 000000000..60b58d6fc ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_s32.c -@@ -0,0 +1,58 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee: -+** ( -+** ldr (z[0-9]+), \[x1, #1, mul vl\] -+** ldr (z[0-9]+), \[x1\] -+** st2w {\2\.s - \1\.s}, p0, \[x0\] -+** | -+** ldr (z[0-9]+), \[x1\] -+** ldr (z[0-9]+), \[x1, #1, mul vl\] -+** st2w {\3\.s - \4\.s}, p0, \[x0\] -+** ) -+** st4w {z0\.s - z3\.s}, p1, \[x0\] -+** st3w {z4\.s - z6\.s}, p2, \[x0\] -+** st1w z7\.s, p3, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee (void *x0, svint32x4_t z0, svint32x3_t z4, svint32x2_t stack, -+ svint32_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) -+{ -+ svst2 (p0, x0, stack); -+ svst4 (p1, x0, z0); -+ svst3 (p2, x0, z4); -+ svst1_s32 (p3, x0, z7); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee (x0, -+ svld4_vnum_s32 (pg, x0, -8), -+ svld3_vnum_s32 (pg, x0, -3), -+ svld2_vnum_s32 (pg, x0, 0), -+ svld1_vnum_s32 (pg, x0, 2), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3), -+ svptrue_pat_b64 (SV_VL4)); -+} -+ -+/* { dg-final { scan-assembler {\tld4w\t{z0\.s - z3\.s}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3w\t{z4\.s - z6\.s}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1w\tz7\.s, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ -+/* { dg-final { scan-assembler {\tld2w\t{(z[0-9]+)\.s - z[0-9]+\.s}.*\tstr\t\1, \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2w\t{z[0-9]+\.s - (z[0-9]+)\.s}.*\tstr\t\1, \[x1, #1, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_s64.c -new file mode 100644 -index 000000000..b6126aa4c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_s64.c -@@ -0,0 +1,58 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mlittle-endian 
-fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee: -+** ( -+** ldr (z[0-9]+), \[x1, #1, mul vl\] -+** ldr (z[0-9]+), \[x1\] -+** st2d {\2\.d - \1\.d}, p0, \[x0\] -+** | -+** ldr (z[0-9]+), \[x1\] -+** ldr (z[0-9]+), \[x1, #1, mul vl\] -+** st2d {\3\.d - \4\.d}, p0, \[x0\] -+** ) -+** st4d {z0\.d - z3\.d}, p1, \[x0\] -+** st3d {z4\.d - z6\.d}, p2, \[x0\] -+** st1d z7\.d, p3, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee (void *x0, svint64x4_t z0, svint64x3_t z4, svint64x2_t stack, -+ svint64_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) -+{ -+ svst2 (p0, x0, stack); -+ svst4 (p1, x0, z0); -+ svst3 (p2, x0, z4); -+ svst1_s64 (p3, x0, z7); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee (x0, -+ svld4_vnum_s64 (pg, x0, -8), -+ svld3_vnum_s64 (pg, x0, -3), -+ svld2_vnum_s64 (pg, x0, 0), -+ svld1_vnum_s64 (pg, x0, 2), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3), -+ svptrue_pat_b64 (SV_VL4)); -+} -+ -+/* { dg-final { scan-assembler {\tld4d\t{z0\.d - z3\.d}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3d\t{z4\.d - z6\.d}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1d\tz7\.d, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ -+/* { dg-final { scan-assembler {\tld2d\t{(z[0-9]+)\.d - z[0-9]+\.d}.*\tstr\t\1, \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2d\t{z[0-9]+\.d - (z[0-9]+)\.d}.*\tstr\t\1, \[x1, #1, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_s8.c -new file mode 100644 -index 000000000..5c16c3c8f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_s8.c -@@ -0,0 +1,58 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee: -+** ( -+** ldr (z[0-9]+), \[x1, #1, mul vl\] -+** ldr (z[0-9]+), \[x1\] -+** st2b {\2\.b - \1\.b}, p0, \[x0\] -+** | -+** ldr (z[0-9]+), \[x1\] -+** ldr (z[0-9]+), \[x1, #1, mul vl\] -+** st2b {\3\.b - \4\.b}, p0, \[x0\] -+** ) -+** st4b {z0\.b - z3\.b}, p1, \[x0\] -+** st3b {z4\.b - z6\.b}, p2, \[x0\] -+** st1b z7\.b, p3, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee (void *x0, svint8x4_t z0, svint8x3_t z4, svint8x2_t stack, -+ svint8_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) -+{ -+ svst2 (p0, x0, stack); -+ svst4 (p1, x0, z0); -+ svst3 (p2, x0, z4); -+ svst1_s8 (p3, x0, z7); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee (x0, -+ svld4_vnum_s8 (pg, x0, -8), -+ svld3_vnum_s8 (pg, x0, -3), -+ svld2_vnum_s8 (pg, x0, 0), -+ svld1_vnum_s8 (pg, x0, 2), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3), -+ svptrue_pat_b64 (SV_VL4)); -+} -+ -+/* { dg-final { scan-assembler {\tld4b\t{z0\.b - z3\.b}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler 
{\tld3b\t{z4\.b - z6\.b}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1b\tz7\.b, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ -+/* { dg-final { scan-assembler {\tld2b\t{(z[0-9]+)\.b - z[0-9]+\.b}.*\tstr\t\1, \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2b\t{z[0-9]+\.b - (z[0-9]+)\.b}.*\tstr\t\1, \[x1, #1, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_u16.c -new file mode 100644 -index 000000000..2b9a90025 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_u16.c -@@ -0,0 +1,58 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee: -+** ( -+** ldr (z[0-9]+), \[x1, #1, mul vl\] -+** ldr (z[0-9]+), \[x1\] -+** st2h {\2\.h - \1\.h}, p0, \[x0\] -+** | -+** ldr (z[0-9]+), \[x1\] -+** ldr (z[0-9]+), \[x1, #1, mul vl\] -+** st2h {\3\.h - \4\.h}, p0, \[x0\] -+** ) -+** st4h {z0\.h - z3\.h}, p1, \[x0\] -+** st3h {z4\.h - z6\.h}, p2, \[x0\] -+** st1h z7\.h, p3, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee (void *x0, svuint16x4_t z0, svuint16x3_t z4, svuint16x2_t stack, -+ svuint16_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) -+{ -+ svst2 (p0, x0, stack); -+ svst4 (p1, x0, z0); -+ svst3 (p2, x0, z4); -+ svst1_u16 (p3, x0, z7); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee (x0, -+ svld4_vnum_u16 (pg, x0, -8), -+ svld3_vnum_u16 (pg, x0, -3), -+ svld2_vnum_u16 (pg, x0, 0), -+ svld1_vnum_u16 (pg, x0, 2), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3), -+ svptrue_pat_b64 (SV_VL4)); -+} -+ -+/* { dg-final { scan-assembler {\tld4h\t{z0\.h - z3\.h}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3h\t{z4\.h - z6\.h}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1h\tz7\.h, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ -+/* { dg-final { scan-assembler {\tld2h\t{(z[0-9]+)\.h - z[0-9]+\.h}.*\tstr\t\1, \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2h\t{z[0-9]+\.h - (z[0-9]+)\.h}.*\tstr\t\1, \[x1, #1, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_u32.c -new file mode 100644 -index 000000000..2902f59b4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_u32.c -@@ -0,0 +1,58 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee: -+** ( -+** ldr (z[0-9]+), \[x1, #1, mul vl\] -+** ldr 
(z[0-9]+), \[x1\] -+** st2w {\2\.s - \1\.s}, p0, \[x0\] -+** | -+** ldr (z[0-9]+), \[x1\] -+** ldr (z[0-9]+), \[x1, #1, mul vl\] -+** st2w {\3\.s - \4\.s}, p0, \[x0\] -+** ) -+** st4w {z0\.s - z3\.s}, p1, \[x0\] -+** st3w {z4\.s - z6\.s}, p2, \[x0\] -+** st1w z7\.s, p3, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee (void *x0, svuint32x4_t z0, svuint32x3_t z4, svuint32x2_t stack, -+ svuint32_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) -+{ -+ svst2 (p0, x0, stack); -+ svst4 (p1, x0, z0); -+ svst3 (p2, x0, z4); -+ svst1_u32 (p3, x0, z7); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee (x0, -+ svld4_vnum_u32 (pg, x0, -8), -+ svld3_vnum_u32 (pg, x0, -3), -+ svld2_vnum_u32 (pg, x0, 0), -+ svld1_vnum_u32 (pg, x0, 2), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3), -+ svptrue_pat_b64 (SV_VL4)); -+} -+ -+/* { dg-final { scan-assembler {\tld4w\t{z0\.s - z3\.s}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3w\t{z4\.s - z6\.s}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1w\tz7\.s, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ -+/* { dg-final { scan-assembler {\tld2w\t{(z[0-9]+)\.s - z[0-9]+\.s}.*\tstr\t\1, \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2w\t{z[0-9]+\.s - (z[0-9]+)\.s}.*\tstr\t\1, \[x1, #1, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_u64.c -new file mode 100644 -index 000000000..85b3cfdad ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_u64.c -@@ -0,0 +1,58 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee: -+** ( -+** ldr (z[0-9]+), \[x1, #1, mul vl\] -+** ldr (z[0-9]+), \[x1\] -+** st2d {\2\.d - \1\.d}, p0, \[x0\] -+** | -+** ldr (z[0-9]+), \[x1\] -+** ldr (z[0-9]+), \[x1, #1, mul vl\] -+** st2d {\3\.d - \4\.d}, p0, \[x0\] -+** ) -+** st4d {z0\.d - z3\.d}, p1, \[x0\] -+** st3d {z4\.d - z6\.d}, p2, \[x0\] -+** st1d z7\.d, p3, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee (void *x0, svuint64x4_t z0, svuint64x3_t z4, svuint64x2_t stack, -+ svuint64_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) -+{ -+ svst2 (p0, x0, stack); -+ svst4 (p1, x0, z0); -+ svst3 (p2, x0, z4); -+ svst1_u64 (p3, x0, z7); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee (x0, -+ svld4_vnum_u64 (pg, x0, -8), -+ svld3_vnum_u64 (pg, x0, -3), -+ svld2_vnum_u64 (pg, x0, 0), -+ svld1_vnum_u64 (pg, x0, 2), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3), -+ svptrue_pat_b64 (SV_VL4)); -+} -+ -+/* { dg-final { scan-assembler {\tld4d\t{z0\.d - z3\.d}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3d\t{z4\.d - z6\.d}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1d\tz7\.d, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ -+/* { dg-final { 
scan-assembler {\tmov\tx1, sp\n} } } */ -+/* { dg-final { scan-assembler {\tld2d\t{(z[0-9]+)\.d - z[0-9]+\.d}.*\tstr\t\1, \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2d\t{z[0-9]+\.d - (z[0-9]+)\.d}.*\tstr\t\1, \[x1, #1, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_u8.c -new file mode 100644 -index 000000000..f56acb693 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_5_le_u8.c -@@ -0,0 +1,58 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee: -+** ( -+** ldr (z[0-9]+), \[x1, #1, mul vl\] -+** ldr (z[0-9]+), \[x1\] -+** st2b {\2\.b - \1\.b}, p0, \[x0\] -+** | -+** ldr (z[0-9]+), \[x1\] -+** ldr (z[0-9]+), \[x1, #1, mul vl\] -+** st2b {\3\.b - \4\.b}, p0, \[x0\] -+** ) -+** st4b {z0\.b - z3\.b}, p1, \[x0\] -+** st3b {z4\.b - z6\.b}, p2, \[x0\] -+** st1b z7\.b, p3, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee (void *x0, svuint8x4_t z0, svuint8x3_t z4, svuint8x2_t stack, -+ svuint8_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) -+{ -+ svst2 (p0, x0, stack); -+ svst4 (p1, x0, z0); -+ svst3 (p2, x0, z4); -+ svst1_u8 (p3, x0, z7); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee (x0, -+ svld4_vnum_u8 (pg, x0, -8), -+ svld3_vnum_u8 (pg, x0, -3), -+ svld2_vnum_u8 (pg, x0, 0), -+ svld1_vnum_u8 (pg, x0, 2), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3), -+ svptrue_pat_b64 (SV_VL4)); -+} -+ -+/* { dg-final { scan-assembler {\tld4b\t{z0\.b - z3\.b}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3b\t{z4\.b - z6\.b}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1b\tz7\.b, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */ -+/* { dg-final { scan-assembler {\tld2b\t{(z[0-9]+)\.b - z[0-9]+\.b}.*\tstr\t\1, \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2b\t{z[0-9]+\.b - (z[0-9]+)\.b}.*\tstr\t\1, \[x1, #1, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_bf16.c -new file mode 100644 -index 000000000..84d2c406c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_bf16.c -@@ -0,0 +1,71 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee1: -+** ptrue p3\.b, all -+** ... -+** ld1h (z[0-9]+\.h), p3/z, \[x1, #3, mul vl\] -+** ... 
-+** st4h {z[0-9]+\.h - \1}, p0, \[x0\] -+** st2h {z3\.h - z4\.h}, p1, \[x0\] -+** st3h {z5\.h - z7\.h}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee1 (void *x0, svbfloat16x3_t z0, svbfloat16x2_t z3, svbfloat16x3_t z5, -+ svbfloat16x4_t stack1, svbfloat16_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst4_bf16 (p0, x0, stack1); -+ svst2_bf16 (p1, x0, z3); -+ svst3_bf16 (p2, x0, z5); -+} -+ -+/* -+** callee2: -+** ptrue p3\.b, all -+** ld1h (z[0-9]+\.h), p3/z, \[x2\] -+** st1h \1, p0, \[x0\] -+** st2h {z3\.h - z4\.h}, p1, \[x0\] -+** st3h {z0\.h - z2\.h}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee2 (void *x0, svbfloat16x3_t z0, svbfloat16x2_t z3, svbfloat16x3_t z5, -+ svbfloat16x4_t stack1, svbfloat16_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst1_bf16 (p0, x0, stack2); -+ svst2_bf16 (p1, x0, z3); -+ svst3_bf16 (p2, x0, z0); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee1 (x0, -+ svld3_vnum_bf16 (pg, x0, -9), -+ svld2_vnum_bf16 (pg, x0, -2), -+ svld3_vnum_bf16 (pg, x0, 0), -+ svld4_vnum_bf16 (pg, x0, 8), -+ svld1_vnum_bf16 (pg, x0, 5), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3)); -+} -+ -+/* { dg-final { scan-assembler {\tld3h\t{z0\.h - z2\.h}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2h\t{z3\.h - z4\.h}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3h\t{z5\.h - z7\.h}, p[0-7]/z, \[x0\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4h\t{(z[0-9]+\.h) - z[0-9]+\.h}.*\tst1h\t\1, p[0-7], \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4h\t{z[0-9]+\.h - (z[0-9]+\.h)}.*\tst1h\t\1, p[0-7], \[x1, #3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+\.h), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1h\t\1, p[0-7], \[x2\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_f16.c -new file mode 100644 -index 000000000..dd4ccc3b2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_f16.c -@@ -0,0 +1,71 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee1: -+** ptrue p3\.b, all -+** ... -+** ld1h (z[0-9]+\.h), p3/z, \[x1, #3, mul vl\] -+** ... 
-+** st4h {z[0-9]+\.h - \1}, p0, \[x0\] -+** st2h {z3\.h - z4\.h}, p1, \[x0\] -+** st3h {z5\.h - z7\.h}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee1 (void *x0, svfloat16x3_t z0, svfloat16x2_t z3, svfloat16x3_t z5, -+ svfloat16x4_t stack1, svfloat16_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst4_f16 (p0, x0, stack1); -+ svst2_f16 (p1, x0, z3); -+ svst3_f16 (p2, x0, z5); -+} -+ -+/* -+** callee2: -+** ptrue p3\.b, all -+** ld1h (z[0-9]+\.h), p3/z, \[x2\] -+** st1h \1, p0, \[x0\] -+** st2h {z3\.h - z4\.h}, p1, \[x0\] -+** st3h {z0\.h - z2\.h}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee2 (void *x0, svfloat16x3_t z0, svfloat16x2_t z3, svfloat16x3_t z5, -+ svfloat16x4_t stack1, svfloat16_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst1_f16 (p0, x0, stack2); -+ svst2_f16 (p1, x0, z3); -+ svst3_f16 (p2, x0, z0); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee1 (x0, -+ svld3_vnum_f16 (pg, x0, -9), -+ svld2_vnum_f16 (pg, x0, -2), -+ svld3_vnum_f16 (pg, x0, 0), -+ svld4_vnum_f16 (pg, x0, 8), -+ svld1_vnum_f16 (pg, x0, 5), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3)); -+} -+ -+/* { dg-final { scan-assembler {\tld3h\t{z0\.h - z2\.h}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2h\t{z3\.h - z4\.h}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3h\t{z5\.h - z7\.h}, p[0-7]/z, \[x0\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4h\t{(z[0-9]+\.h) - z[0-9]+\.h}.*\tst1h\t\1, p[0-7], \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4h\t{z[0-9]+\.h - (z[0-9]+\.h)}.*\tst1h\t\1, p[0-7], \[x1, #3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+\.h), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1h\t\1, p[0-7], \[x2\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_f32.c -new file mode 100644 -index 000000000..26ea2a308 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_f32.c -@@ -0,0 +1,71 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee1: -+** ptrue p3\.b, all -+** ... -+** ld1w (z[0-9]+\.s), p3/z, \[x1, #3, mul vl\] -+** ... 
-+** st4w {z[0-9]+\.s - \1}, p0, \[x0\] -+** st2w {z3\.s - z4\.s}, p1, \[x0\] -+** st3w {z5\.s - z7\.s}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee1 (void *x0, svfloat32x3_t z0, svfloat32x2_t z3, svfloat32x3_t z5, -+ svfloat32x4_t stack1, svfloat32_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst4_f32 (p0, x0, stack1); -+ svst2_f32 (p1, x0, z3); -+ svst3_f32 (p2, x0, z5); -+} -+ -+/* -+** callee2: -+** ptrue p3\.b, all -+** ld1w (z[0-9]+\.s), p3/z, \[x2\] -+** st1w \1, p0, \[x0\] -+** st2w {z3\.s - z4\.s}, p1, \[x0\] -+** st3w {z0\.s - z2\.s}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee2 (void *x0, svfloat32x3_t z0, svfloat32x2_t z3, svfloat32x3_t z5, -+ svfloat32x4_t stack1, svfloat32_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst1_f32 (p0, x0, stack2); -+ svst2_f32 (p1, x0, z3); -+ svst3_f32 (p2, x0, z0); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee1 (x0, -+ svld3_vnum_f32 (pg, x0, -9), -+ svld2_vnum_f32 (pg, x0, -2), -+ svld3_vnum_f32 (pg, x0, 0), -+ svld4_vnum_f32 (pg, x0, 8), -+ svld1_vnum_f32 (pg, x0, 5), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3)); -+} -+ -+/* { dg-final { scan-assembler {\tld3w\t{z0\.s - z2\.s}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2w\t{z3\.s - z4\.s}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3w\t{z5\.s - z7\.s}, p[0-7]/z, \[x0\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4w\t{(z[0-9]+\.s) - z[0-9]+\.s}.*\tst1w\t\1, p[0-7], \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4w\t{z[0-9]+\.s - (z[0-9]+\.s)}.*\tst1w\t\1, p[0-7], \[x1, #3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+\.s), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1w\t\1, p[0-7], \[x2\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_f64.c -new file mode 100644 -index 000000000..62aded51c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_f64.c -@@ -0,0 +1,71 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee1: -+** ptrue p3\.b, all -+** ... -+** ld1d (z[0-9]+\.d), p3/z, \[x1, #3, mul vl\] -+** ... 
-+** st4d {z[0-9]+\.d - \1}, p0, \[x0\] -+** st2d {z3\.d - z4\.d}, p1, \[x0\] -+** st3d {z5\.d - z7\.d}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee1 (void *x0, svfloat64x3_t z0, svfloat64x2_t z3, svfloat64x3_t z5, -+ svfloat64x4_t stack1, svfloat64_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst4_f64 (p0, x0, stack1); -+ svst2_f64 (p1, x0, z3); -+ svst3_f64 (p2, x0, z5); -+} -+ -+/* -+** callee2: -+** ptrue p3\.b, all -+** ld1d (z[0-9]+\.d), p3/z, \[x2\] -+** st1d \1, p0, \[x0\] -+** st2d {z3\.d - z4\.d}, p1, \[x0\] -+** st3d {z0\.d - z2\.d}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee2 (void *x0, svfloat64x3_t z0, svfloat64x2_t z3, svfloat64x3_t z5, -+ svfloat64x4_t stack1, svfloat64_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst1_f64 (p0, x0, stack2); -+ svst2_f64 (p1, x0, z3); -+ svst3_f64 (p2, x0, z0); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee1 (x0, -+ svld3_vnum_f64 (pg, x0, -9), -+ svld2_vnum_f64 (pg, x0, -2), -+ svld3_vnum_f64 (pg, x0, 0), -+ svld4_vnum_f64 (pg, x0, 8), -+ svld1_vnum_f64 (pg, x0, 5), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3)); -+} -+ -+/* { dg-final { scan-assembler {\tld3d\t{z0\.d - z2\.d}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2d\t{z3\.d - z4\.d}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3d\t{z5\.d - z7\.d}, p[0-7]/z, \[x0\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4d\t{(z[0-9]+\.d) - z[0-9]+\.d}.*\tst1d\t\1, p[0-7], \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4d\t{z[0-9]+\.d - (z[0-9]+\.d)}.*\tst1d\t\1, p[0-7], \[x1, #3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1d\t(z[0-9]+\.d), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1d\t\1, p[0-7], \[x2\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_s16.c -new file mode 100644 -index 000000000..204ef9a92 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_s16.c -@@ -0,0 +1,71 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee1: -+** ptrue p3\.b, all -+** ... -+** ld1h (z[0-9]+\.h), p3/z, \[x1, #3, mul vl\] -+** ... 
-+** st4h {z[0-9]+\.h - \1}, p0, \[x0\] -+** st2h {z3\.h - z4\.h}, p1, \[x0\] -+** st3h {z5\.h - z7\.h}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee1 (void *x0, svint16x3_t z0, svint16x2_t z3, svint16x3_t z5, -+ svint16x4_t stack1, svint16_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst4_s16 (p0, x0, stack1); -+ svst2_s16 (p1, x0, z3); -+ svst3_s16 (p2, x0, z5); -+} -+ -+/* -+** callee2: -+** ptrue p3\.b, all -+** ld1h (z[0-9]+\.h), p3/z, \[x2\] -+** st1h \1, p0, \[x0\] -+** st2h {z3\.h - z4\.h}, p1, \[x0\] -+** st3h {z0\.h - z2\.h}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee2 (void *x0, svint16x3_t z0, svint16x2_t z3, svint16x3_t z5, -+ svint16x4_t stack1, svint16_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst1_s16 (p0, x0, stack2); -+ svst2_s16 (p1, x0, z3); -+ svst3_s16 (p2, x0, z0); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee1 (x0, -+ svld3_vnum_s16 (pg, x0, -9), -+ svld2_vnum_s16 (pg, x0, -2), -+ svld3_vnum_s16 (pg, x0, 0), -+ svld4_vnum_s16 (pg, x0, 8), -+ svld1_vnum_s16 (pg, x0, 5), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3)); -+} -+ -+/* { dg-final { scan-assembler {\tld3h\t{z0\.h - z2\.h}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2h\t{z3\.h - z4\.h}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3h\t{z5\.h - z7\.h}, p[0-7]/z, \[x0\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4h\t{(z[0-9]+\.h) - z[0-9]+\.h}.*\tst1h\t\1, p[0-7], \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4h\t{z[0-9]+\.h - (z[0-9]+\.h)}.*\tst1h\t\1, p[0-7], \[x1, #3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+\.h), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1h\t\1, p[0-7], \[x2\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_s32.c -new file mode 100644 -index 000000000..9ae4567a4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_s32.c -@@ -0,0 +1,71 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee1: -+** ptrue p3\.b, all -+** ... -+** ld1w (z[0-9]+\.s), p3/z, \[x1, #3, mul vl\] -+** ... 
-+** st4w {z[0-9]+\.s - \1\}, p0, \[x0\] -+** st2w {z3\.s - z4\.s}, p1, \[x0\] -+** st3w {z5\.s - z7\.s}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee1 (void *x0, svint32x3_t z0, svint32x2_t z3, svint32x3_t z5, -+ svint32x4_t stack1, svint32_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst4_s32 (p0, x0, stack1); -+ svst2_s32 (p1, x0, z3); -+ svst3_s32 (p2, x0, z5); -+} -+ -+/* -+** callee2: -+** ptrue p3\.b, all -+** ld1w (z[0-9]+\.s), p3/z, \[x2\] -+** st1w \1, p0, \[x0\] -+** st2w {z3\.s - z4\.s}, p1, \[x0\] -+** st3w {z0\.s - z2\.s}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee2 (void *x0, svint32x3_t z0, svint32x2_t z3, svint32x3_t z5, -+ svint32x4_t stack1, svint32_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst1_s32 (p0, x0, stack2); -+ svst2_s32 (p1, x0, z3); -+ svst3_s32 (p2, x0, z0); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee1 (x0, -+ svld3_vnum_s32 (pg, x0, -9), -+ svld2_vnum_s32 (pg, x0, -2), -+ svld3_vnum_s32 (pg, x0, 0), -+ svld4_vnum_s32 (pg, x0, 8), -+ svld1_vnum_s32 (pg, x0, 5), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3)); -+} -+ -+/* { dg-final { scan-assembler {\tld3w\t{z0\.s - z2\.s}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2w\t{z3\.s - z4\.s}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3w\t{z5\.s - z7\.s}, p[0-7]/z, \[x0\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4w\t{(z[0-9]+\.s) - z[0-9]+\.s}.*\tst1w\t\1, p[0-7], \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4w\t{z[0-9]+\.s - (z[0-9]+\.s)}.*\tst1w\t\1, p[0-7], \[x1, #3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+\.s), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1w\t\1, p[0-7], \[x2\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_s64.c -new file mode 100644 -index 000000000..0b8a2e213 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_s64.c -@@ -0,0 +1,71 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee1: -+** ptrue p3\.b, all -+** ... -+** ld1d (z[0-9]+\.d), p3/z, \[x1, #3, mul vl\] -+** ... 
-+** st4d {z[0-9]+\.d - \1}, p0, \[x0\] -+** st2d {z3\.d - z4\.d}, p1, \[x0\] -+** st3d {z5\.d - z7\.d}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee1 (void *x0, svint64x3_t z0, svint64x2_t z3, svint64x3_t z5, -+ svint64x4_t stack1, svint64_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst4_s64 (p0, x0, stack1); -+ svst2_s64 (p1, x0, z3); -+ svst3_s64 (p2, x0, z5); -+} -+ -+/* -+** callee2: -+** ptrue p3\.b, all -+** ld1d (z[0-9]+\.d), p3/z, \[x2\] -+** st1d \1, p0, \[x0\] -+** st2d {z3\.d - z4\.d}, p1, \[x0\] -+** st3d {z0\.d - z2\.d}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee2 (void *x0, svint64x3_t z0, svint64x2_t z3, svint64x3_t z5, -+ svint64x4_t stack1, svint64_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst1_s64 (p0, x0, stack2); -+ svst2_s64 (p1, x0, z3); -+ svst3_s64 (p2, x0, z0); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee1 (x0, -+ svld3_vnum_s64 (pg, x0, -9), -+ svld2_vnum_s64 (pg, x0, -2), -+ svld3_vnum_s64 (pg, x0, 0), -+ svld4_vnum_s64 (pg, x0, 8), -+ svld1_vnum_s64 (pg, x0, 5), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3)); -+} -+ -+/* { dg-final { scan-assembler {\tld3d\t{z0\.d - z2\.d}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2d\t{z3\.d - z4\.d}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3d\t{z5\.d - z7\.d}, p[0-7]/z, \[x0\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4d\t{(z[0-9]+\.d) - z[0-9]+\.d}.*\tst1d\t\1, p[0-7], \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4d\t{z[0-9]+\.d - (z[0-9]+\.d)}.*\tst1d\t\1, p[0-7], \[x1, #3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1d\t(z[0-9]+\.d), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1d\t\1, p[0-7], \[x2\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_s8.c -new file mode 100644 -index 000000000..0afbe71aa ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_s8.c -@@ -0,0 +1,71 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee1: -+** ptrue p3\.b, all -+** ... -+** ld1b (z[0-9]+\.b), p3/z, \[x1, #3, mul vl\] -+** ... 
-+** st4b {z[0-9]+\.b - \1}, p0, \[x0\] -+** st2b {z3\.b - z4\.b}, p1, \[x0\] -+** st3b {z5\.b - z7\.b}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee1 (void *x0, svint8x3_t z0, svint8x2_t z3, svint8x3_t z5, -+ svint8x4_t stack1, svint8_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst4_s8 (p0, x0, stack1); -+ svst2_s8 (p1, x0, z3); -+ svst3_s8 (p2, x0, z5); -+} -+ -+/* -+** callee2: -+** ptrue p3\.b, all -+** ld1b (z[0-9]+\.b), p3/z, \[x2\] -+** st1b \1, p0, \[x0\] -+** st2b {z3\.b - z4\.b}, p1, \[x0\] -+** st3b {z0\.b - z2\.b}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee2 (void *x0, svint8x3_t z0, svint8x2_t z3, svint8x3_t z5, -+ svint8x4_t stack1, svint8_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst1_s8 (p0, x0, stack2); -+ svst2_s8 (p1, x0, z3); -+ svst3_s8 (p2, x0, z0); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee1 (x0, -+ svld3_vnum_s8 (pg, x0, -9), -+ svld2_vnum_s8 (pg, x0, -2), -+ svld3_vnum_s8 (pg, x0, 0), -+ svld4_vnum_s8 (pg, x0, 8), -+ svld1_vnum_s8 (pg, x0, 5), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3)); -+} -+ -+/* { dg-final { scan-assembler {\tld3b\t{z0\.b - z2\.b}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2b\t{z3\.b - z4\.b}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3b\t{z5\.b - z7\.b}, p[0-7]/z, \[x0\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4b\t{(z[0-9]+\.b) - z[0-9]+\.b}.*\tst1b\t\1, p[0-7], \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4b\t{z[0-9]+\.b - (z[0-9]+\.b)}.*\tst1b\t\1, p[0-7], \[x1, #3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+\.b), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1b\t\1, p[0-7], \[x2\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_u16.c -new file mode 100644 -index 000000000..f010f5ebb ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_u16.c -@@ -0,0 +1,71 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee1: -+** ptrue p3\.b, all -+** ... -+** ld1h (z[0-9]+\.h), p3/z, \[x1, #3, mul vl\] -+** ... 
-+** st4h {z[0-9]+\.h - \1}, p0, \[x0\] -+** st2h {z3\.h - z4\.h}, p1, \[x0\] -+** st3h {z5\.h - z7\.h}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee1 (void *x0, svuint16x3_t z0, svuint16x2_t z3, svuint16x3_t z5, -+ svuint16x4_t stack1, svuint16_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst4_u16 (p0, x0, stack1); -+ svst2_u16 (p1, x0, z3); -+ svst3_u16 (p2, x0, z5); -+} -+ -+/* -+** callee2: -+** ptrue p3\.b, all -+** ld1h (z[0-9]+\.h), p3/z, \[x2\] -+** st1h \1, p0, \[x0\] -+** st2h {z3\.h - z4\.h}, p1, \[x0\] -+** st3h {z0\.h - z2\.h}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee2 (void *x0, svuint16x3_t z0, svuint16x2_t z3, svuint16x3_t z5, -+ svuint16x4_t stack1, svuint16_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst1_u16 (p0, x0, stack2); -+ svst2_u16 (p1, x0, z3); -+ svst3_u16 (p2, x0, z0); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee1 (x0, -+ svld3_vnum_u16 (pg, x0, -9), -+ svld2_vnum_u16 (pg, x0, -2), -+ svld3_vnum_u16 (pg, x0, 0), -+ svld4_vnum_u16 (pg, x0, 8), -+ svld1_vnum_u16 (pg, x0, 5), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3)); -+} -+ -+/* { dg-final { scan-assembler {\tld3h\t{z0\.h - z2\.h}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2h\t{z3\.h - z4\.h}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3h\t{z5\.h - z7\.h}, p[0-7]/z, \[x0\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4h\t{(z[0-9]+\.h) - z[0-9]+\.h}.*\tst1h\t\1, p[0-7], \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4h\t{z[0-9]+\.h - (z[0-9]+\.h)}.*\tst1h\t\1, p[0-7], \[x1, #3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+\.h), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1h\t\1, p[0-7], \[x2\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_u32.c -new file mode 100644 -index 000000000..60d903a31 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_u32.c -@@ -0,0 +1,71 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee1: -+** ptrue p3\.b, all -+** ... -+** ld1w (z[0-9]+\.s), p3/z, \[x1, #3, mul vl\] -+** ... 
-+** st4w {z[0-9]+\.s - \1}, p0, \[x0\] -+** st2w {z3\.s - z4\.s}, p1, \[x0\] -+** st3w {z5\.s - z7\.s}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee1 (void *x0, svuint32x3_t z0, svuint32x2_t z3, svuint32x3_t z5, -+ svuint32x4_t stack1, svuint32_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst4_u32 (p0, x0, stack1); -+ svst2_u32 (p1, x0, z3); -+ svst3_u32 (p2, x0, z5); -+} -+ -+/* -+** callee2: -+** ptrue p3\.b, all -+** ld1w (z[0-9]+\.s), p3/z, \[x2\] -+** st1w \1, p0, \[x0\] -+** st2w {z3\.s - z4\.s}, p1, \[x0\] -+** st3w {z0\.s - z2\.s}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee2 (void *x0, svuint32x3_t z0, svuint32x2_t z3, svuint32x3_t z5, -+ svuint32x4_t stack1, svuint32_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst1_u32 (p0, x0, stack2); -+ svst2_u32 (p1, x0, z3); -+ svst3_u32 (p2, x0, z0); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee1 (x0, -+ svld3_vnum_u32 (pg, x0, -9), -+ svld2_vnum_u32 (pg, x0, -2), -+ svld3_vnum_u32 (pg, x0, 0), -+ svld4_vnum_u32 (pg, x0, 8), -+ svld1_vnum_u32 (pg, x0, 5), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3)); -+} -+ -+/* { dg-final { scan-assembler {\tld3w\t{z0\.s - z2\.s}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2w\t{z3\.s - z4\.s}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3w\t{z5\.s - z7\.s}, p[0-7]/z, \[x0\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4w\t{(z[0-9]+\.s) - z[0-9]+\.s}.*\tst1w\t\1, p[0-7], \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4w\t{z[0-9]+\.s - (z[0-9]+\.s)}.*\tst1w\t\1, p[0-7], \[x1, #3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+\.s), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1w\t\1, p[0-7], \[x2\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_u64.c -new file mode 100644 -index 000000000..948f426f9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_u64.c -@@ -0,0 +1,71 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee1: -+** ptrue p3\.b, all -+** ... -+** ld1d (z[0-9]+\.d), p3/z, \[x1, #3, mul vl\] -+** ... 
-+** st4d {z[0-9]+\.d - \1}, p0, \[x0\] -+** st2d {z3\.d - z4\.d}, p1, \[x0\] -+** st3d {z5\.d - z7\.d}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee1 (void *x0, svuint64x3_t z0, svuint64x2_t z3, svuint64x3_t z5, -+ svuint64x4_t stack1, svuint64_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst4_u64 (p0, x0, stack1); -+ svst2_u64 (p1, x0, z3); -+ svst3_u64 (p2, x0, z5); -+} -+ -+/* -+** callee2: -+** ptrue p3\.b, all -+** ld1d (z[0-9]+\.d), p3/z, \[x2\] -+** st1d \1, p0, \[x0\] -+** st2d {z3\.d - z4\.d}, p1, \[x0\] -+** st3d {z0\.d - z2\.d}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee2 (void *x0, svuint64x3_t z0, svuint64x2_t z3, svuint64x3_t z5, -+ svuint64x4_t stack1, svuint64_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst1_u64 (p0, x0, stack2); -+ svst2_u64 (p1, x0, z3); -+ svst3_u64 (p2, x0, z0); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee1 (x0, -+ svld3_vnum_u64 (pg, x0, -9), -+ svld2_vnum_u64 (pg, x0, -2), -+ svld3_vnum_u64 (pg, x0, 0), -+ svld4_vnum_u64 (pg, x0, 8), -+ svld1_vnum_u64 (pg, x0, 5), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3)); -+} -+ -+/* { dg-final { scan-assembler {\tld3d\t{z0\.d - z2\.d}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2d\t{z3\.d - z4\.d}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3d\t{z5\.d - z7\.d}, p[0-7]/z, \[x0\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4d\t{(z[0-9]+\.d) - z[0-9]+\.d}.*\tst1d\t\1, p[0-7], \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4d\t{z[0-9]+\.d - (z[0-9]+\.d)}.*\tst1d\t\1, p[0-7], \[x1, #3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1d\t(z[0-9]+\.d), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1d\t\1, p[0-7], \[x2\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_u8.c -new file mode 100644 -index 000000000..8049ec078 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_be_u8.c -@@ -0,0 +1,71 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee1: -+** ptrue p3\.b, all -+** ... -+** ld1b (z[0-9]+\.b), p3/z, \[x1, #3, mul vl\] -+** ... 
-+** st4b {z[0-9]+\.b - \1}, p0, \[x0\] -+** st2b {z3\.b - z4\.b}, p1, \[x0\] -+** st3b {z5\.b - z7\.b}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee1 (void *x0, svuint8x3_t z0, svuint8x2_t z3, svuint8x3_t z5, -+ svuint8x4_t stack1, svuint8_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst4_u8 (p0, x0, stack1); -+ svst2_u8 (p1, x0, z3); -+ svst3_u8 (p2, x0, z5); -+} -+ -+/* -+** callee2: -+** ptrue p3\.b, all -+** ld1b (z[0-9]+\.b), p3/z, \[x2\] -+** st1b \1, p0, \[x0\] -+** st2b {z3\.b - z4\.b}, p1, \[x0\] -+** st3b {z0\.b - z2\.b}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee2 (void *x0, svuint8x3_t z0, svuint8x2_t z3, svuint8x3_t z5, -+ svuint8x4_t stack1, svuint8_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst1_u8 (p0, x0, stack2); -+ svst2_u8 (p1, x0, z3); -+ svst3_u8 (p2, x0, z0); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee1 (x0, -+ svld3_vnum_u8 (pg, x0, -9), -+ svld2_vnum_u8 (pg, x0, -2), -+ svld3_vnum_u8 (pg, x0, 0), -+ svld4_vnum_u8 (pg, x0, 8), -+ svld1_vnum_u8 (pg, x0, 5), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3)); -+} -+ -+/* { dg-final { scan-assembler {\tld3b\t{z0\.b - z2\.b}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2b\t{z3\.b - z4\.b}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3b\t{z5\.b - z7\.b}, p[0-7]/z, \[x0\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4b\t{(z[0-9]+\.b) - z[0-9]+\.b}.*\tst1b\t\1, p[0-7], \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4b\t{z[0-9]+\.b - (z[0-9]+\.b)}.*\tst1b\t\1, p[0-7], \[x1, #3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+\.b), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1b\t\1, p[0-7], \[x2\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_bf16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_bf16.c -new file mode 100644 -index 000000000..3dc9e42ed ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_bf16.c -@@ -0,0 +1,70 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee1: -+** ... -+** ldr (z[0-9]+), \[x1, #3, mul vl\] -+** ... 
-+** st4h {z[0-9]+\.h - \1\.h}, p0, \[x0\] -+** st2h {z3\.h - z4\.h}, p1, \[x0\] -+** st3h {z5\.h - z7\.h}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee1 (void *x0, svbfloat16x3_t z0, svbfloat16x2_t z3, svbfloat16x3_t z5, -+ svbfloat16x4_t stack1, svbfloat16_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst4_bf16 (p0, x0, stack1); -+ svst2_bf16 (p1, x0, z3); -+ svst3_bf16 (p2, x0, z5); -+} -+ -+/* -+** callee2: -+** ptrue p3\.b, all -+** ld1h (z[0-9]+\.h), p3/z, \[x2\] -+** st1h \1, p0, \[x0\] -+** st2h {z3\.h - z4\.h}, p1, \[x0\] -+** st3h {z0\.h - z2\.h}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee2 (void *x0, svbfloat16x3_t z0, svbfloat16x2_t z3, svbfloat16x3_t z5, -+ svbfloat16x4_t stack1, svbfloat16_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst1_bf16 (p0, x0, stack2); -+ svst2_bf16 (p1, x0, z3); -+ svst3_bf16 (p2, x0, z0); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee1 (x0, -+ svld3_vnum_bf16 (pg, x0, -9), -+ svld2_vnum_bf16 (pg, x0, -2), -+ svld3_vnum_bf16 (pg, x0, 0), -+ svld4_vnum_bf16 (pg, x0, 8), -+ svld1_vnum_bf16 (pg, x0, 5), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3)); -+} -+ -+/* { dg-final { scan-assembler {\tld3h\t{z0\.h - z2\.h}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2h\t{z3\.h - z4\.h}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3h\t{z5\.h - z7\.h}, p[0-7]/z, \[x0\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4h\t{(z[0-9]+)\.h - z[0-9]+\.h}.*\tstr\t\1, \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4h\t{z[0-9]+\.h - (z[0-9]+)\.h}.*\tstr\t\1, \[x1, #3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+\.h), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1h\t\1, p[0-7], \[x2\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_f16.c -new file mode 100644 -index 000000000..80a2e3aae ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_f16.c -@@ -0,0 +1,70 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee1: -+** ... -+** ldr (z[0-9]+), \[x1, #3, mul vl\] -+** ... 
-+** st4h {z[0-9]+\.h - \1\.h}, p0, \[x0\] -+** st2h {z3\.h - z4\.h}, p1, \[x0\] -+** st3h {z5\.h - z7\.h}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee1 (void *x0, svfloat16x3_t z0, svfloat16x2_t z3, svfloat16x3_t z5, -+ svfloat16x4_t stack1, svfloat16_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst4_f16 (p0, x0, stack1); -+ svst2_f16 (p1, x0, z3); -+ svst3_f16 (p2, x0, z5); -+} -+ -+/* -+** callee2: -+** ptrue p3\.b, all -+** ld1h (z[0-9]+\.h), p3/z, \[x2\] -+** st1h \1, p0, \[x0\] -+** st2h {z3\.h - z4\.h}, p1, \[x0\] -+** st3h {z0\.h - z2\.h}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee2 (void *x0, svfloat16x3_t z0, svfloat16x2_t z3, svfloat16x3_t z5, -+ svfloat16x4_t stack1, svfloat16_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst1_f16 (p0, x0, stack2); -+ svst2_f16 (p1, x0, z3); -+ svst3_f16 (p2, x0, z0); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee1 (x0, -+ svld3_vnum_f16 (pg, x0, -9), -+ svld2_vnum_f16 (pg, x0, -2), -+ svld3_vnum_f16 (pg, x0, 0), -+ svld4_vnum_f16 (pg, x0, 8), -+ svld1_vnum_f16 (pg, x0, 5), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3)); -+} -+ -+/* { dg-final { scan-assembler {\tld3h\t{z0\.h - z2\.h}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2h\t{z3\.h - z4\.h}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3h\t{z5\.h - z7\.h}, p[0-7]/z, \[x0\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4h\t{(z[0-9]+)\.h - z[0-9]+\.h}.*\tstr\t\1, \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4h\t{z[0-9]+\.h - (z[0-9]+)\.h}.*\tstr\t\1, \[x1, #3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+\.h), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1h\t\1, p[0-7], \[x2\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_f32.c -new file mode 100644 -index 000000000..40ff42128 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_f32.c -@@ -0,0 +1,70 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee1: -+** ... -+** ldr (z[0-9]+), \[x1, #3, mul vl\] -+** ... 
-+** st4w {z[0-9]+\.s - \1\.s}, p0, \[x0\] -+** st2w {z3\.s - z4\.s}, p1, \[x0\] -+** st3w {z5\.s - z7\.s}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee1 (void *x0, svfloat32x3_t z0, svfloat32x2_t z3, svfloat32x3_t z5, -+ svfloat32x4_t stack1, svfloat32_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst4_f32 (p0, x0, stack1); -+ svst2_f32 (p1, x0, z3); -+ svst3_f32 (p2, x0, z5); -+} -+ -+/* -+** callee2: -+** ptrue p3\.b, all -+** ld1w (z[0-9]+\.s), p3/z, \[x2\] -+** st1w \1, p0, \[x0\] -+** st2w {z3\.s - z4\.s}, p1, \[x0\] -+** st3w {z0\.s - z2\.s}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee2 (void *x0, svfloat32x3_t z0, svfloat32x2_t z3, svfloat32x3_t z5, -+ svfloat32x4_t stack1, svfloat32_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst1_f32 (p0, x0, stack2); -+ svst2_f32 (p1, x0, z3); -+ svst3_f32 (p2, x0, z0); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee1 (x0, -+ svld3_vnum_f32 (pg, x0, -9), -+ svld2_vnum_f32 (pg, x0, -2), -+ svld3_vnum_f32 (pg, x0, 0), -+ svld4_vnum_f32 (pg, x0, 8), -+ svld1_vnum_f32 (pg, x0, 5), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3)); -+} -+ -+/* { dg-final { scan-assembler {\tld3w\t{z0\.s - z2\.s}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2w\t{z3\.s - z4\.s}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3w\t{z5\.s - z7\.s}, p[0-7]/z, \[x0\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4w\t{(z[0-9]+)\.s - z[0-9]+\.s}.*\tstr\t\1, \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4w\t{z[0-9]+\.s - (z[0-9]+)\.s}.*\tstr\t\1, \[x1, #3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+\.s), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1w\t\1, p[0-7], \[x2\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_f64.c -new file mode 100644 -index 000000000..ee219ccdc ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_f64.c -@@ -0,0 +1,70 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee1: -+** ... -+** ldr (z[0-9]+), \[x1, #3, mul vl\] -+** ... 
-+** st4d {z[0-9]+\.d - \1\.d}, p0, \[x0\] -+** st2d {z3\.d - z4\.d}, p1, \[x0\] -+** st3d {z5\.d - z7\.d}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee1 (void *x0, svfloat64x3_t z0, svfloat64x2_t z3, svfloat64x3_t z5, -+ svfloat64x4_t stack1, svfloat64_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst4_f64 (p0, x0, stack1); -+ svst2_f64 (p1, x0, z3); -+ svst3_f64 (p2, x0, z5); -+} -+ -+/* -+** callee2: -+** ptrue p3\.b, all -+** ld1d (z[0-9]+\.d), p3/z, \[x2\] -+** st1d \1, p0, \[x0\] -+** st2d {z3\.d - z4\.d}, p1, \[x0\] -+** st3d {z0\.d - z2\.d}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee2 (void *x0, svfloat64x3_t z0, svfloat64x2_t z3, svfloat64x3_t z5, -+ svfloat64x4_t stack1, svfloat64_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst1_f64 (p0, x0, stack2); -+ svst2_f64 (p1, x0, z3); -+ svst3_f64 (p2, x0, z0); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee1 (x0, -+ svld3_vnum_f64 (pg, x0, -9), -+ svld2_vnum_f64 (pg, x0, -2), -+ svld3_vnum_f64 (pg, x0, 0), -+ svld4_vnum_f64 (pg, x0, 8), -+ svld1_vnum_f64 (pg, x0, 5), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3)); -+} -+ -+/* { dg-final { scan-assembler {\tld3d\t{z0\.d - z2\.d}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2d\t{z3\.d - z4\.d}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3d\t{z5\.d - z7\.d}, p[0-7]/z, \[x0\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4d\t{(z[0-9]+)\.d - z[0-9]+\.d}.*\tstr\t\1, \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4d\t{z[0-9]+\.d - (z[0-9]+)\.d}.*\tstr\t\1, \[x1, #3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1d\t(z[0-9]+\.d), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1d\t\1, p[0-7], \[x2\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_s16.c -new file mode 100644 -index 000000000..ade75cb34 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_s16.c -@@ -0,0 +1,70 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee1: -+** ... -+** ldr (z[0-9]+), \[x1, #3, mul vl\] -+** ... 
-+** st4h {z[0-9]+\.h - \1\.h}, p0, \[x0\] -+** st2h {z3\.h - z4\.h}, p1, \[x0\] -+** st3h {z5\.h - z7\.h}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee1 (void *x0, svint16x3_t z0, svint16x2_t z3, svint16x3_t z5, -+ svint16x4_t stack1, svint16_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst4_s16 (p0, x0, stack1); -+ svst2_s16 (p1, x0, z3); -+ svst3_s16 (p2, x0, z5); -+} -+ -+/* -+** callee2: -+** ptrue p3\.b, all -+** ld1h (z[0-9]+\.h), p3/z, \[x2\] -+** st1h \1, p0, \[x0\] -+** st2h {z3\.h - z4\.h}, p1, \[x0\] -+** st3h {z0\.h - z2\.h}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee2 (void *x0, svint16x3_t z0, svint16x2_t z3, svint16x3_t z5, -+ svint16x4_t stack1, svint16_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst1_s16 (p0, x0, stack2); -+ svst2_s16 (p1, x0, z3); -+ svst3_s16 (p2, x0, z0); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee1 (x0, -+ svld3_vnum_s16 (pg, x0, -9), -+ svld2_vnum_s16 (pg, x0, -2), -+ svld3_vnum_s16 (pg, x0, 0), -+ svld4_vnum_s16 (pg, x0, 8), -+ svld1_vnum_s16 (pg, x0, 5), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3)); -+} -+ -+/* { dg-final { scan-assembler {\tld3h\t{z0\.h - z2\.h}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2h\t{z3\.h - z4\.h}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3h\t{z5\.h - z7\.h}, p[0-7]/z, \[x0\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4h\t{(z[0-9]+)\.h - z[0-9]+\.h}.*\tstr\t\1, \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4h\t{z[0-9]+\.h - (z[0-9]+)\.h}.*\tstr\t\1, \[x1, #3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+\.h), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1h\t\1, p[0-7], \[x2\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_s32.c -new file mode 100644 -index 000000000..a6c06e235 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_s32.c -@@ -0,0 +1,70 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee1: -+** ... -+** ldr (z[0-9]+), \[x1, #3, mul vl\] -+** ... 
-+** st4w {z[0-9]+\.s - \1\.s}, p0, \[x0\] -+** st2w {z3\.s - z4\.s}, p1, \[x0\] -+** st3w {z5\.s - z7\.s}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee1 (void *x0, svint32x3_t z0, svint32x2_t z3, svint32x3_t z5, -+ svint32x4_t stack1, svint32_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst4_s32 (p0, x0, stack1); -+ svst2_s32 (p1, x0, z3); -+ svst3_s32 (p2, x0, z5); -+} -+ -+/* -+** callee2: -+** ptrue p3\.b, all -+** ld1w (z[0-9]+\.s), p3/z, \[x2\] -+** st1w \1, p0, \[x0\] -+** st2w {z3\.s - z4\.s}, p1, \[x0\] -+** st3w {z0\.s - z2\.s}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee2 (void *x0, svint32x3_t z0, svint32x2_t z3, svint32x3_t z5, -+ svint32x4_t stack1, svint32_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst1_s32 (p0, x0, stack2); -+ svst2_s32 (p1, x0, z3); -+ svst3_s32 (p2, x0, z0); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee1 (x0, -+ svld3_vnum_s32 (pg, x0, -9), -+ svld2_vnum_s32 (pg, x0, -2), -+ svld3_vnum_s32 (pg, x0, 0), -+ svld4_vnum_s32 (pg, x0, 8), -+ svld1_vnum_s32 (pg, x0, 5), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3)); -+} -+ -+/* { dg-final { scan-assembler {\tld3w\t{z0\.s - z2\.s}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2w\t{z3\.s - z4\.s}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3w\t{z5\.s - z7\.s}, p[0-7]/z, \[x0\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4w\t{(z[0-9]+)\.s - z[0-9]+\.s}.*\tstr\t\1, \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4w\t{z[0-9]+\.s - (z[0-9]+)\.s}.*\tstr\t\1, \[x1, #3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+\.s), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1w\t\1, p[0-7], \[x2\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_s64.c -new file mode 100644 -index 000000000..219c71d82 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_s64.c -@@ -0,0 +1,70 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee1: -+** ... -+** ldr (z[0-9]+), \[x1, #3, mul vl\] -+** ... 
-+** st4d {z[0-9]+\.d - \1\.d}, p0, \[x0\] -+** st2d {z3\.d - z4\.d}, p1, \[x0\] -+** st3d {z5\.d - z7\.d}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee1 (void *x0, svint64x3_t z0, svint64x2_t z3, svint64x3_t z5, -+ svint64x4_t stack1, svint64_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst4_s64 (p0, x0, stack1); -+ svst2_s64 (p1, x0, z3); -+ svst3_s64 (p2, x0, z5); -+} -+ -+/* -+** callee2: -+** ptrue p3\.b, all -+** ld1d (z[0-9]+\.d), p3/z, \[x2\] -+** st1d \1, p0, \[x0\] -+** st2d {z3\.d - z4\.d}, p1, \[x0\] -+** st3d {z0\.d - z2\.d}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee2 (void *x0, svint64x3_t z0, svint64x2_t z3, svint64x3_t z5, -+ svint64x4_t stack1, svint64_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst1_s64 (p0, x0, stack2); -+ svst2_s64 (p1, x0, z3); -+ svst3_s64 (p2, x0, z0); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee1 (x0, -+ svld3_vnum_s64 (pg, x0, -9), -+ svld2_vnum_s64 (pg, x0, -2), -+ svld3_vnum_s64 (pg, x0, 0), -+ svld4_vnum_s64 (pg, x0, 8), -+ svld1_vnum_s64 (pg, x0, 5), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3)); -+} -+ -+/* { dg-final { scan-assembler {\tld3d\t{z0\.d - z2\.d}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2d\t{z3\.d - z4\.d}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3d\t{z5\.d - z7\.d}, p[0-7]/z, \[x0\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4d\t{(z[0-9]+)\.d - z[0-9]+\.d}.*\tstr\t\1, \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4d\t{z[0-9]+\.d - (z[0-9]+)\.d}.*\tstr\t\1, \[x1, #3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1d\t(z[0-9]+\.d), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1d\t\1, p[0-7], \[x2\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_s8.c -new file mode 100644 -index 000000000..c48d391ca ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_s8.c -@@ -0,0 +1,70 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee1: -+** ... -+** ldr (z[0-9]+), \[x1, #3, mul vl\] -+** ... 
-+** st4b {z[0-9]+\.b - \1\.b}, p0, \[x0\] -+** st2b {z3\.b - z4\.b}, p1, \[x0\] -+** st3b {z5\.b - z7\.b}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee1 (void *x0, svint8x3_t z0, svint8x2_t z3, svint8x3_t z5, -+ svint8x4_t stack1, svint8_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst4_s8 (p0, x0, stack1); -+ svst2_s8 (p1, x0, z3); -+ svst3_s8 (p2, x0, z5); -+} -+ -+/* -+** callee2: -+** ptrue p3\.b, all -+** ld1b (z[0-9]+\.b), p3/z, \[x2\] -+** st1b \1, p0, \[x0\] -+** st2b {z3\.b - z4\.b}, p1, \[x0\] -+** st3b {z0\.b - z2\.b}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee2 (void *x0, svint8x3_t z0, svint8x2_t z3, svint8x3_t z5, -+ svint8x4_t stack1, svint8_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst1_s8 (p0, x0, stack2); -+ svst2_s8 (p1, x0, z3); -+ svst3_s8 (p2, x0, z0); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee1 (x0, -+ svld3_vnum_s8 (pg, x0, -9), -+ svld2_vnum_s8 (pg, x0, -2), -+ svld3_vnum_s8 (pg, x0, 0), -+ svld4_vnum_s8 (pg, x0, 8), -+ svld1_vnum_s8 (pg, x0, 5), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3)); -+} -+ -+/* { dg-final { scan-assembler {\tld3b\t{z0\.b - z2\.b}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2b\t{z3\.b - z4\.b}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3b\t{z5\.b - z7\.b}, p[0-7]/z, \[x0\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4b\t{(z[0-9]+)\.b - z[0-9]+\.b}.*\tstr\t\1, \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4b\t{z[0-9]+\.b - (z[0-9]+)\.b}.*\tstr\t\1, \[x1, #3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+\.b), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1b\t\1, p[0-7], \[x2\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_u16.c -new file mode 100644 -index 000000000..6c635fd94 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_u16.c -@@ -0,0 +1,70 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee1: -+** ... -+** ldr (z[0-9]+), \[x1, #3, mul vl\] -+** ... 
-+** st4h {z[0-9]+\.h - \1\.h}, p0, \[x0\] -+** st2h {z3\.h - z4\.h}, p1, \[x0\] -+** st3h {z5\.h - z7\.h}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee1 (void *x0, svuint16x3_t z0, svuint16x2_t z3, svuint16x3_t z5, -+ svuint16x4_t stack1, svuint16_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst4_u16 (p0, x0, stack1); -+ svst2_u16 (p1, x0, z3); -+ svst3_u16 (p2, x0, z5); -+} -+ -+/* -+** callee2: -+** ptrue p3\.b, all -+** ld1h (z[0-9]+\.h), p3/z, \[x2\] -+** st1h \1, p0, \[x0\] -+** st2h {z3\.h - z4\.h}, p1, \[x0\] -+** st3h {z0\.h - z2\.h}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee2 (void *x0, svuint16x3_t z0, svuint16x2_t z3, svuint16x3_t z5, -+ svuint16x4_t stack1, svuint16_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst1_u16 (p0, x0, stack2); -+ svst2_u16 (p1, x0, z3); -+ svst3_u16 (p2, x0, z0); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee1 (x0, -+ svld3_vnum_u16 (pg, x0, -9), -+ svld2_vnum_u16 (pg, x0, -2), -+ svld3_vnum_u16 (pg, x0, 0), -+ svld4_vnum_u16 (pg, x0, 8), -+ svld1_vnum_u16 (pg, x0, 5), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3)); -+} -+ -+/* { dg-final { scan-assembler {\tld3h\t{z0\.h - z2\.h}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2h\t{z3\.h - z4\.h}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3h\t{z5\.h - z7\.h}, p[0-7]/z, \[x0\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4h\t{(z[0-9]+)\.h - z[0-9]+\.h}.*\tstr\t\1, \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4h\t{z[0-9]+\.h - (z[0-9]+)\.h}.*\tstr\t\1, \[x1, #3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1h\t(z[0-9]+\.h), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1h\t\1, p[0-7], \[x2\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_u32.c -new file mode 100644 -index 000000000..c31d45426 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_u32.c -@@ -0,0 +1,70 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee1: -+** ... -+** ldr (z[0-9]+), \[x1, #3, mul vl\] -+** ... 
-+** st4w {z[0-9]+\.s - \1\.s}, p0, \[x0\] -+** st2w {z3\.s - z4\.s}, p1, \[x0\] -+** st3w {z5\.s - z7\.s}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee1 (void *x0, svuint32x3_t z0, svuint32x2_t z3, svuint32x3_t z5, -+ svuint32x4_t stack1, svuint32_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst4_u32 (p0, x0, stack1); -+ svst2_u32 (p1, x0, z3); -+ svst3_u32 (p2, x0, z5); -+} -+ -+/* -+** callee2: -+** ptrue p3\.b, all -+** ld1w (z[0-9]+\.s), p3/z, \[x2\] -+** st1w \1, p0, \[x0\] -+** st2w {z3\.s - z4\.s}, p1, \[x0\] -+** st3w {z0\.s - z2\.s}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee2 (void *x0, svuint32x3_t z0, svuint32x2_t z3, svuint32x3_t z5, -+ svuint32x4_t stack1, svuint32_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst1_u32 (p0, x0, stack2); -+ svst2_u32 (p1, x0, z3); -+ svst3_u32 (p2, x0, z0); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee1 (x0, -+ svld3_vnum_u32 (pg, x0, -9), -+ svld2_vnum_u32 (pg, x0, -2), -+ svld3_vnum_u32 (pg, x0, 0), -+ svld4_vnum_u32 (pg, x0, 8), -+ svld1_vnum_u32 (pg, x0, 5), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3)); -+} -+ -+/* { dg-final { scan-assembler {\tld3w\t{z0\.s - z2\.s}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2w\t{z3\.s - z4\.s}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3w\t{z5\.s - z7\.s}, p[0-7]/z, \[x0\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4w\t{(z[0-9]+)\.s - z[0-9]+\.s}.*\tstr\t\1, \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4w\t{z[0-9]+\.s - (z[0-9]+)\.s}.*\tstr\t\1, \[x1, #3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+\.s), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1w\t\1, p[0-7], \[x2\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_u64.c -new file mode 100644 -index 000000000..969b258b7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_u64.c -@@ -0,0 +1,70 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee1: -+** ... -+** ldr (z[0-9]+), \[x1, #3, mul vl\] -+** ... 
-+** st4d {z[0-9]+\.d - \1\.d}, p0, \[x0\] -+** st2d {z3\.d - z4\.d}, p1, \[x0\] -+** st3d {z5\.d - z7\.d}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee1 (void *x0, svuint64x3_t z0, svuint64x2_t z3, svuint64x3_t z5, -+ svuint64x4_t stack1, svuint64_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst4_u64 (p0, x0, stack1); -+ svst2_u64 (p1, x0, z3); -+ svst3_u64 (p2, x0, z5); -+} -+ -+/* -+** callee2: -+** ptrue p3\.b, all -+** ld1d (z[0-9]+\.d), p3/z, \[x2\] -+** st1d \1, p0, \[x0\] -+** st2d {z3\.d - z4\.d}, p1, \[x0\] -+** st3d {z0\.d - z2\.d}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee2 (void *x0, svuint64x3_t z0, svuint64x2_t z3, svuint64x3_t z5, -+ svuint64x4_t stack1, svuint64_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst1_u64 (p0, x0, stack2); -+ svst2_u64 (p1, x0, z3); -+ svst3_u64 (p2, x0, z0); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee1 (x0, -+ svld3_vnum_u64 (pg, x0, -9), -+ svld2_vnum_u64 (pg, x0, -2), -+ svld3_vnum_u64 (pg, x0, 0), -+ svld4_vnum_u64 (pg, x0, 8), -+ svld1_vnum_u64 (pg, x0, 5), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3)); -+} -+ -+/* { dg-final { scan-assembler {\tld3d\t{z0\.d - z2\.d}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2d\t{z3\.d - z4\.d}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3d\t{z5\.d - z7\.d}, p[0-7]/z, \[x0\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4d\t{(z[0-9]+)\.d - z[0-9]+\.d}.*\tstr\t\1, \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4d\t{z[0-9]+\.d - (z[0-9]+)\.d}.*\tstr\t\1, \[x1, #3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1d\t(z[0-9]+\.d), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1d\t\1, p[0-7], \[x2\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_u8.c -new file mode 100644 -index 000000000..d18604784 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_6_le_u8.c -@@ -0,0 +1,70 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** callee1: -+** ... -+** ldr (z[0-9]+), \[x1, #3, mul vl\] -+** ... 
-+** st4b {z[0-9]+\.b - \1\.b}, p0, \[x0\] -+** st2b {z3\.b - z4\.b}, p1, \[x0\] -+** st3b {z5\.b - z7\.b}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee1 (void *x0, svuint8x3_t z0, svuint8x2_t z3, svuint8x3_t z5, -+ svuint8x4_t stack1, svuint8_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst4_u8 (p0, x0, stack1); -+ svst2_u8 (p1, x0, z3); -+ svst3_u8 (p2, x0, z5); -+} -+ -+/* -+** callee2: -+** ptrue p3\.b, all -+** ld1b (z[0-9]+\.b), p3/z, \[x2\] -+** st1b \1, p0, \[x0\] -+** st2b {z3\.b - z4\.b}, p1, \[x0\] -+** st3b {z0\.b - z2\.b}, p2, \[x0\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee2 (void *x0, svuint8x3_t z0, svuint8x2_t z3, svuint8x3_t z5, -+ svuint8x4_t stack1, svuint8_t stack2, svbool_t p0, -+ svbool_t p1, svbool_t p2) -+{ -+ svst1_u8 (p0, x0, stack2); -+ svst2_u8 (p1, x0, z3); -+ svst3_u8 (p2, x0, z0); -+} -+ -+void __attribute__((noipa)) -+caller (void *x0) -+{ -+ svbool_t pg; -+ pg = svptrue_b8 (); -+ callee1 (x0, -+ svld3_vnum_u8 (pg, x0, -9), -+ svld2_vnum_u8 (pg, x0, -2), -+ svld3_vnum_u8 (pg, x0, 0), -+ svld4_vnum_u8 (pg, x0, 8), -+ svld1_vnum_u8 (pg, x0, 5), -+ svptrue_pat_b8 (SV_VL1), -+ svptrue_pat_b16 (SV_VL2), -+ svptrue_pat_b32 (SV_VL3)); -+} -+ -+/* { dg-final { scan-assembler {\tld3b\t{z0\.b - z2\.b}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2b\t{z3\.b - z4\.b}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld3b\t{z5\.b - z7\.b}, p[0-7]/z, \[x0\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4b\t{(z[0-9]+)\.b - z[0-9]+\.b}.*\tstr\t\1, \[x1\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4b\t{z[0-9]+\.b - (z[0-9]+)\.b}.*\tstr\t\1, \[x1, #3, mul vl\]\n} } } */ -+/* { dg-final { scan-assembler {\tld1b\t(z[0-9]+\.b), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1b\t\1, p[0-7], \[x2\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_7.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_7.c -new file mode 100644 -index 000000000..15c022486 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_7.c -@@ -0,0 +1,30 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#include -+ -+/* -+** callee: -+** ... -+** ldr (x[0-9]+), \[sp\] -+** ... 
-+** ld1b (z[0-9]+\.b), p[1-3]/z, \[\1\] -+** st1b \2, p0, \[x0, x7\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee (int8_t *x0, int x1, int x2, int x3, -+ int x4, int x5, svbool_t p0, int x6, int64_t x7, -+ svint32x4_t z0, svint32x4_t z4, svint8_t stack) -+{ -+ svst1 (p0, x0 + x7, stack); -+} -+ -+void __attribute__((noipa)) -+caller (int8_t *x0, svbool_t p0, svint32x4_t z0, svint32x4_t z4) -+{ -+ callee (x0, 1, 2, 3, 4, 5, p0, 6, 7, z0, z4, svdup_s8 (42)); -+} -+ -+/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.b), #42\n.*\tst1b\t\1, p[0-7], \[(x[0-9]+)\]\n.*\tstr\t\2, \[sp\]\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_8.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_8.c -new file mode 100644 -index 000000000..93ace26f5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_8.c -@@ -0,0 +1,28 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#include -+ -+/* -+** callee: -+** ptrue (p[1-3])\.b, all -+** ld1b (z[0-9]+\.b), \1/z, \[x4\] -+** st1b \2, p0, \[x0, x7\] -+** ret -+*/ -+void __attribute__((noipa)) -+callee (int8_t *x0, int x1, int x2, int x3, -+ svint32x4_t z0, svint32x4_t z4, svint8_t stack, -+ int x5, svbool_t p0, int x6, int64_t x7) -+{ -+ svst1 (p0, x0 + x7, stack); -+} -+ -+void __attribute__((noipa)) -+caller (int8_t *x0, svbool_t p0, svint32x4_t z0, svint32x4_t z4) -+{ -+ callee (x0, 1, 2, 3, z0, z4, svdup_s8 (42), 5, p0, 6, 7); -+} -+ -+/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.b), #42\n.*\tst1b\t\1, p[0-7], \[x4\]\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_9.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_9.c -new file mode 100644 -index 000000000..ad9affadf ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/args_9.c -@@ -0,0 +1,49 @@ -+/* { dg-do compile { target lp64 } } */ -+/* { dg-options "-O -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#include -+ -+/* -+** callee: -+** ldr (x[0-9]+), \[sp, 8\] -+** ldr p0, \[\1\] -+** ret -+*/ -+svbool_t __attribute__((noipa)) -+callee (svint64x4_t z0, svint16x4_t z4, -+ svint64_t stack1, svint32_t stack2, -+ svint16_t stack3, svint8_t stack4, -+ svuint64_t stack5, svuint32_t stack6, -+ svuint16_t stack7, svuint8_t stack8, -+ svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3, -+ svbool_t stack9, svbool_t stack10) -+{ -+ return stack10; -+} -+ -+uint64_t __attribute__((noipa)) -+caller (int64_t *x0, int16_t *x1, svbool_t p0) -+{ -+ svbool_t res; -+ res = callee (svld4 (p0, x0), -+ svld4 (p0, x1), -+ svdup_s64 (1), -+ svdup_s32 (2), -+ svdup_s16 (3), -+ svdup_s8 (4), -+ svdup_u64 (5), -+ svdup_u32 (6), -+ svdup_u16 (7), -+ svdup_u8 (8), -+ svptrue_pat_b8 (SV_VL5), -+ svptrue_pat_b16 (SV_VL6), -+ svptrue_pat_b32 (SV_VL7), -+ svptrue_pat_b64 (SV_VL8), -+ svptrue_pat_b8 (SV_MUL3), -+ svptrue_pat_b16 (SV_MUL3)); -+ return svcntp_b8 (res, res); -+} -+ -+/* { dg-final { scan-assembler {\tptrue\t(p[0-9]+)\.b, mul3\n\tstr\t\1, \[(x[0-9]+)\]\n.*\tstr\t\2, \[sp\]\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\t(p[0-9]+)\.h, mul3\n\tstr\t\1, \[(x[0-9]+)\]\n.*\tstr\t\2, \[sp, 8\]\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/gnu_vectors_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/gnu_vectors_1.c -new file mode 100644 -index 000000000..e5fceb14b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/gnu_vectors_1.c -@@ -0,0 +1,107 @@ -+/* { dg-options "-O -msve-vector-bits=256 -fomit-frame-pointer" } */ 
-+ -+#include -+ -+typedef bfloat16_t bfloat16x16_t __attribute__((vector_size (32))); -+typedef float16_t float16x16_t __attribute__((vector_size (32))); -+typedef float32_t float32x8_t __attribute__((vector_size (32))); -+typedef float64_t float64x4_t __attribute__((vector_size (32))); -+typedef int8_t int8x32_t __attribute__((vector_size (32))); -+typedef int16_t int16x16_t __attribute__((vector_size (32))); -+typedef int32_t int32x8_t __attribute__((vector_size (32))); -+typedef int64_t int64x4_t __attribute__((vector_size (32))); -+typedef uint8_t uint8x32_t __attribute__((vector_size (32))); -+typedef uint16_t uint16x16_t __attribute__((vector_size (32))); -+typedef uint32_t uint32x8_t __attribute__((vector_size (32))); -+typedef uint64_t uint64x4_t __attribute__((vector_size (32))); -+ -+void bfloat16_callee (bfloat16x16_t); -+void float16_callee (float16x16_t); -+void float32_callee (float32x8_t); -+void float64_callee (float64x4_t); -+void int8_callee (int8x32_t); -+void int16_callee (int16x16_t); -+void int32_callee (int32x8_t); -+void int64_callee (int64x4_t); -+void uint8_callee (uint8x32_t); -+void uint16_callee (uint16x16_t); -+void uint32_callee (uint32x8_t); -+void uint64_callee (uint64x4_t); -+ -+void -+bfloat16_caller (bfloat16_t val) -+{ -+ bfloat16_callee (svdup_bf16 (val)); -+} -+ -+void -+float16_caller (void) -+{ -+ float16_callee (svdup_f16 (1.0)); -+} -+ -+void -+float32_caller (void) -+{ -+ float32_callee (svdup_f32 (2.0)); -+} -+ -+void -+float64_caller (void) -+{ -+ float64_callee (svdup_f64 (3.0)); -+} -+ -+void -+int8_caller (void) -+{ -+ int8_callee (svindex_s8 (0, 1)); -+} -+ -+void -+int16_caller (void) -+{ -+ int16_callee (svindex_s16 (0, 2)); -+} -+ -+void -+int32_caller (void) -+{ -+ int32_callee (svindex_s32 (0, 3)); -+} -+ -+void -+int64_caller (void) -+{ -+ int64_callee (svindex_s64 (0, 4)); -+} -+ -+void -+uint8_caller (void) -+{ -+ uint8_callee (svindex_u8 (1, 1)); -+} -+ -+void -+uint16_caller (void) -+{ -+ uint16_callee (svindex_u16 (1, 2)); -+} -+ -+void -+uint32_caller (void) -+{ -+ uint32_callee (svindex_u32 (1, 3)); -+} -+ -+void -+uint64_caller (void) -+{ -+ uint64_callee (svindex_u64 (1, 4)); -+} -+ -+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b, p[0-7], \[x0\]} 2 } } */ -+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h, p[0-7], \[x0\]} 4 } } */ -+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x0\]} 3 } } */ -+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x0\]} 3 } } */ -+/* { dg-final { scan-assembler-times {\tadd\tx0, sp, #?16\n} 12 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/gnu_vectors_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/gnu_vectors_2.c -new file mode 100644 -index 000000000..875567f01 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/gnu_vectors_2.c -@@ -0,0 +1,107 @@ -+/* { dg-options "-O -msve-vector-bits=256 -fomit-frame-pointer" } */ -+ -+#include -+ -+typedef bfloat16_t bfloat16x16_t __attribute__((vector_size (32))); -+typedef float16_t float16x16_t __attribute__((vector_size (32))); -+typedef float32_t float32x8_t __attribute__((vector_size (32))); -+typedef float64_t float64x4_t __attribute__((vector_size (32))); -+typedef int8_t int8x32_t __attribute__((vector_size (32))); -+typedef int16_t int16x16_t __attribute__((vector_size (32))); -+typedef int32_t int32x8_t __attribute__((vector_size (32))); -+typedef int64_t int64x4_t __attribute__((vector_size (32))); -+typedef uint8_t uint8x32_t __attribute__((vector_size (32))); 
-+typedef uint16_t uint16x16_t __attribute__((vector_size (32))); -+typedef uint32_t uint32x8_t __attribute__((vector_size (32))); -+typedef uint64_t uint64x4_t __attribute__((vector_size (32))); -+ -+void bfloat16_callee (svbfloat16_t); -+void float16_callee (svfloat16_t); -+void float32_callee (svfloat32_t); -+void float64_callee (svfloat64_t); -+void int8_callee (svint8_t); -+void int16_callee (svint16_t); -+void int32_callee (svint32_t); -+void int64_callee (svint64_t); -+void uint8_callee (svuint8_t); -+void uint16_callee (svuint16_t); -+void uint32_callee (svuint32_t); -+void uint64_callee (svuint64_t); -+ -+void -+bfloat16_caller (bfloat16x16_t arg) -+{ -+ bfloat16_callee (arg); -+} -+ -+void -+float16_caller (float16x16_t arg) -+{ -+ float16_callee (arg); -+} -+ -+void -+float32_caller (float32x8_t arg) -+{ -+ float32_callee (arg); -+} -+ -+void -+float64_caller (float64x4_t arg) -+{ -+ float64_callee (arg); -+} -+ -+void -+int8_caller (int8x32_t arg) -+{ -+ int8_callee (arg); -+} -+ -+void -+int16_caller (int16x16_t arg) -+{ -+ int16_callee (arg); -+} -+ -+void -+int32_caller (int32x8_t arg) -+{ -+ int32_callee (arg); -+} -+ -+void -+int64_caller (int64x4_t arg) -+{ -+ int64_callee (arg); -+} -+ -+void -+uint8_caller (uint8x32_t arg) -+{ -+ uint8_callee (arg); -+} -+ -+void -+uint16_caller (uint16x16_t arg) -+{ -+ uint16_callee (arg); -+} -+ -+void -+uint32_caller (uint32x8_t arg) -+{ -+ uint32_callee (arg); -+} -+ -+void -+uint64_caller (uint64x4_t arg) -+{ -+ uint64_callee (arg); -+} -+ -+/* { dg-final { scan-assembler-times {\tld1b\tz0\.b, p[0-7]/z, \[x0\]} 2 } } */ -+/* { dg-final { scan-assembler-times {\tld1h\tz0\.h, p[0-7]/z, \[x0\]} 4 } } */ -+/* { dg-final { scan-assembler-times {\tld1w\tz0\.s, p[0-7]/z, \[x0\]} 3 } } */ -+/* { dg-final { scan-assembler-times {\tld1d\tz0\.d, p[0-7]/z, \[x0\]} 3 } } */ -+/* { dg-final { scan-assembler-not {\tst1[bhwd]\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_1.c -new file mode 100644 -index 000000000..26802c87f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_1.c -@@ -0,0 +1,14 @@ -+/* { dg-do compile } */ -+/* { dg-prune-output "compilation terminated" } */ -+ -+#include -+ -+#pragma GCC target "+nosve" -+ -+svbool_t return_bool (); -+ -+void -+f (void) -+{ -+ return_bool (); /* { dg-error {'return_bool' requires the SVE ISA extension} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_2.c -new file mode 100644 -index 000000000..663165f89 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_2.c -@@ -0,0 +1,14 @@ -+/* { dg-do compile } */ -+/* { dg-prune-output "compilation terminated" } */ -+ -+#include -+ -+#pragma GCC target "+nosve" -+ -+svbool_t return_bool (); -+ -+void -+f (svbool_t *ptr) -+{ -+ *ptr = return_bool (); /* { dg-error {'return_bool' requires the SVE ISA extension} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_3.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_3.c -new file mode 100644 -index 000000000..6d5823cfd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_3.c -@@ -0,0 +1,14 @@ -+/* { dg-do compile } */ -+/* { dg-prune-output "compilation terminated" } */ -+ -+#include -+ -+#pragma GCC target "+nosve" -+ -+svbool_t (*return_bool) (); -+ -+void -+f (svbool_t *ptr) -+{ -+ *ptr = return_bool (); /* { dg-error {calls to functions of type 'svbool_t\(\)' require the SVE ISA extension} } 
*/
-+}
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_4.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_4.c
-new file mode 100644
-index 000000000..81e31cf4f
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_4.c
-@@ -0,0 +1,14 @@
-+/* { dg-do compile } */
-+/* { dg-prune-output "compilation terminated" } */
-+
-+#include <arm_sve.h>
-+
-+#pragma GCC target "+nosve"
-+
-+void take_svuint8 (svuint8_t);
-+
-+void
-+f (svuint8_t *ptr)
-+{
-+ take_svuint8 (*ptr); /* { dg-error {'take_svuint8' requires the SVE ISA extension} } */
-+}
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_5.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_5.c
-new file mode 100644
-index 000000000..300ed00a0
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_5.c
-@@ -0,0 +1,15 @@
-+/* { dg-do compile } */
-+/* { dg-prune-output "compilation terminated" } */
-+
-+#include <arm_sve.h>
-+
-+#pragma GCC target "+nosve"
-+
-+void take_svuint8_eventually (float, float, float, float,
-+ float, float, float, float, svuint8_t);
-+
-+void
-+f (svuint8_t *ptr)
-+{
-+ take_svuint8_eventually (0, 0, 0, 0, 0, 0, 0, 0, *ptr); /* { dg-error {arguments of type '(svuint8_t|__SVUint8_t)' require the SVE ISA extension} } */
-+}
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_6.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_6.c
-new file mode 100644
-index 000000000..4bddf76f8
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_6.c
-@@ -0,0 +1,14 @@
-+/* { dg-do compile } */
-+/* { dg-prune-output "compilation terminated" } */
-+
-+#include <arm_sve.h>
-+
-+#pragma GCC target "+nosve"
-+
-+void unprototyped ();
-+
-+void
-+f (svuint8_t *ptr)
-+{
-+ unprototyped (*ptr); /* { dg-error {arguments of type '(svuint8_t|__SVUint8_t)' require the SVE ISA extension} } */
-+}
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_7.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_7.c
-new file mode 100644
-index 000000000..ef742711d
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_7.c
-@@ -0,0 +1,8 @@
-+/* { dg-do compile } */
-+/* { dg-prune-output "compilation terminated" } */
-+
-+#include <arm_sve.h>
-+
-+#pragma GCC target "+nosve"
-+
-+void f (svuint8_t x) {} /* { dg-error {'f' requires the SVE ISA extension} } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_8.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_8.c
-new file mode 100644
-index 000000000..45b549f12
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/nosve_8.c
-@@ -0,0 +1,11 @@
-+/* { dg-do compile } */
-+/* { dg-prune-output "compilation terminated" } */
-+
-+#include <arm_sve.h>
-+
-+#pragma GCC target "+nosve"
-+
-+void
-+f (float a, float b, float c, float d, float e, float f, float g, float h, svuint8_t x) /* { dg-error {arguments of type '(svuint8_t|__SVUint8_t)' require the SVE ISA extension} } */
-+{
-+}
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_1.c
-new file mode 100644
-index 000000000..f6328c901
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_1.c
-@@ -0,0 +1,32 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O -g" } */
-+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */
-+
-+/*
-+** callee_pred:
-+** ldr p0, \[x0\]
-+** ret
-+*/
-+__SVBool_t __attribute__((noipa))
-+callee_pred (__SVBool_t *ptr)
-+{
-+ return *ptr;
-+}
-+
-+#include <arm_sve.h>
-+
-+/*
-+** caller_pred:
-+** ...
-+** bl callee_pred
-+** cntp x0, p0, p0.b
-+** ldp x29, x30, \[sp\], 16
-+** ret
-+*/
-+uint64_t __attribute__((noipa))
-+caller_pred (__SVBool_t *ptr1)
-+{
-+ __SVBool_t p;
-+ p = callee_pred (ptr1);
-+ return svcntp_b8 (p, p);
-+}
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_1_1024.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_1_1024.c
-new file mode 100644
-index 000000000..450a3f029
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_1_1024.c
-@@ -0,0 +1,31 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O -msve-vector-bits=1024 -g" } */
-+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */
-+
-+/*
-+** callee_pred:
-+** ldr p0, \[x0\]
-+** ret
-+*/
-+__SVBool_t __attribute__((noipa))
-+callee_pred (__SVBool_t *ptr)
-+{
-+ return *ptr;
-+}
-+
-+#include <arm_sve.h>
-+
-+/*
-+** caller_pred:
-+** ...
-+** bl callee_pred
-+** cntp x0, p0, p0.b
-+** ldp x29, x30, \[sp\], 16
-+** ret
-+*/
-+uint64_t __attribute__((noipa))
-+caller_pred (__SVBool_t *ptr1)
-+{
-+ __SVBool_t p = callee_pred (ptr1);
-+ return svcntp_b8 (p, p);
-+}
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_1_2048.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_1_2048.c
-new file mode 100644
-index 000000000..c9ea26899
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_1_2048.c
-@@ -0,0 +1,31 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O -msve-vector-bits=2048 -g" } */
-+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */
-+
-+/*
-+** callee_pred:
-+** ldr p0, \[x0\]
-+** ret
-+*/
-+__SVBool_t __attribute__((noipa))
-+callee_pred (__SVBool_t *ptr)
-+{
-+ return *ptr;
-+}
-+
-+#include <arm_sve.h>
-+
-+/*
-+** caller_pred:
-+** ...
-+** bl callee_pred
-+** cntp x0, p0, p0.b
-+** ldp x29, x30, \[sp\], 16
-+** ret
-+*/
-+uint64_t __attribute__((noipa))
-+caller_pred (__SVBool_t *ptr1)
-+{
-+ __SVBool_t p = callee_pred (ptr1);
-+ return svcntp_b8 (p, p);
-+}
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_1_256.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_1_256.c
-new file mode 100644
-index 000000000..62bc695d1
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_1_256.c
-@@ -0,0 +1,31 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O -msve-vector-bits=256 -g" } */
-+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */
-+
-+/*
-+** callee_pred:
-+** ldr p0, \[x0\]
-+** ret
-+*/
-+__SVBool_t __attribute__((noipa))
-+callee_pred (__SVBool_t *ptr)
-+{
-+ return *ptr;
-+}
-+
-+#include <arm_sve.h>
-+
-+/*
-+** caller_pred:
-+** ...
-+** bl callee_pred
-+** cntp x0, p0, p0.b
-+** ldp x29, x30, \[sp\], 16
-+** ret
-+*/
-+uint64_t __attribute__((noipa))
-+caller_pred (__SVBool_t *ptr1)
-+{
-+ __SVBool_t p = callee_pred (ptr1);
-+ return svcntp_b8 (p, p);
-+}
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_1_512.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_1_512.c
-new file mode 100644
-index 000000000..f687689ce
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_1_512.c
-@@ -0,0 +1,31 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O -msve-vector-bits=512 -g" } */
-+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */
-+
-+/*
-+** callee_pred:
-+** ldr p0, \[x0\]
-+** ret
-+*/
-+__SVBool_t __attribute__((noipa))
-+callee_pred (__SVBool_t *ptr)
-+{
-+ return *ptr;
-+}
-+
-+#include <arm_sve.h>
-+
-+/*
-+** caller_pred:
-+** ...
-+** bl callee_pred -+** cntp x0, p0, p0.b -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+uint64_t __attribute__((noipa)) -+caller_pred (__SVBool_t *ptr1) -+{ -+ __SVBool_t p = callee_pred (ptr1); -+ return svcntp_b8 (p, p); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_2.c -new file mode 100644 -index 000000000..efaa81394 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_2.c -@@ -0,0 +1,32 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+#include -+ -+/* -+** callee_pred: -+** ldr p0, \[x0\] -+** ret -+*/ -+svbool_t __attribute__((noipa)) -+callee_pred (svbool_t *ptr) -+{ -+ return *ptr; -+} -+ -+/* -+** caller_pred: -+** ... -+** bl callee_pred -+** cntp x0, p0, p0.b -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+uint64_t __attribute__((noipa)) -+caller_pred (svbool_t *ptr1) -+{ -+ svbool_t p; -+ p = callee_pred (ptr1); -+ return svcntp_b8 (p, p); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_3.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_3.c -new file mode 100644 -index 000000000..71046447d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_3.c -@@ -0,0 +1,34 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+#include -+ -+typedef svbool_t my_pred; -+ -+/* -+** callee_pred: -+** ldr p0, \[x0\] -+** ret -+*/ -+my_pred __attribute__((noipa)) -+callee_pred (my_pred *ptr) -+{ -+ return *ptr; -+} -+ -+/* -+** caller_pred: -+** ... -+** bl callee_pred -+** cntp x0, p0, p0.b -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+uint64_t __attribute__((noipa)) -+caller_pred (my_pred *ptr1) -+{ -+ my_pred p; -+ p = callee_pred (ptr1); -+ return svcntp_b8 (p, p); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_4.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_4.c -new file mode 100644 -index 000000000..00eb2cbda ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_4.c -@@ -0,0 +1,264 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+#define CALLEE(SUFFIX, TYPE) \ -+ TYPE __attribute__((noipa)) \ -+ callee_##SUFFIX (TYPE *ptr) \ -+ { \ -+ return *ptr; \ -+ } -+ -+/* -+** callee_s8: -+** ptrue (p[0-7])\.b, all -+** ld1b z0\.b, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s8, __SVInt8_t) -+ -+/* -+** callee_u8: -+** ptrue (p[0-7])\.b, all -+** ld1b z0\.b, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u8, __SVUint8_t) -+ -+/* -+** callee_s16: -+** ptrue (p[0-7])\.b, all -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s16, __SVInt16_t) -+ -+/* -+** callee_u16: -+** ptrue (p[0-7])\.b, all -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u16, __SVUint16_t) -+ -+/* -+** callee_f16: -+** ptrue (p[0-7])\.b, all -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (f16, __SVFloat16_t) -+ -+/* -+** callee_bf16: -+** ptrue (p[0-7])\.b, all -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (bf16, __SVBfloat16_t) -+ -+/* -+** callee_s32: -+** ptrue (p[0-7])\.b, all -+** ld1w z0\.s, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s32, __SVInt32_t) -+ -+/* -+** callee_u32: -+** ptrue (p[0-7])\.b, all -+** ld1w z0\.s, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u32, __SVUint32_t) -+ -+/* -+** callee_f32: -+** ptrue (p[0-7])\.b, all -+** ld1w z0\.s, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (f32, __SVFloat32_t) -+ -+/* 
-+** callee_s64: -+** ptrue (p[0-7])\.b, all -+** ld1d z0\.d, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s64, __SVInt64_t) -+ -+/* -+** callee_u64: -+** ptrue (p[0-7])\.b, all -+** ld1d z0\.d, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u64, __SVUint64_t) -+ -+/* -+** callee_f64: -+** ptrue (p[0-7])\.b, all -+** ld1d z0\.d, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (f64, __SVFloat64_t) -+ -+#include -+ -+#define CALLER(SUFFIX, TYPE) \ -+ typeof (svaddv (svptrue_b8 (), *(TYPE *) 0)) \ -+ __attribute__((noipa)) \ -+ caller_##SUFFIX (TYPE *ptr1) \ -+ { \ -+ return svaddv (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ -+ } -+ -+#define CALLER_BF16(SUFFIX, TYPE) \ -+ typeof (svlasta (svptrue_b8 (), *(TYPE *) 0)) \ -+ __attribute__((noipa)) \ -+ caller_##SUFFIX (TYPE *ptr1) \ -+ { \ -+ return svlasta (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ -+ } -+ -+/* -+** caller_s8: -+** ... -+** bl callee_s8 -+** ptrue (p[0-7])\.b, all -+** saddv (d[0-9]+), \1, z0\.b -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s8, __SVInt8_t) -+ -+/* -+** caller_u8: -+** ... -+** bl callee_u8 -+** ptrue (p[0-7])\.b, all -+** uaddv (d[0-9]+), \1, z0\.b -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u8, __SVUint8_t) -+ -+/* -+** caller_s16: -+** ... -+** bl callee_s16 -+** ptrue (p[0-7])\.b, all -+** saddv (d[0-9]+), \1, z0\.h -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s16, __SVInt16_t) -+ -+/* -+** caller_u16: -+** ... -+** bl callee_u16 -+** ptrue (p[0-7])\.b, all -+** uaddv (d[0-9]+), \1, z0\.h -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u16, __SVUint16_t) -+ -+/* -+** caller_f16: -+** ... -+** bl callee_f16 -+** ptrue (p[0-7])\.b, all -+** faddv h0, \1, z0\.h -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (f16, __SVFloat16_t) -+ -+/* -+** caller_bf16: -+** ... -+** bl callee_bf16 -+** ptrue (p[0-7])\.b, all -+** lasta h0, \1, z0\.h -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER_BF16 (bf16, __SVBfloat16_t) -+ -+/* -+** caller_s32: -+** ... -+** bl callee_s32 -+** ptrue (p[0-7])\.b, all -+** saddv (d[0-9]+), \1, z0\.s -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s32, __SVInt32_t) -+ -+/* -+** caller_u32: -+** ... -+** bl callee_u32 -+** ptrue (p[0-7])\.b, all -+** uaddv (d[0-9]+), \1, z0\.s -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u32, __SVUint32_t) -+ -+/* -+** caller_f32: -+** ... -+** bl callee_f32 -+** ptrue (p[0-7])\.b, all -+** faddv s0, \1, z0\.s -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (f32, __SVFloat32_t) -+ -+/* -+** caller_s64: -+** ... -+** bl callee_s64 -+** ptrue (p[0-7])\.b, all -+** uaddv (d[0-9]+), \1, z0\.d -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s64, __SVInt64_t) -+ -+/* -+** caller_u64: -+** ... -+** bl callee_u64 -+** ptrue (p[0-7])\.b, all -+** uaddv (d[0-9]+), \1, z0\.d -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u64, __SVUint64_t) -+ -+/* -+** caller_f64: -+** ... 
-+** bl callee_f64 -+** ptrue (p[0-7])\.b, all -+** faddv d0, \1, z0\.d -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (f64, __SVFloat64_t) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_4_1024.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_4_1024.c -new file mode 100644 -index 000000000..43519634c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_4_1024.c -@@ -0,0 +1,264 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -msve-vector-bits=1024 -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+#define CALLEE(SUFFIX, TYPE) \ -+ TYPE __attribute__((noipa)) \ -+ callee_##SUFFIX (TYPE *ptr) \ -+ { \ -+ return *ptr; \ -+ } -+ -+/* -+** callee_s8: -+** ptrue (p[0-7])\.b, vl128 -+** ld1b z0\.b, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s8, __SVInt8_t) -+ -+/* -+** callee_u8: -+** ptrue (p[0-7])\.b, vl128 -+** ld1b z0\.b, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u8, __SVUint8_t) -+ -+/* -+** callee_s16: -+** ptrue (p[0-7])\.b, vl128 -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s16, __SVInt16_t) -+ -+/* -+** callee_u16: -+** ptrue (p[0-7])\.b, vl128 -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u16, __SVUint16_t) -+ -+/* -+** callee_f16: -+** ptrue (p[0-7])\.b, vl128 -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (f16, __SVFloat16_t) -+ -+/* -+** callee_bf16: -+** ptrue (p[0-7])\.b, vl128 -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (bf16, __SVBfloat16_t) -+ -+/* -+** callee_s32: -+** ptrue (p[0-7])\.b, vl128 -+** ld1w z0\.s, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s32, __SVInt32_t) -+ -+/* -+** callee_u32: -+** ptrue (p[0-7])\.b, vl128 -+** ld1w z0\.s, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u32, __SVUint32_t) -+ -+/* -+** callee_f32: -+** ptrue (p[0-7])\.b, vl128 -+** ld1w z0\.s, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (f32, __SVFloat32_t) -+ -+/* -+** callee_s64: -+** ptrue (p[0-7])\.b, vl128 -+** ld1d z0\.d, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s64, __SVInt64_t) -+ -+/* -+** callee_u64: -+** ptrue (p[0-7])\.b, vl128 -+** ld1d z0\.d, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u64, __SVUint64_t) -+ -+/* -+** callee_f64: -+** ptrue (p[0-7])\.b, vl128 -+** ld1d z0\.d, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (f64, __SVFloat64_t) -+ -+#include -+ -+#define CALLER(SUFFIX, TYPE) \ -+ typeof (svaddv (svptrue_b8 (), *(TYPE *) 0)) \ -+ __attribute__((noipa)) \ -+ caller_##SUFFIX (TYPE *ptr1) \ -+ { \ -+ return svaddv (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ -+ } -+ -+#define CALLER_BF16(SUFFIX, TYPE) \ -+ typeof (svlasta (svptrue_b8 (), *(TYPE *) 0)) \ -+ __attribute__((noipa)) \ -+ caller_##SUFFIX (TYPE *ptr1) \ -+ { \ -+ return svlasta (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ -+ } -+ -+/* -+** caller_s8: -+** ... -+** bl callee_s8 -+** ptrue (p[0-7])\.b, vl128 -+** saddv (d[0-9]+), \1, z0\.b -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s8, __SVInt8_t) -+ -+/* -+** caller_u8: -+** ... -+** bl callee_u8 -+** ptrue (p[0-7])\.b, vl128 -+** uaddv (d[0-9]+), \1, z0\.b -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u8, __SVUint8_t) -+ -+/* -+** caller_s16: -+** ... -+** bl callee_s16 -+** ptrue (p[0-7])\.b, vl128 -+** saddv (d[0-9]+), \1, z0\.h -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s16, __SVInt16_t) -+ -+/* -+** caller_u16: -+** ... -+** bl callee_u16 -+** ptrue (p[0-7])\.b, vl128 -+** uaddv (d[0-9]+), \1, z0\.h -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u16, __SVUint16_t) -+ -+/* -+** caller_f16: -+** ... 
-+** bl callee_f16 -+** ptrue (p[0-7])\.b, vl128 -+** faddv h0, \1, z0\.h -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (f16, __SVFloat16_t) -+ -+/* -+** caller_bf16: -+** ... -+** bl callee_bf16 -+** ptrue (p[0-7])\.b, vl128 -+** lasta h0, \1, z0\.h -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER_BF16 (bf16, __SVBfloat16_t) -+ -+/* -+** caller_s32: -+** ... -+** bl callee_s32 -+** ptrue (p[0-7])\.b, vl128 -+** saddv (d[0-9]+), \1, z0\.s -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s32, __SVInt32_t) -+ -+/* -+** caller_u32: -+** ... -+** bl callee_u32 -+** ptrue (p[0-7])\.b, vl128 -+** uaddv (d[0-9]+), \1, z0\.s -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u32, __SVUint32_t) -+ -+/* -+** caller_f32: -+** ... -+** bl callee_f32 -+** ptrue (p[0-7])\.b, vl128 -+** faddv s0, \1, z0\.s -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (f32, __SVFloat32_t) -+ -+/* -+** caller_s64: -+** ... -+** bl callee_s64 -+** ptrue (p[0-7])\.b, vl128 -+** uaddv (d[0-9]+), \1, z0\.d -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s64, __SVInt64_t) -+ -+/* -+** caller_u64: -+** ... -+** bl callee_u64 -+** ptrue (p[0-7])\.b, vl128 -+** uaddv (d[0-9]+), \1, z0\.d -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u64, __SVUint64_t) -+ -+/* -+** caller_f64: -+** ... -+** bl callee_f64 -+** ptrue (p[0-7])\.b, vl128 -+** faddv d0, \1, z0\.d -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (f64, __SVFloat64_t) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_4_2048.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_4_2048.c -new file mode 100644 -index 000000000..8256645f5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_4_2048.c -@@ -0,0 +1,264 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -msve-vector-bits=2048 -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+#define CALLEE(SUFFIX, TYPE) \ -+ TYPE __attribute__((noipa)) \ -+ callee_##SUFFIX (TYPE *ptr) \ -+ { \ -+ return *ptr; \ -+ } -+ -+/* -+** callee_s8: -+** ptrue (p[0-7])\.b, vl256 -+** ld1b z0\.b, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s8, __SVInt8_t) -+ -+/* -+** callee_u8: -+** ptrue (p[0-7])\.b, vl256 -+** ld1b z0\.b, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u8, __SVUint8_t) -+ -+/* -+** callee_s16: -+** ptrue (p[0-7])\.b, vl256 -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s16, __SVInt16_t) -+ -+/* -+** callee_u16: -+** ptrue (p[0-7])\.b, vl256 -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u16, __SVUint16_t) -+ -+/* -+** callee_f16: -+** ptrue (p[0-7])\.b, vl256 -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (f16, __SVFloat16_t) -+ -+/* -+** callee_bf16: -+** ptrue (p[0-7])\.b, vl256 -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (bf16, __SVBfloat16_t) -+ -+/* -+** callee_s32: -+** ptrue (p[0-7])\.b, vl256 -+** ld1w z0\.s, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s32, __SVInt32_t) -+ -+/* -+** callee_u32: -+** ptrue (p[0-7])\.b, vl256 -+** ld1w z0\.s, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u32, __SVUint32_t) -+ -+/* -+** callee_f32: -+** ptrue (p[0-7])\.b, vl256 -+** ld1w z0\.s, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (f32, __SVFloat32_t) -+ -+/* -+** callee_s64: -+** ptrue (p[0-7])\.b, vl256 -+** ld1d z0\.d, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s64, __SVInt64_t) -+ -+/* -+** callee_u64: -+** ptrue (p[0-7])\.b, vl256 -+** ld1d z0\.d, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u64, __SVUint64_t) -+ -+/* -+** callee_f64: -+** ptrue (p[0-7])\.b, vl256 -+** ld1d 
z0\.d, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (f64, __SVFloat64_t) -+ -+#include -+ -+#define CALLER(SUFFIX, TYPE) \ -+ typeof (svaddv (svptrue_b8 (), *(TYPE *) 0)) \ -+ __attribute__((noipa)) \ -+ caller_##SUFFIX (TYPE *ptr1) \ -+ { \ -+ return svaddv (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ -+ } -+ -+#define CALLER_BF16(SUFFIX, TYPE) \ -+ typeof (svlasta (svptrue_b8 (), *(TYPE *) 0)) \ -+ __attribute__((noipa)) \ -+ caller_##SUFFIX (TYPE *ptr1) \ -+ { \ -+ return svlasta (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ -+ } -+ -+/* -+** caller_s8: -+** ... -+** bl callee_s8 -+** ptrue (p[0-7])\.b, vl256 -+** saddv (d[0-9]+), \1, z0\.b -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s8, __SVInt8_t) -+ -+/* -+** caller_u8: -+** ... -+** bl callee_u8 -+** ptrue (p[0-7])\.b, vl256 -+** uaddv (d[0-9]+), \1, z0\.b -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u8, __SVUint8_t) -+ -+/* -+** caller_s16: -+** ... -+** bl callee_s16 -+** ptrue (p[0-7])\.b, vl256 -+** saddv (d[0-9]+), \1, z0\.h -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s16, __SVInt16_t) -+ -+/* -+** caller_u16: -+** ... -+** bl callee_u16 -+** ptrue (p[0-7])\.b, vl256 -+** uaddv (d[0-9]+), \1, z0\.h -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u16, __SVUint16_t) -+ -+/* -+** caller_f16: -+** ... -+** bl callee_f16 -+** ptrue (p[0-7])\.b, vl256 -+** faddv h0, \1, z0\.h -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (f16, __SVFloat16_t) -+ -+/* -+** caller_bf16: -+** ... -+** bl callee_bf16 -+** ptrue (p[0-7])\.b, vl256 -+** lasta h0, \1, z0\.h -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER_BF16 (bf16, __SVBfloat16_t) -+ -+/* -+** caller_s32: -+** ... -+** bl callee_s32 -+** ptrue (p[0-7])\.b, vl256 -+** saddv (d[0-9]+), \1, z0\.s -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s32, __SVInt32_t) -+ -+/* -+** caller_u32: -+** ... -+** bl callee_u32 -+** ptrue (p[0-7])\.b, vl256 -+** uaddv (d[0-9]+), \1, z0\.s -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u32, __SVUint32_t) -+ -+/* -+** caller_f32: -+** ... -+** bl callee_f32 -+** ptrue (p[0-7])\.b, vl256 -+** faddv s0, \1, z0\.s -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (f32, __SVFloat32_t) -+ -+/* -+** caller_s64: -+** ... -+** bl callee_s64 -+** ptrue (p[0-7])\.b, vl256 -+** uaddv (d[0-9]+), \1, z0\.d -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s64, __SVInt64_t) -+ -+/* -+** caller_u64: -+** ... -+** bl callee_u64 -+** ptrue (p[0-7])\.b, vl256 -+** uaddv (d[0-9]+), \1, z0\.d -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u64, __SVUint64_t) -+ -+/* -+** caller_f64: -+** ... 
-+** bl callee_f64 -+** ptrue (p[0-7])\.b, vl256 -+** faddv d0, \1, z0\.d -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (f64, __SVFloat64_t) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_4_256.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_4_256.c -new file mode 100644 -index 000000000..1e0f6bb96 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_4_256.c -@@ -0,0 +1,264 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -msve-vector-bits=256 -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+#define CALLEE(SUFFIX, TYPE) \ -+ TYPE __attribute__((noipa)) \ -+ callee_##SUFFIX (TYPE *ptr) \ -+ { \ -+ return *ptr; \ -+ } -+ -+/* -+** callee_s8: -+** ptrue (p[0-7])\.b, vl32 -+** ld1b z0\.b, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s8, __SVInt8_t) -+ -+/* -+** callee_u8: -+** ptrue (p[0-7])\.b, vl32 -+** ld1b z0\.b, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u8, __SVUint8_t) -+ -+/* -+** callee_s16: -+** ptrue (p[0-7])\.b, vl32 -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s16, __SVInt16_t) -+ -+/* -+** callee_u16: -+** ptrue (p[0-7])\.b, vl32 -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u16, __SVUint16_t) -+ -+/* -+** callee_f16: -+** ptrue (p[0-7])\.b, vl32 -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (f16, __SVFloat16_t) -+ -+/* -+** callee_bf16: -+** ptrue (p[0-7])\.b, vl32 -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (bf16, __SVBfloat16_t) -+ -+/* -+** callee_s32: -+** ptrue (p[0-7])\.b, vl32 -+** ld1w z0\.s, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s32, __SVInt32_t) -+ -+/* -+** callee_u32: -+** ptrue (p[0-7])\.b, vl32 -+** ld1w z0\.s, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u32, __SVUint32_t) -+ -+/* -+** callee_f32: -+** ptrue (p[0-7])\.b, vl32 -+** ld1w z0\.s, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (f32, __SVFloat32_t) -+ -+/* -+** callee_s64: -+** ptrue (p[0-7])\.b, vl32 -+** ld1d z0\.d, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s64, __SVInt64_t) -+ -+/* -+** callee_u64: -+** ptrue (p[0-7])\.b, vl32 -+** ld1d z0\.d, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u64, __SVUint64_t) -+ -+/* -+** callee_f64: -+** ptrue (p[0-7])\.b, vl32 -+** ld1d z0\.d, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (f64, __SVFloat64_t) -+ -+#include -+ -+#define CALLER(SUFFIX, TYPE) \ -+ typeof (svaddv (svptrue_b8 (), *(TYPE *) 0)) \ -+ __attribute__((noipa)) \ -+ caller_##SUFFIX (TYPE *ptr1) \ -+ { \ -+ return svaddv (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ -+ } -+ -+#define CALLER_BF16(SUFFIX, TYPE) \ -+ typeof (svlasta (svptrue_b8 (), *(TYPE *) 0)) \ -+ __attribute__((noipa)) \ -+ caller_##SUFFIX (TYPE *ptr1) \ -+ { \ -+ return svlasta (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ -+ } -+ -+/* -+** caller_s8: -+** ... -+** bl callee_s8 -+** ptrue (p[0-7])\.b, vl32 -+** saddv (d[0-9]+), \1, z0\.b -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s8, __SVInt8_t) -+ -+/* -+** caller_u8: -+** ... -+** bl callee_u8 -+** ptrue (p[0-7])\.b, vl32 -+** uaddv (d[0-9]+), \1, z0\.b -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u8, __SVUint8_t) -+ -+/* -+** caller_s16: -+** ... -+** bl callee_s16 -+** ptrue (p[0-7])\.b, vl32 -+** saddv (d[0-9]+), \1, z0\.h -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s16, __SVInt16_t) -+ -+/* -+** caller_u16: -+** ... -+** bl callee_u16 -+** ptrue (p[0-7])\.b, vl32 -+** uaddv (d[0-9]+), \1, z0\.h -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u16, __SVUint16_t) -+ -+/* -+** caller_f16: -+** ... 
-+** bl callee_f16 -+** ptrue (p[0-7])\.b, vl32 -+** faddv h0, \1, z0\.h -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (f16, __SVFloat16_t) -+ -+/* -+** caller_bf16: -+** ... -+** bl callee_bf16 -+** ptrue (p[0-7])\.b, vl32 -+** lasta h0, \1, z0\.h -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER_BF16 (bf16, __SVBfloat16_t) -+ -+/* -+** caller_s32: -+** ... -+** bl callee_s32 -+** ptrue (p[0-7])\.b, vl32 -+** saddv (d[0-9]+), \1, z0\.s -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s32, __SVInt32_t) -+ -+/* -+** caller_u32: -+** ... -+** bl callee_u32 -+** ptrue (p[0-7])\.b, vl32 -+** uaddv (d[0-9]+), \1, z0\.s -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u32, __SVUint32_t) -+ -+/* -+** caller_f32: -+** ... -+** bl callee_f32 -+** ptrue (p[0-7])\.b, vl32 -+** faddv s0, \1, z0\.s -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (f32, __SVFloat32_t) -+ -+/* -+** caller_s64: -+** ... -+** bl callee_s64 -+** ptrue (p[0-7])\.b, vl32 -+** uaddv (d[0-9]+), \1, z0\.d -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s64, __SVInt64_t) -+ -+/* -+** caller_u64: -+** ... -+** bl callee_u64 -+** ptrue (p[0-7])\.b, vl32 -+** uaddv (d[0-9]+), \1, z0\.d -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u64, __SVUint64_t) -+ -+/* -+** caller_f64: -+** ... -+** bl callee_f64 -+** ptrue (p[0-7])\.b, vl32 -+** faddv d0, \1, z0\.d -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (f64, __SVFloat64_t) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_4_512.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_4_512.c -new file mode 100644 -index 000000000..5b58ed734 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_4_512.c -@@ -0,0 +1,264 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -msve-vector-bits=512 -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+#define CALLEE(SUFFIX, TYPE) \ -+ TYPE __attribute__((noipa)) \ -+ callee_##SUFFIX (TYPE *ptr) \ -+ { \ -+ return *ptr; \ -+ } -+ -+/* -+** callee_s8: -+** ptrue (p[0-7])\.b, vl64 -+** ld1b z0\.b, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s8, __SVInt8_t) -+ -+/* -+** callee_u8: -+** ptrue (p[0-7])\.b, vl64 -+** ld1b z0\.b, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u8, __SVUint8_t) -+ -+/* -+** callee_s16: -+** ptrue (p[0-7])\.b, vl64 -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s16, __SVInt16_t) -+ -+/* -+** callee_u16: -+** ptrue (p[0-7])\.b, vl64 -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u16, __SVUint16_t) -+ -+/* -+** callee_f16: -+** ptrue (p[0-7])\.b, vl64 -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (f16, __SVFloat16_t) -+ -+/* -+** callee_bf16: -+** ptrue (p[0-7])\.b, vl64 -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (bf16, __SVBfloat16_t) -+ -+/* -+** callee_s32: -+** ptrue (p[0-7])\.b, vl64 -+** ld1w z0\.s, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s32, __SVInt32_t) -+ -+/* -+** callee_u32: -+** ptrue (p[0-7])\.b, vl64 -+** ld1w z0\.s, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u32, __SVUint32_t) -+ -+/* -+** callee_f32: -+** ptrue (p[0-7])\.b, vl64 -+** ld1w z0\.s, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (f32, __SVFloat32_t) -+ -+/* -+** callee_s64: -+** ptrue (p[0-7])\.b, vl64 -+** ld1d z0\.d, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s64, __SVInt64_t) -+ -+/* -+** callee_u64: -+** ptrue (p[0-7])\.b, vl64 -+** ld1d z0\.d, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u64, __SVUint64_t) -+ -+/* -+** callee_f64: -+** ptrue (p[0-7])\.b, vl64 -+** ld1d z0\.d, \1/z, \[x0\] -+** 
ret -+*/ -+CALLEE (f64, __SVFloat64_t) -+ -+#include -+ -+#define CALLER(SUFFIX, TYPE) \ -+ typeof (svaddv (svptrue_b8 (), *(TYPE *) 0)) \ -+ __attribute__((noipa)) \ -+ caller_##SUFFIX (TYPE *ptr1) \ -+ { \ -+ return svaddv (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ -+ } -+ -+#define CALLER_BF16(SUFFIX, TYPE) \ -+ typeof (svlasta (svptrue_b8 (), *(TYPE *) 0)) \ -+ __attribute__((noipa)) \ -+ caller_##SUFFIX (TYPE *ptr1) \ -+ { \ -+ return svlasta (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ -+ } -+ -+/* -+** caller_s8: -+** ... -+** bl callee_s8 -+** ptrue (p[0-7])\.b, vl64 -+** saddv (d[0-9]+), \1, z0\.b -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s8, __SVInt8_t) -+ -+/* -+** caller_u8: -+** ... -+** bl callee_u8 -+** ptrue (p[0-7])\.b, vl64 -+** uaddv (d[0-9]+), \1, z0\.b -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u8, __SVUint8_t) -+ -+/* -+** caller_s16: -+** ... -+** bl callee_s16 -+** ptrue (p[0-7])\.b, vl64 -+** saddv (d[0-9]+), \1, z0\.h -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s16, __SVInt16_t) -+ -+/* -+** caller_u16: -+** ... -+** bl callee_u16 -+** ptrue (p[0-7])\.b, vl64 -+** uaddv (d[0-9]+), \1, z0\.h -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u16, __SVUint16_t) -+ -+/* -+** caller_f16: -+** ... -+** bl callee_f16 -+** ptrue (p[0-7])\.b, vl64 -+** faddv h0, \1, z0\.h -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (f16, __SVFloat16_t) -+ -+/* -+** caller_bf16: -+** ... -+** bl callee_bf16 -+** ptrue (p[0-7])\.b, vl64 -+** lasta h0, \1, z0\.h -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER_BF16 (bf16, __SVBfloat16_t) -+ -+/* -+** caller_s32: -+** ... -+** bl callee_s32 -+** ptrue (p[0-7])\.b, vl64 -+** saddv (d[0-9]+), \1, z0\.s -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s32, __SVInt32_t) -+ -+/* -+** caller_u32: -+** ... -+** bl callee_u32 -+** ptrue (p[0-7])\.b, vl64 -+** uaddv (d[0-9]+), \1, z0\.s -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u32, __SVUint32_t) -+ -+/* -+** caller_f32: -+** ... -+** bl callee_f32 -+** ptrue (p[0-7])\.b, vl64 -+** faddv s0, \1, z0\.s -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (f32, __SVFloat32_t) -+ -+/* -+** caller_s64: -+** ... -+** bl callee_s64 -+** ptrue (p[0-7])\.b, vl64 -+** uaddv (d[0-9]+), \1, z0\.d -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s64, __SVInt64_t) -+ -+/* -+** caller_u64: -+** ... -+** bl callee_u64 -+** ptrue (p[0-7])\.b, vl64 -+** uaddv (d[0-9]+), \1, z0\.d -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u64, __SVUint64_t) -+ -+/* -+** caller_f64: -+** ... 
-+** bl callee_f64 -+** ptrue (p[0-7])\.b, vl64 -+** faddv d0, \1, z0\.d -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (f64, __SVFloat64_t) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_5.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_5.c -new file mode 100644 -index 000000000..55c78e16f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_5.c -@@ -0,0 +1,264 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+#include -+ -+#define CALLEE(SUFFIX, TYPE) \ -+ TYPE __attribute__((noipa)) \ -+ callee_##SUFFIX (TYPE *ptr) \ -+ { \ -+ return *ptr; \ -+ } -+ -+/* -+** callee_s8: -+** ptrue (p[0-7])\.b, all -+** ld1b z0\.b, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s8, svint8_t) -+ -+/* -+** callee_u8: -+** ptrue (p[0-7])\.b, all -+** ld1b z0\.b, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u8, svuint8_t) -+ -+/* -+** callee_s16: -+** ptrue (p[0-7])\.b, all -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s16, svint16_t) -+ -+/* -+** callee_u16: -+** ptrue (p[0-7])\.b, all -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u16, svuint16_t) -+ -+/* -+** callee_f16: -+** ptrue (p[0-7])\.b, all -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (f16, svfloat16_t) -+ -+/* -+** callee_bf16: -+** ptrue (p[0-7])\.b, all -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (bf16, svbfloat16_t) -+ -+/* -+** callee_s32: -+** ptrue (p[0-7])\.b, all -+** ld1w z0\.s, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s32, svint32_t) -+ -+/* -+** callee_u32: -+** ptrue (p[0-7])\.b, all -+** ld1w z0\.s, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u32, svuint32_t) -+ -+/* -+** callee_f32: -+** ptrue (p[0-7])\.b, all -+** ld1w z0\.s, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (f32, svfloat32_t) -+ -+/* -+** callee_s64: -+** ptrue (p[0-7])\.b, all -+** ld1d z0\.d, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s64, svint64_t) -+ -+/* -+** callee_u64: -+** ptrue (p[0-7])\.b, all -+** ld1d z0\.d, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u64, svuint64_t) -+ -+/* -+** callee_f64: -+** ptrue (p[0-7])\.b, all -+** ld1d z0\.d, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (f64, svfloat64_t) -+ -+#define CALLER(SUFFIX, TYPE) \ -+ typeof (svaddv (svptrue_b8 (), *(TYPE *) 0)) \ -+ __attribute__((noipa)) \ -+ caller_##SUFFIX (TYPE *ptr1) \ -+ { \ -+ return svaddv (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ -+ } -+ -+#define CALLER_BF16(SUFFIX, TYPE) \ -+ typeof (svlasta (svptrue_b8 (), *(TYPE *) 0)) \ -+ __attribute__((noipa)) \ -+ caller_##SUFFIX (TYPE *ptr1) \ -+ { \ -+ return svlasta (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ -+ } -+ -+/* -+** caller_s8: -+** ... -+** bl callee_s8 -+** ptrue (p[0-7])\.b, all -+** saddv (d[0-9]+), \1, z0\.b -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s8, svint8_t) -+ -+/* -+** caller_u8: -+** ... -+** bl callee_u8 -+** ptrue (p[0-7])\.b, all -+** uaddv (d[0-9]+), \1, z0\.b -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u8, svuint8_t) -+ -+/* -+** caller_s16: -+** ... -+** bl callee_s16 -+** ptrue (p[0-7])\.b, all -+** saddv (d[0-9]+), \1, z0\.h -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s16, svint16_t) -+ -+/* -+** caller_u16: -+** ... -+** bl callee_u16 -+** ptrue (p[0-7])\.b, all -+** uaddv (d[0-9]+), \1, z0\.h -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u16, svuint16_t) -+ -+/* -+** caller_f16: -+** ... 
-+** bl callee_f16 -+** ptrue (p[0-7])\.b, all -+** faddv h0, \1, z0\.h -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (f16, svfloat16_t) -+ -+/* -+** caller_bf16: -+** ... -+** bl callee_bf16 -+** ptrue (p[0-7])\.b, all -+** lasta h0, \1, z0\.h -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER_BF16 (bf16, svbfloat16_t) -+ -+/* -+** caller_s32: -+** ... -+** bl callee_s32 -+** ptrue (p[0-7])\.b, all -+** saddv (d[0-9]+), \1, z0\.s -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s32, svint32_t) -+ -+/* -+** caller_u32: -+** ... -+** bl callee_u32 -+** ptrue (p[0-7])\.b, all -+** uaddv (d[0-9]+), \1, z0\.s -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u32, svuint32_t) -+ -+/* -+** caller_f32: -+** ... -+** bl callee_f32 -+** ptrue (p[0-7])\.b, all -+** faddv s0, \1, z0\.s -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (f32, svfloat32_t) -+ -+/* -+** caller_s64: -+** ... -+** bl callee_s64 -+** ptrue (p[0-7])\.b, all -+** uaddv (d[0-9]+), \1, z0\.d -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s64, svint64_t) -+ -+/* -+** caller_u64: -+** ... -+** bl callee_u64 -+** ptrue (p[0-7])\.b, all -+** uaddv (d[0-9]+), \1, z0\.d -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u64, svuint64_t) -+ -+/* -+** caller_f64: -+** ... -+** bl callee_f64 -+** ptrue (p[0-7])\.b, all -+** faddv d0, \1, z0\.d -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (f64, svfloat64_t) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_5_1024.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_5_1024.c -new file mode 100644 -index 000000000..52e9916d8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_5_1024.c -@@ -0,0 +1,264 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -msve-vector-bits=1024 -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+#include -+ -+#define CALLEE(SUFFIX, TYPE) \ -+ TYPE __attribute__((noipa)) \ -+ callee_##SUFFIX (TYPE *ptr) \ -+ { \ -+ return *ptr; \ -+ } -+ -+/* -+** callee_s8: -+** ptrue (p[0-7])\.b, vl128 -+** ld1b z0\.b, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s8, svint8_t) -+ -+/* -+** callee_u8: -+** ptrue (p[0-7])\.b, vl128 -+** ld1b z0\.b, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u8, svuint8_t) -+ -+/* -+** callee_s16: -+** ptrue (p[0-7])\.b, vl128 -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s16, svint16_t) -+ -+/* -+** callee_u16: -+** ptrue (p[0-7])\.b, vl128 -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u16, svuint16_t) -+ -+/* -+** callee_f16: -+** ptrue (p[0-7])\.b, vl128 -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (f16, svfloat16_t) -+ -+/* -+** callee_bf16: -+** ptrue (p[0-7])\.b, vl128 -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (bf16, svbfloat16_t) -+ -+/* -+** callee_s32: -+** ptrue (p[0-7])\.b, vl128 -+** ld1w z0\.s, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s32, svint32_t) -+ -+/* -+** callee_u32: -+** ptrue (p[0-7])\.b, vl128 -+** ld1w z0\.s, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u32, svuint32_t) -+ -+/* -+** callee_f32: -+** ptrue (p[0-7])\.b, vl128 -+** ld1w z0\.s, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (f32, svfloat32_t) -+ -+/* -+** callee_s64: -+** ptrue (p[0-7])\.b, vl128 -+** ld1d z0\.d, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s64, svint64_t) -+ -+/* -+** callee_u64: -+** ptrue (p[0-7])\.b, vl128 -+** ld1d z0\.d, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u64, svuint64_t) -+ -+/* -+** callee_f64: -+** ptrue (p[0-7])\.b, vl128 -+** ld1d z0\.d, \1/z, \[x0\] -+** ret -+*/ -+CALLEE 
(f64, svfloat64_t) -+ -+#define CALLER(SUFFIX, TYPE) \ -+ typeof (svaddv (svptrue_b8 (), *(TYPE *) 0)) \ -+ __attribute__((noipa)) \ -+ caller_##SUFFIX (TYPE *ptr1) \ -+ { \ -+ return svaddv (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ -+ } -+ -+#define CALLER_BF16(SUFFIX, TYPE) \ -+ typeof (svlasta (svptrue_b8 (), *(TYPE *) 0)) \ -+ __attribute__((noipa)) \ -+ caller_##SUFFIX (TYPE *ptr1) \ -+ { \ -+ return svlasta (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ -+ } -+ -+/* -+** caller_s8: -+** ... -+** bl callee_s8 -+** ptrue (p[0-7])\.b, vl128 -+** saddv (d[0-9]+), \1, z0\.b -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s8, svint8_t) -+ -+/* -+** caller_u8: -+** ... -+** bl callee_u8 -+** ptrue (p[0-7])\.b, vl128 -+** uaddv (d[0-9]+), \1, z0\.b -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u8, svuint8_t) -+ -+/* -+** caller_s16: -+** ... -+** bl callee_s16 -+** ptrue (p[0-7])\.b, vl128 -+** saddv (d[0-9]+), \1, z0\.h -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s16, svint16_t) -+ -+/* -+** caller_u16: -+** ... -+** bl callee_u16 -+** ptrue (p[0-7])\.b, vl128 -+** uaddv (d[0-9]+), \1, z0\.h -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u16, svuint16_t) -+ -+/* -+** caller_f16: -+** ... -+** bl callee_f16 -+** ptrue (p[0-7])\.b, vl128 -+** faddv h0, \1, z0\.h -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (f16, svfloat16_t) -+ -+/* -+** caller_bf16: -+** ... -+** bl callee_bf16 -+** ptrue (p[0-7])\.b, vl128 -+** lasta h0, \1, z0\.h -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER_BF16 (bf16, svbfloat16_t) -+ -+/* -+** caller_s32: -+** ... -+** bl callee_s32 -+** ptrue (p[0-7])\.b, vl128 -+** saddv (d[0-9]+), \1, z0\.s -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s32, svint32_t) -+ -+/* -+** caller_u32: -+** ... -+** bl callee_u32 -+** ptrue (p[0-7])\.b, vl128 -+** uaddv (d[0-9]+), \1, z0\.s -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u32, svuint32_t) -+ -+/* -+** caller_f32: -+** ... -+** bl callee_f32 -+** ptrue (p[0-7])\.b, vl128 -+** faddv s0, \1, z0\.s -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (f32, svfloat32_t) -+ -+/* -+** caller_s64: -+** ... -+** bl callee_s64 -+** ptrue (p[0-7])\.b, vl128 -+** uaddv (d[0-9]+), \1, z0\.d -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s64, svint64_t) -+ -+/* -+** caller_u64: -+** ... -+** bl callee_u64 -+** ptrue (p[0-7])\.b, vl128 -+** uaddv (d[0-9]+), \1, z0\.d -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u64, svuint64_t) -+ -+/* -+** caller_f64: -+** ... 
-+** bl callee_f64 -+** ptrue (p[0-7])\.b, vl128 -+** faddv d0, \1, z0\.d -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (f64, svfloat64_t) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_5_2048.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_5_2048.c -new file mode 100644 -index 000000000..6f37d9d6c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_5_2048.c -@@ -0,0 +1,264 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -msve-vector-bits=2048 -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+#include -+ -+#define CALLEE(SUFFIX, TYPE) \ -+ TYPE __attribute__((noipa)) \ -+ callee_##SUFFIX (TYPE *ptr) \ -+ { \ -+ return *ptr; \ -+ } -+ -+/* -+** callee_s8: -+** ptrue (p[0-7])\.b, vl256 -+** ld1b z0\.b, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s8, svint8_t) -+ -+/* -+** callee_u8: -+** ptrue (p[0-7])\.b, vl256 -+** ld1b z0\.b, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u8, svuint8_t) -+ -+/* -+** callee_s16: -+** ptrue (p[0-7])\.b, vl256 -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s16, svint16_t) -+ -+/* -+** callee_u16: -+** ptrue (p[0-7])\.b, vl256 -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u16, svuint16_t) -+ -+/* -+** callee_f16: -+** ptrue (p[0-7])\.b, vl256 -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (f16, svfloat16_t) -+ -+/* -+** callee_bf16: -+** ptrue (p[0-7])\.b, vl256 -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (bf16, svbfloat16_t) -+ -+/* -+** callee_s32: -+** ptrue (p[0-7])\.b, vl256 -+** ld1w z0\.s, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s32, svint32_t) -+ -+/* -+** callee_u32: -+** ptrue (p[0-7])\.b, vl256 -+** ld1w z0\.s, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u32, svuint32_t) -+ -+/* -+** callee_f32: -+** ptrue (p[0-7])\.b, vl256 -+** ld1w z0\.s, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (f32, svfloat32_t) -+ -+/* -+** callee_s64: -+** ptrue (p[0-7])\.b, vl256 -+** ld1d z0\.d, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s64, svint64_t) -+ -+/* -+** callee_u64: -+** ptrue (p[0-7])\.b, vl256 -+** ld1d z0\.d, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u64, svuint64_t) -+ -+/* -+** callee_f64: -+** ptrue (p[0-7])\.b, vl256 -+** ld1d z0\.d, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (f64, svfloat64_t) -+ -+#define CALLER(SUFFIX, TYPE) \ -+ typeof (svaddv (svptrue_b8 (), *(TYPE *) 0)) \ -+ __attribute__((noipa)) \ -+ caller_##SUFFIX (TYPE *ptr1) \ -+ { \ -+ return svaddv (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ -+ } -+ -+#define CALLER_BF16(SUFFIX, TYPE) \ -+ typeof (svlasta (svptrue_b8 (), *(TYPE *) 0)) \ -+ __attribute__((noipa)) \ -+ caller_##SUFFIX (TYPE *ptr1) \ -+ { \ -+ return svlasta (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ -+ } -+ -+/* -+** caller_s8: -+** ... -+** bl callee_s8 -+** ptrue (p[0-7])\.b, vl256 -+** saddv (d[0-9]+), \1, z0\.b -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s8, svint8_t) -+ -+/* -+** caller_u8: -+** ... -+** bl callee_u8 -+** ptrue (p[0-7])\.b, vl256 -+** uaddv (d[0-9]+), \1, z0\.b -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u8, svuint8_t) -+ -+/* -+** caller_s16: -+** ... -+** bl callee_s16 -+** ptrue (p[0-7])\.b, vl256 -+** saddv (d[0-9]+), \1, z0\.h -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s16, svint16_t) -+ -+/* -+** caller_u16: -+** ... -+** bl callee_u16 -+** ptrue (p[0-7])\.b, vl256 -+** uaddv (d[0-9]+), \1, z0\.h -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u16, svuint16_t) -+ -+/* -+** caller_f16: -+** ... 
-+** bl callee_f16 -+** ptrue (p[0-7])\.b, vl256 -+** faddv h0, \1, z0\.h -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (f16, svfloat16_t) -+ -+/* -+** caller_bf16: -+** ... -+** bl callee_bf16 -+** ptrue (p[0-7])\.b, vl256 -+** lasta h0, \1, z0\.h -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER_BF16 (bf16, svbfloat16_t) -+ -+/* -+** caller_s32: -+** ... -+** bl callee_s32 -+** ptrue (p[0-7])\.b, vl256 -+** saddv (d[0-9]+), \1, z0\.s -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s32, svint32_t) -+ -+/* -+** caller_u32: -+** ... -+** bl callee_u32 -+** ptrue (p[0-7])\.b, vl256 -+** uaddv (d[0-9]+), \1, z0\.s -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u32, svuint32_t) -+ -+/* -+** caller_f32: -+** ... -+** bl callee_f32 -+** ptrue (p[0-7])\.b, vl256 -+** faddv s0, \1, z0\.s -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (f32, svfloat32_t) -+ -+/* -+** caller_s64: -+** ... -+** bl callee_s64 -+** ptrue (p[0-7])\.b, vl256 -+** uaddv (d[0-9]+), \1, z0\.d -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s64, svint64_t) -+ -+/* -+** caller_u64: -+** ... -+** bl callee_u64 -+** ptrue (p[0-7])\.b, vl256 -+** uaddv (d[0-9]+), \1, z0\.d -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u64, svuint64_t) -+ -+/* -+** caller_f64: -+** ... -+** bl callee_f64 -+** ptrue (p[0-7])\.b, vl256 -+** faddv d0, \1, z0\.d -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (f64, svfloat64_t) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_5_256.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_5_256.c -new file mode 100644 -index 000000000..7ba094e16 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_5_256.c -@@ -0,0 +1,264 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -msve-vector-bits=256 -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+#include -+ -+#define CALLEE(SUFFIX, TYPE) \ -+ TYPE __attribute__((noipa)) \ -+ callee_##SUFFIX (TYPE *ptr) \ -+ { \ -+ return *ptr; \ -+ } -+ -+/* -+** callee_s8: -+** ptrue (p[0-7])\.b, vl32 -+** ld1b z0\.b, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s8, svint8_t) -+ -+/* -+** callee_u8: -+** ptrue (p[0-7])\.b, vl32 -+** ld1b z0\.b, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u8, svuint8_t) -+ -+/* -+** callee_s16: -+** ptrue (p[0-7])\.b, vl32 -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s16, svint16_t) -+ -+/* -+** callee_u16: -+** ptrue (p[0-7])\.b, vl32 -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u16, svuint16_t) -+ -+/* -+** callee_f16: -+** ptrue (p[0-7])\.b, vl32 -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (f16, svfloat16_t) -+ -+/* -+** callee_bf16: -+** ptrue (p[0-7])\.b, vl32 -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (bf16, svbfloat16_t) -+ -+/* -+** callee_s32: -+** ptrue (p[0-7])\.b, vl32 -+** ld1w z0\.s, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s32, svint32_t) -+ -+/* -+** callee_u32: -+** ptrue (p[0-7])\.b, vl32 -+** ld1w z0\.s, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u32, svuint32_t) -+ -+/* -+** callee_f32: -+** ptrue (p[0-7])\.b, vl32 -+** ld1w z0\.s, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (f32, svfloat32_t) -+ -+/* -+** callee_s64: -+** ptrue (p[0-7])\.b, vl32 -+** ld1d z0\.d, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s64, svint64_t) -+ -+/* -+** callee_u64: -+** ptrue (p[0-7])\.b, vl32 -+** ld1d z0\.d, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u64, svuint64_t) -+ -+/* -+** callee_f64: -+** ptrue (p[0-7])\.b, vl32 -+** ld1d z0\.d, \1/z, \[x0\] -+** ret -+*/ -+CALLEE 
(f64, svfloat64_t) -+ -+#define CALLER(SUFFIX, TYPE) \ -+ typeof (svaddv (svptrue_b8 (), *(TYPE *) 0)) \ -+ __attribute__((noipa)) \ -+ caller_##SUFFIX (TYPE *ptr1) \ -+ { \ -+ return svaddv (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ -+ } -+ -+#define CALLER_BF16(SUFFIX, TYPE) \ -+ typeof (svlasta (svptrue_b8 (), *(TYPE *) 0)) \ -+ __attribute__((noipa)) \ -+ caller_##SUFFIX (TYPE *ptr1) \ -+ { \ -+ return svlasta (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ -+ } -+ -+/* -+** caller_s8: -+** ... -+** bl callee_s8 -+** ptrue (p[0-7])\.b, vl32 -+** saddv (d[0-9]+), \1, z0\.b -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s8, svint8_t) -+ -+/* -+** caller_u8: -+** ... -+** bl callee_u8 -+** ptrue (p[0-7])\.b, vl32 -+** uaddv (d[0-9]+), \1, z0\.b -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u8, svuint8_t) -+ -+/* -+** caller_s16: -+** ... -+** bl callee_s16 -+** ptrue (p[0-7])\.b, vl32 -+** saddv (d[0-9]+), \1, z0\.h -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s16, svint16_t) -+ -+/* -+** caller_u16: -+** ... -+** bl callee_u16 -+** ptrue (p[0-7])\.b, vl32 -+** uaddv (d[0-9]+), \1, z0\.h -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u16, svuint16_t) -+ -+/* -+** caller_f16: -+** ... -+** bl callee_f16 -+** ptrue (p[0-7])\.b, vl32 -+** faddv h0, \1, z0\.h -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (f16, svfloat16_t) -+ -+/* -+** caller_bf16: -+** ... -+** bl callee_bf16 -+** ptrue (p[0-7])\.b, vl32 -+** lasta h0, \1, z0\.h -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER_BF16 (bf16, svbfloat16_t) -+ -+/* -+** caller_s32: -+** ... -+** bl callee_s32 -+** ptrue (p[0-7])\.b, vl32 -+** saddv (d[0-9]+), \1, z0\.s -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s32, svint32_t) -+ -+/* -+** caller_u32: -+** ... -+** bl callee_u32 -+** ptrue (p[0-7])\.b, vl32 -+** uaddv (d[0-9]+), \1, z0\.s -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u32, svuint32_t) -+ -+/* -+** caller_f32: -+** ... -+** bl callee_f32 -+** ptrue (p[0-7])\.b, vl32 -+** faddv s0, \1, z0\.s -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (f32, svfloat32_t) -+ -+/* -+** caller_s64: -+** ... -+** bl callee_s64 -+** ptrue (p[0-7])\.b, vl32 -+** uaddv (d[0-9]+), \1, z0\.d -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s64, svint64_t) -+ -+/* -+** caller_u64: -+** ... -+** bl callee_u64 -+** ptrue (p[0-7])\.b, vl32 -+** uaddv (d[0-9]+), \1, z0\.d -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u64, svuint64_t) -+ -+/* -+** caller_f64: -+** ... 
-+** bl callee_f64 -+** ptrue (p[0-7])\.b, vl32 -+** faddv d0, \1, z0\.d -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (f64, svfloat64_t) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_5_512.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_5_512.c -new file mode 100644 -index 000000000..36b14d420 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_5_512.c -@@ -0,0 +1,264 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -msve-vector-bits=512 -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+#include -+ -+#define CALLEE(SUFFIX, TYPE) \ -+ TYPE __attribute__((noipa)) \ -+ callee_##SUFFIX (TYPE *ptr) \ -+ { \ -+ return *ptr; \ -+ } -+ -+/* -+** callee_s8: -+** ptrue (p[0-7])\.b, vl64 -+** ld1b z0\.b, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s8, svint8_t) -+ -+/* -+** callee_u8: -+** ptrue (p[0-7])\.b, vl64 -+** ld1b z0\.b, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u8, svuint8_t) -+ -+/* -+** callee_s16: -+** ptrue (p[0-7])\.b, vl64 -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s16, svint16_t) -+ -+/* -+** callee_u16: -+** ptrue (p[0-7])\.b, vl64 -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u16, svuint16_t) -+ -+/* -+** callee_f16: -+** ptrue (p[0-7])\.b, vl64 -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (f16, svfloat16_t) -+ -+/* -+** callee_bf16: -+** ptrue (p[0-7])\.b, vl64 -+** ld1h z0\.h, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (bf16, svbfloat16_t) -+ -+/* -+** callee_s32: -+** ptrue (p[0-7])\.b, vl64 -+** ld1w z0\.s, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s32, svint32_t) -+ -+/* -+** callee_u32: -+** ptrue (p[0-7])\.b, vl64 -+** ld1w z0\.s, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u32, svuint32_t) -+ -+/* -+** callee_f32: -+** ptrue (p[0-7])\.b, vl64 -+** ld1w z0\.s, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (f32, svfloat32_t) -+ -+/* -+** callee_s64: -+** ptrue (p[0-7])\.b, vl64 -+** ld1d z0\.d, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (s64, svint64_t) -+ -+/* -+** callee_u64: -+** ptrue (p[0-7])\.b, vl64 -+** ld1d z0\.d, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (u64, svuint64_t) -+ -+/* -+** callee_f64: -+** ptrue (p[0-7])\.b, vl64 -+** ld1d z0\.d, \1/z, \[x0\] -+** ret -+*/ -+CALLEE (f64, svfloat64_t) -+ -+#define CALLER(SUFFIX, TYPE) \ -+ typeof (svaddv (svptrue_b8 (), *(TYPE *) 0)) \ -+ __attribute__((noipa)) \ -+ caller_##SUFFIX (TYPE *ptr1) \ -+ { \ -+ return svaddv (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ -+ } -+ -+#define CALLER_BF16(SUFFIX, TYPE) \ -+ typeof (svlasta (svptrue_b8 (), *(TYPE *) 0)) \ -+ __attribute__((noipa)) \ -+ caller_##SUFFIX (TYPE *ptr1) \ -+ { \ -+ return svlasta (svptrue_b8 (), callee_##SUFFIX (ptr1)); \ -+ } -+ -+/* -+** caller_s8: -+** ... -+** bl callee_s8 -+** ptrue (p[0-7])\.b, vl64 -+** saddv (d[0-9]+), \1, z0\.b -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s8, svint8_t) -+ -+/* -+** caller_u8: -+** ... -+** bl callee_u8 -+** ptrue (p[0-7])\.b, vl64 -+** uaddv (d[0-9]+), \1, z0\.b -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u8, svuint8_t) -+ -+/* -+** caller_s16: -+** ... -+** bl callee_s16 -+** ptrue (p[0-7])\.b, vl64 -+** saddv (d[0-9]+), \1, z0\.h -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s16, svint16_t) -+ -+/* -+** caller_u16: -+** ... -+** bl callee_u16 -+** ptrue (p[0-7])\.b, vl64 -+** uaddv (d[0-9]+), \1, z0\.h -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u16, svuint16_t) -+ -+/* -+** caller_f16: -+** ... 
-+** bl callee_f16 -+** ptrue (p[0-7])\.b, vl64 -+** faddv h0, \1, z0\.h -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (f16, svfloat16_t) -+ -+/* -+** caller_bf16: -+** ... -+** bl callee_bf16 -+** ptrue (p[0-7])\.b, vl64 -+** lasta h0, \1, z0\.h -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER_BF16 (bf16, svbfloat16_t) -+ -+/* -+** caller_s32: -+** ... -+** bl callee_s32 -+** ptrue (p[0-7])\.b, vl64 -+** saddv (d[0-9]+), \1, z0\.s -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s32, svint32_t) -+ -+/* -+** caller_u32: -+** ... -+** bl callee_u32 -+** ptrue (p[0-7])\.b, vl64 -+** uaddv (d[0-9]+), \1, z0\.s -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u32, svuint32_t) -+ -+/* -+** caller_f32: -+** ... -+** bl callee_f32 -+** ptrue (p[0-7])\.b, vl64 -+** faddv s0, \1, z0\.s -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (f32, svfloat32_t) -+ -+/* -+** caller_s64: -+** ... -+** bl callee_s64 -+** ptrue (p[0-7])\.b, vl64 -+** uaddv (d[0-9]+), \1, z0\.d -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (s64, svint64_t) -+ -+/* -+** caller_u64: -+** ... -+** bl callee_u64 -+** ptrue (p[0-7])\.b, vl64 -+** uaddv (d[0-9]+), \1, z0\.d -+** fmov x0, \2 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (u64, svuint64_t) -+ -+/* -+** caller_f64: -+** ... -+** bl callee_f64 -+** ptrue (p[0-7])\.b, vl64 -+** faddv d0, \1, z0\.d -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+CALLER (f64, svfloat64_t) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_6.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_6.c -new file mode 100644 -index 000000000..72468eab1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_6.c -@@ -0,0 +1,272 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+#include -+ -+typedef int8_t svint8_t __attribute__ ((vector_size (32))); -+typedef uint8_t svuint8_t __attribute__ ((vector_size (32))); -+ -+typedef int16_t svint16_t __attribute__ ((vector_size (32))); -+typedef uint16_t svuint16_t __attribute__ ((vector_size (32))); -+typedef __fp16 svfloat16_t __attribute__ ((vector_size (32))); -+typedef __bf16 svbfloat16_t __attribute__ ((vector_size (32))); -+ -+typedef int32_t svint32_t __attribute__ ((vector_size (32))); -+typedef uint32_t svuint32_t __attribute__ ((vector_size (32))); -+typedef float svfloat32_t __attribute__ ((vector_size (32))); -+ -+typedef int64_t svint64_t __attribute__ ((vector_size (32))); -+typedef uint64_t svuint64_t __attribute__ ((vector_size (32))); -+typedef double svfloat64_t __attribute__ ((vector_size (32))); -+ -+#define CALLEE(SUFFIX, TYPE) \ -+ TYPE __attribute__((noipa)) \ -+ callee_##SUFFIX (TYPE *ptr) \ -+ { \ -+ return *ptr; \ -+ } -+ -+/* -+** callee_s8: -+** ( -+** ld1 ({v.*}), \[x0\] -+** st1 \1, \[x8\] -+** | -+** ldp (q[0-9]+, q[0-9]+), \[x0\] -+** stp \2, \[x8\] -+** ) -+** ret -+*/ -+CALLEE (s8, svint8_t) -+ -+/* -+** callee_u8: -+** ( -+** ld1 ({v.*}), \[x0\] -+** st1 \1, \[x8\] -+** | -+** ldp (q[0-9]+, q[0-9]+), \[x0\] -+** stp \2, \[x8\] -+** ) -+** ret -+*/ -+CALLEE (u8, svuint8_t) -+ -+/* -+** callee_s16: -+** ( -+** ld1 ({v.*}), \[x0\] -+** st1 \1, \[x8\] -+** | -+** ldp (q[0-9]+, q[0-9]+), \[x0\] -+** stp \2, \[x8\] -+** ) -+** ret -+*/ -+CALLEE (s16, svint16_t) -+ -+/* -+** callee_u16: -+** ( -+** ld1 ({v.*}), \[x0\] -+** st1 \1, \[x8\] -+** | -+** ldp (q[0-9]+, q[0-9]+), \[x0\] -+** stp \2, \[x8\] -+** ) -+** ret -+*/ -+CALLEE (u16, 
svuint16_t) -+ -+/* Currently we scalarize this. */ -+CALLEE (f16, svfloat16_t) -+ -+/* Currently we scalarize this. */ -+CALLEE (bf16, svbfloat16_t) -+ -+/* -+** callee_s32: -+** ( -+** ld1 ({v.*}), \[x0\] -+** st1 \1, \[x8\] -+** | -+** ldp (q[0-9]+, q[0-9]+), \[x0\] -+** stp \2, \[x8\] -+** ) -+** ret -+*/ -+CALLEE (s32, svint32_t) -+ -+/* -+** callee_u32: -+** ( -+** ld1 ({v.*}), \[x0\] -+** st1 \1, \[x8\] -+** | -+** ldp (q[0-9]+, q[0-9]+), \[x0\] -+** stp \2, \[x8\] -+** ) -+** ret -+*/ -+CALLEE (u32, svuint32_t) -+ -+/* Currently we scalarize this. */ -+CALLEE (f32, svfloat32_t) -+ -+/* -+** callee_s64: -+** ( -+** ld1 ({v.*}), \[x0\] -+** st1 \1, \[x8\] -+** | -+** ldp (q[0-9]+, q[0-9]+), \[x0\] -+** stp \2, \[x8\] -+** ) -+** ret -+*/ -+CALLEE (s64, svint64_t) -+ -+/* -+** callee_u64: -+** ( -+** ld1 ({v.*}), \[x0\] -+** st1 \1, \[x8\] -+** | -+** ldp (q[0-9]+, q[0-9]+), \[x0\] -+** stp \2, \[x8\] -+** ) -+** ret -+*/ -+CALLEE (u64, svuint64_t) -+ -+/* Currently we scalarize this. */ -+CALLEE (f64, svfloat64_t) -+ -+#define CALLER(SUFFIX, TYPE) \ -+ typeof ((*(TYPE *) 0)[0]) \ -+ __attribute__((noipa)) \ -+ caller_##SUFFIX (TYPE *ptr1) \ -+ { \ -+ return callee_##SUFFIX (ptr1)[0]; \ -+ } -+ -+/* -+** caller_s8: -+** ... -+** bl callee_s8 -+** ldrb w0, \[sp, 16\] -+** ldp x29, x30, \[sp\], 48 -+** ret -+*/ -+CALLER (s8, svint8_t) -+ -+/* -+** caller_u8: -+** ... -+** bl callee_u8 -+** ldrb w0, \[sp, 16\] -+** ldp x29, x30, \[sp\], 48 -+** ret -+*/ -+CALLER (u8, svuint8_t) -+ -+/* -+** caller_s16: -+** ... -+** bl callee_s16 -+** ldrh w0, \[sp, 16\] -+** ldp x29, x30, \[sp\], 48 -+** ret -+*/ -+CALLER (s16, svint16_t) -+ -+/* -+** caller_u16: -+** ... -+** bl callee_u16 -+** ldrh w0, \[sp, 16\] -+** ldp x29, x30, \[sp\], 48 -+** ret -+*/ -+CALLER (u16, svuint16_t) -+ -+/* -+** caller_f16: -+** ... -+** bl callee_f16 -+** ldr h0, \[sp, 16\] -+** ldp x29, x30, \[sp\], 48 -+** ret -+*/ -+CALLER (f16, svfloat16_t) -+ -+/* -+** caller_bf16: -+** ... -+** bl callee_bf16 -+** ldr h0, \[sp, 16\] -+** ldp x29, x30, \[sp\], 48 -+** ret -+*/ -+CALLER (bf16, svbfloat16_t) -+ -+/* -+** caller_s32: -+** ... -+** bl callee_s32 -+** ldr w0, \[sp, 16\] -+** ldp x29, x30, \[sp\], 48 -+** ret -+*/ -+CALLER (s32, svint32_t) -+ -+/* -+** caller_u32: -+** ... -+** bl callee_u32 -+** ldr w0, \[sp, 16\] -+** ldp x29, x30, \[sp\], 48 -+** ret -+*/ -+CALLER (u32, svuint32_t) -+ -+/* -+** caller_f32: -+** ... -+** bl callee_f32 -+** ldr s0, \[sp, 16\] -+** ldp x29, x30, \[sp\], 48 -+** ret -+*/ -+CALLER (f32, svfloat32_t) -+ -+/* -+** caller_s64: -+** ... -+** bl callee_s64 -+** ldr x0, \[sp, 16\] -+** ldp x29, x30, \[sp\], 48 -+** ret -+*/ -+CALLER (s64, svint64_t) -+ -+/* -+** caller_u64: -+** ... -+** bl callee_u64 -+** ldr x0, \[sp, 16\] -+** ldp x29, x30, \[sp\], 48 -+** ret -+*/ -+CALLER (u64, svuint64_t) -+ -+/* -+** caller_f64: -+** ... 
-+** bl callee_f64 -+** ldr d0, \[sp, 16\] -+** ldp x29, x30, \[sp\], 48 -+** ret -+*/ -+CALLER (f64, svfloat64_t) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_6_1024.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_6_1024.c -new file mode 100644 -index 000000000..b6f267e76 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_6_1024.c -@@ -0,0 +1,287 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -msve-vector-bits=1024 -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+#include -+ -+typedef int8_t svint8_t __attribute__ ((vector_size (128))); -+typedef uint8_t svuint8_t __attribute__ ((vector_size (128))); -+ -+typedef int16_t svint16_t __attribute__ ((vector_size (128))); -+typedef uint16_t svuint16_t __attribute__ ((vector_size (128))); -+typedef __fp16 svfloat16_t __attribute__ ((vector_size (128))); -+typedef __bf16 svbfloat16_t __attribute__ ((vector_size (128))); -+ -+typedef int32_t svint32_t __attribute__ ((vector_size (128))); -+typedef uint32_t svuint32_t __attribute__ ((vector_size (128))); -+typedef float svfloat32_t __attribute__ ((vector_size (128))); -+ -+typedef int64_t svint64_t __attribute__ ((vector_size (128))); -+typedef uint64_t svuint64_t __attribute__ ((vector_size (128))); -+typedef double svfloat64_t __attribute__ ((vector_size (128))); -+ -+#define CALLEE(SUFFIX, TYPE) \ -+ TYPE __attribute__((noipa)) \ -+ callee_##SUFFIX (TYPE *ptr) \ -+ { \ -+ return *ptr; \ -+ } -+ -+/* -+** callee_s8: -+** ptrue (p[0-7])\.b, vl128 -+** ld1b z0\.b, \1/z, \[x0\] -+** st1b z0\.b, \1, \[x8\] -+** ret -+*/ -+CALLEE (s8, svint8_t) -+ -+/* -+** callee_u8: -+** ptrue (p[0-7])\.b, vl128 -+** ld1b z0\.b, \1/z, \[x0\] -+** st1b z0\.b, \1, \[x8\] -+** ret -+*/ -+CALLEE (u8, svuint8_t) -+ -+/* -+** callee_s16: -+** ptrue (p[0-7])\.b, vl128 -+** ld1h z0\.h, \1/z, \[x0\] -+** st1h z0\.h, \1, \[x8\] -+** ret -+*/ -+CALLEE (s16, svint16_t) -+ -+/* -+** callee_u16: -+** ptrue (p[0-7])\.b, vl128 -+** ld1h z0\.h, \1/z, \[x0\] -+** st1h z0\.h, \1, \[x8\] -+** ret -+*/ -+CALLEE (u16, svuint16_t) -+ -+/* -+** callee_f16: -+** ptrue (p[0-7])\.b, vl128 -+** ld1h z0\.h, \1/z, \[x0\] -+** st1h z0\.h, \1, \[x8\] -+** ret -+*/ -+CALLEE (f16, svfloat16_t) -+ -+/* -+** callee_bf16: -+** ptrue (p[0-7])\.b, vl128 -+** ld1h z0\.h, \1/z, \[x0\] -+** st1h z0\.h, \1, \[x8\] -+** ret -+*/ -+CALLEE (bf16, svbfloat16_t) -+ -+/* -+** callee_s32: -+** ptrue (p[0-7])\.b, vl128 -+** ld1w z0\.s, \1/z, \[x0\] -+** st1w z0\.s, \1, \[x8\] -+** ret -+*/ -+CALLEE (s32, svint32_t) -+ -+/* -+** callee_u32: -+** ptrue (p[0-7])\.b, vl128 -+** ld1w z0\.s, \1/z, \[x0\] -+** st1w z0\.s, \1, \[x8\] -+** ret -+*/ -+CALLEE (u32, svuint32_t) -+ -+/* -+** callee_f32: -+** ptrue (p[0-7])\.b, vl128 -+** ld1w z0\.s, \1/z, \[x0\] -+** st1w z0\.s, \1, \[x8\] -+** ret -+*/ -+CALLEE (f32, svfloat32_t) -+ -+/* -+** callee_s64: -+** ptrue (p[0-7])\.b, vl128 -+** ld1d z0\.d, \1/z, \[x0\] -+** st1d z0\.d, \1, \[x8\] -+** ret -+*/ -+CALLEE (s64, svint64_t) -+ -+/* -+** callee_u64: -+** ptrue (p[0-7])\.b, vl128 -+** ld1d z0\.d, \1/z, \[x0\] -+** st1d z0\.d, \1, \[x8\] -+** ret -+*/ -+CALLEE (u64, svuint64_t) -+ -+/* -+** callee_f64: -+** ptrue (p[0-7])\.b, vl128 -+** ld1d z0\.d, \1/z, \[x0\] -+** st1d z0\.d, \1, \[x8\] -+** ret -+*/ -+CALLEE (f64, svfloat64_t) -+ -+#define CALLER(SUFFIX, TYPE) \ -+ void __attribute__((noipa)) \ -+ caller_##SUFFIX (TYPE *ptr1, TYPE *ptr2) \ -+ { \ -+ *ptr2 = callee_##SUFFIX (ptr1); \ -+ } -+ -+/* -+** caller_s8: -+** ... 
-+** bl callee_s8 -+** ... -+** ld1b (z[0-9]+\.b), (p[0-7])/z, \[[^]]*\] -+** st1b \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (s8, svint8_t) -+ -+/* -+** caller_u8: -+** ... -+** bl callee_u8 -+** ... -+** ld1b (z[0-9]+\.b), (p[0-7])/z, \[[^]]*\] -+** st1b \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (u8, svuint8_t) -+ -+/* -+** caller_s16: -+** ... -+** bl callee_s16 -+** ... -+** ld1h (z[0-9]+\.h), (p[0-7])/z, \[[^]]*\] -+** st1h \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (s16, svint16_t) -+ -+/* -+** caller_u16: -+** ... -+** bl callee_u16 -+** ... -+** ld1h (z[0-9]+\.h), (p[0-7])/z, \[[^]]*\] -+** st1h \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (u16, svuint16_t) -+ -+/* -+** caller_f16: -+** ... -+** bl callee_f16 -+** ... -+** ld1h (z[0-9]+\.h), (p[0-7])/z, \[[^]]*\] -+** st1h \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (f16, svfloat16_t) -+ -+/* -+** caller_bf16: -+** ... -+** bl callee_bf16 -+** ... -+** ld1h (z[0-9]+\.h), (p[0-7])/z, \[[^]]*\] -+** st1h \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (bf16, svbfloat16_t) -+ -+/* -+** caller_s32: -+** ... -+** bl callee_s32 -+** ... -+** ld1w (z[0-9]+\.s), (p[0-7])/z, \[[^]]*\] -+** st1w \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (s32, svint32_t) -+ -+/* -+** caller_u32: -+** ... -+** bl callee_u32 -+** ... -+** ld1w (z[0-9]+\.s), (p[0-7])/z, \[[^]]*\] -+** st1w \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (u32, svuint32_t) -+ -+/* -+** caller_f32: -+** ... -+** bl callee_f32 -+** ... -+** ld1w (z[0-9]+\.s), (p[0-7])/z, \[[^]]*\] -+** st1w \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (f32, svfloat32_t) -+ -+/* -+** caller_s64: -+** ... -+** bl callee_s64 -+** ... -+** ld1d (z[0-9]+\.d), (p[0-7])/z, \[[^]]*\] -+** st1d \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (s64, svint64_t) -+ -+/* -+** caller_u64: -+** ... -+** bl callee_u64 -+** ... -+** ld1d (z[0-9]+\.d), (p[0-7])/z, \[[^]]*\] -+** st1d \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (u64, svuint64_t) -+ -+/* -+** caller_f64: -+** ... -+** bl callee_f64 -+** ... -+** ld1d (z[0-9]+\.d), (p[0-7])/z, \[[^]]*\] -+** st1d \1, \2, \[[^]]*\] -+** ... 
-+** ret -+*/ -+CALLER (f64, svfloat64_t) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_6_2048.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_6_2048.c -new file mode 100644 -index 000000000..46b7d683e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_6_2048.c -@@ -0,0 +1,287 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -msve-vector-bits=2048 -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+#include -+ -+typedef int8_t svint8_t __attribute__ ((vector_size (256))); -+typedef uint8_t svuint8_t __attribute__ ((vector_size (256))); -+ -+typedef int16_t svint16_t __attribute__ ((vector_size (256))); -+typedef uint16_t svuint16_t __attribute__ ((vector_size (256))); -+typedef __fp16 svfloat16_t __attribute__ ((vector_size (256))); -+typedef __bf16 svbfloat16_t __attribute__ ((vector_size (256))); -+ -+typedef int32_t svint32_t __attribute__ ((vector_size (256))); -+typedef uint32_t svuint32_t __attribute__ ((vector_size (256))); -+typedef float svfloat32_t __attribute__ ((vector_size (256))); -+ -+typedef int64_t svint64_t __attribute__ ((vector_size (256))); -+typedef uint64_t svuint64_t __attribute__ ((vector_size (256))); -+typedef double svfloat64_t __attribute__ ((vector_size (256))); -+ -+#define CALLEE(SUFFIX, TYPE) \ -+ TYPE __attribute__((noipa)) \ -+ callee_##SUFFIX (TYPE *ptr) \ -+ { \ -+ return *ptr; \ -+ } -+ -+/* -+** callee_s8: -+** ptrue (p[0-7])\.b, vl256 -+** ld1b z0\.b, \1/z, \[x0\] -+** st1b z0\.b, \1, \[x8\] -+** ret -+*/ -+CALLEE (s8, svint8_t) -+ -+/* -+** callee_u8: -+** ptrue (p[0-7])\.b, vl256 -+** ld1b z0\.b, \1/z, \[x0\] -+** st1b z0\.b, \1, \[x8\] -+** ret -+*/ -+CALLEE (u8, svuint8_t) -+ -+/* -+** callee_s16: -+** ptrue (p[0-7])\.b, vl256 -+** ld1h z0\.h, \1/z, \[x0\] -+** st1h z0\.h, \1, \[x8\] -+** ret -+*/ -+CALLEE (s16, svint16_t) -+ -+/* -+** callee_u16: -+** ptrue (p[0-7])\.b, vl256 -+** ld1h z0\.h, \1/z, \[x0\] -+** st1h z0\.h, \1, \[x8\] -+** ret -+*/ -+CALLEE (u16, svuint16_t) -+ -+/* -+** callee_f16: -+** ptrue (p[0-7])\.b, vl256 -+** ld1h z0\.h, \1/z, \[x0\] -+** st1h z0\.h, \1, \[x8\] -+** ret -+*/ -+CALLEE (f16, svfloat16_t) -+ -+/* -+** callee_bf16: -+** ptrue (p[0-7])\.b, vl256 -+** ld1h z0\.h, \1/z, \[x0\] -+** st1h z0\.h, \1, \[x8\] -+** ret -+*/ -+CALLEE (bf16, svbfloat16_t) -+ -+/* -+** callee_s32: -+** ptrue (p[0-7])\.b, vl256 -+** ld1w z0\.s, \1/z, \[x0\] -+** st1w z0\.s, \1, \[x8\] -+** ret -+*/ -+CALLEE (s32, svint32_t) -+ -+/* -+** callee_u32: -+** ptrue (p[0-7])\.b, vl256 -+** ld1w z0\.s, \1/z, \[x0\] -+** st1w z0\.s, \1, \[x8\] -+** ret -+*/ -+CALLEE (u32, svuint32_t) -+ -+/* -+** callee_f32: -+** ptrue (p[0-7])\.b, vl256 -+** ld1w z0\.s, \1/z, \[x0\] -+** st1w z0\.s, \1, \[x8\] -+** ret -+*/ -+CALLEE (f32, svfloat32_t) -+ -+/* -+** callee_s64: -+** ptrue (p[0-7])\.b, vl256 -+** ld1d z0\.d, \1/z, \[x0\] -+** st1d z0\.d, \1, \[x8\] -+** ret -+*/ -+CALLEE (s64, svint64_t) -+ -+/* -+** callee_u64: -+** ptrue (p[0-7])\.b, vl256 -+** ld1d z0\.d, \1/z, \[x0\] -+** st1d z0\.d, \1, \[x8\] -+** ret -+*/ -+CALLEE (u64, svuint64_t) -+ -+/* -+** callee_f64: -+** ptrue (p[0-7])\.b, vl256 -+** ld1d z0\.d, \1/z, \[x0\] -+** st1d z0\.d, \1, \[x8\] -+** ret -+*/ -+CALLEE (f64, svfloat64_t) -+ -+#define CALLER(SUFFIX, TYPE) \ -+ void __attribute__((noipa)) \ -+ caller_##SUFFIX (TYPE *ptr1, TYPE *ptr2) \ -+ { \ -+ *ptr2 = callee_##SUFFIX (ptr1); \ -+ } -+ -+/* -+** caller_s8: -+** ... -+** bl callee_s8 -+** ... 
-+** ld1b (z[0-9]+\.b), (p[0-7])/z, \[[^]]*\] -+** st1b \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (s8, svint8_t) -+ -+/* -+** caller_u8: -+** ... -+** bl callee_u8 -+** ... -+** ld1b (z[0-9]+\.b), (p[0-7])/z, \[[^]]*\] -+** st1b \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (u8, svuint8_t) -+ -+/* -+** caller_s16: -+** ... -+** bl callee_s16 -+** ... -+** ld1h (z[0-9]+\.h), (p[0-7])/z, \[[^]]*\] -+** st1h \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (s16, svint16_t) -+ -+/* -+** caller_u16: -+** ... -+** bl callee_u16 -+** ... -+** ld1h (z[0-9]+\.h), (p[0-7])/z, \[[^]]*\] -+** st1h \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (u16, svuint16_t) -+ -+/* -+** caller_f16: -+** ... -+** bl callee_f16 -+** ... -+** ld1h (z[0-9]+\.h), (p[0-7])/z, \[[^]]*\] -+** st1h \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (f16, svfloat16_t) -+ -+/* -+** caller_bf16: -+** ... -+** bl callee_bf16 -+** ... -+** ld1h (z[0-9]+\.h), (p[0-7])/z, \[[^]]*\] -+** st1h \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (bf16, svbfloat16_t) -+ -+/* -+** caller_s32: -+** ... -+** bl callee_s32 -+** ... -+** ld1w (z[0-9]+\.s), (p[0-7])/z, \[[^]]*\] -+** st1w \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (s32, svint32_t) -+ -+/* -+** caller_u32: -+** ... -+** bl callee_u32 -+** ... -+** ld1w (z[0-9]+\.s), (p[0-7])/z, \[[^]]*\] -+** st1w \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (u32, svuint32_t) -+ -+/* -+** caller_f32: -+** ... -+** bl callee_f32 -+** ... -+** ld1w (z[0-9]+\.s), (p[0-7])/z, \[[^]]*\] -+** st1w \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (f32, svfloat32_t) -+ -+/* -+** caller_s64: -+** ... -+** bl callee_s64 -+** ... -+** ld1d (z[0-9]+\.d), (p[0-7])/z, \[[^]]*\] -+** st1d \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (s64, svint64_t) -+ -+/* -+** caller_u64: -+** ... -+** bl callee_u64 -+** ... -+** ld1d (z[0-9]+\.d), (p[0-7])/z, \[[^]]*\] -+** st1d \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (u64, svuint64_t) -+ -+/* -+** caller_f64: -+** ... -+** bl callee_f64 -+** ... -+** ld1d (z[0-9]+\.d), (p[0-7])/z, \[[^]]*\] -+** st1d \1, \2, \[[^]]*\] -+** ... 
-+** ret -+*/ -+CALLER (f64, svfloat64_t) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_6_256.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_6_256.c -new file mode 100644 -index 000000000..04872493c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_6_256.c -@@ -0,0 +1,287 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -msve-vector-bits=256 -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+#include -+ -+typedef int8_t svint8_t __attribute__ ((vector_size (32))); -+typedef uint8_t svuint8_t __attribute__ ((vector_size (32))); -+ -+typedef int16_t svint16_t __attribute__ ((vector_size (32))); -+typedef uint16_t svuint16_t __attribute__ ((vector_size (32))); -+typedef __fp16 svfloat16_t __attribute__ ((vector_size (32))); -+typedef __bf16 svbfloat16_t __attribute__ ((vector_size (32))); -+ -+typedef int32_t svint32_t __attribute__ ((vector_size (32))); -+typedef uint32_t svuint32_t __attribute__ ((vector_size (32))); -+typedef float svfloat32_t __attribute__ ((vector_size (32))); -+ -+typedef int64_t svint64_t __attribute__ ((vector_size (32))); -+typedef uint64_t svuint64_t __attribute__ ((vector_size (32))); -+typedef double svfloat64_t __attribute__ ((vector_size (32))); -+ -+#define CALLEE(SUFFIX, TYPE) \ -+ TYPE __attribute__((noipa)) \ -+ callee_##SUFFIX (TYPE *ptr) \ -+ { \ -+ return *ptr; \ -+ } -+ -+/* -+** callee_s8: -+** ptrue (p[0-7])\.b, vl32 -+** ld1b z0\.b, \1/z, \[x0\] -+** st1b z0\.b, \1, \[x8\] -+** ret -+*/ -+CALLEE (s8, svint8_t) -+ -+/* -+** callee_u8: -+** ptrue (p[0-7])\.b, vl32 -+** ld1b z0\.b, \1/z, \[x0\] -+** st1b z0\.b, \1, \[x8\] -+** ret -+*/ -+CALLEE (u8, svuint8_t) -+ -+/* -+** callee_s16: -+** ptrue (p[0-7])\.b, vl32 -+** ld1h z0\.h, \1/z, \[x0\] -+** st1h z0\.h, \1, \[x8\] -+** ret -+*/ -+CALLEE (s16, svint16_t) -+ -+/* -+** callee_u16: -+** ptrue (p[0-7])\.b, vl32 -+** ld1h z0\.h, \1/z, \[x0\] -+** st1h z0\.h, \1, \[x8\] -+** ret -+*/ -+CALLEE (u16, svuint16_t) -+ -+/* -+** callee_f16: -+** ptrue (p[0-7])\.b, vl32 -+** ld1h z0\.h, \1/z, \[x0\] -+** st1h z0\.h, \1, \[x8\] -+** ret -+*/ -+CALLEE (f16, svfloat16_t) -+ -+/* -+** callee_bf16: -+** ptrue (p[0-7])\.b, vl32 -+** ld1h z0\.h, \1/z, \[x0\] -+** st1h z0\.h, \1, \[x8\] -+** ret -+*/ -+CALLEE (bf16, svbfloat16_t) -+ -+/* -+** callee_s32: -+** ptrue (p[0-7])\.b, vl32 -+** ld1w z0\.s, \1/z, \[x0\] -+** st1w z0\.s, \1, \[x8\] -+** ret -+*/ -+CALLEE (s32, svint32_t) -+ -+/* -+** callee_u32: -+** ptrue (p[0-7])\.b, vl32 -+** ld1w z0\.s, \1/z, \[x0\] -+** st1w z0\.s, \1, \[x8\] -+** ret -+*/ -+CALLEE (u32, svuint32_t) -+ -+/* -+** callee_f32: -+** ptrue (p[0-7])\.b, vl32 -+** ld1w z0\.s, \1/z, \[x0\] -+** st1w z0\.s, \1, \[x8\] -+** ret -+*/ -+CALLEE (f32, svfloat32_t) -+ -+/* -+** callee_s64: -+** ptrue (p[0-7])\.b, vl32 -+** ld1d z0\.d, \1/z, \[x0\] -+** st1d z0\.d, \1, \[x8\] -+** ret -+*/ -+CALLEE (s64, svint64_t) -+ -+/* -+** callee_u64: -+** ptrue (p[0-7])\.b, vl32 -+** ld1d z0\.d, \1/z, \[x0\] -+** st1d z0\.d, \1, \[x8\] -+** ret -+*/ -+CALLEE (u64, svuint64_t) -+ -+/* -+** callee_f64: -+** ptrue (p[0-7])\.b, vl32 -+** ld1d z0\.d, \1/z, \[x0\] -+** st1d z0\.d, \1, \[x8\] -+** ret -+*/ -+CALLEE (f64, svfloat64_t) -+ -+#define CALLER(SUFFIX, TYPE) \ -+ void __attribute__((noipa)) \ -+ caller_##SUFFIX (TYPE *ptr1, TYPE *ptr2) \ -+ { \ -+ *ptr2 = callee_##SUFFIX (ptr1); \ -+ } -+ -+/* -+** caller_s8: -+** ... -+** bl callee_s8 -+** ... -+** ld1b (z[0-9]+\.b), (p[0-7])/z, \[[^]]*\] -+** st1b \1, \2, \[[^]]*\] -+** ... 
-+** ret -+*/ -+CALLER (s8, svint8_t) -+ -+/* -+** caller_u8: -+** ... -+** bl callee_u8 -+** ... -+** ld1b (z[0-9]+\.b), (p[0-7])/z, \[[^]]*\] -+** st1b \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (u8, svuint8_t) -+ -+/* -+** caller_s16: -+** ... -+** bl callee_s16 -+** ... -+** ld1h (z[0-9]+\.h), (p[0-7])/z, \[[^]]*\] -+** st1h \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (s16, svint16_t) -+ -+/* -+** caller_u16: -+** ... -+** bl callee_u16 -+** ... -+** ld1h (z[0-9]+\.h), (p[0-7])/z, \[[^]]*\] -+** st1h \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (u16, svuint16_t) -+ -+/* -+** caller_f16: -+** ... -+** bl callee_f16 -+** ... -+** ld1h (z[0-9]+\.h), (p[0-7])/z, \[[^]]*\] -+** st1h \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (f16, svfloat16_t) -+ -+/* -+** caller_bf16: -+** ... -+** bl callee_bf16 -+** ... -+** ld1h (z[0-9]+\.h), (p[0-7])/z, \[[^]]*\] -+** st1h \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (bf16, svbfloat16_t) -+ -+/* -+** caller_s32: -+** ... -+** bl callee_s32 -+** ... -+** ld1w (z[0-9]+\.s), (p[0-7])/z, \[[^]]*\] -+** st1w \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (s32, svint32_t) -+ -+/* -+** caller_u32: -+** ... -+** bl callee_u32 -+** ... -+** ld1w (z[0-9]+\.s), (p[0-7])/z, \[[^]]*\] -+** st1w \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (u32, svuint32_t) -+ -+/* -+** caller_f32: -+** ... -+** bl callee_f32 -+** ... -+** ld1w (z[0-9]+\.s), (p[0-7])/z, \[[^]]*\] -+** st1w \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (f32, svfloat32_t) -+ -+/* -+** caller_s64: -+** ... -+** bl callee_s64 -+** ... -+** ld1d (z[0-9]+\.d), (p[0-7])/z, \[[^]]*\] -+** st1d \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (s64, svint64_t) -+ -+/* -+** caller_u64: -+** ... -+** bl callee_u64 -+** ... -+** ld1d (z[0-9]+\.d), (p[0-7])/z, \[[^]]*\] -+** st1d \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (u64, svuint64_t) -+ -+/* -+** caller_f64: -+** ... -+** bl callee_f64 -+** ... -+** ld1d (z[0-9]+\.d), (p[0-7])/z, \[[^]]*\] -+** st1d \1, \2, \[[^]]*\] -+** ... 
-+** ret -+*/ -+CALLER (f64, svfloat64_t) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_6_512.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_6_512.c -new file mode 100644 -index 000000000..9817d856a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_6_512.c -@@ -0,0 +1,287 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -msve-vector-bits=512 -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+#include -+ -+typedef int8_t svint8_t __attribute__ ((vector_size (64))); -+typedef uint8_t svuint8_t __attribute__ ((vector_size (64))); -+ -+typedef int16_t svint16_t __attribute__ ((vector_size (64))); -+typedef uint16_t svuint16_t __attribute__ ((vector_size (64))); -+typedef __fp16 svfloat16_t __attribute__ ((vector_size (64))); -+typedef __bf16 svbfloat16_t __attribute__ ((vector_size (64))); -+ -+typedef int32_t svint32_t __attribute__ ((vector_size (64))); -+typedef uint32_t svuint32_t __attribute__ ((vector_size (64))); -+typedef float svfloat32_t __attribute__ ((vector_size (64))); -+ -+typedef int64_t svint64_t __attribute__ ((vector_size (64))); -+typedef uint64_t svuint64_t __attribute__ ((vector_size (64))); -+typedef double svfloat64_t __attribute__ ((vector_size (64))); -+ -+#define CALLEE(SUFFIX, TYPE) \ -+ TYPE __attribute__((noipa)) \ -+ callee_##SUFFIX (TYPE *ptr) \ -+ { \ -+ return *ptr; \ -+ } -+ -+/* -+** callee_s8: -+** ptrue (p[0-7])\.b, vl64 -+** ld1b z0\.b, \1/z, \[x0\] -+** st1b z0\.b, \1, \[x8\] -+** ret -+*/ -+CALLEE (s8, svint8_t) -+ -+/* -+** callee_u8: -+** ptrue (p[0-7])\.b, vl64 -+** ld1b z0\.b, \1/z, \[x0\] -+** st1b z0\.b, \1, \[x8\] -+** ret -+*/ -+CALLEE (u8, svuint8_t) -+ -+/* -+** callee_s16: -+** ptrue (p[0-7])\.b, vl64 -+** ld1h z0\.h, \1/z, \[x0\] -+** st1h z0\.h, \1, \[x8\] -+** ret -+*/ -+CALLEE (s16, svint16_t) -+ -+/* -+** callee_u16: -+** ptrue (p[0-7])\.b, vl64 -+** ld1h z0\.h, \1/z, \[x0\] -+** st1h z0\.h, \1, \[x8\] -+** ret -+*/ -+CALLEE (u16, svuint16_t) -+ -+/* -+** callee_f16: -+** ptrue (p[0-7])\.b, vl64 -+** ld1h z0\.h, \1/z, \[x0\] -+** st1h z0\.h, \1, \[x8\] -+** ret -+*/ -+CALLEE (f16, svfloat16_t) -+ -+/* -+** callee_bf16: -+** ptrue (p[0-7])\.b, vl64 -+** ld1h z0\.h, \1/z, \[x0\] -+** st1h z0\.h, \1, \[x8\] -+** ret -+*/ -+CALLEE (bf16, svbfloat16_t) -+ -+/* -+** callee_s32: -+** ptrue (p[0-7])\.b, vl64 -+** ld1w z0\.s, \1/z, \[x0\] -+** st1w z0\.s, \1, \[x8\] -+** ret -+*/ -+CALLEE (s32, svint32_t) -+ -+/* -+** callee_u32: -+** ptrue (p[0-7])\.b, vl64 -+** ld1w z0\.s, \1/z, \[x0\] -+** st1w z0\.s, \1, \[x8\] -+** ret -+*/ -+CALLEE (u32, svuint32_t) -+ -+/* -+** callee_f32: -+** ptrue (p[0-7])\.b, vl64 -+** ld1w z0\.s, \1/z, \[x0\] -+** st1w z0\.s, \1, \[x8\] -+** ret -+*/ -+CALLEE (f32, svfloat32_t) -+ -+/* -+** callee_s64: -+** ptrue (p[0-7])\.b, vl64 -+** ld1d z0\.d, \1/z, \[x0\] -+** st1d z0\.d, \1, \[x8\] -+** ret -+*/ -+CALLEE (s64, svint64_t) -+ -+/* -+** callee_u64: -+** ptrue (p[0-7])\.b, vl64 -+** ld1d z0\.d, \1/z, \[x0\] -+** st1d z0\.d, \1, \[x8\] -+** ret -+*/ -+CALLEE (u64, svuint64_t) -+ -+/* -+** callee_f64: -+** ptrue (p[0-7])\.b, vl64 -+** ld1d z0\.d, \1/z, \[x0\] -+** st1d z0\.d, \1, \[x8\] -+** ret -+*/ -+CALLEE (f64, svfloat64_t) -+ -+#define CALLER(SUFFIX, TYPE) \ -+ void __attribute__((noipa)) \ -+ caller_##SUFFIX (TYPE *ptr1, TYPE *ptr2) \ -+ { \ -+ *ptr2 = callee_##SUFFIX (ptr1); \ -+ } -+ -+/* -+** caller_s8: -+** ... -+** bl callee_s8 -+** ... -+** ld1b (z[0-9]+\.b), (p[0-7])/z, \[[^]]*\] -+** st1b \1, \2, \[[^]]*\] -+** ... 
-+** ret -+*/ -+CALLER (s8, svint8_t) -+ -+/* -+** caller_u8: -+** ... -+** bl callee_u8 -+** ... -+** ld1b (z[0-9]+\.b), (p[0-7])/z, \[[^]]*\] -+** st1b \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (u8, svuint8_t) -+ -+/* -+** caller_s16: -+** ... -+** bl callee_s16 -+** ... -+** ld1h (z[0-9]+\.h), (p[0-7])/z, \[[^]]*\] -+** st1h \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (s16, svint16_t) -+ -+/* -+** caller_u16: -+** ... -+** bl callee_u16 -+** ... -+** ld1h (z[0-9]+\.h), (p[0-7])/z, \[[^]]*\] -+** st1h \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (u16, svuint16_t) -+ -+/* -+** caller_f16: -+** ... -+** bl callee_f16 -+** ... -+** ld1h (z[0-9]+\.h), (p[0-7])/z, \[[^]]*\] -+** st1h \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (f16, svfloat16_t) -+ -+/* -+** caller_bf16: -+** ... -+** bl callee_bf16 -+** ... -+** ld1h (z[0-9]+\.h), (p[0-7])/z, \[[^]]*\] -+** st1h \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (bf16, svbfloat16_t) -+ -+/* -+** caller_s32: -+** ... -+** bl callee_s32 -+** ... -+** ld1w (z[0-9]+\.s), (p[0-7])/z, \[[^]]*\] -+** st1w \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (s32, svint32_t) -+ -+/* -+** caller_u32: -+** ... -+** bl callee_u32 -+** ... -+** ld1w (z[0-9]+\.s), (p[0-7])/z, \[[^]]*\] -+** st1w \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (u32, svuint32_t) -+ -+/* -+** caller_f32: -+** ... -+** bl callee_f32 -+** ... -+** ld1w (z[0-9]+\.s), (p[0-7])/z, \[[^]]*\] -+** st1w \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (f32, svfloat32_t) -+ -+/* -+** caller_s64: -+** ... -+** bl callee_s64 -+** ... -+** ld1d (z[0-9]+\.d), (p[0-7])/z, \[[^]]*\] -+** st1d \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (s64, svint64_t) -+ -+/* -+** caller_u64: -+** ... -+** bl callee_u64 -+** ... -+** ld1d (z[0-9]+\.d), (p[0-7])/z, \[[^]]*\] -+** st1d \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (u64, svuint64_t) -+ -+/* -+** caller_f64: -+** ... -+** bl callee_f64 -+** ... -+** ld1d (z[0-9]+\.d), (p[0-7])/z, \[[^]]*\] -+** st1d \1, \2, \[[^]]*\] -+** ... -+** ret -+*/ -+CALLER (f64, svfloat64_t) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_7.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_7.c -new file mode 100644 -index 000000000..55456a3b4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_7.c -@@ -0,0 +1,341 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#include -+ -+/* -+** callee_s8: -+** mov z0\.b, #1 -+** mov z1\.b, #2 -+** ret -+*/ -+svint8x2_t __attribute__((noipa)) -+callee_s8 (void) -+{ -+ return svcreate2 (svdup_s8 (1), svdup_s8 (2)); -+} -+ -+/* -+** caller_s8: -+** ... -+** bl callee_s8 -+** trn1 z0\.b, z0\.b, z1\.b -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svint8_t __attribute__((noipa)) -+caller_s8 (void) -+{ -+ svint8x2_t res; -+ res = callee_s8 (); -+ return svtrn1 (svget2 (res, 0), svget2 (res, 1)); -+} -+ -+/* -+** callee_u8: -+** mov z0\.b, #3 -+** mov z1\.b, #4 -+** ret -+*/ -+svuint8x2_t __attribute__((noipa)) -+callee_u8 (void) -+{ -+ return svcreate2 (svdup_u8 (3), svdup_u8 (4)); -+} -+ -+/* -+** caller_u8: -+** ... 
-+** bl callee_u8 -+** trn2 z0\.b, z1\.b, z0\.b -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svuint8_t __attribute__((noipa)) -+caller_u8 (void) -+{ -+ svuint8x2_t res; -+ res = callee_u8 (); -+ return svtrn2 (svget2 (res, 1), svget2 (res, 0)); -+} -+ -+/* -+** callee_s16: -+** mov z0\.h, #1 -+** mov z1\.h, #2 -+** ret -+*/ -+svint16x2_t __attribute__((noipa)) -+callee_s16 (void) -+{ -+ return svcreate2 (svdup_s16 (1), svdup_s16 (2)); -+} -+ -+/* -+** caller_s16: -+** ... -+** bl callee_s16 -+** trn1 z0\.h, z0\.h, z1\.h -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svint16_t __attribute__((noipa)) -+caller_s16 (void) -+{ -+ svint16x2_t res; -+ res = callee_s16 (); -+ return svtrn1 (svget2 (res, 0), svget2 (res, 1)); -+} -+ -+/* -+** callee_u16: -+** mov z0\.h, #3 -+** mov z1\.h, #4 -+** ret -+*/ -+svuint16x2_t __attribute__((noipa)) -+callee_u16 (void) -+{ -+ return svcreate2 (svdup_u16 (3), svdup_u16 (4)); -+} -+ -+/* -+** caller_u16: -+** ... -+** bl callee_u16 -+** trn2 z0\.h, z1\.h, z0\.h -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svuint16_t __attribute__((noipa)) -+caller_u16 (void) -+{ -+ svuint16x2_t res; -+ res = callee_u16 (); -+ return svtrn2 (svget2 (res, 1), svget2 (res, 0)); -+} -+ -+/* -+** callee_f16: -+** fmov z0\.h, #5\.0(?:e\+0)? -+** fmov z1\.h, #6\.0(?:e\+0)? -+** ret -+*/ -+svfloat16x2_t __attribute__((noipa)) -+callee_f16 (void) -+{ -+ return svcreate2 (svdup_f16 (5), svdup_f16 (6)); -+} -+ -+/* -+** caller_f16: -+** ... -+** bl callee_f16 -+** zip1 z0\.h, z1\.h, z0\.h -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svfloat16_t __attribute__((noipa)) -+caller_f16 (void) -+{ -+ svfloat16x2_t res; -+ res = callee_f16 (); -+ return svzip1 (svget2 (res, 1), svget2 (res, 0)); -+} -+ -+/* -+** callee_bf16: -+** mov z0\.h, h2 -+** mov z1\.h, h3 -+** ret -+*/ -+svbfloat16x2_t __attribute__((noipa)) -+callee_bf16 (bfloat16_t h0, bfloat16_t h1, bfloat16_t h2, bfloat16_t h3) -+{ -+ return svcreate2 (svdup_bf16 (h2), svdup_bf16 (h3)); -+} -+ -+/* -+** caller_bf16: -+** ... -+** bl callee_bf16 -+** zip2 z0\.h, z1\.h, z0\.h -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svbfloat16_t __attribute__((noipa)) -+caller_bf16 (bfloat16_t h0, bfloat16_t h1, bfloat16_t h2, bfloat16_t h3) -+{ -+ svbfloat16x2_t res; -+ res = callee_bf16 (h0, h1, h2, h3); -+ return svzip2 (svget2 (res, 1), svget2 (res, 0)); -+} -+ -+/* -+** callee_s32: -+** mov z0\.s, #1 -+** mov z1\.s, #2 -+** ret -+*/ -+svint32x2_t __attribute__((noipa)) -+callee_s32 (void) -+{ -+ return svcreate2 (svdup_s32 (1), svdup_s32 (2)); -+} -+ -+/* -+** caller_s32: -+** ... -+** bl callee_s32 -+** trn1 z0\.s, z0\.s, z1\.s -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svint32_t __attribute__((noipa)) -+caller_s32 (void) -+{ -+ svint32x2_t res; -+ res = callee_s32 (); -+ return svtrn1 (svget2 (res, 0), svget2 (res, 1)); -+} -+ -+/* -+** callee_u32: -+** mov z0\.s, #3 -+** mov z1\.s, #4 -+** ret -+*/ -+svuint32x2_t __attribute__((noipa)) -+callee_u32 (void) -+{ -+ return svcreate2 (svdup_u32 (3), svdup_u32 (4)); -+} -+ -+/* -+** caller_u32: -+** ... -+** bl callee_u32 -+** trn2 z0\.s, z1\.s, z0\.s -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svuint32_t __attribute__((noipa)) -+caller_u32 (void) -+{ -+ svuint32x2_t res; -+ res = callee_u32 (); -+ return svtrn2 (svget2 (res, 1), svget2 (res, 0)); -+} -+ -+/* -+** callee_f32: -+** fmov z0\.s, #5\.0(?:e\+0)? -+** fmov z1\.s, #6\.0(?:e\+0)? 
-+** ret -+*/ -+svfloat32x2_t __attribute__((noipa)) -+callee_f32 (void) -+{ -+ return svcreate2 (svdup_f32 (5), svdup_f32 (6)); -+} -+ -+/* -+** caller_f32: -+** ... -+** bl callee_f32 -+** zip1 z0\.s, z1\.s, z0\.s -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svfloat32_t __attribute__((noipa)) -+caller_f32 (void) -+{ -+ svfloat32x2_t res; -+ res = callee_f32 (); -+ return svzip1 (svget2 (res, 1), svget2 (res, 0)); -+} -+ -+/* -+** callee_s64: -+** mov z0\.d, #1 -+** mov z1\.d, #2 -+** ret -+*/ -+svint64x2_t __attribute__((noipa)) -+callee_s64 (void) -+{ -+ return svcreate2 (svdup_s64 (1), svdup_s64 (2)); -+} -+ -+/* -+** caller_s64: -+** ... -+** bl callee_s64 -+** trn1 z0\.d, z0\.d, z1\.d -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svint64_t __attribute__((noipa)) -+caller_s64 (void) -+{ -+ svint64x2_t res; -+ res = callee_s64 (); -+ return svtrn1 (svget2 (res, 0), svget2 (res, 1)); -+} -+ -+/* -+** callee_u64: -+** mov z0\.d, #3 -+** mov z1\.d, #4 -+** ret -+*/ -+svuint64x2_t __attribute__((noipa)) -+callee_u64 (void) -+{ -+ return svcreate2 (svdup_u64 (3), svdup_u64 (4)); -+} -+ -+/* -+** caller_u64: -+** ... -+** bl callee_u64 -+** trn2 z0\.d, z1\.d, z0\.d -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svuint64_t __attribute__((noipa)) -+caller_u64 (void) -+{ -+ svuint64x2_t res; -+ res = callee_u64 (); -+ return svtrn2 (svget2 (res, 1), svget2 (res, 0)); -+} -+ -+/* -+** callee_f64: -+** fmov z0\.d, #5\.0(?:e\+0)? -+** fmov z1\.d, #6\.0(?:e\+0)? -+** ret -+*/ -+svfloat64x2_t __attribute__((noipa)) -+callee_f64 (void) -+{ -+ return svcreate2 (svdup_f64 (5), svdup_f64 (6)); -+} -+ -+/* -+** caller_f64: -+** ... -+** bl callee_f64 -+** zip1 z0\.d, z1\.d, z0\.d -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svfloat64_t __attribute__((noipa)) -+caller_f64 (void) -+{ -+ svfloat64x2_t res; -+ res = callee_f64 (); -+ return svzip1 (svget2 (res, 1), svget2 (res, 0)); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_8.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_8.c -new file mode 100644 -index 000000000..9581811e7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_8.c -@@ -0,0 +1,375 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -frename-registers -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#include -+ -+/* -+** callee_s8: -+** mov z0\.b, #1 -+** mov z1\.b, #2 -+** mov z2\.b, #3 -+** ret -+*/ -+svint8x3_t __attribute__((noipa)) -+callee_s8 (void) -+{ -+ return svcreate3 (svdup_s8 (1), svdup_s8 (2), svdup_s8 (3)); -+} -+ -+/* -+** caller_s8: -+** ... -+** bl callee_s8 -+** ptrue (p[0-7])\.b, all -+** mad z0\.b, \1/m, z1\.b, z2\.b -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svint8_t __attribute__((noipa)) -+caller_s8 (void) -+{ -+ svint8x3_t res; -+ res = callee_s8 (); -+ return svmad_x (svptrue_b8 (), -+ svget3 (res, 0), svget3 (res, 1), svget3 (res, 2)); -+} -+ -+/* -+** callee_u8: -+** mov z0\.b, #4 -+** mov z1\.b, #5 -+** mov z2\.b, #6 -+** ret -+*/ -+svuint8x3_t __attribute__((noipa)) -+callee_u8 (void) -+{ -+ return svcreate3 (svdup_u8 (4), svdup_u8 (5), svdup_u8 (6)); -+} -+ -+/* -+** caller_u8: -+** ... 
-+** bl callee_u8 -+** ptrue (p[0-7])\.b, all -+** msb z0\.b, \1/m, z1\.b, z2\.b -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svuint8_t __attribute__((noipa)) -+caller_u8 (void) -+{ -+ svuint8x3_t res; -+ res = callee_u8 (); -+ return svmsb_x (svptrue_b8 (), -+ svget3 (res, 0), svget3 (res, 1), svget3 (res, 2)); -+} -+ -+/* -+** callee_s16: -+** mov z0\.h, #1 -+** mov z1\.h, #2 -+** mov z2\.h, #3 -+** ret -+*/ -+svint16x3_t __attribute__((noipa)) -+callee_s16 (void) -+{ -+ return svcreate3 (svdup_s16 (1), svdup_s16 (2), svdup_s16 (3)); -+} -+ -+/* -+** caller_s16: -+** ... -+** bl callee_s16 -+** ptrue (p[0-7])\.b, all -+** mls z0\.h, \1/m, z1\.h, z2\.h -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svint16_t __attribute__((noipa)) -+caller_s16 (void) -+{ -+ svint16x3_t res; -+ res = callee_s16 (); -+ return svmls_x (svptrue_b16 (), -+ svget3 (res, 0), svget3 (res, 1), svget3 (res, 2)); -+} -+ -+/* -+** callee_u16: -+** mov z0\.h, #4 -+** mov z1\.h, #5 -+** mov z2\.h, #6 -+** ret -+*/ -+svuint16x3_t __attribute__((noipa)) -+callee_u16 (void) -+{ -+ return svcreate3 (svdup_u16 (4), svdup_u16 (5), svdup_u16 (6)); -+} -+ -+/* -+** caller_u16: -+** ... -+** bl callee_u16 -+** ptrue (p[0-7])\.b, all -+** mla z0\.h, \1/m, z1\.h, z2\.h -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svuint16_t __attribute__((noipa)) -+caller_u16 (void) -+{ -+ svuint16x3_t res; -+ res = callee_u16 (); -+ return svmla_x (svptrue_b16 (), -+ svget3 (res, 0), svget3 (res, 1), svget3 (res, 2)); -+} -+ -+/* -+** callee_f16: -+** fmov z0\.h, #1\.0(?:e\+0)? -+** fmov z1\.h, #2\.0(?:e\+0)? -+** fmov z2\.h, #3\.0(?:e\+0)? -+** ret -+*/ -+svfloat16x3_t __attribute__((noipa)) -+callee_f16 (void) -+{ -+ return svcreate3 (svdup_f16 (1), svdup_f16 (2), svdup_f16 (3)); -+} -+ -+/* -+** caller_f16: -+** ... -+** bl callee_f16 -+** ptrue (p[0-7])\.b, all -+** fmla z0\.h, \1/m, z1\.h, z2\.h -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svfloat16_t __attribute__((noipa)) -+caller_f16 (void) -+{ -+ svfloat16x3_t res; -+ res = callee_f16 (); -+ return svmla_x (svptrue_b16 (), -+ svget3 (res, 0), svget3 (res, 1), svget3 (res, 2)); -+} -+ -+/* -+** callee_bf16: -+** mov z0\.h, h0 -+** mov z1\.h, h1 -+** mov z2\.h, h2 -+** ret -+*/ -+svbfloat16x3_t __attribute__((noipa)) -+callee_bf16 (bfloat16_t h0, bfloat16_t h1, bfloat16_t h2) -+{ -+ return svcreate3 (svdup_bf16 (h0), svdup_bf16 (h1), svdup_bf16 (h2)); -+} -+ -+/* -+** caller_bf16: -+** ... -+** bl callee_bf16 -+** trn2 z0\.h, z0\.h, z2\.h -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svbfloat16_t __attribute__((noipa)) -+caller_bf16 (bfloat16_t h0, bfloat16_t h1, bfloat16_t h2) -+{ -+ svbfloat16x3_t res; -+ res = callee_bf16 (h0, h1, h2); -+ return svtrn2 (svget3 (res, 0), svget3 (res, 2)); -+} -+ -+/* -+** callee_s32: -+** mov z0\.s, #1 -+** mov z1\.s, #2 -+** mov z2\.s, #3 -+** ret -+*/ -+svint32x3_t __attribute__((noipa)) -+callee_s32 (void) -+{ -+ return svcreate3 (svdup_s32 (1), svdup_s32 (2), svdup_s32 (3)); -+} -+ -+/* -+** caller_s32: -+** ... 
-+** bl callee_s32 -+** ptrue (p[0-7])\.b, all -+** mad z0\.s, \1/m, z1\.s, z2\.s -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svint32_t __attribute__((noipa)) -+caller_s32 (void) -+{ -+ svint32x3_t res; -+ res = callee_s32 (); -+ return svmad_x (svptrue_b32 (), -+ svget3 (res, 0), svget3 (res, 1), svget3 (res, 2)); -+} -+ -+/* -+** callee_u32: -+** mov z0\.s, #4 -+** mov z1\.s, #5 -+** mov z2\.s, #6 -+** ret -+*/ -+svuint32x3_t __attribute__((noipa)) -+callee_u32 (void) -+{ -+ return svcreate3 (svdup_u32 (4), svdup_u32 (5), svdup_u32 (6)); -+} -+ -+/* -+** caller_u32: -+** ... -+** bl callee_u32 -+** ptrue (p[0-7])\.b, all -+** msb z0\.s, \1/m, z1\.s, z2\.s -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svuint32_t __attribute__((noipa)) -+caller_u32 (void) -+{ -+ svuint32x3_t res; -+ res = callee_u32 (); -+ return svmsb_x (svptrue_b32 (), -+ svget3 (res, 0), svget3 (res, 1), svget3 (res, 2)); -+} -+ -+/* -+** callee_f32: -+** fmov z0\.s, #1\.0(?:e\+0)? -+** fmov z1\.s, #2\.0(?:e\+0)? -+** fmov z2\.s, #3\.0(?:e\+0)? -+** ret -+*/ -+svfloat32x3_t __attribute__((noipa)) -+callee_f32 (void) -+{ -+ return svcreate3 (svdup_f32 (1), svdup_f32 (2), svdup_f32 (3)); -+} -+ -+/* -+** caller_f32: -+** ... -+** bl callee_f32 -+** ptrue (p[0-7])\.b, all -+** fmla z0\.s, \1/m, z1\.s, z2\.s -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svfloat32_t __attribute__((noipa)) -+caller_f32 (void) -+{ -+ svfloat32x3_t res; -+ res = callee_f32 (); -+ return svmla_x (svptrue_b32 (), -+ svget3 (res, 0), svget3 (res, 1), svget3 (res, 2)); -+} -+ -+/* -+** callee_s64: -+** mov z0\.d, #1 -+** mov z1\.d, #2 -+** mov z2\.d, #3 -+** ret -+*/ -+svint64x3_t __attribute__((noipa)) -+callee_s64 (void) -+{ -+ return svcreate3 (svdup_s64 (1), svdup_s64 (2), svdup_s64 (3)); -+} -+ -+/* -+** caller_s64: -+** ... -+** bl callee_s64 -+** ptrue (p[0-7])\.b, all -+** mls z0\.d, \1/m, z1\.d, z2\.d -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svint64_t __attribute__((noipa)) -+caller_s64 (void) -+{ -+ svint64x3_t res; -+ res = callee_s64 (); -+ return svmls_x (svptrue_b64 (), -+ svget3 (res, 0), svget3 (res, 1), svget3 (res, 2)); -+} -+ -+/* -+** callee_u64: -+** mov z0\.d, #4 -+** mov z1\.d, #5 -+** mov z2\.d, #6 -+** ret -+*/ -+svuint64x3_t __attribute__((noipa)) -+callee_u64 (void) -+{ -+ return svcreate3 (svdup_u64 (4), svdup_u64 (5), svdup_u64 (6)); -+} -+ -+/* -+** caller_u64: -+** ... -+** bl callee_u64 -+** ptrue (p[0-7])\.b, all -+** mla z0\.d, \1/m, z1\.d, z2\.d -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svuint64_t __attribute__((noipa)) -+caller_u64 (void) -+{ -+ svuint64x3_t res; -+ res = callee_u64 (); -+ return svmla_x (svptrue_b64 (), -+ svget3 (res, 0), svget3 (res, 1), svget3 (res, 2)); -+} -+ -+/* -+** callee_f64: -+** fmov z0\.d, #1\.0(?:e\+0)? -+** fmov z1\.d, #2\.0(?:e\+0)? -+** fmov z2\.d, #3\.0(?:e\+0)? -+** ret -+*/ -+svfloat64x3_t __attribute__((noipa)) -+callee_f64 (void) -+{ -+ return svcreate3 (svdup_f64 (1), svdup_f64 (2), svdup_f64 (3)); -+} -+ -+/* -+** caller_f64: -+** ... 
-+** bl callee_f64 -+** ptrue (p[0-7])\.b, all -+** fmla z0\.d, \1/m, z1\.d, z2\.d -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svfloat64_t __attribute__((noipa)) -+caller_f64 (void) -+{ -+ svfloat64x3_t res; -+ res = callee_f64 (); -+ return svmla_x (svptrue_b64 (), -+ svget3 (res, 0), svget3 (res, 1), svget3 (res, 2)); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_9.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_9.c -new file mode 100644 -index 000000000..ad32e1fe5 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/return_9.c -@@ -0,0 +1,438 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -frename-registers -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#include -+ -+/* -+** callee_s8: -+** mov z0\.b, #1 -+** mov z1\.b, #2 -+** mov z2\.b, #3 -+** mov z3\.b, #4 -+** ret -+*/ -+svint8x4_t __attribute__((noipa)) -+callee_s8 (void) -+{ -+ return svcreate4 (svdup_s8 (1), svdup_s8 (2), svdup_s8 (3), svdup_s8 (4)); -+} -+ -+/* -+** caller_s8: -+** ... -+** bl callee_s8 -+** add (z[2-7]\.b), z2\.b, z3\.b -+** ptrue (p[0-7])\.b, all -+** mla z0\.b, \2/m, (z1\.b, \1|\1, z1\.b) -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svint8_t __attribute__((noipa)) -+caller_s8 (void) -+{ -+ svint8x4_t res; -+ res = callee_s8 (); -+ return svmla_x (svptrue_b8 (), svget4 (res, 0), svget4 (res, 1), -+ svadd_x (svptrue_b8 (), -+ svget4 (res, 2), -+ svget4 (res, 3))); -+} -+ -+/* -+** callee_u8: -+** mov z0\.b, #4 -+** mov z1\.b, #5 -+** mov z2\.b, #6 -+** mov z3\.b, #7 -+** ret -+*/ -+svuint8x4_t __attribute__((noipa)) -+callee_u8 (void) -+{ -+ return svcreate4 (svdup_u8 (4), svdup_u8 (5), svdup_u8 (6), svdup_u8 (7)); -+} -+ -+/* -+** caller_u8: -+** ... -+** bl callee_u8 -+** sub (z[2-7]\.b), z2\.b, z3\.b -+** ptrue (p[0-7])\.b, all -+** mla z0\.b, \2/m, (z1\.b, \1|\1, z1\.b) -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svuint8_t __attribute__((noipa)) -+caller_u8 (void) -+{ -+ svuint8x4_t res; -+ res = callee_u8 (); -+ return svmla_x (svptrue_b8 (), svget4 (res, 0), svget4 (res, 1), -+ svsub_x (svptrue_b8 (), -+ svget4 (res, 2), -+ svget4 (res, 3))); -+} -+ -+/* -+** callee_s16: -+** mov z0\.h, #1 -+** mov z1\.h, #2 -+** mov z2\.h, #3 -+** mov z3\.h, #4 -+** ret -+*/ -+svint16x4_t __attribute__((noipa)) -+callee_s16 (void) -+{ -+ return svcreate4 (svdup_s16 (1), svdup_s16 (2), -+ svdup_s16 (3), svdup_s16 (4)); -+} -+ -+/* -+** caller_s16: -+** ... -+** bl callee_s16 -+** add (z[2-7]\.h), z2\.h, z3\.h -+** ptrue (p[0-7])\.b, all -+** mad z0\.h, \2/m, (z1\.h, \1|\1, z1\.h) -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svint16_t __attribute__((noipa)) -+caller_s16 (void) -+{ -+ svint16x4_t res; -+ res = callee_s16 (); -+ return svmad_x (svptrue_b16 (), svget4 (res, 0), svget4 (res, 1), -+ svadd_x (svptrue_b16 (), -+ svget4 (res, 2), -+ svget4 (res, 3))); -+} -+ -+/* -+** callee_u16: -+** mov z0\.h, #4 -+** mov z1\.h, #5 -+** mov z2\.h, #6 -+** mov z3\.h, #7 -+** ret -+*/ -+svuint16x4_t __attribute__((noipa)) -+callee_u16 (void) -+{ -+ return svcreate4 (svdup_u16 (4), svdup_u16 (5), -+ svdup_u16 (6), svdup_u16 (7)); -+} -+ -+/* -+** caller_u16: -+** ... 
-+** bl callee_u16 -+** sub (z[2-7]\.h), z2\.h, z3\.h -+** ptrue (p[0-7])\.b, all -+** mad z0\.h, \2/m, (z1\.h, \1|\1, z1\.h) -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svuint16_t __attribute__((noipa)) -+caller_u16 (void) -+{ -+ svuint16x4_t res; -+ res = callee_u16 (); -+ return svmad_x (svptrue_b16 (), svget4 (res, 0), svget4 (res, 1), -+ svsub_x (svptrue_b16 (), -+ svget4 (res, 2), -+ svget4 (res, 3))); -+} -+ -+/* -+** callee_f16: -+** fmov z0\.h, #1\.0(?:e\+0)? -+** fmov z1\.h, #2\.0(?:e\+0)? -+** fmov z2\.h, #3\.0(?:e\+0)? -+** fmov z3\.h, #4\.0(?:e\+0)? -+** ret -+*/ -+svfloat16x4_t __attribute__((noipa)) -+callee_f16 (void) -+{ -+ return svcreate4 (svdup_f16 (1), svdup_f16 (2), -+ svdup_f16 (3), svdup_f16 (4)); -+} -+ -+/* -+** caller_f16: -+** ... -+** bl callee_f16 -+** fadd (z[0-9]+\.h), z0\.h, z1\.h -+** fmul (z[0-9]+\.h), \1, z2\.h -+** fadd z0\.h, \2, z3\.h -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svfloat16_t __attribute__((noipa)) -+caller_f16 (void) -+{ -+ svfloat16x4_t res; -+ res = callee_f16 (); -+ return svadd_x (svptrue_b16 (), -+ svmul_x (svptrue_b16 (), -+ svadd_x (svptrue_b16 (), svget4 (res, 0), -+ svget4 (res, 1)), -+ svget4 (res, 2)), -+ svget4 (res, 3)); -+} -+ -+/* -+** callee_bf16: -+** mov z0\.h, h4 -+** mov z1\.h, h5 -+** mov z2\.h, h6 -+** mov z3\.h, h7 -+** ret -+*/ -+svbfloat16x4_t __attribute__((noipa)) -+callee_bf16 (bfloat16_t h0, bfloat16_t h1, bfloat16_t h2, bfloat16_t h3, -+ bfloat16_t h4, bfloat16_t h5, bfloat16_t h6, bfloat16_t h7) -+{ -+ return svcreate4 (svdup_bf16 (h4), svdup_bf16 (h5), -+ svdup_bf16 (h6), svdup_bf16 (h7)); -+} -+ -+/* -+** caller_bf16: -+** ... -+** bl callee_bf16 -+** trn2 z0\.h, z0\.h, z3\.h -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svbfloat16_t __attribute__((noipa)) -+caller_bf16 (bfloat16_t h0, bfloat16_t h1, bfloat16_t h2, bfloat16_t h3, -+ bfloat16_t h4, bfloat16_t h5, bfloat16_t h6, bfloat16_t h7) -+{ -+ svbfloat16x4_t res; -+ res = callee_bf16 (h0, h1, h2, h3, h4, h5, h6, h7); -+ return svtrn2 (svget4 (res, 0), svget4 (res, 3)); -+} -+ -+/* -+** callee_s32: -+** mov z0\.s, #1 -+** mov z1\.s, #2 -+** mov z2\.s, #3 -+** mov z3\.s, #4 -+** ret -+*/ -+svint32x4_t __attribute__((noipa)) -+callee_s32 (void) -+{ -+ return svcreate4 (svdup_s32 (1), svdup_s32 (2), -+ svdup_s32 (3), svdup_s32 (4)); -+} -+ -+/* -+** caller_s32: -+** ... -+** bl callee_s32 -+** add (z[2-7]\.s), z2\.s, z3\.s -+** ptrue (p[0-7])\.b, all -+** msb z0\.s, \2/m, (z1\.s, \1|\1, z1\.s) -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svint32_t __attribute__((noipa)) -+caller_s32 (void) -+{ -+ svint32x4_t res; -+ res = callee_s32 (); -+ return svmsb_x (svptrue_b32 (), svget4 (res, 0), svget4 (res, 1), -+ svadd_x (svptrue_b32 (), -+ svget4 (res, 2), -+ svget4 (res, 3))); -+} -+ -+/* -+** callee_u32: -+** mov z0\.s, #4 -+** mov z1\.s, #5 -+** mov z2\.s, #6 -+** mov z3\.s, #7 -+** ret -+*/ -+svuint32x4_t __attribute__((noipa)) -+callee_u32 (void) -+{ -+ return svcreate4 (svdup_u32 (4), svdup_u32 (5), -+ svdup_u32 (6), svdup_u32 (7)); -+} -+ -+/* -+** caller_u32: -+** ... -+** bl callee_u32 -+** sub (z[2-7]\.s), z2\.s, z3\.s -+** ptrue (p[0-7])\.b, all -+** msb z0\.s, \2/m, (z1\.s, \1|\1, z1\.s) -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svuint32_t __attribute__((noipa)) -+caller_u32 (void) -+{ -+ svuint32x4_t res; -+ res = callee_u32 (); -+ return svmsb_x (svptrue_b32 (), svget4 (res, 0), svget4 (res, 1), -+ svsub_x (svptrue_b32 (), -+ svget4 (res, 2), -+ svget4 (res, 3))); -+} -+ -+/* -+** callee_f32: -+** fmov z0\.s, #1\.0(?:e\+0)? 
-+** fmov z1\.s, #2\.0(?:e\+0)? -+** fmov z2\.s, #3\.0(?:e\+0)? -+** fmov z3\.s, #4\.0(?:e\+0)? -+** ret -+*/ -+svfloat32x4_t __attribute__((noipa)) -+callee_f32 (void) -+{ -+ return svcreate4 (svdup_f32 (1), svdup_f32 (2), -+ svdup_f32 (3), svdup_f32 (4)); -+} -+ -+/* -+** caller_f32: -+** ... -+** bl callee_f32 -+** fadd (z[0-9]+\.s), z0\.s, z1\.s -+** fmul (z[0-9]+\.s), \1, z2\.s -+** fadd z0\.s, \2, z3\.s -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svfloat32_t __attribute__((noipa)) -+caller_f32 (void) -+{ -+ svfloat32x4_t res; -+ res = callee_f32 (); -+ return svadd_x (svptrue_b32 (), -+ svmul_x (svptrue_b32 (), -+ svadd_x (svptrue_b32 (), svget4 (res, 0), -+ svget4 (res, 1)), -+ svget4 (res, 2)), -+ svget4 (res, 3)); -+} -+ -+/* -+** callee_s64: -+** mov z0\.d, #1 -+** mov z1\.d, #2 -+** mov z2\.d, #3 -+** mov z3\.d, #4 -+** ret -+*/ -+svint64x4_t __attribute__((noipa)) -+callee_s64 (void) -+{ -+ return svcreate4 (svdup_s64 (1), svdup_s64 (2), -+ svdup_s64 (3), svdup_s64 (4)); -+} -+ -+/* -+** caller_s64: -+** ... -+** bl callee_s64 -+** add (z[2-7]\.d), z2\.d, z3\.d -+** ptrue (p[0-7])\.b, all -+** mls z0\.d, \2/m, (z1\.d, \1|\1, z1\.d) -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svint64_t __attribute__((noipa)) -+caller_s64 (void) -+{ -+ svint64x4_t res; -+ res = callee_s64 (); -+ return svmls_x (svptrue_b64 (), svget4 (res, 0), svget4 (res, 1), -+ svadd_x (svptrue_b64 (), -+ svget4 (res, 2), -+ svget4 (res, 3))); -+} -+ -+/* -+** callee_u64: -+** mov z0\.d, #4 -+** mov z1\.d, #5 -+** mov z2\.d, #6 -+** mov z3\.d, #7 -+** ret -+*/ -+svuint64x4_t __attribute__((noipa)) -+callee_u64 (void) -+{ -+ return svcreate4 (svdup_u64 (4), svdup_u64 (5), -+ svdup_u64 (6), svdup_u64 (7)); -+} -+ -+/* -+** caller_u64: -+** ... -+** bl callee_u64 -+** sub (z[2-7]\.d), z2\.d, z3\.d -+** ptrue (p[0-7])\.b, all -+** mls z0\.d, \2/m, (z1\.d, \1|\1, z1\.d) -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svuint64_t __attribute__((noipa)) -+caller_u64 (void) -+{ -+ svuint64x4_t res; -+ res = callee_u64 (); -+ return svmls_x (svptrue_b64 (), svget4 (res, 0), svget4 (res, 1), -+ svsub_x (svptrue_b64 (), -+ svget4 (res, 2), -+ svget4 (res, 3))); -+} -+ -+/* -+** callee_f64: -+** fmov z0\.d, #1\.0(?:e\+0)? -+** fmov z1\.d, #2\.0(?:e\+0)? -+** fmov z2\.d, #3\.0(?:e\+0)? -+** fmov z3\.d, #4\.0(?:e\+0)? -+** ret -+*/ -+svfloat64x4_t __attribute__((noipa)) -+callee_f64 (void) -+{ -+ return svcreate4 (svdup_f64 (1), svdup_f64 (2), -+ svdup_f64 (3), svdup_f64 (4)); -+} -+ -+/* -+** caller_f64: -+** ... 
-+** bl callee_f64 -+** fadd (z[0-9]+\.d), z0\.d, z1\.d -+** fmul (z[0-9]+\.d), \1, z2\.d -+** fadd z0\.d, \2, z3\.d -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svfloat64_t __attribute__((noipa)) -+caller_f64 (void) -+{ -+ svfloat64x4_t res; -+ res = callee_f64 (); -+ return svadd_x (svptrue_b64 (), -+ svmul_x (svptrue_b64 (), -+ svadd_x (svptrue_b64 (), svget4 (res, 0), -+ svget4 (res, 1)), -+ svget4 (res, 2)), -+ svget4 (res, 3)); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_1_be_nowrap.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_1_be_nowrap.c -new file mode 100644 -index 000000000..4eee04226 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_1_be_nowrap.c -@@ -0,0 +1,196 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -mbig-endian -fno-shrink-wrap -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** test_1: -+** addvl sp, sp, #-17 -+** str p4, \[sp\] -+** str p5, \[sp, #1, mul vl\] -+** str p6, \[sp, #2, mul vl\] -+** str p7, \[sp, #3, mul vl\] -+** str p8, \[sp, #4, mul vl\] -+** str p9, \[sp, #5, mul vl\] -+** str p10, \[sp, #6, mul vl\] -+** str p11, \[sp, #7, mul vl\] -+** ptrue p1\.b, all -+** st1d z8\.d, p1, \[sp, #1, mul vl\] -+** st1d z9\.d, p1, \[sp, #2, mul vl\] -+** st1d z10\.d, p1, \[sp, #3, mul vl\] -+** st1d z11\.d, p1, \[sp, #4, mul vl\] -+** st1d z12\.d, p1, \[sp, #5, mul vl\] -+** st1d z13\.d, p1, \[sp, #6, mul vl\] -+** st1d z14\.d, p1, \[sp, #7, mul vl\] -+** addvl x11, sp, #16 -+** st1d z15\.d, p1, \[x11, #-8, mul vl\] -+** str z16, \[sp, #9, mul vl\] -+** str z17, \[sp, #10, mul vl\] -+** str z18, \[sp, #11, mul vl\] -+** str z19, \[sp, #12, mul vl\] -+** str z20, \[sp, #13, mul vl\] -+** str z21, \[sp, #14, mul vl\] -+** str z22, \[sp, #15, mul vl\] -+** str z23, \[sp, #16, mul vl\] -+** ptrue p0\.b, all -+** ptrue p1\.b, all -+** ld1d z8\.d, p1/z, \[sp, #1, mul vl\] -+** ld1d z9\.d, p1/z, \[sp, #2, mul vl\] -+** ld1d z10\.d, p1/z, \[sp, #3, mul vl\] -+** ld1d z11\.d, p1/z, \[sp, #4, mul vl\] -+** ld1d z12\.d, p1/z, \[sp, #5, mul vl\] -+** ld1d z13\.d, p1/z, \[sp, #6, mul vl\] -+** ld1d z14\.d, p1/z, \[sp, #7, mul vl\] -+** addvl x11, sp, #16 -+** ld1d z15\.d, p1/z, \[x11, #-8, mul vl\] -+** ldr z16, \[sp, #9, mul vl\] -+** ldr z17, \[sp, #10, mul vl\] -+** ldr z18, \[sp, #11, mul vl\] -+** ldr z19, \[sp, #12, mul vl\] -+** ldr z20, \[sp, #13, mul vl\] -+** ldr z21, \[sp, #14, mul vl\] -+** ldr z22, \[sp, #15, mul vl\] -+** ldr z23, \[sp, #16, mul vl\] -+** ldr p4, \[sp\] -+** ldr p5, \[sp, #1, mul vl\] -+** ldr p6, \[sp, #2, mul vl\] -+** ldr p7, \[sp, #3, mul vl\] -+** ldr p8, \[sp, #4, mul vl\] -+** ldr p9, \[sp, #5, mul vl\] -+** ldr p10, \[sp, #6, mul vl\] -+** ldr p11, \[sp, #7, mul vl\] -+** addvl sp, sp, #17 -+** ret -+*/ -+svbool_t -+test_1 (void) -+{ -+ asm volatile ("" ::: -+ "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", -+ "z8", "z9", "z10", "z11", "z12", "z13", "z14", "z15", -+ "z16", "z17", "z18", "z19", "z20", "z21", "z22", "z23", -+ "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", -+ "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", -+ "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_2: -+** ptrue p0\.b, all -+** ret -+*/ -+svbool_t -+test_2 (void) -+{ -+ asm volatile ("" ::: -+ "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", -+ "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", -+ "p0", "p1", "p2", "p3", "p12", "p13", "p14", "p15"); -+ return 
svptrue_b8 (); -+} -+ -+/* -+** test_3: -+** addvl sp, sp, #-6 -+** str p5, \[sp\] -+** str p6, \[sp, #1, mul vl\] -+** str p11, \[sp, #2, mul vl\] -+** ptrue p1\.b, all -+** st1d z8\.d, p1, \[sp, #1, mul vl\] -+** st1d z13\.d, p1, \[sp, #2, mul vl\] -+** str z19, \[sp, #3, mul vl\] -+** str z20, \[sp, #4, mul vl\] -+** str z22, \[sp, #5, mul vl\] -+** ptrue p0\.b, all -+** ptrue p1\.b, all -+** ld1d z8\.d, p1/z, \[sp, #1, mul vl\] -+** ld1d z13\.d, p1/z, \[sp, #2, mul vl\] -+** ldr z19, \[sp, #3, mul vl\] -+** ldr z20, \[sp, #4, mul vl\] -+** ldr z22, \[sp, #5, mul vl\] -+** ldr p5, \[sp\] -+** ldr p6, \[sp, #1, mul vl\] -+** ldr p11, \[sp, #2, mul vl\] -+** addvl sp, sp, #6 -+** ret -+*/ -+svbool_t -+test_3 (void) -+{ -+ asm volatile ("" ::: -+ "z8", "z13", "z19", "z20", "z22", -+ "p5", "p6", "p11"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_4: -+** addvl sp, sp, #-1 -+** str p4, \[sp\] -+** ptrue p0\.b, all -+** ldr p4, \[sp\] -+** addvl sp, sp, #1 -+** ret -+*/ -+svbool_t -+test_4 (void) -+{ -+ asm volatile ("" ::: "p4"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_5: -+** addvl sp, sp, #-1 -+** ptrue p1\.b, all -+** st1d z15\.d, p1, \[sp\] -+** ptrue p0\.b, all -+** ptrue p1\.b, all -+** ld1d z15\.d, p1/z, \[sp\] -+** addvl sp, sp, #1 -+** ret -+*/ -+svbool_t -+test_5 (void) -+{ -+ asm volatile ("" ::: "z15"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_6: -+** addvl sp, sp, #-2 -+** str p4, \[sp\] -+** ptrue p4\.b, all -+** st1d z15\.d, p4, \[sp, #1, mul vl\] -+** mov z0\.b, #1 -+** ptrue p4\.b, all -+** ld1d z15\.d, p4/z, \[sp, #1, mul vl\] -+** ldr p4, \[sp\] -+** addvl sp, sp, #2 -+** ret -+*/ -+svint8_t -+test_6 (svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) -+{ -+ asm volatile ("" :: "Upa" (p0), "Upa" (p1), "Upa" (p2), "Upa" (p3) : "z15"); -+ return svdup_s8 (1); -+} -+ -+/* -+** test_7: -+** addvl sp, sp, #-1 -+** str z16, \[sp\] -+** ptrue p0\.b, all -+** ldr z16, \[sp\] -+** addvl sp, sp, #1 -+** ret -+*/ -+svbool_t -+test_7 (void) -+{ -+ asm volatile ("" ::: "z16"); -+ return svptrue_b8 (); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_1_be_wrap.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_1_be_wrap.c -new file mode 100644 -index 000000000..e88a3dd1d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_1_be_wrap.c -@@ -0,0 +1,196 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -mbig-endian -fshrink-wrap -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** test_1: -+** addvl sp, sp, #-17 -+** str p4, \[sp\] -+** str p5, \[sp, #1, mul vl\] -+** str p6, \[sp, #2, mul vl\] -+** str p7, \[sp, #3, mul vl\] -+** str p8, \[sp, #4, mul vl\] -+** str p9, \[sp, #5, mul vl\] -+** str p10, \[sp, #6, mul vl\] -+** str p11, \[sp, #7, mul vl\] -+** ptrue p1\.b, all -+** st1d z8\.d, p1, \[sp, #1, mul vl\] -+** st1d z9\.d, p1, \[sp, #2, mul vl\] -+** st1d z10\.d, p1, \[sp, #3, mul vl\] -+** st1d z11\.d, p1, \[sp, #4, mul vl\] -+** st1d z12\.d, p1, \[sp, #5, mul vl\] -+** st1d z13\.d, p1, \[sp, #6, mul vl\] -+** st1d z14\.d, p1, \[sp, #7, mul vl\] -+** addvl x11, sp, #16 -+** st1d z15\.d, p1, \[x11, #-8, mul vl\] -+** str z16, \[sp, #9, mul vl\] -+** str z17, \[sp, #10, mul vl\] -+** str z18, \[sp, #11, mul vl\] -+** str z19, \[sp, #12, mul vl\] -+** str z20, \[sp, #13, mul vl\] -+** str z21, \[sp, #14, mul vl\] -+** str z22, \[sp, #15, mul vl\] -+** str z23, \[sp, #16, mul vl\] -+** ptrue p0\.b, all -+** ptrue p1\.b, all -+** ld1d z8\.d, p1/z, 
\[sp, #1, mul vl\] -+** ld1d z9\.d, p1/z, \[sp, #2, mul vl\] -+** ld1d z10\.d, p1/z, \[sp, #3, mul vl\] -+** ld1d z11\.d, p1/z, \[sp, #4, mul vl\] -+** ld1d z12\.d, p1/z, \[sp, #5, mul vl\] -+** ld1d z13\.d, p1/z, \[sp, #6, mul vl\] -+** ld1d z14\.d, p1/z, \[sp, #7, mul vl\] -+** addvl x11, sp, #16 -+** ld1d z15\.d, p1/z, \[x11, #-8, mul vl\] -+** ldr z16, \[sp, #9, mul vl\] -+** ldr z17, \[sp, #10, mul vl\] -+** ldr z18, \[sp, #11, mul vl\] -+** ldr z19, \[sp, #12, mul vl\] -+** ldr z20, \[sp, #13, mul vl\] -+** ldr z21, \[sp, #14, mul vl\] -+** ldr z22, \[sp, #15, mul vl\] -+** ldr z23, \[sp, #16, mul vl\] -+** ldr p4, \[sp\] -+** ldr p5, \[sp, #1, mul vl\] -+** ldr p6, \[sp, #2, mul vl\] -+** ldr p7, \[sp, #3, mul vl\] -+** ldr p8, \[sp, #4, mul vl\] -+** ldr p9, \[sp, #5, mul vl\] -+** ldr p10, \[sp, #6, mul vl\] -+** ldr p11, \[sp, #7, mul vl\] -+** addvl sp, sp, #17 -+** ret -+*/ -+svbool_t -+test_1 (void) -+{ -+ asm volatile ("" ::: -+ "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", -+ "z8", "z9", "z10", "z11", "z12", "z13", "z14", "z15", -+ "z16", "z17", "z18", "z19", "z20", "z21", "z22", "z23", -+ "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", -+ "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", -+ "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_2: -+** ptrue p0\.b, all -+** ret -+*/ -+svbool_t -+test_2 (void) -+{ -+ asm volatile ("" ::: -+ "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", -+ "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", -+ "p0", "p1", "p2", "p3", "p12", "p13", "p14", "p15"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_3: -+** addvl sp, sp, #-6 -+** str p5, \[sp\] -+** str p6, \[sp, #1, mul vl\] -+** str p11, \[sp, #2, mul vl\] -+** ptrue p1\.b, all -+** st1d z8\.d, p1, \[sp, #1, mul vl\] -+** st1d z13\.d, p1, \[sp, #2, mul vl\] -+** str z19, \[sp, #3, mul vl\] -+** str z20, \[sp, #4, mul vl\] -+** str z22, \[sp, #5, mul vl\] -+** ptrue p0\.b, all -+** ptrue p1\.b, all -+** ld1d z8\.d, p1/z, \[sp, #1, mul vl\] -+** ld1d z13\.d, p1/z, \[sp, #2, mul vl\] -+** ldr z19, \[sp, #3, mul vl\] -+** ldr z20, \[sp, #4, mul vl\] -+** ldr z22, \[sp, #5, mul vl\] -+** ldr p5, \[sp\] -+** ldr p6, \[sp, #1, mul vl\] -+** ldr p11, \[sp, #2, mul vl\] -+** addvl sp, sp, #6 -+** ret -+*/ -+svbool_t -+test_3 (void) -+{ -+ asm volatile ("" ::: -+ "z8", "z13", "z19", "z20", "z22", -+ "p5", "p6", "p11"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_4: -+** addvl sp, sp, #-1 -+** str p4, \[sp\] -+** ptrue p0\.b, all -+** ldr p4, \[sp\] -+** addvl sp, sp, #1 -+** ret -+*/ -+svbool_t -+test_4 (void) -+{ -+ asm volatile ("" ::: "p4"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_5: -+** addvl sp, sp, #-1 -+** ptrue p1\.b, all -+** st1d z15\.d, p1, \[sp\] -+** ptrue p0\.b, all -+** ptrue p1\.b, all -+** ld1d z15\.d, p1/z, \[sp\] -+** addvl sp, sp, #1 -+** ret -+*/ -+svbool_t -+test_5 (void) -+{ -+ asm volatile ("" ::: "z15"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_6: -+** addvl sp, sp, #-2 -+** str p4, \[sp\] -+** ptrue p4\.b, all -+** st1d z15\.d, p4, \[sp, #1, mul vl\] -+** mov z0\.b, #1 -+** ptrue p4\.b, all -+** ld1d z15\.d, p4/z, \[sp, #1, mul vl\] -+** ldr p4, \[sp\] -+** addvl sp, sp, #2 -+** ret -+*/ -+svint8_t -+test_6 (svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) -+{ -+ asm volatile ("" :: "Upa" (p0), "Upa" (p1), "Upa" (p2), "Upa" (p3) : "z15"); -+ return svdup_s8 (1); -+} -+ -+/* -+** test_7: -+** addvl sp, sp, #-1 -+** str z16, \[sp\] -+** ptrue p0\.b, all -+** ldr z16, \[sp\] -+** addvl sp, 
sp, #1 -+** ret -+*/ -+svbool_t -+test_7 (void) -+{ -+ asm volatile ("" ::: "z16"); -+ return svptrue_b8 (); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_1_le_nowrap.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_1_le_nowrap.c -new file mode 100644 -index 000000000..d14cd79b1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_1_le_nowrap.c -@@ -0,0 +1,184 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -mlittle-endian -fno-shrink-wrap -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** test_1: -+** addvl sp, sp, #-17 -+** str p4, \[sp\] -+** str p5, \[sp, #1, mul vl\] -+** str p6, \[sp, #2, mul vl\] -+** str p7, \[sp, #3, mul vl\] -+** str p8, \[sp, #4, mul vl\] -+** str p9, \[sp, #5, mul vl\] -+** str p10, \[sp, #6, mul vl\] -+** str p11, \[sp, #7, mul vl\] -+** str z8, \[sp, #1, mul vl\] -+** str z9, \[sp, #2, mul vl\] -+** str z10, \[sp, #3, mul vl\] -+** str z11, \[sp, #4, mul vl\] -+** str z12, \[sp, #5, mul vl\] -+** str z13, \[sp, #6, mul vl\] -+** str z14, \[sp, #7, mul vl\] -+** str z15, \[sp, #8, mul vl\] -+** str z16, \[sp, #9, mul vl\] -+** str z17, \[sp, #10, mul vl\] -+** str z18, \[sp, #11, mul vl\] -+** str z19, \[sp, #12, mul vl\] -+** str z20, \[sp, #13, mul vl\] -+** str z21, \[sp, #14, mul vl\] -+** str z22, \[sp, #15, mul vl\] -+** str z23, \[sp, #16, mul vl\] -+** ptrue p0\.b, all -+** ldr z8, \[sp, #1, mul vl\] -+** ldr z9, \[sp, #2, mul vl\] -+** ldr z10, \[sp, #3, mul vl\] -+** ldr z11, \[sp, #4, mul vl\] -+** ldr z12, \[sp, #5, mul vl\] -+** ldr z13, \[sp, #6, mul vl\] -+** ldr z14, \[sp, #7, mul vl\] -+** ldr z15, \[sp, #8, mul vl\] -+** ldr z16, \[sp, #9, mul vl\] -+** ldr z17, \[sp, #10, mul vl\] -+** ldr z18, \[sp, #11, mul vl\] -+** ldr z19, \[sp, #12, mul vl\] -+** ldr z20, \[sp, #13, mul vl\] -+** ldr z21, \[sp, #14, mul vl\] -+** ldr z22, \[sp, #15, mul vl\] -+** ldr z23, \[sp, #16, mul vl\] -+** ldr p4, \[sp\] -+** ldr p5, \[sp, #1, mul vl\] -+** ldr p6, \[sp, #2, mul vl\] -+** ldr p7, \[sp, #3, mul vl\] -+** ldr p8, \[sp, #4, mul vl\] -+** ldr p9, \[sp, #5, mul vl\] -+** ldr p10, \[sp, #6, mul vl\] -+** ldr p11, \[sp, #7, mul vl\] -+** addvl sp, sp, #17 -+** ret -+*/ -+svbool_t -+test_1 (void) -+{ -+ asm volatile ("" ::: -+ "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", -+ "z8", "z9", "z10", "z11", "z12", "z13", "z14", "z15", -+ "z16", "z17", "z18", "z19", "z20", "z21", "z22", "z23", -+ "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", -+ "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", -+ "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_2: -+** ptrue p0\.b, all -+** ret -+*/ -+svbool_t -+test_2 (void) -+{ -+ asm volatile ("" ::: -+ "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", -+ "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", -+ "p0", "p1", "p2", "p3", "p12", "p13", "p14", "p15"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_3: -+** addvl sp, sp, #-6 -+** str p5, \[sp\] -+** str p6, \[sp, #1, mul vl\] -+** str p11, \[sp, #2, mul vl\] -+** str z8, \[sp, #1, mul vl\] -+** str z13, \[sp, #2, mul vl\] -+** str z19, \[sp, #3, mul vl\] -+** str z20, \[sp, #4, mul vl\] -+** str z22, \[sp, #5, mul vl\] -+** ptrue p0\.b, all -+** ldr z8, \[sp, #1, mul vl\] -+** ldr z13, \[sp, #2, mul vl\] -+** ldr z19, \[sp, #3, mul vl\] -+** ldr z20, \[sp, #4, mul vl\] -+** ldr z22, \[sp, #5, mul vl\] -+** ldr p5, \[sp\] -+** ldr p6, \[sp, #1, mul vl\] -+** ldr p11, \[sp, 
#2, mul vl\] -+** addvl sp, sp, #6 -+** ret -+*/ -+svbool_t -+test_3 (void) -+{ -+ asm volatile ("" ::: -+ "z8", "z13", "z19", "z20", "z22", -+ "p5", "p6", "p11"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_4: -+** addvl sp, sp, #-1 -+** str p4, \[sp\] -+** ptrue p0\.b, all -+** ldr p4, \[sp\] -+** addvl sp, sp, #1 -+** ret -+*/ -+svbool_t -+test_4 (void) -+{ -+ asm volatile ("" ::: "p4"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_5: -+** addvl sp, sp, #-1 -+** str z15, \[sp\] -+** ptrue p0\.b, all -+** ldr z15, \[sp\] -+** addvl sp, sp, #1 -+** ret -+*/ -+svbool_t -+test_5 (void) -+{ -+ asm volatile ("" ::: "z15"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_6: -+** addvl sp, sp, #-1 -+** str z15, \[sp\] -+** mov z0\.b, #1 -+** ldr z15, \[sp\] -+** addvl sp, sp, #1 -+** ret -+*/ -+svint8_t -+test_6 (svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) -+{ -+ asm volatile ("" :: "Upa" (p0), "Upa" (p1), "Upa" (p2), "Upa" (p3) : "z15"); -+ return svdup_s8 (1); -+} -+ -+/* -+** test_7: -+** addvl sp, sp, #-1 -+** str z16, \[sp\] -+** ptrue p0\.b, all -+** ldr z16, \[sp\] -+** addvl sp, sp, #1 -+** ret -+*/ -+svbool_t -+test_7 (void) -+{ -+ asm volatile ("" ::: "z16"); -+ return svptrue_b8 (); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_1_le_wrap.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_1_le_wrap.c -new file mode 100644 -index 000000000..d81dd8e6b ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_1_le_wrap.c -@@ -0,0 +1,184 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -mlittle-endian -fshrink-wrap -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** test_1: -+** addvl sp, sp, #-17 -+** str p4, \[sp\] -+** str p5, \[sp, #1, mul vl\] -+** str p6, \[sp, #2, mul vl\] -+** str p7, \[sp, #3, mul vl\] -+** str p8, \[sp, #4, mul vl\] -+** str p9, \[sp, #5, mul vl\] -+** str p10, \[sp, #6, mul vl\] -+** str p11, \[sp, #7, mul vl\] -+** str z8, \[sp, #1, mul vl\] -+** str z9, \[sp, #2, mul vl\] -+** str z10, \[sp, #3, mul vl\] -+** str z11, \[sp, #4, mul vl\] -+** str z12, \[sp, #5, mul vl\] -+** str z13, \[sp, #6, mul vl\] -+** str z14, \[sp, #7, mul vl\] -+** str z15, \[sp, #8, mul vl\] -+** str z16, \[sp, #9, mul vl\] -+** str z17, \[sp, #10, mul vl\] -+** str z18, \[sp, #11, mul vl\] -+** str z19, \[sp, #12, mul vl\] -+** str z20, \[sp, #13, mul vl\] -+** str z21, \[sp, #14, mul vl\] -+** str z22, \[sp, #15, mul vl\] -+** str z23, \[sp, #16, mul vl\] -+** ptrue p0\.b, all -+** ldr z8, \[sp, #1, mul vl\] -+** ldr z9, \[sp, #2, mul vl\] -+** ldr z10, \[sp, #3, mul vl\] -+** ldr z11, \[sp, #4, mul vl\] -+** ldr z12, \[sp, #5, mul vl\] -+** ldr z13, \[sp, #6, mul vl\] -+** ldr z14, \[sp, #7, mul vl\] -+** ldr z15, \[sp, #8, mul vl\] -+** ldr z16, \[sp, #9, mul vl\] -+** ldr z17, \[sp, #10, mul vl\] -+** ldr z18, \[sp, #11, mul vl\] -+** ldr z19, \[sp, #12, mul vl\] -+** ldr z20, \[sp, #13, mul vl\] -+** ldr z21, \[sp, #14, mul vl\] -+** ldr z22, \[sp, #15, mul vl\] -+** ldr z23, \[sp, #16, mul vl\] -+** ldr p4, \[sp\] -+** ldr p5, \[sp, #1, mul vl\] -+** ldr p6, \[sp, #2, mul vl\] -+** ldr p7, \[sp, #3, mul vl\] -+** ldr p8, \[sp, #4, mul vl\] -+** ldr p9, \[sp, #5, mul vl\] -+** ldr p10, \[sp, #6, mul vl\] -+** ldr p11, \[sp, #7, mul vl\] -+** addvl sp, sp, #17 -+** ret -+*/ -+svbool_t -+test_1 (void) -+{ -+ asm volatile ("" ::: -+ "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", -+ "z8", "z9", "z10", "z11", "z12", "z13", "z14", "z15", -+ "z16", 
"z17", "z18", "z19", "z20", "z21", "z22", "z23", -+ "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", -+ "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", -+ "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_2: -+** ptrue p0\.b, all -+** ret -+*/ -+svbool_t -+test_2 (void) -+{ -+ asm volatile ("" ::: -+ "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", -+ "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", -+ "p0", "p1", "p2", "p3", "p12", "p13", "p14", "p15"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_3: -+** addvl sp, sp, #-6 -+** str p5, \[sp\] -+** str p6, \[sp, #1, mul vl\] -+** str p11, \[sp, #2, mul vl\] -+** str z8, \[sp, #1, mul vl\] -+** str z13, \[sp, #2, mul vl\] -+** str z19, \[sp, #3, mul vl\] -+** str z20, \[sp, #4, mul vl\] -+** str z22, \[sp, #5, mul vl\] -+** ptrue p0\.b, all -+** ldr z8, \[sp, #1, mul vl\] -+** ldr z13, \[sp, #2, mul vl\] -+** ldr z19, \[sp, #3, mul vl\] -+** ldr z20, \[sp, #4, mul vl\] -+** ldr z22, \[sp, #5, mul vl\] -+** ldr p5, \[sp\] -+** ldr p6, \[sp, #1, mul vl\] -+** ldr p11, \[sp, #2, mul vl\] -+** addvl sp, sp, #6 -+** ret -+*/ -+svbool_t -+test_3 (void) -+{ -+ asm volatile ("" ::: -+ "z8", "z13", "z19", "z20", "z22", -+ "p5", "p6", "p11"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_4: -+** addvl sp, sp, #-1 -+** str p4, \[sp\] -+** ptrue p0\.b, all -+** ldr p4, \[sp\] -+** addvl sp, sp, #1 -+** ret -+*/ -+svbool_t -+test_4 (void) -+{ -+ asm volatile ("" ::: "p4"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_5: -+** addvl sp, sp, #-1 -+** str z15, \[sp\] -+** ptrue p0\.b, all -+** ldr z15, \[sp\] -+** addvl sp, sp, #1 -+** ret -+*/ -+svbool_t -+test_5 (void) -+{ -+ asm volatile ("" ::: "z15"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_6: -+** addvl sp, sp, #-1 -+** str z15, \[sp\] -+** mov z0\.b, #1 -+** ldr z15, \[sp\] -+** addvl sp, sp, #1 -+** ret -+*/ -+svint8_t -+test_6 (svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) -+{ -+ asm volatile ("" :: "Upa" (p0), "Upa" (p1), "Upa" (p2), "Upa" (p3) : "z15"); -+ return svdup_s8 (1); -+} -+ -+/* -+** test_7: -+** addvl sp, sp, #-1 -+** str z16, \[sp\] -+** ptrue p0\.b, all -+** ldr z16, \[sp\] -+** addvl sp, sp, #1 -+** ret -+*/ -+svbool_t -+test_7 (void) -+{ -+ asm volatile ("" ::: "z16"); -+ return svptrue_b8 (); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_2_be_nowrap.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_2_be_nowrap.c -new file mode 100644 -index 000000000..05aa18b3c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_2_be_nowrap.c -@@ -0,0 +1,271 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -mbig-endian -fno-shrink-wrap -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+void standard_callee (void); -+__attribute__((aarch64_vector_pcs)) void vpcs_callee (void); -+ -+/* -+** calls_standard: -+** stp x29, x30, \[sp, -16\]! 
-+** mov x29, sp -+** addvl sp, sp, #-17 -+** str p4, \[sp\] -+** str p5, \[sp, #1, mul vl\] -+** str p6, \[sp, #2, mul vl\] -+** str p7, \[sp, #3, mul vl\] -+** str p8, \[sp, #4, mul vl\] -+** str p9, \[sp, #5, mul vl\] -+** str p10, \[sp, #6, mul vl\] -+** str p11, \[sp, #7, mul vl\] -+** ptrue p0\.b, all -+** st1d z8\.d, p0, \[sp, #1, mul vl\] -+** st1d z9\.d, p0, \[sp, #2, mul vl\] -+** st1d z10\.d, p0, \[sp, #3, mul vl\] -+** st1d z11\.d, p0, \[sp, #4, mul vl\] -+** st1d z12\.d, p0, \[sp, #5, mul vl\] -+** st1d z13\.d, p0, \[sp, #6, mul vl\] -+** st1d z14\.d, p0, \[sp, #7, mul vl\] -+** addvl x11, sp, #16 -+** st1d z15\.d, p0, \[x11, #-8, mul vl\] -+** str z16, \[sp, #9, mul vl\] -+** str z17, \[sp, #10, mul vl\] -+** str z18, \[sp, #11, mul vl\] -+** str z19, \[sp, #12, mul vl\] -+** str z20, \[sp, #13, mul vl\] -+** str z21, \[sp, #14, mul vl\] -+** str z22, \[sp, #15, mul vl\] -+** str z23, \[sp, #16, mul vl\] -+** bl standard_callee -+** ptrue p0\.b, all -+** ld1d z8\.d, p0/z, \[sp, #1, mul vl\] -+** ld1d z9\.d, p0/z, \[sp, #2, mul vl\] -+** ld1d z10\.d, p0/z, \[sp, #3, mul vl\] -+** ld1d z11\.d, p0/z, \[sp, #4, mul vl\] -+** ld1d z12\.d, p0/z, \[sp, #5, mul vl\] -+** ld1d z13\.d, p0/z, \[sp, #6, mul vl\] -+** ld1d z14\.d, p0/z, \[sp, #7, mul vl\] -+** addvl x11, sp, #16 -+** ld1d z15\.d, p0/z, \[x11, #-8, mul vl\] -+** ldr z16, \[sp, #9, mul vl\] -+** ldr z17, \[sp, #10, mul vl\] -+** ldr z18, \[sp, #11, mul vl\] -+** ldr z19, \[sp, #12, mul vl\] -+** ldr z20, \[sp, #13, mul vl\] -+** ldr z21, \[sp, #14, mul vl\] -+** ldr z22, \[sp, #15, mul vl\] -+** ldr z23, \[sp, #16, mul vl\] -+** ldr p4, \[sp\] -+** ldr p5, \[sp, #1, mul vl\] -+** ldr p6, \[sp, #2, mul vl\] -+** ldr p7, \[sp, #3, mul vl\] -+** ldr p8, \[sp, #4, mul vl\] -+** ldr p9, \[sp, #5, mul vl\] -+** ldr p10, \[sp, #6, mul vl\] -+** ldr p11, \[sp, #7, mul vl\] -+** addvl sp, sp, #17 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+void calls_standard (__SVInt8_t x) { standard_callee (); } -+ -+/* -+** calls_vpcs: -+** stp x29, x30, \[sp, -16\]! 
-+** mov x29, sp -+** addvl sp, sp, #-17 -+** str p4, \[sp\] -+** str p5, \[sp, #1, mul vl\] -+** str p6, \[sp, #2, mul vl\] -+** str p7, \[sp, #3, mul vl\] -+** str p8, \[sp, #4, mul vl\] -+** str p9, \[sp, #5, mul vl\] -+** str p10, \[sp, #6, mul vl\] -+** str p11, \[sp, #7, mul vl\] -+** ptrue p0\.b, all -+** st1d z8\.d, p0, \[sp, #1, mul vl\] -+** st1d z9\.d, p0, \[sp, #2, mul vl\] -+** st1d z10\.d, p0, \[sp, #3, mul vl\] -+** st1d z11\.d, p0, \[sp, #4, mul vl\] -+** st1d z12\.d, p0, \[sp, #5, mul vl\] -+** st1d z13\.d, p0, \[sp, #6, mul vl\] -+** st1d z14\.d, p0, \[sp, #7, mul vl\] -+** addvl x11, sp, #16 -+** st1d z15\.d, p0, \[x11, #-8, mul vl\] -+** str z16, \[sp, #9, mul vl\] -+** str z17, \[sp, #10, mul vl\] -+** str z18, \[sp, #11, mul vl\] -+** str z19, \[sp, #12, mul vl\] -+** str z20, \[sp, #13, mul vl\] -+** str z21, \[sp, #14, mul vl\] -+** str z22, \[sp, #15, mul vl\] -+** str z23, \[sp, #16, mul vl\] -+** bl vpcs_callee -+** ptrue p0\.b, all -+** ld1d z8\.d, p0/z, \[sp, #1, mul vl\] -+** ld1d z9\.d, p0/z, \[sp, #2, mul vl\] -+** ld1d z10\.d, p0/z, \[sp, #3, mul vl\] -+** ld1d z11\.d, p0/z, \[sp, #4, mul vl\] -+** ld1d z12\.d, p0/z, \[sp, #5, mul vl\] -+** ld1d z13\.d, p0/z, \[sp, #6, mul vl\] -+** ld1d z14\.d, p0/z, \[sp, #7, mul vl\] -+** addvl x11, sp, #16 -+** ld1d z15\.d, p0/z, \[x11, #-8, mul vl\] -+** ldr z16, \[sp, #9, mul vl\] -+** ldr z17, \[sp, #10, mul vl\] -+** ldr z18, \[sp, #11, mul vl\] -+** ldr z19, \[sp, #12, mul vl\] -+** ldr z20, \[sp, #13, mul vl\] -+** ldr z21, \[sp, #14, mul vl\] -+** ldr z22, \[sp, #15, mul vl\] -+** ldr z23, \[sp, #16, mul vl\] -+** ldr p4, \[sp\] -+** ldr p5, \[sp, #1, mul vl\] -+** ldr p6, \[sp, #2, mul vl\] -+** ldr p7, \[sp, #3, mul vl\] -+** ldr p8, \[sp, #4, mul vl\] -+** ldr p9, \[sp, #5, mul vl\] -+** ldr p10, \[sp, #6, mul vl\] -+** ldr p11, \[sp, #7, mul vl\] -+** addvl sp, sp, #17 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+void calls_vpcs (__SVInt8_t x) { vpcs_callee (); } -+ -+/* -+** calls_standard_ptr: -+** stp x29, x30, \[sp, -16\]! 
-+** mov x29, sp -+** addvl sp, sp, #-17 -+** str p4, \[sp\] -+** str p5, \[sp, #1, mul vl\] -+** str p6, \[sp, #2, mul vl\] -+** str p7, \[sp, #3, mul vl\] -+** str p8, \[sp, #4, mul vl\] -+** str p9, \[sp, #5, mul vl\] -+** str p10, \[sp, #6, mul vl\] -+** str p11, \[sp, #7, mul vl\] -+** ptrue p0\.b, all -+** st1d z8\.d, p0, \[sp, #1, mul vl\] -+** st1d z9\.d, p0, \[sp, #2, mul vl\] -+** st1d z10\.d, p0, \[sp, #3, mul vl\] -+** st1d z11\.d, p0, \[sp, #4, mul vl\] -+** st1d z12\.d, p0, \[sp, #5, mul vl\] -+** st1d z13\.d, p0, \[sp, #6, mul vl\] -+** st1d z14\.d, p0, \[sp, #7, mul vl\] -+** addvl x11, sp, #16 -+** st1d z15\.d, p0, \[x11, #-8, mul vl\] -+** str z16, \[sp, #9, mul vl\] -+** str z17, \[sp, #10, mul vl\] -+** str z18, \[sp, #11, mul vl\] -+** str z19, \[sp, #12, mul vl\] -+** str z20, \[sp, #13, mul vl\] -+** str z21, \[sp, #14, mul vl\] -+** str z22, \[sp, #15, mul vl\] -+** str z23, \[sp, #16, mul vl\] -+** blr x0 -+** ptrue p0\.b, all -+** ld1d z8\.d, p0/z, \[sp, #1, mul vl\] -+** ld1d z9\.d, p0/z, \[sp, #2, mul vl\] -+** ld1d z10\.d, p0/z, \[sp, #3, mul vl\] -+** ld1d z11\.d, p0/z, \[sp, #4, mul vl\] -+** ld1d z12\.d, p0/z, \[sp, #5, mul vl\] -+** ld1d z13\.d, p0/z, \[sp, #6, mul vl\] -+** ld1d z14\.d, p0/z, \[sp, #7, mul vl\] -+** addvl x11, sp, #16 -+** ld1d z15\.d, p0/z, \[x11, #-8, mul vl\] -+** ldr z16, \[sp, #9, mul vl\] -+** ldr z17, \[sp, #10, mul vl\] -+** ldr z18, \[sp, #11, mul vl\] -+** ldr z19, \[sp, #12, mul vl\] -+** ldr z20, \[sp, #13, mul vl\] -+** ldr z21, \[sp, #14, mul vl\] -+** ldr z22, \[sp, #15, mul vl\] -+** ldr z23, \[sp, #16, mul vl\] -+** ldr p4, \[sp\] -+** ldr p5, \[sp, #1, mul vl\] -+** ldr p6, \[sp, #2, mul vl\] -+** ldr p7, \[sp, #3, mul vl\] -+** ldr p8, \[sp, #4, mul vl\] -+** ldr p9, \[sp, #5, mul vl\] -+** ldr p10, \[sp, #6, mul vl\] -+** ldr p11, \[sp, #7, mul vl\] -+** addvl sp, sp, #17 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+void -+calls_standard_ptr (__SVInt8_t x, void (*fn) (void)) -+{ -+ fn (); -+} -+ -+/* -+** calls_vpcs_ptr: -+** stp x29, x30, \[sp, -16\]! 
-+** mov x29, sp -+** addvl sp, sp, #-17 -+** str p4, \[sp\] -+** str p5, \[sp, #1, mul vl\] -+** str p6, \[sp, #2, mul vl\] -+** str p7, \[sp, #3, mul vl\] -+** str p8, \[sp, #4, mul vl\] -+** str p9, \[sp, #5, mul vl\] -+** str p10, \[sp, #6, mul vl\] -+** str p11, \[sp, #7, mul vl\] -+** ptrue p0\.b, all -+** st1d z8\.d, p0, \[sp, #1, mul vl\] -+** st1d z9\.d, p0, \[sp, #2, mul vl\] -+** st1d z10\.d, p0, \[sp, #3, mul vl\] -+** st1d z11\.d, p0, \[sp, #4, mul vl\] -+** st1d z12\.d, p0, \[sp, #5, mul vl\] -+** st1d z13\.d, p0, \[sp, #6, mul vl\] -+** st1d z14\.d, p0, \[sp, #7, mul vl\] -+** addvl x11, sp, #16 -+** st1d z15\.d, p0, \[x11, #-8, mul vl\] -+** str z16, \[sp, #9, mul vl\] -+** str z17, \[sp, #10, mul vl\] -+** str z18, \[sp, #11, mul vl\] -+** str z19, \[sp, #12, mul vl\] -+** str z20, \[sp, #13, mul vl\] -+** str z21, \[sp, #14, mul vl\] -+** str z22, \[sp, #15, mul vl\] -+** str z23, \[sp, #16, mul vl\] -+** blr x0 -+** ptrue p0\.b, all -+** ld1d z8\.d, p0/z, \[sp, #1, mul vl\] -+** ld1d z9\.d, p0/z, \[sp, #2, mul vl\] -+** ld1d z10\.d, p0/z, \[sp, #3, mul vl\] -+** ld1d z11\.d, p0/z, \[sp, #4, mul vl\] -+** ld1d z12\.d, p0/z, \[sp, #5, mul vl\] -+** ld1d z13\.d, p0/z, \[sp, #6, mul vl\] -+** ld1d z14\.d, p0/z, \[sp, #7, mul vl\] -+** addvl x11, sp, #16 -+** ld1d z15\.d, p0/z, \[x11, #-8, mul vl\] -+** ldr z16, \[sp, #9, mul vl\] -+** ldr z17, \[sp, #10, mul vl\] -+** ldr z18, \[sp, #11, mul vl\] -+** ldr z19, \[sp, #12, mul vl\] -+** ldr z20, \[sp, #13, mul vl\] -+** ldr z21, \[sp, #14, mul vl\] -+** ldr z22, \[sp, #15, mul vl\] -+** ldr z23, \[sp, #16, mul vl\] -+** ldr p4, \[sp\] -+** ldr p5, \[sp, #1, mul vl\] -+** ldr p6, \[sp, #2, mul vl\] -+** ldr p7, \[sp, #3, mul vl\] -+** ldr p8, \[sp, #4, mul vl\] -+** ldr p9, \[sp, #5, mul vl\] -+** ldr p10, \[sp, #6, mul vl\] -+** ldr p11, \[sp, #7, mul vl\] -+** addvl sp, sp, #17 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+void -+calls_vpcs_ptr (__SVInt8_t x, -+ void (*__attribute__((aarch64_vector_pcs)) fn) (void)) -+{ -+ fn (); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_2_be_wrap.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_2_be_wrap.c -new file mode 100644 -index 000000000..85b7794d7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_2_be_wrap.c -@@ -0,0 +1,271 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -mbig-endian -fshrink-wrap -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+void standard_callee (void); -+__attribute__((aarch64_vector_pcs)) void vpcs_callee (void); -+ -+/* -+** calls_standard: -+** stp x29, x30, \[sp, -16\]! 
-+** mov x29, sp -+** addvl sp, sp, #-17 -+** str p4, \[sp\] -+** str p5, \[sp, #1, mul vl\] -+** str p6, \[sp, #2, mul vl\] -+** str p7, \[sp, #3, mul vl\] -+** str p8, \[sp, #4, mul vl\] -+** str p9, \[sp, #5, mul vl\] -+** str p10, \[sp, #6, mul vl\] -+** str p11, \[sp, #7, mul vl\] -+** ptrue p0\.b, all -+** st1d z8\.d, p0, \[sp, #1, mul vl\] -+** st1d z9\.d, p0, \[sp, #2, mul vl\] -+** st1d z10\.d, p0, \[sp, #3, mul vl\] -+** st1d z11\.d, p0, \[sp, #4, mul vl\] -+** st1d z12\.d, p0, \[sp, #5, mul vl\] -+** st1d z13\.d, p0, \[sp, #6, mul vl\] -+** st1d z14\.d, p0, \[sp, #7, mul vl\] -+** addvl x11, sp, #16 -+** st1d z15\.d, p0, \[x11, #-8, mul vl\] -+** str z16, \[sp, #9, mul vl\] -+** str z17, \[sp, #10, mul vl\] -+** str z18, \[sp, #11, mul vl\] -+** str z19, \[sp, #12, mul vl\] -+** str z20, \[sp, #13, mul vl\] -+** str z21, \[sp, #14, mul vl\] -+** str z22, \[sp, #15, mul vl\] -+** str z23, \[sp, #16, mul vl\] -+** bl standard_callee -+** ptrue p0\.b, all -+** ld1d z8\.d, p0/z, \[sp, #1, mul vl\] -+** ld1d z9\.d, p0/z, \[sp, #2, mul vl\] -+** ld1d z10\.d, p0/z, \[sp, #3, mul vl\] -+** ld1d z11\.d, p0/z, \[sp, #4, mul vl\] -+** ld1d z12\.d, p0/z, \[sp, #5, mul vl\] -+** ld1d z13\.d, p0/z, \[sp, #6, mul vl\] -+** ld1d z14\.d, p0/z, \[sp, #7, mul vl\] -+** addvl x11, sp, #16 -+** ld1d z15\.d, p0/z, \[x11, #-8, mul vl\] -+** ldr z16, \[sp, #9, mul vl\] -+** ldr z17, \[sp, #10, mul vl\] -+** ldr z18, \[sp, #11, mul vl\] -+** ldr z19, \[sp, #12, mul vl\] -+** ldr z20, \[sp, #13, mul vl\] -+** ldr z21, \[sp, #14, mul vl\] -+** ldr z22, \[sp, #15, mul vl\] -+** ldr z23, \[sp, #16, mul vl\] -+** ldr p4, \[sp\] -+** ldr p5, \[sp, #1, mul vl\] -+** ldr p6, \[sp, #2, mul vl\] -+** ldr p7, \[sp, #3, mul vl\] -+** ldr p8, \[sp, #4, mul vl\] -+** ldr p9, \[sp, #5, mul vl\] -+** ldr p10, \[sp, #6, mul vl\] -+** ldr p11, \[sp, #7, mul vl\] -+** addvl sp, sp, #17 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+void calls_standard (__SVInt8_t x) { standard_callee (); } -+ -+/* -+** calls_vpcs: -+** stp x29, x30, \[sp, -16\]! 
-+** mov x29, sp -+** addvl sp, sp, #-17 -+** str p4, \[sp\] -+** str p5, \[sp, #1, mul vl\] -+** str p6, \[sp, #2, mul vl\] -+** str p7, \[sp, #3, mul vl\] -+** str p8, \[sp, #4, mul vl\] -+** str p9, \[sp, #5, mul vl\] -+** str p10, \[sp, #6, mul vl\] -+** str p11, \[sp, #7, mul vl\] -+** ptrue p0\.b, all -+** st1d z8\.d, p0, \[sp, #1, mul vl\] -+** st1d z9\.d, p0, \[sp, #2, mul vl\] -+** st1d z10\.d, p0, \[sp, #3, mul vl\] -+** st1d z11\.d, p0, \[sp, #4, mul vl\] -+** st1d z12\.d, p0, \[sp, #5, mul vl\] -+** st1d z13\.d, p0, \[sp, #6, mul vl\] -+** st1d z14\.d, p0, \[sp, #7, mul vl\] -+** addvl x11, sp, #16 -+** st1d z15\.d, p0, \[x11, #-8, mul vl\] -+** str z16, \[sp, #9, mul vl\] -+** str z17, \[sp, #10, mul vl\] -+** str z18, \[sp, #11, mul vl\] -+** str z19, \[sp, #12, mul vl\] -+** str z20, \[sp, #13, mul vl\] -+** str z21, \[sp, #14, mul vl\] -+** str z22, \[sp, #15, mul vl\] -+** str z23, \[sp, #16, mul vl\] -+** bl vpcs_callee -+** ptrue p0\.b, all -+** ld1d z8\.d, p0/z, \[sp, #1, mul vl\] -+** ld1d z9\.d, p0/z, \[sp, #2, mul vl\] -+** ld1d z10\.d, p0/z, \[sp, #3, mul vl\] -+** ld1d z11\.d, p0/z, \[sp, #4, mul vl\] -+** ld1d z12\.d, p0/z, \[sp, #5, mul vl\] -+** ld1d z13\.d, p0/z, \[sp, #6, mul vl\] -+** ld1d z14\.d, p0/z, \[sp, #7, mul vl\] -+** addvl x11, sp, #16 -+** ld1d z15\.d, p0/z, \[x11, #-8, mul vl\] -+** ldr z16, \[sp, #9, mul vl\] -+** ldr z17, \[sp, #10, mul vl\] -+** ldr z18, \[sp, #11, mul vl\] -+** ldr z19, \[sp, #12, mul vl\] -+** ldr z20, \[sp, #13, mul vl\] -+** ldr z21, \[sp, #14, mul vl\] -+** ldr z22, \[sp, #15, mul vl\] -+** ldr z23, \[sp, #16, mul vl\] -+** ldr p4, \[sp\] -+** ldr p5, \[sp, #1, mul vl\] -+** ldr p6, \[sp, #2, mul vl\] -+** ldr p7, \[sp, #3, mul vl\] -+** ldr p8, \[sp, #4, mul vl\] -+** ldr p9, \[sp, #5, mul vl\] -+** ldr p10, \[sp, #6, mul vl\] -+** ldr p11, \[sp, #7, mul vl\] -+** addvl sp, sp, #17 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+void calls_vpcs (__SVInt8_t x) { vpcs_callee (); } -+ -+/* -+** calls_standard_ptr: -+** stp x29, x30, \[sp, -16\]! 
-+** mov x29, sp -+** addvl sp, sp, #-17 -+** str p4, \[sp\] -+** str p5, \[sp, #1, mul vl\] -+** str p6, \[sp, #2, mul vl\] -+** str p7, \[sp, #3, mul vl\] -+** str p8, \[sp, #4, mul vl\] -+** str p9, \[sp, #5, mul vl\] -+** str p10, \[sp, #6, mul vl\] -+** str p11, \[sp, #7, mul vl\] -+** ptrue p0\.b, all -+** st1d z8\.d, p0, \[sp, #1, mul vl\] -+** st1d z9\.d, p0, \[sp, #2, mul vl\] -+** st1d z10\.d, p0, \[sp, #3, mul vl\] -+** st1d z11\.d, p0, \[sp, #4, mul vl\] -+** st1d z12\.d, p0, \[sp, #5, mul vl\] -+** st1d z13\.d, p0, \[sp, #6, mul vl\] -+** st1d z14\.d, p0, \[sp, #7, mul vl\] -+** addvl x11, sp, #16 -+** st1d z15\.d, p0, \[x11, #-8, mul vl\] -+** str z16, \[sp, #9, mul vl\] -+** str z17, \[sp, #10, mul vl\] -+** str z18, \[sp, #11, mul vl\] -+** str z19, \[sp, #12, mul vl\] -+** str z20, \[sp, #13, mul vl\] -+** str z21, \[sp, #14, mul vl\] -+** str z22, \[sp, #15, mul vl\] -+** str z23, \[sp, #16, mul vl\] -+** blr x0 -+** ptrue p0\.b, all -+** ld1d z8\.d, p0/z, \[sp, #1, mul vl\] -+** ld1d z9\.d, p0/z, \[sp, #2, mul vl\] -+** ld1d z10\.d, p0/z, \[sp, #3, mul vl\] -+** ld1d z11\.d, p0/z, \[sp, #4, mul vl\] -+** ld1d z12\.d, p0/z, \[sp, #5, mul vl\] -+** ld1d z13\.d, p0/z, \[sp, #6, mul vl\] -+** ld1d z14\.d, p0/z, \[sp, #7, mul vl\] -+** addvl x11, sp, #16 -+** ld1d z15\.d, p0/z, \[x11, #-8, mul vl\] -+** ldr z16, \[sp, #9, mul vl\] -+** ldr z17, \[sp, #10, mul vl\] -+** ldr z18, \[sp, #11, mul vl\] -+** ldr z19, \[sp, #12, mul vl\] -+** ldr z20, \[sp, #13, mul vl\] -+** ldr z21, \[sp, #14, mul vl\] -+** ldr z22, \[sp, #15, mul vl\] -+** ldr z23, \[sp, #16, mul vl\] -+** ldr p4, \[sp\] -+** ldr p5, \[sp, #1, mul vl\] -+** ldr p6, \[sp, #2, mul vl\] -+** ldr p7, \[sp, #3, mul vl\] -+** ldr p8, \[sp, #4, mul vl\] -+** ldr p9, \[sp, #5, mul vl\] -+** ldr p10, \[sp, #6, mul vl\] -+** ldr p11, \[sp, #7, mul vl\] -+** addvl sp, sp, #17 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+void -+calls_standard_ptr (__SVInt8_t x, void (*fn) (void)) -+{ -+ fn (); -+} -+ -+/* -+** calls_vpcs_ptr: -+** stp x29, x30, \[sp, -16\]! 
-+** mov x29, sp -+** addvl sp, sp, #-17 -+** str p4, \[sp\] -+** str p5, \[sp, #1, mul vl\] -+** str p6, \[sp, #2, mul vl\] -+** str p7, \[sp, #3, mul vl\] -+** str p8, \[sp, #4, mul vl\] -+** str p9, \[sp, #5, mul vl\] -+** str p10, \[sp, #6, mul vl\] -+** str p11, \[sp, #7, mul vl\] -+** ptrue p0\.b, all -+** st1d z8\.d, p0, \[sp, #1, mul vl\] -+** st1d z9\.d, p0, \[sp, #2, mul vl\] -+** st1d z10\.d, p0, \[sp, #3, mul vl\] -+** st1d z11\.d, p0, \[sp, #4, mul vl\] -+** st1d z12\.d, p0, \[sp, #5, mul vl\] -+** st1d z13\.d, p0, \[sp, #6, mul vl\] -+** st1d z14\.d, p0, \[sp, #7, mul vl\] -+** addvl x11, sp, #16 -+** st1d z15\.d, p0, \[x11, #-8, mul vl\] -+** str z16, \[sp, #9, mul vl\] -+** str z17, \[sp, #10, mul vl\] -+** str z18, \[sp, #11, mul vl\] -+** str z19, \[sp, #12, mul vl\] -+** str z20, \[sp, #13, mul vl\] -+** str z21, \[sp, #14, mul vl\] -+** str z22, \[sp, #15, mul vl\] -+** str z23, \[sp, #16, mul vl\] -+** blr x0 -+** ptrue p0\.b, all -+** ld1d z8\.d, p0/z, \[sp, #1, mul vl\] -+** ld1d z9\.d, p0/z, \[sp, #2, mul vl\] -+** ld1d z10\.d, p0/z, \[sp, #3, mul vl\] -+** ld1d z11\.d, p0/z, \[sp, #4, mul vl\] -+** ld1d z12\.d, p0/z, \[sp, #5, mul vl\] -+** ld1d z13\.d, p0/z, \[sp, #6, mul vl\] -+** ld1d z14\.d, p0/z, \[sp, #7, mul vl\] -+** addvl x11, sp, #16 -+** ld1d z15\.d, p0/z, \[x11, #-8, mul vl\] -+** ldr z16, \[sp, #9, mul vl\] -+** ldr z17, \[sp, #10, mul vl\] -+** ldr z18, \[sp, #11, mul vl\] -+** ldr z19, \[sp, #12, mul vl\] -+** ldr z20, \[sp, #13, mul vl\] -+** ldr z21, \[sp, #14, mul vl\] -+** ldr z22, \[sp, #15, mul vl\] -+** ldr z23, \[sp, #16, mul vl\] -+** ldr p4, \[sp\] -+** ldr p5, \[sp, #1, mul vl\] -+** ldr p6, \[sp, #2, mul vl\] -+** ldr p7, \[sp, #3, mul vl\] -+** ldr p8, \[sp, #4, mul vl\] -+** ldr p9, \[sp, #5, mul vl\] -+** ldr p10, \[sp, #6, mul vl\] -+** ldr p11, \[sp, #7, mul vl\] -+** addvl sp, sp, #17 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+void -+calls_vpcs_ptr (__SVInt8_t x, -+ void (*__attribute__((aarch64_vector_pcs)) fn) (void)) -+{ -+ fn (); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_2_le_nowrap.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_2_le_nowrap.c -new file mode 100644 -index 000000000..0fcd357a0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_2_le_nowrap.c -@@ -0,0 +1,255 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -mlittle-endian -fno-shrink-wrap -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+void standard_callee (void); -+__attribute__((aarch64_vector_pcs)) void vpcs_callee (void); -+ -+/* -+** calls_standard: -+** stp x29, x30, \[sp, -16\]! 
-+** mov x29, sp -+** addvl sp, sp, #-17 -+** str p4, \[sp\] -+** str p5, \[sp, #1, mul vl\] -+** str p6, \[sp, #2, mul vl\] -+** str p7, \[sp, #3, mul vl\] -+** str p8, \[sp, #4, mul vl\] -+** str p9, \[sp, #5, mul vl\] -+** str p10, \[sp, #6, mul vl\] -+** str p11, \[sp, #7, mul vl\] -+** str z8, \[sp, #1, mul vl\] -+** str z9, \[sp, #2, mul vl\] -+** str z10, \[sp, #3, mul vl\] -+** str z11, \[sp, #4, mul vl\] -+** str z12, \[sp, #5, mul vl\] -+** str z13, \[sp, #6, mul vl\] -+** str z14, \[sp, #7, mul vl\] -+** str z15, \[sp, #8, mul vl\] -+** str z16, \[sp, #9, mul vl\] -+** str z17, \[sp, #10, mul vl\] -+** str z18, \[sp, #11, mul vl\] -+** str z19, \[sp, #12, mul vl\] -+** str z20, \[sp, #13, mul vl\] -+** str z21, \[sp, #14, mul vl\] -+** str z22, \[sp, #15, mul vl\] -+** str z23, \[sp, #16, mul vl\] -+** bl standard_callee -+** ldr z8, \[sp, #1, mul vl\] -+** ldr z9, \[sp, #2, mul vl\] -+** ldr z10, \[sp, #3, mul vl\] -+** ldr z11, \[sp, #4, mul vl\] -+** ldr z12, \[sp, #5, mul vl\] -+** ldr z13, \[sp, #6, mul vl\] -+** ldr z14, \[sp, #7, mul vl\] -+** ldr z15, \[sp, #8, mul vl\] -+** ldr z16, \[sp, #9, mul vl\] -+** ldr z17, \[sp, #10, mul vl\] -+** ldr z18, \[sp, #11, mul vl\] -+** ldr z19, \[sp, #12, mul vl\] -+** ldr z20, \[sp, #13, mul vl\] -+** ldr z21, \[sp, #14, mul vl\] -+** ldr z22, \[sp, #15, mul vl\] -+** ldr z23, \[sp, #16, mul vl\] -+** ldr p4, \[sp\] -+** ldr p5, \[sp, #1, mul vl\] -+** ldr p6, \[sp, #2, mul vl\] -+** ldr p7, \[sp, #3, mul vl\] -+** ldr p8, \[sp, #4, mul vl\] -+** ldr p9, \[sp, #5, mul vl\] -+** ldr p10, \[sp, #6, mul vl\] -+** ldr p11, \[sp, #7, mul vl\] -+** addvl sp, sp, #17 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+void calls_standard (__SVInt8_t x) { standard_callee (); } -+ -+/* -+** calls_vpcs: -+** stp x29, x30, \[sp, -16\]! 
-+** mov x29, sp -+** addvl sp, sp, #-17 -+** str p4, \[sp\] -+** str p5, \[sp, #1, mul vl\] -+** str p6, \[sp, #2, mul vl\] -+** str p7, \[sp, #3, mul vl\] -+** str p8, \[sp, #4, mul vl\] -+** str p9, \[sp, #5, mul vl\] -+** str p10, \[sp, #6, mul vl\] -+** str p11, \[sp, #7, mul vl\] -+** str z8, \[sp, #1, mul vl\] -+** str z9, \[sp, #2, mul vl\] -+** str z10, \[sp, #3, mul vl\] -+** str z11, \[sp, #4, mul vl\] -+** str z12, \[sp, #5, mul vl\] -+** str z13, \[sp, #6, mul vl\] -+** str z14, \[sp, #7, mul vl\] -+** str z15, \[sp, #8, mul vl\] -+** str z16, \[sp, #9, mul vl\] -+** str z17, \[sp, #10, mul vl\] -+** str z18, \[sp, #11, mul vl\] -+** str z19, \[sp, #12, mul vl\] -+** str z20, \[sp, #13, mul vl\] -+** str z21, \[sp, #14, mul vl\] -+** str z22, \[sp, #15, mul vl\] -+** str z23, \[sp, #16, mul vl\] -+** bl vpcs_callee -+** ldr z8, \[sp, #1, mul vl\] -+** ldr z9, \[sp, #2, mul vl\] -+** ldr z10, \[sp, #3, mul vl\] -+** ldr z11, \[sp, #4, mul vl\] -+** ldr z12, \[sp, #5, mul vl\] -+** ldr z13, \[sp, #6, mul vl\] -+** ldr z14, \[sp, #7, mul vl\] -+** ldr z15, \[sp, #8, mul vl\] -+** ldr z16, \[sp, #9, mul vl\] -+** ldr z17, \[sp, #10, mul vl\] -+** ldr z18, \[sp, #11, mul vl\] -+** ldr z19, \[sp, #12, mul vl\] -+** ldr z20, \[sp, #13, mul vl\] -+** ldr z21, \[sp, #14, mul vl\] -+** ldr z22, \[sp, #15, mul vl\] -+** ldr z23, \[sp, #16, mul vl\] -+** ldr p4, \[sp\] -+** ldr p5, \[sp, #1, mul vl\] -+** ldr p6, \[sp, #2, mul vl\] -+** ldr p7, \[sp, #3, mul vl\] -+** ldr p8, \[sp, #4, mul vl\] -+** ldr p9, \[sp, #5, mul vl\] -+** ldr p10, \[sp, #6, mul vl\] -+** ldr p11, \[sp, #7, mul vl\] -+** addvl sp, sp, #17 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+void calls_vpcs (__SVInt8_t x) { vpcs_callee (); } -+ -+/* -+** calls_standard_ptr: -+** stp x29, x30, \[sp, -16\]! 
-+** mov x29, sp -+** addvl sp, sp, #-17 -+** str p4, \[sp\] -+** str p5, \[sp, #1, mul vl\] -+** str p6, \[sp, #2, mul vl\] -+** str p7, \[sp, #3, mul vl\] -+** str p8, \[sp, #4, mul vl\] -+** str p9, \[sp, #5, mul vl\] -+** str p10, \[sp, #6, mul vl\] -+** str p11, \[sp, #7, mul vl\] -+** str z8, \[sp, #1, mul vl\] -+** str z9, \[sp, #2, mul vl\] -+** str z10, \[sp, #3, mul vl\] -+** str z11, \[sp, #4, mul vl\] -+** str z12, \[sp, #5, mul vl\] -+** str z13, \[sp, #6, mul vl\] -+** str z14, \[sp, #7, mul vl\] -+** str z15, \[sp, #8, mul vl\] -+** str z16, \[sp, #9, mul vl\] -+** str z17, \[sp, #10, mul vl\] -+** str z18, \[sp, #11, mul vl\] -+** str z19, \[sp, #12, mul vl\] -+** str z20, \[sp, #13, mul vl\] -+** str z21, \[sp, #14, mul vl\] -+** str z22, \[sp, #15, mul vl\] -+** str z23, \[sp, #16, mul vl\] -+** blr x0 -+** ldr z8, \[sp, #1, mul vl\] -+** ldr z9, \[sp, #2, mul vl\] -+** ldr z10, \[sp, #3, mul vl\] -+** ldr z11, \[sp, #4, mul vl\] -+** ldr z12, \[sp, #5, mul vl\] -+** ldr z13, \[sp, #6, mul vl\] -+** ldr z14, \[sp, #7, mul vl\] -+** ldr z15, \[sp, #8, mul vl\] -+** ldr z16, \[sp, #9, mul vl\] -+** ldr z17, \[sp, #10, mul vl\] -+** ldr z18, \[sp, #11, mul vl\] -+** ldr z19, \[sp, #12, mul vl\] -+** ldr z20, \[sp, #13, mul vl\] -+** ldr z21, \[sp, #14, mul vl\] -+** ldr z22, \[sp, #15, mul vl\] -+** ldr z23, \[sp, #16, mul vl\] -+** ldr p4, \[sp\] -+** ldr p5, \[sp, #1, mul vl\] -+** ldr p6, \[sp, #2, mul vl\] -+** ldr p7, \[sp, #3, mul vl\] -+** ldr p8, \[sp, #4, mul vl\] -+** ldr p9, \[sp, #5, mul vl\] -+** ldr p10, \[sp, #6, mul vl\] -+** ldr p11, \[sp, #7, mul vl\] -+** addvl sp, sp, #17 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+void -+calls_standard_ptr (__SVInt8_t x, void (*fn) (void)) -+{ -+ fn (); -+} -+ -+/* -+** calls_vpcs_ptr: -+** stp x29, x30, \[sp, -16\]! 
-+** mov x29, sp -+** addvl sp, sp, #-17 -+** str p4, \[sp\] -+** str p5, \[sp, #1, mul vl\] -+** str p6, \[sp, #2, mul vl\] -+** str p7, \[sp, #3, mul vl\] -+** str p8, \[sp, #4, mul vl\] -+** str p9, \[sp, #5, mul vl\] -+** str p10, \[sp, #6, mul vl\] -+** str p11, \[sp, #7, mul vl\] -+** str z8, \[sp, #1, mul vl\] -+** str z9, \[sp, #2, mul vl\] -+** str z10, \[sp, #3, mul vl\] -+** str z11, \[sp, #4, mul vl\] -+** str z12, \[sp, #5, mul vl\] -+** str z13, \[sp, #6, mul vl\] -+** str z14, \[sp, #7, mul vl\] -+** str z15, \[sp, #8, mul vl\] -+** str z16, \[sp, #9, mul vl\] -+** str z17, \[sp, #10, mul vl\] -+** str z18, \[sp, #11, mul vl\] -+** str z19, \[sp, #12, mul vl\] -+** str z20, \[sp, #13, mul vl\] -+** str z21, \[sp, #14, mul vl\] -+** str z22, \[sp, #15, mul vl\] -+** str z23, \[sp, #16, mul vl\] -+** blr x0 -+** ldr z8, \[sp, #1, mul vl\] -+** ldr z9, \[sp, #2, mul vl\] -+** ldr z10, \[sp, #3, mul vl\] -+** ldr z11, \[sp, #4, mul vl\] -+** ldr z12, \[sp, #5, mul vl\] -+** ldr z13, \[sp, #6, mul vl\] -+** ldr z14, \[sp, #7, mul vl\] -+** ldr z15, \[sp, #8, mul vl\] -+** ldr z16, \[sp, #9, mul vl\] -+** ldr z17, \[sp, #10, mul vl\] -+** ldr z18, \[sp, #11, mul vl\] -+** ldr z19, \[sp, #12, mul vl\] -+** ldr z20, \[sp, #13, mul vl\] -+** ldr z21, \[sp, #14, mul vl\] -+** ldr z22, \[sp, #15, mul vl\] -+** ldr z23, \[sp, #16, mul vl\] -+** ldr p4, \[sp\] -+** ldr p5, \[sp, #1, mul vl\] -+** ldr p6, \[sp, #2, mul vl\] -+** ldr p7, \[sp, #3, mul vl\] -+** ldr p8, \[sp, #4, mul vl\] -+** ldr p9, \[sp, #5, mul vl\] -+** ldr p10, \[sp, #6, mul vl\] -+** ldr p11, \[sp, #7, mul vl\] -+** addvl sp, sp, #17 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+void -+calls_vpcs_ptr (__SVInt8_t x, -+ void (*__attribute__((aarch64_vector_pcs)) fn) (void)) -+{ -+ fn (); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_2_le_wrap.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_2_le_wrap.c -new file mode 100644 -index 000000000..e81194c74 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_2_le_wrap.c -@@ -0,0 +1,255 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -mlittle-endian -fshrink-wrap -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+void standard_callee (void); -+__attribute__((aarch64_vector_pcs)) void vpcs_callee (void); -+ -+/* -+** calls_standard: -+** stp x29, x30, \[sp, -16\]! 
-+** mov x29, sp -+** addvl sp, sp, #-17 -+** str p4, \[sp\] -+** str p5, \[sp, #1, mul vl\] -+** str p6, \[sp, #2, mul vl\] -+** str p7, \[sp, #3, mul vl\] -+** str p8, \[sp, #4, mul vl\] -+** str p9, \[sp, #5, mul vl\] -+** str p10, \[sp, #6, mul vl\] -+** str p11, \[sp, #7, mul vl\] -+** str z8, \[sp, #1, mul vl\] -+** str z9, \[sp, #2, mul vl\] -+** str z10, \[sp, #3, mul vl\] -+** str z11, \[sp, #4, mul vl\] -+** str z12, \[sp, #5, mul vl\] -+** str z13, \[sp, #6, mul vl\] -+** str z14, \[sp, #7, mul vl\] -+** str z15, \[sp, #8, mul vl\] -+** str z16, \[sp, #9, mul vl\] -+** str z17, \[sp, #10, mul vl\] -+** str z18, \[sp, #11, mul vl\] -+** str z19, \[sp, #12, mul vl\] -+** str z20, \[sp, #13, mul vl\] -+** str z21, \[sp, #14, mul vl\] -+** str z22, \[sp, #15, mul vl\] -+** str z23, \[sp, #16, mul vl\] -+** bl standard_callee -+** ldr z8, \[sp, #1, mul vl\] -+** ldr z9, \[sp, #2, mul vl\] -+** ldr z10, \[sp, #3, mul vl\] -+** ldr z11, \[sp, #4, mul vl\] -+** ldr z12, \[sp, #5, mul vl\] -+** ldr z13, \[sp, #6, mul vl\] -+** ldr z14, \[sp, #7, mul vl\] -+** ldr z15, \[sp, #8, mul vl\] -+** ldr z16, \[sp, #9, mul vl\] -+** ldr z17, \[sp, #10, mul vl\] -+** ldr z18, \[sp, #11, mul vl\] -+** ldr z19, \[sp, #12, mul vl\] -+** ldr z20, \[sp, #13, mul vl\] -+** ldr z21, \[sp, #14, mul vl\] -+** ldr z22, \[sp, #15, mul vl\] -+** ldr z23, \[sp, #16, mul vl\] -+** ldr p4, \[sp\] -+** ldr p5, \[sp, #1, mul vl\] -+** ldr p6, \[sp, #2, mul vl\] -+** ldr p7, \[sp, #3, mul vl\] -+** ldr p8, \[sp, #4, mul vl\] -+** ldr p9, \[sp, #5, mul vl\] -+** ldr p10, \[sp, #6, mul vl\] -+** ldr p11, \[sp, #7, mul vl\] -+** addvl sp, sp, #17 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+void calls_standard (__SVInt8_t x) { standard_callee (); } -+ -+/* -+** calls_vpcs: -+** stp x29, x30, \[sp, -16\]! 
-+** mov x29, sp -+** addvl sp, sp, #-17 -+** str p4, \[sp\] -+** str p5, \[sp, #1, mul vl\] -+** str p6, \[sp, #2, mul vl\] -+** str p7, \[sp, #3, mul vl\] -+** str p8, \[sp, #4, mul vl\] -+** str p9, \[sp, #5, mul vl\] -+** str p10, \[sp, #6, mul vl\] -+** str p11, \[sp, #7, mul vl\] -+** str z8, \[sp, #1, mul vl\] -+** str z9, \[sp, #2, mul vl\] -+** str z10, \[sp, #3, mul vl\] -+** str z11, \[sp, #4, mul vl\] -+** str z12, \[sp, #5, mul vl\] -+** str z13, \[sp, #6, mul vl\] -+** str z14, \[sp, #7, mul vl\] -+** str z15, \[sp, #8, mul vl\] -+** str z16, \[sp, #9, mul vl\] -+** str z17, \[sp, #10, mul vl\] -+** str z18, \[sp, #11, mul vl\] -+** str z19, \[sp, #12, mul vl\] -+** str z20, \[sp, #13, mul vl\] -+** str z21, \[sp, #14, mul vl\] -+** str z22, \[sp, #15, mul vl\] -+** str z23, \[sp, #16, mul vl\] -+** bl vpcs_callee -+** ldr z8, \[sp, #1, mul vl\] -+** ldr z9, \[sp, #2, mul vl\] -+** ldr z10, \[sp, #3, mul vl\] -+** ldr z11, \[sp, #4, mul vl\] -+** ldr z12, \[sp, #5, mul vl\] -+** ldr z13, \[sp, #6, mul vl\] -+** ldr z14, \[sp, #7, mul vl\] -+** ldr z15, \[sp, #8, mul vl\] -+** ldr z16, \[sp, #9, mul vl\] -+** ldr z17, \[sp, #10, mul vl\] -+** ldr z18, \[sp, #11, mul vl\] -+** ldr z19, \[sp, #12, mul vl\] -+** ldr z20, \[sp, #13, mul vl\] -+** ldr z21, \[sp, #14, mul vl\] -+** ldr z22, \[sp, #15, mul vl\] -+** ldr z23, \[sp, #16, mul vl\] -+** ldr p4, \[sp\] -+** ldr p5, \[sp, #1, mul vl\] -+** ldr p6, \[sp, #2, mul vl\] -+** ldr p7, \[sp, #3, mul vl\] -+** ldr p8, \[sp, #4, mul vl\] -+** ldr p9, \[sp, #5, mul vl\] -+** ldr p10, \[sp, #6, mul vl\] -+** ldr p11, \[sp, #7, mul vl\] -+** addvl sp, sp, #17 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+void calls_vpcs (__SVInt8_t x) { vpcs_callee (); } -+ -+/* -+** calls_standard_ptr: -+** stp x29, x30, \[sp, -16\]! 
-+** mov x29, sp -+** addvl sp, sp, #-17 -+** str p4, \[sp\] -+** str p5, \[sp, #1, mul vl\] -+** str p6, \[sp, #2, mul vl\] -+** str p7, \[sp, #3, mul vl\] -+** str p8, \[sp, #4, mul vl\] -+** str p9, \[sp, #5, mul vl\] -+** str p10, \[sp, #6, mul vl\] -+** str p11, \[sp, #7, mul vl\] -+** str z8, \[sp, #1, mul vl\] -+** str z9, \[sp, #2, mul vl\] -+** str z10, \[sp, #3, mul vl\] -+** str z11, \[sp, #4, mul vl\] -+** str z12, \[sp, #5, mul vl\] -+** str z13, \[sp, #6, mul vl\] -+** str z14, \[sp, #7, mul vl\] -+** str z15, \[sp, #8, mul vl\] -+** str z16, \[sp, #9, mul vl\] -+** str z17, \[sp, #10, mul vl\] -+** str z18, \[sp, #11, mul vl\] -+** str z19, \[sp, #12, mul vl\] -+** str z20, \[sp, #13, mul vl\] -+** str z21, \[sp, #14, mul vl\] -+** str z22, \[sp, #15, mul vl\] -+** str z23, \[sp, #16, mul vl\] -+** blr x0 -+** ldr z8, \[sp, #1, mul vl\] -+** ldr z9, \[sp, #2, mul vl\] -+** ldr z10, \[sp, #3, mul vl\] -+** ldr z11, \[sp, #4, mul vl\] -+** ldr z12, \[sp, #5, mul vl\] -+** ldr z13, \[sp, #6, mul vl\] -+** ldr z14, \[sp, #7, mul vl\] -+** ldr z15, \[sp, #8, mul vl\] -+** ldr z16, \[sp, #9, mul vl\] -+** ldr z17, \[sp, #10, mul vl\] -+** ldr z18, \[sp, #11, mul vl\] -+** ldr z19, \[sp, #12, mul vl\] -+** ldr z20, \[sp, #13, mul vl\] -+** ldr z21, \[sp, #14, mul vl\] -+** ldr z22, \[sp, #15, mul vl\] -+** ldr z23, \[sp, #16, mul vl\] -+** ldr p4, \[sp\] -+** ldr p5, \[sp, #1, mul vl\] -+** ldr p6, \[sp, #2, mul vl\] -+** ldr p7, \[sp, #3, mul vl\] -+** ldr p8, \[sp, #4, mul vl\] -+** ldr p9, \[sp, #5, mul vl\] -+** ldr p10, \[sp, #6, mul vl\] -+** ldr p11, \[sp, #7, mul vl\] -+** addvl sp, sp, #17 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+void -+calls_standard_ptr (__SVInt8_t x, void (*fn) (void)) -+{ -+ fn (); -+} -+ -+/* -+** calls_vpcs_ptr: -+** stp x29, x30, \[sp, -16\]! 
-+** mov x29, sp -+** addvl sp, sp, #-17 -+** str p4, \[sp\] -+** str p5, \[sp, #1, mul vl\] -+** str p6, \[sp, #2, mul vl\] -+** str p7, \[sp, #3, mul vl\] -+** str p8, \[sp, #4, mul vl\] -+** str p9, \[sp, #5, mul vl\] -+** str p10, \[sp, #6, mul vl\] -+** str p11, \[sp, #7, mul vl\] -+** str z8, \[sp, #1, mul vl\] -+** str z9, \[sp, #2, mul vl\] -+** str z10, \[sp, #3, mul vl\] -+** str z11, \[sp, #4, mul vl\] -+** str z12, \[sp, #5, mul vl\] -+** str z13, \[sp, #6, mul vl\] -+** str z14, \[sp, #7, mul vl\] -+** str z15, \[sp, #8, mul vl\] -+** str z16, \[sp, #9, mul vl\] -+** str z17, \[sp, #10, mul vl\] -+** str z18, \[sp, #11, mul vl\] -+** str z19, \[sp, #12, mul vl\] -+** str z20, \[sp, #13, mul vl\] -+** str z21, \[sp, #14, mul vl\] -+** str z22, \[sp, #15, mul vl\] -+** str z23, \[sp, #16, mul vl\] -+** blr x0 -+** ldr z8, \[sp, #1, mul vl\] -+** ldr z9, \[sp, #2, mul vl\] -+** ldr z10, \[sp, #3, mul vl\] -+** ldr z11, \[sp, #4, mul vl\] -+** ldr z12, \[sp, #5, mul vl\] -+** ldr z13, \[sp, #6, mul vl\] -+** ldr z14, \[sp, #7, mul vl\] -+** ldr z15, \[sp, #8, mul vl\] -+** ldr z16, \[sp, #9, mul vl\] -+** ldr z17, \[sp, #10, mul vl\] -+** ldr z18, \[sp, #11, mul vl\] -+** ldr z19, \[sp, #12, mul vl\] -+** ldr z20, \[sp, #13, mul vl\] -+** ldr z21, \[sp, #14, mul vl\] -+** ldr z22, \[sp, #15, mul vl\] -+** ldr z23, \[sp, #16, mul vl\] -+** ldr p4, \[sp\] -+** ldr p5, \[sp, #1, mul vl\] -+** ldr p6, \[sp, #2, mul vl\] -+** ldr p7, \[sp, #3, mul vl\] -+** ldr p8, \[sp, #4, mul vl\] -+** ldr p9, \[sp, #5, mul vl\] -+** ldr p10, \[sp, #6, mul vl\] -+** ldr p11, \[sp, #7, mul vl\] -+** addvl sp, sp, #17 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+void -+calls_vpcs_ptr (__SVInt8_t x, -+ void (*__attribute__((aarch64_vector_pcs)) fn) (void)) -+{ -+ fn (); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_3.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_3.c -new file mode 100644 -index 000000000..1fe86b0ea ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_3.c -@@ -0,0 +1,92 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+#include -+ -+int sve_callee (svint8_t); -+ -+/* -+** standard_caller: -+** stp x29, x30, \[sp, -16\]! -+** mov x29, sp -+** mov z0\.b, #1 -+** bl sve_callee -+** add w0, w0, #?1 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+int standard_caller (void) { return sve_callee (svdup_s8 (1)) + 1; } -+ -+/* -+** vpcs_caller: -+** stp x29, x30, \[sp, -16\]! -+** mov x29, sp -+** mov z0\.b, #1 -+** bl sve_callee -+** add w0, w0, #?1 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+__attribute__((aarch64_vector_pcs)) -+int vpcs_caller (void) { return sve_callee (svdup_s8 (1)) + 1; } -+ -+/* -+** sve_caller: -+** stp x29, x30, \[sp, -16\]! -+** mov x29, sp -+** mov z0\.b, #1 -+** bl sve_callee -+** add w0, w0, #?1 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+int sve_caller (svbool_t p0) { return sve_callee (svdup_s8 (1)) + 1; } -+ -+/* -+** standard_caller_ptr: -+** stp x29, x30, \[sp, -16\]! -+** mov x29, sp -+** mov z0\.h, #1 -+** blr x0 -+** add w0, w0, #?1 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+int -+standard_caller_ptr (int (*fn) (__SVInt16_t)) -+{ -+ return fn (svdup_s16 (1)) + 1; -+} -+ -+/* -+** vpcs_caller_ptr: -+** stp x29, x30, \[sp, -16\]! 
-+** mov x29, sp -+** mov z0\.h, #1 -+** blr x0 -+** add w0, w0, #?1 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+int __attribute__((aarch64_vector_pcs)) -+vpcs_caller_ptr (int (*fn) (__SVInt16_t)) -+{ -+ return fn (svdup_s16 (1)) + 1; -+} -+ -+/* -+** sve_caller_ptr: -+** stp x29, x30, \[sp, -16\]! -+** mov x29, sp -+** mov z0\.h, #1 -+** blr x0 -+** add w0, w0, #?1 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+int -+sve_caller_ptr (svbool_t pg, int (*fn) (svint16_t)) -+{ -+ return fn (svdup_s16 (1)) + 1; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_4_be.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_4_be.c -new file mode 100644 -index 000000000..c42699dc7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_4_be.c -@@ -0,0 +1,84 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+void standard_callee (__SVInt8_t *); -+ -+/* -+** calls_standard: -+** addvl sp, sp, #-1 -+** ( -+** stp x29, x30, \[sp, -16\]! -+** | -+** sub sp, sp, #?16 -+** stp x29, x30, \[sp\] -+** ) -+** mov x29, sp -+** addvl sp, sp, #-17 -+** str p4, \[sp\] -+** str p5, \[sp, #1, mul vl\] -+** str p6, \[sp, #2, mul vl\] -+** str p7, \[sp, #3, mul vl\] -+** str p8, \[sp, #4, mul vl\] -+** str p9, \[sp, #5, mul vl\] -+** str p10, \[sp, #6, mul vl\] -+** str p11, \[sp, #7, mul vl\] -+** ptrue p0\.b, all -+** st1d z8\.d, p0, \[sp, #1, mul vl\] -+** st1d z9\.d, p0, \[sp, #2, mul vl\] -+** st1d z10\.d, p0, \[sp, #3, mul vl\] -+** st1d z11\.d, p0, \[sp, #4, mul vl\] -+** st1d z12\.d, p0, \[sp, #5, mul vl\] -+** st1d z13\.d, p0, \[sp, #6, mul vl\] -+** st1d z14\.d, p0, \[sp, #7, mul vl\] -+** addvl x11, sp, #16 -+** st1d z15\.d, p0, \[x11, #-8, mul vl\] -+** str z16, \[sp, #9, mul vl\] -+** str z17, \[sp, #10, mul vl\] -+** str z18, \[sp, #11, mul vl\] -+** str z19, \[sp, #12, mul vl\] -+** str z20, \[sp, #13, mul vl\] -+** str z21, \[sp, #14, mul vl\] -+** str z22, \[sp, #15, mul vl\] -+** str z23, \[sp, #16, mul vl\] -+** addvl x0, sp, #17 -+** add x0, x0, #?16 -+** bl standard_callee -+** ptrue p0\.b, all -+** ld1d z8\.d, p0/z, \[sp, #1, mul vl\] -+** ld1d z9\.d, p0/z, \[sp, #2, mul vl\] -+** ld1d z10\.d, p0/z, \[sp, #3, mul vl\] -+** ld1d z11\.d, p0/z, \[sp, #4, mul vl\] -+** ld1d z12\.d, p0/z, \[sp, #5, mul vl\] -+** ld1d z13\.d, p0/z, \[sp, #6, mul vl\] -+** ld1d z14\.d, p0/z, \[sp, #7, mul vl\] -+** addvl x11, sp, #16 -+** ld1d z15\.d, p0/z, \[x11, #-8, mul vl\] -+** ldr z16, \[sp, #9, mul vl\] -+** ldr z17, \[sp, #10, mul vl\] -+** ldr z18, \[sp, #11, mul vl\] -+** ldr z19, \[sp, #12, mul vl\] -+** ldr z20, \[sp, #13, mul vl\] -+** ldr z21, \[sp, #14, mul vl\] -+** ldr z22, \[sp, #15, mul vl\] -+** ldr z23, \[sp, #16, mul vl\] -+** ldr p4, \[sp\] -+** ldr p5, \[sp, #1, mul vl\] -+** ldr p6, \[sp, #2, mul vl\] -+** ldr p7, \[sp, #3, mul vl\] -+** ldr p8, \[sp, #4, mul vl\] -+** ldr p9, \[sp, #5, mul vl\] -+** ldr p10, \[sp, #6, mul vl\] -+** ldr p11, \[sp, #7, mul vl\] -+** addvl sp, sp, #17 -+** ( -+** ldp x29, x30, \[sp\], 16 -+** addvl sp, sp, #1 -+** | -+** ldp x29, x30, \[sp\] -+** addvl sp, sp, #1 -+** add sp, sp, #?16 -+** ) -+** ret -+*/ -+void calls_standard (__SVInt8_t x) { __SVInt8_t tmp; standard_callee (&tmp); } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_4_le.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_4_le.c -new file mode 100644 -index 000000000..49fe96800 ---- /dev/null -+++ 
b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_4_le.c -@@ -0,0 +1,80 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+void standard_callee (__SVInt8_t *); -+ -+/* -+** calls_standard: -+** addvl sp, sp, #-1 -+** ( -+** stp x29, x30, \[sp, -16\]! -+** | -+** sub sp, sp, #?16 -+** stp x29, x30, \[sp\] -+** ) -+** mov x29, sp -+** addvl sp, sp, #-17 -+** str p4, \[sp\] -+** str p5, \[sp, #1, mul vl\] -+** str p6, \[sp, #2, mul vl\] -+** str p7, \[sp, #3, mul vl\] -+** str p8, \[sp, #4, mul vl\] -+** str p9, \[sp, #5, mul vl\] -+** str p10, \[sp, #6, mul vl\] -+** str p11, \[sp, #7, mul vl\] -+** str z8, \[sp, #1, mul vl\] -+** str z9, \[sp, #2, mul vl\] -+** str z10, \[sp, #3, mul vl\] -+** str z11, \[sp, #4, mul vl\] -+** str z12, \[sp, #5, mul vl\] -+** str z13, \[sp, #6, mul vl\] -+** str z14, \[sp, #7, mul vl\] -+** str z15, \[sp, #8, mul vl\] -+** str z16, \[sp, #9, mul vl\] -+** str z17, \[sp, #10, mul vl\] -+** str z18, \[sp, #11, mul vl\] -+** str z19, \[sp, #12, mul vl\] -+** str z20, \[sp, #13, mul vl\] -+** str z21, \[sp, #14, mul vl\] -+** str z22, \[sp, #15, mul vl\] -+** str z23, \[sp, #16, mul vl\] -+** addvl x0, sp, #17 -+** add x0, x0, #?16 -+** bl standard_callee -+** ldr z8, \[sp, #1, mul vl\] -+** ldr z9, \[sp, #2, mul vl\] -+** ldr z10, \[sp, #3, mul vl\] -+** ldr z11, \[sp, #4, mul vl\] -+** ldr z12, \[sp, #5, mul vl\] -+** ldr z13, \[sp, #6, mul vl\] -+** ldr z14, \[sp, #7, mul vl\] -+** ldr z15, \[sp, #8, mul vl\] -+** ldr z16, \[sp, #9, mul vl\] -+** ldr z17, \[sp, #10, mul vl\] -+** ldr z18, \[sp, #11, mul vl\] -+** ldr z19, \[sp, #12, mul vl\] -+** ldr z20, \[sp, #13, mul vl\] -+** ldr z21, \[sp, #14, mul vl\] -+** ldr z22, \[sp, #15, mul vl\] -+** ldr z23, \[sp, #16, mul vl\] -+** ldr p4, \[sp\] -+** ldr p5, \[sp, #1, mul vl\] -+** ldr p6, \[sp, #2, mul vl\] -+** ldr p7, \[sp, #3, mul vl\] -+** ldr p8, \[sp, #4, mul vl\] -+** ldr p9, \[sp, #5, mul vl\] -+** ldr p10, \[sp, #6, mul vl\] -+** ldr p11, \[sp, #7, mul vl\] -+** addvl sp, sp, #17 -+** ( -+** ldp x29, x30, \[sp\], 16 -+** addvl sp, sp, #1 -+** | -+** ldp x29, x30, \[sp\] -+** addvl sp, sp, #1 -+** add sp, sp, #?16 -+** ) -+** ret -+*/ -+void calls_standard (__SVInt8_t x) { __SVInt8_t tmp; standard_callee (&tmp); } -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_5_be.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_5_be.c -new file mode 100644 -index 000000000..dc3282eee ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_5_be.c -@@ -0,0 +1,78 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+void standard_callee (void); -+ -+/* -+** calls_standard: -+** stp x29, x30, \[sp, -16\]! 
-+** mov x29, sp -+** addvl sp, sp, #-17 -+** ptrue p0\.b, all -+** st1d z8\.d, p0, \[sp, #1, mul vl\] -+** st1d z9\.d, p0, \[sp, #2, mul vl\] -+** st1d z10\.d, p0, \[sp, #3, mul vl\] -+** st1d z11\.d, p0, \[sp, #4, mul vl\] -+** st1d z12\.d, p0, \[sp, #5, mul vl\] -+** st1d z13\.d, p0, \[sp, #6, mul vl\] -+** st1d z14\.d, p0, \[sp, #7, mul vl\] -+** addvl x11, sp, #16 -+** st1d z15\.d, p0, \[x11, #-8, mul vl\] -+** cbnz w0, \.L[0-9]+ -+** ptrue p0\.b, all -+** ld1d z8\.d, p0/z, \[sp, #1, mul vl\] -+** ld1d z9\.d, p0/z, \[sp, #2, mul vl\] -+** ld1d z10\.d, p0/z, \[sp, #3, mul vl\] -+** ld1d z11\.d, p0/z, \[sp, #4, mul vl\] -+** ld1d z12\.d, p0/z, \[sp, #5, mul vl\] -+** ld1d z13\.d, p0/z, \[sp, #6, mul vl\] -+** ld1d z14\.d, p0/z, \[sp, #7, mul vl\] -+** addvl x11, sp, #16 -+** ld1d z15\.d, p0/z, \[x11, #-8, mul vl\] -+** addvl sp, sp, #17 -+** ldp x29, x30, \[sp\], 16 -+** ret -+** ... -+** str z16, \[sp, #9, mul vl\] -+** str z17, \[sp, #10, mul vl\] -+** str z18, \[sp, #11, mul vl\] -+** str z19, \[sp, #12, mul vl\] -+** str z20, \[sp, #13, mul vl\] -+** str z21, \[sp, #14, mul vl\] -+** str z22, \[sp, #15, mul vl\] -+** str z23, \[sp, #16, mul vl\] -+** str p4, \[sp\] -+** str p5, \[sp, #1, mul vl\] -+** str p6, \[sp, #2, mul vl\] -+** str p7, \[sp, #3, mul vl\] -+** str p8, \[sp, #4, mul vl\] -+** str p9, \[sp, #5, mul vl\] -+** str p10, \[sp, #6, mul vl\] -+** str p11, \[sp, #7, mul vl\] -+** bl standard_callee -+** ldr z16, \[sp, #9, mul vl\] -+** ldr z17, \[sp, #10, mul vl\] -+** ldr z18, \[sp, #11, mul vl\] -+** ldr z19, \[sp, #12, mul vl\] -+** ldr z20, \[sp, #13, mul vl\] -+** ldr z21, \[sp, #14, mul vl\] -+** ldr z22, \[sp, #15, mul vl\] -+** ldr z23, \[sp, #16, mul vl\] -+** ldr p4, \[sp\] -+** ldr p5, \[sp, #1, mul vl\] -+** ldr p6, \[sp, #2, mul vl\] -+** ldr p7, \[sp, #3, mul vl\] -+** ldr p8, \[sp, #4, mul vl\] -+** ldr p9, \[sp, #5, mul vl\] -+** ldr p10, \[sp, #6, mul vl\] -+** ldr p11, \[sp, #7, mul vl\] -+** b \.L[0-9]+ -+*/ -+void -+calls_standard (__SVInt8_t x, int y) -+{ -+ asm volatile ("" ::: "z8"); -+ if (__builtin_expect (y, 0)) -+ standard_callee (); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_5_le.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_5_le.c -new file mode 100644 -index 000000000..0d29ff2fd ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/saves_5_le.c -@@ -0,0 +1,74 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -mlittle-endian -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+void standard_callee (void); -+ -+/* -+** calls_standard: -+** stp x29, x30, \[sp, -16\]! -+** mov x29, sp -+** addvl sp, sp, #-17 -+** str z8, \[sp, #1, mul vl\] -+** cbnz w0, \.L[0-9]+ -+** ldr z8, \[sp, #1, mul vl\] -+** addvl sp, sp, #17 -+** ldp x29, x30, \[sp\], 16 -+** ret -+** ... 
-+** str z9, \[sp, #2, mul vl\] -+** str z10, \[sp, #3, mul vl\] -+** str z11, \[sp, #4, mul vl\] -+** str z12, \[sp, #5, mul vl\] -+** str z13, \[sp, #6, mul vl\] -+** str z14, \[sp, #7, mul vl\] -+** str z15, \[sp, #8, mul vl\] -+** str z16, \[sp, #9, mul vl\] -+** str z17, \[sp, #10, mul vl\] -+** str z18, \[sp, #11, mul vl\] -+** str z19, \[sp, #12, mul vl\] -+** str z20, \[sp, #13, mul vl\] -+** str z21, \[sp, #14, mul vl\] -+** str z22, \[sp, #15, mul vl\] -+** str z23, \[sp, #16, mul vl\] -+** str p4, \[sp\] -+** str p5, \[sp, #1, mul vl\] -+** str p6, \[sp, #2, mul vl\] -+** str p7, \[sp, #3, mul vl\] -+** str p8, \[sp, #4, mul vl\] -+** str p9, \[sp, #5, mul vl\] -+** str p10, \[sp, #6, mul vl\] -+** str p11, \[sp, #7, mul vl\] -+** bl standard_callee -+** ldr z9, \[sp, #2, mul vl\] -+** ldr z10, \[sp, #3, mul vl\] -+** ldr z11, \[sp, #4, mul vl\] -+** ldr z12, \[sp, #5, mul vl\] -+** ldr z13, \[sp, #6, mul vl\] -+** ldr z14, \[sp, #7, mul vl\] -+** ldr z15, \[sp, #8, mul vl\] -+** ldr z16, \[sp, #9, mul vl\] -+** ldr z17, \[sp, #10, mul vl\] -+** ldr z18, \[sp, #11, mul vl\] -+** ldr z19, \[sp, #12, mul vl\] -+** ldr z20, \[sp, #13, mul vl\] -+** ldr z21, \[sp, #14, mul vl\] -+** ldr z22, \[sp, #15, mul vl\] -+** ldr z23, \[sp, #16, mul vl\] -+** ldr p4, \[sp\] -+** ldr p5, \[sp, #1, mul vl\] -+** ldr p6, \[sp, #2, mul vl\] -+** ldr p7, \[sp, #3, mul vl\] -+** ldr p8, \[sp, #4, mul vl\] -+** ldr p9, \[sp, #5, mul vl\] -+** ldr p10, \[sp, #6, mul vl\] -+** ldr p11, \[sp, #7, mul vl\] -+** b \.L[0-9]+ -+*/ -+void -+calls_standard (__SVInt8_t x, int y) -+{ -+ asm volatile ("" ::: "z8"); -+ if (__builtin_expect (y, 0)) -+ standard_callee (); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1.c -new file mode 100644 -index 000000000..485d01875 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1.c -@@ -0,0 +1,204 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -mlittle-endian -fshrink-wrap -fstack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** test_1: -+** cntb x12 -+** mov x13, #?17 -+** mul x12, x12, x13 -+** mov x11, sp -+** ... 
-+** sub sp, sp, x12 -+** str p4, \[sp\] -+** str p5, \[sp, #1, mul vl\] -+** str p6, \[sp, #2, mul vl\] -+** str p7, \[sp, #3, mul vl\] -+** str p8, \[sp, #4, mul vl\] -+** str p9, \[sp, #5, mul vl\] -+** str p10, \[sp, #6, mul vl\] -+** str p11, \[sp, #7, mul vl\] -+** str z8, \[sp, #1, mul vl\] -+** str z9, \[sp, #2, mul vl\] -+** str z10, \[sp, #3, mul vl\] -+** str z11, \[sp, #4, mul vl\] -+** str z12, \[sp, #5, mul vl\] -+** str z13, \[sp, #6, mul vl\] -+** str z14, \[sp, #7, mul vl\] -+** str z15, \[sp, #8, mul vl\] -+** str z16, \[sp, #9, mul vl\] -+** str z17, \[sp, #10, mul vl\] -+** str z18, \[sp, #11, mul vl\] -+** str z19, \[sp, #12, mul vl\] -+** str z20, \[sp, #13, mul vl\] -+** str z21, \[sp, #14, mul vl\] -+** str z22, \[sp, #15, mul vl\] -+** str z23, \[sp, #16, mul vl\] -+** ptrue p0\.b, all -+** ldr z8, \[sp, #1, mul vl\] -+** ldr z9, \[sp, #2, mul vl\] -+** ldr z10, \[sp, #3, mul vl\] -+** ldr z11, \[sp, #4, mul vl\] -+** ldr z12, \[sp, #5, mul vl\] -+** ldr z13, \[sp, #6, mul vl\] -+** ldr z14, \[sp, #7, mul vl\] -+** ldr z15, \[sp, #8, mul vl\] -+** ldr z16, \[sp, #9, mul vl\] -+** ldr z17, \[sp, #10, mul vl\] -+** ldr z18, \[sp, #11, mul vl\] -+** ldr z19, \[sp, #12, mul vl\] -+** ldr z20, \[sp, #13, mul vl\] -+** ldr z21, \[sp, #14, mul vl\] -+** ldr z22, \[sp, #15, mul vl\] -+** ldr z23, \[sp, #16, mul vl\] -+** ldr p4, \[sp\] -+** ldr p5, \[sp, #1, mul vl\] -+** ldr p6, \[sp, #2, mul vl\] -+** ldr p7, \[sp, #3, mul vl\] -+** ldr p8, \[sp, #4, mul vl\] -+** ldr p9, \[sp, #5, mul vl\] -+** ldr p10, \[sp, #6, mul vl\] -+** ldr p11, \[sp, #7, mul vl\] -+** addvl sp, sp, #17 -+** ret -+*/ -+svbool_t -+test_1 (void) -+{ -+ asm volatile ("" ::: -+ "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", -+ "z8", "z9", "z10", "z11", "z12", "z13", "z14", "z15", -+ "z16", "z17", "z18", "z19", "z20", "z21", "z22", "z23", -+ "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", -+ "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", -+ "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_2: -+** ptrue p0\.b, all -+** ret -+*/ -+svbool_t -+test_2 (void) -+{ -+ asm volatile ("" ::: -+ "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", -+ "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", -+ "p0", "p1", "p2", "p3", "p12", "p13", "p14", "p15"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_3: -+** cntb x12, all, mul #6 -+** mov x11, sp -+** ... -+** sub sp, sp, x12 -+** str p5, \[sp\] -+** str p6, \[sp, #1, mul vl\] -+** str p11, \[sp, #2, mul vl\] -+** str z8, \[sp, #1, mul vl\] -+** str z13, \[sp, #2, mul vl\] -+** str z19, \[sp, #3, mul vl\] -+** str z20, \[sp, #4, mul vl\] -+** str z22, \[sp, #5, mul vl\] -+** ptrue p0\.b, all -+** ldr z8, \[sp, #1, mul vl\] -+** ldr z13, \[sp, #2, mul vl\] -+** ldr z19, \[sp, #3, mul vl\] -+** ldr z20, \[sp, #4, mul vl\] -+** ldr z22, \[sp, #5, mul vl\] -+** ldr p5, \[sp\] -+** ldr p6, \[sp, #1, mul vl\] -+** ldr p11, \[sp, #2, mul vl\] -+** addvl sp, sp, #6 -+** ret -+*/ -+svbool_t -+test_3 (void) -+{ -+ asm volatile ("" ::: -+ "z8", "z13", "z19", "z20", "z22", -+ "p5", "p6", "p11"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_4: -+** cntb x12 -+** mov x11, sp -+** ... -+** sub sp, sp, x12 -+** str p4, \[sp\] -+** ptrue p0\.b, all -+** ldr p4, \[sp\] -+** addvl sp, sp, #1 -+** ret -+*/ -+svbool_t -+test_4 (void) -+{ -+ asm volatile ("" ::: "p4"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_5: -+** cntb x12 -+** mov x11, sp -+** ... 
-+** sub sp, sp, x12 -+** str z15, \[sp\] -+** ptrue p0\.b, all -+** ldr z15, \[sp\] -+** addvl sp, sp, #1 -+** ret -+*/ -+svbool_t -+test_5 (void) -+{ -+ asm volatile ("" ::: "z15"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_6: -+** cntb x12 -+** mov x11, sp -+** ... -+** sub sp, sp, x12 -+** str z15, \[sp\] -+** mov z0\.b, #1 -+** ldr z15, \[sp\] -+** addvl sp, sp, #1 -+** ret -+*/ -+svint8_t -+test_6 (svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) -+{ -+ asm volatile ("" :: "Upa" (p0), "Upa" (p1), "Upa" (p2), "Upa" (p3) : "z15"); -+ return svdup_s8 (1); -+} -+ -+/* -+** test_7: -+** cntb x12 -+** mov x11, sp -+** ... -+** sub sp, sp, x12 -+** str z16, \[sp\] -+** ptrue p0\.b, all -+** ldr z16, \[sp\] -+** addvl sp, sp, #1 -+** ret -+*/ -+svbool_t -+test_7 (void) -+{ -+ asm volatile ("" ::: "z16"); -+ return svptrue_b8 (); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1_1024.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1_1024.c -new file mode 100644 -index 000000000..087e8db9e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1_1024.c -@@ -0,0 +1,184 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -mlittle-endian -fshrink-wrap -fstack-clash-protection -msve-vector-bits=1024 -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** test_1: -+** sub sp, sp, #2176 -+** str p4, \[sp\] -+** str p5, \[sp, #1, mul vl\] -+** str p6, \[sp, #2, mul vl\] -+** str p7, \[sp, #3, mul vl\] -+** str p8, \[sp, #4, mul vl\] -+** str p9, \[sp, #5, mul vl\] -+** str p10, \[sp, #6, mul vl\] -+** str p11, \[sp, #7, mul vl\] -+** str z8, \[sp, #1, mul vl\] -+** str z9, \[sp, #2, mul vl\] -+** str z10, \[sp, #3, mul vl\] -+** str z11, \[sp, #4, mul vl\] -+** str z12, \[sp, #5, mul vl\] -+** str z13, \[sp, #6, mul vl\] -+** str z14, \[sp, #7, mul vl\] -+** str z15, \[sp, #8, mul vl\] -+** str z16, \[sp, #9, mul vl\] -+** str z17, \[sp, #10, mul vl\] -+** str z18, \[sp, #11, mul vl\] -+** str z19, \[sp, #12, mul vl\] -+** str z20, \[sp, #13, mul vl\] -+** str z21, \[sp, #14, mul vl\] -+** str z22, \[sp, #15, mul vl\] -+** str z23, \[sp, #16, mul vl\] -+** ptrue p0\.b, vl128 -+** ldr z8, \[sp, #1, mul vl\] -+** ldr z9, \[sp, #2, mul vl\] -+** ldr z10, \[sp, #3, mul vl\] -+** ldr z11, \[sp, #4, mul vl\] -+** ldr z12, \[sp, #5, mul vl\] -+** ldr z13, \[sp, #6, mul vl\] -+** ldr z14, \[sp, #7, mul vl\] -+** ldr z15, \[sp, #8, mul vl\] -+** ldr z16, \[sp, #9, mul vl\] -+** ldr z17, \[sp, #10, mul vl\] -+** ldr z18, \[sp, #11, mul vl\] -+** ldr z19, \[sp, #12, mul vl\] -+** ldr z20, \[sp, #13, mul vl\] -+** ldr z21, \[sp, #14, mul vl\] -+** ldr z22, \[sp, #15, mul vl\] -+** ldr z23, \[sp, #16, mul vl\] -+** ldr p4, \[sp\] -+** ldr p5, \[sp, #1, mul vl\] -+** ldr p6, \[sp, #2, mul vl\] -+** ldr p7, \[sp, #3, mul vl\] -+** ldr p8, \[sp, #4, mul vl\] -+** ldr p9, \[sp, #5, mul vl\] -+** ldr p10, \[sp, #6, mul vl\] -+** ldr p11, \[sp, #7, mul vl\] -+** add sp, sp, #?2176 -+** ret -+*/ -+svbool_t -+test_1 (void) -+{ -+ asm volatile ("" ::: -+ "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", -+ "z8", "z9", "z10", "z11", "z12", "z13", "z14", "z15", -+ "z16", "z17", "z18", "z19", "z20", "z21", "z22", "z23", -+ "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", -+ "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", -+ "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_2: -+** ptrue p0\.b, vl128 -+** ret -+*/ -+svbool_t -+test_2 (void) -+{ -+ asm 
volatile ("" ::: -+ "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", -+ "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", -+ "p0", "p1", "p2", "p3", "p12", "p13", "p14", "p15"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_3: -+** sub sp, sp, #768 -+** str p5, \[sp\] -+** str p6, \[sp, #1, mul vl\] -+** str p11, \[sp, #2, mul vl\] -+** str z8, \[sp, #1, mul vl\] -+** str z13, \[sp, #2, mul vl\] -+** str z19, \[sp, #3, mul vl\] -+** str z20, \[sp, #4, mul vl\] -+** str z22, \[sp, #5, mul vl\] -+** ptrue p0\.b, vl128 -+** ldr z8, \[sp, #1, mul vl\] -+** ldr z13, \[sp, #2, mul vl\] -+** ldr z19, \[sp, #3, mul vl\] -+** ldr z20, \[sp, #4, mul vl\] -+** ldr z22, \[sp, #5, mul vl\] -+** ldr p5, \[sp\] -+** ldr p6, \[sp, #1, mul vl\] -+** ldr p11, \[sp, #2, mul vl\] -+** add sp, sp, #?768 -+** ret -+*/ -+svbool_t -+test_3 (void) -+{ -+ asm volatile ("" ::: -+ "z8", "z13", "z19", "z20", "z22", -+ "p5", "p6", "p11"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_4: -+** sub sp, sp, #128 -+** str p4, \[sp\] -+** ptrue p0\.b, vl128 -+** ldr p4, \[sp\] -+** add sp, sp, #?128 -+** ret -+*/ -+svbool_t -+test_4 (void) -+{ -+ asm volatile ("" ::: "p4"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_5: -+** sub sp, sp, #128 -+** str z15, \[sp\] -+** ptrue p0\.b, vl128 -+** ldr z15, \[sp\] -+** add sp, sp, #?128 -+** ret -+*/ -+svbool_t -+test_5 (void) -+{ -+ asm volatile ("" ::: "z15"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_6: -+** sub sp, sp, #128 -+** str z15, \[sp\] -+** mov z0\.b, #1 -+** ldr z15, \[sp\] -+** add sp, sp, #?128 -+** ret -+*/ -+svint8_t -+test_6 (svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) -+{ -+ asm volatile ("" :: "Upa" (p0), "Upa" (p1), "Upa" (p2), "Upa" (p3) : "z15"); -+ return svdup_s8 (1); -+} -+ -+/* -+** test_7: -+** sub sp, sp, #128 -+** str z16, \[sp\] -+** ptrue p0\.b, vl128 -+** ldr z16, \[sp\] -+** add sp, sp, #?128 -+** ret -+*/ -+svbool_t -+test_7 (void) -+{ -+ asm volatile ("" ::: "z16"); -+ return svptrue_b8 (); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1_2048.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1_2048.c -new file mode 100644 -index 000000000..e8dc5d5e4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1_2048.c -@@ -0,0 +1,185 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -mlittle-endian -fshrink-wrap -fstack-clash-protection -msve-vector-bits=2048 -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** test_1: -+** mov x12, #?4352 -+** sub sp, sp, x12 -+** str p4, \[sp\] -+** str p5, \[sp, #1, mul vl\] -+** str p6, \[sp, #2, mul vl\] -+** str p7, \[sp, #3, mul vl\] -+** str p8, \[sp, #4, mul vl\] -+** str p9, \[sp, #5, mul vl\] -+** str p10, \[sp, #6, mul vl\] -+** str p11, \[sp, #7, mul vl\] -+** str z8, \[sp, #1, mul vl\] -+** str z9, \[sp, #2, mul vl\] -+** str z10, \[sp, #3, mul vl\] -+** str z11, \[sp, #4, mul vl\] -+** str z12, \[sp, #5, mul vl\] -+** str z13, \[sp, #6, mul vl\] -+** str z14, \[sp, #7, mul vl\] -+** str z15, \[sp, #8, mul vl\] -+** str z16, \[sp, #9, mul vl\] -+** str z17, \[sp, #10, mul vl\] -+** str z18, \[sp, #11, mul vl\] -+** str z19, \[sp, #12, mul vl\] -+** str z20, \[sp, #13, mul vl\] -+** str z21, \[sp, #14, mul vl\] -+** str z22, \[sp, #15, mul vl\] -+** str z23, \[sp, #16, mul vl\] -+** ptrue p0\.b, vl256 -+** ldr z8, \[sp, #1, mul vl\] -+** ldr z9, \[sp, #2, mul vl\] -+** ldr z10, \[sp, #3, mul vl\] -+** ldr z11, \[sp, #4, mul vl\] -+** ldr z12, \[sp, #5, mul vl\] -+** ldr 
z13, \[sp, #6, mul vl\] -+** ldr z14, \[sp, #7, mul vl\] -+** ldr z15, \[sp, #8, mul vl\] -+** ldr z16, \[sp, #9, mul vl\] -+** ldr z17, \[sp, #10, mul vl\] -+** ldr z18, \[sp, #11, mul vl\] -+** ldr z19, \[sp, #12, mul vl\] -+** ldr z20, \[sp, #13, mul vl\] -+** ldr z21, \[sp, #14, mul vl\] -+** ldr z22, \[sp, #15, mul vl\] -+** ldr z23, \[sp, #16, mul vl\] -+** ldr p4, \[sp\] -+** ldr p5, \[sp, #1, mul vl\] -+** ldr p6, \[sp, #2, mul vl\] -+** ldr p7, \[sp, #3, mul vl\] -+** ldr p8, \[sp, #4, mul vl\] -+** ldr p9, \[sp, #5, mul vl\] -+** ldr p10, \[sp, #6, mul vl\] -+** ldr p11, \[sp, #7, mul vl\] -+** add sp, sp, x12 -+** ret -+*/ -+svbool_t -+test_1 (void) -+{ -+ asm volatile ("" ::: -+ "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", -+ "z8", "z9", "z10", "z11", "z12", "z13", "z14", "z15", -+ "z16", "z17", "z18", "z19", "z20", "z21", "z22", "z23", -+ "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", -+ "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", -+ "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_2: -+** ptrue p0\.b, vl256 -+** ret -+*/ -+svbool_t -+test_2 (void) -+{ -+ asm volatile ("" ::: -+ "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", -+ "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", -+ "p0", "p1", "p2", "p3", "p12", "p13", "p14", "p15"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_3: -+** sub sp, sp, #1536 -+** str p5, \[sp\] -+** str p6, \[sp, #1, mul vl\] -+** str p11, \[sp, #2, mul vl\] -+** str z8, \[sp, #1, mul vl\] -+** str z13, \[sp, #2, mul vl\] -+** str z19, \[sp, #3, mul vl\] -+** str z20, \[sp, #4, mul vl\] -+** str z22, \[sp, #5, mul vl\] -+** ptrue p0\.b, vl256 -+** ldr z8, \[sp, #1, mul vl\] -+** ldr z13, \[sp, #2, mul vl\] -+** ldr z19, \[sp, #3, mul vl\] -+** ldr z20, \[sp, #4, mul vl\] -+** ldr z22, \[sp, #5, mul vl\] -+** ldr p5, \[sp\] -+** ldr p6, \[sp, #1, mul vl\] -+** ldr p11, \[sp, #2, mul vl\] -+** add sp, sp, #?1536 -+** ret -+*/ -+svbool_t -+test_3 (void) -+{ -+ asm volatile ("" ::: -+ "z8", "z13", "z19", "z20", "z22", -+ "p5", "p6", "p11"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_4: -+** sub sp, sp, #256 -+** str p4, \[sp\] -+** ptrue p0\.b, vl256 -+** ldr p4, \[sp\] -+** add sp, sp, #?256 -+** ret -+*/ -+svbool_t -+test_4 (void) -+{ -+ asm volatile ("" ::: "p4"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_5: -+** sub sp, sp, #256 -+** str z15, \[sp\] -+** ptrue p0\.b, vl256 -+** ldr z15, \[sp\] -+** add sp, sp, #?256 -+** ret -+*/ -+svbool_t -+test_5 (void) -+{ -+ asm volatile ("" ::: "z15"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_6: -+** sub sp, sp, #256 -+** str z15, \[sp\] -+** mov z0\.b, #1 -+** ldr z15, \[sp\] -+** add sp, sp, #?256 -+** ret -+*/ -+svint8_t -+test_6 (svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) -+{ -+ asm volatile ("" :: "Upa" (p0), "Upa" (p1), "Upa" (p2), "Upa" (p3) : "z15"); -+ return svdup_s8 (1); -+} -+ -+/* -+** test_7: -+** sub sp, sp, #256 -+** str z16, \[sp\] -+** ptrue p0\.b, vl256 -+** ldr z16, \[sp\] -+** add sp, sp, #?256 -+** ret -+*/ -+svbool_t -+test_7 (void) -+{ -+ asm volatile ("" ::: "z16"); -+ return svptrue_b8 (); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1_256.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1_256.c -new file mode 100644 -index 000000000..73c49e4d4 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1_256.c -@@ -0,0 +1,184 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -mlittle-endian -fshrink-wrap -fstack-clash-protection 
-msve-vector-bits=256 -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** test_1: -+** sub sp, sp, #544 -+** str p4, \[sp\] -+** str p5, \[sp, #1, mul vl\] -+** str p6, \[sp, #2, mul vl\] -+** str p7, \[sp, #3, mul vl\] -+** str p8, \[sp, #4, mul vl\] -+** str p9, \[sp, #5, mul vl\] -+** str p10, \[sp, #6, mul vl\] -+** str p11, \[sp, #7, mul vl\] -+** str z8, \[sp, #1, mul vl\] -+** str z9, \[sp, #2, mul vl\] -+** str z10, \[sp, #3, mul vl\] -+** str z11, \[sp, #4, mul vl\] -+** str z12, \[sp, #5, mul vl\] -+** str z13, \[sp, #6, mul vl\] -+** str z14, \[sp, #7, mul vl\] -+** str z15, \[sp, #8, mul vl\] -+** str z16, \[sp, #9, mul vl\] -+** str z17, \[sp, #10, mul vl\] -+** str z18, \[sp, #11, mul vl\] -+** str z19, \[sp, #12, mul vl\] -+** str z20, \[sp, #13, mul vl\] -+** str z21, \[sp, #14, mul vl\] -+** str z22, \[sp, #15, mul vl\] -+** str z23, \[sp, #16, mul vl\] -+** ptrue p0\.b, vl32 -+** ldr z8, \[sp, #1, mul vl\] -+** ldr z9, \[sp, #2, mul vl\] -+** ldr z10, \[sp, #3, mul vl\] -+** ldr z11, \[sp, #4, mul vl\] -+** ldr z12, \[sp, #5, mul vl\] -+** ldr z13, \[sp, #6, mul vl\] -+** ldr z14, \[sp, #7, mul vl\] -+** ldr z15, \[sp, #8, mul vl\] -+** ldr z16, \[sp, #9, mul vl\] -+** ldr z17, \[sp, #10, mul vl\] -+** ldr z18, \[sp, #11, mul vl\] -+** ldr z19, \[sp, #12, mul vl\] -+** ldr z20, \[sp, #13, mul vl\] -+** ldr z21, \[sp, #14, mul vl\] -+** ldr z22, \[sp, #15, mul vl\] -+** ldr z23, \[sp, #16, mul vl\] -+** ldr p4, \[sp\] -+** ldr p5, \[sp, #1, mul vl\] -+** ldr p6, \[sp, #2, mul vl\] -+** ldr p7, \[sp, #3, mul vl\] -+** ldr p8, \[sp, #4, mul vl\] -+** ldr p9, \[sp, #5, mul vl\] -+** ldr p10, \[sp, #6, mul vl\] -+** ldr p11, \[sp, #7, mul vl\] -+** add sp, sp, #?544 -+** ret -+*/ -+svbool_t -+test_1 (void) -+{ -+ asm volatile ("" ::: -+ "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", -+ "z8", "z9", "z10", "z11", "z12", "z13", "z14", "z15", -+ "z16", "z17", "z18", "z19", "z20", "z21", "z22", "z23", -+ "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", -+ "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", -+ "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_2: -+** ptrue p0\.b, vl32 -+** ret -+*/ -+svbool_t -+test_2 (void) -+{ -+ asm volatile ("" ::: -+ "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", -+ "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", -+ "p0", "p1", "p2", "p3", "p12", "p13", "p14", "p15"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_3: -+** sub sp, sp, #192 -+** str p5, \[sp\] -+** str p6, \[sp, #1, mul vl\] -+** str p11, \[sp, #2, mul vl\] -+** str z8, \[sp, #1, mul vl\] -+** str z13, \[sp, #2, mul vl\] -+** str z19, \[sp, #3, mul vl\] -+** str z20, \[sp, #4, mul vl\] -+** str z22, \[sp, #5, mul vl\] -+** ptrue p0\.b, vl32 -+** ldr z8, \[sp, #1, mul vl\] -+** ldr z13, \[sp, #2, mul vl\] -+** ldr z19, \[sp, #3, mul vl\] -+** ldr z20, \[sp, #4, mul vl\] -+** ldr z22, \[sp, #5, mul vl\] -+** ldr p5, \[sp\] -+** ldr p6, \[sp, #1, mul vl\] -+** ldr p11, \[sp, #2, mul vl\] -+** add sp, sp, #?192 -+** ret -+*/ -+svbool_t -+test_3 (void) -+{ -+ asm volatile ("" ::: -+ "z8", "z13", "z19", "z20", "z22", -+ "p5", "p6", "p11"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_4: -+** sub sp, sp, #32 -+** str p4, \[sp\] -+** ptrue p0\.b, vl32 -+** ldr p4, \[sp\] -+** add sp, sp, #?32 -+** ret -+*/ -+svbool_t -+test_4 (void) -+{ -+ asm volatile ("" ::: "p4"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_5: -+** sub sp, sp, #32 -+** str z15, \[sp\] 
-+** ptrue p0\.b, vl32 -+** ldr z15, \[sp\] -+** add sp, sp, #?32 -+** ret -+*/ -+svbool_t -+test_5 (void) -+{ -+ asm volatile ("" ::: "z15"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_6: -+** sub sp, sp, #32 -+** str z15, \[sp\] -+** mov z0\.b, #1 -+** ldr z15, \[sp\] -+** add sp, sp, #?32 -+** ret -+*/ -+svint8_t -+test_6 (svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) -+{ -+ asm volatile ("" :: "Upa" (p0), "Upa" (p1), "Upa" (p2), "Upa" (p3) : "z15"); -+ return svdup_s8 (1); -+} -+ -+/* -+** test_7: -+** sub sp, sp, #32 -+** str z16, \[sp\] -+** ptrue p0\.b, vl32 -+** ldr z16, \[sp\] -+** add sp, sp, #?32 -+** ret -+*/ -+svbool_t -+test_7 (void) -+{ -+ asm volatile ("" ::: "z16"); -+ return svptrue_b8 (); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1_512.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1_512.c -new file mode 100644 -index 000000000..d4b524147 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1_512.c -@@ -0,0 +1,184 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -mlittle-endian -fshrink-wrap -fstack-clash-protection -msve-vector-bits=512 -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** test_1: -+** sub sp, sp, #1088 -+** str p4, \[sp\] -+** str p5, \[sp, #1, mul vl\] -+** str p6, \[sp, #2, mul vl\] -+** str p7, \[sp, #3, mul vl\] -+** str p8, \[sp, #4, mul vl\] -+** str p9, \[sp, #5, mul vl\] -+** str p10, \[sp, #6, mul vl\] -+** str p11, \[sp, #7, mul vl\] -+** str z8, \[sp, #1, mul vl\] -+** str z9, \[sp, #2, mul vl\] -+** str z10, \[sp, #3, mul vl\] -+** str z11, \[sp, #4, mul vl\] -+** str z12, \[sp, #5, mul vl\] -+** str z13, \[sp, #6, mul vl\] -+** str z14, \[sp, #7, mul vl\] -+** str z15, \[sp, #8, mul vl\] -+** str z16, \[sp, #9, mul vl\] -+** str z17, \[sp, #10, mul vl\] -+** str z18, \[sp, #11, mul vl\] -+** str z19, \[sp, #12, mul vl\] -+** str z20, \[sp, #13, mul vl\] -+** str z21, \[sp, #14, mul vl\] -+** str z22, \[sp, #15, mul vl\] -+** str z23, \[sp, #16, mul vl\] -+** ptrue p0\.b, vl64 -+** ldr z8, \[sp, #1, mul vl\] -+** ldr z9, \[sp, #2, mul vl\] -+** ldr z10, \[sp, #3, mul vl\] -+** ldr z11, \[sp, #4, mul vl\] -+** ldr z12, \[sp, #5, mul vl\] -+** ldr z13, \[sp, #6, mul vl\] -+** ldr z14, \[sp, #7, mul vl\] -+** ldr z15, \[sp, #8, mul vl\] -+** ldr z16, \[sp, #9, mul vl\] -+** ldr z17, \[sp, #10, mul vl\] -+** ldr z18, \[sp, #11, mul vl\] -+** ldr z19, \[sp, #12, mul vl\] -+** ldr z20, \[sp, #13, mul vl\] -+** ldr z21, \[sp, #14, mul vl\] -+** ldr z22, \[sp, #15, mul vl\] -+** ldr z23, \[sp, #16, mul vl\] -+** ldr p4, \[sp\] -+** ldr p5, \[sp, #1, mul vl\] -+** ldr p6, \[sp, #2, mul vl\] -+** ldr p7, \[sp, #3, mul vl\] -+** ldr p8, \[sp, #4, mul vl\] -+** ldr p9, \[sp, #5, mul vl\] -+** ldr p10, \[sp, #6, mul vl\] -+** ldr p11, \[sp, #7, mul vl\] -+** add sp, sp, #?1088 -+** ret -+*/ -+svbool_t -+test_1 (void) -+{ -+ asm volatile ("" ::: -+ "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", -+ "z8", "z9", "z10", "z11", "z12", "z13", "z14", "z15", -+ "z16", "z17", "z18", "z19", "z20", "z21", "z22", "z23", -+ "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", -+ "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", -+ "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_2: -+** ptrue p0\.b, vl64 -+** ret -+*/ -+svbool_t -+test_2 (void) -+{ -+ asm volatile ("" ::: -+ "z0", "z1", "z2", "z3", "z4", "z5", "z6", "z7", -+ "z24", "z25", "z26", "z27", "z28", "z29", "z30", "z31", -+ 
"p0", "p1", "p2", "p3", "p12", "p13", "p14", "p15"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_3: -+** sub sp, sp, #384 -+** str p5, \[sp\] -+** str p6, \[sp, #1, mul vl\] -+** str p11, \[sp, #2, mul vl\] -+** str z8, \[sp, #1, mul vl\] -+** str z13, \[sp, #2, mul vl\] -+** str z19, \[sp, #3, mul vl\] -+** str z20, \[sp, #4, mul vl\] -+** str z22, \[sp, #5, mul vl\] -+** ptrue p0\.b, vl64 -+** ldr z8, \[sp, #1, mul vl\] -+** ldr z13, \[sp, #2, mul vl\] -+** ldr z19, \[sp, #3, mul vl\] -+** ldr z20, \[sp, #4, mul vl\] -+** ldr z22, \[sp, #5, mul vl\] -+** ldr p5, \[sp\] -+** ldr p6, \[sp, #1, mul vl\] -+** ldr p11, \[sp, #2, mul vl\] -+** add sp, sp, #?384 -+** ret -+*/ -+svbool_t -+test_3 (void) -+{ -+ asm volatile ("" ::: -+ "z8", "z13", "z19", "z20", "z22", -+ "p5", "p6", "p11"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_4: -+** sub sp, sp, #64 -+** str p4, \[sp\] -+** ptrue p0\.b, vl64 -+** ldr p4, \[sp\] -+** add sp, sp, #?64 -+** ret -+*/ -+svbool_t -+test_4 (void) -+{ -+ asm volatile ("" ::: "p4"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_5: -+** sub sp, sp, #64 -+** str z15, \[sp\] -+** ptrue p0\.b, vl64 -+** ldr z15, \[sp\] -+** add sp, sp, #?64 -+** ret -+*/ -+svbool_t -+test_5 (void) -+{ -+ asm volatile ("" ::: "z15"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_6: -+** sub sp, sp, #64 -+** str z15, \[sp\] -+** mov z0\.b, #1 -+** ldr z15, \[sp\] -+** add sp, sp, #?64 -+** ret -+*/ -+svint8_t -+test_6 (svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3) -+{ -+ asm volatile ("" :: "Upa" (p0), "Upa" (p1), "Upa" (p2), "Upa" (p3) : "z15"); -+ return svdup_s8 (1); -+} -+ -+/* -+** test_7: -+** sub sp, sp, #64 -+** str z16, \[sp\] -+** ptrue p0\.b, vl64 -+** ldr z16, \[sp\] -+** add sp, sp, #?64 -+** ret -+*/ -+svbool_t -+test_7 (void) -+{ -+ asm volatile ("" ::: "z16"); -+ return svptrue_b8 (); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2.c -new file mode 100644 -index 000000000..4622a1eed ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2.c -@@ -0,0 +1,336 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -fshrink-wrap -fstack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+svbool_t take_stack_args (volatile void *, void *, int, int, int, -+ int, int, int, int); -+ -+/* -+** test_1: -+** cntb x12 -+** add x12, x12, #?16 -+** mov x11, sp -+** ... -+** sub sp, sp, x12 -+** str p4, \[sp\] -+** ... -+** ptrue p0\.b, all -+** ldr p4, \[sp\] -+** addvl sp, sp, #1 -+** add sp, sp, #?16 -+** ret -+*/ -+svbool_t -+test_1 (void) -+{ -+ volatile int x = 1; -+ asm volatile ("" ::: "p4"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_2: -+** stp x24, x25, \[sp, -48\]! -+** str x26, \[sp, 16\] -+** cntb x13 -+** mov x11, sp -+** ... -+** sub sp, sp, x13 -+** str p4, \[sp\] -+** ... -+** ptrue p0\.b, all -+** ldr p4, \[sp\] -+** addvl sp, sp, #1 -+** ldr x26, \[sp, 16\] -+** ldp x24, x25, \[sp\], 48 -+** ret -+*/ -+svbool_t -+test_2 (void) -+{ -+ volatile int x = 1; -+ asm volatile ("" ::: "p4", "x24", "x25", "x26"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_3: -+** cntb x12 -+** mov x13, #?4128 -+** add x12, x12, x13 -+** mov x11, sp -+** ... -+** sub sp, sp, x12 -+** addvl x11, sp, #1 -+** stp x24, x25, \[x11\] -+** str x26, \[x11, 16\] -+** str p4, \[sp\] -+** ... 
-+** ptrue p0\.b, all -+** ldr p4, \[sp\] -+** addvl sp, sp, #1 -+** ldp x24, x25, \[sp\] -+** ldr x26, \[sp, 16\] -+** mov x12, #?4128 -+** add sp, sp, x12 -+** ret -+*/ -+svbool_t -+test_3 (void) -+{ -+ volatile int x[1024]; -+ asm volatile ("" :: "r" (x) : "p4", "x24", "x25", "x26"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_4: -+** cntb x12, all, mul #2 -+** mov x11, sp -+** ... -+** sub sp, sp, x12 -+** str p4, \[sp\] -+** ... -+** ptrue p0\.h, all -+** ldr p4, \[sp\] -+** addvl sp, sp, #2 -+** ret -+*/ -+svbool_t -+test_4 (void) -+{ -+ volatile svint32_t b; -+ b = svdup_s32 (1); -+ asm volatile ("" ::: "p4"); -+ return svptrue_b16 (); -+} -+ -+/* -+** test_5: -+** cntb x12, all, mul #2 -+** add x12, x12, #?32 -+** mov x11, sp -+** ... -+** sub sp, sp, x12 -+** addvl x11, sp, #1 -+** stp x24, x25, \[x11\] -+** str x26, \[x11, 16\] -+** str p4, \[sp\] -+** ... -+** ptrue p0\.h, all -+** ldr p4, \[sp\] -+** addvl sp, sp, #1 -+** ldp x24, x25, \[sp\] -+** ldr x26, \[sp, 16\] -+** addvl sp, sp, #1 -+** add sp, sp, #?32 -+** ret -+*/ -+svbool_t -+test_5 (void) -+{ -+ volatile svint32_t b; -+ b = svdup_s32 (1); -+ asm volatile ("" ::: "p4", "x24", "x25", "x26"); -+ return svptrue_b16 (); -+} -+ -+/* -+** test_6: -+** stp x29, x30, \[sp, -16\]! -+** mov x29, sp -+** cntb x13 -+** mov x11, sp -+** ... -+** sub sp, sp, x13 -+** str p4, \[sp\] -+** sub sp, sp, #?16 -+** ... -+** ptrue p0\.b, all -+** add sp, sp, #?16 -+** ldr p4, \[sp\] -+** addvl sp, sp, #1 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svbool_t -+test_6 (void) -+{ -+ take_stack_args (0, 0, 1, 2, 3, 4, 5, 6, 7); -+ asm volatile ("" ::: "p4"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_7: -+** cntb x12 -+** mov x13, #?4112 -+** add x12, x12, x13 -+** mov x11, sp -+** ... -+** sub sp, sp, x12 -+** addvl x11, sp, #1 -+** stp x29, x30, \[x11\] -+** addvl x29, sp, #1 -+** str p4, \[sp\] -+** sub sp, sp, #?16 -+** ... -+** ptrue p0\.b, all -+** add sp, sp, #?16 -+** ldr p4, \[sp\] -+** addvl sp, sp, #1 -+** ldp x29, x30, \[sp\] -+** mov x12, #?4112 -+** add sp, sp, x12 -+** ret -+*/ -+svbool_t -+test_7 (void) -+{ -+ volatile int x[1024]; -+ take_stack_args (x, 0, 1, 2, 3, 4, 5, 6, 7); -+ asm volatile ("" ::: "p4"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_8: -+** cntb x12 -+** mov x13, #?4144 -+** add x12, x12, x13 -+** mov x11, sp -+** ... -+** sub sp, sp, x12 -+** addvl x11, sp, #1 -+** stp x29, x30, \[x11\] -+** addvl x29, sp, #1 -+** stp x24, x25, \[x29, 16\] -+** str x26, \[x29, 32\] -+** str p4, \[sp\] -+** sub sp, sp, #?16 -+** ... -+** ptrue p0\.b, all -+** add sp, sp, #?16 -+** ldr p4, \[sp\] -+** addvl sp, sp, #1 -+** ldp x24, x25, \[sp, 16\] -+** ldr x26, \[sp, 32\] -+** ldp x29, x30, \[sp\] -+** mov x12, #?4144 -+** add sp, sp, x12 -+** ret -+*/ -+svbool_t -+test_8 (void) -+{ -+ volatile int x[1024]; -+ take_stack_args (x, 0, 1, 2, 3, 4, 5, 6, 7); -+ asm volatile ("" ::: "p4", "x24", "x25", "x26"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_9: -+** cntb x12 -+** mov x13, #?4112 -+** add x12, x12, x13 -+** mov x11, sp -+** ... -+** sub sp, sp, x12 -+** addvl x11, sp, #1 -+** stp x29, x30, \[x11\] -+** addvl x29, sp, #1 -+** str p4, \[sp\] -+** sub sp, sp, #?16 -+** ... 
-+** ptrue p0\.b, all -+** addvl sp, x29, #-1 -+** ldr p4, \[sp\] -+** addvl sp, sp, #1 -+** ldp x29, x30, \[sp\] -+** mov x12, #?4112 -+** add sp, sp, x12 -+** ret -+*/ -+svbool_t -+test_9 (int n) -+{ -+ volatile int x[1024]; -+ take_stack_args (x, __builtin_alloca (n), 1, 2, 3, 4, 5, 6, 7); -+ asm volatile ("" ::: "p4"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_10: -+** cntb x12 -+** mov x13, #?4144 -+** add x12, x12, x13 -+** mov x11, sp -+** ... -+** sub sp, sp, x12 -+** addvl x11, sp, #1 -+** stp x29, x30, \[x11\] -+** addvl x29, sp, #1 -+** stp x24, x25, \[x29, 16\] -+** str x26, \[x29, 32\] -+** str p4, \[sp\] -+** sub sp, sp, #?16 -+** ... -+** ptrue p0\.b, all -+** addvl sp, x29, #-1 -+** ldr p4, \[sp\] -+** addvl sp, sp, #1 -+** ldp x24, x25, \[sp, 16\] -+** ldr x26, \[sp, 32\] -+** ldp x29, x30, \[sp\] -+** mov x12, #?4144 -+** add sp, sp, x12 -+** ret -+*/ -+svbool_t -+test_10 (int n) -+{ -+ volatile int x[1024]; -+ take_stack_args (x, __builtin_alloca (n), 1, 2, 3, 4, 5, 6, 7); -+ asm volatile ("" ::: "p4", "x24", "x25", "x26"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_11: -+** cntb x12 -+** add x12, x12, #?3008 -+** add x12, x12, #?126976 -+** mov x11, sp -+** ... -+** sub sp, sp, x12 -+** addvl x11, sp, #1 -+** stp x29, x30, \[x11\] -+** addvl x29, sp, #1 -+** stp x24, x25, \[x29, 16\] -+** str x26, \[x29, 32\] -+** str p4, \[sp\] -+** sub sp, sp, #?16 -+** ... -+** ptrue p0\.b, all -+** addvl sp, x29, #-1 -+** ldr p4, \[sp\] -+** addvl sp, sp, #1 -+** ldp x24, x25, \[sp, 16\] -+** ldr x26, \[sp, 32\] -+** ldp x29, x30, \[sp\] -+** add sp, sp, #?3008 -+** add sp, sp, #?126976 -+** ret -+*/ -+svbool_t -+test_11 (int n) -+{ -+ volatile int x[0x7ee4]; -+ take_stack_args (x, __builtin_alloca (n), 1, 2, 3, 4, 5, 6, 7); -+ asm volatile ("" ::: "p4", "x24", "x25", "x26"); -+ return svptrue_b8 (); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_1024.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_1024.c -new file mode 100644 -index 000000000..d5a9d4444 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_1024.c -@@ -0,0 +1,285 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -fshrink-wrap -fstack-clash-protection -msve-vector-bits=1024 -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+svbool_t take_stack_args (volatile void *, void *, int, int, int, -+ int, int, int, int); -+ -+/* -+** test_1: -+** sub sp, sp, #144 -+** str p4, \[sp\] -+** ... -+** ptrue p0\.b, vl128 -+** ldr p4, \[sp\] -+** add sp, sp, #?144 -+** ret -+*/ -+svbool_t -+test_1 (void) -+{ -+ volatile int x = 1; -+ asm volatile ("" ::: "p4"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_2: -+** sub sp, sp, #176 -+** stp x24, x25, \[sp, 128\] -+** str x26, \[sp, 144\] -+** str p4, \[sp\] -+** ... -+** ptrue p0\.b, vl128 -+** ldr p4, \[sp\] -+** ldp x24, x25, \[sp, 128\] -+** ldr x26, \[sp, 144\] -+** add sp, sp, #?176 -+** ret -+*/ -+svbool_t -+test_2 (void) -+{ -+ volatile int x = 1; -+ asm volatile ("" ::: "p4", "x24", "x25", "x26"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_3: -+** mov x12, #?4256 -+** sub sp, sp, x12 -+** stp x24, x25, \[sp, 128\] -+** str x26, \[sp, 144\] -+** str p4, \[sp\] -+** ... 
-+** ptrue p0\.b, vl128 -+** ldr p4, \[sp\] -+** ldp x24, x25, \[sp, 128\] -+** ldr x26, \[sp, 144\] -+** add sp, sp, x12 -+** ret -+*/ -+svbool_t -+test_3 (void) -+{ -+ volatile int x[1024]; -+ asm volatile ("" :: "r" (x) : "p4", "x24", "x25", "x26"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_4: -+** sub sp, sp, #256 -+** str p4, \[sp\] -+** ... -+** ptrue p0\.h, vl64 -+** ldr p4, \[sp\] -+** add sp, sp, #?256 -+** ret -+*/ -+svbool_t -+test_4 (void) -+{ -+ volatile svint32_t b; -+ b = svdup_s32 (1); -+ asm volatile ("" ::: "p4"); -+ return svptrue_b16 (); -+} -+ -+/* -+** test_5: -+** sub sp, sp, #288 -+** stp x24, x25, \[sp, 128\] -+** str x26, \[sp, 144\] -+** str p4, \[sp\] -+** ... -+** ptrue p0\.h, vl64 -+** ldr p4, \[sp\] -+** ldp x24, x25, \[sp, 128\] -+** ldr x26, \[sp, 144\] -+** add sp, sp, #?288 -+** ret -+*/ -+svbool_t -+test_5 (void) -+{ -+ volatile svint32_t b; -+ b = svdup_s32 (1); -+ asm volatile ("" ::: "p4", "x24", "x25", "x26"); -+ return svptrue_b16 (); -+} -+ -+/* -+** test_6: -+** stp x29, x30, \[sp, -16\]! -+** mov x29, sp -+** sub sp, sp, #128 -+** str p4, \[sp\] -+** ... -+** ptrue p0\.b, vl128 -+** add sp, sp, #?16 -+** ldr p4, \[sp\] -+** add sp, sp, #?128 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svbool_t -+test_6 (void) -+{ -+ take_stack_args (0, 0, 1, 2, 3, 4, 5, 6, 7); -+ asm volatile ("" ::: "p4"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_7: -+** mov x12, #?4240 -+** sub sp, sp, x12 -+** stp x29, x30, \[sp, 128\] -+** add x29, sp, #?128 -+** str p4, \[sp\] -+** sub sp, sp, #16 -+** ... -+** ptrue p0\.b, vl128 -+** add sp, sp, #?16 -+** ldr p4, \[sp\] -+** add sp, sp, #?128 -+** ldp x29, x30, \[sp\] -+** mov x12, #?4112 -+** add sp, sp, x12 -+** ret -+*/ -+svbool_t -+test_7 (void) -+{ -+ volatile int x[1024]; -+ take_stack_args (x, 0, 1, 2, 3, 4, 5, 6, 7); -+ asm volatile ("" ::: "p4"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_8: -+** mov x12, #?4272 -+** sub sp, sp, x12 -+** stp x29, x30, \[sp, 128\] -+** add x29, sp, #?128 -+** stp x24, x25, \[sp, 144\] -+** str x26, \[sp, 160\] -+** str p4, \[sp\] -+** sub sp, sp, #16 -+** ... -+** ptrue p0\.b, vl128 -+** add sp, sp, #?16 -+** ldr p4, \[sp\] -+** add sp, sp, #?128 -+** ldp x24, x25, \[sp, 16\] -+** ldr x26, \[sp, 32\] -+** ldp x29, x30, \[sp\] -+** mov x12, #?4144 -+** add sp, sp, x12 -+** ret -+*/ -+svbool_t -+test_8 (void) -+{ -+ volatile int x[1024]; -+ take_stack_args (x, 0, 1, 2, 3, 4, 5, 6, 7); -+ asm volatile ("" ::: "p4", "x24", "x25", "x26"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_9: -+** mov x12, #?4240 -+** sub sp, sp, x12 -+** stp x29, x30, \[sp, 128\] -+** add x29, sp, #?128 -+** str p4, \[sp\] -+** sub sp, sp, #16 -+** ... -+** ptrue p0\.b, vl128 -+** sub sp, x29, #128 -+** ldr p4, \[sp\] -+** add sp, sp, #?128 -+** ldp x29, x30, \[sp\] -+** mov x12, #?4112 -+** add sp, sp, x12 -+** ret -+*/ -+svbool_t -+test_9 (int n) -+{ -+ volatile int x[1024]; -+ take_stack_args (x, __builtin_alloca (n), 1, 2, 3, 4, 5, 6, 7); -+ asm volatile ("" ::: "p4"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_10: -+** mov x12, #?4272 -+** sub sp, sp, x12 -+** stp x29, x30, \[sp, 128\] -+** add x29, sp, #?128 -+** stp x24, x25, \[sp, 144\] -+** str x26, \[sp, 160\] -+** str p4, \[sp\] -+** sub sp, sp, #16 -+** ... 
-+** ptrue p0\.b, vl128 -+** sub sp, x29, #128 -+** ldr p4, \[sp\] -+** add sp, sp, #?128 -+** ldp x24, x25, \[sp, 16\] -+** ldr x26, \[sp, 32\] -+** ldp x29, x30, \[sp\] -+** mov x12, #?4144 -+** add sp, sp, x12 -+** ret -+*/ -+svbool_t -+test_10 (int n) -+{ -+ volatile int x[1024]; -+ take_stack_args (x, __builtin_alloca (n), 1, 2, 3, 4, 5, 6, 7); -+ asm volatile ("" ::: "p4", "x24", "x25", "x26"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_11: -+** sub sp, sp, #65536 -+** str xzr, \[sp, 1024\] -+** mov x12, #?64576 -+** sub sp, sp, x12 -+** str xzr, \[sp, 1024\] -+** stp x29, x30, \[sp, 128\] -+** add x29, sp, #?128 -+** stp x24, x25, \[sp, 144\] -+** str x26, \[sp, 160\] -+** str p4, \[sp\] -+** sub sp, sp, #16 -+** ... -+** ptrue p0\.b, vl128 -+** sub sp, x29, #128 -+** ldr p4, \[sp\] -+** add sp, sp, #?128 -+** ldp x24, x25, \[sp, 16\] -+** ldr x26, \[sp, 32\] -+** ldp x29, x30, \[sp\] -+** add sp, sp, #?3008 -+** add sp, sp, #?126976 -+** ret -+*/ -+svbool_t -+test_11 (int n) -+{ -+ volatile int x[0x7ee4]; -+ take_stack_args (x, __builtin_alloca (n), 1, 2, 3, 4, 5, 6, 7); -+ asm volatile ("" ::: "p4", "x24", "x25", "x26"); -+ return svptrue_b8 (); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_2048.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_2048.c -new file mode 100644 -index 000000000..c185e2e36 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_2048.c -@@ -0,0 +1,285 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -fshrink-wrap -fstack-clash-protection -msve-vector-bits=2048 -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+svbool_t take_stack_args (volatile void *, void *, int, int, int, -+ int, int, int, int); -+ -+/* -+** test_1: -+** sub sp, sp, #272 -+** str p4, \[sp\] -+** ... -+** ptrue p0\.b, vl256 -+** ldr p4, \[sp\] -+** add sp, sp, #?272 -+** ret -+*/ -+svbool_t -+test_1 (void) -+{ -+ volatile int x = 1; -+ asm volatile ("" ::: "p4"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_2: -+** sub sp, sp, #304 -+** stp x24, x25, \[sp, 256\] -+** str x26, \[sp, 272\] -+** str p4, \[sp\] -+** ... -+** ptrue p0\.b, vl256 -+** ldr p4, \[sp\] -+** ldp x24, x25, \[sp, 256\] -+** ldr x26, \[sp, 272\] -+** add sp, sp, #?304 -+** ret -+*/ -+svbool_t -+test_2 (void) -+{ -+ volatile int x = 1; -+ asm volatile ("" ::: "p4", "x24", "x25", "x26"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_3: -+** mov x12, #?4384 -+** sub sp, sp, x12 -+** stp x24, x25, \[sp, 256\] -+** str x26, \[sp, 272\] -+** str p4, \[sp\] -+** ... -+** ptrue p0\.b, vl256 -+** ldr p4, \[sp\] -+** ldp x24, x25, \[sp, 256\] -+** ldr x26, \[sp, 272\] -+** add sp, sp, x12 -+** ret -+*/ -+svbool_t -+test_3 (void) -+{ -+ volatile int x[1024]; -+ asm volatile ("" :: "r" (x) : "p4", "x24", "x25", "x26"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_4: -+** sub sp, sp, #512 -+** str p4, \[sp\] -+** ... -+** ptrue p0\.h, vl128 -+** ldr p4, \[sp\] -+** add sp, sp, #?512 -+** ret -+*/ -+svbool_t -+test_4 (void) -+{ -+ volatile svint32_t b; -+ b = svdup_s32 (1); -+ asm volatile ("" ::: "p4"); -+ return svptrue_b16 (); -+} -+ -+/* -+** test_5: -+** sub sp, sp, #544 -+** stp x24, x25, \[sp, 256\] -+** str x26, \[sp, 272\] -+** str p4, \[sp\] -+** ... 
-+** ptrue p0\.h, vl128 -+** ldr p4, \[sp\] -+** ldp x24, x25, \[sp, 256\] -+** ldr x26, \[sp, 272\] -+** add sp, sp, #?544 -+** ret -+*/ -+svbool_t -+test_5 (void) -+{ -+ volatile svint32_t b; -+ b = svdup_s32 (1); -+ asm volatile ("" ::: "p4", "x24", "x25", "x26"); -+ return svptrue_b16 (); -+} -+ -+/* -+** test_6: -+** stp x29, x30, \[sp, -16\]! -+** mov x29, sp -+** sub sp, sp, #256 -+** str p4, \[sp\] -+** ... -+** ptrue p0\.b, vl256 -+** add sp, sp, #?16 -+** ldr p4, \[sp\] -+** add sp, sp, #?256 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svbool_t -+test_6 (void) -+{ -+ take_stack_args (0, 0, 1, 2, 3, 4, 5, 6, 7); -+ asm volatile ("" ::: "p4"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_7: -+** mov x12, #?4368 -+** sub sp, sp, x12 -+** stp x29, x30, \[sp, 256\] -+** add x29, sp, #?256 -+** str p4, \[sp\] -+** sub sp, sp, #16 -+** ... -+** ptrue p0\.b, vl256 -+** add sp, sp, #?16 -+** ldr p4, \[sp\] -+** add sp, sp, #?256 -+** ldp x29, x30, \[sp\] -+** mov x12, #?4112 -+** add sp, sp, x12 -+** ret -+*/ -+svbool_t -+test_7 (void) -+{ -+ volatile int x[1024]; -+ take_stack_args (x, 0, 1, 2, 3, 4, 5, 6, 7); -+ asm volatile ("" ::: "p4"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_8: -+** mov x12, #?4400 -+** sub sp, sp, x12 -+** stp x29, x30, \[sp, 256\] -+** add x29, sp, #?256 -+** stp x24, x25, \[sp, 272\] -+** str x26, \[sp, 288\] -+** str p4, \[sp\] -+** sub sp, sp, #16 -+** ... -+** ptrue p0\.b, vl256 -+** add sp, sp, #?16 -+** ldr p4, \[sp\] -+** add sp, sp, #?256 -+** ldp x24, x25, \[sp, 16\] -+** ldr x26, \[sp, 32\] -+** ldp x29, x30, \[sp\] -+** mov x12, #?4144 -+** add sp, sp, x12 -+** ret -+*/ -+svbool_t -+test_8 (void) -+{ -+ volatile int x[1024]; -+ take_stack_args (x, 0, 1, 2, 3, 4, 5, 6, 7); -+ asm volatile ("" ::: "p4", "x24", "x25", "x26"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_9: -+** mov x12, #?4368 -+** sub sp, sp, x12 -+** stp x29, x30, \[sp, 256\] -+** add x29, sp, #?256 -+** str p4, \[sp\] -+** sub sp, sp, #16 -+** ... -+** ptrue p0\.b, vl256 -+** sub sp, x29, #256 -+** ldr p4, \[sp\] -+** add sp, sp, #?256 -+** ldp x29, x30, \[sp\] -+** mov x12, #?4112 -+** add sp, sp, x12 -+** ret -+*/ -+svbool_t -+test_9 (int n) -+{ -+ volatile int x[1024]; -+ take_stack_args (x, __builtin_alloca (n), 1, 2, 3, 4, 5, 6, 7); -+ asm volatile ("" ::: "p4"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_10: -+** mov x12, #?4400 -+** sub sp, sp, x12 -+** stp x29, x30, \[sp, 256\] -+** add x29, sp, #?256 -+** stp x24, x25, \[sp, 272\] -+** str x26, \[sp, 288\] -+** str p4, \[sp\] -+** sub sp, sp, #16 -+** ... -+** ptrue p0\.b, vl256 -+** sub sp, x29, #256 -+** ldr p4, \[sp\] -+** add sp, sp, #?256 -+** ldp x24, x25, \[sp, 16\] -+** ldr x26, \[sp, 32\] -+** ldp x29, x30, \[sp\] -+** mov x12, #?4144 -+** add sp, sp, x12 -+** ret -+*/ -+svbool_t -+test_10 (int n) -+{ -+ volatile int x[1024]; -+ take_stack_args (x, __builtin_alloca (n), 1, 2, 3, 4, 5, 6, 7); -+ asm volatile ("" ::: "p4", "x24", "x25", "x26"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_11: -+** sub sp, sp, #65536 -+** str xzr, \[sp, 1024\] -+** mov x12, #?64704 -+** sub sp, sp, x12 -+** str xzr, \[sp, 1024\] -+** stp x29, x30, \[sp, 256\] -+** add x29, sp, #?256 -+** stp x24, x25, \[sp, 272\] -+** str x26, \[sp, 288\] -+** str p4, \[sp\] -+** sub sp, sp, #16 -+** ... 
-+** ptrue p0\.b, vl256 -+** sub sp, x29, #256 -+** ldr p4, \[sp\] -+** add sp, sp, #?256 -+** ldp x24, x25, \[sp, 16\] -+** ldr x26, \[sp, 32\] -+** ldp x29, x30, \[sp\] -+** add sp, sp, #?3008 -+** add sp, sp, #?126976 -+** ret -+*/ -+svbool_t -+test_11 (int n) -+{ -+ volatile int x[0x7ee4]; -+ take_stack_args (x, __builtin_alloca (n), 1, 2, 3, 4, 5, 6, 7); -+ asm volatile ("" ::: "p4", "x24", "x25", "x26"); -+ return svptrue_b8 (); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_256.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_256.c -new file mode 100644 -index 000000000..f8318b354 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_256.c -@@ -0,0 +1,284 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -fshrink-wrap -fstack-clash-protection -msve-vector-bits=256 -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+svbool_t take_stack_args (volatile void *, void *, int, int, int, -+ int, int, int, int); -+ -+/* -+** test_1: -+** sub sp, sp, #48 -+** str p4, \[sp\] -+** ... -+** ptrue p0\.b, vl32 -+** ldr p4, \[sp\] -+** add sp, sp, #?48 -+** ret -+*/ -+svbool_t -+test_1 (void) -+{ -+ volatile int x = 1; -+ asm volatile ("" ::: "p4"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_2: -+** sub sp, sp, #80 -+** stp x24, x25, \[sp, 32\] -+** str x26, \[sp, 48\] -+** str p4, \[sp\] -+** ... -+** ptrue p0\.b, vl32 -+** ldr p4, \[sp\] -+** ldp x24, x25, \[sp, 32\] -+** ldr x26, \[sp, 48\] -+** add sp, sp, #?80 -+** ret -+*/ -+svbool_t -+test_2 (void) -+{ -+ volatile int x = 1; -+ asm volatile ("" ::: "p4", "x24", "x25", "x26"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_3: -+** mov x12, #?4160 -+** sub sp, sp, x12 -+** stp x24, x25, \[sp, 32\] -+** str x26, \[sp, 48\] -+** str p4, \[sp\] -+** ... -+** ptrue p0\.b, vl32 -+** ldr p4, \[sp\] -+** ldp x24, x25, \[sp, 32\] -+** ldr x26, \[sp, 48\] -+** add sp, sp, x12 -+** ret -+*/ -+svbool_t -+test_3 (void) -+{ -+ volatile int x[1024]; -+ asm volatile ("" :: "r" (x) : "p4", "x24", "x25", "x26"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_4: -+** sub sp, sp, #64 -+** str p4, \[sp\] -+** ... -+** ptrue p0\.h, vl16 -+** ldr p4, \[sp\] -+** add sp, sp, #?64 -+** ret -+*/ -+svbool_t -+test_4 (void) -+{ -+ volatile svint32_t b; -+ b = svdup_s32 (1); -+ asm volatile ("" ::: "p4"); -+ return svptrue_b16 (); -+} -+ -+/* -+** test_5: -+** sub sp, sp, #96 -+** stp x24, x25, \[sp, 32\] -+** str x26, \[sp, 48\] -+** str p4, \[sp\] -+** ... -+** ptrue p0\.h, vl16 -+** ldr p4, \[sp\] -+** ldp x24, x25, \[sp, 32\] -+** ldr x26, \[sp, 48\] -+** add sp, sp, #?96 -+** ret -+*/ -+svbool_t -+test_5 (void) -+{ -+ volatile svint32_t b; -+ b = svdup_s32 (1); -+ asm volatile ("" ::: "p4", "x24", "x25", "x26"); -+ return svptrue_b16 (); -+} -+ -+/* -+** test_6: -+** stp x29, x30, \[sp, -16\]! -+** mov x29, sp -+** sub sp, sp, #32 -+** str p4, \[sp\] -+** ... -+** ptrue p0\.b, vl32 -+** add sp, sp, #?16 -+** ldr p4, \[sp\] -+** add sp, sp, #?32 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svbool_t -+test_6 (void) -+{ -+ take_stack_args (0, 0, 1, 2, 3, 4, 5, 6, 7); -+ asm volatile ("" ::: "p4"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_7: -+** mov x12, #?4144 -+** sub sp, sp, x12 -+** stp x29, x30, \[sp, 32\] -+** add x29, sp, #?32 -+** str p4, \[sp\] -+** sub sp, sp, #16 -+** ... 
-+** ptrue p0\.b, vl32 -+** add sp, sp, #?16 -+** ldr p4, \[sp\] -+** add sp, sp, #?32 -+** ldp x29, x30, \[sp\] -+** mov x12, #?4112 -+** add sp, sp, x12 -+** ret -+*/ -+svbool_t -+test_7 (void) -+{ -+ volatile int x[1024]; -+ take_stack_args (x, 0, 1, 2, 3, 4, 5, 6, 7); -+ asm volatile ("" ::: "p4"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_8: -+** mov x12, #?4176 -+** sub sp, sp, x12 -+** stp x29, x30, \[sp, 32\] -+** add x29, sp, #?32 -+** stp x24, x25, \[sp, 48\] -+** str x26, \[sp, 64\] -+** str p4, \[sp\] -+** sub sp, sp, #16 -+** ... -+** ptrue p0\.b, vl32 -+** add sp, sp, #?16 -+** ldr p4, \[sp\] -+** add sp, sp, #?32 -+** ldp x24, x25, \[sp, 16\] -+** ldr x26, \[sp, 32\] -+** ldp x29, x30, \[sp\] -+** mov x12, #?4144 -+** add sp, sp, x12 -+** ret -+*/ -+svbool_t -+test_8 (void) -+{ -+ volatile int x[1024]; -+ take_stack_args (x, 0, 1, 2, 3, 4, 5, 6, 7); -+ asm volatile ("" ::: "p4", "x24", "x25", "x26"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_9: -+** mov x12, #?4144 -+** sub sp, sp, x12 -+** stp x29, x30, \[sp, 32\] -+** add x29, sp, #?32 -+** str p4, \[sp\] -+** sub sp, sp, #16 -+** ... -+** ptrue p0\.b, vl32 -+** sub sp, x29, #32 -+** ldr p4, \[sp\] -+** add sp, sp, #?32 -+** ldp x29, x30, \[sp\] -+** mov x12, #?4112 -+** add sp, sp, x12 -+** ret -+*/ -+svbool_t -+test_9 (int n) -+{ -+ volatile int x[1024]; -+ take_stack_args (x, __builtin_alloca (n), 1, 2, 3, 4, 5, 6, 7); -+ asm volatile ("" ::: "p4"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_10: -+** mov x12, #?4176 -+** sub sp, sp, x12 -+** stp x29, x30, \[sp, 32\] -+** add x29, sp, #?32 -+** stp x24, x25, \[sp, 48\] -+** str x26, \[sp, 64\] -+** str p4, \[sp\] -+** sub sp, sp, #16 -+** ... -+** ptrue p0\.b, vl32 -+** sub sp, x29, #32 -+** ldr p4, \[sp\] -+** add sp, sp, #?32 -+** ldp x24, x25, \[sp, 16\] -+** ldr x26, \[sp, 32\] -+** ldp x29, x30, \[sp\] -+** mov x12, #?4144 -+** add sp, sp, x12 -+** ret -+*/ -+svbool_t -+test_10 (int n) -+{ -+ volatile int x[1024]; -+ take_stack_args (x, __builtin_alloca (n), 1, 2, 3, 4, 5, 6, 7); -+ asm volatile ("" ::: "p4", "x24", "x25", "x26"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_11: -+** sub sp, sp, #65536 -+** str xzr, \[sp, 1024\] -+** mov x12, #?64480 -+** sub sp, sp, x12 -+** stp x29, x30, \[sp, 32\] -+** add x29, sp, #?32 -+** stp x24, x25, \[sp, 48\] -+** str x26, \[sp, 64\] -+** str p4, \[sp\] -+** sub sp, sp, #16 -+** ... -+** ptrue p0\.b, vl32 -+** sub sp, x29, #32 -+** ldr p4, \[sp\] -+** add sp, sp, #?32 -+** ldp x24, x25, \[sp, 16\] -+** ldr x26, \[sp, 32\] -+** ldp x29, x30, \[sp\] -+** add sp, sp, #?3008 -+** add sp, sp, #?126976 -+** ret -+*/ -+svbool_t -+test_11 (int n) -+{ -+ volatile int x[0x7ee4]; -+ take_stack_args (x, __builtin_alloca (n), 1, 2, 3, 4, 5, 6, 7); -+ asm volatile ("" ::: "p4", "x24", "x25", "x26"); -+ return svptrue_b8 (); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_512.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_512.c -new file mode 100644 -index 000000000..45a23ad49 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_512.c -@@ -0,0 +1,285 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -fshrink-wrap -fstack-clash-protection -msve-vector-bits=512 -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+svbool_t take_stack_args (volatile void *, void *, int, int, int, -+ int, int, int, int); -+ -+/* -+** test_1: -+** sub sp, sp, #80 -+** str p4, \[sp\] -+** ... 
-+** ptrue p0\.b, vl64 -+** ldr p4, \[sp\] -+** add sp, sp, #?80 -+** ret -+*/ -+svbool_t -+test_1 (void) -+{ -+ volatile int x = 1; -+ asm volatile ("" ::: "p4"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_2: -+** sub sp, sp, #112 -+** stp x24, x25, \[sp, 64\] -+** str x26, \[sp, 80\] -+** str p4, \[sp\] -+** ... -+** ptrue p0\.b, vl64 -+** ldr p4, \[sp\] -+** ldp x24, x25, \[sp, 64\] -+** ldr x26, \[sp, 80\] -+** add sp, sp, #?112 -+** ret -+*/ -+svbool_t -+test_2 (void) -+{ -+ volatile int x = 1; -+ asm volatile ("" ::: "p4", "x24", "x25", "x26"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_3: -+** mov x12, #?4192 -+** sub sp, sp, x12 -+** stp x24, x25, \[sp, 64\] -+** str x26, \[sp, 80\] -+** str p4, \[sp\] -+** ... -+** ptrue p0\.b, vl64 -+** ldr p4, \[sp\] -+** ldp x24, x25, \[sp, 64\] -+** ldr x26, \[sp, 80\] -+** add sp, sp, x12 -+** ret -+*/ -+svbool_t -+test_3 (void) -+{ -+ volatile int x[1024]; -+ asm volatile ("" :: "r" (x) : "p4", "x24", "x25", "x26"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_4: -+** sub sp, sp, #128 -+** str p4, \[sp\] -+** ... -+** ptrue p0\.h, vl32 -+** ldr p4, \[sp\] -+** add sp, sp, #?128 -+** ret -+*/ -+svbool_t -+test_4 (void) -+{ -+ volatile svint32_t b; -+ b = svdup_s32 (1); -+ asm volatile ("" ::: "p4"); -+ return svptrue_b16 (); -+} -+ -+/* -+** test_5: -+** sub sp, sp, #160 -+** stp x24, x25, \[sp, 64\] -+** str x26, \[sp, 80\] -+** str p4, \[sp\] -+** ... -+** ptrue p0\.h, vl32 -+** ldr p4, \[sp\] -+** ldp x24, x25, \[sp, 64\] -+** ldr x26, \[sp, 80\] -+** add sp, sp, #?160 -+** ret -+*/ -+svbool_t -+test_5 (void) -+{ -+ volatile svint32_t b; -+ b = svdup_s32 (1); -+ asm volatile ("" ::: "p4", "x24", "x25", "x26"); -+ return svptrue_b16 (); -+} -+ -+/* -+** test_6: -+** stp x29, x30, \[sp, -16\]! -+** mov x29, sp -+** sub sp, sp, #64 -+** str p4, \[sp\] -+** ... -+** ptrue p0\.b, vl64 -+** add sp, sp, #?16 -+** ldr p4, \[sp\] -+** add sp, sp, #?64 -+** ldp x29, x30, \[sp\], 16 -+** ret -+*/ -+svbool_t -+test_6 (void) -+{ -+ take_stack_args (0, 0, 1, 2, 3, 4, 5, 6, 7); -+ asm volatile ("" ::: "p4"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_7: -+** mov x12, #?4176 -+** sub sp, sp, x12 -+** stp x29, x30, \[sp, 64\] -+** add x29, sp, #?64 -+** str p4, \[sp\] -+** sub sp, sp, #16 -+** ... -+** ptrue p0\.b, vl64 -+** add sp, sp, #?16 -+** ldr p4, \[sp\] -+** add sp, sp, #?64 -+** ldp x29, x30, \[sp\] -+** mov x12, #?4112 -+** add sp, sp, x12 -+** ret -+*/ -+svbool_t -+test_7 (void) -+{ -+ volatile int x[1024]; -+ take_stack_args (x, 0, 1, 2, 3, 4, 5, 6, 7); -+ asm volatile ("" ::: "p4"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_8: -+** mov x12, #?4208 -+** sub sp, sp, x12 -+** stp x29, x30, \[sp, 64\] -+** add x29, sp, #?64 -+** stp x24, x25, \[sp, 80\] -+** str x26, \[sp, 96\] -+** str p4, \[sp\] -+** sub sp, sp, #16 -+** ... -+** ptrue p0\.b, vl64 -+** add sp, sp, #?16 -+** ldr p4, \[sp\] -+** add sp, sp, #?64 -+** ldp x24, x25, \[sp, 16\] -+** ldr x26, \[sp, 32\] -+** ldp x29, x30, \[sp\] -+** mov x12, #?4144 -+** add sp, sp, x12 -+** ret -+*/ -+svbool_t -+test_8 (void) -+{ -+ volatile int x[1024]; -+ take_stack_args (x, 0, 1, 2, 3, 4, 5, 6, 7); -+ asm volatile ("" ::: "p4", "x24", "x25", "x26"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_9: -+** mov x12, #?4176 -+** sub sp, sp, x12 -+** stp x29, x30, \[sp, 64\] -+** add x29, sp, #?64 -+** str p4, \[sp\] -+** sub sp, sp, #16 -+** ... 
-+** ptrue p0\.b, vl64 -+** sub sp, x29, #64 -+** ldr p4, \[sp\] -+** add sp, sp, #?64 -+** ldp x29, x30, \[sp\] -+** mov x12, #?4112 -+** add sp, sp, x12 -+** ret -+*/ -+svbool_t -+test_9 (int n) -+{ -+ volatile int x[1024]; -+ take_stack_args (x, __builtin_alloca (n), 1, 2, 3, 4, 5, 6, 7); -+ asm volatile ("" ::: "p4"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_10: -+** mov x12, #?4208 -+** sub sp, sp, x12 -+** stp x29, x30, \[sp, 64\] -+** add x29, sp, #?64 -+** stp x24, x25, \[sp, 80\] -+** str x26, \[sp, 96\] -+** str p4, \[sp\] -+** sub sp, sp, #16 -+** ... -+** ptrue p0\.b, vl64 -+** sub sp, x29, #64 -+** ldr p4, \[sp\] -+** add sp, sp, #?64 -+** ldp x24, x25, \[sp, 16\] -+** ldr x26, \[sp, 32\] -+** ldp x29, x30, \[sp\] -+** mov x12, #?4144 -+** add sp, sp, x12 -+** ret -+*/ -+svbool_t -+test_10 (int n) -+{ -+ volatile int x[1024]; -+ take_stack_args (x, __builtin_alloca (n), 1, 2, 3, 4, 5, 6, 7); -+ asm volatile ("" ::: "p4", "x24", "x25", "x26"); -+ return svptrue_b8 (); -+} -+ -+/* -+** test_11: -+** sub sp, sp, #65536 -+** str xzr, \[sp, 1024\] -+** mov x12, #?64512 -+** sub sp, sp, x12 -+** str xzr, \[sp, 1024\] -+** stp x29, x30, \[sp, 64\] -+** add x29, sp, #?64 -+** stp x24, x25, \[sp, 80\] -+** str x26, \[sp, 96\] -+** str p4, \[sp\] -+** sub sp, sp, #16 -+** ... -+** ptrue p0\.b, vl64 -+** sub sp, x29, #64 -+** ldr p4, \[sp\] -+** add sp, sp, #?64 -+** ldp x24, x25, \[sp, 16\] -+** ldr x26, \[sp, 32\] -+** ldp x29, x30, \[sp\] -+** add sp, sp, #?3008 -+** add sp, sp, #?126976 -+** ret -+*/ -+svbool_t -+test_11 (int n) -+{ -+ volatile int x[0x7ee4]; -+ take_stack_args (x, __builtin_alloca (n), 1, 2, 3, 4, 5, 6, 7); -+ asm volatile ("" ::: "p4", "x24", "x25", "x26"); -+ return svptrue_b8 (); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c -new file mode 100644 -index 000000000..3e01ec36c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c -@@ -0,0 +1,63 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -fshrink-wrap -fstack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+#pragma GCC aarch64 "arm_sve.h" -+ -+/* -+** test_1: -+** str x24, \[sp, -32\]! -+** cntb x13 -+** mov x11, sp -+** ... -+** sub sp, sp, x13 -+** str p4, \[sp\] -+** cbz w0, [^\n]* -+** ... -+** ptrue p0\.b, all -+** ldr p4, \[sp\] -+** addvl sp, sp, #1 -+** ldr x24, \[sp\], 32 -+** ret -+*/ -+svbool_t -+test_1 (int n) -+{ -+ asm volatile ("" ::: "x24"); -+ if (n) -+ { -+ volatile int x = 1; -+ asm volatile ("" ::: "p4"); -+ } -+ return svptrue_b8 (); -+} -+ -+/* -+** test_2: -+** str x24, \[sp, -32\]! -+** cntb x13 -+** mov x11, sp -+** ... -+** sub sp, sp, x13 -+** str p4, \[sp\] -+** cbz w0, [^\n]* -+** str p5, \[sp, #1, mul vl\] -+** str p6, \[sp, #2, mul vl\] -+** ... 
-+** ptrue p0\.b, all -+** ldr p4, \[sp\] -+** addvl sp, sp, #1 -+** ldr x24, \[sp\], 32 -+** ret -+*/ -+svbool_t -+test_2 (int n) -+{ -+ asm volatile ("" ::: "x24"); -+ if (n) -+ { -+ volatile int x = 1; -+ asm volatile ("" ::: "p4", "p5", "p6"); -+ } -+ return svptrue_b8 (); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/unprototyped_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/unprototyped_1.c -new file mode 100644 -index 000000000..5c7ed5167 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/unprototyped_1.c -@@ -0,0 +1,11 @@ -+/* { dg-do compile } */ -+ -+#include -+ -+void unprototyped (); -+ -+void -+f (svuint8_t *ptr) -+{ -+ unprototyped (*ptr); /* { dg-error {SVE type '(svuint8_t|__SVUint8_t)' cannot be passed to an unprototyped function} } */ -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_1.c -new file mode 100644 -index 000000000..6987245a6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_1.c -@@ -0,0 +1,170 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+#include -+#include -+ -+/* -+** callee_0: -+** ... -+** ldr (p[0-7]), \[x1\] -+** ... -+** cntp x0, \1, \1\.b -+** ... -+** ret -+*/ -+uint64_t __attribute__((noipa)) -+callee_0 (int64_t *ptr, ...) -+{ -+ va_list va; -+ svbool_t pg; -+ -+ va_start (va, ptr); -+ pg = va_arg (va, svbool_t); -+ va_end (va); -+ return svcntp_b8 (pg, pg); -+} -+ -+/* -+** caller_0: -+** ... -+** ptrue (p[0-7])\.d, vl7 -+** ... -+** str \1, \[x1\] -+** ... -+** ret -+*/ -+uint64_t __attribute__((noipa)) -+caller_0 (int64_t *ptr) -+{ -+ return callee_0 (ptr, svptrue_pat_b64 (SV_VL7)); -+} -+ -+/* -+** callee_1: -+** ... -+** ldr (p[0-7]), \[x2\] -+** ... -+** cntp x0, \1, \1\.b -+** ... -+** ret -+*/ -+uint64_t __attribute__((noipa)) -+callee_1 (int64_t *ptr, ...) -+{ -+ va_list va; -+ svbool_t pg; -+ -+ va_start (va, ptr); -+ va_arg (va, int); -+ pg = va_arg (va, svbool_t); -+ va_end (va); -+ return svcntp_b8 (pg, pg); -+} -+ -+/* -+** caller_1: -+** ... -+** ptrue (p[0-7])\.d, vl7 -+** ... -+** str \1, \[x2\] -+** ... -+** ret -+*/ -+uint64_t __attribute__((noipa)) -+caller_1 (int64_t *ptr) -+{ -+ return callee_1 (ptr, 1, svptrue_pat_b64 (SV_VL7)); -+} -+ -+/* -+** callee_7: -+** ... -+** ldr (p[0-7]), \[x7\] -+** ... -+** cntp x0, \1, \1\.b -+** ... -+** ret -+*/ -+uint64_t __attribute__((noipa)) -+callee_7 (int64_t *ptr, ...) -+{ -+ va_list va; -+ svbool_t pg; -+ -+ va_start (va, ptr); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ pg = va_arg (va, svbool_t); -+ va_end (va); -+ return svcntp_b8 (pg, pg); -+} -+ -+/* -+** caller_7: -+** ... -+** ptrue (p[0-7])\.d, vl7 -+** ... -+** str \1, \[x7\] -+** ... -+** ret -+*/ -+uint64_t __attribute__((noipa)) -+caller_7 (int64_t *ptr) -+{ -+ return callee_7 (ptr, 1, 2, 3, 4, 5, 6, svptrue_pat_b64 (SV_VL7)); -+} -+ -+/* FIXME: We should be able to get rid of the va_list object. */ -+/* -+** callee_8: -+** sub sp, sp, #([0-9]+) -+** ... -+** ldr (x[0-9]+), \[sp, \1\] -+** ... -+** ldr (p[0-7]), \[\2\] -+** ... -+** cntp x0, \3, \3\.b -+** ... -+** ret -+*/ -+uint64_t __attribute__((noipa)) -+callee_8 (int64_t *ptr, ...) 
-+{ -+ va_list va; -+ svbool_t pg; -+ -+ va_start (va, ptr); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ pg = va_arg (va, svbool_t); -+ va_end (va); -+ return svcntp_b8 (pg, pg); -+} -+ -+/* -+** caller_8: -+** ... -+** ptrue (p[0-7])\.d, vl7 -+** ... -+** str \1, \[(x[0-9]+)\] -+** ... -+** str \2, \[sp\] -+** ... -+** ret -+*/ -+uint64_t __attribute__((noipa)) -+caller_8 (int64_t *ptr) -+{ -+ return callee_8 (ptr, 1, 2, 3, 4, 5, 6, 7, svptrue_pat_b64 (SV_VL7)); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f16.c -new file mode 100644 -index 000000000..79098851c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f16.c -@@ -0,0 +1,170 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+#include -+#include -+ -+/* -+** callee_0: -+** ... -+** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x1\] -+** ... -+** st1h \1, \2, \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_0 (int16_t *ptr, ...) -+{ -+ va_list va; -+ svint16_t vec; -+ -+ va_start (va, ptr); -+ vec = va_arg (va, svint16_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_0: -+** ... -+** fmov (z[0-9]+\.h), #9\.0[^\n]* -+** ... -+** st1h \1, p[0-7], \[x1\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_0 (int16_t *ptr) -+{ -+ callee_0 (ptr, svdup_f16 (9)); -+} -+ -+/* -+** callee_1: -+** ... -+** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x2\] -+** ... -+** st1h \1, p[0-7], \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_1 (int16_t *ptr, ...) -+{ -+ va_list va; -+ svint16_t vec; -+ -+ va_start (va, ptr); -+ va_arg (va, int); -+ vec = va_arg (va, svint16_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_1: -+** ... -+** fmov (z[0-9]+\.h), #9\.0[^\n]* -+** ... -+** st1h \1, p[0-7], \[x2\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_1 (int16_t *ptr) -+{ -+ callee_1 (ptr, 1, svdup_f16 (9)); -+} -+ -+/* -+** callee_7: -+** ... -+** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x7\] -+** ... -+** st1h \1, p[0-7], \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_7 (int16_t *ptr, ...) -+{ -+ va_list va; -+ svint16_t vec; -+ -+ va_start (va, ptr); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ vec = va_arg (va, svint16_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_7: -+** ... -+** fmov (z[0-9]+\.h), #9\.0[^\n]* -+** ... -+** st1h \1, p[0-7], \[x7\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_7 (int16_t *ptr) -+{ -+ callee_7 (ptr, 1, 2, 3, 4, 5, 6, svdup_f16 (9)); -+} -+ -+/* FIXME: We should be able to get rid of the va_list object. */ -+/* -+** callee_8: -+** sub sp, sp, #([0-9]+) -+** ... -+** ldr (x[0-9]+), \[sp, \1\] -+** ... -+** ld1h (z[0-9]+\.h), (p[0-7])/z, \[\2\] -+** ... -+** st1h \3, \4, \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_8 (int16_t *ptr, ...) 
-+{ -+ va_list va; -+ svint16_t vec; -+ -+ va_start (va, ptr); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ vec = va_arg (va, svint16_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_8: -+** ... -+** fmov (z[0-9]+\.h), #9\.0[^\n]* -+** ... -+** st1h \1, p[0-7], \[(x[0-9]+)\] -+** ... -+** str \2, \[sp\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_8 (int16_t *ptr) -+{ -+ callee_8 (ptr, 1, 2, 3, 4, 5, 6, 7, svdup_f16 (9)); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f32.c -new file mode 100644 -index 000000000..325b0b2aa ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f32.c -@@ -0,0 +1,170 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+#include -+#include -+ -+/* -+** callee_0: -+** ... -+** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x1\] -+** ... -+** st1w \1, \2, \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_0 (int32_t *ptr, ...) -+{ -+ va_list va; -+ svint32_t vec; -+ -+ va_start (va, ptr); -+ vec = va_arg (va, svint32_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_0: -+** ... -+** fmov (z[0-9]+\.s), #9\.0[^\n]* -+** ... -+** st1w \1, p[0-7], \[x1\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_0 (int32_t *ptr) -+{ -+ callee_0 (ptr, svdup_f32 (9)); -+} -+ -+/* -+** callee_1: -+** ... -+** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x2\] -+** ... -+** st1w \1, p[0-7], \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_1 (int32_t *ptr, ...) -+{ -+ va_list va; -+ svint32_t vec; -+ -+ va_start (va, ptr); -+ va_arg (va, int); -+ vec = va_arg (va, svint32_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_1: -+** ... -+** fmov (z[0-9]+\.s), #9\.0[^\n]* -+** ... -+** st1w \1, p[0-7], \[x2\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_1 (int32_t *ptr) -+{ -+ callee_1 (ptr, 1, svdup_f32 (9)); -+} -+ -+/* -+** callee_7: -+** ... -+** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x7\] -+** ... -+** st1w \1, p[0-7], \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_7 (int32_t *ptr, ...) -+{ -+ va_list va; -+ svint32_t vec; -+ -+ va_start (va, ptr); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ vec = va_arg (va, svint32_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_7: -+** ... -+** fmov (z[0-9]+\.s), #9\.0[^\n]* -+** ... -+** st1w \1, p[0-7], \[x7\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_7 (int32_t *ptr) -+{ -+ callee_7 (ptr, 1, 2, 3, 4, 5, 6, svdup_f32 (9)); -+} -+ -+/* FIXME: We should be able to get rid of the va_list object. */ -+/* -+** callee_8: -+** sub sp, sp, #([0-9]+) -+** ... -+** ldr (x[0-9]+), \[sp, \1\] -+** ... -+** ld1w (z[0-9]+\.s), (p[0-7])/z, \[\2\] -+** ... -+** st1w \3, \4, \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_8 (int32_t *ptr, ...) 
-+{ -+ va_list va; -+ svint32_t vec; -+ -+ va_start (va, ptr); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ vec = va_arg (va, svint32_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_8: -+** ... -+** fmov (z[0-9]+\.s), #9\.0[^\n]* -+** ... -+** st1w \1, p[0-7], \[(x[0-9]+)\] -+** ... -+** str \2, \[sp\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_8 (int32_t *ptr) -+{ -+ callee_8 (ptr, 1, 2, 3, 4, 5, 6, 7, svdup_f32 (9)); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f64.c -new file mode 100644 -index 000000000..07a6c707e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_f64.c -@@ -0,0 +1,170 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+#include -+#include -+ -+/* -+** callee_0: -+** ... -+** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x1\] -+** ... -+** st1d \1, \2, \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_0 (int64_t *ptr, ...) -+{ -+ va_list va; -+ svint64_t vec; -+ -+ va_start (va, ptr); -+ vec = va_arg (va, svint64_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_0: -+** ... -+** fmov (z[0-9]+\.d), #9\.0[^\n]* -+** ... -+** st1d \1, p[0-7], \[x1\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_0 (int64_t *ptr) -+{ -+ callee_0 (ptr, svdup_f64 (9)); -+} -+ -+/* -+** callee_1: -+** ... -+** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x2\] -+** ... -+** st1d \1, p[0-7], \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_1 (int64_t *ptr, ...) -+{ -+ va_list va; -+ svint64_t vec; -+ -+ va_start (va, ptr); -+ va_arg (va, int); -+ vec = va_arg (va, svint64_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_1: -+** ... -+** fmov (z[0-9]+\.d), #9\.0[^\n]* -+** ... -+** st1d \1, p[0-7], \[x2\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_1 (int64_t *ptr) -+{ -+ callee_1 (ptr, 1, svdup_f64 (9)); -+} -+ -+/* -+** callee_7: -+** ... -+** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x7\] -+** ... -+** st1d \1, p[0-7], \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_7 (int64_t *ptr, ...) -+{ -+ va_list va; -+ svint64_t vec; -+ -+ va_start (va, ptr); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ vec = va_arg (va, svint64_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_7: -+** ... -+** fmov (z[0-9]+\.d), #9\.0[^\n]* -+** ... -+** st1d \1, p[0-7], \[x7\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_7 (int64_t *ptr) -+{ -+ callee_7 (ptr, 1, 2, 3, 4, 5, 6, svdup_f64 (9)); -+} -+ -+/* FIXME: We should be able to get rid of the va_list object. */ -+/* -+** callee_8: -+** sub sp, sp, #([0-9]+) -+** ... -+** ldr (x[0-9]+), \[sp, \1\] -+** ... -+** ld1d (z[0-9]+\.d), (p[0-7])/z, \[\2\] -+** ... -+** st1d \3, \4, \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_8 (int64_t *ptr, ...) 
-+{ -+ va_list va; -+ svint64_t vec; -+ -+ va_start (va, ptr); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ vec = va_arg (va, svint64_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_8: -+** ... -+** fmov (z[0-9]+\.d), #9\.0[^\n]* -+** ... -+** st1d \1, p[0-7], \[(x[0-9]+)\] -+** ... -+** str \2, \[sp\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_8 (int64_t *ptr) -+{ -+ callee_8 (ptr, 1, 2, 3, 4, 5, 6, 7, svdup_f64 (9)); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s16.c -new file mode 100644 -index 000000000..173063833 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s16.c -@@ -0,0 +1,170 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+#include -+#include -+ -+/* -+** callee_0: -+** ... -+** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x1\] -+** ... -+** st1h \1, \2, \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_0 (int16_t *ptr, ...) -+{ -+ va_list va; -+ svint16_t vec; -+ -+ va_start (va, ptr); -+ vec = va_arg (va, svint16_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_0: -+** ... -+** mov (z[0-9]+\.h), #42 -+** ... -+** st1h \1, p[0-7], \[x1\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_0 (int16_t *ptr) -+{ -+ callee_0 (ptr, svdup_s16 (42)); -+} -+ -+/* -+** callee_1: -+** ... -+** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x2\] -+** ... -+** st1h \1, p[0-7], \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_1 (int16_t *ptr, ...) -+{ -+ va_list va; -+ svint16_t vec; -+ -+ va_start (va, ptr); -+ va_arg (va, int); -+ vec = va_arg (va, svint16_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_1: -+** ... -+** mov (z[0-9]+\.h), #42 -+** ... -+** st1h \1, p[0-7], \[x2\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_1 (int16_t *ptr) -+{ -+ callee_1 (ptr, 1, svdup_s16 (42)); -+} -+ -+/* -+** callee_7: -+** ... -+** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x7\] -+** ... -+** st1h \1, p[0-7], \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_7 (int16_t *ptr, ...) -+{ -+ va_list va; -+ svint16_t vec; -+ -+ va_start (va, ptr); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ vec = va_arg (va, svint16_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_7: -+** ... -+** mov (z[0-9]+\.h), #42 -+** ... -+** st1h \1, p[0-7], \[x7\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_7 (int16_t *ptr) -+{ -+ callee_7 (ptr, 1, 2, 3, 4, 5, 6, svdup_s16 (42)); -+} -+ -+/* FIXME: We should be able to get rid of the va_list object. */ -+/* -+** callee_8: -+** sub sp, sp, #([0-9]+) -+** ... -+** ldr (x[0-9]+), \[sp, \1\] -+** ... -+** ld1h (z[0-9]+\.h), (p[0-7])/z, \[\2\] -+** ... -+** st1h \3, \4, \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_8 (int16_t *ptr, ...) 
-+{ -+ va_list va; -+ svint16_t vec; -+ -+ va_start (va, ptr); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ vec = va_arg (va, svint16_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_8: -+** ... -+** mov (z[0-9]+\.h), #42 -+** ... -+** st1h \1, p[0-7], \[(x[0-9]+)\] -+** ... -+** str \2, \[sp\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_8 (int16_t *ptr) -+{ -+ callee_8 (ptr, 1, 2, 3, 4, 5, 6, 7, svdup_s16 (42)); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s32.c -new file mode 100644 -index 000000000..d93db8fc8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s32.c -@@ -0,0 +1,170 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+#include -+#include -+ -+/* -+** callee_0: -+** ... -+** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x1\] -+** ... -+** st1w \1, \2, \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_0 (int32_t *ptr, ...) -+{ -+ va_list va; -+ svint32_t vec; -+ -+ va_start (va, ptr); -+ vec = va_arg (va, svint32_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_0: -+** ... -+** mov (z[0-9]+\.s), #42 -+** ... -+** st1w \1, p[0-7], \[x1\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_0 (int32_t *ptr) -+{ -+ callee_0 (ptr, svdup_s32 (42)); -+} -+ -+/* -+** callee_1: -+** ... -+** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x2\] -+** ... -+** st1w \1, p[0-7], \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_1 (int32_t *ptr, ...) -+{ -+ va_list va; -+ svint32_t vec; -+ -+ va_start (va, ptr); -+ va_arg (va, int); -+ vec = va_arg (va, svint32_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_1: -+** ... -+** mov (z[0-9]+\.s), #42 -+** ... -+** st1w \1, p[0-7], \[x2\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_1 (int32_t *ptr) -+{ -+ callee_1 (ptr, 1, svdup_s32 (42)); -+} -+ -+/* -+** callee_7: -+** ... -+** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x7\] -+** ... -+** st1w \1, p[0-7], \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_7 (int32_t *ptr, ...) -+{ -+ va_list va; -+ svint32_t vec; -+ -+ va_start (va, ptr); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ vec = va_arg (va, svint32_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_7: -+** ... -+** mov (z[0-9]+\.s), #42 -+** ... -+** st1w \1, p[0-7], \[x7\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_7 (int32_t *ptr) -+{ -+ callee_7 (ptr, 1, 2, 3, 4, 5, 6, svdup_s32 (42)); -+} -+ -+/* FIXME: We should be able to get rid of the va_list object. */ -+/* -+** callee_8: -+** sub sp, sp, #([0-9]+) -+** ... -+** ldr (x[0-9]+), \[sp, \1\] -+** ... -+** ld1w (z[0-9]+\.s), (p[0-7])/z, \[\2\] -+** ... -+** st1w \3, \4, \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_8 (int32_t *ptr, ...) 
-+{ -+ va_list va; -+ svint32_t vec; -+ -+ va_start (va, ptr); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ vec = va_arg (va, svint32_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_8: -+** ... -+** mov (z[0-9]+\.s), #42 -+** ... -+** st1w \1, p[0-7], \[(x[0-9]+)\] -+** ... -+** str \2, \[sp\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_8 (int32_t *ptr) -+{ -+ callee_8 (ptr, 1, 2, 3, 4, 5, 6, 7, svdup_s32 (42)); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s64.c -new file mode 100644 -index 000000000..b8c77455d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s64.c -@@ -0,0 +1,170 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+#include -+#include -+ -+/* -+** callee_0: -+** ... -+** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x1\] -+** ... -+** st1d \1, \2, \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_0 (int64_t *ptr, ...) -+{ -+ va_list va; -+ svint64_t vec; -+ -+ va_start (va, ptr); -+ vec = va_arg (va, svint64_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_0: -+** ... -+** mov (z[0-9]+\.d), #42 -+** ... -+** st1d \1, p[0-7], \[x1\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_0 (int64_t *ptr) -+{ -+ callee_0 (ptr, svdup_s64 (42)); -+} -+ -+/* -+** callee_1: -+** ... -+** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x2\] -+** ... -+** st1d \1, p[0-7], \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_1 (int64_t *ptr, ...) -+{ -+ va_list va; -+ svint64_t vec; -+ -+ va_start (va, ptr); -+ va_arg (va, int); -+ vec = va_arg (va, svint64_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_1: -+** ... -+** mov (z[0-9]+\.d), #42 -+** ... -+** st1d \1, p[0-7], \[x2\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_1 (int64_t *ptr) -+{ -+ callee_1 (ptr, 1, svdup_s64 (42)); -+} -+ -+/* -+** callee_7: -+** ... -+** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x7\] -+** ... -+** st1d \1, p[0-7], \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_7 (int64_t *ptr, ...) -+{ -+ va_list va; -+ svint64_t vec; -+ -+ va_start (va, ptr); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ vec = va_arg (va, svint64_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_7: -+** ... -+** mov (z[0-9]+\.d), #42 -+** ... -+** st1d \1, p[0-7], \[x7\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_7 (int64_t *ptr) -+{ -+ callee_7 (ptr, 1, 2, 3, 4, 5, 6, svdup_s64 (42)); -+} -+ -+/* FIXME: We should be able to get rid of the va_list object. */ -+/* -+** callee_8: -+** sub sp, sp, #([0-9]+) -+** ... -+** ldr (x[0-9]+), \[sp, \1\] -+** ... -+** ld1d (z[0-9]+\.d), (p[0-7])/z, \[\2\] -+** ... -+** st1d \3, \4, \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_8 (int64_t *ptr, ...) 
-+{ -+ va_list va; -+ svint64_t vec; -+ -+ va_start (va, ptr); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ vec = va_arg (va, svint64_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_8: -+** ... -+** mov (z[0-9]+\.d), #42 -+** ... -+** st1d \1, p[0-7], \[(x[0-9]+)\] -+** ... -+** str \2, \[sp\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_8 (int64_t *ptr) -+{ -+ callee_8 (ptr, 1, 2, 3, 4, 5, 6, 7, svdup_s64 (42)); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s8.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s8.c -new file mode 100644 -index 000000000..de7cbe37d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_s8.c -@@ -0,0 +1,170 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+#include -+#include -+ -+/* -+** callee_0: -+** ... -+** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x1\] -+** ... -+** st1b \1, \2, \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_0 (int8_t *ptr, ...) -+{ -+ va_list va; -+ svint8_t vec; -+ -+ va_start (va, ptr); -+ vec = va_arg (va, svint8_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_0: -+** ... -+** mov (z[0-9]+\.b), #42 -+** ... -+** st1b \1, p[0-7], \[x1\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_0 (int8_t *ptr) -+{ -+ callee_0 (ptr, svdup_s8 (42)); -+} -+ -+/* -+** callee_1: -+** ... -+** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x2\] -+** ... -+** st1b \1, p[0-7], \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_1 (int8_t *ptr, ...) -+{ -+ va_list va; -+ svint8_t vec; -+ -+ va_start (va, ptr); -+ va_arg (va, int); -+ vec = va_arg (va, svint8_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_1: -+** ... -+** mov (z[0-9]+\.b), #42 -+** ... -+** st1b \1, p[0-7], \[x2\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_1 (int8_t *ptr) -+{ -+ callee_1 (ptr, 1, svdup_s8 (42)); -+} -+ -+/* -+** callee_7: -+** ... -+** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x7\] -+** ... -+** st1b \1, p[0-7], \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_7 (int8_t *ptr, ...) -+{ -+ va_list va; -+ svint8_t vec; -+ -+ va_start (va, ptr); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ vec = va_arg (va, svint8_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_7: -+** ... -+** mov (z[0-9]+\.b), #42 -+** ... -+** st1b \1, p[0-7], \[x7\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_7 (int8_t *ptr) -+{ -+ callee_7 (ptr, 1, 2, 3, 4, 5, 6, svdup_s8 (42)); -+} -+ -+/* FIXME: We should be able to get rid of the va_list object. */ -+/* -+** callee_8: -+** sub sp, sp, #([0-9]+) -+** ... -+** ldr (x[0-9]+), \[sp, \1\] -+** ... -+** ld1b (z[0-9]+\.b), (p[0-7])/z, \[\2\] -+** ... -+** st1b \3, \4, \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_8 (int8_t *ptr, ...) -+{ -+ va_list va; -+ svint8_t vec; -+ -+ va_start (va, ptr); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ vec = va_arg (va, svint8_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_8: -+** ... 
-+** mov (z[0-9]+\.b), #42 -+** ... -+** st1b \1, p[0-7], \[(x[0-9]+)\] -+** ... -+** str \2, \[sp\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_8 (int8_t *ptr) -+{ -+ callee_8 (ptr, 1, 2, 3, 4, 5, 6, 7, svdup_s8 (42)); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u16.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u16.c -new file mode 100644 -index 000000000..59c9ca7db ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u16.c -@@ -0,0 +1,170 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+#include -+#include -+ -+/* -+** callee_0: -+** ... -+** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x1\] -+** ... -+** st1h \1, \2, \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_0 (int16_t *ptr, ...) -+{ -+ va_list va; -+ svint16_t vec; -+ -+ va_start (va, ptr); -+ vec = va_arg (va, svint16_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_0: -+** ... -+** mov (z[0-9]+\.h), #42 -+** ... -+** st1h \1, p[0-7], \[x1\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_0 (int16_t *ptr) -+{ -+ callee_0 (ptr, svdup_u16 (42)); -+} -+ -+/* -+** callee_1: -+** ... -+** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x2\] -+** ... -+** st1h \1, p[0-7], \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_1 (int16_t *ptr, ...) -+{ -+ va_list va; -+ svint16_t vec; -+ -+ va_start (va, ptr); -+ va_arg (va, int); -+ vec = va_arg (va, svint16_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_1: -+** ... -+** mov (z[0-9]+\.h), #42 -+** ... -+** st1h \1, p[0-7], \[x2\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_1 (int16_t *ptr) -+{ -+ callee_1 (ptr, 1, svdup_u16 (42)); -+} -+ -+/* -+** callee_7: -+** ... -+** ld1h (z[0-9]+\.h), (p[0-7])/z, \[x7\] -+** ... -+** st1h \1, p[0-7], \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_7 (int16_t *ptr, ...) -+{ -+ va_list va; -+ svint16_t vec; -+ -+ va_start (va, ptr); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ vec = va_arg (va, svint16_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_7: -+** ... -+** mov (z[0-9]+\.h), #42 -+** ... -+** st1h \1, p[0-7], \[x7\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_7 (int16_t *ptr) -+{ -+ callee_7 (ptr, 1, 2, 3, 4, 5, 6, svdup_u16 (42)); -+} -+ -+/* FIXME: We should be able to get rid of the va_list object. */ -+/* -+** callee_8: -+** sub sp, sp, #([0-9]+) -+** ... -+** ldr (x[0-9]+), \[sp, \1\] -+** ... -+** ld1h (z[0-9]+\.h), (p[0-7])/z, \[\2\] -+** ... -+** st1h \3, \4, \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_8 (int16_t *ptr, ...) -+{ -+ va_list va; -+ svint16_t vec; -+ -+ va_start (va, ptr); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ vec = va_arg (va, svint16_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_8: -+** ... -+** mov (z[0-9]+\.h), #42 -+** ... -+** st1h \1, p[0-7], \[(x[0-9]+)\] -+** ... -+** str \2, \[sp\] -+** ... 
-+** ret -+*/ -+void __attribute__((noipa)) -+caller_8 (int16_t *ptr) -+{ -+ callee_8 (ptr, 1, 2, 3, 4, 5, 6, 7, svdup_u16 (42)); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u32.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u32.c -new file mode 100644 -index 000000000..3050ad5f6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u32.c -@@ -0,0 +1,170 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+#include -+#include -+ -+/* -+** callee_0: -+** ... -+** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x1\] -+** ... -+** st1w \1, \2, \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_0 (int32_t *ptr, ...) -+{ -+ va_list va; -+ svint32_t vec; -+ -+ va_start (va, ptr); -+ vec = va_arg (va, svint32_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_0: -+** ... -+** mov (z[0-9]+\.s), #42 -+** ... -+** st1w \1, p[0-7], \[x1\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_0 (int32_t *ptr) -+{ -+ callee_0 (ptr, svdup_u32 (42)); -+} -+ -+/* -+** callee_1: -+** ... -+** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x2\] -+** ... -+** st1w \1, p[0-7], \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_1 (int32_t *ptr, ...) -+{ -+ va_list va; -+ svint32_t vec; -+ -+ va_start (va, ptr); -+ va_arg (va, int); -+ vec = va_arg (va, svint32_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_1: -+** ... -+** mov (z[0-9]+\.s), #42 -+** ... -+** st1w \1, p[0-7], \[x2\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_1 (int32_t *ptr) -+{ -+ callee_1 (ptr, 1, svdup_u32 (42)); -+} -+ -+/* -+** callee_7: -+** ... -+** ld1w (z[0-9]+\.s), (p[0-7])/z, \[x7\] -+** ... -+** st1w \1, p[0-7], \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_7 (int32_t *ptr, ...) -+{ -+ va_list va; -+ svint32_t vec; -+ -+ va_start (va, ptr); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ vec = va_arg (va, svint32_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_7: -+** ... -+** mov (z[0-9]+\.s), #42 -+** ... -+** st1w \1, p[0-7], \[x7\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_7 (int32_t *ptr) -+{ -+ callee_7 (ptr, 1, 2, 3, 4, 5, 6, svdup_u32 (42)); -+} -+ -+/* FIXME: We should be able to get rid of the va_list object. */ -+/* -+** callee_8: -+** sub sp, sp, #([0-9]+) -+** ... -+** ldr (x[0-9]+), \[sp, \1\] -+** ... -+** ld1w (z[0-9]+\.s), (p[0-7])/z, \[\2\] -+** ... -+** st1w \3, \4, \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_8 (int32_t *ptr, ...) -+{ -+ va_list va; -+ svint32_t vec; -+ -+ va_start (va, ptr); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ vec = va_arg (va, svint32_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_8: -+** ... -+** mov (z[0-9]+\.s), #42 -+** ... -+** st1w \1, p[0-7], \[(x[0-9]+)\] -+** ... -+** str \2, \[sp\] -+** ... 
-+** ret -+*/ -+void __attribute__((noipa)) -+caller_8 (int32_t *ptr) -+{ -+ callee_8 (ptr, 1, 2, 3, 4, 5, 6, 7, svdup_u32 (42)); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u64.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u64.c -new file mode 100644 -index 000000000..94322a34c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u64.c -@@ -0,0 +1,170 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+#include -+#include -+ -+/* -+** callee_0: -+** ... -+** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x1\] -+** ... -+** st1d \1, \2, \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_0 (int64_t *ptr, ...) -+{ -+ va_list va; -+ svint64_t vec; -+ -+ va_start (va, ptr); -+ vec = va_arg (va, svint64_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_0: -+** ... -+** mov (z[0-9]+\.d), #42 -+** ... -+** st1d \1, p[0-7], \[x1\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_0 (int64_t *ptr) -+{ -+ callee_0 (ptr, svdup_u64 (42)); -+} -+ -+/* -+** callee_1: -+** ... -+** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x2\] -+** ... -+** st1d \1, p[0-7], \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_1 (int64_t *ptr, ...) -+{ -+ va_list va; -+ svint64_t vec; -+ -+ va_start (va, ptr); -+ va_arg (va, int); -+ vec = va_arg (va, svint64_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_1: -+** ... -+** mov (z[0-9]+\.d), #42 -+** ... -+** st1d \1, p[0-7], \[x2\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_1 (int64_t *ptr) -+{ -+ callee_1 (ptr, 1, svdup_u64 (42)); -+} -+ -+/* -+** callee_7: -+** ... -+** ld1d (z[0-9]+\.d), (p[0-7])/z, \[x7\] -+** ... -+** st1d \1, p[0-7], \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_7 (int64_t *ptr, ...) -+{ -+ va_list va; -+ svint64_t vec; -+ -+ va_start (va, ptr); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ vec = va_arg (va, svint64_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_7: -+** ... -+** mov (z[0-9]+\.d), #42 -+** ... -+** st1d \1, p[0-7], \[x7\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_7 (int64_t *ptr) -+{ -+ callee_7 (ptr, 1, 2, 3, 4, 5, 6, svdup_u64 (42)); -+} -+ -+/* FIXME: We should be able to get rid of the va_list object. */ -+/* -+** callee_8: -+** sub sp, sp, #([0-9]+) -+** ... -+** ldr (x[0-9]+), \[sp, \1\] -+** ... -+** ld1d (z[0-9]+\.d), (p[0-7])/z, \[\2\] -+** ... -+** st1d \3, \4, \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_8 (int64_t *ptr, ...) -+{ -+ va_list va; -+ svint64_t vec; -+ -+ va_start (va, ptr); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ vec = va_arg (va, svint64_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_8: -+** ... -+** mov (z[0-9]+\.d), #42 -+** ... -+** st1d \1, p[0-7], \[(x[0-9]+)\] -+** ... -+** str \2, \[sp\] -+** ... 
-+** ret -+*/ -+void __attribute__((noipa)) -+caller_8 (int64_t *ptr) -+{ -+ callee_8 (ptr, 1, 2, 3, 4, 5, 6, 7, svdup_u64 (42)); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u8.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u8.c -new file mode 100644 -index 000000000..cf8ac2171 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_2_u8.c -@@ -0,0 +1,170 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fno-stack-clash-protection -g" } */ -+/* { dg-final { check-function-bodies "**" "" { target lp64 } } } */ -+ -+#include -+#include -+ -+/* -+** callee_0: -+** ... -+** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x1\] -+** ... -+** st1b \1, \2, \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_0 (int8_t *ptr, ...) -+{ -+ va_list va; -+ svint8_t vec; -+ -+ va_start (va, ptr); -+ vec = va_arg (va, svint8_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_0: -+** ... -+** mov (z[0-9]+\.b), #42 -+** ... -+** st1b \1, p[0-7], \[x1\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_0 (int8_t *ptr) -+{ -+ callee_0 (ptr, svdup_u8 (42)); -+} -+ -+/* -+** callee_1: -+** ... -+** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x2\] -+** ... -+** st1b \1, p[0-7], \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_1 (int8_t *ptr, ...) -+{ -+ va_list va; -+ svint8_t vec; -+ -+ va_start (va, ptr); -+ va_arg (va, int); -+ vec = va_arg (va, svint8_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_1: -+** ... -+** mov (z[0-9]+\.b), #42 -+** ... -+** st1b \1, p[0-7], \[x2\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_1 (int8_t *ptr) -+{ -+ callee_1 (ptr, 1, svdup_u8 (42)); -+} -+ -+/* -+** callee_7: -+** ... -+** ld1b (z[0-9]+\.b), (p[0-7])/z, \[x7\] -+** ... -+** st1b \1, p[0-7], \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_7 (int8_t *ptr, ...) -+{ -+ va_list va; -+ svint8_t vec; -+ -+ va_start (va, ptr); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ vec = va_arg (va, svint8_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_7: -+** ... -+** mov (z[0-9]+\.b), #42 -+** ... -+** st1b \1, p[0-7], \[x7\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+caller_7 (int8_t *ptr) -+{ -+ callee_7 (ptr, 1, 2, 3, 4, 5, 6, svdup_u8 (42)); -+} -+ -+/* FIXME: We should be able to get rid of the va_list object. */ -+/* -+** callee_8: -+** sub sp, sp, #([0-9]+) -+** ... -+** ldr (x[0-9]+), \[sp, \1\] -+** ... -+** ld1b (z[0-9]+\.b), (p[0-7])/z, \[\2\] -+** ... -+** st1b \3, \4, \[x0\] -+** ... -+** ret -+*/ -+void __attribute__((noipa)) -+callee_8 (int8_t *ptr, ...) -+{ -+ va_list va; -+ svint8_t vec; -+ -+ va_start (va, ptr); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ va_arg (va, int); -+ vec = va_arg (va, svint8_t); -+ va_end (va); -+ svst1 (svptrue_b8 (), ptr, vec); -+} -+ -+/* -+** caller_8: -+** ... -+** mov (z[0-9]+\.b), #42 -+** ... -+** st1b \1, p[0-7], \[(x[0-9]+)\] -+** ... -+** str \2, \[sp\] -+** ... 
-+** ret -+*/ -+void __attribute__((noipa)) -+caller_8 (int8_t *ptr) -+{ -+ callee_8 (ptr, 1, 2, 3, 4, 5, 6, 7, svdup_u8 (42)); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_3_nosc.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_3_nosc.c -new file mode 100644 -index 000000000..cea69cc88 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_3_nosc.c -@@ -0,0 +1,75 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O0 -g" } */ -+ -+#include -+#include -+ -+void __attribute__((noipa)) -+callee (int foo, ...) -+{ -+ va_list va; -+ svbool_t pg, p; -+ svint8_t s8; -+ svuint16x4_t u16; -+ svfloat32x3_t f32; -+ svint64x2_t s64; -+ -+ va_start (va, foo); -+ p = va_arg (va, svbool_t); -+ s8 = va_arg (va, svint8_t); -+ u16 = va_arg (va, svuint16x4_t); -+ f32 = va_arg (va, svfloat32x3_t); -+ s64 = va_arg (va, svint64x2_t); -+ -+ pg = svptrue_b8 (); -+ -+ if (svptest_any (pg, sveor_z (pg, p, svptrue_pat_b8 (SV_VL7)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, s8, svindex_s8 (1, 2)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, svget4 (u16, 0), svindex_u16 (2, 3)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, svget4 (u16, 1), svindex_u16 (3, 4)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, svget4 (u16, 2), svindex_u16 (4, 5)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, svget4 (u16, 3), svindex_u16 (5, 6)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, svget3 (f32, 0), svdup_f32 (1.0)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, svget3 (f32, 1), svdup_f32 (2.0)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, svget3 (f32, 2), svdup_f32 (3.0)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, svget2 (s64, 0), svindex_s64 (6, 7)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, svget2 (s64, 1), svindex_s64 (7, 8)))) -+ __builtin_abort (); -+} -+ -+int __attribute__((noipa)) -+main (void) -+{ -+ callee (100, -+ svptrue_pat_b8 (SV_VL7), -+ svindex_s8 (1, 2), -+ svcreate4 (svindex_u16 (2, 3), -+ svindex_u16 (3, 4), -+ svindex_u16 (4, 5), -+ svindex_u16 (5, 6)), -+ svcreate3 (svdup_f32 (1.0), -+ svdup_f32 (2.0), -+ svdup_f32 (3.0)), -+ svcreate2 (svindex_s64 (6, 7), -+ svindex_s64 (7, 8))); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_3_sc.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_3_sc.c -new file mode 100644 -index 000000000..b939aa5ea ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/varargs_3_sc.c -@@ -0,0 +1,75 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O0 -fstack-clash-protection -g" } */ -+ -+#include -+#include -+ -+void __attribute__((noipa)) -+callee (int foo, ...) 
-+{ -+ va_list va; -+ svbool_t pg, p; -+ svint8_t s8; -+ svuint16x4_t u16; -+ svfloat32x3_t f32; -+ svint64x2_t s64; -+ -+ va_start (va, foo); -+ p = va_arg (va, svbool_t); -+ s8 = va_arg (va, svint8_t); -+ u16 = va_arg (va, svuint16x4_t); -+ f32 = va_arg (va, svfloat32x3_t); -+ s64 = va_arg (va, svint64x2_t); -+ -+ pg = svptrue_b8 (); -+ -+ if (svptest_any (pg, sveor_z (pg, p, svptrue_pat_b8 (SV_VL7)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, s8, svindex_s8 (1, 2)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, svget4 (u16, 0), svindex_u16 (2, 3)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, svget4 (u16, 1), svindex_u16 (3, 4)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, svget4 (u16, 2), svindex_u16 (4, 5)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, svget4 (u16, 3), svindex_u16 (5, 6)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, svget3 (f32, 0), svdup_f32 (1.0)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, svget3 (f32, 1), svdup_f32 (2.0)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, svget3 (f32, 2), svdup_f32 (3.0)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, svget2 (s64, 0), svindex_s64 (6, 7)))) -+ __builtin_abort (); -+ -+ if (svptest_any (pg, svcmpne (pg, svget2 (s64, 1), svindex_s64 (7, 8)))) -+ __builtin_abort (); -+} -+ -+int __attribute__((noipa)) -+main (void) -+{ -+ callee (100, -+ svptrue_pat_b8 (SV_VL7), -+ svindex_s8 (1, 2), -+ svcreate4 (svindex_u16 (2, 3), -+ svindex_u16 (3, 4), -+ svindex_u16 (4, 5), -+ svindex_u16 (5, 6)), -+ svcreate3 (svdup_f32 (1.0), -+ svdup_f32 (2.0), -+ svdup_f32 (3.0)), -+ svcreate2 (svindex_s64 (6, 7), -+ svindex_s64 (7, 8))); -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/vpcs_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/vpcs_1.c -new file mode 100644 -index 000000000..d9f4e6c41 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/vpcs_1.c -@@ -0,0 +1,6 @@ -+/* { dg-do compile } */ -+ -+__attribute__ ((aarch64_vector_pcs)) void f1 (__SVBool_t); /* { dg-error {the 'aarch64_vector_pcs' attribute cannot be applied to an SVE function type} } */ -+__attribute__ ((aarch64_vector_pcs)) void f2 (__SVInt8_t s8) {} /* { dg-error {the 'aarch64_vector_pcs' attribute cannot be applied to an SVE function type} } */ -+__attribute__ ((aarch64_vector_pcs)) void (*f3) (__SVInt16_t); /* { dg-error {the 'aarch64_vector_pcs' attribute cannot be applied to an SVE function type} } */ -+typedef __attribute__ ((aarch64_vector_pcs)) void (*f4) (__SVInt32_t); /* { dg-error {the 'aarch64_vector_pcs' attribute cannot be applied to an SVE function type} } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_1.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_1.c -index a064c337b..156d04ae5 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_1.c -@@ -25,3 +25,4 @@ foo (void) - /* We should use an induction that starts at -5, with only the last - 7 elements of the first iteration being active. 
*/ - /* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #-5, #5\n} } } */ -+/* { dg-final { scan-assembler {\tptrue\t(p[0-9]+\.b), vl1\n.*\tnot\tp[0-7]\.b, p[0-7]/z, \1\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_2.c b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_2.c -index f2113be90..e792cdf2c 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_2.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/peel_ind_2.c -@@ -20,3 +20,4 @@ foo (void) - /* { dg-final { scan-assembler {\t(adrp|adr)\tx[0-9]+, x\n} } } */ - /* We should unroll the loop three times. */ - /* { dg-final { scan-assembler-times "\tst1w\t" 3 } } */ -+/* { dg-final { scan-assembler {\tptrue\t(p[0-9]+)\.s, vl7\n.*\teor\tp[0-7]\.b, (p[0-7])/z, (\1\.b, \2\.b|\2\.b, \1\.b)\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/revb_1.c b/gcc/testsuite/gcc.target/aarch64/sve/revb_1.c -index 1a3d9b4ea..9cf2f27c8 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/revb_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/revb_1.c -@@ -1,9 +1,7 @@ - /* { dg-do assemble { target aarch64_asm_sve_ok } } */ --/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ -+/* { dg-options "-O -msve-vector-bits=256 --save-temps -mlittle-endian" } */ - --#include -- --typedef int8_t vnx16qi __attribute__((vector_size (32))); -+typedef __INT8_TYPE__ vnx16qi __attribute__((vector_size (32))); - - #define MASK_2(X, Y) (X) ^ (Y), (X + 1) ^ (Y) - #define MASK_4(X, Y) MASK_2 (X, Y), MASK_2 (X + 2, Y) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/revb_2.c b/gcc/testsuite/gcc.target/aarch64/sve/revb_2.c -new file mode 100644 -index 000000000..389739cc8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/revb_2.c -@@ -0,0 +1,10 @@ -+/* { dg-do assemble { target aarch64_asm_sve_ok } } */ -+/* { dg-options "-O -msve-vector-bits=256 --save-temps -mbig-endian" } */ -+ -+#include "revb_1.c" -+ -+/* { dg-final { scan-assembler-not {\ttbl\t} } } */ -+ -+/* { dg-final { scan-assembler-times {\trevb\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d} 1 } } */ -+/* { dg-final { scan-assembler-times {\trevb\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s} 1 } } */ -+/* { dg-final { scan-assembler-times {\trevb\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h} 1 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/revh_1.c b/gcc/testsuite/gcc.target/aarch64/sve/revh_1.c -index 76145812b..28a0399b9 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/revh_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/revh_1.c -@@ -1,9 +1,7 @@ - /* { dg-do assemble { target aarch64_asm_sve_ok } } */ --/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ -+/* { dg-options "-O -msve-vector-bits=256 --save-temps -mlittle-endian" } */ - --#include -- --typedef uint16_t vnx8hi __attribute__((vector_size (32))); -+typedef __UINT16_TYPE__ vnx8hi __attribute__((vector_size (32))); - typedef _Float16 vnx8hf __attribute__((vector_size (32))); - - #define MASK_2(X, Y) (X) ^ (Y), (X + 1) ^ (Y) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/revh_2.c b/gcc/testsuite/gcc.target/aarch64/sve/revh_2.c -new file mode 100644 -index 000000000..e821b6402 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/revh_2.c -@@ -0,0 +1,9 @@ -+/* { dg-do assemble { target aarch64_asm_sve_ok } } */ -+/* { dg-options "-O -msve-vector-bits=256 --save-temps -mbig-endian" } */ -+ -+#include "revh_1.c" -+ -+/* { dg-final { scan-assembler-not {\ttbl\t} } } */ -+ -+/* { dg-final { scan-assembler-times {\trevh\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d} 2 } } */ -+/* { dg-final { scan-assembler-times 
{\trevh\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s} 2 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/revw_1.c b/gcc/testsuite/gcc.target/aarch64/sve/revw_1.c -index 8ac68b782..de926753c 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/revw_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/revw_1.c -@@ -1,9 +1,7 @@ - /* { dg-do assemble { target aarch64_asm_sve_ok } } */ --/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ -+/* { dg-options "-O -msve-vector-bits=256 --save-temps -mlittle-endian" } */ - --#include -- --typedef uint32_t vnx4si __attribute__((vector_size (32))); -+typedef __UINT32_TYPE__ vnx4si __attribute__((vector_size (32))); - typedef float vnx4sf __attribute__((vector_size (32))); - - #define MASK_2(X, Y) (X) ^ (Y), (X + 1) ^ (Y) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/revw_2.c b/gcc/testsuite/gcc.target/aarch64/sve/revw_2.c -new file mode 100644 -index 000000000..17243c05c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/revw_2.c -@@ -0,0 +1,8 @@ -+/* { dg-do assemble { target aarch64_asm_sve_ok } } */ -+/* { dg-options "-O -msve-vector-bits=256 --save-temps -mbig-endian" } */ -+ -+#include "revw_1.c" -+ -+/* { dg-final { scan-assembler-not {\ttbl\t} } } */ -+ -+/* { dg-final { scan-assembler-times {\trevw\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d} 2 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/sad_1.c b/gcc/testsuite/gcc.target/aarch64/sve/sad_1.c -new file mode 100644 -index 000000000..e7bf64a57 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/sad_1.c -@@ -0,0 +1,28 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define DEF_SAD(TYPE1, TYPE2) \ -+TYPE1 __attribute__ ((noinline, noclone)) \ -+sum_abs_##TYPE1##_##TYPE2 (TYPE2 *restrict x, TYPE2 *restrict y, int n) \ -+{ \ -+ TYPE1 sum = 0; \ -+ for (int i = 0; i < n; i++) \ -+ { \ -+ sum += __builtin_abs (x[i] - y[i]); \ -+ } \ -+ return sum; \ -+} -+ -+DEF_SAD(int32_t, uint8_t) -+DEF_SAD(int32_t, int8_t) -+DEF_SAD(int64_t, uint16_t) -+DEF_SAD(int64_t, int16_t) -+ -+/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tudot\tz[0-9]+\.s, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tuabd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsabd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tudot\tz[0-9]+\.d, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/sel_1.c b/gcc/testsuite/gcc.target/aarch64/sve/sel_1.c -new file mode 100644 -index 000000000..e651e5b93 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/sel_1.c -@@ -0,0 +1,27 @@ -+/* { dg-do assemble { target aarch64_asm_sve_ok } } */ -+/* { dg-options "-O2 -msve-vector-bits=256 --save-temps" } */ -+ -+#include -+ -+typedef int8_t vnx16qi __attribute__((vector_size (32))); -+ -+/* Predicate vector: 1 0 1 0 ... 
*/ -+ -+#define MASK_32 { 0, 33, 2, 35, 4, 37, 6, 39, 8, 41, \ -+ 10, 43, 12, 45, 14, 47, 16, 49, 18, 51, \ -+ 20, 53, 22, 55, 24, 57, 26, 59, 28, 61, 30, 63 } -+ -+#define INDEX_32 vnx16qi -+ -+#define PERMUTE(type, nunits) \ -+type permute_##type (type x, type y) \ -+{ \ -+ return __builtin_shuffle (x, y, (INDEX_##nunits) MASK_##nunits); \ -+} -+ -+PERMUTE(vnx16qi, 32) -+ -+/* { dg-final { scan-assembler-not {\ttbl\t} } } */ -+ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.b, p[0-9]+, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.h, vl16\n} 1 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/sel_2.c b/gcc/testsuite/gcc.target/aarch64/sve/sel_2.c -new file mode 100644 -index 000000000..05391474a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/sel_2.c -@@ -0,0 +1,41 @@ -+/* { dg-do assemble { target aarch64_asm_sve_ok } } */ -+/* { dg-options "-O2 -msve-vector-bits=256 --save-temps" } */ -+ -+#include -+ -+typedef int8_t vnx16qi __attribute__((vector_size (32))); -+typedef int16_t vnx8hi __attribute__((vector_size (32))); -+typedef int32_t vnx4si __attribute__((vector_size (32))); -+ -+typedef _Float16 vnx8hf __attribute__((vector_size (32))); -+typedef float vnx4sf __attribute__((vector_size (32))); -+ -+/* Predicate vector: 1 0 0 0 ... */ -+ -+#define MASK_32 { 0, 33, 34, 35, 4, 37, 38, 39, 8, 41, 42, 43, 12, \ -+ 45, 46, 47, 16, 49, 50, 51, 20, 53, 54, 55, 24, \ -+ 57, 58, 59, 28, 61, 62, 63 } -+ -+/* Predicate vector: 1 0 1 0 ... */ -+ -+#define MASK_16 {0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31} -+ -+#define INDEX_32 vnx16qi -+#define INDEX_16 vnx8hi -+ -+#define PERMUTE(type, nunits) \ -+type permute_##type (type x, type y) \ -+{ \ -+ return __builtin_shuffle (x, y, (INDEX_##nunits) MASK_##nunits); \ -+} -+ -+PERMUTE(vnx16qi, 32) -+PERMUTE(vnx8hi, 16) -+PERMUTE(vnx8hf, 16) -+ -+/* { dg-final { scan-assembler-not {\ttbl\t} } } */ -+ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.b, p[0-9]+, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-9]+, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s, vl8\n} 3 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/sel_3.c b/gcc/testsuite/gcc.target/aarch64/sve/sel_3.c -new file mode 100644 -index 000000000..a87492d9d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/sel_3.c -@@ -0,0 +1,50 @@ -+/* { dg-do assemble { target aarch64_asm_sve_ok } } */ -+/* { dg-options "-O2 -msve-vector-bits=256 --save-temps" } */ -+ -+#include -+ -+typedef int8_t vnx16qi __attribute__((vector_size (32))); -+typedef int16_t vnx8hi __attribute__((vector_size (32))); -+typedef int32_t vnx4si __attribute__((vector_size (32))); -+typedef _Float16 vnx8hf __attribute__((vector_size (32))); -+typedef float vnx4sf __attribute__((vector_size (32))); -+ -+/* Predicate vector: 1 0 0 0 0 0 0 0 ... */ -+ -+#define MASK_32 { 0, 33, 34, 35, 36, 37, 38, 39, \ -+ 8, 41, 42, 43, 44, 45, 46, 47, \ -+ 16, 49, 50, 51, 52, 53, 54, 55, \ -+ 24, 57, 58, 59, 60, 61, 62, 63 } -+ -+/* Predicate vector: 1 0 0 0 ... */ -+ -+#define MASK_16 { 0, 17, 18, 19, 4, 21, 22, 23, \ -+ 8, 25, 26, 27, 12, 29, 30, 31 } -+ -+/* Predicate vector: 1 0 ... 
*/ -+ -+#define MASK_8 { 0, 9, 2, 11, 4, 13, 6, 15 } -+ -+#define INDEX_32 vnx16qi -+#define INDEX_16 vnx8hi -+#define INDEX_8 vnx4si -+ -+#define PERMUTE(type, nunits) \ -+type permute_##type (type x, type y) \ -+{ \ -+ return __builtin_shuffle (x, y, (INDEX_##nunits) MASK_##nunits); \ -+} -+ -+PERMUTE(vnx16qi, 32) -+PERMUTE(vnx8hi, 16) -+PERMUTE(vnx4si, 8) -+PERMUTE(vnx8hf, 16) -+PERMUTE(vnx4sf, 8) -+ -+/* { dg-final { scan-assembler-not {\ttbl\t} } } */ -+ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.b, p[0-9]+, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-9]+, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s, p[0-9]+, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d, vl4\n} 5 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/sel_4.c b/gcc/testsuite/gcc.target/aarch64/sve/sel_4.c -new file mode 100644 -index 000000000..e9bbc5527 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/sel_4.c -@@ -0,0 +1,50 @@ -+/* { dg-do assemble { target aarch64_asm_sve_ok } } */ -+/* { dg-options "-O2 -msve-vector-bits=256 --save-temps" } */ -+ -+#include -+ -+typedef int8_t vnx16qi __attribute__((vector_size (32))); -+typedef int16_t vnx8hi __attribute__((vector_size (32))); -+typedef int32_t vnx4si __attribute__((vector_size (32))); -+typedef int64_t vnx2di __attribute__((vector_size (32))); -+ -+typedef _Float16 vnx8hf __attribute__((vector_size (32))); -+typedef float vnx4sf __attribute__((vector_size (32))); -+typedef double vnx2df __attribute__((vector_size (32))); -+ -+/* Predicate vector: 1 1 0 0 ... */ -+ -+#define MASK_32 { 0, 1, 34, 35, 4, 5, 38, 39, 8, 9, 42, 43, 12, 13, \ -+ 46, 47, 16, 17, 50, 51, 20, 21, 54, 55, 24, 25, \ -+ 58, 59, 28, 29, 62, 63 } -+ -+#define MASK_16 {0, 1, 18, 19, 4, 5, 22, 23, 8, 9, 26, 27, 12, 13, 30, 31} -+#define MASK_8 {0, 1, 10, 11, 4, 5, 14, 15} -+#define MASK_4 {0, 1, 6, 7} -+ -+#define INDEX_32 vnx16qi -+#define INDEX_16 vnx8hi -+#define INDEX_8 vnx4si -+#define INDEX_4 vnx2di -+ -+#define PERMUTE(type, nunits) \ -+type permute_##type (type x, type y) \ -+{ \ -+ return __builtin_shuffle (x, y, (INDEX_##nunits) MASK_##nunits); \ -+} -+ -+PERMUTE(vnx16qi, 32) -+PERMUTE(vnx8hi, 16) -+PERMUTE(vnx4si, 8) -+PERMUTE(vnx2di, 4) -+ -+PERMUTE(vnx8hf, 16) -+PERMUTE(vnx4sf, 8) -+PERMUTE(vnx2df, 4) -+ -+/* { dg-final { scan-assembler-not {\ttbl\t} } } */ -+ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.b, p[0-9]+, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-9]+, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s, p[0-9]+, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.d, p[0-9]+, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/sel_5.c b/gcc/testsuite/gcc.target/aarch64/sve/sel_5.c -new file mode 100644 -index 000000000..935abb54d ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/sel_5.c -@@ -0,0 +1,50 @@ -+/* { dg-do assemble { target aarch64_asm_sve_ok } } */ -+/* { dg-options "-O2 -msve-vector-bits=256 --save-temps" } */ -+ -+#include -+ -+typedef int8_t vnx16qi __attribute__((vector_size (32))); -+typedef int16_t vnx8hi __attribute__((vector_size (32))); -+typedef int32_t vnx4si __attribute__((vector_size (32))); -+typedef int64_t vnx2di __attribute__((vector_size (32))); -+ -+typedef _Float16 vnx8hf 
__attribute__((vector_size (32))); -+typedef float vnx4sf __attribute__((vector_size (32))); -+typedef double vnx2df __attribute__((vector_size (32))); -+ -+/* Predicate vector: 1 0 0 1 ... */ -+ -+#define MASK_32 { 0, 33, 34, 3, 4, 37, 38, 7, 8, 41, 42, 11, 12, 45, 46, \ -+ 15, 16, 49, 50, 19, 20, 53, 54, 23, 24, 57, 58, 27, 28, \ -+ 61, 62, 31 } -+ -+#define MASK_16 {0, 17, 18, 3, 4, 21, 22, 7, 8, 25, 26, 11, 12, 29, 30, 15} -+#define MASK_8 {0, 9, 10, 3, 4, 13, 14, 7} -+#define MASK_4 {0, 5, 6, 3} -+ -+#define INDEX_32 vnx16qi -+#define INDEX_16 vnx8hi -+#define INDEX_8 vnx4si -+#define INDEX_4 vnx2di -+ -+#define PERMUTE(type, nunits) \ -+type permute_##type (type x, type y) \ -+{ \ -+ return __builtin_shuffle (x, y, (INDEX_##nunits) MASK_##nunits); \ -+} -+ -+PERMUTE(vnx16qi, 32) -+PERMUTE(vnx8hi, 16) -+PERMUTE(vnx4si, 8) -+PERMUTE(vnx2di, 4) -+ -+PERMUTE(vnx8hf, 16) -+PERMUTE(vnx4sf, 8) -+PERMUTE(vnx2df, 4) -+ -+/* { dg-final { scan-assembler-not {\ttbl\t} } } */ -+ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.b, p[0-9]+, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-9]+, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s, p[0-9]+, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.d, p[0-9]+, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/sel_6.c b/gcc/testsuite/gcc.target/aarch64/sve/sel_6.c -new file mode 100644 -index 000000000..772938f68 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/sel_6.c -@@ -0,0 +1,42 @@ -+/* { dg-do assemble { target aarch64_asm_sve_ok } } */ -+/* { dg-options "-O2 -msve-vector-bits=256 --save-temps" } */ -+ -+#include -+ -+typedef int32_t vnx4si __attribute__((vector_size (32))); -+typedef int64_t vnx2di __attribute__((vector_size (32))); -+ -+typedef float vnx4sf __attribute__((vector_size (32))); -+typedef double vnx2df __attribute__((vector_size (32))); -+ -+/* Predicate vector: 1 0 0 0 ... 
*/ -+ -+#define MASK_32 { 0, 33, 34, 35, 4, 37, 38, 39, 8, 41, 42, 43, 12, \ -+ 45, 46, 47, 16, 49, 50, 51, 20, 53, 54, 55, 24, \ -+ 57, 58, 59, 28, 61, 62, 63 } -+ -+#define MASK_16 {0, 17, 18, 19, 4, 21, 22, 23, 8, 25, 26, 27, 12, 29, 30, 31} -+#define MASK_8 {0, 9, 10, 11, 4, 13, 14, 15} -+#define MASK_4 {0, 5, 6, 7} -+ -+#define INDEX_8 vnx4si -+#define INDEX_4 vnx2di -+ -+#define PERMUTE(type, nunits) \ -+type permute_##type (type x, type y) \ -+{ \ -+ return __builtin_shuffle (x, y, (INDEX_##nunits) MASK_##nunits); \ -+} -+ -+PERMUTE(vnx4si, 8) -+PERMUTE(vnx2di, 4) -+ -+PERMUTE(vnx4sf, 8) -+PERMUTE(vnx2df, 4) -+ -+/* { dg-final { scan-assembler-not {\ttbl\t} } } */ -+ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s, p[0-9]+, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.d, p[0-9]+, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d, vl4\n} 2 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/shift_1.c b/gcc/testsuite/gcc.target/aarch64/sve/shift_1.c -index f4c5ebd46..5ee66da15 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/shift_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/shift_1.c -@@ -75,9 +75,9 @@ DO_IMMEDIATE_OPS (63, int64_t, 63); - /* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ - /* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ - --/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ --/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ --/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tasrr?\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlsrr?\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tlslr?\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ - - /* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, z[0-9]+\.b, #5\n} 1 } } */ - /* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, z[0-9]+\.b, #5\n} 1 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/single_1.c b/gcc/testsuite/gcc.target/aarch64/sve/single_1.c -index 11b88aef7..7764a1b0f 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/single_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/single_1.c -@@ -40,10 +40,7 @@ TEST_LOOP (double, 3.0) - /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */ - /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */ - --/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl32\n} 2 } } */ --/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl16\n} 3 } } */ --/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s, vl8\n} 3 } } */ --/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d, vl4\n} 3 } } */ -+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl32\n} 11 } } */ - - /* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */ - /* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/single_2.c b/gcc/testsuite/gcc.target/aarch64/sve/single_2.c -index 1fbf4892c..42fc17b73 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/single_2.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/single_2.c -@@ -16,10 
+16,7 @@ - /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */ - /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */ - --/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl64\n} 2 } } */ --/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl32\n} 3 } } */ --/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s, vl16\n} 3 } } */ --/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d, vl8\n} 3 } } */ -+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl64\n} 11 } } */ - - /* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */ - /* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/single_3.c b/gcc/testsuite/gcc.target/aarch64/sve/single_3.c -index a3688b692..338ca1e3d 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/single_3.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/single_3.c -@@ -16,10 +16,7 @@ - /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */ - /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */ - --/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl128\n} 2 } } */ --/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl64\n} 3 } } */ --/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s, vl32\n} 3 } } */ --/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d, vl16\n} 3 } } */ -+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl128\n} 11 } } */ - - /* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */ - /* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/single_4.c b/gcc/testsuite/gcc.target/aarch64/sve/single_4.c -index 08965d39f..37c78a659 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/single_4.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/single_4.c -@@ -16,10 +16,7 @@ - /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.s, #2\.0e\+0\n} 1 } } */ - /* { dg-final { scan-assembler-times {\tfmov\tz[0-9]+\.d, #3\.0e\+0\n} 1 } } */ - --/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl256\n} 2 } } */ --/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl128\n} 3 } } */ --/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s, vl64\n} 3 } } */ --/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d, vl32\n} 3 } } */ -+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl256\n} 11 } } */ - - /* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.b,} 2 } } */ - /* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.h,} 3 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_2.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_2.c -index 413532c07..d4b9776fe 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/slp_2.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_2.c -@@ -29,12 +29,9 @@ vec_slp_##TYPE (TYPE *restrict a, int n) \ - - TEST_ALL (VEC_PERM) - --/* { dg-final { scan-assembler-times {\tld1rh\tz[0-9]+\.h, } 2 { target aarch64_little_endian } } } */ --/* { dg-final { scan-assembler-times {\tld1rqb\tz[0-9]+\.b, } 2 { target aarch64_big_endian } } } */ --/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 3 { target aarch64_little_endian } } } */ --/* { dg-final { scan-assembler-times {\tld1rqh\tz[0-9]+\.h, } 3 { target aarch64_big_endian } } } */ --/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 3 { target aarch64_little_endian } } } */ --/* { dg-final { scan-assembler-times 
{\tld1rqw\tz[0-9]+\.s, } 3 { target aarch64_big_endian } } } */ -+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, w[0-9]+\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 3 } } */ -+/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 3 } } */ - /* { dg-final { scan-assembler-times {\tld1rqd\tz[0-9]+\.d, } 3 } } */ - /* { dg-final { scan-assembler-not {\tzip1\t} } } */ - /* { dg-final { scan-assembler-not {\tzip2\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_3.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_3.c -index 0f9f01a00..82dd43a4d 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/slp_3.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_3.c -@@ -32,18 +32,17 @@ vec_slp_##TYPE (TYPE *restrict a, int n) \ - TEST_ALL (VEC_PERM) - - /* 1 for each 8-bit type. */ --/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 2 { target aarch64_little_endian } } } */ --/* { dg-final { scan-assembler-times {\tld1rqb\tz[0-9]+\.b, } 2 { target aarch64_big_endian } } } */ --/* 1 for each 16-bit type and 4 for double. */ --/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 7 { target aarch64_little_endian } } } */ --/* { dg-final { scan-assembler-times {\tld1rqh\tz[0-9]+\.h, } 3 { target aarch64_big_endian } } } */ --/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 4 { target aarch64_big_endian } } } */ -+/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 2 } } */ -+/* 1 for each 16-bit type plus 1 for double. */ -+/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 4 } } */ - /* 1 for each 32-bit type. */ - /* { dg-final { scan-assembler-times {\tld1rqw\tz[0-9]+\.s, } 3 } } */ - /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #41\n} 2 } } */ - /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #25\n} 2 } } */ - /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #31\n} 2 } } */ - /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #62\n} 2 } } */ -+/* 3 for double. */ -+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, x[0-9]+\n} 3 } } */ - /* The 64-bit types need: - - ZIP1 ZIP1 (2 ZIP2s optimized away) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_4.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_4.c -index 8d9d5ab58..49fb828e8 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/slp_4.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_4.c -@@ -35,10 +35,8 @@ vec_slp_##TYPE (TYPE *restrict a, int n) \ - - TEST_ALL (VEC_PERM) - --/* 1 for each 8-bit type, 4 for each 32-bit type and 8 for double. */ --/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 22 { target aarch64_little_endian } } } */ --/* { dg-final { scan-assembler-times {\tld1rqb\tz[0-9]+\.b, } 2 { target aarch64_big_endian } } } */ --/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 20 { target aarch64_big_endian } } } */ -+/* 1 for each 8-bit type, 4 for each 32-bit type and 4 for double. */ -+/* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 18 } } */ - /* 1 for each 16-bit type. */ - /* { dg-final { scan-assembler-times {\tld1rqh\tz[0-9]\.h, } 3 } } */ - /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #99\n} 2 } } */ -@@ -49,6 +47,8 @@ TEST_ALL (VEC_PERM) - /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #37\n} 2 } } */ - /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #24\n} 2 } } */ - /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #81\n} 2 } } */ -+/* 4 for double. 
*/ -+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, x[0-9]+\n} 4 } } */ - /* The 32-bit types need: - - ZIP1 ZIP1 (2 ZIP2s optimized away) -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/smax_1.c b/gcc/testsuite/gcc.target/aarch64/sve/smax_1.c -new file mode 100644 -index 000000000..050248c81 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/smax_1.c -@@ -0,0 +1,71 @@ -+/* { dg-do assemble { target aarch64_asm_sve_ok } } */ -+/* { dg-options "-O3 --save-temps" } */ -+ -+#include -+ -+#define DO_REGREG_OPS(TYPE) \ -+void varith_##TYPE##_reg (TYPE *dst, TYPE *src, int count) \ -+{ \ -+ for (int i = 0; i < count; ++i) \ -+ dst[i] = dst[i] > src[i] ? dst[i] : src[i]; \ -+} -+ -+#define DO_IMMEDIATE_OPS(VALUE, TYPE, NAME) \ -+void varithimm_##NAME##_##TYPE (TYPE *dst, int count) \ -+{ \ -+ for (int i = 0; i < count; ++i) \ -+ dst[i] = dst[i] > (TYPE) VALUE ? dst[i] : (TYPE) VALUE; \ -+} -+ -+#define DO_ARITH_OPS(TYPE) \ -+ DO_REGREG_OPS (TYPE); \ -+ DO_IMMEDIATE_OPS (0, TYPE, 0); \ -+ DO_IMMEDIATE_OPS (86, TYPE, 86); \ -+ DO_IMMEDIATE_OPS (109, TYPE, 109); \ -+ DO_IMMEDIATE_OPS (141, TYPE, 141); \ -+ DO_IMMEDIATE_OPS (-1, TYPE, minus1); \ -+ DO_IMMEDIATE_OPS (-110, TYPE, minus110); \ -+ DO_IMMEDIATE_OPS (-141, TYPE, minus141); -+ -+DO_ARITH_OPS (int8_t) -+DO_ARITH_OPS (int16_t) -+DO_ARITH_OPS (int32_t) -+DO_ARITH_OPS (int64_t) -+ -+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, z[0-9]+\.b, #0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, z[0-9]+\.b, #86\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, z[0-9]+\.b, #109\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, z[0-9]+\.b, #115\n} 1 } } */ -+/* { dg-final { scan-assembler-not {\tsmax\tz[0-9]+\.b, z[0-9]+\.b, #141\n} } } */ -+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, z[0-9]+\.b, #-1\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, z[0-9]+\.b, #-110\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, z[0-9]+\.b, #-115\n} 1 } } */ -+/* { dg-final { scan-assembler-not {\tsmax\tz[0-9]+\.b, z[0-9]+\.b, #-141\n} } } */ -+ -+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */ -+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, z[0-9]+\.h, #0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, z[0-9]+\.h, #86\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, z[0-9]+\.h, #109\n} 1 } } */ -+/* { dg-final { scan-assembler-not {\tsmax\tz[0-9]+\.h, z[0-9]+\.h, #141\n} } } */ -+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, z[0-9]+\.h, #-1\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, z[0-9]+\.h, #-110\n} 1 } } */ -+/* { dg-final { scan-assembler-not {\tsmax\tz[0-9]+\.h, z[0-9]+\.h, #-141\n} } } */ -+ -+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ -+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, z[0-9]+\.s, #0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, z[0-9]+\.s, #86\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, z[0-9]+\.s, #109\n} 1 } } */ -+/* { dg-final { scan-assembler-not {\tsmax\tz[0-9]+\.s, z[0-9]+\.s, #141\n} } } */ -+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, z[0-9]+\.s, #-1\n} 1 } } */ -+/* { dg-final { 
scan-assembler-times {\tsmax\tz[0-9]+\.s, z[0-9]+\.s, #-110\n} 1 } } */ -+/* { dg-final { scan-assembler-not {\tsmax\tz[0-9]+\.s, z[0-9]+\.s, #-141\n} } } */ -+ -+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */ -+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, z[0-9]+\.d, #0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, z[0-9]+\.d, #86\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, z[0-9]+\.d, #109\n} 1 } } */ -+/* { dg-final { scan-assembler-not {\tsmax\tz[0-9]+\.d, z[0-9]+\.d, #141\n} } } */ -+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, z[0-9]+\.d, #-1\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, z[0-9]+\.d, #-110\n} 1 } } */ -+/* { dg-final { scan-assembler-not {\tsmax\tz[0-9]+\.d, z[0-9]+\.d, #-141\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/smin_1.c b/gcc/testsuite/gcc.target/aarch64/sve/smin_1.c -new file mode 100644 -index 000000000..d6a9e9467 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/smin_1.c -@@ -0,0 +1,71 @@ -+/* { dg-do assemble { target aarch64_asm_sve_ok } } */ -+/* { dg-options "-O3 --save-temps" } */ -+ -+#include -+ -+#define DO_REGREG_OPS(TYPE) \ -+void varith_##TYPE##_reg (TYPE *dst, TYPE *src, int count) \ -+{ \ -+ for (int i = 0; i < count; ++i) \ -+ dst[i] = dst[i] < src[i] ? dst[i] : src[i]; \ -+} -+ -+#define DO_IMMEDIATE_OPS(VALUE, TYPE, NAME) \ -+void varithimm_##NAME##_##TYPE (TYPE *dst, int count) \ -+{ \ -+ for (int i = 0; i < count; ++i) \ -+ dst[i] = dst[i] < (TYPE) VALUE ? dst[i] : (TYPE) VALUE; \ -+} -+ -+#define DO_ARITH_OPS(TYPE) \ -+ DO_REGREG_OPS (TYPE); \ -+ DO_IMMEDIATE_OPS (0, TYPE, 0); \ -+ DO_IMMEDIATE_OPS (86, TYPE, 86); \ -+ DO_IMMEDIATE_OPS (109, TYPE, 109); \ -+ DO_IMMEDIATE_OPS (141, TYPE, 141); \ -+ DO_IMMEDIATE_OPS (-1, TYPE, minus1); \ -+ DO_IMMEDIATE_OPS (-110, TYPE, minus110); \ -+ DO_IMMEDIATE_OPS (-141, TYPE, minus141); -+ -+DO_ARITH_OPS (int8_t) -+DO_ARITH_OPS (int16_t) -+DO_ARITH_OPS (int32_t) -+DO_ARITH_OPS (int64_t) -+ -+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, z[0-9]+\.b, #0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, z[0-9]+\.b, #86\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, z[0-9]+\.b, #109\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, z[0-9]+\.b, #115\n} 1 } } */ -+/* { dg-final { scan-assembler-not {\tsmin\tz[0-9]+\.b, z[0-9]+\.b, #141\n} } } */ -+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, z[0-9]+\.b, #-1\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, z[0-9]+\.b, #-110\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, z[0-9]+\.b, #-115\n} 1 } } */ -+/* { dg-final { scan-assembler-not {\tsmin\tz[0-9]+\.b, z[0-9]+\.b, #-141\n} } } */ -+ -+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 3 } } */ -+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, z[0-9]+\.h, #0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, z[0-9]+\.h, #86\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, z[0-9]+\.h, #109\n} 1 } } */ -+/* { dg-final { scan-assembler-not {\tsmin\tz[0-9]+\.h, z[0-9]+\.h, #141\n} } } */ -+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, z[0-9]+\.h, #-1\n} 1 } } */ -+/* { 
dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, z[0-9]+\.h, #-110\n} 1 } } */ -+/* { dg-final { scan-assembler-not {\tsmin\tz[0-9]+\.h, z[0-9]+\.h, #-141\n} } } */ -+ -+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */ -+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, z[0-9]+\.s, #0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, z[0-9]+\.s, #86\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, z[0-9]+\.s, #109\n} 1 } } */ -+/* { dg-final { scan-assembler-not {\tsmin\tz[0-9]+\.s, z[0-9]+\.s, #141\n} } } */ -+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, z[0-9]+\.s, #-1\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, z[0-9]+\.s, #-110\n} 1 } } */ -+/* { dg-final { scan-assembler-not {\tsmin\tz[0-9]+\.s, z[0-9]+\.s, #-141\n} } } */ -+ -+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */ -+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, z[0-9]+\.d, #0\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, z[0-9]+\.d, #86\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, z[0-9]+\.d, #109\n} 1 } } */ -+/* { dg-final { scan-assembler-not {\tsmin\tz[0-9]+\.d, z[0-9]+\.d, #141\n} } } */ -+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, z[0-9]+\.d, #-1\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, z[0-9]+\.d, #-110\n} 1 } } */ -+/* { dg-final { scan-assembler-not {\tsmin\tz[0-9]+\.d, z[0-9]+\.d, #-141\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/spill_2.c b/gcc/testsuite/gcc.target/aarch64/sve/spill_2.c -index 28fcc4429..fcd481611 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/spill_2.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/spill_2.c -@@ -9,29 +9,30 @@ void consumer (void *); - void \ - multi_loop_##TYPE (TYPE *x, TYPE val) \ - { \ -- for (int i = 0; i < 7; ++i) \ -+ for (int i = 0; i < 9; ++i) \ - x[i] += val; \ - consumer (x); \ -- for (int i = 0; i < 7; ++i) \ -+ for (int i = 0; i < 9; ++i) \ - x[i] += val; \ - consumer (x); \ -- for (int i = 0; i < 7; ++i) \ -+ for (int i = 0; i < 9; ++i) \ - x[i] += val; \ - consumer (x); \ - } - - /* One iteration is enough. */ - TEST_LOOP (uint8_t); -+/* Two iterations are enough. We specialize the second two loops based -+ on whether the first executes once or twice. */ - TEST_LOOP (uint16_t); --/* Two iterations are enough. Complete unrolling makes sense -- even at -O2. */ -+/* Three iterations are needed; ought to stay a loop. */ - TEST_LOOP (uint32_t); --/* Four iterations are needed; ought to stay a loop. */ -+/* Five iterations are needed; ought to stay a loop. 
*/ - TEST_LOOP (uint64_t); - - /* { dg-final { scan-assembler-times {\twhilelo\tp[0-9]\.b} 3 } } */ --/* { dg-final { scan-assembler-times {\twhilelo\tp[0-9]\.h} 3 } } */ --/* { dg-final { scan-assembler {\twhilelo\tp[0-9]\.s} } } */ -+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-9]\.h} 8 } } */ -+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-9]\.s} 6 } } */ - /* { dg-final { scan-assembler-times {\twhilelo\tp[0-9]\.d} 6 } } */ - /* { dg-final { scan-assembler-not {\tldr\tz[0-9]} } } */ - /* { dg-final { scan-assembler-not {\tstr\tz[0-9]} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/spill_4.c b/gcc/testsuite/gcc.target/aarch64/sve/spill_4.c -index 29e1a49dc..81b3f6452 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/spill_4.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/spill_4.c -@@ -24,10 +24,11 @@ TEST_LOOP (uint16_t, 0x1234); - TEST_LOOP (uint32_t, 0x12345); - TEST_LOOP (uint64_t, 0x123456); - --/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.h,} 3 } } */ --/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.s,} 3 } } */ --/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.d,} 3 } } */ --/* { dg-final { scan-assembler-times {\tld1rh\tz[0-9]+\.h,} 3 } } */ -+/* { dg-final { scan-assembler-times {\tptrue\tp[0-9]+\.b,} 6 } } */ -+/* { dg-final { scan-assembler-not {\tptrue\tp[0-9]+\.h,} } } */ -+/* { dg-final { scan-assembler-not {\tptrue\tp[0-9]+\.s,} } } */ -+/* { dg-final { scan-assembler-not {\tptrue\tp[0-9]+\.d,} } } */ -+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, w[0-9]+\n} 3 } } */ - /* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s,} 3 } } */ - /* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d,} 3 } } */ - /* { dg-final { scan-assembler-not {\tldr\tz[0-9]} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_1.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_1.c -index 6e3c8898a..918a58138 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_1.c -@@ -83,9 +83,9 @@ NAME(g4) (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c, - } - } - --/* { dg-final { scan-assembler {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} } } */ - /* { dg-final { scan-assembler {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */ --/* { dg-final { scan-assembler {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */ --/* { dg-final { scan-assembler {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} } } */ -+/* { dg-final { scan-assembler {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} } } */ - /* { dg-final { scan-assembler {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */ --/* { dg-final { scan-assembler {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */ -+/* { dg-final { scan-assembler {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_14.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_14.c -index 45644b67b..a16a79e51 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_14.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_14.c -@@ -43,12 +43,12 @@ - #undef NAME - #undef TYPE - --/* { dg-final { scan-assembler-times 
{\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */ - /* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ --/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ --/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */ - /* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ --/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */ - - /* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ - /* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_15.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_15.c -index 814dbb3ae..bc00267c8 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_15.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_15.c -@@ -3,12 +3,12 @@ - - #include "struct_vect_14.c" - --/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */ - /* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ --/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ --/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */ - /* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ --/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */ - - /* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ - /* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_16.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_16.c -index 6ecf89b54..9e2a549f5 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_16.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_16.c -@@ -3,12 +3,12 @@ - - #include "struct_vect_14.c" - --/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */ - /* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, 
p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ --/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ --/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */ - /* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ --/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */ - - /* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ - /* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_17.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_17.c -index 571c6d0d3..e791e2e12 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_17.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_17.c -@@ -3,12 +3,12 @@ - - #include "struct_vect_14.c" - --/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */ - /* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ --/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */ --/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */ - /* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ --/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */ - - /* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ - /* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_18.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_18.c -index dc912e63c..3bc53b69d 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_18.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_18.c -@@ -46,4 +46,4 @@ TEST (test) - /* { dg-final { scan-assembler-times {\tstr\td} 1 } } */ - - /* The only branches should be in the vectorized loop. 
*/ --/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 4 } } */ -+/* { dg-final { scan-assembler-times {\tb[.a-z]+\t} 4 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_19.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_19.c -index 6568dc71c..833bf0669 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_19.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_19.c -@@ -46,4 +46,4 @@ TEST (test) - /* Each function should have three branches: one directly to the exit - (n <= 0), one to the single scalar epilogue iteration (n == 1), - and one branch-back for the vectorized loop. */ --/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 12 } } */ -+/* { dg-final { scan-assembler-times {\tb[.a-z]+\t} 12 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_20.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_20.c -index 6c3520c2f..858ca74f8 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_20.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_20.c -@@ -46,4 +46,4 @@ TEST (test) - /* { dg-final { scan-assembler-times {\tstr\td} 1 } } */ - - /* The only branches should be in the vectorized loop. */ --/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 4 } } */ -+/* { dg-final { scan-assembler-times {\tb[.a-z]+\t} 4 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_21.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_21.c -index 4b2a5e463..95691fe9e 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_21.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_21.c -@@ -46,4 +46,4 @@ TEST (test) - /* Each function should have three branches: one directly to the exit - (n <= 0), one to the single scalar epilogue iteration (n == 1), - and one branch-back for the vectorized loop. */ --/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 12 } } */ -+/* { dg-final { scan-assembler-times {\tb[.a-z]+\t} 12 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_22.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_22.c -index b61536053..8eb072505 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_22.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_22.c -@@ -46,4 +46,4 @@ TEST (test) - /* { dg-final { scan-assembler-times {\tstr\td} 1 } } */ - - /* The only branches should be in the vectorized loop. */ --/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 4 } } */ -+/* { dg-final { scan-assembler-times {\tb[.a-z]+\t} 4 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_23.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_23.c -index b529e0386..705b2350a 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_23.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_23.c -@@ -46,4 +46,4 @@ TEST (test) - /* Each function should have three branches: one directly to the exit - (n <= 0), one to the single scalar epilogue iteration (n == 1), - and one branch-back for the vectorized loop. 
*/ --/* { dg-final { scan-assembler-times {\tb[a-z]+\t} 12 } } */ -+/* { dg-final { scan-assembler-times {\tb[.a-z]+\t} 12 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_7.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_7.c -index b74190149..3d3070e77 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_7.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_7.c -@@ -78,9 +78,9 @@ g4 (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c, - } - } - --/* { dg-final { scan-assembler {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */ -+/* { dg-final { scan-assembler {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} } } */ - /* { dg-final { scan-assembler {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */ --/* { dg-final { scan-assembler {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */ --/* { dg-final { scan-assembler {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */ -+/* { dg-final { scan-assembler {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} } } */ -+/* { dg-final { scan-assembler {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} } } */ - /* { dg-final { scan-assembler {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */ --/* { dg-final { scan-assembler {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */ -+/* { dg-final { scan-assembler {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/umax_1.c b/gcc/testsuite/gcc.target/aarch64/sve/umax_1.c -new file mode 100644 -index 000000000..fffedb9c3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/umax_1.c -@@ -0,0 +1,65 @@ -+/* { dg-do assemble { target aarch64_asm_sve_ok } } */ -+/* { dg-options "-O3 --save-temps" } */ -+ -+#include -+ -+#define DO_REGREG_OPS(TYPE) \ -+void varith_##TYPE##_reg (TYPE *dst, TYPE *src, int count) \ -+{ \ -+ for (int i = 0; i < count; ++i) \ -+ dst[i] = dst[i] > src[i] ? dst[i] : src[i]; \ -+} -+ -+#define DO_IMMEDIATE_OPS(VALUE, TYPE) \ -+void varithimm_##VALUE##_##TYPE (TYPE *dst, int count) \ -+{ \ -+ for (int i = 0; i < count; ++i) \ -+ dst[i] = dst[i] > (TYPE) VALUE ? 
dst[i] : (TYPE) VALUE; \ -+} -+ -+#define DO_ARITH_OPS(TYPE) \ -+ DO_REGREG_OPS (TYPE); \ -+ DO_IMMEDIATE_OPS (2, TYPE); \ -+ DO_IMMEDIATE_OPS (86, TYPE); \ -+ DO_IMMEDIATE_OPS (109, TYPE); \ -+ DO_IMMEDIATE_OPS (141, TYPE); \ -+ DO_IMMEDIATE_OPS (229, TYPE); \ -+ DO_IMMEDIATE_OPS (255, TYPE); \ -+ DO_IMMEDIATE_OPS (256, TYPE); -+ -+DO_ARITH_OPS (uint8_t) -+DO_ARITH_OPS (uint16_t) -+DO_ARITH_OPS (uint32_t) -+DO_ARITH_OPS (uint64_t) -+ -+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, z[0-9]+\.b, #86\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, z[0-9]+\.b, #109\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, z[0-9]+\.b, #141\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, z[0-9]+\.b, #229\n} 1 } } */ -+/* { dg-final { scan-assembler-not {\tumax\tz[0-9]+\.b, z[0-9]+\.b, #255\n} } } */ -+/* { dg-final { scan-assembler-not {\tumax\tz[0-9]+\.b, z[0-9]+\.b, #256\n} } } */ -+ -+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, z[0-9]+\.h, #86\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, z[0-9]+\.h, #109\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, z[0-9]+\.h, #141\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, z[0-9]+\.h, #229\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, z[0-9]+\.h, #255\n} 1 } } */ -+/* { dg-final { scan-assembler-not {\tumax\tz[0-9]+\.h, z[0-9]+\.h, #256\n} } } */ -+ -+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, z[0-9]+\.s, #86\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, z[0-9]+\.s, #109\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, z[0-9]+\.s, #141\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, z[0-9]+\.s, #229\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, z[0-9]+\.s, #255\n} 1 } } */ -+/* { dg-final { scan-assembler-not {\tumax\tz[0-9]+\.s, z[0-9]+\.s, #256\n} } } */ -+ -+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, z[0-9]+\.d, #86\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, z[0-9]+\.d, #109\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, z[0-9]+\.d, #141\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, z[0-9]+\.d, #229\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, z[0-9]+\.d, #255\n} 1 } } */ -+/* { dg-final { scan-assembler-not {\tumax\tz[0-9]+\.d, z[0-9]+\.d, #256\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/umin_1.c b/gcc/testsuite/gcc.target/aarch64/sve/umin_1.c -new file mode 100644 -index 000000000..f7cdba3b7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/umin_1.c -@@ -0,0 +1,65 @@ -+/* { dg-do assemble { target aarch64_asm_sve_ok } } */ -+/* { dg-options "-O3 --save-temps" } */ -+ -+#include -+ -+#define DO_REGREG_OPS(TYPE) \ -+void varith_##TYPE##_reg (TYPE *dst, TYPE *src, int count) \ -+{ \ -+ for (int i = 0; i < count; ++i) \ -+ dst[i] = dst[i] < src[i] ? 
dst[i] : src[i]; \ -+} -+ -+#define DO_IMMEDIATE_OPS(VALUE, TYPE) \ -+void varithimm_##VALUE##_##TYPE (TYPE *dst, int count) \ -+{ \ -+ for (int i = 0; i < count; ++i) \ -+ dst[i] = dst[i] < (TYPE) VALUE ? dst[i] : (TYPE) VALUE; \ -+} -+ -+#define DO_ARITH_OPS(TYPE) \ -+ DO_REGREG_OPS (TYPE); \ -+ DO_IMMEDIATE_OPS (2, TYPE); \ -+ DO_IMMEDIATE_OPS (86, TYPE); \ -+ DO_IMMEDIATE_OPS (109, TYPE); \ -+ DO_IMMEDIATE_OPS (141, TYPE); \ -+ DO_IMMEDIATE_OPS (229, TYPE); \ -+ DO_IMMEDIATE_OPS (255, TYPE); \ -+ DO_IMMEDIATE_OPS (256, TYPE); -+ -+DO_ARITH_OPS (uint8_t) -+DO_ARITH_OPS (uint16_t) -+DO_ARITH_OPS (uint32_t) -+DO_ARITH_OPS (uint64_t) -+ -+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, z[0-9]+\.b, #86\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, z[0-9]+\.b, #109\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, z[0-9]+\.b, #141\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, z[0-9]+\.b, #229\n} 1 } } */ -+/* { dg-final { scan-assembler-not {\tumin\tz[0-9]+\.b, z[0-9]+\.b, #255\n} } } */ -+/* { dg-final { scan-assembler-not {\tumin\tz[0-9]+\.b, z[0-9]+\.b, #256\n} } } */ -+ -+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, z[0-9]+\.h, #86\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, z[0-9]+\.h, #109\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, z[0-9]+\.h, #141\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, z[0-9]+\.h, #229\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, z[0-9]+\.h, #255\n} 1 } } */ -+/* { dg-final { scan-assembler-not {\tumin\tz[0-9]+\.h, z[0-9]+\.h, #256\n} } } */ -+ -+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, z[0-9]+\.s, #86\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, z[0-9]+\.s, #109\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, z[0-9]+\.s, #141\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, z[0-9]+\.s, #229\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, z[0-9]+\.s, #255\n} 1 } } */ -+/* { dg-final { scan-assembler-not {\tumin\tz[0-9]+\.s, z[0-9]+\.s, #256\n} } } */ -+ -+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, z[0-9]+\.d, #86\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, z[0-9]+\.d, #109\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, z[0-9]+\.d, #141\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, z[0-9]+\.d, #229\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, z[0-9]+\.d, #255\n} 1 } } */ -+/* { dg-final { scan-assembler-not {\tumin\tz[0-9]+\.d, z[0-9]+\.d, #256\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unroll-1.c b/gcc/testsuite/gcc.target/aarch64/sve/unroll-1.c -index d4353009e..e33777fc3 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/unroll-1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/unroll-1.c -@@ -10,4 +10,4 @@ fully_peel_me (double *x) - x[i] = x[i] * 2; - } - --/* { dg-final { scan-assembler-times 
{b..\t\.L.\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tb[.a-z]+\t} 1 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_17.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_17.c -new file mode 100644 -index 000000000..cabcfa73e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_17.c -@@ -0,0 +1,94 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include -+ -+#define eq(A, B) ((A) == (B)) -+#define ne(A, B) ((A) != (B)) -+#define olt(A, B) ((A) < (B)) -+#define ole(A, B) ((A) <= (B)) -+#define oge(A, B) ((A) >= (B)) -+#define ogt(A, B) ((A) > (B)) -+#define ordered(A, B) (!__builtin_isunordered (A, B)) -+#define unordered(A, B) (__builtin_isunordered (A, B)) -+#define ueq(A, B) (!__builtin_islessgreater (A, B)) -+#define ult(A, B) (__builtin_isless (A, B)) -+#define ule(A, B) (__builtin_islessequal (A, B)) -+#define uge(A, B) (__builtin_isgreaterequal (A, B)) -+#define ugt(A, B) (__builtin_isgreater (A, B)) -+#define nueq(A, B) (__builtin_islessgreater (A, B)) -+#define nult(A, B) (!__builtin_isless (A, B)) -+#define nule(A, B) (!__builtin_islessequal (A, B)) -+#define nuge(A, B) (!__builtin_isgreaterequal (A, B)) -+#define nugt(A, B) (!__builtin_isgreater (A, B)) -+ -+#define DEF_LOOP(CMP, EXPECT_INVALID) \ -+ void __attribute__ ((noinline, noclone)) \ -+ test_##CMP##_var (__fp16 *restrict dest, __fp16 *restrict src, \ -+ __fp16 fallback, __fp16 *restrict a, \ -+ __fp16 *restrict b, int count) \ -+ { \ -+ for (int i = 0; i < count; ++i) \ -+ dest[i] = CMP (a[i], b[i]) ? src[i] : fallback; \ -+ } \ -+ \ -+ void __attribute__ ((noinline, noclone)) \ -+ test_##CMP##_zero (__fp16 *restrict dest, __fp16 *restrict src, \ -+ __fp16 fallback, __fp16 *restrict a, \ -+ int count) \ -+ { \ -+ for (int i = 0; i < count; ++i) \ -+ dest[i] = CMP (a[i], (__fp16) 0) ? src[i] : fallback; \ -+ } \ -+ \ -+ void __attribute__ ((noinline, noclone)) \ -+ test_##CMP##_sel (__fp16 *restrict dest, __fp16 if_true, \ -+ __fp16 if_false, __fp16 *restrict a, \ -+ __fp16 b, int count) \ -+ { \ -+ for (int i = 0; i < count; ++i) \ -+ dest[i] = CMP (a[i], b) ? 
if_true : if_false; \ -+ } -+ -+#define TEST_ALL(T) \ -+ T (eq, 0) \ -+ T (ne, 0) \ -+ T (olt, 1) \ -+ T (ole, 1) \ -+ T (oge, 1) \ -+ T (ogt, 1) \ -+ T (ordered, 0) \ -+ T (unordered, 0) \ -+ T (ueq, 0) \ -+ T (ult, 0) \ -+ T (ule, 0) \ -+ T (uge, 0) \ -+ T (ugt, 0) \ -+ T (nueq, 0) \ -+ T (nult, 0) \ -+ T (nule, 0) \ -+ T (nuge, 0) \ -+ T (nugt, 0) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler {\tfcmeq\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} { xfail *-*-* } } } */ -+/* { dg-final { scan-assembler {\tfcmeq\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */ -+ -+/* { dg-final { scan-assembler {\tfcmne\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} } } */ -+/* { dg-final { scan-assembler {\tfcmne\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */ -+ -+/* { dg-final { scan-assembler {\tfcmlt\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} } } */ -+/* { dg-final { scan-assembler {\tfcmlt\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */ -+ -+/* { dg-final { scan-assembler {\tfcmle\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} } } */ -+/* { dg-final { scan-assembler {\tfcmle\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */ -+ -+/* { dg-final { scan-assembler {\tfcmgt\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} } } */ -+/* { dg-final { scan-assembler {\tfcmgt\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */ -+ -+/* { dg-final { scan-assembler {\tfcmge\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} } } */ -+/* { dg-final { scan-assembler {\tfcmge\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */ -+ -+/* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, #0\.0\n} } } */ -+/* { dg-final { scan-assembler {\tfcmuo\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_17_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_17_run.c -new file mode 100644 -index 000000000..4a228c8c2 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_17_run.c -@@ -0,0 +1,54 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+/* { dg-require-effective-target fenv_exceptions } */ -+ -+#include -+ -+#include "vcond_17.c" -+ -+#define N 401 -+ -+#define TEST_LOOP(CMP, EXPECT_INVALID) \ -+ { \ -+ __fp16 dest1[N], dest2[N], dest3[N], src[N]; \ -+ __fp16 a[N], b[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ src[i] = i * i; \ -+ if (i % 5 == 0) \ -+ a[i] = 0; \ -+ else if (i % 3) \ -+ a[i] = i * 0.1; \ -+ else \ -+ a[i] = i; \ -+ if (i % 7 == 0) \ -+ b[i] = __builtin_nan (""); \ -+ else if (i % 6) \ -+ b[i] = i * 0.1; \ -+ else \ -+ b[i] = i; \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ feclearexcept (FE_ALL_EXCEPT); \ -+ test_##CMP##_var (dest1, src, 11, a, b, N); \ -+ test_##CMP##_zero (dest2, src, 22, a, N); \ -+ test_##CMP##_sel (dest3, 33, 44, a, 9, N); \ -+ if (!fetestexcept (FE_INVALID) != !(EXPECT_INVALID)) \ -+ __builtin_abort (); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ if (dest1[i] != (CMP (a[i], b[i]) ? src[i] : 11)) \ -+ __builtin_abort (); \ -+ if (dest2[i] != (CMP (a[i], 0) ? src[i] : 22)) \ -+ __builtin_abort (); \ -+ if (dest3[i] != (CMP (a[i], 9) ? 
33 : 44)) \ -+ __builtin_abort (); \ -+ } \ -+ } -+ -+int __attribute__ ((optimize (1))) -+main (void) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_18.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_18.c -new file mode 100644 -index 000000000..a2590b9ee ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_18.c -@@ -0,0 +1,44 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#define DEF_LOOP(TYPE, NAME, CONST) \ -+ void \ -+ test_##TYPE##_##NAME (TYPE *restrict x, \ -+ TYPE *restrict pred, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ x[i] = pred[i] > 0 ? CONST : 0; \ -+ } -+ -+#define TEST_TYPE(T, TYPE) \ -+ T (TYPE, 2, 2.0) \ -+ T (TYPE, 1p25, 1.25) \ -+ T (TYPE, 32p25, 32.25) \ -+ T (TYPE, m4, -4.0) \ -+ T (TYPE, m2p5, -2.5) \ -+ T (TYPE, m64p5, -64.5) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, _Float16) \ -+ TEST_TYPE (T, float) \ -+ TEST_TYPE (T, double) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/z, #16384\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/z, #15616\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/z, #-15360\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/z, #-16128\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ -+ -+/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.s), (p[0-7])/z, \1\n\tfmov\t\1, \2/m, #2\.0(?:e[+]0)?\n} } } */ -+/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.s), (p[0-7])/z, \1\n\tfmov\t\1, \2/m, #1\.25(?:e[+]0)?\n} } } */ -+/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.s), (p[0-7])/z, \1\n\tfmov\t\1, \2/m, #-4\.0(?:e[+]0)?\n} } } */ -+/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.s), (p[0-7])/z, \1\n\tfmov\t\1, \2/m, #-2\.5(?:e[+]0)?\n} } } */ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s, p[0-7], z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ -+ -+/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.d), (p[0-7])/z, \1\n\tfmov\t\1, \2/m, #2\.0(?:e[+]0)?\n} } } */ -+/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.d), (p[0-7])/z, \1\n\tfmov\t\1, \2/m, #1\.25(?:e[+]0)?\n} } } */ -+/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.d), (p[0-7])/z, \1\n\tfmov\t\1, \2/m, #-4\.0(?:e[+]0)?\n} } } */ -+/* { dg-final { scan-assembler {\tmovprfx\t(z[0-9]+\.d), (p[0-7])/z, \1\n\tfmov\t\1, \2/m, #-2\.5(?:e[+]0)?\n} } } */ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.d, p[0-7], z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_18_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_18_run.c -new file mode 100644 -index 000000000..279b0a3ba ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_18_run.c -@@ -0,0 +1,30 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "vcond_18.c" -+ -+#define N 97 -+ -+#define TEST_LOOP(TYPE, NAME, CONST) \ -+ { \ -+ TYPE x[N], pred[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ pred[i] = i % 5 <= i % 6; \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE##_##NAME (x, pred, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ if (x[i] != (TYPE) (pred[i] > 0 ? 
CONST : 0)) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int __attribute__ ((optimize (1))) -+main (int argc, char **argv) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_19.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_19.c -new file mode 100644 -index 000000000..2347b7f28 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_19.c -@@ -0,0 +1,46 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#define DEF_LOOP(TYPE, NAME, CONST) \ -+ void \ -+ test_##TYPE##_##NAME (TYPE *restrict x, \ -+ TYPE *restrict pred, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ x[i] = pred[i] > 0 ? CONST : pred[i]; \ -+ } -+ -+#define TEST_TYPE(T, TYPE) \ -+ T (TYPE, 2, 2.0) \ -+ T (TYPE, 1p25, 1.25) \ -+ T (TYPE, 32p25, 32.25) \ -+ T (TYPE, m4, -4.0) \ -+ T (TYPE, m2p5, -2.5) \ -+ T (TYPE, m64p5, -64.5) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, _Float16) \ -+ TEST_TYPE (T, float) \ -+ TEST_TYPE (T, double) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/m, #16384\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/m, #15616\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/m, #-15360\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/m, #-16128\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ -+ -+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.s), p[0-7]/m, #2\.0(?:e[+]0)?\n} } } */ -+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.s), p[0-7]/m, #1\.25(?:e[+]0)?\n} } } */ -+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.s), p[0-7]/m, #-4\.0(?:e[+]0)?\n} } } */ -+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.s), p[0-7]/m, #-2\.5(?:e[+]0)?\n} } } */ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s, p[0-7], z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ -+ -+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.d), p[0-7]/m, #2\.0(?:e[+]0)?\n} } } */ -+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.d), p[0-7]/m, #1\.25(?:e[+]0)?\n} } } */ -+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.d), p[0-7]/m, #-4\.0(?:e[+]0)?\n} } } */ -+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.d), p[0-7]/m, #-2\.5(?:e[+]0)?\n} } } */ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.d, p[0-7], z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ -+ -+/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_19_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_19_run.c -new file mode 100644 -index 000000000..d93d8aa45 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_19_run.c -@@ -0,0 +1,30 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "vcond_19.c" -+ -+#define N 97 -+ -+#define TEST_LOOP(TYPE, NAME, CONST) \ -+ { \ -+ TYPE x[N], pred[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ pred[i] = i % 5 <= i % 6 ? i : 0; \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE##_##NAME (x, pred, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ if (x[i] != (TYPE) (pred[i] > 0 ? 
CONST : pred[i])) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int __attribute__ ((optimize (1))) -+main (int argc, char **argv) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_20.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_20.c -new file mode 100644 -index 000000000..bf2af1c62 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_20.c -@@ -0,0 +1,46 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#define DEF_LOOP(TYPE, NAME, CONST) \ -+ void \ -+ test_##TYPE##_##NAME (TYPE *restrict x, \ -+ TYPE *restrict pred, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ x[i] = pred[i] > 0 ? CONST : 12.0; \ -+ } -+ -+#define TEST_TYPE(T, TYPE) \ -+ T (TYPE, 2, 2.0) \ -+ T (TYPE, 1p25, 1.25) \ -+ T (TYPE, 32p25, 32.25) \ -+ T (TYPE, m4, -4.0) \ -+ T (TYPE, m2p5, -2.5) \ -+ T (TYPE, m64p5, -64.5) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, _Float16) \ -+ TEST_TYPE (T, float) \ -+ TEST_TYPE (T, double) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/m, #16384\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/m, #15616\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/m, #-15360\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, p[0-7]/m, #-16128\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.h, p[0-7], z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ -+ -+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.s), p[0-7]/m, #2\.0(?:e[+]0)?\n} } } */ -+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.s), p[0-7]/m, #1\.25(?:e[+]0)?\n} } } */ -+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.s), p[0-7]/m, #-4\.0(?:e[+]0)?\n} } } */ -+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.s), p[0-7]/m, #-2\.5(?:e[+]0)?\n} } } */ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s, p[0-7], z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ -+ -+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.d), p[0-7]/m, #2\.0(?:e[+]0)?\n} } } */ -+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.d), p[0-7]/m, #1\.25(?:e[+]0)?\n} } } */ -+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.d), p[0-7]/m, #-4\.0(?:e[+]0)?\n} } } */ -+/* { dg-final { scan-assembler {\tfmov\t(z[0-9]+\.d), p[0-7]/m, #-2\.5(?:e[+]0)?\n} } } */ -+/* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.d, p[0-7], z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+, z[0-9]+\n} 12 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_20_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_20_run.c -new file mode 100644 -index 000000000..33c81deaa ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_20_run.c -@@ -0,0 +1,30 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "vcond_20.c" -+ -+#define N 97 -+ -+#define TEST_LOOP(TYPE, NAME, CONST) \ -+ { \ -+ TYPE x[N], pred[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ pred[i] = i % 5 <= i % 6; \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE##_##NAME (x, pred, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ if (x[i] != (TYPE) (pred[i] > 0 ? 
CONST : 12.0)) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int __attribute__ ((optimize (1))) -+main (int argc, char **argv) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_21.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_21.c -new file mode 100644 -index 000000000..d5df2e199 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_21.c -@@ -0,0 +1,34 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#define DEF_LOOP(TYPE, ABS, NAME, OP) \ -+ void \ -+ test_##TYPE##_##NAME (TYPE *restrict r, \ -+ TYPE *restrict a, \ -+ TYPE *restrict b, int n) \ -+ { \ -+ for (int i = 0; i < n; ++i) \ -+ r[i] = ABS (a[i]) OP ABS (b[i]) ? 1.0 : 0.0; \ -+ } -+ -+#define TEST_TYPE(T, TYPE, ABS) \ -+ T (TYPE, ABS, lt, <) \ -+ T (TYPE, ABS, le, <=) \ -+ T (TYPE, ABS, ge, >=) \ -+ T (TYPE, ABS, gt, >) -+ -+#define TEST_ALL(T) \ -+ TEST_TYPE (T, _Float16, __builtin_fabsf16) \ -+ TEST_TYPE (T, float, __builtin_fabsf) \ -+ TEST_TYPE (T, double, __builtin_fabs) -+ -+TEST_ALL (DEF_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tfac[lg]t\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tfac[lg]e\tp[0-9]+\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfac[lg]t\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tfac[lg]e\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {\tfac[lg]t\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ -+/* { dg-final { scan-assembler-times {\tfac[lg]e\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_21_run.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_21_run.c -new file mode 100644 -index 000000000..15c551324 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_21_run.c -@@ -0,0 +1,31 @@ -+/* { dg-do run { target aarch64_sve_hw } } */ -+/* { dg-options "-O2 -ftree-vectorize" } */ -+ -+#include "vcond_21.c" -+ -+#define N 97 -+ -+#define TEST_LOOP(TYPE, ABS, NAME, OP) \ -+ { \ -+ TYPE r[N], a[N], b[N]; \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ a[i] = i % 5 * (i & 1 ? -1 : 1); \ -+ b[i] = i % 9 * (i & 2 ? -1 : 1); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ test_##TYPE##_##NAME (r, a, b, N); \ -+ for (int i = 0; i < N; ++i) \ -+ { \ -+ if (r[i] != (ABS (a[i]) OP ABS (b[i]) ? 
1.0 : 0.0)) \ -+ __builtin_abort (); \ -+ asm volatile ("" ::: "memory"); \ -+ } \ -+ } -+ -+int __attribute__ ((optimize (1))) -+main (int argc, char **argv) -+{ -+ TEST_ALL (TEST_LOOP) -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/while_1.c b/gcc/testsuite/gcc.target/aarch64/sve/while_1.c -index a93a04baa..2655c4242 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sve/while_1.c -+++ b/gcc/testsuite/gcc.target/aarch64/sve/while_1.c -@@ -42,3 +42,4 @@ TEST_ALL (ADD_LOOP) - /* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x0, x[0-9]+, lsl 2\]\n} 3 } } */ - /* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x0, x[0-9]+, lsl 3\]\n} 3 } } */ - /* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x0, x[0-9]+, lsl 3\]\n} 3 } } */ -+/* { dg-final { scan-assembler-times {\tb\.any\t} 10 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/while_10.c b/gcc/testsuite/gcc.target/aarch64/sve/while_10.c -new file mode 100644 -index 000000000..eaed326f9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/while_10.c -@@ -0,0 +1,25 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=512" } */ -+ -+#include -+ -+#define ADD_LOOP(TYPE, COUNT) \ -+ TYPE __attribute__ ((noinline, noclone)) \ -+ vec_while_##TYPE (TYPE *restrict a) \ -+ { \ -+ for (int i = 0; i < COUNT; ++i) \ -+ a[i] += 1; \ -+ } -+ -+#define TEST_ALL(T) \ -+ T (int8_t, 63) \ -+ T (int16_t, 30) \ -+ T (int32_t, 15) \ -+ T (int64_t, 6) -+ -+TEST_ALL (ADD_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, mul3\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, mul3\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.s, mul3\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.d, vl6\n} 1 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/while_6.c b/gcc/testsuite/gcc.target/aarch64/sve/while_6.c -new file mode 100644 -index 000000000..b4cc596ef ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/while_6.c -@@ -0,0 +1,25 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=scalable" } */ -+ -+#include -+ -+#define ADD_LOOP(TYPE) \ -+ TYPE __attribute__ ((noinline, noclone)) \ -+ vec_while_##TYPE (TYPE *restrict a) \ -+ { \ -+ for (int i = 0; i < 7; ++i) \ -+ a[i] += 1; \ -+ } -+ -+#define TEST_ALL(T) \ -+ T (int8_t) \ -+ T (int16_t) \ -+ T (int32_t) \ -+ T (int64_t) -+ -+TEST_ALL (ADD_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl7\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl7\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s,} 2 } } */ -+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d,} 2 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/while_7.c b/gcc/testsuite/gcc.target/aarch64/sve/while_7.c -new file mode 100644 -index 000000000..d5ffb66a1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/while_7.c -@@ -0,0 +1,25 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=scalable" } */ -+ -+#include -+ -+#define ADD_LOOP(TYPE) \ -+ TYPE __attribute__ ((noinline, noclone)) \ -+ vec_while_##TYPE (TYPE *restrict a) \ -+ { \ -+ for (int i = 0; i < 8; ++i) \ -+ a[i] += 1; \ -+ } -+ -+#define TEST_ALL(T) \ -+ T (int8_t) \ -+ T (int16_t) \ -+ T (int32_t) \ -+ T (int64_t) -+ -+TEST_ALL (ADD_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl8\n} 1 } } */ -+/* { 
dg-final { scan-assembler-times {\tptrue\tp[0-7]\.h, vl8\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s,} 2 } } */ -+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d,} 2 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/while_8.c b/gcc/testsuite/gcc.target/aarch64/sve/while_8.c -new file mode 100644 -index 000000000..1c11aa849 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/while_8.c -@@ -0,0 +1,25 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=scalable" } */ -+ -+#include -+ -+#define ADD_LOOP(TYPE) \ -+ TYPE __attribute__ ((noinline, noclone)) \ -+ vec_while_##TYPE (TYPE *restrict a) \ -+ { \ -+ for (int i = 0; i < 9; ++i) \ -+ a[i] += 1; \ -+ } -+ -+#define TEST_ALL(T) \ -+ T (int8_t) \ -+ T (int16_t) \ -+ T (int32_t) \ -+ T (int64_t) -+ -+TEST_ALL (ADD_LOOP) -+ -+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b,} 1 } } */ -+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h,} 2 } } */ -+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s,} 2 } } */ -+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d,} 2 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/while_9.c b/gcc/testsuite/gcc.target/aarch64/sve/while_9.c -new file mode 100644 -index 000000000..9a8e5fe12 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/while_9.c -@@ -0,0 +1,25 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=scalable" } */ -+ -+#include -+ -+#define ADD_LOOP(TYPE) \ -+ TYPE __attribute__ ((noinline, noclone)) \ -+ vec_while_##TYPE (TYPE *restrict a) \ -+ { \ -+ for (int i = 0; i < 16; ++i) \ -+ a[i] += 1; \ -+ } -+ -+#define TEST_ALL(T) \ -+ T (int8_t) \ -+ T (int16_t) \ -+ T (int32_t) \ -+ T (int64_t) -+ -+TEST_ALL (ADD_LOOP) -+ -+/* { dg-final { scan-assembler-times {\tptrue\tp[0-7]\.b, vl16\n} 1 } } */ -+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h,} 2 } } */ -+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s,} 2 } } */ -+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d,} 2 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/symbol-range-tiny.c b/gcc/testsuite/gcc.target/aarch64/symbol-range-tiny.c -index d7e46b059..fc6a4f3ec 100644 ---- a/gcc/testsuite/gcc.target/aarch64/symbol-range-tiny.c -+++ b/gcc/testsuite/gcc.target/aarch64/symbol-range-tiny.c -@@ -1,12 +1,12 @@ --/* { dg-do compile } */ -+/* { dg-do link } */ - /* { dg-options "-O3 -save-temps -mcmodel=tiny" } */ - --int fixed_regs[0x00200000]; -+char fixed_regs[0x00080000]; - - int --foo() -+main () - { -- return fixed_regs[0x00080000]; -+ return fixed_regs[0x000ff000]; - } - - /* { dg-final { scan-assembler-not "adr\tx\[0-9\]+, fixed_regs\\\+" } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/symbol-range.c b/gcc/testsuite/gcc.target/aarch64/symbol-range.c -index 6574cf431..d8e82fa1b 100644 ---- a/gcc/testsuite/gcc.target/aarch64/symbol-range.c -+++ b/gcc/testsuite/gcc.target/aarch64/symbol-range.c -@@ -1,12 +1,12 @@ --/* { dg-do compile } */ -+/* { dg-do link } */ - /* { dg-options "-O3 -save-temps -mcmodel=small" } */ - --int fixed_regs[0x200000000ULL]; -+char fixed_regs[0x80000000]; - - int --foo() -+main () - { -- return fixed_regs[0x100000000ULL]; -+ return fixed_regs[0xfffff000]; - } - - /* { dg-final { scan-assembler-not "adrp\tx\[0-9\]+, fixed_regs\\\+" } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.c b/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.c -index e571b2f13..f56415f33 100644 ---- 
a/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.c -+++ b/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf" } */ -+/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf -mno-outline-atomics" } */ - - #include "sync-comp-swap.x" - -diff --git a/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.c b/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.c -index 357bf1be3..39b3144aa 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.c -+++ b/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-march=armv8-a+nolse -O2" } */ -+/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */ - - #include "sync-op-acquire.x" - -diff --git a/gcc/testsuite/gcc.target/aarch64/sync-op-full.c b/gcc/testsuite/gcc.target/aarch64/sync-op-full.c -index c6ba16299..6b8b2043f 100644 ---- a/gcc/testsuite/gcc.target/aarch64/sync-op-full.c -+++ b/gcc/testsuite/gcc.target/aarch64/sync-op-full.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-march=armv8-a+nolse -O2" } */ -+/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */ - - #include "sync-op-full.x" - -diff --git a/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-10.c b/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-10.c -new file mode 100644 -index 000000000..3d6893ee0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-10.c -@@ -0,0 +1,14 @@ -+/* { dg-do compile } */ -+ -+int __attribute__((aarch64_vector_pcs)) (*callee) (void); -+ -+int __attribute__ ((aarch64_vector_pcs)) -+caller (int *x) -+{ -+ return callee () + 1; -+} -+ -+/* { dg-final { scan-assembler-not {\tstp\tq} } } */ -+/* { dg-final { scan-assembler-not {\tldp\tq} } } */ -+/* { dg-final { scan-assembler-not {\tstr\tq} } } */ -+/* { dg-final { scan-assembler-not {\tldr\tq} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-11.c b/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-11.c -new file mode 100644 -index 000000000..de99bd701 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-11.c -@@ -0,0 +1,26 @@ -+/* { dg-do compile } */ -+ -+int (*callee) (void); -+ -+int __attribute__ ((aarch64_vector_pcs)) -+caller (int *x) -+{ -+ return callee () + 1; -+} -+ -+/* { dg-final { scan-assembler {\sstp\tq8, q9} } } */ -+/* { dg-final { scan-assembler {\sstp\tq10, q11} } } */ -+/* { dg-final { scan-assembler {\sstp\tq12, q13} } } */ -+/* { dg-final { scan-assembler {\sstp\tq14, q15} } } */ -+/* { dg-final { scan-assembler {\sstp\tq16, q17} } } */ -+/* { dg-final { scan-assembler {\sstp\tq18, q19} } } */ -+/* { dg-final { scan-assembler {\sstp\tq20, q21} } } */ -+/* { dg-final { scan-assembler {\sstp\tq22, q23} } } */ -+/* { dg-final { scan-assembler {\sldp\tq8, q9} } } */ -+/* { dg-final { scan-assembler {\sldp\tq10, q11} } } */ -+/* { dg-final { scan-assembler {\sldp\tq12, q13} } } */ -+/* { dg-final { scan-assembler {\sldp\tq14, q15} } } */ -+/* { dg-final { scan-assembler {\sldp\tq16, q17} } } */ -+/* { dg-final { scan-assembler {\sldp\tq18, q19} } } */ -+/* { dg-final { scan-assembler {\sldp\tq20, q21} } } */ -+/* { dg-final { scan-assembler {\sldp\tq22, q23} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-8.c b/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-8.c -new file mode 100644 -index 000000000..6463f6c50 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-8.c -@@ -0,0 +1,20 @@ -+/* 
{ dg-do compile } */ -+/* { dg-options "-std=gnu99" } */ -+/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ -+ -+#include -+ -+void __attribute__ ((aarch64_vector_pcs)) f (void); -+ -+void -+g (int64x2x4_t *ptr) -+{ -+ register int64x2x4_t copy asm ("v8") = *ptr; -+ int64x2x4_t save; -+ asm volatile ("" : "=w" (save) : "0" (copy)); -+ f (); -+ *ptr = save; -+} -+ -+/* { dg-final { scan-assembler-times {\tld1\t} 1 } } */ -+/* { dg-final { scan-assembler-times {\tst1\t} 1 } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-9.c b/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-9.c -new file mode 100644 -index 000000000..aaa0316d1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-9.c -@@ -0,0 +1,48 @@ -+/* { dg-do compile } */ -+/* { dg-options "-fshrink-wrap -ffat-lto-objects" } */ -+/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ -+/* { dg-final { check-function-bodies "**" "" } } */ -+ -+int callee (void); -+ -+/* -+** caller: -+** ldr (w[0-9]+), \[x0\] -+** cbn?z \1, [^\n]* -+** ... -+** ret -+*/ -+int __attribute__ ((aarch64_vector_pcs)) -+caller (int *x) -+{ -+ if (*x) -+ return callee () + 1; -+ else -+ return 0; -+} -+ -+/* { dg-final { scan-assembler {\sstp\tq8, q9} } } */ -+/* { dg-final { scan-assembler {\sstp\tq10, q11} } } */ -+/* { dg-final { scan-assembler {\sstp\tq12, q13} } } */ -+/* { dg-final { scan-assembler {\sstp\tq14, q15} } } */ -+/* { dg-final { scan-assembler {\sstp\tq16, q17} } } */ -+/* { dg-final { scan-assembler {\sstp\tq18, q19} } } */ -+/* { dg-final { scan-assembler {\sstp\tq20, q21} } } */ -+/* { dg-final { scan-assembler {\sstp\tq22, q23} } } */ -+/* { dg-final { scan-assembler {\sldp\tq8, q9} } } */ -+/* { dg-final { scan-assembler {\sldp\tq10, q11} } } */ -+/* { dg-final { scan-assembler {\sldp\tq12, q13} } } */ -+/* { dg-final { scan-assembler {\sldp\tq14, q15} } } */ -+/* { dg-final { scan-assembler {\sldp\tq16, q17} } } */ -+/* { dg-final { scan-assembler {\sldp\tq18, q19} } } */ -+/* { dg-final { scan-assembler {\sldp\tq20, q21} } } */ -+/* { dg-final { scan-assembler {\sldp\tq22, q23} } } */ -+ -+/* { dg-final { scan-assembler-not {\tstp\tq[0-7],} } } */ -+/* { dg-final { scan-assembler-not {\tldp\tq[0-7],} } } */ -+/* { dg-final { scan-assembler-not {\tstp\tq2[4-9],} } } */ -+/* { dg-final { scan-assembler-not {\tldp\tq2[4-9],} } } */ -+/* { dg-final { scan-assembler-not {\tstp\td} } } */ -+/* { dg-final { scan-assembler-not {\tldp\td} } } */ -+/* { dg-final { scan-assembler-not {\tstr\tq} } } */ -+/* { dg-final { scan-assembler-not {\tldr\tq} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/usadv16qi-dotprod.c b/gcc/testsuite/gcc.target/aarch64/usadv16qi-dotprod.c -new file mode 100644 -index 000000000..ea8de4d69 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/usadv16qi-dotprod.c -@@ -0,0 +1,30 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_v8_2a_dotprod_neon_ok } */ -+/* { dg-add-options arm_v8_2a_dotprod_neon } */ -+/* { dg-additional-options "-O3" } */ -+ -+#pragma GCC target "+nosve" -+ -+#define N 1024 -+ -+unsigned char pix1[N], pix2[N]; -+ -+int foo (void) -+{ -+ int i_sum = 0; -+ int i; -+ -+ for (i = 0; i < N; i++) -+ i_sum += __builtin_abs (pix1[i] - pix2[i]); -+ -+ return i_sum; -+} -+ -+/* { dg-final { scan-assembler-not {\tushll\t} } } */ -+/* { dg-final { scan-assembler-not {\tushll2\t} } } */ -+/* { dg-final { scan-assembler-not {\tusubl\t} } } */ -+/* { dg-final { scan-assembler-not {\tusubl2\t} } } */ -+/* { dg-final { scan-assembler-not {\tabs\t} } } */ 
-+ -+/* { dg-final { scan-assembler {\tuabd\t} } } */ -+/* { dg-final { scan-assembler {\tudot\t} } } */ -diff --git a/gcc/testsuite/gcc.target/aarch64/usadv16qi.c b/gcc/testsuite/gcc.target/aarch64/usadv16qi.c -index 69ceaf425..a66e12096 100644 ---- a/gcc/testsuite/gcc.target/aarch64/usadv16qi.c -+++ b/gcc/testsuite/gcc.target/aarch64/usadv16qi.c -@@ -1,7 +1,7 @@ - /* { dg-do compile } */ - /* { dg-options "-O3" } */ - --#pragma GCC target "+nosve" -+#pragma GCC target "+nosve+nodotprod" - - #define N 1024 - -diff --git a/gcc/testsuite/gcc.target/aarch64/vect-clz.c b/gcc/testsuite/gcc.target/aarch64/vect-clz.c -index 044fa9e99..cd181c346 100644 ---- a/gcc/testsuite/gcc.target/aarch64/vect-clz.c -+++ b/gcc/testsuite/gcc.target/aarch64/vect-clz.c -@@ -1,6 +1,8 @@ - /* { dg-do run } */ - /* { dg-options "-O3 -save-temps -fno-inline -fno-vect-cost-model" } */ - -+#pragma GCC target "+nosve" -+ - extern void abort (); - - void -diff --git a/gcc/testsuite/gcc.target/i386/asm-1.c b/gcc/testsuite/gcc.target/i386/asm-1.c -index cd60a09bd..5e516d882 100644 ---- a/gcc/testsuite/gcc.target/i386/asm-1.c -+++ b/gcc/testsuite/gcc.target/i386/asm-1.c -@@ -2,7 +2,7 @@ - /* { dg-require-effective-target ia32 } */ - /* { dg-options "" } */ - --register unsigned int EAX asm ("r14"); /* { dg-error "register name" } */ -+register unsigned int EAX asm ("r14"); /* { dg-error "cannot be accessed" } */ - - void foo () - { -diff --git a/gcc/testsuite/gcc.target/i386/asm-7.c b/gcc/testsuite/gcc.target/i386/asm-7.c -new file mode 100644 -index 000000000..d2d113626 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/asm-7.c -@@ -0,0 +1,8 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target ia32 } */ -+/* { dg-options "" } */ -+ -+void foo (void) -+{ -+ asm volatile ("" : : : "%r12"); /* { dg-error "cannot be clobbered" } */ -+} -diff --git a/gcc/testsuite/gcc.target/i386/asm-flag-0.c b/gcc/testsuite/gcc.target/i386/asm-flag-0.c -index b0c05239b..e7bd1a585 100644 ---- a/gcc/testsuite/gcc.target/i386/asm-flag-0.c -+++ b/gcc/testsuite/gcc.target/i386/asm-flag-0.c -@@ -11,5 +11,5 @@ void a(void) - void b(void) - { - char x; -- asm("" : "=@ccbad"(x)); /* { dg-error "unknown asm flag output" } */ -+ asm("" : "=@ccbad"(x)); /* { dg-error "unknown 'asm' flag output" } */ - } -diff --git a/gcc/testsuite/gcc.target/i386/funcspec-4.c b/gcc/testsuite/gcc.target/i386/funcspec-4.c -index 025b97dff..e345acdef 100644 ---- a/gcc/testsuite/gcc.target/i386/funcspec-4.c -+++ b/gcc/testsuite/gcc.target/i386/funcspec-4.c -@@ -5,7 +5,7 @@ - extern void error1 (void) __attribute__((__target__("fma400"))); /* { dg-error "unknown" } */ - - /* Multiple arch switches */ --extern void error2 (void) __attribute__((__target__("arch=core2,arch=k8"))); /* { dg-error "already specified" } */ -+extern void error2 (void) __attribute__((__target__("arch=core2,arch=k8"))); /* { dg-error "attribute value 'arch=k8' was already specified in 'target' attribute" } */ - - /* Unknown tune target */ - extern void error3 (void) __attribute__((__target__("tune=foobar"))); /* { dg-error "bad value" } */ -diff --git a/gcc/testsuite/gcc.target/i386/inline_error.c b/gcc/testsuite/gcc.target/i386/inline_error.c -index 18e506631..57e60fbad 100644 ---- a/gcc/testsuite/gcc.target/i386/inline_error.c -+++ b/gcc/testsuite/gcc.target/i386/inline_error.c -@@ -2,7 +2,7 @@ - /* { dg-options "-O0 -mno-popcnt" } */ - - inline int __attribute__ ((__gnu_inline__, __always_inline__, target("popcnt"))) --foo () /* { dg-error "inlining failed in call to always_inline .* 
target specific option mismatch" } */ -+foo () /* { dg-error "inlining failed in call to 'always_inline' .* target specific option mismatch" } */ - { - return 0; - } -diff --git a/gcc/testsuite/gcc.target/i386/interrupt-6.c b/gcc/testsuite/gcc.target/i386/interrupt-6.c -index bcbcc97c6..138b98fe1 100644 ---- a/gcc/testsuite/gcc.target/i386/interrupt-6.c -+++ b/gcc/testsuite/gcc.target/i386/interrupt-6.c -@@ -31,7 +31,7 @@ fn4 (uword_t error_code, void *frame) - error = error_code; - } - --extern int fn5 (void *) __attribute__ ((interrupt)); /* { dg-error "interrupt service routine can't have non-void return value" } */ -+extern int fn5 (void *) __attribute__ ((interrupt)); /* { dg-error "interrupt service routine must return 'void'" } */ - - int - fn5 (void *frame) -diff --git a/gcc/testsuite/gcc.target/i386/interrupt-7.c b/gcc/testsuite/gcc.target/i386/interrupt-7.c -index 506f61afa..3e2f6a0eb 100644 ---- a/gcc/testsuite/gcc.target/i386/interrupt-7.c -+++ b/gcc/testsuite/gcc.target/i386/interrupt-7.c -@@ -8,5 +8,5 @@ extern void fn (void *) __attribute__((interrupt)); - void - foo (void) - { -- fn (&error); /* { dg-error "interrupt service routine can't be called directly" } */ -+ fn (&error); /* { dg-error "interrupt service routine cannot be called directly" } */ - } -diff --git a/gcc/testsuite/gcc.target/i386/pr30848.c b/gcc/testsuite/gcc.target/i386/pr30848.c -index 2a9285151..9c4e22ac7 100644 ---- a/gcc/testsuite/gcc.target/i386/pr30848.c -+++ b/gcc/testsuite/gcc.target/i386/pr30848.c -@@ -2,5 +2,5 @@ - - void foo(double d) - { -- __asm__ ("" : "=u" (d)); /* { dg-error "output regs" } */ -+ __asm__ ("" : "=u" (d)); /* { dg-error "output registers" } */ - } -diff --git a/gcc/testsuite/gcc.target/i386/pr39082-1.c b/gcc/testsuite/gcc.target/i386/pr39082-1.c -index 2af2264c3..85b5671e9 100644 ---- a/gcc/testsuite/gcc.target/i386/pr39082-1.c -+++ b/gcc/testsuite/gcc.target/i386/pr39082-1.c -@@ -13,7 +13,7 @@ extern int bar1 (union un); - extern union un bar2 (int); - - int --foo1 (union un u) /* { dg-message "note: the ABI of passing union with long double has changed in GCC 4.4" } */ -+foo1 (union un u) /* { dg-message "note: the ABI of passing union with 'long double' has changed in GCC 4.4" } */ - { - bar1 (u); - return u.i; -diff --git a/gcc/testsuite/gcc.target/i386/pr39678.c b/gcc/testsuite/gcc.target/i386/pr39678.c -index 0548466d6..c94c002f1 100644 ---- a/gcc/testsuite/gcc.target/i386/pr39678.c -+++ b/gcc/testsuite/gcc.target/i386/pr39678.c -@@ -10,7 +10,7 @@ struct X { - - struct X - foo (float *p) --{ /* { dg-message "note: the ABI of passing structure with complex float member has changed in GCC 4.4" } */ -+{ /* { dg-message "note: the ABI of passing structure with 'complex float' member has changed in GCC 4.4" } */ - struct X x; - x.c = -3; - __real x.val = p[0]; -diff --git a/gcc/testsuite/gcc.target/i386/pr57756.c b/gcc/testsuite/gcc.target/i386/pr57756.c -index 25c565c87..9a78f62c9 100644 ---- a/gcc/testsuite/gcc.target/i386/pr57756.c -+++ b/gcc/testsuite/gcc.target/i386/pr57756.c -@@ -3,7 +3,7 @@ - - /* callee cannot be inlined into caller because it has a higher target ISA. 
*/ - __attribute__((always_inline,target("sse4.2"))) --__inline int callee () /* { dg-error "inlining failed in call to always_inline" } */ -+__inline int callee () /* { dg-error "inlining failed in call to 'always_inline'" } */ - { - return 0; - } -diff --git a/gcc/testsuite/gcc.target/i386/pr62120.c b/gcc/testsuite/gcc.target/i386/pr62120.c -index bfb8c4703..28d85d377 100644 ---- a/gcc/testsuite/gcc.target/i386/pr62120.c -+++ b/gcc/testsuite/gcc.target/i386/pr62120.c -@@ -3,6 +3,6 @@ - - void foo () - { -- register int zmm_var asm ("ymm9");/* { dg-error "invalid register name" } */ -- register int zmm_var2 asm ("23");/* { dg-error "invalid register name" } */ -+ register int zmm_var asm ("ymm9");/* { dg-error "cannot be accessed" } */ -+ register int zmm_var2 asm ("23");/* { dg-error "cannot be accessed" } */ - } -diff --git a/gcc/testsuite/gcc.target/i386/pr68843-1.c b/gcc/testsuite/gcc.target/i386/pr68843-1.c -index da0676aa6..6198ea9af 100644 ---- a/gcc/testsuite/gcc.target/i386/pr68843-1.c -+++ b/gcc/testsuite/gcc.target/i386/pr68843-1.c -@@ -5,7 +5,7 @@ double - test () - { - double x = 1.0; -- asm ("fld %1" /* { dg-error "explicitly used regs must be grouped at top of stack" } */ -+ asm ("fld %1" /* { dg-error "explicitly used registers must be grouped at top of stack" } */ - : "=&t" (x) - : "u" (x)); - return x; -diff --git a/gcc/testsuite/gcc.target/i386/pr79804.c b/gcc/testsuite/gcc.target/i386/pr79804.c -index 10adb4466..08d1a3ea1 100644 ---- a/gcc/testsuite/gcc.target/i386/pr79804.c -+++ b/gcc/testsuite/gcc.target/i386/pr79804.c -@@ -7,4 +7,4 @@ void foo (void) - register int r19 asm ("19"); - - asm volatile ("# %0" : "=r"(r19)); /* { dg-error "invalid use of register" } */ --} /* { dg-error "cannot be used in asm here" } */ -+} /* { dg-error "cannot be used in 'asm' here" } */ -diff --git a/gcc/testsuite/gcc.target/i386/pr82673.c b/gcc/testsuite/gcc.target/i386/pr82673.c -index 50eb5a3bc..161ec88e3 100644 ---- a/gcc/testsuite/gcc.target/i386/pr82673.c -+++ b/gcc/testsuite/gcc.target/i386/pr82673.c -@@ -9,4 +9,4 @@ void - bar (void) /* { dg-error "frame pointer required, but reserved" } */ - { - B = &y; --} /* { dg-error "bp cannot be used in asm here" } */ -+} /* { dg-error "bp cannot be used in 'asm' here" } */ -diff --git a/gcc/testsuite/gcc.target/i386/pr88809-2.c b/gcc/testsuite/gcc.target/i386/pr88809-2.c -new file mode 100644 -index 000000000..b8ef51dab ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr88809-2.c -@@ -0,0 +1,9 @@ -+/* PR target/88809 */ -+/* { dg-options "-Os" } */ -+ -+unsigned int foo (const char *ptr) -+{ -+ return __builtin_strlen (ptr); -+} -+ -+/* { dg-final { scan-assembler "call\[ \t\]strlen" } } */ -diff --git a/gcc/testsuite/gcc.target/i386/pr88809.c b/gcc/testsuite/gcc.target/i386/pr88809.c -new file mode 100644 -index 000000000..20844ddb9 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr88809.c -@@ -0,0 +1,9 @@ -+/* PR target/88809 */ -+/* { dg-options "-O" } */ -+ -+unsigned int foo (const char *ptr) -+{ -+ return __builtin_strlen (ptr); -+} -+ -+/* { dg-final { scan-assembler "call\[ \t\]strlen" } } */ -diff --git a/gcc/testsuite/gcc.target/i386/pr88828-1.c b/gcc/testsuite/gcc.target/i386/pr88828-1.c -new file mode 100644 -index 000000000..a15d1fea3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr88828-1.c -@@ -0,0 +1,49 @@ -+/* { dg-do run { target sse2_runtime } } */ -+/* { dg-options "-O2 -msse2" } */ -+ -+#include "pr88828-1a.c" -+#include "pr88828-1b.c" -+#include "pr88828-1c.c" -+ -+extern void abort (); -+ -+void 
-+do_check (__v4sf y, float f[4], float z) -+{ -+ int i; -+ -+ for (i = 0; i < 4; i++) -+ if (i == 0) -+ { -+ if (y[i] != z) -+ abort (); -+ } -+ else -+ { -+ if (y[i] != f[i]) -+ abort (); -+ } -+} -+ -+int -+main (void) -+{ -+ float f[4] = { -11, 2, 55553, -4 }; -+ float z = 134567; -+ __v4sf x = { f[0], f[1], f[2], f[3] }; -+ __v4sf y; -+ int i; -+ -+ for (i = 0; i < 4; i++) -+ if (x[i] != f[i]) -+ abort (); -+ -+ y = foo1 (x, z); -+ do_check (y, f, z); -+ y = foo2 (x, z); -+ do_check (y, f, z); -+ y = foo3 (x, z); -+ do_check (y, f, z); -+ -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/i386/pr88828-1a.c b/gcc/testsuite/gcc.target/i386/pr88828-1a.c -new file mode 100644 -index 000000000..d37b24c66 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr88828-1a.c -@@ -0,0 +1,17 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -msse -mno-sse4" } */ -+/* { dg-final { scan-assembler "movss" } } */ -+/* { dg-final { scan-assembler-not "movaps" } } */ -+/* { dg-final { scan-assembler-not "movlhps" } } */ -+/* { dg-final { scan-assembler-not "unpcklps" } } */ -+/* { dg-final { scan-assembler-not "shufps" } } */ -+ -+typedef float __v4sf __attribute__ ((__vector_size__ (16))); -+ -+__attribute__((noinline, noclone)) -+__v4sf -+foo1 (__v4sf x, float f) -+{ -+ __v4sf y = { f, x[1], x[2], x[3] }; -+ return y; -+} -diff --git a/gcc/testsuite/gcc.target/i386/pr88828-1b.c b/gcc/testsuite/gcc.target/i386/pr88828-1b.c -new file mode 100644 -index 000000000..af4aced65 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr88828-1b.c -@@ -0,0 +1,23 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -msse -mno-sse4" } */ -+/* { dg-final { scan-assembler "movss" } } */ -+/* { dg-final { scan-assembler-not "movaps" } } */ -+/* { dg-final { scan-assembler-not "movlhps" } } */ -+/* { dg-final { scan-assembler-not "unpcklps" } } */ -+/* { dg-final { scan-assembler-not "shufps" } } */ -+ -+typedef float __v4sf __attribute__ ((__vector_size__ (16))); -+ -+static __v4sf -+vector_init (float f0,float f1, float f2,float f3) -+{ -+ __v4sf y = { f0, f1, f2, f3 }; -+ return y; -+} -+ -+__attribute__((noinline, noclone)) -+__v4sf -+foo2 (__v4sf x, float f) -+{ -+ return vector_init (f, x[1], x[2], x[3]) ; -+} -diff --git a/gcc/testsuite/gcc.target/i386/pr88828-1c.c b/gcc/testsuite/gcc.target/i386/pr88828-1c.c -new file mode 100644 -index 000000000..a117f3ec7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr88828-1c.c -@@ -0,0 +1,18 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -msse -mno-sse4" } */ -+/* { dg-final { scan-assembler "movss" } } */ -+/* { dg-final { scan-assembler-not "movaps" } } */ -+/* { dg-final { scan-assembler-not "movlhps" } } */ -+/* { dg-final { scan-assembler-not "unpcklps" } } */ -+/* { dg-final { scan-assembler-not "shufps" } } */ -+ -+typedef float __v4sf __attribute__ ((__vector_size__ (16))); -+ -+__attribute__((noinline, noclone)) -+__v4sf -+foo3 (__v4sf x, float f) -+{ -+ __v4sf y = x; -+ y[0] = f; -+ return y; -+} -diff --git a/gcc/testsuite/gcc.target/i386/pr88828-4a.c b/gcc/testsuite/gcc.target/i386/pr88828-4a.c -new file mode 100644 -index 000000000..64043b985 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr88828-4a.c -@@ -0,0 +1,18 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -msse -mno-sse4" } */ -+/* { dg-final { scan-assembler "movss" } } */ -+/* { dg-final { scan-assembler-times "shufps" 1 } } */ -+/* { dg-final { scan-assembler-not "movaps" } } */ -+/* { dg-final { scan-assembler-not "movlhps" } } */ -+/* { dg-final { scan-assembler-not 
"unpcklps" } } */ -+ -+typedef float __v4sf __attribute__ ((__vector_size__ (16))); -+ -+__attribute__((noinline, noclone)) -+__v4sf -+foo (__v4sf x, float f) -+{ -+ __v4sf y = { x[0], x[2], x[3], x[1] }; -+ y[0] = f; -+ return y; -+} -diff --git a/gcc/testsuite/gcc.target/i386/pr88828-4b.c b/gcc/testsuite/gcc.target/i386/pr88828-4b.c -new file mode 100644 -index 000000000..ad8d2b985 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr88828-4b.c -@@ -0,0 +1,21 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -mavx" } */ -+/* { dg-final { scan-assembler-times "vpermilps" 1 } } */ -+/* { dg-final { scan-assembler-times "vmovss" 1 { target { ! ia32 } } } } */ -+/* { dg-final { scan-assembler-times "vpinsrd" 1 { target ia32 } } } */ -+/* { dg-final { scan-assembler-not "vmovss" { target ia32 } } } */ -+/* { dg-final { scan-assembler-not "vshufps" } } */ -+/* { dg-final { scan-assembler-not "vmovaps" } } */ -+/* { dg-final { scan-assembler-not "vmovlhps" } } */ -+/* { dg-final { scan-assembler-not "vunpcklps" } } */ -+ -+typedef float __v4sf __attribute__ ((__vector_size__ (16))); -+ -+__attribute__((noinline, noclone)) -+__v4sf -+foo (__v4sf x, float f) -+{ -+ __v4sf y = { x[0], x[2], x[3], x[1] }; -+ y[0] = f; -+ return y; -+} -diff --git a/gcc/testsuite/gcc.target/i386/pr88828-5a.c b/gcc/testsuite/gcc.target/i386/pr88828-5a.c -new file mode 100644 -index 000000000..5e908faef ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr88828-5a.c -@@ -0,0 +1,18 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -msse -mno-sse4" } */ -+/* { dg-final { scan-assembler "movss" } } */ -+/* { dg-final { scan-assembler-times "shufps" 2 } } */ -+/* { dg-final { scan-assembler-times "movaps" 1 } } */ -+/* { dg-final { scan-assembler-not "movlhps" } } */ -+/* { dg-final { scan-assembler-not "unpcklps" } } */ -+ -+typedef float __v4sf __attribute__ ((__vector_size__ (16))); -+ -+__attribute__((noinline, noclone)) -+__v4sf -+foo (__v4sf x, float f) -+{ -+ __v4sf y = { x[0], x[2], x[3], x[0] }; -+ y[3] = f; -+ return y; -+} -diff --git a/gcc/testsuite/gcc.target/i386/pr88828-5b.c b/gcc/testsuite/gcc.target/i386/pr88828-5b.c -new file mode 100644 -index 000000000..988a48823 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr88828-5b.c -@@ -0,0 +1,20 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -mavx" } */ -+/* { dg-final { scan-assembler-times "vpermilps" 1 } } */ -+/* { dg-final { scan-assembler-times "vinsertps" 1 } } */ -+/* { dg-final { scan-assembler-not "vshufps" } } */ -+/* { dg-final { scan-assembler-not "vmovss" } } */ -+/* { dg-final { scan-assembler-not "vmovaps" } } */ -+/* { dg-final { scan-assembler-not "vmovlhps" } } */ -+/* { dg-final { scan-assembler-not "vunpcklps" } } */ -+ -+typedef float __v4sf __attribute__ ((__vector_size__ (16))); -+ -+__attribute__((noinline, noclone)) -+__v4sf -+foo (__v4sf x, float f) -+{ -+ __v4sf y = { x[0], x[2], x[3], x[0] }; -+ y[3] = f; -+ return y; -+} -diff --git a/gcc/testsuite/gcc.target/i386/pr88828-7.c b/gcc/testsuite/gcc.target/i386/pr88828-7.c -new file mode 100644 -index 000000000..4302c2664 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr88828-7.c -@@ -0,0 +1,53 @@ -+/* { dg-do run { target sse2_runtime } } */ -+/* { dg-options "-O2 -msse2 -fexcess-precision=standard" } */ -+ -+#include "pr88828-7a.c" -+#include "pr88828-7b.c" -+ -+extern void abort (); -+ -+float -+bar (float x, float y) -+{ -+ return x / y - y * x; -+} -+ -+void -+do_check (__v4sf x, float f1[4], float f2[4]) -+{ -+ int i; -+ -+ for (i = 0; i < 4; i++) -+ if (i == 
0) -+ { -+ if (x[i] != bar (f1[i], f2[i])) -+ abort (); -+ } -+ else -+ { -+ if (x[i] != f1[i]) -+ abort (); -+ } -+} -+ -+int -+main (void) -+{ -+ float f1[4] = { -11, 2, 55553, -4 }; -+ float f2[4] = { 111, 3.3, -55.553, 4.8 }; -+ __v4sf x = { f1[0], f1[1], f1[2], f1[3] }; -+ __v4sf y = { f2[0], f2[1], f2[2], f2[3] }; -+ __v4sf z; -+ int i; -+ -+ for (i = 0; i < 4; i++) -+ if (x[i] != f1[i] || y[i] != f2[i] ) -+ abort (); -+ -+ z = foo1 (x, y); -+ do_check (z, f1, f2); -+ x = foo2 (x, y); -+ do_check (z, f1, f2); -+ -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/i386/pr88828-7a.c b/gcc/testsuite/gcc.target/i386/pr88828-7a.c -new file mode 100644 -index 000000000..f1ae57422 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr88828-7a.c -@@ -0,0 +1,16 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -msse -mno-sse4" } */ -+/* { dg-final { scan-assembler-not "movlhps" } } */ -+/* { dg-final { scan-assembler-not "unpckhps" } } */ -+/* { dg-final { scan-assembler-not "unpcklps" } } */ -+/* { dg-final { scan-assembler-not "shufps" } } */ -+ -+typedef float __v4sf __attribute__ ((__vector_size__ (16))); -+extern float bar (float, float); -+ -+__v4sf -+foo1 (__v4sf x, __v4sf y) -+{ -+ __v4sf z = { bar (x[0], y[0]), x[1], x[2], x[3] }; -+ return z; -+} -diff --git a/gcc/testsuite/gcc.target/i386/pr88828-7b.c b/gcc/testsuite/gcc.target/i386/pr88828-7b.c -new file mode 100644 -index 000000000..c027c5694 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr88828-7b.c -@@ -0,0 +1,22 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -msse -mno-sse4" } */ -+/* { dg-final { scan-assembler-not "movlhps" } } */ -+/* { dg-final { scan-assembler-not "unpckhps" } } */ -+/* { dg-final { scan-assembler-not "unpcklps" } } */ -+/* { dg-final { scan-assembler-not "shufps" } } */ -+ -+typedef float __v4sf __attribute__ ((__vector_size__ (16))); -+extern float bar (float, float); -+ -+static __v4sf -+vector_init (float f0,float f1, float f2,float f3) -+{ -+ __v4sf y = { f0, f1, f2, f3 }; -+ return y; -+} -+ -+__v4sf -+foo2 (__v4sf x, __v4sf y) -+{ -+ return vector_init (bar (x[0], y[0]), x[1], x[2], x[3]) ; -+} -diff --git a/gcc/testsuite/gcc.target/i386/pr88828-8.c b/gcc/testsuite/gcc.target/i386/pr88828-8.c -new file mode 100644 -index 000000000..3b8eabd22 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr88828-8.c -@@ -0,0 +1,46 @@ -+/* { dg-do run { target sse2_runtime } } */ -+/* { dg-options "-O2 -msse2" } */ -+ -+#include "pr88828-8a.c" -+#include "pr88828-8b.c" -+ -+extern void abort (); -+ -+void -+do_check (__v4sf y, float f[4], float z) -+{ -+ int i; -+ -+ for (i = 0; i < 4; i++) -+ if (i == 0) -+ { -+ if (y[i] != z) -+ abort (); -+ } -+ else -+ { -+ if (y[i] != f[i]) -+ abort (); -+ } -+} -+ -+int -+main (void) -+{ -+ float f[4] = { -11, 2, 55553, -4 }; -+ float z = 11.4; -+ __v4sf x = { f[0], f[1], f[2], f[3] }; -+ __v4sf y; -+ int i; -+ -+ for (i = 0; i < 4; i++) -+ if (x[i] != f[i]) -+ abort (); -+ -+ y = foo1 (x); -+ do_check (y, f, z); -+ y = foo2 (x); -+ do_check (y, f, z); -+ -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/i386/pr88828-8a.c b/gcc/testsuite/gcc.target/i386/pr88828-8a.c -new file mode 100644 -index 000000000..5d383dfd0 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr88828-8a.c -@@ -0,0 +1,15 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -msse -mno-sse4" } */ -+/* { dg-final { scan-assembler-not "movlhps" } } */ -+/* { dg-final { scan-assembler-not "unpckhps" } } */ -+/* { dg-final { scan-assembler-not "unpcklps" } } */ -+/* { dg-final { 
scan-assembler-not "shufps" } } */ -+ -+typedef float __v4sf __attribute__ ((__vector_size__ (16))); -+ -+__v4sf -+foo1 (__v4sf x) -+{ -+ __v4sf z = { 11.4, x[1], x[2], x[3] }; -+ return z; -+} -diff --git a/gcc/testsuite/gcc.target/i386/pr88828-8b.c b/gcc/testsuite/gcc.target/i386/pr88828-8b.c -new file mode 100644 -index 000000000..5ffbc9c31 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr88828-8b.c -@@ -0,0 +1,21 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -msse -mno-sse4" } */ -+/* { dg-final { scan-assembler-not "movlhps" } } */ -+/* { dg-final { scan-assembler-not "unpckhps" } } */ -+/* { dg-final { scan-assembler-not "unpcklps" } } */ -+/* { dg-final { scan-assembler-not "shufps" } } */ -+ -+typedef float __v4sf __attribute__ ((__vector_size__ (16))); -+ -+static __v4sf -+vector_init (float f0,float f1, float f2,float f3) -+{ -+ __v4sf y = { f0, f1, f2, f3 }; -+ return y; -+} -+ -+__v4sf -+foo2 (__v4sf x) -+{ -+ return vector_init (11.4, x[1], x[2], x[3]) ; -+} -diff --git a/gcc/testsuite/gcc.target/i386/pr88828-9.c b/gcc/testsuite/gcc.target/i386/pr88828-9.c -new file mode 100644 -index 000000000..c33907b4a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr88828-9.c -@@ -0,0 +1,46 @@ -+/* { dg-do run { target sse2_runtime } } */ -+/* { dg-options "-O2 -msse2" } */ -+ -+#include "pr88828-9a.c" -+#include "pr88828-9b.c" -+ -+extern void abort (); -+ -+void -+do_check (__v4sf y, float f[4], float z) -+{ -+ int i; -+ -+ for (i = 0; i < 4; i++) -+ if (i == 0) -+ { -+ if (y[i] != z) -+ abort (); -+ } -+ else -+ { -+ if (y[i] != f[i]) -+ abort (); -+ } -+} -+ -+int -+main (void) -+{ -+ float f[4] = { -11, 2, 55553, -4 }; -+ float z = 11.4; -+ __m128 x = (__m128) (__v4sf) { f[0], f[1], f[2], f[3] }; -+ __m128 y; -+ int i; -+ -+ for (i = 0; i < 4; i++) -+ if (x[i] != f[i]) -+ abort (); -+ -+ y = foo1 (x); -+ do_check (y, f, z); -+ y = foo2 (x); -+ do_check (y, f, z); -+ -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.target/i386/pr88828-9a.c b/gcc/testsuite/gcc.target/i386/pr88828-9a.c -new file mode 100644 -index 000000000..7f8306577 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr88828-9a.c -@@ -0,0 +1,16 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -msse -mno-sse4" } */ -+/* { dg-final { scan-assembler-not "movlhps" } } */ -+/* { dg-final { scan-assembler-not "unpckhps" } } */ -+/* { dg-final { scan-assembler-not "unpcklps" } } */ -+/* { dg-final { scan-assembler-not "shufps" } } */ -+ -+typedef float __v4sf __attribute__ ((__vector_size__ (16))); -+typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); -+ -+__m128 -+foo1 (__m128 x) -+{ -+ __v4sf z = { 11.4, ((__v4sf) x)[1], ((__v4sf) x)[2], ((__v4sf) x) [3] }; -+ return (__m128) z; -+} -diff --git a/gcc/testsuite/gcc.target/i386/pr88828-9b.c b/gcc/testsuite/gcc.target/i386/pr88828-9b.c -new file mode 100644 -index 000000000..6588ad15a ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr88828-9b.c -@@ -0,0 +1,23 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -msse -mno-sse4" } */ -+/* { dg-final { scan-assembler-not "movlhps" } } */ -+/* { dg-final { scan-assembler-not "unpckhps" } } */ -+/* { dg-final { scan-assembler-not "unpcklps" } } */ -+/* { dg-final { scan-assembler-not "shufps" } } */ -+ -+typedef float __v4sf __attribute__ ((__vector_size__ (16))); -+typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); -+ -+static __m128 -+vector_init (float f0,float f1, float f2,float f3) -+{ -+ __v4sf y = { f0, f1, f2, f3 }; -+ return (__m128) y; -+} -+ -+__m128 
-+foo2 (__m128 x) -+{ -+ return vector_init (11.4, ((__v4sf) x)[1], ((__v4sf) x)[2], -+ ((__v4sf) x) [3]); -+} -diff --git a/gcc/testsuite/gcc.target/i386/pr88963-1.c b/gcc/testsuite/gcc.target/i386/pr88963-1.c -new file mode 100644 -index 000000000..e6f15259e ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr88963-1.c -@@ -0,0 +1,13 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -march=x86-64 -mavx2 -fdump-tree-optimized" } */ -+ -+typedef int VInt __attribute__((vector_size(64))); -+ -+void test(VInt*__restrict a, VInt*__restrict b, -+ VInt*__restrict c) -+{ -+ *a = *b + *c; -+} -+ -+/* Vector loads and stores should be split. */ -+/* { dg-final { scan-tree-dump-not "vector\\(16\\)" "optimized" } } */ -diff --git a/gcc/testsuite/gcc.target/i386/pr88963-2.c b/gcc/testsuite/gcc.target/i386/pr88963-2.c -new file mode 100644 -index 000000000..114f1f5c3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr88963-2.c -@@ -0,0 +1,14 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -march=x86-64 -msse2 -fdump-tree-optimized" } */ -+ -+typedef int VInt __attribute__((vector_size(64))); -+ -+void test(VInt*__restrict a, VInt*__restrict b, -+ VInt*__restrict c) -+{ -+ *a = *b + *c; -+} -+ -+/* Vector loads and stores should be split. */ -+/* { dg-final { scan-tree-dump-not "vector\\(16\\)" "optimized" } } */ -+/* { dg-final { scan-tree-dump-not "vector\\(8\\)" "optimized" } } */ -diff --git a/gcc/testsuite/gcc.target/i386/pr89261.c b/gcc/testsuite/gcc.target/i386/pr89261.c -new file mode 100644 -index 000000000..63882c099 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr89261.c -@@ -0,0 +1,9 @@ -+/* PR target/89261 */ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+typedef double __v2df __attribute__ ((vector_size (16), aligned (1 << 28))); -+ -+__v2df foo = { 1.0, 2.0 }; -+ -+/* { dg-final { scan-assembler "\.align\[ \t]+268435456" } } */ -diff --git a/gcc/testsuite/gcc.target/i386/pr92645-2.c b/gcc/testsuite/gcc.target/i386/pr92645-2.c -new file mode 100644 -index 000000000..d34ed3aa8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr92645-2.c -@@ -0,0 +1,34 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -msse2 -fdump-tree-cddce1" } */ -+ -+typedef int v4si __attribute__((vector_size(16))); -+typedef int v2si __attribute__((vector_size(8))); -+ -+void low (v2si *dst, v4si *srcp) -+{ -+ v4si src = *srcp; -+ *dst = (v2si) { src[0], src[1] }; -+} -+ -+void high (v2si *dst, v4si *srcp) -+{ -+ v4si src = *srcp; -+ *dst = (v2si) { src[2], src[3] }; -+} -+ -+void even (v2si *dst, v4si *srcp) -+{ -+ v4si src = *srcp; -+ *dst = (v2si) { src[0], src[2] }; -+} -+ -+void odd (v2si *dst, v4si *srcp) -+{ -+ v4si src = *srcp; -+ *dst = (v2si) { src[1], src[3] }; -+} -+ -+/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 4 "cddce1" } } */ -+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 3 "cddce1" } } */ -+/* Ideally highpart extraction would elide the permutation as well. 
*/ -+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 2 "cddce1" { xfail *-*-* } } } */ -diff --git a/gcc/testsuite/gcc.target/i386/pr92645-3.c b/gcc/testsuite/gcc.target/i386/pr92645-3.c -new file mode 100644 -index 000000000..9c08c9fb6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr92645-3.c -@@ -0,0 +1,37 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -mavx2 -fdump-tree-cddce1" } */ -+ -+typedef int v8si __attribute__((vector_size(32))); -+typedef float v4sf __attribute__((vector_size(16))); -+ -+void low (v4sf *dst, v8si *srcp) -+{ -+ v8si src = *srcp; -+ *dst = (v4sf) { src[0], src[1], src[2], src[3] }; -+} -+ -+void high (v4sf *dst, v8si *srcp) -+{ -+ v8si src = *srcp; -+ *dst = (v4sf) { src[4], src[5], src[6], src[7] }; -+} -+ -+void even (v4sf *dst, v8si *srcp) -+{ -+ v8si src = *srcp; -+ *dst = (v4sf) { src[0], src[2], src[4], src[6] }; -+} -+ -+void odd (v4sf *dst, v8si *srcp) -+{ -+ v8si src = *srcp; -+ *dst = (v4sf) { src[1], src[3], src[5], src[7] }; -+} -+ -+/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 4 "cddce1" } } */ -+/* Four conversions, on the smaller vector type, to not convert excess -+ elements. */ -+/* { dg-final { scan-tree-dump-times " = \\\(vector\\\(4\\\) float\\\)" 4 "cddce1" } } */ -+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 3 "cddce1" } } */ -+/* Ideally highpart extraction would elide the VEC_PERM_EXPR as well. */ -+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 2 "cddce1" { xfail *-*-* } } } */ -diff --git a/gcc/testsuite/gcc.target/i386/pr92645-4.c b/gcc/testsuite/gcc.target/i386/pr92645-4.c -new file mode 100644 -index 000000000..788a97ed1 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr92645-4.c -@@ -0,0 +1,56 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -mavx2 -fdump-tree-optimized -Wno-psabi" } */ -+ -+typedef unsigned int u32v4 __attribute__((vector_size(16))); -+typedef unsigned short u16v16 __attribute__((vector_size(32))); -+typedef unsigned char u8v16 __attribute__((vector_size(16))); -+ -+union vec128 { -+ u8v16 u8; -+ u32v4 u32; -+}; -+ -+#define memcpy __builtin_memcpy -+ -+static u16v16 zxt(u8v16 x) -+{ -+ return (u16v16) { -+ x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], -+ x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15] -+ }; -+} -+ -+static u8v16 narrow(u16v16 x) -+{ -+ return (u8v16) { -+ x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], -+ x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15] -+ }; -+} -+ -+void f(char *dst, char *src, unsigned long n, unsigned c) -+{ -+ unsigned ia = 255 - (c >> 24); -+ ia += ia >> 7; -+ -+ union vec128 c4 = {0}, ia16 = {0}; -+ c4.u32 += c; -+ ia16.u8 += (unsigned char)ia; -+ -+ u16v16 c16 = (zxt(c4.u8) << 8) + 128; -+ -+ for (; n; src += 16, dst += 16, n -= 4) { -+ union vec128 s; -+ memcpy(&s, src, sizeof s); -+ s.u8 = narrow((zxt(s.u8)*zxt(ia16.u8) + c16) >> 8); -+ memcpy(dst, &s, sizeof s); -+ } -+} -+ -+/* { dg-final { scan-tree-dump-times "vec_unpack_lo" 3 "optimized" } } */ -+/* We're missing an opportunity to, after later optimizations, combine -+ a uniform CTOR with a vec_unpack_lo_expr to a CTOR on a converted -+ element. 
*/ -+/* { dg-final { scan-tree-dump-times "vec_unpack_lo" 2 "optimized" { xfail *-*-* } } } */ -+/* { dg-final { scan-tree-dump-times "VEC_PACK_TRUNC" 1 "optimized" } } */ -+/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 2 "optimized" } } */ -diff --git a/gcc/testsuite/gcc.target/i386/pr92803.c b/gcc/testsuite/gcc.target/i386/pr92803.c -new file mode 100644 -index 000000000..fc8d64efb ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr92803.c -@@ -0,0 +1,38 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -Wno-psabi -mavx2 -fdump-tree-forwprop1" } */ -+ -+typedef double v4df __attribute__((vector_size (32))); -+typedef float v8sf __attribute__((vector_size (32))); -+typedef float v4sf __attribute__((vector_size (16))); -+typedef int v4si __attribute__((vector_size (16))); -+typedef double v2df __attribute__((vector_size (16))); -+ -+v2df -+foo (v4df x, double *p, v2df y) -+{ -+ return (v2df) { x[3], *p }; -+} -+ -+v4sf -+bar (v4si x, float *p) -+{ -+ return (v4sf) { x[0], x[1], x[2], *p }; -+} -+ -+v4sf -+baz (v4si x) -+{ -+ return (v4sf) { x[0], x[1], 3.0f, 1.0f }; -+} -+ -+v4sf -+barf (v8sf x) -+{ -+ return (v4sf) { x[4], x[5], 1.0f, 2.0f }; -+} -+ -+/* We expect all CTORs to turn into permutes, the FP converting ones -+ to two each with the one with constants possibly elided in the future -+ by converting 3.0f and 1.0f "back" to integers. */ -+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 6 "forwprop1" } } */ -+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 5 "forwprop1" { xfail *-*-* } } } */ -diff --git a/gcc/testsuite/gfortran.dg/graphite/interchange-3.f90 b/gcc/testsuite/gfortran.dg/graphite/interchange-3.f90 -index 8070bbb4a..d827323ac 100644 ---- a/gcc/testsuite/gfortran.dg/graphite/interchange-3.f90 -+++ b/gcc/testsuite/gfortran.dg/graphite/interchange-3.f90 -@@ -23,5 +23,3 @@ Program FOO - 366 format(/, ' PC = ',E12.4,/,' UC = ',E12.4,/,' VC = ',E12.4,/) - - end Program FOO -- --! { dg-final { scan-tree-dump "tiled" "graphite" } } -diff --git a/gcc/testsuite/gfortran.dg/pr88833.f90 b/gcc/testsuite/gfortran.dg/pr88833.f90 -new file mode 100644 -index 000000000..224e6ce5f ---- /dev/null -+++ b/gcc/testsuite/gfortran.dg/pr88833.f90 -@@ -0,0 +1,9 @@ -+! { dg-do assemble { target aarch64_asm_sve_ok } } -+! { dg-options "-O3 -march=armv8.2-a+sve --save-temps" } -+ -+subroutine foo(x) -+ real :: x(100) -+ x = x + 10 -+end subroutine foo -+ -+! 
{ dg-final { scan-assembler {\twhilelo\tp[0-9]+\.s, wzr, (w[0-9]+).*\twhilelo\tp[0-9]+\.s, w[0-9]+, \1} } } -diff --git a/gcc/testsuite/gnat.dg/opt39.adb b/gcc/testsuite/gnat.dg/opt39.adb -index 3b12cf201..0a5ef67a2 100644 ---- a/gcc/testsuite/gnat.dg/opt39.adb -+++ b/gcc/testsuite/gnat.dg/opt39.adb -@@ -27,4 +27,5 @@ begin - end if; - end; - ---- { dg-final { scan-tree-dump-times "MEM" 1 "optimized" } } -+-- { dg-final { scan-tree-dump-not "MEM" "optimized" } } -+-- { dg-final { scan-tree-dump-not "tmp" "optimized" } } -diff --git a/gcc/testsuite/lib/prune.exp b/gcc/testsuite/lib/prune.exp -index 812c59e6f..a9beef48e 100644 ---- a/gcc/testsuite/lib/prune.exp -+++ b/gcc/testsuite/lib/prune.exp -@@ -21,7 +21,7 @@ load_lib multiline.exp - if ![info exists TEST_ALWAYS_FLAGS] { - set TEST_ALWAYS_FLAGS "" - } --set TEST_ALWAYS_FLAGS "-fno-diagnostics-show-caret -fno-diagnostics-show-line-numbers -fdiagnostics-color=never $TEST_ALWAYS_FLAGS" -+set TEST_ALWAYS_FLAGS "-fno-diagnostics-show-caret -fno-diagnostics-show-line-numbers -fdiagnostics-color=never -fdiagnostics-urls=never $TEST_ALWAYS_FLAGS" - - proc prune_gcc_output { text } { - global srcdir -diff --git a/gcc/testsuite/lib/scanasm.exp b/gcc/testsuite/lib/scanasm.exp -index 35ccbc86f..4ff39dab3 100644 ---- a/gcc/testsuite/lib/scanasm.exp -+++ b/gcc/testsuite/lib/scanasm.exp -@@ -546,3 +546,179 @@ proc scan-lto-assembler { args } { - verbose "output_file: $output_file" - dg-scan "scan-lto-assembler" 1 $testcase $output_file $args - } -+ -+# Read assembly file FILENAME and store a mapping from function names -+# to function bodies in array RESULT. FILENAME has already been uploaded -+# locally where necessary and is known to exist. -+ -+proc parse_function_bodies { filename result } { -+ upvar $result up_result -+ -+ # Regexp for the start of a function definition (name in \1). -+ set label {^([a-zA-Z_]\S+):$} -+ -+ # Regexp for the end of a function definition. -+ set terminator {^\s*\.size} -+ -+ # Regexp for lines that aren't interesting. -+ set fluff {^\s*(?:\.|//)} -+ -+ set fd [open $filename r] -+ set in_function 0 -+ while { [gets $fd line] >= 0 } { -+ if { [regexp $label $line dummy function_name] } { -+ set in_function 1 -+ set function_body "" -+ } elseif { $in_function } { -+ if { [regexp $terminator $line] } { -+ set up_result($function_name) $function_body -+ set in_function 0 -+ } elseif { ![regexp $fluff $line] } { -+ append function_body $line "\n" -+ } -+ } -+ } -+ close $fd -+} -+ -+# FUNCTIONS is an array that maps function names to function bodies. -+# Return true if it contains a definition of function NAME and if -+# that definition matches BODY_REGEXP. -+ -+proc check_function_body { functions name body_regexp } { -+ upvar $functions up_functions -+ -+ if { ![info exists up_functions($name)] } { -+ return 0 -+ } -+ return [regexp "^$body_regexp\$" $up_functions($name)] -+} -+ -+# Check the implementations of functions against expected output. Used as: -+# -+# { dg-do { check-function-bodies PREFIX TERMINATOR[ OPTION[ SELECTOR]] } } -+# -+# See sourcebuild.texi for details. 
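For orientation, a test consumes the check-function-bodies directive added here roughly as in the sketch below (assuming an AArch64 target); the function name, options and expected instructions are illustrative only and are not taken from this patch. Lines starting with the "**" prefix give the expected body of the named function, and an empty terminator argument defaults to "*/".

/* { dg-do compile } */
/* { dg-options "-O2" } */
/* { dg-final { check-function-bodies "**" "" } } */

/*
** add_one:
**	add	w0, w0, #?1
**	ret
*/
int
add_one (int x)
{
  return x + 1;
}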
-+ -+proc check-function-bodies { args } { -+ if { [llength $args] < 2 } { -+ error "too few arguments to check-function-bodies" -+ } -+ if { [llength $args] > 4 } { -+ error "too many arguments to check-function-bodies" -+ } -+ -+ if { [llength $args] >= 3 } { -+ set required_flag [lindex $args 2] -+ -+ upvar 2 dg-extra-tool-flags extra_tool_flags -+ set flags $extra_tool_flags -+ -+ global torture_current_flags -+ if { [info exists torture_current_flags] } { -+ append flags " " $torture_current_flags -+ } -+ if { ![regexp " $required_flag " $flags] } { -+ return -+ } -+ } -+ -+ set xfail_all 0 -+ if { [llength $args] >= 4 } { -+ switch [dg-process-target [lindex $args 3]] { -+ "S" { } -+ "N" { return } -+ "F" { set xfail_all 1 } -+ "P" { } -+ } -+ } -+ -+ set testcase [testname-for-summary] -+ # The name might include a list of options; extract the file name. -+ set filename [lindex $testcase 0] -+ -+ global srcdir -+ set input_filename "$srcdir/$filename" -+ set output_filename "[file rootname [file tail $filename]].s" -+ -+ set prefix [lindex $args 0] -+ set prefix_len [string length $prefix] -+ set terminator [lindex $args 1] -+ if { [string equal $terminator ""] } { -+ set terminator "*/" -+ } -+ set terminator_len [string length $terminator] -+ -+ set have_bodies 0 -+ if { [is_remote host] } { -+ remote_upload host "$filename" -+ } -+ if { [file exists $output_filename] } { -+ parse_function_bodies $output_filename functions -+ set have_bodies 1 -+ } else { -+ verbose -log "$testcase: output file does not exist" -+ } -+ -+ set count 0 -+ set function_regexp "" -+ set label {^(\S+):$} -+ -+ set lineno 1 -+ set fd [open $input_filename r] -+ set in_function 0 -+ while { [gets $fd line] >= 0 } { -+ if { [string equal -length $prefix_len $line $prefix] } { -+ set line [string trim [string range $line $prefix_len end]] -+ if { !$in_function } { -+ if { [regexp "^(.*\\S)\\s+{(.*)}\$" $line dummy \ -+ line selector] } { -+ set selector [dg-process-target $selector] -+ } else { -+ set selector "P" -+ } -+ if { ![regexp $label $line dummy function_name] } { -+ close $fd -+ error "check-function-bodies: line $lineno does not have a function label" -+ } -+ set in_function 1 -+ set function_regexp "" -+ } elseif { [string equal $line "("] } { -+ append function_regexp "(?:" -+ } elseif { [string equal $line "|"] } { -+ append function_regexp "|" -+ } elseif { [string equal $line ")"] } { -+ append function_regexp ")" -+ } elseif { [string equal $line "..."] } { -+ append function_regexp ".*" -+ } else { -+ append function_regexp "\t" $line "\n" -+ } -+ } elseif { [string equal -length $terminator_len $line $terminator] } { -+ if { ![string equal $selector "N"] } { -+ if { $xfail_all || [string equal $selector "F"] } { -+ setup_xfail "*-*-*" -+ } -+ set testname "$testcase check-function-bodies $function_name" -+ if { !$have_bodies } { -+ unresolved $testname -+ } elseif { [check_function_body functions $function_name \ -+ $function_regexp] } { -+ pass $testname -+ } else { -+ fail $testname -+ } -+ } -+ set in_function 0 -+ incr count -+ } -+ incr lineno -+ } -+ close $fd -+ if { $in_function } { -+ error "check-function-bodies: missing \"$terminator\"" -+ } -+ if { $count == 0 } { -+ error "check-function-bodies: no matches found" -+ } -+} -diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp -index ea9a50ccb..2eeb6883a 100644 ---- a/gcc/testsuite/lib/target-supports.exp -+++ b/gcc/testsuite/lib/target-supports.exp -@@ -3336,6 +3336,24 @@ proc 
check_effective_target_aarch64_sve { } { - }] - } - -+# Return 1 if this is an AArch64 target supporting SVE2. -+proc check_effective_target_aarch64_sve2 { } { -+ if { ![istarget aarch64*-*-*] } { -+ return 0 -+ } -+ return [check_no_compiler_messages aarch64_sve2 assembly { -+ #if !defined (__ARM_FEATURE_SVE2) -+ #error FOO -+ #endif -+ }] -+} -+ -+# Return 1 if this is an AArch64 target only supporting SVE (not SVE2). -+proc check_effective_target_aarch64_sve1_only { } { -+ return [expr { [check_effective_target_aarch64_sve] -+ && ![check_effective_target_aarch64_sve2] }] -+} -+ - # Return the size in bits of an SVE vector, or 0 if the size is variable. - proc aarch64_sve_bits { } { - return [check_cached_effective_target aarch64_sve_bits { -@@ -4356,6 +4374,22 @@ proc check_effective_target_aarch64_sve_hw { } { - }] - } - -+# Return true if this is an AArch64 target that can run SVE2 code. -+ -+proc check_effective_target_aarch64_sve2_hw { } { -+ if { ![istarget aarch64*-*-*] } { -+ return 0 -+ } -+ return [check_runtime aarch64_sve2_hw_available { -+ int -+ main (void) -+ { -+ asm volatile ("addp z0.b, p0/m, z0.b, z1.b"); -+ return 0; -+ } -+ }] -+} -+ - # Return true if this is an AArch64 target that can run SVE code and - # if its SVE vectors have exactly BITS bits. - -@@ -4569,6 +4603,49 @@ proc add_options_for_arm_v8_2a_dotprod_neon { flags } { - return "$flags $et_arm_v8_2a_dotprod_neon_flags" - } - -+# Return 1 if the target supports ARMv8.2+i8mm Adv.SIMD Dot Product -+# instructions, 0 otherwise. The test is valid for ARM and for AArch64. -+# Record the command line options needed. -+ -+proc check_effective_target_arm_v8_2a_i8mm_ok_nocache { } { -+ global et_arm_v8_2a_i8mm_flags -+ set et_arm_v8_2a_i8mm_flags "" -+ -+ if { ![istarget arm*-*-*] && ![istarget aarch64*-*-*] } { -+ return 0; -+ } -+ -+ # Iterate through sets of options to find the compiler flags that -+ # need to be added to the -march option. -+ foreach flags {"" "-mfloat-abi=hard -mfpu=neon-fp-armv8" "-mfloat-abi=softfp -mfpu=neon-fp-armv8" } { -+ if { [check_no_compiler_messages_nocache \ -+ arm_v8_2a_i8mm_ok object { -+ #include -+ #if !defined (__ARM_FEATURE_MATMUL_INT8) -+ #error "__ARM_FEATURE_MATMUL_INT8 not defined" -+ #endif -+ } "$flags -march=armv8.2-a+i8mm"] } { -+ set et_arm_v8_2a_i8mm_flags "$flags -march=armv8.2-a+i8mm" -+ return 1 -+ } -+ } -+ -+ return 0; -+} -+ -+proc check_effective_target_arm_v8_2a_i8mm_ok { } { -+ return [check_cached_effective_target arm_v8_2a_i8mm_ok \ -+ check_effective_target_arm_v8_2a_i8mm_ok_nocache] -+} -+ -+proc add_options_for_arm_v8_2a_i8mm { flags } { -+ if { ! [check_effective_target_arm_v8_2a_i8mm_ok] } { -+ return "$flags" -+ } -+ global et_arm_v8_2a_i8mm_flags -+ return "$flags $et_arm_v8_2a_i8mm_flags" -+} -+ - # Return 1 if the target supports FP16 VFMAL and VFMSL - # instructions, 0 otherwise. - # Record the command line options needed. -@@ -4614,6 +4691,45 @@ proc add_options_for_arm_fp16fml_neon { flags } { - return "$flags $et_arm_fp16fml_neon_flags" - } - -+# Return 1 if the target supports BFloat16 SIMD instructions, 0 otherwise. -+# The test is valid for ARM and for AArch64. 
-+ -+proc check_effective_target_arm_v8_2a_bf16_neon_ok_nocache { } { -+ global et_arm_v8_2a_bf16_neon_flags -+ set et_arm_v8_2a_bf16_neon_flags "" -+ -+ if { ![istarget arm*-*-*] && ![istarget aarch64*-*-*] } { -+ return 0; -+ } -+ -+ foreach flags {"" "-mfloat-abi=hard -mfpu=neon-fp-armv8" "-mfloat-abi=softfp -mfpu=neon-fp-armv8" } { -+ if { [check_no_compiler_messages_nocache arm_v8_2a_bf16_neon_ok object { -+ #include -+ #if !defined (__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) -+ #error "__ARM_FEATURE_BF16_VECTOR_ARITHMETIC not defined" -+ #endif -+ } "$flags -march=armv8.2-a+bf16"] } { -+ set et_arm_v8_2a_bf16_neon_flags "$flags -march=armv8.2-a+bf16" -+ return 1 -+ } -+ } -+ -+ return 0; -+} -+ -+proc check_effective_target_arm_v8_2a_bf16_neon_ok { } { -+ return [check_cached_effective_target arm_v8_2a_bf16_neon_ok \ -+ check_effective_target_arm_v8_2a_bf16_neon_ok_nocache] -+} -+ -+proc add_options_for_arm_v8_2a_bf16_neon { flags } { -+ if { ! [check_effective_target_arm_v8_2a_bf16_neon_ok] } { -+ return "$flags" -+ } -+ global et_arm_v8_2a_bf16_neon_flags -+ return "$flags $et_arm_v8_2a_bf16_neon_flags" -+} -+ - # Return 1 if the target supports executing ARMv8 NEON instructions, 0 - # otherwise. - -@@ -6093,7 +6209,24 @@ proc check_effective_target_vect_usad_char { } { - - proc check_effective_target_vect_avg_qi {} { - return [expr { [istarget aarch64*-*-*] -- && ![check_effective_target_aarch64_sve] }] -+ && ![check_effective_target_aarch64_sve1_only] }] -+} -+ -+# Return 1 if the target plus current options supports both signed -+# and unsigned multiply-high-with-round-and-scale operations -+# on vectors of half-words. -+ -+proc check_effective_target_vect_mulhrs_hi {} { -+ return [expr { [istarget aarch64*-*-*] -+ && [check_effective_target_aarch64_sve2] }] -+} -+ -+# Return 1 if the target plus current options supports signed division -+# by power-of-2 operations on vectors of 4-byte integers. -+ -+proc check_effective_target_vect_sdiv_pow2_si {} { -+ return [expr { [istarget aarch64*-*-*] -+ && [check_effective_target_aarch64_sve] }] - } - - # Return 1 if the target plus current options supports a vector -@@ -8579,7 +8712,8 @@ proc check_effective_target_aarch64_tiny { } { - # Create functions to check that the AArch64 assembler supports the - # various architecture extensions via the .arch_extension pseudo-op. 
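Tests pick these new helpers up through the usual DejaGnu directives (dg-require-effective-target for the *_ok check, dg-add-options for the matching add_options_for_* proc); the bf16 helper is used the same way. A minimal hypothetical user of the i8mm support, with a body chosen purely for illustration and not taken from this patch, could look like:

/* { dg-do compile { target { arm*-*-* || aarch64*-*-* } } } */
/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */
/* { dg-add-options arm_v8_2a_i8mm } */

/* With the options added above, the i8mm feature macro must be defined.  */
#ifndef __ARM_FEATURE_MATMUL_INT8
#error "+i8mm not enabled"
#endif

int unused;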
- --foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod" "sve"} { -+foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod" "sve" -+ "i8mm" "f32mm" "f64mm" "bf16" } { - eval [string map [list FUNC $aarch64_ext] { - proc check_effective_target_aarch64_asm_FUNC_ok { } { - if { [istarget aarch64*-*-*] } { -diff --git a/gcc/testsuite/obj-c++.dg/stubify-1.mm b/gcc/testsuite/obj-c++.dg/stubify-1.mm -index e8f21882d..a32e28251 100644 ---- a/gcc/testsuite/obj-c++.dg/stubify-1.mm -+++ b/gcc/testsuite/obj-c++.dg/stubify-1.mm -@@ -4,7 +4,7 @@ - /* { dg-do compile { target *-*-darwin* } } */ - /* { dg-skip-if "" { *-*-* } { "-fgnu-runtime" } { "" } } */ - /* { dg-require-effective-target ilp32 } */ --/* { dg-options "-mdynamic-no-pic -fno-exceptions -mmacosx-version-min=10.4 -msymbol-stubs" } */ -+/* { dg-options "-Os -mdynamic-no-pic -fno-exceptions -mmacosx-version-min=10.4 -msymbol-stubs" } */ - - typedef struct objc_object { } *id ; - int x = 41 ; -diff --git a/gcc/testsuite/obj-c++.dg/stubify-2.mm b/gcc/testsuite/obj-c++.dg/stubify-2.mm -index 1863f986c..69fea8def 100644 ---- a/gcc/testsuite/obj-c++.dg/stubify-2.mm -+++ b/gcc/testsuite/obj-c++.dg/stubify-2.mm -@@ -4,7 +4,7 @@ - /* { dg-do compile { target *-*-darwin* } } */ - /* { dg-skip-if "" { *-*-* } { "-fgnu-runtime" } { "" } } */ - /* { dg-require-effective-target ilp32 } */ --/* { dg-options "-mdynamic-no-pic -mmacosx-version-min=10.4 -msymbol-stubs" } */ -+/* { dg-options "-mdynamic-no-pic -fdump-rtl-jump -mmacosx-version-min=10.4 -msymbol-stubs" } */ - - typedef struct objc_object { } *id ; - int x = 41 ; -@@ -30,6 +30,7 @@ extern int bogonic (int, int, int) ; - - /* Any symbol_ref of an un-stubified objc_msgSend is an error; look - for "objc_msgSend" in quotes, without the $stub suffix. */ -+/* { dg-final { scan-rtl-dump-not {symbol_ref.*"objc_msgSend"} "jump" { target powerpc*-*-darwin* } } } */ - - /* { dg-final { scan-assembler-not {(bl|call)[ \t]+_objc_msgSend\n} } } */ - /* { dg-final { scan-assembler {(bl|call)[ \t]+L_objc_msgSend\$stub\n} } } */ -diff --git a/gcc/testsuite/objc.dg/stubify-2.m b/gcc/testsuite/objc.dg/stubify-2.m -index 2930e46fc..904ac44b2 100644 ---- a/gcc/testsuite/objc.dg/stubify-2.m -+++ b/gcc/testsuite/objc.dg/stubify-2.m -@@ -4,7 +4,7 @@ - /* { dg-do compile { target *-*-darwin* } } */ - /* { dg-skip-if "" { *-*-* } { "-fgnu-runtime" } { "" } } */ - /* { dg-require-effective-target ilp32 } */ --/* { dg-options "-mdynamic-no-pic -mmacosx-version-min=10.4 -msymbol-stubs" } */ -+/* { dg-options "-mdynamic-no-pic -fdump-rtl-jump -mmacosx-version-min=10.4 -msymbol-stubs" } */ - - typedef struct objc_object { } *id ; - int x = 41 ; -@@ -30,6 +30,7 @@ extern int bogonic (int, int, int) ; - - /* Any symbol_ref of an un-stubified objc_msgSend is an error; look - for "objc_msgSend" in quotes, without the $stub suffix. 
*/ -+/* { dg-final { scan-rtl-dump-not {symbol_ref.*"objc_msgSend"} "jump" { target powerpc*-*-darwin* } } } */ - - /* { dg-final { scan-assembler-not {(bl|call)[ \t]+_objc_msgSend\n} } } */ - /* { dg-final { scan-assembler {(bl|call)[ \t]+L_objc_msgSend\$stub\n} } } */ -diff --git a/gcc/trans-mem.c b/gcc/trans-mem.c -index 0581aae2d..8fc9f44d8 100644 ---- a/gcc/trans-mem.c -+++ b/gcc/trans-mem.c -@@ -3237,8 +3237,7 @@ expand_block_edges (struct tm_region *const region, basic_block bb) - || (gimple_call_flags (call_stmt) & ECF_TM_BUILTIN) == 0) - continue; - -- if (DECL_FUNCTION_CODE (gimple_call_fndecl (call_stmt)) -- == BUILT_IN_TM_ABORT) -+ if (gimple_call_builtin_p (call_stmt, BUILT_IN_TM_ABORT)) - { - // If we have a ``_transaction_cancel [[outer]]'', there is only - // one abnormal edge: to the transaction marked OUTER. -diff --git a/gcc/tree-call-cdce.c b/gcc/tree-call-cdce.c -index 2e482b37e..43f1ec6ee 100644 ---- a/gcc/tree-call-cdce.c -+++ b/gcc/tree-call-cdce.c -@@ -1074,9 +1074,7 @@ use_internal_fn (gcall *call) - { - gimple_stmt_iterator gsi = gsi_for_stmt (call); - gcall *new_call = gimple_build_call_internal (IFN_SET_EDOM, 0); -- gimple_set_vuse (new_call, gimple_vuse (call)); -- gimple_set_vdef (new_call, gimple_vdef (call)); -- SSA_NAME_DEF_STMT (gimple_vdef (new_call)) = new_call; -+ gimple_move_vops (new_call, call); - gimple_set_location (new_call, gimple_location (call)); - gsi_replace (&gsi, new_call, false); - call = new_call; -diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c -index 621c8ea3d..527deffe4 100644 ---- a/gcc/tree-cfg.c -+++ b/gcc/tree-cfg.c -@@ -9547,7 +9547,8 @@ execute_fixup_cfg (void) - Keep access when store has side effect, i.e. in case when source - is volatile. */ - if (gimple_store_p (stmt) -- && !gimple_has_side_effects (stmt)) -+ && !gimple_has_side_effects (stmt) -+ && !optimize_debug) - { - tree lhs = get_base_address (gimple_get_lhs (stmt)); - -diff --git a/gcc/tree-core.h b/gcc/tree-core.h -index 41d052949..26b6f46ad 100644 ---- a/gcc/tree-core.h -+++ b/gcc/tree-core.h -@@ -1791,6 +1791,17 @@ struct GTY(()) tree_decl_non_common { - tree result; - }; - -+/* Classify a special function declaration type. */ -+ -+enum function_decl_type -+{ -+ NONE, -+ OPERATOR_NEW, -+ LAMBDA_FUNCTION -+ -+ /* 0 values left */ -+}; -+ - /* FUNCTION_DECL inherits from DECL_NON_COMMON because of the use of the - arguments/result/saved_tree fields by front ends. It was either inherit - FUNCTION_DECL from non_common, or inherit non_common from FUNCTION_DECL, -@@ -1815,34 +1826,32 @@ struct GTY(()) tree_function_decl { - /* Index within a virtual table. */ - tree vindex; - -- /* In a FUNCTION_DECL for which DECL_BUILT_IN holds, this is -- DECL_FUNCTION_CODE. Otherwise unused. -- ??? The bitfield needs to be able to hold all target function -- codes as well. */ -- ENUM_BITFIELD(built_in_function) function_code : 12; -- ENUM_BITFIELD(built_in_class) built_in_class : 2; -+ /* In a FUNCTION_DECL this is DECL_UNCHECKED_FUNCTION_CODE. 
*/ -+ unsigned int function_code; - -+ ENUM_BITFIELD(built_in_class) built_in_class : 2; - unsigned static_ctor_flag : 1; - unsigned static_dtor_flag : 1; -- - unsigned uninlinable : 1; - unsigned possibly_inlined : 1; - unsigned novops_flag : 1; - unsigned returns_twice_flag : 1; -+ - unsigned malloc_flag : 1; -- unsigned operator_new_flag : 1; - unsigned declared_inline_flag : 1; - unsigned no_inline_warning_flag : 1; -- - unsigned no_instrument_function_entry_exit : 1; - unsigned no_limit_stack : 1; - unsigned disregard_inline_limits : 1; - unsigned pure_flag : 1; - unsigned looping_const_or_pure_flag : 1; -+ -+ /* Align the bitfield to boundary of a byte. */ -+ ENUM_BITFIELD(function_decl_type) decl_type: 2; - unsigned has_debug_args_flag : 1; - unsigned versioned_function : 1; -- unsigned lambda_function: 1; -- /* No bits left. */ -+ -+ /* 12 bits left for future expansion. */ - }; - - struct GTY(()) tree_translation_unit_decl { -diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c -index ac81e10a3..38ebe4092 100644 ---- a/gcc/tree-if-conv.c -+++ b/gcc/tree-if-conv.c -@@ -2142,9 +2142,7 @@ predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask) - new_stmt - = gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr, - mask, rhs); -- gimple_set_vuse (new_stmt, gimple_vuse (stmt)); -- gimple_set_vdef (new_stmt, gimple_vdef (stmt)); -- SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt; -+ gimple_move_vops (new_stmt, stmt); - } - gimple_call_set_nothrow (new_stmt, true); - return new_stmt; -diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c -index 1110089fa..784ab48c1 100644 ---- a/gcc/tree-inline.c -+++ b/gcc/tree-inline.c -@@ -4585,7 +4585,7 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id, - /* PR 20090218-1_0.c. Body can be provided by another module. */ - && (reason != CIF_BODY_NOT_AVAILABLE || !flag_generate_lto)) - { -- error ("inlining failed in call to always_inline %q+F: %s", fn, -+ error ("inlining failed in call to % %q+F: %s", fn, - cgraph_inline_failed_string (reason)); - if (gimple_location (stmt) != UNKNOWN_LOCATION) - inform (gimple_location (stmt), "called from here"); -@@ -4834,7 +4834,7 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id, - we may get confused if the compiler sees that the inlined new - function returns a pointer which was just deleted. See bug - 33407. 
*/ -- if (DECL_IS_OPERATOR_NEW (fn)) -+ if (DECL_IS_OPERATOR_NEW_P (fn)) - { - return_slot = NULL; - modify_dest = NULL; -diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h -index 8741a9a49..1321a92c4 100644 ---- a/gcc/tree-pass.h -+++ b/gcc/tree-pass.h -@@ -584,8 +584,6 @@ extern rtl_opt_pass *make_pass_value_profile_transformations (gcc::context - extern rtl_opt_pass *make_pass_postreload_cse (gcc::context *ctxt); - extern rtl_opt_pass *make_pass_gcse2 (gcc::context *ctxt); - extern rtl_opt_pass *make_pass_split_after_reload (gcc::context *ctxt); --extern rtl_opt_pass *make_pass_branch_target_load_optimize1 (gcc::context -- *ctxt); - extern rtl_opt_pass *make_pass_thread_prologue_and_epilogue (gcc::context - *ctxt); - extern rtl_opt_pass *make_pass_stack_adjustments (gcc::context *ctxt); -@@ -595,8 +593,6 @@ extern rtl_opt_pass *make_pass_if_after_reload (gcc::context *ctxt); - extern rtl_opt_pass *make_pass_regrename (gcc::context *ctxt); - extern rtl_opt_pass *make_pass_cprop_hardreg (gcc::context *ctxt); - extern rtl_opt_pass *make_pass_reorder_blocks (gcc::context *ctxt); --extern rtl_opt_pass *make_pass_branch_target_load_optimize2 (gcc::context -- *ctxt); - extern rtl_opt_pass *make_pass_leaf_regs (gcc::context *ctxt); - extern rtl_opt_pass *make_pass_split_before_sched2 (gcc::context *ctxt); - extern rtl_opt_pass *make_pass_compare_elim_after_reload (gcc::context *ctxt); -diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c -index 8e4baf013..c36bf96ef 100644 ---- a/gcc/tree-sra.c -+++ b/gcc/tree-sra.c -@@ -106,6 +106,7 @@ along with GCC; see the file COPYING3. If not see - #include "ipa-utils.h" - #include "builtins.h" - -+ - /* Enumeration of all aggregate reductions we can do. */ - enum sra_mode { SRA_MODE_EARLY_IPA, /* early call regularization */ - SRA_MODE_EARLY_INTRA, /* early intraprocedural SRA */ -@@ -220,8 +221,11 @@ struct access - is not propagated in the access tree in any direction. */ - unsigned grp_scalar_write : 1; - -- /* Is this access an artificial one created to scalarize some record -- entirely? */ -+ /* In a root of an access tree, true means that the entire tree should be -+ totally scalarized - that all scalar leafs should be scalarized and -+ non-root grp_total_scalarization accesses should be honored. Otherwise, -+ non-root accesses with grp_total_scalarization should never get scalar -+ replacements. */ - unsigned grp_total_scalarization : 1; - - /* Other passes of the analysis use this bit to make function -@@ -242,6 +246,10 @@ struct access - access tree. */ - unsigned grp_unscalarized_data : 1; - -+ /* Set if all accesses in the group consist of the same chain of -+ COMPONENT_REFs and ARRAY_REFs. */ -+ unsigned grp_same_access_path : 1; -+ - /* Does this access and/or group contain a write access through a - BIT_FIELD_REF? 
*/ - unsigned grp_partial_lhs : 1; -@@ -443,16 +451,18 @@ dump_access (FILE *f, struct access *access, bool grp) - "grp_scalar_write = %d, grp_total_scalarization = %d, " - "grp_hint = %d, grp_covered = %d, " - "grp_unscalarizable_region = %d, grp_unscalarized_data = %d, " -- "grp_partial_lhs = %d, grp_to_be_replaced = %d, " -- "grp_to_be_debug_replaced = %d, grp_maybe_modified = %d, " -+ "grp_same_access_path = %d, grp_partial_lhs = %d, " -+ "grp_to_be_replaced = %d, grp_to_be_debug_replaced = %d, " -+ "grp_maybe_modified = %d, " - "grp_not_necessarilly_dereferenced = %d\n", - access->grp_read, access->grp_write, access->grp_assignment_read, - access->grp_assignment_write, access->grp_scalar_read, - access->grp_scalar_write, access->grp_total_scalarization, - access->grp_hint, access->grp_covered, - access->grp_unscalarizable_region, access->grp_unscalarized_data, -- access->grp_partial_lhs, access->grp_to_be_replaced, -- access->grp_to_be_debug_replaced, access->grp_maybe_modified, -+ access->grp_same_access_path, access->grp_partial_lhs, -+ access->grp_to_be_replaced, access->grp_to_be_debug_replaced, -+ access->grp_maybe_modified, - access->grp_not_necessarilly_dereferenced); - else - fprintf (f, ", write = %d, grp_total_scalarization = %d, " -@@ -540,6 +550,15 @@ find_access_in_subtree (struct access *access, HOST_WIDE_INT offset, - access = child; - } - -+ /* Total scalarization does not replace single field structures with their -+ single field but rather creates an access for them underneath. Look for -+ it. */ -+ if (access) -+ while (access->first_child -+ && access->first_child->offset == offset -+ && access->first_child->size == size) -+ access = access->first_child; -+ - return access; - } - -@@ -971,7 +990,8 @@ create_access (tree expr, gimple *stmt, bool write) - static bool - scalarizable_type_p (tree type, bool const_decl) - { -- gcc_assert (!is_gimple_reg_type (type)); -+ if (is_gimple_reg_type (type)) -+ return true; - if (type_contains_placeholder_p (type)) - return false; - -@@ -986,8 +1006,7 @@ scalarizable_type_p (tree type, bool const_decl) - if (DECL_BIT_FIELD (fld)) - return false; - -- if (!is_gimple_reg_type (ft) -- && !scalarizable_type_p (ft, const_decl)) -+ if (!scalarizable_type_p (ft, const_decl)) - return false; - } - -@@ -1017,8 +1036,7 @@ scalarizable_type_p (tree type, bool const_decl) - return false; - - tree elem = TREE_TYPE (type); -- if (!is_gimple_reg_type (elem) -- && !scalarizable_type_p (elem, const_decl)) -+ if (!scalarizable_type_p (elem, const_decl)) - return false; - return true; - } -@@ -1027,114 +1045,6 @@ scalarizable_type_p (tree type, bool const_decl) - } - } - --static void scalarize_elem (tree, HOST_WIDE_INT, HOST_WIDE_INT, bool, tree, tree); -- --/* Create total_scalarization accesses for all scalar fields of a member -- of type DECL_TYPE conforming to scalarizable_type_p. BASE -- must be the top-most VAR_DECL representing the variable; within that, -- OFFSET locates the member and REF must be the memory reference expression for -- the member. 
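As a purely illustrative aside (none of the code below appears in the patch), the effect of the total-scalarization machinery that the following hunks rework can be pictured at the source level like this:

struct pair { int a; int b; };

int
sum_fields (struct pair *p)
{
  struct pair t = *p;	/* aggregate copy into a local */
  return t.a + t.b;
}

/* When 't' is totally scalarized, SRA creates scalar replacements for
   t.a and t.b and rewrites the copy and the reads to use them, so the
   local aggregate and its stack slot disappear.  */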
*/ -- --static void --completely_scalarize (tree base, tree decl_type, HOST_WIDE_INT offset, tree ref) --{ -- switch (TREE_CODE (decl_type)) -- { -- case RECORD_TYPE: -- for (tree fld = TYPE_FIELDS (decl_type); fld; fld = DECL_CHAIN (fld)) -- if (TREE_CODE (fld) == FIELD_DECL) -- { -- HOST_WIDE_INT pos = offset + int_bit_position (fld); -- tree ft = TREE_TYPE (fld); -- tree nref = build3 (COMPONENT_REF, ft, ref, fld, NULL_TREE); -- -- scalarize_elem (base, pos, tree_to_uhwi (DECL_SIZE (fld)), -- TYPE_REVERSE_STORAGE_ORDER (decl_type), -- nref, ft); -- } -- break; -- case ARRAY_TYPE: -- { -- tree elemtype = TREE_TYPE (decl_type); -- tree elem_size = TYPE_SIZE (elemtype); -- gcc_assert (elem_size && tree_fits_shwi_p (elem_size)); -- HOST_WIDE_INT el_size = tree_to_shwi (elem_size); -- gcc_assert (el_size > 0); -- -- tree minidx = TYPE_MIN_VALUE (TYPE_DOMAIN (decl_type)); -- gcc_assert (TREE_CODE (minidx) == INTEGER_CST); -- tree maxidx = TYPE_MAX_VALUE (TYPE_DOMAIN (decl_type)); -- /* Skip (some) zero-length arrays; others have MAXIDX == MINIDX - 1. */ -- if (maxidx) -- { -- gcc_assert (TREE_CODE (maxidx) == INTEGER_CST); -- tree domain = TYPE_DOMAIN (decl_type); -- /* MINIDX and MAXIDX are inclusive, and must be interpreted in -- DOMAIN (e.g. signed int, whereas min/max may be size_int). */ -- offset_int idx = wi::to_offset (minidx); -- offset_int max = wi::to_offset (maxidx); -- if (!TYPE_UNSIGNED (domain)) -- { -- idx = wi::sext (idx, TYPE_PRECISION (domain)); -- max = wi::sext (max, TYPE_PRECISION (domain)); -- } -- for (int el_off = offset; idx <= max; ++idx) -- { -- tree nref = build4 (ARRAY_REF, elemtype, -- ref, -- wide_int_to_tree (domain, idx), -- NULL_TREE, NULL_TREE); -- scalarize_elem (base, el_off, el_size, -- TYPE_REVERSE_STORAGE_ORDER (decl_type), -- nref, elemtype); -- el_off += el_size; -- } -- } -- } -- break; -- default: -- gcc_unreachable (); -- } --} -- --/* Create total_scalarization accesses for a member of type TYPE, which must -- satisfy either is_gimple_reg_type or scalarizable_type_p. BASE must be the -- top-most VAR_DECL representing the variable; within that, POS and SIZE locate -- the member, REVERSE gives its torage order. and REF must be the reference -- expression for it. */ -- --static void --scalarize_elem (tree base, HOST_WIDE_INT pos, HOST_WIDE_INT size, bool reverse, -- tree ref, tree type) --{ -- if (is_gimple_reg_type (type)) -- { -- struct access *access = create_access_1 (base, pos, size); -- access->expr = ref; -- access->type = type; -- access->grp_total_scalarization = 1; -- access->reverse = reverse; -- /* Accesses for intraprocedural SRA can have their stmt NULL. */ -- } -- else -- completely_scalarize (base, type, pos, ref); --} -- --/* Create a total_scalarization access for VAR as a whole. VAR must be of a -- RECORD_TYPE or ARRAY_TYPE conforming to scalarizable_type_p. */ -- --static void --create_total_scalarization_access (tree var) --{ -- HOST_WIDE_INT size = tree_to_uhwi (DECL_SIZE (var)); -- struct access *access; -- -- access = create_access_1 (var, 0, size); -- access->expr = var; -- access->type = TREE_TYPE (var); -- access->grp_total_scalarization = 1; --} -- - /* Return true if REF has an VIEW_CONVERT_EXPR somewhere in it. */ - - static inline bool -@@ -1795,6 +1705,30 @@ build_ref_for_offset (location_t loc, tree base, poly_int64 offset, - return mem_ref; - } - -+/* Construct and return a memory reference that is equal to a portion of -+ MODEL->expr but is based on BASE. If this cannot be done, return NULL. 
*/ -+ -+static tree -+build_reconstructed_reference (location_t, tree base, struct access *model) -+{ -+ tree expr = model->expr, prev_expr = NULL; -+ while (!types_compatible_p (TREE_TYPE (expr), TREE_TYPE (base))) -+ { -+ if (!handled_component_p (expr)) -+ return NULL; -+ prev_expr = expr; -+ expr = TREE_OPERAND (expr, 0); -+ } -+ -+ if (get_object_alignment (base) < get_object_alignment (expr)) -+ return NULL; -+ -+ TREE_OPERAND (prev_expr, 0) = base; -+ tree ref = unshare_expr (model->expr); -+ TREE_OPERAND (prev_expr, 0) = expr; -+ return ref; -+} -+ - /* Construct a memory reference to a part of an aggregate BASE at the given - OFFSET and of the same type as MODEL. In case this is a reference to a - bit-field, the function will replicate the last component_ref of model's -@@ -1822,9 +1756,19 @@ build_ref_for_model (location_t loc, tree base, HOST_WIDE_INT offset, - NULL_TREE); - } - else -- return -- build_ref_for_offset (loc, base, offset, model->reverse, model->type, -- gsi, insert_after); -+ { -+ tree res; -+ if (model->grp_same_access_path -+ && !TREE_THIS_VOLATILE (base) -+ && offset <= model->offset -+ /* build_reconstructed_reference can still fail if we have already -+ massaged BASE because of another type incompatibility. */ -+ && (res = build_reconstructed_reference (loc, base, model))) -+ return res; -+ else -+ return build_ref_for_offset (loc, base, offset, model->reverse, -+ model->type, gsi, insert_after); -+ } - } - - /* Attempt to build a memory reference that we could but into a gimple -@@ -2076,6 +2020,69 @@ find_var_candidates (void) - return ret; - } - -+/* Return true if EXP is a reference chain of COMPONENT_REFs and AREAY_REFs -+ ending either with a DECL or a MEM_REF with zero offset. */ -+ -+static bool -+path_comparable_for_same_access (tree expr) -+{ -+ while (handled_component_p (expr)) -+ { -+ if (TREE_CODE (expr) == ARRAY_REF) -+ { -+ /* SSA name indices can occur here too when the array is of sie one. -+ But we cannot just re-use array_refs with SSA names elsewhere in -+ the function, so disallow non-constant indices. TODO: Remove this -+ limitation after teaching build_reconstructed_reference to replace -+ the index with the index type lower bound. */ -+ if (TREE_CODE (TREE_OPERAND (expr, 1)) != INTEGER_CST) -+ return false; -+ } -+ expr = TREE_OPERAND (expr, 0); -+ } -+ -+ if (TREE_CODE (expr) == MEM_REF) -+ { -+ if (!zerop (TREE_OPERAND (expr, 1))) -+ return false; -+ } -+ else -+ gcc_assert (DECL_P (expr)); -+ -+ return true; -+} -+ -+/* Assuming that EXP1 consists of only COMPONENT_REFs and ARRAY_REFs, return -+ true if the chain of these handled components are exactly the same as EXP2 -+ and the expression under them is the same DECL or an equivalent MEM_REF. -+ The reference picked by compare_access_positions must go to EXP1. */ -+ -+static bool -+same_access_path_p (tree exp1, tree exp2) -+{ -+ if (TREE_CODE (exp1) != TREE_CODE (exp2)) -+ { -+ /* Special case single-field structures loaded sometimes as the field -+ and sometimes as the structure. If the field is of a scalar type, -+ compare_access_positions will put it into exp1. -+ -+ TODO: The gimple register type condition can be removed if teach -+ compare_access_positions to put inner types first. 
*/ -+ if (is_gimple_reg_type (TREE_TYPE (exp1)) -+ && TREE_CODE (exp1) == COMPONENT_REF -+ && (TYPE_MAIN_VARIANT (TREE_TYPE (TREE_OPERAND (exp1, 0))) -+ == TYPE_MAIN_VARIANT (TREE_TYPE (exp2)))) -+ exp1 = TREE_OPERAND (exp1, 0); -+ else -+ return false; -+ } -+ -+ if (!operand_equal_p (exp1, exp2, OEP_ADDRESS_OF)) -+ return false; -+ -+ return true; -+} -+ - /* Sort all accesses for the given variable, check for partial overlaps and - return NULL if there are any. If there are none, pick a representative for - each combination of offset and size and create a linked list out of them. -@@ -2112,10 +2119,10 @@ sort_and_splice_var_accesses (tree var) - bool grp_assignment_read = access->grp_assignment_read; - bool grp_assignment_write = access->grp_assignment_write; - bool multiple_scalar_reads = false; -- bool total_scalarization = access->grp_total_scalarization; - bool grp_partial_lhs = access->grp_partial_lhs; - bool first_scalar = is_gimple_reg_type (access->type); - bool unscalarizable_region = access->grp_unscalarizable_region; -+ bool grp_same_access_path = true; - bool bf_non_full_precision - = (INTEGRAL_TYPE_P (access->type) - && TYPE_PRECISION (access->type) != access->size -@@ -2134,6 +2141,8 @@ sort_and_splice_var_accesses (tree var) - gcc_assert (access->offset >= low - && access->offset + access->size <= high); - -+ grp_same_access_path = path_comparable_for_same_access (access->expr); -+ - j = i + 1; - while (j < access_count) - { -@@ -2161,7 +2170,6 @@ sort_and_splice_var_accesses (tree var) - grp_assignment_write |= ac2->grp_assignment_write; - grp_partial_lhs |= ac2->grp_partial_lhs; - unscalarizable_region |= ac2->grp_unscalarizable_region; -- total_scalarization |= ac2->grp_total_scalarization; - relink_to_new_repr (access, ac2); - - /* If there are both aggregate-type and scalar-type accesses with -@@ -2184,6 +2192,11 @@ sort_and_splice_var_accesses (tree var) - } - unscalarizable_region = true; - } -+ -+ if (grp_same_access_path -+ && !same_access_path_p (access->expr, ac2->expr)) -+ grp_same_access_path = false; -+ - ac2->group_representative = access; - j++; - } -@@ -2197,11 +2210,10 @@ sort_and_splice_var_accesses (tree var) - access->grp_scalar_write = grp_scalar_write; - access->grp_assignment_read = grp_assignment_read; - access->grp_assignment_write = grp_assignment_write; -- access->grp_hint = total_scalarization -- || (multiple_scalar_reads && !constant_decl_p (var)); -- access->grp_total_scalarization = total_scalarization; -+ access->grp_hint = multiple_scalar_reads && !constant_decl_p (var); - access->grp_partial_lhs = grp_partial_lhs; - access->grp_unscalarizable_region = unscalarizable_region; -+ access->grp_same_access_path = grp_same_access_path; - - *prev_acc_ptr = access; - prev_acc_ptr = &access->next_grp; -@@ -2395,6 +2407,88 @@ build_access_trees (struct access *access) - return true; - } - -+/* Traverse the access forest where ROOT is the first root and verify that -+ various important invariants hold true. 
*/ -+ -+DEBUG_FUNCTION void -+verify_sra_access_forest (struct access *root) -+{ -+ struct access *access = root; -+ tree first_base = root->base; -+ gcc_assert (DECL_P (first_base)); -+ do -+ { -+ gcc_assert (access->base == first_base); -+ if (access->parent) -+ gcc_assert (access->offset >= access->parent->offset -+ && access->size <= access->parent->size); -+ if (access->next_sibling) -+ gcc_assert (access->next_sibling->offset -+ >= access->offset + access->size); -+ -+ poly_int64 poffset, psize, pmax_size; -+ bool reverse; -+ tree base = get_ref_base_and_extent (access->expr, &poffset, &psize, -+ &pmax_size, &reverse); -+ HOST_WIDE_INT offset, size, max_size; -+ if (!poffset.is_constant (&offset) -+ || !psize.is_constant (&size) -+ || !pmax_size.is_constant (&max_size)) -+ gcc_unreachable (); -+ gcc_assert (base == first_base); -+ gcc_assert (offset == access->offset); -+ gcc_assert (access->grp_unscalarizable_region -+ || size == max_size); -+ gcc_assert (max_size == access->size); -+ gcc_assert (reverse == access->reverse); -+ -+ if (access->first_child) -+ { -+ gcc_assert (access->first_child->parent == access); -+ access = access->first_child; -+ } -+ else if (access->next_sibling) -+ { -+ gcc_assert (access->next_sibling->parent == access->parent); -+ access = access->next_sibling; -+ } -+ else -+ { -+ while (access->parent && !access->next_sibling) -+ access = access->parent; -+ if (access->next_sibling) -+ access = access->next_sibling; -+ else -+ { -+ gcc_assert (access == root); -+ root = root->next_grp; -+ access = root; -+ } -+ } -+ } -+ while (access); -+} -+ -+/* Verify access forests of all candidates with accesses by calling -+ verify_access_forest on each on them. */ -+ -+DEBUG_FUNCTION void -+verify_all_sra_access_forests (void) -+{ -+ bitmap_iterator bi; -+ unsigned i; -+ EXECUTE_IF_SET_IN_BITMAP (candidate_bitmap, 0, i, bi) -+ { -+ tree var = candidate (i); -+ struct access *access = get_first_repr_for_decl (var); -+ if (access) -+ { -+ gcc_assert (access->base == var); -+ verify_sra_access_forest (access); -+ } -+ } -+} -+ - /* Return true if expr contains some ARRAY_REFs into a variable bounded - array. */ - -@@ -2412,15 +2506,16 @@ expr_with_var_bounded_array_refs_p (tree expr) - } - - /* Analyze the subtree of accesses rooted in ROOT, scheduling replacements when -- both seeming beneficial and when ALLOW_REPLACEMENTS allows it. Also set all -- sorts of access flags appropriately along the way, notably always set -- grp_read and grp_assign_read according to MARK_READ and grp_write when -- MARK_WRITE is true. -+ both seeming beneficial and when ALLOW_REPLACEMENTS allows it. If TOTALLY -+ is set, we are totally scalarizing the aggregate. Also set all sorts of -+ access flags appropriately along the way, notably always set grp_read and -+ grp_assign_read according to MARK_READ and grp_write when MARK_WRITE is -+ true. 
- - Creating a replacement for a scalar access is considered beneficial if its -- grp_hint is set (this means we are either attempting total scalarization or -- there is more than one direct read access) or according to the following -- table: -+ grp_hint ot TOTALLY is set (this means either that there is more than one -+ direct read access or that we are attempting total scalarization) or -+ according to the following table: - - Access written to through a scalar type (once or more times) - | -@@ -2451,7 +2546,7 @@ expr_with_var_bounded_array_refs_p (tree expr) - - static bool - analyze_access_subtree (struct access *root, struct access *parent, -- bool allow_replacements) -+ bool allow_replacements, bool totally) - { - struct access *child; - HOST_WIDE_INT limit = root->offset + root->size; -@@ -2469,8 +2564,8 @@ analyze_access_subtree (struct access *root, struct access *parent, - root->grp_write = 1; - if (parent->grp_assignment_write) - root->grp_assignment_write = 1; -- if (parent->grp_total_scalarization) -- root->grp_total_scalarization = 1; -+ if (!parent->grp_same_access_path) -+ root->grp_same_access_path = 0; - } - - if (root->grp_unscalarizable_region) -@@ -2483,10 +2578,10 @@ analyze_access_subtree (struct access *root, struct access *parent, - { - hole |= covered_to < child->offset; - sth_created |= analyze_access_subtree (child, root, -- allow_replacements && !scalar); -+ allow_replacements && !scalar, -+ totally); - - root->grp_unscalarized_data |= child->grp_unscalarized_data; -- root->grp_total_scalarization &= child->grp_total_scalarization; - if (child->grp_covered) - covered_to += child->size; - else -@@ -2494,7 +2589,9 @@ analyze_access_subtree (struct access *root, struct access *parent, - } - - if (allow_replacements && scalar && !root->first_child -- && (root->grp_hint -+ && (totally || !root->grp_total_scalarization) -+ && (totally -+ || root->grp_hint - || ((root->grp_scalar_read || root->grp_assignment_read) - && (root->grp_scalar_write || root->grp_assignment_write)))) - { -@@ -2536,6 +2633,7 @@ analyze_access_subtree (struct access *root, struct access *parent, - { - if (allow_replacements - && scalar && !root->first_child -+ && !root->grp_total_scalarization - && (root->grp_scalar_write || root->grp_assignment_write) - && !bitmap_bit_p (cannot_scalarize_away_bitmap, - DECL_UID (root->base))) -@@ -2556,7 +2654,7 @@ analyze_access_subtree (struct access *root, struct access *parent, - root->grp_total_scalarization = 0; - } - -- if (!hole || root->grp_total_scalarization) -+ if (!hole || totally) - root->grp_covered = 1; - else if (root->grp_write || comes_initialized_p (root->base)) - root->grp_unscalarized_data = 1; /* not covered and written to */ -@@ -2572,7 +2670,8 @@ analyze_access_trees (struct access *access) - - while (access) - { -- if (analyze_access_subtree (access, NULL, true)) -+ if (analyze_access_subtree (access, NULL, true, -+ access->grp_total_scalarization)) - ret = true; - access = access->next_grp; - } -@@ -2638,6 +2737,7 @@ create_artificial_child_access (struct access *parent, struct access *model, - access->offset = new_offset; - access->size = model->size; - access->type = model->type; -+ access->parent = parent; - access->grp_write = set_grp_write; - access->grp_read = false; - access->reverse = model->reverse; -@@ -2721,13 +2821,17 @@ propagate_subaccesses_across_link (struct access *lacc, struct access *racc) - lacc->type = racc->type; - if (build_user_friendly_ref_for_offset (&t, TREE_TYPE (t), - lacc->offset, racc->type)) -- 
lacc->expr = t; -+ { -+ lacc->expr = t; -+ lacc->grp_same_access_path = true; -+ } - else - { - lacc->expr = build_ref_for_model (EXPR_LOCATION (lacc->base), - lacc->base, lacc->offset, - racc, NULL, false); - lacc->grp_no_warning = true; -+ lacc->grp_same_access_path = false; - } - } - return ret; -@@ -2840,6 +2944,369 @@ propagate_all_subaccesses (void) - } - } - -+/* Return true if the forest beginning with ROOT does not contain -+ unscalarizable regions or non-byte aligned accesses. */ -+ -+static bool -+can_totally_scalarize_forest_p (struct access *root) -+{ -+ struct access *access = root; -+ do -+ { -+ if (access->grp_unscalarizable_region -+ || (access->offset % BITS_PER_UNIT) != 0 -+ || (access->size % BITS_PER_UNIT) != 0 -+ || (is_gimple_reg_type (access->type) -+ && access->first_child)) -+ return false; -+ -+ if (access->first_child) -+ access = access->first_child; -+ else if (access->next_sibling) -+ access = access->next_sibling; -+ else -+ { -+ while (access->parent && !access->next_sibling) -+ access = access->parent; -+ if (access->next_sibling) -+ access = access->next_sibling; -+ else -+ { -+ gcc_assert (access == root); -+ root = root->next_grp; -+ access = root; -+ } -+ } -+ } -+ while (access); -+ return true; -+} -+ -+/* Create and return an ACCESS in PARENT spanning from POS with SIZE, TYPE and -+ reference EXPR for total scalarization purposes and mark it as such. Within -+ the children of PARENT, link it in between PTR and NEXT_SIBLING. */ -+ -+static struct access * -+create_total_scalarization_access (struct access *parent, HOST_WIDE_INT pos, -+ HOST_WIDE_INT size, tree type, tree expr, -+ struct access **ptr, -+ struct access *next_sibling) -+{ -+ struct access *access = access_pool.allocate (); -+ memset (access, 0, sizeof (struct access)); -+ access->base = parent->base; -+ access->offset = pos; -+ access->size = size; -+ access->expr = expr; -+ access->type = type; -+ access->parent = parent; -+ access->grp_write = parent->grp_write; -+ access->grp_total_scalarization = 1; -+ access->grp_hint = 1; -+ access->grp_same_access_path = path_comparable_for_same_access (expr); -+ access->reverse = reverse_storage_order_for_component_p (expr); -+ -+ access->next_sibling = next_sibling; -+ *ptr = access; -+ return access; -+} -+ -+/* Create and return an ACCESS in PARENT spanning from POS with SIZE, TYPE and -+ reference EXPR for total scalarization purposes and mark it as such, link it -+ at *PTR and reshape the tree so that those elements at *PTR and their -+ siblings which fall within the part described by POS and SIZE are moved to -+ be children of the new access. If a partial overlap is detected, return -+ NULL. 
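As a hedged illustration of the partial-overlap bail-out just described (the example is mine, not from the patch): an access that straddles a field boundary cannot be re-parented under either field, so the reshaping step gives up and total scalarization is abandoned for the variable.

#include <string.h>

struct s { char lo[2]; char hi[2]; };

void
straddle (struct s *d, const char *src)
{
  struct s tmp = *d;
  memcpy (&tmp.lo[1], src, 2);   /* 2-byte access at bit offset 8: overlaps
                                    both the lo and the hi field          */
  *d = tmp;
}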
*/ -+ -+static struct access * -+create_total_access_and_reshape (struct access *parent, HOST_WIDE_INT pos, -+ HOST_WIDE_INT size, tree type, tree expr, -+ struct access **ptr) -+{ -+ struct access **p = ptr; -+ -+ while (*p && (*p)->offset < pos + size) -+ { -+ if ((*p)->offset + (*p)->size > pos + size) -+ return NULL; -+ p = &(*p)->next_sibling; -+ } -+ -+ struct access *next_child = *ptr; -+ struct access *new_acc -+ = create_total_scalarization_access (parent, pos, size, type, expr, -+ ptr, *p); -+ if (p != ptr) -+ { -+ new_acc->first_child = next_child; -+ *p = NULL; -+ for (struct access *a = next_child; a; a = a->next_sibling) -+ a->parent = new_acc; -+ } -+ return new_acc; -+} -+ -+static bool totally_scalarize_subtree (struct access *root); -+ -+/* Return true if INNER is either the same type as OUTER or if it is the type -+ of a record field in OUTER at offset zero, possibly in nested -+ sub-records. */ -+ -+static bool -+access_and_field_type_match_p (tree outer, tree inner) -+{ -+ if (TYPE_MAIN_VARIANT (outer) == TYPE_MAIN_VARIANT (inner)) -+ return true; -+ if (TREE_CODE (outer) != RECORD_TYPE) -+ return false; -+ tree fld = TYPE_FIELDS (outer); -+ while (fld) -+ { -+ if (TREE_CODE (fld) == FIELD_DECL) -+ { -+ if (!zerop (DECL_FIELD_OFFSET (fld))) -+ return false; -+ if (TYPE_MAIN_VARIANT (TREE_TYPE (fld)) == inner) -+ return true; -+ if (TREE_CODE (TREE_TYPE (fld)) == RECORD_TYPE) -+ fld = TYPE_FIELDS (TREE_TYPE (fld)); -+ else -+ return false; -+ } -+ else -+ fld = DECL_CHAIN (fld); -+ } -+ return false; -+} -+ -+/* Return type of total_should_skip_creating_access indicating whether a total -+ scalarization access for a field/element should be created, whether it -+ already exists or whether the entire total scalarization has to fail. */ -+ -+enum total_sra_field_state {TOTAL_FLD_CREATE, TOTAL_FLD_DONE, TOTAL_FLD_FAILED}; -+ -+/* Do all the necessary steps in total scalarization when the given aggregate -+ type has a TYPE at POS with the given SIZE should be put into PARENT and -+ when we have processed all its siblings with smaller offsets up until and -+ including LAST_SEEN_SIBLING (which can be NULL). -+ -+ If some further siblings are to be skipped, set *LAST_SEEN_SIBLING as -+ appropriate. Return TOTAL_FLD_CREATE id the caller should carry on with -+ creating a new access, TOTAL_FLD_DONE if access or accesses capable of -+ representing the described part of the aggregate for the purposes of total -+ scalarization already exist or TOTAL_FLD_FAILED if there is a problem which -+ prevents total scalarization from happening at all. */ -+ -+static enum total_sra_field_state -+total_should_skip_creating_access (struct access *parent, -+ struct access **last_seen_sibling, -+ tree type, HOST_WIDE_INT pos, -+ HOST_WIDE_INT size) -+{ -+ struct access *next_child; -+ if (!*last_seen_sibling) -+ next_child = parent->first_child; -+ else -+ next_child = (*last_seen_sibling)->next_sibling; -+ -+ /* First, traverse the chain of siblings until it points to an access with -+ offset at least equal to POS. Check all skipped accesses whether they -+ span the POS boundary and if so, return with a failure. */ -+ while (next_child && next_child->offset < pos) -+ { -+ if (next_child->offset + next_child->size > pos) -+ return TOTAL_FLD_FAILED; -+ *last_seen_sibling = next_child; -+ next_child = next_child->next_sibling; -+ } -+ -+ /* Now check whether next_child has exactly the right POS and SIZE and if so, -+ whether it can represent what we need and can be totally scalarized -+ itself. 
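A small, hypothetical example of the access_and_field_type_match_p test above: an existing access whose type is a record found at offset zero inside the field's type (possibly through nested records) is still acceptable as the representative for that field.

struct inner { int a; };
struct mid   { struct inner i; };
struct outer { struct mid m; int b; };

int
first_int (struct outer o)
{
  struct inner tmp = o.m.i;   /* an access of type `struct inner' at offset 0
                                 can stand in for the `m' field of type
                                 `struct mid' during total scalarization    */
  return tmp.a;
}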
*/ -+ if (next_child && next_child->offset == pos -+ && next_child->size == size) -+ { -+ if (!is_gimple_reg_type (next_child->type) -+ && (!access_and_field_type_match_p (type, next_child->type) -+ || !totally_scalarize_subtree (next_child))) -+ return TOTAL_FLD_FAILED; -+ -+ *last_seen_sibling = next_child; -+ return TOTAL_FLD_DONE; -+ } -+ -+ /* If the child we're looking at would partially overlap, we just cannot -+ totally scalarize. */ -+ if (next_child -+ && next_child->offset < pos + size -+ && next_child->offset + next_child->size > pos + size) -+ return TOTAL_FLD_FAILED; -+ -+ if (is_gimple_reg_type (type)) -+ { -+ /* We don't scalarize accesses that are children of other scalar type -+ accesses, so if we go on and create an access for a register type, -+ there should not be any pre-existing children. There are rare cases -+ where the requested type is a vector but we already have register -+ accesses for all its elements which is equally good. Detect that -+ situation or whether we need to bail out. */ -+ -+ HOST_WIDE_INT covered = pos; -+ bool skipping = false; -+ while (next_child -+ && next_child->offset + next_child->size <= pos + size) -+ { -+ if (next_child->offset != covered -+ || !is_gimple_reg_type (next_child->type)) -+ return TOTAL_FLD_FAILED; -+ -+ covered += next_child->size; -+ *last_seen_sibling = next_child; -+ next_child = next_child->next_sibling; -+ skipping = true; -+ } -+ -+ if (skipping) -+ { -+ if (covered != pos + size) -+ return TOTAL_FLD_FAILED; -+ else -+ return TOTAL_FLD_DONE; -+ } -+ } -+ -+ return TOTAL_FLD_CREATE; -+} -+ -+/* Go over sub-tree rooted in ROOT and attempt to create scalar accesses -+ spanning all uncovered areas covered by ROOT, return false if the attempt -+ failed. All created accesses will have grp_unscalarizable_region set (and -+ should be ignored if the function returns false). */ -+ -+static bool -+totally_scalarize_subtree (struct access *root) -+{ -+ gcc_checking_assert (!root->grp_unscalarizable_region); -+ gcc_checking_assert (!is_gimple_reg_type (root->type)); -+ -+ struct access *last_seen_sibling = NULL; -+ -+ switch (TREE_CODE (root->type)) -+ { -+ case RECORD_TYPE: -+ for (tree fld = TYPE_FIELDS (root->type); fld; fld = DECL_CHAIN (fld)) -+ if (TREE_CODE (fld) == FIELD_DECL) -+ { -+ tree ft = TREE_TYPE (fld); -+ HOST_WIDE_INT fsize = tree_to_uhwi (DECL_SIZE (fld)); -+ if (!fsize) -+ continue; -+ -+ HOST_WIDE_INT pos = root->offset + int_bit_position (fld); -+ enum total_sra_field_state -+ state = total_should_skip_creating_access (root, -+ &last_seen_sibling, -+ ft, pos, fsize); -+ switch (state) -+ { -+ case TOTAL_FLD_FAILED: -+ return false; -+ case TOTAL_FLD_DONE: -+ continue; -+ case TOTAL_FLD_CREATE: -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ -+ struct access **p = (last_seen_sibling -+ ? 
&last_seen_sibling->next_sibling -+ : &root->first_child); -+ tree nref = build3 (COMPONENT_REF, ft, root->expr, fld, NULL_TREE); -+ struct access *new_child -+ = create_total_access_and_reshape (root, pos, fsize, ft, nref, p); -+ if (!new_child) -+ return false; -+ -+ if (!is_gimple_reg_type (ft) -+ && !totally_scalarize_subtree (new_child)) -+ return false; -+ last_seen_sibling = new_child; -+ } -+ break; -+ case ARRAY_TYPE: -+ { -+ tree elemtype = TREE_TYPE (root->type); -+ tree elem_size = TYPE_SIZE (elemtype); -+ gcc_assert (elem_size && tree_fits_shwi_p (elem_size)); -+ HOST_WIDE_INT el_size = tree_to_shwi (elem_size); -+ gcc_assert (el_size > 0); -+ -+ tree minidx = TYPE_MIN_VALUE (TYPE_DOMAIN (root->type)); -+ gcc_assert (TREE_CODE (minidx) == INTEGER_CST); -+ tree maxidx = TYPE_MAX_VALUE (TYPE_DOMAIN (root->type)); -+ /* Skip (some) zero-length arrays; others have MAXIDX == MINIDX - 1. */ -+ if (!maxidx) -+ goto out; -+ gcc_assert (TREE_CODE (maxidx) == INTEGER_CST); -+ tree domain = TYPE_DOMAIN (root->type); -+ /* MINIDX and MAXIDX are inclusive, and must be interpreted in -+ DOMAIN (e.g. signed int, whereas min/max may be size_int). */ -+ offset_int idx = wi::to_offset (minidx); -+ offset_int max = wi::to_offset (maxidx); -+ if (!TYPE_UNSIGNED (domain)) -+ { -+ idx = wi::sext (idx, TYPE_PRECISION (domain)); -+ max = wi::sext (max, TYPE_PRECISION (domain)); -+ } -+ for (HOST_WIDE_INT pos = root->offset; -+ idx <= max; -+ pos += el_size, ++idx) -+ { -+ enum total_sra_field_state -+ state = total_should_skip_creating_access (root, -+ &last_seen_sibling, -+ elemtype, pos, -+ el_size); -+ switch (state) -+ { -+ case TOTAL_FLD_FAILED: -+ return false; -+ case TOTAL_FLD_DONE: -+ continue; -+ case TOTAL_FLD_CREATE: -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ -+ struct access **p = (last_seen_sibling -+ ? &last_seen_sibling->next_sibling -+ : &root->first_child); -+ tree nref = build4 (ARRAY_REF, elemtype, root->expr, -+ wide_int_to_tree (domain, idx), -+ NULL_TREE, NULL_TREE); -+ struct access *new_child -+ = create_total_access_and_reshape (root, pos, el_size, elemtype, -+ nref, p); -+ if (!new_child) -+ return false; -+ -+ if (!is_gimple_reg_type (elemtype) -+ && !totally_scalarize_subtree (new_child)) -+ return false; -+ last_seen_sibling = new_child; -+ } -+ } -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ -+ out: -+ return true; -+} -+ - /* Go through all accesses collected throughout the (intraprocedural) analysis - stage, exclude overlapping ones, identify representatives and build trees - out of them, making decisions about scalarization on the way. Return true -@@ -2852,8 +3319,22 @@ analyze_all_variable_accesses (void) - bitmap tmp = BITMAP_ALLOC (NULL); - bitmap_iterator bi; - unsigned i; -- bool optimize_speed_p = !optimize_function_for_size_p (cfun); - -+ bitmap_copy (tmp, candidate_bitmap); -+ EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi) -+ { -+ tree var = candidate (i); -+ struct access *access; -+ -+ access = sort_and_splice_var_accesses (var); -+ if (!access || !build_access_trees (access)) -+ disqualify_candidate (var, -+ "No or inhibitingly overlapping accesses."); -+ } -+ -+ propagate_all_subaccesses (); -+ -+ bool optimize_speed_p = !optimize_function_for_size_p (cfun); - enum compiler_param param = optimize_speed_p - ? 
PARAM_SRA_MAX_SCALARIZATION_SIZE_SPEED - : PARAM_SRA_MAX_SCALARIZATION_SIZE_SIZE; -@@ -2872,46 +3353,59 @@ analyze_all_variable_accesses (void) - && !bitmap_bit_p (cannot_scalarize_away_bitmap, i)) - { - tree var = candidate (i); -+ if (!VAR_P (var)) -+ continue; - -- if (VAR_P (var) && scalarizable_type_p (TREE_TYPE (var), -- constant_decl_p (var))) -+ if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (var))) > max_scalarization_size) - { -- if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (var))) -- <= max_scalarization_size) -- { -- create_total_scalarization_access (var); -- completely_scalarize (var, TREE_TYPE (var), 0, var); -- statistics_counter_event (cfun, -- "Totally-scalarized aggregates", 1); -- if (dump_file && (dump_flags & TDF_DETAILS)) -- { -- fprintf (dump_file, "Will attempt to totally scalarize "); -- print_generic_expr (dump_file, var); -- fprintf (dump_file, " (UID: %u): \n", DECL_UID (var)); -- } -- } -- else if (dump_file && (dump_flags & TDF_DETAILS)) -+ if (dump_file && (dump_flags & TDF_DETAILS)) - { - fprintf (dump_file, "Too big to totally scalarize: "); - print_generic_expr (dump_file, var); - fprintf (dump_file, " (UID: %u)\n", DECL_UID (var)); - } -+ continue; - } -- } - -- bitmap_copy (tmp, candidate_bitmap); -- EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi) -- { -- tree var = candidate (i); -- struct access *access; -+ bool all_types_ok = true; -+ for (struct access *access = get_first_repr_for_decl (var); -+ access; -+ access = access->next_grp) -+ if (!can_totally_scalarize_forest_p (access) -+ || !scalarizable_type_p (access->type, constant_decl_p (var))) -+ { -+ all_types_ok = false; -+ break; -+ } -+ if (!all_types_ok) -+ continue; - -- access = sort_and_splice_var_accesses (var); -- if (!access || !build_access_trees (access)) -- disqualify_candidate (var, -- "No or inhibitingly overlapping accesses."); -- } -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "Will attempt to totally scalarize "); -+ print_generic_expr (dump_file, var); -+ fprintf (dump_file, " (UID: %u): \n", DECL_UID (var)); -+ } -+ bool scalarized = true; -+ for (struct access *access = get_first_repr_for_decl (var); -+ access; -+ access = access->next_grp) -+ if (!is_gimple_reg_type (access->type) -+ && !totally_scalarize_subtree (access)) -+ { -+ scalarized = false; -+ break; -+ } - -- propagate_all_subaccesses (); -+ if (scalarized) -+ for (struct access *access = get_first_repr_for_decl (var); -+ access; -+ access = access->next_grp) -+ access->grp_total_scalarization = true; -+ } -+ -+ if (flag_checking) -+ verify_all_sra_access_forests (); - - bitmap_copy (tmp, candidate_bitmap); - EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi) -@@ -3775,25 +4269,39 @@ initialize_constant_pool_replacements (void) - tree var = candidate (i); - if (!constant_decl_p (var)) - continue; -- vec *access_vec = get_base_access_vector (var); -- if (!access_vec) -- continue; -- for (unsigned i = 0; i < access_vec->length (); i++) -+ -+ struct access *access = get_first_repr_for_decl (var); -+ -+ while (access) - { -- struct access *access = (*access_vec)[i]; -- if (!access->replacement_decl) -- continue; -- gassign *stmt -- = gimple_build_assign (get_access_replacement (access), -- unshare_expr (access->expr)); -- if (dump_file && (dump_flags & TDF_DETAILS)) -+ if (access->replacement_decl) - { -- fprintf (dump_file, "Generating constant initializer: "); -- print_gimple_stmt (dump_file, stmt, 0); -- fprintf (dump_file, "\n"); -+ gassign *stmt -+ = gimple_build_assign (get_access_replacement (access), -+ 
unshare_expr (access->expr)); -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "Generating constant initializer: "); -+ print_gimple_stmt (dump_file, stmt, 0); -+ fprintf (dump_file, "\n"); -+ } -+ gsi_insert_after (&gsi, stmt, GSI_NEW_STMT); -+ update_stmt (stmt); -+ } -+ -+ if (access->first_child) -+ access = access->first_child; -+ else if (access->next_sibling) -+ access = access->next_sibling; -+ else -+ { -+ while (access->parent && !access->next_sibling) -+ access = access->parent; -+ if (access->next_sibling) -+ access = access->next_sibling; -+ else -+ access = access->next_grp; - } -- gsi_insert_after (&gsi, stmt, GSI_NEW_STMT); -- update_stmt (stmt); - } - } - -diff --git a/gcc/tree-ssa-address.c b/gcc/tree-ssa-address.c -index 2e5d87734..3195a21c7 100644 ---- a/gcc/tree-ssa-address.c -+++ b/gcc/tree-ssa-address.c -@@ -1141,6 +1141,35 @@ maybe_fold_tmr (tree ref) - return new_ref; - } - -+/* Return the preferred index scale factor for accessing memory of mode -+ MEM_MODE in the address space of pointer BASE. Assume that we're -+ optimizing for speed if SPEED is true and for size otherwise. */ -+unsigned int -+preferred_mem_scale_factor (tree base, machine_mode mem_mode, -+ bool speed) -+{ -+ struct mem_address parts = {}; -+ addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (base)); -+ unsigned int fact = GET_MODE_UNIT_SIZE (mem_mode); -+ -+ /* Addressing mode "base + index". */ -+ parts.index = integer_one_node; -+ parts.base = integer_one_node; -+ rtx addr = addr_for_mem_ref (&parts, as, false); -+ unsigned cost = address_cost (addr, mem_mode, as, speed); -+ -+ /* Addressing mode "base + index << scale". */ -+ parts.step = wide_int_to_tree (sizetype, fact); -+ addr = addr_for_mem_ref (&parts, as, false); -+ unsigned new_cost = address_cost (addr, mem_mode, as, speed); -+ -+ /* Compare the cost of an address with an unscaled index with -+ a scaled index and return factor if useful. */ -+ if (new_cost < cost) -+ return GET_MODE_UNIT_SIZE (mem_mode); -+ return 1; -+} -+ - /* Dump PARTS to FILE. */ - - extern void dump_mem_address (FILE *, struct mem_address *); -diff --git a/gcc/tree-ssa-address.h b/gcc/tree-ssa-address.h -index 6fa4eae89..9812f36fb 100644 ---- a/gcc/tree-ssa-address.h -+++ b/gcc/tree-ssa-address.h -@@ -39,4 +39,7 @@ tree create_mem_ref (gimple_stmt_iterator *, tree, - extern void copy_ref_info (tree, tree); - tree maybe_fold_tmr (tree); - -+extern unsigned int preferred_mem_scale_factor (tree base, -+ machine_mode mem_mode, -+ bool speed); - #endif /* GCC_TREE_SSA_ADDRESS_H */ -diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c -index 8db6a34e0..dbe2fda96 100644 ---- a/gcc/tree-ssa-ccp.c -+++ b/gcc/tree-ssa-ccp.c -@@ -614,9 +614,17 @@ get_value_for_expr (tree expr, bool for_bits_p) - val.mask = -1; - } - if (for_bits_p -- && val.lattice_val == CONSTANT -- && TREE_CODE (val.value) == ADDR_EXPR) -- val = get_value_from_alignment (val.value); -+ && val.lattice_val == CONSTANT) -+ { -+ if (TREE_CODE (val.value) == ADDR_EXPR) -+ val = get_value_from_alignment (val.value); -+ else if (TREE_CODE (val.value) != INTEGER_CST) -+ { -+ val.lattice_val = VARYING; -+ val.value = NULL_TREE; -+ val.mask = -1; -+ } -+ } - /* Fall back to a copy value. 
*/ - if (!for_bits_p - && val.lattice_val == VARYING -@@ -2566,7 +2574,7 @@ optimize_stack_restore (gimple_stmt_iterator i) - || ALLOCA_FUNCTION_CODE_P (DECL_FUNCTION_CODE (callee))) - return NULL_TREE; - -- if (DECL_FUNCTION_CODE (callee) == BUILT_IN_STACK_RESTORE) -+ if (fndecl_built_in_p (callee, BUILT_IN_STACK_RESTORE)) - goto second_stack_restore; - } - -@@ -2625,9 +2633,6 @@ optimize_stdarg_builtin (gimple *call) - bool va_list_simple_ptr; - location_t loc = gimple_location (call); - -- if (gimple_code (call) != GIMPLE_CALL) -- return NULL_TREE; -- - callee = gimple_call_fndecl (call); - - cfun_va_list = targetm.fn_abi_va_list (callee); -@@ -2930,12 +2935,10 @@ optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip, - bit, flag); - gimple_call_set_lhs (g, new_lhs); - gimple_set_location (g, gimple_location (call)); -- gimple_set_vuse (g, gimple_vuse (call)); -- gimple_set_vdef (g, gimple_vdef (call)); -+ gimple_move_vops (g, call); - bool throws = stmt_can_throw_internal (cfun, call); - gimple_call_set_nothrow (as_a (g), - gimple_call_nothrow_p (as_a (call))); -- SSA_NAME_DEF_STMT (gimple_vdef (call)) = g; - gimple_stmt_iterator gsi = *gsip; - gsi_insert_after (&gsi, g, GSI_NEW_STMT); - edge e = NULL; -diff --git a/gcc/tree-ssa-dce.c b/gcc/tree-ssa-dce.c -index a38899edd..be9f501c9 100644 ---- a/gcc/tree-ssa-dce.c -+++ b/gcc/tree-ssa-dce.c -@@ -115,6 +115,14 @@ static bool cfg_altered; - static int *bb_postorder; - - -+/* True if we should treat any stmt with a vdef as necessary. */ -+ -+static inline bool -+keep_all_vdefs_p () -+{ -+ return optimize_debug; -+} -+ - /* If STMT is not already marked necessary, mark it, and add it to the - worklist if ADD_TO_WORKLIST is true. */ - -@@ -311,6 +319,12 @@ mark_stmt_if_obviously_necessary (gimple *stmt, bool aggressive) - return; - } - -+ if (gimple_vdef (stmt) && keep_all_vdefs_p ()) -+ { -+ mark_stmt_necessary (stmt, true); -+ return; -+ } -+ - return; - } - -@@ -526,6 +540,9 @@ mark_aliased_reaching_defs_necessary_1 (ao_ref *ref, tree vdef, void *data) - static void - mark_aliased_reaching_defs_necessary (gimple *stmt, tree ref) - { -+ /* Should have been caught before calling this function. */ -+ gcc_checking_assert (!keep_all_vdefs_p ()); -+ - unsigned int chain; - ao_ref refd; - gcc_assert (!chain_ovfl); -@@ -599,6 +616,8 @@ mark_all_reaching_defs_necessary_1 (ao_ref *ref ATTRIBUTE_UNUSED, - static void - mark_all_reaching_defs_necessary (gimple *stmt) - { -+ /* Should have been caught before calling this function. */ -+ gcc_checking_assert (!keep_all_vdefs_p ()); - walk_aliased_vdefs (NULL, gimple_vuse (stmt), - mark_all_reaching_defs_necessary_1, NULL, &visited); - } -@@ -798,6 +817,10 @@ propagate_necessity (bool aggressive) - if (!use) - continue; - -+ /* No need to search for vdefs if we intrinsicly keep them all. */ -+ if (keep_all_vdefs_p ()) -+ continue; -+ - /* If we dropped to simple mode make all immediately - reachable definitions necessary. 
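A sketch (mine, not taken from the patch) of what keep_all_vdefs_p is meant to preserve: when optimizing for debugging, statements that define virtual operands (memory stores) are kept even if nothing ever reads them, so the stored values remain inspectable; outside of optimize_debug the stores below are ordinary dead-store candidates.

int
sum_with_trace (const int *a, int n)
{
  int trace[16];              /* never read back, address does not escape */
  int s = 0;
  for (int i = 0; i < n; i++)
    {
      trace[i & 15] = a[i];   /* store with a VDEF: dead for DCE, but kept
                                 when keep_all_vdefs_p () is true          */
      s += a[i];
    }
  return s;
}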
*/ - if (chain_ovfl) -diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c -index e753689a7..0d716062b 100644 ---- a/gcc/tree-ssa-forwprop.c -+++ b/gcc/tree-ssa-forwprop.c -@@ -2011,16 +2011,12 @@ get_bit_field_ref_def (tree val, enum tree_code &conv_code) - return NULL_TREE; - enum tree_code code = gimple_assign_rhs_code (def_stmt); - if (code == FLOAT_EXPR -- || code == FIX_TRUNC_EXPR) -+ || code == FIX_TRUNC_EXPR -+ || CONVERT_EXPR_CODE_P (code)) - { - tree op1 = gimple_assign_rhs1 (def_stmt); - if (conv_code == ERROR_MARK) -- { -- if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (val))), -- GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (op1))))) -- return NULL_TREE; -- conv_code = code; -- } -+ conv_code = code; - else if (conv_code != code) - return NULL_TREE; - if (TREE_CODE (op1) != SSA_NAME) -@@ -2041,109 +2037,213 @@ static bool - simplify_vector_constructor (gimple_stmt_iterator *gsi) - { - gimple *stmt = gsi_stmt (*gsi); -- tree op, op2, orig[2], type, elem_type; -+ tree op, orig[2], type, elem_type; - unsigned elem_size, i; - unsigned HOST_WIDE_INT nelts; -+ unsigned HOST_WIDE_INT refnelts; - enum tree_code conv_code; - constructor_elt *elt; - bool maybe_ident; - -- gcc_checking_assert (gimple_assign_rhs_code (stmt) == CONSTRUCTOR); -- - op = gimple_assign_rhs1 (stmt); - type = TREE_TYPE (op); -- gcc_checking_assert (TREE_CODE (type) == VECTOR_TYPE); -+ gcc_checking_assert (TREE_CODE (op) == CONSTRUCTOR -+ && TREE_CODE (type) == VECTOR_TYPE); - - if (!TYPE_VECTOR_SUBPARTS (type).is_constant (&nelts)) - return false; - elem_type = TREE_TYPE (type); - elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type)); - -- vec_perm_builder sel (nelts, nelts, 1); - orig[0] = NULL; - orig[1] = NULL; - conv_code = ERROR_MARK; - maybe_ident = true; - tree one_constant = NULL_TREE; -+ tree one_nonconstant = NULL_TREE; - auto_vec constants; - constants.safe_grow_cleared (nelts); -+ auto_vec, 64> elts; - FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (op), i, elt) - { - tree ref, op1; -+ unsigned int elem; - - if (i >= nelts) - return false; - -+ /* Look for elements extracted and possibly converted from -+ another vector. */ - op1 = get_bit_field_ref_def (elt->value, conv_code); -- if (op1) -+ if (op1 -+ && TREE_CODE ((ref = TREE_OPERAND (op1, 0))) == SSA_NAME -+ && VECTOR_TYPE_P (TREE_TYPE (ref)) -+ && useless_type_conversion_p (TREE_TYPE (op1), -+ TREE_TYPE (TREE_TYPE (ref))) -+ && constant_multiple_p (bit_field_offset (op1), -+ bit_field_size (op1), &elem) -+ && TYPE_VECTOR_SUBPARTS (TREE_TYPE (ref)).is_constant (&refnelts)) - { -- ref = TREE_OPERAND (op1, 0); - unsigned int j; - for (j = 0; j < 2; ++j) - { - if (!orig[j]) - { -- if (TREE_CODE (ref) != SSA_NAME) -- return false; -- if (! VECTOR_TYPE_P (TREE_TYPE (ref)) -- || ! useless_type_conversion_p (TREE_TYPE (op1), -- TREE_TYPE (TREE_TYPE (ref)))) -- return false; -- if (j && !useless_type_conversion_p (TREE_TYPE (orig[0]), -- TREE_TYPE (ref))) -- return false; -- orig[j] = ref; -- break; -+ if (j == 0 -+ || useless_type_conversion_p (TREE_TYPE (orig[0]), -+ TREE_TYPE (ref))) -+ break; - } - else if (ref == orig[j]) - break; - } -- if (j == 2) -- return false; -- -- unsigned int elt; -- if (maybe_ne (bit_field_size (op1), elem_size) -- || !constant_multiple_p (bit_field_offset (op1), elem_size, &elt)) -- return false; -- if (j) -- elt += nelts; -- if (elt != i) -- maybe_ident = false; -- sel.quick_push (elt); -+ /* Found a suitable vector element. 
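For the element-extraction case above, a standalone source-level illustration (GNU C vector extensions; not from the patch): every lane of the CONSTRUCTOR is a converted extraction from the same source vector, so the whole construction can collapse into one vector conversion rather than four extract-convert-insert sequences.

typedef int   v4si __attribute__ ((vector_size (16)));
typedef float v4sf __attribute__ ((vector_size (16)));

v4sf
cvt_lanes (v4si x)
{
  return (v4sf) { (float) x[0], (float) x[1], (float) x[2], (float) x[3] };
}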
*/ -+ if (j < 2) -+ { -+ orig[j] = ref; -+ if (elem != i || j != 0) -+ maybe_ident = false; -+ elts.safe_push (std::make_pair (j, elem)); -+ continue; -+ } -+ /* Else fallthru. */ - } -- else if (CONSTANT_CLASS_P (elt->value)) -+ /* Handle elements not extracted from a vector. -+ 1. constants by permuting with constant vector -+ 2. a unique non-constant element by permuting with a splat vector */ -+ if (orig[1] -+ && orig[1] != error_mark_node) -+ return false; -+ orig[1] = error_mark_node; -+ if (CONSTANT_CLASS_P (elt->value)) - { -- if (orig[1] -- && orig[1] != error_mark_node) -+ if (one_nonconstant) - return false; -- orig[1] = error_mark_node; - if (!one_constant) - one_constant = elt->value; - constants[i] = elt->value; -- sel.quick_push (i + nelts); -- maybe_ident = false; - } - else -- return false; -+ { -+ if (one_constant) -+ return false; -+ if (!one_nonconstant) -+ one_nonconstant = elt->value; -+ else if (!operand_equal_p (one_nonconstant, elt->value, 0)) -+ return false; -+ } -+ elts.safe_push (std::make_pair (1, i)); -+ maybe_ident = false; - } - if (i < nelts) - return false; - -- if (! VECTOR_TYPE_P (TREE_TYPE (orig[0])) -- || maybe_ne (TYPE_VECTOR_SUBPARTS (type), -- TYPE_VECTOR_SUBPARTS (TREE_TYPE (orig[0])))) -+ if (! orig[0] -+ || ! VECTOR_TYPE_P (TREE_TYPE (orig[0]))) - return false; -- -- tree tem; -- if (conv_code != ERROR_MARK -- && (! supportable_convert_operation (conv_code, type, -- TREE_TYPE (orig[0]), -- &tem, &conv_code) -- || conv_code == CALL_EXPR)) -+ refnelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (orig[0])).to_constant (); -+ /* We currently do not handle larger destination vectors. */ -+ if (refnelts < nelts) - return false; - - if (maybe_ident) - { -+ tree conv_src_type -+ = (nelts != refnelts -+ ? (conv_code != ERROR_MARK -+ ? build_vector_type (TREE_TYPE (TREE_TYPE (orig[0])), nelts) -+ : type) -+ : TREE_TYPE (orig[0])); -+ if (conv_code != ERROR_MARK -+ && !supportable_convert_operation (conv_code, type, conv_src_type, -+ &conv_code)) -+ { -+ /* Only few targets implement direct conversion patterns so try -+ some simple special cases via VEC_[UN]PACK[_FLOAT]_LO_EXPR. */ -+ optab optab; -+ tree halfvectype, dblvectype; -+ if (CONVERT_EXPR_CODE_P (conv_code) -+ && (2 * TYPE_PRECISION (TREE_TYPE (TREE_TYPE (orig[0]))) -+ == TYPE_PRECISION (TREE_TYPE (type))) -+ && mode_for_vector (as_a -+ (TYPE_MODE (TREE_TYPE (TREE_TYPE (orig[0])))), -+ nelts * 2).exists () -+ && (dblvectype -+ = build_vector_type (TREE_TYPE (TREE_TYPE (orig[0])), -+ nelts * 2)) -+ && (optab = optab_for_tree_code (FLOAT_TYPE_P (TREE_TYPE (type)) -+ ? VEC_UNPACK_FLOAT_LO_EXPR -+ : VEC_UNPACK_LO_EXPR, -+ dblvectype, -+ optab_default)) -+ && (optab_handler (optab, TYPE_MODE (dblvectype)) -+ != CODE_FOR_nothing)) -+ { -+ gimple_seq stmts = NULL; -+ tree dbl; -+ if (refnelts == nelts) -+ { -+ /* ??? Paradoxical subregs don't exist, so insert into -+ the lower half of a wider zero vector. */ -+ dbl = gimple_build (&stmts, BIT_INSERT_EXPR, dblvectype, -+ build_zero_cst (dblvectype), orig[0], -+ bitsize_zero_node); -+ } -+ else if (refnelts == 2 * nelts) -+ dbl = orig[0]; -+ else -+ dbl = gimple_build (&stmts, BIT_FIELD_REF, dblvectype, -+ orig[0], TYPE_SIZE (dblvectype), -+ bitsize_zero_node); -+ gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); -+ gimple_assign_set_rhs_with_ops (gsi, -+ FLOAT_TYPE_P (TREE_TYPE (type)) -+ ? 
VEC_UNPACK_FLOAT_LO_EXPR -+ : VEC_UNPACK_LO_EXPR, -+ dbl); -+ } -+ else if (CONVERT_EXPR_CODE_P (conv_code) -+ && (TYPE_PRECISION (TREE_TYPE (TREE_TYPE (orig[0]))) -+ == 2 * TYPE_PRECISION (TREE_TYPE (type))) -+ && mode_for_vector (as_a -+ (TYPE_MODE -+ (TREE_TYPE (TREE_TYPE (orig[0])))), -+ nelts / 2).exists () -+ && (halfvectype -+ = build_vector_type (TREE_TYPE (TREE_TYPE (orig[0])), -+ nelts / 2)) -+ && (optab = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, -+ halfvectype, -+ optab_default)) -+ && (optab_handler (optab, TYPE_MODE (halfvectype)) -+ != CODE_FOR_nothing)) -+ { -+ gimple_seq stmts = NULL; -+ tree low = gimple_build (&stmts, BIT_FIELD_REF, halfvectype, -+ orig[0], TYPE_SIZE (halfvectype), -+ bitsize_zero_node); -+ tree hig = gimple_build (&stmts, BIT_FIELD_REF, halfvectype, -+ orig[0], TYPE_SIZE (halfvectype), -+ TYPE_SIZE (halfvectype)); -+ gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); -+ gimple_assign_set_rhs_with_ops (gsi, VEC_PACK_TRUNC_EXPR, -+ low, hig); -+ } -+ else -+ return false; -+ update_stmt (gsi_stmt (*gsi)); -+ return true; -+ } -+ if (nelts != refnelts) -+ { -+ gassign *lowpart -+ = gimple_build_assign (make_ssa_name (conv_src_type), -+ build3 (BIT_FIELD_REF, conv_src_type, -+ orig[0], TYPE_SIZE (conv_src_type), -+ bitsize_zero_node)); -+ gsi_insert_before (gsi, lowpart, GSI_SAME_STMT); -+ orig[0] = gimple_assign_lhs (lowpart); -+ } - if (conv_code == ERROR_MARK) - gimple_assign_set_rhs_from_tree (gsi, orig[0]); - else -@@ -2152,54 +2252,119 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi) - } - else - { -- tree mask_type; -+ tree mask_type, perm_type, conv_src_type; -+ perm_type = TREE_TYPE (orig[0]); -+ conv_src_type = (nelts == refnelts -+ ? perm_type -+ : build_vector_type (TREE_TYPE (perm_type), nelts)); -+ if (conv_code != ERROR_MARK -+ && !supportable_convert_operation (conv_code, type, conv_src_type, -+ &conv_code)) -+ return false; - -- vec_perm_indices indices (sel, orig[1] ? 2 : 1, nelts); -- if (!can_vec_perm_const_p (TYPE_MODE (type), indices)) -+ /* Now that we know the number of elements of the source build the -+ permute vector. -+ ??? When the second vector has constant values we can shuffle -+ it and its source indexes to make the permutation supported. -+ For now it mimics a blend. */ -+ vec_perm_builder sel (refnelts, refnelts, 1); -+ bool all_same_p = true; -+ for (i = 0; i < elts.length (); ++i) -+ { -+ sel.quick_push (elts[i].second + elts[i].first * refnelts); -+ all_same_p &= known_eq (sel[i], sel[0]); -+ } -+ /* And fill the tail with "something". It's really don't care, -+ and ideally we'd allow VEC_PERM to have a smaller destination -+ vector. As a heuristic: -+ -+ (a) if what we have so far duplicates a single element, make the -+ tail do the same -+ -+ (b) otherwise preserve a uniform orig[0]. This facilitates -+ later pattern-matching of VEC_PERM_EXPR to a BIT_INSERT_EXPR. */ -+ for (; i < refnelts; ++i) -+ sel.quick_push (all_same_p -+ ? sel[0] -+ : (elts[0].second == 0 && elts[0].first == 0 -+ ? 0 : refnelts) + i); -+ vec_perm_indices indices (sel, orig[1] ? 
2 : 1, refnelts); -+ if (!can_vec_perm_const_p (TYPE_MODE (perm_type), indices)) - return false; - mask_type - = build_vector_type (build_nonstandard_integer_type (elem_size, 1), -- nelts); -+ refnelts); - if (GET_MODE_CLASS (TYPE_MODE (mask_type)) != MODE_VECTOR_INT - || maybe_ne (GET_MODE_SIZE (TYPE_MODE (mask_type)), -- GET_MODE_SIZE (TYPE_MODE (type)))) -+ GET_MODE_SIZE (TYPE_MODE (perm_type)))) - return false; -- op2 = vec_perm_indices_to_tree (mask_type, indices); -+ tree op2 = vec_perm_indices_to_tree (mask_type, indices); -+ bool converted_orig1 = false; -+ gimple_seq stmts = NULL; - if (!orig[1]) - orig[1] = orig[0]; -- if (orig[1] == error_mark_node) -+ else if (orig[1] == error_mark_node -+ && one_nonconstant) - { -- tree_vector_builder vec (type, nelts, 1); -- for (unsigned i = 0; i < nelts; ++i) -- if (constants[i]) -+ /* ??? We can see if we can safely convert to the original -+ element type. */ -+ converted_orig1 = conv_code != ERROR_MARK; -+ orig[1] = gimple_build_vector_from_val (&stmts, UNKNOWN_LOCATION, -+ converted_orig1 -+ ? type : perm_type, -+ one_nonconstant); -+ } -+ else if (orig[1] == error_mark_node) -+ { -+ /* ??? See if we can convert the vector to the original type. */ -+ converted_orig1 = conv_code != ERROR_MARK; -+ unsigned n = converted_orig1 ? nelts : refnelts; -+ tree_vector_builder vec (converted_orig1 -+ ? type : perm_type, n, 1); -+ for (unsigned i = 0; i < n; ++i) -+ if (i < nelts && constants[i]) - vec.quick_push (constants[i]); - else - /* ??? Push a don't-care value. */ - vec.quick_push (one_constant); - orig[1] = vec.build (); - } -- if (conv_code == ERROR_MARK) -- gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR, orig[0], -- orig[1], op2); -- else if (TREE_CODE (orig[1]) == VECTOR_CST) -+ tree blend_op2 = NULL_TREE; -+ if (converted_orig1) - { -- gimple *conv -- = gimple_build_assign (make_ssa_name (type), conv_code, orig[0]); -- orig[0] = gimple_assign_lhs (conv); -- gsi_insert_before (gsi, conv, GSI_SAME_STMT); -- gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR, -- orig[0], orig[1], op2); -- } -- else -- { -- gimple *perm -- = gimple_build_assign (make_ssa_name (TREE_TYPE (orig[0])), -- VEC_PERM_EXPR, orig[0], orig[1], op2); -- orig[0] = gimple_assign_lhs (perm); -- gsi_insert_before (gsi, perm, GSI_SAME_STMT); -- gimple_assign_set_rhs_with_ops (gsi, conv_code, orig[0], -- NULL_TREE, NULL_TREE); -+ /* Make sure we can do a blend in the target type. */ -+ vec_perm_builder sel (nelts, nelts, 1); -+ for (i = 0; i < elts.length (); ++i) -+ sel.quick_push (elts[i].first -+ ? elts[i].second + nelts : i); -+ vec_perm_indices indices (sel, 2, nelts); -+ if (!can_vec_perm_const_p (TYPE_MODE (type), indices)) -+ return false; -+ mask_type -+ = build_vector_type (build_nonstandard_integer_type (elem_size, 1), -+ nelts); -+ if (GET_MODE_CLASS (TYPE_MODE (mask_type)) != MODE_VECTOR_INT -+ || maybe_ne (GET_MODE_SIZE (TYPE_MODE (mask_type)), -+ GET_MODE_SIZE (TYPE_MODE (type)))) -+ return false; -+ blend_op2 = vec_perm_indices_to_tree (mask_type, indices); - } -+ tree orig1_for_perm -+ = converted_orig1 ? build_zero_cst (perm_type) : orig[1]; -+ tree res = gimple_build (&stmts, VEC_PERM_EXPR, perm_type, -+ orig[0], orig1_for_perm, op2); -+ if (nelts != refnelts) -+ res = gimple_build (&stmts, BIT_FIELD_REF, -+ conv_code != ERROR_MARK ? conv_src_type : type, -+ res, TYPE_SIZE (type), bitsize_zero_node); -+ if (conv_code != ERROR_MARK) -+ res = gimple_build (&stmts, conv_code, type, res); -+ /* Blend in the actual constant. 
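The constant-blend path just mentioned can be pictured with this hypothetical snippet (again GNU C vector extensions): three lanes are extracted from the input and one lane is a literal, so the result is a permutation of the source blended with a constant vector that carries don't-care values in the unused lanes.

typedef float v4sf __attribute__ ((vector_size (16)));

v4sf
shuffle_with_const (v4sf x)
{
  return (v4sf) { x[2], x[0], 1.0f, x[3] };
}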
*/ -+ if (converted_orig1) -+ res = gimple_build (&stmts, VEC_PERM_EXPR, type, -+ res, orig[1], blend_op2); -+ gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); -+ gimple_assign_set_rhs_with_ops (gsi, SSA_NAME, res); - } - update_stmt (gsi_stmt (*gsi)); - return true; -@@ -2449,6 +2614,72 @@ pass_forwprop::execute (function *fun) - else - gsi_next (&gsi); - } -+ else if (TREE_CODE (TREE_TYPE (lhs)) == VECTOR_TYPE -+ && TYPE_MODE (TREE_TYPE (lhs)) == BLKmode -+ && gimple_assign_load_p (stmt) -+ && !gimple_has_volatile_ops (stmt) -+ && (TREE_CODE (gimple_assign_rhs1 (stmt)) -+ != TARGET_MEM_REF) -+ && !stmt_can_throw_internal (cfun, stmt)) -+ { -+ /* Rewrite loads used only in BIT_FIELD_REF extractions to -+ component-wise loads. */ -+ use_operand_p use_p; -+ imm_use_iterator iter; -+ bool rewrite = true; -+ FOR_EACH_IMM_USE_FAST (use_p, iter, lhs) -+ { -+ gimple *use_stmt = USE_STMT (use_p); -+ if (is_gimple_debug (use_stmt)) -+ continue; -+ if (!is_gimple_assign (use_stmt) -+ || gimple_assign_rhs_code (use_stmt) != BIT_FIELD_REF) -+ { -+ rewrite = false; -+ break; -+ } -+ } -+ if (rewrite) -+ { -+ gimple *use_stmt; -+ FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs) -+ { -+ if (is_gimple_debug (use_stmt)) -+ { -+ if (gimple_debug_bind_p (use_stmt)) -+ { -+ gimple_debug_bind_reset_value (use_stmt); -+ update_stmt (use_stmt); -+ } -+ continue; -+ } -+ -+ tree bfr = gimple_assign_rhs1 (use_stmt); -+ tree new_rhs = fold_build3 (BIT_FIELD_REF, -+ TREE_TYPE (bfr), -+ unshare_expr (rhs), -+ TREE_OPERAND (bfr, 1), -+ TREE_OPERAND (bfr, 2)); -+ gimple *new_stmt -+ = gimple_build_assign (gimple_assign_lhs (use_stmt), -+ new_rhs); -+ -+ location_t loc = gimple_location (use_stmt); -+ gimple_set_location (new_stmt, loc); -+ gimple_stmt_iterator gsi2 = gsi_for_stmt (use_stmt); -+ unlink_stmt_vdef (use_stmt); -+ gsi_remove (&gsi2, true); -+ -+ gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); -+ } -+ -+ release_defs (stmt); -+ gsi_remove (&gsi, true); -+ } -+ else -+ gsi_next (&gsi); -+ } -+ - else if (code == COMPLEX_EXPR) - { - /* Rewrite stores of a single-use complex build expression -@@ -2489,6 +2720,66 @@ pass_forwprop::execute (function *fun) - else - gsi_next (&gsi); - } -+ else if (code == CONSTRUCTOR -+ && VECTOR_TYPE_P (TREE_TYPE (rhs)) -+ && TYPE_MODE (TREE_TYPE (rhs)) == BLKmode -+ && CONSTRUCTOR_NELTS (rhs) > 0 -+ && (!VECTOR_TYPE_P (TREE_TYPE (CONSTRUCTOR_ELT (rhs, 0)->value)) -+ || (TYPE_MODE (TREE_TYPE (CONSTRUCTOR_ELT (rhs, 0)->value)) -+ != BLKmode))) -+ { -+ /* Rewrite stores of a single-use vector constructors -+ to component-wise stores if the mode isn't supported. 
*/ -+ use_operand_p use_p; -+ gimple *use_stmt; -+ if (single_imm_use (lhs, &use_p, &use_stmt) -+ && gimple_store_p (use_stmt) -+ && !gimple_has_volatile_ops (use_stmt) -+ && !stmt_can_throw_internal (cfun, use_stmt) -+ && is_gimple_assign (use_stmt) -+ && (TREE_CODE (gimple_assign_lhs (use_stmt)) -+ != TARGET_MEM_REF)) -+ { -+ tree elt_t = TREE_TYPE (CONSTRUCTOR_ELT (rhs, 0)->value); -+ unsigned HOST_WIDE_INT elt_w -+ = tree_to_uhwi (TYPE_SIZE (elt_t)); -+ unsigned HOST_WIDE_INT n -+ = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs))); -+ for (unsigned HOST_WIDE_INT bi = 0; bi < n; bi += elt_w) -+ { -+ unsigned HOST_WIDE_INT ci = bi / elt_w; -+ tree new_rhs; -+ if (ci < CONSTRUCTOR_NELTS (rhs)) -+ new_rhs = CONSTRUCTOR_ELT (rhs, ci)->value; -+ else -+ new_rhs = build_zero_cst (elt_t); -+ tree use_lhs = gimple_assign_lhs (use_stmt); -+ tree new_lhs = build3 (BIT_FIELD_REF, -+ elt_t, -+ unshare_expr (use_lhs), -+ bitsize_int (elt_w), -+ bitsize_int (bi)); -+ gimple *new_stmt = gimple_build_assign (new_lhs, new_rhs); -+ location_t loc = gimple_location (use_stmt); -+ gimple_set_location (new_stmt, loc); -+ gimple_set_vuse (new_stmt, gimple_vuse (use_stmt)); -+ gimple_set_vdef (new_stmt, -+ make_ssa_name (gimple_vop (cfun))); -+ SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt; -+ gimple_set_vuse (use_stmt, gimple_vdef (new_stmt)); -+ gimple_stmt_iterator gsi2 = gsi_for_stmt (use_stmt); -+ gsi_insert_before (&gsi2, new_stmt, GSI_SAME_STMT); -+ } -+ gimple_stmt_iterator gsi2 = gsi_for_stmt (use_stmt); -+ unlink_stmt_vdef (use_stmt); -+ release_defs (use_stmt); -+ gsi_remove (&gsi2, true); -+ release_defs (stmt); -+ gsi_remove (&gsi, true); -+ } -+ else -+ gsi_next (&gsi); -+ } - else - gsi_next (&gsi); - } -diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c -index fec378490..695646764 100644 ---- a/gcc/tree-ssa-loop-ivopts.c -+++ b/gcc/tree-ssa-loop-ivopts.c -@@ -2461,11 +2461,13 @@ get_mem_type_for_internal_fn (gcall *call, tree *op_p) - switch (gimple_call_internal_fn (call)) - { - case IFN_MASK_LOAD: -+ case IFN_MASK_LOAD_LANES: - if (op_p == gimple_call_arg_ptr (call, 0)) - return TREE_TYPE (gimple_call_lhs (call)); - return NULL_TREE; - - case IFN_MASK_STORE: -+ case IFN_MASK_STORE_LANES: - if (op_p == gimple_call_arg_ptr (call, 0)) - return TREE_TYPE (gimple_call_arg (call, 3)); - return NULL_TREE; -@@ -3510,6 +3512,26 @@ add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use) - basetype = sizetype; - record_common_cand (data, build_int_cst (basetype, 0), iv->step, use); - -+ /* Compare the cost of an address with an unscaled index with the cost of -+ an address with a scaled index and add candidate if useful. */ -+ poly_int64 step; -+ if (use != NULL -+ && poly_int_tree_p (iv->step, &step) -+ && address_p (use->type)) -+ { -+ poly_int64 new_step; -+ unsigned int fact = preferred_mem_scale_factor -+ (use->iv->base, -+ TYPE_MODE (use->mem_type), -+ optimize_loop_for_speed_p (data->current_loop)); -+ -+ if (fact != 1 -+ && multiple_p (step, fact, &new_step)) -+ add_candidate (data, size_int (0), -+ wide_int_to_tree (sizetype, new_step), -+ true, NULL); -+ } -+ - /* Record common candidate with constant offset stripped in base. - Like the use itself, we also add candidate directly for it. 
*/ - base = strip_offset (iv->base, &offset); -@@ -4036,6 +4058,94 @@ get_computation_at (struct loop *loop, gimple *at, - return fold_convert (type, aff_combination_to_tree (&aff)); - } - -+/* Like get_computation_at, but try harder, even if the computation -+ is more expensive. Intended for debug stmts. */ -+ -+static tree -+get_debug_computation_at (class loop *loop, gimple *at, -+ struct iv_use *use, struct iv_cand *cand) -+{ -+ if (tree ret = get_computation_at (loop, at, use, cand)) -+ return ret; -+ -+ tree ubase = use->iv->base, ustep = use->iv->step; -+ tree cbase = cand->iv->base, cstep = cand->iv->step; -+ tree var; -+ tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase); -+ widest_int rat; -+ -+ /* We must have a precision to express the values of use. */ -+ if (TYPE_PRECISION (utype) >= TYPE_PRECISION (ctype)) -+ return NULL_TREE; -+ -+ /* Try to handle the case that get_computation_at doesn't, -+ try to express -+ use = ubase + (var - cbase) / ratio. */ -+ if (!constant_multiple_of (cstep, fold_convert (TREE_TYPE (cstep), ustep), -+ &rat)) -+ return NULL_TREE; -+ -+ bool neg_p = false; -+ if (wi::neg_p (rat)) -+ { -+ if (TYPE_UNSIGNED (ctype)) -+ return NULL_TREE; -+ neg_p = true; -+ rat = wi::neg (rat); -+ } -+ -+ /* If both IVs can wrap around and CAND doesn't have a power of two step, -+ it is unsafe. Consider uint16_t CAND with step 9, when wrapping around, -+ the values will be ... 0xfff0, 0xfff9, 2, 11 ... and when use is say -+ uint8_t with step 3, those values divided by 3 cast to uint8_t will be -+ ... 0x50, 0x53, 0, 3 ... rather than expected 0x50, 0x53, 0x56, 0x59. */ -+ if (!use->iv->no_overflow -+ && !cand->iv->no_overflow -+ && !integer_pow2p (cstep)) -+ return NULL_TREE; -+ -+ int bits = wi::exact_log2 (rat); -+ if (bits == -1) -+ bits = wi::floor_log2 (rat) + 1; -+ if (!cand->iv->no_overflow -+ && TYPE_PRECISION (utype) + bits > TYPE_PRECISION (ctype)) -+ return NULL_TREE; -+ -+ var = var_at_stmt (loop, cand, at); -+ -+ if (POINTER_TYPE_P (ctype)) -+ { -+ ctype = unsigned_type_for (ctype); -+ cbase = fold_convert (ctype, cbase); -+ cstep = fold_convert (ctype, cstep); -+ var = fold_convert (ctype, var); -+ } -+ -+ ubase = unshare_expr (ubase); -+ cbase = unshare_expr (cbase); -+ if (stmt_after_increment (loop, cand, at)) -+ var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, -+ unshare_expr (cstep)); -+ -+ var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, cbase); -+ var = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (var), var, -+ wide_int_to_tree (TREE_TYPE (var), rat)); -+ if (POINTER_TYPE_P (utype)) -+ { -+ var = fold_convert (sizetype, var); -+ if (neg_p) -+ var = fold_build1 (NEGATE_EXPR, sizetype, var); -+ var = fold_build2 (POINTER_PLUS_EXPR, utype, ubase, var); -+ } -+ else -+ { -+ var = fold_convert (utype, var); -+ var = fold_build2 (neg_p ? MINUS_EXPR : PLUS_EXPR, utype, -+ ubase, var); -+ } -+ return var; -+} -+ - /* Adjust the cost COST for being in loop setup rather than loop body. - If we're optimizing for space, the loop setup overhead is constant; - if we're optimizing for speed, amortize it over the per-iteration cost. -@@ -7122,6 +7232,8 @@ get_alias_ptr_type_for_ptr_address (iv_use *use) - { - case IFN_MASK_LOAD: - case IFN_MASK_STORE: -+ case IFN_MASK_LOAD_LANES: -+ case IFN_MASK_STORE_LANES: - /* The second argument contains the correct alias type. 
*/ - gcc_assert (use->op_p = gimple_call_arg_ptr (call, 0)); - return TREE_TYPE (gimple_call_arg (call, 1)); -@@ -7339,6 +7451,7 @@ remove_unused_ivs (struct ivopts_data *data, bitmap toremove) - struct iv_use dummy_use; - struct iv_cand *best_cand = NULL, *cand; - unsigned i, best_pref = 0, cand_pref; -+ tree comp = NULL_TREE; - - memset (&dummy_use, 0, sizeof (dummy_use)); - dummy_use.iv = info->iv; -@@ -7359,20 +7472,22 @@ remove_unused_ivs (struct ivopts_data *data, bitmap toremove) - ? 1 : 0; - if (best_cand == NULL || best_pref < cand_pref) - { -- best_cand = cand; -- best_pref = cand_pref; -+ tree this_comp -+ = get_debug_computation_at (data->current_loop, -+ SSA_NAME_DEF_STMT (def), -+ &dummy_use, cand); -+ if (this_comp) -+ { -+ best_cand = cand; -+ best_pref = cand_pref; -+ comp = this_comp; -+ } - } - } - - if (!best_cand) - continue; - -- tree comp = get_computation_at (data->current_loop, -- SSA_NAME_DEF_STMT (def), -- &dummy_use, best_cand); -- if (!comp) -- continue; -- - if (count > 1) - { - tree vexpr = make_node (DEBUG_EXPR_DECL); -diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c -index 3dfda7a4f..8607a59d4 100644 ---- a/gcc/tree-ssa-math-opts.c -+++ b/gcc/tree-ssa-math-opts.c -@@ -1040,14 +1040,9 @@ pass_cse_reciprocals::execute (function *fun) - else - stmt2 = gimple_build_call_internal_vec (ifn, args); - gimple_call_set_lhs (stmt2, arg1); -- if (gimple_vdef (call)) -- { -- gimple_set_vdef (stmt2, gimple_vdef (call)); -- SSA_NAME_DEF_STMT (gimple_vdef (stmt2)) = stmt2; -- } -+ gimple_move_vops (stmt2, call); - gimple_call_set_nothrow (stmt2, - gimple_call_nothrow_p (call)); -- gimple_set_vuse (stmt2, gimple_vuse (call)); - gimple_stmt_iterator gsi2 = gsi_for_stmt (call); - gsi_replace (&gsi2, stmt2, true); - } -@@ -3048,6 +3043,8 @@ last_fma_candidate_feeds_initial_phi (fma_deferring_state *state, - /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2 - with uses in additions and subtractions to form fused multiply-add - operations. Returns true if successful and MUL_STMT should be removed. -+ If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional -+ on MUL_COND, otherwise it is unconditional. 
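The MUL_COND parameter above corresponds, roughly, to source loops like the following sketch (mine, not from the patch): after if-conversion and masked vectorization the multiplication becomes a COND_MUL internal call, and when its condition matches the consuming conditional add the two can be fused into a single predicated fused multiply-add.

void
cond_fma (double *restrict x, const double *restrict a,
          const double *restrict b, const int *restrict pred, int n)
{
  for (int i = 0; i < n; i++)
    if (pred[i])
      x[i] += a[i] * b[i];
}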
- - If STATE indicates that we are deferring FMA transformation, that means - that we do not produce FMAs for basic blocks which look like: -@@ -3064,7 +3061,7 @@ last_fma_candidate_feeds_initial_phi (fma_deferring_state *state, - - static bool - convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2, -- fma_deferring_state *state) -+ fma_deferring_state *state, tree mul_cond = NULL_TREE) - { - tree mul_result = gimple_get_lhs (mul_stmt); - tree type = TREE_TYPE (mul_result); -@@ -3178,6 +3175,9 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2, - return false; - } - -+ if (mul_cond && cond != mul_cond) -+ return false; -+ - if (cond) - { - if (cond == result || else_value == result) -@@ -3789,38 +3789,48 @@ math_opts_dom_walker::after_dom_children (basic_block bb) - } - else if (is_gimple_call (stmt)) - { -- tree fndecl = gimple_call_fndecl (stmt); -- if (fndecl && gimple_call_builtin_p (stmt, BUILT_IN_NORMAL)) -+ switch (gimple_call_combined_fn (stmt)) - { -- switch (DECL_FUNCTION_CODE (fndecl)) -+ CASE_CFN_POW: -+ if (gimple_call_lhs (stmt) -+ && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST -+ && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt, 1)), -+ &dconst2) -+ && convert_mult_to_fma (stmt, -+ gimple_call_arg (stmt, 0), -+ gimple_call_arg (stmt, 0), -+ &fma_state)) - { -- case BUILT_IN_POWF: -- case BUILT_IN_POW: -- case BUILT_IN_POWL: -- if (gimple_call_lhs (stmt) -- && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST -- && real_equal -- (&TREE_REAL_CST (gimple_call_arg (stmt, 1)), -- &dconst2) -- && convert_mult_to_fma (stmt, -- gimple_call_arg (stmt, 0), -- gimple_call_arg (stmt, 0), -- &fma_state)) -- { -- unlink_stmt_vdef (stmt); -- if (gsi_remove (&gsi, true) -- && gimple_purge_dead_eh_edges (bb)) -- *m_cfg_changed_p = true; -- release_defs (stmt); -- continue; -- } -- break; -+ unlink_stmt_vdef (stmt); -+ if (gsi_remove (&gsi, true) -+ && gimple_purge_dead_eh_edges (bb)) -+ *m_cfg_changed_p = true; -+ release_defs (stmt); -+ continue; -+ } -+ break; - -- default:; -+ case CFN_COND_MUL: -+ if (convert_mult_to_fma (stmt, -+ gimple_call_arg (stmt, 1), -+ gimple_call_arg (stmt, 2), -+ &fma_state, -+ gimple_call_arg (stmt, 0))) -+ -+ { -+ gsi_remove (&gsi, true); -+ release_defs (stmt); -+ continue; - } -+ break; -+ -+ case CFN_LAST: -+ cancel_fma_deferring (&fma_state); -+ break; -+ -+ default: -+ break; - } -- else -- cancel_fma_deferring (&fma_state); - } - gsi_next (&gsi); - } -diff --git a/gcc/tree-ssa-propagate.c b/gcc/tree-ssa-propagate.c -index 6b78dc1c0..0862f83e9 100644 ---- a/gcc/tree-ssa-propagate.c -+++ b/gcc/tree-ssa-propagate.c -@@ -625,8 +625,7 @@ finish_update_gimple_call (gimple_stmt_iterator *si_p, gimple *new_stmt, - { - gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); - move_ssa_defining_stmt_for_defs (new_stmt, stmt); -- gimple_set_vuse (new_stmt, gimple_vuse (stmt)); -- gimple_set_vdef (new_stmt, gimple_vdef (stmt)); -+ gimple_move_vops (new_stmt, stmt); - gimple_set_location (new_stmt, gimple_location (stmt)); - if (gimple_block (new_stmt) == NULL_TREE) - gimple_set_block (new_stmt, gimple_block (stmt)); -@@ -706,8 +705,7 @@ update_call_from_tree (gimple_stmt_iterator *si_p, tree expr) - STRIP_USELESS_TYPE_CONVERSION (expr); - new_stmt = gimple_build_assign (lhs, expr); - move_ssa_defining_stmt_for_defs (new_stmt, stmt); -- gimple_set_vuse (new_stmt, gimple_vuse (stmt)); -- gimple_set_vdef (new_stmt, gimple_vdef (stmt)); -+ gimple_move_vops (new_stmt, stmt); - } - else if (!TREE_SIDE_EFFECTS (expr)) - { -@@ -732,8 +730,7 @@ 
update_call_from_tree (gimple_stmt_iterator *si_p, tree expr) - else - lhs = create_tmp_var (TREE_TYPE (expr)); - new_stmt = gimple_build_assign (lhs, expr); -- gimple_set_vuse (new_stmt, gimple_vuse (stmt)); -- gimple_set_vdef (new_stmt, gimple_vdef (stmt)); -+ gimple_move_vops (new_stmt, stmt); - move_ssa_defining_stmt_for_defs (new_stmt, stmt); - } - gimple_set_location (new_stmt, gimple_location (stmt)); -diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c -index 91494d761..096584062 100644 ---- a/gcc/tree-ssa-threadedge.c -+++ b/gcc/tree-ssa-threadedge.c -@@ -331,6 +331,7 @@ record_temporary_equivalences_from_stmts_at_dest (edge e, - { - tree fndecl = gimple_call_fndecl (stmt); - if (fndecl -+ && fndecl_built_in_p (fndecl, BUILT_IN_NORMAL) - && (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_OBJECT_SIZE - || DECL_FUNCTION_CODE (fndecl) == BUILT_IN_CONSTANT_P)) - continue; -diff --git a/gcc/tree-streamer-in.c b/gcc/tree-streamer-in.c -index f6d137316..eb3e174fc 100644 ---- a/gcc/tree-streamer-in.c -+++ b/gcc/tree-streamer-in.c -@@ -324,8 +324,7 @@ unpack_ts_decl_with_vis_value_fields (struct bitpack_d *bp, tree expr) - static void - unpack_ts_function_decl_value_fields (struct bitpack_d *bp, tree expr) - { -- DECL_BUILT_IN_CLASS (expr) = bp_unpack_enum (bp, built_in_class, -- BUILT_IN_LAST); -+ built_in_class cl = bp_unpack_enum (bp, built_in_class, BUILT_IN_LAST); - DECL_STATIC_CONSTRUCTOR (expr) = (unsigned) bp_unpack_value (bp, 1); - DECL_STATIC_DESTRUCTOR (expr) = (unsigned) bp_unpack_value (bp, 1); - DECL_UNINLINABLE (expr) = (unsigned) bp_unpack_value (bp, 1); -@@ -333,7 +332,7 @@ unpack_ts_function_decl_value_fields (struct bitpack_d *bp, tree expr) - DECL_IS_NOVOPS (expr) = (unsigned) bp_unpack_value (bp, 1); - DECL_IS_RETURNS_TWICE (expr) = (unsigned) bp_unpack_value (bp, 1); - DECL_IS_MALLOC (expr) = (unsigned) bp_unpack_value (bp, 1); -- DECL_IS_OPERATOR_NEW (expr) = (unsigned) bp_unpack_value (bp, 1); -+ DECL_SET_IS_OPERATOR_NEW (expr, (unsigned) bp_unpack_value (bp, 1)); - DECL_DECLARED_INLINE_P (expr) = (unsigned) bp_unpack_value (bp, 1); - DECL_STATIC_CHAIN (expr) = (unsigned) bp_unpack_value (bp, 1); - DECL_NO_INLINE_WARNING_P (expr) = (unsigned) bp_unpack_value (bp, 1); -@@ -343,22 +342,22 @@ unpack_ts_function_decl_value_fields (struct bitpack_d *bp, tree expr) - DECL_DISREGARD_INLINE_LIMITS (expr) = (unsigned) bp_unpack_value (bp, 1); - DECL_PURE_P (expr) = (unsigned) bp_unpack_value (bp, 1); - DECL_LOOPING_CONST_OR_PURE_P (expr) = (unsigned) bp_unpack_value (bp, 1); -- if (DECL_BUILT_IN_CLASS (expr) != NOT_BUILT_IN) -+ unsigned int fcode = 0; -+ if (cl != NOT_BUILT_IN) - { -- DECL_FUNCTION_CODE (expr) = (enum built_in_function) bp_unpack_value (bp, -- 12); -- if (DECL_BUILT_IN_CLASS (expr) == BUILT_IN_NORMAL -- && DECL_FUNCTION_CODE (expr) >= END_BUILTINS) -+ fcode = bp_unpack_value (bp, 32); -+ if (cl == BUILT_IN_NORMAL && fcode >= END_BUILTINS) - fatal_error (input_location, - "machine independent builtin code out of range"); -- else if (DECL_BUILT_IN_CLASS (expr) == BUILT_IN_MD) -+ else if (cl == BUILT_IN_MD) - { -- tree result = targetm.builtin_decl (DECL_FUNCTION_CODE (expr), true); -+ tree result = targetm.builtin_decl (fcode, true); - if (!result || result == error_mark_node) - fatal_error (input_location, - "target specific builtin not available"); - } - } -+ set_decl_built_in_function (expr, cl, fcode); - } - - -diff --git a/gcc/tree-streamer-out.c b/gcc/tree-streamer-out.c -index 3f619e830..12693f6f4 100644 ---- a/gcc/tree-streamer-out.c -+++ 
b/gcc/tree-streamer-out.c -@@ -295,7 +295,7 @@ pack_ts_function_decl_value_fields (struct bitpack_d *bp, tree expr) - bp_pack_value (bp, DECL_IS_NOVOPS (expr), 1); - bp_pack_value (bp, DECL_IS_RETURNS_TWICE (expr), 1); - bp_pack_value (bp, DECL_IS_MALLOC (expr), 1); -- bp_pack_value (bp, DECL_IS_OPERATOR_NEW (expr), 1); -+ bp_pack_value (bp, DECL_IS_OPERATOR_NEW_P (expr), 1); - bp_pack_value (bp, DECL_DECLARED_INLINE_P (expr), 1); - bp_pack_value (bp, DECL_STATIC_CHAIN (expr), 1); - bp_pack_value (bp, DECL_NO_INLINE_WARNING_P (expr), 1); -@@ -305,7 +305,7 @@ pack_ts_function_decl_value_fields (struct bitpack_d *bp, tree expr) - bp_pack_value (bp, DECL_PURE_P (expr), 1); - bp_pack_value (bp, DECL_LOOPING_CONST_OR_PURE_P (expr), 1); - if (DECL_BUILT_IN_CLASS (expr) != NOT_BUILT_IN) -- bp_pack_value (bp, DECL_FUNCTION_CODE (expr), 12); -+ bp_pack_value (bp, DECL_UNCHECKED_FUNCTION_CODE (expr), 32); - } - - -diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c -index 39bc2a82b..8d97deaf2 100644 ---- a/gcc/tree-vect-generic.c -+++ b/gcc/tree-vect-generic.c -@@ -1671,7 +1671,6 @@ expand_vector_conversion (gimple_stmt_iterator *gsi) - gimple *g; - tree lhs = gimple_call_lhs (stmt); - tree arg = gimple_call_arg (stmt, 0); -- tree decl = NULL_TREE; - tree ret_type = TREE_TYPE (lhs); - tree arg_type = TREE_TYPE (arg); - tree new_rhs, compute_type = TREE_TYPE (arg_type); -@@ -1698,16 +1697,9 @@ expand_vector_conversion (gimple_stmt_iterator *gsi) - - if (modifier == NONE && (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)) - { -- if (supportable_convert_operation (code, ret_type, arg_type, &decl, -- &code1)) -+ if (supportable_convert_operation (code, ret_type, arg_type, &code1)) - { -- if (code1 == CALL_EXPR) -- { -- g = gimple_build_call (decl, 1, arg); -- gimple_call_set_lhs (g, lhs); -- } -- else -- g = gimple_build_assign (lhs, code1, arg); -+ g = gimple_build_assign (lhs, code1, arg); - gsi_replace (gsi, g, false); - return; - } -@@ -1726,11 +1718,11 @@ expand_vector_conversion (gimple_stmt_iterator *gsi) - tree ret1_type = build_vector_type (TREE_TYPE (ret_type), nelts); - tree arg1_type = build_vector_type (TREE_TYPE (arg_type), nelts); - if (supportable_convert_operation (code, ret1_type, arg1_type, -- &decl, &code1)) -+ &code1)) - { - new_rhs = expand_vector_piecewise (gsi, do_vec_conversion, - ret_type, arg1_type, arg, -- decl, code1); -+ NULL_TREE, code1); - g = gimple_build_assign (lhs, new_rhs); - gsi_replace (gsi, g, false); - return; -diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c -index 85be01748..b76728452 100644 ---- a/gcc/tree-vect-loop.c -+++ b/gcc/tree-vect-loop.c -@@ -5581,6 +5581,30 @@ vect_expand_fold_left (gimple_stmt_iterator *gsi, tree scalar_dest, - return lhs; - } - -+/* Get a masked internal function equivalent to REDUC_FN. VECTYPE_IN is the -+ type of the vector input. */ -+ -+static internal_fn -+get_masked_reduction_fn (internal_fn reduc_fn, tree vectype_in) -+{ -+ internal_fn mask_reduc_fn; -+ -+ switch (reduc_fn) -+ { -+ case IFN_FOLD_LEFT_PLUS: -+ mask_reduc_fn = IFN_MASK_FOLD_LEFT_PLUS; -+ break; -+ -+ default: -+ return IFN_LAST; -+ } -+ -+ if (direct_internal_fn_supported_p (mask_reduc_fn, vectype_in, -+ OPTIMIZE_FOR_SPEED)) -+ return mask_reduc_fn; -+ return IFN_LAST; -+} -+ - /* Perform an in-order reduction (FOLD_LEFT_REDUCTION). STMT_INFO is the - statement that sets the live-out value. REDUC_DEF_STMT is the phi - statement. 
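An illustrative loop for the masked in-order reduction machinery above (example mine): without -ffast-math the additions must be kept in source order, and on targets providing a masked FOLD_LEFT_PLUS (for instance SVE's predicated FADDA) the guard can be applied through the mask operand instead of first merging the inactive lanes with the identity value.

double
cond_sum (const double *a, const double *pred, int n)
{
  double s = 0.0;
  for (int i = 0; i < n; i++)
    if (pred[i] > 0.0)
      s += a[i];
  return s;
}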
CODE is the operation performed by STMT_INFO and OPS are -@@ -5603,6 +5627,7 @@ vectorize_fold_left_reduction (stmt_vec_info stmt_info, - struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); - stmt_vec_info new_stmt_info = NULL; -+ internal_fn mask_reduc_fn = get_masked_reduction_fn (reduc_fn, vectype_in); - - int ncopies; - if (slp_node) -@@ -5673,16 +5698,21 @@ vectorize_fold_left_reduction (stmt_vec_info stmt_info, - def0 = negated; - } - -- if (mask) -+ if (mask && mask_reduc_fn == IFN_LAST) - def0 = merge_with_identity (gsi, mask, vectype_out, def0, - vector_identity); - - /* On the first iteration the input is simply the scalar phi - result, and for subsequent iterations it is the output of - the preceding operation. */ -- if (reduc_fn != IFN_LAST) -+ if (reduc_fn != IFN_LAST || (mask && mask_reduc_fn != IFN_LAST)) - { -- new_stmt = gimple_build_call_internal (reduc_fn, 2, reduc_var, def0); -+ if (mask && mask_reduc_fn != IFN_LAST) -+ new_stmt = gimple_build_call_internal (mask_reduc_fn, 3, reduc_var, -+ def0, mask); -+ else -+ new_stmt = gimple_build_call_internal (reduc_fn, 2, reduc_var, -+ def0); - /* For chained SLP reductions the output of the previous reduction - operation serves as the input of the next. For the final statement - the output cannot be a temporary - we reuse the original -@@ -5782,6 +5812,7 @@ use_mask_by_cond_expr_p (enum tree_code code, internal_fn cond_fn, - switch (code) - { - case DOT_PROD_EXPR: -+ case SAD_EXPR: - return true; - - default: -@@ -5811,6 +5842,17 @@ build_vect_cond_expr (enum tree_code code, tree vop[3], tree mask, - break; - } - -+ case SAD_EXPR: -+ { -+ tree vectype = TREE_TYPE (vop[1]); -+ tree masked_op1 = make_temp_ssa_name (vectype, NULL, "masked_op1"); -+ gassign *select = gimple_build_assign (masked_op1, VEC_COND_EXPR, -+ mask, vop[1], vop[0]); -+ gsi_insert_before (gsi, select, GSI_SAME_STMT); -+ vop[1] = masked_op1; -+ break; -+ } -+ - default: - gcc_unreachable (); - } -diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c -index 026148cc4..99df38711 100644 ---- a/gcc/tree-vect-patterns.c -+++ b/gcc/tree-vect-patterns.c -@@ -1302,7 +1302,7 @@ vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out) - { - if (flag_unsafe_math_optimizations - && TREE_CODE (base) == REAL_CST -- && !gimple_call_internal_p (last_stmt)) -+ && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL)) - { - combined_fn log_cfn; - built_in_function exp_bfn; -@@ -1728,6 +1728,175 @@ vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info, tree *type_out) - return pattern_stmt; - } - -+/* Recognize the following patterns: -+ -+ ATYPE a; // narrower than TYPE -+ BTYPE b; // narrower than TYPE -+ -+ 1) Multiply high with scaling -+ TYPE res = ((TYPE) a * (TYPE) b) >> c; -+ 2) ... or also with rounding -+ TYPE res = (((TYPE) a * (TYPE) b) >> d + 1) >> 1; -+ -+ where only the bottom half of res is used. */ -+ -+static gimple * -+vect_recog_mulhs_pattern (stmt_vec_info last_stmt_info, tree *type_out) -+{ -+ /* Check for a right shift. */ -+ gassign *last_stmt = dyn_cast (last_stmt_info->stmt); -+ if (!last_stmt -+ || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR) -+ return NULL; -+ vec_info *vinfo = last_stmt_info->vinfo; -+ -+ /* Check that the shift result is wider than the users of the -+ result need (i.e. that narrowing would be a natural choice). 
*/ -+ tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt)); -+ unsigned int target_precision -+ = vect_element_precision (last_stmt_info->min_output_precision); -+ if (!INTEGRAL_TYPE_P (lhs_type) -+ || target_precision >= TYPE_PRECISION (lhs_type)) -+ return NULL; -+ -+ /* Look through any change in sign on the outer shift input. */ -+ vect_unpromoted_value unprom_rshift_input; -+ tree rshift_input = vect_look_through_possible_promotion -+ (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input); -+ if (!rshift_input -+ || TYPE_PRECISION (TREE_TYPE (rshift_input)) -+ != TYPE_PRECISION (lhs_type)) -+ return NULL; -+ -+ /* Get the definition of the shift input. */ -+ stmt_vec_info rshift_input_stmt_info -+ = vect_get_internal_def (vinfo, rshift_input); -+ if (!rshift_input_stmt_info) -+ return NULL; -+ gassign *rshift_input_stmt -+ = dyn_cast (rshift_input_stmt_info->stmt); -+ if (!rshift_input_stmt) -+ return NULL; -+ -+ stmt_vec_info mulh_stmt_info; -+ tree scale_term; -+ internal_fn ifn; -+ unsigned int expect_offset; -+ -+ /* Check for the presence of the rounding term. */ -+ if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR) -+ { -+ /* Check that the outer shift was by 1. */ -+ if (!integer_onep (gimple_assign_rhs2 (last_stmt))) -+ return NULL; -+ -+ /* Check that the second operand of the PLUS_EXPR is 1. */ -+ if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt))) -+ return NULL; -+ -+ /* Look through any change in sign on the addition input. */ -+ vect_unpromoted_value unprom_plus_input; -+ tree plus_input = vect_look_through_possible_promotion -+ (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input); -+ if (!plus_input -+ || TYPE_PRECISION (TREE_TYPE (plus_input)) -+ != TYPE_PRECISION (TREE_TYPE (rshift_input))) -+ return NULL; -+ -+ /* Get the definition of the multiply-high-scale part. */ -+ stmt_vec_info plus_input_stmt_info -+ = vect_get_internal_def (vinfo, plus_input); -+ if (!plus_input_stmt_info) -+ return NULL; -+ gassign *plus_input_stmt -+ = dyn_cast (plus_input_stmt_info->stmt); -+ if (!plus_input_stmt -+ || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR) -+ return NULL; -+ -+ /* Look through any change in sign on the scaling input. */ -+ vect_unpromoted_value unprom_scale_input; -+ tree scale_input = vect_look_through_possible_promotion -+ (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input); -+ if (!scale_input -+ || TYPE_PRECISION (TREE_TYPE (scale_input)) -+ != TYPE_PRECISION (TREE_TYPE (plus_input))) -+ return NULL; -+ -+ /* Get the definition of the multiply-high part. */ -+ mulh_stmt_info = vect_get_internal_def (vinfo, scale_input); -+ if (!mulh_stmt_info) -+ return NULL; -+ -+ /* Get the scaling term. */ -+ scale_term = gimple_assign_rhs2 (plus_input_stmt); -+ -+ expect_offset = target_precision + 2; -+ ifn = IFN_MULHRS; -+ } -+ else -+ { -+ mulh_stmt_info = rshift_input_stmt_info; -+ scale_term = gimple_assign_rhs2 (last_stmt); -+ -+ expect_offset = target_precision + 1; -+ ifn = IFN_MULHS; -+ } -+ -+ /* Check that the scaling factor is correct. */ -+ if (TREE_CODE (scale_term) != INTEGER_CST -+ || wi::to_widest (scale_term) + expect_offset -+ != TYPE_PRECISION (lhs_type)) -+ return NULL; -+ -+ /* Check whether the scaling input term can be seen as two widened -+ inputs multiplied together. 
*/ -+ vect_unpromoted_value unprom_mult[2]; -+ tree new_type; -+ unsigned int nops -+ = vect_widened_op_tree (mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR, -+ false, 2, unprom_mult, &new_type); -+ if (nops != 2) -+ return NULL; -+ -+ vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt); -+ -+ /* Adjust output precision. */ -+ if (TYPE_PRECISION (new_type) < target_precision) -+ new_type = build_nonstandard_integer_type -+ (target_precision, TYPE_UNSIGNED (new_type)); -+ -+ /* Check for target support. */ -+ tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type); -+ if (!new_vectype -+ || !direct_internal_fn_supported_p -+ (ifn, new_vectype, OPTIMIZE_FOR_SPEED)) -+ return NULL; -+ -+ /* The IR requires a valid vector type for the cast result, even though -+ it's likely to be discarded. */ -+ *type_out = get_vectype_for_scalar_type (vinfo, lhs_type); -+ if (!*type_out) -+ return NULL; -+ -+ /* Generate the IFN_MULHRS call. */ -+ tree new_var = vect_recog_temp_ssa_var (new_type, NULL); -+ tree new_ops[2]; -+ vect_convert_inputs (last_stmt_info, 2, new_ops, new_type, -+ unprom_mult, new_vectype); -+ gcall *mulhrs_stmt -+ = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]); -+ gimple_call_set_lhs (mulhrs_stmt, new_var); -+ gimple_set_location (mulhrs_stmt, gimple_location (last_stmt)); -+ -+ if (dump_enabled_p ()) -+ dump_printf_loc (MSG_NOTE, vect_location, -+ "created pattern stmt: %G", mulhrs_stmt); -+ -+ return vect_convert_output (last_stmt_info, lhs_type, -+ mulhrs_stmt, new_vectype); -+} -+ - /* Recognize the patterns: - - ATYPE a; // narrower than TYPE -@@ -2872,6 +3041,37 @@ vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out) - /* Pattern detected. */ - vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt); - -+ *type_out = vectype; -+ -+ /* Check if the target supports this internal function. */ -+ internal_fn ifn = IFN_DIV_POW2; -+ if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED)) -+ { -+ tree shift = build_int_cst (itype, tree_log2 (oprnd1)); -+ -+ tree var_div = vect_recog_temp_ssa_var (itype, NULL); -+ gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift); -+ gimple_call_set_lhs (div_stmt, var_div); -+ -+ if (rhs_code == TRUNC_MOD_EXPR) -+ { -+ append_pattern_def_seq (stmt_vinfo, div_stmt); -+ def_stmt -+ = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), -+ LSHIFT_EXPR, var_div, shift); -+ append_pattern_def_seq (stmt_vinfo, def_stmt); -+ pattern_stmt -+ = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), -+ MINUS_EXPR, oprnd0, -+ gimple_assign_lhs (def_stmt)); -+ } -+ else -+ pattern_stmt = div_stmt; -+ gimple_set_location (pattern_stmt, gimple_location (last_stmt)); -+ -+ return pattern_stmt; -+ } -+ - cond = build2 (LT_EXPR, boolean_type_node, oprnd0, - build_int_cst (itype, 0)); - if (rhs_code == TRUNC_DIV_EXPR -@@ -2948,7 +3148,6 @@ vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out) - signmask); - } - -- *type_out = vectype; - return pattern_stmt; - } - -@@ -4875,6 +5074,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = { - /* Must come after over_widening, which narrows the shift as much as - possible beforehand. 
*/ - { vect_recog_average_pattern, "average" }, -+ { vect_recog_mulhs_pattern, "mult_high" }, - { vect_recog_cast_forwprop_pattern, "cast_forwprop" }, - { vect_recog_widen_mult_pattern, "widen_mult" }, - { vect_recog_dot_prod_pattern, "dot_prod" }, -diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c -index 82b868926..68a9f7574 100644 ---- a/gcc/tree-vect-stmts.c -+++ b/gcc/tree-vect-stmts.c -@@ -4497,7 +4497,6 @@ vectorizable_simd_clone_call (stmt_vec_info stmt_info, - - static gimple * - vect_gen_widened_results_half (enum tree_code code, -- tree decl, - tree vec_oprnd0, tree vec_oprnd1, int op_type, - tree vec_dest, gimple_stmt_iterator *gsi, - stmt_vec_info stmt_info) -@@ -4506,26 +4505,12 @@ vect_gen_widened_results_half (enum tree_code code, - tree new_temp; - - /* Generate half of the widened result: */ -- if (code == CALL_EXPR) -- { -- /* Target specific support */ -- if (op_type == binary_op) -- new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1); -- else -- new_stmt = gimple_build_call (decl, 1, vec_oprnd0); -- new_temp = make_ssa_name (vec_dest, new_stmt); -- gimple_call_set_lhs (new_stmt, new_temp); -- } -- else -- { -- /* Generic support */ -- gcc_assert (op_type == TREE_CODE_LENGTH (code)); -- if (op_type != binary_op) -- vec_oprnd1 = NULL; -- new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1); -- new_temp = make_ssa_name (vec_dest, new_stmt); -- gimple_assign_set_lhs (new_stmt, new_temp); -- } -+ gcc_assert (op_type == TREE_CODE_LENGTH (code)); -+ if (op_type != binary_op) -+ vec_oprnd1 = NULL; -+ new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1); -+ new_temp = make_ssa_name (vec_dest, new_stmt); -+ gimple_assign_set_lhs (new_stmt, new_temp); - vect_finish_stmt_generation (stmt_info, new_stmt, gsi); - - return new_stmt; -@@ -4651,8 +4636,7 @@ vect_create_vectorized_promotion_stmts (vec *vec_oprnds0, - stmt_vec_info stmt_info, tree vec_dest, - gimple_stmt_iterator *gsi, - enum tree_code code1, -- enum tree_code code2, tree decl1, -- tree decl2, int op_type) -+ enum tree_code code2, int op_type) - { - int i; - tree vop0, vop1, new_tmp1, new_tmp2; -@@ -4668,10 +4652,10 @@ vect_create_vectorized_promotion_stmts (vec *vec_oprnds0, - vop1 = NULL_TREE; - - /* Generate the two halves of promotion operation. 
*/ -- new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1, -+ new_stmt1 = vect_gen_widened_results_half (code1, vop0, vop1, - op_type, vec_dest, gsi, - stmt_info); -- new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1, -+ new_stmt2 = vect_gen_widened_results_half (code2, vop0, vop1, - op_type, vec_dest, gsi, - stmt_info); - if (is_gimple_call (new_stmt1)) -@@ -4712,7 +4696,6 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, - loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); - enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK; - enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK; -- tree decl1 = NULL_TREE, decl2 = NULL_TREE; - tree new_temp; - enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type}; - int ndts = 2; -@@ -4883,8 +4866,7 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, - && code != FLOAT_EXPR - && !CONVERT_EXPR_CODE_P (code)) - return false; -- if (supportable_convert_operation (code, vectype_out, vectype_in, -- &decl1, &code1)) -+ if (supportable_convert_operation (code, vectype_out, vectype_in, &code1)) - break; - /* FALLTHRU */ - unsupported: -@@ -4924,7 +4906,7 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, - if (GET_MODE_SIZE (rhs_mode) == fltsz) - { - if (!supportable_convert_operation (code, vectype_out, -- cvt_type, &decl1, &codecvt1)) -+ cvt_type, &codecvt1)) - goto unsupported; - } - else if (!supportable_widening_operation (code, stmt_info, -@@ -4975,7 +4957,7 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, - if (cvt_type == NULL_TREE) - goto unsupported; - if (!supportable_convert_operation (code, cvt_type, vectype_in, -- &decl1, &codecvt1)) -+ &codecvt1)) - goto unsupported; - if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type, - &code1, &multi_step_cvt, -@@ -5084,24 +5066,12 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, - { - stmt_vec_info new_stmt_info; - /* Arguments are ready, create the new vector stmt. 
*/ -- if (code1 == CALL_EXPR) -- { -- gcall *new_stmt = gimple_build_call (decl1, 1, vop0); -- new_temp = make_ssa_name (vec_dest, new_stmt); -- gimple_call_set_lhs (new_stmt, new_temp); -- new_stmt_info -- = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); -- } -- else -- { -- gcc_assert (TREE_CODE_LENGTH (code1) == unary_op); -- gassign *new_stmt -- = gimple_build_assign (vec_dest, code1, vop0); -- new_temp = make_ssa_name (vec_dest, new_stmt); -- gimple_assign_set_lhs (new_stmt, new_temp); -- new_stmt_info -- = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); -- } -+ gcc_assert (TREE_CODE_LENGTH (code1) == unary_op); -+ gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0); -+ new_temp = make_ssa_name (vec_dest, new_stmt); -+ gimple_assign_set_lhs (new_stmt, new_temp); -+ new_stmt_info -+ = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); - - if (slp_node) - SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info); -@@ -5193,8 +5163,7 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, - vect_create_vectorized_promotion_stmts (&vec_oprnds0, - &vec_oprnds1, stmt_info, - this_dest, gsi, -- c1, c2, decl1, decl2, -- op_type); -+ c1, c2, op_type); - } - - FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) -@@ -5202,25 +5171,12 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, - stmt_vec_info new_stmt_info; - if (cvt_type) - { -- if (codecvt1 == CALL_EXPR) -- { -- gcall *new_stmt = gimple_build_call (decl1, 1, vop0); -- new_temp = make_ssa_name (vec_dest, new_stmt); -- gimple_call_set_lhs (new_stmt, new_temp); -- new_stmt_info -- = vect_finish_stmt_generation (stmt_info, new_stmt, -- gsi); -- } -- else -- { -- gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op); -- new_temp = make_ssa_name (vec_dest); -- gassign *new_stmt -- = gimple_build_assign (new_temp, codecvt1, vop0); -- new_stmt_info -- = vect_finish_stmt_generation (stmt_info, new_stmt, -- gsi); -- } -+ gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op); -+ new_temp = make_ssa_name (vec_dest); -+ gassign *new_stmt -+ = gimple_build_assign (new_temp, codecvt1, vop0); -+ new_stmt_info -+ = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); - } - else - new_stmt_info = vinfo->lookup_def (vop0); -@@ -5263,22 +5219,11 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, - if (cvt_type) - FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) - { -- if (codecvt1 == CALL_EXPR) -- { -- gcall *new_stmt = gimple_build_call (decl1, 1, vop0); -- new_temp = make_ssa_name (vec_dest, new_stmt); -- gimple_call_set_lhs (new_stmt, new_temp); -- vect_finish_stmt_generation (stmt_info, new_stmt, gsi); -- } -- else -- { -- gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op); -- new_temp = make_ssa_name (vec_dest); -- gassign *new_stmt -- = gimple_build_assign (new_temp, codecvt1, vop0); -- vect_finish_stmt_generation (stmt_info, new_stmt, gsi); -- } -- -+ gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op); -+ new_temp = make_ssa_name (vec_dest); -+ gassign *new_stmt -+ = gimple_build_assign (new_temp, codecvt1, vop0); -+ vect_finish_stmt_generation (stmt_info, new_stmt, gsi); - vec_oprnds0[i] = new_temp; - } - -@@ -8774,8 +8719,7 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, - new_stmt = gimple_build_assign (vec_dest, data_ref); - new_temp = make_ssa_name (vec_dest, new_stmt); - gimple_assign_set_lhs (new_stmt, new_temp); -- gimple_set_vdef (new_stmt, gimple_vdef (stmt_info->stmt)); -- gimple_set_vuse (new_stmt, gimple_vuse 
(stmt_info->stmt)); -+ gimple_move_vops (new_stmt, stmt_info->stmt); - vect_finish_stmt_generation (stmt_info, new_stmt, gsi); - msq = new_temp; - -diff --git a/gcc/tree-vector-builder.c b/gcc/tree-vector-builder.c -index f31dc13b4..d02fb950c 100644 ---- a/gcc/tree-vector-builder.c -+++ b/gcc/tree-vector-builder.c -@@ -24,103 +24,6 @@ along with GCC; see the file COPYING3. If not see - #include "fold-const.h" - #include "tree-vector-builder.h" - --/* Try to start building a new vector of type TYPE that holds the result of -- a unary operation on VECTOR_CST T. ALLOW_STEPPED_P is true if the -- operation can handle stepped encodings directly, without having to -- expand the full sequence. -- -- Return true if the operation is possible, which it always is when -- ALLOW_STEPPED_P is true. Leave the builder unchanged otherwise. */ -- --bool --tree_vector_builder::new_unary_operation (tree type, tree t, -- bool allow_stepped_p) --{ -- poly_uint64 full_nelts = TYPE_VECTOR_SUBPARTS (type); -- gcc_assert (known_eq (full_nelts, TYPE_VECTOR_SUBPARTS (TREE_TYPE (t)))); -- unsigned int npatterns = VECTOR_CST_NPATTERNS (t); -- unsigned int nelts_per_pattern = VECTOR_CST_NELTS_PER_PATTERN (t); -- if (!allow_stepped_p && nelts_per_pattern > 2) -- { -- if (!full_nelts.is_constant ()) -- return false; -- npatterns = full_nelts.to_constant (); -- nelts_per_pattern = 1; -- } -- new_vector (type, npatterns, nelts_per_pattern); -- return true; --} -- --/* Try to start building a new vector of type TYPE that holds the result of -- a binary operation on VECTOR_CSTs T1 and T2. ALLOW_STEPPED_P is true if -- the operation can handle stepped encodings directly, without having to -- expand the full sequence. -- -- Return true if the operation is possible. Leave the builder unchanged -- otherwise. */ -- --bool --tree_vector_builder::new_binary_operation (tree type, tree t1, tree t2, -- bool allow_stepped_p) --{ -- poly_uint64 full_nelts = TYPE_VECTOR_SUBPARTS (type); -- gcc_assert (known_eq (full_nelts, TYPE_VECTOR_SUBPARTS (TREE_TYPE (t1))) -- && known_eq (full_nelts, TYPE_VECTOR_SUBPARTS (TREE_TYPE (t2)))); -- /* Conceptually we split the patterns in T1 and T2 until we have -- an equal number for both. Each split pattern requires the same -- number of elements per pattern as the original. E.g. splitting: -- -- { 1, 2, 3, ... } -- -- into two gives: -- -- { 1, 3, 5, ... } -- { 2, 4, 6, ... } -- -- while splitting: -- -- { 1, 0, ... } -- -- into two gives: -- -- { 1, 0, ... } -- { 0, 0, ... }. */ -- unsigned int npatterns = least_common_multiple (VECTOR_CST_NPATTERNS (t1), -- VECTOR_CST_NPATTERNS (t2)); -- unsigned int nelts_per_pattern = MAX (VECTOR_CST_NELTS_PER_PATTERN (t1), -- VECTOR_CST_NELTS_PER_PATTERN (t2)); -- if (!allow_stepped_p && nelts_per_pattern > 2) -- { -- if (!full_nelts.is_constant ()) -- return false; -- npatterns = full_nelts.to_constant (); -- nelts_per_pattern = 1; -- } -- new_vector (type, npatterns, nelts_per_pattern); -- return true; --} -- --/* Return the number of elements that the caller needs to operate on in -- order to handle a binary operation on VECTOR_CSTs T1 and T2. This static -- function is used instead of new_binary_operation if the result of the -- operation is not a VECTOR_CST. */ -- --unsigned int --tree_vector_builder::binary_encoded_nelts (tree t1, tree t2) --{ -- poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (t1)); -- gcc_assert (known_eq (nelts, TYPE_VECTOR_SUBPARTS (TREE_TYPE (t2)))); -- /* See new_binary_operation for details. 
*/ -- unsigned int npatterns = least_common_multiple (VECTOR_CST_NPATTERNS (t1), -- VECTOR_CST_NPATTERNS (t2)); -- unsigned int nelts_per_pattern = MAX (VECTOR_CST_NELTS_PER_PATTERN (t1), -- VECTOR_CST_NELTS_PER_PATTERN (t2)); -- unsigned HOST_WIDE_INT const_nelts; -- if (nelts.is_constant (&const_nelts)) -- return MIN (npatterns * nelts_per_pattern, const_nelts); -- return npatterns * nelts_per_pattern; --} -- - /* Return a vector element with the value BASE + FACTOR * STEP. */ - - tree -diff --git a/gcc/tree-vector-builder.h b/gcc/tree-vector-builder.h -index 13af74ad8..add79e476 100644 ---- a/gcc/tree-vector-builder.h -+++ b/gcc/tree-vector-builder.h -@@ -24,10 +24,11 @@ along with GCC; see the file COPYING3. If not see - - /* This class is used to build VECTOR_CSTs from a sequence of elements. - See vector_builder for more details. */ --class tree_vector_builder : public vector_builder -+class tree_vector_builder : public vector_builder - { -- typedef vector_builder parent; -- friend class vector_builder; -+ typedef vector_builder parent; -+ friend class vector_builder; - - public: - tree_vector_builder () : m_type (0) {} -@@ -37,10 +38,6 @@ public: - tree type () const { return m_type; } - - void new_vector (tree, unsigned int, unsigned int); -- bool new_unary_operation (tree, tree, bool); -- bool new_binary_operation (tree, tree, tree, bool); -- -- static unsigned int binary_encoded_nelts (tree, tree); - - private: - bool equal_p (const_tree, const_tree) const; -@@ -51,6 +48,15 @@ private: - bool can_elide_p (const_tree) const; - void note_representative (tree *, tree); - -+ static poly_uint64 shape_nelts (const_tree t) -+ { return TYPE_VECTOR_SUBPARTS (t); } -+ static poly_uint64 nelts_of (const_tree t) -+ { return VECTOR_CST_NELTS (t); } -+ static unsigned int npatterns_of (const_tree t) -+ { return VECTOR_CST_NPATTERNS (t); } -+ static unsigned int nelts_per_pattern_of (const_tree t) -+ { return VECTOR_CST_NELTS_PER_PATTERN (t); } -+ - tree m_type; - }; - -diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c -index c2c6377d3..71ca80937 100644 ---- a/gcc/tree-vectorizer.c -+++ b/gcc/tree-vectorizer.c -@@ -288,10 +288,7 @@ adjust_simduid_builtins (hash_table *htab) - : BUILT_IN_GOMP_ORDERED_END); - gimple *g - = gimple_build_call (builtin_decl_explicit (bcode), 0); -- tree vdef = gimple_vdef (stmt); -- gimple_set_vdef (g, vdef); -- SSA_NAME_DEF_STMT (vdef) = g; -- gimple_set_vuse (g, gimple_vuse (stmt)); -+ gimple_move_vops (g, stmt); - gsi_replace (&i, g, true); - continue; - } -diff --git a/gcc/tree.c b/gcc/tree.c -index c4b8eea67..62607c63a 100644 ---- a/gcc/tree.c -+++ b/gcc/tree.c -@@ -1965,6 +1965,23 @@ build_index_vector (tree vec_type, poly_uint64 base, poly_uint64 step) - return v.build (); - } - -+/* Return a VECTOR_CST of type VEC_TYPE in which the first NUM_A -+ elements are A and the rest are B. */ -+ -+tree -+build_vector_a_then_b (tree vec_type, unsigned int num_a, tree a, tree b) -+{ -+ gcc_assert (known_le (num_a, TYPE_VECTOR_SUBPARTS (vec_type))); -+ unsigned int count = constant_lower_bound (TYPE_VECTOR_SUBPARTS (vec_type)); -+ /* Optimize the constant case. */ -+ if ((count & 1) == 0 && TYPE_VECTOR_SUBPARTS (vec_type).is_constant ()) -+ count /= 2; -+ tree_vector_builder builder (vec_type, count, 2); -+ for (unsigned int i = 0; i < count * 2; ++i) -+ builder.quick_push (i < num_a ? a : b); -+ return builder.build (); -+} -+ - /* Something has messed with the elements of CONSTRUCTOR C after it was built; - calculate TREE_CONSTANT and TREE_SIDE_EFFECTS. 
*/ - -diff --git a/gcc/tree.h b/gcc/tree.h -index 6f73593fa..356a9f544 100644 ---- a/gcc/tree.h -+++ b/gcc/tree.h -@@ -2475,10 +2475,10 @@ extern machine_mode vector_type_mode (const_tree); - (DECL_COMMON_CHECK (NODE)->decl_common.mode = (MODE)) - - /* For FUNCTION_DECL, if it is built-in, this identifies which built-in -- operation it is. Note, however, that this field is overloaded, with -- DECL_BUILT_IN_CLASS as the discriminant, so the latter must always be -- checked before any access to the former. */ --#define DECL_FUNCTION_CODE(NODE) \ -+ operation it is. This is only intended for low-level accesses; -+ normally DECL_FUNCTION_CODE, DECL_FE_FUNCTION_CODE or DECL_MD_FUNCTION -+ should be used instead. */ -+#define DECL_UNCHECKED_FUNCTION_CODE(NODE) \ - (FUNCTION_DECL_CHECK (NODE)->function_decl.function_code) - - /* Test if FCODE is a function code for an alloca operation. */ -@@ -2955,11 +2955,34 @@ extern void decl_fini_priority_insert (tree, priority_type); - #define DECL_IS_MALLOC(NODE) \ - (FUNCTION_DECL_CHECK (NODE)->function_decl.malloc_flag) - -+/* Macro for direct set and get of function_decl.decl_type. */ -+#define FUNCTION_DECL_DECL_TYPE(NODE) \ -+ (NODE->function_decl.decl_type) -+ -+/* Set decl_type of a DECL. Set it to T when SET is true, or reset -+ it to NONE. */ -+ -+static inline void -+set_function_decl_type (tree decl, function_decl_type t, bool set) -+{ -+ if (set) -+ { -+ gcc_assert (FUNCTION_DECL_DECL_TYPE (decl) == NONE -+ || FUNCTION_DECL_DECL_TYPE (decl) == t); -+ decl->function_decl.decl_type = t; -+ } -+ else if (FUNCTION_DECL_DECL_TYPE (decl) == t) -+ FUNCTION_DECL_DECL_TYPE (decl) = NONE; -+} -+ - /* Nonzero in a FUNCTION_DECL means this function should be treated as - C++ operator new, meaning that it returns a pointer for which we - should not use type based aliasing. */ --#define DECL_IS_OPERATOR_NEW(NODE) \ -- (FUNCTION_DECL_CHECK (NODE)->function_decl.operator_new_flag) -+#define DECL_IS_OPERATOR_NEW_P(NODE) \ -+ (FUNCTION_DECL_CHECK (NODE)->function_decl.decl_type == OPERATOR_NEW) -+ -+#define DECL_SET_IS_OPERATOR_NEW(NODE, VAL) \ -+ set_function_decl_type (FUNCTION_DECL_CHECK (NODE), OPERATOR_NEW, VAL) - - /* Nonzero in a FUNCTION_DECL means this function may return more - than once. */ -@@ -3066,10 +3089,9 @@ extern vec **decl_debug_args_insert (tree); - #define DECL_STRUCT_FUNCTION(NODE) \ - (FUNCTION_DECL_CHECK (NODE)->function_decl.f) - -- - /* For a builtin function, identify which part of the compiler defined it. */ - #define DECL_BUILT_IN_CLASS(NODE) \ -- (FUNCTION_DECL_CHECK (NODE)->function_decl.built_in_class) -+ ((built_in_class) FUNCTION_DECL_CHECK (NODE)->function_decl.built_in_class) - - /* In FUNCTION_DECL, a chain of ..._DECL nodes. */ - #define DECL_ARGUMENTS(NODE) \ -@@ -3104,8 +3126,11 @@ extern vec **decl_debug_args_insert (tree); - (FUNCTION_DECL_CHECK (NODE)->decl_with_vis.cxx_destructor) - - /* In FUNCTION_DECL, this is set if this function is a lambda function. */ --#define DECL_LAMBDA_FUNCTION(NODE) \ -- (FUNCTION_DECL_CHECK (NODE)->function_decl.lambda_function) -+#define DECL_LAMBDA_FUNCTION_P(NODE) \ -+ (FUNCTION_DECL_CHECK (NODE)->function_decl.decl_type == LAMBDA_FUNCTION) -+ -+#define DECL_SET_LAMBDA_FUNCTION(NODE, VAL) \ -+ set_function_decl_type (FUNCTION_DECL_CHECK (NODE), LAMBDA_FUNCTION, VAL) - - /* In FUNCTION_DECL that represent an virtual method this is set when - the method is final. 
*/ -@@ -3788,6 +3813,61 @@ valid_vector_subparts_p (poly_uint64 subparts) - return true; - } - -+/* Return the built-in function that DECL represents, given that it is known -+ to be a FUNCTION_DECL with built-in class BUILT_IN_NORMAL. */ -+inline built_in_function -+DECL_FUNCTION_CODE (const_tree decl) -+{ -+ const tree_function_decl &fndecl = FUNCTION_DECL_CHECK (decl)->function_decl; -+ gcc_checking_assert (fndecl.built_in_class == BUILT_IN_NORMAL); -+ return (built_in_function) fndecl.function_code; -+} -+ -+/* Return the target-specific built-in function that DECL represents, -+ given that it is known to be a FUNCTION_DECL with built-in class -+ BUILT_IN_MD. */ -+inline int -+DECL_MD_FUNCTION_CODE (const_tree decl) -+{ -+ const tree_function_decl &fndecl = FUNCTION_DECL_CHECK (decl)->function_decl; -+ gcc_checking_assert (fndecl.built_in_class == BUILT_IN_MD); -+ return fndecl.function_code; -+} -+ -+/* Return the frontend-specific built-in function that DECL represents, -+ given that it is known to be a FUNCTION_DECL with built-in class -+ BUILT_IN_FRONTEND. */ -+inline int -+DECL_FE_FUNCTION_CODE (const_tree decl) -+{ -+ const tree_function_decl &fndecl = FUNCTION_DECL_CHECK (decl)->function_decl; -+ gcc_checking_assert (fndecl.built_in_class == BUILT_IN_FRONTEND); -+ return fndecl.function_code; -+} -+ -+/* Record that FUNCTION_DECL DECL represents built-in function FCODE of -+ class FCLASS. */ -+inline void -+set_decl_built_in_function (tree decl, built_in_class fclass, -+ unsigned int fcode) -+{ -+ tree_function_decl &fndecl = FUNCTION_DECL_CHECK (decl)->function_decl; -+ fndecl.built_in_class = fclass; -+ fndecl.function_code = fcode; -+} -+ -+/* Record that FUNCTION_DECL NEWDECL represents the same built-in function -+ as OLDDECL (or none, if OLDDECL doesn't represent a built-in function). */ -+inline void -+copy_decl_built_in_function (tree newdecl, const_tree olddecl) -+{ -+ tree_function_decl &newfndecl = FUNCTION_DECL_CHECK (newdecl)->function_decl; -+ const tree_function_decl &oldfndecl -+ = FUNCTION_DECL_CHECK (olddecl)->function_decl; -+ newfndecl.built_in_class = oldfndecl.built_in_class; -+ newfndecl.function_code = oldfndecl.function_code; -+} -+ - /* In NON_LVALUE_EXPR and VIEW_CONVERT_EXPR, set when this node is merely a - wrapper added to express a location_t on behalf of the node's child - (e.g. by maybe_wrap_with_location). */ -@@ -4212,6 +4292,7 @@ extern tree build_vector_from_val (tree, tree); - extern tree build_uniform_cst (tree, tree); - extern tree build_vec_series (tree, tree, tree); - extern tree build_index_vector (tree, poly_uint64, poly_uint64); -+extern tree build_vector_a_then_b (tree, unsigned int, tree, tree); - extern void recompute_constructor_flags (tree); - extern void verify_constructor_flags (tree); - extern tree build_constructor (tree, vec *); -@@ -5967,9 +6048,10 @@ fndecl_built_in_p (const_tree node, built_in_class klass) - of class KLASS with name equal to NAME. 
*/ - - inline bool --fndecl_built_in_p (const_tree node, int name, built_in_class klass) -+fndecl_built_in_p (const_tree node, unsigned int name, built_in_class klass) - { -- return (fndecl_built_in_p (node, klass) && DECL_FUNCTION_CODE (node) == name); -+ return (fndecl_built_in_p (node, klass) -+ && DECL_UNCHECKED_FUNCTION_CODE (node) == name); - } - - /* Return true if a FUNCTION_DECL NODE is a GCC built-in function -diff --git a/gcc/var-tracking.c b/gcc/var-tracking.c -index 96e0c93a6..982ef13d1 100644 ---- a/gcc/var-tracking.c -+++ b/gcc/var-tracking.c -@@ -116,6 +116,7 @@ - #include "rtl-iter.h" - #include "fibonacci_heap.h" - #include "print-rtl.h" -+#include "function-abi.h" - - typedef fibonacci_heap bb_heap_t; - typedef fibonacci_node bb_heap_node_t; -@@ -1238,7 +1239,7 @@ adjust_insn (basic_block bb, rtx_insn *insn) - amd.stack_adjust = -VTI (bb)->out.stack_adjust; - - amd.store = true; -- note_stores (PATTERN (insn), adjust_mem_stores, &amd); -+ note_stores (insn, adjust_mem_stores, &amd); - - amd.store = false; - if (GET_CODE (PATTERN (insn)) == PARALLEL -@@ -4899,12 +4900,11 @@ dataflow_set_clear_at_call (dataflow_set *set, rtx_insn *call_insn) - { - unsigned int r; - hard_reg_set_iterator hrsi; -- HARD_REG_SET invalidated_regs; - -- get_call_reg_set_usage (call_insn, &invalidated_regs, -- regs_invalidated_by_call); -+ HARD_REG_SET callee_clobbers -+ = insn_callee_abi (call_insn).full_reg_clobbers (); - -- EXECUTE_IF_SET_IN_HARD_REG_SET (invalidated_regs, 0, r, hrsi) -+ EXECUTE_IF_SET_IN_HARD_REG_SET (callee_clobbers, 0, r, hrsi) - var_regno_delete (set, r); - - if (MAY_HAVE_DEBUG_BIND_INSNS) -@@ -6292,14 +6292,12 @@ prepare_call_arguments (basic_block bb, rtx_insn *insn) - && targetm.calls.struct_value_rtx (type, 0) == 0) - { - tree struct_addr = build_pointer_type (TREE_TYPE (type)); -- machine_mode mode = TYPE_MODE (struct_addr); -+ function_arg_info arg (struct_addr, /*named=*/true); - rtx reg; - INIT_CUMULATIVE_ARGS (args_so_far_v, type, NULL_RTX, fndecl, - nargs + 1); -- reg = targetm.calls.function_arg (args_so_far, mode, -- struct_addr, true); -- targetm.calls.function_arg_advance (args_so_far, mode, -- struct_addr, true); -+ reg = targetm.calls.function_arg (args_so_far, arg); -+ targetm.calls.function_arg_advance (args_so_far, arg); - if (reg == NULL_RTX) - { - for (; link; link = XEXP (link, 1)) -@@ -6317,11 +6315,9 @@ prepare_call_arguments (basic_block bb, rtx_insn *insn) - nargs); - if (obj_type_ref && TYPE_ARG_TYPES (type) != void_list_node) - { -- machine_mode mode; - t = TYPE_ARG_TYPES (type); -- mode = TYPE_MODE (TREE_VALUE (t)); -- this_arg = targetm.calls.function_arg (args_so_far, mode, -- TREE_VALUE (t), true); -+ function_arg_info arg (TREE_VALUE (t), /*named=*/true); -+ this_arg = targetm.calls.function_arg (args_so_far, arg); - if (this_arg && !REG_P (this_arg)) - this_arg = NULL_RTX; - else if (this_arg == NULL_RTX) -@@ -6429,30 +6425,24 @@ prepare_call_arguments (basic_block bb, rtx_insn *insn) - } - if (t && t != void_list_node) - { -- tree argtype = TREE_VALUE (t); -- machine_mode mode = TYPE_MODE (argtype); - rtx reg; -- if (pass_by_reference (&args_so_far_v, mode, argtype, true)) -- { -- argtype = build_pointer_type (argtype); -- mode = TYPE_MODE (argtype); -- } -- reg = targetm.calls.function_arg (args_so_far, mode, -- argtype, true); -- if (TREE_CODE (argtype) == REFERENCE_TYPE -- && INTEGRAL_TYPE_P (TREE_TYPE (argtype)) -+ function_arg_info arg (TREE_VALUE (t), /*named=*/true); -+ apply_pass_by_reference_rules (&args_so_far_v, arg); -+ reg = 
targetm.calls.function_arg (args_so_far, arg); -+ if (TREE_CODE (arg.type) == REFERENCE_TYPE -+ && INTEGRAL_TYPE_P (TREE_TYPE (arg.type)) - && reg - && REG_P (reg) -- && GET_MODE (reg) == mode -- && (GET_MODE_CLASS (mode) == MODE_INT -- || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT) -+ && GET_MODE (reg) == arg.mode -+ && (GET_MODE_CLASS (arg.mode) == MODE_INT -+ || GET_MODE_CLASS (arg.mode) == MODE_PARTIAL_INT) - && REG_P (x) - && REGNO (x) == REGNO (reg) -- && GET_MODE (x) == mode -+ && GET_MODE (x) == arg.mode - && item) - { - machine_mode indmode -- = TYPE_MODE (TREE_TYPE (argtype)); -+ = TYPE_MODE (TREE_TYPE (arg.type)); - rtx mem = gen_rtx_MEM (indmode, x); - cselib_val *val = cselib_lookup (mem, indmode, 0, VOIDmode); - if (val && cselib_preserved_value_p (val)) -@@ -6492,8 +6482,7 @@ prepare_call_arguments (basic_block bb, rtx_insn *insn) - } - } - } -- targetm.calls.function_arg_advance (args_so_far, mode, -- argtype, true); -+ targetm.calls.function_arg_advance (args_so_far, arg); - t = TREE_CHAIN (t); - } - } -@@ -6642,7 +6631,7 @@ add_with_sets (rtx_insn *insn, struct cselib_set *sets, int n_sets) - insert notes before it without worrying about any - notes that MO_USEs might emit after the insn. */ - cui.store_p = true; -- note_stores (PATTERN (insn), add_stores, &cui); -+ note_stores (insn, add_stores, &cui); - n2 = VTI (bb)->mos.length () - 1; - mos = VTI (bb)->mos.address (); - -diff --git a/gcc/vector-builder.h b/gcc/vector-builder.h -index 9967daa6e..37911ac69 100644 ---- a/gcc/vector-builder.h -+++ b/gcc/vector-builder.h -@@ -45,8 +45,11 @@ along with GCC; see the file COPYING3. If not see - variable-length vectors. finalize () then canonicalizes the encoding - to a simpler form if possible. - -- The derived class Derived provides this functionality for specific Ts. -- Derived needs to provide the following interface: -+ Shape is the type that specifies the number of elements in the vector -+ and (where relevant) the type of each element. -+ -+ The derived class Derived provides the functionality of this class -+ for specific Ts. Derived needs to provide the following interface: - - bool equal_p (T elt1, T elt2) const; - -@@ -82,9 +85,30 @@ along with GCC; see the file COPYING3. If not see - - Record that ELT2 is being elided, given that ELT1_PTR points to - the last encoded element for the containing pattern. This is -- again provided for TREE_OVERFLOW handling. */ -+ again provided for TREE_OVERFLOW handling. -+ -+ static poly_uint64 shape_nelts (Shape shape); -+ -+ Return the number of elements in SHAPE. -+ -+ The class provides additional functionality for the case in which -+ T can describe a vector constant as well as an individual element. -+ This functionality requires: -+ -+ static poly_uint64 nelts_of (T x); -+ -+ Return the number of elements in vector constant X. -+ -+ static unsigned int npatterns_of (T x); - --template -+ Return the number of patterns used to encode vector constant X. -+ -+ static unsigned int nelts_per_pattern_of (T x); -+ -+ Return the number of elements used to encode each pattern -+ in vector constant X. 
*/ -+ -+template - class vector_builder : public auto_vec - { - public: -@@ -96,12 +120,18 @@ public: - unsigned int encoded_nelts () const; - bool encoded_full_vector_p () const; - T elt (unsigned int) const; -+ unsigned int count_dups (int, int, int) const; - - bool operator == (const Derived &) const; - bool operator != (const Derived &x) const { return !operator == (x); } - -+ bool new_unary_operation (Shape, T, bool); -+ bool new_binary_operation (Shape, T, T, bool); -+ - void finalize (); - -+ static unsigned int binary_encoded_nelts (T, T); -+ - protected: - void new_vector (poly_uint64, unsigned int, unsigned int); - void reshape (unsigned int, unsigned int); -@@ -120,16 +150,16 @@ private: - unsigned int m_nelts_per_pattern; - }; - --template -+template - inline const Derived * --vector_builder::derived () const -+vector_builder::derived () const - { - return static_cast (this); - } - --template -+template - inline --vector_builder::vector_builder () -+vector_builder::vector_builder () - : m_full_nelts (0), - m_npatterns (0), - m_nelts_per_pattern (0) -@@ -139,18 +169,18 @@ vector_builder::vector_builder () - starts with these explicitly-encoded elements and may contain additional - elided elements. */ - --template -+template - inline unsigned int --vector_builder::encoded_nelts () const -+vector_builder::encoded_nelts () const - { - return m_npatterns * m_nelts_per_pattern; - } - - /* Return true if every element of the vector is explicitly encoded. */ - --template -+template - inline bool --vector_builder::encoded_full_vector_p () const -+vector_builder::encoded_full_vector_p () const - { - return known_eq (m_npatterns * m_nelts_per_pattern, m_full_nelts); - } -@@ -158,11 +188,11 @@ vector_builder::encoded_full_vector_p () const - /* Start building a vector that has FULL_NELTS elements. Initially - encode it using NPATTERNS patterns with NELTS_PER_PATTERN each. */ - --template -+template - void --vector_builder::new_vector (poly_uint64 full_nelts, -- unsigned int npatterns, -- unsigned int nelts_per_pattern) -+vector_builder::new_vector (poly_uint64 full_nelts, -+ unsigned int npatterns, -+ unsigned int nelts_per_pattern) - { - m_full_nelts = full_nelts; - m_npatterns = npatterns; -@@ -174,9 +204,9 @@ vector_builder::new_vector (poly_uint64 full_nelts, - /* Return true if this vector and OTHER have the same elements and - are encoded in the same way. */ - --template -+template - bool --vector_builder::operator == (const Derived &other) const -+vector_builder::operator == (const Derived &other) const - { - if (maybe_ne (m_full_nelts, other.m_full_nelts) - || m_npatterns != other.m_npatterns -@@ -194,18 +224,19 @@ vector_builder::operator == (const Derived &other) const - /* Return the value of vector element I, which might or might not be - encoded explicitly. */ - --template -+template - T --vector_builder::elt (unsigned int i) const -+vector_builder::elt (unsigned int i) const - { -- /* This only makes sense if the encoding has been fully populated. */ -- gcc_checking_assert (encoded_nelts () <= this->length ()); -- - /* First handle elements that are already present in the underlying - vector, regardless of whether they're part of the encoding or not. */ - if (i < this->length ()) - return (*this)[i]; - -+ /* Extrapolation is only possible if the encoding has been fully -+ populated. */ -+ gcc_checking_assert (encoded_nelts () <= this->length ()); -+ - /* Identify the pattern that contains element I and work out the index of - the last encoded element for that pattern. 
*/ - unsigned int pattern = i % m_npatterns; -@@ -223,13 +254,136 @@ vector_builder::elt (unsigned int i) const - derived ()->step (prev, final)); - } - -+/* Try to start building a new vector of shape SHAPE that holds the result of -+ a unary operation on vector constant VEC. ALLOW_STEPPED_P is true if the -+ operation can handle stepped encodings directly, without having to expand -+ the full sequence. -+ -+ Return true if the operation is possible, which it always is when -+ ALLOW_STEPPED_P is true. Leave the builder unchanged otherwise. */ -+ -+template -+bool -+vector_builder::new_unary_operation (Shape shape, T vec, -+ bool allow_stepped_p) -+{ -+ poly_uint64 full_nelts = Derived::shape_nelts (shape); -+ gcc_assert (known_eq (full_nelts, Derived::nelts_of (vec))); -+ unsigned int npatterns = Derived::npatterns_of (vec); -+ unsigned int nelts_per_pattern = Derived::nelts_per_pattern_of (vec); -+ if (!allow_stepped_p && nelts_per_pattern > 2) -+ { -+ if (!full_nelts.is_constant ()) -+ return false; -+ npatterns = full_nelts.to_constant (); -+ nelts_per_pattern = 1; -+ } -+ derived ()->new_vector (shape, npatterns, nelts_per_pattern); -+ return true; -+} -+ -+/* Try to start building a new vector of shape SHAPE that holds the result of -+ a binary operation on vector constants VEC1 and VEC2. ALLOW_STEPPED_P is -+ true if the operation can handle stepped encodings directly, without -+ having to expand the full sequence. -+ -+ Return true if the operation is possible. Leave the builder unchanged -+ otherwise. */ -+ -+template -+bool -+vector_builder::new_binary_operation (Shape shape, -+ T vec1, T vec2, -+ bool allow_stepped_p) -+{ -+ poly_uint64 full_nelts = Derived::shape_nelts (shape); -+ gcc_assert (known_eq (full_nelts, Derived::nelts_of (vec1)) -+ && known_eq (full_nelts, Derived::nelts_of (vec2))); -+ /* Conceptually we split the patterns in VEC1 and VEC2 until we have -+ an equal number for both. Each split pattern requires the same -+ number of elements per pattern as the original. E.g. splitting: -+ -+ { 1, 2, 3, ... } -+ -+ into two gives: -+ -+ { 1, 3, 5, ... } -+ { 2, 4, 6, ... } -+ -+ while splitting: -+ -+ { 1, 0, ... } -+ -+ into two gives: -+ -+ { 1, 0, ... } -+ { 0, 0, ... }. */ -+ unsigned int npatterns -+ = least_common_multiple (Derived::npatterns_of (vec1), -+ Derived::npatterns_of (vec2)); -+ unsigned int nelts_per_pattern -+ = MAX (Derived::nelts_per_pattern_of (vec1), -+ Derived::nelts_per_pattern_of (vec2)); -+ if (!allow_stepped_p && nelts_per_pattern > 2) -+ { -+ if (!full_nelts.is_constant ()) -+ return false; -+ npatterns = full_nelts.to_constant (); -+ nelts_per_pattern = 1; -+ } -+ derived ()->new_vector (shape, npatterns, nelts_per_pattern); -+ return true; -+} -+ -+/* Return the number of elements that the caller needs to operate on in -+ order to handle a binary operation on vector constants VEC1 and VEC2. -+ This static function is used instead of new_binary_operation if the -+ result of the operation is not a constant vector. */ -+ -+template -+unsigned int -+vector_builder::binary_encoded_nelts (T vec1, T vec2) -+{ -+ poly_uint64 nelts = Derived::nelts_of (vec1); -+ gcc_assert (known_eq (nelts, Derived::nelts_of (vec2))); -+ /* See new_binary_operation for details. 
*/ -+ unsigned int npatterns -+ = least_common_multiple (Derived::npatterns_of (vec1), -+ Derived::npatterns_of (vec2)); -+ unsigned int nelts_per_pattern -+ = MAX (Derived::nelts_per_pattern_of (vec1), -+ Derived::nelts_per_pattern_of (vec2)); -+ unsigned HOST_WIDE_INT const_nelts; -+ if (nelts.is_constant (&const_nelts)) -+ return MIN (npatterns * nelts_per_pattern, const_nelts); -+ return npatterns * nelts_per_pattern; -+} -+ -+/* Return the number of leading duplicate elements in the range -+ [START:END:STEP]. The value is always at least 1. */ -+ -+template -+unsigned int -+vector_builder::count_dups (int start, int end, -+ int step) const -+{ -+ gcc_assert ((end - start) % step == 0); -+ -+ unsigned int ndups = 1; -+ for (int i = start + step; -+ i != end && derived ()->equal_p (elt (i), elt (start)); -+ i += step) -+ ndups++; -+ return ndups; -+} -+ - /* Change the encoding to NPATTERNS patterns of NELTS_PER_PATTERN each, - but without changing the underlying vector. */ - --template -+template - void --vector_builder::reshape (unsigned int npatterns, -- unsigned int nelts_per_pattern) -+vector_builder::reshape (unsigned int npatterns, -+ unsigned int nelts_per_pattern) - { - unsigned int old_encoded_nelts = encoded_nelts (); - unsigned int new_encoded_nelts = npatterns * nelts_per_pattern; -@@ -249,11 +403,11 @@ vector_builder::reshape (unsigned int npatterns, - /* Return true if elements [START, END) contain a repeating sequence of - STEP elements. */ - --template -+template - bool --vector_builder::repeating_sequence_p (unsigned int start, -- unsigned int end, -- unsigned int step) -+vector_builder::repeating_sequence_p (unsigned int start, -+ unsigned int end, -+ unsigned int step) - { - for (unsigned int i = start; i < end - step; ++i) - if (!derived ()->equal_p ((*this)[i], (*this)[i + step])) -@@ -264,11 +418,11 @@ vector_builder::repeating_sequence_p (unsigned int start, - /* Return true if elements [START, END) contain STEP interleaved linear - series. */ - --template -+template - bool --vector_builder::stepped_sequence_p (unsigned int start, -- unsigned int end, -- unsigned int step) -+vector_builder::stepped_sequence_p (unsigned int start, -+ unsigned int end, -+ unsigned int step) - { - if (!derived ()->allow_steps_p ()) - return false; -@@ -297,9 +451,9 @@ vector_builder::stepped_sequence_p (unsigned int start, - /* Try to change the number of encoded patterns to NPATTERNS, returning - true on success. */ - --template -+template - bool --vector_builder::try_npatterns (unsigned int npatterns) -+vector_builder::try_npatterns (unsigned int npatterns) - { - if (m_nelts_per_pattern == 1) - { -@@ -350,9 +504,9 @@ vector_builder::try_npatterns (unsigned int npatterns) - - /* Replace the current encoding with the canonical form. */ - --template -+template - void --vector_builder::finalize () -+vector_builder::finalize () - { - /* The encoding requires the same number of elements to come from each - pattern. 
*/ -diff --git a/libgcc/config.host b/libgcc/config.host -index 0f15fda36..9500ec2ee 100644 ---- a/libgcc/config.host -+++ b/libgcc/config.host -@@ -356,6 +356,12 @@ aarch64*-*-freebsd*) - tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm" - md_unwind_header=aarch64/freebsd-unwind.h - ;; -+aarch64*-*-netbsd*) -+ extra_parts="$extra_parts crtfastmath.o" -+ tmake_file="${tmake_file} ${cpu_type}/t-aarch64" -+ tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm" -+ md_unwind_header=aarch64/aarch64-unwind.h -+ ;; - aarch64*-*-fuchsia*) - tmake_file="${tmake_file} ${cpu_type}/t-aarch64" - tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp" -diff --git a/libgcc/config/aarch64/aarch64-unwind.h b/libgcc/config/aarch64/aarch64-unwind.h -index 223ac9157..13e6e4a6a 100644 ---- a/libgcc/config/aarch64/aarch64-unwind.h -+++ b/libgcc/config/aarch64/aarch64-unwind.h -@@ -35,6 +35,23 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - #define MD_FROB_UPDATE_CONTEXT(context, fs) \ - aarch64_frob_update_context (context, fs) - -+static inline int -+aarch64_cie_signed_with_b_key (struct _Unwind_Context *context) -+{ -+ const struct dwarf_fde *fde = _Unwind_Find_FDE (context->bases.func, -+ &context->bases); -+ if (fde != NULL) -+ { -+ const struct dwarf_cie *cie = get_cie (fde); -+ if (cie != NULL) -+ { -+ char *aug_str = cie->augmentation; -+ return strchr (aug_str, 'B') == NULL ? 0 : 1; -+ } -+ } -+ return 0; -+} -+ - /* Do AArch64 private extraction on ADDR based on context info CONTEXT and - unwind frame info FS. If ADDR is signed, we do address authentication on it - using CFA of current frame. */ -@@ -43,9 +60,11 @@ static inline void * - aarch64_post_extract_frame_addr (struct _Unwind_Context *context, - _Unwind_FrameState *fs, void *addr) - { -- if (fs->regs.reg[DWARF_REGNUM_AARCH64_RA_STATE].loc.offset & 0x1) -+ if (context->flags & RA_SIGNED_BIT) - { - _Unwind_Word salt = (_Unwind_Word) context->cfa; -+ if (aarch64_cie_signed_with_b_key (context) != 0) -+ return __builtin_aarch64_autib1716 (addr, salt); - return __builtin_aarch64_autia1716 (addr, salt); - } - else -@@ -62,9 +81,14 @@ aarch64_post_frob_eh_handler_addr (struct _Unwind_Context *current, - ATTRIBUTE_UNUSED, - void *handler_addr) - { -- if (current->flags & RA_A_SIGNED_BIT) -- return __builtin_aarch64_pacia1716 (handler_addr, -+ if (current->flags & RA_SIGNED_BIT) -+ { -+ if (aarch64_cie_signed_with_b_key (current)) -+ return __builtin_aarch64_pacib1716 (handler_addr, -+ (_Unwind_Word) current->cfa); -+ return __builtin_aarch64_pacia1716 (handler_addr, - (_Unwind_Word) current->cfa); -+ } - else - return handler_addr; - } -@@ -79,7 +103,7 @@ aarch64_frob_update_context (struct _Unwind_Context *context, - { - if (fs->regs.reg[DWARF_REGNUM_AARCH64_RA_STATE].loc.offset & 0x1) - /* The flag is used for re-authenticating EH handler's address. */ -- context->flags |= RA_A_SIGNED_BIT; -+ context->flags |= RA_SIGNED_BIT; - - return; - } -diff --git a/libgcc/unwind-dw2-fde.c b/libgcc/unwind-dw2-fde.c -index 24b4ecee6..40ebf85a9 100644 ---- a/libgcc/unwind-dw2-fde.c -+++ b/libgcc/unwind-dw2-fde.c -@@ -334,6 +334,9 @@ get_cie_encoding (const struct dwarf_cie *cie) - /* LSDA encoding. */ - else if (*aug == 'L') - p++; -+ /* aarch64 b-key pointer authentication. */ -+ else if (*aug == 'B') -+ p++; - /* Otherwise end of string, or unknown augmentation. 
*/ - else - return DW_EH_PE_absptr; -diff --git a/libgcc/unwind-dw2.c b/libgcc/unwind-dw2.c -index e6130af2f..e76a1cbc4 100644 ---- a/libgcc/unwind-dw2.c -+++ b/libgcc/unwind-dw2.c -@@ -136,8 +136,9 @@ struct _Unwind_Context - #define SIGNAL_FRAME_BIT ((~(_Unwind_Word) 0 >> 1) + 1) - /* Context which has version/args_size/by_value fields. */ - #define EXTENDED_CONTEXT_BIT ((~(_Unwind_Word) 0 >> 2) + 1) -- /* Bit reserved on AArch64, return address has been signed with A key. */ --#define RA_A_SIGNED_BIT ((~(_Unwind_Word) 0 >> 3) + 1) -+ /* Bit reserved on AArch64, return address has been signed with A or B -+ key. */ -+#define RA_SIGNED_BIT ((~(_Unwind_Word) 0 >> 3) + 1) - _Unwind_Word flags; - /* 0 for now, can be increased when further fields are added to - struct _Unwind_Context. */ -@@ -502,6 +503,11 @@ extract_cie_info (const struct dwarf_cie *cie, struct _Unwind_Context *context, - fs->signal_frame = 1; - aug += 1; - } -+ /* aarch64 B-key pointer authentication. */ -+ else if (aug[0] == 'B') -+ { -+ aug += 1; -+ } - - /* Otherwise we have an unknown augmentation string. - Bail unless we saw a 'z' prefix. */ diff --git a/change-gcc-BASE-VER.patch b/change-gcc-BASE-VER.patch deleted file mode 100644 index 95e83240def0e4783c4e880d41f9d0bf814086d1..0000000000000000000000000000000000000000 --- a/change-gcc-BASE-VER.patch +++ /dev/null @@ -1,25 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-Bump-BASE-VER-to-9.3.1.patch -9f26e5863a75744bbee1479792ecae084a3ceb20 - -diff -Nurp a/gcc/BASE-VER b/gcc/BASE-VER ---- a/gcc/BASE-VER 2020-08-19 10:47:14.100000000 +0800 -+++ b/gcc/BASE-VER 2020-08-19 10:32:30.380000000 +0800 -@@ -1 +1 @@ --9.3.0 -+9.3.1 -diff -Nurp a/gcc/Makefile.in b/gcc/Makefile.in ---- a/gcc/Makefile.in 2020-08-19 10:32:45.528000000 +0800 -+++ b/gcc/Makefile.in 2020-08-19 10:34:24.968000000 +0800 -@@ -885,8 +885,7 @@ PATCHLEVEL_c := \ - # significant - do not remove it. 
- BASEVER_s := "\"$(BASEVER_c)\"" - DEVPHASE_s := "\"$(if $(DEVPHASE_c), ($(DEVPHASE_c)))\"" --DATESTAMP_s := \ -- "\"$(if $(DEVPHASE_c)$(filter-out 0,$(PATCHLEVEL_c)), $(DATESTAMP_c))\"" -+DATESTAMP_s := "\"\"" - PKGVERSION_s:= "\"@PKGVERSION@\"" - BUGURL_s := "\"@REPORT_BUGS_TO@\"" - diff --git a/complete-struct-reorg.patch b/complete-struct-reorg.patch deleted file mode 100644 index 10b5a35b66819977703ffcae628725a3f4e1b437..0000000000000000000000000000000000000000 --- a/complete-struct-reorg.patch +++ /dev/null @@ -1,1834 +0,0 @@ -diff -Nurp a/gcc/ipa-struct-reorg/escapes.def b/gcc/ipa-struct-reorg/escapes.def ---- a/gcc/ipa-struct-reorg/escapes.def 2020-09-17 02:26:36.900000000 -0400 -+++ b/gcc/ipa-struct-reorg/escapes.def 2020-09-17 02:59:19.308000000 -0400 -@@ -56,5 +56,7 @@ DEF_ESCAPE (escape_non_optimize, "Type u - DEF_ESCAPE (escape_array, "Type is used in an array [not handled yet]") - DEF_ESCAPE (escape_ptr_ptr, "Type is used in a pointer to a pointer [not handled yet]") - DEF_ESCAPE (escape_return, "Type escapes via a return [not handled yet]") -+DEF_ESCAPE (escape_separate_instance, "Type escapes via a separate instance") -+DEF_ESCAPE (escape_unhandled_rewrite, "Type escapes via a unhandled rewrite stmt") - - #undef DEF_ESCAPE -diff -Nurp a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c ---- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c 2020-09-17 02:58:59.540000000 -0400 -+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c 2020-09-17 04:55:03.992000000 -0400 -@@ -95,6 +95,7 @@ along with GCC; see the file COPYING3. - #include "ipa-struct-reorg.h" - #include "tree-eh.h" - #include "bitmap.h" -+#include "cfgloop.h" - #include "ipa-param-manipulation.h" - #include "tree-ssa-live.h" /* For remove_unused_locals. */ - -@@ -103,6 +104,7 @@ along with GCC; see the file COPYING3. - namespace { - - using namespace struct_reorg; -+using namespace struct_relayout; - - /* Return true iff TYPE is stdarg va_list type. */ - -@@ -158,6 +160,14 @@ handled_type (tree type) - return false; - } - -+enum srmode -+{ -+ NORMAL = 0, -+ COMPLETE_STRUCT_RELAYOUT -+}; -+ -+static bool is_result_of_mult (tree arg, tree *num, tree struct_size); -+ - } // anon namespace - - namespace struct_reorg { -@@ -247,7 +257,8 @@ srtype::srtype (tree type) - : type (type), - chain_type (false), - escapes (does_not_escape), -- visited (false) -+ visited (false), -+ has_alloc_array (0) - { - for (int i = 0; i < max_split; i++) - newtype[i] = NULL_TREE; -@@ -447,13 +458,6 @@ srtype::dump (FILE *f) - fn->simple_dump (f); - } - fprintf (f, "\n }\n"); -- fprintf (f, "\n field_sites = {"); -- FOR_EACH_VEC_ELT (field_sites, i, field) -- { -- fprintf (f, " \n"); -- field->simple_dump (f); -- } -- fprintf (f, "\n }\n"); - fprintf (f, "}\n"); - } - -@@ -804,12 +808,6 @@ srfield::dump (FILE *f) - fprintf (f, ", offset = " HOST_WIDE_INT_PRINT_DEC, offset); - fprintf (f, ", type = "); - print_generic_expr (f, fieldtype); -- if (type) -- { -- fprintf (f, "( srtype = "); -- type->simple_dump (f); -- fprintf (f, ")"); -- } - fprintf (f, "\n}\n"); - } - -@@ -819,7 +817,10 @@ srfield::dump (FILE *f) - void - srfield::simple_dump (FILE *f) - { -- fprintf (f, "field (%d)", DECL_UID (fielddecl)); -+ if (fielddecl) -+ { -+ fprintf (f, "field (%d)", DECL_UID (fielddecl)); -+ } - } - - /* Dump out the access structure to FILE. */ -@@ -863,21 +864,120 @@ srdecl::dump (FILE *file) - - } // namespace struct_reorg - -+namespace struct_relayout { -+ -+/* Complete Structure Relayout Optimization. 
-+ It reorganizes all structure members, and puts same member together. -+ struct s { -+ long a; -+ int b; -+ struct s* c; -+ }; -+ Array looks like -+ abcabcabcabc... -+ will be transformed to -+ aaaa...bbbb...cccc... -+*/ -+ -+#define GPTR_SIZE(i) \ -+ TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (gptr[i]))) -+ -+unsigned transformed = 0; -+ -+unsigned -+csrtype::calculate_field_num (tree field_offset) -+{ -+ if (field_offset == NULL) -+ { -+ return 0; -+ } -+ -+ HOST_WIDE_INT off = int_byte_position (field_offset); -+ unsigned i = 1; -+ for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) -+ { -+ if (off == int_byte_position (field)) -+ { -+ return i; -+ } -+ i++; -+ } -+ return 0; -+} -+ -+void -+csrtype::init_type_info (void) -+{ -+ if (!type) -+ { -+ return; -+ } -+ new_size = old_size = tree_to_uhwi (TYPE_SIZE_UNIT (type)); -+ -+ /* Close enough to pad to improve performance. -+ 33~63 should pad to 64 but 33~48 (first half) are too far away, and -+ 65~127 should pad to 128 but 65~96 (first half) are too far away. */ -+ if (old_size > 48 && old_size < 64) -+ { -+ new_size = 64; -+ } -+ if (old_size > 96 && old_size < 128) -+ { -+ new_size = 128; -+ } -+ -+ /* For performance reasons, only allow structure size -+ that is a power of 2 and not too big. */ -+ if (new_size != 1 && new_size != 2 -+ && new_size != 4 && new_size != 8 -+ && new_size != 16 && new_size != 32 -+ && new_size != 64 && new_size != 128) -+ { -+ new_size = 0; -+ field_count = 0; -+ return; -+ } -+ -+ unsigned i = 0; -+ for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) -+ { -+ i++; -+ } -+ field_count = i; -+ -+ struct_size = build_int_cstu (TREE_TYPE (TYPE_SIZE_UNIT (type)), -+ new_size); -+ -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "Type: "); -+ print_generic_expr (dump_file, type); -+ fprintf (dump_file, " has %d members.\n", field_count); -+ fprintf (dump_file, "Modify struct size from %ld to %ld.\n", -+ old_size, new_size); -+ } -+} -+ -+} // namespace struct_relayout -+ - namespace { - -+/* Structure definition for ipa_struct_reorg and ipa_struct_relayout. 
*/ -+ - struct ipa_struct_reorg - { -+public: - // Constructors - ipa_struct_reorg(void) - : current_function (NULL), -- done_recording(false) -+ done_recording (false), -+ current_mode (NORMAL) - { - } - -- // public methods -- unsigned execute(void); -+ unsigned execute (enum srmode mode); - void mark_type_as_escape (tree type, escape_type, gimple *stmt = NULL); --private: -+ - // fields - auto_vec_del types; - auto_vec_del functions; -@@ -885,8 +985,8 @@ private: - srfunction *current_function; - - bool done_recording; -+ srmode current_mode; - -- // private methods - void dump_types (FILE *f); - void dump_types_escaped (FILE *f); - void dump_functions (FILE *f); -@@ -916,6 +1016,7 @@ private: - void maybe_record_allocation_site (cgraph_node *, gimple *); - void record_stmt_expr (tree expr, cgraph_node *node, gimple *stmt); - void mark_expr_escape(tree, escape_type, gimple *stmt); -+ bool handled_allocation_stmt (gimple *stmt); - tree allocate_size (srtype *t, gimple *stmt); - - void mark_decls_in_as_not_needed (tree fn); -@@ -931,6 +1032,7 @@ private: - bool get_type_field (tree expr, tree &base, bool &indirect, srtype *&type, srfield *&field, bool &realpart, bool &imagpart, bool &address, bool should_create = false, bool can_escape = false); - bool wholeaccess (tree expr, tree base, tree accesstype, srtype *t); - -+ void check_alloc_num (gimple *stmt, srtype *type); - void check_definition (srdecl *decl, vec&); - void check_uses (srdecl *decl, vec&); - void check_use (srdecl *decl, gimple *stmt, vec&); -@@ -943,8 +1045,644 @@ private: - - bool has_rewritten_type (srfunction*); - void maybe_mark_or_record_other_side (tree side, tree other, gimple *stmt); -+ unsigned execute_struct_relayout (void); - }; - -+struct ipa_struct_relayout -+{ -+public: -+ // fields -+ tree gptr[max_relayout_split + 1]; -+ csrtype ctype; -+ ipa_struct_reorg *sr; -+ cgraph_node *current_node; -+ -+ // Constructors -+ ipa_struct_relayout (tree type, ipa_struct_reorg *sr_) -+ { -+ ctype.type = type; -+ sr = sr_; -+ current_node = NULL; -+ for (int i = 0; i < max_relayout_split + 1; i++) -+ { -+ gptr[i] = NULL; -+ } -+ } -+ -+ // Methods -+ tree create_new_vars (tree type, const char *name); -+ void create_global_ptrs (void); -+ unsigned int rewrite (void); -+ void rewrite_stmt_in_function (void); -+ bool rewrite_debug (gimple *stmt, gimple_stmt_iterator *gsi); -+ bool rewrite_stmt (gimple *stmt, gimple_stmt_iterator *gsi); -+ bool handled_allocation_stmt (gcall *stmt); -+ void init_global_ptrs (gcall *stmt, gimple_stmt_iterator *gsi); -+ bool check_call_uses (gcall *stmt); -+ bool rewrite_call (gcall *stmt, gimple_stmt_iterator *gsi); -+ tree create_ssa (tree node, gimple_stmt_iterator *gsi); -+ bool is_candidate (tree xhs); -+ tree rewrite_address (tree xhs, gimple_stmt_iterator *gsi); -+ tree rewrite_offset (tree offset, HOST_WIDE_INT num); -+ bool rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi); -+ bool maybe_rewrite_cst (tree cst, gimple_stmt_iterator *gsi, -+ HOST_WIDE_INT ×); -+ unsigned int execute (void); -+}; -+ -+} // anon namespace -+ -+namespace { -+ -+/* Methods for ipa_struct_relayout. 
*/ -+ -+static void -+set_var_attributes (tree var) -+{ -+ if (!var) -+ { -+ return; -+ } -+ gcc_assert (TREE_CODE (var) == VAR_DECL); -+ -+ DECL_ARTIFICIAL (var) = 1; -+ DECL_EXTERNAL (var) = 0; -+ TREE_STATIC (var) = 1; -+ TREE_PUBLIC (var) = 0; -+ TREE_USED (var) = 1; -+ DECL_CONTEXT (var) = NULL; -+ TREE_THIS_VOLATILE (var) = 0; -+ TREE_ADDRESSABLE (var) = 0; -+ TREE_READONLY (var) = 0; -+ if (is_global_var (var)) -+ { -+ set_decl_tls_model (var, TLS_MODEL_NONE); -+ } -+} -+ -+tree -+ipa_struct_relayout::create_new_vars (tree type, const char *name) -+{ -+ gcc_assert (type); -+ tree new_type = build_pointer_type (type); -+ -+ tree new_name = NULL; -+ if (name) -+ { -+ new_name = get_identifier (name); -+ } -+ -+ tree new_var = build_decl (UNKNOWN_LOCATION, VAR_DECL, new_name, new_type); -+ -+ /* set new_var's attributes. */ -+ set_var_attributes (new_var); -+ -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "Created new var: "); -+ print_generic_expr (dump_file, new_var); -+ fprintf (dump_file, "\n"); -+ } -+ return new_var; -+} -+ -+void -+ipa_struct_relayout::create_global_ptrs (void) -+{ -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "Create global gptrs: {\n"); -+ } -+ -+ char *gptr0_name = NULL; -+ const char *type_name = get_type_name (ctype.type); -+ -+ if (type_name) -+ { -+ gptr0_name = concat (type_name, "_gptr0", NULL); -+ } -+ tree var_gptr0 = create_new_vars (ctype.type, gptr0_name); -+ gptr[0] = var_gptr0; -+ varpool_node::add (var_gptr0); -+ -+ unsigned i = 1; -+ for (tree field = TYPE_FIELDS (ctype.type); field; -+ field = DECL_CHAIN (field)) -+ { -+ if (TREE_CODE (field) == FIELD_DECL) -+ { -+ tree type = TREE_TYPE (field); -+ -+ char *name = NULL; -+ char id[10] = {0}; -+ sprintf (id, "%d", i); -+ const char *decl_name = IDENTIFIER_POINTER (DECL_NAME (field)); -+ -+ if (type_name && decl_name) -+ { -+ name = concat (type_name, "_", decl_name, "_gptr", id, NULL); -+ } -+ tree var = create_new_vars (type, name); -+ -+ gptr[i] = var; -+ varpool_node::add (var); -+ i++; -+ } -+ } -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "\nTotally create %d gptrs. }\n\n", i); -+ } -+ gcc_assert (ctype.field_count == i - 1); -+} -+ -+void -+ipa_struct_relayout::rewrite_stmt_in_function (void) -+{ -+ gcc_assert (cfun); -+ -+ basic_block bb = NULL; -+ gimple_stmt_iterator si; -+ FOR_EACH_BB_FN (bb, cfun) -+ { -+ for (si = gsi_start_bb (bb); !gsi_end_p (si);) -+ { -+ gimple *stmt = gsi_stmt (si); -+ if (rewrite_stmt (stmt, &si)) -+ { -+ gsi_remove (&si, true); -+ } -+ else -+ { -+ gsi_next (&si); -+ } -+ } -+ } -+ -+ /* Debug statements need to happen after all other statements -+ have changed. 
*/ -+ FOR_EACH_BB_FN (bb, cfun) -+ { -+ for (si = gsi_start_bb (bb); !gsi_end_p (si);) -+ { -+ gimple *stmt = gsi_stmt (si); -+ if (gimple_code (stmt) == GIMPLE_DEBUG -+ && rewrite_debug (stmt, &si)) -+ { -+ gsi_remove (&si, true); -+ } -+ else -+ { -+ gsi_next (&si); -+ } -+ } -+ } -+} -+ -+unsigned int -+ipa_struct_relayout::rewrite (void) -+{ -+ cgraph_node *cnode = NULL; -+ function *fn = NULL; -+ FOR_EACH_FUNCTION (cnode) -+ { -+ if (!cnode->real_symbol_p () || !cnode->has_gimple_body_p ()) -+ { -+ continue; -+ } -+ if (cnode->definition) -+ { -+ fn = DECL_STRUCT_FUNCTION (cnode->decl); -+ if (fn == NULL) -+ { -+ continue; -+ } -+ -+ current_node = cnode; -+ push_cfun (fn); -+ -+ rewrite_stmt_in_function (); -+ -+ update_ssa (TODO_update_ssa_only_virtuals); -+ -+ if (flag_tree_pta) -+ { -+ compute_may_aliases (); -+ } -+ -+ remove_unused_locals (); -+ -+ cgraph_edge::rebuild_edges (); -+ -+ free_dominance_info (CDI_DOMINATORS); -+ -+ pop_cfun (); -+ current_node = NULL; -+ } -+ } -+ return TODO_verify_all; -+} -+ -+bool -+ipa_struct_relayout::rewrite_debug (gimple *stmt, gimple_stmt_iterator *gsi) -+{ -+ /* Delete debug gimple now. */ -+ return true; -+} -+ -+bool -+ipa_struct_relayout::rewrite_stmt (gimple *stmt, gimple_stmt_iterator *gsi) -+{ -+ switch (gimple_code (stmt)) -+ { -+ case GIMPLE_ASSIGN: -+ return rewrite_assign (as_a (stmt), gsi); -+ case GIMPLE_CALL: -+ return rewrite_call (as_a (stmt), gsi); -+ default: -+ break; -+ } -+ return false; -+} -+ -+bool -+ipa_struct_relayout::handled_allocation_stmt (gcall *stmt) -+{ -+ if (gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) -+ { -+ return true; -+ } -+ return false; -+} -+ -+void -+ipa_struct_relayout::init_global_ptrs (gcall *stmt, gimple_stmt_iterator *gsi) -+{ -+ gcc_assert (handled_allocation_stmt (stmt)); -+ -+ tree lhs = gimple_call_lhs (stmt); -+ -+ /* Case that gimple is at the end of bb. */ -+ if (gsi_one_before_end_p (*gsi)) -+ { -+ gassign *gptr0 = gimple_build_assign (gptr[0], lhs); -+ gsi_insert_after (gsi, gptr0, GSI_SAME_STMT); -+ } -+ gsi_next (gsi); -+ -+ /* Emit gimple gptr0 = _X and gptr1 = _X. */ -+ gassign *gptr0 = gimple_build_assign (gptr[0], lhs); -+ gsi_insert_before (gsi, gptr0, GSI_SAME_STMT); -+ gassign *gptr1 = gimple_build_assign (gptr[1], lhs); -+ gsi_insert_before (gsi, gptr1, GSI_SAME_STMT); -+ -+ /* Emit gimple gptr_[i] = gptr_[i-1] + _Y[gap]. */ -+ for (unsigned i = 2; i <= ctype.field_count; i++) -+ { -+ gimple *new_stmt = NULL; -+ tree gptr_i_prev_ssa = create_ssa (gptr[i-1], gsi); -+ tree gptr_i_ssa = make_ssa_name (TREE_TYPE (gptr[i-1])); -+ -+ /* Emit gimple _Y[gap] = N * sizeof (member). 
*/ -+ tree member_gap = gimplify_build2 (gsi, MULT_EXPR, -+ long_unsigned_type_node, -+ gimple_call_arg (stmt, 0), -+ GPTR_SIZE (i-1)); -+ -+ new_stmt = gimple_build_assign (gptr_i_ssa, POINTER_PLUS_EXPR, -+ gptr_i_prev_ssa, member_gap); -+ gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); -+ -+ gassign *gptr_i = gimple_build_assign (gptr[i], gptr_i_ssa); -+ gsi_insert_before (gsi, gptr_i, GSI_SAME_STMT); -+ } -+ gsi_prev (gsi); -+} -+ -+bool -+ipa_struct_relayout::check_call_uses (gcall *stmt) -+{ -+ gcc_assert (current_node); -+ srfunction *fn = sr->find_function (current_node); -+ tree lhs = gimple_call_lhs (stmt); -+ -+ if (fn == NULL) -+ { -+ return false; -+ } -+ -+ srdecl *d = fn->find_decl (lhs); -+ if (d == NULL) -+ { -+ return false; -+ } -+ if (types_compatible_p (d->type->type, ctype.type)) -+ { -+ return true; -+ } -+ -+ return false; -+} -+ -+bool -+ipa_struct_relayout::rewrite_call (gcall *stmt, gimple_stmt_iterator *gsi) -+{ -+ if (handled_allocation_stmt (stmt)) -+ { -+ /* Rewrite stmt _X = calloc (N, sizeof (struct)). */ -+ tree size = gimple_call_arg (stmt, 1); -+ if (TREE_CODE (size) != INTEGER_CST) -+ { -+ return false; -+ } -+ if (tree_to_uhwi (size) != ctype.old_size) -+ { -+ return false; -+ } -+ if (!check_call_uses (stmt)) -+ { -+ return false; -+ } -+ -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "Rewrite allocation call:\n"); -+ print_gimple_stmt (dump_file, stmt, 0); -+ fprintf (dump_file, "to\n"); -+ } -+ -+ /* Modify sizeof (struct). */ -+ gimple_call_set_arg (stmt, 1, ctype.struct_size); -+ update_stmt (stmt); -+ -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ print_gimple_stmt (dump_file, stmt, 0); -+ fprintf (dump_file, "\n"); -+ } -+ -+ init_global_ptrs (stmt, gsi); -+ } -+ return false; -+} -+ -+tree -+ipa_struct_relayout::create_ssa (tree node, gimple_stmt_iterator *gsi) -+{ -+ gcc_assert (TREE_CODE (node) == VAR_DECL); -+ tree node_ssa = make_ssa_name (TREE_TYPE (node)); -+ gassign *stmt = gimple_build_assign (node_ssa, node); -+ gsi_insert_before (gsi, stmt, GSI_SAME_STMT); -+ return node_ssa; -+} -+ -+bool -+ipa_struct_relayout::is_candidate (tree xhs) -+{ -+ if (TREE_CODE (xhs) != COMPONENT_REF) -+ { -+ return false; -+ } -+ tree mem = TREE_OPERAND (xhs, 0); -+ if (TREE_CODE (mem) == MEM_REF) -+ { -+ tree type = TREE_TYPE (mem); -+ if (types_compatible_p (type, ctype.type)) -+ { -+ return true; -+ } -+ } -+ return false; -+} -+ -+tree -+ipa_struct_relayout::rewrite_address (tree xhs, gimple_stmt_iterator *gsi) -+{ -+ tree mem_ref = TREE_OPERAND (xhs, 0); -+ tree pointer = TREE_OPERAND (mem_ref, 0); -+ tree pointer_offset = TREE_OPERAND (mem_ref, 1); -+ tree field = TREE_OPERAND (xhs, 1); -+ -+ tree pointer_ssa = fold_convert (long_unsigned_type_node, pointer); -+ tree gptr0_ssa = fold_convert (long_unsigned_type_node, gptr[0]); -+ -+ /* Emit gimple _X1 = ptr - gptr0. */ -+ tree step1 = gimplify_build2 (gsi, MINUS_EXPR, long_unsigned_type_node, -+ pointer_ssa, gptr0_ssa); -+ -+ /* Emit gimple _X2 = _X1 / sizeof (struct). */ -+ tree step2 = gimplify_build2 (gsi, TRUNC_DIV_EXPR, long_unsigned_type_node, -+ step1, ctype.struct_size); -+ -+ unsigned field_num = ctype.calculate_field_num (field); -+ gcc_assert (field_num > 0 && field_num <= ctype.field_count); -+ -+ /* Emit gimple _X3 = _X2 * sizeof (member). */ -+ tree step3 = gimplify_build2 (gsi, MULT_EXPR, long_unsigned_type_node, -+ step2, GPTR_SIZE (field_num)); -+ -+ /* Emit gimple _X4 = gptr[I]. 
*/ -+ tree gptr_field_ssa = create_ssa (gptr[field_num], gsi); -+ tree new_address = make_ssa_name (TREE_TYPE (gptr[field_num])); -+ gassign *new_stmt = gimple_build_assign (new_address, POINTER_PLUS_EXPR, -+ gptr_field_ssa, step3); -+ gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); -+ -+ /* MEM_REF with nonzero offset like -+ MEM[ptr + sizeof (struct)] = 0B -+ should be transformed to -+ MEM[gptr + sizeof (member)] = 0B -+ */ -+ HOST_WIDE_INT size -+ = tree_to_shwi (TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (new_address)))); -+ tree new_size = rewrite_offset (pointer_offset, size); -+ if (new_size) -+ { -+ TREE_OPERAND (mem_ref, 1) = new_size; -+ } -+ -+ /* Update mem_ref pointer. */ -+ TREE_OPERAND (mem_ref, 0) = new_address; -+ -+ /* Update mem_ref TREE_TYPE. */ -+ TREE_TYPE (mem_ref) = TREE_TYPE (TREE_TYPE (new_address)); -+ -+ return mem_ref; -+} -+ -+tree -+ipa_struct_relayout::rewrite_offset (tree offset, HOST_WIDE_INT num) -+{ -+ if (TREE_CODE (offset) == INTEGER_CST) -+ { -+ bool sign = false; -+ HOST_WIDE_INT off = TREE_INT_CST_LOW (offset); -+ if (off == 0) -+ { -+ return NULL; -+ } -+ if (off < 0) -+ { -+ off = -off; -+ sign = true; -+ } -+ if (off % ctype.old_size == 0) -+ { -+ HOST_WIDE_INT times = off / ctype.old_size; -+ times = sign ? -times : times; -+ return build_int_cst (TREE_TYPE (offset), num * times); -+ } -+ } -+ return NULL; -+} -+ -+#define REWRITE_ASSIGN_TREE_IN_STMT(node) \ -+do \ -+{ \ -+ tree node = gimple_assign_##node (stmt); \ -+ if (node && is_candidate (node)) \ -+ { \ -+ tree mem_ref = rewrite_address (node, gsi); \ -+ gimple_assign_set_##node (stmt, mem_ref); \ -+ update_stmt (stmt); \ -+ } \ -+} while (0) -+ -+/* COMPONENT_REF = exp => MEM_REF = exp -+ / \ / \ -+ MEM_REF field gptr offset -+ / \ -+ pointer offset -+*/ -+bool -+ipa_struct_relayout::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) -+{ -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "Maybe rewrite assign:\n"); -+ print_gimple_stmt (dump_file, stmt, 0); -+ fprintf (dump_file, "to\n"); -+ } -+ -+ switch (gimple_num_ops (stmt)) -+ { -+ case 4: REWRITE_ASSIGN_TREE_IN_STMT (rhs3); // FALLTHRU -+ case 3: -+ { -+ REWRITE_ASSIGN_TREE_IN_STMT (rhs2); -+ tree rhs2 = gimple_assign_rhs2 (stmt); -+ if (rhs2 && TREE_CODE (rhs2) == INTEGER_CST) -+ { -+ /* Handle pointer++ and pointer-- or -+ factor is euqal to struct size. 
*/ -+ HOST_WIDE_INT times = 1; -+ if (maybe_rewrite_cst (rhs2, gsi, times)) -+ { -+ tree tmp = build_int_cst ( -+ TREE_TYPE (TYPE_SIZE_UNIT (ctype.type)), -+ ctype.new_size * times); -+ gimple_assign_set_rhs2 (stmt, tmp); -+ update_stmt (stmt); -+ } -+ } -+ } // FALLTHRU -+ case 2: REWRITE_ASSIGN_TREE_IN_STMT (rhs1); // FALLTHRU -+ case 1: REWRITE_ASSIGN_TREE_IN_STMT (lhs); // FALLTHRU -+ case 0: break; -+ default: gcc_unreachable (); -+ } -+ -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ print_gimple_stmt (dump_file, stmt, 0); -+ fprintf (dump_file, "\n"); -+ } -+ return false; -+} -+ -+bool -+ipa_struct_relayout::maybe_rewrite_cst (tree cst, gimple_stmt_iterator *gsi, -+ HOST_WIDE_INT ×) -+{ -+ bool ret = false; -+ gcc_assert (TREE_CODE (cst) == INTEGER_CST); -+ -+ gimple *stmt = gsi_stmt (*gsi); -+ if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR) -+ { -+ tree lhs = gimple_assign_lhs (stmt); -+ tree rhs1 = gimple_assign_rhs1 (stmt); -+ if (types_compatible_p (inner_type (TREE_TYPE (rhs1)), ctype.type) -+ || types_compatible_p (inner_type (TREE_TYPE (lhs)), ctype.type)) -+ { -+ tree num = NULL; -+ if (is_result_of_mult (cst, &num, TYPE_SIZE_UNIT (ctype.type))) -+ { -+ times = TREE_INT_CST_LOW (num); -+ return true; -+ } -+ } -+ } -+ -+ if (gimple_assign_rhs_code (stmt) == MULT_EXPR) -+ { -+ if (gsi_one_before_end_p (*gsi)) -+ { -+ return false; -+ } -+ gsi_next (gsi); -+ gimple *stmt2 = gsi_stmt (*gsi); -+ -+ if (gimple_code (stmt2) == GIMPLE_ASSIGN -+ && gimple_assign_rhs_code (stmt2) == POINTER_PLUS_EXPR) -+ { -+ tree lhs = gimple_assign_lhs (stmt2); -+ tree rhs1 = gimple_assign_rhs1 (stmt2); -+ if (types_compatible_p (inner_type (TREE_TYPE (rhs1)), ctype.type) -+ || types_compatible_p (inner_type (TREE_TYPE (lhs)), ctype.type)) -+ { -+ tree num = NULL; -+ if (is_result_of_mult (cst, &num, TYPE_SIZE_UNIT (ctype.type))) -+ { -+ times = TREE_INT_CST_LOW (num); -+ ret = true; -+ } -+ } -+ } -+ gsi_prev (gsi); -+ return ret; -+ } -+ return false; -+} -+ -+unsigned int -+ipa_struct_relayout::execute (void) -+{ -+ ctype.init_type_info (); -+ if (ctype.field_count < min_relayout_split -+ || ctype.field_count > max_relayout_split) -+ { -+ return 0; -+ } -+ -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "Complete Struct Relayout Type: "); -+ print_generic_expr (dump_file, ctype.type); -+ fprintf (dump_file, "\n"); -+ } -+ transformed++; -+ -+ create_global_ptrs (); -+ return rewrite (); -+} -+ -+} // anon namespace -+ -+namespace { -+ -+/* Methods for ipa_struct_reorg. */ -+ - /* Dump all of the recorded types to file F. */ - - void -@@ -1140,8 +1878,10 @@ ipa_struct_reorg::record_type (tree type - f->type = t1; - t1->add_field_site (f); - } -- if (t1 == type1) -- type1->mark_escape (escape_rescusive_type, NULL); -+ if (t1 == type1 && current_mode != COMPLETE_STRUCT_RELAYOUT) -+ { -+ type1->mark_escape (escape_rescusive_type, NULL); -+ } - } - } - } -@@ -1278,6 +2018,14 @@ ipa_struct_reorg::record_var (tree decl, - else - e = escape_type_volatile_array_or_ptrptr (TREE_TYPE (decl)); - -+ /* Separate instance is hard to trace in complete struct -+ relayout optimization. 
*/ -+ if (current_mode == COMPLETE_STRUCT_RELAYOUT -+ && TREE_CODE (TREE_TYPE (decl)) == RECORD_TYPE) -+ { -+ e = escape_separate_instance; -+ } -+ - if (e != does_not_escape) - type->mark_escape (e, NULL); - } -@@ -1353,7 +2101,8 @@ ipa_struct_reorg::find_vars (gimple *stm - { - case GIMPLE_ASSIGN: - if (gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS -- || gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR) -+ || gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR -+ || gimple_assign_rhs_code (stmt) == NOP_EXPR) - { - tree lhs = gimple_assign_lhs (stmt); - tree rhs = gimple_assign_rhs1 (stmt); -@@ -1378,6 +2127,32 @@ ipa_struct_reorg::find_vars (gimple *stm - current_function->record_decl (t, rhs, -1); - } - } -+ else -+ { -+ /* Because we won't handle these stmts in rewrite phase, -+ just mark these types as escaped. */ -+ switch (gimple_num_ops (stmt)) -+ { -+ case 4: mark_type_as_escape ( -+ TREE_TYPE (gimple_assign_rhs3 (stmt)), -+ escape_unhandled_rewrite, stmt); -+ // FALLTHRU -+ case 3: mark_type_as_escape ( -+ TREE_TYPE (gimple_assign_rhs2 (stmt)), -+ escape_unhandled_rewrite, stmt); -+ // FALLTHRU -+ case 2: mark_type_as_escape ( -+ TREE_TYPE (gimple_assign_rhs1 (stmt)), -+ escape_unhandled_rewrite, stmt); -+ // FALLTHRU -+ case 1: mark_type_as_escape ( -+ TREE_TYPE (gimple_assign_lhs (stmt)), -+ escape_unhandled_rewrite, stmt); -+ // FALLTHRU -+ case 0: break; -+ default: gcc_unreachable (); -+ } -+ } - break; - - case GIMPLE_CALL: -@@ -1459,9 +2234,23 @@ is_result_of_mult (tree arg, tree *num, - /* If we have a integer, just check if it is a multiply of STRUCT_SIZE. */ - if (TREE_CODE (arg) == INTEGER_CST) - { -- if (integer_zerop (size_binop (FLOOR_MOD_EXPR, arg, struct_size))) -+ bool sign = false; -+ HOST_WIDE_INT size = TREE_INT_CST_LOW (arg); -+ if (size < 0) - { -- *num = size_binop (FLOOR_DIV_EXPR, arg, struct_size); -+ size = -size; -+ sign = true; -+ } -+ tree arg2 = build_int_cst (TREE_TYPE (arg), size); -+ if (integer_zerop (size_binop (FLOOR_MOD_EXPR, arg2, struct_size))) -+ { -+ tree number = size_binop (FLOOR_DIV_EXPR, arg2, struct_size); -+ if (sign) -+ { -+ number = build_int_cst (TREE_TYPE (number), -+ -tree_to_shwi (number)); -+ } -+ *num = number; - return true; - } - return false; -@@ -1531,15 +2320,19 @@ is_result_of_mult (tree arg, tree *num, - - /* Return TRUE if STMT is an allocation statement that is handled. */ - --static bool --handled_allocation_stmt (gimple *stmt) -+bool -+ipa_struct_reorg::handled_allocation_stmt (gimple *stmt) - { -- if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC) -- || gimple_call_builtin_p (stmt, BUILT_IN_MALLOC) -- || gimple_call_builtin_p (stmt, BUILT_IN_CALLOC) -- || gimple_call_builtin_p (stmt, BUILT_IN_ALIGNED_ALLOC) -- || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA) -- || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA_WITH_ALIGN)) -+ if (current_mode == COMPLETE_STRUCT_RELAYOUT -+ && gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) -+ return true; -+ if (current_mode != COMPLETE_STRUCT_RELAYOUT -+ && (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC) -+ || gimple_call_builtin_p (stmt, BUILT_IN_MALLOC) -+ || gimple_call_builtin_p (stmt, BUILT_IN_CALLOC) -+ || gimple_call_builtin_p (stmt, BUILT_IN_ALIGNED_ALLOC) -+ || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA) -+ || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA_WITH_ALIGN))) - return true; - return false; - } -@@ -1581,7 +2374,8 @@ ipa_struct_reorg::allocate_size (srtype - /* Check that second argument is a constant equal to the size of structure. 
*/ - if (operand_equal_p (arg1, struct_size, 0)) - return size; -- /* Check that first argument is a constant equal to the size of structure. */ -+ /* ??? Check that first argument is a constant -+ equal to the size of structure. */ - if (operand_equal_p (size, struct_size, 0)) - return arg1; - if (dump_file && (dump_flags & TDF_DETAILS)) -@@ -1698,6 +2492,29 @@ ipa_struct_reorg::maybe_record_assign (c - } - } - -+bool -+check_mem_ref_offset (tree expr) -+{ -+ tree num = NULL; -+ bool ret = false; -+ -+ if (TREE_CODE (expr) != MEM_REF) -+ { -+ return false; -+ } -+ -+ /* Try to find the structure size. */ -+ tree field_off = TREE_OPERAND (expr, 1); -+ tree tmp = TREE_OPERAND (expr, 0); -+ if (TREE_CODE (tmp) == ADDR_EXPR) -+ { -+ tmp = TREE_OPERAND (tmp, 0); -+ } -+ tree size = TYPE_SIZE_UNIT (inner_type (TREE_TYPE (tmp))); -+ ret = is_result_of_mult (field_off, &num, size); -+ return ret; -+} -+ - tree - get_ref_base_and_offset (tree &e, HOST_WIDE_INT &offset, bool &realpart, bool &imagpart, tree &accesstype) - { -@@ -1737,7 +2554,10 @@ get_ref_base_and_offset (tree &e, HOST_W - gcc_assert (TREE_CODE (field_off) == INTEGER_CST); - /* So we can mark the types as escaping if different. */ - accesstype = TREE_TYPE (field_off); -- offset += tree_to_uhwi (field_off); -+ if (!check_mem_ref_offset (expr)) -+ { -+ offset += tree_to_uhwi (field_off); -+ } - return TREE_OPERAND (expr, 0); - } - default: -@@ -2114,6 +2934,39 @@ ipa_struct_reorg::check_type_and_push (t - - } - -+void -+ipa_struct_reorg::check_alloc_num (gimple *stmt, srtype *type) -+{ -+ if (current_mode == COMPLETE_STRUCT_RELAYOUT -+ && handled_allocation_stmt (stmt)) -+ { -+ tree arg0 = gimple_call_arg (stmt, 0); -+ basic_block bb = gimple_bb (stmt); -+ cgraph_node *node = current_function->node; -+ if (integer_onep (arg0)) -+ { -+ /* Actually NOT an array, but may ruin other array. */ -+ type->has_alloc_array = -1; -+ } -+ else if (bb->loop_father != NULL -+ && loop_outer (bb->loop_father) != NULL) -+ { -+ /* The allocation is in a loop. */ -+ type->has_alloc_array = -2; -+ } -+ else if (node->callers != NULL) -+ { -+ type->has_alloc_array = -3; -+ } -+ else -+ { -+ type->has_alloc_array = type->has_alloc_array < 0 -+ ? type->has_alloc_array -+ : type->has_alloc_array + 1; -+ } -+ } -+} -+ - /* - 2) Check SSA_NAMEs for non type usages (source or use) (worlist of srdecl) - a) if the SSA_NAME is sourced from a pointer plus, record the pointer and -@@ -2157,6 +3010,7 @@ ipa_struct_reorg::check_definition (srde - if (!handled_allocation_stmt (stmt) - || !allocate_size (type, stmt)) - type->mark_escape (escape_return, stmt); -+ check_alloc_num (stmt, type); - return; - } - /* If the SSA_NAME is sourced from an inline-asm, just mark the type as escaping. */ -@@ -2195,6 +3049,21 @@ ipa_struct_reorg::check_definition (srde - return; - } - -+ if (gimple_assign_rhs_code (stmt) == MAX_EXPR -+ || gimple_assign_rhs_code (stmt) == MIN_EXPR) -+ { -+ tree rhs2 = gimple_assign_rhs2 (stmt); -+ if (TREE_CODE (rhs) == SSA_NAME) -+ { -+ check_type_and_push (rhs, type, worklist, stmt); -+ } -+ if (TREE_CODE (rhs2) == SSA_NAME) -+ { -+ check_type_and_push (rhs2, type, worklist, stmt); -+ } -+ return; -+ } -+ - /* Casts between pointers and integer are escaping. */ - if (gimple_assign_cast_p (stmt)) - { -@@ -2257,6 +3126,13 @@ ipa_struct_reorg::check_other_side (srde - srtype *t1 = find_type (inner_type (t)); - if (t1 == type) - { -+ /* In Complete Struct Relayout opti, if lhs type is the same -+ as rhs type, we could return without any harm. 
*/ -+ if (current_mode == COMPLETE_STRUCT_RELAYOUT) -+ { -+ return; -+ } -+ - tree base; - bool indirect; - srtype *type1; -@@ -2304,8 +3180,11 @@ ipa_struct_reorg::check_use (srdecl *dec - tree rhs1 = gimple_cond_lhs (stmt); - tree rhs2 = gimple_cond_rhs (stmt); - tree orhs = rhs1; -- if (gimple_cond_code (stmt) != EQ_EXPR -- && gimple_cond_code (stmt) != NE_EXPR) -+ enum tree_code code = gimple_cond_code (stmt); -+ if (code != EQ_EXPR && code != NE_EXPR -+ && (current_mode != COMPLETE_STRUCT_RELAYOUT -+ || (code != LT_EXPR && code != LE_EXPR -+ && code != GT_EXPR && code != GE_EXPR))) - { - mark_expr_escape (rhs1, escape_non_eq, stmt); - mark_expr_escape (rhs2, escape_non_eq, stmt); -@@ -2335,8 +3214,11 @@ ipa_struct_reorg::check_use (srdecl *dec - tree rhs1 = gimple_assign_rhs1 (stmt); - tree rhs2 = gimple_assign_rhs2 (stmt); - tree orhs = rhs1; -- if (gimple_assign_rhs_code (stmt) != EQ_EXPR -- && gimple_assign_rhs_code (stmt) != NE_EXPR) -+ enum tree_code code = gimple_assign_rhs_code (stmt); -+ if (code != EQ_EXPR && code != NE_EXPR -+ && (current_mode != COMPLETE_STRUCT_RELAYOUT -+ || (code != LT_EXPR && code != LE_EXPR -+ && code != GT_EXPR && code != GE_EXPR))) - { - mark_expr_escape (rhs1, escape_non_eq, stmt); - mark_expr_escape (rhs2, escape_non_eq, stmt); -@@ -2733,8 +3615,11 @@ ipa_struct_reorg::propagate_escape (void - void - ipa_struct_reorg::prune_escaped_types (void) - { -- detect_cycles (); -- propagate_escape (); -+ if (current_mode != COMPLETE_STRUCT_RELAYOUT) -+ { -+ detect_cycles (); -+ propagate_escape (); -+ } - - if (dump_file && (dump_flags & TDF_DETAILS)) - { -@@ -3856,16 +4741,82 @@ ipa_struct_reorg::rewrite_functions (voi - } - - unsigned int --ipa_struct_reorg::execute (void) -+ipa_struct_reorg::execute_struct_relayout (void) - { -- /* FIXME: If there is a top-level inline-asm, the pass immediately returns. */ -- if (symtab->first_asm_symbol ()) -- return 0; -- record_accesses (); -- prune_escaped_types (); -- analyze_types (); -+ unsigned retval = 0; -+ for (unsigned i = 0; i < types.length (); i++) -+ { -+ tree type = types[i]->type; -+ if (TYPE_FIELDS (type) == NULL) -+ { -+ continue; -+ } -+ if (types[i]->has_alloc_array != 1) -+ { -+ continue; -+ } -+ if (types[i]->chain_type) -+ { -+ continue; -+ } -+ retval |= ipa_struct_relayout (type, this).execute (); -+ } -+ -+ if (dump_file) -+ { -+ if (transformed) -+ { -+ fprintf (dump_file, "\nNumber of structures to transform in " -+ "Complete Structure Relayout is %d\n", transformed); -+ } -+ else -+ { -+ fprintf (dump_file, "\nNo structures to transform in " -+ "Complete Structure Relayout.\n"); -+ } -+ } -+ -+ return retval; -+} -+ -+unsigned int -+ipa_struct_reorg::execute (enum srmode mode) -+{ -+ unsigned int ret = 0; -+ -+ if (mode == NORMAL) -+ { -+ current_mode = NORMAL; -+ /* If there is a top-level inline-asm, -+ the pass immediately returns. 
*/ -+ if (symtab->first_asm_symbol ()) -+ { -+ return 0; -+ } -+ record_accesses (); -+ prune_escaped_types (); -+ analyze_types (); - -- return rewrite_functions (); -+ ret = rewrite_functions (); -+ } -+ else if (mode == COMPLETE_STRUCT_RELAYOUT) -+ { -+ if (dump_file) -+ { -+ fprintf (dump_file, "\n\nTry Complete Struct Relayout:\n"); -+ } -+ current_mode = COMPLETE_STRUCT_RELAYOUT; -+ if (symtab->first_asm_symbol ()) -+ { -+ return 0; -+ } -+ record_accesses (); -+ prune_escaped_types (); -+ -+ ret = execute_struct_relayout (); -+ } -+ -+ return ret; - } - - const pass_data pass_data_ipa_struct_reorg = -@@ -3890,17 +4841,27 @@ public: - - /* opt_pass methods: */ - virtual bool gate (function *); -- virtual unsigned int execute (function *) { return ipa_struct_reorg ().execute(); } -+ virtual unsigned int execute (function *) -+ { -+ unsigned int ret = 0; -+ ret = ipa_struct_reorg ().execute (NORMAL); -+ if (!ret) -+ { -+ ret = ipa_struct_reorg ().execute (COMPLETE_STRUCT_RELAYOUT); -+ } -+ return ret; -+ } - - }; // class pass_ipa_struct_reorg - - bool - pass_ipa_struct_reorg::gate (function *) - { -- return (optimize -+ return (optimize >= 3 - && flag_ipa_struct_reorg - /* Don't bother doing anything if the program has errors. */ -- && !seen_error ()); -+ && !seen_error () -+ && flag_lto_partition == LTO_PARTITION_ONE); - } - - } // anon namespace -diff -Nurp a/gcc/ipa-struct-reorg/ipa-struct-reorg.h b/gcc/ipa-struct-reorg/ipa-struct-reorg.h ---- a/gcc/ipa-struct-reorg/ipa-struct-reorg.h 2020-09-17 02:26:36.904000000 -0400 -+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.h 2020-09-17 02:59:19.308000000 -0400 -@@ -121,6 +121,7 @@ public: - - tree newtype[max_split]; - bool visited; -+ int has_alloc_array; - - // Constructors - srtype(tree type); -@@ -232,4 +233,34 @@ struct srdecl - - } // namespace struct_reorg - -+ -+namespace struct_relayout { -+ -+const int min_relayout_split = 8; -+const int max_relayout_split = 16; -+ -+struct csrtype -+{ -+ tree type; -+ unsigned HOST_WIDE_INT old_size; -+ unsigned HOST_WIDE_INT new_size; -+ unsigned field_count; -+ tree struct_size; -+ -+ // Constructors -+ csrtype () -+ : type (NULL), -+ old_size (0), -+ new_size (0), -+ field_count (0), -+ struct_size (NULL) -+ {} -+ -+ // Methods -+ unsigned calculate_field_num (tree field_offset); -+ void init_type_info (void); -+}; -+ -+} // namespace struct_relayout -+ - #endif -diff -Nurp a/gcc/testsuite/gcc.dg/struct/complete_struct_relayout.c b/gcc/testsuite/gcc.dg/struct/complete_struct_relayout.c ---- a/gcc/testsuite/gcc.dg/struct/complete_struct_relayout.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/complete_struct_relayout.c 2020-09-17 02:59:19.308000000 -0400 -@@ -0,0 +1,60 @@ -+// { dg-do run } -+ -+#include -+#include -+ -+typedef struct node node_t; -+typedef struct node* node_p; -+ -+struct node { -+ unsigned long a; -+ unsigned long b; -+ node_p c; -+ node_p d; -+ long e; -+ long f; -+ long g; -+ long h; -+ long i; -+ long j; -+ long k; -+ long l; -+ int m; -+ int n; -+}; -+ -+const int MAX = 10000; -+node_p n; -+ -+int -+main () -+{ -+ n = (node_p) calloc (MAX, sizeof (node_t)); -+ -+ for (int i = 0; i < MAX; i++) -+ { -+ n[i].a = 100; -+ } -+ for (int i = 0; i < MAX; i++) -+ { -+ if (n[i].a != 100) -+ { -+ abort (); -+ } -+ } -+ -+ for (int i = 0; i < MAX; i++) -+ { -+ n[i].l = n[i].a; -+ } -+ for (int i = 0; i < MAX; i++) -+ { -+ if (n[i].l != 100) -+ { -+ abort (); -+ } -+ } -+ return 0; -+} -+ -+/* { dg-final { scan-ipa-dump "Number of structures to transform in 
Complete Structure Relayout is 1" "struct_reorg" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/csr_allocation-1.c b/gcc/testsuite/gcc.dg/struct/csr_allocation-1.c ---- a/gcc/testsuite/gcc.dg/struct/csr_allocation-1.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/csr_allocation-1.c 2020-09-17 02:59:19.308000000 -0400 -@@ -0,0 +1,46 @@ -+#include -+#include -+ -+typedef struct node node_t; -+typedef struct node* node_p; -+ -+struct node { -+ unsigned long a; -+ unsigned long b; -+ node_p c; -+ node_p d; -+ long e; -+ long f; -+ long g; -+ long h; -+ long i; -+ long j; -+ long k; -+ long l; -+ int m; -+ int n; -+}; -+ -+const int MAX = 1; -+node_p n; -+ -+int -+main () -+{ -+ n = (node_p) calloc (MAX, sizeof (node_t)); -+ -+ for (int i = 0; i < MAX; i++) -+ { -+ n[i].a = 100; -+ } -+ for (int i = 0; i < MAX; i++) -+ { -+ if (n[i].a != 100) -+ { -+ abort (); -+ } -+ } -+ return 0; -+} -+ -+/* { dg-final { scan-ipa-dump "No structures to transform in Complete Structure Relayout." "struct_reorg" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/csr_allocation-2.c b/gcc/testsuite/gcc.dg/struct/csr_allocation-2.c ---- a/gcc/testsuite/gcc.dg/struct/csr_allocation-2.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/csr_allocation-2.c 2020-09-17 02:59:19.308000000 -0400 -@@ -0,0 +1,59 @@ -+#include -+#include -+ -+typedef struct node node_t; -+typedef struct node* node_p; -+ -+struct node { -+ unsigned long a; -+ unsigned long b; -+ node_p c; -+ node_p d; -+ long e; -+ long f; -+ long g; -+ long h; -+ long i; -+ long j; -+ long k; -+ long l; -+ int m; -+ int n; -+}; -+ -+const int MAX = 10; -+node_p n; -+node_p m; -+ -+int main() -+{ -+ int i; -+ for (i = 0; i < MAX / 5; i++) -+ { -+ n = (node_p) calloc(MAX, sizeof(node_t)); -+ if (i == 0) -+ { -+ m = n; -+ } -+ } -+ -+ for (int i = 0; i < MAX; i++) -+ { -+ n[i].a = 100; -+ } -+ for (int i = 0; i < MAX; i++) -+ { -+ m[i].a = 50; -+ } -+ -+ for (int i = 0; i < MAX; i++) -+ { -+ if (n[i].a != 100) -+ { -+ abort (); -+ } -+ } -+ return 0; -+} -+ -+/* { dg-final { scan-ipa-dump "No structures to transform in Complete Structure Relayout." "struct_reorg" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/csr_allocation-3.c b/gcc/testsuite/gcc.dg/struct/csr_allocation-3.c ---- a/gcc/testsuite/gcc.dg/struct/csr_allocation-3.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/csr_allocation-3.c 2020-09-17 02:59:19.308000000 -0400 -@@ -0,0 +1,77 @@ -+#include -+#include -+ -+typedef struct node node_t; -+typedef struct node* node_p; -+ -+struct node { -+ unsigned long a; -+ unsigned long b; -+ node_p c; -+ node_p d; -+ long e; -+ long f; -+ long g; -+ long h; -+ long i; -+ long j; -+ long k; -+ long l; -+ int m; -+ int n; -+}; -+ -+const int MAX = 10; -+node_p n; -+node_p m; -+ -+void test (int, int) __attribute__((noinline)); -+ -+void -+test (int num, int flag) -+{ -+ if (num <= 0) -+ { -+ return; -+ } -+ n = (node_p) calloc (num, sizeof (node_t)); -+ if (flag) -+ { -+ m = n; -+ } -+ return; -+} -+ -+int -+main () -+{ -+ test (MAX, 1); -+ test (MAX, 0); -+ -+ for (int i = 0; i < MAX; i++) -+ { -+ n[i].a = 100; -+ } -+ for (int i = 0; i < MAX; i++) -+ { -+ m[i].a = 50; -+ } -+ -+ for (int i = 0; i < MAX; i++) -+ { -+ if (n[i].a != 100) -+ { -+ abort (); -+ } -+ } -+ for (int i = 0; i < MAX; i++) -+ { -+ if (m[i].a != 50) -+ { -+ abort (); -+ } -+ } -+ return 0; -+} -+ -+/* { dg-final { scan-ipa-dump "No structures to transform in Complete Structure Relayout." 
"struct_reorg" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/csr_cast_int.c b/gcc/testsuite/gcc.dg/struct/csr_cast_int.c ---- a/gcc/testsuite/gcc.dg/struct/csr_cast_int.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/csr_cast_int.c 2020-09-17 02:59:19.308000000 -0400 -@@ -0,0 +1,52 @@ -+// { dg-do run } -+ -+#include -+#include -+ -+typedef struct node node_t; -+typedef struct node* node_p; -+ -+struct node { -+ unsigned long a; -+ unsigned long b; -+ node_p c; -+ node_p d; -+ long e; -+ long f; -+ long g; -+ long h; -+ long i; -+ long j; -+ long k; -+ long l; -+ int m; -+ int n; -+}; -+ -+const int MAX = 100; -+node_p n; -+unsigned long y; -+ -+int -+main () -+{ -+ n = (node_p) calloc (MAX, sizeof (node_t)); -+ -+ for (int i = 0; i < MAX; i++) -+ { -+ n[i].b = 50; -+ } -+ -+ node_p x = &n[5]; -+ y = (unsigned long) x; -+ y += 8; -+ -+ if (*((unsigned long*) y) != 50) -+ { -+ abort (); -+ } -+ -+ return 0; -+} -+ -+/* { dg-final { scan-ipa-dump "struct node has escaped: \"Type escapes a cast from/to intergral type\"" "struct_reorg" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/csr_separate_instance.c b/gcc/testsuite/gcc.dg/struct/csr_separate_instance.c ---- a/gcc/testsuite/gcc.dg/struct/csr_separate_instance.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/csr_separate_instance.c 2020-09-17 02:59:19.308000000 -0400 -@@ -0,0 +1,48 @@ -+#include -+#include -+ -+typedef struct node node_t; -+typedef struct node* node_p; -+ -+struct node { -+ unsigned long a; -+ unsigned long b; -+ node_p c; -+ node_p d; -+ long e; -+ long f; -+ long g; -+ long h; -+ long i; -+ long j; -+ long k; -+ long l; -+ int m; -+ int n; -+}; -+ -+const int MAX = 10000; -+node_p n; -+node_t t; -+ -+int -+main () -+{ -+ n = (node_p) calloc (MAX, sizeof (node_t)); -+ t.a = 100; -+ -+ for (int i = 0; i < MAX; i++) -+ { -+ n[i].a = t.a; -+ } -+ for (int i = 0; i < MAX; i++) -+ { -+ if (n[i].a != 100) -+ { -+ abort (); -+ } -+ } -+ return 0; -+} -+ -+/* { dg-final { scan-ipa-dump "struct node has escaped: \"Type escapes via a separate instance\"" "struct_reorg" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/sr_address_of_field.c b/gcc/testsuite/gcc.dg/struct/sr_address_of_field.c ---- a/gcc/testsuite/gcc.dg/struct/sr_address_of_field.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/sr_address_of_field.c 2020-09-17 02:59:19.308000000 -0400 -@@ -0,0 +1,37 @@ -+/* { dg-do run } */ -+ -+static struct S { -+ int *p1; -+ int *p2; -+} s; -+ -+typedef __UINTPTR_TYPE__ uintptr_t; -+ -+int -+foo () -+{ -+ int i = 1; -+ int j = 2; -+ struct S s; -+ int **p; -+ s.p1 = &i; -+ s.p2 = &j; -+ p = &s.p1; -+ uintptr_t pi = (uintptr_t) p; -+ pi = pi + sizeof (int *); -+ p = (int **)pi; -+ **p = 3; -+ return j; -+} -+ -+int -+main () -+{ -+ if (foo () != 3) -+ { -+ __builtin_abort (); -+ } -+ return 0; -+} -+ -+/* { dg-final { scan-ipa-dump "struct S has escaped: \"Type escapes via taking the address of field\"" "struct_reorg" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/sr_maxmin_expr.c b/gcc/testsuite/gcc.dg/struct/sr_maxmin_expr.c ---- a/gcc/testsuite/gcc.dg/struct/sr_maxmin_expr.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/sr_maxmin_expr.c 2020-09-17 02:59:19.308000000 -0400 -@@ -0,0 +1,25 @@ -+// { dg-do compile } -+ -+#include -+ -+struct S { -+ unsigned long a; -+ unsigned long b; -+}; -+ -+struct S* s; -+struct S* t = (struct S*) 1000; -+ -+int -+main () -+{ -+ s = (struct S*) calloc (1000, sizeof (struct S)); -+ s = s > t ? 
s : t; -+ if (s == 0) -+ { -+ abort (); -+ } -+ return 0; -+} -+ -+/* { dg-final { scan-ipa-dump "No structures to transform." "struct_reorg" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/sr_pointer_minus.c b/gcc/testsuite/gcc.dg/struct/sr_pointer_minus.c ---- a/gcc/testsuite/gcc.dg/struct/sr_pointer_minus.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/sr_pointer_minus.c 2020-09-17 02:59:19.308000000 -0400 -@@ -0,0 +1,33 @@ -+// { dg-do compile } -+ -+#include -+ -+typedef struct node node_t; -+typedef struct node* node_p; -+ -+struct node { -+ unsigned long a; -+ unsigned long b; -+}; -+ -+int max; -+int x; -+ -+node_p n; -+node_p z; -+ -+int -+main () -+{ -+ n = (node_p) calloc (max, sizeof (node_t)); -+ -+ node_p xp = &n[x]; -+ -+ if (xp - z == 10) -+ { -+ abort (); -+ } -+ return 0; -+} -+ -+/* { dg-final { scan-ipa-dump "struct node has escaped: \"Type escapes via a unhandled rewrite stmt\"" "struct_reorg" } } */ diff --git a/cse-in-vectorization.patch b/cse-in-vectorization.patch deleted file mode 100644 index ac8e7a2b865f079fc039dc4639295cb1d0dd88b8..0000000000000000000000000000000000000000 --- a/cse-in-vectorization.patch +++ /dev/null @@ -1,68 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-vect-CSE-for-bump-and-offset-in-strided-load-store-o.patch -4a31a8add56d49867c187d90b3a89e97634543c2 - -diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr95199.c b/gcc/testsuite/gcc.target/aarch64/sve/pr95199.c -new file mode 100644 -index 00000000000..adcd5124a7c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr95199.c -@@ -0,0 +1,17 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O3 -march=armv8.2-a+sve -fdump-tree-vect" } */ -+ -+void -+foo (double *a, double *b, double m, int inc_x, int inc_y) -+{ -+ int ix = 0, iy = 0; -+ for (int i = 0; i < 1000; ++i) -+ { -+ a[ix] += m * b[iy]; -+ ix += inc_x; -+ iy += inc_y; -+ } -+ return ; -+} -+ -+/* { dg-final { scan-tree-dump-times "VEC_SERIES_EXPR" 2 "vect" } } */ -diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c -index 4a0a907fcb4..c9174395fca 100644 ---- a/gcc/tree-vect-stmts.c -+++ b/gcc/tree-vect-stmts.c -@@ -2846,16 +2846,12 @@ vect_get_strided_load_store_ops (stmt_vec_info stmt_info, - tree *dataref_bump, tree *vec_offset) - { - struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); -- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); - tree vectype = STMT_VINFO_VECTYPE (stmt_info); -- gimple_seq stmts; - - tree bump = size_binop (MULT_EXPR, - fold_convert (sizetype, DR_STEP (dr)), - size_int (TYPE_VECTOR_SUBPARTS (vectype))); -- *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE); -- if (stmts) -- gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); -+ *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump); - - /* The offset given in GS_INFO can have pointer type, so use the element - type of the vector instead. */ -@@ -2866,13 +2862,11 @@ vect_get_strided_load_store_ops (stmt_vec_info stmt_info, - tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr), - ssize_int (gs_info->scale)); - step = fold_convert (offset_type, step); -- step = force_gimple_operand (step, &stmts, true, NULL_TREE); - - /* Create {0, X, X*2, X*3, ...}. 
*/ -- *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype, -- build_zero_cst (offset_type), step); -- if (stmts) -- gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); -+ tree offset = fold_build2 (VEC_SERIES_EXPR, offset_vectype, -+ build_zero_cst (offset_type), step); -+ *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset); - } - - /* Return the amount that should be added to a vector pointer to move diff --git a/delete-incorrect-smw.patch b/delete-incorrect-smw.patch deleted file mode 100644 index 44ed526e7b103c68325d54cf5a9922ea20ffcbb8..0000000000000000000000000000000000000000 --- a/delete-incorrect-smw.patch +++ /dev/null @@ -1,58 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-re-PR-middle-end-91195-incorrect-may-be-used-uniniti.patch -06e8db10cd80d88fb3a6afedf2c35da6c1fa6d85 - -diff -uprN a/gcc/testsuite/gcc.dg/pr91195.c b/gcc/testsuite/gcc.dg/pr91195.c -new file mode 100644 ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/pr91195.c -@@ -0,0 +1,25 @@ -+/* PR middle-end/91195 */ -+/* { dg-do compile } */ -+/* { dg-options "-Wmaybe-uninitialized -O2" } */ -+ -+int bar (char*); -+ -+void -+foo (char *x, char *y) -+{ -+ char *a[2]; -+ int b = 0; -+ -+ if (x) -+ a[b++] = x; /* { dg-bogus "may be used uninitialized in this function" } */ -+ if (y) -+ a[b++] = y; -+ -+ for (int j = 0; j < 4; j++) -+ switch (j) -+ { -+ case 0: -+ if (b == 0 || bar (a[0])) -+ break; -+ } -+} -diff -uprN a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c ---- a/gcc/tree-ssa-phiopt.c -+++ b/gcc/tree-ssa-phiopt.c -@@ -2269,6 +2269,10 @@ cond_store_replacement (basic_block middle_bb, basic_block join_bb, - name = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore"); - new_stmt = gimple_build_assign (name, lhs); - gimple_set_location (new_stmt, locus); -+ lhs = unshare_expr (lhs); -+ /* Set TREE_NO_WARNING on the rhs of the load to avoid uninit -+ warnings. */ -+ TREE_NO_WARNING (gimple_assign_rhs1 (new_stmt)) = 1; - gsi_insert_on_edge (e1, new_stmt); - - /* 3) Create a PHI node at the join block, with one argument -@@ -2279,7 +2283,6 @@ cond_store_replacement (basic_block middle_bb, basic_block join_bb, - add_phi_arg (newphi, rhs, e0, locus); - add_phi_arg (newphi, name, e1, locus); - -- lhs = unshare_expr (lhs); - new_stmt = gimple_build_assign (lhs, PHI_RESULT (newphi)); - - /* 4) Insert that PHI node. */ diff --git a/div-opti.patch b/div-opti.patch deleted file mode 100644 index 11519970112084d81ee140759e6f050b5e49d130..0000000000000000000000000000000000000000 --- a/div-opti.patch +++ /dev/null @@ -1,69 +0,0 @@ -From dbf3dc75888623e9d4bb7cc5e9c30caa9b24ffe7 Mon Sep 17 00:00:00 2001 -From: Bu Le -Date: Thu, 12 Mar 2020 22:39:12 +0000 -Subject: [PATCH] aarch64: Add --params to control the number of recip steps - [PR94154] - --mlow-precision-div hard-coded the number of iterations to 2 for double -and 1 for float. This patch adds a --param to control the number. - -2020-03-13 Bu Le - -gcc/ - PR target/94154 - * config/aarch64/aarch64.opt (-param=aarch64-float-recp-precision=) - (-param=aarch64-double-recp-precision=): New options. - * doc/invoke.texi: Document them. - * config/aarch64/aarch64.c (aarch64_emit_approx_div): Use them - instead of hard-coding the choice of 1 for float and 2 for double. 
---- - gcc/ChangeLog | 9 +++++++++ - gcc/config/aarch64/aarch64.c | 8 +++++--- - gcc/config/aarch64/aarch64.opt | 9 +++++++++ - gcc/doc/invoke.texi | 11 +++++++++++ - 4 files changed, 34 insertions(+), 3 deletions(-) - -diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c -index c320d5ba51d..2c81f86dd2a 100644 ---- a/gcc/config/aarch64/aarch64.c -+++ b/gcc/config/aarch64/aarch64.c -@@ -12911,10 +12911,12 @@ aarch64_emit_approx_div (rtx quo, rtx num, rtx den) - /* Iterate over the series twice for SF and thrice for DF. */ - int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2; - -- /* Optionally iterate over the series once less for faster performance, -- while sacrificing the accuracy. */ -+ /* Optionally iterate over the series less for faster performance, -+ while sacrificing the accuracy. The default is 2 for DF and 1 for SF. */ - if (flag_mlow_precision_div) -- iterations--; -+ iterations = (GET_MODE_INNER (mode) == DFmode -+ ? PARAM_VALUE (PARAM_AARCH64_DOUBLE_RECP_PRECISION) -+ : PARAM_VALUE (PARAM_AARCH64_FLOAT_RECP_PRECISION)); - - /* Iterate over the series to calculate the approximate reciprocal. */ - rtx xtmp = gen_reg_rtx (mode); - ---- a/gcc/params.def 2020-04-15 17:24:31.984000000 +0800 -+++ b/gcc/params.def 2020-04-15 16:59:21.752000000 +0800 -@@ -1420,6 +1414,17 @@ DEFPARAM(PARAM_SSA_NAME_DEF_CHAIN_LIMIT, - "a value.", - 512, 0, 0) - -+DEFPARAM(PARAM_AARCH64_FLOAT_RECP_PRECISION, -+ "aarch64-float-recp-precision", -+ "The number of Newton iterations for calculating the reciprocal " -+ "for float type. ", -+ 1, 1, 5) -+ -+DEFPARAM(PARAM_AARCH64_DOUBLE_RECP_PRECISION, -+ "aarch64-double-recp-precision", -+ "The number of Newton iterations for calculating the reciprocal " -+ "for double type.", -+ 2, 1, 5) - /* - - Local variables: --- -2.18.2 - diff --git a/dont-generate-IF_THEN_ELSE.patch b/dont-generate-IF_THEN_ELSE.patch deleted file mode 100644 index 16f28a41e48c990e870b587b38018ea55084b40e..0000000000000000000000000000000000000000 --- a/dont-generate-IF_THEN_ELSE.patch +++ /dev/null @@ -1,25 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-combine-Don-t-generate-IF_THEN_ELSE.patch -ddbb5da5199fb421dc398911c37fa7f896efc13f - -diff --git a/gcc/combine.c b/gcc/combine.c -index 4de759a8e6b..ce7aeecb5c2 100644 ---- a/gcc/combine.c -+++ b/gcc/combine.c -@@ -5909,14 +5909,6 @@ combine_simplify_rtx (rtx x, machine_mode op0_mode, int in_dest, - mode, VOIDmode, - cond, cop1), - mode); -- else -- return gen_rtx_IF_THEN_ELSE (mode, -- simplify_gen_relational (cond_code, -- mode, -- VOIDmode, -- cond, -- cop1), -- true_rtx, false_rtx); - - code = GET_CODE (x); - op0_mode = VOIDmode; diff --git a/enable-aarch64-libquadmath.patch b/enable-aarch64-libquadmath.patch deleted file mode 100644 index c45b07822c052d4ac2fed8c8faf41cd0a32bb332..0000000000000000000000000000000000000000 --- a/enable-aarch64-libquadmath.patch +++ /dev/null @@ -1,460 +0,0 @@ -diff -urpN a/libquadmath/Makefile.in b/libquadmath/Makefile.in ---- a/libquadmath/Makefile.in 2020-03-31 09:51:59.000000000 +0800 -+++ b/libquadmath/Makefile.in 2020-04-06 11:52:45.650793256 +0800 -@@ -90,7 +90,7 @@ POST_UNINSTALL = : - build_triplet = @build@ - host_triplet = @host@ - target_triplet = @target@ --@BUILD_LIBQUADMATH_FALSE@libquadmath_la_DEPENDENCIES = -+#libquadmath_la_DEPENDENCIES = - subdir = . 
- ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 - am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \ -@@ -146,68 +146,68 @@ am__installdirs = "$(DESTDIR)$(toolexecl - "$(DESTDIR)$(libsubincludedir)" - LTLIBRARIES = $(toolexeclib_LTLIBRARIES) - am__dirstamp = $(am__leading_dot)dirstamp --@BUILD_LIBQUADMATH_TRUE@am_libquadmath_la_OBJECTS = math/x2y2m1q.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/acoshq.lo math/fmodq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/acosq.lo math/frexpq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/rem_pio2q.lo math/asinhq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/hypotq.lo math/remainderq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/asinq.lo math/rintq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/atan2q.lo math/isinfq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/roundq.lo math/atanhq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/isnanq.lo math/scalblnq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/atanq.lo math/j0q.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/scalbnq.lo math/cbrtq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/j1q.lo math/signbitq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/ceilq.lo math/jnq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/sincos_table.lo math/complex.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/ldexpq.lo math/sincosq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/copysignq.lo math/lgammaq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/sincosq_kernel.lo math/coshq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/llroundq.lo math/sinhq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/cosq.lo math/log10q.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/sinq.lo math/cosq_kernel.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/log1pq.lo math/sinq_kernel.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/erfq.lo math/logq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/sqrtq.lo math/expm1q.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/lroundq.lo math/tanhq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/expq.lo math/modfq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/tanq.lo math/fabsq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/nanq.lo math/tgammaq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/finiteq.lo math/nextafterq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/truncq.lo math/floorq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/powq.lo math/fmaq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/logbq.lo math/exp2q.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/issignalingq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/lgammaq_neg.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/lgammaq_product.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/tanq_kernel.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/tgammaq_product.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/casinhq_kernel.lo math/cacoshq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/cacosq.lo math/casinhq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/casinq.lo math/catanhq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/catanq.lo math/cimagq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/conjq.lo math/cprojq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/crealq.lo math/fdimq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/fmaxq.lo math/fminq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/ilogbq.lo math/llrintq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/log2q.lo math/lrintq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/nearbyintq.lo math/remquoq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/ccoshq.lo math/cexpq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/clog10q.lo math/clogq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/csinq.lo math/csinhq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/csqrtq.lo math/ctanq.lo \ --@BUILD_LIBQUADMATH_TRUE@ math/ctanhq.lo printf/addmul_1.lo \ --@BUILD_LIBQUADMATH_TRUE@ printf/add_n.lo printf/cmp.lo \ --@BUILD_LIBQUADMATH_TRUE@ printf/divrem.lo printf/flt1282mpn.lo \ --@BUILD_LIBQUADMATH_TRUE@ printf/fpioconst.lo printf/lshift.lo \ --@BUILD_LIBQUADMATH_TRUE@ printf/mul_1.lo printf/mul_n.lo \ 
--@BUILD_LIBQUADMATH_TRUE@ printf/mul.lo printf/printf_fphex.lo \ --@BUILD_LIBQUADMATH_TRUE@ printf/printf_fp.lo \ --@BUILD_LIBQUADMATH_TRUE@ printf/quadmath-printf.lo \ --@BUILD_LIBQUADMATH_TRUE@ printf/rshift.lo printf/submul_1.lo \ --@BUILD_LIBQUADMATH_TRUE@ printf/sub_n.lo strtod/strtoflt128.lo \ --@BUILD_LIBQUADMATH_TRUE@ strtod/mpn2flt128.lo \ --@BUILD_LIBQUADMATH_TRUE@ strtod/tens_in_limb.lo -+am_libquadmath_la_OBJECTS = math/x2y2m1q.lo \ -+ math/acoshq.lo math/fmodq.lo \ -+ math/acosq.lo math/frexpq.lo \ -+ math/rem_pio2q.lo math/asinhq.lo \ -+ math/hypotq.lo math/remainderq.lo \ -+ math/asinq.lo math/rintq.lo \ -+ math/atan2q.lo math/isinfq.lo \ -+ math/roundq.lo math/atanhq.lo \ -+ math/isnanq.lo math/scalblnq.lo \ -+ math/atanq.lo math/j0q.lo \ -+ math/scalbnq.lo math/cbrtq.lo \ -+ math/j1q.lo math/signbitq.lo \ -+ math/ceilq.lo math/jnq.lo \ -+ math/sincos_table.lo math/complex.lo \ -+ math/ldexpq.lo math/sincosq.lo \ -+ math/copysignq.lo math/lgammaq.lo \ -+ math/sincosq_kernel.lo math/coshq.lo \ -+ math/llroundq.lo math/sinhq.lo \ -+ math/cosq.lo math/log10q.lo \ -+ math/sinq.lo math/cosq_kernel.lo \ -+ math/log1pq.lo math/sinq_kernel.lo \ -+ math/erfq.lo math/logq.lo \ -+ math/sqrtq.lo math/expm1q.lo \ -+ math/lroundq.lo math/tanhq.lo \ -+ math/expq.lo math/modfq.lo \ -+ math/tanq.lo math/fabsq.lo \ -+ math/nanq.lo math/tgammaq.lo \ -+ math/finiteq.lo math/nextafterq.lo \ -+ math/truncq.lo math/floorq.lo \ -+ math/powq.lo math/fmaq.lo \ -+ math/logbq.lo math/exp2q.lo \ -+ math/issignalingq.lo \ -+ math/lgammaq_neg.lo \ -+ math/lgammaq_product.lo \ -+ math/tanq_kernel.lo \ -+ math/tgammaq_product.lo \ -+ math/casinhq_kernel.lo math/cacoshq.lo \ -+ math/cacosq.lo math/casinhq.lo \ -+ math/casinq.lo math/catanhq.lo \ -+ math/catanq.lo math/cimagq.lo \ -+ math/conjq.lo math/cprojq.lo \ -+ math/crealq.lo math/fdimq.lo \ -+ math/fmaxq.lo math/fminq.lo \ -+ math/ilogbq.lo math/llrintq.lo \ -+ math/log2q.lo math/lrintq.lo \ -+ math/nearbyintq.lo math/remquoq.lo \ -+ math/ccoshq.lo math/cexpq.lo \ -+ math/clog10q.lo math/clogq.lo \ -+ math/csinq.lo math/csinhq.lo \ -+ math/csqrtq.lo math/ctanq.lo \ -+ math/ctanhq.lo printf/addmul_1.lo \ -+ printf/add_n.lo printf/cmp.lo \ -+ printf/divrem.lo printf/flt1282mpn.lo \ -+ printf/fpioconst.lo printf/lshift.lo \ -+ printf/mul_1.lo printf/mul_n.lo \ -+ printf/mul.lo printf/printf_fphex.lo \ -+ printf/printf_fp.lo \ -+ printf/quadmath-printf.lo \ -+ printf/rshift.lo printf/submul_1.lo \ -+ printf/sub_n.lo strtod/strtoflt128.lo \ -+ strtod/mpn2flt128.lo \ -+ strtod/tens_in_limb.lo - libquadmath_la_OBJECTS = $(am_libquadmath_la_OBJECTS) - AM_V_lt = $(am__v_lt_@AM_V@) - am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) -@@ -217,8 +217,8 @@ libquadmath_la_LINK = $(LIBTOOL) $(AM_V_ - $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ - $(AM_CFLAGS) $(CFLAGS) $(libquadmath_la_LDFLAGS) $(LDFLAGS) -o \ - $@ --@BUILD_LIBQUADMATH_TRUE@am_libquadmath_la_rpath = -rpath \ --@BUILD_LIBQUADMATH_TRUE@ $(toolexeclibdir) -+am_libquadmath_la_rpath = -rpath \ -+ $(toolexeclibdir) - AM_V_P = $(am__v_P_@AM_V@) - am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) - am__v_P_0 = false -@@ -336,7 +336,7 @@ CFLAGS = @CFLAGS@ - CPP = @CPP@ - CPPFLAGS = @CPPFLAGS@ - CYGPATH_W = @CYGPATH_W@ --DEFS = @DEFS@ -+DEFS = @DEFS@ -D__float128="long double" - DEPDIR = @DEPDIR@ - DSYMUTIL = @DSYMUTIL@ - DUMPBIN = @DUMPBIN@ -@@ -408,7 +408,7 @@ datadir = @datadir@ - datarootdir = @datarootdir@ - docdir = @docdir@ - dvidir = @dvidir@ --enable_shared = @enable_shared@ -+enable_shared = yes - 
enable_static = @enable_static@ - exec_prefix = @exec_prefix@ - get_gcc_base_ver = @get_gcc_base_ver@ -@@ -450,109 +450,109 @@ top_build_prefix = @top_build_prefix@ - top_builddir = @top_builddir@ - top_srcdir = @top_srcdir@ - AUTOMAKE_OPTIONS = foreign info-in-builddir --@BUILD_LIBQUADMATH_TRUE@ACLOCAL_AMFLAGS = -I .. -I ../config --@BUILD_LIBQUADMATH_TRUE@AM_CPPFLAGS = -I $(top_srcdir)/../include --@BUILD_LIBQUADMATH_TRUE@AM_CFLAGS = $(XCFLAGS) --@BUILD_LIBQUADMATH_TRUE@gcc_version := $(shell @get_gcc_base_ver@ $(top_srcdir)/../gcc/BASE-VER) --@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_FALSE@version_arg = --@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_GNU_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_arg = -Wl,--version-script=$(srcdir)/quadmath.map --@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_arg = -Wl,-M,quadmath.map-sun --@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_FALSE@version_dep = --@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_GNU_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_dep = $(srcdir)/quadmath.map --@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_dep = quadmath.map-sun --@BUILD_LIBQUADMATH_TRUE@toolexeclib_LTLIBRARIES = libquadmath.la --@BUILD_LIBQUADMATH_TRUE@libquadmath_la_LIBADD = --@BUILD_LIBQUADMATH_TRUE@libquadmath_la_LDFLAGS = -version-info `grep -v '^\#' $(srcdir)/libtool-version` \ --@BUILD_LIBQUADMATH_TRUE@ $(version_arg) $(lt_host_flags) -lm -- --@BUILD_LIBQUADMATH_TRUE@libquadmath_la_DEPENDENCIES = $(version_dep) $(libquadmath_la_LIBADD) --@BUILD_LIBQUADMATH_TRUE@nodist_libsubinclude_HEADERS = quadmath.h quadmath_weak.h --@BUILD_LIBQUADMATH_TRUE@libsubincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/include --@BUILD_LIBQUADMATH_TRUE@libquadmath_la_SOURCES = \ --@BUILD_LIBQUADMATH_TRUE@ math/x2y2m1q.c math/acoshq.c math/fmodq.c \ --@BUILD_LIBQUADMATH_TRUE@ math/acosq.c math/frexpq.c \ --@BUILD_LIBQUADMATH_TRUE@ math/rem_pio2q.c math/asinhq.c math/hypotq.c math/remainderq.c \ --@BUILD_LIBQUADMATH_TRUE@ math/asinq.c math/rintq.c math/atan2q.c math/isinfq.c \ --@BUILD_LIBQUADMATH_TRUE@ math/roundq.c math/atanhq.c math/isnanq.c math/scalblnq.c math/atanq.c \ --@BUILD_LIBQUADMATH_TRUE@ math/j0q.c math/scalbnq.c math/cbrtq.c math/j1q.c math/signbitq.c \ --@BUILD_LIBQUADMATH_TRUE@ math/ceilq.c math/jnq.c math/sincos_table.c math/complex.c math/ldexpq.c \ --@BUILD_LIBQUADMATH_TRUE@ math/sincosq.c math/copysignq.c math/lgammaq.c math/sincosq_kernel.c \ --@BUILD_LIBQUADMATH_TRUE@ math/coshq.c math/llroundq.c math/sinhq.c math/cosq.c math/log10q.c \ --@BUILD_LIBQUADMATH_TRUE@ math/sinq.c math/cosq_kernel.c math/log1pq.c math/sinq_kernel.c \ --@BUILD_LIBQUADMATH_TRUE@ math/erfq.c math/logq.c math/sqrtq.c math/expm1q.c math/lroundq.c \ --@BUILD_LIBQUADMATH_TRUE@ math/tanhq.c math/expq.c math/modfq.c math/tanq.c math/fabsq.c \ --@BUILD_LIBQUADMATH_TRUE@ math/nanq.c math/tgammaq.c math/finiteq.c math/nextafterq.c \ --@BUILD_LIBQUADMATH_TRUE@ math/truncq.c math/floorq.c math/powq.c math/fmaq.c math/logbq.c \ --@BUILD_LIBQUADMATH_TRUE@ math/exp2q.c math/issignalingq.c math/lgammaq_neg.c math/lgammaq_product.c \ --@BUILD_LIBQUADMATH_TRUE@ math/tanq_kernel.c math/tgammaq_product.c math/casinhq_kernel.c \ --@BUILD_LIBQUADMATH_TRUE@ math/cacoshq.c math/cacosq.c math/casinhq.c math/casinq.c \ --@BUILD_LIBQUADMATH_TRUE@ math/catanhq.c math/catanq.c math/cimagq.c math/conjq.c math/cprojq.c \ --@BUILD_LIBQUADMATH_TRUE@ math/crealq.c math/fdimq.c math/fmaxq.c math/fminq.c math/ilogbq.c \ --@BUILD_LIBQUADMATH_TRUE@ 
math/llrintq.c math/log2q.c math/lrintq.c math/nearbyintq.c math/remquoq.c \ --@BUILD_LIBQUADMATH_TRUE@ math/ccoshq.c math/cexpq.c math/clog10q.c math/clogq.c math/csinq.c \ --@BUILD_LIBQUADMATH_TRUE@ math/csinhq.c math/csqrtq.c math/ctanq.c math/ctanhq.c \ --@BUILD_LIBQUADMATH_TRUE@ printf/addmul_1.c printf/add_n.c printf/cmp.c printf/divrem.c \ --@BUILD_LIBQUADMATH_TRUE@ printf/flt1282mpn.c printf/fpioconst.c printf/lshift.c printf/mul_1.c \ --@BUILD_LIBQUADMATH_TRUE@ printf/mul_n.c printf/mul.c printf/printf_fphex.c printf/printf_fp.c \ --@BUILD_LIBQUADMATH_TRUE@ printf/quadmath-printf.c printf/rshift.c printf/submul_1.c printf/sub_n.c \ --@BUILD_LIBQUADMATH_TRUE@ strtod/strtoflt128.c strtod/mpn2flt128.c strtod/tens_in_limb.c -+ACLOCAL_AMFLAGS = -I .. -I ../config -+AM_CPPFLAGS = -I $(top_srcdir)/../include -+AM_CFLAGS = $(XCFLAGS) -+gcc_version := $(shell @get_gcc_base_ver@ $(top_srcdir)/../gcc/BASE-VER) -+@LIBQUAD_USE_SYMVER_FALSE@version_arg = -+@LIBQUAD_USE_SYMVER_GNU_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_arg = -Wl,--version-script=$(srcdir)/quadmath.map -+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_arg = -Wl,-M,quadmath.map-sun -+@LIBQUAD_USE_SYMVER_FALSE@version_dep = -+@LIBQUAD_USE_SYMVER_GNU_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_dep = $(srcdir)/quadmath.map -+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_dep = quadmath.map-sun -+toolexeclib_LTLIBRARIES = libquadmath.la -+libquadmath_la_LIBADD = -+libquadmath_la_LDFLAGS = -version-info `grep -v '^\#' $(srcdir)/libtool-version` \ -+ $(version_arg) $(lt_host_flags) -lm -+ -+libquadmath_la_DEPENDENCIES = $(version_dep) $(libquadmath_la_LIBADD) -+nodist_libsubinclude_HEADERS = quadmath.h quadmath_weak.h -+libsubincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/include -+libquadmath_la_SOURCES = \ -+ math/x2y2m1q.c math/acoshq.c math/fmodq.c \ -+ math/acosq.c math/frexpq.c \ -+ math/rem_pio2q.c math/asinhq.c math/hypotq.c math/remainderq.c \ -+ math/asinq.c math/rintq.c math/atan2q.c math/isinfq.c \ -+ math/roundq.c math/atanhq.c math/isnanq.c math/scalblnq.c math/atanq.c \ -+ math/j0q.c math/scalbnq.c math/cbrtq.c math/j1q.c math/signbitq.c \ -+ math/ceilq.c math/jnq.c math/sincos_table.c math/complex.c math/ldexpq.c \ -+ math/sincosq.c math/copysignq.c math/lgammaq.c math/sincosq_kernel.c \ -+ math/coshq.c math/llroundq.c math/sinhq.c math/cosq.c math/log10q.c \ -+ math/sinq.c math/cosq_kernel.c math/log1pq.c math/sinq_kernel.c \ -+ math/erfq.c math/logq.c math/sqrtq.c math/expm1q.c math/lroundq.c \ -+ math/tanhq.c math/expq.c math/modfq.c math/tanq.c math/fabsq.c \ -+ math/nanq.c math/tgammaq.c math/finiteq.c math/nextafterq.c \ -+ math/truncq.c math/floorq.c math/powq.c math/fmaq.c math/logbq.c \ -+ math/exp2q.c math/issignalingq.c math/lgammaq_neg.c math/lgammaq_product.c \ -+ math/tanq_kernel.c math/tgammaq_product.c math/casinhq_kernel.c \ -+ math/cacoshq.c math/cacosq.c math/casinhq.c math/casinq.c \ -+ math/catanhq.c math/catanq.c math/cimagq.c math/conjq.c math/cprojq.c \ -+ math/crealq.c math/fdimq.c math/fmaxq.c math/fminq.c math/ilogbq.c \ -+ math/llrintq.c math/log2q.c math/lrintq.c math/nearbyintq.c math/remquoq.c \ -+ math/ccoshq.c math/cexpq.c math/clog10q.c math/clogq.c math/csinq.c \ -+ math/csinhq.c math/csqrtq.c math/ctanq.c math/ctanhq.c \ -+ printf/addmul_1.c printf/add_n.c printf/cmp.c printf/divrem.c \ -+ printf/flt1282mpn.c printf/fpioconst.c printf/lshift.c printf/mul_1.c \ -+ printf/mul_n.c printf/mul.c printf/printf_fphex.c printf/printf_fp.c \ -+ 
printf/quadmath-printf.c printf/rshift.c printf/submul_1.c printf/sub_n.c \ -+ strtod/strtoflt128.c strtod/mpn2flt128.c strtod/tens_in_limb.c - - - # Work around what appears to be a GNU make bug handling MAKEFLAGS - # values defined in terms of make variables, as is the case for CC and - # friends when we are called from the top level Makefile. --@BUILD_LIBQUADMATH_TRUE@AM_MAKEFLAGS = \ --@BUILD_LIBQUADMATH_TRUE@ "AR_FLAGS=$(AR_FLAGS)" \ --@BUILD_LIBQUADMATH_TRUE@ "CC_FOR_BUILD=$(CC_FOR_BUILD)" \ --@BUILD_LIBQUADMATH_TRUE@ "CFLAGS=$(CFLAGS)" \ --@BUILD_LIBQUADMATH_TRUE@ "CXXFLAGS=$(CXXFLAGS)" \ --@BUILD_LIBQUADMATH_TRUE@ "CFLAGS_FOR_BUILD=$(CFLAGS_FOR_BUILD)" \ --@BUILD_LIBQUADMATH_TRUE@ "CFLAGS_FOR_TARGET=$(CFLAGS_FOR_TARGET)" \ --@BUILD_LIBQUADMATH_TRUE@ "INSTALL=$(INSTALL)" \ --@BUILD_LIBQUADMATH_TRUE@ "INSTALL_DATA=$(INSTALL_DATA)" \ --@BUILD_LIBQUADMATH_TRUE@ "INSTALL_PROGRAM=$(INSTALL_PROGRAM)" \ --@BUILD_LIBQUADMATH_TRUE@ "INSTALL_SCRIPT=$(INSTALL_SCRIPT)" \ --@BUILD_LIBQUADMATH_TRUE@ "JC1FLAGS=$(JC1FLAGS)" \ --@BUILD_LIBQUADMATH_TRUE@ "LDFLAGS=$(LDFLAGS)" \ --@BUILD_LIBQUADMATH_TRUE@ "LIBCFLAGS=$(LIBCFLAGS)" \ --@BUILD_LIBQUADMATH_TRUE@ "LIBCFLAGS_FOR_TARGET=$(LIBCFLAGS_FOR_TARGET)" \ --@BUILD_LIBQUADMATH_TRUE@ "MAKE=$(MAKE)" \ --@BUILD_LIBQUADMATH_TRUE@ "MAKEINFO=$(MAKEINFO) $(MAKEINFOFLAGS)" \ --@BUILD_LIBQUADMATH_TRUE@ "PICFLAG=$(PICFLAG)" \ --@BUILD_LIBQUADMATH_TRUE@ "PICFLAG_FOR_TARGET=$(PICFLAG_FOR_TARGET)" \ --@BUILD_LIBQUADMATH_TRUE@ "SHELL=$(SHELL)" \ --@BUILD_LIBQUADMATH_TRUE@ "RUNTESTFLAGS=$(RUNTESTFLAGS)" \ --@BUILD_LIBQUADMATH_TRUE@ "exec_prefix=$(exec_prefix)" \ --@BUILD_LIBQUADMATH_TRUE@ "infodir=$(infodir)" \ --@BUILD_LIBQUADMATH_TRUE@ "libdir=$(libdir)" \ --@BUILD_LIBQUADMATH_TRUE@ "prefix=$(prefix)" \ --@BUILD_LIBQUADMATH_TRUE@ "includedir=$(includedir)" \ --@BUILD_LIBQUADMATH_TRUE@ "AR=$(AR)" \ --@BUILD_LIBQUADMATH_TRUE@ "AS=$(AS)" \ --@BUILD_LIBQUADMATH_TRUE@ "CC=$(CC)" \ --@BUILD_LIBQUADMATH_TRUE@ "CXX=$(CXX)" \ --@BUILD_LIBQUADMATH_TRUE@ "LD=$(LD)" \ --@BUILD_LIBQUADMATH_TRUE@ "LIBCFLAGS=$(LIBCFLAGS)" \ --@BUILD_LIBQUADMATH_TRUE@ "NM=$(NM)" \ --@BUILD_LIBQUADMATH_TRUE@ "PICFLAG=$(PICFLAG)" \ --@BUILD_LIBQUADMATH_TRUE@ "RANLIB=$(RANLIB)" \ --@BUILD_LIBQUADMATH_TRUE@ "DESTDIR=$(DESTDIR)" -+AM_MAKEFLAGS = \ -+ "AR_FLAGS=$(AR_FLAGS)" \ -+ "CC_FOR_BUILD=$(CC_FOR_BUILD)" \ -+ "CFLAGS=$(CFLAGS)" \ -+ "CXXFLAGS=$(CXXFLAGS)" \ -+ "CFLAGS_FOR_BUILD=$(CFLAGS_FOR_BUILD)" \ -+ "CFLAGS_FOR_TARGET=$(CFLAGS_FOR_TARGET)" \ -+ "INSTALL=$(INSTALL)" \ -+ "INSTALL_DATA=$(INSTALL_DATA)" \ -+ "INSTALL_PROGRAM=$(INSTALL_PROGRAM)" \ -+ "INSTALL_SCRIPT=$(INSTALL_SCRIPT)" \ -+ "JC1FLAGS=$(JC1FLAGS)" \ -+ "LDFLAGS=$(LDFLAGS)" \ -+ "LIBCFLAGS=$(LIBCFLAGS)" \ -+ "LIBCFLAGS_FOR_TARGET=$(LIBCFLAGS_FOR_TARGET)" \ -+ "MAKE=$(MAKE)" \ -+ "MAKEINFO=$(MAKEINFO) $(MAKEINFOFLAGS)" \ -+ "PICFLAG=$(PICFLAG)" \ -+ "PICFLAG_FOR_TARGET=$(PICFLAG_FOR_TARGET)" \ -+ "SHELL=$(SHELL)" \ -+ "RUNTESTFLAGS=$(RUNTESTFLAGS)" \ -+ "exec_prefix=$(exec_prefix)" \ -+ "infodir=$(infodir)" \ -+ "libdir=$(libdir)" \ -+ "prefix=$(prefix)" \ -+ "includedir=$(includedir)" \ -+ "AR=$(AR)" \ -+ "AS=$(AS)" \ -+ "CC=$(CC)" \ -+ "CXX=$(CXX)" \ -+ "LD=$(LD)" \ -+ "LIBCFLAGS=$(LIBCFLAGS)" \ -+ "NM=$(NM)" \ -+ "PICFLAG=$(PICFLAG)" \ -+ "RANLIB=$(RANLIB)" \ -+ "DESTDIR=$(DESTDIR)" - - - # Subdir rules rely on $(FLAGS_TO_PASS) --@BUILD_LIBQUADMATH_TRUE@FLAGS_TO_PASS = $(AM_MAKEFLAGS) --@BUILD_LIBQUADMATH_TRUE@MAKEOVERRIDES = --@BUILD_LIBQUADMATH_TRUE@@GENINSRC_FALSE@STAMP_GENINSRC = -+FLAGS_TO_PASS = $(AM_MAKEFLAGS) -+MAKEOVERRIDES 
= -+@GENINSRC_FALSE@STAMP_GENINSRC = - - # AM_CONDITIONAL on configure option --generated-files-in-srcdir --@BUILD_LIBQUADMATH_TRUE@@GENINSRC_TRUE@STAMP_GENINSRC = stamp-geninsrc --@BUILD_LIBQUADMATH_TRUE@ALL_LOCAL_DEPS = $(STAMP_GENINSRC) --@BUILD_INFO_FALSE@@BUILD_LIBQUADMATH_TRUE@STAMP_BUILD_INFO = -+@GENINSRC_TRUE@STAMP_GENINSRC = stamp-geninsrc -+ALL_LOCAL_DEPS = $(STAMP_GENINSRC) -+@BUILD_INFO_FALSE@STAMP_BUILD_INFO = - - # AM_CONDITIONAL on configure check ACX_CHECK_PROG_VER([MAKEINFO]) --@BUILD_INFO_TRUE@@BUILD_LIBQUADMATH_TRUE@STAMP_BUILD_INFO = stamp-build-info --@BUILD_LIBQUADMATH_TRUE@CLEANFILES = $(STAMP_GENINSRC) $(STAMP_BUILD_INFO) --@BUILD_LIBQUADMATH_TRUE@MAINTAINERCLEANFILES = $(srcdir)/libquadmath.info -+@BUILD_INFO_TRUE@STAMP_BUILD_INFO = stamp-build-info -+CLEANFILES = $(STAMP_GENINSRC) $(STAMP_BUILD_INFO) -+MAINTAINERCLEANFILES = $(srcdir)/libquadmath.info - - # Automake Documentation: - # If your package has Texinfo files in many directories, you can use the -@@ -563,8 +563,8 @@ TEXINFO_TEX = ../gcc/doc/include/texinfo - - # Defines info, dvi, pdf and html targets - MAKEINFOFLAGS = -I $(srcdir)/../gcc/doc/include --@BUILD_LIBQUADMATH_FALSE@info_TEXINFOS = --@BUILD_LIBQUADMATH_TRUE@info_TEXINFOS = libquadmath.texi -+info_TEXINFOS = -+info_TEXINFOS = libquadmath.texi - libquadmath_TEXINFOS = libquadmath-vers.texi - MULTISRCTOP = - MULTIBUILDTOP = -@@ -1186,6 +1186,7 @@ distclean-tags: - -rm -f cscope.out cscope.in.out cscope.po.out cscope.files - check-am: all-am - check: check-am -+#all-local - all-am: Makefile $(INFO_DEPS) $(LTLIBRARIES) $(HEADERS) config.h \ - all-local - installdirs: -@@ -1424,22 +1425,22 @@ uninstall-am: uninstall-dvi-am uninstall - - .PRECIOUS: Makefile - --@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@quadmath.map-sun : $(srcdir)/quadmath.map \ --@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(top_srcdir)/../contrib/make_sunver.pl \ --@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(libquadmath_la_OBJECTS) $(libquadmath_la_LIBADD) --@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ perl $(top_srcdir)/../contrib/make_sunver.pl \ --@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(srcdir)/quadmath.map \ --@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ `echo $(libquadmath_la_OBJECTS) $(libquadmath_la_LIBADD) | \ --@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ sed 's,\([^/ ]*\)\.l\([ao]\),.libs/\1.\2,g'` \ --@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ > $@ || (rm -f $@ ; exit 1) -- --@BUILD_LIBQUADMATH_TRUE@stamp-geninsrc: libquadmath.info --@BUILD_LIBQUADMATH_TRUE@ cp -p $(top_builddir)/libquadmath.info $(srcdir)/libquadmath.info --@BUILD_LIBQUADMATH_TRUE@ @touch $@ -- --@BUILD_LIBQUADMATH_TRUE@stamp-build-info: libquadmath.texi $(libquadmath_TEXINFOS) --@BUILD_LIBQUADMATH_TRUE@ $(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir) -o libquadmath.info $(srcdir)/libquadmath.texi --@BUILD_LIBQUADMATH_TRUE@ @touch $@ -+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@quadmath.map-sun : $(srcdir)/quadmath.map \ -+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(top_srcdir)/../contrib/make_sunver.pl \ -+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(libquadmath_la_OBJECTS) $(libquadmath_la_LIBADD) -+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ perl 
$(top_srcdir)/../contrib/make_sunver.pl \ -+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(srcdir)/quadmath.map \ -+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ `echo $(libquadmath_la_OBJECTS) $(libquadmath_la_LIBADD) | \ -+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ sed 's,\([^/ ]*\)\.l\([ao]\),.libs/\1.\2,g'` \ -+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ > $@ || (rm -f $@ ; exit 1) -+ -+stamp-geninsrc: libquadmath.info -+ cp -p $(top_builddir)/libquadmath.info $(srcdir)/libquadmath.info -+ @touch $@ -+ -+stamp-build-info: libquadmath.texi $(libquadmath_TEXINFOS) -+ $(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir) -o libquadmath.info $(srcdir)/libquadmath.texi -+ @touch $@ - - all-local: $(ALL_LOCAL_DEPS) - -diff -Nurp a/libquadmath/quadmath.h b/libquadmath/quadmath.h ---- a/libquadmath/quadmath.h 2020-03-31 09:51:59.000000000 +0800 -+++ b/libquadmath/quadmath.h 2020-04-06 11:52:45.650793256 +0800 -@@ -27,6 +27,9 @@ Boston, MA 02110-1301, USA. */ - extern "C" { - #endif - -+#ifdef AARCH64_QUADMATH -+typedef long double __float128; -+#endif - /* Define the complex type corresponding to __float128 - ("_Complex __float128" is not allowed) */ - #if (!defined(_ARCH_PPC)) || defined(__LONG_DOUBLE_IEEE128__) -diff -Nurp a/libquadmath/quadmath.h b/libquadmath/quadmath.h ---- a/libquadmath/quadmath.h 2015-08-09 16:46:52.541904000 +0800 -+++ b/libquadmath/quadmath.h 2019-08-17 18:25:51.923399149 +0800 -@@ -154,10 +154,9 @@ extern int quadmath_snprintf (char *str, - #define FLT128_MAX_10_EXP 4932 - - --#define HUGE_VALQ __builtin_huge_valq() - /* The following alternative is valid, but brings the warning: - (floating constant exceeds range of ‘__float128’) */ --/* #define HUGE_VALQ (__extension__ 0x1.0p32767Q) */ -+ #define HUGE_VALQ (__extension__ 0x1.0p32767Q) - - #define M_Eq 2.718281828459045235360287471352662498Q /* e */ - #define M_LOG2Eq 1.442695040888963407359924681001892137Q /* log_2 e */ - diff --git a/enable-simd-math.patch b/enable-simd-math.patch deleted file mode 100644 index 7658fb8bf521a84cdc0affc85c9b241c968791f1..0000000000000000000000000000000000000000 --- a/enable-simd-math.patch +++ /dev/null @@ -1,258 +0,0 @@ -diff -Nurp a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c ---- a/gcc/c-family/c-opts.c 2021-01-07 17:32:31.856000000 +0800 -+++ b/gcc/c-family/c-opts.c 2021-01-07 17:05:02.524000000 +0800 -@@ -783,6 +783,10 @@ c_common_post_options (const char **pfil - if (cpp_opts->deps.style == DEPS_NONE) - check_deps_environment_vars (); - -+ if (flag_simdmath) -+ { -+ defer_opt (OPT_include, "simdmath.h"); -+ } - handle_deferred_opts (); - - sanitize_cpp_opts (); -diff -Nurp a/gcc/common.opt b/gcc/common.opt ---- a/gcc/common.opt 2021-01-07 17:30:43.912000000 +0800 -+++ b/gcc/common.opt 2021-01-07 17:38:38.612000000 +0800 -@@ -1935,6 +1935,10 @@ fmath-errno - Common Report Var(flag_errno_math) Init(1) Optimization SetByCombined - Set errno after built-in math functions. - -+fsimdmath -+Common Report Var(flag_simdmath) Init(0) Optimization -+Enable auto-vectorize math functions for mathlib. This option will turn on -fno-math-errno and -fopenmp-simd. -+ - fmax-errors= - Common Joined RejectNegative UInteger Var(flag_max_errors) - -fmax-errors= Maximum number of errors to report. 
-diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c ---- a/gcc/config/aarch64/aarch64.c 2021-01-07 17:30:43.912000000 +0800 -+++ b/gcc/config/aarch64/aarch64.c 2021-01-05 15:17:21.580000000 +0800 -@@ -21588,8 +21588,12 @@ aarch64_simd_clone_compute_vecsize_and_s - elt_bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type)); - if (clonei->simdlen == 0) - { -- count = 2; -- vec_bits = (num == 0 ? 64 : 128); -+ /* Currently mathlib or sleef hasn't provide function for V2SF mode -+ simdclone of single precision functions. (e.g._ZCVnN2v_expf) -+ Therefore this mode is disabled by default to avoid link error. -+ Use -msimdmath-64 option to enable this mode. */ -+ count = flag_simdmath_64 ? 2 : 1; -+ vec_bits = ((num == 0 && flag_simdmath_64) ? 64 : 128); - clonei->simdlen = vec_bits / elt_bits; - } - else -diff -Nurp a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt ---- a/gcc/config/aarch64/aarch64.opt 2021-01-07 17:30:43.912000000 +0800 -+++ b/gcc/config/aarch64/aarch64.opt 2021-01-05 15:17:21.448000000 +0800 -@@ -197,6 +197,12 @@ precision of square root results to abou - single precision and to 32 bits for double precision. - If enabled, it implies -mlow-precision-recip-sqrt. - -+msimdmath-64 -+Target Var(flag_simdmath_64) Optimization -+Allow compiler to generate V2SF 64 bits simdclone of math functions, -+which is not currently supported in mathlib or sleef. -+Therefore this option is disabled by default. -+ - mlow-precision-div - Target Var(flag_mlow_precision_div) Optimization - Enable the division approximation. Enabling this reduces -diff -Nurp a/gcc/fortran/scanner.c b/gcc/fortran/scanner.c ---- a/gcc/fortran/scanner.c 2021-01-07 17:31:59.264000000 +0800 -+++ b/gcc/fortran/scanner.c 2021-01-07 17:05:28.776000000 +0800 -@@ -2702,6 +2702,10 @@ gfc_new_file (void) - && !load_file (flag_pre_include, NULL, false)) - exit (FATAL_EXIT_CODE); - -+ if (flag_simdmath -+ && !load_file ("simdmath_f.h", NULL, false)) -+ exit (FATAL_EXIT_CODE); -+ - if (gfc_cpp_enabled ()) - { - result = gfc_cpp_preprocess (gfc_source_file); -diff -Nurp a/gcc/opts.c b/gcc/opts.c ---- a/gcc/opts.c 2021-01-07 17:30:57.740000000 +0800 -+++ b/gcc/opts.c 2021-01-05 15:17:21.068000000 +0800 -@@ -190,6 +190,7 @@ typedef char *char_p; /* For DEF_VEC_P. - static void handle_param (struct gcc_options *opts, - struct gcc_options *opts_set, location_t loc, - const char *carg); -+static void set_simdmath_flags (struct gcc_options *opts, int set); - static void set_debug_level (enum debug_info_type type, int extended, - const char *arg, struct gcc_options *opts, - struct gcc_options *opts_set, -@@ -2420,6 +2421,10 @@ common_handle_option (struct gcc_options - dc->min_margin_width = value; - break; - -+ case OPT_fsimdmath: -+ set_simdmath_flags (opts, value); -+ break; -+ - case OPT_fdump_: - /* Deferred. */ - break; -@@ -2843,6 +2848,18 @@ handle_param (struct gcc_options *opts, - free (arg); - } - -+/* The following routines are used to set -fno-math-errno and -fopenmp-simd -+ to enable vector mathlib. */ -+static void -+set_simdmath_flags (struct gcc_options *opts, int set) -+{ -+ if (set) -+ { -+ opts->x_flag_errno_math = 0; -+ opts->x_flag_openmp_simd = 1; -+ } -+} -+ - /* Used to set the level of strict aliasing warnings in OPTS, - when no level is specified (i.e., when -Wstrict-aliasing, and not - -Wstrict-aliasing=level was given). 
-diff -Nurp a/libgomp/configure b/libgomp/configure ---- a/libgomp/configure 2021-01-07 17:40:08.216000000 +0800 -+++ b/libgomp/configure 2021-01-07 16:29:45.628000000 +0800 -@@ -17258,7 +17258,7 @@ fi - - - --ac_config_files="$ac_config_files omp.h omp_lib.h omp_lib.f90 libgomp_f.h" -+ac_config_files="$ac_config_files omp.h omp_lib.h simdmath.h simdmath_f.h omp_lib.f90 libgomp_f.h" - - ac_config_files="$ac_config_files Makefile testsuite/Makefile libgomp.spec" - -@@ -18426,6 +18426,8 @@ do - "gstdint.h") CONFIG_COMMANDS="$CONFIG_COMMANDS gstdint.h" ;; - "omp.h") CONFIG_FILES="$CONFIG_FILES omp.h" ;; - "omp_lib.h") CONFIG_FILES="$CONFIG_FILES omp_lib.h" ;; -+ "simdmath.h") CONFIG_FILES="$CONFIG_FILES simdmath.h" ;; -+ "simdmath_f.h") CONFIG_FILES="$CONFIG_FILES simdmath_f.h" ;; - "omp_lib.f90") CONFIG_FILES="$CONFIG_FILES omp_lib.f90" ;; - "libgomp_f.h") CONFIG_FILES="$CONFIG_FILES libgomp_f.h" ;; - "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; -diff -Nurp a/libgomp/configure.ac b/libgomp/configure.ac ---- a/libgomp/configure.ac 2021-01-07 17:40:08.216000000 +0800 -+++ b/libgomp/configure.ac 2021-01-07 16:26:26.560000000 +0800 -@@ -422,7 +422,7 @@ CFLAGS="$save_CFLAGS" - # Determine what GCC version number to use in filesystem paths. - GCC_BASE_VER - --AC_CONFIG_FILES(omp.h omp_lib.h omp_lib.f90 libgomp_f.h) -+AC_CONFIG_FILES(omp.h omp_lib.h simdmath.h simdmath_f.h omp_lib.f90 libgomp_f.h) - AC_CONFIG_FILES(Makefile testsuite/Makefile libgomp.spec) - AC_CONFIG_FILES([testsuite/libgomp-test-support.pt.exp:testsuite/libgomp-test-support.exp.in]) - AC_OUTPUT -diff -Nurp a/libgomp/Makefile.am b/libgomp/Makefile.am ---- a/libgomp/Makefile.am 2021-01-07 17:40:08.168000000 +0800 -+++ b/libgomp/Makefile.am 2021-01-07 16:27:39.776000000 +0800 -@@ -74,9 +74,9 @@ libgomp_la_SOURCES += openacc.f90 - endif - - nodist_noinst_HEADERS = libgomp_f.h --nodist_libsubinclude_HEADERS = omp.h openacc.h -+nodist_libsubinclude_HEADERS = omp.h openacc.h simdmath.h - if USE_FORTRAN --nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod \ -+nodist_finclude_HEADERS = omp_lib.h simdmath_f.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod \ - openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod - endif - -diff -Nurp a/libgomp/Makefile.in b/libgomp/Makefile.in ---- a/libgomp/Makefile.in 2021-01-07 17:40:08.208000000 +0800 -+++ b/libgomp/Makefile.in 2021-01-07 16:50:28.820000000 +0800 -@@ -145,7 +145,7 @@ am__CONFIG_DISTCLEAN_FILES = config.stat - configure.lineno config.status.lineno - mkinstalldirs = $(SHELL) $(top_srcdir)/../mkinstalldirs - CONFIG_HEADER = config.h --CONFIG_CLEAN_FILES = omp.h omp_lib.h omp_lib.f90 libgomp_f.h \ -+CONFIG_CLEAN_FILES = omp.h omp_lib.h simdmath.h simdmath_f.h omp_lib.f90 libgomp_f.h \ - libgomp.spec - CONFIG_CLEAN_VPATH_FILES = - am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; -@@ -575,8 +575,8 @@ libgomp_la_SOURCES = alloc.c atomic.c ba - @PLUGIN_HSA_TRUE@libgomp_plugin_hsa_la_LIBADD = libgomp.la $(PLUGIN_HSA_LIBS) - @PLUGIN_HSA_TRUE@libgomp_plugin_hsa_la_LIBTOOLFLAGS = --tag=disable-static - nodist_noinst_HEADERS = libgomp_f.h --nodist_libsubinclude_HEADERS = omp.h openacc.h --@USE_FORTRAN_TRUE@nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod \ -+nodist_libsubinclude_HEADERS = omp.h openacc.h simdmath.h -+@USE_FORTRAN_TRUE@nodist_finclude_HEADERS = omp_lib.h simdmath_f.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod \ - @USE_FORTRAN_TRUE@ openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod - - LTLDFLAGS = 
$(shell $(SHELL) $(top_srcdir)/../libtool-ldflags $(LDFLAGS)) -@@ -668,6 +668,10 @@ omp.h: $(top_builddir)/config.status $(s - cd $(top_builddir) && $(SHELL) ./config.status $@ - omp_lib.h: $(top_builddir)/config.status $(srcdir)/omp_lib.h.in - cd $(top_builddir) && $(SHELL) ./config.status $@ -+simdmath_f.h: $(top_builddir)/config.status $(srcdir)/simdmath_f.h.in -+ cd $(top_builddir) && $(SHELL) ./config.status $@ -+simdmath.h: $(top_builddir)/config.status $(srcdir)/simdmath.h.in -+ cd $(top_builddir) && $(SHELL) ./config.status $@ - omp_lib.f90: $(top_builddir)/config.status $(srcdir)/omp_lib.f90.in - cd $(top_builddir) && $(SHELL) ./config.status $@ - libgomp_f.h: $(top_builddir)/config.status $(srcdir)/libgomp_f.h.in -diff -Nurp a/libgomp/simdmath_f.h.in b/libgomp/simdmath_f.h.in ---- a/libgomp/simdmath_f.h.in 1970-01-01 08:00:00.000000000 +0800 -+++ b/libgomp/simdmath_f.h.in 2021-01-07 16:13:23.196000000 +0800 -@@ -0,0 +1,11 @@ -+!GCC$ builtin (cos) attributes simd (notinbranch) -+!GCC$ builtin (cosf) attributes simd (notinbranch) -+!GCC$ builtin (sin) attributes simd (notinbranch) -+!GCC$ builtin (sinf) attributes simd (notinbranch) -+!GCC$ builtin (exp) attributes simd (notinbranch) -+!GCC$ builtin (expf) attributes simd (notinbranch) -+!GCC$ builtin (exp2f) attributes simd (notinbranch) -+!GCC$ builtin (log) attributes simd (notinbranch) -+!GCC$ builtin (logf) attributes simd (notinbranch) -+!GCC$ builtin (pow) attributes simd (notinbranch) -+!GCC$ builtin (powf) attributes simd (notinbranch) -diff -Nurp a/libgomp/simdmath.h.in b/libgomp/simdmath.h.in ---- a/libgomp/simdmath.h.in 1970-01-01 08:00:00.000000000 +0800 -+++ b/libgomp/simdmath.h.in 2021-01-07 16:13:56.144000000 +0800 -@@ -0,0 +1,40 @@ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+#pragma omp declare simd simdlen(2) notinbranch -+double cos (double x); -+ -+#pragma omp declare simd simdlen(4) notinbranch -+float cosf (float x); -+ -+#pragma omp declare simd simdlen(2) notinbranch -+double sin (double x); -+ -+#pragma omp declare simd simdlen(4) notinbranch -+float sinf (float x); -+ -+#pragma omp declare simd simdlen(2) notinbranch -+double exp (double x); -+ -+#pragma omp declare simd simdlen(4) notinbranch -+float expf (float x); -+ -+#pragma omp declare simd simdlen(2) notinbranch -+double log (double x); -+ -+#pragma omp declare simd simdlen(4) notinbranch -+float logf (float x); -+ -+#pragma omp declare simd simdlen(2) notinbranch -+double pow (double x, double y); -+ -+#pragma omp declare simd simdlen(4) notinbranch -+float powf (float x, float y); -+ -+#pragma omp declare simd simdlen(4) notinbranch -+float exp2f (float x); -+ -+#ifdef __cplusplus -+} // extern "C" -+#endif diff --git a/fix-CTOR-vectorization.patch b/fix-CTOR-vectorization.patch deleted file mode 100644 index 3cb30ad3dd330765bd98f42c6f8059e14d45383d..0000000000000000000000000000000000000000 --- a/fix-CTOR-vectorization.patch +++ /dev/null @@ -1,18 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-fix-CTOR-vectorization.patch -3d42842c07f4143042f3dcc39a050b262bcf1b55 - -diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c -index 9d17e3386fa..fb13af7965e 100644 ---- a/gcc/tree-vect-slp.c -+++ b/gcc/tree-vect-slp.c -@@ -2257,6 +2257,7 @@ vect_analyze_slp_instance (vec_info *vinfo, - /* Value is defined in another basic block. 
*/ - if (!def_info) - return false; -+ def_info = vect_stmt_to_vectorize (def_info); - scalar_stmts.safe_push (def_info); - } - else diff --git a/fix-ICE-IPA-compare-VRP-types.patch b/fix-ICE-IPA-compare-VRP-types.patch deleted file mode 100644 index 3f1b3165ddc7db97da4dbbb5ee4edbc865e03c50..0000000000000000000000000000000000000000 --- a/fix-ICE-IPA-compare-VRP-types.patch +++ /dev/null @@ -1,51 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-IPA-compare-VRP-types.patch -a86623902767122c71c7229150a8b8a79cbb3673 - -diff -Nurp a/gcc/ipa-prop.c b/gcc/ipa-prop.c ---- a/gcc/ipa-prop.c 2020-11-28 00:19:34.340000000 +0800 -+++ b/gcc/ipa-prop.c 2020-11-28 00:21:24.680000000 +0800 -@@ -122,7 +122,8 @@ struct ipa_vr_ggc_hash_traits : public g - static bool - equal (const value_range_base *a, const value_range_base *b) - { -- return a->equal_p (*b); -+ return (a->equal_p (*b) -+ && types_compatible_p (a->type (), b->type ())); - } - static void - mark_empty (value_range_base *&p) -diff -Nurp a/gcc/testsuite/gcc.c-torture/execute/pr97404.c b/gcc/testsuite/gcc.c-torture/execute/pr97404.c ---- a/gcc/testsuite/gcc.c-torture/execute/pr97404.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.c-torture/execute/pr97404.c 2020-11-28 00:21:24.680000000 +0800 -@@ -0,0 +1,28 @@ -+/* PR ipa/97404 */ -+/* { dg-additional-options "-fno-inline" } */ -+ -+char a, b; -+long c; -+short d, e; -+long *f = &c; -+int g; -+char h(signed char i) { return 0; } -+static short j(short i, int k) { return i < 0 ? 0 : i >> k; } -+void l(void); -+void m(void) -+{ -+ e = j(d | 9766, 11); -+ *f = e; -+} -+void l(void) -+{ -+ a = 5 | g; -+ b = h(a); -+} -+int main() -+{ -+ m(); -+ if (c != 4) -+ __builtin_abort(); -+ return 0; -+} diff --git a/fix-ICE-avoid-issueing-loads-in-SM-when-possible.patch b/fix-ICE-avoid-issueing-loads-in-SM-when-possible.patch deleted file mode 100644 index 01a33e31ee2cc0e4a7295b43d772e0c34e6faa2a..0000000000000000000000000000000000000000 --- a/fix-ICE-avoid-issueing-loads-in-SM-when-possible.patch +++ /dev/null @@ -1,123 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-tree-optimization-39612-avoid-issueing-loads-in-SM-w.patch -f9e1ea10e657af9fb02fafecf1a600740fd34409 - -diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr39612.c b/gcc/testsuite/gcc.dg/tree-ssa/pr39612.c ---- a/gcc/testsuite/gcc.dg/tree-ssa/pr39612.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr39612.c 2020-08-17 11:14:08.000000000 +0800 -@@ -0,0 +1,21 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fdump-tree-lim2-details -Wuninitialized" } */ -+ -+void foo(int *); -+void f2(int dst[3], int R) -+{ -+ int i, inter[2]; -+ -+ for (i = 1; i < R; i++) { -+ if (i & 8) -+ { -+ inter[0] = 1; -+ inter[1] = 1; -+ } -+ } -+ -+ foo(inter); -+} -+ -+/* { dg-final { scan-tree-dump-times "Executing store motion" 2 "lim2" } } */ -+/* { dg-final { scan-tree-dump-not " = inter\\\[\[0-1\]\\\];" "lim2" } } */ -diff -Nurp a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c ---- a/gcc/tree-ssa-loop-im.c 2020-08-17 11:13:58.436000000 +0800 -+++ b/gcc/tree-ssa-loop-im.c 2020-08-17 11:14:08.000000000 +0800 -@@ -127,6 +127,8 @@ struct im_mem_ref - - bitmap stored; /* The set of loops in that this memory location - is stored to. */ -+ bitmap loaded; /* The set of loops in that this memory location -+ is loaded from. 
*/ - vec accesses_in_loop; - /* The locations of the accesses. Vector - indexed by the loop number. */ -@@ -1394,6 +1396,7 @@ mem_ref_alloc (ao_ref *mem, unsigned has - ref->ref_decomposed = false; - ref->hash = hash; - ref->stored = NULL; -+ ref->loaded = NULL; - bitmap_initialize (&ref->indep_loop, &lim_bitmap_obstack); - bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack); - ref->accesses_in_loop.create (1); -@@ -1434,6 +1437,27 @@ mark_ref_stored (im_mem_ref *ref, struct - loop = loop_outer (loop); - } - -+/* Set the LOOP bit in REF loaded bitmap and allocate that if -+ necessary. Return whether a bit was changed. */ -+ -+static bool -+set_ref_loaded_in_loop (im_mem_ref *ref, class loop *loop) -+{ -+ if (!ref->loaded) -+ ref->loaded = BITMAP_ALLOC (&lim_bitmap_obstack); -+ return bitmap_set_bit (ref->loaded, loop->num); -+} -+ -+/* Marks reference REF as loaded in LOOP. */ -+ -+static void -+mark_ref_loaded (im_mem_ref *ref, class loop *loop) -+{ -+ while (loop != current_loops->tree_root -+ && set_ref_loaded_in_loop (ref, loop)) -+ loop = loop_outer (loop); -+} -+ - /* Gathers memory references in statement STMT in LOOP, storing the - information about them in the memory_accesses structure. Marks - the vops accessed through unrecognized statements there as -@@ -1569,6 +1593,8 @@ gather_mem_refs_stmt (struct loop *loop, - bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id); - mark_ref_stored (ref, loop); - } -+ else -+ mark_ref_loaded (ref, loop); - init_lim_data (stmt)->ref = ref->id; - return; - } -@@ -1956,6 +1982,8 @@ execute_sm_if_changed (edge ex, tree mem - gsi = gsi_start_bb (then_bb); - /* Insert actual store. */ - stmt = gimple_build_assign (unshare_expr (mem), tmp_var); -+ /* Make sure to not warn about maybe-uninit uses of tmp_var here. */ -+ gimple_set_no_warning (stmt, true); - gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); - - edge e1 = single_succ_edge (new_bb); -@@ -2102,14 +2130,17 @@ execute_sm (struct loop *loop, vec - by move_computations after all dependencies. */ - gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt); - -- /* FIXME/TODO: For the multi-threaded variant, we could avoid this -- load altogether, since the store is predicated by a flag. We -- could, do the load only if it was originally in the loop. */ -- load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref)); -- lim_data = init_lim_data (load); -- lim_data->max_loop = loop; -- lim_data->tgt_loop = loop; -- gsi_insert_before (&gsi, load, GSI_SAME_STMT); -+ /* Avoid doing a load if there was no load of the ref in the loop. -+ Esp. when the ref is not always stored we cannot optimize it -+ away later. */ -+ if (ref->loaded && bitmap_bit_p (ref->loaded, loop->num)) -+ { -+ load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref)); -+ lim_data = init_lim_data (load); -+ lim_data->max_loop = loop; -+ lim_data->tgt_loop = loop; -+ gsi_insert_before (&gsi, load, GSI_SAME_STMT); -+ } - - if (multi_threaded_model_p) - { diff --git a/fix-ICE-during-GIMPLE-pass-dse.patch b/fix-ICE-during-GIMPLE-pass-dse.patch deleted file mode 100644 index 32791059bc8b593d46b5f869c749b78066c857cd..0000000000000000000000000000000000000000 --- a/fix-ICE-during-GIMPLE-pass-dse.patch +++ /dev/null @@ -1,35 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. 
- -0001-Fix-ICE-when-__builtin_calloc-has-no-LHS-PR-tree-opt.patch -4c4be718fb65f9b8dd06d83c6fa3f697a5369d52 - -diff -Nurp a/gcc/testsuite/gcc.target/s390/pr91014.c b/gcc/testsuite/gcc.target/s390/pr91014.c ---- a/gcc/testsuite/gcc.target/s390/pr91014.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.target/s390/pr91014.c 2020-09-09 15:47:34.740000000 +0800 -@@ -0,0 +1,8 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O" } */ -+/* { dg-require-effective-target alloca } */ -+ -+void foo(void) -+{ -+ __builtin_calloc (1, 1); /* { dg-warning "ignoring return value of '__builtin_calloc' declared with attribute 'warn_unused_result'" } */ -+} -diff -Nurp a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c ---- a/gcc/tree-ssa-dse.c 2020-09-09 15:47:21.084000000 +0800 -+++ b/gcc/tree-ssa-dse.c 2020-09-09 15:47:34.740000000 +0800 -@@ -119,10 +119,11 @@ initialize_ao_ref_for_dse (gimple *stmt, - { - tree nelem = gimple_call_arg (stmt, 0); - tree selem = gimple_call_arg (stmt, 1); -+ tree lhs; - if (TREE_CODE (nelem) == INTEGER_CST -- && TREE_CODE (selem) == INTEGER_CST) -+ && TREE_CODE (selem) == INTEGER_CST -+ && (lhs = gimple_call_lhs (stmt)) != NULL_TREE) - { -- tree lhs = gimple_call_lhs (stmt); - tree size = fold_build2 (MULT_EXPR, TREE_TYPE (nelem), - nelem, selem); - ao_ref_init_from_ptr_and_size (write, lhs, size); diff --git a/fix-ICE-during-pass-ccp.patch b/fix-ICE-during-pass-ccp.patch deleted file mode 100644 index 15cb1df13fb87b53d60c8bbfbfb468e4996b15e6..0000000000000000000000000000000000000000 --- a/fix-ICE-during-pass-ccp.patch +++ /dev/null @@ -1,38 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-PR-tree-optimization-94574-aarch64-ICE-during-GIMPLE.patch -f65cecabc32fe12b024253502af953e657e1a878 - -diff -uprN a/gcc/testsuite/gcc.dg/pr94574.c b/gcc/testsuite/gcc.dg/pr94574.c ---- a/gcc/testsuite/gcc.dg/pr94574.c 1970-01-01 00:00:00.000000000 +0000 -+++ b/gcc/testsuite/gcc.dg/pr94574.c 2020-04-15 21:08:48.972000000 +0000 -@@ -0,0 +1,15 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+typedef unsigned int v4si __attribute__((vector_size(16))); -+typedef unsigned int v2si __attribute__((vector_size(8))); -+ -+/* The aliasing is somewhat dubious here, but it must compile. */ -+ -+v2si -+foo (v4si v) -+{ -+ v2si res; -+ *(v4si *) &res = v; -+ return res; -+} -diff -uprN a/gcc/tree-ssa.c b/gcc/tree-ssa.c ---- a/gcc/tree-ssa.c 2020-03-31 01:51:30.000000000 +0000 -+++ b/gcc/tree-ssa.c 2020-04-15 21:26:09.828000000 +0000 -@@ -1528,7 +1528,9 @@ non_rewritable_lvalue_p (tree lhs) - && known_gt (wi::to_poly_offset (TYPE_SIZE_UNIT (TREE_TYPE (decl))), - mem_ref_offset (lhs)) - && multiple_of_p (sizetype, TREE_OPERAND (lhs, 1), -- TYPE_SIZE_UNIT (TREE_TYPE (lhs)))) -+ TYPE_SIZE_UNIT (TREE_TYPE (lhs))) -+ && known_ge (wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (decl))), -+ wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (lhs))))) - return false; - } diff --git a/fix-ICE-in-affine-combination.patch b/fix-ICE-in-affine-combination.patch deleted file mode 100644 index e5826816f47d56a23088229a0f136812ae1838ba..0000000000000000000000000000000000000000 --- a/fix-ICE-in-affine-combination.patch +++ /dev/null @@ -1,396 +0,0 @@ -This backport contains 2 patchs from gcc main stream tree. -The commit id of these patchs list as following in the order of time. 
- -0001-tree-affine.c-expr_to_aff_combination-New-function-s.patch -5120e0d8d48f4590a275e60565de6c5a4e772fc1 - -0001-PR-tree-optimization-94574-aarch64-ICE-during-GIMPLE.patch -0447929f11e6a3e1b076841712b90a8b6bc7d33a - -diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c ---- a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c 2020-12-08 14:54:11.467633230 +0800 -@@ -0,0 +1,8 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */ -+ -+#define TYPE unsigned int -+ -+#include "pr83403.h" -+ -+/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c ---- a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c 2020-12-08 14:54:11.467633230 +0800 -@@ -0,0 +1,8 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */ -+ -+#define TYPE int -+ -+#include "pr83403.h" -+ -+/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h ---- a/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h 2020-12-08 14:54:11.467633230 +0800 -@@ -0,0 +1,30 @@ -+__attribute__ ((noinline)) void -+calculate (const double *__restrict__ A, const double *__restrict__ B, -+ double *__restrict__ C) -+{ -+ TYPE m = 0; -+ TYPE n = 0; -+ TYPE k = 0; -+ -+ A = (const double *) __builtin_assume_aligned (A, 16); -+ B = (const double *) __builtin_assume_aligned (B, 16); -+ C = (double *) __builtin_assume_aligned (C, 16); -+ -+ for (n = 0; n < 9; n++) -+ { -+ for (m = 0; m < 10; m++) -+ { -+ C[(n * 10) + m] = 0.0; -+ } -+ -+ for (k = 0; k < 17; k++) -+ { -+#pragma simd -+ for (m = 0; m < 10; m++) -+ { -+ C[(n * 10) + m] += A[(k * 20) + m] * B[(n * 20) + k]; -+ } -+ } -+ } -+} -+ -diff -Nurp a/gcc/tree-affine.c b/gcc/tree-affine.c ---- a/gcc/tree-affine.c 2020-12-09 09:01:13.179633230 +0800 -+++ b/gcc/tree-affine.c 2020-12-08 14:54:11.467633230 +0800 -@@ -259,104 +259,66 @@ aff_combination_convert (aff_tree *comb, - } - } - --/* Splits EXPR into an affine combination of parts. */ -+/* Tries to handle OP0 CODE OP1 as affine combination of parts. Returns -+ true when that was successful and returns the combination in COMB. 
*/ - --void --tree_to_aff_combination (tree expr, tree type, aff_tree *comb) -+static bool -+expr_to_aff_combination (aff_tree *comb, tree_code code, tree type, -+ tree op0, tree op1 = NULL_TREE) - { - aff_tree tmp; -- enum tree_code code; -- tree cst, core, toffset; - poly_int64 bitpos, bitsize, bytepos; -- machine_mode mode; -- int unsignedp, reversep, volatilep; -- -- STRIP_NOPS (expr); - -- code = TREE_CODE (expr); - switch (code) - { - case POINTER_PLUS_EXPR: -- tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb); -- tree_to_aff_combination (TREE_OPERAND (expr, 1), sizetype, &tmp); -+ tree_to_aff_combination (op0, type, comb); -+ tree_to_aff_combination (op1, sizetype, &tmp); - aff_combination_add (comb, &tmp); -- return; -+ return true; - - case PLUS_EXPR: - case MINUS_EXPR: -- tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb); -- tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp); -+ tree_to_aff_combination (op0, type, comb); -+ tree_to_aff_combination (op1, type, &tmp); - if (code == MINUS_EXPR) - aff_combination_scale (&tmp, -1); - aff_combination_add (comb, &tmp); -- return; -+ return true; - - case MULT_EXPR: -- cst = TREE_OPERAND (expr, 1); -- if (TREE_CODE (cst) != INTEGER_CST) -+ if (TREE_CODE (op1) != INTEGER_CST) - break; -- tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb); -- aff_combination_scale (comb, wi::to_widest (cst)); -- return; -+ tree_to_aff_combination (op0, type, comb); -+ aff_combination_scale (comb, wi::to_widest (op1)); -+ return true; - - case NEGATE_EXPR: -- tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb); -+ tree_to_aff_combination (op0, type, comb); - aff_combination_scale (comb, -1); -- return; -+ return true; - - case BIT_NOT_EXPR: - /* ~x = -x - 1 */ -- tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb); -+ tree_to_aff_combination (op0, type, comb); - aff_combination_scale (comb, -1); - aff_combination_add_cst (comb, -1); -- return; -- -- case ADDR_EXPR: -- /* Handle &MEM[ptr + CST] which is equivalent to POINTER_PLUS_EXPR. 
*/ -- if (TREE_CODE (TREE_OPERAND (expr, 0)) == MEM_REF) -- { -- expr = TREE_OPERAND (expr, 0); -- tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb); -- tree_to_aff_combination (TREE_OPERAND (expr, 1), sizetype, &tmp); -- aff_combination_add (comb, &tmp); -- return; -- } -- core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos, -- &toffset, &mode, &unsignedp, &reversep, -- &volatilep); -- if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos)) -- break; -- aff_combination_const (comb, type, bytepos); -- if (TREE_CODE (core) == MEM_REF) -- { -- tree mem_offset = TREE_OPERAND (core, 1); -- aff_combination_add_cst (comb, wi::to_poly_widest (mem_offset)); -- core = TREE_OPERAND (core, 0); -- } -- else -- core = build_fold_addr_expr (core); -- -- if (TREE_CODE (core) == ADDR_EXPR) -- aff_combination_add_elt (comb, core, 1); -- else -- { -- tree_to_aff_combination (core, type, &tmp); -- aff_combination_add (comb, &tmp); -- } -- if (toffset) -- { -- tree_to_aff_combination (toffset, type, &tmp); -- aff_combination_add (comb, &tmp); -- } -- return; -+ return true; - - CASE_CONVERT: - { -- tree otype = TREE_TYPE (expr); -- tree inner = TREE_OPERAND (expr, 0); -+ tree otype = type; -+ tree inner = op0; - tree itype = TREE_TYPE (inner); - enum tree_code icode = TREE_CODE (inner); - -+ /* STRIP_NOPS */ -+ if (tree_nop_conversion_p (otype, itype)) -+ { -+ tree_to_aff_combination (op0, type, comb); -+ return true; -+ } -+ - /* In principle this is a valid folding, but it isn't necessarily - an optimization, so do it here and not in fold_unary. */ - if ((icode == PLUS_EXPR || icode == MINUS_EXPR || icode == MULT_EXPR) -@@ -376,38 +338,127 @@ tree_to_aff_combination (tree expr, tree - { - op0 = fold_convert (otype, op0); - op1 = fold_convert (otype, op1); -- expr = fold_build2 (icode, otype, op0, op1); -- tree_to_aff_combination (expr, type, comb); -- return; -+ return expr_to_aff_combination (comb, icode, otype, op0, op1); - } - wide_int minv, maxv; - /* If inner type has wrapping overflow behavior, fold conversion - for below case: -- (T1)(X - CST) -> (T1)X - (T1)CST -- if X - CST doesn't overflow by range information. Also handle -- (T1)(X + CST) as (T1)(X - (-CST)). */ -+ (T1)(X *+- CST) -> (T1)X *+- (T1)CST -+ if X *+- CST doesn't overflow by range information. */ - if (TYPE_UNSIGNED (itype) - && TYPE_OVERFLOW_WRAPS (itype) -- && TREE_CODE (op0) == SSA_NAME - && TREE_CODE (op1) == INTEGER_CST -- && icode != MULT_EXPR -- && get_range_info (op0, &minv, &maxv) == VR_RANGE) -+ && determine_value_range (op0, &minv, &maxv) == VR_RANGE) - { -+ wi::overflow_type overflow = wi::OVF_NONE; -+ signop sign = UNSIGNED; - if (icode == PLUS_EXPR) -- op1 = wide_int_to_tree (itype, -wi::to_wide (op1)); -- if (wi::geu_p (minv, wi::to_wide (op1))) -+ wi::add (maxv, wi::to_wide (op1), sign, &overflow); -+ else if (icode == MULT_EXPR) -+ wi::mul (maxv, wi::to_wide (op1), sign, &overflow); -+ else -+ wi::sub (minv, wi::to_wide (op1), sign, &overflow); -+ -+ if (overflow == wi::OVF_NONE) - { - op0 = fold_convert (otype, op0); - op1 = fold_convert (otype, op1); -- expr = fold_build2 (MINUS_EXPR, otype, op0, op1); -- tree_to_aff_combination (expr, type, comb); -- return; -+ return expr_to_aff_combination (comb, icode, otype, op0, -+ op1); - } - } - } - } - break; - -+ default:; -+ } -+ -+ return false; -+} -+ -+/* Splits EXPR into an affine combination of parts. 
*/ -+ -+void -+tree_to_aff_combination (tree expr, tree type, aff_tree *comb) -+{ -+ aff_tree tmp; -+ enum tree_code code; -+ tree core, toffset; -+ poly_int64 bitpos, bitsize, bytepos; -+ machine_mode mode; -+ int unsignedp, reversep, volatilep; -+ -+ STRIP_NOPS (expr); -+ -+ code = TREE_CODE (expr); -+ switch (code) -+ { -+ case POINTER_PLUS_EXPR: -+ case PLUS_EXPR: -+ case MINUS_EXPR: -+ case MULT_EXPR: -+ if (expr_to_aff_combination (comb, code, type, TREE_OPERAND (expr, 0), -+ TREE_OPERAND (expr, 1))) -+ return; -+ break; -+ -+ case NEGATE_EXPR: -+ case BIT_NOT_EXPR: -+ if (expr_to_aff_combination (comb, code, type, TREE_OPERAND (expr, 0))) -+ return; -+ break; -+ -+ CASE_CONVERT: -+ /* ??? TREE_TYPE (expr) should be equal to type here, but IVOPTS -+ calls this with not showing an outer widening cast. */ -+ if (expr_to_aff_combination (comb, code, -+ TREE_TYPE (expr), TREE_OPERAND (expr, 0))) -+ { -+ aff_combination_convert (comb, type); -+ return; -+ } -+ break; -+ -+ case ADDR_EXPR: -+ /* Handle &MEM[ptr + CST] which is equivalent to POINTER_PLUS_EXPR. */ -+ if (TREE_CODE (TREE_OPERAND (expr, 0)) == MEM_REF) -+ { -+ expr = TREE_OPERAND (expr, 0); -+ tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb); -+ tree_to_aff_combination (TREE_OPERAND (expr, 1), sizetype, &tmp); -+ aff_combination_add (comb, &tmp); -+ return; -+ } -+ core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos, -+ &toffset, &mode, &unsignedp, &reversep, -+ &volatilep); -+ if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos)) -+ break; -+ aff_combination_const (comb, type, bytepos); -+ if (TREE_CODE (core) == MEM_REF) -+ { -+ tree mem_offset = TREE_OPERAND (core, 1); -+ aff_combination_add_cst (comb, wi::to_poly_widest (mem_offset)); -+ core = TREE_OPERAND (core, 0); -+ } -+ else -+ core = build_fold_addr_expr (core); -+ -+ if (TREE_CODE (core) == ADDR_EXPR) -+ aff_combination_add_elt (comb, core, 1); -+ else -+ { -+ tree_to_aff_combination (core, type, &tmp); -+ aff_combination_add (comb, &tmp); -+ } -+ if (toffset) -+ { -+ tree_to_aff_combination (toffset, type, &tmp); -+ aff_combination_add (comb, &tmp); -+ } -+ return; -+ - default: - { - if (poly_int_tree_p (expr)) -@@ -665,7 +716,7 @@ aff_combination_expand (aff_tree *comb A - { - unsigned i; - aff_tree to_add, current, curre; -- tree e, rhs; -+ tree e; - gimple *def; - widest_int scale; - struct name_expansion *exp; -@@ -715,20 +766,38 @@ aff_combination_expand (aff_tree *comb A - case PLUS_EXPR: - case MINUS_EXPR: - case MULT_EXPR: -+ if (!expr_to_aff_combination (¤t, code, TREE_TYPE (name), -+ gimple_assign_rhs1 (def), -+ gimple_assign_rhs2 (def))) -+ continue; -+ break; - case NEGATE_EXPR: - case BIT_NOT_EXPR: -+ if (!expr_to_aff_combination (¤t, code, TREE_TYPE (name), -+ gimple_assign_rhs1 (def))) -+ continue; -+ break; - CASE_CONVERT: -- rhs = gimple_assign_rhs_to_tree (def); -+ if (!expr_to_aff_combination (¤t, code, TREE_TYPE (name), -+ gimple_assign_rhs1 (def))) -+ /* This makes us always expand conversions which we did -+ in the past and makes gcc.dg/tree-ssa/ivopts-lt-2.c -+ PASS, eliminating one induction variable in IVOPTs. -+ ??? But it is really excessive and we should try -+ harder to do without it. 
*/ -+ aff_combination_elt (¤t, TREE_TYPE (name), -+ fold_convert (TREE_TYPE (name), -+ gimple_assign_rhs1 (def))); - break; - case ADDR_EXPR: - case INTEGER_CST: - case POLY_INT_CST: -- rhs = gimple_assign_rhs1 (def); -+ tree_to_aff_combination (gimple_assign_rhs1 (def), -+ TREE_TYPE (name), ¤t); - break; - default: - continue; - } -- tree_to_aff_combination (rhs, TREE_TYPE (name), ¤t); - exp = XNEW (struct name_expansion); - exp->in_progress = 1; - if (!*cache) diff --git a/fix-ICE-in-compute_live_loop_exits.patch b/fix-ICE-in-compute_live_loop_exits.patch deleted file mode 100644 index 54794871ffd62a0fcfd0731dc76999edaa6850b4..0000000000000000000000000000000000000000 --- a/fix-ICE-in-compute_live_loop_exits.patch +++ /dev/null @@ -1,82 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-re-PR-tree-optimization-92085-ICE-tree-check-expecte.patch -3c8e341b996546607fa1f39a0fd9a9d7c2c38214 - -diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr92085-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr92085-1.c ---- a/gcc/testsuite/gcc.dg/tree-ssa/pr92085-1.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr92085-1.c 2020-07-09 11:05:23.136000000 +0800 -@@ -0,0 +1,20 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O1 -fexceptions -fnon-call-exceptions -ftree-loop-vectorize -fno-tree-sink --param dse-max-alias-queries-per-store=2 -w" } */ -+ -+void -+di (int y9, int qw) -+{ -+ if ((int) &y9 != 0) -+ { -+ int py; -+ int **fq = &py; -+ -+ while (qw < 1) -+ { -+ if ((0 < (**fq ? **fq : (**fq = 1))) / (**fq = y9)) -+ ; -+ -+ ++qw; -+ } -+ } -+} -diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr92085-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr92085-2.c ---- a/gcc/testsuite/gcc.dg/tree-ssa/pr92085-2.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr92085-2.c 2020-07-09 11:05:23.136000000 +0800 -@@ -0,0 +1,29 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O1 -ftree-loop-vectorize -fno-tree-dce -fno-tree-sink -w" } */ -+ -+int a8; -+ -+void -+c1 (int oz, int dk, int ub) -+{ -+ int *hd = 0; -+ long int *th = &dk; -+ -+ while (ub < 1) -+ { -+ oz || dk; -+ ++ub; -+ } -+ -+ while (oz < 2) -+ { -+ long int *lq = &oz; -+ -+ (*hd < (*lq = *th)) < oz; -+ -+ if (oz == 0) -+ *th = a8 = oz; -+ -+ *lq = 0; -+ } -+} -diff -Nurp a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c ---- a/gcc/tree-if-conv.c 2020-07-09 11:04:58.832000000 +0800 -+++ b/gcc/tree-if-conv.c 2020-07-09 11:05:23.136000000 +0800 -@@ -2984,10 +2984,11 @@ ifcvt_local_dce (class loop *loop) - ao_ref write; - ao_ref_init (&write, lhs); - -- if (dse_classify_store (&write, stmt, false, NULL, NULL, latch_vdef) -- == DSE_STORE_DEAD) -- delete_dead_or_redundant_assignment (&gsi, "dead"); -- gsi_next (&gsi); -+ if (dse_classify_store (&write, stmt, false, NULL, NULL, latch_vdef) -+ == DSE_STORE_DEAD) -+ delete_dead_or_redundant_assignment (&gsi, "dead"); -+ else -+ gsi_next (&gsi); - continue; - } - diff --git a/fix-ICE-in-copy_reference_ops_from_ref.patch b/fix-ICE-in-copy_reference_ops_from_ref.patch deleted file mode 100644 index 52c660d89c8cf3e92a282404bb725b9ea004239e..0000000000000000000000000000000000000000 --- a/fix-ICE-in-copy_reference_ops_from_ref.patch +++ /dev/null @@ -1,70 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. 
- -0001-tree-ssa-sccvn.c-copy_reference_ops_from_ref-Adjust-.patch -2f215d2176608467aeee73b245beedfc60836b71 - -diff -Nurp gcc-9.3.0_org/gcc/tree-ssa-sccvn.c gcc-9.3.0/gcc/tree-ssa-sccvn.c ---- gcc-9.3.0_org/gcc/tree-ssa-sccvn.c 2020-08-18 15:31:39.308000000 +0800 -+++ gcc-9.3.0/gcc/tree-ssa-sccvn.c 2020-08-18 15:32:03.456000000 +0800 -@@ -797,39 +797,6 @@ vn_reference_eq (const_vn_reference_t co - static void - copy_reference_ops_from_ref (tree ref, vec *result) - { -- if (TREE_CODE (ref) == TARGET_MEM_REF) -- { -- vn_reference_op_s temp; -- -- result->reserve (3); -- -- memset (&temp, 0, sizeof (temp)); -- temp.type = TREE_TYPE (ref); -- temp.opcode = TREE_CODE (ref); -- temp.op0 = TMR_INDEX (ref); -- temp.op1 = TMR_STEP (ref); -- temp.op2 = TMR_OFFSET (ref); -- temp.off = -1; -- temp.clique = MR_DEPENDENCE_CLIQUE (ref); -- temp.base = MR_DEPENDENCE_BASE (ref); -- result->quick_push (temp); -- -- memset (&temp, 0, sizeof (temp)); -- temp.type = NULL_TREE; -- temp.opcode = ERROR_MARK; -- temp.op0 = TMR_INDEX2 (ref); -- temp.off = -1; -- result->quick_push (temp); -- -- memset (&temp, 0, sizeof (temp)); -- temp.type = NULL_TREE; -- temp.opcode = TREE_CODE (TMR_BASE (ref)); -- temp.op0 = TMR_BASE (ref); -- temp.off = -1; -- result->quick_push (temp); -- return; -- } -- - /* For non-calls, store the information that makes up the address. */ - tree orig = ref; - while (ref) -@@ -859,6 +826,20 @@ copy_reference_ops_from_ref (tree ref, v - temp.base = MR_DEPENDENCE_BASE (ref); - temp.reverse = REF_REVERSE_STORAGE_ORDER (ref); - break; -+ case TARGET_MEM_REF: -+ /* The base address gets its own vn_reference_op_s structure. */ -+ temp.op0 = TMR_INDEX (ref); -+ temp.op1 = TMR_STEP (ref); -+ temp.op2 = TMR_OFFSET (ref); -+ temp.clique = MR_DEPENDENCE_CLIQUE (ref); -+ temp.base = MR_DEPENDENCE_BASE (ref); -+ result->safe_push (temp); -+ memset (&temp, 0, sizeof (temp)); -+ temp.type = NULL_TREE; -+ temp.opcode = ERROR_MARK; -+ temp.op0 = TMR_INDEX2 (ref); -+ temp.off = -1; -+ break; - case BIT_FIELD_REF: - /* Record bits, position and storage order. */ - temp.op0 = TREE_OPERAND (ref, 1); diff --git a/fix-ICE-in-declare-return-variable.patch b/fix-ICE-in-declare-return-variable.patch deleted file mode 100644 index 4faa0cbced714c4269dfd41a4c11cb0f8cccc632..0000000000000000000000000000000000000000 --- a/fix-ICE-in-declare-return-variable.patch +++ /dev/null @@ -1,31 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-re-PR-ipa-92409-r277920-causes-ICE-in-gcc.dg-cast-fu.patch -e7399b548c866ee2e408e0855b3be794c056fb1d - -diff -uprN a/gcc/tree-inline.c b/gcc/tree-inline.c ---- a/gcc/tree-inline.c -+++ b/gcc/tree-inline.c -@@ -3593,7 +3593,9 @@ declare_return_variable (copy_body_data *id, tree return_slot, tree modify_dest, - vs. the call expression. */ - if (modify_dest) - caller_type = TREE_TYPE (modify_dest); -- else -+ else if (return_slot) -+ caller_type = TREE_TYPE (return_slot); -+ else /* No LHS on the call. */ - caller_type = TREE_TYPE (TREE_TYPE (callee)); - - /* We don't need to do anything for functions that don't return anything. 
*/ -@@ -3634,6 +3636,10 @@ declare_return_variable (copy_body_data *id, tree return_slot, tree modify_dest, - && !DECL_GIMPLE_REG_P (result) - && DECL_P (var)) - DECL_GIMPLE_REG_P (var) = 0; -+ -+ if (!useless_type_conversion_p (callee_type, caller_type)) -+ var = build1 (VIEW_CONVERT_EXPR, callee_type, var); -+ - use = NULL; - goto done; - } diff --git a/fix-ICE-in-eliminate-stmt.patch b/fix-ICE-in-eliminate-stmt.patch deleted file mode 100644 index 7c5ee1372db0682a7d55a74039aaca4aaf65814d..0000000000000000000000000000000000000000 --- a/fix-ICE-in-eliminate-stmt.patch +++ /dev/null @@ -1,79 +0,0 @@ -commit ee80f0c6ba50ebf0300fb0cfe1079a1321295749 -Author: Richard Biener -Date: Thu Oct 24 11:23:54 2019 +0000 - - re PR tree-optimization/92203 (ICE in eliminate_stmt, at tree-ssa-sccvn.c:5492) - - 2019-10-24 Richard Biener - - PR tree-optimization/92203 - * treee-ssa-sccvn.c (eliminate_dom_walker::eliminate_stmt): - Skip eliminating conversion stmts inserted by insertion. - - * gcc.dg/torture/pr92203.c: New testcase. - - From-SVN: r277374 - -diff --git a/gcc/testsuite/gcc.dg/torture/pr92203.c b/gcc/testsuite/gcc.dg/torture/pr92203.c -new file mode 100644 -index 00000000000..c752969d5e5 ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/torture/pr92203.c -@@ -0,0 +1,37 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-Wno-div-by-zero" } */ -+ -+unsigned long int rr; -+ -+void -+cw (int z9) -+{ -+ int m5; -+ unsigned long int vz = 0; -+ long int *na; -+ -+ if (z9 == 0) -+ rr = 0; -+ else -+ { -+ na = (long int *) &m5; -+ for (*na = 0; *na < 1; ++*na) -+ { -+ na = (long int *) &vz; -+ rr /= 0; -+ } -+ } -+ -+ m5 = rr / 5; -+ ++vz; -+ if (vz != 0) -+ while (z9 < 1) -+ { -+ if (m5 >= 0) -+ rr += m5; -+ -+ na = (long int *) &rr; -+ if (*na >= 0) -+ rr = 0; -+ } -+} -diff --git a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c -index 57331ab44dc..3872168a4ed 100644 ---- a/gcc/tree-ssa-sccvn.c -+++ b/gcc/tree-ssa-sccvn.c -@@ -5459,8 +5459,13 @@ eliminate_dom_walker::eliminate_stmt (basic_block b, gimple_stmt_iterator *gsi) - - /* If this is an assignment from our leader (which - happens in the case the value-number is a constant) -- then there is nothing to do. */ -- if (gimple_assign_single_p (stmt) -+ then there is nothing to do. Likewise if we run into -+ inserted code that needed a conversion because of -+ our type-agnostic value-numbering of loads. */ -+ if ((gimple_assign_single_p (stmt) -+ || (is_gimple_assign (stmt) -+ && (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)) -+ || gimple_assign_rhs_code (stmt) == VIEW_CONVERT_EXPR))) - && sprime == gimple_assign_rhs1 (stmt)) - return; - diff --git a/fix-ICE-in-eliminate_stmt.patch b/fix-ICE-in-eliminate_stmt.patch deleted file mode 100644 index 983e193c89e262b1f8c746ac2be3128b09acfa37..0000000000000000000000000000000000000000 --- a/fix-ICE-in-eliminate_stmt.patch +++ /dev/null @@ -1,162 +0,0 @@ -This backport contains 2 patchs from gcc main stream tree. -The commit id of these patchs list as following in the order of time. 
- -0001-Tweak-gcc.dg-vect-bb-slp-4-01-.c-PR92366.patch -3771033244b3ee1b53a8a00d734580b16384fdd3 - -0001-tree-vect-slp.c-vect_analyze_slp_instance-Dump-const.patch -140ee00a961fda084c1b4b3f0e7e489a917858f7 - -diff -Nurp a/gcc/testsuite/gcc.dg/vect/bb-slp-40.c b/gcc/testsuite/gcc.dg/vect/bb-slp-40.c ---- a/gcc/testsuite/gcc.dg/vect/bb-slp-40.c 2020-09-14 21:24:20.899694710 +0800 -+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-40.c 2020-09-15 20:54:05.456027442 +0800 -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O3 -fdump-tree-slp-all" } */ -+/* { dg-additional-options "-fvect-cost-model=dynamic" } */ - /* { dg-require-effective-target vect_int } */ - - char g_d[1024], g_s1[1024], g_s2[1024]; -@@ -30,5 +30,5 @@ void foo(void) - } - - /* See that we vectorize an SLP instance. */ --/* { dg-final { scan-tree-dump-times "Found vectorizable constructor" 1 "slp1" } } */ --/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "slp1" } } */ -+/* { dg-final { scan-tree-dump "Analyzing vectorizable constructor" "slp1" } } */ -+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "slp1" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/vect/bb-slp-41.c b/gcc/testsuite/gcc.dg/vect/bb-slp-41.c ---- a/gcc/testsuite/gcc.dg/vect/bb-slp-41.c 2020-09-14 21:24:20.899694710 +0800 -+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-41.c 2020-09-15 20:54:10.424087539 +0800 -@@ -1,10 +1,9 @@ --/* { dg-do run } */ --/* { dg-options "-O3 -fdump-tree-slp-all -fno-vect-cost-model" } */ - /* { dg-require-effective-target vect_int } */ - - #define ARR_SIZE 1000 - --void foo (int *a, int *b) -+void __attribute__((optimize (0))) -+foo (int *a, int *b) - { - int i; - for (i = 0; i < (ARR_SIZE - 2); ++i) -@@ -56,6 +55,4 @@ int main () - return 0; - - } --/* See that we vectorize an SLP instance. */ --/* { dg-final { scan-tree-dump-times "Found vectorizable constructor" 12 "slp1" } } */ --/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "slp1" } } */ -+/* { dg-final { scan-tree-dump-not "vectorizing stmts using SLP" "slp1" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/vect/bb-slp-42.c b/gcc/testsuite/gcc.dg/vect/bb-slp-42.c ---- a/gcc/testsuite/gcc.dg/vect/bb-slp-42.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-42.c 2020-09-15 20:54:14.724139555 +0800 -@@ -0,0 +1,48 @@ -+/* { dg-require-effective-target vect_int } */ -+/* { dg-require-effective-target vect_perm } */ -+ -+#include "tree-vect.h" -+ -+#define ARR_SIZE 1024 -+ -+void __attribute__((noipa)) -+foo (int a[][ARR_SIZE], int *b) -+{ -+ int i; -+ for (i = 0; i < ARR_SIZE; ++i) -+ { -+ a[0][i] += b[0]; -+ a[1][i] += b[1]; -+ a[2][i] += b[2]; -+ a[3][i] += b[3]; -+ } -+} -+ -+int -+main () -+{ -+ int a[4][ARR_SIZE]; -+ int b[4]; -+ -+ check_vect (); -+ -+ for (int i = 0; i < 4; ++i) -+ { -+ b[i] = 20 * i; -+ for (int j = 0; j < ARR_SIZE; ++j) -+ a[i][j] = (i + 1) * ARR_SIZE - j; -+ } -+ -+ foo (a, b); -+ -+ for (int i = 0; i < 4; ++i) -+ for (int j = 0; j < ARR_SIZE; ++j) -+ if (a[i][j] != (i + 1) * ARR_SIZE - j + 20 * i) -+ __builtin_abort (); -+ -+ return 0; -+ -+} -+ -+/* See that we do not try to vectorize the uniform CTORs. 
*/ -+/* { dg-final { scan-tree-dump-not "Analyzing vectorizable constructor" "slp1" } } */ -diff -Nurp a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c ---- a/gcc/tree-vect-slp.c 2020-09-14 21:24:20.983695752 +0800 -+++ b/gcc/tree-vect-slp.c 2020-09-14 16:13:11.077779069 +0800 -@@ -2106,6 +2106,10 @@ vect_analyze_slp_instance (vec_info *vin - else - return false; - } -+ if (dump_enabled_p ()) -+ dump_printf_loc (MSG_NOTE, vect_location, -+ "Analyzing vectorizable constructor: %G\n", -+ stmt_info->stmt); - } - else - { -@@ -3049,31 +3053,22 @@ vect_slp_check_for_constructors (bb_vec_ - gimple_stmt_iterator gsi; - - for (gsi = bb_vinfo->region_begin; -- gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi)) -+ gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi)) - { -- gimple *stmt = gsi_stmt (gsi); -- -- if (is_gimple_assign (stmt) -- && gimple_assign_rhs_code (stmt) == CONSTRUCTOR -- && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME -- && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE) -- { -- tree rhs = gimple_assign_rhs1 (stmt); -- -- if (CONSTRUCTOR_NELTS (rhs) == 0) -- continue; -- -- poly_uint64 subparts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs)); -+ gassign *stmt = dyn_cast (gsi_stmt (gsi)); -+ if (!stmt || gimple_assign_rhs_code (stmt) != CONSTRUCTOR) -+ continue; - -- if (maybe_ne (subparts, CONSTRUCTOR_NELTS (rhs))) -- continue; -+ tree rhs = gimple_assign_rhs1 (stmt); -+ if (!VECTOR_TYPE_P (TREE_TYPE (rhs)) -+ || maybe_ne (TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs)), -+ CONSTRUCTOR_NELTS (rhs)) -+ || VECTOR_TYPE_P (TREE_TYPE (CONSTRUCTOR_ELT (rhs, 0)->value)) -+ || uniform_vector_p (rhs)) -+ continue; - -- if (dump_enabled_p ()) -- dump_printf_loc (MSG_NOTE, vect_location, -- "Found vectorizable constructor: %G\n", stmt); -- stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt); -- BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info); -- } -+ stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt); -+ BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info); - } - } - diff --git a/fix-ICE-in-exact_div.patch b/fix-ICE-in-exact_div.patch deleted file mode 100644 index a60615705bb16d24f344e8947fce6a13c37ec1ff..0000000000000000000000000000000000000000 --- a/fix-ICE-in-exact_div.patch +++ /dev/null @@ -1,54 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. 
- -0001-re-PR-tree-optimization-92555-ICE-in-exact_div-at-po.patch -f1e0c7e0eb3eafb122fc3d00242828c82a9286a2 - -diff -Nurp a/gcc/testsuite/gcc.dg/vect/pr92555.c b/gcc/testsuite/gcc.dg/vect/pr92555.c ---- a/gcc/testsuite/gcc.dg/vect/pr92555.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/vect/pr92555.c 2020-08-11 09:36:18.060000000 +0800 -@@ -0,0 +1,22 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-fwrapv" } */ -+ -+signed char rq; -+ -+signed char -+pu (int tr, int al) -+{ -+ signed char x8; -+ -+ while (tr != 0) -+ { -+ for (x8 = 0; x8 >= 0; x8 += 2) -+ ; -+ -+ rq ^= al ^ 1; -+ ++x8; -+ ++tr; -+ } -+ -+ return x8; -+} -diff -Nurp a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c ---- a/gcc/tree-vect-loop.c 2020-08-11 09:35:10.952000000 +0800 -+++ b/gcc/tree-vect-loop.c 2020-08-11 09:36:18.064000000 +0800 -@@ -1415,6 +1415,18 @@ vect_update_vf_for_slp (loop_vec_info lo - for (i = 0; i < nbbs; i++) - { - basic_block bb = bbs[i]; -+ for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si); -+ gsi_next (&si)) -+ { -+ stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (si.phi ()); -+ if (!stmt_info) -+ continue; -+ if ((STMT_VINFO_RELEVANT_P (stmt_info) -+ || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))) -+ && !PURE_SLP_STMT (stmt_info)) -+ /* STMT needs both SLP and loop-based vectorization. */ -+ only_slp_in_loop = false; -+ } - for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); - gsi_next (&si)) - { diff --git a/fix-ICE-in-extract_constrain_insn.patch b/fix-ICE-in-extract_constrain_insn.patch deleted file mode 100644 index 4b604059eea540575284ed52481a494ea67a188a..0000000000000000000000000000000000000000 --- a/fix-ICE-in-extract_constrain_insn.patch +++ /dev/null @@ -1,207 +0,0 @@ -This backport contains 2 patchs from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-AArch64-Improve-SVE-constant-moves.patch -4aeb1ba7f62c1d680c819ae3e137c3bad6f520ca - -0002-aarch64-Add-vector-vector-vec_extract-patterns-PR928.patch -c15893df6eafc32efd6184379dd7f02c36da7d12 - -diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c ---- a/gcc/config/aarch64/aarch64.c 2020-09-03 19:50:00.484000000 +0800 -+++ b/gcc/config/aarch64/aarch64.c 2020-09-03 19:50:19.336943210 +0800 -@@ -3632,7 +3632,7 @@ aarch64_maybe_expand_sve_subreg_move (rt - attributes. Unlike gen_lowpart, this doesn't care whether the - mode change is valid. */ - --static rtx -+rtx - aarch64_replace_reg_mode (rtx x, machine_mode mode) - { - if (GET_MODE (x) == mode) -@@ -15016,6 +15016,36 @@ aarch64_simd_check_vect_par_cnst_half (r - return true; - } - -+/* Return a PARALLEL containing NELTS elements, with element I equal -+ to BASE + I * STEP. */ -+ -+rtx -+aarch64_gen_stepped_int_parallel (unsigned int nelts, int base, int step) -+{ -+ rtvec vec = rtvec_alloc (nelts); -+ for (unsigned int i = 0; i < nelts; ++i) -+ RTVEC_ELT (vec, i) = gen_int_mode (base + i * step, DImode); -+ return gen_rtx_PARALLEL (VOIDmode, vec); -+} -+ -+/* Return true if OP is a PARALLEL of CONST_INTs that form a linear -+ series with step STEP. 
*/ -+ -+bool -+aarch64_stepped_int_parallel_p (rtx op, int step) -+{ -+ if (GET_CODE (op) != PARALLEL || !CONST_INT_P (XVECEXP (op, 0, 0))) -+ return false; -+ -+ unsigned HOST_WIDE_INT base = UINTVAL (XVECEXP (op, 0, 0)); -+ for (int i = 1; i < XVECLEN (op, 0); ++i) -+ if (!CONST_INT_P (XVECEXP (op, 0, i)) -+ || UINTVAL (XVECEXP (op, 0, i)) != base + i * step) -+ return false; -+ -+ return true; -+} -+ - /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and - HIGH (exclusive). */ - void -diff -Nurp a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h ---- a/gcc/config/aarch64/aarch64-protos.h 2020-09-03 19:50:00.484000000 +0800 -+++ b/gcc/config/aarch64/aarch64-protos.h 2020-09-03 19:50:29.137683100 +0800 -@@ -501,6 +501,8 @@ bool aarch64_sve_ld1r_operand_p (rtx); - bool aarch64_sve_ldr_operand_p (rtx); - bool aarch64_sve_struct_memory_operand_p (rtx); - rtx aarch64_simd_vect_par_cnst_half (machine_mode, int, bool); -+rtx aarch64_gen_stepped_int_parallel (unsigned int, int, int); -+bool aarch64_stepped_int_parallel_p (rtx, int); - rtx aarch64_tls_get_addr (void); - tree aarch64_fold_builtin (tree, int, tree *, bool); - unsigned aarch64_dbx_register_number (unsigned); -@@ -516,6 +518,7 @@ void aarch64_expand_mov_immediate (rtx, - void aarch64_emit_sve_pred_move (rtx, rtx, rtx); - void aarch64_expand_sve_mem_move (rtx, rtx, machine_mode); - bool aarch64_maybe_expand_sve_subreg_move (rtx, rtx); -+rtx aarch64_replace_reg_mode (rtx, machine_mode); - void aarch64_split_sve_subreg_move (rtx, rtx, rtx); - void aarch64_expand_prologue (void); - void aarch64_expand_vector_init (rtx, rtx); -diff -Nurp a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md ---- a/gcc/config/aarch64/aarch64-simd.md 2020-09-03 19:50:00.484000000 +0800 -+++ b/gcc/config/aarch64/aarch64-simd.md 2020-09-03 19:50:44.100673150 +0800 -@@ -282,37 +282,51 @@ - rtx dst_high_part = gen_highpart (mode, dst); - rtx lo = aarch64_simd_vect_par_cnst_half (mode, , false); - rtx hi = aarch64_simd_vect_par_cnst_half (mode, , true); -- -- emit_insn -- (gen_aarch64_simd_mov_from_low (dst_low_part, src, lo)); -- emit_insn -- (gen_aarch64_simd_mov_from_high (dst_high_part, src, hi)); -+ emit_insn (gen_aarch64_get_half (dst_low_part, src, lo)); -+ emit_insn (gen_aarch64_get_half (dst_high_part, src, hi)); - } - DONE; - } - ) - --(define_insn "aarch64_simd_mov_from_low" -- [(set (match_operand: 0 "register_operand" "=r") -+(define_expand "aarch64_get_half" -+ [(set (match_operand: 0 "register_operand") - (vec_select: -- (match_operand:VQ 1 "register_operand" "w") -- (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))] -- "TARGET_SIMD && reload_completed" -- "umov\t%0, %1.d[0]" -- [(set_attr "type" "neon_to_gp") -- (set_attr "length" "4") -- ]) -+ (match_operand:VQ 1 "register_operand") -+ (match_operand 2 "ascending_int_parallel")))] -+ "TARGET_SIMD" -+) -+ -+(define_insn_and_split "aarch64_simd_mov_from_low" -+ [(set (match_operand: 0 "register_operand" "=w,?r") -+ (vec_select: -+ (match_operand:VQ_NO2E 1 "register_operand" "w,w") -+ (match_operand:VQ_NO2E 2 "vect_par_cnst_lo_half" "")))] -+ "TARGET_SIMD" -+ "@ -+ # -+ umov\t%0, %1.d[0]" -+ "&& reload_completed && aarch64_simd_register (operands[0], mode)" -+ [(set (match_dup 0) (match_dup 1))] -+ { -+ operands[1] = aarch64_replace_reg_mode (operands[1], mode); -+ } -+ [(set_attr "type" "mov_reg,neon_to_gp") -+ (set_attr "length" "4")] -+) - - (define_insn "aarch64_simd_mov_from_high" -- [(set (match_operand: 0 "register_operand" "=r") 
-+ [(set (match_operand: 0 "register_operand" "=w,?r") - (vec_select: -- (match_operand:VQ 1 "register_operand" "w") -- (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))] -- "TARGET_SIMD && reload_completed" -- "umov\t%0, %1.d[1]" -- [(set_attr "type" "neon_to_gp") -- (set_attr "length" "4") -- ]) -+ (match_operand:VQ_NO2E 1 "register_operand" "w,w") -+ (match_operand:VQ_NO2E 2 "vect_par_cnst_hi_half" "")))] -+ "TARGET_SIMD" -+ "@ -+ dup\\t%d0, %1.d[1] -+ umov\t%0, %1.d[1]" -+ [(set_attr "type" "neon_dup,neon_to_gp") -+ (set_attr "length" "4")] -+) - - (define_insn "orn3" - [(set (match_operand:VDQ_I 0 "register_operand" "=w") -@@ -6016,6 +6030,35 @@ - DONE; - }) - -+;; Extract a 64-bit vector from one half of a 128-bit vector. -+(define_expand "vec_extract" -+ [(match_operand: 0 "register_operand") -+ (match_operand:VQ_NO2E 1 "register_operand") -+ (match_operand 2 "immediate_operand")] -+ "TARGET_SIMD" -+{ -+ int start = INTVAL (operands[2]); -+ if (start != 0 && start != / 2) -+ FAIL; -+ rtx sel = aarch64_gen_stepped_int_parallel ( / 2, start, 1); -+ emit_insn (gen_aarch64_get_half (operands[0], operands[1], sel)); -+ DONE; -+}) -+ -+;; Extract a single-element 64-bit vector from one half of a 128-bit vector. -+(define_expand "vec_extractv2dfv1df" -+ [(match_operand:V1DF 0 "register_operand") -+ (match_operand:V2DF 1 "register_operand") -+ (match_operand 2 "immediate_operand")] -+ "TARGET_SIMD" -+{ -+ /* V1DF is rarely used by other patterns, so it should be better to hide -+ it in a subreg destination of a normal DF op. */ -+ rtx scalar0 = gen_lowpart (DFmode, operands[0]); -+ emit_insn (gen_vec_extractv2dfdf (scalar0, operands[1], operands[2])); -+ DONE; -+}) -+ - ;; aes - - (define_insn "aarch64_crypto_aesv16qi" -diff -Nurp a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md ---- a/gcc/config/aarch64/predicates.md 2020-09-03 19:50:00.484000000 +0800 -+++ b/gcc/config/aarch64/predicates.md 2020-09-03 19:50:49.315344350 +0800 -@@ -438,6 +438,12 @@ - return aarch64_simd_check_vect_par_cnst_half (op, mode, false); - }) - -+(define_predicate "ascending_int_parallel" -+ (match_code "parallel") -+{ -+ return aarch64_stepped_int_parallel_p (op, 1); -+}) -+ - (define_special_predicate "aarch64_simd_lshift_imm" - (match_code "const,const_vector") - { diff --git a/fix-ICE-in-gimple_op.patch b/fix-ICE-in-gimple_op.patch deleted file mode 100644 index e6949dc192d4488cf42daa1c7be8e633853b2eae..0000000000000000000000000000000000000000 --- a/fix-ICE-in-gimple_op.patch +++ /dev/null @@ -1,65 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. 
- -0001-re-PR-tree-optimization-92512-ICE-in-gimple_op-at-gi.patch -b9f71c51cd578c6ab6ad2986edb80ba48aa477bc - -diff -Nurp a/gcc/testsuite/gcc.dg/torture/pr92512.c b/gcc/testsuite/gcc.dg/torture/pr92512.c ---- a/gcc/testsuite/gcc.dg/torture/pr92512.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/torture/pr92512.c 2020-08-10 20:53:50.404000000 +0800 -@@ -0,0 +1,17 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-ftree-vectorize" } */ -+ -+long int -+nl (long int fy, int k3, int zr) -+{ -+ while (k3 < 1) -+ { -+ if (zr == 0) -+ fy = 0; -+ -+ fy *= fy < zr; -+ ++k3; -+ } -+ -+ return fy; -+} -diff -Nurp a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c ---- a/gcc/tree-vect-loop.c 2020-08-10 20:53:42.636000000 +0800 -+++ b/gcc/tree-vect-loop.c 2020-08-10 20:53:50.404000000 +0800 -@@ -2931,9 +2931,11 @@ pop: - /* The following make sure we can compute the operand index - easily plus it mostly disallows chaining via COND_EXPR condition - operands. */ -- || (gimple_assign_rhs1 (use_stmt) != op -- && gimple_assign_rhs2 (use_stmt) != op -- && gimple_assign_rhs3 (use_stmt) != op)) -+ || (gimple_assign_rhs1_ptr (use_stmt) != path[i].second->use -+ && (gimple_num_ops (use_stmt) <= 2 -+ || gimple_assign_rhs2_ptr (use_stmt) != path[i].second->use) -+ && (gimple_num_ops (use_stmt) <= 3 -+ || gimple_assign_rhs3_ptr (use_stmt) != path[i].second->use))) - { - fail = true; - break; -@@ -2946,7 +2948,18 @@ pop: - FOR_EACH_IMM_USE_STMT (op_use_stmt, imm_iter, op) - if (!is_gimple_debug (op_use_stmt) - && flow_bb_inside_loop_p (loop, gimple_bb (op_use_stmt))) -- cnt++; -+ { -+ /* We want to allow x + x but not x < 1 ? x : 2. */ -+ if (is_gimple_assign (op_use_stmt) -+ && gimple_assign_rhs_code (op_use_stmt) == COND_EXPR) -+ { -+ use_operand_p use_p; -+ FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) -+ cnt++; -+ } -+ else -+ cnt++; -+ } - if (cnt != 1) - { - fail = true; diff --git a/fix-ICE-in-model_update_limit_points_in_group.patch b/fix-ICE-in-model_update_limit_points_in_group.patch deleted file mode 100644 index 2692196ad0ef0d71e87d05954cae1362ed42a151..0000000000000000000000000000000000000000 --- a/fix-ICE-in-model_update_limit_points_in_group.patch +++ /dev/null @@ -1,248 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. 
- -0001-re-PR-tree-optimization-88828-Inefficient-update-of-.patch -3bc104bdb4b5aa99ff6dceb246beaa65b012c5ac - -diff -Nurp a/gcc/testsuite/gcc.target/i386/pr88828-0.c b/gcc/testsuite/gcc.target/i386/pr88828-0.c ---- a/gcc/testsuite/gcc.target/i386/pr88828-0.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.target/i386/pr88828-0.c 2020-08-24 21:08:23.028000000 +0800 -@@ -0,0 +1,27 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -msse4.2" } */ -+ -+typedef int v4si __attribute__((vector_size(16))); -+typedef float v4sf __attribute__((vector_size(16))); -+ -+v4si foo (v4si x) -+{ -+ return (v4si){ x[0], 1, x[2], 3 }; -+} -+ -+/* { dg-final { scan-assembler "pblendw" } } */ -+ -+v4si bar (v4sf x) -+{ -+ return (v4si){ 1, x[1], x[2], 3 }; -+} -+ -+/* { dg-final { scan-assembler "cvttps2dq" } } */ -+/* { dg-final { scan-assembler "pblendw" } } */ -+ -+v4si baz (v4si x) -+{ -+ return (v4si) { x[1], x[2], x[3], 0 }; -+} -+ -+/* { dg-final { scan-assembler "psrldq" } } */ -diff -Nurp a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c ---- a/gcc/tree-ssa-forwprop.c 2020-08-24 21:07:59.800000000 +0800 -+++ b/gcc/tree-ssa-forwprop.c 2020-08-24 21:08:23.028000000 +0800 -@@ -1997,17 +1997,54 @@ simplify_permutation (gimple_stmt_iterat - return 0; - } - -+/* Get the BIT_FIELD_REF definition of VAL, if any, looking through -+ conversions with code CONV_CODE or update it if still ERROR_MARK. -+ Return NULL_TREE if no such matching def was found. */ -+ -+static tree -+get_bit_field_ref_def (tree val, enum tree_code &conv_code) -+{ -+ if (TREE_CODE (val) != SSA_NAME) -+ return NULL_TREE ; -+ gimple *def_stmt = get_prop_source_stmt (val, false, NULL); -+ if (!def_stmt) -+ return NULL_TREE; -+ enum tree_code code = gimple_assign_rhs_code (def_stmt); -+ if (code == FLOAT_EXPR -+ || code == FIX_TRUNC_EXPR) -+ { -+ tree op1 = gimple_assign_rhs1 (def_stmt); -+ if (conv_code == ERROR_MARK) -+ { -+ if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (val))), -+ GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (op1))))) -+ return NULL_TREE; -+ conv_code = code; -+ } -+ else if (conv_code != code) -+ return NULL_TREE; -+ if (TREE_CODE (op1) != SSA_NAME) -+ return NULL_TREE; -+ def_stmt = SSA_NAME_DEF_STMT (op1); -+ if (! is_gimple_assign (def_stmt)) -+ return NULL_TREE; -+ code = gimple_assign_rhs_code (def_stmt); -+ } -+ if (code != BIT_FIELD_REF) -+ return NULL_TREE; -+ return gimple_assign_rhs1 (def_stmt); -+} -+ - /* Recognize a VEC_PERM_EXPR. Returns true if there were any changes. 
*/ - - static bool - simplify_vector_constructor (gimple_stmt_iterator *gsi) - { - gimple *stmt = gsi_stmt (*gsi); -- gimple *def_stmt; - tree op, op2, orig[2], type, elem_type; - unsigned elem_size, i; - unsigned HOST_WIDE_INT nelts; -- enum tree_code code, conv_code; -+ enum tree_code conv_code; - constructor_elt *elt; - bool maybe_ident; - -@@ -2027,6 +2064,9 @@ simplify_vector_constructor (gimple_stmt - orig[1] = NULL; - conv_code = ERROR_MARK; - maybe_ident = true; -+ tree one_constant = NULL_TREE; -+ auto_vec constants; -+ constants.safe_grow_cleared (nelts); - FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (op), i, elt) - { - tree ref, op1; -@@ -2034,68 +2074,57 @@ simplify_vector_constructor (gimple_stmt - if (i >= nelts) - return false; - -- if (TREE_CODE (elt->value) != SSA_NAME) -- return false; -- def_stmt = get_prop_source_stmt (elt->value, false, NULL); -- if (!def_stmt) -- return false; -- code = gimple_assign_rhs_code (def_stmt); -- if (code == FLOAT_EXPR -- || code == FIX_TRUNC_EXPR) -+ op1 = get_bit_field_ref_def (elt->value, conv_code); -+ if (op1) - { -- op1 = gimple_assign_rhs1 (def_stmt); -- if (conv_code == ERROR_MARK) -+ ref = TREE_OPERAND (op1, 0); -+ unsigned int j; -+ for (j = 0; j < 2; ++j) - { -- if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (elt->value))), -- GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (op1))))) -- return false; -- conv_code = code; -+ if (!orig[j]) -+ { -+ if (TREE_CODE (ref) != SSA_NAME) -+ return false; -+ if (! VECTOR_TYPE_P (TREE_TYPE (ref)) -+ || ! useless_type_conversion_p (TREE_TYPE (op1), -+ TREE_TYPE (TREE_TYPE (ref)))) -+ return false; -+ if (j && !useless_type_conversion_p (TREE_TYPE (orig[0]), -+ TREE_TYPE (ref))) -+ return false; -+ orig[j] = ref; -+ break; -+ } -+ else if (ref == orig[j]) -+ break; - } -- else if (conv_code != code) -+ if (j == 2) - return false; -- if (TREE_CODE (op1) != SSA_NAME) -- return false; -- def_stmt = SSA_NAME_DEF_STMT (op1); -- if (! is_gimple_assign (def_stmt)) -+ -+ unsigned int elt; -+ if (maybe_ne (bit_field_size (op1), elem_size) -+ || !constant_multiple_p (bit_field_offset (op1), elem_size, &elt)) - return false; -- code = gimple_assign_rhs_code (def_stmt); -+ if (j) -+ elt += nelts; -+ if (elt != i) -+ maybe_ident = false; -+ sel.quick_push (elt); - } -- if (code != BIT_FIELD_REF) -- return false; -- op1 = gimple_assign_rhs1 (def_stmt); -- ref = TREE_OPERAND (op1, 0); -- unsigned int j; -- for (j = 0; j < 2; ++j) -+ else if (CONSTANT_CLASS_P (elt->value)) - { -- if (!orig[j]) -- { -- if (TREE_CODE (ref) != SSA_NAME) -- return false; -- if (! VECTOR_TYPE_P (TREE_TYPE (ref)) -- || ! 
useless_type_conversion_p (TREE_TYPE (op1), -- TREE_TYPE (TREE_TYPE (ref)))) -- return false; -- if (j && !useless_type_conversion_p (TREE_TYPE (orig[0]), -- TREE_TYPE (ref))) -- return false; -- orig[j] = ref; -- break; -- } -- else if (ref == orig[j]) -- break; -+ if (orig[1] -+ && orig[1] != error_mark_node) -+ return false; -+ orig[1] = error_mark_node; -+ if (!one_constant) -+ one_constant = elt->value; -+ constants[i] = elt->value; -+ sel.quick_push (i + nelts); -+ maybe_ident = false; - } -- if (j == 2) -- return false; -- -- unsigned int elt; -- if (maybe_ne (bit_field_size (op1), elem_size) -- || !constant_multiple_p (bit_field_offset (op1), elem_size, &elt)) -+ else - return false; -- if (j) -- elt += nelts; -- if (elt != i) -- maybe_ident = false; -- sel.quick_push (elt); - } - if (i < nelts) - return false; -@@ -2138,9 +2167,29 @@ simplify_vector_constructor (gimple_stmt - op2 = vec_perm_indices_to_tree (mask_type, indices); - if (!orig[1]) - orig[1] = orig[0]; -+ if (orig[1] == error_mark_node) -+ { -+ tree_vector_builder vec (type, nelts, 1); -+ for (unsigned i = 0; i < nelts; ++i) -+ if (constants[i]) -+ vec.quick_push (constants[i]); -+ else -+ /* ??? Push a don't-care value. */ -+ vec.quick_push (one_constant); -+ orig[1] = vec.build (); -+ } - if (conv_code == ERROR_MARK) - gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR, orig[0], - orig[1], op2); -+ else if (TREE_CODE (orig[1]) == VECTOR_CST) -+ { -+ gimple *conv -+ = gimple_build_assign (make_ssa_name (type), conv_code, orig[0]); -+ orig[0] = gimple_assign_lhs (conv); -+ gsi_insert_before (gsi, conv, GSI_SAME_STMT); -+ gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR, -+ orig[0], orig[1], op2); -+ } - else - { - gimple *perm diff --git a/fix-ICE-in-pass-vect.patch b/fix-ICE-in-pass-vect.patch deleted file mode 100644 index 38effd19c2c8adcdde2e0cb38ffe641bdea33b5e..0000000000000000000000000000000000000000 --- a/fix-ICE-in-pass-vect.patch +++ /dev/null @@ -1,37 +0,0 @@ -diff -uprN a/gcc/testsuite/gcc.target/aarch64/sve/slp_fix_1.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_fix_1.c ---- a/gcc/testsuite/gcc.target/aarch64/sve/slp_fix_1.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_fix_1.c 2020-11-17 02:38:45.284000000 +0800 -@@ -0,0 +1,19 @@ -+/* { dg-do compiler} */ -+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=256 -funsafe-math-optimizations" } */ -+ -+long a, b; -+float c, e; -+float *d; -+void f() { -+ float g, h, i, j; -+ b = 0; -+ for (; b < a; b++) { -+ i = d[0]; -+ g = g + i * e; -+ j = d[1]; -+ h = h - j * e; -+ d = d + 2; -+ } -+ c = g; -+ e = h; -+} -diff -uprN a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c ---- a/gcc/tree-vect-slp.c 2020-11-16 10:59:36.000000000 +0800 -+++ b/gcc/tree-vect-slp.c 2020-11-16 23:30:19.560000000 +0800 -@@ -4140,8 +4140,8 @@ vect_schedule_slp_instance (slp_tree nod - gimple *vstmt; - vstmt = gimple_build_assign (make_ssa_name (vectype), - VEC_PERM_EXPR, -- gimple_assign_lhs (v0[j]->stmt), -- gimple_assign_lhs (v1[j]->stmt), -+ gimple_get_lhs (v0[j]->stmt), -+ gimple_get_lhs (v1[j]->stmt), - tmask); - SLP_TREE_VEC_STMTS (node).quick_push - (vect_finish_stmt_generation (stmt_info, vstmt, &si)); diff --git a/fix-ICE-in-reload.patch b/fix-ICE-in-reload.patch deleted file mode 100644 index e1cd079538625f2c749a5acf7420631b719dc985..0000000000000000000000000000000000000000 --- a/fix-ICE-in-reload.patch +++ /dev/null @@ -1,369 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. 
-The commit id of these patchs list as following in the order of time. - -0001-Remove-gimple_call_types_likely_match_p-PR-70929.patch -7313607478c11e9455a32fb0dbfd7867e04ea96a - -diff -uprN a/gcc/auto-profile.c b/gcc/auto-profile.c ---- a/gcc/auto-profile.c 2020-03-31 09:51:52.000000000 +0800 -+++ b/gcc/auto-profile.c 2020-07-28 11:15:31.469393370 +0800 -@@ -605,8 +605,6 @@ function_instance::find_icall_target_map - get_identifier (afdo_string_table->get_name (callee))); - if (node == NULL) - continue; -- if (!check_ic_target (stmt, node)) -- continue; - (*map)[callee] = iter->second->total_count (); - ret += iter->second->total_count (); - } -@@ -1033,7 +1031,7 @@ afdo_indirect_call (gimple_stmt_iterator - print_generic_expr (dump_file, direct_call->decl, TDF_SLIM); - } - -- if (direct_call == NULL || !check_ic_target (stmt, direct_call)) -+ if (direct_call == NULL) - { - if (dump_file) - fprintf (dump_file, " not transforming\n"); -diff -uprN a/gcc/cgraph.c b/gcc/cgraph.c ---- a/gcc/cgraph.c 2020-07-28 11:18:05.385393370 +0800 -+++ b/gcc/cgraph.c 2020-07-28 11:15:31.469393370 +0800 -@@ -876,19 +876,8 @@ symbol_table::create_edge (cgraph_node * - edge->can_throw_external - = call_stmt ? stmt_can_throw_external (DECL_STRUCT_FUNCTION (caller->decl), - call_stmt) : false; -- if (call_stmt -- && callee && callee->decl -- && !gimple_check_call_matching_types (call_stmt, callee->decl, -- false)) -- { -- edge->inline_failed = CIF_MISMATCHED_ARGUMENTS; -- edge->call_stmt_cannot_inline_p = true; -- } -- else -- { -- edge->inline_failed = CIF_FUNCTION_NOT_CONSIDERED; -- edge->call_stmt_cannot_inline_p = false; -- } -+ edge->inline_failed = CIF_FUNCTION_NOT_CONSIDERED; -+ edge->call_stmt_cannot_inline_p = false; - - edge->indirect_info = NULL; - edge->indirect_inlining_edge = 0; -@@ -1253,13 +1242,6 @@ cgraph_edge::make_direct (cgraph_node *c - /* Insert to callers list of the new callee. */ - edge->set_callee (callee); - -- if (call_stmt -- && !gimple_check_call_matching_types (call_stmt, callee->decl, false)) -- { -- call_stmt_cannot_inline_p = true; -- inline_failed = CIF_MISMATCHED_ARGUMENTS; -- } -- - /* We need to re-determine the inlining status of the edge. */ - initialize_inline_failed (edge); - return edge; -@@ -1288,28 +1270,9 @@ cgraph_edge::redirect_call_stmt_to_calle - substitution), forget about speculating. */ - if (decl) - e = e->resolve_speculation (decl); -- /* If types do not match, speculation was likely wrong. -- The direct edge was possibly redirected to the clone with a different -- signature. We did not update the call statement yet, so compare it -- with the reference that still points to the proper type. */ -- else if (!gimple_check_call_matching_types (e->call_stmt, -- ref->referred->decl, -- true)) -- { -- if (dump_file) -- fprintf (dump_file, "Not expanding speculative call of %s -> %s\n" -- "Type mismatch.\n", -- e->caller->dump_name (), -- e->callee->dump_name ()); -- e = e->resolve_speculation (); -- /* We are producing the final function body and will throw away the -- callgraph edges really soon. Reset the counts/frequencies to -- keep verifier happy in the case of roundoff errors. */ -- e->count = gimple_bb (e->call_stmt)->count; -- } -- /* Expand speculation into GIMPLE code. */ - else - { -+ /* Expand speculation into GIMPLE code. */ - if (dump_file) - { - fprintf (dump_file, -@@ -3664,102 +3627,6 @@ cgraph_node::get_fun (void) - return fun; - } - --/* Verify if the type of the argument matches that of the function -- declaration. 
If we cannot verify this or there is a mismatch, -- return false. */ -- --static bool --gimple_check_call_args (gimple *stmt, tree fndecl, bool args_count_match) --{ -- tree parms, p; -- unsigned int i, nargs; -- -- /* Calls to internal functions always match their signature. */ -- if (gimple_call_internal_p (stmt)) -- return true; -- -- nargs = gimple_call_num_args (stmt); -- -- /* Get argument types for verification. */ -- if (fndecl) -- parms = TYPE_ARG_TYPES (TREE_TYPE (fndecl)); -- else -- parms = TYPE_ARG_TYPES (gimple_call_fntype (stmt)); -- -- /* Verify if the type of the argument matches that of the function -- declaration. If we cannot verify this or there is a mismatch, -- return false. */ -- if (fndecl && DECL_ARGUMENTS (fndecl)) -- { -- for (i = 0, p = DECL_ARGUMENTS (fndecl); -- i < nargs; -- i++, p = DECL_CHAIN (p)) -- { -- tree arg; -- /* We cannot distinguish a varargs function from the case -- of excess parameters, still deferring the inlining decision -- to the callee is possible. */ -- if (!p) -- break; -- arg = gimple_call_arg (stmt, i); -- if (p == error_mark_node -- || DECL_ARG_TYPE (p) == error_mark_node -- || arg == error_mark_node -- || (!types_compatible_p (DECL_ARG_TYPE (p), TREE_TYPE (arg)) -- && !fold_convertible_p (DECL_ARG_TYPE (p), arg))) -- return false; -- } -- if (args_count_match && p) -- return false; -- } -- else if (parms) -- { -- for (i = 0, p = parms; i < nargs; i++, p = TREE_CHAIN (p)) -- { -- tree arg; -- /* If this is a varargs function defer inlining decision -- to callee. */ -- if (!p) -- break; -- arg = gimple_call_arg (stmt, i); -- if (TREE_VALUE (p) == error_mark_node -- || arg == error_mark_node -- || TREE_CODE (TREE_VALUE (p)) == VOID_TYPE -- || (!types_compatible_p (TREE_VALUE (p), TREE_TYPE (arg)) -- && !fold_convertible_p (TREE_VALUE (p), arg))) -- return false; -- } -- } -- else -- { -- if (nargs != 0) -- return false; -- } -- return true; --} -- --/* Verify if the type of the argument and lhs of CALL_STMT matches -- that of the function declaration CALLEE. If ARGS_COUNT_MATCH is -- true, the arg count needs to be the same. -- If we cannot verify this or there is a mismatch, return false. */ -- --bool --gimple_check_call_matching_types (gimple *call_stmt, tree callee, -- bool args_count_match) --{ -- tree lhs; -- -- if ((DECL_RESULT (callee) -- && !DECL_BY_REFERENCE (DECL_RESULT (callee)) -- && (lhs = gimple_call_lhs (call_stmt)) != NULL_TREE -- && !useless_type_conversion_p (TREE_TYPE (DECL_RESULT (callee)), -- TREE_TYPE (lhs)) -- && !fold_convertible_p (TREE_TYPE (DECL_RESULT (callee)), lhs)) -- || !gimple_check_call_args (call_stmt, callee, args_count_match)) -- return false; -- return true; --} -- - /* Reset all state within cgraph.c so that we can rerun the compiler - within the same process. For use by toplev::finalize. 
*/ - -diff -uprN a/gcc/cgraph.h b/gcc/cgraph.h ---- a/gcc/cgraph.h 2020-07-28 11:18:04.361393370 +0800 -+++ b/gcc/cgraph.h 2020-07-28 11:15:31.469393370 +0800 -@@ -2412,8 +2412,6 @@ bool cgraph_function_possibly_inlined_p - const char* cgraph_inline_failed_string (cgraph_inline_failed_t); - cgraph_inline_failed_type_t cgraph_inline_failed_type (cgraph_inline_failed_t); - --extern bool gimple_check_call_matching_types (gimple *, tree, bool); -- - /* In cgraphunit.c */ - void cgraphunit_c_finalize (void); - -diff -uprN a/gcc/cif-code.def b/gcc/cif-code.def ---- a/gcc/cif-code.def 2020-03-31 09:51:52.000000000 +0800 -+++ b/gcc/cif-code.def 2020-07-28 11:15:31.469393370 +0800 -@@ -88,10 +88,6 @@ DEFCIFCODE(NOT_DECLARED_INLINED, CIF_FIN - N_("function not declared inline and code size would grow")) - - /* Caller and callee disagree on the arguments. */ --DEFCIFCODE(MISMATCHED_ARGUMENTS, CIF_FINAL_ERROR, -- N_("mismatched arguments")) -- --/* Caller and callee disagree on the arguments. */ - DEFCIFCODE(LTO_MISMATCHED_DECLARATIONS, CIF_FINAL_ERROR, - N_("mismatched declarations during linktime optimization")) - -diff -uprN a/gcc/ipa-inline.c b/gcc/ipa-inline.c ---- a/gcc/ipa-inline.c 2020-07-28 11:18:04.377393370 +0800 -+++ b/gcc/ipa-inline.c 2020-07-28 11:15:31.469393370 +0800 -@@ -2844,14 +2844,6 @@ early_inliner (function *fun) - = estimate_num_insns (edge->call_stmt, &eni_size_weights); - es->call_stmt_time - = estimate_num_insns (edge->call_stmt, &eni_time_weights); -- -- if (edge->callee->decl -- && !gimple_check_call_matching_types ( -- edge->call_stmt, edge->callee->decl, false)) -- { -- edge->inline_failed = CIF_MISMATCHED_ARGUMENTS; -- edge->call_stmt_cannot_inline_p = true; -- } - } - if (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS) - 1) - ipa_update_overall_fn_summary (node); -diff -uprN a/gcc/ipa-prop.c b/gcc/ipa-prop.c ---- a/gcc/ipa-prop.c 2020-07-28 11:18:04.377393370 +0800 -+++ b/gcc/ipa-prop.c 2020-07-28 11:15:31.469393370 +0800 -@@ -3841,11 +3841,6 @@ update_indirect_edges_after_inlining (st - else if (new_direct_edge) - { - new_direct_edge->indirect_inlining_edge = 1; -- if (new_direct_edge->call_stmt) -- new_direct_edge->call_stmt_cannot_inline_p -- = !gimple_check_call_matching_types ( -- new_direct_edge->call_stmt, -- new_direct_edge->callee->decl, false); - if (new_edges) - { - new_edges->safe_push (new_direct_edge); -diff -uprN a/gcc/testsuite/gcc.dg/winline-10.c b/gcc/testsuite/gcc.dg/winline-10.c ---- a/gcc/testsuite/gcc.dg/winline-10.c 2020-03-31 09:51:43.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/winline-10.c 2020-07-28 11:15:31.473393370 +0800 -@@ -1,9 +1,9 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -Winline" } */ -+/* { dg-options "-O2 -Winline -fopt-info-optimized-inline=stderr" } */ - - struct s { int a; }; - --inline void f (x) /* { dg-warning "inlining .* mismatched arg" } */ -+inline void f (x) - int x; - { - asm (""); -@@ -11,7 +11,7 @@ inline void f (x) /* { dg-warning "inlin - - void g (struct s x) - { -- f (x); /* { dg-message "called from here" } */ -+ f (x); /* { dg-optimized "Inlining f.* into g" } */ - } - - void f (int x); /* { dg-warning "follows non-prototype definition" } */ -diff -uprN a/gcc/testsuite/g++.dg/lto/pr70929_0.C b/gcc/testsuite/g++.dg/lto/pr70929_0.C ---- a/gcc/testsuite/g++.dg/lto/pr70929_0.C 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/g++.dg/lto/pr70929_0.C 2020-07-28 11:15:31.469393370 +0800 -@@ -0,0 +1,18 @@ -+// { dg-lto-do run } -+// { dg-lto-options { "-O3 -flto" } } -+ -+struct s -+{ -+ 
int a; -+ s() {a=1;} -+ ~s() {} -+}; -+int t(struct s s); -+int main() -+{ -+ s s; -+ int v=t(s); -+ if (!__builtin_constant_p (v)) -+ __builtin_abort (); -+ return 0; -+} -diff -uprN a/gcc/testsuite/g++.dg/lto/pr70929_1.C b/gcc/testsuite/g++.dg/lto/pr70929_1.C ---- a/gcc/testsuite/g++.dg/lto/pr70929_1.C 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/g++.dg/lto/pr70929_1.C 2020-07-28 11:15:31.473393370 +0800 -@@ -0,0 +1,10 @@ -+struct s -+{ -+ int a; -+ s() {a=1;} -+ ~s() {} -+}; -+int t(struct s s) -+{ -+ return s.a; -+} -diff -uprN a/gcc/value-prof.c b/gcc/value-prof.c ---- a/gcc/value-prof.c 2020-03-31 09:51:30.000000000 +0800 -+++ b/gcc/value-prof.c 2020-07-28 11:17:08.281393370 +0800 -@@ -1249,25 +1249,6 @@ find_func_by_profile_id (int profile_id) - return NULL; - } - --/* Perform sanity check on the indirect call target. Due to race conditions, -- false function target may be attributed to an indirect call site. If the -- call expression type mismatches with the target function's type, expand_call -- may ICE. Here we only do very minimal sanity check just to make compiler happy. -- Returns true if TARGET is considered ok for call CALL_STMT. */ -- --bool --check_ic_target (gcall *call_stmt, struct cgraph_node *target) --{ -- if (gimple_check_call_matching_types (call_stmt, target->decl, true)) -- return true; -- -- if (dump_enabled_p ()) -- dump_printf_loc (MSG_MISSED_OPTIMIZATION, call_stmt, -- "Skipping target %s with mismatching types for icall\n", -- target->name ()); -- return false; --} -- - /* Do transformation - - if (actual_callee_address == address_of_most_common_function/method) -@@ -1473,21 +1454,6 @@ gimple_ic_transform (gimple_stmt_iterato - return false; - } - -- if (!check_ic_target (stmt, direct_call)) -- { -- if (dump_file) -- { -- fprintf (dump_file, "Indirect call -> direct call "); -- print_generic_expr (dump_file, gimple_call_fn (stmt), TDF_SLIM); -- fprintf (dump_file, "=> "); -- print_generic_expr (dump_file, direct_call->decl, TDF_SLIM); -- fprintf (dump_file, " transformation skipped because of type mismatch"); -- print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); -- } -- gimple_remove_histogram_value (cfun, stmt, histogram); -- return false; -- } -- - if (dump_file) - { - fprintf (dump_file, "Indirect call -> direct call "); diff --git a/fix-ICE-in-store_constructor.patch b/fix-ICE-in-store_constructor.patch deleted file mode 100644 index 98cae504ff1258765e70bfb1f7f7bb9fc44c784f..0000000000000000000000000000000000000000 --- a/fix-ICE-in-store_constructor.patch +++ /dev/null @@ -1,356 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. 
- -0001-SLP-SLP-vectorization-vectorize-vector-constructors.patch -818b3293f4545d899148810f4f7d676b81e989dd - -diff -N -urp a/gcc/expr.c b/gcc/expr.c ---- a/gcc/expr.c 2020-07-24 11:19:53.840000000 +0800 -+++ b/gcc/expr.c 2020-07-24 11:56:50.128000000 +0800 -@@ -6788,6 +6788,7 @@ store_constructor (tree exp, rtx target, - && n_elts.is_constant (&const_n_elts)) - { - machine_mode emode = eltmode; -+ bool vector_typed_elts_p = false; - - if (CONSTRUCTOR_NELTS (exp) - && (TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (exp, 0)->value)) -@@ -6798,13 +6799,14 @@ store_constructor (tree exp, rtx target, - * TYPE_VECTOR_SUBPARTS (etype), - n_elts)); - emode = TYPE_MODE (etype); -+ vector_typed_elts_p = true; - } - icode = convert_optab_handler (vec_init_optab, mode, emode); - if (icode != CODE_FOR_nothing) - { - unsigned int i, n = const_n_elts; - -- if (emode != eltmode) -+ if (vector_typed_elts_p) - { - n = CONSTRUCTOR_NELTS (exp); - vec_vec_init_p = true; -diff -N -urp a/gcc/testsuite/gcc.dg/vect/bb-slp-40.c b/gcc/testsuite/gcc.dg/vect/bb-slp-40.c ---- a/gcc/testsuite/gcc.dg/vect/bb-slp-40.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-40.c 2020-07-24 11:56:50.128000000 +0800 -@@ -0,0 +1,34 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O3 -fdump-tree-slp-all" } */ -+/* { dg-require-effective-target vect_int } */ -+ -+char g_d[1024], g_s1[1024], g_s2[1024]; -+void foo(void) -+{ -+ char *d = g_d, *s1 = g_s1, *s2 = g_s2; -+ -+ for ( int y = 0; y < 128; y++ ) -+ { -+ d[0 ] = s1[0 ] + s2[0 ]; -+ d[1 ] = s1[1 ] + s2[1 ]; -+ d[2 ] = s1[2 ] + s2[2 ]; -+ d[3 ] = s1[3 ] + s2[3 ]; -+ d[4 ] = s1[4 ] + s2[4 ]; -+ d[5 ] = s1[5 ] + s2[5 ]; -+ d[6 ] = s1[6 ] + s2[6 ]; -+ d[7 ] = s1[7 ] + s2[7 ]; -+ d[8 ] = s1[8 ] + s2[8 ]; -+ d[9 ] = s1[9 ] + s2[9 ]; -+ d[10] = s1[10] + s2[10]; -+ d[11] = s1[11] + s2[11]; -+ d[12] = s1[12] + s2[12]; -+ d[13] = s1[13] + s2[13]; -+ d[14] = s1[14] + s2[14]; -+ d[15] = s1[15] + s2[15]; -+ d += 16; -+ } -+} -+ -+/* See that we vectorize an SLP instance. */ -+/* { dg-final { scan-tree-dump-times "Found vectorizable constructor" 1 "slp1" } } */ -+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "slp1" } } */ -diff -N -urp a/gcc/testsuite/gcc.dg/vect/bb-slp-41.c b/gcc/testsuite/gcc.dg/vect/bb-slp-41.c ---- a/gcc/testsuite/gcc.dg/vect/bb-slp-41.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-41.c 2020-07-24 11:56:50.128000000 +0800 -@@ -0,0 +1,61 @@ -+/* { dg-do run } */ -+/* { dg-options "-O3 -fdump-tree-slp-all -fno-vect-cost-model" } */ -+/* { dg-require-effective-target vect_int } */ -+ -+#define ARR_SIZE 1000 -+ -+void foo (int *a, int *b) -+{ -+ int i; -+ for (i = 0; i < (ARR_SIZE - 2); ++i) -+ a[i] = b[0] + b[1] + b[i+1] + b[i+2]; -+} -+ -+void bar (int *a, int *b) -+{ -+ int i; -+ for (i = 0; i < (ARR_SIZE - 2); ++i) -+ { -+ a[i] = b[0]; -+ } -+ for (i = 0; i < (ARR_SIZE - 2); ++i) -+ { -+ a[i] = a[i] + b[1]; -+ } -+ for (i = 0; i < (ARR_SIZE - 2); ++i) -+ { -+ a[i] = a[i] + b[i+1]; -+ } -+ for (i = 0; i < (ARR_SIZE - 2); ++i) -+ { -+ a[i] = a[i] + b[i+2]; -+ } -+} -+ -+int main () -+{ -+ int a1[ARR_SIZE]; -+ int a2[ARR_SIZE]; -+ int b[ARR_SIZE]; -+ int i; -+ -+ for (i = 0; i < ARR_SIZE; i++) -+ { -+ a1[i] = 0; -+ a2[i] = 0; -+ b[i] = i; -+ } -+ -+ foo (a1, b); -+ bar (a2, b); -+ -+ for (i = 0; i < ARR_SIZE; i++) -+ if (a1[i] != a2[i]) -+ return 1; -+ -+ return 0; -+ -+} -+/* See that we vectorize an SLP instance. 
*/ -+/* { dg-final { scan-tree-dump-times "Found vectorizable constructor" 12 "slp1" } } */ -+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "slp1" } } */ -diff -N -urp a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h ---- a/gcc/tree-vectorizer.h 2020-07-24 11:19:51.976000000 +0800 -+++ b/gcc/tree-vectorizer.h 2020-07-24 11:56:50.132000000 +0800 -@@ -151,6 +151,10 @@ typedef struct _slp_instance { - /* The root of SLP tree. */ - slp_tree root; - -+ /* For vector constructors, the constructor stmt that the SLP tree is built -+ from, NULL otherwise. */ -+ stmt_vec_info root_stmt; -+ - /* Size of groups of scalar stmts that will be replaced by SIMD stmt/s. */ - unsigned int group_size; - -@@ -170,6 +174,7 @@ typedef struct _slp_instance { - #define SLP_INSTANCE_GROUP_SIZE(S) (S)->group_size - #define SLP_INSTANCE_UNROLLING_FACTOR(S) (S)->unrolling_factor - #define SLP_INSTANCE_LOADS(S) (S)->loads -+#define SLP_INSTANCE_ROOT_STMT(S) (S)->root_stmt - - #define SLP_TREE_CHILDREN(S) (S)->children - #define SLP_TREE_SCALAR_STMTS(S) (S)->stmts -diff -N -urp a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c ---- a/gcc/tree-vect-slp.c 2020-07-24 11:19:51.980000000 +0800 -+++ b/gcc/tree-vect-slp.c 2020-07-24 11:56:50.132000000 +0800 -@@ -2019,6 +2019,7 @@ vect_analyze_slp_instance (vec_info *vin - unsigned int i; - struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); - vec scalar_stmts; -+ bool constructor = false; - - if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) - { -@@ -2032,6 +2033,13 @@ vect_analyze_slp_instance (vec_info *vin - vectype = STMT_VINFO_VECTYPE (stmt_info); - group_size = REDUC_GROUP_SIZE (stmt_info); - } -+ else if (is_gimple_assign (stmt_info->stmt) -+ && gimple_assign_rhs_code (stmt_info->stmt) == CONSTRUCTOR) -+ { -+ vectype = TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)); -+ group_size = CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt_info->stmt)); -+ constructor = true; -+ } - else - { - gcc_assert (is_a (vinfo)); -@@ -2079,6 +2087,25 @@ vect_analyze_slp_instance (vec_info *vin - STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) - = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ())); - } -+ else if (constructor) -+ { -+ tree rhs = gimple_assign_rhs1 (stmt_info->stmt); -+ tree val; -+ FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs), i, val) -+ { -+ if (TREE_CODE (val) == SSA_NAME) -+ { -+ gimple* def = SSA_NAME_DEF_STMT (val); -+ stmt_vec_info def_info = vinfo->lookup_stmt (def); -+ /* Value is defined in another basic block. */ -+ if (!def_info) -+ return false; -+ scalar_stmts.safe_push (def_info); -+ } -+ else -+ return false; -+ } -+ } - else - { - /* Collect reduction statements. */ -@@ -2164,6 +2191,8 @@ vect_analyze_slp_instance (vec_info *vin - SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size; - SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor; - SLP_INSTANCE_LOADS (new_instance) = vNULL; -+ SLP_INSTANCE_ROOT_STMT (new_instance) = constructor ? stmt_info : NULL; -+ - vect_gather_slp_loads (new_instance, node); - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, -@@ -3032,6 +3061,43 @@ vect_bb_vectorization_profitable_p (bb_v - return true; - } - -+/* Find any vectorizable constructors and add them to the grouped_store -+ array. 
*/ -+ -+static void -+vect_slp_check_for_constructors (bb_vec_info bb_vinfo) -+{ -+ gimple_stmt_iterator gsi; -+ -+ for (gsi = bb_vinfo->region_begin; -+ gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi)) -+ { -+ gimple *stmt = gsi_stmt (gsi); -+ -+ if (is_gimple_assign (stmt) -+ && gimple_assign_rhs_code (stmt) == CONSTRUCTOR -+ && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME -+ && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE) -+ { -+ tree rhs = gimple_assign_rhs1 (stmt); -+ -+ if (CONSTRUCTOR_NELTS (rhs) == 0) -+ continue; -+ -+ poly_uint64 subparts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs)); -+ -+ if (maybe_ne (subparts, CONSTRUCTOR_NELTS (rhs))) -+ continue; -+ -+ if (dump_enabled_p ()) -+ dump_printf_loc (MSG_NOTE, vect_location, -+ "Found vectorizable constructor: %G\n", stmt); -+ stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt); -+ BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info); -+ } -+ } -+} -+ - /* Check if the region described by BB_VINFO can be vectorized, returning - true if so. When returning false, set FATAL to true if the same failure - would prevent vectorization at other vector sizes, false if it is still -@@ -3079,6 +3145,8 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vi - return false; - } - -+ vect_slp_check_for_constructors (bb_vinfo); -+ - /* If there are no grouped stores in the region there is no need - to continue with pattern recog as vect_analyze_slp will fail - anyway. */ -@@ -3135,6 +3203,8 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vi - relevant. */ - vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance)); - vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance)); -+ if (SLP_INSTANCE_ROOT_STMT (instance)) -+ STMT_SLP_TYPE (SLP_INSTANCE_ROOT_STMT (instance)) = pure_slp; - - i++; - } -@@ -4175,6 +4245,49 @@ vect_remove_slp_scalar_calls (slp_tree n - vect_remove_slp_scalar_calls (node, visited); - } - -+/* Vectorize the instance root. */ -+ -+void -+vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance) -+{ -+ gassign *rstmt; -+ -+ if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) == 1) -+ { -+ stmt_vec_info child_stmt_info; -+ int j; -+ -+ FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info) -+ { -+ tree vect_lhs = gimple_get_lhs (child_stmt_info->stmt); -+ tree root_lhs = gimple_get_lhs (instance->root_stmt->stmt); -+ rstmt = gimple_build_assign (root_lhs, vect_lhs); -+ break; -+ } -+ } -+ else if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) > 1) -+ { -+ int nelts = SLP_TREE_NUMBER_OF_VEC_STMTS (node); -+ stmt_vec_info child_stmt_info; -+ int j; -+ vec *v; -+ vec_alloc (v, nelts); -+ -+ FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info) -+ { -+ CONSTRUCTOR_APPEND_ELT (v, -+ NULL_TREE, -+ gimple_get_lhs (child_stmt_info->stmt)); -+ } -+ tree lhs = gimple_get_lhs (instance->root_stmt->stmt); -+ tree rtype = TREE_TYPE (gimple_assign_rhs1 (instance->root_stmt->stmt)); -+ tree r_constructor = build_constructor (rtype, v); -+ rstmt = gimple_build_assign (lhs, r_constructor); -+ } -+ gimple_stmt_iterator rgsi = gsi_for_stmt (instance->root_stmt->stmt); -+ gsi_replace (&rgsi, rstmt, true); -+} -+ - /* Generate vector code for all SLP instances in the loop/basic block. */ - - void -@@ -4189,9 +4302,13 @@ vect_schedule_slp (vec_info *vinfo) - slp_instances = vinfo->slp_instances; - FOR_EACH_VEC_ELT (slp_instances, i, instance) - { -+ slp_tree node = SLP_INSTANCE_TREE (instance); - /* Schedule the tree of INSTANCE. 
*/ -- vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance), -- instance, bst_map); -+ vect_schedule_slp_instance (node, instance, bst_map); -+ -+ if (SLP_INSTANCE_ROOT_STMT (instance)) -+ vectorize_slp_instance_root_stmt (node, instance); -+ - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, - "vectorizing stmts using SLP.\n"); -@@ -4220,6 +4337,9 @@ vect_schedule_slp (vec_info *vinfo) - if (!STMT_VINFO_DATA_REF (store_info)) - break; - -+ if (SLP_INSTANCE_ROOT_STMT (instance)) -+ continue; -+ - store_info = vect_orig_stmt (store_info); - /* Free the attached stmt_vec_info and remove the stmt. */ - vinfo->remove_stmt (store_info); diff --git a/fix-ICE-in-vec.patch b/fix-ICE-in-vec.patch deleted file mode 100644 index 30d1c7eb478d6db448f31f79af42ec38e765b119..0000000000000000000000000000000000000000 --- a/fix-ICE-in-vec.patch +++ /dev/null @@ -1,93 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-re-PR-tree-optimization-92345-ICE-in-vec-_stmt_vec_i.patch -a6ba623777513e31721030092e4d786f461a0f06 - -diff -Nurp a/gcc/testsuite/gcc.dg/torture/pr92345.c b/gcc/testsuite/gcc.dg/torture/pr92345.c ---- a/gcc/testsuite/gcc.dg/torture/pr92345.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/torture/pr92345.c 2020-08-10 15:08:19.992000000 +0800 -@@ -0,0 +1,18 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-ftree-vectorize" } */ -+ -+long int x1; -+int fr; -+ -+int -+us (int sk, int jx) -+{ -+ while (sk < 1) -+ { -+ jx *= 2; -+ fr += x1 + 1; -+ ++sk; -+ } -+ -+ return jx; -+} -diff -Nurp a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c ---- a/gcc/tree-vect-loop.c 2020-08-10 15:07:44.456000000 +0800 -+++ b/gcc/tree-vect-loop.c 2020-08-10 15:08:19.992000000 +0800 -@@ -155,7 +155,7 @@ along with GCC; see the file COPYING3. - - static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *); - static stmt_vec_info vect_is_simple_reduction (loop_vec_info, stmt_vec_info, -- bool *); -+ bool *, bool *); - - /* Subroutine of vect_determine_vf_for_stmt that handles only one - statement. VECTYPE_MAYBE_SET_P is true if STMT_VINFO_VECTYPE -@@ -489,7 +489,7 @@ vect_analyze_scalar_cycles_1 (loop_vec_i - tree init, step; - auto_vec worklist; - gphi_iterator gsi; -- bool double_reduc; -+ bool double_reduc, reduc_chain; - - DUMP_VECT_SCOPE ("vect_analyze_scalar_cycles"); - -@@ -561,7 +561,8 @@ vect_analyze_scalar_cycles_1 (loop_vec_i - && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type); - - stmt_vec_info reduc_stmt_info -- = vect_is_simple_reduction (loop_vinfo, stmt_vinfo, &double_reduc); -+ = vect_is_simple_reduction (loop_vinfo, stmt_vinfo, &double_reduc, -+ &reduc_chain); - if (reduc_stmt_info) - { - STMT_VINFO_REDUC_DEF (stmt_vinfo) = reduc_stmt_info; -@@ -596,7 +597,7 @@ vect_analyze_scalar_cycles_1 (loop_vec_i - /* Store the reduction cycles for possible vectorization in - loop-aware SLP if it was not detected as reduction - chain. */ -- if (! REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info)) -+ if (! 
reduc_chain) - LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push - (reduc_stmt_info); - } -@@ -3032,7 +3033,7 @@ check_reduction_path (dump_user_location - - static stmt_vec_info - vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, -- bool *double_reduc) -+ bool *double_reduc, bool *reduc_chain_p) - { - gphi *phi = as_a (phi_info->stmt); - gimple *phi_use_stmt = NULL; -@@ -3040,6 +3041,7 @@ vect_is_simple_reduction (loop_vec_info - use_operand_p use_p; - - *double_reduc = false; -+ *reduc_chain_p = false; - STMT_VINFO_REDUC_TYPE (phi_info) = TREE_CODE_REDUCTION; - - tree phi_name = PHI_RESULT (phi); -@@ -3214,6 +3216,7 @@ vect_is_simple_reduction (loop_vec_info - LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (reduc_chain[0]); - REDUC_GROUP_SIZE (reduc_chain[0]) = reduc_chain.length (); - -+ *reduc_chain_p = true; - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, - "reduction: detected reduction chain\n"); diff --git a/fix-ICE-in-vect.patch b/fix-ICE-in-vect.patch deleted file mode 100644 index 3b59dd2960b4a56c9eb888c89b78deb1ca0d8a50..0000000000000000000000000000000000000000 --- a/fix-ICE-in-vect.patch +++ /dev/null @@ -1,1037 +0,0 @@ -This backport contains 5 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-Improve-tree-vect-patterns.c-handling-of-boolean-com.patch -ce19a4822794992097deab96bf15bf78ff481ea1 -0002-Make-vectorizable_operation-punt-early-on-codes-it-d.patch -4177e933b309408e69eb5561fee7a3cc5e6f8899 -0003-Make-vect_get_mask_type_for_stmt-take-a-group-size.patch -1c5d68a677b076262c5508e6d4fbdb765cba2d2f -0004-Record-the-vector-mask-precision-in-stmt_vec_info.patch -0c3ea6b3424ee4d32d97ca5d7453891b587b3132 -0005-Don-t-defer-choice-of-vector-type-for-bools-PR-92596.patch -02d895504cc59be06fc3f7ec0cfd4eb160561211 - -diff -Nurp a/gcc/testsuite/gcc.dg/vect/bb-slp-43.c b/gcc/testsuite/gcc.dg/vect/bb-slp-43.c ---- a/gcc/testsuite/gcc.dg/vect/bb-slp-43.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-43.c 2021-02-08 09:21:04.487633230 +0800 -@@ -0,0 +1,17 @@ -+/* { dg-do compile } */ -+ -+void -+f (int *restrict x, short *restrict y) -+{ -+ x[0] = x[0] == 1 & y[0] == 2; -+ x[1] = x[1] == 1 & y[1] == 2; -+ x[2] = x[2] == 1 & y[2] == 2; -+ x[3] = x[3] == 1 & y[3] == 2; -+ x[4] = x[4] == 1 & y[4] == 2; -+ x[5] = x[5] == 1 & y[5] == 2; -+ x[6] = x[6] == 1 & y[6] == 2; -+ x[7] = x[7] == 1 & y[7] == 2; -+} -+ -+/* { dg-final { scan-tree-dump-not "mixed mask and nonmask" "slp2" } } */ -+/* { dg-final { scan-tree-dump-not "vector operands from scalars" "slp2" { target { { vect_int && vect_bool_cmp } && { vect_unpack && vect_hw_misalign } } xfail vect_variable_length } } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/vect/bb-slp-pr92596.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr92596.c ---- a/gcc/testsuite/gcc.dg/vect/bb-slp-pr92596.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr92596.c 2021-02-08 09:21:04.487633230 +0800 -@@ -0,0 +1,14 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-O3" } */ -+ -+typedef struct { -+ long n[5]; -+} secp256k1_fe; -+ -+secp256k1_fe a; -+ -+void fn1(int p1) { a.n[0] = a.n[1] = a.n[2] = p1; } -+void fn2() { -+ int b; -+ fn1(!b); -+} -diff -Nurp a/gcc/testsuite/gcc.dg/vect/vect-bool-cmp-2.c b/gcc/testsuite/gcc.dg/vect/vect-bool-cmp-2.c ---- a/gcc/testsuite/gcc.dg/vect/vect-bool-cmp-2.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/vect/vect-bool-cmp-2.c 2021-02-08 09:21:04.487633230 
+0800 -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+ -+void -+f (_Bool *restrict x, _Bool *restrict y) -+{ -+ for (int i = 0; i < 128; ++i) -+ x[i] = x[i] == y[i]; -+} -+ -+/* { dg-final { scan-tree-dump "loop vectorized" "vect" { target vect_bool_cmp } } } */ -diff -Nurp a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp ---- a/gcc/testsuite/lib/target-supports.exp 2021-02-08 09:24:27.611633230 +0800 -+++ b/gcc/testsuite/lib/target-supports.exp 2021-02-08 09:21:03.859633230 +0800 -@@ -5668,6 +5668,16 @@ proc check_effective_target_vect_bswap { - || [istarget amdgcn-*-*] }}] - } - -+# Return 1 if the target supports comparison of bool vectors for at -+# least one vector length. -+ -+proc check_effective_target_vect_bool_cmp { } { -+ return [check_cached_effective_target_indexed vect_bool_cmp { -+ expr { [istarget i?86-*-*] || [istarget x86_64-*-*] -+ || [istarget aarch64*-*-*] -+ || [is-effective-target arm_neon] }}] -+} -+ - # Return 1 if the target supports hardware vector shift operation for char. - - proc check_effective_target_vect_shift_char { } { -diff -Nurp a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c ---- a/gcc/tree-vect-loop.c 2021-02-08 09:24:26.471633230 +0800 -+++ b/gcc/tree-vect-loop.c 2021-02-08 09:21:02.719633230 +0800 -@@ -164,8 +164,7 @@ static stmt_vec_info vect_is_simple_redu - static opt_result - vect_determine_vf_for_stmt_1 (stmt_vec_info stmt_info, - bool vectype_maybe_set_p, -- poly_uint64 *vf, -- vec *mask_producers) -+ poly_uint64 *vf) - { - gimple *stmt = stmt_info->stmt; - -@@ -193,8 +192,6 @@ vect_determine_vf_for_stmt_1 (stmt_vec_i - gcc_assert ((STMT_VINFO_DATA_REF (stmt_info) - || vectype_maybe_set_p) - && STMT_VINFO_VECTYPE (stmt_info) == stmt_vectype); -- else if (stmt_vectype == boolean_type_node) -- mask_producers->safe_push (stmt_info); - else - STMT_VINFO_VECTYPE (stmt_info) = stmt_vectype; - } -@@ -207,21 +204,17 @@ vect_determine_vf_for_stmt_1 (stmt_vec_i - - /* Subroutine of vect_determine_vectorization_factor. Set the vector - types of STMT_INFO and all attached pattern statements and update -- the vectorization factor VF accordingly. If some of the statements -- produce a mask result whose vector type can only be calculated later, -- add them to MASK_PRODUCERS. Return true on success or false if -- something prevented vectorization. */ -+ the vectorization factor VF accordingly. Return true on success -+ or false if something prevented vectorization. 
*/ - - static opt_result --vect_determine_vf_for_stmt (stmt_vec_info stmt_info, poly_uint64 *vf, -- vec *mask_producers) -+vect_determine_vf_for_stmt (stmt_vec_info stmt_info, poly_uint64 *vf) - { - vec_info *vinfo = stmt_info->vinfo; - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G", - stmt_info->stmt); -- opt_result res -- = vect_determine_vf_for_stmt_1 (stmt_info, false, vf, mask_producers); -+ opt_result res = vect_determine_vf_for_stmt_1 (stmt_info, false, vf); - if (!res) - return res; - -@@ -240,10 +233,7 @@ vect_determine_vf_for_stmt (stmt_vec_inf - dump_printf_loc (MSG_NOTE, vect_location, - "==> examining pattern def stmt: %G", - def_stmt_info->stmt); -- if (!vect_determine_vf_for_stmt_1 (def_stmt_info, true, -- vf, mask_producers)) -- res = vect_determine_vf_for_stmt_1 (def_stmt_info, true, -- vf, mask_producers); -+ res = vect_determine_vf_for_stmt_1 (def_stmt_info, true, vf); - if (!res) - return res; - } -@@ -252,7 +242,7 @@ vect_determine_vf_for_stmt (stmt_vec_inf - dump_printf_loc (MSG_NOTE, vect_location, - "==> examining pattern statement: %G", - stmt_info->stmt); -- res = vect_determine_vf_for_stmt_1 (stmt_info, true, vf, mask_producers); -+ res = vect_determine_vf_for_stmt_1 (stmt_info, true, vf); - if (!res) - return res; - } -@@ -297,7 +287,6 @@ vect_determine_vectorization_factor (loo - tree vectype; - stmt_vec_info stmt_info; - unsigned i; -- auto_vec mask_producers; - - DUMP_VECT_SCOPE ("vect_determine_vectorization_factor"); - -@@ -355,8 +344,7 @@ vect_determine_vectorization_factor (loo - { - stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); - opt_result res -- = vect_determine_vf_for_stmt (stmt_info, &vectorization_factor, -- &mask_producers); -+ = vect_determine_vf_for_stmt (stmt_info, &vectorization_factor); - if (!res) - return res; - } -@@ -374,16 +362,6 @@ vect_determine_vectorization_factor (loo - return opt_result::failure_at (vect_location, - "not vectorized: unsupported data-type\n"); - LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor; -- -- for (i = 0; i < mask_producers.length (); i++) -- { -- stmt_info = mask_producers[i]; -- opt_tree mask_type = vect_get_mask_type_for_stmt (stmt_info); -- if (!mask_type) -- return opt_result::propagate_failure (mask_type); -- STMT_VINFO_VECTYPE (stmt_info) = mask_type; -- } -- - return opt_result::success (); - } - -diff -Nurp a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h ---- a/gcc/tree-vectorizer.h 2021-02-08 09:24:26.463633230 +0800 -+++ b/gcc/tree-vectorizer.h 2021-02-08 09:21:02.619633230 +0800 -@@ -1080,6 +1080,23 @@ struct _stmt_vec_info { - unsigned int operation_precision; - signop operation_sign; - -+ /* If the statement produces a boolean result, this value describes -+ how we should choose the associated vector type. The possible -+ values are: -+ -+ - an integer precision N if we should use the vector mask type -+ associated with N-bit integers. This is only used if all relevant -+ input booleans also want the vector mask type for N-bit integers, -+ or if we can convert them into that form by pattern-matching. -+ -+ - ~0U if we considered choosing a vector mask type but decided -+ to treat the boolean as a normal integer type instead. -+ -+ - 0 otherwise. This means either that the operation isn't one that -+ could have a vector mask type (and so should have a normal vector -+ type instead) or that we simply haven't made a choice either way. */ -+ unsigned int mask_precision; -+ - /* True if this is only suitable for SLP vectorization. 
*/ - bool slp_vect_only_p; - }; -@@ -1236,6 +1253,15 @@ nested_in_vect_loop_p (struct loop *loop - && (loop->inner == (gimple_bb (stmt_info->stmt))->loop_father)); - } - -+/* Return true if STMT_INFO should produce a vector mask type rather than -+ a normal nonmask type. */ -+ -+static inline bool -+vect_use_mask_type_p (stmt_vec_info stmt_info) -+{ -+ return stmt_info->mask_precision && stmt_info->mask_precision != ~0U; -+} -+ - /* Return TRUE if a statement represented by STMT_INFO is a part of a - pattern. */ - -@@ -1620,7 +1646,7 @@ extern tree get_related_vectype_for_scal - poly_uint64 = 0); - extern tree get_vectype_for_scalar_type (vec_info *, tree, unsigned int = 0); - extern tree get_vectype_for_scalar_type (vec_info *, tree, slp_tree); --extern tree get_mask_type_for_scalar_type (vec_info *, tree, slp_tree = 0); -+extern tree get_mask_type_for_scalar_type (vec_info *, tree, unsigned int = 0); - extern tree get_same_sized_vectype (tree, tree); - extern bool vect_chooses_same_modes_p (vec_info *, machine_mode); - extern bool vect_get_loop_mask_type (loop_vec_info); -@@ -1673,7 +1699,7 @@ extern gcall *vect_gen_while (tree, tree - extern tree vect_gen_while_not (gimple_seq *, tree, tree, tree); - extern opt_result vect_get_vector_types_for_stmt (stmt_vec_info, tree *, - tree *, unsigned int = 0); --extern opt_tree vect_get_mask_type_for_stmt (stmt_vec_info, slp_tree = 0); -+extern opt_tree vect_get_mask_type_for_stmt (stmt_vec_info, unsigned int = 0); - - /* In tree-vect-data-refs.c. */ - extern bool vect_can_force_dr_alignment_p (const_tree, poly_uint64); -diff -Nurp a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c ---- a/gcc/tree-vect-patterns.c 2021-02-08 09:24:26.467633230 +0800 -+++ b/gcc/tree-vect-patterns.c 2021-02-08 09:21:02.543633230 +0800 -@@ -112,7 +112,12 @@ vect_init_pattern_stmt (gimple *pattern_ - STMT_VINFO_DEF_TYPE (pattern_stmt_info) - = STMT_VINFO_DEF_TYPE (orig_stmt_info); - if (!STMT_VINFO_VECTYPE (pattern_stmt_info)) -- STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype; -+ { -+ gcc_assert (VECTOR_BOOLEAN_TYPE_P (vectype) -+ == vect_use_mask_type_p (orig_stmt_info)); -+ STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype; -+ pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision; -+ } - return pattern_stmt_info; - } - -@@ -131,17 +136,25 @@ vect_set_pattern_stmt (gimple *pattern_s - - /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE - is nonnull, record that NEW_STMT's vector type is VECTYPE, which might -- be different from the vector type of the final pattern statement. */ -+ be different from the vector type of the final pattern statement. -+ If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type -+ from which it was derived. 
*/ - - static inline void - append_pattern_def_seq (stmt_vec_info stmt_info, gimple *new_stmt, -- tree vectype = NULL_TREE) -+ tree vectype = NULL_TREE, -+ tree scalar_type_for_mask = NULL_TREE) - { -+ gcc_assert (!scalar_type_for_mask -+ == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))); - vec_info *vinfo = stmt_info->vinfo; - if (vectype) - { - stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt); - STMT_VINFO_VECTYPE (new_stmt_info) = vectype; -+ if (scalar_type_for_mask) -+ new_stmt_info->mask_precision -+ = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask)); - } - gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info), - new_stmt); -@@ -3875,107 +3888,22 @@ adjust_bool_stmts (hash_set & - return gimple_assign_lhs (pattern_stmt); - } - --/* Helper for search_type_for_mask. */ -+/* Return the proper type for converting bool VAR into -+ an integer value or NULL_TREE if no such type exists. -+ The type is chosen so that the converted value has the -+ same number of elements as VAR's vector type. */ - - static tree --search_type_for_mask_1 (tree var, vec_info *vinfo, -- hash_map &cache) -+integer_type_for_mask (tree var, vec_info *vinfo) - { -- tree rhs1; -- enum tree_code rhs_code; -- tree res = NULL_TREE, res2; -- - if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var))) - return NULL_TREE; - - stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var); -- if (!def_stmt_info) -+ if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info)) - return NULL_TREE; - -- gassign *def_stmt = dyn_cast (def_stmt_info->stmt); -- if (!def_stmt) -- return NULL_TREE; -- -- tree *c = cache.get (def_stmt); -- if (c) -- return *c; -- -- rhs_code = gimple_assign_rhs_code (def_stmt); -- rhs1 = gimple_assign_rhs1 (def_stmt); -- -- switch (rhs_code) -- { -- case SSA_NAME: -- case BIT_NOT_EXPR: -- CASE_CONVERT: -- res = search_type_for_mask_1 (rhs1, vinfo, cache); -- break; -- -- case BIT_AND_EXPR: -- case BIT_IOR_EXPR: -- case BIT_XOR_EXPR: -- res = search_type_for_mask_1 (rhs1, vinfo, cache); -- res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo, -- cache); -- if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2))) -- res = res2; -- break; -- -- default: -- if (TREE_CODE_CLASS (rhs_code) == tcc_comparison) -- { -- tree comp_vectype, mask_type; -- -- if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1))) -- { -- res = search_type_for_mask_1 (rhs1, vinfo, cache); -- res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), -- vinfo, cache); -- if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2))) -- res = res2; -- break; -- } -- -- comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1)); -- if (comp_vectype == NULL_TREE) -- { -- res = NULL_TREE; -- break; -- } -- -- mask_type = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (rhs1)); -- if (!mask_type -- || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code)) -- { -- res = NULL_TREE; -- break; -- } -- -- if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE -- || !TYPE_UNSIGNED (TREE_TYPE (rhs1))) -- { -- scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1)); -- res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1); -- } -- else -- res = TREE_TYPE (rhs1); -- } -- } -- -- cache.put (def_stmt, res); -- return res; --} -- --/* Return the proper type for converting bool VAR into -- an integer value or NULL_TREE if no such type exists. -- The type is chosen so that converted value has the -- same number of elements as VAR's vector type. 
*/ -- --static tree --search_type_for_mask (tree var, vec_info *vinfo) --{ -- hash_map cache; -- return search_type_for_mask_1 (var, vinfo, cache); -+ return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1); - } - - /* Function vect_recog_bool_pattern -@@ -4067,7 +3995,7 @@ vect_recog_bool_pattern (stmt_vec_info s - } - else - { -- tree type = search_type_for_mask (var, vinfo); -+ tree type = integer_type_for_mask (var, vinfo); - tree cst0, cst1, tmp; - - if (!type) -@@ -4152,7 +4080,7 @@ vect_recog_bool_pattern (stmt_vec_info s - rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), stmt_vinfo); - else - { -- tree type = search_type_for_mask (var, vinfo); -+ tree type = integer_type_for_mask (var, vinfo); - tree cst0, cst1, new_vectype; - - if (!type) -@@ -4207,7 +4135,7 @@ build_mask_conversion (tree mask, tree v - masktype = truth_type_for (vectype); - tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL); - stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask); -- append_pattern_def_seq (stmt_vinfo, stmt, masktype); -+ append_pattern_def_seq (stmt_vinfo, stmt, masktype, TREE_TYPE (vectype)); - - return tmp; - } -@@ -4275,7 +4203,7 @@ vect_recog_mask_conversion_pattern (stmt - } - - tree mask_arg = gimple_call_arg (last_stmt, mask_argno); -- tree mask_arg_type = search_type_for_mask (mask_arg, vinfo); -+ tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo); - if (!mask_arg_type) - return NULL; - vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type); -@@ -4328,7 +4256,7 @@ vect_recog_mask_conversion_pattern (stmt - - if (TREE_CODE (rhs1) == SSA_NAME) - { -- rhs1_type = search_type_for_mask (rhs1, vinfo); -+ rhs1_type = integer_type_for_mask (rhs1, vinfo); - if (!rhs1_type) - return NULL; - } -@@ -4352,8 +4280,8 @@ vect_recog_mask_conversion_pattern (stmt - rhs1_op1 = TREE_OPERAND (rhs1, 1); - if (!rhs1_op0 || !rhs1_op1) - return NULL; -- rhs1_op0_type = search_type_for_mask (rhs1_op0, vinfo); -- rhs1_op1_type = search_type_for_mask (rhs1_op1, vinfo); -+ rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo); -+ rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo); - - if (!rhs1_op0_type) - rhs1_type = TREE_TYPE (rhs1_op0); -@@ -4441,7 +4369,8 @@ vect_recog_mask_conversion_pattern (stmt - pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1), - rhs1_op0, rhs1_op1); - rhs1 = tmp; -- append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2); -+ append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2, -+ rhs1_type); - } - - if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1), -@@ -4474,8 +4403,8 @@ vect_recog_mask_conversion_pattern (stmt - - rhs2 = gimple_assign_rhs2 (last_stmt); - -- rhs1_type = search_type_for_mask (rhs1, vinfo); -- rhs2_type = search_type_for_mask (rhs2, vinfo); -+ rhs1_type = integer_type_for_mask (rhs1, vinfo); -+ rhs2_type = integer_type_for_mask (rhs2, vinfo); - - if (!rhs1_type || !rhs2_type - || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type)) -@@ -4558,7 +4487,7 @@ static tree - vect_convert_mask_for_vectype (tree mask, tree vectype, - stmt_vec_info stmt_info, vec_info *vinfo) - { -- tree mask_type = search_type_for_mask (mask, vinfo); -+ tree mask_type = integer_type_for_mask (mask, vinfo); - if (mask_type) - { - tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type); -@@ -4997,6 +4926,148 @@ vect_determine_precisions_from_users (st - vect_set_min_input_precision (stmt_info, type, min_input_precision); - } - -+/* Return true if the statement described by STMT_INFO sets a boolean -+ SSA_NAME 
and if we know how to vectorize this kind of statement using -+ vector mask types. */ -+ -+static bool -+possible_vector_mask_operation_p (stmt_vec_info stmt_info) -+{ -+ tree lhs = gimple_get_lhs (stmt_info->stmt); -+ if (!lhs -+ || TREE_CODE (lhs) != SSA_NAME -+ || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs))) -+ return false; -+ -+ if (gassign *assign = dyn_cast (stmt_info->stmt)) -+ { -+ tree_code rhs_code = gimple_assign_rhs_code (assign); -+ switch (rhs_code) -+ { -+ CASE_CONVERT: -+ case SSA_NAME: -+ case BIT_NOT_EXPR: -+ case BIT_IOR_EXPR: -+ case BIT_XOR_EXPR: -+ case BIT_AND_EXPR: -+ return true; -+ -+ default: -+ return TREE_CODE_CLASS (rhs_code) == tcc_comparison; -+ } -+ } -+ return false; -+} -+ -+/* If STMT_INFO sets a boolean SSA_NAME, see whether we should use -+ a vector mask type instead of a normal vector type. Record the -+ result in STMT_INFO->mask_precision. */ -+ -+static void -+vect_determine_mask_precision (stmt_vec_info stmt_info) -+{ -+ vec_info *vinfo = stmt_info->vinfo; -+ -+ if (!possible_vector_mask_operation_p (stmt_info) -+ || stmt_info->mask_precision) -+ return; -+ -+ auto_vec worklist; -+ worklist.quick_push (stmt_info); -+ while (!worklist.is_empty ()) -+ { -+ stmt_info = worklist.last (); -+ unsigned int orig_length = worklist.length (); -+ -+ /* If at least one boolean input uses a vector mask type, -+ pick the mask type with the narrowest elements. -+ -+ ??? This is the traditional behavior. It should always produce -+ the smallest number of operations, but isn't necessarily the -+ optimal choice. For example, if we have: -+ -+ a = b & c -+ -+ where: -+ -+ - the user of a wants it to have a mask type for 16-bit elements (M16) -+ - b also uses M16 -+ - c uses a mask type for 8-bit elements (M8) -+ -+ then picking M8 gives: -+ -+ - 1 M16->M8 pack for b -+ - 1 M8 AND for a -+ - 2 M8->M16 unpacks for the user of a -+ -+ whereas picking M16 would have given: -+ -+ - 2 M8->M16 unpacks for c -+ - 2 M16 ANDs for a -+ -+ The number of operations are equal, but M16 would have given -+ a shorter dependency chain and allowed more ILP. */ -+ unsigned int precision = ~0U; -+ gassign *assign = as_a (stmt_info->stmt); -+ unsigned int nops = gimple_num_ops (assign); -+ for (unsigned int i = 1; i < nops; ++i) -+ { -+ tree rhs = gimple_op (assign, i); -+ if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs))) -+ continue; -+ -+ stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs); -+ if (!def_stmt_info) -+ /* Don't let external or constant operands influence the choice. -+ We can convert them to whichever vector type we pick. */ -+ continue; -+ -+ if (def_stmt_info->mask_precision) -+ { -+ if (precision > def_stmt_info->mask_precision) -+ precision = def_stmt_info->mask_precision; -+ } -+ else if (possible_vector_mask_operation_p (def_stmt_info)) -+ worklist.safe_push (def_stmt_info); -+ } -+ -+ /* Defer the choice if we need to visit operands first. */ -+ if (orig_length != worklist.length ()) -+ continue; -+ -+ /* If the statement compares two values that shouldn't use vector masks, -+ try comparing the values as normal scalars instead. 
*/ -+ tree_code rhs_code = gimple_assign_rhs_code (assign); -+ if (precision == ~0U -+ && TREE_CODE_CLASS (rhs_code) == tcc_comparison) -+ { -+ tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign)); -+ scalar_mode mode; -+ tree vectype, mask_type; -+ if (is_a (TYPE_MODE (rhs1_type), &mode) -+ && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type)) -+ && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type)) -+ && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code)) -+ precision = GET_MODE_BITSIZE (mode); -+ } -+ -+ if (dump_enabled_p ()) -+ { -+ if (precision == ~0U) -+ dump_printf_loc (MSG_NOTE, vect_location, -+ "using normal nonmask vectors for %G", -+ stmt_info->stmt); -+ else -+ dump_printf_loc (MSG_NOTE, vect_location, -+ "using boolean precision %d for %G", -+ precision, stmt_info->stmt); -+ } -+ -+ stmt_info->mask_precision = precision; -+ worklist.pop (); -+ } -+} -+ - /* Handle vect_determine_precisions for STMT_INFO, given that we - have already done so for the users of its result. */ - -@@ -5009,6 +5080,7 @@ vect_determine_stmt_precisions (stmt_vec - vect_determine_precisions_from_range (stmt_info, stmt); - vect_determine_precisions_from_users (stmt_info, stmt); - } -+ vect_determine_mask_precision (stmt_info); - } - - /* Walk backwards through the vectorizable region to determine the -+-search_type_for_mask (tree var, vec_info *vinfo) -+-{ -+- hash_map cache; -+- return search_type_for_mask_1 (var, vinfo, cache); -++ return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1); -+ } -+ -+ /* Function vect_recog_bool_pattern -+@@ -4371,7 +4298,7 @@ vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out) -+ -+ it is better for b1 and b2 to use the mask type associated -+ with int elements rather bool (byte) elements. */ -+- rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo); -++ rhs1_type = integer_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo); -+ if (!rhs1_type) -+ rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0)); -+ } -+@@ -4427,7 +4354,8 @@ vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out) -+ tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL); -+ pattern_stmt = gimple_build_assign (tmp, rhs1); -+ rhs1 = tmp; -+- append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2); -++ append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2, -++ rhs1_type); -+ } -+ -+ if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1), -diff -Nurp a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c ---- a/gcc/tree-vect-slp.c 2021-02-08 09:24:26.471633230 +0800 -+++ b/gcc/tree-vect-slp.c 2021-02-08 09:21:02.719633230 +0800 -@@ -906,17 +906,6 @@ vect_build_slp_tree_1 (unsigned char *sw - || rhs_code == LROTATE_EXPR - || rhs_code == RROTATE_EXPR) - { -- if (vectype == boolean_type_node) -- { -- if (dump_enabled_p ()) -- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, -- "Build SLP failed: shift of a" -- " boolean.\n"); -- /* Fatal mismatch. */ -- matches[0] = false; -- return false; -- } -- - vec_mode = TYPE_MODE (vectype); - - /* First see if we have a vector/vector shift. 
*/ -@@ -1137,9 +1126,8 @@ vect_build_slp_tree_1 (unsigned char *sw - if (alt_stmt_code != ERROR_MARK - && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference) - { -- if (vectype == boolean_type_node -- || !vect_two_operations_perm_ok_p (stmts, group_size, -- vectype, alt_stmt_code)) -+ if (!vect_two_operations_perm_ok_p (stmts, group_size, -+ vectype, alt_stmt_code)) - { - for (i = 0; i < group_size; ++i) - if (gimple_assign_rhs_code (stmts[i]->stmt) == alt_stmt_code) -@@ -2746,24 +2734,6 @@ vect_slp_analyze_node_operations_1 (vec_ - stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0]; - gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect); - -- /* For BB vectorization vector types are assigned here. -- Memory accesses already got their vector type assigned -- in vect_analyze_data_refs. */ -- bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); -- if (bb_vinfo && STMT_VINFO_VECTYPE (stmt_info) == boolean_type_node) -- { -- tree vectype = vect_get_mask_type_for_stmt (stmt_info, node); -- if (!vectype) -- /* vect_get_mask_type_for_stmt has already explained the -- failure. */ -- return false; -- -- stmt_vec_info sstmt_info; -- unsigned int i; -- FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt_info) -- STMT_VINFO_VECTYPE (sstmt_info) = vectype; -- } -- - /* Calculate the number of vector statements to be created for the - scalar stmts in this node. For SLP reductions it is equal to the - number of vector statements in the children (which has already been -diff -Nurp a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c ---- a/gcc/tree-vect-stmts.c 2021-02-08 09:24:26.371633230 +0800 -+++ b/gcc/tree-vect-stmts.c 2021-02-08 09:21:02.543633230 +0800 -@@ -3334,6 +3334,15 @@ vectorizable_call (stmt_vec_info stmt_in - return false; - } - -+ if (VECTOR_BOOLEAN_TYPE_P (vectype_out) -+ != VECTOR_BOOLEAN_TYPE_P (vectype_in)) -+ { -+ if (dump_enabled_p ()) -+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, -+ "mixed mask and nonmask vector types\n"); -+ return false; -+ } -+ - /* FORNOW */ - nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in); - nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out); -@@ -5938,6 +5947,21 @@ vectorizable_operation (stmt_vec_info st - - orig_code = code = gimple_assign_rhs_code (stmt); - -+ /* Shifts are handled in vectorizable_shift. */ -+ if (code == LSHIFT_EXPR -+ || code == RSHIFT_EXPR -+ || code == LROTATE_EXPR -+ || code == RROTATE_EXPR) -+ return false; -+ -+ /* Comparisons are handled in vectorizable_comparison. */ -+ if (TREE_CODE_CLASS (code) == tcc_comparison) -+ return false; -+ -+ /* Conditions are handled in vectorizable_condition. */ -+ if (code == COND_EXPR) -+ return false; -+ - /* For pointer addition and subtraction, we should use the normal - plus and minus for the vector operation. */ - if (code == POINTER_PLUS_EXPR) -@@ -5961,7 +5985,8 @@ vectorizable_operation (stmt_vec_info st - - /* Most operations cannot handle bit-precision types without extra - truncations. */ -- if (!VECTOR_BOOLEAN_TYPE_P (vectype_out) -+ bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out); -+ if (!mask_op_p - && !type_has_mode_precision_p (TREE_TYPE (scalar_dest)) - /* Exception are bitwise binary operations. 
*/ - && code != BIT_IOR_EXPR -@@ -6023,10 +6048,11 @@ vectorizable_operation (stmt_vec_info st - if (maybe_ne (nunits_out, nunits_in)) - return false; - -+ tree vectype2 = NULL_TREE, vectype3 = NULL_TREE; - if (op_type == binary_op || op_type == ternary_op) - { - op1 = gimple_assign_rhs2 (stmt); -- if (!vect_is_simple_use (op1, vinfo, &dt[1])) -+ if (!vect_is_simple_use (op1, vinfo, &dt[1], &vectype2)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, -@@ -6037,7 +6063,7 @@ vectorizable_operation (stmt_vec_info st - if (op_type == ternary_op) - { - op2 = gimple_assign_rhs3 (stmt); -- if (!vect_is_simple_use (op2, vinfo, &dt[2])) -+ if (!vect_is_simple_use (op2, vinfo, &dt[2], &vectype3)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, -@@ -6062,10 +6088,20 @@ vectorizable_operation (stmt_vec_info st - - gcc_assert (ncopies >= 1); - -- /* Shifts are handled in vectorizable_shift (). */ -- if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR -- || code == RROTATE_EXPR) -- return false; -+ /* Reject attempts to combine mask types with nonmask types, e.g. if -+ we have an AND between a (nonmask) boolean loaded from memory and -+ a (mask) boolean result of a comparison. -+ -+ TODO: We could easily fix these cases up using pattern statements. */ -+ if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p -+ || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p) -+ || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p)) -+ { -+ if (dump_enabled_p ()) -+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, -+ "mixed mask and nonmask vector types\n"); -+ return false; -+ } - - /* Supportable by target? */ - -@@ -10410,14 +10446,15 @@ get_vectype_for_scalar_type (vec_info *v - - Returns the mask type corresponding to a result of comparison - of vectors of specified SCALAR_TYPE as supported by target. -- NODE, if nonnull, is the SLP tree node that will use the returned -- vector type. */ -+ If GROUP_SIZE is nonzero and we're performing BB vectorization, -+ make sure that the number of elements in the vector is no bigger -+ than GROUP_SIZE. */ - - tree - get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type, -- slp_tree node) -+ unsigned int group_size) - { -- tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, node); -+ tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size); - - if (!vectype) - return NULL; -@@ -11112,9 +11149,6 @@ vect_gen_while_not (gimple_seq *seq, tre - - - Set *STMT_VECTYPE_OUT to: - - NULL_TREE if the statement doesn't need to be vectorized; -- - boolean_type_node if the statement is a boolean operation whose -- vector type can only be determined once all the other vector types -- are known; and - - the equivalent of STMT_VINFO_VECTYPE otherwise. 
- - - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum -@@ -11171,11 +11205,22 @@ vect_get_vector_types_for_stmt (stmt_vec - tree scalar_type = NULL_TREE; - if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info)) - { -- *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info); -+ vectype = STMT_VINFO_VECTYPE (stmt_info); - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, - "precomputed vectype: %T\n", vectype); - } -+ else if (vect_use_mask_type_p (stmt_info)) -+ { -+ unsigned int precision = stmt_info->mask_precision; -+ scalar_type = build_nonstandard_integer_type (precision, 1); -+ vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size); -+ if (!vectype) -+ return opt_result::failure_at (stmt, "not vectorized: unsupported" -+ " data-type %T\n", scalar_type); -+ if (dump_enabled_p ()) -+ dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype); -+ } - else - { - if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info)) -@@ -11185,28 +11230,6 @@ vect_get_vector_types_for_stmt (stmt_vec - else - scalar_type = TREE_TYPE (gimple_get_lhs (stmt)); - -- /* Pure bool ops don't participate in number-of-units computation. -- For comparisons use the types being compared. */ -- if (!STMT_VINFO_DATA_REF (stmt_info) -- && VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type) -- && is_gimple_assign (stmt) -- && gimple_assign_rhs_code (stmt) != COND_EXPR) -- { -- *stmt_vectype_out = boolean_type_node; -- -- tree rhs1 = gimple_assign_rhs1 (stmt); -- if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison -- && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1))) -- scalar_type = TREE_TYPE (rhs1); -- else -- { -- if (dump_enabled_p ()) -- dump_printf_loc (MSG_NOTE, vect_location, -- "pure bool operation.\n"); -- return opt_result::success (); -- } -- } -- - if (dump_enabled_p ()) - { - if (group_size) -@@ -11224,18 +11247,15 @@ vect_get_vector_types_for_stmt (stmt_vec - " unsupported data-type %T\n", - scalar_type); - -- if (!*stmt_vectype_out) -- *stmt_vectype_out = vectype; -- - if (dump_enabled_p ()) - dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype); - } -+ *stmt_vectype_out = vectype; - - /* Don't try to compute scalar types if the stmt produces a boolean - vector; use the existing vector type instead. */ - tree nunits_vectype = vectype; -- if (!VECTOR_BOOLEAN_TYPE_P (vectype) -- && *stmt_vectype_out != boolean_type_node) -+ if (!VECTOR_BOOLEAN_TYPE_P (vectype)) - { - /* The number of units is set according to the smallest scalar - type (or the largest vector size, but we only support one -@@ -11260,9 +11280,8 @@ vect_get_vector_types_for_stmt (stmt_vec - } - } - -- gcc_assert (*stmt_vectype_out == boolean_type_node -- || multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype), -- TYPE_VECTOR_SUBPARTS (*stmt_vectype_out))); -+ gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype), -+ TYPE_VECTOR_SUBPARTS (*stmt_vectype_out))); - - if (dump_enabled_p ()) - { -@@ -11274,82 +11293,3 @@ vect_get_vector_types_for_stmt (stmt_vec - *nunits_vectype_out = nunits_vectype; - return opt_result::success (); - } -- --/* Try to determine the correct vector type for STMT_INFO, which is a -- statement that produces a scalar boolean result. Return the vector -- type on success, otherwise return NULL_TREE. NODE, if nonnull, -- is the SLP tree node that will use the returned vector type. 
*/ -- --opt_tree --vect_get_mask_type_for_stmt (stmt_vec_info stmt_info, slp_tree node) --{ -- vec_info *vinfo = stmt_info->vinfo; -- gimple *stmt = stmt_info->stmt; -- tree mask_type = NULL; -- tree vectype, scalar_type; -- -- if (is_gimple_assign (stmt) -- && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison -- && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt)))) -- { -- scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt)); -- mask_type = get_mask_type_for_scalar_type (vinfo, scalar_type, node); -- -- if (!mask_type) -- return opt_tree::failure_at (stmt, -- "not vectorized: unsupported mask\n"); -- } -- else -- { -- tree rhs; -- ssa_op_iter iter; -- enum vect_def_type dt; -- -- FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE) -- { -- if (!vect_is_simple_use (rhs, stmt_info->vinfo, &dt, &vectype)) -- return opt_tree::failure_at (stmt, -- "not vectorized:can't compute mask" -- " type for statement, %G", stmt); -- -- /* No vectype probably means external definition. -- Allow it in case there is another operand which -- allows to determine mask type. */ -- if (!vectype) -- continue; -- -- if (!mask_type) -- mask_type = vectype; -- else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type), -- TYPE_VECTOR_SUBPARTS (vectype))) -- return opt_tree::failure_at (stmt, -- "not vectorized: different sized mask" -- " types in statement, %T and %T\n", -- mask_type, vectype); -- else if (VECTOR_BOOLEAN_TYPE_P (mask_type) -- != VECTOR_BOOLEAN_TYPE_P (vectype)) -- return opt_tree::failure_at (stmt, -- "not vectorized: mixed mask and " -- "nonmask vector types in statement, " -- "%T and %T\n", -- mask_type, vectype); -- } -- -- /* We may compare boolean value loaded as vector of integers. -- Fix mask_type in such case. */ -- if (mask_type -- && !VECTOR_BOOLEAN_TYPE_P (mask_type) -- && gimple_code (stmt) == GIMPLE_ASSIGN -- && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison) -- mask_type = truth_type_for (mask_type); -- } -- -- /* No mask_type should mean loop invariant predicate. -- This is probably a subject for optimization in if-conversion. */ -- if (!mask_type) -- return opt_tree::failure_at (stmt, -- "not vectorized: can't compute mask type " -- "for statement: %G", stmt); -- -- return opt_tree::success (mask_type); --} diff --git a/fix-ICE-in-vect_create_epilog_for_reduction.patch b/fix-ICE-in-vect_create_epilog_for_reduction.patch deleted file mode 100644 index 7abf6aa348cb38c421359c6ffdb6482e9778c55d..0000000000000000000000000000000000000000 --- a/fix-ICE-in-vect_create_epilog_for_reduction.patch +++ /dev/null @@ -1,87 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. 
- -0001-Don-t-assign-a-cost-to-vectorizable_assignment.patch -e4020b28d02a00d478a3a769855ae6a8d9cc6b26 - -diff -Nurp a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c ---- a/gcc/tree-vect-loop.c 2020-07-09 10:42:35.824000000 +0800 -+++ b/gcc/tree-vect-loop.c 2020-07-09 10:43:23.920000000 +0800 -@@ -1143,7 +1143,9 @@ vect_compute_single_scalar_iteration_cos - else - kind = scalar_store; - } -- else -+ else if (vect_nop_conversion_p (stmt_info)) -+ continue; -+ else - kind = scalar_stmt; - - record_stmt_cost (&LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), -diff -Nurp a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h ---- a/gcc/tree-vectorizer.h 2020-07-09 10:42:35.824000000 +0800 -+++ b/gcc/tree-vectorizer.h 2020-07-09 10:43:23.920000000 +0800 -@@ -1645,6 +1645,7 @@ extern tree vect_get_vec_def_for_stmt_co - extern bool vect_transform_stmt (stmt_vec_info, gimple_stmt_iterator *, - slp_tree, slp_instance); - extern void vect_remove_stores (stmt_vec_info); -+extern bool vect_nop_conversion_p (stmt_vec_info); - extern opt_result vect_analyze_stmt (stmt_vec_info, bool *, slp_tree, - slp_instance, stmt_vector_for_cost *); - extern void vect_get_load_cost (stmt_vec_info, int, bool, -diff -Nurp a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c ---- a/gcc/tree-vect-slp.c 2020-07-09 10:42:35.736000000 +0800 -+++ b/gcc/tree-vect-slp.c 2020-07-09 10:43:23.920000000 +0800 -@@ -2940,6 +2940,8 @@ vect_bb_slp_scalar_cost (basic_block bb, - else - kind = scalar_store; - } -+ else if (vect_nop_conversion_p (stmt_info)) -+ continue; - else - kind = scalar_stmt; - record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body); -diff -Nurp a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c ---- a/gcc/tree-vect-stmts.c 2020-07-09 10:42:35.732000000 +0800 -+++ b/gcc/tree-vect-stmts.c 2020-07-09 10:43:23.920000000 +0800 -@@ -5283,6 +5283,29 @@ vectorizable_conversion (stmt_vec_info s - return true; - } - -+/* Return true if we can assume from the scalar form of STMT_INFO that -+ neither the scalar nor the vector forms will generate code. STMT_INFO -+ is known not to involve a data reference. */ -+ -+bool -+vect_nop_conversion_p (stmt_vec_info stmt_info) -+{ -+ gassign *stmt = dyn_cast (stmt_info->stmt); -+ if (!stmt) -+ return false; -+ -+ tree lhs = gimple_assign_lhs (stmt); -+ tree_code code = gimple_assign_rhs_code (stmt); -+ tree rhs = gimple_assign_rhs1 (stmt); -+ -+ if (code == SSA_NAME || code == VIEW_CONVERT_EXPR) -+ return true; -+ -+ if (CONVERT_EXPR_CODE_P (code)) -+ return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)); -+ -+ return false; -+} - - /* Function vectorizable_assignment. - -@@ -5398,7 +5421,9 @@ vectorizable_assignment (stmt_vec_info s - { - STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type; - DUMP_VECT_SCOPE ("vectorizable_assignment"); -- vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec); -+ if (!vect_nop_conversion_p (stmt_info)) -+ vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, -+ cost_vec); - return true; - } - diff --git a/fix-ICE-in-vect_create_epilog_for_reduction_2.patch b/fix-ICE-in-vect_create_epilog_for_reduction_2.patch deleted file mode 100644 index 1130c05567e61cf9c2b615ddb8136e12eca518aa..0000000000000000000000000000000000000000 --- a/fix-ICE-in-vect_create_epilog_for_reduction_2.patch +++ /dev/null @@ -1,33 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. 
- -0001-re-PR-tree-optimization-92162-ICE-in-vect_create_epi.patch -53b15ca96116544a7a3ca8bc5f4e1649b74f3d45 - -diff -Nurp gcc-9.3.0_org/gcc/tree-vect-loop.c gcc-9.3.0/gcc/tree-vect-loop.c ---- gcc-9.3.0_org/gcc/tree-vect-loop.c 2020-08-17 10:23:55.768000000 +0800 -+++ gcc-9.3.0/gcc/tree-vect-loop.c 2020-08-17 10:27:15.848000000 +0800 -@@ -4574,9 +4574,9 @@ vect_create_epilog_for_reduction (stmt_v - (CCOMPARE). The then and else values mirror the main VEC_COND_EXPR: - the reduction phi corresponds to NEW_PHI_TREE and the new values - correspond to INDEX_BEFORE_INCR. */ -- gcc_assert (STMT_VINFO_REDUC_IDX (stmt_info) >= 1); -+ gcc_assert (STMT_VINFO_REDUC_IDX (reduc_info) >= 1); - tree index_cond_expr; -- if (STMT_VINFO_REDUC_IDX (stmt_info) == 2) -+ if (STMT_VINFO_REDUC_IDX (reduc_info) == 2) - index_cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type, - ccompare, indx_before_incr, new_phi_tree); - else -diff -Nurp gcc-9.3.0_org/gcc/tree-vect-stmts.c gcc-9.3.0/gcc/tree-vect-stmts.c ---- gcc-9.3.0_org/gcc/tree-vect-stmts.c 2020-08-17 10:23:53.960000000 +0800 -+++ gcc-9.3.0/gcc/tree-vect-stmts.c 2020-08-17 10:27:15.848000000 +0800 -@@ -9077,7 +9077,7 @@ vectorizable_condition (stmt_vec_info st - return false; - reduc_info = info_for_reduction (stmt_info); - reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info); -- reduc_index = STMT_VINFO_REDUC_IDX (stmt_info); -+ reduc_index = STMT_VINFO_REDUC_IDX (reduc_info); - gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION - || reduc_index != -1); - } diff --git a/fix-ICE-in-vect_create_epilog_for_reduction_3.patch b/fix-ICE-in-vect_create_epilog_for_reduction_3.patch deleted file mode 100644 index f74be1ebcf57670bcda05b50d3aec4c506c7981a..0000000000000000000000000000000000000000 --- a/fix-ICE-in-vect_create_epilog_for_reduction_3.patch +++ /dev/null @@ -1,87 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-re-PR-tree-optimization-92554-ICE-in-vect_create_epi.patch -04c4599d30b1eb7c21d39b15a685aa1d9b8bf968 - -diff -Nurp a/gcc/testsuite/gcc.dg/vect/pr92554.c b/gcc/testsuite/gcc.dg/vect/pr92554.c ---- a/gcc/testsuite/gcc.dg/vect/pr92554.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/vect/pr92554.c 2020-08-17 11:08:28.424000000 +0800 -@@ -0,0 +1,11 @@ -+/* { dg-do compile } */ -+ -+short int w9; -+ -+void __attribute__ ((simd)) -+zc (int in) -+{ -+ int va = 1; -+ -+ w9 *= va != 0 ? in < 0 : 0; -+} -diff -Nurp a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c ---- a/gcc/tree-vect-loop.c 2020-08-17 10:41:56.756000000 +0800 -+++ b/gcc/tree-vect-loop.c 2020-08-17 11:09:36.474259880 +0800 -@@ -4515,12 +4515,21 @@ vect_create_epilog_for_reduction (stmt_v - zeroes. 
*/ - if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION) - { -- tree indx_before_incr, indx_after_incr; -- poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype); -- -- gimple *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info)->stmt; -+ stmt_vec_info cond_info = STMT_VINFO_REDUC_DEF (reduc_info); -+ cond_info = vect_stmt_to_vectorize (cond_info); -+ while (gimple_assign_rhs_code (cond_info->stmt) != COND_EXPR) -+ { -+ cond_info -+ = loop_vinfo->lookup_def (gimple_op (cond_info->stmt, -+ 1 + STMT_VINFO_REDUC_IDX -+ (cond_info))); -+ cond_info = vect_stmt_to_vectorize (cond_info); -+ } -+ gimple *vec_stmt = STMT_VINFO_VEC_STMT (cond_info)->stmt; - gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR); - -+ tree indx_before_incr, indx_after_incr; -+ poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype); - int scalar_precision - = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (vectype))); - tree cr_index_scalar_type = make_unsigned_type (scalar_precision); -@@ -4574,9 +4583,9 @@ vect_create_epilog_for_reduction (stmt_v - (CCOMPARE). The then and else values mirror the main VEC_COND_EXPR: - the reduction phi corresponds to NEW_PHI_TREE and the new values - correspond to INDEX_BEFORE_INCR. */ -- gcc_assert (STMT_VINFO_REDUC_IDX (reduc_info) >= 1); -+ gcc_assert (STMT_VINFO_REDUC_IDX (cond_info) >= 1); - tree index_cond_expr; -- if (STMT_VINFO_REDUC_IDX (reduc_info) == 2) -+ if (STMT_VINFO_REDUC_IDX (cond_info) == 2) - index_cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type, - ccompare, indx_before_incr, new_phi_tree); - else -@@ -4772,10 +4781,11 @@ vect_create_epilog_for_reduction (stmt_v - be zero. */ - - /* Vector of {0, 0, 0,...}. */ -- tree zero_vec = make_ssa_name (vectype); -- tree zero_vec_rhs = build_zero_cst (vectype); -- gimple *zero_vec_stmt = gimple_build_assign (zero_vec, zero_vec_rhs); -- gsi_insert_before (&exit_gsi, zero_vec_stmt, GSI_SAME_STMT); -+ tree zero_vec = build_zero_cst (vectype); -+ -+ gimple_seq stmts = NULL; -+ new_phi_result = gimple_convert (&stmts, vectype, new_phi_result); -+ gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); - - /* Find maximum value from the vector of found indexes. */ - tree max_index = make_ssa_name (index_scalar_type); -@@ -4843,7 +4853,7 @@ vect_create_epilog_for_reduction (stmt_v - - /* Convert the reduced value back to the result type and set as the - result. */ -- gimple_seq stmts = NULL; -+ stmts = NULL; - new_temp = gimple_build (&stmts, VIEW_CONVERT_EXPR, scalar_type, - data_reduc); - gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); diff --git a/fix-ICE-in-vect_get_vec_def_for_stmt_copy.patch b/fix-ICE-in-vect_get_vec_def_for_stmt_copy.patch deleted file mode 100644 index b40d5e38dadfb4a2ac1fb8a65b4c6f516ef9aa2b..0000000000000000000000000000000000000000 --- a/fix-ICE-in-vect_get_vec_def_for_stmt_copy.patch +++ /dev/null @@ -1,54 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-re-PR-tree-optimization-92161-ICE-in-vect_get_vec_de.patch -ae7f3143a3876378d051e64c8e68718f27c41075 - -diff -Nurp a/gcc/testsuite/gfortran.dg/pr92161.f b/gcc/testsuite/gfortran.dg/pr92161.f ---- a/gcc/testsuite/gfortran.dg/pr92161.f 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gfortran.dg/pr92161.f 2020-08-17 10:18:05.996000000 +0800 -@@ -0,0 +1,23 @@ -+! { dg-do compile } -+! { dg-options "-O1 -ftree-loop-vectorize -fno-signed-zeros -fno-trapping-math" } -+! 
{ dg-additional-options "-mvsx" { target { powerpc*-*-* } } } -+ COMPLEX FUNCTION R1 (ZR, CC, EA, U6) -+ -+ INTEGER ZR, U6, FZ, J2 -+ COMPLEX EA(*), CC -+ DOUBLE PRECISION OS, GA, YU, XT -+ -+ OS = DBLE(REAL(CC)) -+ GA = DBLE(AIMAG(CC)) -+ J2 = 1 -+ -+ DO 5 FZ = 1, ZR -+ YU = DBLE(REAL(EA(J2))) -+ XT = DBLE(AIMAG(EA(J2))) -+ OS = OS + (YU * 2) - (XT * 2) -+ GA = GA + (YU * 3) + (XT * 3) -+ J2 = J2 + U6 -+ 5 CONTINUE -+ R1 = CMPLX(REAL(OS), REAL(GA)) -+ RETURN -+ END -diff -Nurp a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c ---- a/gcc/tree-vect-loop.c 2020-08-17 10:17:08.288000000 +0800 -+++ b/gcc/tree-vect-loop.c 2020-08-17 10:18:05.996000000 +0800 -@@ -2339,6 +2339,17 @@ again: - { - stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); - STMT_SLP_TYPE (stmt_info) = loop_vect; -+ if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def -+ || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def) -+ { -+ /* vectorizable_reduction adjusts reduction stmt def-types, -+ restore them to that of the PHI. */ -+ STMT_VINFO_DEF_TYPE (STMT_VINFO_REDUC_DEF (stmt_info)) -+ = STMT_VINFO_DEF_TYPE (stmt_info); -+ STMT_VINFO_DEF_TYPE (vect_stmt_to_vectorize -+ (STMT_VINFO_REDUC_DEF (stmt_info))) -+ = STMT_VINFO_DEF_TYPE (stmt_info); -+ } - } - for (gimple_stmt_iterator si = gsi_start_bb (bb); - !gsi_end_p (si); gsi_next (&si)) diff --git a/fix-ICE-in-vect_slp_analyze_node_operations.patch b/fix-ICE-in-vect_slp_analyze_node_operations.patch deleted file mode 100644 index 5f5d336691755838676fb1d61697a72c9f364241..0000000000000000000000000000000000000000 --- a/fix-ICE-in-vect_slp_analyze_node_operations.patch +++ /dev/null @@ -1,381 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. 
- -0001-re-PR-tree-optimization-92516-ICE-in-vect_schedule_s.patch -10a73df76280e12886cb20b028727436d73724c5 - -diff -Nurp a/gcc/testsuite/gcc.dg/vect/vect-ctor-1.c b/gcc/testsuite/gcc.dg/vect/vect-ctor-1.c ---- a/gcc/testsuite/gcc.dg/vect/vect-ctor-1.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/vect/vect-ctor-1.c 2020-08-17 10:33:56.052000000 +0800 -@@ -0,0 +1,17 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-O3" } */ -+/* { dg-additional-options "-mavx2" { target { i?86-*-* x86_64-*-* } } } */ -+ -+typedef struct { -+ unsigned short mprr_2[5][16][16]; -+} ImageParameters; -+int s[16][2]; -+void intrapred_luma_16x16(ImageParameters *img, int s0) -+{ -+ for (int j=0; j < 16; j++) -+ for (int i=0; i < 16; i++) -+ { -+ img->mprr_2[1 ][j][i]=s[j][1]; -+ img->mprr_2[2 ][j][i]=s0; -+ } -+} -diff -Nurp a/gcc/testsuite/g++.dg/vect/slp-pr92516.cc b/gcc/testsuite/g++.dg/vect/slp-pr92516.cc ---- a/gcc/testsuite/g++.dg/vect/slp-pr92516.cc 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/g++.dg/vect/slp-pr92516.cc 2020-08-17 10:33:56.052000000 +0800 -@@ -0,0 +1,43 @@ -+// { dg-do compile } -+// { dg-require-effective-target c++14 } -+ -+class a { -+public: -+ typedef int b; -+ operator b(); -+}; -+class c { -+public: -+ constexpr int m_fn1() const; -+ constexpr int d() const; -+ int e; -+ int f; -+}; -+constexpr int c::m_fn1() const { return e; } -+constexpr int c::d() const { return f; } -+class g { -+public: -+ g(); -+ constexpr void i(const c &) noexcept; -+ int j; -+ int k; -+ int l; -+ int m; -+}; -+constexpr void g::i(const c &n) noexcept { -+ int v = l - j, h = m - k; -+ j = n.m_fn1() - v / 2; -+ k = n.d() - h / 2; -+ l = j + v; -+ m = k + h; -+} -+class o { -+ void m_fn4() const; -+ a p; -+} r; -+void o::m_fn4() const { -+ g q; -+ c t; -+ q.i(t); -+ r.p || 0; -+} -diff -Nurp a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c ---- a/gcc/tree-vect-slp.c 2020-08-17 10:31:58.236000000 +0800 -+++ b/gcc/tree-vect-slp.c 2020-08-17 10:36:40.976796520 +0800 -@@ -2010,6 +2010,7 @@ calculate_unrolling_factor (poly_uint64 - - static bool - vect_analyze_slp_instance (vec_info *vinfo, -+ scalar_stmts_to_slp_tree_map_t *bst_map, - stmt_vec_info stmt_info, unsigned max_tree_size) - { - slp_instance new_instance; -@@ -2117,19 +2118,11 @@ vect_analyze_slp_instance (vec_info *vin - /* Build the tree for the SLP instance. */ - bool *matches = XALLOCAVEC (bool, group_size); - unsigned npermutes = 0; -- scalar_stmts_to_slp_tree_map_t *bst_map -- = new scalar_stmts_to_slp_tree_map_t (); - poly_uint64 max_nunits = nunits; - unsigned tree_size = 0; - node = vect_build_slp_tree (vinfo, scalar_stmts, group_size, - &max_nunits, matches, &npermutes, - &tree_size, bst_map); -- /* The map keeps a reference on SLP nodes built, release that. */ -- for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin (); -- it != bst_map->end (); ++it) -- if ((*it).second) -- vect_free_slp_tree ((*it).second, false); -- delete bst_map; - if (node != NULL) - { - /* If this is a reduction chain with a conversion in front -@@ -2183,6 +2176,18 @@ vect_analyze_slp_instance (vec_info *vin - matches[group_size / const_max_nunits * const_max_nunits] = false; - vect_free_slp_tree (node, false); - } -+ else if (constructor -+ && SLP_TREE_DEF_TYPE (node) != vect_internal_def) -+ { -+ /* CONSTRUCTOR vectorization relies on a vector stmt being -+ generated, that doesn't work for fully external ones. 
*/ -+ if (dump_enabled_p ()) -+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, -+ "Build SLP failed: CONSTRUCTOR of external " -+ "or constant elements\n"); -+ vect_free_slp_tree (node, false); -+ return false; -+ } - else - { - /* Create a new SLP instance. */ -@@ -2317,7 +2322,7 @@ vect_analyze_slp_instance (vec_info *vin - - stmt_vec_info rest = vect_split_slp_store_group (stmt_info, - group1_size); -- bool res = vect_analyze_slp_instance (vinfo, stmt_info, -+ bool res = vect_analyze_slp_instance (vinfo, bst_map, stmt_info, - max_tree_size); - /* If the first non-match was in the middle of a vector, - skip the rest of that vector. */ -@@ -2328,7 +2333,8 @@ vect_analyze_slp_instance (vec_info *vin - rest = vect_split_slp_store_group (rest, const_nunits); - } - if (i < group_size) -- res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size); -+ res |= vect_analyze_slp_instance (vinfo, bst_map, -+ rest, max_tree_size); - return res; - } - /* Even though the first vector did not all match, we might be able to SLP -@@ -2350,9 +2356,12 @@ vect_analyze_slp (vec_info *vinfo, unsig - - DUMP_VECT_SCOPE ("vect_analyze_slp"); - -+ scalar_stmts_to_slp_tree_map_t *bst_map -+ = new scalar_stmts_to_slp_tree_map_t (); -+ - /* Find SLP sequences starting from groups of grouped stores. */ - FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element) -- vect_analyze_slp_instance (vinfo, first_element, max_tree_size); -+ vect_analyze_slp_instance (vinfo, bst_map, first_element, max_tree_size); - - if (loop_vec_info loop_vinfo = dyn_cast (vinfo)) - { -@@ -2361,7 +2370,7 @@ vect_analyze_slp (vec_info *vinfo, unsig - { - /* Find SLP sequences starting from reduction chains. */ - FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element) -- if (! vect_analyze_slp_instance (vinfo, first_element, -+ if (! vect_analyze_slp_instance (vinfo, bst_map, first_element, - max_tree_size)) - { - /* Dissolve reduction chain group. */ -@@ -2383,10 +2392,17 @@ vect_analyze_slp (vec_info *vinfo, unsig - - /* Find SLP sequences starting from groups of reductions. */ - if (loop_vinfo->reductions.length () > 1) -- vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0], -+ vect_analyze_slp_instance (vinfo, bst_map, loop_vinfo->reductions[0], - max_tree_size); - } - -+ /* The map keeps a reference on SLP nodes built, release that. */ -+ for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin (); -+ it != bst_map->end (); ++it) -+ if ((*it).second) -+ vect_free_slp_tree ((*it).second, false); -+ delete bst_map; -+ - return opt_result::success (); - } - -@@ -2513,13 +2529,6 @@ vect_detect_hybrid_slp_stmts (slp_tree n - vect_detect_hybrid_slp_stmts (child, i, stype, visited); - } - --static void --vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype) --{ -- hash_map visited; -- vect_detect_hybrid_slp_stmts (node, i, stype, visited); --} -- - /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */ - - static tree -@@ -2602,11 +2611,12 @@ vect_detect_hybrid_slp (loop_vec_info lo - /* Then walk the SLP instance trees marking stmts with uses in - non-SLP stmts as hybrid, also propagating hybrid down the - SLP tree, collecting the above info on-the-fly. 
*/ -+ hash_map visited; - FOR_EACH_VEC_ELT (slp_instances, i, instance) - { - for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i) - vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance), -- i, pure_slp); -+ i, pure_slp, visited); - } - } - -@@ -2763,8 +2773,8 @@ vect_slp_convert_to_external (vec_info * - static bool - vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node, - slp_instance node_instance, -- scalar_stmts_to_slp_tree_map_t *visited, -- scalar_stmts_to_slp_tree_map_t *lvisited, -+ hash_set &visited, -+ hash_set &lvisited, - stmt_vector_for_cost *cost_vec) - { - int i, j; -@@ -2774,27 +2784,13 @@ vect_slp_analyze_node_operations (vec_in - return true; - - /* If we already analyzed the exact same set of scalar stmts we're done. -- We share the generated vector stmts for those. */ -- slp_tree *leader; -- if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node))) -- || (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node)))) -- { -- SLP_TREE_NUMBER_OF_VEC_STMTS (node) -- = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader); -- /* Cope with cases in which we made a late decision to build the -- node from scalars. */ -- if (SLP_TREE_DEF_TYPE (*leader) == vect_external_def -- && vect_slp_convert_to_external (vinfo, node, node_instance)) -- ; -- else -- gcc_assert (SLP_TREE_DEF_TYPE (node) == SLP_TREE_DEF_TYPE (*leader)); -- return true; -- } -- -- /* The SLP graph is acyclic so not caching whether we failed or succeeded -+ We share the generated vector stmts for those. -+ The SLP graph is acyclic so not caching whether we failed or succeeded - doesn't result in any issue since we throw away the lvisited set - when we fail. */ -- lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node); -+ if (visited.contains (node) -+ || lvisited.add (node)) -+ return true; - - FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) - if (!vect_slp_analyze_node_operations (vinfo, child, node_instance, -@@ -2867,16 +2863,15 @@ vect_slp_analyze_operations (vec_info *v - - DUMP_VECT_SCOPE ("vect_slp_analyze_operations"); - -- scalar_stmts_to_slp_tree_map_t *visited -- = new scalar_stmts_to_slp_tree_map_t (); -+ hash_set visited; - for (i = 0; vinfo->slp_instances.iterate (i, &instance); ) - { -- scalar_stmts_to_slp_tree_map_t lvisited; -+ hash_set lvisited; - stmt_vector_for_cost cost_vec; - cost_vec.create (2); - if (!vect_slp_analyze_node_operations (vinfo, - SLP_INSTANCE_TREE (instance), -- instance, visited, &lvisited, -+ instance, visited, lvisited, - &cost_vec)) - { - slp_tree node = SLP_INSTANCE_TREE (instance); -@@ -2891,16 +2886,15 @@ vect_slp_analyze_operations (vec_info *v - } - else - { -- for (scalar_stmts_to_slp_tree_map_t::iterator x = lvisited.begin(); -+ for (hash_set::iterator x = lvisited.begin(); - x != lvisited.end(); ++x) -- visited->put ((*x).first.copy (), (*x).second); -+ visited.add (*x); - i++; - - add_stmt_costs (vinfo->target_cost_data, &cost_vec); - cost_vec.release (); - } - } -- delete visited; - - return !vinfo->slp_instances.is_empty (); - } -@@ -2991,15 +2985,6 @@ vect_bb_slp_scalar_cost (basic_block bb, - } - } - --static void --vect_bb_slp_scalar_cost (basic_block bb, -- slp_tree node, vec *life, -- stmt_vector_for_cost *cost_vec) --{ -- hash_set visited; -- vect_bb_slp_scalar_cost (bb, node, life, cost_vec, visited); --} -- - /* Check if vectorization of the basic block is profitable. */ - - static bool -@@ -3014,13 +2999,14 @@ vect_bb_vectorization_profitable_p (bb_v - /* Calculate scalar cost. 
*/ - stmt_vector_for_cost scalar_costs; - scalar_costs.create (0); -+ hash_set visited; - FOR_EACH_VEC_ELT (slp_instances, i, instance) - { - auto_vec life; - life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance)); - vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo), - SLP_INSTANCE_TREE (instance), -- &life, &scalar_costs); -+ &life, &scalar_costs, visited); - } - void *target_cost_data = init_cost (NULL); - add_stmt_costs (target_cost_data, &scalar_costs); -@@ -4052,8 +4038,7 @@ vect_transform_slp_perm_load (slp_tree n - /* Vectorize SLP instance tree in postorder. */ - - static void --vect_schedule_slp_instance (slp_tree node, slp_instance instance, -- scalar_stmts_to_slp_tree_map_t *bst_map) -+vect_schedule_slp_instance (slp_tree node, slp_instance instance) - { - gimple_stmt_iterator si; - stmt_vec_info stmt_info; -@@ -4070,17 +4055,8 @@ vect_schedule_slp_instance (slp_tree nod - if (SLP_TREE_VEC_STMTS (node).exists ()) - return; - -- /* See if we have already vectorized the same set of stmts and reuse their -- vectorized stmts across instances. */ -- if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node))) -- { -- SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader)); -- return; -- } -- -- bst_map->put (SLP_TREE_SCALAR_STMTS (node).copy (), node); - FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) -- vect_schedule_slp_instance (child, instance, bst_map); -+ vect_schedule_slp_instance (child, instance); - - /* Push SLP node def-type to stmts. */ - FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) -@@ -4297,14 +4273,12 @@ vect_schedule_slp (vec_info *vinfo) - slp_instance instance; - unsigned int i; - -- scalar_stmts_to_slp_tree_map_t *bst_map -- = new scalar_stmts_to_slp_tree_map_t (); - slp_instances = vinfo->slp_instances; - FOR_EACH_VEC_ELT (slp_instances, i, instance) - { - slp_tree node = SLP_INSTANCE_TREE (instance); - /* Schedule the tree of INSTANCE. */ -- vect_schedule_slp_instance (node, instance, bst_map); -+ vect_schedule_slp_instance (node, instance); - - if (SLP_INSTANCE_ROOT_STMT (instance)) - vectorize_slp_instance_root_stmt (node, instance); -@@ -4313,7 +4287,6 @@ vect_schedule_slp (vec_info *vinfo) - dump_printf_loc (MSG_NOTE, vect_location, - "vectorizing stmts using SLP.\n"); - } -- delete bst_map; - - FOR_EACH_VEC_ELT (slp_instances, i, instance) - { diff --git a/fix-ICE-in-vect_stmt_to_vectorize.patch b/fix-ICE-in-vect_stmt_to_vectorize.patch deleted file mode 100644 index 80229b58aa32906057e7daba6f0935fd08fe64bc..0000000000000000000000000000000000000000 --- a/fix-ICE-in-vect_stmt_to_vectorize.patch +++ /dev/null @@ -1,47 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. 
- -0001-re-PR-tree-optimization-92252-ICE-Segmentation-fault.patch -97c6bea819ec0a773041308e62a7c05c33f093b0 - -diff -Nurp a/gcc/testsuite/gcc.dg/torture/pr92252.c b/gcc/testsuite/gcc.dg/torture/pr92252.c ---- a/gcc/testsuite/gcc.dg/torture/pr92252.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/torture/pr92252.c 2020-07-03 10:39:44.808000000 +0800 -@@ -0,0 +1,23 @@ -+/* { do-do compile } */ -+/* { dg-additional-options "-ftree-vectorize" } */ -+ -+long int ar; -+int dt; -+ -+long int -+pc (unsigned long int q3, int zw) -+{ -+ long int em = 0; -+ -+ while (zw < 1) -+ { -+ q3 = zw * 2ul; -+ if (q3 != 0) -+ for (ar = 0; ar < 2; ++ar) -+ em = dt; -+ -+ ++zw; -+ } -+ -+ return em; -+} -diff -Nurp a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c ---- a/gcc/tree-vect-slp.c 2020-07-03 10:35:59.876000000 +0800 -+++ b/gcc/tree-vect-slp.c 2020-07-03 10:39:44.808000000 +0800 -@@ -581,6 +581,10 @@ again: - { - swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt), - gimple_assign_rhs3_ptr (stmt)); -+ if (STMT_VINFO_REDUC_IDX (stmt_info) == 1) -+ STMT_VINFO_REDUC_IDX (stmt_info) = 2; -+ else if (STMT_VINFO_REDUC_IDX (stmt_info) == 2) -+ STMT_VINFO_REDUC_IDX (stmt_info) = 1; - bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0)); - code = invert_tree_comparison (TREE_CODE (cond), honor_nans); - gcc_assert (code != ERROR_MARK); diff --git a/fix-ICE-in-vect_transform_stmt.patch b/fix-ICE-in-vect_transform_stmt.patch deleted file mode 100644 index 9433155026f8b220c10df1ec048c4a4416fbdf57..0000000000000000000000000000000000000000 --- a/fix-ICE-in-vect_transform_stmt.patch +++ /dev/null @@ -1,96 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-re-PR-fortran-92094-ice-in-vect_transform_stmt-at-tr.patch -c30587c0200f52f8845a5aea21bd7bef6cbe0bf4 - -diff -Nurp a/gcc/testsuite/gfortran.dg/pr92094.f90 b/gcc/testsuite/gfortran.dg/pr92094.f90 ---- a/gcc/testsuite/gfortran.dg/pr92094.f90 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gfortran.dg/pr92094.f90 2020-08-18 14:25:12.392000000 +0800 -@@ -0,0 +1,28 @@ -+! { dg-do compile } -+! { dg-options "-O3" } -+ subroutine hesfcn(n, x, h, ldh) -+ integer n,ldh -+ double precision x(n), h(ldh) -+ -+ integer i,j,k,kj -+ double precision th,u1,u2,v2 -+ -+ kj = 0 -+ do 770 j = 1, n -+ kj = kj - j -+ do 760 k = 1, j -+ kj = kj + 1 -+ v2 = 2 * x(k) - 1 -+ u1 = 0 -+ u2 = 2 -+ do 750 i = 1, n -+ h(kj) = h(kj) + u2 -+ th = 4 * v2 + u2 - u1 -+ u1 = u2 -+ u2 = th -+ th = v2 - 1 -+ 750 continue -+ 760 continue -+ 770 continue -+ -+ end -diff -Nurp a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c ---- a/gcc/tree-vect-loop.c 2020-08-18 14:19:43.784000000 +0800 -+++ b/gcc/tree-vect-loop.c 2020-08-18 14:25:12.396000000 +0800 -@@ -5891,20 +5891,9 @@ vectorizable_reduction (stmt_vec_info st - if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle) - { - if (is_a (stmt_info->stmt)) -- { -- /* Analysis for double-reduction is done on the outer -- loop PHI, nested cycles have no further restrictions. */ -- STMT_VINFO_TYPE (stmt_info) = cycle_phi_info_type; -- /* For nested cycles we want to let regular vectorizable_* -- routines handle code-generation. 
*/ -- if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_double_reduction_def) -- { -- stmt_info = STMT_VINFO_REDUC_DEF (stmt_info); -- STMT_VINFO_DEF_TYPE (stmt_info) = vect_internal_def; -- STMT_VINFO_DEF_TYPE (vect_stmt_to_vectorize (stmt_info)) -- = vect_internal_def; -- } -- } -+ /* Analysis for double-reduction is done on the outer -+ loop PHI, nested cycles have no further restrictions. */ -+ STMT_VINFO_TYPE (stmt_info) = cycle_phi_info_type; - else - STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; - return true; -diff -Nurp a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c ---- a/gcc/tree-vect-stmts.c 2020-08-18 14:19:45.556000000 +0800 -+++ b/gcc/tree-vect-stmts.c 2020-08-18 14:25:12.396000000 +0800 -@@ -10224,13 +10224,16 @@ vect_transform_stmt (stmt_vec_info stmt_ - && STMT_VINFO_REDUC_TYPE (reduc_info) != EXTRACT_LAST_REDUCTION) - { - gphi *phi; -+ edge e; - if (!slp_node - && (phi = dyn_cast - (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt)) - && dominated_by_p (CDI_DOMINATORS, -- gimple_bb (orig_stmt_info->stmt), gimple_bb (phi))) -+ gimple_bb (orig_stmt_info->stmt), gimple_bb (phi)) -+ && (e = loop_latch_edge (gimple_bb (phi)->loop_father)) -+ && (PHI_ARG_DEF_FROM_EDGE (phi, e) -+ == gimple_get_lhs (orig_stmt_info->stmt))) - { -- edge e = loop_latch_edge (gimple_bb (phi)->loop_father); - stmt_vec_info phi_info - = STMT_VINFO_VEC_STMT (STMT_VINFO_REDUC_DEF (orig_stmt_info)); - stmt_vec_info vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); -@@ -10250,7 +10253,7 @@ vect_transform_stmt (stmt_vec_info stmt_ - { - slp_tree phi_node = slp_node_instance->reduc_phis; - gphi *phi = as_a (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt); -- edge e = loop_latch_edge (gimple_bb (phi)->loop_father); -+ e = loop_latch_edge (gimple_bb (phi)->loop_father); - gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length () - == SLP_TREE_VEC_STMTS (slp_node).length ()); - for (unsigned i = 0; i < SLP_TREE_VEC_STMTS (phi_node).length (); ++i) diff --git a/fix-ICE-in-vect_update_misalignment_for_peel.patch b/fix-ICE-in-vect_update_misalignment_for_peel.patch deleted file mode 100644 index 30a9548b5d2d90487b9a95615e5780dbc3ff899f..0000000000000000000000000000000000000000 --- a/fix-ICE-in-vect_update_misalignment_for_peel.patch +++ /dev/null @@ -1,784 +0,0 @@ -This backport contains 5 patchs from gcc main stream tree. -The commit id of these patchs list as following in the order of time. 
- -8801ca5c28c3a9e9f36fa39a6a4455b48c8221fa -9ac1403ca2c65ba4f28cf051b5326617fa9298d1 -7e99af4816cfad578094fcf08e2377f3ed76e201 -ef8777c14ce8694f53eab7a88d24513cbf541ba4 -dccbf1e2a6e544f71b4a5795f0c79015db019fc3 - - -diff -Nurp a/gcc/testsuite/gcc.dg/vect/pr92677.c b/gcc/testsuite/gcc.dg/vect/pr92677.c ---- a/gcc/testsuite/gcc.dg/vect/pr92677.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/vect/pr92677.c 2020-10-26 18:31:50.980000000 +0800 -@@ -0,0 +1,26 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-O3" } */ -+ -+int a, c; -+int *b; -+long d; -+double *e; -+ -+void fn1() { -+ long f; -+ double g, h; -+ while (c) { -+ if (d) { -+ g = *e; -+ *(b + 4) = g; -+ } -+ if (f) { -+ h = *(e + 2); -+ *(b + 6) = h; -+ } -+ e += a; -+ b += 8; -+ c--; -+ d += 2; -+ } -+} -diff -Nurp a/gcc/testsuite/gcc.dg/vect/slp-46.c b/gcc/testsuite/gcc.dg/vect/slp-46.c ---- a/gcc/testsuite/gcc.dg/vect/slp-46.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/vect/slp-46.c 2020-10-26 18:31:56.512000000 +0800 -@@ -0,0 +1,96 @@ -+/* { dg-require-effective-target vect_double } */ -+ -+#include "tree-vect.h" -+ -+double x[1024], y[1024]; -+ -+void __attribute__((noipa)) foo() -+{ -+ for (int i = 0; i < 512; ++i) -+ { -+ x[2*i] = y[i]; -+ x[2*i+1] = y[i]; -+ } -+} -+ -+void __attribute__((noipa)) bar() -+{ -+ for (int i = 0; i < 512; ++i) -+ { -+ x[2*i] = y[2*i]; -+ x[2*i+1] = y[2*i]; -+ } -+} -+ -+void __attribute__((noipa)) baz() -+{ -+ for (int i = 0; i < 512; ++i) -+ { -+ x[2*i] = y[511-i]; -+ x[2*i+1] = y[511-i]; -+ } -+} -+ -+void __attribute__((noipa)) boo() -+{ -+ for (int i = 0; i < 512; ++i) -+ { -+ x[2*i] = y[2*(511-i)]; -+ x[2*i+1] = y[2*(511-i)]; -+ } -+} -+ -+int -+main () -+{ -+ check_vect (); -+ -+ for (int i = 0; i < 1024; ++i) -+ { -+ x[i] = 0; -+ y[i] = i; -+ __asm__ volatile (""); -+ } -+ -+ foo (); -+ for (int i = 0; i < 1024; ++i) -+ if (x[i] != y[i/2]) -+ abort (); -+ -+ for (int i = 0; i < 1024; ++i) -+ { -+ x[i] = 0; -+ __asm__ volatile (""); -+ } -+ -+ bar (); -+ for (int i = 0; i < 1024; ++i) -+ if (x[i] != y[2*(i/2)]) -+ abort (); -+ -+ for (int i = 0; i < 1024; ++i) -+ { -+ x[i] = 0; -+ __asm__ volatile (""); -+ } -+ -+ baz (); -+ for (int i = 0; i < 1024; ++i) -+ if (x[i] != y[511 - i/2]) -+ abort (); -+ -+ for (int i = 0; i < 1024; ++i) -+ { -+ x[i] = 0; -+ __asm__ volatile (""); -+ } -+ -+ boo (); -+ for (int i = 0; i < 1024; ++i) -+ if (x[i] != y[2*(511 - i/2)]) -+ abort (); -+ -+ return 0; -+} -+ -+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-5.c b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-5.c ---- a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-5.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-5.c 2020-10-26 18:31:53.584000000 +0800 -@@ -0,0 +1,36 @@ -+#include "tree-vect.h" -+ -+#define N 512 -+ -+int a[N], b[N]; -+ -+int __attribute__((noipa)) -+foo (int aval, int bval) -+{ -+ int i, res = 0; -+ for (i=0; inum, - chrec_convert (type, evol, at_stmt), - code, rhs1, at_stmt); -- res = follow_ssa_edge -- (loop, SSA_NAME_DEF_STMT (rhs0), halting_phi, &evol, limit); -+ res = follow_ssa_edge_expr -+ (loop, at_stmt, rhs0, halting_phi, &evol, limit); - if (res == t_true) - *evolution_of_loop = evol; - else if (res == t_false) -@@ -979,8 +979,8 @@ follow_ssa_edge_binary (struct loop *loo - (loop->num, - chrec_convert (type, *evolution_of_loop, at_stmt), - code, rhs0, at_stmt); -- res = follow_ssa_edge -- (loop, 
SSA_NAME_DEF_STMT (rhs1), halting_phi, -+ res = follow_ssa_edge_expr -+ (loop, at_stmt, rhs1, halting_phi, - evolution_of_loop, limit); - if (res == t_true) - ; -@@ -1000,8 +1000,8 @@ follow_ssa_edge_binary (struct loop *loo - (loop->num, chrec_convert (type, *evolution_of_loop, - at_stmt), - code, rhs1, at_stmt); -- res = follow_ssa_edge -- (loop, SSA_NAME_DEF_STMT (rhs0), halting_phi, -+ res = follow_ssa_edge_expr -+ (loop, at_stmt, rhs0, halting_phi, - evolution_of_loop, limit); - if (res == t_true) - ; -@@ -1018,8 +1018,8 @@ follow_ssa_edge_binary (struct loop *loo - (loop->num, chrec_convert (type, *evolution_of_loop, - at_stmt), - code, rhs0, at_stmt); -- res = follow_ssa_edge -- (loop, SSA_NAME_DEF_STMT (rhs1), halting_phi, -+ res = follow_ssa_edge_expr -+ (loop, at_stmt, rhs1, halting_phi, - evolution_of_loop, limit); - if (res == t_true) - ; -@@ -1050,8 +1050,8 @@ follow_ssa_edge_binary (struct loop *loo - *evolution_of_loop = add_to_evolution - (loop->num, chrec_convert (type, *evolution_of_loop, at_stmt), - MINUS_EXPR, rhs1, at_stmt); -- res = follow_ssa_edge (loop, SSA_NAME_DEF_STMT (rhs0), halting_phi, -- evolution_of_loop, limit); -+ res = follow_ssa_edge_expr (loop, at_stmt, rhs0, halting_phi, -+ evolution_of_loop, limit); - if (res == t_true) - ; - else if (res == t_dont_know) -@@ -1071,140 +1071,6 @@ follow_ssa_edge_binary (struct loop *loo - return res; - } - --/* Follow the ssa edge into the expression EXPR. -- Return true if the strongly connected component has been found. */ -- --static t_bool --follow_ssa_edge_expr (struct loop *loop, gimple *at_stmt, tree expr, -- gphi *halting_phi, tree *evolution_of_loop, -- int limit) --{ -- enum tree_code code = TREE_CODE (expr); -- tree type = TREE_TYPE (expr), rhs0, rhs1; -- t_bool res; -- -- /* The EXPR is one of the following cases: -- - an SSA_NAME, -- - an INTEGER_CST, -- - a PLUS_EXPR, -- - a POINTER_PLUS_EXPR, -- - a MINUS_EXPR, -- - an ASSERT_EXPR, -- - other cases are not yet handled. */ -- -- switch (code) -- { -- CASE_CONVERT: -- /* This assignment is under the form "a_1 = (cast) rhs. */ -- res = follow_ssa_edge_expr (loop, at_stmt, TREE_OPERAND (expr, 0), -- halting_phi, evolution_of_loop, limit); -- *evolution_of_loop = chrec_convert (type, *evolution_of_loop, at_stmt); -- break; -- -- case INTEGER_CST: -- /* This assignment is under the form "a_1 = 7". */ -- res = t_false; -- break; -- -- case SSA_NAME: -- /* This assignment is under the form: "a_1 = b_2". */ -- res = follow_ssa_edge -- (loop, SSA_NAME_DEF_STMT (expr), halting_phi, evolution_of_loop, limit); -- break; -- -- case POINTER_PLUS_EXPR: -- case PLUS_EXPR: -- case MINUS_EXPR: -- /* This case is under the form "rhs0 +- rhs1". */ -- rhs0 = TREE_OPERAND (expr, 0); -- rhs1 = TREE_OPERAND (expr, 1); -- type = TREE_TYPE (rhs0); -- STRIP_USELESS_TYPE_CONVERSION (rhs0); -- STRIP_USELESS_TYPE_CONVERSION (rhs1); -- res = follow_ssa_edge_binary (loop, at_stmt, type, rhs0, code, rhs1, -- halting_phi, evolution_of_loop, limit); -- break; -- -- case ADDR_EXPR: -- /* Handle &MEM[ptr + CST] which is equivalent to POINTER_PLUS_EXPR. 
*/ -- if (TREE_CODE (TREE_OPERAND (expr, 0)) == MEM_REF) -- { -- expr = TREE_OPERAND (expr, 0); -- rhs0 = TREE_OPERAND (expr, 0); -- rhs1 = TREE_OPERAND (expr, 1); -- type = TREE_TYPE (rhs0); -- STRIP_USELESS_TYPE_CONVERSION (rhs0); -- STRIP_USELESS_TYPE_CONVERSION (rhs1); -- res = follow_ssa_edge_binary (loop, at_stmt, type, -- rhs0, POINTER_PLUS_EXPR, rhs1, -- halting_phi, evolution_of_loop, limit); -- } -- else -- res = t_false; -- break; -- -- case ASSERT_EXPR: -- /* This assignment is of the form: "a_1 = ASSERT_EXPR " -- It must be handled as a copy assignment of the form a_1 = a_2. */ -- rhs0 = ASSERT_EXPR_VAR (expr); -- if (TREE_CODE (rhs0) == SSA_NAME) -- res = follow_ssa_edge (loop, SSA_NAME_DEF_STMT (rhs0), -- halting_phi, evolution_of_loop, limit); -- else -- res = t_false; -- break; -- -- default: -- res = t_false; -- break; -- } -- -- return res; --} -- --/* Follow the ssa edge into the right hand side of an assignment STMT. -- Return true if the strongly connected component has been found. */ -- --static t_bool --follow_ssa_edge_in_rhs (struct loop *loop, gimple *stmt, -- gphi *halting_phi, tree *evolution_of_loop, -- int limit) --{ -- enum tree_code code = gimple_assign_rhs_code (stmt); -- tree type = gimple_expr_type (stmt), rhs1, rhs2; -- t_bool res; -- -- switch (code) -- { -- CASE_CONVERT: -- /* This assignment is under the form "a_1 = (cast) rhs. */ -- res = follow_ssa_edge_expr (loop, stmt, gimple_assign_rhs1 (stmt), -- halting_phi, evolution_of_loop, limit); -- *evolution_of_loop = chrec_convert (type, *evolution_of_loop, stmt); -- break; -- -- case POINTER_PLUS_EXPR: -- case PLUS_EXPR: -- case MINUS_EXPR: -- rhs1 = gimple_assign_rhs1 (stmt); -- rhs2 = gimple_assign_rhs2 (stmt); -- type = TREE_TYPE (rhs1); -- res = follow_ssa_edge_binary (loop, stmt, type, rhs1, code, rhs2, -- halting_phi, evolution_of_loop, limit); -- break; -- -- default: -- if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS) -- res = follow_ssa_edge_expr (loop, stmt, gimple_assign_rhs1 (stmt), -- halting_phi, evolution_of_loop, limit); -- else -- res = t_false; -- break; -- } -- -- return res; --} -- - /* Checks whether the I-th argument of a PHI comes from a backedge. */ - - static bool -@@ -1244,8 +1110,8 @@ follow_ssa_edge_in_condition_phi_branch - if (TREE_CODE (branch) == SSA_NAME) - { - *evolution_of_branch = init_cond; -- return follow_ssa_edge (loop, SSA_NAME_DEF_STMT (branch), halting_phi, -- evolution_of_branch, limit); -+ return follow_ssa_edge_expr (loop, condition_phi, branch, halting_phi, -+ evolution_of_branch, limit); - } - - /* This case occurs when one of the condition branches sets -@@ -1352,65 +1218,158 @@ follow_ssa_edge_inner_loop_phi (struct l - evolution_of_loop, limit); - } - --/* Follow an SSA edge from a loop-phi-node to itself, constructing a -- path that is analyzed on the return walk. */ -+/* Follow the ssa edge into the expression EXPR. -+ Return true if the strongly connected component has been found. 
*/ - - static t_bool --follow_ssa_edge (struct loop *loop, gimple *def, gphi *halting_phi, -- tree *evolution_of_loop, int limit) -+follow_ssa_edge_expr (struct loop *loop, gimple *at_stmt, tree expr, -+ gphi *halting_phi, tree *evolution_of_loop, -+ int limit) - { -- struct loop *def_loop; -+ enum tree_code code; -+ tree type, rhs0, rhs1 = NULL_TREE; - -- if (gimple_nop_p (def)) -- return t_false; -+ /* The EXPR is one of the following cases: -+ - an SSA_NAME, -+ - an INTEGER_CST, -+ - a PLUS_EXPR, -+ - a POINTER_PLUS_EXPR, -+ - a MINUS_EXPR, -+ - an ASSERT_EXPR, -+ - other cases are not yet handled. */ - -- /* Give up if the path is longer than the MAX that we allow. */ -- if (limit > PARAM_VALUE (PARAM_SCEV_MAX_EXPR_COMPLEXITY)) -- return t_dont_know; -- -- def_loop = loop_containing_stmt (def); -- -- switch (gimple_code (def)) -- { -- case GIMPLE_PHI: -- if (!loop_phi_node_p (def)) -- /* DEF is a condition-phi-node. Follow the branches, and -- record their evolutions. Finally, merge the collected -- information and set the approximation to the main -- variable. */ -- return follow_ssa_edge_in_condition_phi -- (loop, as_a (def), halting_phi, evolution_of_loop, -- limit); -- -- /* When the analyzed phi is the halting_phi, the -- depth-first search is over: we have found a path from -- the halting_phi to itself in the loop. */ -- if (def == halting_phi) -- return t_true; -+ /* For SSA_NAME look at the definition statement, handling -+ PHI nodes and otherwise expand appropriately for the expression -+ handling below. */ -+ if (TREE_CODE (expr) == SSA_NAME) -+ { -+ gimple *def = SSA_NAME_DEF_STMT (expr); - -- /* Otherwise, the evolution of the HALTING_PHI depends -- on the evolution of another loop-phi-node, i.e. the -- evolution function is a higher degree polynomial. */ -- if (def_loop == loop) -+ if (gimple_nop_p (def)) - return t_false; - -- /* Inner loop. */ -- if (flow_loop_nested_p (loop, def_loop)) -- return follow_ssa_edge_inner_loop_phi -- (loop, as_a (def), halting_phi, evolution_of_loop, -- limit + 1); -+ /* Give up if the path is longer than the MAX that we allow. */ -+ if (limit > PARAM_VALUE (PARAM_SCEV_MAX_EXPR_COMPLEXITY)) -+ return t_dont_know; - -- /* Outer loop. */ -- return t_false; -+ if (gphi *phi = dyn_cast (def)) -+ { -+ if (!loop_phi_node_p (phi)) -+ /* DEF is a condition-phi-node. Follow the branches, and -+ record their evolutions. Finally, merge the collected -+ information and set the approximation to the main -+ variable. */ -+ return follow_ssa_edge_in_condition_phi -+ (loop, phi, halting_phi, evolution_of_loop, limit); -+ -+ /* When the analyzed phi is the halting_phi, the -+ depth-first search is over: we have found a path from -+ the halting_phi to itself in the loop. */ -+ if (phi == halting_phi) -+ return t_true; -+ -+ /* Otherwise, the evolution of the HALTING_PHI depends -+ on the evolution of another loop-phi-node, i.e. the -+ evolution function is a higher degree polynomial. */ -+ class loop *def_loop = loop_containing_stmt (def); -+ if (def_loop == loop) -+ return t_false; -+ -+ /* Inner loop. */ -+ if (flow_loop_nested_p (loop, def_loop)) -+ return follow_ssa_edge_inner_loop_phi -+ (loop, phi, halting_phi, evolution_of_loop, -+ limit + 1); - -- case GIMPLE_ASSIGN: -- return follow_ssa_edge_in_rhs (loop, def, halting_phi, -- evolution_of_loop, limit); -+ /* Outer loop. */ -+ return t_false; -+ } - -- default: - /* At this level of abstraction, the program is just a set - of GIMPLE_ASSIGNs and PHI_NODEs. 
In principle there is no -- other node to be handled. */ -+ other def to be handled. */ -+ if (!is_gimple_assign (def)) -+ return t_false; -+ -+ code = gimple_assign_rhs_code (def); -+ switch (get_gimple_rhs_class (code)) -+ { -+ case GIMPLE_BINARY_RHS: -+ rhs0 = gimple_assign_rhs1 (def); -+ rhs1 = gimple_assign_rhs2 (def); -+ break; -+ case GIMPLE_UNARY_RHS: -+ case GIMPLE_SINGLE_RHS: -+ rhs0 = gimple_assign_rhs1 (def); -+ break; -+ default: -+ return t_false; -+ } -+ type = TREE_TYPE (gimple_assign_lhs (def)); -+ at_stmt = def; -+ } -+ else -+ { -+ code = TREE_CODE (expr); -+ type = TREE_TYPE (expr); -+ switch (code) -+ { -+ CASE_CONVERT: -+ rhs0 = TREE_OPERAND (expr, 0); -+ break; -+ case POINTER_PLUS_EXPR: -+ case PLUS_EXPR: -+ case MINUS_EXPR: -+ rhs0 = TREE_OPERAND (expr, 0); -+ rhs1 = TREE_OPERAND (expr, 1); -+ break; -+ default: -+ rhs0 = expr; -+ } -+ } -+ -+ switch (code) -+ { -+ CASE_CONVERT: -+ { -+ /* This assignment is under the form "a_1 = (cast) rhs. */ -+ t_bool res = follow_ssa_edge_expr (loop, at_stmt, rhs0, halting_phi, -+ evolution_of_loop, limit); -+ *evolution_of_loop = chrec_convert (type, *evolution_of_loop, at_stmt); -+ return res; -+ } -+ -+ case INTEGER_CST: -+ /* This assignment is under the form "a_1 = 7". */ -+ return t_false; -+ -+ case ADDR_EXPR: -+ { -+ /* Handle &MEM[ptr + CST] which is equivalent to POINTER_PLUS_EXPR. */ -+ if (TREE_CODE (TREE_OPERAND (rhs0, 0)) != MEM_REF) -+ return t_false; -+ tree mem = TREE_OPERAND (rhs0, 0); -+ rhs0 = TREE_OPERAND (mem, 0); -+ rhs1 = TREE_OPERAND (mem, 1); -+ code = POINTER_PLUS_EXPR; -+ } -+ /* Fallthru. */ -+ case POINTER_PLUS_EXPR: -+ case PLUS_EXPR: -+ case MINUS_EXPR: -+ /* This case is under the form "rhs0 +- rhs1". */ -+ STRIP_USELESS_TYPE_CONVERSION (rhs0); -+ STRIP_USELESS_TYPE_CONVERSION (rhs1); -+ return follow_ssa_edge_binary (loop, at_stmt, type, rhs0, code, rhs1, -+ halting_phi, evolution_of_loop, limit); -+ -+ case ASSERT_EXPR: -+ /* This assignment is of the form: "a_1 = ASSERT_EXPR " -+ It must be handled as a copy assignment of the form a_1 = a_2. */ -+ return follow_ssa_edge_expr (loop, at_stmt, ASSERT_EXPR_VAR (rhs0), -+ halting_phi, evolution_of_loop, limit); -+ -+ default: - return t_false; - } - } -@@ -1504,7 +1463,6 @@ analyze_evolution_in_loop (gphi *loop_ph - for (i = 0; i < n; i++) - { - tree arg = PHI_ARG_DEF (loop_phi_node, i); -- gimple *ssa_chain; - tree ev_fn; - t_bool res; - -@@ -1517,11 +1475,10 @@ analyze_evolution_in_loop (gphi *loop_ph - { - bool val = false; - -- ssa_chain = SSA_NAME_DEF_STMT (arg); -- - /* Pass in the initial condition to the follow edge function. */ - ev_fn = init_cond; -- res = follow_ssa_edge (loop, ssa_chain, loop_phi_node, &ev_fn, 0); -+ res = follow_ssa_edge_expr (loop, loop_phi_node, arg, -+ loop_phi_node, &ev_fn, 0); - - /* If ev_fn has no evolution in the inner loop, and the - init_cond is not equal to ev_fn, then we have an -diff -Nurp a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c ---- a/gcc/tree-ssa-sccvn.c 2020-10-26 18:28:58.736000000 +0800 -+++ b/gcc/tree-ssa-sccvn.c 2020-10-26 18:31:45.768000000 +0800 -@@ -2456,7 +2456,8 @@ vn_reference_lookup_3 (ao_ref *ref, tree - (vuse, vr->set, vr->type, vr->operands, val); - } - /* For now handle clearing memory with partial defs. 
*/ -- else if (integer_zerop (gimple_call_arg (def_stmt, 1)) -+ else if (known_eq (ref->size, maxsize) -+ && integer_zerop (gimple_call_arg (def_stmt, 1)) - && tree_to_poly_int64 (len).is_constant (&leni) - && offset.is_constant (&offseti) - && offset2.is_constant (&offset2i) -@@ -2494,7 +2495,8 @@ vn_reference_lookup_3 (ao_ref *ref, tree - return vn_reference_lookup_or_insert_for_pieces - (vuse, vr->set, vr->type, vr->operands, val); - } -- else if (maxsize.is_constant (&maxsizei) -+ else if (known_eq (ref->size, maxsize) -+ && maxsize.is_constant (&maxsizei) - && maxsizei % BITS_PER_UNIT == 0 - && offset.is_constant (&offseti) - && offseti % BITS_PER_UNIT == 0 -diff -Nurp a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c ---- a/gcc/tree-vect-data-refs.c 2020-10-26 18:28:58.792000000 +0800 -+++ b/gcc/tree-vect-data-refs.c 2020-10-26 18:31:56.512000000 +0800 -@@ -1045,7 +1045,7 @@ vect_compute_data_ref_alignment (dr_vec_ - if (tree_int_cst_sgn (drb->step) < 0) - /* PLUS because STEP is negative. */ - misalignment += ((TYPE_VECTOR_SUBPARTS (vectype) - 1) -- * TREE_INT_CST_LOW (drb->step)); -+ * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))); - - unsigned int const_misalignment; - if (!known_misalignment (misalignment, vect_align_c, &const_misalignment)) -diff -Nurp a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c ---- a/gcc/tree-vect-loop.c 2020-10-26 18:28:58.728000000 +0800 -+++ b/gcc/tree-vect-loop.c 2020-10-26 18:31:53.584000000 +0800 -@@ -1850,7 +1850,10 @@ vect_dissolve_slp_only_groups (loop_vec_ - DR_GROUP_FIRST_ELEMENT (vinfo) = vinfo; - DR_GROUP_NEXT_ELEMENT (vinfo) = NULL; - DR_GROUP_SIZE (vinfo) = 1; -- DR_GROUP_GAP (vinfo) = group_size - 1; -+ if (STMT_VINFO_STRIDED_P (first_element)) -+ DR_GROUP_GAP (vinfo) = 0; -+ else -+ DR_GROUP_GAP (vinfo) = group_size - 1; - vinfo = next; - } - } -@@ -4516,18 +4519,26 @@ vect_create_epilog_for_reduction (stmt_v - zeroes. */ - if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION) - { -+ auto_vec, 2> ccompares; - stmt_vec_info cond_info = STMT_VINFO_REDUC_DEF (reduc_info); - cond_info = vect_stmt_to_vectorize (cond_info); -- while (gimple_assign_rhs_code (cond_info->stmt) != COND_EXPR) -+ while (cond_info != reduc_info) - { -+ if (gimple_assign_rhs_code (cond_info->stmt) == COND_EXPR) -+ { -+ gimple *vec_stmt = STMT_VINFO_VEC_STMT (cond_info)->stmt; -+ gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR); -+ ccompares.safe_push -+ (std::make_pair (unshare_expr (gimple_assign_rhs1 (vec_stmt)), -+ STMT_VINFO_REDUC_IDX (cond_info) == 2)); -+ } - cond_info - = loop_vinfo->lookup_def (gimple_op (cond_info->stmt, - 1 + STMT_VINFO_REDUC_IDX - (cond_info))); - cond_info = vect_stmt_to_vectorize (cond_info); - } -- gimple *vec_stmt = STMT_VINFO_VEC_STMT (cond_info)->stmt; -- gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR); -+ gcc_assert (ccompares.length () != 0); - - tree indx_before_incr, indx_after_incr; - poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype); -@@ -4569,37 +4580,35 @@ vect_create_epilog_for_reduction (stmt_v - add_phi_arg (as_a (new_phi), vec_zero, - loop_preheader_edge (loop), UNKNOWN_LOCATION); - -- /* Now take the condition from the loops original cond_expr -- (VEC_STMT) and produce a new cond_expr (INDEX_COND_EXPR) which for -+ /* Now take the condition from the loops original cond_exprs -+ and produce a new cond_exprs (INDEX_COND_EXPR) which for - every match uses values from the induction variable - (INDEX_BEFORE_INCR) otherwise uses values from the phi node - (NEW_PHI_TREE). 
- Finally, we update the phi (NEW_PHI_TREE) to take the value of - the new cond_expr (INDEX_COND_EXPR). */ -- -- /* Duplicate the condition from vec_stmt. */ -- tree ccompare = unshare_expr (gimple_assign_rhs1 (vec_stmt)); -- -- /* Create a conditional, where the condition is taken from vec_stmt -- (CCOMPARE). The then and else values mirror the main VEC_COND_EXPR: -- the reduction phi corresponds to NEW_PHI_TREE and the new values -- correspond to INDEX_BEFORE_INCR. */ -- gcc_assert (STMT_VINFO_REDUC_IDX (cond_info) >= 1); -- tree index_cond_expr; -- if (STMT_VINFO_REDUC_IDX (cond_info) == 2) -- index_cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type, -- ccompare, indx_before_incr, new_phi_tree); -- else -- index_cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type, -- ccompare, new_phi_tree, indx_before_incr); -- induction_index = make_ssa_name (cr_index_vector_type); -- gimple *index_condition = gimple_build_assign (induction_index, -- index_cond_expr); -- gsi_insert_before (&incr_gsi, index_condition, GSI_SAME_STMT); -- stmt_vec_info index_vec_info = loop_vinfo->add_stmt (index_condition); -+ gimple_seq stmts = NULL; -+ for (int i = ccompares.length () - 1; i != -1; --i) -+ { -+ tree ccompare = ccompares[i].first; -+ if (ccompares[i].second) -+ new_phi_tree = gimple_build (&stmts, VEC_COND_EXPR, -+ cr_index_vector_type, -+ ccompare, -+ indx_before_incr, new_phi_tree); -+ else -+ new_phi_tree = gimple_build (&stmts, VEC_COND_EXPR, -+ cr_index_vector_type, -+ ccompare, -+ new_phi_tree, indx_before_incr); -+ } -+ gsi_insert_seq_before (&incr_gsi, stmts, GSI_SAME_STMT); -+ stmt_vec_info index_vec_info -+ = loop_vinfo->add_stmt (SSA_NAME_DEF_STMT (new_phi_tree)); - STMT_VINFO_VECTYPE (index_vec_info) = cr_index_vector_type; - - /* Update the phi with the vec cond. */ -+ induction_index = new_phi_tree; - add_phi_arg (as_a (new_phi), induction_index, - loop_latch_edge (loop), UNKNOWN_LOCATION); - } diff --git a/fix-ICE-in-vectorizable-load.patch b/fix-ICE-in-vectorizable-load.patch deleted file mode 100644 index bb316370f357bcf8e583d9ab93e1991a359e74cf..0000000000000000000000000000000000000000 --- a/fix-ICE-in-vectorizable-load.patch +++ /dev/null @@ -1,71 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. 
- -0001-vect-ICE-in-vectorizable_load-at-tree-vect-stmts.c-9.patch: -f14b41d27124601284347a10d496362c8b4b8e1c - -diff -Nurp a/gcc/testsuite/gcc.target/aarch64/pr94398.c b/gcc/testsuite/gcc.target/aarch64/pr94398.c ---- a/gcc/testsuite/gcc.target/aarch64/pr94398.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.target/aarch64/pr94398.c 2020-04-17 17:15:58.176000000 +0800 -@@ -0,0 +1,24 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -ftree-loop-vectorize -funsafe-math-optimizations -march=armv8.2-a+sve -mstrict-align" } */ -+ -+float -+foo(long n, float *x, int inc_x, -+ float *y, int inc_y) -+{ -+ float dot = 0.0; -+ int ix = 0, iy = 0; -+ -+ if (n < 0) { -+ return dot; -+ } -+ -+ int i = 0; -+ while (i < n) { -+ dot += y[iy] * x[ix]; -+ ix += inc_x; -+ iy += inc_y; -+ i++; -+ } -+ -+ return dot; -+} -diff -Nurp a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c ---- a/gcc/tree-vect-stmts.c 2020-04-17 17:10:14.796000000 +0800 -+++ b/gcc/tree-vect-stmts.c 2020-04-17 17:15:08.611850850 +0800 -@@ -7025,8 +7025,14 @@ vectorizable_store (stmt_vec_info stmt_i - auto_vec dr_chain (group_size); - oprnds.create (group_size); - -- alignment_support_scheme -- = vect_supportable_dr_alignment (first_dr_info, false); -+ /* Gather-scatter accesses perform only component accesses, alignment -+ is irrelevant for them. */ -+ if (memory_access_type == VMAT_GATHER_SCATTER) -+ alignment_support_scheme = dr_unaligned_supported; -+ else -+ alignment_support_scheme -+ = vect_supportable_dr_alignment (first_dr_info, false); -+ - gcc_assert (alignment_support_scheme); - vec_loop_masks *loop_masks - = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) -@@ -8162,8 +8168,14 @@ vectorizable_load (stmt_vec_info stmt_in - ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr)); - } - -- alignment_support_scheme -- = vect_supportable_dr_alignment (first_dr_info, false); -+ /* Gather-scatter accesses perform only component accesses, alignment -+ is irrelevant for them. */ -+ if (memory_access_type == VMAT_GATHER_SCATTER) -+ alignment_support_scheme = dr_unaligned_supported; -+ else -+ alignment_support_scheme -+ = vect_supportable_dr_alignment (first_dr_info, false); -+ - gcc_assert (alignment_support_scheme); - vec_loop_masks *loop_masks - = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) diff --git a/fix-ICE-in-vectorizable_condition.patch b/fix-ICE-in-vectorizable_condition.patch deleted file mode 100644 index 98f367e8a476cd8dca223dd4cd2e07947e585d3f..0000000000000000000000000000000000000000 --- a/fix-ICE-in-vectorizable_condition.patch +++ /dev/null @@ -1,18 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. 
- -0001-Fix-reduc_index-calculation-in-vectorizable_conditio.patch -1d149b7260bcc4c0c6367b3aea47a8b91a1cf345 - -diff -Nurp a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c ---- a/gcc/tree-vect-stmts.c 2020-08-18 19:35:06.352000000 +0800 -+++ b/gcc/tree-vect-stmts.c 2020-08-18 19:35:20.792000000 +0800 -@@ -9077,7 +9077,7 @@ vectorizable_condition (stmt_vec_info st - return false; - reduc_info = info_for_reduction (stmt_info); - reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info); -- reduc_index = STMT_VINFO_REDUC_IDX (reduc_info); -+ reduc_index = STMT_VINFO_REDUC_IDX (stmt_info); - gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION - || reduc_index != -1); - } diff --git a/fix-ICE-in-verify_ssa.patch b/fix-ICE-in-verify_ssa.patch deleted file mode 100644 index fa48fcbd2524bd1023d26ac0fb7c079ed9b54f60..0000000000000000000000000000000000000000 --- a/fix-ICE-in-verify_ssa.patch +++ /dev/null @@ -1,47 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-re-PR-tree-optimization-92461-ICE-verify_ssa-failed-.patch -830d1b18526dd1f085e8a2e1467a6dde18fc6434 - -diff -Nurp a/gcc/testsuite/gcc.dg/torture/pr92461.c b/gcc/testsuite/gcc.dg/torture/pr92461.c ---- a/gcc/testsuite/gcc.dg/torture/pr92461.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/torture/pr92461.c 2020-07-28 19:48:09.324000000 +0800 -@@ -0,0 +1,20 @@ -+/* { dg-do compile } */ -+/* { dg-additional-options "-ftree-vectorize" } */ -+ -+short int zb; -+ -+void -+gs (void) -+{ -+ while (zb < 1) -+ { -+ int at; -+ -+ zb %= 1; -+ -+ for (at = 0; at < 56; ++at) -+ zb += zb; -+ -+ ++zb; -+ } -+} -diff -Nurp a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c ---- a/gcc/tree-vect-loop.c 2020-07-28 19:47:53.896000000 +0800 -+++ b/gcc/tree-vect-loop.c 2020-07-28 19:48:09.324000000 +0800 -@@ -5459,8 +5459,11 @@ vect_create_epilog_for_reduction (stmt_v - orig_name = PHI_RESULT (exit_phi); - scalar_result = scalar_results[k]; - FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name) -- FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) -- SET_USE (use_p, scalar_result); -+ { -+ FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) -+ SET_USE (use_p, scalar_result); -+ update_stmt (use_stmt); -+ } - } - - phis.release (); diff --git a/fix-ICE-in-verify_target_availability.patch b/fix-ICE-in-verify_target_availability.patch deleted file mode 100644 index 8c59b0e1220690756fb36e2f09bba217a6a5e27f..0000000000000000000000000000000000000000 --- a/fix-ICE-in-verify_target_availability.patch +++ /dev/null @@ -1,88 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-arm-aarch64-Handle-no_insn-in-TARGET_SCHED_VARIABLE_.patch -d0bc0cb66bcb0e6a5a5a31a9e900e8ccc98e34e5 - -diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c ---- a/gcc/config/aarch64/aarch64.c 2020-09-03 15:54:20.136000000 +0800 -+++ b/gcc/config/aarch64/aarch64.c 2020-09-03 15:55:22.736000000 +0800 -@@ -11044,6 +11044,23 @@ aarch64_sched_issue_rate (void) - return aarch64_tune_params.issue_rate; - } - -+/* Implement TARGET_SCHED_VARIABLE_ISSUE. 
*/ -+static int -+aarch64_sched_variable_issue (FILE *, int, rtx_insn *insn, int more) -+{ -+ if (DEBUG_INSN_P (insn)) -+ return more; -+ -+ rtx_code code = GET_CODE (PATTERN (insn)); -+ if (code == USE || code == CLOBBER) -+ return more; -+ -+ if (get_attr_type (insn) == TYPE_NO_INSN) -+ return more; -+ -+ return more - 1; -+} -+ - static int - aarch64_sched_first_cycle_multipass_dfa_lookahead (void) - { -@@ -19428,6 +19445,9 @@ aarch64_libgcc_floating_mode_supported_p - #undef TARGET_SCHED_ISSUE_RATE - #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate - -+#undef TARGET_SCHED_VARIABLE_ISSUE -+#define TARGET_SCHED_VARIABLE_ISSUE aarch64_sched_variable_issue -+ - #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD - #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ - aarch64_sched_first_cycle_multipass_dfa_lookahead -diff -Nurp a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c ---- a/gcc/config/arm/arm.c 2020-09-03 15:54:20.100000000 +0800 -+++ b/gcc/config/arm/arm.c 2020-09-03 15:55:22.740000000 +0800 -@@ -258,6 +258,7 @@ static bool arm_sched_can_speculate_insn - static bool arm_macro_fusion_p (void); - static bool arm_cannot_copy_insn_p (rtx_insn *); - static int arm_issue_rate (void); -+static int arm_sched_variable_issue (FILE *, int, rtx_insn *, int); - static int arm_first_cycle_multipass_dfa_lookahead (void); - static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int); - static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; -@@ -666,6 +667,9 @@ static const struct attribute_spec arm_a - #undef TARGET_SCHED_ISSUE_RATE - #define TARGET_SCHED_ISSUE_RATE arm_issue_rate - -+#undef TARGET_SCHED_VARIABLE_ISSUE -+#define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue -+ - #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD - #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ - arm_first_cycle_multipass_dfa_lookahead -@@ -28316,6 +28320,23 @@ arm_issue_rate (void) - return current_tune->issue_rate; - } - -+/* Implement TARGET_SCHED_VARIABLE_ISSUE. */ -+static int -+arm_sched_variable_issue (FILE *, int, rtx_insn *insn, int more) -+{ -+ if (DEBUG_INSN_P (insn)) -+ return more; -+ -+ rtx_code code = GET_CODE (PATTERN (insn)); -+ if (code == USE || code == CLOBBER) -+ return more; -+ -+ if (get_attr_type (insn) == TYPE_NO_INSN) -+ return more; -+ -+ return more - 1; -+} -+ - /* Return how many instructions should scheduler lookahead to choose the - best one. */ - static int diff --git a/fix-ICE-statement-uses-released-SSA-name.patch b/fix-ICE-statement-uses-released-SSA-name.patch deleted file mode 100644 index 06107b4d34386859230b6771808c7b2cfcf99deb..0000000000000000000000000000000000000000 --- a/fix-ICE-statement-uses-released-SSA-name.patch +++ /dev/null @@ -1,109 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-tree-ssa-sccvn.c-class-pass_fre-Add-may_iterate-pass.patch -744fd446c321f78f9a1ce4ef5f83df8dcfa44a9e - -diff -Nurp a/gcc/passes.def b/gcc/passes.def ---- a/gcc/passes.def 2020-08-17 09:46:40.340000000 +0800 -+++ b/gcc/passes.def 2020-08-17 10:09:10.808000000 +0800 -@@ -83,7 +83,7 @@ along with GCC; see the file COPYING3. - /* pass_build_ealias is a dummy pass that ensures that we - execute TODO_rebuild_alias at this point. 
*/ - NEXT_PASS (pass_build_ealias); -- NEXT_PASS (pass_fre); -+ NEXT_PASS (pass_fre, true /* may_iterate */); - NEXT_PASS (pass_early_vrp); - NEXT_PASS (pass_merge_phi); - NEXT_PASS (pass_dse); -@@ -117,7 +117,7 @@ along with GCC; see the file COPYING3. - NEXT_PASS (pass_oacc_kernels); - PUSH_INSERT_PASSES_WITHIN (pass_oacc_kernels) - NEXT_PASS (pass_ch); -- NEXT_PASS (pass_fre); -+ NEXT_PASS (pass_fre, true /* may_iterate */); - /* We use pass_lim to rewrite in-memory iteration and reduction - variable accesses in loops into local variables accesses. */ - NEXT_PASS (pass_lim); -@@ -201,7 +201,7 @@ along with GCC; see the file COPYING3. - execute TODO_rebuild_alias at this point. */ - NEXT_PASS (pass_build_alias); - NEXT_PASS (pass_return_slot); -- NEXT_PASS (pass_fre); -+ NEXT_PASS (pass_fre, true /* may_iterate */); - NEXT_PASS (pass_merge_phi); - NEXT_PASS (pass_thread_jumps); - NEXT_PASS (pass_vrp, true /* warn_array_bounds_p */); -@@ -312,6 +312,7 @@ along with GCC; see the file COPYING3. - NEXT_PASS (pass_strength_reduction); - NEXT_PASS (pass_split_paths); - NEXT_PASS (pass_tracer); -+ NEXT_PASS (pass_fre, false /* may_iterate */); - NEXT_PASS (pass_thread_jumps); - NEXT_PASS (pass_dominator, false /* may_peel_loop_headers_p */); - NEXT_PASS (pass_strlen); -diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr77445-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr77445-2.c ---- a/gcc/testsuite/gcc.dg/tree-ssa/pr77445-2.c 2020-08-17 09:46:41.332000000 +0800 -+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr77445-2.c 2020-08-17 10:09:10.808000000 +0800 -@@ -125,7 +125,7 @@ enum STATES FMS( u8 **in , u32 *transiti - jump threading opportunities. Skip the later tests on aarch64. */ - /* { dg-final { scan-tree-dump "Jumps threaded: 1\[1-9\]" "thread1" } } */ - /* { dg-final { scan-tree-dump-times "Invalid sum" 3 "thread1" } } */ --/* { dg-final { scan-tree-dump-not "not considered" "thread1" } } */ --/* { dg-final { scan-tree-dump-not "not considered" "thread2" } } */ --/* { dg-final { scan-tree-dump-not "not considered" "thread3" { target { ! aarch64*-*-* } } } } */ --/* { dg-final { scan-tree-dump-not "not considered" "thread4" { target { ! aarch64*-*-* } } } } */ -+/* { dg-final { scan-tree-dump-not "optimizing for size" "thread1" } } */ -+/* { dg-final { scan-tree-dump-not "optimizing for size" "thread2" } } */ -+/* { dg-final { scan-tree-dump-not "optimizing for size" "thread3" { target { ! aarch64*-*-* } } } } */ -+/* { dg-final { scan-tree-dump-not "optimizing for size" "thread4" { target { ! 
aarch64*-*-* } } } } */ -diff -Nurp a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c ---- a/gcc/tree-ssa-sccvn.c 2020-08-17 09:46:42.212000000 +0800 -+++ b/gcc/tree-ssa-sccvn.c 2020-08-17 10:09:10.808000000 +0800 -@@ -7232,14 +7232,24 @@ class pass_fre : public gimple_opt_pass - { - public: - pass_fre (gcc::context *ctxt) -- : gimple_opt_pass (pass_data_fre, ctxt) -+ : gimple_opt_pass (pass_data_fre, ctxt), may_iterate (true) - {} - - /* opt_pass methods: */ - opt_pass * clone () { return new pass_fre (m_ctxt); } -- virtual bool gate (function *) { return flag_tree_fre != 0; } -+ void set_pass_param (unsigned int n, bool param) -+ { -+ gcc_assert (n == 0); -+ may_iterate = param; -+ } -+ virtual bool gate (function *) -+ { -+ return flag_tree_fre != 0 && (may_iterate || optimize > 1); -+ } - virtual unsigned int execute (function *); - -+private: -+ bool may_iterate; - }; // class pass_fre - - unsigned int -@@ -7248,15 +7258,16 @@ pass_fre::execute (function *fun) - unsigned todo = 0; - - /* At -O[1g] use the cheap non-iterating mode. */ -+ bool iterate_p = may_iterate && (optimize > 1); - calculate_dominance_info (CDI_DOMINATORS); -- if (optimize > 1) -+ if (iterate_p) - loop_optimizer_init (AVOID_CFG_MODIFICATIONS); - - default_vn_walk_kind = VN_WALKREWRITE; -- todo = do_rpo_vn (fun, NULL, NULL, optimize > 1, true); -+ todo = do_rpo_vn (fun, NULL, NULL, iterate_p, true); - free_rpo_vn (); - -- if (optimize > 1) -+ if (iterate_p) - loop_optimizer_finalize (); - - return todo; diff --git a/fix-ICE-vect_slp_analyze_node_operations.patch b/fix-ICE-vect_slp_analyze_node_operations.patch deleted file mode 100644 index 1536f1ec56f58b47fd09a25884b33bf797fc4772..0000000000000000000000000000000000000000 --- a/fix-ICE-vect_slp_analyze_node_operations.patch +++ /dev/null @@ -1,78 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-re-PR-tree-optimization-92537-ICE-in-vect_slp_analyz.patch -2439d584d5def75d705f33218bb3b97fca4c11a1 - -diff -Nurp a/gcc/testsuite/gfortran.dg/pr92537.f90 b/gcc/testsuite/gfortran.dg/pr92537.f90 ---- a/gcc/testsuite/gfortran.dg/pr92537.f90 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gfortran.dg/pr92537.f90 2020-09-03 16:53:43.024000000 +0800 -@@ -0,0 +1,32 @@ -+! { dg-do compile } -+! { dg-options "-O2 -ftree-vectorize -fno-inline" } -+! 
{ dg-additional-options "-march=skylake" { target x86_64-*-* i?86-*-* } } -+MODULE pr93527 -+ implicit none -+ integer, parameter :: wp = kind (1.d0) -+ interface p_min -+ module procedure p_min_wp -+ end interface -+contains -+ subroutine foo (pr) -+ real(wp), pointer :: pr(:) -+ integer :: nzd -+ real(wp) :: pmin -+ real(wp) :: pmin_diag -+ integer :: i -+ nzd = 15 -+ allocate (pr(nzd)) -+ pmin_diag = 4000._wp -+ pmin = p_min(pmin_diag) -+ pmin = min (pmin,pmin_diag) -+ pr(1) = log(pmin) -+ do i=1,nzd-1 -+ pr(i+1) = log(pmin) + i -+ end do -+ end subroutine foo -+ function p_min_wp (x) result (p_min) -+ real(wp), intent(in) :: x -+ real(wp) :: p_min -+ p_min = x -+ end function p_min_wp -+end module pr93527 -diff -Nurp a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c ---- a/gcc/tree-vect-slp.c 2020-09-03 16:53:22.668000000 +0800 -+++ b/gcc/tree-vect-slp.c 2020-09-03 16:53:43.024000000 +0800 -@@ -2176,18 +2176,6 @@ vect_analyze_slp_instance (vec_info *vin - matches[group_size / const_max_nunits * const_max_nunits] = false; - vect_free_slp_tree (node, false); - } -- else if (constructor -- && SLP_TREE_DEF_TYPE (node) != vect_internal_def) -- { -- /* CONSTRUCTOR vectorization relies on a vector stmt being -- generated, that doesn't work for fully external ones. */ -- if (dump_enabled_p ()) -- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, -- "Build SLP failed: CONSTRUCTOR of external " -- "or constant elements\n"); -- vect_free_slp_tree (node, false); -- return false; -- } - else - { - /* Create a new SLP instance. */ -@@ -2872,7 +2860,12 @@ vect_slp_analyze_operations (vec_info *v - if (!vect_slp_analyze_node_operations (vinfo, - SLP_INSTANCE_TREE (instance), - instance, visited, lvisited, -- &cost_vec)) -+ &cost_vec) -+ /* Instances with a root stmt require vectorized defs for the -+ SLP tree root. */ -+ || (SLP_INSTANCE_ROOT_STMT (instance) -+ && (SLP_TREE_DEF_TYPE (SLP_INSTANCE_TREE (instance)) -+ != vect_internal_def))) - { - slp_tree node = SLP_INSTANCE_TREE (instance); - stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0]; diff --git a/fix-ICE-when-vectorizing-nested-cycles.patch b/fix-ICE-when-vectorizing-nested-cycles.patch deleted file mode 100644 index 2aa3f468203e094ea6119e33424e2c75d5fce91a..0000000000000000000000000000000000000000 --- a/fix-ICE-when-vectorizing-nested-cycles.patch +++ /dev/null @@ -1,151 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-tree-optimization-96698-fix-ICE-when-vectorizing-nes.patch -2130efe6ac7beba72d289e3dd145daa10aeaed54 - -diff -uprN a/gcc/testsuite/gcc.dg/vect/pr96698.c b/gcc/testsuite/gcc.dg/vect/pr96698.c ---- a/gcc/testsuite/gcc.dg/vect/pr96698.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/vect/pr96698.c 2020-08-27 17:53:24.396000000 +0800 -@@ -0,0 +1,19 @@ -+/* { dg-do compile } */ -+ -+void test(int a, int* i) -+{ -+ for (; a < 5; ++a) -+ { -+ int b = 0; -+ int c = 0; -+ for (; b != -11; b--) -+ for (int d = 0; d ==0; d++) -+ { -+ *i += c & a; -+ c = b; -+ } -+ } -+} -+ -+/* We should be able to vectorize the inner cycle. 
*/ -+/* { dg-final { scan-tree-dump "OUTER LOOP VECTORIZED" "vect" { target vect_int } } } */ -diff -uprN a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c ---- a/gcc/tree-vect-loop.c 2020-08-27 09:25:58.000000000 +0800 -+++ b/gcc/tree-vect-loop.c 2020-08-27 18:41:41.016000000 +0800 -@@ -4325,7 +4325,8 @@ info_for_reduction (stmt_vec_info stmt_i - { - stmt_info = vect_orig_stmt (stmt_info); - gcc_assert (STMT_VINFO_REDUC_DEF (stmt_info)); -- if (!is_a (stmt_info->stmt)) -+ if (!is_a (stmt_info->stmt) -+ || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))) - stmt_info = STMT_VINFO_REDUC_DEF (stmt_info); - gphi *phi = as_a (stmt_info->stmt); - if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def) -@@ -8622,6 +8623,43 @@ vect_transform_loop (loop_vec_info loop_ - } - } - -+ /* Fill in backedge defs of reductions. */ -+ for (unsigned i = 0; i < loop_vinfo->reduc_latch_defs.length (); ++i) -+ { -+ stmt_vec_info stmt_info = loop_vinfo->reduc_latch_defs[i]; -+ stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info); -+ stmt_vec_info phi_info -+ = STMT_VINFO_VEC_STMT (STMT_VINFO_REDUC_DEF (orig_stmt_info)); -+ stmt_vec_info vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); -+ gphi *phi -+ = dyn_cast (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt); -+ edge e = loop_latch_edge (gimple_bb (phi_info->stmt)->loop_father); -+ do -+ { -+ add_phi_arg (as_a (phi_info->stmt), -+ gimple_get_lhs (vec_stmt->stmt), e, -+ gimple_phi_arg_location (phi, e->dest_idx)); -+ phi_info = STMT_VINFO_RELATED_STMT (phi_info); -+ vec_stmt = STMT_VINFO_RELATED_STMT (vec_stmt); -+ } -+ while (phi_info); -+ gcc_assert (!vec_stmt); -+ } -+ for (unsigned i = 0; i < loop_vinfo->reduc_latch_slp_defs.length (); ++i) -+ { -+ slp_tree slp_node = loop_vinfo->reduc_latch_slp_defs[i].first; -+ slp_tree phi_node = loop_vinfo->reduc_latch_slp_defs[i].second; -+ gphi *phi = as_a (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt); -+ e = loop_latch_edge (gimple_bb (phi)->loop_father); -+ gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length () -+ == SLP_TREE_VEC_STMTS (slp_node).length ()); -+ for (unsigned j = 0; j < SLP_TREE_VEC_STMTS (phi_node).length (); ++j) -+ add_phi_arg (as_a (SLP_TREE_VEC_STMTS (phi_node)[j]->stmt), -+ gimple_get_lhs -+ (SLP_TREE_VEC_STMTS (slp_node)[j]->stmt), -+ e, gimple_phi_arg_location (phi, e->dest_idx)); -+ } -+ - /* Stub out scalar statements that must not survive vectorization. - Doing this here helps with grouped statements, or statements that - are involved in patterns. */ -diff -uprN a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h ---- a/gcc/tree-vectorizer.h 2020-08-27 09:25:57.000000000 +0800 -+++ b/gcc/tree-vectorizer.h 2020-08-27 17:53:24.400000000 +0800 -@@ -575,6 +575,11 @@ typedef struct _loop_vec_info : public v - stmt in the chain. */ - auto_vec reduction_chains; - -+ /* The vectorized stmts defining the latch values of the reduction -+ they are involved with. */ -+ auto_vec reduc_latch_defs; -+ auto_vec > reduc_latch_slp_defs; -+ - /* Cost vector for a single scalar iteration. */ - auto_vec scalar_cost_vec; - -diff -uprN a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c ---- a/gcc/tree-vect-stmts.c 2020-08-27 09:25:58.000000000 +0800 -+++ b/gcc/tree-vect-stmts.c 2020-08-27 17:53:24.400000000 +0800 -@@ -10213,8 +10213,8 @@ vect_transform_stmt (stmt_vec_info stmt_ - if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type) - return is_store; - -- /* If this stmt defines a value used on a backedge, update the -- vectorized PHIs. 
*/ -+ /* If this stmt defines a value used on a backedge, record it so -+ we can update the vectorized PHIs later. */ - stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info); - stmt_vec_info reduc_info; - if (STMT_VINFO_REDUC_DEF (orig_stmt_info) -@@ -10234,32 +10234,13 @@ vect_transform_stmt (stmt_vec_info stmt_ - && (PHI_ARG_DEF_FROM_EDGE (phi, e) - == gimple_get_lhs (orig_stmt_info->stmt))) - { -- stmt_vec_info phi_info -- = STMT_VINFO_VEC_STMT (STMT_VINFO_REDUC_DEF (orig_stmt_info)); -- stmt_vec_info vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); -- do -- { -- add_phi_arg (as_a (phi_info->stmt), -- gimple_get_lhs (vec_stmt->stmt), e, -- gimple_phi_arg_location (phi, e->dest_idx)); -- phi_info = STMT_VINFO_RELATED_STMT (phi_info); -- vec_stmt = STMT_VINFO_RELATED_STMT (vec_stmt); -- } -- while (phi_info); -- gcc_assert (!vec_stmt); -+ as_a (vinfo)->reduc_latch_defs.safe_push (stmt_info); - } - else if (slp_node - && slp_node != slp_node_instance->reduc_phis) - { -- slp_tree phi_node = slp_node_instance->reduc_phis; -- gphi *phi = as_a (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt); -- e = loop_latch_edge (gimple_bb (phi)->loop_father); -- gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length () -- == SLP_TREE_VEC_STMTS (slp_node).length ()); -- for (unsigned i = 0; i < SLP_TREE_VEC_STMTS (phi_node).length (); ++i) -- add_phi_arg (as_a (SLP_TREE_VEC_STMTS (phi_node)[i]->stmt), -- gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[i]->stmt), -- e, gimple_phi_arg_location (phi, e->dest_idx)); -+ as_a (vinfo)->reduc_latch_slp_defs.safe_push -+ (std::make_pair (slp_node, slp_node_instance->reduc_phis)); - } - } - diff --git a/fix-PR-92351-When-peeling-for-alignment.patch b/fix-PR-92351-When-peeling-for-alignment.patch deleted file mode 100644 index 88866e65394c8c7ea4ca8e61dc6d9f6c78284591..0000000000000000000000000000000000000000 --- a/fix-PR-92351-When-peeling-for-alignment.patch +++ /dev/null @@ -1,152 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-vect-PR-92351-When-peeling-for-alignment-make-alignm.patch -4e9d58d16767b1bc686f0c4b3bd2da25dc71e8f3 - -diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-2-epilogues.c b/gcc/testsuite/gcc.dg/vect/vect-peel-2-epilogues.c -new file mode 100644 -index 00000000000..c06fa442faf ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/vect/vect-peel-2-epilogues.c -@@ -0,0 +1,3 @@ -+/* { dg-require-effective-target vect_int } */ -+ -+#include "vect-peel-2-src.c" -diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-2-src.c b/gcc/testsuite/gcc.dg/vect/vect-peel-2-src.c -new file mode 100644 -index 00000000000..f6fc134c870 ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/vect/vect-peel-2-src.c -@@ -0,0 +1,48 @@ -+#include -+#include "tree-vect.h" -+ -+#define N 128 -+ -+/* unaligned store. */ -+ -+int ib[N+7]; -+ -+__attribute__ ((noinline)) -+int main1 () -+{ -+ int i; -+ int ia[N+1]; -+ -+ /* The store is aligned and the loads are misaligned with the same -+ misalignment. Cost model is disabled. If misaligned stores are supported, -+ we peel according to the loads to align them. 
*/ -+ for (i = 0; i <= N; i++) -+ { -+ ia[i] = ib[i+2] + ib[i+6]; -+ } -+ -+ /* check results: */ -+ for (i = 1; i <= N; i++) -+ { -+ if (ia[i] != ib[i+2] + ib[i+6]) -+ abort (); -+ } -+ -+ return 0; -+} -+ -+int main (void) -+{ -+ int i; -+ -+ check_vect (); -+ -+ for (i = 0; i <= N+6; i++) -+ { -+ asm volatile ("" : "+r" (i)); -+ ib[i] = i; -+ } -+ -+ return main1 (); -+} -+ -diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-2.c b/gcc/testsuite/gcc.dg/vect/vect-peel-2.c -index b6061c3b855..65e70bd4417 100644 ---- a/gcc/testsuite/gcc.dg/vect/vect-peel-2.c -+++ b/gcc/testsuite/gcc.dg/vect/vect-peel-2.c -@@ -1,52 +1,8 @@ - /* { dg-require-effective-target vect_int } */ -+/* Disabling epilogues until we find a better way to deal with scans. */ -+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ - --#include --#include "tree-vect.h" -- --#define N 128 -- --/* unaligned store. */ -- --int ib[N+7]; -- --__attribute__ ((noinline)) --int main1 () --{ -- int i; -- int ia[N+1]; -- -- /* The store is aligned and the loads are misaligned with the same -- misalignment. Cost model is disabled. If misaligned stores are supported, -- we peel according to the loads to align them. */ -- for (i = 0; i <= N; i++) -- { -- ia[i] = ib[i+2] + ib[i+6]; -- } -- -- /* check results: */ -- for (i = 1; i <= N; i++) -- { -- if (ia[i] != ib[i+2] + ib[i+6]) -- abort (); -- } -- -- return 0; --} -- --int main (void) --{ -- int i; -- -- check_vect (); -- -- for (i = 0; i <= N+6; i++) -- { -- asm volatile ("" : "+r" (i)); -- ib[i] = i; -- } -- -- return main1 (); --} -+#include "vect-peel-2-src.c" - - /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ - /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { target { { vect_element_align } && { vect_aligned_arrays } } } } } */ -diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c -index 36639b697f1..88f14e73d65 100644 ---- a/gcc/tree-vect-data-refs.c -+++ b/gcc/tree-vect-data-refs.c -@@ -938,6 +938,18 @@ vect_compute_data_ref_alignment (dr_vec_info *dr_info) - = exact_div (vect_calculate_target_alignment (dr_info), BITS_PER_UNIT); - DR_TARGET_ALIGNMENT (dr_info) = vector_alignment; - -+ /* If the main loop has peeled for alignment we have no way of knowing -+ whether the data accesses in the epilogues are aligned. We can't at -+ compile time answer the question whether we have entered the main loop or -+ not. Fixes PR 92351. */ -+ if (loop_vinfo) -+ { -+ loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo); -+ if (orig_loop_vinfo -+ && LOOP_VINFO_PEELING_FOR_ALIGNMENT (orig_loop_vinfo) != 0) -+ return; -+ } -+ - unsigned HOST_WIDE_INT vect_align_c; - if (!vector_alignment.is_constant (&vect_align_c)) - return; diff --git a/fix-SSA-update-for-vectorizer-epilogue.patch b/fix-SSA-update-for-vectorizer-epilogue.patch deleted file mode 100644 index 96469b6662dff65a011917fbdbc9497c4a29e709..0000000000000000000000000000000000000000 --- a/fix-SSA-update-for-vectorizer-epilogue.patch +++ /dev/null @@ -1,47 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. 
- -0001-tree-optimization-95717-fix-SSA-update-for-vectorize.patch -d0909f5858ad81e6d8b73fa6193be19cb5e6ed7b - -diff -Nurp a/gcc/testsuite/g++.dg/torture/pr95717.C b/gcc/testsuite/g++.dg/torture/pr95717.C ---- a/gcc/testsuite/g++.dg/torture/pr95717.C 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/g++.dg/torture/pr95717.C 2020-08-24 21:45:48.436000000 +0800 -@@ -0,0 +1,12 @@ -+// { dg-do compile } -+ -+bool a; -+extern bool b[]; -+long c, d; -+int *f; -+void g(bool h) -+{ -+ for (short e = 0; e < c; e = 4) -+ for (; d; d++) -+ b[d] = a = f[d] ? c ? h : 0 : h; -+} -diff -Nurp a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c ---- a/gcc/tree-vect-loop-manip.c 2020-08-24 21:45:23.620000000 +0800 -+++ b/gcc/tree-vect-loop-manip.c 2020-08-24 21:45:48.436000000 +0800 -@@ -1073,6 +1073,10 @@ slpeel_tree_duplicate_loop_to_edge_cfg ( - - add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL); - -+ /* Skip new preheader since it's deleted if copy loop is added at entry. */ -+ for (unsigned i = (at_exit ? 0 : 1); i < scalar_loop->num_nodes + 1; i++) -+ rename_variables_in_bb (new_bbs[i], duplicate_outer_loop); -+ - if (scalar_loop != loop) - { - /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from -@@ -1150,10 +1154,6 @@ slpeel_tree_duplicate_loop_to_edge_cfg ( - loop_preheader_edge (new_loop)->src); - } - -- /* Skip new preheader since it's deleted if copy loop is added at entry. */ -- for (unsigned i = (at_exit ? 0 : 1); i < scalar_loop->num_nodes + 1; i++) -- rename_variables_in_bb (new_bbs[i], duplicate_outer_loop); -- - if (scalar_loop != loop) - { - /* Update new_loop->header PHIs, so that on the preheader diff --git a/fix-SYMBOL_TINY_GOT-handling-for-ILP32.patch b/fix-SYMBOL_TINY_GOT-handling-for-ILP32.patch deleted file mode 100644 index 9f596ca8671a6c018e67e9aedafdf263b0320670..0000000000000000000000000000000000000000 --- a/fix-SYMBOL_TINY_GOT-handling-for-ILP32.patch +++ /dev/null @@ -1,87 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. 
- -0001-aarch64-Fix-SYMBOL_TINY_GOT-handling-for-ILP32-PR942.patch: -d91480dee934478063fe5945b73ff3c108e40a91 - -diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c -index b0cbb6e2d55..58d38f74bde 100644 ---- a/gcc/config/aarch64/aarch64.c -+++ b/gcc/config/aarch64/aarch64.c -@@ -2739,8 +2739,21 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm, - } - - case SYMBOL_TINY_GOT: -- emit_insn (gen_ldr_got_tiny (dest, imm)); -- return; -+ { -+ rtx insn; -+ machine_mode mode = GET_MODE (dest); -+ -+ if (mode == ptr_mode) -+ insn = gen_ldr_got_tiny (mode, dest, imm); -+ else -+ { -+ gcc_assert (mode == Pmode); -+ insn = gen_ldr_got_tiny_sidi (dest, imm); -+ } -+ -+ emit_insn (insn); -+ return; -+ } - - case SYMBOL_TINY_TLSIE: - { -diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md -index 7ad4e918578..c7c4d1dd519 100644 ---- a/gcc/config/aarch64/aarch64.md -+++ b/gcc/config/aarch64/aarch64.md -@@ -6766,13 +6766,23 @@ - [(set_attr "type" "load_4")] - ) - --(define_insn "ldr_got_tiny" -- [(set (match_operand:DI 0 "register_operand" "=r") -- (unspec:DI [(match_operand:DI 1 "aarch64_valid_symref" "S")] -- UNSPEC_GOTTINYPIC))] -+(define_insn "@ldr_got_tiny_" -+ [(set (match_operand:PTR 0 "register_operand" "=r") -+ (unspec:PTR [(match_operand:PTR 1 "aarch64_valid_symref" "S")] -+ UNSPEC_GOTTINYPIC))] - "" -- "ldr\\t%0, %L1" -- [(set_attr "type" "load_8")] -+ "ldr\t%0, %L1" -+ [(set_attr "type" "load_")] -+) -+ -+(define_insn "ldr_got_tiny_sidi" -+ [(set (match_operand:DI 0 "register_operand" "=r") -+ (zero_extend:DI -+ (unspec:SI [(match_operand:DI 1 "aarch64_valid_symref" "S")] -+ UNSPEC_GOTTINYPIC)))] -+ "TARGET_ILP32" -+ "ldr\t%w0, %L1" -+ [(set_attr "type" "load_4")] - ) - - (define_insn "aarch64_load_tp_hard" -diff --git a/gcc/testsuite/gcc.target/aarch64/pr94201.c b/gcc/testsuite/gcc.target/aarch64/pr94201.c -new file mode 100644 -index 00000000000..69176169186 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/aarch64/pr94201.c -@@ -0,0 +1,13 @@ -+/* { dg-do compile } */ -+/* { dg-options "-mcmodel=tiny -mabi=ilp32 -fPIC" } */ -+ -+extern int bar (void *); -+extern long long a; -+ -+int -+foo (void) -+{ -+ a = 1; -+ return bar ((void *)bar); -+} -+ diff --git a/fix-addlosymdi-ICE-in-pass-reload.patch b/fix-addlosymdi-ICE-in-pass-reload.patch deleted file mode 100644 index 409a3ea872190d54eef484e83560626f5774cf3e..0000000000000000000000000000000000000000 --- a/fix-addlosymdi-ICE-in-pass-reload.patch +++ /dev/null @@ -1,30 +0,0 @@ -diff -uprN a/gcc/lra.c b/gcc/lra.c ---- a/gcc/lra.c 2020-12-14 15:26:36.331633230 +0800 -+++ b/gcc/lra.c 2020-12-15 18:56:33.699633230 +0800 -@@ -507,6 +507,26 @@ lra_emit_move (rtx x, rtx y) - data. 
*/ - if (old != max_reg_num ()) - expand_reg_data (old); -+ while (insn != NULL) -+ { -+ if (GET_CODE (PATTERN (insn)) == SET -+ && GET_CODE (SET_SRC (PATTERN (insn))) == LO_SUM -+ && GET_CODE (SET_DEST (PATTERN (insn))) == REG -+ && strcmp (insn_data[recog_memoized (insn)].name, -+ "add_losym_di") == 0) -+ { -+ rtx add_losym_dest = SET_DEST (PATTERN (insn)); -+ for (int i = (int) max_reg_num () - 1; i >= old; i--) -+ { -+ if (regno_reg_rtx[i] == add_losym_dest) -+ { -+ setup_reg_classes (i, GENERAL_REGS, -+ NO_REGS, GENERAL_REGS); -+ } -+ } -+ } -+ insn = PREV_INSN (insn); -+ } - return; - } - lra_emit_add (x, XEXP (y, 0), XEXP (y, 1)); diff --git a/fix-an-ICE-in-vect_recog_mask_conversion_pattern.patch b/fix-an-ICE-in-vect_recog_mask_conversion_pattern.patch deleted file mode 100644 index fc236e947b3d5101abf6d57d8d1eb3e0911c5c94..0000000000000000000000000000000000000000 --- a/fix-an-ICE-in-vect_recog_mask_conversion_pattern.patch +++ /dev/null @@ -1,115 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-aarch64-Fix-an-ICE-in-vect_recog_mask_conversion_pattern.patch: -91d80cf4bd2827dd9c40fe6a7c719c909d79083d - -diff -Nurp a/gcc/testsuite/gcc.target/aarch64/pr96757.c b/gcc/testsuite/gcc.target/aarch64/pr96757.c ---- a/gcc/testsuite/gcc.target/aarch64/pr96757.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.target/aarch64/pr96757.c 2020-10-12 08:32:12.192000000 -0400 -@@ -0,0 +1,23 @@ -+/* PR target/96757 */ -+/* { dg-do compile } */ -+/* { dg-options "-O3" } */ -+ -+short -+fun1(short i, short j) -+{ -+ return i * j; -+} -+ -+int -+fun(int a, int b, int c) -+{ -+ int *v, z, k, m; -+ short f, d; -+ for (int i=0; i m; -+ z = f > k; -+ *v += fun1(z,b); -+ } -+} -diff -Nurp a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c ---- a/gcc/tree-vect-patterns.c 2020-10-12 08:05:18.924000000 -0400 -+++ b/gcc/tree-vect-patterns.c 2020-10-12 08:50:56.996000000 -0400 -@@ -3917,6 +3917,8 @@ vect_recog_mask_conversion_pattern (stmt - tree vectype1, vectype2; - stmt_vec_info pattern_stmt_info; - vec_info *vinfo = stmt_vinfo->vinfo; -+ tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE; -+ tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE; - - /* Check for MASK_LOAD ans MASK_STORE calls requiring mask conversion. */ - if (is_gimple_call (last_stmt) -@@ -4016,9 +4018,37 @@ vect_recog_mask_conversion_pattern (stmt - - it is better for b1 and b2 to use the mask type associated - with int elements rather bool (byte) elements. 
*/ -- rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo); -- if (!rhs1_type) -- rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0)); -+ rhs1_op0 = TREE_OPERAND (rhs1, 0); -+ rhs1_op1 = TREE_OPERAND (rhs1, 1); -+ if (!rhs1_op0 || !rhs1_op1) -+ return NULL; -+ rhs1_op0_type = search_type_for_mask (rhs1_op0, vinfo); -+ rhs1_op1_type = search_type_for_mask (rhs1_op1, vinfo); -+ -+ if (!rhs1_op0_type) -+ rhs1_type = TREE_TYPE (rhs1_op0); -+ else if (!rhs1_op1_type) -+ rhs1_type = TREE_TYPE (rhs1_op1); -+ else if (TYPE_PRECISION (rhs1_op0_type) -+ != TYPE_PRECISION (rhs1_op1_type)) -+ { -+ int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type) -+ - (int) TYPE_PRECISION (TREE_TYPE (lhs)); -+ int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type) -+ - (int) TYPE_PRECISION (TREE_TYPE (lhs)); -+ if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0)) -+ { -+ if (abs (tmp0) > abs (tmp1)) -+ rhs1_type = rhs1_op1_type; -+ else -+ rhs1_type = rhs1_op0_type; -+ } -+ else -+ rhs1_type = build_nonstandard_integer_type -+ (TYPE_PRECISION (TREE_TYPE (lhs)), 1); -+ } -+ else -+ rhs1_type = rhs1_op0_type; - } - else - return NULL; -@@ -4036,8 +4066,8 @@ vect_recog_mask_conversion_pattern (stmt - name from the outset. */ - if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1), - TYPE_VECTOR_SUBPARTS (vectype2)) -- && (TREE_CODE (rhs1) == SSA_NAME -- || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0)))) -+ && !rhs1_op0_type -+ && !rhs1_op1_type) - return NULL; - - /* If rhs1 is invariant and we can promote it leave the COND_EXPR -@@ -4069,7 +4099,16 @@ vect_recog_mask_conversion_pattern (stmt - if (TREE_CODE (rhs1) != SSA_NAME) - { - tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL); -- pattern_stmt = gimple_build_assign (tmp, rhs1); -+ if (rhs1_op0_type -+ && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type)) -+ rhs1_op0 = build_mask_conversion (rhs1_op0, -+ vectype2, stmt_vinfo); -+ if (rhs1_op1_type -+ && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type)) -+ rhs1_op1 = build_mask_conversion (rhs1_op1, -+ vectype2, stmt_vinfo); -+ pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1), -+ rhs1_op0, rhs1_op1); - rhs1 = tmp; - append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2); - } diff --git a/fix-avoid-bogus-uninit-warning-with-store-motion.patch b/fix-avoid-bogus-uninit-warning-with-store-motion.patch deleted file mode 100644 index 0a664b417823fe16d581fe9c85fc696885fcce16..0000000000000000000000000000000000000000 --- a/fix-avoid-bogus-uninit-warning-with-store-motion.patch +++ /dev/null @@ -1,85 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. 
- -0001-tree-optimization-94963-avoid-bogus-uninit-warning-with-store-motion.patch -371905d12259c180efb9b1f1b5716e969feb60f9 - -diff --git a/gcc/testsuite/gcc.dg/pr94963.c b/gcc/testsuite/gcc.dg/pr94963.c -new file mode 100644 -index 00000000000..aca9e161301 ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/pr94963.c -@@ -0,0 +1,35 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -Wall" } */ -+ -+typedef struct -+{ -+ int p1; -+ int p2; -+ int p3; -+} P; -+struct S -+{ -+ int field; -+}; -+extern int v2; -+extern void foo (struct S *map); -+static struct S var; -+const P *pv; -+int ps; -+void -+f (void) -+{ -+ if (pv != 0) -+ for (const P *ph = pv; ph < &pv[ps]; ++ph) -+ switch (ph->p1) -+ { -+ case 1: -+ v2 = ph->p2; -+ break; -+ case 2: -+ var.field = ph->p3; -+ break; -+ } -+ if (var.field != 0) /* { dg-bogus "uninitialized" } */ -+ foo (&var); -+} -diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c -index 554dd4be5bb..3056b4bfed2 100644 ---- a/gcc/tree-ssa-loop-im.c -+++ b/gcc/tree-ssa-loop-im.c -@@ -1994,8 +1994,6 @@ execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag, - gsi = gsi_start_bb (then_bb); - /* Insert actual store. */ - stmt = gimple_build_assign (unshare_expr (mem), tmp_var); -- /* Make sure to not warn about maybe-uninit uses of tmp_var here. */ -- gimple_set_no_warning (stmt, true); - gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); - - edge e1 = single_succ_edge (new_bb); -@@ -2149,13 +2147,19 @@ execute_sm (class loop *loop, vec exits, im_mem_ref *ref) - store then. */ - if ((!always_stored && !multi_threaded_model_p) - || (ref->loaded && bitmap_bit_p (ref->loaded, loop->num))) -+ load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref)); -+ else - { -- load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref)); -- lim_data = init_lim_data (load); -- lim_data->max_loop = loop; -- lim_data->tgt_loop = loop; -- gsi_insert_before (&gsi, load, GSI_SAME_STMT); -+ /* If not emitting a load mark the uninitialized state on the -+ loop entry as not to be warned for. */ -+ tree uninit = create_tmp_reg (TREE_TYPE (tmp_var)); -+ TREE_NO_WARNING (uninit) = 1; -+ load = gimple_build_assign (tmp_var, uninit); - } -+ lim_data = init_lim_data (load); -+ lim_data->max_loop = loop; -+ lim_data->tgt_loop = loop; -+ gsi_insert_before (&gsi, load, GSI_SAME_STMT); - - if (multi_threaded_model_p) - { diff --git a/fix-avx512vl-vcvttpd2dq-2-fail.patch b/fix-avx512vl-vcvttpd2dq-2-fail.patch deleted file mode 100644 index 60afadd603788e904e3042158f2b4bc52121fa6b..0000000000000000000000000000000000000000 --- a/fix-avx512vl-vcvttpd2dq-2-fail.patch +++ /dev/null @@ -1,301 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. 
- -0001-re-PR-target-91124-gcc.target-i386-avx512vl-vpshldvd.patch -946732df902dbb23dd44abe97fea41e154e6e5f9 - -diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md -index 3ce22395c65..12d6dc0cb7e 100644 ---- a/gcc/config/i386/sse.md -+++ b/gcc/config/i386/sse.md -@@ -5927,16 +5927,16 @@ - (set_attr "btver2_decode" "vector") - (set_attr "mode" "OI")]) - --(define_insn "sse2_cvtpd2dq" -+(define_insn "sse2_cvtpd2dq" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (vec_concat:V4SI - (unspec:V2SI [(match_operand:V2DF 1 "vector_operand" "vBm")] - UNSPEC_FIX_NOTRUNC) - (const_vector:V2SI [(const_int 0) (const_int 0)])))] -- "TARGET_SSE2 && " -+ "TARGET_SSE2" - { - if (TARGET_AVX) -- return "vcvtpd2dq{x}\t{%1, %0|%0, %1}"; -+ return "vcvtpd2dq{x}\t{%1, %0|%0, %1}"; - else - return "cvtpd2dq\t{%1, %0|%0, %1}"; - } -@@ -5949,6 +5949,38 @@ - (set_attr "athlon_decode" "vector") - (set_attr "bdver1_decode" "double")]) - -+(define_insn "sse2_cvtpd2dq_mask" -+ [(set (match_operand:V4SI 0 "register_operand" "=v") -+ (vec_concat:V4SI -+ (vec_merge:V2SI -+ (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")] -+ UNSPEC_FIX_NOTRUNC) -+ (vec_select:V2SI -+ (match_operand:V4SI 2 "nonimm_or_0_operand" "0C") -+ (parallel [(const_int 0) (const_int 1)])) -+ (match_operand:QI 3 "register_operand" "Yk")) -+ (const_vector:V2SI [(const_int 0) (const_int 0)])))] -+ "TARGET_AVX512VL" -+ "vcvtpd2dq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" -+ [(set_attr "type" "ssecvt") -+ (set_attr "prefix" "evex") -+ (set_attr "mode" "TI")]) -+ -+(define_insn "*sse2_cvtpd2dq_mask_1" -+ [(set (match_operand:V4SI 0 "register_operand" "=v") -+ (vec_concat:V4SI -+ (vec_merge:V2SI -+ (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")] -+ UNSPEC_FIX_NOTRUNC) -+ (const_vector:V2SI [(const_int 0) (const_int 0)]) -+ (match_operand:QI 2 "register_operand" "Yk")) -+ (const_vector:V2SI [(const_int 0) (const_int 0)])))] -+ "TARGET_AVX512VL" -+ "vcvtpd2dq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}" -+ [(set_attr "type" "ssecvt") -+ (set_attr "prefix" "evex") -+ (set_attr "mode" "TI")]) -+ - ;; For ufix_notrunc* insn patterns - (define_mode_attr pd2udqsuff - [(V8DF "") (V4DF "{y}")]) -@@ -5964,15 +5996,49 @@ - (set_attr "prefix" "evex") - (set_attr "mode" "")]) - --(define_insn "ufix_notruncv2dfv2si2" -+(define_insn "ufix_notruncv2dfv2si2" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (vec_concat:V4SI - (unspec:V2SI - [(match_operand:V2DF 1 "nonimmediate_operand" "vm")] -- UNSPEC_UNSIGNED_FIX_NOTRUNC) -+ UNSPEC_UNSIGNED_FIX_NOTRUNC) - (const_vector:V2SI [(const_int 0) (const_int 0)])))] - "TARGET_AVX512VL" -- "vcvtpd2udq{x}\t{%1, %0|%0, %1}" -+ "vcvtpd2udq{x}\t{%1, %0|%0, %1}" -+ [(set_attr "type" "ssecvt") -+ (set_attr "prefix" "evex") -+ (set_attr "mode" "TI")]) -+ -+(define_insn "ufix_notruncv2dfv2si2_mask" -+ [(set (match_operand:V4SI 0 "register_operand" "=v") -+ (vec_concat:V4SI -+ (vec_merge:V2SI -+ (unspec:V2SI -+ [(match_operand:V2DF 1 "nonimmediate_operand" "vm")] -+ UNSPEC_UNSIGNED_FIX_NOTRUNC) -+ (vec_select:V2SI -+ (match_operand:V4SI 2 "nonimm_or_0_operand" "0C") -+ (parallel [(const_int 0) (const_int 1)])) -+ (match_operand:QI 3 "register_operand" "Yk")) -+ (const_vector:V2SI [(const_int 0) (const_int 0)])))] -+ "TARGET_AVX512VL" -+ "vcvtpd2udq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" -+ [(set_attr "type" "ssecvt") -+ (set_attr "prefix" "evex") -+ (set_attr "mode" "TI")]) -+ -+(define_insn "*ufix_notruncv2dfv2si2_mask_1" -+ [(set (match_operand:V4SI 0 "register_operand" "=v") -+ 
(vec_concat:V4SI -+ (vec_merge:V2SI -+ (unspec:V2SI -+ [(match_operand:V2DF 1 "nonimmediate_operand" "vm")] -+ UNSPEC_UNSIGNED_FIX_NOTRUNC) -+ (const_vector:V2SI [(const_int 0) (const_int 0)]) -+ (match_operand:QI 2 "register_operand" "Yk")) -+ (const_vector:V2SI [(const_int 0) (const_int 0)])))] -+ "TARGET_AVX512VL" -+ "vcvtpd2udq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}" - [(set_attr "type" "ssecvt") - (set_attr "prefix" "evex") - (set_attr "mode" "TI")]) -@@ -5987,13 +6053,43 @@ - (set_attr "prefix" "evex") - (set_attr "mode" "OI")]) - --(define_insn "ufix_truncv2dfv2si2" -+(define_insn "ufix_truncv2dfv2si2" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (vec_concat:V4SI - (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm")) - (const_vector:V2SI [(const_int 0) (const_int 0)])))] - "TARGET_AVX512VL" -- "vcvttpd2udq{x}\t{%1, %0|%0, %1}" -+ "vcvttpd2udq{x}\t{%1, %0|%0, %1}" -+ [(set_attr "type" "ssecvt") -+ (set_attr "prefix" "evex") -+ (set_attr "mode" "TI")]) -+ -+(define_insn "ufix_truncv2dfv2si2_mask" -+ [(set (match_operand:V4SI 0 "register_operand" "=v") -+ (vec_concat:V4SI -+ (vec_merge:V2SI -+ (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm")) -+ (vec_select:V2SI -+ (match_operand:V4SI 2 "nonimm_or_0_operand" "0C") -+ (parallel [(const_int 0) (const_int 1)])) -+ (match_operand:QI 3 "register_operand" "Yk")) -+ (const_vector:V2SI [(const_int 0) (const_int 0)])))] -+ "TARGET_AVX512VL" -+ "vcvttpd2udq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" -+ [(set_attr "type" "ssecvt") -+ (set_attr "prefix" "evex") -+ (set_attr "mode" "TI")]) -+ -+(define_insn "*ufix_truncv2dfv2si2_mask_1" -+ [(set (match_operand:V4SI 0 "register_operand" "=v") -+ (vec_concat:V4SI -+ (vec_merge:V2SI -+ (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm")) -+ (const_vector:V2SI [(const_int 0) (const_int 0)]) -+ (match_operand:QI 2 "register_operand" "Yk")) -+ (const_vector:V2SI [(const_int 0) (const_int 0)])))] -+ "TARGET_AVX512VL" -+ "vcvttpd2udq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}" - [(set_attr "type" "ssecvt") - (set_attr "prefix" "evex") - (set_attr "mode" "TI")]) -@@ -6138,15 +6234,15 @@ - "TARGET_AVX" - "operands[2] = CONST0_RTX (V4SImode);") - --(define_insn "sse2_cvttpd2dq" -+(define_insn "sse2_cvttpd2dq" - [(set (match_operand:V4SI 0 "register_operand" "=v") - (vec_concat:V4SI - (fix:V2SI (match_operand:V2DF 1 "vector_operand" "vBm")) - (const_vector:V2SI [(const_int 0) (const_int 0)])))] -- "TARGET_SSE2 && " -+ "TARGET_SSE2" - { - if (TARGET_AVX) -- return "vcvttpd2dq{x}\t{%1, %0|%0, %1}"; -+ return "vcvttpd2dq{x}\t{%1, %0|%0, %1}"; - else - return "cvttpd2dq\t{%1, %0|%0, %1}"; - } -@@ -6157,6 +6253,36 @@ - (set_attr "prefix" "maybe_vex") - (set_attr "mode" "TI")]) - -+(define_insn "sse2_cvttpd2dq_mask" -+ [(set (match_operand:V4SI 0 "register_operand" "=v") -+ (vec_concat:V4SI -+ (vec_merge:V2SI -+ (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm")) -+ (vec_select:V2SI -+ (match_operand:V4SI 2 "nonimm_or_0_operand" "0C") -+ (parallel [(const_int 0) (const_int 1)])) -+ (match_operand:QI 3 "register_operand" "Yk")) -+ (const_vector:V2SI [(const_int 0) (const_int 0)])))] -+ "TARGET_AVX512VL" -+ "vcvttpd2dq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" -+ [(set_attr "type" "ssecvt") -+ (set_attr "prefix" "evex") -+ (set_attr "mode" "TI")]) -+ -+(define_insn "*sse2_cvttpd2dq_mask_1" -+ [(set (match_operand:V4SI 0 "register_operand" "=v") -+ (vec_concat:V4SI -+ (vec_merge:V2SI -+ (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" 
"vm")) -+ (const_vector:V2SI [(const_int 0) (const_int 0)]) -+ (match_operand:QI 2 "register_operand" "Yk")) -+ (const_vector:V2SI [(const_int 0) (const_int 0)])))] -+ "TARGET_AVX512VL" -+ "vcvttpd2dq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}" -+ [(set_attr "type" "ssecvt") -+ (set_attr "prefix" "evex") -+ (set_attr "mode" "TI")]) -+ - (define_insn "sse2_cvtsd2ss" - [(set (match_operand:V4SF 0 "register_operand" "=x,x,v") - (vec_merge:V4SF -@@ -6276,26 +6402,28 @@ - - (define_expand "sse2_cvtpd2ps_mask" - [(set (match_operand:V4SF 0 "register_operand") -- (vec_merge:V4SF -- (vec_concat:V4SF -+ (vec_concat:V4SF -+ (vec_merge:V2SF - (float_truncate:V2SF - (match_operand:V2DF 1 "vector_operand")) -- (match_dup 4)) -- (match_operand:V4SF 2 "register_operand") -- (match_operand:QI 3 "register_operand")))] -+ (vec_select:V2SF -+ (match_operand:V4SF 2 "nonimm_or_0_operand") -+ (parallel [(const_int 0) (const_int 1)])) -+ (match_operand:QI 3 "register_operand")) -+ (match_dup 4)))] - "TARGET_SSE2" - "operands[4] = CONST0_RTX (V2SFmode);") - --(define_insn "*sse2_cvtpd2ps" -+(define_insn "*sse2_cvtpd2ps" - [(set (match_operand:V4SF 0 "register_operand" "=v") - (vec_concat:V4SF - (float_truncate:V2SF - (match_operand:V2DF 1 "vector_operand" "vBm")) -- (match_operand:V2SF 2 "const0_operand")))] -- "TARGET_SSE2 && " -+ (match_operand:V2SF 2 "const0_operand" "C")))] -+ "TARGET_SSE2" - { - if (TARGET_AVX) -- return "vcvtpd2ps{x}\t{%1, %0|%0, %1}"; -+ return "vcvtpd2ps{x}\t{%1, %0|%0, %1}"; - else - return "cvtpd2ps\t{%1, %0|%0, %1}"; - } -@@ -6307,6 +6435,38 @@ - (set_attr "prefix" "maybe_vex") - (set_attr "mode" "V4SF")]) - -+(define_insn "*sse2_cvtpd2ps_mask" -+ [(set (match_operand:V4SF 0 "register_operand" "=v") -+ (vec_concat:V4SF -+ (vec_merge:V2SF -+ (float_truncate:V2SF -+ (match_operand:V2DF 1 "nonimmediate_operand" "vm")) -+ (vec_select:V2SF -+ (match_operand:V4SF 2 "nonimm_or_0_operand" "0C") -+ (parallel [(const_int 0) (const_int 1)])) -+ (match_operand:QI 3 "register_operand" "Yk")) -+ (match_operand:V2SF 4 "const0_operand" "C")))] -+ "TARGET_AVX512VL" -+ "vcvtpd2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" -+ [(set_attr "type" "ssecvt") -+ (set_attr "prefix" "evex") -+ (set_attr "mode" "V4SF")]) -+ -+(define_insn "*sse2_cvtpd2ps_mask_1" -+ [(set (match_operand:V4SF 0 "register_operand" "=v") -+ (vec_concat:V4SF -+ (vec_merge:V2SF -+ (float_truncate:V2SF -+ (match_operand:V2DF 1 "nonimmediate_operand" "vm")) -+ (match_operand:V2SF 3 "const0_operand" "C") -+ (match_operand:QI 2 "register_operand" "Yk")) -+ (match_operand:V2SF 4 "const0_operand" "C")))] -+ "TARGET_AVX512VL" -+ "vcvtpd2ps{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}" -+ [(set_attr "type" "ssecvt") -+ (set_attr "prefix" "evex") -+ (set_attr "mode" "V4SF")]) -+ - ;; For _cvtps2pd insn pattern - (define_mode_attr sf2dfmode - [(V8DF "V8SF") (V4DF "V4SF")]) diff --git a/fix-cost-of-plus.patch b/fix-cost-of-plus.patch deleted file mode 100644 index 5a0e2f00c16662d5c53520d04f6c828e195069eb..0000000000000000000000000000000000000000 --- a/fix-cost-of-plus.patch +++ /dev/null @@ -1,19 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. 
- -AArch64-Fix-cost-of-plus-.-const_int-C.patch: -commit 835d50c66aa5bde2f354a6e63a2afa7d2f76a05a - -diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c -index 56a4a47db73..71d44de1d0a 100644 ---- a/gcc/config/aarch64/aarch64.c -+++ b/gcc/config/aarch64/aarch64.c -@@ -10753,7 +10753,7 @@ cost_plus: - } - - if (GET_MODE_CLASS (mode) == MODE_INT -- && ((CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1))) -+ && (aarch64_plus_immediate (op1, mode) - || aarch64_sve_addvl_addpl_immediate (op1, mode))) - { - *cost += rtx_cost (op0, mode, PLUS, 0, speed); diff --git a/fix-do-not-build-op.patch b/fix-do-not-build-op.patch deleted file mode 100644 index d3a59d45812a1f96f5df7adac95e5b2780f80de3..0000000000000000000000000000000000000000 --- a/fix-do-not-build-op.patch +++ /dev/null @@ -1,27 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-tree-vect-slp.c-vect_build_slp_tree_2-Do-not-build-o.patch -f99d62629933adf91e7e0bc1b1ff344ffb68e1a2 - -diff -Nurp a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c ---- a/gcc/tree-vect-slp.c 2020-08-24 21:31:24.780000000 +0800 -+++ b/gcc/tree-vect-slp.c 2020-08-24 21:31:53.516000000 +0800 -@@ -1326,7 +1326,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, - slp_tree grandchild; - - FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) -- if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def) -+ if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def) - break; - if (!grandchild) - { -@@ -1486,7 +1486,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, - slp_tree grandchild; - - FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) -- if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def) -+ if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def) - break; - if (!grandchild) - { diff --git a/fix-issue499-add-nop-convert.patch b/fix-issue499-add-nop-convert.patch deleted file mode 100644 index fad9584eafe4e127fd5b3e79b883d00efc09cdd9..0000000000000000000000000000000000000000 --- a/fix-issue499-add-nop-convert.patch +++ /dev/null @@ -1,928 +0,0 @@ -This patch is a combine of following 8 commits - -commit e944354ec05891474b0d204c6c239c04ee7b527b -Author: Robin Dapp -Date: Mon Aug 26 10:18:24 2019 +0000 - - [PATCH 1/2] Allow folding all statements. - -commit df7d46d925c7baca7bf9961aee900876d8aef225 -Author: Robin Dapp -Date: Mon Aug 26 10:24:44 2019 +0000 - - [PATCH 2/2] Add simplify rule for wrapped addition. - -commit 6c14d008122fcee4157be79a60f8d6685869ad19 -Author: Robin Dapp -Date: Tue Aug 27 12:08:58 2019 +0000 - - re PR testsuite/91549 (gcc.dg/wrapped-binop-simplify.c fails starting with r274925) - -commit 129bd066049f065e522990e63bb10ff92b3c018d -Author: Jakub Jelinek -Date: Tue Dec 3 10:20:43 2019 +0100 - - re PR tree-optimization/92734 (Missing match.pd simplification done by fold_binary_loc on generic) - -commit 526b4c716a340ee9464965e63eee2b9954fe21f1 -Author: Jakub Jelinek -Date: Wed Dec 4 10:38:48 2019 +0100 - - re PR tree-optimization/92734 (Missing match.pd simplification done by fold_binary_loc on generic) - -commit 28fabd43d9d249134244eb9d7815917c7ae44b64 -Author: Richard Biener -Date: Fri Dec 6 10:25:08 2019 +0000 - - genmatch.c (enum tree_code): Remove CONVERT{0,1,2} and VIEW_CONVERT{0,1,2}. - -commit e150da383346adc762bc904342f9877f2f071265 -Author: Richard Biener -Date: Fri Dec 6 11:44:27 2019 +0000 - - match.pd (nop_convert): Remove empty match. 
- -commit 496f4f884716ae061f771a62e44868a32dbd502f -Author: Jakub Jelinek -Date: Mon May 4 11:01:08 2020 +0200 - - match.pd: Decrease number of nop conversions around bitwise ops [PR94718] - -diff -Nurp a/gcc/genmatch.c b/gcc/genmatch.c ---- a/gcc/genmatch.c 2020-03-12 19:07:21.000000000 +0800 -+++ b/gcc/genmatch.c 2020-11-24 14:49:12.792000000 +0800 -@@ -224,12 +224,6 @@ output_line_directive (FILE *f, location - #define DEFTREECODE(SYM, STRING, TYPE, NARGS) SYM, - enum tree_code { - #include "tree.def" --CONVERT0, --CONVERT1, --CONVERT2, --VIEW_CONVERT0, --VIEW_CONVERT1, --VIEW_CONVERT2, - MAX_TREE_CODES - }; - #undef DEFTREECODE -@@ -695,11 +689,12 @@ struct expr : public operand - expr (id_base *operation_, location_t loc, bool is_commutative_ = false) - : operand (OP_EXPR, loc), operation (operation_), - ops (vNULL), expr_type (NULL), is_commutative (is_commutative_), -- is_generic (false), force_single_use (false) {} -+ is_generic (false), force_single_use (false), opt_grp (0) {} - expr (expr *e) - : operand (OP_EXPR, e->location), operation (e->operation), - ops (vNULL), expr_type (e->expr_type), is_commutative (e->is_commutative), -- is_generic (e->is_generic), force_single_use (e->force_single_use) {} -+ is_generic (e->is_generic), force_single_use (e->force_single_use), -+ opt_grp (e->opt_grp) {} - void append_op (operand *op) { ops.safe_push (op); } - /* The operator and its operands. */ - id_base *operation; -@@ -714,6 +709,8 @@ struct expr : public operand - /* Whether pushing any stmt to the sequence should be conditional - on this expression having a single-use. */ - bool force_single_use; -+ /* If non-zero, the group for optional handling. */ -+ unsigned char opt_grp; - virtual void gen_transform (FILE *f, int, const char *, bool, int, - const char *, capture_info *, - dt_operand ** = 0, int = 0); -@@ -1079,18 +1076,17 @@ lower_commutative (simplify *s, vec (o)) - { - if (c->what) - return new capture (c->location, c->where, -- lower_opt_convert (c->what, oper, to_oper, strip), -+ lower_opt (c->what, grp, strip), - c->value_match); - else - return c; -@@ -1100,36 +1096,34 @@ lower_opt_convert (operand *o, enum tree - if (!e) - return o; - -- if (*e->operation == oper) -+ if (e->opt_grp == grp) - { - if (strip) -- return lower_opt_convert (e->ops[0], oper, to_oper, strip); -+ return lower_opt (e->ops[0], grp, strip); - - expr *ne = new expr (e); -- ne->operation = (to_oper == CONVERT_EXPR -- ? get_operator ("CONVERT_EXPR") -- : get_operator ("VIEW_CONVERT_EXPR")); -- ne->append_op (lower_opt_convert (e->ops[0], oper, to_oper, strip)); -+ ne->opt_grp = 0; -+ ne->append_op (lower_opt (e->ops[0], grp, strip)); - return ne; - } - - expr *ne = new expr (e); - for (unsigned i = 0; i < e->ops.length (); ++i) -- ne->append_op (lower_opt_convert (e->ops[i], oper, to_oper, strip)); -+ ne->append_op (lower_opt (e->ops[i], grp, strip)); - - return ne; - } - --/* Determine whether O or its children uses the conditional conversion -- operator OPER. */ -+/* Determine whether O or its children uses the conditional operation -+ group GRP. 
*/ - - static bool --has_opt_convert (operand *o, enum tree_code oper) -+has_opt (operand *o, unsigned char grp) - { - if (capture *c = dyn_cast (o)) - { - if (c->what) -- return has_opt_convert (c->what, oper); -+ return has_opt (c->what, grp); - else - return false; - } -@@ -1138,11 +1132,11 @@ has_opt_convert (operand *o, enum tree_c - if (!e) - return false; - -- if (*e->operation == oper) -+ if (e->opt_grp == grp) - return true; - - for (unsigned i = 0; i < e->ops.length (); ++i) -- if (has_opt_convert (e->ops[i], oper)) -+ if (has_opt (e->ops[i], grp)) - return true; - - return false; -@@ -1152,34 +1146,24 @@ has_opt_convert (operand *o, enum tree_c - if required. */ - - static vec --lower_opt_convert (operand *o) -+lower_opt (operand *o) - { - vec v1 = vNULL, v2; - - v1.safe_push (o); - -- enum tree_code opers[] -- = { CONVERT0, CONVERT_EXPR, -- CONVERT1, CONVERT_EXPR, -- CONVERT2, CONVERT_EXPR, -- VIEW_CONVERT0, VIEW_CONVERT_EXPR, -- VIEW_CONVERT1, VIEW_CONVERT_EXPR, -- VIEW_CONVERT2, VIEW_CONVERT_EXPR }; -- -- /* Conditional converts are lowered to a pattern with the -- conversion and one without. The three different conditional -- convert codes are lowered separately. */ -+ /* Conditional operations are lowered to a pattern with the -+ operation and one without. All different conditional operation -+ groups are lowered separately. */ - -- for (unsigned i = 0; i < sizeof (opers) / sizeof (enum tree_code); i += 2) -+ for (unsigned i = 1; i <= 10; ++i) - { - v2 = vNULL; - for (unsigned j = 0; j < v1.length (); ++j) -- if (has_opt_convert (v1[j], opers[i])) -+ if (has_opt (v1[j], i)) - { -- v2.safe_push (lower_opt_convert (v1[j], -- opers[i], opers[i+1], false)); -- v2.safe_push (lower_opt_convert (v1[j], -- opers[i], opers[i+1], true)); -+ v2.safe_push (lower_opt (v1[j], i, false)); -+ v2.safe_push (lower_opt (v1[j], i, true)); - } - - if (v2 != vNULL) -@@ -1197,9 +1181,9 @@ lower_opt_convert (operand *o) - the resulting multiple patterns to SIMPLIFIERS. */ - - static void --lower_opt_convert (simplify *s, vec& simplifiers) -+lower_opt (simplify *s, vec& simplifiers) - { -- vec matchers = lower_opt_convert (s->match); -+ vec matchers = lower_opt (s->match); - for (unsigned i = 0; i < matchers.length (); ++i) - { - simplify *ns = new simplify (s->kind, s->id, matchers[i], s->result, -@@ -1543,7 +1527,7 @@ lower (vec& simplifiers, boo - { - auto_vec out_simplifiers; - for (unsigned i = 0; i < simplifiers.length (); ++i) -- lower_opt_convert (simplifiers[i], out_simplifiers); -+ lower_opt (simplifiers[i], out_simplifiers); - - simplifiers.truncate (0); - for (unsigned i = 0; i < out_simplifiers.length (); ++i) -@@ -3927,7 +3911,7 @@ private: - - unsigned get_internal_capture_id (); - -- id_base *parse_operation (); -+ id_base *parse_operation (unsigned char &); - operand *parse_capture (operand *, bool); - operand *parse_expr (); - c_expr *parse_c_expr (cpp_ttype); -@@ -4118,47 +4102,36 @@ parser::record_operlist (location_t loc, - convert2? */ - - id_base * --parser::parse_operation () -+parser::parse_operation (unsigned char &opt_grp) - { - const cpp_token *id_tok = peek (); -+ char *alt_id = NULL; - const char *id = get_ident (); - const cpp_token *token = peek (); -- if (strcmp (id, "convert0") == 0) -- fatal_at (id_tok, "use 'convert?' here"); -- else if (strcmp (id, "view_convert0") == 0) -- fatal_at (id_tok, "use 'view_convert?' 
here"); -+ opt_grp = 0; - if (token->type == CPP_QUERY - && !(token->flags & PREV_WHITE)) - { -- if (strcmp (id, "convert") == 0) -- id = "convert0"; -- else if (strcmp (id, "convert1") == 0) -- ; -- else if (strcmp (id, "convert2") == 0) -- ; -- else if (strcmp (id, "view_convert") == 0) -- id = "view_convert0"; -- else if (strcmp (id, "view_convert1") == 0) -- ; -- else if (strcmp (id, "view_convert2") == 0) -- ; -- else -- fatal_at (id_tok, "non-convert operator conditionalized"); -- - if (!parsing_match_operand) - fatal_at (id_tok, "conditional convert can only be used in " - "match expression"); -+ if (ISDIGIT (id[strlen (id) - 1])) -+ { -+ opt_grp = id[strlen (id) - 1] - '0' + 1; -+ alt_id = xstrdup (id); -+ alt_id[strlen (id) - 1] = '\0'; -+ if (opt_grp == 1) -+ fatal_at (id_tok, "use '%s?' here", alt_id); -+ } -+ else -+ opt_grp = 1; - eat_token (CPP_QUERY); - } -- else if (strcmp (id, "convert1") == 0 -- || strcmp (id, "convert2") == 0 -- || strcmp (id, "view_convert1") == 0 -- || strcmp (id, "view_convert2") == 0) -- fatal_at (id_tok, "expected '?' after conditional operator"); -- id_base *op = get_operator (id); -+ id_base *op = get_operator (alt_id ? alt_id : id); - if (!op) -- fatal_at (id_tok, "unknown operator %s", id); -- -+ fatal_at (id_tok, "unknown operator %s", alt_id ? alt_id : id); -+ if (alt_id) -+ free (alt_id); - user_id *p = dyn_cast (op); - if (p && p->is_oper_list) - { -@@ -4214,7 +4187,8 @@ struct operand * - parser::parse_expr () - { - const cpp_token *token = peek (); -- expr *e = new expr (parse_operation (), token->src_loc); -+ unsigned char opt_grp; -+ expr *e = new expr (parse_operation (opt_grp), token->src_loc); - token = peek (); - operand *op; - bool is_commutative = false; -@@ -4310,6 +4284,12 @@ parser::parse_expr () - "commutative"); - } - e->expr_type = expr_type; -+ if (opt_grp != 0) -+ { -+ if (e->ops.length () != 1) -+ fatal_at (token, "only unary operations can be conditional"); -+ e->opt_grp = opt_grp; -+ } - return op; - } - else if (!(token->flags & PREV_WHITE)) -@@ -4692,10 +4672,6 @@ parser::parse_for (location_t) - id_base *idb = get_operator (oper, true); - if (idb == NULL) - fatal_at (token, "no such operator '%s'", oper); -- if (*idb == CONVERT0 || *idb == CONVERT1 || *idb == CONVERT2 -- || *idb == VIEW_CONVERT0 || *idb == VIEW_CONVERT1 -- || *idb == VIEW_CONVERT2) -- fatal_at (token, "conditional operators cannot be used inside for"); - - if (arity == -1) - arity = idb->nargs; -@@ -5102,12 +5078,6 @@ main (int argc, char **argv) - add_operator (SYM, # SYM, # TYPE, NARGS); - #define END_OF_BASE_TREE_CODES - #include "tree.def" --add_operator (CONVERT0, "convert0", "tcc_unary", 1); --add_operator (CONVERT1, "convert1", "tcc_unary", 1); --add_operator (CONVERT2, "convert2", "tcc_unary", 1); --add_operator (VIEW_CONVERT0, "view_convert0", "tcc_unary", 1); --add_operator (VIEW_CONVERT1, "view_convert1", "tcc_unary", 1); --add_operator (VIEW_CONVERT2, "view_convert2", "tcc_unary", 1); - #undef END_OF_BASE_TREE_CODES - #undef DEFTREECODE - -diff -Nurp a/gcc/gimple-loop-versioning.cc b/gcc/gimple-loop-versioning.cc ---- a/gcc/gimple-loop-versioning.cc 2020-03-12 19:07:21.000000000 +0800 -+++ b/gcc/gimple-loop-versioning.cc 2020-11-24 14:49:12.792000000 +0800 -@@ -1264,6 +1264,12 @@ loop_versioning::record_address_fragment - continue; - } - } -+ if (CONVERT_EXPR_CODE_P (code)) -+ { -+ tree op1 = gimple_assign_rhs1 (assign); -+ address->terms[i].expr = strip_casts (op1); -+ continue; -+ } - } - i += 1; - } -diff -Nurp a/gcc/match.pd 
b/gcc/match.pd ---- a/gcc/match.pd 2020-11-24 14:54:43.576000000 +0800 -+++ b/gcc/match.pd 2020-11-24 14:49:12.792000000 +0800 -@@ -97,8 +97,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) - (define_operator_list COND_TERNARY - IFN_COND_FMA IFN_COND_FMS IFN_COND_FNMA IFN_COND_FNMS) - --/* As opposed to convert?, this still creates a single pattern, so -- it is not a suitable replacement for convert? in all cases. */ -+/* With nop_convert? combine convert? and view_convert? in one pattern -+ plus conditionalize on tree_nop_conversion_p conversions. */ - (match (nop_convert @0) - (convert @0) - (if (tree_nop_conversion_p (type, TREE_TYPE (@0))))) -@@ -108,9 +108,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) - && known_eq (TYPE_VECTOR_SUBPARTS (type), - TYPE_VECTOR_SUBPARTS (TREE_TYPE (@0))) - && tree_nop_conversion_p (TREE_TYPE (type), TREE_TYPE (TREE_TYPE (@0)))))) --/* This one has to be last, or it shadows the others. */ --(match (nop_convert @0) -- @0) - - /* Transform likes of (char) ABS_EXPR <(int) x> into (char) ABSU_EXPR - ABSU_EXPR returns unsigned absolute value of the operand and the operand -@@ -1260,7 +1257,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) - We combine the above two cases by using a conditional convert. */ - (for bitop (bit_and bit_ior bit_xor) - (simplify -- (bitop (convert @0) (convert? @1)) -+ (bitop (convert@2 @0) (convert?@3 @1)) - (if (((TREE_CODE (@1) == INTEGER_CST - && INTEGRAL_TYPE_P (TREE_TYPE (@0)) - && int_fits_type_p (@1, TREE_TYPE (@0))) -@@ -1279,8 +1276,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) - || GET_MODE_CLASS (TYPE_MODE (type)) != MODE_INT - /* Or if the precision of TO is not the same as the precision - of its mode. */ -- || !type_has_mode_precision_p (type))) -- (convert (bitop @0 (convert @1)))))) -+ || !type_has_mode_precision_p (type) -+ /* In GIMPLE, getting rid of 2 conversions for one new results -+ in smaller IL. */ -+ || (GIMPLE -+ && TREE_CODE (@1) != INTEGER_CST -+ && tree_nop_conversion_p (type, TREE_TYPE (@0)) -+ && single_use (@2) -+ && single_use (@3)))) -+ (convert (bitop @0 (convert @1))))) -+ /* In GIMPLE, getting rid of 2 conversions for one new results -+ in smaller IL. */ -+ (simplify -+ (convert (bitop:cs@2 (nop_convert:s @0) @1)) -+ (if (GIMPLE -+ && TREE_CODE (@1) != INTEGER_CST -+ && tree_nop_conversion_p (type, TREE_TYPE (@2)) -+ && types_match (type, @0)) -+ (bitop @0 (convert @1))))) - - (for bitop (bit_and bit_ior) - rbitop (bit_ior bit_and) -@@ -1374,7 +1387,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) - - /* Convert - (~A) to A + 1. */ - (simplify -- (negate (nop_convert (bit_not @0))) -+ (negate (nop_convert? (bit_not @0))) - (plus (view_convert @0) { build_each_one_cst (type); })) - - /* Convert ~ (A - 1) or ~ (A + -1) to -A. */ -@@ -1401,7 +1414,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) - - /* Otherwise prefer ~(X ^ Y) to ~X ^ Y as more canonical. */ - (simplify -- (bit_xor:c (nop_convert:s (bit_not:s @0)) @1) -+ (bit_xor:c (nop_convert?:s (bit_not:s @0)) @1) - (if (tree_nop_conversion_p (type, TREE_TYPE (@0))) - (bit_not (bit_xor (view_convert @0) @1)))) - -@@ -1614,7 +1627,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) - /* For equality, this is also true with wrapping overflow. */ - (for op (eq ne) - (simplify -- (op:c (nop_convert@3 (plus:c@2 @0 (convert1? @1))) (convert2? @1)) -+ (op:c (nop_convert?@3 (plus:c@2 @0 (convert1? @1))) (convert2? 
@1)) - (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0)) - && (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0)) - || TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0))) -@@ -1623,7 +1636,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) - && tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@1))) - (op @0 { build_zero_cst (TREE_TYPE (@0)); }))) - (simplify -- (op:c (nop_convert@3 (pointer_plus@2 (convert1? @0) @1)) (convert2? @0)) -+ (op:c (nop_convert?@3 (pointer_plus@2 (convert1? @0) @1)) (convert2? @0)) - (if (tree_nop_conversion_p (TREE_TYPE (@2), TREE_TYPE (@0)) - && tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@0)) - && (CONSTANT_CLASS_P (@1) || (single_use (@2) && single_use (@3)))) -@@ -1866,7 +1879,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) - || !HONOR_SIGN_DEPENDENT_ROUNDING (type))) - (convert (negate @1)))) - (simplify -- (negate (nop_convert (negate @1))) -+ (negate (nop_convert? (negate @1))) - (if (!TYPE_OVERFLOW_SANITIZED (type) - && !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@1))) - (view_convert @1))) -@@ -1883,20 +1896,26 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) - /* A - (A +- B) -> -+ B */ - /* A +- (B -+ A) -> +- B */ - (simplify -- (minus (plus:c @0 @1) @0) -- @1) -- (simplify -- (minus (minus @0 @1) @0) -- (negate @1)) -+ (minus (nop_convert1? (plus:c (nop_convert2? @0) @1)) @0) -+ (view_convert @1)) - (simplify -- (plus:c (minus @0 @1) @1) -- @0) -+ (minus (nop_convert1? (minus (nop_convert2? @0) @1)) @0) -+ (if (!ANY_INTEGRAL_TYPE_P (type) -+ || TYPE_OVERFLOW_WRAPS (type)) -+ (negate (view_convert @1)) -+ (view_convert (negate @1)))) -+ (simplify -+ (plus:c (nop_convert1? (minus @0 (nop_convert2? @1))) @1) -+ (view_convert @0)) -+ (simplify -+ (minus @0 (nop_convert1? (plus:c (nop_convert2? @0) @1))) -+ (if (!ANY_INTEGRAL_TYPE_P (type) -+ || TYPE_OVERFLOW_WRAPS (type)) -+ (negate (view_convert @1)) -+ (view_convert (negate @1)))) - (simplify -- (minus @0 (plus:c @0 @1)) -- (negate @1)) -- (simplify -- (minus @0 (minus @0 @1)) -- @1) -+ (minus @0 (nop_convert1? (minus (nop_convert2? @0) @1))) -+ (view_convert @1)) - /* (A +- B) + (C - A) -> C +- B */ - /* (A + B) - (A - C) -> B + C */ - /* More cases are handled with comparisons. */ -@@ -1922,7 +1941,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) - (for inner_op (plus minus) - neg_inner_op (minus plus) - (simplify -- (outer_op (nop_convert (inner_op @0 CONSTANT_CLASS_P@1)) -+ (outer_op (nop_convert? (inner_op @0 CONSTANT_CLASS_P@1)) - CONSTANT_CLASS_P@2) - /* If one of the types wraps, use that one. */ - (if (!ANY_INTEGRAL_TYPE_P (type) || TYPE_OVERFLOW_WRAPS (type)) -@@ -1961,17 +1980,70 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) - /* (CST1 - A) +- CST2 -> CST3 - A */ - (for outer_op (plus minus) - (simplify -- (outer_op (minus CONSTANT_CLASS_P@1 @0) CONSTANT_CLASS_P@2) -- (with { tree cst = const_binop (outer_op, type, @1, @2); } -- (if (cst && !TREE_OVERFLOW (cst)) -- (minus { cst; } @0))))) -- -- /* CST1 - (CST2 - A) -> CST3 + A */ -- (simplify -- (minus CONSTANT_CLASS_P@1 (minus CONSTANT_CLASS_P@2 @0)) -- (with { tree cst = const_binop (MINUS_EXPR, type, @1, @2); } -- (if (cst && !TREE_OVERFLOW (cst)) -- (plus { cst; } @0)))) -+ (outer_op (nop_convert? (minus CONSTANT_CLASS_P@1 @0)) CONSTANT_CLASS_P@2) -+ /* If one of the types wraps, use that one. */ -+ (if (!ANY_INTEGRAL_TYPE_P (type) || TYPE_OVERFLOW_WRAPS (type)) -+ /* If all 3 captures are CONSTANT_CLASS_P, punt, as we might recurse -+ forever if something doesn't simplify into a constant. 
*/ -+ (if (!CONSTANT_CLASS_P (@0)) -+ (minus (outer_op (view_convert @1) @2) (view_convert @0))) -+ (if (!ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0)) -+ || TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0))) -+ (view_convert (minus (outer_op @1 (view_convert @2)) @0)) -+ (if (types_match (type, @0)) -+ (with { tree cst = const_binop (outer_op, type, @1, @2); } -+ (if (cst && !TREE_OVERFLOW (cst)) -+ (minus { cst; } @0)))))))) -+ -+ /* CST1 - (CST2 - A) -> CST3 + A -+ Use view_convert because it is safe for vectors and equivalent for -+ scalars. */ -+ (simplify -+ (minus CONSTANT_CLASS_P@1 (nop_convert? (minus CONSTANT_CLASS_P@2 @0))) -+ /* If one of the types wraps, use that one. */ -+ (if (!ANY_INTEGRAL_TYPE_P (type) || TYPE_OVERFLOW_WRAPS (type)) -+ /* If all 3 captures are CONSTANT_CLASS_P, punt, as we might recurse -+ forever if something doesn't simplify into a constant. */ -+ (if (!CONSTANT_CLASS_P (@0)) -+ (plus (view_convert @0) (minus @1 (view_convert @2)))) -+ (if (!ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0)) -+ || TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0))) -+ (view_convert (plus @0 (minus (view_convert @1) @2))) -+ (if (types_match (type, @0)) -+ (with { tree cst = const_binop (MINUS_EXPR, type, @1, @2); } -+ (if (cst && !TREE_OVERFLOW (cst)) -+ (plus { cst; } @0))))))) -+ -+/* ((T)(A)) + CST -> (T)(A + CST) */ -+#if GIMPLE -+ (simplify -+ (plus (convert SSA_NAME@0) INTEGER_CST@1) -+ (if (TREE_CODE (TREE_TYPE (@0)) == INTEGER_TYPE -+ && TREE_CODE (type) == INTEGER_TYPE -+ && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (@0)) -+ && int_fits_type_p (@1, TREE_TYPE (@0))) -+ /* Perform binary operation inside the cast if the constant fits -+ and (A + CST)'s range does not overflow. */ -+ (with -+ { -+ wi::overflow_type min_ovf = wi::OVF_OVERFLOW, -+ max_ovf = wi::OVF_OVERFLOW; -+ tree inner_type = TREE_TYPE (@0); -+ -+ wide_int w1 = wide_int::from (wi::to_wide (@1), TYPE_PRECISION (inner_type), -+ TYPE_SIGN (inner_type)); -+ -+ wide_int wmin0, wmax0; -+ if (get_range_info (@0, &wmin0, &wmax0) == VR_RANGE) -+ { -+ wi::add (wmin0, w1, TYPE_SIGN (inner_type), &min_ovf); -+ wi::add (wmax0, w1, TYPE_SIGN (inner_type), &max_ovf); -+ } -+ } -+ (if (min_ovf == wi::OVF_NONE && max_ovf == wi::OVF_NONE) -+ (convert (plus @0 { wide_int_to_tree (TREE_TYPE (@0), w1); } ))) -+ ))) -+#endif - - /* ~A + A -> -1 */ - (simplify -diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-5.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-5.c ---- a/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-5.c 2020-03-12 19:07:22.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-5.c 2020-11-24 14:49:14.568000000 +0800 -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -fdump-tree-ch2-details" } */ -+/* { dg-options "-O2 -fno-tree-vrp -fdump-tree-ch2-details" } */ - - int is_sorted(int *a, int n) - { -diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-7.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-7.c ---- a/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-7.c 2020-03-12 19:07:22.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-7.c 2020-11-24 14:49:14.568000000 +0800 -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -fdump-tree-ch2-details --param logical-op-non-short-circuit=0" } */ -+/* { dg-options "-O2 -fno-tree-vrp -fdump-tree-ch2-details --param logical-op-non-short-circuit=0" } */ - - int is_sorted(int *a, int n, int m, int k) - { -diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/loop-15.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-15.c ---- a/gcc/testsuite/gcc.dg/tree-ssa/loop-15.c 
2020-03-12 19:07:22.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-15.c 2020-11-24 14:49:14.568000000 +0800 -@@ -19,7 +19,7 @@ int bla(void) - } - - /* Since the loop is removed, there should be no addition. */ --/* { dg-final { scan-tree-dump-times " \\+ " 0 "optimized" { xfail *-*-* } } } */ -+/* { dg-final { scan-tree-dump-times " \\+ " 0 "optimized" } } */ - /* { dg-final { scan-tree-dump-times " \\* " 1 "optimized" } } */ - - /* The if from the loop header copying remains in the code. */ -diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr23744.c b/gcc/testsuite/gcc.dg/tree-ssa/pr23744.c ---- a/gcc/testsuite/gcc.dg/tree-ssa/pr23744.c 2020-03-12 19:07:22.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr23744.c 2020-11-24 14:49:14.568000000 +0800 -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -fno-tree-ccp -fdisable-tree-evrp -fdump-tree-vrp1" } */ -+/* { dg-options "-O2 -fno-tree-ccp -fdisable-tree-evrp -fdump-tree-vrp1-details" } */ - - void h (void); - -@@ -17,4 +17,4 @@ int g (int i, int j) - return 1; - } - --/* { dg-final { scan-tree-dump-times "Folding predicate.*to 1" 1 "vrp1" } } */ -+/* { dg-final { scan-tree-dump-times "gimple_simplified" 1 "vrp1" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr92734-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr92734-2.c ---- a/gcc/testsuite/gcc.dg/tree-ssa/pr92734-2.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr92734-2.c 2020-11-24 14:49:14.568000000 +0800 -@@ -0,0 +1,76 @@ -+/* PR tree-optimization/92734 */ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fdump-tree-optimized" } */ -+/* Verify there are no binary additions or subtractions left. There can -+ be just casts and negations. */ -+/* { dg-final { scan-tree-dump-not " \[+-] " "optimized" } } */ -+ -+int -+f1 (int x, unsigned y) -+{ -+ int a = x + y; -+ return a - x; -+} -+ -+unsigned -+f2 (unsigned x, int y) -+{ -+ unsigned a = (int) x + y; -+ return a - x; -+} -+ -+int -+f3 (int x, unsigned y) -+{ -+ int a = x - y; -+ return a - x; -+} -+ -+unsigned -+f4 (unsigned x, int y) -+{ -+ unsigned a = (int) x - y; -+ return a - x; -+} -+ -+int -+f5 (unsigned x, int y) -+{ -+ int a = x - y; -+ return a + y; -+} -+ -+unsigned -+f6 (int x, unsigned y) -+{ -+ unsigned a = x - (int) y; -+ return a + y; -+} -+ -+int -+f7 (int x, unsigned y) -+{ -+ int a = x + y; -+ return x - a; -+} -+ -+unsigned -+f8 (unsigned x, int y) -+{ -+ unsigned a = (int) x + y; -+ return x - a; -+} -+ -+int -+f9 (int x, unsigned y) -+{ -+ int a = x - y; -+ return x - a; -+} -+ -+unsigned -+f10 (unsigned x, int y) -+{ -+ unsigned a = (int) x - y; -+ return x - a; -+} -diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr92734.c b/gcc/testsuite/gcc.dg/tree-ssa/pr92734.c ---- a/gcc/testsuite/gcc.dg/tree-ssa/pr92734.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr92734.c 2020-11-24 14:49:14.568000000 +0800 -@@ -0,0 +1,31 @@ -+/* PR tree-optimization/92734 */ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fdump-tree-forwprop1" } */ -+/* { dg-final { scan-tree-dump-times "return t_\[0-9]*\\\(D\\\);" 4 "forwprop1" } } */ -+ -+int -+f1 (int t) -+{ -+ return 1 - (int) (1U - t); -+} -+ -+int -+f2 (int t) -+{ -+ int a = 7U - t; -+ return 7 - a; -+} -+ -+int -+f3 (int t) -+{ -+ int a = 32U - t; -+ return 32 - a; -+} -+ -+int -+f4 (int t) -+{ -+ int a = 32 - t; -+ return (int) (32 - (unsigned) a); -+} -diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr94718-3.c b/gcc/testsuite/gcc.dg/tree-ssa/pr94718-3.c ---- a/gcc/testsuite/gcc.dg/tree-ssa/pr94718-3.c 
1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr94718-3.c 2020-11-24 14:49:14.568000000 +0800 -@@ -0,0 +1,45 @@ -+/* PR tree-optimization/94718 */ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fno-ipa-icf -fdump-tree-optimized" } */ -+/* { dg-final { scan-tree-dump-times " \\\(int\\\) " 2 "optimized" } } */ -+/* { dg-final { scan-tree-dump-times " \\\(unsigned int\\\) " 2 "optimized" } } */ -+ -+int -+f1 (int x, int y) -+{ -+ return (int) ((unsigned) x | (unsigned) y); -+} -+ -+int -+f2 (int x, int y) -+{ -+ unsigned a = x; -+ unsigned b = y; -+ return a | b; -+} -+ -+int -+f3 (int x, unsigned y) -+{ -+ return (int) ((unsigned) x | y); -+} -+ -+int -+f4 (int x, unsigned y) -+{ -+ unsigned a = x; -+ return a | y; -+} -+ -+unsigned -+f5 (int x, unsigned y) -+{ -+ return (unsigned) (x | (int) y); -+} -+ -+unsigned -+f6 (int x, unsigned y) -+{ -+ int a = y; -+ return x | a; -+} -diff -Nurp a/gcc/testsuite/gcc.dg/wrapped-binop-simplify.c b/gcc/testsuite/gcc.dg/wrapped-binop-simplify.c ---- a/gcc/testsuite/gcc.dg/wrapped-binop-simplify.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/wrapped-binop-simplify.c 2020-11-24 14:49:14.484000000 +0800 -@@ -0,0 +1,43 @@ -+/* { dg-do compile { target { { i?86-*-* x86_64-*-* s390*-*-* } && lp64 } } } */ -+/* { dg-options "-O2 -fdump-tree-vrp2-details" } */ -+/* { dg-final { scan-tree-dump-times "gimple_simplified to" 4 "vrp2" } } */ -+ -+void v1 (unsigned long *in, unsigned long *out, unsigned int n) -+{ -+ int i; -+ -+ for (i = 0; i < n; i++) -+ { -+ out[i] = in[i]; -+ } -+} -+ -+void v2 (unsigned long *in, unsigned long *out, int n) -+{ -+ int i; -+ -+ for (i = 0; i < n; i++) -+ { -+ out[i] = in[i]; -+ } -+} -+ -+void v3 (unsigned long *in, unsigned long *out, unsigned int n) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < n; i++) -+ { -+ out[i] = in[i]; -+ } -+} -+ -+void v4 (unsigned long *in, unsigned long *out, int n) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < n; i++) -+ { -+ out[i] = in[i]; -+ } -+} -diff -Nurp a/gcc/tree-ssa-propagate.c b/gcc/tree-ssa-propagate.c ---- a/gcc/tree-ssa-propagate.c 2020-11-24 14:54:42.556000000 +0800 -+++ b/gcc/tree-ssa-propagate.c 2020-11-24 14:49:12.792000000 +0800 -@@ -814,7 +814,6 @@ ssa_propagation_engine::ssa_propagate (v - ssa_prop_fini (); - } - -- - /* Return true if STMT is of the form 'mem_ref = RHS', where 'mem_ref' - is a non-volatile pointer dereference, a structure reference or a - reference to a single _DECL. Ignore volatile memory references -@@ -1071,6 +1070,14 @@ substitute_and_fold_dom_walker::before_d - stmt = gsi_stmt (i); - gimple_set_modified (stmt, true); - } -+ /* Also fold if we want to fold all statements. */ -+ else if (substitute_and_fold_engine->fold_all_stmts -+ && fold_stmt (&i, follow_single_use_edges)) -+ { -+ did_replace = true; -+ stmt = gsi_stmt (i); -+ gimple_set_modified (stmt, true); -+ } - - /* Some statements may be simplified using propagator - specific information. 
Do this before propagating -diff -Nurp a/gcc/tree-ssa-propagate.h b/gcc/tree-ssa-propagate.h ---- a/gcc/tree-ssa-propagate.h 2020-03-12 19:07:23.000000000 +0800 -+++ b/gcc/tree-ssa-propagate.h 2020-11-24 14:49:12.792000000 +0800 -@@ -100,6 +100,8 @@ class ssa_propagation_engine - class substitute_and_fold_engine - { - public: -+ substitute_and_fold_engine (bool fold_all_stmts = false) -+ : fold_all_stmts (fold_all_stmts) { } - virtual ~substitute_and_fold_engine (void) { } - virtual bool fold_stmt (gimple_stmt_iterator *) { return false; } - virtual tree get_value (tree) { return NULL_TREE; } -@@ -107,6 +109,10 @@ class substitute_and_fold_engine - bool substitute_and_fold (basic_block = NULL); - bool replace_uses_in (gimple *); - bool replace_phi_args_in (gphi *); -+ -+ /* Users like VRP can set this when they want to perform -+ folding for every propagation. */ -+ bool fold_all_stmts; - }; - - #endif /* _TREE_SSA_PROPAGATE_H */ -diff -Nurp a/gcc/tree-vrp.c b/gcc/tree-vrp.c ---- a/gcc/tree-vrp.c 2020-11-24 14:54:43.564000000 +0800 -+++ b/gcc/tree-vrp.c 2020-11-24 14:49:12.792000000 +0800 -@@ -6384,6 +6384,7 @@ vrp_prop::visit_phi (gphi *phi) - class vrp_folder : public substitute_and_fold_engine - { - public: -+ vrp_folder () : substitute_and_fold_engine (/* Fold all stmts. */ true) { } - tree get_value (tree) FINAL OVERRIDE; - bool fold_stmt (gimple_stmt_iterator *) FINAL OVERRIDE; - bool fold_predicate_in (gimple_stmt_iterator *); diff --git a/fix-issue604-ldist-dependency-fixup.patch b/fix-issue604-ldist-dependency-fixup.patch deleted file mode 100644 index 5aaf8582944b98b2465a049a5807b54e5fb48af6..0000000000000000000000000000000000000000 --- a/fix-issue604-ldist-dependency-fixup.patch +++ /dev/null @@ -1,108 +0,0 @@ -commit f6e1a4cd83190746b6544917f7526fa480ca5f18 -Author: Bin Cheng -Date: Wed May 13 11:37:47 2020 +0800 - - Add missing unit dependence vector in data dependence analysis - - Current data dependence analysis misses unit distant vector if DRs in - DDR have the same invariant access functions. This adds the vector as - the constant access function case. - - 2020-05-13 Bin Cheng - PR tree-optimization/94969 - - gcc/ - * tree-data-dependence.c (constant_access_functions): Rename to... - (invariant_access_functions): ...this. Add parameter. Check for - invariant access function, rather than constant. - (build_classic_dist_vector): Call above function. - * tree-loop-distribution.c (pg_add_dependence_edges): Add comment. - - gcc/testsuite/ - * gcc.dg/tree-ssa/pr94969.c: New test. 
- -diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr94969.c b/gcc/testsuite/gcc.dg/tree-ssa/pr94969.c -new file mode 100644 -index 00000000000..056b015f97c ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr94969.c -@@ -0,0 +1,28 @@ -+/* PR tree-optimization/52267 */ -+/* { dg-do run } */ -+/* { dg-options "-O3 -fdump-tree-ldist-details" } */ -+ -+int a = 0, b = 0, c = 0; -+struct S { -+ signed m : 7; -+ signed e : 2; -+}; -+struct S f[2] = {{0, 0}, {0, 0}}; -+struct S g = {0, 0}; -+ -+void __attribute__((noinline)) -+k() -+{ -+ for (; c <= 1; c++) { -+ f[b] = g; -+ f[b].e ^= 1; -+ } -+} -+int main() -+{ -+ k(); -+ if (f[b].e != 1) -+ __builtin_abort (); -+} -+ -+/* { dg-final { scan-tree-dump-not "ldist" "Loop 1 distributed: split to 3 loops"} } */ -diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c -index 851225e1171..5505ba46778 100644 ---- a/gcc/tree-data-ref.c -+++ b/gcc/tree-data-ref.c -@@ -4821,17 +4821,19 @@ build_classic_dist_vector_1 (struct data_dependence_relation *ddr, - return true; - } - --/* Return true when the DDR contains only constant access functions. */ -+/* Return true when the DDR contains only invariant access functions wrto. loop -+ number LNUM. */ - - static bool --constant_access_functions (const struct data_dependence_relation *ddr) -+invariant_access_functions (const struct data_dependence_relation *ddr, -+ int lnum) - { - unsigned i; - subscript *sub; - - FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub) -- if (!evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 0)) -- || !evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 1))) -+ if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum) -+ || !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum)) - return false; - - return true; -@@ -5030,7 +5032,7 @@ build_classic_dist_vector (struct data_dependence_relation *ddr, - dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); - save_dist_v (ddr, dist_v); - -- if (constant_access_functions (ddr)) -+ if (invariant_access_functions (ddr, loop_nest->num)) - add_distance_for_zero_overlaps (ddr); - - if (DDR_NB_LOOPS (ddr) > 1) -diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c -index 44423215332..b122c3964a0 100644 ---- a/gcc/tree-loop-distribution.c -+++ b/gcc/tree-loop-distribution.c -@@ -2080,7 +2080,8 @@ loop_distribution::pg_add_dependence_edges (struct graph *rdg, int dir, - this_dir = -this_dir; - - /* Known dependences can still be unordered througout the -- iteration space, see gcc.dg/tree-ssa/ldist-16.c. */ -+ iteration space, see gcc.dg/tree-ssa/ldist-16.c and -+ gcc.dg/tree-ssa/pr94969.c. */ - if (DDR_NUM_DIST_VECTS (ddr) != 1) - this_dir = 2; - /* If the overlap is exact preserve stmt order. */ diff --git a/fix-load-eliding-in-SM.patch b/fix-load-eliding-in-SM.patch deleted file mode 100644 index 5e25a3d19a93071eaf8fcc054ee8d572857cdd45..0000000000000000000000000000000000000000 --- a/fix-load-eliding-in-SM.patch +++ /dev/null @@ -1,55 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. 
- -0001-tree-optimization-94949-fix-load-eliding-in-SM.patch -0424a5ece5307cc22bbc0fe97edf4707d7a798ed - -diff -Nurp a/gcc/testsuite/gcc.dg/torture/pr94949.c b/gcc/testsuite/gcc.dg/torture/pr94949.c ---- a/gcc/testsuite/gcc.dg/torture/pr94949.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/torture/pr94949.c 2020-08-24 21:40:32.208000000 +0800 -@@ -0,0 +1,17 @@ -+/* { dg-do run } */ -+/* { dg-additional-options "-fallow-store-data-races" } */ -+ -+static int x = 1; -+static volatile int y = -1; -+int -+main() -+{ -+ for (int i = 0; i < 128; ++i) -+ { -+ if (i == y) -+ x = i; -+ } -+ if (x != 1) -+ __builtin_abort (); -+ return 0; -+} -diff -Nurp a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c ---- a/gcc/tree-ssa-loop-im.c 2020-08-24 21:40:14.164000000 +0800 -+++ b/gcc/tree-ssa-loop-im.c 2020-08-24 21:40:32.208000000 +0800 -@@ -2115,9 +2115,9 @@ execute_sm (struct loop *loop, vec - fmt_data.orig_loop = loop; - for_each_index (&ref->mem.ref, force_move_till, &fmt_data); - -+ bool always_stored = ref_always_accessed_p (loop, ref, true); - if (bb_in_transaction (loop_preheader_edge (loop)->src) -- || (! flag_store_data_races -- && ! ref_always_accessed_p (loop, ref, true))) -+ || (! flag_store_data_races && ! always_stored)) - multi_threaded_model_p = true; - - if (multi_threaded_model_p) -@@ -2132,8 +2132,10 @@ execute_sm (struct loop *loop, vec - - /* Avoid doing a load if there was no load of the ref in the loop. - Esp. when the ref is not always stored we cannot optimize it -- away later. */ -- if (ref->loaded && bitmap_bit_p (ref->loaded, loop->num)) -+ away later. But when it is not always stored we must use a conditional -+ store then. */ -+ if ((!always_stored && !multi_threaded_model_p) -+ || (ref->loaded && bitmap_bit_p (ref->loaded, loop->num))) - { - load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref)); - lim_data = init_lim_data (load); diff --git a/fix-make-ifcvt-clean-up-dead-comparisons.patch b/fix-make-ifcvt-clean-up-dead-comparisons.patch deleted file mode 100644 index 58f74aab25974b669bfe246aeddd24b162efb466..0000000000000000000000000000000000000000 --- a/fix-make-ifcvt-clean-up-dead-comparisons.patch +++ /dev/null @@ -1,69 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. ^M -The commit id of these patchs list as following in the order of time. - -0001-Make-ifcvt-clean-up-dead-comparisons.patch -f1f10541903b082d27114db38947fb31f5364bcc - -diff -Nurp a/gcc/basic-block.h b/gcc/basic-block.h ---- a/gcc/basic-block.h 2020-09-12 14:42:34.268000000 +0800 -+++ b/gcc/basic-block.h 2020-09-12 14:42:48.448000000 +0800 -@@ -507,6 +507,8 @@ ei_cond (edge_iterator ei, edge *p) - #define CLEANUP_CFGLAYOUT 32 /* Do cleanup in cfglayout mode. */ - #define CLEANUP_CFG_CHANGED 64 /* The caller changed the CFG. */ - #define CLEANUP_NO_PARTITIONING 128 /* Do not try to fix partitions. */ -+#define CLEANUP_FORCE_FAST_DCE 0x100 /* Force run_fast_dce to be called -+ at least once. */ - - /* Return true if BB is in a transaction. 
*/ - -diff -Nurp a/gcc/cfgcleanup.c b/gcc/cfgcleanup.c ---- a/gcc/cfgcleanup.c 2020-09-12 14:42:34.292000000 +0800 -+++ b/gcc/cfgcleanup.c 2020-09-12 14:42:48.448000000 +0800 -@@ -3197,7 +3197,10 @@ cleanup_cfg (int mode) - && !delete_trivially_dead_insns (get_insns (), max_reg_num ())) - break; - if ((mode & CLEANUP_CROSSJUMP) && crossjumps_occurred) -- run_fast_dce (); -+ { -+ run_fast_dce (); -+ mode &= ~CLEANUP_FORCE_FAST_DCE; -+ } - } - else - break; -@@ -3206,6 +3209,9 @@ cleanup_cfg (int mode) - if (mode & CLEANUP_CROSSJUMP) - remove_fake_exit_edges (); - -+ if (mode & CLEANUP_FORCE_FAST_DCE) -+ run_fast_dce (); -+ - /* Don't call delete_dead_jumptables in cfglayout mode, because - that function assumes that jump tables are in the insns stream. - But we also don't _have_ to delete dead jumptables in cfglayout -diff -Nurp a/gcc/ifcvt.c b/gcc/ifcvt.c ---- a/gcc/ifcvt.c 2020-09-12 14:42:34.300000000 +0800 -+++ b/gcc/ifcvt.c 2020-09-12 14:42:48.448000000 +0800 -@@ -5457,6 +5457,8 @@ if_convert (bool after_combine) - static unsigned int - rest_of_handle_if_conversion (void) - { -+ int flags = 0; -+ - if (flag_if_conversion) - { - if (dump_file) -@@ -5466,9 +5468,12 @@ rest_of_handle_if_conversion (void) - } - cleanup_cfg (CLEANUP_EXPENSIVE); - if_convert (false); -+ if (num_updated_if_blocks) -+ /* Get rid of any dead CC-related instructions. */ -+ flags |= CLEANUP_FORCE_FAST_DCE; - } - -- cleanup_cfg (0); -+ cleanup_cfg (flags); - return 0; - } - diff --git a/fix-range-set-by-vectorization-on-niter-IVs.patch b/fix-range-set-by-vectorization-on-niter-IVs.patch deleted file mode 100644 index d64a4b93bea4bcd5c6d7a0c82266138d9a5c20e5..0000000000000000000000000000000000000000 --- a/fix-range-set-by-vectorization-on-niter-IVs.patch +++ /dev/null @@ -1,74 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-tree-optimization-98117-fix-range-set-by-vectorizati.patch -cdcbef3c3310a14f2994982b44cb1f8e14c77232 - -diff --git a/gcc/testsuite/gcc.dg/torture/pr98117.c b/gcc/testsuite/gcc.dg/torture/pr98117.c -new file mode 100644 -index 00000000000..f2160257263 ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/torture/pr98117.c -@@ -0,0 +1,19 @@ -+/* { dg-do run } */ -+/* { dg-additional-options "-fno-tree-scev-cprop" } */ -+ -+unsigned char c; -+void __attribute__((noipa)) -+e() -+{ -+ do -+ { -+ } -+ while (++c); -+} -+int main() -+{ -+ e(); -+ if (c != 0) -+ __builtin_abort (); -+ return 0; -+} -diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c -index 36179188f6d..2370b879b21 100644 ---- a/gcc/tree-vect-loop-manip.c -+++ b/gcc/tree-vect-loop-manip.c -@@ -2034,13 +2034,29 @@ vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters, - niters_vector = force_gimple_operand (niters_vector, &stmts, true, var); - gsi_insert_seq_on_edge_immediate (pe, stmts); - /* Peeling algorithm guarantees that vector loop bound is at least ONE, -- we set range information to make niters analyzer's life easier. */ -+ we set range information to make niters analyzer's life easier. -+ Note the number of latch iteration value can be TYPE_MAX_VALUE so -+ we have to represent the vector niter TYPE_MAX_VALUE + 1 >> log_vf. 
*/ - if (stmts != NULL && log_vf) -- set_range_info (niters_vector, VR_RANGE, -- wi::to_wide (build_int_cst (type, 1)), -- wi::to_wide (fold_build2 (RSHIFT_EXPR, type, -- TYPE_MAX_VALUE (type), -- log_vf))); -+ { -+ if (niters_no_overflow) -+ set_range_info (niters_vector, VR_RANGE, -+ wi::one (TYPE_PRECISION (type)), -+ wi::rshift (wi::max_value (TYPE_PRECISION (type), -+ TYPE_SIGN (type)), -+ exact_log2 (const_vf), -+ TYPE_SIGN (type))); -+ /* For VF == 1 the vector IV might also overflow so we cannot -+ assert a minimum value of 1. */ -+ else if (const_vf > 1) -+ set_range_info (niters_vector, VR_RANGE, -+ wi::one (TYPE_PRECISION (type)), -+ wi::rshift (wi::max_value (TYPE_PRECISION (type), -+ TYPE_SIGN (type)) -+ - (const_vf - 1), -+ exact_log2 (const_vf), TYPE_SIGN (type)) -+ + 1); -+ } - } - *niters_vector_ptr = niters_vector; - *step_vector_ptr = step_vector; --- -2.19.1 - diff --git a/fix-regno-out-of-range.patch b/fix-regno-out-of-range.patch deleted file mode 100644 index cf2746b33bb640887a1dd5eba7d7be8dc8c4dcdd..0000000000000000000000000000000000000000 --- a/fix-regno-out-of-range.patch +++ /dev/null @@ -1,18 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-PR93561-bounds-checking-memory-overflow-for-spill_fo.patch: -d26f37a16e3ed3d75a93ffb1da10c44c36a8a36d - -diff -Nurp a/gcc/lra-assigns.c b/gcc/lra-assigns.c ---- a/gcc/lra-assigns.c 2020-04-17 16:27:46.192000000 +0800 -+++ b/gcc/lra-assigns.c 2020-04-17 16:29:37.125688580 +0800 -@@ -968,6 +968,8 @@ spill_for (int regno, bitmap spilled_pse - bitmap_clear (&spill_pseudos_bitmap); - for (j = hard_regno_nregs (hard_regno, mode) - 1; j >= 0; j--) - { -+ if (hard_regno + j >= FIRST_PSEUDO_REGISTER) -+ break; - if (try_hard_reg_pseudos_check[hard_regno + j] != curr_pseudo_check) - continue; - lra_assert (!bitmap_empty_p (&try_hard_reg_pseudos[hard_regno + j])); diff --git a/fix-strncpy-inline-warning.patch b/fix-strncpy-inline-warning.patch deleted file mode 100644 index 0fb8b30d05f1fae6fabf3a6f371aa3c1eaa50589..0000000000000000000000000000000000000000 --- a/fix-strncpy-inline-warning.patch +++ /dev/null @@ -1,371 +0,0 @@ -This backport contains 3 patchs from gcc main stream tree. -The commit id of these patchs list as following in the order of time. 
- -0001-tree-ssa-dse.c-initialize_ao_ref_for_dse-Handle-_chk.patch -4a61cf9c62212fd04f21704efc2decffe9544651 - -0002-tree-ssa-dse.c-initialize_ao_ref_for_dse-Fix-formatt.patch -caffb6e56c2914e64e65f3c336b770c178f265a3 - -0003-tree-ssa-dse.c-initialize_ao_ref_for_dse-Handle-strn.patch -192ece9e15d25fd9b6534b2a8bd271684bf76d38 - -diff --git a/gcc/testsuite/gcc.c-torture/execute/builtins/builtins.exp b/gcc/testsuite/gcc.c-torture/execute/builtins/builtins.exp -index 3560a1ff2..e9d3c9aab 100644 ---- a/gcc/testsuite/gcc.c-torture/execute/builtins/builtins.exp -+++ b/gcc/testsuite/gcc.c-torture/execute/builtins/builtins.exp -@@ -37,7 +37,7 @@ load_lib c-torture.exp - torture-init - set-torture-options $C_TORTURE_OPTIONS {{}} $LTO_TORTURE_OPTIONS - --set additional_flags "-fno-tree-loop-distribute-patterns -fno-tracer -fno-ipa-ra -fno-inline-functions" -+set additional_flags "-fno-tree-dse -fno-tree-loop-distribute-patterns -fno-tracer -fno-ipa-ra -fno-inline-functions" - if [istarget "powerpc-*-darwin*"] { - lappend additional_flags "-Wl,-multiply_defined,suppress" - } -diff --git a/gcc/testsuite/gcc.dg/builtin-stringop-chk-1.c b/gcc/testsuite/gcc.dg/builtin-stringop-chk-1.c -index afd07ddd0..40cfa0472 100644 ---- a/gcc/testsuite/gcc.dg/builtin-stringop-chk-1.c -+++ b/gcc/testsuite/gcc.dg/builtin-stringop-chk-1.c -@@ -1,7 +1,7 @@ - /* Test whether buffer overflow warnings for __*_chk builtins - are emitted properly. */ - /* { dg-do compile } */ --/* { dg-options "-O2 -Wno-format -std=gnu99 -ftrack-macro-expansion=0" } */ -+/* { dg-options "-O2 -Wno-format -std=gnu99 -ftrack-macro-expansion=0 -fno-tree-dse" } */ - // { dg-skip-if "packed attribute missing for t" { "epiphany-*-*" } } - - extern void abort (void); -diff --git a/gcc/testsuite/gcc.dg/memcpy-2.c b/gcc/testsuite/gcc.dg/memcpy-2.c -index 7f839d27a..6ad887416 100644 ---- a/gcc/testsuite/gcc.dg/memcpy-2.c -+++ b/gcc/testsuite/gcc.dg/memcpy-2.c -@@ -1,6 +1,6 @@ - /* PR middle-end/38454 */ - /* { dg-do compile } */ --/* { dg-options "-O2" } */ -+/* { dg-options "-O2 -fno-tree-dse" } */ - - typedef __SIZE_TYPE__ size_t; - -diff --git a/gcc/testsuite/gcc.dg/pr40340-1.c b/gcc/testsuite/gcc.dg/pr40340-1.c -index 8fbb206a2..6307e064c 100644 ---- a/gcc/testsuite/gcc.dg/pr40340-1.c -+++ b/gcc/testsuite/gcc.dg/pr40340-1.c -@@ -1,6 +1,6 @@ - /* PR middle-end/40340 */ - /* { dg-do compile } */ --/* { dg-options "-O2 -Wall -Wno-system-headers" } */ -+/* { dg-options "-O2 -Wall -Wno-system-headers -fno-tree-dse" } */ - - #include "pr40340.h" - -diff --git a/gcc/testsuite/gcc.dg/pr40340-2.c b/gcc/testsuite/gcc.dg/pr40340-2.c -index 10083acd1..ea76e1008 100644 ---- a/gcc/testsuite/gcc.dg/pr40340-2.c -+++ b/gcc/testsuite/gcc.dg/pr40340-2.c -@@ -1,6 +1,6 @@ - /* PR middle-end/40340 */ - /* { dg-do compile } */ --/* { dg-options "-O2 -Wall -Wno-system-headers" } */ -+/* { dg-options "-O2 -Wall -Wno-system-headers -fno-tree-dse" } */ - - #include "pr40340.h" - -diff --git a/gcc/testsuite/gcc.dg/pr40340-5.c b/gcc/testsuite/gcc.dg/pr40340-5.c -index 0e48a2ca9..99e58f2ab 100644 ---- a/gcc/testsuite/gcc.dg/pr40340-5.c -+++ b/gcc/testsuite/gcc.dg/pr40340-5.c -@@ -1,6 +1,6 @@ - /* PR middle-end/40340 */ - /* { dg-do compile } */ --/* { dg-options "-O2 -Wall -Wsystem-headers -g" } */ -+/* { dg-options "-O2 -Wall -Wsystem-headers -g -fno-tree-dse" } */ - - #define TEST3 - #include "pr40340.h" -diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-37.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-37.c -new file mode 100644 -index 000000000..56251fc34 ---- /dev/null -+++ 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-37.c -@@ -0,0 +1,60 @@ -+/* { dg-options "-O2 -fdump-tree-dse-details -fno-tree-fre" } */ -+ -+ -+#ifndef SCOPE -+#define SCOPE -+#endif -+ -+extern void frob (char *); -+ -+void g (char *s) -+{ -+ SCOPE char a[8]; -+ __builtin_strncpy (a, s, sizeof a); -+ __builtin_memset (a, 0, sizeof a); -+ frob (a); -+} -+ -+void h (char *s) -+{ -+ SCOPE char a[8]; -+ __builtin_memset (a, 0, sizeof a); -+ __builtin_strncpy (a, s, sizeof a); -+ frob (a); -+} -+ -+void i (char *s) -+{ -+ SCOPE char a[8]; -+ __builtin_strncpy (a, s, sizeof a); -+ __builtin_memset (a, 0, sizeof a - 5); -+ frob (a); -+} -+ -+void j (char *s) -+{ -+ SCOPE char a[8]; -+ __builtin_memset (a, 0, sizeof a); -+ __builtin_strncpy (a, s, sizeof a - 5); -+ frob (a); -+} -+ -+void l (char *s) -+{ -+ SCOPE char a[8]; -+ __builtin_strncpy (a, s, sizeof a); -+ __builtin_memset (a + 2, 0, sizeof a - 2); -+ frob (a); -+} -+ -+void m (char *s) -+{ -+ SCOPE char a[8]; -+ __builtin_memset (a, 0, sizeof a); -+ __builtin_strncpy (a + 2, s, sizeof a - 2); -+ frob (a); -+} -+ -+/* { dg-final { scan-tree-dump-times "Deleted dead call" 2 "dse1" } } */ -+/* { dg-final { scan-tree-dump-times "Trimming statement " 4 "dse1" } } */ -+ -diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-38.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-38.c -new file mode 100644 -index 000000000..7ae33bfd1 ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-38.c -@@ -0,0 +1,12 @@ -+/* { dg-options "-O2 -fdump-tree-dse-details -fno-tree-fre" } */ -+ -+ -+/* This changes the scope of the destination object and exposes -+ missed optimizations in DSE. */ -+#define SCOPE extern -+#include "ssa-dse-37.c" -+ -+/* { dg-final { scan-tree-dump-times "Deleted dead call" 2 "dse1" { xfail *-*-* } } } */ -+/* { dg-final { scan-tree-dump-times "Trimming statement " 4 "dse1" { xfail *-*-* } } } */ -+ -+ -diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c -index 2fb471b69..b593b0d81 100644 ---- a/gcc/tree-ssa-dse.c -+++ b/gcc/tree-ssa-dse.c -@@ -100,39 +100,42 @@ initialize_ao_ref_for_dse (gimple *stmt, ao_ref *write) - { - switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))) - { -- case BUILT_IN_MEMCPY: -- case BUILT_IN_MEMMOVE: -- case BUILT_IN_MEMSET: -- { -- tree size = NULL_TREE; -- if (gimple_call_num_args (stmt) == 3) -- size = gimple_call_arg (stmt, 2); -- tree ptr = gimple_call_arg (stmt, 0); -- ao_ref_init_from_ptr_and_size (write, ptr, size); -- return true; -- } -+ case BUILT_IN_MEMCPY: -+ case BUILT_IN_MEMMOVE: -+ case BUILT_IN_MEMSET: -+ case BUILT_IN_MEMCPY_CHK: -+ case BUILT_IN_MEMMOVE_CHK: -+ case BUILT_IN_MEMSET_CHK: -+ case BUILT_IN_STRNCPY: -+ case BUILT_IN_STRNCPY_CHK: -+ { -+ tree size = gimple_call_arg (stmt, 2); -+ tree ptr = gimple_call_arg (stmt, 0); -+ ao_ref_init_from_ptr_and_size (write, ptr, size); -+ return true; -+ } - -- /* A calloc call can never be dead, but it can make -- subsequent stores redundant if they store 0 into -- the same memory locations. */ -- case BUILT_IN_CALLOC: -- { -- tree nelem = gimple_call_arg (stmt, 0); -- tree selem = gimple_call_arg (stmt, 1); -- tree lhs; -- if (TREE_CODE (nelem) == INTEGER_CST -- && TREE_CODE (selem) == INTEGER_CST -- && (lhs = gimple_call_lhs (stmt)) != NULL_TREE) -- { -- tree size = fold_build2 (MULT_EXPR, TREE_TYPE (nelem), -- nelem, selem); -- ao_ref_init_from_ptr_and_size (write, lhs, size); -- return true; -- } -- } -+ /* A calloc call can never be dead, but it can make -+ subsequent stores redundant if they store 0 into -+ the same memory locations. 
*/ -+ case BUILT_IN_CALLOC: -+ { -+ tree nelem = gimple_call_arg (stmt, 0); -+ tree selem = gimple_call_arg (stmt, 1); -+ tree lhs; -+ if (TREE_CODE (nelem) == INTEGER_CST -+ && TREE_CODE (selem) == INTEGER_CST -+ && (lhs = gimple_call_lhs (stmt)) != NULL_TREE) -+ { -+ tree size = fold_build2 (MULT_EXPR, TREE_TYPE (nelem), -+ nelem, selem); -+ ao_ref_init_from_ptr_and_size (write, lhs, size); -+ return true; -+ } -+ } - -- default: -- break; -+ default: -+ break; - } - } - else if (is_gimple_assign (stmt)) -@@ -459,6 +462,10 @@ maybe_trim_memstar_call (ao_ref *ref, sbitmap live, gimple *stmt) - { - case BUILT_IN_MEMCPY: - case BUILT_IN_MEMMOVE: -+ case BUILT_IN_STRNCPY: -+ case BUILT_IN_MEMCPY_CHK: -+ case BUILT_IN_MEMMOVE_CHK: -+ case BUILT_IN_STRNCPY_CHK: - { - int head_trim, tail_trim; - compute_trims (ref, live, &head_trim, &tail_trim, stmt); -@@ -480,6 +487,7 @@ maybe_trim_memstar_call (ao_ref *ref, sbitmap live, gimple *stmt) - } - - case BUILT_IN_MEMSET: -+ case BUILT_IN_MEMSET_CHK: - { - int head_trim, tail_trim; - compute_trims (ref, live, &head_trim, &tail_trim, stmt); -@@ -956,54 +964,60 @@ dse_dom_walker::dse_optimize_stmt (gimple_stmt_iterator *gsi) - tree fndecl = gimple_call_fndecl (stmt); - switch (DECL_FUNCTION_CODE (fndecl)) - { -- case BUILT_IN_MEMCPY: -- case BUILT_IN_MEMMOVE: -- case BUILT_IN_MEMSET: -- { -- /* Occasionally calls with an explicit length of zero -- show up in the IL. It's pointless to do analysis -- on them, they're trivially dead. */ -- tree size = gimple_call_arg (stmt, 2); -- if (integer_zerop (size)) -- { -- delete_dead_or_redundant_call (gsi, "dead"); -- return; -- } -- -- /* If this is a memset call that initializes an object -- to zero, it may be redundant with an earlier memset -- or empty CONSTRUCTOR of a larger object. */ -- if ((DECL_FUNCTION_CODE (fndecl) == BUILT_IN_MEMSET -- || DECL_FUNCTION_CODE (fndecl) == BUILT_IN_MEMSET_CHK) -- && integer_zerop (gimple_call_arg (stmt, 1))) -- dse_optimize_redundant_stores (stmt); -- -- enum dse_store_status store_status; -- m_byte_tracking_enabled -- = setup_live_bytes_from_ref (&ref, m_live_bytes); -- store_status = dse_classify_store (&ref, stmt, -- m_byte_tracking_enabled, -- m_live_bytes); -- if (store_status == DSE_STORE_LIVE) -- return; -- -- if (store_status == DSE_STORE_MAYBE_PARTIAL_DEAD) -- { -- maybe_trim_memstar_call (&ref, m_live_bytes, stmt); -- return; -- } -- -- if (store_status == DSE_STORE_DEAD) -+ case BUILT_IN_MEMCPY: -+ case BUILT_IN_MEMMOVE: -+ case BUILT_IN_STRNCPY: -+ case BUILT_IN_MEMSET: -+ case BUILT_IN_MEMCPY_CHK: -+ case BUILT_IN_MEMMOVE_CHK: -+ case BUILT_IN_STRNCPY_CHK: -+ case BUILT_IN_MEMSET_CHK: -+ { -+ /* Occasionally calls with an explicit length of zero -+ show up in the IL. It's pointless to do analysis -+ on them, they're trivially dead. */ -+ tree size = gimple_call_arg (stmt, 2); -+ if (integer_zerop (size)) -+ { - delete_dead_or_redundant_call (gsi, "dead"); -+ return; -+ } -+ -+ /* If this is a memset call that initializes an object -+ to zero, it may be redundant with an earlier memset -+ or empty CONSTRUCTOR of a larger object. 
*/ -+ if ((DECL_FUNCTION_CODE (fndecl) == BUILT_IN_MEMSET -+ || DECL_FUNCTION_CODE (fndecl) == BUILT_IN_MEMSET_CHK) -+ && integer_zerop (gimple_call_arg (stmt, 1))) -+ dse_optimize_redundant_stores (stmt); -+ -+ enum dse_store_status store_status; -+ m_byte_tracking_enabled -+ = setup_live_bytes_from_ref (&ref, m_live_bytes); -+ store_status = dse_classify_store (&ref, stmt, -+ m_byte_tracking_enabled, -+ m_live_bytes); -+ if (store_status == DSE_STORE_LIVE) - return; -- } - -- case BUILT_IN_CALLOC: -- /* We already know the arguments are integer constants. */ -- dse_optimize_redundant_stores (stmt); -+ if (store_status == DSE_STORE_MAYBE_PARTIAL_DEAD) -+ { -+ maybe_trim_memstar_call (&ref, m_live_bytes, stmt); -+ return; -+ } - -- default: -+ if (store_status == DSE_STORE_DEAD) -+ delete_dead_or_redundant_call (gsi, "dead"); - return; -+ } -+ -+ case BUILT_IN_CALLOC: -+ /* We already know the arguments are integer constants. */ -+ dse_optimize_redundant_stores (stmt); -+ return; -+ -+ default: -+ return; - } - } - diff --git a/fix-wrong-vectorizer-code.patch b/fix-wrong-vectorizer-code.patch deleted file mode 100644 index e3387bc6ce8557dacf9ee2da0998254ee6420812..0000000000000000000000000000000000000000 --- a/fix-wrong-vectorizer-code.patch +++ /dev/null @@ -1,71 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-re-PR-bootstrap-92301-Wrong-vectorizer-code-since-r2.patch -b76f4e6c06bd494d2383c4c16d1e1a034da74641 - -diff -Nurp a/gcc/testsuite/gcc.dg/pr92301.c b/gcc/testsuite/gcc.dg/pr92301.c ---- a/gcc/testsuite/gcc.dg/pr92301.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/pr92301.c 2020-08-24 21:36:23.556000000 +0800 -@@ -0,0 +1,35 @@ -+/* { dg-do run } */ -+/* { dg-options "-O3" } */ -+ -+unsigned int m; -+ -+#define N 128 -+unsigned int a[N]; -+ -+unsigned int -+__attribute__((noipa)) -+df_count_refs (_Bool include_defs) -+{ -+ int size = 0; -+ -+ for (unsigned int regno = 0; regno < m; regno++) -+ if (include_defs) -+ size += a[regno]; -+ return size; -+} -+ -+int main(int argc, char **argv) -+{ -+ for (unsigned i = 0; i < N; i++) -+ a[i] = i; -+ -+ if (argc == 1) -+ m = 17; -+ -+ unsigned int r = df_count_refs(1); -+ __builtin_printf ("r: %d\n", r); -+ if (r != 136) -+ __builtin_abort (); -+ -+ return 0; -+} -diff -Nurp a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c ---- a/gcc/tree-vect-stmts.c 2020-08-24 21:35:23.664000000 +0800 -+++ b/gcc/tree-vect-stmts.c 2020-08-24 21:36:23.556000000 +0800 -@@ -474,6 +474,22 @@ process_use (stmt_vec_info stmt_vinfo, t - basic_block def_bb = gimple_bb (dstmt_vinfo->stmt); - basic_block bb = gimple_bb (stmt_vinfo->stmt); - -+ /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO). -+ We have to force the stmt live since the epilogue loop needs it to -+ continue computing the reduction. 
*/ -+ if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI -+ && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def -+ && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI -+ && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def -+ && bb->loop_father == def_bb->loop_father) -+ { -+ if (dump_enabled_p ()) -+ dump_printf_loc (MSG_NOTE, vect_location, -+ "reduc-stmt defining reduc-phi in the same nest.\n"); -+ vect_mark_relevant (worklist, dstmt_vinfo, relevant, true); -+ return opt_result::success (); -+ } -+ - /* case 3a: outer-loop stmt defining an inner-loop stmt: - outer-loop-header-bb: - d = dstmt_vinfo diff --git a/gcc-9.3.0.tar.xz b/gcc-10.3.0.tar.xz similarity index 79% rename from gcc-9.3.0.tar.xz rename to gcc-10.3.0.tar.xz index 36d54ee1c18e3ba1c6d8c18f1c2c93b034010bb9..d17718753fbe0fbaa3342334e7f0c10744aa6c6c 100644 Binary files a/gcc-9.3.0.tar.xz and b/gcc-10.3.0.tar.xz differ diff --git a/gcc.spec b/gcc.spec index 63bdc1072099b08930fa1f6876630f10ab9ef7a5..5818b77abc79c70ec40e1b5ae65d7f4fc3c133a1 100644 --- a/gcc.spec +++ b/gcc.spec @@ -1,7 +1,7 @@ -%global DATE 20210628 +%global DATE 20210727 -%global gcc_version 9.3.1 -%global gcc_major 9.3.1 +%global gcc_version 10.3.0 +%global gcc_major 10.3.0 %global _unpackaged_files_terminate_build 0 %global _performance_build 1 @@ -13,7 +13,7 @@ %global build_go 0 %global build_d 0 %global build_check 0 -%ifarch %{ix86} x86_64 ia64 ppc64le aarch64 +%ifarch %{ix86} x86_64 ia64 ppc64le %global build_libquadmath 1 %else %global build_libquadmath 0 @@ -38,7 +38,7 @@ %else %global build_libubsan 0 %endif -%ifarch %{ix86} x86_64 ppc ppc64 ppc64le ppc64p7 s390 s390x %{arm} aarch64 %{mips} +%ifarch %{ix86} x86_64 ppc ppc64 ppc64le ppc64p7 s390 s390x %{arm} aarch64 %{mips} riscv64 %global build_libatomic 1 %else %global build_libatomic 0 @@ -59,24 +59,24 @@ Summary: Various compilers (C, C++, Objective-C, ...) 
Name: gcc Version: %{gcc_version} -Release: %{DATE}.21 +Release: %{DATE}.1 License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD URL: https://gcc.gnu.org -Source0: https://ftp.gnu.org/gnu/gcc/gcc-9.3.0/gcc-9.3.0.tar.xz +Source0: https://ftp.gnu.org/gnu/gcc/gcc-10.3.0/gcc-10.3.0.tar.xz %global isl_version 0.16.1 BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) BuildRequires: binutils >= 2.31 BuildRequires: glibc-headers BuildRequires: libtool, zlib-devel, texinfo, flex, bison -BuildRequires: gmp-devel >= 4.1.2-8, mpfr-devel >= 2.2.1, libmpc-devel >= 0.8.1 -BuildRequires: gcc, gcc-c++ +BuildRequires: gmp-devel >= 4.1.2-8, mpfr-devel >= 3.1.0, libmpc-devel >= 0.8.1 +BuildRequires: gcc, gcc-c++, make %if %{build_go} BuildRequires: hostname, procps %endif BuildRequires: gdb -BuildRequires: glibc-devel >= 2.16 +BuildRequires: glibc-devel >= 2.17 %ifarch %{multilib_64_archs} sparcv9 ppc BuildRequires: /lib/libc.so.6 /usr/lib/libc.so /lib64/libc.so.6 /usr/lib64/libc.so %endif @@ -102,7 +102,7 @@ BuildRequires: graphviz, dblatex, texlive-collection-latex, docbook5-style-xsl Requires: cpp = %{version}-%{release} Requires: binutils >= 2.31 Conflicts: gdb < 5.1-2 -Requires: glibc-devel >= 2.16 +Requires: glibc-devel >= 2.17 Requires: libgcc >= %{version}-%{release} Requires: libgomp = %{version}-%{release} %if !%{build_ada} @@ -113,136 +113,14 @@ AutoReq: true Provides: bundled(libiberty) Provides: gcc(major) = %{gcc_major} -Patch0: enable-aarch64-libquadmath.patch -Patch1: generate-csel.patch -Patch2: delete-incorrect-smw.patch -Patch3: remove-array-index-inliner-hint.patch -Patch4: ivopts-1.patch -Patch5: ivopts-2.patch -Patch6: dont-generate-IF_THEN_ELSE.patch -Patch7: fix-cost-of-plus.patch -Patch8: div-opti.patch -Patch9: fix-SYMBOL_TINY_GOT-handling-for-ILP32.patch -Patch10: fix-ICE-during-pass-ccp.patch -Patch11: loop-split.patch -Patch12: loop-finite.patch -Patch13: loop-finite-bugfix.patch -Patch14: fix-regno-out-of-range.patch -Patch15: fix-ICE-in-vectorizable-load.patch -Patch16: address-calculation-optimization-within-loop.patch -Patch17: skip-debug-insns-when-computing-inline-costs.patch -Patch18: ipa-const-prop.patch -Patch19: ipa-const-prop-self-recursion-bugfix.patch -Patch20: ipa-const-prop-null-point-check-bugfix.patch -Patch21: change-gcc-BASE-VER.patch -Patch22: add-option-fallow-store-data-races.patch -Patch23: tighten-range-for-generating-csel.patch -Patch24: generate-csel-for-arrayref.patch -Patch25: vectorization-enhancement.patch -Patch26: ipa-struct-reorg.patch -Patch27: ipa-struct-reorg-bugfix.patch -Patch28: enable-simd-math.patch -Patch29: complete-struct-reorg.patch -Patch30: reductions-slp-enhancement.patch -Patch31: cse-in-vectorization.patch -Patch32: PR92303-Try-to-simplify-memory-subreg.patch -Patch33: Fix-PR94185.patch -Patch34: testsuite-Fix-pr94185.patch -Patch35: fix-ICE-in-vect_stmt_to_vectorize.patch -Patch36: add-checks-to-avoid-spoiling-if-conversion.patch -Patch37: fix-ICE-in-vect_create_epilog_for_reduction.patch -Patch38: fix-ICE-in-compute_live_loop_exits.patch -Patch39: fix-ICE-in-store_constructor.patch -Patch40: fix-ICE-in-verify_ssa.patch -Patch41: fix-ICE-in-reload.patch -Patch42: fix-ICE-in-declare-return-variable.patch -Patch43: simplify-removing-subregs.patch -Patch44: fix-ICE-in-vec.patch -Patch45: fix-ICE-in-gimple_op.patch -Patch46: fix-ICE-in-exact_div.patch -Patch47: fix-ICE-statement-uses-released-SSA-name.patch -Patch48: fix-ICE-in-vect_get_vec_def_for_stmt_copy.patch -Patch49: 
fix-ICE-in-vect_create_epilog_for_reduction_2.patch -Patch50: fix-ICE-in-vect_slp_analyze_node_operations.patch -Patch51: fix-ICE-in-vect_create_epilog_for_reduction_3.patch -Patch52: fix-ICE-avoid-issueing-loads-in-SM-when-possible.patch -Patch53: fix-ICE-in-vect_transform_stmt.patch -Patch54: fix-ICE-in-copy_reference_ops_from_ref.patch -Patch55: fix-ICE-in-vectorizable_condition.patch -Patch56: reduction-chain-slp-option.patch -Patch57: fix-ICE-in-model_update_limit_points_in_group.patch -Patch58: fix-do-not-build-op.patch -Patch59: fix-wrong-vectorizer-code.patch -Patch60: fix-load-eliding-in-SM.patch -Patch61: fix-SSA-update-for-vectorizer-epilogue.patch -Patch62: fix-ICE-when-vectorizing-nested-cycles.patch -Patch63: fix-avoid-bogus-uninit-warning-with-store-motion.patch -Patch64: avoid-cycling-on-vertain-subreg-reloads.patch -Patch65: fix-ICE-in-verify_target_availability.patch -Patch66: fix-ICE-vect_slp_analyze_node_operations.patch -Patch67: fix-ICE-in-extract_constrain_insn.patch -Patch68: fix-ICE-during-GIMPLE-pass-dse.patch -Patch69: ipa-const-prop-buffer-overflow-bugfix.patch -Patch70: fix-ICE-in-eliminate_stmt.patch -Patch71: fix-make-ifcvt-clean-up-dead-comparisons.patch -Patch72: fix-an-ICE-in-vect_recog_mask_conversion_pattern.patch -Patch73: fix-ICE-in-vect_update_misalignment_for_peel.patch -Patch74: redundant-loop-elimination.patch -Patch75: bf16-and-matrix-characteristic.patch -Patch76: medium-code-mode.patch -Patch77: tree-optimization-96920-another-ICE-when-vectorizing.patch -Patch78: reduction-paths-with-unhandled-live-stmt.patch -Patch79: aarch64-Fix-ash-lr-lshr-mode-3-expanders.patch -Patch80: tree-optimization-97812-fix-range-query-in-VRP-asser.patch -Patch81: aarch64-Fix-bf16-and-matrix-g++-gfortran.patch -Patch82: IRA-Handle-fully-tied-destinations.patch -Patch83: fix-ICE-in-pass-vect.patch -Patch84: SLP-VECT-Add-check-to-fix-96837.patch -Patch85: adjust-vector-cost-and-move-EXTRACT_LAST_REDUCTION-costing.patch -Patch86: fix-issue499-add-nop-convert.patch -Patch87: aarch64-fix-sve-acle-error.patch -Patch88: fix-ICE-IPA-compare-VRP-types.patch -Patch89: vectorizable-comparison-Swap-operands-only-once.patch -Patch90: sccvn-Improve-handling-of-load-masked-with-integer.patch -Patch91: speed-up-DDG-analysis-and-fix-bootstrap-compare-debug.patch -Patch92: x86-Fix-bf16-and-matrix.patch -Patch93: Fix-up-push_partial_def-little-endian-bitfield.patch -Patch94: modulo-sched-Carefully-process-loop-counter-initiali.patch -Patch95: fix-ICE-in-affine-combination.patch -Patch96: aarch64-Fix-mismatched-SVE-predicate-modes.patch -Patch97: Fix-EXTRACT_LAST_REDUCTION-segfault.patch -Patch98: fix-PR-92351-When-peeling-for-alignment.patch -Patch99: fix-addlosymdi-ICE-in-pass-reload.patch -Patch100: store-merging-Consider-also-overlapping-stores-earlier.patch -Patch101: AArch64-Fix-constraints-for-CPY-M.patch -Patch102: Fix-zero-masking-for-vcvtps2ph.patch -Patch103: re-PR-target-91124-gcc.target-i386-avx512vl-vpshldvd.patch -Patch104: fix-avx512vl-vcvttpd2dq-2-fail.patch -Patch105: fix-issue604-ldist-dependency-fixup.patch -Patch106: Apply-maximum-nunits-for-BB-SLP.patch -Patch107: Fix-interaction-between-aka-changes-and-DR1558.patch -Patch108: fix-range-set-by-vectorization-on-niter-IVs.patch -Patch109: optabs-Dont-use-scalar-conversions-for-vectors.patch -Patch110: add-fp-model-options.patch -Patch111: fix-CTOR-vectorization.patch -Patch112: PR92429-do-not-fold-when-updating.patch -Patch113: Handle-POLY_INT_CSTs-in-declare_return_value.patch -Patch114: 
Handle-POLY_INT_CST-in-copy_reference_ops_from_ref.patch -Patch115: fix-strncpy-inline-warning.patch -Patch116: fix-ICE-in-vect.patch -Patch118: Fix-type-mismatch-in-SLPed-constructors.patch -Patch119: add-check-for-pressure-in-sche1.patch -Patch120: revert-moutline-atomics.patch -Patch121: fix-ICE-in-eliminate-stmt.patch -Patch122: revise-type-before-build-MULT.patch -Patch123: Simplify-X-C1-C2.patch + %global gcc_target_platform %{_arch}-linux-gnu %if %{build_go} # Avoid stripping these libraries and binaries. %global __os_install_post \ -chmod 644 %{buildroot}%{_prefix}/%{_lib}/libgo.so.14.* \ +chmod 644 %{buildroot}%{_prefix}/%{_lib}/libgo.so.16.* \ chmod 644 %{buildroot}%{_prefix}/bin/go.gcc \ chmod 644 %{buildroot}%{_prefix}/bin/gofmt.gcc \ chmod 644 %{buildroot}%{_prefix}/libexec/gcc/%{gcc_target_platform}/%{gcc_major}/cgo \ @@ -250,7 +128,7 @@ chmod 644 %{buildroot}%{_prefix}/libexec/gcc/%{gcc_target_platform}/%{gcc_major} chmod 644 %{buildroot}%{_prefix}/libexec/gcc/%{gcc_target_platform}/%{gcc_major}/test2json \ chmod 644 %{buildroot}%{_prefix}/libexec/gcc/%{gcc_target_platform}/%{gcc_major}/vet \ %__os_install_post \ -chmod 755 %{buildroot}%{_prefix}/%{_lib}/libgo.so.14.* \ +chmod 755 %{buildroot}%{_prefix}/%{_lib}/libgo.so.16.* \ chmod 755 %{buildroot}%{_prefix}/bin/go.gcc \ chmod 755 %{buildroot}%{_prefix}/bin/gofmt.gcc \ chmod 755 %{buildroot}%{_prefix}/libexec/gcc/%{gcc_target_platform}/%{gcc_major}/cgo \ @@ -261,11 +139,11 @@ chmod 755 %{buildroot}%{_prefix}/libexec/gcc/%{gcc_target_platform}/%{gcc_major} %endif %description -The gcc package contains the GNU Compiler Collection version 9. +The gcc package contains the GNU Compiler Collection version 10. You'll need this package in order to compile C code. %package -n libgcc -Summary: GCC version 9 shared support library +Summary: GCC version 10 shared support library Autoreq: false %if !%{build_ada} Obsoletes: libgnat < %{version}-%{release} @@ -666,7 +544,7 @@ This package contains static Go libraries. %package plugin-devel Summary: Support for compiling GCC plugins Requires: gcc = %{version}-%{release} -Requires: gmp-devel >= 4.1.2-8, mpfr-devel >= 2.2.1, libmpc-devel >= 0.8.1 +Requires: gmp-devel >= 4.1.2-8, mpfr-devel >= 3.1.0, libmpc-devel >= 0.8.1 %description plugin-devel This package contains header files and other support files @@ -674,132 +552,9 @@ for compiling GCC plugins. The GCC plugin ABI is currently not stable, so plugins must be rebuilt any time GCC is updated. 
%prep -%setup -q -n gcc-9.3.0 +%setup -q -n gcc-10.3.0 /bin/pwd -%patch0 -p1 -%patch1 -p1 -%patch2 -p1 -%patch3 -p1 -%patch4 -p1 -%patch5 -p1 -%patch6 -p1 -%patch7 -p1 -%patch8 -p1 -%patch9 -p1 -%patch10 -p1 -%patch11 -p1 -%patch12 -p1 -%patch13 -p1 -%patch14 -p1 -%patch15 -p1 -%patch16 -p1 -%patch17 -p1 -%patch18 -p1 -%patch19 -p1 -%patch20 -p1 -%patch21 -p1 -%patch22 -p1 -%patch23 -p1 -%patch24 -p1 -%patch25 -p1 -%patch26 -p1 -%patch27 -p1 -%patch28 -p1 -%patch29 -p1 -%patch30 -p1 -%patch31 -p1 -%patch32 -p1 -%patch33 -p1 -%patch34 -p1 -%patch35 -p1 -%patch36 -p1 -%patch37 -p1 -%patch38 -p1 -%patch39 -p1 -%patch40 -p1 -%patch41 -p1 -%patch42 -p1 -%patch43 -p1 -%patch44 -p1 -%patch45 -p1 -%patch46 -p1 -%patch47 -p1 -%patch48 -p1 -%patch49 -p1 -%patch50 -p1 -%patch51 -p1 -%patch52 -p1 -%patch53 -p1 -%patch54 -p1 -%patch55 -p1 -%patch56 -p1 -%patch57 -p1 -%patch58 -p1 -%patch59 -p1 -%patch60 -p1 -%patch61 -p1 -%patch62 -p1 -%patch63 -p1 -%patch64 -p1 -%patch65 -p1 -%patch66 -p1 -%patch67 -p1 -%patch68 -p1 -%patch69 -p1 -%patch70 -p1 -%patch71 -p1 -%patch72 -p1 -%patch73 -p1 -%patch74 -p1 -%patch75 -p1 -%patch76 -p1 -%patch77 -p1 -%patch78 -p1 -%patch79 -p1 -%patch80 -p1 -%patch81 -p1 -%patch82 -p1 -%patch83 -p1 -%patch84 -p1 -%patch85 -p1 -%patch86 -p1 -%patch87 -p1 -%patch88 -p1 -%patch89 -p1 -%patch90 -p1 -%patch91 -p1 -%patch92 -p1 -%patch93 -p1 -%patch94 -p1 -%patch95 -p1 -%patch96 -p1 -%patch97 -p1 -%patch98 -p1 -%patch99 -p1 -%patch100 -p1 -%patch101 -p1 -%patch102 -p1 -%patch103 -p1 -%patch104 -p1 -%patch105 -p1 -%patch106 -p1 -%patch107 -p1 -%patch108 -p1 -%patch109 -p1 -%patch110 -p1 -%patch111 -p1 -%patch112 -p1 -%patch113 -p1 -%patch114 -p1 -%patch115 -p1 -%patch116 -p1 -%patch118 -p1 -%patch119 -p1 -%patch120 -p1 -%patch121 -p1 -%patch122 -p1 -%patch123 -p1 %build @@ -808,7 +563,8 @@ export CONFIG_SITE=NONE CC=gcc CXX=g++ -OPT_FLAGS=`echo %{optflags}|sed -e 's/-m64//g;s/-m32//g;s/-m31//g'` +OPT_FLAGS=`echo %{optflags}|sed -e 's/-flto=auto//g;s/-flto//g;s/-ffat-lto-objects//g'` +OPT_FLAGS=`echo $OPT_FLAGS|sed -e 's/-m64//g;s/-m32//g;s/-m31//g'` OPT_FLAGS=`echo $OPT_FLAGS|sed -e 's/-mfpmath=sse/-mfpmath=sse -msse2/g'` OPT_FLAGS=`echo $OPT_FLAGS|sed -e 's/ -pipe / /g'` OPT_FLAGS=`echo $OPT_FLAGS|sed -e 's/-Werror=format-security/ /g'` @@ -825,7 +581,7 @@ OPT_FLAGS=`echo "$OPT_FLAGS" | sed -e 's/[[:blank:]]\+/ /g'` case "$OPT_FLAGS" in *-fasynchronous-unwind-tables*) sed -i -e 's/-fno-exceptions /-fno-exceptions -fno-asynchronous-unwind-tables /' \ - gcc/Makefile.in + libgcc/Makefile.in ;; esac @@ -877,6 +633,9 @@ CC="$CC" CFLAGS="$OPT_FLAGS" \ %ifarch aarch64 --with-multilib-list=lp64 %endif +%ifarch riscv64 + --with-arch=rv64gc --with-abi=lp64d --with-multilib-list=lp64d +%endif %ifarch sparc sparcv9 sparc64 make -j32 BOOT_CFLAGS="$OPT_FLAGS" bootstrap @@ -954,6 +713,20 @@ find rpm.doc -name \*ChangeLog\* | xargs bzip2 -9 %install rm -rf %{buildroot} +mkdir -p %{buildroot} + +# RISC-V ABI wants to install everything in /lib64/lp64d or /usr/lib64/lp64d. +# Make these be symlinks to /lib64 or /usr/lib64 respectively. See: +# https://lists.fedoraproject.org/archives/list/devel@lists.fedoraproject.org/thread/DRHT5YTPK4WWVGL3GIN5BF2IKX2ODHZ3/ +%ifarch riscv64 +for d in %{buildroot}%{_libdir} %{buildroot}/%{_lib} \ + %{buildroot}%{_datadir}/gdb/auto-load/%{_prefix}/%{_lib} \ + %{buildroot}%{_prefix}/include/c++/%{gcc_major}/%{gcc_target_platform}/%{_lib}; do + mkdir -p $d + (cd $d && ln -sf . 
lp64d) +done +%endif + cd obj-%{gcc_target_platform} @@ -1083,7 +856,7 @@ mkdir -p %{buildroot}/%{_lib} mv -f %{buildroot}%{_prefix}/%{_lib}/libgcc_s.so.1 %{buildroot}/%{_lib}/libgcc_s-%{gcc_major}-%{DATE}.so.1 chmod 755 %{buildroot}/%{_lib}/libgcc_s-%{gcc_major}-%{DATE}.so.1 ln -sf libgcc_s-%{gcc_major}-%{DATE}.so.1 %{buildroot}/%{_lib}/libgcc_s.so.1 -%ifarch %{ix86} x86_64 ppc ppc64 ppc64p7 ppc64le %{arm} +%ifarch %{ix86} x86_64 ppc ppc64 ppc64p7 ppc64le %{arm} aarch64 riscv64 rm -f $FULLPATH/libgcc_s.so echo '/* GNU ld script Use the shared library, but some functions are only in @@ -1152,14 +925,14 @@ ln -sf ../../../libstdc++.so.6.*[0-9] libstdc++.so ln -sf ../../../libgfortran.so.5.* libgfortran.so ln -sf ../../../libgomp.so.1.* libgomp.so %if %{build_go} -ln -sf ../../../libgo.so.14.* libgo.so +ln -sf ../../../libgo.so.16.* libgo.so %endif %if %{build_libquadmath} ln -sf ../../../libquadmath.so.0.* libquadmath.so %endif %if %{build_d} -ln -sf ../../../libgdruntime.so.76.* libgdruntime.so -ln -sf ../../../libgphobos.so.76.* libgphobos.so +ln -sf ../../../libgdruntime.so.1.* libgdruntime.so +ln -sf ../../../libgphobos.so.1.* libgphobos.so %endif %if %{build_libitm} ln -sf ../../../libitm.so.1.* libitm.so @@ -1168,7 +941,7 @@ ln -sf ../../../libitm.so.1.* libitm.so ln -sf ../../../libatomic.so.1.* libatomic.so %endif %if %{build_libasan} -ln -sf ../../../libasan.so.5.* libasan.so +ln -sf ../../../libasan.so.6.* libasan.so mv ../../../libasan_preinit.o libasan_preinit.o %endif %if %{build_libubsan} @@ -1182,14 +955,14 @@ ln -sf ../../../../%{_lib}/libstdc++.so.6.*[0-9] libstdc++.so ln -sf ../../../../%{_lib}/libgfortran.so.5.* libgfortran.so ln -sf ../../../../%{_lib}/libgomp.so.1.* libgomp.so %if %{build_go} -ln -sf ../../../../%{_lib}/libgo.so.14.* libgo.so +ln -sf ../../../../%{_lib}/libgo.so.16.* libgo.so %endif %if %{build_libquadmath} ln -sf ../../../../%{_lib}/libquadmath.so.0.* libquadmath.so %endif %if %{build_d} -ln -sf ../../../../%{_lib}/libgdruntime.so.76.* libgdruntime.so -ln -sf ../../../../%{_lib}/libgphobos.so.76.* libgphobos.so +ln -sf ../../../../%{_lib}/libgdruntime.so.1.* libgdruntime.so +ln -sf ../../../../%{_lib}/libgphobos.so.1.* libgphobos.so %endif %if %{build_libitm} ln -sf ../../../../%{_lib}/libitm.so.1.* libitm.so @@ -1198,7 +971,7 @@ ln -sf ../../../../%{_lib}/libitm.so.1.* libitm.so ln -sf ../../../../%{_lib}/libatomic.so.1.* libatomic.so %endif %if %{build_libasan} -ln -sf ../../../../%{_lib}/libasan.so.5.* libasan.so +ln -sf ../../../../%{_lib}/libasan.so.6.* libasan.so mv ../../../../%{_lib}/libasan_preinit.o libasan_preinit.o %endif %if %{build_libubsan} @@ -1302,8 +1075,8 @@ ln -sf ../`echo ../../../../lib/libgfortran.so.5.* | sed s~/lib/~/lib64/~` 64/li ln -sf ../`echo ../../../../lib/libgomp.so.1.* | sed s~/lib/~/lib64/~` 64/libgomp.so %if %{build_go} rm -f libgo.so -echo 'INPUT ( %{_prefix}/lib/'`echo ../../../../lib/libgo.so.14.* | sed 's,^.*libg,libg,'`' )' > libgo.so -echo 'INPUT ( %{_prefix}/lib64/'`echo ../../../../lib/libgo.so.14.* | sed 's,^.*libg,libg,'`' )' > 64/libgo.so +echo 'INPUT ( %{_prefix}/lib/'`echo ../../../../lib/libgo.so.16.* | sed 's,^.*libg,libg,'`' )' > libgo.so +echo 'INPUT ( %{_prefix}/lib64/'`echo ../../../../lib/libgo.so.16.* | sed 's,^.*libg,libg,'`' )' > 64/libgo.so %endif %if %{build_libquadmath} rm -f libquadmath.so @@ -1312,10 +1085,10 @@ echo 'INPUT ( %{_prefix}/lib64/'`echo ../../../../lib/libquadmath.so.0.* | sed ' %endif %if %{build_d} rm -f libgdruntime.so libgphobos.so -echo 'INPUT ( %{_prefix}/lib/'`echo 
../../../../lib/libgdruntime.so.76.* | sed 's,^.*libg,libg,'`' )' > libgdruntime.so -echo 'INPUT ( %{_prefix}/lib64/'`echo ../../../../lib/libgdruntime.so.76.* | sed 's,^.*libg,libg,'`' )' > 64/libgdruntime.so -echo 'INPUT ( %{_prefix}/lib/'`echo ../../../../lib/libgphobos.so.76.* | sed 's,^.*libg,libg,'`' )' > libgphobos.so -echo 'INPUT ( %{_prefix}/lib64/'`echo ../../../../lib/libgphobos.so.76.* | sed 's,^.*libg,libg,'`' )' > 64/libgphobos.so +echo 'INPUT ( %{_prefix}/lib/'`echo ../../../../lib/libgdruntime.so.1.* | sed 's,^.*libg,libg,'`' )' > libgdruntime.so +echo 'INPUT ( %{_prefix}/lib64/'`echo ../../../../lib/libgdruntime.so.1.* | sed 's,^.*libg,libg,'`' )' > 64/libgdruntime.so +echo 'INPUT ( %{_prefix}/lib/'`echo ../../../../lib/libgphobos.so.1.* | sed 's,^.*libg,libg,'`' )' > libgphobos.so +echo 'INPUT ( %{_prefix}/lib64/'`echo ../../../../lib/libgphobos.so.1.* | sed 's,^.*libg,libg,'`' )' > 64/libgphobos.so %endif %if %{build_libitm} rm -f libitm.so @@ -1329,8 +1102,8 @@ echo 'INPUT ( %{_prefix}/lib64/'`echo ../../../../lib/libatomic.so.1.* | sed 's, %endif %if %{build_libasan} rm -f libasan.so -echo 'INPUT ( %{_prefix}/lib/'`echo ../../../../lib/libasan.so.5.* | sed 's,^.*liba,liba,'`' )' > libasan.so -echo 'INPUT ( %{_prefix}/lib64/'`echo ../../../../lib/libasan.so.5.* | sed 's,^.*liba,liba,'`' )' > 64/libasan.so +echo 'INPUT ( %{_prefix}/lib/'`echo ../../../../lib/libasan.so.6.* | sed 's,^.*liba,liba,'`' )' > libasan.so +echo 'INPUT ( %{_prefix}/lib64/'`echo ../../../../lib/libasan.so.6.* | sed 's,^.*liba,liba,'`' )' > 64/libasan.so mv ../../../../lib64/libasan_preinit.o 64/libasan_preinit.o %endif %if %{build_libubsan} @@ -1401,8 +1174,8 @@ ln -sf ../`echo ../../../../lib64/libgfortran.so.5.* | sed s~/../lib64/~/~` 32/l ln -sf ../`echo ../../../../lib64/libgomp.so.1.* | sed s~/../lib64/~/~` 32/libgomp.so %if %{build_go} rm -f libgo.so -echo 'INPUT ( %{_prefix}/lib64/'`echo ../../../../lib64/libgo.so.14.* | sed 's,^.*libg,libg,'`' )' > libgo.so -echo 'INPUT ( %{_prefix}/lib/'`echo ../../../../lib64/libgo.so.14.* | sed 's,^.*libg,libg,'`' )' > 32/libgo.so +echo 'INPUT ( %{_prefix}/lib64/'`echo ../../../../lib64/libgo.so.16.* | sed 's,^.*libg,libg,'`' )' > libgo.so +echo 'INPUT ( %{_prefix}/lib/'`echo ../../../../lib64/libgo.so.16.* | sed 's,^.*libg,libg,'`' )' > 32/libgo.so %endif %if %{build_libquadmath} rm -f libquadmath.so @@ -1411,10 +1184,10 @@ echo 'INPUT ( %{_prefix}/lib/'`echo ../../../../lib64/libquadmath.so.0.* | sed ' %endif %if %{build_d} rm -f libgdruntime.so libgphobos.so -echo 'INPUT ( %{_prefix}/lib64/'`echo ../../../../lib64/libgdruntime.so.76.* | sed 's,^.*libg,libg,'`' )' > libgdruntime.so -echo 'INPUT ( %{_prefix}/lib/'`echo ../../../../lib64/libgdruntime.so.76.* | sed 's,^.*libg,libg,'`' )' > 32/libgdruntime.so -echo 'INPUT ( %{_prefix}/lib64/'`echo ../../../../lib64/libgphobos.so.76.* | sed 's,^.*libg,libg,'`' )' > libgphobos.so -echo 'INPUT ( %{_prefix}/lib/'`echo ../../../../lib64/libgphobos.so.76.* | sed 's,^.*libg,libg,'`' )' > 32/libgphobos.so +echo 'INPUT ( %{_prefix}/lib64/'`echo ../../../../lib64/libgdruntime.so.1.* | sed 's,^.*libg,libg,'`' )' > libgdruntime.so +echo 'INPUT ( %{_prefix}/lib/'`echo ../../../../lib64/libgdruntime.so.1.* | sed 's,^.*libg,libg,'`' )' > 32/libgdruntime.so +echo 'INPUT ( %{_prefix}/lib64/'`echo ../../../../lib64/libgphobos.so.1.* | sed 's,^.*libg,libg,'`' )' > libgphobos.so +echo 'INPUT ( %{_prefix}/lib/'`echo ../../../../lib64/libgphobos.so.1.* | sed 's,^.*libg,libg,'`' )' > 32/libgphobos.so %endif %if %{build_libitm} 
rm -f libitm.so @@ -1428,8 +1201,8 @@ echo 'INPUT ( %{_prefix}/lib/'`echo ../../../../lib64/libatomic.so.1.* | sed 's, %endif %if %{build_libasan} rm -f libasan.so -echo 'INPUT ( %{_prefix}/lib64/'`echo ../../../../lib64/libasan.so.5.* | sed 's,^.*liba,liba,'`' )' > libasan.so -echo 'INPUT ( %{_prefix}/lib/'`echo ../../../../lib64/libasan.so.5.* | sed 's,^.*liba,liba,'`' )' > 32/libasan.so +echo 'INPUT ( %{_prefix}/lib64/'`echo ../../../../lib64/libasan.so.6.* | sed 's,^.*liba,liba,'`' )' > libasan.so +echo 'INPUT ( %{_prefix}/lib/'`echo ../../../../lib64/libasan.so.6.* | sed 's,^.*liba,liba,'`' )' > 32/libasan.so mv ../../../../lib/libasan_preinit.o 32/libasan_preinit.o %endif %if %{build_libubsan} @@ -1566,8 +1339,8 @@ chmod 755 %{buildroot}%{_prefix}/%{_lib}/libcc1.so.0.* chmod 755 %{buildroot}%{_prefix}/%{_lib}/libquadmath.so.0.* %endif %if %{build_d} -chmod 755 %{buildroot}%{_prefix}/%{_lib}/libgdruntime.so.76.* -chmod 755 %{buildroot}%{_prefix}/%{_lib}/libgphobos.so.76.* +chmod 755 %{buildroot}%{_prefix}/%{_lib}/libgdruntime.so.1.* +chmod 755 %{buildroot}%{_prefix}/%{_lib}/libgphobos.so.1.* %endif %if %{build_libitm} chmod 755 %{buildroot}%{_prefix}/%{_lib}/libitm.so.1.* @@ -1576,7 +1349,7 @@ chmod 755 %{buildroot}%{_prefix}/%{_lib}/libitm.so.1.* chmod 755 %{buildroot}%{_prefix}/%{_lib}/libatomic.so.1.* %endif %if %{build_libasan} -chmod 755 %{buildroot}%{_prefix}/%{_lib}/libasan.so.5.* +chmod 755 %{buildroot}%{_prefix}/%{_lib}/libasan.so.6.* %endif %if %{build_libubsan} chmod 755 %{buildroot}%{_prefix}/%{_lib}/libubsan.so.1.* @@ -1589,7 +1362,7 @@ chmod 755 %{buildroot}%{_prefix}/%{_lib}/liblsan.so.0.* %endif %if %{build_go} # Avoid stripping these libraries and binaries. -chmod 644 %{buildroot}%{_prefix}/%{_lib}/libgo.so.14.* +chmod 644 %{buildroot}%{_prefix}/%{_lib}/libgo.so.16.* chmod 644 %{buildroot}%{_prefix}/bin/go.gcc chmod 644 %{buildroot}%{_prefix}/bin/gofmt.gcc chmod 644 %{buildroot}%{_prefix}/libexec/gcc/%{gcc_target_platform}/%{gcc_major}/cgo @@ -1781,6 +1554,7 @@ end %{_prefix}/bin/gcc-ar %{_prefix}/bin/gcc-nm %{_prefix}/bin/gcc-ranlib +%{_prefix}/bin/lto-dump %ifarch ppc %{_prefix}/bin/%{_target_platform}-gcc %endif @@ -1796,6 +1570,7 @@ end %{_mandir}/man1/gcov.1* %{_mandir}/man1/gcov-tool.1* %{_mandir}/man1/gcov-dump.1* +%{_mandir}/man1/lto-dump.1* %{_infodir}/gcc* %dir %{_prefix}/lib/gcc %dir %{_prefix}/lib/gcc/%{gcc_target_platform} @@ -1820,13 +1595,13 @@ end %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/unwind.h %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/omp.h %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/openacc.h +%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/acc_prof.h %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/stdint.h %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/stdint-gcc.h %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/stdalign.h %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/stdnoreturn.h %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/stdatomic.h %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/gcov.h -%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/simdmath.h %ifarch %{ix86} x86_64 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/mmintrin.h %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/xmmintrin.h @@ -1906,6 +1681,11 @@ end %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/movdirintrin.h 
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/waitpkgintrin.h %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/cldemoteintrin.h +%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/avx512bf16vlintrin.h +%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/avx512bf16intrin.h +%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/enqcmdintrin.h +%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/avx512vp2intersectintrin.h +%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/avx512vp2intersectvlintrin.h %endif %ifarch ia64 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/ia64intrin.h @@ -2079,7 +1859,7 @@ end %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/liblsan_preinit.o %endif %{_prefix}/libexec/getconf/default -%doc gcc/README* rpm.doc/changelogs/gcc/ChangeLog* +%doc gcc/README* rpm.doc/changelogs/gcc/ChangeLog* %{!?_licensedir:%global license %%doc} %license gcc/COPYING* COPYING.RUNTIME @@ -2141,6 +1921,10 @@ end %dir %{_datadir}/gdb/auto-load %dir %{_datadir}/gdb/auto-load/%{_prefix} %dir %{_datadir}/gdb/auto-load/%{_prefix}/%{_lib}/ +# Package symlink to keep compatibility +%ifarch riscv64 +%{_datadir}/gdb/auto-load/%{_prefix}/%{_lib}/lp64d +%endif %{_datadir}/gdb/auto-load/%{_prefix}/%{_lib}/libstdc*gdb.py* %dir %{_prefix}/share/gcc-%{gcc_major} %dir %{_prefix}/share/gcc-%{gcc_major}/python @@ -2254,7 +2038,6 @@ end %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/finclude/ieee_arithmetic.mod %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/finclude/ieee_exceptions.mod %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/finclude/ieee_features.mod -%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/finclude/simdmath_f.h %{_prefix}/libexec/gcc/%{gcc_target_platform}/%{gcc_major}/f951 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/libgfortran.spec %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/libcaf_single.a @@ -2336,8 +2119,8 @@ end %doc rpm.doc/gdc/* %files -n libgphobos -%{_prefix}/%{_lib}/libgdruntime.so.76* -%{_prefix}/%{_lib}/libgphobos.so.76* +%{_prefix}/%{_lib}/libgdruntime.so.1* +%{_prefix}/%{_lib}/libgphobos.so.1* %doc rpm.doc/libphobos/* %files -n libgphobos-static @@ -2375,16 +2158,19 @@ end %dir %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/64 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/64/adainclude %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/64/adalib +%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/64/ada_target_properties %endif %ifarch %{multilib_64_archs} %dir %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/32 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/32/adainclude %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/32/adalib +%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/32/ada_target_properties %endif %ifarch sparcv9 sparc64 ppc ppc64 ppc64p7 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/adainclude %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/adalib %endif +%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/ada_target_properties %{_prefix}/libexec/gcc/%{gcc_target_platform}/%{gcc_major}/gnat1 %doc rpm.doc/changelogs/gcc/ada/ChangeLog* @@ -2539,7 +2325,7 @@ end %if %{build_libasan} %files -n libasan -%{_prefix}/%{_lib}/libasan.so.5* +%{_prefix}/%{_lib}/libasan.so.6* %files -n libasan-static %dir %{_prefix}/lib/gcc @@ -2659,7 +2445,7 @@ end %doc rpm.doc/go/* %files -n libgo -%attr(755,root,root) %{_prefix}/%{_lib}/libgo.so.14* 
+%attr(755,root,root) %{_prefix}/%{_lib}/libgo.so.16* %doc rpm.doc/libgo/* %files -n libgo-devel @@ -2732,210 +2518,8 @@ end %doc rpm.doc/changelogs/libcc1/ChangeLog* %changelog -* Mon Jun 28 2021 eastb233 - 9.3.1-20210628.21 -- complete-struct-reorg.patch: Revert modification - -* Fri May 28 2021 eastb233 - 9.3.1-20210528.20 -- gcc.spec: Disable bootstrap to reduce building time - -* Wed Apr 28 2021 eastb233 - 9.3.1-20210428.19 -- add-fp-model-options.patch: New file -- enable-simd-math.patch: Enable simd math library in C and Fortran -- fix-CTOR-vectorization.patch: New file -- fix-range-set-by-vectorization-on-niter-IVs.patch: New file -- medium-code-mode.patch: Fix bugs when used with fpic -- optabs-Dont-use-scalar-conversions-for-vectors.patch: New file -- PR92429-do-not-fold-when-updating.patch: New file -- redundant-loop-elimination.patch: Fix some programming specifications -- fix-ICE-in-vect.patch: New file -- Fix-type-mismatch-in-SLPed-constructors.patch: New file -- add-check-for-pressure-in-sche1.patch: New file -- revert-moutline-atomics.patch: New file -- fix-ICE-in-eliminate-stmt.patch: New file -- revise-type-before-build-MULT.patch: New file -- Simplify-X-C1-C2.patch: New file -- gcc.spec: Add new patches - -* Wed Apr 21 2021 eastb233 - 9.3.1-20210204.18 -- Type:bugfix -- ID:NA -- SUG:NA -- DESC:NA - -* Mon Mar 15 2021 tianwei - 9.3.1-20210204.17 -- Type:bugfix +* Tue Jul 27 2021 eastb233 - 10.3.0-20210727.1 +- Type:Init - ID:NA - SUG:NA -- DESC:add SP and FS for x86 - -* Thu Feb 04 2021 eastb233 - 9.3.1-20210204.16 -- Handle-POLY_INT_CSTs-in-declare_return_value.patch: New file -- Handle-POLY_INT_CST-in-copy_reference_ops_from_ref.patch: New file -- fix-strncpy-inline-warning.patch: New file - -* Fri Jan 15 2021 eastb233 - 9.3.1-20210115.15 -- Fix-interaction-between-aka-changes-and-DR1558.patch: New file - -* Mon Jan 04 2021 eastb233 - 9.3.1-20210104.14 -- gcc.spec: Pack arm_bf16.h and arm_sve.h in aarch64 port - -* Tue Dec 29 2020 eastb233 - 9.3.1-20201229.13 -- avoid-cycling-on-vertain-subreg-reloads.patch: Add patch source comment -- change-gcc-BASE-VER.patch: Likewise -- dont-generate-IF_THEN_ELSE.patch: Likewise -- fix-ICE-in-compute_live_loop_exits.patch: Likewise -- fix-ICE-in-eliminate_stmt.patch: Likewise -- fix-ICE-in-vect_create_epilog_for_reduction.patch: Likewise -- fix-ICE-in-vect_stmt_to_vectorize.patch: Likewise -- fix-ICE-in-verify_ssa.patch: Likewise -- fix-ICE-when-vectorizing-nested-cycles.patch: Likewise -- fix-cost-of-plus.patch: Likewise -- ipa-const-prop-self-recursion-bugfix.patch: Likewise -- simplify-removing-subregs.patch: Likewise -- medium-code-mode.patch: Bugfix -- fix-when-peeling-for-alignment.patch: Move to ... -- fix-PR-92351-When-peeling-for-alignment.patch: ... 
this -- AArch64-Fix-constraints-for-CPY-M.patch: New file -- Apply-maximum-nunits-for-BB-SLP.patch: New file -- Fix-EXTRACT_LAST_REDUCTION-segfault.patch: New file -- Fix-up-push_partial_def-little-endian-bitfield.patch: New file -- Fix-zero-masking-for-vcvtps2ph.patch: New file -- IRA-Handle-fully-tied-destinations.patch: New file -- SLP-VECT-Add-check-to-fix-96837.patch: New file -- aarch64-Fix-ash-lr-lshr-mode-3-expanders.patch: New file -- aarch64-Fix-bf16-and-matrix-g++-gfortran.patch: New file -- aarch64-Fix-mismatched-SVE-predicate-modes.patch: New file -- aarch64-fix-sve-acle-error.patch: New file -- adjust-vector-cost-and-move-EXTRACT_LAST_REDUCTION-costing.patch: New file -- bf16-and-matrix-characteristic.patch: New file -- fix-ICE-IPA-compare-VRP-types.patch: New file -- fix-ICE-in-affine-combination.patch: New file -- fix-ICE-in-pass-vect.patch: New file -- fix-ICE-in-vect_update_misalignment_for_peel.patch: New file -- fix-addlosymdi-ICE-in-pass-reload.patch: New file -- fix-an-ICE-in-vect_recog_mask_conversion_pattern.patch: New file -- fix-avx512vl-vcvttpd2dq-2-fail.patch: New file -- fix-issue499-add-nop-convert.patch: New file -- fix-issue604-ldist-dependency-fixup.patch: New file -- modulo-sched-Carefully-process-loop-counter-initiali.patch: New file -- re-PR-target-91124-gcc.target-i386-avx512vl-vpshldvd.patch: New file -- reduction-paths-with-unhandled-live-stmt.patch: New file -- redundant-loop-elimination.patch: New file -- sccvn-Improve-handling-of-load-masked-with-integer.patch: New file -- speed-up-DDG-analysis-and-fix-bootstrap-compare-debug.patch: New file -- store-merging-Consider-also-overlapping-stores-earlier.patch: New file -- tree-optimization-96920-another-ICE-when-vectorizing.patch: New file -- tree-optimization-97812-fix-range-query-in-VRP-asser.patch: New file -- vectorizable-comparison-Swap-operands-only-once.patch: New file -- x86-Fix-bf16-and-matrix.patch: New file -- gcc.spec: Add uploaded patch - -* Tue Sep 22 2020 eastb233 - 9.3.1-20200922.12 -- fix-when-peeling-for-alignment.patch: New file - -* Tue Sep 22 2020 eastb233 - 9.3.1-20200922.11 -- gcc.spec: Delete pkgversion - -* Mon Sep 21 2020 eastb233 - 9.3.1-20200921.10 -- complete-struct-reorg.patch: Fix secure coding -- ipa-struct-reorg-bugfix.patch: Likewise -- simplify-removing-subregs.patch: Likewise -- fix-ICE-in-eliminate_stmt.patch: New file -- fix-make-ifcvt-clean-up-dead-comparisons.patch: New file -- ipa-const-prop-buffer-overflow-bugfix.patch: New file -- gcc.spec: Add pkgversion, pack libquadmath library on aarch64 - -* Thu Sep 15 2020 eastb233 - 9.3.1-20200915.9 -- avoid-cycling-on-vertain-subreg-reloads.patch: New file -- fix-ICE-in-verify_target_availability.patch: New file -- fix-ICE-in-extract_constrain_insn.patch: New file -- fix-ICE-vect_slp_analyze_node_operations.patch: New file -- ipa-const-prop-null-point-check-bugfix.patch: New file - -* Thu Sep 15 2020 huanghaitao - 9.3.1-20200911.8 -- revert patches to fix build errors - -* Fri Sep 11 2020 eastb233 - 9.3.1-20200911.7 -- avoid-cycling-on-vertain-subreg-reloads.patch: New file -- fix-ICE-in-verify_target_availability.patch: New file -- fix-ICE-in-extract_constrain_insn.patch: New file -- fix-ICE-vect_slp_analyze_node_operations.patch: New file -- ipa-const-prop-null-point-check-bugfix.patch: New file - -* Fri Sep 11 2020 eastb233 - 9.3.1-20200911.6 -- fix-ICE-during-GIMPLE-pass-dse.patch: Add test case - -* Wed Sep 09 2020 jdkboy - 9.3.1-20200909.5 -- add backport-fix-ICE-during-GIMPLE-pass-dse.patch - -* Sat Sep 05 2020 
eastb233 - 9.3.1-20200905.4 -- fix-avoid-bogus-uninit-warning-with-store-motion.patch: New file - -* Mon Aug 28 2020 eastb233 - 9.3.1-20200828.4 - - Add add-checks-to-avoid-spoiling-if-conversion.patch - - Add add-option-fallow-store-data-races.patch - - Add complete-struct-reorg.patch - - Add cse-in-vectorization.patch - - Add enable-simd-math.patch - - Add fix-ICE-avoid-issueing-loads-in-SM-when-possible.patch - - Add fix-ICE-in-compute_live_loop_exits.patch - - Add fix-ICE-in-copy_reference_ops_from_ref.patch - - Add fix-ICE-in-declare-return-variable.patch - - Add fix-ICE-in-exact_div.patch - - Add fix-ICE-in-gimple_op.patch - - Add fix-ICE-in-model_update_limit_points_in_group.patch - - Add fix-ICE-in-reload.patch - - Add fix-ICE-in-store_constructor.patch - - Add fix-ICE-in-vec.patch - - Add fix-ICE-in-vect_create_epilog_for_reduction.patch - - Add fix-ICE-in-vect_create_epilog_for_reduction_2.patch - - Add fix-ICE-in-vect_create_epilog_for_reduction_3.patch - - Add fix-ICE-in-vect_get_vec_def_for_stmt_copy.patch - - Add fix-ICE-in-vect_slp_analyze_node_operations.patch - - Add fix-ICE-in-vect_stmt_to_vectorize.patch - - Add fix-ICE-in-vect_transform_stmt.patch - - Add fix-ICE-in-vectorizable_condition.patch - - Add fix-ICE-in-verify_ssa.patch - - Add fix-ICE-statement-uses-released-SSA-name.patch - - Add fix-ICE-when-vectorizing-nested-cycles.patch - - Add fix-SSA-update-for-vectorizer-epilogue.patch - - Add fix-do-not-build-op.patch - - Add fix-load-eliding-in-SM.patch - - Add fix-wrong-vectorizer-code.patch - - Add generate-csel-for-arrayref.patch - - Add ipa-const-prop-self-recursion-bugfix.patch - - Add ipa-const-prop.patch - - Add ipa-struct-reorg-bugfix.patch - - Add ipa-struct-reorg.patch - - Add medium-code-mode.patch - - Add reduction-chain-slp-option.patch - - Add reductions-slp-enhancement.patch - - Add simplify-removing-subregs.patch - - Add tighten-range-for-generating-csel.patch - - Add vectorization-enhancement.patch - -* Mon Jun 29 2020 eastb233 - 9.3.1-20200629.3 -- gcc.spec: Change release version - -* Mon Jun 29 2020 eastb233 - 9.3.1-20200312.3 -- PR92303-Try-to-simplify-memory-subreg.patch: New file, fix ICE -- Fix-PR94185.patch: Likewise -- testsuite-Fix-pr94185.patch: Likewise -- gcc.spec: Add new patch - -* Fri May 22 2020 eastb233 - 9.3.1-20200312.2 -- gcc.spec: Modify Release to %{release}.2 - -* Wed May 20 2020 eastb233 - 9.3.1-20200312.h1 -- address-calculation-optimization-within-loop.patch: Modify testsuite -- generate-csel.patch: Modify testsuite -- change-gcc-BASE-VER.patch: New file, change GCC base version -- gcc.spec: Add new Patch, change GCC version to 9.3.1 - -* Tue Apr 28 2020 eastb233 - 9.3.0-20200312.h1 -- Type:modify -- Desc:modify patch name and gcc.spec - -* Sun Apr 26 2020 jdkboy - 9.3.0-20200312.h1 -- Type:init -- Desc:Init gcc 9.3.0 +- DESC:Init GCC 10.3.0 repository diff --git a/generate-csel-for-arrayref.patch b/generate-csel-for-arrayref.patch deleted file mode 100644 index c94311eef81f08f7992c9e65836102771fbb1768..0000000000000000000000000000000000000000 --- a/generate-csel-for-arrayref.patch +++ /dev/null @@ -1,218 +0,0 @@ -diff -uprN a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c ---- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c 2020-05-26 21:03:43.132721856 +0800 -+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c 2020-05-19 20:12:32.655794652 +0800 -@@ -9,4 +9,4 @@ unsigned test(unsigned k, unsigned b) { - return a[0]+a[1]; - } - --/* { dg-final { scan-tree-dump "Conditional store replacement" 
"cselim" { xfail *-*-* } } } */ -+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ -diff -uprN a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c ---- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c 2020-05-26 21:03:43.132721856 +0800 -+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c 2020-05-19 20:12:32.667794652 +0800 -@@ -11,4 +11,4 @@ unsigned test(unsigned k, unsigned b) { - return a[0]+a[1]; - } - --/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */ -+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ -diff -uprN a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c ---- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c 2020-05-26 21:03:43.132721856 +0800 -+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c 2020-05-19 20:12:32.667794652 +0800 -@@ -13,4 +13,4 @@ int test(int b, int k) { - return a.data[0] + a.data[1]; - } - --/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */ -+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ -diff -uprN a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c ---- a/gcc/tree-ssa-phiopt.c 2020-05-26 21:03:43.132721856 +0800 -+++ b/gcc/tree-ssa-phiopt.c 2020-05-26 21:02:02.872006469 +0800 -@@ -47,6 +47,7 @@ along with GCC; see the file COPYING3. - #include "params.h" - #include "case-cfn-macros.h" - #include "tree-eh.h" -+#include "inchash.h" - - static unsigned int tree_ssa_phiopt_worker (bool, bool, bool); - static bool two_value_replacement (basic_block, basic_block, edge, gphi *, -@@ -1984,6 +1985,18 @@ struct name_to_bb - basic_block bb; - }; - -+/* A hash-table of ARRAY_REF with a base of VAR_DECL and an offset of -+ SSA_NAME, and in which basic block it was seen, which would constitute -+ a no-trap region for same accessed. */ -+struct array_ref_to_bb -+{ -+ unsigned int ssa_name_ver; -+ unsigned int phase; -+ HOST_WIDE_INT size; -+ tree var_decl; -+ basic_block bb; -+}; -+ - /* Hashtable helpers. */ - - struct ssa_names_hasher : free_ptr_hash -@@ -1992,6 +2005,12 @@ struct ssa_names_hasher : free_ptr_hash - static inline bool equal (const name_to_bb *, const name_to_bb *); - }; - -+struct array_refs_hasher : free_ptr_hash -+{ -+ static inline hashval_t hash (const array_ref_to_bb *); -+ static inline bool equal (const array_ref_to_bb *, const array_ref_to_bb *); -+}; -+ - /* Used for quick clearing of the hash-table when we see calls. - Hash entries with phase < nt_call_phase are invalid. */ - static unsigned int nt_call_phase; -@@ -2005,6 +2024,16 @@ ssa_names_hasher::hash (const name_to_bb - ^ (n->offset << 6) ^ (n->size << 3); - } - -+inline hashval_t -+array_refs_hasher::hash (const array_ref_to_bb *n) -+{ -+ inchash::hash hstate (0); -+ hstate.add_int (n->ssa_name_ver); -+ hstate.add_hwi (n->size); -+ hstate.add_ptr (n->var_decl); -+ return hstate.end (); -+} -+ - /* The equality function of *P1 and *P2. 
*/ - - inline bool -@@ -2016,11 +2045,21 @@ ssa_names_hasher::equal (const name_to_b - && n1->size == n2->size; - } - -+inline bool -+array_refs_hasher::equal (const array_ref_to_bb *n1, const array_ref_to_bb *n2) -+{ -+ return n1->ssa_name_ver == n2->ssa_name_ver -+ && n1->size == n2->size -+ && n1->var_decl == n2->var_decl; -+} -+ - class nontrapping_dom_walker : public dom_walker - { - public: - nontrapping_dom_walker (cdi_direction direction, hash_set *ps) -- : dom_walker (direction), m_nontrapping (ps), m_seen_ssa_names (128) {} -+ : dom_walker (direction), m_nontrapping (ps), -+ m_seen_ssa_names (128), m_seen_array_refs (128) -+ {} - - virtual edge before_dom_children (basic_block); - virtual void after_dom_children (basic_block); -@@ -2028,16 +2067,18 @@ public: - private: - - /* We see the expression EXP in basic block BB. If it's an interesting -- expression (an MEM_REF through an SSA_NAME) possibly insert the -- expression into the set NONTRAP or the hash table of seen expressions. -- STORE is true if this expression is on the LHS, otherwise it's on -- the RHS. */ -+ expression (an MEM_REF through an SSA_NAME or an ARRAY_REF with a base -+ of VAR_DECL and an offset of SSA_NAME) possibly insert the expression -+ into the set NONTRAP or the hash table of seen expressions. STORE -+ is true if this expression is on the LHS, otherwise it's on the RHS. */ - void add_or_mark_expr (basic_block, tree, bool); -+ void add_or_mark_array_ref (basic_block, tree); - - hash_set *m_nontrapping; - - /* The hash table for remembering what we've seen. */ - hash_table m_seen_ssa_names; -+ hash_table m_seen_array_refs; - }; - - /* Called by walk_dominator_tree, when entering the block BB. */ -@@ -2071,7 +2112,9 @@ nontrapping_dom_walker::before_dom_child - else if (gimple_assign_single_p (stmt) && !gimple_has_volatile_ops (stmt)) - { - add_or_mark_expr (bb, gimple_assign_lhs (stmt), true); -+ add_or_mark_array_ref (bb, gimple_assign_lhs (stmt)); - add_or_mark_expr (bb, gimple_assign_rhs1 (stmt), false); -+ add_or_mark_array_ref (bb, gimple_assign_rhs1 (stmt)); - } - } - return NULL; -@@ -2148,6 +2191,74 @@ nontrapping_dom_walker::add_or_mark_expr - } - } - } -+} -+ -+/* We see the expression EXP in basic block BB. If it's an interesting -+ expression (an ARRAY_REF with a base of VAR_DECL and an offset of -+ SSA_NAME) possibly insert the expression into the set NONTRAP or the -+ hash table of seen expressions. */ -+void -+nontrapping_dom_walker::add_or_mark_array_ref (basic_block bb, tree exp) -+{ -+ if (TREE_CODE (exp) == ARRAY_REF -+ && TREE_CODE (TREE_OPERAND (exp, 1)) == SSA_NAME -+ && int_size_in_bytes (TREE_TYPE (exp)) > 0) -+ { -+ HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp)); -+ tree base = get_base_address (exp); -+ /* if BASE is a local variable without address-taken, which can't be -+ read-only, a dominating load can constitute a no-trap region for -+ a store as well. */ -+ if (TREE_CODE (base) == VAR_DECL -+ && auto_var_p (base) && !TREE_ADDRESSABLE (base)) -+ { -+ struct array_ref_to_bb array_map; -+ basic_block found_array_bb = 0; -+ -+ /* Try to find the last seen ARRAY_REF with the same base and -+ offset, which can trap. 
*/ -+ array_map.ssa_name_ver = SSA_NAME_VERSION (TREE_OPERAND (exp, 1)); -+ array_map.phase = 0; -+ array_map.bb = 0; -+ array_map.size = size; -+ array_map.var_decl = base; -+ -+ array_ref_to_bb **slot -+ = m_seen_array_refs.find_slot (&array_map, INSERT); -+ struct array_ref_to_bb *a2bb = *slot; -+ if (a2bb != NULL && a2bb->phase >= nt_call_phase) -+ { -+ found_array_bb = a2bb->bb; -+ } -+ -+ /* If we've found a trapping MEM_REF, _and_ it dominates EXP -+ (it's in a basic block on the path from us to the dominator root) -+ then we can't trap. */ -+ if (found_array_bb && (((size_t)found_array_bb->aux) & 1) == 1) -+ { -+ m_nontrapping->add (exp); -+ } -+ else -+ { -+ /* EXP might trap, so insert it into the hash table. */ -+ if (a2bb != NULL) -+ { -+ a2bb->phase = nt_call_phase; -+ a2bb->bb = bb; -+ } -+ else -+ { -+ a2bb = XNEW (struct array_ref_to_bb); -+ a2bb->ssa_name_ver = SSA_NAME_VERSION (TREE_OPERAND (exp, 1)); -+ a2bb->phase = nt_call_phase; -+ a2bb->bb = bb; -+ a2bb->size = size; -+ a2bb->var_decl = base; -+ *slot = a2bb; -+ } -+ } -+ } -+ } - } - - /* This is the entry point of gathering non trapping memory accesses. diff --git a/generate-csel.patch b/generate-csel.patch deleted file mode 100644 index 3aaf2616913ee8f001d52d0977994f8d56012784..0000000000000000000000000000000000000000 --- a/generate-csel.patch +++ /dev/null @@ -1,187 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-re-PR-tree-optimization-89430-A-missing-ifcvt-optimi.patch -b9ef6a2e04bfd01329902781818ef80c52cd8b97 - -diff -uprN a/gcc/testsuite/gcc.dg/graphite/scop-21.c b/gcc/testsuite/gcc.dg/graphite/scop-21.c ---- a/gcc/testsuite/gcc.dg/graphite/scop-21.c -+++ b/gcc/testsuite/gcc.dg/graphite/scop-21.c -@@ -30,5 +30,4 @@ int test () - - return a[20]; - } --/* XFAILed by the fix for PR86865. 
*/ --/* { dg-final { scan-tree-dump-times "number of SCoPs: 1" 1 "graphite" { xfail *-*-* } } } */ -+/* { dg-final { scan-tree-dump-times "number of SCoPs: 1" 1 "graphite" } } */ -diff -uprN a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c -new file mode 100644 ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c -@@ -0,0 +1,12 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fdump-tree-cselim-details" } */ -+ -+unsigned test(unsigned k, unsigned b) { -+ unsigned a[2]; -+ if (b < a[k]) { -+ a[k] = b; -+ } -+ return a[0]+a[1]; -+} -+ -+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ -diff -uprN a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c -new file mode 100644 ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c -@@ -0,0 +1,14 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fdump-tree-cselim-details" } */ -+ -+int c; -+unsigned test(unsigned k, unsigned b) { -+ unsigned a[2]; -+ a[k] = c; -+ if (b < a[k]) { -+ a[k] = b; -+ } -+ return a[0]+a[1]; -+} -+ -+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ -diff -uprN a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-3.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-3.c -new file mode 100644 ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-3.c -@@ -0,0 +1,12 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fdump-tree-cselim-details" } */ -+ -+unsigned a[2]; -+unsigned test(unsigned k, unsigned b) { -+ if (b < a[k]) { -+ a[k] = b; -+ } -+ return a[0]+a[1]; -+} -+ -+/* { dg-final { scan-tree-dump-not "Conditional store replacement" "cselim" } } */ -diff -uprN a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-4.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-4.c -new file mode 100644 ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-4.c -@@ -0,0 +1,14 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fdump-tree-cselim-details" } */ -+ -+int *p; -+unsigned test(unsigned k, unsigned b) { -+ unsigned a[2]; -+ p = a; -+ if (b < a[k]) { -+ a[k] = b; -+ } -+ return a[0]+a[1]; -+} -+ -+/* { dg-final { scan-tree-dump-not "Conditional store replacement" "cselim" } } */ -diff -uprN a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c -new file mode 100644 ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c -@@ -0,0 +1,16 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fdump-tree-cselim-details" } */ -+ -+int test(int b, int k) { -+ struct { -+ int data[2]; -+ } a; -+ -+ if (b < a.data[k]) { -+ a.data[k] = b; -+ } -+ -+ return a.data[0] + a.data[1]; -+} -+ -+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ -diff -uprN a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c -new file mode 100644 ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c -@@ -0,0 +1,19 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fdump-tree-cselim-details" } */ -+ -+int test(int b, int k) { -+ typedef struct { -+ int x; -+ } SS; -+ struct { -+ SS data[2]; -+ } a; -+ -+ if (b < a.data[k].x) { -+ a.data[k].x = b; -+ } -+ -+ return a.data[0].x + a.data[1].x; -+} -+ -+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ -diff -uprN a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c ---- a/gcc/tree-ssa-phiopt.c -+++ b/gcc/tree-ssa-phiopt.c -@@ -2196,7 +2196,8 @@ get_non_trapping (void) - - We check that MIDDLE_BB contains only one store, that that store - doesn't trap 
(not via NOTRAP, but via checking if an access to the same -- memory location dominates us) and that the store has a "simple" RHS. */ -+ memory location dominates us, or the store is to a local addressable -+ object) and that the store has a "simple" RHS. */ - - static bool - cond_store_replacement (basic_block middle_bb, basic_block join_bb, -@@ -2218,8 +2219,9 @@ cond_store_replacement (basic_block middle_bb, basic_block join_bb, - locus = gimple_location (assign); - lhs = gimple_assign_lhs (assign); - rhs = gimple_assign_rhs1 (assign); -- if (TREE_CODE (lhs) != MEM_REF -- || TREE_CODE (TREE_OPERAND (lhs, 0)) != SSA_NAME -+ if ((TREE_CODE (lhs) != MEM_REF -+ && TREE_CODE (lhs) != ARRAY_REF -+ && TREE_CODE (lhs) != COMPONENT_REF) - || !is_gimple_reg_type (TREE_TYPE (lhs))) - return false; - -@@ -2227,7 +2229,13 @@ cond_store_replacement (basic_block middle_bb, basic_block join_bb, - TREE_THIS_NOTRAP here, but in that case we also could move stores, - whose value is not available readily, which we want to avoid. */ - if (!nontrap->contains (lhs)) -- return false; -+ { -+ /* If LHS is a local variable without address-taken, we could -+ always safely move down the store. */ -+ tree base = get_base_address (lhs); -+ if (!auto_var_p (base) || TREE_ADDRESSABLE (base)) -+ return false; -+ } - - /* Now we've checked the constraints, so do the transformation: - 1) Remove the single store. */ -@@ -2280,6 +2288,14 @@ cond_store_replacement (basic_block middle_bb, basic_block join_bb, - else - gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT); - -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "\nConditional store replacement happened!"); -+ fprintf (dump_file, "\nReplaced the store with a load."); -+ fprintf (dump_file, "\nInserted a new PHI statement in joint block:\n"); -+ print_gimple_stmt (dump_file, new_stmt, 0, TDF_VOPS|TDF_MEMSYMS); -+ } -+ - return true; - } diff --git a/ipa-const-prop-buffer-overflow-bugfix.patch b/ipa-const-prop-buffer-overflow-bugfix.patch deleted file mode 100644 index 69e20c7aaf4afa72aec38033e0da3bac705c8a99..0000000000000000000000000000000000000000 --- a/ipa-const-prop-buffer-overflow-bugfix.patch +++ /dev/null @@ -1,119 +0,0 @@ -This patch is to solve issue409, which merge following 3 commits with some style fix - -commit 9505acd8501e6c79bc4fa9ed9f1ee174462601d1 -Author: Richard Biener -Date: Wed Jul 17 09:35:04 2019 +0000 - - re PR tree-optimization/91180 (wrong code at -O and above with __builtin_memset()) - - 2019-07-17 Richard Biener - - PR tree-optimization/91180 - * tree-ssa-sccvn.c (vn_reference_lookup_3): Fix offset - computation for memset partial defs. - - * gcc.dg/torture/pr91180.c: New testcase. - - From-SVN: r273548 - -commit 6b68f00d4c2b375dad66bd6e72c01c309b4085c5 -Author: Richard Biener -Date: Fri Jul 19 16:19:39 2019 +0000 - - re PR tree-optimization/91211 (wrong code with __builtin_memset() and __builtin_memcpy() at -O1 and above) - - 2019-07-19 Richard Biener - - PR tree-optimization/91211 - * tree-ssa-sccvn.c (vn_walk_cb_data::push_partial_def): Fix - memset encoding size. - - * gcc.dg/torture/pr91211.c: New testcase. - - From-SVN: r273605 - -commit 599331c858294dec6ac94400e63d275c4836607f -Author: Richard Biener -Date: Thu Jul 25 06:57:46 2019 +0000 - - re PR tree-optimization/91236 (ICE in walk_non_aliased_vuses at gcc/tree-ssa-alias.c:3395 on aarch64) - - 2019-07-25 Richard Biener - - PR tree-optimization/91236 - * tree-ssa-sccvn.c (vn_walk_cb_data::push_partial_def): Fix - size of CONSTRUCTOR write. 
Fix buffer size we pass to - native_encode_expr. - - From-SVN: r273787 - -diff -Nurp a/gcc/testsuite/gcc.dg/torture/pr91180.c b/gcc/testsuite/gcc.dg/torture/pr91180.c ---- a/gcc/testsuite/gcc.dg/torture/pr91180.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/torture/pr91180.c 2020-09-15 20:52:58.796000000 +0800 -@@ -0,0 +1,13 @@ -+/* { dg-do run } */ -+ -+int -+main () -+{ -+#if __SIZEOF_INT__ == 4 -+ unsigned x = 0xffffffff; -+ __builtin_memset (1 + (char *) &x, 0, 2); -+ if (x != 0xff0000ff) -+ __builtin_abort (); -+#endif -+ return 0; -+} -diff -Nurp a/gcc/testsuite/gcc.dg/torture/pr91211.c b/gcc/testsuite/gcc.dg/torture/pr91211.c ---- a/gcc/testsuite/gcc.dg/torture/pr91211.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/torture/pr91211.c 2020-09-15 20:52:43.932000000 +0800 -@@ -0,0 +1,19 @@ -+/* { dg-do run } */ -+ -+typedef __UINT32_TYPE__ u32; -+ -+int -+main (void) -+{ -+ u32 b = 0x027C5902; -+ u32 a = 0; -+ __builtin_memset (1 + (char *) &b, 0, 2); -+ __builtin_memcpy (&a, 2 + (char *) &b, 2); -+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -+ if (a != 0x00000200) -+#else -+ if (a != 0x00020000) -+#endif -+ __builtin_abort(); -+ return 0; -+} -diff -Nurp a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c ---- a/gcc/tree-ssa-sccvn.c 2020-09-14 16:44:05.476000000 +0800 -+++ b/gcc/tree-ssa-sccvn.c 2020-09-16 09:29:22.520000000 +0800 -@@ -1840,12 +1840,15 @@ vn_walk_cb_data::push_partial_def (const - if (TREE_CODE (pd.rhs) == CONSTRUCTOR) - /* Empty CONSTRUCTOR. */ - memset (buffer + MAX (0, pd.offset), -- 0, MIN ((HOST_WIDE_INT)sizeof (buffer), pd.size)); -+ 0, MIN ((HOST_WIDE_INT)sizeof (buffer) -+ - MAX (0, pd.offset), -+ pd.size + MIN (0, pd.offset))); - else - { - len = native_encode_expr (pd.rhs, - buffer + MAX (0, pd.offset), -- sizeof (buffer - MAX (0, pd.offset)), -+ sizeof (buffer) -+ - MAX (0, pd.offset), - MAX (0, -pd.offset)); - if (len <= 0 - || len < (pd.size - MAX (0, -pd.offset))) -@@ -2461,7 +2464,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree - { - pd_data pd; - pd.rhs = build_constructor (NULL_TREE, NULL); -- pd.offset = offset2i - offseti; -+ pd.offset = (offset2i - offseti) / BITS_PER_UNIT; - pd.size = leni; - return data->push_partial_def (pd, vuse, maxsizei); - } diff --git a/ipa-const-prop-null-point-check-bugfix.patch b/ipa-const-prop-null-point-check-bugfix.patch deleted file mode 100644 index 416400a2b46c8838e026ba20db8ec499dbb28cb7..0000000000000000000000000000000000000000 --- a/ipa-const-prop-null-point-check-bugfix.patch +++ /dev/null @@ -1,97 +0,0 @@ -commit 3c4fa8a8562d3788bb763ca5c8fb1563b8d4eb1a -Author: Martin Jambor -Date: Wed Nov 13 15:12:58 2019 +0100 - - Add a few missing checks that IPA_NODE_REF is not NULL (PR 92454) - - 2019-11-13 Jan Hubicka - Martin Jambor - - PR ipa/92454 - * ipa-cp.c (spread_undeadness): Check that IPA_NODE_REF exists. - (identify_dead_nodes): Likewise. - - testsuite/ - * g++.dg/ipa/pr92454.C: New test. 
- - From-SVN: r278142 - -diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c -index 54b9724998a..207d7c88bbd 100644 ---- a/gcc/ipa-cp.c -+++ b/gcc/ipa-cp.c -@@ -4979,7 +4979,7 @@ spread_undeadness (struct cgraph_node *node) - callee = cs->callee->function_symbol (NULL); - info = IPA_NODE_REF (callee); - -- if (info->node_dead) -+ if (info && info->node_dead) - { - info->node_dead = 0; - spread_undeadness (callee); -@@ -5017,18 +5017,19 @@ identify_dead_nodes (struct cgraph_node *node) - struct cgraph_node *v; - for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle) - if (v->local.local -+ && IPA_NODE_REF (v) - && !v->call_for_symbol_thunks_and_aliases - (has_undead_caller_from_outside_scc_p, NULL, true)) - IPA_NODE_REF (v)->node_dead = 1; - - for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle) -- if (!IPA_NODE_REF (v)->node_dead) -+ if (IPA_NODE_REF (v) && !IPA_NODE_REF (v)->node_dead) - spread_undeadness (v); - - if (dump_file && (dump_flags & TDF_DETAILS)) - { - for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle) -- if (IPA_NODE_REF (v)->node_dead) -+ if (IPA_NODE_REF (v) && IPA_NODE_REF (v)->node_dead) - fprintf (dump_file, " Marking node as dead: %s.\n", v->dump_name ()); - } - } -diff --git a/gcc/testsuite/g++.dg/ipa/pr92454.C b/gcc/testsuite/g++.dg/ipa/pr92454.C -new file mode 100644 -index 00000000000..de67c66aed0 ---- /dev/null -+++ b/gcc/testsuite/g++.dg/ipa/pr92454.C -@@ -0,0 +1,38 @@ -+/* Originally PR ipa/91969, options adjusted for PR ipa/92454 */ -+/* { dg-options "-O3 --param ipa-cp-eval-threshold=1" } */ -+ -+enum by -+{ -+}; -+class A -+{ -+public: -+ class B -+ { -+ public: -+ virtual void m_fn2 (by) = 0; -+ }; -+ virtual int m_fn1 (); -+ B *cf; -+}; -+by a; -+class C : A, A::B -+{ -+ void m_fn2 (by); -+}; -+void C::m_fn2 (by) { cf->m_fn2 (a); } -+ -+struct a -+{ -+ virtual ~a (); -+}; -+ -+struct b -+{ -+ virtual void d (...); -+}; -+ -+struct c : a, b -+{ -+ void d (...) {} -+}; diff --git a/ipa-const-prop-self-recursion-bugfix.patch b/ipa-const-prop-self-recursion-bugfix.patch deleted file mode 100644 index e407ff9d946ee269e009cd660926b0f4aedc585d..0000000000000000000000000000000000000000 --- a/ipa-const-prop-self-recursion-bugfix.patch +++ /dev/null @@ -1,188 +0,0 @@ -This backport contains 2 patchs from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-Find-matched-aggregate-lattice-for-self-recursive-CP.patch -709d7838e753bbb6f16e2ed88a118ed81c367040 - -0002-Do-not-propagate-self-dependent-value-PR-ipa-93763.patch -47772af10c00f7e1e95cd52557fc893dc602a420 - -diff -Nurp a/gcc/ipa-cp.c b/gcc/ipa-cp.c ---- a/gcc/ipa-cp.c 2020-05-23 16:16:58.032000000 +0800 -+++ b/gcc/ipa-cp.c 2020-05-22 18:03:41.980000000 +0800 -@@ -1766,8 +1766,8 @@ ipcp_lattice::add_value (valtyp - } - - /* Return true, if a ipcp_value VAL is orginated from parameter value of -- self-feeding recursive function by applying non-passthrough arithmetic -- transformation. */ -+ self-feeding recursive function via some kind of pass-through jump -+ function. 
*/ - - static bool - self_recursively_generated_p (ipcp_value *val) -@@ -1778,19 +1778,36 @@ self_recursively_generated_p (ipcp_value - { - cgraph_edge *cs = src->cs; - -- if (!src->val || cs->caller != cs->callee->function_symbol () -- || src->val == val) -+ if (!src->val || cs->caller != cs->callee->function_symbol ()) - return false; - -+ if (src->val == val) -+ continue; -+ - if (!info) - info = IPA_NODE_REF (cs->caller); - - class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, - src->index); -- ipcp_lattice *src_lat = src->offset == -1 ? &plats->itself -- : plats->aggs; -+ ipcp_lattice *src_lat; - ipcp_value *src_val; - -+ if (src->offset == -1) -+ src_lat = &plats->itself; -+ else -+ { -+ struct ipcp_agg_lattice *src_aglat; -+ -+ for (src_aglat = plats->aggs; src_aglat; src_aglat = src_aglat->next) -+ if (src_aglat->offset == src->offset) -+ break; -+ -+ if (!src_aglat) -+ return false; -+ -+ src_lat = src_aglat; -+ } -+ - for (src_val = src_lat->values; src_val; src_val = src_val->next) - if (src_val == val) - break; -@@ -1887,6 +1904,8 @@ propagate_vals_across_arith_jfunc (cgrap - val_seeds.safe_push (src_val); - } - -+ gcc_assert ((int) val_seeds.length () -+ <= PARAM_VALUE (PARAM_IPA_CP_VALUE_LIST_SIZE)); - /* Recursively generate lattice values with a limited count. */ - FOR_EACH_VEC_ELT (val_seeds, i, src_val) - { -diff -Nurp a/gcc/testsuite/gcc.dg/ipa/ipa-clone-3.c b/gcc/testsuite/gcc.dg/ipa/ipa-clone-3.c ---- a/gcc/testsuite/gcc.dg/ipa/ipa-clone-3.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/ipa/ipa-clone-3.c 2020-05-22 17:55:24.036000000 +0800 -@@ -0,0 +1,42 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O3 -fdump-ipa-cp-details -fno-early-inlining --param ipa-cp-max-recursive-depth=8 --param ipa-cp-eval-threshold=1" } */ -+ -+struct V { -+ int f0; -+ int f1; -+}; -+ -+int data[100]; -+ -+int fn (); -+ -+int recur_fn (struct V * __restrict v) -+{ -+ int i = v->f0; -+ int j = v->f1; -+ struct V t; -+ -+ if (j > 100) -+ { -+ fn (); -+ return 1; -+ } -+ -+ data[i] = i; -+ -+ t.f0 = i - 2; -+ t.f1 = j + 1; -+ -+ recur_fn (&t); -+ -+ return i * j; -+} -+ -+int main () -+{ -+ struct V v = {1, 3}; -+ -+ return recur_fn (&v); -+} -+ -+/* { dg-final { scan-ipa-dump-times "Creating a specialized node of recur_fn/\[0-9\]*\\." 
8 "cp" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/ipa/pr93763.c b/gcc/testsuite/gcc.dg/ipa/pr93763.c ---- a/gcc/testsuite/gcc.dg/ipa/pr93763.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/ipa/pr93763.c 2020-05-22 17:57:10.532000000 +0800 -@@ -0,0 +1,46 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O3" } */ -+ -+typedef struct a a; -+struct a { -+ a *b -+} d; -+e, k, ah, al; -+f(aa) { -+ if (aa & 1) -+ goto g; -+ f(aa | 2); -+g: -+ h(); -+} -+l() { -+ { -+ f(072); -+ i(e, d, 92); -+ } -+} -+ag() { -+ { i(e, d, 36); } -+} -+ai(a *m, a *n, unsigned aa) { -+ f(aa); -+ j(k, l, ah, 1); -+} -+j(int c, a m, int aj, int aa) { -+ int ak = aa; -+ { i(e, d, ak); } -+} -+i(int c, a *m, unsigned aa) { -+ { -+ { i(c, (*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*( -+*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*( -+*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*m).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b) -+.b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b) -+.b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b) -+.b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b) -+.b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b, 0); -+ } -+ } -+ int am = aa; -+ ai(ag, al, am); -+} -diff -Nurp a/gcc/testsuite/g++.dg/ipa/pr93763.C b/gcc/testsuite/g++.dg/ipa/pr93763.C ---- a/gcc/testsuite/g++.dg/ipa/pr93763.C 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/g++.dg/ipa/pr93763.C 2020-05-22 17:57:10.532000000 +0800 -@@ -0,0 +1,15 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O3" } */ -+ -+struct search_param { -+ int total; -+}; -+void search_trivial(search_param error_left) { -+ search_trivial(error_left); -+ search_param error_left2{error_left}; -+ error_left2.total--; -+ search_trivial(error_left2); -+} -+void search_algo_uni(search_param error_left) { search_trivial(error_left); } -+void search_algo(search_param error_left) { search_algo_uni(error_left); } -+int main() { search_algo({}); return 0; } diff --git a/ipa-const-prop.patch b/ipa-const-prop.patch deleted file mode 100644 index 7cad13f20f4a84741664738b45361bd84f0f570b..0000000000000000000000000000000000000000 --- a/ipa-const-prop.patch +++ /dev/null @@ -1,11040 +0,0 @@ -This backport contains 50 patchs from gcc main stream tree. -The commit id of these patchs list as following in the order of time. 
- -ipa-const-prop-2019-06-10-add-ignore-edge-func.patch: -commit 97e59627567757759b047479c75be2f238ea45c3 - -ipa-const-prop-2019-06-14-prop-by-ref-to-callee.patch: -commit 46771da57463c62f66af32e9189f1b6fb8bbe8c7 - -ipa-const-prop-2019-07-05-add-tbaa-para.patch: -ipa-const-prop-2019-07-05-add-tbaa-para-conflict-fix.patch -commit fb4697e30bd0cd4bda66932e21c183273a5d1e63 - -ipa-const-prop-2019-07-08-bugfix-drop-useless-instr.patch: -ipa-const-prop-2019-07-08-bugfix-drop-useless-instr-conflict-fix.patch -commit 38988cbf9ebaa96fb1e891a46aa063f0c298a2e2 - -ipa-const-prop-2019-07-09-ipa-cp-class-change.patch -ipa-const-prop-2019-07-09-ipa-fnsummary-class-change.patch -ipa-const-prop-2019-07-09-ipa-inline-analysis-class-change.patch -ipa-const-prop-2019-07-09-ipa-prop-class-change.patch -ipa-const-prop-2019-07-09-ipa-prop-class-change-conflic-fix.patch -ipa-const-prop-2019-07-09-ipa-predicate-class-change.patch -commit 99b1c316ec974a39bdd949f8559bb28861b69592 - -ipa-const-prop-2019-08-07-change-to-poly_64.patch: -commit 8600364582f24d2a3f227111c6a87b7d98561c69 - -ipa-const-prop-2019-08-12-bugfix-add-condition-fix.patch: -commit 52c9b7face987062527c612e0a65f084e43c85fd - -ipa-const-prop-2019-09-17-new-para-ipa-max-switch.patch: -commit 351e7c3b5fbd45bde3efb601f7fee9a31c4f2063 - -ipa-const-prop-2019-09-19-auto-switch-predicate.patch: -commit efe126563bb8d28cb3958423a735d0021e75702f - -ipa-const-prop-2019-10-03-generate-ipa-on-para-ref.patch: -commit 4307a485c39fd1c317d6cead2707a903052c4753 - -ipa-const-prop-2019-10-05-inline-size-para-change.patch: -commit 6c291ad828fcb5f01a1d2cb23f6078e9a6f958b9 - -ipa-const-prop-2019-10-10-bugfix-20040708-split-splay-tree.patch: -commit 6488759f404f3aff6642b005242a9c82a1c2cee2 - -ipa-const-prop-2019-10-23-bugfix-20040708-fix-uid-func.patch: -commit b5b6485f1cc54f21713b5b03c5d63d56839ca458 - -ipa-const-prop-2019-10-23-bugfix-20040708-fix-uid-func-2nd.patch: -commit 45012be1f5c7e6039e594bab41ebb94d89a9aca0 - -ipa-const-prop-2019-10-24-toggle-static-write.patch: -commit abebffc609506176f8ba3f64533e15ece49446c0 - -ipa-const-prop-2019-10-25-bugfix-empty-edge-ICE.patch: -commit 5a0236f8ca9d239bb62ef54c9273e6ca3f068f87 - -ipa-const-prop-2019-10-25-call-size-summary.patch: -ipa-const-prop-2019-10-25-call-size-summary-confict-fix.patch -commit f658ad3002a0afc8aa86d5646ee704921d969ebe - -ipa-const-prop-2019-10-27-bugfix-solve-LTO-ICE.patch: -commit b1e655646f5b0be3d146825c130690078a8601c3 - -ipa-const-prop-2019-10-27-do-not-move-jump.patch: -commit 051d8a5faa3b37b0dda84c8382174ee70d5b7992 - -ipa-const-prop-2019-10-27-drop-if-no-arg.patch: -commit a33c028eb38268b5084ebc4cc17a1cb64b3a838b - -ipa-const-prop-2019-10-27-update-sum-after-expand.patch: -commit a088d7b10f296dbd57bccbac1bfcf8abb207b034 - -ipa-const-prop-2019-10-30-remove-global.patch: -commit a62bfab5d2a332925fcf10c45b4c5d8ca499439d - -ipa-const-prop-2019-11-03-add-deplicate-form.patch: -commit ac6f2e594886e2209446114023ecdff96b0bd7c4 - -ipa-const-prop-2019-11-03-ipa-inline-analysis-conflict-fix.patch: -ipa-const-prop-2019-11-03-improve-efficiency-of-ipa-poly.patch: -commit 40a777e840f74dd5c19ea26c55d1248a335fd11b - -ipa-const-prop-2019-11-03-ipa-fnsummary-add-call-context.patch: -commit 1532500ecbe8dbf59bef498e46b447b3a6b0fa65 - -ipa-const-prop-2019-11-03-size-ahead-time.patch: -commit 360386c7ef1c3fa30de216b1d68ed6a27296fd80 - -ipa-const-prop-2019-11-04-ipa-inline-includes-ipa-utils.patch: -commit 2bc2379be5c98d34ecbb347b2abf059aa6d94499 - -ipa-const-prop-2019-11-09-add-ipacp-clone.patch: -commit 
6cf67b62c8cda035dccaca2ae6ff94d560b37a6f - -ipa-const-prop-2019-11-09-call-nodeRef-on-func-sym.patch: -commit 2ee6e04aaecc856bced29711f9765660e0888994 - -ipa-const-prop-2019-11-13-bugfix-inline-check-before-flatten.patch: -commit 2895b172d56c355373b64517a3298a01a2f10ec0 - -ipa-const-prop-2019-11-13-bugfix-inline-empty-edge.patch: -commit 367c959f0303e11e0a6d875abba7d03c72686668 - -ipa-const-prop-2019-11-13-bugfix-inline-small-function.patch: -commit b914768c1968d924d77bbe3f4e707c6105f3682c - -ipa-const-prop-2019-11-13-bugfix-lto-ICE.patch: -commit d200a49f5c83fa0f2e7332aecf69b6ab4a51b052 - -ipa-const-prop-2019-11-13-fix-ipa-profile-indirect-call.patch: -commit 7b34a284cab5d533552c1df995a88f7167d243bd - -ipa-const-prop-2019-11-14-by-ref-const-prop.patch: -ipa-const-prop-2019-11-14-by-ref-const-prop-conflict-fix.patch -commit eb270950acbae6f70e3487a6e63a26c1294656b3 - -ipa-const-prop-2019-11-15-bugfix-segfault-with-null-top.patch: -commit 1c3c3f455021130c429f57b09ef39bc218bd7fff - -ipa-const-prop-2019-11-18-bugfix-ICE-null-edge.patch: -commit 8d890d37e0183735586c18f1f056deb5848617ca - -ipa-const-prop-2019-11-18-bug-fix-ICE.patch: -commit 8d890d37e0183735586c18f1f056deb5848617ca - -ipa-const-prop-2019-12-02-recusion-versioning.patch: -ipa-const-prop-2019-12-02-param-conflict-fix.patch -commit 9b14fc3326e087975653b1af8ac54114041cde51 - -The original of these commit can be found on - https://github.com/gcc-mirror/gcc - -Not all these commits are applied directly. If the commit node contains -code that affact other modules that unrelated to ipa constant propgation -optimization, the part that the optimization need is regrouped into -a small new patch, which usually named conflict-fix. - -diff -Nurp a/gcc/cgraphbuild.c b/gcc/cgraphbuild.c ---- a/gcc/cgraphbuild.c 2020-04-30 15:14:04.580000000 +0800 -+++ b/gcc/cgraphbuild.c 2020-04-30 15:14:56.584000000 +0800 -@@ -428,7 +428,7 @@ cgraph_edge::rebuild_edges (void) - node->record_stmt_references (gsi_stmt (gsi)); - } - record_eh_tables (node, cfun); -- gcc_assert (!node->global.inlined_to); -+ gcc_assert (!node->inlined_to); - return 0; - } - -diff -Nurp a/gcc/cgraph.c b/gcc/cgraph.c ---- a/gcc/cgraph.c 2020-04-30 15:14:04.576000000 +0800 -+++ b/gcc/cgraph.c 2020-04-30 15:14:56.584000000 +0800 -@@ -539,7 +539,7 @@ cgraph_node::get_create (tree decl) - { - cgraph_node *first_clone = cgraph_node::get (decl); - -- if (first_clone && !first_clone->global.inlined_to) -+ if (first_clone && !first_clone->inlined_to) - return first_clone; - - cgraph_node *node = cgraph_node::create (decl); -@@ -659,7 +659,7 @@ cgraph_node::get_for_asmname (tree asmna - node = node->next_sharing_asm_name) - { - cgraph_node *cn = dyn_cast (node); -- if (cn && !cn->global.inlined_to) -+ if (cn && !cn->inlined_to) - return cn; - } - return NULL; -@@ -1857,7 +1857,7 @@ cgraph_node::remove (void) - { - cgraph_node *n = cgraph_node::get (decl); - if (!n -- || (!n->clones && !n->clone_of && !n->global.inlined_to -+ || (!n->clones && !n->clone_of && !n->inlined_to - && ((symtab->global_info_ready || in_lto_p) - && (TREE_ASM_WRITTEN (n->decl) - || DECL_EXTERNAL (n->decl) -@@ -1888,7 +1888,7 @@ cgraph_node::mark_address_taken (void) - { - /* Indirect inlining can figure out that all uses of the address are - inlined. 
*/ -- if (global.inlined_to) -+ if (inlined_to) - { - gcc_assert (cfun->after_inlining); - gcc_assert (callers->indirect_inlining_edge); -@@ -2012,10 +2012,10 @@ cgraph_node::dump (FILE *f) - - dump_base (f); - -- if (global.inlined_to) -+ if (inlined_to) - fprintf (f, " Function %s is inline copy in %s\n", - dump_name (), -- global.inlined_to->dump_name ()); -+ inlined_to->dump_name ()); - if (clone_of) - fprintf (f, " Clone of %s\n", clone_of->dump_asm_name ()); - if (symtab->function_flags_ready) -@@ -2159,7 +2159,7 @@ cgraph_node::dump (FILE *f) - if (dyn_cast (ref->referring)->count.initialized_p ()) - sum += dyn_cast (ref->referring)->count.ipa (); - -- if (global.inlined_to -+ if (inlined_to - || (symtab->state < EXPANSION - && ultimate_alias_target () == this && only_called_directly_p ())) - ok = !count.ipa ().differs_from_p (sum); -@@ -2259,14 +2259,14 @@ cgraph_node::get_availability (symtab_no - { - cgraph_node *cref = dyn_cast (ref); - if (cref) -- ref = cref->global.inlined_to; -+ ref = cref->inlined_to; - } - enum availability avail; - if (!analyzed) - avail = AVAIL_NOT_AVAILABLE; - else if (local.local) - avail = AVAIL_LOCAL; -- else if (global.inlined_to) -+ else if (inlined_to) - avail = AVAIL_AVAILABLE; - else if (transparent_alias) - ultimate_alias_target (&avail, ref); -@@ -2878,7 +2878,7 @@ bool - cgraph_node::will_be_removed_from_program_if_no_direct_calls_p - (bool will_inline) - { -- gcc_assert (!global.inlined_to); -+ gcc_assert (!inlined_to); - if (DECL_EXTERNAL (decl)) - return true; - -@@ -3065,7 +3065,7 @@ cgraph_edge::verify_corresponds_to_fndec - { - cgraph_node *node; - -- if (!decl || callee->global.inlined_to) -+ if (!decl || callee->inlined_to) - return false; - if (symtab->state == LTO_STREAMING) - return false; -@@ -3126,7 +3126,7 @@ cgraph_node::verify_node (void) - error ("cgraph count invalid"); - error_found = true; - } -- if (global.inlined_to && same_comdat_group) -+ if (inlined_to && same_comdat_group) - { - error ("inline clone in same comdat group list"); - error_found = true; -@@ -3136,17 +3136,17 @@ cgraph_node::verify_node (void) - error ("local symbols must be defined"); - error_found = true; - } -- if (global.inlined_to && externally_visible) -+ if (inlined_to && externally_visible) - { - error ("externally visible inline clone"); - error_found = true; - } -- if (global.inlined_to && address_taken) -+ if (inlined_to && address_taken) - { - error ("inline clone with address taken"); - error_found = true; - } -- if (global.inlined_to && force_output) -+ if (inlined_to && force_output) - { - error ("inline clone is forced to output"); - error_found = true; -@@ -3183,9 +3183,9 @@ cgraph_node::verify_node (void) - } - if (!e->inline_failed) - { -- if (global.inlined_to -- != (e->caller->global.inlined_to -- ? e->caller->global.inlined_to : e->caller)) -+ if (inlined_to -+ != (e->caller->inlined_to -+ ? e->caller->inlined_to : e->caller)) - { - error ("inlined_to pointer is wrong"); - error_found = true; -@@ -3197,7 +3197,7 @@ cgraph_node::verify_node (void) - } - } - else -- if (global.inlined_to) -+ if (inlined_to) - { - error ("inlined_to pointer set for noninline callers"); - error_found = true; -@@ -3208,7 +3208,7 @@ cgraph_node::verify_node (void) - if (e->verify_count ()) - error_found = true; - if (gimple_has_body_p (e->caller->decl) -- && !e->caller->global.inlined_to -+ && !e->caller->inlined_to - && !e->speculative - /* Optimized out calls are redirected to __builtin_unreachable. 
*/ - && (e->count.nonzero_p () -@@ -3233,7 +3233,7 @@ cgraph_node::verify_node (void) - if (e->verify_count ()) - error_found = true; - if (gimple_has_body_p (e->caller->decl) -- && !e->caller->global.inlined_to -+ && !e->caller->inlined_to - && !e->speculative - && e->count.ipa_p () - && count -@@ -3250,12 +3250,12 @@ cgraph_node::verify_node (void) - error_found = true; - } - } -- if (!callers && global.inlined_to) -+ if (!callers && inlined_to) - { - error ("inlined_to pointer is set but no predecessors found"); - error_found = true; - } -- if (global.inlined_to == this) -+ if (inlined_to == this) - { - error ("inlined_to pointer refers to itself"); - error_found = true; -@@ -3344,7 +3344,7 @@ cgraph_node::verify_node (void) - error ("More than one edge out of thunk node"); - error_found = true; - } -- if (gimple_has_body_p (decl) && !global.inlined_to) -+ if (gimple_has_body_p (decl) && !inlined_to) - { - error ("Thunk is not supposed to have body"); - error_found = true; -@@ -3352,7 +3352,7 @@ cgraph_node::verify_node (void) - } - else if (analyzed && gimple_has_body_p (decl) - && !TREE_ASM_WRITTEN (decl) -- && (!DECL_EXTERNAL (decl) || global.inlined_to) -+ && (!DECL_EXTERNAL (decl) || inlined_to) - && !flag_wpa) - { - if (this_cfun->cfg) -@@ -3623,7 +3623,7 @@ cgraph_node::get_body (void) - early. - TODO: Materializing clones here will likely lead to smaller LTRANS - footprint. */ -- gcc_assert (!global.inlined_to && !clone_of); -+ gcc_assert (!inlined_to && !clone_of); - if (ipa_transforms_to_apply.exists ()) - { - opt_pass *saved_current_pass = current_pass; -@@ -3813,8 +3813,8 @@ cgraph_node::has_thunk_p (cgraph_node *n - sreal - cgraph_edge::sreal_frequency () - { -- return count.to_sreal_scale (caller->global.inlined_to -- ? caller->global.inlined_to->count -+ return count.to_sreal_scale (caller->inlined_to -+ ? caller->inlined_to->count - : caller->count); - } - -diff -Nurp a/gcc/cgraphclones.c b/gcc/cgraphclones.c ---- a/gcc/cgraphclones.c 2020-04-30 15:14:04.644000000 +0800 -+++ b/gcc/cgraphclones.c 2020-04-30 15:14:56.628000000 +0800 -@@ -458,8 +458,7 @@ cgraph_node::create_clone (tree new_decl - new_node->externally_visible = false; - new_node->no_reorder = no_reorder; - new_node->local.local = true; -- new_node->global = global; -- new_node->global.inlined_to = new_inlined_to; -+ new_node->inlined_to = new_inlined_to; - new_node->rtl = rtl; - new_node->frequency = frequency; - new_node->tp_first_run = tp_first_run; -@@ -671,6 +670,7 @@ cgraph_node::create_virtual_clone (vecipcp_clone = ipcp_clone; - new_node->clone.tree_map = tree_map; - if (!implicit_section) - new_node->set_section (get_section ()); -@@ -965,7 +965,7 @@ cgraph_node::create_version_clone (tree - new_version->externally_visible = false; - new_version->no_reorder = no_reorder; - new_version->local.local = new_version->definition; -- new_version->global = global; -+ new_version->inlined_to = inlined_to; - new_version->rtl = rtl; - new_version->count = count; - -diff -Nurp a/gcc/cgraph.h b/gcc/cgraph.h ---- a/gcc/cgraph.h 2020-04-30 15:14:04.624000000 +0800 -+++ b/gcc/cgraph.h 2020-04-30 15:14:56.628000000 +0800 -@@ -718,15 +718,6 @@ struct GTY(()) cgraph_local_info { - unsigned tm_may_enter_irr : 1; - }; - --/* Information about the function that needs to be computed globally -- once compilation is finished. Available only with -funit-at-a-time. */ -- --struct GTY(()) cgraph_global_info { -- /* For inline clones this points to the function they will be -- inlined into. 
*/ -- cgraph_node *inlined_to; --}; -- - /* Represent which DECL tree (or reference to such tree) - will be replaced by another tree while versioning. */ - struct GTY(()) ipa_replace_map -@@ -959,7 +950,7 @@ public: - - If the new node is being inlined into another one, NEW_INLINED_TO should be - the outline function the new one is (even indirectly) inlined to. -- All hooks will see this in node's global.inlined_to, when invoked. -+ All hooks will see this in node's inlined_to, when invoked. - Can be NULL if the node is not inlined. SUFFIX is string that is appended - to the original name. */ - cgraph_node *create_clone (tree decl, profile_count count, -@@ -1420,7 +1411,11 @@ public: - vec GTY((skip)) ipa_transforms_to_apply; - - cgraph_local_info local; -- cgraph_global_info global; -+ -+ /* For inline clones this points to the function they will be -+ inlined into. */ -+ cgraph_node *inlined_to; -+ - struct cgraph_rtl_info *rtl; - cgraph_clone_info clone; - cgraph_thunk_info thunk; -@@ -1474,6 +1469,8 @@ public: - unsigned split_part : 1; - /* True if the function appears as possible target of indirect call. */ - unsigned indirect_call_target : 1; -+ /* True if this was a clone created by ipa-cp. */ -+ unsigned ipcp_clone : 1; - - private: - /* Unique id of the node. */ -@@ -2474,7 +2471,7 @@ symtab_node::real_symbol_p (void) - if (!is_a (this)) - return true; - cnode = dyn_cast (this); -- if (cnode->global.inlined_to) -+ if (cnode->inlined_to) - return false; - return true; - } -@@ -2497,13 +2494,13 @@ symtab_node::in_same_comdat_group_p (sym - - if (cgraph_node *cn = dyn_cast (target)) - { -- if (cn->global.inlined_to) -- source = cn->global.inlined_to; -+ if (cn->inlined_to) -+ source = cn->inlined_to; - } - if (cgraph_node *cn = dyn_cast (target)) - { -- if (cn->global.inlined_to) -- target = cn->global.inlined_to; -+ if (cn->inlined_to) -+ target = cn->inlined_to; - } - - return source->get_comdat_group () == target->get_comdat_group (); -@@ -2964,7 +2961,7 @@ struct GTY((for_user)) constant_descript - inline bool - cgraph_node::only_called_directly_or_aliased_p (void) - { -- gcc_assert (!global.inlined_to); -+ gcc_assert (!inlined_to); - return (!force_output && !address_taken - && !ifunc_resolver - && !used_from_other_partition -@@ -2981,7 +2978,7 @@ cgraph_node::only_called_directly_or_ali - inline bool - cgraph_node::can_remove_if_no_direct_calls_and_refs_p (void) - { -- gcc_checking_assert (!global.inlined_to); -+ gcc_checking_assert (!inlined_to); - /* Extern inlines can always go, we will use the external definition. */ - if (DECL_EXTERNAL (decl)) - return true; -@@ -3152,8 +3149,8 @@ inline bool - cgraph_edge::recursive_p (void) - { - cgraph_node *c = callee->ultimate_alias_target (); -- if (caller->global.inlined_to) -- return caller->global.inlined_to->decl == c->decl; -+ if (caller->inlined_to) -+ return caller->inlined_to->decl == c->decl; - else - return caller->decl == c->decl; - } -@@ -3190,8 +3187,8 @@ cgraph_edge::binds_to_current_def_p () - inline int - cgraph_edge::frequency () - { -- return count.to_cgraph_frequency (caller->global.inlined_to -- ? caller->global.inlined_to->count -+ return count.to_cgraph_frequency (caller->inlined_to -+ ? caller->inlined_to->count - : caller->count); - } - -@@ -3213,7 +3210,7 @@ inline void - cgraph_node::mark_force_output (void) - { - force_output = 1; -- gcc_checking_assert (!global.inlined_to); -+ gcc_checking_assert (!inlined_to); - } - - /* Return true if function should be optimized for size. 
*/ -diff -Nurp a/gcc/cgraphunit.c b/gcc/cgraphunit.c ---- a/gcc/cgraphunit.c 2020-04-30 15:14:04.592000000 +0800 -+++ b/gcc/cgraphunit.c 2020-04-30 15:14:56.584000000 +0800 -@@ -340,7 +340,10 @@ symbol_table::process_new_functions (voi - and splitting. This is redundant for functions added late. - Just throw away whatever it did. */ - if (!summaried_computed) -- ipa_free_fn_summary (); -+ { -+ ipa_free_fn_summary (); -+ ipa_free_size_summary (); -+ } - } - else if (ipa_fn_summaries != NULL) - compute_fn_summary (node, true); -@@ -389,7 +392,7 @@ cgraph_node::reset (void) - - /* Reset our data structures so we can analyze the function again. */ - memset (&local, 0, sizeof (local)); -- memset (&global, 0, sizeof (global)); -+ inlined_to = NULL; - memset (&rtl, 0, sizeof (rtl)); - analyzed = false; - definition = false; -@@ -1504,7 +1507,7 @@ mark_functions_to_output (void) - if (node->analyzed - && !node->thunk.thunk_p - && !node->alias -- && !node->global.inlined_to -+ && !node->inlined_to - && !TREE_ASM_WRITTEN (decl) - && !DECL_EXTERNAL (decl)) - { -@@ -1529,7 +1532,7 @@ mark_functions_to_output (void) - { - /* We should've reclaimed all functions that are not needed. */ - if (flag_checking -- && !node->global.inlined_to -+ && !node->inlined_to - && gimple_has_body_p (decl) - /* FIXME: in ltrans unit when offline copy is outside partition but inline copies - are inside partition, we can end up not removing the body since we no longer -@@ -1542,7 +1545,7 @@ mark_functions_to_output (void) - node->debug (); - internal_error ("failed to reclaim unneeded function"); - } -- gcc_assert (node->global.inlined_to -+ gcc_assert (node->inlined_to - || !gimple_has_body_p (decl) - || node->in_other_partition - || node->clones -@@ -1557,7 +1560,7 @@ mark_functions_to_output (void) - if (node->same_comdat_group && !node->process) - { - tree decl = node->decl; -- if (!node->global.inlined_to -+ if (!node->inlined_to - && gimple_has_body_p (decl) - /* FIXME: in an ltrans unit when the offline copy is outside a - partition but inline copies are inside a partition, we can -@@ -2118,7 +2121,7 @@ cgraph_node::assemble_thunks_and_aliases - - for (e = callers; e;) - if (e->caller->thunk.thunk_p -- && !e->caller->global.inlined_to) -+ && !e->caller->inlined_to) - { - cgraph_node *thunk = e->caller; - -@@ -2155,7 +2158,7 @@ cgraph_node::expand (void) - location_t saved_loc; - - /* We ought to not compile any inline clones. */ -- gcc_assert (!global.inlined_to); -+ gcc_assert (!inlined_to); - - /* __RTL functions are compiled as soon as they are parsed, so don't - do it again. 
*/ -@@ -2707,7 +2710,7 @@ symbol_table::compile (void) - bool error_found = false; - - FOR_EACH_DEFINED_FUNCTION (node) -- if (node->global.inlined_to -+ if (node->inlined_to - || gimple_has_body_p (node->decl)) - { - error_found = true; -diff -Nurp a/gcc/data-streamer.h b/gcc/data-streamer.h ---- a/gcc/data-streamer.h 2020-04-30 15:14:04.648000000 +0800 -+++ b/gcc/data-streamer.h 2020-04-30 15:14:56.504000000 +0800 -@@ -53,6 +53,7 @@ HOST_WIDE_INT bp_unpack_var_len_int (str - void streamer_write_zero (struct output_block *); - void streamer_write_uhwi (struct output_block *, unsigned HOST_WIDE_INT); - void streamer_write_hwi (struct output_block *, HOST_WIDE_INT); -+void streamer_write_poly_uint64 (struct output_block *, poly_uint64); - void streamer_write_gcov_count (struct output_block *, gcov_type); - void streamer_write_string (struct output_block *, struct lto_output_stream *, - const char *, bool); -@@ -82,6 +83,7 @@ const char *bp_unpack_indexed_string (st - const char *bp_unpack_string (struct data_in *, struct bitpack_d *); - unsigned HOST_WIDE_INT streamer_read_uhwi (struct lto_input_block *); - HOST_WIDE_INT streamer_read_hwi (struct lto_input_block *); -+poly_uint64 streamer_read_poly_uint64 (struct lto_input_block *); - gcov_type streamer_read_gcov_count (struct lto_input_block *); - wide_int streamer_read_wide_int (struct lto_input_block *); - widest_int streamer_read_widest_int (struct lto_input_block *); -diff -Nurp a/gcc/data-streamer-in.c b/gcc/data-streamer-in.c ---- a/gcc/data-streamer-in.c 2020-04-30 15:14:04.628000000 +0800 -+++ b/gcc/data-streamer-in.c 2020-04-30 15:14:56.504000000 +0800 -@@ -175,6 +175,17 @@ streamer_read_hwi (struct lto_input_bloc - } - } - -+/* Read a poly_uint64 from IB. */ -+ -+poly_uint64 -+streamer_read_poly_uint64 (class lto_input_block *ib) -+{ -+ poly_uint64 res; -+ for (unsigned int i = 0; i < NUM_POLY_INT_COEFFS; ++i) -+ res.coeffs[i] = streamer_read_uhwi (ib); -+ return res; -+} -+ - /* Read gcov_type value from IB. */ - - gcov_type -diff -Nurp a/gcc/data-streamer-out.c b/gcc/data-streamer-out.c ---- a/gcc/data-streamer-out.c 2020-04-30 15:14:04.600000000 +0800 -+++ b/gcc/data-streamer-out.c 2020-04-30 15:14:56.504000000 +0800 -@@ -220,6 +220,15 @@ streamer_write_hwi (struct output_block - streamer_write_hwi_stream (ob->main_stream, work); - } - -+/* Write a poly_uint64 value WORK to OB->main_stream. */ -+ -+void -+streamer_write_poly_uint64 (struct output_block *ob, poly_uint64 work) -+{ -+ for (int i = 0; i < NUM_POLY_INT_COEFFS; ++i) -+ streamer_write_uhwi_stream (ob->main_stream, work.coeffs[i]); -+} -+ - /* Write a gcov counter value WORK to OB->main_stream. */ - - void -diff -Nurp a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi ---- a/gcc/doc/invoke.texi 2020-04-30 15:14:04.664000000 +0800 -+++ b/gcc/doc/invoke.texi 2020-04-30 15:14:56.692000000 +0800 -@@ -11836,6 +11836,13 @@ IPA-CP calculates its own score of cloni - and performs those cloning opportunities with scores that exceed - @option{ipa-cp-eval-threshold}. - -+@item ipa-cp-max-recursive-depth -+Maximum depth of recursive cloning for self-recursive function. -+ -+@item ipa-cp-min-recursive-probability -+Recursive cloning only when the probability of call being executed exceeds -+the parameter. -+ - @item ipa-cp-recursion-penalty - Percentage penalty the recursive functions will receive when they - are evaluated for cloning. 
-diff -Nurp a/gcc/gimple-fold.c b/gcc/gimple-fold.c ---- a/gcc/gimple-fold.c 2020-04-30 15:14:04.632000000 +0800 -+++ b/gcc/gimple-fold.c 2020-04-30 15:14:56.584000000 +0800 -@@ -135,7 +135,7 @@ can_refer_decl_in_current_unit_p (tree d - if (!snode || !snode->definition) - return false; - node = dyn_cast (snode); -- return !node || !node->global.inlined_to; -+ return !node || !node->inlined_to; - } - - /* We will later output the initializer, so we can refer to it. -@@ -184,7 +184,7 @@ can_refer_decl_in_current_unit_p (tree d - || (!snode->forced_by_abi && !snode->force_output)))) - return false; - node = dyn_cast (snode); -- return !node || !node->global.inlined_to; -+ return !node || !node->inlined_to; - } - - /* Create a temporary for TYPE for a statement STMT. If the current function -diff -Nurp a/gcc/ipa.c b/gcc/ipa.c ---- a/gcc/ipa.c 2020-04-30 15:14:04.636000000 +0800 -+++ b/gcc/ipa.c 2020-04-30 15:14:56.588000000 +0800 -@@ -71,9 +71,9 @@ update_inlined_to_pointer (struct cgraph - { - struct cgraph_edge *e; - for (e = node->callees; e; e = e->next_callee) -- if (e->callee->global.inlined_to) -+ if (e->callee->inlined_to) - { -- e->callee->global.inlined_to = inlined_to; -+ e->callee->inlined_to = inlined_to; - update_inlined_to_pointer (e->callee, inlined_to); - } - } -@@ -335,11 +335,11 @@ symbol_table::remove_unreachable_nodes ( - node->used_as_abstract_origin = false; - node->indirect_call_target = false; - if (node->definition -- && !node->global.inlined_to -+ && !node->inlined_to - && !node->in_other_partition - && !node->can_remove_if_no_direct_calls_and_refs_p ()) - { -- gcc_assert (!node->global.inlined_to); -+ gcc_assert (!node->inlined_to); - reachable.add (node); - enqueue_node (node, &first, &reachable); - } -@@ -451,7 +451,7 @@ symbol_table::remove_unreachable_nodes ( - - /* When inline clone exists, mark body to be preserved so when removing - offline copy of the function we don't kill it. */ -- if (cnode->global.inlined_to) -+ if (cnode->inlined_to) - body_needed_for_clonning.add (cnode->decl); - - /* For non-inline clones, force their origins to the boundary and ensure -@@ -560,11 +560,11 @@ symbol_table::remove_unreachable_nodes ( - to turn it into normal cone. */ - FOR_EACH_FUNCTION (node) - { -- if (node->global.inlined_to -+ if (node->inlined_to - && !node->callers) - { - gcc_assert (node->clones); -- node->global.inlined_to = NULL; -+ node->inlined_to = NULL; - update_inlined_to_pointer (node, node); - } - node->aux = NULL; -@@ -1207,8 +1207,8 @@ propagate_single_user (varpool_node *vno - struct cgraph_node *cnode = dyn_cast (ref->referring); - if (cnode) - { -- if (cnode->global.inlined_to) -- cnode = cnode->global.inlined_to; -+ if (cnode->inlined_to) -+ cnode = cnode->inlined_to; - if (!function) - function = cnode; - else if (function != cnode) -diff -Nurp a/gcc/ipa-comdats.c b/gcc/ipa-comdats.c ---- a/gcc/ipa-comdats.c 2020-04-30 15:14:04.612000000 +0800 -+++ b/gcc/ipa-comdats.c 2020-04-30 15:14:56.584000000 +0800 -@@ -98,8 +98,8 @@ propagate_comdat_group (struct symtab_no - - if (cgraph_node * cn = dyn_cast (symbol2)) - { -- if (cn->global.inlined_to) -- symbol2 = cn->global.inlined_to; -+ if (cn->inlined_to) -+ symbol2 = cn->inlined_to; - } - - /* The actual merge operation. */ -@@ -133,8 +133,8 @@ propagate_comdat_group (struct symtab_no - /* If we see inline clone, its comdat group actually - corresponds to the comdat group of the function it - is inlined to. 
*/ -- if (cn->global.inlined_to) -- symbol2 = cn->global.inlined_to; -+ if (cn->inlined_to) -+ symbol2 = cn->inlined_to; - } - - /* The actual merge operation. */ -diff -Nurp a/gcc/ipa-cp.c b/gcc/ipa-cp.c ---- a/gcc/ipa-cp.c 2020-04-30 15:14:04.592000000 +0800 -+++ b/gcc/ipa-cp.c 2020-04-30 15:14:56.700000000 +0800 -@@ -229,7 +229,9 @@ public: - inline bool set_contains_variable (); - bool add_value (valtype newval, cgraph_edge *cs, - ipcp_value *src_val = NULL, -- int src_idx = 0, HOST_WIDE_INT offset = -1); -+ int src_idx = 0, HOST_WIDE_INT offset = -1, -+ ipcp_value **val_p = NULL, -+ bool unlimited = false); - void print (FILE * f, bool dump_sources, bool dump_benefits); - }; - -@@ -381,8 +383,8 @@ static hash_map - - /* Return the param lattices structure corresponding to the Ith formal - parameter of the function described by INFO. */ --static inline struct ipcp_param_lattices * --ipa_get_parm_lattices (struct ipa_node_params *info, int i) -+static inline class ipcp_param_lattices * -+ipa_get_parm_lattices (class ipa_node_params *info, int i) - { - gcc_assert (i >= 0 && i < ipa_get_param_count (info)); - gcc_checking_assert (!info->ipcp_orig_node); -@@ -393,18 +395,18 @@ ipa_get_parm_lattices (struct ipa_node_p - /* Return the lattice corresponding to the scalar value of the Ith formal - parameter of the function described by INFO. */ - static inline ipcp_lattice * --ipa_get_scalar_lat (struct ipa_node_params *info, int i) -+ipa_get_scalar_lat (class ipa_node_params *info, int i) - { -- struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); -+ class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); - return &plats->itself; - } - - /* Return the lattice corresponding to the scalar value of the Ith formal - parameter of the function described by INFO. */ - static inline ipcp_lattice * --ipa_get_poly_ctx_lat (struct ipa_node_params *info, int i) -+ipa_get_poly_ctx_lat (class ipa_node_params *info, int i) - { -- struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); -+ class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); - return &plats->ctxlat; - } - -@@ -539,7 +541,7 @@ print_all_lattices (FILE * f, bool dump_ - fprintf (f, "\nLattices:\n"); - FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node) - { -- struct ipa_node_params *info; -+ class ipa_node_params *info; - - info = IPA_NODE_REF (node); - /* Skip constprop clones since we don't make lattices for them. */ -@@ -550,7 +552,7 @@ print_all_lattices (FILE * f, bool dump_ - for (i = 0; i < count; i++) - { - struct ipcp_agg_lattice *aglat; -- struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); -+ class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); - fprintf (f, " param [%d]: ", i); - plats->itself.print (f, dump_sources, dump_benefits); - fprintf (f, " ctxs: "); -@@ -585,7 +587,7 @@ print_all_lattices (FILE * f, bool dump_ - - static void - determine_versionability (struct cgraph_node *node, -- struct ipa_node_params *info) -+ class ipa_node_params *info) - { - const char *reason = NULL; - -@@ -656,7 +658,7 @@ determine_versionability (struct cgraph_ - static bool - ipcp_versionable_function_p (struct cgraph_node *node) - { -- return IPA_NODE_REF (node)->versionable; -+ return IPA_NODE_REF (node) && IPA_NODE_REF (node)->versionable; - } - - /* Structure holding accumulated information about callers of a node. 
*/ -@@ -731,7 +733,7 @@ ipcp_cloning_candidate_p (struct cgraph_ - init_caller_stats (&stats); - node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats, false); - -- if (ipa_fn_summaries->get (node)->self_size < stats.n_calls) -+ if (ipa_size_summaries->get (node)->self_size < stats.n_calls) - { - if (dump_file) - fprintf (dump_file, "Considering %s for cloning; code might shrink.\n", -@@ -806,23 +808,39 @@ public: - {} - }; - -+/* Skip edges from and to nodes without ipa_cp enabled. -+ Ignore not available symbols. */ -+ -+static bool -+ignore_edge_p (cgraph_edge *e) -+{ -+ enum availability avail; -+ cgraph_node *ultimate_target -+ = e->callee->function_or_virtual_thunk_symbol (&avail, e->caller); -+ -+ return (avail <= AVAIL_INTERPOSABLE -+ || !opt_for_fn (e->caller->decl, flag_ipa_cp) -+ || !opt_for_fn (ultimate_target->decl, flag_ipa_cp)); -+} -+ - /* Allocate the arrays in TOPO and topologically sort the nodes into order. */ - - static void --build_toporder_info (struct ipa_topo_info *topo) -+build_toporder_info (class ipa_topo_info *topo) - { - topo->order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count); - topo->stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count); - - gcc_checking_assert (topo->stack_top == 0); -- topo->nnodes = ipa_reduced_postorder (topo->order, true, NULL); -+ topo->nnodes = ipa_reduced_postorder (topo->order, true, -+ ignore_edge_p); - } - - /* Free information about strongly connected components and the arrays in - TOPO. */ - - static void --free_toporder_info (struct ipa_topo_info *topo) -+free_toporder_info (class ipa_topo_info *topo) - { - ipa_free_postorder_info (); - free (topo->order); -@@ -832,9 +850,9 @@ free_toporder_info (struct ipa_topo_info - /* Add NODE to the stack in TOPO, unless it is already there. */ - - static inline void --push_node_to_stack (struct ipa_topo_info *topo, struct cgraph_node *node) -+push_node_to_stack (class ipa_topo_info *topo, struct cgraph_node *node) - { -- struct ipa_node_params *info = IPA_NODE_REF (node); -+ class ipa_node_params *info = IPA_NODE_REF (node); - if (info->node_enqueued) - return; - info->node_enqueued = 1; -@@ -845,7 +863,7 @@ push_node_to_stack (struct ipa_topo_info - is empty. */ - - static struct cgraph_node * --pop_node_from_stack (struct ipa_topo_info *topo) -+pop_node_from_stack (class ipa_topo_info *topo) - { - if (topo->stack_top) - { -@@ -887,7 +905,7 @@ ipcp_lattice::set_contains_vari - not previously set as such. */ - - static inline bool --set_agg_lats_to_bottom (struct ipcp_param_lattices *plats) -+set_agg_lats_to_bottom (class ipcp_param_lattices *plats) - { - bool ret = !plats->aggs_bottom; - plats->aggs_bottom = true; -@@ -898,7 +916,7 @@ set_agg_lats_to_bottom (struct ipcp_para - return true if they were not previously marked as such. */ - - static inline bool --set_agg_lats_contain_variable (struct ipcp_param_lattices *plats) -+set_agg_lats_contain_variable (class ipcp_param_lattices *plats) - { - bool ret = !plats->aggs_contain_variable; - plats->aggs_contain_variable = true; -@@ -1108,7 +1126,7 @@ ipcp_bits_lattice::meet_with (ipcp_bits_ - return true is any of them has not been marked as such so far. 
*/ - - static inline bool --set_all_contains_variable (struct ipcp_param_lattices *plats) -+set_all_contains_variable (class ipcp_param_lattices *plats) - { - bool ret; - ret = plats->itself.set_contains_variable (); -@@ -1158,7 +1176,7 @@ set_single_call_flag (cgraph_node *node, - static void - initialize_node_lattices (struct cgraph_node *node) - { -- struct ipa_node_params *info = IPA_NODE_REF (node); -+ class ipa_node_params *info = IPA_NODE_REF (node); - struct cgraph_edge *ie; - bool disable = false, variable = false; - int i; -@@ -1188,7 +1206,7 @@ initialize_node_lattices (struct cgraph_ - - for (i = 0; i < ipa_get_param_count (info); i++) - { -- struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); -+ class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); - plats->m_value_range.init (); - } - -@@ -1196,7 +1214,7 @@ initialize_node_lattices (struct cgraph_ - { - for (i = 0; i < ipa_get_param_count (info); i++) - { -- struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); -+ class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); - if (disable) - { - plats->itself.set_to_bottom (); -@@ -1224,23 +1242,23 @@ initialize_node_lattices (struct cgraph_ - } - } - --/* Return the result of a (possibly arithmetic) pass through jump function -- JFUNC on the constant value INPUT. RES_TYPE is the type of the parameter -- to which the result is passed. Return NULL_TREE if that cannot be -- determined or be considered an interprocedural invariant. */ -+/* Return the result of a (possibly arithmetic) operation on the constant -+ value INPUT. OPERAND is 2nd operand for binary operation. RES_TYPE is -+ the type of the parameter to which the result is passed. Return -+ NULL_TREE if that cannot be determined or be considered an -+ interprocedural invariant. */ - - static tree --ipa_get_jf_pass_through_result (struct ipa_jump_func *jfunc, tree input, -- tree res_type) -+ipa_get_jf_arith_result (enum tree_code opcode, tree input, tree operand, -+ tree res_type) - { - tree res; - -- if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR) -+ if (opcode == NOP_EXPR) - return input; - if (!is_gimple_ip_invariant (input)) - return NULL_TREE; - -- tree_code opcode = ipa_get_jf_pass_through_operation (jfunc); - if (!res_type) - { - if (TREE_CODE_CLASS (opcode) == tcc_comparison) -@@ -1254,8 +1272,7 @@ ipa_get_jf_pass_through_result (struct i - if (TREE_CODE_CLASS (opcode) == tcc_unary) - res = fold_unary (opcode, res_type, input); - else -- res = fold_binary (opcode, res_type, input, -- ipa_get_jf_pass_through_operand (jfunc)); -+ res = fold_binary (opcode, res_type, input, operand); - - if (res && !is_gimple_ip_invariant (res)) - return NULL_TREE; -@@ -1263,6 +1280,21 @@ ipa_get_jf_pass_through_result (struct i - return res; - } - -+/* Return the result of a (possibly arithmetic) pass through jump function -+ JFUNC on the constant value INPUT. RES_TYPE is the type of the parameter -+ to which the result is passed. Return NULL_TREE if that cannot be -+ determined or be considered an interprocedural invariant. */ -+ -+static tree -+ipa_get_jf_pass_through_result (struct ipa_jump_func *jfunc, tree input, -+ tree res_type) -+{ -+ return ipa_get_jf_arith_result (ipa_get_jf_pass_through_operation (jfunc), -+ input, -+ ipa_get_jf_pass_through_operand (jfunc), -+ res_type); -+} -+ - /* Return the result of an ancestor jump function JFUNC on the constant value - INPUT. Return NULL_TREE if that cannot be determined. 
*/ - -@@ -1289,7 +1321,7 @@ ipa_get_jf_ancestor_result (struct ipa_j - passed. */ - - tree --ipa_value_from_jfunc (struct ipa_node_params *info, struct ipa_jump_func *jfunc, -+ipa_value_from_jfunc (class ipa_node_params *info, struct ipa_jump_func *jfunc, - tree parm_type) - { - if (jfunc->type == IPA_JF_CONST) -@@ -1396,6 +1428,146 @@ ipa_context_from_jfunc (ipa_node_params - return ctx; - } - -+/* See if NODE is a clone with a known aggregate value at a given OFFSET of a -+ parameter with the given INDEX. */ -+ -+static tree -+get_clone_agg_value (struct cgraph_node *node, HOST_WIDE_INT offset, -+ int index) -+{ -+ struct ipa_agg_replacement_value *aggval; -+ -+ aggval = ipa_get_agg_replacements_for_node (node); -+ while (aggval) -+ { -+ if (aggval->offset == offset -+ && aggval->index == index) -+ return aggval->value; -+ aggval = aggval->next; -+ } -+ return NULL_TREE; -+} -+ -+/* Determine whether ITEM, jump function for an aggregate part, evaluates to a -+ single known constant value and if so, return it. Otherwise return NULL. -+ NODE and INFO describes the caller node or the one it is inlined to, and -+ its related info. */ -+ -+static tree -+ipa_agg_value_from_node (class ipa_node_params *info, -+ struct cgraph_node *node, -+ struct ipa_agg_jf_item *item) -+{ -+ tree value = NULL_TREE; -+ int src_idx; -+ -+ if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN) -+ return NULL_TREE; -+ -+ if (item->jftype == IPA_JF_CONST) -+ return item->value.constant; -+ -+ gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH -+ || item->jftype == IPA_JF_LOAD_AGG); -+ -+ src_idx = item->value.pass_through.formal_id; -+ -+ if (info->ipcp_orig_node) -+ { -+ if (item->jftype == IPA_JF_PASS_THROUGH) -+ value = info->known_csts[src_idx]; -+ else -+ value = get_clone_agg_value (node, item->value.load_agg.offset, -+ src_idx); -+ } -+ else if (info->lattices) -+ { -+ class ipcp_param_lattices *src_plats -+ = ipa_get_parm_lattices (info, src_idx); -+ -+ if (item->jftype == IPA_JF_PASS_THROUGH) -+ { -+ struct ipcp_lattice *lat = &src_plats->itself; -+ -+ if (!lat->is_single_const ()) -+ return NULL_TREE; -+ -+ value = lat->values->value; -+ } -+ else if (src_plats->aggs -+ && !src_plats->aggs_bottom -+ && !src_plats->aggs_contain_variable -+ && src_plats->aggs_by_ref == item->value.load_agg.by_ref) -+ { -+ struct ipcp_agg_lattice *aglat; -+ -+ for (aglat = src_plats->aggs; aglat; aglat = aglat->next) -+ { -+ if (aglat->offset > item->value.load_agg.offset) -+ break; -+ -+ if (aglat->offset == item->value.load_agg.offset) -+ { -+ if (aglat->is_single_const ()) -+ value = aglat->values->value; -+ break; -+ } -+ } -+ } -+ } -+ -+ if (!value) -+ return NULL_TREE; -+ -+ if (item->jftype == IPA_JF_LOAD_AGG) -+ { -+ tree load_type = item->value.load_agg.type; -+ tree value_type = TREE_TYPE (value); -+ -+ /* Ensure value type is compatible with load type. */ -+ if (!useless_type_conversion_p (load_type, value_type)) -+ return NULL_TREE; -+ } -+ -+ return ipa_get_jf_arith_result (item->value.pass_through.operation, -+ value, -+ item->value.pass_through.operand, -+ item->type); -+} -+ -+/* Determine whether AGG_JFUNC evaluates to a set of known constant value for -+ an aggregate and if so, return it. Otherwise return an empty set. NODE -+ and INFO describes the caller node or the one it is inlined to, and its -+ related info. 
*/ -+ -+struct ipa_agg_value_set -+ipa_agg_value_set_from_jfunc (class ipa_node_params *info, cgraph_node *node, -+ struct ipa_agg_jump_function *agg_jfunc) -+{ -+ struct ipa_agg_value_set agg; -+ struct ipa_agg_jf_item *item; -+ int i; -+ -+ agg.items = vNULL; -+ agg.by_ref = agg_jfunc->by_ref; -+ -+ FOR_EACH_VEC_SAFE_ELT (agg_jfunc->items, i, item) -+ { -+ tree value = ipa_agg_value_from_node (info, node, item); -+ -+ if (value) -+ { -+ struct ipa_agg_value value_item; -+ -+ value_item.offset = item->offset; -+ value_item.value = value; -+ -+ agg.items.safe_push (value_item); -+ } -+ } -+ return agg; -+} -+ - /* If checking is enabled, verify that no lattice is in the TOP state, i.e. not - bottom, not containing a variable component and without any known value at - the same time. */ -@@ -1407,7 +1579,9 @@ ipcp_verify_propagated_values (void) - - FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node) - { -- struct ipa_node_params *info = IPA_NODE_REF (node); -+ class ipa_node_params *info = IPA_NODE_REF (node); -+ if (!opt_for_fn (node->decl, flag_ipa_cp)) -+ continue; - int i, count = ipa_get_param_count (info); - - for (i = 0; i < count; i++) -@@ -1516,22 +1690,32 @@ allocate_and_init_ipcp_value (ipa_polymo - /* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it. CS, - SRC_VAL SRC_INDEX and OFFSET are meant for add_source and have the same - meaning. OFFSET -1 means the source is scalar and not a part of an -- aggregate. */ -+ aggregate. If non-NULL, VAL_P records address of existing or newly added -+ ipcp_value. UNLIMITED means whether value count should not exceed the limit -+ given by PARAM_IPA_CP_VALUE_LIST_SIZE. */ - - template - bool - ipcp_lattice::add_value (valtype newval, cgraph_edge *cs, - ipcp_value *src_val, -- int src_idx, HOST_WIDE_INT offset) -+ int src_idx, HOST_WIDE_INT offset, -+ ipcp_value **val_p, -+ bool unlimited) - { -- ipcp_value *val; -+ ipcp_value *val, *last_val = NULL; -+ -+ if (val_p) -+ *val_p = NULL; - - if (bottom) - return false; - -- for (val = values; val; val = val->next) -+ for (val = values; val; last_val = val, val = val->next) - if (values_equal_for_ipcp_p (val->value, newval)) - { -+ if (val_p) -+ *val_p = val; -+ - if (ipa_edge_within_scc (cs)) - { - ipcp_value_source *s; -@@ -1546,7 +1730,7 @@ ipcp_lattice::add_value (valtyp - return false; - } - -- if (values_count == PARAM_VALUE (PARAM_IPA_CP_VALUE_LIST_SIZE)) -+ if (!unlimited && values_count == PARAM_VALUE (PARAM_IPA_CP_VALUE_LIST_SIZE)) - { - /* We can only free sources, not the values themselves, because sources - of other values in this SCC might point to them. */ -@@ -1559,7 +1743,6 @@ ipcp_lattice::add_value (valtyp - ipcp_sources_pool.remove ((ipcp_value_source*)src); - } - } -- - values = NULL; - return set_to_bottom (); - } -@@ -1567,41 +1750,177 @@ ipcp_lattice::add_value (valtyp - values_count++; - val = allocate_and_init_ipcp_value (newval); - val->add_source (cs, src_val, src_idx, offset); -- val->next = values; -- values = val; -+ val->next = NULL; -+ -+ /* Add the new value to end of value list, which can reduce iterations -+ of propagation stage for recursive function. */ -+ if (last_val) -+ last_val->next = val; -+ else -+ values = val; -+ -+ if (val_p) -+ *val_p = val; -+ - return true; - } - --/* Propagate values through a pass-through jump function JFUNC associated with -- edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX -- is the index of the source parameter. PARM_TYPE is the type of the -- parameter to which the result is passed. 
*/ -+/* Return true, if a ipcp_value VAL is orginated from parameter value of -+ self-feeding recursive function by applying non-passthrough arithmetic -+ transformation. */ - - static bool --propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc, -- ipcp_lattice *src_lat, -- ipcp_lattice *dest_lat, int src_idx, -- tree parm_type) -+self_recursively_generated_p (ipcp_value *val) -+{ -+ class ipa_node_params *info = NULL; -+ -+ for (ipcp_value_source *src = val->sources; src; src = src->next) -+ { -+ cgraph_edge *cs = src->cs; -+ -+ if (!src->val || cs->caller != cs->callee->function_symbol () -+ || src->val == val) -+ return false; -+ -+ if (!info) -+ info = IPA_NODE_REF (cs->caller); -+ -+ class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, -+ src->index); -+ ipcp_lattice *src_lat = src->offset == -1 ? &plats->itself -+ : plats->aggs; -+ ipcp_value *src_val; -+ -+ for (src_val = src_lat->values; src_val; src_val = src_val->next) -+ if (src_val == val) -+ break; -+ -+ if (!src_val) -+ return false; -+ } -+ -+ return true; -+} -+ -+/* A helper function that returns result of operation specified by OPCODE on -+ the value of SRC_VAL. If non-NULL, OPND1_TYPE is expected type for the -+ value of SRC_VAL. If the operation is binary, OPND2 is a constant value -+ acting as its second operand. If non-NULL, RES_TYPE is expected type of -+ the result. */ -+ -+static tree -+get_val_across_arith_op (enum tree_code opcode, -+ tree opnd1_type, -+ tree opnd2, -+ ipcp_value *src_val, -+ tree res_type) -+{ -+ tree opnd1 = src_val->value; -+ -+ /* Skip source values that is incompatible with specified type. */ -+ if (opnd1_type -+ && !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1))) -+ return NULL_TREE; -+ -+ return ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type); -+} -+ -+/* Propagate values through an arithmetic transformation described by a jump -+ function associated with edge CS, taking values from SRC_LAT and putting -+ them into DEST_LAT. OPND1_TYPE is expected type for the values in SRC_LAT. -+ OPND2 is a constant value if transformation is a binary operation. -+ SRC_OFFSET specifies offset in an aggregate if SRC_LAT describes lattice of -+ a part of the aggregate. SRC_IDX is the index of the source parameter. -+ RES_TYPE is the value type of result being propagated into. Return true if -+ DEST_LAT changed. */ -+ -+static bool -+propagate_vals_across_arith_jfunc (cgraph_edge *cs, -+ enum tree_code opcode, -+ tree opnd1_type, -+ tree opnd2, -+ ipcp_lattice *src_lat, -+ ipcp_lattice *dest_lat, -+ HOST_WIDE_INT src_offset, -+ int src_idx, -+ tree res_type) - { - ipcp_value *src_val; - bool ret = false; - -- /* Do not create new values when propagating within an SCC because if there -- are arithmetic functions with circular dependencies, there is infinite -- number of them and we would just make lattices bottom. If this condition -- is ever relaxed we have to detect self-feeding recursive calls in -- cgraph_edge_brings_value_p in a smarter way. */ -- if ((ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR) -- && ipa_edge_within_scc (cs)) -- ret = dest_lat->set_contains_variable (); -+ /* Due to circular dependencies, propagating within an SCC through arithmetic -+ transformation would create infinite number of values. But for -+ self-feeding recursive function, we could allow propagation in a limited -+ count, and this can enable a simple kind of recursive function versioning. -+ For other scenario, we would just make lattices bottom. 
*/ -+ if (opcode != NOP_EXPR && ipa_edge_within_scc (cs)) -+ { -+ int i; -+ -+ if (src_lat != dest_lat || PARAM_VALUE(PARAM_IPA_CP_MAX_RECURSIVE_DEPTH) < 1) -+ return dest_lat->set_contains_variable (); -+ -+ /* No benefit if recursive execution is in low probability. */ -+ if (cs->sreal_frequency () * 100 -+ <= ((sreal) 1) * PARAM_VALUE(PARAM_IPA_CP_MIN_RECURSIVE_PROBABILITY)) -+ return dest_lat->set_contains_variable (); -+ -+ auto_vec *, 8> val_seeds; -+ -+ for (src_val = src_lat->values; src_val; src_val = src_val->next) -+ { -+ /* Now we do not use self-recursively generated value as propagation -+ source, this is absolutely conservative, but could avoid explosion -+ of lattice's value space, especially when one recursive function -+ calls another recursive. */ -+ if (self_recursively_generated_p (src_val)) -+ { -+ ipcp_value_source *s; -+ -+ /* If the lattice has already been propagated for the call site, -+ no need to do that again. */ -+ for (s = src_val->sources; s; s = s->next) -+ if (s->cs == cs) -+ return dest_lat->set_contains_variable (); -+ } -+ else -+ val_seeds.safe_push (src_val); -+ } -+ -+ /* Recursively generate lattice values with a limited count. */ -+ FOR_EACH_VEC_ELT (val_seeds, i, src_val) -+ { -+ for (int j = 1; j < PARAM_VALUE(PARAM_IPA_CP_MAX_RECURSIVE_DEPTH); j++) -+ { -+ tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2, -+ src_val, res_type); -+ if (!cstval) -+ break; -+ -+ ret |= dest_lat->add_value (cstval, cs, src_val, src_idx, -+ src_offset, &src_val, true); -+ gcc_checking_assert (src_val); -+ } -+ } -+ ret |= dest_lat->set_contains_variable (); -+ } - else - for (src_val = src_lat->values; src_val; src_val = src_val->next) - { -- tree cstval = ipa_get_jf_pass_through_result (jfunc, src_val->value, -- parm_type); -+ /* Now we do not use self-recursively generated value as propagation -+ source, otherwise it is easy to make value space of normal lattice -+ overflow. */ -+ if (self_recursively_generated_p (src_val)) -+ { -+ ret |= dest_lat->set_contains_variable (); -+ continue; -+ } - -+ tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2, -+ src_val, res_type); - if (cstval) -- ret |= dest_lat->add_value (cstval, cs, src_val, src_idx); -+ ret |= dest_lat->add_value (cstval, cs, src_val, src_idx, -+ src_offset); - else - ret |= dest_lat->set_contains_variable (); - } -@@ -1609,6 +1928,24 @@ propagate_vals_across_pass_through (cgra - return ret; - } - -+/* Propagate values through a pass-through jump function JFUNC associated with -+ edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX -+ is the index of the source parameter. PARM_TYPE is the type of the -+ parameter to which the result is passed. */ -+ -+static bool -+propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc, -+ ipcp_lattice *src_lat, -+ ipcp_lattice *dest_lat, int src_idx, -+ tree parm_type) -+{ -+ return propagate_vals_across_arith_jfunc (cs, -+ ipa_get_jf_pass_through_operation (jfunc), -+ NULL_TREE, -+ ipa_get_jf_pass_through_operand (jfunc), -+ src_lat, dest_lat, -1, src_idx, parm_type); -+} -+ - /* Propagate values through an ancestor jump function JFUNC associated with - edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX - is the index of the source parameter. 
*/ -@@ -1659,7 +1996,7 @@ propagate_scalar_across_jump_function (s - else if (jfunc->type == IPA_JF_PASS_THROUGH - || jfunc->type == IPA_JF_ANCESTOR) - { -- struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); -+ class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); - ipcp_lattice *src_lat; - int src_idx; - bool ret; -@@ -1721,7 +2058,7 @@ propagate_context_across_jump_function ( - if (jfunc->type == IPA_JF_PASS_THROUGH - || jfunc->type == IPA_JF_ANCESTOR) - { -- struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); -+ class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); - int src_idx; - ipcp_lattice *src_lat; - -@@ -1769,7 +2106,6 @@ propagate_context_across_jump_function ( - added_sth = true; - } - } -- - } - - prop_fail: -@@ -1797,7 +2133,7 @@ propagate_bits_across_jump_function (cgr - - enum availability availability; - cgraph_node *callee = cs->callee->function_symbol (&availability); -- struct ipa_node_params *callee_info = IPA_NODE_REF (callee); -+ class ipa_node_params *callee_info = IPA_NODE_REF (callee); - tree parm_type = ipa_get_type (callee_info, idx); - - /* For K&R C programs, ipa_get_type() could return NULL_TREE. Avoid the -@@ -1820,7 +2156,7 @@ propagate_bits_across_jump_function (cgr - if (jfunc->type == IPA_JF_PASS_THROUGH - || jfunc->type == IPA_JF_ANCESTOR) - { -- struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); -+ class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); - tree operand = NULL_TREE; - enum tree_code code; - unsigned src_idx; -@@ -1840,7 +2176,7 @@ propagate_bits_across_jump_function (cgr - operand = build_int_cstu (size_type_node, offset); - } - -- struct ipcp_param_lattices *src_lats -+ class ipcp_param_lattices *src_lats - = ipa_get_parm_lattices (caller_info, src_idx); - - /* Try to propagate bits if src_lattice is bottom, but jfunc is known. -@@ -1894,7 +2230,7 @@ ipa_vr_operation_and_type_effects (value - - static bool - propagate_vr_across_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc, -- struct ipcp_param_lattices *dest_plats, -+ class ipcp_param_lattices *dest_plats, - tree param_type) - { - ipcp_vr_lattice *dest_lat = &dest_plats->m_value_range; -@@ -1913,10 +2249,10 @@ propagate_vr_across_jump_function (cgrap - - if (TREE_CODE_CLASS (operation) == tcc_unary) - { -- struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); -+ class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); - int src_idx = ipa_get_jf_pass_through_formal_id (jfunc); - tree operand_type = ipa_get_type (caller_info, src_idx); -- struct ipcp_param_lattices *src_lats -+ class ipcp_param_lattices *src_lats - = ipa_get_parm_lattices (caller_info, src_idx); - - if (src_lats->m_value_range.bottom_p ()) -@@ -1959,7 +2295,7 @@ propagate_vr_across_jump_function (cgrap - aggs_by_ref to NEW_AGGS_BY_REF. */ - - static bool --set_check_aggs_by_ref (struct ipcp_param_lattices *dest_plats, -+set_check_aggs_by_ref (class ipcp_param_lattices *dest_plats, - bool new_aggs_by_ref) - { - if (dest_plats->aggs) -@@ -1986,7 +2322,7 @@ set_check_aggs_by_ref (struct ipcp_param - true. 
*/ - - static bool --merge_agg_lats_step (struct ipcp_param_lattices *dest_plats, -+merge_agg_lats_step (class ipcp_param_lattices *dest_plats, - HOST_WIDE_INT offset, HOST_WIDE_INT val_size, - struct ipcp_agg_lattice ***aglat, - bool pre_existing, bool *change) -@@ -2064,8 +2400,8 @@ set_chain_of_aglats_contains_variable (s - - static bool - merge_aggregate_lattices (struct cgraph_edge *cs, -- struct ipcp_param_lattices *dest_plats, -- struct ipcp_param_lattices *src_plats, -+ class ipcp_param_lattices *dest_plats, -+ class ipcp_param_lattices *src_plats, - int src_idx, HOST_WIDE_INT offset_delta) - { - bool pre_existing = dest_plats->aggs != NULL; -@@ -2119,7 +2455,7 @@ merge_aggregate_lattices (struct cgraph_ - rules about propagating values passed by reference. */ - - static bool --agg_pass_through_permissible_p (struct ipcp_param_lattices *src_plats, -+agg_pass_through_permissible_p (class ipcp_param_lattices *src_plats, - struct ipa_jump_func *jfunc) - { - return src_plats->aggs -@@ -2127,13 +2463,92 @@ agg_pass_through_permissible_p (struct i - || ipa_get_jf_pass_through_agg_preserved (jfunc)); - } - -+/* Propagate values through ITEM, jump function for a part of an aggregate, -+ into corresponding aggregate lattice AGLAT. CS is the call graph edge -+ associated with the jump function. Return true if AGLAT changed in any -+ way. */ -+ -+static bool -+propagate_aggregate_lattice (struct cgraph_edge *cs, -+ struct ipa_agg_jf_item *item, -+ struct ipcp_agg_lattice *aglat) -+{ -+ class ipa_node_params *caller_info; -+ class ipcp_param_lattices *src_plats; -+ struct ipcp_lattice *src_lat; -+ HOST_WIDE_INT src_offset; -+ int src_idx; -+ tree load_type; -+ bool ret; -+ -+ if (item->jftype == IPA_JF_CONST) -+ { -+ tree value = item->value.constant; -+ -+ gcc_checking_assert (is_gimple_ip_invariant (value)); -+ return aglat->add_value (value, cs, NULL, 0); -+ } -+ -+ gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH -+ || item->jftype == IPA_JF_LOAD_AGG); -+ -+ caller_info = IPA_NODE_REF (cs->caller); -+ src_idx = item->value.pass_through.formal_id; -+ src_plats = ipa_get_parm_lattices (caller_info, src_idx); -+ -+ if (item->jftype == IPA_JF_PASS_THROUGH) -+ { -+ load_type = NULL_TREE; -+ src_lat = &src_plats->itself; -+ src_offset = -1; -+ } -+ else -+ { -+ HOST_WIDE_INT load_offset = item->value.load_agg.offset; -+ struct ipcp_agg_lattice *src_aglat; -+ -+ for (src_aglat = src_plats->aggs; src_aglat; src_aglat = src_aglat->next) -+ if (src_aglat->offset >= load_offset) -+ break; -+ -+ load_type = item->value.load_agg.type; -+ if (!src_aglat -+ || src_aglat->offset > load_offset -+ || src_aglat->size != tree_to_shwi (TYPE_SIZE (load_type)) -+ || src_plats->aggs_by_ref != item->value.load_agg.by_ref) -+ return aglat->set_contains_variable (); -+ -+ src_lat = src_aglat; -+ src_offset = load_offset; -+ } -+ -+ if (src_lat->bottom -+ || (!ipcp_versionable_function_p (cs->caller) -+ && !src_lat->is_single_const ())) -+ return aglat->set_contains_variable (); -+ -+ ret = propagate_vals_across_arith_jfunc (cs, -+ item->value.pass_through.operation, -+ load_type, -+ item->value.pass_through.operand, -+ src_lat, aglat, -+ src_offset, -+ src_idx, -+ item->type); -+ -+ if (src_lat->contains_variable) -+ ret |= aglat->set_contains_variable (); -+ -+ return ret; -+} -+ - /* Propagate scalar values across jump function JFUNC that is associated with - edge CS and put the values into DEST_LAT. 
*/ - - static bool - propagate_aggs_across_jump_function (struct cgraph_edge *cs, - struct ipa_jump_func *jfunc, -- struct ipcp_param_lattices *dest_plats) -+ class ipcp_param_lattices *dest_plats) - { - bool ret = false; - -@@ -2143,9 +2558,9 @@ propagate_aggs_across_jump_function (str - if (jfunc->type == IPA_JF_PASS_THROUGH - && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR) - { -- struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); -+ class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); - int src_idx = ipa_get_jf_pass_through_formal_id (jfunc); -- struct ipcp_param_lattices *src_plats; -+ class ipcp_param_lattices *src_plats; - - src_plats = ipa_get_parm_lattices (caller_info, src_idx); - if (agg_pass_through_permissible_p (src_plats, jfunc)) -@@ -2162,9 +2577,9 @@ propagate_aggs_across_jump_function (str - else if (jfunc->type == IPA_JF_ANCESTOR - && ipa_get_jf_ancestor_agg_preserved (jfunc)) - { -- struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); -+ class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); - int src_idx = ipa_get_jf_ancestor_formal_id (jfunc); -- struct ipcp_param_lattices *src_plats; -+ class ipcp_param_lattices *src_plats; - - src_plats = ipa_get_parm_lattices (caller_info, src_idx); - if (src_plats->aggs && src_plats->aggs_by_ref) -@@ -2194,15 +2609,14 @@ propagate_aggs_across_jump_function (str - { - HOST_WIDE_INT val_size; - -- if (item->offset < 0) -+ if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN) - continue; -- gcc_checking_assert (is_gimple_ip_invariant (item->value)); -- val_size = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (item->value))); -+ val_size = tree_to_shwi (TYPE_SIZE (item->type)); - - if (merge_agg_lats_step (dest_plats, item->offset, val_size, - &aglat, pre_existing, &ret)) - { -- ret |= (*aglat)->add_value (item->value, cs, NULL, 0, 0); -+ ret |= propagate_aggregate_lattice (cs, item, *aglat); - aglat = &(*aglat)->next; - } - else if (dest_plats->aggs_bottom) -@@ -2235,10 +2649,10 @@ call_passes_through_thunk_p (cgraph_edge - static bool - propagate_constants_across_call (struct cgraph_edge *cs) - { -- struct ipa_node_params *callee_info; -+ class ipa_node_params *callee_info; - enum availability availability; - cgraph_node *callee; -- struct ipa_edge_args *args; -+ class ipa_edge_args *args; - bool ret = false; - int i, args_count, parms_count; - -@@ -2247,12 +2661,21 @@ propagate_constants_across_call (struct - return false; - gcc_checking_assert (callee->has_gimple_body_p ()); - callee_info = IPA_NODE_REF (callee); -+ if (!callee_info) -+ return false; - - args = IPA_EDGE_REF (cs); -- args_count = ipa_get_cs_argument_count (args); - parms_count = ipa_get_param_count (callee_info); - if (parms_count == 0) - return false; -+ if (!args) -+ { -+ for (i = 0; i < parms_count; i++) -+ ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info, -+ i)); -+ return ret; -+ } -+ args_count = ipa_get_cs_argument_count (args); - - /* If this call goes through a thunk we must not propagate to the first (0th) - parameter. 
However, we might need to uncover a thunk from below a series -@@ -2269,7 +2692,7 @@ propagate_constants_across_call (struct - for (; (i < args_count) && (i < parms_count); i++) - { - struct ipa_jump_func *jump_func = ipa_get_ith_jump_func (args, i); -- struct ipcp_param_lattices *dest_plats; -+ class ipcp_param_lattices *dest_plats; - tree param_type = ipa_get_type (callee_info, i); - - dest_plats = ipa_get_parm_lattices (callee_info, i); -@@ -2308,7 +2731,7 @@ static tree - ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie, - vec known_csts, - vec known_contexts, -- vec known_aggs, -+ vec known_aggs, - struct ipa_agg_replacement_value *agg_reps, - bool *speculative) - { -@@ -2346,9 +2769,9 @@ ipa_get_indirect_edge_target_1 (struct c - } - if (!t) - { -- struct ipa_agg_jump_function *agg; -+ struct ipa_agg_value_set *agg; - if (known_aggs.length () > (unsigned int) param_index) -- agg = known_aggs[param_index]; -+ agg = &known_aggs[param_index]; - else - agg = NULL; - bool from_global_constant; -@@ -2402,8 +2825,7 @@ ipa_get_indirect_edge_target_1 (struct c - if (!t && known_aggs.length () > (unsigned int) param_index - && !ie->indirect_info->by_ref) - { -- struct ipa_agg_jump_function *agg; -- agg = known_aggs[param_index]; -+ struct ipa_agg_value_set *agg = &known_aggs[param_index]; - t = ipa_find_agg_cst_for_param (agg, known_csts[param_index], - ie->indirect_info->offset, true); - } -@@ -2526,7 +2948,7 @@ tree - ipa_get_indirect_edge_target (struct cgraph_edge *ie, - vec known_csts, - vec known_contexts, -- vec known_aggs, -+ vec known_aggs, - bool *speculative) - { - return ipa_get_indirect_edge_target_1 (ie, known_csts, known_contexts, -@@ -2540,7 +2962,7 @@ static int - devirtualization_time_bonus (struct cgraph_node *node, - vec known_csts, - vec known_contexts, -- vec known_aggs) -+ vec known_aggs) - { - struct cgraph_edge *ie; - int res = 0; -@@ -2548,7 +2970,7 @@ devirtualization_time_bonus (struct cgra - for (ie = node->indirect_calls; ie; ie = ie->next_callee) - { - struct cgraph_node *callee; -- struct ipa_fn_summary *isummary; -+ class ipa_fn_summary *isummary; - enum availability avail; - tree target; - bool speculative; -@@ -2570,13 +2992,14 @@ devirtualization_time_bonus (struct cgra - if (!isummary || !isummary->inlinable) - continue; - -+ int size = ipa_size_summaries->get (callee)->size; - /* FIXME: The values below need re-considering and perhaps also - integrating into the cost metrics, at lest in some very basic way. 
*/ -- if (isummary->size <= MAX_INLINE_INSNS_AUTO / 4) -+ if (size <= MAX_INLINE_INSNS_AUTO / 4) - res += 31 / ((int)speculative + 1); -- else if (isummary->size <= MAX_INLINE_INSNS_AUTO / 2) -+ else if (size <= MAX_INLINE_INSNS_AUTO / 2) - res += 15 / ((int)speculative + 1); -- else if (isummary->size <= MAX_INLINE_INSNS_AUTO -+ else if (size <= MAX_INLINE_INSNS_AUTO - || DECL_DECLARED_INLINE_P (callee->decl)) - res += 7 / ((int)speculative + 1); - } -@@ -2601,7 +3024,7 @@ hint_time_bonus (ipa_hints hints) - static inline int64_t - incorporate_penalties (ipa_node_params *info, int64_t evaluation) - { -- if (info->node_within_scc) -+ if (info->node_within_scc && !info->node_is_self_scc) - evaluation = (evaluation - * (100 - PARAM_VALUE (PARAM_IPA_CP_RECURSION_PENALTY))) / 100; - -@@ -2628,7 +3051,7 @@ good_cloning_opportunity_p (struct cgrap - - gcc_assert (size_cost > 0); - -- struct ipa_node_params *info = IPA_NODE_REF (node); -+ class ipa_node_params *info = IPA_NODE_REF (node); - if (max_count > profile_count::zero ()) - { - int factor = RDIV (count_sum.probability_in -@@ -2645,7 +3068,8 @@ good_cloning_opportunity_p (struct cgrap - count_sum.dump (dump_file); - fprintf (dump_file, "%s%s) -> evaluation: " "%" PRId64 - ", threshold: %i\n", -- info->node_within_scc ? ", scc" : "", -+ info->node_within_scc -+ ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "", - info->node_calling_single_call ? ", single_call" : "", - evaluation, PARAM_VALUE (PARAM_IPA_CP_EVAL_THRESHOLD)); - } -@@ -2663,7 +3087,8 @@ good_cloning_opportunity_p (struct cgrap - "size: %i, freq_sum: %i%s%s) -> evaluation: " - "%" PRId64 ", threshold: %i\n", - time_benefit, size_cost, freq_sum, -- info->node_within_scc ? ", scc" : "", -+ info->node_within_scc -+ ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "", - info->node_calling_single_call ? ", single_call" : "", - evaluation, PARAM_VALUE (PARAM_IPA_CP_EVAL_THRESHOLD)); - -@@ -2674,25 +3099,25 @@ good_cloning_opportunity_p (struct cgrap - /* Return all context independent values from aggregate lattices in PLATS in a - vector. Return NULL if there are none. */ - --static vec * --context_independent_aggregate_values (struct ipcp_param_lattices *plats) -+static vec -+context_independent_aggregate_values (class ipcp_param_lattices *plats) - { -- vec *res = NULL; -+ vec res = vNULL; - - if (plats->aggs_bottom - || plats->aggs_contain_variable - || plats->aggs_count == 0) -- return NULL; -+ return vNULL; - - for (struct ipcp_agg_lattice *aglat = plats->aggs; - aglat; - aglat = aglat->next) - if (aglat->is_single_const ()) - { -- struct ipa_agg_jf_item item; -+ struct ipa_agg_value item; - item.offset = aglat->offset; - item.value = aglat->values->value; -- vec_safe_push (res, item); -+ res.safe_push (item); - } - return res; - } -@@ -2704,11 +3129,11 @@ context_independent_aggregate_values (st - it. 
*/ - - static bool --gather_context_independent_values (struct ipa_node_params *info, -+gather_context_independent_values (class ipa_node_params *info, - vec *known_csts, - vec - *known_contexts, -- vec *known_aggs, -+ vec *known_aggs, - int *removable_params_cost) - { - int i, count = ipa_get_param_count (info); -@@ -2729,7 +3154,7 @@ gather_context_independent_values (struc - - for (i = 0; i < count; i++) - { -- struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); -+ class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); - ipcp_lattice *lat = &plats->itself; - - if (lat->is_single_const ()) -@@ -2758,40 +3183,20 @@ gather_context_independent_values (struc - - if (known_aggs) - { -- vec *agg_items; -- struct ipa_agg_jump_function *ajf; -+ vec agg_items; -+ struct ipa_agg_value_set *agg; - - agg_items = context_independent_aggregate_values (plats); -- ajf = &(*known_aggs)[i]; -- ajf->items = agg_items; -- ajf->by_ref = plats->aggs_by_ref; -- ret |= agg_items != NULL; -+ agg = &(*known_aggs)[i]; -+ agg->items = agg_items; -+ agg->by_ref = plats->aggs_by_ref; -+ ret |= !agg_items.is_empty (); - } - } - - return ret; - } - --/* The current interface in ipa-inline-analysis requires a pointer vector. -- Create it. -- -- FIXME: That interface should be re-worked, this is slightly silly. Still, -- I'd like to discuss how to change it first and this demonstrates the -- issue. */ -- --static vec --agg_jmp_p_vec_for_t_vec (vec known_aggs) --{ -- vec ret; -- struct ipa_agg_jump_function *ajf; -- int i; -- -- ret.create (known_aggs.length ()); -- FOR_EACH_VEC_ELT (known_aggs, i, ajf) -- ret.quick_push (ajf); -- return ret; --} -- - /* Perform time and size measurement of NODE with the context given in - KNOWN_CSTS, KNOWN_CONTEXTS and KNOWN_AGGS, calculate the benefit and cost - given BASE_TIME of the node without specialization, REMOVABLE_PARAMS_COST of -@@ -2801,7 +3206,7 @@ agg_jmp_p_vec_for_t_vec (vec known_csts, - vec known_contexts, -- vec known_aggs_ptrs, -+ vec known_aggs, - int removable_params_cost, - int est_move_cost, ipcp_value_base *val) - { -@@ -2810,7 +3215,7 @@ perform_estimation_of_a_value (cgraph_no - ipa_hints hints; - - estimate_ipcp_clone_size_and_time (node, known_csts, known_contexts, -- known_aggs_ptrs, &size, &time, -+ known_aggs, &size, &time, - &base_time, &hints); - base_time -= time; - if (base_time > 65535) -@@ -2824,7 +3229,7 @@ perform_estimation_of_a_value (cgraph_no - else - time_benefit = base_time.to_int () - + devirtualization_time_bonus (node, known_csts, known_contexts, -- known_aggs_ptrs) -+ known_aggs) - + hint_time_bonus (hints) - + removable_params_cost + est_move_cost; - -@@ -2846,12 +3251,11 @@ perform_estimation_of_a_value (cgraph_no - static void - estimate_local_effects (struct cgraph_node *node) - { -- struct ipa_node_params *info = IPA_NODE_REF (node); -+ class ipa_node_params *info = IPA_NODE_REF (node); - int i, count = ipa_get_param_count (info); - vec known_csts; - vec known_contexts; -- vec known_aggs; -- vec known_aggs_ptrs; -+ vec known_aggs; - bool always_const; - int removable_params_cost; - -@@ -2864,9 +3268,8 @@ estimate_local_effects (struct cgraph_no - always_const = gather_context_independent_values (info, &known_csts, - &known_contexts, &known_aggs, - &removable_params_cost); -- known_aggs_ptrs = agg_jmp_p_vec_for_t_vec (known_aggs); - int devirt_bonus = devirtualization_time_bonus (node, known_csts, -- known_contexts, known_aggs_ptrs); -+ known_contexts, known_aggs); - if (always_const || devirt_bonus - || 
(removable_params_cost && node->local.can_change_signature)) - { -@@ -2879,7 +3282,7 @@ estimate_local_effects (struct cgraph_no - node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats, - false); - estimate_ipcp_clone_size_and_time (node, known_csts, known_contexts, -- known_aggs_ptrs, &size, &time, -+ known_aggs, &size, &time, - &base_time, &hints); - time -= devirt_bonus; - time -= hint_time_bonus (hints); -@@ -2926,7 +3329,7 @@ estimate_local_effects (struct cgraph_no - - for (i = 0; i < count; i++) - { -- struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); -+ class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); - ipcp_lattice *lat = &plats->itself; - ipcp_value *val; - -@@ -2942,7 +3345,7 @@ estimate_local_effects (struct cgraph_no - - int emc = estimate_move_cost (TREE_TYPE (val->value), true); - perform_estimation_of_a_value (node, known_csts, known_contexts, -- known_aggs_ptrs, -+ known_aggs, - removable_params_cost, emc, val); - - if (dump_file && (dump_flags & TDF_DETAILS)) -@@ -2960,7 +3363,7 @@ estimate_local_effects (struct cgraph_no - - for (i = 0; i < count; i++) - { -- struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); -+ class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); - - if (!plats->virt_call) - continue; -@@ -2977,7 +3380,7 @@ estimate_local_effects (struct cgraph_no - { - known_contexts[i] = val->value; - perform_estimation_of_a_value (node, known_csts, known_contexts, -- known_aggs_ptrs, -+ known_aggs, - removable_params_cost, 0, val); - - if (dump_file && (dump_flags & TDF_DETAILS)) -@@ -2995,14 +3398,14 @@ estimate_local_effects (struct cgraph_no - - for (i = 0; i < count; i++) - { -- struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); -- struct ipa_agg_jump_function *ajf; -+ class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); -+ struct ipa_agg_value_set *agg; - struct ipcp_agg_lattice *aglat; - - if (plats->aggs_bottom || !plats->aggs) - continue; - -- ajf = &known_aggs[i]; -+ agg = &known_aggs[i]; - for (aglat = plats->aggs; aglat; aglat = aglat->next) - { - ipcp_value *val; -@@ -3014,14 +3417,14 @@ estimate_local_effects (struct cgraph_no - - for (val = aglat->values; val; val = val->next) - { -- struct ipa_agg_jf_item item; -+ struct ipa_agg_value item; - - item.offset = aglat->offset; - item.value = val->value; -- vec_safe_push (ajf->items, item); -+ agg->items.safe_push (item); - - perform_estimation_of_a_value (node, known_csts, known_contexts, -- known_aggs_ptrs, -+ known_aggs, - removable_params_cost, 0, val); - - if (dump_file && (dump_flags & TDF_DETAILS)) -@@ -3037,18 +3440,14 @@ estimate_local_effects (struct cgraph_no - val->local_time_benefit, val->local_size_cost); - } - -- ajf->items->pop (); -+ agg->items.pop (); - } - } - } - -- for (i = 0; i < count; i++) -- vec_free (known_aggs[i].items); -- - known_csts.release (); - known_contexts.release (); -- known_aggs.release (); -- known_aggs_ptrs.release (); -+ ipa_release_agg_values (known_aggs); - } - - -@@ -3112,12 +3511,12 @@ value_topo_info::add_val (ipcp_ - static void - add_all_node_vals_to_toposort (cgraph_node *node, ipa_topo_info *topo) - { -- struct ipa_node_params *info = IPA_NODE_REF (node); -+ class ipa_node_params *info = IPA_NODE_REF (node); - int i, count = ipa_get_param_count (info); - - for (i = 0; i < count; i++) - { -- struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); -+ class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); - 
ipcp_lattice *lat = &plats->itself; - struct ipcp_agg_lattice *aglat; - -@@ -3152,7 +3551,7 @@ add_all_node_vals_to_toposort (cgraph_no - connected components. */ - - static void --propagate_constants_topo (struct ipa_topo_info *topo) -+propagate_constants_topo (class ipa_topo_info *topo) - { - int i; - -@@ -3166,20 +3565,46 @@ propagate_constants_topo (struct ipa_top - until all lattices stabilize. */ - FOR_EACH_VEC_ELT (cycle_nodes, j, v) - if (v->has_gimple_body_p ()) -- push_node_to_stack (topo, v); -+ { -+ if (opt_for_fn (v->decl, flag_ipa_cp)) -+ push_node_to_stack (topo, v); -+ /* When V is not optimized, we can not push it to stac, but -+ still we need to set all its callees lattices to bottom. */ -+ else -+ { -+ for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee) -+ propagate_constants_across_call (cs); -+ } -+ } - - v = pop_node_from_stack (topo); - while (v) - { - struct cgraph_edge *cs; -+ class ipa_node_params *info = NULL; -+ bool self_scc = true; - - for (cs = v->callees; cs; cs = cs->next_callee) - if (ipa_edge_within_scc (cs)) - { -- IPA_NODE_REF (v)->node_within_scc = true; -+ cgraph_node *callee = cs->callee->function_symbol (); -+ -+ if (v != callee) -+ self_scc = false; -+ -+ if (!info) -+ { -+ info = IPA_NODE_REF (v); -+ info->node_within_scc = true; -+ } -+ - if (propagate_constants_across_call (cs)) -- push_node_to_stack (topo, cs->callee->function_symbol ()); -+ push_node_to_stack (topo, callee); - } -+ -+ if (info) -+ info->node_is_self_scc = self_scc; -+ - v = pop_node_from_stack (topo); - } - -@@ -3187,7 +3612,8 @@ propagate_constants_topo (struct ipa_top - the local effects of the discovered constants and all valid values to - their topological sort. */ - FOR_EACH_VEC_ELT (cycle_nodes, j, v) -- if (v->has_gimple_body_p ()) -+ if (v->has_gimple_body_p () -+ && opt_for_fn (v->decl, flag_ipa_cp)) - { - struct cgraph_edge *cs; - -@@ -3255,7 +3681,7 @@ value_topo_info::propagate_effe - summaries interprocedurally. */ - - static void --ipcp_propagate_stage (struct ipa_topo_info *topo) -+ipcp_propagate_stage (class ipa_topo_info *topo) - { - struct cgraph_node *node; - -@@ -3266,16 +3692,15 @@ ipcp_propagate_stage (struct ipa_topo_in - - FOR_EACH_DEFINED_FUNCTION (node) - { -- struct ipa_node_params *info = IPA_NODE_REF (node); -- -- determine_versionability (node, info); -- if (node->has_gimple_body_p ()) -+ if (node->has_gimple_body_p () && opt_for_fn (node->decl, flag_ipa_cp)) - { -- info->lattices = XCNEWVEC (struct ipcp_param_lattices, -+ class ipa_node_params *info = IPA_NODE_REF (node); -+ determine_versionability (node, info); -+ info->lattices = XCNEWVEC (class ipcp_param_lattices, - ipa_get_param_count (info)); - initialize_node_lattices (node); - } -- ipa_fn_summary *s = ipa_fn_summaries->get (node); -+ ipa_size_summary *s = ipa_size_summaries->get (node); - if (node->definition && !node->alias && s != NULL) - overall_size += s->self_size; - max_count = max_count.max (node->count.ipa ()); -@@ -3335,7 +3760,7 @@ ipcp_discover_new_direct_edges (struct c - - if (cs && !agg_contents && !polymorphic) - { -- struct ipa_node_params *info = IPA_NODE_REF (node); -+ class ipa_node_params *info = IPA_NODE_REF (node); - int c = ipa_get_controlled_uses (info, param_index); - if (c != IPA_UNDESCRIBED_USE) - { -@@ -3415,26 +3840,6 @@ edge_clone_summary_t::duplicate (cgraph_ - src_data->next_clone = dst_edge; - } - --/* See if NODE is a clone with a known aggregate value at a given OFFSET of a -- parameter with the given INDEX. 
*/ -- --static tree --get_clone_agg_value (struct cgraph_node *node, HOST_WIDE_INT offset, -- int index) --{ -- struct ipa_agg_replacement_value *aggval; -- -- aggval = ipa_get_agg_replacements_for_node (node); -- while (aggval) -- { -- if (aggval->offset == offset -- && aggval->index == index) -- return aggval->value; -- aggval = aggval->next; -- } -- return NULL_TREE; --} -- - /* Return true is NODE is DEST or its clone for all contexts. */ - - static bool -@@ -3443,7 +3848,7 @@ same_node_or_its_all_contexts_clone_p (c - if (node == dest) - return true; - -- struct ipa_node_params *info = IPA_NODE_REF (node); -+ class ipa_node_params *info = IPA_NODE_REF (node); - return info->is_all_contexts_clone && info->ipcp_orig_node == dest; - } - -@@ -3454,12 +3859,12 @@ static bool - cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source *src, - cgraph_node *dest, ipcp_value *dest_val) - { -- struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); -+ class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); - enum availability availability; - cgraph_node *real_dest = cs->callee->function_symbol (&availability); - -- if (!same_node_or_its_all_contexts_clone_p (real_dest, dest) -- || availability <= AVAIL_INTERPOSABLE -+ if (availability <= AVAIL_INTERPOSABLE -+ || !same_node_or_its_all_contexts_clone_p (real_dest, dest) - || caller_info->node_dead) - return false; - -@@ -3485,7 +3890,7 @@ cgraph_edge_brings_value_p (cgraph_edge - return true; - - struct ipcp_agg_lattice *aglat; -- struct ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info, -+ class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info, - src->index); - if (src->offset == -1) - return (plats->itself.is_single_const () -@@ -3514,10 +3919,12 @@ cgraph_edge_brings_value_p (cgraph_edge - cgraph_node *dest, - ipcp_value *) - { -- struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); -- cgraph_node *real_dest = cs->callee->function_symbol (); -+ class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); -+ enum availability avail; -+ cgraph_node *real_dest = cs->callee->function_symbol (&avail); - -- if (!same_node_or_its_all_contexts_clone_p (real_dest, dest) -+ if (avail <= AVAIL_INTERPOSABLE -+ || !same_node_or_its_all_contexts_clone_p (real_dest, dest) - || caller_info->node_dead) - return false; - if (!src->val) -@@ -3528,7 +3935,7 @@ cgraph_edge_brings_value_p (cgraph_edge - && values_equal_for_ipcp_p (src->val->value, - caller_info->known_contexts[src->index]); - -- struct ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info, -+ class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info, - src->index); - return plats->ctxlat.is_single_const () - && values_equal_for_ipcp_p (src->val->value, -@@ -3575,6 +3982,9 @@ get_info_about_necessary_edges (ipcp_val - hot |= cs->maybe_hot_p (); - if (cs->caller != dest) - non_self_recursive = true; -+ else if (src->val) -+ gcc_assert (values_equal_for_ipcp_p (src->val->value, -+ val->value)); - } - cs = get_next_cgraph_edge_clone (cs); - } -@@ -3588,6 +3998,19 @@ get_info_about_necessary_edges (ipcp_val - *freq_sum = freq; - *count_sum = cnt; - *caller_count = count; -+ -+ if (!hot && IPA_NODE_REF (dest)->node_within_scc) -+ { -+ struct cgraph_edge *cs; -+ -+ /* Cold non-SCC source edge could trigger hot recursive execution of -+ function. Consider the case as hot and rely on following cost model -+ computation to further select right one. 
*/ -+ for (cs = dest->callers; cs; cs = cs->next_caller) -+ if (cs->caller == dest && cs->maybe_hot_p ()) -+ return true; -+ } -+ - return hot; - } - -@@ -3621,7 +4044,7 @@ gather_edges_for_value (ipcp_value callers) - { -- struct ipa_node_params *new_info, *info = IPA_NODE_REF (node); -+ class ipa_node_params *new_info, *info = IPA_NODE_REF (node); - vec *replace_trees = NULL; - struct ipa_agg_replacement_value *av; - struct cgraph_node *new_node; -@@ -3891,6 +4314,7 @@ create_specialized_node (struct cgraph_n - update_profiling_info (node, new_node); - new_info = IPA_NODE_REF (new_node); - new_info->ipcp_orig_node = node; -+ new_node->ipcp_clone = true; - new_info->known_csts = known_csts; - new_info->known_contexts = known_contexts; - -@@ -3924,7 +4348,7 @@ find_more_scalar_values_for_callers_subs - vec known_csts, - vec callers) - { -- struct ipa_node_params *info = IPA_NODE_REF (node); -+ class ipa_node_params *info = IPA_NODE_REF (node); - int i, count = ipa_get_param_count (info); - - for (i = 0; i < count; i++) -@@ -3946,7 +4370,8 @@ find_more_scalar_values_for_callers_subs - if (IPA_NODE_REF (cs->caller)->node_dead) - continue; - -- if (i >= ipa_get_cs_argument_count (IPA_EDGE_REF (cs)) -+ if (!IPA_EDGE_REF (cs) -+ || i >= ipa_get_cs_argument_count (IPA_EDGE_REF (cs)) - || (i == 0 - && call_passes_through_thunk_p (cs))) - { -@@ -4015,7 +4440,8 @@ find_more_contexts_for_caller_subset (cg - - FOR_EACH_VEC_ELT (callers, j, cs) - { -- if (i >= ipa_get_cs_argument_count (IPA_EDGE_REF (cs))) -+ if (!IPA_EDGE_REF (cs) -+ || i >= ipa_get_cs_argument_count (IPA_EDGE_REF (cs))) - return; - ipa_jump_func *jfunc = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), - i); -@@ -4056,10 +4482,10 @@ find_more_contexts_for_caller_subset (cg - /* Go through PLATS and create a vector of values consisting of values and - offsets (minus OFFSET) of lattices that contain only a single value. */ - --static vec --copy_plats_to_inter (struct ipcp_param_lattices *plats, HOST_WIDE_INT offset) -+static vec -+copy_plats_to_inter (class ipcp_param_lattices *plats, HOST_WIDE_INT offset) - { -- vec res = vNULL; -+ vec res = vNULL; - - if (!plats->aggs || plats->aggs_contain_variable || plats->aggs_bottom) - return vNULL; -@@ -4067,7 +4493,7 @@ copy_plats_to_inter (struct ipcp_param_l - for (struct ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next) - if (aglat->is_single_const ()) - { -- struct ipa_agg_jf_item ti; -+ struct ipa_agg_value ti; - ti.offset = aglat->offset - offset; - ti.value = aglat->values->value; - res.safe_push (ti); -@@ -4079,12 +4505,12 @@ copy_plats_to_inter (struct ipcp_param_l - subtracting OFFSET). */ - - static void --intersect_with_plats (struct ipcp_param_lattices *plats, -- vec *inter, -+intersect_with_plats (class ipcp_param_lattices *plats, -+ vec *inter, - HOST_WIDE_INT offset) - { - struct ipcp_agg_lattice *aglat; -- struct ipa_agg_jf_item *item; -+ struct ipa_agg_value *item; - int k; - - if (!plats->aggs || plats->aggs_contain_variable || plats->aggs_bottom) -@@ -4122,18 +4548,18 @@ intersect_with_plats (struct ipcp_param_ - /* Copy aggregate replacement values of NODE (which is an IPA-CP clone) to the - vector result while subtracting OFFSET from the individual value offsets. 
*/ - --static vec -+static vec - agg_replacements_to_vector (struct cgraph_node *node, int index, - HOST_WIDE_INT offset) - { - struct ipa_agg_replacement_value *av; -- vec res = vNULL; -+ vec res = vNULL; - - for (av = ipa_get_agg_replacements_for_node (node); av; av = av->next) - if (av->index == index - && (av->offset - offset) >= 0) - { -- struct ipa_agg_jf_item item; -+ struct ipa_agg_value item; - gcc_checking_assert (av->value); - item.offset = av->offset - offset; - item.value = av->value; -@@ -4149,11 +4575,11 @@ agg_replacements_to_vector (struct cgrap - - static void - intersect_with_agg_replacements (struct cgraph_node *node, int index, -- vec *inter, -+ vec *inter, - HOST_WIDE_INT offset) - { - struct ipa_agg_replacement_value *srcvals; -- struct ipa_agg_jf_item *item; -+ struct ipa_agg_value *item; - int i; - - srcvals = ipa_get_agg_replacements_for_node (node); -@@ -4190,22 +4616,22 @@ intersect_with_agg_replacements (struct - copy all incoming values to it. If we determine we ended up with no values - whatsoever, return a released vector. */ - --static vec -+static vec - intersect_aggregates_with_edge (struct cgraph_edge *cs, int index, -- vec inter) -+ vec inter) - { - struct ipa_jump_func *jfunc; - jfunc = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), index); - if (jfunc->type == IPA_JF_PASS_THROUGH - && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR) - { -- struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); -+ class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); - int src_idx = ipa_get_jf_pass_through_formal_id (jfunc); - - if (caller_info->ipcp_orig_node) - { - struct cgraph_node *orig_node = caller_info->ipcp_orig_node; -- struct ipcp_param_lattices *orig_plats; -+ class ipcp_param_lattices *orig_plats; - orig_plats = ipa_get_parm_lattices (IPA_NODE_REF (orig_node), - src_idx); - if (agg_pass_through_permissible_p (orig_plats, jfunc)) -@@ -4224,7 +4650,7 @@ intersect_aggregates_with_edge (struct c - } - else - { -- struct ipcp_param_lattices *src_plats; -+ class ipcp_param_lattices *src_plats; - src_plats = ipa_get_parm_lattices (caller_info, src_idx); - if (agg_pass_through_permissible_p (src_plats, jfunc)) - { -@@ -4246,9 +4672,9 @@ intersect_aggregates_with_edge (struct c - else if (jfunc->type == IPA_JF_ANCESTOR - && ipa_get_jf_ancestor_agg_preserved (jfunc)) - { -- struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); -+ class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); - int src_idx = ipa_get_jf_ancestor_formal_id (jfunc); -- struct ipcp_param_lattices *src_plats; -+ class ipcp_param_lattices *src_plats; - HOST_WIDE_INT delta = ipa_get_jf_ancestor_offset (jfunc); - - if (caller_info->ipcp_orig_node) -@@ -4273,12 +4699,26 @@ intersect_aggregates_with_edge (struct c - } - else if (jfunc->agg.items) - { -- struct ipa_agg_jf_item *item; -+ class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); -+ struct ipa_agg_value *item; - int k; - - if (!inter.exists ()) - for (unsigned i = 0; i < jfunc->agg.items->length (); i++) -- inter.safe_push ((*jfunc->agg.items)[i]); -+ { -+ struct ipa_agg_jf_item *agg_item = &(*jfunc->agg.items)[i]; -+ tree value = ipa_agg_value_from_node (caller_info, cs->caller, -+ agg_item); -+ if (value) -+ { -+ struct ipa_agg_value agg_value; -+ -+ agg_value.offset = agg_item->offset; -+ agg_value.value = value; -+ -+ inter.safe_push (agg_value); -+ } -+ } - else - FOR_EACH_VEC_ELT (inter, k, item) - { -@@ -4296,9 +4736,10 @@ intersect_aggregates_with_edge (struct c - break; - if 
(ti->offset == item->offset) - { -- gcc_checking_assert (ti->value); -- if (values_equal_for_ipcp_p (item->value, -- ti->value)) -+ tree value = ipa_agg_value_from_node (caller_info, -+ cs->caller, ti); -+ if (value -+ && values_equal_for_ipcp_p (item->value, value)) - found = true; - break; - } -@@ -4311,7 +4752,7 @@ intersect_aggregates_with_edge (struct c - else - { - inter.release (); -- return vec(); -+ return vNULL; - } - return inter; - } -@@ -4323,7 +4764,7 @@ static struct ipa_agg_replacement_value - find_aggregate_values_for_callers_subset (struct cgraph_node *node, - vec callers) - { -- struct ipa_node_params *dest_info = IPA_NODE_REF (node); -+ class ipa_node_params *dest_info = IPA_NODE_REF (node); - struct ipa_agg_replacement_value *res; - struct ipa_agg_replacement_value **tail = &res; - struct cgraph_edge *cs; -@@ -4331,6 +4772,11 @@ find_aggregate_values_for_callers_subset - - FOR_EACH_VEC_ELT (callers, j, cs) - { -+ if (!IPA_EDGE_REF (cs)) -+ { -+ count = 0; -+ break; -+ } - int c = ipa_get_cs_argument_count (IPA_EDGE_REF (cs)); - if (c < count) - count = c; -@@ -4339,9 +4785,9 @@ find_aggregate_values_for_callers_subset - for (i = 0; i < count; i++) - { - struct cgraph_edge *cs; -- vec inter = vNULL; -- struct ipa_agg_jf_item *item; -- struct ipcp_param_lattices *plats = ipa_get_parm_lattices (dest_info, i); -+ vec inter = vNULL; -+ struct ipa_agg_value *item; -+ class ipcp_param_lattices *plats = ipa_get_parm_lattices (dest_info, i); - int j; - - /* Among other things, the following check should deal with all by_ref -@@ -4394,10 +4840,10 @@ static bool - cgraph_edge_brings_all_scalars_for_node (struct cgraph_edge *cs, - struct cgraph_node *node) - { -- struct ipa_node_params *dest_info = IPA_NODE_REF (node); -+ class ipa_node_params *dest_info = IPA_NODE_REF (node); - int count = ipa_get_param_count (dest_info); -- struct ipa_node_params *caller_info; -- struct ipa_edge_args *args; -+ class ipa_node_params *caller_info; -+ class ipa_edge_args *args; - int i; - - caller_info = IPA_NODE_REF (cs->caller); -@@ -4428,8 +4874,7 @@ static bool - cgraph_edge_brings_all_agg_vals_for_node (struct cgraph_edge *cs, - struct cgraph_node *node) - { -- struct ipa_node_params *orig_caller_info = IPA_NODE_REF (cs->caller); -- struct ipa_node_params *orig_node_info; -+ class ipa_node_params *orig_node_info; - struct ipa_agg_replacement_value *aggval; - int i, ec, count; - -@@ -4445,12 +4890,10 @@ cgraph_edge_brings_all_agg_vals_for_node - return false; - - orig_node_info = IPA_NODE_REF (IPA_NODE_REF (node)->ipcp_orig_node); -- if (orig_caller_info->ipcp_orig_node) -- orig_caller_info = IPA_NODE_REF (orig_caller_info->ipcp_orig_node); - - for (i = 0; i < count; i++) - { -- struct ipcp_param_lattices *plats; -+ class ipcp_param_lattices *plats; - bool interesting = false; - for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next) - if (aggval->index == i) -@@ -4465,15 +4908,14 @@ cgraph_edge_brings_all_agg_vals_for_node - if (plats->aggs_bottom) - return false; - -- vec values -- = intersect_aggregates_with_edge (cs, i, vNULL); -+ vec values = intersect_aggregates_with_edge (cs, i, vNULL); - if (!values.exists ()) - return false; - - for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next) - if (aggval->index == i) - { -- struct ipa_agg_jf_item *item; -+ struct ipa_agg_value *item; - int j; - bool found = false; - FOR_EACH_VEC_ELT (values, j, item) -@@ -4708,11 +5150,10 @@ decide_about_value (struct cgraph_node * - static bool - decide_whether_version_node 
(struct cgraph_node *node) - { -- struct ipa_node_params *info = IPA_NODE_REF (node); -+ class ipa_node_params *info = IPA_NODE_REF (node); - int i, count = ipa_get_param_count (info); - vec known_csts; - vec known_contexts; -- vec known_aggs = vNULL; - bool ret = false; - - if (count == 0) -@@ -4723,12 +5164,11 @@ decide_whether_version_node (struct cgra - node->dump_name ()); - - gather_context_independent_values (info, &known_csts, &known_contexts, -- info->do_clone_for_all_contexts ? &known_aggs -- : NULL, NULL); -+ NULL, NULL); - - for (i = 0; i < count;i++) - { -- struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); -+ class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); - ipcp_lattice *lat = &plats->itself; - ipcp_lattice *ctxlat = &plats->ctxlat; - -@@ -4793,9 +5233,6 @@ decide_whether_version_node (struct cgra - info = IPA_NODE_REF (node); - info->do_clone_for_all_contexts = false; - IPA_NODE_REF (clone)->is_all_contexts_clone = true; -- for (i = 0; i < count; i++) -- vec_free (known_aggs[i].items); -- known_aggs.release (); - ret = true; - } - else -@@ -4818,7 +5255,7 @@ spread_undeadness (struct cgraph_node *n - if (ipa_edge_within_scc (cs)) - { - struct cgraph_node *callee; -- struct ipa_node_params *info; -+ class ipa_node_params *info; - - callee = cs->callee->function_symbol (NULL); - info = IPA_NODE_REF (callee); -@@ -4881,7 +5318,7 @@ identify_dead_nodes (struct cgraph_node - TOPO and make specialized clones if deemed beneficial. */ - - static void --ipcp_decision_stage (struct ipa_topo_info *topo) -+ipcp_decision_stage (class ipa_topo_info *topo) - { - int i; - -@@ -4923,7 +5360,7 @@ ipcp_store_bits_results (void) - bool dumped_sth = false; - bool found_useful_result = false; - -- if (!opt_for_fn (node->decl, flag_ipa_bit_cp)) -+ if (!opt_for_fn (node->decl, flag_ipa_bit_cp) || !info) - { - if (dump_file) - fprintf (dump_file, "Not considering %s for ipa bitwise propagation " -@@ -5055,7 +5492,7 @@ ipcp_store_vr_results (void) - static unsigned int - ipcp_driver (void) - { -- struct ipa_topo_info topo; -+ class ipa_topo_info topo; - - if (edge_clone_summaries == NULL) - edge_clone_summaries = new edge_clone_summary_t (symtab); -diff -Nurp a/gcc/ipa-devirt.c b/gcc/ipa-devirt.c ---- a/gcc/ipa-devirt.c 2020-04-30 15:14:04.624000000 +0800 -+++ b/gcc/ipa-devirt.c 2020-04-30 15:14:56.624000000 +0800 -@@ -172,6 +172,11 @@ struct default_hash_traits - } - }; - -+/* HACK alert: this is used to communicate with ipa-inline-transform that -+ thunk is being expanded and there is no need to clear the polymorphic -+ call target cache. */ -+bool thunk_expansion; -+ - static bool odr_types_equivalent_p (tree, tree, bool, bool *, - hash_set *, - location_t, location_t); -@@ -2557,7 +2562,7 @@ maybe_record_node (vec & - || target_node->definition) - && target_node->real_symbol_p ()) - { -- gcc_assert (!target_node->global.inlined_to); -+ gcc_assert (!target_node->inlined_to); - gcc_assert (target_node->real_symbol_p ()); - /* When sanitizing, do not assume that __cxa_pure_virtual is not called - by valid program. 
*/ -@@ -2892,6 +2897,7 @@ static void - devirt_node_removal_hook (struct cgraph_node *n, void *d ATTRIBUTE_UNUSED) - { - if (cached_polymorphic_call_targets -+ && !thunk_expansion - && cached_polymorphic_call_targets->contains (n)) - free_polymorphic_call_targets_hash (); - } -diff -Nurp a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c ---- a/gcc/ipa-fnsummary.c 2020-04-30 15:14:04.568000000 +0800 -+++ b/gcc/ipa-fnsummary.c 2020-04-30 15:14:56.664000000 +0800 -@@ -86,6 +86,7 @@ along with GCC; see the file COPYING3. - - /* Summaries. */ - fast_function_summary *ipa_fn_summaries; -+fast_function_summary *ipa_size_summaries; - fast_call_summary *ipa_call_summaries; - - /* Edge predicates goes here. */ -@@ -207,7 +208,7 @@ ipa_fn_summary::account_size_time (int s - } - if (!found) - { -- struct size_time_entry new_entry; -+ class size_time_entry new_entry; - new_entry.size = size; - new_entry.time = time; - new_entry.exec_predicate = exec_pred; -@@ -236,7 +237,7 @@ redirect_to_unreachable (struct cgraph_e - e->make_direct (target); - else - e->redirect_callee (target); -- struct ipa_call_summary *es = ipa_call_summaries->get (e); -+ class ipa_call_summary *es = ipa_call_summaries->get (e); - e->inline_failed = CIF_UNREACHABLE; - e->count = profile_count::zero (); - es->call_stmt_size = 0; -@@ -261,7 +262,7 @@ edge_set_predicate (struct cgraph_edge * - && (!e->speculative || e->callee)) - e = redirect_to_unreachable (e); - -- struct ipa_call_summary *es = ipa_call_summaries->get (e); -+ class ipa_call_summary *es = ipa_call_summaries->get (e); - if (predicate && *predicate != true) - { - if (!es->predicate) -@@ -306,9 +307,9 @@ set_hint_predicate (predicate **p, predi - the fact that parameter is indeed a constant. - - KNOWN_VALS is partial mapping of parameters of NODE to constant values. -- KNOWN_AGGS is a vector of aggreggate jump functions for each parameter. -- Return clause of possible truths. When INLINE_P is true, assume that we are -- inlining. -+ KNOWN_AGGS is a vector of aggreggate known offset/value set for each -+ parameter. Return clause of possible truths. When INLINE_P is true, assume -+ that we are inlining. - - ERROR_MARK means compile time invariant. */ - -@@ -316,14 +317,13 @@ static void - evaluate_conditions_for_known_args (struct cgraph_node *node, - bool inline_p, - vec known_vals, -- vec -- known_aggs, -+ vec known_aggs, - clause_t *ret_clause, - clause_t *ret_nonspec_clause) - { - clause_t clause = inline_p ? 0 : 1 << predicate::not_inlined_condition; - clause_t nonspec_clause = 1 << predicate::not_inlined_condition; -- struct ipa_fn_summary *info = ipa_fn_summaries->get (node); -+ class ipa_fn_summary *info = ipa_fn_summaries->get (node); - int i; - struct condition *c; - -@@ -331,6 +331,8 @@ evaluate_conditions_for_known_args (stru - { - tree val; - tree res; -+ int j; -+ struct expr_eval_op *op; - - /* We allow call stmt to have fewer arguments than the callee function - (especially for K&R style programs). 
So bound check here (we assume -@@ -347,7 +349,7 @@ evaluate_conditions_for_known_args (stru - - if (c->agg_contents) - { -- struct ipa_agg_jump_function *agg; -+ struct ipa_agg_value_set *agg; - - if (c->code == predicate::changed - && !c->by_ref -@@ -356,7 +358,7 @@ evaluate_conditions_for_known_args (stru - - if (known_aggs.exists ()) - { -- agg = known_aggs[c->operand_num]; -+ agg = &known_aggs[c->operand_num]; - val = ipa_find_agg_cst_for_param (agg, known_vals[c->operand_num], - c->offset, c->by_ref); - } -@@ -382,7 +384,7 @@ evaluate_conditions_for_known_args (stru - continue; - } - -- if (tree_to_shwi (TYPE_SIZE (TREE_TYPE (val))) != c->size) -+ if (TYPE_SIZE (c->type) != TYPE_SIZE (TREE_TYPE (val))) - { - clause |= 1 << (i + predicate::first_dynamic_condition); - nonspec_clause |= 1 << (i + predicate::first_dynamic_condition); -@@ -394,7 +396,30 @@ evaluate_conditions_for_known_args (stru - continue; - } - -- val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (c->val), val); -+ val = fold_unary (VIEW_CONVERT_EXPR, c->type, val); -+ for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++) -+ { -+ if (!val) -+ break; -+ if (!op->val[0]) -+ val = fold_unary (op->code, op->type, val); -+ else if (!op->val[1]) -+ val = fold_binary (op->code, op->type, -+ op->index ? op->val[0] : val, -+ op->index ? val : op->val[0]); -+ else if (op->index == 0) -+ val = fold_ternary (op->code, op->type, -+ val, op->val[0], op->val[1]); -+ else if (op->index == 1) -+ val = fold_ternary (op->code, op->type, -+ op->val[0], val, op->val[1]); -+ else if (op->index == 2) -+ val = fold_ternary (op->code, op->type, -+ op->val[0], op->val[1], val); -+ else -+ val = NULL_TREE; -+ } -+ - res = val - ? fold_binary_to_constant (c->code, boolean_type_node, val, c->val) - : NULL; -@@ -420,12 +445,13 @@ evaluate_properties_for_edge (struct cgr - vec *known_vals_ptr, - vec - *known_contexts_ptr, -- vec *known_aggs_ptr) -+ vec *known_aggs_ptr) - { - struct cgraph_node *callee = e->callee->ultimate_alias_target (); -- struct ipa_fn_summary *info = ipa_fn_summaries->get (callee); -+ class ipa_fn_summary *info = ipa_fn_summaries->get (callee); - vec known_vals = vNULL; -- vec known_aggs = vNULL; -+ vec known_aggs = vNULL; -+ class ipa_edge_args *args; - - if (clause_ptr) - *clause_ptr = inline_p ? 
0 : 1 << predicate::not_inlined_condition; -@@ -436,18 +462,20 @@ evaluate_properties_for_edge (struct cgr - - if (ipa_node_params_sum - && !e->call_stmt_cannot_inline_p -- && ((clause_ptr && info->conds) || known_vals_ptr || known_contexts_ptr)) -+ && ((clause_ptr && info->conds) || known_vals_ptr || known_contexts_ptr) -+ && (args = IPA_EDGE_REF (e)) != NULL) - { -- struct ipa_node_params *caller_parms_info, *callee_pi; -- struct ipa_edge_args *args = IPA_EDGE_REF (e); -- struct ipa_call_summary *es = ipa_call_summaries->get (e); -+ struct cgraph_node *caller; -+ class ipa_node_params *caller_parms_info, *callee_pi; -+ class ipa_call_summary *es = ipa_call_summaries->get (e); - int i, count = ipa_get_cs_argument_count (args); - -- if (e->caller->global.inlined_to) -- caller_parms_info = IPA_NODE_REF (e->caller->global.inlined_to); -+ if (e->caller->inlined_to) -+ caller = e->caller->inlined_to; - else -- caller_parms_info = IPA_NODE_REF (e->caller); -- callee_pi = IPA_NODE_REF (e->callee); -+ caller = e->caller; -+ caller_parms_info = IPA_NODE_REF (caller); -+ callee_pi = IPA_NODE_REF (callee); - - if (count && (info->conds || known_vals_ptr)) - known_vals.safe_grow_cleared (count); -@@ -456,36 +484,38 @@ evaluate_properties_for_edge (struct cgr - if (count && known_contexts_ptr) - known_contexts_ptr->safe_grow_cleared (count); - -- for (i = 0; i < count; i++) -- { -- struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i); -- tree cst = ipa_value_from_jfunc (caller_parms_info, jf, -- ipa_get_type (callee_pi, i)); -- -- if (!cst && e->call_stmt -- && i < (int)gimple_call_num_args (e->call_stmt)) -- { -- cst = gimple_call_arg (e->call_stmt, i); -- if (!is_gimple_min_invariant (cst)) -- cst = NULL; -- } -- if (cst) -- { -- gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO); -- if (known_vals.exists ()) -- known_vals[i] = cst; -- } -- else if (inline_p && !es->param[i].change_prob) -- known_vals[i] = error_mark_node; -- -- if (known_contexts_ptr) -- (*known_contexts_ptr)[i] -- = ipa_context_from_jfunc (caller_parms_info, e, i, jf); -- /* TODO: When IPA-CP starts propagating and merging aggregate jump -- functions, use its knowledge of the caller too, just like the -- scalar case above. 
*/ -- known_aggs[i] = &jf->agg; -- } -+ if (callee_pi) -+ for (i = 0; i < count; i++) -+ { -+ struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i); -+ tree cst = ipa_value_from_jfunc (caller_parms_info, jf, -+ ipa_get_type (callee_pi, i)); -+ -+ if (!cst && e->call_stmt -+ && i < (int)gimple_call_num_args (e->call_stmt)) -+ { -+ cst = gimple_call_arg (e->call_stmt, i); -+ if (!is_gimple_min_invariant (cst)) -+ cst = NULL; -+ } -+ if (cst) -+ { -+ gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO); -+ if (known_vals.exists ()) -+ known_vals[i] = cst; -+ } -+ else if (inline_p && !es->param[i].change_prob) -+ known_vals[i] = error_mark_node; -+ -+ if (known_contexts_ptr) -+ (*known_contexts_ptr)[i] -+ = ipa_context_from_jfunc (caller_parms_info, e, i, jf); -+ -+ known_aggs[i] = ipa_agg_value_set_from_jfunc (caller_parms_info, -+ caller, &jf->agg); -+ } -+ else -+ gcc_assert (callee->thunk.thunk_p); - } - else if (e->call_stmt && !e->call_stmt_cannot_inline_p - && ((clause_ptr && info->conds) || known_vals_ptr)) -@@ -516,7 +546,7 @@ evaluate_properties_for_edge (struct cgr - if (known_aggs_ptr) - *known_aggs_ptr = known_aggs; - else -- known_aggs.release (); -+ ipa_release_agg_values (known_aggs); - } - - -@@ -527,6 +557,8 @@ ipa_fn_summary_alloc (void) - { - gcc_checking_assert (!ipa_fn_summaries); - ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab); -+ ipa_size_summaries = new fast_function_summary -+ (symtab); - ipa_call_summaries = new ipa_call_summary_t (symtab); - } - -@@ -597,7 +629,7 @@ ipa_fn_summary_t::duplicate (cgraph_node - { - vec *entry = info->size_time_table; - /* Use SRC parm info since it may not be copied yet. */ -- struct ipa_node_params *parms_info = IPA_NODE_REF (src); -+ class ipa_node_params *parms_info = IPA_NODE_REF (src); - vec known_vals = vNULL; - int count = ipa_get_param_count (parms_info); - int i, j; -@@ -661,7 +693,7 @@ ipa_fn_summary_t::duplicate (cgraph_node - for (edge = dst->callees; edge; edge = next) - { - predicate new_predicate; -- struct ipa_call_summary *es = ipa_call_summaries->get_create (edge); -+ class ipa_call_summary *es = ipa_call_summaries->get_create (edge); - next = edge->next_callee; - - if (!edge->inline_failed) -@@ -680,7 +712,7 @@ ipa_fn_summary_t::duplicate (cgraph_node - for (edge = dst->indirect_calls; edge; edge = next) - { - predicate new_predicate; -- struct ipa_call_summary *es = ipa_call_summaries->get_create (edge); -+ class ipa_call_summary *es = ipa_call_summaries->get_create (edge); - next = edge->next_callee; - - gcc_checking_assert (edge->inline_failed); -@@ -719,7 +751,7 @@ ipa_fn_summary_t::duplicate (cgraph_node - set_hint_predicate (&info->loop_stride, p); - } - } -- if (!dst->global.inlined_to) -+ if (!dst->inlined_to) - ipa_update_overall_fn_summary (dst); - } - -@@ -729,8 +761,8 @@ ipa_fn_summary_t::duplicate (cgraph_node - void - ipa_call_summary_t::duplicate (struct cgraph_edge *src, - struct cgraph_edge *dst, -- struct ipa_call_summary *srcinfo, -- struct ipa_call_summary *info) -+ class ipa_call_summary *srcinfo, -+ class ipa_call_summary *info) - { - new (info) ipa_call_summary (*srcinfo); - info->predicate = NULL; -@@ -750,12 +782,12 @@ ipa_call_summary_t::duplicate (struct cg - - static void - dump_ipa_call_summary (FILE *f, int indent, struct cgraph_node *node, -- struct ipa_fn_summary *info) -+ class ipa_fn_summary *info) - { - struct cgraph_edge *edge; - for (edge = node->callees; edge; edge = edge->next_callee) - { -- struct ipa_call_summary *es = ipa_call_summaries->get (edge); -+ class 
ipa_call_summary *es = ipa_call_summaries->get (edge); - struct cgraph_node *callee = edge->callee->ultimate_alias_target (); - int i; - -@@ -768,9 +800,10 @@ dump_ipa_call_summary (FILE *f, int inde - es->call_stmt_size, es->call_stmt_time); - - ipa_fn_summary *s = ipa_fn_summaries->get (callee); -+ ipa_size_summary *ss = ipa_size_summaries->get (callee); - if (s != NULL) -- fprintf (f, "callee size:%2i stack:%2i", -- (int) (s->size / ipa_fn_summary::size_scale), -+ fprintf (f, " callee size:%2i stack:%2i", -+ (int) (ss->size / ipa_fn_summary::size_scale), - (int) s->estimated_stack_size); - - if (es->predicate) -@@ -794,19 +827,17 @@ dump_ipa_call_summary (FILE *f, int inde - } - if (!edge->inline_failed) - { -- ipa_fn_summary *s = ipa_fn_summaries->get (callee); -- fprintf (f, "%*sStack frame offset %i, callee self size %i," -- " callee size %i\n", -+ ipa_size_summary *ss = ipa_size_summaries->get (callee); -+ fprintf (f, "%*sStack frame offset %i, callee self size %i\n", - indent + 2, "", -- (int) s->stack_frame_offset, -- (int) s->estimated_self_stack_size, -- (int) s->estimated_stack_size); -+ (int) ipa_get_stack_frame_offset (callee), -+ (int) ss->estimated_self_stack_size); - dump_ipa_call_summary (f, indent + 2, callee, info); - } - } - for (edge = node->indirect_calls; edge; edge = edge->next_callee) - { -- struct ipa_call_summary *es = ipa_call_summaries->get (edge); -+ class ipa_call_summary *es = ipa_call_summaries->get (edge); - fprintf (f, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i" - " time: %2i", - indent, "", -@@ -829,7 +860,8 @@ ipa_dump_fn_summary (FILE *f, struct cgr - { - if (node->definition) - { -- struct ipa_fn_summary *s = ipa_fn_summaries->get (node); -+ class ipa_fn_summary *s = ipa_fn_summaries->get (node); -+ class ipa_size_summary *ss = ipa_size_summaries->get (node); - if (s != NULL) - { - size_time_entry *e; -@@ -842,11 +874,11 @@ ipa_dump_fn_summary (FILE *f, struct cgr - if (s->fp_expressions) - fprintf (f, " fp_expression"); - fprintf (f, "\n global time: %f\n", s->time.to_double ()); -- fprintf (f, " self size: %i\n", s->self_size); -- fprintf (f, " global size: %i\n", s->size); -+ fprintf (f, " self size: %i\n", ss->self_size); -+ fprintf (f, " global size: %i\n", ss->size); - fprintf (f, " min size: %i\n", s->min_size); - fprintf (f, " self stack: %i\n", -- (int) s->estimated_self_stack_size); -+ (int) ss->estimated_self_stack_size); - fprintf (f, " global stack: %i\n", (int) s->estimated_stack_size); - if (s->growth) - fprintf (f, " estimated growth:%i\n", (int) s->growth); -@@ -900,7 +932,7 @@ ipa_dump_fn_summaries (FILE *f) - struct cgraph_node *node; - - FOR_EACH_DEFINED_FUNCTION (node) -- if (!node->global.inlined_to) -+ if (!node->inlined_to) - ipa_dump_fn_summary (f, node); - } - -@@ -922,7 +954,7 @@ mark_modified (ao_ref *ao ATTRIBUTE_UNUS - - static tree - unmodified_parm_1 (ipa_func_body_info *fbi, gimple *stmt, tree op, -- HOST_WIDE_INT *size_p) -+ poly_int64 *size_p) - { - /* SSA_NAME referring to parm default def? */ - if (TREE_CODE (op) == SSA_NAME -@@ -930,7 +962,7 @@ unmodified_parm_1 (ipa_func_body_info *f - && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL) - { - if (size_p) -- *size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op))); -+ *size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op))); - return SSA_NAME_VAR (op); - } - /* Non-SSA parm reference? 
*/ -@@ -951,7 +983,7 @@ unmodified_parm_1 (ipa_func_body_info *f - if (!modified) - { - if (size_p) -- *size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op))); -+ *size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op))); - return op; - } - } -@@ -965,7 +997,7 @@ unmodified_parm_1 (ipa_func_body_info *f - - static tree - unmodified_parm (ipa_func_body_info *fbi, gimple *stmt, tree op, -- HOST_WIDE_INT *size_p) -+ poly_int64 *size_p) - { - tree res = unmodified_parm_1 (fbi, stmt, op, size_p); - if (res) -@@ -990,7 +1022,7 @@ unmodified_parm (ipa_func_body_info *fbi - static bool - unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi, - gimple *stmt, tree op, int *index_p, -- HOST_WIDE_INT *size_p, -+ poly_int64 *size_p, - struct agg_position_info *aggpos) - { - tree res = unmodified_parm_1 (fbi, stmt, op, size_p); -@@ -1157,25 +1189,147 @@ eliminated_by_inlining_prob (ipa_func_bo - } - } - -+/* Analyze EXPR if it represents a series of simple operations performed on -+ a function parameter and return true if so. FBI, STMT, EXPR, INDEX_P and -+ AGGPOS have the same meaning like in unmodified_parm_or_parm_agg_item. -+ Type of the parameter or load from an aggregate via the parameter is -+ stored in *TYPE_P. Operations on the parameter are recorded to -+ PARAM_OPS_P if it is not NULL. */ -+ -+static bool -+decompose_param_expr (struct ipa_func_body_info *fbi, -+ gimple *stmt, tree expr, -+ int *index_p, tree *type_p, -+ struct agg_position_info *aggpos, -+ expr_eval_ops *param_ops_p = NULL) -+{ -+ int op_limit = PARAM_VALUE (PARAM_IPA_MAX_PARAM_EXPR_OPS); -+ int op_count = 0; -+ -+ if (param_ops_p) -+ *param_ops_p = NULL; -+ -+ while (true) -+ { -+ expr_eval_op eval_op; -+ unsigned rhs_count; -+ unsigned cst_count = 0; -+ -+ if (unmodified_parm_or_parm_agg_item (fbi, stmt, expr, index_p, NULL, -+ aggpos)) -+ { -+ tree type = TREE_TYPE (expr); -+ -+ if (aggpos->agg_contents) -+ { -+ /* Stop if containing bit-field. */ -+ if (TREE_CODE (expr) == BIT_FIELD_REF -+ || contains_bitfld_component_ref_p (expr)) -+ break; -+ } -+ -+ *type_p = type; -+ return true; -+ } -+ -+ if (TREE_CODE (expr) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (expr)) -+ break; -+ -+ if (!is_gimple_assign (stmt = SSA_NAME_DEF_STMT (expr))) -+ break; -+ -+ switch (gimple_assign_rhs_class (stmt)) -+ { -+ case GIMPLE_SINGLE_RHS: -+ expr = gimple_assign_rhs1 (stmt); -+ continue; -+ -+ case GIMPLE_UNARY_RHS: -+ rhs_count = 1; -+ break; -+ -+ case GIMPLE_BINARY_RHS: -+ rhs_count = 2; -+ break; -+ -+ case GIMPLE_TERNARY_RHS: -+ rhs_count = 3; -+ break; -+ -+ default: -+ goto fail; -+ } -+ -+ /* Stop if expression is too complex. */ -+ if (op_count++ == op_limit) -+ break; -+ -+ if (param_ops_p) -+ { -+ eval_op.code = gimple_assign_rhs_code (stmt); -+ eval_op.type = TREE_TYPE (gimple_assign_lhs (stmt)); -+ eval_op.val[0] = NULL_TREE; -+ eval_op.val[1] = NULL_TREE; -+ } -+ -+ expr = NULL_TREE; -+ for (unsigned i = 0; i < rhs_count; i++) -+ { -+ tree op = gimple_op (stmt, i + 1); -+ -+ gcc_assert (op && !TYPE_P (op)); -+ if (is_gimple_ip_invariant (op)) -+ { -+ if (++cst_count == rhs_count) -+ goto fail; -+ -+ eval_op.val[cst_count - 1] = op; -+ } -+ else if (!expr) -+ { -+ /* Found a non-constant operand, and record its index in rhs -+ operands. */ -+ eval_op.index = i; -+ expr = op; -+ } -+ else -+ { -+ /* Found more than one non-constant operands. */ -+ goto fail; -+ } -+ } -+ -+ if (param_ops_p) -+ vec_safe_insert (*param_ops_p, 0, eval_op); -+ } -+ -+ /* Failed to decompose, free resource and return. 
*/ -+fail: -+ if (param_ops_p) -+ vec_free (*param_ops_p); -+ -+ return false; -+} - - /* If BB ends by a conditional we can turn into predicates, attach corresponding - predicates to the CFG edges. */ - - static void - set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi, -- struct ipa_fn_summary *summary, -+ class ipa_fn_summary *summary, -+ class ipa_node_params *params_summary, - basic_block bb) - { - gimple *last; -- tree op; -+ tree op, op2; - int index; -- HOST_WIDE_INT size; - struct agg_position_info aggpos; - enum tree_code code, inverted_code; - edge e; - edge_iterator ei; - gimple *set_stmt; -- tree op2; -+ tree param_type; -+ expr_eval_ops param_ops; - - last = last_stmt (bb); - if (!last || gimple_code (last) != GIMPLE_COND) -@@ -1183,10 +1337,9 @@ set_cond_stmt_execution_predicate (struc - if (!is_gimple_ip_invariant (gimple_cond_rhs (last))) - return; - op = gimple_cond_lhs (last); -- /* TODO: handle conditionals like -- var = op0 < 4; -- if (var != 0). */ -- if (unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, &aggpos)) -+ -+ if (decompose_param_expr (fbi, last, op, &index, ¶m_type, &aggpos, -+ ¶m_ops)) - { - code = gimple_cond_code (last); - inverted_code = invert_tree_comparison (code, HONOR_NANS (op)); -@@ -1197,17 +1350,24 @@ set_cond_stmt_execution_predicate (struc - ? code : inverted_code); - /* invert_tree_comparison will return ERROR_MARK on FP - comparsions that are not EQ/NE instead of returning proper -- unordered one. Be sure it is not confused with NON_CONSTANT. */ -- if (this_code != ERROR_MARK) -+ unordered one. Be sure it is not confused with NON_CONSTANT. -+ -+ And if the edge's target is the final block of diamond CFG graph -+ of this conditional statement, we do not need to compute -+ predicate for the edge because the final block's predicate must -+ be at least as that of the first block of the statement. 
*/ -+ if (this_code != ERROR_MARK -+ && !dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest)) - { - predicate p -- = add_condition (summary, index, size, &aggpos, this_code, -- unshare_expr_without_location -- (gimple_cond_rhs (last))); -+ = add_condition (summary, params_summary, index, -+ param_type, &aggpos, -+ this_code, gimple_cond_rhs (last), param_ops); - e->aux = edge_predicate_pool.allocate (); - *(predicate *) e->aux = p; - } - } -+ vec_free (param_ops); - } - - if (TREE_CODE (op) != SSA_NAME) -@@ -1230,12 +1390,12 @@ set_cond_stmt_execution_predicate (struc - || gimple_call_num_args (set_stmt) != 1) - return; - op2 = gimple_call_arg (set_stmt, 0); -- if (!unmodified_parm_or_parm_agg_item (fbi, set_stmt, op2, &index, &size, -- &aggpos)) -+ if (!decompose_param_expr (fbi, set_stmt, op2, &index, ¶m_type, &aggpos)) - return; - FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE) - { -- predicate p = add_condition (summary, index, size, &aggpos, -+ predicate p = add_condition (summary, params_summary, index, -+ param_type, &aggpos, - predicate::is_not_constant, NULL_TREE); - e->aux = edge_predicate_pool.allocate (); - *(predicate *) e->aux = p; -@@ -1248,63 +1408,200 @@ set_cond_stmt_execution_predicate (struc - - static void - set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi, -- struct ipa_fn_summary *summary, -+ class ipa_fn_summary *summary, -+ class ipa_node_params *params_summary, - basic_block bb) - { - gimple *lastg; - tree op; - int index; -- HOST_WIDE_INT size; - struct agg_position_info aggpos; - edge e; - edge_iterator ei; - size_t n; - size_t case_idx; -+ tree param_type; -+ expr_eval_ops param_ops; - - lastg = last_stmt (bb); - if (!lastg || gimple_code (lastg) != GIMPLE_SWITCH) - return; - gswitch *last = as_a (lastg); - op = gimple_switch_index (last); -- if (!unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, &aggpos)) -+ if (!decompose_param_expr (fbi, last, op, &index, ¶m_type, &aggpos, -+ ¶m_ops)) - return; - -+ auto_vec > ranges; -+ tree type = TREE_TYPE (op); -+ int bound_limit = PARAM_VALUE (PARAM_IPA_MAX_SWITCH_PREDICATE_BOUNDS); -+ int bound_count = 0; -+ wide_int vr_wmin, vr_wmax; -+ value_range_kind vr_type = get_range_info (op, &vr_wmin, &vr_wmax); -+ - FOR_EACH_EDGE (e, ei, bb->succs) - { - e->aux = edge_predicate_pool.allocate (); - *(predicate *) e->aux = false; - } -+ -+ e = gimple_switch_edge (cfun, last, 0); -+ /* Set BOUND_COUNT to maximum count to bypass computing predicate for -+ default case if its target basic block is in convergence point of all -+ switch cases, which can be determined by checking whether it -+ post-dominates the switch statement. */ -+ if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest)) -+ bound_count = INT_MAX; -+ - n = gimple_switch_num_labels (last); -- for (case_idx = 0; case_idx < n; ++case_idx) -+ for (case_idx = 1; case_idx < n; ++case_idx) - { - tree cl = gimple_switch_label (last, case_idx); -- tree min, max; -+ tree min = CASE_LOW (cl); -+ tree max = CASE_HIGH (cl); - predicate p; - - e = gimple_switch_edge (cfun, last, case_idx); -- min = CASE_LOW (cl); -- max = CASE_HIGH (cl); - -- /* For default we might want to construct predicate that none -- of cases is met, but it is bit hard to do not having negations -- of conditionals handy. */ -- if (!min && !max) -+ /* The case value might not have same type as switch expression, -+ extend the value based on the expression type. 
*/ -+ if (TREE_TYPE (min) != type) -+ min = wide_int_to_tree (type, wi::to_wide (min)); -+ -+ if (!max) -+ max = min; -+ else if (TREE_TYPE (max) != type) -+ max = wide_int_to_tree (type, wi::to_wide (max)); -+ -+ /* The case's target basic block is in convergence point of all switch -+ cases, its predicate should be at least as that of the switch -+ statement. */ -+ if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest)) - p = true; -- else if (!max) -- p = add_condition (summary, index, size, &aggpos, EQ_EXPR, -- unshare_expr_without_location (min)); -+ else if (min == max) -+ p = add_condition (summary, params_summary, index, param_type, -+ &aggpos, EQ_EXPR, min, param_ops); - else - { - predicate p1, p2; -- p1 = add_condition (summary, index, size, &aggpos, GE_EXPR, -- unshare_expr_without_location (min)); -- p2 = add_condition (summary, index, size, &aggpos, LE_EXPR, -- unshare_expr_without_location (max)); -+ p1 = add_condition (summary, params_summary, index, param_type, -+ &aggpos, GE_EXPR, min, param_ops); -+ p2 = add_condition (summary, params_summary,index, param_type, -+ &aggpos, LE_EXPR, max, param_ops); - p = p1 & p2; - } -- *(struct predicate *) e->aux -- = p.or_with (summary->conds, *(struct predicate *) e->aux); -+ *(class predicate *) e->aux -+ = p.or_with (summary->conds, *(class predicate *) e->aux); -+ -+ /* If there are too many disjoint case ranges, predicate for default -+ case might become too complicated. So add a limit here. */ -+ if (bound_count > bound_limit) -+ continue; -+ -+ bool new_range = true; -+ -+ if (!ranges.is_empty ()) -+ { -+ wide_int curr_wmin = wi::to_wide (min); -+ wide_int last_wmax = wi::to_wide (ranges.last ().second); -+ -+ /* Merge case ranges if they are continuous. */ -+ if (curr_wmin == last_wmax + 1) -+ new_range = false; -+ else if (vr_type == VR_ANTI_RANGE) -+ { -+ /* If two disjoint case ranges can be connected by anti-range -+ of switch index, combine them to one range. */ -+ if (wi::lt_p (vr_wmax, curr_wmin - 1, TYPE_SIGN (type))) -+ vr_type = VR_UNDEFINED; -+ else if (wi::le_p (vr_wmin, last_wmax + 1, TYPE_SIGN (type))) -+ new_range = false; -+ } -+ } -+ -+ /* Create/extend a case range. And we count endpoints of range set, -+ this number nearly equals to number of conditions that we will create -+ for predicate of default case. */ -+ if (new_range) -+ { -+ bound_count += (min == max) ? 1 : 2; -+ ranges.safe_push (std::make_pair (min, max)); -+ } -+ else -+ { -+ bound_count += (ranges.last ().first == ranges.last ().second); -+ ranges.last ().second = max; -+ } -+ } -+ -+ e = gimple_switch_edge (cfun, last, 0); -+ if (bound_count > bound_limit) -+ { -+ *(class predicate *) e->aux = true; -+ vec_free (param_ops); -+ return; -+ } -+ -+ predicate p_seg = true; -+ predicate p_all = false; -+ -+ if (vr_type != VR_RANGE) -+ { -+ vr_wmin = wi::to_wide (TYPE_MIN_VALUE (type)); -+ vr_wmax = wi::to_wide (TYPE_MAX_VALUE (type)); - } -+ -+ /* Construct predicate to represent default range set that is negation of -+ all case ranges. Case range is classified as containing single/non-single -+ values. Suppose a piece of case ranges in the following. -+ -+ [D1...D2] [S1] ... [Sn] [D3...D4] -+ -+ To represent default case's range sets between two non-single value -+ case ranges (From D2 to D3), we construct predicate as: -+ -+ D2 < x < D3 && x != S1 && ... 
&& x != Sn -+ */ -+ for (size_t i = 0; i < ranges.length (); i++) -+ { -+ tree min = ranges[i].first; -+ tree max = ranges[i].second; -+ -+ if (min == max) -+ p_seg &= add_condition (summary, params_summary, index, -+ param_type, &aggpos, NE_EXPR, -+ min, param_ops); -+ else -+ { -+ /* Do not create sub-predicate for range that is beyond low bound -+ of switch index. */ -+ if (wi::lt_p (vr_wmin, wi::to_wide (min), TYPE_SIGN (type))) -+ { -+ p_seg &= add_condition (summary, params_summary, index, -+ param_type, &aggpos, -+ LT_EXPR, min, param_ops); -+ p_all = p_all.or_with (summary->conds, p_seg); -+ } -+ -+ /* Do not create sub-predicate for range that is beyond up bound -+ of switch index. */ -+ if (wi::le_p (vr_wmax, wi::to_wide (max), TYPE_SIGN (type))) -+ { -+ p_seg = false; -+ break; -+ } -+ -+ p_seg = add_condition (summary, params_summary, index, -+ param_type, &aggpos, GT_EXPR, -+ max, param_ops); -+ } -+ } -+ -+ p_all = p_all.or_with (summary->conds, p_seg); -+ *(class predicate *) e->aux -+ = p_all.or_with (summary->conds, *(class predicate *) e->aux); -+ -+ vec_free (param_ops); - } - - -@@ -1314,7 +1611,8 @@ set_switch_stmt_execution_predicate (str - static void - compute_bb_predicates (struct ipa_func_body_info *fbi, - struct cgraph_node *node, -- struct ipa_fn_summary *summary) -+ class ipa_fn_summary *summary, -+ class ipa_node_params *params_summary) - { - struct function *my_function = DECL_STRUCT_FUNCTION (node->decl); - bool done = false; -@@ -1322,8 +1620,8 @@ compute_bb_predicates (struct ipa_func_b - - FOR_EACH_BB_FN (bb, my_function) - { -- set_cond_stmt_execution_predicate (fbi, summary, bb); -- set_switch_stmt_execution_predicate (fbi, summary, bb); -+ set_cond_stmt_execution_predicate (fbi, summary, params_summary, bb); -+ set_switch_stmt_execution_predicate (fbi, summary, params_summary, bb); - } - - /* Entry block is always executable. */ -@@ -1348,16 +1646,16 @@ compute_bb_predicates (struct ipa_func_b - predicate this_bb_predicate - = *(predicate *) e->src->aux; - if (e->aux) -- this_bb_predicate &= (*(struct predicate *) e->aux); -+ this_bb_predicate &= (*(class predicate *) e->aux); - p = p.or_with (summary->conds, this_bb_predicate); - if (p == true) - break; - } - } -- if (p == false) -- gcc_checking_assert (!bb->aux); -- else -+ if (p != false) - { -+ basic_block pdom_bb; -+ - if (!bb->aux) - { - done = false; -@@ -1376,6 +1674,34 @@ compute_bb_predicates (struct ipa_func_b - *((predicate *) bb->aux) = p; - } - } -+ -+ /* For switch/if statement, we can OR-combine predicates of all -+ its cases/branches to get predicate for basic block in their -+ convergence point, but sometimes this will generate very -+ complicated predicate. Actually, we can get simplified -+ predicate in another way by using the fact that predicate -+ for a basic block must also hold true for its post dominators. -+ To be specific, basic block in convergence point of -+ conditional statement should include predicate of the -+ statement. 
*/ -+ pdom_bb = get_immediate_dominator (CDI_POST_DOMINATORS, bb); -+ if (pdom_bb == EXIT_BLOCK_PTR_FOR_FN (my_function) || !pdom_bb) -+ ; -+ else if (!pdom_bb->aux) -+ { -+ done = false; -+ pdom_bb->aux = edge_predicate_pool.allocate (); -+ *((predicate *) pdom_bb->aux) = p; -+ } -+ else if (p != *(predicate *) pdom_bb->aux) -+ { -+ p = p.or_with (summary->conds, *(predicate *)pdom_bb->aux); -+ if (p != *(predicate *) pdom_bb->aux) -+ { -+ done = false; -+ *((predicate *) pdom_bb->aux) = p; -+ } -+ } - } - } - } -@@ -1387,21 +1713,21 @@ compute_bb_predicates (struct ipa_func_b - - static predicate - will_be_nonconstant_expr_predicate (ipa_func_body_info *fbi, -- struct ipa_fn_summary *summary, -+ class ipa_fn_summary *summary, -+ class ipa_node_params *params_summary, - tree expr, - vec nonconstant_names) - { - tree parm; - int index; -- HOST_WIDE_INT size; - - while (UNARY_CLASS_P (expr)) - expr = TREE_OPERAND (expr, 0); - -- parm = unmodified_parm (fbi, NULL, expr, &size); -+ parm = unmodified_parm (fbi, NULL, expr, NULL); - if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0) -- return add_condition (summary, index, size, NULL, predicate::changed, -- NULL_TREE); -+ return add_condition (summary, params_summary, index, TREE_TYPE (parm), NULL, -+ predicate::changed, NULL_TREE); - if (is_gimple_min_invariant (expr)) - return false; - if (TREE_CODE (expr) == SSA_NAME) -@@ -1410,6 +1736,7 @@ will_be_nonconstant_expr_predicate (ipa_ - { - predicate p1 - = will_be_nonconstant_expr_predicate (fbi, summary, -+ params_summary, - TREE_OPERAND (expr, 0), - nonconstant_names); - if (p1 == true) -@@ -1417,6 +1744,7 @@ will_be_nonconstant_expr_predicate (ipa_ - - predicate p2 - = will_be_nonconstant_expr_predicate (fbi, summary, -+ params_summary, - TREE_OPERAND (expr, 1), - nonconstant_names); - return p1.or_with (summary->conds, p2); -@@ -1425,6 +1753,7 @@ will_be_nonconstant_expr_predicate (ipa_ - { - predicate p1 - = will_be_nonconstant_expr_predicate (fbi, summary, -+ params_summary, - TREE_OPERAND (expr, 0), - nonconstant_names); - if (p1 == true) -@@ -1432,12 +1761,14 @@ will_be_nonconstant_expr_predicate (ipa_ - - predicate p2 - = will_be_nonconstant_expr_predicate (fbi, summary, -+ params_summary, - TREE_OPERAND (expr, 1), - nonconstant_names); - if (p2 == true) - return p2; - p1 = p1.or_with (summary->conds, p2); - p2 = will_be_nonconstant_expr_predicate (fbi, summary, -+ params_summary, - TREE_OPERAND (expr, 2), - nonconstant_names); - return p2.or_with (summary->conds, p1); -@@ -1458,17 +1789,18 @@ will_be_nonconstant_expr_predicate (ipa_ - - static predicate - will_be_nonconstant_predicate (struct ipa_func_body_info *fbi, -- struct ipa_fn_summary *summary, -+ class ipa_fn_summary *summary, -+ class ipa_node_params *params_summary, - gimple *stmt, - vec nonconstant_names) - { - predicate p = true; - ssa_op_iter iter; - tree use; -+ tree param_type = NULL_TREE; - predicate op_non_const; - bool is_load; - int base_index; -- HOST_WIDE_INT size; - struct agg_position_info aggpos; - - /* What statments might be optimized away -@@ -1489,11 +1821,9 @@ will_be_nonconstant_predicate (struct ip - /* Loads can be optimized when the value is known. 
*/ - if (is_load) - { -- tree op; -- gcc_assert (gimple_assign_single_p (stmt)); -- op = gimple_assign_rhs1 (stmt); -- if (!unmodified_parm_or_parm_agg_item (fbi, stmt, op, &base_index, &size, -- &aggpos)) -+ tree op = gimple_assign_rhs1 (stmt); -+ if (!decompose_param_expr (fbi, stmt, op, &base_index, ¶m_type, -+ &aggpos)) - return p; - } - else -@@ -1518,21 +1848,22 @@ will_be_nonconstant_predicate (struct ip - - if (is_load) - op_non_const = -- add_condition (summary, base_index, size, &aggpos, predicate::changed, -- NULL); -+ add_condition (summary, params_summary, -+ base_index, param_type, &aggpos, -+ predicate::changed, NULL_TREE); - else - op_non_const = false; - FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE) - { -- HOST_WIDE_INT size; -- tree parm = unmodified_parm (fbi, stmt, use, &size); -+ tree parm = unmodified_parm (fbi, stmt, use, NULL); - int index; - - if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0) - { - if (index != base_index) -- p = add_condition (summary, index, size, NULL, predicate::changed, -- NULL_TREE); -+ p = add_condition (summary, params_summary, index, -+ TREE_TYPE (parm), NULL, -+ predicate::changed, NULL_TREE); - else - continue; - } -@@ -1566,7 +1897,7 @@ struct record_modified_bb_info - static basic_block - get_minimal_bb (basic_block init_bb, basic_block use_bb) - { -- struct loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father); -+ class loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father); - if (l && l->header->count < init_bb->count) - return l->header; - return init_bb; -@@ -1664,7 +1995,7 @@ param_change_prob (ipa_func_body_info *f - return REG_BR_PROB_BASE; - if (dump_file) - { -- fprintf (dump_file, " Analyzing param change probablity of "); -+ fprintf (dump_file, " Analyzing param change probability of "); - print_generic_expr (dump_file, op, TDF_SLIM); - fprintf (dump_file, "\n"); - } -@@ -1718,7 +2049,9 @@ param_change_prob (ipa_func_body_info *f - - static bool - phi_result_unknown_predicate (ipa_func_body_info *fbi, -- ipa_fn_summary *summary, basic_block bb, -+ ipa_fn_summary *summary, -+ class ipa_node_params *params_summary, -+ basic_block bb, - predicate *p, - vec nonconstant_names) - { -@@ -1762,7 +2095,7 @@ phi_result_unknown_predicate (ipa_func_b - || !is_gimple_ip_invariant (gimple_cond_rhs (stmt))) - return false; - -- *p = will_be_nonconstant_expr_predicate (fbi, summary, -+ *p = will_be_nonconstant_expr_predicate (fbi, summary, params_summary, - gimple_cond_lhs (stmt), - nonconstant_names); - if (*p == true) -@@ -1777,7 +2110,7 @@ phi_result_unknown_predicate (ipa_func_b - NONCONSTANT_NAMES, if possible. */ - - static void --predicate_for_phi_result (struct ipa_fn_summary *summary, gphi *phi, -+predicate_for_phi_result (class ipa_fn_summary *summary, gphi *phi, - predicate *p, - vec nonconstant_names) - { -@@ -1954,7 +2287,8 @@ analyze_function_body (struct cgraph_nod - basic_block bb; - struct function *my_function = DECL_STRUCT_FUNCTION (node->decl); - sreal freq; -- struct ipa_fn_summary *info = ipa_fn_summaries->get_create (node); -+ class ipa_fn_summary *info = ipa_fn_summaries->get_create (node); -+ class ipa_node_params *params_summary = early ? 
NULL : IPA_NODE_REF (node); - predicate bb_predicate; - struct ipa_func_body_info fbi; - vec nonconstant_names = vNULL; -@@ -1980,6 +2314,7 @@ analyze_function_body (struct cgraph_nod - if (opt_for_fn (node->decl, optimize)) - { - calculate_dominance_info (CDI_DOMINATORS); -+ calculate_dominance_info (CDI_POST_DOMINATORS); - if (!early) - loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS); - else -@@ -2019,7 +2354,7 @@ analyze_function_body (struct cgraph_nod - bb_predicate); - - if (fbi.info) -- compute_bb_predicates (&fbi, node, info); -+ compute_bb_predicates (&fbi, node, info, params_summary); - order = XNEWVEC (int, n_basic_blocks_for_fn (cfun)); - nblocks = pre_and_rev_post_order_compute (NULL, order, false); - for (n = 0; n < nblocks; n++) -@@ -2061,7 +2396,9 @@ analyze_function_body (struct cgraph_nod - gsi_next (&bsi)) - { - if (first_phi -- && !phi_result_unknown_predicate (&fbi, info, bb, -+ && !phi_result_unknown_predicate (&fbi, info, -+ params_summary, -+ bb, - &phi_predicate, - nonconstant_names)) - break; -@@ -2159,7 +2496,7 @@ analyze_function_body (struct cgraph_nod - just maximum of the possible paths. */ - if (fbi.info) - will_be_nonconstant -- = will_be_nonconstant_predicate (&fbi, info, -+ = will_be_nonconstant_predicate (&fbi, info, params_summary, - stmt, nonconstant_names); - else - will_be_nonconstant = true; -@@ -2174,7 +2511,7 @@ analyze_function_body (struct cgraph_nod - if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "\t\tWill be eliminated by inlining\n"); - -- struct predicate p = bb_predicate & will_be_nonconstant; -+ class predicate p = bb_predicate & will_be_nonconstant; - - /* We can ignore statement when we proved it is never going - to happen, but we cannot do that for call statements -@@ -2226,7 +2563,8 @@ analyze_function_body (struct cgraph_nod - predicate p = bb_predicate; - if (fbi.info) - p = p & will_be_nonconstant_expr_predicate -- (&fbi, info, TREE_OPERAND (op, 1), -+ (&fbi, info, params_summary, -+ TREE_OPERAND (op, 1), - nonconstant_names); - if (p != false) - { -@@ -2249,7 +2587,7 @@ analyze_function_body (struct cgraph_nod - - if (nonconstant_names.exists () && !early) - { -- struct loop *loop; -+ class loop *loop; - predicate loop_iterations = true; - predicate loop_stride = true; - -@@ -2261,7 +2599,7 @@ analyze_function_body (struct cgraph_nod - vec exits; - edge ex; - unsigned int j; -- struct tree_niter_desc niter_desc; -+ class tree_niter_desc niter_desc; - bb_predicate = *(predicate *) loop->header->aux; - - exits = get_loop_exit_edges (loop); -@@ -2271,6 +2609,7 @@ analyze_function_body (struct cgraph_nod - { - predicate will_be_nonconstant - = will_be_nonconstant_expr_predicate (&fbi, info, -+ params_summary, - niter_desc.niter, - nonconstant_names); - if (will_be_nonconstant != true) -@@ -2315,7 +2654,9 @@ analyze_function_body (struct cgraph_nod - continue; - - predicate will_be_nonconstant -- = will_be_nonconstant_expr_predicate (&fbi, info, iv.step, -+ = will_be_nonconstant_expr_predicate (&fbi, info, -+ params_summary, -+ iv.step, - nonconstant_names); - if (will_be_nonconstant != true) - will_be_nonconstant = bb_predicate & will_be_nonconstant; -@@ -2349,8 +2690,9 @@ analyze_function_body (struct cgraph_nod - } - } - ipa_fn_summary *s = ipa_fn_summaries->get (node); -+ ipa_size_summary *ss = ipa_size_summaries->get (node); - s->time = time; -- s->self_size = size; -+ ss->self_size = size; - nonconstant_names.release (); - ipa_release_body_info (&fbi); - if (opt_for_fn (node->decl, 
optimize)) -@@ -2360,6 +2702,7 @@ analyze_function_body (struct cgraph_nod - else if (!ipa_edge_args_sum) - ipa_free_all_node_params (); - free_dominance_info (CDI_DOMINATORS); -+ free_dominance_info (CDI_POST_DOMINATORS); - } - if (dump_file) - { -@@ -2377,9 +2720,8 @@ compute_fn_summary (struct cgraph_node * - { - HOST_WIDE_INT self_stack_size; - struct cgraph_edge *e; -- struct ipa_fn_summary *info; - -- gcc_assert (!node->global.inlined_to); -+ gcc_assert (!node->inlined_to); - - if (!ipa_fn_summaries) - ipa_fn_summary_alloc (); -@@ -2387,14 +2729,14 @@ compute_fn_summary (struct cgraph_node * - /* Create a new ipa_fn_summary. */ - ((ipa_fn_summary_t *)ipa_fn_summaries)->remove_callees (node); - ipa_fn_summaries->remove (node); -- info = ipa_fn_summaries->get_create (node); -+ class ipa_fn_summary *info = ipa_fn_summaries->get_create (node); -+ class ipa_size_summary *size_info = ipa_size_summaries->get_create (node); - - /* Estimate the stack size for the function if we're optimizing. */ - self_stack_size = optimize && !node->thunk.thunk_p - ? estimated_stack_frame_size (node) : 0; -- info->estimated_self_stack_size = self_stack_size; -+ size_info->estimated_self_stack_size = self_stack_size; - info->estimated_stack_size = self_stack_size; -- info->stack_frame_offset = 0; - - if (node->thunk.thunk_p) - { -@@ -2412,7 +2754,7 @@ compute_fn_summary (struct cgraph_node * - t = predicate::not_inlined (); - info->account_size_time (2 * ipa_fn_summary::size_scale, 0, t, t); - ipa_update_overall_fn_summary (node); -- info->self_size = info->size; -+ size_info->self_size = size_info->size; - if (stdarg_p (TREE_TYPE (node->decl))) - { - info->inlinable = false; -@@ -2468,16 +2810,15 @@ compute_fn_summary (struct cgraph_node * - node->calls_comdat_local = (e != NULL); - - /* Inlining characteristics are maintained by the cgraph_mark_inline. */ -- info->size = info->self_size; -- info->stack_frame_offset = 0; -- info->estimated_stack_size = info->estimated_self_stack_size; -+ size_info->size = size_info->self_size; -+ info->estimated_stack_size = size_info->estimated_self_stack_size; - - /* Code above should compute exactly the same result as - ipa_update_overall_fn_summary but because computation happens in - different order the roundoff errors result in slight changes. */ - ipa_update_overall_fn_summary (node); - /* In LTO mode we may have speculative edges set. 
*/ -- gcc_assert (in_lto_p || info->size == info->self_size); -+ gcc_assert (in_lto_p || size_info->size == size_info->self_size); - } - - -@@ -2499,11 +2840,11 @@ estimate_edge_devirt_benefit (struct cgr - int *size, int *time, - vec known_vals, - vec known_contexts, -- vec known_aggs) -+ vec known_aggs) - { - tree target; - struct cgraph_node *callee; -- struct ipa_fn_summary *isummary; -+ class ipa_fn_summary *isummary; - enum availability avail; - bool speculative; - -@@ -2548,10 +2889,10 @@ estimate_edge_size_and_time (struct cgra - int prob, - vec known_vals, - vec known_contexts, -- vec known_aggs, -+ vec known_aggs, - ipa_hints *hints) - { -- struct ipa_call_summary *es = ipa_call_summaries->get (e); -+ class ipa_call_summary *es = ipa_call_summaries->get (e); - int call_size = es->call_stmt_size; - int call_time = es->call_stmt_time; - int cur_size; -@@ -2583,12 +2924,12 @@ estimate_calls_size_and_time (struct cgr - clause_t possible_truths, - vec known_vals, - vec known_contexts, -- vec known_aggs) -+ vec known_aggs) - { - struct cgraph_edge *e; - for (e = node->callees; e; e = e->next_callee) - { -- struct ipa_call_summary *es = ipa_call_summaries->get_create (e); -+ class ipa_call_summary *es = ipa_call_summaries->get_create (e); - - /* Do not care about zero sized builtins. */ - if (e->inline_failed && !es->call_stmt_size) -@@ -2619,7 +2960,7 @@ estimate_calls_size_and_time (struct cgr - } - for (e = node->indirect_calls; e; e = e->next_callee) - { -- struct ipa_call_summary *es = ipa_call_summaries->get_create (e); -+ class ipa_call_summary *es = ipa_call_summaries->get_create (e); - if (!es->predicate - || es->predicate->evaluate (possible_truths)) - estimate_edge_size_and_time (e, size, -@@ -2630,31 +2971,250 @@ estimate_calls_size_and_time (struct cgr - } - } - -+/* Default constructor for ipa call context. -+ Memory alloction of known_vals, known_contexts -+ and known_aggs vectors is owned by the caller, but can -+ be release by ipa_call_context::release. -+ -+ inline_param_summary is owned by the caller. */ -+ipa_call_context::ipa_call_context (cgraph_node *node, -+ clause_t possible_truths, -+ clause_t nonspec_possible_truths, -+ vec known_vals, -+ vec -+ known_contexts, -+ vec known_aggs, -+ vec -+ inline_param_summary) -+: m_node (node), m_possible_truths (possible_truths), -+ m_nonspec_possible_truths (nonspec_possible_truths), -+ m_inline_param_summary (inline_param_summary), -+ m_known_vals (known_vals), -+ m_known_contexts (known_contexts), -+ m_known_aggs (known_aggs) -+{ -+} -+ -+/* Set THIS to be a duplicate of CTX. Copy all relevant info. */ -+ -+void -+ipa_call_context::duplicate_from (const ipa_call_context &ctx) -+{ -+ m_node = ctx.m_node; -+ m_possible_truths = ctx.m_possible_truths; -+ m_nonspec_possible_truths = ctx.m_nonspec_possible_truths; -+ class ipa_node_params *params_summary = IPA_NODE_REF (m_node); -+ unsigned int nargs = params_summary -+ ? ipa_get_param_count (params_summary) : 0; -+ -+ m_inline_param_summary = vNULL; -+ /* Copy the info only if there is at least one useful entry. 
*/ -+ if (ctx.m_inline_param_summary.exists ()) -+ { -+ unsigned int n = MIN (ctx.m_inline_param_summary.length (), nargs); -+ -+ for (unsigned int i = 0; i < n; i++) -+ if (ipa_is_param_used_by_ipa_predicates (params_summary, i) -+ && !ctx.m_inline_param_summary[i].useless_p ()) -+ { -+ m_inline_param_summary -+ = ctx.m_inline_param_summary.copy (); -+ break; -+ } -+ } -+ m_known_vals = vNULL; -+ if (ctx.m_known_vals.exists ()) -+ { -+ unsigned int n = MIN (ctx.m_known_vals.length (), nargs); -+ -+ for (unsigned int i = 0; i < n; i++) -+ if (ipa_is_param_used_by_indirect_call (params_summary, i) -+ && ctx.m_known_vals[i]) -+ { -+ m_known_vals = ctx.m_known_vals.copy (); -+ break; -+ } -+ } -+ -+ m_known_contexts = vNULL; -+ if (ctx.m_known_contexts.exists ()) -+ { -+ unsigned int n = MIN (ctx.m_known_contexts.length (), nargs); -+ -+ for (unsigned int i = 0; i < n; i++) -+ if (ipa_is_param_used_by_polymorphic_call (params_summary, i) -+ && !ctx.m_known_contexts[i].useless_p ()) -+ { -+ m_known_contexts = ctx.m_known_contexts.copy (); -+ break; -+ } -+ } -+ -+ m_known_aggs = vNULL; -+ if (ctx.m_known_aggs.exists ()) -+ { -+ unsigned int n = MIN (ctx.m_known_aggs.length (), nargs); -+ -+ for (unsigned int i = 0; i < n; i++) -+ if (ipa_is_param_used_by_indirect_call (params_summary, i) -+ && !ctx.m_known_aggs[i].is_empty ()) -+ { -+ m_known_aggs = ipa_copy_agg_values (ctx.m_known_aggs); -+ break; -+ } -+ } -+} -+ -+/* Release memory used by known_vals/contexts/aggs vectors. -+ If ALL is true release also inline_param_summary. -+ This happens when context was previously duplciated to be stored -+ into cache. */ -+ -+void -+ipa_call_context::release (bool all) -+{ -+ /* See if context is initialized at first place. */ -+ if (!m_node) -+ return; -+ m_known_vals.release (); -+ m_known_contexts.release (); -+ ipa_release_agg_values (m_known_aggs); -+ if (all) -+ m_inline_param_summary.release (); -+} -+ -+/* Return true if CTX describes the same call context as THIS. */ -+ -+bool -+ipa_call_context::equal_to (const ipa_call_context &ctx) -+{ -+ if (m_node != ctx.m_node -+ || m_possible_truths != ctx.m_possible_truths -+ || m_nonspec_possible_truths != ctx.m_nonspec_possible_truths) -+ return false; -+ -+ class ipa_node_params *params_summary = IPA_NODE_REF (m_node); -+ unsigned int nargs = params_summary -+ ? 
ipa_get_param_count (params_summary) : 0; -+ -+ if (m_inline_param_summary.exists () || ctx.m_inline_param_summary.exists ()) -+ { -+ for (unsigned int i = 0; i < nargs; i++) -+ { -+ if (!ipa_is_param_used_by_ipa_predicates (params_summary, i)) -+ continue; -+ if (i >= m_inline_param_summary.length () -+ || m_inline_param_summary[i].useless_p ()) -+ { -+ if (i < ctx.m_inline_param_summary.length () -+ && !ctx.m_inline_param_summary[i].useless_p ()) -+ return false; -+ continue; -+ } -+ if (i >= ctx.m_inline_param_summary.length () -+ || ctx.m_inline_param_summary[i].useless_p ()) -+ { -+ if (i < m_inline_param_summary.length () -+ && !m_inline_param_summary[i].useless_p ()) -+ return false; -+ continue; -+ } -+ if (!m_inline_param_summary[i].equal_to -+ (ctx.m_inline_param_summary[i])) -+ return false; -+ } -+ } -+ if (m_known_vals.exists () || ctx.m_known_vals.exists ()) -+ { -+ for (unsigned int i = 0; i < nargs; i++) -+ { -+ if (!ipa_is_param_used_by_indirect_call (params_summary, i)) -+ continue; -+ if (i >= m_known_vals.length () || !m_known_vals[i]) -+ { -+ if (i < ctx.m_known_vals.length () && ctx.m_known_vals[i]) -+ return false; -+ continue; -+ } -+ if (i >= ctx.m_known_vals.length () || !ctx.m_known_vals[i]) -+ { -+ if (i < m_known_vals.length () && m_known_vals[i]) -+ return false; -+ continue; -+ } -+ if (m_known_vals[i] != ctx.m_known_vals[i]) -+ return false; -+ } -+ } -+ if (m_known_contexts.exists () || ctx.m_known_contexts.exists ()) -+ { -+ for (unsigned int i = 0; i < nargs; i++) -+ { -+ if (!ipa_is_param_used_by_polymorphic_call (params_summary, i)) -+ continue; -+ if (i >= m_known_contexts.length () -+ || m_known_contexts[i].useless_p ()) -+ { -+ if (i < ctx.m_known_contexts.length () -+ && !ctx.m_known_contexts[i].useless_p ()) -+ return false; -+ continue; -+ } -+ if (i >= ctx.m_known_contexts.length () -+ || ctx.m_known_contexts[i].useless_p ()) -+ { -+ if (i < m_known_contexts.length () -+ && !m_known_contexts[i].useless_p ()) -+ return false; -+ continue; -+ } -+ if (!m_known_contexts[i].equal_to -+ (ctx.m_known_contexts[i])) -+ return false; -+ } -+ } -+ if (m_known_aggs.exists () || ctx.m_known_aggs.exists ()) -+ { -+ for (unsigned int i = 0; i < nargs; i++) -+ { -+ if (!ipa_is_param_used_by_indirect_call (params_summary, i)) -+ continue; -+ if (i >= m_known_aggs.length () || m_known_aggs[i].is_empty ()) -+ { -+ if (i < ctx.m_known_aggs.length () -+ && !ctx.m_known_aggs[i].is_empty ()) -+ return false; -+ continue; -+ } -+ if (i >= ctx.m_known_aggs.length () -+ || ctx.m_known_aggs[i].is_empty ()) -+ { -+ if (i < m_known_aggs.length () -+ && !m_known_aggs[i].is_empty ()) -+ return false; -+ continue; -+ } -+ if (!m_known_aggs[i].equal_to (ctx.m_known_aggs[i])) -+ return false; -+ } -+ } -+ return true; -+} - --/* Estimate size and time needed to execute NODE assuming -- POSSIBLE_TRUTHS clause, and KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS -- information about NODE's arguments. If non-NULL use also probability -- information present in INLINE_PARAM_SUMMARY vector. -+/* Estimate size and time needed to execute call in the given context. - Additionally detemine hints determined by the context. Finally compute - minimal size needed for the call that is independent on the call context and - can be used for fast estimates. Return the values in RET_SIZE, - RET_MIN_SIZE, RET_TIME and RET_HINTS. 
*/ - - void --estimate_node_size_and_time (struct cgraph_node *node, -- clause_t possible_truths, -- clause_t nonspec_possible_truths, -- vec known_vals, -- vec known_contexts, -- vec known_aggs, -- int *ret_size, int *ret_min_size, -- sreal *ret_time, -- sreal *ret_nonspecialized_time, -- ipa_hints *ret_hints, -- vec -- inline_param_summary) -+ipa_call_context::estimate_size_and_time (int *ret_size, -+ int *ret_min_size, -+ sreal *ret_time, -+ sreal *ret_nonspecialized_time, -+ ipa_hints *ret_hints) - { -- struct ipa_fn_summary *info = ipa_fn_summaries->get_create (node); -+ class ipa_fn_summary *info = ipa_fn_summaries->get_create (m_node); - size_time_entry *e; - int size = 0; - sreal time = 0; -@@ -2666,13 +3226,13 @@ estimate_node_size_and_time (struct cgra - { - bool found = false; - fprintf (dump_file, " Estimating body: %s/%i\n" -- " Known to be false: ", node->name (), -- node->order); -+ " Known to be false: ", m_node->name (), -+ m_node->order); - - for (i = predicate::not_inlined_condition; - i < (predicate::first_dynamic_condition - + (int) vec_safe_length (info->conds)); i++) -- if (!(possible_truths & (1 << i))) -+ if (!(m_possible_truths & (1 << i))) - { - if (found) - fprintf (dump_file, ", "); -@@ -2681,19 +3241,19 @@ estimate_node_size_and_time (struct cgra - } - } - -- estimate_calls_size_and_time (node, &size, &min_size, &time, &hints, possible_truths, -- known_vals, known_contexts, known_aggs); -+ estimate_calls_size_and_time (m_node, &size, &min_size, &time, &hints, m_possible_truths, -+ m_known_vals, m_known_contexts, m_known_aggs); - sreal nonspecialized_time = time; - - for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++) - { -- bool exec = e->exec_predicate.evaluate (nonspec_possible_truths); -+ bool exec = e->exec_predicate.evaluate (m_nonspec_possible_truths); - - /* Because predicates are conservative, it can happen that nonconst is 1 - but exec is 0. 
*/ - if (exec) - { -- bool nonconst = e->nonconst_predicate.evaluate (possible_truths); -+ bool nonconst = e->nonconst_predicate.evaluate (m_possible_truths); - - gcc_checking_assert (e->time >= 0); - gcc_checking_assert (time >= 0); -@@ -2709,7 +3269,7 @@ estimate_node_size_and_time (struct cgra - nonspecialized_time += e->time; - if (!nonconst) - ; -- else if (!inline_param_summary.exists ()) -+ else if (!m_inline_param_summary.exists ()) - { - if (nonconst) - time += e->time; -@@ -2717,8 +3277,8 @@ estimate_node_size_and_time (struct cgra - else - { - int prob = e->nonconst_predicate.probability -- (info->conds, possible_truths, -- inline_param_summary); -+ (info->conds, m_possible_truths, -+ m_inline_param_summary); - gcc_checking_assert (prob >= 0); - gcc_checking_assert (prob <= REG_BR_PROB_BASE); - time += e->time * prob / REG_BR_PROB_BASE; -@@ -2742,14 +3302,14 @@ estimate_node_size_and_time (struct cgra - time = nonspecialized_time; - - if (info->loop_iterations -- && !info->loop_iterations->evaluate (possible_truths)) -+ && !info->loop_iterations->evaluate (m_possible_truths)) - hints |= INLINE_HINT_loop_iterations; - if (info->loop_stride -- && !info->loop_stride->evaluate (possible_truths)) -+ && !info->loop_stride->evaluate (m_possible_truths)) - hints |= INLINE_HINT_loop_stride; - if (info->scc_no) - hints |= INLINE_HINT_in_scc; -- if (DECL_DECLARED_INLINE_P (node->decl)) -+ if (DECL_DECLARED_INLINE_P (m_node->decl)) - hints |= INLINE_HINT_declared_inline; - - size = RDIV (size, ipa_fn_summary::size_scale); -@@ -2782,7 +3342,7 @@ estimate_ipcp_clone_size_and_time (struc - vec known_vals, - vec - known_contexts, -- vec known_aggs, -+ vec known_aggs, - int *ret_size, sreal *ret_time, - sreal *ret_nonspec_time, - ipa_hints *hints) -@@ -2791,10 +3351,31 @@ estimate_ipcp_clone_size_and_time (struc - - evaluate_conditions_for_known_args (node, false, known_vals, known_aggs, - &clause, &nonspec_clause); -- estimate_node_size_and_time (node, clause, nonspec_clause, -- known_vals, known_contexts, -- known_aggs, ret_size, NULL, ret_time, -- ret_nonspec_time, hints, vNULL); -+ ipa_call_context ctx (node, clause, nonspec_clause, -+ known_vals, known_contexts, -+ known_aggs, vNULL); -+ ctx.estimate_size_and_time (ret_size, NULL, ret_time, -+ ret_nonspec_time, hints); -+} -+ -+/* Return stack frame offset where frame of NODE is supposed to start inside -+ of the function it is inlined to. -+ Return 0 for functions that are not inlined. 
*/ -+ -+HOST_WIDE_INT -+ipa_get_stack_frame_offset (struct cgraph_node *node) -+{ -+ HOST_WIDE_INT offset = 0; -+ if (!node->inlined_to) -+ return 0; -+ node = node->callers->caller; -+ while (true) -+ { -+ offset += ipa_size_summaries->get (node)->estimated_self_stack_size; -+ if (!node->inlined_to) -+ return offset; -+ node = node->callers->caller; -+ } - } - - -@@ -2805,19 +3386,7 @@ static void - inline_update_callee_summaries (struct cgraph_node *node, int depth) - { - struct cgraph_edge *e; -- ipa_fn_summary *callee_info = ipa_fn_summaries->get (node); -- ipa_fn_summary *caller_info = ipa_fn_summaries->get (node->callers->caller); -- HOST_WIDE_INT peak; -- -- callee_info->stack_frame_offset -- = caller_info->stack_frame_offset -- + caller_info->estimated_self_stack_size; -- peak = callee_info->stack_frame_offset -- + callee_info->estimated_self_stack_size; -- -- ipa_fn_summary *s = ipa_fn_summaries->get (node->global.inlined_to); -- if (s->estimated_stack_size < peak) -- s->estimated_stack_size = peak; -+ - ipa_propagate_frequency (node); - for (e = node->callees; e; e = e->next_callee) - { -@@ -2830,7 +3399,7 @@ inline_update_callee_summaries (struct c - } - - /* Update change_prob of EDGE after INLINED_EDGE has been inlined. -- When functoin A is inlined in B and A calls C with parameter that -+ When function A is inlined in B and A calls C with parameter that - changes with probability PROB1 and C is known to be passthroug - of argument if B that change with probability PROB2, the probability - of change is now PROB1*PROB2. */ -@@ -2842,9 +3411,11 @@ remap_edge_change_prob (struct cgraph_ed - if (ipa_node_params_sum) - { - int i; -- struct ipa_edge_args *args = IPA_EDGE_REF (edge); -- struct ipa_call_summary *es = ipa_call_summaries->get (edge); -- struct ipa_call_summary *inlined_es -+ class ipa_edge_args *args = IPA_EDGE_REF (edge); -+ if (!args) -+ return; -+ class ipa_call_summary *es = ipa_call_summaries->get (edge); -+ class ipa_call_summary *inlined_es - = ipa_call_summaries->get (inlined_edge); - - if (es->param.length () == 0) -@@ -2885,8 +3456,9 @@ remap_edge_change_prob (struct cgraph_ed - static void - remap_edge_summaries (struct cgraph_edge *inlined_edge, - struct cgraph_node *node, -- struct ipa_fn_summary *info, -- struct ipa_fn_summary *callee_info, -+ class ipa_fn_summary *info, -+ class ipa_node_params *params_summary, -+ class ipa_fn_summary *callee_info, - vec operand_map, - vec offset_map, - clause_t possible_truths, -@@ -2895,18 +3467,19 @@ remap_edge_summaries (struct cgraph_edge - struct cgraph_edge *e, *next; - for (e = node->callees; e; e = next) - { -- struct ipa_call_summary *es = ipa_call_summaries->get (e); - predicate p; - next = e->next_callee; - - if (e->inline_failed) - { -+ class ipa_call_summary *es = ipa_call_summaries->get (e); - remap_edge_change_prob (inlined_edge, e); - - if (es->predicate) - { - p = es->predicate->remap_after_inlining -- (info, callee_info, operand_map, -+ (info, params_summary, -+ callee_info, operand_map, - offset_map, possible_truths, - *toplev_predicate); - edge_set_predicate (e, &p); -@@ -2915,13 +3488,14 @@ remap_edge_summaries (struct cgraph_edge - edge_set_predicate (e, toplev_predicate); - } - else -- remap_edge_summaries (inlined_edge, e->callee, info, callee_info, -+ remap_edge_summaries (inlined_edge, e->callee, info, -+ params_summary, callee_info, - operand_map, offset_map, possible_truths, - toplev_predicate); - } - for (e = node->indirect_calls; e; e = next) - { -- struct ipa_call_summary *es = 
ipa_call_summaries->get (e); -+ class ipa_call_summary *es = ipa_call_summaries->get (e); - predicate p; - next = e->next_callee; - -@@ -2929,7 +3503,8 @@ remap_edge_summaries (struct cgraph_edge - if (es->predicate) - { - p = es->predicate->remap_after_inlining -- (info, callee_info, operand_map, offset_map, -+ (info, params_summary, -+ callee_info, operand_map, offset_map, - possible_truths, *toplev_predicate); - edge_set_predicate (e, &p); - } -@@ -2941,8 +3516,9 @@ remap_edge_summaries (struct cgraph_edge - /* Same as remap_predicate, but set result into hint *HINT. */ - - static void --remap_hint_predicate (struct ipa_fn_summary *info, -- struct ipa_fn_summary *callee_info, -+remap_hint_predicate (class ipa_fn_summary *info, -+ class ipa_node_params *params_summary, -+ class ipa_fn_summary *callee_info, - predicate **hint, - vec operand_map, - vec offset_map, -@@ -2954,7 +3530,7 @@ remap_hint_predicate (struct ipa_fn_summ - if (!*hint) - return; - p = (*hint)->remap_after_inlining -- (info, callee_info, -+ (info, params_summary, callee_info, - operand_map, offset_map, - possible_truths, *toplev_predicate); - if (p != false && p != true) -@@ -2972,17 +3548,18 @@ void - ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge) - { - ipa_fn_summary *callee_info = ipa_fn_summaries->get (edge->callee); -- struct cgraph_node *to = (edge->caller->global.inlined_to -- ? edge->caller->global.inlined_to : edge->caller); -- struct ipa_fn_summary *info = ipa_fn_summaries->get (to); -+ struct cgraph_node *to = (edge->caller->inlined_to -+ ? edge->caller->inlined_to : edge->caller); -+ class ipa_fn_summary *info = ipa_fn_summaries->get (to); - clause_t clause = 0; /* not_inline is known to be false. */ - size_time_entry *e; -- vec operand_map = vNULL; -- vec offset_map = vNULL; -+ auto_vec operand_map; -+ auto_vec offset_map; - int i; - predicate toplev_predicate; -- predicate true_p = true; -- struct ipa_call_summary *es = ipa_call_summaries->get (edge); -+ class ipa_call_summary *es = ipa_call_summaries->get (edge); -+ class ipa_node_params *params_summary = (ipa_node_params_sum -+ ? IPA_NODE_REF (to) : NULL); - - if (es->predicate) - toplev_predicate = *es->predicate; -@@ -2995,8 +3572,8 @@ ipa_merge_fn_summary_after_inlining (str - evaluate_properties_for_edge (edge, true, &clause, NULL, NULL, NULL, NULL); - if (ipa_node_params_sum && callee_info->conds) - { -- struct ipa_edge_args *args = IPA_EDGE_REF (edge); -- int count = ipa_get_cs_argument_count (args); -+ class ipa_edge_args *args = IPA_EDGE_REF (edge); -+ int count = args ? 
ipa_get_cs_argument_count (args) : 0; - int i; - - if (count) -@@ -3029,19 +3606,21 @@ ipa_merge_fn_summary_after_inlining (str - } - } - operand_map[i] = map; -- gcc_assert (map < ipa_get_param_count (IPA_NODE_REF (to))); -+ gcc_assert (map < ipa_get_param_count (params_summary)); - } - } - for (i = 0; vec_safe_iterate (callee_info->size_time_table, i, &e); i++) - { - predicate p; - p = e->exec_predicate.remap_after_inlining -- (info, callee_info, operand_map, -+ (info, params_summary, -+ callee_info, operand_map, - offset_map, clause, - toplev_predicate); - predicate nonconstp; - nonconstp = e->nonconst_predicate.remap_after_inlining -- (info, callee_info, operand_map, -+ (info, params_summary, -+ callee_info, operand_map, - offset_map, clause, - toplev_predicate); - if (p != false && nonconstp != false) -@@ -3059,48 +3638,53 @@ ipa_merge_fn_summary_after_inlining (str - info->account_size_time (e->size, add_time, p, nonconstp); - } - } -- remap_edge_summaries (edge, edge->callee, info, callee_info, operand_map, -+ remap_edge_summaries (edge, edge->callee, info, params_summary, -+ callee_info, operand_map, - offset_map, clause, &toplev_predicate); -- remap_hint_predicate (info, callee_info, -+ remap_hint_predicate (info, params_summary, callee_info, - &callee_info->loop_iterations, - operand_map, offset_map, clause, &toplev_predicate); -- remap_hint_predicate (info, callee_info, -+ remap_hint_predicate (info, params_summary, callee_info, - &callee_info->loop_stride, - operand_map, offset_map, clause, &toplev_predicate); - -- ipa_call_summary *s = ipa_call_summaries->get (edge); -- inline_update_callee_summaries (edge->callee, s->loop_depth); -+ HOST_WIDE_INT stack_frame_offset = ipa_get_stack_frame_offset (edge->callee); -+ HOST_WIDE_INT peak = stack_frame_offset + callee_info->estimated_stack_size; - -- /* We do not maintain predicates of inlined edges, free it. */ -- edge_set_predicate (edge, &true_p); -- /* Similarly remove param summaries. */ -- es->param.release (); -- operand_map.release (); -- offset_map.release (); -+ if (info->estimated_stack_size < peak) -+ info->estimated_stack_size = peak; -+ -+ inline_update_callee_summaries (edge->callee, es->loop_depth); -+ -+ /* Free summaries that are not maintained for inline clones/edges. */ -+ ipa_call_summaries->remove (edge); -+ ipa_fn_summaries->remove (edge->callee); - } - --/* For performance reasons ipa_merge_fn_summary_after_inlining is not updating overall size -- and time. Recompute it. */ -+/* For performance reasons ipa_merge_fn_summary_after_inlining is not updating -+ overall size and time. Recompute it. 
*/ - - void - ipa_update_overall_fn_summary (struct cgraph_node *node) - { -- struct ipa_fn_summary *info = ipa_fn_summaries->get_create (node); -+ class ipa_fn_summary *info = ipa_fn_summaries->get_create (node); -+ class ipa_size_summary *size_info = ipa_size_summaries->get_create (node); - size_time_entry *e; - int i; - -- info->size = 0; -+ size_info->size = 0; - info->time = 0; - for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++) - { -- info->size += e->size; -+ size_info->size += e->size; - info->time += e->time; - } -- estimate_calls_size_and_time (node, &info->size, &info->min_size, -+ estimate_calls_size_and_time (node, &size_info->size, &info->min_size, - &info->time, NULL, - ~(clause_t) (1 << predicate::false_condition), - vNULL, vNULL, vNULL); -- info->size = (info->size + ipa_fn_summary::size_scale / 2) / ipa_fn_summary::size_scale; -+ size_info->size = (size_info->size + ipa_fn_summary::size_scale / 2) -+ / ipa_fn_summary::size_scale; - } - - -@@ -3181,10 +3765,10 @@ ipa_fn_summary_generate (void) - /* Write inline summary for edge E to OB. */ - - static void --read_ipa_call_summary (struct lto_input_block *ib, struct cgraph_edge *e, -+read_ipa_call_summary (class lto_input_block *ib, struct cgraph_edge *e, - bool prevails) - { -- struct ipa_call_summary *es = prevails -+ class ipa_call_summary *es = prevails - ? ipa_call_summaries->get_create (e) : NULL; - predicate p; - int length, i; -@@ -3235,7 +3819,7 @@ inline_read_section (struct lto_file_dec - const int cfg_offset = sizeof (struct lto_function_header); - const int main_offset = cfg_offset + header->cfg_size; - const int string_offset = main_offset + header->main_size; -- struct data_in *data_in; -+ class data_in *data_in; - unsigned int i, count2, j; - unsigned int f_count; - -@@ -3250,7 +3834,9 @@ inline_read_section (struct lto_file_dec - { - unsigned int index; - struct cgraph_node *node; -- struct ipa_fn_summary *info; -+ class ipa_fn_summary *info; -+ class ipa_node_params *params_summary; -+ class ipa_size_summary *size_info; - lto_symtab_encoder_t encoder; - struct bitpack_d bp; - struct cgraph_edge *e; -@@ -3261,6 +3847,9 @@ inline_read_section (struct lto_file_dec - node = dyn_cast (lto_symtab_encoder_deref (encoder, - index)); - info = node->prevailing_p () ? ipa_fn_summaries->get_create (node) : NULL; -+ params_summary = node->prevailing_p () ? IPA_NODE_REF (node) : NULL; -+ size_info = node->prevailing_p () -+ ? 
ipa_size_summaries->get_create (node) : NULL; - - int stack_size = streamer_read_uhwi (&ib); - int size = streamer_read_uhwi (&ib); -@@ -3269,8 +3858,8 @@ inline_read_section (struct lto_file_dec - if (info) - { - info->estimated_stack_size -- = info->estimated_self_stack_size = stack_size; -- info->size = info->self_size = size; -+ = size_info->estimated_self_stack_size = stack_size; -+ size_info->size = size_info->self_size = size; - info->time = time; - } - -@@ -3288,26 +3877,70 @@ inline_read_section (struct lto_file_dec - - count2 = streamer_read_uhwi (&ib); - gcc_assert (!info || !info->conds); -+ if (info) -+ vec_safe_reserve_exact (info->conds, count2); - for (j = 0; j < count2; j++) - { - struct condition c; -+ unsigned int k, count3; - c.operand_num = streamer_read_uhwi (&ib); -- c.size = streamer_read_uhwi (&ib); - c.code = (enum tree_code) streamer_read_uhwi (&ib); -+ c.type = stream_read_tree (&ib, data_in); - c.val = stream_read_tree (&ib, data_in); - bp = streamer_read_bitpack (&ib); - c.agg_contents = bp_unpack_value (&bp, 1); - c.by_ref = bp_unpack_value (&bp, 1); - if (c.agg_contents) - c.offset = streamer_read_uhwi (&ib); -+ count3 = streamer_read_uhwi (&ib); -+ c.param_ops = NULL; - if (info) -- vec_safe_push (info->conds, c); -+ vec_safe_reserve_exact (c.param_ops, count3); -+ if (params_summary) -+ ipa_set_param_used_by_ipa_predicates -+ (params_summary, c.operand_num, true); -+ for (k = 0; k < count3; k++) -+ { -+ struct expr_eval_op op; -+ enum gimple_rhs_class rhs_class; -+ op.code = (enum tree_code) streamer_read_uhwi (&ib); -+ op.type = stream_read_tree (&ib, data_in); -+ switch (rhs_class = get_gimple_rhs_class (op.code)) -+ { -+ case GIMPLE_UNARY_RHS: -+ op.index = 0; -+ op.val[0] = NULL_TREE; -+ op.val[1] = NULL_TREE; -+ break; -+ -+ case GIMPLE_BINARY_RHS: -+ case GIMPLE_TERNARY_RHS: -+ bp = streamer_read_bitpack (&ib); -+ op.index = bp_unpack_value (&bp, 2); -+ op.val[0] = stream_read_tree (&ib, data_in); -+ if (rhs_class == GIMPLE_BINARY_RHS) -+ op.val[1] = NULL_TREE; -+ else -+ op.val[1] = stream_read_tree (&ib, data_in); -+ break; -+ -+ default: -+ fatal_error (UNKNOWN_LOCATION, -+ "invalid fnsummary in LTO stream"); -+ } -+ if (info) -+ c.param_ops->quick_push (op); -+ } -+ if (info) -+ info->conds->quick_push (c); - } - count2 = streamer_read_uhwi (&ib); - gcc_assert (!info || !info->size_time_table); -+ if (info && count2) -+ vec_safe_reserve_exact (info->size_time_table, count2); - for (j = 0; j < count2; j++) - { -- struct size_time_entry e; -+ class size_time_entry e; - - e.size = streamer_read_uhwi (&ib); - e.time = sreal::stream_in (&ib); -@@ -3315,7 +3948,7 @@ inline_read_section (struct lto_file_dec - e.nonconst_predicate.stream_in (&ib); - - if (info) -- vec_safe_push (info->size_time_table, e); -+ info->size_time_table->quick_push (e); - } - - p.stream_in (&ib); -@@ -3378,7 +4011,7 @@ ipa_fn_summary_read (void) - static void - write_ipa_call_summary (struct output_block *ob, struct cgraph_edge *e) - { -- struct ipa_call_summary *es = ipa_call_summaries->get (e); -+ class ipa_call_summary *es = ipa_call_summaries->get (e); - int i; - - streamer_write_uhwi (ob, es->call_stmt_size); -@@ -3426,7 +4059,8 @@ ipa_fn_summary_write (void) - cgraph_node *cnode = lsei_cgraph_node (lsei); - if (cnode->definition && !cnode->alias) - { -- struct ipa_fn_summary *info = ipa_fn_summaries->get (cnode); -+ class ipa_fn_summary *info = ipa_fn_summaries->get (cnode); -+ class ipa_size_summary *size_info = ipa_size_summaries->get (cnode); - struct bitpack_d bp; - 
struct cgraph_edge *edge; - int i; -@@ -3434,8 +4068,8 @@ ipa_fn_summary_write (void) - struct condition *c; - - streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode)); -- streamer_write_hwi (ob, info->estimated_self_stack_size); -- streamer_write_hwi (ob, info->self_size); -+ streamer_write_hwi (ob, size_info->estimated_self_stack_size); -+ streamer_write_hwi (ob, size_info->self_size); - info->time.stream_out (ob); - bp = bitpack_create (ob->main_stream); - bp_pack_value (&bp, info->inlinable, 1); -@@ -3445,9 +4079,12 @@ ipa_fn_summary_write (void) - streamer_write_uhwi (ob, vec_safe_length (info->conds)); - for (i = 0; vec_safe_iterate (info->conds, i, &c); i++) - { -+ int j; -+ struct expr_eval_op *op; -+ - streamer_write_uhwi (ob, c->operand_num); -- streamer_write_uhwi (ob, c->size); - streamer_write_uhwi (ob, c->code); -+ stream_write_tree (ob, c->type, true); - stream_write_tree (ob, c->val, true); - bp = bitpack_create (ob->main_stream); - bp_pack_value (&bp, c->agg_contents, 1); -@@ -3455,6 +4092,21 @@ ipa_fn_summary_write (void) - streamer_write_bitpack (&bp); - if (c->agg_contents) - streamer_write_uhwi (ob, c->offset); -+ streamer_write_uhwi (ob, vec_safe_length (c->param_ops)); -+ for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++) -+ { -+ streamer_write_uhwi (ob, op->code); -+ stream_write_tree (ob, op->type, true); -+ if (op->val[0]) -+ { -+ bp = bitpack_create (ob->main_stream); -+ bp_pack_value (&bp, op->index, 2); -+ streamer_write_bitpack (&bp); -+ stream_write_tree (ob, op->val[0], true); -+ if (op->val[1]) -+ stream_write_tree (ob, op->val[1], true); -+ } -+ } - } - streamer_write_uhwi (ob, vec_safe_length (info->size_time_table)); - for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++) -@@ -3487,23 +4139,33 @@ ipa_fn_summary_write (void) - } - - --/* Release inline summary. */ -+/* Release function summary. */ - - void - ipa_free_fn_summary (void) - { -- struct cgraph_node *node; - if (!ipa_call_summaries) - return; -- FOR_EACH_DEFINED_FUNCTION (node) -- if (!node->alias) -- ipa_fn_summaries->remove (node); - ipa_fn_summaries->release (); - ipa_fn_summaries = NULL; - ipa_call_summaries->release (); - delete ipa_call_summaries; - ipa_call_summaries = NULL; - edge_predicate_pool.release (); -+ /* During IPA this is one of largest datastructures to release. */ -+ if (flag_wpa) -+ ggc_trim (); -+} -+ -+/* Release function summary. */ -+ -+void -+ipa_free_size_summary (void) -+{ -+ if (!ipa_size_summaries) -+ return; -+ ipa_size_summaries->release (); -+ ipa_size_summaries = NULL; - } - - namespace { -@@ -3578,10 +4240,12 @@ public: - gcc_assert (n == 0); - small_p = param; - } -- virtual bool gate (function *) { return small_p || !flag_wpa; } -+ virtual bool gate (function *) { return true; } - virtual unsigned int execute (function *) - { - ipa_free_fn_summary (); -+ if (!flag_wpa) -+ ipa_free_size_summary (); - return 0; - } - -diff -Nurp a/gcc/ipa-fnsummary.h b/gcc/ipa-fnsummary.h ---- a/gcc/ipa-fnsummary.h 2020-04-30 15:14:04.588000000 +0800 -+++ b/gcc/ipa-fnsummary.h 2020-04-30 15:14:56.664000000 +0800 -@@ -81,16 +81,40 @@ struct GTY(()) size_time_entry - sreal GTY((skip)) time; - }; - -+/* Summary about function and stack frame sizes. We keep this info -+ for inline clones and also for WPA streaming. For this reason this is not -+ part of ipa_fn_summary which exists only for offline functions. */ -+class ipa_size_summary -+{ -+public: -+ /* Estimated stack frame consumption by the function. 
*/ -+ HOST_WIDE_INT estimated_self_stack_size; -+ /* Size of the function body. */ -+ int self_size; -+ /* Estimated size of the function after inlining. */ -+ int size; -+ -+ ipa_size_summary () -+ : estimated_self_stack_size (0), self_size (0), size (0) -+ { -+ } -+ /* Copy constructor. */ -+ ipa_size_summary (const ipa_size_summary &s) -+ : estimated_self_stack_size (0), self_size (s.self_size), size (s.size) -+ { -+ } -+}; -+ - /* Function inlining information. */ - struct GTY(()) ipa_fn_summary - { - /* Keep all field empty so summary dumping works during its computation. - This is useful for debugging. */ - ipa_fn_summary () -- : estimated_self_stack_size (0), self_size (0), min_size (0), -+ : min_size (0), - inlinable (false), single_caller (false), - fp_expressions (false), estimated_stack_size (false), -- stack_frame_offset (false), time (0), size (0), conds (NULL), -+ time (0), conds (NULL), - size_time_table (NULL), loop_iterations (NULL), loop_stride (NULL), - growth (0), scc_no (0) - { -@@ -98,13 +122,11 @@ struct GTY(()) ipa_fn_summary - - /* Copy constructor. */ - ipa_fn_summary (const ipa_fn_summary &s) -- : estimated_self_stack_size (s.estimated_self_stack_size), -- self_size (s.self_size), min_size (s.min_size), -+ : min_size (s.min_size), - inlinable (s.inlinable), single_caller (s.single_caller), - fp_expressions (s.fp_expressions), - estimated_stack_size (s.estimated_stack_size), -- stack_frame_offset (s.stack_frame_offset), time (s.time), size (s.size), -- conds (s.conds), size_time_table (s.size_time_table), -+ time (s.time), conds (s.conds), size_time_table (s.size_time_table), - loop_iterations (s.loop_iterations), loop_stride (s.loop_stride), - growth (s.growth), scc_no (s.scc_no) - {} -@@ -114,10 +136,6 @@ struct GTY(()) ipa_fn_summary - - /* Information about the function body itself. */ - -- /* Estimated stack frame consumption by the function. */ -- HOST_WIDE_INT estimated_self_stack_size; -- /* Size of the function body. */ -- int self_size; - /* Minimal size increase after inlining. */ - int min_size; - -@@ -135,11 +153,8 @@ struct GTY(()) ipa_fn_summary - - /* Estimated stack frame consumption by the function. */ - HOST_WIDE_INT estimated_stack_size; -- /* Expected offset of the stack frame of function. */ -- HOST_WIDE_INT stack_frame_offset; -- /* Estimated size of the function after inlining. */ -+ /* Estimated runtime of function after inlining. */ - sreal GTY((skip)) time; -- int size; - - /* Conditional size/time information. The summaries are being - merged during inlining. */ -@@ -177,7 +192,7 @@ public: - - static ipa_fn_summary_t *create_ggc (symbol_table *symtab) - { -- struct ipa_fn_summary_t *summary = new (ggc_alloc ()) -+ class ipa_fn_summary_t *summary = new (ggc_alloc ()) - ipa_fn_summary_t (symtab); - summary->disable_insertion_hook (); - return summary; -@@ -199,6 +214,24 @@ public: - extern GTY(()) fast_function_summary - *ipa_fn_summaries; - -+class ipa_size_summary_t: -+ public fast_function_summary -+{ -+public: -+ ipa_size_summary_t (symbol_table *symtab): -+ fast_function_summary (symtab) {} -+ -+ static ipa_size_summary_t *create_ggc (symbol_table *symtab) -+ { -+ class ipa_size_summary_t *summary = new (ggc_alloc ()) -+ ipa_size_summary_t (symtab); -+ summary->disable_insertion_hook (); -+ return summary; -+ } -+}; -+extern fast_function_summary -+ *ipa_size_summaries; -+ - /* Information kept about callgraph edges. 
*/ - struct ipa_call_summary - { -@@ -245,6 +278,57 @@ public: - ipa_call_summary *dst_data); - }; - -+/* This object describe a context of call. That is a summary of known -+ information about its parameters. Main purpose of this context is -+ to give more realistic esitmations of function runtime, size and -+ inline hints. */ -+class ipa_call_context -+{ -+public: -+ ipa_call_context (cgraph_node *node, -+ clause_t possible_truths, -+ clause_t nonspec_possible_truths, -+ vec known_vals, -+ vec known_contexts, -+ vec known_aggs, -+ vec m_inline_param_summary); -+ ipa_call_context () -+ : m_node(NULL) -+ { -+ } -+ void estimate_size_and_time (int *ret_size, int *ret_min_size, -+ sreal *ret_time, -+ sreal *ret_nonspecialized_time, -+ ipa_hints *ret_hints); -+ void duplicate_from (const ipa_call_context &ctx); -+ void release (bool all = false); -+ bool equal_to (const ipa_call_context &); -+ bool exists_p () -+ { -+ return m_node != NULL; -+ } -+private: -+ /* Called function. */ -+ cgraph_node *m_node; -+ /* Clause describing what predicate conditionals can be satisfied -+ in this context if function is inlined/specialised. */ -+ clause_t m_possible_truths; -+ /* Clause describing what predicate conditionals can be satisfied -+ in this context if function is kept offline. */ -+ clause_t m_nonspec_possible_truths; -+ /* Inline summary maintains info about change probabilities. */ -+ vec m_inline_param_summary; -+ -+ /* The following is used only to resolve indirect calls. */ -+ -+ /* Vector describing known values of parameters. */ -+ vec m_known_vals; -+ /* Vector describing known polymorphic call contexts. */ -+ vec m_known_contexts; -+ /* Vector describing known aggregate values. */ -+ vec m_known_aggs; -+}; -+ - extern fast_call_summary *ipa_call_summaries; - - /* In ipa-fnsummary.c */ -@@ -253,11 +337,12 @@ void ipa_dump_fn_summaries (FILE *f); - void ipa_dump_fn_summary (FILE *f, struct cgraph_node *node); - void ipa_dump_hints (FILE *f, ipa_hints); - void ipa_free_fn_summary (void); -+void ipa_free_size_summary (void); - void inline_analyze_function (struct cgraph_node *node); - void estimate_ipcp_clone_size_and_time (struct cgraph_node *, - vec, - vec, -- vec, -+ vec, - int *, sreal *, sreal *, - ipa_hints *); - void ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge); -@@ -265,26 +350,16 @@ void ipa_update_overall_fn_summary (stru - void compute_fn_summary (struct cgraph_node *, bool); - - --void evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p, -+void evaluate_properties_for_edge (struct cgraph_edge *e, -+ bool inline_p, - clause_t *clause_ptr, - clause_t *nonspec_clause_ptr, - vec *known_vals_ptr, - vec - *known_contexts_ptr, -- vec *); --void estimate_node_size_and_time (struct cgraph_node *node, -- clause_t possible_truths, -- clause_t nonspec_possible_truths, -- vec known_vals, -- vec, -- vec known_aggs, -- int *ret_size, int *ret_min_size, -- sreal *ret_time, -- sreal *ret_nonspecialized_time, -- ipa_hints *ret_hints, -- vec -- inline_param_summary); -+ vec *); - - void ipa_fnsummary_c_finalize (void); -+HOST_WIDE_INT ipa_get_stack_frame_offset (struct cgraph_node *node); - - #endif /* GCC_IPA_FNSUMMARY_H */ -diff -Nurp a/gcc/ipa-icf.c b/gcc/ipa-icf.c ---- a/gcc/ipa-icf.c 2020-04-30 15:14:04.596000000 +0800 -+++ b/gcc/ipa-icf.c 2020-04-30 15:14:56.632000000 +0800 -@@ -491,7 +491,7 @@ sem_function::param_used_p (unsigned int - - struct ipa_node_params *parms_info = IPA_NODE_REF (get_node ()); - -- if (vec_safe_length (parms_info->descriptors) <= i) 
-+ if (!parms_info || vec_safe_length (parms_info->descriptors) <= i) - return true; - - return ipa_is_param_used (IPA_NODE_REF (get_node ()), i); -@@ -1149,8 +1149,8 @@ sem_function::merge (sem_item *alias_ite - "cannot create wrapper of stdarg function.\n"); - } - else if (ipa_fn_summaries -- && ipa_fn_summaries->get (alias) != NULL -- && ipa_fn_summaries->get (alias)->self_size <= 2) -+ && ipa_size_summaries->get (alias) != NULL -+ && ipa_size_summaries->get (alias)->self_size <= 2) - { - if (dump_file) - fprintf (dump_file, "Wrapper creation is not " -@@ -1268,6 +1268,7 @@ sem_function::merge (sem_item *alias_ite - - /* Remove the function's body. */ - ipa_merge_profiles (original, alias); -+ symtab->call_cgraph_removal_hooks (alias); - alias->release_body (true); - alias->reset (); - /* Notice global symbol possibly produced RTL. */ -@@ -1288,11 +1289,13 @@ sem_function::merge (sem_item *alias_ite - { - gcc_assert (!create_alias); - alias->icf_merged = true; -+ symtab->call_cgraph_removal_hooks (alias); - local_original->icf_merged = true; - - /* FIXME update local_original counts. */ - ipa_merge_profiles (original, alias, true); - alias->create_wrapper (local_original); -+ symtab->call_cgraph_insertion_hooks (alias); - - if (dump_file) - fprintf (dump_file, "Unified; Wrapper has been created.\n\n"); -diff -Nurp a/gcc/ipa-inline-analysis.c b/gcc/ipa-inline-analysis.c ---- a/gcc/ipa-inline-analysis.c 2020-04-30 15:14:04.556000000 +0800 -+++ b/gcc/ipa-inline-analysis.c 2020-04-30 15:14:56.680000000 +0800 -@@ -53,6 +53,48 @@ along with GCC; see the file COPYING3. - /* Cached node/edge growths. */ - call_summary *edge_growth_cache = NULL; - -+/* The context cache remembers estimated time/size and hints for given -+ ipa_call_context of a call. */ -+class node_context_cache_entry -+{ -+public: -+ ipa_call_context ctx; -+ sreal time, nonspec_time; -+ int size; -+ ipa_hints hints; -+ -+ node_context_cache_entry () -+ : ctx () -+ { -+ } -+ ~node_context_cache_entry () -+ { -+ ctx.release (); -+ } -+}; -+ -+/* At the moment we implement primitive single entry LRU cache. */ -+class node_context_summary -+{ -+public: -+ node_context_cache_entry entry; -+ -+ node_context_summary () -+ : entry () -+ { -+ } -+ ~node_context_summary () -+ { -+ } -+}; -+ -+/* Summary holding the context cache. */ -+static fast_function_summary -+ *node_context_cache = NULL; -+/* Statistics about the context cache effectivity. */ -+static long node_context_cache_hit, node_context_cache_miss, -+ node_context_cache_clear; -+ - /* Give initial reasons why inlining would fail on EDGE. This gets either - nullified or usually overwritten by more precise reasons later. */ - -@@ -77,6 +119,16 @@ initialize_inline_failed (struct cgraph_ - == CIF_FINAL_ERROR); - } - -+/* Allocate edge growth caches. */ -+ -+void -+initialize_growth_caches () -+{ -+ edge_growth_cache -+ = new call_summary (symtab, false); -+ node_context_cache -+ = new fast_function_summary (symtab); -+} - - /* Free growth caches. */ - -@@ -84,7 +136,17 @@ void - free_growth_caches (void) - { - delete edge_growth_cache; -+ delete node_context_cache; - edge_growth_cache = NULL; -+ node_context_cache = NULL; -+ if (dump_file) -+ fprintf (dump_file, "node context cache: %li hits, %li misses," -+ " %li initializations\n", -+ node_context_cache_hit, node_context_cache_miss, -+ node_context_cache_clear); -+ node_context_cache_hit = 0; -+ node_context_cache_miss = 0; -+ node_context_cache_clear = 0; - } - - /* Return hints derrived from EDGE. 
*/ -@@ -93,8 +155,8 @@ int - simple_edge_hints (struct cgraph_edge *edge) - { - int hints = 0; -- struct cgraph_node *to = (edge->caller->global.inlined_to -- ? edge->caller->global.inlined_to : edge->caller); -+ struct cgraph_node *to = (edge->caller->inlined_to -+ ? edge->caller->inlined_to : edge->caller); - struct cgraph_node *callee = edge->callee->ultimate_alias_target (); - int to_scc_no = ipa_fn_summaries->get (to)->scc_no; - int callee_scc_no = ipa_fn_summaries->get (callee)->scc_no; -@@ -127,9 +189,9 @@ do_estimate_edge_time (struct cgraph_edg - clause_t clause, nonspec_clause; - vec known_vals; - vec known_contexts; -- vec known_aggs; -- struct ipa_call_summary *es = ipa_call_summaries->get (edge); -- int min_size; -+ vec known_aggs; -+ class ipa_call_summary *es = ipa_call_summaries->get (edge); -+ int min_size = -1; - - callee = edge->callee->ultimate_alias_target (); - -@@ -137,9 +199,53 @@ do_estimate_edge_time (struct cgraph_edg - evaluate_properties_for_edge (edge, true, - &clause, &nonspec_clause, &known_vals, - &known_contexts, &known_aggs); -- estimate_node_size_and_time (callee, clause, nonspec_clause, known_vals, -- known_contexts, known_aggs, &size, &min_size, -- &time, &nonspec_time, &hints, es->param); -+ ipa_call_context ctx (callee, clause, nonspec_clause, known_vals, -+ known_contexts, known_aggs, es->param); -+ if (node_context_cache != NULL) -+ { -+ node_context_summary *e = node_context_cache->get_create (callee); -+ if (e->entry.ctx.equal_to (ctx)) -+ { -+ node_context_cache_hit++; -+ size = e->entry.size; -+ time = e->entry.time; -+ nonspec_time = e->entry.nonspec_time; -+ hints = e->entry.hints; -+ if (flag_checking -+ && !callee->count.ipa_p ()) -+ { -+ sreal chk_time, chk_nonspec_time; -+ int chk_size, chk_min_size; -+ -+ ipa_hints chk_hints; -+ ctx.estimate_size_and_time (&chk_size, &chk_min_size, -+ &chk_time, &chk_nonspec_time, -+ &chk_hints); -+ gcc_assert (chk_size == size && chk_time == time -+ && chk_nonspec_time == nonspec_time -+ && chk_hints == hints); -+ } -+ } -+ else -+ { -+ if (e->entry.ctx.exists_p ()) -+ node_context_cache_miss++; -+ else -+ node_context_cache_clear++; -+ e->entry.ctx.release (true); -+ e->entry.ctx = ctx; -+ ctx.estimate_size_and_time (&size, &min_size, -+ &time, &nonspec_time, &hints); -+ e->entry.size = size; -+ e->entry.time = time; -+ e->entry.nonspec_time = nonspec_time; -+ e->entry.hints = hints; -+ e->entry.ctx.duplicate_from (ctx); -+ } -+ } -+ else -+ ctx.estimate_size_and_time (&size, &min_size, -+ &time, &nonspec_time, &hints); - - /* When we have profile feedback, we can quite safely identify hot - edges and for those we disable size limits. Don't do that when -@@ -147,21 +253,21 @@ do_estimate_edge_time (struct cgraph_edg - may hurt optimization of the caller's hot path. */ - if (edge->count.ipa ().initialized_p () && edge->maybe_hot_p () - && (edge->count.ipa ().apply_scale (2, 1) -- > (edge->caller->global.inlined_to -- ? edge->caller->global.inlined_to->count.ipa () -+ > (edge->caller->inlined_to -+ ? edge->caller->inlined_to->count.ipa () - : edge->caller->count.ipa ()))) - hints |= INLINE_HINT_known_hot; - -- known_vals.release (); -- known_contexts.release (); -- known_aggs.release (); -+ ctx.release (); - gcc_checking_assert (size >= 0); - gcc_checking_assert (time >= 0); - - /* When caching, update the cache entry. 
*/ - if (edge_growth_cache != NULL) - { -- ipa_fn_summaries->get_create (edge->callee)->min_size = min_size; -+ if (min_size >= 0) -+ ipa_fn_summaries->get (edge->callee->function_symbol ())->min_size -+ = min_size; - edge_growth_cache_entry *entry - = edge_growth_cache->get_create (edge); - entry->time = time; -@@ -174,6 +280,14 @@ do_estimate_edge_time (struct cgraph_edg - return time; - } - -+/* Reset cache for NODE. -+ This must be done each time NODE body is modified. */ -+void -+reset_node_cache (struct cgraph_node *node) -+{ -+ if (node_context_cache) -+ node_context_cache->remove (node); -+} - - /* Return estimated callee growth after inlining EDGE. - Only to be called via estimate_edge_size. */ -@@ -186,7 +300,7 @@ do_estimate_edge_size (struct cgraph_edg - clause_t clause, nonspec_clause; - vec known_vals; - vec known_contexts; -- vec known_aggs; -+ vec known_aggs; - - /* When we do caching, use do_estimate_edge_time to populate the entry. */ - -@@ -206,12 +320,10 @@ do_estimate_edge_size (struct cgraph_edg - &clause, &nonspec_clause, - &known_vals, &known_contexts, - &known_aggs); -- estimate_node_size_and_time (callee, clause, nonspec_clause, known_vals, -- known_contexts, known_aggs, &size, NULL, NULL, -- NULL, NULL, vNULL); -- known_vals.release (); -- known_contexts.release (); -- known_aggs.release (); -+ ipa_call_context ctx (callee, clause, nonspec_clause, known_vals, -+ known_contexts, known_aggs, vNULL); -+ ctx.estimate_size_and_time (&size, NULL, NULL, NULL, NULL); -+ ctx.release (); - return size; - } - -@@ -227,7 +339,7 @@ do_estimate_edge_hints (struct cgraph_ed - clause_t clause, nonspec_clause; - vec known_vals; - vec known_contexts; -- vec known_aggs; -+ vec known_aggs; - - /* When we do caching, use do_estimate_edge_time to populate the entry. 
*/ - -@@ -247,12 +359,10 @@ do_estimate_edge_hints (struct cgraph_ed - &clause, &nonspec_clause, - &known_vals, &known_contexts, - &known_aggs); -- estimate_node_size_and_time (callee, clause, nonspec_clause, known_vals, -- known_contexts, known_aggs, NULL, NULL, -- NULL, NULL, &hints, vNULL); -- known_vals.release (); -- known_contexts.release (); -- known_aggs.release (); -+ ipa_call_context ctx (callee, clause, nonspec_clause, known_vals, -+ known_contexts, known_aggs, vNULL); -+ ctx.estimate_size_and_time (NULL, NULL, NULL, NULL, &hints); -+ ctx.release (); - hints |= simple_edge_hints (edge); - return hints; - } -@@ -264,8 +374,8 @@ int - estimate_size_after_inlining (struct cgraph_node *node, - struct cgraph_edge *edge) - { -- struct ipa_call_summary *es = ipa_call_summaries->get (edge); -- ipa_fn_summary *s = ipa_fn_summaries->get (node); -+ class ipa_call_summary *es = ipa_call_summaries->get (edge); -+ ipa_size_summary *s = ipa_size_summaries->get (node); - if (!es->predicate || *es->predicate != false) - { - int size = s->size + estimate_edge_growth (edge); -@@ -321,7 +431,7 @@ int - estimate_growth (struct cgraph_node *node) - { - struct growth_data d = { node, false, false, 0 }; -- struct ipa_fn_summary *info = ipa_fn_summaries->get (node); -+ class ipa_size_summary *info = ipa_size_summaries->get (node); - - node->call_for_symbol_and_aliases (do_estimate_growth_1, &d, true); - -@@ -396,7 +506,7 @@ growth_likely_positive (struct cgraph_no - || node->address_taken) - return true; - -- max_callers = ipa_fn_summaries->get (node)->size * 4 / edge_growth + 2; -+ max_callers = ipa_size_summaries->get (node)->size * 4 / edge_growth + 2; - - for (e = node->callers; e; e = e->next_caller) - { -diff -Nurp a/gcc/ipa-inline.c b/gcc/ipa-inline.c ---- a/gcc/ipa-inline.c 2020-04-30 15:14:04.652000000 +0800 -+++ b/gcc/ipa-inline.c 2020-04-30 15:14:56.684000000 +0800 -@@ -150,8 +150,7 @@ caller_growth_limits (struct cgraph_edge - int newsize; - int limit = 0; - HOST_WIDE_INT stack_size_limit = 0, inlined_stack; -- ipa_fn_summary *info, *what_info; -- ipa_fn_summary *outer_info = ipa_fn_summaries->get (to); -+ ipa_size_summary *outer_info = ipa_size_summaries->get (to); - - /* Look for function e->caller is inlined to. While doing - so work out the largest function body on the way. As -@@ -163,28 +162,29 @@ caller_growth_limits (struct cgraph_edge - too much in order to prevent compiler from exploding". */ - while (true) - { -- info = ipa_fn_summaries->get (to); -- if (limit < info->self_size) -- limit = info->self_size; -- if (stack_size_limit < info->estimated_self_stack_size) -- stack_size_limit = info->estimated_self_stack_size; -- if (to->global.inlined_to) -+ ipa_size_summary *size_info = ipa_size_summaries->get (to); -+ if (limit < size_info->self_size) -+ limit = size_info->self_size; -+ if (stack_size_limit < size_info->estimated_self_stack_size) -+ stack_size_limit = size_info->estimated_self_stack_size; -+ if (to->inlined_to) - to = to->callers->caller; - else - break; - } - -- what_info = ipa_fn_summaries->get (what); -+ ipa_fn_summary *what_info = ipa_fn_summaries->get (what); -+ ipa_size_summary *what_size_info = ipa_size_summaries->get (what); - -- if (limit < what_info->self_size) -- limit = what_info->self_size; -+ if (limit < what_size_info->self_size) -+ limit = what_size_info->self_size; - - limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100; - - /* Check the size after inlining against the function limits. 
But allow - the function to shrink if it went over the limits by forced inlining. */ - newsize = estimate_size_after_inlining (to, e); -- if (newsize >= info->size -+ if (newsize >= ipa_size_summaries->get (what)->size - && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS) - && newsize > limit) - { -@@ -203,7 +203,7 @@ caller_growth_limits (struct cgraph_edge - stack_size_limit += ((gcov_type)stack_size_limit - * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100); - -- inlined_stack = (outer_info->stack_frame_offset -+ inlined_stack = (ipa_get_stack_frame_offset (to) - + outer_info->estimated_self_stack_size - + what_info->estimated_stack_size); - /* Check new stack consumption with stack consumption at the place -@@ -213,7 +213,7 @@ caller_growth_limits (struct cgraph_edge - inline call, we can inline, too. - This bit overoptimistically assume that we are good at stack - packing. */ -- && inlined_stack > info->estimated_stack_size -+ && inlined_stack > ipa_fn_summaries->get (to)->estimated_stack_size - && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME)) - { - e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT; -@@ -321,8 +321,8 @@ can_inline_edge_p (struct cgraph_edge *e - - bool inlinable = true; - enum availability avail; -- cgraph_node *caller = e->caller->global.inlined_to -- ? e->caller->global.inlined_to : e->caller; -+ cgraph_node *caller = (e->caller->inlined_to -+ ? e->caller->inlined_to : e->caller); - cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller); - - if (!callee->definition) -@@ -414,8 +414,8 @@ can_inline_edge_by_limits_p (struct cgra - - bool inlinable = true; - enum availability avail; -- cgraph_node *caller = e->caller->global.inlined_to -- ? e->caller->global.inlined_to : e->caller; -+ cgraph_node *caller = (e->caller->inlined_to -+ ? e->caller->inlined_to : e->caller); - cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller); - tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller->decl); - tree callee_tree -@@ -687,8 +687,8 @@ inline sreal - compute_uninlined_call_time (struct cgraph_edge *edge, - sreal uninlined_call_time) - { -- cgraph_node *caller = (edge->caller->global.inlined_to -- ? edge->caller->global.inlined_to -+ cgraph_node *caller = (edge->caller->inlined_to -+ ? edge->caller->inlined_to - : edge->caller); - - sreal freq = edge->sreal_frequency (); -@@ -708,8 +708,8 @@ inline sreal - compute_inlined_call_time (struct cgraph_edge *edge, - sreal time) - { -- cgraph_node *caller = (edge->caller->global.inlined_to -- ? edge->caller->global.inlined_to -+ cgraph_node *caller = (edge->caller->inlined_to -+ ? edge->caller->inlined_to - : edge->caller); - sreal caller_time = ipa_fn_summaries->get (caller)->time; - -@@ -895,7 +895,7 @@ want_inline_self_recursive_call_p (struc - reason = "--param max-inline-recursive-depth exceeded."; - want_inline = false; - } -- else if (outer_node->global.inlined_to -+ else if (outer_node->inlined_to - && (caller_freq = outer_node->callers->sreal_frequency ()) == 0) - { - reason = "caller frequency is 0"; -@@ -1005,7 +1005,7 @@ want_inline_function_to_all_callers_p (s - if (node->alias) - return false; - /* Already inlined? */ -- if (node->global.inlined_to) -+ if (node->inlined_to) - return false; - /* Does it have callers? 
*/ - if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true)) -@@ -1037,8 +1037,8 @@ edge_badness (struct cgraph_edge *edge, - struct cgraph_node *callee = edge->callee->ultimate_alias_target (); - struct ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee); - ipa_hints hints; -- cgraph_node *caller = (edge->caller->global.inlined_to -- ? edge->caller->global.inlined_to -+ cgraph_node *caller = (edge->caller->inlined_to -+ ? edge->caller->inlined_to - : edge->caller); - - growth = estimate_edge_growth (edge); -@@ -1051,7 +1051,7 @@ edge_badness (struct cgraph_edge *edge, - gcc_checking_assert ((edge_time * 100 - - callee_info->time * 101).to_int () <= 0 - || callee->count.ipa ().initialized_p ()); -- gcc_checking_assert (growth <= callee_info->size); -+ gcc_checking_assert (growth <= ipa_size_summaries->get (callee)->size); - - if (dump) - { -@@ -1122,7 +1122,7 @@ edge_badness (struct cgraph_edge *edge, - if (need_more_work) - noninline_callee (); - } -- Withhout panilizing this case, we usually inline noninline_callee -+ Withhout penalizing this case, we usually inline noninline_callee - into the inline_caller because overall_growth is small preventing - further inlining of inline_caller. - -@@ -1132,7 +1132,7 @@ edge_badness (struct cgraph_edge *edge, - if (growth > overall_growth - /* ... and having only one caller which is not inlined ... */ - && callee_info->single_caller -- && !edge->caller->global.inlined_to -+ && !edge->caller->inlined_to - /* ... and edges executed only conditionally ... */ - && edge->sreal_frequency () < 1 - /* ... consider case where callee is not inline but caller is ... */ -@@ -1155,7 +1155,7 @@ edge_badness (struct cgraph_edge *edge, - and it is not called once and. */ - if (!caller_info->single_caller && overall_growth < caller_growth - && caller_info->inlinable -- && caller_info->size -+ && ipa_size_summaries->get (caller)->size - < (DECL_DECLARED_INLINE_P (caller->decl) - ? 
MAX_INLINE_INSNS_SINGLE : MAX_INLINE_INSNS_AUTO)) - { -@@ -1178,7 +1178,7 @@ edge_badness (struct cgraph_edge *edge, - overall_growth += 256 * 256 - 256; - denominator *= overall_growth; - } -- denominator *= ipa_fn_summaries->get (caller)->self_size + growth; -+ denominator *= ipa_size_summaries->get (caller)->size + growth; - - badness = - numerator / denominator; - -@@ -1300,8 +1300,10 @@ reset_edge_caches (struct cgraph_node *n - struct cgraph_node *where = node; - struct ipa_ref *ref; - -- if (where->global.inlined_to) -- where = where->global.inlined_to; -+ if (where->inlined_to) -+ where = where->inlined_to; -+ -+ reset_node_cache (where); - - if (edge_growth_cache != NULL) - for (edge = where->callers; edge; edge = edge->next_caller) -@@ -1351,7 +1353,7 @@ update_caller_keys (edge_heap_t *heap, s - struct ipa_ref *ref; - - if ((!node->alias && !ipa_fn_summaries->get (node)->inlinable) -- || node->global.inlined_to) -+ || node->inlined_to) - return; - if (!bitmap_set_bit (updated_nodes, node->get_uid ())) - return; -@@ -1479,8 +1481,8 @@ recursive_inlining (struct cgraph_edge * - int n = 0; - - node = edge->caller; -- if (node->global.inlined_to) -- node = node->global.inlined_to; -+ if (node->inlined_to) -+ node = node->inlined_to; - - if (DECL_DECLARED_INLINE_P (node->decl)) - limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE); -@@ -1528,7 +1530,7 @@ recursive_inlining (struct cgraph_edge * - - depth = 1; - for (cnode = curr->caller; -- cnode->global.inlined_to; cnode = cnode->callers->caller) -+ cnode->inlined_to; cnode = cnode->callers->caller) - if (node->decl - == curr->callee->ultimate_alias_target ()->decl) - depth++; -@@ -1567,6 +1569,7 @@ recursive_inlining (struct cgraph_edge * - } - - inline_call (curr, false, new_edges, &overall_size, true); -+ reset_node_cache (node); - lookup_recursive_calls (node, curr->callee, &heap); - n++; - } -@@ -1581,8 +1584,8 @@ recursive_inlining (struct cgraph_edge * - dump_printf_loc (MSG_NOTE, edge->call_stmt, - "\n Inlined %i times, " - "body grown from size %i to %i, time %f to %f\n", n, -- ipa_fn_summaries->get (master_clone)->size, -- ipa_fn_summaries->get (node)->size, -+ ipa_size_summaries->get (master_clone)->size, -+ ipa_size_summaries->get (node)->size, - ipa_fn_summaries->get (master_clone)->time.to_double (), - ipa_fn_summaries->get (node)->time.to_double ()); - -@@ -1593,7 +1596,7 @@ recursive_inlining (struct cgraph_edge * - node = next) - { - next = symtab->next_function (node); -- if (node->global.inlined_to == master_clone) -+ if (node->inlined_to == master_clone) - node->remove (); - } - master_clone->remove (); -@@ -1707,8 +1710,8 @@ resolve_noninline_speculation (edge_heap - if (edge->speculative && !speculation_useful_p (edge, false)) - { - struct cgraph_node *node = edge->caller; -- struct cgraph_node *where = node->global.inlined_to -- ? node->global.inlined_to : node; -+ struct cgraph_node *where = node->inlined_to -+ ? node->inlined_to : node; - auto_bitmap updated_nodes; - - if (edge->count.ipa ().initialized_p ()) -@@ -1749,6 +1752,16 @@ sum_callers (struct cgraph_node *node, v - return false; - } - -+/* We only propagate across edges with non-interposable callee. 
*/ -+ -+inline bool -+ignore_edge_p (struct cgraph_edge *e) -+{ -+ enum availability avail; -+ e->callee->function_or_virtual_thunk_symbol (&avail, e->caller); -+ return (avail <= AVAIL_INTERPOSABLE); -+} -+ - /* We use greedy algorithm for inlining of small functions: - All inline candidates are put into prioritized heap ordered in - increasing badness. -@@ -1776,11 +1789,11 @@ inline_small_functions (void) - metrics. */ - - max_count = profile_count::uninitialized (); -- ipa_reduced_postorder (order, true, NULL); -+ ipa_reduced_postorder (order, true, ignore_edge_p); - free (order); - - FOR_EACH_DEFINED_FUNCTION (node) -- if (!node->global.inlined_to) -+ if (!node->inlined_to) - { - if (!node->alias && node->analyzed - && (node->has_gimple_body_p () || node->thunk.thunk_p) -@@ -1792,7 +1805,7 @@ inline_small_functions (void) - /* Do not account external functions, they will be optimized out - if not inlined. Also only count the non-cold portion of program. */ - if (inline_account_function_p (node)) -- initial_size += info->size; -+ initial_size += ipa_size_summaries->get (node)->size; - info->growth = estimate_growth (node); - - int num_calls = 0; -@@ -1808,7 +1821,8 @@ inline_small_functions (void) - n2 = ((struct ipa_dfs_info *) n2->aux)->next_cycle) - if (opt_for_fn (n2->decl, optimize)) - { -- ipa_fn_summary *info2 = ipa_fn_summaries->get (n2); -+ ipa_fn_summary *info2 = ipa_fn_summaries->get -+ (n2->inlined_to ? n2->inlined_to : n2); - if (info2->scc_no) - break; - info2->scc_no = id; -@@ -1820,8 +1834,7 @@ inline_small_functions (void) - max_count = max_count.max (edge->count.ipa ()); - } - ipa_free_postorder_info (); -- edge_growth_cache -- = new call_summary (symtab, false); -+ initialize_growth_caches (); - - if (dump_file) - fprintf (dump_file, -@@ -1872,8 +1885,8 @@ inline_small_functions (void) - } - if (update) - { -- struct cgraph_node *where = node->global.inlined_to -- ? node->global.inlined_to : node; -+ struct cgraph_node *where = node->inlined_to -+ ? node->inlined_to : node; - ipa_update_overall_fn_summary (where); - reset_edge_caches (where); - update_caller_keys (&edge_heap, where, -@@ -1902,11 +1915,10 @@ inline_small_functions (void) - if (!edge->inline_failed || !edge->callee->analyzed) - continue; - --#if CHECKING_P - /* Be sure that caches are maintained consistent. - This check is affected by scaling roundoff errors when compiling for - IPA this we skip it in that case. */ -- if (!edge->callee->count.ipa_p () -+ if (flag_checking && !edge->callee->count.ipa_p () - && (!max_count.initialized_p () || !max_count.nonzero_p ())) - { - sreal cached_badness = edge_badness (edge, false); -@@ -1917,6 +1929,9 @@ inline_small_functions (void) - - if (edge_growth_cache != NULL) - edge_growth_cache->remove (edge); -+ reset_node_cache (edge->caller->inlined_to -+ ? 
edge->caller->inlined_to -+ : edge->caller); - gcc_assert (old_size_est == estimate_edge_size (edge)); - gcc_assert (old_time_est == estimate_edge_time (edge)); - /* FIXME: -@@ -1941,9 +1956,6 @@ inline_small_functions (void) - } - else - current_badness = edge_badness (edge, false); --#else -- current_badness = edge_badness (edge, false); --#endif - if (current_badness != badness) - { - if (edge_heap.min () && current_badness > edge_heap.min_key ()) -@@ -1969,7 +1981,7 @@ inline_small_functions (void) - fprintf (dump_file, - "\nConsidering %s with %i size\n", - callee->dump_name (), -- ipa_fn_summaries->get (callee)->size); -+ ipa_size_summaries->get (callee)->size); - fprintf (dump_file, - " to be inlined into %s in %s:%i\n" - " Estimated badness is %f, frequency %.2f.\n", -@@ -2017,8 +2029,8 @@ inline_small_functions (void) - if (edge->recursive_p ()) - { - where = edge->caller; -- if (where->global.inlined_to) -- where = where->global.inlined_to; -+ if (where->inlined_to) -+ where = where->inlined_to; - if (!recursive_inlining (edge, - opt_for_fn (edge->caller->decl, - flag_indirect_inlining) -@@ -2048,7 +2060,7 @@ inline_small_functions (void) - selective. */ - - where = edge->caller; -- while (where->global.inlined_to) -+ while (where->inlined_to) - { - if (where->decl == callee->decl) - outer_node = where, depth++; -@@ -2067,17 +2079,16 @@ inline_small_functions (void) - else if (depth && dump_file) - fprintf (dump_file, " Peeling recursion with depth %i\n", depth); - -- gcc_checking_assert (!callee->global.inlined_to); -+ gcc_checking_assert (!callee->inlined_to); - inline_call (edge, true, &new_indirect_edges, &overall_size, true); -- add_new_edges_to_heap (&edge_heap, new_indirect_edges); -- - reset_edge_caches (edge->callee); -+ add_new_edges_to_heap (&edge_heap, new_indirect_edges); - - update_callee_keys (&edge_heap, where, updated_nodes); - } - where = edge->caller; -- if (where->global.inlined_to) -- where = where->global.inlined_to; -+ if (where->inlined_to) -+ where = where->inlined_to; - - /* Our profitability metric can depend on local properties - such as number of inlinable calls and size of the function body. -@@ -2095,7 +2106,7 @@ inline_small_functions (void) - - if (dump_enabled_p ()) - { -- ipa_fn_summary *s = ipa_fn_summaries->get (edge->caller); -+ ipa_fn_summary *s = ipa_fn_summaries->get (where); - - /* dump_printf can't handle %+i. */ - char buf_net_change[100]; -@@ -2106,7 +2117,9 @@ inline_small_functions (void) - " Inlined %C into %C which now has time %f and " - "size %i, net change of %s.\n", - edge->callee, edge->caller, -- s->time.to_double (), s->size, buf_net_change); -+ s->time.to_double (), -+ ipa_size_summaries->get (edge->caller)->size, -+ buf_net_change); - } - if (min_size > overall_size) - { -@@ -2208,8 +2221,8 @@ flatten_function (struct cgraph_node *no - - node->aux = NULL; - if (update) -- ipa_update_overall_fn_summary (node->global.inlined_to -- ? node->global.inlined_to : node); -+ ipa_update_overall_fn_summary (node->inlined_to -+ ? node->inlined_to : node); - } - - /* Inline NODE to all callers. Worker for cgraph_for_node_and_aliases. 
-@@ -2223,7 +2236,7 @@ inline_to_all_callers_1 (struct cgraph_n - int *num_calls = (int *)data; - bool callee_removed = false; - -- while (node->callers && !node->global.inlined_to) -+ while (node->callers && !node->inlined_to) - { - struct cgraph_node *caller = node->callers->caller; - -@@ -2243,11 +2256,11 @@ inline_to_all_callers_1 (struct cgraph_n - fprintf (dump_file, - "\nInlining %s size %i.\n", - ultimate->name (), -- ipa_fn_summaries->get (ultimate)->size); -+ ipa_size_summaries->get (ultimate)->size); - fprintf (dump_file, - " Called once from %s %i insns.\n", - node->callers->caller->name (), -- ipa_fn_summaries->get (node->callers->caller)->size); -+ ipa_size_summaries->get (node->callers->caller)->size); - } - - /* Remember which callers we inlined to, delaying updating the -@@ -2258,7 +2271,7 @@ inline_to_all_callers_1 (struct cgraph_n - fprintf (dump_file, - " Inlined into %s which now has %i size\n", - caller->name (), -- ipa_fn_summaries->get (caller)->size); -+ ipa_size_summaries->get (caller)->size); - if (!(*num_calls)--) - { - if (dump_file) -@@ -2296,7 +2309,7 @@ dump_overall_stats (void) - struct cgraph_node *node; - - FOR_EACH_DEFINED_FUNCTION (node) -- if (!node->global.inlined_to -+ if (!node->inlined_to - && !node->alias) - { - ipa_fn_summary *s = ipa_fn_summaries->get (node); -@@ -2482,8 +2495,9 @@ ipa_inline (void) - for (i = nnodes - 1, j = i; i >= 0; i--) - { - node = order[i]; -- if (lookup_attribute ("flatten", -- DECL_ATTRIBUTES (node->decl)) != NULL) -+ if (node->definition -+ && lookup_attribute ("flatten", -+ DECL_ATTRIBUTES (node->decl)) != NULL) - order[j--] = order[i]; - } - -@@ -2588,8 +2602,8 @@ ipa_inline (void) - } - if (update) - { -- struct cgraph_node *where = node->global.inlined_to -- ? node->global.inlined_to : node; -+ struct cgraph_node *where = node->inlined_to -+ ? node->inlined_to : node; - reset_edge_caches (where); - ipa_update_overall_fn_summary (where); - } -diff -Nurp a/gcc/ipa-inline.h b/gcc/ipa-inline.h ---- a/gcc/ipa-inline.h 2020-04-30 15:14:04.608000000 +0800 -+++ b/gcc/ipa-inline.h 2020-04-30 15:14:56.608000000 +0800 -@@ -47,6 +47,8 @@ bool growth_likely_positive (struct cgra - int do_estimate_edge_size (struct cgraph_edge *edge); - sreal do_estimate_edge_time (struct cgraph_edge *edge); - ipa_hints do_estimate_edge_hints (struct cgraph_edge *edge); -+void reset_node_cache (struct cgraph_node *node); -+void initialize_growth_caches (); - void free_growth_caches (void); - - /* In ipa-inline.c */ -diff -Nurp a/gcc/ipa-inline-transform.c b/gcc/ipa-inline-transform.c ---- a/gcc/ipa-inline-transform.c 2020-04-30 15:14:04.568000000 +0800 -+++ b/gcc/ipa-inline-transform.c 2020-04-30 15:14:56.624000000 +0800 -@@ -47,6 +47,7 @@ along with GCC; see the file COPYING3. - #include "function.h" - #include "cfg.h" - #include "basic-block.h" -+#include "ipa-utils.h" - - int ncalls_inlined; - int nfunctions_inlined; -@@ -166,8 +167,8 @@ clone_inlined_nodes (struct cgraph_edge - struct cgraph_node *inlining_into; - struct cgraph_edge *next; - -- if (e->caller->global.inlined_to) -- inlining_into = e->caller->global.inlined_to; -+ if (e->caller->inlined_to) -+ inlining_into = e->caller->inlined_to; - else - inlining_into = e->caller; - -@@ -193,14 +194,14 @@ clone_inlined_nodes (struct cgraph_edge - - For now we keep the ohter functions in the group in program until - cgraph_remove_unreachable_functions gets rid of them. 
*/ -- gcc_assert (!e->callee->global.inlined_to); -+ gcc_assert (!e->callee->inlined_to); - e->callee->remove_from_same_comdat_group (); - if (e->callee->definition - && inline_account_function_p (e->callee)) - { - gcc_assert (!e->callee->alias); - if (overall_size) -- *overall_size -= ipa_fn_summaries->get (e->callee)->size; -+ *overall_size -= ipa_size_summaries->get (e->callee)->size; - nfunctions_inlined++; - } - duplicate = false; -@@ -226,7 +227,7 @@ clone_inlined_nodes (struct cgraph_edge - else - e->callee->remove_from_same_comdat_group (); - -- e->callee->global.inlined_to = inlining_into; -+ e->callee->inlined_to = inlining_into; - - /* Recursively clone all bodies. */ - for (e = e->callee->callees; e; e = next) -@@ -310,20 +311,24 @@ inline_call (struct cgraph_edge *e, bool - /* Don't inline inlined edges. */ - gcc_assert (e->inline_failed); - /* Don't even think of inlining inline clone. */ -- gcc_assert (!callee->global.inlined_to); -+ gcc_assert (!callee->inlined_to); - - to = e->caller; -- if (to->global.inlined_to) -- to = to->global.inlined_to; -+ if (to->inlined_to) -+ to = to->inlined_to; - if (to->thunk.thunk_p) - { - struct cgraph_node *target = to->callees->callee; -+ thunk_expansion = true; -+ symtab->call_cgraph_removal_hooks (to); - if (in_lto_p) - to->get_untransformed_body (); - to->expand_thunk (false, true); - /* When thunk is instrumented we may have multiple callees. */ - for (e = to->callees; e && e->callee != target; e = e->next_callee) - ; -+ symtab->call_cgraph_insertion_hooks (to); -+ thunk_expansion = false; - gcc_assert (e); - } - -@@ -442,9 +447,9 @@ inline_call (struct cgraph_edge *e, bool - - clone_inlined_nodes (e, true, update_original, overall_size); - -- gcc_assert (curr->callee->global.inlined_to == to); -+ gcc_assert (curr->callee->inlined_to == to); - -- old_size = ipa_fn_summaries->get (to)->size; -+ old_size = ipa_size_summaries->get (to)->size; - ipa_merge_fn_summary_after_inlining (e); - if (e->in_polymorphic_cdtor) - mark_all_inlined_calls_cdtor (e->callee); -@@ -458,8 +463,8 @@ inline_call (struct cgraph_edge *e, bool - work for further inlining into this function. Before inlining - the function we inlined to again we expect the caller to update - the overall summary. */ -- ipa_fn_summaries->get (to)->size += estimated_growth; -- new_size = ipa_fn_summaries->get (to)->size; -+ ipa_size_summaries->get (to)->size += estimated_growth; -+ new_size = ipa_size_summaries->get (to)->size; - - if (callee->calls_comdat_local) - to->calls_comdat_local = true; -diff -Nurp a/gcc/ipa-predicate.c b/gcc/ipa-predicate.c ---- a/gcc/ipa-predicate.c 2020-04-30 15:14:04.620000000 +0800 -+++ b/gcc/ipa-predicate.c 2020-04-30 15:14:56.620000000 +0800 -@@ -33,9 +33,36 @@ along with GCC; see the file COPYING3. - #include "fold-const.h" - #include "tree-pretty-print.h" - #include "gimple.h" -+#include "gimplify.h" - #include "data-streamer.h" - - -+/* Check whether two set of operations have same effects. 
*/ -+static bool -+expr_eval_ops_equal_p (expr_eval_ops ops1, expr_eval_ops ops2) -+{ -+ if (ops1) -+ { -+ if (!ops2 || ops1->length () != ops2->length ()) -+ return false; -+ -+ for (unsigned i = 0; i < ops1->length (); i++) -+ { -+ expr_eval_op &op1 = (*ops1)[i]; -+ expr_eval_op &op2 = (*ops2)[i]; -+ -+ if (op1.code != op2.code -+ || op1.index != op2.index -+ || !vrp_operand_equal_p (op1.val[0], op2.val[0]) -+ || !vrp_operand_equal_p (op1.val[1], op2.val[1]) -+ || !types_compatible_p (op1.type, op2.type)) -+ return false; -+ } -+ return true; -+ } -+ return !ops2; -+} -+ - /* Add clause CLAUSE into the predicate P. - When CONDITIONS is NULL do not perform checking whether NEW_CLAUSE - is obviously true. This is useful only when NEW_CLAUSE is known to be -@@ -110,14 +137,16 @@ predicate::add_clause (conditions condit - for (c2 = c1 + 1; c2 < num_conditions; c2++) - if (new_clause & (1 << c2)) - { -- condition *cc1 = -- &(*conditions)[c1 - predicate::first_dynamic_condition]; - condition *cc2 = - &(*conditions)[c2 - predicate::first_dynamic_condition]; - if (cc1->operand_num == cc2->operand_num -- && cc1->val == cc2->val -+ && vrp_operand_equal_p (cc1->val, cc2->val) - && cc2->code != is_not_constant -- && cc2->code != predicate::changed -+ && cc2->code != changed -+ && expr_eval_ops_equal_p (cc1->param_ops, cc2->param_ops) -+ && cc2->agg_contents == cc1->agg_contents -+ && cc2->by_ref == cc1->by_ref -+ && types_compatible_p (cc2->type, cc1->type) - && cc1->code == invert_tree_comparison (cc2->code, - HONOR_NANS (cc1->val))) - return; -@@ -300,6 +329,83 @@ dump_condition (FILE *f, conditions cond - if (c->agg_contents) - fprintf (f, "[%soffset: " HOST_WIDE_INT_PRINT_DEC "]", - c->by_ref ? "ref " : "", c->offset); -+ -+ for (unsigned i = 0; i < vec_safe_length (c->param_ops); i++) -+ { -+ expr_eval_op &op = (*(c->param_ops))[i]; -+ const char *op_name = op_symbol_code (op.code); -+ -+ if (op_name == op_symbol_code (ERROR_MARK)) -+ op_name = get_tree_code_name (op.code); -+ -+ fprintf (f, ",("); -+ -+ if (!op.val[0]) -+ { -+ switch (op.code) -+ { -+ case FLOAT_EXPR: -+ case FIX_TRUNC_EXPR: -+ case FIXED_CONVERT_EXPR: -+ case VIEW_CONVERT_EXPR: -+ CASE_CONVERT: -+ if (op.code == VIEW_CONVERT_EXPR) -+ fprintf (f, "VCE"); -+ fprintf (f, "("); -+ print_generic_expr (f, op.type); -+ fprintf (f, ")" ); -+ break; -+ -+ default: -+ fprintf (f, "%s", op_name); -+ } -+ fprintf (f, " #"); -+ } -+ else if (!op.val[1]) -+ { -+ if (op.index) -+ { -+ print_generic_expr (f, op.val[0]); -+ fprintf (f, " %s #", op_name); -+ } -+ else -+ { -+ fprintf (f, "# %s ", op_name); -+ print_generic_expr (f, op.val[0]); -+ } -+ } -+ else -+ { -+ fprintf (f, "%s ", op_name); -+ switch (op.index) -+ { -+ case 0: -+ fprintf (f, "#, "); -+ print_generic_expr (f, op.val[0]); -+ fprintf (f, ", "); -+ print_generic_expr (f, op.val[1]); -+ break; -+ -+ case 1: -+ print_generic_expr (f, op.val[0]); -+ fprintf (f, ", #, "); -+ print_generic_expr (f, op.val[1]); -+ break; -+ -+ case 2: -+ print_generic_expr (f, op.val[0]); -+ fprintf (f, ", "); -+ print_generic_expr (f, op.val[1]); -+ fprintf (f, ", #"); -+ break; -+ -+ default: -+ fprintf (f, "*, *, *"); -+ } -+ } -+ fprintf (f, ")"); -+ } -+ - if (c->code == predicate::is_not_constant) - { - fprintf (f, " not constant"); -@@ -398,8 +504,9 @@ predicate::remap_after_duplication (clau - for other purposes). 
*/ - - predicate --predicate::remap_after_inlining (struct ipa_fn_summary *info, -- struct ipa_fn_summary *callee_info, -+predicate::remap_after_inlining (class ipa_fn_summary *info, -+ class ipa_node_params *params_summary, -+ class ipa_fn_summary *callee_info, - vec operand_map, - vec offset_map, - clause_t possible_truths, -@@ -460,10 +567,10 @@ predicate::remap_after_inlining (struct - ap.offset = c->offset + offset_delta; - ap.agg_contents = c->agg_contents; - ap.by_ref = c->by_ref; -- cond_predicate = add_condition (info, -+ cond_predicate = add_condition (info, params_summary, - operand_map[c->operand_num], -- c->size, &ap, c->code, -- c->val); -+ c->type, &ap, c->code, -+ c->val, c->param_ops); - } - } - /* Fixed conditions remains same, construct single -@@ -483,7 +590,7 @@ predicate::remap_after_inlining (struct - /* Read predicate from IB. */ - - void --predicate::stream_in (struct lto_input_block *ib) -+predicate::stream_in (class lto_input_block *ib) - { - clause_t clause; - int k = 0; -@@ -516,21 +623,28 @@ predicate::stream_out (struct output_blo - } - - --/* Add condition to condition list SUMMARY. OPERAND_NUM, SIZE, CODE and VAL -- correspond to fields of condition structure. AGGPOS describes whether the -- used operand is loaded from an aggregate and where in the aggregate it is. -- It can be NULL, which means this not a load from an aggregate. */ -+/* Add condition to condition list SUMMARY. OPERAND_NUM, TYPE, CODE, VAL and -+ PARAM_OPS correspond to fields of condition structure. AGGPOS describes -+ whether the used operand is loaded from an aggregate and where in the -+ aggregate it is. It can be NULL, which means this not a load from an -+ aggregate. */ - - predicate --add_condition (struct ipa_fn_summary *summary, int operand_num, -- HOST_WIDE_INT size, struct agg_position_info *aggpos, -- enum tree_code code, tree val) -+add_condition (class ipa_fn_summary *summary, -+ class ipa_node_params *params_summary, -+ int operand_num, -+ tree type, struct agg_position_info *aggpos, -+ enum tree_code code, tree val, expr_eval_ops param_ops) - { -- int i; -+ int i, j; - struct condition *c; - struct condition new_cond; - HOST_WIDE_INT offset; - bool agg_contents, by_ref; -+ expr_eval_op *op; -+ -+ if (params_summary) -+ ipa_set_param_used_by_ipa_predicates (params_summary, operand_num, true); - - if (aggpos) - { -@@ -549,10 +663,11 @@ add_condition (struct ipa_fn_summary *su - for (i = 0; vec_safe_iterate (summary->conds, i, &c); i++) - { - if (c->operand_num == operand_num -- && c->size == size - && c->code == code -- && c->val == val -+ && types_compatible_p (c->type, type) -+ && vrp_operand_equal_p (c->val, val) - && c->agg_contents == agg_contents -+ && expr_eval_ops_equal_p (c->param_ops, param_ops) - && (!agg_contents || (c->offset == offset && c->by_ref == by_ref))) - return predicate::predicate_testing_cond (i); - } -@@ -562,11 +677,21 @@ add_condition (struct ipa_fn_summary *su - - new_cond.operand_num = operand_num; - new_cond.code = code; -- new_cond.val = val; -+ new_cond.type = unshare_expr_without_location (type); -+ new_cond.val = val ? 
unshare_expr_without_location (val) : val; - new_cond.agg_contents = agg_contents; - new_cond.by_ref = by_ref; - new_cond.offset = offset; -- new_cond.size = size; -+ new_cond.param_ops = vec_safe_copy (param_ops); -+ -+ for (j = 0; vec_safe_iterate (new_cond.param_ops, j, &op); j++) -+ { -+ if (op->val[0]) -+ op->val[0] = unshare_expr_without_location (op->val[0]); -+ if (op->val[1]) -+ op->val[1] = unshare_expr_without_location (op->val[1]); -+ } -+ - vec_safe_push (summary->conds, new_cond); - - return predicate::predicate_testing_cond (i); -diff -Nurp a/gcc/ipa-predicate.h b/gcc/ipa-predicate.h ---- a/gcc/ipa-predicate.h 2020-04-30 15:14:04.612000000 +0800 -+++ b/gcc/ipa-predicate.h 2020-04-30 15:14:56.620000000 +0800 -@@ -22,16 +22,36 @@ along with GCC; see the file COPYING3. - inlined into (i.e. known constant values of function parameters. - - Conditions that are interesting for function body are collected into CONDS -- vector. They are of simple for function_param OP VAL, where VAL is -- IPA invariant. The conditions are then referred by predicates. */ -+ vector. They are of simple as kind of a mathematical transformation on -+ function parameter, T(function_param), in which the parameter occurs only -+ once, and other operands are IPA invariant. The conditions are then -+ referred by predicates. */ -+ -+ -+/* A simplified representation of tree node, for unary, binary and ternary -+ operation. Computations on parameter are decomposed to a series of this -+ kind of structure. */ -+struct GTY(()) expr_eval_op -+{ -+ /* Result type of expression. */ -+ tree type; -+ /* Constant operands in expression, there are at most two. */ -+ tree val[2]; -+ /* Index of parameter operand in expression. */ -+ unsigned index : 2; -+ /* Operation code of expression. */ -+ ENUM_BITFIELD(tree_code) code : 16; -+}; -+ -+typedef vec *expr_eval_ops; - - struct GTY(()) condition - { - /* If agg_contents is set, this is the offset from which the used data was - loaded. */ - HOST_WIDE_INT offset; -- /* Size of the access reading the data (or the PARM_DECL SSA_NAME). */ -- HOST_WIDE_INT size; -+ /* Type of the access reading the data (or the PARM_DECL SSA_NAME). */ -+ tree type; - tree val; - int operand_num; - ENUM_BITFIELD(tree_code) code : 16; -@@ -41,6 +61,9 @@ struct GTY(()) condition - /* If agg_contents is set, this differentiates between loads from data - passed by reference and by value. */ - unsigned by_ref : 1; -+ /* A set of sequential operations on the parameter, which can be seen as -+ a mathmatical function on the parameter. */ -+ expr_eval_ops param_ops; - }; - - /* Information kept about parameter of call site. */ -@@ -54,6 +77,14 @@ struct inline_param_summary - - Value 0 is reserved for compile time invariants. */ - int change_prob; -+ bool equal_to (const inline_param_summary &other) const -+ { -+ return change_prob == other.change_prob; -+ } -+ bool useless_p (void) const -+ { -+ return change_prob == REG_BR_PROB_BASE; -+ } - }; - - typedef vec *conditions; -@@ -205,11 +236,12 @@ public: - predicate remap_after_duplication (clause_t); - - /* Return predicate equal to THIS after inlining. 
*/ -- predicate remap_after_inlining (struct ipa_fn_summary *, -- struct ipa_fn_summary *, -+ predicate remap_after_inlining (class ipa_fn_summary *, -+ class ipa_node_params *params_summary, -+ class ipa_fn_summary *, - vec, vec, clause_t, const predicate &); - -- void stream_in (struct lto_input_block *); -+ void stream_in (class lto_input_block *); - void stream_out (struct output_block *); - - private: -@@ -227,6 +259,9 @@ private: - }; - - void dump_condition (FILE *f, conditions conditions, int cond); --predicate add_condition (struct ipa_fn_summary *summary, int operand_num, -- HOST_WIDE_INT size, struct agg_position_info *aggpos, -- enum tree_code code, tree val); -+predicate add_condition (class ipa_fn_summary *summary, -+ class ipa_node_params *params_summary, -+ int operand_num, -+ tree type, struct agg_position_info *aggpos, -+ enum tree_code code, tree val, -+ expr_eval_ops param_ops = NULL); -diff -Nurp a/gcc/ipa-profile.c b/gcc/ipa-profile.c ---- a/gcc/ipa-profile.c 2020-04-30 15:14:04.632000000 +0800 -+++ b/gcc/ipa-profile.c 2020-04-30 15:14:56.652000000 +0800 -@@ -326,8 +326,8 @@ ipa_propagate_frequency_1 (struct cgraph - if (profile_info - && !(edge->callee->count.ipa () == profile_count::zero ()) - && (edge->caller->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED -- || (edge->caller->global.inlined_to -- && edge->caller->global.inlined_to->frequency -+ || (edge->caller->inlined_to -+ && edge->caller->inlined_to->frequency - != NODE_FREQUENCY_UNLIKELY_EXECUTED))) - d->maybe_unlikely_executed = false; - if (edge->count.ipa ().initialized_p () -@@ -477,6 +477,29 @@ ipa_propagate_frequency (struct cgraph_n - return changed; - } - -+/* Check that number of arguments of N agrees with E. -+ Be conservative when summaries are not present. */ -+ -+static bool -+check_argument_count (struct cgraph_node *n, struct cgraph_edge *e) -+{ -+ if (!ipa_node_params_sum || !ipa_edge_args_sum) -+ return true; -+ class ipa_node_params *info = IPA_NODE_REF (n->function_symbol ()); -+ if (!info) -+ return true; -+ if (!info->descriptors) -+ return true; -+ ipa_edge_args *e_info = IPA_EDGE_REF (e); -+ if (!e) -+ return true; -+ if (ipa_get_param_count (info) != ipa_get_cs_argument_count (e_info) -+ && (ipa_get_param_count (info) >= ipa_get_cs_argument_count (e_info) -+ || !stdarg_p (TREE_TYPE (n->decl)))) -+ return false; -+ return true; -+} -+ - /* Simple ipa profile pass propagating frequencies across the callgraph. 
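An illustrative sketch (hypothetical names) of the parameter/argument mismatch that the new check_argument_count helper above rejects before ipa-profile speculates an indirect call to a profiled target:

   void seen_target (int a, int b) { (void) a; (void) b; }

   typedef void (*cb_t) (int);

   void
   caller (cb_t cb)
   {
     cb (42);   /* one argument at the call site vs. two formal parameters
                   of seen_target, so the target is counted as a mismatch;
                   only a stdarg target with fewer formals than arguments
                   would still be accepted.  */
   }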
*/ - - static unsigned int -@@ -600,14 +623,7 @@ ipa_profile (void) - "Not speculating: target is overwritable " - "and can be discarded.\n"); - } -- else if (ipa_node_params_sum && ipa_edge_args_sum -- && (!vec_safe_is_empty -- (IPA_NODE_REF (n2)->descriptors)) -- && ipa_get_param_count (IPA_NODE_REF (n2)) -- != ipa_get_cs_argument_count (IPA_EDGE_REF (e)) -- && (ipa_get_param_count (IPA_NODE_REF (n2)) -- >= ipa_get_cs_argument_count (IPA_EDGE_REF (e)) -- || !stdarg_p (TREE_TYPE (n2->decl)))) -+ else if (!check_argument_count (n2, e)) - { - nmismatch++; - if (dump_file) -diff -Nurp a/gcc/ipa-prop.c b/gcc/ipa-prop.c ---- a/gcc/ipa-prop.c 2020-04-30 15:14:04.616000000 +0800 -+++ b/gcc/ipa-prop.c 2020-04-30 15:14:56.676000000 +0800 -@@ -203,7 +203,7 @@ ipa_get_param_decl_index_1 (vecdescriptors, ptree); - } -@@ -227,8 +227,10 @@ ipa_populate_param_decls (struct cgraph_ - for (parm = fnargs; parm; parm = DECL_CHAIN (parm)) - { - descriptors[param_num].decl_or_type = parm; -- descriptors[param_num].move_cost = estimate_move_cost (TREE_TYPE (parm), -- true); -+ unsigned int cost = estimate_move_cost (TREE_TYPE (parm), true); -+ descriptors[param_num].move_cost = cost; -+ /* Watch overflow, move_cost is a bitfield. */ -+ gcc_checking_assert (cost == descriptors[param_num].move_cost); - param_num++; - } - } -@@ -253,7 +255,7 @@ count_formal_params (tree fndecl) - using ipa_initialize_node_params. */ - - void --ipa_dump_param (FILE *file, struct ipa_node_params *info, int i) -+ipa_dump_param (FILE *file, class ipa_node_params *info, int i) - { - fprintf (file, "param #%i", i); - if ((*info->descriptors)[i].decl_or_type) -@@ -269,7 +271,7 @@ ipa_dump_param (FILE *file, struct ipa_n - static bool - ipa_alloc_node_params (struct cgraph_node *node, int param_count) - { -- struct ipa_node_params *info = IPA_NODE_REF (node); -+ class ipa_node_params *info = IPA_NODE_REF_GET_CREATE (node); - - if (!info->descriptors && param_count) - { -@@ -287,7 +289,7 @@ ipa_alloc_node_params (struct cgraph_nod - void - ipa_initialize_node_params (struct cgraph_node *node) - { -- struct ipa_node_params *info = IPA_NODE_REF (node); -+ class ipa_node_params *info = IPA_NODE_REF_GET_CREATE (node); - - if (!info->descriptors - && ipa_alloc_node_params (node, count_formal_params (node->decl))) -@@ -359,23 +361,50 @@ ipa_print_node_jump_functions_for_edge ( - - fprintf (f, " Aggregate passed by %s:\n", - jump_func->agg.by_ref ? "reference" : "value"); -- FOR_EACH_VEC_SAFE_ELT (jump_func->agg.items, j, item) -+ FOR_EACH_VEC_ELT (*jump_func->agg.items, j, item) - { - fprintf (f, " offset: " HOST_WIDE_INT_PRINT_DEC ", ", - item->offset); -- if (TYPE_P (item->value)) -- fprintf (f, "clobber of " HOST_WIDE_INT_PRINT_DEC " bits", -- tree_to_uhwi (TYPE_SIZE (item->value))); -- else -+ fprintf (f, "type: "); -+ print_generic_expr (f, item->type); -+ fprintf (f, ", "); -+ if (item->jftype == IPA_JF_PASS_THROUGH) -+ fprintf (f, "PASS THROUGH: %d,", -+ item->value.pass_through.formal_id); -+ else if (item->jftype == IPA_JF_LOAD_AGG) -+ { -+ fprintf (f, "LOAD AGG: %d", -+ item->value.pass_through.formal_id); -+ fprintf (f, " [offset: " HOST_WIDE_INT_PRINT_DEC ", by %s],", -+ item->value.load_agg.offset, -+ item->value.load_agg.by_ref ? 
"reference" -+ : "value"); -+ } -+ -+ if (item->jftype == IPA_JF_PASS_THROUGH -+ || item->jftype == IPA_JF_LOAD_AGG) -+ { -+ fprintf (f, " op %s", -+ get_tree_code_name (item->value.pass_through.operation)); -+ if (item->value.pass_through.operation != NOP_EXPR) -+ { -+ fprintf (f, " "); -+ print_generic_expr (f, item->value.pass_through.operand); -+ } -+ } -+ else if (item->jftype == IPA_JF_CONST) - { -- fprintf (f, "cst: "); -- print_generic_expr (f, item->value); -+ fprintf (f, "CONST: "); -+ print_generic_expr (f, item->value.constant); - } -+ else if (item->jftype == IPA_JF_UNKNOWN) -+ fprintf (f, "UNKNOWN: " HOST_WIDE_INT_PRINT_DEC " bits", -+ tree_to_uhwi (TYPE_SIZE (item->type))); - fprintf (f, "\n"); - } - } - -- struct ipa_polymorphic_call_context *ctx -+ class ipa_polymorphic_call_context *ctx - = ipa_get_ith_polymorhic_call_context (IPA_EDGE_REF (cs), i); - if (ctx && !ctx->useless_p ()) - { -@@ -432,7 +461,7 @@ ipa_print_node_jump_functions (FILE *f, - - for (cs = node->indirect_calls; cs; cs = cs->next_callee) - { -- struct cgraph_indirect_call_info *ii; -+ class cgraph_indirect_call_info *ii; - if (!ipa_edge_args_info_available_for_edge_p (cs)) - continue; - -@@ -1059,7 +1088,7 @@ bool - ipa_load_from_parm_agg (struct ipa_func_body_info *fbi, - vec *descriptors, - gimple *stmt, tree op, int *index_p, -- HOST_WIDE_INT *offset_p, HOST_WIDE_INT *size_p, -+ HOST_WIDE_INT *offset_p, poly_int64 *size_p, - bool *by_ref_p, bool *guaranteed_unmodified) - { - int index; -@@ -1135,6 +1164,67 @@ ipa_load_from_parm_agg (struct ipa_func_ - return false; - } - -+/* If STMT is an assignment that loads a value from a parameter declaration, -+ or from an aggregate passed as the parameter either by value or reference, -+ return the index of the parameter in ipa_node_params. Otherwise return -1. -+ -+ FBI holds gathered information about the function. INFO describes -+ parameters of the function, STMT is the assignment statement. If it is a -+ memory load from an aggregate, *OFFSET_P is filled with offset within the -+ aggregate, and *BY_REF_P specifies whether the aggregate is passed by -+ reference. */ -+ -+static int -+load_from_unmodified_param_or_agg (struct ipa_func_body_info *fbi, -+ class ipa_node_params *info, -+ gimple *stmt, -+ HOST_WIDE_INT *offset_p, -+ bool *by_ref_p) -+{ -+ int index = load_from_unmodified_param (fbi, info->descriptors, stmt); -+ poly_int64 size; -+ -+ /* Load value from a parameter declaration. */ -+ if (index >= 0) -+ { -+ *offset_p = -1; -+ return index; -+ } -+ -+ if (!gimple_assign_load_p (stmt)) -+ return -1; -+ -+ tree rhs = gimple_assign_rhs1 (stmt); -+ -+ /* Skip memory reference containing VIEW_CONVERT_EXPR. */ -+ for (tree t = rhs; handled_component_p (t); t = TREE_OPERAND (t, 0)) -+ if (TREE_CODE (t) == VIEW_CONVERT_EXPR) -+ return -1; -+ -+ /* Skip memory reference containing bit-field. 
*/ -+ if (TREE_CODE (rhs) == BIT_FIELD_REF -+ || contains_bitfld_component_ref_p (rhs)) -+ return -1; -+ -+ if (!ipa_load_from_parm_agg (fbi, info->descriptors, stmt, rhs, &index, -+ offset_p, &size, by_ref_p)) -+ return -1; -+ -+ gcc_assert (!maybe_ne (tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (rhs))), -+ size)); -+ if (!*by_ref_p) -+ { -+ tree param_type = ipa_get_type (info, index); -+ -+ if (!param_type || !AGGREGATE_TYPE_P (param_type)) -+ return -1; -+ } -+ else if (TREE_THIS_VOLATILE (rhs)) -+ return -1; -+ -+ return index; -+} -+ - /* Given that an actual argument is an SSA_NAME (given in NAME) and is a result - of an assignment statement STMT, try to determine whether we are actually - handling any of the following cases and construct an appropriate jump -@@ -1190,7 +1280,7 @@ ipa_load_from_parm_agg (struct ipa_func_ - - static void - compute_complex_assign_jump_func (struct ipa_func_body_info *fbi, -- struct ipa_node_params *info, -+ class ipa_node_params *info, - struct ipa_jump_func *jfunc, - gcall *call, gimple *stmt, tree name, - tree param_type) -@@ -1346,7 +1436,7 @@ get_ancestor_addr_info (gimple *assign, - - static void - compute_complex_ancestor_jump_func (struct ipa_func_body_info *fbi, -- struct ipa_node_params *info, -+ class ipa_node_params *info, - struct ipa_jump_func *jfunc, - gcall *call, gphi *phi) - { -@@ -1440,11 +1530,11 @@ type_like_member_ptr_p (tree type, tree - } - - /* If RHS is an SSA_NAME and it is defined by a simple copy assign statement, -- return the rhs of its defining statement. Otherwise return RHS as it -- is. */ -+ return the rhs of its defining statement, and this statement is stored in -+ *RHS_STMT. Otherwise return RHS as it is. */ - - static inline tree --get_ssa_def_if_simple_copy (tree rhs) -+get_ssa_def_if_simple_copy (tree rhs, gimple **rhs_stmt) - { - while (TREE_CODE (rhs) == SSA_NAME && !SSA_NAME_IS_DEFAULT_DEF (rhs)) - { -@@ -1454,100 +1544,323 @@ get_ssa_def_if_simple_copy (tree rhs) - rhs = gimple_assign_rhs1 (def_stmt); - else - break; -+ *rhs_stmt = def_stmt; - } - return rhs; - } - --/* Simple linked list, describing known contents of an aggregate beforere -- call. */ -+/* Simple linked list, describing contents of an aggregate before call. */ - - struct ipa_known_agg_contents_list - { - /* Offset and size of the described part of the aggregate. */ - HOST_WIDE_INT offset, size; -- /* Known constant value or NULL if the contents is known to be unknown. */ -- tree constant; -+ -+ /* Type of the described part of the aggregate. */ -+ tree type; -+ -+ /* Known constant value or jump function data describing contents. */ -+ struct ipa_load_agg_data value; -+ - /* Pointer to the next structure in the list. */ - struct ipa_known_agg_contents_list *next; - }; - --/* Find the proper place in linked list of ipa_known_agg_contents_list -- structures where to put a new one with the given LHS_OFFSET and LHS_SIZE, -- unless there is a partial overlap, in which case return NULL, or such -- element is already there, in which case set *ALREADY_THERE to true. */ -- --static struct ipa_known_agg_contents_list ** --get_place_in_agg_contents_list (struct ipa_known_agg_contents_list **list, -- HOST_WIDE_INT lhs_offset, -- HOST_WIDE_INT lhs_size, -- bool *already_there) -+/* Add an aggregate content item into a linked list of -+ ipa_known_agg_contents_list structure, in which all elements -+ are sorted ascendingly by offset. 
*/ -+ -+static inline void -+add_to_agg_contents_list (struct ipa_known_agg_contents_list **plist, -+ struct ipa_known_agg_contents_list *item) - { -- struct ipa_known_agg_contents_list **p = list; -- while (*p && (*p)->offset < lhs_offset) -+ struct ipa_known_agg_contents_list *list = *plist; -+ -+ for (; list; list = list->next) - { -- if ((*p)->offset + (*p)->size > lhs_offset) -- return NULL; -- p = &(*p)->next; -+ if (list->offset >= item->offset) -+ break; -+ -+ plist = &list->next; - } - -- if (*p && (*p)->offset < lhs_offset + lhs_size) -+ item->next = list; -+ *plist = item; -+} -+ -+/* Check whether a given aggregate content is clobbered by certain element in -+ a linked list of ipa_known_agg_contents_list. */ -+ -+static inline bool -+clobber_by_agg_contents_list_p (struct ipa_known_agg_contents_list *list, -+ struct ipa_known_agg_contents_list *item) -+{ -+ for (; list; list = list->next) - { -- if ((*p)->offset == lhs_offset && (*p)->size == lhs_size) -- /* We already know this value is subsequently overwritten with -- something else. */ -- *already_there = true; -- else -- /* Otherwise this is a partial overlap which we cannot -- represent. */ -- return NULL; -+ if (list->offset >= item->offset) -+ return list->offset < item->offset + item->size; -+ -+ if (list->offset + list->size > item->offset) -+ return true; - } -- return p; -+ -+ return false; - } - - /* Build aggregate jump function from LIST, assuming there are exactly -- CONST_COUNT constant entries there and that th offset of the passed argument -+ VALUE_COUNT entries there and that offset of the passed argument - is ARG_OFFSET and store it into JFUNC. */ - - static void - build_agg_jump_func_from_list (struct ipa_known_agg_contents_list *list, -- int const_count, HOST_WIDE_INT arg_offset, -+ int value_count, HOST_WIDE_INT arg_offset, - struct ipa_jump_func *jfunc) - { -- vec_alloc (jfunc->agg.items, const_count); -- while (list) -+ vec_alloc (jfunc->agg.items, value_count); -+ for (; list; list = list->next) -+ { -+ struct ipa_agg_jf_item item; -+ tree operand = list->value.pass_through.operand; -+ -+ if (list->value.pass_through.formal_id >= 0) -+ { -+ /* Content value is derived from some formal paramerter. */ -+ if (list->value.offset >= 0) -+ item.jftype = IPA_JF_LOAD_AGG; -+ else -+ item.jftype = IPA_JF_PASS_THROUGH; -+ -+ item.value.load_agg = list->value; -+ if (operand) -+ item.value.pass_through.operand -+ = unshare_expr_without_location (operand); -+ } -+ else if (operand) -+ { -+ /* Content value is known constant. */ -+ item.jftype = IPA_JF_CONST; -+ item.value.constant = unshare_expr_without_location (operand); -+ } -+ else -+ continue; -+ -+ item.type = list->type; -+ gcc_assert (tree_to_shwi (TYPE_SIZE (list->type)) == list->size); -+ -+ item.offset = list->offset - arg_offset; -+ gcc_assert ((item.offset % BITS_PER_UNIT) == 0); -+ -+ jfunc->agg.items->quick_push (item); -+ } -+} -+ -+/* Given an assignment statement STMT, try to collect information into -+ AGG_VALUE that will be used to construct jump function for RHS of the -+ assignment, from which content value of an aggregate part comes. -+ -+ Besides constant and simple pass-through jump functions, also try to -+ identify whether it matches the following pattern that can be described by -+ a load-value-from-aggregate jump function, which is a derivative of simple -+ pass-through jump function. -+ -+ foo (int *p) -+ { -+ ... 
-+ -+ *(q_5 + 4) = *(p_3(D) + 28) op 1; -+ bar (q_5); -+ } -+ -+ Here IPA_LOAD_AGG_DATA data structure is informative enough to describe -+ constant, simple pass-through and load-vale-from-aggregate. If value -+ is constant, it will be kept in field OPERAND, and field FORMAL_ID is -+ set to -1. For simple pass-through and load-value-from-aggregate, field -+ FORMAL_ID specifies the related formal parameter index, and field -+ OFFSET can be used to distinguish them, -1 means simple pass-through, -+ otherwise means load-value-from-aggregate. */ -+ -+static void -+analyze_agg_content_value (struct ipa_func_body_info *fbi, -+ struct ipa_load_agg_data *agg_value, -+ gimple *stmt) -+{ -+ tree lhs = gimple_assign_lhs (stmt); -+ tree rhs1 = gimple_assign_rhs1 (stmt); -+ enum tree_code code; -+ int index = -1; -+ -+ /* Initialize jump function data for the aggregate part. */ -+ memset (agg_value, 0, sizeof (*agg_value)); -+ agg_value->pass_through.operation = NOP_EXPR; -+ agg_value->pass_through.formal_id = -1; -+ agg_value->offset = -1; -+ -+ if (AGGREGATE_TYPE_P (TREE_TYPE (lhs)) /* TODO: Support aggregate type. */ -+ || TREE_THIS_VOLATILE (lhs) -+ || TREE_CODE (lhs) == BIT_FIELD_REF -+ || contains_bitfld_component_ref_p (lhs)) -+ return; -+ -+ /* Skip SSA copies. */ -+ while (gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS) -+ { -+ if (TREE_CODE (rhs1) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (rhs1)) -+ break; -+ -+ stmt = SSA_NAME_DEF_STMT (rhs1); -+ if (!is_gimple_assign (stmt)) -+ return; -+ -+ rhs1 = gimple_assign_rhs1 (stmt); -+ } -+ -+ code = gimple_assign_rhs_code (stmt); -+ switch (gimple_assign_rhs_class (stmt)) - { -- if (list->constant) -+ case GIMPLE_SINGLE_RHS: -+ if (is_gimple_ip_invariant (rhs1)) - { -- struct ipa_agg_jf_item item; -- item.offset = list->offset - arg_offset; -- gcc_assert ((item.offset % BITS_PER_UNIT) == 0); -- item.value = unshare_expr_without_location (list->constant); -- jfunc->agg.items->quick_push (item); -+ agg_value->pass_through.operand = rhs1; -+ return; - } -- list = list->next; -+ code = NOP_EXPR; -+ break; -+ -+ case GIMPLE_UNARY_RHS: -+ /* NOTE: A GIMPLE_UNARY_RHS operation might not be tcc_unary -+ (truth_not_expr is example), GIMPLE_BINARY_RHS does not imply -+ tcc_binary, this subtleness is somewhat misleading. -+ -+ Since tcc_unary is widely used in IPA-CP code to check an operation -+ with one operand, here we only allow tc_unary operation to avoid -+ possible problem. Then we can use (opclass == tc_unary) or not to -+ distinguish unary and binary. 
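A compilable variant of the pattern sketched in the comment above, with hypothetical names; byte offsets assume a 4-byte int and no padding:

   struct out { int pad; int x; };   /* 'x' at byte offset 4 */

   void bar (struct out *);

   void
   foo (int *p)
   {
     struct out q;
     q.pad = 0;
     q.x = p[7] + 1;   /* value loaded from the aggregate pointed to by 'p'
                          at byte offset 28, then PLUS_EXPR 1: describable
                          as an IPA_JF_LOAD_AGG item for the argument '&q' */
     bar (&q);
   }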
*/ -+ if (TREE_CODE_CLASS (code) != tcc_unary || CONVERT_EXPR_CODE_P (code)) -+ return; -+ -+ rhs1 = get_ssa_def_if_simple_copy (rhs1, &stmt); -+ break; -+ -+ case GIMPLE_BINARY_RHS: -+ { -+ gimple *rhs1_stmt = stmt; -+ gimple *rhs2_stmt = stmt; -+ tree rhs2 = gimple_assign_rhs2 (stmt); -+ -+ rhs1 = get_ssa_def_if_simple_copy (rhs1, &rhs1_stmt); -+ rhs2 = get_ssa_def_if_simple_copy (rhs2, &rhs2_stmt); -+ -+ if (is_gimple_ip_invariant (rhs2)) -+ { -+ agg_value->pass_through.operand = rhs2; -+ stmt = rhs1_stmt; -+ } -+ else if (is_gimple_ip_invariant (rhs1)) -+ { -+ if (TREE_CODE_CLASS (code) == tcc_comparison) -+ code = swap_tree_comparison (code); -+ else if (!commutative_tree_code (code)) -+ return; -+ -+ agg_value->pass_through.operand = rhs1; -+ stmt = rhs2_stmt; -+ rhs1 = rhs2; -+ } -+ else -+ return; -+ -+ if (TREE_CODE_CLASS (code) != tcc_comparison -+ && !useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs1))) -+ return; -+ } -+ break; -+ -+ default: -+ return; -+ } -+ -+ if (TREE_CODE (rhs1) != SSA_NAME) -+ index = load_from_unmodified_param_or_agg (fbi, fbi->info, stmt, -+ &agg_value->offset, -+ &agg_value->by_ref); -+ else if (SSA_NAME_IS_DEFAULT_DEF (rhs1)) -+ index = ipa_get_param_decl_index (fbi->info, SSA_NAME_VAR (rhs1)); -+ -+ if (index >= 0) -+ { -+ if (agg_value->offset >= 0) -+ agg_value->type = TREE_TYPE (rhs1); -+ agg_value->pass_through.formal_id = index; -+ agg_value->pass_through.operation = code; - } -+ else -+ agg_value->pass_through.operand = NULL_TREE; -+} -+ -+/* If STMT is a memory store to the object whose address is BASE, extract -+ information (offset, size, and value) into CONTENT, and return true, -+ otherwise we conservatively assume the whole object is modified with -+ unknown content, and return false. CHECK_REF means that access to object -+ is expected to be in form of MEM_REF expression. */ -+ -+static bool -+extract_mem_content (struct ipa_func_body_info *fbi, -+ gimple *stmt, tree base, bool check_ref, -+ struct ipa_known_agg_contents_list *content) -+{ -+ HOST_WIDE_INT lhs_offset, lhs_size; -+ bool reverse; -+ -+ if (!is_gimple_assign (stmt)) -+ return false; -+ -+ tree lhs = gimple_assign_lhs (stmt); -+ tree lhs_base = get_ref_base_and_extent_hwi (lhs, &lhs_offset, &lhs_size, -+ &reverse); -+ if (!lhs_base) -+ return false; -+ -+ if (check_ref) -+ { -+ if (TREE_CODE (lhs_base) != MEM_REF -+ || TREE_OPERAND (lhs_base, 0) != base -+ || !integer_zerop (TREE_OPERAND (lhs_base, 1))) -+ return false; -+ } -+ else if (lhs_base != base) -+ return false; -+ -+ content->offset = lhs_offset; -+ content->size = lhs_size; -+ content->type = TREE_TYPE (lhs); -+ content->next = NULL; -+ -+ analyze_agg_content_value (fbi, &content->value, stmt); -+ return true; - } - - /* Traverse statements from CALL backwards, scanning whether an aggregate given -- in ARG is filled in with constant values. ARG can either be an aggregate -- expression or a pointer to an aggregate. ARG_TYPE is the type of the -- aggregate. JFUNC is the jump function into which the constants are -- subsequently stored. */ -+ in ARG is filled in constants or values that are derived from caller's -+ formal parameter in the way described by some kinds of jump functions. FBI -+ is the context of the caller function for interprocedural analysis. ARG can -+ either be an aggregate expression or a pointer to an aggregate. ARG_TYPE is -+ the type of the aggregate, JFUNC is the jump function for the aggregate. 
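An illustrative call site (hypothetical names) of the sort the rewritten determine_known_aggregate_parts analyzes, now recording parameter-derived contents as well as constants:

   struct msg { int kind; int payload; };

   void consume (struct msg *);

   void
   produce (int n)
   {
     struct msg m;
     m.kind = 7;          /* constant content: an IPA_JF_CONST item      */
     m.payload = n + 3;   /* derived from parameter 'n': a pass-through
                             item with PLUS_EXPR and operand 3           */
     consume (&m);        /* both stores land in the aggregate jump
                             function built for the argument '&m'        */
   }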
*/ - - static void --determine_locally_known_aggregate_parts (gcall *call, tree arg, -- tree arg_type, -- struct ipa_jump_func *jfunc) --{ -- struct ipa_known_agg_contents_list *list = NULL; -- int item_count = 0, const_count = 0; -+determine_known_aggregate_parts (struct ipa_func_body_info *fbi, -+ gcall *call, tree arg, -+ tree arg_type, -+ struct ipa_jump_func *jfunc) -+{ -+ struct ipa_known_agg_contents_list *list = NULL, *all_list = NULL; -+ bitmap visited = NULL; -+ int item_count = 0, value_count = 0; - HOST_WIDE_INT arg_offset, arg_size; -- gimple_stmt_iterator gsi; - tree arg_base; - bool check_ref, by_ref; - ao_ref r; - -- if (PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS) == 0) -+ if ( PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS) == 0) - return; - - /* The function operates in three stages. First, we prepare check_ref, r, -@@ -1606,91 +1919,73 @@ determine_locally_known_aggregate_parts - ao_ref_init (&r, arg); - } - -- /* Second stage walks back the BB, looks at individual statements and as long -- as it is confident of how the statements affect contents of the -- aggregates, it builds a sorted linked list of ipa_agg_jf_list structures -- describing it. */ -- gsi = gsi_for_stmt (call); -- gsi_prev (&gsi); -- for (; !gsi_end_p (gsi); gsi_prev (&gsi)) -- { -- struct ipa_known_agg_contents_list *n, **p; -- gimple *stmt = gsi_stmt (gsi); -- HOST_WIDE_INT lhs_offset, lhs_size; -- tree lhs, rhs, lhs_base; -- bool reverse; -- -- if (!stmt_may_clobber_ref_p_1 (stmt, &r)) -- continue; -- if (!gimple_assign_single_p (stmt)) -- break; -- -- lhs = gimple_assign_lhs (stmt); -- rhs = gimple_assign_rhs1 (stmt); -- if (!is_gimple_reg_type (TREE_TYPE (rhs)) -- || TREE_CODE (lhs) == BIT_FIELD_REF -- || contains_bitfld_component_ref_p (lhs)) -- break; -+ /* Second stage traverses virtual SSA web backwards starting from the call -+ statement, only looks at individual dominating virtual operand (its -+ definition dominates the call), as long as it is confident that content -+ of the aggregate is affected by definition of the virtual operand, it -+ builds a sorted linked list of ipa_agg_jf_list describing that. */ - -- lhs_base = get_ref_base_and_extent_hwi (lhs, &lhs_offset, -- &lhs_size, &reverse); -- if (!lhs_base) -- break; -+ for (tree dom_vuse = gimple_vuse (call); dom_vuse;) -+ { -+ gimple *stmt = SSA_NAME_DEF_STMT (dom_vuse); - -- if (check_ref) -+ if (gimple_code (stmt) == GIMPLE_PHI) - { -- if (TREE_CODE (lhs_base) != MEM_REF -- || TREE_OPERAND (lhs_base, 0) != arg_base -- || !integer_zerop (TREE_OPERAND (lhs_base, 1))) -- break; -+ dom_vuse = get_continuation_for_phi (stmt, &r, true, -+ fbi->aa_walk_budget, -+ &visited, false, NULL, NULL); -+ continue; - } -- else if (lhs_base != arg_base) -+ -+ if (stmt_may_clobber_ref_p_1 (stmt, &r)) - { -- if (DECL_P (lhs_base)) -- continue; -- else -+ struct ipa_known_agg_contents_list *content -+ = XALLOCA (struct ipa_known_agg_contents_list); -+ -+ if (!extract_mem_content (fbi, stmt, arg_base, check_ref, content)) - break; -- } - -- bool already_there = false; -- p = get_place_in_agg_contents_list (&list, lhs_offset, lhs_size, -- &already_there); -- if (!p) -- break; -- if (already_there) -- continue; -+ /* Now we get a dominating virtual operand, and need to check -+ whether its value is clobbered any other dominating one. 
*/ -+ if ((content->value.pass_through.formal_id >= 0 -+ || content->value.pass_through.operand) -+ && !clobber_by_agg_contents_list_p (all_list, content)) -+ { -+ struct ipa_known_agg_contents_list *copy -+ = XALLOCA (struct ipa_known_agg_contents_list); - -- rhs = get_ssa_def_if_simple_copy (rhs); -- n = XALLOCA (struct ipa_known_agg_contents_list); -- n->size = lhs_size; -- n->offset = lhs_offset; -- if (is_gimple_ip_invariant (rhs)) -- { -- n->constant = rhs; -- const_count++; -+ /* Add to the list consisting of only dominating virtual -+ operands, whose definitions can finally reach the call. */ -+ add_to_agg_contents_list (&list, (*copy = *content, copy)); -+ -+ if (++value_count == PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS)) -+ break; -+ } -+ -+ /* Add to the list consisting of all dominating virtual operands. */ -+ add_to_agg_contents_list (&all_list, content); -+ -+ if (++item_count == 2 * PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS)) -+ break; - } -- else -- n->constant = NULL_TREE; -- n->next = *p; -- *p = n; -- -- item_count++; -- if (const_count == PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS) -- || item_count == 2 * PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS)) -- break; -- } -+ dom_vuse = gimple_vuse (stmt); -+ } -+ -+ if (visited) -+ BITMAP_FREE (visited); - - /* Third stage just goes over the list and creates an appropriate vector of -- ipa_agg_jf_item structures out of it, of sourse only if there are -- any known constants to begin with. */ -+ ipa_agg_jf_item structures out of it, of course only if there are -+ any meaningful items to begin with. */ - -- if (const_count) -+ if (value_count) - { - jfunc->agg.by_ref = by_ref; -- build_agg_jump_func_from_list (list, const_count, arg_offset, jfunc); -+ build_agg_jump_func_from_list (list, value_count, arg_offset, jfunc); - } - } - -+ - /* Return the Ith param type of callee associated with call graph - edge E. */ - -@@ -1797,7 +2092,7 @@ ipa_set_jfunc_vr (ipa_jump_func *jf, enu - jf->m_vr = ipa_get_value_range (type, min, max); - } - --/* Assign to JF a pointer to a value_range just liek TMP but either fetch a -+/* Assign to JF a pointer to a value_range just like TMP but either fetch a - copy from ipa_vr_hash_table or allocate a new on in GC memory. 
*/ - - static void -@@ -1814,8 +2109,8 @@ static void - ipa_compute_jump_functions_for_edge (struct ipa_func_body_info *fbi, - struct cgraph_edge *cs) - { -- struct ipa_node_params *info = IPA_NODE_REF (cs->caller); -- struct ipa_edge_args *args = IPA_EDGE_REF (cs); -+ class ipa_node_params *info = IPA_NODE_REF (cs->caller); -+ class ipa_edge_args *args = IPA_EDGE_REF_GET_CREATE (cs); - gcall *call = cs->call_stmt; - int n, arg_num = gimple_call_num_args (call); - bool useful_context = false; -@@ -1839,7 +2134,7 @@ ipa_compute_jump_functions_for_edge (str - if (flag_devirtualize && POINTER_TYPE_P (TREE_TYPE (arg))) - { - tree instance; -- struct ipa_polymorphic_call_context context (cs->caller->decl, -+ class ipa_polymorphic_call_context context (cs->caller->decl, - arg, cs->call_stmt, - &instance); - context.get_dynamic_type (instance, arg, NULL, cs->call_stmt, -@@ -1978,7 +2273,7 @@ ipa_compute_jump_functions_for_edge (str - || !ipa_get_jf_ancestor_agg_preserved (jfunc)) - && (AGGREGATE_TYPE_P (TREE_TYPE (arg)) - || POINTER_TYPE_P (param_type))) -- determine_locally_known_aggregate_parts (call, arg, param_type, jfunc); -+ determine_known_aggregate_parts (fbi, call, arg, param_type, jfunc); - } - if (!useful_context) - vec_free (args->polymorphic_call_contexts); -@@ -2076,11 +2371,12 @@ ipa_is_ssa_with_stmt_def (tree t) - - /* Find the indirect call graph edge corresponding to STMT and mark it as a - call to a parameter number PARAM_INDEX. NODE is the caller. Return the -- indirect call graph edge. */ -+ indirect call graph edge. -+ If POLYMORPHIC is true record is as a destination of polymorphic call. */ - - static struct cgraph_edge * - ipa_note_param_call (struct cgraph_node *node, int param_index, -- gcall *stmt) -+ gcall *stmt, bool polymorphic) - { - struct cgraph_edge *cs; - -@@ -2089,6 +2385,11 @@ ipa_note_param_call (struct cgraph_node - cs->indirect_info->agg_contents = 0; - cs->indirect_info->member_ptr = 0; - cs->indirect_info->guaranteed_unmodified = 0; -+ ipa_set_param_used_by_indirect_call (IPA_NODE_REF (node), -+ param_index, true); -+ if (cs->indirect_info->polymorphic || polymorphic) -+ ipa_set_param_used_by_polymorphic_call -+ (IPA_NODE_REF (node), param_index, true); - return cs; - } - -@@ -2155,7 +2456,7 @@ static void - ipa_analyze_indirect_call_uses (struct ipa_func_body_info *fbi, gcall *call, - tree target) - { -- struct ipa_node_params *info = fbi->info; -+ class ipa_node_params *info = fbi->info; - HOST_WIDE_INT offset; - bool by_ref; - -@@ -2164,7 +2465,7 @@ ipa_analyze_indirect_call_uses (struct i - tree var = SSA_NAME_VAR (target); - int index = ipa_get_param_decl_index (info, var); - if (index >= 0) -- ipa_note_param_call (fbi->node, index, call); -+ ipa_note_param_call (fbi->node, index, call, false); - return; - } - -@@ -2176,7 +2477,8 @@ ipa_analyze_indirect_call_uses (struct i - gimple_assign_rhs1 (def), &index, &offset, - NULL, &by_ref, &guaranteed_unmodified)) - { -- struct cgraph_edge *cs = ipa_note_param_call (fbi->node, index, call); -+ struct cgraph_edge *cs = ipa_note_param_call (fbi->node, index, -+ call, false); - cs->indirect_info->offset = offset; - cs->indirect_info->agg_contents = 1; - cs->indirect_info->by_ref = by_ref; -@@ -2277,7 +2579,8 @@ ipa_analyze_indirect_call_uses (struct i - if (index >= 0 - && parm_preserved_before_stmt_p (fbi, index, call, rec)) - { -- struct cgraph_edge *cs = ipa_note_param_call (fbi->node, index, call); -+ struct cgraph_edge *cs = ipa_note_param_call (fbi->node, index, -+ call, false); - 
cs->indirect_info->offset = offset; - cs->indirect_info->agg_contents = 1; - cs->indirect_info->member_ptr = 1; -@@ -2306,7 +2609,7 @@ ipa_analyze_virtual_call_uses (struct ip - if (TREE_CODE (obj) != SSA_NAME) - return; - -- struct ipa_node_params *info = fbi->info; -+ class ipa_node_params *info = fbi->info; - if (SSA_NAME_IS_DEFAULT_DEF (obj)) - { - struct ipa_jump_func jfunc; -@@ -2337,8 +2640,9 @@ ipa_analyze_virtual_call_uses (struct ip - return; - } - -- struct cgraph_edge *cs = ipa_note_param_call (fbi->node, index, call); -- struct cgraph_indirect_call_info *ii = cs->indirect_info; -+ struct cgraph_edge *cs = ipa_note_param_call (fbi->node, index, -+ call, true); -+ class cgraph_indirect_call_info *ii = cs->indirect_info; - ii->offset = anc_offset; - ii->otr_token = tree_to_uhwi (OBJ_TYPE_REF_TOKEN (target)); - ii->otr_type = obj_type_ref_class (target); -@@ -2410,7 +2714,7 @@ ipa_analyze_stmt_uses (struct ipa_func_b - static bool - visit_ref_for_mod_analysis (gimple *, tree op, tree, void *data) - { -- struct ipa_node_params *info = (struct ipa_node_params *) data; -+ class ipa_node_params *info = (class ipa_node_params *) data; - - op = get_base_address (op); - if (op -@@ -2458,7 +2762,7 @@ ipa_analyze_params_uses_in_bb (struct ip - static void - ipa_analyze_controlled_uses (struct cgraph_node *node) - { -- struct ipa_node_params *info = IPA_NODE_REF (node); -+ class ipa_node_params *info = IPA_NODE_REF (node); - - for (int i = 0; i < ipa_get_param_count (info); i++) - { -@@ -2550,11 +2854,11 @@ void - ipa_analyze_node (struct cgraph_node *node) - { - struct ipa_func_body_info fbi; -- struct ipa_node_params *info; -+ class ipa_node_params *info; - - ipa_check_create_node_params (); - ipa_check_create_edge_args (); -- info = IPA_NODE_REF (node); -+ info = IPA_NODE_REF_GET_CREATE (node); - - if (info->analysis_done) - return; -@@ -2610,22 +2914,96 @@ static void - update_jump_functions_after_inlining (struct cgraph_edge *cs, - struct cgraph_edge *e) - { -- struct ipa_edge_args *top = IPA_EDGE_REF (cs); -- struct ipa_edge_args *args = IPA_EDGE_REF (e); -+ class ipa_edge_args *top = IPA_EDGE_REF (cs); -+ class ipa_edge_args *args = IPA_EDGE_REF (e); -+ if (!args) -+ return; - int count = ipa_get_cs_argument_count (args); - int i; - - for (i = 0; i < count; i++) - { - struct ipa_jump_func *dst = ipa_get_ith_jump_func (args, i); -- struct ipa_polymorphic_call_context *dst_ctx -+ class ipa_polymorphic_call_context *dst_ctx - = ipa_get_ith_polymorhic_call_context (args, i); - -+ if (dst->agg.items) -+ { -+ struct ipa_agg_jf_item *item; -+ int j; -+ -+ FOR_EACH_VEC_ELT (*dst->agg.items, j, item) -+ { -+ int dst_fid; -+ struct ipa_jump_func *src; -+ -+ if (item->jftype != IPA_JF_PASS_THROUGH -+ && item->jftype != IPA_JF_LOAD_AGG) -+ continue; -+ -+ dst_fid = item->value.pass_through.formal_id; -+ if (!top || dst_fid >= ipa_get_cs_argument_count (top)) -+ { -+ item->jftype = IPA_JF_UNKNOWN; -+ continue; -+ } -+ -+ item->value.pass_through.formal_id = -1; -+ src = ipa_get_ith_jump_func (top, dst_fid); -+ if (src->type == IPA_JF_CONST) -+ { -+ if (item->jftype == IPA_JF_PASS_THROUGH -+ && item->value.pass_through.operation == NOP_EXPR) -+ { -+ item->jftype = IPA_JF_CONST; -+ item->value.constant = src->value.constant.value; -+ continue; -+ } -+ } -+ else if (src->type == IPA_JF_PASS_THROUGH -+ && src->value.pass_through.operation == NOP_EXPR) -+ { -+ if (item->jftype == IPA_JF_PASS_THROUGH -+ || !item->value.load_agg.by_ref -+ || src->value.pass_through.agg_preserved) -+ 
item->value.pass_through.formal_id -+ = src->value.pass_through.formal_id; -+ } -+ else if (src->type == IPA_JF_ANCESTOR) -+ { -+ if (item->jftype == IPA_JF_PASS_THROUGH) -+ { -+ if (!src->value.ancestor.offset) -+ item->value.pass_through.formal_id -+ = src->value.ancestor.formal_id; -+ } -+ else if (src->value.ancestor.agg_preserved) -+ { -+ gcc_checking_assert (item->value.load_agg.by_ref); -+ -+ item->value.pass_through.formal_id -+ = src->value.ancestor.formal_id; -+ item->value.load_agg.offset -+ += src->value.ancestor.offset; -+ } -+ } -+ -+ if (item->value.pass_through.formal_id < 0) -+ item->jftype = IPA_JF_UNKNOWN; -+ } -+ } -+ -+ if (!top) -+ { -+ ipa_set_jf_unknown (dst); -+ continue; -+ } -+ - if (dst->type == IPA_JF_ANCESTOR) - { - struct ipa_jump_func *src; - int dst_fid = dst->value.ancestor.formal_id; -- struct ipa_polymorphic_call_context *src_ctx -+ class ipa_polymorphic_call_context *src_ctx - = ipa_get_ith_polymorhic_call_context (top, dst_fid); - - /* Variable number of arguments can cause havoc if we try to access -@@ -2641,7 +3019,7 @@ update_jump_functions_after_inlining (st - - if (src_ctx && !src_ctx->useless_p ()) - { -- struct ipa_polymorphic_call_context ctx = *src_ctx; -+ class ipa_polymorphic_call_context ctx = *src_ctx; - - /* TODO: Make type preserved safe WRT contexts. */ - if (!ipa_get_jf_ancestor_type_preserved (dst)) -@@ -2660,8 +3038,11 @@ update_jump_functions_after_inlining (st - } - } - -- if (src->agg.items -- && (dst->value.ancestor.agg_preserved || !src->agg.by_ref)) -+ /* Parameter and argument in ancestor jump function must be pointer -+ type, which means access to aggregate must be by-reference. */ -+ gcc_assert (!src->agg.items || src->agg.by_ref); -+ -+ if (src->agg.items && dst->value.ancestor.agg_preserved) - { - struct ipa_agg_jf_item *item; - int j; -@@ -2705,18 +3086,18 @@ update_jump_functions_after_inlining (st - /* We must check range due to calls with variable number of arguments - and we cannot combine jump functions with operations. */ - if (dst->value.pass_through.operation == NOP_EXPR -- && (dst->value.pass_through.formal_id -+ && (top && dst->value.pass_through.formal_id - < ipa_get_cs_argument_count (top))) - { - int dst_fid = dst->value.pass_through.formal_id; - src = ipa_get_ith_jump_func (top, dst_fid); - bool dst_agg_p = ipa_get_jf_pass_through_agg_preserved (dst); -- struct ipa_polymorphic_call_context *src_ctx -+ class ipa_polymorphic_call_context *src_ctx - = ipa_get_ith_polymorhic_call_context (top, dst_fid); - - if (src_ctx && !src_ctx->useless_p ()) - { -- struct ipa_polymorphic_call_context ctx = *src_ctx; -+ class ipa_polymorphic_call_context ctx = *src_ctx; - - /* TODO: Make type preserved safe WRT contexts. */ - if (!ipa_get_jf_pass_through_type_preserved (dst)) -@@ -2856,7 +3237,7 @@ ipa_make_edge_direct_to_target (struct c - - /* Because may-edges are not explicitely represented and vtable may be external, - we may create the first reference to the object in the unit. */ -- if (!callee || callee->global.inlined_to) -+ if (!callee || callee->inlined_to) - { - - /* We are better to ensure we can refer to it. -@@ -2909,7 +3290,7 @@ ipa_make_edge_direct_to_target (struct c - - /* We cannot make edges to inline clones. It is bug that someone removed - the cgraph node too early. 
*/ -- gcc_assert (!callee->global.inlined_to); -+ gcc_assert (!callee->inlined_to); - - if (dump_file && !unreachable) - { -@@ -3059,18 +3440,19 @@ ipa_find_agg_cst_from_init (tree scalar, - return find_constructor_constant_at_offset (DECL_INITIAL (scalar), offset); - } - --/* Retrieve value from aggregate jump function AGG or static initializer of -- SCALAR (which can be NULL) for the given OFFSET or return NULL if there is -- none. BY_REF specifies whether the value has to be passed by reference or -- by value. If FROM_GLOBAL_CONSTANT is non-NULL, then the boolean it points -- to is set to true if the value comes from an initializer of a constant. */ -+/* Retrieve value from AGG, a set of known offset/value for an aggregate or -+ static initializer of SCALAR (which can be NULL) for the given OFFSET or -+ return NULL if there is none. BY_REF specifies whether the value has to be -+ passed by reference or by value. If FROM_GLOBAL_CONSTANT is non-NULL, then -+ the boolean it points to is set to true if the value comes from an -+ initializer of a constant. */ - - tree --ipa_find_agg_cst_for_param (struct ipa_agg_jump_function *agg, tree scalar, -+ipa_find_agg_cst_for_param (struct ipa_agg_value_set *agg, tree scalar, - HOST_WIDE_INT offset, bool by_ref, - bool *from_global_constant) - { -- struct ipa_agg_jf_item *item; -+ struct ipa_agg_value *item; - int i; - - if (scalar) -@@ -3088,7 +3470,7 @@ ipa_find_agg_cst_for_param (struct ipa_a - || by_ref != agg->by_ref) - return NULL; - -- FOR_EACH_VEC_SAFE_ELT (agg->items, i, item) -+ FOR_EACH_VEC_ELT (agg->items, i, item) - if (item->offset == offset) - { - /* Currently we do not have clobber values, return NULL for them once -@@ -3184,12 +3566,14 @@ try_decrement_rdesc_refcount (struct ipa - pointer formal parameter described by jump function JFUNC. TARGET_TYPE is - the type of the parameter to which the result of JFUNC is passed. If it can - be determined, return the newly direct edge, otherwise return NULL. -- NEW_ROOT_INFO is the node info that JFUNC lattices are relative to. */ -+ NEW_ROOT and NEW_ROOT_INFO is the node and its info that JFUNC lattices are -+ relative to. */ - - static struct cgraph_edge * - try_make_edge_direct_simple_call (struct cgraph_edge *ie, - struct ipa_jump_func *jfunc, tree target_type, -- struct ipa_node_params *new_root_info) -+ struct cgraph_node *new_root, -+ class ipa_node_params *new_root_info) - { - struct cgraph_edge *cs; - tree target; -@@ -3198,10 +3582,14 @@ try_make_edge_direct_simple_call (struct - if (agg_contents) - { - bool from_global_constant; -- target = ipa_find_agg_cst_for_param (&jfunc->agg, scalar, -+ ipa_agg_value_set agg = ipa_agg_value_set_from_jfunc (new_root_info, -+ new_root, -+ &jfunc->agg); -+ target = ipa_find_agg_cst_for_param (&agg, scalar, - ie->indirect_info->offset, - ie->indirect_info->by_ref, - &from_global_constant); -+ agg.release (); - if (target - && !from_global_constant - && !ie->indirect_info->guaranteed_unmodified) -@@ -3255,12 +3643,16 @@ ipa_impossible_devirt_target (struct cgr - call based on a formal parameter which is described by jump function JFUNC - and if it can be determined, make it direct and return the direct edge. - Otherwise, return NULL. CTX describes the polymorphic context that the -- parameter the call is based on brings along with it. */ -+ parameter the call is based on brings along with it. NEW_ROOT and -+ NEW_ROOT_INFO is the node and its info that JFUNC lattices are relative -+ to. 
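A hypothetical indirect call that the aggregate-contents lookup referenced above (ipa_find_agg_cst_for_param over the values reconstructed from the jump function) can resolve to a direct call:

   struct ops { void (*handler) (void); };

   void real_handler (void) { }

   static void
   dispatch (struct ops *o)
   {
     o->handler ();   /* indirect call through the aggregate slot */
   }

   void
   setup (void)
   {
     struct ops o = { real_handler };
     dispatch (&o);   /* the aggregate jump function for '&o' records
                         handler == real_handler, so after inlining or
                         cloning the call in dispatch can be made direct */
   }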
*/ - - static struct cgraph_edge * - try_make_edge_direct_virtual_call (struct cgraph_edge *ie, - struct ipa_jump_func *jfunc, -- struct ipa_polymorphic_call_context ctx) -+ class ipa_polymorphic_call_context ctx, -+ struct cgraph_node *new_root, -+ class ipa_node_params *new_root_info) - { - tree target = NULL; - bool speculative = false; -@@ -3278,9 +3670,13 @@ try_make_edge_direct_virtual_call (struc - unsigned HOST_WIDE_INT offset; - tree scalar = (jfunc->type == IPA_JF_CONST) ? ipa_get_jf_constant (jfunc) - : NULL; -- tree t = ipa_find_agg_cst_for_param (&jfunc->agg, scalar, -+ ipa_agg_value_set agg = ipa_agg_value_set_from_jfunc (new_root_info, -+ new_root, -+ &jfunc->agg); -+ tree t = ipa_find_agg_cst_for_param (&agg, scalar, - ie->indirect_info->offset, - true); -+ agg.release (); - if (t && vtable_pointer_value_to_vtable (t, &vtable, &offset)) - { - bool can_refer; -@@ -3370,21 +3766,22 @@ update_indirect_edges_after_inlining (st - struct cgraph_node *node, - vec *new_edges) - { -- struct ipa_edge_args *top; -+ class ipa_edge_args *top; - struct cgraph_edge *ie, *next_ie, *new_direct_edge; -- struct ipa_node_params *new_root_info, *inlined_node_info; -+ struct cgraph_node *new_root; -+ class ipa_node_params *new_root_info, *inlined_node_info; - bool res = false; - - ipa_check_create_edge_args (); - top = IPA_EDGE_REF (cs); -- new_root_info = IPA_NODE_REF (cs->caller->global.inlined_to -- ? cs->caller->global.inlined_to -- : cs->caller); -+ new_root = cs->caller->inlined_to -+ ? cs->caller->inlined_to : cs->caller; -+ new_root_info = IPA_NODE_REF (new_root); - inlined_node_info = IPA_NODE_REF (cs->callee->function_symbol ()); - - for (ie = node->indirect_calls; ie; ie = next_ie) - { -- struct cgraph_indirect_call_info *ici = ie->indirect_info; -+ class cgraph_indirect_call_info *ici = ie->indirect_info; - struct ipa_jump_func *jfunc; - int param_index; - cgraph_node *spec_target = NULL; -@@ -3395,7 +3792,7 @@ update_indirect_edges_after_inlining (st - continue; - - /* We must check range due to calls with variable number of arguments: */ -- if (ici->param_index >= ipa_get_cs_argument_count (top)) -+ if (!top || ici->param_index >= ipa_get_cs_argument_count (top)) - { - ici->param_index = -1; - continue; -@@ -3418,13 +3815,16 @@ update_indirect_edges_after_inlining (st - { - ipa_polymorphic_call_context ctx; - ctx = ipa_context_from_jfunc (new_root_info, cs, param_index, jfunc); -- new_direct_edge = try_make_edge_direct_virtual_call (ie, jfunc, ctx); -+ new_direct_edge = try_make_edge_direct_virtual_call (ie, jfunc, ctx, -+ new_root, -+ new_root_info); - } - else - { - tree target_type = ipa_get_type (inlined_node_info, param_index); - new_direct_edge = try_make_edge_direct_simple_call (ie, jfunc, - target_type, -+ new_root, - new_root_info); - } - -@@ -3470,6 +3870,11 @@ update_indirect_edges_after_inlining (st - if (ici->polymorphic - && !ipa_get_jf_pass_through_type_preserved (jfunc)) - ici->vptr_changed = true; -+ ipa_set_param_used_by_indirect_call (new_root_info, -+ ici->param_index, true); -+ if (ici->polymorphic) -+ ipa_set_param_used_by_polymorphic_call (new_root_info, -+ ici->param_index, true); - } - } - else if (jfunc->type == IPA_JF_ANCESTOR) -@@ -3485,6 +3890,11 @@ update_indirect_edges_after_inlining (st - if (ici->polymorphic - && !ipa_get_jf_ancestor_type_preserved (jfunc)) - ici->vptr_changed = true; -+ ipa_set_param_used_by_indirect_call (new_root_info, -+ ici->param_index, true); -+ if (ici->polymorphic) -+ ipa_set_param_used_by_polymorphic_call (new_root_info, 
-+ ici->param_index, true); - } - } - else -@@ -3541,13 +3951,18 @@ combine_controlled_uses_counters (int c, - static void - propagate_controlled_uses (struct cgraph_edge *cs) - { -- struct ipa_edge_args *args = IPA_EDGE_REF (cs); -- struct cgraph_node *new_root = cs->caller->global.inlined_to -- ? cs->caller->global.inlined_to : cs->caller; -- struct ipa_node_params *new_root_info = IPA_NODE_REF (new_root); -- struct ipa_node_params *old_root_info = IPA_NODE_REF (cs->callee); -+ class ipa_edge_args *args = IPA_EDGE_REF (cs); -+ if (!args) -+ return; -+ struct cgraph_node *new_root = cs->caller->inlined_to -+ ? cs->caller->inlined_to : cs->caller; -+ class ipa_node_params *new_root_info = IPA_NODE_REF (new_root); -+ class ipa_node_params *old_root_info = IPA_NODE_REF (cs->callee); - int count, i; - -+ if (!old_root_info) -+ return; -+ - count = MIN (ipa_get_cs_argument_count (args), - ipa_get_param_count (old_root_info)); - for (i = 0; i < count; i++) -@@ -3608,9 +4023,9 @@ propagate_controlled_uses (struct cgraph - gcc_checking_assert (ok); - - clone = cs->caller; -- while (clone->global.inlined_to -- && clone != rdesc->cs->caller -- && IPA_NODE_REF (clone)->ipcp_orig_node) -+ while (clone->inlined_to -+ && clone->ipcp_clone -+ && clone != rdesc->cs->caller) - { - struct ipa_ref *ref; - ref = clone->find_reference (n, NULL, 0); -@@ -3669,6 +4084,7 @@ ipa_propagate_indirect_call_infos (struc - - propagate_controlled_uses (cs); - changed = propagate_info_to_inlined_callees (cs, cs->callee, new_edges); -+ ipa_node_params_sum->remove (cs->callee); - - return changed; - } -@@ -3830,16 +4246,16 @@ ipa_edge_args_sum_t::duplicate (cgraph_e - We need to find the duplicate that refers to our tree of - inline clones. */ - -- gcc_assert (dst->caller->global.inlined_to); -+ gcc_assert (dst->caller->inlined_to); - for (dst_rdesc = src_rdesc->next_duplicate; - dst_rdesc; - dst_rdesc = dst_rdesc->next_duplicate) - { - struct cgraph_node *top; -- top = dst_rdesc->cs->caller->global.inlined_to -- ? dst_rdesc->cs->caller->global.inlined_to -+ top = dst_rdesc->cs->caller->inlined_to -+ ? dst_rdesc->cs->caller->inlined_to - : dst_rdesc->cs->caller; -- if (dst->caller->global.inlined_to == top) -+ if (dst->caller->inlined_to == top) - break; - } - gcc_assert (dst_rdesc); -@@ -3849,9 +4265,9 @@ ipa_edge_args_sum_t::duplicate (cgraph_e - else if (dst_jf->type == IPA_JF_PASS_THROUGH - && src->caller == dst->caller) - { -- struct cgraph_node *inline_root = dst->caller->global.inlined_to -- ? dst->caller->global.inlined_to : dst->caller; -- struct ipa_node_params *root_info = IPA_NODE_REF (inline_root); -+ struct cgraph_node *inline_root = dst->caller->inlined_to -+ ? 
dst->caller->inlined_to : dst->caller; -+ class ipa_node_params *root_info = IPA_NODE_REF (inline_root); - int idx = ipa_get_jf_pass_through_formal_id (dst_jf); - - int c = ipa_get_controlled_uses (root_info, idx); -@@ -3995,7 +4411,7 @@ void - ipa_print_node_params (FILE *f, struct cgraph_node *node) - { - int i, count; -- struct ipa_node_params *info; -+ class ipa_node_params *info; - - if (!node->definition) - return; -@@ -4010,6 +4426,12 @@ ipa_print_node_params (FILE *f, struct c - ipa_dump_param (f, info, i); - if (ipa_is_param_used (info, i)) - fprintf (f, " used"); -+ if (ipa_is_param_used_by_ipa_predicates (info, i)) -+ fprintf (f, " used_by_ipa_predicates"); -+ if (ipa_is_param_used_by_indirect_call (info, i)) -+ fprintf (f, " used_by_indirect_call"); -+ if (ipa_is_param_used_by_polymorphic_call (info, i)) -+ fprintf (f, " used_by_polymorphic_call"); - c = ipa_get_controlled_uses (info, i); - if (c == IPA_UNDESCRIBED_USE) - fprintf (f, " undescribed_use"); -@@ -4104,6 +4526,8 @@ ipa_write_jump_function (struct output_b - bp_pack_value (&bp, jump_func->value.ancestor.agg_preserved, 1); - streamer_write_bitpack (&bp); - break; -+ default: -+ fatal_error (UNKNOWN_LOCATION, "invalid jump function in LTO stream"); - } - - count = vec_safe_length (jump_func->agg.items); -@@ -4117,8 +4541,36 @@ ipa_write_jump_function (struct output_b - - FOR_EACH_VEC_SAFE_ELT (jump_func->agg.items, i, item) - { -+ stream_write_tree (ob, item->type, true); - streamer_write_uhwi (ob, item->offset); -- stream_write_tree (ob, item->value, true); -+ streamer_write_uhwi (ob, item->jftype); -+ switch (item->jftype) -+ { -+ case IPA_JF_UNKNOWN: -+ break; -+ case IPA_JF_CONST: -+ stream_write_tree (ob, item->value.constant, true); -+ break; -+ case IPA_JF_PASS_THROUGH: -+ case IPA_JF_LOAD_AGG: -+ streamer_write_uhwi (ob, item->value.pass_through.operation); -+ streamer_write_uhwi (ob, item->value.pass_through.formal_id); -+ if (TREE_CODE_CLASS (item->value.pass_through.operation) -+ != tcc_unary) -+ stream_write_tree (ob, item->value.pass_through.operand, true); -+ if (item->jftype == IPA_JF_LOAD_AGG) -+ { -+ stream_write_tree (ob, item->value.load_agg.type, true); -+ streamer_write_uhwi (ob, item->value.load_agg.offset); -+ bp = bitpack_create (ob->main_stream); -+ bp_pack_value (&bp, item->value.load_agg.by_ref, 1); -+ streamer_write_bitpack (&bp); -+ } -+ break; -+ default: -+ fatal_error (UNKNOWN_LOCATION, -+ "invalid jump function in LTO stream"); -+ } - } - - bp = bitpack_create (ob->main_stream); -@@ -4143,10 +4595,10 @@ ipa_write_jump_function (struct output_b - /* Read in jump function JUMP_FUNC from IB. 
*/ - - static void --ipa_read_jump_function (struct lto_input_block *ib, -+ipa_read_jump_function (class lto_input_block *ib, - struct ipa_jump_func *jump_func, - struct cgraph_edge *cs, -- struct data_in *data_in, -+ class data_in *data_in, - bool prevails) - { - enum jump_func_type jftype; -@@ -4215,8 +4667,39 @@ ipa_read_jump_function (struct lto_input - for (i = 0; i < count; i++) - { - struct ipa_agg_jf_item item; -+ item.type = stream_read_tree (ib, data_in); - item.offset = streamer_read_uhwi (ib); -- item.value = stream_read_tree (ib, data_in); -+ item.jftype = (enum jump_func_type) streamer_read_uhwi (ib); -+ -+ switch (item.jftype) -+ { -+ case IPA_JF_UNKNOWN: -+ break; -+ case IPA_JF_CONST: -+ item.value.constant = stream_read_tree (ib, data_in); -+ break; -+ case IPA_JF_PASS_THROUGH: -+ case IPA_JF_LOAD_AGG: -+ operation = (enum tree_code) streamer_read_uhwi (ib); -+ item.value.pass_through.operation = operation; -+ item.value.pass_through.formal_id = streamer_read_uhwi (ib); -+ if (TREE_CODE_CLASS (operation) == tcc_unary) -+ item.value.pass_through.operand = NULL_TREE; -+ else -+ item.value.pass_through.operand = stream_read_tree (ib, data_in); -+ if (item.jftype == IPA_JF_LOAD_AGG) -+ { -+ struct bitpack_d bp; -+ item.value.load_agg.type = stream_read_tree (ib, data_in); -+ item.value.load_agg.offset = streamer_read_uhwi (ib); -+ bp = streamer_read_bitpack (ib); -+ item.value.load_agg.by_ref = bp_unpack_value (&bp, 1); -+ } -+ break; -+ default: -+ fatal_error (UNKNOWN_LOCATION, -+ "invalid jump function in LTO stream"); -+ } - if (prevails) - jump_func->agg.items->quick_push (item); - } -@@ -4255,7 +4738,7 @@ static void - ipa_write_indirect_edge_info (struct output_block *ob, - struct cgraph_edge *cs) - { -- struct cgraph_indirect_call_info *ii = cs->indirect_info; -+ class cgraph_indirect_call_info *ii = cs->indirect_info; - struct bitpack_d bp; - - streamer_write_hwi (ob, ii->param_index); -@@ -4284,11 +4767,12 @@ ipa_write_indirect_edge_info (struct out - relevant to indirect inlining from IB. */ - - static void --ipa_read_indirect_edge_info (struct lto_input_block *ib, -- struct data_in *data_in, -- struct cgraph_edge *cs) -+ipa_read_indirect_edge_info (class lto_input_block *ib, -+ class data_in *data_in, -+ struct cgraph_edge *cs, -+ class ipa_node_params *info) - { -- struct cgraph_indirect_call_info *ii = cs->indirect_info; -+ class cgraph_indirect_call_info *ii = cs->indirect_info; - struct bitpack_d bp; - - ii->param_index = (int) streamer_read_hwi (ib); -@@ -4309,6 +4793,14 @@ ipa_read_indirect_edge_info (struct lto_ - ii->otr_type = stream_read_tree (ib, data_in); - ii->context.stream_in (ib, data_in); - } -+ if (info && ii->param_index >= 0) -+ { -+ if (ii->polymorphic) -+ ipa_set_param_used_by_polymorphic_call (info, -+ ii->param_index , true); -+ ipa_set_param_used_by_indirect_call (info, -+ ii->param_index, true); -+ } - } - - /* Stream out NODE info to OB. 
*/ -@@ -4318,7 +4810,7 @@ ipa_write_node_info (struct output_block - { - int node_ref; - lto_symtab_encoder_t encoder; -- struct ipa_node_params *info = IPA_NODE_REF (node); -+ class ipa_node_params *info = IPA_NODE_REF (node); - int j; - struct cgraph_edge *e; - struct bitpack_d bp; -@@ -4345,7 +4837,13 @@ ipa_write_node_info (struct output_block - } - for (e = node->callees; e; e = e->next_callee) - { -- struct ipa_edge_args *args = IPA_EDGE_REF (e); -+ class ipa_edge_args *args = IPA_EDGE_REF (e); -+ -+ if (!args) -+ { -+ streamer_write_uhwi (ob, 0); -+ continue; -+ } - - streamer_write_uhwi (ob, - ipa_get_cs_argument_count (args) * 2 -@@ -4359,16 +4857,20 @@ ipa_write_node_info (struct output_block - } - for (e = node->indirect_calls; e; e = e->next_callee) - { -- struct ipa_edge_args *args = IPA_EDGE_REF (e); -- -- streamer_write_uhwi (ob, -- ipa_get_cs_argument_count (args) * 2 -- + (args->polymorphic_call_contexts != NULL)); -- for (j = 0; j < ipa_get_cs_argument_count (args); j++) -+ class ipa_edge_args *args = IPA_EDGE_REF (e); -+ if (!args) -+ streamer_write_uhwi (ob, 0); -+ else - { -- ipa_write_jump_function (ob, ipa_get_ith_jump_func (args, j)); -- if (args->polymorphic_call_contexts != NULL) -- ipa_get_ith_polymorhic_call_context (args, j)->stream_out (ob); -+ streamer_write_uhwi (ob, -+ ipa_get_cs_argument_count (args) * 2 -+ + (args->polymorphic_call_contexts != NULL)); -+ for (j = 0; j < ipa_get_cs_argument_count (args); j++) -+ { -+ ipa_write_jump_function (ob, ipa_get_ith_jump_func (args, j)); -+ if (args->polymorphic_call_contexts != NULL) -+ ipa_get_ith_polymorhic_call_context (args, j)->stream_out (ob); -+ } - } - ipa_write_indirect_edge_info (ob, e); - } -@@ -4377,8 +4879,8 @@ ipa_write_node_info (struct output_block - /* Stream in edge E from IB. */ - - static void --ipa_read_edge_info (struct lto_input_block *ib, -- struct data_in *data_in, -+ipa_read_edge_info (class lto_input_block *ib, -+ class data_in *data_in, - struct cgraph_edge *e, bool prevails) - { - int count = streamer_read_uhwi (ib); -@@ -4389,7 +4891,7 @@ ipa_read_edge_info (struct lto_input_blo - return; - if (prevails && e->possibly_call_in_translation_unit_p ()) - { -- struct ipa_edge_args *args = IPA_EDGE_REF (e); -+ class ipa_edge_args *args = IPA_EDGE_REF_GET_CREATE (e); - vec_safe_grow_cleared (args->jump_functions, count); - if (contexts_computed) - vec_safe_grow_cleared (args->polymorphic_call_contexts, count); -@@ -4411,7 +4913,7 @@ ipa_read_edge_info (struct lto_input_blo - data_in, prevails); - if (contexts_computed) - { -- struct ipa_polymorphic_call_context ctx; -+ class ipa_polymorphic_call_context ctx; - ctx.stream_in (ib, data_in); - } - } -@@ -4421,14 +4923,15 @@ ipa_read_edge_info (struct lto_input_blo - /* Stream in NODE info from IB. */ - - static void --ipa_read_node_info (struct lto_input_block *ib, struct cgraph_node *node, -- struct data_in *data_in) -+ipa_read_node_info (class lto_input_block *ib, struct cgraph_node *node, -+ class data_in *data_in) - { - int k; - struct cgraph_edge *e; - struct bitpack_d bp; - bool prevails = node->prevailing_p (); -- struct ipa_node_params *info = prevails ? IPA_NODE_REF (node) : NULL; -+ class ipa_node_params *info = prevails -+ ? 
IPA_NODE_REF_GET_CREATE (node) : NULL; - - int param_count = streamer_read_uhwi (ib); - if (prevails) -@@ -4468,7 +4971,7 @@ ipa_read_node_info (struct lto_input_blo - for (e = node->indirect_calls; e; e = e->next_callee) - { - ipa_read_edge_info (ib, data_in, e, prevails); -- ipa_read_indirect_edge_info (ib, data_in, e); -+ ipa_read_indirect_edge_info (ib, data_in, e, info); - } - } - -@@ -4525,7 +5028,7 @@ ipa_prop_read_section (struct lto_file_d - const int cfg_offset = sizeof (struct lto_function_header); - const int main_offset = cfg_offset + header->cfg_size; - const int string_offset = main_offset + header->main_size; -- struct data_in *data_in; -+ class data_in *data_in; - unsigned int i; - unsigned int count; - -@@ -4774,7 +5277,7 @@ read_replacements_section (struct lto_fi - const int cfg_offset = sizeof (struct lto_function_header); - const int main_offset = cfg_offset + header->cfg_size; - const int string_offset = main_offset + header->main_size; -- struct data_in *data_in; -+ class data_in *data_in; - unsigned int i; - unsigned int count; - -@@ -4888,7 +5391,8 @@ ipcp_modif_dom_walker::before_dom_childr - struct ipa_agg_replacement_value *v; - gimple *stmt = gsi_stmt (gsi); - tree rhs, val, t; -- HOST_WIDE_INT offset, size; -+ HOST_WIDE_INT offset; -+ poly_int64 size; - int index; - bool by_ref, vce; - -@@ -4923,7 +5427,8 @@ ipcp_modif_dom_walker::before_dom_childr - break; - if (!v - || v->by_ref != by_ref -- || tree_to_shwi (TYPE_SIZE (TREE_TYPE (v->value))) != size) -+ || maybe_ne (tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (v->value))), -+ size)) - continue; - - gcc_checking_assert (is_gimple_ip_invariant (v->value)); -@@ -5194,4 +5699,12 @@ ipcp_transform_function (struct cgraph_n - return TODO_update_ssa_only_virtuals; - } - -+ -+/* Return true if OTHER describes same agg value. */ -+bool -+ipa_agg_value::equal_to (const ipa_agg_value &other) -+{ -+ return offset == other.offset -+ && operand_equal_p (value, other.value, 0); -+} - #include "gt-ipa-prop.h" -diff -Nurp a/gcc/ipa-prop.h b/gcc/ipa-prop.h ---- a/gcc/ipa-prop.h 2020-04-30 15:14:04.624000000 +0800 -+++ b/gcc/ipa-prop.h 2020-04-30 15:14:56.696000000 +0800 -@@ -39,6 +39,15 @@ along with GCC; see the file COPYING3. - argument. - Unknown - neither of the above. - -+ IPA_JF_LOAD_AGG is a compound pass-through jump function, in which primary -+ operation on formal parameter is memory dereference that loads a value from -+ a part of an aggregate, which is represented or pointed to by the formal -+ parameter. Moreover, an additional unary/binary operation can be applied on -+ the loaded value, and final result is passed as actual argument of callee -+ (e.g. *(param_1(D) + 4) op 24 ). It is meant to describe usage of aggregate -+ parameter or by-reference parameter referenced in argument passing, commonly -+ found in C++ and Fortran. -+ - IPA_JF_ANCESTOR is a special pass-through jump function, which means that - the result is an address of a part of the object pointed to by the formal - parameter to which the function refers. 
It is mainly intended to represent -@@ -60,6 +69,7 @@ enum jump_func_type - IPA_JF_UNKNOWN = 0, /* newly allocated and zeroed jump functions default */ - IPA_JF_CONST, /* represented by field costant */ - IPA_JF_PASS_THROUGH, /* represented by field pass_through */ -+ IPA_JF_LOAD_AGG, /* represented by field load_agg */ - IPA_JF_ANCESTOR /* represented by field ancestor */ - }; - -@@ -97,6 +107,26 @@ struct GTY(()) ipa_pass_through_data - unsigned agg_preserved : 1; - }; - -+/* Structure holding data required to describe a load-value-from-aggregate -+ jump function. */ -+ -+struct GTY(()) ipa_load_agg_data -+{ -+ /* Inherit from pass through jump function, describing unary/binary -+ operation on the value loaded from aggregate that is represented or -+ pointed to by the formal parameter, specified by formal_id in this -+ pass_through jump function data structure. */ -+ struct ipa_pass_through_data pass_through; -+ /* Type of the value loaded from the aggregate. */ -+ tree type; -+ /* Offset at which the value is located within the aggregate. */ -+ HOST_WIDE_INT offset; -+ /* True if loaded by reference (the aggregate is pointed to by the formal -+ parameter) or false if loaded by value (the aggregate is represented -+ by the formal parameter). */ -+ bool by_ref; -+}; -+ - /* Structure holding data required to describe an ancestor pass-through - jump function. */ - -@@ -110,38 +140,139 @@ struct GTY(()) ipa_ancestor_jf_data - unsigned agg_preserved : 1; - }; - --/* An element in an aggegate part of a jump function describing a known value -- at a given offset. When it is part of a pass-through jump function with -- agg_preserved set or an ancestor jump function with agg_preserved set, all -- unlisted positions are assumed to be preserved but the value can be a type -- node, which means that the particular piece (starting at offset and having -- the size of the type) is clobbered with an unknown value. When -- agg_preserved is false or the type of the containing jump function is -- different, all unlisted parts are assumed to be unknown and all values must -- fulfill is_gimple_ip_invariant. */ -+/* A jump function for an aggregate part at a given offset, which describes how -+ it content value is generated. All unlisted positions are assumed to have a -+ value defined in an unknown way. */ - - struct GTY(()) ipa_agg_jf_item - { -- /* The offset at which the known value is located within the aggregate. */ -+ /* The offset for the aggregate part. */ - HOST_WIDE_INT offset; - -- /* The known constant or type if this is a clobber. */ -- tree value; --}; -+ /* Data type of the aggregate part. */ -+ tree type; - -+ /* Jump function type. */ -+ enum jump_func_type jftype; - --/* Aggregate jump function - i.e. description of contents of aggregates passed -- either by reference or value. */ -+ /* Represents a value of jump function. constant represents the actual constant -+ in constant jump function content. pass_through is used only in simple pass -+ through jump function context. load_agg is for load-value-from-aggregate -+ jump function context. */ -+ union jump_func_agg_value -+ { -+ tree GTY ((tag ("IPA_JF_CONST"))) constant; -+ struct ipa_pass_through_data GTY ((tag ("IPA_JF_PASS_THROUGH"))) pass_through; -+ struct ipa_load_agg_data GTY ((tag ("IPA_JF_LOAD_AGG"))) load_agg; -+ } GTY ((desc ("%1.jftype"))) value; -+}; -+ -+/* Jump functions describing a set of aggregate contents. */ - - struct GTY(()) ipa_agg_jump_function - { -- /* Description of the individual items. 
*/ -+ /* Description of the individual jump function item. */ - vec *items; -- /* True if the data was passed by reference (as opposed to by value). */ -+ /* True if the data was passed by reference (as opposed to by value). */ -+ bool by_ref; -+}; -+ -+/* An element in an aggregate part describing a known value at a given offset. -+ All unlisted positions are assumed to be unknown and all listed values must -+ fulfill is_gimple_ip_invariant. */ -+ -+struct ipa_agg_value -+{ -+ /* The offset at which the known value is located within the aggregate. */ -+ HOST_WIDE_INT offset; -+ -+ /* The known constant. */ -+ tree value; -+ -+ /* Return true if OTHER describes same agg value. */ -+ bool equal_to (const ipa_agg_value &other); -+}; -+ -+/* Structure describing a set of known offset/value for aggregate. */ -+ -+struct ipa_agg_value_set -+{ -+ /* Description of the individual item. */ -+ vec items; -+ /* True if the data was passed by reference (as opposed to by value). */ - bool by_ref; -+ -+ /* Return true if OTHER describes same agg values. */ -+ bool equal_to (const ipa_agg_value_set &other) -+ { -+ if (by_ref != other.by_ref) -+ return false; -+ if (items.length () != other.items.length ()) -+ return false; -+ for (unsigned int i = 0; i < items.length (); i++) -+ if (!items[i].equal_to (other.items[i])) -+ return false; -+ return true; -+ } -+ -+ /* Return true if there is any value for aggregate. */ -+ bool is_empty () const -+ { -+ return items.is_empty (); -+ } -+ -+ ipa_agg_value_set copy () const -+ { -+ ipa_agg_value_set new_copy; -+ -+ new_copy.items = items.copy (); -+ new_copy.by_ref = by_ref; -+ -+ return new_copy; -+ } -+ -+ void release () -+ { -+ items.release (); -+ } - }; - --typedef struct ipa_agg_jump_function *ipa_agg_jump_function_p; -+/* Return copy of a vec. */ -+ -+static inline vec -+ipa_copy_agg_values (const vec &aggs) -+{ -+ vec aggs_copy = vNULL; -+ -+ if (!aggs.is_empty ()) -+ { -+ ipa_agg_value_set *agg; -+ int i; -+ -+ aggs_copy.reserve_exact (aggs.length ()); -+ -+ FOR_EACH_VEC_ELT (aggs, i, agg) -+ aggs_copy.quick_push (agg->copy ()); -+ } -+ -+ return aggs_copy; -+} -+ -+/* For vec, DO NOT call release(), use below function -+ instead. Because ipa_agg_value_set contains a field of vector type, we -+ should release this child vector in each element before reclaiming the -+ whole vector. */ -+ -+static inline void -+ipa_release_agg_values (vec &aggs) -+{ -+ ipa_agg_value_set *agg; -+ int i; -+ -+ FOR_EACH_VEC_ELT (aggs, i, agg) -+ agg->release (); -+ aggs.release (); -+} - - /* Information about zero/non-zero bits. */ - struct GTY(()) ipa_bits -@@ -170,19 +301,19 @@ struct GTY(()) ipa_vr - types of jump functions supported. */ - struct GTY (()) ipa_jump_func - { -- /* Aggregate contants description. See struct ipa_agg_jump_function and its -- description. */ -+ /* Aggregate jump function description. See struct ipa_agg_jump_function -+ and its description. */ - struct ipa_agg_jump_function agg; - - /* Information about zero/non-zero bits. The pointed to structure is shared - betweed different jump functions. Use ipa_set_jfunc_bits to set this - field. */ -- struct ipa_bits *bits; -+ class ipa_bits *bits; - - /* Information about value range, containing valid data only when vr_known is - true. The pointed to structure is shared betweed different jump - functions. Use ipa_set_jfunc_vr to set this field. */ -- struct value_range_base *m_vr; -+ class value_range_base *m_vr; - - enum jump_func_type type; - /* Represents a value of a jump function. 
pass_through is used only in jump -@@ -310,9 +441,12 @@ struct GTY(()) ipa_param_descriptor - says how many there are. If any use could not be described by means of - ipa-prop structures, this is IPA_UNDESCRIBED_USE. */ - int controlled_uses; -- unsigned int move_cost : 31; -+ unsigned int move_cost : 28; - /* The parameter is used. */ - unsigned used : 1; -+ unsigned used_by_ipa_predicates : 1; -+ unsigned used_by_indirect_call : 1; -+ unsigned used_by_polymorphic_call : 1; - }; - - /* ipa_node_params stores information related to formal parameters of functions -@@ -332,7 +466,7 @@ struct GTY((for_user)) ipa_node_params - vec *descriptors; - /* Pointer to an array of structures describing individual formal - parameters. */ -- struct ipcp_param_lattices * GTY((skip)) lattices; -+ class ipcp_param_lattices * GTY((skip)) lattices; - /* Only for versioned nodes this field would not be NULL, - it points to the node that IPA cp cloned from. */ - struct cgraph_node * GTY((skip)) ipcp_orig_node; -@@ -357,6 +491,8 @@ struct GTY((for_user)) ipa_node_params - unsigned node_dead : 1; - /* Node is involved in a recursion, potentionally indirect. */ - unsigned node_within_scc : 1; -+ /* Node contains only direct recursion. */ -+ unsigned node_is_self_scc : 1; - /* Node is calling a private function called only once. */ - unsigned node_calling_single_call : 1; - /* False when there is something makes versioning impossible. */ -@@ -420,7 +556,7 @@ struct ipa_func_body_info - cgraph_node *node; - - /* Its info. */ -- struct ipa_node_params *info; -+ class ipa_node_params *info; - - /* Information about individual BBs. */ - vec bb_infos; -@@ -439,7 +575,7 @@ struct ipa_func_body_info - /* Return the number of formal parameters. */ - - static inline int --ipa_get_param_count (struct ipa_node_params *info) -+ipa_get_param_count (class ipa_node_params *info) - { - return vec_safe_length (info->descriptors); - } -@@ -450,10 +586,9 @@ ipa_get_param_count (struct ipa_node_par - WPA. */ - - static inline tree --ipa_get_param (struct ipa_node_params *info, int i) -+ipa_get_param (class ipa_node_params *info, int i) - { - gcc_checking_assert (info->descriptors); -- gcc_checking_assert (!flag_wpa); - tree t = (*info->descriptors)[i].decl_or_type; - gcc_checking_assert (TREE_CODE (t) == PARM_DECL); - return t; -@@ -463,7 +598,7 @@ ipa_get_param (struct ipa_node_params *i - to INFO if it is known or NULL if not. */ - - static inline tree --ipa_get_type (struct ipa_node_params *info, int i) -+ipa_get_type (class ipa_node_params *info, int i) - { - if (vec_safe_length (info->descriptors) <= (unsigned) i) - return NULL; -@@ -480,7 +615,7 @@ ipa_get_type (struct ipa_node_params *in - to INFO. */ - - static inline int --ipa_get_param_move_cost (struct ipa_node_params *info, int i) -+ipa_get_param_move_cost (class ipa_node_params *info, int i) - { - gcc_checking_assert (info->descriptors); - return (*info->descriptors)[i].move_cost; -@@ -490,17 +625,47 @@ ipa_get_param_move_cost (struct ipa_node - associated with INFO to VAL. */ - - static inline void --ipa_set_param_used (struct ipa_node_params *info, int i, bool val) -+ipa_set_param_used (class ipa_node_params *info, int i, bool val) - { - gcc_checking_assert (info->descriptors); - (*info->descriptors)[i].used = val; - } - -+/* Set the used_by_ipa_predicates flag corresponding to the Ith formal -+ parameter of the function associated with INFO to VAL. 
*/ -+ -+static inline void -+ipa_set_param_used_by_ipa_predicates (class ipa_node_params *info, int i, bool val) -+{ -+ gcc_checking_assert (info->descriptors); -+ (*info->descriptors)[i].used_by_ipa_predicates = val; -+} -+ -+/* Set the used_by_indirect_call flag corresponding to the Ith formal -+ parameter of the function associated with INFO to VAL. */ -+ -+static inline void -+ipa_set_param_used_by_indirect_call (class ipa_node_params *info, int i, bool val) -+{ -+ gcc_checking_assert (info->descriptors); -+ (*info->descriptors)[i].used_by_indirect_call = val; -+} -+ -+/* Set the .used_by_polymorphic_call flag corresponding to the Ith formal -+ parameter of the function associated with INFO to VAL. */ -+ -+static inline void -+ipa_set_param_used_by_polymorphic_call (class ipa_node_params *info, int i, bool val) -+{ -+ gcc_checking_assert (info->descriptors); -+ (*info->descriptors)[i].used_by_polymorphic_call = val; -+} -+ - /* Return how many uses described by ipa-prop a parameter has or - IPA_UNDESCRIBED_USE if there is a use that is not described by these - structures. */ - static inline int --ipa_get_controlled_uses (struct ipa_node_params *info, int i) -+ipa_get_controlled_uses (class ipa_node_params *info, int i) - { - /* FIXME: introducing speculation causes out of bounds access here. */ - if (vec_safe_length (info->descriptors) > (unsigned)i) -@@ -511,7 +676,7 @@ ipa_get_controlled_uses (struct ipa_node - /* Set the controlled counter of a given parameter. */ - - static inline void --ipa_set_controlled_uses (struct ipa_node_params *info, int i, int val) -+ipa_set_controlled_uses (class ipa_node_params *info, int i, int val) - { - gcc_checking_assert (info->descriptors); - (*info->descriptors)[i].controlled_uses = val; -@@ -521,12 +686,42 @@ ipa_set_controlled_uses (struct ipa_node - function associated with INFO. */ - - static inline bool --ipa_is_param_used (struct ipa_node_params *info, int i) -+ipa_is_param_used (class ipa_node_params *info, int i) - { - gcc_checking_assert (info->descriptors); - return (*info->descriptors)[i].used; - } - -+/* Return the used_by_ipa_predicates flag corresponding to the Ith formal -+ parameter of the function associated with INFO. */ -+ -+static inline bool -+ipa_is_param_used_by_ipa_predicates (class ipa_node_params *info, int i) -+{ -+ gcc_checking_assert (info->descriptors); -+ return (*info->descriptors)[i].used_by_ipa_predicates; -+} -+ -+/* Return the used_by_indirect_call flag corresponding to the Ith formal -+ parameter of the function associated with INFO. */ -+ -+static inline bool -+ipa_is_param_used_by_indirect_call (class ipa_node_params *info, int i) -+{ -+ gcc_checking_assert (info->descriptors); -+ return (*info->descriptors)[i].used_by_indirect_call; -+} -+ -+/* Return the used_by_polymorphic_call flag corresponding to the Ith formal -+ parameter of the function associated with INFO. */ -+ -+static inline bool -+ipa_is_param_used_by_polymorphic_call (class ipa_node_params *info, int i) -+{ -+ gcc_checking_assert (info->descriptors); -+ return (*info->descriptors)[i].used_by_polymorphic_call; -+} -+ - /* Information about replacements done in aggregates for a given node (each - node has its linked list). */ - struct GTY(()) ipa_agg_replacement_value -@@ -590,7 +785,7 @@ class GTY((for_user)) ipa_edge_args - /* Return the number of actual arguments. 
*/ - - static inline int --ipa_get_cs_argument_count (struct ipa_edge_args *args) -+ipa_get_cs_argument_count (class ipa_edge_args *args) - { - return vec_safe_length (args->jump_functions); - } -@@ -600,15 +795,15 @@ ipa_get_cs_argument_count (struct ipa_ed - ipa_compute_jump_functions. */ - - static inline struct ipa_jump_func * --ipa_get_ith_jump_func (struct ipa_edge_args *args, int i) -+ipa_get_ith_jump_func (class ipa_edge_args *args, int i) - { - return &(*args->jump_functions)[i]; - } - - /* Returns a pointer to the polymorphic call context for the ith argument. - NULL if contexts are not computed. */ --static inline struct ipa_polymorphic_call_context * --ipa_get_ith_polymorhic_call_context (struct ipa_edge_args *args, int i) -+static inline class ipa_polymorphic_call_context * -+ipa_get_ith_polymorhic_call_context (class ipa_edge_args *args, int i) - { - if (!args->polymorphic_call_contexts) - return NULL; -@@ -637,7 +832,12 @@ class GTY((user)) ipa_edge_args_sum_t : - ipa_edge_args_sum_t (symbol_table *table, bool ggc) - : call_summary (table, ggc) { } - -- /* Hook that is called by summary when an edge is duplicated. */ -+ void remove (cgraph_edge *edge) -+ { -+ call_summary ::remove (edge); -+ } -+ -+ /* Hook that is called by summary when an edge is removed. */ - virtual void remove (cgraph_edge *cs, ipa_edge_args *args); - /* Hook that is called by summary when an edge is duplicated. */ - virtual void duplicate (cgraph_edge *src, -@@ -675,8 +875,10 @@ extern GTY(()) function_summary get_create (NODE)) --#define IPA_EDGE_REF(EDGE) (ipa_edge_args_sum->get_create (EDGE)) -+#define IPA_NODE_REF(NODE) (ipa_node_params_sum->get (NODE)) -+#define IPA_NODE_REF_GET_CREATE(NODE) (ipa_node_params_sum->get_create (NODE)) -+#define IPA_EDGE_REF(EDGE) (ipa_edge_args_sum->get (EDGE)) -+#define IPA_EDGE_REF_GET_CREATE(EDGE) (ipa_edge_args_sum->get_create (EDGE)) - /* This macro checks validity of index returned by - ipa_get_param_decl_index function. */ - #define IS_VALID_JUMP_FUNC_INDEX(I) ((I) != -1) -@@ -740,9 +942,9 @@ bool ipa_propagate_indirect_call_infos ( - - /* Indirect edge and binfo processing. */ - tree ipa_get_indirect_edge_target (struct cgraph_edge *ie, -- vec , -+ vec, - vec, -- vec, -+ vec, - bool *); - struct cgraph_edge *ipa_make_edge_direct_to_target (struct cgraph_edge *, tree, - bool speculative = false); -@@ -755,13 +957,13 @@ ipa_bits *ipa_get_ipa_bits_for_value (co - void ipa_analyze_node (struct cgraph_node *); - - /* Aggregate jump function related functions. */ --tree ipa_find_agg_cst_for_param (struct ipa_agg_jump_function *agg, tree scalar, -+tree ipa_find_agg_cst_for_param (struct ipa_agg_value_set *agg, tree scalar, - HOST_WIDE_INT offset, bool by_ref, - bool *from_global_constant = NULL); - bool ipa_load_from_parm_agg (struct ipa_func_body_info *fbi, - vec *descriptors, - gimple *stmt, tree op, int *index_p, -- HOST_WIDE_INT *offset_p, HOST_WIDE_INT *size_p, -+ HOST_WIDE_INT *offset_p, poly_int64 *size_p, - bool *by_ref, bool *guaranteed_unmodified = NULL); - - /* Debugging interface. 
*/ -@@ -779,11 +981,11 @@ extern object_allocator --class ipcp_value_source; -+struct ipcp_value_source; - - extern object_allocator > ipcp_sources_pool; - --class ipcp_agg_lattice; -+struct ipcp_agg_lattice; - - extern object_allocator ipcp_agg_lattice_pool; - -@@ -793,15 +995,18 @@ void ipa_prop_write_jump_functions (void - void ipa_prop_read_jump_functions (void); - void ipcp_write_transformation_summaries (void); - void ipcp_read_transformation_summaries (void); --int ipa_get_param_decl_index (struct ipa_node_params *, tree); --tree ipa_value_from_jfunc (struct ipa_node_params *info, -+int ipa_get_param_decl_index (class ipa_node_params *, tree); -+tree ipa_value_from_jfunc (class ipa_node_params *info, - struct ipa_jump_func *jfunc, tree type); - unsigned int ipcp_transform_function (struct cgraph_node *node); - ipa_polymorphic_call_context ipa_context_from_jfunc (ipa_node_params *, - cgraph_edge *, - int, - ipa_jump_func *); --void ipa_dump_param (FILE *, struct ipa_node_params *info, int i); -+ipa_agg_value_set ipa_agg_value_set_from_jfunc (ipa_node_params *, -+ cgraph_node *, -+ ipa_agg_jump_function *); -+void ipa_dump_param (FILE *, class ipa_node_params *info, int i); - void ipa_release_body_info (struct ipa_func_body_info *); - tree ipa_get_callee_param_type (struct cgraph_edge *e, int i); - -diff -Nurp a/gcc/ipa-pure-const.c b/gcc/ipa-pure-const.c ---- a/gcc/ipa-pure-const.c 2020-04-30 15:14:04.600000000 +0800 -+++ b/gcc/ipa-pure-const.c 2020-04-30 15:14:56.588000000 +0800 -@@ -1360,12 +1360,14 @@ ignore_edge_for_nothrow (struct cgraph_e - return true; - - enum availability avail; -- cgraph_node *n = e->callee->function_or_virtual_thunk_symbol (&avail, -- e->caller); -- if (avail <= AVAIL_INTERPOSABLE || TREE_NOTHROW (n->decl)) -+ cgraph_node *ultimate_target -+ = e->callee->function_or_virtual_thunk_symbol (&avail, e->caller); -+ if (avail <= AVAIL_INTERPOSABLE || TREE_NOTHROW (ultimate_target->decl)) - return true; -- return opt_for_fn (e->callee->decl, flag_non_call_exceptions) -- && !e->callee->binds_to_current_def_p (e->caller); -+ return ((opt_for_fn (e->callee->decl, flag_non_call_exceptions) -+ && !e->callee->binds_to_current_def_p (e->caller)) -+ || !opt_for_fn (e->caller->decl, flag_ipa_pure_const) -+ || !opt_for_fn (ultimate_target->decl, flag_ipa_pure_const)); - } - - /* Return true if NODE is self recursive function. -@@ -1395,16 +1397,21 @@ cdtor_p (cgraph_node *n, void *) - return false; - } - --/* We only propagate across edges with non-interposable callee. */ -+/* Skip edges from and to nodes without ipa_pure_const enabled. -+ Ignore not available symbols. */ - - static bool - ignore_edge_for_pure_const (struct cgraph_edge *e) - { - enum availability avail; -- e->callee->function_or_virtual_thunk_symbol (&avail, e->caller); -- return (avail <= AVAIL_INTERPOSABLE); --} -+ cgraph_node *ultimate_target -+ = e->callee->function_or_virtual_thunk_symbol (&avail, e->caller); - -+ return (avail <= AVAIL_INTERPOSABLE -+ || !opt_for_fn (e->caller->decl, flag_ipa_pure_const) -+ || !opt_for_fn (ultimate_target->decl, -+ flag_ipa_pure_const)); -+} - - /* Produce transitive closure over the callgraph and compute pure/const - attributes. */ -@@ -1670,7 +1677,7 @@ propagate_pure_const (void) - /* Inline clones share declaration with their offline copies; - do not modify their declarations since the offline copy may - be different. 
*/ -- if (!w->global.inlined_to) -+ if (!w->inlined_to) - switch (this_state) - { - case IPA_CONST: -@@ -1831,7 +1838,7 @@ propagate_nothrow (void) - /* Inline clones share declaration with their offline copies; - do not modify their declarations since the offline copy may - be different. */ -- if (!w->global.inlined_to) -+ if (!w->inlined_to) - { - w->set_nothrow_flag (true); - if (dump_file) -@@ -1958,7 +1965,7 @@ propagate_malloc (void) - funct_state l = funct_state_summaries->get (node); - if (!node->alias - && l->malloc_state == STATE_MALLOC -- && !node->global.inlined_to) -+ && !node->inlined_to) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Function %s found to be malloc\n", -diff -Nurp a/gcc/ipa-reference.c b/gcc/ipa-reference.c ---- a/gcc/ipa-reference.c 2020-04-30 15:14:04.644000000 +0800 -+++ b/gcc/ipa-reference.c 2020-04-30 15:14:56.588000000 +0800 -@@ -46,7 +46,6 @@ along with GCC; see the file COPYING3. - #include "cgraph.h" - #include "data-streamer.h" - #include "calls.h" --#include "splay-tree.h" - #include "ipa-utils.h" - #include "ipa-reference.h" - #include "symbol-summary.h" -@@ -75,8 +74,8 @@ struct ipa_reference_global_vars_info_d - - struct ipa_reference_optimization_summary_d - { -- bitmap statics_not_read; -- bitmap statics_not_written; -+ bitmap statics_read; -+ bitmap statics_written; - }; - - typedef ipa_reference_local_vars_info_d *ipa_reference_local_vars_info_t; -@@ -92,14 +91,20 @@ struct ipa_reference_vars_info_d - - typedef struct ipa_reference_vars_info_d *ipa_reference_vars_info_t; - --/* This splay tree contains all of the static variables that are -+/* This map contains all of the static variables that are - being considered by the compilation level alias analysis. */ --static splay_tree reference_vars_to_consider; -+typedef hash_map reference_vars_map_t; -+static reference_vars_map_t *ipa_reference_vars_map; -+static int ipa_reference_vars_uids; -+static vec *reference_vars_to_consider; -+varpool_node_hook_list *varpool_node_hooks; - - /* Set of all interesting module statics. A bit is set for every module - static we are considering. This is added to the local info when asm - code is found that clobbers all memory. */ - static bitmap all_module_statics; -+/* Zero bitmap. */ -+static bitmap no_module_statics; - /* Set of all statics that should be ignored because they are touched by - -fno-ipa-reference code. */ - static bitmap ignore_module_statics; -@@ -136,6 +141,31 @@ public: - - static ipa_ref_opt_summary_t *ipa_ref_opt_sum_summaries = NULL; - -+/* Return ID used by ipa-reference bitmaps. -1 if failed. */ -+int -+ipa_reference_var_uid (tree t) -+{ -+ if (!ipa_reference_vars_map) -+ return -1; -+ int *id = ipa_reference_vars_map->get -+ (symtab_node::get (t)->ultimate_alias_target (NULL)->decl); -+ if (!id) -+ return -1; -+ return *id; -+} -+ -+/* Return ID used by ipa-reference bitmaps. Create new entry if -+ T is not in map. Set EXISTED accordinly */ -+int -+ipa_reference_var_get_or_insert_uid (tree t, bool *existed) -+{ -+ int &id = ipa_reference_vars_map->get_or_insert -+ (symtab_node::get (t)->ultimate_alias_target (NULL)->decl, existed); -+ if (!*existed) -+ id = ipa_reference_vars_uids++; -+ return id; -+} -+ - /* Return the ipa_reference_vars structure starting from the cgraph NODE. */ - static inline ipa_reference_vars_info_t - get_reference_vars_info (struct cgraph_node *node) -@@ -165,7 +195,7 @@ get_reference_optimization_summary (stru - NULL if no data is available. 
*/ - - bitmap --ipa_reference_get_not_read_global (struct cgraph_node *fn) -+ipa_reference_get_read_global (struct cgraph_node *fn) - { - if (!opt_for_fn (current_function_decl, flag_ipa_reference)) - return NULL; -@@ -180,10 +210,10 @@ ipa_reference_get_not_read_global (struc - || (avail == AVAIL_INTERPOSABLE - && flags_from_decl_or_type (fn->decl) & ECF_LEAF)) - && opt_for_fn (fn2->decl, flag_ipa_reference)) -- return info->statics_not_read; -+ return info->statics_read; - else if (avail == AVAIL_NOT_AVAILABLE - && flags_from_decl_or_type (fn->decl) & ECF_LEAF) -- return all_module_statics; -+ return no_module_statics; - else - return NULL; - } -@@ -194,7 +224,7 @@ ipa_reference_get_not_read_global (struc - call. Returns NULL if no data is available. */ - - bitmap --ipa_reference_get_not_written_global (struct cgraph_node *fn) -+ipa_reference_get_written_global (struct cgraph_node *fn) - { - if (!opt_for_fn (current_function_decl, flag_ipa_reference)) - return NULL; -@@ -209,10 +239,10 @@ ipa_reference_get_not_written_global (st - || (avail == AVAIL_INTERPOSABLE - && flags_from_decl_or_type (fn->decl) & ECF_LEAF)) - && opt_for_fn (fn2->decl, flag_ipa_reference)) -- return info->statics_not_written; -+ return info->statics_written; - else if (avail == AVAIL_NOT_AVAILABLE - && flags_from_decl_or_type (fn->decl) & ECF_LEAF) -- return all_module_statics; -+ return no_module_statics; - else - return NULL; - } -@@ -256,7 +286,9 @@ is_improper (symtab_node *n, void *v ATT - static inline bool - is_proper_for_analysis (tree t) - { -- if (bitmap_bit_p (ignore_module_statics, ipa_reference_var_uid (t))) -+ int id = ipa_reference_var_uid (t); -+ -+ if (id != -1 && bitmap_bit_p (ignore_module_statics, id)) - return false; - - if (symtab_node::get (t) -@@ -272,9 +304,7 @@ is_proper_for_analysis (tree t) - static const char * - get_static_name (int index) - { -- splay_tree_node stn = -- splay_tree_lookup (reference_vars_to_consider, index); -- return fndecl_name ((tree)(stn->value)); -+ return fndecl_name ((*reference_vars_to_consider)[index]); - } - - /* Dump a set of static vars to FILE. */ -@@ -287,6 +317,8 @@ dump_static_vars_set_to_file (FILE *f, b - return; - else if (set == all_module_statics) - fprintf (f, "ALL"); -+ else if (set == no_module_statics) -+ fprintf (f, "NO"); - else - EXECUTE_IF_SET_IN_BITMAP (set, 0, index, bi) - { -@@ -330,10 +362,12 @@ union_static_var_sets (bitmap &x, bitmap - But if SET is NULL or the maximum set, return that instead. */ - - static bitmap --copy_static_var_set (bitmap set) -+copy_static_var_set (bitmap set, bool for_propagation) - { - if (set == NULL || set == all_module_statics) - return set; -+ if (!for_propagation && set == no_module_statics) -+ return set; - bitmap_obstack *o = set->obstack; - gcc_checking_assert (o); - bitmap copy = BITMAP_ALLOC (o); -@@ -403,6 +437,14 @@ propagate_bits (ipa_reference_global_var - } - } - -+/* Delete NODE from map. */ -+ -+static void -+varpool_removal_hook (varpool_node *node, void *) -+{ -+ ipa_reference_vars_map->remove (node->decl); -+} -+ - static bool ipa_init_p = false; - - /* The init routine for analyzing global static variable usage. 
See -@@ -415,22 +457,28 @@ ipa_init (void) - - ipa_init_p = true; - -- if (dump_file) -- reference_vars_to_consider = splay_tree_new (splay_tree_compare_ints, 0, 0); -+ vec_alloc (reference_vars_to_consider, 10); -+ -+ -+ if (ipa_ref_opt_sum_summaries != NULL) -+ { -+ delete ipa_ref_opt_sum_summaries; -+ ipa_ref_opt_sum_summaries = NULL; -+ delete ipa_reference_vars_map; -+ } -+ ipa_reference_vars_map = new reference_vars_map_t(257); -+ varpool_node_hooks -+ = symtab->add_varpool_removal_hook (varpool_removal_hook, NULL); -+ ipa_reference_vars_uids = 0; - - bitmap_obstack_initialize (&local_info_obstack); - bitmap_obstack_initialize (&optimization_summary_obstack); - all_module_statics = BITMAP_ALLOC (&optimization_summary_obstack); -+ no_module_statics = BITMAP_ALLOC (&optimization_summary_obstack); - ignore_module_statics = BITMAP_ALLOC (&optimization_summary_obstack); - - if (ipa_ref_var_info_summaries == NULL) - ipa_ref_var_info_summaries = new ipa_ref_var_info_summary_t (symtab); -- -- if (ipa_ref_opt_sum_summaries != NULL) -- { -- delete ipa_ref_opt_sum_summaries; -- ipa_ref_opt_sum_summaries = NULL; -- } - } - - -@@ -465,6 +513,8 @@ analyze_function (struct cgraph_node *fn - local = init_function_info (fn); - for (i = 0; fn->iterate_reference (i, ref); i++) - { -+ int id; -+ bool existed; - if (!is_a (ref->referred)) - continue; - var = ref->referred->decl; -@@ -472,23 +522,22 @@ analyze_function (struct cgraph_node *fn - continue; - /* This is a variable we care about. Check if we have seen it - before, and if not add it the set of variables we care about. */ -- if (all_module_statics -- && bitmap_set_bit (all_module_statics, ipa_reference_var_uid (var))) -+ id = ipa_reference_var_get_or_insert_uid (var, &existed); -+ if (!existed) - { -+ bitmap_set_bit (all_module_statics, id); - if (dump_file) -- splay_tree_insert (reference_vars_to_consider, -- ipa_reference_var_uid (var), -- (splay_tree_value)var); -+ reference_vars_to_consider->safe_push (var); - } - switch (ref->use) - { - case IPA_REF_LOAD: -- bitmap_set_bit (local->statics_read, ipa_reference_var_uid (var)); -+ bitmap_set_bit (local->statics_read, id); - break; - case IPA_REF_STORE: - if (ref->cannot_lead_to_return ()) - break; -- bitmap_set_bit (local->statics_written, ipa_reference_var_uid (var)); -+ bitmap_set_bit (local->statics_written, id); - break; - case IPA_REF_ADDR: - break; -@@ -510,10 +559,10 @@ ipa_ref_opt_summary_t::duplicate (cgraph - ipa_reference_optimization_summary_d - *dst_ginfo) - { -- dst_ginfo->statics_not_read = -- copy_static_var_set (ginfo->statics_not_read); -- dst_ginfo->statics_not_written = -- copy_static_var_set (ginfo->statics_not_written); -+ dst_ginfo->statics_read = -+ copy_static_var_set (ginfo->statics_read, false); -+ dst_ginfo->statics_written = -+ copy_static_var_set (ginfo->statics_written, false); - } - - /* Called when node is removed. 
*/ -@@ -522,13 +571,15 @@ void - ipa_ref_opt_summary_t::remove (cgraph_node *, - ipa_reference_optimization_summary_d *ginfo) - { -- if (ginfo->statics_not_read -- && ginfo->statics_not_read != all_module_statics) -- BITMAP_FREE (ginfo->statics_not_read); -- -- if (ginfo->statics_not_written -- && ginfo->statics_not_written != all_module_statics) -- BITMAP_FREE (ginfo->statics_not_written); -+ if (ginfo->statics_read -+ && ginfo->statics_read != all_module_statics -+ && ginfo->statics_read != no_module_statics) -+ BITMAP_FREE (ginfo->statics_read); -+ -+ if (ginfo->statics_written -+ && ginfo->statics_written != all_module_statics -+ && ginfo->statics_written != no_module_statics) -+ BITMAP_FREE (ginfo->statics_written); - } - - /* Analyze each function in the cgraph to see which global or statics -@@ -676,16 +727,23 @@ get_read_write_all_from_node (struct cgr - } - } - --/* Skip edges from and to nodes without ipa_reference enables. This leave -- them out of strongy connected coponents and makes them easyto skip in the -+/* Skip edges from and to nodes without ipa_reference enabled. -+ Ignore not available symbols. This leave -+ them out of strongly connected components and makes them easy to skip in the - propagation loop bellow. */ - - static bool - ignore_edge_p (cgraph_edge *e) - { -- return (!opt_for_fn (e->caller->decl, flag_ipa_reference) -- || !opt_for_fn (e->callee->function_symbol ()->decl, -- flag_ipa_reference)); -+ enum availability avail; -+ cgraph_node *ultimate_target -+ = e->callee->function_or_virtual_thunk_symbol (&avail, e->caller); -+ -+ return (avail < AVAIL_INTERPOSABLE -+ || (avail == AVAIL_INTERPOSABLE -+ && !(flags_from_decl_or_type (e->callee->decl) & ECF_LEAF)) -+ || !opt_for_fn (e->caller->decl, flag_ipa_reference) -+ || !opt_for_fn (ultimate_target->decl, flag_ipa_reference)); - } - - /* Produce the global information by preforming a transitive closure -@@ -753,11 +811,12 @@ propagate (void) - if (read_all) - node_g->statics_read = all_module_statics; - else -- node_g->statics_read = copy_static_var_set (node_l->statics_read); -+ node_g->statics_read = copy_static_var_set (node_l->statics_read, true); - if (write_all) - node_g->statics_written = all_module_statics; - else -- node_g->statics_written = copy_static_var_set (node_l->statics_written); -+ node_g->statics_written -+ = copy_static_var_set (node_l->statics_written, true); - - /* Merge the sets of this cycle with all sets of callees reached - from this cycle. */ -@@ -841,12 +900,26 @@ propagate (void) - ipa_reference_vars_info_t node_info; - ipa_reference_global_vars_info_t node_g; - -+ /* No need to produce summaries for inline clones. */ -+ if (node->inlined_to) -+ continue; -+ - node_info = get_reference_vars_info (node); -- if (!node->alias && opt_for_fn (node->decl, flag_ipa_reference) -- && (node->get_availability () > AVAIL_INTERPOSABLE -- || (flags_from_decl_or_type (node->decl) & ECF_LEAF))) -+ if (!node->alias && opt_for_fn (node->decl, flag_ipa_reference)) - { - node_g = &node_info->global; -+ bool read_all = -+ (node_g->statics_read == all_module_statics -+ || bitmap_equal_p (node_g->statics_read, all_module_statics)); -+ bool written_all = -+ (node_g->statics_written == all_module_statics -+ || bitmap_equal_p (node_g->statics_written, -+ all_module_statics)); -+ -+ /* There is no need to produce summary if we collected nothing -+ useful. 
*/ -+ if (read_all && written_all) -+ continue; - - ipa_reference_optimization_summary_d *opt - = ipa_ref_opt_sum_summaries->get_create (node); -@@ -854,27 +927,25 @@ propagate (void) - /* Create the complimentary sets. */ - - if (bitmap_empty_p (node_g->statics_read)) -- opt->statics_not_read = all_module_statics; -+ opt->statics_read = no_module_statics; -+ else if (read_all) -+ opt->statics_read = all_module_statics; - else - { -- opt->statics_not_read -+ opt->statics_read - = BITMAP_ALLOC (&optimization_summary_obstack); -- if (node_g->statics_read != all_module_statics) -- bitmap_and_compl (opt->statics_not_read, -- all_module_statics, -- node_g->statics_read); -+ bitmap_copy (opt->statics_read, node_g->statics_read); - } - - if (bitmap_empty_p (node_g->statics_written)) -- opt->statics_not_written = all_module_statics; -+ opt->statics_written = no_module_statics; -+ else if (written_all) -+ opt->statics_written = all_module_statics; - else - { -- opt->statics_not_written -+ opt->statics_written - = BITMAP_ALLOC (&optimization_summary_obstack); -- if (node_g->statics_written != all_module_statics) -- bitmap_and_compl (opt->statics_not_written, -- all_module_statics, -- node_g->statics_written); -+ bitmap_copy (opt->statics_written, node_g->statics_written); - } - } - } -@@ -892,7 +963,7 @@ propagate (void) - - ipa_ref_var_info_summaries = NULL; - if (dump_file) -- splay_tree_delete (reference_vars_to_consider); -+ vec_free (reference_vars_to_consider); - reference_vars_to_consider = NULL; - return remove_p ? TODO_remove_functions : 0; - } -@@ -907,12 +978,10 @@ write_node_summary_p (struct cgraph_node - ipa_reference_optimization_summary_t info; - - /* See if we have (non-empty) info. */ -- if (!node->definition || node->global.inlined_to) -+ if (!node->definition || node->inlined_to) - return false; - info = get_reference_optimization_summary (node); -- if (!info -- || (bitmap_empty_p (info->statics_not_read) -- && bitmap_empty_p (info->statics_not_written))) -+ if (!info) - return false; - - /* See if we want to encode it. -@@ -925,11 +994,17 @@ write_node_summary_p (struct cgraph_node - && !referenced_from_this_partition_p (node, encoder)) - return false; - -- /* See if the info has non-empty intersections with vars we want to encode. */ -- if (!bitmap_intersect_p (info->statics_not_read, ltrans_statics) -- && !bitmap_intersect_p (info->statics_not_written, ltrans_statics)) -- return false; -- return true; -+ /* See if the info has non-empty intersections with vars we want to -+ encode. */ -+ bitmap_iterator bi; -+ unsigned int i; -+ EXECUTE_IF_AND_COMPL_IN_BITMAP (ltrans_statics, info->statics_read, 0, -+ i, bi) -+ return true; -+ EXECUTE_IF_AND_COMPL_IN_BITMAP (ltrans_statics, info->statics_written, 0, -+ i, bi) -+ return true; -+ return false; - } - - /* Stream out BITS<RANS_STATICS as list of decls to OB. 
-@@ -962,8 +1037,7 @@ stream_out_bitmap (struct lto_simple_out - return; - EXECUTE_IF_AND_IN_BITMAP (bits, ltrans_statics, 0, index, bi) - { -- tree decl = (tree)splay_tree_lookup (reference_vars_to_consider, -- index)->value; -+ tree decl = (*reference_vars_to_consider) [index]; - lto_output_var_decl_index (ob->decl_state, ob->main_stream, decl); - } - } -@@ -981,23 +1055,23 @@ ipa_reference_write_optimization_summary - auto_bitmap ltrans_statics; - int i; - -- reference_vars_to_consider = splay_tree_new (splay_tree_compare_ints, 0, 0); -+ vec_alloc (reference_vars_to_consider, ipa_reference_vars_uids); -+ reference_vars_to_consider->safe_grow (ipa_reference_vars_uids); - - /* See what variables we are interested in. */ - for (i = 0; i < lto_symtab_encoder_size (encoder); i++) - { - symtab_node *snode = lto_symtab_encoder_deref (encoder, i); - varpool_node *vnode = dyn_cast (snode); -+ int id; -+ - if (vnode -- && bitmap_bit_p (all_module_statics, -- ipa_reference_var_uid (vnode->decl)) -+ && (id = ipa_reference_var_uid (vnode->decl)) != -1 - && referenced_from_this_partition_p (vnode, encoder)) - { - tree decl = vnode->decl; -- bitmap_set_bit (ltrans_statics, ipa_reference_var_uid (decl)); -- splay_tree_insert (reference_vars_to_consider, -- ipa_reference_var_uid (decl), -- (splay_tree_value)decl); -+ bitmap_set_bit (ltrans_statics, id); -+ (*reference_vars_to_consider)[id] = decl; - ltrans_statics_bitcount ++; - } - } -@@ -1032,14 +1106,14 @@ ipa_reference_write_optimization_summary - node_ref = lto_symtab_encoder_encode (encoder, snode); - streamer_write_uhwi_stream (ob->main_stream, node_ref); - -- stream_out_bitmap (ob, info->statics_not_read, ltrans_statics, -+ stream_out_bitmap (ob, info->statics_read, ltrans_statics, - ltrans_statics_bitcount); -- stream_out_bitmap (ob, info->statics_not_written, ltrans_statics, -+ stream_out_bitmap (ob, info->statics_written, ltrans_statics, - ltrans_statics_bitcount); - } - } - lto_destroy_simple_output_block (ob); -- splay_tree_delete (reference_vars_to_consider); -+ delete reference_vars_to_consider; - } - - /* Deserialize the ipa info for lto. 
*/ -@@ -1053,10 +1127,15 @@ ipa_reference_read_optimization_summary - unsigned int j = 0; - bitmap_obstack_initialize (&optimization_summary_obstack); - -- if (ipa_ref_opt_sum_summaries == NULL) -- ipa_ref_opt_sum_summaries = new ipa_ref_opt_summary_t (symtab); -+ gcc_checking_assert (ipa_ref_opt_sum_summaries == NULL); -+ ipa_ref_opt_sum_summaries = new ipa_ref_opt_summary_t (symtab); -+ ipa_reference_vars_map = new reference_vars_map_t(257); -+ varpool_node_hooks -+ = symtab->add_varpool_removal_hook (varpool_removal_hook, NULL); -+ ipa_reference_vars_uids = 0; - - all_module_statics = BITMAP_ALLOC (&optimization_summary_obstack); -+ no_module_statics = BITMAP_ALLOC (&optimization_summary_obstack); - - while ((file_data = file_data_vec[j++])) - { -@@ -1081,8 +1160,11 @@ ipa_reference_read_optimization_summary - unsigned int var_index = streamer_read_uhwi (ib); - tree v_decl = lto_file_decl_data_get_var_decl (file_data, - var_index); -+ bool existed; - bitmap_set_bit (all_module_statics, -- ipa_reference_var_uid (v_decl)); -+ ipa_reference_var_get_or_insert_uid -+ (v_decl, &existed)); -+ gcc_checking_assert (!existed); - if (dump_file) - fprintf (dump_file, " %s", fndecl_name (v_decl)); - } -@@ -1102,57 +1184,65 @@ ipa_reference_read_optimization_summary - ipa_reference_optimization_summary_d *info - = ipa_ref_opt_sum_summaries->get_create (node); - -- info->statics_not_read = BITMAP_ALLOC -- (&optimization_summary_obstack); -- info->statics_not_written = BITMAP_ALLOC -- (&optimization_summary_obstack); - if (dump_file) - fprintf (dump_file, -- "\nFunction name:%s:\n static not read:", -+ "\nFunction name:%s:\n static read:", - node->dump_asm_name ()); - -- /* Set the statics not read. */ -+ /* Set the statics read. */ - v_count = streamer_read_hwi (ib); - if (v_count == -1) - { -- info->statics_not_read = all_module_statics; -+ info->statics_read = all_module_statics; - if (dump_file) - fprintf (dump_file, " all module statics"); - } -+ else if (v_count == 0) -+ info->statics_read = no_module_statics; - else -- for (j = 0; j < (unsigned int)v_count; j++) -- { -- unsigned int var_index = streamer_read_uhwi (ib); -- tree v_decl = lto_file_decl_data_get_var_decl (file_data, -- var_index); -- bitmap_set_bit (info->statics_not_read, -- ipa_reference_var_uid (v_decl)); -- if (dump_file) -- fprintf (dump_file, " %s", fndecl_name (v_decl)); -- } -+ { -+ info->statics_read = BITMAP_ALLOC -+ (&optimization_summary_obstack); -+ for (j = 0; j < (unsigned int)v_count; j++) -+ { -+ unsigned int var_index = streamer_read_uhwi (ib); -+ tree v_decl = lto_file_decl_data_get_var_decl (file_data, -+ var_index); -+ bitmap_set_bit (info->statics_read, -+ ipa_reference_var_uid (v_decl)); -+ if (dump_file) -+ fprintf (dump_file, " %s", fndecl_name (v_decl)); -+ } -+ } - - if (dump_file) - fprintf (dump_file, -- "\n static not written:"); -- /* Set the statics not written. */ -+ "\n static written:"); -+ /* Set the statics written. 
*/ - v_count = streamer_read_hwi (ib); - if (v_count == -1) - { -- info->statics_not_written = all_module_statics; -+ info->statics_written = all_module_statics; - if (dump_file) - fprintf (dump_file, " all module statics"); - } -+ else if (v_count == 0) -+ info->statics_written = no_module_statics; - else -- for (j = 0; j < (unsigned int)v_count; j++) -- { -- unsigned int var_index = streamer_read_uhwi (ib); -- tree v_decl = lto_file_decl_data_get_var_decl (file_data, -- var_index); -- bitmap_set_bit (info->statics_not_written, -- ipa_reference_var_uid (v_decl)); -- if (dump_file) -- fprintf (dump_file, " %s", fndecl_name (v_decl)); -- } -+ { -+ info->statics_written = BITMAP_ALLOC -+ (&optimization_summary_obstack); -+ for (j = 0; j < (unsigned int)v_count; j++) -+ { -+ unsigned int var_index = streamer_read_uhwi (ib); -+ tree v_decl = lto_file_decl_data_get_var_decl (file_data, -+ var_index); -+ bitmap_set_bit (info->statics_written, -+ ipa_reference_var_uid (v_decl)); -+ if (dump_file) -+ fprintf (dump_file, " %s", fndecl_name (v_decl)); -+ } -+ } - if (dump_file) - fprintf (dump_file, "\n"); - } -@@ -1233,6 +1323,9 @@ ipa_reference_c_finalize (void) - { - delete ipa_ref_opt_sum_summaries; - ipa_ref_opt_sum_summaries = NULL; -+ delete ipa_reference_vars_map; -+ ipa_reference_vars_map = NULL; -+ symtab->remove_varpool_removal_hook (varpool_node_hooks); - } - - if (ipa_init_p) -diff -Nurp a/gcc/ipa-reference.h b/gcc/ipa-reference.h ---- a/gcc/ipa-reference.h 2020-04-30 15:14:04.580000000 +0800 -+++ b/gcc/ipa-reference.h 2020-04-30 15:14:56.540000000 +0800 -@@ -22,15 +22,10 @@ along with GCC; see the file COPYING3. - #define GCC_IPA_REFERENCE_H - - /* In ipa-reference.c */ --bitmap ipa_reference_get_not_read_global (struct cgraph_node *fn); --bitmap ipa_reference_get_not_written_global (struct cgraph_node *fn); -+bitmap ipa_reference_get_read_global (struct cgraph_node *fn); -+bitmap ipa_reference_get_written_global (struct cgraph_node *fn); - void ipa_reference_c_finalize (void); -- --inline int --ipa_reference_var_uid (tree t) --{ -- return DECL_UID (symtab_node::get (t)->ultimate_alias_target (NULL)->decl); --} -+int ipa_reference_var_uid (tree t); - - #endif /* GCC_IPA_REFERENCE_H */ - -diff -Nurp a/gcc/ipa-utils.c b/gcc/ipa-utils.c ---- a/gcc/ipa-utils.c 2020-04-30 15:14:04.576000000 +0800 -+++ b/gcc/ipa-utils.c 2020-04-30 15:14:56.588000000 +0800 -@@ -103,8 +103,7 @@ searchc (struct searchc_env* env, struct - continue; - - if (w->aux -- && (avail > AVAIL_INTERPOSABLE -- || avail == AVAIL_INTERPOSABLE)) -+ && (avail >= AVAIL_INTERPOSABLE)) - { - w_info = (struct ipa_dfs_info *) w->aux; - if (w_info->new_node) -@@ -297,7 +296,7 @@ ipa_reverse_postorder (struct cgraph_nod - if (!node->aux - && (pass - || (!node->address_taken -- && !node->global.inlined_to -+ && !node->inlined_to - && !node->alias && !node->thunk.thunk_p - && !node->only_called_directly_p ()))) - { -diff -Nurp a/gcc/ipa-utils.h b/gcc/ipa-utils.h ---- a/gcc/ipa-utils.h 2020-04-30 15:14:04.652000000 +0800 -+++ b/gcc/ipa-utils.h 2020-04-30 15:14:56.624000000 +0800 -@@ -47,6 +47,9 @@ void ipa_merge_profiles (struct cgraph_n - struct cgraph_node *src, bool preserve_body = false); - bool recursive_call_p (tree, tree); - -+/* In ipa-prop.c */ -+void ipa_remove_useless_jump_functions (); -+ - /* In ipa-profile.c */ - bool ipa_propagate_frequency (struct cgraph_node *node); - -@@ -54,6 +57,7 @@ bool ipa_propagate_frequency (struct cgr - - struct odr_type_d; - typedef odr_type_d *odr_type; -+extern bool thunk_expansion; - void 
build_type_inheritance_graph (void); - void rebuild_type_inheritance_graph (void); - void update_type_inheritance_graph (void); -@@ -263,5 +267,3 @@ odr_type_p (const_tree t) - } - - #endif /* GCC_IPA_UTILS_H */ -- -- -diff -Nurp a/gcc/ipa-visibility.c b/gcc/ipa-visibility.c ---- a/gcc/ipa-visibility.c 2020-04-30 15:14:04.568000000 +0800 -+++ b/gcc/ipa-visibility.c 2020-04-30 15:14:56.588000000 +0800 -@@ -707,7 +707,7 @@ function_and_variable_visibility (bool w - || DECL_EXTERNAL (node->decl)); - if (cgraph_externally_visible_p (node, whole_program)) - { -- gcc_assert (!node->global.inlined_to); -+ gcc_assert (!node->inlined_to); - node->externally_visible = true; - } - else -diff -Nurp a/gcc/lto/lto.c b/gcc/lto/lto.c ---- a/gcc/lto/lto.c 2020-04-30 15:14:04.664000000 +0800 -+++ b/gcc/lto/lto.c 2020-04-30 15:14:56.552000000 +0800 -@@ -3211,9 +3211,9 @@ do_whole_program_analysis (void) - else - gcc_unreachable (); - -- /* Inline summaries are needed for balanced partitioning. Free them now so -+ /* Size summaries are needed for balanced partitioning. Free them now so - the memory can be used for streamer caches. */ -- ipa_free_fn_summary (); -+ ipa_free_size_summary (); - - /* AUX pointers are used by partitioning code to bookkeep number of - partitions symbol is in. This is no longer needed. */ -diff -Nurp a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c ---- a/gcc/lto/lto-partition.c 2020-04-30 15:14:04.664000000 +0800 -+++ b/gcc/lto/lto-partition.c 2020-04-30 15:14:56.592000000 +0800 -@@ -171,7 +171,7 @@ add_symbol_to_partition_1 (ltrans_partit - { - struct cgraph_edge *e; - if (!node->alias && c == SYMBOL_PARTITION) -- part->insns += ipa_fn_summaries->get (cnode)->size; -+ part->insns += ipa_size_summaries->get (cnode)->size; - - /* Add all inline clones and callees that are duplicated. */ - for (e = cnode->callees; e; e = e->next_callee) -@@ -182,7 +182,7 @@ add_symbol_to_partition_1 (ltrans_partit - - /* Add all thunks associated with the function. 
*/ - for (e = cnode->callers; e; e = e->next_caller) -- if (e->caller->thunk.thunk_p && !e->caller->global.inlined_to) -+ if (e->caller->thunk.thunk_p && !e->caller->inlined_to) - add_symbol_to_partition_1 (part, e->caller); - } - -@@ -233,8 +233,8 @@ contained_in_symbol (symtab_node *node) - if (cgraph_node *cnode = dyn_cast (node)) - { - cnode = cnode->function_symbol (); -- if (cnode->global.inlined_to) -- cnode = cnode->global.inlined_to; -+ if (cnode->inlined_to) -+ cnode = cnode->inlined_to; - return cnode; - } - else if (varpool_node *vnode = dyn_cast (node)) -@@ -291,7 +291,7 @@ undo_partition (ltrans_partition partiti - - if (!node->alias && (cnode = dyn_cast (node)) - && node->get_partitioning_class () == SYMBOL_PARTITION) -- partition->insns -= ipa_fn_summaries->get (cnode)->size; -+ partition->insns -= ipa_size_summaries->get (cnode)->size; - lto_symtab_encoder_delete_node (partition->encoder, node); - node->aux = (void *)((size_t)node->aux - 1); - } -@@ -529,7 +529,7 @@ lto_balanced_map (int n_lto_partitions, - else - order.safe_push (node); - if (!node->alias) -- total_size += ipa_fn_summaries->get (node)->size; -+ total_size += ipa_size_summaries->get (node)->size; - } - - original_total_size = total_size; -diff -Nurp a/gcc/lto/lto-symtab.c b/gcc/lto/lto-symtab.c ---- a/gcc/lto/lto-symtab.c 2020-04-30 15:14:04.664000000 +0800 -+++ b/gcc/lto/lto-symtab.c 2020-04-30 15:14:56.592000000 +0800 -@@ -63,7 +63,7 @@ lto_cgraph_replace_node (struct cgraph_n - prevailing_node->forced_by_abi = true; - if (node->address_taken) - { -- gcc_assert (!prevailing_node->global.inlined_to); -+ gcc_assert (!prevailing_node->inlined_to); - prevailing_node->mark_address_taken (); - } - if (node->definition && prevailing_node->definition -@@ -909,7 +909,7 @@ lto_symtab_merge_symbols_1 (symtab_node - cgraph_node *ce = dyn_cast (e); - - if ((!TREE_PUBLIC (e->decl) && !DECL_EXTERNAL (e->decl)) -- || (ce != NULL && ce->global.inlined_to)) -+ || (ce != NULL && ce->inlined_to)) - continue; - symtab_node *to = symtab_node::get (lto_symtab_prevailing_decl (e->decl)); - -diff -Nurp a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c ---- a/gcc/lto-cgraph.c 2020-04-30 15:14:04.636000000 +0800 -+++ b/gcc/lto-cgraph.c 2020-04-30 15:14:56.588000000 +0800 -@@ -329,7 +329,7 @@ reachable_from_other_partition_p (struct - struct cgraph_edge *e; - if (!node->definition) - return false; -- if (node->global.inlined_to) -+ if (node->inlined_to) - return false; - for (e = node->callers; e; e = e->next_caller) - { -@@ -399,7 +399,7 @@ lto_output_node (struct lto_simple_outpu - boundary_p = !lto_symtab_encoder_in_partition_p (encoder, node); - - if (node->analyzed && (!boundary_p || node->alias -- || (node->thunk.thunk_p && !node->global.inlined_to))) -+ || (node->thunk.thunk_p && !node->inlined_to))) - tag = LTO_symtab_analyzed_node; - else - tag = LTO_symtab_unavail_node; -@@ -422,7 +422,7 @@ lto_output_node (struct lto_simple_outpu - && node->get_partitioning_class () == SYMBOL_PARTITION) - { - /* Inline clones cannot be part of boundary. -- gcc_assert (!node->global.inlined_to); -+ gcc_assert (!node->inlined_to); - - FIXME: At the moment they can be, when partition contains an inline - clone that is clone of inline clone from outside partition. 
We can -@@ -468,9 +468,9 @@ lto_output_node (struct lto_simple_outpu - - if (tag == LTO_symtab_analyzed_node) - { -- if (node->global.inlined_to) -+ if (node->inlined_to) - { -- ref = lto_symtab_encoder_lookup (encoder, node->global.inlined_to); -+ ref = lto_symtab_encoder_lookup (encoder, node->inlined_to); - gcc_assert (ref != LCC_NOT_FOUND); - } - else -@@ -884,7 +884,7 @@ compute_ltrans_boundary (lto_symtab_enco - if (!lto_symtab_encoder_in_partition_p (encoder, callee)) - { - /* We should have moved all the inlines. */ -- gcc_assert (!callee->global.inlined_to); -+ gcc_assert (!callee->inlined_to); - add_node_to (encoder, callee, false); - } - } -@@ -911,7 +911,7 @@ compute_ltrans_boundary (lto_symtab_enco - && !lto_symtab_encoder_in_partition_p - (encoder, callee)) - { -- gcc_assert (!callee->global.inlined_to); -+ gcc_assert (!callee->inlined_to); - add_node_to (encoder, callee, false); - } - } -@@ -928,7 +928,7 @@ compute_ltrans_boundary (lto_symtab_enco - if (node->alias && node->analyzed) - create_references (encoder, node); - if (cnode -- && cnode->thunk.thunk_p && !cnode->global.inlined_to) -+ && cnode->thunk.thunk_p && !cnode->inlined_to) - add_node_to (encoder, cnode->callees->callee, false); - while (node->transparent_alias && node->analyzed) - { -@@ -984,7 +984,7 @@ output_symtab (void) - { - node = dyn_cast (lto_symtab_encoder_deref (encoder, i)); - if (node -- && ((node->thunk.thunk_p && !node->global.inlined_to) -+ && ((node->thunk.thunk_p && !node->inlined_to) - || lto_symtab_encoder_in_partition_p (encoder, node))) - { - output_outgoing_cgraph_edges (node->callees, ob, encoder); -@@ -1283,7 +1283,7 @@ input_node (struct lto_file_decl_data *f - input_overwrite_node (file_data, node, tag, &bp); - - /* Store a reference for now, and fix up later to be a pointer. */ -- node->global.inlined_to = (cgraph_node *) (intptr_t) ref; -+ node->inlined_to = (cgraph_node *) (intptr_t) ref; - - if (group) - { -@@ -1542,7 +1542,7 @@ input_cgraph_1 (struct lto_file_decl_dat - int ref; - if (cgraph_node *cnode = dyn_cast (node)) - { -- ref = (int) (intptr_t) cnode->global.inlined_to; -+ ref = (int) (intptr_t) cnode->inlined_to; - - /* We share declaration of builtins, so we may read same node twice. */ - if (!node->aux) -@@ -1551,10 +1551,10 @@ input_cgraph_1 (struct lto_file_decl_dat - - /* Fixup inlined_to from reference to pointer. 
*/ - if (ref != LCC_NOT_FOUND) -- dyn_cast (node)->global.inlined_to -+ dyn_cast (node)->inlined_to - = dyn_cast (nodes[ref]); - else -- cnode->global.inlined_to = NULL; -+ cnode->inlined_to = NULL; - } - - ref = (int) (intptr_t) node->same_comdat_group; -diff -Nurp a/gcc/omp-simd-clone.c b/gcc/omp-simd-clone.c ---- a/gcc/omp-simd-clone.c 2020-04-30 15:14:04.644000000 +0800 -+++ b/gcc/omp-simd-clone.c 2020-04-30 15:14:56.592000000 +0800 -@@ -1635,7 +1635,7 @@ expand_simd_clones (struct cgraph_node * - tree attr = lookup_attribute ("omp declare simd", - DECL_ATTRIBUTES (node->decl)); - if (attr == NULL_TREE -- || node->global.inlined_to -+ || node->inlined_to - || lookup_attribute ("noclone", DECL_ATTRIBUTES (node->decl))) - return; - -diff -Nurp a/gcc/params.def b/gcc/params.def ---- a/gcc/params.def 2020-04-30 15:14:04.560000000 +0800 -+++ b/gcc/params.def 2020-04-30 15:14:56.700000000 +0800 -@@ -1093,6 +1093,18 @@ DEFPARAM (PARAM_IPA_CP_VALUE_LIST_SIZE, - "interprocedural constant propagation.", - 8, 0, 0) - -+DEFPARAM (PARAM_IPA_CP_MIN_RECURSIVE_PROBABILITY, -+ "ipa-cp-min-recursive-probability", -+ "Recursive cloning only when the probability of call being executed " -+ "exceeds the parameter. ", -+ 2, 0, 0) -+ -+DEFPARAM (PARAM_IPA_CP_MAX_RECURSIVE_DEPTH, -+ "ipa-cp-max-recursive-depth", -+ "Threshold ipa-cp opportunity evaluation that is still considered " -+ "Maximum depth of recursive cloning for self-recursive function.", -+ 8, 0, 0) -+ - DEFPARAM (PARAM_IPA_CP_EVAL_THRESHOLD, - "ipa-cp-eval-threshold", - "Threshold ipa-cp opportunity evaluation that is still considered " -@@ -1129,6 +1141,18 @@ DEFPARAM (PARAM_IPA_MAX_AA_STEPS, - "parameter analysis based on alias analysis in any given function.", - 25000, 0, 0) - -+DEFPARAM (PARAM_IPA_MAX_SWITCH_PREDICATE_BOUNDS, -+ "ipa-max-switch-predicate-bounds", -+ "Maximal number of boundary endpoints of case ranges of switch " -+ "statement used during IPA functoin summary generation.", -+ 5, 0, 0) -+ -+DEFPARAM (PARAM_IPA_MAX_PARAM_EXPR_OPS, -+ "ipa-max-param-expr-ops", -+ "Maximum number of operations in a parameter expression that can " -+ "be handled by IPA analysis.", -+ 10, 0, 0) -+ - /* WHOPR partitioning configuration. */ - - DEFPARAM (PARAM_LTO_PARTITIONS, -diff -Nurp a/gcc/passes.c b/gcc/passes.c ---- a/gcc/passes.c 2020-04-30 15:14:04.632000000 +0800 -+++ b/gcc/passes.c 2020-04-30 15:14:56.592000000 +0800 -@@ -3047,7 +3047,7 @@ function_called_by_processed_nodes_p (vo - continue; - if (TREE_ASM_WRITTEN (e->caller->decl)) - continue; -- if (!e->caller->process && !e->caller->global.inlined_to) -+ if (!e->caller->process && !e->caller->inlined_to) - break; - } - if (dump_file && e) -diff -Nurp a/gcc/symtab.c b/gcc/symtab.c ---- a/gcc/symtab.c 2020-04-30 15:14:04.636000000 +0800 -+++ b/gcc/symtab.c 2020-04-30 15:14:56.592000000 +0800 -@@ -1874,7 +1874,7 @@ symtab_node::get_partitioning_class (voi - if (DECL_ABSTRACT_P (decl)) - return SYMBOL_EXTERNAL; - -- if (cnode && cnode->global.inlined_to) -+ if (cnode && cnode->inlined_to) - return SYMBOL_DUPLICATE; - - /* Transparent aliases are always duplicated. */ -@@ -2274,7 +2274,7 @@ symtab_node::binds_to_current_def_p (sym - return true; - - /* Inline clones always binds locally. 
*/ -- if (cnode && cnode->global.inlined_to) -+ if (cnode && cnode->inlined_to) - return true; - - if (DECL_EXTERNAL (decl)) -@@ -2286,7 +2286,7 @@ symtab_node::binds_to_current_def_p (sym - { - cgraph_node *cref = dyn_cast (ref); - if (cref) -- ref = cref->global.inlined_to; -+ ref = cref->inlined_to; - } - - /* If this is a reference from symbol itself and there are no aliases, we -diff -Nurp a/gcc/testsuite/gcc.c-torture/compile/flatten.c b/gcc/testsuite/gcc.c-torture/compile/flatten.c ---- a/gcc/testsuite/gcc.c-torture/compile/flatten.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.c-torture/compile/flatten.c 2020-04-30 15:14:56.684000000 +0800 -@@ -0,0 +1,5 @@ -+int you_shall_not_flatten_me () __attribute__ ((flatten)); -+main() -+{ -+ you_shall_not_flatten_me (); -+} -diff -Nurp a/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c b/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c ---- a/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c 2020-04-30 15:14:56.696000000 +0800 -@@ -0,0 +1,47 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O3 -fdump-ipa-cp-details -fno-early-inlining --param ipa-cp-max-recursive-depth=8" } */ -+ -+int fn(); -+ -+int data[100]; -+ -+int recur_fn (int i) -+{ -+ int j; -+ -+ if (i == 6) -+ { -+ fn(); -+ fn(); -+ fn(); -+ fn(); -+ fn(); -+ fn(); -+ fn(); -+ fn(); -+ fn(); -+ fn(); -+ fn(); -+ fn(); -+ return 10; -+ } -+ -+ data[i] = i; -+ -+ for (j = 0; j < 100; j++) -+ recur_fn (i + 1); -+ -+ return i; -+} -+ -+int main () -+{ -+ int i; -+ -+ for (i = 0; i < 100; i++) -+ recur_fn (1) + recur_fn (-5); -+ -+ return 1; -+} -+ -+/* { dg-final { scan-ipa-dump-times "Creating a specialized node of recur_fn/\[0-9\]*\\." 12 "cp" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/ipa/ipcp-agg-10.c b/gcc/testsuite/gcc.dg/ipa/ipcp-agg-10.c ---- a/gcc/testsuite/gcc.dg/ipa/ipcp-agg-10.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/ipa/ipcp-agg-10.c 2020-04-30 15:14:56.664000000 +0800 -@@ -0,0 +1,78 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O3 -fdump-ipa-cp-details -fno-inline" } */ -+ -+int data1; -+ -+int callee1(int *v) -+{ -+ if (*v < 2) -+ return 0; -+ else -+ { -+ int t = data1; -+ -+ data1 = *v; -+ *v = t; -+ -+ return 1; -+ } -+} -+ -+int __attribute__((pure)) callee2(int *v) -+{ -+ if (*v < 2) -+ return 0; -+ else -+ { -+ data1 = v[0] + v[2]; -+ -+ return 1; -+ } -+} -+ -+int caller1(int c, int *r) -+{ -+ int a = 1; -+ -+ if (c) -+ return callee1(&a); -+ else -+ { -+ *r = 2; -+ return callee1(r); -+ } -+} -+ -+int data2[200]; -+int data3; -+ -+int __attribute__((const)) gen_cond(int); -+ -+int caller2(void) -+{ -+ int i, j; -+ int sum = 0; -+ int a[8]; -+ -+ a[0] = 3; -+ for (i = 0; i < 100; i++) -+ { -+ if (gen_cond (i)) -+ continue; -+ -+ a[2] = 4; -+ for (j = 0; j < 100; j++) -+ { -+ data2[i + j] = (i ^ j) + data3; -+ -+ sum += callee2(a); -+ } -+ } -+ -+ return sum; -+} -+ -+/* { dg-final { scan-ipa-dump-times "offset: 0, type: int, CONST: 1" 1 "cp" } } */ -+/* { dg-final { scan-ipa-dump-times "offset: 0, type: int, CONST: 2" 1 "cp" } } */ -+/* { dg-final { scan-ipa-dump-times "offset: 0, type: int, CONST: 3" 1 "cp" } } */ -+/* { dg-final { scan-ipa-dump-times "offset: 64, type: int, CONST: 4" 1 "cp" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/ipa/ipcp-agg-11.c b/gcc/testsuite/gcc.dg/ipa/ipcp-agg-11.c ---- a/gcc/testsuite/gcc.dg/ipa/ipcp-agg-11.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/ipa/ipcp-agg-11.c 2020-04-30 15:14:56.664000000 +0800 -@@ -0,0 +1,77 @@ 
-+/* { dg-do compile } */ -+/* { dg-options "-O3 -fno-ipa-sra -fdump-ipa-cp-details -fno-early-inlining" } */ -+/* { dg-add-options bind_pic_locally } */ -+ -+struct S -+{ -+ int a, b, c; -+}; -+ -+void *blah(int, void *); -+ -+#define foo_body(p)\ -+{ \ -+ int i, c = (p)->c; \ -+ int b = (p)->b; \ -+ void *v = (void *) (p); \ -+ \ -+ for (i= 0; i< c; i++) \ -+ v = blah(b + i, v); \ -+} -+ -+static void __attribute__ ((noinline)) -+foo_v (struct S s) -+{ -+ foo_body (&s); -+} -+ -+static void __attribute__ ((noinline)) -+foo_r (struct S *p) -+{ -+ foo_body (p); -+} -+ -+static void -+goo_v (int a, int *p) -+{ -+ struct S s; -+ s.a = 101; -+ s.b = a % 7; -+ s.c = *p + 6; -+ foo_v (s); -+} -+ -+static void -+goo_r (int a, struct S n) -+{ -+ struct S s; -+ s.a = 1; -+ s.b = a + 5; -+ s.c = -n.b; -+ foo_r (&s); -+} -+ -+void -+entry () -+{ -+ int a; -+ int v; -+ struct S s; -+ -+ a = 9; -+ v = 3; -+ goo_v (a, &v); -+ -+ a = 100; -+ s.b = 18; -+ goo_r (a, s); -+} -+ -+/* { dg-final { scan-ipa-dump "offset: 0, type: int, CONST: 1" "cp" } } */ -+/* { dg-final { scan-ipa-dump "offset: 32, type: int, PASS THROUGH: 0, op plus_expr 5" "cp" } } */ -+/* { dg-final { scan-ipa-dump "offset: 64, type: int, LOAD AGG: 1 \\\[offset: 32, by value], op negate_expr" "cp" } } */ -+/* { dg-final { scan-ipa-dump "offset: 0, type: int, CONST: 101" "cp" } } */ -+/* { dg-final { scan-ipa-dump "offset: 32, type: int, PASS THROUGH: 0, op trunc_mod_expr 7" "cp" } } */ -+/* { dg-final { scan-ipa-dump "offset: 64, type: int, LOAD AGG: 1 \\\[offset: 0, by reference], op plus_expr 6" "cp" } } */ -+/* { dg-final { scan-ipa-dump "Aggregate replacements: 0\\\[0]=1, 0\\\[32]=105, 0\\\[64]=-18" "cp" } } */ -+/* { dg-final { scan-ipa-dump "Aggregate replacements: 0\\\[0]=101, 0\\\[32]=2, 0\\\[64]=9" "cp" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/ipa/pr91089.c b/gcc/testsuite/gcc.dg/ipa/pr91089.c ---- a/gcc/testsuite/gcc.dg/ipa/pr91089.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/ipa/pr91089.c 2020-04-30 15:14:56.516000000 +0800 -@@ -0,0 +1,62 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O3 -fdump-ipa-cp-details -fdump-ipa-fnsummary-details --param ipa-max-switch-predicate-bounds=10 -fno-inline" } */ -+ -+int fn (); -+ -+int data; -+ -+int callee (int i) -+{ -+ switch (i) -+ { -+ case -126: return i + 13; -+ case -127: return i + 5; -+ case -8: return i * i; -+ case 0: return i % 9; -+ case 5: -+ case 7: -+ case 6: return 3; -+ default: -+ fn (); -+ fn (); -+ fn (); -+ fn (); -+ fn (); -+ fn (); -+ fn (); -+ fn (); -+ fn (); -+ fn (); -+ fn (); -+ fn (); -+ fn (); -+ fn (); -+ fn (); -+ fn (); -+ fn (); -+ fn (); -+ fn (); -+ } -+ -+ return data += i; -+} -+ -+int caller () -+{ -+ return callee (-127) + -+ callee (-126) + -+ callee (-8) + -+ callee (0) + -+ callee (5) + -+ callee (6) + -+ callee (7) + -+ callee (100); -+} -+ -+/* { dg-final { scan-ipa-dump-times "Creating a specialized node of callee" 7 "cp" } } */ -+/* { dg-final { scan-ipa-dump "op0 < -127" "fnsummary" } } */ -+/* { dg-final { scan-ipa-dump "op0 > -126" "fnsummary" } } */ -+/* { dg-final { scan-ipa-dump "op0 != -8" "fnsummary" } } */ -+/* { dg-final { scan-ipa-dump "op0 != 0" "fnsummary" } } */ -+/* { dg-final { scan-ipa-dump "op0 < 5" "fnsummary" } } */ -+/* { dg-final { scan-ipa-dump "op0 > 7" "fnsummary" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr46076.c b/gcc/testsuite/gcc.dg/tree-ssa/pr46076.c ---- a/gcc/testsuite/gcc.dg/tree-ssa/pr46076.c 2020-04-30 15:14:05.756000000 +0800 -+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr46076.c 
2020-04-30 15:14:56.640000000 +0800 -@@ -19,9 +19,12 @@ main() - { - /* Make sure we perform indirect inlining of one and two and optimize - the result to a constant. */ -- if (print(one) != 3) -- link_error (); -- if (print(two) != 5) -- link_error (); -+ for (int i = 0; i < 100; i++) -+ { -+ if (print(one) != 3) -+ link_error (); -+ if (print(two) != 5) -+ link_error (); -+ } - return 0; - } -diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-73.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-73.c ---- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-73.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-73.c 2020-04-30 15:14:56.472000000 +0800 -@@ -0,0 +1,14 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -fdump-tree-fre1" } */ -+ -+typedef int v2si __attribute__((vector_size(__SIZEOF_INT__ * 2))); -+int foo (int *a) -+{ -+ a[0] = 1; -+ a[1] = 2; -+ v2si x = *(v2si *)a; -+ *(v2si *)&a[2] = x; -+ return a[3]; -+} -+ -+/* { dg-final { scan-tree-dump "return 2;" "fre1" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-74.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-74.c ---- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-74.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-74.c 2020-04-30 15:14:56.472000000 +0800 -@@ -0,0 +1,16 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -fdump-tree-fre1" } */ -+ -+typedef int v4si __attribute__((vector_size(__SIZEOF_INT__ * 4))); -+int foo (int *a) -+{ -+ a[2] = 2; -+ a[0] = 0; -+ a[1] = 1; -+ a[3] = 4; -+ v4si x = *(v4si *)a; -+ *(v4si *)&a[4] = x; -+ return a[4] + a[7]; -+} -+ -+/* { dg-final { scan-tree-dump "return 4;" "fre1" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-76.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-76.c ---- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-76.c 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-76.c 2020-04-30 15:14:56.472000000 +0800 -@@ -0,0 +1,16 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O -fdump-tree-fre1" } */ -+ -+typedef int v4si __attribute__((vector_size(__SIZEOF_INT__ * 4))); -+int foo (int *a) -+{ -+ __builtin_memset (a, 0, 2 * __SIZEOF_INT__); -+ a[2] = 2; -+ a[0] = 1; -+ a[3] = 3; -+ v4si x = *(v4si *)a; -+ *(v4si *)&a[4] = x; -+ return a[4] + a[5] + a[7]; -+} -+ -+/* { dg-final { scan-tree-dump "return 4;" "fre1" } } */ -diff -Nurp a/gcc/tree-sra.c b/gcc/tree-sra.c ---- a/gcc/tree-sra.c 2020-04-30 15:14:04.568000000 +0800 -+++ b/gcc/tree-sra.c 2020-04-30 15:14:56.556000000 +0800 -@@ -5488,7 +5488,7 @@ ipa_sra_preliminary_function_checks (str - - if ((DECL_ONE_ONLY (node->decl) || DECL_EXTERNAL (node->decl)) - && ipa_fn_summaries->get (node) -- && ipa_fn_summaries->get (node)->size >= MAX_INLINE_INSNS_AUTO) -+ && ipa_size_summaries->get (node)->size >= MAX_INLINE_INSNS_AUTO) - { - if (dump_file) - fprintf (dump_file, "Function too big to be made truly local.\n"); -diff -Nurp a/gcc/tree-ssa-alias.c b/gcc/tree-ssa-alias.c ---- a/gcc/tree-ssa-alias.c 2020-04-30 15:14:04.648000000 +0800 -+++ b/gcc/tree-ssa-alias.c 2020-04-30 15:14:56.540000000 +0800 -@@ -1822,14 +1822,16 @@ ref_maybe_used_by_call_p_1 (gcall *call, - if (callee != NULL_TREE && VAR_P (base) && TREE_STATIC (base)) - { - struct cgraph_node *node = cgraph_node::get (callee); -- bitmap not_read; -+ bitmap read; -+ int id; - - /* FIXME: Callee can be an OMP builtin that does not have a call graph - node yet. We should enforce that there are nodes for all decls in the - IL and remove this check instead. 
*/ - if (node -- && (not_read = ipa_reference_get_not_read_global (node)) -- && bitmap_bit_p (not_read, ipa_reference_var_uid (base))) -+ && (id = ipa_reference_var_uid (base)) != -1 -+ && (read = ipa_reference_get_read_global (node)) -+ && !bitmap_bit_p (read, id)) - goto process_args; - } - -@@ -2217,11 +2219,13 @@ call_may_clobber_ref_p_1 (gcall *call, a - if (callee != NULL_TREE && VAR_P (base) && TREE_STATIC (base)) - { - struct cgraph_node *node = cgraph_node::get (callee); -- bitmap not_written; -+ bitmap written; -+ int id; - - if (node -- && (not_written = ipa_reference_get_not_written_global (node)) -- && bitmap_bit_p (not_written, ipa_reference_var_uid (base))) -+ && (id = ipa_reference_var_uid (base)) != -1 -+ && (written = ipa_reference_get_written_global (node)) -+ && !bitmap_bit_p (written, id)) - return false; - } - -diff -Nurp a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c ---- a/gcc/tree-ssa-sccvn.c 2020-04-30 15:14:04.632000000 +0800 -+++ b/gcc/tree-ssa-sccvn.c 2020-04-30 15:14:56.480000000 +0800 -@@ -21,6 +21,7 @@ along with GCC; see the file COPYING3. - #include "config.h" - #include "system.h" - #include "coretypes.h" -+#include "splay-tree.h" - #include "backend.h" - #include "rtl.h" - #include "tree.h" -@@ -361,6 +362,8 @@ static void init_vn_nary_op_from_stmt (v - static void init_vn_nary_op_from_pieces (vn_nary_op_t, unsigned int, - enum tree_code, tree, tree *); - static tree vn_lookup_simplify_result (gimple_match_op *); -+static vn_reference_t vn_reference_lookup_or_insert_for_pieces -+ (tree, alias_set_type, tree, vec, tree); - - /* Return whether there is value numbering information for a given SSA name. */ - -@@ -1676,20 +1679,245 @@ vn_reference_lookup_1 (vn_reference_t vr - return NULL_TREE; - } - -+ -+/* Partial definition tracking support. */ -+ -+struct pd_range -+{ -+ HOST_WIDE_INT offset; -+ HOST_WIDE_INT size; -+}; -+ -+struct pd_data -+{ -+ tree rhs; -+ HOST_WIDE_INT offset; -+ HOST_WIDE_INT size; -+}; -+ -+/* Context for alias walking. */ -+ - struct vn_walk_cb_data - { - vn_walk_cb_data (vn_reference_t vr_, tree *last_vuse_ptr_, -- vn_lookup_kind vn_walk_kind_, bool tbaa_p_) -+ vn_lookup_kind vn_walk_kind_, bool tbaa_p_) - : vr (vr_), last_vuse_ptr (last_vuse_ptr_), vn_walk_kind (vn_walk_kind_), -- tbaa_p (tbaa_p_) -- {} -+ tbaa_p (tbaa_p_), known_ranges (NULL) -+ {} -+ ~vn_walk_cb_data (); -+ void *push_partial_def (const pd_data& pd, tree, HOST_WIDE_INT); - - vn_reference_t vr; - tree *last_vuse_ptr; - vn_lookup_kind vn_walk_kind; - bool tbaa_p; -+ -+ /* The VDEFs of partial defs we come along. */ -+ auto_vec partial_defs; -+ /* The first defs range to avoid splay tree setup in most cases. */ -+ pd_range first_range; -+ tree first_vuse; -+ splay_tree known_ranges; -+ obstack ranges_obstack; - }; - -+vn_walk_cb_data::~vn_walk_cb_data () -+{ -+ if (known_ranges) -+ { -+ splay_tree_delete (known_ranges); -+ obstack_free (&ranges_obstack, NULL); -+ } -+} -+ -+/* pd_range splay-tree helpers. 
*/ -+ -+static int -+pd_range_compare (splay_tree_key offset1p, splay_tree_key offset2p) -+{ -+ HOST_WIDE_INT offset1 = *(HOST_WIDE_INT *)offset1p; -+ HOST_WIDE_INT offset2 = *(HOST_WIDE_INT *)offset2p; -+ if (offset1 < offset2) -+ return -1; -+ else if (offset1 > offset2) -+ return 1; -+ return 0; -+} -+ -+static void * -+pd_tree_alloc (int size, void *data_) -+{ -+ vn_walk_cb_data *data = (vn_walk_cb_data *)data_; -+ return obstack_alloc (&data->ranges_obstack, size); -+} -+ -+static void -+pd_tree_dealloc (void *, void *) -+{ -+} -+ -+/* Push PD to the vector of partial definitions returning a -+ value when we are ready to combine things with VUSE and MAXSIZEI, -+ NULL when we want to continue looking for partial defs or -1 -+ on failure. */ -+ -+void * -+vn_walk_cb_data::push_partial_def (const pd_data &pd, tree vuse, -+ HOST_WIDE_INT maxsizei) -+{ -+ if (partial_defs.is_empty ()) -+ { -+ partial_defs.safe_push (pd); -+ first_range.offset = pd.offset; -+ first_range.size = pd.size; -+ first_vuse = vuse; -+ last_vuse_ptr = NULL; -+ } -+ else -+ { -+ if (!known_ranges) -+ { -+ /* ??? Optimize the case where the second partial def -+ completes things. */ -+ gcc_obstack_init (&ranges_obstack); -+ known_ranges -+ = splay_tree_new_with_allocator (pd_range_compare, 0, 0, -+ pd_tree_alloc, -+ pd_tree_dealloc, this); -+ splay_tree_insert (known_ranges, -+ (splay_tree_key)&first_range.offset, -+ (splay_tree_value)&first_range); -+ } -+ if (known_ranges) -+ { -+ pd_range newr = { pd.offset, pd.size }; -+ splay_tree_node n; -+ pd_range *r; -+ /* Lookup the predecessor of offset + 1 and see if -+ we need to merge with it. */ -+ HOST_WIDE_INT loffset = newr.offset + 1; -+ if ((n = splay_tree_predecessor (known_ranges, -+ (splay_tree_key)&loffset)) -+ && ((r = (pd_range *)n->value), true) -+ && ranges_known_overlap_p (r->offset, r->size + 1, -+ newr.offset, newr.size)) -+ { -+ /* Ignore partial defs already covered. */ -+ if (known_subrange_p (newr.offset, newr.size, -+ r->offset, r->size)) -+ return NULL; -+ r->size = MAX (r->offset + r->size, -+ newr.offset + newr.size) - r->offset; -+ } -+ else -+ { -+ /* newr.offset wasn't covered yet, insert the -+ range. */ -+ r = XOBNEW (&ranges_obstack, pd_range); -+ *r = newr; -+ splay_tree_insert (known_ranges, -+ (splay_tree_key)&r->offset, -+ (splay_tree_value)r); -+ } -+ /* Merge r which now contains newr and is a member -+ of the splay tree with adjacent overlapping ranges. */ -+ pd_range *rafter; -+ while ((n = splay_tree_successor (known_ranges, -+ (splay_tree_key)&r->offset)) -+ && ((rafter = (pd_range *)n->value), true) -+ && ranges_known_overlap_p (r->offset, r->size + 1, -+ rafter->offset, rafter->size)) -+ { -+ r->size = MAX (r->offset + r->size, -+ rafter->offset + rafter->size) - r->offset; -+ splay_tree_remove (known_ranges, -+ (splay_tree_key)&rafter->offset); -+ } -+ partial_defs.safe_push (pd); -+ -+ /* Now we have merged newr into the range tree. -+ When we have covered [offseti, sizei] then the -+ tree will contain exactly one node which has -+ the desired properties and it will be 'r'. */ -+ if (known_subrange_p (0, maxsizei / BITS_PER_UNIT, -+ r->offset, r->size)) -+ { -+ /* Now simply native encode all partial defs -+ in reverse order. */ -+ unsigned ndefs = partial_defs.length (); -+ /* We support up to 512-bit values (for V8DFmode). */ -+ unsigned char buffer[64]; -+ int len; -+ -+ while (!partial_defs.is_empty ()) -+ { -+ pd_data pd = partial_defs.pop (); -+ if (TREE_CODE (pd.rhs) == CONSTRUCTOR) -+ /* Empty CONSTRUCTOR. 
*/ -+ memset (buffer + MAX (0, pd.offset), -+ 0, MIN ((HOST_WIDE_INT)sizeof (buffer), pd.size)); -+ else -+ { -+ len = native_encode_expr (pd.rhs, -+ buffer + MAX (0, pd.offset), -+ sizeof (buffer - MAX (0, pd.offset)), -+ MAX (0, -pd.offset)); -+ if (len <= 0 -+ || len < (pd.size - MAX (0, -pd.offset))) -+ { -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ fprintf (dump_file, "Failed to encode %u " -+ "partial definitions\n", ndefs); -+ return (void *)-1; -+ } -+ } -+ } -+ -+ tree type = vr->type; -+ /* Make sure to interpret in a type that has a range -+ covering the whole access size. */ -+ if (INTEGRAL_TYPE_P (vr->type) -+ && maxsizei != TYPE_PRECISION (vr->type)) -+ type = build_nonstandard_integer_type (maxsizei, -+ TYPE_UNSIGNED (type)); -+ tree val = native_interpret_expr (type, buffer, -+ maxsizei / BITS_PER_UNIT); -+ /* If we chop off bits because the types precision doesn't -+ match the memory access size this is ok when optimizing -+ reads but not when called from the DSE code during -+ elimination. */ -+ if (val -+ && type != vr->type) -+ { -+ if (! int_fits_type_p (val, vr->type)) -+ val = NULL_TREE; -+ else -+ val = fold_convert (vr->type, val); -+ } -+ -+ if (val) -+ { -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ fprintf (dump_file, "Successfully combined %u " -+ "partial definitions\n", ndefs); -+ return vn_reference_lookup_or_insert_for_pieces -+ (first_vuse, -+ vr->set, vr->type, vr->operands, val); -+ } -+ else -+ { -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ fprintf (dump_file, "Failed to interpret %u " -+ "encoded partial definitions\n", ndefs); -+ return (void *)-1; -+ } -+ } -+ } -+ } -+ /* Continue looking for partial defs. */ -+ return NULL; -+} -+ - /* Callback for walk_non_aliased_vuses. Adjusts the vn_reference_t VR_ - with the current VUSE and performs the expression lookup. */ - -@@ -1701,6 +1929,11 @@ vn_reference_lookup_2 (ao_ref *op ATTRIB - vn_reference_s **slot; - hashval_t hash; - -+ /* If we have partial definitions recorded we have to go through -+ vn_reference_lookup_3. */ -+ if (!data->partial_defs.is_empty ()) -+ return NULL; -+ - if (data->last_vuse_ptr) - *data->last_vuse_ptr = vuse; - -@@ -1964,6 +2197,33 @@ public: - static rpo_elim *rpo_avail; - basic_block vn_context_bb; - -+/* Return true if BASE1 and BASE2 can be adjusted so they have the -+ same address and adjust *OFFSET1 and *OFFSET2 accordingly. -+ Otherwise return false. */ -+ -+static bool -+adjust_offsets_for_equal_base_address (tree base1, poly_int64 *offset1, -+ tree base2, poly_int64 *offset2) -+{ -+ poly_int64 soff; -+ if (TREE_CODE (base1) == MEM_REF -+ && TREE_CODE (base2) == MEM_REF) -+ { -+ if (mem_ref_offset (base1).to_shwi (&soff)) -+ { -+ base1 = TREE_OPERAND (base1, 0); -+ *offset1 += soff * BITS_PER_UNIT; -+ } -+ if (mem_ref_offset (base2).to_shwi (&soff)) -+ { -+ base2 = TREE_OPERAND (base2, 0); -+ *offset2 += soff * BITS_PER_UNIT; -+ } -+ return operand_equal_p (base1, base2, 0); -+ } -+ return operand_equal_p (base1, base2, OEP_ADDRESS_OF); -+} -+ - /* Callback for walk_non_aliased_vuses. 
Tries to perform a lookup - from the statement defining VUSE and if not successful tries to - translate *REFP and VR_ through an aggregate copy at the definition -@@ -2175,8 +2435,10 @@ vn_reference_lookup_3 (ao_ref *ref, tree - else - return (void *)-1; - tree len = gimple_call_arg (def_stmt, 2); -- if (known_subrange_p (offset, maxsize, offset2, -- wi::to_poly_offset (len) << LOG2_BITS_PER_UNIT)) -+ HOST_WIDE_INT leni, offset2i, offseti; -+ if (data->partial_defs.is_empty () -+ && known_subrange_p (offset, maxsize, offset2, -+ wi::to_poly_offset (len) << LOG2_BITS_PER_UNIT)) - { - tree val; - if (integer_zerop (gimple_call_arg (def_stmt, 1))) -@@ -2205,6 +2467,19 @@ vn_reference_lookup_3 (ao_ref *ref, tree - return vn_reference_lookup_or_insert_for_pieces - (vuse, vr->set, vr->type, vr->operands, val); - } -+ /* For now handle clearing memory with partial defs. */ -+ else if (integer_zerop (gimple_call_arg (def_stmt, 1)) -+ && tree_to_poly_int64 (len).is_constant (&leni) -+ && offset.is_constant (&offseti) -+ && offset2.is_constant (&offset2i) -+ && maxsize.is_constant (&maxsizei)) -+ { -+ pd_data pd; -+ pd.rhs = build_constructor (NULL_TREE, NULL); -+ pd.offset = offset2i - offseti; -+ pd.size = leni; -+ return data->push_partial_def (pd, vuse, maxsizei); -+ } - } - - /* 2) Assignment from an empty CONSTRUCTOR. */ -@@ -2215,17 +2490,37 @@ vn_reference_lookup_3 (ao_ref *ref, tree - { - tree base2; - poly_int64 offset2, size2, maxsize2; -+ HOST_WIDE_INT offset2i, size2i; - bool reverse; - base2 = get_ref_base_and_extent (gimple_assign_lhs (def_stmt), - &offset2, &size2, &maxsize2, &reverse); - if (known_size_p (maxsize2) - && known_eq (maxsize2, size2) -- && operand_equal_p (base, base2, 0) -- && known_subrange_p (offset, maxsize, offset2, size2)) -+ && adjust_offsets_for_equal_base_address (base, &offset, -+ base2, &offset2)) - { -- tree val = build_zero_cst (vr->type); -- return vn_reference_lookup_or_insert_for_pieces -- (vuse, vr->set, vr->type, vr->operands, val); -+ if (data->partial_defs.is_empty () -+ && known_subrange_p (offset, maxsize, offset2, size2)) -+ { -+ tree val = build_zero_cst (vr->type); -+ return vn_reference_lookup_or_insert_for_pieces -+ (vuse, vr->set, vr->type, vr->operands, val); -+ } -+ else if (maxsize.is_constant (&maxsizei) -+ && maxsizei % BITS_PER_UNIT == 0 -+ && offset.is_constant (&offseti) -+ && offseti % BITS_PER_UNIT == 0 -+ && offset2.is_constant (&offset2i) -+ && offset2i % BITS_PER_UNIT == 0 -+ && size2.is_constant (&size2i) -+ && size2i % BITS_PER_UNIT == 0) -+ { -+ pd_data pd; -+ pd.rhs = gimple_assign_rhs1 (def_stmt); -+ pd.offset = (offset2i - offseti) / BITS_PER_UNIT; -+ pd.size = size2i / BITS_PER_UNIT; -+ return data->push_partial_def (pd, vuse, maxsizei); -+ } - } - } - -@@ -2247,65 +2542,85 @@ vn_reference_lookup_3 (ao_ref *ref, tree - && is_gimple_min_invariant (SSA_VAL (gimple_assign_rhs1 (def_stmt)))))) - { - tree base2; -- HOST_WIDE_INT offset2, size2; -+ poly_int64 offset2, size2, maxsize2; -+ HOST_WIDE_INT offset2i, size2i; - bool reverse; -- base2 = get_ref_base_and_extent_hwi (gimple_assign_lhs (def_stmt), -- &offset2, &size2, &reverse); -+ base2 = get_ref_base_and_extent (gimple_assign_lhs (def_stmt), -+ &offset2, &size2, &maxsize2, &reverse); - if (base2 - && !reverse -- && size2 % BITS_PER_UNIT == 0 -- && offset2 % BITS_PER_UNIT == 0 -- && operand_equal_p (base, base2, 0) -- && known_subrange_p (offseti, maxsizei, offset2, size2)) -- { -- /* We support up to 512-bit values (for V8DFmode). 
*/ -- unsigned char buffer[64]; -- int len; -- -- tree rhs = gimple_assign_rhs1 (def_stmt); -- if (TREE_CODE (rhs) == SSA_NAME) -- rhs = SSA_VAL (rhs); -- unsigned pad = 0; -- if (BYTES_BIG_ENDIAN -- && is_a (TYPE_MODE (TREE_TYPE (rhs)))) -- { -- /* On big-endian the padding is at the 'front' so -- just skip the initial bytes. */ -- fixed_size_mode mode -- = as_a (TYPE_MODE (TREE_TYPE (rhs))); -- pad = GET_MODE_SIZE (mode) - size2 / BITS_PER_UNIT; -- } -- len = native_encode_expr (rhs, -- buffer, sizeof (buffer), -- ((offseti - offset2) / BITS_PER_UNIT -- + pad)); -- if (len > 0 && len * BITS_PER_UNIT >= maxsizei) -- { -- tree type = vr->type; -- /* Make sure to interpret in a type that has a range -- covering the whole access size. */ -- if (INTEGRAL_TYPE_P (vr->type) -- && maxsizei != TYPE_PRECISION (vr->type)) -- type = build_nonstandard_integer_type (maxsizei, -- TYPE_UNSIGNED (type)); -- tree val = native_interpret_expr (type, buffer, -- maxsizei / BITS_PER_UNIT); -- /* If we chop off bits because the types precision doesn't -- match the memory access size this is ok when optimizing -- reads but not when called from the DSE code during -- elimination. */ -- if (val -- && type != vr->type) -+ && known_eq (maxsize2, size2) -+ && multiple_p (size2, BITS_PER_UNIT) -+ && multiple_p (offset2, BITS_PER_UNIT) -+ && adjust_offsets_for_equal_base_address (base, &offset, -+ base2, &offset2) -+ && offset.is_constant (&offseti) -+ && offset2.is_constant (&offset2i) -+ && size2.is_constant (&size2i)) -+ { -+ if (data->partial_defs.is_empty () -+ && known_subrange_p (offseti, maxsizei, offset2, size2)) -+ { -+ /* We support up to 512-bit values (for V8DFmode). */ -+ unsigned char buffer[64]; -+ int len; -+ -+ tree rhs = gimple_assign_rhs1 (def_stmt); -+ if (TREE_CODE (rhs) == SSA_NAME) -+ rhs = SSA_VAL (rhs); -+ unsigned pad = 0; -+ if (BYTES_BIG_ENDIAN -+ && is_a (TYPE_MODE (TREE_TYPE (rhs)))) - { -- if (! int_fits_type_p (val, vr->type)) -- val = NULL_TREE; -- else -- val = fold_convert (vr->type, val); -+ /* On big-endian the padding is at the 'front' so -+ just skip the initial bytes. */ -+ fixed_size_mode mode -+ = as_a (TYPE_MODE (TREE_TYPE (rhs))); -+ pad = GET_MODE_SIZE (mode) - size2i / BITS_PER_UNIT; - } -- -- if (val) -- return vn_reference_lookup_or_insert_for_pieces -- (vuse, vr->set, vr->type, vr->operands, val); -+ len = native_encode_expr (rhs, -+ buffer, sizeof (buffer), -+ ((offseti - offset2i) / BITS_PER_UNIT -+ + pad)); -+ if (len > 0 && len * BITS_PER_UNIT >= maxsizei) -+ { -+ tree type = vr->type; -+ /* Make sure to interpret in a type that has a range -+ covering the whole access size. */ -+ if (INTEGRAL_TYPE_P (vr->type) -+ && maxsizei != TYPE_PRECISION (vr->type)) -+ type = build_nonstandard_integer_type (maxsizei, -+ TYPE_UNSIGNED (type)); -+ tree val = native_interpret_expr (type, buffer, -+ maxsizei / BITS_PER_UNIT); -+ /* If we chop off bits because the types precision doesn't -+ match the memory access size this is ok when optimizing -+ reads but not when called from the DSE code during -+ elimination. */ -+ if (val -+ && type != vr->type) -+ { -+ if (! 
int_fits_type_p (val, vr->type)) -+ val = NULL_TREE; -+ else -+ val = fold_convert (vr->type, val); -+ } -+ -+ if (val) -+ return vn_reference_lookup_or_insert_for_pieces -+ (vuse, vr->set, vr->type, vr->operands, val); -+ } -+ } -+ else if (ranges_known_overlap_p (offseti, maxsizei, offset2i, size2i)) -+ { -+ pd_data pd; -+ tree rhs = gimple_assign_rhs1 (def_stmt); -+ if (TREE_CODE (rhs) == SSA_NAME) -+ rhs = SSA_VAL (rhs); -+ pd.rhs = rhs; -+ pd.offset = (offset2i - offseti) / BITS_PER_UNIT; -+ pd.size = size2i / BITS_PER_UNIT; -+ return data->push_partial_def (pd, vuse, maxsizei); - } - } - } -@@ -2316,7 +2631,12 @@ vn_reference_lookup_3 (ao_ref *ref, tree - && is_gimple_reg_type (vr->type) - && !contains_storage_order_barrier_p (vr->operands) - && gimple_assign_single_p (def_stmt) -- && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME) -+ && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME -+ /* A subset of partial defs from non-constants can be handled -+ by for example inserting a CONSTRUCTOR, a COMPLEX_EXPR or -+ even a (series of) BIT_INSERT_EXPR hoping for simplifications -+ downstream, not so much for actually doing the insertion. */ -+ && data->partial_defs.is_empty ()) - { - tree base2; - poly_int64 offset2, size2, maxsize2; -@@ -2328,7 +2648,8 @@ vn_reference_lookup_3 (ao_ref *ref, tree - if (!reverse - && known_size_p (maxsize2) - && known_eq (maxsize2, size2) -- && operand_equal_p (base, base2, 0) -+ && adjust_offsets_for_equal_base_address (base, &offset, -+ base2, &offset2) - && known_subrange_p (offset, maxsize, offset2, size2) - /* ??? We can't handle bitfield precision extracts without - either using an alternate type for the BIT_FIELD_REF and -@@ -2363,7 +2684,9 @@ vn_reference_lookup_3 (ao_ref *ref, tree - && gimple_assign_single_p (def_stmt) - && (DECL_P (gimple_assign_rhs1 (def_stmt)) - || TREE_CODE (gimple_assign_rhs1 (def_stmt)) == MEM_REF -- || handled_component_p (gimple_assign_rhs1 (def_stmt)))) -+ || handled_component_p (gimple_assign_rhs1 (def_stmt))) -+ /* Handling this is more complicated, give up for now. */ -+ && data->partial_defs.is_empty ()) - { - tree base2; - int i, j, k; -@@ -2497,7 +2820,9 @@ vn_reference_lookup_3 (ao_ref *ref, tree - || TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME) - && (TREE_CODE (gimple_call_arg (def_stmt, 1)) == ADDR_EXPR - || TREE_CODE (gimple_call_arg (def_stmt, 1)) == SSA_NAME) -- && poly_int_tree_p (gimple_call_arg (def_stmt, 2), ©_size)) -+ && poly_int_tree_p (gimple_call_arg (def_stmt, 2), ©_size) -+ /* Handling this is more complicated, give up for now. */ -+ && data->partial_defs.is_empty ()) - { - tree lhs, rhs; - ao_ref r; -diff -Nurp a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c ---- a/gcc/tree-ssa-structalias.c 2020-04-30 15:14:04.644000000 +0800 -+++ b/gcc/tree-ssa-structalias.c 2020-04-30 15:14:56.592000000 +0800 -@@ -7817,7 +7817,7 @@ associate_varinfo_to_alias (struct cgrap - { - if ((node->alias - || (node->thunk.thunk_p -- && ! node->global.inlined_to)) -+ && ! node->inlined_to)) - && node->analyzed - && !node->ifunc_resolver) - insert_vi_for_tree (node->decl, (varinfo_t)data); -@@ -7987,7 +7987,7 @@ ipa_pta_execute (void) - /* Nodes without a body are not interesting. Especially do not - visit clones at this point for now - we get duplicate decls - there for inline clones at least. 
*/ -- if (!node->has_gimple_body_p () || node->global.inlined_to) -+ if (!node->has_gimple_body_p () || node->inlined_to) - continue; - node->get_body (); - diff --git a/ipa-struct-reorg-bugfix.patch b/ipa-struct-reorg-bugfix.patch deleted file mode 100644 index 571f608a0cb196950945e02ae3766ed80b88e3ae..0000000000000000000000000000000000000000 --- a/ipa-struct-reorg-bugfix.patch +++ /dev/null @@ -1,619 +0,0 @@ -diff -Nurp a/gcc/fold-const.c b/gcc/fold-const.c ---- a/gcc/fold-const.c 2020-09-17 02:26:36.900000000 -0400 -+++ b/gcc/fold-const.c 2020-09-17 02:27:57.368000000 -0400 -@@ -7165,15 +7165,9 @@ fold_plusminus_mult_expr (location_t loc - increased the number of multiplications necessary. */ - && TREE_CODE (arg10) != INTEGER_CST) - { -- HOST_WIDE_INT tmp1 = int01 / int11; -- HOST_WIDE_INT t = exact_log2 (absu_hwi (int11)); -- HOST_WIDE_INT size = tree_to_shwi (TYPE_SIZE_UNIT (TREE_TYPE (arg00))) * BITS_PER_UNIT; -- HOST_WIDE_INT sign_bit = HOST_WIDE_INT_1U << (size - t - 1); -- if (tmp1 & sign_bit) -- tmp1 |= HOST_WIDE_INT_M1U << (size - t); -- tree tmp2 = build_int_cst (TREE_TYPE (arg00), tmp1); - alt0 = fold_build2_loc (loc, MULT_EXPR, TREE_TYPE (arg00), arg00, -- tmp2); -+ build_int_cst (TREE_TYPE (arg00), -+ int01 / int11)); - alt1 = arg10; - same = maybe_same; - if (swap) -diff -Nurp a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c ---- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c 2020-09-17 02:26:36.900000000 -0400 -+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c 2020-09-17 02:34:04.040000000 -0400 -@@ -112,6 +112,29 @@ is_va_list_type (tree type) - return TYPE_MAIN_VARIANT (type) == TYPE_MAIN_VARIANT (va_list_type_node); - } - -+static const char * -+get_type_name (tree type) -+{ -+ const char *tname = NULL; -+ -+ if (type == NULL) -+ { -+ return NULL; -+ } -+ -+ if (TYPE_NAME (type) != NULL) -+ { -+ if (TREE_CODE (TYPE_NAME (type)) == IDENTIFIER_NODE) -+ { -+ tname = IDENTIFIER_POINTER (TYPE_NAME (type)); -+ } -+ else if (DECL_NAME (TYPE_NAME (type)) != NULL) -+ { -+ tname = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))); -+ } -+ } -+ return tname; -+} - - /* Return the inner most type for arrays and pointers of TYPE. */ - -@@ -463,10 +486,10 @@ srtype::analyze (void) - if (fields.length () == 2) - fields[1]->clusternum = 1; - -- /* REMOVEME: FIXME: this is here for testing more testcases. */ -+ /* Otherwise we do nothing. */ - if (fields.length () >= 3) - { -- fields[1]->clusternum = 1; -+ return; - } - } - -@@ -875,6 +898,7 @@ private: - void analyze_types (void); - void clear_visited (void); - bool create_new_types (void); -+ void restore_field_type (void); - void create_new_decls (void); - srdecl *find_decl (tree); - void create_new_functions (void); -@@ -1096,6 +1120,11 @@ ipa_struct_reorg::record_type (tree type - { - tree t = TREE_TYPE (field); - process_union (t); -+ if (TREE_CODE (inner_type (t)) == UNION_TYPE -+ || TREE_CODE (inner_type (t)) == QUAL_UNION_TYPE) -+ { -+ type1->mark_escape (escape_union, NULL); -+ } - if (isvolatile_type (t)) - type1->mark_escape (escape_volatile, NULL); - escape_type e = escape_type_volatile_array_or_ptrptr (t); -@@ -2818,6 +2847,49 @@ ipa_struct_reorg::analyze_types (void) - } - } - -+/* When struct A has a struct B member, B's type info -+ is not stored in -+ TYPE_FIELDS (TREE_TYPE (TYPE_FIELDS (typeA))) -+ Try to restore B's type information. 
*/ -+void -+ipa_struct_reorg::restore_field_type (void) -+{ -+ for (unsigned i = 0; i < types.length (); i++) -+ { -+ for (unsigned j = 0; j < types[i]->fields.length (); j++) -+ { -+ srfield *field = types[i]->fields[j]; -+ if (TREE_CODE (inner_type (field->fieldtype)) == RECORD_TYPE) -+ { -+ /* If field type has TYPE_FIELDS information, -+ we do not need to do this. */ -+ if (TYPE_FIELDS (field->type->type) != NULL) -+ { -+ continue; -+ } -+ for (unsigned k = 0; k < types.length (); k++) -+ { -+ if (i == k) -+ { -+ continue; -+ } -+ const char *type1 = get_type_name (field->type->type); -+ const char *type2 = get_type_name (types[k]->type); -+ if (type1 == NULL || type2 == NULL) -+ { -+ continue; -+ } -+ if (type1 == type2 -+ && TYPE_FIELDS (types[k]->type)) -+ { -+ field->type = types[k]; -+ } -+ } -+ } -+ } -+ } -+} -+ - /* Create all new types we want to create. */ - - bool -@@ -3669,7 +3741,7 @@ ipa_struct_reorg::rewrite_functions (voi - { - unsigned retval = 0; - -- -+ restore_field_type (); - /* Create new types, if we did not create any new types, - then don't rewrite any accesses. */ - if (!create_new_types ()) -diff -Nurp a/gcc/testsuite/gcc.c-torture/compile/20170404-1.c b/gcc/testsuite/gcc.c-torture/compile/20170404-1.c ---- a/gcc/testsuite/gcc.c-torture/compile/20170404-1.c 2020-09-17 02:26:36.904000000 -0400 -+++ b/gcc/testsuite/gcc.c-torture/compile/20170404-1.c 1969-12-31 19:00:00.000000000 -0500 -@@ -1,19 +0,0 @@ --struct a --{ -- int t, t1; --}; -- --static struct a *b; -- --void *xmalloc(int); -- -- --void f(void) --{ -- b = xmalloc (sizeof(*b)); --} -- --int g(void) --{ -- return b->t; --} -diff -Nurp a/gcc/testsuite/gcc.c-torture/compile/nested-3.c b/gcc/testsuite/gcc.c-torture/compile/nested-3.c ---- a/gcc/testsuite/gcc.c-torture/compile/nested-3.c 2020-09-17 02:26:36.904000000 -0400 -+++ b/gcc/testsuite/gcc.c-torture/compile/nested-3.c 2020-09-17 02:27:57.372000000 -0400 -@@ -1,4 +1,3 @@ --/* This used to crash Struct reorg. */ - struct a - { - int t; -diff -Nurp a/gcc/testsuite/gcc.c-torture/compile/struct-reorg-1.c b/gcc/testsuite/gcc.c-torture/compile/struct-reorg-1.c ---- a/gcc/testsuite/gcc.c-torture/compile/struct-reorg-1.c 2020-09-17 02:26:36.904000000 -0400 -+++ b/gcc/testsuite/gcc.c-torture/compile/struct-reorg-1.c 1969-12-31 19:00:00.000000000 -0500 -@@ -1,18 +0,0 @@ --#include --typedef struct { -- long laststart_offset; -- unsigned regnum; --} compile_stack_elt_t; --typedef struct { -- compile_stack_elt_t *stack; -- unsigned size; --} compile_stack_type; --void f (const char *p, const char *pend, int c) --{ -- compile_stack_type compile_stack; -- while (p != pend) -- if (c) -- compile_stack.stack = realloc (compile_stack.stack, -- (compile_stack.size << 1) -- * sizeof (compile_stack_elt_t)); --} -diff -Nurp a/gcc/testsuite/gcc.dg/pr33136-4.c b/gcc/testsuite/gcc.dg/pr33136-4.c ---- a/gcc/testsuite/gcc.dg/pr33136-4.c 2020-09-17 02:26:36.904000000 -0400 -+++ b/gcc/testsuite/gcc.dg/pr33136-4.c 1969-12-31 19:00:00.000000000 -0500 -@@ -1,59 +0,0 @@ --/* PR tree-optimization/33136 */ --/* { dg-do run } */ --/* { dg-options "-O2" } */ -- --extern void abort (void); -- --struct S --{ -- int b; -- int *c; --}; --static int d, e; -- --static struct S s; -- --static int * --__attribute__((noinline, const)) --foo (void) --{ -- return &s.b; --} -- --int * --__attribute__((noinline)) --bar (int **f) --{ -- s.c = &d; -- *f = &e; -- /* As nothing ever takes the address of any int * field in struct S, -- the write to *f can't alias with the s.c field. 
*/ -- return s.c; --} -- --int --__attribute__((noinline)) --baz (int *x) --{ -- s.b = 1; -- *x = 4; -- /* Function foo takes address of an int field in struct S, -- so *x can alias with the s.b field (and it does in this testcase). */ -- return s.b; --} -- --int --__attribute__((noinline)) --t (void) --{ -- int *f = (int *) 0; -- return 10 * (bar (&f) != &d) + baz (foo ()); --} -- --int --main (void) --{ -- if (t () != 4) -- abort (); -- return 0; --} -diff -Nurp a/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c ---- a/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c 2020-09-17 02:27:57.372000000 -0400 -@@ -0,0 +1,24 @@ -+// { dg-do compile } -+// { dg-options "-O3 -flto-partition=one -fipa-struct-reorg -fdump-ipa-all" } -+ -+struct a -+{ -+ int t, t1; -+}; -+ -+static struct a *b; -+ -+void *xmalloc(int); -+ -+ -+void f(void) -+{ -+ b = xmalloc (sizeof(*b)); -+} -+ -+int g(void) -+{ -+ return b->t; -+} -+ -+/* { dg-final { scan-ipa-dump "No structures to transform." "struct_reorg" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/struct_reorg-2.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-2.c ---- a/gcc/testsuite/gcc.dg/struct/struct_reorg-2.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-2.c 2020-09-17 02:27:57.372000000 -0400 -@@ -0,0 +1,29 @@ -+// { dg-do run } -+ -+#include -+ -+struct a -+{ -+ int t; -+ int t1; -+}; -+ -+__attribute__((noinline)) int f(int i, int j) -+{ -+ struct a *t; -+ struct a t1 = {i, j}; -+ t = &t1; -+ auto int g(void) __attribute__((noinline)); -+ int g(void) -+ { -+ return t->t + t->t1; -+ } -+ return g(); -+} -+ -+int main() -+{ -+ assert (f(1, 2) == 3); -+} -+ -+/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c ---- a/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c 2020-09-17 02:27:57.372000000 -0400 -@@ -0,0 +1,23 @@ -+// { dg-do compile } -+// { dg-options "-O3 -flto-partition=one -fipa-struct-reorg -fdump-ipa-all" } -+ -+#include -+typedef struct { -+ long laststart_offset; -+ unsigned regnum; -+} compile_stack_elt_t; -+typedef struct { -+ compile_stack_elt_t *stack; -+ unsigned size; -+} compile_stack_type; -+void f (const char *p, const char *pend, int c) -+{ -+ compile_stack_type compile_stack; -+ while (p != pend) -+ if (c) -+ compile_stack.stack = realloc (compile_stack.stack, -+ (compile_stack.size << 1) -+ * sizeof (compile_stack_elt_t)); -+} -+ -+/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/struct_reorg-4.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-4.c ---- a/gcc/testsuite/gcc.dg/struct/struct_reorg-4.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-4.c 2020-09-17 02:27:57.372000000 -0400 -@@ -0,0 +1,59 @@ -+/* { dg-do run } */ -+ -+extern void abort (void); -+ -+struct S -+{ -+ int b; -+ int *c; -+}; -+static int d, e; -+ -+static struct S s; -+ -+static int * -+__attribute__((noinline, const)) -+foo (void) -+{ -+ return &s.b; -+} -+ -+int * -+__attribute__((noinline)) -+bar (int **f) -+{ -+ s.c = &d; -+ *f = &e; -+ /* As nothing ever takes the address of any int * field in struct S, -+ the write to *f can't alias with the s.c 
field. */ -+ return s.c; -+} -+ -+int -+__attribute__((noinline)) -+baz (int *x) -+{ -+ s.b = 1; -+ *x = 4; -+ /* Function foo takes address of an int field in struct S, -+ so *x can alias with the s.b field (and it does in this testcase). */ -+ return s.b; -+} -+ -+int -+__attribute__((noinline)) -+t (void) -+{ -+ int *f = (int *) 0; -+ return 10 * (bar (&f) != &d) + baz (foo ()); -+} -+ -+int -+main (void) -+{ -+ if (t () != 4) -+ abort (); -+ return 0; -+} -+ -+/* { dg-final { scan-ipa-dump "No structures to transform." "struct_reorg" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp ---- a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp 2020-09-17 02:26:36.904000000 -0400 -+++ b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp 2020-09-17 02:27:57.372000000 -0400 -@@ -1,5 +1,4 @@ --# Copyright (C) 2007, 2008, 2009, 2010 --# Free Software Foundation, Inc. -+# Copyright (C) 1997-2019 Free Software Foundation, Inc. - - # This program is free software; you can redistribute it and/or modify - # it under the terms of the GNU General Public License as published by -@@ -12,12 +11,9 @@ - # GNU General Public License for more details. - # - # You should have received a copy of the GNU General Public License --# along with this program; see the file COPYING3. If not see -+# along with GCC; see the file COPYING3. If not see - # . - --# Test the functionality of programs compiled with profile-directed structure --# rearrangement using -fprofile-generate followed by -fprofile-use. -- - load_lib gcc-dg.exp - load_lib target-supports.exp - -@@ -26,62 +22,14 @@ dg-init - torture-init - - set STRUCT_REORG_TORTURE_OPTIONS [list \ -- { -O1 } \ -- { -O1 -g } \ -- { -O2 } \ -- { -O2 -g } \ -- { -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions } \ -- { -O3 -g } \ -- { -Os } ] -- -+ { -O3 } \ -+ { -Ofast } ] - --set-torture-options $STRUCT_REORG_TORTURE_OPTIONS {{}} $LTO_TORTURE_OPTIONS -+set-torture-options $STRUCT_REORG_TORTURE_OPTIONS {{}} - --gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/wo_prof_*.c]] "" "-fipa-struct-reorg -fdump-ipa-all -fwhole-program" -+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.c]] \ -+ "" "-fipa-struct-reorg -fdump-ipa-all -flto-partition=one -fwhole-program" - -+# All done. - torture-finish --dg-final -- --# Some targets don't support tree profiling. --if { ![check_profiling_available ""] } { -- return --} -- --# The procedures in profopt.exp need these parameters. --set tool gcc --set prof_ext "gcda" -- --# Override the list defined in profopt.exp. --set PROFOPT_OPTIONS [list {}] -- --if $tracelevel then { -- strace $tracelevel --} -- --# Load support procs. --load_lib profopt.exp -- --# These are globals used by profopt-execute. The first is options --# needed to generate profile data, the second is options to use the --# profile data. --set common "-O3 -fwhole-program" --set profile_option [concat $common " -fprofile-generate"] --set feedback_option [concat $common " -fprofile-use -fipa-struct-reorg -fdump-ipa-all"] -- --foreach src [lsort [glob -nocomplain $srcdir/$subdir/w_prof_*.c]] { -- # If we're only testing specific files and this isn't one of them, skip it. 
-- if ![runtest_file_p $runtests $src] then { -- continue -- } -- profopt-execute $src --} -- --set feedback_option [concat $feedback_option " --param struct-reorg-cold-struct-ratio=30"] -- --foreach src [lsort [glob -nocomplain $srcdir/$subdir/w_ratio_*.c]] { -- # If we're only testing specific files and this isn't one of them, skip it. -- if ![runtest_file_p $runtests $src] then { -- continue -- } -- profopt-execute $src --} -+dg-finish -diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_str_init.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_str_init.c ---- a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_str_init.c 2020-09-17 02:26:36.904000000 -0400 -+++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_str_init.c 2020-09-17 02:27:57.372000000 -0400 -@@ -28,4 +28,4 @@ main () - } - - /*--------------------------------------------------------------------------*/ --/* { dg-final { scan-ipa-dump "has escaped...Type is used in an array" "struct_reorg" } } */ -+/* { dg-final { scan-ipa-dump "No structures to transform." "struct_reorg" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_mult_field_peeling.c b/gcc/testsuite/gcc.dg/struct/wo_prof_mult_field_peeling.c ---- a/gcc/testsuite/gcc.dg/struct/wo_prof_mult_field_peeling.c 2020-09-17 02:26:36.904000000 -0400 -+++ b/gcc/testsuite/gcc.dg/struct/wo_prof_mult_field_peeling.c 2020-09-17 02:27:57.372000000 -0400 -@@ -38,5 +38,5 @@ main () - } - - /*--------------------------------------------------------------------------*/ --/* The structure str_t is erroneously peeled into 4 structures instead of 2. */ --/* { dg-final { scan-ipa-dump "the number of new types is 2" "struct_reorg" } } */ -+/* Two more fields structure is not splitted. */ -+/* { dg-final { scan-ipa-dump "No structures to transform." "struct_reorg" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/w_prof_global_array.c b/gcc/testsuite/gcc.dg/struct/w_prof_global_array.c ---- a/gcc/testsuite/gcc.dg/struct/w_prof_global_array.c 2020-09-17 02:26:36.904000000 -0400 -+++ b/gcc/testsuite/gcc.dg/struct/w_prof_global_array.c 2020-09-17 02:27:57.372000000 -0400 -@@ -26,4 +26,4 @@ main () - } - - /*--------------------------------------------------------------------------*/ --/* { dg-final-use { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ -+/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/w_prof_global_var.c b/gcc/testsuite/gcc.dg/struct/w_prof_global_var.c ---- a/gcc/testsuite/gcc.dg/struct/w_prof_global_var.c 2020-09-17 02:26:36.908000000 -0400 -+++ b/gcc/testsuite/gcc.dg/struct/w_prof_global_var.c 2020-09-17 02:27:57.372000000 -0400 -@@ -39,4 +39,4 @@ main () - } - - /*--------------------------------------------------------------------------*/ --/* { dg-final-use { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ -+/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/w_prof_local_array.c b/gcc/testsuite/gcc.dg/struct/w_prof_local_array.c ---- a/gcc/testsuite/gcc.dg/struct/w_prof_local_array.c 2020-09-17 02:26:36.908000000 -0400 -+++ b/gcc/testsuite/gcc.dg/struct/w_prof_local_array.c 2020-09-17 02:27:57.372000000 -0400 -@@ -34,4 +34,4 @@ main () - } - - /*--------------------------------------------------------------------------*/ --/* { dg-final-use { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { 
xfail *-*-* } } } */ -+/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/w_prof_local_var.c b/gcc/testsuite/gcc.dg/struct/w_prof_local_var.c ---- a/gcc/testsuite/gcc.dg/struct/w_prof_local_var.c 2020-09-17 02:26:36.908000000 -0400 -+++ b/gcc/testsuite/gcc.dg/struct/w_prof_local_var.c 2020-09-17 02:27:57.372000000 -0400 -@@ -37,4 +37,4 @@ main () - } - - /*--------------------------------------------------------------------------*/ --/* { dg-final-use { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ -+/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/w_prof_single_str_global.c b/gcc/testsuite/gcc.dg/struct/w_prof_single_str_global.c ---- a/gcc/testsuite/gcc.dg/struct/w_prof_single_str_global.c 2020-09-17 02:26:36.908000000 -0400 -+++ b/gcc/testsuite/gcc.dg/struct/w_prof_single_str_global.c 2020-09-17 02:27:57.372000000 -0400 -@@ -28,4 +28,4 @@ main () - } - - /*--------------------------------------------------------------------------*/ --/* { dg-final-use { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ -+/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/w_prof_two_strs.c b/gcc/testsuite/gcc.dg/struct/w_prof_two_strs.c ---- a/gcc/testsuite/gcc.dg/struct/w_prof_two_strs.c 2020-09-17 02:26:36.908000000 -0400 -+++ b/gcc/testsuite/gcc.dg/struct/w_prof_two_strs.c 2020-09-17 02:27:57.372000000 -0400 -@@ -61,4 +61,4 @@ main () - } - - /*--------------------------------------------------------------------------*/ --/* { dg-final-use { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ -+/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/w_ratio_cold_str.c b/gcc/testsuite/gcc.dg/struct/w_ratio_cold_str.c ---- a/gcc/testsuite/gcc.dg/struct/w_ratio_cold_str.c 2020-09-17 02:26:36.908000000 -0400 -+++ b/gcc/testsuite/gcc.dg/struct/w_ratio_cold_str.c 2020-09-17 02:27:57.372000000 -0400 -@@ -40,4 +40,4 @@ main () - - /*--------------------------------------------------------------------------*/ - /* Arrays are not handled. 
*/ --/* { dg-final-use { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ -+/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ -diff -Nurp a/gcc/testsuite/g++.dg/torture/pr38355.C b/gcc/testsuite/g++.dg/torture/pr38355.C ---- a/gcc/testsuite/g++.dg/torture/pr38355.C 2020-09-17 02:26:36.908000000 -0400 -+++ b/gcc/testsuite/g++.dg/torture/pr38355.C 1969-12-31 19:00:00.000000000 -0500 -@@ -1,25 +0,0 @@ --// { dg-do run } --// { dg-options "-fwhole-program -fipa-struct-reorg" } --template struct A --{ -- char c; -- void foo(int); -- void bar(int i) { foo(i+1); } --}; -- --template struct B : virtual A<0> {}; -- --template inline void baz(B& b, int i) --{ -- if (i) b.bar(0); --} -- --extern template class A<0>; --extern template void baz(B<0>&, int); -- --int main() --{ -- B<0> b; -- baz(b, 0); -- return 0; --} diff --git a/ipa-struct-reorg.patch b/ipa-struct-reorg.patch deleted file mode 100644 index cf3ae235f453b657d5fe008bdfbd4e0c180d507b..0000000000000000000000000000000000000000 --- a/ipa-struct-reorg.patch +++ /dev/null @@ -1,5846 +0,0 @@ -This backport contains 1 patch from gcc personal branch tree. - -ipa-struct-reorg-2019-06-07-Update-with-Andrew-Pinski-s-struct-reorg-patch.patch -commit 6e1bd1c900533c627b5e4fbbecb41dcd7974b522 - -The original of this commit can be found on - https://gcc.gnu.org/git/?p=gcc-old.git;a=shortlog;h=refs/heads/sje/struct-reorg - -diff -Nurp a/gcc/common.opt b/gcc/common.opt ---- a/gcc/common.opt 2020-03-12 07:07:21.000000000 -0400 -+++ b/gcc/common.opt 2020-06-16 22:56:07.720000000 -0400 -@@ -1762,8 +1762,8 @@ Common Ignore - Does nothing. Preserved for backward compatibility. - - fipa-struct-reorg --Common Ignore --Does nothing. Preserved for backward compatibility. -+Common Report Var(flag_ipa_struct_reorg) Init(0) Optimization -+Perform structure layout optimizations. - - fipa-vrp - Common Report Var(flag_ipa_vrp) Optimization -diff -Nurp a/gcc/configure b/gcc/configure ---- a/gcc/configure 2020-03-12 07:08:30.000000000 -0400 -+++ b/gcc/configure 2020-06-16 22:56:07.724000000 -0400 -@@ -31614,7 +31614,7 @@ $as_echo "$as_me: executing $ac_file com - "depdir":C) $SHELL $ac_aux_dir/mkinstalldirs $DEPDIR ;; - "gccdepdir":C) - ${CONFIG_SHELL-/bin/sh} $ac_aux_dir/mkinstalldirs build/$DEPDIR -- for lang in $subdirs c-family common -+ for lang in $subdirs c-family common ipa-struct-reorg - do - ${CONFIG_SHELL-/bin/sh} $ac_aux_dir/mkinstalldirs $lang/$DEPDIR - done ;; -diff -Nurp a/gcc/configure.ac b/gcc/configure.ac ---- a/gcc/configure.ac 2020-03-12 07:07:21.000000000 -0400 -+++ b/gcc/configure.ac 2020-06-16 22:56:07.724000000 -0400 -@@ -1170,7 +1170,7 @@ AC_CHECK_HEADERS(ext/hash_map) - ZW_CREATE_DEPDIR - AC_CONFIG_COMMANDS([gccdepdir],[ - ${CONFIG_SHELL-/bin/sh} $ac_aux_dir/mkinstalldirs build/$DEPDIR -- for lang in $subdirs c-family common -+ for lang in $subdirs c-family common ipa-struct-reorg - do - ${CONFIG_SHELL-/bin/sh} $ac_aux_dir/mkinstalldirs $lang/$DEPDIR - done], [subdirs="$subdirs" ac_aux_dir=$ac_aux_dir DEPDIR=$DEPDIR]) -diff -Nurp a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi ---- a/gcc/doc/invoke.texi 2020-03-12 07:07:21.000000000 -0400 -+++ b/gcc/doc/invoke.texi 2020-06-16 22:56:07.728000000 -0400 -@@ -420,6 +420,7 @@ Objective-C and Objective-C++ Dialects}. 
- -finline-functions -finline-functions-called-once -finline-limit=@var{n} @gol - -finline-small-functions -fipa-cp -fipa-cp-clone @gol - -fipa-bit-cp -fipa-vrp -fipa-pta -fipa-profile -fipa-pure-const @gol -+-fipa-struct-reorg @gol - -fipa-reference -fipa-reference-addressable @gol - -fipa-stack-alignment -fipa-icf -fira-algorithm=@var{algorithm} @gol - -flive-patching=@var{level} @gol -@@ -9312,6 +9313,19 @@ Enabled by default at @option{-O} and hi - Reduce stack alignment on call sites if possible. - Enabled by default. - -+@item -fipa-struct-reorg -+@opindex fipa-struct-reorg -+Perform structure reorganization optimization, that change C-like structures -+layout in order to better utilize spatial locality. This transformation is -+affective for programs containing arrays of structures. Available in two -+compilation modes: profile-based (enabled with @option{-fprofile-generate}) -+or static (which uses built-in heuristics). It works only in whole program -+mode, so it requires @option{-fwhole-program} to be -+enabled. Structures considered @samp{cold} by this transformation are not -+affected (see @option{--param struct-reorg-cold-struct-ratio=@var{value}}). -+ -+With this flag, the program debug info reflects a new structure layout. -+ - @item -fipa-pta - @opindex fipa-pta - Perform interprocedural pointer analysis and interprocedural modification -@@ -11025,6 +11039,15 @@ In each case, the @var{value} is an inte - @var{name} are: - - @table @gcctabopt -+@item struct-reorg-cold-struct-ratio -+The threshold ratio (as a percentage) between a structure frequency -+and the frequency of the hottest structure in the program. This parameter -+is used by struct-reorg optimization enabled by @option{-fipa-struct-reorg}. -+We say that if the ratio of a structure frequency, calculated by profiling, -+to the hottest structure frequency in the program is less than this -+parameter, then structure reorganization is not applied to this structure. -+The default is 10. -+ - @item predictable-branch-outcome - When branch is predicted to be taken with probability lower than this threshold - (in percent), then it is considered well predictable. -diff -Nurp a/gcc/fold-const.c b/gcc/fold-const.c ---- a/gcc/fold-const.c 2020-03-12 07:07:21.000000000 -0400 -+++ b/gcc/fold-const.c 2020-06-16 22:56:07.732000000 -0400 -@@ -7165,9 +7165,15 @@ fold_plusminus_mult_expr (location_t loc - increased the number of multiplications necessary. */ - && TREE_CODE (arg10) != INTEGER_CST) - { -+ HOST_WIDE_INT tmp1 = int01 / int11; -+ HOST_WIDE_INT t = exact_log2 (absu_hwi (int11)); -+ HOST_WIDE_INT size = tree_to_shwi (TYPE_SIZE_UNIT (TREE_TYPE (arg00))) * BITS_PER_UNIT; -+ HOST_WIDE_INT sign_bit = HOST_WIDE_INT_1U << (size - t - 1); -+ if (tmp1 & sign_bit) -+ tmp1 |= HOST_WIDE_INT_M1U << (size - t); -+ tree tmp2 = build_int_cst (TREE_TYPE (arg00), tmp1); - alt0 = fold_build2_loc (loc, MULT_EXPR, TREE_TYPE (arg00), arg00, -- build_int_cst (TREE_TYPE (arg00), -- int01 / int11)); -+ tmp2); - alt1 = arg10; - same = maybe_same; - if (swap) -diff -Nurp a/gcc/ipa-struct-reorg/escapes.def b/gcc/ipa-struct-reorg/escapes.def ---- a/gcc/ipa-struct-reorg/escapes.def 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/ipa-struct-reorg/escapes.def 2020-06-16 22:56:07.732000000 -0400 -@@ -0,0 +1,60 @@ -+/* Copyright (C) 2016 Free Software Foundation, Inc. -+ -+This file is part of GCC. 
-+ -+GCC is free software; you can redistribute it and/or modify it under -+the terms of the GNU General Public License as published by the Free -+Software Foundation; either version 3, or (at your option) any later -+version. -+ -+GCC is distributed in the hope that it will be useful, but WITHOUT ANY -+WARRANTY; without even the implied warranty of MERCHANTABILITY or -+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+for more details. -+ -+You should have received a copy of the GNU General Public License -+along with GCC; see the file COPYING3. If not see -+. */ -+ -+/* Before including this file, you should define a macro: -+ DEF_ESCAPE (ENUM, TEXT) -+ -+ This macro will be called once for each escape reason. The -+ ENUM will be of type "escape_type". The TEXT is describing -+ the reason for the escape. -+*/ -+DEF_ESCAPE (escape_marked_as_used, "Type used in variable marked as used") -+DEF_ESCAPE (escape_via_global_var, "Type used via a external visible variable") -+DEF_ESCAPE (escape_via_global_init, "Type used via a global init of a variable") -+DEF_ESCAPE (escape_non_supported_allocator, "Type used by allocation which is not currently supported") -+DEF_ESCAPE (escape_dependent_type_escapes, "Type uses a type which escapes or is used by a type which escapes") -+DEF_ESCAPE (escape_var_arg_function, "Types escapes via a variable argument function") -+DEF_ESCAPE (escape_bitfields, "Types has bitfields") -+DEF_ESCAPE (escape_recusive_type, "Type has a recusive relationship") -+DEF_ESCAPE (escape_variable_sized_array, "Type has a variable sized type") -+DEF_ESCAPE (escape_external_function, "Type escapes via an external function call") -+DEF_ESCAPE (escape_visible_function, "Type escapes via expternally visible function call") -+DEF_ESCAPE (escape_pointer_function, "Type escapes via an function pointer call") -+DEF_ESCAPE (escape_unkown_field, "Type escapes via an unkown field accessed") -+DEF_ESCAPE (escape_union, "Type escapes via an union") -+DEF_ESCAPE (escape_inline_asm, "Type escapes via inline-asm") -+DEF_ESCAPE (escape_non_multiply_size, "Type escapes a pointer plus which is not a multiplicate of the size") -+DEF_ESCAPE (escape_cast_void, "Type escapes a cast to/from void*") -+DEF_ESCAPE (escape_cast_another_ptr, "Type escapes a cast to a different pointer") -+DEF_ESCAPE (escape_cast_int, "Type escapes a cast from/to intergral type") -+DEF_ESCAPE (escape_int_const, "Type escapes via integer constant") -+DEF_ESCAPE (escape_vce, "Type escapes via a VIEW_CONVERT_EXPR") -+DEF_ESCAPE (escape_array_access, "Type escapes via an array access") -+DEF_ESCAPE (escape_noclonable_function, "Type escapes via a non-clonable function") -+DEF_ESCAPE (escape_rescusive_type, "Recusive type") -+DEF_ESCAPE (escape_user_alignment, "Type has an user alignment set") -+DEF_ESCAPE (escape_volatile, "Type has an variable which is volatile") -+DEF_ESCAPE (escape_non_eq, "Type has a comparison other than equals or not equals") -+DEF_ESCAPE (escape_addr, "Type escapes via taking the address of field") -+DEF_ESCAPE (escape_cannot_change_signature, "Type used in a call that cannot change signature") -+DEF_ESCAPE (escape_non_optimize, "Type used by a function which turns off struct reorg") -+DEF_ESCAPE (escape_array, "Type is used in an array [not handled yet]") -+DEF_ESCAPE (escape_ptr_ptr, "Type is used in a pointer to a pointer [not handled yet]") -+DEF_ESCAPE (escape_return, "Type escapes via a return [not handled yet]") -+ -+#undef DEF_ESCAPE -diff -Nurp 
a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c ---- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c 2020-06-16 22:56:27.548000000 -0400 -@@ -0,0 +1,3840 @@ -+/* Struct-reorg optimizations. -+ Copyright (C) 2016-2017 Free Software Foundation, Inc. -+ Contributed by Andrew Pinski -+ -+This file is part of GCC. -+ -+GCC is free software; you can redistribute it and/or modify it under -+the terms of the GNU General Public License as published by the Free -+Software Foundation; either version 3, or (at your option) any later -+version. -+ -+GCC is distributed in the hope that it will be useful, but WITHOUT ANY -+WARRANTY; without even the implied warranty of MERCHANTABILITY or -+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+for more details. -+ -+You should have received a copy of the GNU General Public License -+along with GCC; see the file COPYING3. If not see -+. */ -+ -+/* This pass implements the structure reorganization organization (struct-reorg). -+ Right now it handles just splitting off the hottest fields for a struct of 2 fields: -+ struct s { -+ type1 field1; // Hot field -+ type2 field2; -+ }; -+ s *v; -+ into: -+ struct s_hot { -+ type1 field1; -+ }; -+ struct c_cold { -+ type2 field2; -+ }; -+ s_hot *v_hot; -+ s_cold *v_cold; -+ -+ TODO: This pass can be extended to more fields, and other alogrothims like reordering. -+ -+ This pass operate in four stages: -+ 1. All of the field accesses, declarations (struct types and pointers to that type) -+ and struct types are scanned and recorded. This includes global declarations. -+ Also record all allocation and freeing sites; this is needed for the rewriting -+ phase. -+ -+ FIXME: If there is a top-level inline-asm, the pass immediately returns. -+ -+ 2. Prune out the types which are considered escaping. -+ Examples of types which are considered escaping: -+ 1. A declaration has been marked as having the attribute used or has user defined -+ alignment (type too). -+ 2. Accesses are via a BIT_FIELD_REF. FIXME: Handle VECTOR_TYPE for this case. -+ 3. The "allocation" site is not a known builtin function. -+ 4. Casting to/from an integer. -+ -+ 3. Analyze the types for which optimization to do. -+ a. Split the fields into two different structs. -+ (FIXME: two field case handled only) -+ Look at all structs which contain two fields, if one of the fields is hotter -+ then split it and put it on the rewritting for accesses. -+ Allocations and freeing are marked to split into two functions; all uses of -+ that type will now be considered as two. -+ b. Reorder fields hottest to the coldest. TODO: Implement. -+ -+ 4. Rewrite each access and allocation and free which is marked as rewriting. 
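   As a rough source-level sketch of stages 3a and 4 (the pass itself
   rewrites GIMPLE; v_hot/v_cold follow the naming of the example above),
   an allocation and a cold-field access such as
     v = malloc (n * sizeof (struct s));
     ... v[i].field2 ...
   conceptually become
     v_hot = malloc (n * sizeof (struct s_hot));
     v_cold = malloc (n * sizeof (struct s_cold));
     ... v_cold[i].field2 ...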
-+ -+ */ -+ -+#include "config.h" -+#include "system.h" -+#include "coretypes.h" -+#include "tm.h" -+#include "tree.h" -+#include "tree-pass.h" -+#include "cgraph.h" -+#include "diagnostic-core.h" -+#include "function.h" -+#include "basic-block.h" -+#include "gimple.h" -+#include "vec.h" -+#include "tree-pretty-print.h" -+#include "gimple-pretty-print.h" -+#include "gimple-iterator.h" -+#include "cfg.h" -+#include "ssa.h" -+#include "tree-dfa.h" -+#include "fold-const.h" -+#include "tree-inline.h" -+#include "stor-layout.h" -+#include "tree-into-ssa.h" -+#include "tree-cfg.h" -+#include "symbol-summary.h" -+#include "alloc-pool.h" -+#include "ipa-prop.h" -+#include "ipa-struct-reorg.h" -+#include "tree-eh.h" -+#include "bitmap.h" -+#include "ipa-param-manipulation.h" -+#include "tree-ssa-live.h" /* For remove_unused_locals. */ -+ -+#define VOID_POINTER_P(type) (POINTER_TYPE_P (type) && VOID_TYPE_P (TREE_TYPE (type))) -+ -+namespace { -+ -+using namespace struct_reorg; -+ -+/* Return true iff TYPE is stdarg va_list type. */ -+ -+static inline bool -+is_va_list_type (tree type) -+{ -+ return TYPE_MAIN_VARIANT (type) == TYPE_MAIN_VARIANT (va_list_type_node); -+} -+ -+ -+/* Return the inner most type for arrays and pointers of TYPE. */ -+ -+tree -+inner_type (tree type) -+{ -+ while (POINTER_TYPE_P (type) -+ || TREE_CODE (type) == ARRAY_TYPE) -+ type = TREE_TYPE (type); -+ return type; -+} -+ -+/* Return true if TYPE is a type which struct reorg should handled. */ -+ -+bool -+handled_type (tree type) -+{ -+ type = inner_type (type); -+ if (TREE_CODE (type) == RECORD_TYPE) -+ return !is_va_list_type (type); -+ return false; -+} -+ -+} // anon namespace -+ -+namespace struct_reorg { -+ -+/* Constructor of srfunction. */ -+ -+srfunction::srfunction (cgraph_node *n) -+ : node (n), -+ old (NULL), -+ newnode (NULL), -+ newf (NULL) -+{ -+} -+ -+/* Add an ARG to the list of arguments for the function. */ -+ -+void -+srfunction::add_arg(srdecl *arg) -+{ -+ args.safe_push(arg); -+} -+ -+/* Dump the SRFUNCTION to the file FILE. */ -+ -+void -+srfunction::dump (FILE *file) -+{ -+ if (node) -+ { -+ fprintf (file, "function : "); -+ print_generic_expr (file, node->decl); -+ fprintf (file, " with arguments: "); -+ for (unsigned i = 0; i < args.length (); i++) -+ { -+ if (i == 0) -+ fprintf (file, "\n "); -+ else -+ fprintf (file, "\n, "); -+ args[i]->dump (file); -+ } -+ -+ fprintf (file, "\nuses globals: "); -+ for(unsigned i = 0; i < globals.length (); i++) -+ { -+ fprintf (file, "\n "); -+ globals[i]->dump (file); -+ } -+ -+ fprintf (file, "\ndecls: "); -+ } -+ else -+ fprintf (file, "globals : "); -+ -+ for(unsigned i = 0; i < decls.length (); i++) -+ { -+ fprintf (file, "\n "); -+ decls[i]->dump (file); -+ } -+} -+ -+/* Simple dump the SRFUNCTION to the file FILE; used so it is not recusive. */ -+ -+void -+srfunction::simple_dump (FILE *file) -+{ -+ print_generic_expr (file, node->decl); -+} -+ -+ -+/* Constructor of FIELD. */ -+ -+srfield::srfield (tree field, srtype *base) -+ : offset(int_byte_position (field)), -+ fieldtype (TREE_TYPE (field)), -+ fielddecl (field), -+ base(base), -+ type(NULL), -+ clusternum(0) -+{ -+ for(int i = 0;i < max_split; i++) -+ newfield[i] = NULL_TREE; -+} -+ -+/* Constructor of TYPE. 
*/ -+ -+srtype::srtype (tree type) -+ : type (type), -+ chain_type (false), -+ escapes (does_not_escape), -+ visited (false) -+{ -+ for (int i = 0; i < max_split; i++) -+ newtype[i] = NULL_TREE; -+ -+ for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) -+ { -+ if (TREE_CODE (field) == FIELD_DECL) -+ { -+ if (DECL_BIT_FIELD (field)) -+ { -+ escapes = escape_bitfields; -+ continue; -+ } -+ else if (!DECL_SIZE (field) -+ || TREE_CODE (DECL_SIZE (field)) != INTEGER_CST) -+ { -+ escapes = escape_variable_sized_array; -+ break; -+ } -+ srfield *t = new srfield (field, this); -+ fields.safe_push(t); -+ } -+ } -+} -+ -+/* Mark the type as escaping type E at statement STMT. */ -+ -+void -+srtype::mark_escape (escape_type e, gimple *stmt) -+{ -+ /* Once the type has escaped, it should never -+ change back to non escaping. */ -+ gcc_assert (e != does_not_escape); -+ if (has_escaped ()) -+ { -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "\nO type: "); -+ simple_dump (dump_file); -+ fprintf (dump_file, " has already escaped."); -+ fprintf (dump_file, " old = \"%s\" ", escape_type_string[escapes - 1]); -+ fprintf (dump_file, " new = \"%s\"\n", escape_type_string[e - 1]); -+ if (stmt) -+ print_gimple_stmt (dump_file, stmt, 0); -+ fprintf (dump_file, "\n"); -+ } -+ return; -+ } -+ escapes = e; -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "\nN type: "); -+ simple_dump (dump_file); -+ fprintf (dump_file, " new = \"%s\"\n", escape_reason ()); -+ if (stmt) -+ print_gimple_stmt (dump_file, stmt, 0); -+ fprintf (dump_file, "\n"); -+ } -+} -+ -+/* Add FIELD to the list of fields that use this type. */ -+ -+void -+srtype::add_field_site (srfield *field) -+{ -+ field_sites.safe_push(field); -+} -+ -+ -+/* Constructor of DECL. */ -+ -+srdecl::srdecl (srtype *tp, tree decl, int argnum) -+ : type (tp), -+ decl (decl), -+ func (NULL_TREE), -+ argumentnum (argnum), -+ visited (false) -+{ -+ if (TREE_CODE (decl) == SSA_NAME) -+ func = current_function_decl; -+ else if (!is_global_var (decl)) -+ func = DECL_CONTEXT (decl); -+ for(int i = 0;i < max_split; i++) -+ newdecl[i] = NULL_TREE; -+} -+ -+/* Find DECL in the function. */ -+ -+srdecl * -+srfunction::find_decl (tree decl) -+{ -+ for (unsigned i = 0; i < decls.length (); i++) -+ if (decls[i]->decl == decl) -+ return decls[i]; -+ return NULL; -+} -+ -+/* Record DECL of the TYPE with argument num ARG. */ -+ -+srdecl * -+srfunction::record_decl (srtype *type, tree decl, int arg) -+{ -+ // Search for the decl to see if it is already there. -+ srdecl *decl1 = find_decl (decl); -+ -+ if (decl1) -+ return decl1; -+ -+ gcc_assert (type); -+ -+ decl1 = new srdecl (type, decl, arg); -+ decls.safe_push(decl1); -+ return decl1; -+} -+ -+/* Find the field at OFF offset. */ -+ -+srfield * -+srtype::find_field (unsigned HOST_WIDE_INT off) -+{ -+ unsigned int i; -+ srfield *field; -+ -+ /* FIXME: handle array/struct field inside the current struct. */ -+ /* NOTE This does not need to be fixed to handle libquatumn */ -+ FOR_EACH_VEC_ELT (fields, i, field) -+ { -+ if (off == field->offset) -+ return field; -+ } -+ return NULL; -+} -+ -+/* Add the function FN to the list of functions if it -+ is there not already. */ -+ -+void -+srtype::add_function (srfunction *fn) -+{ -+ unsigned decluid; -+ unsigned i; -+ decluid = DECL_UID (fn->node->decl); -+ -+ srfunction *fn1; -+ // Search for the decl to see if it is already there. 
-+ FOR_EACH_VEC_ELT (functions, i, fn1) -+ { -+ if (DECL_UID (fn1->node->decl) == decluid) -+ return; -+ } -+ -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ fprintf (dump_file, "Recording new function: %u.\n", decluid); -+ -+ functions.safe_push(fn); -+} -+ -+/* Dump out the type structure to FILE. */ -+ -+void -+srtype::dump (FILE *f) -+{ -+ unsigned int i; -+ srfield *field; -+ srfunction *fn; -+ sraccess *access; -+ -+ if (chain_type) -+ fprintf (f, "chain decl "); -+ -+ fprintf (f, "type : "); -+ print_generic_expr (f, type); -+ fprintf (f, "(%d) { ", TYPE_UID (type)); -+ if (escapes != does_not_escape) -+ fprintf (f, " escapes = \"%s\"\n", escape_reason ()); -+ fprintf (f, " fields = { "); -+ FOR_EACH_VEC_ELT (fields, i, field) -+ { -+ if (i == 0) -+ fprintf (f, "\n "); -+ else -+ fprintf (f, "\n, "); -+ field->dump (f); -+ } -+ fprintf (f, " }\n "); -+ fprintf (f, "\n accesses = {"); -+ FOR_EACH_VEC_ELT (accesses, i, access) -+ { -+ fprintf (f, "\n"); -+ access->dump (f); -+ } -+ fprintf (f, " }\n "); -+ fprintf (f, "\n functions = {"); -+ FOR_EACH_VEC_ELT (functions, i, fn) -+ { -+ fprintf (f, " \n"); -+ fn->simple_dump (f); -+ } -+ fprintf (f, "\n }\n"); -+ fprintf (f, "\n field_sites = {"); -+ FOR_EACH_VEC_ELT (field_sites, i, field) -+ { -+ fprintf (f, " \n"); -+ field->simple_dump (f); -+ } -+ fprintf (f, "\n }\n"); -+ fprintf (f, "}\n"); -+} -+ -+/* A simplified dump out the type structure to FILE. */ -+ -+void -+srtype::simple_dump (FILE *f) -+{ -+ print_generic_expr (f, type); -+} -+ -+/* Analyze the type and decide what to be done with it. */ -+ -+void -+srtype::analyze (void) -+{ -+ /* Chain decl types can't be split -+ so don't try. */ -+ if (chain_type) -+ return; -+ -+ /* If there is only one field then there is nothing -+ to be done. */ -+ if (fields.length () == 1) -+ return; -+ -+ /* For now we unconditionally split only structures with 2 fields -+ into 2 different structures. In future we intend to add profile -+ info and/or static heuristics to differentiate splitting process. */ -+ if (fields.length () == 2) -+ fields[1]->clusternum = 1; -+ -+ /* REMOVEME: FIXME: this is here for testing more testcases. */ -+ if (fields.length () >= 3) -+ { -+ fields[1]->clusternum = 1; -+ } -+} -+ -+/* Create the new fields for this field. 
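   New declarations keep a derived name so the dumps stay readable: with the
   scheme used below (the old name, ".reorg.", and the split index), a
   struct s is laid out as s.reorg.0 / s.reorg.1, and a field whose own type
   was split gets matching name.reorg.N copies.  (Illustrative names;
   unnamed fields and types simply stay unnamed.)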
*/ -+ -+void -+srfield::create_new_fields (tree newtype[max_split], -+ tree newfields[max_split], -+ tree newlast[max_split]) -+{ -+ tree nt[max_split]; -+ -+ for (unsigned i = 0; i < max_split; i++) -+ nt[i] = NULL; -+ -+ if (type == NULL) -+ nt[0] = fieldtype; -+ else -+ memcpy (nt, type->newtype, sizeof(type->newtype)); -+ -+ for (unsigned i = 0; i < max_split && nt[i] != NULL; i++) -+ { -+ tree field = make_node (FIELD_DECL); -+ if (nt[1] != NULL && DECL_NAME (fielddecl)) -+ { -+ const char *tname = IDENTIFIER_POINTER (DECL_NAME (fielddecl)); -+ char id[10]; -+ char *name; -+ -+ sprintf(id, "%d", i); -+ name = concat (tname, ".reorg.", id, NULL); -+ DECL_NAME (field) = get_identifier (name); -+ free (name); -+ } -+ else -+ DECL_NAME (field) = DECL_NAME (fielddecl); -+ -+ TREE_TYPE (field) = reconstruct_complex_type (TREE_TYPE (fielddecl), nt[i]); -+ DECL_SOURCE_LOCATION (field) = DECL_SOURCE_LOCATION (fielddecl); -+ SET_DECL_ALIGN (field, DECL_ALIGN (fielddecl)); -+ DECL_USER_ALIGN (field) = DECL_USER_ALIGN (fielddecl); -+ TREE_ADDRESSABLE (field) = TREE_ADDRESSABLE (fielddecl); -+ DECL_NONADDRESSABLE_P (field) = !TREE_ADDRESSABLE (fielddecl); -+ TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (fielddecl); -+ DECL_CONTEXT (field) = newtype[clusternum]; -+ -+ if (newfields[clusternum] == NULL) -+ newfields[clusternum] = newlast[clusternum] = field; -+ else -+ { -+ DECL_CHAIN (newlast[clusternum]) = field; -+ newlast[clusternum] = field; -+ } -+ newfield[i] = field; -+ } -+ -+} -+ -+/* Create the new TYPE corresponding to THIS type. */ -+ -+bool -+srtype::create_new_type (void) -+{ -+ /* If the type has been visited, -+ then return if a new type was -+ created or not. */ -+ if (visited) -+ return has_new_type (); -+ -+ visited = true; -+ -+ if (escapes != does_not_escape) -+ { -+ newtype[0] = type; -+ return false; -+ } -+ -+ bool createnewtype = false; -+ unsigned maxclusters = 0; -+ -+ /* Create a new type for each field. */ -+ for (unsigned i = 0; i < fields.length (); i++) -+ { -+ srfield *field = fields[i]; -+ if (field->type) -+ createnewtype |= field->type->create_new_type (); -+ if (field->clusternum > maxclusters) -+ maxclusters = field->clusternum; -+ } -+ -+ /* If the fields' types did have a change or -+ we are not splitting the struct into two clusters, -+ then just return false and don't change the type. */ -+ if (!createnewtype && maxclusters == 0) -+ { -+ newtype[0] = type; -+ return false; -+ } -+ -+ /* Should have at most max_split clusters. */ -+ gcc_assert (maxclusters < max_split); -+ -+ tree newfields[max_split]; -+ tree newlast[max_split]; -+ -+ maxclusters++; -+ -+ const char *tname = NULL; -+ -+ if (TYPE_NAME (type) != NULL) -+ { -+ if (TREE_CODE (TYPE_NAME (type)) == IDENTIFIER_NODE) -+ tname = IDENTIFIER_POINTER (TYPE_NAME (type)); -+ else if (DECL_NAME (TYPE_NAME (type)) != NULL) -+ tname = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))); -+ } -+ -+ for (unsigned i = 0; i < maxclusters; i++) -+ { -+ newfields[i] = NULL_TREE; -+ newlast[i] = NULL_TREE; -+ newtype[i] = make_node (RECORD_TYPE); -+ -+ char *name = NULL; -+ char id[10]; -+ sprintf(id, "%d", i); -+ if (tname) -+ { -+ name = concat (tname, ".reorg.", id, NULL); -+ TYPE_NAME (newtype[i]) = get_identifier (name); -+ free (name); -+ } -+ } -+ -+ for (unsigned i = 0; i < fields.length (); i++) -+ { -+ srfield *f = fields[i]; -+ f->create_new_fields (newtype, newfields, newlast); -+ } -+ -+ -+ /* No reason to warn about these structs since the warning would -+ have happened already. 
*/ -+ int save_warn_padded = warn_padded; -+ warn_padded = 0; -+ -+ for (unsigned i = 0; i < maxclusters; i++) -+ { -+ TYPE_FIELDS (newtype[i]) = newfields[i]; -+ layout_type (newtype[i]); -+ } -+ -+ warn_padded = save_warn_padded; -+ -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "Created %d types:\n", maxclusters); -+ for (unsigned i = 0; i < maxclusters; i++) -+ { -+ print_generic_expr (dump_file, newtype[i]); -+ fprintf (dump_file, "\n"); -+ } -+ } -+ -+ return true; -+} -+ -+/* Helper function to copy some attributes from ORIG_DECL to the NEW_DECL. */ -+ -+static inline void -+copy_var_attributes (tree new_decl, tree orig_decl) -+{ -+ DECL_ARTIFICIAL (new_decl) = 1; -+ DECL_EXTERNAL (new_decl) = DECL_EXTERNAL (orig_decl); -+ TREE_STATIC (new_decl) = TREE_STATIC (orig_decl); -+ TREE_PUBLIC (new_decl) = TREE_PUBLIC (orig_decl); -+ TREE_USED (new_decl) = TREE_USED (orig_decl); -+ DECL_CONTEXT (new_decl) = DECL_CONTEXT (orig_decl); -+ TREE_THIS_VOLATILE (new_decl) = TREE_THIS_VOLATILE (orig_decl); -+ TREE_ADDRESSABLE (new_decl) = TREE_ADDRESSABLE (orig_decl); -+ TREE_READONLY (new_decl) = TREE_READONLY (orig_decl); -+ if (is_global_var (orig_decl)) -+ set_decl_tls_model (new_decl, DECL_TLS_MODEL (orig_decl)); -+} -+ -+/* Create all of the new decls (SSA_NAMES included) for THIS function. */ -+ -+void -+srfunction::create_new_decls (void) -+{ -+ /* If this function has been cloned, we don't need to -+ create the new decls. */ -+ if (newnode) -+ return; -+ -+ if (node) -+ set_cfun (DECL_STRUCT_FUNCTION (node->decl)); -+ -+ for (unsigned i = 0; i < decls.length (); i++) -+ { -+ srdecl *decl = decls[i]; -+ srtype *type = decl->type; -+ /* If the type of the decl does not change, -+ then don't create a new decl. */ -+ if (!type->has_new_type ()) -+ { -+ decl->newdecl[0] = decl->decl; -+ continue; -+ } -+ -+ /* Handle SSA_NAMEs. */ -+ if (TREE_CODE (decl->decl) == SSA_NAME) -+ { -+ tree newtype1[max_split]; -+ tree inner = SSA_NAME_VAR (decl->decl); -+ tree newinner[max_split]; -+ memset (newinner, 0, sizeof(newinner)); -+ for (unsigned j = 0; j < max_split && type->newtype[j]; j++) -+ newtype1[j] = reconstruct_complex_type (TREE_TYPE (decls[i]->decl), type->newtype[j]); -+ if (inner) -+ { -+ srdecl *in = find_decl (inner); -+ gcc_assert (in); -+ memcpy (newinner, in->newdecl, sizeof(newinner)); -+ } -+ tree od = decls[i]->decl; -+ /* Create the new ssa names and copy some attributes from the old one. */ -+ for (unsigned j = 0; j < max_split && type->newtype[j]; j++) -+ { -+ tree nd = make_ssa_name (newinner[j] ? newinner[j] : newtype1[j]); -+ decl->newdecl[j] = nd; -+ /* If the old decl was a default defition, handle it specially. */ -+ if (SSA_NAME_IS_DEFAULT_DEF (od)) -+ { -+ SSA_NAME_IS_DEFAULT_DEF (nd) = true; -+ SSA_NAME_DEF_STMT (nd) = gimple_build_nop (); -+ -+ /* Set the default definition for the ssaname if needed. 
*/ -+ if (inner) -+ { -+ gcc_assert (newinner[j]); -+ set_ssa_default_def (cfun, newinner[j], nd); -+ } -+ } -+ SSA_NAME_OCCURS_IN_ABNORMAL_PHI (nd) -+ = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (od); -+ statistics_counter_event (cfun, "Create new ssa_name", 1); -+ } -+ } -+ else if (TREE_CODE (decls[i]->decl) == VAR_DECL) -+ { -+ tree orig_var = decl->decl; -+ const char *tname = NULL; -+ if (DECL_NAME (orig_var)) -+ tname = IDENTIFIER_POINTER (DECL_NAME (orig_var)); -+ for (unsigned j = 0; j < max_split && type->newtype[j]; j++) -+ { -+ tree new_name = NULL; -+ char *name = NULL; -+ char id[10]; -+ sprintf(id, "%d", j); -+ if (tname) -+ { -+ name = concat (tname, ".reorg.", id, NULL); -+ new_name = get_identifier (name); -+ free (name); -+ } -+ tree newtype1 = reconstruct_complex_type (TREE_TYPE (orig_var), type->newtype[j]); -+ decl->newdecl[j] = build_decl (DECL_SOURCE_LOCATION (orig_var), -+ VAR_DECL, new_name, newtype1); -+ copy_var_attributes (decl->newdecl[j], orig_var); -+ if (!is_global_var (orig_var)) -+ add_local_decl (cfun, decl->newdecl[j]); -+ else -+ varpool_node::add (decl->newdecl[j]); -+ statistics_counter_event (cfun, "Create new var decl", 1); -+ } -+ } -+ /* Paramater decls are already handled in create_new_functions. */ -+ else if (TREE_CODE (decls[i]->decl) == PARM_DECL) -+ ; -+ else -+ internal_error ("Unhandled decl type stored"); -+ -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "Created New decls for decl:\n"); -+ fprintf (dump_file, "\n"); -+ decls[i]->dump (dump_file); -+ fprintf (dump_file, "\n"); -+ for (unsigned j = 0; j < max_split && decls[i]->newdecl[j]; j++) -+ { -+ print_generic_expr (dump_file, decls[i]->newdecl[j]); -+ fprintf (dump_file, "\n"); -+ } -+ fprintf (dump_file, "\n"); -+ } -+ } -+ -+ set_cfun (NULL); -+ -+} -+ -+/* Dump out the field structure to FILE. */ -+ -+void -+srfield::dump (FILE *f) -+{ -+ fprintf (f, "field (%d) { ", DECL_UID (fielddecl)); -+ fprintf (f, "base = "); -+ base->simple_dump (f); -+ fprintf (f, ", offset = " HOST_WIDE_INT_PRINT_DEC, offset); -+ fprintf (f, ", type = "); -+ print_generic_expr (f, fieldtype); -+ if (type) -+ { -+ fprintf (f, "( srtype = "); -+ type->simple_dump (f); -+ fprintf (f, ")"); -+ } -+ fprintf (f, "\n}\n"); -+} -+ -+ -+/* A simplified dump out the field structure to FILE. */ -+ -+void -+srfield::simple_dump (FILE *f) -+{ -+ fprintf (f, "field (%d)", DECL_UID (fielddecl)); -+} -+ -+/* Dump out the access structure to FILE. */ -+ -+void -+sraccess::dump (FILE *f) -+{ -+ fprintf (f, "access { "); -+ fprintf (f, "type = '("); -+ type->simple_dump (f); -+ fprintf (f, ")'"); -+ if (field) -+ { -+ fprintf (f, ", field = '("); -+ field->simple_dump (f); -+ fprintf (f, ")'"); -+ } -+ else -+ fprintf (f, ", whole type"); -+ fprintf (f, " in function: %s/%d", node->name (), node->order); -+ fprintf (f, ", stmt:\n"); -+ print_gimple_stmt (f, stmt, 0); -+ fprintf (f, "\n }\n"); -+ -+} -+ -+/* Dump out the decl structure to FILE. 
*/ -+ -+void -+srdecl::dump (FILE *file) -+{ -+ if (!func) -+ fprintf (file, "global "); -+ if (argumentnum != -1) -+ fprintf (file, "argument(%d) ", argumentnum); -+ fprintf (file, "decl: "); -+ print_generic_expr (file, decl); -+ fprintf (file, " type: "); -+ type->simple_dump (file); -+} -+ -+} // namespace struct_reorg -+ -+namespace { -+ -+struct ipa_struct_reorg -+{ -+ // Constructors -+ ipa_struct_reorg(void) -+ : current_function (NULL), -+ done_recording(false) -+ { -+ } -+ -+ // public methods -+ unsigned execute(void); -+ void mark_type_as_escape (tree type, escape_type, gimple *stmt = NULL); -+private: -+ // fields -+ auto_vec_del types; -+ auto_vec_del functions; -+ srglobal globals; -+ srfunction *current_function; -+ -+ bool done_recording; -+ -+ // private methods -+ void dump_types (FILE *f); -+ void dump_types_escaped (FILE *f); -+ void dump_functions (FILE *f); -+ void record_accesses (void); -+ void detect_cycles (void); -+ bool walk_field_for_cycles (srtype*); -+ void prune_escaped_types (void); -+ void propagate_escape (void); -+ void analyze_types (void); -+ void clear_visited (void); -+ bool create_new_types (void); -+ void create_new_decls (void); -+ srdecl *find_decl (tree); -+ void create_new_functions (void); -+ void create_new_args (cgraph_node *new_node); -+ unsigned rewrite_functions (void); -+ srdecl *record_var (tree decl, escape_type escapes = does_not_escape, int arg = -1); -+ srfunction *record_function (cgraph_node *node); -+ srfunction *find_function (cgraph_node *node); -+ srtype *record_type (tree type); -+ void process_union (tree type); -+ srtype *find_type (tree type); -+ void maybe_record_stmt (cgraph_node *, gimple *); -+ void maybe_record_assign (cgraph_node *, gassign *); -+ void maybe_record_call (cgraph_node *, gcall *); -+ void maybe_record_allocation_site (cgraph_node *, gimple *); -+ void record_stmt_expr (tree expr, cgraph_node *node, gimple *stmt); -+ void mark_expr_escape(tree, escape_type, gimple *stmt); -+ tree allocate_size (srtype *t, gimple *stmt); -+ -+ void mark_decls_in_as_not_needed (tree fn); -+ -+ bool rewrite_stmt (gimple*, gimple_stmt_iterator *); -+ bool rewrite_assign (gassign *, gimple_stmt_iterator *); -+ bool rewrite_call (gcall *, gimple_stmt_iterator *); -+ bool rewrite_cond (gcond *, gimple_stmt_iterator *); -+ bool rewrite_debug (gimple *, gimple_stmt_iterator *); -+ bool rewrite_phi (gphi *); -+ bool rewrite_expr (tree expr, tree newexpr[max_split], bool ignore_missing_decl = false); -+ bool rewrite_lhs_rhs (tree lhs, tree rhs, tree newlhs[max_split], tree newrhs[max_split]); -+ bool get_type_field (tree expr, tree &base, bool &indirect, srtype *&type, srfield *&field, bool &realpart, bool &imagpart, bool &address, bool should_create = false, bool can_escape = false); -+ bool wholeaccess (tree expr, tree base, tree accesstype, srtype *t); -+ -+ void check_definition (srdecl *decl, vec&); -+ void check_uses (srdecl *decl, vec&); -+ void check_use (srdecl *decl, gimple *stmt, vec&); -+ void check_type_and_push (tree newdecl, srtype *type, vec &worklist, gimple *stmt); -+ void check_other_side (srdecl *decl, tree other, gimple *stmt, vec &worklist); -+ -+ void find_vars (gimple *stmt); -+ void find_var (tree expr, gimple *stmt); -+ void mark_types_asm (gasm *astmt); -+ -+ bool has_rewritten_type (srfunction*); -+ void maybe_mark_or_record_other_side (tree side, tree other, gimple *stmt); -+}; -+ -+/* Dump all of the recorded types to file F. 
*/ -+ -+void -+ipa_struct_reorg::dump_types (FILE *f) -+{ -+ unsigned i; -+ srtype *type; -+ FOR_EACH_VEC_ELT (types, i, type) -+ { -+ type->dump(f); -+ } -+ fprintf (f, "\n"); -+} -+ -+/* Dump all of the recorded types to file F. */ -+ -+void -+ipa_struct_reorg::dump_types_escaped (FILE *f) -+{ -+ unsigned i; -+ srtype *type; -+ FOR_EACH_VEC_ELT (types, i, type) -+ { -+ if (type->has_escaped ()) -+ { -+ type->simple_dump (f); -+ fprintf (f, " has escaped: \"%s\"\n", type->escape_reason()); -+ } -+ } -+ fprintf (f, "\n"); -+} -+ -+ -+/* Dump all of the record functions to file F. */ -+ -+void -+ipa_struct_reorg::dump_functions (FILE *f) -+{ -+ unsigned i; -+ srfunction *fn; -+ -+ fprintf (f, "\n\n"); -+ globals.dump (f); -+ fprintf (f, "\n\n"); -+ FOR_EACH_VEC_ELT (functions, i, fn) -+ { -+ fn->dump(f); -+ fprintf (f, "\n"); -+ } -+ fprintf (f, "\n\n"); -+} -+ -+/* Find the recorded srtype corresponding to TYPE. */ -+ -+srtype * -+ipa_struct_reorg::find_type (tree type) -+{ -+ unsigned i; -+ /* Get the main variant as we are going -+ to find that type only. */ -+ type = TYPE_MAIN_VARIANT (type); -+ -+ srtype *type1; -+ // Search for the type to see if it is already there. -+ FOR_EACH_VEC_ELT (types, i, type1) -+ { -+ if (types_compatible_p (type1->type, type)) -+ return type1; -+ } -+ return NULL; -+} -+ -+/* Is TYPE a volatile type or one which points -+ to a volatile type. */ -+ -+bool isvolatile_type (tree type) -+{ -+ if (TYPE_VOLATILE (type)) -+ return true; -+ while (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE) -+ { -+ type = TREE_TYPE (type); -+ if (TYPE_VOLATILE (type)) -+ return true; -+ } -+ return false; -+} -+ -+/* Is TYPE an array type or points to an array type. */ -+ -+bool isarraytype (tree type) -+{ -+ if (TREE_CODE (type) == ARRAY_TYPE) -+ return true; -+ while (POINTER_TYPE_P (type)) -+ { -+ type = TREE_TYPE (type); -+ if (TREE_CODE (type) == ARRAY_TYPE) -+ return true; -+ } -+ return false; -+} -+ -+/* Is TYPE a pointer to another pointer. */ -+ -+bool isptrptr (tree type) -+{ -+ bool firstptr = false; -+ while (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE) -+ { -+ if (POINTER_TYPE_P (type)) -+ { -+ if (firstptr) -+ return true; -+ firstptr = true; -+ } -+ type = TREE_TYPE (type); -+ } -+ return false; -+} -+ -+/* Return the escape type which corresponds to if -+ this is an volatile type, an array type or a pointer -+ to a pointer type. */ -+ -+escape_type escape_type_volatile_array_or_ptrptr (tree type) -+{ -+ if (isvolatile_type (type)) -+ return escape_volatile; -+ if (isarraytype (type)) -+ return escape_array; -+ if (isptrptr (type)) -+ return escape_ptr_ptr; -+ return does_not_escape; -+} -+ -+/* Record TYPE if not already recorded. */ -+ -+srtype * -+ipa_struct_reorg::record_type (tree type) -+{ -+ unsigned typeuid; -+ -+ /* Get the main variant as we are going -+ to record that type only. */ -+ type = TYPE_MAIN_VARIANT (type); -+ typeuid = TYPE_UID (type); -+ -+ srtype *type1; -+ -+ type1 = find_type (type); -+ if (type1) -+ return type1; -+ -+ /* If already done recording just return NULL. */ -+ if (done_recording) -+ return NULL; -+ -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ fprintf (dump_file, "Recording new type: %u.\n", typeuid); -+ -+ type1 = new srtype (type); -+ types.safe_push(type1); -+ -+ /* If the type has an user alignment set, -+ that means the user most likely already setup the type. 
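   (For example, a structure declared with __attribute__ ((aligned (64)))
   falls in this category and is recorded as escape_user_alignment below.)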
*/ -+ if (TYPE_USER_ALIGN (type)) -+ type1->mark_escape (escape_user_alignment, NULL); -+ -+ for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) -+ { -+ if (TREE_CODE (field) == FIELD_DECL) -+ { -+ tree t = TREE_TYPE (field); -+ process_union (t); -+ if (isvolatile_type (t)) -+ type1->mark_escape (escape_volatile, NULL); -+ escape_type e = escape_type_volatile_array_or_ptrptr (t); -+ if (e != does_not_escape) -+ type1->mark_escape (e, NULL); -+ if (handled_type (t)) -+ { -+ srtype *t1 = record_type (inner_type (t)); -+ srfield *f = type1->find_field (int_byte_position (field)); -+ /* We might have an variable sized type which we don't set the handle. */ -+ if (f) -+ { -+ f->type = t1; -+ t1->add_field_site (f); -+ } -+ if (t1 == type1) -+ type1->mark_escape (escape_rescusive_type, NULL); -+ } -+ } -+ } -+ -+ return type1; -+} -+ -+/* Mark TYPE as escaping with ESCAPES as the reason. */ -+ -+void -+ipa_struct_reorg::mark_type_as_escape (tree type, escape_type escapes, gimple *stmt) -+{ -+ if (handled_type (type)) -+ { -+ srtype *stype = record_type (inner_type (type)); -+ -+ if (!stype) -+ return; -+ -+ stype->mark_escape (escapes, stmt); -+ } -+} -+ -+/* Maybe process the union of type TYPE, such that marking all of the fields' -+ types as being escaping. */ -+ -+void -+ipa_struct_reorg::process_union (tree type) -+{ -+ static hash_set unions_recorded; -+ -+ type = inner_type (type); -+ if (TREE_CODE (type) != UNION_TYPE -+ && TREE_CODE (type) != QUAL_UNION_TYPE) -+ return; -+ -+ type = TYPE_MAIN_VARIANT (type); -+ -+ /* We already processed this type. */ -+ if (unions_recorded.add (type)) -+ return; -+ -+ for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) -+ { -+ if (TREE_CODE (field) == FIELD_DECL) -+ { -+ mark_type_as_escape (TREE_TYPE (field), escape_union); -+ process_union (TREE_TYPE (field)); -+ } -+ } -+} -+ -+/* Used by record_var function as a callback to walk_tree. -+ Mark the type as escaping if it has expressions which -+ cannot be converted for global initializations. */ -+ -+static tree -+record_init_types (tree *tp, int *walk_subtrees, void *data) -+{ -+ ipa_struct_reorg *c = (ipa_struct_reorg *)data; -+ switch (TREE_CODE (*tp)) -+ { -+ CASE_CONVERT: -+ case COMPONENT_REF: -+ case VIEW_CONVERT_EXPR: -+ case ARRAY_REF: -+ { -+ tree typeouter = TREE_TYPE (*tp); -+ tree typeinner = TREE_TYPE (TREE_OPERAND (*tp, 0)); -+ c->mark_type_as_escape (typeouter, escape_via_global_init); -+ c->mark_type_as_escape (typeinner, escape_via_global_init); -+ break; -+ } -+ case INTEGER_CST: -+ if (!integer_zerop (*tp)) -+ c->mark_type_as_escape (TREE_TYPE (*tp), escape_via_global_init); -+ break; -+ case VAR_DECL: -+ case PARM_DECL: -+ case FIELD_DECL: -+ c->mark_type_as_escape (TREE_TYPE (*tp), escape_via_global_init); -+ *walk_subtrees = false; -+ break; -+ default: -+ *walk_subtrees = true; -+ break; -+ } -+ return NULL_TREE; -+} -+ -+/* Record var DECL; optionally specify the escape reason and the argument -+ number in a function. 
*/ -+ -+srdecl * -+ipa_struct_reorg::record_var (tree decl, escape_type escapes, int arg) -+{ -+ srtype *type; -+ srdecl *sd = NULL; -+ -+ process_union (TREE_TYPE (decl)); -+ -+ /* */ -+ if (handled_type (TREE_TYPE (decl))) -+ { -+ type = record_type (inner_type (TREE_TYPE (decl))); -+ escape_type e; -+ -+ if (done_recording && !type) -+ return NULL; -+ -+ gcc_assert (type); -+ if (TREE_CODE (decl) == VAR_DECL && is_global_var (decl)) -+ sd = globals.record_decl (type, decl, arg); -+ else -+ { -+ gcc_assert (current_function); -+ sd = current_function->record_decl (type, decl, arg); -+ } -+ -+ /* If the variable has the "used" attribute, then treat the type as escaping. */ -+ if (escapes != does_not_escape) -+ e = escapes; -+ else if (TREE_CODE (decl) != SSA_NAME && DECL_PRESERVE_P (decl)) -+ e = escape_marked_as_used; -+ else if (TREE_THIS_VOLATILE (decl)) -+ e = escape_volatile; -+ else if (TREE_CODE (decl) != SSA_NAME && DECL_USER_ALIGN (decl)) -+ e = escape_user_alignment; -+ else if (TREE_CODE (decl) != SSA_NAME && TREE_STATIC (decl) && TREE_PUBLIC (decl)) -+ e = escape_via_global_var; -+ /* We don't have an initlizer. */ -+ else if (TREE_CODE (decl) != SSA_NAME && DECL_INITIAL (decl) == error_mark_node) -+ e = escape_via_global_var; -+ else -+ e = escape_type_volatile_array_or_ptrptr (TREE_TYPE (decl)); -+ -+ if (e != does_not_escape) -+ type->mark_escape (e, NULL); -+ } -+ -+ /* Record the initial usage of variables as types escapes. */ -+ if (TREE_CODE (decl) != SSA_NAME && TREE_STATIC (decl) && DECL_INITIAL (decl)) -+ { -+ walk_tree_without_duplicates (&DECL_INITIAL (decl), record_init_types, this); -+ if (!integer_zerop (DECL_INITIAL (decl)) -+ && DECL_INITIAL (decl) != error_mark_node) -+ mark_type_as_escape (TREE_TYPE (decl), escape_via_global_init); -+ } -+ return sd; -+} -+ -+/* Find void* ssa_names which are used inside MEM[] or if we have &a.c, -+ mark the type as escaping. */ -+ -+void -+ipa_struct_reorg::find_var (tree expr, gimple *stmt) -+{ -+ /* If we have VCE mark the outer type as escaping and the inner one -+ Also mark the inner most operand. */ -+ if (TREE_CODE (expr) == VIEW_CONVERT_EXPR) -+ { -+ mark_type_as_escape (TREE_TYPE (expr), escape_vce, stmt); -+ mark_type_as_escape (TREE_TYPE (TREE_OPERAND (expr, 0)), -+ escape_vce, stmt); -+ } -+ -+ /* If we have &b.c then we need to mark the type of b -+ as escaping as tracking a will be hard. 
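   (For instance, after something like p = &b.c the pointer p can no longer
   be tracked reliably, so the whole type of b is recorded as escape_addr
   below; p and b are illustrative names.)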
*/ -+ if (TREE_CODE (expr) == ADDR_EXPR -+ || TREE_CODE (expr) == VIEW_CONVERT_EXPR) -+ { -+ tree r = TREE_OPERAND (expr, 0); -+ if (handled_component_p (r) -+ || TREE_CODE (r) == MEM_REF) -+ { -+ while (handled_component_p (r) -+ || TREE_CODE (r) == MEM_REF) -+ { -+ if (TREE_CODE (r) == VIEW_CONVERT_EXPR) -+ { -+ mark_type_as_escape (TREE_TYPE (r), escape_vce, stmt); -+ mark_type_as_escape (TREE_TYPE (TREE_OPERAND (r, 0)), -+ escape_vce, stmt); -+ } -+ if (TREE_CODE (r) == MEM_REF) -+ mark_type_as_escape (TREE_TYPE (TREE_OPERAND (r, 1)), -+ escape_addr, stmt); -+ r = TREE_OPERAND (r, 0); -+ } -+ mark_expr_escape (r, escape_addr, stmt); -+ } -+ } -+ -+ tree base; -+ bool indirect; -+ srtype *type; -+ srfield *field; -+ bool realpart, imagpart, address; -+ get_type_field (expr, base, indirect, type, field, -+ realpart, imagpart, address, true, true); -+} -+ -+ -+void -+ipa_struct_reorg::find_vars (gimple *stmt) -+{ -+ gasm *astmt; -+ switch (gimple_code (stmt)) -+ { -+ case GIMPLE_ASSIGN: -+ if (gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS -+ || gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR) -+ { -+ tree lhs = gimple_assign_lhs (stmt); -+ tree rhs = gimple_assign_rhs1 (stmt); -+ find_var (gimple_assign_lhs (stmt), stmt); -+ find_var (gimple_assign_rhs1 (stmt), stmt); -+ if (TREE_CODE (lhs) == SSA_NAME -+ && VOID_POINTER_P (TREE_TYPE (lhs)) -+ && handled_type (TREE_TYPE (rhs))) -+ { -+ srtype *t = find_type (inner_type (TREE_TYPE (rhs))); -+ srdecl *d = find_decl (lhs); -+ if (!d && t) -+ current_function->record_decl (t, lhs, -1); -+ } -+ if (TREE_CODE (rhs) == SSA_NAME -+ && VOID_POINTER_P (TREE_TYPE (rhs)) -+ && handled_type (TREE_TYPE (lhs))) -+ { -+ srtype *t = find_type (inner_type (TREE_TYPE (lhs))); -+ srdecl *d = find_decl (rhs); -+ if (!d && t) -+ current_function->record_decl (t, rhs, -1); -+ } -+ } -+ break; -+ -+ case GIMPLE_CALL: -+ if (gimple_call_lhs (stmt)) -+ find_var (gimple_call_lhs (stmt), stmt); -+ -+ if (gimple_call_chain (stmt)) -+ find_var (gimple_call_chain (stmt), stmt); -+ -+ for (unsigned i = 0; i < gimple_call_num_args (stmt); i++) -+ find_var (gimple_call_arg (stmt, i), stmt); -+ break; -+ -+ case GIMPLE_ASM: -+ astmt = as_a (stmt); -+ for (unsigned i = 0; i < gimple_asm_ninputs (astmt); i++) -+ find_var (TREE_VALUE (gimple_asm_input_op (astmt, i)), stmt); -+ for (unsigned i = 0; i < gimple_asm_noutputs (astmt); i++) -+ find_var (TREE_VALUE (gimple_asm_output_op (astmt, i)), stmt); -+ mark_types_asm (astmt); -+ break; -+ -+ case GIMPLE_RETURN: -+ { -+ tree expr = gimple_return_retval (as_a(stmt)); -+ if (expr) -+ find_var (expr, stmt); -+ /* return &a; should mark the type of a as escaping through a return. */ -+ if (expr && TREE_CODE (expr) == ADDR_EXPR) -+ { -+ expr = TREE_OPERAND (expr, 0); -+ srdecl *d = find_decl (expr); -+ if (d) -+ d->type->mark_escape (escape_return, stmt); -+ } -+ } -+ break; -+ -+ default: -+ break; -+ } -+} -+ -+/* Maybe record access of statement for further analaysis. */ -+ -+void -+ipa_struct_reorg::maybe_record_stmt (cgraph_node *node, gimple *stmt) -+{ -+ switch (gimple_code (stmt)) -+ { -+ case GIMPLE_ASSIGN: -+ maybe_record_assign (node, as_a (stmt)); -+ break; -+ case GIMPLE_CALL: -+ maybe_record_call (node, as_a (stmt)); -+ break; -+ case GIMPLE_DEBUG: -+ break; -+ case GIMPLE_GOTO: -+ case GIMPLE_SWITCH: -+ break; -+ default: -+ break; -+ } -+} -+ -+/* This function checks whether ARG is a result of multiplication -+ of some number by STRUCT_SIZE. 
If yes, the function returns true -+ and this number is filled into NUM. */ -+ -+static bool -+is_result_of_mult (tree arg, tree *num, tree struct_size) -+{ -+ if (!struct_size -+ || TREE_CODE (struct_size) != INTEGER_CST -+ || integer_zerop (struct_size)) -+ return false; -+ -+ /* If we have a integer, just check if it is a multiply of STRUCT_SIZE. */ -+ if (TREE_CODE (arg) == INTEGER_CST) -+ { -+ if (integer_zerop (size_binop (FLOOR_MOD_EXPR, arg, struct_size))) -+ { -+ *num = size_binop (FLOOR_DIV_EXPR, arg, struct_size); -+ return true; -+ } -+ return false; -+ } -+ gimple *size_def_stmt = SSA_NAME_DEF_STMT (arg); -+ -+ /* If the allocation statement was of the form -+ D.2229_10 = (D.2228_9); -+ then size_def_stmt can be D.2228_9 = num.3_8 * 8; */ -+ -+ while (size_def_stmt && is_gimple_assign (size_def_stmt)) -+ { -+ tree lhs = gimple_assign_lhs (size_def_stmt); -+ -+ /* We expect temporary here. */ -+ if (!is_gimple_reg (lhs)) -+ return false; -+ -+ // FIXME: this should handle SHIFT also. -+ if (gimple_assign_rhs_code (size_def_stmt) == PLUS_EXPR) -+ { -+ tree num1, num2; -+ tree arg0 = gimple_assign_rhs1 (size_def_stmt); -+ tree arg1 = gimple_assign_rhs2 (size_def_stmt); -+ if (!is_result_of_mult (arg0, &num1, struct_size)) -+ return false; -+ if (!is_result_of_mult (arg1, &num2, struct_size)) -+ return false; -+ *num = size_binop (PLUS_EXPR, num1, num2); -+ return true; -+ } -+ if (gimple_assign_rhs_code (size_def_stmt) == MULT_EXPR) -+ { -+ tree arg0 = gimple_assign_rhs1 (size_def_stmt); -+ tree arg1 = gimple_assign_rhs2 (size_def_stmt); -+ tree num1; -+ -+ if (is_result_of_mult (arg0, &num1, struct_size)) -+ { -+ *num = size_binop (MULT_EXPR, arg1, num1); -+ return true; -+ } -+ if (is_result_of_mult (arg1, &num1, struct_size)) -+ { -+ *num = size_binop (MULT_EXPR, arg0, num1); -+ return true; -+ } -+ -+ *num = NULL_TREE; -+ return false; -+ } -+ else if (gimple_assign_rhs_code (size_def_stmt) == SSA_NAME) -+ { -+ arg = gimple_assign_rhs1 (size_def_stmt); -+ size_def_stmt = SSA_NAME_DEF_STMT (arg); -+ } -+ else -+ { -+ *num = NULL_TREE; -+ return false; -+ } -+ } -+ -+ *num = NULL_TREE; -+ return false; -+} -+ -+/* Return TRUE if STMT is an allocation statement that is handled. */ -+ -+static bool -+handled_allocation_stmt (gimple *stmt) -+{ -+ if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC) -+ || gimple_call_builtin_p (stmt, BUILT_IN_MALLOC) -+ || gimple_call_builtin_p (stmt, BUILT_IN_CALLOC) -+ || gimple_call_builtin_p (stmt, BUILT_IN_ALIGNED_ALLOC) -+ || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA) -+ || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA_WITH_ALIGN)) -+ return true; -+ return false; -+} -+ -+ -+/* Returns the allocated size / T size for STMT. That is the number of -+ elements in the array allocated. 
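   As an illustration (assuming a tracked struct s and an element count n),
   both
     p = calloc (n, sizeof (struct s));
     q = malloc (n * sizeof (struct s));
   yield n: for calloc one argument is matched against the structure size
   directly, while for malloc the count is recovered from the multiplication
   by is_result_of_mult above.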
*/ -+ -+tree -+ipa_struct_reorg::allocate_size (srtype *type, gimple *stmt) -+{ -+ if (!stmt -+ || gimple_code (stmt) != GIMPLE_CALL -+ || !handled_allocation_stmt (stmt)) -+ { -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "\nNot a allocate statment:\n"); -+ print_gimple_stmt (dump_file, stmt, 0); -+ fprintf (dump_file, "\n"); -+ } -+ return NULL; -+ } -+ -+ if (type->has_escaped ()) -+ return NULL; -+ -+ tree struct_size = TYPE_SIZE_UNIT (type->type); -+ -+ tree size = gimple_call_arg (stmt, 0); -+ -+ if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC) -+ || gimple_call_builtin_p (stmt, BUILT_IN_ALIGNED_ALLOC)) -+ size = gimple_call_arg (stmt, 1); -+ else if (gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) -+ { -+ tree arg1; -+ arg1 = gimple_call_arg (stmt, 1); -+ /* Check that second argument is a constant equal to the size of structure. */ -+ if (operand_equal_p (arg1, struct_size, 0)) -+ return size; -+ /* Check that first argument is a constant equal to the size of structure. */ -+ if (operand_equal_p (size, struct_size, 0)) -+ return arg1; -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "\ncalloc the correct size:\n"); -+ print_gimple_stmt (dump_file, stmt, 0); -+ fprintf (dump_file, "\n"); -+ } -+ return NULL; -+ } -+ -+ tree num; -+ if (!is_result_of_mult (size, &num, struct_size)) -+ return NULL; -+ -+ return num; -+ -+} -+ -+ -+void -+ipa_struct_reorg::maybe_mark_or_record_other_side (tree side, tree other, gimple *stmt) -+{ -+ gcc_assert (TREE_CODE (side) == SSA_NAME || TREE_CODE (side) == ADDR_EXPR); -+ srtype *type = NULL; -+ if (handled_type (TREE_TYPE (other))) -+ type = record_type (inner_type (TREE_TYPE (other))); -+ if (TREE_CODE (side) == ADDR_EXPR) -+ side = TREE_OPERAND (side, 0); -+ srdecl *d = find_decl (side); -+ if (!type) -+ { -+ if (!d) -+ return; -+ if (TREE_CODE (side) == SSA_NAME -+ && VOID_POINTER_P (TREE_TYPE (side))) -+ return; -+ d->type->mark_escape (escape_cast_another_ptr, stmt); -+ return; -+ } -+ -+ if (!d) -+ { -+ if (VOID_POINTER_P (TREE_TYPE (side)) -+ && TREE_CODE (side) == SSA_NAME) -+ current_function->record_decl (type, side, -1); -+ else -+ type->mark_escape (escape_cast_another_ptr, stmt); -+ } -+ else if (type != d->type) -+ { -+ type->mark_escape (escape_cast_another_ptr, stmt); -+ d->type->mark_escape (escape_cast_another_ptr, stmt); -+ } -+} -+ -+/* Record accesses in an assignment statement STMT. */ -+ -+void -+ipa_struct_reorg::maybe_record_assign (cgraph_node *node, gassign *stmt) -+{ -+ -+ /* */ -+ -+ if (gimple_clobber_p (stmt)) -+ { -+ record_stmt_expr (gimple_assign_lhs (stmt), node, stmt); -+ return; -+ } -+ -+ if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR) -+ { -+ tree lhs = gimple_assign_lhs (stmt); -+ tree rhs1 = gimple_assign_rhs1 (stmt); -+ tree rhs2 = gimple_assign_rhs2 (stmt); -+ tree num; -+ if (!handled_type (TREE_TYPE (lhs))) -+ return; -+ /* Check if rhs2 is a multiplication of the size of the type. */ -+ if (is_result_of_mult (rhs2, &num, TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (lhs))))) -+ { -+ record_stmt_expr (lhs, node, stmt); -+ record_stmt_expr (rhs1, node, stmt); -+ } -+ else -+ { -+ mark_expr_escape (lhs, escape_non_multiply_size, stmt); -+ mark_expr_escape (rhs1, escape_non_multiply_size, stmt); -+ } -+ return; -+ } -+ /* Copies, References, Taking addresses. 
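   (That is, plain moves such as a_1 = b_2, loads and stores through a
   dereference, and address-taking like a_1 = &b.c; SSA names are
   illustrative.)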
*/ -+ if (gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS) -+ { -+ tree lhs = gimple_assign_lhs (stmt); -+ tree rhs = gimple_assign_rhs1 (stmt); -+ /* If we have a = &b.c then we need to mark the type of b -+ as escaping as tracking a will be hard. */ -+ if (TREE_CODE (rhs) == ADDR_EXPR) -+ { -+ tree r = TREE_OPERAND (rhs, 0); -+ if (handled_component_p (r)) -+ { -+ while (handled_component_p (r)) -+ r = TREE_OPERAND (r, 0); -+ mark_expr_escape (r, escape_addr, stmt); -+ return; -+ } -+ } -+ if ((TREE_CODE (rhs) == SSA_NAME || TREE_CODE (rhs) == ADDR_EXPR)) -+ maybe_mark_or_record_other_side (rhs, lhs, stmt); -+ if (TREE_CODE (lhs) == SSA_NAME) -+ maybe_mark_or_record_other_side (lhs, rhs, stmt); -+ } -+} -+ -+tree -+get_ref_base_and_offset (tree &e, HOST_WIDE_INT &offset, bool &realpart, bool &imagpart, tree &accesstype) -+{ -+ offset = 0; -+ realpart = false; -+ imagpart = false; -+ accesstype = NULL_TREE; -+ if (TREE_CODE (e) == REALPART_EXPR) -+ { -+ e = TREE_OPERAND (e, 0); -+ realpart = true; -+ } -+ if (TREE_CODE (e) == IMAGPART_EXPR) -+ { -+ e = TREE_OPERAND (e, 0); -+ imagpart = true; -+ } -+ tree expr = e; -+ while (true) -+ { -+ switch (TREE_CODE (expr)) -+ { -+ case COMPONENT_REF: -+ { -+ tree field = TREE_OPERAND (expr, 1); -+ tree field_off = byte_position (field); -+ if (TREE_CODE (field_off) != INTEGER_CST) -+ return NULL; -+ offset += tree_to_shwi (field_off); -+ expr = TREE_OPERAND (expr, 0); -+ accesstype = NULL; -+ break; -+ } -+ case MEM_REF: -+ { -+ tree field_off = TREE_OPERAND (expr, 1); -+ gcc_assert (TREE_CODE (field_off) == INTEGER_CST); -+ /* So we can mark the types as escaping if different. */ -+ accesstype = TREE_TYPE (field_off); -+ offset += tree_to_uhwi (field_off); -+ return TREE_OPERAND (expr, 0); -+ } -+ default: -+ return expr; -+ } -+ } -+} -+ -+/* Return true if EXPR was accessing the whole type T. */ -+ -+bool -+ipa_struct_reorg::wholeaccess (tree expr, tree base, tree accesstype, srtype *t) -+{ -+ if (expr == base) -+ return true; -+ -+ if (TREE_CODE (expr) == ADDR_EXPR && TREE_OPERAND (expr, 0) == base) -+ return true; -+ -+ if (!accesstype) -+ return false; -+ -+ if (!types_compatible_p (TREE_TYPE (expr), TREE_TYPE (accesstype))) -+ return false; -+ -+ if (!handled_type (TREE_TYPE (expr))) -+ return false; -+ -+ srtype *other_type = find_type (inner_type (TREE_TYPE (expr))); -+ -+ if (t == other_type) -+ return true; -+ -+ return false; -+} -+ -+bool -+ipa_struct_reorg::get_type_field (tree expr, tree &base, bool &indirect, srtype *&type, srfield *&field, bool &realpart, bool &imagpart, bool &address, bool should_create, bool can_escape) -+{ -+ HOST_WIDE_INT offset; -+ tree accesstype; -+ address = false; -+ bool mark_as_bit_field = false; -+ -+ if (TREE_CODE (expr) == BIT_FIELD_REF) -+ { -+ expr = TREE_OPERAND (expr, 0); -+ mark_as_bit_field = true; -+ } -+ -+ base = get_ref_base_and_offset (expr, offset, realpart, imagpart, accesstype); -+ -+ /* Variable access, unkown type. 
*/ -+ if (base == NULL) -+ return false; -+ -+ if (TREE_CODE (base) == ADDR_EXPR) -+ { -+ address = true; -+ base = TREE_OPERAND (base, 0); -+ } -+ -+ if (offset != 0 && accesstype) -+ { -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "Non zero offset (%d) with MEM.\n", (int)offset); -+ print_generic_expr (dump_file, expr); -+ fprintf (dump_file, "\n"); -+ print_generic_expr (dump_file, base); -+ fprintf (dump_file, "\n"); -+ } -+ } -+ -+ srdecl *d = find_decl (base); -+ srtype *t; -+ -+ if (integer_zerop (base)) -+ { -+ gcc_assert (!d); -+ if (!accesstype) -+ return false; -+ t = find_type (inner_type (inner_type (accesstype))); -+ if (!t && should_create && handled_type (accesstype)) -+ t = record_type (inner_type (accesstype)); -+ if (!t) -+ return false; -+ } -+ else if (!d && accesstype) -+ { -+ if (!should_create) -+ return false; -+ if (!handled_type (accesstype)) -+ return false; -+ t = find_type (inner_type (inner_type (accesstype))); -+ if (!t) -+ t = record_type (inner_type (accesstype)); -+ if (!t || t->has_escaped ()) -+ return false; -+ /* If base is not void* mark the type as escaping. */ -+ if (!VOID_POINTER_P (TREE_TYPE (base))) -+ { -+ gcc_assert (can_escape); -+ t->mark_escape (escape_cast_another_ptr, NULL); -+ return false; -+ } -+ if (TREE_CODE (base) == SSA_NAME) -+ current_function->record_decl (t, base, -1); -+ } -+ else if (!d) -+ return false; -+ else -+ t = d->type; -+ -+ if (t->has_escaped ()) -+ return false; -+ -+ if (mark_as_bit_field) -+ { -+ gcc_assert (can_escape); -+ t->mark_escape (escape_bitfields, NULL); -+ return false; -+ } -+ -+ if (wholeaccess (expr, base, accesstype, t)) -+ { -+ field = NULL; -+ type = t; -+ indirect = accesstype != NULL; -+ return true; -+ } -+ -+ srfield *f = t->find_field (offset); -+ if (!f) -+ { -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "\nunkown field\n"); -+ print_generic_expr (dump_file, expr); -+ fprintf (dump_file, "\n"); -+ print_generic_expr (dump_file, base); -+ fprintf (dump_file, "\n"); -+ } -+ gcc_assert (can_escape); -+ t->mark_escape (escape_unkown_field, NULL); -+ return false; -+ } -+ if (!types_compatible_p (f->fieldtype, TREE_TYPE (expr))) -+ { -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "\nfieldtype = "); -+ print_generic_expr (dump_file, f->fieldtype); -+ fprintf (dump_file, "\naccess type = "); -+ print_generic_expr (dump_file, TREE_TYPE (expr)); -+ fprintf (dump_file, "original expr = "); -+ print_generic_expr (dump_file, expr); -+ fprintf (dump_file, "\n"); -+ } -+ gcc_assert (can_escape); -+ t->mark_escape (escape_unkown_field, NULL); -+ return false; -+ } -+ field = f; -+ type = t; -+ indirect = accesstype != NULL; -+ return true; -+} -+ -+/* Mark the type used in EXPR as escaping. */ -+ -+void -+ipa_struct_reorg::mark_expr_escape (tree expr, escape_type escapes, gimple *stmt) -+{ -+ tree base; -+ bool indirect; -+ srtype *type; -+ srfield *field; -+ bool realpart, imagpart, address; -+ if (!get_type_field (expr, base, indirect, type, field, realpart, imagpart, address)) -+ return; -+ -+ type->mark_escape (escapes, stmt); -+} -+ -+/* Record accesses in a call statement STMT. 
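   For example (illustrative), passing a pointer to a tracked structure to
   an externally visible function marks its type escape_external_function,
   and passing it through a void * parameter marks it escape_cast_void;
   calls to free and realloc are the notable exceptions handled below.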
*/ -+ -+void -+ipa_struct_reorg::maybe_record_call (cgraph_node *node, gcall *stmt) -+{ -+ tree argtype; -+ tree fndecl; -+ escape_type escapes = does_not_escape; -+ bool free_or_realloc = gimple_call_builtin_p (stmt, BUILT_IN_FREE) -+ || gimple_call_builtin_p (stmt, BUILT_IN_REALLOC); -+ -+ /* We check allocation sites in a different location. */ -+ if (handled_allocation_stmt (stmt)) -+ return; -+ -+ -+ /* A few cases here: -+ 1) assigned from the lhs -+ 2) Used in argument -+ If a function being called is global (or indirect) -+ then we reject the types as being escaping. */ -+ -+ if (tree chain = gimple_call_chain (stmt)) -+ record_stmt_expr (chain, node, stmt); -+ -+ /* Assigned from LHS. */ -+ if (tree lhs = gimple_call_lhs (stmt)) -+ { -+ /* FIXME: handle return types.. */ -+ mark_type_as_escape (TREE_TYPE (lhs), escape_return); -+ } -+ -+ /* If we have an internal call, just record the stmt. */ -+ if (gimple_call_internal_p (stmt)) -+ { -+ for (unsigned i = 0; i < gimple_call_num_args (stmt); i++) -+ record_stmt_expr (gimple_call_arg (stmt, i), node, stmt); -+ return; -+ } -+ -+ fndecl = gimple_call_fndecl (stmt); -+ -+ /* If we have an indrect call, just mark the types as escape. */ -+ if (!fndecl) -+ escapes = escape_pointer_function; -+ /* Non local functions cause escape except for calls to free -+ and realloc. -+ FIXME: should support function annotations too. */ -+ else if (!free_or_realloc -+ && !cgraph_node::local_info (fndecl)->local) -+ escapes = escape_external_function; -+ else if (!free_or_realloc -+ && !cgraph_node::local_info (fndecl)->can_change_signature) -+ escapes = escape_cannot_change_signature; -+ /* FIXME: we should be able to handle functions in other partitions. */ -+ else if (symtab_node::get(fndecl)->in_other_partition) -+ escapes = escape_external_function; -+ -+ if (escapes != does_not_escape) -+ { -+ for (unsigned i = 0; i < gimple_call_num_args (stmt); i++) -+ mark_type_as_escape (TREE_TYPE (gimple_call_arg (stmt, i)), -+ escapes); -+ return; -+ } -+ -+ argtype = TYPE_ARG_TYPES (gimple_call_fntype (stmt)); -+ for (unsigned i = 0; i < gimple_call_num_args (stmt); i++) -+ { -+ tree arg = gimple_call_arg (stmt, i); -+ if (argtype) -+ { -+ tree argtypet = TREE_VALUE (argtype); -+ if (!free_or_realloc -+ && VOID_POINTER_P (argtypet)) -+ mark_type_as_escape (TREE_TYPE (arg), escape_cast_void); -+ else -+ record_stmt_expr (arg, node, stmt); -+ } -+ else -+ mark_type_as_escape (TREE_TYPE (arg), escape_var_arg_function); -+ -+ argtype = argtype ? TREE_CHAIN (argtype) : NULL_TREE; -+ } -+ -+} -+ -+ -+void -+ipa_struct_reorg::record_stmt_expr (tree expr, cgraph_node *node, gimple *stmt) -+{ -+ tree base; -+ bool indirect; -+ srtype *type; -+ srfield *field; -+ bool realpart, imagpart, address; -+ if (!get_type_field (expr, base, indirect, type, field, realpart, imagpart, address)) -+ return; -+ -+ if (!opt_for_fn (current_function_decl, flag_ipa_struct_reorg)) -+ type->mark_escape (escape_non_optimize, stmt); -+ -+ /* Record it. */ -+ type->add_access (new sraccess (stmt, node, type, field)); -+} -+ -+/* Find function corresponding to NODE. 
*/ -+ -+srfunction * -+ipa_struct_reorg::find_function (cgraph_node *node) -+{ -+ for (unsigned i = 0; i < functions.length (); i++) -+ if (functions[i]->node == node) -+ return functions[i]; -+ return NULL; -+} -+ -+void -+ipa_struct_reorg::check_type_and_push (tree newdecl, srtype *type, vec &worklist, gimple *stmt) -+{ -+ if (integer_zerop (newdecl)) -+ return; -+ -+ if (TREE_CODE (newdecl) == ADDR_EXPR) -+ { -+ srdecl *d = find_decl (TREE_OPERAND (newdecl, 0)); -+ if (!d) -+ { -+ type->mark_escape (escape_cast_another_ptr, stmt); -+ return; -+ } -+ if (d->type == type) -+ return; -+ -+ srtype *type1 = d->type; -+ type->mark_escape (escape_cast_another_ptr, stmt); -+ type1->mark_escape (escape_cast_another_ptr, stmt); -+ return; -+ } -+ -+ srdecl *d = find_decl (newdecl); -+ if (!d) -+ { -+ if (TREE_CODE (newdecl) == INTEGER_CST) -+ { -+ type->mark_escape (escape_int_const, stmt); -+ return; -+ } -+ /* If we have a non void* or a decl (which is hard to track), -+ then mark the type as escaping. */ -+ if (!VOID_POINTER_P (TREE_TYPE (newdecl)) -+ || DECL_P (newdecl)) -+ { -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "\nunkown decl: "); -+ print_generic_expr (dump_file, newdecl); -+ fprintf (dump_file, " in type:\n"); -+ print_generic_expr (dump_file, TREE_TYPE (newdecl)); -+ fprintf (dump_file, "\n"); -+ } -+ type->mark_escape (escape_cast_another_ptr, stmt); -+ return; -+ } -+ /* At this point there should only be unkown void* ssa names. */ -+ gcc_assert (TREE_CODE (newdecl) == SSA_NAME); -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "\nrecording unkown decl: "); -+ print_generic_expr (dump_file, newdecl); -+ fprintf (dump_file, " as type:\n"); -+ type->simple_dump (dump_file); -+ fprintf (dump_file, "\n"); -+ } -+ d = current_function->record_decl (type, newdecl, -1); -+ worklist.safe_push (d); -+ return; -+ } -+ -+ /* Only add to the worklist if the decl is a SSA_NAME. */ -+ if (TREE_CODE (newdecl) == SSA_NAME) -+ worklist.safe_push (d); -+ if (d->type == type) -+ return; -+ -+ srtype *type1 = d->type; -+ type->mark_escape (escape_cast_another_ptr, stmt); -+ type1->mark_escape (escape_cast_another_ptr, stmt); -+ -+} -+ -+/* -+ 2) Check SSA_NAMEs for non type usages (source or use) (worlist of srdecl) -+ a) if the SSA_NAME is sourced from a pointer plus, record the pointer and -+ check to make sure the addition was a multiple of the size. -+ check the pointer type too. 
-+ b) If the name is sourced from an allocation check the allocation -+ i) Add SSA_NAME (void*) to the worklist if allocated from realloc -+ c) if the name is from a param, make sure the param type was of the original type -+ d) if the name is from a cast/assignment, make sure it is used as that type or void* -+ i) If void* then push the ssa_name into worklist -+*/ -+void -+ipa_struct_reorg::check_definition (srdecl *decl, vec &worklist) -+{ -+ tree ssa_name = decl->decl; -+ srtype *type = decl->type; -+ -+ /* c) if the name is from a param, make sure the param type was -+ of the original type */ -+ if (SSA_NAME_IS_DEFAULT_DEF (ssa_name)) -+ { -+ tree var = SSA_NAME_VAR (ssa_name); -+ if (var -+ && TREE_CODE (var) == PARM_DECL -+ && VOID_POINTER_P (TREE_TYPE (ssa_name))) -+ type->mark_escape (escape_cast_void, NULL); -+ return; -+ } -+ gimple *stmt = SSA_NAME_DEF_STMT (ssa_name); -+ -+ /* -+ b) If the name is sourced from an allocation check the allocation -+ i) Add SSA_NAME (void*) to the worklist if allocated from realloc -+ */ -+ if (gimple_code (stmt) == GIMPLE_CALL) -+ { -+ /* For realloc, check the type of the argument. */ -+ if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC)) -+ check_type_and_push (gimple_call_arg (stmt, 0), type, worklist, stmt); -+ -+ if (!handled_allocation_stmt (stmt) -+ || !allocate_size (type, stmt)) -+ type->mark_escape (escape_return, stmt); -+ return; -+ } -+ /* If the SSA_NAME is sourced from an inline-asm, just mark the type as escaping. */ -+ if (gimple_code (stmt) == GIMPLE_ASM) -+ { -+ type->mark_escape (escape_inline_asm, stmt); -+ return; -+ } -+ -+ /* If the SSA_NAME is sourced from a PHI check add each name to the worklist and -+ check to make sure they are used correctly. */ -+ if (gimple_code (stmt) == GIMPLE_PHI) -+ { -+ for (unsigned i = 0; i < gimple_phi_num_args (stmt); i++) -+ check_type_and_push (gimple_phi_arg_def (stmt, i), type, worklist, stmt); -+ return; -+ } -+ -+ gcc_assert (gimple_code (stmt) == GIMPLE_ASSIGN); -+ /* -+ a) if the SSA_NAME is sourced from a pointer plus, record the pointer and -+ check to make sure the addition was a multiple of the size. -+ check the pointer type too. -+ */ -+ -+ tree rhs = gimple_assign_rhs1 (stmt); -+ if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR) -+ { -+ tree rhs2 = gimple_assign_rhs2 (stmt); -+ tree num; -+ if (!is_result_of_mult (rhs2, &num, TYPE_SIZE_UNIT (type->type))) -+ type->mark_escape (escape_non_multiply_size, stmt); -+ -+ if (TREE_CODE (rhs) == SSA_NAME) -+ check_type_and_push (rhs, type, worklist, stmt); -+ return; -+ } -+ -+ /* Casts between pointers and integer are escaping. */ -+ if (gimple_assign_cast_p (stmt)) -+ { -+ type->mark_escape (escape_cast_int, stmt); -+ return; -+ } -+ -+ /* -+ d) if the name is from a cast/assignment, make sure it is used as that type or void* -+ i) If void* then push the ssa_name into worklist -+ */ -+ gcc_assert (gimple_assign_single_p (stmt)); -+ check_other_side (decl, rhs, stmt, worklist); -+} -+ -+/* Mark the types used by the inline-asm as escaping. It is unkown what happens inside -+ an inline-asm. */ -+ -+void -+ipa_struct_reorg::mark_types_asm (gasm *astmt) -+{ -+ for (unsigned i = 0; i < gimple_asm_ninputs (astmt); i++) -+ { -+ tree v = TREE_VALUE (gimple_asm_input_op (astmt, i)); -+ /* If we have &b, just strip the & here. 
*/ -+ if (TREE_CODE (v) == ADDR_EXPR) -+ v = TREE_OPERAND (v, 0); -+ mark_expr_escape (v, escape_inline_asm, astmt); -+ } -+ for (unsigned i = 0; i < gimple_asm_noutputs (astmt); i++) -+ { -+ tree v = TREE_VALUE (gimple_asm_output_op (astmt, i)); -+ /* If we have &b, just strip the & here. */ -+ if (TREE_CODE (v) == ADDR_EXPR) -+ v = TREE_OPERAND (v, 0); -+ mark_expr_escape (v, escape_inline_asm, astmt); -+ } -+} -+ -+void -+ipa_struct_reorg::check_other_side (srdecl *decl, tree other, gimple *stmt, vec &worklist) -+{ -+ srtype *type = decl->type; -+ -+ if (TREE_CODE (other) == SSA_NAME -+ || DECL_P (other) -+ || TREE_CODE (other) == INTEGER_CST) -+ { -+ check_type_and_push (other, type, worklist, stmt); -+ return; -+ } -+ -+ tree t = TREE_TYPE (other); -+ if (!handled_type (t)) -+ { -+ type->mark_escape (escape_cast_another_ptr, stmt); -+ return; -+ } -+ -+ srtype *t1 = find_type (inner_type (t)); -+ if (t1 == type) -+ { -+ tree base; -+ bool indirect; -+ srtype *type1; -+ srfield *field; -+ bool realpart, imagpart, address; -+ if (!get_type_field (other, base, indirect, type1, field, realpart, imagpart, address)) -+ type->mark_escape (escape_cast_another_ptr, stmt); -+ -+ return; -+ } -+ -+ if (t1) -+ t1->mark_escape (escape_cast_another_ptr, stmt); -+ -+ type->mark_escape (escape_cast_another_ptr, stmt); -+} -+ -+ -+void -+ipa_struct_reorg::check_use (srdecl *decl, gimple *stmt, vec &worklist) -+{ -+ srtype *type = decl->type; -+ -+ if (gimple_code (stmt) == GIMPLE_RETURN) -+ { -+ type->mark_escape (escape_return, stmt); -+ return; -+ } -+ /* If the SSA_NAME PHI check and add the src to the worklist and -+ check to make sure they are used correctly. */ -+ if (gimple_code (stmt) == GIMPLE_PHI) -+ { -+ check_type_and_push (gimple_phi_result (stmt), type, worklist, stmt); -+ return; -+ } -+ -+ if (gimple_code (stmt) == GIMPLE_ASM) -+ { -+ mark_types_asm (as_a (stmt)); -+ return; -+ } -+ -+ if (gimple_code (stmt) == GIMPLE_COND) -+ { -+ tree rhs1 = gimple_cond_lhs (stmt); -+ tree rhs2 = gimple_cond_rhs (stmt); -+ tree orhs = rhs1; -+ if (gimple_cond_code (stmt) != EQ_EXPR -+ && gimple_cond_code (stmt) != NE_EXPR) -+ { -+ mark_expr_escape (rhs1, escape_non_eq, stmt); -+ mark_expr_escape (rhs2, escape_non_eq, stmt); -+ } -+ if (rhs1 == decl->decl) -+ orhs = rhs2; -+ if (integer_zerop (orhs)) -+ return; -+ if (TREE_CODE (orhs) != SSA_NAME) -+ mark_expr_escape (rhs1, escape_non_eq, stmt); -+ check_type_and_push (orhs, type, worklist, stmt); -+ return; -+ } -+ -+ -+ /* Casts between pointers and integer are escaping. 
*/ -+ if (gimple_assign_cast_p (stmt)) -+ { -+ type->mark_escape (escape_cast_int, stmt); -+ return; -+ } -+ -+ /* We might have a_1 = ptr_2 == ptr_3; */ -+ if (is_gimple_assign (stmt) -+ && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison) -+ { -+ tree rhs1 = gimple_assign_rhs1 (stmt); -+ tree rhs2 = gimple_assign_rhs2 (stmt); -+ tree orhs = rhs1; -+ if (gimple_assign_rhs_code (stmt) != EQ_EXPR -+ && gimple_assign_rhs_code (stmt) != NE_EXPR) -+ { -+ mark_expr_escape (rhs1, escape_non_eq, stmt); -+ mark_expr_escape (rhs2, escape_non_eq, stmt); -+ } -+ if (rhs1 == decl->decl) -+ orhs = rhs2; -+ if (integer_zerop (orhs)) -+ return; -+ if (TREE_CODE (orhs) != SSA_NAME) -+ mark_expr_escape (rhs1, escape_non_eq, stmt); -+ check_type_and_push (orhs, type, worklist, stmt); -+ return; -+ } -+ -+ if (gimple_assign_single_p (stmt)) -+ { -+ tree lhs = gimple_assign_lhs (stmt); -+ tree rhs = gimple_assign_rhs1 (stmt); -+ /* Check if we have a_1 = b_2; that a_1 is in the correct type. */ -+ if (decl->decl == rhs) -+ { -+ check_other_side (decl, lhs, stmt, worklist); -+ return; -+ } -+ } -+ -+ if (is_gimple_assign (stmt) -+ && gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR) -+ { -+ tree rhs2 = gimple_assign_rhs2 (stmt); -+ tree lhs = gimple_assign_lhs (stmt); -+ tree num; -+ check_other_side (decl, lhs, stmt, worklist); -+ if (!is_result_of_mult (rhs2, &num, TYPE_SIZE_UNIT (type->type))) -+ type->mark_escape (escape_non_multiply_size, stmt); -+ } -+ -+} -+ -+/* -+ 2) Check SSA_NAMEs for non type usages (source or use) (worlist of srdecl) -+ d) if the name is used in a cast/assignment, make sure it is used as that type or void* -+ i) If void* then push the ssa_name into worklist -+ e) if used in conditional check the other side -+ i) If the conditional is non NE/EQ then mark the type as non rejecting -+ f) Check if the use in a Pointer PLUS EXPR Is used by mulitplication of its size -+ */ -+void -+ipa_struct_reorg::check_uses (srdecl *decl, vec &worklist) -+{ -+ tree ssa_name = decl->decl; -+ imm_use_iterator imm_iter; -+ use_operand_p use_p; -+ -+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_name) -+ { -+ gimple *stmt = USE_STMT (use_p); -+ -+ if (is_gimple_debug (stmt)) -+ continue; -+ -+ check_use (decl, stmt, worklist); -+ } -+} -+ -+/* Record function corresponding to NODE. */ -+ -+srfunction * -+ipa_struct_reorg::record_function (cgraph_node *node) -+{ -+ function *fn; -+ tree parm, var; -+ unsigned int i; -+ srfunction *sfn; -+ escape_type escapes = does_not_escape; -+ -+ sfn = new srfunction (node); -+ functions.safe_push (sfn); -+ -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ fprintf (dump_file, "\nRecording accesses and types from function: %s/%u\n", -+ node->name (), node->order); -+ -+ /* Nodes without a body are not interesting. Especially do not -+ visit clones at this point for now - we get duplicate decls -+ there for inline clones at least. 
*/ -+ if (!node->has_gimple_body_p () || node->inlined_to) -+ return sfn; -+ -+ node->get_body (); -+ fn = DECL_STRUCT_FUNCTION (node->decl); -+ -+ if (!fn) -+ return sfn; -+ -+ current_function = sfn; -+ -+ if (DECL_PRESERVE_P (node->decl)) -+ escapes = escape_marked_as_used; -+ else if (!node->local.local) -+ escapes = escape_visible_function; -+ else if (!node->local.can_change_signature) -+ escapes = escape_cannot_change_signature; -+ else if (!tree_versionable_function_p (node->decl)) -+ escapes = escape_noclonable_function; -+ else if (!opt_for_fn (node->decl, flag_ipa_struct_reorg)) -+ escapes = escape_non_optimize; -+ -+ basic_block bb; -+ gimple_stmt_iterator si; -+ -+ /* Record the static chain decl. */ -+ if (fn->static_chain_decl) -+ { -+ srdecl *sd = record_var (fn->static_chain_decl, -+ escapes, -+ -2); -+ if (sd) -+ { -+ /* Specify that this type is used by the static -+ chain so it cannot be split. */ -+ sd->type->chain_type = true; -+ sfn->add_arg (sd); -+ sd->type->add_function (sfn); -+ } -+ } -+ -+ /* Record the arguments. */ -+ for (parm = DECL_ARGUMENTS (node->decl), i = 0; -+ parm; -+ parm = DECL_CHAIN (parm), i++) -+ { -+ srdecl *sd = record_var (parm, escapes, i); -+ if (sd) -+ { -+ sfn->add_arg (sd); -+ sd->type->add_function (sfn); -+ } -+ } -+ -+ /* Mark the return type as escaping */ -+ { -+ tree return_type = TREE_TYPE (TREE_TYPE (node->decl)); -+ mark_type_as_escape (return_type, escape_return, NULL); -+ } -+ -+ /* If the cfg does not exist for the function, don't process the function. */ -+ if (!fn->cfg) -+ { -+ current_function = NULL; -+ return sfn; -+ } -+ -+ /* The following order is done for recording stage: -+ 0) Record all variables/SSA_NAMES that are of struct type -+ 1) Record MEM_REF/COMPONENT_REFs -+ a) Record SSA_NAMEs (void*) and record that as the accessed type. -+ */ -+ -+ push_cfun (fn); -+ -+ FOR_EACH_LOCAL_DECL (cfun, i, var) -+ { -+ if (TREE_CODE (var) != VAR_DECL) -+ continue; -+ -+ record_var (var); -+ } -+ -+ for (i = 1; i < num_ssa_names; ++i) -+ { -+ tree name = ssa_name (i); -+ if (!name -+ || has_zero_uses (name) -+ || virtual_operand_p (name)) -+ continue; -+ -+ record_var (name); -+ } -+ -+ /* Find the variables which are used via MEM_REF and are void* types. */ -+ FOR_EACH_BB_FN (bb, cfun) -+ { -+ for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) -+ { -+ gimple *stmt = gsi_stmt (si); -+ find_vars (stmt); -+ } -+ } -+ -+ auto_vec worklist; -+ for (unsigned i = 0; i < current_function->decls.length (); i++) -+ { -+ srdecl *decl = current_function->decls[i]; -+ if (TREE_CODE (decl->decl) == SSA_NAME) -+ { -+ decl->visited = false; -+ worklist.safe_push (decl); -+ } -+ } -+ -+ /* -+ 2) Check SSA_NAMEs for non type usages (source or use) (worlist of srdecl) -+ a) if the SSA_NAME is sourced from a pointer plus, record the pointer and -+ check to make sure the addition was a multiple of the size. -+ check the pointer type too. 
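Not part of the deleted patch — a rough, hypothetical C illustration of the POINTER_PLUS_EXPR check described in items (a) and (f) of the comment above: ordinary element arithmetic on a struct pointer produces offsets that are multiples of the structure size and is accepted, while byte-granular arithmetic (reached here through casts) disqualifies the type.

struct node { long key; long val; };

long
sum_keys (struct node *p, int n)
{
  long s = 0;
  for (int i = 0; i < n; i++)
    s += (p + i)->key;   /* offset is i * sizeof (struct node), so
                            is_result_of_mult succeeds.  */
  return s;
}

long
peek_inside (struct node *p)
{
  /* The casts and the 4-byte offset each prevent reorganization here:
     roughly, escape_cast_another_ptr for the casts, and
     escape_non_multiply_size whenever a non-multiple offset is added
     to the struct pointer itself.  */
  return *(long *) ((char *) p + 4);
}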
-+ b) If the name is sourced from an allocation check the allocation -+ i) Add SSA_NAME (void*) to the worklist if allocated from realloc -+ c) if the name is from a param, make sure the param type was of the original type -+ d) if the name is used in a cast/assignment, make sure it is used as that type or void* -+ i) If void* then push the ssa_name into worklist -+ e) if used in conditional check the other side -+ i) If the conditional is non NE/EQ then mark the type as non rejecting -+ f) Check if the use in a POinter PLUS EXPR Is used by mulitplication of its size -+ */ -+ -+ while (!worklist.is_empty ()) -+ { -+ srdecl *decl = worklist.pop (); -+ if (decl->visited) -+ continue; -+ decl->visited = true; -+ check_definition (decl, worklist); -+ check_uses (decl, worklist); -+ } -+ -+ FOR_EACH_BB_FN (bb, cfun) -+ { -+ for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) -+ { -+ gimple *stmt = gsi_stmt (si); -+ maybe_record_stmt (node, stmt); -+ } -+ } -+ -+ pop_cfun (); -+ current_function = NULL; -+ return sfn; -+} -+ -+ -+/* Record all accesses for all types including global variables. */ -+ -+void -+ipa_struct_reorg::record_accesses (void) -+{ -+ varpool_node *var; -+ cgraph_node *cnode; -+ -+ /* Record global (non-auto) variables first. */ -+ FOR_EACH_VARIABLE (var) -+ { -+ if (!var->real_symbol_p ()) -+ continue; -+ -+ /* Record all variables including the accesses inside a variable. */ -+ escape_type escapes = does_not_escape; -+ if (var->externally_visible || !var->definition) -+ escapes = escape_via_global_var; -+ if (var->in_other_partition) -+ escapes = escape_via_global_var; -+ if (!var->externally_visible && var->definition) -+ var->get_constructor (); -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "Recording global variable: "); -+ print_generic_expr (dump_file, var->decl); -+ fprintf (dump_file, "\n"); -+ } -+ record_var (var->decl, escapes); -+ } -+ -+ FOR_EACH_FUNCTION (cnode) -+ { -+ if (!cnode->real_symbol_p ()) -+ continue; -+ -+ /* Record accesses inside a function. */ -+ if(cnode->definition) -+ record_function (cnode); -+ } -+ -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "all types (before pruning):\n"); -+ dump_types (dump_file); -+ fprintf (dump_file, "all functions (before pruning):\n"); -+ dump_functions (dump_file); -+ } -+ done_recording = true; -+} -+ -+/* A helper function to detect cycles (recusive) types. -+ Return TRUE if TYPE was a rescusive type. */ -+ -+bool -+ipa_struct_reorg::walk_field_for_cycles (srtype *type) -+{ -+ unsigned i; -+ srfield *field; -+ -+ type->visited = true; -+ if (type->escaped_rescusive ()) -+ return true; -+ -+ if (type->has_escaped ()) -+ return false; -+ -+ FOR_EACH_VEC_ELT (type->fields, i, field) -+ { -+ if (!field->type) -+ ; -+ else if (field->type->visited -+ || walk_field_for_cycles (field->type)) -+ { -+ type->mark_escape (escape_rescusive_type, NULL); -+ return true; -+ } -+ } -+ -+ return false; -+} -+ -+/* Clear visited on all types. */ -+ -+void -+ipa_struct_reorg::clear_visited (void) -+{ -+ for (unsigned i = 0; i < types.length (); i++) -+ types[i]->visited = false; -+} -+ -+/* Detect recusive types and mark them as escaping. */ -+ -+void -+ipa_struct_reorg::detect_cycles (void) -+{ -+ for (unsigned i = 0; i < types.length (); i++) -+ { -+ if (types[i]->has_escaped ()) -+ continue; -+ -+ clear_visited (); -+ walk_field_for_cycles (types[i]); -+ } -+} -+ -+/* Propagate escaping to depdenent types. 
*/ -+ -+void -+ipa_struct_reorg::propagate_escape (void) -+{ -+ -+ unsigned i; -+ srtype *type; -+ bool changed = false; -+ -+ do -+ { -+ changed = false; -+ FOR_EACH_VEC_ELT (types, i, type) -+ { -+ for (tree field = TYPE_FIELDS (type->type); -+ field; -+ field = DECL_CHAIN (field)) -+ { -+ if (TREE_CODE (field) == FIELD_DECL -+ && handled_type (TREE_TYPE (field))) -+ { -+ tree t = inner_type (TREE_TYPE (field)); -+ srtype *type1 = find_type (t); -+ if (!type1) -+ continue; -+ if (type1->has_escaped () -+ && !type->has_escaped ()) -+ { -+ type->mark_escape (escape_dependent_type_escapes, NULL); -+ changed = true; -+ } -+ if (type->has_escaped () -+ && !type1->has_escaped ()) -+ { -+ type1->mark_escape (escape_dependent_type_escapes, NULL); -+ changed = true; -+ } -+ } -+ } -+ } -+ } while (changed); -+} -+ -+/* Prune the escaped types and their decls from what was recorded. */ -+ -+void -+ipa_struct_reorg::prune_escaped_types (void) -+{ -+ detect_cycles (); -+ propagate_escape (); -+ -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "all types (after prop but before pruning):\n"); -+ dump_types (dump_file); -+ fprintf (dump_file, "all functions (after prop but before pruning):\n"); -+ dump_functions (dump_file); -+ } -+ -+ if (dump_file) -+ dump_types_escaped (dump_file); -+ -+ -+ /* Prune the function arguments which escape -+ and functions which have no types as arguments. */ -+ for (unsigned i = 0; i < functions.length (); ) -+ { -+ srfunction *function = functions[i]; -+ -+ /* Prune function arguments of types that escape. */ -+ for (unsigned j = 0; j < function->args.length ();) -+ { -+ if (function->args[j]->type->has_escaped ()) -+ function->args.ordered_remove (j); -+ else -+ j++; -+ } -+ -+ /* Prune global variables that the function uses of types that escape. */ -+ for (unsigned j = 0; j < function->globals.length ();) -+ { -+ if (function->globals[j]->type->has_escaped ()) -+ function->globals.ordered_remove (j); -+ else -+ j++; -+ } -+ -+ /* Prune variables that the function uses of types that escape. */ -+ for (unsigned j = 0; j < function->decls.length ();) -+ { -+ srdecl *decl = function->decls[j]; -+ if (decl->type->has_escaped ()) -+ { -+ function->decls.ordered_remove (j); -+ delete decl; -+ } -+ else -+ j++; -+ } -+ -+ /* Prune functions which don't refer to any variables any more. */ -+ if (function->args.is_empty () -+ && function->decls.is_empty () -+ && function->globals.is_empty ()) -+ { -+ delete function; -+ functions.ordered_remove (i); -+ } -+ else -+ i++; -+ } -+ -+ /* Prune globals of types that escape, all references to those decls -+ will have been removed in the first loop. */ -+ for (unsigned j = 0; j < globals.decls.length ();) -+ { -+ srdecl *decl = globals.decls[j]; -+ if (decl->type->has_escaped ()) -+ { -+ globals.decls.ordered_remove (j); -+ delete decl; -+ } -+ else -+ j++; -+ } -+ -+ /* Prune types that escape, all references to those types -+ will have been removed in the above loops. */ -+ for (unsigned i = 0; i < types.length (); ) -+ { -+ srtype *type = types[i]; -+ if (type->has_escaped ()) -+ { -+ /* All references to this type should have been removed now. */ -+ delete type; -+ types.ordered_remove (i); -+ } -+ else -+ i++; -+ } -+ -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "all types (after pruning):\n"); -+ dump_types (dump_file); -+ fprintf (dump_file, "all functions (after pruning):\n"); -+ dump_functions (dump_file); -+ } -+} -+ -+/* Analyze all of the types. 
*/ -+ -+void -+ipa_struct_reorg::analyze_types (void) -+{ -+ for (unsigned i = 0; i < types.length (); i++) -+ { -+ if (!types[i]->has_escaped ()) -+ types[i]->analyze(); -+ } -+} -+ -+/* Create all new types we want to create. */ -+ -+bool -+ipa_struct_reorg::create_new_types (void) -+{ -+ int newtypes = 0; -+ clear_visited (); -+ for (unsigned i = 0; i < types.length (); i++) -+ newtypes += types[i]->create_new_type (); -+ -+ if (dump_file) -+ { -+ if (newtypes) -+ fprintf (dump_file, "\nNumber of structures to transform is %d\n", newtypes); -+ else -+ fprintf (dump_file, "\nNo structures to transform.\n"); -+ } -+ -+ return newtypes != 0; -+} -+ -+/* Create all the new decls except for the new arguments -+ which create_new_functions would have created. */ -+ -+void -+ipa_struct_reorg::create_new_decls (void) -+{ -+ globals.create_new_decls (); -+ for (unsigned i = 0; i < functions.length (); i++) -+ functions[i]->create_new_decls (); -+} -+ -+/* Create the new arguments for the function corresponding to NODE. */ -+ -+void -+ipa_struct_reorg::create_new_args (cgraph_node *new_node) -+{ -+ tree decl = new_node->decl; -+ vec params = ipa_get_vector_of_formal_parms (decl); -+ ipa_parm_adjustment_vec adjs; -+ adjs.create (params.length ()); -+ for (unsigned i = 0; i < params.length (); i++) -+ { -+ struct ipa_parm_adjustment adj; -+ tree parm = params[i]; -+ memset (&adj, 0, sizeof (adj)); -+ adj.base_index = i; -+ adj.base = parm; -+ srtype *t = find_type (inner_type (TREE_TYPE (parm))); -+ if (!t -+ || t->has_escaped () -+ || !t->has_new_type ()) -+ { -+ adj.op = IPA_PARM_OP_COPY; -+ adjs.safe_push (adj); -+ continue; -+ } -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "Creating a new argument for: "); -+ print_generic_expr (dump_file, params[i]); -+ fprintf (dump_file, " in function: "); -+ print_generic_expr (dump_file, decl); -+ fprintf (dump_file, "\n"); -+ } -+ adj.arg_prefix = "struct_reorg"; -+ adj.op = IPA_PARM_OP_NONE; -+ for (unsigned j = 0; j < max_split && t->newtype[j]; j++) -+ { -+ adj.type = reconstruct_complex_type (TREE_TYPE (parm), t->newtype[j]); -+ adjs.safe_push (adj); -+ } -+ } -+ ipa_modify_formal_parameters (decl, adjs); -+ params.release (); -+ for (unsigned i = 0; i < adjs.length (); i++) -+ { -+ if (adjs[i].op != IPA_PARM_OP_NONE) -+ continue; -+ tree decl = adjs[i].base; -+ srdecl *d = find_decl (decl); -+ if (!d) -+ continue; -+ unsigned j = 0; -+ while (j < max_split && d->newdecl[j]) -+ j++; -+ d->newdecl[j] = adjs[i].new_decl; -+ } -+ adjs.release (); -+ -+ function *fn = DECL_STRUCT_FUNCTION (decl); -+ -+ if (!fn->static_chain_decl) -+ return; -+ srdecl *chain = find_decl (fn->static_chain_decl); -+ if (!chain) -+ return; -+ -+ srtype *type = chain->type; -+ tree orig_var = chain->decl; -+ const char *tname = NULL; -+ if (DECL_NAME (orig_var)) -+ tname = IDENTIFIER_POINTER (DECL_NAME (orig_var)); -+ gcc_assert (!type->newtype[1]); -+ tree new_name = NULL; -+ char *name = NULL; -+ if (tname) -+ { -+ name = concat (tname, ".reorg.0", NULL); -+ new_name = get_identifier (name); -+ free (name); -+ } -+ tree newtype1 = reconstruct_complex_type (TREE_TYPE (orig_var), type->newtype[0]); -+ chain->newdecl[0] = build_decl (DECL_SOURCE_LOCATION (orig_var), -+ PARM_DECL, new_name, newtype1); -+ copy_var_attributes (chain->newdecl[0], orig_var); -+ fn->static_chain_decl = chain->newdecl[0]; -+ -+} -+ -+/* Find the refered DECL in the current function or globals. 
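Not part of the deleted patch — a hypothetical before/after sketch (all names invented) of the transformation that create_new_types, create_new_decls and the rewrite_* routines above implement for a type split into two clusters (max_split == 2): each cluster gets its own type, its own pointer and its own allocation, and field accesses are redirected to the matching cluster.

#include <stdlib.h>

struct rec      { long hot; double cold; };   /* original srtype        */
struct rec_hot  { long hot; };                /* newtype[0], cluster 0  */
struct rec_cold { double cold; };             /* newtype[1], cluster 1  */

/* Before the pass: one allocation, one pointer.  */
struct rec *
alloc_recs (size_t n)
{
  return calloc (n, sizeof (struct rec));
}

/* Roughly the shape produced for the rewritten clone: one pointer and
   one allocation per cluster; an access such as r[i].cold becomes
   r1[i].cold.  (The precise size computation and SSA rewriting follow
   the code in the hunks above.)  */
void
alloc_recs_split (size_t n, struct rec_hot **r0, struct rec_cold **r1)
{
  *r0 = calloc (n, sizeof (struct rec_hot));
  *r1 = calloc (n, sizeof (struct rec_cold));
}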
-+ If this is a global decl, record that as being used -+ in the current function. */ -+ -+srdecl * -+ipa_struct_reorg::find_decl (tree decl) -+{ -+ srdecl *d; -+ d = globals.find_decl (decl); -+ if (d) -+ { -+ /* Record the global usage in the current function. */ -+ if (!done_recording && current_function) -+ { -+ bool add = true; -+ /* No reason to add it to the current function if it is -+ already recorded as such. */ -+ for (unsigned i = 0; i < current_function->globals.length (); i++) -+ { -+ if (current_function->globals[i] == d) -+ { -+ add = false; -+ break; -+ } -+ } -+ if (add) -+ current_function->globals.safe_push (d); -+ } -+ return d; -+ } -+ if (current_function) -+ return current_function->find_decl (decl); -+ return NULL; -+} -+ -+/* Create new function clones for the cases where the arguments -+ need to be changed. */ -+ -+void -+ipa_struct_reorg::create_new_functions (void) -+{ -+ for (unsigned i = 0; i < functions.length (); i++) -+ { -+ srfunction *f = functions[i]; -+ bool anyargchanges = false; -+ cgraph_node *new_node; -+ cgraph_node *node = f->node; -+ int newargs = 0; -+ if (f->old) -+ continue; -+ -+ if (f->args.length () == 0) -+ continue; -+ -+ for (unsigned j = 0; j < f->args.length (); j++) -+ { -+ srdecl *d = f->args[j]; -+ srtype *t = d->type; -+ if (t->has_new_type ()) -+ { -+ newargs += t->newtype[1] != NULL; -+ anyargchanges = true; -+ } -+ } -+ if (!anyargchanges) -+ continue; -+ -+ if (dump_file) -+ { -+ fprintf (dump_file, "Creating a clone of function: "); -+ f->simple_dump (dump_file); -+ fprintf (dump_file, "\n"); -+ } -+ statistics_counter_event (NULL, "Create new function", 1); -+ new_node = node->create_version_clone_with_body (vNULL, NULL, -+ NULL, false, NULL, NULL, -+ "struct_reorg"); -+ new_node->make_local (); -+ f->newnode = new_node; -+ srfunction *n = record_function (new_node); -+ current_function = n; -+ n->old = f; -+ f->newf = n; -+ /* Create New arguments. */ -+ create_new_args (new_node); -+ current_function = NULL; -+ } -+} -+ -+bool -+ipa_struct_reorg::rewrite_lhs_rhs (tree lhs, tree rhs, tree newlhs[max_split], tree newrhs[max_split]) -+{ -+ bool l = rewrite_expr (lhs, newlhs); -+ bool r = rewrite_expr (rhs, newrhs); -+ -+ /* Handle NULL pointer specially. */ -+ if (l && !r && integer_zerop (rhs)) -+ { -+ r = true; -+ for (unsigned i = 0; i < max_split && newlhs[i]; i++) -+ newrhs[i] = fold_convert (TREE_TYPE (newlhs[i]), rhs); -+ } -+ -+ return l || r; -+} -+ -+bool -+ipa_struct_reorg::rewrite_expr (tree expr, tree newexpr[max_split], bool ignore_missing_decl) -+{ -+ tree base; -+ bool indirect; -+ srtype *t; -+ srfield *f; -+ bool realpart, imagpart; -+ bool address; -+ -+ tree newbase[max_split]; -+ memset (newexpr, 0, sizeof(tree[max_split])); -+ -+ if (TREE_CODE (expr) == CONSTRUCTOR) -+ { -+ srtype *t = find_type (TREE_TYPE (expr)); -+ if (!t) -+ return false; -+ gcc_assert (CONSTRUCTOR_NELTS (expr) == 0); -+ if (!t->has_new_type ()) -+ return false; -+ for (unsigned i = 0; i < max_split && t->newtype[i]; i++) -+ newexpr[i] = build_constructor (t->newtype[i], NULL); -+ return true; -+ } -+ -+ if (!get_type_field (expr, base, indirect, t, f, realpart, imagpart, address)) -+ return false; -+ -+ /* If the type is not changed, then just return false. */ -+ if (!t->has_new_type ()) -+ return false; -+ -+ /* NULL pointer handling is "special". 
*/ -+ if (integer_zerop (base)) -+ { -+ gcc_assert (indirect && !address); -+ for (unsigned i = 0; i < max_split && t->newtype[i]; i++) -+ { -+ tree newtype1 = reconstruct_complex_type (TREE_TYPE (base), t->newtype[i]); -+ newbase[i] = fold_convert (newtype1, base); -+ } -+ } -+ else -+ { -+ srdecl *d = find_decl (base); -+ -+ if (!d && dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "Can't find decl:\n"); -+ print_generic_expr (dump_file, base); -+ fprintf (dump_file, "\ntype:\n"); -+ t->dump (dump_file); -+ } -+ if (!d && ignore_missing_decl) -+ return true; -+ gcc_assert (d); -+ memcpy (newbase, d->newdecl, sizeof(d->newdecl)); -+ } -+ -+ if (f == NULL) -+ { -+ memcpy (newexpr, newbase, sizeof(newbase)); -+ for (unsigned i = 0; i < max_split && newexpr[i]; i++) -+ { -+ if (address) -+ newexpr[i] = build_fold_addr_expr (newexpr[i]); -+ if (indirect) -+ newexpr[i] = build_simple_mem_ref (newexpr[i]); -+ if (imagpart) -+ newexpr[i] = build1 (IMAGPART_EXPR, TREE_TYPE (TREE_TYPE (newexpr[i])), newexpr[i]); -+ if (realpart) -+ newexpr[i] = build1 (REALPART_EXPR, TREE_TYPE (TREE_TYPE (newexpr[i])), newexpr[i]); -+ } -+ return true; -+ } -+ -+ tree newdecl = newbase[f->clusternum]; -+ for (unsigned i = 0; i < max_split && f->newfield[i]; i++) -+ { -+ tree newbase1 = newdecl; -+ if (address) -+ newbase1 = build_fold_addr_expr (newbase1); -+ if (indirect) -+ newbase1 = build_simple_mem_ref (newbase1); -+ newexpr[i] = build3 (COMPONENT_REF, TREE_TYPE (f->newfield[i]), -+ newbase1, f->newfield[i], NULL_TREE); -+ if (imagpart) -+ newexpr[i] = build1 (IMAGPART_EXPR, TREE_TYPE (TREE_TYPE (newexpr[i])), newexpr[i]); -+ if (realpart) -+ newexpr[i] = build1 (REALPART_EXPR, TREE_TYPE (TREE_TYPE (newexpr[i])), newexpr[i]); -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "cluster: %d. decl = ", (int)f->clusternum); -+ print_generic_expr (dump_file, newbase1); -+ fprintf (dump_file, "\nnewexpr = "); -+ print_generic_expr (dump_file, newexpr[i]); -+ fprintf (dump_file, "\n"); -+ } -+ } -+ return true; -+} -+ -+bool -+ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) -+{ -+ bool remove = false; -+ if (gimple_clobber_p (stmt)) -+ { -+ tree lhs = gimple_assign_lhs (stmt); -+ tree newlhs[max_split]; -+ if (!rewrite_expr (lhs, newlhs)) -+ return false; -+ for (unsigned i = 0; i < max_split && newlhs[i]; i++) -+ { -+ tree clobber = build_constructor (TREE_TYPE (newlhs[i]), NULL); -+ TREE_THIS_VOLATILE (clobber) = true; -+ gimple *newstmt = gimple_build_assign (newlhs[i], clobber); -+ gsi_insert_before (gsi, newstmt, GSI_SAME_STMT); -+ remove = true; -+ } -+ return remove; -+ } -+ -+ if (gimple_assign_rhs_code (stmt) == EQ_EXPR -+ || gimple_assign_rhs_code (stmt) == NE_EXPR) -+ { -+ tree rhs1 = gimple_assign_rhs1 (stmt); -+ tree rhs2 = gimple_assign_rhs2 (stmt); -+ tree newrhs1[max_split]; -+ tree newrhs2[max_split]; -+ tree_code rhs_code = gimple_assign_rhs_code (stmt); -+ tree_code code = rhs_code == EQ_EXPR ? 
BIT_AND_EXPR : BIT_IOR_EXPR; -+ if (!rewrite_lhs_rhs (rhs1, rhs2, newrhs1, newrhs2)) -+ return false; -+ tree newexpr = NULL_TREE; -+ for (unsigned i = 0; i < max_split && newrhs1[i]; i++) -+ { -+ tree expr = gimplify_build2 (gsi, rhs_code, boolean_type_node, newrhs1[i], newrhs2[i]); -+ if (!newexpr) -+ newexpr = expr; -+ else -+ newexpr = gimplify_build2 (gsi, code, boolean_type_node, newexpr, expr); -+ } -+ -+ if (newexpr) -+ { -+ newexpr = fold_convert (TREE_TYPE (gimple_assign_lhs (stmt)), newexpr); -+ gimple_assign_set_rhs_from_tree (gsi, newexpr); -+ update_stmt (stmt); -+ } -+ return false; -+ } -+ -+ if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR) -+ { -+ tree lhs = gimple_assign_lhs (stmt); -+ tree rhs1 = gimple_assign_rhs1 (stmt); -+ tree rhs2 = gimple_assign_rhs2 (stmt); -+ tree newlhs[max_split]; -+ tree newrhs[max_split]; -+ -+ if (!rewrite_lhs_rhs (lhs, rhs1, newlhs, newrhs)) -+ return false; -+ tree size = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (lhs))); -+ tree num; -+ /* Check if rhs2 is a multiplication of the size of the type. */ -+ if (!is_result_of_mult (rhs2, &num, size)) -+ internal_error ("the rhs of pointer was not a multiplicate and it slipped through."); -+ -+ num = gimplify_build1 (gsi, NOP_EXPR, sizetype, num); -+ for (unsigned i = 0; i < max_split && newlhs[i]; i++) -+ { -+ gimple *new_stmt; -+ -+ tree newsize = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (newlhs[i]))); -+ newsize = gimplify_build2 (gsi, MULT_EXPR, sizetype, num, newsize); -+ new_stmt = gimple_build_assign (newlhs[i], POINTER_PLUS_EXPR, newrhs[i], newsize); -+ gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); -+ remove = true; -+ } -+ return remove; -+ } -+ if (gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS) -+ { -+ tree lhs = gimple_assign_lhs (stmt); -+ tree rhs = gimple_assign_rhs1 (stmt); -+ -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "rewriting stamtenet:\n"); -+ print_gimple_stmt (dump_file, stmt, 0); -+ fprintf (dump_file, "\n"); -+ } -+ tree newlhs[max_split]; -+ tree newrhs[max_split]; -+ if (!rewrite_lhs_rhs (lhs, rhs, newlhs, newrhs)) -+ { -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ fprintf (dump_file, "\nDid nothing to statement.\n"); -+ return false; -+ } -+ -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ fprintf (dump_file, "\nreplaced with:\n"); -+ for (unsigned i = 0; i < max_split && (newlhs[i] || newrhs[i]); i++) -+ { -+ gimple *newstmt = gimple_build_assign (newlhs[i] ? newlhs[i] : lhs, newrhs[i] ? newrhs[i] : rhs); -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ print_gimple_stmt (dump_file, newstmt, 0); -+ fprintf (dump_file, "\n"); -+ } -+ gsi_insert_before (gsi, newstmt, GSI_SAME_STMT); -+ remove = true; -+ } -+ return remove; -+ } -+ -+ return remove; -+} -+ -+/* Rewrite function call statement STMT. Return TRUE if the statement -+ is to be removed. */ -+ -+bool -+ipa_struct_reorg::rewrite_call (gcall *stmt, gimple_stmt_iterator *gsi) -+{ -+ /* Handled allocation calls are handled seperately from normal -+ function calls. */ -+ if (handled_allocation_stmt (stmt)) -+ { -+ tree lhs = gimple_call_lhs (stmt); -+ tree newrhs1[max_split]; -+ srdecl *decl = find_decl (lhs); -+ if (!decl || !decl->type) -+ return false; -+ srtype *type = decl->type; -+ tree num = allocate_size (type, stmt); -+ gcc_assert (num); -+ memset (newrhs1, 0, sizeof(newrhs1)); -+ -+ /* The realloc call needs to have its first argument rewritten. 
*/ -+ if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC)) -+ { -+ tree rhs1 = gimple_call_arg (stmt, 0); -+ if (integer_zerop (rhs1)) -+ { -+ for (unsigned i = 0; i < max_split; i++) -+ newrhs1[i] = rhs1; -+ } -+ else if (!rewrite_expr (rhs1, newrhs1)) -+ internal_error ("rewrite failed for realloc"); -+ } -+ -+ /* Go through each new lhs. */ -+ for (unsigned i = 0; i < max_split && decl->newdecl[i]; i++) -+ { -+ tree newsize = TYPE_SIZE_UNIT (type->type); -+ gimple *g; -+ /* Every allocation except for calloc needs the size multiplied out. */ -+ if (!gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) -+ newsize = gimplify_build2 (gsi, MULT_EXPR, sizetype, num, newsize); -+ -+ if (gimple_call_builtin_p (stmt, BUILT_IN_MALLOC) -+ || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA)) -+ g = gimple_build_call (gimple_call_fndecl (stmt), -+ 1, newsize); -+ else if (gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) -+ g = gimple_build_call (gimple_call_fndecl (stmt), -+ 2, num, newsize); -+ else if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC)) -+ g = gimple_build_call (gimple_call_fndecl (stmt), -+ 2, newrhs1[i], newsize); -+ else -+ gcc_assert (false); -+ gimple_call_set_lhs (g, decl->newdecl[i]); -+ gsi_insert_before (gsi, g, GSI_SAME_STMT); -+ } -+ return true; -+ } -+ -+ /* The function call free needs to be handled special. */ -+ if (gimple_call_builtin_p (stmt, BUILT_IN_FREE)) -+ { -+ tree expr = gimple_call_arg (stmt, 0); -+ tree newexpr[max_split]; -+ if (!rewrite_expr (expr, newexpr)) -+ return false; -+ -+ if (newexpr[1] == NULL) -+ { -+ gimple_call_set_arg (stmt, 0, newexpr[0]); -+ update_stmt (stmt); -+ return false; -+ } -+ -+ for (unsigned i = 0; i < max_split && newexpr[i]; i++) -+ { -+ gimple *g = gimple_build_call (gimple_call_fndecl (stmt), -+ 1, newexpr[i]); -+ gsi_insert_before (gsi, g, GSI_SAME_STMT); -+ } -+ return true; -+ } -+ -+ /* Otherwise, look up the function to see if we have cloned it -+ and rewrite the arguments. */ -+ tree fndecl = gimple_call_fndecl (stmt); -+ -+ /* Indirect calls are already marked as escaping so ignore. */ -+ if (!fndecl) -+ return false; -+ -+ cgraph_node *node = cgraph_node::get (fndecl); -+ gcc_assert (node); -+ srfunction *f = find_function (node); -+ -+ /* Did not find the function or had not cloned it return saying don't -+ change the function call. */ -+ if (!f || !f->newf) -+ return false; -+ -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "Changing arguments for function call :\n"); -+ print_gimple_expr (dump_file, stmt, 0); -+ fprintf (dump_file, "\n"); -+ } -+ -+ /* Move over to the new function. */ -+ f = f->newf; -+ -+ tree chain = gimple_call_chain (stmt); -+ unsigned nargs = gimple_call_num_args (stmt); -+ auto_vec vargs (nargs); -+ -+ if (chain) -+ { -+ tree newchains[max_split]; -+ if (rewrite_expr (chain, newchains)) -+ { -+ /* Chain decl's type cannot be split and but it can change. */ -+ gcc_assert (newchains[1] == NULL); -+ chain = newchains[0]; -+ } -+ } -+ -+ for (unsigned i = 0; i < nargs; i++) -+ vargs.quick_push (gimple_call_arg (stmt, i)); -+ -+ int extraargs = 0; -+ -+ for (unsigned i = 0; i < f->args.length (); i++) -+ { -+ srdecl *d = f->args[i]; -+ if (d->argumentnum == -2) -+ continue; -+ gcc_assert (d->argumentnum != -1); -+ tree arg = vargs[d->argumentnum + extraargs]; -+ tree newargs[max_split]; -+ if (!rewrite_expr (arg, newargs)) -+ continue; -+ -+ /* If this ARG has a replacement handle the replacement. 
*/ -+ for (unsigned j = 0; j < max_split && d->newdecl[j]; j++) -+ { -+ gcc_assert (newargs[j]); -+ /* If this is the first replacement of the arugment, -+ then just replace it. */ -+ if (j == 0) -+ vargs[d->argumentnum + extraargs] = newargs[j]; -+ else -+ { -+ /* More than one replacement, we need to insert into the array. */ -+ extraargs++; -+ vargs.safe_insert(d->argumentnum + extraargs, newargs[j]); -+ } -+ } -+ } -+ -+ gcall *new_stmt; -+ -+ new_stmt = gimple_build_call_vec (f->node->decl, vargs); -+ -+ if (gimple_call_lhs (stmt)) -+ gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); -+ -+ gimple_set_vuse (new_stmt, gimple_vuse (stmt)); -+ gimple_set_vdef (new_stmt, gimple_vdef (stmt)); -+ -+ if (gimple_has_location (stmt)) -+ gimple_set_location (new_stmt, gimple_location (stmt)); -+ gimple_call_copy_flags (new_stmt, stmt); -+ gimple_call_set_chain (new_stmt, chain); -+ -+ gimple_set_modified (new_stmt, true); -+ -+ if (gimple_vdef (new_stmt) -+ && TREE_CODE (gimple_vdef (new_stmt)) == SSA_NAME) -+ SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt; -+ -+ gsi_replace (gsi, new_stmt, false); -+ -+ /* We need to defer cleaning EH info on the new statement to -+ fixup-cfg. We may not have dominator information at this point -+ and thus would end up with unreachable blocks and have no way -+ to communicate that we need to run CFG cleanup then. */ -+ int lp_nr = lookup_stmt_eh_lp (stmt); -+ if (lp_nr != 0) -+ { -+ remove_stmt_from_eh_lp (stmt); -+ add_stmt_to_eh_lp (new_stmt, lp_nr); -+ } -+ -+ -+ return false; -+} -+ -+/* Rewrite the conditional statement STMT. Return TRUE if the -+ old statement is to be removed. */ -+ -+bool -+ipa_struct_reorg::rewrite_cond (gcond *stmt, gimple_stmt_iterator *gsi) -+{ -+ tree_code rhs_code = gimple_cond_code (stmt); -+ -+ /* Handle only equals or not equals conditionals. */ -+ if (rhs_code != EQ_EXPR -+ && rhs_code != NE_EXPR) -+ return false; -+ tree rhs1 = gimple_cond_lhs (stmt); -+ tree rhs2 = gimple_cond_rhs (stmt); -+ -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "COND: Rewriting\n"); -+ print_gimple_stmt (dump_file, stmt, 0); -+ fprintf (dump_file, "\n"); -+ print_generic_expr (dump_file, rhs1); -+ fprintf (dump_file, "\n"); -+ print_generic_expr (dump_file, rhs2); -+ fprintf (dump_file, "\n"); -+ } -+ -+ tree newrhs1[max_split]; -+ tree newrhs2[max_split]; -+ tree_code code = rhs_code == EQ_EXPR ? BIT_AND_EXPR : BIT_IOR_EXPR; -+ if (!rewrite_lhs_rhs (rhs1, rhs2, newrhs1, newrhs2)) -+ { -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ fprintf (dump_file, "\nDid nothing to statement.\n"); -+ return false; -+ } -+ -+ tree newexpr = NULL_TREE; -+ for (unsigned i = 0; i < max_split && newrhs1[i]; i++) -+ { -+ tree expr = gimplify_build2 (gsi, rhs_code, boolean_type_node, newrhs1[i], newrhs2[i]); -+ if (!newexpr) -+ newexpr = expr; -+ else -+ newexpr = gimplify_build2 (gsi, code, boolean_type_node, newexpr, expr); -+ } -+ -+ if (newexpr) -+ { -+ gimple_cond_set_lhs (stmt, newexpr); -+ gimple_cond_set_rhs (stmt, boolean_true_node); -+ update_stmt (stmt); -+ } -+ return false; -+} -+ -+/* Rewrite debug statments if possible. Return TRUE if the statement -+ should be removed. 
*/ -+ -+bool -+ipa_struct_reorg::rewrite_debug (gimple *stmt, gimple_stmt_iterator *) -+{ -+ bool remove = false; -+ if (gimple_debug_bind_p (stmt)) -+ { -+ tree var = gimple_debug_bind_get_var (stmt); -+ tree newvar[max_split]; -+ if (rewrite_expr (var, newvar, true)) -+ remove = true; -+ if (gimple_debug_bind_has_value_p (stmt)) -+ { -+ var = gimple_debug_bind_get_value (stmt); -+ if (TREE_CODE (var) == POINTER_PLUS_EXPR) -+ var = TREE_OPERAND (var, 0); -+ if (rewrite_expr (var, newvar, true)) -+ remove = true; -+ } -+ } -+ else if (gimple_debug_source_bind_p (stmt)) -+ { -+ tree var = gimple_debug_source_bind_get_var (stmt); -+ tree newvar[max_split]; -+ if (rewrite_expr (var, newvar, true)) -+ remove = true; -+ var = gimple_debug_source_bind_get_value (stmt); -+ if (TREE_CODE (var) == POINTER_PLUS_EXPR) -+ var = TREE_OPERAND (var, 0); -+ if (rewrite_expr (var, newvar, true)) -+ remove = true; -+ } -+ -+ return remove; -+} -+ -+/* Rewrite PHI nodes, return true if the PHI was replaced. */ -+ -+bool -+ipa_struct_reorg::rewrite_phi (gphi *phi) -+{ -+ tree newlhs[max_split]; -+ gphi *newphi[max_split]; -+ tree result = gimple_phi_result (phi); -+ gphi_iterator gsi; -+ -+ memset(newphi, 0, sizeof(newphi)); -+ -+ if (!rewrite_expr (result, newlhs)) -+ return false; -+ -+ if (newlhs[0] == NULL) -+ return false; -+ -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "\nrewriting PHI:"); -+ print_gimple_stmt (dump_file, phi, 0); -+ } -+ -+ for (unsigned i = 0; i < max_split && newlhs[i]; i++) -+ newphi[i] = create_phi_node (newlhs[i], gimple_bb (phi)); -+ -+ for(unsigned i = 0; i < gimple_phi_num_args (phi); i++) -+ { -+ tree newrhs[max_split]; -+ phi_arg_d rhs = *gimple_phi_arg (phi, i); -+ rewrite_expr (rhs.def, newrhs); -+ for (unsigned j = 0; j < max_split && newlhs[j]; j++) -+ { -+ SET_PHI_ARG_DEF (newphi[j], i, newrhs[j]); -+ gimple_phi_arg_set_location (newphi[j], i, rhs.locus); -+ update_stmt (newphi[j]); -+ } -+ } -+ -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "\ninto\n:"); -+ for (unsigned i = 0; i < max_split && newlhs[i]; i++) -+ { -+ print_gimple_stmt (dump_file, newphi[i], 0); -+ fprintf (dump_file, "\n"); -+ } -+ } -+ -+ gsi = gsi_for_phi (phi); -+ remove_phi_node (&gsi, false); -+ -+ return true; -+} -+ -+/* Rewrite gimple statement STMT, return true if the STATEMENT -+ is to be removed. */ -+ -+bool -+ipa_struct_reorg::rewrite_stmt (gimple *stmt, gimple_stmt_iterator *gsi) -+{ -+ switch (gimple_code (stmt)) -+ { -+ case GIMPLE_ASSIGN: -+ return rewrite_assign (as_a (stmt), gsi); -+ case GIMPLE_CALL: -+ return rewrite_call (as_a (stmt), gsi); -+ case GIMPLE_COND: -+ return rewrite_cond (as_a (stmt), gsi); -+ break; -+ case GIMPLE_GOTO: -+ case GIMPLE_SWITCH: -+ break; -+ case GIMPLE_DEBUG: -+ case GIMPLE_ASM: -+ break; -+ default: -+ break; -+ } -+ return false; -+} -+ -+/* Does the function F uses any decl which has changed. */ -+ -+bool -+ipa_struct_reorg::has_rewritten_type (srfunction *f) -+{ -+ for (unsigned i = 0; i < f->decls.length (); i++) -+ { -+ srdecl *d = f->decls[i]; -+ if (d->newdecl[0] != d->decl) -+ return true; -+ } -+ -+ for (unsigned i = 0; i < f->globals.length (); i++) -+ { -+ srdecl *d = f->globals[i]; -+ if (d->newdecl[0] != d->decl) -+ return true; -+ } -+ return false; -+ -+} -+ -+/* Rewrite the functions if needed, return -+ the TODOs requested. 
*/ -+ -+unsigned -+ipa_struct_reorg::rewrite_functions (void) -+{ -+ unsigned retval = 0; -+ -+ -+ /* Create new types, if we did not create any new types, -+ then don't rewrite any accesses. */ -+ if (!create_new_types ()) -+ return 0; -+ -+ if (functions.length ()) -+ { -+ retval = TODO_remove_functions; -+ create_new_functions (); -+ } -+ -+ create_new_decls (); -+ -+ for (unsigned i = 0; i < functions.length (); i++) -+ { -+ srfunction *f = functions[i]; -+ if (f->newnode) -+ continue; -+ -+ /* Function uses no rewriten types so don't cause a rewrite. */ -+ if (!has_rewritten_type (f)) -+ continue; -+ -+ cgraph_node *node = f->node; -+ basic_block bb; -+ -+ push_cfun (DECL_STRUCT_FUNCTION (node->decl)); -+ current_function = f; -+ -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "\nBefore rewrite:\n"); -+ dump_function_to_file (current_function_decl, dump_file, dump_flags | TDF_VOPS); -+ } -+ FOR_EACH_BB_FN (bb, cfun) -+ { -+ for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si); ) -+ { -+ if (rewrite_phi (si.phi ())) -+ si = gsi_start_phis (bb); -+ else -+ gsi_next (&si); -+ } -+ -+ for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); ) -+ { -+ gimple *stmt = gsi_stmt (si); -+ if (rewrite_stmt (stmt, &si)) -+ gsi_remove (&si, true); -+ else -+ gsi_next (&si); -+ } -+ } -+ -+ /* Debug statements need to happen after all other statements -+ have changed. */ -+ FOR_EACH_BB_FN (bb, cfun) -+ { -+ for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); ) -+ { -+ gimple *stmt = gsi_stmt (si); -+ if (gimple_code (stmt) == GIMPLE_DEBUG -+ && rewrite_debug (stmt, &si)) -+ gsi_remove (&si, true); -+ else -+ gsi_next (&si); -+ } -+ } -+ -+ /* Release the old SSA_NAMES for old arguments. */ -+ if (f->old) -+ { -+ for (unsigned i = 0; i < f->args.length (); i++) -+ { -+ srdecl *d = f->args[i]; -+ if (d->newdecl[0] != d->decl) -+ { -+ tree ssa_name = ssa_default_def (cfun, d->decl); -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "Found "); -+ print_generic_expr (dump_file, ssa_name); -+ fprintf (dump_file, " to be released.\n"); -+ } -+ release_ssa_name (ssa_name); -+ } -+ } -+ } -+ -+ update_ssa (TODO_update_ssa_only_virtuals); -+ -+ if (flag_tree_pta) -+ compute_may_aliases (); -+ -+ remove_unused_locals (); -+ -+ cgraph_edge::rebuild_edges (); -+ -+ free_dominance_info (CDI_DOMINATORS); -+ -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "\nAfter rewrite:\n"); -+ dump_function_to_file (current_function_decl, dump_file, dump_flags | TDF_VOPS); -+ } -+ -+ pop_cfun (); -+ current_function = NULL; -+ } -+ -+ return retval | TODO_verify_all; -+} -+ -+unsigned int -+ipa_struct_reorg::execute (void) -+{ -+ /* FIXME: If there is a top-level inline-asm, the pass immediately returns. 
*/ -+ if (symtab->first_asm_symbol ()) -+ return 0; -+ record_accesses (); -+ prune_escaped_types (); -+ analyze_types (); -+ -+ return rewrite_functions (); -+} -+ -+const pass_data pass_data_ipa_struct_reorg = -+{ -+ SIMPLE_IPA_PASS, /* type */ -+ "struct_reorg", /* name */ -+ OPTGROUP_NONE, /* optinfo_flags */ -+ TV_IPA_STRUCT_REORG, /* tv_id */ -+ 0, /* properties_required */ -+ 0, /* properties_provided */ -+ 0, /* properties_destroyed */ -+ 0, /* todo_flags_start */ -+ 0, /* todo_flags_finish */ -+}; -+ -+class pass_ipa_struct_reorg : public simple_ipa_opt_pass -+{ -+public: -+ pass_ipa_struct_reorg (gcc::context *ctxt) -+ : simple_ipa_opt_pass (pass_data_ipa_struct_reorg, ctxt) -+ {} -+ -+ /* opt_pass methods: */ -+ virtual bool gate (function *); -+ virtual unsigned int execute (function *) { return ipa_struct_reorg ().execute(); } -+ -+}; // class pass_ipa_struct_reorg -+ -+bool -+pass_ipa_struct_reorg::gate (function *) -+{ -+ return (optimize -+ && flag_ipa_struct_reorg -+ /* Don't bother doing anything if the program has errors. */ -+ && !seen_error ()); -+} -+ -+} // anon namespace -+ -+simple_ipa_opt_pass * -+make_pass_ipa_struct_reorg (gcc::context *ctxt) -+{ -+ return new pass_ipa_struct_reorg (ctxt); -+} -diff -Nurp a/gcc/ipa-struct-reorg/ipa-struct-reorg.h b/gcc/ipa-struct-reorg/ipa-struct-reorg.h ---- a/gcc/ipa-struct-reorg/ipa-struct-reorg.h 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.h 2020-06-16 22:56:07.732000000 -0400 -@@ -0,0 +1,235 @@ -+/* Struct-reorg optimizations. -+ Copyright (C) 2016-2017 Free Software Foundation, Inc. -+ Contributed by Andrew Pinski -+ -+This file is part of GCC. -+ -+GCC is free software; you can redistribute it and/or modify it under -+the terms of the GNU General Public License as published by the Free -+Software Foundation; either version 3, or (at your option) any later -+version. -+ -+GCC is distributed in the hope that it will be useful, but WITHOUT ANY -+WARRANTY; without even the implied warranty of MERCHANTABILITY or -+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+for more details. -+ -+You should have received a copy of the GNU General Public License -+along with GCC; see the file COPYING3. If not see -+. 
*/ -+ -+#ifndef IPA_STRUCT_REORG_H -+#define IPA_STRUCT_REORG_H -+ -+namespace struct_reorg { -+ -+const int max_split = 2; -+ -+template -+struct auto_vec_del : auto_vec -+{ -+ ~auto_vec_del(); -+}; -+ -+template -+auto_vec_del::~auto_vec_del(void) -+{ -+ unsigned i; -+ T *t; -+ FOR_EACH_VEC_ELT (*this, i, t) -+ { -+ delete t; -+ } -+} -+ -+enum escape_type -+{ -+ does_not_escape, -+#define DEF_ESCAPE(ENUM, TEXT) ENUM, -+#include "escapes.def" -+ escape_max_escape -+}; -+ -+const char *escape_type_string[escape_max_escape - 1] = -+{ -+#define DEF_ESCAPE(ENUM, TEXT) TEXT, -+#include "escapes.def" -+}; -+ -+struct srfield; -+struct srtype; -+struct sraccess; -+struct srdecl; -+struct srfunction; -+ -+struct srfunction -+{ -+ cgraph_node *node; -+ auto_vec args; -+ auto_vec globals; -+ auto_vec_del decls; -+ srdecl *record_decl (srtype *, tree, int arg); -+ -+ srfunction *old; -+ cgraph_node *newnode; -+ srfunction *newf; -+ -+ // Constructors -+ srfunction (cgraph_node *n); -+ -+ // Methods -+ void add_arg (srdecl *arg); -+ void dump (FILE *file); -+ void simple_dump (FILE *file); -+ -+ bool check_args (void); -+ void create_new_decls (void); -+ srdecl *find_decl (tree); -+}; -+ -+struct srglobal : private srfunction -+{ -+ srglobal () -+ : srfunction (NULL) -+ { -+ } -+ -+ using srfunction::dump; -+ using srfunction::create_new_decls; -+ using srfunction::find_decl; -+ using srfunction::record_decl; -+ using srfunction::decls; -+}; -+ -+struct srtype -+{ -+ tree type; -+ auto_vec_del fields; -+ -+ // array of fields that use this type. -+ auto_vec field_sites; -+ -+ // array of functions which use directly the type -+ auto_vec functions; -+ -+ auto_vec_del accesses; -+ bool chain_type; -+ -+private: -+ escape_type escapes; -+public: -+ -+ tree newtype[max_split]; -+ bool visited; -+ -+ // Constructors -+ srtype(tree type); -+ -+ // Methods -+ void dump (FILE *file); -+ void simple_dump (FILE *file); -+ void add_function (srfunction *); -+ void add_access (sraccess *a) -+ { -+ accesses.safe_push (a); -+ } -+ void add_field_site (srfield *); -+ -+ srfield *find_field (unsigned HOST_WIDE_INT offset); -+ -+ bool create_new_type (void); -+ void analyze (void); -+ void mark_escape (escape_type, gimple *stmt); -+ bool has_escaped (void) -+ { -+ return escapes != does_not_escape; -+ } -+ const char *escape_reason (void) -+ { -+ if (!has_escaped()) -+ return NULL; -+ return escape_type_string[escapes-1]; -+ } -+ bool escaped_rescusive (void) -+ { -+ return escapes == escape_rescusive_type; -+ } -+ bool has_new_type (void) -+ { -+ return newtype[0] && newtype[0] != type; -+ } -+}; -+ -+struct srfield -+{ -+ unsigned HOST_WIDE_INT offset; -+ tree fieldtype; -+ tree fielddecl; -+ srtype *base; -+ srtype *type; -+ -+ unsigned clusternum; -+ -+ tree newfield[max_split]; -+ -+ // Constructors -+ srfield (tree field, srtype *base); -+ -+ // Methods -+ void dump (FILE *file); -+ void simple_dump (FILE *file); -+ -+ void create_new_fields (tree newtype[max_split], -+ tree newfields[max_split], -+ tree newlast[max_split]); -+}; -+ -+struct sraccess -+{ -+ gimple *stmt; -+ cgraph_node *node; -+ -+ srtype *type; -+ // NULL field means the whole type is accessed -+ srfield *field; -+ -+ // constructors -+ sraccess (gimple *s, cgraph_node *n, srtype *t, srfield *f = NULL) -+ : stmt (s), -+ node (n), -+ type (t), -+ field (f) -+ {} -+ -+ // Methods -+ void dump (FILE *file); -+}; -+ -+struct srdecl -+{ -+ srtype *type; -+ tree decl; -+ tree func; -+ /* -1 : not an argument -+ -2 : static chain */ -+ int 
argumentnum; -+ -+ bool visited; -+ -+ tree newdecl[max_split]; -+ -+ // Constructors -+ srdecl (srtype *type, tree decl, int argumentnum = -1); -+ -+ // Methods -+ void dump (FILE *file); -+ bool has_new_decl (void) -+ { -+ return newdecl[0] && newdecl[0] != decl; -+ } -+}; -+ -+ -+} // namespace struct_reorg -+ -+#endif -diff -Nurp a/gcc/Makefile.in b/gcc/Makefile.in ---- a/gcc/Makefile.in 2020-03-12 07:07:20.000000000 -0400 -+++ b/gcc/Makefile.in 2020-06-16 22:56:07.732000000 -0400 -@@ -1367,6 +1367,7 @@ OBJS = \ - incpath.o \ - init-regs.o \ - internal-fn.o \ -+ ipa-struct-reorg/ipa-struct-reorg.o \ - ipa-cp.o \ - ipa-devirt.o \ - ipa-fnsummary.o \ -diff -Nurp a/gcc/params.def b/gcc/params.def ---- a/gcc/params.def 2020-03-12 07:07:21.000000000 -0400 -+++ b/gcc/params.def 2020-06-16 22:56:07.732000000 -0400 -@@ -42,6 +42,16 @@ along with GCC; see the file COPYING3. - - Be sure to add an entry to invoke.texi summarizing the parameter. */ - -+/* The threshold ratio between current and hottest structure counts. -+ We say that if the ratio of the current structure count, -+ calculated by profiling, to the hottest structure count -+ in the program is less than this parameter, then structure -+ reorganization is not applied. The default is 10%. */ -+DEFPARAM (PARAM_STRUCT_REORG_COLD_STRUCT_RATIO, -+ "struct-reorg-cold-struct-ratio", -+ "The threshold ratio between current and hottest structure counts", -+ 10, 0, 100) -+ - /* When branch is predicted to be taken with probability lower than this - threshold (in percent), then it is considered well predictable. */ - DEFPARAM (PARAM_PREDICTABLE_BRANCH_OUTCOME, -diff -Nurp a/gcc/params.h b/gcc/params.h ---- a/gcc/params.h 2020-03-12 07:07:21.000000000 -0400 -+++ b/gcc/params.h 2020-06-16 22:56:07.732000000 -0400 -@@ -130,6 +130,8 @@ extern int default_param_value (compiler - extern void init_param_values (int *params); - - /* Macros for the various parameters. */ -+#define STRUCT_REORG_COLD_STRUCT_RATIO \ -+ PARAM_VALUE (PARAM_STRUCT_REORG_COLD_STRUCT_RATIO) - #define MAX_INLINE_INSNS_SINGLE \ - PARAM_VALUE (PARAM_MAX_INLINE_INSNS_SINGLE) - #define MAX_INLINE_INSNS \ -diff -Nurp a/gcc/passes.def b/gcc/passes.def ---- a/gcc/passes.def 2020-03-12 07:07:21.000000000 -0400 -+++ b/gcc/passes.def 2020-06-16 22:56:07.732000000 -0400 -@@ -169,6 +169,8 @@ along with GCC; see the file COPYING3. - INSERT_PASSES_AFTER (all_late_ipa_passes) - NEXT_PASS (pass_materialize_all_clones); - NEXT_PASS (pass_ipa_pta); -+ /* FIXME: this should a normal IP pass */ -+ NEXT_PASS (pass_ipa_struct_reorg); - NEXT_PASS (pass_omp_simd_clone); - TERMINATE_PASS_LIST (all_late_ipa_passes) - -diff -Nurp a/gcc/testsuite/gcc.c-torture/compile/20170404-1.c b/gcc/testsuite/gcc.c-torture/compile/20170404-1.c ---- a/gcc/testsuite/gcc.c-torture/compile/20170404-1.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.c-torture/compile/20170404-1.c 2020-06-16 22:56:07.732000000 -0400 -@@ -0,0 +1,19 @@ -+struct a -+{ -+ int t, t1; -+}; -+ -+static struct a *b; -+ -+void *xmalloc(int); -+ -+ -+void f(void) -+{ -+ b = xmalloc (sizeof(*b)); -+} -+ -+int g(void) -+{ -+ return b->t; -+} -diff -Nurp a/gcc/testsuite/gcc.c-torture/compile/nested-3.c b/gcc/testsuite/gcc.c-torture/compile/nested-3.c ---- a/gcc/testsuite/gcc.c-torture/compile/nested-3.c 2020-03-12 07:07:22.000000000 -0400 -+++ b/gcc/testsuite/gcc.c-torture/compile/nested-3.c 2020-06-16 22:56:07.736000000 -0400 -@@ -1,3 +1,4 @@ -+/* This used to crash Struct reorg. 
*/ - struct a - { - int t; -diff -Nurp a/gcc/testsuite/gcc.c-torture/compile/struct-reorg-1.c b/gcc/testsuite/gcc.c-torture/compile/struct-reorg-1.c ---- a/gcc/testsuite/gcc.c-torture/compile/struct-reorg-1.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.c-torture/compile/struct-reorg-1.c 2020-06-16 22:56:07.736000000 -0400 -@@ -0,0 +1,18 @@ -+#include -+typedef struct { -+ long laststart_offset; -+ unsigned regnum; -+} compile_stack_elt_t; -+typedef struct { -+ compile_stack_elt_t *stack; -+ unsigned size; -+} compile_stack_type; -+void f (const char *p, const char *pend, int c) -+{ -+ compile_stack_type compile_stack; -+ while (p != pend) -+ if (c) -+ compile_stack.stack = realloc (compile_stack.stack, -+ (compile_stack.size << 1) -+ * sizeof (compile_stack_elt_t)); -+} -diff -Nurp a/gcc/testsuite/gcc.dg/pr33136-4.c b/gcc/testsuite/gcc.dg/pr33136-4.c ---- a/gcc/testsuite/gcc.dg/pr33136-4.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/pr33136-4.c 2020-06-16 22:56:07.736000000 -0400 -@@ -0,0 +1,59 @@ -+/* PR tree-optimization/33136 */ -+/* { dg-do run } */ -+/* { dg-options "-O2" } */ -+ -+extern void abort (void); -+ -+struct S -+{ -+ int b; -+ int *c; -+}; -+static int d, e; -+ -+static struct S s; -+ -+static int * -+__attribute__((noinline, const)) -+foo (void) -+{ -+ return &s.b; -+} -+ -+int * -+__attribute__((noinline)) -+bar (int **f) -+{ -+ s.c = &d; -+ *f = &e; -+ /* As nothing ever takes the address of any int * field in struct S, -+ the write to *f can't alias with the s.c field. */ -+ return s.c; -+} -+ -+int -+__attribute__((noinline)) -+baz (int *x) -+{ -+ s.b = 1; -+ *x = 4; -+ /* Function foo takes address of an int field in struct S, -+ so *x can alias with the s.b field (and it does in this testcase). */ -+ return s.b; -+} -+ -+int -+__attribute__((noinline)) -+t (void) -+{ -+ int *f = (int *) 0; -+ return 10 * (bar (&f) != &d) + baz (foo ()); -+} -+ -+int -+main (void) -+{ -+ if (t () != 4) -+ abort (); -+ return 0; -+} -diff -Nurp a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp ---- a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp 2020-06-16 22:56:07.736000000 -0400 -@@ -0,0 +1,87 @@ -+# Copyright (C) 2007, 2008, 2009, 2010 -+# Free Software Foundation, Inc. -+ -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 3 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with this program; see the file COPYING3. If not see -+# . -+ -+# Test the functionality of programs compiled with profile-directed structure -+# rearrangement using -fprofile-generate followed by -fprofile-use. -+ -+load_lib gcc-dg.exp -+load_lib target-supports.exp -+ -+# Initialize `dg'. 
-+dg-init -+torture-init -+ -+set STRUCT_REORG_TORTURE_OPTIONS [list \ -+ { -O1 } \ -+ { -O1 -g } \ -+ { -O2 } \ -+ { -O2 -g } \ -+ { -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions } \ -+ { -O3 -g } \ -+ { -Os } ] -+ -+ -+set-torture-options $STRUCT_REORG_TORTURE_OPTIONS {{}} $LTO_TORTURE_OPTIONS -+ -+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/wo_prof_*.c]] "" "-fipa-struct-reorg -fdump-ipa-all -fwhole-program" -+ -+torture-finish -+dg-final -+ -+# Some targets don't support tree profiling. -+if { ![check_profiling_available ""] } { -+ return -+} -+ -+# The procedures in profopt.exp need these parameters. -+set tool gcc -+set prof_ext "gcda" -+ -+# Override the list defined in profopt.exp. -+set PROFOPT_OPTIONS [list {}] -+ -+if $tracelevel then { -+ strace $tracelevel -+} -+ -+# Load support procs. -+load_lib profopt.exp -+ -+# These are globals used by profopt-execute. The first is options -+# needed to generate profile data, the second is options to use the -+# profile data. -+set common "-O3 -fwhole-program" -+set profile_option [concat $common " -fprofile-generate"] -+set feedback_option [concat $common " -fprofile-use -fipa-struct-reorg -fdump-ipa-all"] -+ -+foreach src [lsort [glob -nocomplain $srcdir/$subdir/w_prof_*.c]] { -+ # If we're only testing specific files and this isn't one of them, skip it. -+ if ![runtest_file_p $runtests $src] then { -+ continue -+ } -+ profopt-execute $src -+} -+ -+set feedback_option [concat $feedback_option " --param struct-reorg-cold-struct-ratio=30"] -+ -+foreach src [lsort [glob -nocomplain $srcdir/$subdir/w_ratio_*.c]] { -+ # If we're only testing specific files and this isn't one of them, skip it. -+ if ![runtest_file_p $runtests $src] then { -+ continue -+ } -+ profopt-execute $src -+} -diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_array_field.c b/gcc/testsuite/gcc.dg/struct/wo_prof_array_field.c ---- a/gcc/testsuite/gcc.dg/struct/wo_prof_array_field.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/wo_prof_array_field.c 2020-06-16 22:56:07.736000000 -0400 -@@ -0,0 +1,26 @@ -+/* { dg-do compile } */ -+/* { dg-do run } */ -+ -+#include -+typedef struct basic -+{ -+ int a; -+ int b[10]; -+} type_struct; -+ -+type_struct *str1; -+ -+int main() -+{ -+ int i; -+ -+ str1 = malloc (10 * sizeof (type_struct)); -+ -+ for (i=0; i<=9; i++) -+ str1[i].a = str1[i].b[0]; -+ -+ return 0; -+} -+ -+/*--------------------------------------------------------------------------*/ -+/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_array_through_pointer.c b/gcc/testsuite/gcc.dg/struct/wo_prof_array_through_pointer.c ---- a/gcc/testsuite/gcc.dg/struct/wo_prof_array_through_pointer.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/wo_prof_array_through_pointer.c 2020-06-16 22:56:07.736000000 -0400 -@@ -0,0 +1,38 @@ -+/* { dg-do compile } */ -+/* { dg-do run } */ -+ -+#include -+typedef struct -+{ -+ int a; -+ float b; -+}str_t; -+ -+#ifdef STACK_SIZE -+#if STACK_SIZE > 8000 -+#define N 1000 -+#else -+#define N (STACK_SIZE/8) -+#endif -+#else -+#define N 1000 -+#endif -+ -+int -+main () -+{ -+ int i; -+ str_t A[N]; -+ str_t *p = A; -+ -+ for (i = 0; i < N; i++) -+ p[i].a = 0; -+ -+ for (i = 0; i < N; i++) -+ if (p[i].a != 0) -+ abort (); -+ -+ return 0; -+} -+ -+/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } 
*/ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_double_malloc.c b/gcc/testsuite/gcc.dg/struct/wo_prof_double_malloc.c ---- a/gcc/testsuite/gcc.dg/struct/wo_prof_double_malloc.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/wo_prof_double_malloc.c 2020-06-16 22:56:07.736000000 -0400 -@@ -0,0 +1,29 @@ -+/* { dg-do compile } */ -+/* { dg-do run } */ -+ -+#include -+ -+typedef struct test_struct -+{ -+ int a; -+ int b; -+} type_struct; -+ -+typedef type_struct **struct_pointer2; -+ -+struct_pointer2 str1; -+ -+int main() -+{ -+ int i, j; -+ -+ str1 = malloc (2 * sizeof (type_struct *)); -+ -+ for (i = 0; i <= 1; i++) -+ str1[i] = malloc (2 * sizeof (type_struct)); -+ -+ return 0; -+} -+ -+/*--------------------------------------------------------------------------*/ -+/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_empty_str.c b/gcc/testsuite/gcc.dg/struct/wo_prof_empty_str.c ---- a/gcc/testsuite/gcc.dg/struct/wo_prof_empty_str.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/wo_prof_empty_str.c 2020-06-16 22:56:07.736000000 -0400 -@@ -0,0 +1,44 @@ -+/* { dg-do run } */ -+ -+#include -+ -+struct S { int a; struct V *b; }; -+typedef struct { int c; } T; -+typedef struct { int d; int e; } U; -+ -+void * -+fn (void *x) -+{ -+ return x; -+} -+ -+int -+foo (struct S *s) -+{ -+ T x; -+ -+ T y = *(T *)fn (&x); -+ return y.c; -+} -+ -+int -+bar (struct S *s) -+{ -+ U x; -+ -+ U y = *(U *)fn (&x); -+ return y.d + s->a; -+} -+ -+int -+main () -+{ -+ struct S s; -+ -+ foo(&s) + bar (&s); -+ -+ return 0; -+} -+ -+/*--------------------------------------------------------------------------*/ -+/* { dg-final { scan-ipa-dump "No structures to transform" "struct_reorg" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_arg_to_local.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_arg_to_local.c ---- a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_arg_to_local.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_arg_to_local.c 2020-06-16 22:56:07.736000000 -0400 -@@ -0,0 +1,44 @@ -+/* { dg-do run } */ -+ -+#include -+struct str -+{ -+ int a; -+ float b; -+}; -+ -+#ifdef STACK_SIZE -+#if STACK_SIZE > 8000 -+#define N 1000 -+#else -+#define N (STACK_SIZE/8) -+#endif -+#else -+#define N 1000 -+#endif -+ -+int -+foo (struct str * p_str) -+{ -+ static int sum = 0; -+ -+ sum = sum + p_str->a; -+ return sum; -+} -+ -+int -+main () -+{ -+ int i, sum; -+ struct str * p = malloc (N * sizeof (struct str)); -+ if (p == NULL) -+ return 0; -+ for (i = 0; i < N; i++) -+ sum = foo (p+i); -+ -+ return 0; -+} -+ -+/*--------------------------------------------------------------------------*/ -+/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ -+ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_return-1.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_return-1.c ---- a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_return-1.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_return-1.c 2020-06-16 22:56:07.736000000 -0400 -@@ -0,0 +1,32 @@ -+/* { dg-do run } */ -+ -+#include -+ -+struct A { -+ int d; -+ int d1; -+}; -+ -+struct A a; -+ -+struct A *foo () __attribute__((noinline)); -+struct A *foo () -+{ -+ a.d = 5; -+ return &a; -+} -+ -+int -+main () -+{ -+ a.d = 0; -+ foo (); -+ -+ if (a.d != 5) -+ abort (); -+ -+ return 0; -+} -+ 
-+/*--------------------------------------------------------------------------*/ -+/* { dg-final { scan-ipa-dump "has escaped. .Type escapes via a return" "struct_reorg" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_return.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_return.c ---- a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_return.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_return.c 2020-06-16 22:56:07.736000000 -0400 -@@ -0,0 +1,31 @@ -+/* { dg-do run } */ -+ -+#include -+ -+struct A { -+ int d; -+}; -+ -+struct A a; -+ -+struct A foo () __attribute__((noinline)); -+struct A foo () -+{ -+ a.d = 5; -+ return a; -+} -+ -+int -+main () -+{ -+ a.d = 0; -+ foo (); -+ -+ if (a.d != 5) -+ abort (); -+ -+ return 0; -+} -+ -+/*--------------------------------------------------------------------------*/ -+/* { dg-final { scan-ipa-dump "has escaped: \"Type escapes via a return" "struct_reorg" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_str_init.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_str_init.c ---- a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_str_init.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_str_init.c 2020-06-16 22:56:07.736000000 -0400 -@@ -0,0 +1,31 @@ -+/* { dg-do compile } */ -+/* { dg-do run } */ -+ -+#include -+typedef struct -+{ -+ int a; -+ int b; -+}str_t; -+ -+#define N 2 -+ -+str_t A[2] = {{1,1},{2,2}}; -+ -+int -+main () -+{ -+ int i; -+ -+ for (i = 0; i < N; i++) -+ A[i].b = A[i].a; -+ -+ for (i = 0; i < N; i++) -+ if (A[i].b != A[i].a) -+ abort (); -+ -+ return 0; -+} -+ -+/*--------------------------------------------------------------------------*/ -+/* { dg-final { scan-ipa-dump "has escaped...Type is used in an array" "struct_reorg" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_array.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_array.c ---- a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_array.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_array.c 2020-06-16 22:56:07.736000000 -0400 -@@ -0,0 +1,33 @@ -+/* { dg-do compile } */ -+/* { dg-do run } */ -+ -+#include -+typedef struct -+{ -+ int a; -+ float b; -+}str_t; -+ -+#define N 1000 -+ -+typedef struct -+{ -+ str_t A[N]; -+ int c; -+}str_with_substr_t; -+ -+str_with_substr_t a; -+ -+int -+main () -+{ -+ int i; -+ -+ for (i = 0; i < N; i++) -+ a.A[i].b = 0; -+ -+ return 0; -+} -+ -+/*--------------------------------------------------------------------------*/ -+/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_pointer.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_pointer.c ---- a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_pointer.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_pointer.c 2020-06-16 22:56:07.736000000 -0400 -@@ -0,0 +1,48 @@ -+/* { dg-do compile } */ -+/* { dg-do run } */ -+ -+#include -+typedef struct -+{ -+ int a; -+ float b; -+}str_t; -+ -+#ifdef STACK_SIZE -+#if STACK_SIZE > 16000 -+#define N 1000 -+#else -+#define N (STACK_SIZE/16) -+#endif -+#else -+#define N 1000 -+#endif -+ -+typedef struct -+{ -+ str_t * sub_str; -+ int c; -+}str_with_substr_t; -+ -+int foo; -+ -+int -+main (void) -+{ -+ int i; -+ str_with_substr_t A[N]; -+ str_t a[N]; -+ -+ for (i=0; i < N; i++) -+ A[i].sub_str = &(a[i]); -+ -+ for (i=0; 
i < N; i++) -+ A[i].sub_str->a = 5; -+ -+ foo = A[56].sub_str->a; -+ -+ return 0; -+} -+ -+/*--------------------------------------------------------------------------*/ -+/* { dg-final { scan-ipa-dump "has escaped...Type is used in an array" "struct_reorg" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_value.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_value.c ---- a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_value.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_value.c 2020-06-16 22:56:07.736000000 -0400 -@@ -0,0 +1,45 @@ -+/* { dg-do compile } */ -+/* { dg-do run } */ -+ -+#include -+typedef struct -+{ -+ int a; -+ float b; -+}str_t; -+ -+#ifdef STACK_SIZE -+#if STACK_SIZE > 8000 -+#define N 1000 -+#else -+#define N (STACK_SIZE/8) -+#endif -+#else -+#define N 1000 -+#endif -+ -+ -+typedef struct -+{ -+ str_t sub_str; -+ int c; -+}str_with_substr_t; -+ -+int -+main () -+{ -+ int i; -+ str_with_substr_t A[N]; -+ -+ for (i = 0; i < N; i++) -+ A[i].sub_str.a = 5; -+ -+ for (i = 0; i < N; i++) -+ if (A[i].sub_str.a != 5) -+ abort (); -+ -+ return 0; -+} -+ -+/*--------------------------------------------------------------------------*/ -+/* { dg-final { scan-ipa-dump "has escaped...Type is used in an array" "struct_reorg" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_global_array.c b/gcc/testsuite/gcc.dg/struct/wo_prof_global_array.c ---- a/gcc/testsuite/gcc.dg/struct/wo_prof_global_array.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/wo_prof_global_array.c 2020-06-16 22:56:07.736000000 -0400 -@@ -0,0 +1,32 @@ -+/* { dg-do compile } */ -+/* { dg-do run } */ -+ -+#include -+typedef struct -+{ -+ int a; -+ float b; -+}str_t; -+ -+#define N 1000 -+str_t A[N]; -+ -+int -+main () -+{ -+ int i; -+ -+ for (i = 0; i < N; i++) -+ { -+ A[i].a = 0; -+ } -+ -+ for (i = 0; i < N; i++) -+ if (A[i].a != 0) -+ abort (); -+ -+ return 0; -+} -+ -+/*--------------------------------------------------------------------------*/ -+/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_global_var.c b/gcc/testsuite/gcc.dg/struct/wo_prof_global_var.c ---- a/gcc/testsuite/gcc.dg/struct/wo_prof_global_var.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/wo_prof_global_var.c 2020-06-16 22:56:07.736000000 -0400 -@@ -0,0 +1,45 @@ -+/* { dg-do compile } */ -+/* { dg-do run } */ -+ -+#include -+typedef struct -+{ -+ int a; -+ float b; -+}str_t; -+ -+#ifdef STACK_SIZE -+#if STACK_SIZE > 8000 -+#define N 1000 -+#else -+#define N (STACK_SIZE/8) -+#endif -+#else -+#define N 1000 -+#endif -+ -+str_t *p; -+ -+int -+main () -+{ -+ int i, sum; -+ -+ p = malloc (N * sizeof (str_t)); -+ if (p == NULL) -+ return 0; -+ for (i = 0; i < N; i++) -+ p[i].b = i; -+ -+ for (i = 0; i < N; i++) -+ p[i].b = p[i].a + 1; -+ -+ for (i = 0; i < N; i++) -+ if (p[i].b != p[i].a + 1) -+ abort (); -+ -+ return 0; -+} -+ -+/*--------------------------------------------------------------------------*/ -+/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_local_array.c b/gcc/testsuite/gcc.dg/struct/wo_prof_local_array.c ---- a/gcc/testsuite/gcc.dg/struct/wo_prof_local_array.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/wo_prof_local_array.c 2020-06-16 22:56:07.736000000 -0400 -@@ -0,0 +1,40 
@@ -+/* { dg-do compile } */ -+/* { dg-do run } */ -+ -+#include -+typedef struct -+{ -+ int a; -+ float b; -+}str_t; -+ -+#ifdef STACK_SIZE -+#if STACK_SIZE > 8000 -+#define N 1000 -+#else -+#define N (STACK_SIZE/8) -+#endif -+#else -+#define N 1000 -+#endif -+ -+int -+main () -+{ -+ int i; -+ str_t A[N]; -+ -+ for (i = 0; i < N; i++) -+ { -+ A[i].a = 0; -+ } -+ -+ for (i = 0; i < N; i++) -+ if (A[i].a != 0) -+ abort (); -+ -+ return 0; -+} -+ -+/*--------------------------------------------------------------------------*/ -+/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_local_var.c b/gcc/testsuite/gcc.dg/struct/wo_prof_local_var.c ---- a/gcc/testsuite/gcc.dg/struct/wo_prof_local_var.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/wo_prof_local_var.c 2020-06-16 22:56:07.736000000 -0400 -@@ -0,0 +1,43 @@ -+/* { dg-do compile } */ -+/* { dg-do run } */ -+ -+#include -+typedef struct -+{ -+ int a; -+ float b; -+}str_t; -+ -+#ifdef STACK_SIZE -+#if STACK_SIZE > 8000 -+#define N 1000 -+#else -+#define N (STACK_SIZE/8) -+#endif -+#else -+#define N 1000 -+#endif -+ -+int -+main () -+{ -+ int i, sum; -+ -+ str_t * p = malloc (N * sizeof (str_t)); -+ if (p == NULL) -+ return 0; -+ for (i = 0; i < N; i++) -+ p[i].b = i; -+ -+ for (i = 0; i < N; i++) -+ p[i].b = p[i].a + 1; -+ -+ for (i = 0; i < N; i++) -+ if (p[i].b != p[i].a + 1) -+ abort (); -+ -+ return 0; -+} -+ -+/*--------------------------------------------------------------------------*/ -+/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var-1.c b/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var-1.c ---- a/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var-1.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var-1.c 2020-06-16 22:56:07.736000000 -0400 -@@ -0,0 +1,47 @@ -+/* { dg-do compile } */ -+/* { dg-do run } */ -+ -+#include -+typedef struct -+{ -+ int a; -+ float b; -+}str_t; -+ -+#ifdef STACK_SIZE -+#if STACK_SIZE > 8000 -+#define N 1000 -+#else -+#define N (STACK_SIZE/8) -+#endif -+#else -+#define N 1000 -+#endif -+ -+int -+main () -+{ -+ long i, num; -+ -+ num = rand(); -+ num = num > N ? N : num; -+ str_t * p = malloc (num * sizeof (str_t)); -+ -+ if (p == 0) -+ return 0; -+ -+ for (i = 1; i <= num; i++) -+ p[i-1].b = i; -+ -+ for (i = 1; i <= num; i++) -+ p[i-1].a = p[i-1].b + 1; -+ -+ for (i = 0; i < num; i++) -+ if (p[i].a != p[i].b + 1) -+ abort (); -+ -+ return 0; -+} -+ -+/*--------------------------------------------------------------------------*/ -+/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var.c b/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var.c ---- a/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var.c 2020-06-16 22:56:07.736000000 -0400 -@@ -0,0 +1,47 @@ -+/* { dg-do compile } */ -+/* { dg-do run } */ -+ -+#include -+typedef struct -+{ -+ int a; -+ float b; -+}str_t; -+ -+#ifdef STACK_SIZE -+#if STACK_SIZE > 8000 -+#define N 1000 -+#else -+#define N (STACK_SIZE/8) -+#endif -+#else -+#define N 1000 -+#endif -+ -+int -+main () -+{ -+ int i, num; -+ -+ num = rand(); -+ num = num > N ? 
N : num; -+ str_t * p = malloc (num * sizeof (str_t)); -+ -+ if (p == 0) -+ return 0; -+ -+ for (i = 0; i < num; i++) -+ p[i].b = i; -+ -+ for (i = 0; i < num; i++) -+ p[i].a = p[i].b + 1; -+ -+ for (i = 0; i < num; i++) -+ if (p[i].a != p[i].b + 1) -+ abort (); -+ -+ return 0; -+} -+ -+/*--------------------------------------------------------------------------*/ -+/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_mult_field_peeling.c b/gcc/testsuite/gcc.dg/struct/wo_prof_mult_field_peeling.c ---- a/gcc/testsuite/gcc.dg/struct/wo_prof_mult_field_peeling.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/wo_prof_mult_field_peeling.c 2020-06-16 22:56:07.736000000 -0400 -@@ -0,0 +1,42 @@ -+/* { dg-do compile } */ -+/* { dg-do run } */ -+ -+#include -+typedef struct -+{ -+ int a; -+ float b; -+ int c; -+ float d; -+}str_t; -+ -+#ifdef STACK_SIZE -+#if STACK_SIZE > 1600 -+#define N 100 -+#else -+#define N (STACK_SIZE/16) -+#endif -+#else -+#define N 100 -+#endif -+ -+int -+main () -+{ -+ int i; -+ str_t *p = malloc (N * sizeof (str_t)); -+ if (p == NULL) -+ return 0; -+ for (i = 0; i < N; i++) -+ p[i].a = 5; -+ -+ for (i = 0; i < N; i++) -+ if (p[i].a != 5) -+ abort (); -+ -+ return 0; -+} -+ -+/*--------------------------------------------------------------------------*/ -+/* The structure str_t is erroneously peeled into 4 structures instead of 2. */ -+/* { dg-final { scan-ipa-dump "the number of new types is 2" "struct_reorg" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_global.c b/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_global.c ---- a/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_global.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_global.c 2020-06-16 22:56:07.736000000 -0400 -@@ -0,0 +1,34 @@ -+/* { dg-do compile } */ -+/* { dg-do run } */ -+ -+#include -+typedef struct -+{ -+ int a; -+ int b; -+}str_t; -+ -+#define N 3 -+ -+str_t str; -+ -+int -+main () -+{ -+ int i; -+ int res = 1<<(1< -+typedef struct -+{ -+ int a; -+ int b; -+}str_t; -+ -+#define N 3 -+ -+int -+main () -+{ -+ int i; -+ int res = 1<<(1< -+typedef struct -+{ -+ int a; -+ int *b; -+}str_t; -+ -+#define N 3 -+ -+str_t *p; -+ -+int -+main () -+{ -+ str_t str; -+ int i; -+ int res = 1 << (1 << N); -+ p = &str; -+ str.a = 2; -+ -+ p->b = &(p->a); -+ -+ for (i=0; i < N; i++) -+ p->a = *(p->b)*(*(p->b)); -+ -+ if (p->a != res) -+ abort (); -+ -+ /* POSIX ignores all but the 8 low-order bits, but other -+ environments may not. 
*/ -+ return (p->a & 255); -+} -+ -+/*--------------------------------------------------------------------------*/ -+/* { dg-final { scan-ipa-dump "has escaped...Type escapes a cast to a different" "struct_reorg" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_two_strs.c b/gcc/testsuite/gcc.dg/struct/wo_prof_two_strs.c ---- a/gcc/testsuite/gcc.dg/struct/wo_prof_two_strs.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/wo_prof_two_strs.c 2020-06-16 22:56:07.736000000 -0400 -@@ -0,0 +1,67 @@ -+/* { dg-do compile } */ -+/* { dg-do run } */ -+ -+#include -+ -+typedef struct -+{ -+ int a; -+ float b; -+}str_t1; -+ -+typedef struct -+{ -+ int c; -+ float d; -+}str_t2; -+ -+#ifdef STACK_SIZE -+#if STACK_SIZE > 16000 -+#define N 1000 -+#else -+#define N (STACK_SIZE/16) -+#endif -+#else -+#define N 1000 -+#endif -+ -+str_t1 *p1; -+str_t2 *p2; -+int num; -+ -+void -+foo (void) -+{ -+ int i; -+ -+ for (i=0; i < num; i++) -+ p2[i].c = 2; -+} -+ -+int -+main () -+{ -+ int i, r; -+ -+ r = rand (); -+ num = r > N ? N : r; -+ p1 = malloc (num * sizeof (str_t1)); -+ p2 = malloc (num * sizeof (str_t2)); -+ -+ if (p1 == NULL || p2 == NULL) -+ return 0; -+ -+ for (i = 0; i < num; i++) -+ p1[i].a = 1; -+ -+ foo (); -+ -+ for (i = 0; i < num; i++) -+ if (p1[i].a != 1 || p2[i].c != 2) -+ abort (); -+ -+ return 0; -+} -+ -+/*--------------------------------------------------------------------------*/ -+/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/w_prof_global_array.c b/gcc/testsuite/gcc.dg/struct/w_prof_global_array.c ---- a/gcc/testsuite/gcc.dg/struct/w_prof_global_array.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/w_prof_global_array.c 2020-06-16 22:56:07.736000000 -0400 -@@ -0,0 +1,29 @@ -+#include -+typedef struct -+{ -+ int a; -+ float b; -+}str_t; -+ -+#define N 1000 -+str_t A[N]; -+ -+int -+main () -+{ -+ int i; -+ -+ for (i = 0; i < N; i++) -+ { -+ A[i].a = 0; -+ } -+ -+ for (i = 0; i < N; i++) -+ if (A[i].a != 0) -+ abort (); -+ -+ return 0; -+} -+ -+/*--------------------------------------------------------------------------*/ -+/* { dg-final-use { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/w_prof_global_var.c b/gcc/testsuite/gcc.dg/struct/w_prof_global_var.c ---- a/gcc/testsuite/gcc.dg/struct/w_prof_global_var.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/w_prof_global_var.c 2020-06-16 22:56:07.736000000 -0400 -@@ -0,0 +1,42 @@ -+#include -+typedef struct -+{ -+ int a; -+ float b; -+}str_t; -+ -+#ifdef STACK_SIZE -+#if STACK_SIZE > 8000 -+#define N 1000 -+#else -+#define N (STACK_SIZE/8) -+#endif -+#else -+#define N 1000 -+#endif -+ -+str_t *p; -+ -+int -+main () -+{ -+ int i, sum; -+ -+ p = malloc (N * sizeof (str_t)); -+ if (p == NULL) -+ return 0; -+ for (i = 0; i < N; i++) -+ p[i].b = i; -+ -+ for (i = 0; i < N; i++) -+ p[i].a = p[i].b + 1; -+ -+ for (i = 0; i < N; i++) -+ if (p[i].a != p[i].b + 1) -+ abort (); -+ -+ return 0; -+} -+ -+/*--------------------------------------------------------------------------*/ -+/* { dg-final-use { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/w_prof_local_array.c b/gcc/testsuite/gcc.dg/struct/w_prof_local_array.c ---- a/gcc/testsuite/gcc.dg/struct/w_prof_local_array.c 1969-12-31 19:00:00.000000000 -0500 -+++ 
b/gcc/testsuite/gcc.dg/struct/w_prof_local_array.c 2020-06-16 22:56:07.736000000 -0400 -@@ -0,0 +1,37 @@ -+#include -+typedef struct -+{ -+ int a; -+ float b; -+}str_t; -+ -+#ifdef STACK_SIZE -+#if STACK_SIZE > 8000 -+#define N 1000 -+#else -+#define N (STACK_SIZE/8) -+#endif -+#else -+#define N 1000 -+#endif -+ -+int -+main () -+{ -+ int i; -+ str_t A[N]; -+ -+ for (i = 0; i < N; i++) -+ { -+ A[i].a = 0; -+ } -+ -+ for (i = 0; i < N; i++) -+ if (A[i].a != 0) -+ abort (); -+ -+ return 0; -+} -+ -+/*--------------------------------------------------------------------------*/ -+/* { dg-final-use { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/w_prof_local_var.c b/gcc/testsuite/gcc.dg/struct/w_prof_local_var.c ---- a/gcc/testsuite/gcc.dg/struct/w_prof_local_var.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/w_prof_local_var.c 2020-06-16 22:56:07.736000000 -0400 -@@ -0,0 +1,40 @@ -+#include -+typedef struct -+{ -+ int a; -+ float b; -+}str_t; -+ -+#ifdef STACK_SIZE -+#if STACK_SIZE > 8000 -+#define N 1000 -+#else -+#define N (STACK_SIZE/8) -+#endif -+#else -+#define N 1000 -+#endif -+ -+int -+main () -+{ -+ int i, sum; -+ -+ str_t * p = malloc (N * sizeof (str_t)); -+ if (p == NULL) -+ return 0; -+ for (i = 0; i < N; i++) -+ p[i].b = i; -+ -+ for (i = 0; i < N; i++) -+ p[i].a = p[i].b + 1; -+ -+ for (i = 0; i < N; i++) -+ if (p[i].a != p[i].b + 1) -+ abort (); -+ -+ return 0; -+} -+ -+/*--------------------------------------------------------------------------*/ -+/* { dg-final-use { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/w_prof_single_str_global.c b/gcc/testsuite/gcc.dg/struct/w_prof_single_str_global.c ---- a/gcc/testsuite/gcc.dg/struct/w_prof_single_str_global.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/w_prof_single_str_global.c 2020-06-16 22:56:07.736000000 -0400 -@@ -0,0 +1,31 @@ -+#include -+typedef struct -+{ -+ int a; -+ int b; -+}str_t; -+ -+#define N 3 -+ -+str_t str; -+ -+int -+main () -+{ -+ int i; -+ int res = 1<<(1< -+ -+typedef struct -+{ -+ int a; -+ float b; -+}str_t1; -+ -+typedef struct -+{ -+ int c; -+ float d; -+}str_t2; -+ -+#ifdef STACK_SIZE -+#if STACK_SIZE > 16000 -+#define N 1000 -+#else -+#define N (STACK_SIZE/16) -+#endif -+#else -+#define N 1000 -+#endif -+ -+str_t1 *p1; -+str_t2 *p2; -+int num; -+ -+void -+foo (void) -+{ -+ int i; -+ -+ for (i=0; i < num; i++) -+ p2[i].c = 2; -+} -+ -+int -+main () -+{ -+ int i, r; -+ -+ r = rand (); -+ num = r > N ? 
N : r; -+ p1 = malloc (num * sizeof (str_t1)); -+ p2 = malloc (num * sizeof (str_t2)); -+ -+ if (p1 == NULL || p2 == NULL) -+ return 0; -+ -+ for (i = 0; i < num; i++) -+ p1[i].a = 1; -+ -+ foo (); -+ -+ for (i = 0; i < num; i++) -+ if (p1[i].a != 1 || p2[i].c != 2) -+ abort (); -+ -+ return 0; -+} -+ -+/*--------------------------------------------------------------------------*/ -+/* { dg-final-use { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ -diff -Nurp a/gcc/testsuite/gcc.dg/struct/w_ratio_cold_str.c b/gcc/testsuite/gcc.dg/struct/w_ratio_cold_str.c ---- a/gcc/testsuite/gcc.dg/struct/w_ratio_cold_str.c 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/gcc.dg/struct/w_ratio_cold_str.c 2020-06-16 22:56:07.736000000 -0400 -@@ -0,0 +1,43 @@ -+#include -+typedef struct -+{ -+ int a; -+ int b; -+}str_t1; -+ -+typedef struct -+{ -+ float a; -+ float b; -+}str_t2; -+ -+#define N1 1000 -+#define N2 100 -+str_t1 A1[N1]; -+str_t2 A2[N2]; -+ -+int -+main () -+{ -+ int i; -+ -+ for (i = 0; i < N1; i++) -+ A1[i].a = 0; -+ -+ for (i = 0; i < N2; i++) -+ A2[i].a = 0; -+ -+ for (i = 0; i < N1; i++) -+ if (A1[i].a != 0) -+ abort (); -+ -+ for (i = 0; i < N2; i++) -+ if (A2[i].a != 0) -+ abort (); -+ -+ return 0; -+} -+ -+/*--------------------------------------------------------------------------*/ -+/* Arrays are not handled. */ -+/* { dg-final-use { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ -diff -Nurp a/gcc/testsuite/g++.dg/torture/pr38355.C b/gcc/testsuite/g++.dg/torture/pr38355.C ---- a/gcc/testsuite/g++.dg/torture/pr38355.C 1969-12-31 19:00:00.000000000 -0500 -+++ b/gcc/testsuite/g++.dg/torture/pr38355.C 2020-06-16 22:56:07.736000000 -0400 -@@ -0,0 +1,25 @@ -+// { dg-do run } -+// { dg-options "-fwhole-program -fipa-struct-reorg" } -+template struct A -+{ -+ char c; -+ void foo(int); -+ void bar(int i) { foo(i+1); } -+}; -+ -+template struct B : virtual A<0> {}; -+ -+template inline void baz(B& b, int i) -+{ -+ if (i) b.bar(0); -+} -+ -+extern template class A<0>; -+extern template void baz(B<0>&, int); -+ -+int main() -+{ -+ B<0> b; -+ baz(b, 0); -+ return 0; -+} -diff -Nurp a/gcc/timevar.def b/gcc/timevar.def ---- a/gcc/timevar.def 2020-03-12 07:07:23.000000000 -0400 -+++ b/gcc/timevar.def 2020-06-16 22:56:07.736000000 -0400 -@@ -77,6 +77,7 @@ DEFTIMEVAR (TV_IPA_CONSTANT_PROP , " - DEFTIMEVAR (TV_IPA_INLINING , "ipa inlining heuristics") - DEFTIMEVAR (TV_IPA_FNSPLIT , "ipa function splitting") - DEFTIMEVAR (TV_IPA_COMDATS , "ipa comdats") -+DEFTIMEVAR (TV_IPA_STRUCT_REORG , "ipa struct reorg optimization") - DEFTIMEVAR (TV_IPA_OPT , "ipa various optimizations") - DEFTIMEVAR (TV_IPA_LTO_DECOMPRESS , "lto stream inflate") - DEFTIMEVAR (TV_IPA_LTO_COMPRESS , "lto stream deflate") -diff -Nurp a/gcc/tree-pass.h b/gcc/tree-pass.h ---- a/gcc/tree-pass.h 2020-03-12 07:07:23.000000000 -0400 -+++ b/gcc/tree-pass.h 2020-06-16 22:56:07.736000000 -0400 -@@ -504,6 +504,7 @@ extern ipa_opt_pass_d *make_pass_ipa_dev - extern ipa_opt_pass_d *make_pass_ipa_reference (gcc::context *ctxt); - extern ipa_opt_pass_d *make_pass_ipa_hsa (gcc::context *ctxt); - extern ipa_opt_pass_d *make_pass_ipa_pure_const (gcc::context *ctxt); -+extern simple_ipa_opt_pass *make_pass_ipa_struct_reorg (gcc::context *ctxt); - extern simple_ipa_opt_pass *make_pass_ipa_pta (gcc::context *ctxt); - extern simple_ipa_opt_pass *make_pass_ipa_tm (gcc::context *ctxt); - extern simple_ipa_opt_pass *make_pass_target_clone (gcc::context *ctxt); diff --git 
a/ivopts-1.patch b/ivopts-1.patch deleted file mode 100644 index 6f2e0410842d6b410adde23e2ed1ec597eba15d6..0000000000000000000000000000000000000000 --- a/ivopts-1.patch +++ /dev/null @@ -1,181 +0,0 @@ -re-PR-tree-optimization-90240-ICE-in-try_improve_iv_.patch: -commit 98d8f142132ac670da2dc99cce530048343ab948 - -diff -urpN a/gcc/testsuite/gfortran.dg/graphite/pr90240.f b/gcc/testsuite/gfortran.dg/graphite/pr90240.f -new file mode 100644 ---- /dev/null -+++ b/gcc/testsuite/gfortran.dg/graphite/pr90240.f -@@ -0,0 +1,18 @@ -+! { dg-do compile } -+! { dg-options "-O1 -floop-nest-optimize" } -+ -+ PARAMETER (n=1335, N2=1335) -+ COMMON a(n,N2), b(n,N2), c(n,N2), -+ * d(n,N2), -+ 2 e(n,N2), f(n,N2), -+ * g(n,N2), h(n,N2) -+ DO 200 j=1,i -+ DO 300 k=1,l -+ a(k,j) = c(k,j)*g(k,j)*f(k+1,m)+f(k,m)+f(k,j) -+ 2 +f(k+1,j)*h(k+1,j) -+ b(k,j+1) = d(k,j+1)*g(k,m)+g(k,j+1) -+ 1 *e(k,m)+e(k,j+1)+e(k,j)+e(k+1,j) -+ 2 *h(k,j+1)-h(k,j) -+ 300 ENDDO -+ 200 ENDDO -+ END -diff -urpN a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c ---- a/gcc/tree-ssa-loop-ivopts.c -+++ b/gcc/tree-ssa-loop-ivopts.c -@@ -4557,22 +4557,25 @@ get_address_cost (struct ivopts_data *data, struct iv_use *use, - static comp_cost - get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost) - { -- int loop_freq = data->current_loop->header->count.to_frequency (cfun); -- int bb_freq = gimple_bb (at)->count.to_frequency (cfun); -- if (loop_freq != 0) -- { -- gcc_assert (cost.scratch <= cost.cost); -- int scaled_cost -- = cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq; -+ if (data->speed -+ && data->current_loop->header->count.to_frequency (cfun) > 0) -+ { -+ basic_block bb = gimple_bb (at); -+ gcc_assert (cost.scratch <= cost.cost); -+ int scale_factor = (int)(intptr_t) bb->aux; -+ if (scale_factor == 1) -+ return cost; - -- if (dump_file && (dump_flags & TDF_DETAILS)) -- fprintf (dump_file, "Scaling cost based on bb prob " -- "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n", -- 1.0f * bb_freq / loop_freq, cost.cost, -- cost.scratch, scaled_cost, bb_freq, loop_freq); -+ int scaled_cost -+ = cost.scratch + (cost.cost - cost.scratch) * scale_factor; - -- cost.cost = scaled_cost; -- } -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ fprintf (dump_file, "Scaling cost based on bb prob " -+ "by %2.2f: %d (scratch: %d) -> %d\n", -+ 1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost); -+ -+ cost.cost = scaled_cost; -+ } - - return cost; - } -@@ -6678,9 +6681,8 @@ try_improve_iv_set (struct ivopts_data *data, - } - - iv_ca_delta_commit (data, ivs, best_delta, true); -- gcc_assert (best_cost == iv_ca_cost (ivs)); - iv_ca_delta_free (&best_delta); -- return true; -+ return best_cost == iv_ca_cost (ivs); - } - - /* Attempts to find the optimal set of induction variables. We do simple -@@ -6717,6 +6719,14 @@ find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp) - } - } - -+ /* If the set has infinite_cost, it can't be optimal. */ -+ if (iv_ca_cost (set).infinite_cost_p ()) -+ { -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ fprintf (dump_file, -+ "Overflow to infinite cost in try_improve_iv_set.\n"); -+ iv_ca_free (&set); -+ } - return set; - } - -@@ -7522,6 +7532,49 @@ loop_body_includes_call (basic_block *body, unsigned num_nodes) - return false; - } - -+/* Determine cost scaling factor for basic blocks in loop. 
*/ -+#define COST_SCALING_FACTOR_BOUND (20) -+ -+static void -+determine_scaling_factor (struct ivopts_data *data, basic_block *body) -+{ -+ int lfreq = data->current_loop->header->count.to_frequency (cfun); -+ if (!data->speed || lfreq <= 0) -+ return; -+ -+ int max_freq = lfreq; -+ for (unsigned i = 0; i < data->current_loop->num_nodes; i++) -+ { -+ body[i]->aux = (void *)(intptr_t) 1; -+ if (max_freq < body[i]->count.to_frequency (cfun)) -+ max_freq = body[i]->count.to_frequency (cfun); -+ } -+ if (max_freq > lfreq) -+ { -+ int divisor, factor; -+ /* Check if scaling factor itself needs to be scaled by the bound. This -+ is to avoid overflow when scaling cost according to profile info. */ -+ if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND) -+ { -+ divisor = max_freq; -+ factor = COST_SCALING_FACTOR_BOUND; -+ } -+ else -+ { -+ divisor = lfreq; -+ factor = 1; -+ } -+ for (unsigned i = 0; i < data->current_loop->num_nodes; i++) -+ { -+ int bfreq = body[i]->count.to_frequency (cfun); -+ if (bfreq <= lfreq) -+ continue; -+ -+ body[i]->aux = (void*)(intptr_t) (factor * bfreq / divisor); -+ } -+ } -+} -+ - /* Optimizes the LOOP. Returns true if anything changed. */ - - static bool -@@ -7560,7 +7613,6 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop, - body = get_loop_body (loop); - data->body_includes_call = loop_body_includes_call (body, loop->num_nodes); - renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes); -- free (body); - - data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit); - -@@ -7574,6 +7626,9 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop, - if (data->vgroups.length () > MAX_CONSIDERED_GROUPS) - goto finish; - -+ /* Determine cost scaling factor for basic blocks in loop. */ -+ determine_scaling_factor (data, body); -+ - /* Finds candidates for the induction variables (item 2). */ - find_iv_candidates (data); - -@@ -7584,6 +7639,9 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop, - - /* Find the optimal set of induction variables (item 3, part 2). */ - iv_ca = find_optimal_iv_set (data); -+ /* Cleanup basic block aux field. 
*/ -+ for (unsigned i = 0; i < data->current_loop->num_nodes; i++) -+ body[i]->aux = NULL; - if (!iv_ca) - goto finish; - changed = true; -@@ -7599,6 +7657,7 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop, - remove_unused_ivs (data, toremove); - - finish: -+ free (body); - free_loop_data (data); - - return changed; diff --git a/ivopts-2.patch b/ivopts-2.patch deleted file mode 100644 index 9bd0b68d75b3843079e40fe5f957beebd934cfc8..0000000000000000000000000000000000000000 --- a/ivopts-2.patch +++ /dev/null @@ -1,410 +0,0 @@ -re-PR-tree-optimization-90078-ICE-with-deep-template.patch: -commit 8363a2f1f7c47d7b3d1760ce631a6824e91c0d80 - -diff -urpN a/gcc/testsuite/g++.dg/tree-ssa/pr90078.C b/gcc/testsuite/g++.dg/tree-ssa/pr90078.C -new file mode 100644 ---- /dev/null -+++ b/gcc/testsuite/g++.dg/tree-ssa/pr90078.C -@@ -0,0 +1,199 @@ -+// { dg-do compile } -+// { dg-options "-std=c++14 -O2 -ftemplate-depth=1000000" } -+ -+template struct Tensor3; -+template -+struct Tensor3_Expr; -+ -+template struct Tensor4; -+template -+struct Tensor4_Expr; -+ -+template struct Index -+{}; -+template struct Number -+{ -+ Number(){}; -+ operator int() const { return N; } -+}; -+ -+template -+struct Tensor3 -+{ -+ T data[Tensor_Dim0][Tensor_Dim1][Tensor_Dim2]; -+ -+ T operator()(const int N1, const int N2, const int N3) const -+ { -+ return data[N1][N2][N3]; -+ } -+ -+ template -+ Tensor3_Expr, T, -+ Dim0, Dim1, Dim2, i, j, k> -+ operator()(const Index, const Index, -+ const Index) const -+ { -+ return Tensor3_Expr, -+ T, Dim0, Dim1, Dim2, i, j, k>(*this); -+ } -+}; -+ -+template -+struct Tensor3_Expr -+{ -+ A iter; -+ -+ Tensor3_Expr(const A &a) : iter(a) {} -+ T operator()(const int N1, const int N2, const int N3) const -+ { -+ return iter(N1, N2, N3); -+ } -+}; -+ -+template -+struct Tensor3_Expr, T, Dim0, -+ Dim1, Dim2, i, j, k> -+{ -+ Tensor3 &iter; -+ -+ Tensor3_Expr(Tensor3 &a) : iter(a) -+ {} -+ T operator()(const int N1, const int N2, const int N3) const -+ { -+ return iter(N1, N2, N3); -+ } -+}; -+ -+template -+struct Tensor3_times_Tensor3_21 -+{ -+ Tensor3_Expr iterA; -+ Tensor3_Expr iterB; -+ -+ template -+ T eval(const int N1, const int N2, const int N3, const int N4, -+ const Number &) const -+ { -+ return iterA(N1, N2, CurrentDim - 1) * iterB(CurrentDim - 1, N3, N4) -+ + eval(N1, N2, N3, N4, Number()); -+ } -+ T eval(const int N1, const int N2, const int N3, const int N4, -+ const Number<1> &) const -+ { -+ return iterA(N1, N2, 0) * iterB(0, N3, N4); -+ } -+ -+ Tensor3_times_Tensor3_21( -+ const Tensor3_Expr &a, -+ const Tensor3_Expr &b) -+ : iterA(a), iterB(b) -+ {} -+ T operator()(const int &N1, const int &N2, const int &N3, -+ const int &N4) const -+ { -+ return eval(N1, N2, N3, N4, Number()); -+ } -+}; -+ -+template -+Tensor4_Expr, -+ T, Dim0, Dim1, Dim4, Dim5, i, j, l, m> -+operator*(const Tensor3_Expr &a, -+ const Tensor3_Expr &b) -+{ -+ using TensorExpr = Tensor3_times_Tensor3_21; -+ return Tensor4_Expr( -+ TensorExpr(a, b)); -+}; -+ -+template -+struct Tensor4 -+{ -+ T data[Tensor_Dim0][Tensor_Dim1][Tensor_Dim2][Tensor_Dim3]; -+ -+ Tensor4() {} -+ T &operator()(const int N1, const int N2, const int N3, const int N4) -+ { -+ return data[N1][N2][N3][N4]; -+ } -+ -+ template -+ Tensor4_Expr, -+ T, Dim0, Dim1, Dim2, Dim3, i, j, k, l> -+ operator()(const Index, const Index, const Index, -+ const Index) -+ { -+ return Tensor4_Expr< -+ Tensor4, T, Dim0, -+ Dim1, Dim2, Dim3, i, j, k, l>(*this); -+ }; -+}; -+ -+template -+struct Tensor4_Expr -+{ -+ A iter; -+ -+ 
Tensor4_Expr(const A &a) : iter(a) {} -+ T operator()(const int N1, const int N2, const int N3, const int N4) const -+ { -+ return iter(N1, N2, N3, N4); -+ } -+}; -+ -+template -+struct Tensor4_Expr, T, Dim0, Dim1, Dim2, -+ Dim3, i, j, k, l> -+{ -+ Tensor4 &iter; -+ -+ Tensor4_Expr(Tensor4 &a) : iter(a) {} -+ T operator()(const int N1, const int N2, const int N3, const int N4) const -+ { -+ return iter(N1, N2, N3, N4); -+ } -+ -+ template -+ auto &operator=(const Tensor4_Expr &rhs) -+ { -+ for(int ii = 0; ii < Dim0; ++ii) -+ for(int jj = 0; jj < Dim1; ++jj) -+ for(int kk = 0; kk < Dim2; ++kk) -+ for(int ll = 0; ll < Dim3; ++ll) -+ { -+ iter(ii, jj, kk, ll) = rhs(ii, jj, kk, ll); -+ } -+ return *this; -+ } -+}; -+ -+int main() -+{ -+ Tensor3 t1; -+ Tensor3 t2; -+ -+ Index<'l', 100> l; -+ Index<'m', 100> m; -+ Index<'k', 1000> k; -+ Index<'n', 100> n; -+ Index<'o', 100> o; -+ -+ Tensor4 res; -+ res(l, m, n, o) = t1(l, m, k) * t2(k, n, o); -+ return 0; -+} -+ -diff -urpN a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c ---- a/gcc/tree-ssa-loop-ivopts.c -+++ b/gcc/tree-ssa-loop-ivopts.c -@@ -114,7 +114,7 @@ along with GCC; see the file COPYING3. If not see - interface between the GIMPLE and RTL worlds. */ - - /* The infinite cost. */ --#define INFTY 10000000 -+#define INFTY 1000000000 - - /* Returns the expected number of loop iterations for LOOP. - The average trip count is computed from profile data if it -@@ -180,7 +180,7 @@ struct comp_cost - comp_cost (): cost (0), complexity (0), scratch (0) - {} - -- comp_cost (int cost, unsigned complexity, int scratch = 0) -+ comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0) - : cost (cost), complexity (complexity), scratch (scratch) - {} - -@@ -220,16 +220,16 @@ struct comp_cost - /* Returns true if COST1 is smaller or equal than COST2. */ - friend bool operator<= (comp_cost cost1, comp_cost cost2); - -- int cost; /* The runtime cost. */ -+ int64_t cost; /* The runtime cost. */ - unsigned complexity; /* The estimate of the complexity of the code for - the computation (in no concrete units -- - complexity field should be larger for more - complex expressions and addressing modes). */ -- int scratch; /* Scratch used during cost computation. */ -+ int64_t scratch; /* Scratch used during cost computation. 
*/ - }; - - static const comp_cost no_cost; --static const comp_cost infinite_cost (INFTY, INFTY, INFTY); -+static const comp_cost infinite_cost (INFTY, 0, INFTY); - - bool - comp_cost::infinite_cost_p () -@@ -243,6 +243,7 @@ operator+ (comp_cost cost1, comp_cost cost2) - if (cost1.infinite_cost_p () || cost2.infinite_cost_p ()) - return infinite_cost; - -+ gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost); - cost1.cost += cost2.cost; - cost1.complexity += cost2.complexity; - -@@ -256,6 +257,7 @@ operator- (comp_cost cost1, comp_cost cost2) - return infinite_cost; - - gcc_assert (!cost2.infinite_cost_p ()); -+ gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost); - - cost1.cost -= cost2.cost; - cost1.complexity -= cost2.complexity; -@@ -276,6 +278,7 @@ comp_cost::operator+= (HOST_WIDE_INT c) - if (infinite_cost_p ()) - return *this; - -+ gcc_assert (this->cost + c < infinite_cost.cost); - this->cost += c; - - return *this; -@@ -287,6 +290,7 @@ comp_cost::operator-= (HOST_WIDE_INT c) - if (infinite_cost_p ()) - return *this; - -+ gcc_assert (this->cost - c < infinite_cost.cost); - this->cost -= c; - - return *this; -@@ -295,6 +299,7 @@ comp_cost::operator-= (HOST_WIDE_INT c) - comp_cost - comp_cost::operator/= (HOST_WIDE_INT c) - { -+ gcc_assert (c != 0); - if (infinite_cost_p ()) - return *this; - -@@ -309,6 +314,7 @@ comp_cost::operator*= (HOST_WIDE_INT c) - if (infinite_cost_p ()) - return *this; - -+ gcc_assert (this->cost * c < infinite_cost.cost); - this->cost *= c; - - return *this; -@@ -638,7 +644,7 @@ struct iv_ca - comp_cost cand_use_cost; - - /* Total cost of candidates. */ -- unsigned cand_cost; -+ int64_t cand_cost; - - /* Number of times each invariant variable is used. */ - unsigned *n_inv_var_uses; -@@ -4025,16 +4031,16 @@ get_computation_at (struct loop *loop, gimple *at, - if we're optimizing for speed, amortize it over the per-iteration cost. - If ROUND_UP_P is true, the result is round up rather than to zero when - optimizing for speed. */ --static unsigned --adjust_setup_cost (struct ivopts_data *data, unsigned cost, -+static int64_t -+adjust_setup_cost (struct ivopts_data *data, int64_t cost, - bool round_up_p = false) - { - if (cost == INFTY) - return cost; - else if (optimize_loop_for_speed_p (data->current_loop)) - { -- HOST_WIDE_INT niters = avg_loop_niter (data->current_loop); -- return ((HOST_WIDE_INT) cost + (round_up_p ? niters - 1 : 0)) / niters; -+ int64_t niters = (int64_t) avg_loop_niter (data->current_loop); -+ return (cost + (round_up_p ? 
niters - 1 : 0)) / niters; - } - else - return cost; -@@ -4305,7 +4311,7 @@ enum ainc_type - - struct ainc_cost_data - { -- unsigned costs[AINC_NONE]; -+ int64_t costs[AINC_NONE]; - }; - - static comp_cost -@@ -4566,12 +4572,12 @@ get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost) - if (scale_factor == 1) - return cost; - -- int scaled_cost -+ int64_t scaled_cost - = cost.scratch + (cost.cost - cost.scratch) * scale_factor; - - if (dump_file && (dump_flags & TDF_DETAILS)) -- fprintf (dump_file, "Scaling cost based on bb prob " -- "by %2.2f: %d (scratch: %d) -> %d\n", -+ fprintf (dump_file, "Scaling cost based on bb prob by %2.2f: " -+ "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n", - 1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost); - - cost.cost = scaled_cost; -@@ -5539,7 +5545,7 @@ determine_group_iv_costs (struct ivopts_data *data) - || group->cost_map[j].cost.infinite_cost_p ()) - continue; - -- fprintf (dump_file, " %d\t%d\t%d\t", -+ fprintf (dump_file, " %d\t%" PRId64 "\t%d\t", - group->cost_map[j].cand->id, - group->cost_map[j].cost.cost, - group->cost_map[j].cost.complexity); -@@ -5569,7 +5575,7 @@ static void - determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand) - { - comp_cost cost_base; -- unsigned cost, cost_step; -+ int64_t cost, cost_step; - tree base; - - gcc_assert (cand->iv != NULL); -@@ -6139,11 +6145,11 @@ iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs) - unsigned i; - comp_cost cost = iv_ca_cost (ivs); - -- fprintf (file, " cost: %d (complexity %d)\n", cost.cost, -+ fprintf (file, " cost: %" PRId64 " (complexity %d)\n", cost.cost, - cost.complexity); -- fprintf (file, " cand_cost: %d\n cand_group_cost: %d (complexity %d)\n", -- ivs->cand_cost, ivs->cand_use_cost.cost, -- ivs->cand_use_cost.complexity); -+ fprintf (file, " cand_cost: %" PRId64 "\n cand_group_cost: " -+ "%" PRId64 " (complexity %d)\n", ivs->cand_cost, -+ ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity); - bitmap_print (file, ivs->cands, " candidates: ","\n"); - - for (i = 0; i < ivs->upto; i++) -@@ -6151,9 +6157,9 @@ iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs) - struct iv_group *group = data->vgroups[i]; - struct cost_pair *cp = iv_ca_cand_for_group (ivs, group); - if (cp) -- fprintf (file, " group:%d --> iv_cand:%d, cost=(%d,%d)\n", -- group->id, cp->cand->id, cp->cost.cost, -- cp->cost.complexity); -+ fprintf (file, " group:%d --> iv_cand:%d, cost=(" -+ "%" PRId64 ",%d)\n", group->id, cp->cand->id, -+ cp->cost.cost, cp->cost.complexity); - else - fprintf (file, " group:%d --> ??\n", group->id); - } -@@ -6751,9 +6757,9 @@ find_optimal_iv_set (struct ivopts_data *data) - - if (dump_file && (dump_flags & TDF_DETAILS)) - { -- fprintf (dump_file, "Original cost %d (complexity %d)\n\n", -+ fprintf (dump_file, "Original cost %" PRId64 " (complexity %d)\n\n", - origcost.cost, origcost.complexity); -- fprintf (dump_file, "Final cost %d (complexity %d)\n\n", -+ fprintf (dump_file, "Final cost %" PRId64 " (complexity %d)\n\n", - cost.cost, cost.complexity); - } - diff --git a/loop-finite-bugfix.patch b/loop-finite-bugfix.patch deleted file mode 100644 index a29050726972276205114532c6ce661119c1816d..0000000000000000000000000000000000000000 --- a/loop-finite-bugfix.patch +++ /dev/null @@ -1,166 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. 
- -0001-c-94392-only-enable-ffinite-loops-for-C.patch -75efe9cb1f8938a713ce540dc3b27bc2afcd3fae - -diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c -index 6b6c754ad86..58ba0948e79 100644 ---- a/gcc/c-family/c-opts.c -+++ b/gcc/c-family/c-opts.c -@@ -989,6 +989,10 @@ c_common_post_options (const char **pfilename) - if (!global_options_set.x_flag_new_ttp) - flag_new_ttp = (cxx_dialect >= cxx17); - -+ /* C++11 guarantees forward progress. */ -+ if (!global_options_set.x_flag_finite_loops) -+ flag_finite_loops = (optimize >= 2 && cxx_dialect >= cxx11); -+ - if (cxx_dialect >= cxx11) - { - /* If we're allowing C++0x constructs, don't warn about C++98 -diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h -index 1c49a8b8c2d..18b404e292f 100644 ---- a/gcc/cfgloop.h -+++ b/gcc/cfgloop.h -@@ -226,6 +226,10 @@ public: - /* True if the loop is part of an oacc kernels region. */ - unsigned in_oacc_kernels_region : 1; - -+ /* True if the loop is known to be finite. This is a localized -+ flag_finite_loops or similar pragmas state. */ -+ unsigned finite_p : 1; -+ - /* The number of times to unroll the loop. 0 means no information given, - just do what we always do. A value of 1 means do not unroll the loop. - A value of USHRT_MAX means unroll with no specific unrolling factor. -diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c -index c9375565f62..50c7267ec49 100644 ---- a/gcc/cfgloopmanip.c -+++ b/gcc/cfgloopmanip.c -@@ -1023,6 +1023,7 @@ copy_loop_info (class loop *loop, class loop *target) - target->dont_vectorize = loop->dont_vectorize; - target->force_vectorize = loop->force_vectorize; - target->in_oacc_kernels_region = loop->in_oacc_kernels_region; -+ target->finite_p = loop->finite_p; - target->unroll = loop->unroll; - target->owned_clique = loop->owned_clique; - } -diff --git a/gcc/common.opt b/gcc/common.opt -index 4368910cb54..bb2ea4c905d 100644 ---- a/gcc/common.opt -+++ b/gcc/common.opt -@@ -1490,7 +1490,7 @@ Common Report Var(flag_finite_math_only) Optimization SetByCombined - Assume no NaNs or infinities are generated. - - ffinite-loops --Common Report Var(flag_finite_loops) Optimization -+Common Report Var(flag_finite_loops) Optimization Init(0) - Assume that loops with an exit will terminate and not loop indefinitely. - - ffixed- -diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi -index e9e1683e9a8..e3e652ff6c1 100644 ---- a/gcc/doc/invoke.texi -+++ b/gcc/doc/invoke.texi -@@ -10432,7 +10432,8 @@ Assume that a loop with an exit will eventually take the exit and not loop - indefinitely. This allows the compiler to remove loops that otherwise have - no side-effects, not considering eventual endless looping as such. - --This option is enabled by default at @option{-O2}. -+This option is enabled by default at @option{-O2} for C++ with -std=c++11 -+or higher. 
- - @item -ftree-dominator-opts - @opindex ftree-dominator-opts -diff --git a/gcc/lto-streamer-in.c b/gcc/lto-streamer-in.c -index 9566e5ee102..244f5b8aa5c 100644 ---- a/gcc/lto-streamer-in.c -+++ b/gcc/lto-streamer-in.c -@@ -821,6 +821,7 @@ input_cfg (class lto_input_block *ib, class data_in *data_in, - loop->owned_clique = streamer_read_hwi (ib); - loop->dont_vectorize = streamer_read_hwi (ib); - loop->force_vectorize = streamer_read_hwi (ib); -+ loop->finite_p = streamer_read_hwi (ib); - loop->simduid = stream_read_tree (ib, data_in); - - place_new_loop (fn, loop); -diff --git a/gcc/lto-streamer-out.c b/gcc/lto-streamer-out.c -index a219c1d0dd1..52ef94718db 100644 ---- a/gcc/lto-streamer-out.c -+++ b/gcc/lto-streamer-out.c -@@ -1950,6 +1950,7 @@ output_cfg (struct output_block *ob, struct function *fn) - streamer_write_hwi (ob, loop->owned_clique); - streamer_write_hwi (ob, loop->dont_vectorize); - streamer_write_hwi (ob, loop->force_vectorize); -+ streamer_write_hwi (ob, loop->finite_p); - stream_write_tree (ob, loop->simduid, true); - } - -diff --git a/gcc/opts.c b/gcc/opts.c -index 5dc7d65dedd..d4df8627bf7 100644 ---- a/gcc/opts.c -+++ b/gcc/opts.c -@@ -478,7 +478,6 @@ static const struct default_options default_options_table[] = - { OPT_LEVELS_2_PLUS, OPT_fdevirtualize, NULL, 1 }, - { OPT_LEVELS_2_PLUS, OPT_fdevirtualize_speculatively, NULL, 1 }, - { OPT_LEVELS_2_PLUS, OPT_fexpensive_optimizations, NULL, 1 }, -- { OPT_LEVELS_2_PLUS, OPT_ffinite_loops, NULL, 1 }, - { OPT_LEVELS_2_PLUS, OPT_fgcse, NULL, 1 }, - { OPT_LEVELS_2_PLUS, OPT_fhoist_adjacent_loads, NULL, 1 }, - { OPT_LEVELS_2_PLUS, OPT_findirect_inlining, NULL, 1 }, -diff --git a/gcc/testsuite/gcc.dg/torture/pr94392.c b/gcc/testsuite/gcc.dg/torture/pr94392.c -new file mode 100644 -index 00000000000..373f18ce983 ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/torture/pr94392.c -@@ -0,0 +1,22 @@ -+/* { dg-do compile } */ -+/* { dg-skip-if "finite loops" { *-*-* } { "-ffinite-loops" } } */ -+/* { dg-skip-if "LTO optimizes the test" { *-*-* } { "-flto" } } */ -+/* { dg-additional-options "-fdump-tree-optimized" } */ -+ -+int a, b; -+ -+int -+main() -+{ -+ while (1) -+ { -+ /* Try really hard. */ -+ if (a != b) -+ return 1; -+ } -+ return 0; -+} -+ -+/* ISO C does not guarantee forward progress like C++ does so we -+ cannot assume the loop is finite and optimize it to return 1. */ -+/* { dg-final { scan-tree-dump "if" "optimized" } } */ -diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c -index f7b817d94e6..e99fb9ff5d1 100644 ---- a/gcc/tree-cfg.c -+++ b/gcc/tree-cfg.c -@@ -324,6 +324,9 @@ replace_loop_annotate (void) - /* Then look into the latch, if any. */ - if (loop->latch) - replace_loop_annotate_in_block (loop->latch, loop); -+ -+ /* Push the global flag_finite_loops state down to individual loops. */ -+ loop->finite_p = flag_finite_loops; - } - - /* Remove IFN_ANNOTATE. Safeguard for the case loop->latch == NULL. 
*/ -diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c -index 6e6df0bfdb8..7d61ef080eb 100644 ---- a/gcc/tree-ssa-loop-niter.c -+++ b/gcc/tree-ssa-loop-niter.c -@@ -2834,7 +2834,7 @@ finite_loop_p (class loop *loop) - return true; - } - -- if (flag_finite_loops) -+ if (loop->finite_p) - { - unsigned i; - vec exits = get_loop_exit_edges (loop); diff --git a/loop-finite.patch b/loop-finite.patch deleted file mode 100644 index cc2543eb6b777c946c6dd7d36dd9d11b524df6a8..0000000000000000000000000000000000000000 --- a/loop-finite.patch +++ /dev/null @@ -1,373 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-PR-tree-optimization-89713-Assume-loop-with-an-exit-.patch -c29c92c789d93848cc1c929838771bfc68cb272c - -diff --git a/gcc/common.opt b/gcc/common.opt -index e1404165feb..a1544d06824 100644 ---- a/gcc/common.opt -+++ b/gcc/common.opt -@@ -1437,6 +1437,10 @@ ffinite-math-only - Common Report Var(flag_finite_math_only) Optimization SetByCombined - Assume no NaNs or infinities are generated. - -+ffinite-loops -+Common Report Var(flag_finite_loops) Optimization -+Assume that loops with an exit will terminate and not loop indefinitely. -+ - ffixed- - Common Joined RejectNegative Var(common_deferred_options) Defer - -ffixed- Mark as being unavailable to the compiler. -diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi -index 090d606b3ba..bf9da0f0a6e 100644 ---- a/gcc/doc/invoke.texi -+++ b/gcc/doc/invoke.texi -@@ -413,6 +413,7 @@ Objective-C and Objective-C++ Dialects}. - -fdevirtualize-at-ltrans -fdse @gol - -fearly-inlining -fipa-sra -fexpensive-optimizations -ffat-lto-objects @gol - -ffast-math -ffinite-math-only -ffloat-store -fexcess-precision=@var{style} @gol -+-ffinite-loops @gol - -fforward-propagate -ffp-contract=@var{style} -ffunction-sections @gol - -fgcse -fgcse-after-reload -fgcse-las -fgcse-lm -fgraphite-identity @gol - -fgcse-sm -fhoist-adjacent-loads -fif-conversion @gol -@@ -8303,6 +8304,7 @@ also turns on the following optimization flags: - -fdelete-null-pointer-checks @gol - -fdevirtualize -fdevirtualize-speculatively @gol - -fexpensive-optimizations @gol -+-ffinite-loops @gol - -fgcse -fgcse-lm @gol - -fhoist-adjacent-loads @gol - -finline-small-functions @gol -@@ -9524,6 +9526,15 @@ that may set @code{errno} but are otherwise free of side effects. This flag is - enabled by default at @option{-O2} and higher if @option{-Os} is not also - specified. - -+@item -ffinite-loops -+@opindex ffinite-loops -+@opindex fno-finite-loops -+Assume that a loop with an exit will eventually take the exit and not loop -+indefinitely. This allows the compiler to remove loops that otherwise have -+no side-effects, not considering eventual endless looping as such. -+ -+This option is enabled by default at @option{-O2}. 
-+ - @item -ftree-dominator-opts - @opindex ftree-dominator-opts - Perform a variety of simple scalar cleanups (constant/copy -diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c -index 97ae47b3135..c8a281c6d28 100644 ---- a/gcc/omp-offload.c -+++ b/gcc/omp-offload.c -@@ -300,7 +300,7 @@ oacc_xform_loop (gcall *call) - tree chunk_size = NULL_TREE; - unsigned mask = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 5)); - tree lhs = gimple_call_lhs (call); -- tree type = TREE_TYPE (lhs); -+ tree type = NULL_TREE; - tree diff_type = TREE_TYPE (range); - tree r = NULL_TREE; - gimple_seq seq = NULL; -@@ -308,6 +308,15 @@ oacc_xform_loop (gcall *call) - unsigned outer_mask = mask & (~mask + 1); // Outermost partitioning - unsigned inner_mask = mask & ~outer_mask; // Inner partitioning (if any) - -+ /* Skip lowering if return value of IFN_GOACC_LOOP call is not used. */ -+ if (!lhs) -+ { -+ gsi_replace_with_seq (&gsi, seq, true); -+ return; -+ } -+ -+ type = TREE_TYPE (lhs); -+ - #ifdef ACCEL_COMPILER - chunk_size = gimple_call_arg (call, 4); - if (integer_minus_onep (chunk_size) /* Force static allocation. */ -diff --git a/gcc/opts.c b/gcc/opts.c -index 64f94ac8ffd..b38bfb15a56 100644 ---- a/gcc/opts.c -+++ b/gcc/opts.c -@@ -494,6 +494,7 @@ static const struct default_options default_options_table[] = - { OPT_LEVELS_2_PLUS, OPT_fdevirtualize, NULL, 1 }, - { OPT_LEVELS_2_PLUS, OPT_fdevirtualize_speculatively, NULL, 1 }, - { OPT_LEVELS_2_PLUS, OPT_fexpensive_optimizations, NULL, 1 }, -+ { OPT_LEVELS_2_PLUS, OPT_ffinite_loops, NULL, 1 }, - { OPT_LEVELS_2_PLUS, OPT_fgcse, NULL, 1 }, - { OPT_LEVELS_2_PLUS, OPT_fhoist_adjacent_loads, NULL, 1 }, - { OPT_LEVELS_2_PLUS, OPT_findirect_inlining, NULL, 1 }, -diff --git a/gcc/testsuite/g++.dg/tree-ssa/empty-loop.C b/gcc/testsuite/g++.dg/tree-ssa/empty-loop.C -new file mode 100644 -index 00000000000..6b1e879e6a9 ---- /dev/null -+++ b/gcc/testsuite/g++.dg/tree-ssa/empty-loop.C -@@ -0,0 +1,33 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fdump-tree-cddce2 -ffinite-loops" } */ -+ -+#include -+#include -+#include -+#include -+#include -+ -+using namespace std; -+ -+int foo (vector &v, list &l, set &s, map &m) -+{ -+ for (vector::iterator it = v.begin (); it != v.end (); ++it) -+ it->length(); -+ -+ for (list::iterator it = l.begin (); it != l.end (); ++it) -+ it->length(); -+ -+ for (map::iterator it = m.begin (); it != m.end (); ++it) -+ it->first + it->second.length(); -+ -+ for (set::iterator it0 = s.begin (); it0 != s.end(); ++it0) -+ for (vector::reverse_iterator it1 = v.rbegin(); it1 != v.rend(); ++it1) -+ { -+ it0->length(); -+ it1->length(); -+ } -+ -+ return 0; -+} -+/* { dg-final { scan-tree-dump-not "if" "cddce2"} } */ -+ -diff --git a/gcc/testsuite/gcc.dg/const-1.c b/gcc/testsuite/gcc.dg/const-1.c -index a5b2b167728..2e95bd8e2ea 100644 ---- a/gcc/testsuite/gcc.dg/const-1.c -+++ b/gcc/testsuite/gcc.dg/const-1.c -@@ -1,5 +1,5 @@ - /* { dg-do compile { target nonpic } } */ --/* { dg-options "-O2 -Wsuggest-attribute=const" } */ -+/* { dg-options "-O2 -Wsuggest-attribute=const -fno-finite-loops" } */ - - extern int extern_const(int a) __attribute__ ((const)); - -diff --git a/gcc/testsuite/gcc.dg/graphite/graphite.exp b/gcc/testsuite/gcc.dg/graphite/graphite.exp -index ea6144607e2..523a955e82d 100644 ---- a/gcc/testsuite/gcc.dg/graphite/graphite.exp -+++ b/gcc/testsuite/gcc.dg/graphite/graphite.exp -@@ -56,7 +56,7 @@ set vect_files [lsort [glob -nocomplain $srcdir/$subdir/vect-*.c ] ] - - # Tests to be compiled. 
- set dg-do-what-default compile --dg-runtest $scop_files "" "-O2 -fgraphite -fdump-tree-graphite-all" -+dg-runtest $scop_files "" "-O2 -fgraphite -fdump-tree-graphite-all -fno-finite-loops" - dg-runtest $id_files "" "-O2 -fgraphite-identity -ffast-math -fdump-tree-graphite-details" - - # Tests to be run. -diff --git a/gcc/testsuite/gcc.dg/loop-unswitch-1.c b/gcc/testsuite/gcc.dg/loop-unswitch-1.c -index f6fc41d6bcc..de2fb2c0e4b 100644 ---- a/gcc/testsuite/gcc.dg/loop-unswitch-1.c -+++ b/gcc/testsuite/gcc.dg/loop-unswitch-1.c -@@ -1,6 +1,6 @@ - /* For PR rtl-optimization/27735 */ - /* { dg-do compile } */ --/* { dg-options "-O2 -funswitch-loops -fdump-tree-unswitch-details" } */ -+/* { dg-options "-O2 -funswitch-loops -fdump-tree-unswitch-details -fno-finite-loops" } */ - - void set_color(void); - void xml_colorize_line(unsigned int *p, int state) -diff --git a/gcc/testsuite/gcc.dg/predict-9.c b/gcc/testsuite/gcc.dg/predict-9.c -index 7e5ba085ece..f491c511bd9 100644 ---- a/gcc/testsuite/gcc.dg/predict-9.c -+++ b/gcc/testsuite/gcc.dg/predict-9.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -fdisable-tree-evrp -fdump-tree-profile_estimate" } */ -+/* { dg-options "-O2 -fdisable-tree-evrp -fdump-tree-profile_estimate -fno-finite-loops" } */ - - extern int global; - extern int global2; -diff --git a/gcc/testsuite/gcc.dg/pure-2.c b/gcc/testsuite/gcc.dg/pure-2.c -index fe6e2bce695..318cfd18630 100644 ---- a/gcc/testsuite/gcc.dg/pure-2.c -+++ b/gcc/testsuite/gcc.dg/pure-2.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -Wsuggest-attribute=pure" } */ -+/* { dg-options "-O2 -Wsuggest-attribute=pure -fno-finite-loops" } */ - /* { dg-add-options bind_pic_locally } */ - - extern int extern_const(int a) __attribute__ ((pure)); -diff --git a/gcc/testsuite/gcc.dg/tree-ssa/20040211-1.c b/gcc/testsuite/gcc.dg/tree-ssa/20040211-1.c -index d289e5d0f55..a9bdf26931a 100644 ---- a/gcc/testsuite/gcc.dg/tree-ssa/20040211-1.c -+++ b/gcc/testsuite/gcc.dg/tree-ssa/20040211-1.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -fdump-tree-cddce2" } */ -+/* { dg-options "-O2 -fdump-tree-cddce2 -fno-finite-loops" } */ - - struct rtx_def; - typedef struct rtx_def *rtx; -diff --git a/gcc/testsuite/gcc.dg/tree-ssa/dce-2.c b/gcc/testsuite/gcc.dg/tree-ssa/dce-2.c -new file mode 100644 -index 00000000000..18c1ddb819e ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/tree-ssa/dce-2.c -@@ -0,0 +1,37 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fdump-tree-cddce1 -ffinite-loops" } */ -+ -+typedef struct list { -+ char pad[15]; -+ struct list *next; -+} list; -+ -+int data; -+ -+list *head, *tail; -+ -+int __attribute__((pure)) pfn (int); -+ -+int foo (unsigned u, int s) -+{ -+ unsigned i; -+ list *p; -+ int j; -+ -+ for (i = 0; i < u; i += 2) -+ ; -+ -+ for (p = head; p; p = p->next) -+ ; -+ -+ for (j = data; j & s; j = pfn (j + 3)) -+ ; -+ -+ for (p = head; p != tail; p = p->next) -+ for (j = data + 1; j > s; j = pfn (j + 2)) -+ ; -+ -+ return 0; -+} -+/* { dg-final { scan-tree-dump-not "if" "cddce1"} } */ -+ -diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-10.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-10.c -index a29c9fb2501..3d05ad2d073 100644 ---- a/gcc/testsuite/gcc.dg/tree-ssa/loop-10.c -+++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-10.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -fdump-tree-optimized" } */ -+/* { dg-options "-O2 -fdump-tree-optimized -fno-finite-loops" } */ - /* { dg-require-effective-target int32plus } */ - - int bar (void); -diff 
--git a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c -index e9b4f2628d5..187c08407d5 100644 ---- a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c -+++ b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -fsplit-paths -fno-tree-cselim -fdump-tree-split-paths-details -w" } */ -+/* { dg-options "-O2 -fsplit-paths -fno-tree-cselim -fdump-tree-split-paths-details -w -fno-finite-loops" } */ - - struct __sFILE - { -diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-12.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-12.c -index d829b04d177..67526762f2c 100644 ---- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-12.c -+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-12.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -fdump-tree-thread2-details -fdump-tree-thread3-details -fdump-tree-thread4-details" } */ -+/* { dg-options "-O2 -fdump-tree-thread2-details -fdump-tree-thread3-details -fdump-tree-thread4-details -fno-finite-loops" } */ - /* { dg-final { scan-tree-dump "FSM" "thread2" } } */ - /* { dg-final { scan-tree-dump "FSM" "thread3" } } */ - /* { dg-final { scan-tree-dump "FSM" "thread4" { xfail *-*-* } } } */ -diff --git a/gcc/tree-ssa-dce.c b/gcc/tree-ssa-dce.c -index 2478219d873..a38899edd6c 100644 ---- a/gcc/tree-ssa-dce.c -+++ b/gcc/tree-ssa-dce.c -@@ -245,6 +245,17 @@ mark_stmt_if_obviously_necessary (gimple *stmt, bool aggressive) - mark_stmt_necessary (stmt, true); - return; - } -+ /* IFN_GOACC_LOOP calls are necessary in that they are used to -+ represent parameter (i.e. step, bound) of a lowered OpenACC -+ partitioned loop. But this kind of partitioned loop might not -+ survive from aggressive loop removal for it has loop exit and -+ is assumed to be finite. Therefore, we need to explicitly mark -+ these calls. (An example is libgomp.oacc-c-c++-common/pr84955.c) */ -+ if (gimple_call_internal_p (stmt, IFN_GOACC_LOOP)) -+ { -+ mark_stmt_necessary (stmt, true); -+ return; -+ } - if (!gimple_call_lhs (stmt)) - return; - break; -diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c -index 84e6e313c85..f51385900ed 100644 ---- a/gcc/tree-ssa-loop-niter.c -+++ b/gcc/tree-ssa-loop-niter.c -@@ -2830,6 +2830,27 @@ finite_loop_p (struct loop *loop) - loop->num); - return true; - } -+ -+ if (flag_finite_loops) -+ { -+ unsigned i; -+ vec exits = get_loop_exit_edges (loop); -+ edge ex; -+ -+ /* If the loop has a normal exit, we can assume it will terminate. 
*/ -+ FOR_EACH_VEC_ELT (exits, i, ex) -+ if (!(ex->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_FAKE))) -+ { -+ exits.release (); -+ if (dump_file) -+ fprintf (dump_file, "Assume loop %i to be finite: it has an exit " -+ "and -ffinite-loops is on.\n", loop->num); -+ return true; -+ } -+ -+ exits.release (); -+ } -+ - return false; - } - -diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr84955-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr84955-1.c -new file mode 100644 -index 00000000000..44767cd27c3 ---- /dev/null -+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr84955-1.c -@@ -0,0 +1,31 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fdump-tree-cddce2 -ffinite-loops" } */ -+ -+int -+f1 (void) -+{ -+ int i, j; -+ -+#pragma acc parallel loop tile(2,3) -+ for (i = 1; i < 10; i++) -+ for (j = 1; j < 10; j++) -+ for (;;) -+ ; -+ -+ return i + j; -+} -+ -+int -+f2 (void) -+{ -+ int i, j, k; -+ -+#pragma acc parallel loop tile(2,3) -+ for (i = 1; i < 10; i++) -+ for (j = 1; j < 10; j++) -+ for (k = 1; k < 10; k++) -+ ; -+ -+ return i + j; -+} -+/* { dg-final { scan-tree-dump-not "if" "cddce2"} } */ diff --git a/loop-split.patch b/loop-split.patch deleted file mode 100644 index d99db75bf1769d4268a6f1d237f2c71027bd99e7..0000000000000000000000000000000000000000 --- a/loop-split.patch +++ /dev/null @@ -1,1282 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-Loop-split-on-semi-invariant-conditional-statement.patch -095f78c62157124ad479a3f98b6995ced090b807 - -diff --git a/gcc/params.def b/gcc/params.def -index 942447d77e6..df7d1f7c5e7 100644 ---- a/gcc/params.def -+++ b/gcc/params.def -@@ -415,6 +415,12 @@ DEFPARAM(PARAM_MAX_UNSWITCH_LEVEL, - "The maximum number of unswitchings in a single loop.", - 3, 0, 0) - -+DEFPARAM(PARAM_MIN_LOOP_COND_SPLIT_PROB, -+ "min-loop-cond-split-prob", -+ "The minimum threshold for probability of semi-invariant condition " -+ "statement to trigger loop split.", -+ 30, 0, 100) -+ - /* The maximum number of insns in loop header duplicated by the copy loop - headers pass. 
*/ - DEFPARAM(PARAM_MAX_LOOP_HEADER_INSNS, -diff --git a/gcc/testsuite/g++.dg/tree-ssa/loop-cond-split-1.C b/gcc/testsuite/g++.dg/tree-ssa/loop-cond-split-1.C -new file mode 100644 -index 00000000000..0d679cb9035 ---- /dev/null -+++ b/gcc/testsuite/g++.dg/tree-ssa/loop-cond-split-1.C -@@ -0,0 +1,33 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O3 -fdump-tree-lsplit-details" } */ -+ -+#include -+#include -+ -+using namespace std; -+ -+class A -+{ -+public: -+ bool empty; -+ void set (string s); -+}; -+ -+class B -+{ -+ map m; -+ void f (); -+}; -+ -+extern A *ga; -+ -+void B::f () -+{ -+ for (map::iterator iter = m.begin (); iter != m.end (); ++iter) -+ { -+ if (ga->empty) -+ ga->set (iter->second); -+ } -+} -+ -+/* { dg-final { scan-tree-dump-times "loop split on semi-invariant condition at false branch" 1 "lsplit" } } */ -diff --git a/gcc/testsuite/gcc.dg/torture/pr55107.c b/gcc/testsuite/gcc.dg/torture/pr55107.c -index 2402716be30..d757c041220 100644 ---- a/gcc/testsuite/gcc.dg/torture/pr55107.c -+++ b/gcc/testsuite/gcc.dg/torture/pr55107.c -@@ -1,4 +1,5 @@ - /* { dg-do compile } */ -+/* { dg-additional-options "-fno-split-loops" } */ - - typedef unsigned short uint16_t; - -diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-cond-split-1.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-cond-split-1.c -new file mode 100644 -index 00000000000..feb776e8373 ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-cond-split-1.c -@@ -0,0 +1,97 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O3 -fdump-tree-lsplit-details" } */ -+ -+extern const int step; -+ -+int ga, gb; -+ -+__attribute__((pure)) __attribute__((noinline)) int inc (int i) -+{ -+ return i + step; -+} -+ -+extern int do_something (void); -+ -+void test1 (int n) -+{ -+ int i; -+ -+ for (i = 0; i < n; i = inc (i)) -+ { -+ if (ga) -+ ga = do_something (); -+ } -+} -+ -+void test2 (int n, int p) -+{ -+ int i; -+ int v; -+ -+ for (i = 0; i < n ; i = inc (i)) -+ { -+ if (ga) -+ { -+ v = inc (2); -+ gb += 1; -+ } -+ else -+ { -+ v = p * p; -+ gb *= 3; -+ } -+ -+ if (v < 10) -+ ga = do_something (); -+ } -+} -+ -+void test3 (int n, int p) -+{ -+ int i; -+ int c = p + 1; -+ int v; -+ -+ for (i = 0; i < n ; i = inc (i)) -+ { -+ if (c) -+ { -+ v = inc (c); -+ gb += 1; -+ } -+ else -+ { -+ v = p * p; -+ gb *= 3; -+ } -+ -+ if (v < 10) -+ c = do_something (); -+ } -+} -+ -+void test4 (int n, int p) -+{ -+ int i; -+ int v; -+ -+ for (i = 0; i < n ; i = inc (i)) -+ { -+ if (ga) -+ { -+ v = inc (2); -+ if (gb > 16) -+ v = inc (5); -+ } -+ else -+ { -+ v = p * p; -+ gb += 2; -+ } -+ -+ if (v < 10) -+ ga = do_something (); -+ } -+} -+ -+/* { dg-final { scan-tree-dump-times "loop split on semi-invariant condition at false branch" 3 "lsplit" } } */ -diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c -index f5f083384bc..6302d044e09 100644 ---- a/gcc/tree-ssa-loop-split.c -+++ b/gcc/tree-ssa-loop-split.c -@@ -32,7 +32,10 @@ along with GCC; see the file COPYING3. If not see - #include "tree-ssa-loop.h" - #include "tree-ssa-loop-manip.h" - #include "tree-into-ssa.h" -+#include "tree-inline.h" -+#include "tree-cfgcleanup.h" - #include "cfgloop.h" -+#include "params.h" - #include "tree-scalar-evolution.h" - #include "gimple-iterator.h" - #include "gimple-pretty-print.h" -@@ -40,7 +43,9 @@ along with GCC; see the file COPYING3. If not see - #include "gimple-fold.h" - #include "gimplify-me.h" - --/* This file implements loop splitting, i.e. transformation of loops like -+/* This file implements two kinds of loop splitting. 
-+ -+ One transformation of loops like: - - for (i = 0; i < 100; i++) - { -@@ -487,8 +492,9 @@ compute_new_first_bound (gimple_seq *stmts, class tree_niter_desc *niter, - single exit of LOOP. */ - - static bool --split_loop (struct loop *loop1, struct tree_niter_desc *niter) -+split_loop (struct loop *loop1) - { -+ struct tree_niter_desc niter; - basic_block *bbs; - unsigned i; - bool changed = false; -@@ -496,8 +502,28 @@ split_loop (class loop *loop1, class tree_niter_desc *niter) - tree border = NULL_TREE; - affine_iv iv; - -+ if (!single_exit (loop1) -+ /* ??? We could handle non-empty latches when we split the latch edge -+ (not the exit edge), and put the new exit condition in the new block. -+ OTOH this executes some code unconditionally that might have been -+ skipped by the original exit before. */ -+ || !empty_block_p (loop1->latch) -+ || !easy_exit_values (loop1) -+ || !number_of_iterations_exit (loop1, single_exit (loop1), &niter, -+ false, true) -+ || niter.cmp == ERROR_MARK -+ /* We can't yet handle loops controlled by a != predicate. */ -+ || niter.cmp == NE_EXPR) -+ return false; -+ - bbs = get_loop_body (loop1); - -+ if (!can_copy_bbs_p (bbs, loop1->num_nodes)) -+ { -+ free (bbs); -+ return false; -+ } -+ - /* Find a splitting opportunity. */ - for (i = 0; i < loop1->num_nodes; i++) - if ((guard_iv = split_at_bb_p (loop1, bbs[i], &border, &iv))) -@@ -505,8 +531,8 @@ split_loop (class loop *loop1, class tree_niter_desc *niter) - /* Handling opposite steps is not implemented yet. Neither - is handling different step sizes. */ - if ((tree_int_cst_sign_bit (iv.step) -- != tree_int_cst_sign_bit (niter->control.step)) -- || !tree_int_cst_equal (iv.step, niter->control.step)) -+ != tree_int_cst_sign_bit (niter.control.step)) -+ || !tree_int_cst_equal (iv.step, niter.control.step)) - continue; - - /* Find a loop PHI node that defines guard_iv directly, -@@ -575,7 +601,7 @@ split_loop (class loop *loop1, class tree_niter_desc *niter) - Compute the new bound for the guarding IV and patch the - loop exit to use it instead of original IV and bound. */ - gimple_seq stmts = NULL; -- tree newend = compute_new_first_bound (&stmts, niter, border, -+ tree newend = compute_new_first_bound (&stmts, &niter, border, - guard_code, guard_init); - if (stmts) - gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1), -@@ -612,6 +638,956 @@ split_loop (class loop *loop1, class tree_niter_desc *niter) - return changed; - } - -+/* Another transformation of loops like: -+ -+ for (i = INIT (); CHECK (i); i = NEXT ()) -+ { -+ if (expr (a_1, a_2, ..., a_n)) // expr is pure -+ a_j = ...; // change at least one a_j -+ else -+ S; // not change any a_j -+ } -+ -+ into: -+ -+ for (i = INIT (); CHECK (i); i = NEXT ()) -+ { -+ if (expr (a_1, a_2, ..., a_n)) -+ a_j = ...; -+ else -+ { -+ S; -+ i = NEXT (); -+ break; -+ } -+ } -+ -+ for (; CHECK (i); i = NEXT ()) -+ { -+ S; -+ } -+ -+ */ -+ -+/* Data structure to hold temporary information during loop split upon -+ semi-invariant conditional statement. */ -+class split_info { -+public: -+ /* Array of all basic blocks in a loop, returned by get_loop_body(). */ -+ basic_block *bbs; -+ -+ /* All memory store/clobber statements in a loop. */ -+ auto_vec memory_stores; -+ -+ /* Whether above memory stores vector has been filled. */ -+ int need_init; -+ -+ /* Control dependencies of basic blocks in a loop. 
*/ -+ auto_vec *> control_deps; -+ -+ split_info () : bbs (NULL), need_init (true) { } -+ -+ ~split_info () -+ { -+ if (bbs) -+ free (bbs); -+ -+ for (unsigned i = 0; i < control_deps.length (); i++) -+ delete control_deps[i]; -+ } -+}; -+ -+/* Find all statements with memory-write effect in LOOP, including memory -+ store and non-pure function call, and keep those in a vector. This work -+ is only done one time, for the vector should be constant during analysis -+ stage of semi-invariant condition. */ -+ -+static void -+find_vdef_in_loop (struct loop *loop) -+{ -+ split_info *info = (split_info *) loop->aux; -+ gphi *vphi = get_virtual_phi (loop->header); -+ -+ /* Indicate memory store vector has been filled. */ -+ info->need_init = false; -+ -+ /* If loop contains memory operation, there must be a virtual PHI node in -+ loop header basic block. */ -+ if (vphi == NULL) -+ return; -+ -+ /* All virtual SSA names inside the loop are connected to be a cyclic -+ graph via virtual PHI nodes. The virtual PHI node in loop header just -+ links the first and the last virtual SSA names, by using the last as -+ PHI operand to define the first. */ -+ const edge latch = loop_latch_edge (loop); -+ const tree first = gimple_phi_result (vphi); -+ const tree last = PHI_ARG_DEF_FROM_EDGE (vphi, latch); -+ -+ /* The virtual SSA cyclic graph might consist of only one SSA name, who -+ is defined by itself. -+ -+ .MEM_1 = PHI <.MEM_2(loop entry edge), .MEM_1(latch edge)> -+ -+ This means the loop contains only memory loads, so we can skip it. */ -+ if (first == last) -+ return; -+ -+ auto_vec other_stores; -+ auto_vec worklist; -+ auto_bitmap visited; -+ -+ bitmap_set_bit (visited, SSA_NAME_VERSION (first)); -+ bitmap_set_bit (visited, SSA_NAME_VERSION (last)); -+ worklist.safe_push (last); -+ -+ do -+ { -+ tree vuse = worklist.pop (); -+ gimple *stmt = SSA_NAME_DEF_STMT (vuse); -+ -+ /* We mark the first and last SSA names as visited at the beginning, -+ and reversely start the process from the last SSA name towards the -+ first, which ensures that this do-while will not touch SSA names -+ defined outside the loop. */ -+ gcc_assert (gimple_bb (stmt) -+ && flow_bb_inside_loop_p (loop, gimple_bb (stmt))); -+ -+ if (gimple_code (stmt) == GIMPLE_PHI) -+ { -+ gphi *phi = as_a (stmt); -+ -+ for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i) -+ { -+ tree arg = gimple_phi_arg_def (stmt, i); -+ -+ if (bitmap_set_bit (visited, SSA_NAME_VERSION (arg))) -+ worklist.safe_push (arg); -+ } -+ } -+ else -+ { -+ tree prev = gimple_vuse (stmt); -+ -+ /* Non-pure call statement is conservatively assumed to impact all -+ memory locations. So place call statements ahead of other memory -+ stores in the vector with an idea of of using them as shortcut -+ terminators to memory alias analysis. */ -+ if (gimple_code (stmt) == GIMPLE_CALL) -+ info->memory_stores.safe_push (stmt); -+ else -+ other_stores.safe_push (stmt); -+ -+ if (bitmap_set_bit (visited, SSA_NAME_VERSION (prev))) -+ worklist.safe_push (prev); -+ } -+ } while (!worklist.is_empty ()); -+ -+ info->memory_stores.safe_splice (other_stores); -+} -+ -+/* Two basic blocks have equivalent control dependency if one dominates to -+ the other, and it is post-dominated by the latter. Given a basic block -+ BB in LOOP, find farest equivalent dominating basic block. For BB, there -+ is a constraint that BB does not post-dominate loop header of LOOP, this -+ means BB is control-dependent on at least one basic block in LOOP. 
*/ -+ -+static basic_block -+get_control_equiv_head_block (struct loop *loop, basic_block bb) -+{ -+ while (!bb->aux) -+ { -+ basic_block dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb); -+ -+ gcc_checking_assert (dom_bb && flow_bb_inside_loop_p (loop, dom_bb)); -+ -+ if (!dominated_by_p (CDI_POST_DOMINATORS, dom_bb, bb)) -+ break; -+ -+ bb = dom_bb; -+ } -+ return bb; -+} -+ -+/* Given a BB in LOOP, find out all basic blocks in LOOP that BB is control- -+ dependent on. */ -+ -+static hash_set * -+find_control_dep_blocks (struct loop *loop, basic_block bb) -+{ -+ /* BB has same control dependency as loop header, then it is not control- -+ dependent on any basic block in LOOP. */ -+ if (dominated_by_p (CDI_POST_DOMINATORS, loop->header, bb)) -+ return NULL; -+ -+ basic_block equiv_head = get_control_equiv_head_block (loop, bb); -+ -+ if (equiv_head->aux) -+ { -+ /* There is a basic block containing control dependency equivalent -+ to BB. No need to recompute that, and also set this information -+ to other equivalent basic blocks. */ -+ for (; bb != equiv_head; -+ bb = get_immediate_dominator (CDI_DOMINATORS, bb)) -+ bb->aux = equiv_head->aux; -+ return (hash_set *) equiv_head->aux; -+ } -+ -+ /* A basic block X is control-dependent on another Y iff there exists -+ a path from X to Y, in which every basic block other than X and Y -+ is post-dominated by Y, but X is not post-dominated by Y. -+ -+ According to this rule, traverse basic blocks in the loop backwards -+ starting from BB, if a basic block is post-dominated by BB, extend -+ current post-dominating path to this block, otherwise it is another -+ one that BB is control-dependent on. */ -+ -+ auto_vec pdom_worklist; -+ hash_set pdom_visited; -+ hash_set *dep_bbs = new hash_set; -+ -+ pdom_worklist.safe_push (equiv_head); -+ -+ do -+ { -+ basic_block pdom_bb = pdom_worklist.pop (); -+ edge_iterator ei; -+ edge e; -+ -+ if (pdom_visited.add (pdom_bb)) -+ continue; -+ -+ FOR_EACH_EDGE (e, ei, pdom_bb->preds) -+ { -+ basic_block pred_bb = e->src; -+ -+ if (!dominated_by_p (CDI_POST_DOMINATORS, pred_bb, bb)) -+ { -+ dep_bbs->add (pred_bb); -+ continue; -+ } -+ -+ pred_bb = get_control_equiv_head_block (loop, pred_bb); -+ -+ if (pdom_visited.contains (pred_bb)) -+ continue; -+ -+ if (!pred_bb->aux) -+ { -+ pdom_worklist.safe_push (pred_bb); -+ continue; -+ } -+ -+ /* If control dependency of basic block is available, fast extend -+ post-dominating path using the information instead of advancing -+ forward step-by-step. */ -+ hash_set *pred_dep_bbs -+ = (hash_set *) pred_bb->aux; -+ -+ for (hash_set::iterator iter = pred_dep_bbs->begin (); -+ iter != pred_dep_bbs->end (); ++iter) -+ { -+ basic_block pred_dep_bb = *iter; -+ -+ /* Basic blocks can either be in control dependency of BB, or -+ must be post-dominated by BB, if so, extend the path from -+ these basic blocks. */ -+ if (!dominated_by_p (CDI_POST_DOMINATORS, pred_dep_bb, bb)) -+ dep_bbs->add (pred_dep_bb); -+ else if (!pdom_visited.contains (pred_dep_bb)) -+ pdom_worklist.safe_push (pred_dep_bb); -+ } -+ } -+ } while (!pdom_worklist.is_empty ()); -+ -+ /* Record computed control dependencies in loop so that we can reach them -+ when reclaiming resources. */ -+ ((split_info *) loop->aux)->control_deps.safe_push (dep_bbs); -+ -+ /* Associate control dependence with related equivalent basic blocks. 
*/ -+ for (equiv_head->aux = dep_bbs; bb != equiv_head; -+ bb = get_immediate_dominator (CDI_DOMINATORS, bb)) -+ bb->aux = dep_bbs; -+ -+ return dep_bbs; -+} -+ -+/* Forward declaration */ -+ -+static bool -+stmt_semi_invariant_p_1 (struct loop *loop, gimple *stmt, -+ const_basic_block skip_head, -+ hash_map &stmt_stat); -+ -+/* Given STMT, memory load or pure call statement, check whether it is impacted -+ by some memory store in LOOP, excluding trace starting from SKIP_HEAD (the -+ trace is composed of SKIP_HEAD and those basic block dominated by it, always -+ corresponds to one branch of a conditional statement). If SKIP_HEAD is -+ NULL, all basic blocks of LOOP are checked. */ -+ -+static bool -+vuse_semi_invariant_p (struct loop *loop, gimple *stmt, -+ const_basic_block skip_head) -+{ -+ split_info *info = (split_info *) loop->aux; -+ tree rhs = NULL_TREE; -+ ao_ref ref; -+ gimple *store; -+ unsigned i; -+ -+ /* Collect memory store/clobber statements if haven't done that. */ -+ if (info->need_init) -+ find_vdef_in_loop (loop); -+ -+ if (is_gimple_assign (stmt)) -+ rhs = gimple_assign_rhs1 (stmt); -+ -+ ao_ref_init (&ref, rhs); -+ -+ FOR_EACH_VEC_ELT (info->memory_stores, i, store) -+ { -+ /* Skip basic blocks dominated by SKIP_HEAD, if non-NULL. */ -+ if (skip_head -+ && dominated_by_p (CDI_DOMINATORS, gimple_bb (store), skip_head)) -+ continue; -+ -+ if (!ref.ref || stmt_may_clobber_ref_p_1 (store, &ref)) -+ return false; -+ } -+ -+ return true; -+} -+ -+/* Suppose one condition branch, led by SKIP_HEAD, is not executed since -+ certain iteration of LOOP, check whether an SSA name (NAME) remains -+ unchanged in next iteration. We call this characteristic semi- -+ invariantness. SKIP_HEAD might be NULL, if so, nothing excluded, all basic -+ blocks and control flows in the loop will be considered. Semi-invariant -+ state of checked statement is cached in hash map STMT_STAT to avoid -+ redundant computation in possible following re-check. */ -+ -+static inline bool -+ssa_semi_invariant_p (struct loop *loop, tree name, -+ const_basic_block skip_head, -+ hash_map &stmt_stat) -+{ -+ gimple *def = SSA_NAME_DEF_STMT (name); -+ const_basic_block def_bb = gimple_bb (def); -+ -+ /* An SSA name defined outside loop is definitely semi-invariant. */ -+ if (!def_bb || !flow_bb_inside_loop_p (loop, def_bb)) -+ return true; -+ -+ return stmt_semi_invariant_p_1 (loop, def, skip_head, stmt_stat); -+} -+ -+/* Check whether a loop iteration PHI node (LOOP_PHI) defines a value that is -+ semi-invariant in LOOP. Basic blocks dominated by SKIP_HEAD (if non-NULL), -+ are excluded from LOOP. */ -+ -+static bool -+loop_iter_phi_semi_invariant_p (struct loop *loop, gphi *loop_phi, -+ const_basic_block skip_head) -+{ -+ const_edge latch = loop_latch_edge (loop); -+ tree name = gimple_phi_result (loop_phi); -+ tree from = PHI_ARG_DEF_FROM_EDGE (loop_phi, latch); -+ -+ gcc_checking_assert (from); -+ -+ /* Loop iteration PHI node locates in loop header, and it has two source -+ operands, one is an initial value coming from outside the loop, the other -+ is a value through latch of the loop, which is derived in last iteration, -+ we call the latter latch value. From the PHI node to definition of latch -+ value, if excluding branch trace starting from SKIP_HEAD, except copy- -+ assignment or likewise, there is no other kind of value redefinition, SSA -+ name defined by the PHI node is semi-invariant. -+ -+ loop entry -+ | .--- latch ---. -+ | | | -+ v v | -+ x_1 = PHI | -+ | | -+ v | -+ .------- if (cond) -------. 
| -+ | | | -+ | [ SKIP ] | -+ | | | -+ | x_2 = ... | -+ | | | -+ '---- T ---->.<---- F ----' | -+ | | -+ v | -+ x_3 = PHI | -+ | | -+ '----------------------' -+ -+ Suppose in certain iteration, execution flow in above graph goes through -+ true branch, which means that one source value to define x_3 in false -+ branch (x_2) is skipped, x_3 only comes from x_1, and x_1 in next -+ iterations is defined by x_3, we know that x_1 will never changed if COND -+ always chooses true branch from then on. */ -+ -+ while (from != name) -+ { -+ /* A new value comes from a CONSTANT. */ -+ if (TREE_CODE (from) != SSA_NAME) -+ return false; -+ -+ gimple *stmt = SSA_NAME_DEF_STMT (from); -+ const_basic_block bb = gimple_bb (stmt); -+ -+ /* A new value comes from outside the loop. */ -+ if (!bb || !flow_bb_inside_loop_p (loop, bb)) -+ return false; -+ -+ from = NULL_TREE; -+ -+ if (gimple_code (stmt) == GIMPLE_PHI) -+ { -+ gphi *phi = as_a (stmt); -+ -+ for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i) -+ { -+ if (skip_head) -+ { -+ const_edge e = gimple_phi_arg_edge (phi, i); -+ -+ /* Don't consider redefinitions in excluded basic blocks. */ -+ if (dominated_by_p (CDI_DOMINATORS, e->src, skip_head)) -+ continue; -+ } -+ -+ tree arg = gimple_phi_arg_def (phi, i); -+ -+ if (!from) -+ from = arg; -+ else if (!operand_equal_p (from, arg, 0)) -+ /* There are more than one source operands that provide -+ different values to the SSA name, it is variant. */ -+ return false; -+ } -+ } -+ else if (gimple_code (stmt) == GIMPLE_ASSIGN) -+ { -+ /* For simple value copy, check its rhs instead. */ -+ if (gimple_assign_ssa_name_copy_p (stmt)) -+ from = gimple_assign_rhs1 (stmt); -+ } -+ -+ /* Any other kind of definition is deemed to introduce a new value -+ to the SSA name. */ -+ if (!from) -+ return false; -+ } -+ return true; -+} -+ -+/* Check whether conditional predicates that BB is control-dependent on, are -+ semi-invariant in LOOP. Basic blocks dominated by SKIP_HEAD (if non-NULL), -+ are excluded from LOOP. Semi-invariant state of checked statement is cached -+ in hash map STMT_STAT. */ -+ -+static bool -+control_dep_semi_invariant_p (struct loop *loop, basic_block bb, -+ const_basic_block skip_head, -+ hash_map &stmt_stat) -+{ -+ hash_set *dep_bbs = find_control_dep_blocks (loop, bb); -+ -+ if (!dep_bbs) -+ return true; -+ -+ for (hash_set::iterator iter = dep_bbs->begin (); -+ iter != dep_bbs->end (); ++iter) -+ { -+ gimple *last = last_stmt (*iter); -+ -+ if (!last) -+ return false; -+ -+ /* Only check condition predicates. */ -+ if (gimple_code (last) != GIMPLE_COND -+ && gimple_code (last) != GIMPLE_SWITCH) -+ return false; -+ -+ if (!stmt_semi_invariant_p_1 (loop, last, skip_head, stmt_stat)) -+ return false; -+ } -+ -+ return true; -+} -+ -+/* Check whether STMT is semi-invariant in LOOP, iff all its operands are -+ semi-invariant, consequently, all its defined values are semi-invariant. -+ Basic blocks dominated by SKIP_HEAD (if non-NULL), are excluded from LOOP. -+ Semi-invariant state of checked statement is cached in hash map -+ STMT_STAT. */ -+ -+static bool -+stmt_semi_invariant_p_1 (struct loop *loop, gimple *stmt, -+ const_basic_block skip_head, -+ hash_map &stmt_stat) -+{ -+ bool existed; -+ bool &invar = stmt_stat.get_or_insert (stmt, &existed); -+ -+ if (existed) -+ return invar; -+ -+ /* A statement might depend on itself, which is treated as variant. So set -+ state of statement under check to be variant to ensure that. 
*/ -+ invar = false; -+ -+ if (gimple_code (stmt) == GIMPLE_PHI) -+ { -+ gphi *phi = as_a (stmt); -+ -+ if (gimple_bb (stmt) == loop->header) -+ { -+ invar = loop_iter_phi_semi_invariant_p (loop, phi, skip_head); -+ return invar; -+ } -+ -+ /* For a loop PHI node that does not locate in loop header, it is semi- -+ invariant only if two conditions are met. The first is its source -+ values are derived from CONSTANT (including loop-invariant value), or -+ from SSA name defined by semi-invariant loop iteration PHI node. The -+ second is its source incoming edges are control-dependent on semi- -+ invariant conditional predicates. */ -+ for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i) -+ { -+ const_edge e = gimple_phi_arg_edge (phi, i); -+ tree arg = gimple_phi_arg_def (phi, i); -+ -+ if (TREE_CODE (arg) == SSA_NAME) -+ { -+ if (!ssa_semi_invariant_p (loop, arg, skip_head, stmt_stat)) -+ return false; -+ -+ /* If source value is defined in location from where the source -+ edge comes in, no need to check control dependency again -+ since this has been done in above SSA name check stage. */ -+ if (e->src == gimple_bb (SSA_NAME_DEF_STMT (arg))) -+ continue; -+ } -+ -+ if (!control_dep_semi_invariant_p (loop, e->src, skip_head, -+ stmt_stat)) -+ return false; -+ } -+ } -+ else -+ { -+ ssa_op_iter iter; -+ tree use; -+ -+ /* Volatile memory load or return of normal (non-const/non-pure) call -+ should not be treated as constant in each iteration of loop. */ -+ if (gimple_has_side_effects (stmt)) -+ return false; -+ -+ /* Check if any memory store may kill memory load at this place. */ -+ if (gimple_vuse (stmt) && !vuse_semi_invariant_p (loop, stmt, skip_head)) -+ return false; -+ -+ /* Although operand of a statement might be SSA name, CONSTANT or -+ VARDECL, here we only need to check SSA name operands. This is -+ because check on VARDECL operands, which involve memory loads, -+ must have been done prior to invocation of this function in -+ vuse_semi_invariant_p. */ -+ FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE) -+ if (!ssa_semi_invariant_p (loop, use, skip_head, stmt_stat)) -+ return false; -+ } -+ -+ if (!control_dep_semi_invariant_p (loop, gimple_bb (stmt), skip_head, -+ stmt_stat)) -+ return false; -+ -+ /* Here we SHOULD NOT use invar = true, since hash map might be changed due -+ to new insertion, and thus invar may point to invalid memory. */ -+ stmt_stat.put (stmt, true); -+ return true; -+} -+ -+/* A helper function to check whether STMT is semi-invariant in LOOP. Basic -+ blocks dominated by SKIP_HEAD (if non-NULL), are excluded from LOOP. */ -+ -+static bool -+stmt_semi_invariant_p (struct loop *loop, gimple *stmt, -+ const_basic_block skip_head) -+{ -+ hash_map stmt_stat; -+ return stmt_semi_invariant_p_1 (loop, stmt, skip_head, stmt_stat); -+} -+ -+/* Determine when conditional statement never transfers execution to one of its -+ branch, whether we can remove the branch's leading basic block (BRANCH_BB) -+ and those basic blocks dominated by BRANCH_BB. */ -+ -+static bool -+branch_removable_p (basic_block branch_bb) -+{ -+ edge_iterator ei; -+ edge e; -+ -+ if (single_pred_p (branch_bb)) -+ return true; -+ -+ FOR_EACH_EDGE (e, ei, branch_bb->preds) -+ { -+ if (dominated_by_p (CDI_DOMINATORS, e->src, branch_bb)) -+ continue; -+ -+ if (dominated_by_p (CDI_DOMINATORS, branch_bb, e->src)) -+ continue; -+ -+ /* The branch can be reached from opposite branch, or from some -+ statement not dominated by the conditional statement. 
*/ -+ return false; -+ } -+ -+ return true; -+} -+ -+/* Find out which branch of a conditional statement (COND) is invariant in the -+ execution context of LOOP. That is: once the branch is selected in certain -+ iteration of the loop, any operand that contributes to computation of the -+ conditional statement remains unchanged in all following iterations. */ -+ -+static edge -+get_cond_invariant_branch (struct loop *loop, gcond *cond) -+{ -+ basic_block cond_bb = gimple_bb (cond); -+ basic_block targ_bb[2]; -+ bool invar[2]; -+ unsigned invar_checks = 0; -+ -+ for (unsigned i = 0; i < 2; i++) -+ { -+ targ_bb[i] = EDGE_SUCC (cond_bb, i)->dest; -+ -+ /* One branch directs to loop exit, no need to perform loop split upon -+ this conditional statement. Firstly, it is trivial if the exit branch -+ is semi-invariant, for the statement is just to break loop. Secondly, -+ if the opposite branch is semi-invariant, it means that the statement -+ is real loop-invariant, which is covered by loop unswitch. */ -+ if (!flow_bb_inside_loop_p (loop, targ_bb[i])) -+ return NULL; -+ } -+ -+ for (unsigned i = 0; i < 2; i++) -+ { -+ invar[!i] = false; -+ -+ if (!branch_removable_p (targ_bb[i])) -+ continue; -+ -+ /* Given a semi-invariant branch, if its opposite branch dominates -+ loop latch, it and its following trace will only be executed in -+ final iteration of loop, namely it is not part of repeated body -+ of the loop. Similar to the above case that the branch is loop -+ exit, no need to split loop. */ -+ if (dominated_by_p (CDI_DOMINATORS, loop->latch, targ_bb[i])) -+ continue; -+ -+ invar[!i] = stmt_semi_invariant_p (loop, cond, targ_bb[i]); -+ invar_checks++; -+ } -+ -+ /* With both branches being invariant (handled by loop unswitch) or -+ variant is not what we want. */ -+ if (invar[0] ^ !invar[1]) -+ return NULL; -+ -+ /* Found a real loop-invariant condition, do nothing. */ -+ if (invar_checks < 2 && stmt_semi_invariant_p (loop, cond, NULL)) -+ return NULL; -+ -+ return EDGE_SUCC (cond_bb, invar[0] ? 0 : 1); -+} -+ -+/* Calculate increased code size measured by estimated insn number if applying -+ loop split upon certain branch (BRANCH_EDGE) of a conditional statement. */ -+ -+static int -+compute_added_num_insns (struct loop *loop, const_edge branch_edge) -+{ -+ basic_block cond_bb = branch_edge->src; -+ unsigned branch = EDGE_SUCC (cond_bb, 1) == branch_edge; -+ basic_block opposite_bb = EDGE_SUCC (cond_bb, !branch)->dest; -+ basic_block *bbs = ((split_info *) loop->aux)->bbs; -+ int num = 0; -+ -+ for (unsigned i = 0; i < loop->num_nodes; i++) -+ { -+ /* Do no count basic blocks only in opposite branch. */ -+ if (dominated_by_p (CDI_DOMINATORS, bbs[i], opposite_bb)) -+ continue; -+ -+ num += estimate_num_insns_seq (bb_seq (bbs[i]), &eni_size_weights); -+ } -+ -+ /* It is unnecessary to evaluate expression of the conditional statement -+ in new loop that contains only invariant branch. This expression should -+ be constant value (either true or false). Exclude code size of insns -+ that contribute to computation of the expression. 
*/ -+ -+ auto_vec worklist; -+ hash_set removed; -+ gimple *stmt = last_stmt (cond_bb); -+ -+ worklist.safe_push (stmt); -+ removed.add (stmt); -+ num -= estimate_num_insns (stmt, &eni_size_weights); -+ -+ do -+ { -+ ssa_op_iter opnd_iter; -+ use_operand_p opnd_p; -+ -+ stmt = worklist.pop (); -+ FOR_EACH_PHI_OR_STMT_USE (opnd_p, stmt, opnd_iter, SSA_OP_USE) -+ { -+ tree opnd = USE_FROM_PTR (opnd_p); -+ -+ if (TREE_CODE (opnd) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (opnd)) -+ continue; -+ -+ gimple *opnd_stmt = SSA_NAME_DEF_STMT (opnd); -+ use_operand_p use_p; -+ imm_use_iterator use_iter; -+ -+ if (removed.contains (opnd_stmt) -+ || !flow_bb_inside_loop_p (loop, gimple_bb (opnd_stmt))) -+ continue; -+ -+ FOR_EACH_IMM_USE_FAST (use_p, use_iter, opnd) -+ { -+ gimple *use_stmt = USE_STMT (use_p); -+ -+ if (!is_gimple_debug (use_stmt) && !removed.contains (use_stmt)) -+ { -+ opnd_stmt = NULL; -+ break; -+ } -+ } -+ -+ if (opnd_stmt) -+ { -+ worklist.safe_push (opnd_stmt); -+ removed.add (opnd_stmt); -+ num -= estimate_num_insns (opnd_stmt, &eni_size_weights); -+ } -+ } -+ } while (!worklist.is_empty ()); -+ -+ gcc_assert (num >= 0); -+ return num; -+} -+ -+/* Find out loop-invariant branch of a conditional statement (COND) if it has, -+ and check whether it is eligible and profitable to perform loop split upon -+ this branch in LOOP. */ -+ -+static edge -+get_cond_branch_to_split_loop (struct loop *loop, gcond *cond) -+{ -+ edge invar_branch = get_cond_invariant_branch (loop, cond); -+ if (!invar_branch) -+ return NULL; -+ -+ /* When accurate profile information is available, and execution -+ frequency of the branch is too low, just let it go. */ -+ profile_probability prob = invar_branch->probability; -+ if (prob.reliable_p ()) -+ { -+ int thres = PARAM_VALUE (PARAM_MIN_LOOP_COND_SPLIT_PROB); -+ -+ if (prob < profile_probability::always ().apply_scale (thres, 100)) -+ return NULL; -+ } -+ -+ /* Add a threshold for increased code size to disable loop split. */ -+ if (compute_added_num_insns (loop, invar_branch) -+ > PARAM_VALUE (PARAM_MAX_PEELED_INSNS)) -+ return NULL; -+ -+ return invar_branch; -+} -+ -+/* Given a loop (LOOP1) with a loop-invariant branch (INVAR_BRANCH) of some -+ conditional statement, perform loop split transformation illustrated -+ as the following graph. -+ -+ .-------T------ if (true) ------F------. -+ | .---------------. | -+ | | | | -+ v | v v -+ pre-header | pre-header -+ | .------------. | | .------------. -+ | | | | | | | -+ | v | | | v | -+ header | | header | -+ | | | | | -+ .--- if (cond) ---. | | .--- if (true) ---. | -+ | | | | | | | -+ invariant | | | invariant | | -+ | | | | | | | -+ '---T--->.<---F---' | | '---T--->.<---F---' | -+ | | / | | -+ stmts | / stmts | -+ | F T | | -+ / \ | / / \ | -+ .-------* * [ if (cond) ] .-------* * | -+ | | | | | | -+ | latch | | latch | -+ | | | | | | -+ | '------------' | '------------' -+ '------------------------. .-----------' -+ loop1 | | loop2 -+ v v -+ exits -+ -+ In the graph, loop1 represents the part derived from original one, and -+ loop2 is duplicated using loop_version (), which corresponds to the part -+ of original one being splitted out. In original latch edge of loop1, we -+ insert a new conditional statement duplicated from the semi-invariant cond, -+ and one of its branch goes back to loop1 header as a latch edge, and the -+ other branch goes to loop2 pre-header as an entry edge. 
And also in loop2, -+ we abandon the variant branch of the conditional statement by setting a -+ constant bool condition, based on which branch is semi-invariant. */ -+ -+static bool -+do_split_loop_on_cond (struct loop *loop1, edge invar_branch) -+{ -+ basic_block cond_bb = invar_branch->src; -+ bool true_invar = !!(invar_branch->flags & EDGE_TRUE_VALUE); -+ gcond *cond = as_a (last_stmt (cond_bb)); -+ -+ gcc_assert (cond_bb->loop_father == loop1); -+ -+ if (dump_enabled_p ()) -+ dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, cond, -+ "loop split on semi-invariant condition at %s branch\n", -+ true_invar ? "true" : "false"); -+ -+ initialize_original_copy_tables (); -+ -+ struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL, -+ profile_probability::always (), -+ profile_probability::never (), -+ profile_probability::always (), -+ profile_probability::always (), -+ true); -+ if (!loop2) -+ { -+ free_original_copy_tables (); -+ return false; -+ } -+ -+ basic_block cond_bb_copy = get_bb_copy (cond_bb); -+ gcond *cond_copy = as_a (last_stmt (cond_bb_copy)); -+ -+ /* Replace the condition in loop2 with a bool constant to let PassManager -+ remove the variant branch after current pass completes. */ -+ if (true_invar) -+ gimple_cond_make_true (cond_copy); -+ else -+ gimple_cond_make_false (cond_copy); -+ -+ update_stmt (cond_copy); -+ -+ /* Insert a new conditional statement on latch edge of loop1, its condition -+ is duplicated from the semi-invariant. This statement acts as a switch -+ to transfer execution from loop1 to loop2, when loop1 enters into -+ invariant state. */ -+ basic_block latch_bb = split_edge (loop_latch_edge (loop1)); -+ basic_block break_bb = split_edge (single_pred_edge (latch_bb)); -+ gimple *break_cond = gimple_build_cond (gimple_cond_code(cond), -+ gimple_cond_lhs (cond), -+ gimple_cond_rhs (cond), -+ NULL_TREE, NULL_TREE); -+ -+ gimple_stmt_iterator gsi = gsi_last_bb (break_bb); -+ gsi_insert_after (&gsi, break_cond, GSI_NEW_STMT); -+ -+ edge to_loop1 = single_succ_edge (break_bb); -+ edge to_loop2 = make_edge (break_bb, loop_preheader_edge (loop2)->src, 0); -+ -+ to_loop1->flags &= ~EDGE_FALLTHRU; -+ to_loop1->flags |= true_invar ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE; -+ to_loop2->flags |= true_invar ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE; -+ -+ update_ssa (TODO_update_ssa); -+ -+ /* Due to introduction of a control flow edge from loop1 latch to loop2 -+ pre-header, we should update PHIs in loop2 to reflect this connection -+ between loop1 and loop2. */ -+ connect_loop_phis (loop1, loop2, to_loop2); -+ -+ free_original_copy_tables (); -+ -+ rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop1); -+ -+ return true; -+} -+ -+/* Traverse all conditional statements in LOOP, to find out a good candidate -+ upon which we can do loop split. */ -+ -+static bool -+split_loop_on_cond (struct loop *loop) -+{ -+ split_info *info = new split_info (); -+ basic_block *bbs = info->bbs = get_loop_body (loop); -+ bool do_split = false; -+ -+ /* Allocate an area to keep temporary info, and associate its address -+ with loop aux field. */ -+ loop->aux = info; -+ -+ for (unsigned i = 0; i < loop->num_nodes; i++) -+ bbs[i]->aux = NULL; -+ -+ for (unsigned i = 0; i < loop->num_nodes; i++) -+ { -+ basic_block bb = bbs[i]; -+ -+ /* We only consider conditional statement, which be executed at most once -+ in each iteration of the loop. So skip statements in inner loops. 
*/ -+ if ((bb->loop_father != loop) || (bb->flags & BB_IRREDUCIBLE_LOOP)) -+ continue; -+ -+ /* Actually this check is not a must constraint. With it, we can ensure -+ conditional statement will always be executed in each iteration. */ -+ if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) -+ continue; -+ -+ gimple *last = last_stmt (bb); -+ -+ if (!last || gimple_code (last) != GIMPLE_COND) -+ continue; -+ -+ gcond *cond = as_a (last); -+ edge branch_edge = get_cond_branch_to_split_loop (loop, cond); -+ -+ if (branch_edge) -+ { -+ do_split_loop_on_cond (loop, branch_edge); -+ do_split = true; -+ break; -+ } -+ } -+ -+ delete info; -+ loop->aux = NULL; -+ -+ return do_split; -+} -+ - /* Main entry point. Perform loop splitting on all suitable loops. */ - - static unsigned int -@@ -621,13 +1597,15 @@ tree_ssa_split_loops (void) - bool changed = false; - - gcc_assert (scev_initialized_p ()); -+ -+ calculate_dominance_info (CDI_POST_DOMINATORS); -+ - FOR_EACH_LOOP (loop, LI_INCLUDE_ROOT) - loop->aux = NULL; - - /* Go through all loops starting from innermost. */ - FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) - { -- struct tree_niter_desc niter; - if (loop->aux) - { - /* If any of our inner loops was split, don't split us, -@@ -636,35 +1614,24 @@ tree_ssa_split_loops (void) - continue; - } - -- if (single_exit (loop) -- /* ??? We could handle non-empty latches when we split -- the latch edge (not the exit edge), and put the new -- exit condition in the new block. OTOH this executes some -- code unconditionally that might have been skipped by the -- original exit before. */ -- && empty_block_p (loop->latch) -- && !optimize_loop_for_size_p (loop) -- && easy_exit_values (loop) -- && number_of_iterations_exit (loop, single_exit (loop), &niter, -- false, true) -- && niter.cmp != ERROR_MARK -- /* We can't yet handle loops controlled by a != predicate. */ -- && niter.cmp != NE_EXPR -- && can_duplicate_loop_p (loop)) -+ if (optimize_loop_for_size_p (loop)) -+ continue; -+ -+ if (split_loop (loop) || split_loop_on_cond (loop)) - { -- if (split_loop (loop, &niter)) -- { -- /* Mark our containing loop as having had some split inner -- loops. */ -- loop_outer (loop)->aux = loop; -- changed = true; -- } -+ /* Mark our containing loop as having had some split inner loops. 
*/ -+ loop_outer (loop)->aux = loop; -+ changed = true; - } - } - - FOR_EACH_LOOP (loop, LI_INCLUDE_ROOT) - loop->aux = NULL; - -+ clear_aux_for_blocks (); -+ -+ free_dominance_info (CDI_POST_DOMINATORS); -+ - if (changed) - return TODO_cleanup_cfg; - return 0; diff --git a/medium-code-mode.patch b/medium-code-mode.patch deleted file mode 100644 index f63e920da48f2c9bf8232eb832091eb66cf3339e..0000000000000000000000000000000000000000 --- a/medium-code-mode.patch +++ /dev/null @@ -1,458 +0,0 @@ -diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c ---- a/gcc/config/aarch64/aarch64.c 2021-02-18 11:03:29.728000000 +0800 -+++ b/gcc/config/aarch64/aarch64.c 2021-02-18 14:59:54.432000000 +0800 -@@ -2417,6 +2417,29 @@ aarch64_load_symref_appropriately (rtx d - emit_insn (gen_add_losym (dest, tmp_reg, imm)); - return; - } -+ case SYMBOL_MEDIUM_ABSOLUTE: -+ { -+ rtx tmp_reg = dest; -+ machine_mode mode = GET_MODE (dest); -+ -+ gcc_assert (mode == Pmode || mode == ptr_mode); -+ if (can_create_pseudo_p ()) -+ tmp_reg = gen_reg_rtx (mode); -+ -+ if (mode == DImode) -+ { -+ emit_insn (gen_load_symbol_medium_di (dest, tmp_reg, imm)); -+ } -+ else -+ { -+ emit_insn (gen_load_symbol_medium_si (dest, tmp_reg, imm)); -+ } -+ if (REG_P (dest)) -+ { -+ set_unique_reg_note (get_last_insn (), REG_EQUAL, copy_rtx (imm)); -+ } -+ return; -+ } - - case SYMBOL_TINY_ABSOLUTE: - emit_insn (gen_rtx_SET (dest, imm)); -@@ -2539,6 +2562,60 @@ aarch64_load_symref_appropriately (rtx d - return; - } - -+ case SYMBOL_MEDIUM_GOT_4G: -+ { -+ rtx tmp_reg = dest; -+ machine_mode mode = GET_MODE (dest); -+ if (can_create_pseudo_p ()) -+ { -+ tmp_reg = gen_reg_rtx (mode); -+ } -+ rtx insn; -+ rtx mem; -+ rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); -+ -+ if (mode == DImode) -+ { -+ emit_insn (gen_load_symbol_medium_di (tmp_reg, dest, s)); -+ } -+ else -+ { -+ emit_insn (gen_load_symbol_medium_si (tmp_reg, dest, s)); -+ } -+ if (REG_P (dest)) -+ { -+ set_unique_reg_note (get_last_insn (), REG_EQUAL, copy_rtx (s)); -+ } -+ -+ if (mode == ptr_mode) -+ { -+ if (mode == DImode) -+ { -+ emit_insn (gen_get_gotoff_di (dest, imm)); -+ insn = gen_ldr_got_medium_di (dest, tmp_reg, dest); -+ } -+ else -+ { -+ emit_insn (gen_get_gotoff_si (dest, imm)); -+ insn = gen_ldr_got_medium_si (dest, tmp_reg, dest); -+ } -+ mem = XVECEXP (SET_SRC (insn), 0, 0); -+ } -+ else -+ { -+ gcc_assert (mode == Pmode); -+ emit_insn (gen_get_gotoff_di (dest, imm)); -+ insn = gen_ldr_got_medium_sidi (dest, tmp_reg, dest); -+ mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0); -+ } -+ -+ gcc_assert (GET_CODE (mem) == MEM); -+ MEM_READONLY_P (mem) = 1; -+ MEM_NOTRAP_P (mem) = 1; -+ emit_insn (insn); -+ return; -+ } -+ - case SYMBOL_SMALL_TLSGD: - { - rtx_insn *insns; -@@ -4531,11 +4608,12 @@ aarch64_expand_mov_immediate (rtx dest, - - return; - -- case SYMBOL_SMALL_TLSGD: -- case SYMBOL_SMALL_TLSDESC: -+ case SYMBOL_SMALL_TLSGD: -+ case SYMBOL_SMALL_TLSDESC: - case SYMBOL_SMALL_TLSIE: - case SYMBOL_SMALL_GOT_28K: - case SYMBOL_SMALL_GOT_4G: -+ case SYMBOL_MEDIUM_GOT_4G: - case SYMBOL_TINY_GOT: - case SYMBOL_TINY_TLSIE: - if (const_offset != 0) -@@ -4554,6 +4632,7 @@ aarch64_expand_mov_immediate (rtx dest, - case SYMBOL_TLSLE24: - case SYMBOL_TLSLE32: - case SYMBOL_TLSLE48: -+ case SYMBOL_MEDIUM_ABSOLUTE: - aarch64_load_symref_appropriately (dest, imm, sty); - return; - -@@ -8450,7 +8529,14 @@ aarch64_classify_address (struct aarch64 - split_const (info->offset, &sym, &offs); - if (GET_CODE (sym) == SYMBOL_REF - && 
(aarch64_classify_symbol (sym, INTVAL (offs)) -- == SYMBOL_SMALL_ABSOLUTE)) -+ == SYMBOL_SMALL_ABSOLUTE -+ /* Fix fail on dbl_mov_immediate_1.c. If end up here with -+ MEDIUM_ABSOLUTE, the symbol is a constant number that is -+ forced to memory in reload pass, which is ok to go on with -+ the original design that subtitude the mov to -+ 'adrp and ldr :losum'. */ -+ || aarch64_classify_symbol (sym, INTVAL (offs)) -+ == SYMBOL_MEDIUM_ABSOLUTE)) - { - /* The symbol and offset must be aligned to the access size. */ - unsigned int align; -@@ -10365,7 +10451,13 @@ static inline bool - aarch64_can_use_per_function_literal_pools_p (void) - { - return (aarch64_pcrelative_literal_loads -- || aarch64_cmodel == AARCH64_CMODEL_LARGE); -+ || aarch64_cmodel == AARCH64_CMODEL_LARGE -+ /* Fix const9.C so that constants goes to function_literal_pools. -+ According to the orignal design of aarch64 mcmodel=medium, we -+ don't care where this symbol is put. For the benefit of code size -+ and behaviour consistent with other mcmodel, put it into -+ function_literal_pools. */ -+ || aarch64_cmodel == AARCH64_CMODEL_MEDIUM); - } - - static bool -@@ -11993,6 +12085,13 @@ cost_plus: - if (speed) - *cost += extra_cost->alu.arith; - } -+ else if (aarch64_cmodel == AARCH64_CMODEL_MEDIUM -+ || aarch64_cmodel == AARCH64_CMODEL_MEDIUM_PIC) -+ { -+ /* 4 movs adr sub add 2movs ldr. */ -+ if (speed) -+ *cost += 7*extra_cost->alu.arith; -+ } - - if (flag_pic) - { -@@ -12000,6 +12099,8 @@ cost_plus: - *cost += COSTS_N_INSNS (1); - if (speed) - *cost += extra_cost->ldst.load; -+ if (aarch64_cmodel == AARCH64_CMODEL_MEDIUM_PIC) -+ *cost += 2*extra_cost->alu.arith; - } - return true; - -@@ -13176,6 +13277,7 @@ initialize_aarch64_tls_size (struct gcc_ - if (aarch64_tls_size > 32) - aarch64_tls_size = 32; - break; -+ case AARCH64_CMODEL_MEDIUM: - case AARCH64_CMODEL_LARGE: - /* The maximum TLS size allowed under large is 16E. - FIXME: 16E should be 64bit, we only support 48bit offset now. */ -@@ -13968,6 +14070,9 @@ initialize_aarch64_code_model (struct gc - aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC; - #endif - break; -+ case AARCH64_CMODEL_MEDIUM: -+ aarch64_cmodel = AARCH64_CMODEL_MEDIUM_PIC; -+ break; - case AARCH64_CMODEL_LARGE: - sorry ("code model %qs with %<-f%s%>", "large", - opts->x_flag_pic > 1 ? 
"PIC" : "pic"); -@@ -13986,6 +14091,7 @@ static void - aarch64_option_save (struct cl_target_option *ptr, struct gcc_options *opts) - { - ptr->x_aarch64_override_tune_string = opts->x_aarch64_override_tune_string; -+ ptr->x_aarch64_data_threshold = opts->x_aarch64_data_threshold; - ptr->x_aarch64_branch_protection_string - = opts->x_aarch64_branch_protection_string; - } -@@ -14001,6 +14107,7 @@ aarch64_option_restore (struct gcc_optio - opts->x_explicit_arch = ptr->x_explicit_arch; - selected_arch = aarch64_get_arch (ptr->x_explicit_arch); - opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string; -+ opts->x_aarch64_data_threshold = ptr->x_aarch64_data_threshold; - opts->x_aarch64_branch_protection_string - = ptr->x_aarch64_branch_protection_string; - if (opts->x_aarch64_branch_protection_string) -@@ -14868,6 +14975,8 @@ aarch64_classify_symbol (rtx x, HOST_WID - - case AARCH64_CMODEL_SMALL_SPIC: - case AARCH64_CMODEL_SMALL_PIC: -+ case AARCH64_CMODEL_MEDIUM_PIC: -+ case AARCH64_CMODEL_MEDIUM: - case AARCH64_CMODEL_SMALL: - return SYMBOL_SMALL_ABSOLUTE; - -@@ -14904,6 +15013,7 @@ aarch64_classify_symbol (rtx x, HOST_WID - return SYMBOL_TINY_ABSOLUTE; - - case AARCH64_CMODEL_SMALL: -+ AARCH64_SMALL_ROUTINE: - /* Same reasoning as the tiny code model, but the offset cap here is - 1MB, allowing +/-3.9GB for the offset to the symbol. */ - -@@ -14927,7 +15037,50 @@ aarch64_classify_symbol (rtx x, HOST_WID - ? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G); - return SYMBOL_SMALL_ABSOLUTE; - -+ case AARCH64_CMODEL_MEDIUM: -+ { -+ tree decl_local = SYMBOL_REF_DECL (x); -+ if (decl_local != NULL -+ && tree_fits_uhwi_p (DECL_SIZE_UNIT (decl_local))) -+ { -+ HOST_WIDE_INT size = tree_to_uhwi (DECL_SIZE_UNIT (decl_local)); -+ /* If the data is smaller than the threshold, goto -+ the small code model. Else goto the large code -+ model. */ -+ if (size >= HOST_WIDE_INT (aarch64_data_threshold)) -+ goto AARCH64_LARGE_ROUTINE; -+ } -+ goto AARCH64_SMALL_ROUTINE; -+ } -+ -+ case AARCH64_CMODEL_MEDIUM_PIC: -+ { -+ tree decl_local = SYMBOL_REF_DECL (x); -+ if (decl_local != NULL -+ && tree_fits_uhwi_p (DECL_SIZE_UNIT (decl_local))) -+ { -+ HOST_WIDE_INT size = tree_to_uhwi (DECL_SIZE_UNIT (decl_local)); -+ if (size < HOST_WIDE_INT (aarch64_data_threshold)) -+ { -+ if (!aarch64_symbol_binds_local_p (x)) -+ { -+ /* flag_pic is 2 only when -fPIC is on, when we should -+ use 4G GOT. */ -+ return flag_pic == 2 ? SYMBOL_SMALL_GOT_4G -+ : SYMBOL_SMALL_GOT_28K ; -+ } -+ return SYMBOL_SMALL_ABSOLUTE; -+ } -+ } -+ if (!aarch64_symbol_binds_local_p (x)) -+ { -+ return SYMBOL_MEDIUM_GOT_4G; -+ } -+ return SYMBOL_MEDIUM_ABSOLUTE; -+ } -+ - case AARCH64_CMODEL_LARGE: -+ AARCH64_LARGE_ROUTINE: - /* This is alright even in PIC code as the constant - pool reference is always PC relative and within - the same translation unit. */ -@@ -17789,6 +17942,8 @@ aarch64_asm_preferred_eh_data_format (in - case AARCH64_CMODEL_SMALL: - case AARCH64_CMODEL_SMALL_PIC: - case AARCH64_CMODEL_SMALL_SPIC: -+ case AARCH64_CMODEL_MEDIUM: -+ case AARCH64_CMODEL_MEDIUM_PIC: - /* text+got+data < 4Gb. 4-byte signed relocs are sufficient - for everything. */ - type = DW_EH_PE_sdata4; -@@ -21014,7 +21169,14 @@ aarch64_empty_mask_is_expensive (unsigne - bool - aarch64_use_pseudo_pic_reg (void) - { -- return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC; -+ /* flag_pic is 2 when -fPIC is on, where we do not need the pseudo -+ pic reg. 
In medium code mode, when combine with -fpie/-fpic, there are -+ possibility that some symbol size smaller than the -mlarge-data-threshold -+ will still use SMALL_SPIC relocation, which need the pseudo pic reg. -+ Fix spill_1.c fail. */ -+ return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC -+ || (aarch64_cmodel == AARCH64_CMODEL_MEDIUM_PIC -+ && flag_pic != 2); - } - - /* Implement TARGET_UNSPEC_MAY_TRAP_P. */ -@@ -21024,6 +21186,7 @@ aarch64_unspec_may_trap_p (const_rtx x, - { - switch (XINT (x, 1)) - { -+ case UNSPEC_GOTMEDIUMPIC4G: - case UNSPEC_GOTSMALLPIC: - case UNSPEC_GOTSMALLPIC28K: - case UNSPEC_GOTTINYPIC: -diff -Nurp a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h ---- a/gcc/config/aarch64/aarch64.h 2021-02-18 11:03:28.336000000 +0800 -+++ b/gcc/config/aarch64/aarch64.h 2021-02-18 10:57:45.488000000 +0800 -@@ -33,6 +33,10 @@ - - #define REGISTER_TARGET_PRAGMAS() aarch64_register_pragmas () - -+/* Default threshold 64-bit relocation data -+ with aarch64 medium memory model. */ -+#define AARCH64_DEFAULT_LARGE_DATA_THRESHOLD 65536 -+ - /* Target machine storage layout. */ - - #define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \ -diff -Nurp a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md ---- a/gcc/config/aarch64/aarch64.md 2021-02-18 11:03:28.340000000 +0800 -+++ b/gcc/config/aarch64/aarch64.md 2021-02-18 10:57:45.488000000 +0800 -@@ -224,6 +224,11 @@ - UNSPEC_RSQRTS - UNSPEC_NZCV - UNSPEC_XPACLRI -+ UNSPEC_MOV_MEDIUM_SYMBOL -+ UNSPEC_GET_LAST_PC -+ UNSPEC_GOTMEDIUMPIC4G -+ UNSPEC_GET_GOTOFF -+ UNSPEC_LOAD_SYMBOL_MEDIUM - UNSPEC_LD1_SVE - UNSPEC_ST1_SVE - UNSPEC_LDNT1_SVE -@@ -6689,6 +6694,39 @@ - [(set_attr "type" "load_4")] - ) - -+(define_insn "get_gotoff_" -+ [(set (match_operand:GPI 0 "register_operand" "=r") -+ (unspec:GPI [(match_operand 1 "aarch64_valid_symref" "S")] -+ UNSPEC_GET_GOTOFF))] -+ "" -+ "movz\\t%x0, :gotoff_g1:%A1\;movk\\t%x0, :gotoff_g0_nc:%A1" -+ [(set_attr "type" "multiple") -+ (set_attr "length" "8")] -+) -+ -+(define_insn "ldr_got_medium_" -+ [(set (match_operand:PTR 0 "register_operand" "=r") -+ (unspec:PTR [(mem:PTR (lo_sum:PTR -+ (match_operand:PTR 1 "register_operand" "r") -+ (match_operand:PTR 2 "register_operand" "r")))] -+ UNSPEC_GOTMEDIUMPIC4G))] -+ "" -+ "ldr\\t%0, [%1, %2]" -+ [(set_attr "type" "load_4")] -+) -+ -+(define_insn "ldr_got_medium_sidi" -+ [(set (match_operand:DI 0 "register_operand" "=r") -+ (zero_extend:DI -+ (unspec:SI [(mem:SI (lo_sum:DI -+ (match_operand:DI 1 "register_operand" "r") -+ (match_operand:DI 2 "register_operand" "r")))] -+ UNSPEC_GOTMEDIUMPIC4G)))] -+ "TARGET_ILP32" -+ "ldr\\t%0, [%1, %2]" -+ [(set_attr "type" "load_4")] -+) -+ - (define_insn "ldr_got_small_28k_" - [(set (match_operand:PTR 0 "register_operand" "=r") - (unspec:PTR [(mem:PTR (lo_sum:PTR -@@ -6852,6 +6890,23 @@ - (set_attr "length" "12")] - ) - -+(define_insn "load_symbol_medium_" -+ [(set (match_operand:GPI 0 "register_operand" "=r") -+ (unspec:GPI [(match_operand 2 "aarch64_valid_symref" "S")] -+ UNSPEC_LOAD_SYMBOL_MEDIUM)) -+ (clobber (match_operand:GPI 1 "register_operand" "=r"))] -+ "" -+ "movz\\t%x0, :prel_g3:%A2\;\\ -+ movk\\t%x0, :prel_g2_nc:%A2\;\\ -+ movk\\t%x0, :prel_g1_nc:%A2\;\\ -+ movk\\t%x0, :prel_g0_nc:%A2\;\\ -+ adr\\t%x1, .\;\\ -+ sub\\t%x1, %x1, 0x4\;\\ -+ add\\t%x0, %x0, %x1" -+ [(set_attr "type" "multiple") -+ (set_attr "length" "28")] -+) -+ - (define_expand "tlsdesc_small_" - [(unspec:PTR [(match_operand 0 "aarch64_valid_symref")] UNSPEC_TLSDESC)] - "TARGET_TLS_DESC" -diff -Nurp 
a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt ---- a/gcc/config/aarch64/aarch64.opt 2021-02-18 11:03:28.340000000 +0800 -+++ b/gcc/config/aarch64/aarch64.opt 2021-02-18 10:57:45.488000000 +0800 -@@ -27,6 +27,10 @@ enum aarch64_processor explicit_tune_cor - TargetVariable - enum aarch64_arch explicit_arch = aarch64_no_arch - -+;; -mlarge-data-threshold= -+TargetSave -+int x_aarch64_data_threshold -+ - TargetSave - const char *x_aarch64_override_tune_string - -@@ -61,8 +65,15 @@ EnumValue - Enum(cmodel) String(small) Value(AARCH64_CMODEL_SMALL) - - EnumValue -+Enum(cmodel) String(medium) Value(AARCH64_CMODEL_MEDIUM) -+ -+EnumValue - Enum(cmodel) String(large) Value(AARCH64_CMODEL_LARGE) - -+mlarge-data-threshold= -+Target RejectNegative Joined UInteger Var(aarch64_data_threshold) Init(AARCH64_DEFAULT_LARGE_DATA_THRESHOLD) -+-mlarge-data-threshold= Data greater than given threshold will be assume that it should be relocated using 64-bit relocation. -+ - mbig-endian - Target Report RejectNegative Mask(BIG_END) - Assume target CPU is configured as big endian. -diff -Nurp a/gcc/config/aarch64/aarch64-opts.h b/gcc/config/aarch64/aarch64-opts.h ---- a/gcc/config/aarch64/aarch64-opts.h 2020-03-12 19:07:21.000000000 +0800 -+++ b/gcc/config/aarch64/aarch64-opts.h 2021-02-18 10:57:45.488000000 +0800 -@@ -66,6 +66,10 @@ enum aarch64_code_model { - /* -fpic for small memory model. - GOT size to 28KiB (4K*8-4K) or 3580 entries. */ - AARCH64_CMODEL_SMALL_SPIC, -+ /* Using movk insn sequence to do 64bit PC relative relocation. */ -+ AARCH64_CMODEL_MEDIUM, -+ /* Using movk insn sequence to do 64bit PC relative got relocation. */ -+ AARCH64_CMODEL_MEDIUM_PIC, - /* No assumptions about addresses of code and data. - The PIC variant is not yet implemented. */ - AARCH64_CMODEL_LARGE -diff -Nurp a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h ---- a/gcc/config/aarch64/aarch64-protos.h 2021-02-18 11:03:29.432000000 +0800 -+++ b/gcc/config/aarch64/aarch64-protos.h 2021-02-18 10:57:45.488000000 +0800 -@@ -95,9 +95,11 @@ - */ - enum aarch64_symbol_type - { -+ SYMBOL_MEDIUM_ABSOLUTE, - SYMBOL_SMALL_ABSOLUTE, - SYMBOL_SMALL_GOT_28K, - SYMBOL_SMALL_GOT_4G, -+ SYMBOL_MEDIUM_GOT_4G, - SYMBOL_SMALL_TLSGD, - SYMBOL_SMALL_TLSDESC, - SYMBOL_SMALL_TLSIE, diff --git a/modulo-sched-Carefully-process-loop-counter-initiali.patch b/modulo-sched-Carefully-process-loop-counter-initiali.patch deleted file mode 100644 index 536d1494a6dc36a1e784d15eb0f054192fea1cc1..0000000000000000000000000000000000000000 --- a/modulo-sched-Carefully-process-loop-counter-initiali.patch +++ /dev/null @@ -1,251 +0,0 @@ -This backport contains 1 patchs from gcc main stream tree. -The commit id of these patchs list as following in the order of time. 
- -0001-modulo-sched-Carefully-process-loop-counter-initiali.patch -4eb8f93d026eaa1de9b4820337069f3ce3465cd0 - -diff --git a/gcc/modulo-sched.c b/gcc/modulo-sched.c -index 6f699a874e3..4568674aa6c 100644 ---- a/gcc/modulo-sched.c -+++ b/gcc/modulo-sched.c -@@ -210,8 +210,6 @@ static int sms_order_nodes (ddg_ptr, int, int *, int *); - static void set_node_sched_params (ddg_ptr); - static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *); - static void permute_partial_schedule (partial_schedule_ptr, rtx_insn *); --static void generate_prolog_epilog (partial_schedule_ptr, struct loop *, -- rtx, rtx); - static int calculate_stage_count (partial_schedule_ptr, int); - static void calculate_must_precede_follow (ddg_node_ptr, int, int, - int, int, sbitmap, sbitmap, sbitmap); -@@ -391,30 +389,40 @@ doloop_register_get (rtx_insn *head, rtx_insn *tail) - this constant. Otherwise return 0. */ - static rtx_insn * - const_iteration_count (rtx count_reg, basic_block pre_header, -- int64_t * count) -+ int64_t *count, bool* adjust_inplace) - { - rtx_insn *insn; - rtx_insn *head, *tail; - -+ *adjust_inplace = false; -+ bool read_after = false; -+ - if (! pre_header) - return NULL; - - get_ebb_head_tail (pre_header, pre_header, &head, &tail); - - for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn)) -- if (NONDEBUG_INSN_P (insn) && single_set (insn) && -- rtx_equal_p (count_reg, SET_DEST (single_set (insn)))) -+ if (single_set (insn) && rtx_equal_p (count_reg, -+ SET_DEST (single_set (insn)))) - { - rtx pat = single_set (insn); - - if (CONST_INT_P (SET_SRC (pat))) - { - *count = INTVAL (SET_SRC (pat)); -+ *adjust_inplace = !read_after; - return insn; - } - - return NULL; - } -+ else if (NONDEBUG_INSN_P (insn) && reg_mentioned_p (count_reg, insn)) -+ { -+ read_after = true; -+ if (reg_set_p (count_reg, insn)) -+ break; -+ } - - return NULL; - } -@@ -1126,7 +1134,7 @@ duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage, - /* Generate the instructions (including reg_moves) for prolog & epilog. */ - static void - generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop, -- rtx count_reg, rtx count_init) -+ rtx count_reg, bool adjust_init) - { - int i; - int last_stage = PS_STAGE_COUNT (ps) - 1; -@@ -1135,12 +1143,12 @@ generate_prolog_epilog (partial_schedule_ptr ps, class loop *loop, - /* Generate the prolog, inserting its insns on the loop-entry edge. */ - start_sequence (); - -- if (!count_init) -+ if (adjust_init) - { - /* Generate instructions at the beginning of the prolog to -- adjust the loop count by STAGE_COUNT. If loop count is constant -- (count_init), this constant is adjusted by STAGE_COUNT in -- generate_prolog_epilog function. */ -+ adjust the loop count by STAGE_COUNT. If loop count is constant -+ and it not used anywhere in prologue, this constant is adjusted by -+ STAGE_COUNT outside of generate_prolog_epilog function. */ - rtx sub_reg = NULL_RTX; - - sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS, count_reg, -@@ -1528,7 +1536,8 @@ sms_schedule (void) - rtx_insn *count_init; - int mii, rec_mii, stage_count, min_cycle; - int64_t loop_count = 0; -- bool opt_sc_p; -+ bool opt_sc_p, adjust_inplace = false; -+ basic_block pre_header; - - if (! (g = g_arr[loop->num])) - continue; -@@ -1569,19 +1578,13 @@ sms_schedule (void) - } - - -- /* In case of th loop have doloop register it gets special -- handling. 
-- count_init = NULL;
-- if ((count_reg = doloop_register_get (head, tail)))
-- {
-- basic_block pre_header;
--
-- pre_header = loop_preheader_edge (loop)->src;
-- count_init = const_iteration_count (count_reg, pre_header,
-- &loop_count);
-- }
-+ count_reg = doloop_register_get (head, tail);
- gcc_assert (count_reg);
-
-+ pre_header = loop_preheader_edge (loop)->src;
-+ count_init = const_iteration_count (count_reg, pre_header, &loop_count,
-+ &adjust_inplace);
-+
- if (dump_file && count_init)
- {
- fprintf (dump_file, "SMS const-doloop ");
-@@ -1701,9 +1704,20 @@ sms_schedule (void)
- print_partial_schedule (ps, dump_file);
- }
-
-- /* case the BCT count is not known , Do loop-versioning */
-- if (count_reg && ! count_init)
-+ if (count_init)
-+ {
-+ if (adjust_inplace)
-+ {
-+ /* When possible, set new iteration count of loop kernel in
-+ place. Otherwise, generate_prolog_epilog creates an insn
-+ to adjust. */
-+ SET_SRC (single_set (count_init)) = GEN_INT (loop_count
-+ - stage_count + 1);
-+ }
-+ }
-+ else
- {
-+ /* case the BCT count is not known , Do loop-versioning */
- rtx comp_rtx = gen_rtx_GT (VOIDmode, count_reg,
- gen_int_mode (stage_count,
- GET_MODE (count_reg)));
-
- loop_version (loop, comp_rtx, &condition_bb,
- prob, prob.invert (),
- prob, prob.invert (), true);
-- }
--
-- /* Set new iteration count of loop kernel. */
-- if (count_reg && count_init)
-- SET_SRC (single_set (count_init)) = GEN_INT (loop_count
-- - stage_count + 1);
-+ }
-
- /* Now apply the scheduled kernel to the RTL of the loop. */
- permute_partial_schedule (ps, g->closing_branch->first_note);
-@@ -1735,7 +1744,7 @@ sms_schedule (void)
- if (dump_file)
- print_node_sched_params (dump_file, g->num_nodes, ps);
- /* Generate prolog and epilog. */
-- generate_prolog_epilog (ps, loop, count_reg, count_init);
-+ generate_prolog_epilog (ps, loop, count_reg, !adjust_inplace);
- break;
- }
-
-diff --git a/gcc/testsuite/gcc.c-torture/execute/pr97421-1.c b/gcc/testsuite/gcc.c-torture/execute/pr97421-1.c
-new file mode 100644
-index 00000000000..e32fb129f18
---- /dev/null
-+++ b/gcc/testsuite/gcc.c-torture/execute/pr97421-1.c
-@@ -0,0 +1,23 @@
-+/* PR rtl-optimization/97421 */
-+/* { dg-additional-options "-fmodulo-sched" } */
-+
-+int a, b, d, e;
-+int *volatile c = &a;
-+
-+__attribute__((noinline))
-+void f(void)
-+{
-+ for (int g = 2; g >= 0; g--) {
-+ d = 0;
-+ for (b = 0; b <= 2; b++)
-+ ;
-+ e = *c;
-+ }
-+}
-+
-+int main(void)
-+{
-+ f();
-+ if (b != 3)
-+ __builtin_abort();
-+}
-diff --git a/gcc/testsuite/gcc.c-torture/execute/pr97421-2.c b/gcc/testsuite/gcc.c-torture/execute/pr97421-2.c
-new file mode 100644
-index 00000000000..142bcbcee91
---- /dev/null
-+++ b/gcc/testsuite/gcc.c-torture/execute/pr97421-2.c
-@@ -0,0 +1,18 @@
-+/* PR rtl-optimization/97421 */
-+/* { dg-additional-options "-fmodulo-sched -fno-dce -fno-strict-aliasing" } */
-+
-+static int a, b, c;
-+int *d = &c;
-+int **e = &d;
-+int ***f = &e;
-+int main()
-+{
-+ int h;
-+ for (a = 2; a; a--)
-+ for (h = 0; h <= 2; h++)
-+ for (b = 0; b <= 2; b++)
-+ ***f = 6;
-+
-+ if (b != 3)
-+ __builtin_abort();
-+}
-diff --git a/gcc/testsuite/gcc.c-torture/execute/pr97421-3.c b/gcc/testsuite/gcc.c-torture/execute/pr97421-3.c
-new file mode 100644
-index 00000000000..3f1485a4a3d
---- /dev/null
-+++ b/gcc/testsuite/gcc.c-torture/execute/pr97421-3.c
-@@ -0,0 +1,22 @@
-+/* PR rtl-optimization/97421 */
-+/* { dg-additional-options "-fmodulo-sched" } */
-+
-+int a, b, c;
-+short d;
-+void e(void) {
-+ unsigned f = 0;
-+ for (; f <= 2; f++) {
-+ int g[1];
-+ int h = (long)g;
-+ c = 0;
-+ for (; c < 10; c++)
-+ g[0] = a = 0;
-+ for (; a <= 2; a++)
-+ b = d;
-+ }
-+}
-+int main(void) {
-+ e();
-+ if (a != 3)
-+ __builtin_abort();
-+}
diff --git a/optabs-Dont-use-scalar-conversions-for-vectors.patch b/optabs-Dont-use-scalar-conversions-for-vectors.patch
deleted file mode 100644
index 91407d8c6786c95d6722f8c96c0241b45e6dee49..0000000000000000000000000000000000000000
--- a/optabs-Dont-use-scalar-conversions-for-vectors.patch
+++ /dev/null
@@ -1,69 +0,0 @@
-This backport contains 1 patch from gcc main stream tree.
-The commit id of these patchs list as following in the order of time.
-
-0001-optabs-Don-t-use-scalar-conversions-for-vectors-PR93.patch
-b6268016bf46dd63227dcbb73d13c30a3b4b9d2a
-
-diff --git a/gcc/optabs-tree.c b/gcc/optabs-tree.c
-index 3d829c27826..badd30bfda8 100644
---- a/gcc/optabs-tree.c
-+++ b/gcc/optabs-tree.c
-@@ -284,9 +284,14 @@ supportable_convert_operation (enum tree_code code,
- machine_mode m1,m2;
- bool truncp;
-
-+ gcc_assert (VECTOR_TYPE_P (vectype_out) && VECTOR_TYPE_P (vectype_in));
-+
- m1 = TYPE_MODE (vectype_out);
- m2 = TYPE_MODE (vectype_in);
-
-+ if (!VECTOR_MODE_P (m1) || !VECTOR_MODE_P (m2))
-+ return false;
-+
- /* First check if we can done conversion directly.
*/ - if ((code == FIX_TRUNC_EXPR - && can_fix_p (m1,m2,TYPE_UNSIGNED (vectype_out), &truncp) -diff --git a/gcc/testsuite/gcc.dg/vect/pr93843-1.c b/gcc/testsuite/gcc.dg/vect/pr93843-1.c -new file mode 100644 -index 00000000000..23a79ca4c96 ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/vect/pr93843-1.c -@@ -0,0 +1,21 @@ -+char a; -+struct S { short b, c; } d; -+ -+__attribute__((noipa)) void -+foo (int x) -+{ -+ if (x != 4) -+ __builtin_abort (); -+} -+ -+int -+main () -+{ -+ short *g = &d.c, *h = &d.b; -+ char e = 4 - a; -+ int f; -+ *h = *g = e; -+ for (f = 0; f < 2; f++) -+ foo (d.c); -+ return 0; -+} -diff --git a/gcc/testsuite/gcc.dg/vect/pr93843-2.c b/gcc/testsuite/gcc.dg/vect/pr93843-2.c -new file mode 100644 -index 00000000000..5fae3e5be17 ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/vect/pr93843-2.c -@@ -0,0 +1,11 @@ -+char in[2] = {2, 2}; -+short out[2] = {}; -+ -+int -+main() -+{ -+ for (int i = 0; i < 2; ++i) -+ out[i] = in[i]; -+ asm("":::"memory"); -+ if (out[0] != 2) __builtin_abort(); -+} diff --git a/re-PR-target-91124-gcc.target-i386-avx512vl-vpshldvd.patch b/re-PR-target-91124-gcc.target-i386-avx512vl-vpshldvd.patch deleted file mode 100644 index d95d3b2096ff676e508784fb410458f3b6d4fbf8..0000000000000000000000000000000000000000 --- a/re-PR-target-91124-gcc.target-i386-avx512vl-vpshldvd.patch +++ /dev/null @@ -1,215 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -491b0b4015a70071a05e0faa5c2082c43a51a0d3 -0001-re-PR-target-91124-gcc.target-i386-avx512vl-vpshldvd.patch - -diff -urpN a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def ---- a/gcc/config/i386/i386-builtin.def 2020-03-12 07:07:21.000000000 -0400 -+++ b/gcc/config/i386/i386-builtin.def 2020-12-17 20:46:53.868000000 -0500 -@@ -2516,60 +2516,60 @@ BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPT - BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v2di_mask, "__builtin_ia32_vpshld_v2di_mask", IX86_BUILTIN_VPSHLDV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_INT) - - BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v32hi, "__builtin_ia32_vpshrdv_v32hi", IX86_BUILTIN_VPSHRDVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI) --BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshrdv_v32hi_mask, "__builtin_ia32_vpshrdv_v32hi_mask", IX86_BUILTIN_VPSHRDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_INT) --BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshrdv_v32hi_maskz, "__builtin_ia32_vpshrdv_v32hi_maskz", IX86_BUILTIN_VPSHRDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_INT) -+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshrdv_v32hi_mask, "__builtin_ia32_vpshrdv_v32hi_mask", IX86_BUILTIN_VPSHRDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshrdv_v32hi_maskz, "__builtin_ia32_vpshrdv_v32hi_maskz", IX86_BUILTIN_VPSHRDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) - BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi, "__builtin_ia32_vpshrdv_v16hi", IX86_BUILTIN_VPSHRDVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI) --BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi_mask, "__builtin_ia32_vpshrdv_v16hi_mask", IX86_BUILTIN_VPSHRDVV16HI_MASK, UNKNOWN, (int) 
V16HI_FTYPE_V16HI_V16HI_V16HI_INT) --BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi_maskz, "__builtin_ia32_vpshrdv_v16hi_maskz", IX86_BUILTIN_VPSHRDVV16HI_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_INT) -+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi_mask, "__builtin_ia32_vpshrdv_v16hi_mask", IX86_BUILTIN_VPSHRDVV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI) -+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi_maskz, "__builtin_ia32_vpshrdv_v16hi_maskz", IX86_BUILTIN_VPSHRDVV16HI_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI) - BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8hi, "__builtin_ia32_vpshrdv_v8hi", IX86_BUILTIN_VPSHRDVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI) --BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8hi_mask, "__builtin_ia32_vpshrdv_v8hi_mask", IX86_BUILTIN_VPSHRDVV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_INT) --BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8hi_maskz, "__builtin_ia32_vpshrdv_v8hi_maskz", IX86_BUILTIN_VPSHRDVV8HI_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_INT) -+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8hi_mask, "__builtin_ia32_vpshrdv_v8hi_mask", IX86_BUILTIN_VPSHRDVV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI) -+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8hi_maskz, "__builtin_ia32_vpshrdv_v8hi_maskz", IX86_BUILTIN_VPSHRDVV8HI_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI) - BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v16si, "__builtin_ia32_vpshrdv_v16si", IX86_BUILTIN_VPSHRDVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) --BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v16si_mask, "__builtin_ia32_vpshrdv_v16si_mask", IX86_BUILTIN_VPSHRDVV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT) --BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v16si_maskz, "__builtin_ia32_vpshrdv_v16si_maskz", IX86_BUILTIN_VPSHRDVV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT) -+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v16si_mask, "__builtin_ia32_vpshrdv_v16si_mask", IX86_BUILTIN_VPSHRDVV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v16si_maskz, "__builtin_ia32_vpshrdv_v16si_maskz", IX86_BUILTIN_VPSHRDVV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) - BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8si, "__builtin_ia32_vpshrdv_v8si", IX86_BUILTIN_VPSHRDVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) --BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8si_mask, "__builtin_ia32_vpshrdv_v8si_mask", IX86_BUILTIN_VPSHRDVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT) --BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8si_maskz, "__builtin_ia32_vpshrdv_v8si_maskz", IX86_BUILTIN_VPSHRDVV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT) -+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8si_mask, "__builtin_ia32_vpshrdv_v8si_mask", IX86_BUILTIN_VPSHRDVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) -+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, 
CODE_FOR_vpshrdv_v8si_maskz, "__builtin_ia32_vpshrdv_v8si_maskz", IX86_BUILTIN_VPSHRDVV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) - BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4si, "__builtin_ia32_vpshrdv_v4si", IX86_BUILTIN_VPSHRDVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) --BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4si_mask, "__builtin_ia32_vpshrdv_v4si_mask", IX86_BUILTIN_VPSHRDVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT) --BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4si_maskz, "__builtin_ia32_vpshrdv_v4si_maskz", IX86_BUILTIN_VPSHRDVV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT) -+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4si_mask, "__builtin_ia32_vpshrdv_v4si_mask", IX86_BUILTIN_VPSHRDVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) -+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4si_maskz, "__builtin_ia32_vpshrdv_v4si_maskz", IX86_BUILTIN_VPSHRDVV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) - BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v8di, "__builtin_ia32_vpshrdv_v8di", IX86_BUILTIN_VPSHRDVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI) --BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v8di_mask, "__builtin_ia32_vpshrdv_v8di_mask", IX86_BUILTIN_VPSHRDVV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT) --BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v8di_maskz, "__builtin_ia32_vpshrdv_v8di_maskz", IX86_BUILTIN_VPSHRDVV8DI_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT) -+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v8di_mask, "__builtin_ia32_vpshrdv_v8di_mask", IX86_BUILTIN_VPSHRDVV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v8di_maskz, "__builtin_ia32_vpshrdv_v8di_maskz", IX86_BUILTIN_VPSHRDVV8DI_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) - BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4di, "__builtin_ia32_vpshrdv_v4di", IX86_BUILTIN_VPSHRDVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI) --BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4di_mask, "__builtin_ia32_vpshrdv_v4di_mask", IX86_BUILTIN_VPSHRDVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT) --BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4di_maskz, "__builtin_ia32_vpshrdv_v4di_maskz", IX86_BUILTIN_VPSHRDVV4DI_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT) -+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4di_mask, "__builtin_ia32_vpshrdv_v4di_mask", IX86_BUILTIN_VPSHRDVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) -+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4di_maskz, "__builtin_ia32_vpshrdv_v4di_maskz", IX86_BUILTIN_VPSHRDVV4DI_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) - BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v2di, "__builtin_ia32_vpshrdv_v2di", IX86_BUILTIN_VPSHRDVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI) --BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v2di_mask, "__builtin_ia32_vpshrdv_v2di_mask", IX86_BUILTIN_VPSHRDVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT) --BDESC (OPTION_MASK_ISA_AVX512VBMI2 | 
OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v2di_maskz, "__builtin_ia32_vpshrdv_v2di_maskz", IX86_BUILTIN_VPSHRDVV2DI_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT) -+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v2di_mask, "__builtin_ia32_vpshrdv_v2di_mask", IX86_BUILTIN_VPSHRDVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI) -+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v2di_maskz, "__builtin_ia32_vpshrdv_v2di_maskz", IX86_BUILTIN_VPSHRDVV2DI_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI) - - BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v32hi, "__builtin_ia32_vpshldv_v32hi", IX86_BUILTIN_VPSHLDVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI) --BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshldv_v32hi_mask, "__builtin_ia32_vpshldv_v32hi_mask", IX86_BUILTIN_VPSHLDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_INT) --BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshldv_v32hi_maskz, "__builtin_ia32_vpshldv_v32hi_maskz", IX86_BUILTIN_VPSHLDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_INT) -+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshldv_v32hi_mask, "__builtin_ia32_vpshldv_v32hi_mask", IX86_BUILTIN_VPSHLDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshldv_v32hi_maskz, "__builtin_ia32_vpshldv_v32hi_maskz", IX86_BUILTIN_VPSHLDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) - BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi, "__builtin_ia32_vpshldv_v16hi", IX86_BUILTIN_VPSHLDVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI) --BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi_mask, "__builtin_ia32_vpshldv_v16hi_mask", IX86_BUILTIN_VPSHLDVV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_INT) --BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi_maskz, "__builtin_ia32_vpshldv_v16hi_maskz", IX86_BUILTIN_VPSHLDVV16HI_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_INT) -+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi_mask, "__builtin_ia32_vpshldv_v16hi_mask", IX86_BUILTIN_VPSHLDVV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI) -+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi_maskz, "__builtin_ia32_vpshldv_v16hi_maskz", IX86_BUILTIN_VPSHLDVV16HI_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI) - BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8hi, "__builtin_ia32_vpshldv_v8hi", IX86_BUILTIN_VPSHLDVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI) --BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8hi_mask, "__builtin_ia32_vpshldv_v8hi_mask", IX86_BUILTIN_VPSHLDVV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_INT) --BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8hi_maskz, "__builtin_ia32_vpshldv_v8hi_maskz", IX86_BUILTIN_VPSHLDVV8HI_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_INT) -+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8hi_mask, "__builtin_ia32_vpshldv_v8hi_mask", IX86_BUILTIN_VPSHLDVV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI) -+BDESC 
(OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8hi_maskz, "__builtin_ia32_vpshldv_v8hi_maskz", IX86_BUILTIN_VPSHLDVV8HI_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI) - BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v16si, "__builtin_ia32_vpshldv_v16si", IX86_BUILTIN_VPSHLDVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) --BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v16si_mask, "__builtin_ia32_vpshldv_v16si_mask", IX86_BUILTIN_VPSHLDVV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT) --BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v16si_maskz, "__builtin_ia32_vpshldv_v16si_maskz", IX86_BUILTIN_VPSHLDVV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT) -+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v16si_mask, "__builtin_ia32_vpshldv_v16si_mask", IX86_BUILTIN_VPSHLDVV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v16si_maskz, "__builtin_ia32_vpshldv_v16si_maskz", IX86_BUILTIN_VPSHLDVV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) - BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8si, "__builtin_ia32_vpshldv_v8si", IX86_BUILTIN_VPSHLDVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) --BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8si_mask, "__builtin_ia32_vpshldv_v8si_mask", IX86_BUILTIN_VPSHLDVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT) --BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8si_maskz, "__builtin_ia32_vpshldv_v8si_maskz", IX86_BUILTIN_VPSHLDVV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT) -+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8si_mask, "__builtin_ia32_vpshldv_v8si_mask", IX86_BUILTIN_VPSHLDVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) -+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8si_maskz, "__builtin_ia32_vpshldv_v8si_maskz", IX86_BUILTIN_VPSHLDVV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) - BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4si, "__builtin_ia32_vpshldv_v4si", IX86_BUILTIN_VPSHLDVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) --BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4si_mask, "__builtin_ia32_vpshldv_v4si_mask", IX86_BUILTIN_VPSHLDVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT) --BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4si_maskz, "__builtin_ia32_vpshldv_v4si_maskz", IX86_BUILTIN_VPSHLDVV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT) -+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4si_mask, "__builtin_ia32_vpshldv_v4si_mask", IX86_BUILTIN_VPSHLDVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) -+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4si_maskz, "__builtin_ia32_vpshldv_v4si_maskz", IX86_BUILTIN_VPSHLDVV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) - BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v8di, "__builtin_ia32_vpshldv_v8di", IX86_BUILTIN_VPSHLDVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI) --BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v8di_mask, "__builtin_ia32_vpshldv_v8di_mask", IX86_BUILTIN_VPSHLDVV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT) --BDESC 
(OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v8di_maskz, "__builtin_ia32_vpshldv_v8di_maskz", IX86_BUILTIN_VPSHLDVV8DI_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT) -+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v8di_mask, "__builtin_ia32_vpshldv_v8di_mask", IX86_BUILTIN_VPSHLDVV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) -+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v8di_maskz, "__builtin_ia32_vpshldv_v8di_maskz", IX86_BUILTIN_VPSHLDVV8DI_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI) - BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4di, "__builtin_ia32_vpshldv_v4di", IX86_BUILTIN_VPSHLDVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI) --BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4di_mask, "__builtin_ia32_vpshldv_v4di_mask", IX86_BUILTIN_VPSHLDVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT) --BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4di_maskz, "__builtin_ia32_vpshldv_v4di_maskz", IX86_BUILTIN_VPSHLDVV4DI_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT) -+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4di_mask, "__builtin_ia32_vpshldv_v4di_mask", IX86_BUILTIN_VPSHLDVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) -+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4di_maskz, "__builtin_ia32_vpshldv_v4di_maskz", IX86_BUILTIN_VPSHLDVV4DI_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI) - BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v2di, "__builtin_ia32_vpshldv_v2di", IX86_BUILTIN_VPSHLDVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI) --BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v2di_mask, "__builtin_ia32_vpshldv_v2di_mask", IX86_BUILTIN_VPSHLDVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT) --BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v2di_maskz, "__builtin_ia32_vpshldv_v2di_maskz", IX86_BUILTIN_VPSHLDVV2DI_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT) -+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v2di_mask, "__builtin_ia32_vpshldv_v2di_mask", IX86_BUILTIN_VPSHLDVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI) -+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v2di_maskz, "__builtin_ia32_vpshldv_v2di_maskz", IX86_BUILTIN_VPSHLDVV2DI_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI) - - /* GFNI */ - BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_vgf2p8affineinvqb_v64qi, "__builtin_ia32_vgf2p8affineinvqb_v64qi", IX86_BUILTIN_VGF2P8AFFINEINVQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT) -@@ -2594,44 +2594,44 @@ BDESC (OPTION_MASK_ISA_GFNI | OPTION_MAS - /* VNNI */ - - BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si, "__builtin_ia32_vpdpbusd_v16si", IX86_BUILTIN_VPDPBUSDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) --BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si_mask, "__builtin_ia32_vpdpbusd_v16si_mask", IX86_BUILTIN_VPDPBUSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT) --BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si_maskz, "__builtin_ia32_vpdpbusd_v16si_maskz", IX86_BUILTIN_VPDPBUSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT) -+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si_mask, 
"__builtin_ia32_vpdpbusd_v16si_mask", IX86_BUILTIN_VPDPBUSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si_maskz, "__builtin_ia32_vpdpbusd_v16si_maskz", IX86_BUILTIN_VPDPBUSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) - BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si, "__builtin_ia32_vpdpbusd_v8si", IX86_BUILTIN_VPDPBUSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) --BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si_mask, "__builtin_ia32_vpdpbusd_v8si_mask", IX86_BUILTIN_VPDPBUSDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT) --BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si_maskz, "__builtin_ia32_vpdpbusd_v8si_maskz", IX86_BUILTIN_VPDPBUSDV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT) -+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si_mask, "__builtin_ia32_vpdpbusd_v8si_mask", IX86_BUILTIN_VPDPBUSDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) -+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si_maskz, "__builtin_ia32_vpdpbusd_v8si_maskz", IX86_BUILTIN_VPDPBUSDV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) - BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si, "__builtin_ia32_vpdpbusd_v4si", IX86_BUILTIN_VPDPBUSDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) --BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si_mask, "__builtin_ia32_vpdpbusd_v4si_mask", IX86_BUILTIN_VPDPBUSDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT) --BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si_maskz, "__builtin_ia32_vpdpbusd_v4si_maskz", IX86_BUILTIN_VPDPBUSDV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT) -+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si_mask, "__builtin_ia32_vpdpbusd_v4si_mask", IX86_BUILTIN_VPDPBUSDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) -+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si_maskz, "__builtin_ia32_vpdpbusd_v4si_maskz", IX86_BUILTIN_VPDPBUSDV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) - - BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si, "__builtin_ia32_vpdpbusds_v16si", IX86_BUILTIN_VPDPBUSDSV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) --BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si_mask, "__builtin_ia32_vpdpbusds_v16si_mask", IX86_BUILTIN_VPDPBUSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT) --BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si_maskz, "__builtin_ia32_vpdpbusds_v16si_maskz", IX86_BUILTIN_VPDPBUSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT) -+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si_mask, "__builtin_ia32_vpdpbusds_v16si_mask", IX86_BUILTIN_VPDPBUSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si_maskz, "__builtin_ia32_vpdpbusds_v16si_maskz", IX86_BUILTIN_VPDPBUSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) - BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si, "__builtin_ia32_vpdpbusds_v8si", IX86_BUILTIN_VPDPBUSDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) 
--BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si_mask, "__builtin_ia32_vpdpbusds_v8si_mask", IX86_BUILTIN_VPDPBUSDSV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT) --BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si_maskz, "__builtin_ia32_vpdpbusds_v8si_maskz", IX86_BUILTIN_VPDPBUSDSV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT) -+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si_mask, "__builtin_ia32_vpdpbusds_v8si_mask", IX86_BUILTIN_VPDPBUSDSV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) -+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si_maskz, "__builtin_ia32_vpdpbusds_v8si_maskz", IX86_BUILTIN_VPDPBUSDSV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) - BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si, "__builtin_ia32_vpdpbusds_v4si", IX86_BUILTIN_VPDPBUSDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) --BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si_mask, "__builtin_ia32_vpdpbusds_v4si_mask", IX86_BUILTIN_VPDPBUSDSV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT) --BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si_maskz, "__builtin_ia32_vpdpbusds_v4si_maskz", IX86_BUILTIN_VPDPBUSDSV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT) -+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si_mask, "__builtin_ia32_vpdpbusds_v4si_mask", IX86_BUILTIN_VPDPBUSDSV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) -+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si_maskz, "__builtin_ia32_vpdpbusds_v4si_maskz", IX86_BUILTIN_VPDPBUSDSV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) - - BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si, "__builtin_ia32_vpdpwssd_v16si", IX86_BUILTIN_VPDPWSSDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) --BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si_mask, "__builtin_ia32_vpdpwssd_v16si_mask", IX86_BUILTIN_VPDPWSSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT) --BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si_maskz, "__builtin_ia32_vpdpwssd_v16si_maskz", IX86_BUILTIN_VPDPWSSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT) -+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si_mask, "__builtin_ia32_vpdpwssd_v16si_mask", IX86_BUILTIN_VPDPWSSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si_maskz, "__builtin_ia32_vpdpwssd_v16si_maskz", IX86_BUILTIN_VPDPWSSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) - BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si, "__builtin_ia32_vpdpwssd_v8si", IX86_BUILTIN_VPDPWSSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) --BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si_mask, "__builtin_ia32_vpdpwssd_v8si_mask", IX86_BUILTIN_VPDPWSSDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT) --BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si_maskz, "__builtin_ia32_vpdpwssd_v8si_maskz", IX86_BUILTIN_VPDPWSSDV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT) -+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, 
CODE_FOR_vpdpwssd_v8si_mask, "__builtin_ia32_vpdpwssd_v8si_mask", IX86_BUILTIN_VPDPWSSDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) -+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si_maskz, "__builtin_ia32_vpdpwssd_v8si_maskz", IX86_BUILTIN_VPDPWSSDV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) - BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si, "__builtin_ia32_vpdpwssd_v4si", IX86_BUILTIN_VPDPWSSDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) --BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si_mask, "__builtin_ia32_vpdpwssd_v4si_mask", IX86_BUILTIN_VPDPWSSDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT) --BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si_maskz, "__builtin_ia32_vpdpwssd_v4si_maskz", IX86_BUILTIN_VPDPWSSDV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT) -+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si_mask, "__builtin_ia32_vpdpwssd_v4si_mask", IX86_BUILTIN_VPDPWSSDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) -+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si_maskz, "__builtin_ia32_vpdpwssd_v4si_maskz", IX86_BUILTIN_VPDPWSSDV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) - - BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si, "__builtin_ia32_vpdpwssds_v16si", IX86_BUILTIN_VPDPWSSDSV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) --BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si_mask, "__builtin_ia32_vpdpwssds_v16si_mask", IX86_BUILTIN_VPDPWSSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT) --BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si_maskz, "__builtin_ia32_vpdpwssds_v16si_maskz", IX86_BUILTIN_VPDPWSSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT) -+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si_mask, "__builtin_ia32_vpdpwssds_v16si_mask", IX86_BUILTIN_VPDPWSSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si_maskz, "__builtin_ia32_vpdpwssds_v16si_maskz", IX86_BUILTIN_VPDPWSSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) - BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si, "__builtin_ia32_vpdpwssds_v8si", IX86_BUILTIN_VPDPWSSDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) --BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si_mask, "__builtin_ia32_vpdpwssds_v8si_mask", IX86_BUILTIN_VPDPWSSDSV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT) --BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si_maskz, "__builtin_ia32_vpdpwssds_v8si_maskz", IX86_BUILTIN_VPDPWSSDSV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT) -+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si_mask, "__builtin_ia32_vpdpwssds_v8si_mask", IX86_BUILTIN_VPDPWSSDSV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) -+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si_maskz, "__builtin_ia32_vpdpwssds_v8si_maskz", IX86_BUILTIN_VPDPWSSDSV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) - BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si, "__builtin_ia32_vpdpwssds_v4si", 
IX86_BUILTIN_VPDPWSSDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) --BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si_mask, "__builtin_ia32_vpdpwssds_v4si_mask", IX86_BUILTIN_VPDPWSSDSV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT) --BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si_maskz, "__builtin_ia32_vpdpwssds_v4si_maskz", IX86_BUILTIN_VPDPWSSDSV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT) -+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si_mask, "__builtin_ia32_vpdpwssds_v4si_mask", IX86_BUILTIN_VPDPWSSDSV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) -+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si_maskz, "__builtin_ia32_vpdpwssds_v4si_maskz", IX86_BUILTIN_VPDPWSSDSV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) - - /* VPCLMULQDQ */ - BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpclmulqdq_v2di, "__builtin_ia32_vpclmulqdq_v2di", IX86_BUILTIN_VPCLMULQDQ2, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT) -diff -urpN a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def ---- a/gcc/config/i386/i386-builtin-types.def 2020-03-12 07:07:21.000000000 -0400 -+++ b/gcc/config/i386/i386-builtin-types.def 2020-12-17 20:46:53.868000000 -0500 -@@ -1246,17 +1246,8 @@ DEF_FUNCTION_TYPE (V8HI, V8HI, V8HI, INT - DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI, INT, V4SI, INT) - DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, INT, V2DI, INT) - DEF_FUNCTION_TYPE (V32HI, V32HI, V32HI, V32HI) --DEF_FUNCTION_TYPE (V32HI, V32HI, V32HI, V32HI, INT) --DEF_FUNCTION_TYPE (V16HI, V16HI, V16HI, V16HI, INT) --DEF_FUNCTION_TYPE (V8HI, V8HI, V8HI, V8HI, INT) --DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, V8SI, INT) --DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI, V4SI, INT) - DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, V8DI) --DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, V8DI, INT) --DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI, V4DI, INT) - DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, V16SI) --DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, V16SI, INT) --DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, V2DI, INT) - - # BITALG builtins - DEF_FUNCTION_TYPE (V4DI, V4DI) -diff -urpN a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c ---- a/gcc/config/i386/i386-expand.c 2020-12-17 20:44:55.508000000 -0500 -+++ b/gcc/config/i386/i386-expand.c 2020-12-17 20:46:53.872000000 -0500 -@@ -9437,15 +9437,6 @@ ix86_expand_args_builtin (const struct b - case USI_FTYPE_V32HI_V32HI_INT_USI: - case UHI_FTYPE_V16HI_V16HI_INT_UHI: - case UQI_FTYPE_V8HI_V8HI_INT_UQI: -- case V32HI_FTYPE_V32HI_V32HI_V32HI_INT: -- case V16HI_FTYPE_V16HI_V16HI_V16HI_INT: -- case V8HI_FTYPE_V8HI_V8HI_V8HI_INT: -- case V8SI_FTYPE_V8SI_V8SI_V8SI_INT: -- case V4DI_FTYPE_V4DI_V4DI_V4DI_INT: -- case V8DI_FTYPE_V8DI_V8DI_V8DI_INT: -- case V16SI_FTYPE_V16SI_V16SI_V16SI_INT: -- case V2DI_FTYPE_V2DI_V2DI_V2DI_INT: -- case V4SI_FTYPE_V4SI_V4SI_V4SI_INT: - nargs = 4; - mask_pos = 1; - nargs_constant = 1; diff --git a/reduction-chain-slp-option.patch b/reduction-chain-slp-option.patch deleted file mode 100644 index 1b10c5cdde1b15c9d9752df5f49b367e81392ee9..0000000000000000000000000000000000000000 --- a/reduction-chain-slp-option.patch +++ /dev/null @@ -1,52 +0,0 @@ -diff -Nurp a/gcc/common.opt b/gcc/common.opt ---- a/gcc/common.opt 2020-06-20 23:53:56.124000000 +0800 -+++ b/gcc/common.opt 2020-06-22 23:02:18.808000000 +0800 -@@ -2858,6 +2858,10 @@ ftree-slp-vectorize - Common Report 
Var(flag_tree_slp_vectorize) Optimization EnabledBy(ftree-vectorize)
- Enable basic block vectorization (SLP) on trees.
-
-+ftree-vect-analyze-slp-group
-+Common Report Var(flag_tree_slp_group) Init(0)
-+Disable SLP vectorization for reduction chain on tree.
-+
- fvect-cost-model=
- Common Joined RejectNegative Enum(vect_cost_model) Var(flag_vect_cost_model) Init(VECT_COST_MODEL_DEFAULT) Optimization
- -fvect-cost-model=[unlimited|dynamic|cheap] Specifies the cost model for vectorization.
-diff -Nurp a/gcc/testsuite/gcc.dg/vect/vect-reduc-12.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-12.c
---- a/gcc/testsuite/gcc.dg/vect/vect-reduc-12.c 1970-01-01 08:00:00.000000000 +0800
-+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-12.c 2020-06-22 23:04:08.260000000 +0800
-@@ -0,0 +1,20 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -funsafe-math-optimizations -fno-tree-reassoc -ftree-vect-analyze-slp-group" } */
-+void f(double *a, double *res, double m) {
-+ double res1, res0;
-+ res1 = 0;
-+ res0 = 0;
-+ for (int i = 0; i < 1000; i+=8) {
-+ res0 += a[i] * m;
-+ res1 += a[i+1] * m;
-+ res0 += a[i+2] * m;
-+ res1 += a[i+3] * m;
-+ res0 += a[i+4] * m;
-+ res1 += a[i+5] * m;
-+ res0 += a[i+6] * m;
-+ res1 += a[i+7] * m;
-+ }
-+ res[0] += res0;
-+ res[1] += res1;
-+}
-+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */
-diff -Nurp a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
---- a/gcc/tree-vect-slp.c 2020-06-21 01:07:56.516000000 +0800
-+++ b/gcc/tree-vect-slp.c 2020-06-22 23:02:54.540000000 +0800
-@@ -2327,8 +2327,9 @@ vect_analyze_slp (vec_info *vinfo, unsig
- {
- /* Find SLP sequences starting from reduction chains. */
- FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
-- if (! vect_analyze_slp_instance (vinfo, bst_map, first_element,
-+ if (flag_tree_slp_group
-+ || ! vect_analyze_slp_instance (vinfo, bst_map, first_element,
- max_tree_size))
- {
- /* Dissolve reduction chain group. */
- stmt_vec_info vinfo = first_element;
diff --git a/reduction-paths-with-unhandled-live-stmt.patch b/reduction-paths-with-unhandled-live-stmt.patch
deleted file mode 100644
index 22dc08d6a9399dd636b018a2d96759fee06433dd..0000000000000000000000000000000000000000
--- a/reduction-paths-with-unhandled-live-stmt.patch
+++ /dev/null
@@ -1,64 +0,0 @@
-This backport contains 1 patch from gcc main stream tree.
-The commit id of these patchs list as following in the order of time.
-
-2686de5617bfb572343933be2883e8274c9735b5
-0001-tree-optimization-97760-reduction-paths-with-unhandl.patch
-
-diff --git a/gcc/testsuite/gcc.dg/vect/pr97760.c b/gcc/testsuite/gcc.dg/vect/pr97760.c
-new file mode 100644
-index 00000000000..da5ac937a43
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/vect/pr97760.c
-@@ -0,0 +1,26 @@
-+#include "tree-vect.h"
-+
-+int b=1;
-+static int *g = &b;
-+
-+void __attribute__((noipa))
-+h (unsigned int n)
-+{
-+ int i = 3;
-+ int f = 3;
-+ for (; f <= 50; f += 4) {
-+ i += 4;
-+ *g = i;
-+ i += n;
-+ }
-+}
-+
-+int main ()
-+{
-+ check_vect ();
-+
-+ h (9);
-+ if (*g != 150 || b != 150)
-+ __builtin_abort ();
-+ return 0;
-+}
-diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
-index 977633a3ce3..39b7319e825 100644
---- a/gcc/tree-vect-loop.c
-+++ b/gcc/tree-vect-loop.c
-@@ -3326,14 +3326,17 @@ pop:
- fail = true;
- break;
- }
-- /* Check there's only a single stmt the op is used on inside
-- of the loop. */
-+ /* Check there's only a single stmt the op is used on. For the
-+ not value-changing tail and the last stmt allow out-of-loop uses.
-+ ??? We could relax this and handle arbitrary live stmts by -+ forcing a scalar epilogue for example. */ - imm_use_iterator imm_iter; - gimple *op_use_stmt; - unsigned cnt = 0; - FOR_EACH_IMM_USE_STMT (op_use_stmt, imm_iter, op) - if (!is_gimple_debug (op_use_stmt) -- && flow_bb_inside_loop_p (loop, gimple_bb (op_use_stmt))) -+ && (*code != ERROR_MARK -+ || flow_bb_inside_loop_p (loop, gimple_bb (op_use_stmt)))) - { - /* We want to allow x + x but not x < 1 ? x : 2. */ - if (is_gimple_assign (op_use_stmt) - diff --git a/reductions-slp-enhancement.patch b/reductions-slp-enhancement.patch deleted file mode 100644 index de426a3cb99d4207895fac9fe9a2d760e8100f65..0000000000000000000000000000000000000000 --- a/reductions-slp-enhancement.patch +++ /dev/null @@ -1,59 +0,0 @@ -This backport contains 1 patch from gcc main stream tree. -The commit id of these patchs list as following in the order of time. - -0001-tree-vect-slp.c-vect_analyze_slp-When-reduction-grou.patch -0214d31a48f867b9b00134cea7223d35ed7865aa - -diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-9.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-9.c -new file mode 100644 -index 00000000000..bee642ee999 ---- /dev/null -+++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-9.c -@@ -0,0 +1,25 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target vect_int_mult } */ -+ -+int -+bar (int *x, int a, int b, int n) -+{ -+ x = __builtin_assume_aligned (x, __BIGGEST_ALIGNMENT__); -+ int sum1 = 0; -+ int sum2 = 0; -+ for (int i = 0; i < n; ++i) -+ { -+ /* Reduction chain vectorization fails here because of the -+ different operations but we can still vectorize both -+ reductions as SLP reductions, saving IVs. */ -+ sum1 += x[2*i] - a; -+ sum1 += x[2*i+1] * b; -+ sum2 += x[2*i] - b; -+ sum2 += x[2*i+1] * a; -+ } -+ return sum1 + sum2; -+} -+ -+/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" } } */ -+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */ -+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */ -diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c -index e1061ede061..0af51197a84 100644 ---- a/gcc/tree-vect-slp.c -+++ b/gcc/tree-vect-slp.c -@@ -2271,14 +2271,18 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size) - { - /* Dissolve reduction chain group. */ - stmt_vec_info vinfo = first_element; -+ stmt_vec_info last = NULL; - while (vinfo) - { - stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo); - REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL; - REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL; -+ last = vinfo; - vinfo = next; - } - STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def; -+ /* It can be still vectorized as part of an SLP reduction. */ -+ loop_vinfo->reductions.safe_push (last); - } - } diff --git a/redundant-loop-elimination.patch b/redundant-loop-elimination.patch deleted file mode 100644 index 53a506335239fff539c86c914e3b80daed73ee59..0000000000000000000000000000000000000000 --- a/redundant-loop-elimination.patch +++ /dev/null @@ -1,486 +0,0 @@ -diff -Nurp a/gcc/common.opt b/gcc/common.opt ---- a/gcc/common.opt 2021-02-18 21:32:50.724000000 -0500 -+++ b/gcc/common.opt 2021-02-18 21:33:36.920000000 -0500 -@@ -1150,6 +1150,10 @@ fcompare-elim - Common Report Var(flag_compare_elim_after_reload) Optimization - Perform comparison elimination after register allocation has finished. - -+floop-elim -+Common Report Var(flag_loop_elim) Init(0) Optimization -+Perform redundant loop elimination. 
-+ - fconserve-stack - Common Var(flag_conserve_stack) Optimization - Do not perform optimizations increasing noticeably stack usage. -diff -Nurp a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c ---- a/gcc/tree-ssa-phiopt.c 2021-02-18 21:32:52.648000000 -0500 -+++ b/gcc/tree-ssa-phiopt.c 2021-02-19 01:55:10.128000000 -0500 -@@ -71,6 +71,7 @@ static hash_set * get_non_trapping - static void replace_phi_edge_with_variable (basic_block, edge, gimple *, tree); - static void hoist_adjacent_loads (basic_block, basic_block, - basic_block, basic_block); -+static bool do_phiopt_pattern (basic_block, basic_block, basic_block); - static bool gate_hoist_loads (void); - - /* This pass tries to transform conditional stores into unconditional -@@ -259,6 +260,10 @@ tree_ssa_phiopt_worker (bool do_store_el - hoist_adjacent_loads (bb, bb1, bb2, bb3); - continue; - } -+ else if (flag_loop_elim && do_phiopt_pattern (bb, bb1, bb2)) -+ { -+ continue; -+ } - else - continue; - -@@ -2899,6 +2904,449 @@ hoist_adjacent_loads (basic_block bb0, b - } - } - -+static bool check_uses (tree, hash_set *); -+ -+/* Check SSA_NAME is used in -+ if (SSA_NAME == 0) -+ ... -+ or -+ if (SSA_NAME != 0) -+ ... -+*/ -+static bool -+check_uses_cond (const_tree ssa_name, gimple *stmt, -+ hash_set *hset ATTRIBUTE_UNUSED) -+{ -+ tree_code code = gimple_cond_code (stmt); -+ if (code != EQ_EXPR && code != NE_EXPR) -+ { -+ return false; -+ } -+ -+ tree lhs = gimple_cond_lhs (stmt); -+ tree rhs = gimple_cond_rhs (stmt); -+ if ((lhs == ssa_name && integer_zerop (rhs)) -+ || (rhs == ssa_name && integer_zerop (lhs))) -+ { -+ return true; -+ } -+ -+ return false; -+} -+ -+/* Check SSA_NAME is used in -+ _tmp = SSA_NAME == 0; -+ or -+ _tmp = SSA_NAME != 0; -+ or -+ _tmp = SSA_NAME | _tmp2; -+*/ -+static bool -+check_uses_assign (const_tree ssa_name, gimple *stmt, hash_set *hset) -+{ -+ tree_code code = gimple_assign_rhs_code (stmt); -+ tree lhs, rhs1, rhs2; -+ -+ switch (code) -+ { -+ case EQ_EXPR: -+ case NE_EXPR: -+ rhs1 = gimple_assign_rhs1 (stmt); -+ rhs2 = gimple_assign_rhs2 (stmt); -+ if ((rhs1 == ssa_name && integer_zerop (rhs2)) -+ || (rhs2 == ssa_name && integer_zerop (rhs1))) -+ { -+ return true; -+ } -+ break; -+ -+ case BIT_IOR_EXPR: -+ lhs = gimple_assign_lhs (stmt); -+ if (hset->contains (lhs)) -+ { -+ return false; -+ } -+ /* We should check the use of _tmp further. */ -+ return check_uses (lhs, hset); -+ -+ default: -+ break; -+ } -+ return false; -+} -+ -+/* Check SSA_NAME is used in -+ # result = PHI -+*/ -+static bool -+check_uses_phi (const_tree ssa_name, gimple *stmt, hash_set *hset) -+{ -+ for (unsigned i = 0; i < gimple_phi_num_args (stmt); i++) -+ { -+ tree arg = gimple_phi_arg_def (stmt, i); -+ if (!integer_zerop (arg) && arg != ssa_name) -+ { -+ return false; -+ } -+ } -+ -+ tree result = gimple_phi_result (stmt); -+ -+ /* It is used to avoid infinite recursion, -+ -+ if (cond) -+ goto -+ else -+ goto -+ -+ -+ # _tmp2 = PHI <0 (bb 1), _tmp3 (bb 3)> -+ {BODY} -+ if (cond) -+ goto -+ else -+ goto -+ -+ -+ # _tmp3 = PHI <0 (bb 1), _tmp2 (bb 2)> -+ {BODY} -+ if (cond) -+ goto -+ else -+ goto -+ -+ -+ ... -+ */ -+ if (hset->contains (result)) -+ { -+ return false; -+ } -+ -+ return check_uses (result, hset); -+} -+ -+/* Check the use of SSA_NAME, it should only be used in comparison -+ operation and PHI node. HSET is used to record the ssa_names -+ that have been already checked. 
*/ -+static bool -+check_uses (tree ssa_name, hash_set *hset) -+{ -+ imm_use_iterator imm_iter; -+ use_operand_p use_p; -+ -+ if (TREE_CODE (ssa_name) != SSA_NAME) -+ { -+ return false; -+ } -+ -+ if (SSA_NAME_VAR (ssa_name) -+ && is_global_var (SSA_NAME_VAR (ssa_name))) -+ { -+ return false; -+ } -+ -+ hset->add (ssa_name); -+ -+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_name) -+ { -+ gimple *stmt = USE_STMT (use_p); -+ -+ /* Ignore debug gimple statements. */ -+ if (is_gimple_debug (stmt)) -+ { -+ continue; -+ } -+ -+ switch (gimple_code (stmt)) -+ { -+ case GIMPLE_COND: -+ if (!check_uses_cond (ssa_name, stmt, hset)) -+ { -+ return false; -+ } -+ break; -+ -+ case GIMPLE_ASSIGN: -+ if (!check_uses_assign (ssa_name, stmt, hset)) -+ { -+ return false; -+ } -+ break; -+ -+ case GIMPLE_PHI: -+ if (!check_uses_phi (ssa_name, stmt, hset)) -+ { -+ return false; -+ } -+ break; -+ -+ default: -+ return false; -+ } -+ } -+ return true; -+} -+ -+static bool -+check_def_gimple (gimple *def1, gimple *def2, const_tree result) -+{ -+ /* def1 and def2 should be POINTER_PLUS_EXPR. */ -+ if (!is_gimple_assign (def1) || !is_gimple_assign (def2) -+ || gimple_assign_rhs_code (def1) != POINTER_PLUS_EXPR -+ || gimple_assign_rhs_code (def2) != POINTER_PLUS_EXPR) -+ { -+ return false; -+ } -+ -+ tree rhs12 = gimple_assign_rhs2 (def1); -+ -+ tree rhs21 = gimple_assign_rhs1 (def2); -+ tree rhs22 = gimple_assign_rhs2 (def2); -+ -+ if (rhs21 != result) -+ { -+ return false; -+ } -+ -+ /* We should have a positive pointer-plus constant to ensure -+ that the pointer value is continuously increasing. */ -+ if (TREE_CODE (rhs12) != INTEGER_CST || TREE_CODE (rhs22) != INTEGER_CST -+ || compare_tree_int (rhs12, 0) <= 0 || compare_tree_int (rhs22, 0) <= 0) -+ { -+ return false; -+ } -+ -+ return true; -+} -+ -+static bool -+check_loop_body (basic_block bb0, basic_block bb2, const_tree result) -+{ -+ gimple *g01 = first_stmt (bb0); -+ if (!g01 || !is_gimple_assign (g01) -+ || gimple_assign_rhs_code (g01) != MEM_REF -+ || TREE_OPERAND (gimple_assign_rhs1 (g01), 0) != result) -+ { -+ return false; -+ } -+ -+ gimple *g02 = g01->next; -+ /* GIMPLE_COND would be the last gimple in a basic block, -+ and have no other side effects on RESULT. */ -+ if (!g02 || gimple_code (g02) != GIMPLE_COND) -+ { -+ return false; -+ } -+ -+ if (first_stmt (bb2) != last_stmt (bb2)) -+ { -+ return false; -+ } -+ -+ return true; -+} -+ -+/* Pattern is like -+
-+   arg1 = base (rhs11) + cst (rhs12); [def1]
-+   goto 
-+
-+   
-+   arg2 = result (rhs21) + cst (rhs22); [def2]
-+
-+   
-+   # result = PHI 
-+   _v = *result;  [g01]
-+   if (_v == 0)   [g02]
-+     goto 
-+   else
-+     goto 
-+
-+   
-+   _1 = result - base;     [g1]
-+   _2 = _1 /[ex] cst;      [g2]
-+   _3 = (unsigned int) _2; [g3]
-+   if (_3 == 0)
-+   ...
-+*/
-+static bool
-+check_bb_order (basic_block bb0, basic_block &bb1, basic_block &bb2,
-+		gphi *phi_stmt, gimple *&output)
-+{
-+  /* Start check from PHI node in BB0.  */
-+  if (gimple_phi_num_args (phi_stmt) != 2
-+      || virtual_operand_p (gimple_phi_result (phi_stmt)))
-+    {
-+      return false;
-+    }
-+
-+  tree result = gimple_phi_result (phi_stmt);
-+  tree arg1 = gimple_phi_arg_def (phi_stmt, 0);
-+  tree arg2 = gimple_phi_arg_def (phi_stmt, 1);
-+
-+  if (TREE_CODE (arg1) != SSA_NAME
-+      || TREE_CODE (arg2) != SSA_NAME
-+      || SSA_NAME_IS_DEFAULT_DEF (arg1)
-+      || SSA_NAME_IS_DEFAULT_DEF (arg2))
-+    {
-+      return false;
-+    }
-+
-+  gimple *def1 = SSA_NAME_DEF_STMT (arg1);
-+  gimple *def2 = SSA_NAME_DEF_STMT (arg2);
-+
-+  /* Swap bb1 and bb2 if pattern is like
-+     if (_v != 0)
-+       goto 
-+     else
-+       goto 
-+  */
-+  if (gimple_bb (def2) == bb1 && EDGE_SUCC (bb1, 0)->dest == bb0)
-+    {
-+      std::swap (bb1, bb2);
-+    }
-+
-+  /* prebb[def1] --> bb0 <-- bb2[def2] */
-+  if (!gimple_bb (def1)
-+      || EDGE_SUCC (gimple_bb (def1), 0)->dest != bb0
-+      || gimple_bb (def2) != bb2 || EDGE_SUCC (bb2, 0)->dest != bb0)
-+    {
-+      return false;
-+    }
-+
-+  /* Check whether define gimple meets the pattern requirements.  */
-+  if (!check_def_gimple (def1, def2, result))
-+    {
-+      return false;
-+    }
-+
-+  if (!check_loop_body (bb0, bb2, result))
-+    {
-+      return false;
-+    }
-+
-+  output = def1;
-+  return true;
-+}
-+
-+/* Check pattern
-+   
-+   _1 = result - base;     [g1]
-+   _2 = _1 /[ex] cst;      [g2]
-+   _3 = (unsigned int) _2; [g3]
-+   if (_3 == 0)
-+   ...
-+*/
-+static bool
-+check_gimple_order (basic_block bb1, const_tree base, const_tree cst,
-+		    const_tree result, gimple *&output)
-+{
-+  gimple *g1 = first_stmt (bb1);
-+  if (!g1 || !is_gimple_assign (g1)
-+      || gimple_assign_rhs_code (g1) != POINTER_DIFF_EXPR
-+      || gimple_assign_rhs1 (g1) != result
-+      || gimple_assign_rhs2 (g1) != base)
-+    {
-+      return false;
-+    }
-+
-+  gimple *g2 = g1->next;
-+  if (!g2 || !is_gimple_assign (g2)
-+      || gimple_assign_rhs_code (g2) != EXACT_DIV_EXPR
-+      || gimple_assign_lhs (g1) != gimple_assign_rhs1 (g2)
-+      || TREE_CODE (gimple_assign_rhs2 (g2)) != INTEGER_CST)
-+    {
-+      return false;
-+    }
-+
-+  /* INTEGER_CST cst in gimple def1.  */
-+  HOST_WIDE_INT num1 = TREE_INT_CST_LOW (cst);
-+  /* INTEGER_CST cst in gimple g2.  */
-+  HOST_WIDE_INT num2 = TREE_INT_CST_LOW (gimple_assign_rhs2 (g2));
-+  /* _2 must be at least a positive number.  */
-+  if (num2 == 0 || num1 / num2 <= 0)
-+    {
-+      return false;
-+    }
-+
-+  gimple *g3 = g2->next;
-+  if (!g3 || !is_gimple_assign (g3)
-+      || gimple_assign_rhs_code (g3) != NOP_EXPR
-+      || gimple_assign_lhs (g2) != gimple_assign_rhs1 (g3)
-+      || TREE_CODE (gimple_assign_lhs (g3)) != SSA_NAME)
-+    {
-+      return false;
-+    }
-+
-+  /* _3 should only be used in comparison operation or PHI node.  */
-+  hash_set *hset = new hash_set;
-+  if (!check_uses (gimple_assign_lhs (g3), hset))
-+    {
-+      delete hset;
-+      return false;
-+    }
-+  delete hset;
-+
-+  output = g3;
-+  return true;
-+}
-+
-+static bool
-+do_phiopt_pattern (basic_block bb0, basic_block bb1, basic_block bb2)
-+{
-+  gphi_iterator gsi;
-+
-+  for (gsi = gsi_start_phis (bb0); !gsi_end_p (gsi); gsi_next (&gsi))
-+    {
-+      gphi *phi_stmt = gsi.phi ();
-+      gimple *def1 = NULL;
-+      tree base, cst, result;
-+
-+      if (!check_bb_order (bb0, bb1, bb2, phi_stmt, def1))
-+	{
-+	  continue;
-+	}
-+
-+      base = gimple_assign_rhs1 (def1);
-+      cst = gimple_assign_rhs2 (def1);
-+      result = gimple_phi_result (phi_stmt);
-+
-+      gimple *stmt = NULL;
-+      if (!check_gimple_order (bb1, base, cst, result, stmt))
-+	{
-+	  continue;
-+	}
-+
-+      gcc_assert (stmt);
-+
-+      if (dump_file && (dump_flags & TDF_DETAILS))
-+	{
-+	  fprintf (dump_file, "PHIOPT pattern optimization (1) - Rewrite:\n");
-+	  print_gimple_stmt (dump_file, stmt, 0);
-+	  fprintf (dump_file, "to\n");
-+	}
-+
-+      /* Rewrite statement
-+	   _3 = (unsigned int) _2;
-+	 to
-+	   _3 = (unsigned int) 1;
-+      */
-+      tree type = TREE_TYPE (gimple_assign_rhs1 (stmt));
-+      gimple_assign_set_rhs1 (stmt, build_int_cst (type, 1));
-+      update_stmt (stmt);
-+
-+      if (dump_file && (dump_flags & TDF_DETAILS))
-+	{
-+	  print_gimple_stmt (dump_file, stmt, 0);
-+	  fprintf (dump_file, "\n");
-+	}
-+
-+      return true;
-+    }
-+  return false;
-+}
-+
- /* Determine whether we should attempt to hoist adjacent loads out of
-    diamond patterns in pass_phiopt.  Always hoist loads if
-    -fhoist-adjacent-loads is specified and the target machine has
diff --git a/remove-array-index-inliner-hint.patch b/remove-array-index-inliner-hint.patch
deleted file mode 100644
index 416cd9af63a17526caf873bcb4578fa8e4891d62..0000000000000000000000000000000000000000
--- a/remove-array-index-inliner-hint.patch
+++ /dev/null
@@ -1,327 +0,0 @@
-This backport contains 1 patch from the GCC mainstream tree.
-The commit ID of the patch is listed below.
-
-0001-ipa-fnsummary.c-ipa_dump_hints-Do-not-dump-array_ind.patch
-a20f263ba1a76af40eb4e6734529739a2a30ed65
-
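For context, the hint removed below was awarded when IPA-CP cloning makes the index of an array access a compile-time constant. A minimal C sketch of such a cloning candidate (illustrative only, not taken from the patch; the names are invented):

    static int table[16];

    /* Cloning lookup () for the constant argument 3 makes the ARRAY_REF
       index known.  Before this change that set INLINE_HINT_array_index and
       earned the ipa-cp-array-index-hint-bonus; afterwards only the
       per-statement address-calculation cost is accounted.  */
    static int
    lookup (int idx)
    {
      return table[idx & 15];
    }

    int
    caller (void)
    {
      return lookup (3);
    }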
-diff -uprN a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
---- a/gcc/doc/invoke.texi
-+++ b/gcc/doc/invoke.texi
-@@ -11895,12 +11895,6 @@ of iterations of a loop known, it adds a bonus of
- @option{ipa-cp-loop-hint-bonus} to the profitability score of
- the candidate.
- 
--@item ipa-cp-array-index-hint-bonus
--When IPA-CP determines that a cloning candidate would make the index of
--an array access known, it adds a bonus of
--@option{ipa-cp-array-index-hint-bonus} to the profitability
--score of the candidate.
--
- @item ipa-max-aa-steps
- During its analysis of function bodies, IPA-CP employs alias analysis
- in order to track values pointed to by function parameters.  In order
-diff -uprN a/gcc/ipa-cp.c b/gcc/ipa-cp.c
---- a/gcc/ipa-cp.c
-+++ b/gcc/ipa-cp.c
-@@ -2607,8 +2607,6 @@ hint_time_bonus (ipa_hints hints)
-   int result = 0;
-   if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride))
-     result += PARAM_VALUE (PARAM_IPA_CP_LOOP_HINT_BONUS);
--  if (hints & INLINE_HINT_array_index)
--    result += PARAM_VALUE (PARAM_IPA_CP_ARRAY_INDEX_HINT_BONUS);
-   return result;
- }
- 
-diff -uprN a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c
---- a/gcc/ipa-fnsummary.c
-+++ b/gcc/ipa-fnsummary.c
-@@ -134,11 +134,6 @@ ipa_dump_hints (FILE *f, ipa_hints hints)
-       hints &= ~INLINE_HINT_declared_inline;
-       fprintf (f, " declared_inline");
-     }
--  if (hints & INLINE_HINT_array_index)
--    {
--      hints &= ~INLINE_HINT_array_index;
--      fprintf (f, " array_index");
--    }
-   if (hints & INLINE_HINT_known_hot)
-     {
-       hints &= ~INLINE_HINT_known_hot;
-@@ -549,8 +544,6 @@ ipa_fn_summary::~ipa_fn_summary ()
-     edge_predicate_pool.remove (loop_iterations);
-   if (loop_stride)
-     edge_predicate_pool.remove (loop_stride);
--  if (array_index)
--    edge_predicate_pool.remove (array_index);
-   vec_free (conds);
-   vec_free (size_time_table);
- }
-@@ -703,8 +696,6 @@ ipa_fn_summary_t::duplicate (cgraph_node *src,
- 					      possible_truths);
-       remap_hint_predicate_after_duplication (&info->loop_stride,
- 					      possible_truths);
--      remap_hint_predicate_after_duplication (&info->array_index,
--					      possible_truths);
- 
-       /* If inliner or someone after inliner will ever start producing
-          non-trivial clones, we will get trouble with lack of information
-@@ -727,12 +718,6 @@ ipa_fn_summary_t::duplicate (cgraph_node *src,
- 	  info->loop_stride = NULL;
- 	  set_hint_predicate (&info->loop_stride, p);
- 	}
--      if (info->array_index)
--	{
--	  predicate p = *info->array_index;
--	  info->array_index = NULL;
--	  set_hint_predicate (&info->array_index, p);
--	}
-     }
-   if (!dst->global.inlined_to)
-     ipa_update_overall_fn_summary (dst);
-@@ -894,11 +879,6 @@ ipa_dump_fn_summary (FILE *f, struct cgraph_node *node)
- 	      fprintf (f, "  loop stride:");
- 	      s->loop_stride->dump (f, s->conds);
- 	    }
--	  if (s->array_index)
--	    {
--	      fprintf (f, "  array index:");
--	      s->array_index->dump (f, s->conds);
--	    }
- 	  fprintf (f, "  calls:\n");
- 	  dump_ipa_call_summary (f, 4, node, s);
- 	  fprintf (f, "\n");
-@@ -1824,27 +1804,6 @@ predicate_for_phi_result (class ipa_fn_summary *summary, gphi *phi,
-   nonconstant_names[SSA_NAME_VERSION (gimple_phi_result (phi))] = *p;
- }
- 
--/* Return predicate specifying when array index in access OP becomes non-constant.  */
--
--static predicate
--array_index_predicate (ipa_fn_summary *info,
--		       vec< predicate> nonconstant_names, tree op)
--{
--  predicate p = false;
--  while (handled_component_p (op))
--    {
--      if (TREE_CODE (op) == ARRAY_REF || TREE_CODE (op) == ARRAY_RANGE_REF)
--	{
--	  if (TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME)
--	    p = p.or_with (info->conds, 
--			   nonconstant_names[SSA_NAME_VERSION
--						  (TREE_OPERAND (op, 1))]);
--	}
--      op = TREE_OPERAND (op, 0);
--    }
--  return p;
--}
--
- /* For a typical usage of __builtin_expect (a nonconstant_names = vNULL;
-   int nblocks, n;
-   int *order;
--  predicate array_index = true;
-   gimple *fix_builtin_expect_stmt;
- 
-   gcc_assert (my_function && my_function->cfg);
-@@ -2146,26 +2104,6 @@ analyze_function_body (struct cgraph_node *node, bool early)
- 		       this_time);
- 	    }
- 
--	  if (gimple_assign_load_p (stmt) && nonconstant_names.exists ())
--	    {
--	      predicate this_array_index;
--	      this_array_index =
--		array_index_predicate (info, nonconstant_names,
--				       gimple_assign_rhs1 (stmt));
--	      if (this_array_index != false)
--		array_index &= this_array_index;
--	    }
--	  if (gimple_store_p (stmt) && nonconstant_names.exists ())
--	    {
--	      predicate this_array_index;
--	      this_array_index =
--		array_index_predicate (info, nonconstant_names,
--				       gimple_get_lhs (stmt));
--	      if (this_array_index != false)
--		array_index &= this_array_index;
--	    }
--
--
- 	  if (is_gimple_call (stmt)
- 	      && !gimple_call_internal_p (stmt))
- 	    {
-@@ -2273,14 +2211,40 @@ analyze_function_body (struct cgraph_node *node, bool early)
- 		  if (dump_file)
- 		    fprintf (dump_file, "   fp_expression set\n");
- 		}
-+	    }
- 
--	      gcc_assert (time >= 0);
--	      gcc_assert (size >= 0);
-+	  /* Account cost of address calculations in the statements.  */
-+	  for (unsigned int i = 0; i < gimple_num_ops (stmt); i++)
-+	    {
-+	      for (tree op = gimple_op (stmt, i);
-+		   op && handled_component_p (op);
-+		   op = TREE_OPERAND (op, 0))
-+	        if ((TREE_CODE (op) == ARRAY_REF
-+		     || TREE_CODE (op) == ARRAY_RANGE_REF)
-+		    && TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME)
-+		  {
-+		    predicate p = bb_predicate;
-+		    if (fbi.info)
-+		      p = p & will_be_nonconstant_expr_predicate
-+				 (&fbi, info, TREE_OPERAND (op, 1),
-+			          nonconstant_names);
-+		    if (p != false)
-+		      {
-+			time += freq;
-+			size += 1;
-+			if (dump_file)
-+			  fprintf (dump_file,
-+				   "\t\tAccounting address calculation.\n");
-+			info->account_size_time (ipa_fn_summary::size_scale,
-+						 freq,
-+						 bb_predicate,
-+						 p);
-+		      }
-+		  }
- 	    }
-+
- 	}
-     }
--  set_hint_predicate (&ipa_fn_summaries->get_create (node)->array_index,
--		      array_index);
-   free (order);
- 
-   if (nonconstant_names.exists () && !early)
-@@ -2783,9 +2747,6 @@ estimate_node_size_and_time (struct cgraph_node *node,
-   if (info->loop_stride
-       && !info->loop_stride->evaluate (possible_truths))
-     hints |= INLINE_HINT_loop_stride;
--  if (info->array_index
--      && !info->array_index->evaluate (possible_truths))
--    hints |= INLINE_HINT_array_index;
-   if (info->scc_no)
-     hints |= INLINE_HINT_in_scc;
-   if (DECL_DECLARED_INLINE_P (node->decl))
-@@ -3106,9 +3067,6 @@ ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge)
-   remap_hint_predicate (info, callee_info,
- 			&callee_info->loop_stride,
- 			operand_map, offset_map, clause, &toplev_predicate);
--  remap_hint_predicate (info, callee_info,
--			&callee_info->array_index,
--			operand_map, offset_map, clause, &toplev_predicate);
- 
-   ipa_call_summary *s = ipa_call_summaries->get (edge);
-   inline_update_callee_summaries (edge->callee, s->loop_depth);
-@@ -3366,9 +3324,6 @@ inline_read_section (struct lto_file_decl_data *file_data, const char *data,
-       p.stream_in (&ib);
-       if (info)
-         set_hint_predicate (&info->loop_stride, p);
--      p.stream_in (&ib);
--      if (info)
--        set_hint_predicate (&info->array_index, p);
-       for (e = node->callees; e; e = e->next_callee)
- 	read_ipa_call_summary (&ib, e, info != NULL);
-       for (e = node->indirect_calls; e; e = e->next_callee)
-@@ -3517,10 +3472,6 @@ ipa_fn_summary_write (void)
- 	    info->loop_stride->stream_out (ob);
-  	  else
- 	    streamer_write_uhwi (ob, 0);
--	  if (info->array_index)
--	    info->array_index->stream_out (ob);
--	  else
--	    streamer_write_uhwi (ob, 0);
- 	  for (edge = cnode->callees; edge; edge = edge->next_callee)
- 	    write_ipa_call_summary (ob, edge);
- 	  for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
-diff -uprN a/gcc/ipa-fnsummary.h b/gcc/ipa-fnsummary.h
---- a/gcc/ipa-fnsummary.h
-+++ b/gcc/ipa-fnsummary.h
-@@ -48,11 +48,8 @@ enum ipa_hints_vals {
-      if functions are in different modules, inlining may not be so important. 
-      Set by simple_edge_hints in ipa-inline-analysis.c.   */
-   INLINE_HINT_cross_module = 64,
--  /* If array indexes of loads/stores become known there may be room for
--     further optimization.  */
--  INLINE_HINT_array_index = 128,
-   /* We know that the callee is hot by profile.  */
--  INLINE_HINT_known_hot = 256
-+  INLINE_HINT_known_hot = 128
- };
- 
- typedef int ipa_hints;
-@@ -97,7 +94,7 @@ public:
-       fp_expressions (false), estimated_stack_size (false),
-       stack_frame_offset (false), time (0), size (0), conds (NULL),
-       size_time_table (NULL), loop_iterations (NULL), loop_stride (NULL),
--      array_index (NULL), growth (0), scc_no (0)
-+      growth (0), scc_no (0)
-   {
-   }
- 
-@@ -111,7 +108,7 @@ public:
-     stack_frame_offset (s.stack_frame_offset), time (s.time), size (s.size),
-     conds (s.conds), size_time_table (s.size_time_table),
-     loop_iterations (s.loop_iterations), loop_stride (s.loop_stride),
--    array_index (s.array_index), growth (s.growth), scc_no (s.scc_no)
-+    growth (s.growth), scc_no (s.scc_no)
-   {}
- 
-   /* Default constructor.  */
-@@ -157,8 +154,6 @@ public:
-   /* Predicate on when some loop in the function becomes to have known
-      stride.   */
-   predicate * GTY((skip)) loop_stride;
--  /* Predicate on when some array indexes become constants.  */
--  predicate * GTY((skip)) array_index;
-   /* Estimated growth for inlining all copies of the function before start
-      of small functions inlining.
-      This value will get out of date as the callers are duplicated, but
-diff -uprN a/gcc/ipa-inline.c b/gcc/ipa-inline.c
---- a/gcc/ipa-inline.c
-+++ b/gcc/ipa-inline.c
-@@ -807,7 +807,6 @@ want_inline_small_function_p (struct cgraph_edge *e, bool report)
- 		   || (!(hints & (INLINE_HINT_indirect_call
- 				  | INLINE_HINT_known_hot
- 				  | INLINE_HINT_loop_iterations
--				  | INLINE_HINT_array_index
- 				  | INLINE_HINT_loop_stride))
- 		       && !(big_speedup = big_speedup_p (e)))))
- 	{
-@@ -833,7 +832,6 @@ want_inline_small_function_p (struct cgraph_edge *e, bool report)
- 	       && !(hints & INLINE_HINT_known_hot)
- 	       && growth >= ((hints & (INLINE_HINT_indirect_call
- 				       | INLINE_HINT_loop_iterations
--			               | INLINE_HINT_array_index
- 				       | INLINE_HINT_loop_stride))
- 			     ? MAX (MAX_INLINE_INSNS_AUTO,
- 				    MAX_INLINE_INSNS_SINGLE)
-@@ -1227,7 +1225,6 @@ edge_badness (struct cgraph_edge *edge, bool dump)
-     badness = badness.shift (badness > 0 ? 4 : -4);
-   if ((hints & (INLINE_HINT_indirect_call
- 		| INLINE_HINT_loop_iterations
--		| INLINE_HINT_array_index
- 		| INLINE_HINT_loop_stride))
-       || callee_info->growth <= 0)
-     badness = badness.shift (badness > 0 ? -2 : 2);
-diff -uprN a/gcc/params.def b/gcc/params.def
---- a/gcc/params.def
-+++ b/gcc/params.def
-@@ -1109,12 +1109,6 @@ DEFPARAM (PARAM_IPA_CP_LOOP_HINT_BONUS,
- 	  "bounds or strides known.",
- 	  64, 0, 0)
- 
--DEFPARAM (PARAM_IPA_CP_ARRAY_INDEX_HINT_BONUS,
--	  "ipa-cp-array-index-hint-bonus",
--	  "Compile-time bonus IPA-CP assigns to candidates which make an array "
--	  "index known.",
--	  48, 0, 0)
--
- DEFPARAM (PARAM_IPA_MAX_AA_STEPS,
- 	  "ipa-max-aa-steps",
- 	  "Maximum number of statements that will be visited by IPA formal "
diff --git a/revert-moutline-atomics.patch b/revert-moutline-atomics.patch
deleted file mode 100644
index 59b5a944c3fd985b24efdfb9803e1b85346a18e4..0000000000000000000000000000000000000000
--- a/revert-moutline-atomics.patch
+++ /dev/null
@@ -1,418 +0,0 @@
-This backport contains 1 patch from the GCC mainstream tree.
-The commit ID of the patch is listed below.
-
-0001-aarch64-Implement-moutline-atomics.patch
-3950b229a5ed6710f30241c2ddc3c74909bf4740
-
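The reverted option expands the __atomic built-ins into calls to out-of-line libgcc helpers named per the DEF0/DEF4/DEF5 scheme removed below (e.g. __aarch64_ldadd4_acq_rel), which can pick an LSE or LL/SC implementation at run time. A minimal C sketch of a call that -moutline-atomics would route through such a helper (illustrative only, not part of the patch):

    int counter;

    /* With -moutline-atomics this fetch-add becomes a call to a helper such
       as __aarch64_ldadd4_acq_rel; with the feature reverted it is always
       expanded inline (LL/SC, or LSE when enabled at compile time).  */
    int
    bump (void)
    {
      return __atomic_fetch_add (&counter, 1, __ATOMIC_ACQ_REL);
    }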
-diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
---- a/gcc/config/aarch64/aarch64.c	2021-03-11 17:12:30.380000000 +0800
-+++ b/gcc/config/aarch64/aarch64.c	2021-03-11 17:13:29.992000000 +0800
-@@ -18150,82 +18150,6 @@ aarch64_emit_unlikely_jump (rtx insn)
-   add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
- }
- 
--/* We store the names of the various atomic helpers in a 5x4 array.
--   Return the libcall function given MODE, MODEL and NAMES.  */
--
--rtx
--aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx,
--			const atomic_ool_names *names)
--{
--  memmodel model = memmodel_base (INTVAL (model_rtx));
--  int mode_idx, model_idx;
--
--  switch (mode)
--    {
--    case E_QImode:
--      mode_idx = 0;
--      break;
--    case E_HImode:
--      mode_idx = 1;
--      break;
--    case E_SImode:
--      mode_idx = 2;
--      break;
--    case E_DImode:
--      mode_idx = 3;
--      break;
--    case E_TImode:
--      mode_idx = 4;
--      break;
--    default:
--      gcc_unreachable ();
--    }
--
--  switch (model)
--    {
--    case MEMMODEL_RELAXED:
--      model_idx = 0;
--      break;
--    case MEMMODEL_CONSUME:
--    case MEMMODEL_ACQUIRE:
--      model_idx = 1;
--      break;
--    case MEMMODEL_RELEASE:
--      model_idx = 2;
--      break;
--    case MEMMODEL_ACQ_REL:
--    case MEMMODEL_SEQ_CST:
--      model_idx = 3;
--      break;
--    default:
--      gcc_unreachable ();
--    }
--
--  return init_one_libfunc_visibility (names->str[mode_idx][model_idx],
--				      VISIBILITY_HIDDEN);
--}
--
--#define DEF0(B, N) \
--  { "__aarch64_" #B #N "_relax", \
--    "__aarch64_" #B #N "_acq", \
--    "__aarch64_" #B #N "_rel", \
--    "__aarch64_" #B #N "_acq_rel" }
--
--#define DEF4(B)  DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), \
--		 { NULL, NULL, NULL, NULL }
--#define DEF5(B)  DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), DEF0(B, 16)
--
--static const atomic_ool_names aarch64_ool_cas_names = { { DEF5(cas) } };
--const atomic_ool_names aarch64_ool_swp_names = { { DEF4(swp) } };
--const atomic_ool_names aarch64_ool_ldadd_names = { { DEF4(ldadd) } };
--const atomic_ool_names aarch64_ool_ldset_names = { { DEF4(ldset) } };
--const atomic_ool_names aarch64_ool_ldclr_names = { { DEF4(ldclr) } };
--const atomic_ool_names aarch64_ool_ldeor_names = { { DEF4(ldeor) } };
--
--#undef DEF0
--#undef DEF4
--#undef DEF5
--
- /* Expand a compare and swap pattern.  */
- 
- void
-@@ -18272,17 +18196,6 @@ aarch64_expand_compare_and_swap (rtx ope
- 						   newval, mod_s));
-       cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
-     }
--  else if (TARGET_OUTLINE_ATOMICS)
--    {
--      /* Oldval must satisfy compare afterward.  */
--      if (!aarch64_plus_operand (oldval, mode))
--	oldval = force_reg (mode, oldval);
--      rtx func = aarch64_atomic_ool_func (mode, mod_s, &aarch64_ool_cas_names);
--      rval = emit_library_call_value (func, NULL_RTX, LCT_NORMAL, r_mode,
--				      oldval, mode, newval, mode,
--				      XEXP (mem, 0), Pmode);
--      cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
--    }
-   else
-     {
-       /* The oldval predicate varies by mode.  Test it and force to reg.  */
-diff -Nurp a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
---- a/gcc/config/aarch64/aarch64.opt	2021-03-11 17:12:30.380000000 +0800
-+++ b/gcc/config/aarch64/aarch64.opt	2021-03-11 17:13:29.992000000 +0800
-@@ -272,6 +272,3 @@ user-land code.
- TargetVariable
- long aarch64_stack_protector_guard_offset = 0
- 
--moutline-atomics
--Target Report Mask(OUTLINE_ATOMICS) Save
--Generate local calls to out-of-line atomic operations.
-diff -Nurp a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
---- a/gcc/config/aarch64/atomics.md	2021-03-11 17:12:30.380000000 +0800
-+++ b/gcc/config/aarch64/atomics.md	2021-03-11 17:13:29.992000000 +0800
-@@ -186,27 +186,16 @@
-   (match_operand:SI 3 "const_int_operand")]
-   ""
-   {
-+    rtx (*gen) (rtx, rtx, rtx, rtx);
-+
-     /* Use an atomic SWP when available.  */
-     if (TARGET_LSE)
--      {
--	emit_insn (gen_aarch64_atomic_exchange<mode>_lse
--		   (operands[0], operands[1], operands[2], operands[3]));
--      }
--    else if (TARGET_OUTLINE_ATOMICS)
--      {
--	machine_mode mode = <MODE>mode;
--	rtx func = aarch64_atomic_ool_func (mode, operands[3],
--					    &aarch64_ool_swp_names);
--	rtx rval = emit_library_call_value (func, operands[0], LCT_NORMAL,
--					    mode, operands[2], mode,
--					    XEXP (operands[1], 0), Pmode);
--        emit_move_insn (operands[0], rval);
--      }
-+      gen = gen_aarch64_atomic_exchange<mode>_lse;
-     else
--      {
--	emit_insn (gen_aarch64_atomic_exchange<mode>
--		   (operands[0], operands[1], operands[2], operands[3]));
--      }
-+      gen = gen_aarch64_atomic_exchange<mode>;
-+
-+    emit_insn (gen (operands[0], operands[1], operands[2], operands[3]));
-+
-     DONE;
-   }
- )
-@@ -291,39 +280,6 @@
- 	  }
- 	operands[1] = force_reg (<MODE>mode, operands[1]);
-       }
--    else if (TARGET_OUTLINE_ATOMICS)
--      {
--        const atomic_ool_names *names;
--	switch (<CODE>)
--	  {
--	  case MINUS:
--	    operands[1] = expand_simple_unop (<MODE>mode, NEG, operands[1],
--					      NULL, 1);
--	    /* fallthru */
--	  case PLUS:
--	    names = &aarch64_ool_ldadd_names;
--	    break;
--	  case IOR:
--	    names = &aarch64_ool_ldset_names;
--	    break;
--	  case XOR:
--	    names = &aarch64_ool_ldeor_names;
--	    break;
--	  case AND:
--	    operands[1] = expand_simple_unop (<MODE>mode, NOT, operands[1],
--					      NULL, 1);
--	    names = &aarch64_ool_ldclr_names;
--	    break;
--	  default:
--	    gcc_unreachable ();
--	  }
--        machine_mode mode = <MODE>mode;
--	rtx func = aarch64_atomic_ool_func (mode, operands[2], names);
--	emit_library_call_value (func, NULL_RTX, LCT_NORMAL, mode,
--				 operands[1], mode,
--				 XEXP (operands[0], 0), Pmode);
--        DONE;
--      }
-     else
-     gen = gen_aarch64_atomic_<atomic_optab><mode>;
- 
-@@ -449,40 +405,6 @@
- 	}
-       operands[2] = force_reg (<MODE>mode, operands[2]);
-     }
--  else if (TARGET_OUTLINE_ATOMICS)
--    {
--      const atomic_ool_names *names;
--      switch (<CODE>)
--	{
--	case MINUS:
--	  operands[2] = expand_simple_unop (<MODE>mode, NEG, operands[2],
--					    NULL, 1);
--	  /* fallthru */
--	case PLUS:
--	  names = &aarch64_ool_ldadd_names;
--	  break;
--	case IOR:
--	  names = &aarch64_ool_ldset_names;
--	  break;
--	case XOR:
--	  names = &aarch64_ool_ldeor_names;
--	  break;
--	case AND:
--	  operands[2] = expand_simple_unop (<MODE>mode, NOT, operands[2],
--					    NULL, 1);
--	  names = &aarch64_ool_ldclr_names;
--	  break;
--	default:
--	  gcc_unreachable ();
--	}
--      machine_mode mode = <MODE>mode;
--      rtx func = aarch64_atomic_ool_func (mode, operands[3], names);
--      rtx rval = emit_library_call_value (func, operands[0], LCT_NORMAL, mode,
--					  operands[2], mode,
--					  XEXP (operands[1], 0), Pmode);
--      emit_move_insn (operands[0], rval);
--      DONE;
--    }
-   else
-     gen = gen_aarch64_atomic_fetch_<atomic_optab><mode>;
- 
-@@ -572,7 +494,7 @@
- {
-   /* Use an atomic load-operate instruction when possible.  In this case
-      we will re-compute the result from the original mem value. */
--  if (TARGET_LSE || TARGET_OUTLINE_ATOMICS)
-+  if (TARGET_LSE)
-     {
-       rtx tmp = gen_reg_rtx (<MODE>mode);
-       operands[2] = force_reg (<MODE>mode, operands[2]);
-diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c
---- a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c	2021-03-11 17:12:34.168000000 +0800
-+++ b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c	2021-03-11 17:13:30.656000000 +0800
-@@ -1,5 +1,5 @@
- /* { dg-do compile } */
--/* { dg-options "-O2 -march=armv8-a+nolse -mno-outline-atomics" } */
-+/* { dg-options "-O2 -march=armv8-a+nolse" } */
- /* { dg-skip-if "" { *-*-* } { "-mcpu=*" } { "" } } */
- 
- int
-diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c
---- a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c	2021-03-11 17:12:34.168000000 +0800
-+++ b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c	2021-03-11 17:13:30.656000000 +0800
-@@ -1,5 +1,5 @@
- /* { dg-do compile } */
--/* { dg-options "-O2 -march=armv8-a+nolse -mno-outline-atomics" } */
-+/* { dg-options "-O2 -march=armv8-a+nolse" } */
- /* { dg-skip-if "" { *-*-* } { "-mcpu=*" } { "" } } */
- 
- int
-diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c b/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c
---- a/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c	2021-03-11 17:12:33.988000000 +0800
-+++ b/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c	2021-03-11 17:13:30.648000000 +0800
-@@ -1,5 +1,5 @@
- /* { dg-do compile } */
--/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf -mno-outline-atomics" } */
-+/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf" } */
- 
- #include "atomic-comp-swap-release-acquire.x"
- 
-diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c
---- a/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c	2021-03-11 17:12:33.988000000 +0800
-+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c	2021-03-11 17:13:30.648000000 +0800
-@@ -1,5 +1,5 @@
- /* { dg-do compile } */
--/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
-+/* { dg-options "-march=armv8-a+nolse -O2" } */
- 
- #include "atomic-op-acq_rel.x"
- 
-diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c
---- a/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c	2021-03-11 17:12:33.988000000 +0800
-+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c	2021-03-11 17:13:30.648000000 +0800
-@@ -1,5 +1,5 @@
- /* { dg-do compile } */
--/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
-+/* { dg-options "-march=armv8-a+nolse -O2" } */
- 
- #include "atomic-op-acquire.x"
- 
-diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c
---- a/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c	2021-03-11 17:12:33.992000000 +0800
-+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c	2021-03-11 17:13:30.648000000 +0800
-@@ -1,5 +1,5 @@
- /* { dg-do compile } */
--/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
-+/* { dg-options "-march=armv8-a+nolse -O2" } */
- 
- #include "atomic-op-char.x"
- 
-diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c
---- a/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c	2021-03-11 17:12:33.992000000 +0800
-+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c	2021-03-11 17:13:30.648000000 +0800
-@@ -1,5 +1,5 @@
- /* { dg-do compile } */
--/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
-+/* { dg-options "-march=armv8-a+nolse -O2" } */
- 
- #include "atomic-op-consume.x"
- 
-diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic-op-imm.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-imm.c
---- a/gcc/testsuite/gcc.target/aarch64/atomic-op-imm.c	2021-03-11 17:12:33.992000000 +0800
-+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-imm.c	2021-03-11 17:13:30.648000000 +0800
-@@ -1,5 +1,5 @@
- /* { dg-do compile } */
--/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
-+/* { dg-options "-march=armv8-a+nolse -O2" } */
- 
- int v = 0;
- 
-diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c
---- a/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c	2021-03-11 17:12:33.992000000 +0800
-+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c	2021-03-11 17:13:30.648000000 +0800
-@@ -1,5 +1,5 @@
- /* { dg-do compile } */
--/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
-+/* { dg-options "-march=armv8-a+nolse -O2" } */
- 
- #include "atomic-op-int.x"
- 
-diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic-op-long.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-long.c
---- a/gcc/testsuite/gcc.target/aarch64/atomic-op-long.c	2021-03-11 17:12:33.992000000 +0800
-+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-long.c	2021-03-11 17:13:30.648000000 +0800
-@@ -1,5 +1,5 @@
- /* { dg-do compile } */
--/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
-+/* { dg-options "-march=armv8-a+nolse -O2" } */
- 
- long v = 0;
- 
-diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c
---- a/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c	2021-03-11 17:12:33.992000000 +0800
-+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c	2021-03-11 17:13:30.648000000 +0800
-@@ -1,5 +1,5 @@
- /* { dg-do compile } */
--/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
-+/* { dg-options "-march=armv8-a+nolse -O2" } */
- 
- #include "atomic-op-relaxed.x"
- 
-diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c
---- a/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c	2021-03-11 17:12:34.012000000 +0800
-+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c	2021-03-11 17:13:30.648000000 +0800
-@@ -1,5 +1,5 @@
- /* { dg-do compile } */
--/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
-+/* { dg-options "-march=armv8-a+nolse -O2" } */
- 
- #include "atomic-op-release.x"
- 
-diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c
---- a/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c	2021-03-11 17:12:34.012000000 +0800
-+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c	2021-03-11 17:13:30.648000000 +0800
-@@ -1,5 +1,5 @@
- /* { dg-do compile } */
--/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
-+/* { dg-options "-march=armv8-a+nolse -O2" } */
- 
- #include "atomic-op-seq_cst.x"
- 
-diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c
---- a/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c	2021-03-11 17:12:34.168000000 +0800
-+++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c	2021-03-11 17:13:30.652000000 +0800
-@@ -1,5 +1,5 @@
- /* { dg-do compile } */
--/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
-+/* { dg-options "-march=armv8-a+nolse -O2" } */
- 
- #include "atomic-op-short.x"
- 
-diff -Nurp a/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.c b/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.c
---- a/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.c	2021-03-11 17:12:34.168000000 +0800
-+++ b/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.c	2021-03-11 17:13:30.656000000 +0800
-@@ -1,5 +1,5 @@
- /* { dg-do compile } */
--/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf -mno-outline-atomics" } */
-+/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf" } */
- 
- #include "sync-comp-swap.x"
- 
-diff -Nurp a/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.c b/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.c
---- a/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.c	2021-03-11 17:12:34.168000000 +0800
-+++ b/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.c	2021-03-11 17:13:30.656000000 +0800
-@@ -1,5 +1,5 @@
- /* { dg-do compile } */
--/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
-+/* { dg-options "-march=armv8-a+nolse -O2" } */
- 
- #include "sync-op-acquire.x"
- 
-diff -Nurp a/gcc/testsuite/gcc.target/aarch64/sync-op-full.c b/gcc/testsuite/gcc.target/aarch64/sync-op-full.c
---- a/gcc/testsuite/gcc.target/aarch64/sync-op-full.c	2021-03-11 17:12:34.168000000 +0800
-+++ b/gcc/testsuite/gcc.target/aarch64/sync-op-full.c	2021-03-11 17:13:30.656000000 +0800
-@@ -1,5 +1,5 @@
- /* { dg-do compile } */
--/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
-+/* { dg-options "-march=armv8-a+nolse -O2" } */
- 
- #include "sync-op-full.x"
- 
diff --git a/revise-type-before-build-MULT.patch b/revise-type-before-build-MULT.patch
deleted file mode 100644
index ddcb05e64297b67c78d9376b6e97d318353487a3..0000000000000000000000000000000000000000
--- a/revise-type-before-build-MULT.patch
+++ /dev/null
@@ -1,80 +0,0 @@
-diff -uprN a/gcc/testsuite/gcc.dg/affine-add-1.c b/gcc/testsuite/gcc.dg/affine-add-1.c
---- a/gcc/testsuite/gcc.dg/affine-add-1.c	1970-01-01 08:00:00.000000000 +0800
-+++ b/gcc/testsuite/gcc.dg/affine-add-1.c	2021-03-18 19:41:21.308000000 +0800
-@@ -0,0 +1,16 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O2" } */
-+
-+extern unsigned char a[][13][23][15][11];
-+short b;
-+int c, d;
-+void e(int f, int g[][3][4][3]) {
-+  for (char h = 0;; h = 2)
-+    for (; f;)
-+      for (short i;; i = d)
-+        for (char j; j; j = c)
-+          for (char k = 0; k < 4; k = g[h][b][i][j])
-+            a[h][b][i][j][k] = 0;
-+}
-+unsigned char a[3][13][23][15][11];
-+int main() {}
-diff -uprN a/gcc/testsuite/g++.dg/affine-add-1.C b/gcc/testsuite/g++.dg/affine-add-1.C
---- a/gcc/testsuite/g++.dg/affine-add-1.C	1970-01-01 08:00:00.000000000 +0800
-+++ b/gcc/testsuite/g++.dg/affine-add-1.C	2021-03-18 19:40:28.432000000 +0800
-@@ -0,0 +1,33 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O3" } */
-+
-+#include <algorithm>
-+
-+extern int a[];
-+extern unsigned char b[][151800];
-+extern long long c[][20][23][22][11];
-+char d, e;
-+int f;
-+unsigned g;
-+long h;
-+void i(unsigned long long s, unsigned short j) {
-+  for (char k = 0; k < 12; k += 3)
-+    for (short l = 0; l < 9; l = std::min(j, (unsigned short)4050683)) {
-+      for (bool m(h); m < bool(~0); m = 1)
-+        for (int t = 0; t < 4; t = std::min(s, (unsigned long long)40808803))
-+          for (int n = 0; n < 9; n += e)
-+            a[n] = 0;
-+      for (char o = 0; o < g; o = 4)
-+        for (bool p; p < f; p = d) {
-+          for (long q(s); q < 4ULL; q += 1ULL)
-+            b[k][o + q] = 0;
-+          for (int r = 0; r < 11; r += ~0 || 0)
-+            c[k][l][o][d][r] = 0;
-+        }
-+    }
-+}
-+int a[0];
-+unsigned char b[3][151800];
-+long long c[3][20][23][22][11];
-+int main() {}
-+
-diff -uprN a/gcc/tree-affine.c b/gcc/tree-affine.c
---- a/gcc/tree-affine.c	2021-03-15 18:55:31.928000000 +0800
-+++ b/gcc/tree-affine.c	2021-03-18 16:34:05.932000000 +0800
-@@ -184,9 +184,16 @@ aff_combination_add_elt (aff_tree *comb,
-   if (scale == 1)
-     elt = fold_convert (type, elt);
-   else
--    elt = fold_build2 (MULT_EXPR, type,
--		       fold_convert (type, elt),
--		       wide_int_to_tree (type, scale));
-+    {
-+      if (POINTER_TYPE_P (TREE_TYPE (elt)))
-+	{
-+	  elt = copy_node (elt);
-+	  TREE_TYPE (elt) = sizetype;
-+	}
-+      elt = fold_build2 (MULT_EXPR, type,
-+			 fold_convert (type, elt),
-+			 wide_int_to_tree (type, scale));
-+    }
- 
-   if (comb->rest)
-     comb->rest = fold_build2 (PLUS_EXPR, type, comb->rest,
diff --git a/sccvn-Improve-handling-of-load-masked-with-integer.patch b/sccvn-Improve-handling-of-load-masked-with-integer.patch
deleted file mode 100644
index f24d0c35a3c8ac5c2d119c0ca01e08a251c2cd6a..0000000000000000000000000000000000000000
--- a/sccvn-Improve-handling-of-load-masked-with-integer.patch
+++ /dev/null
@@ -1,2397 +0,0 @@
-This backport contains 14 patches from the GCC mainstream tree.
-The commit IDs of the patches are listed below in chronological order.
-
-c2851dc2896bfc0d27b32c90cafc873f67cd6727
-0001-tree-ssa-sccvn.c-struct-vn_walk_cb_data-Add-orig_ref.patch
-
-69b5279e977593d656906288316ee03a8bf79c6a
-0001-gimple-parser.c-c_parser_gimple_postfix_expression-H.patch
-
-8389386c6d55d57afc3ae01f71546ac4468f7926
-0001-gimple-parser.c-c_parser_gimple_postfix_expression-S.patch
-
-d1f2e4c1027b826cf3ba353e86c37589f63f8efe
-0001-tree-ssa-sccvn.c-vn_walk_cb_data-push_partial_def-Re.patch
-
-62e3e66f130fc280eac0bbb6b69e9adca328c03b
-0001-re-PR-tree-optimization-83518-Missing-optimization-u.patch
-
-10f30ac9cda947d117e50f0cbd4cf94ee70a944f
-0001-re-PR-tree-optimization-91756-g-.dg-lto-alias-3-FAIL.patch
-
-1284e2b104a81ad93daab5110cd844981e501086
-0001-re-PR-tree-optimization-90883-Generated-code-is-wors.patch
-
-fb08a53b2eb01cc06d66f479c865aca55c91fd26
-0001-tree-ssa-sccvn.c-vn_walk_cb_data-push_partial_def-Ba.patch
-
-0849cdae714ddf056a4944f31eef53a465f1bcd0
-0001-tree-ssa-sccvn.c-vn_walk_cb_data-push_partial_def-Ha.patch
-
-5f0653a8b75a5ad5a5405a27dd92d3a5759eed4c
-0001-tree-optimization-91123-restore-redundant-store-remo.patch
-
-8aba425f4ebc5e2c054776d3cdddf13f7c1918f8
-0001-sccvn-Handle-bitfields-in-vn_reference_lookup_3-PR93.patch
-
-7f5617b00445dcc861a498a4cecc8aaa59e05b8c
-0001-sccvn-Handle-bitfields-in-push_partial_def-PR93582.patch
-
-5f9cd512c4278621435cce486dd00248ea2e821c
-0001-sccvn-Handle-non-byte-aligned-offset-or-size-for-mem.patch
-
-b07e4e7c7520ca3e798f514dec0711eea2c027be
-0001-sccvn-Improve-handling-of-load-masked-with-integer-c.patch
-
-diff -urpN a/gcc/c/gimple-parser.c b/gcc/c/gimple-parser.c
---- a/gcc/c/gimple-parser.c	2020-11-26 22:26:34.848000000 -0500
-+++ b/gcc/c/gimple-parser.c	2020-11-26 22:06:08.032000000 -0500
-@@ -1320,17 +1320,24 @@ c_parser_gimple_postfix_expression (gimp
- 		}
- 	      else
- 		{
--		  bool neg_p;
-+		  bool neg_p, addr_p;
- 		  if ((neg_p = c_parser_next_token_is (parser, CPP_MINUS)))
- 		    c_parser_consume_token (parser);
-+		  if ((addr_p = c_parser_next_token_is (parser, CPP_AND)))
-+		    c_parser_consume_token (parser);
- 		  tree val = c_parser_gimple_postfix_expression (parser).value;
- 		  if (! val
- 		      || val == error_mark_node
--		      || ! CONSTANT_CLASS_P (val))
-+		      || (!CONSTANT_CLASS_P (val)
-+			  && !(addr_p
-+			       && (TREE_CODE (val) == STRING_CST
-+				   || DECL_P (val)))))
- 		    {
- 		      c_parser_error (parser, "invalid _Literal");
- 		      return expr;
- 		    }
-+		  if (addr_p)
-+		    val = build1 (ADDR_EXPR, type, val);
- 		  if (neg_p)
- 		    {
- 		      val = const_unop (NEGATE_EXPR, TREE_TYPE (val), val);
-diff -urpN a/gcc/fold-const.c b/gcc/fold-const.c
---- a/gcc/fold-const.c	2020-11-26 22:26:32.816000000 -0500
-+++ b/gcc/fold-const.c	2020-11-26 22:06:08.036000000 -0500
-@@ -7773,6 +7773,70 @@ native_decode_vector_tree (tree type, ve
-   return builder.build ();
- }
- 
-+/* Routines for manipulation of native_encode_expr encoded data if the encoded
-+   or extracted constant positions and/or sizes aren't byte aligned.  */
-+
-+/* Shift left the bytes in PTR of SZ elements by AMNT bits, carrying over the
-+   bits between adjacent elements.  AMNT should be within
-+   [0, BITS_PER_UNIT).
-+   Example, AMNT = 2:
-+   00011111|11100000 << 2 = 01111111|10000000
-+   PTR[1]  | PTR[0]	     PTR[1]  | PTR[0].  */
-+
-+void
-+shift_bytes_in_array_left (unsigned char *ptr, unsigned int sz,
-+			   unsigned int amnt)
-+{
-+  if (amnt == 0)
-+    return;
-+
-+  unsigned char carry_over = 0U;
-+  unsigned char carry_mask = (~0U) << (unsigned char) (BITS_PER_UNIT - amnt);
-+  unsigned char clear_mask = (~0U) << amnt;
-+
-+  for (unsigned int i = 0; i < sz; i++)
-+    {
-+      unsigned prev_carry_over = carry_over;
-+      carry_over = (ptr[i] & carry_mask) >> (BITS_PER_UNIT - amnt);
-+
-+      ptr[i] <<= amnt;
-+      if (i != 0)
-+	{
-+	  ptr[i] &= clear_mask;
-+	  ptr[i] |= prev_carry_over;
-+	}
-+    }
-+}
-+
-+/* Like shift_bytes_in_array_left but for big-endian.
-+   Shift right the bytes in PTR of SZ elements by AMNT bits, carrying over the
-+   bits between adjacent elements.  AMNT should be within
-+   [0, BITS_PER_UNIT).
-+   Example, AMNT = 2:
-+   00011111|11100000 >> 2 = 00000111|11111000
-+   PTR[0]  | PTR[1]	     PTR[0]  | PTR[1].  */
-+
-+void
-+shift_bytes_in_array_right (unsigned char *ptr, unsigned int sz,
-+			    unsigned int amnt)
-+{
-+  if (amnt == 0)
-+    return;
-+
-+  unsigned char carry_over = 0U;
-+  unsigned char carry_mask = ~(~0U << amnt);
-+
-+  for (unsigned int i = 0; i < sz; i++)
-+    {
-+      unsigned prev_carry_over = carry_over;
-+      carry_over = ptr[i] & carry_mask;
-+
-+      carry_over <<= (unsigned char) BITS_PER_UNIT - amnt;
-+      ptr[i] >>= amnt;
-+      ptr[i] |= prev_carry_over;
-+    }
-+}
-+
- /* Try to view-convert VECTOR_CST EXPR to VECTOR_TYPE TYPE by operating
-    directly on the VECTOR_CST encoding, in a way that works for variable-
-    length vectors.  Return the resulting VECTOR_CST on success or null
-diff -urpN a/gcc/fold-const.h b/gcc/fold-const.h
---- a/gcc/fold-const.h	2020-11-26 22:26:32.816000000 -0500
-+++ b/gcc/fold-const.h	2020-11-26 22:06:08.036000000 -0500
-@@ -27,6 +27,10 @@ extern int folding_initializer;
- /* Convert between trees and native memory representation.  */
- extern int native_encode_expr (const_tree, unsigned char *, int, int off = -1);
- extern tree native_interpret_expr (tree, const unsigned char *, int);
-+extern void shift_bytes_in_array_left (unsigned char *, unsigned int,
-+				       unsigned int);
-+extern void shift_bytes_in_array_right (unsigned char *, unsigned int,
-+					unsigned int);
- 
- /* Fold constants as much as possible in an expression.
-    Returns the simplified expression.
-diff -urpN a/gcc/gimple-ssa-store-merging.c b/gcc/gimple-ssa-store-merging.c
---- a/gcc/gimple-ssa-store-merging.c	2020-11-26 22:26:32.860000000 -0500
-+++ b/gcc/gimple-ssa-store-merging.c	2020-11-26 22:06:08.036000000 -0500
-@@ -1464,66 +1464,6 @@ dump_char_array (FILE *fd, unsigned char
-   fprintf (fd, "\n");
- }
- 
--/* Shift left the bytes in PTR of SZ elements by AMNT bits, carrying over the
--   bits between adjacent elements.  AMNT should be within
--   [0, BITS_PER_UNIT).
--   Example, AMNT = 2:
--   00011111|11100000 << 2 = 01111111|10000000
--   PTR[1]  | PTR[0]         PTR[1]  | PTR[0].  */
--
--static void
--shift_bytes_in_array (unsigned char *ptr, unsigned int sz, unsigned int amnt)
--{
--  if (amnt == 0)
--    return;
--
--  unsigned char carry_over = 0U;
--  unsigned char carry_mask = (~0U) << (unsigned char) (BITS_PER_UNIT - amnt);
--  unsigned char clear_mask = (~0U) << amnt;
--
--  for (unsigned int i = 0; i < sz; i++)
--    {
--      unsigned prev_carry_over = carry_over;
--      carry_over = (ptr[i] & carry_mask) >> (BITS_PER_UNIT - amnt);
--
--      ptr[i] <<= amnt;
--      if (i != 0)
--	{
--	  ptr[i] &= clear_mask;
--	  ptr[i] |= prev_carry_over;
--	}
--    }
--}
--
--/* Like shift_bytes_in_array but for big-endian.
--   Shift right the bytes in PTR of SZ elements by AMNT bits, carrying over the
--   bits between adjacent elements.  AMNT should be within
--   [0, BITS_PER_UNIT).
--   Example, AMNT = 2:
--   00011111|11100000 >> 2 = 00000111|11111000
--   PTR[0]  | PTR[1]         PTR[0]  | PTR[1].  */
--
--static void
--shift_bytes_in_array_right (unsigned char *ptr, unsigned int sz,
--			    unsigned int amnt)
--{
--  if (amnt == 0)
--    return;
--
--  unsigned char carry_over = 0U;
--  unsigned char carry_mask = ~(~0U << amnt);
--
--  for (unsigned int i = 0; i < sz; i++)
--    {
--      unsigned prev_carry_over = carry_over;
--      carry_over = ptr[i] & carry_mask;
--
--      carry_over <<= (unsigned char) BITS_PER_UNIT - amnt;
--      ptr[i] >>= amnt;
--      ptr[i] |= prev_carry_over;
--    }
--}
--
- /* Clear out LEN bits starting from bit START in the byte array
-    PTR.  This clears the bits to the *right* from START.
-    START must be within [0, BITS_PER_UNIT) and counts starting from
-@@ -1749,7 +1689,7 @@ encode_tree_to_bitpos (tree expr, unsign
-   /* Create the shifted version of EXPR.  */
-   if (!BYTES_BIG_ENDIAN)
-     {
--      shift_bytes_in_array (tmpbuf, byte_size, shift_amnt);
-+      shift_bytes_in_array_left (tmpbuf, byte_size, shift_amnt);
-       if (shift_amnt == 0)
- 	byte_size--;
-     }
-@@ -4667,11 +4607,11 @@ verify_array_eq (unsigned char *x, unsig
-     }
- }
- 
--/* Test shift_bytes_in_array and that it carries bits across between
-+/* Test shift_bytes_in_array_left and that it carries bits across between
-    bytes correctly.  */
- 
- static void
--verify_shift_bytes_in_array (void)
-+verify_shift_bytes_in_array_left (void)
- {
-    /* byte 1   | byte 0
-       00011111 | 11100000.  */
-@@ -4680,13 +4620,13 @@ verify_shift_bytes_in_array (void)
-   memcpy (in, orig, sizeof orig);
- 
-   unsigned char expected[2] = { 0x80, 0x7f };
--  shift_bytes_in_array (in, sizeof (in), 2);
-+  shift_bytes_in_array_left (in, sizeof (in), 2);
-   verify_array_eq (in, expected, sizeof (in));
- 
-   memcpy (in, orig, sizeof orig);
-   memcpy (expected, orig, sizeof orig);
-   /* Check that shifting by zero doesn't change anything.  */
--  shift_bytes_in_array (in, sizeof (in), 0);
-+  shift_bytes_in_array_left (in, sizeof (in), 0);
-   verify_array_eq (in, expected, sizeof (in));
- 
- }
-@@ -4771,7 +4711,7 @@ verify_clear_bit_region_be (void)
- void
- store_merging_c_tests (void)
- {
--  verify_shift_bytes_in_array ();
-+  verify_shift_bytes_in_array_left ();
-   verify_shift_bytes_in_array_right ();
-   verify_clear_bit_region ();
-   verify_clear_bit_region_be ();
-diff -urpN a/gcc/testsuite/gcc.c-torture/execute/pr93582.c b/gcc/testsuite/gcc.c-torture/execute/pr93582.c
---- a/gcc/testsuite/gcc.c-torture/execute/pr93582.c	1969-12-31 19:00:00.000000000 -0500
-+++ b/gcc/testsuite/gcc.c-torture/execute/pr93582.c	2020-11-26 22:25:43.532000000 -0500
-@@ -0,0 +1,22 @@
-+/* PR tree-optimization/93582 */
-+
-+short a;
-+int b, c;
-+
-+__attribute__((noipa)) void
-+foo (void)
-+{
-+  b = c;
-+  a &= 7;
-+}
-+
-+int
-+main ()
-+{
-+  c = 27;
-+  a = 14;
-+  foo ();
-+  if (b != 27 || a != 6)
-+    __builtin_abort ();
-+  return 0;
-+}
-diff -urpN a/gcc/testsuite/gcc.dg/gimplefe-42.c b/gcc/testsuite/gcc.dg/gimplefe-42.c
---- a/gcc/testsuite/gcc.dg/gimplefe-42.c	1969-12-31 19:00:00.000000000 -0500
-+++ b/gcc/testsuite/gcc.dg/gimplefe-42.c	2020-11-26 22:06:08.036000000 -0500
-@@ -0,0 +1,18 @@
-+/* { dg-do compile } */
-+/* { dg-options "-fgimple" } */
-+
-+typedef char ref_all_char __attribute__((may_alias));
-+char a[7];
-+__GIMPLE void f()
-+{
-+  int _1;
-+  /* string literals inside __MEM need their address taken.  */
-+  __MEM <char[7]> ((ref_all_char *)&a)
-+    = __MEM <char[7]> (_Literal (char *) &"654321");
-+  /* but plain assignment also works.  */
-+  __MEM <char[7]> ((ref_all_char *)&a) = "654321";
-+  /* also punning with int.  */
-+  _1 = __MEM <int> (_Literal (char *) &"654321");
-+  __MEM <int> ((ref_all_char *)&a) = _1;
-+  return;
-+}
-diff -urpN a/gcc/testsuite/gcc.dg/pr93582.c b/gcc/testsuite/gcc.dg/pr93582.c
---- a/gcc/testsuite/gcc.dg/pr93582.c	1969-12-31 19:00:00.000000000 -0500
-+++ b/gcc/testsuite/gcc.dg/pr93582.c	2020-11-26 22:26:15.784000000 -0500
-@@ -0,0 +1,57 @@
-+/* PR tree-optimization/93582 */
-+/* { dg-do compile } */
-+/* { dg-options "-O2 -Warray-bounds" } */
-+
-+struct S {
-+  unsigned int s1:1;
-+  unsigned int s2:1;
-+  unsigned int s3:1;
-+  unsigned int s4:1;
-+  unsigned int s5:4;
-+  unsigned char s6;
-+  unsigned short s7;
-+  unsigned short s8;
-+};
-+struct T {
-+  int t1;
-+  int t2;
-+};
-+
-+static inline int
-+bar (struct S *x)
-+{
-+  if (x->s4)
-+    return ((struct T *)(x + 1))->t1 + ((struct T *)(x + 1))->t2;	/* { dg-bogus "array subscript 1 is outside array bounds of" } */
-+  else
-+    return 0;
-+}
-+
-+int
-+foo (int x, int y)
-+{
-+  struct S s;								/* { dg-bogus "while referencing" } */
-+  s.s6 = x;
-+  s.s7 = y & 0x1FFF;
-+  s.s4 = 0;
-+  return bar (&s);
-+}
-+
-+static inline int
-+qux (struct S *x)
-+{
-+  int s4 = x->s4;
-+  if (s4)
-+    return ((struct T *)(x + 1))->t1 + ((struct T *)(x + 1))->t2;
-+  else
-+    return 0;
-+}
-+
-+int
-+baz (int x, int y)
-+{
-+  struct S s;
-+  s.s6 = x;
-+  s.s7 = y & 0x1FFF;
-+  s.s4 = 0;
-+  return qux (&s);
-+}
-diff -urpN a/gcc/testsuite/gcc.dg/torture/ssa-fre-5.c b/gcc/testsuite/gcc.dg/torture/ssa-fre-5.c
---- a/gcc/testsuite/gcc.dg/torture/ssa-fre-5.c	1969-12-31 19:00:00.000000000 -0500
-+++ b/gcc/testsuite/gcc.dg/torture/ssa-fre-5.c	2020-11-26 22:06:08.036000000 -0500
-@@ -0,0 +1,27 @@
-+/* { dg-do compile } */
-+/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-+/* { dg-additional-options "-fgimple -fdump-tree-fre1" } */
-+
-+typedef int v4si __attribute__((vector_size(16)));
-+
-+int __GIMPLE (ssa,startwith("fre"))
-+foo ()
-+{
-+  int * p;
-+  int i;
-+  int x[4];
-+  long unsigned int _1;
-+  long unsigned int _2;
-+  int _7;
-+
-+  __BB(2):
-+  i_3 = 0;
-+  _1 = (long unsigned int) i_3;
-+  _2 = _1 * 4ul;
-+  p_4 = _Literal (int *) &x + _2;
-+  __MEM <v4si> ((v4si *)p_4) = _Literal (v4si) { 1, 2, 3, 4 };
-+  _7 = x[0];
-+  return _7;
-+}
-+
-+/* { dg-final { scan-tree-dump "return 1;" "fre1" } } */
-diff -urpN a/gcc/testsuite/gcc.dg/torture/ssa-fre-6.c b/gcc/testsuite/gcc.dg/torture/ssa-fre-6.c
---- a/gcc/testsuite/gcc.dg/torture/ssa-fre-6.c	1969-12-31 19:00:00.000000000 -0500
-+++ b/gcc/testsuite/gcc.dg/torture/ssa-fre-6.c	2020-11-26 22:06:08.036000000 -0500
-@@ -0,0 +1,27 @@
-+/* { dg-do compile } */
-+/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-+/* { dg-additional-options "-fgimple -fdump-tree-fre1" } */
-+
-+typedef int v4si __attribute__((vector_size(16)));
-+
-+int __GIMPLE (ssa,startwith("fre"))
-+foo ()
-+{
-+  int * p;
-+  int i;
-+  int x[4];
-+  long unsigned int _1;
-+  long unsigned int _2;
-+  int _7;
-+
-+  __BB(2):
-+  i_3 = 0;
-+  _1 = (long unsigned int) i_3;
-+  _2 = _1 * 4ul;
-+  p_4 = _Literal (int *) &x + _2;
-+  __MEM <v4si> ((v4si *)p_4) = _Literal (v4si) {};
-+  _7 = x[0];
-+  return _7;
-+}
-+
-+/* { dg-final { scan-tree-dump "return 0;" "fre1" } } */
-diff -urpN a/gcc/testsuite/gcc.dg/torture/ssa-fre-7.c b/gcc/testsuite/gcc.dg/torture/ssa-fre-7.c
---- a/gcc/testsuite/gcc.dg/torture/ssa-fre-7.c	1969-12-31 19:00:00.000000000 -0500
-+++ b/gcc/testsuite/gcc.dg/torture/ssa-fre-7.c	2020-11-26 22:06:08.036000000 -0500
-@@ -0,0 +1,29 @@
-+/* { dg-do compile } */
-+/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
-+/* { dg-additional-options "-fgimple -fdump-tree-fre1" } */
-+
-+typedef int v4si __attribute__((vector_size(16)));
-+
-+int __GIMPLE (ssa,startwith("fre"))
-+foo (int c)
-+{
-+  int * p;
-+  int i;
-+  int x[4];
-+  long unsigned int _1;
-+  long unsigned int _2;
-+  int _7;
-+  v4si _6;
-+
-+  __BB(2):
-+  i_3 = 0;
-+  _1 = (long unsigned int) i_3;
-+  _2 = _1 * 4ul;
-+  p_4 = _Literal (int *) &x + _2;
-+  _6 = _Literal (v4si) { c_5(D), c_5(D), c_5(D), c_5(D) };
-+  __MEM <v4si> ((v4si *)p_4) = _6;
-+  _7 = x[0];
-+  return _7;
-+}
-+
-+/* { dg-final { scan-tree-dump "return c_5\\(D\\);" "fre1" } } */
-diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/alias-access-path-1.c b/gcc/testsuite/gcc.dg/tree-ssa/alias-access-path-1.c
---- a/gcc/testsuite/gcc.dg/tree-ssa/alias-access-path-1.c	2020-11-26 22:26:34.324000000 -0500
-+++ b/gcc/testsuite/gcc.dg/tree-ssa/alias-access-path-1.c	2020-11-26 22:06:08.036000000 -0500
-@@ -1,5 +1,5 @@
- /* { dg-do compile } */
--/* { dg-options "-O2 -fdump-tree-fre3" } */
-+/* { dg-options "-O2 -fdump-tree-fre1" } */
- struct foo
- {
-   int val;
-@@ -18,4 +18,4 @@ test ()
-   return barptr->val2;
- }
- 
--/* { dg-final { scan-tree-dump-times "return 123" 1 "fre3"} } */
-+/* { dg-final { scan-tree-dump-times "return 123" 1 "fre1"} } */
-diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-10.c b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-10.c
---- a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-10.c	1969-12-31 19:00:00.000000000 -0500
-+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-10.c	2020-11-26 22:24:45.812000000 -0500
-@@ -0,0 +1,29 @@
-+/* PR tree-optimization/93582 */
-+/* { dg-do compile { target int32 } } */
-+/* { dg-options "-O2 -fdump-tree-fre1" } */
-+/* { dg-final { scan-tree-dump "return 72876566;" "fre1" { target le } } } */
-+/* { dg-final { scan-tree-dump "return 559957376;" "fre1" { target be } } } */
-+
-+union U {
-+  struct S { int a : 12, b : 5, c : 10, d : 5; } s;
-+  unsigned int i;
-+};
-+struct A { char a[12]; union U u; };
-+void bar (struct A *);
-+
-+unsigned
-+foo (void)
-+{
-+  struct A a;
-+  bar (&a);
-+  a.u.s.a = 1590;
-+  a.u.s.c = -404;
-+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-+#define M 0x67e0a5f
-+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
-+#define M 0xa5f067e0
-+#else
-+#define M 0
-+#endif
-+  return a.u.i & M;
-+}
-diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-1.c
---- a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-1.c	1969-12-31 19:00:00.000000000 -0500
-+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-1.c	2020-11-26 22:18:39.368000000 -0500
-@@ -0,0 +1,18 @@
-+/* PR tree-optimization/93582 */
-+/* { dg-do compile { target int32 } } */
-+/* { dg-options "-O2 -fdump-tree-fre1" } */
-+/* { dg-final { scan-tree-dump "return 1;" "fre1" } } */
-+
-+union U {
-+  struct S { int a : 1, b : 4, c : 27; } s;
-+  struct T { int d : 2; int e : 2; int f : 28; } t;
-+};
-+
-+int
-+foo (void)
-+{
-+  union U u;
-+  u.s.b = 10;
-+  return u.t.e;
-+}
-+
-diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-2.c
---- a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-2.c	1969-12-31 19:00:00.000000000 -0500
-+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-2.c	2020-11-26 22:18:44.832000000 -0500
-@@ -0,0 +1,17 @@
-+/* PR tree-optimization/93582 */
-+/* { dg-do compile { target int32 } } */
-+/* { dg-options "-O2 -fdump-tree-fre1" } */
-+/* { dg-final { scan-tree-dump "return 593;" "fre1" } } */
-+
-+union U {
-+  struct S { int a : 1, b : 14, c : 17; } s;
-+  struct T { int d : 2; int e : 12; int f : 18; } t;
-+};
-+
-+int
-+foo (void)
-+{
-+  union U u;
-+  u.s.b = -7005;
-+  return u.t.e;
-+}
-diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-3.c b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-3.c
---- a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-3.c	1969-12-31 19:00:00.000000000 -0500
-+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-3.c	2020-11-26 22:21:44.936000000 -0500
-@@ -0,0 +1,19 @@
-+/* PR tree-optimization/93582 */
-+/* { dg-do compile { target int32 } } */
-+/* { dg-options "-O2 -fdump-tree-fre1" } */
-+/* { dg-final { scan-tree-dump "return 1;" "fre1" { target be } } } */
-+/* { dg-final { scan-tree-dump "return 2;" "fre1" { target le } } } */
-+
-+union U {
-+  struct S { int a : 1, b : 14, c : 17; } s;
-+  struct T { int d : 10; int e : 4; int f : 18; } t;
-+};
-+
-+int
-+foo (void)
-+{
-+  union U u;
-+  u.s.b = -7005;
-+  return u.t.e;
-+}
-+
-diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-4.c b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-4.c
---- a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-4.c	1969-12-31 19:00:00.000000000 -0500
-+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-4.c	2020-11-26 22:23:33.236000000 -0500
-@@ -0,0 +1,24 @@
-+/* PR tree-optimization/93582 */
-+/* { dg-do compile { target int32 } } */
-+/* { dg-options "-O2 -fdump-tree-fre1" } */
-+/* { dg-final { scan-tree-dump "return -1991560811;" "fre1" { target le } } } */
-+/* { dg-final { scan-tree-dump "return -733324916;" "fre1" { target be } } } */
-+
-+union U {
-+  struct S { int a : 1, b : 4, c : 27; } s;
-+  unsigned int i;
-+};
-+struct A { char a[24]; union U u; };
-+void bar (struct A *);
-+
-+int
-+foo (void)
-+{
-+  struct A a;
-+  bar (&a);
-+  a.u.s.a = -1;
-+  a.u.s.b = -6;
-+  a.u.s.c = -62236276;
-+  return a.u.i;
-+}
-+
-diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-5.c b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-5.c
---- a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-5.c	1969-12-31 19:00:00.000000000 -0500
-+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-5.c	2020-11-26 22:23:38.324000000 -0500
-@@ -0,0 +1,26 @@
-+/* PR tree-optimization/93582 */
-+/* { dg-do compile { target int32 } } */
-+/* { dg-options "-O2 -fdump-tree-fre1" } */
-+/* { dg-final { scan-tree-dump "return -1462729318;" "fre1" { target le } } } */
-+/* { dg-final { scan-tree-dump "return 1300568597;" "fre1" { target be } } } */
-+
-+union U {
-+  struct S { int a : 1, b : 7, c : 8, d : 11, e : 5; } s;
-+  unsigned int i;
-+};
-+struct A { char a[8]; union U u; };
-+void bar (struct A *);
-+
-+int
-+foo (void)
-+{
-+  struct A a;
-+  bar (&a);
-+  a.u.s.a = 0;
-+  a.u.s.b = -51;
-+  a.u.s.c = -123;
-+  a.u.s.d = 208;
-+  a.u.s.e = -11;
-+  return a.u.i;
-+}
-+
-diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-6.c b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-6.c
---- a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-6.c	1969-12-31 19:00:00.000000000 -0500
-+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-6.c	2020-11-26 22:23:42.348000000 -0500
-@@ -0,0 +1,25 @@
-+/* PR tree-optimization/93582 */
-+/* { dg-do compile { target int32 } } */
-+/* { dg-options "-O2 -fdump-tree-fre1" } */
-+/* { dg-final { scan-tree-dump "return 890118;" "fre1" { target le } } } */
-+/* { dg-final { scan-tree-dump "return 447899;" "fre1" { target be } } } */
-+
-+union U {
-+  struct S { int a : 16, b : 5, c : 10, d : 1; } s;
-+  struct T { int a : 8, b : 21, c : 3; } t;
-+};
-+struct A { char a[4]; union U u; };
-+void bar (struct A *);
-+
-+int
-+foo (void)
-+{
-+  struct A a;
-+  bar (&a);
-+  a.u.s.a = 1590;
-+  a.u.s.b = -11;
-+  a.u.s.c = 620;
-+  a.u.s.d = -1;
-+  return a.u.t.b;
-+}
-+
-diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-7.c b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-7.c
---- a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-7.c	1969-12-31 19:00:00.000000000 -0500
-+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-7.c	2020-11-26 22:23:45.756000000 -0500
-@@ -0,0 +1,25 @@
-+/* PR tree-optimization/93582 */
-+/* { dg-do compile { target int32 } } */
-+/* { dg-options "-O2 -fdump-tree-fre1" } */
-+/* { dg-final { scan-tree-dump "return -413012;" "fre1" { target le } } } */
-+/* { dg-final { scan-tree-dump "return -611112;" "fre1" { target be } } } */
-+
-+union U {
-+  struct S { int a : 12, b : 5, c : 10, d : 5; } s;
-+  struct T { int a : 7, b : 21, c : 4; } t;
-+};
-+struct A { char a[48]; union U u; };
-+void bar (struct A *);
-+
-+int
-+foo (void)
-+{
-+  struct A a;
-+  bar (&a);
-+  a.u.s.a = 1590;
-+  a.u.s.b = -11;
-+  a.u.s.c = -404;
-+  a.u.s.d = 7;
-+  return a.u.t.b;
-+}
-+
-diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-8.c b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-8.c
---- a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-8.c	1969-12-31 19:00:00.000000000 -0500
-+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-8.c	2020-11-26 22:23:53.088000000 -0500
-@@ -0,0 +1,15 @@
-+/* PR tree-optimization/93582 */
-+/* { dg-do compile { target int32 } } */
-+/* { dg-options "-O2 -fdump-tree-fre1" } */
-+/* { dg-final { scan-tree-dump "return 0;" "fre1" { target le } } } */
-+/* { dg-final { scan-tree-dump "return -8531;" "fre1" { target be } } } */
-+
-+short
-+foo (void)
-+{
-+  union U { char c[32]; short s[16]; int i[8]; } u;
-+  __builtin_memset (u.c + 1, '\0', 5);
-+  u.s[3] = 0xdead;
-+  return u.i[1];
-+}
-+
-diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-82.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-82.c
---- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-82.c	1969-12-31 19:00:00.000000000 -0500
-+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-82.c	2020-11-26 22:06:08.036000000 -0500
-@@ -0,0 +1,25 @@
-+/* { dg-do run } */
-+/* { dg-options "-O -fdump-tree-fre1-details" } */
-+
-+struct S { _Bool x; };
-+
-+void
-+foo (struct S *s)
-+{
-+  __builtin_memset (s, 1, sizeof (struct S));
-+  s->x = 1;
-+}
-+
-+int
-+main ()
-+{
-+  struct S s;
-+  foo (&s);
-+  char c;
-+  __builtin_memcpy (&c, &s.x, 1);
-+  if (c != 1)
-+    __builtin_abort ();
-+  return 0;
-+}
-+
-+/* { dg-final { scan-tree-dump "Deleted redundant store" "fre1" } } */
-diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-83.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-83.c
---- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-83.c	1969-12-31 19:00:00.000000000 -0500
-+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-83.c	2020-11-26 22:06:08.036000000 -0500
-@@ -0,0 +1,32 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O -fdump-tree-fre1-details" } */
-+
-+struct X
-+{
-+   int a : 1;
-+   int b : 1;
-+} x;
-+
-+void foo (int v)
-+{
-+  x.a = 1;
-+  x.b = v;
-+  x.a = 1;
-+  x.b = v;
-+}
-+
-+struct Y
-+{
-+   _Bool a;
-+   _Bool b;
-+} y;
-+
-+void bar (int v)
-+{
-+  y.a = 1;
-+  y.b = v;
-+  y.a = 1;
-+  y.b = v;
-+}
-+
-+/* { dg-final { scan-tree-dump-times "Deleted redundant store" 4 "fre1" } } */
-diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-84.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-84.c
---- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-84.c	1969-12-31 19:00:00.000000000 -0500
-+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-84.c	2020-11-26 22:06:08.036000000 -0500
-@@ -0,0 +1,19 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O -fdump-tree-fre1" } */
-+
-+typedef int v4si __attribute__((vector_size(16)));
-+
-+void foo (v4si *dst, int x)
-+{
-+  v4si v[2];
-+  v[0][0] = 1;
-+  v[0][1] = x;
-+  v[0][2] = 2;
-+  v[0][3] = 3;
-+  v[0][1] = 0;
-+  *dst = v[0];
-+}
-+
-+/* The shadowed non-constant assign to v[0][1] shouldn't prevent us from
-+   value-numbering the load to a constant.  */
-+/* { dg-final { scan-tree-dump "\\*dst_\[0-9\]*\\\(D\\) = { 1, 0, 2, 3 };" "fre1" } } */
-diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-85.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-85.c
---- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-85.c	1969-12-31 19:00:00.000000000 -0500
-+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-85.c	2020-11-26 22:06:08.036000000 -0500
-@@ -0,0 +1,14 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O -fstrict-aliasing -fdump-tree-fre1-details" } */
-+
-+struct X { int i; int j; };
-+
-+struct X x, y;
-+void foo ()
-+{
-+  x.i = 1;
-+  y = x;
-+  y.i = 1; // redundant
-+}
-+
-+/* { dg-final { scan-tree-dump "Deleted redundant store y.i" "fre1" } } */
-diff -urpN a/gcc/tree-ssa-alias.c b/gcc/tree-ssa-alias.c
---- a/gcc/tree-ssa-alias.c	2020-11-26 22:26:32.884000000 -0500
-+++ b/gcc/tree-ssa-alias.c	2020-11-26 22:06:08.036000000 -0500
-@@ -2628,7 +2628,8 @@ static bool
- maybe_skip_until (gimple *phi, tree &target, basic_block target_bb,
- 		  ao_ref *ref, tree vuse, bool tbaa_p, unsigned int &limit,
- 		  bitmap *visited, bool abort_on_visited,
--		  void *(*translate)(ao_ref *, tree, void *, bool *),
-+		  void *(*translate)(ao_ref *, tree, void *, translate_flags *),
-+		  translate_flags disambiguate_only,
- 		  void *data)
- {
-   basic_block bb = gimple_bb (phi);
-@@ -2663,7 +2664,7 @@ maybe_skip_until (gimple *phi, tree &tar
- 	    return !abort_on_visited;
- 	  vuse = get_continuation_for_phi (def_stmt, ref, tbaa_p, limit,
- 					   visited, abort_on_visited,
--					   translate, data);
-+					   translate, data, disambiguate_only);
- 	  if (!vuse)
- 	    return false;
- 	  continue;
-@@ -2678,9 +2679,9 @@ maybe_skip_until (gimple *phi, tree &tar
- 	  --limit;
- 	  if (stmt_may_clobber_ref_p_1 (def_stmt, ref, tbaa_p))
- 	    {
--	      bool disambiguate_only = true;
-+	      translate_flags tf = disambiguate_only;
- 	      if (translate
--		  && (*translate) (ref, vuse, data, &disambiguate_only) == NULL)
-+		  && (*translate) (ref, vuse, data, &tf) == NULL)
- 		;
- 	      else
- 		return false;
-@@ -2711,8 +2712,10 @@ tree
- get_continuation_for_phi (gimple *phi, ao_ref *ref, bool tbaa_p,
- 			  unsigned int &limit, bitmap *visited,
- 			  bool abort_on_visited,
--			  void *(*translate)(ao_ref *, tree, void *, bool *),
--			  void *data)
-+			  void *(*translate)(ao_ref *, tree, void *,
-+					     translate_flags *),
-+			  void *data,
-+			  translate_flags disambiguate_only)
- {
-   unsigned nargs = gimple_phi_num_args (phi);
- 
-@@ -2754,13 +2757,15 @@ get_continuation_for_phi (gimple *phi, a
-       else if (! maybe_skip_until (phi, arg0, dom, ref, arg1, tbaa_p,
- 				   limit, visited,
- 				   abort_on_visited,
--				   /* Do not translate when walking over
-+				   translate,
-+				   /* Do not valueize when walking over
- 				      backedges.  */
- 				   dominated_by_p
- 				     (CDI_DOMINATORS,
- 				      gimple_bb (SSA_NAME_DEF_STMT (arg1)),
- 				      phi_bb)
--				   ? NULL : translate, data))
-+				   ? TR_DISAMBIGUATE
-+				   : disambiguate_only, data))
- 	return NULL_TREE;
-     }
- 
-@@ -2798,7 +2803,8 @@ get_continuation_for_phi (gimple *phi, a
- void *
- walk_non_aliased_vuses (ao_ref *ref, tree vuse, bool tbaa_p,
- 			void *(*walker)(ao_ref *, tree, void *),
--			void *(*translate)(ao_ref *, tree, void *, bool *),
-+			void *(*translate)(ao_ref *, tree, void *,
-+					   translate_flags *),
- 			tree (*valueize)(tree),
- 			unsigned &limit, void *data)
- {
-@@ -2851,7 +2857,7 @@ walk_non_aliased_vuses (ao_ref *ref, tre
- 	    {
- 	      if (!translate)
- 		break;
--	      bool disambiguate_only = false;
-+	      translate_flags disambiguate_only = TR_TRANSLATE;
- 	      res = (*translate) (ref, vuse, data, &disambiguate_only);
- 	      /* Failed lookup and translation.  */
- 	      if (res == (void *)-1)
-@@ -2863,7 +2869,7 @@ walk_non_aliased_vuses (ao_ref *ref, tre
- 	      else if (res != NULL)
- 		break;
- 	      /* Translation succeeded, continue walking.  */
--	      translated = translated || !disambiguate_only;
-+	      translated = translated || disambiguate_only == TR_TRANSLATE;
- 	    }
- 	  vuse = gimple_vuse (def_stmt);
- 	}
-diff -urpN a/gcc/tree-ssa-alias.h b/gcc/tree-ssa-alias.h
---- a/gcc/tree-ssa-alias.h	2020-11-26 22:26:32.868000000 -0500
-+++ b/gcc/tree-ssa-alias.h	2020-11-26 22:06:08.040000000 -0500
-@@ -131,13 +131,18 @@ extern bool call_may_clobber_ref_p (gcal
- extern bool call_may_clobber_ref_p_1 (gcall *, ao_ref *);
- extern bool stmt_kills_ref_p (gimple *, tree);
- extern bool stmt_kills_ref_p (gimple *, ao_ref *);
-+enum translate_flags
-+  { TR_TRANSLATE, TR_VALUEIZE_AND_DISAMBIGUATE, TR_DISAMBIGUATE };
- extern tree get_continuation_for_phi (gimple *, ao_ref *, bool,
- 				      unsigned int &, bitmap *, bool,
--				      void *(*)(ao_ref *, tree, void *, bool *),
--				      void *);
-+				      void *(*)(ao_ref *, tree, void *,
-+						translate_flags *),
-+				      void *, translate_flags
-+					= TR_VALUEIZE_AND_DISAMBIGUATE);
- extern void *walk_non_aliased_vuses (ao_ref *, tree, bool,
- 				     void *(*)(ao_ref *, tree, void *),
--				     void *(*)(ao_ref *, tree, void *, bool *),
-+				     void *(*)(ao_ref *, tree, void *,
-+					       translate_flags *),
- 				     tree (*)(tree), unsigned &, void *);
- extern int walk_aliased_vdefs (ao_ref *, tree,
- 			       bool (*)(ao_ref *, tree, void *),
-diff -urpN a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c
---- a/gcc/tree-ssa-sccvn.c	2020-11-26 22:26:32.836000000 -0500
-+++ b/gcc/tree-ssa-sccvn.c	2020-11-27 03:17:41.080000000 -0500
-@@ -1684,24 +1684,75 @@ struct pd_data
- 
- struct vn_walk_cb_data
- {
--  vn_walk_cb_data (vn_reference_t vr_, tree *last_vuse_ptr_,
--		   vn_lookup_kind vn_walk_kind_, bool tbaa_p_)
--    : vr (vr_), last_vuse_ptr (last_vuse_ptr_), vn_walk_kind (vn_walk_kind_),
--      tbaa_p (tbaa_p_), known_ranges (NULL)
--   {}
-+  vn_walk_cb_data (vn_reference_t vr_, tree orig_ref_, tree *last_vuse_ptr_,
-+		   vn_lookup_kind vn_walk_kind_, bool tbaa_p_, tree mask_)
-+    : vr (vr_), last_vuse_ptr (last_vuse_ptr_), last_vuse (NULL_TREE),
-+      mask (mask_), masked_result (NULL_TREE), vn_walk_kind (vn_walk_kind_),
-+      tbaa_p (tbaa_p_), saved_operands (vNULL), first_set (-2),
-+      known_ranges (NULL)
-+  {
-+    if (!last_vuse_ptr)
-+      last_vuse_ptr = &last_vuse;
-+    ao_ref_init (&orig_ref, orig_ref_);
-+    if (mask)
-+      {
-+	wide_int w = wi::to_wide (mask);
-+	unsigned int pos = 0, prec = w.get_precision ();
-+	pd_data pd;
-+	pd.rhs = build_constructor (NULL_TREE, NULL);
-+	/* When bitwise and with a constant is done on a memory load,
-+	   we don't really need all the bits to be defined or defined
-+	   to constants, we don't really care what is in the position
-+	   corresponding to 0 bits in the mask.
-+	   So, push the ranges of those 0 bits in the mask as artificial
-+	   zero stores and let the partial def handling code do the
-+	   rest.  */
-+	while (pos < prec)
-+	  {
-+	    int tz = wi::ctz (w);
-+	    if (pos + tz > prec)
-+	      tz = prec - pos;
-+	    if (tz)
-+	      {
-+		if (BYTES_BIG_ENDIAN)
-+		  pd.offset = prec - pos - tz;
-+		else
-+		  pd.offset = pos;
-+		pd.size = tz;
-+		void *r = push_partial_def (pd, 0, prec);
-+		gcc_assert (r == NULL_TREE);
-+	      }
-+	    pos += tz;
-+	    if (pos == prec)
-+	      break;
-+	    w = wi::lrshift (w, tz);
-+	    tz = wi::ctz (wi::bit_not (w));
-+	    if (pos + tz > prec)
-+	      tz = prec - pos;
-+	    pos += tz;
-+	    w = wi::lrshift (w, tz);
-+	  }
-+      }
-+  }
-   ~vn_walk_cb_data ();
--  void *push_partial_def (const pd_data& pd, tree, HOST_WIDE_INT);
-+  void *finish (alias_set_type, tree);
-+  void *push_partial_def (const pd_data& pd, alias_set_type, HOST_WIDE_INT);
- 
-   vn_reference_t vr;
-+  ao_ref orig_ref;
-   tree *last_vuse_ptr;
-+  tree last_vuse;
-+  tree mask;
-+  tree masked_result;
-   vn_lookup_kind vn_walk_kind;
-   bool tbaa_p;
-+  vec<vn_reference_op_s> saved_operands;
- 
-   /* The VDEFs of partial defs we come along.  */
-   auto_vec<pd_data, 2> partial_defs;
-   /* The first defs range to avoid splay tree setup in most cases.  */
-   pd_range first_range;
--  tree first_vuse;
-+  alias_set_type first_set;
-   splay_tree known_ranges;
-   obstack ranges_obstack;
- };
-@@ -1713,6 +1764,23 @@ vn_walk_cb_data::~vn_walk_cb_data ()
-       splay_tree_delete (known_ranges);
-       obstack_free (&ranges_obstack, NULL);
-     }
-+  saved_operands.release ();
-+}
-+
-+void *
-+vn_walk_cb_data::finish (alias_set_type set, tree val)
-+{
-+  if (first_set != -2)
-+    set = first_set;
-+  if (mask)
-+    {
-+      masked_result = val;
-+      return (void *) -1;
-+    }
-+  vec<vn_reference_op_s> &operands
-+    = saved_operands.exists () ? saved_operands : vr->operands;
-+  return vn_reference_lookup_or_insert_for_pieces (last_vuse, set,
-+		  vr->type, operands, val);
- }
- 
- /* pd_range splay-tree helpers.  */
-@@ -1742,168 +1810,306 @@ pd_tree_dealloc (void *, void *)
- }
- 
- /* Push PD to the vector of partial definitions returning a
--   value when we are ready to combine things with VUSE and MAXSIZEI,
-+   value when we are ready to combine things with VUSE, SET and MAXSIZEI,
-    NULL when we want to continue looking for partial defs or -1
-    on failure.  */
- 
- void *
--vn_walk_cb_data::push_partial_def (const pd_data &pd, tree vuse,
--				   HOST_WIDE_INT maxsizei)
-+vn_walk_cb_data::push_partial_def (const pd_data &pd,
-+				   alias_set_type set, HOST_WIDE_INT maxsizei)
- {
-+  const HOST_WIDE_INT bufsize = 64;
-+  /* We're using a fixed buffer for encoding so fail early if the object
-+     we want to interpret is bigger.  */
-+  if (maxsizei > bufsize * BITS_PER_UNIT
-+      || CHAR_BIT != 8
-+      || BITS_PER_UNIT != 8
-+      /* Not prepared to handle PDP endian.  */
-+      || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
-+    return (void *)-1;
-+
-+  bool pd_constant_p = (TREE_CODE (pd.rhs) == CONSTRUCTOR
-+			|| CONSTANT_CLASS_P (pd.rhs));
-   if (partial_defs.is_empty ())
-     {
-+      if (!pd_constant_p)
-+	return (void *)-1;
-       partial_defs.safe_push (pd);
-       first_range.offset = pd.offset;
-       first_range.size = pd.size;
--      first_vuse = vuse;
-+      first_set = set;
-       last_vuse_ptr = NULL;
-+      /* Continue looking for partial defs.  */
-+      return NULL;
-+    }
-+
-+  if (!known_ranges)
-+    {
-+      /* ???  Optimize the case where the 2nd partial def completes things.  */
-+      gcc_obstack_init (&ranges_obstack);
-+      known_ranges = splay_tree_new_with_allocator (pd_range_compare, 0, 0,
-+						    pd_tree_alloc,
-+						    pd_tree_dealloc, this);
-+      splay_tree_insert (known_ranges,
-+			 (splay_tree_key)&first_range.offset,
-+			 (splay_tree_value)&first_range);
-+    }
-+
-+  pd_range newr = { pd.offset, pd.size };
-+  splay_tree_node n;
-+  pd_range *r;
-+  /* Lookup the predecessor of offset + 1 and see if we need to merge.  */
-+  HOST_WIDE_INT loffset = newr.offset + 1;
-+  if ((n = splay_tree_predecessor (known_ranges, (splay_tree_key)&loffset))
-+      && ((r = (pd_range *)n->value), true)
-+      && ranges_known_overlap_p (r->offset, r->size + 1,
-+				 newr.offset, newr.size))
-+    {
-+      /* Ignore partial defs already covered.  */
-+      if (known_subrange_p (newr.offset, newr.size, r->offset, r->size))
-+	return NULL;
-+      r->size = MAX (r->offset + r->size, newr.offset + newr.size) - r->offset;
-     }
-   else
-     {
--      if (!known_ranges)
--	{
--	  /* ???  Optimize the case where the second partial def
--	     completes things.  */
--	  gcc_obstack_init (&ranges_obstack);
--	  known_ranges
--	      = splay_tree_new_with_allocator (pd_range_compare, 0, 0,
--					       pd_tree_alloc,
--					       pd_tree_dealloc, this);
--	  splay_tree_insert (known_ranges,
--			     (splay_tree_key)&first_range.offset,
--			     (splay_tree_value)&first_range);
--	}
--      if (known_ranges)
--	{
--	  pd_range newr = { pd.offset, pd.size };
--	  splay_tree_node n;
--	  pd_range *r;
--	  /* Lookup the predecessor of offset + 1 and see if
--	     we need to merge with it.  */
--	  HOST_WIDE_INT loffset = newr.offset + 1;
--	  if ((n = splay_tree_predecessor (known_ranges,
--					   (splay_tree_key)&loffset))
--	      && ((r = (pd_range *)n->value), true)
--	      && ranges_known_overlap_p (r->offset, r->size + 1,
--					 newr.offset, newr.size))
--	    {
--	      /* Ignore partial defs already covered.  */
--	      if (known_subrange_p (newr.offset, newr.size,
--				    r->offset, r->size))
--		return NULL;
--	      r->size = MAX (r->offset + r->size,
--			     newr.offset + newr.size) - r->offset;
--	    }
--	  else
--	    {
--	      /* newr.offset wasn't covered yet, insert the
--		 range.  */
--	      r = XOBNEW (&ranges_obstack, pd_range);
--	      *r = newr;
--	      splay_tree_insert (known_ranges,
--				 (splay_tree_key)&r->offset,
--				 (splay_tree_value)r);
--	    }
--	  /* Merge r which now contains newr and is a member
--	     of the splay tree with adjacent overlapping ranges.  */
--	  pd_range *rafter;
--	  while ((n = splay_tree_successor (known_ranges,
--					    (splay_tree_key)&r->offset))
--		 && ((rafter = (pd_range *)n->value), true)
--		 && ranges_known_overlap_p (r->offset, r->size + 1,
--					    rafter->offset, rafter->size))
--	    {
--	      r->size = MAX (r->offset + r->size,
--			     rafter->offset + rafter->size) - r->offset;
--	      splay_tree_remove (known_ranges,
--				 (splay_tree_key)&rafter->offset);
--	    }
--	  partial_defs.safe_push (pd);
--
--	  /* Now we have merged newr into the range tree.
--	     When we have covered [offseti, sizei] then the
--	     tree will contain exactly one node which has
--	     the desired properties and it will be 'r'.  */
--	  if (known_subrange_p (0, maxsizei / BITS_PER_UNIT,
--				r->offset, r->size))
--	    {
--	      /* Now simply native encode all partial defs
--		 in reverse order.  */
--	      unsigned ndefs = partial_defs.length ();
--	      /* We support up to 512-bit values (for V8DFmode).  */
--	      unsigned char buffer[64];
--	      int len;
-+      /* newr.offset wasn't covered yet, insert the range.  */
-+      r = XOBNEW (&ranges_obstack, pd_range);
-+      *r = newr;
-+      splay_tree_insert (known_ranges, (splay_tree_key)&r->offset,
-+			 (splay_tree_value)r);
-+    }
-+  /* Merge r which now contains newr and is a member of the splay tree with
-+     adjacent overlapping ranges.  */
-+  pd_range *rafter;
-+  while ((n = splay_tree_successor (known_ranges, (splay_tree_key)&r->offset))
-+	 && ((rafter = (pd_range *)n->value), true)
-+	 && ranges_known_overlap_p (r->offset, r->size + 1,
-+				    rafter->offset, rafter->size))
-+    {
-+      r->size = MAX (r->offset + r->size,
-+		     rafter->offset + rafter->size) - r->offset;
-+      splay_tree_remove (known_ranges, (splay_tree_key)&rafter->offset);
-+    }
-+  /* Non-constants are OK as long as they are shadowed by a constant.  */
-+  if (!pd_constant_p)
-+    return (void *)-1;
-+  partial_defs.safe_push (pd);
-+
-+  /* Now we have merged newr into the range tree.  When we have covered
-+     [offseti, sizei] then the tree will contain exactly one node which has
-+     the desired properties and it will be 'r'.  */
-+  if (!known_subrange_p (0, maxsizei, r->offset, r->size))
-+    /* Continue looking for partial defs.  */
-+    return NULL;
- 
--	      while (!partial_defs.is_empty ())
-+  /* Now simply native encode all partial defs in reverse order.  */
-+  unsigned ndefs = partial_defs.length ();
-+  /* We support up to 512-bit values (for V8DFmode).  */
-+  unsigned char buffer[bufsize + 1];
-+  unsigned char this_buffer[bufsize + 1];
-+  int len;
-+
-+  memset (buffer, 0, bufsize + 1);
-+  unsigned needed_len = ROUND_UP (maxsizei, BITS_PER_UNIT) / BITS_PER_UNIT;
-+  while (!partial_defs.is_empty ())
-+    {
-+      pd_data pd = partial_defs.pop ();
-+      unsigned int amnt;
-+      if (TREE_CODE (pd.rhs) == CONSTRUCTOR)
-+	{
-+	  /* Empty CONSTRUCTOR.  */
-+	  if (pd.size >= needed_len * BITS_PER_UNIT)
-+	    len = needed_len;
-+	  else
-+	    len = ROUND_UP (pd.size, BITS_PER_UNIT) / BITS_PER_UNIT;
-+	  memset (this_buffer, 0, len);
-+	}
-+      else
-+ 	{
-+	  len = native_encode_expr (pd.rhs, this_buffer, bufsize,
-+				    MAX (0, -pd.offset) / BITS_PER_UNIT);
-+	  if (len <= 0
-+	      || len < (ROUND_UP (pd.size, BITS_PER_UNIT) / BITS_PER_UNIT
-+			- MAX (0, -pd.offset) / BITS_PER_UNIT))
-+ 	    {
-+	      if (dump_file && (dump_flags & TDF_DETAILS))
-+		fprintf (dump_file, "Failed to encode %u "
-+			 "partial definitions\n", ndefs);
-+	      return (void *)-1;
-+ 	    }
-+	}
-+
-+      unsigned char *p = buffer;
-+      HOST_WIDE_INT size = pd.size;
-+      if (pd.offset < 0)
-+	size -= ROUND_DOWN (-pd.offset, BITS_PER_UNIT);
-+      this_buffer[len] = 0;
-+      if (BYTES_BIG_ENDIAN)
-+	{
-+	  /* LSB of this_buffer[len - 1] byte should be at
-+	     pd.offset + pd.size - 1 bits in buffer.  */
-+	  amnt = ((unsigned HOST_WIDE_INT) pd.offset
-+		  + pd.size) % BITS_PER_UNIT;
-+	  if (amnt)
-+	    shift_bytes_in_array_right (this_buffer, len + 1, amnt);
-+	  unsigned char *q = this_buffer;
-+	  unsigned int off = 0;
-+	  if (pd.offset >= 0)
-+	    {
-+	      unsigned int msk;
-+	      off = pd.offset / BITS_PER_UNIT;
-+	      gcc_assert (off < needed_len);
-+	      p = buffer + off;
-+	      if (size <= amnt)
- 		{
--		  pd_data pd = partial_defs.pop ();
--		  if (TREE_CODE (pd.rhs) == CONSTRUCTOR)
--		    /* Empty CONSTRUCTOR.  */
--		    memset (buffer + MAX (0, pd.offset),
--			    0, MIN ((HOST_WIDE_INT)sizeof (buffer)
--				     - MAX (0, pd.offset),
--				    pd.size + MIN (0, pd.offset)));
--		  else
-+		  msk = ((1 << size) - 1) << (BITS_PER_UNIT - amnt);
-+		  *p = (*p & ~msk) | (this_buffer[len] & msk);
-+		  size = 0;
-+		}
-+	      else
-+		{
-+		  if (TREE_CODE (pd.rhs) != CONSTRUCTOR)
-+		    q = (this_buffer + len
-+			 - (ROUND_UP (size - amnt, BITS_PER_UNIT)
-+			    / BITS_PER_UNIT));
-+		  if (pd.offset % BITS_PER_UNIT)
- 		    {
--		      len = native_encode_expr (pd.rhs,
--						buffer + MAX (0, pd.offset),
--						sizeof (buffer)
--						- MAX (0, pd.offset),
--						MAX (0, -pd.offset));
--		      if (len <= 0
--			  || len < (pd.size - MAX (0, -pd.offset)))
--			{
--			  if (dump_file && (dump_flags & TDF_DETAILS))
--			    fprintf (dump_file, "Failed to encode %u "
--				     "partial definitions\n", ndefs);
--			  return (void *)-1;
--			}
-+		      msk = -1U << (BITS_PER_UNIT
-+				    - (pd.offset % BITS_PER_UNIT));
-+		      *p = (*p & msk) | (*q & ~msk);
-+		      p++;
-+		      q++;
-+		      off++;
-+		      size -= BITS_PER_UNIT - (pd.offset % BITS_PER_UNIT);
-+		      gcc_assert (size >= 0);
- 		    }
- 		}
--
--	      tree type = vr->type;
--	      /* Make sure to interpret in a type that has a range
--		 covering the whole access size.  */
--	      if (INTEGRAL_TYPE_P (vr->type)
--		  && maxsizei != TYPE_PRECISION (vr->type))
--		type = build_nonstandard_integer_type (maxsizei,
--						       TYPE_UNSIGNED (type));
--	      tree val = native_interpret_expr (type, buffer,
--						maxsizei / BITS_PER_UNIT);
--	      /* If we chop off bits because the types precision doesn't
--		 match the memory access size this is ok when optimizing
--		 reads but not when called from the DSE code during
--		 elimination.  */
--	      if (val
--		  && type != vr->type)
-+	    }
-+	  else if (TREE_CODE (pd.rhs) != CONSTRUCTOR)
-+	    {
-+	      q = (this_buffer + len
-+		   - (ROUND_UP (size - amnt, BITS_PER_UNIT)
-+		      / BITS_PER_UNIT));
-+	      if (pd.offset % BITS_PER_UNIT)
- 		{
--		  if (! int_fits_type_p (val, vr->type))
--		    val = NULL_TREE;
--		  else
--		    val = fold_convert (vr->type, val);
-+		  q++;
-+		  size -= BITS_PER_UNIT - ((unsigned HOST_WIDE_INT) pd.offset
-+					   % BITS_PER_UNIT);
-+		  gcc_assert (size >= 0);
- 		}
--
--	      if (val)
-+	    }
-+	  if ((unsigned HOST_WIDE_INT) size / BITS_PER_UNIT + off
-+	      > needed_len)
-+	    size = (needed_len - off) * BITS_PER_UNIT;
-+	  memcpy (p, q, size / BITS_PER_UNIT);
-+	  if (size % BITS_PER_UNIT)
-+	    {
-+	      unsigned int msk
-+		= -1U << (BITS_PER_UNIT - (size % BITS_PER_UNIT));
-+	      p += size / BITS_PER_UNIT;
-+	      q += size / BITS_PER_UNIT;
-+	      *p = (*q & msk) | (*p & ~msk);
-+	    }
-+	}
-+      else
-+	{
-+	  size = MIN (size, (HOST_WIDE_INT) needed_len * BITS_PER_UNIT);
-+	  if (pd.offset >= 0)
-+	    {
-+	      /* LSB of this_buffer[0] byte should be at pd.offset bits
-+		 in buffer.  */
-+	      unsigned int msk;
-+	      amnt = pd.offset % BITS_PER_UNIT;
-+	      if (amnt)
-+		shift_bytes_in_array_left (this_buffer, len + 1, amnt);
-+	      unsigned int off = pd.offset / BITS_PER_UNIT;
-+	      gcc_assert (off < needed_len);
-+	      p = buffer + off;
-+	      if (amnt + size < BITS_PER_UNIT)
- 		{
--		  if (dump_file && (dump_flags & TDF_DETAILS))
--		    fprintf (dump_file, "Successfully combined %u "
--			     "partial definitions\n", ndefs);
--		  return vn_reference_lookup_or_insert_for_pieces
--		      (first_vuse,
--		       vr->set, vr->type, vr->operands, val);
-+		  /* Low amnt bits come from *p, then size bits
-+		     from this_buffer[0] and the remaining again from
-+		     *p.  */
-+		  msk = ((1 << size) - 1) << amnt;
-+		  *p = (*p & ~msk) | (this_buffer[0] & msk);
-+		  size = 0;
- 		}
--	      else
-+	      else if (amnt)
- 		{
--		  if (dump_file && (dump_flags & TDF_DETAILS))
--		    fprintf (dump_file, "Failed to interpret %u "
--			     "encoded partial definitions\n", ndefs);
--		  return (void *)-1;
-+		  msk = -1U << amnt;
-+		  *p = (*p & ~msk) | (this_buffer[0] & msk);
-+		  p++;
-+		  size -= (BITS_PER_UNIT - amnt);
- 		}
- 	    }
-+	  else
-+	    {
-+	      amnt = (unsigned HOST_WIDE_INT) pd.offset % BITS_PER_UNIT;
-+	      if (amnt)
-+		shift_bytes_in_array_left (this_buffer, len + 1, amnt);
-+	    }
-+	  memcpy (p, this_buffer + (amnt != 0), size / BITS_PER_UNIT);
-+	  p += size / BITS_PER_UNIT;
-+	  if (size % BITS_PER_UNIT)
-+	    {
-+	      unsigned int msk = -1U << (size % BITS_PER_UNIT);
-+	      *p = (this_buffer[(amnt != 0) + size / BITS_PER_UNIT]
-+		    & ~msk) | (*p & msk);
-+	    }
- 	}
-     }
--  /* Continue looking for partial defs.  */
--  return NULL;
-+
-+  tree type = vr->type;
-+  /* Make sure to interpret in a type that has a range covering the whole
-+     access size.  */
-+  if (INTEGRAL_TYPE_P (vr->type) && maxsizei != TYPE_PRECISION (vr->type))
-+    type = build_nonstandard_integer_type (maxsizei, TYPE_UNSIGNED (type));
-+  tree val;
-+  if (BYTES_BIG_ENDIAN)
-+    {
-+      unsigned sz = needed_len;
-+      if (maxsizei % BITS_PER_UNIT)
-+	shift_bytes_in_array_right (buffer, needed_len,
-+				    BITS_PER_UNIT
-+				    - (maxsizei % BITS_PER_UNIT));
-+      if (INTEGRAL_TYPE_P (type))
-+	sz = GET_MODE_SIZE (SCALAR_INT_TYPE_MODE (type));
-+      if (sz > needed_len)
-+	{
-+	  memcpy (this_buffer + (sz - needed_len), buffer, needed_len);
-+	  val = native_interpret_expr (type, this_buffer, sz);
-+	}
-+      else
-+	val = native_interpret_expr (type, buffer, needed_len);
-+    }
-+  else
-+    val = native_interpret_expr (type, buffer, bufsize);
-+  /* If we chop off bits because the types precision doesn't match the memory
-+     access size this is ok when optimizing reads but not when called from
-+     the DSE code during elimination.  */
-+  if (val && type != vr->type)
-+    {
-+      if (! int_fits_type_p (val, vr->type))
-+	val = NULL_TREE;
-+      else
-+	val = fold_convert (vr->type, val);
-+    }
-+  if (val)
-+    {
-+      if (dump_file && (dump_flags & TDF_DETAILS))
-+	fprintf (dump_file,
-+		 "Successfully combined %u partial definitions\n", ndefs);
-+      /* We are using the alias-set of the first store we encounter which
-+	 should be appropriate here.  */
-+      return finish (first_set, val);
-+    }
-+  else
-+    {
-+      if (dump_file && (dump_flags & TDF_DETAILS))
-+	fprintf (dump_file,
-+		 "Failed to interpret %u encoded partial definitions\n", ndefs);
-+      return (void *)-1;
-+     }
- }
- 
- /* Callback for walk_non_aliased_vuses.  Adjusts the vn_reference_t VR_
-@@ -1923,7 +2129,10 @@ vn_reference_lookup_2 (ao_ref *op ATTRIB
-     return NULL;
- 
-   if (data->last_vuse_ptr)
--    *data->last_vuse_ptr = vuse;
-+    {
-+      *data->last_vuse_ptr = vuse;
-+      data->last_vuse = vuse;
-+    }
- 
-   /* Fixup vuse and hash.  */
-   if (vr->vuse)
-@@ -1935,7 +2144,11 @@ vn_reference_lookup_2 (ao_ref *op ATTRIB
-   hash = vr->hashcode;
-   slot = valid_info->references->find_slot_with_hash (vr, hash, NO_INSERT);
-   if (slot)
--    return *slot;
-+    {
-+      if ((*slot)->result && data->saved_operands.exists ())
-+	return data->finish (vr->set, (*slot)->result);
-+      return *slot;
-+    }
- 
-   return NULL;
- }
-@@ -2221,13 +2434,13 @@ adjust_offsets_for_equal_base_address (t
- 
- static void *
- vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
--		       bool *disambiguate_only)
-+		       translate_flags *disambiguate_only)
- {
-   vn_walk_cb_data *data = (vn_walk_cb_data *)data_;
-   vn_reference_t vr = data->vr;
-   gimple *def_stmt = SSA_NAME_DEF_STMT (vuse);
-   tree base = ao_ref_base (ref);
--  HOST_WIDE_INT offseti, maxsizei;
-+  HOST_WIDE_INT offseti = 0, maxsizei, sizei = 0;
-   static vec<vn_reference_op_s> lhs_ops;
-   ao_ref lhs_ref;
-   bool lhs_ref_ok = false;
-@@ -2242,8 +2455,11 @@ vn_reference_lookup_3 (ao_ref *ref, tree
-       lhs_ops.truncate (0);
-       basic_block saved_rpo_bb = vn_context_bb;
-       vn_context_bb = gimple_bb (def_stmt);
--      copy_reference_ops_from_ref (lhs, &lhs_ops);
--      lhs_ops = valueize_refs_1 (lhs_ops, &valueized_anything, true);
-+      if (*disambiguate_only <= TR_VALUEIZE_AND_DISAMBIGUATE)
-+	{
-+	  copy_reference_ops_from_ref (lhs, &lhs_ops);
-+	  lhs_ops = valueize_refs_1 (lhs_ops, &valueized_anything, true);
-+	}
-       vn_context_bb = saved_rpo_bb;
-       if (valueized_anything)
- 	{
-@@ -2253,7 +2469,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree
- 	  if (lhs_ref_ok
- 	      && !refs_may_alias_p_1 (ref, &lhs_ref, data->tbaa_p))
- 	    {
--	      *disambiguate_only = true;
-+	      *disambiguate_only = TR_VALUEIZE_AND_DISAMBIGUATE;
- 	      return NULL;
- 	    }
- 	}
-@@ -2263,6 +2479,30 @@ vn_reference_lookup_3 (ao_ref *ref, tree
- 	  lhs_ref_ok = true;
- 	}
- 
-+      /* Besides valueizing the LHS we can also use access-path based
-+	  disambiguation on the original non-valueized ref.  */
-+      if (!ref->ref
-+	  && lhs_ref_ok
-+	  && data->orig_ref.ref)
-+	{
-+	  /* We want to use the non-valueized LHS for this, but avoid redundant
-+	     work.  */
-+	  ao_ref *lref = &lhs_ref;
-+	  ao_ref lref_alt;
-+	  if (valueized_anything)
-+	    {
-+	      ao_ref_init (&lref_alt, lhs);
-+	      lref = &lref_alt;
-+	    }
-+	  if (!refs_may_alias_p_1 (&data->orig_ref, lref, data->tbaa_p))
-+	    {
-+	      *disambiguate_only = (valueized_anything
-+				    ? TR_VALUEIZE_AND_DISAMBIGUATE
-+				    : TR_DISAMBIGUATE);
-+	      return NULL;
-+	    }
-+	}
-+
-       /* If we reach a clobbering statement try to skip it and see if
-          we find a VN result with exactly the same value as the
- 	 possible clobber.  In this case we can ignore the clobber
-@@ -2299,7 +2539,8 @@ vn_reference_lookup_3 (ao_ref *ref, tree
- 	    }
- 	}
-     }
--  else if (gimple_call_builtin_p (def_stmt, BUILT_IN_NORMAL)
-+  else if (*disambiguate_only <= TR_VALUEIZE_AND_DISAMBIGUATE
-+	   && gimple_call_builtin_p (def_stmt, BUILT_IN_NORMAL)
- 	   && gimple_call_num_args (def_stmt) <= 4)
-     {
-       /* For builtin calls valueize its arguments and call the
-@@ -2328,15 +2569,13 @@ vn_reference_lookup_3 (ao_ref *ref, tree
- 	    gimple_call_set_arg (def_stmt, i, oldargs[i]);
- 	  if (!res)
- 	    {
--	      *disambiguate_only = true;
-+	      *disambiguate_only = TR_VALUEIZE_AND_DISAMBIGUATE;
- 	      return NULL;
- 	    }
- 	}
-     }
- 
--  /* If we are looking for redundant stores do not create new hashtable
--     entries from aliasing defs with made up alias-sets.  */
--  if (*disambiguate_only || !data->tbaa_p)
-+  if (*disambiguate_only > TR_TRANSLATE)
-     return (void *)-1;
- 
-   /* If we cannot constrain the size of the reference we cannot
-@@ -2359,10 +2598,14 @@ vn_reference_lookup_3 (ao_ref *ref, tree
-       && (integer_zerop (gimple_call_arg (def_stmt, 1))
- 	  || ((TREE_CODE (gimple_call_arg (def_stmt, 1)) == INTEGER_CST
- 	       || (INTEGRAL_TYPE_P (vr->type) && known_eq (ref->size, 8)))
--	      && CHAR_BIT == 8 && BITS_PER_UNIT == 8
-+	      && CHAR_BIT == 8
-+	      && BITS_PER_UNIT == 8
-+	      && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
- 	      && offset.is_constant (&offseti)
--	      && offseti % BITS_PER_UNIT == 0
- 	      && multiple_p (ref->size, BITS_PER_UNIT)))
-+	      && ref->size.is_constant (&sizei)
-+	      && (offseti % BITS_PER_UNIT == 0
-+		  || TREE_CODE (gimple_call_arg (def_stmt, 1)) == INTEGER_CST)
-       && poly_int_tree_p (gimple_call_arg (def_stmt, 2))
-       && (TREE_CODE (gimple_call_arg (def_stmt, 0)) == ADDR_EXPR
- 	  || TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME))
-@@ -2423,7 +2666,13 @@ vn_reference_lookup_3 (ao_ref *ref, tree
-       else
- 	return (void *)-1;
-       tree len = gimple_call_arg (def_stmt, 2);
--      HOST_WIDE_INT leni, offset2i, offseti;
-+      HOST_WIDE_INT leni, offset2i;
-+      /* Sometimes the above trickery is smarter than alias analysis.  Take
-+	  advantage of that.  */
-+      if (!ranges_maybe_overlap_p (offset, maxsize, offset2,
-+				   (wi::to_poly_offset (len)
-+				    << LOG2_BITS_PER_UNIT)))
-+	return NULL;
-       if (data->partial_defs.is_empty ()
- 	  && known_subrange_p (offset, maxsize, offset2,
- 			       wi::to_poly_offset (len) << LOG2_BITS_PER_UNIT))
-@@ -2432,7 +2681,8 @@ vn_reference_lookup_3 (ao_ref *ref, tree
- 	  if (integer_zerop (gimple_call_arg (def_stmt, 1)))
- 	    val = build_zero_cst (vr->type);
- 	  else if (INTEGRAL_TYPE_P (vr->type)
--		   && known_eq (ref->size, 8))
-+		   && known_eq (ref->size, 8)
-+		   && offseti % BITS_PER_UNIT == 0)
- 	    {
- 	      gimple_match_op res_op (gimple_match_cond::UNCOND, NOP_EXPR,
- 				      vr->type, gimple_call_arg (def_stmt, 1));
-@@ -2444,30 +2694,57 @@ vn_reference_lookup_3 (ao_ref *ref, tree
- 	    }
- 	  else
- 	    {
--	      unsigned len = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (vr->type));
--	      unsigned char *buf = XALLOCAVEC (unsigned char, len);
-+	      unsigned buflen = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (vr->type))
-+						  + 1;
-+	      if (INTEGRAL_TYPE_P (vr->type))
-+		buflen = GET_MODE_SIZE (SCALAR_INT_TYPE_MODE (vr->type)) + 1;
-+	      unsigned char *buf = XALLOCAVEC (unsigned char, buflen);
- 	      memset (buf, TREE_INT_CST_LOW (gimple_call_arg (def_stmt, 1)),
--		      len);
--	      val = native_interpret_expr (vr->type, buf, len);
-+		      buflen);
-+	      if (BYTES_BIG_ENDIAN)
-+		{
-+		  unsigned int amnt
-+		    = (((unsigned HOST_WIDE_INT) offseti + sizei)
-+		       % BITS_PER_UNIT);
-+		  if (amnt)
-+		    {
-+		      shift_bytes_in_array_right (buf, buflen,
-+						  BITS_PER_UNIT - amnt);
-+		      buf++;
-+		      buflen--;
-+		    }
-+		}
-+	      else if (offseti % BITS_PER_UNIT != 0)
-+		{
-+		  unsigned int amnt
-+		    = BITS_PER_UNIT - ((unsigned HOST_WIDE_INT) offseti
-+				       % BITS_PER_UNIT);
-+		  shift_bytes_in_array_left (buf, buflen, amnt);
-+		  buf++;
-+		  buflen--;
-+		}
-+	      val = native_interpret_expr (vr->type, buf, buflen);
- 	      if (!val)
- 		return (void *)-1;
- 	    }
--	  return vn_reference_lookup_or_insert_for_pieces
--	           (vuse, vr->set, vr->type, vr->operands, val);
-+	  return data->finish (0, val);
- 	}
-       /* For now handle clearing memory with partial defs.  */
-       else if (known_eq (ref->size, maxsize)
- 	       && integer_zerop (gimple_call_arg (def_stmt, 1))
- 	       && tree_to_poly_int64 (len).is_constant (&leni)
-+	       && leni <= INTTYPE_MAXIMUM (HOST_WIDE_INT) / BITS_PER_UNIT
- 	       && offset.is_constant (&offseti)
- 	       && offset2.is_constant (&offset2i)
--	       && maxsize.is_constant (&maxsizei))
-+	       && maxsize.is_constant (&maxsizei)
-+	       && ranges_known_overlap_p (offseti, maxsizei, offset2i,
-+		       			  leni << LOG2_BITS_PER_UNIT))
- 	{
- 	  pd_data pd;
- 	  pd.rhs = build_constructor (NULL_TREE, NULL);
--	  pd.offset = (offset2i - offseti) / BITS_PER_UNIT;
--	  pd.size = leni;
--	  return data->push_partial_def (pd, vuse, maxsizei);
-+	  pd.offset = offset2i - offseti;
-+	  pd.size = leni << LOG2_BITS_PER_UNIT;
-+	  return data->push_partial_def (pd, 0, maxsizei);
- 	}
-     }
- 
-@@ -2477,12 +2754,22 @@ vn_reference_lookup_3 (ao_ref *ref, tree
- 	   && gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR
- 	   && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (def_stmt)) == 0)
-     {
-+      tree lhs = gimple_assign_lhs (def_stmt);
-       tree base2;
-       poly_int64 offset2, size2, maxsize2;
-       HOST_WIDE_INT offset2i, size2i;
-       bool reverse;
--      base2 = get_ref_base_and_extent (gimple_assign_lhs (def_stmt),
--				       &offset2, &size2, &maxsize2, &reverse);
-+      if (lhs_ref_ok)
-+	{
-+	  base2 = ao_ref_base (&lhs_ref);
-+	  offset2 = lhs_ref.offset;
-+	  size2 = lhs_ref.size;
-+	  maxsize2 = lhs_ref.max_size;
-+	  reverse = reverse_storage_order_for_component_p (lhs);
-+	}
-+      else
-+	base2 = get_ref_base_and_extent (lhs,
-+					 &offset2, &size2, &maxsize2, &reverse);
-       if (known_size_p (maxsize2)
- 	  && known_eq (maxsize2, size2)
- 	  && adjust_offsets_for_equal_base_address (base, &offset,
-@@ -2492,24 +2779,21 @@ vn_reference_lookup_3 (ao_ref *ref, tree
- 	      && known_subrange_p (offset, maxsize, offset2, size2))
- 	    {
- 	      tree val = build_zero_cst (vr->type);
--	      return vn_reference_lookup_or_insert_for_pieces
--		  (vuse, vr->set, vr->type, vr->operands, val);
-+	      return data->finish (get_alias_set (lhs), val);
- 	    }
- 	  else if (known_eq (ref->size, maxsize)
- 		   && maxsize.is_constant (&maxsizei)
--		   && maxsizei % BITS_PER_UNIT == 0
- 		   && offset.is_constant (&offseti)
--		   && offseti % BITS_PER_UNIT == 0
- 		   && offset2.is_constant (&offset2i)
--		   && offset2i % BITS_PER_UNIT == 0
- 		   && size2.is_constant (&size2i)
--		   && size2i % BITS_PER_UNIT == 0)
-+		   && ranges_known_overlap_p (offseti, maxsizei,
-+					      offset2i, size2i))
- 	    {
- 	      pd_data pd;
- 	      pd.rhs = gimple_assign_rhs1 (def_stmt);
--	      pd.offset = (offset2i - offseti) / BITS_PER_UNIT;
--	      pd.size = size2i / BITS_PER_UNIT;
--	      return data->push_partial_def (pd, vuse, maxsizei);
-+	      pd.offset = offset2i - offseti;
-+	      pd.size = size2i;
-+	      return data->push_partial_def (pd, get_alias_set (lhs), maxsizei);
- 	    }
- 	}
-     }
-@@ -2520,28 +2804,36 @@ vn_reference_lookup_3 (ao_ref *ref, tree
- 	   && is_gimple_reg_type (vr->type)
- 	   && !contains_storage_order_barrier_p (vr->operands)
- 	   && gimple_assign_single_p (def_stmt)
--	   && CHAR_BIT == 8 && BITS_PER_UNIT == 8
-+	   && CHAR_BIT == 8
-+	   && BITS_PER_UNIT == 8
-+	   && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
- 	   /* native_encode and native_decode operate on arrays of bytes
- 	      and so fundamentally need a compile-time size and offset.  */
- 	   && maxsize.is_constant (&maxsizei)
--	   && maxsizei % BITS_PER_UNIT == 0
- 	   && offset.is_constant (&offseti)
--	   && offseti % BITS_PER_UNIT == 0
- 	   && (is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt))
- 	       || (TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
- 		   && is_gimple_min_invariant (SSA_VAL (gimple_assign_rhs1 (def_stmt))))))
-     {
-+      tree lhs = gimple_assign_lhs (def_stmt);
-       tree base2;
-       poly_int64 offset2, size2, maxsize2;
-       HOST_WIDE_INT offset2i, size2i;
-       bool reverse;
--      base2 = get_ref_base_and_extent (gimple_assign_lhs (def_stmt),
--				       &offset2, &size2, &maxsize2, &reverse);
-+      if (lhs_ref_ok)
-+	{
-+	  base2 = ao_ref_base (&lhs_ref);
-+	  offset2 = lhs_ref.offset;
-+	  size2 = lhs_ref.size;
-+	  maxsize2 = lhs_ref.max_size;
-+	  reverse = reverse_storage_order_for_component_p (lhs);
-+	}
-+      else
-+	base2 = get_ref_base_and_extent (lhs,
-+					 &offset2, &size2, &maxsize2, &reverse);
-       if (base2
- 	  && !reverse
- 	  && known_eq (maxsize2, size2)
--	  && multiple_p (size2, BITS_PER_UNIT)
--	  && multiple_p (offset2, BITS_PER_UNIT)
- 	  && adjust_offsets_for_equal_base_address (base, &offset,
- 						    base2, &offset2)
- 	  && offset.is_constant (&offseti)
-@@ -2552,37 +2844,80 @@ vn_reference_lookup_3 (ao_ref *ref, tree
- 	      && known_subrange_p (offseti, maxsizei, offset2, size2))
- 	    {
- 	      /* We support up to 512-bit values (for V8DFmode).  */
--	      unsigned char buffer[64];
-+	      unsigned char buffer[65];
- 	      int len;
- 
- 	      tree rhs = gimple_assign_rhs1 (def_stmt);
- 	      if (TREE_CODE (rhs) == SSA_NAME)
- 		rhs = SSA_VAL (rhs);
--	      unsigned pad = 0;
--	      if (BYTES_BIG_ENDIAN
--		  && is_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (rhs))))
--		{
--		  /* On big-endian the padding is at the 'front' so
--		     just skip the initial bytes.  */
--		  fixed_size_mode mode
--		    = as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (rhs)));
--		  pad = GET_MODE_SIZE (mode) - size2i / BITS_PER_UNIT;
--		}
- 	      len = native_encode_expr (rhs,
--					buffer, sizeof (buffer),
--					((offseti - offset2i) / BITS_PER_UNIT
--					 + pad));
-+					buffer, sizeof (buffer) - 1,
-+					(offseti - offset2i) / BITS_PER_UNIT);
- 	      if (len > 0 && len * BITS_PER_UNIT >= maxsizei)
- 		{
- 		  tree type = vr->type;
-+		  unsigned char *buf = buffer;
-+		  unsigned int amnt = 0;
- 		  /* Make sure to interpret in a type that has a range
- 		     covering the whole access size.  */
- 		  if (INTEGRAL_TYPE_P (vr->type)
- 		      && maxsizei != TYPE_PRECISION (vr->type))
- 		    type = build_nonstandard_integer_type (maxsizei,
- 							   TYPE_UNSIGNED (type));
--		  tree val = native_interpret_expr (type, buffer,
--						    maxsizei / BITS_PER_UNIT);
-+		  if (BYTES_BIG_ENDIAN)
-+		    {
-+		      /* For big-endian native_encode_expr stored the rhs
-+			 such that the LSB of it is the LSB of buffer[len - 1].
-+			 That bit is stored into memory at position
-+			 offset2 + size2 - 1, i.e. in byte
-+			 base + (offset2 + size2 - 1) / BITS_PER_UNIT.
-+			 E.g. for offset2 1 and size2 14, rhs -1 and memory
-+			 previously cleared that is:
-+			 0	  1
-+			 01111111|11111110
-+			 Now, if we want to extract offset 2 and size 12 from
-+			 it using native_interpret_expr (which actually works
-+			 for integral bitfield types in terms of byte size of
-+			 the mode), the native_encode_expr stored the value
-+			 into buffer as
-+			 XX111111|11111111
-+			 and returned len 2 (the X bits are outside of
-+			 precision).
-+			 Let sz be maxsize / BITS_PER_UNIT if not extracting
-+			 a bitfield, and GET_MODE_SIZE otherwise.
-+			 We need to align the LSB of the value we want to
-+			 extract as the LSB of buf[sz - 1].
-+			 The LSB from memory we need to read is at position
-+			 offset + maxsize - 1.  */
-+		      HOST_WIDE_INT sz = maxsizei / BITS_PER_UNIT;
-+		      if (INTEGRAL_TYPE_P (type))
-+			sz = GET_MODE_SIZE (SCALAR_INT_TYPE_MODE (type));
-+		      amnt = ((unsigned HOST_WIDE_INT) offset2i + size2i
-+			      - offseti - maxsizei) % BITS_PER_UNIT;
-+		      if (amnt)
-+			shift_bytes_in_array_right (buffer, len, amnt);
-+		      amnt = ((unsigned HOST_WIDE_INT) offset2i + size2i
-+			      - offseti - maxsizei - amnt) / BITS_PER_UNIT;
-+		      if ((unsigned HOST_WIDE_INT) sz + amnt > (unsigned) len)
-+			len = 0;
-+		      else
-+			{
-+			  buf = buffer + len - sz - amnt;
-+			  len -= (buf - buffer);
-+			}
-+		    }
-+		  else
-+		    {
-+		      amnt = ((unsigned HOST_WIDE_INT) offset2i
-+			      - offseti) % BITS_PER_UNIT;
-+		      if (amnt)
-+			{
-+			  buffer[len] = 0;
-+			  shift_bytes_in_array_left (buffer, len + 1, amnt);
-+			  buf = buffer + 1;
-+			}
-+		    }
-+		  tree val = native_interpret_expr (type, buf, len);
- 		  /* If we chop off bits because the types precision doesn't
- 		     match the memory access size this is ok when optimizing
- 		     reads but not when called from the DSE code during
-@@ -2597,73 +2932,95 @@ vn_reference_lookup_3 (ao_ref *ref, tree
- 		    }
- 
- 		  if (val)
--		    return vn_reference_lookup_or_insert_for_pieces
--		      (vuse, vr->set, vr->type, vr->operands, val);
-+		    return data->finish (get_alias_set (lhs), val);
- 		}
- 	    }
--	  else if (ranges_known_overlap_p (offseti, maxsizei, offset2i, size2i))
-+	  else if (ranges_known_overlap_p (offseti, maxsizei, offset2i,
-+					   size2i))
- 	    {
- 	      pd_data pd;
- 	      tree rhs = gimple_assign_rhs1 (def_stmt);
- 	      if (TREE_CODE (rhs) == SSA_NAME)
- 		rhs = SSA_VAL (rhs);
- 	      pd.rhs = rhs;
--	      pd.offset = (offset2i - offseti) / BITS_PER_UNIT;
--	      pd.size = size2i / BITS_PER_UNIT;
--	      return data->push_partial_def (pd, vuse, maxsizei);
-+	      pd.offset = offset2i - offseti;
-+	      pd.size = size2i;
-+	      return data->push_partial_def (pd, get_alias_set (lhs), maxsizei);
- 	    }
- 	}
-     }
- 
-   /* 4) Assignment from an SSA name which definition we may be able
--     to access pieces from.  */
-+     to access pieces from or we can combine to a larger entity.  */
-   else if (known_eq (ref->size, maxsize)
- 	   && is_gimple_reg_type (vr->type)
- 	   && !contains_storage_order_barrier_p (vr->operands)
- 	   && gimple_assign_single_p (def_stmt)
--	   && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
--	   /* A subset of partial defs from non-constants can be handled
--	      by for example inserting a CONSTRUCTOR, a COMPLEX_EXPR or
--	      even a (series of) BIT_INSERT_EXPR hoping for simplifications
--	      downstream, not so much for actually doing the insertion.  */
--	   && data->partial_defs.is_empty ())
-+	   && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
-     {
-+      tree lhs = gimple_assign_lhs (def_stmt);
-       tree base2;
-       poly_int64 offset2, size2, maxsize2;
-+      HOST_WIDE_INT offset2i, size2i, offseti;
-       bool reverse;
--      base2 = get_ref_base_and_extent (gimple_assign_lhs (def_stmt),
--				       &offset2, &size2, &maxsize2,
--				       &reverse);
-+      if (lhs_ref_ok)
-+	{
-+	  base2 = ao_ref_base (&lhs_ref);
-+	  offset2 = lhs_ref.offset;
-+	  size2 = lhs_ref.size;
-+	  maxsize2 = lhs_ref.max_size;
-+	  reverse = reverse_storage_order_for_component_p (lhs);
-+	}
-+      else
-+	base2 = get_ref_base_and_extent (lhs,
-+					 &offset2, &size2, &maxsize2, &reverse);
-       tree def_rhs = gimple_assign_rhs1 (def_stmt);
-       if (!reverse
- 	  && known_size_p (maxsize2)
- 	  && known_eq (maxsize2, size2)
- 	  && adjust_offsets_for_equal_base_address (base, &offset,
--						    base2, &offset2)
--	  && known_subrange_p (offset, maxsize, offset2, size2)
--	  /* ???  We can't handle bitfield precision extracts without
--	     either using an alternate type for the BIT_FIELD_REF and
--	     then doing a conversion or possibly adjusting the offset
--	     according to endianness.  */
--	  && (! INTEGRAL_TYPE_P (vr->type)
--	      || known_eq (ref->size, TYPE_PRECISION (vr->type)))
--	  && multiple_p (ref->size, BITS_PER_UNIT)
--	  && (! INTEGRAL_TYPE_P (TREE_TYPE (def_rhs))
--	      || type_has_mode_precision_p (TREE_TYPE (def_rhs))))
--	{
--	  gimple_match_op op (gimple_match_cond::UNCOND,
--			      BIT_FIELD_REF, vr->type,
--			      vn_valueize (def_rhs),
--			      bitsize_int (ref->size),
--			      bitsize_int (offset - offset2));
--	  tree val = vn_nary_build_or_lookup (&op);
--	  if (val
--	      && (TREE_CODE (val) != SSA_NAME
--		  || ! SSA_NAME_OCCURS_IN_ABNORMAL_PHI (val)))
--	    {
--	      vn_reference_t res = vn_reference_lookup_or_insert_for_pieces
--		  (vuse, vr->set, vr->type, vr->operands, val);
--	      return res;
-+						    base2, &offset2))
-+	{
-+	  if (data->partial_defs.is_empty ()
-+	      && known_subrange_p (offset, maxsize, offset2, size2)
-+	      /* ???  We can't handle bitfield precision extracts without
-+		 either using an alternate type for the BIT_FIELD_REF and
-+		 then doing a conversion or possibly adjusting the offset
-+		 according to endianness.  */
-+	      && (! INTEGRAL_TYPE_P (vr->type)
-+		  || known_eq (ref->size, TYPE_PRECISION (vr->type)))
-+	      && multiple_p (ref->size, BITS_PER_UNIT))
-+	    {
-+	      if (known_eq (ref->size, size2))
-+		return vn_reference_lookup_or_insert_for_pieces
-+		    (vuse, get_alias_set (lhs), vr->type, vr->operands,
-+		     SSA_VAL (def_rhs));
-+	      else if (! INTEGRAL_TYPE_P (TREE_TYPE (def_rhs))
-+		       || type_has_mode_precision_p (TREE_TYPE (def_rhs)))
-+		{
-+		  gimple_match_op op (gimple_match_cond::UNCOND,
-+				      BIT_FIELD_REF, vr->type,
-+				      SSA_VAL (def_rhs),
-+				      bitsize_int (ref->size),
-+				      bitsize_int (offset - offset2));
-+		  tree val = vn_nary_build_or_lookup (&op);
-+		  if (val
-+		      && (TREE_CODE (val) != SSA_NAME
-+			  || ! SSA_NAME_OCCURS_IN_ABNORMAL_PHI (val)))
-+		    return data->finish (get_alias_set (lhs), val);
-+		}
-+	    }
-+	  else if (maxsize.is_constant (&maxsizei)
-+		   && offset.is_constant (&offseti)
-+		   && offset2.is_constant (&offset2i)
-+		   && size2.is_constant (&size2i)
-+		   && ranges_known_overlap_p (offset, maxsize, offset2, size2))
-+	    {
-+	      pd_data pd;
-+	      pd.rhs = SSA_VAL (def_rhs);
-+	      pd.offset = offset2i - offseti;
-+	      pd.size = size2i;
-+	      return data->push_partial_def (pd, get_alias_set (lhs), maxsizei);
- 	    }
- 	}
-     }
-@@ -2678,6 +3035,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree
- 	   /* Handling this is more complicated, give up for now.  */
- 	   && data->partial_defs.is_empty ())
-     {
-+      tree lhs = gimple_assign_lhs (def_stmt);
-       tree base2;
-       int i, j, k;
-       auto_vec<vn_reference_op_s> rhs;
-@@ -2747,7 +3105,8 @@ vn_reference_lookup_3 (ao_ref *ref, tree
- 	}
- 
-       /* Now re-write REF to be based on the rhs of the assignment.  */
--      copy_reference_ops_from_ref (gimple_assign_rhs1 (def_stmt), &rhs);
-+      tree rhs1 = gimple_assign_rhs1 (def_stmt);
-+      copy_reference_ops_from_ref (rhs1, &rhs);
- 
-       /* Apply an extra offset to the inner MEM_REF of the RHS.  */
-       if (maybe_ne (extra_off, 0))
-@@ -2764,6 +3123,11 @@ vn_reference_lookup_3 (ao_ref *ref, tree
- 							extra_off));
- 	}
- 
-+      /* Save the operands since we need to use the original ones for
-+	 the hash entry we use.  */
-+      if (!data->saved_operands.exists ())
-+	data->saved_operands = vr->operands.copy ();
-+
-       /* We need to pre-pend vr->operands[0..i] to rhs.  */
-       vec<vn_reference_op_s> old = vr->operands;
-       if (i + 1 + rhs.length () > vr->operands.length ())
-@@ -2780,11 +3144,11 @@ vn_reference_lookup_3 (ao_ref *ref, tree
-       /* Try folding the new reference to a constant.  */
-       tree val = fully_constant_vn_reference_p (vr);
-       if (val)
--	return vn_reference_lookup_or_insert_for_pieces
--		 (vuse, vr->set, vr->type, vr->operands, val);
-+	return data->finish (get_alias_set (lhs), val);
- 
-       /* Adjust *ref from the new operands.  */
--      if (!ao_ref_init_from_vn_reference (&r, vr->set, vr->type, vr->operands))
-+      if (!ao_ref_init_from_vn_reference (&r, get_alias_set (rhs1),
-+					  vr->type, vr->operands))
- 	return (void *)-1;
-       /* This can happen with bitfields.  */
-       if (maybe_ne (ref->size, r.size))
-@@ -2793,6 +3157,12 @@ vn_reference_lookup_3 (ao_ref *ref, tree
- 
-       /* Do not update last seen VUSE after translating.  */
-       data->last_vuse_ptr = NULL;
-+      /* Invalidate the original access path since it now contains
-+	  the wrong base.  */
-+      data->orig_ref.ref = NULL_TREE;
-+      /* Use the alias-set of this LHS for recording an eventual result.  */
-+      if (data->first_set == -2)
-+	data->first_set = get_alias_set (lhs);
- 
-       /* Keep looking for the adjusted *REF / VR pair.  */
-       return NULL;
-@@ -2912,6 +3282,11 @@ vn_reference_lookup_3 (ao_ref *ref, tree
-       if (!known_subrange_p (at, byte_maxsize, lhs_offset, copy_size))
- 	return (void *)-1;
- 
-+      /* Save the operands since we need to use the original ones for
-+	 the hash entry we use.  */
-+      if (!data->saved_operands.exists ())
-+	data->saved_operands = vr->operands.copy ();
-+
-       /* Make room for 2 operands in the new reference.  */
-       if (vr->operands.length () < 2)
- 	{
-@@ -2940,11 +3315,10 @@ vn_reference_lookup_3 (ao_ref *ref, tree
-       /* Try folding the new reference to a constant.  */
-       tree val = fully_constant_vn_reference_p (vr);
-       if (val)
--	return vn_reference_lookup_or_insert_for_pieces
--		 (vuse, vr->set, vr->type, vr->operands, val);
-+	return data->finish (0, val);
- 
-       /* Adjust *ref from the new operands.  */
--      if (!ao_ref_init_from_vn_reference (&r, vr->set, vr->type, vr->operands))
-+      if (!ao_ref_init_from_vn_reference (&r, 0, vr->type, vr->operands))
- 	return (void *)-1;
-       /* This can happen with bitfields.  */
-       if (maybe_ne (ref->size, r.size))
-@@ -2953,6 +3327,12 @@ vn_reference_lookup_3 (ao_ref *ref, tree
- 
-       /* Do not update last seen VUSE after translating.  */
-       data->last_vuse_ptr = NULL;
-+      /* Invalidate the original access path since it now contains
-+	  the wrong base.  */
-+      data->orig_ref.ref = NULL_TREE;
-+      /* Use the alias-set of this stmt for recording an eventual result.  */
-+      if (data->first_set == -2)
-+	data->first_set = 0;
- 
-       /* Keep looking for the adjusted *REF / VR pair.  */
-       return NULL;
-@@ -3013,13 +3393,13 @@ vn_reference_lookup_pieces (tree vuse, a
-     {
-       ao_ref r;
-       unsigned limit = PARAM_VALUE (PARAM_SCCVN_MAX_ALIAS_QUERIES_PER_ACCESS);
--      vn_walk_cb_data data (&vr1, NULL, kind, true);
-+      vn_walk_cb_data data (&vr1, NULL_TREE, NULL, kind, true, NULL_TREE);
-       if (ao_ref_init_from_vn_reference (&r, set, type, vr1.operands))
--	*vnresult =
--	  (vn_reference_t)walk_non_aliased_vuses (&r, vr1.vuse, true,
--						  vn_reference_lookup_2,
--						  vn_reference_lookup_3,
--						  vuse_valueize, limit, &data);
-+	*vnresult
-+	  = ((vn_reference_t)
-+	     walk_non_aliased_vuses (&r, vr1.vuse, true, vn_reference_lookup_2,
-+				     vn_reference_lookup_3, vuse_valueize,
-+				     limit, &data));
-       gcc_checking_assert (vr1.operands == shared_lookup_references);
-     }
- 
-@@ -3035,15 +3415,19 @@ vn_reference_lookup_pieces (tree vuse, a
-    was NULL..  VNRESULT will be filled in with the vn_reference_t
-    stored in the hashtable if one exists.  When TBAA_P is false assume
-    we are looking up a store and treat it as having alias-set zero.
--   *LAST_VUSE_PTR will be updated with the VUSE the value lookup succeeded.  */
-+   *LAST_VUSE_PTR will be updated with the VUSE the value lookup succeeded.
-+   MASK is either NULL_TREE, or can be an INTEGER_CST if the result of the
-+   load is bitwise anded with MASK and so we are only interested in a subset
-+   of the bits and can ignore if the other bits are uninitialized or
-+   not initialized with constants.  */
- 
- tree
- vn_reference_lookup (tree op, tree vuse, vn_lookup_kind kind,
--		     vn_reference_t *vnresult, bool tbaa_p, tree *last_vuse_ptr)
-+		     vn_reference_t *vnresult, bool tbaa_p,
-+		     tree *last_vuse_ptr, tree mask)
- {
-   vec<vn_reference_op_s> operands;
-   struct vn_reference_s vr1;
--  tree cst;
-   bool valuezied_anything;
- 
-   if (vnresult)
-@@ -3055,11 +3439,11 @@ vn_reference_lookup (tree op, tree vuse,
-   vr1.type = TREE_TYPE (op);
-   vr1.set = get_alias_set (op);
-   vr1.hashcode = vn_reference_compute_hash (&vr1);
--  if ((cst = fully_constant_vn_reference_p (&vr1)))
--    return cst;
-+  if (mask == NULL_TREE)
-+    if (tree cst = fully_constant_vn_reference_p (&vr1))
-+      return cst;
- 
--  if (kind != VN_NOWALK
--      && vr1.vuse)
-+  if (kind != VN_NOWALK && vr1.vuse)
-     {
-       vn_reference_t wvnresult;
-       ao_ref r;
-@@ -3070,23 +3454,32 @@ vn_reference_lookup (tree op, tree vuse,
- 	  || !ao_ref_init_from_vn_reference (&r, vr1.set, vr1.type,
- 					     vr1.operands))
- 	ao_ref_init (&r, op);
--      vn_walk_cb_data data (&vr1, last_vuse_ptr, kind, tbaa_p);
--      wvnresult =
--	(vn_reference_t)walk_non_aliased_vuses (&r, vr1.vuse, tbaa_p,
--						vn_reference_lookup_2,
--						vn_reference_lookup_3,
--						vuse_valueize, limit, &data);
-+      vn_walk_cb_data data (&vr1, r.ref ? NULL_TREE : op,
-+			    last_vuse_ptr, kind, tbaa_p, mask);
-+
-+      wvnresult
-+       = ((vn_reference_t)
-+	   walk_non_aliased_vuses (&r, vr1.vuse, tbaa_p, vn_reference_lookup_2,
-+				   vn_reference_lookup_3, vuse_valueize, limit,
-+				   &data));
-       gcc_checking_assert (vr1.operands == shared_lookup_references);
-       if (wvnresult)
- 	{
-+	  gcc_assert (mask == NULL_TREE);
- 	  if (vnresult)
- 	    *vnresult = wvnresult;
- 	  return wvnresult->result;
- 	}
-+      else if (mask)
-+	return data.masked_result;
- 
-       return NULL_TREE;
-     }
- 
-+  if (last_vuse_ptr)
-+    *last_vuse_ptr = vr1.vuse;
-+  if (mask)
-+    return NULL_TREE;
-   return vn_reference_lookup_1 (&vr1, vnresult);
- }
- 
-@@ -4333,7 +4726,39 @@ visit_nary_op (tree lhs, gassign *stmt)
- 		}
- 	    }
- 	}
--    default:;
-+      break;
-+    case BIT_AND_EXPR:
-+      if (INTEGRAL_TYPE_P (type)
-+	  && TREE_CODE (rhs1) == SSA_NAME
-+	  && TREE_CODE (gimple_assign_rhs2 (stmt)) == INTEGER_CST
-+	  && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1)
-+	  && default_vn_walk_kind != VN_NOWALK
-+	  && CHAR_BIT == 8
-+	  && BITS_PER_UNIT == 8
-+	  && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
-+	  && !integer_all_onesp (gimple_assign_rhs2 (stmt))
-+	  && !integer_zerop (gimple_assign_rhs2 (stmt)))
-+	{
-+	  gassign *ass = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (rhs1));
-+	  if (ass
-+	      && !gimple_has_volatile_ops (ass)
-+	      && vn_get_stmt_kind (ass) == VN_REFERENCE)
-+	    {
-+	      tree last_vuse = gimple_vuse (ass);
-+	      tree op = gimple_assign_rhs1 (ass);
-+	      tree result = vn_reference_lookup (op, gimple_vuse (ass),
-+						 default_vn_walk_kind,
-+						 NULL, true, &last_vuse,
-+						 gimple_assign_rhs2 (stmt));
-+	      if (result
-+		  && useless_type_conversion_p (TREE_TYPE (result),
-+						TREE_TYPE (op)))
-+		return set_ssa_val_to (lhs, result);
-+	    }
-+	}
-+      break;
-+    default:
-+      break;
-     }
- 
-   bool changed = set_ssa_val_to (lhs, lhs);
-@@ -4844,14 +5269,14 @@ visit_stmt (gimple *stmt, bool backedges
- 	      switch (vn_get_stmt_kind (ass))
- 		{
- 		case VN_NARY:
--		changed = visit_nary_op (lhs, ass);
--		break;
-+		  changed = visit_nary_op (lhs, ass);
-+		  break;
- 		case VN_REFERENCE:
--		changed = visit_reference_op_load (lhs, rhs1, ass);
--		break;
-+		  changed = visit_reference_op_load (lhs, rhs1, ass);
-+		  break;
- 		default:
--		changed = defs_to_varying (ass);
--		break;
-+		  changed = defs_to_varying (ass);
-+		  break;
- 		}
- 	    }
- 	}
-@@ -5525,8 +5950,48 @@ eliminate_dom_walker::eliminate_stmt (ba
-       tree val;
-       tree rhs = gimple_assign_rhs1 (stmt);
-       vn_reference_t vnresult;
--      val = vn_reference_lookup (lhs, gimple_vuse (stmt), VN_WALKREWRITE,
--				 &vnresult, false);
-+      /* ???  gcc.dg/torture/pr91445.c shows that we lookup a boolean
-+	 typed load of a byte known to be 0x11 as 1 so a store of
-+	 a boolean 1 is detected as redundant.  Because of this we
-+	 have to make sure to lookup with a ref where its size
-+	 matches the precision.  */
-+      tree lookup_lhs = lhs;
-+      if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
-+	  && (TREE_CODE (lhs) != COMPONENT_REF
-+	      || !DECL_BIT_FIELD_TYPE (TREE_OPERAND (lhs, 1)))
-+	  && !type_has_mode_precision_p (TREE_TYPE (lhs)))
-+	{
-+	  if (TREE_CODE (lhs) == COMPONENT_REF
-+	      || TREE_CODE (lhs) == MEM_REF)
-+	    {
-+	      tree ltype = build_nonstandard_integer_type
-+				(TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (lhs))),
-+				 TYPE_UNSIGNED (TREE_TYPE (lhs)));
-+	      if (TREE_CODE (lhs) == COMPONENT_REF)
-+		{
-+		  tree foff = component_ref_field_offset (lhs);
-+		  tree f = TREE_OPERAND (lhs, 1);
-+		  if (!poly_int_tree_p (foff))
-+		    lookup_lhs = NULL_TREE;
-+		  else
-+		    lookup_lhs = build3 (BIT_FIELD_REF, ltype,
-+					 TREE_OPERAND (lhs, 0),
-+					 TYPE_SIZE (TREE_TYPE (lhs)),
-+					 bit_from_pos
-+					   (foff, DECL_FIELD_BIT_OFFSET (f)));
-+		}
-+	      else
-+		lookup_lhs = build2 (MEM_REF, ltype,
-+				     TREE_OPERAND (lhs, 0),
-+				     TREE_OPERAND (lhs, 1));
-+	    }
-+	  else
-+	    lookup_lhs = NULL_TREE;
-+	}
-+      val = NULL_TREE;
-+      if (lookup_lhs)
-+	val = vn_reference_lookup (lookup_lhs, gimple_vuse (stmt),
-+				   VN_WALKREWRITE, &vnresult, false);
-       if (TREE_CODE (rhs) == SSA_NAME)
- 	rhs = VN_INFO (rhs)->valnum;
-       if (val
-diff -urpN a/gcc/tree-ssa-sccvn.h b/gcc/tree-ssa-sccvn.h
---- a/gcc/tree-ssa-sccvn.h	2020-11-26 22:26:32.856000000 -0500
-+++ b/gcc/tree-ssa-sccvn.h	2020-11-26 22:06:08.040000000 -0500
-@@ -235,7 +235,7 @@ tree vn_reference_lookup_pieces (tree, a
- 				 vec ,
- 				 vn_reference_t *, vn_lookup_kind);
- tree vn_reference_lookup (tree, tree, vn_lookup_kind, vn_reference_t *, bool,
--			  tree * = NULL);
-+			  tree * = NULL, tree = NULL_TREE);
- void vn_reference_lookup_call (gcall *, vn_reference_t *, vn_reference_t);
- vn_reference_t vn_reference_insert_pieces (tree, alias_set_type, tree,
- 					   vec ,
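
For context, a minimal C sketch of the kind of code the new MASK argument
helps FRE simplify (illustrative only; the function, the buffer layout and
the little-endian assumption are not taken from the patch or the GCC
testsuite):

/* Only the first byte of BUF is defined; the word-sized load is masked
   with 0xff, so the remaining, uninitialized bytes are don't-care.  With
   the mask-aware vn_reference_lookup, FRE can fold the return value to
   0x12 on a little-endian target instead of keeping the load.  */
unsigned int
masked_load (void)
{
  unsigned char buf[sizeof (unsigned int)];
  unsigned int v;

  buf[0] = 0x12;
  __builtin_memcpy (&v, buf, sizeof v);
  return v & 0xff;
}
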
diff --git a/simplify-removing-subregs.patch b/simplify-removing-subregs.patch
deleted file mode 100644
index cfd58041ec9ffe53f2c5c18f2a646fd5f9d853e7..0000000000000000000000000000000000000000
--- a/simplify-removing-subregs.patch
+++ /dev/null
@@ -1,123 +0,0 @@
-This backport contains 1 patch from the GCC mainline tree.
-The commit id of the patch is listed below.
-
-0001-expand-Simplify-removing-subregs-when-expanding-a-co.patch
-9a182ef9ee011935d827ab5c6c9a7cd8e22257d8
-
-diff -Nurp a/gcc/expr.c b/gcc/expr.c
---- a/gcc/expr.c	2020-08-05 20:33:04.068000000 +0800
-+++ b/gcc/expr.c	2020-08-05 20:33:21.420000000 +0800
-@@ -3770,6 +3770,78 @@ emit_move_insn (rtx x, rtx y)
-   gcc_assert (mode != BLKmode
- 	      && (GET_MODE (y) == mode || GET_MODE (y) == VOIDmode));
- 
-+  /* If we have a copy that looks like one of the following patterns:
-+       (set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...)))
-+       (set (subreg:M1 (reg:M2 ...)) (mem:M1 ADDR))
-+       (set (mem:M1 ADDR) (subreg:M1 (reg:M2 ...)))
-+       (set (subreg:M1 (reg:M2 ...)) (constant C))
-+     where mode M1 is equal in size to M2, try to detect whether the
-+     mode change involves an implicit round trip through memory.
-+     If so, see if we can avoid that by removing the subregs and
-+     doing the move in mode M2 instead.  */
-+
-+  rtx x_inner = NULL_RTX;
-+  rtx y_inner = NULL_RTX;
-+
-+#define CANDIDATE_SUBREG_P(subreg) \
-+  (REG_P (SUBREG_REG (subreg)) \
-+   && known_eq (GET_MODE_SIZE (GET_MODE (SUBREG_REG (subreg))), \
-+		GET_MODE_SIZE (GET_MODE (subreg))) \
-+   && optab_handler (mov_optab, GET_MODE (SUBREG_REG (subreg))) \
-+      != CODE_FOR_nothing)
-+
-+#define CANDIDATE_MEM_P(innermode, mem) \
-+  (!targetm.can_change_mode_class ((innermode), GET_MODE (mem), ALL_REGS) \
-+   && !push_operand ((mem), GET_MODE (mem))				  \
-+   /* Not a candidate if innermode requires too much alignment.  */	  \
-+   && (MEM_ALIGN (mem) >= GET_MODE_ALIGNMENT (innermode)		  \
-+       || targetm.slow_unaligned_access (GET_MODE (mem),		  \
-+					 MEM_ALIGN (mem))		  \
-+       || !targetm.slow_unaligned_access ((innermode),			  \
-+					  MEM_ALIGN (mem))))
-+
-+  if (SUBREG_P (x) && CANDIDATE_SUBREG_P (x))
-+    x_inner = SUBREG_REG (x);
-+
-+  if (SUBREG_P (y) && CANDIDATE_SUBREG_P (y))
-+    y_inner = SUBREG_REG (y);
-+
-+  if (x_inner != NULL_RTX
-+      && y_inner != NULL_RTX
-+      && GET_MODE (x_inner) == GET_MODE (y_inner)
-+      && !targetm.can_change_mode_class (GET_MODE (x_inner), mode, ALL_REGS))
-+    {
-+      x = x_inner;
-+      y = y_inner;
-+      mode = GET_MODE (x_inner);
-+    }
-+  else if (x_inner != NULL_RTX
-+	   && MEM_P (y)
-+	   && CANDIDATE_MEM_P (GET_MODE (x_inner), y))
-+    {
-+      x = x_inner;
-+      y = adjust_address (y, GET_MODE (x_inner), 0);
-+      mode = GET_MODE (x_inner);
-+    }
-+  else if (y_inner != NULL_RTX
-+	   && MEM_P (x)
-+	   && CANDIDATE_MEM_P (GET_MODE (y_inner), x))
-+    {
-+      x = adjust_address (x, GET_MODE (y_inner), 0);
-+      y = y_inner;
-+      mode = GET_MODE (y_inner);
-+    }
-+  else if (x_inner != NULL_RTX
-+	   && CONSTANT_P (y)
-+	   && !targetm.can_change_mode_class (GET_MODE (x_inner),
-+					      mode, ALL_REGS)
-+	   && (y_inner = simplify_subreg (GET_MODE (x_inner), y, mode, 0)))
-+    {
-+      x = x_inner;
-+      y = y_inner;
-+      mode = GET_MODE (x_inner);
-+    }
-+
-   if (CONSTANT_P (y))
-     {
-       if (optimize
-diff -Nurp a/gcc/testsuite/gcc.target/aarch64/pr95254.c b/gcc/testsuite/gcc.target/aarch64/pr95254.c
---- a/gcc/testsuite/gcc.target/aarch64/pr95254.c	1970-01-01 08:00:00.000000000 +0800
-+++ b/gcc/testsuite/gcc.target/aarch64/pr95254.c	2020-08-05 20:33:21.424000000 +0800
-@@ -0,0 +1,19 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O2 -ftree-slp-vectorize -march=armv8.2-a+sve -msve-vector-bits=256" } */
-+
-+typedef short __attribute__((vector_size (8))) v4hi;
-+
-+typedef union U4HI { v4hi v; short a[4]; } u4hi;
-+
-+short b[4];
-+
-+void pass_v4hi (v4hi v)
-+{
-+    int i;
-+    u4hi u;
-+    u.v = v;
-+    for (i = 0; i < 4; i++)
-+      b[i] = u.a[i];
-+};
-+
-+/* { dg-final { scan-assembler-not "ptrue" } } */
-diff -Nurp a/gcc/testsuite/gcc.target/i386/pr67609.c b/gcc/testsuite/gcc.target/i386/pr67609.c
---- a/gcc/testsuite/gcc.target/i386/pr67609.c	2020-08-05 20:33:04.628000000 +0800
-+++ b/gcc/testsuite/gcc.target/i386/pr67609.c	2020-08-05 20:33:21.424000000 +0800
-@@ -1,7 +1,7 @@
- /* { dg-do compile } */
- /* { dg-options "-O2 -msse2" } */
- /* { dg-require-effective-target lp64 } */
--/* { dg-final { scan-assembler "movdqa" } } */
-+/* { dg-final { scan-assembler "movq\t%xmm0" } } */
- 
- #include 
- __m128d reg;
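
For context, a rough C sketch of the kind of copy the emit_move_insn change
targets (illustrative only, not taken from the patch or its testcases): a
value moved through an equal-sized view of another mode, where the subreg
form could otherwise force an implicit round trip through the stack.

typedef short v4hi __attribute__ ((vector_size (8)));

/* The 64-bit vector is copied out through a same-sized integer view; with
   the change the move can stay in registers when the target allows the
   mode change, instead of being spilled and reloaded.  */
long long
vec_bits (v4hi v)
{
  long long r;
  __builtin_memcpy (&r, &v, sizeof r);
  return r;
}
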
diff --git a/skip-debug-insns-when-computing-inline-costs.patch b/skip-debug-insns-when-computing-inline-costs.patch
deleted file mode 100644
index 2f09c274a237a80fa101f59e606802086478f188..0000000000000000000000000000000000000000
--- a/skip-debug-insns-when-computing-inline-costs.patch
+++ /dev/null
@@ -1,20 +0,0 @@
-This backport contains 1 patch from the GCC mainline tree.
-The commit id of the patch is listed below.
-
-0001-PR91176-Skip-debug-insns-when-computing-inline-costs.patch
-d3ed5b56646511a52db9992f4024969bfc9a13f9
-
-diff -uprN a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c
---- a/gcc/ipa-fnsummary.c
-+++ b/gcc/ipa-fnsummary.c
-@@ -2078,8 +2078,8 @@ analyze_function_body (struct cgraph_node *node, bool early)
- 
-       fix_builtin_expect_stmt = find_foldable_builtin_expect (bb);
- 
--      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
--	   gsi_next (&bsi))
-+      for (gimple_stmt_iterator bsi = gsi_start_nondebug_bb (bb);
-+	   !gsi_end_p (bsi); gsi_next_nondebug (&bsi))
- 	{
- 	  gimple *stmt = gsi_stmt (bsi);
- 	  int this_size = estimate_num_insns (stmt, &eni_size_weights);
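
A small illustrative example (not from the patch) of where the skipped
statements come from: with -g, locals that are optimized away leave gimple
debug statements behind, and the iterator change above makes the
inline-cost walk step over them so that debug info cannot influence the
computed function summaries.

static inline int
add_one (int x)
{
  int tmp = x + 1;   /* forwarded away; with -g only a debug bind of TMP remains */
  return tmp;
}

int
use_add_one (int x)
{
  return add_one (x) * 2;
}
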
diff --git a/speed-up-DDG-analysis-and-fix-bootstrap-compare-debug.patch b/speed-up-DDG-analysis-and-fix-bootstrap-compare-debug.patch
deleted file mode 100644
index da7b905a86f3158fcd44de855b8d15284d69433f..0000000000000000000000000000000000000000
--- a/speed-up-DDG-analysis-and-fix-bootstrap-compare-debug.patch
+++ /dev/null
@@ -1,718 +0,0 @@
-This backport contains 2 patches from the GCC mainline tree.
-The commit ids of these patches are listed below in chronological order.
-
-728c2e5eeaa91cf708f2b1b1f996653a7eebae59
-0001-modulo-sched-speed-up-DDG-analysis-PR90001.patch
-
-06d5d63d9944691bb4286e5f6b2422cc97148336
-0001-modulo-sched-fix-bootstrap-compare-debug-issue.patch
-
-diff -Nurp a/gcc/ddg.c b/gcc/ddg.c
---- a/gcc/ddg.c	2020-11-28 18:40:12.371633230 +0800
-+++ b/gcc/ddg.c	2020-11-28 18:38:33.835633230 +0800
-@@ -32,9 +32,6 @@ along with GCC; see the file COPYING3.
- 
- #ifdef INSN_SCHEDULING
- 
--/* A flag indicating that a ddg edge belongs to an SCC or not.  */
--enum edge_flag {NOT_IN_SCC = 0, IN_SCC};
--
- /* Forward declarations.  */
- static void add_backarc_to_ddg (ddg_ptr, ddg_edge_ptr);
- static void add_backarc_to_scc (ddg_scc_ptr, ddg_edge_ptr);
-@@ -188,9 +185,6 @@ create_ddg_dep_from_intra_loop_link (ddg
-   else if (DEP_TYPE (link) == REG_DEP_OUTPUT)
-     t = OUTPUT_DEP;
- 
--  gcc_assert (!DEBUG_INSN_P (dest_node->insn) || t == ANTI_DEP);
--  gcc_assert (!DEBUG_INSN_P (src_node->insn) || t == ANTI_DEP);
--
-   /* We currently choose not to create certain anti-deps edges and
-      compensate for that by generating reg-moves based on the life-range
-      analysis.  The anti-deps that will be deleted are the ones which
-@@ -225,9 +219,9 @@ create_ddg_dep_from_intra_loop_link (ddg
-         }
-     }
- 
--   latency = dep_cost (link);
--   e = create_ddg_edge (src_node, dest_node, t, dt, latency, distance);
--   add_edge_to_ddg (g, e);
-+  latency = dep_cost (link);
-+  e = create_ddg_edge (src_node, dest_node, t, dt, latency, distance);
-+  add_edge_to_ddg (g, e);
- }
- 
- /* The same as the above function, but it doesn't require a link parameter.  */
-@@ -240,9 +234,6 @@ create_ddg_dep_no_link (ddg_ptr g, ddg_n
-   enum reg_note dep_kind;
-   struct _dep _dep, *dep = &_dep;
- 
--  gcc_assert (!DEBUG_INSN_P (to->insn) || d_t == ANTI_DEP);
--  gcc_assert (!DEBUG_INSN_P (from->insn) || d_t == ANTI_DEP);
--
-   if (d_t == ANTI_DEP)
-     dep_kind = REG_DEP_ANTI;
-   else if (d_t == OUTPUT_DEP)
-@@ -275,16 +266,15 @@ create_ddg_dep_no_link (ddg_ptr g, ddg_n
- static void
- add_cross_iteration_register_deps (ddg_ptr g, df_ref last_def)
- {
--  int regno = DF_REF_REGNO (last_def);
-   struct df_link *r_use;
-   int has_use_in_bb_p = false;
--  rtx_insn *def_insn = DF_REF_INSN (last_def);
--  ddg_node_ptr last_def_node = get_node_of_insn (g, def_insn);
--  ddg_node_ptr use_node;
-+  int regno = DF_REF_REGNO (last_def);
-+  ddg_node_ptr last_def_node = get_node_of_insn (g, DF_REF_INSN (last_def));
-   df_ref first_def = df_bb_regno_first_def_find (g->bb, regno);
-+  ddg_node_ptr first_def_node = get_node_of_insn (g, DF_REF_INSN (first_def));
-+  ddg_node_ptr use_node;
- 
--  gcc_assert (last_def_node);
--  gcc_assert (first_def);
-+  gcc_assert (last_def_node && first_def && first_def_node);
- 
-   if (flag_checking && DF_REF_ID (last_def) != DF_REF_ID (first_def))
-     {
-@@ -303,6 +293,9 @@ add_cross_iteration_register_deps (ddg_p
- 
-       rtx_insn *use_insn = DF_REF_INSN (r_use->ref);
- 
-+      if (DEBUG_INSN_P (use_insn))
-+	continue;
-+
-       /* ??? Do not handle uses with DF_REF_IN_NOTE notes.  */
-       use_node = get_node_of_insn (g, use_insn);
-       gcc_assert (use_node);
-@@ -313,35 +306,28 @@ add_cross_iteration_register_deps (ddg_p
- 	     iteration.  Any such upwards exposed use appears before
- 	     the last_def def.  */
- 	  create_ddg_dep_no_link (g, last_def_node, use_node,
--				  DEBUG_INSN_P (use_insn) ? ANTI_DEP : TRUE_DEP,
--				  REG_DEP, 1);
-+				  TRUE_DEP, REG_DEP, 1);
- 	}
--      else if (!DEBUG_INSN_P (use_insn))
-+      else
- 	{
- 	  /* Add anti deps from last_def's uses in the current iteration
- 	     to the first def in the next iteration.  We do not add ANTI
- 	     dep when there is an intra-loop TRUE dep in the opposite
- 	     direction, but use regmoves to fix such disregarded ANTI
- 	     deps when broken.	If the first_def reaches the USE then
--	     there is such a dep.  */
--	  ddg_node_ptr first_def_node = get_node_of_insn (g,
--							  DF_REF_INSN (first_def));
--
--	  gcc_assert (first_def_node);
--
--         /* Always create the edge if the use node is a branch in
--            order to prevent the creation of reg-moves.  
--            If the address that is being auto-inc or auto-dec in LAST_DEF
--            is used in USE_INSN then do not remove the edge to make sure
--            reg-moves will not be created for that address.  */
--          if (DF_REF_ID (last_def) != DF_REF_ID (first_def)
--              || !flag_modulo_sched_allow_regmoves
-+	     there is such a dep.
-+	     Always create the edge if the use node is a branch in
-+	     order to prevent the creation of reg-moves.
-+	     If the address that is being auto-inc or auto-dec in LAST_DEF
-+	     is used in USE_INSN then do not remove the edge to make sure
-+	     reg-moves will not be created for that address.  */
-+	  if (DF_REF_ID (last_def) != DF_REF_ID (first_def)
-+	      || !flag_modulo_sched_allow_regmoves
- 	      || JUMP_P (use_node->insn)
--              || autoinc_var_is_used_p (DF_REF_INSN (last_def), use_insn)
-+	      || autoinc_var_is_used_p (DF_REF_INSN (last_def), use_insn)
- 	      || def_has_ccmode_p (DF_REF_INSN (last_def)))
--            create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP,
--                                    REG_DEP, 1);
--
-+	    create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP,
-+				    REG_DEP, 1);
- 	}
-     }
-   /* Create an inter-loop output dependence between LAST_DEF (which is the
-@@ -351,19 +337,11 @@ add_cross_iteration_register_deps (ddg_p
-      defs starting with a true dependence to a use which can be in the
-      next iteration; followed by an anti dependence of that use to the
-      first def (i.e. if there is a use between the two defs.)  */
--  if (!has_use_in_bb_p)
--    {
--      ddg_node_ptr dest_node;
--
--      if (DF_REF_ID (last_def) == DF_REF_ID (first_def))
--	return;
--
--      dest_node = get_node_of_insn (g, DF_REF_INSN (first_def));
--      gcc_assert (dest_node);
--      create_ddg_dep_no_link (g, last_def_node, dest_node,
--			      OUTPUT_DEP, REG_DEP, 1);
--    }
-+  if (!has_use_in_bb_p && DF_REF_ID (last_def) != DF_REF_ID (first_def))
-+    create_ddg_dep_no_link (g, last_def_node, first_def_node,
-+			    OUTPUT_DEP, REG_DEP, 1);
- }
-+
- /* Build inter-loop dependencies, by looking at DF analysis backwards.  */
- static void
- build_inter_loop_deps (ddg_ptr g)
-@@ -420,13 +398,9 @@ add_intra_loop_mem_dep (ddg_ptr g, ddg_n
-   if (mem_write_insn_p (from->insn))
-     {
-       if (mem_read_insn_p (to->insn))
--	create_ddg_dep_no_link (g, from, to,
--				DEBUG_INSN_P (to->insn)
--				? ANTI_DEP : TRUE_DEP, MEM_DEP, 0);
-+	create_ddg_dep_no_link (g, from, to, TRUE_DEP, MEM_DEP, 0);
-       else
--	create_ddg_dep_no_link (g, from, to,
--				DEBUG_INSN_P (to->insn)
--				? ANTI_DEP : OUTPUT_DEP, MEM_DEP, 0);
-+	create_ddg_dep_no_link (g, from, to, OUTPUT_DEP, MEM_DEP, 0);
-     }
-   else if (!mem_read_insn_p (to->insn))
-     create_ddg_dep_no_link (g, from, to, ANTI_DEP, MEM_DEP, 0);
-@@ -444,13 +418,9 @@ add_inter_loop_mem_dep (ddg_ptr g, ddg_n
-   if (mem_write_insn_p (from->insn))
-     {
-       if (mem_read_insn_p (to->insn))
--  	create_ddg_dep_no_link (g, from, to,
--				DEBUG_INSN_P (to->insn)
--				? ANTI_DEP : TRUE_DEP, MEM_DEP, 1);
-+	create_ddg_dep_no_link (g, from, to, TRUE_DEP, MEM_DEP, 1);
-       else if (from->cuid != to->cuid)
--  	create_ddg_dep_no_link (g, from, to,
--				DEBUG_INSN_P (to->insn)
--				? ANTI_DEP : OUTPUT_DEP, MEM_DEP, 1);
-+	create_ddg_dep_no_link (g, from, to, OUTPUT_DEP, MEM_DEP, 1);
-     }
-   else
-     {
-@@ -459,13 +429,9 @@ add_inter_loop_mem_dep (ddg_ptr g, ddg_n
-       else if (from->cuid != to->cuid)
- 	{
- 	  create_ddg_dep_no_link (g, from, to, ANTI_DEP, MEM_DEP, 1);
--	  if (DEBUG_INSN_P (from->insn) || DEBUG_INSN_P (to->insn))
--	    create_ddg_dep_no_link (g, to, from, ANTI_DEP, MEM_DEP, 1);
--	  else
--	    create_ddg_dep_no_link (g, to, from, TRUE_DEP, MEM_DEP, 1);
-+	  create_ddg_dep_no_link (g, to, from, TRUE_DEP, MEM_DEP, 1);
- 	}
-     }
--
- }
- 
- /* Perform intra-block Data Dependency analysis and connect the nodes in
-@@ -494,20 +460,10 @@ build_intra_loop_deps (ddg_ptr g)
-       sd_iterator_def sd_it;
-       dep_t dep;
- 
--      if (! INSN_P (dest_node->insn))
--	continue;
--
-       FOR_EACH_DEP (dest_node->insn, SD_LIST_BACK, sd_it, dep)
- 	{
- 	  rtx_insn *src_insn = DEP_PRO (dep);
--	  ddg_node_ptr src_node;
--
--	  /* Don't add dependencies on debug insns to non-debug insns
--	     to avoid codegen differences between -g and -g0.  */
--	  if (DEBUG_INSN_P (src_insn) && !DEBUG_INSN_P (dest_node->insn))
--	    continue;
--
--	  src_node = get_node_of_insn (g, src_insn);
-+	  ddg_node_ptr src_node = get_node_of_insn (g, src_insn);
- 
- 	  if (!src_node)
- 	    continue;
-@@ -524,8 +480,7 @@ build_intra_loop_deps (ddg_ptr g)
- 	  for (j = 0; j <= i; j++)
- 	    {
- 	      ddg_node_ptr j_node = &g->nodes[j];
--	      if (DEBUG_INSN_P (j_node->insn))
--		continue;
-+
- 	      if (mem_access_insn_p (j_node->insn))
- 		{
- 		  /* Don't bother calculating inter-loop dep if an intra-loop dep
-@@ -564,7 +519,7 @@ create_ddg (basic_block bb, int closing_
- {
-   ddg_ptr g;
-   rtx_insn *insn, *first_note;
--  int i;
-+  int i, j;
-   int num_nodes = 0;
- 
-   g = (ddg_ptr) xcalloc (1, sizeof (struct ddg));
-@@ -576,23 +531,21 @@ create_ddg (basic_block bb, int closing_
-   for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
-        insn = NEXT_INSN (insn))
-     {
--      if (! INSN_P (insn) || GET_CODE (PATTERN (insn)) == USE)
-+      if (!INSN_P (insn) || GET_CODE (PATTERN (insn)) == USE)
- 	continue;
- 
--      if (DEBUG_INSN_P (insn))
--	g->num_debug++;
--      else
-+      if (NONDEBUG_INSN_P (insn))
- 	{
- 	  if (mem_read_insn_p (insn))
- 	    g->num_loads++;
- 	  if (mem_write_insn_p (insn))
- 	    g->num_stores++;
-+	  num_nodes++;
- 	}
--      num_nodes++;
-     }
- 
-   /* There is nothing to do for this BB.  */
--  if ((num_nodes - g->num_debug) <= 1)
-+  if (num_nodes <= 1)
-     {
-       free (g);
-       return NULL;
-@@ -607,32 +560,39 @@ create_ddg (basic_block bb, int closing_
-   for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
-        insn = NEXT_INSN (insn))
-     {
--      if (! INSN_P (insn))
--	{
--	  if (! first_note && NOTE_P (insn)
--	      && NOTE_KIND (insn) !=  NOTE_INSN_BASIC_BLOCK)
--	    first_note = insn;
--	  continue;
--	}
-+      if (LABEL_P (insn) || NOTE_INSN_BASIC_BLOCK_P (insn))
-+	continue;
-+
-+      if (!first_note && (INSN_P (insn) || NOTE_P (insn)))
-+	first_note = insn;
-+
-+      if (!INSN_P (insn) || GET_CODE (PATTERN (insn)) == USE)
-+	continue;
-+
-       if (JUMP_P (insn))
- 	{
- 	  gcc_assert (!g->closing_branch);
- 	  g->closing_branch = &g->nodes[i];
- 	}
--      else if (GET_CODE (PATTERN (insn)) == USE)
-+
-+      if (NONDEBUG_INSN_P (insn))
- 	{
--	  if (! first_note)
--	    first_note = insn;
--	  continue;
--	}
-+	  g->nodes[i].cuid = i;
-+	  g->nodes[i].successors = sbitmap_alloc (num_nodes);
-+	  bitmap_clear (g->nodes[i].successors);
-+	  g->nodes[i].predecessors = sbitmap_alloc (num_nodes);
-+	  bitmap_clear (g->nodes[i].predecessors);
-+
-+	  gcc_checking_assert (first_note);
-+	  g->nodes[i].first_note = first_note;
-+
-+	  g->nodes[i].aux.count = -1;
-+	  g->nodes[i].max_dist = XCNEWVEC (int, num_nodes);
-+	  for (j = 0; j < num_nodes; j++)
-+	    g->nodes[i].max_dist[j] = -1;
- 
--      g->nodes[i].cuid = i;
--      g->nodes[i].successors = sbitmap_alloc (num_nodes);
--      bitmap_clear (g->nodes[i].successors);
--      g->nodes[i].predecessors = sbitmap_alloc (num_nodes);
--      bitmap_clear (g->nodes[i].predecessors);
--      g->nodes[i].first_note = (first_note ? first_note : insn);
--      g->nodes[i++].insn = insn;
-+	  g->nodes[i++].insn = insn;
-+	}
-       first_note = NULL;
-     }
- 
-@@ -668,6 +628,7 @@ free_ddg (ddg_ptr g)
- 	}
-       sbitmap_free (g->nodes[i].successors);
-       sbitmap_free (g->nodes[i].predecessors);
-+      free (g->nodes[i].max_dist);
-     }
-   if (g->num_backarcs > 0)
-     free (g->backarcs);
-@@ -792,7 +753,7 @@ create_ddg_edge (ddg_node_ptr src, ddg_n
-   e->latency = l;
-   e->distance = d;
-   e->next_in = e->next_out = NULL;
--  e->aux.info = 0;
-+  e->in_scc = false;
-   return e;
- }
- 
-@@ -820,7 +781,7 @@ add_edge_to_ddg (ddg_ptr g ATTRIBUTE_UNU
-    for now that cycles in the data dependence graph contain a single backarc.
-    This simplifies the algorithm, and can be generalized later.  */
- static void
--set_recurrence_length (ddg_scc_ptr scc, ddg_ptr g)
-+set_recurrence_length (ddg_scc_ptr scc)
- {
-   int j;
-   int result = -1;
-@@ -828,17 +789,14 @@ set_recurrence_length (ddg_scc_ptr scc,
-   for (j = 0; j < scc->num_backarcs; j++)
-     {
-       ddg_edge_ptr backarc = scc->backarcs[j];
--      int length;
-       int distance = backarc->distance;
-       ddg_node_ptr src = backarc->dest;
-       ddg_node_ptr dest = backarc->src;
-+      int length = src->max_dist[dest->cuid];
-+
-+      if (length < 0)
-+        continue;
- 
--      length = longest_simple_path (g, src->cuid, dest->cuid, scc->nodes);
--      if (length < 0 )
--	{
--	  /* fprintf (stderr, "Backarc not on simple cycle in SCC.\n"); */
--	  continue;
--	}
-       length += backarc->latency;
-       result = MAX (result, (length / distance));
-     }
-@@ -846,9 +804,9 @@ set_recurrence_length (ddg_scc_ptr scc,
- }
- 
- /* Create a new SCC given the set of its nodes.  Compute its recurrence_length
--   and mark edges that belong to this scc as IN_SCC.  */
-+   and mark edges that belong to this scc.  */
- static ddg_scc_ptr
--create_scc (ddg_ptr g, sbitmap nodes)
-+create_scc (ddg_ptr g, sbitmap nodes, int id)
- {
-   ddg_scc_ptr scc;
-   unsigned int u = 0;
-@@ -866,16 +824,18 @@ create_scc (ddg_ptr g, sbitmap nodes)
-       ddg_edge_ptr e;
-       ddg_node_ptr n = &g->nodes[u];
- 
-+      gcc_assert (n->aux.count == -1);
-+      n->aux.count = id;
-+
-       for (e = n->out; e; e = e->next_out)
- 	if (bitmap_bit_p (nodes, e->dest->cuid))
- 	  {
--	    e->aux.count = IN_SCC;
-+	    e->in_scc = true;
- 	    if (e->distance > 0)
- 	      add_backarc_to_scc (scc, e);
- 	  }
-     }
- 
--  set_recurrence_length (scc, g);
-   return scc;
- }
- 
-@@ -1018,7 +978,7 @@ check_sccs (ddg_all_sccs_ptr sccs, int n
- ddg_all_sccs_ptr
- create_ddg_all_sccs (ddg_ptr g)
- {
--  int i;
-+  int i, j, k, scc, way;
-   int num_nodes = g->num_nodes;
-   auto_sbitmap from (num_nodes);
-   auto_sbitmap to (num_nodes);
-@@ -1038,7 +998,7 @@ create_ddg_all_sccs (ddg_ptr g)
-       ddg_node_ptr dest = backarc->dest;
- 
-       /* If the backarc already belongs to an SCC, continue.  */
--      if (backarc->aux.count == IN_SCC)
-+      if (backarc->in_scc)
- 	continue;
- 
-       bitmap_clear (scc_nodes);
-@@ -1049,10 +1009,52 @@ create_ddg_all_sccs (ddg_ptr g)
- 
-       if (find_nodes_on_paths (scc_nodes, g, from, to))
- 	{
--	  scc = create_scc (g, scc_nodes);
-+	  scc = create_scc (g, scc_nodes, sccs->num_sccs);
- 	  add_scc_to_ddg (sccs, scc);
- 	}
-     }
-+
-+  /* Init max_dist arrays for the Floyd-Warshall-like
-+     longest-path calculation algorithm.  */
-+  for (k = 0; k < num_nodes; k++)
-+    {
-+      ddg_edge_ptr e;
-+      ddg_node_ptr n = &g->nodes[k];
-+
-+      if (n->aux.count == -1)
-+        continue;
-+
-+      n->max_dist[k] = 0;
-+      for (e = n->out; e; e = e->next_out)
-+        if (e->distance == 0 && g->nodes[e->dest->cuid].aux.count == n->aux.count)
-+          n->max_dist[e->dest->cuid] = e->latency;
-+    }
-+
-+  /* Run main Floyd-Warshall loop.  We use only non-backarc edges
-+     inside each scc.  */
-+  for (k = 0; k < num_nodes; k++)
-+    {
-+      scc = g->nodes[k].aux.count;
-+      if (scc != -1)
-+        {
-+          for (i = 0; i < num_nodes; i++)
-+            if (g->nodes[i].aux.count == scc)
-+              for (j = 0; j < num_nodes; j++)
-+                if (g->nodes[j].aux.count == scc
-+                    && g->nodes[i].max_dist[k] >= 0
-+                    && g->nodes[k].max_dist[j] >= 0)
-+                  {
-+                    way = g->nodes[i].max_dist[k] + g->nodes[k].max_dist[j];
-+                    if (g->nodes[i].max_dist[j] < way)
-+                      g->nodes[i].max_dist[j] = way;
-+                  }
-+        }
-+    }
-+
-+  /* Calculate recurrence_length using max_dist info.  */
-+  for (i = 0; i < sccs->num_sccs; i++)
-+    set_recurrence_length (sccs->sccs[i]);
-+
-   order_sccs (sccs);
- 
-   if (flag_checking)
-@@ -1155,72 +1157,4 @@ find_nodes_on_paths (sbitmap result, ddg
-   return bitmap_and (result, reachable_from, reach_to);
- }
- 
--
--/* Updates the counts of U_NODE's successors (that belong to NODES) to be
--   at-least as large as the count of U_NODE plus the latency between them.
--   Sets a bit in TMP for each successor whose count was changed (increased).
--   Returns nonzero if any count was changed.  */
--static int
--update_dist_to_successors (ddg_node_ptr u_node, sbitmap nodes, sbitmap tmp)
--{
--  ddg_edge_ptr e;
--  int result = 0;
--
--  for (e = u_node->out; e; e = e->next_out)
--    {
--      ddg_node_ptr v_node = e->dest;
--      int v = v_node->cuid;
--
--      if (bitmap_bit_p (nodes, v)
--	  && (e->distance == 0)
--	  && (v_node->aux.count < u_node->aux.count + e->latency))
--	{
--	  v_node->aux.count = u_node->aux.count + e->latency;
--	  bitmap_set_bit (tmp, v);
--	  result = 1;
--	}
--    }
--  return result;
--}
--
--
--/* Find the length of a longest path from SRC to DEST in G,
--   going only through NODES, and disregarding backarcs.  */
--int
--longest_simple_path (struct ddg * g, int src, int dest, sbitmap nodes)
--{
--  int i;
--  unsigned int u = 0;
--  int change = 1;
--  int num_nodes = g->num_nodes;
--  auto_sbitmap workset (num_nodes);
--  auto_sbitmap tmp (num_nodes);
--
--
--  /* Data will hold the distance of the longest path found so far from
--     src to each node.  Initialize to -1 = less than minimum.  */
--  for (i = 0; i < g->num_nodes; i++)
--    g->nodes[i].aux.count = -1;
--  g->nodes[src].aux.count = 0;
--
--  bitmap_clear (tmp);
--  bitmap_set_bit (tmp, src);
--
--  while (change)
--    {
--      sbitmap_iterator sbi;
--
--      change = 0;
--      bitmap_copy (workset, tmp);
--      bitmap_clear (tmp);
--      EXECUTE_IF_SET_IN_BITMAP (workset, 0, u, sbi)
--	{
--	  ddg_node_ptr u_node = &g->nodes[u];
--
--	  change |= update_dist_to_successors (u_node, nodes, tmp);
--	}
--    }
--  return g->nodes[dest].aux.count;
--}
--
- #endif /* INSN_SCHEDULING */
-diff -Nurp a/gcc/ddg.h b/gcc/ddg.h
---- a/gcc/ddg.h	2020-03-12 19:07:21.000000000 +0800
-+++ b/gcc/ddg.h	2020-11-28 18:38:33.835633230 +0800
-@@ -64,6 +64,10 @@ struct ddg_node
-   sbitmap successors;
-   sbitmap predecessors;
- 
-+  /* Temporary array used for Floyd-Warshall algorithm to find
-+     scc recurrence length.  */
-+  int *max_dist;
-+
-   /* For general use by algorithms manipulating the ddg.  */
-   union {
-     int count;
-@@ -95,11 +99,8 @@ struct ddg_edge
-   ddg_edge_ptr next_in;
-   ddg_edge_ptr next_out;
- 
--  /* For general use by algorithms manipulating the ddg.  */
--  union {
--    int count;
--    void *info;
--  } aux;
-+  /* Is true when edge is already in scc.  */
-+  bool in_scc;
- };
- 
- /* This structure holds the Data Dependence Graph for a basic block.  */
-@@ -115,9 +116,6 @@ struct ddg
-   int num_loads;
-   int num_stores;
- 
--  /* Number of debug instructions in the BB.  */
--  int num_debug;
--
-   /* This array holds the nodes in the graph; it is indexed by the node
-      cuid, which follows the order of the instructions in the BB.  */
-   ddg_node_ptr nodes;
-@@ -178,7 +176,6 @@ ddg_all_sccs_ptr create_ddg_all_sccs (dd
- void free_ddg_all_sccs (ddg_all_sccs_ptr);
- 
- int find_nodes_on_paths (sbitmap result, ddg_ptr, sbitmap from, sbitmap to);
--int longest_simple_path (ddg_ptr, int from, int to, sbitmap via);
- 
- bool autoinc_var_is_used_p (rtx_insn *, rtx_insn *);
- 
-diff -Nurp a/gcc/modulo-sched.c b/gcc/modulo-sched.c
---- a/gcc/modulo-sched.c	2020-03-12 19:07:21.000000000 +0800
-+++ b/gcc/modulo-sched.c	2020-11-28 18:38:33.835633230 +0800
-@@ -370,7 +370,7 @@ doloop_register_get (rtx_insn *head, rtx
-                              : prev_nondebug_insn (tail));
- 
-   for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
--    if (!DEBUG_INSN_P (insn) && reg_mentioned_p (reg, insn))
-+    if (NONDEBUG_INSN_P (insn) && reg_mentioned_p (reg, insn))
-       {
-         if (dump_file)
-         {
-@@ -429,7 +429,7 @@ res_MII (ddg_ptr g)
-   if (targetm.sched.sms_res_mii)
-     return targetm.sched.sms_res_mii (g);
- 
--  return ((g->num_nodes - g->num_debug) / issue_rate);
-+  return g->num_nodes / issue_rate;
- }
- 
- 
-@@ -2156,11 +2156,7 @@ sms_schedule_by_order (ddg_ptr g, int mi
-   	  ddg_node_ptr u_node = &ps->g->nodes[u];
- 	  rtx_insn *insn = u_node->insn;
- 
--	  if (!NONDEBUG_INSN_P (insn))
--	    {
--	      bitmap_clear_bit (tobe_scheduled, u);
--	      continue;
--	    }
-+	  gcc_checking_assert (NONDEBUG_INSN_P (insn));
- 
- 	  if (bitmap_bit_p (sched_nodes, u))
- 	    continue;
-@@ -3162,9 +3158,6 @@ ps_has_conflicts (partial_schedule_ptr p
- 	{
- 	  rtx_insn *insn = ps_rtl_insn (ps, crr_insn->id);
- 
--	  if (!NONDEBUG_INSN_P (insn))
--	    continue;
--
- 	  /* Check if there is room for the current insn.  */
- 	  if (!can_issue_more || state_dead_lock_p (curr_state))
- 	    return true;
-diff -Nurp a/gcc/testsuite/gcc.c-torture/execute/pr70127-debug-sms.c b/gcc/testsuite/gcc.c-torture/execute/pr70127-debug-sms.c
---- a/gcc/testsuite/gcc.c-torture/execute/pr70127-debug-sms.c	1970-01-01 08:00:00.000000000 +0800
-+++ b/gcc/testsuite/gcc.c-torture/execute/pr70127-debug-sms.c	2020-11-28 18:38:33.835633230 +0800
-@@ -0,0 +1,23 @@
-+/* { dg-additional-options "-fcompare-debug -fmodulo-sched" } */
-+
-+struct S { int f; signed int g : 2; } a[1], c = {5, 1}, d;
-+short b;
-+
-+__attribute__((noinline, noclone)) void
-+foo (int x)
-+{
-+  if (x != 1)
-+    __builtin_abort ();
-+}
-+
-+int
-+main ()
-+{
-+  while (b++ <= 0)
-+    {
-+      struct S e = {1, 1};
-+      d = e = a[0] = c;
-+    }
-+  foo (a[0].g);
-+  return 0;
-+}
-diff -Nurp a/gcc/testsuite/gcc.dg/torture/pr87197-debug-sms.c b/gcc/testsuite/gcc.dg/torture/pr87197-debug-sms.c
---- a/gcc/testsuite/gcc.dg/torture/pr87197-debug-sms.c	1970-01-01 08:00:00.000000000 +0800
-+++ b/gcc/testsuite/gcc.dg/torture/pr87197-debug-sms.c	2020-11-28 18:38:33.835633230 +0800
-@@ -0,0 +1,36 @@
-+/* { dg-do compile } */
-+/* { dg-additional-options "-fcompare-debug -fmodulo-sched --param sms-min-sc=1" } */
-+
-+int a, c, e, f, g;
-+void
-+h (int i)
-+{
-+  a = i;
-+}
-+void
-+j (char *i, long k)
-+{
-+  while (k--)
-+    c = *i++;
-+}
-+void
-+l (unsigned char *i, long k)
-+{
-+  unsigned char *b = i + k;
-+  while (i < b)
-+    {
-+      h (*i);
-+      i++;
-+    }
-+}
-+void
-+m ()
-+{
-+  while (e)
-+    {
-+      float d = g;
-+      l ((char *) &d, sizeof (g));
-+      if (f)
-+	j ((char *) &d, sizeof (g));
-+    }
-+}
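
The removal of longest_simple_path above relies on computing all-pairs
longest paths once per graph instead of one search per backarc.  A
standalone sketch of that Floyd-Warshall-style computation (simplified;
this is not the ddg.c code and it omits the SCC bookkeeping): -1 means
"unreachable", and every node k is tried as an intermediate point to relax
the i -> j distance.

#include <stdio.h>

#define N 4

static void
longest_paths (int dist[N][N])
{
  for (int k = 0; k < N; k++)
    for (int i = 0; i < N; i++)
      for (int j = 0; j < N; j++)
        if (dist[i][k] >= 0 && dist[k][j] >= 0
            && dist[i][j] < dist[i][k] + dist[k][j])
          dist[i][j] = dist[i][k] + dist[k][j];
}

int
main (void)
{
  /* Acyclic chain 0 -> 1 -> 2 -> 3 with latencies 1, 2 and 3.  */
  int dist[N][N] = {
    {  0,  1, -1, -1 },
    { -1,  0,  2, -1 },
    { -1, -1,  0,  3 },
    { -1, -1, -1,  0 },
  };

  longest_paths (dist);
  printf ("longest 0 -> 3: %d\n", dist[0][3]);   /* prints 6 */
  return 0;
}

The relaxation is only well-defined on an acyclic edge set, which is what
the patch's restriction to zero-distance, non-backarc edges inside one SCC
provides.
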
diff --git a/store-merging-Consider-also-overlapping-stores-earlier.patch b/store-merging-Consider-also-overlapping-stores-earlier.patch
deleted file mode 100644
index 15dd0d1ae59f51dd7a10252e08997587e9b5cac5..0000000000000000000000000000000000000000
--- a/store-merging-Consider-also-overlapping-stores-earlier.patch
+++ /dev/null
@@ -1,359 +0,0 @@
-This backport contains 1 patch from the GCC mainline tree.
-The commit id of the patch is listed below.
-
-0001-store-merging-Consider-also-overlapping-stores-earli.patch
-bd909071ac04e94f4b6f0baab64d0687ec55681d
-
-diff -uprN a/gcc/gimple-ssa-store-merging.c b/gcc/gimple-ssa-store-merging.c
---- a/gcc/gimple-ssa-store-merging.c	2020-12-16 17:03:16.155633230 +0800
-+++ b/gcc/gimple-ssa-store-merging.c	2020-12-16 11:15:58.575633230 +0800
-@@ -2021,7 +2021,8 @@ struct imm_store_chain_info
-       }
-   }
-   bool terminate_and_process_chain ();
--  bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int);
-+  bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int,
-+			   unsigned int);
-   bool coalesce_immediate_stores ();
-   bool output_merged_store (merged_store_group *);
-   bool output_merged_stores ();
-@@ -2342,14 +2343,39 @@ gather_bswap_load_refs (vec *refs,
-    into the group.  That way it will be its own store group and will
-    not be touched.  If ALL_INTEGER_CST_P and there are overlapping
-    INTEGER_CST stores, those are mergeable using merge_overlapping,
--   so don't return false for those.  */
-+   so don't return false for those.
-+
-+   Similarly, check stores from FIRST_EARLIER (inclusive) to END_EARLIER
-+   (exclusive), whether they don't overlap the bitrange START to END
-+   and have order in between FIRST_ORDER and LAST_ORDER.  This is to
-+   prevent merging in cases like:
-+     MEM  [&b + 8B] = {};
-+     MEM[(short *) &b] = 5;
-+     _5 = *x_4(D);
-+     MEM  [&b + 2B] = _5;
-+     MEM[(char *)&b + 16B] = 88;
-+     MEM[(int *)&b + 20B] = 1;
-+   The = {} store comes in sort_by_bitpos before the = 88 store, and can't
-+   be merged with it, because the = _5 store overlaps these and is in between
-+   them in sort_by_order ordering.  If it was merged, the merged store would
-+   go after the = _5 store and thus change behavior.  */
- 
- static bool
- check_no_overlap (vec m_store_info, unsigned int i,
--		  bool all_integer_cst_p, unsigned int last_order,
--		  unsigned HOST_WIDE_INT end)
-+		  bool all_integer_cst_p, unsigned int first_order,
-+		  unsigned int last_order, unsigned HOST_WIDE_INT start,
-+		  unsigned HOST_WIDE_INT end, unsigned int first_earlier,
-+		  unsigned end_earlier)
- {
-   unsigned int len = m_store_info.length ();
-+  for (unsigned int j = first_earlier; j < end_earlier; j++)
-+    {
-+      store_immediate_info *info = m_store_info[j];
-+      if (info->order > first_order
-+	  && info->order < last_order
-+	  && info->bitpos + info->bitsize > start)
-+	return false;
-+    }
-   for (++i; i < len; ++i)
-     {
-       store_immediate_info *info = m_store_info[i];
-@@ -2370,7 +2396,8 @@ check_no_overlap (vecbitsize;
-@@ -2509,7 +2536,8 @@ imm_store_chain_info::try_coalesce_bswap
-   if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
-     return false;
- 
--  if (!check_no_overlap (m_store_info, last, false, last_order, end))
-+  if (!check_no_overlap (m_store_info, last, false, first_order, last_order,
-+			 merged_store->start, end, first_earlier, first))
-     return false;
- 
-   /* Don't handle memory copy this way if normal non-bswap processing
-@@ -2601,6 +2629,8 @@ imm_store_chain_info::coalesce_immediate
- 
-   store_immediate_info *info;
-   unsigned int i, ignore = 0;
-+  unsigned int first_earlier = 0;
-+  unsigned int end_earlier = 0;
- 
-   /* Order the stores by the bitposition they write to.  */
-   m_store_info.qsort (sort_by_bitpos);
-@@ -2615,6 +2645,12 @@ imm_store_chain_info::coalesce_immediate
-       if (i <= ignore)
- 	goto done;
- 
-+      while (first_earlier < end_earlier
-+	     && (m_store_info[first_earlier]->bitpos
-+		 + m_store_info[first_earlier]->bitsize
-+		 <= merged_store->start))
-+	first_earlier++;
-+
-       /* First try to handle group of stores like:
- 	 p[0] = data >> 24;
- 	 p[1] = data >> 16;
-@@ -2628,7 +2664,8 @@ imm_store_chain_info::coalesce_immediate
- 	{
- 	  unsigned int try_size;
- 	  for (try_size = 64; try_size >= 16; try_size >>= 1)
--	    if (try_coalesce_bswap (merged_store, i - 1, try_size))
-+	    if (try_coalesce_bswap (merged_store, i - 1, try_size,
-+				    first_earlier))
- 	      break;
- 
- 	  if (try_size >= 16)
-@@ -2636,7 +2673,10 @@ imm_store_chain_info::coalesce_immediate
- 	      ignore = i + merged_store->stores.length () - 1;
- 	      m_merged_store_groups.safe_push (merged_store);
- 	      if (ignore < m_store_info.length ())
--		merged_store = new merged_store_group (m_store_info[ignore]);
-+		{
-+		  merged_store = new merged_store_group (m_store_info[ignore]);
-+		  end_earlier = ignore;
-+		}
- 	      else
- 		merged_store = NULL;
- 	      goto done;
-@@ -2662,12 +2702,16 @@ imm_store_chain_info::coalesce_immediate
- 	  /* Only allow overlapping stores of constants.  */
- 	  if (info->rhs_code == INTEGER_CST && merged_store->only_constants)
- 	    {
-+	      unsigned int first_order
-+		= MIN (merged_store->first_order, info->order);
- 	      unsigned int last_order
- 		= MAX (merged_store->last_order, info->order);
- 	      unsigned HOST_WIDE_INT end
- 		= MAX (merged_store->start + merged_store->width,
- 		       info->bitpos + info->bitsize);
--	      if (check_no_overlap (m_store_info, i, true, last_order, end))
-+	      if (check_no_overlap (m_store_info, i, true, first_order,
-+				    last_order, merged_store->start, end,
-+				    first_earlier, end_earlier))
- 		{
- 		  /* check_no_overlap call above made sure there are no
- 		     overlapping stores with non-INTEGER_CST rhs_code
-@@ -2696,6 +2740,7 @@ imm_store_chain_info::coalesce_immediate
- 		  do
- 		    {
- 		      unsigned int max_order = 0;
-+		      unsigned int min_order = first_order;
- 		      unsigned first_nonmergeable_int_order = ~0U;
- 		      unsigned HOST_WIDE_INT this_end = end;
- 		      k = i;
-@@ -2721,6 +2766,7 @@ imm_store_chain_info::coalesce_immediate
- 				  break;
- 				}
- 			      k = j;
-+			      min_order = MIN (min_order, info2->order);
- 			      this_end = MAX (this_end,
- 					      info2->bitpos + info2->bitsize);
- 			    }
-@@ -2736,6 +2782,12 @@ imm_store_chain_info::coalesce_immediate
- 			    first_nonmergeable_order
- 			      = MIN (first_nonmergeable_order, info2->order);
- 			}
-+		      if (k > i
-+			  && !check_no_overlap (m_store_info, len - 1, true,
-+						min_order, try_order,
-+						merged_store->start, this_end,
-+						first_earlier, end_earlier))
-+			k = 0;
- 		      if (k == 0)
- 			{
- 			  if (last_order == try_order)
-@@ -2821,9 +2873,12 @@ imm_store_chain_info::coalesce_immediate
- 	      info->ops_swapped_p = true;
- 	    }
- 	  if (check_no_overlap (m_store_info, i, false,
-+				MIN (merged_store->first_order, info->order),
- 				MAX (merged_store->last_order, info->order),
-+				merged_store->start,
- 				MAX (merged_store->start + merged_store->width,
--				     info->bitpos + info->bitsize)))
-+				     info->bitpos + info->bitsize),
-+				first_earlier, end_earlier))
- 	    {
- 	      /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores.  */
- 	      if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
-@@ -2868,6 +2923,7 @@ imm_store_chain_info::coalesce_immediate
- 	delete merged_store;
- 
-       merged_store = new merged_store_group (info);
-+      end_earlier = i;
-       if (dump_file && (dump_flags & TDF_DETAILS))
- 	fputs ("New store group\n", dump_file);
- 
-diff -uprN a/gcc/testsuite/gcc.dg/store_merging_31.c b/gcc/testsuite/gcc.dg/store_merging_31.c
---- a/gcc/testsuite/gcc.dg/store_merging_31.c	1970-01-01 08:00:00.000000000 +0800
-+++ b/gcc/testsuite/gcc.dg/store_merging_31.c	2020-12-16 11:15:58.575633230 +0800
-@@ -0,0 +1,27 @@
-+/* PR tree-optimization/97053 */
-+/* { dg-do run } */
-+/* { dg-options "-O2" } */
-+
-+struct S { short a; char b[9]; int c; char d; int e; };
-+
-+__attribute__((noipa)) void
-+foo (char *x, char *y)
-+{
-+  if (__builtin_strcmp (x, "ABCDXXXX") != 0
-+      || __builtin_strcmp (y, "ABCDXXXX") != 0)
-+    __builtin_abort ();
-+}
-+
-+int
-+main ()
-+{
-+  char a[9] = "XXXXXXXX";
-+  struct S b = {};
-+  __builtin_memcpy (a, "ABCD", 4);
-+  b.a = 5;
-+  __builtin_memcpy (b.b, a, 8); 
-+  b.d = 'X';
-+  b.e = 1;
-+  foo (a, b.b);
-+  return 0;
-+}
-diff -uprN a/gcc/testsuite/gcc.dg/store_merging_32.c b/gcc/testsuite/gcc.dg/store_merging_32.c
---- a/gcc/testsuite/gcc.dg/store_merging_32.c	1970-01-01 08:00:00.000000000 +0800
-+++ b/gcc/testsuite/gcc.dg/store_merging_32.c	2020-12-16 11:15:58.575633230 +0800
-@@ -0,0 +1,129 @@
-+/* PR tree-optimization/97053 */
-+/* { dg-do run } */
-+/* { dg-options "-O2 -fno-tree-dse" } */
-+
-+struct __attribute__((packed, may_alias)) S { long long s; };
-+struct __attribute__((packed, may_alias)) T { short t; };
-+
-+__attribute__((noipa)) void
-+test (char *p, char *q, int s)
-+{
-+  if ((s & 1) == 0)
-+    {
-+      if (*(short __attribute__((may_alias)) *) &p[sizeof (short)]
-+	  != *(short __attribute__((may_alias)) *) &q[sizeof (short)]
-+	  || (((struct S __attribute__((may_alias)) *) &p[1])->s
-+	      != ((struct S __attribute__((may_alias)) *) &q[1])->s)
-+	  || (*(short __attribute__((may_alias)) *) &p[2 * sizeof (short)]
-+	      != *(short __attribute__((may_alias)) *) &q[2 * sizeof (short)]))
-+	__builtin_abort ();
-+    }
-+  else
-+    {
-+      if (*(short __attribute__((may_alias)) *) &p[sizeof (short)]
-+	  != *(short __attribute__((may_alias)) *) &q[sizeof (short)]
-+	  || (((struct S __attribute__((may_alias)) *) &p[1])->s
-+	      != ((struct S __attribute__((may_alias)) *) &q[1])->s)
-+	  || (((struct T __attribute__((may_alias)) *) &p[2 * sizeof (short) - 1])->t
-+	      != ((struct T __attribute__((may_alias)) *) &q[2 * sizeof (short) - 1])->t)
-+	  || p[3 * sizeof (short) - 2] != q[3 * sizeof (short) - 2])
-+	__builtin_abort ();
-+    }
-+}
-+
-+__attribute__((noipa)) void
-+foo (long long *p, char *q, char *r, char *s)
-+{
-+  char a[64] __attribute__((aligned (__alignof (short))));
-+  *(short __attribute__((may_alias)) *) &a[sizeof (short)] = 1;
-+  ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
-+  *(short __attribute__((may_alias)) *) &a[2 * sizeof (short)] = 2;
-+  *(short __attribute__((may_alias)) *) &q[sizeof (short)] = 1;
-+  ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
-+  *(short __attribute__((may_alias)) *) &s[2 * sizeof (short)] = 2;
-+  test (a, q, 0);
-+}
-+
-+__attribute__((noipa)) void
-+bar (long long *p, char *q, char *r, char *s, char *t)
-+{
-+  char a[64] __attribute__((aligned (__alignof (short))));
-+  *(short __attribute__((may_alias)) *) &a[sizeof (short)] = 1;
-+  ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
-+  ((struct T __attribute__((may_alias)) *) &a[2 * sizeof (short) - 1])->t = 2;
-+  a[3 * sizeof (short) - 2] = 3;
-+  *(short __attribute__((may_alias)) *) &q[sizeof (short)] = 1;
-+  ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
-+  ((struct T __attribute__((may_alias)) *) &s[2 * sizeof (short) - 1])->t = 2;
-+  t[3 * sizeof (short) - 2] = 3;
-+  test (a, q, 1);
-+}
-+
-+__attribute__((noipa)) void
-+baz (long long *p, char *q, char *r, char *s)
-+{
-+  char a[64] __attribute__((aligned (__alignof (short))));
-+  *(short __attribute__((may_alias)) *) &a[2 * sizeof (short)] = 2;
-+  ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
-+  *(short __attribute__((may_alias)) *) &a[sizeof (short)] = 1;
-+  *(short __attribute__((may_alias)) *) &q[2 * sizeof (short)] = 2;
-+  ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
-+  *(short __attribute__((may_alias)) *) &s[sizeof (short)] = 1;
-+  test (a, q, 2);
-+}
-+
-+__attribute__((noipa)) void
-+qux (long long *p, char *q, char *r, char *s, char *t)
-+{
-+  char a[64] __attribute__((aligned (__alignof (short))));
-+  *(short __attribute__((may_alias)) *) &a[2 * sizeof (short) - 1] = 2;
-+  ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
-+  a[3 * sizeof (short) - 2] = 3;
-+  *(short __attribute__((may_alias)) *) &a[sizeof (short)] = 1;
-+  ((struct T __attribute__((may_alias)) *) &q[2 * sizeof (short) - 1])->t = 2;
-+  ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
-+  s[3 * sizeof (short) - 2] = 3;
-+  ((struct T __attribute__((may_alias)) *) &t[sizeof (short)])->t = 1;
-+  test (a, q, 3);
-+}
-+
-+__attribute__((noipa)) void
-+corge (long long *p, char *q, char *r, char *s, short u[3])
-+{
-+  char a[64] __attribute__((aligned (__alignof (short))));
-+  *(short __attribute__((may_alias)) *) &a[2 * sizeof (short)] = u[2];
-+  ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
-+  *(short __attribute__((may_alias)) *) &a[sizeof (short)] = u[1];
-+  *(short __attribute__((may_alias)) *) &q[2 * sizeof (short)] = u[2];
-+  ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
-+  *(short __attribute__((may_alias)) *) &s[sizeof (short)] = u[1];
-+  test (a, q, 4);
-+}
-+
-+__attribute__((noipa)) void
-+garply (long long *p, char *q, char *r, char *s, short u[3])
-+{
-+  char a[64] __attribute__((aligned (__alignof (short))));
-+  *(short __attribute__((may_alias)) *) &a[sizeof (short)] = u[1];
-+  ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
-+  *(short __attribute__((may_alias)) *) &a[2 * sizeof (short)] = u[2];
-+  *(short __attribute__((may_alias)) *) &s[sizeof (short)] = u[1];
-+  ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
-+  *(short __attribute__((may_alias)) *) &q[2 * sizeof (short)] = u[2];
-+  test (a, q, 6);
-+}
-+
-+int
-+main ()
-+{
-+  char a[64] __attribute__((aligned (__alignof (short))));
-+  long long p = -1LL;
-+  short u[] = { 1, 2, 3 };
-+  foo (&p, &a[0], &a[0], &a[0]);
-+  bar (&p, &a[0], &a[0], &a[0], &a[0]);
-+  baz (&p, &a[0], &a[0], &a[0]);
-+  qux (&p, &a[0], &a[0], &a[0], &a[0]);
-+  corge (&p, &a[0], &a[0], &a[0], u);
-+  garply (&p, &a[0], &a[0], &a[0], u);
-+  return 0;
-+}
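
For readers unfamiliar with the pass, a small illustrative function (not
part of the patch) showing what store merging normally achieves; the change
above is about when such coalescing must be rejected because an overlapping
non-constant store sits between the candidate stores in program order.

/* Four adjacent byte stores of constants; the store-merging pass can
   coalesce them into a single wider store where that is cheap.  */
void
set_magic (unsigned char *p)
{
  p[0] = 0x7f;
  p[1] = 'E';
  p[2] = 'L';
  p[3] = 'F';
}
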
diff --git a/testsuite-Fix-pr94185.patch b/testsuite-Fix-pr94185.patch
deleted file mode 100644
index d4d4da218bcb60db3386de6c07521a926dd761e1..0000000000000000000000000000000000000000
--- a/testsuite-Fix-pr94185.patch
+++ /dev/null
@@ -1,18 +0,0 @@
-This backport contains 1 patch from the GCC mainline tree.
-The commit id of the patch is listed below.
-
-0001-testsuite-Fix-pr94185.C-testcase-on-i686-linux-with-.patch
-994d48620621fa33d32018be5fb70042e38546d5
-
-diff -uprN a/gcc/testsuite/g++.target/i386/pr94185.C b/gcc/testsuite/g++.target/i386/pr94185.C
---- a/gcc/testsuite/g++.target/i386/pr94185.C
-+++ b/gcc/testsuite/g++.target/i386/pr94185.C
-@@ -22,7 +22,7 @@ int d;
- void l(char *, ar m, long n) {
-   switch (m.au[d])
-   case 0:
--    n &= 4294967295;
-+    n &= 4294967295U;
-   bb.h(0).g(n);
- }
- void o() {
diff --git a/tighten-range-for-generating-csel.patch b/tighten-range-for-generating-csel.patch
deleted file mode 100644
index 8e628f8a8ca40cb6a782da2ec66abe6108fc4434..0000000000000000000000000000000000000000
--- a/tighten-range-for-generating-csel.patch
+++ /dev/null
@@ -1,132 +0,0 @@
-This backport contains 1 patch from the GCC mainline tree.
-The commit id of the patch is listed below.
-
-0001-cselim-Don-t-assume-it-is-safe-to-cstore-replace-a-s.patch
-cf39dccf9284d2fd9f9aa7050760adea110c8d88
-
-diff -uprN a/gcc/testsuite/gcc.c-torture/execute/pr94734.c b/gcc/testsuite/gcc.c-torture/execute/pr94734.c
-new file mode 100644
---- /dev/null
-+++ b/gcc/testsuite/gcc.c-torture/execute/pr94734.c
-@@ -0,0 +1,59 @@
-+/* PR tree-optimization/94734 */
-+
-+__attribute__((noipa)) int
-+foo (int n)
-+{
-+  int arr[16], s = 0;
-+  for (int i = 0; i < n; i++)
-+    {
-+      if (i < 16)
-+	arr[i] = i;
-+    }
-+  for (int i = 0; i < 16; i++)
-+    s += arr[i];
-+  return s;
-+}
-+
-+__attribute__((noipa)) int
-+bar (int n, int x, unsigned long y, unsigned long z)
-+{
-+  int arr[16], s = 0;
-+  arr[4] = 42;
-+  for (int i = 0; i < n; i++)
-+    {
-+      if (x == (i & 0x25))
-+	arr[y] = i;
-+    }
-+  return arr[z];
-+}
-+
-+__attribute__((noipa)) int
-+baz (int n, int x, unsigned long z)
-+{
-+  int arr[16], s = 0;
-+  arr[12] = 42;
-+  for (int i = 0; i < n; i++)
-+    {
-+      if (x == (i & 0x25))
-+	arr[7] = i;
-+    }
-+  return arr[z];
-+}
-+
-+int
-+main ()
-+{
-+  if (foo (10374) != 15 * 16 / 2)
-+    __builtin_abort ();
-+  if (bar (25, 0x25, (unsigned long) 0xdeadbeefbeefdeadULL, 4) != 42)
-+    __builtin_abort ();
-+  if (bar (25, 4, 15, 15) != 22)
-+    __builtin_abort ();
-+  if (baz (25, 0x25, 12) != 42)
-+    __builtin_abort ();
-+  if (baz (25, 4, 7) != 22)
-+    __builtin_abort ();
-+  if (baz (25, 4, 12) != 42)
-+    __builtin_abort ();
-+  return 0;
-+}
-diff -uprN a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
---- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
-+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
-@@ -9,4 +9,4 @@ unsigned test(unsigned k, unsigned b) {
-         return a[0]+a[1];
- }
- 
--/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
-+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */
-diff -uprN a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
---- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
-+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
-@@ -11,4 +11,4 @@ unsigned test(unsigned k, unsigned b) {
-         return a[0]+a[1];
- }
- 
--/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
-+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */
-diff -uprN a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
---- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
-+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
-@@ -13,4 +13,4 @@ int test(int b, int k) {
-     return a.data[0] + a.data[1];
- }
- 
--/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
-+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */
-diff -uprN a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
---- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
-+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
-@@ -16,4 +16,4 @@ int test(int b, int k) {
-     return a.data[0].x + a.data[1].x;
- }
- 
--/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
-+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */
-diff -uprN a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
---- a/gcc/tree-ssa-phiopt.c
-+++ b/gcc/tree-ssa-phiopt.c
-@@ -45,6 +45,7 @@ along with GCC; see the file COPYING3.  If not see
- #include "tree-inline.h"
- #include "params.h"
- #include "case-cfn-macros.h"
-+#include "tree-eh.h"
- 
- static unsigned int tree_ssa_phiopt_worker (bool, bool, bool);
- static bool two_value_replacement (basic_block, basic_block, edge, gphi *,
-@@ -2237,10 +2238,13 @@ cond_store_replacement (basic_block middle_bb, basic_block join_bb,
-      whose value is not available readily, which we want to avoid.  */
-   if (!nontrap->contains (lhs))
-     {
--      /* If LHS is a local variable without address-taken, we could
-+      /* If LHS is an access to a local variable without address-taken
-+	 (or when we allow data races) and known not to trap, we could
- 	 always safely move down the store.  */
-       tree base = get_base_address (lhs);
--      if (!auto_var_p (base) || TREE_ADDRESSABLE (base))
-+      if (!auto_var_p (base)
-+	  || (TREE_ADDRESSABLE (base) && !flag_store_data_races)
-+	  || tree_could_trap_p (lhs))
- 	return false;
-     }
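
As a reminder of the transformation being restricted here (a hand-written
sketch, not GCC output): conditional store replacement turns a guarded
store into an unconditional one by first reading the old value, which is
only safe when the accessed location cannot trap and the extra load and
store cannot introduce a data race.

/* Before cselim: the store happens only when the guard holds.  */
void
guarded_store (int *p, int x, int guard)
{
  if (guard)
    *p = x;
}

/* After conditional store replacement, conceptually: *p is now read and
   written on every path, which is why the patch refuses the transform for
   locations that may trap or that are address-taken when data races must
   be preserved.  */
void
guarded_store_cselim (int *p, int x, int guard)
{
  int tmp = guard ? x : *p;
  *p = tmp;
}
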
diff --git a/tree-optimization-96920-another-ICE-when-vectorizing.patch b/tree-optimization-96920-another-ICE-when-vectorizing.patch
deleted file mode 100644
index ae6122c2e1aaf8c5b0d25027839d9a48c3d83573..0000000000000000000000000000000000000000
--- a/tree-optimization-96920-another-ICE-when-vectorizing.patch
+++ /dev/null
@@ -1,316 +0,0 @@
-This backport contains 1 patch from the GCC mainline tree.
-The commit id of the patch is listed below.
-
-46a58c779af3055a4b10b285a1f4be28abe4351c
-0001-tree-optimization-96920-another-ICE-when-vectorizing.patch
-
-diff -uprN a/gcc/testsuite/gcc.dg/vect/pr96920.c b/gcc/testsuite/gcc.dg/vect/pr96920.c
---- a/gcc/testsuite/gcc.dg/vect/pr96920.c	1970-01-01 08:00:00.000000000 +0800
-+++ b/gcc/testsuite/gcc.dg/vect/pr96920.c	2020-10-26 21:46:25.316000000 +0800
-@@ -0,0 +1,20 @@
-+/* { dg-do compile } */
-+
-+int a[1024];
-+int b[2048];
-+
-+void foo (int x, int y)
-+{
-+  for (int i = 0; i < 1024; ++i)
-+    {
-+      int tem0 = b[2*i];
-+      int tem1 = b[2*i+1];
-+      for (int j = 0; j < 32; ++j)
-+	{
-+	  int tem = tem0;
-+	  tem0 = tem1;
-+	  tem1 = tem;
-+	  a[i] += tem0;
-+	}
-+    }
-+}
-diff -uprN a/gcc/testsuite/gfortran.dg/vect/pr96920.f90 b/gcc/testsuite/gfortran.dg/vect/pr96920.f90
---- a/gcc/testsuite/gfortran.dg/vect/pr96920.f90	1970-01-01 08:00:00.000000000 +0800
-+++ b/gcc/testsuite/gfortran.dg/vect/pr96920.f90	2020-10-26 21:46:25.316000000 +0800
-@@ -0,0 +1,37 @@
-+! { dg-do compile }
-+      subroutine ice(npoint, nterm, x, g)
-+      implicit none
-+      integer    norder
-+      parameter (norder=10)
-+      integer j
-+      integer k
-+      integer ii
-+      integer nterm
-+      integer npoint
-+      real b(norder)
-+      real c(norder)
-+      real d(norder)
-+      real x(npoint)
-+      real g(npoint)
-+      real gg
-+      real prev
-+      real prev2
-+
-+          j = 1
-+    100   continue
-+          j = j+1
-+          if (nterm == j)  then
-+             do ii=1,npoint
-+                k = nterm
-+                gg= d(k)
-+                prev= 0.0
-+                do k=k-1,1,-1
-+                   prev2= prev
-+                   prev= gg
-+                   gg = d(k)+(x(ii)-b(k))*prev-c(k+1)*prev2
-+                enddo
-+                g(ii) = gg
-+             enddo
-+          endif
-+          go to 100
-+      end
-diff -uprN a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
---- a/gcc/tree-vect-loop.c	2020-10-26 21:45:23.056000000 +0800
-+++ b/gcc/tree-vect-loop.c	2020-10-26 21:49:02.884000000 +0800
-@@ -8166,6 +8166,47 @@ scale_profile_for_vect_loop (struct loop
-     scale_bbs_frequencies (&loop->latch, 1, exit_l->probability / prob);
- }
- 
-+/* For a vectorized stmt DEF_STMT_INFO adjust all vectorized PHI
-+   latch edge values originally defined by it.  */
-+
-+static void
-+maybe_set_vectorized_backedge_value (loop_vec_info loop_vinfo,
-+				     stmt_vec_info def_stmt_info)
-+{
-+  tree def = gimple_get_lhs (vect_orig_stmt (def_stmt_info)->stmt);
-+  if (!def || TREE_CODE (def) != SSA_NAME)
-+    return;
-+  stmt_vec_info phi_info;
-+  imm_use_iterator iter;
-+  use_operand_p use_p;
-+  FOR_EACH_IMM_USE_FAST (use_p, iter, def)
-+    if (gphi *phi = dyn_cast <gphi *> (USE_STMT (use_p)))
-+      if (gimple_bb (phi)->loop_father->header == gimple_bb (phi)
-+	  && (phi_info = loop_vinfo->lookup_stmt (phi))
-+	  && VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (phi_info))
-+	  && STMT_VINFO_REDUC_TYPE (phi_info) != FOLD_LEFT_REDUCTION
-+	  && STMT_VINFO_REDUC_TYPE (phi_info) != EXTRACT_LAST_REDUCTION)
-+	{
-+	  loop_p loop = gimple_bb (phi)->loop_father;
-+	  edge e = loop_latch_edge (loop);
-+	  if (PHI_ARG_DEF_FROM_EDGE (phi, e) == def)
-+	    {
-+	      stmt_vec_info phi_defs = STMT_VINFO_VEC_STMT (phi_info);
-+	      stmt_vec_info latch_defs = STMT_VINFO_VEC_STMT (def_stmt_info);
-+	      while (phi_defs && latch_defs)
-+		{
-+		  add_phi_arg (as_a  (phi_defs->stmt),
-+		  add_phi_arg (as_a <gphi *> (phi_defs->stmt),
-+			       gimple_phi_arg_location (phi, e->dest_idx));
-+		  phi_defs = STMT_VINFO_RELATED_STMT (phi_defs);
-+		  latch_defs = STMT_VINFO_RELATED_STMT (latch_defs);
-+		}
-+	      gcc_assert (!latch_defs);
-+	      gcc_assert (!phi_defs);
-+	    }
-+	}
-+}
-+
- /* Vectorize STMT_INFO if relevant, inserting any new instructions before GSI.
-    When vectorizing STMT_INFO as a store, set *SEEN_STORE to its
-    stmt_vec_info.  */
-@@ -8533,7 +8574,7 @@ vect_transform_loop (loop_vec_info loop_
- 
-       for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
- 	   gsi_next (&si))
--        {
-+	{
- 	  gphi *phi = si.phi ();
- 	  if (dump_enabled_p ())
- 	    dump_printf_loc (MSG_NOTE, vect_location,
-@@ -8568,6 +8609,27 @@ vect_transform_loop (loop_vec_info loop_
- 	    }
- 	}
- 
-+      for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
-+	   gsi_next (&si))
-+	{
-+	  gphi *phi = si.phi ();
-+	  stmt_info = loop_vinfo->lookup_stmt (phi);
-+	  if (!stmt_info)
-+	    continue;
-+
-+	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
-+	      && !STMT_VINFO_LIVE_P (stmt_info))
-+	    continue;
-+
-+	  if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def
-+	       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
-+	       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def
-+	       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
-+	       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def)
-+	      && ! PURE_SLP_STMT (stmt_info))
-+	    maybe_set_vectorized_backedge_value (loop_vinfo, stmt_info);
-+	}
-+
-       for (gimple_stmt_iterator si = gsi_start_bb (bb);
- 	   !gsi_end_p (si);)
- 	{
-@@ -8604,9 +8666,16 @@ vect_transform_loop (loop_vec_info loop_
- 			= STMT_VINFO_RELATED_STMT (stmt_info);
- 		      vect_transform_loop_stmt (loop_vinfo, pat_stmt_info, &si,
- 						&seen_store);
-+		      maybe_set_vectorized_backedge_value (loop_vinfo,
-+				      			   pat_stmt_info);
-+		    }
-+		  else
-+		    {
-+		      vect_transform_loop_stmt (loop_vinfo, stmt_info, &si,
-+				      		&seen_store);
-+		      maybe_set_vectorized_backedge_value (loop_vinfo,
-+				      			   stmt_info);
- 		    }
--		  vect_transform_loop_stmt (loop_vinfo, stmt_info, &si,
--					    &seen_store);
- 		}
- 	      gsi_next (&si);
- 	      if (seen_store)
-@@ -8623,43 +8692,6 @@ vect_transform_loop (loop_vec_info loop_
- 	    }
- 	}
- 
--      /* Fill in backedge defs of reductions.  */
--      for (unsigned i = 0; i < loop_vinfo->reduc_latch_defs.length (); ++i)
--	{
--	  stmt_vec_info stmt_info = loop_vinfo->reduc_latch_defs[i];
--	  stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
--	  stmt_vec_info phi_info
--	    = STMT_VINFO_VEC_STMT (STMT_VINFO_REDUC_DEF (orig_stmt_info));
--	  stmt_vec_info vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
--	  gphi *phi
--	    = dyn_cast <gphi *> (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt);
--	  edge e = loop_latch_edge (gimple_bb (phi_info->stmt)->loop_father);
--	  do
--	    {
--	      add_phi_arg (as_a <gphi *> (phi_info->stmt),
--			   gimple_get_lhs (vec_stmt->stmt), e,
--			   gimple_phi_arg_location (phi, e->dest_idx));
--	      phi_info = STMT_VINFO_RELATED_STMT (phi_info);
--	      vec_stmt = STMT_VINFO_RELATED_STMT (vec_stmt);
--	    }
--	  while (phi_info);
--	  gcc_assert (!vec_stmt);
--	}
--      for (unsigned i = 0; i < loop_vinfo->reduc_latch_slp_defs.length (); ++i)
--	{
--	  slp_tree slp_node = loop_vinfo->reduc_latch_slp_defs[i].first;
--	  slp_tree phi_node = loop_vinfo->reduc_latch_slp_defs[i].second;
--	  gphi *phi = as_a <gphi *> (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt);
--	  e = loop_latch_edge (gimple_bb (phi)->loop_father);
--	  gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
--		      == SLP_TREE_VEC_STMTS (slp_node).length ());
--	  for (unsigned j = 0; j < SLP_TREE_VEC_STMTS (phi_node).length (); ++j)
--	    add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[j]->stmt),
--			 gimple_get_lhs
--			     (SLP_TREE_VEC_STMTS (slp_node)[j]->stmt),
--			 e, gimple_phi_arg_location (phi, e->dest_idx));
--	}
--
-       /* Stub out scalar statements that must not survive vectorization.
- 	 Doing this here helps with grouped statements, or statements that
- 	 are involved in patterns.  */
-diff -uprN a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
---- a/gcc/tree-vectorizer.h	2020-10-26 21:45:23.052000000 +0800
-+++ b/gcc/tree-vectorizer.h	2020-10-26 21:46:25.316000000 +0800
-@@ -575,11 +575,6 @@ typedef struct _loop_vec_info : public v
-      stmt in the chain.  */
-   auto_vec<stmt_vec_info> reduction_chains;
- 
--  /* The vectorized stmts defining the latch values of the reduction
--     they are involved with.  */
--  auto_vec<stmt_vec_info> reduc_latch_defs;
--  auto_vec<std::pair<slp_tree, slp_tree> > reduc_latch_slp_defs;
--
-   /* Cost vector for a single scalar iteration.  */
-   auto_vec<stmt_info_for_cost> scalar_cost_vec;
- 
-diff -uprN a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
---- a/gcc/tree-vect-slp.c	2020-10-26 21:45:23.052000000 +0800
-+++ b/gcc/tree-vect-slp.c	2020-10-26 21:46:25.320000000 +0800
-@@ -2189,6 +2189,7 @@ vect_analyze_slp_instance (vec_info *vin
- 	  SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
- 	  SLP_INSTANCE_LOADS (new_instance) = vNULL;
- 	  SLP_INSTANCE_ROOT_STMT (new_instance) = constructor ? stmt_info : NULL;
-+	  new_instance->reduc_phis = NULL;
- 
- 	  vect_gather_slp_loads (new_instance, node);
- 	  if (dump_enabled_p ())
-@@ -4282,6 +4283,26 @@ vect_schedule_slp (vec_info *vinfo)
-       stmt_vec_info store_info;
-       unsigned int j;
- 
-+      /* For reductions set the latch values of the vectorized PHIs.  */
-+      if (instance->reduc_phis
-+	  && STMT_VINFO_REDUC_TYPE (SLP_TREE_SCALAR_STMTS
-+		  	(instance->reduc_phis)[0]) != FOLD_LEFT_REDUCTION
-+	  && STMT_VINFO_REDUC_TYPE (SLP_TREE_SCALAR_STMTS
-+		  	(instance->reduc_phis)[0]) != EXTRACT_LAST_REDUCTION)
-+	{
-+	  slp_tree slp_node = root;
-+	  slp_tree phi_node = instance->reduc_phis;
-+	  gphi *phi = as_a <gphi *> (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt);
-+	  edge e = loop_latch_edge (gimple_bb (phi)->loop_father);
-+	  gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
-+		      == SLP_TREE_VEC_STMTS (slp_node).length ());
-+	  for (unsigned j = 0; j < SLP_TREE_VEC_STMTS (phi_node).length (); ++j)
-+	    add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[j]->stmt),
-+			 gimple_get_lhs
-+			     (SLP_TREE_VEC_STMTS (slp_node)[j]->stmt),
-+			 e, gimple_phi_arg_location (phi, e->dest_idx));
-+	}
-+
-       /* Remove scalar call stmts.  Do not do this for basic-block
- 	 vectorization as not all uses may be vectorized.
- 	 ???  Why should this be necessary?  DCE should be able to
-diff -uprN a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
---- a/gcc/tree-vect-stmts.c	2020-10-26 21:45:23.012000000 +0800
-+++ b/gcc/tree-vect-stmts.c	2020-10-26 21:46:25.320000000 +0800
-@@ -10229,37 +10229,6 @@ vect_transform_stmt (stmt_vec_info stmt_
-   if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
-     return is_store;
- 
--  /* If this stmt defines a value used on a backedge, record it so
--     we can update the vectorized PHIs later.  */
--  stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
--  stmt_vec_info reduc_info;
--  if (STMT_VINFO_REDUC_DEF (orig_stmt_info)
--      && vect_stmt_to_vectorize (orig_stmt_info) == stmt_info
--      && (reduc_info = info_for_reduction (orig_stmt_info))
--      && STMT_VINFO_REDUC_TYPE (reduc_info) != FOLD_LEFT_REDUCTION
--      && STMT_VINFO_REDUC_TYPE (reduc_info) != EXTRACT_LAST_REDUCTION)
--    {
--      gphi *phi;
--      edge e;
--      if (!slp_node
--	  && (phi = dyn_cast <gphi *>
--		      (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt))
--	  && dominated_by_p (CDI_DOMINATORS,
--			     gimple_bb (orig_stmt_info->stmt), gimple_bb (phi))
--	  && (e = loop_latch_edge (gimple_bb (phi)->loop_father))
--	  && (PHI_ARG_DEF_FROM_EDGE (phi, e)
--	      == gimple_get_lhs (orig_stmt_info->stmt)))
--	{
--	  as_a <loop_vec_info> (vinfo)->reduc_latch_defs.safe_push (stmt_info);
--	}
--      else if (slp_node
--	       && slp_node != slp_node_instance->reduc_phis)
--	{
--	  as_a <loop_vec_info> (vinfo)->reduc_latch_slp_defs.safe_push
--	    (std::make_pair (slp_node, slp_node_instance->reduc_phis));
--	}
--    }
--
-   /* Handle stmts whose DEF is used outside the loop-nest that is
-      being vectorized.  */
-   done = can_vectorize_live_stmts (stmt_info, gsi, slp_node,
diff --git a/tree-optimization-97812-fix-range-query-in-VRP-asser.patch b/tree-optimization-97812-fix-range-query-in-VRP-asser.patch
deleted file mode 100644
index 09c77ee02225e00129029b1a42424a1b1476ae9c..0000000000000000000000000000000000000000
--- a/tree-optimization-97812-fix-range-query-in-VRP-asser.patch
+++ /dev/null
@@ -1,48 +0,0 @@
-This backport contains 1 patch from the gcc mainstream tree.
-The commit id of the patch is listed below.
-
-dcfd302a79a5e2ea3bb16fc4fc45a5ee31cc0eab
-0001-tree-optimization-97812-fix-range-query-in-VRP-asser.patch
-
-diff --git a/gcc/testsuite/gcc.dg/torture/pr97812.c b/gcc/testsuite/gcc.dg/torture/pr97812.c
-new file mode 100644
-index 00000000000..4d468adf8fa
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/torture/pr97812.c
-@@ -0,0 +1,15 @@
-+/* { dg-do run } */
-+/* { dg-additional-options "-fdisable-tree-evrp" } */
-+
-+unsigned char c;
-+
-+int main() {
-+volatile short b = 4066;
-+  unsigned short bp = b;
-+  unsigned d = bp & 2305;
-+  signed char e = d;
-+  c = e ? : e;
-+  if (!d)
-+    __builtin_abort ();
-+  return 0;
-+}
-diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
-index 54ce017e8b2..d661866630e 100644
---- a/gcc/tree-vrp.c
-+++ b/gcc/tree-vrp.c
-@@ -1740,8 +1740,14 @@ register_edge_assert_for_2 (tree name, edge e,
- 	      && ((TYPE_PRECISION (TREE_TYPE (name))
- 		   > TYPE_PRECISION (TREE_TYPE (rhs1)))
- 		  || (get_range_info (rhs1, &rmin, &rmax) == VR_RANGE
--		      && wi::fits_to_tree_p (rmin, TREE_TYPE (name))
--		      && wi::fits_to_tree_p (rmax, TREE_TYPE (name)))))
-+		      && wi::fits_to_tree_p
-+			   (widest_int::from (rmin,
-+					      TYPE_SIGN (TREE_TYPE (rhs1))),
-+			    TREE_TYPE (name))
-+		      && wi::fits_to_tree_p
-+			   (widest_int::from (rmax,
-+					      TYPE_SIGN (TREE_TYPE (rhs1))),
-+			    TREE_TYPE (name)))))
- 	    add_assert_info (asserts, rhs1, rhs1,
- 		 	     comp_code, fold_convert (TREE_TYPE (rhs1), val));
- 	}
diff --git a/vectorizable-comparison-Swap-operands-only-once.patch b/vectorizable-comparison-Swap-operands-only-once.patch
deleted file mode 100644
index e42ef9601ee9435dae1f52a3d3154f57eaa73d30..0000000000000000000000000000000000000000
--- a/vectorizable-comparison-Swap-operands-only-once.patch
+++ /dev/null
@@ -1,19 +0,0 @@
-This backport contains 1 patch from the gcc mainstream tree.
-The commit id of the patch is listed below.
-
-a0aeb7fb93da156b64fd08391c79ff35a69af7ba
-0001-tree-vect-stmts.c-vectorizable_comparison-Swap-opera.patch
-
-diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
-index e921225b5ec..601a6f55fbf 100644
---- a/gcc/tree-vect-stmts.c
-+++ b/gcc/tree-vect-stmts.c
-@@ -10369,7 +10369,7 @@ vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 
-       if (!slp_node)
- 	{
--	  if (swap_p)
-+	  if (swap_p && j == 0)
- 	    std::swap (vec_rhs1, vec_rhs2);
- 	  vec_oprnds0.quick_push (vec_rhs1);
- 	  vec_oprnds1.quick_push (vec_rhs2);
diff --git a/vectorization-enhancement.patch b/vectorization-enhancement.patch
deleted file mode 100644
index 3c7f0afe78fe62434a34fe82ba3364ba7c953a49..0000000000000000000000000000000000000000
--- a/vectorization-enhancement.patch
+++ /dev/null
@@ -1,20239 +0,0 @@
-This backport contains 128 patches from the gcc mainstream tree.
-The commit ids of these patches are listed below in chronological order.
-
-0001-Aarch64-SVE-Dot-product-support.patch
-9feeafd7f95ea9f7211908c137c60074b3a52da2
-
-0002-tree-vect-stmts.c-get_group_load_store_type-Avoid-pe.patch
-419c5f99876d9ee517f6b646dd785cdcaf5cb6fe
-
-0003-re-PR-tree-optimization-90358-526.blender_r-train-ru.patch
-898758504fa87d9f5e72c2c8b32139b413276a10
-
-0004-tree-vect-slp.c-vect_build_slp_tree_2-Bump-size-when.patch
-9f708a844853eb2fe87e696d27de14cbd68896f8
-
-0005-cfgloop.h-struct-loop-Add-simdlen-member.patch
-f63445e56c265757ebd50dc12fcd01773341b49f
-
-0006-Current-vectoriser-doesn-t-support-masked-loads-for-.patch
-997636716c5dde7d59d026726a6f58918069f122
-
-0007-tree-vrp.h-value_range_base-nonzero_p-New.patch
-f2b00d2ba461d6dafdeccf6d93828b349b5e7f76
-
-0008-AArch64-PR-tree-optimization-90332-Implement-vec_ini.patch
-41dab855dce20d5d7042c9330dd8124d0ece19c0
-
-0009-Fix-a-thinko-in-tree-ssa-loop.c.patch
-cc261f66c268107b120add99942d729b3a489452
-
-0010-re-PR-tree-optimization-90883-Generated-code-is-wors.patch
-3fe0ddc88334f9afd622458653a6d103948994bd
-
-0011-re-PR-tree-optimization-90883-Generated-code-is-wors.patch
-08c1638dab9becfafc65064891c1c59f5711c27f
-
-0012-Remove-quite-obvious-dead-assignments.patch
-45309d286c80ecad8b7a4efba0e9aba35d847af6
-
-0013-Fix-various-issues-seen-with-clang-static-analyzer.patch
-ef874db611879d5004e1d834543e55d31f2bfe1c
-
-0014-re-PR-tree-optimization-91033-ICE-in-vect_analyze_lo.patch
-a7b3509eb6aa51d696be5edba6f4e451ceff03a0
-
-0015-re-PR-tree-optimization-91069-Miscompare-of-453.povr.patch
-75da268e1a563a1a52389cd2ecee12d07c45a655
-
-0016-tree-vrp.c-extract_range_from_multiplicative_op-Add-.patch
-e2cfa983c31fa7886f496a47feb8714297ca0063
-
-0017-re-PR-tree-optimization-91257-Compile-time-and-memor.patch
-a55d6091230ae8d0d6f6c20dcc55158f6705090e
-
-0018-re-PR-tree-optimization-91257-Compile-time-and-memor.patch
-ce52e0ffb4f1ea7bd4fb99aea5dda75d260e438f
-
-0019-Enforce-canonicalization-in-value_range.patch
-c7cf3a9bb00b6d64ba0c0e0761f000758e9428a6
-
-0020-tree-vectorizer.h-get_initial_def_for_reduction-Remo.patch
-5fdd6038147e4ba30c8c01332dae8ab0d717bc14
-
-0021-tree-parloops.c-report_ploop_op-Copy-from-report_vec.patch
-31de92e39bbeffb9f1641d292e94b48f70809ae1
-
-0022-tree-vect-loop.c-vect_is_simple_reduction-Remove-ope.patch
-901083b9bdf69a7b1382f9682c6fd1d5759667dd
-
-0023-Enforce-correct-COND_EXPR-order-for-EXTRACT_LAST_RED.patch
-c449d3ae28ff4e133114fb67dbf7dcc7a95ca5d5
-
-0024-tree-vect-loop.c-vect_is_slp_reduction-Remove.patch
-b3c4d0dd309b7027f6e0f0b9a84829fcd53f7d64
-
-0025-re-PR-tree-optimization-91822-FAIL-gcc.dg-pr88031.c-.patch
-6e222b2a3aede20f3093802d1649e75848e3bd2b
-
-0026-re-PR-target-91269-unaligned-floating-point-register.patch
-d63eadac7db10d4846bdffa93fd164cb035fb102
-
-0027-tree-vect-loop.c-get_initial_def_for_reduction-Simpl.patch
-d469a71e5a0eb512b522248841c56496abca8cd6
-
-0028-tree-vectorizer.h-_stmt_vec_info-const_cond_reduc_co.patch
-a7701dd16103048432ec8051e4773760c0e2cf90
-
-0029-re-PR-tree-optimization-91896-ICE-in-vect_get_vec_de.patch
-fadb01364d36a50836201bc9a6a03e525d267967
-
-0030-tree-vect-loop.c-vect_analyze_loop_operations-Also-c.patch
-9593e8e5e391e77bb065d4689b7511bed6a640a3
-
-0031-tree-vect-loop.c-vect_analyze_loop_operations-Analyz.patch
-1b4dbccc1f828fa00e6acc8b88d24301c65552df
-
-0032-Fix-reduc_index-1-handling-for-COND_REDUCTION-PR9190.patch
-18908a56e18f15f84a91a4529923dd0878b2294f
-
-0033-tree-vectorizer.h-_stmt_vec_info-reduc_fn-New.patch
-29f26978866f32bddd656847441a3a953ffd7a21
-
-0034-gimple.c-gimple_get_lhs-For-PHIs-return-the-result.patch
-61362d9d18916bd5b694385982cf4a02b7537b0e
-
-0035-tree-vect-loop.c-vectorizable_reduction-Move-variabl.patch
-c7ea76ea5629e9f0357de49847274cf80e35f2f8
-
-0036-tree-if-conv.c-tree_if_conversion-Move-call-to-ifcvt.patch
-f30b3d2891cef9803badb3f85d739c0fcfafd585
-
-0037-tree-vectorizer.h-stmt_vec_info_type-cycle_phi_info_.patch
-291fa23ac04e317877c1e102937532f080180bb2
-
-0038-re-PR-tree-optimization-91940-__builtin_bswap16-loop.patch
-9ff9a0a5e6edd8729f559bf86ca06f781c4da246
-
-0039-tree-vectorizer.h-vect_transform_reduction-Declare.patch
-9f4d9a366b3299c276043ab987234c7bed7d29f2
-
-0040-re-PR-target-91982-gcc.target-aarch64-sve-clastb_-.c.patch
-48528394eafa9d1db9f956570f910c76d429a3e5
-
-0041-re-PR-tree-optimization-91532-SVE-Redundant-predicat.patch
-b238b34ea47222ffca7addc5fe4e8c052ade88b3
-
-0042-tree-vectorizer.h-_stmt_vec_info-v_reduc_type-Remove.patch
-69f8c1aef5cdcc54d5cb2ca4f99f4f26c2f822a9
-
-0043-tree-vectorizer.h-_stmt_vec_info-reduc_vectype_in-Ne.patch
-f78347996e02a8a767a525bfb764e769afe29d67
-
-0044-tree-vect-loop.c-vect_is_simple_reduction-Simplify-a.patch
-4a8841c0413d52261a8d024577381582d07a866a
-
-0045-re-PR-tree-optimization-92069-ice-in-vect_analyze_sc.patch
-7bd8bec53f0e43c7a7852c54650746e65324514b
-
-0046-Deal-with-incoming-POLY_INT_CST-ranges-PR92033.patch
-96eb7d7a642085f651e9940f0ee75568d7c4441d
-
-0047-tree-vect-loop.c-vect_valid_reduction_input_p-Remove.patch
-aab8c2fd6542a52663243eec160b80bdd61516d5
-
-0048-tree-vect-loop.c-needs_fold_left_reduction_p-Export.patch
-aa9dffac731d0359a0e7a925ff8f4a1bef182eac
-
-0049-vect-Refactor-versioning-threshold.patch
-a421fe9e610b5dbfce1913cd724c8ba193addd47
-
-0050-vect-Outline-code-into-new-function-determine_peel_f.patch
-31b35fd503e1c6713839db24044812d237aba5f1
-
-0051-vect-Be-consistent-in-versioning-threshold-use.patch
-f261d4808cc28a2dfd47fe06c97364c0869bb78f
-
-0052-tree-vect-loop.c-check_reduction_path-Compute-reduct.patch
-58baf7ab85cbb1068a651c96f7d56e2902ead6cc
-
-0053-tree-vectorizer.h-_stmt_vec_info-cond_reduc_code-Rem.patch
-c11cccc0285f02f117a1e80924fb7673b6486ce9
-
-0054-re-PR-target-86753-gcc.target-aarch64-sve-vcond_-45-.patch
-cc1facefe3b4e3b067d95291a7dba834b830ff18
-
-0055-Avoid-recomputing-data-references-in-BB-SLP.patch
-fa0c8df71d4f0476834db0b7cd88524878b46cf7
-
-0056-Move-code-out-of-vect_slp_analyze_bb_1.patch
-1d778697b37aec23db5b6003dfe08d2d78bd9424
-
-0057-Avoid-setting-current_vector_size-in-get_vec_alignme.patch
-da157e2ee9e12348df78246ee33b244b7cc334df
-
-0058-Pass-a-vec_info-to-vect_supportable_shift.patch
-a5c3185a503fbdbc1bf05efe8ab9d12850a211c1
-
-0059-Pass-a-vec_info-to-vect_supportable_direct_optab_p.patch
-dcab2a0d1d4b2c0b4bba6f5e3834ec0678a2a5c8
-
-0060-Pass-a-vec_info-to-get_mask_type_for_scalar_type.patch
-1bd5196c9b1a0cd7280adadd6d788f81a82ca023
-
-0061-Pass-a-vec_info-to-get_vectype_for_scalar_type.patch
-7ed54790da87bbb4a134020a9fb8bd1b72fd0acb
-
-0062-Pass-a-vec_info-to-duplicate_and_interleave.patch
-cdbe6e9bb4ae2882f77f94993783085fa342a9f9
-
-0063-Pass-a-vec_info-to-can_duplicate_and_interleave_p.patch
-43fdde5738ea0554fa000987e9769add027f4876
-
-0064-Pass-a-vec_info-to-simple_integer_narrowing.patch
-6c261c667801eee46a6221d3681d17493c0bbd65
-
-0065-Pass-a-vec_info-to-supportable_narrowing_operation.patch
-db8374a63fd0ea84f72ac76cc899be44df36df6a
-
-0066-Pass-a-loop_vec_info-to-vect_maybe_permute_loop_mask.patch
-b0dab10e71b03441beefbbf951c0812056413cd3
-
-0067-Pass-a-vec_info-to-vect_halve_mask_nunits.patch
-830e90dab3dee5c8129c7760ff09ab112c2cd271
-
-0068-Pass-a-vec_info-to-vect_double_mask_nunits.patch
-8d1473958808fe4714ec24991ac83ee6cbf45397
-
-0069-Replace-current_vector_size-with-vec_info-vector_siz.patch
-ba7f76dd6bbf038948bbe516764a8bb0c851f750
-
-0070-tree-vectorizer.h-_slp_tree-ops-New-member.patch
-30c0d1e3cf8b03992e08cfd00ccf1fcb638d3c03
-
-0071-re-PR-tree-optimization-92162-ICE-in-vect_create_epi.patch
-53b15ca96116544a7a3ca8bc5f4e1649b74f3d45
-
-0072-Fix-use-after-free-in-vector_size-change.patch
-87121696fb2ddbec5f33daa359234850f7fd306d
-
-0073-re-PR-tree-optimization-92173-ICE-in-optab_for_tree_.patch
-9107d6526b938eba8168025c0d90d06ad3634e69
-
-0074-re-PR-tree-optimization-92173-ICE-in-optab_for_tree_.patch
-6c7b0df8029d01e05577668333660d0bc58a3023
-
-0075-AArch64-Don-t-apply-mode_for_int_vector-to-scalars.patch
-d7814449f229cecdee48afe381519a61ea7e3378
-
-0076-re-PR-tree-optimization-65930-Reduction-with-sign-ch.patch
-82e8e335f917b9ce40801838c06f7945cf88da43
-
-0077-re-PR-tree-optimization-92205-ICE-in-vect_get_vec_de.patch
-e227594789d909fbad56f6036910938678738f92
-
-0078-tree-vect-slp.c-vect_get_and_check_slp_defs-For-redu.patch
-4352288a3df915575a2b820f702242908740106f
-
-0079-tree-vect-loop.c-vectorizable_reduction-Verify-STMT_.patch
-ea133b14f48ed5730748a7e02e322fb07ccc2d85
-
-0080-Fix-reductions-for-fully-masked-loops.patch
-89d0345ad7b8d84045813972ee60557a6b511c57
-
-0081-tree-vect-loop.c-vect_create_epilog_for_reduction-Us.patch
-e0c4f7fbd6a4ee8e3a1468514044bd941fa28522
-
-0082-re-PR-tree-optimization-92241-ice-in-vect_mark_patte.patch
-97c6bea819ec0a773041308e62a7c05c33f093b0
-
-0083-re-PR-tree-optimization-65930-Reduction-with-sign-ch.patch
-b7ff7cef5005721e78d6936bed3ae1c059b4e8d2
-
-0084-Fix-reduc_index-calculation-in-vectorizable_conditio.patch
-1d149b7260bcc4c0c6367b3aea47a8b91a1cf345
-
-0085-vect-PR-88915-Vectorize-epilogues-when-versioning-lo.patch
-97c146036750e7cb3966d292572ec158a78f356e
-
-0086-re-PR-tree-optimization-65930-Reduction-with-sign-ch.patch
-b4673569c2a8b974e3f84ffaa547941c5d40cfe5
-
-0087-Come-up-with-an-abstraction.patch
-7f4a8ee03d404c560dcb75ba684fd57ffbc77e85
-
-0088-re-PR-tree-optimization-92275-ICE-error-definition-i.patch
-b81f2dafdbd2c5aa49213b35dc12d4610834e39e
-
-0089-vect-Make-vect-epilogues-nomask-1-default.patch
-1297712fb4af6c6bfd827e0f0a9695b14669f87d
-
-0090-vect-Clean-up-orig_loop_vinfo-from-vect_analyze_loop.patch
-494d6c28c53d0852bb6468b1f1ca189159775fcc
-
-0091-re-PR-tree-optimization-92371-ICE-in-info_for_reduct.patch
-02bf7e6fa219f939b3225c54fbe8bab2133b1aeb
-
-0092-vect-PR92317-fix-skip_epilogue-creation-for-epilogue.patch
-2e7a4f579b1157754ea20a03431b4fa80cd4567a
-
-0093-Restructure-vect_analyze_loop.patch
-72d6aeecd95ec49fff1d258e4631167a03351cbb
-
-0094-Check-the-VF-is-small-enough-for-an-epilogue-loop.patch
-8ec5b16a9a3dbd6d825596c22f1bc32646de28fe
-
-0095-tree-vect-loop.c-vectorizable_reduction-Remember-red.patch
-06af1f1a0def9de076ec629ea634122f15882ce6
-
-0096-Don-t-vectorise-single-iteration-epilogues.patch
-4b205bf82d06c4d9d0ae7b78e54c712d79d5b021
-
-0097-re-PR-tree-optimization-92405-ICE-in-vect_get_vec_de.patch
-084d390246c2172853f9e12ce04aef23cba79590
-
-0098-re-PR-tree-optimization-92324-ICE-in-expand_direct_o.patch
-f1e1ed3314b7c6308f64cbbcf6d1916e239c8e35
-
-0099-vect-Disable-vectorization-of-epilogues-for-loops-wi.patch
-b602712b3ea2a0729a2eda61bd9ee795aba6138f
-
-0100-Use-correct-vector-type-in-neutral_op_for_slp_reduct.patch
-d308ca27c71e43625b378dc6c2774105867d4fa7
-
-0101-vect-Account-for-epilogue-s-peeling-for-gaps-when-ch.patch
-87b47251924c7539a9a8e191587d118a14496473
-
-0102-Add-a-targetm.vectorize.related_mode-hook.patch
-f09552335030433018fd5f7f6b9848339b5ca2da
-
-0103-Replace-mode_for_int_vector-with-related_int_vector_.patch
-d083ee47a9828236016841356fc7207e7c90bbbd
-
-0104-Add-build_truth_vector_type_for_mode.patch
-0a0ef2387cc1561d537d8d949aef9479ef17ba35
-
-0105-Remove-build_-same_sized_-truth_vector_type.patch
-e8738f4e9686203451fd11f05b268b8a31b95ebd
-
-0106-Pass-the-data-vector-mode-to-get_mask_mode.patch
-10116ec1c147a76522cafba6b6a5b4ed1cb37b77
-
-0107-Use-build_vector_type_for_mode-in-get_vectype_for_sc.patch
-95da266b86fcdeff84fcadc5e3cde3d0027e571d
-
-0108-Use-consistent-compatibility-checks-in-vectorizable_.patch
-0203c4f3bfb3e3242635b0cee0b9deedb4070a62
-
-0109-Use-consistent-compatibility-checks-in-vectorizable_.patch
-e021fb865564b62a10adb1e98f75b5ea05058047
-
-0110-Replace-vec_info-vector_size-with-vec_info-vector_mo.patch
-1c84a2d25ecd4c03dde745f36a4762dd45f97c85
-
-0111-Make-less-use-of-get_same_sized_vectype.patch
-2df4150075c03f8a292c40afd3bb25febb673578
-
-0112-Require-equal-type-sizes-for-vectorised-calls.patch
-7f52eb891b738337d5cf82c7c440a5eea8c7b0c9
-
-0113-Support-vectorisation-with-mixed-vector-sizes.patch
-df7c22831f1e48dba49479c5960c1c180d8eab2c
-
-0114-Avoid-retrying-with-the-same-vector-modes.patch
-a55d8232df3dd4f7a3f5b70025074c3919b802a6
-
-0115-AArch64-Support-vectorising-with-multiple-vector-siz.patch
-74166aabeb7f22990476b1169bba031b8323ee92
-
-0116-Allow-mixed-vector-sizes-within-a-single-vectorised-.patch
-05101d1b575a57ca26e4275e971da85a0dd1d52a
-
-0117-Vectorise-conversions-between-differently-sized-inte.patch
-9c437a108a14b9bdc44659c131b0da944e5ffeab
-
-0118-Consider-building-nodes-from-scalars-in-vect_slp_ana.patch
-60838d634634a70d65a126166c944b159ac7649c
-
-0119-Optionally-pick-the-cheapest-loop_vec_info.patch
-bcc7e346bf9b5dc77797ea949d6adc740deb30ca
-
-0120-Move-canonicalisation-of-dr_with_seg_len_pair_ts.patch
-1fb2b0f69ee849142b669ba1b82264ce6d0f75f9
-
-0121-Delay-swapping-data-refs-in-prune_runtime_alias_test.patch
-97602450b04e94aff034381bf6ee4236b95727ed
-
-0122-Add-flags-to-dr_with_seg_len_pair_t.patch
-e9acf80c96d681917d930869b7cbfb7d2fa54d51
-
-0123-Record-whether-a-dr_with_seg_len-contains-mixed-step.patch
-52c29905259363ce2b78dd7aa8a25cf531cddb3a
-
-0124-Dump-the-list-of-merged-alias-pairs.patch
-cad984b289e2b3aca786314c673339eb0500fefa
-
-0125-Print-the-type-of-alias-check-in-a-dump-message.patch
-b4d1b635737a4780e5be247f8be9550eaf83dae5
-
-0126-Use-a-single-comparison-for-index-based-alias-checks.patch
-f9d6338bd15ce1fae36bf25d3a0545e9678ddc58
-
-0127-Optimise-WAR-and-WAW-alias-checks.patch
-8489e1f45b50600c01eb8ed8c5d0ca914ded281c
-
-0128-Avoid-quadratic-behaviour-in-prune_runtime_alias_tes.patch
-ea1ff9e46c7ec5e49ec671616cfcf405ef665054
-
-diff --git a/gcc/asan.c b/gcc/asan.c
-index 3b800b26b69..605d04f87f7 100644
---- a/gcc/asan.c
-+++ b/gcc/asan.c
-@@ -1713,8 +1713,8 @@ asan_emit_allocas_unpoison (rtx top, rtx bot, rtx_insn *before)
-   rtx ret = init_one_libfunc ("__asan_allocas_unpoison");
-   top = convert_memory_address (ptr_mode, top);
-   bot = convert_memory_address (ptr_mode, bot);
--  ret = emit_library_call_value (ret, NULL_RTX, LCT_NORMAL, ptr_mode,
--				 top, ptr_mode, bot, ptr_mode);
-+  emit_library_call (ret, LCT_NORMAL, ptr_mode,
-+		     top, ptr_mode, bot, ptr_mode);
- 
-   do_pending_stack_adjust ();
-   rtx_insn *insns = get_insns ();
-diff --git a/gcc/bt-load.c b/gcc/bt-load.c
-index a7d9d53954e..f68879ca49a 100644
---- a/gcc/bt-load.c
-+++ b/gcc/bt-load.c
-@@ -1169,7 +1169,6 @@ move_btr_def (basic_block new_def_bb, int btr, btr_def *def, bitmap live_range,
- 
-   if (def->other_btr_uses_before_def)
-     {
--      insp = BB_END (b);
-       for (insp = BB_END (b); ! INSN_P (insp); insp = PREV_INSN (insp))
- 	gcc_assert (insp != BB_HEAD (b));
- 
-diff --git a/gcc/builtins.c b/gcc/builtins.c
-index ed11f79ff0b..910e614a4d1 100644
---- a/gcc/builtins.c
-+++ b/gcc/builtins.c
-@@ -1653,11 +1653,8 @@ expand_builtin_apply_args_1 (void)
-   /* Save the structure value address unless this is passed as an
-      "invisible" first argument.  */
-   if (struct_incoming_value)
--    {
--      emit_move_insn (adjust_address (registers, Pmode, size),
--		      copy_to_reg (struct_incoming_value));
--      size += GET_MODE_SIZE (Pmode);
--    }
-+    emit_move_insn (adjust_address (registers, Pmode, size),
-+		    copy_to_reg (struct_incoming_value));
- 
-   /* Return the address of the block.  */
-   return copy_addr_to_reg (XEXP (registers, 0));
-@@ -1806,7 +1803,6 @@ expand_builtin_apply (rtx function, rtx arguments, rtx argsize)
-       emit_move_insn (struct_value, value);
-       if (REG_P (struct_value))
- 	use_reg (&call_fusage, struct_value);
--      size += GET_MODE_SIZE (Pmode);
-     }
- 
-   /* All arguments and registers used for the call are set up by now!  */
-diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c
-index c0582a54c93..cb999cbf82f 100644
---- a/gcc/c/c-typeck.c
-+++ b/gcc/c/c-typeck.c
-@@ -5424,7 +5424,7 @@ build_conditional_expr (location_t colon_loc, tree ifexp, bool ifexp_bcp,
-       tree elem_type = TREE_TYPE (vectype);
-       tree zero = build_int_cst (elem_type, 0);
-       tree zero_vec = build_vector_from_val (vectype, zero);
--      tree cmp_type = build_same_sized_truth_vector_type (vectype);
-+      tree cmp_type = truth_type_for (vectype);
-       ifexp = build2 (NE_EXPR, cmp_type, ifexp, zero_vec);
-     }
- 
-@@ -11327,7 +11327,7 @@ build_vec_cmp (tree_code code, tree type,
- {
-   tree zero_vec = build_zero_cst (type);
-   tree minus_one_vec = build_minus_one_cst (type);
--  tree cmp_type = build_same_sized_truth_vector_type (type);
-+  tree cmp_type = truth_type_for (type);
-   tree cmp = build2 (code, cmp_type, arg0, arg1);
-   return build3 (VEC_COND_EXPR, type, cmp, minus_one_vec, zero_vec);
- }
-diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
-index e252975f546..4ae8e3b3297 100644
---- a/gcc/cfgexpand.c
-+++ b/gcc/cfgexpand.c
-@@ -3029,7 +3029,6 @@ expand_asm_stmt (gasm *stmt)
- 	      }
- 	}
-     }
--  unsigned nclobbers = clobber_rvec.length();
- 
-   /* First pass over inputs and outputs checks validity and sets
-      mark_addressable if needed.  */
-@@ -3301,7 +3300,7 @@ expand_asm_stmt (gasm *stmt)
-   gcc_assert (constraints.length() == noutputs + ninputs);
- 
-   /* But it certainly can adjust the clobbers.  */
--  nclobbers = clobber_rvec.length();
-+  unsigned nclobbers = clobber_rvec.length ();
- 
-   /* Third pass checks for easy conflicts.  */
-   /* ??? Why are we doing this on trees instead of rtx.  */
-@@ -5979,11 +5978,11 @@ construct_init_block (void)
-     {
-       first_block = e->dest;
-       redirect_edge_succ (e, init_block);
--      e = make_single_succ_edge (init_block, first_block, flags);
-+      make_single_succ_edge (init_block, first_block, flags);
-     }
-   else
--    e = make_single_succ_edge (init_block, EXIT_BLOCK_PTR_FOR_FN (cfun),
--			       EDGE_FALLTHRU);
-+    make_single_succ_edge (init_block, EXIT_BLOCK_PTR_FOR_FN (cfun),
-+			   EDGE_FALLTHRU);
- 
-   update_bb_for_insn (init_block);
-   return init_block;
-diff --git a/gcc/cfghooks.c b/gcc/cfghooks.c
-index a1d603a207e..a18b6490bdd 100644
---- a/gcc/cfghooks.c
-+++ b/gcc/cfghooks.c
-@@ -253,8 +253,6 @@ verify_flow_info (void)
- 	err = 1;
-       }
- 
--  last_bb_seen = ENTRY_BLOCK_PTR_FOR_FN (cfun);
--
-   /* Clean up.  */
-   free (last_visited);
-   free (edge_checksum);
-diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h
-index b78d87d22f1..98bf6d2adda 100644
---- a/gcc/cfgloop.h
-+++ b/gcc/cfgloop.h
-@@ -174,6 +174,9 @@ struct GTY ((chain_next ("%h.next"))) loop {
-      of the loop can be safely evaluated concurrently.  */
-   int safelen;
- 
-+  /* Preferred vectorization factor for the loop if non-zero.  */
-+  int simdlen;
-+
-   /* Constraints are generally set by consumers and affect certain
-      semantics of niter analyzer APIs.  Currently the APIs affected are
-      number_of_iterations_exit* functions and their callers.  One typical
-diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c
-index ea4b914c15b..8fc697ecf5d 100644
---- a/gcc/cfgloopmanip.c
-+++ b/gcc/cfgloopmanip.c
-@@ -364,7 +364,6 @@ remove_path (edge e, bool *irred_invalidated,
- 
-   for (i = 0; i < nrem; i++)
-     {
--      bb = rem_bbs[i];
-       FOR_EACH_EDGE (ae, ei, rem_bbs[i]->succs)
- 	if (ae->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
- 	    && !bitmap_bit_p (seen, ae->dest->index))
-@@ -1016,6 +1015,7 @@ copy_loop_info (struct loop *loop, struct loop *target)
-   target->nb_iterations_estimate = loop->nb_iterations_estimate;
-   target->estimate_state = loop->estimate_state;
-   target->safelen = loop->safelen;
-+  target->simdlen = loop->simdlen;
-   target->constraints = loop->constraints;
-   target->can_be_parallel = loop->can_be_parallel;
-   target->warned_aggressive_loop_optimizations
-diff --git a/gcc/cfgrtl.c b/gcc/cfgrtl.c
-index 08e534f2485..b5f15907bde 100644
---- a/gcc/cfgrtl.c
-+++ b/gcc/cfgrtl.c
-@@ -2958,7 +2958,6 @@ rtl_verify_bb_layout (void)
-   basic_block last_bb_seen = ENTRY_BLOCK_PTR_FOR_FN (cfun), curr_bb = NULL;
- 
-   num_bb_notes = 0;
--  last_bb_seen = ENTRY_BLOCK_PTR_FOR_FN (cfun);
- 
-   for (x = rtx_first; x; x = NEXT_INSN (x))
-     {
-diff --git a/gcc/cgraph.c b/gcc/cgraph.c
-index a16f4668b3c..bed6838d22b 100644
---- a/gcc/cgraph.c
-+++ b/gcc/cgraph.c
-@@ -2717,8 +2717,6 @@ bool
- cgraph_node::set_pure_flag (bool pure, bool looping)
- {
-   struct set_pure_flag_info info = {pure, looping, false};
--  if (!pure)
--    looping = false;
-   call_for_symbol_thunks_and_aliases (set_pure_flag_1, &info, !pure, true);
-   return info.changed;
- }
-diff --git a/gcc/combine.c b/gcc/combine.c
-index 567aa2c3715..b9d674c96cc 100644
---- a/gcc/combine.c
-+++ b/gcc/combine.c
-@@ -6591,7 +6591,6 @@ simplify_if_then_else (rtx x)
- 	  || reg_mentioned_p (true_rtx, false_rtx)
- 	  || rtx_equal_p (false_rtx, XEXP (cond, 0))))
-     {
--      true_code = reversed_comparison_code (cond, NULL);
-       SUBST (XEXP (x, 0), reversed_comparison (cond, GET_MODE (cond)));
-       SUBST (XEXP (x, 1), false_rtx);
-       SUBST (XEXP (x, 2), true_rtx);
-diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
-index e3852c5d182..28f93a70801 100644
---- a/gcc/config/aarch64/aarch64-simd.md
-+++ b/gcc/config/aarch64/aarch64-simd.md
-@@ -3183,7 +3183,7 @@
- ;; In this insn, operand 1 should be low, and operand 2 the high part of the
- ;; dest vector.
- 
--(define_insn "*aarch64_combinez<mode>"
-+(define_insn "@aarch64_combinez<mode>"
-   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
- 	(vec_concat:<VDBL>
- 	  (match_operand:VDC 1 "general_operand" "w,?r,m")
-@@ -3197,7 +3197,7 @@
-    (set_attr "arch" "simd,fp,simd")]
- )
- 
--(define_insn "*aarch64_combinez<mode>_be"
-+(define_insn "@aarch64_combinez<mode>_be"
-   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
-         (vec_concat:<VDBL>
- 	  (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
-@@ -5926,6 +5926,15 @@
-   DONE;
- })
- 
-+(define_expand "vec_init<mode><Vhalf>"
-+  [(match_operand:VQ_NO2E 0 "register_operand" "")
-+   (match_operand 1 "" "")]
-+  "TARGET_SIMD"
-+{
-+  aarch64_expand_vector_init (operands[0], operands[1]);
-+  DONE;
-+})
-+
- (define_insn "*aarch64_simd_ld1r"
-   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
- 	(vec_duplicate:VALL_F16
-@@ -6937,3 +6946,21 @@
-   "pmull2\\t%0.1q, %1.2d, %2.2d"
-   [(set_attr "type" "crypto_pmull")]
- )
-+
-+;; Sign- or zero-extend a 64-bit integer vector to a 128-bit vector.
-+(define_insn "<optab><Vnarrowq><mode>2"
-+  [(set (match_operand:VQN 0 "register_operand" "=w")
-+	(ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
-+  "TARGET_SIMD"
-+  "<su>xtl\t%0.<Vtype>, %1.<Vntype>"
-+  [(set_attr "type" "neon_shift_imm_long")]
-+)
-+
-+;; Truncate a 128-bit integer vector to a 64-bit vector.
-+(define_insn "trunc<mode><Vnarrowq>2"
-+  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
-+	(truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
-+  "TARGET_SIMD"
-+  "xtn\t%0.<Vntype>, %1.<Vtype>"
-+  [(set_attr "type" "neon_shift_imm_narrow_q")]
-+)
-diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
-index 3f39c4c5b63..02d33b7276f 100644
---- a/gcc/config/aarch64/aarch64-sve.md
-+++ b/gcc/config/aarch64/aarch64-sve.md
-@@ -3132,3 +3132,19 @@
-     DONE;
-   }
- )
-+
-+;; Unpredicated DOT product.
-+(define_insn "<sur>dot_prod<vsi2qi>"
-+  [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
-+	(plus:SVE_SDI
-+	  (unspec:SVE_SDI
-+	    [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
-+	     (match_operand:<VSI2QI> 2 "register_operand" "w, w")]
-+	    DOTPROD)
-+	  (match_operand:SVE_SDI 3 "register_operand" "0, w")))]
-+  "TARGET_SVE"
-+  "@
-+   <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>
-+   movprfx\t%0, %3\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>"
-+  [(set_attr "movprfx" "*,yes")]
-+)
-diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
-index 2ff0bc0a686..128c250dffe 100644
---- a/gcc/config/aarch64/aarch64.c
-+++ b/gcc/config/aarch64/aarch64.c
-@@ -1549,17 +1549,37 @@ aarch64_sve_pred_mode (unsigned int elem_nbytes)
- /* Implement TARGET_VECTORIZE_GET_MASK_MODE.  */
- 
- static opt_machine_mode
--aarch64_get_mask_mode (poly_uint64 nunits, poly_uint64 nbytes)
-+aarch64_get_mask_mode (machine_mode mode)
- {
--  if (TARGET_SVE && known_eq (nbytes, BYTES_PER_SVE_VECTOR))
-+  unsigned int vec_flags = aarch64_classify_vector_mode (mode);
-+  if (vec_flags & VEC_SVE_DATA)
-+    return aarch64_sve_pred_mode (GET_MODE_UNIT_SIZE (mode));
-+
-+  return default_get_mask_mode (mode);
-+}
-+
-+/* Implement TARGET_VECTORIZE_RELATED_MODE.  */
-+
-+static opt_machine_mode
-+aarch64_vectorize_related_mode (machine_mode vector_mode,
-+				scalar_mode element_mode,
-+				poly_uint64 nunits)
-+{
-+  unsigned int vec_flags = aarch64_classify_vector_mode (vector_mode);
-+
-+  /* Prefer to use 1 128-bit vector instead of 2 64-bit vectors.  */
-+  if ((vec_flags & VEC_ADVSIMD)
-+      && known_eq (nunits, 0U)
-+      && known_eq (GET_MODE_BITSIZE (vector_mode), 64U)
-+      && maybe_ge (GET_MODE_BITSIZE (element_mode)
-+		   * GET_MODE_NUNITS (vector_mode), 128U))
-     {
--      unsigned int elem_nbytes = vector_element_size (nbytes, nunits);
--      machine_mode pred_mode;
--      if (aarch64_sve_pred_mode (elem_nbytes).exists (&pred_mode))
--	return pred_mode;
-+      machine_mode res = aarch64_simd_container_mode (element_mode, 128);
-+      if (VECTOR_MODE_P (res))
-+	return res;
-     }
- 
--  return default_get_mask_mode (nunits, nbytes);
-+  return default_vectorize_related_mode (vector_mode, element_mode, nunits);
- }
- 
- /* Implement TARGET_PREFERRED_ELSE_VALUE.  For binary operations,
-@@ -10897,7 +10917,9 @@ aarch64_emit_approx_sqrt (rtx dst, rtx src, bool recp)
-     /* Caller assumes we cannot fail.  */
-     gcc_assert (use_rsqrt_p (mode));
- 
--  machine_mode mmsk = mode_for_int_vector (mode).require ();
-+  machine_mode mmsk = (VECTOR_MODE_P (mode)
-+		       ? related_int_vector_mode (mode).require ()
-+		       : int_mode_for_mode (mode).require ());
-   rtx xmsk = gen_reg_rtx (mmsk);
-   if (!recp)
-     /* When calculating the approximate square root, compare the
-@@ -14226,13 +14248,34 @@ aarch64_preferred_simd_mode (scalar_mode mode)
- 
- /* Return a list of possible vector sizes for the vectorizer
-    to iterate over.  */
--static void
--aarch64_autovectorize_vector_sizes (vector_sizes *sizes)
-+static unsigned int
-+aarch64_autovectorize_vector_modes (vector_modes *modes, bool)
- {
-   if (TARGET_SVE)
--    sizes->safe_push (BYTES_PER_SVE_VECTOR);
--  sizes->safe_push (16);
--  sizes->safe_push (8);
-+    modes->safe_push (VNx16QImode);
-+
-+  /* Try using 128-bit vectors for all element types.  */
-+  modes->safe_push (V16QImode);
-+
-+  /* Try using 64-bit vectors for 8-bit elements and 128-bit vectors
-+     for wider elements.  */
-+  modes->safe_push (V8QImode);
-+
-+  /* Try using 64-bit vectors for 16-bit elements and 128-bit vectors
-+     for wider elements.
-+
-+     TODO: We could support a limited form of V4QImode too, so that
-+     we use 32-bit vectors for 8-bit elements.  */
-+  modes->safe_push (V4HImode);
-+
-+  /* Try using 64-bit vectors for 32-bit elements and 128-bit vectors
-+     for 64-bit elements.
-+
-+     TODO: We could similarly support limited forms of V2QImode and V2HImode
-+     for this case.  */
-+  modes->safe_push (V2SImode);
-+
-+  return 0;
- }
- 
- /* Implement TARGET_MANGLE_TYPE.  */
-@@ -15191,6 +15234,45 @@ aarch64_expand_vector_init (rtx target, rtx vals)
-   rtx v0 = XVECEXP (vals, 0, 0);
-   bool all_same = true;
- 
-+  /* This is a special vec_init where N is not an element mode but a
-+     vector mode with half the elements of M.  We expect to find two entries
-+     of mode N in VALS and we must put their concatentation into TARGET.  */
-+  if (XVECLEN (vals, 0) == 2 && VECTOR_MODE_P (GET_MODE (XVECEXP (vals, 0, 0))))
-+    {
-+      gcc_assert (known_eq (GET_MODE_SIZE (mode),
-+		  2 * GET_MODE_SIZE (GET_MODE (XVECEXP (vals, 0, 0)))));
-+      rtx lo = XVECEXP (vals, 0, 0);
-+      rtx hi = XVECEXP (vals, 0, 1);
-+      machine_mode narrow_mode = GET_MODE (lo);
-+      gcc_assert (GET_MODE_INNER (narrow_mode) == inner_mode);
-+      gcc_assert (narrow_mode == GET_MODE (hi));
-+
-+      /* When we want to concatenate a half-width vector with zeroes we can
-+	 use the aarch64_combinez[_be] patterns.  Just make sure that the
-+	 zeroes are in the right half.  */
-+      if (BYTES_BIG_ENDIAN
-+	  && aarch64_simd_imm_zero (lo, narrow_mode)
-+	  && general_operand (hi, narrow_mode))
-+	emit_insn (gen_aarch64_combinez_be (narrow_mode, target, hi, lo));
-+      else if (!BYTES_BIG_ENDIAN
-+	       && aarch64_simd_imm_zero (hi, narrow_mode)
-+	       && general_operand (lo, narrow_mode))
-+	emit_insn (gen_aarch64_combinez (narrow_mode, target, lo, hi));
-+      else
-+	{
-+	  /* Else create the two half-width registers and combine them.  */
-+	  if (!REG_P (lo))
-+	    lo = force_reg (GET_MODE (lo), lo);
-+	  if (!REG_P (hi))
-+	    hi = force_reg (GET_MODE (hi), hi);
-+
-+	  if (BYTES_BIG_ENDIAN)
-+	    std::swap (lo, hi);
-+	  emit_insn (gen_aarch64_simd_combine (narrow_mode, target, lo, hi));
-+	}
-+     return;
-+   }
-+
-   /* Count the number of variable elements to initialise.  */
-   for (int i = 0; i < n_elts; ++i)
-     {
-@@ -16684,7 +16766,7 @@ aarch64_evpc_sve_tbl (struct expand_vec_perm_d *d)
-   if (d->testing_p)
-     return true;
- 
--  machine_mode sel_mode = mode_for_int_vector (d->vmode).require ();
-+  machine_mode sel_mode = related_int_vector_mode (d->vmode).require ();
-   rtx sel = vec_perm_indices_to_rtx (sel_mode, d->perm);
-   if (d->one_vector_p)
-     emit_unspec2 (d->target, UNSPEC_TBL, d->op0, force_reg (sel_mode, sel));
-@@ -17064,9 +17146,7 @@ void
- aarch64_expand_sve_vcond (machine_mode data_mode, machine_mode cmp_mode,
- 			  rtx *ops)
- {
--  machine_mode pred_mode
--    = aarch64_get_mask_mode (GET_MODE_NUNITS (cmp_mode),
--			     GET_MODE_SIZE (cmp_mode)).require ();
-+  machine_mode pred_mode = aarch64_get_mask_mode (cmp_mode).require ();
-   rtx pred = gen_reg_rtx (pred_mode);
-   if (FLOAT_MODE_P (cmp_mode))
-     {
-@@ -19363,9 +19443,9 @@ aarch64_libgcc_floating_mode_supported_p
- #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
-   aarch64_builtin_vectorized_function
- 
--#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
--#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
--  aarch64_autovectorize_vector_sizes
-+#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
-+#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
-+  aarch64_autovectorize_vector_modes
- 
- #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
- #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
-@@ -19398,6 +19478,8 @@ aarch64_libgcc_floating_mode_supported_p
- #define TARGET_VECTORIZE_VEC_PERM_CONST \
-   aarch64_vectorize_vec_perm_const
- 
-+#undef TARGET_VECTORIZE_RELATED_MODE
-+#define TARGET_VECTORIZE_RELATED_MODE aarch64_vectorize_related_mode
- #undef TARGET_VECTORIZE_GET_MASK_MODE
- #define TARGET_VECTORIZE_GET_MASK_MODE aarch64_get_mask_mode
- #undef TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE
-diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
-index 6caeeac8086..c7ccd5bf6fe 100644
---- a/gcc/config/aarch64/iterators.md
-+++ b/gcc/config/aarch64/iterators.md
-@@ -663,6 +663,9 @@
- 			  (QI "b")   (HI "h")
- 			  (SI "s")   (DI "d")])
- 
-+;; Like Vetype, but map to types that are a quarter of the element size.
-+(define_mode_attr Vetype_fourth [(VNx4SI "b") (VNx2DI "h")])
-+
- ;; Equivalent of "size" for a vector element.
- (define_mode_attr Vesize [(VNx16QI "b")
- 			  (VNx8HI  "h") (VNx8HF  "h")
-@@ -765,6 +768,7 @@
- ;; Half modes of all vector modes, in lower-case.
- (define_mode_attr Vhalf [(V8QI "v4qi")  (V16QI "v8qi")
- 			 (V4HI "v2hi")  (V8HI  "v4hi")
-+			 (V8HF  "v4hf")
- 			 (V2SI "si")    (V4SI  "v2si")
- 			 (V2DI "di")    (V2SF  "sf")
- 			 (V4SF "v2sf")  (V2DF  "df")])
-@@ -800,6 +804,8 @@
- 			    (V2DI "V2SI")
- 			    (DI	  "SI")	  (SI	"HI")
- 			    (HI	  "QI")])
-+(define_mode_attr Vnarrowq [(V8HI "v8qi") (V4SI "v4hi")
-+			    (V2DI "v2si")])
- 
- ;; Narrowed quad-modes for VQN (Used for XTN2).
- (define_mode_attr VNARROWQ2 [(V8HI "V16QI") (V4SI "V8HI")
-@@ -1029,8 +1035,10 @@
- 		      (V2SF "p") (V4SF  "v")
- 		      (V4HF "v") (V8HF  "v")])
- 
--(define_mode_attr vsi2qi [(V2SI "v8qi") (V4SI "v16qi")])
--(define_mode_attr VSI2QI [(V2SI "V8QI") (V4SI "V16QI")])
-+(define_mode_attr vsi2qi [(V2SI "v8qi") (V4SI "v16qi")
-+			  (VNx4SI "vnx16qi") (VNx2DI "vnx8hi")])
-+(define_mode_attr VSI2QI [(V2SI "V8QI") (V4SI "V16QI")
-+			  (VNx4SI "VNx16QI") (VNx2DI "VNx8HI")])
- 
- 
- ;; Register suffix for DOTPROD input types from the return type.
-diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
-index f7ff95a0edf..325dd3cea9a 100644
---- a/gcc/config/arc/arc.c
-+++ b/gcc/config/arc/arc.c
-@@ -477,16 +477,17 @@ arc_preferred_simd_mode (scalar_mode mode)
- }
- 
- /* Implements target hook
--   TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES.  */
-+   TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES.  */
- 
--static void
--arc_autovectorize_vector_sizes (vector_sizes *sizes)
-+static unsigned int
-+arc_autovectorize_vector_modes (vector_modes *modes, bool)
- {
-   if (TARGET_PLUS_QMACW)
-     {
--      sizes->quick_push (8);
--      sizes->quick_push (4);
-+      modes->quick_push (V4HImode);
-+      modes->quick_push (V2HImode);
-     }
-+  return 0;
- }
- 
- 
-@@ -596,8 +597,8 @@ static rtx arc_legitimize_address_0 (rtx, rtx, machine_mode mode);
- #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
- #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arc_preferred_simd_mode
- 
--#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
--#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES arc_autovectorize_vector_sizes
-+#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
-+#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES arc_autovectorize_vector_modes
- 
- #undef TARGET_CAN_USE_DOLOOP_P
- #define TARGET_CAN_USE_DOLOOP_P arc_can_use_doloop_p
-diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
-index cdfc0f9e72f..1a4a4b7bc58 100644
---- a/gcc/config/arm/arm.c
-+++ b/gcc/config/arm/arm.c
-@@ -288,7 +288,7 @@ static bool arm_builtin_support_vector_misalignment (machine_mode mode,
- static void arm_conditional_register_usage (void);
- static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
- static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
--static void arm_autovectorize_vector_sizes (vector_sizes *);
-+static unsigned int arm_autovectorize_vector_modes (vector_modes *, bool);
- static int arm_default_branch_cost (bool, bool);
- static int arm_cortex_a5_branch_cost (bool, bool);
- static int arm_cortex_m_branch_cost (bool, bool);
-@@ -519,9 +519,9 @@ static const struct attribute_spec arm_attribute_table[] =
- #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
- #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
- #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
--#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
--#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
--  arm_autovectorize_vector_sizes
-+#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
-+#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
-+  arm_autovectorize_vector_modes
- 
- #undef  TARGET_MACHINE_DEPENDENT_REORG
- #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
-@@ -28446,14 +28446,15 @@ arm_vector_alignment (const_tree type)
-   return align;
- }
- 
--static void
--arm_autovectorize_vector_sizes (vector_sizes *sizes)
-+static unsigned int
-+arm_autovectorize_vector_modes (vector_modes *modes, bool)
- {
-   if (!TARGET_NEON_VECTORIZE_DOUBLE)
-     {
--      sizes->safe_push (16);
--      sizes->safe_push (8);
-+      modes->safe_push (V16QImode);
-+      modes->safe_push (V8QImode);
-     }
-+  return 0;
- }
- 
- static bool
-diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
-index 99fa45edcd4..eb06ff9e05b 100644
---- a/gcc/config/gcn/gcn.c
-+++ b/gcc/config/gcn/gcn.c
-@@ -3800,8 +3800,7 @@ gcn_expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode,
-    a vector.  */
- 
- opt_machine_mode
--gcn_vectorize_get_mask_mode (poly_uint64 ARG_UNUSED (nunits),
--			     poly_uint64 ARG_UNUSED (length))
-+gcn_vectorize_get_mask_mode (machine_mode)
- {
-   /* GCN uses a DImode bit-mask.  */
-   return DImode;
-diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
-index 1bca5a7eea6..5a0f8a0eb72 100644
---- a/gcc/config/i386/i386.c
-+++ b/gcc/config/i386/i386.c
-@@ -9647,7 +9647,6 @@ ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
-   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
-   CUMULATIVE_ARGS next_cum;
-   tree fntype;
--  int max;
- 
-   gcc_assert (!no_rtl);
- 
-@@ -9663,10 +9662,6 @@ ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
-   if (stdarg_p (fntype))
-     ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
- 			       true);
--
--  max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
--  if (max > X86_64_REGPARM_MAX)
--    max = X86_64_REGPARM_MAX;
- }
- 
- 
-@@ -11806,7 +11801,6 @@ choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
- 	    {
- 	      base_reg = hard_frame_pointer_rtx;
- 	      base_offset = toffset;
--	      len = tlen;
- 	    }
- 	}
-     }
-@@ -39699,12 +39693,10 @@ ix86_preferred_reload_class (rtx x, reg_class_t regclass)
- static reg_class_t
- ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
- {
--  machine_mode mode = GET_MODE (x);
--
-   /* Restrict the output reload class to the register bank that we are doing
-      math on.  If we would like not to return a subset of CLASS, reject this
-      alternative: if reload cannot do this, it will still use its choice.  */
--  mode = GET_MODE (x);
-+  machine_mode mode = GET_MODE (x);
-   if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
-     return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
- 
-@@ -45666,14 +45658,13 @@ ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
- 			       0, OPTAB_DIRECT);
- 
-   /* Compensate.  */
--  tmp = gen_reg_rtx (mode);
-   /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
-   tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
--  emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
-+  emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, tmp, one)));
-   xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
-   /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
-   tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
--  emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp)));
-+  emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, tmp, one)));
-   xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
- 
-   /* res = copysign (xa2, operand1) */
-@@ -50238,27 +50229,42 @@ ix86_split_reduction (machine_mode mode)
-    vectors.  If AVX512F is enabled then try vectorizing with 512bit,
-    256bit and 128bit vectors.  */
- 
--static void
--ix86_autovectorize_vector_sizes (vector_sizes *sizes)
-+static unsigned int
-+ix86_autovectorize_vector_modes (vector_modes *modes, bool all)
- {
-   if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
-     {
--      sizes->safe_push (64);
--      sizes->safe_push (32);
--      sizes->safe_push (16);
-+      modes->safe_push (V64QImode);
-+      modes->safe_push (V32QImode);
-+      modes->safe_push (V16QImode);
-+    }
-+  else if (TARGET_AVX512F && all)
-+    {
-+      modes->safe_push (V32QImode);
-+      modes->safe_push (V16QImode);
-+      modes->safe_push (V64QImode);
-     }
-   else if (TARGET_AVX && !TARGET_PREFER_AVX128)
-     {
--      sizes->safe_push (32);
--      sizes->safe_push (16);
-+      modes->safe_push (V32QImode);
-+      modes->safe_push (V16QImode);
-+    }
-+  else if (TARGET_AVX && all)
-+    {
-+      modes->safe_push (V16QImode);
-+      modes->safe_push (V32QImode);
-     }
-+
-+  return 0;
- }
- 
- /* Implemenation of targetm.vectorize.get_mask_mode.  */
- 
- static opt_machine_mode
--ix86_get_mask_mode (poly_uint64 nunits, poly_uint64 vector_size)
-+ix86_get_mask_mode (machine_mode data_mode)
- {
-+  unsigned vector_size = GET_MODE_SIZE (data_mode);
-+  unsigned nunits = GET_MODE_NUNITS (data_mode);
-   unsigned elem_size = vector_size / nunits;
- 
-   /* Scalar mask case.  */
-@@ -51849,9 +51855,9 @@ ix86_run_selftests (void)
- #undef TARGET_VECTORIZE_SPLIT_REDUCTION
- #define TARGET_VECTORIZE_SPLIT_REDUCTION \
-   ix86_split_reduction
--#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
--#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
--  ix86_autovectorize_vector_sizes
-+#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
-+#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
-+  ix86_autovectorize_vector_modes
- #undef TARGET_VECTORIZE_GET_MASK_MODE
- #define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
- #undef TARGET_VECTORIZE_INIT_COST
-diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
-index 18cc39ae521..8c961f12a42 100644
---- a/gcc/config/i386/sse.md
-+++ b/gcc/config/i386/sse.md
-@@ -16441,10 +16441,9 @@
- 	(unspec:VF_128_256
- 	  [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
- 	   (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
--	   (subreg:VF_128_256
--	     (lt:
--	       (match_operand: 3 "register_operand" "Yz,Yz,x")
--	       (match_operand: 4 "const0_operand" "C,C,C")) 0)]
-+	   (lt:VF_128_256
-+	     (match_operand: 3 "register_operand" "Yz,Yz,x")
-+	     (match_operand: 4 "const0_operand" "C,C,C"))]
- 	  UNSPEC_BLENDV))]
-   "TARGET_SSE4_1"
-   "#"
-diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
-index d758fbf1be6..1008947209e 100644
---- a/gcc/config/mips/mips.c
-+++ b/gcc/config/mips/mips.c
-@@ -13457,13 +13457,14 @@ mips_preferred_simd_mode (scalar_mode mode)
-   return word_mode;
- }
- 
--/* Implement TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES.  */
-+/* Implement TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES.  */
- 
--static void
--mips_autovectorize_vector_sizes (vector_sizes *sizes)
-+static unsigned int
-+mips_autovectorize_vector_modes (vector_modes *modes, bool)
- {
-   if (ISA_HAS_MSA)
--    sizes->safe_push (16);
-+    modes->safe_push (V16QImode);
-+  return 0;
- }
- 
- /* Implement TARGET_INIT_LIBFUNCS.  */
-@@ -22676,9 +22677,9 @@ mips_starting_frame_offset (void)
- 
- #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
- #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE mips_preferred_simd_mode
--#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
--#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
--  mips_autovectorize_vector_sizes
-+#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
-+#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
-+  mips_autovectorize_vector_modes
- 
- #undef TARGET_INIT_BUILTINS
- #define TARGET_INIT_BUILTINS mips_init_builtins
-diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
-index 87d60078bb0..8f046de424c 100644
---- a/gcc/config/rs6000/rs6000.c
-+++ b/gcc/config/rs6000/rs6000.c
-@@ -15457,7 +15457,7 @@ static tree
- fold_build_vec_cmp (tree_code code, tree type,
- 		    tree arg0, tree arg1)
- {
--  tree cmp_type = build_same_sized_truth_vector_type (type);
-+  tree cmp_type = truth_type_for (type);
-   tree zero_vec = build_zero_cst (type);
-   tree minus_one_vec = build_minus_one_cst (type);
-   tree cmp = fold_build2 (code, cmp_type, arg0, arg1);
-diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
-index db3f94978ec..c35666dec83 100644
---- a/gcc/config/s390/s390.c
-+++ b/gcc/config/s390/s390.c
-@@ -6588,7 +6588,7 @@ s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
- 	case LE:   cc_producer_mode = CCVFHEmode; code = GE; swap_p = true; break;
- 	default: gcc_unreachable ();
- 	}
--      scratch_mode = mode_for_int_vector (GET_MODE (cmp1)).require ();
-+      scratch_mode = related_int_vector_mode (GET_MODE (cmp1)).require ();
- 
-       if (inv_p)
- 	all_p = !all_p;
-@@ -6694,7 +6694,7 @@ s390_expand_vcond (rtx target, rtx then, rtx els,
- 
-   /* We always use an integral type vector to hold the comparison
-      result.  */
--  result_mode = mode_for_int_vector (cmp_mode).require ();
-+  result_mode = related_int_vector_mode (cmp_mode).require ();
-   result_target = gen_reg_rtx (result_mode);
- 
-   /* We allow vector immediates as comparison operands that
-diff --git a/gcc/cp/call.c b/gcc/cp/call.c
-index f365a5a7f7b..23a54f3c332 100644
---- a/gcc/cp/call.c
-+++ b/gcc/cp/call.c
-@@ -5161,7 +5161,7 @@ build_conditional_expr_1 (const op_location_t &loc,
- 
-       if (!COMPARISON_CLASS_P (arg1))
- 	{
--	  tree cmp_type = build_same_sized_truth_vector_type (arg1_type);
-+	  tree cmp_type = truth_type_for (arg1_type);
- 	  arg1 = build2 (NE_EXPR, cmp_type, arg1, build_zero_cst (arg1_type));
- 	}
-       return build3_loc (loc, VEC_COND_EXPR, arg2_type, arg1, arg2, arg3);
-diff --git a/gcc/cp/class.c b/gcc/cp/class.c
-index 6b57184e081..5b0a60d61cc 100644
---- a/gcc/cp/class.c
-+++ b/gcc/cp/class.c
-@@ -4760,8 +4760,6 @@ adjust_clone_args (tree decl)
-       tree orig_decl_parms = TYPE_ARG_TYPES (TREE_TYPE (decl));
-       tree decl_parms, clone_parms;
- 
--      clone_parms = orig_clone_parms;
--
-       /* Skip the 'this' parameter.  */
-       orig_clone_parms = TREE_CHAIN (orig_clone_parms);
-       orig_decl_parms = TREE_CHAIN (orig_decl_parms);
-@@ -8581,7 +8579,6 @@ dump_class_hierarchy_r (FILE *stream,
-   tree base_binfo;
-   int i;
- 
--  indented = maybe_indent_hierarchy (stream, indent, 0);
-   fprintf (stream, "%s (0x" HOST_WIDE_INT_PRINT_HEX ") ",
- 	   type_as_string (BINFO_TYPE (binfo), TFF_PLAIN_IDENTIFIER),
- 	   (HOST_WIDE_INT) (uintptr_t) binfo);
-@@ -8602,7 +8599,6 @@ dump_class_hierarchy_r (FILE *stream,
-     fprintf (stream, " virtual");
-   fprintf (stream, "\n");
- 
--  indented = 0;
-   if (BINFO_PRIMARY_P (binfo))
-     {
-       indented = maybe_indent_hierarchy (stream, indent + 3, indented);
-diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
-index 39d55589ef3..5c82c2272c2 100644
---- a/gcc/cp/decl.c
-+++ b/gcc/cp/decl.c
-@@ -6387,7 +6387,7 @@ build_aggr_init_full_exprs (tree decl, tree init, int flags)
- static tree
- check_initializer (tree decl, tree init, int flags, vec<tree, va_gc> **cleanups)
- {
--  tree type = TREE_TYPE (decl);
-+  tree type;
-   tree init_code = NULL;
-   tree core_type;
- 
-diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
-index e1c02d7b718..60fe58e0313 100644
---- a/gcc/cp/parser.c
-+++ b/gcc/cp/parser.c
-@@ -10485,7 +10485,7 @@ cp_parser_lambda_expression (cp_parser* parser)
-     if (ok)
-       maybe_add_lambda_conv_op (type);
- 
--    type = finish_struct (type, /*attributes=*/NULL_TREE);
-+    finish_struct (type, /*attributes=*/NULL_TREE);
- 
-     in_discarded_stmt = discarded;
- 
-diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
-index 4787747b6ff..ff7921533cb 100644
---- a/gcc/cp/pt.c
-+++ b/gcc/cp/pt.c
-@@ -7459,8 +7459,7 @@ unify_bound_ttp_args (tree tparms, tree targs, tree parm, tree& arg,
-     {
-       /* In keeping with P0522R0, adjust P's template arguments
- 	 to apply to A's template; then flatten it again.  */
--      tree nparmvec = parmvec;
--      nparmvec = coerce_ttp_args_for_tta (arg, parmvec, tf_none);
-+      tree nparmvec = coerce_ttp_args_for_tta (arg, parmvec, tf_none);
-       nparmvec = expand_template_argument_pack (nparmvec);
- 
-       if (unify (tparms, targs, nparmvec, argvec,
-@@ -7887,7 +7886,6 @@ convert_template_argument (tree parm,
- 	 invalid, but static members are OK.  In any
- 	 case, grab the underlying fields/functions
- 	 and issue an error later if required.  */
--      orig_arg = TREE_VALUE (arg);
-       TREE_TYPE (arg) = unknown_type_node;
-     }
- 
-diff --git a/gcc/cp/rtti.c b/gcc/cp/rtti.c
-index 3ca2b5e7b88..9aea6b939ec 100644
---- a/gcc/cp/rtti.c
-+++ b/gcc/cp/rtti.c
-@@ -209,8 +209,8 @@ build_headof (tree exp)
-   offset = build_vtbl_ref (cp_build_fold_indirect_ref (exp),
-                            index);
- 
--  type = cp_build_qualified_type (ptr_type_node,
--				  cp_type_quals (TREE_TYPE (exp)));
-+  cp_build_qualified_type (ptr_type_node,
-+			   cp_type_quals (TREE_TYPE (exp)));
-   return fold_build_pointer_plus (exp, offset);
- }
- 
-diff --git a/gcc/cp/typeck.c b/gcc/cp/typeck.c
-index 2169f8c4efd..c42fd731cd2 100644
---- a/gcc/cp/typeck.c
-+++ b/gcc/cp/typeck.c
-@@ -4293,7 +4293,7 @@ build_vec_cmp (tree_code code, tree type,
- {
-   tree zero_vec = build_zero_cst (type);
-   tree minus_one_vec = build_minus_one_cst (type);
--  tree cmp_type = build_same_sized_truth_vector_type(type);
-+  tree cmp_type = truth_type_for (type);
-   tree cmp = build2 (code, cmp_type, arg0, arg1);
-   return build3 (VEC_COND_EXPR, type, cmp, minus_one_vec, zero_vec);
- }
-@@ -9189,8 +9189,6 @@ convert_for_initialization (tree exp, tree type, tree rhs, int flags,
-   if (exp == error_mark_node)
-     return error_mark_node;
- 
--  rhstype = non_reference (rhstype);
--
-   type = complete_type (type);
- 
-   if (DIRECT_INIT_EXPR_P (type, rhs))
-diff --git a/gcc/cselib.c b/gcc/cselib.c
-index 84c17c23f6d..108b2588cf9 100644
---- a/gcc/cselib.c
-+++ b/gcc/cselib.c
-@@ -2518,13 +2518,12 @@ cselib_record_sets (rtx_insn *insn)
-   int n_sets = 0;
-   int i;
-   struct cselib_set sets[MAX_SETS];
--  rtx body = PATTERN (insn);
-   rtx cond = 0;
-   int n_sets_before_autoinc;
-   int n_strict_low_parts = 0;
-   struct cselib_record_autoinc_data data;
- 
--  body = PATTERN (insn);
-+  rtx body = PATTERN (insn);
-   if (GET_CODE (body) == COND_EXEC)
-     {
-       cond = COND_EXEC_TEST (body);
-diff --git a/gcc/d/d-codegen.cc b/gcc/d/d-codegen.cc
-index 2abff92fc88..6f5499b08ee 100644
---- a/gcc/d/d-codegen.cc
-+++ b/gcc/d/d-codegen.cc
-@@ -1397,7 +1397,7 @@ build_boolop (tree_code code, tree arg0, tree arg1)
-       /* Build a vector comparison.
- 	 VEC_COND_EXPR <e1 op e2, { -1, -1, -1, -1 }, { 0, 0, 0, 0 }>; */
-       tree type = TREE_TYPE (arg0);
--      tree cmptype = build_same_sized_truth_vector_type (type);
-+      tree cmptype = truth_type_for (type);
-       tree cmp = fold_build2_loc (input_location, code, cmptype, arg0, arg1);
- 
-       return fold_build3_loc (input_location, VEC_COND_EXPR, type, cmp,
-diff --git a/gcc/df-scan.c b/gcc/df-scan.c
-index 08d7af33371..84c2e54c855 100644
---- a/gcc/df-scan.c
-+++ b/gcc/df-scan.c
-@@ -229,7 +229,6 @@ void
- df_scan_alloc (bitmap all_blocks ATTRIBUTE_UNUSED)
- {
-   struct df_scan_problem_data *problem_data;
--  unsigned int insn_num = get_max_uid () + 1;
-   basic_block bb;
- 
-   /* Given the number of pools, this is really faster than tearing
-@@ -257,7 +256,6 @@ df_scan_alloc (bitmap all_blocks ATTRIBUTE_UNUSED)
-   bitmap_obstack_initialize (&problem_data->reg_bitmaps);
-   bitmap_obstack_initialize (&problem_data->insn_bitmaps);
- 
--  insn_num += insn_num / 4;
-   df_grow_reg_info ();
- 
-   df_grow_insn_info ();
-diff --git a/gcc/doc/poly-int.texi b/gcc/doc/poly-int.texi
-index 1023e823cb3..d60bb02aabf 100644
---- a/gcc/doc/poly-int.texi
-+++ b/gcc/doc/poly-int.texi
-@@ -803,6 +803,18 @@ the assertion is known to hold.
- @item constant_lower_bound (@var{a})
- Assert that @var{a} is nonnegative and return the smallest value it can have.
- 
-+@item constant_lower_bound_with_limit (@var{a}, @var{b})
-+Return the least value @var{a} can have, given that the context in
-+which @var{a} appears guarantees that the answer is no less than @var{b}.
-+In other words, the caller is asserting that @var{a} is greater than or
-+equal to @var{b} even if @samp{known_ge (@var{a}, @var{b})} doesn't hold.
-+
-+@item constant_upper_bound_with_limit (@var{a}, @var{b})
-+Return the greatest value @var{a} can have, given that the context in
-+which @var{a} appears guarantees that the answer is no greater than @var{b}.
-+In other words, the caller is asserting that @var{a} is less than or equal
-+to @var{b} even if @samp{known_le (@var{a}, @var{b})} doesn't hold.
-+
- @item lower_bound (@var{a}, @var{b})
- Return a value that is always less than or equal to both @var{a} and @var{b}.
- It will be the greatest such value for some indeterminate values
-diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
-index 8c8978bb13a..73db70867b4 100644
---- a/gcc/doc/tm.texi
-+++ b/gcc/doc/tm.texi
-@@ -6016,27 +6016,71 @@ against lower halves of vectors recursively until the specified mode is
- reached.  The default is @var{mode} which means no splitting.
- @end deftypefn
- 
--@deftypefn {Target Hook} void TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES (vector_sizes *@var{sizes})
--If the mode returned by @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} is not
--the only one that is worth considering, this hook should add all suitable
--vector sizes to @var{sizes}, in order of decreasing preference.  The first
--one should be the size of @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE}.
-+@deftypefn {Target Hook} {unsigned int} TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES (vector_modes *@var{modes}, bool @var{all})
-+If using the mode returned by @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE}
-+is not the only approach worth considering, this hook should add one mode to
-+@var{modes} for each useful alternative approach.  These modes are then
-+passed to @code{TARGET_VECTORIZE_RELATED_MODE} to obtain the vector mode
-+for a given element mode.
-+
-+The modes returned in @var{modes} should use the smallest element mode
-+possible for the vectorization approach that they represent, preferring
-+integer modes over floating-point modes in the event of a tie.  The first
-+mode should be the @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} for its
-+element mode.
-+
-+If @var{all} is true, add suitable vector modes even when they are generally
-+not expected to be worthwhile.
-+
-+The hook returns a bitmask of flags that control how the modes in
-+@var{modes} are used.  The flags are:
-+@table @code
-+@item VECT_COMPARE_COSTS
-+Tells the loop vectorizer to try all the provided modes and pick the one
-+with the lowest cost.  By default the vectorizer will choose the first
-+mode that works.
-+@end table
- 
- The hook does not need to do anything if the vector returned by
- @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} is the only one relevant
--for autovectorization.  The default implementation does nothing.
--@end deftypefn
--
--@deftypefn {Target Hook} opt_machine_mode TARGET_VECTORIZE_GET_MASK_MODE (poly_uint64 @var{nunits}, poly_uint64 @var{length})
--A vector mask is a value that holds one boolean result for every element
--in a vector.  This hook returns the machine mode that should be used to
--represent such a mask when the vector in question is @var{length} bytes
--long and contains @var{nunits} elements.  The hook returns an empty
--@code{opt_machine_mode} if no such mode exists.
--
--The default implementation returns the mode of an integer vector that
--is @var{length} bytes long and that contains @var{nunits} elements,
--if such a mode exists.
-+for autovectorization.  The default implementation adds no modes and
-+returns 0.
-+@end deftypefn
-+
-+@deftypefn {Target Hook} opt_machine_mode TARGET_VECTORIZE_RELATED_MODE (machine_mode @var{vector_mode}, scalar_mode @var{element_mode}, poly_uint64 @var{nunits})
-+If a piece of code is using vector mode @var{vector_mode} and also wants
-+to operate on elements of mode @var{element_mode}, return the vector mode
-+it should use for those elements.  If @var{nunits} is nonzero, ensure that
-+the mode has exactly @var{nunits} elements, otherwise pick whichever vector
-+size pairs the most naturally with @var{vector_mode}.  Return an empty
-+@code{opt_machine_mode} if there is no supported vector mode with the
-+required properties.
-+
-+There is no prescribed way of handling the case in which @var{nunits}
-+is zero.  One common choice is to pick a vector mode with the same size
-+as @var{vector_mode}; this is the natural choice if the target has a
-+fixed vector size.  Another option is to choose a vector mode with the
-+same number of elements as @var{vector_mode}; this is the natural choice
-+if the target has a fixed number of elements.  Alternatively, the hook
-+might choose a middle ground, such as trying to keep the number of
-+elements as similar as possible while applying maximum and minimum
-+vector sizes.
-+
-+The default implementation uses @code{mode_for_vector} to find the
-+requested mode, returning a mode with the same size as @var{vector_mode}
-+when @var{nunits} is zero.  This is the correct behavior for most targets.
-+@end deftypefn
-+
-+@deftypefn {Target Hook} opt_machine_mode TARGET_VECTORIZE_GET_MASK_MODE (machine_mode @var{mode})
-+Return the mode to use for a vector mask that holds one boolean
-+result for each element of vector mode @var{mode}.  The returned mask mode
-+can be a vector of integers (class @code{MODE_VECTOR_INT}), a vector of
-+booleans (class @code{MODE_VECTOR_BOOL}) or a scalar integer (class
-+@code{MODE_INT}).  Return an empty @code{opt_machine_mode} if no such
-+mask mode exists.
-+
-+The default implementation returns a @code{MODE_VECTOR_INT} with the
-+same size and number of elements as @var{mode}, if such a mode exists.
- @end deftypefn
- 
- @deftypefn {Target Hook} bool TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE (unsigned @var{ifn})
-diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
-index fe1194ef91a..bc362dca0f5 100644
---- a/gcc/doc/tm.texi.in
-+++ b/gcc/doc/tm.texi.in
-@@ -4172,7 +4172,9 @@ address;  but often a machine-dependent strategy can generate better code.
- 
- @hook TARGET_VECTORIZE_SPLIT_REDUCTION
- 
--@hook TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
-+@hook TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
-+
-+@hook TARGET_VECTORIZE_RELATED_MODE
- 
- @hook TARGET_VECTORIZE_GET_MASK_MODE
- 
-diff --git a/gcc/dojump.c b/gcc/dojump.c
-index 8626689463e..bac37a357a9 100644
---- a/gcc/dojump.c
-+++ b/gcc/dojump.c
-@@ -668,8 +668,6 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
-       code = LE;
-       if_true_label = if_false_label;
-       if_false_label = drop_through_label;
--      drop_through_if_true = false;
--      drop_through_if_false = true;
-       prob = prob.invert ();
-     }
- 
-diff --git a/gcc/early-remat.c b/gcc/early-remat.c
-index 122891c1edb..0396f16babf 100644
---- a/gcc/early-remat.c
-+++ b/gcc/early-remat.c
-@@ -1123,7 +1123,6 @@ early_remat::record_equiv_candidates (unsigned int cand1_index,
-       ec->representative = cand1_index;
-       cand1->equiv_class = ec;
-     }
--  cand1 = &m_candidates[ec->representative];
-   cand2->equiv_class = ec;
-   bitmap_set_bit (ec->members, cand2_index);
-   if (cand2_index > ec->representative)
-diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c
-index 15dffa58a2e..78104603c76 100644
---- a/gcc/emit-rtl.c
-+++ b/gcc/emit-rtl.c
-@@ -3993,7 +3993,7 @@ try_split (rtx pat, rtx_insn *trial, int last)
-   before = PREV_INSN (trial);
-   after = NEXT_INSN (trial);
- 
--  tem = emit_insn_after_setloc (seq, trial, INSN_LOCATION (trial));
-+  emit_insn_after_setloc (seq, trial, INSN_LOCATION (trial));
- 
-   delete_insn (trial);
- 
-diff --git a/gcc/expmed.c b/gcc/expmed.c
-index c5f5499c013..34cdfbf151a 100644
---- a/gcc/expmed.c
-+++ b/gcc/expmed.c
-@@ -1662,12 +1662,10 @@ extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
- 	  poly_uint64 nunits;
- 	  if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)),
- 			   GET_MODE_UNIT_BITSIZE (tmode), &nunits)
--	      || !mode_for_vector (inner_mode, nunits).exists (&new_mode)
--	      || !VECTOR_MODE_P (new_mode)
-+	      || !related_vector_mode (tmode, inner_mode,
-+				       nunits).exists (&new_mode)
- 	      || maybe_ne (GET_MODE_SIZE (new_mode),
--			   GET_MODE_SIZE (GET_MODE (op0)))
--	      || GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode)
--	      || !targetm.vector_mode_supported_p (new_mode))
-+			   GET_MODE_SIZE (GET_MODE (op0))))
- 	    new_mode = VOIDmode;
- 	}
-       poly_uint64 pos;
-diff --git a/gcc/expr.c b/gcc/expr.c
-index fa15b7eceae..5e3700fe15f 100644
---- a/gcc/expr.c
-+++ b/gcc/expr.c
-@@ -249,6 +249,31 @@ convert_move (rtx to, rtx from, int unsignedp)
- 
-   if (VECTOR_MODE_P (to_mode) || VECTOR_MODE_P (from_mode))
-     {
-+      if (GET_MODE_UNIT_PRECISION (to_mode)
-+	  > GET_MODE_UNIT_PRECISION (from_mode))
-+	{
-+	  optab op = unsignedp ? zext_optab : sext_optab;
-+	  insn_code icode = convert_optab_handler (op, to_mode, from_mode);
-+	  if (icode != CODE_FOR_nothing)
-+	    {
-+	      emit_unop_insn (icode, to, from,
-+			      unsignedp ? ZERO_EXTEND : SIGN_EXTEND);
-+	      return;
-+	    }
-+	}
-+
-+      if (GET_MODE_UNIT_PRECISION (to_mode)
-+	  < GET_MODE_UNIT_PRECISION (from_mode))
-+	{
-+	  insn_code icode = convert_optab_handler (trunc_optab,
-+						   to_mode, from_mode);
-+	  if (icode != CODE_FOR_nothing)
-+	    {
-+	      emit_unop_insn (icode, to, from, TRUNCATE);
-+	      return;
-+	    }
-+	}
-+
-       gcc_assert (known_eq (GET_MODE_BITSIZE (from_mode),
- 			    GET_MODE_BITSIZE (to_mode)));
- 
-diff --git a/gcc/fold-const.h b/gcc/fold-const.h
-index 049fee91876..e2e66246315 100644
---- a/gcc/fold-const.h
-+++ b/gcc/fold-const.h
-@@ -83,7 +83,7 @@ extern bool fold_deferring_overflow_warnings_p (void);
- extern void fold_overflow_warning (const char*, enum warn_strict_overflow_code);
- extern enum tree_code fold_div_compare (enum tree_code, tree, tree,
- 					tree *, tree *, bool *);
--extern int operand_equal_p (const_tree, const_tree, unsigned int);
-+extern int operand_equal_p (const_tree, const_tree, unsigned int flags = 0);
- extern int multiple_of_p (tree, const_tree, const_tree);
- #define omit_one_operand(T1,T2,T3)\
-    omit_one_operand_loc (UNKNOWN_LOCATION, T1, T2, T3)
-diff --git a/gcc/fwprop.c b/gcc/fwprop.c
-index cf2c9de2d35..f2966fadae8 100644
---- a/gcc/fwprop.c
-+++ b/gcc/fwprop.c
-@@ -448,6 +448,18 @@ enum {
-   PR_OPTIMIZE_FOR_SPEED = 4
- };
- 
-+/* Check that X has a single def.  */
-+
-+static bool
-+reg_single_def_p (rtx x)
-+{
-+  if (!REG_P (x))
-+    return false;
-+
-+  int regno = REGNO (x);
-+  return (DF_REG_DEF_COUNT (regno) == 1
-+	  && !bitmap_bit_p (DF_LR_OUT (ENTRY_BLOCK_PTR_FOR_FN (cfun)), regno));
-+}
- 
- /* Replace all occurrences of OLD in *PX with NEW and try to simplify the
-    resulting expression.  Replace *PX with a new RTL expression if an
-@@ -547,6 +559,54 @@ propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags)
- 	  tem = simplify_gen_subreg (mode, op0, GET_MODE (SUBREG_REG (x)),
- 				     SUBREG_BYTE (x));
- 	}
-+
-+      else
-+	{
-+	  rtvec vec;
-+	  rtvec newvec;
-+	  const char *fmt = GET_RTX_FORMAT (code);
-+	  rtx op;
-+
-+	  for (int i = 0; fmt[i]; i++)
-+	    switch (fmt[i])
-+	      {
-+	      case 'E':
-+		vec = XVEC (x, i);
-+		newvec = vec;
-+		for (int j = 0; j < GET_NUM_ELEM (vec); j++)
-+		  {
-+		    op = RTVEC_ELT (vec, j);
-+		    valid_ops &= propagate_rtx_1 (&op, old_rtx, new_rtx, flags);
-+		    if (op != RTVEC_ELT (vec, j))
-+		      {
-+			if (newvec == vec)
-+			  {
-+			    newvec = shallow_copy_rtvec (vec);
-+			    if (!tem)
-+			      tem = shallow_copy_rtx (x);
-+			    XVEC (tem, i) = newvec;
-+			  }
-+			RTVEC_ELT (newvec, j) = op;
-+		      }
-+		  }
-+	        break;
-+
-+	      case 'e':
-+		if (XEXP (x, i))
-+		  {
-+		    op = XEXP (x, i);
-+		    valid_ops &= propagate_rtx_1 (&op, old_rtx, new_rtx, flags);
-+		    if (op != XEXP (x, i))
-+		      {
-+			if (!tem)
-+			  tem = shallow_copy_rtx (x);
-+			XEXP (tem, i) = op;
-+		      }
-+		  }
-+	        break;
-+	      }
-+	}
-+
-       break;
- 
-     case RTX_OBJ:
-@@ -1370,10 +1430,11 @@ forward_propagate_and_simplify (df_ref use, rtx_insn *def_insn, rtx def_set)
- 
- /* Given a use USE of an insn, if it has a single reaching
-    definition, try to forward propagate it into that insn.
--   Return true if cfg cleanup will be needed.  */
-+   Return true if cfg cleanup will be needed.
-+   REG_PROP_ONLY is true if we should only propagate register copies.  */
- 
- static bool
--forward_propagate_into (df_ref use)
-+forward_propagate_into (df_ref use, bool reg_prop_only = false)
- {
-   df_ref def;
-   rtx_insn *def_insn, *use_insn;
-@@ -1394,10 +1455,6 @@ forward_propagate_into (df_ref use)
-   if (DF_REF_IS_ARTIFICIAL (def))
-     return false;
- 
--  /* Do not propagate loop invariant definitions inside the loop.  */
--  if (DF_REF_BB (def)->loop_father != DF_REF_BB (use)->loop_father)
--    return false;
--
-   /* Check if the use is still present in the insn!  */
-   use_insn = DF_REF_INSN (use);
-   if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
-@@ -1415,6 +1472,19 @@ forward_propagate_into (df_ref use)
-   if (!def_set)
-     return false;
- 
-+  if (reg_prop_only
-+      && (!reg_single_def_p (SET_SRC (def_set))
-+	  || !reg_single_def_p (SET_DEST (def_set))))
-+    return false;
-+
-+  /* Allow propagations into a loop only for reg-to-reg copies, since
-+     replacing one register by another shouldn't increase the cost.  */
-+
-+  if (DF_REF_BB (def)->loop_father != DF_REF_BB (use)->loop_father
-+      && (!reg_single_def_p (SET_SRC (def_set))
-+	  || !reg_single_def_p (SET_DEST (def_set))))
-+    return false;
-+
-   /* Only try one kind of propagation.  If two are possible, we'll
-      do it on the following iterations.  */
-   if (forward_propagate_and_simplify (use, def_insn, def_set)
-@@ -1483,7 +1553,7 @@ gate_fwprop (void)
- }
- 
- static unsigned int
--fwprop (void)
-+fwprop (bool fwprop_addr_p)
- {
-   unsigned i;
- 
-@@ -1502,11 +1572,16 @@ fwprop (void)
- 
-       df_ref use = DF_USES_GET (i);
-       if (use)
--	if (DF_REF_TYPE (use) == DF_REF_REG_USE
--	    || DF_REF_BB (use)->loop_father == NULL
--	    /* The outer most loop is not really a loop.  */
--	    || loop_outer (DF_REF_BB (use)->loop_father) == NULL)
--	  forward_propagate_into (use);
-+	{
-+	  if (DF_REF_TYPE (use) == DF_REF_REG_USE
-+	      || DF_REF_BB (use)->loop_father == NULL
-+	      /* The outer most loop is not really a loop.  */
-+	      || loop_outer (DF_REF_BB (use)->loop_father) == NULL)
-+	    forward_propagate_into (use, fwprop_addr_p);
-+
-+	  else if (fwprop_addr_p)
-+	    forward_propagate_into (use, false);
-+	}
-     }
- 
-   fwprop_done ();
-@@ -1537,7 +1612,7 @@ public:
- 
-   /* opt_pass methods: */
-   virtual bool gate (function *) { return gate_fwprop (); }
--  virtual unsigned int execute (function *) { return fwprop (); }
-+  virtual unsigned int execute (function *) { return fwprop (false); }
- 
- }; // class pass_rtl_fwprop
- 
-@@ -1549,33 +1624,6 @@ make_pass_rtl_fwprop (gcc::context *ctxt)
-   return new pass_rtl_fwprop (ctxt);
- }
- 
--static unsigned int
--fwprop_addr (void)
--{
--  unsigned i;
--
--  fwprop_init ();
--
--  /* Go through all the uses.  df_uses_create will create new ones at the
--     end, and we'll go through them as well.  */
--  for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
--    {
--      if (!propagations_left)
--	break;
--
--      df_ref use = DF_USES_GET (i);
--      if (use)
--	if (DF_REF_TYPE (use) != DF_REF_REG_USE
--	    && DF_REF_BB (use)->loop_father != NULL
--	    /* The outer most loop is not really a loop.  */
--	    && loop_outer (DF_REF_BB (use)->loop_father) != NULL)
--	  forward_propagate_into (use);
--    }
--
--  fwprop_done ();
--  return 0;
--}
--
- namespace {
- 
- const pass_data pass_data_rtl_fwprop_addr =
-@@ -1600,7 +1648,7 @@ public:
- 
-   /* opt_pass methods: */
-   virtual bool gate (function *) { return gate_fwprop (); }
--  virtual unsigned int execute (function *) { return fwprop_addr (); }
-+  virtual unsigned int execute (function *) { return fwprop (true); }
- 
- }; // class pass_rtl_fwprop_addr
- 
-diff --git a/gcc/gimple.c b/gcc/gimple.c
-index 8fae60fb848..bf362dbe545 100644
---- a/gcc/gimple.c
-+++ b/gcc/gimple.c
-@@ -1771,6 +1771,8 @@ gimple_get_lhs (const gimple *stmt)
-     return gimple_assign_lhs (stmt);
-   else if (code == GIMPLE_CALL)
-     return gimple_call_lhs (stmt);
-+  else if (code == GIMPLE_PHI)
-+    return gimple_phi_result (stmt);
-   else
-     return NULL_TREE;
- }
-diff --git a/gcc/graphite-scop-detection.c b/gcc/graphite-scop-detection.c
-index 4534d43721f..489d0b93b42 100644
---- a/gcc/graphite-scop-detection.c
-+++ b/gcc/graphite-scop-detection.c
-@@ -1105,14 +1105,12 @@ assign_parameter_index_in_region (tree name, sese_info_p region)
-   gcc_assert (TREE_CODE (name) == SSA_NAME
- 	      && INTEGRAL_TYPE_P (TREE_TYPE (name))
- 	      && ! defined_in_sese_p (name, region->region));
--
-   int i;
-   tree p;
-   FOR_EACH_VEC_ELT (region->params, i, p)
-     if (p == name)
-       return;
- 
--  i = region->params.length ();
-   region->params.safe_push (name);
- }
- 
-diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
-index 95788dfee7d..21ecd566766 100644
---- a/gcc/internal-fn.c
-+++ b/gcc/internal-fn.c
-@@ -100,7 +100,7 @@ init_internal_fns ()
- /* Create static initializers for the information returned by
-    direct_internal_fn.  */
- #define not_direct { -2, -2, false }
--#define mask_load_direct { -1, 2, false }
-+#define mask_load_direct { -1, 2, true }
- #define load_lanes_direct { -1, -1, false }
- #define mask_load_lanes_direct { -1, -1, false }
- #define gather_load_direct { -1, -1, false }
-diff --git a/gcc/ira-color.c b/gcc/ira-color.c
-index aa91b56c81f..8a90ae1b4e6 100644
---- a/gcc/ira-color.c
-+++ b/gcc/ira-color.c
-@@ -1108,7 +1108,6 @@ setup_profitable_hard_regs (void)
- 	  || empty_profitable_hard_regs (a))
- 	continue;
-       data = ALLOCNO_COLOR_DATA (a);
--      mode = ALLOCNO_MODE (a);
-       if ((costs = ALLOCNO_UPDATED_HARD_REG_COSTS (a)) != NULL
- 	  || (costs = ALLOCNO_HARD_REG_COSTS (a)) != NULL)
- 	{
-diff --git a/gcc/ira.c b/gcc/ira.c
-index b330f2a287b..4262e5cf3b7 100644
---- a/gcc/ira.c
-+++ b/gcc/ira.c
-@@ -4414,10 +4414,9 @@ rtx_moveable_p (rtx *loc, enum op_type type)
- {
-   const char *fmt;
-   rtx x = *loc;
--  enum rtx_code code = GET_CODE (x);
-   int i, j;
- 
--  code = GET_CODE (x);
-+  enum rtx_code code = GET_CODE (x);
-   switch (code)
-     {
-     case CONST:
-diff --git a/gcc/lra-eliminations.c b/gcc/lra-eliminations.c
-index ee9fd51f129..7a345a52ae1 100644
---- a/gcc/lra-eliminations.c
-+++ b/gcc/lra-eliminations.c
-@@ -1146,7 +1146,6 @@ eliminate_regs_in_insn (rtx_insn *insn, bool replace_p, bool first_p,
-      single_set without having put new body into the insn and the
-      re-recognition won't hurt in this rare case.  */
-   id = lra_update_insn_recog_data (insn);
--  static_id = id->insn_static_data;
- }
- 
- /* Spill pseudos which are assigned to hard registers in SET.  Add
-diff --git a/gcc/lra.c b/gcc/lra.c
-index 1d2578f8c12..10b85340fc5 100644
---- a/gcc/lra.c
-+++ b/gcc/lra.c
-@@ -1029,12 +1029,8 @@ lra_set_insn_recog_data (rtx_insn *insn)
- 			       data->operand_loc,
- 			       constraints, operand_mode, NULL);
- 	  if (nop > 0)
--	    {
--	      const char *p =  recog_data.constraints[0];
--
--	      for (p =	constraints[0]; *p; p++)
--		nalt += *p == ',';
--	    }
-+	    for (const char *p =constraints[0]; *p; p++)
-+	      nalt += *p == ',';
- 	  data->insn_static_data = insn_static_data
- 	    = get_static_insn_data (-1, nop, 0, nalt);
- 	  for (i = 0; i < nop; i++)
-diff --git a/gcc/machmode.h b/gcc/machmode.h
-index 3a7cee88962..d564f9c6458 100644
---- a/gcc/machmode.h
-+++ b/gcc/machmode.h
-@@ -257,6 +257,9 @@ public:
-   bool exists () const;
-   template<typename U> bool exists (U *) const;
- 
-+  bool operator== (const T &m) const { return m_mode == m; }
-+  bool operator!= (const T &m) const { return m_mode != m; }
-+
- private:
-   machine_mode m_mode;
- };
-@@ -841,20 +844,9 @@ smallest_int_mode_for_size (poly_uint64 size)
- extern opt_scalar_int_mode int_mode_for_mode (machine_mode);
- extern opt_machine_mode bitwise_mode_for_mode (machine_mode);
- extern opt_machine_mode mode_for_vector (scalar_mode, poly_uint64);
--extern opt_machine_mode mode_for_int_vector (unsigned int, poly_uint64);
--
--/* Return the integer vector equivalent of MODE, if one exists.  In other
--   words, return the mode for an integer vector that has the same number
--   of bits as MODE and the same number of elements as MODE, with the
--   latter being 1 if MODE is scalar.  The returned mode can be either
--   an integer mode or a vector mode.  */
--
--inline opt_machine_mode
--mode_for_int_vector (machine_mode mode)
--{
--  return mode_for_int_vector (GET_MODE_UNIT_BITSIZE (mode),
--			      GET_MODE_NUNITS (mode));
--}
-+extern opt_machine_mode related_vector_mode (machine_mode, scalar_mode,
-+					     poly_uint64 = 0);
-+extern opt_machine_mode related_int_vector_mode (machine_mode);
- 
- /* A class for iterating through possible bitfield modes.  */
- class bit_field_mode_iterator
-diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c
-index 74159734fc8..0d7f104a2f2 100644
---- a/gcc/omp-expand.c
-+++ b/gcc/omp-expand.c
-@@ -4974,6 +4974,13 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
- 	  && loop->safelen > 1)
- 	{
- 	  loop->force_vectorize = true;
-+	  if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
-+	    {
-+	      unsigned HOST_WIDE_INT v
-+		= tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
-+	      if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
-+		loop->simdlen = v;
-+	    }
- 	  cfun->has_force_vectorize_loops = true;
- 	}
-       else if (dont_vectorize)
-diff --git a/gcc/omp-general.c b/gcc/omp-general.c
-index 356772ff458..4fb53af7587 100644
---- a/gcc/omp-general.c
-+++ b/gcc/omp-general.c
-@@ -468,13 +468,16 @@ omp_max_vf (void)
- 	  && global_options_set.x_flag_tree_loop_vectorize))
-     return 1;
- 
--  auto_vector_sizes sizes;
--  targetm.vectorize.autovectorize_vector_sizes (&sizes);
--  if (!sizes.is_empty ())
-+  auto_vector_modes modes;
-+  targetm.vectorize.autovectorize_vector_modes (&modes, true);
-+  if (!modes.is_empty ())
-     {
-       poly_uint64 vf = 0;
--      for (unsigned int i = 0; i < sizes.length (); ++i)
--	vf = ordered_max (vf, sizes[i]);
-+      for (unsigned int i = 0; i < modes.length (); ++i)
-+	/* The returned modes use the smallest element size (and thus
-+	   the largest nunits) for the vectorization approach that they
-+	   represent.  */
-+	vf = ordered_max (vf, GET_MODE_NUNITS (modes[i]));
-       return vf;
-     }
- 
-diff --git a/gcc/omp-low.c b/gcc/omp-low.c
-index 813cefd69b9..7866639f76c 100644
---- a/gcc/omp-low.c
-+++ b/gcc/omp-low.c
-@@ -3650,11 +3650,8 @@ omp_clause_aligned_alignment (tree clause)
-   /* Otherwise return implementation defined alignment.  */
-   unsigned int al = 1;
-   opt_scalar_mode mode_iter;
--  auto_vector_sizes sizes;
--  targetm.vectorize.autovectorize_vector_sizes (&sizes);
--  poly_uint64 vs = 0;
--  for (unsigned int i = 0; i < sizes.length (); ++i)
--    vs = ordered_max (vs, sizes[i]);
-+  auto_vector_modes modes;
-+  targetm.vectorize.autovectorize_vector_modes (&modes, true);
-   static enum mode_class classes[]
-     = { MODE_INT, MODE_VECTOR_INT, MODE_FLOAT, MODE_VECTOR_FLOAT };
-   for (int i = 0; i < 4; i += 2)
-@@ -3665,19 +3662,18 @@ omp_clause_aligned_alignment (tree clause)
- 	machine_mode vmode = targetm.vectorize.preferred_simd_mode (mode);
- 	if (GET_MODE_CLASS (vmode) != classes[i + 1])
- 	  continue;
--	while (maybe_ne (vs, 0U)
--	       && known_lt (GET_MODE_SIZE (vmode), vs)
--	       && GET_MODE_2XWIDER_MODE (vmode).exists ())
--	  vmode = GET_MODE_2XWIDER_MODE (vmode).require ();
-+	machine_mode alt_vmode;
-+	for (unsigned int j = 0; j < modes.length (); ++j)
-+	  if (related_vector_mode (modes[j], mode).exists (&alt_vmode)
-+	      && known_ge (GET_MODE_SIZE (alt_vmode), GET_MODE_SIZE (vmode)))
-+	    vmode = alt_vmode;
- 
- 	tree type = lang_hooks.types.type_for_mode (mode, 1);
- 	if (type == NULL_TREE || TYPE_MODE (type) != mode)
- 	  continue;
--	poly_uint64 nelts = exact_div (GET_MODE_SIZE (vmode),
--				       GET_MODE_SIZE (mode));
--	type = build_vector_type (type, nelts);
--	if (TYPE_MODE (type) != vmode)
--	  continue;
-+	type = build_vector_type_for_mode (type, vmode);
-+	/* The functions above are not allowed to return invalid modes.  */
-+	gcc_assert (TYPE_MODE (type) == vmode);
- 	if (TYPE_ALIGN_UNIT (type) > al)
- 	  al = TYPE_ALIGN_UNIT (type);
-       }
-diff --git a/gcc/optabs-query.c b/gcc/optabs-query.c
-index 71c73fb43cc..61de7dc283b 100644
---- a/gcc/optabs-query.c
-+++ b/gcc/optabs-query.c
-@@ -354,11 +354,8 @@ can_conditionally_move_p (machine_mode mode)
- opt_machine_mode
- qimode_for_vec_perm (machine_mode mode)
- {
--  machine_mode qimode;
--  if (GET_MODE_INNER (mode) != QImode
--      && mode_for_vector (QImode, GET_MODE_SIZE (mode)).exists (&qimode)
--      && VECTOR_MODE_P (qimode))
--    return qimode;
-+  if (GET_MODE_INNER (mode) != QImode)
-+    return related_vector_mode (mode, QImode, GET_MODE_SIZE (mode));
-   return opt_machine_mode ();
- }
- 
-@@ -587,22 +584,21 @@ can_vec_mask_load_store_p (machine_mode mode,
-   if (!VECTOR_MODE_P (vmode))
-     return false;
- 
--  if ((targetm.vectorize.get_mask_mode
--       (GET_MODE_NUNITS (vmode), GET_MODE_SIZE (vmode)).exists (&mask_mode))
-+  if (targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode)
-       && convert_optab_handler (op, vmode, mask_mode) != CODE_FOR_nothing)
-     return true;
- 
--  auto_vector_sizes vector_sizes;
--  targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
--  for (unsigned int i = 0; i < vector_sizes.length (); ++i)
-+  auto_vector_modes vector_modes;
-+  targetm.vectorize.autovectorize_vector_modes (&vector_modes, true);
-+  for (unsigned int i = 0; i < vector_modes.length (); ++i)
-     {
--      poly_uint64 cur = vector_sizes[i];
-+      poly_uint64 cur = GET_MODE_SIZE (vector_modes[i]);
-       poly_uint64 nunits;
-       if (!multiple_p (cur, GET_MODE_SIZE (smode), &nunits))
- 	continue;
-       if (mode_for_vector (smode, nunits).exists (&vmode)
- 	  && VECTOR_MODE_P (vmode)
--	  && targetm.vectorize.get_mask_mode (nunits, cur).exists (&mask_mode)
-+	  && targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode)
- 	  && convert_optab_handler (op, vmode, mask_mode) != CODE_FOR_nothing)
- 	return true;
-     }
-diff --git a/gcc/optabs-tree.c b/gcc/optabs-tree.c
-index 8157798cc71..341e02bd51c 100644
---- a/gcc/optabs-tree.c
-+++ b/gcc/optabs-tree.c
-@@ -300,6 +300,20 @@ supportable_convert_operation (enum tree_code code,
-       return true;
-     }
- 
-+  if (GET_MODE_UNIT_PRECISION (m1) > GET_MODE_UNIT_PRECISION (m2)
-+      && can_extend_p (m1, m2, TYPE_UNSIGNED (vectype_in)))
-+    {
-+      *code1 = code;
-+      return true;
-+    }
-+
-+  if (GET_MODE_UNIT_PRECISION (m1) < GET_MODE_UNIT_PRECISION (m2)
-+      && convert_optab_handler (trunc_optab, m1, m2) != CODE_FOR_nothing)
-+    {
-+      *code1 = code;
-+      return true;
-+    }
-+
-   /* Now check for builtin.  */
-   if (targetm.vectorize.builtin_conversion
-       && targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in))
-diff --git a/gcc/optabs.c b/gcc/optabs.c
-index 7d7efe0a4a2..c2c1274ebdb 100644
---- a/gcc/optabs.c
-+++ b/gcc/optabs.c
-@@ -2095,8 +2095,8 @@ expand_twoval_binop (optab binoptab, rtx op0, rtx op1, rtx targ0, rtx targ1,
-       xop1 = avoid_expensive_constant (mode1, binoptab, 1, xop1, unsignedp);
- 
-       create_fixed_operand (&ops[0], targ0);
--      create_convert_operand_from (&ops[1], op0, mode, unsignedp);
--      create_convert_operand_from (&ops[2], op1, mode, unsignedp);
-+      create_convert_operand_from (&ops[1], xop0, mode, unsignedp);
-+      create_convert_operand_from (&ops[2], xop1, mode, unsignedp);
-       create_fixed_operand (&ops[3], targ1);
-       if (maybe_expand_insn (icode, 4, ops))
- 	return 1;
-@@ -5486,7 +5486,7 @@ expand_vec_perm_1 (enum insn_code icode, rtx target,
-   struct expand_operand ops[4];
- 
-   gcc_assert (GET_MODE_CLASS (smode) == MODE_VECTOR_INT
--	      || mode_for_int_vector (tmode).require () == smode);
-+	      || related_int_vector_mode (tmode).require () == smode);
-   create_output_operand (&ops[0], target, tmode);
-   create_input_operand (&ops[3], sel, smode);
- 
-@@ -5611,8 +5611,7 @@ expand_vec_perm_const (machine_mode mode, rtx v0, rtx v1,
-   /* The optabs are only defined for selectors with the same width
-      as the values being permuted.  */
-   machine_mode required_sel_mode;
--  if (!mode_for_int_vector (mode).exists (&required_sel_mode)
--      || !VECTOR_MODE_P (required_sel_mode))
-+  if (!related_int_vector_mode (mode).exists (&required_sel_mode))
-     {
-       delete_insns_since (last);
-       return NULL_RTX;
-diff --git a/gcc/params.def b/gcc/params.def
-index 3f18642475a..b269045fb9c 100644
---- a/gcc/params.def
-+++ b/gcc/params.def
-@@ -1403,7 +1403,7 @@ DEFPARAM (PARAM_MAX_VRP_SWITCH_ASSERTIONS,
- DEFPARAM (PARAM_VECT_EPILOGUES_NOMASK,
- 	  "vect-epilogues-nomask",
- 	  "Enable loop epilogue vectorization using smaller vector size.",
--	  0, 0, 1)
-+	  1, 0, 1)
- 
- DEFPARAM(PARAM_UNROLL_JAM_MIN_PERCENT,
- 	 "unroll-jam-min-percent",
-diff --git a/gcc/poly-int.h b/gcc/poly-int.h
-index d68a652b5fa..ba39ca471be 100644
---- a/gcc/poly-int.h
-+++ b/gcc/poly-int.h
-@@ -1528,6 +1528,29 @@ constant_lower_bound (const poly_int_pod<N, Ca> &a)
-   return a.coeffs[0];
- }
- 
-+/* Return the constant lower bound of A, given that it is no less than B.  */
-+
-+template<unsigned int N, typename Ca, typename Cb>
-+inline POLY_CONST_COEFF (Ca, Cb)
-+constant_lower_bound_with_limit (const poly_int_pod<N, Ca> &a, const Cb &b)
-+{
-+  if (known_ge (a, b))
-+    return a.coeffs[0];
-+  return b;
-+}
-+
-+/* Return the constant upper bound of A, given that it is no greater
-+   than B.  */
-+
-+template<unsigned int N, typename Ca, typename Cb>
-+inline POLY_CONST_COEFF (Ca, Cb)
-+constant_upper_bound_with_limit (const poly_int_pod<N, Ca> &a, const Cb &b)
-+{
-+  if (known_le (a, b))
-+    return a.coeffs[0];
-+  return b;
-+}
-+
- /* Return a value that is known to be no greater than A and B.  This
-    will be the greatest lower bound for some indeterminate values but
-    not necessarily for all.  */
-diff --git a/gcc/read-rtl.c b/gcc/read-rtl.c
-index ebd69bde531..1af51f686c7 100644
---- a/gcc/read-rtl.c
-+++ b/gcc/read-rtl.c
-@@ -1282,7 +1282,7 @@ read_subst_mapping (htab_t subst_iters_table, htab_t subst_attrs_table,
-       m = add_mapping (&substs, subst_iters_table, attr_operands[1]);
-       end_ptr = &m->values;
-       end_ptr = add_map_value (end_ptr, 1, "");
--      end_ptr = add_map_value (end_ptr, 2, "");
-+      add_map_value (end_ptr, 2, "");
- 
-       add_define_attr_for_define_subst (attr_operands[1], queue);
-     }
-@@ -1290,7 +1290,7 @@ read_subst_mapping (htab_t subst_iters_table, htab_t subst_attrs_table,
-   m = add_mapping (&substs, subst_attrs_table, attr_operands[0]);
-   end_ptr = &m->values;
-   end_ptr = add_map_value (end_ptr, 1, attr_operands[2]);
--  end_ptr = add_map_value (end_ptr, 2, attr_operands[3]);
-+  add_map_value (end_ptr, 2, attr_operands[3]);
- }
- 
- /* Check newly-created code iterator ITERATOR to see whether every code has the
-diff --git a/gcc/regrename.c b/gcc/regrename.c
-index 637b3cbe6d7..5259d565e58 100644
---- a/gcc/regrename.c
-+++ b/gcc/regrename.c
-@@ -1426,10 +1426,9 @@ scan_rtx (rtx_insn *insn, rtx *loc, enum reg_class cl, enum scan_actions action,
- {
-   const char *fmt;
-   rtx x = *loc;
--  enum rtx_code code = GET_CODE (x);
-   int i, j;
- 
--  code = GET_CODE (x);
-+  enum rtx_code code = GET_CODE (x);
-   switch (code)
-     {
-     case CONST:
-diff --git a/gcc/reorg.c b/gcc/reorg.c
-index 81349382b81..bdfcf8851cd 100644
---- a/gcc/reorg.c
-+++ b/gcc/reorg.c
-@@ -2708,14 +2708,13 @@ fill_slots_from_thread (rtx_jump_insn *insn, rtx condition,
-       && GET_CODE (PATTERN (new_thread)) != ASM_INPUT
-       && asm_noperands (PATTERN (new_thread)) < 0)
-     {
--      rtx pat = PATTERN (new_thread);
-       rtx dest;
-       rtx src;
- 
-       /* We know "new_thread" is an insn due to NONJUMP_INSN_P (new_thread)
- 	 above.  */
-+      trial = as_a <rtx_insn *> (new_thread);
--      pat = PATTERN (trial);
-+      rtx pat = PATTERN (trial);
- 
-       if (!NONJUMP_INSN_P (trial)
- 	  || GET_CODE (pat) != SET
-diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
-index 50bbb79655b..bdbd1b98eba 100644
---- a/gcc/simplify-rtx.c
-+++ b/gcc/simplify-rtx.c
-@@ -6709,6 +6709,17 @@ simplify_subreg (machine_mode outermode, rtx op,
- 	}
-     }
- 
-+  /* If OP is a vector comparison and the subreg is not changing the
-+     number of elements or the size of the elements, change the result
-+     of the comparison to the new mode.  */
-+  if (COMPARISON_P (op)
-+      && VECTOR_MODE_P (outermode)
-+      && VECTOR_MODE_P (innermode)
-+      && known_eq (GET_MODE_NUNITS (outermode), GET_MODE_NUNITS (innermode))
-+      && known_eq (GET_MODE_UNIT_SIZE (outermode),
-+		    GET_MODE_UNIT_SIZE (innermode)))
-+    return simplify_gen_relational (GET_CODE (op), outermode, innermode,
-+				    XEXP (op, 0), XEXP (op, 1));
-   return NULL_RTX;
- }
- 
-diff --git a/gcc/stor-layout.c b/gcc/stor-layout.c
-index 5d6f2e0166c..a054b7887e7 100644
---- a/gcc/stor-layout.c
-+++ b/gcc/stor-layout.c
-@@ -514,18 +514,43 @@ mode_for_vector (scalar_mode innermode, poly_uint64 nunits)
-   return opt_machine_mode ();
- }
- 
--/* Return the mode for a vector that has NUNITS integer elements of
--   INT_BITS bits each, if such a mode exists.  The mode can be either
--   an integer mode or a vector mode.  */
-+/* If a piece of code is using vector mode VECTOR_MODE and also wants
-+   to operate on elements of mode ELEMENT_MODE, return the vector mode
-+   it should use for those elements.  If NUNITS is nonzero, ensure that
-+   the mode has exactly NUNITS elements, otherwise pick whichever vector
-+   size pairs the most naturally with VECTOR_MODE; this may mean choosing
-+   a mode with a different size and/or number of elements, depending on
-+   what the target prefers.  Return an empty opt_machine_mode if there
-+   is no supported vector mode with the required properties.
-+
-+   Unlike mode_for_vector, any returned mode is guaranteed to satisfy
-+   both VECTOR_MODE_P and targetm.vector_mode_supported_p.  */
- 
- opt_machine_mode
--mode_for_int_vector (unsigned int int_bits, poly_uint64 nunits)
-+related_vector_mode (machine_mode vector_mode, scalar_mode element_mode,
-+		     poly_uint64 nunits)
- {
-+  gcc_assert (VECTOR_MODE_P (vector_mode));
-+  return targetm.vectorize.related_mode (vector_mode, element_mode, nunits);
-+}
-+
-+/* If a piece of code is using vector mode VECTOR_MODE and also wants
-+   to operate on integer vectors with the same element size and number
-+   of elements, return the vector mode it should use.  Return an empty
-+   opt_machine_mode if there is no supported vector mode with the
-+   required properties.
-+
-+   Unlike mode_for_vector, any returned mode is guaranteed to satisfy
-+   both VECTOR_MODE_P and targetm.vector_mode_supported_p.  */
-+
-+opt_machine_mode
-+related_int_vector_mode (machine_mode vector_mode)
-+{
-+  gcc_assert (VECTOR_MODE_P (vector_mode));
-   scalar_int_mode int_mode;
--  machine_mode vec_mode;
--  if (int_mode_for_size (int_bits, 0).exists (&int_mode)
--      && mode_for_vector (int_mode, nunits).exists (&vec_mode))
--    return vec_mode;
-+  if (int_mode_for_mode (GET_MODE_INNER (vector_mode)).exists (&int_mode))
-+    return related_vector_mode (vector_mode, int_mode,
-+				GET_MODE_NUNITS (vector_mode));
-   return opt_machine_mode ();
- }
- 
-diff --git a/gcc/target.def b/gcc/target.def
-index 66cee075018..f998470fffd 100644
---- a/gcc/target.def
-+++ b/gcc/target.def
-@@ -1894,33 +1894,80 @@ reached.  The default is @var{mode} which means no splitting.",
- /* Returns a mask of vector sizes to iterate over when auto-vectorizing
-    after processing the preferred one derived from preferred_simd_mode.  */
- DEFHOOK
--(autovectorize_vector_sizes,
-- "If the mode returned by @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} is not\n\
--the only one that is worth considering, this hook should add all suitable\n\
--vector sizes to @var{sizes}, in order of decreasing preference.  The first\n\
--one should be the size of @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE}.\n\
-+(autovectorize_vector_modes,
-+ "If using the mode returned by @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE}\n\
-+is not the only approach worth considering, this hook should add one mode to\n\
-+@var{modes} for each useful alternative approach.  These modes are then\n\
-+passed to @code{TARGET_VECTORIZE_RELATED_MODE} to obtain the vector mode\n\
-+for a given element mode.\n\
-+\n\
-+The modes returned in @var{modes} should use the smallest element mode\n\
-+possible for the vectorization approach that they represent, preferring\n\
-+integer modes over floating-point modes in the event of a tie.  The first\n\
-+mode should be the @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} for its\n\
-+element mode.\n\
-+\n\
-+If @var{all} is true, add suitable vector modes even when they are generally\n\
-+not expected to be worthwhile.\n\
-+\n\
-+The hook returns a bitmask of flags that control how the modes in\n\
-+@var{modes} are used.  The flags are:\n\
-+@table @code\n\
-+@item VECT_COMPARE_COSTS\n\
-+Tells the loop vectorizer to try all the provided modes and pick the one\n\
-+with the lowest cost.  By default the vectorizer will choose the first\n\
-+mode that works.\n\
-+@end table\n\
- \n\
- The hook does not need to do anything if the vector returned by\n\
- @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} is the only one relevant\n\
--for autovectorization.  The default implementation does nothing.",
-- void,
-- (vector_sizes *sizes),
-- default_autovectorize_vector_sizes)
-+for autovectorization.  The default implementation adds no modes and\n\
-+returns 0.",
-+ unsigned int,
-+ (vector_modes *modes, bool all),
-+ default_autovectorize_vector_modes)
-+
-+DEFHOOK
-+(related_mode,
-+ "If a piece of code is using vector mode @var{vector_mode} and also wants\n\
-+to operate on elements of mode @var{element_mode}, return the vector mode\n\
-+it should use for those elements.  If @var{nunits} is nonzero, ensure that\n\
-+the mode has exactly @var{nunits} elements, otherwise pick whichever vector\n\
-+size pairs the most naturally with @var{vector_mode}.  Return an empty\n\
-+@code{opt_machine_mode} if there is no supported vector mode with the\n\
-+required properties.\n\
-+\n\
-+There is no prescribed way of handling the case in which @var{nunits}\n\
-+is zero.  One common choice is to pick a vector mode with the same size\n\
-+as @var{vector_mode}; this is the natural choice if the target has a\n\
-+fixed vector size.  Another option is to choose a vector mode with the\n\
-+same number of elements as @var{vector_mode}; this is the natural choice\n\
-+if the target has a fixed number of elements.  Alternatively, the hook\n\
-+might choose a middle ground, such as trying to keep the number of\n\
-+elements as similar as possible while applying maximum and minimum\n\
-+vector sizes.\n\
-+\n\
-+The default implementation uses @code{mode_for_vector} to find the\n\
-+requested mode, returning a mode with the same size as @var{vector_mode}\n\
-+when @var{nunits} is zero.  This is the correct behavior for most targets.",
-+ opt_machine_mode,
-+ (machine_mode vector_mode, scalar_mode element_mode, poly_uint64 nunits),
-+ default_vectorize_related_mode)
- 
- /* Function to get a target mode for a vector mask.  */
- DEFHOOK
- (get_mask_mode,
-- "A vector mask is a value that holds one boolean result for every element\n\
--in a vector.  This hook returns the machine mode that should be used to\n\
--represent such a mask when the vector in question is @var{length} bytes\n\
--long and contains @var{nunits} elements.  The hook returns an empty\n\
--@code{opt_machine_mode} if no such mode exists.\n\
--\n\
--The default implementation returns the mode of an integer vector that\n\
--is @var{length} bytes long and that contains @var{nunits} elements,\n\
--if such a mode exists.",
-+ "Return the mode to use for a vector mask that holds one boolean\n\
-+result for each element of vector mode @var{mode}.  The returned mask mode\n\
-+can be a vector of integers (class @code{MODE_VECTOR_INT}), a vector of\n\
-+booleans (class @code{MODE_VECTOR_BOOL}) or a scalar integer (class\n\
-+@code{MODE_INT}).  Return an empty @code{opt_machine_mode} if no such\n\
-+mask mode exists.\n\
-+\n\
-+The default implementation returns a @code{MODE_VECTOR_INT} with the\n\
-+same size and number of elements as @var{mode}, if such a mode exists.",
-  opt_machine_mode,
-- (poly_uint64 nunits, poly_uint64 length),
-+ (machine_mode mode),
-  default_get_mask_mode)
- 
- /* Function to say whether a masked operation is expensive when the
-diff --git a/gcc/target.h b/gcc/target.h
-index 008932b5dbd..057e6ae8768 100644
---- a/gcc/target.h
-+++ b/gcc/target.h
-@@ -199,11 +199,19 @@ enum vect_cost_model_location {
- class vec_perm_indices;
- 
- /* The type to use for lists of vector sizes.  */
--typedef vec<poly_uint64> vector_sizes;
-+typedef vec<machine_mode> vector_modes;
- 
- /* Same, but can be used to construct local lists that are
-    automatically freed.  */
--typedef auto_vec<poly_uint64, 8> auto_vector_sizes;
-+typedef auto_vec<machine_mode, 8> auto_vector_modes;
-+
-+/* Flags returned by TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES:
-+
-+   VECT_COMPARE_COSTS
-+       Tells the loop vectorizer to try all the provided modes and
-+       pick the one with the lowest cost.  By default the vectorizer
-+       will choose the first mode that works.  */
-+const unsigned int VECT_COMPARE_COSTS = 1U << 0;
- 
- /* The target structure.  This holds all the backend hooks.  */
- #define DEFHOOKPOD(NAME, DOC, TYPE, INIT) TYPE NAME;
-diff --git a/gcc/targhooks.c b/gcc/targhooks.c
-index 02b9dc59611..6396f6f4bdf 100644
---- a/gcc/targhooks.c
-+++ b/gcc/targhooks.c
-@@ -1312,32 +1312,39 @@ default_split_reduction (machine_mode mode)
-   return mode;
- }
- 
--/* By default only the size derived from the preferred vector mode
--   is tried.  */
-+/* By default only the preferred vector mode is tried.  */
- 
--void
--default_autovectorize_vector_sizes (vector_sizes *)
-+unsigned int
-+default_autovectorize_vector_modes (vector_modes *, bool)
- {
-+  return 0;
- }
- 
--/* By default a vector of integers is used as a mask.  */
-+/* The default implementation of TARGET_VECTORIZE_RELATED_MODE.  */
- 
- opt_machine_mode
--default_get_mask_mode (poly_uint64 nunits, poly_uint64 vector_size)
--{
--  unsigned int elem_size = vector_element_size (vector_size, nunits);
--  scalar_int_mode elem_mode
--    = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT);
--  machine_mode vector_mode;
-+default_vectorize_related_mode (machine_mode vector_mode,
-+				scalar_mode element_mode,
-+				poly_uint64 nunits)
-+{
-+  machine_mode result_mode;
-+  if ((maybe_ne (nunits, 0U)
-+       || multiple_p (GET_MODE_SIZE (vector_mode),
-+		      GET_MODE_SIZE (element_mode), &nunits))
-+      && mode_for_vector (element_mode, nunits).exists (&result_mode)
-+      && VECTOR_MODE_P (result_mode)
-+      && targetm.vector_mode_supported_p (result_mode))
-+    return result_mode;
- 
--  gcc_assert (known_eq (elem_size * nunits, vector_size));
-+  return opt_machine_mode ();
-+}
- 
--  if (mode_for_vector (elem_mode, nunits).exists (&vector_mode)
--      && VECTOR_MODE_P (vector_mode)
--      && targetm.vector_mode_supported_p (vector_mode))
--    return vector_mode;
-+/* By default a vector of integers is used as a mask.  */
- 
--  return opt_machine_mode ();
-+opt_machine_mode
-+default_get_mask_mode (machine_mode mode)
-+{
-+  return related_int_vector_mode (mode);
- }
- 
- /* By default consider masked stores to be expensive.  */
-diff --git a/gcc/targhooks.h b/gcc/targhooks.h
-index 59436278dcf..2d599190891 100644
---- a/gcc/targhooks.h
-+++ b/gcc/targhooks.h
-@@ -110,8 +110,11 @@ default_builtin_support_vector_misalignment (machine_mode mode,
- 					     int, bool);
- extern machine_mode default_preferred_simd_mode (scalar_mode mode);
- extern machine_mode default_split_reduction (machine_mode);
--extern void default_autovectorize_vector_sizes (vector_sizes *);
--extern opt_machine_mode default_get_mask_mode (poly_uint64, poly_uint64);
-+extern unsigned int default_autovectorize_vector_modes (vector_modes *, bool);
-+extern opt_machine_mode default_vectorize_related_mode (machine_mode,
-+							scalar_mode,
-+							poly_uint64);
-+extern opt_machine_mode default_get_mask_mode (machine_mode);
- extern bool default_empty_mask_is_expensive (unsigned);
- extern void *default_init_cost (struct loop *);
- extern unsigned default_add_stmt_cost (void *, int, enum vect_cost_for_stmt,
-diff --git a/gcc/testsuite/g++.dg/opt/pr92317.C b/gcc/testsuite/g++.dg/opt/pr92317.C
-new file mode 100644
-index 00000000000..2bb9729fc96
---- /dev/null
-+++ b/gcc/testsuite/g++.dg/opt/pr92317.C
-@@ -0,0 +1,51 @@
-+// Copied from pr87967.C
-+// { dg-do compile { target c++11 } }
-+// { dg-options "-O2 -ftree-vectorize -fno-tree-pre --param vect-epilogues-nomask=1" }
-+
-+void h();
-+template <typename b> struct k { using d = b; };
-+template  class> using e = k;
-+template  class f>
-+using g = typename e::d;
-+struct l {
-+  template <typename i> using ab = typename i::j;
-+};
-+struct n : l {
-+  using j = g;
-+};
-+class o {
-+public:
-+  long r();
-+};
-+char m;
-+char s() {
-+  if (m)
-+    return '0';
-+  return 'A';
-+}
-+class t {
-+public:
-+  typedef char *ad;
-+  ad m_fn2();
-+};
-+void fn3() {
-+  char *a;
-+  t b;
-+  bool p = false;
-+  while (*a) {
-+    h();
-+    o c;
-+    if (*a)
-+      a++;
-+    if (c.r()) {
-+      n::j q;
-+      for (t::ad d = b.m_fn2(), e; d != e; d++) {
-+        char f = *q;
-+        *d = f + s();
-+      }
-+      p = true;
-+    }
-+  }
-+  if (p)
-+    throw;
-+}
-diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr90883.C b/gcc/testsuite/g++.dg/tree-ssa/pr90883.C
-new file mode 100644
-index 00000000000..0e622f263d2
---- /dev/null
-+++ b/gcc/testsuite/g++.dg/tree-ssa/pr90883.C
-@@ -0,0 +1,20 @@
-+// { dg-options "-O2 -Os -fdump-tree-dse-details -std=c++11 --param max-inline-insns-size=1" }
-+
-+
-+    class C
-+    {
-+        char a[7]{};
-+        int b{};
-+    };
-+
-+    C slow()
-+    {
-+        return {};
-+    }
-+
-+
-+// We want to match enough here to capture that we deleted an empty
-+// constructor store
-+// aarch64 and mips will expand to loop to clear because CLEAR_RATIO.
-+// { dg-final { scan-tree-dump "Deleted redundant store: .*\.a = {}" "dse1" { xfail { aarch64-*-* mips*-*-* } } } }
-+
-diff --git a/gcc/testsuite/gcc.dg/pr92162.c b/gcc/testsuite/gcc.dg/pr92162.c
-new file mode 100644
-index 00000000000..ed82595a752
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/pr92162.c
-@@ -0,0 +1,10 @@
-+/* { dg-do compile } */
-+/* { dg-options "-Ofast" } */
-+
-+short int s8;
-+
-+void __attribute__ ((simd))
-+gn (void)
-+{
-+  s8 = 0;
-+}
-diff --git a/gcc/testsuite/gcc.dg/torture/pr91896.c b/gcc/testsuite/gcc.dg/torture/pr91896.c
-new file mode 100644
-index 00000000000..e728538bb9a
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/torture/pr91896.c
-@@ -0,0 +1,18 @@
-+/* { dg-do compile } */
-+/* { dg-additional-options "-ftree-vectorize" } */
-+
-+unsigned int
-+zj (unsigned int et)
-+{
-+  signed char jr = 0;
-+
-+  do {
-+    et *= 3;
-+    jr += 2;
-+  } while (jr >= 0);
-+
-+  if (et == (unsigned int) jr)
-+    et = 0;
-+
-+  return et;
-+}
-diff --git a/gcc/testsuite/gcc.dg/torture/pr92069.c b/gcc/testsuite/gcc.dg/torture/pr92069.c
-new file mode 100644
-index 00000000000..806ff5fba14
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/torture/pr92069.c
-@@ -0,0 +1,19 @@
-+/* { dg-do compile } */
-+/* { dg-additional-options "-ftree-vectorize" } */
-+
-+unsigned int a, c, d;
-+double b;
-+void e()
-+{
-+  for (; d; d++)
-+    {
-+      double f;
-+      a = 2;
-+      for (; a; a++)
-+	{
-+	  c = b;
-+	  b = f;
-+	  f = c;
-+	}
-+    }
-+}
-diff --git a/gcc/testsuite/gcc.dg/torture/pr92173.c b/gcc/testsuite/gcc.dg/torture/pr92173.c
-new file mode 100644
-index 00000000000..fcb3548b716
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/torture/pr92173.c
-@@ -0,0 +1,11 @@
-+/* { dg-do compile } */
-+/* { dg-additional-options "-ftree-vectorize" } */
-+
-+unsigned int
-+yo (unsigned int o0, signed char s1)
-+{
-+  for (s1 = 0; s1 < 1; s1 -= 2)
-+    o0 += o0;
-+
-+  return o0 + s1;
-+}
-diff --git a/gcc/testsuite/gcc.dg/torture/pr92241.c b/gcc/testsuite/gcc.dg/torture/pr92241.c
-new file mode 100644
-index 00000000000..331d03b3d44
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/torture/pr92241.c
-@@ -0,0 +1,13 @@
-+/* { dg-do compile } */
-+/* { dg-additional-options "-ftree-vectorize" } */
-+
-+int a, b;
-+char c[2];
-+void d() {
-+  char e;
-+  for (; b; b--) {
-+    e = 0;
-+    for (; e <= 1; e++)
-+      a &= c[b + e] && 1;
-+  }
-+}
-diff --git a/gcc/testsuite/gcc.dg/torture/pr92275.c b/gcc/testsuite/gcc.dg/torture/pr92275.c
-new file mode 100644
-index 00000000000..b9f70889758
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/torture/pr92275.c
-@@ -0,0 +1,13 @@
-+/* { dg-do compile } */
-+/* { dg-additional-options "-ftree-vectorize" } */
-+
-+unsigned long a, c;
-+int *b, *b2;
-+long d;
-+
-+void fn1()
-+{
-+  for (; b < b2; b++)
-+    d += *b * c;
-+  d *= a;
-+}
-diff --git a/gcc/testsuite/gcc.dg/torture/pr92371.c b/gcc/testsuite/gcc.dg/torture/pr92371.c
-new file mode 100644
-index 00000000000..0c78d32f471
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/torture/pr92371.c
-@@ -0,0 +1,12 @@
-+/* { dg-do compile } */
-+/* { dg-additional-options "-ftree-vectorize" } */
-+
-+int a, b;
-+void d()
-+{
-+  int c = sizeof(int);
-+  for (; a; a++)
-+    c *= sizeof(int);
-+  c *= sizeof(int);
-+  b = c;
-+}
-diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-36.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-36.c
-new file mode 100644
-index 00000000000..23a53bb4ad2
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-36.c
-@@ -0,0 +1,65 @@
-+/* { dg-options "-O2 -fdump-tree-dse-details -fno-tree-fre" } */
-+#include <stdlib.h>
-+#include <string.h>
-+
-+struct X
-+{
-+  char mem0[10];
-+  char mem1[10];
-+};
-+
-+
-+void blah (struct X);
-+
-+
-+void
-+foo1()
-+{
-+  struct X x = { };
-+  memset (x.mem1, 0, sizeof x.mem1);
-+  blah (x);
-+}
-+
-+void
-+foo2()
-+{
-+  struct X x = { };
-+  x.mem1[5] = 0;
-+  blah (x);
-+}
-+
-+void
-+bar1 ()
-+{
-+  struct X x;
-+  memset (&x, 0, sizeof x);
-+  memset (&x.mem1, 0, sizeof x.mem1);
-+  blah (x);
-+}
-+void
-+bar2 ()
-+{
-+  struct X x;
-+  memset (&x, 0, sizeof x);
-+  x.mem1[5] = 0;
-+  blah (x);
-+}
-+
-+void
-+baz1 ()
-+{
-+  struct X *x = calloc (sizeof (struct X), 1);
-+  memset (&x->mem1, 0, sizeof x->mem1);
-+  blah (*x);
-+}
-+
-+void
-+baz2 ()
-+{
-+  struct X *x = calloc (sizeof (struct X), 1);
-+  x->mem1[5] = 0;
-+  blah (*x);
-+}
-+/* { dg-final { scan-tree-dump-times "Deleted redundant call" 3 "dse1" } } */
-+/* { dg-final { scan-tree-dump-times "Deleted redundant store" 3 "dse1" } } */
-+
-diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-div-2.c b/gcc/testsuite/gcc.dg/vect/bb-slp-div-2.c
-new file mode 100644
-index 00000000000..715c22ac6c6
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-div-2.c
-@@ -0,0 +1,14 @@
-+/* { dg-do compile } */
-+
-+int x[4], y[4], z[4];
-+
-+void
-+f (void)
-+{
-+  x[0] += y[0] / z[0] * 2;
-+  x[1] += y[1] / z[1] * 2;
-+  x[2] += y[2] / z[2] * 2;
-+  x[3] += y[3] / z[3] * 2;
-+}
-+
-+/* { dg-final { scan-tree-dump "basic block vectorized" "slp2" { target vect_int } } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr69907.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr69907.c
-index 85f9a02582f..813b1af089a 100644
---- a/gcc/testsuite/gcc.dg/vect/bb-slp-pr69907.c
-+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr69907.c
-@@ -18,5 +18,6 @@ void foo(unsigned *p1, unsigned short *p2)
- }
- 
- /* Disable for SVE because for long or variable-length vectors we don't
--   get an unrolled epilogue loop.  */
--/* { dg-final { scan-tree-dump "BB vectorization with gaps at the end of a load is not supported" "slp1" { target { ! aarch64_sve } } } } */
-+   get an unrolled epilogue loop.  Also disable for AArch64 Advanced SIMD,
-+   because there we can vectorize the epilogue using mixed vector sizes.  */
-+/* { dg-final { scan-tree-dump "BB vectorization with gaps at the end of a load is not supported" "slp1" { target { ! aarch64*-*-* } } } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c b/gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c
-index 228190ab05d..877de4eb5be 100644
---- a/gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c
-+++ b/gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- #include "tree-vect.h"
- 
- extern float copysignf (float, float);
-diff --git a/gcc/testsuite/gcc.dg/vect/no-fast-math-vect16.c b/gcc/testsuite/gcc.dg/vect/no-fast-math-vect16.c
-index 7a148e41d51..5f871289337 100644
---- a/gcc/testsuite/gcc.dg/vect/no-fast-math-vect16.c
-+++ b/gcc/testsuite/gcc.dg/vect/no-fast-math-vect16.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_float_strict } */
- 
- #include 
-diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-noreassoc-slp-reduc-7.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-noreassoc-slp-reduc-7.c
-index 1d674504e2c..022d49f1175 100644
---- a/gcc/testsuite/gcc.dg/vect/no-scevccp-noreassoc-slp-reduc-7.c
-+++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-noreassoc-slp-reduc-7.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- 
- #include 
-diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-12.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-12.c
-index e4202b10d06..b5f8c3c88e4 100644
---- a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-12.c
-+++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-12.c
-@@ -46,4 +46,4 @@ int main (void)
- }
- 
- /* Until we support multiple types in the inner loop  */
--/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { xfail *-*-* } } } */
-+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { xfail { ! aarch64*-*-* } } } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-vect-iv-3.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-vect-iv-3.c
-index 50b4998bb6c..7049e4936b9 100644
---- a/gcc/testsuite/gcc.dg/vect/no-scevccp-vect-iv-3.c
-+++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-vect-iv-3.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-do compile } */
- /* { dg-require-effective-target vect_int } */
- 
-diff --git a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-31.c b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-31.c
-index c3b242157ce..d2ae7976781 100644
---- a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-31.c
-+++ b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-31.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- /* { dg-add-options bind_pic_locally } */
- 
-diff --git a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-64.c b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-64.c
-index 470bbfb5537..243e01e6dad 100644
---- a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-64.c
-+++ b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-64.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- /* { dg-add-options bind_pic_locally } */
- 
-diff --git a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-66.c b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-66.c
-index 805024d8058..e339590bacb 100644
---- a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-66.c
-+++ b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-66.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- 
- #include 
-diff --git a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-68.c b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-68.c
-index 726c0de652f..c403a8302d8 100644
---- a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-68.c
-+++ b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-68.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- /* { dg-skip-if "AArch64 tiny code model does not support programs larger than 1MiB" {aarch64_tiny} } */
- /* { dg-add-options bind_pic_locally } */
-diff --git a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-dv-2.c b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-dv-2.c
-index 4513c40b34f..dcb53701795 100644
---- a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-dv-2.c
-+++ b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-dv-2.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- 
- #include 
-diff --git a/gcc/testsuite/gcc.dg/vect/pr33804.c b/gcc/testsuite/gcc.dg/vect/pr33804.c
-index 86babbe60e7..0db13674b42 100644
---- a/gcc/testsuite/gcc.dg/vect/pr33804.c
-+++ b/gcc/testsuite/gcc.dg/vect/pr33804.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-do compile } */
- /* { dg-require-effective-target vect_int } */
- 
-diff --git a/gcc/testsuite/gcc.dg/vect/pr53773.c b/gcc/testsuite/gcc.dg/vect/pr53773.c
-index 0bcc021767e..7f8229571ec 100644
---- a/gcc/testsuite/gcc.dg/vect/pr53773.c
-+++ b/gcc/testsuite/gcc.dg/vect/pr53773.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-do compile } */
- /* { dg-additional-options "-fdump-tree-optimized" } */
- 
-diff --git a/gcc/testsuite/gcc.dg/vect/pr65930-1.c b/gcc/testsuite/gcc.dg/vect/pr65930-1.c
-new file mode 100644
-index 00000000000..895fbf8869d
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/vect/pr65930-1.c
-@@ -0,0 +1,26 @@
-+/* { dg-require-effective-target vect_int } */
-+
-+#include "tree-vect.h"
-+
-+unsigned __attribute__((noipa))
-+bar (unsigned int *x)
-+{
-+  int sum = 4;
-+  x = __builtin_assume_aligned (x, __BIGGEST_ALIGNMENT__);
-+  for (int i = 0; i < 16; ++i)
-+    sum += x[i];
-+  return sum;
-+}
-+
-+int
-+main()
-+{
-+  static int a[16] __attribute__((aligned(__BIGGEST_ALIGNMENT__)))
-+    = { 1, 3, 5, 8, 9, 10, 17, 18, 23, 29, 30, 55, 42, 2, 3, 1 };
-+  check_vect ();
-+  if (bar (a) != 260)
-+    abort ();
-+  return 0;
-+}
-+
-+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/pr65930-2.c b/gcc/testsuite/gcc.dg/vect/pr65930-2.c
-new file mode 100644
-index 00000000000..9cfb9b102d9
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/vect/pr65930-2.c
-@@ -0,0 +1,28 @@
-+/* { dg-require-effective-target vect_int } */
-+
-+#include "tree-vect.h"
-+
-+int __attribute__((noipa))
-+bar (unsigned int *x, int n)
-+{
-+  int sum = 4;
-+  x = __builtin_assume_aligned (x, __BIGGEST_ALIGNMENT__);
-+  for (int i = 0; i < n; ++i)
-+    sum += x[i*4+0]+ x[i*4 + 1] + x[i*4 + 2] + x[i*4 + 3];
-+  return sum;
-+}
-+
-+int
-+main ()
-+{
-+  static int a[16] __attribute__((aligned(__BIGGEST_ALIGNMENT__)))
-+    = { 1, 3, 5, 8, 9, 10, 17, 18, 23, 29, 30, 55, 42, 2, 3, 1 };
-+  check_vect ();
-+  if (bar (a, 4) != 260)
-+    abort ();
-+  return 0;
-+}
-+
-+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */
-+/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" } } */
-+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-1.c b/gcc/testsuite/gcc.dg/vect/pr65947-1.c
-index 879819d576a..9fc74a1ab28 100644
---- a/gcc/testsuite/gcc.dg/vect/pr65947-1.c
-+++ b/gcc/testsuite/gcc.dg/vect/pr65947-1.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_condition } */
- 
- #include "tree-vect.h"
-@@ -41,5 +43,5 @@ main (void)
- }
- 
- /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
--/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
--/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" { target { ! vect_fold_extract_last } } } } */
-+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
-+/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-10.c b/gcc/testsuite/gcc.dg/vect/pr65947-10.c
-index f37aecab082..e4a1d9419c2 100644
---- a/gcc/testsuite/gcc.dg/vect/pr65947-10.c
-+++ b/gcc/testsuite/gcc.dg/vect/pr65947-10.c
-@@ -42,6 +42,6 @@ main (void)
- }
- 
- /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
--/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
-+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
- /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
- 
-diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-12.c b/gcc/testsuite/gcc.dg/vect/pr65947-12.c
-index b84fd41bc63..a47f4146a29 100644
---- a/gcc/testsuite/gcc.dg/vect/pr65947-12.c
-+++ b/gcc/testsuite/gcc.dg/vect/pr65947-12.c
-@@ -42,5 +42,5 @@ main (void)
- }
- 
- /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
--/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
-+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
- /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-13.c b/gcc/testsuite/gcc.dg/vect/pr65947-13.c
-index e1d3ff52f5c..a703923151d 100644
---- a/gcc/testsuite/gcc.dg/vect/pr65947-13.c
-+++ b/gcc/testsuite/gcc.dg/vect/pr65947-13.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_condition } */
- 
- #include "tree-vect.h"
-@@ -41,5 +43,5 @@ main (void)
- }
- 
- /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
--/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" { xfail vect_fold_extract_last } } } */
--/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
-+/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { xfail vect_fold_extract_last } } } */
-+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-14.c b/gcc/testsuite/gcc.dg/vect/pr65947-14.c
-index 9f1e4e1eb6a..3b76fda2122 100644
---- a/gcc/testsuite/gcc.dg/vect/pr65947-14.c
-+++ b/gcc/testsuite/gcc.dg/vect/pr65947-14.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_condition } */
- 
- #include "tree-vect.h"
-@@ -41,5 +43,5 @@ main (void)
- }
- 
- /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
--/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
--/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" { target { ! vect_fold_extract_last } } } } */
-+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
-+/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-2.c b/gcc/testsuite/gcc.dg/vect/pr65947-2.c
-index 18d33c436a5..58ba5f764d0 100644
---- a/gcc/testsuite/gcc.dg/vect/pr65947-2.c
-+++ b/gcc/testsuite/gcc.dg/vect/pr65947-2.c
-@@ -42,5 +42,5 @@ main (void)
- }
- 
- /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
--/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
-+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
- /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-3.c b/gcc/testsuite/gcc.dg/vect/pr65947-3.c
-index 427abdb4140..6b4077e1a62 100644
---- a/gcc/testsuite/gcc.dg/vect/pr65947-3.c
-+++ b/gcc/testsuite/gcc.dg/vect/pr65947-3.c
-@@ -52,5 +52,5 @@ main (void)
- }
- 
- /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
--/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
-+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
- /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-4.c b/gcc/testsuite/gcc.dg/vect/pr65947-4.c
-index 186e03a6346..471fbe2da21 100644
---- a/gcc/testsuite/gcc.dg/vect/pr65947-4.c
-+++ b/gcc/testsuite/gcc.dg/vect/pr65947-4.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_condition } */
- 
- #include "tree-vect.h"
-@@ -41,6 +43,6 @@ main (void)
- }
- 
- /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
--/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
--/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" { target { ! vect_fold_extract_last } } } } */
-+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
-+/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */
- 
-diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-5.c b/gcc/testsuite/gcc.dg/vect/pr65947-5.c
-index c91b648aa05..4e3f765cd0c 100644
---- a/gcc/testsuite/gcc.dg/vect/pr65947-5.c
-+++ b/gcc/testsuite/gcc.dg/vect/pr65947-5.c
-@@ -53,5 +53,5 @@ main (void)
- /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" { target { ! vect_fold_extract_last } } } } */
- /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { target vect_fold_extract_last } } } */
- /* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" { xfail vect_fold_extract_last } } } */
--/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
-+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
- /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-6.c b/gcc/testsuite/gcc.dg/vect/pr65947-6.c
-index b072c8d33a2..dde96d7a553 100644
---- a/gcc/testsuite/gcc.dg/vect/pr65947-6.c
-+++ b/gcc/testsuite/gcc.dg/vect/pr65947-6.c
-@@ -41,5 +41,5 @@ main (void)
- }
- 
- /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
--/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */
-+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
- /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-9.c b/gcc/testsuite/gcc.dg/vect/pr65947-9.c
-index e43e0e473be..1f295306016 100644
---- a/gcc/testsuite/gcc.dg/vect/pr65947-9.c
-+++ b/gcc/testsuite/gcc.dg/vect/pr65947-9.c
-@@ -48,5 +48,5 @@ main ()
- /* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { target { ! vect_fold_extract_last } } } } */
- /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" { target vect_fold_extract_last } } } */
- /* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" { target { ! vect_fold_extract_last } } } } */
--/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
-+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 1 "vect" { target vect_fold_extract_last } } } */
- /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/pr80631-1.c b/gcc/testsuite/gcc.dg/vect/pr80631-1.c
-index f2405198a10..cbb9a6ff69a 100644
---- a/gcc/testsuite/gcc.dg/vect/pr80631-1.c
-+++ b/gcc/testsuite/gcc.dg/vect/pr80631-1.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* PR tree-optimization/80631 */
- 
- #include "tree-vect.h"
-@@ -72,5 +74,5 @@ main ()
- }
- 
- /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 5 "vect" { target vect_condition } } } */
--/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 10 "vect" { target vect_fold_extract_last } } } */
--/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 10 "vect" { target { { ! vect_fold_extract_last } && vect_condition } } } } */
-+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 5 "vect" { target vect_fold_extract_last } } } */
-+/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 5 "vect" { target { { ! vect_fold_extract_last } && vect_condition } } } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/pr80631-2.c b/gcc/testsuite/gcc.dg/vect/pr80631-2.c
-index b334ca2345b..61e11316af2 100644
---- a/gcc/testsuite/gcc.dg/vect/pr80631-2.c
-+++ b/gcc/testsuite/gcc.dg/vect/pr80631-2.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* PR tree-optimization/80631 */
- 
- #include "tree-vect.h"
-@@ -72,5 +74,5 @@ main ()
- }
- 
- /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 5 "vect" { target vect_condition } } } */
--/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 10 "vect" { target vect_condition xfail vect_fold_extract_last } } } */
--/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 10 "vect" { target vect_fold_extract_last } } } */
-+/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 5 "vect" { target vect_condition xfail vect_fold_extract_last } } } */
-+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 5 "vect" { target vect_fold_extract_last } } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/pr92205.c b/gcc/testsuite/gcc.dg/vect/pr92205.c
-new file mode 100644
-index 00000000000..a031c1fe297
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/vect/pr92205.c
-@@ -0,0 +1,13 @@
-+/* { dg-do compile } */
-+/* { dg-require-effective-target vect_int } */
-+
-+int b(int n, unsigned char *a)
-+{
-+  int d = 0;
-+  a = __builtin_assume_aligned (a, __BIGGEST_ALIGNMENT__);
-+  for (int c = 0; c < n; ++c)
-+    d |= a[c];
-+  return d;
-+}
-+
-+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail *-*-* } } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/slp-23.c b/gcc/testsuite/gcc.dg/vect/slp-23.c
-index 7d330c787d1..d7c67fe2c6e 100644
---- a/gcc/testsuite/gcc.dg/vect/slp-23.c
-+++ b/gcc/testsuite/gcc.dg/vect/slp-23.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- 
- #include 
-diff --git a/gcc/testsuite/gcc.dg/vect/slp-25.c b/gcc/testsuite/gcc.dg/vect/slp-25.c
-index ff7eff202cb..1c33927c434 100644
---- a/gcc/testsuite/gcc.dg/vect/slp-25.c
-+++ b/gcc/testsuite/gcc.dg/vect/slp-25.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- /* { dg-add-options bind_pic_locally } */
- 
-diff --git a/gcc/testsuite/gcc.dg/vect/slp-9.c b/gcc/testsuite/gcc.dg/vect/slp-9.c
-index d0c94f1986b..d5212dca3dd 100644
---- a/gcc/testsuite/gcc.dg/vect/slp-9.c
-+++ b/gcc/testsuite/gcc.dg/vect/slp-9.c
-@@ -44,5 +44,5 @@ int main (void)
- }
- 
- /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_hi_to_si } } }*/
--/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_widen_mult_hi_to_si } } } */
-+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" { target vect_widen_mult_hi_to_si } } } */
- 
-diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-2.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-2.c
-index 07c96c00eb0..15dd59922fc 100644
---- a/gcc/testsuite/gcc.dg/vect/slp-reduc-2.c
-+++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-2.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- 
- #include 
-diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-5.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-5.c
-index fc689e46ba1..f457c11aa3c 100644
---- a/gcc/testsuite/gcc.dg/vect/slp-reduc-5.c
-+++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-5.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- 
- #include 
-diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-6.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-6.c
-index 88591c5bdcb..1fd15aa3c87 100644
---- a/gcc/testsuite/gcc.dg/vect/slp-reduc-6.c
-+++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-6.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- 
- #include 
-diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-sad-2.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-sad-2.c
-new file mode 100644
-index 00000000000..7d9255e48f2
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-sad-2.c
-@@ -0,0 +1,31 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
-+/* { dg-do compile } */
-+/* { dg-require-effective-target vect_usad_char } */
-+/* With AVX256 or more we do not pull off the trick eliding the epilogue.  */
-+/* { dg-additional-options "-mprefer-avx128" { target { x86_64-*-* i?86-*-* } } } */
-+
-+typedef unsigned char uint8_t;
-+int x264_pixel_sad_8x8( uint8_t *pix1, uint8_t *pix2, int i_stride_pix2 )
-+{
-+  int i_sum = 0;
-+  for( int y = 0; y < 8; y++ )
-+    {
-+      i_sum += __builtin_abs( pix1[0] - pix2[0] );
-+      i_sum += __builtin_abs( pix1[1] - pix2[1] );
-+      i_sum += __builtin_abs( pix1[2] - pix2[2] );
-+      i_sum += __builtin_abs( pix1[3] - pix2[3] );
-+      i_sum += __builtin_abs( pix1[4] - pix2[4] );
-+      i_sum += __builtin_abs( pix1[5] - pix2[5] );
-+      i_sum += __builtin_abs( pix1[6] - pix2[6] );
-+      i_sum += __builtin_abs( pix1[7] - pix2[7] );
-+      pix1 += 16;
-+      pix2 += i_stride_pix2;
-+    }
-+  return i_sum;
-+}
-+
-+/* { dg-final { scan-tree-dump "vect_recog_sad_pattern: detected" "vect" } } */
-+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */
-+/* { dg-final { scan-tree-dump-not "access with gaps requires scalar epilogue loop" "vect" } } */
-+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c
-index f5fb63e19f1..e3bfee33348 100644
---- a/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c
-+++ b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- 
- #include "tree-vect.h"
-diff --git a/gcc/testsuite/gcc.dg/vect/slp-widen-mult-s16.c b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-s16.c
-index 4460d59b5a1..abb10fde45b 100644
---- a/gcc/testsuite/gcc.dg/vect/slp-widen-mult-s16.c
-+++ b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-s16.c
-@@ -38,5 +38,5 @@ int main (void)
- }
- 
- /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_widen_mult_hi_to_si || vect_unpack } } } } */
--/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_widen_mult_hi_to_si || vect_unpack } } } } */
-+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" { target { vect_widen_mult_hi_to_si || vect_unpack } } } } */
- 
-diff --git a/gcc/testsuite/gcc.dg/vect/slp-widen-mult-u8.c b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-u8.c
-index 6e72c4878c2..0756119afb4 100644
---- a/gcc/testsuite/gcc.dg/vect/slp-widen-mult-u8.c
-+++ b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-u8.c
-@@ -38,5 +38,5 @@ int main (void)
- }
- 
- /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_widen_mult_qi_to_hi || vect_unpack } } } } */
--/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_widen_mult_hi_to_si || vect_unpack } } } } */
-+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" { target { vect_widen_mult_hi_to_si || vect_unpack } } } } */
- 
-diff --git a/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c b/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c
-index 8a57eb69a91..f09c964fdc1 100644
---- a/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c
-+++ b/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-do compile } */
- /* { dg-require-effective-target vect_int } */
- 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-103.c b/gcc/testsuite/gcc.dg/vect/vect-103.c
-index 4a9e1574eb0..2a4510482d4 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-103.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-103.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- 
- #include 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-109.c b/gcc/testsuite/gcc.dg/vect/vect-109.c
-index 9a507105899..ac5d0827899 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-109.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-109.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-skip-if "" { vect_no_align } } */
- /* { dg-require-effective-target vect_int } */
- /* { dg-add-options bind_pic_locally } */
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-119.c b/gcc/testsuite/gcc.dg/vect/vect-119.c
-index aa8c3002bff..29a9c51cd29 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-119.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-119.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-do compile } */
- /* { dg-require-effective-target vect_int } */
- 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-24.c b/gcc/testsuite/gcc.dg/vect/vect-24.c
-index cbff6c55fa4..fa4c0620d29 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-24.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-24.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- 
- #include 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-26.c b/gcc/testsuite/gcc.dg/vect/vect-26.c
-index 4f0472b5d0f..8a141f38400 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-26.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-26.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- 
- #include 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-27.c b/gcc/testsuite/gcc.dg/vect/vect-27.c
-index 590217feee7..ac86b21aceb 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-27.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-27.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- /* { dg-add-options bind_pic_locally } */
- 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-29.c b/gcc/testsuite/gcc.dg/vect/vect-29.c
-index 86ec2cc1ddf..bbd446dfe63 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-29.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-29.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- /* { dg-add-options bind_pic_locally } */
- 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-42.c b/gcc/testsuite/gcc.dg/vect/vect-42.c
-index a65b4a62276..086cbf20c0a 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-42.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-42.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_float } */
- /* { dg-add-options double_vectors } */
- 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-44.c b/gcc/testsuite/gcc.dg/vect/vect-44.c
-index 03ef2c0f671..f7f1fd28665 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-44.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-44.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_float } */
- /* { dg-additional-options "--param vect-max-peeling-for-alignment=0" } */
- 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-48.c b/gcc/testsuite/gcc.dg/vect/vect-48.c
-index bac6ef6b8dd..b29fe47635a 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-48.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-48.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_float } */
- /* { dg-add-options double_vectors } */
- 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-50.c b/gcc/testsuite/gcc.dg/vect/vect-50.c
-index c9500ca91e5..f43676896af 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-50.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-50.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_float } */
- /* { dg-additional-options "--param vect-max-peeling-for-alignment=0" } */
- 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-52.c b/gcc/testsuite/gcc.dg/vect/vect-52.c
-index 0343d9a24d1..c20a4be2ede 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-52.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-52.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_float } */
- /* { dg-add-options double_vectors } */
- 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-54.c b/gcc/testsuite/gcc.dg/vect/vect-54.c
-index 58201abe069..2b236e48e19 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-54.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-54.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_float } */
- /* { dg-add-options double_vectors } */
- 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-56.c b/gcc/testsuite/gcc.dg/vect/vect-56.c
-index 8060b05e781..c914126ece5 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-56.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-56.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_float } */
- 
- #include 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-58.c b/gcc/testsuite/gcc.dg/vect/vect-58.c
-index 441af51860e..da4f9740e33 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-58.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-58.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_float } */
- 
- #include 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-60.c b/gcc/testsuite/gcc.dg/vect/vect-60.c
-index 3b7477c96ab..121c503c63a 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-60.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-60.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_float } */
- 
- #include 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-72.c b/gcc/testsuite/gcc.dg/vect/vect-72.c
-index 472d8d57549..9e8e91b7ae6 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-72.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-72.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- /* { dg-add-options bind_pic_locally } */
- 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-75-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-75-big-array.c
-index 42b2b8d91aa..a3fb5053037 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-75-big-array.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-75-big-array.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- 
- #include 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-75.c b/gcc/testsuite/gcc.dg/vect/vect-75.c
-index 2cdd7032242..88da97f0bb7 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-75.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-75.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- 
- #include 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-77-alignchecks.c b/gcc/testsuite/gcc.dg/vect/vect-77-alignchecks.c
-index 56ee797d10b..fb3e4992782 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-77-alignchecks.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-77-alignchecks.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- 
- #include 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-77-global.c b/gcc/testsuite/gcc.dg/vect/vect-77-global.c
-index f0b73505d68..1580d6e075b 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-77-global.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-77-global.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- /* { dg-add-options bind_pic_locally } */
- 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-78-alignchecks.c b/gcc/testsuite/gcc.dg/vect/vect-78-alignchecks.c
-index c3ef8a36591..57e8da0a909 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-78-alignchecks.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-78-alignchecks.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- 
- #include 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-78-global.c b/gcc/testsuite/gcc.dg/vect/vect-78-global.c
-index 241e7fa94b5..ea039b389b2 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-78-global.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-78-global.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- /* { dg-add-options bind_pic_locally } */
- 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-89-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-89-big-array.c
-index decfbee318a..59e1aae0017 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-89-big-array.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-89-big-array.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- 
- #include 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-89.c b/gcc/testsuite/gcc.dg/vect/vect-89.c
-index 051698eada2..356ab96d330 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-89.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-89.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- 
- #include 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-91.c b/gcc/testsuite/gcc.dg/vect/vect-91.c
-index 9430da3290a..91264d9841d 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-91.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-91.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-do compile } */
- /* { dg-require-effective-target vect_int } */
- /* { dg-additional-options "--param vect-max-peeling-for-alignment=0" } */
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-92.c b/gcc/testsuite/gcc.dg/vect/vect-92.c
-index b9a1ce23d02..9ceb0fbadcd 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-92.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-92.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_float } */
- 
- #include 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-96.c b/gcc/testsuite/gcc.dg/vect/vect-96.c
-index 0cb935b9f16..c0d6c37b21d 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-96.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-96.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- /* { dg-add-options double_vectors } */
- 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-1.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-1.c
-index c2b1c773047..3887120b747 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-1.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-1.c
-@@ -15,3 +15,5 @@ fn1 ()
- }
- 
- /* { dg-final { scan-tree-dump "improved number of alias checks from \[0-9\]* to 1" "vect" } } */
-+/* { dg-final { scan-tree-dump "using an address-based overlap test" "vect" } } */
-+/* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-10.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-10.c
-index 0e6285e4a23..b6cc309dbe8 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-10.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-10.c
-@@ -65,3 +65,6 @@ main (void)
-   FOR_EACH_TYPE (DO_TEST)
-   return 0;
- }
-+
-+/* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */
-+/* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-11.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-11.c
-index a0d5abc3aa4..09a4ebfa69e 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-11.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-11.c
-@@ -95,3 +95,6 @@ main (void)
- /* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* \* 8[)]* is outside \(-24, 24\)} "vect" { target vect_double } } } */
- /* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* \* 8[)]* is outside \(-32, 32\)} "vect" { target vect_double } } } */
- /* { dg-final { scan-tree-dump {run-time check [^\n]* abs \([^*]* \* 8[)]* >= 32} "vect" { target vect_double } } } */
-+
-+/* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */
-+/* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-12.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-12.c
-index 788cdfc3cdc..63a897f4bad 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-12.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-12.c
-@@ -95,3 +95,6 @@ main (void)
- /* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* \* 8[)]* is outside \[0, 24\)} "vect" { target vect_double } } } */
- /* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* \* 8[)]* is outside \[0, 32\)} "vect" { target vect_double } } } */
- /* { dg-final { scan-tree-dump {run-time check [^\n]* unsigned \([^*]* \* 8[)]* >= 32} "vect" { target vect_double } } } */
-+
-+/* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */
-+/* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-13.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-13.c
-index 60bc4730724..812aa9027dd 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-13.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-13.c
-@@ -18,4 +18,6 @@ f2 (int *x, long step2, int n)
- 
- /* { dg-final { scan-tree-dump {need run-time check that [^\n]*step1[^\n]* is nonzero} "vect" } } */
- /* { dg-final { scan-tree-dump-not {need run-time check that [^\n]*step2[^\n]* is nonzero} "vect" } } */
-+/* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */
-+/* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */
- /* { dg-final { scan-tree-dump-times {LOOP VECTORIZED} 2 "vect" } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-14.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-14.c
-new file mode 100644
-index 00000000000..1d148a04918
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-14.c
-@@ -0,0 +1,64 @@
-+#define N 200
-+#define M 4
-+
-+typedef signed char sc;
-+typedef unsigned char uc;
-+typedef signed short ss;
-+typedef unsigned short us;
-+typedef int si;
-+typedef unsigned int ui;
-+typedef signed long long sll;
-+typedef unsigned long long ull;
-+
-+#define FOR_EACH_TYPE(M) \
-+  M (sc) M (uc) \
-+  M (ss) M (us) \
-+  M (si) M (ui) \
-+  M (sll) M (ull) \
-+  M (float) M (double)
-+
-+#define TEST_VALUE(I) ((I) * 17 / 2)
-+
-+#define ADD_TEST(TYPE)				\
-+  void __attribute__((noinline, noclone))	\
-+  test_##TYPE (TYPE *a, TYPE *b)		\
-+  {						\
-+    for (int i = 0; i < N; i += 2)		\
-+      {						\
-+	TYPE b0 = b[i + 0];			\
-+	TYPE b1 = b[i + 1];			\
-+	a[i + 0] = b0 + 2;			\
-+	a[i + 1] = b1 + 3;			\
-+      }						\
-+  }
-+
-+#define DO_TEST(TYPE)						\
-+  for (int j = 0; j < M; ++j)					\
-+    {								\
-+      TYPE a[N + M];						\
-+      for (int i = 0; i < N + M; ++i)				\
-+	a[i] = TEST_VALUE (i);					\
-+      test_##TYPE (a + j, a);					\
-+      for (int i = 0; i < N; i += 2)				\
-+	{							\
-+	  TYPE base1 = j == 0 ? TEST_VALUE (i) : a[i];		\
-+	  TYPE base2 = j <= 1 ? TEST_VALUE (i + 1) : a[i + 1];	\
-+	  if (a[i + j] != (TYPE) (base1 + 2)			\
-+	      || a[i + j + 1] != (TYPE) (base2 + 3))		\
-+	    __builtin_abort ();					\
-+	}							\
-+    }
-+
-+FOR_EACH_TYPE (ADD_TEST)
-+
-+int
-+main (void)
-+{
-+  FOR_EACH_TYPE (DO_TEST)
-+  return 0;
-+}
-+
-+/* { dg-final { scan-tree-dump {flags: *WAR\n} "vect" { target vect_int } } } */
-+/* { dg-final { scan-tree-dump-not {flags: [^\n]*ARBITRARY\n} "vect" } } */
-+/* { dg-final { scan-tree-dump "using an address-based WAR/WAW test" "vect" } } */
-+/* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-15.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-15.c
-new file mode 100644
-index 00000000000..fbe3f8431ff
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-15.c
-@@ -0,0 +1,61 @@
-+#define N 200
-+#define DIST 32
-+
-+typedef signed char sc;
-+typedef unsigned char uc;
-+typedef signed short ss;
-+typedef unsigned short us;
-+typedef int si;
-+typedef unsigned int ui;
-+typedef signed long long sll;
-+typedef unsigned long long ull;
-+
-+#define FOR_EACH_TYPE(M) \
-+  M (sc) M (uc) \
-+  M (ss) M (us) \
-+  M (si) M (ui) \
-+  M (sll) M (ull) \
-+  M (float) M (double)
-+
-+#define ADD_TEST(TYPE)				\
-+  void __attribute__((noinline, noclone))	\
-+  test_##TYPE (TYPE *x, TYPE *y)		\
-+  {						\
-+    for (int i = 0; i < N; ++i)			\
-+      {						\
-+	x[i] = i;				\
-+	y[i] = 42 - i * 2;			\
-+      }						\
-+  }
-+
-+#define DO_TEST(TYPE)						\
-+  for (int i = 0; i < DIST * 2; ++i)				\
-+    {								\
-+      TYPE a[N + DIST * 2] = {};				\
-+      test_##TYPE (a + DIST, a + i);				\
-+      for (int j = 0; j < N + DIST * 2; ++j)			\
-+	{							\
-+	  TYPE expected = 0;					\
-+	  if (i > DIST && j >= i && j < i + N)			\
-+	    expected = 42 - (j - i) * 2;			\
-+	  if (j >= DIST && j < DIST + N)			\
-+	    expected = j - DIST;				\
-+	  if (i <= DIST && j >= i && j < i + N)			\
-+	    expected = 42 - (j - i) * 2;			\
-+	  if (expected != a[j])					\
-+	    __builtin_abort ();					\
-+	}							\
-+    }
-+
-+FOR_EACH_TYPE (ADD_TEST)
-+
-+int
-+main (void)
-+{
-+  FOR_EACH_TYPE (DO_TEST)
-+  return 0;
-+}
-+
-+/* { dg-final { scan-tree-dump {flags: *WAW\n} "vect" { target vect_int } } } */
-+/* { dg-final { scan-tree-dump "using an address-based WAR/WAW test" "vect" } } */
-+/* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-16.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-16.c
-new file mode 100644
-index 00000000000..81c252dfc23
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-16.c
-@@ -0,0 +1,66 @@
-+#define N 200
-+#define DIST 32
-+
-+typedef signed char sc;
-+typedef unsigned char uc;
-+typedef signed short ss;
-+typedef unsigned short us;
-+typedef int si;
-+typedef unsigned int ui;
-+typedef signed long long sll;
-+typedef unsigned long long ull;
-+
-+#define FOR_EACH_TYPE(M) \
-+  M (sc) M (uc) \
-+  M (ss) M (us) \
-+  M (si) M (ui) \
-+  M (sll) M (ull) \
-+  M (float) M (double)
-+
-+#define TEST_VALUE(I) ((I) * 13 / 2)
-+
-+#define ADD_TEST(TYPE)				\
-+  TYPE __attribute__((noinline, noclone))	\
-+  test_##TYPE (TYPE *x, TYPE *y)		\
-+  {						\
-+    TYPE res = 0;				\
-+    for (int i = 0; i < N; ++i)			\
-+      {						\
-+	x[i] = i;				\
-+	res += y[i];				\
-+      }						\
-+    return res;					\
-+  }
-+
-+#define DO_TEST(TYPE)						\
-+  for (int i = 0; i < DIST * 2; ++i)				\
-+    {								\
-+      TYPE a[N + DIST * 2];					\
-+      for (int j = 0; j < N + DIST * 2; ++j)			\
-+	a[j] = TEST_VALUE (j);					\
-+      TYPE res = test_##TYPE (a + DIST, a + i);			\
-+      for (int j = 0; j < N; ++j)				\
-+	if (a[j + DIST] != (TYPE) j)				\
-+	  __builtin_abort ();					\
-+      TYPE expected_res = 0;					\
-+      for (int j = i; j < i + N; ++j)				\
-+	if (i <= DIST && j >= DIST && j < DIST + N)		\
-+	  expected_res += j - DIST;				\
-+	else							\
-+	  expected_res += TEST_VALUE (j);			\
-+      if (expected_res != res)					\
-+	__builtin_abort ();					\
-+    }
-+
-+FOR_EACH_TYPE (ADD_TEST)
-+
-+int
-+main (void)
-+{
-+  FOR_EACH_TYPE (DO_TEST)
-+  return 0;
-+}
-+
-+/* { dg-final { scan-tree-dump {flags: *RAW\n} "vect" { target vect_int } } } */
-+/* { dg-final { scan-tree-dump "using an address-based overlap test" "vect" } } */
-+/* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-17.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-17.c
-new file mode 100644
-index 00000000000..c49c497c2d0
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-17.c
-@@ -0,0 +1,15 @@
-+/* { dg-do compile } */
-+/* { dg-require-effective-target vect_load_lanes } */
-+
-+struct s { int x[100]; };
-+
-+void
-+f (struct s *s1, int a, int b)
-+{
-+  for (int i = 0; i < 32; ++i)
-+    s1->x[a + i] = s1->x[b + i * 2] + s1->x[b + i * 3];
-+}
-+
-+/* { dg-final { scan-tree-dump {flags: *[^\n]*MIXED_STEPS} "vect" } } */
-+/* { dg-final { scan-tree-dump "using an address-based overlap test" "vect" } } */
-+/* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-18.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-18.c
-new file mode 100644
-index 00000000000..9d0739151d9
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-18.c
-@@ -0,0 +1,64 @@
-+#define N 200
-+#define DIST 32
-+
-+typedef signed char sc;
-+typedef unsigned char uc;
-+typedef signed short ss;
-+typedef unsigned short us;
-+typedef int si;
-+typedef unsigned int ui;
-+typedef signed long long sll;
-+typedef unsigned long long ull;
-+
-+#define FOR_EACH_TYPE(M) \
-+  M (sc) M (uc) \
-+  M (ss) M (us) \
-+  M (si) M (ui) \
-+  M (sll) M (ull) \
-+  M (float) M (double)
-+
-+#define TEST_VALUE(I) ((I) * 11 / 2)
-+
-+#define ADD_TEST(TYPE)				\
-+  TYPE a_##TYPE[N * 2];				\
-+  void __attribute__((noinline, noclone))	\
-+  test_##TYPE (int x, int y)			\
-+  {						\
-+    for (int i = 0; i < N; ++i)			\
-+      a_##TYPE[x - i] += a_##TYPE[y - i];	\
-+  }
-+
-+#define DO_TEST(TYPE)						\
-+  for (int i = 0; i < DIST * 2; ++i)				\
-+    {								\
-+      for (int j = 0; j < N + DIST * 2; ++j)			\
-+	a_##TYPE[j] = TEST_VALUE (j);				\
-+      test_##TYPE (i + N - 1, DIST + N - 1);			\
-+      for (int j = 0; j < N + DIST * 2; ++j)			\
-+	{							\
-+	  TYPE expected;					\
-+	  if (j < i || j >= i + N)				\
-+	    expected = TEST_VALUE (j);				\
-+	  else if (i >= DIST)					\
-+	    expected = ((TYPE) TEST_VALUE (j)			\
-+			+ (TYPE) TEST_VALUE (j + DIST - i));	\
-+	  else							\
-+	    expected = ((TYPE) TEST_VALUE (j)			\
-+			+ a_##TYPE[j + DIST - i]);		\
-+	  if (expected != a_##TYPE[j])				\
-+	    __builtin_abort ();					\
-+	}							\
-+    }
-+
-+FOR_EACH_TYPE (ADD_TEST)
-+
-+int
-+main (void)
-+{
-+  FOR_EACH_TYPE (DO_TEST)
-+  return 0;
-+}
-+
-+/* { dg-final { scan-tree-dump {flags: *WAR\n} "vect" { target vect_int } } } */
-+/* { dg-final { scan-tree-dump "using an index-based WAR/WAW test" "vect" } } */
-+/* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-19.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-19.c
-new file mode 100644
-index 00000000000..7c0ff36a8c4
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-19.c
-@@ -0,0 +1,62 @@
-+#define N 200
-+#define DIST 32
-+
-+typedef signed char sc;
-+typedef unsigned char uc;
-+typedef signed short ss;
-+typedef unsigned short us;
-+typedef int si;
-+typedef unsigned int ui;
-+typedef signed long long sll;
-+typedef unsigned long long ull;
-+
-+#define FOR_EACH_TYPE(M) \
-+  M (sc) M (uc) \
-+  M (ss) M (us) \
-+  M (si) M (ui) \
-+  M (sll) M (ull) \
-+  M (float) M (double)
-+
-+#define ADD_TEST(TYPE)				\
-+  TYPE a_##TYPE[N * 2];				\
-+  void __attribute__((noinline, noclone))	\
-+  test_##TYPE (int x, int y)			\
-+  {						\
-+    for (int i = 0; i < N; ++i)			\
-+      {						\
-+	a_##TYPE[i + x] = i;			\
-+	a_##TYPE[i + y] = 42 - i * 2;		\
-+      }						\
-+  }
-+
-+#define DO_TEST(TYPE)						\
-+  for (int i = 0; i < DIST * 2; ++i)				\
-+    {								\
-+      __builtin_memset (a_##TYPE, 0, sizeof (a_##TYPE));	\
-+      test_##TYPE (DIST, i);					\
-+      for (int j = 0; j < N + DIST * 2; ++j)			\
-+	{							\
-+	  TYPE expected = 0;					\
-+	  if (i > DIST && j >= i && j < i + N)			\
-+	    expected = 42 - (j - i) * 2;			\
-+	  if (j >= DIST && j < DIST + N)			\
-+	    expected = j - DIST;				\
-+	  if (i <= DIST && j >= i && j < i + N)			\
-+	    expected = 42 - (j - i) * 2;			\
-+	  if (expected != a_##TYPE[j])				\
-+	    __builtin_abort ();					\
-+	}							\
-+    }
-+
-+FOR_EACH_TYPE (ADD_TEST)
-+
-+int
-+main (void)
-+{
-+  FOR_EACH_TYPE (DO_TEST)
-+  return 0;
-+}
-+
-+/* { dg-final { scan-tree-dump {flags: *WAW\n} "vect" { target vect_int } } } */
-+/* { dg-final { scan-tree-dump "using an index-based WAR/WAW test" "vect" } } */
-+/* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-20.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-20.c
-new file mode 100644
-index 00000000000..8a699ebfda8
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-20.c
-@@ -0,0 +1,66 @@
-+#define N 200
-+#define DIST 32
-+
-+typedef signed char sc;
-+typedef unsigned char uc;
-+typedef signed short ss;
-+typedef unsigned short us;
-+typedef int si;
-+typedef unsigned int ui;
-+typedef signed long long sll;
-+typedef unsigned long long ull;
-+
-+#define FOR_EACH_TYPE(M) \
-+  M (sc) M (uc) \
-+  M (ss) M (us) \
-+  M (si) M (ui) \
-+  M (sll) M (ull) \
-+  M (float) M (double)
-+
-+#define TEST_VALUE(I) ((I) * 11 / 2)
-+
-+#define ADD_TEST(TYPE)				\
-+  TYPE a_##TYPE[N * 2];				\
-+  TYPE __attribute__((noinline, noclone))	\
-+  test_##TYPE (int x, int y)			\
-+  {						\
-+    TYPE res = 0;				\
-+    for (int i = 0; i < N; ++i)			\
-+      {						\
-+	a_##TYPE[i + x] = i;			\
-+	res += a_##TYPE[i + y];			\
-+      }						\
-+    return res;					\
-+  }
-+
-+#define DO_TEST(TYPE)						\
-+  for (int i = 0; i < DIST * 2; ++i)				\
-+    {								\
-+      for (int j = 0; j < N + DIST * 2; ++j)			\
-+	a_##TYPE[j] = TEST_VALUE (j);				\
-+      TYPE res = test_##TYPE (DIST, i);				\
-+      for (int j = 0; j < N; ++j)				\
-+	if (a_##TYPE[j + DIST] != (TYPE) j)			\
-+	  __builtin_abort ();					\
-+      TYPE expected_res = 0;					\
-+      for (int j = i; j < i + N; ++j)				\
-+	if (i <= DIST && j >= DIST && j < DIST + N)		\
-+	  expected_res += j - DIST;				\
-+	else							\
-+	  expected_res += TEST_VALUE (j);			\
-+      if (expected_res != res)					\
-+	__builtin_abort ();					\
-+    }
-+
-+FOR_EACH_TYPE (ADD_TEST)
-+
-+int
-+main (void)
-+{
-+  FOR_EACH_TYPE (DO_TEST)
-+  return 0;
-+}
-+
-+/* { dg-final { scan-tree-dump {flags: *RAW\n} "vect" { target vect_int } } } */
-+/* { dg-final { scan-tree-dump "using an index-based overlap test" "vect" } } */
-+/* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-8.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-8.c
-index 0569ca487b5..7e5df138999 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-8.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-8.c
-@@ -58,3 +58,7 @@ main (void)
-   FOR_EACH_TYPE (DO_TEST)
-   return 0;
- }
-+
-+/* { dg-final { scan-tree-dump {flags: *WAR\n} "vect" { target vect_int } } } */
-+/* { dg-final { scan-tree-dump "using an index-based WAR/WAW test" "vect" } } */
-+/* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-9.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-9.c
-index 5685bfee576..a7fc1fcebbb 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-9.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-9.c
-@@ -17,7 +17,7 @@ typedef unsigned long long ull;
-   M (sll) M (ull) \
-   M (float) M (double)
- 
--#define TEST_VALUE(I) ((I) * 5 / 2)
-+#define TEST_VALUE(I) ((I) * 17 / 2)
- 
- #define ADD_TEST(TYPE)				\
-   void __attribute__((noinline, noclone))	\
-@@ -51,3 +51,7 @@ main (void)
-   FOR_EACH_TYPE (DO_TEST)
-   return 0;
- }
-+
-+/* { dg-final { scan-tree-dump {flags: [^\n]*ARBITRARY\n} "vect" { target vect_int } } } */
-+/* { dg-final { scan-tree-dump "using an address-based overlap test" "vect" } } */
-+/* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-bswap16.c b/gcc/testsuite/gcc.dg/vect/vect-bswap16.c
-index 3c98b07e425..d29b352b832 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-bswap16.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-bswap16.c
-@@ -1,4 +1,4 @@
--/* { dg-require-effective-target vect_bswap } */
-+/* { dg-additional-options "-msse4" { target sse4_runtime } } */
- 
- #include "tree-vect.h"
- 
-@@ -39,4 +39,4 @@ main (void)
-   return 0;
- }
- 
--/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
-+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_bswap || sse4_runtime } } } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-bswap16a.c b/gcc/testsuite/gcc.dg/vect/vect-bswap16a.c
-new file mode 100644
-index 00000000000..730dc4e8352
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/vect/vect-bswap16a.c
-@@ -0,0 +1,5 @@
-+/* { dg-additional-options "-msse2 -mno-sse3" { target sse2_runtime } } */
-+
-+#include "vect-bswap16.c"
-+
-+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_shift } } } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-3.c b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-3.c
-new file mode 100644
-index 00000000000..bb99b95eca5
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-3.c
-@@ -0,0 +1,47 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
-+/* { dg-require-effective-target vect_condition } */
-+/* { dg-require-effective-target vect_float } */
-+
-+#include "tree-vect.h"
-+
-+extern void abort (void) __attribute__ ((noreturn));
-+
-+#define N 27
-+
-+/* Condition reduction with different types.  */
-+
-+int
-+condition_reduction (float *a, float min_v)
-+{
-+  int last = 0;
-+
-+  for (int i = 0; i < N; i++)
-+    if (a[i] < min_v)
-+      last = i;
-+
-+  return last;
-+}
-+
-+int
-+main (void)
-+{
-+  float a[N] = {
-+  11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
-+  1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
-+  21, 22, 23, 24, 25, 26, 27
-+  };
-+
-+  check_vect ();
-+
-+  int ret = condition_reduction (a, 10);
-+  if (ret != 18)
-+    abort ();
-+
-+  return 0;
-+}
-+
-+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
-+/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */
-+
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c
-new file mode 100644
-index 00000000000..8820075b1dc
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c
-@@ -0,0 +1,47 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
-+/* { dg-require-effective-target vect_condition } */
-+/* { dg-require-effective-target vect_double } */
-+
-+#include "tree-vect.h"
-+
-+extern void abort (void) __attribute__ ((noreturn));
-+
-+#define N 27
-+
-+/* Condition reduction with different types.  */
-+
-+int
-+condition_reduction (double *a, double min_v)
-+{
-+  int last = 0;
-+
-+  for (int i = 0; i < N; i++)
-+    if (a[i] < min_v)
-+      last = i;
-+
-+  return last;
-+}
-+
-+int
-+main (void)
-+{
-+  double a[N] = {
-+  11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
-+  1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
-+  21, 22, 23, 24, 25, 26, 27
-+  };
-+
-+  check_vect ();
-+
-+  int ret = condition_reduction (a, 10);
-+  if (ret != 18)
-+    abort ();
-+
-+  return 0;
-+}
-+
-+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
-+/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */
-+/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */
-+
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-double-reduc-5.c b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-5.c
-index 0ba33895592..079704cee81 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-double-reduc-5.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-5.c
-@@ -52,5 +52,5 @@ int main ()
- 
- /* Vectorization of loops with multiple types and double reduction is not 
-    supported yet.  */       
--/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
-+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail { ! aarch64*-*-* } } } } */
-       
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-epilogues.c b/gcc/testsuite/gcc.dg/vect/vect-epilogues.c
-new file mode 100644
-index 00000000000..946666e918f
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/vect/vect-epilogues.c
-@@ -0,0 +1,19 @@
-+/* { dg-do compile } */
-+
-+/* Copied from PR 88915.  */
-+void pixel_avg( unsigned char *dst, int i_dst_stride,
-+                               unsigned char *src1, int i_src1_stride,
-+                               unsigned char *src2, int i_src2_stride,
-+                               int i_width, int i_height )
-+ {
-+     for( int y = 0; y < i_height; y++ )
-+     {
-+         for( int x = 0; x < i_width; x++ )
-+             dst[x] = ( src1[x] + src2[x] + 1 ) >> 1;
-+         dst += i_dst_stride;
-+         src1 += i_src1_stride;
-+         src2 += i_src2_stride;
-+     }
-+ }
-+
-+/* { dg-final { scan-tree-dump "LOOP EPILOGUE VECTORIZED" "vect" { xfail { arm*-*-* } } } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-live-1.c b/gcc/testsuite/gcc.dg/vect/vect-live-1.c
-index e170875d7ab..f628c5d3998 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-live-1.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-live-1.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- /* { dg-additional-options "-fno-tree-scev-cprop" } */
- 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-live-2.c b/gcc/testsuite/gcc.dg/vect/vect-live-2.c
-index a6daa61829e..19d8c22859e 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-live-2.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-live-2.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_long } */
- /* { dg-require-effective-target vect_shift } */
- /* { dg-additional-options "-fno-tree-scev-cprop" } */
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-live-3.c b/gcc/testsuite/gcc.dg/vect/vect-live-3.c
-index 3ffa5166f45..8f5ccb27365 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-live-3.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-live-3.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- 
- #include "tree-vect.h"
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-live-4.c b/gcc/testsuite/gcc.dg/vect/vect-live-4.c
-index 21cc27320ac..553ffcd49f7 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-live-4.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-live-4.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- 
- #include "tree-vect.h"
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-live-slp-1.c b/gcc/testsuite/gcc.dg/vect/vect-live-slp-1.c
-index aff37c100f0..965437c8f03 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-live-slp-1.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-live-slp-1.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- /* { dg-additional-options "-fno-tree-scev-cprop" } */
- 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-live-slp-2.c b/gcc/testsuite/gcc.dg/vect/vect-live-slp-2.c
-index 35689665b54..0d2f17f9003 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-live-slp-2.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-live-slp-2.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- /* { dg-additional-options "-fno-tree-scev-cprop" } */
- 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-live-slp-3.c b/gcc/testsuite/gcc.dg/vect/vect-live-slp-3.c
-index 854116fa36e..a3f60f6ce6d 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-live-slp-3.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-live-slp-3.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_long } */
- /* { dg-additional-options "-fno-tree-scev-cprop" } */
- 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c b/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c
-index 18bf5e80917..1f82121df06 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- /* { dg-add-options double_vectors } */
- 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c b/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c
-index 43887865bf4..b0f74083f2b 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- /* { dg-add-options bind_pic_locally } */
- 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c b/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c
-index b47a93ab326..864b17ac640 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- /* { dg-add-options double_vectors } */
- 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-4e.c b/gcc/testsuite/gcc.dg/vect/vect-outer-4e.c
-index 13238dbe2f9..e65a092f5bf 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-outer-4e.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-outer-4e.c
-@@ -23,4 +23,4 @@ foo (){
-   return;
- }
- 
--/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
-+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail { ! aarch64*-*-* } } } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-4f.c b/gcc/testsuite/gcc.dg/vect/vect-outer-4f.c
-index d1fbe346a48..a88014a2fbf 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-outer-4f.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-outer-4f.c
-@@ -65,4 +65,4 @@ int main (void)
-   return 0;
- }
- 
--/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
-+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail { ! aarch64*-*-* } } } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-4g.c b/gcc/testsuite/gcc.dg/vect/vect-outer-4g.c
-index d1fbe346a48..a88014a2fbf 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-outer-4g.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-outer-4g.c
-@@ -65,4 +65,4 @@ int main (void)
-   return 0;
- }
- 
--/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
-+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail { ! aarch64*-*-* } } } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-4k.c b/gcc/testsuite/gcc.dg/vect/vect-outer-4k.c
-index d1fbe346a48..a88014a2fbf 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-outer-4k.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-outer-4k.c
-@@ -65,4 +65,4 @@ int main (void)
-   return 0;
- }
- 
--/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
-+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail { ! aarch64*-*-* } } } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-4l.c b/gcc/testsuite/gcc.dg/vect/vect-outer-4l.c
-index d1fbe346a48..4f95c652ee3 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-outer-4l.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-outer-4l.c
-@@ -65,4 +65,4 @@ int main (void)
-   return 0;
- }
- 
--/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
-+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail { ! aarch64*-*-* } } } }*/
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-call-1.c b/gcc/testsuite/gcc.dg/vect/vect-outer-call-1.c
-new file mode 100644
-index 00000000000..f26d4220532
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/vect/vect-outer-call-1.c
-@@ -0,0 +1,22 @@
-+/* { dg-do compile } */
-+/* { dg-require-effective-target vect_float } */
-+/* { dg-additional-options "-fno-math-errno" } */
-+
-+void
-+foo (float * __restrict x, float *y, int n, int m)
-+{
-+  if (m > 0)
-+    for (int i = 0; i < n; ++i)
-+      {
-+	float tem = x[i], tem1;
-+	for (int j = 0; j < m; ++j)
-+	  {
-+	    tem += y[j];
-+	    tem1 = tem;
-+	    tem = __builtin_sqrtf (tem);
-+	  }
-+	x[i] = tem - tem1;
-+      }
-+}
-+
-+/* { dg-final { scan-tree-dump "OUTER LOOP VECTORIZED" "vect" { target { vect_call_sqrtf } } } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-1-epilogue.c b/gcc/testsuite/gcc.dg/vect/vect-peel-1-epilogue.c
-new file mode 100644
-index 00000000000..cc23c6b0866
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/vect/vect-peel-1-epilogue.c
-@@ -0,0 +1,3 @@
-+/* { dg-require-effective-target vect_int } */
-+
-+#include "vect-peel-1-src.c"
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-1-src.c b/gcc/testsuite/gcc.dg/vect/vect-peel-1-src.c
-new file mode 100644
-index 00000000000..7980d4dd643
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/vect/vect-peel-1-src.c
-@@ -0,0 +1,48 @@
-+#include <stdarg.h>
-+#include "tree-vect.h"
-+
-+#define N 128
-+
-+int ib[N+7];
-+
-+__attribute__ ((noinline))
-+int main1 ()
-+{
-+  int i;
-+  int ia[N+1];
-+
-+  /* All the accesses are misaligned. With cost model disabled, we
-+     count the number of aligned accesses for each peeling option, and
-+     in this case we align the two loads if possible (i.e., if
-+     misaligned stores are supported).  */
-+  for (i = 1; i <= N; i++)
-+    {
-+      ia[i] = ib[i+2] + ib[i+6];
-+    }
-+
-+  /* check results:  */
-+  for (i = 1; i <= N; i++)
-+    {
-+      if (ia[i] != ib[i+2] + ib[i+6])
-+        abort ();
-+    }
-+
-+  return 0;
-+}
-+
-+int main (void)
-+{ 
-+  int i;
-+
-+  check_vect ();
-+
-+  for (i = 0; i <= N+6; i++)
-+    {
-+      asm volatile ("" : "+r" (i));
-+      ib[i] = i;
-+    }
-+
-+  return main1 ();
-+}
-+
-+
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-1.c b/gcc/testsuite/gcc.dg/vect/vect-peel-1.c
-index fae99ab0b08..a7660a381c4 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-peel-1.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-peel-1.c
-@@ -1,51 +1,8 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- 
--#include <stdarg.h>
--#include "tree-vect.h"
--
--#define N 128
--
--int ib[N+7];
--
--__attribute__ ((noinline))
--int main1 ()
--{
--  int i;
--  int ia[N+1];
--
--  /* All the accesses are misaligned. With cost model disabled, we
--     count the number of aligned accesses for each peeling option, and
--     in this case we align the two loads if possible (i.e., if
--     misaligned stores are supported).  */
--  for (i = 1; i <= N; i++)
--    {
--      ia[i] = ib[i+2] + ib[i+6];
--    }
--
--  /* check results:  */
--  for (i = 1; i <= N; i++)
--    {
--      if (ia[i] != ib[i+2] + ib[i+6])
--        abort ();
--    }
--
--  return 0;
--}
--
--int main (void)
--{ 
--  int i;
--
--  check_vect ();
--
--  for (i = 0; i <= N+6; i++)
--    {
--      asm volatile ("" : "+r" (i));
--      ib[i] = i;
--    }
--
--  return main1 ();
--}
-+#include "vect-peel-1-src.c"
- 
- /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
- /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { target { { vect_element_align } && { vect_aligned_arrays } } xfail { ! vect_unaligned_possible } } } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-3-epilogue.c b/gcc/testsuite/gcc.dg/vect/vect-peel-3-epilogue.c
-new file mode 100644
-index 00000000000..8af0fcdca0e
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/vect/vect-peel-3-epilogue.c
-@@ -0,0 +1,4 @@
-+/* { dg-require-effective-target vect_int } */
-+/* { dg-add-options bind_pic_locally } */
-+
-+#include "vect-peel-3-src.c"
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-3-src.c b/gcc/testsuite/gcc.dg/vect/vect-peel-3-src.c
-new file mode 100644
-index 00000000000..a21ce8c3d6a
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/vect/vect-peel-3-src.c
-@@ -0,0 +1,58 @@
-+#include <stdarg.h>
-+#include "tree-vect.h"
-+
-+#if VECTOR_BITS > 128
-+#define NINTS (VECTOR_BITS / 32)
-+#define EXTRA (NINTS * 2)
-+#else
-+#define NINTS 4
-+#define EXTRA 10
-+#endif
-+
-+#define N 128
-+
-+#define RES_A (N * N / 4)
-+#define RES_B (N * (N + 1) / 2 + (NINTS + 3) * (N + 1))
-+#define RES_C (N * (N + 1) / 2 + (N + 1))
-+#define RES (RES_A + RES_B + RES_C)
-+
-+int ib[N + EXTRA];
-+int ia[N + EXTRA];
-+int ic[N + EXTRA];
-+
-+__attribute__ ((noinline))
-+int main1 ()
-+{
-+  int i, suma = 0, sumb = 0, sumc = 0;
-+
-+  /* ib and ic have same misalignment, we peel to align them.  */
-+  for (i = 0; i <= N; i++)
-+    {
-+      suma += ia[i];
-+      sumb += ib[i + NINTS + 1];
-+      sumc += ic[i + 1];
-+    }
-+
-+  /* check results:  */
-+  if (suma + sumb + sumc != RES)
-+    abort ();
-+
-+  return 0;
-+}
-+
-+int main (void)
-+{
-+  int i;
-+
-+  check_vect ();
-+
-+  for (i = 0; i < N + EXTRA; i++)
-+    {
-+      asm volatile ("" : "+r" (i));
-+      ib[i] = i;
-+      ic[i] = i+2;
-+      ia[i] = i/2;
-+    }
-+
-+  return main1 ();
-+}
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-3.c b/gcc/testsuite/gcc.dg/vect/vect-peel-3.c
-index d5c0cf10ce1..2cd99573fd1 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-peel-3.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-peel-3.c
-@@ -1,64 +1,9 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- /* { dg-add-options bind_pic_locally } */
- 
--#include <stdarg.h>
--#include "tree-vect.h"
--
--#if VECTOR_BITS > 128
--#define NINTS (VECTOR_BITS / 32)
--#define EXTRA (NINTS * 2)
--#else
--#define NINTS 4
--#define EXTRA 10
--#endif
--
--#define N 128
--
--#define RES_A (N * N / 4)
--#define RES_B (N * (N + 1) / 2 + (NINTS + 3) * (N + 1))
--#define RES_C (N * (N + 1) / 2 + (N + 1))
--#define RES (RES_A + RES_B + RES_C)
--
--int ib[N + EXTRA];
--int ia[N + EXTRA];
--int ic[N + EXTRA];
--
--__attribute__ ((noinline))
--int main1 ()
--{
--  int i, suma = 0, sumb = 0, sumc = 0;
--
--  /* ib and ic have same misalignment, we peel to align them.  */
--  for (i = 0; i <= N; i++)
--    {
--      suma += ia[i];
--      sumb += ib[i + NINTS + 1];
--      sumc += ic[i + 1];
--    }
--
--  /* check results:  */
--  if (suma + sumb + sumc != RES)
--    abort ();
--
--  return 0;
--}
--
--int main (void)
--{
--  int i;
--
--  check_vect ();
--
--  for (i = 0; i < N + EXTRA; i++)
--    {
--      asm volatile ("" : "+r" (i));
--      ib[i] = i;
--      ic[i] = i+2;
--      ia[i] = i/2;
--    }
--
--  return main1 ();
--}
-+#include "vect-peel-3-src.c"
- 
- /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */
- /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect"  { xfail { { ! vect_unaligned_possible } || vect_sizes_32B_16B } } } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-4-epilogue.c b/gcc/testsuite/gcc.dg/vect/vect-peel-4-epilogue.c
-new file mode 100644
-index 00000000000..783982f04f6
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/vect/vect-peel-4-epilogue.c
-@@ -0,0 +1,4 @@
-+/* { dg-require-effective-target vect_int } */
-+/* { dg-add-options bind_pic_locally } */
-+
-+#include "vect-peel-4-src.c"
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-4-src.c b/gcc/testsuite/gcc.dg/vect/vect-peel-4-src.c
-new file mode 100644
-index 00000000000..33088fb0902
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/vect/vect-peel-4-src.c
-@@ -0,0 +1,45 @@
-+#include <stdarg.h>
-+#include "tree-vect.h"
-+
-+#define N 128
-+
-+int ib[N+7];
-+int ia[N+1];
-+
-+__attribute__ ((noinline))
-+int main1 ()
-+{
-+  int i;
-+
-+  /* Don't peel keeping one load and the store aligned.  */
-+  for (i = 0; i <= N; i++)
-+    {
-+      ia[i] = ib[i] + ib[i+5];
-+    }
-+
-+  /* check results:  */
-+  for (i = 1; i <= N; i++)
-+    {
-+      if (ia[i] != ib[i] + ib[i+5])
-+        abort ();
-+    }
-+
-+  return 0;
-+}
-+
-+int main (void)
-+{ 
-+  int i;
-+
-+  check_vect ();
-+
-+  for (i = 0; i <= N+6; i++)
-+    {
-+      asm volatile ("" : "+r" (i));
-+      ib[i] = i;
-+    }
-+
-+  return main1 ();
-+}
-+
-+
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-4.c b/gcc/testsuite/gcc.dg/vect/vect-peel-4.c
-index 88f9f0ddcba..3b5272f284f 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-peel-4.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-peel-4.c
-@@ -1,49 +1,9 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- /* { dg-add-options bind_pic_locally } */
- 
--#include <stdarg.h>
--#include "tree-vect.h"
--
--#define N 128
--
--int ib[N+7];
--int ia[N+1];
--
--__attribute__ ((noinline))
--int main1 ()
--{
--  int i;
--
--  /* Don't peel keeping one load and the store aligned.  */
--  for (i = 0; i <= N; i++)
--    {
--      ia[i] = ib[i] + ib[i+5];
--    }
--
--  /* check results:  */
--  for (i = 1; i <= N; i++)
--    {
--      if (ia[i] != ib[i] + ib[i+5])
--        abort ();
--    }
--
--  return 0;
--}
--
--int main (void)
--{ 
--  int i;
--
--  check_vect ();
--
--  for (i = 0; i <= N+6; i++)
--    {
--      asm volatile ("" : "+r" (i));
--      ib[i] = i;
--    }
--
--  return main1 ();
--}
-+#include "vect-peel-4-src.c"
- 
- /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */
- /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect"  { xfail { ! vect_unaligned_possible } } } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-2char-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-2char-big-array.c
-index e246ae7f3c6..c40f8625b84 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-reduc-2char-big-array.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-2char-big-array.c
-@@ -62,4 +62,4 @@ int main (void)
-   return 0;
- }
- 
--/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail *-*-* } } } */
-+/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-2char.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-2char.c
-index 5f0551ee372..dd3045502f1 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-reduc-2char.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-2char.c
-@@ -46,4 +46,4 @@ int main (void)
-   return 0;
- }
- 
--/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail *-*-* } } } */
-+/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-2short.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-2short.c
-index 02c2bee8612..1a2d8d04f4e 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-reduc-2short.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-2short.c
-@@ -45,4 +45,4 @@ int main (void)
-   return 0;
- }
- 
--/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail *-*-* } } } */
-+/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-6.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-6.c
-index ad148046a8e..cc0d9694a4f 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-reduc-6.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-6.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_float_strict } */
- /* { dg-additional-options "-fno-fast-math" } */
- 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s16a.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s16a.c
-index 171451872e5..ffbc9706901 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s16a.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s16a.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- 
- #include <stdarg.h>
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c
-index ac674749b6f..05e343ad782 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- /* { dg-require-effective-target arm_v8_2a_dotprod_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */
- /* { dg-additional-options "-march=armv8.2-a+dotprod" { target { aarch64*-*-* } } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c
-index b036ad5b0b4..e0f47d8a4f2 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- 
- #include <stdarg.h>
-@@ -12,12 +14,6 @@ signed char Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
- 
- /* char->short->short dot product.
-    The dot-product pattern should be detected.
--   The reduction is currently not vectorized becaus of the signed->unsigned->signed
--   casts, since this patch:
--
--     2005-12-26  Kazu Hirata  
--                                                                                                
--        PR tree-optimization/25125
- 
-    When the dot-product is detected, the loop should be vectorized on vect_sdot_qi 
-    targets (targets that support dot-product of signed char).  
-@@ -60,5 +56,5 @@ int main (void)
- /* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 1 "vect" { xfail *-*-* } } } */
- /* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" } } */
- 
--/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
-+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
- 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u16b.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u16b.c
-index 57e18040cf2..0fc112012cf 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u16b.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u16b.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- 
- #include <stdarg.h>
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8a.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8a.c
-index d020f643bb8..e23ebd9b072 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8a.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8a.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- /* { dg-require-effective-target arm_v8_2a_dotprod_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */
- /* { dg-additional-options "-march=armv8.2-a+dotprod" { target { aarch64*-*-* } } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8b.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8b.c
-index 3155d97b3cd..288be13440d 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8b.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8b.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- 
- #include <stdarg.h>
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-epilogue-gaps.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-epilogue-gaps.c
-new file mode 100644
-index 00000000000..dc5704f5607
---- /dev/null
-+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-epilogue-gaps.c
-@@ -0,0 +1,45 @@
-+/* { dg-options "-O3 -fno-vect-cost-model" } */
-+struct {
-+    float real;
-+    float img;
-+} g[11];
-+
-+float __attribute__ ((noclone))
-+foo_11 (void)
-+{
-+  float sum = 0.0;
-+  for (int i = 0; i < 11; ++i)
-+    sum += g[i].real;
-+  return sum;
-+}
-+
-+float __attribute__ ((noclone))
-+foo_10 (void)
-+{
-+  float sum = 0.0;
-+  for (int i = 0; i < 10; ++i)
-+    sum += g[i].real;
-+  return sum;
-+}
-+
-+int main (void)
-+{
-+  float check_10 = 0.0;
-+  float check_11 = 0.0;
-+  for (int i = 0; i < 11; ++i)
-+    {
-+      asm volatile ("" : : : "memory");
-+      g[i].real = (float) i;
-+      g[i].img = (float) -i;
-+      if (i < 10)
-+	check_10 += (float) i;
-+      check_11 += (float) i;
-+    }
-+
-+  if (foo_10 () != check_10)
-+    __builtin_abort ();
-+  if (foo_11 () != check_11)
-+    __builtin_abort ();
-+
-+  return 0;
-+}
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1a.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1a.c
-index b06b234072b..1ddbe96ebc3 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1a.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1a.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- 
- #include <stdarg.h>
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1b-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1b-big-array.c
-index be03c7d011d..7ae2c838344 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1b-big-array.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1b-big-array.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- 
- #include <stdarg.h>
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1c-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1c-big-array.c
-index c30c85ce911..91ce0ef934e 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1c-big-array.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1c-big-array.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- 
- #include <stdarg.h>
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2a.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2a.c
-index a98edd3045a..2190eaa6242 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2a.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2a.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- 
- #include <stdarg.h>
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2b-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2b-big-array.c
-index 570e56a8c9b..6ad645b3bdd 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2b-big-array.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2b-big-array.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- 
- #include <stdarg.h>
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2c.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2c.c
-index 8190622d5d7..71df5741e16 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2c.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2c.c
-@@ -21,6 +21,8 @@ foo ()
-      2005-12-26  Kazu Hirata  
-                                                                                 
-         PR tree-optimization/25125
-+
-+     but we still handle the reduction.
-    */
- 
-   for (i = 0; i < N; i++)
-@@ -43,5 +45,4 @@ main (void)
- }
- 
- /* { dg-final { scan-tree-dump-times "vect_recog_widen_sum_pattern: detected" 1 "vect" { xfail *-*-* } } } */
--/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
--/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { target { ! vect_widen_sum_qi_to_hi } } } } */
-+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-sad.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-sad.c
-index a033a7d27d1..2f0bb692564 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-reduc-sad.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-sad.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_usad_char } */
- 
- #include <stdarg.h>
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-1.c b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-1.c
-index b912a3431f7..e5bbeaede09 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-1.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-1.c
-@@ -106,4 +106,4 @@ main (int argc, const char **argv)
- }
- 
- /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { target avx2_runtime } } } */
--/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(VS=16\\)" 2 "vect" { target avx2_runtime } } } */
-+/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(MODE=V16QI\\)" 2 "vect" { target avx2_runtime } } } */
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c
-index 89f983cad06..4c95dd20179 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- /* { dg-additional-options "-fno-ipa-icf" } */
- 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c
-index e319699cd92..4075f815cea 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- /* { dg-additional-options "-fno-ipa-icf" } */
- 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c
-index ee0538c0635..c4ac88e186d 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- /* { dg-additional-options "-fno-ipa-icf" } */
- 
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c
-index 6d74c693316..ebbf4f5e841 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- 
- #include "tree-vect.h"
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c
-index 942f63d6f31..2e28baae0b8 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- 
- #include <stdarg.h>
-diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c
-index 98f78d3b37a..d277f0b2b94 100644
---- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c
-+++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- 
- #include <stdarg.h>
-diff --git a/gcc/testsuite/gcc.dg/vect/wrapv-vect-reduc-dot-s8b.c b/gcc/testsuite/gcc.dg/vect/wrapv-vect-reduc-dot-s8b.c
-index 176f183f3ce..6fc7a282351 100644
---- a/gcc/testsuite/gcc.dg/vect/wrapv-vect-reduc-dot-s8b.c
-+++ b/gcc/testsuite/gcc.dg/vect/wrapv-vect-reduc-dot-s8b.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-require-effective-target vect_int } */
- 
- #include <stdarg.h>
-diff --git a/gcc/testsuite/gcc.dg/vshift-5.c b/gcc/testsuite/gcc.dg/vshift-5.c
-index daa5f1c5cd8..62e6328cb28 100644
---- a/gcc/testsuite/gcc.dg/vshift-5.c
-+++ b/gcc/testsuite/gcc.dg/vshift-5.c
-@@ -40,6 +40,42 @@ f2 (void)
-   a[3] = a3;
- }
- 
-+__attribute__((noinline, noclone)) void
-+f2a (int x)
-+{
-+  long long a0, a1, a2, a3;
-+  a0 = a[0];
-+  a1 = a[1];
-+  a2 = a[2];
-+  a3 = a[3];
-+  a0 = a0 << x;
-+  a1 = a1 << 2;
-+  a2 = a2 << 2;
-+  a3 = a3 << 2;
-+  a[0] = a0;
-+  a[1] = a1;
-+  a[2] = a2;
-+  a[3] = a3;
-+}
-+
-+__attribute__((noinline, noclone)) void
-+f2b (int x)
-+{
-+  long long a0, a1, a2, a3;
-+  a0 = a[0];
-+  a1 = a[1];
-+  a2 = a[2];
-+  a3 = a[3];
-+  a0 = a0 << 2;
-+  a1 = a1 << 2;
-+  a2 = a2 << x;
-+  a3 = a3 << 2;
-+  a[0] = a0;
-+  a[1] = a1;
-+  a[2] = a2;
-+  a[3] = a3;
-+}
-+
- __attribute__((noinline, noclone)) void
- f3 (int x)
- {
-@@ -77,5 +113,13 @@ main ()
-   if (a[0] != (4LL << 7) || a[1] != (3LL << 8)
-       || a[2] != (2LL << 9) || a[3] != (1LL << 10))
-     abort ();
-+  f2a (3);
-+  if (a[0] != (4LL << 10) || a[1] != (3LL << 10)
-+      || a[2] != (2LL << 11) || a[3] != (1LL << 12))
-+    abort ();
-+  f2b (3);
-+  if (a[0] != (4LL << 12) || a[1] != (3LL << 12)
-+      || a[2] != (2LL << 14) || a[3] != (1LL << 14))
-+    abort ();
-   return 0;
- }
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/dot_1.c b/gcc/testsuite/gcc.target/aarch64/sve/dot_1.c
-new file mode 100644
-index 00000000000..8ff66714e9b
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/dot_1.c
-@@ -0,0 +1,39 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O2 -ftree-vectorize" } */
-+
-+#include <stdint.h>
-+
-+#define DEF_DOT(TYPE1, TYPE2)						\
-+TYPE1 __attribute__ ((noinline, noclone))				\
-+dot_##TYPE1##_##TYPE2 (TYPE2 *restrict x, TYPE2 *restrict y, int n)	\
-+{									\
-+  TYPE1 sum = 0;							\
-+  for (int i = 0; i < n; i++)						\
-+    {									\
-+      sum += x[i] * y[i];						\
-+    }									\
-+  return sum;								\
-+}
-+
-+DEF_DOT(uint32_t, uint8_t)
-+DEF_DOT(int32_t, int8_t)
-+DEF_DOT(int64_t, int16_t)
-+
-+/* The uint16_t->uint64_t dot product requires a casting to satisfy the C
-+   language rules.  */
-+uint64_t __attribute__ ((noinline, noclone))
-+dot_uint64_t_uint16_t (uint16_t *restrict x, uint16_t *restrict y, int n)
-+{
-+  uint64_t sum = 0;
-+  for (int i = 0; i < n; i++)
-+    {
-+      sum += (unsigned int)x[i] * y[i];
-+    }
-+  return sum;
-+}
-+
-+/* { dg-final { scan-assembler-times {\tudot\tz[0-9]+\.s, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
-+/* { dg-final { scan-assembler-times {\tsdot\tz[0-9]+\.s, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
-+/* { dg-final { scan-assembler-times {\tudot\tz[0-9]+\.d, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
-+/* { dg-final { scan-assembler-times {\tsdot\tz[0-9]+\.d, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
-+/* { dg-final { scan-assembler-times {\twhilelo\t} 8 } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fmla_2.c b/gcc/testsuite/gcc.target/aarch64/sve/fmla_2.c
-index 5c04bcdb3f5..51925fa8f50 100644
---- a/gcc/testsuite/gcc.target/aarch64/sve/fmla_2.c
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/fmla_2.c
-@@ -17,3 +17,4 @@ f (double *restrict a, double *restrict b, double *restrict c,
- 
- /* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
- /* { dg-final { scan-assembler-not {\tfmad\t} } } */
-+/* { dg-final { scan-assembler-times {\tst1d} 2 } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mask_load_slp_1.c b/gcc/testsuite/gcc.target/aarch64/sve/mask_load_slp_1.c
-new file mode 100644
-index 00000000000..78c70b2be32
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/mask_load_slp_1.c
-@@ -0,0 +1,90 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O2 -ftree-vectorize" } */
-+
-+#include <stdint.h>
-+
-+#define MASK_SLP_2(TYPE_COND, ALT_VAL)					\
-+void __attribute__ ((noinline, noclone))				\
-+mask_slp_##TYPE_COND##_2_##ALT_VAL (int *restrict x, int *restrict y,	\
-+				    TYPE_COND *restrict z, int n)	\
-+{									\
-+  for (int i = 0; i < n; i += 2)					\
-+    {									\
-+      x[i] = y[i] ? z[i] : 1;						\
-+      x[i + 1] = y[i + 1] ? z[i + 1] : ALT_VAL;				\
-+    }									\
-+}
-+
-+#define MASK_SLP_4(TYPE_COND, ALT_VAL)					\
-+void __attribute__ ((noinline, noclone))				\
-+mask_slp_##TYPE_COND##_4_##ALT_VAL (int *restrict x, int *restrict y,	\
-+				    TYPE_COND *restrict z, int n)	\
-+{									\
-+  for (int i = 0; i < n; i += 4)					\
-+    {									\
-+      x[i] = y[i] ? z[i] : 1;						\
-+      x[i + 1] = y[i + 1] ? z[i + 1] : ALT_VAL;				\
-+      x[i + 2] = y[i + 2] ? z[i + 2] : 1;				\
-+      x[i + 3] = y[i + 3] ? z[i + 3] : ALT_VAL;				\
-+    }									\
-+}
-+
-+#define MASK_SLP_8(TYPE_COND, ALT_VAL)					\
-+void __attribute__ ((noinline, noclone))				\
-+mask_slp_##TYPE_COND##_8_##ALT_VAL (int *restrict x, int *restrict y,	\
-+				    TYPE_COND *restrict z, int n)	\
-+{									\
-+  for (int i = 0; i < n; i += 8)					\
-+    {									\
-+      x[i] = y[i] ? z[i] : 1;						\
-+      x[i + 1] = y[i + 1] ? z[i + 1] : ALT_VAL;				\
-+      x[i + 2] = y[i + 2] ? z[i + 2] : 1;				\
-+      x[i + 3] = y[i + 3] ? z[i + 3] : ALT_VAL;				\
-+      x[i + 4] = y[i + 4] ? z[i + 4] : 1;				\
-+      x[i + 5] = y[i + 5] ? z[i + 5] : ALT_VAL;				\
-+      x[i + 6] = y[i + 6] ? z[i + 6] : 1;				\
-+      x[i + 7] = y[i + 7] ? z[i + 7] : ALT_VAL;				\
-+    }									\
-+}
-+
-+#define MASK_SLP_FAIL(TYPE_COND)					\
-+void __attribute__ ((noinline, noclone))				\
-+mask_slp_##TYPE_COND##_FAIL (int *restrict x, int *restrict y,		\
-+			     TYPE_COND *restrict z, int n)		\
-+{									\
-+  for (int i = 0; i < n; i += 2)					\
-+    {									\
-+      x[i] = y[i] ? z[i] : 1;						\
-+      x[i + 1] = y[i + 1] ? z[i + 1] : x[z[i + 1]];			\
-+    }									\
-+}
-+
-+MASK_SLP_2(int8_t, 1)
-+MASK_SLP_2(int8_t, 2)
-+MASK_SLP_2(int, 1)
-+MASK_SLP_2(int, 2)
-+MASK_SLP_2(int64_t, 1)
-+MASK_SLP_2(int64_t, 2)
-+
-+MASK_SLP_4(int8_t, 1)
-+MASK_SLP_4(int8_t, 2)
-+MASK_SLP_4(int, 1)
-+MASK_SLP_4(int, 2)
-+MASK_SLP_4(int64_t, 1)
-+MASK_SLP_4(int64_t, 2)
-+
-+MASK_SLP_8(int8_t, 1)
-+MASK_SLP_8(int8_t, 2)
-+MASK_SLP_8(int, 1)
-+MASK_SLP_8(int, 2)
-+MASK_SLP_8(int64_t, 1)
-+MASK_SLP_8(int64_t, 2)
-+
-+MASK_SLP_FAIL(int8_t)
-+MASK_SLP_FAIL(int)
-+MASK_SLP_FAIL(int64_t)
-+
-+/* { dg-final { scan-assembler-not {\tld2w\t} } } */
-+/* { dg-final { scan-assembler-not {\tst2w\t} } } */
-+/* { dg-final { scan-assembler-times {\tld1w\t} 48 } } */
-+/* { dg-final { scan-assembler-times {\tst1w\t} 40 } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_1.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_1.c
-index a258344b0a9..f152d04b473 100644
---- a/gcc/testsuite/gcc.target/aarch64/sve/reduc_1.c
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_1.c
-@@ -105,8 +105,8 @@ reduc_##NAME##_##TYPE (TYPE *a, int n)		\
- 
- TEST_BITWISE (DEF_REDUC_BITWISE)
- 
--/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */
--/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
-+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */
-+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
- /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
- /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
- 
-@@ -157,8 +157,8 @@ TEST_BITWISE (DEF_REDUC_BITWISE)
- /* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
- /* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
- 
--/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */
--/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
-+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */
-+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */
- /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
- /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
- /* { dg-final { scan-assembler-times {\tfaddv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_2.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_2.c
-index 376a453fc73..0640cba8e0f 100644
---- a/gcc/testsuite/gcc.target/aarch64/sve/reduc_2.c
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_2.c
-@@ -116,8 +116,8 @@ reduc_##NAME##TYPE (TYPE (*restrict a)[NUM_ELEMS(TYPE)],	\
- 
- TEST_BITWISE (DEF_REDUC_BITWISE)
- 
--/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */
--/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
-+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */
-+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */
- /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
- /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
- /* { dg-final { scan-assembler-times {\tfaddv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_5.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_5.c
-index ff535942331..cced4ad488e 100644
---- a/gcc/testsuite/gcc.target/aarch64/sve/reduc_5.c
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_5.c
-@@ -23,16 +23,12 @@ REDUC (uint64_t)
- REDUC (float)
- REDUC (double)
- 
--/* XFAILed until we support sub-int reductions for signed types.  */
--/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, p[0-7]/m} 2 { xfail *-*-* } } } */
--/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, p[0-7]/m} 2 { xfail *-*-* } } } */
--/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, p[0-7]/m} 1 } } */
--/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, p[0-7]/m} 1 } } */
-+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, p[0-7]/m} 2 } } */
-+/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, p[0-7]/m} 2 } } */
- /* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, p[0-7]/m} 2 } } */
- /* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, p[0-7]/m} 2 } } */
- /* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m} 1 } } */
- /* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m} 1 } } */
- 
--/* XFAILed until we support sub-int reductions for signed types.  */
--/* { dg-final { scan-assembler-times {\tsub\t} 8 { xfail *-*-* } } } */
-+/* { dg-final { scan-assembler-times {\tsub\t} 8 } } */
- /* { dg-final { scan-assembler-times {\tfsub\t} 2 } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_8.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_8.c
-index 3913b8848c0..dec4c87e54d 100644
---- a/gcc/testsuite/gcc.target/aarch64/sve/reduc_8.c
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_8.c
-@@ -15,6 +15,5 @@ reduc (int *restrict a, int *restrict b, int *restrict c)
- }
- 
- /* { dg-final { scan-assembler-times {\tcmpne\tp[0-9]+\.s, } 1 } } */
--/* We ought to use the CMPNE result for the SEL too.  */
--/* { dg-final { scan-assembler-not {\tcmpeq\tp[0-9]+\.s, } { xfail *-*-* } } } */
-+/* { dg-final { scan-assembler-not {\tcmpeq\tp[0-9]+\.s, } } } */
- /* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s, } 1 } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_strict_3.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_strict_3.c
-index a718e9d2ebf..83ebec50bc6 100644
---- a/gcc/testsuite/gcc.target/aarch64/sve/reduc_strict_3.c
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_strict_3.c
-@@ -1,10 +1,7 @@
- /* { dg-do compile } */
--/* { dg-options "-O2 -ftree-vectorize -fno-inline -msve-vector-bits=256 -fdump-tree-vect-details" } */
-+/* { dg-options "-O2 -ftree-vectorize" } */
- 
--double mat[100][4];
--double mat2[100][8];
--double mat3[100][12];
--double mat4[100][3];
-+double mat[100][2];
- 
- double
- slp_reduc_plus (int n)
-@@ -14,115 +11,8 @@ slp_reduc_plus (int n)
-     {
-       tmp = tmp + mat[i][0];
-       tmp = tmp + mat[i][1];
--      tmp = tmp + mat[i][2];
--      tmp = tmp + mat[i][3];
-     }
-   return tmp;
- }
- 
--double
--slp_reduc_plus2 (int n)
--{
--  double tmp = 0.0;
--  for (int i = 0; i < n; i++)
--    {
--      tmp = tmp + mat2[i][0];
--      tmp = tmp + mat2[i][1];
--      tmp = tmp + mat2[i][2];
--      tmp = tmp + mat2[i][3];
--      tmp = tmp + mat2[i][4];
--      tmp = tmp + mat2[i][5];
--      tmp = tmp + mat2[i][6];
--      tmp = tmp + mat2[i][7];
--    }
--  return tmp;
--}
--
--double
--slp_reduc_plus3 (int n)
--{
--  double tmp = 0.0;
--  for (int i = 0; i < n; i++)
--    {
--      tmp = tmp + mat3[i][0];
--      tmp = tmp + mat3[i][1];
--      tmp = tmp + mat3[i][2];
--      tmp = tmp + mat3[i][3];
--      tmp = tmp + mat3[i][4];
--      tmp = tmp + mat3[i][5];
--      tmp = tmp + mat3[i][6];
--      tmp = tmp + mat3[i][7];
--      tmp = tmp + mat3[i][8];
--      tmp = tmp + mat3[i][9];
--      tmp = tmp + mat3[i][10];
--      tmp = tmp + mat3[i][11];
--    }
--  return tmp;
--}
--
--void
--slp_non_chained_reduc (int n, double * restrict out)
--{
--  for (int i = 0; i < 3; i++)
--    out[i] = 0;
--
--  for (int i = 0; i < n; i++)
--    {
--      out[0] = out[0] + mat4[i][0];
--      out[1] = out[1] + mat4[i][1];
--      out[2] = out[2] + mat4[i][2];
--    }
--}
--
--/* Strict FP reductions shouldn't be used for the outer loops, only the
--   inner loops.  */
--
--float
--double_reduc1 (float (*restrict i)[16])
--{
--  float l = 0;
--
--  for (int a = 0; a < 8; a++)
--    for (int b = 0; b < 8; b++)
--      l += i[b][a];
--  return l;
--}
--
--float
--double_reduc2 (float *restrict i)
--{
--  float l = 0;
--
--  for (int a = 0; a < 8; a++)
--    for (int b = 0; b < 16; b++)
--      {
--        l += i[b * 4];
--        l += i[b * 4 + 1];
--        l += i[b * 4 + 2];
--        l += i[b * 4 + 3];
--      }
--  return l;
--}
--
--float
--double_reduc3 (float *restrict i, float *restrict j)
--{
--  float k = 0, l = 0;
--
--  for (int a = 0; a < 8; a++)
--    for (int b = 0; b < 8; b++)
--      {
--        k += i[b];
--        l += j[b];
--      }
--  return l * k;
--}
--
--/* { dg-final { scan-assembler-times {\tfadda\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} 4 } } */
--/* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} 9 } } */
--/* 1 reduction each for double_reduc{1,2} and 2 for double_reduc3.  Each one
--   is reported three times, once for SVE, once for 128-bit AdvSIMD and once
--   for 64-bit AdvSIMD.  */
--/* { dg-final { scan-tree-dump-times "Detected double reduction" 12 "vect" } } */
--/* double_reduc2 has 2 reductions and slp_non_chained_reduc has 3.  */
--/* { dg-final { scan-tree-dump-times "Detected reduction" 10 "vect" } } */
-+/* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d\n} 1 } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_13.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_13.c
-index 0b2a7ad57e3..37b5f1148a3 100644
---- a/gcc/testsuite/gcc.target/aarch64/sve/slp_13.c
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_13.c
-@@ -32,7 +32,6 @@ vec_slp_##TYPE (TYPE *restrict a, int n)			\
- 
- TEST_ALL (VEC_PERM)
- 
--/* ??? We don't treat the int8_t and int16_t loops as reductions.  */
- /* ??? We don't treat the uint loops as SLP.  */
- /* The loop should be fully-masked.  */
- /* { dg-final { scan-assembler-times {\tld1b\t} 2 { xfail *-*-* } } } */
-@@ -41,15 +40,15 @@ TEST_ALL (VEC_PERM)
- /* { dg-final { scan-assembler-times {\tld1w\t} 2 } } */
- /* { dg-final { scan-assembler-times {\tld1d\t} 3 { xfail *-*-* } } } */
- /* { dg-final { scan-assembler-times {\tld1d\t} 2 } } */
--/* { dg-final { scan-assembler-not {\tldr} { xfail *-*-* } } } */
-+/* { dg-final { scan-assembler-not {\tldr} } } */
- 
--/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 4 { xfail *-*-* } } } */
--/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 6 { xfail *-*-* } } } */
-+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 4 } } */
-+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 6 } } */
- /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */
- /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */
- 
--/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 2 { xfail *-*-* } } } */
--/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 2 { xfail *-*-* } } } */
-+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */
-+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */
- /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */
- /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */
- /* { dg-final { scan-assembler-times {\tfadda\th[0-9]+, p[0-7], h[0-9]+, z[0-9]+\.h\n} 1 } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_5.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_5.c
-index b75edc69e2d..6a199d00659 100644
---- a/gcc/testsuite/gcc.target/aarch64/sve/slp_5.c
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_5.c
-@@ -33,34 +33,24 @@ vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b, int n)	\
- 
- TEST_ALL (VEC_PERM)
- 
--/* ??? We don't think it's worth using SLP for the 64-bit loops and fall
--   back to the less efficient non-SLP implementation instead.  */
--/* ??? At present we don't treat the int8_t and int16_t loops as
--   reductions.  */
--/* { dg-final { scan-assembler-times {\tld1b\t} 2 { xfail *-*-* } } } */
--/* { dg-final { scan-assembler-times {\tld1h\t} 3 { xfail *-*-* } } } */
--/* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */
--/* { dg-final { scan-assembler-times {\tld1h\t} 2 } } */
-+/* { dg-final { scan-assembler-times {\tld1b\t} 2 } } */
-+/* { dg-final { scan-assembler-times {\tld1h\t} 3 } } */
- /* { dg-final { scan-assembler-times {\tld1w\t} 3 } } */
- /* { dg-final { scan-assembler-times {\tld1d\t} 3 } } */
- /* { dg-final { scan-assembler-not {\tld2b\t} } } */
- /* { dg-final { scan-assembler-not {\tld2h\t} } } */
- /* { dg-final { scan-assembler-not {\tld2w\t} } } */
- /* { dg-final { scan-assembler-not {\tld2d\t} } } */
--/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b} 4 { xfail *-*-* } } } */
--/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h} 4 { xfail *-*-* } } } */
--/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b} 2 } } */
--/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h} 2 } } */
-+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b} 4 } } */
-+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h} 4 } } */
- /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s} 4 } } */
- /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d} 4 } } */
- /* { dg-final { scan-assembler-times {\tfaddv\th[0-9]+, p[0-7], z[0-9]+\.h} 2 } } */
- /* { dg-final { scan-assembler-times {\tfaddv\ts[0-9]+, p[0-7], z[0-9]+\.s} 2 } } */
- /* { dg-final { scan-assembler-times {\tfaddv\td[0-9]+, p[0-7], z[0-9]+\.d} 2 } } */
- 
--/* Should be 4 and 6 respectively, if we used reductions for int8_t and
--   int16_t.  */
--/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 2 } } */
--/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 } } */
-+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 4 } } */
-+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 6 } } */
- /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */
- /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */
- 
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_7.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_7.c
-index 9e6aa8ccbf8..19207207999 100644
---- a/gcc/testsuite/gcc.target/aarch64/sve/slp_7.c
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_7.c
-@@ -31,45 +31,27 @@ vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b, int n)	\
-   T (uint16_t)					\
-   T (int32_t)					\
-   T (uint32_t)					\
--  T (int64_t)					\
--  T (uint64_t)					\
-   T (_Float16)					\
--  T (float)					\
--  T (double)
-+  T (float)
- 
- TEST_ALL (VEC_PERM)
- 
--/* We can't use SLP for the 64-bit loops, since the number of reduction
--   results might be greater than the number of elements in the vector.
--   Otherwise we have two loads per loop, one for the initial vector
--   and one for the loop body.  */
--/* ??? At present we don't treat the int8_t and int16_t loops as
--   reductions.  */
--/* { dg-final { scan-assembler-times {\tld1b\t} 2 { xfail *-*-* } } } */
--/* { dg-final { scan-assembler-times {\tld1h\t} 3 { xfail *-*-* } } } */
--/* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */
--/* { dg-final { scan-assembler-times {\tld1h\t} 2 } } */
-+/* We have two loads per loop, one for the initial vector and one for
-+   the loop body.  */
-+/* { dg-final { scan-assembler-times {\tld1b\t} 2 } } */
-+/* { dg-final { scan-assembler-times {\tld1h\t} 3 } } */
- /* { dg-final { scan-assembler-times {\tld1w\t} 3 } } */
--/* { dg-final { scan-assembler-times {\tld4d\t} 3 } } */
- /* { dg-final { scan-assembler-not {\tld4b\t} } } */
- /* { dg-final { scan-assembler-not {\tld4h\t} } } */
- /* { dg-final { scan-assembler-not {\tld4w\t} } } */
--/* { dg-final { scan-assembler-not {\tld1d\t} } } */
--/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b} 8 { xfail *-*-* } } } */
--/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h} 8 { xfail *-*-* } } } */
--/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b} 4 } } */
--/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h} 4 } } */
-+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b} 8 } } */
-+/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h} 8 } } */
- /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s} 8 } } */
--/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d} 8 } } */
- /* { dg-final { scan-assembler-times {\tfaddv\th[0-9]+, p[0-7], z[0-9]+\.h} 4 } } */
- /* { dg-final { scan-assembler-times {\tfaddv\ts[0-9]+, p[0-7], z[0-9]+\.s} 4 } } */
--/* { dg-final { scan-assembler-times {\tfaddv\td[0-9]+, p[0-7], z[0-9]+\.d} 4 } } */
- 
--/* Should be 4 and 6 respectively, if we used reductions for int8_t and
--   int16_t.  */
--/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 2 } } */
--/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 } } */
-+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 4 } } */
-+/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 6 } } */
- /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */
--/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */
- 
- /* { dg-final { scan-assembler-not {\tuqdec} } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.c
-index 68baba9e965..40ff2d561a8 100644
---- a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.c
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.c
-@@ -15,12 +15,9 @@ f (TYPE *x, TYPE *y, unsigned short n, l
- /* { dg-final { scan-assembler {\tst1w\tz[0-9]+} } } */
- /* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */
- /* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */
--/* Should multiply by (VF-1)*4 rather than (257-1)*4.  */
--/* { dg-final { scan-assembler-not {, 1024} } } */
--/* { dg-final { scan-assembler-not {lsl[^\n]*[, ]10} } } */
--/* { dg-final { scan-assembler-not {\tcmp\tx[0-9]+, 0} } } */
--/* { dg-final { scan-assembler-not {\tcmp\tw[0-9]+, 0} } } */
--/* { dg-final { scan-assembler-not {\tcsel\tx[0-9]+} } } */
--/* Two range checks and a check for n being zero.  */
--/* { dg-final { scan-assembler-times {\tcmp\t} 1 } } */
--/* { dg-final { scan-assembler-times {\tccmp\t} 2 } } */
-+/* Should use a WAR check that multiplies by (VF-2)*4 rather than
-+   an overlap check that multiplies by (257-1)*4.  */
-+/* { dg-final { scan-assembler {\tcntb\t(x[0-9]+)\n.*\tsub\tx[0-9]+, \1, #8\n.*\tmul\tx[0-9]+,[^\n]*\1} } } */
-+/* One range check and a check for n being zero.  */
-+/* { dg-final { scan-assembler-times {\t(?:cmp|tst)\t} 1 } } */
-+/* { dg-final { scan-assembler-times {\tccmp\t} 1 } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_2.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_2.c
-index 30f6d2691b8..b8afea70207 100644
---- a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_2.c
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_2.c
-@@ -15,7 +15,7 @@ f (TYPE *x, TYPE *y, unsigned short n, unsigned short m)
- /* { dg-final { scan-assembler {\tst1w\tz[0-9]+} } } */
- /* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */
- /* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */
--/* Should multiply by (257-1)*4 rather than (VF-1)*4.  */
-+/* Should multiply by (257-1)*4 rather than (VF-1)*4 or (VF-2)*4.  */
- /* { dg-final { scan-assembler-times {\tubfiz\tx[0-9]+, x2, 10, 16\n} 1 } } */
- /* { dg-final { scan-assembler-times {\tubfiz\tx[0-9]+, x3, 10, 16\n} 1 } } */
- /* { dg-final { scan-assembler-not {\tcmp\tx[0-9]+, 0} } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_3.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_3.c
-index 70792ff9f33..5ab6859ad4e 100644
---- a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_3.c
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_3.c
-@@ -15,13 +15,10 @@ f (TYPE *x, TYPE *y, int n, long m __attribute__((unused)))
- /* { dg-final { scan-assembler {\tst1w\tz[0-9]+} } } */
- /* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */
- /* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */
--/* Should multiply by (VF-1)*4 rather than (257-1)*4.  */
--/* { dg-final { scan-assembler-not {, 1024} } } */
--/* { dg-final { scan-assembler-not {\t.bfiz\t} } } */
--/* { dg-final { scan-assembler-not {lsl[^\n]*[, ]10} } } */
--/* { dg-final { scan-assembler-not {\tcmp\tx[0-9]+, 0} } } */
--/* { dg-final { scan-assembler {\tcmp\tw2, 0} } } */
--/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+} 2 } } */
--/* Two range checks and a check for n being zero.  */
--/* { dg-final { scan-assembler {\tcmp\t} } } */
--/* { dg-final { scan-assembler-times {\tccmp\t} 2 } } */
-+/* Should use a WAR check that multiplies by (VF-2)*4 rather than
-+   an overlap check that multiplies by (257-1)*4.  */
-+/* { dg-final { scan-assembler {\tcntb\t(x[0-9]+)\n.*\tsub\tx[0-9]+, \1, #8\n.*\tmul\tx[0-9]+,[^\n]*\1} } } */
-+/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+[^\n]*xzr} 1 } } */
-+/* One range check and a check for n being zero.  */
-+/* { dg-final { scan-assembler-times {\tcmp\t} 1 } } */
-+/* { dg-final { scan-assembler-times {\tccmp\t} 1 } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_5.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_5.c
-index 688f3be61d7..93c114193e9 100644
---- a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_5.c
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_5.c
-@@ -15,13 +15,10 @@ f (TYPE *x, TYPE *y, long n, long m __attribute__((unused)))
- /* { dg-final { scan-assembler {\tst1d\tz[0-9]+} } } */
- /* { dg-final { scan-assembler {\tldr\td[0-9]+} } } */
- /* { dg-final { scan-assembler {\tstr\td[0-9]+} } } */
--/* Should multiply by (VF-1)*8 rather than (257-1)*8.  */
--/* { dg-final { scan-assembler-not {, 2048} } } */
--/* { dg-final { scan-assembler-not {\t.bfiz\t} } } */
--/* { dg-final { scan-assembler-not {lsl[^\n]*[, ]11} } } */
--/* { dg-final { scan-assembler {\tcmp\tx[0-9]+, 0} } } */
--/* { dg-final { scan-assembler-not {\tcmp\tw[0-9]+, 0} } } */
--/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+} 2 } } */
--/* Two range checks and a check for n being zero.  */
--/* { dg-final { scan-assembler {\tcmp\t} } } */
--/* { dg-final { scan-assembler-times {\tccmp\t} 2 } } */
-+/* Should use a WAR check that multiplies by (VF-2)*8 rather than
-+   an overlap check that multiplies by (257-1)*4.  */
-+/* { dg-final { scan-assembler {\tcntb\t(x[0-9]+)\n.*\tsub\tx[0-9]+, \1, #16\n.*\tmul\tx[0-9]+,[^\n]*\1} } } */
-+/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+[^\n]*xzr} 1 } } */
-+/* One range check and a check for n being zero.  */
-+/* { dg-final { scan-assembler-times {\tcmp\t} 1 } } */
-+/* { dg-final { scan-assembler-times {\tccmp\t} 1 } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_4.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_4.c
-index 00d84760a19..b38f23e87ba 100644
---- a/gcc/testsuite/gcc.target/aarch64/sve/vcond_4.c
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_4.c
-@@ -98,24 +98,24 @@ TEST_CMP (nugt)
- /* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 { xfail *-*-* } } } */
- 
- /* 5 for lt, 5 for ult and 5 for nult.  */
--/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 { xfail *-*-* } } } */
--/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 { xfail *-*-* } } } */
-+/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */
-+/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */
- 
- /* 5 for le, 5 for ule and 5 for nule.  */
--/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 { xfail *-*-* } } } */
--/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 { xfail *-*-* } } } */
-+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */
-+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */
- 
- /* 5 for gt, 5 for ugt and 5 for nugt.  */
--/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 { xfail *-*-* } } } */
--/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 { xfail *-*-* } } } */
-+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */
-+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */
- 
- /* 5 for ge, 5 for uge and 5 for nuge.  */
--/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 { xfail *-*-* } } } */
--/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 { xfail *-*-* } } } */
-+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */
-+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */
- 
- /* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} } } */
- /* 3 loops * 5 invocations for all 12 unordered comparisons.  */
--/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 180 { xfail *-*-* } } } */
-+/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 180 } } */
- 
- /* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 7 { xfail *-*-* } } } */
- /* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 14 { xfail *-*-* } } } */
-@@ -123,19 +123,19 @@ TEST_CMP (nugt)
- /* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 { xfail *-*-* } } } */
- /* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 { xfail *-*-* } } } */
- 
--/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 { xfail *-*-* } } } */
--/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 { xfail *-*-* } } } */
-+/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */
-+/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */
- 
--/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 { xfail *-*-* } } } */
--/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 { xfail *-*-* } } } */
-+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */
-+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */
- 
--/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 { xfail *-*-* } } } */
--/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 { xfail *-*-* } } } */
-+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */
-+/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */
- 
--/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 { xfail *-*-* } } } */
--/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 { xfail *-*-* } } } */
-+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */
-+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */
- 
- /* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} } } */
- /* 3 loops * 5 invocations, with 2 invocations having ncopies == 2,
-    for all 12 unordered comparisons.  */
--/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 252 { xfail *-*-* } } } */
-+/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 252 } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_5.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_5.c
-index 23bfb7b2649..2f16fbff522 100644
---- a/gcc/testsuite/gcc.target/aarch64/sve/vcond_5.c
-+++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_5.c
-@@ -19,16 +19,16 @@
- /* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 40 { xfail *-*-* } } } */
- 
- /* 5 for le, 5 for ule and 5 for nule.  */
--/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 15 { xfail *-*-* } } } */
--/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 30 { xfail *-*-* } } } */
-+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 15 } } */
-+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 30 } } */
- 
- /* 5 for gt, 5 for ugt, 5 for nueq and 5 for nugt.  */
- /* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 20 { xfail *-*-* } } } */
- /* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 40 { xfail *-*-* } } } */
- 
- /* 5 for ge, 5 for uge and 5 for nuge.  */
--/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 15 { xfail *-*-* } } } */
--/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 30 { xfail *-*-* } } } */
-+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 15 } } */
-+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 30 } } */
- 
- /* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} } } */
- /* 3 loops * 5 invocations for ordered, unordered amd ueq.  */
-@@ -43,14 +43,14 @@
- /* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 28 { xfail *-*-* } } } */
- /* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 56 { xfail *-*-* } } } */
- 
--/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 21 { xfail *-*-* } } } */
--/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 42 { xfail *-*-* } } } */
-+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 21 } } */
-+/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 42 } } */
- 
- /* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 28 { xfail *-*-* } } } */
- /* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 56 { xfail *-*-* } } } */
- 
--/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 21 { xfail *-*-* } } } */
--/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 42 { xfail *-*-* } } } */
-+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 21 } } */
-+/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 42 } } */
- 
- /* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} } } */
- /* 3 loops * 5 invocations, with 2 invocations having ncopies == 2,
-diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_1.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_1.c
-new file mode 100644
-index 00000000000..fe490cfbf3f
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_1.c
-@@ -0,0 +1,18 @@
-+/* { dg-options "-O2 -ftree-vectorize" } */
-+
-+#pragma GCC target "+nosve"
-+
-+#include <stdint.h>
-+
-+void
-+f (int64_t *x, int64_t *y, int32_t *z, int n)
-+{
-+  for (int i = 0; i < n; ++i)
-+    {
-+      x[i] += y[i];
-+      z[i] += z[i - 2];
-+    }
-+}
-+
-+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.2d,} 1 } } */
-+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.2s,} 1 } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_10.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_10.c
-new file mode 100644
-index 00000000000..81e77a8bb04
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_10.c
-@@ -0,0 +1,18 @@
-+/* { dg-options "-O2 -ftree-vectorize" } */
-+
-+#pragma GCC target "+nosve"
-+
-+#include <stdint.h>
-+
-+void
-+f (int16_t *x, int16_t *y, uint8_t *z, int n)
-+{
-+  for (int i = 0; i < n; ++i)
-+    {
-+      x[i] = z[i];
-+      y[i] += y[i - 8];
-+    }
-+}
-+
-+/* { dg-final { scan-assembler-times {\tuxtl\tv[0-9]+\.8h, v[0-9]+\.8b\n} 1 } } */
-+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.8h,} 1 } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_11.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_11.c
-new file mode 100644
-index 00000000000..d9da6c1f12a
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_11.c
-@@ -0,0 +1,18 @@
-+/* { dg-options "-O2 -ftree-vectorize" } */
-+
-+#pragma GCC target "+nosve"
-+
-+#include <stdint.h>
-+
-+void
-+f (int32_t *x, int64_t *y, int64_t *z, int n)
-+{
-+  for (int i = 0; i < n; ++i)
-+    {
-+      x[i] = z[i];
-+      y[i] += y[i - 2];
-+    }
-+}
-+
-+/* { dg-final { scan-assembler-times {\txtn\tv[0-9]+\.2s, v[0-9]+\.2d\n} 1 } } */
-+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.2d,} 1 } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_12.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_12.c
-new file mode 100644
-index 00000000000..80dab8bf55f
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_12.c
-@@ -0,0 +1,18 @@
-+/* { dg-options "-O2 -ftree-vectorize" } */
-+
-+#pragma GCC target "+nosve"
-+
-+#include <stdint.h>
-+
-+void
-+f (int16_t *x, int32_t *y, int32_t *z, int n)
-+{
-+  for (int i = 0; i < n; ++i)
-+    {
-+      x[i] = z[i];
-+      y[i] += y[i - 4];
-+    }
-+}
-+
-+/* { dg-final { scan-assembler-times {\txtn\tv[0-9]+\.4h, v[0-9]+\.4s\n} 1 } } */
-+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.4s,} 1 } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_13.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_13.c
-new file mode 100644
-index 00000000000..655fa7d4bf1
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_13.c
-@@ -0,0 +1,18 @@
-+/* { dg-options "-O2 -ftree-vectorize" } */
-+
-+#pragma GCC target "+nosve"
-+
-+#include <stdint.h>
-+
-+void
-+f (int8_t *x, int16_t *y, int16_t *z, int n)
-+{
-+  for (int i = 0; i < n; ++i)
-+    {
-+      x[i] = z[i];
-+      y[i] += y[i - 8];
-+    }
-+}
-+
-+/* { dg-final { scan-assembler-times {\txtn\tv[0-9]+\.8b, v[0-9]+\.8h\n} 1 } } */
-+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.8h,} 1 } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_2.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_2.c
-new file mode 100644
-index 00000000000..1fe69cad259
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_2.c
-@@ -0,0 +1,19 @@
-+/* { dg-options "-O2 -ftree-vectorize" } */
-+
-+#pragma GCC target "+nosve"
-+
-+#include <stdint.h>
-+
-+void
-+f (int32_t *x, int32_t *y, int16_t *z, int n)
-+{
-+  for (int i = 0; i < n; ++i)
-+    {
-+      x[i] += y[i];
-+      z[i] += z[i - 4];
-+    }
-+}
-+
-+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.4s,} 1 } } */
-+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.4h,} 1 } } */
-+/* { dg-final { scan-assembler-not {\tadd\tv[0-9]+\.2s,} } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_3.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_3.c
-new file mode 100644
-index 00000000000..1290772216e
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_3.c
-@@ -0,0 +1,19 @@
-+/* { dg-options "-O2 -ftree-vectorize" } */
-+
-+#pragma GCC target "+nosve"
-+
-+#include <stdint.h>
-+
-+void
-+f (int16_t *x, int16_t *y, int8_t *z, int n)
-+{
-+  for (int i = 0; i < n; ++i)
-+    {
-+      x[i] += y[i];
-+      z[i] += z[i - 8];
-+    }
-+}
-+
-+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.8h,} 1 } } */
-+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.8b,} 1 } } */
-+/* { dg-final { scan-assembler-not {\tadd\tv[0-9]+\.4h,} } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_4.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_4.c
-new file mode 100644
-index 00000000000..768ea8c7164
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_4.c
-@@ -0,0 +1,18 @@
-+/* { dg-options "-O2 -ftree-vectorize" } */
-+
-+#pragma GCC target "+nosve"
-+
-+#include <stdint.h>
-+
-+void
-+f (int64_t *x, int64_t *y, int8_t *z, int n)
-+{
-+  for (int i = 0; i < n; ++i)
-+    {
-+      x[i] += y[i];
-+      z[i] += z[i - 8];
-+    }
-+}
-+
-+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.2d,} 4 } } */
-+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.8b,} 1 } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_5.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_5.c
-new file mode 100644
-index 00000000000..ca8a65a16e7
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_5.c
-@@ -0,0 +1,18 @@
-+/* { dg-options "-O2 -ftree-vectorize" } */
-+
-+#pragma GCC target "+nosve"
-+
-+#include <stdint.h>
-+
-+void
-+f (int64_t *x, int64_t *y, int32_t *z, int n)
-+{
-+  for (int i = 0; i < n; ++i)
-+    {
-+      x[i] = z[i];
-+      y[i] += y[i - 2];
-+    }
-+}
-+
-+/* { dg-final { scan-assembler-times {\tsxtl\tv[0-9]+\.2d, v[0-9]+\.2s\n} 1 } } */
-+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.2d,} 1 } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_6.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_6.c
-new file mode 100644
-index 00000000000..6c09b5b146b
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_6.c
-@@ -0,0 +1,18 @@
-+/* { dg-options "-O2 -ftree-vectorize" } */
-+
-+#pragma GCC target "+nosve"
-+
-+#include <stdint.h>
-+
-+void
-+f (int32_t *x, int32_t *y, int16_t *z, int n)
-+{
-+  for (int i = 0; i < n; ++i)
-+    {
-+      x[i] = z[i];
-+      y[i] += y[i - 4];
-+    }
-+}
-+
-+/* { dg-final { scan-assembler-times {\tsxtl\tv[0-9]+\.4s, v[0-9]+\.4h\n} 1 } } */
-+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.4s,} 1 } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_7.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_7.c
-new file mode 100644
-index 00000000000..94a66c545ef
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_7.c
-@@ -0,0 +1,18 @@
-+/* { dg-options "-O2 -ftree-vectorize" } */
-+
-+#pragma GCC target "+nosve"
-+
-+#include <stdint.h>
-+
-+void
-+f (int16_t *x, int16_t *y, int8_t *z, int n)
-+{
-+  for (int i = 0; i < n; ++i)
-+    {
-+      x[i] = z[i];
-+      y[i] += y[i - 8];
-+    }
-+}
-+
-+/* { dg-final { scan-assembler-times {\tsxtl\tv[0-9]+\.8h, v[0-9]+\.8b\n} 1 } } */
-+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.8h,} 1 } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_8.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_8.c
-new file mode 100644
-index 00000000000..9531966c294
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_8.c
-@@ -0,0 +1,18 @@
-+/* { dg-options "-O2 -ftree-vectorize" } */
-+
-+#pragma GCC target "+nosve"
-+
-+#include <stdint.h>
-+
-+void
-+f (int64_t *x, int64_t *y, uint32_t *z, int n)
-+{
-+  for (int i = 0; i < n; ++i)
-+    {
-+      x[i] = z[i];
-+      y[i] += y[i - 2];
-+    }
-+}
-+
-+/* { dg-final { scan-assembler-times {\tuxtl\tv[0-9]+\.2d, v[0-9]+\.2s\n} 1 } } */
-+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.2d,} 1 } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_9.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_9.c
-new file mode 100644
-index 00000000000..de8f6988685
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_9.c
-@@ -0,0 +1,18 @@
-+/* { dg-options "-O2 -ftree-vectorize" } */
-+
-+#pragma GCC target "+nosve"
-+
-+#include <stdint.h>
-+
-+void
-+f (int32_t *x, int32_t *y, uint16_t *z, int n)
-+{
-+  for (int i = 0; i < n; ++i)
-+    {
-+      x[i] = z[i];
-+      y[i] += y[i - 4];
-+    }
-+}
-+
-+/* { dg-final { scan-assembler-times {\tuxtl\tv[0-9]+\.4s, v[0-9]+\.4h\n} 1 } } */
-+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.4s,} 1 } } */
-diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-19.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-19.c
-index ae2f8611ea6..9d926ca5dfe 100644
---- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-19.c
-+++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-19.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-do compile } */
- /* { dg-options "-O3 -mavx -mtune=generic -dp" } */
- 
-diff --git a/gcc/testsuite/gcc.target/i386/avx2-vect-mask-store-move1.c b/gcc/testsuite/gcc.target/i386/avx2-vect-mask-store-move1.c
-index 2a105601c71..51765900fcf 100644
---- a/gcc/testsuite/gcc.target/i386/avx2-vect-mask-store-move1.c
-+++ b/gcc/testsuite/gcc.target/i386/avx2-vect-mask-store-move1.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-options "-O3 -mavx2 -fdump-tree-vect-details" } */
- /* { dg-require-effective-target avx2 } */
- 
-diff --git a/gcc/testsuite/gcc.target/i386/avx512f-gather-2.c b/gcc/testsuite/gcc.target/i386/avx512f-gather-2.c
-index a26aa6529e8..4de04511934 100644
---- a/gcc/testsuite/gcc.target/i386/avx512f-gather-2.c
-+++ b/gcc/testsuite/gcc.target/i386/avx512f-gather-2.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-do compile } */ /* PR59617 */
- /* { dg-options "-O3 -mavx512f -fdump-tree-vect-details -mtune=knl" } */
- 
-diff --git a/gcc/testsuite/gcc.target/i386/avx512f-gather-5.c b/gcc/testsuite/gcc.target/i386/avx512f-gather-5.c
-index 2bb9c5c090b..946117d9d30 100644
---- a/gcc/testsuite/gcc.target/i386/avx512f-gather-5.c
-+++ b/gcc/testsuite/gcc.target/i386/avx512f-gather-5.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-do compile } */
- /* { dg-options "-O3 -mavx512f -mtune=knl" } */
- 
-diff --git a/gcc/testsuite/gcc.target/i386/avx512f-simd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-simd-1.c
-new file mode 100644
-index 00000000000..235fb917e17
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/i386/avx512f-simd-1.c
-@@ -0,0 +1,35 @@
-+/* { dg-do compile } */
-+/* { dg-options "-fopenmp-simd -O2 -mavx512f -masm=att" } */
-+/* { dg-final { scan-assembler "vpadd\[^\n\r]*%xmm" } } */
-+/* { dg-final { scan-assembler "vpadd\[^\n\r]*%ymm" } } */
-+/* { dg-final { scan-assembler "vpadd\[^\n\r]*%zmm" } } */
-+
-+#define N 1024
-+int a[N];
-+
-+void
-+f1 (void)
-+{
-+  int i;
-+  #pragma omp simd simdlen (4)
-+  for (i = 0; i < N; ++i)
-+    a[i] = a[i] + 1;
-+}
-+
-+void
-+f2 (void)
-+{
-+  int i;
-+  #pragma omp simd simdlen (8)
-+  for (i = 0; i < N; ++i)
-+    a[i] = a[i] + 2;
-+}
-+
-+void
-+f3 (void)
-+{
-+  int i;
-+  #pragma omp simd simdlen (16)
-+  for (i = 0; i < N; ++i)
-+    a[i] = a[i] + 3;
-+}
-diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_1.c b/gcc/testsuite/gcc.target/i386/l_fma_double_1.c
-index e5bcdabcf79..2472fb016ee 100644
---- a/gcc/testsuite/gcc.target/i386/l_fma_double_1.c
-+++ b/gcc/testsuite/gcc.target/i386/l_fma_double_1.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-do compile } */
- /* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */
- 
-diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_2.c b/gcc/testsuite/gcc.target/i386/l_fma_double_2.c
-index dbd078abc81..3d569733b1e 100644
---- a/gcc/testsuite/gcc.target/i386/l_fma_double_2.c
-+++ b/gcc/testsuite/gcc.target/i386/l_fma_double_2.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-do compile } */
- /* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */
- 
-diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_3.c b/gcc/testsuite/gcc.target/i386/l_fma_double_3.c
-index d0844f208e5..8e5ec4150cc 100644
---- a/gcc/testsuite/gcc.target/i386/l_fma_double_3.c
-+++ b/gcc/testsuite/gcc.target/i386/l_fma_double_3.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-do compile } */
- /* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */
- 
-diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_4.c b/gcc/testsuite/gcc.target/i386/l_fma_double_4.c
-index b9498a0ff13..0d2a0408d0b 100644
---- a/gcc/testsuite/gcc.target/i386/l_fma_double_4.c
-+++ b/gcc/testsuite/gcc.target/i386/l_fma_double_4.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-do compile } */
- /* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */
- 
-diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_5.c b/gcc/testsuite/gcc.target/i386/l_fma_double_5.c
-index 0292ba040a3..fcf1a6ceac1 100644
---- a/gcc/testsuite/gcc.target/i386/l_fma_double_5.c
-+++ b/gcc/testsuite/gcc.target/i386/l_fma_double_5.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-do compile } */
- /* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */
- 
-diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_6.c b/gcc/testsuite/gcc.target/i386/l_fma_double_6.c
-index a716006eda8..650e608117f 100644
---- a/gcc/testsuite/gcc.target/i386/l_fma_double_6.c
-+++ b/gcc/testsuite/gcc.target/i386/l_fma_double_6.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-do compile } */
- /* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */
- 
-diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_1.c b/gcc/testsuite/gcc.target/i386/l_fma_float_1.c
-index b386b83e39a..c29198ba666 100644
---- a/gcc/testsuite/gcc.target/i386/l_fma_float_1.c
-+++ b/gcc/testsuite/gcc.target/i386/l_fma_float_1.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-do compile } */
- /* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */
- 
-diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_2.c b/gcc/testsuite/gcc.target/i386/l_fma_float_2.c
-index 81193b2d8b1..cb38b77344f 100644
---- a/gcc/testsuite/gcc.target/i386/l_fma_float_2.c
-+++ b/gcc/testsuite/gcc.target/i386/l_fma_float_2.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-do compile } */
- /* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */
- 
-diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_3.c b/gcc/testsuite/gcc.target/i386/l_fma_float_3.c
-index d86cb904357..10a350e9e10 100644
---- a/gcc/testsuite/gcc.target/i386/l_fma_float_3.c
-+++ b/gcc/testsuite/gcc.target/i386/l_fma_float_3.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-do compile } */
- /* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */
- 
-diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_4.c b/gcc/testsuite/gcc.target/i386/l_fma_float_4.c
-index 68ca8388d70..020e5d86f35 100644
---- a/gcc/testsuite/gcc.target/i386/l_fma_float_4.c
-+++ b/gcc/testsuite/gcc.target/i386/l_fma_float_4.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-do compile } */
- /* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */
- 
-diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_5.c b/gcc/testsuite/gcc.target/i386/l_fma_float_5.c
-index 4db4749c024..3ff23c17aab 100644
---- a/gcc/testsuite/gcc.target/i386/l_fma_float_5.c
-+++ b/gcc/testsuite/gcc.target/i386/l_fma_float_5.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-do compile } */
- /* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */
- 
-diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_6.c b/gcc/testsuite/gcc.target/i386/l_fma_float_6.c
-index 0b86e6256bd..34671baa28a 100644
---- a/gcc/testsuite/gcc.target/i386/l_fma_float_6.c
-+++ b/gcc/testsuite/gcc.target/i386/l_fma_float_6.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-do compile } */
- /* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */
- 
-diff --git a/gcc/testsuite/gcc.target/i386/mask-pack.c b/gcc/testsuite/gcc.target/i386/mask-pack.c
-index 0b564ef4284..a607dfb460c 100644
---- a/gcc/testsuite/gcc.target/i386/mask-pack.c
-+++ b/gcc/testsuite/gcc.target/i386/mask-pack.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-do compile } */
- /* { dg-options "-mavx512bw -O3 -fopenmp-simd -fdump-tree-vect-details" } */
- /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 10 "vect" } } */
-diff --git a/gcc/testsuite/gcc.target/i386/mask-unpack.c b/gcc/testsuite/gcc.target/i386/mask-unpack.c
-index 4291480cfff..ca71ea2e29d 100644
---- a/gcc/testsuite/gcc.target/i386/mask-unpack.c
-+++ b/gcc/testsuite/gcc.target/i386/mask-unpack.c
-@@ -1,3 +1,5 @@
-+/* Disabling epilogues until we find a better way to deal with scans.  */
-+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
- /* { dg-do compile } */
- /* { dg-options "-mavx512bw -mavx512dq -mno-stackrealign -O3 -fopenmp-simd -fdump-tree-vect-details" } */
- /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 10 "vect" } } */
-diff --git a/gcc/testsuite/gcc.target/i386/pr90358.c b/gcc/testsuite/gcc.target/i386/pr90358.c
-new file mode 100644
-index 00000000000..4894fdbd079
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/i386/pr90358.c
-@@ -0,0 +1,35 @@
-+/* PR target/90358 */
-+/* { dg-do run { target { sse4_runtime } } } */
-+/* { dg-options "-O3 -msse4" } */
-+
-+struct s { unsigned int a, b, c; };
-+
-+void __attribute__ ((noipa))
-+foo (struct s *restrict s1, struct s *restrict s2, int n)
-+{
-+  for (int i = 0; i < n; ++i)
-+    {
-+      s1[i].b = s2[i].b;
-+      s1[i].c = s2[i].c;
-+      s2[i].c = 0;
-+    }
-+}
-+                            
-+#define N 12
-+
-+int
-+main ()
-+{
-+  struct s s1[N], s2[N];
-+  for (unsigned int j = 0; j < N; ++j)
-+    {
-+      s2[j].a = j * 5;
-+      s2[j].b = j * 5 + 2;
-+      s2[j].c = j * 5 + 4;
-+    }
-+  foo (s1, s2, N);
-+  for (unsigned int j = 0; j < N; ++j)
-+  if (s1[j].b != j * 5 + 2)
-+    __builtin_abort ();
-+  return 0;
-+}
-diff --git a/gcc/testsuite/gcc.target/i386/pr91033.c b/gcc/testsuite/gcc.target/i386/pr91033.c
-new file mode 100644
-index 00000000000..43d99d5a7dc
---- /dev/null
-+++ b/gcc/testsuite/gcc.target/i386/pr91033.c
-@@ -0,0 +1,15 @@
-+/* PR tree-optimization/91033 */
-+/* { dg-do compile { target pthread } } */
-+/* { dg-options "-march=knl -O2 -fopenmp-simd -ftree-parallelize-loops=2" } */
-+
-+#define N 1024
-+int a[N];
-+
-+void
-+foo (void)
-+{
-+  int i;
-+  #pragma omp simd simdlen (4)
-+  for (i = 0; i < N; ++i)
-+    a[i] = a[i] + 1;
-+}
-diff --git a/gcc/testsuite/gfortran.dg/vect/vect-4.f90 b/gcc/testsuite/gfortran.dg/vect/vect-4.f90
-index b567cbd8644..c2eeafd3900 100644
---- a/gcc/testsuite/gfortran.dg/vect/vect-4.f90
-+++ b/gcc/testsuite/gfortran.dg/vect/vect-4.f90
-@@ -1,3 +1,5 @@
-+! Disabling epilogues until we find a better way to deal with scans.
-+! { dg-additional-options "--param vect-epilogues-nomask=0" }
- ! { dg-do compile }
- ! { dg-require-effective-target vect_float }
- ! { dg-additional-options "--param vect-max-peeling-for-alignment=0" }
-diff --git a/gcc/testsuite/gfortran.dg/vect/vect-8.f90 b/gcc/testsuite/gfortran.dg/vect/vect-8.f90
-index 0ac5f1c390b..1c243308476 100644
---- a/gcc/testsuite/gfortran.dg/vect/vect-8.f90
-+++ b/gcc/testsuite/gfortran.dg/vect/vect-8.f90
-@@ -704,5 +704,6 @@ CALL track('KERNEL  ')
- RETURN
- END SUBROUTINE kernel
- 
--! { dg-final { scan-tree-dump-times "vectorized 22 loops" 1 "vect" { target vect_intdouble_cvt } } }
--! { dg-final { scan-tree-dump-times "vectorized 17 loops" 1 "vect" { target { ! vect_intdouble_cvt } } } }
-+! { dg-final { scan-tree-dump-times "vectorized 23 loops" 1 "vect" { target aarch64*-*-* } } }
-+! { dg-final { scan-tree-dump-times "vectorized 22 loops" 1 "vect" { target { vect_intdouble_cvt && { ! aarch64*-*-* } } } } }
-+! { dg-final { scan-tree-dump-times "vectorized 17 loops" 1 "vect" { target { { ! vect_intdouble_cvt } && { ! aarch64*-*-* } } } } }
-diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
-index f8aeec9bae8..621c8ea3dad 100644
---- a/gcc/tree-cfg.c
-+++ b/gcc/tree-cfg.c
-@@ -3557,6 +3557,24 @@ verify_gimple_assign_unary (gassign *stmt)
-     {
-     CASE_CONVERT:
-       {
-+	/* Allow conversions between vectors with the same number of elements,
-+	   provided that the conversion is OK for the element types too.  */
-+	if (VECTOR_TYPE_P (lhs_type)
-+	    && VECTOR_TYPE_P (rhs1_type)
-+	    && known_eq (TYPE_VECTOR_SUBPARTS (lhs_type),
-+			 TYPE_VECTOR_SUBPARTS (rhs1_type)))
-+	  {
-+	    lhs_type = TREE_TYPE (lhs_type);
-+	    rhs1_type = TREE_TYPE (rhs1_type);
-+	  }
-+	else if (VECTOR_TYPE_P (lhs_type) || VECTOR_TYPE_P (rhs1_type))
-+	  {
-+	    error ("invalid vector types in nop conversion");
-+	    debug_generic_expr (lhs_type);
-+	    debug_generic_expr (rhs1_type);
-+	    return true;
-+	  }
-+
- 	/* Allow conversions from pointer type to integral type only if
- 	   there is no sign or zero extension involved.
- 	   For targets were the precision of ptrofftype doesn't match that
-diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
-index d00c1bd31e6..c95dd204870 100644
---- a/gcc/tree-data-ref.c
-+++ b/gcc/tree-data-ref.c
-@@ -1287,7 +1287,7 @@ create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
-   return dr;
- }
- 
--/*  A helper function computes order between two tree epxressions T1 and T2.
-+/*  A helper function computes order between two tree expressions T1 and T2.
-     This is used in comparator functions sorting objects based on the order
-     of tree expressions.  The function returns -1, 0, or 1.  */
- 
-@@ -1454,6 +1454,54 @@ comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
-   return 0;
- }
- 
-+/* Dump information about ALIAS_PAIR, indenting each line by INDENT.  */
-+
-+static void
-+dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
-+{
-+  dump_printf (MSG_NOTE, "%sreference:      %T vs. %T\n", indent,
-+	       DR_REF (alias_pair->first.dr),
-+	       DR_REF (alias_pair->second.dr));
-+
-+  dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
-+	       alias_pair->first.seg_len);
-+  if (!operand_equal_p (alias_pair->first.seg_len,
-+			alias_pair->second.seg_len, 0))
-+    dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);
-+
-+  dump_printf (MSG_NOTE, "\n%saccess size:    ", indent);
-+  dump_dec (MSG_NOTE, alias_pair->first.access_size);
-+  if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
-+    {
-+      dump_printf (MSG_NOTE, " vs. ");
-+      dump_dec (MSG_NOTE, alias_pair->second.access_size);
-+    }
-+
-+  dump_printf (MSG_NOTE, "\n%salignment:      %d", indent,
-+	       alias_pair->first.align);
-+  if (alias_pair->first.align != alias_pair->second.align)
-+    dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);
-+
-+  dump_printf (MSG_NOTE, "\n%sflags:         ", indent);
-+  if (alias_pair->flags & DR_ALIAS_RAW)
-+    dump_printf (MSG_NOTE, " RAW");
-+  if (alias_pair->flags & DR_ALIAS_WAR)
-+    dump_printf (MSG_NOTE, " WAR");
-+  if (alias_pair->flags & DR_ALIAS_WAW)
-+    dump_printf (MSG_NOTE, " WAW");
-+  if (alias_pair->flags & DR_ALIAS_ARBITRARY)
-+    dump_printf (MSG_NOTE, " ARBITRARY");
-+  if (alias_pair->flags & DR_ALIAS_SWAPPED)
-+    dump_printf (MSG_NOTE, " SWAPPED");
-+  if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
-+    dump_printf (MSG_NOTE, " UNSWAPPED");
-+  if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
-+    dump_printf (MSG_NOTE, " MIXED_STEPS");
-+  if (alias_pair->flags == 0)
-+    dump_printf (MSG_NOTE, " <none>");
-+  dump_printf (MSG_NOTE, "\n");
-+}
-+
- /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
-    FACTOR is number of iterations that each data reference is accessed.
- 
-@@ -1488,19 +1536,50 @@ void
- prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
- 			       poly_uint64)
- {
-+  if (alias_pairs->is_empty ())
-+    return;
-+
-+  /* Canonicalize each pair so that the base components are ordered wrt
-+     data_ref_compare_tree.  This allows the loop below to merge more
-+     cases.  */
-+  unsigned int i;
-+  dr_with_seg_len_pair_t *alias_pair;
-+  FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
-+    {
-+      data_reference_p dr_a = alias_pair->first.dr;
-+      data_reference_p dr_b = alias_pair->second.dr;
-+      int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
-+					    DR_BASE_ADDRESS (dr_b));
-+      if (comp_res == 0)
-+	comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
-+      if (comp_res == 0)
-+	comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
-+      if (comp_res > 0)
-+	{
-+	  std::swap (alias_pair->first, alias_pair->second);
-+	  alias_pair->flags |= DR_ALIAS_SWAPPED;
-+	}
-+      else
-+	alias_pair->flags |= DR_ALIAS_UNSWAPPED;
-+    }
-+
-   /* Sort the collected data ref pairs so that we can scan them once to
-      combine all possible aliasing checks.  */
-   alias_pairs->qsort (comp_dr_with_seg_len_pair);
- 
-   /* Scan the sorted dr pairs and check if we can combine alias checks
-      of two neighboring dr pairs.  */
--  for (size_t i = 1; i < alias_pairs->length (); ++i)
-+  unsigned int last = 0;
-+  for (i = 1; i < alias_pairs->length (); ++i)
-     {
-       /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
--      dr_with_seg_len *dr_a1 = &(*alias_pairs)[i-1].first,
--		      *dr_b1 = &(*alias_pairs)[i-1].second,
--		      *dr_a2 = &(*alias_pairs)[i].first,
--		      *dr_b2 = &(*alias_pairs)[i].second;
-+      dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
-+      dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];
-+
-+      dr_with_seg_len *dr_a1 = &alias_pair1->first;
-+      dr_with_seg_len *dr_b1 = &alias_pair1->second;
-+      dr_with_seg_len *dr_a2 = &alias_pair2->first;
-+      dr_with_seg_len *dr_b2 = &alias_pair2->second;
- 
-       /* Remove duplicate data ref pairs.  */
-       if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
-@@ -1509,10 +1588,16 @@ prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
- 	    dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
- 			 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
- 			 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
--	  alias_pairs->ordered_remove (i--);
-+	  alias_pair1->flags |= alias_pair2->flags;
- 	  continue;
- 	}
- 
-+      /* Assume that we won't be able to merge the pairs, then correct
-+	 if we do.  */
-+      last += 1;
-+      if (last != i)
-+	(*alias_pairs)[last] = (*alias_pairs)[i];
-+
-       if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
- 	{
- 	  /* We consider the case that DR_B1 and DR_B2 are same memrefs,
-@@ -1538,13 +1623,6 @@ prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
- 	  if (!ordered_p (init_a1, init_a2))
- 	    continue;
- 
--	  /* Make sure dr_a1 starts left of dr_a2.  */
--	  if (maybe_gt (init_a1, init_a2))
--	    {
--	      std::swap (*dr_a1, *dr_a2);
--	      std::swap (init_a1, init_a2);
--	    }
--
- 	  /* Work out what the segment length would be if we did combine
- 	     DR_A1 and DR_A2:
- 
-@@ -1561,7 +1639,10 @@ prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
- 
- 	     The lengths both have sizetype, so the sign is taken from
- 	     the step instead.  */
--	  if (!operand_equal_p (dr_a1->seg_len, dr_a2->seg_len, 0))
-+	  poly_uint64 new_seg_len = 0;
-+	  bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
-+						 dr_a2->seg_len, 0);
-+	  if (new_seg_len_p)
- 	    {
- 	      poly_uint64 seg_len_a1, seg_len_a2;
- 	      if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
-@@ -1579,14 +1660,29 @@ prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
- 	      int sign_a = tree_int_cst_sgn (indicator_a);
- 	      int sign_b = tree_int_cst_sgn (indicator_b);
- 
--	      poly_uint64 new_seg_len;
- 	      if (sign_a <= 0 && sign_b <= 0)
- 		new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
- 	      else if (sign_a >= 0 && sign_b >= 0)
- 		new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
- 	      else
- 		continue;
-+	    }
-+	  /* At this point we're committed to merging the refs.  */
- 
-+	  /* Make sure dr_a1 starts left of dr_a2.  */
-+	  if (maybe_gt (init_a1, init_a2))
-+	    {
-+	      std::swap (*dr_a1, *dr_a2);
-+	      std::swap (init_a1, init_a2);
-+	    }
-+
-+	  /* The DR_Bs are equal, so only the DR_As can introduce
-+	     mixed steps.  */
-+	  if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
-+	    alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;
-+
-+	  if (new_seg_len_p)
-+	    {
- 	      dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
- 					      new_seg_len);
- 	      dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
-@@ -1608,17 +1704,40 @@ prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
- 	    dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
- 			 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
- 			 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
--	  alias_pairs->ordered_remove (i);
--	  i--;
-+	  alias_pair1->flags |= alias_pair2->flags;
-+	  last -= 1;
- 	}
-     }
-+  alias_pairs->truncate (last + 1);
-+
-+  /* Try to restore the original dr_with_seg_len order within each
-+     dr_with_seg_len_pair_t.  If we ended up combining swapped and
-+     unswapped pairs into the same check, we have to invalidate any
-+     RAW, WAR and WAW information for it.  */
-+  if (dump_enabled_p ())
-+    dump_printf (MSG_NOTE, "merged alias checks:\n");
-+  FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
-+    {
-+      unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
-+      unsigned int swapped = (alias_pair->flags & swap_mask);
-+      if (swapped == DR_ALIAS_SWAPPED)
-+	std::swap (alias_pair->first, alias_pair->second);
-+      else if (swapped != DR_ALIAS_UNSWAPPED)
-+	alias_pair->flags |= DR_ALIAS_ARBITRARY;
-+      alias_pair->flags &= ~swap_mask;
-+      if (dump_enabled_p ())
-+	dump_alias_pair (alias_pair, "  ");
-+    }
- }
- 
--/* Given LOOP's two data references and segment lengths described by DR_A
--   and DR_B, create expression checking if the two addresses ranges intersect
--   with each other based on index of the two addresses.  This can only be
--   done if DR_A and DR_B referring to the same (array) object and the index
--   is the only difference.  For example:
-+/* Try to generate a runtime condition that is true if ALIAS_PAIR is
-+   free of aliases, using a condition based on index values instead
-+   of a condition based on addresses.  Return true on success,
-+   storing the condition in *COND_EXPR.
-+
-+   This can only be done if the two data references in ALIAS_PAIR access
-+   the same array object and the index is the only difference.  For example,
-+   if the two data references are DR_A and DR_B:
- 
-                        DR_A                           DR_B
-       data-ref         arr[i]                         arr[j]
-@@ -1635,16 +1754,20 @@ prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
- 
-    We can create expression based on index rather than address:
- 
--     (i_0 + 4 < j_0 || j_0 + 4 < i_0)
-+     (unsigned) (i_0 - j_0 + 3) <= 6
-+
-+   i.e. the indices are less than 4 apart.
- 
-    Note evolution step of index needs to be considered in comparison.  */
- 
- static bool
- create_intersect_range_checks_index (struct loop *loop, tree *cond_expr,
--				     const dr_with_seg_len& dr_a,
--				     const dr_with_seg_len& dr_b)
-+				     const dr_with_seg_len_pair_t &alias_pair)
- {
--  if (integer_zerop (DR_STEP (dr_a.dr))
-+  const dr_with_seg_len &dr_a = alias_pair.first;
-+  const dr_with_seg_len &dr_b = alias_pair.second;
-+  if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
-+      || integer_zerop (DR_STEP (dr_a.dr))
-       || integer_zerop (DR_STEP (dr_b.dr))
-       || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
-     return false;
-@@ -1670,15 +1793,8 @@ create_intersect_range_checks_index (struct loop *loop, tree *cond_expr,
-   if (neg_step)
-     {
-       abs_step = -abs_step;
--      seg_len1 = -seg_len1;
--      seg_len2 = -seg_len2;
--    }
--  else
--    {
--      /* Include the access size in the length, so that we only have one
--	 tree addition below.  */
--      seg_len1 += dr_a.access_size;
--      seg_len2 += dr_b.access_size;
-+      seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
-+      seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
-     }
- 
-   /* Infer the number of iterations with which the memory segment is accessed
-@@ -1692,16 +1808,15 @@ create_intersect_range_checks_index (struct loop *loop, tree *cond_expr,
-       || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
-     return false;
- 
--  poly_uint64 niter_access1 = 0, niter_access2 = 0;
--  if (neg_step)
--    {
--      /* Divide each access size by the byte step, rounding up.  */
--      if (!can_div_trunc_p (dr_a.access_size - abs_step - 1,
--			    abs_step, &niter_access1)
--	  || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
--			       abs_step, &niter_access2))
--	return false;
--    }
-+  /* Divide each access size by the byte step, rounding up.  */
-+  poly_uint64 niter_access1, niter_access2;
-+  if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
-+			abs_step, &niter_access1)
-+      || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
-+			   abs_step, &niter_access2))
-+    return false;
-+
-+  bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;
- 
-   unsigned int i;
-   for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
-@@ -1741,44 +1856,298 @@ create_intersect_range_checks_index (struct loop *loop, tree *cond_expr,
- 	 index of data reference.  Like segment length, index length is
- 	 linear function of the number of iterations with index_step as
- 	 the coefficient, i.e, niter_len * idx_step.  */
--      tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
--				   build_int_cst (TREE_TYPE (min1),
--						  niter_len1));
--      tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
--				   build_int_cst (TREE_TYPE (min2),
--						  niter_len2));
--      tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1);
--      tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2);
--      /* Adjust ranges for negative step.  */
-+      offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
-+						  SIGNED);
-       if (neg_step)
--	{
--	  /* IDX_LEN1 and IDX_LEN2 are negative in this case.  */
--	  std::swap (min1, max1);
--	  std::swap (min2, max2);
--
--	  /* As with the lengths just calculated, we've measured the access
--	     sizes in iterations, so multiply them by the index step.  */
--	  tree idx_access1
--	    = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step,
--			   build_int_cst (TREE_TYPE (min1), niter_access1));
--	  tree idx_access2
--	    = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step,
--			   build_int_cst (TREE_TYPE (min2), niter_access2));
--
--	  /* MINUS_EXPR because the above values are negative.  */
--	  max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (max1), max1, idx_access1);
--	  max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (max2), max2, idx_access2);
--	}
--      tree part_cond_expr
--	= fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
--	    fold_build2 (LE_EXPR, boolean_type_node, max1, min2),
--	    fold_build2 (LE_EXPR, boolean_type_node, max2, min1));
-+	abs_idx_step = -abs_idx_step;
-+      poly_offset_int idx_len1 = abs_idx_step * niter_len1;
-+      poly_offset_int idx_len2 = abs_idx_step * niter_len2;
-+      poly_offset_int idx_access1 = abs_idx_step * niter_access1;
-+      poly_offset_int idx_access2 = abs_idx_step * niter_access2;
-+
-+      gcc_assert (known_ge (idx_len1, 0)
-+		  && known_ge (idx_len2, 0)
-+		  && known_ge (idx_access1, 0)
-+		  && known_ge (idx_access2, 0));
-+
-+      /* Each access has the following pattern, with lengths measured
-+	 in units of INDEX:
-+
-+	      <-- idx_len -->
-+	      <--- A: -ve step --->
-+	      +-----+-------+-----+-------+-----+
-+	      | n-1 | ..... |  0  | ..... | n-1 |
-+	      +-----+-------+-----+-------+-----+
-+			    <--- B: +ve step --->
-+			    <-- idx_len -->
-+			    |
-+			   min
-+
-+	 where "n" is the number of scalar iterations covered by the segment
-+	 and where each access spans idx_access units.
-+
-+	 A is the range of bytes accessed when the step is negative,
-+	 B is the range when the step is positive.
-+
-+	 When checking for general overlap, we need to test whether
-+	 the range:
-+
-+	   [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]
-+
-+	 overlaps:
-+
-+	   [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
-+
-+	 where:
-+
-+	    low_offsetN = +ve step ? 0 : -idx_lenN;
-+	   high_offsetN = +ve step ? idx_lenN : 0;
-+
-+	 This is equivalent to testing whether:
-+
-+	   min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
-+	   && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
-+
-+	 Converting this into a single test, there is an overlap if:
-+
-+	   0 <= min2 - min1 + bias <= limit
-+
-+	 where  bias = high_offset2 + idx_access2 - 1 - low_offset1
-+	       limit = (high_offset1 - low_offset1 + idx_access1 - 1)
-+		     + (high_offset2 - low_offset2 + idx_access2 - 1)
-+	  i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
-+
-+	 Combining the tests requires limit to be computable in an unsigned
-+	 form of the index type; if it isn't, we fall back to the usual
-+	 pointer-based checks.
-+
-+	 We can do better if DR_B is a write and if DR_A and DR_B are
-+	 well-ordered in both the original and the new code (see the
-+	 comment above the DR_ALIAS_* flags for details).  In this case
-+	 we know that for each i in [0, n-1], the write performed by
-+	 access i of DR_B occurs after access numbers j<=i of DR_A in
-+	 both the original and the new code.  Any write or anti
-+	 dependencies wrt those DR_A accesses are therefore maintained.
-+
-+	 We just need to make sure that each individual write in DR_B does not
-+	 overlap any higher-indexed access in DR_A; such DR_A accesses happen
-+	 after the DR_B access in the original code but happen before it in
-+	 the new code.
-+
-+	 We know the steps for both accesses are equal, so by induction, we
-+	 just need to test whether the first write of DR_B overlaps a later
-+	 access of DR_A.  In other words, we need to move min1 along by
-+	 one iteration:
-+
-+	   min1' = min1 + idx_step
-+
-+	 and use the ranges:
-+
-+	   [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
-+
-+	 and:
-+
-+	   [min2, min2 + idx_access2 - 1]
-+
-+	 where:
-+
-+	    low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
-+	   high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0.  */
-+      if (waw_or_war_p)
-+	idx_len1 -= abs_idx_step;
-+
-+      poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
-+      if (!waw_or_war_p)
-+	limit += idx_len2;
-+
-+      tree utype = unsigned_type_for (TREE_TYPE (min1));
-+      if (!wi::fits_to_tree_p (limit, utype))
-+	return false;
-+
-+      poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
-+      poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
-+      poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
-+      /* Equivalent to adding IDX_STEP to MIN1.  */
-+      if (waw_or_war_p)
-+	bias -= wi::to_offset (idx_step);
-+
-+      tree subject = fold_build2 (MINUS_EXPR, utype,
-+				  fold_convert (utype, min2),
-+				  fold_convert (utype, min1));
-+      subject = fold_build2 (PLUS_EXPR, utype, subject,
-+			     wide_int_to_tree (utype, bias));
-+      tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
-+					 wide_int_to_tree (utype, limit));
-       if (*cond_expr)
- 	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
- 				  *cond_expr, part_cond_expr);
-       else
- 	*cond_expr = part_cond_expr;
-     }
-+  if (dump_enabled_p ())
-+    {
-+      if (waw_or_war_p)
-+	dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
-+      else
-+	dump_printf (MSG_NOTE, "using an index-based overlap test\n");
-+    }
-+  return true;
-+}
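
[Editorial illustration, not part of the patch.]  A quick way to sanity-check
the collapsed comparison derived above is to brute-force it against the naive
two-sided range test.  The standalone C sketch below uses made-up segment
lengths, access sizes and offsets for a positive-step pair that is not on the
WAR/WAW fast path; it only demonstrates that
(unsigned) (min2 - min1 + bias) <= limit matches the textbook overlap test.

#include <stdio.h>
#include <stdint.h>

/* Naive test: do the inclusive ranges [lo1, hi1] and [lo2, hi2] overlap?  */
static int
ranges_overlap (long lo1, long hi1, long lo2, long hi2)
{
  return lo1 <= hi2 && lo2 <= hi1;
}

int
main (void)
{
  /* Hypothetical per-dimension values, all in index units, positive step.  */
  long idx_len1 = 8, idx_len2 = 8;        /* index lengths of the segments */
  long idx_access1 = 1, idx_access2 = 1;  /* access sizes in index units */
  long low_offset1 = 0, high_offset1 = idx_len1;
  long low_offset2 = 0, high_offset2 = idx_len2;

  long bias = high_offset2 + idx_access2 - 1 - low_offset1;
  long limit = (high_offset1 - low_offset1 + idx_access1 - 1)
	       + (high_offset2 - low_offset2 + idx_access2 - 1);

  for (long min1 = 0; min1 < 32; min1++)
    for (long min2 = 0; min2 < 32; min2++)
      {
	int naive = ranges_overlap (min1 + low_offset1,
				    min1 + high_offset1 + idx_access1 - 1,
				    min2 + low_offset2,
				    min2 + high_offset2 + idx_access2 - 1);
	/* Negative values wrap to large unsigned numbers, so one unsigned
	   comparison covers both sides of 0 <= x <= limit.  */
	int collapsed = (uint64_t) (min2 - min1 + bias) <= (uint64_t) limit;
	if (naive != collapsed)
	  {
	    printf ("mismatch at min1=%ld min2=%ld\n", min1, min2);
	    return 1;
	  }
      }
  printf ("collapsed test matches the two-sided overlap test\n");
  return 0;
}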
-+
-+/* A subroutine of create_intersect_range_checks, with a subset of the
-+   same arguments.  Try to optimize cases in which the second access
-+   is a write and in which some overlap is valid.  */
-+
-+static bool
-+create_waw_or_war_checks (tree *cond_expr,
-+			  const dr_with_seg_len_pair_t &alias_pair)
-+{
-+  const dr_with_seg_len& dr_a = alias_pair.first;
-+  const dr_with_seg_len& dr_b = alias_pair.second;
-+
-+  /* Check for cases in which:
-+
-+     (a) DR_B is always a write;
-+     (b) the accesses are well-ordered in both the original and new code
-+	 (see the comment above the DR_ALIAS_* flags for details); and
-+     (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
-+  if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
-+    return false;
-+
-+  /* Check for equal (but possibly variable) steps.  */
-+  tree step = DR_STEP (dr_a.dr);
-+  if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
-+    return false;
-+
-+  /* Make sure that we can operate on sizetype without loss of precision.  */
-+  tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
-+  if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
-+    return false;
-+
-+  /* All addresses involved are known to have a common alignment ALIGN.
-+     We can therefore subtract ALIGN from an exclusive endpoint to get
-+     an inclusive endpoint.  In the best (and common) case, ALIGN is the
-+     same as the access sizes of both DRs, and so subtracting ALIGN
-+     cancels out the addition of an access size.  */
-+  unsigned int align = MIN (dr_a.align, dr_b.align);
-+  poly_uint64 last_chunk_a = dr_a.access_size - align;
-+  poly_uint64 last_chunk_b = dr_b.access_size - align;
-+
-+  /* Get a boolean expression that is true when the step is negative.  */
-+  tree indicator = dr_direction_indicator (dr_a.dr);
-+  tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
-+			       fold_convert (ssizetype, indicator),
-+			       ssize_int (0));
-+
-+  /* Get lengths in sizetype.  */
-+  tree seg_len_a
-+    = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
-+  step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
-+
-+  /* Each access has the following pattern:
-+
-+	  <- |seg_len| ->
-+	  <--- A: -ve step --->
-+	  +-----+-------+-----+-------+-----+
-+	  | n-1 | ..... |  0  | ..... | n-1 |
-+	  +-----+-------+-----+-------+-----+
-+			<--- B: +ve step --->
-+			<- |seg_len| ->
-+			|
-+		   base address
-+
-+     where "n" is the number of scalar iterations covered by the segment.
-+
-+     A is the range of bytes accessed when the step is negative,
-+     B is the range when the step is positive.
-+
-+     We know that DR_B is a write.  We also know (from checking that
-+     DR_A and DR_B are well-ordered) that for each i in [0, n-1],
-+     the write performed by access i of DR_B occurs after access numbers
-+     j<=i of DR_A in both the original and the new code.  Any write or
-+     anti dependencies wrt those DR_A accesses are therefore maintained.
-+
-+     We just need to make sure that each individual write in DR_B does not
-+     overlap any higher-indexed access in DR_A; such DR_A accesses happen
-+     after the DR_B access in the original code but happen before it in
-+     the new code.
-+
-+     We know the steps for both accesses are equal, so by induction, we
-+     just need to test whether the first write of DR_B overlaps a later
-+     access of DR_A.  In other words, we need to move addr_a along by
-+     one iteration:
-+
-+       addr_a' = addr_a + step
-+
-+     and check whether:
-+
-+       [addr_b, addr_b + last_chunk_b]
-+
-+     overlaps:
-+
-+       [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]
-+
-+     where [low_offset_a, high_offset_a] spans accesses [1, n-1].  I.e.:
-+
-+	low_offset_a = +ve step ? 0 : seg_len_a - step
-+       high_offset_a = +ve step ? seg_len_a - step : 0
-+
-+     This is equivalent to testing whether:
-+
-+       addr_a' + low_offset_a <= addr_b + last_chunk_b
-+       && addr_b <= addr_a' + high_offset_a + last_chunk_a
-+
-+     Converting this into a single test, there is an overlap if:
-+
-+       0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit
-+
-+     where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b
-+
-+     If DR_A is performed, limit + |step| - last_chunk_b is known to be
-+     less than the size of the object underlying DR_A.  We also know
-+     that last_chunk_b <= |step|; this is checked elsewhere if it isn't
-+     guaranteed at compile time.  There can therefore be no overflow if
-+     "limit" is calculated in an unsigned type with pointer precision.  */
-+  tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
-+					 DR_OFFSET (dr_a.dr));
-+  addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
-+
-+  tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
-+					 DR_OFFSET (dr_b.dr));
-+  addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
-+
-+  /* Advance ADDR_A by one iteration and adjust the length to compensate.  */
-+  addr_a = fold_build_pointer_plus (addr_a, step);
-+  tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
-+					   seg_len_a, step);
-+  if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
-+    seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);
-+
-+  tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
-+				   seg_len_a_minus_step, size_zero_node);
-+  if (!CONSTANT_CLASS_P (low_offset_a))
-+    low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);
-+
-+  /* We could use COND_EXPR <neg_step, 0, seg_len_a_minus_step>,
-+     but it's usually more efficient to reuse the LOW_OFFSET_A result.  */
-+  tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
-+				    low_offset_a);
-+
-+  /* The amount added to addr_b - addr_a'.  */
-+  tree bias = fold_build2 (MINUS_EXPR, sizetype,
-+			   size_int (last_chunk_b), low_offset_a);
-+
-+  tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
-+  limit = fold_build2 (PLUS_EXPR, sizetype, limit,
-+		       size_int (last_chunk_a + last_chunk_b));
-+
-+  tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a);
-+  subject = fold_build2 (PLUS_EXPR, sizetype,
-+			 fold_convert (sizetype, subject), bias);
-+
-+  *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
-+  if (dump_enabled_p ())
-+    dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
-   return true;
- }
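
[Editorial illustration, not part of the patch.]  The address-based WAR/WAW
condition built above can be mirrored with plain integer arithmetic.  The
sketch below makes simplifying assumptions (constant positive step, access
size equal to the common alignment so both last_chunk values are 0) and is
not the patch's GIMPLE-building code; it just shows which pairs pass the
generated check.

#include <stdio.h>
#include <stdint.h>

/* Stand-alone model of the emitted WAR/WAW runtime test for a known
   positive STEP.  Returns 1 when the accesses are known not to conflict,
   i.e. when the vectorized loop may run.  */
static int
war_waw_check_ok (uintptr_t addr_a, uintptr_t addr_b,
		  uintptr_t seg_len_a, uintptr_t step,
		  uintptr_t last_chunk_a, uintptr_t last_chunk_b)
{
  addr_a += step;			/* advance DR_A by one iteration */
  uintptr_t low_offset_a = 0;		/* positive step */
  uintptr_t high_offset_a = seg_len_a - step;
  uintptr_t bias = last_chunk_b - low_offset_a;
  uintptr_t limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b;
  uintptr_t subject = addr_b - addr_a + bias;
  return subject > limit;		/* outside [0, limit] => no conflict */
}

int
main (void)
{
  int a[64];
  uintptr_t step = sizeof (int);
  uintptr_t seg_len = 16 * sizeof (int);	/* 16 scalar iterations */

  /* DR_A reads a[i + 1], DR_B writes a[i]: the write trails the read,
     so the check accepts it even though the segments overlap.  */
  printf ("read a[i+1], write a[i]  : %s\n",
	  war_waw_check_ok ((uintptr_t) &a[1], (uintptr_t) &a[0],
			    seg_len, step, 0, 0)
	  ? "no conflict" : "scalar fallback");

  /* DR_A reads a[i], DR_B writes a[i + 1]: vectorizing would break the
     cross-iteration dependence, so the check must reject it.  */
  printf ("read a[i],   write a[i+1]: %s\n",
	  war_waw_check_ok ((uintptr_t) &a[0], (uintptr_t) &a[1],
			    seg_len, step, 0, 0)
	  ? "no conflict" : "scalar fallback");
  return 0;
}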
- 
-@@ -1866,24 +2235,29 @@ get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
-   *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
- }
- 
--/* Given two data references and segment lengths described by DR_A and DR_B,
--   create expression checking if the two addresses ranges intersect with
--   each other:
-+/* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
-+   storing the condition in *COND_EXPR.  The fallback is to generate
-+   a test that the two accesses do not overlap:
- 
--     ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0)
--     || (DR_B_addr_0 + DER_B_segment_length_0) <= DR_A_addr_0))  */
-+     end_a <= start_b || end_b <= start_a.  */
- 
- static void
- create_intersect_range_checks (struct loop *loop, tree *cond_expr,
--			       const dr_with_seg_len& dr_a,
--			       const dr_with_seg_len& dr_b)
-+			       const dr_with_seg_len_pair_t &alias_pair)
- {
-+  const dr_with_seg_len& dr_a = alias_pair.first;
-+  const dr_with_seg_len& dr_b = alias_pair.second;
-   *cond_expr = NULL_TREE;
--  if (create_intersect_range_checks_index (loop, cond_expr, dr_a, dr_b))
-+  if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
-+    return;
-+
-+  if (create_waw_or_war_checks (cond_expr, alias_pair))
-     return;
- 
-   unsigned HOST_WIDE_INT min_align;
-   tree_code cmp_code;
-+  /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
-+     are equivalent.  This is just an optimization heuristic.  */
-   if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
-       && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
-     {
-@@ -1924,6 +2298,8 @@ create_intersect_range_checks (struct loop *loop, tree *cond_expr,
-     = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
- 	fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
- 	fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
-+  if (dump_enabled_p ())
-+    dump_printf (MSG_NOTE, "using an address-based overlap test\n");
- }
- 
- /* Create a conditional expression that represents the run-time checks for
-@@ -1940,18 +2316,19 @@ create_runtime_alias_checks (struct loop *loop,
-   tree part_cond_expr;
- 
-   fold_defer_overflow_warnings ();
--  for (size_t i = 0, s = alias_pairs->length (); i < s; ++i)
-+  dr_with_seg_len_pair_t *alias_pair;
-+  unsigned int i;
-+  FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
-     {
--      const dr_with_seg_len& dr_a = (*alias_pairs)[i].first;
--      const dr_with_seg_len& dr_b = (*alias_pairs)[i].second;
--
-+      gcc_assert (alias_pair->flags);
-       if (dump_enabled_p ())
- 	dump_printf (MSG_NOTE,
- 		     "create runtime check for data references %T and %T\n",
--		     DR_REF (dr_a.dr), DR_REF (dr_b.dr));
-+		     DR_REF (alias_pair->first.dr),
-+		     DR_REF (alias_pair->second.dr));
- 
-       /* Create condition expression for each pair data references.  */
--      create_intersect_range_checks (loop, &part_cond_expr, dr_a, dr_b);
-+      create_intersect_range_checks (loop, &part_cond_expr, *alias_pair);
-       if (*cond_expr)
- 	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
- 				  *cond_expr, part_cond_expr);
-diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h
-index 70cbb03b49c..9cb48a2ea3e 100644
---- a/gcc/tree-data-ref.h
-+++ b/gcc/tree-data-ref.h
-@@ -221,19 +221,113 @@ struct dr_with_seg_len
-   unsigned int align;
- };
- 
-+/* Flags that describe a potential alias between two dr_with_seg_lens.
-+   In general, each pair of dr_with_seg_lens represents a composite of
-+   multiple access pairs P, so testing flags like DR_IS_READ on the DRs
-+   does not give meaningful information.
-+
-+   DR_ALIAS_RAW:
-+	There is a pair in P for which the second reference is a read
-+	and the first is a write.
-+
-+   DR_ALIAS_WAR:
-+	There is a pair in P for which the second reference is a write
-+	and the first is a read.
-+
-+   DR_ALIAS_WAW:
-+	There is a pair in P for which both references are writes.
-+
-+   DR_ALIAS_ARBITRARY:
-+	Either
-+	(a) it isn't possible to classify one pair in P as RAW, WAW or WAR; or
-+	(b) there is a pair in P that breaks the ordering assumption below.
-+
-+	This flag overrides the RAW, WAR and WAW flags above.
-+
-+   DR_ALIAS_UNSWAPPED:
-+   DR_ALIAS_SWAPPED:
-+	Temporary flags that indicate whether there is a pair P whose
-+	DRs have or haven't been swapped around.
-+
-+   DR_ALIAS_MIXED_STEPS:
-+	The DR_STEP for one of the data references in the pair does not
-+	accurately describe that reference for all members of P.  (Note
-+	that the flag does not say anything about whether the DR_STEPs
-+	of the two references in the pair are the same.)
-+
-+   The ordering assumption mentioned above is that for every pair
-+   (DR_A, DR_B) in P:
-+
-+   (1) The original code accesses n elements for DR_A and n elements for DR_B,
-+       interleaved as follows:
-+
-+	 one access of size DR_A.access_size at DR_A.dr
-+	 one access of size DR_B.access_size at DR_B.dr
-+	 one access of size DR_A.access_size at DR_A.dr + STEP_A
-+	 one access of size DR_B.access_size at DR_B.dr + STEP_B
-+	 one access of size DR_A.access_size at DR_A.dr + STEP_A * 2
-+	 one access of size DR_B.access_size at DR_B.dr + STEP_B * 2
-+	 ...
-+
-+   (2) The new code accesses the same data in exactly two chunks:
-+
-+	 one group of accesses spanning |DR_A.seg_len| + DR_A.access_size
-+	 one group of accesses spanning |DR_B.seg_len| + DR_B.access_size
-+
-+   A pair might break this assumption if the DR_A and DR_B accesses
-+   in the original or the new code are mingled in some way.  For example,
-+   if DR_A.access_size represents the effect of two individual writes
-+   to nearby locations, the pair breaks the assumption if those writes
-+   occur either side of the access for DR_B.
-+
-+   Note that DR_ALIAS_ARBITRARY describes whether the ordering assumption
-+   fails to hold for any individual pair in P.  If the assumption *does*
-+   hold for every pair in P, it doesn't matter whether it holds for the
-+   composite pair or not.  In other words, P should represent the complete
-+   set of pairs that the composite pair is testing, so only the ordering
-+   of two accesses in the same member of P matters.  */
-+const unsigned int DR_ALIAS_RAW = 1U << 0;
-+const unsigned int DR_ALIAS_WAR = 1U << 1;
-+const unsigned int DR_ALIAS_WAW = 1U << 2;
-+const unsigned int DR_ALIAS_ARBITRARY = 1U << 3;
-+const unsigned int DR_ALIAS_SWAPPED = 1U << 4;
-+const unsigned int DR_ALIAS_UNSWAPPED = 1U << 5;
-+const unsigned int DR_ALIAS_MIXED_STEPS = 1U << 6;
-+
- /* This struct contains two dr_with_seg_len objects with aliasing data
-    refs.  Two comparisons are generated from them.  */
- 
- struct dr_with_seg_len_pair_t
- {
--  dr_with_seg_len_pair_t (const dr_with_seg_len& d1,
--			       const dr_with_seg_len& d2)
--    : first (d1), second (d2) {}
-+  /* WELL_ORDERED indicates that the ordering assumption described above
-+     DR_ALIAS_ARBITRARY holds.  REORDERED indicates that it doesn't.  */
-+  enum sequencing { WELL_ORDERED, REORDERED };
-+
-+  dr_with_seg_len_pair_t (const dr_with_seg_len &,
-+			  const dr_with_seg_len &, sequencing);
- 
-   dr_with_seg_len first;
-   dr_with_seg_len second;
-+  unsigned int flags;
- };
- 
-+inline dr_with_seg_len_pair_t::
-+dr_with_seg_len_pair_t (const dr_with_seg_len &d1, const dr_with_seg_len &d2,
-+			sequencing seq)
-+  : first (d1), second (d2), flags (0)
-+{
-+  if (DR_IS_READ (d1.dr) && DR_IS_WRITE (d2.dr))
-+    flags |= DR_ALIAS_WAR;
-+  else if (DR_IS_WRITE (d1.dr) && DR_IS_READ (d2.dr))
-+    flags |= DR_ALIAS_RAW;
-+  else if (DR_IS_WRITE (d1.dr) && DR_IS_WRITE (d2.dr))
-+    flags |= DR_ALIAS_WAW;
-+  else
-+    gcc_unreachable ();
-+  if (seq == REORDERED)
-+    flags |= DR_ALIAS_ARBITRARY;
-+}
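
[Editorial illustration, not part of the patch.]  The effect of the new
constructor is a classification of the pair into the DR_ALIAS_* bits; a tiny
stand-alone model of that logic (hypothetical stand-in flags, with the
read/write properties passed directly instead of DR_IS_READ/DR_IS_WRITE)
might look like this.

#include <stdio.h>

/* Mirrors the DR_ALIAS_* bits described above.  */
enum { ALIAS_RAW = 1, ALIAS_WAR = 2, ALIAS_WAW = 4, ALIAS_ARBITRARY = 8 };

/* FIRST_IS_READ/SECOND_IS_READ model DR_IS_READ on the two refs;
   WELL_ORDERED models the sequencing constructor argument.
   (A read/read pair never forms an alias pair, so it is ignored here.)  */
static unsigned
classify (int first_is_read, int second_is_read, int well_ordered)
{
  unsigned flags = 0;
  if (first_is_read && !second_is_read)
    flags |= ALIAS_WAR;
  else if (!first_is_read && second_is_read)
    flags |= ALIAS_RAW;
  else if (!first_is_read && !second_is_read)
    flags |= ALIAS_WAW;
  if (!well_ordered)
    flags |= ALIAS_ARBITRARY;
  return flags;
}

int
main (void)
{
  /* A well-ordered read-then-write pair is a pure WAR, which is exactly
     the condition for the cheaper WAR/WAW runtime tests above.  */
  unsigned f = classify (1, 0, 1);
  printf ("WAR pair eligible for WAR/WAW fast path: %d\n",
	  (f & ~(ALIAS_WAR | ALIAS_WAW)) == 0);
  return 0;
}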
-+
- enum data_dependence_direction {
-   dir_positive,
-   dir_negative,
-diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c
-index 2780a4b243f..bd946e14eb6 100644
---- a/gcc/tree-if-conv.c
-+++ b/gcc/tree-if-conv.c
-@@ -120,6 +120,7 @@ along with GCC; see the file COPYING3.  If not see
- #include "fold-const.h"
- #include "tree-ssa-sccvn.h"
- #include "tree-cfgcleanup.h"
-+#include "tree-ssa-dse.h"
- 
- /* Only handle PHIs with no more arguments unless we are asked to by
-    simd pragma.  */
-@@ -2884,7 +2885,7 @@ ifcvt_split_critical_edges (struct loop *loop, bool aggressive_if_conv)
-    loop vectorization.  */
- 
- static void
--ifcvt_local_dce (basic_block bb)
-+ifcvt_local_dce (class loop *loop)
- {
-   gimple *stmt;
-   gimple *stmt1;
-@@ -2901,6 +2902,10 @@ ifcvt_local_dce (basic_block bb)
-     replace_uses_by (name_pair->first, name_pair->second);
-   redundant_ssa_names.release ();
- 
-+  /* The loop has a single BB only.  */
-+  basic_block bb = loop->header;
-+  tree latch_vdef = NULL_TREE;
-+
-   worklist.create (64);
-   /* Consider all phi as live statements.  */
-   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
-@@ -2908,6 +2913,8 @@ ifcvt_local_dce (basic_block bb)
-       phi = gsi_stmt (gsi);
-       gimple_set_plf (phi, GF_PLF_2, true);
-       worklist.safe_push (phi);
-+      if (virtual_operand_p (gimple_phi_result (phi)))
-+	latch_vdef = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
-     }
-   /* Consider load/store statements, CALL and COND as live.  */
-   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
-@@ -2971,6 +2978,19 @@ ifcvt_local_dce (basic_block bb)
-   while (!gsi_end_p (gsi))
-     {
-       stmt = gsi_stmt (gsi);
-+      if (gimple_store_p (stmt))
-+	{
-+	  tree lhs = gimple_get_lhs (stmt);
-+	  ao_ref write;
-+	  ao_ref_init (&write, lhs);
-+
-+          if (dse_classify_store (&write, stmt, false, NULL, NULL, latch_vdef)
-+              == DSE_STORE_DEAD)
-+            delete_dead_or_redundant_assignment (&gsi, "dead");
-+	  gsi_next (&gsi);
-+	  continue;
-+	}
-+
-       if (gimple_plf (stmt, GF_PLF_2))
- 	{
- 	  gsi_next (&gsi);
-@@ -3071,9 +3091,6 @@ tree_if_conversion (struct loop *loop, vec<gimple *> *preds)
-      on-the-fly.  */
-   combine_blocks (loop);
- 
--  /* Delete dead predicate computations.  */
--  ifcvt_local_dce (loop->header);
--
-   /* Perform local CSE, this esp. helps the vectorizer analysis if loads
-      and stores are involved.  CSE only the loop body, not the entry
-      PHIs, those are to be kept in sync with the non-if-converted copy.
-@@ -3082,6 +3099,9 @@ tree_if_conversion (struct loop *loop, vec *preds)
-   bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index);
-   bitmap_set_bit (exit_bbs, loop->latch->index);
-   todo |= do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs);
-+
-+  /* Delete dead predicate computations.  */
-+  ifcvt_local_dce (loop);
-   BITMAP_FREE (exit_bbs);
- 
-   todo |= TODO_cleanup_cfg;
-diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
-index d115fcb1a5b..2fbcd6e3e46 100644
---- a/gcc/tree-inline.c
-+++ b/gcc/tree-inline.c
-@@ -6201,11 +6201,11 @@ tree_function_versioning (tree old_decl, tree new_decl,
- 	     in the debug info that var (whole DECL_ORIGIN is the parm
- 	     PARM_DECL) is optimized away, but could be looked up at the
- 	     call site as value of D#X there.  */
--	  tree var = vars, vexpr;
-+	  tree vexpr;
- 	  gimple_stmt_iterator cgsi
- 	    = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
- 	  gimple *def_temp;
--	  var = vars;
-+	  tree var = vars;
- 	  i = vec_safe_length (*debug_args);
- 	  do
- 	    {
-diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
-index 8959f52a67b..a002bcd57b2 100644
---- a/gcc/tree-loop-distribution.c
-+++ b/gcc/tree-loop-distribution.c
-@@ -2445,12 +2445,6 @@ compute_alias_check_pairs (struct loop *loop, vec<ddr_p> *alias_ddrs,
-       struct data_reference *dr_a = DDR_A (ddr);
-       struct data_reference *dr_b = DDR_B (ddr);
-       tree seg_length_a, seg_length_b;
--      int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
--					    DR_BASE_ADDRESS (dr_b));
--
--      if (comp_res == 0)
--	comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
--      gcc_assert (comp_res != 0);
- 
-       if (latch_dominated_by_data_ref (loop, dr_a))
- 	seg_length_a = data_ref_segment_size (dr_a, niters_plus_one);
-@@ -2471,11 +2465,9 @@ compute_alias_check_pairs (struct loop *loop, vec *alias_ddrs,
- 
-       dr_with_seg_len_pair_t dr_with_seg_len_pair
- 	(dr_with_seg_len (dr_a, seg_length_a, access_size_a, align_a),
--	 dr_with_seg_len (dr_b, seg_length_b, access_size_b, align_b));
--
--      /* Canonicalize pairs by sorting the two DR members.  */
--      if (comp_res > 0)
--	std::swap (dr_with_seg_len_pair.first, dr_with_seg_len_pair.second);
-+	 dr_with_seg_len (dr_b, seg_length_b, access_size_b, align_b),
-+	 /* ??? Would WELL_ORDERED be safe?  */
-+	 dr_with_seg_len_pair_t::REORDERED);
- 
-       comp_alias_pairs->safe_push (dr_with_seg_len_pair);
-     }
-diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
-index dad6e2884db..e841da66db5 100644
---- a/gcc/tree-parloops.c
-+++ b/gcc/tree-parloops.c
-@@ -88,7 +88,8 @@ along with GCC; see the file COPYING3.  If not see
-    More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC  */
- /*
-   Reduction handling:
--  currently we use vect_force_simple_reduction() to detect reduction patterns.
-+  currently we use code inspired by vect_force_simple_reduction to detect
-+  reduction patterns.
-   The code transformation will be introduced by an example.
- 
- 
-@@ -182,6 +183,717 @@ parloop
- 
- */
- 
-+/* Error reporting helper for parloops_is_simple_reduction below.  GIMPLE
-+   statement STMT is printed with a message MSG. */
-+
-+static void
-+report_ploop_op (dump_flags_t msg_type, gimple *stmt, const char *msg)
-+{
-+  dump_printf_loc (msg_type, vect_location, "%s%G", msg, stmt);
-+}
-+
-+/* DEF_STMT_INFO occurs in a loop that contains a potential reduction
-+   operation.  Return true if the results of DEF_STMT_INFO are something
-+   that can be accumulated by such a reduction.  */
-+
-+static bool
-+parloops_valid_reduction_input_p (stmt_vec_info def_stmt_info)
-+{
-+  return (is_gimple_assign (def_stmt_info->stmt)
-+	  || is_gimple_call (def_stmt_info->stmt)
-+	  || STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_induction_def
-+	  || (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
-+	      && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def
-+	      && !is_loop_header_bb_p (gimple_bb (def_stmt_info->stmt))));
-+}
-+
-+/* Detect SLP reduction of the form:
-+
-+   #a1 = phi <a5, a0>
-+   a2 = operation (a1)
-+   a3 = operation (a2)
-+   a4 = operation (a3)
-+   a5 = operation (a4)
-+
-+   #a = phi <a5>
-+
-+   PHI is the reduction phi node (#a1 = phi <a5, a0> above)
-+   FIRST_STMT is the first reduction stmt in the chain
-+   (a2 = operation (a1)).
-+
-+   Return TRUE if a reduction chain was detected.  */
-+
-+static bool
-+parloops_is_slp_reduction (loop_vec_info loop_info, gimple *phi,
-+			   gimple *first_stmt)
-+{
-+  class loop *loop = (gimple_bb (phi))->loop_father;
-+  class loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
-+  enum tree_code code;
-+  gimple *loop_use_stmt = NULL;
-+  stmt_vec_info use_stmt_info;
-+  tree lhs;
-+  imm_use_iterator imm_iter;
-+  use_operand_p use_p;
-+  int nloop_uses, size = 0, n_out_of_loop_uses;
-+  bool found = false;
-+
-+  if (loop != vect_loop)
-+    return false;
-+
-+  auto_vec<stmt_vec_info> reduc_chain;
-+  lhs = PHI_RESULT (phi);
-+  code = gimple_assign_rhs_code (first_stmt);
-+  while (1)
-+    {
-+      nloop_uses = 0;
-+      n_out_of_loop_uses = 0;
-+      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
-+        {
-+	  gimple *use_stmt = USE_STMT (use_p);
-+	  if (is_gimple_debug (use_stmt))
-+	    continue;
-+
-+          /* Check if we got back to the reduction phi.  */
-+	  if (use_stmt == phi)
-+            {
-+	      loop_use_stmt = use_stmt;
-+              found = true;
-+              break;
-+            }
-+
-+          if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
-+            {
-+	      loop_use_stmt = use_stmt;
-+	      nloop_uses++;
-+            }
-+           else
-+             n_out_of_loop_uses++;
-+
-+           /* There can be either a single use in the loop or two uses in
-+              phi nodes.  */
-+           if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
-+             return false;
-+        }
-+
-+      if (found)
-+        break;
-+
-+      /* We reached a statement with no loop uses.  */
-+      if (nloop_uses == 0)
-+	return false;
-+
-+      /* This is a loop exit phi, and we haven't reached the reduction phi.  */
-+      if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
-+        return false;
-+
-+      if (!is_gimple_assign (loop_use_stmt)
-+	  || code != gimple_assign_rhs_code (loop_use_stmt)
-+	  || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
-+        return false;
-+
-+      /* Insert USE_STMT into reduction chain.  */
-+      use_stmt_info = loop_info->lookup_stmt (loop_use_stmt);
-+      reduc_chain.safe_push (use_stmt_info);
-+
-+      lhs = gimple_assign_lhs (loop_use_stmt);
-+      size++;
-+   }
-+
-+  if (!found || loop_use_stmt != phi || size < 2)
-+    return false;
-+
-+  /* Swap the operands, if needed, to make the reduction operand be the second
-+     operand.  */
-+  lhs = PHI_RESULT (phi);
-+  for (unsigned i = 0; i < reduc_chain.length (); ++i)
-+    {
-+      gassign *next_stmt = as_a <gassign *> (reduc_chain[i]->stmt);
-+      if (gimple_assign_rhs2 (next_stmt) == lhs)
-+	{
-+	  tree op = gimple_assign_rhs1 (next_stmt);
-+	  stmt_vec_info def_stmt_info = loop_info->lookup_def (op);
-+
-+	  /* Check that the other def is either defined in the loop
-+	     ("vect_internal_def"), or it's an induction (defined by a
-+	     loop-header phi-node).  */
-+	  if (def_stmt_info
-+	      && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt))
-+	      && parloops_valid_reduction_input_p (def_stmt_info))
-+	    {
-+	      lhs = gimple_assign_lhs (next_stmt);
-+	      continue;
-+	    }
-+
-+	  return false;
-+	}
-+      else
-+	{
-+          tree op = gimple_assign_rhs2 (next_stmt);
-+	  stmt_vec_info def_stmt_info = loop_info->lookup_def (op);
-+
-+          /* Check that the other def is either defined in the loop
-+            ("vect_internal_def"), or it's an induction (defined by a
-+            loop-header phi-node).  */
-+	  if (def_stmt_info
-+	      && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt))
-+	      && parloops_valid_reduction_input_p (def_stmt_info))
-+	    {
-+	      if (dump_enabled_p ())
-+		dump_printf_loc (MSG_NOTE, vect_location, "swapping oprnds: %G",
-+				 next_stmt);
-+
-+	      swap_ssa_operands (next_stmt,
-+				 gimple_assign_rhs1_ptr (next_stmt),
-+                                 gimple_assign_rhs2_ptr (next_stmt));
-+	      update_stmt (next_stmt);
-+
-+	      if (CONSTANT_CLASS_P (gimple_assign_rhs1 (next_stmt)))
-+		LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
-+	    }
-+	  else
-+	    return false;
-+        }
-+
-+      lhs = gimple_assign_lhs (next_stmt);
-+    }
-+
-+  /* Build up the actual chain.  */
-+  for (unsigned i = 0; i < reduc_chain.length () - 1; ++i)
-+    {
-+      REDUC_GROUP_FIRST_ELEMENT (reduc_chain[i]) = reduc_chain[0];
-+      REDUC_GROUP_NEXT_ELEMENT (reduc_chain[i]) = reduc_chain[i+1];
-+    }
-+  REDUC_GROUP_FIRST_ELEMENT (reduc_chain.last ()) = reduc_chain[0];
-+  REDUC_GROUP_NEXT_ELEMENT (reduc_chain.last ()) = NULL;
-+
-+  /* Save the chain for further analysis in SLP detection.  */
-+  LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (reduc_chain[0]);
-+  REDUC_GROUP_SIZE (reduc_chain[0]) = size;
-+
-+  return true;
-+}
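
[Editorial illustration, not part of the patch or its testsuite.]  The kind
of source loop whose body produces such a chain is an accumulator updated
several times per iteration; the four in-order updates of SUM below form the
a2 = op (a1), a3 = op (a2), ... sequence that the function links into a
reduction group.

#include <stdio.h>

static int
sum_by_four (const int *a, int n)
{
  int sum = 0;
  /* Assumes n is a multiple of 4, to keep the sketch short.  */
  for (int i = 0; i < n; i += 4)
    {
      sum += a[i + 0];
      sum += a[i + 1];
      sum += a[i + 2];
      sum += a[i + 3];
    }
  return sum;
}

int
main (void)
{
  int a[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
  printf ("%d\n", sum_by_four (a, 8));
  return 0;
}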
-+
-+/* Return true if we need an in-order reduction for operation CODE
-+   on type TYPE.  NEED_WRAPPING_INTEGRAL_OVERFLOW is true if integer
-+   overflow must wrap.  */
-+
-+static bool
-+parloops_needs_fold_left_reduction_p (tree type, tree_code code,
-+				      bool need_wrapping_integral_overflow)
-+{
-+  /* CHECKME: check for !flag_finite_math_only too?  */
-+  if (SCALAR_FLOAT_TYPE_P (type))
-+    switch (code)
-+      {
-+      case MIN_EXPR:
-+      case MAX_EXPR:
-+	return false;
-+
-+      default:
-+	return !flag_associative_math;
-+      }
-+
-+  if (INTEGRAL_TYPE_P (type))
-+    {
-+      if (!operation_no_trapping_overflow (type, code))
-+	return true;
-+      if (need_wrapping_integral_overflow
-+	  && !TYPE_OVERFLOW_WRAPS (type)
-+	  && operation_can_overflow (code))
-+	return true;
-+      return false;
-+    }
-+
-+  if (SAT_FIXED_POINT_TYPE_P (type))
-+    return true;
-+
-+  return false;
-+}
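
[Editorial illustration only.]  The floating-point case is the interesting
one: vectorizing a reduction reassociates the additions, which can change the
result, so an in-order (fold-left) reduction is required unless something
like -fassociative-math permits the reordering.  A small demonstration of the
difference:

#include <stdio.h>

int
main (void)
{
  double vals[4] = { 1.0e16, 1.0, -1.0e16, 1.0 };

  /* In-order sum, as the scalar loop computes it.  */
  double inorder = 0.0;
  for (int i = 0; i < 4; i++)
    inorder += vals[i];

  /* Reassociated sum, as a two-lane vector reduction would compute it:
     lane 0 accumulates vals[0] and vals[2], lane 1 accumulates vals[1]
     and vals[3], then the lanes are combined.  */
  double lane0 = vals[0] + vals[2];
  double lane1 = vals[1] + vals[3];
  double reassoc = lane0 + lane1;

  /* Prints 1 for the in-order sum and 2 for the reassociated one.  */
  printf ("in-order %.17g, reassociated %.17g\n", inorder, reassoc);
  return 0;
}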
-+
-+
-+/* Function parloops_is_simple_reduction
-+
-+   (1) Detect a cross-iteration def-use cycle that represents a simple
-+   reduction computation.  We look for the following pattern:
-+
-+   loop_header:
-+     a1 = phi < a0, a2 >
-+     a3 = ...
-+     a2 = operation (a3, a1)
-+
-+   or
-+
-+   a3 = ...
-+   loop_header:
-+     a1 = phi < a0, a2 >
-+     a2 = operation (a3, a1)
-+
-+   such that:
-+   1. operation is commutative and associative and it is safe to
-+      change the order of the computation
-+   2. no uses for a2 in the loop (a2 is used out of the loop)
-+   3. no uses of a1 in the loop besides the reduction operation
-+   4. no uses of a1 outside the loop.
-+
-+   Conditions 1,4 are tested here.
-+   Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
-+
-+   (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
-+   nested cycles.
-+
-+   (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
-+   reductions:
-+
-+     a1 = phi < a0, a2 >
-+     inner loop (def of a3)
-+     a2 = phi < a3 >
-+
-+   (4) Detect condition expressions, i.e.:
-+     for (int i = 0; i < N; i++)
-+       if (a[i] < val)
-+	ret_val = a[i];
-+
-+*/
-+
-+static stmt_vec_info
-+parloops_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
-+			  bool *double_reduc,
-+			  bool need_wrapping_integral_overflow,
-+			  enum vect_reduction_type *v_reduc_type)
-+{
-+  gphi *phi = as_a <gphi *> (phi_info->stmt);
-+  class loop *loop = (gimple_bb (phi))->loop_father;
-+  class loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
-+  bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
-+  gimple *phi_use_stmt = NULL;
-+  enum tree_code orig_code, code;
-+  tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
-+  tree type;
-+  tree name;
-+  imm_use_iterator imm_iter;
-+  use_operand_p use_p;
-+  bool phi_def;
-+
-+  *double_reduc = false;
-+  *v_reduc_type = TREE_CODE_REDUCTION;
-+
-+  tree phi_name = PHI_RESULT (phi);
-+  /* ???  If there are no uses of the PHI result the inner loop reduction
-+     won't be detected as possibly double-reduction by vectorizable_reduction
-+     because that tries to walk the PHI arg from the preheader edge which
-+     can be constant.  See PR60382.  */
-+  if (has_zero_uses (phi_name))
-+    return NULL;
-+  unsigned nphi_def_loop_uses = 0;
-+  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, phi_name)
-+    {
-+      gimple *use_stmt = USE_STMT (use_p);
-+      if (is_gimple_debug (use_stmt))
-+	continue;
-+
-+      if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
-+        {
-+          if (dump_enabled_p ())
-+	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-+			     "intermediate value used outside loop.\n");
-+
-+          return NULL;
-+        }
-+
-+      nphi_def_loop_uses++;
-+      phi_use_stmt = use_stmt;
-+    }
-+
-+  edge latch_e = loop_latch_edge (loop);
-+  tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
-+  if (TREE_CODE (loop_arg) != SSA_NAME)
-+    {
-+      if (dump_enabled_p ())
-+	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-+			 "reduction: not ssa_name: %T\n", loop_arg);
-+      return NULL;
-+    }
-+
-+  stmt_vec_info def_stmt_info = loop_info->lookup_def (loop_arg);
-+  if (!def_stmt_info
-+      || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt)))
-+    return NULL;
-+
-+  if (gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt))
-+    {
-+      name = gimple_assign_lhs (def_stmt);
-+      phi_def = false;
-+    }
-+  else if (gphi *def_stmt = dyn_cast <gphi *> (def_stmt_info->stmt))
-+    {
-+      name = PHI_RESULT (def_stmt);
-+      phi_def = true;
-+    }
-+  else
-+    {
-+      if (dump_enabled_p ())
-+	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-+			 "reduction: unhandled reduction operation: %G",
-+			 def_stmt_info->stmt);
-+      return NULL;
-+    }
-+
-+  unsigned nlatch_def_loop_uses = 0;
-+  auto_vec<gphi *> lcphis;
-+  bool inner_loop_of_double_reduc = false;
-+  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
-+    {
-+      gimple *use_stmt = USE_STMT (use_p);
-+      if (is_gimple_debug (use_stmt))
-+	continue;
-+      if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
-+	nlatch_def_loop_uses++;
-+      else
-+	{
-+	  /* We can have more than one loop-closed PHI.  */
-+	  lcphis.safe_push (as_a <gphi *> (use_stmt));
-+	  if (nested_in_vect_loop
-+	      && (STMT_VINFO_DEF_TYPE (loop_info->lookup_stmt (use_stmt))
-+		  == vect_double_reduction_def))
-+	    inner_loop_of_double_reduc = true;
-+	}
-+    }
-+
-+  /* If this isn't a nested cycle or if the nested cycle reduction value
-+     is used outside of the inner loop we cannot handle uses of the reduction
-+     value.  */
-+  if ((!nested_in_vect_loop || inner_loop_of_double_reduc)
-+      && (nlatch_def_loop_uses > 1 || nphi_def_loop_uses > 1))
-+    {
-+      if (dump_enabled_p ())
-+	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-+			 "reduction used in loop.\n");
-+      return NULL;
-+    }
-+
-+  /* If DEF_STMT is a phi node itself, we expect it to have a single argument
-+     defined in the inner loop.  */
-+  if (phi_def)
-+    {
-+      gphi *def_stmt = as_a <gphi *> (def_stmt_info->stmt);
-+      op1 = PHI_ARG_DEF (def_stmt, 0);
-+
-+      if (gimple_phi_num_args (def_stmt) != 1
-+          || TREE_CODE (op1) != SSA_NAME)
-+        {
-+          if (dump_enabled_p ())
-+	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-+			     "unsupported phi node definition.\n");
-+
-+          return NULL;
-+        }
-+
-+      gimple *def1 = SSA_NAME_DEF_STMT (op1);
-+      if (gimple_bb (def1)
-+	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
-+          && loop->inner
-+          && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
-+          && is_gimple_assign (def1)
-+	  && is_a <gphi *> (phi_use_stmt)
-+	  && flow_bb_inside_loop_p (loop->inner, gimple_bb (phi_use_stmt)))
-+        {
-+          if (dump_enabled_p ())
-+            report_ploop_op (MSG_NOTE, def_stmt,
-+			     "detected double reduction: ");
-+
-+          *double_reduc = true;
-+	  return def_stmt_info;
-+        }
-+
-+      return NULL;
-+    }
-+
-+  /* If we are vectorizing an inner reduction we are executing that
-+     in the original order only in case we are not dealing with a
-+     double reduction.  */
-+  bool check_reduction = true;
-+  if (flow_loop_nested_p (vect_loop, loop))
-+    {
-+      gphi *lcphi;
-+      unsigned i;
-+      check_reduction = false;
-+      FOR_EACH_VEC_ELT (lcphis, i, lcphi)
-+	FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (lcphi))
-+	  {
-+	    gimple *use_stmt = USE_STMT (use_p);
-+	    if (is_gimple_debug (use_stmt))
-+	      continue;
-+	    if (! flow_bb_inside_loop_p (vect_loop, gimple_bb (use_stmt)))
-+	      check_reduction = true;
-+	  }
-+    }
-+
-+  gassign *def_stmt = as_a <gassign *> (def_stmt_info->stmt);
-+  code = orig_code = gimple_assign_rhs_code (def_stmt);
-+
-+  if (nested_in_vect_loop && !check_reduction)
-+    {
-+      /* FIXME: Even for non-reductions code generation is funneled
-+	 through vectorizable_reduction for the stmt defining the
-+	 PHI latch value.  So we have to artificially restrict ourselves
-+	 for the supported operations.  */
-+      switch (get_gimple_rhs_class (code))
-+	{
-+	case GIMPLE_BINARY_RHS:
-+	case GIMPLE_TERNARY_RHS:
-+	  break;
-+	default:
-+	  /* Not supported by vectorizable_reduction.  */
-+	  if (dump_enabled_p ())
-+	    report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
-+			     "nested cycle: not handled operation: ");
-+	  return NULL;
-+	}
-+      if (dump_enabled_p ())
-+	report_ploop_op (MSG_NOTE, def_stmt, "detected nested cycle: ");
-+      return def_stmt_info;
-+    }
-+
-+  /* We can handle "res -= x[i]", which is non-associative by
-+     simply rewriting this into "res += -x[i]".  Avoid changing
-+     gimple instruction for the first simple tests and only do this
-+     if we're allowed to change code at all.  */
-+  if (code == MINUS_EXPR && gimple_assign_rhs2 (def_stmt) != phi_name)
-+    code = PLUS_EXPR;
-+
-+  if (code == COND_EXPR)
-+    {
-+      if (! nested_in_vect_loop)
-+	*v_reduc_type = COND_REDUCTION;
-+
-+      op3 = gimple_assign_rhs1 (def_stmt);
-+      if (COMPARISON_CLASS_P (op3))
-+        {
-+          op4 = TREE_OPERAND (op3, 1);
-+          op3 = TREE_OPERAND (op3, 0);
-+        }
-+      if (op3 == phi_name || op4 == phi_name)
-+	{
-+	  if (dump_enabled_p ())
-+	    report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
-+			     "reduction: condition depends on previous"
-+			     " iteration: ");
-+	  return NULL;
-+	}
-+
-+      op1 = gimple_assign_rhs2 (def_stmt);
-+      op2 = gimple_assign_rhs3 (def_stmt);
-+    }
-+  else if (!commutative_tree_code (code) || !associative_tree_code (code))
-+    {
-+      if (dump_enabled_p ())
-+	report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
-+			 "reduction: not commutative/associative: ");
-+      return NULL;
-+    }
-+  else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
-+    {
-+      op1 = gimple_assign_rhs1 (def_stmt);
-+      op2 = gimple_assign_rhs2 (def_stmt);
-+    }
-+  else
-+    {
-+      if (dump_enabled_p ())
-+	report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
-+			 "reduction: not handled operation: ");
-+      return NULL;
-+    }
-+
-+  if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
-+    {
-+      if (dump_enabled_p ())
-+	report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
-+			 "reduction: both uses not ssa_names: ");
-+
-+      return NULL;
-+    }
-+
-+  type = TREE_TYPE (gimple_assign_lhs (def_stmt));
-+  if ((TREE_CODE (op1) == SSA_NAME
-+       && !types_compatible_p (type,TREE_TYPE (op1)))
-+      || (TREE_CODE (op2) == SSA_NAME
-+          && !types_compatible_p (type, TREE_TYPE (op2)))
-+      || (op3 && TREE_CODE (op3) == SSA_NAME
-+          && !types_compatible_p (type, TREE_TYPE (op3)))
-+      || (op4 && TREE_CODE (op4) == SSA_NAME
-+          && !types_compatible_p (type, TREE_TYPE (op4))))
-+    {
-+      if (dump_enabled_p ())
-+        {
-+          dump_printf_loc (MSG_NOTE, vect_location,
-+			   "reduction: multiple types: operation type: "
-+			   "%T, operands types: %T,%T",
-+			   type,  TREE_TYPE (op1), TREE_TYPE (op2));
-+          if (op3)
-+	    dump_printf (MSG_NOTE, ",%T", TREE_TYPE (op3));
-+
-+          if (op4)
-+	    dump_printf (MSG_NOTE, ",%T", TREE_TYPE (op4));
-+          dump_printf (MSG_NOTE, "\n");
-+        }
-+
-+      return NULL;
-+    }
-+
-+  /* Check whether it's ok to change the order of the computation.
-+     Generally, when vectorizing a reduction we change the order of the
-+     computation.  This may change the behavior of the program in some
-+     cases, so we need to check that this is ok.  One exception is when
-+     vectorizing an outer-loop: the inner-loop is executed sequentially,
-+     and therefore vectorizing reductions in the inner-loop during
-+     outer-loop vectorization is safe.  */
-+  if (check_reduction
-+      && *v_reduc_type == TREE_CODE_REDUCTION
-+      && parloops_needs_fold_left_reduction_p (type, code,
-+					       need_wrapping_integral_overflow))
-+    *v_reduc_type = FOLD_LEFT_REDUCTION;
-+
-+  /* Reduction is safe. We're dealing with one of the following:
-+     1) integer arithmetic and no trapv
-+     2) floating point arithmetic, and special flags permit this optimization
-+     3) nested cycle (i.e., outer loop vectorization).  */
-+  stmt_vec_info def1_info = loop_info->lookup_def (op1);
-+  stmt_vec_info def2_info = loop_info->lookup_def (op2);
-+  if (code != COND_EXPR && !def1_info && !def2_info)
-+    {
-+      if (dump_enabled_p ())
-+	report_ploop_op (MSG_NOTE, def_stmt,
-+			 "reduction: no defs for operands: ");
-+      return NULL;
-+    }
-+
-+  /* Check that one def is the reduction def, defined by PHI,
-+     the other def is either defined in the loop ("vect_internal_def"),
-+     or it's an induction (defined by a loop-header phi-node).  */
-+
-+  if (def2_info
-+      && def2_info->stmt == phi
-+      && (code == COND_EXPR
-+	  || !def1_info
-+	  || !flow_bb_inside_loop_p (loop, gimple_bb (def1_info->stmt))
-+	  || parloops_valid_reduction_input_p (def1_info)))
-+    {
-+      if (dump_enabled_p ())
-+	report_ploop_op (MSG_NOTE, def_stmt, "detected reduction: ");
-+      return def_stmt_info;
-+    }
-+
-+  if (def1_info
-+      && def1_info->stmt == phi
-+      && (code == COND_EXPR
-+	  || !def2_info
-+	  || !flow_bb_inside_loop_p (loop, gimple_bb (def2_info->stmt))
-+	  || parloops_valid_reduction_input_p (def2_info)))
-+    {
-+      if (! nested_in_vect_loop && orig_code != MINUS_EXPR)
-+	{
-+	  /* Check if we can swap operands (just for simplicity - so that
-+	     the rest of the code can assume that the reduction variable
-+	     is always the last (second) argument).  */
-+	  if (code == COND_EXPR)
-+	    {
-+	      /* Swap cond_expr by inverting the condition.  */
-+	      tree cond_expr = gimple_assign_rhs1 (def_stmt);
-+	      enum tree_code invert_code = ERROR_MARK;
-+	      enum tree_code cond_code = TREE_CODE (cond_expr);
-+
-+	      if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
-+		{
-+		  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
-+		  invert_code = invert_tree_comparison (cond_code, honor_nans);
-+		}
-+	      if (invert_code != ERROR_MARK)
-+		{
-+		  TREE_SET_CODE (cond_expr, invert_code);
-+		  swap_ssa_operands (def_stmt,
-+				     gimple_assign_rhs2_ptr (def_stmt),
-+				     gimple_assign_rhs3_ptr (def_stmt));
-+		}
-+	      else
-+		{
-+		  if (dump_enabled_p ())
-+		    report_ploop_op (MSG_NOTE, def_stmt,
-+				     "detected reduction: cannot swap operands "
-+				     "for cond_expr");
-+		  return NULL;
-+		}
-+	    }
-+	  else
-+	    swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
-+			       gimple_assign_rhs2_ptr (def_stmt));
-+
-+	  if (dump_enabled_p ())
-+	    report_ploop_op (MSG_NOTE, def_stmt,
-+			     "detected reduction: need to swap operands: ");
-+
-+	  if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt)))
-+	    LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
-+        }
-+      else
-+        {
-+          if (dump_enabled_p ())
-+            report_ploop_op (MSG_NOTE, def_stmt, "detected reduction: ");
-+        }
-+
-+      return def_stmt_info;
-+    }
-+
-+  /* Try to find SLP reduction chain.  */
-+  if (! nested_in_vect_loop
-+      && code != COND_EXPR
-+      && orig_code != MINUS_EXPR
-+      && parloops_is_slp_reduction (loop_info, phi, def_stmt))
-+    {
-+      if (dump_enabled_p ())
-+        report_ploop_op (MSG_NOTE, def_stmt,
-+			 "reduction: detected reduction chain: ");
-+
-+      return def_stmt_info;
-+    }
-+
-+  /* Look for the expression computing loop_arg from loop PHI result.  */
-+  if (check_reduction_path (vect_location, loop, phi, loop_arg, code))
-+    return def_stmt_info;
-+
-+  if (dump_enabled_p ())
-+    {
-+      report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
-+		       "reduction: unknown pattern: ");
-+    }
-+
-+  return NULL;
-+}
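
[Editorial illustration only.]  The shape being matched corresponds to an
ordinary accumulation loop; the function below produces the phi/operation
cycle described in the comment above (sum_1 = PHI of the initial value and
the latch value, sum_2 = sum_1 + element, with no other in-loop uses).

#include <stdio.h>

static int
array_sum (const int *a, int n)
{
  int sum = 0;
  for (int i = 0; i < n; i++)
    sum += a[i];
  return sum;
}

int
main (void)
{
  int a[5] = { 1, 2, 3, 4, 5 };
  printf ("%d\n", array_sum (a, 5));
  return 0;
}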
-+
-+/* Wrapper around vect_is_simple_reduction, which will modify code
-+   in-place if it enables detection of more reductions.  Arguments
-+   as there.  */
-+
-+stmt_vec_info
-+parloops_force_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
-+			     bool *double_reduc,
-+			     bool need_wrapping_integral_overflow)
-+{
-+  enum vect_reduction_type v_reduc_type;
-+  stmt_vec_info def_info
-+    = parloops_is_simple_reduction (loop_info, phi_info, double_reduc,
-+				need_wrapping_integral_overflow,
-+				&v_reduc_type);
-+  if (def_info)
-+    {
-+      STMT_VINFO_REDUC_TYPE (phi_info) = v_reduc_type;
-+      STMT_VINFO_REDUC_DEF (phi_info) = def_info;
-+      STMT_VINFO_REDUC_TYPE (def_info) = v_reduc_type;
-+      STMT_VINFO_REDUC_DEF (def_info) = phi_info;
-+    }
-+  return def_info;
-+}
-+
- /* Minimal number of iterations of a loop that should be executed in each
-    thread.  */
- #define MIN_PER_THREAD PARAM_VALUE (PARAM_PARLOOPS_MIN_PER_THREAD)
-@@ -2614,9 +3326,9 @@ gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list
- 	continue;
- 
-       stmt_vec_info reduc_stmt_info
--	= vect_force_simple_reduction (simple_loop_info,
--				       simple_loop_info->lookup_stmt (phi),
--				       &double_reduc, true);
-+	= parloops_force_simple_reduction (simple_loop_info,
-+					   simple_loop_info->lookup_stmt (phi),
-+					   &double_reduc, true);
-       if (!reduc_stmt_info || !valid_reduction_p (reduc_stmt_info))
- 	continue;
- 
-@@ -2663,9 +3375,9 @@ gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list
- 	      stmt_vec_info inner_phi_info
- 		= simple_loop_info->lookup_stmt (inner_phi);
- 	      stmt_vec_info inner_reduc_stmt_info
--		= vect_force_simple_reduction (simple_loop_info,
--					       inner_phi_info,
--					       &double_reduc, true);
-+		= parloops_force_simple_reduction (simple_loop_info,
-+						   inner_phi_info,
-+						   &double_reduc, true);
- 	      gcc_assert (!double_reduc);
- 	      if (!inner_reduc_stmt_info
- 		  || !valid_reduction_p (inner_reduc_stmt_info))
-diff --git a/gcc/tree-ssa-alias.c b/gcc/tree-ssa-alias.c
-index 01f095382d6..54e8adc8d7c 100644
---- a/gcc/tree-ssa-alias.c
-+++ b/gcc/tree-ssa-alias.c
-@@ -2535,13 +2535,36 @@ stmt_kills_ref_p (gimple *stmt, ao_ref *ref)
- 	  case BUILT_IN_MEMSET_CHK:
- 	  case BUILT_IN_STRNCPY:
- 	  case BUILT_IN_STPNCPY:
-+	  case BUILT_IN_CALLOC:
- 	    {
- 	      /* For a must-alias check we need to be able to constrain
- 		 the access properly.  */
- 	      if (!ref->max_size_known_p ())
- 		return false;
--	      tree dest = gimple_call_arg (stmt, 0);
--	      tree len = gimple_call_arg (stmt, 2);
-+	      tree dest;
-+	      tree len;
-+
-+	      /* In execution order a calloc call will never kill
-+		 anything.  However, DSE will (ab)use this interface
-+		 to ask if a calloc call writes the same memory locations
-+		 as a later assignment, memset, etc.  So handle calloc
-+		 in the expected way.  */
-+	      if (DECL_FUNCTION_CODE (callee) == BUILT_IN_CALLOC)
-+		{
-+		  tree arg0 = gimple_call_arg (stmt, 0);
-+		  tree arg1 = gimple_call_arg (stmt, 1);
-+		  if (TREE_CODE (arg0) != INTEGER_CST
-+		      || TREE_CODE (arg1) != INTEGER_CST)
-+		    return false;
-+
-+		  dest = gimple_call_lhs (stmt);
-+		  len = fold_build2 (MULT_EXPR, TREE_TYPE (arg0), arg0, arg1);
-+		}
-+	      else
-+		{
-+		  dest = gimple_call_arg (stmt, 0);
-+		  len = gimple_call_arg (stmt, 2);
-+		}
- 	      if (!poly_int_tree_p (len))
- 		return false;
- 	      tree rbase = ref->base;
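
[Editorial illustration, not from the patch's testsuite.]  The motivating
case for the BUILT_IN_CALLOC handling is a zero store into memory that calloc
has already cleared: with constant arguments the call's effect can be
modelled as a write of nelem * selem zero bytes, so a later store of zero
into that region is redundant.  A hypothetical example:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main (void)
{
  /* Constant arguments, so the patched code can describe the calloc call
     as a write of 8 * sizeof (int) zero bytes.  */
  int *p = calloc (8, sizeof (int));
  if (!p)
    return 1;

  /* Both of these only re-store zeros that calloc already wrote; with the
     patch, DSE may classify them as redundant and delete them.  */
  memset (p, 0, 4 * sizeof (int));
  p[5] = 0;

  printf ("%d %d\n", p[0], p[5]);
  free (p);
  return 0;
}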
-diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c
-index efe5b31cc0a..c20fbe048ed 100644
---- a/gcc/tree-ssa-dse.c
-+++ b/gcc/tree-ssa-dse.c
-@@ -1,4 +1,4 @@
--/* Dead store elimination
-+/* Dead and redundant store elimination
-    Copyright (C) 2004-2019 Free Software Foundation, Inc.
- 
- This file is part of GCC.
-@@ -36,17 +36,26 @@ along with GCC; see the file COPYING3.  If not see
- #include "params.h"
- #include "alias.h"
- #include "tree-ssa-loop.h"
-+#include "tree-ssa-dse.h"
- 
- /* This file implements dead store elimination.
- 
-    A dead store is a store into a memory location which will later be
-    overwritten by another store without any intervening loads.  In this
--   case the earlier store can be deleted.
-+   case the earlier store can be deleted or trimmed if the store
-+   was partially dead.
-+
-+   A redundant store is a store into a memory location which stores
-+   the exact same value as a prior store to the same memory location.
-+   While this can often be handled by dead store elimination, removing
-+   the redundant store is often better than removing or trimming the
-+   dead store.
- 
-    In our SSA + virtual operand world we use immediate uses of virtual
--   operands to detect dead stores.  If a store's virtual definition
-+   operands to detect these cases.  If a store's virtual definition
-    is used precisely once by a later store to the same location which
--   post dominates the first store, then the first store is dead.
-+   post dominates the first store, then the first store is dead.  If
-+   the data stored is the same, then the second store is redundant.
- 
-    The single use of the store's virtual definition ensures that
-    there are no intervening aliased loads and the requirement that
-@@ -58,7 +67,9 @@ along with GCC; see the file COPYING3.  If not see
-    the point immediately before the later store.  Again, the single
-    use of the virtual definition and the post-dominance relationship
-    ensure that such movement would be safe.  Clearly if there are
--   back to back stores, then the second is redundant.
-+   back to back stores, then the second makes the first dead.  If
-+   the second store stores the same value, then the second store is
-+   redundant.
- 
-    Reviewing section 10.7.2 in Morgan's "Building an Optimizing Compiler"
-    may also help in understanding this code since it discusses the
-@@ -66,19 +77,13 @@ along with GCC; see the file COPYING3.  If not see
-    fact, they are the same transformation applied to different views of
-    the CFG.  */
- 
-+void delete_dead_or_redundant_assignment (gimple_stmt_iterator *, const char *);
-+static void delete_dead_or_redundant_call (gimple_stmt_iterator *, const char *);
- 
- /* Bitmap of blocks that have had EH statements cleaned.  We should
-    remove their dead edges eventually.  */
- static bitmap need_eh_cleanup;
- 
--/* Return value from dse_classify_store */
--enum dse_store_status
--{
--  DSE_STORE_LIVE,
--  DSE_STORE_MAYBE_PARTIAL_DEAD,
--  DSE_STORE_DEAD
--};
--
- /* STMT is a statement that may write into memory.  Analyze it and
-    initialize WRITE to describe how STMT affects memory.
- 
-@@ -106,6 +111,25 @@ initialize_ao_ref_for_dse (gimple *stmt, ao_ref *write)
- 	      ao_ref_init_from_ptr_and_size (write, ptr, size);
- 	      return true;
- 	    }
-+
-+	  /* A calloc call can never be dead, but it can make
-+	     subsequent stores redundant if they store 0 into
-+	     the same memory locations.  */
-+	  case BUILT_IN_CALLOC:
-+	    {
-+	      tree nelem = gimple_call_arg (stmt, 0);
-+	      tree selem = gimple_call_arg (stmt, 1);
-+	      if (TREE_CODE (nelem) == INTEGER_CST
-+		  && TREE_CODE (selem) == INTEGER_CST)
-+		{
-+		  tree lhs = gimple_call_lhs (stmt);
-+		  tree size = fold_build2 (MULT_EXPR, TREE_TYPE (nelem),
-+					   nelem, selem);
-+		  ao_ref_init_from_ptr_and_size (write, lhs, size);
-+		  return true;
-+		}
-+	    }
-+
- 	  default:
- 	    break;
- 	}
-@@ -551,16 +575,84 @@ check_name (tree, tree *idx, void *data)
-   return true;
- }
- 
-+/* STMT stores the value 0 into one or more memory locations
-+   (via memset, empty constructor, calloc call, etc).
-+
-+   See if there is a subsequent store of the value 0 to one
-+   or more of the same memory location(s).  If so, the subsequent
-+   store is redundant and can be removed.
-+
-+   The subsequent stores could be via memset, empty constructors,
-+   simple MEM stores, etc.  */
-+
-+static void
-+dse_optimize_redundant_stores (gimple *stmt)
-+{
-+  int cnt = 0;
-+
-+  /* We could do something fairly complex and look through PHIs
-+     like DSE_CLASSIFY_STORE, but it doesn't seem to be worth
-+     the effort.
-+
-+     Look at all the immediate uses of the VDEF (which are obviously
-+     dominated by STMT).  See if one or more of them stores 0 into the same
-+     memory locations as STMT; if so, remove those immediate use statements.  */
-+  tree defvar = gimple_vdef (stmt);
-+  imm_use_iterator ui;
-+  gimple *use_stmt;
-+  FOR_EACH_IMM_USE_STMT (use_stmt, ui, defvar)
-+    {
-+      /* Limit stmt walking.  */
-+      if (++cnt > PARAM_VALUE (PARAM_DSE_MAX_ALIAS_QUERIES_PER_STORE))
-+	BREAK_FROM_IMM_USE_STMT (ui);
-+
-+      /* If USE_STMT stores 0 into one or more of the same locations
-+	 as STMT and STMT would kill USE_STMT, then we can just remove
-+	 USE_STMT.  */
-+      tree fndecl;
-+      if ((is_gimple_assign (use_stmt)
-+	   && gimple_vdef (use_stmt)
-+	   && ((gimple_assign_rhs_code (use_stmt) == CONSTRUCTOR
-+	        && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (use_stmt)) == 0
-+	        && !gimple_clobber_p (stmt))
-+	       || (gimple_assign_rhs_code (use_stmt) == INTEGER_CST
-+		   && integer_zerop (gimple_assign_rhs1 (use_stmt)))))
-+	  || (gimple_call_builtin_p (use_stmt, BUILT_IN_NORMAL)
-+	      && (fndecl = gimple_call_fndecl (use_stmt)) != NULL
-+	      && (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_MEMSET
-+		  || DECL_FUNCTION_CODE (fndecl) == BUILT_IN_MEMSET_CHK)
-+	      && integer_zerop (gimple_call_arg (use_stmt, 1))))
-+	{
-+	  ao_ref write;
-+
-+	  if (!initialize_ao_ref_for_dse (use_stmt, &write))
-+	    BREAK_FROM_IMM_USE_STMT (ui)
-+
-+	  if (valid_ao_ref_for_dse (&write)
-+	      && stmt_kills_ref_p (stmt, &write))
-+	    {
-+	      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
-+	      if (is_gimple_assign (use_stmt))
-+		delete_dead_or_redundant_assignment (&gsi, "redundant");
-+	      else if (is_gimple_call (use_stmt))
-+		delete_dead_or_redundant_call (&gsi, "redundant");
-+	      else
-+		gcc_unreachable ();
-+	    }
-+	}
-+    }
-+}
-+
- /* A helper of dse_optimize_stmt.
-    Given a GIMPLE_ASSIGN in STMT that writes to REF, classify it
-    according to downstream uses and defs.  Sets *BY_CLOBBER_P to true
-    if only clobber statements influenced the classification result.
-    Returns the classification.  */
- 
--static dse_store_status
-+dse_store_status
- dse_classify_store (ao_ref *ref, gimple *stmt,
- 		    bool byte_tracking_enabled, sbitmap live_bytes,
--		    bool *by_clobber_p = NULL)
-+		    bool *by_clobber_p, tree stop_at_vuse)
- {
-   gimple *temp;
-   int cnt = 0;
-@@ -596,6 +688,11 @@ dse_classify_store (ao_ref *ref, gimple *stmt,
- 	}
-       else
- 	defvar = gimple_vdef (temp);
-+
-+      /* If we're instructed to stop walking at region boundary, do so.  */
-+      if (defvar == stop_at_vuse)
-+	return DSE_STORE_LIVE;
-+
-       auto_vec<gimple *> defs;
-       gimple *phi_def = NULL;
-       FOR_EACH_IMM_USE_STMT (use_stmt, ui, defvar)
-@@ -763,12 +860,12 @@ private:
- 
- /* Delete a dead call at GSI, which is mem* call of some kind.  */
- static void
--delete_dead_call (gimple_stmt_iterator *gsi)
-+delete_dead_or_redundant_call (gimple_stmt_iterator *gsi, const char *type)
- {
-   gimple *stmt = gsi_stmt (*gsi);
-   if (dump_file && (dump_flags & TDF_DETAILS))
-     {
--      fprintf (dump_file, "  Deleted dead call: ");
-+      fprintf (dump_file, "  Deleted %s call: ", type);
-       print_gimple_stmt (dump_file, stmt, 0, dump_flags);
-       fprintf (dump_file, "\n");
-     }
-@@ -796,13 +893,13 @@ delete_dead_call (gimple_stmt_iterator *gsi)
- 
- /* Delete a dead store at GSI, which is a gimple assignment. */
- 
--static void
--delete_dead_assignment (gimple_stmt_iterator *gsi)
-+void
-+delete_dead_or_redundant_assignment (gimple_stmt_iterator *gsi, const char *type)
- {
-   gimple *stmt = gsi_stmt (*gsi);
-   if (dump_file && (dump_flags & TDF_DETAILS))
-     {
--      fprintf (dump_file, "  Deleted dead store: ");
-+      fprintf (dump_file, "  Deleted %s store: ", type);
-       print_gimple_stmt (dump_file, stmt, 0, dump_flags);
-       fprintf (dump_file, "\n");
-     }
-@@ -855,7 +952,8 @@ dse_dom_walker::dse_optimize_stmt (gimple_stmt_iterator *gsi)
-      some builtin calls.  */
-   if (gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
-     {
--      switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
-+      tree fndecl = gimple_call_fndecl (stmt);
-+      switch (DECL_FUNCTION_CODE (fndecl))
- 	{
- 	  case BUILT_IN_MEMCPY:
- 	  case BUILT_IN_MEMMOVE:
-@@ -867,10 +965,18 @@ dse_dom_walker::dse_optimize_stmt (gimple_stmt_iterator *gsi)
- 	      tree size = gimple_call_arg (stmt, 2);
- 	      if (integer_zerop (size))
- 		{
--		  delete_dead_call (gsi);
-+		  delete_dead_or_redundant_call (gsi, "dead");
- 		  return;
- 		}
- 
-+	      /* If this is a memset call that initializes an object
-+		 to zero, it may be redundant with an earlier memset
-+		 or empty CONSTRUCTOR of a larger object.  */
-+	      if ((DECL_FUNCTION_CODE (fndecl) == BUILT_IN_MEMSET
-+		   || DECL_FUNCTION_CODE (fndecl) == BUILT_IN_MEMSET_CHK)
-+		  && integer_zerop (gimple_call_arg (stmt, 1)))
-+		dse_optimize_redundant_stores (stmt);
-+
- 	      enum dse_store_status store_status;
- 	      m_byte_tracking_enabled
- 		= setup_live_bytes_from_ref (&ref, m_live_bytes);
-@@ -887,10 +993,14 @@ dse_dom_walker::dse_optimize_stmt (gimple_stmt_iterator *gsi)
- 		}
- 
- 	      if (store_status == DSE_STORE_DEAD)
--		delete_dead_call (gsi);
-+		delete_dead_or_redundant_call (gsi, "dead");
- 	      return;
- 	    }
- 
-+	  case BUILT_IN_CALLOC:
-+	    /* We already know the arguments are integer constants.  */
-+	    dse_optimize_redundant_stores (stmt);
-+
- 	  default:
- 	    return;
- 	}
-@@ -900,6 +1010,18 @@ dse_dom_walker::dse_optimize_stmt (gimple_stmt_iterator *gsi)
-     {
-       bool by_clobber_p = false;
- 
-+      /* First see if this store is a CONSTRUCTOR and if there
-+	 are subsequent CONSTRUCTOR stores which are totally
-+	 subsumed by this statement.  If so remove the subsequent
-+	 CONSTRUCTOR store.
-+
-+	 This will tend to make fewer calls into memset with longer
-+	 arguments.  */
-+      if (gimple_assign_rhs_code (stmt) == CONSTRUCTOR
-+	  && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt)) == 0
-+	  && !gimple_clobber_p (stmt))
-+	dse_optimize_redundant_stores (stmt);
-+
-       /* Self-assignments are zombies.  */
-       if (operand_equal_p (gimple_assign_rhs1 (stmt),
- 			   gimple_assign_lhs (stmt), 0))
-@@ -930,7 +1052,7 @@ dse_dom_walker::dse_optimize_stmt (gimple_stmt_iterator *gsi)
- 	  && !by_clobber_p)
- 	return;
- 
--      delete_dead_assignment (gsi);
-+      delete_dead_or_redundant_assignment (gsi, "dead");
-     }
- }
- 
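
Likewise for illustration only (not part of the patch): the new dse_optimize_redundant_stores path lets a zero aggregate initialization, which GIMPLE typically represents as an empty-CONSTRUCTOR store, subsume later stores of zero into the same object.  A hypothetical example:

    #include <string.h>

    struct pkt { int id; char payload[64]; };

    void
    init_pkt (struct pkt *out)
    {
      struct pkt tmp = { 0 };  /* zero-fills tmp; usually an empty CONSTRUCTOR in GIMPLE */
      memset (tmp.payload, 0, sizeof tmp.payload);  /* redundant: already zero */
      tmp.id = 1;
      *out = tmp;
    }
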
-diff --git a/gcc/tree-ssa-dse.h b/gcc/tree-ssa-dse.h
-new file mode 100644
-index 00000000000..a5eccbd746d
---- /dev/null
-+++ b/gcc/tree-ssa-dse.h
-@@ -0,0 +1,36 @@
-+/* Support routines for dead store elimination. 
-+   Copyright (C) 2019 Free Software Foundation, Inc.
-+
-+This file is part of GCC.
-+
-+GCC is free software; you can redistribute it and/or modify
-+it under the terms of the GNU General Public License as published by
-+the Free Software Foundation; either version 3, or (at your option)
-+any later version.
-+
-+GCC is distributed in the hope that it will be useful,
-+but WITHOUT ANY WARRANTY; without even the implied warranty of
-+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-+GNU General Public License for more details.
-+
-+You should have received a copy of the GNU General Public License
-+along with GCC; see the file COPYING3.  If not see
-+<http://www.gnu.org/licenses/>.  */
-+
-+#ifndef GCC_TREE_SSA_DSE_H
-+#define GCC_TREE_SSA_DSE_H
-+
-+/* Return value from dse_classify_store */
-+enum dse_store_status
-+{
-+  DSE_STORE_LIVE,
-+  DSE_STORE_MAYBE_PARTIAL_DEAD,
-+  DSE_STORE_DEAD
-+};
-+
-+dse_store_status dse_classify_store (ao_ref *, gimple *, bool, sbitmap,
-+				     bool * = NULL, tree = NULL);
-+
-+void delete_dead_or_redundant_assignment (gimple_stmt_iterator *, const char *);
-+
-+#endif   /* GCC_TREE_SSA_DSE_H  */
-diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
-index d241becd481..2d54e13b180 100644
---- a/gcc/tree-ssa-loop-niter.c
-+++ b/gcc/tree-ssa-loop-niter.c
-@@ -1928,7 +1928,7 @@ number_of_iterations_cond (struct loop *loop,
- 
- tree
- simplify_replace_tree (tree expr, tree old, tree new_tree,
--		       tree (*valueize) (tree))
-+		       tree (*valueize) (tree, void*), void *context)
- {
-   unsigned i, n;
-   tree ret = NULL_TREE, e, se;
-@@ -1944,7 +1944,7 @@ simplify_replace_tree (tree expr, tree old, tree new_tree,
-     {
-       if (TREE_CODE (expr) == SSA_NAME)
- 	{
--	  new_tree = valueize (expr);
-+	  new_tree = valueize (expr, context);
- 	  if (new_tree != expr)
- 	    return new_tree;
- 	}
-@@ -1960,7 +1960,7 @@ simplify_replace_tree (tree expr, tree old, tree new_tree,
-   for (i = 0; i < n; i++)
-     {
-       e = TREE_OPERAND (expr, i);
--      se = simplify_replace_tree (e, old, new_tree, valueize);
-+      se = simplify_replace_tree (e, old, new_tree, valueize, context);
-       if (e == se)
- 	continue;
- 
-diff --git a/gcc/tree-ssa-loop-niter.h b/gcc/tree-ssa-loop-niter.h
-index dc116489218..fb192d2c250 100644
---- a/gcc/tree-ssa-loop-niter.h
-+++ b/gcc/tree-ssa-loop-niter.h
-@@ -53,7 +53,9 @@ extern bool scev_probably_wraps_p (tree, tree, tree, gimple *,
- 				   struct loop *, bool);
- extern void free_numbers_of_iterations_estimates (struct loop *);
- extern void free_numbers_of_iterations_estimates (function *);
--extern tree simplify_replace_tree (tree, tree, tree, tree (*)(tree) = NULL);
-+extern tree simplify_replace_tree (tree, tree,
-+				   tree, tree (*)(tree, void *) = NULL,
-+				   void * = NULL);
- extern void substitute_in_loop_info (struct loop *, tree, tree);
- 
- #endif /* GCC_TREE_SSA_LOOP_NITER_H */
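
The simplify_replace_tree change above threads an opaque void * context through to the valueize callback instead of relying on global state.  As a generic C sketch of that callback-with-context idiom (not GCC code, just the pattern the new signature enables):

    /* A callback that needs per-call state receives it through an opaque
       pointer supplied by the caller.  */
    typedef int (*valueize_fn) (int value, void *context);

    struct offset_ctx { int delta; };

    static int
    add_offset (int value, void *context)
    {
      struct offset_ctx *ctx = (struct offset_ctx *) context;
      return value + ctx->delta;
    }

    static int
    apply (int value, valueize_fn fn, void *context)
    {
      return fn ? fn (value, context) : value;
    }

    /* Usage: struct offset_ctx c = { 3 }; apply (39, add_offset, &c) yields 42.  */
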
-diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c
-index 00a09508836..551718637f1 100644
---- a/gcc/tree-ssa-loop.c
-+++ b/gcc/tree-ssa-loop.c
-@@ -768,9 +768,9 @@ get_lsm_tmp_name (tree ref, unsigned n, const char *suffix)
-       ns[1] = 0;
-       lsm_tmp_name_add (ns);
-     }
--  return lsm_tmp_name;
-   if (suffix != NULL)
-     lsm_tmp_name_add (suffix);
-+  return lsm_tmp_name;
- }
- 
- /* Computes an estimated number of insns in LOOP, weighted by WEIGHTS.  */
-diff --git a/gcc/tree-ssa-reassoc.c b/gcc/tree-ssa-reassoc.c
-index 6794fbde29e..9c1a9a651fe 100644
---- a/gcc/tree-ssa-reassoc.c
-+++ b/gcc/tree-ssa-reassoc.c
-@@ -2039,9 +2039,6 @@ optimize_ops_list (enum tree_code opcode,
-       i++;
-     }
- 
--  length = ops->length ();
--  oelast = ops->last ();
--
-   if (iterate)
-     optimize_ops_list (opcode, ops);
- }
-diff --git a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c
-index 95fbead7b1e..cd5a3a75eaa 100644
---- a/gcc/tree-ssa-sccvn.c
-+++ b/gcc/tree-ssa-sccvn.c
-@@ -309,6 +309,10 @@ static vn_tables_t valid_info;
- /* Valueization hook.  Valueize NAME if it is an SSA name, otherwise
-    just return it.  */
- tree (*vn_valueize) (tree);
-+tree vn_valueize_wrapper (tree t, void* context ATTRIBUTE_UNUSED)
-+{
-+  return vn_valueize (t);
-+}
- 
- 
- /* This represents the top of the VN lattice, which is the universal
-@@ -6364,7 +6368,7 @@ process_bb (rpo_elim &avail, basic_block bb,
-       if (bb->loop_father->nb_iterations)
- 	bb->loop_father->nb_iterations
- 	  = simplify_replace_tree (bb->loop_father->nb_iterations,
--				   NULL_TREE, NULL_TREE, vn_valueize);
-+				   NULL_TREE, NULL_TREE, &vn_valueize_wrapper);
-     }
- 
-   /* Value-number all defs in the basic-block.  */
-diff --git a/gcc/tree-ssa-sink.c b/gcc/tree-ssa-sink.c
-index 2648b24f7d5..98b6caced03 100644
---- a/gcc/tree-ssa-sink.c
-+++ b/gcc/tree-ssa-sink.c
-@@ -433,7 +433,6 @@ statement_sink_location (gimple *stmt, basic_block frombb,
- 
-       if (gimple_code (use) != GIMPLE_PHI)
- 	{
--	  sinkbb = gimple_bb (use);
- 	  sinkbb = select_best_block (frombb, gimple_bb (use), stmt);
- 
- 	  if (sinkbb == frombb)
-diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c
-index c3ea2d680d8..91494d76176 100644
---- a/gcc/tree-ssa-threadedge.c
-+++ b/gcc/tree-ssa-threadedge.c
-@@ -1299,7 +1299,6 @@ thread_across_edge (gcond *dummy_cond,
- 
-         x = new jump_thread_edge (taken_edge, EDGE_COPY_SRC_JOINER_BLOCK);
- 	path->safe_push (x);
--	found = false;
- 	found = thread_around_empty_blocks (taken_edge,
- 					    dummy_cond,
- 					    avail_exprs_stack,
-diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
-index 17a4fc8e279..e822ffc1a01 100644
---- a/gcc/tree-vect-data-refs.c
-+++ b/gcc/tree-vect-data-refs.c
-@@ -2863,10 +2863,12 @@ strip_conversion (tree op)
- }
- 
- /* Return true if vectorizable_* routines can handle statements STMT1_INFO
--   and STMT2_INFO being in a single group.  */
-+   and STMT2_INFO being in a single group.  When ALLOW_SLP_P, masked loads can
-+   be grouped in SLP mode.  */
- 
- static bool
--can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info)
-+can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info,
-+		   bool allow_slp_p)
- {
-   if (gimple_assign_single_p (stmt1_info->stmt))
-     return gimple_assign_single_p (stmt2_info->stmt);
-@@ -2888,7 +2890,8 @@ can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info)
- 	 like those created by build_mask_conversion.  */
-       tree mask1 = gimple_call_arg (call1, 2);
-       tree mask2 = gimple_call_arg (call2, 2);
--      if (!operand_equal_p (mask1, mask2, 0))
-+      if (!operand_equal_p (mask1, mask2, 0)
-+          && (ifn == IFN_MASK_STORE || !allow_slp_p))
- 	{
- 	  mask1 = strip_conversion (mask1);
- 	  if (!mask1)
-@@ -2974,7 +2977,7 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
- 	      || data_ref_compare_tree (DR_BASE_ADDRESS (dra),
- 					DR_BASE_ADDRESS (drb)) != 0
- 	      || data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)) != 0
--	      || !can_group_stmts_p (stmtinfo_a, stmtinfo_b))
-+	      || !can_group_stmts_p (stmtinfo_a, stmtinfo_b, true))
- 	    break;
- 
- 	  /* Check that the data-refs have the same constant size.  */
-@@ -3059,6 +3062,13 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
- 	  DR_GROUP_NEXT_ELEMENT (lastinfo) = stmtinfo_b;
- 	  lastinfo = stmtinfo_b;
- 
-+	  STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a)
-+	    = !can_group_stmts_p (stmtinfo_a, stmtinfo_b, false);
-+
-+	  if (dump_enabled_p () && STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a))
-+	    dump_printf_loc (MSG_NOTE, vect_location,
-+			     "Load suitable for SLP vectorization only.\n");
-+
- 	  if (init_b == init_prev
- 	      && !to_fixup.add (DR_GROUP_FIRST_ELEMENT (stmtinfo_a))
- 	      && dump_enabled_p ())
-@@ -3446,7 +3456,6 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
-   /* First, we collect all data ref pairs for aliasing checks.  */
-   FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
-     {
--      int comp_res;
-       poly_uint64 lower_bound;
-       tree segment_length_a, segment_length_b;
-       unsigned HOST_WIDE_INT access_size_a, access_size_b;
-@@ -3478,10 +3487,13 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
-       dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (DDR_B (ddr));
-       stmt_vec_info stmt_info_b = dr_info_b->stmt;
- 
-+      bool preserves_scalar_order_p
-+	= vect_preserves_scalar_order_p (dr_info_a, dr_info_b);
-+
-       /* Skip the pair if inter-iteration dependencies are irrelevant
- 	 and intra-iteration dependencies are guaranteed to be honored.  */
-       if (ignore_step_p
--	  && (vect_preserves_scalar_order_p (dr_info_a, dr_info_b)
-+	  && (preserves_scalar_order_p
- 	      || vectorizable_with_step_bound_p (dr_info_a, dr_info_b,
- 						 &lower_bound)))
- 	{
-@@ -3562,14 +3574,11 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
-       align_a = vect_vfa_align (dr_info_a);
-       align_b = vect_vfa_align (dr_info_b);
- 
--      comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_info_a->dr),
--					DR_BASE_ADDRESS (dr_info_b->dr));
--      if (comp_res == 0)
--	comp_res = data_ref_compare_tree (DR_OFFSET (dr_info_a->dr),
--					  DR_OFFSET (dr_info_b->dr));
--
-       /* See whether the alias is known at compilation time.  */
--      if (comp_res == 0
-+      if (operand_equal_p (DR_BASE_ADDRESS (dr_info_a->dr),
-+			   DR_BASE_ADDRESS (dr_info_b->dr), 0)
-+	  && operand_equal_p (DR_OFFSET (dr_info_a->dr),
-+			      DR_OFFSET (dr_info_b->dr), 0)
- 	  && TREE_CODE (DR_STEP (dr_info_a->dr)) == INTEGER_CST
- 	  && TREE_CODE (DR_STEP (dr_info_b->dr)) == INTEGER_CST
- 	  && poly_int_tree_p (segment_length_a)
-@@ -3602,15 +3611,21 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
- 					   stmt_info_b->stmt);
- 	}
- 
--      dr_with_seg_len_pair_t dr_with_seg_len_pair
--	(dr_with_seg_len (dr_info_a->dr, segment_length_a,
--			  access_size_a, align_a),
--	 dr_with_seg_len (dr_info_b->dr, segment_length_b,
--			  access_size_b, align_b));
-+      dr_with_seg_len dr_a (dr_info_a->dr, segment_length_a,
-+			    access_size_a, align_a);
-+      dr_with_seg_len dr_b (dr_info_b->dr, segment_length_b,
-+			    access_size_b, align_b);
-+      /* Canonicalize the order to be the one that's needed for accurate
-+	 RAW, WAR and WAW flags, in cases where the data references are
-+	 well-ordered.  The order doesn't really matter otherwise,
-+	 but we might as well be consistent.  */
-+      if (get_later_stmt (stmt_info_a, stmt_info_b) == stmt_info_a)
-+	std::swap (dr_a, dr_b);
- 
--      /* Canonicalize pairs by sorting the two DR members.  */
--      if (comp_res > 0)
--	std::swap (dr_with_seg_len_pair.first, dr_with_seg_len_pair.second);
-+      dr_with_seg_len_pair_t dr_with_seg_len_pair
-+	(dr_a, dr_b, (preserves_scalar_order_p
-+		      ? dr_with_seg_len_pair_t::WELL_ORDERED
-+		      : dr_with_seg_len_pair_t::REORDERED));
- 
-       comp_alias_ddrs.safe_push (dr_with_seg_len_pair);
-     }
-@@ -4123,7 +4138,7 @@ vect_find_stmt_data_reference (loop_p loop, gimple *stmt,
- */
- 
- opt_result
--vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf)
-+vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf, bool *fatal)
- {
-   struct loop *loop = NULL;
-   unsigned int i;
-@@ -4298,7 +4313,7 @@ vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf)
-       /* Set vectype for STMT.  */
-       scalar_type = TREE_TYPE (DR_REF (dr));
-       STMT_VINFO_VECTYPE (stmt_info)
--	= get_vectype_for_scalar_type (scalar_type);
-+	= get_vectype_for_scalar_type (vinfo, scalar_type);
-       if (!STMT_VINFO_VECTYPE (stmt_info))
-         {
-           if (dump_enabled_p ())
-@@ -4344,13 +4359,18 @@ vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf)
- 	  if (!vect_check_gather_scatter (stmt_info,
- 					  as_a <loop_vec_info> (vinfo),
- 					  &gs_info)
--	      || !get_vectype_for_scalar_type (TREE_TYPE (gs_info.offset)))
--	    return opt_result::failure_at
--	      (stmt_info->stmt,
--	       (gatherscatter == GATHER) ?
--	       "not vectorized: not suitable for gather load %G" :
--	       "not vectorized: not suitable for scatter store %G",
--	       stmt_info->stmt);
-+	      || !get_vectype_for_scalar_type (vinfo,
-+					       TREE_TYPE (gs_info.offset)))
-+	    {
-+	      if (fatal)
-+		*fatal = false;
-+	      return opt_result::failure_at
-+			(stmt_info->stmt,
-+			 (gatherscatter == GATHER)
-+			 ? "not vectorized: not suitable for gather load %G"
-+			 : "not vectorized: not suitable for scatter store %G",
-+			 stmt_info->stmt);
-+	    }
- 	  STMT_VINFO_GATHER_SCATTER_P (stmt_info) = gatherscatter;
- 	}
-     }
-diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c
-index ad1ea4e7b97..39bc2a82b37 100644
---- a/gcc/tree-vect-generic.c
-+++ b/gcc/tree-vect-generic.c
-@@ -694,7 +694,7 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
- 	      tree zero, cst, cond, mask_type;
- 	      gimple *stmt;
- 
--	      mask_type = build_same_sized_truth_vector_type (type);
-+	      mask_type = truth_type_for (type);
- 	      zero = build_zero_cst (type);
- 	      cond = build2 (LT_EXPR, mask_type, op0, zero);
- 	      tree_vector_builder vec (type, nunits, 1);
-diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
-index b3fae5ba4da..20ede85633b 100644
---- a/gcc/tree-vect-loop-manip.c
-+++ b/gcc/tree-vect-loop-manip.c
-@@ -47,6 +47,9 @@ along with GCC; see the file COPYING3.  If not see
- #include "stor-layout.h"
- #include "optabs-query.h"
- #include "vec-perm-indices.h"
-+#include "insn-config.h"
-+#include "rtl.h"
-+#include "recog.h"
- 
- /*************************************************************************
-   Simple Loop Peeling Utilities
-@@ -323,13 +326,18 @@ vect_maybe_permute_loop_masks (gimple_seq *seq, rgroup_masks *dest_rgm,
-   tree src_masktype = src_rgm->mask_type;
-   tree dest_masktype = dest_rgm->mask_type;
-   machine_mode src_mode = TYPE_MODE (src_masktype);
-+  insn_code icode1, icode2;
-   if (dest_rgm->max_nscalars_per_iter <= src_rgm->max_nscalars_per_iter
--      && optab_handler (vec_unpacku_hi_optab, src_mode) != CODE_FOR_nothing
--      && optab_handler (vec_unpacku_lo_optab, src_mode) != CODE_FOR_nothing)
-+      && (icode1 = optab_handler (vec_unpacku_hi_optab,
-+				  src_mode)) != CODE_FOR_nothing
-+      && (icode2 = optab_handler (vec_unpacku_lo_optab,
-+				  src_mode)) != CODE_FOR_nothing)
-     {
-       /* Unpacking the source masks gives at least as many mask bits as
- 	 we need.  We can then VIEW_CONVERT any excess bits away.  */
--      tree unpack_masktype = vect_halve_mask_nunits (src_masktype);
-+      machine_mode dest_mode = insn_data[icode1].operand[0].mode;
-+      gcc_assert (dest_mode == insn_data[icode2].operand[0].mode);
-+      tree unpack_masktype = vect_halve_mask_nunits (src_masktype, dest_mode);
-       for (unsigned int i = 0; i < dest_rgm->masks.length (); ++i)
- 	{
- 	  tree src = src_rgm->masks[i / 2];
-@@ -1745,7 +1753,7 @@ vect_update_init_of_dr (struct data_reference *dr, tree niters, tree_code code)
-    Apply vect_update_inits_of_dr to all accesses in LOOP_VINFO.
-    CODE and NITERS are as for vect_update_inits_of_dr.  */
- 
--static void
-+void
- vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters,
- 			  tree_code code)
- {
-@@ -1755,21 +1763,12 @@ vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters,
- 
-   DUMP_VECT_SCOPE ("vect_update_inits_of_dr");
- 
--  /* Adjust niters to sizetype and insert stmts on loop preheader edge.  */
-+  /* Adjust niters to sizetype.  We used to insert the stmts on the loop
-+     preheader edge here, but since we might use these niters to update the
-+     epilogue's niters and data references we can't insert them here, as this
-+     definition might not always dominate its uses.  */
-   if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
--    {
--      gimple_seq seq;
--      edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
--      tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
--
--      niters = fold_convert (sizetype, niters);
--      niters = force_gimple_operand (niters, &seq, false, var);
--      if (seq)
--	{
--	  basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
--	  gcc_assert (!new_bb);
--	}
--    }
-+    niters = fold_convert (sizetype, niters);
- 
-   FOR_EACH_VEC_ELT (datarefs, i, dr)
-     {
-@@ -2032,6 +2031,29 @@ vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
-   *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
- }
- 
-+/* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP,
-+   this function searches for the corresponding lcssa phi node in exit
-+   bb of LOOP.  If it is found, return the phi result; otherwise return
-+   NULL.  */
-+
-+static tree
-+find_guard_arg (class loop *loop, class loop *epilog ATTRIBUTE_UNUSED,
-+		gphi *lcssa_phi)
-+{
-+  gphi_iterator gsi;
-+  edge e = single_exit (loop);
-+
-+  gcc_assert (single_pred_p (e->dest));
-+  for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
-+    {
-+      gphi *phi = gsi.phi ();
-+      if (operand_equal_p (PHI_ARG_DEF (phi, 0),
-+			   PHI_ARG_DEF (lcssa_phi, 0), 0))
-+	return PHI_RESULT (phi);
-+    }
-+  return NULL_TREE;
-+}
-+
- /* Function slpeel_tree_duplicate_loop_to_edge_cfg duplicates FIRST/SECOND
-    from SECOND/FIRST and puts it at the original loop's preheader/exit
-    edge, the two loops are arranged as below:
-@@ -2119,6 +2141,29 @@ slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
- 	 incoming edge.  */
-       adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
-     }
-+
-+  /* For epilogue peeling we have to make sure to copy all LC PHIs
-+     for correct vectorization of live stmts.  */
-+  if (loop == first)
-+    {
-+      basic_block orig_exit = single_exit (second)->dest;
-+      for (gsi_orig = gsi_start_phis (orig_exit);
-+	   !gsi_end_p (gsi_orig); gsi_next (&gsi_orig))
-+	{
-+	  gphi *orig_phi = gsi_orig.phi ();
-+	  tree orig_arg = PHI_ARG_DEF (orig_phi, 0);
-+	  if (TREE_CODE (orig_arg) != SSA_NAME || virtual_operand_p  (orig_arg))
-+	    continue;
-+
-+	  /* Already created in the above loop.   */
-+	  if (find_guard_arg (first, second, orig_phi))
-+	    continue;
-+
-+	  tree new_res = copy_ssa_name (orig_arg);
-+	  gphi *lcphi = create_phi_node (new_res, between_bb);
-+	  add_phi_arg (lcphi, orig_arg, single_exit (first), UNKNOWN_LOCATION);
-+	}
-+    }
- }
- 
- /* Function slpeel_add_loop_guard adds guard skipping from the beginning
-@@ -2203,29 +2248,6 @@ slpeel_update_phi_nodes_for_guard1 (struct loop *skip_loop,
-     }
- }
- 
--/* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP,
--   this function searches for the corresponding lcssa phi node in exit
--   bb of LOOP.  If it is found, return the phi result; otherwise return
--   NULL.  */
--
--static tree
--find_guard_arg (struct loop *loop, struct loop *epilog ATTRIBUTE_UNUSED,
--		gphi *lcssa_phi)
--{
--  gphi_iterator gsi;
--  edge e = single_exit (loop);
--
--  gcc_assert (single_pred_p (e->dest));
--  for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
--    {
--      gphi *phi = gsi.phi ();
--      if (operand_equal_p (PHI_ARG_DEF (phi, 0),
--			   PHI_ARG_DEF (lcssa_phi, 0), 0))
--	return PHI_RESULT (phi);
--    }
--  return NULL_TREE;
--}
--
- /* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
-    from LOOP.  Function slpeel_add_loop_guard adds guard skipping from a
-    point between the two loops to the end of EPILOG.  Edges GUARD_EDGE
-@@ -2296,12 +2318,14 @@ slpeel_update_phi_nodes_for_guard2 (struct loop *loop, struct loop *epilog,
-     {
-       gphi *update_phi = gsi.phi ();
-       tree old_arg = PHI_ARG_DEF (update_phi, 0);
--      /* This loop-closed-phi actually doesn't represent a use out of the
--	 loop - the phi arg is a constant.  */
--      if (TREE_CODE (old_arg) != SSA_NAME)
--	continue;
- 
--      tree merge_arg = get_current_def (old_arg);
-+      tree merge_arg = NULL_TREE;
-+
-+      /* If the old argument is a SSA_NAME use its current_def.  */
-+      if (TREE_CODE (old_arg) == SSA_NAME)
-+	merge_arg = get_current_def (old_arg);
-+      /* If it's a constant or doesn't have a current_def, just use the old
-+	 argument.  */
-       if (!merge_arg)
- 	merge_arg = old_arg;
- 
-@@ -2412,7 +2436,22 @@ slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)
- 
-    Note this function peels prolog and epilog only if it's necessary,
-    as well as guards.
--   Returns created epilogue or NULL.
-+   This function returns the epilogue loop if a decision was made to vectorize
-+   it, otherwise NULL.
-+
-+   The analysis resulting in this epilogue loop's loop_vec_info was performed
-+   in the same vect_analyze_loop call as the main loop's.  At that time
-+   vect_analyze_loop constructs a list of accepted loop_vec_info's for lower
-+   vectorization factors than the main loop.  This list is stored in the main
-+   loop's loop_vec_info in the 'epilogue_vinfos' member.  Every time we decide
-+   to vectorize the epilogue loop for a lower vectorization factor, the
-+   loop_vec_info sitting at the top of the epilogue_vinfos list is removed,
-+   updated and linked to the epilogue loop.  This is later used to vectorize
-+   the epilogue.  The reason the loop_vec_info needs updating is that it was
-+   constructed based on the original main loop, and the epilogue loop is a
-+   copy of this loop, so all links pointing to statements in the original loop
-+   need updating.  Furthermore, these loop_vec_infos share the
-+   data_reference's records, which will also need to be updated.
- 
-    TODO: Guard for prefer_scalar_loop should be emitted along with
-    versioning conditions if loop versioning is needed.  */
-@@ -2422,7 +2461,8 @@ struct loop *
- vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
- 		 tree *niters_vector, tree *step_vector,
- 		 tree *niters_vector_mult_vf_var, int th,
--		 bool check_profitability, bool niters_no_overflow)
-+		 bool check_profitability, bool niters_no_overflow,
-+		 tree *advance, drs_init_vec &orig_drs_init)
- {
-   edge e, guard_e;
-   tree type = TREE_TYPE (niters), guard_cond;
-@@ -2430,6 +2470,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
-   profile_probability prob_prolog, prob_vector, prob_epilog;
-   int estimated_vf;
-   int prolog_peeling = 0;
-+  bool vect_epilogues = loop_vinfo->epilogue_vinfos.length () > 0;
-   /* We currently do not support prolog peeling if the target alignment is not
-      known at compile time.  'vect_gen_prolog_loop_niters' depends on the
-      target alignment being constant.  */
-@@ -2483,19 +2524,77 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
-   int bound_prolog = 0;
-   if (prolog_peeling)
-     niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
--						 &bound_prolog);
-+						  &bound_prolog);
-   else
-     niters_prolog = build_int_cst (type, 0);
- 
-+  loop_vec_info epilogue_vinfo = NULL;
-+  if (vect_epilogues)
-+    {
-+      epilogue_vinfo = loop_vinfo->epilogue_vinfos[0];
-+      loop_vinfo->epilogue_vinfos.ordered_remove (0);
-+    }
-+
-+  tree niters_vector_mult_vf = NULL_TREE;
-+  /* Save NITERS before the loop, as this may be changed by the prologue.  */
-+  tree before_loop_niters = LOOP_VINFO_NITERS (loop_vinfo);
-+  edge update_e = NULL, skip_e = NULL;
-+  unsigned int lowest_vf = constant_lower_bound (vf);
-+  /* If we know the number of scalar iterations for the main loop we should
-+     check whether after the main loop there are enough iterations left over
-+     for the epilogue.  */
-+  if (vect_epilogues
-+      && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
-+      && prolog_peeling >= 0
-+      && known_eq (vf, lowest_vf))
-+    {
-+      unsigned HOST_WIDE_INT eiters
-+	= (LOOP_VINFO_INT_NITERS (loop_vinfo)
-+	   - LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
-+
-+      eiters -= prolog_peeling;
-+      eiters
-+	= eiters % lowest_vf + LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
-+
-+      unsigned int ratio;
-+      unsigned int epilogue_gaps
-+	= LOOP_VINFO_PEELING_FOR_GAPS (epilogue_vinfo);
-+      while (!(constant_multiple_p
-+	       (GET_MODE_SIZE (loop_vinfo->vector_mode),
-+		GET_MODE_SIZE (epilogue_vinfo->vector_mode), &ratio)
-+	       && eiters >= lowest_vf / ratio + epilogue_gaps))
-+	{
-+	  delete epilogue_vinfo;
-+	  epilogue_vinfo = NULL;
-+	  if (loop_vinfo->epilogue_vinfos.length () == 0)
-+	    {
-+	      vect_epilogues = false;
-+	      break;
-+	    }
-+	  epilogue_vinfo = loop_vinfo->epilogue_vinfos[0];
-+	  loop_vinfo->epilogue_vinfos.ordered_remove (0);
-+	  epilogue_gaps = LOOP_VINFO_PEELING_FOR_GAPS (epilogue_vinfo);
-+	}
-+    }
-   /* Prolog loop may be skipped.  */
-   bool skip_prolog = (prolog_peeling != 0);
--  /* Skip to epilog if scalar loop may be preferred.  It's only needed
--     when we peel for epilog loop and when it hasn't been checked with
--     loop versioning.  */
-+  /* Skip this loop to the epilog when there are not enough iterations to
-+     enter the vectorized loop.  If true we should perform runtime checks on
-+     the NITERS to decide whether to skip the current vectorized loop.  If we
-+     know the number of scalar iterations, we only add such a check when that
-+     number may be smaller than the number of iterations required to enter
-+     this loop; for this we use the upper bounds on the prolog and epilog
-+     peeling.  When we don't know the number of iterations and don't require
-+     versioning, no skip is needed because we have asserted that there are
-+     enough scalar iterations to enter the main loop.  When we are versioning
-+     we only add such a skip if we have chosen to vectorize the epilogue.  */
-   bool skip_vector = (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
- 		      ? maybe_lt (LOOP_VINFO_INT_NITERS (loop_vinfo),
- 				  bound_prolog + bound_epilog)
--		      : !LOOP_REQUIRES_VERSIONING (loop_vinfo));
-+		      : (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
-+			 || vect_epilogues));
-   /* Epilog loop must be executed if the number of iterations for epilog
-      loop is known at compile time, otherwise we need to add a check at
-      the end of vector loop and skip to the end of epilog loop.  */
-@@ -2525,6 +2624,12 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
- 
-   dump_user_location_t loop_loc = find_loop_location (loop);
-   struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
-+  if (vect_epilogues)
-+    /* Make sure to set the epilogue's epilogue scalar loop, such that we can
-+       use the original scalar loop as remaining epilogue if necessary.  */
-+    LOOP_VINFO_SCALAR_LOOP (epilogue_vinfo)
-+      = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
-+
-   if (prolog_peeling)
-     {
-       e = loop_preheader_edge (loop);
-@@ -2571,6 +2676,15 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
- 	  scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog);
- 	  scale_loop_profile (prolog, prob_prolog, bound_prolog);
- 	}
-+
-+      /* Save original inits for each data_reference before advancing them with
-+	 NITERS_PROLOG.  */
-+      unsigned int i;
-+      struct data_reference *dr;
-+      vec<data_reference_p> datarefs = loop_vinfo->shared->datarefs;
-+      FOR_EACH_VEC_ELT (datarefs, i, dr)
-+	orig_drs_init.safe_push (std::make_pair (dr, DR_OFFSET (dr)));
-+
-       /* Update init address of DRs.  */
-       vect_update_inits_of_drs (loop_vinfo, niters_prolog, PLUS_EXPR);
-       /* Update niters for vector loop.  */
-@@ -2605,8 +2719,15 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
- 			   "loop can't be duplicated to exit edge.\n");
- 	  gcc_unreachable ();
- 	}
--      /* Peel epilog and put it on exit edge of loop.  */
--      epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
-+      /* Peel epilog and put it on exit edge of loop.  If we are vectorizing
-+	 said epilog then we should use a copy of the main loop as a starting
-+	 point.  This loop may have already had some preliminary transformations
-+	 to allow for more optimal vectorization, for example if-conversion.
-+	 If we are not vectorizing the epilog then we should use the scalar loop
-+	 as the transformations mentioned above make less or no sense when not
-+	 vectorizing.  */
-+      epilog = vect_epilogues ? get_loop_copy (loop) : scalar_loop;
-+      epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, epilog, e);
-       if (!epilog)
- 	{
- 	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
-@@ -2635,6 +2756,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
- 					   guard_to, guard_bb,
- 					   prob_vector.invert (),
- 					   irred_flag);
-+	  skip_e = guard_e;
- 	  e = EDGE_PRED (guard_to, 0);
- 	  e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
- 	  slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
-@@ -2656,7 +2778,6 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
- 	}
- 
-       basic_block bb_before_epilog = loop_preheader_edge (epilog)->src;
--      tree niters_vector_mult_vf;
-       /* If loop is peeled for non-zero constant times, now niters refers to
- 	 orig_niters - prolog_peeling, it won't overflow even the orig_niters
- 	 overflows.  */
-@@ -2679,7 +2800,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
-       /* Update IVs of original loop as if they were advanced by
- 	 niters_vector_mult_vf steps.  */
-       gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
--      edge update_e = skip_vector ? e : loop_preheader_edge (epilog);
-+      update_e = skip_vector ? e : loop_preheader_edge (epilog);
-       vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
- 					update_e);
- 
-@@ -2720,10 +2841,75 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
-       adjust_vec_debug_stmts ();
-       scev_reset ();
-     }
-+
-+  if (vect_epilogues)
-+    {
-+      epilog->aux = epilogue_vinfo;
-+      LOOP_VINFO_LOOP (epilogue_vinfo) = epilog;
-+
-+      loop_constraint_clear (epilog, LOOP_C_INFINITE);
-+
-+      /* We must now calculate the number of iterations (NITERS) performed by
-+	 the previous loop and the number (EPILOGUE_NITERS) still to be
-+	 performed by the epilogue.  */
-+      tree niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters_vector_mult_vf),
-+				 niters_prolog, niters_vector_mult_vf);
-+
-+      /* If skip_vector, we may have skipped the previous loop; insert a
-+	 phi-node to determine whether we are coming from the previous
-+	 vectorized loop via the update_e edge or from the skip_vector basic
-+	 block via the skip_e edge.  */
-+      if (skip_vector)
-+	{
-+	  gcc_assert (update_e != NULL && skip_e != NULL);
-+	  gphi *new_phi = create_phi_node (make_ssa_name (TREE_TYPE (niters)),
-+					   update_e->dest);
-+	  tree new_ssa = make_ssa_name (TREE_TYPE (niters));
-+	  gimple *stmt = gimple_build_assign (new_ssa, niters);
-+	  gimple_stmt_iterator gsi;
-+	  if (TREE_CODE (niters_vector_mult_vf) == SSA_NAME
-+	      && SSA_NAME_DEF_STMT (niters_vector_mult_vf)->bb != NULL)
-+	    {
-+	      gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (niters_vector_mult_vf));
-+	      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
-+	    }
-+	  else
-+	    {
-+	      gsi = gsi_last_bb (update_e->src);
-+	      gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
-+	    }
-+
-+	  niters = new_ssa;
-+	  add_phi_arg (new_phi, niters, update_e, UNKNOWN_LOCATION);
-+	  add_phi_arg (new_phi, build_zero_cst (TREE_TYPE (niters)), skip_e,
-+		       UNKNOWN_LOCATION);
-+	  niters = PHI_RESULT (new_phi);
-+	}
-+
-+      /* Subtract the number of iterations performed by the vectorized loop
-+	 from the number of total iterations.  */
-+      tree epilogue_niters = fold_build2 (MINUS_EXPR, TREE_TYPE (niters),
-+					  before_loop_niters,
-+					  niters);
-+
-+      LOOP_VINFO_NITERS (epilogue_vinfo) = epilogue_niters;
-+      LOOP_VINFO_NITERSM1 (epilogue_vinfo)
-+	= fold_build2 (MINUS_EXPR, TREE_TYPE (epilogue_niters),
-+		       epilogue_niters,
-+		       build_one_cst (TREE_TYPE (epilogue_niters)));
-+
-+      /* Set ADVANCE to the number of iterations performed by the previous
-+	 loop and its prologue.  */
-+      *advance = niters;
-+
-+      /* Redo the peeling for niter analysis as the NITERs and alignment
-+	 may have been updated to take the main loop into account.  */
-+      determine_peel_for_niter (epilogue_vinfo);
-+    }
-+
-   adjust_vec.release ();
-   free_original_copy_tables ();
- 
--  return epilog;
-+  return vect_epilogues ? epilog : NULL;
- }
- 
- /* Function vect_create_cond_for_niters_checks.
-@@ -2987,9 +3173,7 @@ vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
-    *COND_EXPR_STMT_LIST.  */
- 
- struct loop *
--vect_loop_versioning (loop_vec_info loop_vinfo,
--		      unsigned int th, bool check_profitability,
--		      poly_uint64 versioning_threshold)
-+vect_loop_versioning (loop_vec_info loop_vinfo)
- {
-   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
-   struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
-@@ -3009,10 +3193,15 @@ vect_loop_versioning (loop_vec_info loop_vinfo,
-   bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
-   bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
-   bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
-+  poly_uint64 versioning_threshold
-+    = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo);
-   tree version_simd_if_cond
-     = LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (loop_vinfo);
-+  unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
- 
--  if (check_profitability)
-+  if (th >= vect_vf_for_cost (loop_vinfo)
-+      && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
-+      && !ordered_p (th, versioning_threshold))
-     cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
- 			     build_int_cst (TREE_TYPE (scalar_loop_iters),
- 					    th - 1));
-diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
-index 0308b26b808..6cbdfd1ad1a 100644
---- a/gcc/tree-vect-loop.c
-+++ b/gcc/tree-vect-loop.c
-@@ -154,6 +154,8 @@ along with GCC; see the file COPYING3.  If not see
- */
- 
- static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *);
-+static stmt_vec_info vect_is_simple_reduction (loop_vec_info, stmt_vec_info,
-+					       bool *);
- 
- /* Subroutine of vect_determine_vf_for_stmt that handles only one
-    statement.  VECTYPE_MAYBE_SET_P is true if STMT_VINFO_VECTYPE
-@@ -325,7 +327,7 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
- 				 "get vectype for scalar type:  %T\n",
- 				 scalar_type);
- 
--	      vectype = get_vectype_for_scalar_type (scalar_type);
-+	      vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type);
- 	      if (!vectype)
- 		return opt_result::failure_at (phi,
- 					       "not vectorized: unsupported "
-@@ -559,19 +561,19 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
- 		  && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
- 
-       stmt_vec_info reduc_stmt_info
--	= vect_force_simple_reduction (loop_vinfo, stmt_vinfo,
--				       &double_reduc, false);
-+	= vect_is_simple_reduction (loop_vinfo, stmt_vinfo, &double_reduc);
-       if (reduc_stmt_info)
-         {
--          if (double_reduc)
--            {
--              if (dump_enabled_p ())
--                dump_printf_loc (MSG_NOTE, vect_location,
-+	  STMT_VINFO_REDUC_DEF (stmt_vinfo) = reduc_stmt_info;
-+	  STMT_VINFO_REDUC_DEF (reduc_stmt_info) = stmt_vinfo;
-+	  if (double_reduc)
-+	    {
-+	      if (dump_enabled_p ())
-+		dump_printf_loc (MSG_NOTE, vect_location,
- 				 "Detected double reduction.\n");
- 
-               STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
--	      STMT_VINFO_DEF_TYPE (reduc_stmt_info)
--		= vect_double_reduction_def;
-+	      STMT_VINFO_DEF_TYPE (reduc_stmt_info) = vect_double_reduction_def;
-             }
-           else
-             {
-@@ -582,7 +584,6 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
- 				     "Detected vectorizable nested cycle.\n");
- 
-                   STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
--		  STMT_VINFO_DEF_TYPE (reduc_stmt_info) = vect_nested_cycle;
-                 }
-               else
-                 {
-@@ -688,13 +689,16 @@ vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo)
- 	stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (first);
- 	while (next)
- 	  {
--	    if (! STMT_VINFO_IN_PATTERN_P (next))
-+	    if (! STMT_VINFO_IN_PATTERN_P (next)
-+		|| STMT_VINFO_REDUC_IDX (STMT_VINFO_RELATED_STMT (next)) == -1)
- 	      break;
- 	    next = REDUC_GROUP_NEXT_ELEMENT (next);
- 	  }
--	/* If not all stmt in the chain are patterns try to handle
--	   the chain without patterns.  */
--	if (! next)
-+	/* If not all stmt in the chain are patterns or if we failed
-+	   to update STMT_VINFO_REDUC_IDX try to handle the chain
-+	   without patterns.  */
-+	if (! next
-+	    && STMT_VINFO_REDUC_IDX (STMT_VINFO_RELATED_STMT (first)) != -1)
- 	  {
- 	    vect_fixup_reduc_chain (first);
- 	    LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i]
-@@ -730,9 +734,7 @@ vect_get_loop_niters (struct loop *loop, tree *assumptions,
-   if (!exit)
-     return cond;
- 
--  niter = chrec_dont_know;
-   may_be_zero = NULL_TREE;
--  niter_assumptions = boolean_true_node;
-   if (!number_of_iterations_exit_assumptions (loop, exit, &niter_desc, NULL)
-       || chrec_contains_undetermined (niter_desc.niter))
-     return cond;
-@@ -826,6 +828,8 @@ _loop_vec_info::_loop_vec_info (struct loop *loop_in, vec_info_shared *shared)
-     ivexpr_map (NULL),
-     slp_unrolling_factor (1),
-     single_scalar_iteration_cost (0),
-+    vec_outside_cost (0),
-+    vec_inside_cost (0),
-     vectorizable (false),
-     can_fully_mask_p (true),
-     fully_masked_p (false),
-@@ -885,6 +889,8 @@ _loop_vec_info::_loop_vec_info (struct loop *loop_in, vec_info_shared *shared)
- 	    }
- 	}
-     }
-+
-+  epilogue_vinfos.create (6);
- }
- 
- /* Free all levels of MASKS.  */
-@@ -959,6 +965,7 @@ _loop_vec_info::~_loop_vec_info ()
- 
-   release_vec_loop_masks (&masks);
-   delete ivexpr_map;
-+  epilogue_vinfos.release ();
- 
-   loop->aux = NULL;
- }
-@@ -1431,8 +1438,8 @@ vect_update_vf_for_slp (loop_vec_info loop_vinfo)
- 	dump_printf_loc (MSG_NOTE, vect_location,
- 			 "Loop contains SLP and non-SLP stmts\n");
-       /* Both the vectorization factor and unroll factor have the form
--	 current_vector_size * X for some rational X, so they must have
--	 a common multiple.  */
-+	 GET_MODE_SIZE (loop_vinfo->vector_mode) * X for some rational X,
-+	 so they must have a common multiple.  */
-       vectorization_factor
- 	= force_common_multiple (vectorization_factor,
- 				 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
-@@ -1535,12 +1542,18 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
-                   phi_op = PHI_ARG_DEF (phi, 0);
- 		  stmt_vec_info op_def_info = loop_vinfo->lookup_def (phi_op);
- 		  if (!op_def_info)
--		    return opt_result::failure_at (phi, "unsupported phi");
-+		    return opt_result::failure_at (phi, "unsupported phi\n");
- 
- 		  if (STMT_VINFO_RELEVANT (op_def_info) != vect_used_in_outer
- 		      && (STMT_VINFO_RELEVANT (op_def_info)
- 			  != vect_used_in_outer_by_reduction))
--		    return opt_result::failure_at (phi, "unsupported phi");
-+		    return opt_result::failure_at (phi, "unsupported phi\n");
-+
-+		  if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def
-+		       || (STMT_VINFO_DEF_TYPE (stmt_info)
-+			   == vect_double_reduction_def))
-+		      && !vectorizable_lc_phi (stmt_info, NULL, NULL))
-+		    return opt_result::failure_at (phi, "unsupported phi\n");
-                 }
- 
-               continue;
-@@ -1564,18 +1577,19 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
- 		ok = vectorizable_induction (stmt_info, NULL, NULL, NULL,
- 					     &cost_vec);
- 	      else if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
-+			|| (STMT_VINFO_DEF_TYPE (stmt_info)
-+			    == vect_double_reduction_def)
- 			|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle)
- 		       && ! PURE_SLP_STMT (stmt_info))
--		ok = vectorizable_reduction (stmt_info, NULL, NULL, NULL, NULL,
--					     &cost_vec);
-+		ok = vectorizable_reduction (stmt_info, NULL, NULL, &cost_vec);
-             }
- 
- 	  /* SLP PHIs are tested by vect_slp_analyze_node_operations.  */
- 	  if (ok
- 	      && STMT_VINFO_LIVE_P (stmt_info)
- 	      && !PURE_SLP_STMT (stmt_info))
--	    ok = vectorizable_live_operation (stmt_info, NULL, NULL, -1, NULL,
--					      &cost_vec);
-+	    ok = vectorizable_live_operation (stmt_info, NULL, NULL, NULL,
-+					      -1, NULL, &cost_vec);
- 
-           if (!ok)
- 	    return opt_result::failure_at (phi,
-@@ -1692,9 +1706,20 @@ vect_analyze_loop_costing (loop_vec_info loop_vinfo)
-       return 0;
-     }
- 
--  HOST_WIDE_INT estimated_niter = estimated_stmt_executions_int (loop);
--  if (estimated_niter == -1)
--    estimated_niter = likely_max_stmt_executions_int (loop);
-+  HOST_WIDE_INT estimated_niter;
-+
-+  /* If we are vectorizing an epilogue then we know the maximum number of
-+     scalar iterations it will cover is at least one lower than the
-+     vectorization factor of the main loop.  */
-+  if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
-+    estimated_niter
-+      = vect_vf_for_cost (LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo)) - 1;
-+  else
-+    {
-+      estimated_niter = estimated_stmt_executions_int (loop);
-+      if (estimated_niter == -1)
-+	estimated_niter = likely_max_stmt_executions_int (loop);
-+    }
-   if (estimated_niter != -1
-       && ((unsigned HOST_WIDE_INT) estimated_niter
- 	  < MAX (th, (unsigned) min_profitable_estimate)))
-@@ -1774,6 +1799,101 @@ vect_get_datarefs_in_loop (loop_p loop, basic_block *bbs,
-   return opt_result::success ();
- }
- 
-+/* Look for SLP-only access groups and turn each individual access into its own
-+   group.  */
-+static void
-+vect_dissolve_slp_only_groups (loop_vec_info loop_vinfo)
-+{
-+  unsigned int i;
-+  struct data_reference *dr;
-+
-+  DUMP_VECT_SCOPE ("vect_dissolve_slp_only_groups");
-+
-+  vec<data_reference_p> datarefs = loop_vinfo->shared->datarefs;
-+  FOR_EACH_VEC_ELT (datarefs, i, dr)
-+    {
-+      gcc_assert (DR_REF (dr));
-+      stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (DR_STMT (dr));
-+
-+      /* Check if the load is a part of an interleaving chain.  */
-+      if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
-+	{
-+	  stmt_vec_info first_element = DR_GROUP_FIRST_ELEMENT (stmt_info);
-+	  unsigned int group_size = DR_GROUP_SIZE (first_element);
-+
-+	  /* Check if SLP-only groups.  */
-+	  if (!STMT_SLP_TYPE (stmt_info)
-+	      && STMT_VINFO_SLP_VECT_ONLY (first_element))
-+	    {
-+	      /* Dissolve the group.  */
-+	      STMT_VINFO_SLP_VECT_ONLY (first_element) = false;
-+
-+	      stmt_vec_info vinfo = first_element;
-+	      while (vinfo)
-+		{
-+		  stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (vinfo);
-+		  DR_GROUP_FIRST_ELEMENT (vinfo) = vinfo;
-+		  DR_GROUP_NEXT_ELEMENT (vinfo) = NULL;
-+		  DR_GROUP_SIZE (vinfo) = 1;
-+		  DR_GROUP_GAP (vinfo) = group_size - 1;
-+		  vinfo = next;
-+		}
-+	    }
-+	}
-+    }
-+}
-+
-+
-+/* Decides whether we need to create an epilogue loop to handle
-+   remaining scalar iterations and sets PEELING_FOR_NITERS accordingly.  */
-+
-+void
-+determine_peel_for_niter (loop_vec_info loop_vinfo)
-+{
-+  LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
-+
-+  unsigned HOST_WIDE_INT const_vf;
-+  HOST_WIDE_INT max_niter
-+    = likely_max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
-+
-+  unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
-+  if (!th && LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo))
-+    th = LOOP_VINFO_COST_MODEL_THRESHOLD (LOOP_VINFO_ORIG_LOOP_INFO
-+					  (loop_vinfo));
-+
-+  if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
-+    /* The main loop handles all iterations.  */
-+    LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
-+  else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
-+	   && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
-+    {
-+      /* Work out the (constant) number of iterations that need to be
-+	 peeled for reasons other than niters.  */
-+      unsigned int peel_niter = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
-+      if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
-+	peel_niter += 1;
-+      if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter,
-+		       LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
-+	LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
-+    }
-+  else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
-+	   /* ??? When peeling for gaps but not alignment, we could
-+	      try to check whether the (variable) niters is known to be
-+	      VF * N + 1.  That's something of a niche case though.  */
-+	   || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
-+	   || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&const_vf)
-+	   || ((tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
-+		< (unsigned) exact_log2 (const_vf))
-+	       /* In case of versioning, check if the maximum number of
-+		  iterations is greater than th.  If they are identical,
-+		  the epilogue is unnecessary.  */
-+	       && (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
-+		   || ((unsigned HOST_WIDE_INT) max_niter
-+		       > (th / const_vf) * const_vf))))
-+    LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
-+}
-+
-+
- /* Function vect_analyze_loop_2.
- 
-    Apply a set of analyses on LOOP, and create a loop_vec_info struct
-@@ -1786,6 +1906,15 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts)
-   int res;
-   unsigned int max_vf = MAX_VECTORIZATION_FACTOR;
-   poly_uint64 min_vf = 2;
-+  loop_vec_info orig_loop_vinfo = NULL;
-+
-+  /* If we are dealing with an epilogue then orig_loop_vinfo points to the
-+     loop_vec_info of the first vectorized loop.  */
-+  if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
-+    orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo);
-+  else
-+    orig_loop_vinfo = loop_vinfo;
-+  gcc_assert (orig_loop_vinfo);
- 
-   /* The first group of checks is independent of the vector size.  */
-   fatal = true;
-@@ -1824,7 +1953,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts)
-   /* Analyze the data references and also adjust the minimal
-      vectorization factor according to the loads and stores.  */
- 
--  ok = vect_analyze_data_refs (loop_vinfo, &min_vf);
-+  ok = vect_analyze_data_refs (loop_vinfo, &min_vf, &fatal);
-   if (!ok)
-     {
-       if (dump_enabled_p ())
-@@ -1855,7 +1984,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts)
- 
-   /* Data-flow analysis to detect stmts that do not need to be vectorized.  */
- 
--  ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
-+  ok = vect_mark_stmts_to_be_vectorized (loop_vinfo, &fatal);
-   if (!ok)
-     {
-       if (dump_enabled_p ())
-@@ -1901,7 +2030,6 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts)
-   vect_compute_single_scalar_iteration_cost (loop_vinfo);
- 
-   poly_uint64 saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
--  unsigned th;
- 
-   /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
-   ok = vect_analyze_slp (loop_vinfo, *n_stmts);
-@@ -1941,9 +2069,6 @@ start_over:
- 		   LOOP_VINFO_INT_NITERS (loop_vinfo));
-     }
- 
--  HOST_WIDE_INT max_niter
--    = likely_max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
--
-   /* Analyze the alignment of the data-refs in the loop.
-      Fail if a data reference is found that cannot be vectorized.  */
- 
-@@ -1990,6 +2115,9 @@ start_over:
- 	}
-     }
- 
-+  /* Dissolve SLP-only groups.  */
-+  vect_dissolve_slp_only_groups (loop_vinfo);
-+
-   /* Scan all the remaining operations in the loop that are not subject
-      to SLP and make sure they are vectorizable.  */
-   ok = vect_analyze_loop_operations (loop_vinfo);
-@@ -2032,6 +2160,16 @@ start_over:
- 				       " support peeling for gaps.\n");
-     }
- 
-+  /* If we're vectorizing an epilogue loop, we either need a fully-masked
-+     loop or a loop that has a lower VF than the main loop.  */
-+  if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)
-+      && !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
-+      && maybe_ge (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
-+		   LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo)))
-+    return opt_result::failure_at (vect_location,
-+				   "Vectorization factor too high for"
-+				   " epilogue loop.\n");
-+
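The added check only admits an epilogue loop_vinfo whose VF is strictly lower than the main loop's, unless the epilogue is fully masked. A simplified scalar version of the condition, with the poly_uint64 comparison reduced to plain integers for illustration:

#include <stdbool.h>

/* E.g. a main loop at VF 16 may get an (unmasked) epilogue at VF 8 or 4,
   but an epilogue at VF 16 is rejected: it could not make progress on
   fewer than 16 remaining iterations.  */
static bool
epilogue_vf_acceptable (unsigned epilogue_vf, unsigned main_vf,
                        bool fully_masked)
{
  return fully_masked || epilogue_vf < main_vf;
}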
-   /* Check the costings of the loop make vectorizing worthwhile.  */
-   res = vect_analyze_loop_costing (loop_vinfo);
-   if (res < 0)
-@@ -2044,42 +2182,7 @@ start_over:
-     return opt_result::failure_at (vect_location,
- 				   "Loop costings not worthwhile.\n");
- 
--  /* Decide whether we need to create an epilogue loop to handle
--     remaining scalar iterations.  */
--  th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
--
--  unsigned HOST_WIDE_INT const_vf;
--  if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
--    /* The main loop handles all iterations.  */
--    LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
--  else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
--	   && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
--    {
--      /* Work out the (constant) number of iterations that need to be
--	 peeled for reasons other than niters.  */
--      unsigned int peel_niter = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
--      if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
--	peel_niter += 1;
--      if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter,
--		       LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
--	LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
--    }
--  else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
--	   /* ??? When peeling for gaps but not alignment, we could
--	      try to check whether the (variable) niters is known to be
--	      VF * N + 1.  That's something of a niche case though.  */
--	   || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
--	   || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&const_vf)
--	   || ((tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
--		< (unsigned) exact_log2 (const_vf))
--	       /* In case of versioning, check if the maximum number of
--		  iterations is greater than th.  If they are identical,
--		  the epilogue is unnecessary.  */
--	       && (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
--		   || ((unsigned HOST_WIDE_INT) max_niter
--		       > (th / const_vf) * const_vf))))
--    LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
--
-+  determine_peel_for_niter (loop_vinfo);
-   /* If an epilogue loop is required make sure we can create one.  */
-   if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
-       || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
-@@ -2101,10 +2204,21 @@ start_over:
-   /* During peeling, we need to check if number of loop iterations is
-      enough for both peeled prolog loop and vector loop.  This check
-      can be merged along with threshold check of loop versioning, so
--     increase threshold for this case if necessary.  */
--  if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
-+     increase threshold for this case if necessary.
-+
-+     If we are analyzing an epilogue we still want to check what its
-+     versioning threshold would be.  If we decide to vectorize the epilogues we
-+     will want to use the lowest versioning threshold of all epilogues and main
-+     loop.  This will enable us to enter a vectorized epilogue even when
-+     versioning the loop.  We can't simply check whether the epilogue requires
-+     versioning though since we may have skipped some versioning checks when
-+     analyzing the epilogue.  For instance, checks for alias versioning will be
-+     skipped when dealing with epilogues as we assume we already checked them
-+     for the main loop.  So instead we always check the 'orig_loop_vinfo'.  */
-+  if (LOOP_REQUIRES_VERSIONING (orig_loop_vinfo))
-     {
-       poly_uint64 niters_th = 0;
-+      unsigned int th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
- 
-       if (!vect_use_loop_mask_for_alignment_p (loop_vinfo))
- 	{
-@@ -2125,6 +2239,14 @@ start_over:
-       /* One additional iteration because of peeling for gap.  */
-       if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
- 	niters_th += 1;
-+
-+      /*  Use the same condition as vect_transform_loop to decide when to use
-+	  the cost to determine a versioning threshold.  */
-+      if (th >= vect_vf_for_cost (loop_vinfo)
-+	  && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
-+	  && ordered_p (th, niters_th))
-+	niters_th = ordered_max (poly_uint64 (th), niters_th);
-+
-       LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = niters_th;
-     }
- 
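The net effect of the added lines is that, when the cost-model threshold is relevant, the versioning threshold becomes the larger of the cost threshold and the peeling-derived iteration count. A rough scalar sketch of that final step, omitting the ordered_p/poly_uint64 handling:

/* E.g. with a cost threshold of 12 and a peeling requirement of 7
   iterations, the runtime versioning check uses 12.  */
static unsigned
merge_versioning_threshold (unsigned cost_th, unsigned niters_th,
                            unsigned vf_for_cost, bool niters_known)
{
  if (cost_th >= vf_for_cost && !niters_known && cost_th > niters_th)
    return cost_th;
  return niters_th;
}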
-@@ -2240,22 +2362,95 @@ again:
-   goto start_over;
- }
- 
-+/* Return true if vectorizing a loop using NEW_LOOP_VINFO appears
-+   to be better than vectorizing it using OLD_LOOP_VINFO.  Assume that
-+   OLD_LOOP_VINFO is better unless something specifically indicates
-+   otherwise.
-+
-+   Note that this deliberately isn't a partial order.  */
-+
-+static bool
-+vect_better_loop_vinfo_p (loop_vec_info new_loop_vinfo,
-+			  loop_vec_info old_loop_vinfo)
-+{
-+  struct loop *loop = LOOP_VINFO_LOOP (new_loop_vinfo);
-+  gcc_assert (LOOP_VINFO_LOOP (old_loop_vinfo) == loop);
-+
-+  poly_int64 new_vf = LOOP_VINFO_VECT_FACTOR (new_loop_vinfo);
-+  poly_int64 old_vf = LOOP_VINFO_VECT_FACTOR (old_loop_vinfo);
-+
-+  /* Always prefer a VF of loop->simdlen over any other VF.  */
-+  if (loop->simdlen)
-+    {
-+      bool new_simdlen_p = known_eq (new_vf, loop->simdlen);
-+      bool old_simdlen_p = known_eq (old_vf, loop->simdlen);
-+      if (new_simdlen_p != old_simdlen_p)
-+	return new_simdlen_p;
-+    }
-+
-+  /* Limit the VFs to what is likely to be the maximum number of iterations,
-+     to handle cases in which at least one loop_vinfo is fully-masked.  */
-+  HOST_WIDE_INT estimated_max_niter = likely_max_stmt_executions_int (loop);
-+  if (estimated_max_niter != -1)
-+    {
-+      if (known_le (estimated_max_niter, new_vf))
-+	new_vf = estimated_max_niter;
-+      if (known_le (estimated_max_niter, old_vf))
-+	old_vf = estimated_max_niter;
-+    }
-+
-+  /* Check whether the (fractional) cost per scalar iteration is lower
-+     or higher: new_inside_cost / new_vf vs. old_inside_cost / old_vf.  */
-+  poly_widest_int rel_new = (new_loop_vinfo->vec_inside_cost
-+			     * poly_widest_int (old_vf));
-+  poly_widest_int rel_old = (old_loop_vinfo->vec_inside_cost
-+			     * poly_widest_int (new_vf));
-+  if (maybe_lt (rel_old, rel_new))
-+    return false;
-+  if (known_lt (rel_new, rel_old))
-+    return true;
-+
-+  /* If there's nothing to choose between the loop bodies, see whether
-+     there's a difference in the prologue and epilogue costs.  */
-+  if (new_loop_vinfo->vec_outside_cost != old_loop_vinfo->vec_outside_cost)
-+    return new_loop_vinfo->vec_outside_cost < old_loop_vinfo->vec_outside_cost;
-+
-+  return false;
-+}
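The per-scalar-iteration comparison above avoids fractional costs by cross-multiplying. A standalone sketch with plain integers (the real code uses poly_widest_int and the maybe_lt/known_lt predicates):

#include <stdbool.h>

/* new_cost / new_vf < old_cost / old_vf  <=>  new_cost * old_vf < old_cost * new_vf.
   E.g. 6 units at VF 8 (0.75 per iteration) beats 4 units at VF 4 (1.0 per
   iteration): 6 * 4 = 24 < 4 * 8 = 32.  */
static bool
cheaper_per_scalar_iter (unsigned long new_cost, unsigned long new_vf,
                         unsigned long old_cost, unsigned long old_vf)
{
  return new_cost * old_vf < old_cost * new_vf;
}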
-+
-+/* Decide whether to replace OLD_LOOP_VINFO with NEW_LOOP_VINFO.  Return
-+   true if we should.  */
-+
-+static bool
-+vect_joust_loop_vinfos (loop_vec_info new_loop_vinfo,
-+			loop_vec_info old_loop_vinfo)
-+{
-+  if (!vect_better_loop_vinfo_p (new_loop_vinfo, old_loop_vinfo))
-+    return false;
-+
-+  if (dump_enabled_p ())
-+    dump_printf_loc (MSG_NOTE, vect_location,
-+		     "***** Preferring vector mode %s to vector mode %s\n",
-+		     GET_MODE_NAME (new_loop_vinfo->vector_mode),
-+		     GET_MODE_NAME (old_loop_vinfo->vector_mode));
-+  return true;
-+}
-+
- /* Function vect_analyze_loop.
- 
-    Apply a set of analyses on LOOP, and create a loop_vec_info struct
-    for it.  The different analyses will record information in the
--   loop_vec_info struct.  If ORIG_LOOP_VINFO is not NULL epilogue must
--   be vectorized.  */
-+   loop_vec_info struct.  */
- opt_loop_vec_info
--vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo,
--		   vec_info_shared *shared)
-+vect_analyze_loop (struct loop *loop, vec_info_shared *shared)
- {
--  auto_vector_sizes vector_sizes;
-+  auto_vector_modes vector_modes;
- 
-   /* Autodetect first vector size we try.  */
--  current_vector_size = 0;
--  targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
--  unsigned int next_size = 0;
-+  unsigned int autovec_flags
-+    = targetm.vectorize.autovectorize_vector_modes (&vector_modes,
-+						    loop->simdlen != 0);
-+  unsigned int mode_i = 0;
- 
-   DUMP_VECT_SCOPE ("analyze_loop_nest");
- 
-@@ -2272,58 +2467,221 @@ vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo,
-        " loops cannot be vectorized\n");
- 
-   unsigned n_stmts = 0;
--  poly_uint64 autodetected_vector_size = 0;
-+  machine_mode autodetected_vector_mode = VOIDmode;
-+  opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULL);
-+  machine_mode next_vector_mode = VOIDmode;
-+  poly_uint64 lowest_th = 0;
-+  unsigned vectorized_loops = 0;
-+  bool pick_lowest_cost_p = ((autovec_flags & VECT_COMPARE_COSTS)
-+			     && !unlimited_cost_model (loop));
-+
-+  bool vect_epilogues = false;
-+  opt_result res = opt_result::success ();
-+  unsigned HOST_WIDE_INT simdlen = loop->simdlen;
-   while (1)
-     {
-       /* Check the CFG characteristics of the loop (nesting, entry/exit).  */
--      opt_loop_vec_info loop_vinfo
--	= vect_analyze_loop_form (loop, shared);
-+      opt_loop_vec_info loop_vinfo = vect_analyze_loop_form (loop, shared);
-       if (!loop_vinfo)
- 	{
- 	  if (dump_enabled_p ())
- 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- 			     "bad loop form.\n");
-+	  gcc_checking_assert (first_loop_vinfo == NULL);
- 	  return loop_vinfo;
- 	}
-+      loop_vinfo->vector_mode = next_vector_mode;
- 
-       bool fatal = false;
- 
--      if (orig_loop_vinfo)
--	LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = orig_loop_vinfo;
-+      /* When pick_lowest_cost_p is true, we should in principle iterate
-+	 over all the loop_vec_infos that LOOP_VINFO could replace and
-+	 try to vectorize LOOP_VINFO under the same conditions.
-+	 E.g. when trying to replace an epilogue loop, we should vectorize
-+	 LOOP_VINFO as an epilogue loop with the same VF limit.  When trying
-+	 to replace the main loop, we should vectorize LOOP_VINFO as a main
-+	 loop too.
-+
-+	 However, autovectorize_vector_modes is usually sorted as follows:
-+
-+	 - Modes that naturally produce lower VFs usually follow modes that
-+	   naturally produce higher VFs.
-+
-+	 - When modes naturally produce the same VF, maskable modes
-+	   usually follow unmaskable ones, so that the maskable mode
-+	   can be used to vectorize the epilogue of the unmaskable mode.
-+
-+	 This order is preferred because it leads to the maximum
-+	 epilogue vectorization opportunities.  Targets should only use
-+	 a different order if they want to make wide modes available while
-+	 disparaging them relative to earlier, smaller modes.  The assumption
-+	 in that case is that the wider modes are more expensive in some
-+	 way that isn't reflected directly in the costs.
-+
-+	 There should therefore be few interesting cases in which
-+	 LOOP_VINFO fails when treated as an epilogue loop, succeeds when
-+	 treated as a standalone loop, and ends up being genuinely cheaper
-+	 than FIRST_LOOP_VINFO.  */
-+      if (vect_epilogues)
-+	LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = first_loop_vinfo;
-+
-+      res = vect_analyze_loop_2 (loop_vinfo, fatal, &n_stmts);
-+      if (mode_i == 0)
-+	autodetected_vector_mode = loop_vinfo->vector_mode;
-+      if (dump_enabled_p ())
-+	{
-+	  if (res)
-+	    dump_printf_loc (MSG_NOTE, vect_location,
-+			     "***** Analysis succeeded with vector mode %s\n",
-+			     GET_MODE_NAME (loop_vinfo->vector_mode));
-+	  else
-+	    dump_printf_loc (MSG_NOTE, vect_location,
-+			     "***** Analysis failed with vector mode %s\n",
-+			     GET_MODE_NAME (loop_vinfo->vector_mode));
-+	}
-+
-+      loop->aux = NULL;
-+
-+      if (!fatal)
-+	while (mode_i < vector_modes.length ()
-+	       && vect_chooses_same_modes_p (loop_vinfo, vector_modes[mode_i]))
-+	  {
-+	    if (dump_enabled_p ())
-+	      dump_printf_loc (MSG_NOTE, vect_location,
-+			       "***** The result for vector mode %s would"
-+			       " be the same\n",
-+			       GET_MODE_NAME (vector_modes[mode_i]));
-+	    mode_i += 1;
-+	  }
- 
--      opt_result res = vect_analyze_loop_2 (loop_vinfo, fatal, &n_stmts);
-       if (res)
- 	{
- 	  LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
-+	  vectorized_loops++;
- 
--	  return loop_vinfo;
--	}
--
--      delete loop_vinfo;
-+	  /* Once we hit the desired simdlen for the first time,
-+	     discard any previous attempts.  */
-+	  if (simdlen
-+	      && known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), simdlen))
-+	    {
-+	      delete first_loop_vinfo;
-+	      first_loop_vinfo = opt_loop_vec_info::success (NULL);
-+	      LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = NULL;
-+	      simdlen = 0;
-+	    }
-+	  else if (pick_lowest_cost_p && first_loop_vinfo)
-+	    {
-+	      /* Keep trying to roll back vectorization attempts while the
-+		 loop_vec_infos they produced were worse than this one.  */
-+	      vec<loop_vec_info> &vinfos = first_loop_vinfo->epilogue_vinfos;
-+	      while (!vinfos.is_empty ()
-+		     && vect_joust_loop_vinfos (loop_vinfo, vinfos.last ()))
-+		{
-+		  gcc_assert (vect_epilogues);
-+		  delete vinfos.pop ();
-+		}
-+	      if (vinfos.is_empty ()
-+		  && vect_joust_loop_vinfos (loop_vinfo, first_loop_vinfo))
-+		{
-+		  delete first_loop_vinfo;
-+		  first_loop_vinfo = opt_loop_vec_info::success (NULL);
-+		  LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = NULL;
-+		}
-+	    }
- 
--      if (next_size == 0)
--	autodetected_vector_size = current_vector_size;
-+	  if (first_loop_vinfo == NULL)
-+	    {
-+	      first_loop_vinfo = loop_vinfo;
-+	      lowest_th = LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo);
-+	    }
-+	  else if (vect_epilogues
-+		   /* For now only allow one epilogue loop.  */
-+		   && first_loop_vinfo->epilogue_vinfos.is_empty ())
-+	    {
-+	      first_loop_vinfo->epilogue_vinfos.safe_push (loop_vinfo);
-+	      poly_uint64 th = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo);
-+	      gcc_assert (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
-+			  || maybe_ne (lowest_th, 0U));
-+	      /* Keep track of the known smallest versioning
-+		 threshold.  */
-+	      if (ordered_p (lowest_th, th))
-+		lowest_th = ordered_min (lowest_th, th);
-+	    }
-+	  else
-+	    delete loop_vinfo;
-+
-+	  /* Only vectorize epilogues if PARAM_VECT_EPILOGUES_NOMASK is
-+	     enabled, SIMDUID is not set, it is the innermost loop and we have
-+	     either already found the loop's SIMDLEN or there was no SIMDLEN to
-+	     begin with.
-+	     TODO: Enable epilogue vectorization for loops with SIMDUID set.  */
-+	  vect_epilogues = (!simdlen
-+			    && loop->inner == NULL
-+			    && PARAM_VALUE (PARAM_VECT_EPILOGUES_NOMASK)
-+			    && LOOP_VINFO_PEELING_FOR_NITER (first_loop_vinfo)
-+			    && !loop->simduid
-+			    /* For now only allow one epilogue loop, but allow
-+			       pick_lowest_cost_p to replace it.  */
-+			    && (first_loop_vinfo->epilogue_vinfos.is_empty ()
-+				|| pick_lowest_cost_p));
-+
-+	  /* Commit to first_loop_vinfo if we have no reason to try
-+	     alternatives.  */
-+	  if (!simdlen && !vect_epilogues && !pick_lowest_cost_p)
-+	    break;
-+	}
-+      else
-+	{
-+	  delete loop_vinfo;
-+	  if (fatal)
-+	    {
-+	      gcc_checking_assert (first_loop_vinfo == NULL);
-+	      break;
-+	    }
-+	}
- 
--      if (next_size < vector_sizes.length ()
--	  && known_eq (vector_sizes[next_size], autodetected_vector_size))
--	next_size += 1;
-+      if (mode_i < vector_modes.length ()
-+	  && VECTOR_MODE_P (autodetected_vector_mode)
-+	  && (related_vector_mode (vector_modes[mode_i],
-+				   GET_MODE_INNER (autodetected_vector_mode))
-+	      == autodetected_vector_mode)
-+	  && (related_vector_mode (autodetected_vector_mode,
-+				   GET_MODE_INNER (vector_modes[mode_i]))
-+	      == vector_modes[mode_i]))
-+	{
-+	  if (dump_enabled_p ())
-+	    dump_printf_loc (MSG_NOTE, vect_location,
-+			     "***** Skipping vector mode %s, which would"
-+			     " repeat the analysis for %s\n",
-+			     GET_MODE_NAME (vector_modes[mode_i]),
-+			     GET_MODE_NAME (autodetected_vector_mode));
-+	  mode_i += 1;
-+	}
- 
--      if (fatal
--	  || next_size == vector_sizes.length ()
--	  || known_eq (current_vector_size, 0U))
--	return opt_loop_vec_info::propagate_failure (res);
-+      if (mode_i == vector_modes.length ()
-+	  || autodetected_vector_mode == VOIDmode)
-+	break;
- 
-       /* Try the next biggest vector size.  */
--      current_vector_size = vector_sizes[next_size++];
-+      next_vector_mode = vector_modes[mode_i++];
-       if (dump_enabled_p ())
--	{
--	  dump_printf_loc (MSG_NOTE, vect_location,
--			   "***** Re-trying analysis with "
--			   "vector size ");
--	  dump_dec (MSG_NOTE, current_vector_size);
--	  dump_printf (MSG_NOTE, "\n");
--	}
-+	dump_printf_loc (MSG_NOTE, vect_location,
-+			 "***** Re-trying analysis with vector mode %s\n",
-+			 GET_MODE_NAME (next_vector_mode));
-+    }
-+
-+  if (first_loop_vinfo)
-+    {
-+      loop->aux = (loop_vec_info) first_loop_vinfo;
-+      if (dump_enabled_p ())
-+	dump_printf_loc (MSG_NOTE, vect_location,
-+			 "***** Choosing vector mode %s\n",
-+			 GET_MODE_NAME (first_loop_vinfo->vector_mode));
-+      LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo) = lowest_th;
-+      return first_loop_vinfo;
-     }
-+
-+  return opt_loop_vec_info::propagate_failure (res);
- }
- 
- /* Return true if there is an in-order reduction function for CODE, storing
-@@ -2397,17 +2755,17 @@ reduction_fn_for_scalar_code (enum tree_code code, internal_fn *reduc_fn)
- 
- /* If there is a neutral value X such that SLP reduction NODE would not
-    be affected by the introduction of additional X elements, return that X,
--   otherwise return null.  CODE is the code of the reduction.  REDUC_CHAIN
--   is true if the SLP statements perform a single reduction, false if each
--   statement performs an independent reduction.  */
-+   otherwise return null.  CODE is the code of the reduction and VECTOR_TYPE
-+   is the vector type that would hold element X.  REDUC_CHAIN is true if
-+   the SLP statements perform a single reduction, false if each statement
-+   performs an independent reduction.  */
- 
- static tree
--neutral_op_for_slp_reduction (slp_tree slp_node, tree_code code,
--			      bool reduc_chain)
-+neutral_op_for_slp_reduction (slp_tree slp_node, tree vector_type,
-+			      tree_code code, bool reduc_chain)
- {
-   vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
-   stmt_vec_info stmt_vinfo = stmts[0];
--  tree vector_type = STMT_VINFO_VECTYPE (stmt_vinfo);
-   tree scalar_type = TREE_TYPE (vector_type);
-   struct loop *loop = gimple_bb (stmt_vinfo->stmt)->loop_father;
-   gcc_assert (loop);
-@@ -2453,241 +2811,55 @@ report_vect_op (dump_flags_t msg_type, gimple *stmt, const char *msg)
-   dump_printf_loc (msg_type, vect_location, "%s%G", msg, stmt);
- }
- 
--/* DEF_STMT_INFO occurs in a loop that contains a potential reduction
--   operation.  Return true if the results of DEF_STMT_INFO are something
--   that can be accumulated by such a reduction.  */
-+/* Return true if we need an in-order reduction for operation CODE
-+   on type TYPE.  NEED_WRAPPING_INTEGRAL_OVERFLOW is true if integer
-+   overflow must wrap.  */
- 
--static bool
--vect_valid_reduction_input_p (stmt_vec_info def_stmt_info)
-+bool
-+needs_fold_left_reduction_p (tree type, tree_code code)
- {
--  return (is_gimple_assign (def_stmt_info->stmt)
--	  || is_gimple_call (def_stmt_info->stmt)
--	  || STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_induction_def
--	  || (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
--	      && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def
--	      && !is_loop_header_bb_p (gimple_bb (def_stmt_info->stmt))));
--}
-+  /* CHECKME: check for !flag_finite_math_only too?  */
-+  if (SCALAR_FLOAT_TYPE_P (type))
-+    switch (code)
-+      {
-+      case MIN_EXPR:
-+      case MAX_EXPR:
-+	return false;
- 
--/* Detect SLP reduction of the form:
-+      default:
-+	return !flag_associative_math;
-+      }
- 
--   #a1 = phi <a5, a0>
--   a2 = operation (a1)
--   a3 = operation (a2)
--   a4 = operation (a3)
--   a5 = operation (a4)
-+  if (INTEGRAL_TYPE_P (type))
-+    {
-+      if (!operation_no_trapping_overflow (type, code))
-+	return true;
-+      return false;
-+    }
- 
--   #a = phi <a5>
-+  if (SAT_FIXED_POINT_TYPE_P (type))
-+    return true;
- 
--   PHI is the reduction phi node (#a1 = phi <a5, a0> above)
--   FIRST_STMT is the first reduction stmt in the chain
--   (a2 = operation (a1)).
-+  return false;
-+}
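needs_fold_left_reduction_p returns true exactly when reassociating the reduction could change the result. For floating point without -fassociative-math the classic counterexample is the following (illustrative numbers, not part of the patch):

#include <stdio.h>

int
main (void)
{
  float a = 1.0e8f, b = -1.0e8f, c = 1.0f;
  /* In-order:      (a + b) + c == 1.0f
     Reassociated:  a + (b + c) == 0.0f, because b + c rounds back to b.  */
  printf ("%g %g\n", (double) ((a + b) + c), (double) (a + (b + c)));
  return 0;
}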
- 
--   Return TRUE if a reduction chain was detected.  */
-+/* Return true if the reduction PHI in LOOP with latch arg LOOP_ARG and
-+   has a handled computation expression.  Store the main reduction
-+   operation in *CODE.  */
- 
- static bool
--vect_is_slp_reduction (loop_vec_info loop_info, gimple *phi,
--		       gimple *first_stmt)
-+check_reduction_path (dump_user_location_t loc, loop_p loop, gphi *phi,
-+		      tree loop_arg, enum tree_code *code,
-+		      vec<std::pair<ssa_op_iter, use_operand_p> > &path)
- {
--  struct loop *loop = (gimple_bb (phi))->loop_father;
--  struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
--  enum tree_code code;
--  gimple *loop_use_stmt = NULL;
--  stmt_vec_info use_stmt_info;
--  tree lhs;
--  imm_use_iterator imm_iter;
--  use_operand_p use_p;
--  int nloop_uses, size = 0, n_out_of_loop_uses;
--  bool found = false;
--
--  if (loop != vect_loop)
--    return false;
--
--  auto_vec<stmt_vec_info> reduc_chain;
--  lhs = PHI_RESULT (phi);
--  code = gimple_assign_rhs_code (first_stmt);
--  while (1)
--    {
--      nloop_uses = 0;
--      n_out_of_loop_uses = 0;
--      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
--        {
--	  gimple *use_stmt = USE_STMT (use_p);
--	  if (is_gimple_debug (use_stmt))
--	    continue;
--
--          /* Check if we got back to the reduction phi.  */
--	  if (use_stmt == phi)
--            {
--	      loop_use_stmt = use_stmt;
--              found = true;
--              break;
--            }
--
--          if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
--            {
--	      loop_use_stmt = use_stmt;
--	      nloop_uses++;
--            }
--           else
--             n_out_of_loop_uses++;
--
--           /* There can be either a single use in the loop or two uses in
--              phi nodes.  */
--           if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
--             return false;
--        }
--
--      if (found)
--        break;
--
--      /* We reached a statement with no loop uses.  */
--      if (nloop_uses == 0)
--	return false;
--
--      /* This is a loop exit phi, and we haven't reached the reduction phi.  */
--      if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
--        return false;
--
--      if (!is_gimple_assign (loop_use_stmt)
--	  || code != gimple_assign_rhs_code (loop_use_stmt)
--	  || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
--        return false;
--
--      /* Insert USE_STMT into reduction chain.  */
--      use_stmt_info = loop_info->lookup_stmt (loop_use_stmt);
--      reduc_chain.safe_push (use_stmt_info);
--
--      lhs = gimple_assign_lhs (loop_use_stmt);
--      size++;
--   }
--
--  if (!found || loop_use_stmt != phi || size < 2)
--    return false;
--
--  /* Swap the operands, if needed, to make the reduction operand be the second
--     operand.  */
--  lhs = PHI_RESULT (phi);
--  for (unsigned i = 0; i < reduc_chain.length (); ++i)
--    {
--      gassign *next_stmt = as_a <gassign *> (reduc_chain[i]->stmt);
--      if (gimple_assign_rhs2 (next_stmt) == lhs)
--	{
--	  tree op = gimple_assign_rhs1 (next_stmt);
--	  stmt_vec_info def_stmt_info = loop_info->lookup_def (op);
--
--	  /* Check that the other def is either defined in the loop
--	     ("vect_internal_def"), or it's an induction (defined by a
--	     loop-header phi-node).  */
--	  if (def_stmt_info
--	      && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt))
--	      && vect_valid_reduction_input_p (def_stmt_info))
--	    {
--	      lhs = gimple_assign_lhs (next_stmt);
-- 	      continue;
--	    }
--
--	  return false;
--	}
--      else
--	{
--          tree op = gimple_assign_rhs2 (next_stmt);
--	  stmt_vec_info def_stmt_info = loop_info->lookup_def (op);
--
--          /* Check that the other def is either defined in the loop
--            ("vect_internal_def"), or it's an induction (defined by a
--            loop-header phi-node).  */
--	  if (def_stmt_info
--	      && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt))
--	      && vect_valid_reduction_input_p (def_stmt_info))
--  	    {
--	      if (dump_enabled_p ())
--		dump_printf_loc (MSG_NOTE, vect_location, "swapping oprnds: %G",
--				 next_stmt);
--
--	      swap_ssa_operands (next_stmt,
--	 		         gimple_assign_rhs1_ptr (next_stmt),
--                                 gimple_assign_rhs2_ptr (next_stmt));
--	      update_stmt (next_stmt);
--
--	      if (CONSTANT_CLASS_P (gimple_assign_rhs1 (next_stmt)))
--		LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
--	    }
--	  else
--	    return false;
--        }
--
--      lhs = gimple_assign_lhs (next_stmt);
--    }
--
--  /* Build up the actual chain.  */
--  for (unsigned i = 0; i < reduc_chain.length () - 1; ++i)
--    {
--      REDUC_GROUP_FIRST_ELEMENT (reduc_chain[i]) = reduc_chain[0];
--      REDUC_GROUP_NEXT_ELEMENT (reduc_chain[i]) = reduc_chain[i+1];
--    }
--  REDUC_GROUP_FIRST_ELEMENT (reduc_chain.last ()) = reduc_chain[0];
--  REDUC_GROUP_NEXT_ELEMENT (reduc_chain.last ()) = NULL;
--
--  /* Save the chain for further analysis in SLP detection.  */
--  LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (reduc_chain[0]);
--  REDUC_GROUP_SIZE (reduc_chain[0]) = size;
--
--  return true;
--}
--
--/* Return true if we need an in-order reduction for operation CODE
--   on type TYPE.  NEED_WRAPPING_INTEGRAL_OVERFLOW is true if integer
--   overflow must wrap.  */
--
--static bool
--needs_fold_left_reduction_p (tree type, tree_code code,
--			     bool need_wrapping_integral_overflow)
--{
--  /* CHECKME: check for !flag_finite_math_only too?  */
--  if (SCALAR_FLOAT_TYPE_P (type))
--    switch (code)
--      {
--      case MIN_EXPR:
--      case MAX_EXPR:
--	return false;
--
--      default:
--	return !flag_associative_math;
--      }
--
--  if (INTEGRAL_TYPE_P (type))
--    {
--      if (!operation_no_trapping_overflow (type, code))
--	return true;
--      if (need_wrapping_integral_overflow
--	  && !TYPE_OVERFLOW_WRAPS (type)
--	  && operation_can_overflow (code))
--	return true;
--      return false;
--    }
--
--  if (SAT_FIXED_POINT_TYPE_P (type))
--    return true;
--
--  return false;
--}
--
--/* Return true if the reduction PHI in LOOP with latch arg LOOP_ARG and
--   reduction operation CODE has a handled computation expression.  */
--
--bool
--check_reduction_path (dump_user_location_t loc, loop_p loop, gphi *phi,
--		      tree loop_arg, enum tree_code code)
--{
--  auto_vec<std::pair<ssa_op_iter, use_operand_p> > path;
--  auto_bitmap visited;
--  tree lookfor = PHI_RESULT (phi);
--  ssa_op_iter curri;
--  use_operand_p curr = op_iter_init_phiuse (&curri, phi, SSA_OP_USE);
--  while (USE_FROM_PTR (curr) != loop_arg)
--    curr = op_iter_next_use (&curri);
--  curri.i = curri.numops;
--  do
-+  auto_bitmap visited;
-+  tree lookfor = PHI_RESULT (phi);
-+  ssa_op_iter curri;
-+  use_operand_p curr = op_iter_init_phiuse (&curri, phi, SSA_OP_USE);
-+  while (USE_FROM_PTR (curr) != loop_arg)
-+    curr = op_iter_next_use (&curri);
-+  curri.i = curri.numops;
-+  do
-     {
-       path.safe_push (std::make_pair (curri, curr));
-       tree use = USE_FROM_PTR (curr);
-@@ -2747,36 +2919,71 @@ pop:
-   /* Check whether the reduction path detected is valid.  */
-   bool fail = path.length () == 0;
-   bool neg = false;
-+  *code = ERROR_MARK;
-   for (unsigned i = 1; i < path.length (); ++i)
-     {
-       gimple *use_stmt = USE_STMT (path[i].second);
-       tree op = USE_FROM_PTR (path[i].second);
--      if (! has_single_use (op)
--	  || ! is_gimple_assign (use_stmt))
-+      if (! is_gimple_assign (use_stmt)
-+	  /* The following make sure we can compute the operand index
-+	     easily plus it mostly disallows chaining via COND_EXPR condition
-+	     operands.  */
-+	  || (gimple_assign_rhs1 (use_stmt) != op
-+	      && gimple_assign_rhs2 (use_stmt) != op
-+	      && gimple_assign_rhs3 (use_stmt) != op))
- 	{
- 	  fail = true;
- 	  break;
- 	}
--      if (gimple_assign_rhs_code (use_stmt) != code)
-+      /* Check there's only a single stmt the op is used on inside
-+         of the loop.  */
-+      imm_use_iterator imm_iter;
-+      gimple *op_use_stmt;
-+      unsigned cnt = 0;
-+      FOR_EACH_IMM_USE_STMT (op_use_stmt, imm_iter, op)
-+	if (!is_gimple_debug (op_use_stmt)
-+	    && flow_bb_inside_loop_p (loop, gimple_bb (op_use_stmt)))
-+	  cnt++;
-+      if (cnt != 1)
- 	{
--	  if (code == PLUS_EXPR
--	      && gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
--	    {
--	      /* Track whether we negate the reduction value each iteration.  */
--	      if (gimple_assign_rhs2 (use_stmt) == op)
--		neg = ! neg;
--	    }
--	  else
--	    {
--	      fail = true;
--	      break;
--	    }
-+	  fail = true;
-+	  break;
-+	}
-+      tree_code use_code = gimple_assign_rhs_code (use_stmt);
-+      if (use_code == MINUS_EXPR)
-+	{
-+	  use_code = PLUS_EXPR;
-+	  /* Track whether we negate the reduction value each iteration.  */
-+	  if (gimple_assign_rhs2 (use_stmt) == op)
-+	    neg = ! neg;
-+	}
-+      if (CONVERT_EXPR_CODE_P (use_code)
-+	  && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (use_stmt)),
-+				    TREE_TYPE (gimple_assign_rhs1 (use_stmt))))
-+	;
-+      else if (*code == ERROR_MARK)
-+	*code = use_code;
-+      else if (use_code != *code)
-+	{
-+	  fail = true;
-+	  break;
- 	}
-     }
--  return ! fail && ! neg;
-+  return ! fail && ! neg && *code != ERROR_MARK;
-+}
-+
-+bool
-+check_reduction_path (dump_user_location_t loc, loop_p loop, gphi *phi,
-+		      tree loop_arg, enum tree_code code)
-+{
-+  auto_vec<std::pair<ssa_op_iter, use_operand_p> > path;
-+  enum tree_code code_;
-+  return (check_reduction_path (loc, loop, phi, loop_arg, &code_, path)
-+	  && code_ == code);
- }
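The public overload keeps the old interface: it accepts only a path whose single arithmetic code matches the expected one. For illustration, a source loop whose latch-to-PHI path passes this check with PLUS_EXPR (ordinary user code, not part of the patch):

float
dot_product (const float *a, const float *b, int n)
{
  float s = 0.0f;              /* reduction PHI:  s_1 = PHI <0.0, s_3>    */
  for (int i = 0; i < n; ++i)
    s += a[i] * b[i];          /* latch def:      s_3 = s_1 + t, one use  */
  return s;
}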
- 
- 
-+
- /* Function vect_is_simple_reduction
- 
-    (1) Detect a cross-iteration def-use cycle that represents a simple
-@@ -2823,25 +3030,15 @@ pop:
- 
- static stmt_vec_info
- vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
--			  bool *double_reduc,
--			  bool need_wrapping_integral_overflow,
--			  enum vect_reduction_type *v_reduc_type)
-+			  bool *double_reduc)
- {
-   gphi *phi = as_a <gphi *> (phi_info->stmt);
--  struct loop *loop = (gimple_bb (phi))->loop_father;
--  struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
--  bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
-   gimple *phi_use_stmt = NULL;
--  enum tree_code orig_code, code;
--  tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
--  tree type;
--  tree name;
-   imm_use_iterator imm_iter;
-   use_operand_p use_p;
--  bool phi_def;
- 
-   *double_reduc = false;
--  *v_reduc_type = TREE_CODE_REDUCTION;
-+  STMT_VINFO_REDUC_TYPE (phi_info) = TREE_CODE_REDUCTION;
- 
-   tree phi_name = PHI_RESULT (phi);
-   /* ???  If there are no uses of the PHI result the inner loop reduction
-@@ -2850,6 +3047,7 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
-      can be constant.  See PR60382.  */
-   if (has_zero_uses (phi_name))
-     return NULL;
-+  class loop *loop = (gimple_bb (phi))->loop_father;
-   unsigned nphi_def_loop_uses = 0;
-   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, phi_name)
-     {
-@@ -2870,44 +3068,26 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
-       phi_use_stmt = use_stmt;
-     }
- 
--  edge latch_e = loop_latch_edge (loop);
--  tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
--  if (TREE_CODE (loop_arg) != SSA_NAME)
-+  tree latch_def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
-+  if (TREE_CODE (latch_def) != SSA_NAME)
-     {
-       if (dump_enabled_p ())
- 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
--			 "reduction: not ssa_name: %T\n", loop_arg);
-+			 "reduction: not ssa_name: %T\n", latch_def);
-       return NULL;
-     }
- 
--  stmt_vec_info def_stmt_info = loop_info->lookup_def (loop_arg);
-+  stmt_vec_info def_stmt_info = loop_info->lookup_def (latch_def);
-   if (!def_stmt_info
-       || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt)))
-     return NULL;
- 
--  if (gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt))
--    {
--      name = gimple_assign_lhs (def_stmt);
--      phi_def = false;
--    }
--  else if (gphi *def_stmt = dyn_cast <gphi *> (def_stmt_info->stmt))
--    {
--      name = PHI_RESULT (def_stmt);
--      phi_def = true;
--    }
--  else
--    {
--      if (dump_enabled_p ())
--	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
--			 "reduction: unhandled reduction operation: %G",
--			 def_stmt_info->stmt);
--      return NULL;
--    }
--
-+  bool nested_in_vect_loop
-+    = flow_loop_nested_p (LOOP_VINFO_LOOP (loop_info), loop);
-   unsigned nlatch_def_loop_uses = 0;
-   auto_vec<gphi *> lcphis;
-   bool inner_loop_of_double_reduc = false;
--  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
-+  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, latch_def)
-     {
-       gimple *use_stmt = USE_STMT (use_p);
-       if (is_gimple_debug (use_stmt))
-@@ -2925,11 +3105,21 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
- 	}
-     }
- 
-+  /* If we are vectorizing an inner reduction we are executing that
-+     in the original order only in case we are not dealing with a
-+     double reduction.  */
-+  if (nested_in_vect_loop && !inner_loop_of_double_reduc)
-+    {
-+      if (dump_enabled_p ())
-+	report_vect_op (MSG_NOTE, def_stmt_info->stmt,
-+			"detected nested cycle: ");
-+      return def_stmt_info;
-+    }
-+
-   /* If this isn't a nested cycle or if the nested cycle reduction value
-      is used outside of the inner loop we cannot handle uses of the reduction
-      value.  */
--  if ((!nested_in_vect_loop || inner_loop_of_double_reduc)
--      && (nlatch_def_loop_uses > 1 || nphi_def_loop_uses > 1))
-+  if (nlatch_def_loop_uses > 1 || nphi_def_loop_uses > 1)
-     {
-       if (dump_enabled_p ())
- 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-@@ -2939,11 +3129,9 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
- 
-   /* If DEF_STMT is a phi node itself, we expect it to have a single argument
-      defined in the inner loop.  */
--  if (phi_def)
-+  if (gphi *def_stmt = dyn_cast <gphi *> (def_stmt_info->stmt))
-     {
--      gphi *def_stmt = as_a  (def_stmt_info->stmt);
--      op1 = PHI_ARG_DEF (def_stmt, 0);
--
-+      tree op1 = PHI_ARG_DEF (def_stmt, 0);
-       if (gimple_phi_num_args (def_stmt) != 1
-           || TREE_CODE (op1) != SSA_NAME)
-         {
-@@ -2974,290 +3162,74 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
-       return NULL;
-     }
- 
--  /* If we are vectorizing an inner reduction we are executing that
--     in the original order only in case we are not dealing with a
--     double reduction.  */
--  bool check_reduction = true;
--  if (flow_loop_nested_p (vect_loop, loop))
--    {
--      gphi *lcphi;
--      unsigned i;
--      check_reduction = false;
--      FOR_EACH_VEC_ELT (lcphis, i, lcphi)
--	FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (lcphi))
--	  {
--	    gimple *use_stmt = USE_STMT (use_p);
--	    if (is_gimple_debug (use_stmt))
--	      continue;
--	    if (! flow_bb_inside_loop_p (vect_loop, gimple_bb (use_stmt)))
--	      check_reduction = true;
--	  }
--    }
--
--  gassign *def_stmt = as_a <gassign *> (def_stmt_info->stmt);
--  code = orig_code = gimple_assign_rhs_code (def_stmt);
--
--  if (nested_in_vect_loop && !check_reduction)
--    {
--      /* FIXME: Even for non-reductions code generation is funneled
--	 through vectorizable_reduction for the stmt defining the
--	 PHI latch value.  So we have to artificially restrict ourselves
--	 for the supported operations.  */
--      switch (get_gimple_rhs_class (code))
--	{
--	case GIMPLE_BINARY_RHS:
--	case GIMPLE_TERNARY_RHS:
--	  break;
--	default:
--	  /* Not supported by vectorizable_reduction.  */
--	  if (dump_enabled_p ())
--	    report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
--			    "nested cycle: not handled operation: ");
--	  return NULL;
--	}
--      if (dump_enabled_p ())
--	report_vect_op (MSG_NOTE, def_stmt, "detected nested cycle: ");
--      return def_stmt_info;
--    }
--
--  /* We can handle "res -= x[i]", which is non-associative by
--     simply rewriting this into "res += -x[i]".  Avoid changing
--     gimple instruction for the first simple tests and only do this
--     if we're allowed to change code at all.  */
--  if (code == MINUS_EXPR && gimple_assign_rhs2 (def_stmt) != phi_name)
--    code = PLUS_EXPR;
--
--  if (code == COND_EXPR)
-+  /* Look for the expression computing latch_def from then loop PHI result.  */
-+  /* Look for the expression computing latch_def from the loop PHI result.  */
-+  enum tree_code code;
-+  if (check_reduction_path (vect_location, loop, phi, latch_def, &code,
-+			    path))
-     {
--      if (! nested_in_vect_loop)
--	*v_reduc_type = COND_REDUCTION;
-+      STMT_VINFO_REDUC_CODE (phi_info) = code;
-+      if (code == COND_EXPR && !nested_in_vect_loop)
-+	STMT_VINFO_REDUC_TYPE (phi_info) = COND_REDUCTION;
- 
--      op3 = gimple_assign_rhs1 (def_stmt);
--      if (COMPARISON_CLASS_P (op3))
--        {
--          op4 = TREE_OPERAND (op3, 1);
--          op3 = TREE_OPERAND (op3, 0);
--        }
--      if (op3 == phi_name || op4 == phi_name)
-+      /* Fill in STMT_VINFO_REDUC_IDX and gather stmts for an SLP
-+	 reduction chain for which the additional restriction is that
-+	 all operations in the chain are the same.  */
-+      auto_vec<stmt_vec_info> reduc_chain;
-+      unsigned i;
-+      bool is_slp_reduc = !nested_in_vect_loop && code != COND_EXPR;
-+      for (i = path.length () - 1; i >= 1; --i)
- 	{
--	  if (dump_enabled_p ())
--	    report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
--			    "reduction: condition depends on previous"
--			    " iteration: ");
--	  return NULL;
-+	  gimple *stmt = USE_STMT (path[i].second);
-+	  stmt_vec_info stmt_info = loop_info->lookup_stmt (stmt);
-+	  STMT_VINFO_REDUC_IDX (stmt_info)
-+	    = path[i].second->use - gimple_assign_rhs1_ptr (stmt);
-+	  enum tree_code stmt_code = gimple_assign_rhs_code (stmt);
-+	  bool leading_conversion = (CONVERT_EXPR_CODE_P (stmt_code)
-+				     && (i == 1 || i == path.length () - 1));
-+	  if ((stmt_code != code && !leading_conversion)
-+	      /* We can only handle the final value in epilogue
-+		 generation for reduction chains.  */
-+	      || (i != 1 && !has_single_use (gimple_assign_lhs (stmt))))
-+	    is_slp_reduc = false;
-+	  /* For reduction chains we support trailing/leading
-+	     conversions.  We do not store those in the actual chain.  */
-+	  if (leading_conversion)
-+	    continue;
-+	  reduc_chain.safe_push (stmt_info);
- 	}
--
--      op1 = gimple_assign_rhs2 (def_stmt);
--      op2 = gimple_assign_rhs3 (def_stmt);
--    }
--  else if (!commutative_tree_code (code) || !associative_tree_code (code))
--    {
--      if (dump_enabled_p ())
--	report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
--			"reduction: not commutative/associative: ");
--      return NULL;
--    }
--  else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
--    {
--      op1 = gimple_assign_rhs1 (def_stmt);
--      op2 = gimple_assign_rhs2 (def_stmt);
--    }
--  else
--    {
--      if (dump_enabled_p ())
--	report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
--			"reduction: not handled operation: ");
--      return NULL;
--    }
--
--  if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
--    {
--      if (dump_enabled_p ())
--	report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
--			"reduction: both uses not ssa_names: ");
--
--      return NULL;
--    }
--
--  type = TREE_TYPE (gimple_assign_lhs (def_stmt));
--  if ((TREE_CODE (op1) == SSA_NAME
--       && !types_compatible_p (type,TREE_TYPE (op1)))
--      || (TREE_CODE (op2) == SSA_NAME
--          && !types_compatible_p (type, TREE_TYPE (op2)))
--      || (op3 && TREE_CODE (op3) == SSA_NAME
--          && !types_compatible_p (type, TREE_TYPE (op3)))
--      || (op4 && TREE_CODE (op4) == SSA_NAME
--          && !types_compatible_p (type, TREE_TYPE (op4))))
--    {
--      if (dump_enabled_p ())
--        {
--          dump_printf_loc (MSG_NOTE, vect_location,
--			   "reduction: multiple types: operation type: "
--			   "%T, operands types: %T,%T",
--			   type,  TREE_TYPE (op1), TREE_TYPE (op2));
--          if (op3)
--	    dump_printf (MSG_NOTE, ",%T", TREE_TYPE (op3));
--
--          if (op4)
--	    dump_printf (MSG_NOTE, ",%T", TREE_TYPE (op4));
--          dump_printf (MSG_NOTE, "\n");
--        }
--
--      return NULL;
--    }
--
--  /* Check whether it's ok to change the order of the computation.
--     Generally, when vectorizing a reduction we change the order of the
--     computation.  This may change the behavior of the program in some
--     cases, so we need to check that this is ok.  One exception is when
--     vectorizing an outer-loop: the inner-loop is executed sequentially,
--     and therefore vectorizing reductions in the inner-loop during
--     outer-loop vectorization is safe.  */
--  if (check_reduction
--      && *v_reduc_type == TREE_CODE_REDUCTION
--      && needs_fold_left_reduction_p (type, code,
--				      need_wrapping_integral_overflow))
--    *v_reduc_type = FOLD_LEFT_REDUCTION;
--
--  /* Reduction is safe. We're dealing with one of the following:
--     1) integer arithmetic and no trapv
--     2) floating point arithmetic, and special flags permit this optimization
--     3) nested cycle (i.e., outer loop vectorization).  */
--  stmt_vec_info def1_info = loop_info->lookup_def (op1);
--  stmt_vec_info def2_info = loop_info->lookup_def (op2);
--  if (code != COND_EXPR && !def1_info && !def2_info)
--    {
--      if (dump_enabled_p ())
--	report_vect_op (MSG_NOTE, def_stmt, "reduction: no defs for operands: ");
--      return NULL;
--    }
--
--  /* Check that one def is the reduction def, defined by PHI,
--     the other def is either defined in the loop ("vect_internal_def"),
--     or it's an induction (defined by a loop-header phi-node).  */
--
--  if (def2_info
--      && def2_info->stmt == phi
--      && (code == COND_EXPR
--	  || !def1_info
--	  || !flow_bb_inside_loop_p (loop, gimple_bb (def1_info->stmt))
--	  || vect_valid_reduction_input_p (def1_info)))
--    {
--      if (dump_enabled_p ())
--	report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
--      return def_stmt_info;
--    }
--
--  if (def1_info
--      && def1_info->stmt == phi
--      && (code == COND_EXPR
--	  || !def2_info
--	  || !flow_bb_inside_loop_p (loop, gimple_bb (def2_info->stmt))
--	  || vect_valid_reduction_input_p (def2_info)))
--    {
--      if (! nested_in_vect_loop && orig_code != MINUS_EXPR)
-+      if (is_slp_reduc && reduc_chain.length () > 1)
- 	{
--	  /* Check if we can swap operands (just for simplicity - so that
--	     the rest of the code can assume that the reduction variable
--	     is always the last (second) argument).  */
--	  if (code == COND_EXPR)
-+	  for (unsigned i = 0; i < reduc_chain.length () - 1; ++i)
- 	    {
--	      /* Swap cond_expr by inverting the condition.  */
--	      tree cond_expr = gimple_assign_rhs1 (def_stmt);
--	      enum tree_code invert_code = ERROR_MARK;
--	      enum tree_code cond_code = TREE_CODE (cond_expr);
--
--	      if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
--		{
--		  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
--		  invert_code = invert_tree_comparison (cond_code, honor_nans);
--		}
--	      if (invert_code != ERROR_MARK)
--		{
--		  TREE_SET_CODE (cond_expr, invert_code);
--		  swap_ssa_operands (def_stmt,
--				     gimple_assign_rhs2_ptr (def_stmt),
--				     gimple_assign_rhs3_ptr (def_stmt));
--		}
--	      else
--		{
--		  if (dump_enabled_p ())
--		    report_vect_op (MSG_NOTE, def_stmt,
--				    "detected reduction: cannot swap operands "
--				    "for cond_expr");
--		  return NULL;
--		}
-+	      REDUC_GROUP_FIRST_ELEMENT (reduc_chain[i]) = reduc_chain[0];
-+	      REDUC_GROUP_NEXT_ELEMENT (reduc_chain[i]) = reduc_chain[i+1];
- 	    }
--	  else
--	    swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
--			       gimple_assign_rhs2_ptr (def_stmt));
--
--	  if (dump_enabled_p ())
--	    report_vect_op (MSG_NOTE, def_stmt,
--			    "detected reduction: need to swap operands: ");
--
--	  if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt)))
--	    LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
--        }
--      else
--        {
--          if (dump_enabled_p ())
--            report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
--        }
-+	  REDUC_GROUP_FIRST_ELEMENT (reduc_chain.last ()) = reduc_chain[0];
-+	  REDUC_GROUP_NEXT_ELEMENT (reduc_chain.last ()) = NULL;
- 
--      return def_stmt_info;
--    }
-+	  /* Save the chain for further analysis in SLP detection.  */
-+	  LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (reduc_chain[0]);
-+	  REDUC_GROUP_SIZE (reduc_chain[0]) = reduc_chain.length ();
- 
--  /* Try to find SLP reduction chain.  */
--  if (! nested_in_vect_loop
--      && code != COND_EXPR
--      && orig_code != MINUS_EXPR
--      && vect_is_slp_reduction (loop_info, phi, def_stmt))
--    {
--      if (dump_enabled_p ())
--        report_vect_op (MSG_NOTE, def_stmt,
--			"reduction: detected reduction chain: ");
-+	  if (dump_enabled_p ())
-+	    dump_printf_loc (MSG_NOTE, vect_location,
-+			    "reduction: detected reduction chain\n");
-+	}
-+      else if (dump_enabled_p ())
-+	dump_printf_loc (MSG_NOTE, vect_location,
-+			 "reduction: detected reduction\n");
- 
-       return def_stmt_info;
-     }
- 
--  /* Look for the expression computing loop_arg from loop PHI result.  */
--  if (check_reduction_path (vect_location, loop, phi, loop_arg, code))
--    return def_stmt_info;
--
-   if (dump_enabled_p ())
--    {
--      report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
--		      "reduction: unknown pattern: ");
--    }
-+    dump_printf_loc (MSG_NOTE, vect_location,
-+		     "reduction: unknown pattern\n");
- 
-   return NULL;
- }
- 
--/* Wrapper around vect_is_simple_reduction, which will modify code
--   in-place if it enables detection of more reductions.  Arguments
--   as there.  */
--
--stmt_vec_info
--vect_force_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
--			     bool *double_reduc,
--			     bool need_wrapping_integral_overflow)
--{
--  enum vect_reduction_type v_reduc_type;
--  stmt_vec_info def_info
--    = vect_is_simple_reduction (loop_info, phi_info, double_reduc,
--				need_wrapping_integral_overflow,
--				&v_reduc_type);
--  if (def_info)
--    {
--      STMT_VINFO_REDUC_TYPE (phi_info) = v_reduc_type;
--      STMT_VINFO_REDUC_DEF (phi_info) = def_info;
--      STMT_VINFO_REDUC_TYPE (def_info) = v_reduc_type;
--      STMT_VINFO_REDUC_DEF (def_info) = phi_info;
--    }
--  return def_info;
--}
--
- /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times.  */
- int
- vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
-@@ -3601,7 +3573,11 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
- 	       &vec_inside_cost, &vec_epilogue_cost);
- 
-   vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);
--  
-+
-+  /* Stash the costs so that we can compare two loop_vec_infos.  */
-+  loop_vinfo->vec_inside_cost = vec_inside_cost;
-+  loop_vinfo->vec_outside_cost = vec_outside_cost;
-+
-   if (dump_enabled_p ())
-     {
-       dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
-@@ -3846,6 +3822,7 @@ have_whole_vector_shift (machine_mode mode)
- 
- static void
- vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn,
-+			   vect_reduction_type reduction_type,
- 			   int ncopies, stmt_vector_for_cost *cost_vec)
- {
-   int prologue_cost = 0, epilogue_cost = 0, inside_cost;
-@@ -3860,8 +3837,6 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn,
-     loop = LOOP_VINFO_LOOP (loop_vinfo);
- 
-   /* Condition reductions generate two reductions in the loop.  */
--  vect_reduction_type reduction_type
--    = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
-   if (reduction_type == COND_REDUCTION)
-     ncopies *= 2;
- 
-@@ -4080,15 +4055,15 @@ vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies,
- 
-    A cost model should help decide between these two schemes.  */
- 
--tree
--get_initial_def_for_reduction (stmt_vec_info stmt_vinfo, tree init_val,
-+static tree
-+get_initial_def_for_reduction (stmt_vec_info stmt_vinfo,
-+			       enum tree_code code, tree init_val,
-                                tree *adjustment_def)
- {
-   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
-   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
-   tree scalar_type = TREE_TYPE (init_val);
--  tree vectype = get_vectype_for_scalar_type (scalar_type);
--  enum tree_code code = gimple_assign_rhs_code (stmt_vinfo->stmt);
-+  tree vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type);
-   tree def_for_init;
-   tree init_def;
-   REAL_VALUE_TYPE real_init_val = dconst0;
-@@ -4103,8 +4078,10 @@ get_initial_def_for_reduction (stmt_vec_info stmt_vinfo, tree init_val,
-   gcc_assert (nested_in_vect_loop_p (loop, stmt_vinfo)
- 	      || loop == (gimple_bb (stmt_vinfo->stmt))->loop_father);
- 
--  vect_reduction_type reduction_type
--    = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_vinfo);
-+  /* ADJUSTMENT_DEF is NULL when called from
-+     vect_create_epilog_for_reduction to vectorize double reduction.  */
-+  if (adjustment_def)
-+    *adjustment_def = NULL;
- 
-   switch (code)
-     {
-@@ -4118,11 +4095,6 @@ get_initial_def_for_reduction (stmt_vec_info stmt_vinfo, tree init_val,
-     case MULT_EXPR:
-     case BIT_AND_EXPR:
-       {
--        /* ADJUSTMENT_DEF is NULL when called from
--           vect_create_epilog_for_reduction to vectorize double reduction.  */
--        if (adjustment_def)
--	  *adjustment_def = init_val;
--
-         if (code == MULT_EXPR)
-           {
-             real_init_val = dconst1;
-@@ -4137,10 +4109,14 @@ get_initial_def_for_reduction (stmt_vec_info stmt_vinfo, tree init_val,
-         else
-           def_for_init = build_int_cst (scalar_type, int_init_val);
- 
--	if (adjustment_def)
--	  /* Option1: the first element is '0' or '1' as well.  */
--	  init_def = gimple_build_vector_from_val (&stmts, vectype,
--						   def_for_init);
-+	if (adjustment_def || operand_equal_p (def_for_init, init_val, 0))
-+	  {
-+	    /* Option1: the first element is '0' or '1' as well.  */
-+	    if (!operand_equal_p (def_for_init, init_val, 0))
-+	      *adjustment_def = init_val;
-+	    init_def = gimple_build_vector_from_val (&stmts, vectype,
-+						     def_for_init);
-+	  }
- 	else if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ())
- 	  {
- 	    /* Option2 (variable length): the first element is INIT_VAL.  */
-@@ -4164,16 +4140,6 @@ get_initial_def_for_reduction (stmt_vec_info stmt_vinfo, tree init_val,
-     case MAX_EXPR:
-     case COND_EXPR:
-       {
--	if (adjustment_def)
--          {
--	    *adjustment_def = NULL_TREE;
--	    if (reduction_type != COND_REDUCTION
--		&& reduction_type != EXTRACT_LAST_REDUCTION)
--	      {
--		init_def = vect_get_vec_def_for_operand (init_val, stmt_vinfo);
--		break;
--	      }
--	  }
- 	init_val = gimple_convert (&stmts, TREE_TYPE (vectype), init_val);
- 	init_def = gimple_build_vector_from_val (&stmts, vectype, init_val);
-       }
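The reworked code still chooses between the two initial-definition schemes: splatting the neutral value and applying INIT_VAL as a scalar adjustment afterwards (Option1), or placing INIT_VAL directly in the first lane (Option2). A small sketch of Option2 with plain arrays (a hypothetical helper, not the gimple builders used above):

/* For a 4-lane sum with INIT_VAL 5: Option1 starts from {0,0,0,0} and adds
   5 after the final reduction; Option2 starts from {5,0,0,0}.  */
static void
build_initial_def_option2 (int *lanes, unsigned nunits,
                           int init_val, int neutral_val)
{
  lanes[0] = init_val;
  for (unsigned i = 1; i < nunits; ++i)
    lanes[i] = neutral_val;
}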
-@@ -4201,6 +4167,7 @@ get_initial_defs_for_reduction (slp_tree slp_node,
- {
-   vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
-   stmt_vec_info stmt_vinfo = stmts[0];
-+  vec_info *vinfo = stmt_vinfo->vinfo;
-   unsigned HOST_WIDE_INT nunits;
-   unsigned j, number_of_places_left_in_vector;
-   tree vector_type;
-@@ -4293,7 +4260,7 @@ get_initial_defs_for_reduction (slp_tree slp_node,
- 	    {
- 	      /* First time round, duplicate ELTS to fill the
- 		 required number of vectors.  */
--	      duplicate_and_interleave (&ctor_seq, vector_type, elts,
-+	      duplicate_and_interleave (vinfo, &ctor_seq, vector_type, elts,
- 					number_of_vectors, *vec_oprnds);
- 	      break;
- 	    }
-@@ -4309,42 +4276,47 @@ get_initial_defs_for_reduction (slp_tree slp_node,
-     gsi_insert_seq_on_edge_immediate (pe, ctor_seq);
- }
- 
-+/* For a statement STMT_INFO taking part in a reduction operation return
-+   the stmt_vec_info the meta information is stored on.  */
- 
--/* Function vect_create_epilog_for_reduction
--
--   Create code at the loop-epilog to finalize the result of a reduction
-+stmt_vec_info
-+info_for_reduction (stmt_vec_info stmt_info)
-+{
-+  stmt_info = vect_orig_stmt (stmt_info);
-+  gcc_assert (STMT_VINFO_REDUC_DEF (stmt_info));
-+  if (!is_a <gphi *> (stmt_info->stmt))
-+    stmt_info = STMT_VINFO_REDUC_DEF (stmt_info);
-+  gphi *phi = as_a <gphi *> (stmt_info->stmt);
-+  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
-+    {
-+      if (gimple_phi_num_args (phi) == 1)
-+	stmt_info = STMT_VINFO_REDUC_DEF (stmt_info);
-+    }
-+  else if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle)
-+    {
-+      edge pe = loop_preheader_edge (gimple_bb (phi)->loop_father);
-+      stmt_vec_info info
-+	  = stmt_info->vinfo->lookup_def (PHI_ARG_DEF_FROM_EDGE (phi, pe));
-+      if (info && STMT_VINFO_DEF_TYPE (info) == vect_double_reduction_def)
-+	stmt_info = info;
-+    }
-+  return stmt_info;
-+}
-+
-+/* Function vect_create_epilog_for_reduction
-+
-+   Create code at the loop-epilog to finalize the result of a reduction
-    computation. 
-   
--   VECT_DEFS is list of vector of partial results, i.e., the lhs's of vector 
--     reduction statements. 
-    STMT_INFO is the scalar reduction stmt that is being vectorized.
--   NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
--     number of elements that we can fit in a vectype (nunits).  In this case
--     we have to generate more than one vector stmt - i.e - we need to "unroll"
--     the vector stmt by a factor VF/nunits.  For more details see documentation
--     in vectorizable_operation.
--   REDUC_FN is the internal function for the epilog reduction.
--   REDUCTION_PHIS is a list of the phi-nodes that carry the reduction 
--     computation.
--   REDUC_INDEX is the index of the operand in the right hand side of the 
--     statement that is defined by REDUCTION_PHI.
--   DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
-    SLP_NODE is an SLP node containing a group of reduction statements. The 
-      first one in this group is STMT_INFO.
--   INDUC_VAL is for INTEGER_INDUC_COND_REDUCTION the value to use for the case
--     when the COND_EXPR is never true in the loop.  For MAX_EXPR, it needs to
--     be smaller than any value of the IV in the loop, for MIN_EXPR larger than
--     any value of the IV in the loop.
--   INDUC_CODE is the code for epilog reduction if INTEGER_INDUC_COND_REDUCTION.
--   NEUTRAL_OP is the value given by neutral_op_for_slp_reduction; it is
--     null if this is not an SLP reduction
-+   SLP_NODE_INSTANCE is the SLP node instance containing SLP_NODE
-+   REDUC_INDEX says which rhs operand of the STMT_INFO is the reduction phi
-+     (counting from 0)
- 
-    This function:
--   1. Creates the reduction def-use cycles: sets the arguments for 
--      REDUCTION_PHIS:
--      The loop-entry argument is the vectorized initial-value of the reduction.
--      The loop-latch argument is taken from VECT_DEFS - the vector of partial 
--      sums.
-+   1. Completes the reduction def-use cycles.
-    2. "Reduces" each vector of partial results VECT_DEFS into a single result,
-       by calling the function specified by REDUC_FN if available, or by
-       other means (whole-vector shifts or a scalar loop).
-@@ -4354,7 +4326,7 @@ get_initial_defs_for_reduction (slp_tree slp_node,
-      The flow at the entry to this function:
- 
-         loop:
--          vec_def = phi             # REDUCTION_PHI
-+          vec_def = phi         # REDUCTION_PHI
-           VECT_DEF = vector_stmt                # vectorized form of STMT_INFO
-           s_loop = scalar_stmt                  # (scalar) STMT_INFO
-         loop_exit:
-@@ -4379,21 +4351,34 @@ get_initial_defs_for_reduction (slp_tree slp_node,
- */
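In plain C terms, the epilogue this function emits boils down to folding a vector of per-lane partial results into one scalar after the loop. A minimal standalone sketch of that idea for a 4-lane sum reduction (illustrative only, not the GCC implementation):

    static int
    reduce_epilogue (const int partial[4])
    {
      /* Fold the per-lane partial sums into the final scalar result.  */
      int result = 0;
      for (int lane = 0; lane < 4; lane++)
        result += partial[lane];
      return result;
    }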
- 
- static void
--vect_create_epilog_for_reduction (vec<tree> vect_defs,
--				  stmt_vec_info stmt_info,
--				  gimple *reduc_def_stmt,
--				  int ncopies, internal_fn reduc_fn,
--				  vec<stmt_vec_info> reduction_phis,
--                                  bool double_reduc, 
-+vect_create_epilog_for_reduction (stmt_vec_info stmt_info,
- 				  slp_tree slp_node,
--				  slp_instance slp_node_instance,
--				  tree induc_val, enum tree_code induc_code,
--				  tree neutral_op)
-+				  slp_instance slp_node_instance)
- {
-+  stmt_vec_info reduc_info = info_for_reduction (stmt_info);
-+  gcc_assert (reduc_info->is_reduc_info);
-+  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
-+  /* For double reductions we need to get at the inner loop reduction
-+     stmt which has the meta info attached.  Our stmt_info is that of the
-+     loop-closed PHI of the inner loop which we remember as
-+     def for the reduction PHI generation.  */
-+  bool double_reduc = false;
-+  stmt_vec_info rdef_info = stmt_info;
-+  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
-+    {
-+      gcc_assert (!slp_node);
-+      double_reduc = true;
-+      stmt_info = loop_vinfo->lookup_def (gimple_phi_arg_def
-+					    (stmt_info->stmt, 0));
-+      stmt_info = vect_stmt_to_vectorize (stmt_info);
-+    }
-+  gphi *reduc_def_stmt
-+    = as_a <gphi *> (STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))->stmt);
-+  enum tree_code code = STMT_VINFO_REDUC_CODE (reduc_info);
-+  internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info);
-   stmt_vec_info prev_phi_info;
-   tree vectype;
-   machine_mode mode;
--  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
-   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
-   basic_block exit_bb;
-   tree scalar_dest;
-@@ -4401,32 +4386,24 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
-   gimple *new_phi = NULL, *phi;
-   stmt_vec_info phi_info;
-   gimple_stmt_iterator exit_gsi;
--  tree vec_dest;
--  tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest;
-+  tree new_temp = NULL_TREE, new_name, new_scalar_dest;
-   gimple *epilog_stmt = NULL;
--  enum tree_code code = gimple_assign_rhs_code (stmt_info->stmt);
-   gimple *exit_phi;
-   tree bitsize;
--  tree adjustment_def = NULL;
--  tree vec_initial_def = NULL;
--  tree expr, def, initial_def = NULL;
-+  tree def;
-   tree orig_name, scalar_result;
-   imm_use_iterator imm_iter, phi_imm_iter;
-   use_operand_p use_p, phi_use_p;
-   gimple *use_stmt;
--  stmt_vec_info reduction_phi_info = NULL;
-   bool nested_in_vect_loop = false;
-   auto_vec<gimple *> new_phis;
--  auto_vec<stmt_vec_info> inner_phis;
-   int j, i;
-   auto_vec<tree> scalar_results;
--  unsigned int group_size = 1, k, ratio;
--  auto_vec<tree> vec_initial_defs;
-+  unsigned int group_size = 1, k;
-   auto_vec<gimple *> phis;
-   bool slp_reduc = false;
-   bool direct_slp_reduc;
-   tree new_phi_result;
--  stmt_vec_info inner_phi = NULL;
-   tree induction_index = NULL_TREE;
- 
-   if (slp_node)
-@@ -4439,127 +4416,53 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
-       nested_in_vect_loop = true;
-       gcc_assert (!slp_node);
-     }
-+  gcc_assert (!nested_in_vect_loop || double_reduc);
- 
--  vectype = STMT_VINFO_VECTYPE (stmt_info);
-+  vectype = STMT_VINFO_REDUC_VECTYPE (reduc_info);
-   gcc_assert (vectype);
-   mode = TYPE_MODE (vectype);
- 
--  /* 1. Create the reduction def-use cycle:
--     Set the arguments of REDUCTION_PHIS, i.e., transform
--
--        loop:
--          vec_def = phi             # REDUCTION_PHI
--          VECT_DEF = vector_stmt                # vectorized form of STMT
--          ...
--
--     into:
--
--        loop:
--          vec_def = phi     # REDUCTION_PHI
--          VECT_DEF = vector_stmt                # vectorized form of STMT
--          ...
--
--     (in case of SLP, do it for all the phis). */
--
--  /* Get the loop-entry arguments.  */
--  enum vect_def_type initial_def_dt = vect_unknown_def_type;
-+  tree initial_def = NULL;
-+  tree induc_val = NULL_TREE;
-+  tree adjustment_def = NULL;
-   if (slp_node)
--    {
--      unsigned vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
--      vec_initial_defs.reserve (vec_num);
--      get_initial_defs_for_reduction (slp_node_instance->reduc_phis,
--				      &vec_initial_defs, vec_num,
--				      REDUC_GROUP_FIRST_ELEMENT (stmt_info),
--				      neutral_op);
--    }
-+    ;
-   else
-     {
-       /* Get at the scalar def before the loop, that defines the initial value
- 	 of the reduction variable.  */
-       initial_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
- 					   loop_preheader_edge (loop));
--      /* Optimize: if initial_def is for REDUC_MAX smaller than the base
--	 and we can't use zero for induc_val, use initial_def.  Similarly
--	 for REDUC_MIN and initial_def larger than the base.  */
--      if (TREE_CODE (initial_def) == INTEGER_CST
--	  && (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
--	      == INTEGER_INDUC_COND_REDUCTION)
--	  && !integer_zerop (induc_val)
--	  && ((induc_code == MAX_EXPR
--	       && tree_int_cst_lt (initial_def, induc_val))
--	      || (induc_code == MIN_EXPR
--		  && tree_int_cst_lt (induc_val, initial_def))))
--	induc_val = initial_def;
--
--      if (double_reduc)
--	/* In case of double reduction we only create a vector variable
--	   to be put in the reduction phi node.  The actual statement
--	   creation is done later in this function.  */
--	vec_initial_def = vect_create_destination_var (initial_def, vectype);
-+      /* Optimize: for induction condition reduction, if we can't use zero
-+         for induc_val, use initial_def.  */
-+      if (STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION)
-+	induc_val = STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info);
-+      else if (double_reduc)
-+	;
-       else if (nested_in_vect_loop)
--	{
--	  /* Do not use an adjustment def as that case is not supported
--	     correctly if ncopies is not one.  */
--	  vect_is_simple_use (initial_def, loop_vinfo, &initial_def_dt);
--	  vec_initial_def = vect_get_vec_def_for_operand (initial_def,
--							  stmt_info);
--	}
-+	;
-       else
--	vec_initial_def
--	  = get_initial_def_for_reduction (stmt_info, initial_def,
--					   &adjustment_def);
--      vec_initial_defs.create (1);
--      vec_initial_defs.quick_push (vec_initial_def);
-+	adjustment_def = STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info);
-     }
- 
--  /* Set phi nodes arguments.  */
--  FOR_EACH_VEC_ELT (reduction_phis, i, phi_info)
-+  unsigned vec_num;
-+  int ncopies;
-+  if (slp_node)
-     {
--      tree vec_init_def = vec_initial_defs[i];
--      tree def = vect_defs[i];
--      for (j = 0; j < ncopies; j++)
--        {
--	  if (j != 0)
--	    {
--	      phi_info = STMT_VINFO_RELATED_STMT (phi_info);
--	      if (nested_in_vect_loop)
--		vec_init_def
--		  = vect_get_vec_def_for_stmt_copy (loop_vinfo, vec_init_def);
--	    }
--
--	  /* Set the loop-entry arg of the reduction-phi.  */
--
--	  gphi *phi = as_a <gphi *> (phi_info->stmt);
--	  if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
--	      == INTEGER_INDUC_COND_REDUCTION)
--	    {
--	      /* Initialise the reduction phi to zero.  This prevents initial
--		 values of non-zero interferring with the reduction op.  */
--	      gcc_assert (ncopies == 1);
--	      gcc_assert (i == 0);
--
--	      tree vec_init_def_type = TREE_TYPE (vec_init_def);
--	      tree induc_val_vec
--		= build_vector_from_val (vec_init_def_type, induc_val);
--
--	      add_phi_arg (phi, induc_val_vec, loop_preheader_edge (loop),
--			   UNKNOWN_LOCATION);
--	    }
--	  else
--	    add_phi_arg (phi, vec_init_def, loop_preheader_edge (loop),
--			 UNKNOWN_LOCATION);
--
--          /* Set the loop-latch arg for the reduction-phi.  */
--          if (j > 0)
--	    def = vect_get_vec_def_for_stmt_copy (loop_vinfo, def);
--
--	  add_phi_arg (phi, def, loop_latch_edge (loop), UNKNOWN_LOCATION);
--
--          if (dump_enabled_p ())
--	    dump_printf_loc (MSG_NOTE, vect_location,
--			     "transform reduction: created def-use cycle: %G%G",
--			     phi, SSA_NAME_DEF_STMT (def));
--        }
-+      vec_num = SLP_TREE_VEC_STMTS (slp_node_instance->reduc_phis).length ();
-+      ncopies = 1;
-+    }
-+  else
-+    {
-+      vec_num = 1;
-+      ncopies = 0;
-+      phi_info = STMT_VINFO_VEC_STMT (loop_vinfo->lookup_stmt (reduc_def_stmt));
-+      do
-+	{
-+	  ncopies++;
-+	  phi_info = STMT_VINFO_RELATED_STMT (phi_info);
-+	}
-+      while (phi_info);
-     }
- 
-   /* For cond reductions we want to create a new vector (INDEX_COND_EXPR)
-@@ -4569,7 +4472,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
-      The first match will be a 1 to allow 0 to be used for non-matching
-      indexes.  If there are no matches at all then the vector will be all
-      zeroes.  */
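The index vector described in the comment above is easiest to see against the scalar form of a conditional reduction; a rough C model of what the vector code has to reproduce (hypothetical example, not taken from the patch or its testsuite):

    /* Return the b[] value from the last iteration whose condition held,
       or INIT if no iteration matched.  The vectorized form keeps a
       per-lane (index + 1) value, 0 meaning "this lane never matched",
       and at the end picks the value belonging to the largest index.  */
    static int
    cond_reduction (const int *a, const int *b, int n, int init)
    {
      int last = init;
      for (int i = 0; i < n; i++)
        if (a[i] > 0)
          last = b[i];
      return last;
    }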
--  if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
-+  if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION)
-     {
-       tree indx_before_incr, indx_after_incr;
-       poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype);
-@@ -4627,11 +4530,17 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
-       tree ccompare = unshare_expr (gimple_assign_rhs1 (vec_stmt));
- 
-       /* Create a conditional, where the condition is taken from vec_stmt
--	 (CCOMPARE), then is the induction index (INDEX_BEFORE_INCR) and
--	 else is the phi (NEW_PHI_TREE).  */
--      tree index_cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type,
--				     ccompare, indx_before_incr,
--				     new_phi_tree);
-+	 (CCOMPARE).  The then and else values mirror the main VEC_COND_EXPR:
-+	 the reduction phi corresponds to NEW_PHI_TREE and the new values
-+	 correspond to INDEX_BEFORE_INCR.  */
-+      gcc_assert (STMT_VINFO_REDUC_IDX (stmt_info) >= 1);
-+      tree index_cond_expr;
-+      if (STMT_VINFO_REDUC_IDX (stmt_info) == 2)
-+	index_cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type,
-+				  ccompare, indx_before_incr, new_phi_tree);
-+      else
-+	index_cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type,
-+				  ccompare, new_phi_tree, indx_before_incr);
-       induction_index = make_ssa_name (cr_index_vector_type);
-       gimple *index_condition = gimple_build_assign (induction_index,
- 						     index_cond_expr);
-@@ -4674,12 +4583,17 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
-   /* 2.1 Create new loop-exit-phis to preserve loop-closed form:
-          v_out1 = phi <VECT_DEF>
-          Store them in NEW_PHIS.  */
--
-+  if (double_reduc)
-+    loop = outer_loop;
-   exit_bb = single_exit (loop)->dest;
-   prev_phi_info = NULL;
--  new_phis.create (vect_defs.length ());
--  FOR_EACH_VEC_ELT (vect_defs, i, def)
-+  new_phis.create (slp_node ? vec_num : ncopies);
-+  for (unsigned i = 0; i < vec_num; i++)
-     {
-+      if (slp_node)
-+	def = gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[i]->stmt);
-+      else
-+	def = gimple_get_lhs (STMT_VINFO_VEC_STMT (rdef_info)->stmt);
-       for (j = 0; j < ncopies; j++)
-         {
- 	  tree new_def = copy_ssa_name (def);
-@@ -4698,37 +4612,6 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
-         }
-     }
- 
--  /* The epilogue is created for the outer-loop, i.e., for the loop being
--     vectorized.  Create exit phis for the outer loop.  */
--  if (double_reduc)
--    {
--      loop = outer_loop;
--      exit_bb = single_exit (loop)->dest;
--      inner_phis.create (vect_defs.length ());
--      FOR_EACH_VEC_ELT (new_phis, i, phi)
--	{
--	  stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi);
--	  tree new_result = copy_ssa_name (PHI_RESULT (phi));
--	  gphi *outer_phi = create_phi_node (new_result, exit_bb);
--	  SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
--			   PHI_RESULT (phi));
--	  prev_phi_info = loop_vinfo->add_stmt (outer_phi);
--	  inner_phis.quick_push (phi_info);
--	  new_phis[i] = outer_phi;
--	  while (STMT_VINFO_RELATED_STMT (phi_info))
--            {
--	      phi_info = STMT_VINFO_RELATED_STMT (phi_info);
--	      new_result = copy_ssa_name (PHI_RESULT (phi_info->stmt));
--	      outer_phi = create_phi_node (new_result, exit_bb);
--	      SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
--			       PHI_RESULT (phi_info->stmt));
--	      stmt_vec_info outer_phi_info = loop_vinfo->add_stmt (outer_phi);
--	      STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi_info;
--	      prev_phi_info = outer_phi_info;
--	    }
--	}
--    }
--
-   exit_gsi = gsi_after_labels (exit_bb);
- 
-   /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
-@@ -4747,12 +4630,6 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
-       gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
-       gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt_info);
-     }
--
--  code = gimple_assign_rhs_code (orig_stmt_info->stmt);
--  /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore,
--     partial results are added and not subtracted.  */
--  if (code == MINUS_EXPR) 
--    code = PLUS_EXPR;
-   
-   scalar_dest = gimple_assign_lhs (orig_stmt_info->stmt);
-   scalar_type = TREE_TYPE (scalar_dest);
-@@ -4760,15 +4637,6 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
-   new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
-   bitsize = TYPE_SIZE (scalar_type);
- 
--  /* In case this is a reduction in an inner-loop while vectorizing an outer
--     loop - we don't need to extract a single scalar result at the end of the
--     inner-loop (unless it is double reduction, i.e., the use of reduction is
--     outside the outer-loop).  The final vector of partial results will be used
--     in the vectorized outer-loop, or reduced to a scalar result at the end of
--     the outer-loop.  */
--  if (nested_in_vect_loop && !double_reduc)
--    goto vect_finalize_reduction;
--
-   /* SLP reduction without reduction chain, e.g.,
-      # a1 = phi <a2, a0>
-      # b1 = phi <b2, b0>
-@@ -4791,53 +4659,48 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
-      one vector.  */
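A source-level shape that reaches this path might look like the following sketch (hypothetical, not from the testsuite): two independent accumulators that SLP packs into the lanes of one vector, so the epilogue has to produce one scalar result per accumulator rather than a single combined sum.

    static void
    two_accumulators (const int *x, int n, int *res_a, int *res_b)
    {
      int a = 0, b = 0;
      for (int i = 0; i < n; i += 2)
        {
          a += x[i];       /* becomes one lane group of the SLP vector */
          b += x[i + 1];   /* becomes the other lane group */
        }
      *res_a = a;
      *res_b = b;
    }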
-   if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) || direct_slp_reduc)
-     {
-+      gimple_seq stmts = NULL;
-       tree first_vect = PHI_RESULT (new_phis[0]);
--      gassign *new_vec_stmt = NULL;
--      vec_dest = vect_create_destination_var (scalar_dest, vectype);
-+      first_vect = gimple_convert (&stmts, vectype, first_vect);
-       for (k = 1; k < new_phis.length (); k++)
-         {
- 	  gimple *next_phi = new_phis[k];
-           tree second_vect = PHI_RESULT (next_phi);
--          tree tem = make_ssa_name (vec_dest, new_vec_stmt);
--          new_vec_stmt = gimple_build_assign (tem, code,
--					      first_vect, second_vect);
--          gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
--	  first_vect = tem;
-+	  second_vect = gimple_convert (&stmts, vectype, second_vect);
-+          first_vect = gimple_build (&stmts, code, vectype,
-+				     first_vect, second_vect);
-         }
-+      gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
- 
-       new_phi_result = first_vect;
--      if (new_vec_stmt)
--        {
--          new_phis.truncate (0);
--          new_phis.safe_push (new_vec_stmt);
--        }
-+      new_phis.truncate (0);
-+      new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect));
-     }
-   /* Likewise if we couldn't use a single defuse cycle.  */
-   else if (ncopies > 1)
-     {
-       gcc_assert (new_phis.length () == 1);
-+      gimple_seq stmts = NULL;
-       tree first_vect = PHI_RESULT (new_phis[0]);
--      gassign *new_vec_stmt = NULL;
--      vec_dest = vect_create_destination_var (scalar_dest, vectype);
-+      first_vect = gimple_convert (&stmts, vectype, first_vect);
-       stmt_vec_info next_phi_info = loop_vinfo->lookup_stmt (new_phis[0]);
-       for (int k = 1; k < ncopies; ++k)
- 	{
- 	  next_phi_info = STMT_VINFO_RELATED_STMT (next_phi_info);
- 	  tree second_vect = PHI_RESULT (next_phi_info->stmt);
--          tree tem = make_ssa_name (vec_dest, new_vec_stmt);
--          new_vec_stmt = gimple_build_assign (tem, code,
--					      first_vect, second_vect);
--          gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
--	  first_vect = tem;
-+	  second_vect = gimple_convert (&stmts, vectype, second_vect);
-+	  first_vect = gimple_build (&stmts, code, vectype,
-+				     first_vect, second_vect);
- 	}
-+      gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
-       new_phi_result = first_vect;
-       new_phis.truncate (0);
--      new_phis.safe_push (new_vec_stmt);
-+      new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect));
-     }
-   else
-     new_phi_result = PHI_RESULT (new_phis[0]);
- 
--  if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION
-+  if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION
-       && reduc_fn != IFN_LAST)
-     {
-       /* For condition reductions, we have a vector (NEW_PHI_RESULT) containing
-@@ -4852,8 +4715,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
-       tree index_vec_type = TREE_TYPE (induction_index);
-       gcc_checking_assert (TYPE_UNSIGNED (index_vec_type));
-       tree index_scalar_type = TREE_TYPE (index_vec_type);
--      tree index_vec_cmp_type = build_same_sized_truth_vector_type
--	(index_vec_type);
-+      tree index_vec_cmp_type = truth_type_for (index_vec_type);
- 
-       /* Get an unsigned integer version of the type of the data vector.  */
-       int scalar_precision
-@@ -4946,7 +4808,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
-       gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
-       scalar_results.safe_push (new_temp);
-     }
--  else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION
-+  else if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION
- 	   && reduc_fn == IFN_LAST)
-     {
-       /* Condition reduction without supported IFN_REDUC_MAX.  Generate
-@@ -4989,7 +4851,6 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
- 	  if (off != 0)
- 	    {
- 	      tree new_idx_val = idx_val;
--	      tree new_val = val;
- 	      if (off != v_size - el_size)
- 		{
- 		  new_idx_val = make_ssa_name (idx_eltype);
-@@ -4998,7 +4859,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
- 						     old_idx_val);
- 		  gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
- 		}
--	      new_val = make_ssa_name (data_eltype);
-+	      tree new_val = make_ssa_name (data_eltype);
- 	      epilog_stmt = gimple_build_assign (new_val,
- 						 COND_EXPR,
- 						 build2 (GT_EXPR,
-@@ -5060,9 +4921,8 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
-       gimple_set_lhs (epilog_stmt, new_temp);
-       gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
- 
--      if ((STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
--	   == INTEGER_INDUC_COND_REDUCTION)
--	  && !operand_equal_p (initial_def, induc_val, 0))
-+      if ((STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION)
-+	  && induc_val)
- 	{
- 	  /* Earlier we set the initial value to be a vector if induc_val
- 	     values.  Check the result and if it is induc_val then replace
-@@ -5100,7 +4960,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
-       tree index = build_index_vector (vectype, 0, 1);
-       tree index_type = TREE_TYPE (index);
-       tree index_elt_type = TREE_TYPE (index_type);
--      tree mask_type = build_same_sized_truth_vector_type (index_type);
-+      tree mask_type = truth_type_for (index_type);
- 
-       /* Create a vector that, for each element, identifies which of
- 	 the REDUC_GROUP_SIZE results should use it.  */
-@@ -5112,6 +4972,14 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
- 	 scalar value if we have one, otherwise the initial scalar value
- 	 is itself a neutral value.  */
-       tree vector_identity = NULL_TREE;
-+      tree neutral_op = NULL_TREE;
-+      if (slp_node)
-+	{
-+	  stmt_vec_info first = REDUC_GROUP_FIRST_ELEMENT (stmt_info);
-+	  neutral_op
-+	    = neutral_op_for_slp_reduction (slp_node_instance->reduc_phis,
-+					    vectype, code, first != NULL);
-+	}
-       if (neutral_op)
- 	vector_identity = gimple_build_vector_from_val (&seq, vectype,
- 							neutral_op);
-@@ -5161,32 +5029,19 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
-       bool reduce_with_shift;
-       tree vec_temp;
- 
--      /* COND reductions all do the final reduction with MAX_EXPR
--	 or MIN_EXPR.  */
--      if (code == COND_EXPR)
--	{
--	  if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
--	      == INTEGER_INDUC_COND_REDUCTION)
--	    code = induc_code;
--	  else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
--		   == CONST_COND_REDUCTION)
--	    code = STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info);
--	  else
--	    code = MAX_EXPR;
--	}
--
-       /* See if the target wants to do the final (shift) reduction
- 	 in a vector mode of smaller size and first reduce upper/lower
- 	 halves against each other.  */
-       enum machine_mode mode1 = mode;
--      tree vectype1 = vectype;
--      unsigned sz = tree_to_uhwi (TYPE_SIZE_UNIT (vectype));
--      unsigned sz1 = sz;
-+      tree stype = TREE_TYPE (vectype);
-+      unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
-+      unsigned nunits1 = nunits;
-       if (!slp_reduc
- 	  && (mode1 = targetm.vectorize.split_reduction (mode)) != mode)
--	sz1 = GET_MODE_SIZE (mode1).to_constant ();
-+	nunits1 = GET_MODE_NUNITS (mode1).to_constant ();
- 
--      vectype1 = get_vectype_for_scalar_type_and_size (scalar_type, sz1);
-+      tree vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype),
-+							   stype, nunits1);
-       reduce_with_shift = have_whole_vector_shift (mode1);
-       if (!VECTOR_MODE_P (mode1))
- 	reduce_with_shift = false;
-@@ -5200,11 +5055,13 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
-       /* First reduce the vector to the desired vector size we should
- 	 do shift reduction on by combining upper and lower halves.  */
-       new_temp = new_phi_result;
--      while (sz > sz1)
-+      while (nunits > nunits1)
- 	{
- 	  gcc_assert (!slp_reduc);
--	  sz /= 2;
--	  vectype1 = get_vectype_for_scalar_type_and_size (scalar_type, sz);
-+	  nunits /= 2;
-+	  vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype),
-+							  stype, nunits);
-+	  unsigned int bitsize = tree_to_uhwi (TYPE_SIZE (vectype1));
- 
- 	  /* The target has to make sure we support lowpart/highpart
- 	     extraction, either via direct vector extract or through
-@@ -5229,15 +5086,14 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
- 		  = gimple_build_assign (dst2, BIT_FIELD_REF,
- 					 build3 (BIT_FIELD_REF, vectype1,
- 						 new_temp, TYPE_SIZE (vectype1),
--						 bitsize_int (sz * BITS_PER_UNIT)));
-+						 bitsize_int (bitsize)));
- 	      gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
- 	    }
- 	  else
- 	    {
- 	      /* Extract via punning to appropriately sized integer mode
- 		 vector.  */
--	      tree eltype = build_nonstandard_integer_type (sz * BITS_PER_UNIT,
--							    1);
-+	      tree eltype = build_nonstandard_integer_type (bitsize, 1);
- 	      tree etype = build_vector_type (eltype, 2);
- 	      gcc_assert (convert_optab_handler (vec_extract_optab,
- 						 TYPE_MODE (etype),
-@@ -5266,7 +5122,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
- 		  = gimple_build_assign (tem, BIT_FIELD_REF,
- 					 build3 (BIT_FIELD_REF, eltype,
- 						 new_temp, TYPE_SIZE (eltype),
--						 bitsize_int (sz * BITS_PER_UNIT)));
-+						 bitsize_int (bitsize)));
- 	      gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
- 	      dst2 =  make_ssa_name (vectype1);
- 	      epilog_stmt = gimple_build_assign (dst2, VIEW_CONVERT_EXPR,
-@@ -5307,8 +5163,8 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
-             dump_printf_loc (MSG_NOTE, vect_location,
- 			     "Reduce using vector shifts\n");
- 
--	  mode1 = TYPE_MODE (vectype1);
--          vec_dest = vect_create_destination_var (scalar_dest, vectype1);
-+	  gimple_seq stmts = NULL;
-+	  new_temp = gimple_convert (&stmts, vectype1, new_temp);
-           for (elt_offset = nelements / 2;
-                elt_offset >= 1;
-                elt_offset /= 2)
-@@ -5316,18 +5172,12 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
- 	      calc_vec_perm_mask_for_shift (elt_offset, nelements, &sel);
- 	      indices.new_vector (sel, 2, nelements);
- 	      tree mask = vect_gen_perm_mask_any (vectype1, indices);
--	      epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR,
--						 new_temp, zero_vec, mask);
--              new_name = make_ssa_name (vec_dest, epilog_stmt);
--              gimple_assign_set_lhs (epilog_stmt, new_name);
--              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
--
--	      epilog_stmt = gimple_build_assign (vec_dest, code, new_name,
--						 new_temp);
--              new_temp = make_ssa_name (vec_dest, epilog_stmt);
--              gimple_assign_set_lhs (epilog_stmt, new_temp);
--              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
-+	      new_name = gimple_build (&stmts, VEC_PERM_EXPR, vectype1,
-+				       new_temp, zero_vec, mask);
-+	      new_temp = gimple_build (&stmts, code,
-+				       vectype1, new_name, new_temp);
-             }
-+	  gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
- 
- 	  /* 2.4  Extract the final scalar result.  Create:
- 	     s_out3 = extract_field <v_out2, bitpos>  */
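The whole-vector-shift scheme used when no direct reduction instruction is available can be modelled in scalar C as repeatedly folding the upper half of the vector into the lower half and then reading lane 0 (illustrative sketch only, not GCC code):

    static int
    shift_reduce (int v[8])
    {
      /* Each outer iteration mirrors one VEC_PERM_EXPR + add pair above.  */
      for (int half = 4; half >= 1; half /= 2)
        for (int i = 0; i < half; i++)
          v[i] += v[i + half];
      return v[0];   /* the final BIT_FIELD_REF extract of lane 0 */
    }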
-@@ -5439,9 +5289,8 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
-             scalar_results.safe_push (new_temp);
-         }
- 
--      if ((STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
--	   == INTEGER_INDUC_COND_REDUCTION)
--	  && !operand_equal_p (initial_def, induc_val, 0))
-+      if ((STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION)
-+	  && induc_val)
- 	{
- 	  /* Earlier we set the initial value to be a vector if induc_val
- 	     values.  Check the result and if it is induc_val then replace
-@@ -5457,12 +5306,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
- 	  scalar_results[0] = tmp;
- 	}
-     }
--  
--vect_finalize_reduction:
--
--  if (double_reduc)
--    loop = loop->inner;
--
-+ 
-   /* 2.5 Adjust the final result by the initial value of the reduction
- 	 variable. (When such adjustment is not needed, then
- 	 'adjustment_def' is zero).  For example, if code is PLUS we create:
-@@ -5471,25 +5315,26 @@ vect_finalize_reduction:
-   if (adjustment_def)
-     {
-       gcc_assert (!slp_reduc);
-+      gimple_seq stmts = NULL;
-       if (nested_in_vect_loop)
- 	{
-           new_phi = new_phis[0];
--	  gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
--	  expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
--	  new_dest = vect_create_destination_var (scalar_dest, vectype);
-+	  gcc_assert (VECTOR_TYPE_P (TREE_TYPE (adjustment_def)));
-+	  adjustment_def = gimple_convert (&stmts, vectype, adjustment_def);
-+	  new_temp = gimple_build (&stmts, code, vectype,
-+				   PHI_RESULT (new_phi), adjustment_def);
- 	}
-       else
- 	{
-           new_temp = scalar_results[0];
- 	  gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
--	  expr = build2 (code, scalar_type, new_temp, adjustment_def);
--	  new_dest = vect_create_destination_var (scalar_dest, scalar_type);
-+	  adjustment_def = gimple_convert (&stmts, scalar_type, adjustment_def);
-+	  new_temp = gimple_build (&stmts, code, scalar_type,
-+				   new_temp, adjustment_def);
- 	}
- 
--      epilog_stmt = gimple_build_assign (new_dest, expr);
--      new_temp = make_ssa_name (new_dest, epilog_stmt);
--      gimple_assign_set_lhs (epilog_stmt, new_temp);
--      gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
-+      epilog_stmt = gimple_seq_last_stmt (stmts);
-+      gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
-       if (nested_in_vect_loop)
-         {
- 	  stmt_vec_info epilog_stmt_info = loop_vinfo->add_stmt (epilog_stmt);
-@@ -5507,6 +5352,9 @@ vect_finalize_reduction:
-       new_phis[0] = epilog_stmt;
-     }
- 
-+  if (double_reduc)
-+    loop = loop->inner;
-+
-   /* 2.6  Handle the loop-exit phis.  Replace the uses of scalar loop-exit
-           phis with new adjusted scalar results, i.e., replace use 
-           with use .        
-@@ -5552,24 +5400,10 @@ vect_finalize_reduction:
-      correspond to the first vector stmt, etc.
-      (RATIO is equal to (REDUC_GROUP_SIZE / number of new vector stmts)).  */
-   if (group_size > new_phis.length ())
--    {
--      ratio = group_size / new_phis.length ();
--      gcc_assert (!(group_size % new_phis.length ()));
--    }
--  else
--    ratio = 1;
-+    gcc_assert (!(group_size % new_phis.length ()));
- 
--  stmt_vec_info epilog_stmt_info = NULL;
-   for (k = 0; k < group_size; k++)
-     {
--      if (k % ratio == 0)
--        {
--	  epilog_stmt_info = loop_vinfo->lookup_stmt (new_phis[k / ratio]);
--	  reduction_phi_info = reduction_phis[k / ratio];
--	  if (double_reduc)
--	    inner_phi = inner_phis[k / ratio];
--        }
--
-       if (slp_reduc)
-         {
- 	  stmt_vec_info scalar_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[k];
-@@ -5580,121 +5414,12 @@ vect_finalize_reduction:
- 	  scalar_dest = gimple_assign_lhs (scalar_stmt_info->stmt);
-         }
- 
--      phis.create (3);
--      /* Find the loop-closed-use at the loop exit of the original scalar
--         result.  (The reduction result is expected to have two immediate uses -
--         one at the latch block, and one at the loop exit).  */
--      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
--        if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p)))
--	    && !is_gimple_debug (USE_STMT (use_p)))
--          phis.safe_push (USE_STMT (use_p));
--
--      /* While we expect to have found an exit_phi because of loop-closed-ssa
--         form we can end up without one if the scalar cycle is dead.  */
--
--      FOR_EACH_VEC_ELT (phis, i, exit_phi)
--        {
--          if (outer_loop)
--            {
--	      stmt_vec_info exit_phi_vinfo
--		= loop_vinfo->lookup_stmt (exit_phi);
--              gphi *vect_phi;
--
--	      if (double_reduc)
--		STMT_VINFO_VEC_STMT (exit_phi_vinfo) = inner_phi;
--	      else
--		STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt_info;
--              if (!double_reduc
--                  || STMT_VINFO_DEF_TYPE (exit_phi_vinfo)
--                      != vect_double_reduction_def)
--                continue;
--
--              /* Handle double reduction:
--
--                 stmt1: s1 = phi <s0, s2>  - double reduction phi (outer loop)
--                 stmt2:   s3 = phi <s1, s4> - (regular) reduc phi (inner loop)
--                 stmt3:   s4 = use (s3)     - (regular) reduc stmt (inner loop)
--                 stmt4: s2 = phi <s4>      - double reduction stmt (outer loop)
--
--                 At that point the regular reduction (stmt2 and stmt3) is
--                 already vectorized, as well as the exit phi node, stmt4.
--                 Here we vectorize the phi node of double reduction, stmt1, and
--                 update all relevant statements.  */
--
--              /* Go through all the uses of s2 to find double reduction phi
--                 node, i.e., stmt1 above.  */
--              orig_name = PHI_RESULT (exit_phi);
--              FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
--                {
--                  stmt_vec_info use_stmt_vinfo;
--                  tree vect_phi_init, preheader_arg, vect_phi_res;
--                  basic_block bb = gimple_bb (use_stmt);
--
--                  /* Check that USE_STMT is really double reduction phi
--                     node.  */
--                  if (gimple_code (use_stmt) != GIMPLE_PHI
--                      || gimple_phi_num_args (use_stmt) != 2
--                      || bb->loop_father != outer_loop)
--                    continue;
--		  use_stmt_vinfo = loop_vinfo->lookup_stmt (use_stmt);
--                  if (!use_stmt_vinfo
--                      || STMT_VINFO_DEF_TYPE (use_stmt_vinfo)
--                          != vect_double_reduction_def)
--		    continue;
--
--                  /* Create vector phi node for double reduction:
--                     vs1 = phi <vs0, vs2>
--                     vs1 was created previously in this function by a call to
--                       vect_get_vec_def_for_operand and is stored in
--                       vec_initial_def;
--                     vs2 is defined by INNER_PHI, the vectorized EXIT_PHI;
--                     vs0 is created here.  */
--
--                  /* Create vector phi node.  */
--                  vect_phi = create_phi_node (vec_initial_def, bb);
--		  loop_vec_info_for_loop (outer_loop)->add_stmt (vect_phi);
--
--                  /* Create vs0 - initial def of the double reduction phi.  */
--                  preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt,
--                                             loop_preheader_edge (outer_loop));
--                  vect_phi_init = get_initial_def_for_reduction
--		    (stmt_info, preheader_arg, NULL);
--
--                  /* Update phi node arguments with vs0 and vs2.  */
--                  add_phi_arg (vect_phi, vect_phi_init,
--                               loop_preheader_edge (outer_loop),
--                               UNKNOWN_LOCATION);
--		  add_phi_arg (vect_phi, PHI_RESULT (inner_phi->stmt),
--			       loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
--                  if (dump_enabled_p ())
--		    dump_printf_loc (MSG_NOTE, vect_location,
--				     "created double reduction phi node: %G",
--				     vect_phi);
--
--                  vect_phi_res = PHI_RESULT (vect_phi);
--
--                  /* Replace the use, i.e., set the correct vs1 in the regular
--                     reduction phi node.  FORNOW, NCOPIES is always 1, so the
--                     loop is redundant.  */
--		  stmt_vec_info use_info = reduction_phi_info;
--		  for (j = 0; j < ncopies; j++)
--		    {
--		      edge pr_edge = loop_preheader_edge (loop);
--		      SET_PHI_ARG_DEF (as_a <gphi *> (use_info->stmt),
--				       pr_edge->dest_idx, vect_phi_res);
--		      use_info = STMT_VINFO_RELATED_STMT (use_info);
--		    }
--                }
--            }
--        }
--
--      phis.release ();
-       if (nested_in_vect_loop)
-         {
-           if (double_reduc)
-             loop = outer_loop;
-           else
--            continue;
-+	    gcc_unreachable ();
-         }
- 
-       phis.create (3);
-@@ -5824,9 +5549,6 @@ vectorize_fold_left_reduction (stmt_vec_info stmt_info,
-   gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
-   gcc_assert (ncopies == 1);
-   gcc_assert (TREE_CODE_LENGTH (code) == binary_op);
--  gcc_assert (reduc_index == (code == MINUS_EXPR ? 0 : 1));
--  gcc_assert (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
--	      == FOLD_LEFT_REDUCTION);
- 
-   if (slp_node)
-     gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype_out),
-@@ -5840,10 +5562,7 @@ vectorize_fold_left_reduction (stmt_vec_info stmt_info,
-   if (slp_node)
-     {
-       auto_vec<vec<tree> > vec_defs (2);
--      auto_vec<tree> sops(2);
--      sops.quick_push (ops[0]);
--      sops.quick_push (ops[1]);
--      vect_get_slp_defs (sops, slp_node, &vec_defs);
-+      vect_get_slp_defs (slp_node, &vec_defs);
-       vec_oprnds0.safe_splice (vec_defs[1 - reduc_index]);
-       vec_defs[0].release ();
-       vec_defs[1].release ();
-@@ -5984,6 +5703,55 @@ is_nonwrapping_integer_induction (stmt_vec_info stmt_vinfo, struct loop *loop)
- 	  <= TYPE_PRECISION (lhs_type));
- }
- 
-+/* Check if masking can be supported by inserting a conditional expression.
-+   CODE is the code for the operation.  COND_FN is the conditional internal
-+   function, if it exists.  VECTYPE_IN is the type of the vector input.  */
-+static bool
-+use_mask_by_cond_expr_p (enum tree_code code, internal_fn cond_fn,
-+			 tree vectype_in)
-+{
-+  if (cond_fn != IFN_LAST
-+      && direct_internal_fn_supported_p (cond_fn, vectype_in,
-+					 OPTIMIZE_FOR_SPEED))
-+    return false;
-+
-+  switch (code)
-+    {
-+    case DOT_PROD_EXPR:
-+      return true;
-+
-+    default:
-+      return false;
-+    }
-+}
-+
-+/* Insert a conditional expression to enable masked vectorization.  CODE is the
-+   code for the operation.  VOP is the array of operands.  MASK is the loop
-+   mask.  GSI is a statement iterator used to place the new conditional
-+   expression.  */
-+static void
-+build_vect_cond_expr (enum tree_code code, tree vop[3], tree mask,
-+		      gimple_stmt_iterator *gsi)
-+{
-+  switch (code)
-+    {
-+    case DOT_PROD_EXPR:
-+      {
-+	tree vectype = TREE_TYPE (vop[1]);
-+	tree zero = build_zero_cst (vectype);
-+	tree masked_op1 = make_temp_ssa_name (vectype, NULL, "masked_op1");
-+	gassign *select = gimple_build_assign (masked_op1, VEC_COND_EXPR,
-+					       mask, vop[1], zero);
-+	gsi_insert_before (gsi, select, GSI_SAME_STMT);
-+	vop[1] = masked_op1;
-+	break;
-+      }
-+
-+    default:
-+      gcc_unreachable ();
-+    }
-+}
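The effect of the VEC_COND_EXPR inserted above, shown on the scalar form of a dot-product reduction (illustrative sketch; zero is a safe neutral value here because a masked-off product then contributes nothing to the sum):

    static int
    masked_dot_product (const signed char *a, const signed char *b,
                        const unsigned char *mask, int n)
    {
      int sum = 0;
      for (int i = 0; i < n; i++)
        sum += a[i] * (mask[i] ? b[i] : 0);   /* operand selected against 0 */
      return sum;
    }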
-+
- /* Function vectorizable_reduction.
- 
-    Check if STMT_INFO performs a reduction operation that can be vectorized.
-@@ -6027,182 +5795,163 @@ is_nonwrapping_integer_induction (stmt_vec_info stmt_vinfo, struct loop *loop)
-    corresponds to the type of arguments to the reduction stmt, and should *NOT*
-    be used to create the vectorized stmt.  The right vectype for the vectorized
-    stmt is obtained from the type of the result X:
--        get_vectype_for_scalar_type (TREE_TYPE (X))
-+      get_vectype_for_scalar_type (vinfo, TREE_TYPE (X))
- 
-    This means that, contrary to "regular" reductions (or "regular" stmts in
-    general), the following equation:
--      STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
-+      STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (vinfo, TREE_TYPE (X))
-    does *NOT* necessarily hold for reduction patterns.  */
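A concrete case of the mismatch described above is a widening reduction such as a dot product: the arguments are chars while the result X is an int, so the vector type of the inputs and the vector type derived from X differ (hypothetical example, not from the testsuite):

    static int
    dot_product (const signed char *a, const signed char *b, int n)
    {
      int sum = 0;   /* result X is an int, the inputs are chars */
      for (int i = 0; i < n; i++)
        sum += a[i] * b[i];
      return sum;
    }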
- 
- bool
--vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
--			stmt_vec_info *vec_stmt, slp_tree slp_node,
-+vectorizable_reduction (stmt_vec_info stmt_info, slp_tree slp_node,
- 			slp_instance slp_node_instance,
- 			stmt_vector_for_cost *cost_vec)
- {
--  tree vec_dest;
-   tree scalar_dest;
--  tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
-   tree vectype_in = NULL_TREE;
-   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
-   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
--  enum tree_code code, orig_code;
--  internal_fn reduc_fn;
--  machine_mode vec_mode;
--  int op_type;
--  optab optab;
--  tree new_temp = NULL_TREE;
--  enum vect_def_type dt, cond_reduc_dt = vect_unknown_def_type;
-+  enum vect_def_type cond_reduc_dt = vect_unknown_def_type;
-   stmt_vec_info cond_stmt_vinfo = NULL;
--  enum tree_code cond_reduc_op_code = ERROR_MARK;
-   tree scalar_type;
--  bool is_simple_use;
-   int i;
-   int ncopies;
--  int epilog_copies;
--  stmt_vec_info prev_stmt_info, prev_phi_info;
-   bool single_defuse_cycle = false;
--  stmt_vec_info new_stmt_info = NULL;
--  int j;
--  tree ops[3];
--  enum vect_def_type dts[3];
--  bool nested_cycle = false, found_nested_cycle_def = false;
-+  bool nested_cycle = false;
-   bool double_reduc = false;
--  basic_block def_bb;
--  struct loop * def_stmt_loop;
--  tree def_arg;
--  auto_vec<tree> vec_oprnds0;
--  auto_vec<tree> vec_oprnds1;
--  auto_vec<tree> vec_oprnds2;
--  auto_vec<tree> vect_defs;
--  auto_vec<stmt_vec_info> phis;
-   int vec_num;
--  tree def0, tem;
-+  tree tem;
-   tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE;
-   tree cond_reduc_val = NULL_TREE;
- 
-   /* Make sure it was already recognized as a reduction computation.  */
-   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
-+      && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def
-       && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle)
-     return false;
- 
--  if (nested_in_vect_loop_p (loop, stmt_info))
-+  /* The stmt we store reduction analysis meta on.  */
-+  stmt_vec_info reduc_info = info_for_reduction (stmt_info);
-+  reduc_info->is_reduc_info = true;
-+
-+  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle)
-     {
--      loop = loop->inner;
--      nested_cycle = true;
-+      if (is_a <gphi *> (stmt_info->stmt))
-+	{
-+	  /* Analysis for double-reduction is done on the outer
-+	     loop PHI, nested cycles have no further restrictions.  */
-+	  STMT_VINFO_TYPE (stmt_info) = cycle_phi_info_type;
-+	  /* For nested cycles we want to let regular vectorizable_*
-+	     routines handle code-generation.  */
-+	  if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_double_reduction_def)
-+	    {
-+	      stmt_info = STMT_VINFO_REDUC_DEF (stmt_info);
-+	      STMT_VINFO_DEF_TYPE (stmt_info) = vect_internal_def;
-+	      STMT_VINFO_DEF_TYPE (vect_stmt_to_vectorize (stmt_info))
-+		= vect_internal_def;
-+	    }
-+	}
-+      else
-+	STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
-+      return true;
-     }
- 
--  if (REDUC_GROUP_FIRST_ELEMENT (stmt_info))
--    gcc_assert (slp_node
--		&& REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info);
--
--  if (gphi *phi = dyn_cast <gphi *> (stmt_info->stmt))
-+  stmt_vec_info orig_stmt_of_analysis = stmt_info;
-+  stmt_vec_info phi_info = stmt_info;
-+  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
-+      || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
-     {
--      tree phi_result = gimple_phi_result (phi);
--      /* Analysis is fully done on the reduction stmt invocation.  */
--      if (! vec_stmt)
-+      if (!is_a <gphi *> (stmt_info->stmt))
- 	{
--	  if (slp_node)
--	    slp_node_instance->reduc_phis = slp_node;
--
- 	  STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
- 	  return true;
- 	}
--
--      if (STMT_VINFO_REDUC_TYPE (stmt_info) == FOLD_LEFT_REDUCTION)
--	/* Leave the scalar phi in place.  Note that checking
--	   STMT_VINFO_VEC_REDUCTION_TYPE (as below) only works
--	   for reductions involving a single statement.  */
--	return true;
--
--      stmt_vec_info reduc_stmt_info = STMT_VINFO_REDUC_DEF (stmt_info);
--      reduc_stmt_info = vect_stmt_to_vectorize (reduc_stmt_info);
--
--      if (STMT_VINFO_VEC_REDUCTION_TYPE (reduc_stmt_info)
--	  == EXTRACT_LAST_REDUCTION)
--	/* Leave the scalar phi in place.  */
--	return true;
--
--      gassign *reduc_stmt = as_a <gassign *> (reduc_stmt_info->stmt);
--      code = gimple_assign_rhs_code (reduc_stmt);
--      for (unsigned k = 1; k < gimple_num_ops (reduc_stmt); ++k)
-+      if (slp_node)
- 	{
--	  tree op = gimple_op (reduc_stmt, k);
--	  if (op == phi_result)
--	    continue;
--	  if (k == 1 && code == COND_EXPR)
--	    continue;
--	  bool is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt);
--	  gcc_assert (is_simple_use);
--	  if (dt == vect_constant_def || dt == vect_external_def)
--	    continue;
--	  if (!vectype_in
--	      || (GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_in)))
--		  < GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (op)))))
--	    vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op));
--	  break;
-+	  slp_node_instance->reduc_phis = slp_node;
-+	  /* ???  We're leaving slp_node to point to the PHIs, we only
-+	     need it to get at the number of vector stmts which wasn't
-+	     yet initialized for the instance root.  */
- 	}
--      /* For a nested cycle we might end up with an operation like
--         phi_result * phi_result.  */
--      if (!vectype_in)
--	vectype_in = STMT_VINFO_VECTYPE (stmt_info);
--      gcc_assert (vectype_in);
--
--      if (slp_node)
--	ncopies = 1;
--      else
--	ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
--
--      stmt_vec_info use_stmt_info;
--      if (ncopies > 1
--	  && STMT_VINFO_RELEVANT (reduc_stmt_info) <= vect_used_only_live
--	  && (use_stmt_info = loop_vinfo->lookup_single_use (phi_result))
--	  && vect_stmt_to_vectorize (use_stmt_info) == reduc_stmt_info)
--	single_defuse_cycle = true;
--
--      /* Create the destination vector  */
--      scalar_dest = gimple_assign_lhs (reduc_stmt);
--      vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
--
--      if (slp_node)
--	/* The size vect_schedule_slp_instance computes is off for us.  */
--	vec_num = vect_get_num_vectors
--	  (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
--	   * SLP_TREE_SCALAR_STMTS (slp_node).length (),
--	   vectype_in);
--      else
--	vec_num = 1;
-+      if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
-+	stmt_info = vect_stmt_to_vectorize (STMT_VINFO_REDUC_DEF (stmt_info));
-+      else /* STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def */
-+	{
-+	  use_operand_p use_p;
-+	  gimple *use_stmt;
-+	  bool res = single_imm_use (gimple_phi_result (stmt_info->stmt),
-+				     &use_p, &use_stmt);
-+	  gcc_assert (res);
-+	  phi_info = loop_vinfo->lookup_stmt (use_stmt);
-+	  stmt_info = vect_stmt_to_vectorize (STMT_VINFO_REDUC_DEF (phi_info));
-+	}
-+    }
- 
--      /* Generate the reduction PHIs upfront.  */
--      prev_phi_info = NULL;
--      for (j = 0; j < ncopies; j++)
-+  /* PHIs should not participate in patterns.  */
-+  gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info));
-+  gphi *reduc_def_phi = as_a <gphi *> (phi_info->stmt);
-+
-+  /* Verify following REDUC_IDX from the latch def leads us back to the PHI
-+     and compute the reduction chain length.  */
-+  tree reduc_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi,
-+					  loop_latch_edge (loop));
-+  unsigned reduc_chain_length = 0;
-+  bool only_slp_reduc_chain = true;
-+  stmt_info = NULL;
-+  while (reduc_def != PHI_RESULT (reduc_def_phi))
-+    {
-+      stmt_vec_info def = loop_vinfo->lookup_def (reduc_def);
-+      stmt_vec_info vdef = vect_stmt_to_vectorize (def);
-+      if (STMT_VINFO_REDUC_IDX (vdef) == -1)
- 	{
--	  if (j == 0 || !single_defuse_cycle)
-+	  if (dump_enabled_p ())
-+	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-+			     "reduction chain broken by patterns.\n");
-+	  return false;
-+	}
-+      if (!REDUC_GROUP_FIRST_ELEMENT (vdef))
-+	only_slp_reduc_chain = false;
-+      /* ???  For epilogue generation live members of the chain need
-+         to point back to the PHI via their original stmt for
-+	 info_for_reduction to work.  */
-+      if (STMT_VINFO_LIVE_P (vdef))
-+	STMT_VINFO_REDUC_DEF (def) = phi_info;
-+      if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (vdef->stmt)))
-+	{
-+	  if (!tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (vdef->stmt)),
-+				      TREE_TYPE (gimple_assign_rhs1 (vdef->stmt))))
- 	    {
--	      for (i = 0; i < vec_num; i++)
--		{
--		  /* Create the reduction-phi that defines the reduction
--		     operand.  */
--		  gimple *new_phi = create_phi_node (vec_dest, loop->header);
--		  stmt_vec_info new_phi_info = loop_vinfo->add_stmt (new_phi);
--
--		  if (slp_node)
--		    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_phi_info);
--		  else
--		    {
--		      if (j == 0)
--			STMT_VINFO_VEC_STMT (stmt_info)
--			  = *vec_stmt = new_phi_info;
--		      else
--			STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi_info;
--		      prev_phi_info = new_phi_info;
--		    }
--		}
-+	      if (dump_enabled_p ())
-+		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-+				 "conversion in the reduction chain.\n");
-+	      return false;
- 	    }
- 	}
-+      else if (!stmt_info)
-+	/* First non-conversion stmt.  */
-+	stmt_info = vdef;
-+      reduc_def = gimple_op (vdef->stmt, 1 + STMT_VINFO_REDUC_IDX (vdef));
-+      reduc_chain_length++;
-+    }
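For orientation, reduc_chain_length counts the statements walked between the latch definition and the reduction PHI; in a source loop like this sketch (hypothetical, not from the testsuite) the chain for s has length 2:

    static int
    chained_sum (const int *a, const int *b, int n)
    {
      int s = 0;
      for (int i = 0; i < n; i++)
        {
          s += a[i];   /* first statement of the reduction chain */
          s += b[i];   /* second statement; its result reaches the loop latch */
        }
      return s;
    }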
-+  /* PHIs should not participate in patterns.  */
-+  gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info));
- 
--      return true;
-+  if (nested_in_vect_loop_p (loop, stmt_info))
-+    {
-+      loop = loop->inner;
-+      nested_cycle = true;
-+    }
-+
-+  /* STMT_VINFO_REDUC_DEF doesn't point to the first but the last
-+     element.  */
-+  if (slp_node && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
-+    {
-+      gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (stmt_info));
-+      stmt_info = REDUC_GROUP_FIRST_ELEMENT (stmt_info);
-     }
-+  if (REDUC_GROUP_FIRST_ELEMENT (stmt_info))
-+    gcc_assert (slp_node
-+		&& REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info);
- 
-   /* 1. Is vectorizable reduction?  */
-   /* Not supportable if the reduction variable is used in the loop, unless
-@@ -6235,37 +5984,13 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-         inside the loop body. The last operand is the reduction variable,
-         which is defined by the loop-header-phi.  */
- 
-+  tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
-+  STMT_VINFO_REDUC_VECTYPE (reduc_info) = vectype_out;
-   gassign *stmt = as_a <gassign *> (stmt_info->stmt);
--
--  /* Flatten RHS.  */
--  switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
--    {
--    case GIMPLE_BINARY_RHS:
--      code = gimple_assign_rhs_code (stmt);
--      op_type = TREE_CODE_LENGTH (code);
--      gcc_assert (op_type == binary_op);
--      ops[0] = gimple_assign_rhs1 (stmt);
--      ops[1] = gimple_assign_rhs2 (stmt);
--      break;
--
--    case GIMPLE_TERNARY_RHS:
--      code = gimple_assign_rhs_code (stmt);
--      op_type = TREE_CODE_LENGTH (code);
--      gcc_assert (op_type == ternary_op);
--      ops[0] = gimple_assign_rhs1 (stmt);
--      ops[1] = gimple_assign_rhs2 (stmt);
--      ops[2] = gimple_assign_rhs3 (stmt);
--      break;
--
--    case GIMPLE_UNARY_RHS:
--      return false;
--
--    default:
--      gcc_unreachable ();
--    }
--
--  if (code == COND_EXPR && slp_node)
--    return false;
-+  enum tree_code code = gimple_assign_rhs_code (stmt);
-+  bool lane_reduc_code_p
-+    = (code == DOT_PROD_EXPR || code == WIDEN_SUM_EXPR || code == SAD_EXPR);
-+  int op_type = TREE_CODE_LENGTH (code);
- 
-   scalar_dest = gimple_assign_lhs (stmt);
-   scalar_type = TREE_TYPE (scalar_dest);
-@@ -6277,67 +6002,65 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-   if (!type_has_mode_precision_p (scalar_type))
-     return false;
- 
-+  /* For lane-reducing ops we're reducing the number of reduction PHIs
-+     which means the only use of that may be in the lane-reducing operation.  */
-+  if (lane_reduc_code_p
-+      && reduc_chain_length != 1
-+      && !only_slp_reduc_chain)
-+    {
-+      if (dump_enabled_p ())
-+	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-+			 "lane-reducing reduction with extra stmts.\n");
-+      return false;
-+    }
-+
-   /* All uses but the last are expected to be defined in the loop.
-      The last use is the reduction variable.  In case of nested cycle this
-      assumption is not true: we use reduc_index to record the index of the
-      reduction variable.  */
--  stmt_vec_info reduc_def_info;
--  if (orig_stmt_info)
--    reduc_def_info = STMT_VINFO_REDUC_DEF (orig_stmt_info);
--  else
--    reduc_def_info = STMT_VINFO_REDUC_DEF (stmt_info);
--  gcc_assert (reduc_def_info);
--  gphi *reduc_def_phi = as_a <gphi *> (reduc_def_info->stmt);
--  tree reduc_def = PHI_RESULT (reduc_def_phi);
--  int reduc_index = -1;
-+  reduc_def = PHI_RESULT (reduc_def_phi);
-   for (i = 0; i < op_type; i++)
-     {
-+      tree op = gimple_op (stmt, i + 1);
-       /* The condition of COND_EXPR is checked in vectorizable_condition().  */
-       if (i == 0 && code == COND_EXPR)
-         continue;
- 
-       stmt_vec_info def_stmt_info;
--      is_simple_use = vect_is_simple_use (ops[i], loop_vinfo, &dts[i], &tem,
--					  &def_stmt_info);
--      dt = dts[i];
--      gcc_assert (is_simple_use);
--      if (dt == vect_reduction_def
--	  && ops[i] == reduc_def)
--	{
--	  reduc_index = i;
--	  continue;
--	}
--      else if (tem)
-+      enum vect_def_type dt;
-+      if (!vect_is_simple_use (op, loop_vinfo, &dt, &tem,
-+			       &def_stmt_info))
- 	{
--	  /* To properly compute ncopies we are interested in the widest
--	     input type in case we're looking at a widening accumulation.  */
--	  if (!vectype_in
--	      || (GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_in)))
--		  < GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (tem)))))
--	    vectype_in = tem;
-+	  if (dump_enabled_p ())
-+	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-+			     "use not simple.\n");
-+	  return false;
- 	}
-+      if (i == STMT_VINFO_REDUC_IDX (stmt_info))
-+	continue;
- 
--      if (dt != vect_internal_def
--	  && dt != vect_external_def
--	  && dt != vect_constant_def
--	  && dt != vect_induction_def
--          && !(dt == vect_nested_cycle && nested_cycle))
-+      /* There should be only one cycle def in the stmt, the one
-+         leading to reduc_def.  */
-+      if (VECTORIZABLE_CYCLE_DEF (dt))
- 	return false;
- 
--      if (dt == vect_nested_cycle
--	  && ops[i] == reduc_def)
--	{
--	  found_nested_cycle_def = true;
--	  reduc_index = i;
--	}
-+      /* To properly compute ncopies we are interested in the widest
-+	 non-reduction input type in case we're looking at a widening
-+	 accumulation that we later handle in vect_transform_reduction.  */
-+      if (lane_reduc_code_p
-+	  && tem
-+	  && (!vectype_in
-+	      || (GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_in)))
-+		  < GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (tem))))))
-+	vectype_in = tem;
- 
--      if (i == 1 && code == COND_EXPR)
-+      if (code == COND_EXPR)
- 	{
--	  /* Record how value of COND_EXPR is defined.  */
-+	  /* Record how the non-reduction-def value of COND_EXPR is defined.  */
- 	  if (dt == vect_constant_def)
- 	    {
- 	      cond_reduc_dt = dt;
--	      cond_reduc_val = ops[i];
-+	      cond_reduc_val = op;
- 	    }
- 	  if (dt == vect_induction_def
- 	      && def_stmt_info
-@@ -6348,93 +6071,35 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 	    }
- 	}
-     }
--
-   if (!vectype_in)
--    vectype_in = vectype_out;
--
--  /* When vectorizing a reduction chain w/o SLP the reduction PHI is not
--     directy used in stmt.  */
--  if (reduc_index == -1)
--    {
--      if (STMT_VINFO_REDUC_TYPE (stmt_info) == FOLD_LEFT_REDUCTION)
--	{
--	  if (dump_enabled_p ())
--	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
--			     "in-order reduction chain without SLP.\n");
--	  return false;
--	}
--    }
--
--  if (!(reduc_index == -1
--	|| dts[reduc_index] == vect_reduction_def
--	|| dts[reduc_index] == vect_nested_cycle
--	|| ((dts[reduc_index] == vect_internal_def
--	     || dts[reduc_index] == vect_external_def
--	     || dts[reduc_index] == vect_constant_def
--	     || dts[reduc_index] == vect_induction_def)
--	    && nested_cycle && found_nested_cycle_def)))
--    {
--      /* For pattern recognized stmts, orig_stmt might be a reduction,
--	 but some helper statements for the pattern might not, or
--	 might be COND_EXPRs with reduction uses in the condition.  */
--      gcc_assert (orig_stmt_info);
--      return false;
--    }
--
--  /* PHIs should not participate in patterns.  */
--  gcc_assert (!STMT_VINFO_RELATED_STMT (reduc_def_info));
--  enum vect_reduction_type v_reduc_type
--    = STMT_VINFO_REDUC_TYPE (reduc_def_info);
--  stmt_vec_info tmp = STMT_VINFO_REDUC_DEF (reduc_def_info);
-+    vectype_in = STMT_VINFO_VECTYPE (phi_info);
-+  STMT_VINFO_REDUC_VECTYPE_IN (reduc_info) = vectype_in;
- 
--  STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = v_reduc_type;
-+  enum vect_reduction_type v_reduc_type = STMT_VINFO_REDUC_TYPE (phi_info);
-+  STMT_VINFO_REDUC_TYPE (reduc_info) = v_reduc_type;
-   /* If we have a condition reduction, see if we can simplify it further.  */
-   if (v_reduc_type == COND_REDUCTION)
-     {
--      /* TODO: We can't yet handle reduction chains, since we need to treat
--	 each COND_EXPR in the chain specially, not just the last one.
--	 E.g. for:
--
--	    x_1 = PHI <x_3, ...>
--	    x_2 = a_2 ? ... : x_1;
--	    x_3 = a_3 ? ... : x_2;
-+      if (slp_node)
-+	return false;
- 
--	 we're interested in the last element in x_3 for which a_2 || a_3
--	 is true, whereas the current reduction chain handling would
--	 vectorize x_2 as a normal VEC_COND_EXPR and only treat x_3
--	 as a reduction operation.  */
--      if (reduc_index == -1)
-+      /* When the condition uses the reduction value in the condition, fail.  */
-+      if (STMT_VINFO_REDUC_IDX (stmt_info) == 0)
- 	{
- 	  if (dump_enabled_p ())
- 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
--			     "conditional reduction chains not supported\n");
-+			     "condition depends on previous iteration\n");
- 	  return false;
- 	}
- 
--      /* vect_is_simple_reduction ensured that operand 2 is the
--	 loop-carried operand.  */
--      gcc_assert (reduc_index == 2);
--
--      /* Loop peeling modifies initial value of reduction PHI, which
--	 makes the reduction stmt to be transformed different to the
--	 original stmt analyzed.  We need to record reduction code for
--	 CONST_COND_REDUCTION type reduction at analyzing stage, thus
--	 it can be used directly at transform stage.  */
--      if (STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info) == MAX_EXPR
--	  || STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info) == MIN_EXPR)
--	{
--	  /* Also set the reduction type to CONST_COND_REDUCTION.  */
--	  gcc_assert (cond_reduc_dt == vect_constant_def);
--	  STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = CONST_COND_REDUCTION;
--	}
--      else if (direct_internal_fn_supported_p (IFN_FOLD_EXTRACT_LAST,
--					       vectype_in, OPTIMIZE_FOR_SPEED))
-+      if (direct_internal_fn_supported_p (IFN_FOLD_EXTRACT_LAST,
-+					  vectype_in, OPTIMIZE_FOR_SPEED))
- 	{
- 	  if (dump_enabled_p ())
- 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- 			     "optimizing condition reduction with"
- 			     " FOLD_EXTRACT_LAST.\n");
--	  STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = EXTRACT_LAST_REDUCTION;
-+	  STMT_VINFO_REDUC_TYPE (reduc_info) = EXTRACT_LAST_REDUCTION;
- 	}
-       else if (cond_reduc_dt == vect_induction_def)
- 	{
-@@ -6445,6 +6110,7 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 	  gcc_assert (TREE_CODE (base) == INTEGER_CST
- 		      && TREE_CODE (step) == INTEGER_CST);
- 	  cond_reduc_val = NULL_TREE;
-+	  enum tree_code cond_reduc_op_code = ERROR_MARK;
- 	  tree res = PHI_RESULT (STMT_VINFO_STMT (cond_stmt_vinfo));
- 	  if (!types_compatible_p (TREE_TYPE (res), TREE_TYPE (base)))
- 	    ;
-@@ -6477,16 +6143,17 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 		dump_printf_loc (MSG_NOTE, vect_location,
- 				 "condition expression based on "
- 				 "integer induction.\n");
--	      STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
--		= INTEGER_INDUC_COND_REDUCTION;
-+	      STMT_VINFO_REDUC_CODE (reduc_info) = cond_reduc_op_code;
-+	      STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info)
-+		= cond_reduc_val;
-+	      STMT_VINFO_REDUC_TYPE (reduc_info) = INTEGER_INDUC_COND_REDUCTION;
- 	    }
- 	}
-       else if (cond_reduc_dt == vect_constant_def)
- 	{
- 	  enum vect_def_type cond_initial_dt;
--	  gimple *def_stmt = SSA_NAME_DEF_STMT (ops[reduc_index]);
- 	  tree cond_initial_val
--	    = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
-+	    = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi, loop_preheader_edge (loop));
- 
- 	  gcc_assert (cond_reduc_val != NULL_TREE);
- 	  vect_is_simple_use (cond_initial_val, loop_vinfo, &cond_initial_dt);
-@@ -6503,25 +6170,15 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 				     "condition expression based on "
- 				     "compile time constant.\n");
- 		  /* Record reduction code at analysis stage.  */
--		  STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info)
-+		  STMT_VINFO_REDUC_CODE (reduc_info)
- 		    = integer_onep (e) ? MAX_EXPR : MIN_EXPR;
--		  STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
--		    = CONST_COND_REDUCTION;
-+		  STMT_VINFO_REDUC_TYPE (reduc_info) = CONST_COND_REDUCTION;
- 		}
- 	    }
- 	}
-     }
- 
--  if (orig_stmt_info)
--    gcc_assert (tmp == orig_stmt_info
--		|| REDUC_GROUP_FIRST_ELEMENT (tmp) == orig_stmt_info);
--  else
--    /* We changed STMT to be the first stmt in reduction chain, hence we
--       check that in this case the first element in the chain is STMT.  */
--    gcc_assert (tmp == stmt_info
--		|| REDUC_GROUP_FIRST_ELEMENT (tmp) == stmt_info);
--
--  if (STMT_VINFO_LIVE_P (reduc_def_info))
-+  if (STMT_VINFO_LIVE_P (phi_info))
-     return false;
- 
-   if (slp_node)
-@@ -6531,102 +6188,13 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 
-   gcc_assert (ncopies >= 1);
- 
--  vec_mode = TYPE_MODE (vectype_in);
-   poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
- 
-   if (nested_cycle)
-     {
--      def_bb = gimple_bb (reduc_def_phi);
--      def_stmt_loop = def_bb->loop_father;
--      def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi,
--                                       loop_preheader_edge (def_stmt_loop));
--      stmt_vec_info def_arg_stmt_info = loop_vinfo->lookup_def (def_arg);
--      if (def_arg_stmt_info
--	  && (STMT_VINFO_DEF_TYPE (def_arg_stmt_info)
--	      == vect_double_reduction_def))
--        double_reduc = true;
--    }
--
--  vect_reduction_type reduction_type
--    = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
--  if ((double_reduc || reduction_type != TREE_CODE_REDUCTION)
--      && ncopies > 1)
--    {
--      if (dump_enabled_p ())
--	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
--			 "multiple types in double reduction or condition "
--			 "reduction.\n");
--      return false;
--    }
--
--  if (code == COND_EXPR)
--    {
--      /* Only call during the analysis stage, otherwise we'll lose
--	 STMT_VINFO_TYPE.  */
--      if (!vec_stmt && !vectorizable_condition (stmt_info, gsi, NULL,
--						true, NULL, cost_vec))
--        {
--          if (dump_enabled_p ())
--	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
--			     "unsupported condition in reduction\n");
--	  return false;
--        }
--    }
--  else if (code == LSHIFT_EXPR || code == RSHIFT_EXPR
--	   || code == LROTATE_EXPR || code == RROTATE_EXPR)
--    {
--      /* Only call during the analysis stage, otherwise we'll lose
--	 STMT_VINFO_TYPE.  We only support this for nested cycles
--	 without double reductions at the moment.  */
--      if (!nested_cycle
--	  || double_reduc
--	  || (!vec_stmt && !vectorizable_shift (stmt_info, gsi, NULL,
--						NULL, cost_vec)))
--	{
--          if (dump_enabled_p ())
--	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
--			     "unsupported shift or rotation in reduction\n");
--	  return false;
--	}
--    }
--  else
--    {
--      /* 4. Supportable by target?  */
--
--      /* 4.1. check support for the operation in the loop  */
--      optab = optab_for_tree_code (code, vectype_in, optab_default);
--      if (!optab)
--        {
--          if (dump_enabled_p ())
--	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
--			     "no optab.\n");
--
--          return false;
--        }
--
--      if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
--        {
--          if (dump_enabled_p ())
--            dump_printf (MSG_NOTE, "op not supported by target.\n");
--
--	  if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
--	      || !vect_worthwhile_without_simd_p (loop_vinfo, code))
--            return false;
--
--          if (dump_enabled_p ())
--  	    dump_printf (MSG_NOTE, "proceeding using word mode.\n");
--        }
--
--      /* Worthwhile without SIMD support?  */
--      if (!VECTOR_MODE_P (TYPE_MODE (vectype_in))
--	  && !vect_worthwhile_without_simd_p (loop_vinfo, code))
--        {
--          if (dump_enabled_p ())
--	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
--			     "not worthwhile without SIMD support.\n");
--
--          return false;
--        }
-+      gcc_assert (STMT_VINFO_DEF_TYPE (reduc_info)
-+		  == vect_double_reduction_def);
-+      double_reduc = true;
-     }
- 
-   /* 4.2. Check support for the epilog operation.
-@@ -6664,38 +6232,55 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-           (and also the same tree-code) when generating the epilog code and
-           when generating the code inside the loop.  */
- 
--  if (orig_stmt_info
--      && (reduction_type == TREE_CODE_REDUCTION
--	  || reduction_type == FOLD_LEFT_REDUCTION))
--    {
--      /* This is a reduction pattern: get the vectype from the type of the
--         reduction variable, and get the tree-code from orig_stmt.  */
--      orig_code = gimple_assign_rhs_code (orig_stmt_info->stmt);
--      gcc_assert (vectype_out);
--      vec_mode = TYPE_MODE (vectype_out);
--    }
--  else
--    {
--      /* Regular reduction: use the same vectype and tree-code as used for
--         the vector code inside the loop can be used for the epilog code. */
--      orig_code = code;
--
--      if (code == MINUS_EXPR)
--	orig_code = PLUS_EXPR;
-+  enum tree_code orig_code = STMT_VINFO_REDUC_CODE (phi_info);
-+  STMT_VINFO_REDUC_CODE (reduc_info) = orig_code;
- 
--      /* For simple condition reductions, replace with the actual expression
--	 we want to base our reduction around.  */
--      if (reduction_type == CONST_COND_REDUCTION)
-+  vect_reduction_type reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
-+  if (reduction_type == TREE_CODE_REDUCTION)
-+    {
-+      /* Check whether it's ok to change the order of the computation.
-+	 Generally, when vectorizing a reduction we change the order of the
-+	 computation.  This may change the behavior of the program in some
-+	 cases, so we need to check that this is ok.  One exception is when
-+	 vectorizing an outer-loop: the inner-loop is executed sequentially,
-+	 and therefore vectorizing reductions in the inner-loop during
-+	 outer-loop vectorization is safe.  */
-+      if (needs_fold_left_reduction_p (scalar_type, orig_code))
-+	{
-+	  /* When vectorizing a reduction chain w/o SLP the reduction PHI
-+	     is not directly used in stmt.  */
-+	  if (!only_slp_reduc_chain
-+	      && reduc_chain_length != 1)
-+	    {
-+	      if (dump_enabled_p ())
-+		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-+				 "in-order reduction chain without SLP.\n");
-+	      return false;
-+	    }
-+	  STMT_VINFO_REDUC_TYPE (reduc_info)
-+	    = reduction_type = FOLD_LEFT_REDUCTION;
-+	}
-+      else if (!commutative_tree_code (orig_code)
-+	       || !associative_tree_code (orig_code))
- 	{
--	  orig_code = STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info);
--	  gcc_assert (orig_code == MAX_EXPR || orig_code == MIN_EXPR);
-+	  if (dump_enabled_p ())
-+	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-+			    "reduction: not commutative/associative");
-+	  return false;
- 	}
--      else if (reduction_type == INTEGER_INDUC_COND_REDUCTION)
--	orig_code = cond_reduc_op_code;
-     }
- 
--  reduc_fn = IFN_LAST;
-+  if ((double_reduc || reduction_type != TREE_CODE_REDUCTION)
-+      && ncopies > 1)
-+    {
-+      if (dump_enabled_p ())
-+	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-+			 "multiple types in double reduction or condition "
-+			 "reduction or fold-left reduction.\n");
-+      return false;
-+    }
- 
-+  internal_fn reduc_fn = IFN_LAST;
-   if (reduction_type == TREE_CODE_REDUCTION
-       || reduction_type == FOLD_LEFT_REDUCTION
-       || reduction_type == INTEGER_INDUC_COND_REDUCTION
-@@ -6740,6 +6325,7 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 					  OPTIMIZE_FOR_SPEED))
- 	reduc_fn = IFN_REDUC_MAX;
-     }
-+  STMT_VINFO_REDUC_FN (reduc_info) = reduc_fn;
- 
-   if (reduction_type != EXTRACT_LAST_REDUCTION
-       && (!nested_cycle || double_reduc)
-@@ -6757,7 +6343,7 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-   tree neutral_op = NULL_TREE;
-   if (slp_node)
-     neutral_op = neutral_op_for_slp_reduction
--      (slp_node_instance->reduc_phis, code,
-+      (slp_node_instance->reduc_phis, vectype_out, orig_code,
-        REDUC_GROUP_FIRST_ELEMENT (stmt_info) != NULL);
- 
-   if (double_reduc && reduction_type == FOLD_LEFT_REDUCTION)
-@@ -6822,10 +6408,11 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 	 which each SLP statement has its own initial value and in which
- 	 that value needs to be repeated for every instance of the
- 	 statement within the initial vector.  */
--      unsigned int group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
-+      unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
-       scalar_mode elt_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype_out));
-       if (!neutral_op
--	  && !can_duplicate_and_interleave_p (group_size, elt_mode))
-+	  && !can_duplicate_and_interleave_p (loop_vinfo, group_size,
-+					      elt_mode))
- 	{
- 	  if (dump_enabled_p ())
- 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-@@ -6848,26 +6435,6 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 	}
-     }
- 
--  /* In case of widenning multiplication by a constant, we update the type
--     of the constant to be the type of the other operand.  We check that the
--     constant fits the type in the pattern recognition pass.  */
--  if (code == DOT_PROD_EXPR
--      && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1])))
--    {
--      if (TREE_CODE (ops[0]) == INTEGER_CST)
--        ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]);
--      else if (TREE_CODE (ops[1]) == INTEGER_CST)
--        ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
--      else
--        {
--          if (dump_enabled_p ())
--	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
--			     "invalid types in dot-prod\n");
--
--          return false;
--        }
--    }
--
-   if (reduction_type == COND_REDUCTION)
-     {
-       widest_int ni;
-@@ -6925,26 +6492,68 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-    This only works when we see both the reduction PHI and its only consumer
-    in vectorizable_reduction and there are no intermediate stmts
-    participating.  */
--  stmt_vec_info use_stmt_info;
--  tree reduc_phi_result = gimple_phi_result (reduc_def_phi);
-   if (ncopies > 1
-       && (STMT_VINFO_RELEVANT (stmt_info) <= vect_used_only_live)
--      && (use_stmt_info = loop_vinfo->lookup_single_use (reduc_phi_result))
--      && vect_stmt_to_vectorize (use_stmt_info) == stmt_info)
-+      && reduc_chain_length == 1)
-+    single_defuse_cycle = true;
-+
-+  if (single_defuse_cycle || lane_reduc_code_p)
-     {
--      single_defuse_cycle = true;
--      epilog_copies = 1;
-+      gcc_assert (code != COND_EXPR);
-+
-+      /* 4. Supportable by target?  */
-+      bool ok = true;
-+
-+      /* 4.1. check support for the operation in the loop  */
-+      optab optab = optab_for_tree_code (code, vectype_in, optab_vector);
-+      if (!optab)
-+	{
-+	  if (dump_enabled_p ())
-+	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-+			     "no optab.\n");
-+	  ok = false;
-+        }
-+
-+      machine_mode vec_mode = TYPE_MODE (vectype_in);
-+      if (ok && optab_handler (optab, vec_mode) == CODE_FOR_nothing)
-+        {
-+          if (dump_enabled_p ())
-+            dump_printf (MSG_NOTE, "op not supported by target.\n");
-+	  if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
-+	      || !vect_worthwhile_without_simd_p (loop_vinfo, code))
-+	    ok = false;
-+	  else
-+	    if (dump_enabled_p ())
-+	      dump_printf (MSG_NOTE, "proceeding using word mode.\n");
-+        }
-+
-+      /* Worthwhile without SIMD support?  */
-+      if (ok
-+	  && !VECTOR_MODE_P (TYPE_MODE (vectype_in))
-+	  && !vect_worthwhile_without_simd_p (loop_vinfo, code))
-+        {
-+          if (dump_enabled_p ())
-+	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-+			     "not worthwhile without SIMD support.\n");
-+	  ok = false;
-+        }
-+
-+      /* lane-reducing operations have to go through vect_transform_reduction.
-+         For the other cases try without the single cycle optimization.  */
-+      if (!ok)
-+	{
-+	  if (lane_reduc_code_p)
-+	    return false;
-+	  else
-+	    single_defuse_cycle = false;
-+	}
-     }
--  else
--    epilog_copies = ncopies;
-+  STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info) = single_defuse_cycle;
- 
-   /* If the reduction stmt is one of the patterns that have lane
-      reduction embedded we cannot handle the case of ! single_defuse_cycle.  */
--  if ((ncopies > 1
--       && ! single_defuse_cycle)
--      && (code == DOT_PROD_EXPR
--	  || code == WIDEN_SUM_EXPR
--	  || code == SAD_EXPR))
-+  if ((ncopies > 1 && ! single_defuse_cycle)
-+      && lane_reduc_code_p)
-     {
-       if (dump_enabled_p ())
- 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-@@ -6958,46 +6567,130 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-   else
-     vec_num = 1;
- 
-+  vect_model_reduction_cost (stmt_info, reduc_fn, reduction_type, ncopies,
-+			     cost_vec);
-+  if (dump_enabled_p ()
-+      && reduction_type == FOLD_LEFT_REDUCTION)
-+    dump_printf_loc (MSG_NOTE, vect_location,
-+		     "using an in-order (fold-left) reduction.\n");
-+  STMT_VINFO_TYPE (orig_stmt_of_analysis) = cycle_phi_info_type;
-+  /* All but single defuse-cycle optimized, lane-reducing and fold-left
-+     reductions go through their own vectorizable_* routines.  */
-+  if (!single_defuse_cycle
-+      && code != DOT_PROD_EXPR
-+      && code != WIDEN_SUM_EXPR
-+      && code != SAD_EXPR
-+      && reduction_type != FOLD_LEFT_REDUCTION)
-+    {
-+      stmt_vec_info tem
-+	= vect_stmt_to_vectorize (STMT_VINFO_REDUC_DEF (phi_info));
-+      if (slp_node && REDUC_GROUP_FIRST_ELEMENT (tem))
-+	{
-+	  gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (tem));
-+	  tem = REDUC_GROUP_FIRST_ELEMENT (tem);
-+	}
-+      STMT_VINFO_DEF_TYPE (vect_orig_stmt (tem)) = vect_internal_def;
-+      STMT_VINFO_DEF_TYPE (tem) = vect_internal_def;
-+    }
-+  else if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
-+    {
-+      vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
-+      internal_fn cond_fn = get_conditional_internal_fn (code);
-+
-+      if (reduction_type != FOLD_LEFT_REDUCTION
-+	  && !use_mask_by_cond_expr_p (code, cond_fn, vectype_in)
-+	  && (cond_fn == IFN_LAST
-+	      || !direct_internal_fn_supported_p (cond_fn, vectype_in,
-+						  OPTIMIZE_FOR_SPEED)))
-+	{
-+	  if (dump_enabled_p ())
-+	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-+			     "can't use a fully-masked loop because no"
-+			     " conditional operation is available.\n");
-+	  LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
-+	}
-+      else
-+	vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
-+			       vectype_in, NULL);
-+    }
-+  return true;
-+}
-+
-+/* Transform the definition stmt STMT_INFO of a reduction PHI backedge
-+   value.  */
-+
-+bool
-+vect_transform_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-+			  stmt_vec_info *vec_stmt, slp_tree slp_node)
-+{
-+  tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
-+  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
-+  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
-+  int i;
-+  int ncopies;
-+  int j;
-+  int vec_num;
-+
-+  stmt_vec_info reduc_info = info_for_reduction (stmt_info);
-+  gcc_assert (reduc_info->is_reduc_info);
-+
-+  if (nested_in_vect_loop_p (loop, stmt_info))
-+    {
-+      loop = loop->inner;
-+      gcc_assert (STMT_VINFO_DEF_TYPE (reduc_info) == vect_double_reduction_def);
-+    }
-+
-+  gassign *stmt = as_a <gassign *> (stmt_info->stmt);
-+  enum tree_code code = gimple_assign_rhs_code (stmt);
-+  int op_type = TREE_CODE_LENGTH (code);
-+
-+  /* Flatten RHS.  */
-+  tree ops[3];
-+  switch (get_gimple_rhs_class (code))
-+    {
-+    case GIMPLE_TERNARY_RHS:
-+      ops[2] = gimple_assign_rhs3 (stmt);
-+      /* Fall thru.  */
-+    case GIMPLE_BINARY_RHS:
-+      ops[0] = gimple_assign_rhs1 (stmt);
-+      ops[1] = gimple_assign_rhs2 (stmt);
-+      break;
-+    default:
-+      gcc_unreachable ();
-+    }
-+
-+  /* All uses but the last are expected to be defined in the loop.
-+     The last use is the reduction variable.  In case of nested cycle this
-+     assumption is not true: we use reduc_index to record the index of the
-+     reduction variable.  */
-+  stmt_vec_info phi_info = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info));
-+  gphi *reduc_def_phi = as_a <gphi *> (phi_info->stmt);
-+  int reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
-+  tree vectype_in = STMT_VINFO_REDUC_VECTYPE_IN (reduc_info);
-+
-+  if (slp_node)
-+    {
-+      ncopies = 1;
-+      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
-+    }
-+  else
-+    {
-+      ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
-+      vec_num = 1;
-+    }
-+
-   internal_fn cond_fn = get_conditional_internal_fn (code);
-   vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
--
--  if (!vec_stmt) /* transformation not required.  */
--    {
--      vect_model_reduction_cost (stmt_info, reduc_fn, ncopies, cost_vec);
--      if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
--	{
--	  if (reduction_type != FOLD_LEFT_REDUCTION
--	      && (cond_fn == IFN_LAST
--		  || !direct_internal_fn_supported_p (cond_fn, vectype_in,
--						      OPTIMIZE_FOR_SPEED)))
--	    {
--	      if (dump_enabled_p ())
--		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
--				 "can't use a fully-masked loop because no"
--				 " conditional operation is available.\n");
--	      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
--	    }
--	  else if (reduc_index == -1)
--	    {
--	      if (dump_enabled_p ())
--		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
--				 "can't use a fully-masked loop for chained"
--				 " reductions.\n");
--	      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
--	    }
--	  else
--	    vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
--				   vectype_in);
--	}
--      if (dump_enabled_p ()
--	  && reduction_type == FOLD_LEFT_REDUCTION)
--	dump_printf_loc (MSG_NOTE, vect_location,
--			 "using an in-order (fold-left) reduction.\n");
--      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
--      return true;
--    }
-+  bool mask_by_cond_expr = use_mask_by_cond_expr_p (code, cond_fn, vectype_in);
- 
-   /* Transform.  */
-+  stmt_vec_info new_stmt_info = NULL;
-+  stmt_vec_info prev_stmt_info;
-+  tree new_temp = NULL_TREE;
-+  auto_vec<tree> vec_oprnds0;
-+  auto_vec<tree> vec_oprnds1;
-+  auto_vec<tree> vec_oprnds2;
-+  tree def0;
- 
-   if (dump_enabled_p ())
-     dump_printf_loc (MSG_NOTE, vect_location, "transform reduction.\n");
-@@ -7008,23 +6701,26 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 
-   bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
- 
-+  vect_reduction_type reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
-   if (reduction_type == FOLD_LEFT_REDUCTION)
--    return vectorize_fold_left_reduction
--      (stmt_info, gsi, vec_stmt, slp_node, reduc_def_phi, code,
--       reduc_fn, ops, vectype_in, reduc_index, masks);
--
--  if (reduction_type == EXTRACT_LAST_REDUCTION)
-     {
--      gcc_assert (!slp_node);
--      return vectorizable_condition (stmt_info, gsi, vec_stmt,
--				     true, NULL, NULL);
-+      internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info);
-+      return vectorize_fold_left_reduction
-+	  (stmt_info, gsi, vec_stmt, slp_node, reduc_def_phi, code,
-+	   reduc_fn, ops, vectype_in, reduc_index, masks);
-     }
- 
-+  bool single_defuse_cycle = STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info);
-+  gcc_assert (single_defuse_cycle
-+	      || code == DOT_PROD_EXPR
-+	      || code == WIDEN_SUM_EXPR
-+	      || code == SAD_EXPR);
-+
-   /* Create the destination vector  */
--  vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
-+  tree scalar_dest = gimple_assign_lhs (stmt);
-+  tree vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
- 
-   prev_stmt_info = NULL;
--  prev_phi_info = NULL;
-   if (!slp_node)
-     {
-       vec_oprnds0.create (1);
-@@ -7033,32 +6729,8 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-         vec_oprnds2.create (1);
-     }
- 
--  phis.create (vec_num);
--  vect_defs.create (vec_num);
--  if (!slp_node)
--    vect_defs.quick_push (NULL_TREE);
--
--  if (slp_node)
--    phis.splice (SLP_TREE_VEC_STMTS (slp_node_instance->reduc_phis));
--  else
--    phis.quick_push (STMT_VINFO_VEC_STMT (reduc_def_info));
--
-   for (j = 0; j < ncopies; j++)
-     {
--      if (code == COND_EXPR)
--        {
--          gcc_assert (!slp_node);
--	  vectorizable_condition (stmt_info, gsi, vec_stmt,
--				  true, NULL, NULL);
--          break;
--        }
--      if (code == LSHIFT_EXPR
--	  || code == RSHIFT_EXPR)
--	{
--	  vectorizable_shift (stmt_info, gsi, vec_stmt, slp_node, NULL);
--	  break;
--	}
--
-       /* Handle uses.  */
-       if (j == 0)
-         {
-@@ -7066,16 +6738,8 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 	    {
- 	      /* Get vec defs for all the operands except the reduction index,
- 		 ensuring the ordering of the ops in the vector is kept.  */
--	      auto_vec<tree> slp_ops;
- 	      auto_vec<vec<tree>, 3> vec_defs;
--
--	      slp_ops.quick_push (ops[0]);
--	      slp_ops.quick_push (ops[1]);
--	      if (op_type == ternary_op)
--		slp_ops.quick_push (ops[2]);
--
--	      vect_get_slp_defs (slp_ops, slp_node, &vec_defs);
--
-+	      vect_get_slp_defs (slp_node, &vec_defs);
- 	      vec_oprnds0.safe_splice (vec_defs[0]);
- 	      vec_defs[0].release ();
- 	      vec_oprnds1.safe_splice (vec_defs[1]);
-@@ -7130,7 +6794,7 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-       FOR_EACH_VEC_ELT (vec_oprnds0, i, def0)
-         {
- 	  tree vop[3] = { def0, vec_oprnds1[i], NULL_TREE };
--	  if (masked_loop_p)
-+	  if (masked_loop_p && !mask_by_cond_expr)
- 	    {
- 	      /* Make sure that the reduction accumulator is vop[0].  */
- 	      if (reduc_index == 1)
-@@ -7154,6 +6818,14 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 	      if (op_type == ternary_op)
- 		vop[2] = vec_oprnds2[i];
- 
-+	      if (masked_loop_p && mask_by_cond_expr)
-+		{
-+		  tree mask = vect_get_loop_mask (gsi, masks,
-+						  vec_num * ncopies,
-+						  vectype_in, i * ncopies + j);
-+		  build_vect_cond_expr (code, vop, mask, gsi);
-+		}
-+
- 	      gassign *new_stmt = gimple_build_assign (vec_dest, code,
- 						       vop[0], vop[1], vop[2]);
- 	      new_temp = make_ssa_name (vec_dest, new_stmt);
-@@ -7163,15 +6835,10 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 	    }
- 
-           if (slp_node)
--            {
--	      SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
--              vect_defs.quick_push (new_temp);
--            }
--          else
--            vect_defs[0] = new_temp;
-+	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
-         }
- 
--      if (slp_node)
-+      if (slp_node || single_defuse_cycle)
-         continue;
- 
-       if (j == 0)
-@@ -7182,20 +6849,244 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-       prev_stmt_info = new_stmt_info;
-     }
- 
--  /* Finalize the reduction-phi (set its arguments) and create the
--     epilog reduction code.  */
--  if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
--    vect_defs[0] = gimple_get_lhs ((*vec_stmt)->stmt);
-+  if (single_defuse_cycle && !slp_node)
-+    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
-+
-+  return true;
-+}
-+
-+/* Transform phase of a cycle PHI.  */
-+
-+bool
-+vect_transform_cycle_phi (stmt_vec_info stmt_info, stmt_vec_info *vec_stmt,
-+			  slp_tree slp_node, slp_instance slp_node_instance)
-+{
-+  tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
-+  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
-+  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
-+  int i;
-+  int ncopies;
-+  stmt_vec_info prev_phi_info;
-+  int j;
-+  bool nested_cycle = false;
-+  int vec_num;
-+
-+  if (nested_in_vect_loop_p (loop, stmt_info))
-+    {
-+      loop = loop->inner;
-+      nested_cycle = true;
-+    }
-+
-+  stmt_vec_info reduc_stmt_info = STMT_VINFO_REDUC_DEF (stmt_info);
-+  reduc_stmt_info = vect_stmt_to_vectorize (reduc_stmt_info);
-+  stmt_vec_info reduc_info = info_for_reduction (stmt_info);
-+  gcc_assert (reduc_info->is_reduc_info);
-+
-+  if (STMT_VINFO_REDUC_TYPE (reduc_info) == EXTRACT_LAST_REDUCTION
-+      || STMT_VINFO_REDUC_TYPE (reduc_info) == FOLD_LEFT_REDUCTION)
-+    /* Leave the scalar phi in place.  */
-+    return true;
-+
-+  tree vectype_in = STMT_VINFO_REDUC_VECTYPE_IN (reduc_info);
-+  /* For a nested cycle we do not fill the above.  */
-+  if (!vectype_in)
-+    vectype_in = STMT_VINFO_VECTYPE (stmt_info);
-+  gcc_assert (vectype_in);
-+
-+  if (slp_node)
-+    {
-+      /* The size vect_schedule_slp_instance computes is off for us.  */
-+      vec_num = vect_get_num_vectors
-+	  (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
-+	   * SLP_TREE_SCALAR_STMTS (slp_node).length (), vectype_in);
-+      ncopies = 1;
-+    }
-+  else
-+    {
-+      vec_num = 1;
-+      ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
-+    }
-+
-+  /* Check whether we should use a single PHI node and accumulate
-+     vectors to one before the backedge.  */
-+  if (STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info))
-+    ncopies = 1;
-+
-+  /* Create the destination vector  */
-+  gphi *phi = as_a <gphi *> (stmt_info->stmt);
-+  tree vec_dest = vect_create_destination_var (gimple_phi_result (phi),
-+					       vectype_out);
-+
-+  /* Get the loop-entry arguments.  */
-+  tree vec_initial_def;
-+  auto_vec<tree> vec_initial_defs;
-+  if (slp_node)
-+    {
-+      vec_initial_defs.reserve (vec_num);
-+      gcc_assert (slp_node == slp_node_instance->reduc_phis);
-+      stmt_vec_info first = REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info);
-+      tree neutral_op
-+	= neutral_op_for_slp_reduction (slp_node, vectype_out,
-+					STMT_VINFO_REDUC_CODE (reduc_info),
-+					first != NULL);
-+      get_initial_defs_for_reduction (slp_node_instance->reduc_phis,
-+				      &vec_initial_defs, vec_num,
-+				      first != NULL, neutral_op);
-+    }
-+  else
-+    {
-+      /* Get at the scalar def before the loop, that defines the initial
-+	 value of the reduction variable.  */
-+      tree initial_def = PHI_ARG_DEF_FROM_EDGE (phi,
-+						loop_preheader_edge (loop));
-+      /* Optimize: if initial_def is for REDUC_MAX smaller than the base
-+	 and we can't use zero for induc_val, use initial_def.  Similarly
-+	 for REDUC_MIN and initial_def larger than the base.  */
-+      if (STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION)
-+	{
-+	  tree induc_val = STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info);
-+	  if (TREE_CODE (initial_def) == INTEGER_CST
-+	      && !integer_zerop (induc_val)
-+	      && ((STMT_VINFO_REDUC_CODE (reduc_info) == MAX_EXPR
-+		   && tree_int_cst_lt (initial_def, induc_val))
-+		  || (STMT_VINFO_REDUC_CODE (reduc_info) == MIN_EXPR
-+		      && tree_int_cst_lt (induc_val, initial_def))))
-+	    {
-+	      induc_val = initial_def;
-+	      /* Communicate we used the initial_def to epilogue
-+		 generation.  */
-+	      STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info) = NULL_TREE;
-+	    }
-+	  vec_initial_def = build_vector_from_val (vectype_out, induc_val);
-+	}
-+      else if (nested_cycle)
-+	{
-+	  /* Do not use an adjustment def as that case is not supported
-+	     correctly if ncopies is not one.  */
-+	  vec_initial_def = vect_get_vec_def_for_operand (initial_def,
-+							  reduc_stmt_info);
-+	}
-+      else
-+	{
-+	  tree adjustment_def = NULL_TREE;
-+	  tree *adjustment_defp = &adjustment_def;
-+	  enum tree_code code = STMT_VINFO_REDUC_CODE (reduc_info);
-+	  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
-+	    adjustment_defp = NULL;
-+	  vec_initial_def
-+	    = get_initial_def_for_reduction (reduc_stmt_info, code,
-+					     initial_def, adjustment_defp);
-+	  STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info) = adjustment_def;
-+	}
-+      vec_initial_defs.create (1);
-+      vec_initial_defs.quick_push (vec_initial_def);
-+    }
-+
-+  /* Generate the reduction PHIs upfront.  */
-+  prev_phi_info = NULL;
-+  for (i = 0; i < vec_num; i++)
-+    {
-+      tree vec_init_def = vec_initial_defs[i];
-+      for (j = 0; j < ncopies; j++)
-+	{
-+	  /* Create the reduction-phi that defines the reduction
-+	     operand.  */
-+	  gphi *new_phi = create_phi_node (vec_dest, loop->header);
-+	  stmt_vec_info new_phi_info = loop_vinfo->add_stmt (new_phi);
-+
-+	  /* Set the loop-entry arg of the reduction-phi.  */
-+	  if (j != 0 && nested_cycle)
-+	    vec_init_def = vect_get_vec_def_for_stmt_copy (loop_vinfo,
-+							   vec_init_def);
-+	  add_phi_arg (new_phi, vec_init_def, loop_preheader_edge (loop),
-+		       UNKNOWN_LOCATION);
-+
-+	  /* The loop-latch arg is set in epilogue processing.  */
-+
-+	  if (slp_node)
-+	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_phi_info);
-+	  else
-+	    {
-+	      if (j == 0)
-+		STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_phi_info;
-+	      else
-+		STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi_info;
-+	      prev_phi_info = new_phi_info;
-+	    }
-+	}
-+    }
-+
-+  return true;
-+}
-+
-+/* Vectorizes LC PHIs.  */
-+
-+bool
-+vectorizable_lc_phi (stmt_vec_info stmt_info, stmt_vec_info *vec_stmt,
-+		     slp_tree slp_node)
-+{
-+  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
-+  if (!loop_vinfo
-+      || !is_a <gphi *> (stmt_info->stmt)
-+      || gimple_phi_num_args (stmt_info->stmt) != 1)
-+    return false;
-+
-+  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
-+      && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def)
-+    return false;
-+
-+  if (!vec_stmt) /* transformation not required.  */
-+    {
-+      STMT_VINFO_TYPE (stmt_info) = lc_phi_info_type;
-+      return true;
-+    }
- 
--  vect_create_epilog_for_reduction (vect_defs, stmt_info, reduc_def_phi,
--				    epilog_copies, reduc_fn, phis,
--				    double_reduc, slp_node, slp_node_instance,
--				    cond_reduc_val, cond_reduc_op_code,
--				    neutral_op);
-+  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
-+  tree scalar_dest = gimple_phi_result (stmt_info->stmt);
-+  basic_block bb = gimple_bb (stmt_info->stmt);
-+  edge e = single_pred_edge (bb);
-+  tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
-+  vec<tree> vec_oprnds = vNULL;
-+  vect_get_vec_defs (gimple_phi_arg_def (stmt_info->stmt, 0), NULL_TREE,
-+		     stmt_info, &vec_oprnds, NULL, slp_node);
-+  if (slp_node)
-+    {
-+      unsigned vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
-+      gcc_assert (vec_oprnds.length () == vec_num);
-+      for (unsigned i = 0; i < vec_num; i++)
-+	{
-+	  /* Create the vectorized LC PHI node.  */
-+	  gphi *new_phi = create_phi_node (vec_dest, bb);
-+	  add_phi_arg (new_phi, vec_oprnds[i], e, UNKNOWN_LOCATION);
-+	  stmt_vec_info new_phi_info = loop_vinfo->add_stmt (new_phi);
-+	  SLP_TREE_VEC_STMTS (slp_node).quick_push (new_phi_info);
-+	}
-+    }
-+  else
-+    {
-+      unsigned ncopies = vect_get_num_copies (loop_vinfo, vectype);
-+      stmt_vec_info prev_phi_info = NULL;
-+      for (unsigned i = 0; i < ncopies; i++)
-+	{
-+	  if (i != 0)
-+	    vect_get_vec_defs_for_stmt_copy (loop_vinfo, &vec_oprnds, NULL);
-+	  /* Create the vectorized LC PHI node.  */
-+	  gphi *new_phi = create_phi_node (vec_dest, bb);
-+	  add_phi_arg (new_phi, vec_oprnds[0], e, UNKNOWN_LOCATION);
-+	  stmt_vec_info new_phi_info = loop_vinfo->add_stmt (new_phi);
-+	  if (i == 0)
-+	    STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_phi_info;
-+	  else
-+	    STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi_info;
-+	  prev_phi_info = new_phi_info;
-+	}
-+    }
-+  vec_oprnds.release ();
- 
-   return true;
- }
- 
-+
- /* Function vect_min_worthwhile_factor.
- 
-    For a loop where we could vectorize the operation indicated by CODE,
-@@ -7789,8 +7680,8 @@ vectorizable_induction (stmt_vec_info stmt_info,
- bool
- vectorizable_live_operation (stmt_vec_info stmt_info,
- 			     gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
--			     slp_tree slp_node, int slp_index,
--			     stmt_vec_info *vec_stmt,
-+			     slp_tree slp_node, slp_instance slp_node_instance,
-+			     int slp_index, stmt_vec_info *vec_stmt,
- 			     stmt_vector_for_cost *)
- {
-   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
-@@ -7807,8 +7698,33 @@ vectorizable_live_operation (stmt_vec_info stmt_info,
- 
-   gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
- 
--  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
--    return false;
-+  /* If a stmt of a reduction is live, vectorize it via
-+     vect_create_epilog_for_reduction.  vectorizable_reduction assessed
-+     validity so just trigger the transform here.  */
-+  if (STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)))
-+    {
-+      if (!vec_stmt)
-+	return true;
-+      if (slp_node)
-+	{
-+	  /* For reduction chains the meta-info is attached to
-+	     the group leader.  */
-+	  if (REDUC_GROUP_FIRST_ELEMENT (stmt_info))
-+	    stmt_info = REDUC_GROUP_FIRST_ELEMENT (stmt_info);
-+	  /* For SLP reductions we vectorize the epilogue for
-+	     all involved stmts together.  */
-+	  else if (slp_index != 0)
-+	    return true;
-+	}
-+      stmt_vec_info reduc_info = info_for_reduction (stmt_info);
-+      gcc_assert (reduc_info->is_reduc_info);
-+      if (STMT_VINFO_REDUC_TYPE (reduc_info) == FOLD_LEFT_REDUCTION
-+	  || STMT_VINFO_REDUC_TYPE (reduc_info) == EXTRACT_LAST_REDUCTION)
-+	return true;
-+      vect_create_epilog_for_reduction (stmt_info, slp_node,
-+					slp_node_instance);
-+      return true;
-+    }
- 
-   /* FORNOW.  CHECKME.  */
-   if (nested_in_vect_loop_p (loop, stmt_info))
-@@ -7892,7 +7808,7 @@ vectorizable_live_operation (stmt_vec_info stmt_info,
- 	      gcc_assert (ncopies == 1 && !slp_node);
- 	      vect_record_loop_mask (loop_vinfo,
- 				     &LOOP_VINFO_MASKS (loop_vinfo),
--				     1, vectype);
-+				     1, vectype, NULL);
- 	    }
- 	}
-       return true;
-@@ -8071,31 +7987,34 @@ loop_niters_no_overflow (loop_vec_info loop_vinfo)
-   return false;
- }
- 
--/* Return a mask type with half the number of elements as TYPE.  */
-+/* Return a mask type with half the number of elements as OLD_TYPE,
-+   given that it should have mode NEW_MODE.  */
- 
- tree
--vect_halve_mask_nunits (tree type)
-+vect_halve_mask_nunits (tree old_type, machine_mode new_mode)
- {
--  poly_uint64 nunits = exact_div (TYPE_VECTOR_SUBPARTS (type), 2);
--  return build_truth_vector_type (nunits, current_vector_size);
-+  poly_uint64 nunits = exact_div (TYPE_VECTOR_SUBPARTS (old_type), 2);
-+  return build_truth_vector_type_for_mode (nunits, new_mode);
- }
- 
--/* Return a mask type with twice as many elements as TYPE.  */
-+/* Return a mask type with twice as many elements as OLD_TYPE,
-+   given that it should have mode NEW_MODE.  */
- 
- tree
--vect_double_mask_nunits (tree type)
-+vect_double_mask_nunits (tree old_type, machine_mode new_mode)
- {
--  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (type) * 2;
--  return build_truth_vector_type (nunits, current_vector_size);
-+  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (old_type) * 2;
-+  return build_truth_vector_type_for_mode (nunits, new_mode);
- }
- 
- /* Record that a fully-masked version of LOOP_VINFO would need MASKS to
-    contain a sequence of NVECTORS masks that each control a vector of type
--   VECTYPE.  */
-+   VECTYPE.  If SCALAR_MASK is nonnull, the fully-masked loop would AND
-+   these vector masks with the vector version of SCALAR_MASK.  */
- 
- void
- vect_record_loop_mask (loop_vec_info loop_vinfo, vec_loop_masks *masks,
--		       unsigned int nvectors, tree vectype)
-+		       unsigned int nvectors, tree vectype, tree scalar_mask)
- {
-   gcc_assert (nvectors != 0);
-   if (masks->length () < nvectors)
-@@ -8106,10 +8025,17 @@ vect_record_loop_mask (loop_vec_info loop_vinfo, vec_loop_masks *masks,
-   unsigned int nscalars_per_iter
-     = exact_div (nvectors * TYPE_VECTOR_SUBPARTS (vectype),
- 		 LOOP_VINFO_VECT_FACTOR (loop_vinfo)).to_constant ();
-+
-+  if (scalar_mask)
-+    {
-+      scalar_cond_masked_key cond (scalar_mask, nvectors);
-+      loop_vinfo->scalar_cond_masked_set.add (cond);
-+    }
-+
-   if (rgm->max_nscalars_per_iter < nscalars_per_iter)
-     {
-       rgm->max_nscalars_per_iter = nscalars_per_iter;
--      rgm->mask_type = build_same_sized_truth_vector_type (vectype);
-+      rgm->mask_type = truth_type_for (vectype);
-     }
- }
- 
-@@ -8154,7 +8080,7 @@ vect_get_loop_mask (gimple_stmt_iterator *gsi, vec_loop_masks *masks,
-       gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (mask_type),
- 			      TYPE_VECTOR_SUBPARTS (vectype)));
-       gimple_seq seq = NULL;
--      mask_type = build_same_sized_truth_vector_type (vectype);
-+      mask_type = truth_type_for (vectype);
-       mask = gimple_build (&seq, VIEW_CONVERT_EXPR, mask_type, mask);
-       if (seq)
- 	gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
-@@ -8242,6 +8168,186 @@ vect_transform_loop_stmt (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
-     *seen_store = stmt_info;
- }
- 
-+/* Helper function to pass to simplify_replace_tree to enable replacing tree's
-+   in the hash_map with its corresponding values.  */
-+
-+static tree
-+find_in_mapping (tree t, void *context)
-+{
-+  hash_map<tree,tree>* mapping = (hash_map<tree,tree>*) context;
-+
-+  tree *value = mapping->get (t);
-+  return value ? *value : t;
-+}
-+
-+/* Update EPILOGUE's loop_vec_info.  EPILOGUE was constructed as a copy of the
-+   original loop that has now been vectorized.
-+
-+   The inits of the data_references need to be advanced with the number of
-+   iterations of the main loop.  This has been computed in vect_do_peeling and
-+   is stored in parameter ADVANCE.  We first restore the data_references
-+   initial offset with the values recorded in ORIG_DRS_INIT.
-+
-+   Since the loop_vec_info of this EPILOGUE was constructed for the original
-+   loop, its stmt_vec_infos all point to the original statements.  These need
-+   to be updated to point to their corresponding copies as well as the SSA_NAMES
-+   in their PATTERN_DEF_SEQs and RELATED_STMTs.
-+
-+   The data_reference's connections also need to be updated.  Their
-+   corresponding dr_vec_info need to be reconnected to the EPILOGUE's
-+   stmt_vec_infos, their statements need to point to their corresponding copy,
-+   if they are gather loads or scatter stores then their reference needs to be
-+   updated to point to its corresponding copy and finally we set
-+   'base_misaligned' to false as we have already peeled for alignment in the
-+   prologue of the main loop.  */
-+
-+static void
-+update_epilogue_loop_vinfo (class loop *epilogue, tree advance,
-+			    drs_init_vec &orig_drs_init)
-+{
-+  loop_vec_info epilogue_vinfo = loop_vec_info_for_loop (epilogue);
-+  auto_vec<gimple *> stmt_worklist;
-+  hash_map<tree,tree> mapping;
-+  gimple *orig_stmt, *new_stmt;
-+  gimple_stmt_iterator epilogue_gsi;
-+  gphi_iterator epilogue_phi_gsi;
-+  stmt_vec_info stmt_vinfo = NULL, related_vinfo;
-+  basic_block *epilogue_bbs = get_loop_body (epilogue);
-+
-+  LOOP_VINFO_BBS (epilogue_vinfo) = epilogue_bbs;
-+
-+  /* Restore original data_reference's offset, before the previous loop and its
-+     prologue.  */
-+  std::pair<data_reference*, tree> *dr_init;
-+  unsigned i;
-+  for (i = 0; orig_drs_init.iterate (i, &dr_init); i++)
-+    DR_OFFSET (dr_init->first) = dr_init->second;
-+
-+  /* Advance data_reference's with the number of iterations of the previous
-+     loop and its prologue.  */
-+  vect_update_inits_of_drs (epilogue_vinfo, advance, PLUS_EXPR);
-+
-+
-+  /* The EPILOGUE loop is a copy of the original loop so they share the same
-+     gimple UIDs.  In this loop we update the loop_vec_info of the EPILOGUE to
-+     point to the copied statements.  We also create a mapping of all LHS' in
-+     the original loop and all the LHS' in the EPILOGUE and create worklists to
-+     update the STMT_VINFO_PATTERN_DEF_SEQs and STMT_VINFO_RELATED_STMTs.  */
-+  for (unsigned i = 0; i < epilogue->num_nodes; ++i)
-+    {
-+      for (epilogue_phi_gsi = gsi_start_phis (epilogue_bbs[i]);
-+	   !gsi_end_p (epilogue_phi_gsi); gsi_next (&epilogue_phi_gsi))
-+	{
-+	  new_stmt = epilogue_phi_gsi.phi ();
-+
-+	  gcc_assert (gimple_uid (new_stmt) > 0);
-+	  stmt_vinfo
-+	    = epilogue_vinfo->stmt_vec_infos[gimple_uid (new_stmt) - 1];
-+
-+	  orig_stmt = STMT_VINFO_STMT (stmt_vinfo);
-+	  STMT_VINFO_STMT (stmt_vinfo) = new_stmt;
-+
-+	  mapping.put (gimple_phi_result (orig_stmt),
-+		       gimple_phi_result (new_stmt));
-+	  /* PHI nodes can not have patterns or related statements.  */
-+	  gcc_assert (STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) == NULL
-+		      && STMT_VINFO_RELATED_STMT (stmt_vinfo) == NULL);
-+	}
-+
-+      for (epilogue_gsi = gsi_start_bb (epilogue_bbs[i]);
-+	   !gsi_end_p (epilogue_gsi); gsi_next (&epilogue_gsi))
-+	{
-+	  new_stmt = gsi_stmt (epilogue_gsi);
-+
-+	  gcc_assert (gimple_uid (new_stmt) > 0);
-+	  stmt_vinfo
-+	    = epilogue_vinfo->stmt_vec_infos[gimple_uid (new_stmt) - 1];
-+
-+	  orig_stmt = STMT_VINFO_STMT (stmt_vinfo);
-+	  STMT_VINFO_STMT (stmt_vinfo) = new_stmt;
-+
-+	  if (tree old_lhs = gimple_get_lhs (orig_stmt))
-+	    mapping.put (old_lhs, gimple_get_lhs (new_stmt));
-+
-+	  if (STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo))
-+	    {
-+	      gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
-+	      for (gimple_stmt_iterator gsi = gsi_start (seq);
-+		   !gsi_end_p (gsi); gsi_next (&gsi))
-+		stmt_worklist.safe_push (gsi_stmt (gsi));
-+	    }
-+
-+	  related_vinfo = STMT_VINFO_RELATED_STMT (stmt_vinfo);
-+	  if (related_vinfo != NULL && related_vinfo != stmt_vinfo)
-+	    {
-+	      gimple *stmt = STMT_VINFO_STMT (related_vinfo);
-+	      stmt_worklist.safe_push (stmt);
-+	      /* Set BB such that the assert in
-+		'get_initial_def_for_reduction' is able to determine that
-+		the BB of the related stmt is inside this loop.  */
-+	      gimple_set_bb (stmt,
-+			     gimple_bb (new_stmt));
-+	      related_vinfo = STMT_VINFO_RELATED_STMT (related_vinfo);
-+	      gcc_assert (related_vinfo == NULL
-+			  || related_vinfo == stmt_vinfo);
-+	    }
-+	}
-+    }
-+
-+  /* The PATTERN_DEF_SEQs and RELATED_STMTs in the epilogue were constructed
-+     using the original main loop and thus need to be updated to refer to the
-+     cloned variables used in the epilogue.  */
-+  for (unsigned i = 0; i < stmt_worklist.length (); ++i)
-+    {
-+      gimple *stmt = stmt_worklist[i];
-+      tree *new_op;
-+
-+      for (unsigned j = 1; j < gimple_num_ops (stmt); ++j)
-+	{
-+	  tree op = gimple_op (stmt, j);
-+	  if ((new_op = mapping.get(op)))
-+	    gimple_set_op (stmt, j, *new_op);
-+	  else
-+	    {
-+	      op = simplify_replace_tree (op, NULL_TREE, NULL_TREE,
-+				     &find_in_mapping, &mapping);
-+	      gimple_set_op (stmt, j, op);
-+	    }
-+	}
-+    }
-+
-+  struct data_reference *dr;
-+  vec<data_reference_p> datarefs = epilogue_vinfo->shared->datarefs;
-+  FOR_EACH_VEC_ELT (datarefs, i, dr)
-+    {
-+      orig_stmt = DR_STMT (dr);
-+      gcc_assert (gimple_uid (orig_stmt) > 0);
-+      stmt_vinfo = epilogue_vinfo->stmt_vec_infos[gimple_uid (orig_stmt) - 1];
-+      /* Data references for gather loads and scatter stores do not use the
-+	 updated offset we set using ADVANCE.  Instead we have to make sure the
-+	 reference in the data references point to the corresponding copy of
-+	 the original in the epilogue.  */
-+      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
-+	{
-+	  DR_REF (dr)
-+	    = simplify_replace_tree (DR_REF (dr), NULL_TREE, NULL_TREE,
-+				     &find_in_mapping, &mapping);
-+	  DR_BASE_ADDRESS (dr)
-+	    = simplify_replace_tree (DR_BASE_ADDRESS (dr), NULL_TREE, NULL_TREE,
-+				     &find_in_mapping, &mapping);
-+	}
-+      DR_STMT (dr) = STMT_VINFO_STMT (stmt_vinfo);
-+      stmt_vinfo->dr_aux.stmt = stmt_vinfo;
-+      /* The vector size of the epilogue is smaller than that of the main loop
-+	 so the alignment is either the same or lower. This means the dr will
-+	 thus by definition be aligned.  */
-+      STMT_VINFO_DR_INFO (stmt_vinfo)->base_misaligned = false;
-+    }
-+
-+  epilogue_vinfo->shared->datarefs_copy.release ();
-+  epilogue_vinfo->shared->save_datarefs ();
-+}
-+
- /* Function vect_transform_loop.
- 
-    The analysis phase has determined that the loop is vectorizable.
-@@ -8279,11 +8385,11 @@ vect_transform_loop (loop_vec_info loop_vinfo)
-   if (th >= vect_vf_for_cost (loop_vinfo)
-       && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
-     {
--      if (dump_enabled_p ())
--	dump_printf_loc (MSG_NOTE, vect_location,
--			 "Profitability threshold is %d loop iterations.\n",
--                         th);
--      check_profitability = true;
-+	if (dump_enabled_p ())
-+	  dump_printf_loc (MSG_NOTE, vect_location,
-+			   "Profitability threshold is %d loop iterations.\n",
-+			   th);
-+	check_profitability = true;
-     }
- 
-   /* Make sure there exists a single-predecessor exit bb.  Do this before 
-@@ -8301,18 +8407,8 @@ vect_transform_loop (loop_vec_info loop_vinfo)
- 
-   if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
-     {
--      poly_uint64 versioning_threshold
--	= LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo);
--      if (check_profitability
--	  && ordered_p (poly_uint64 (th), versioning_threshold))
--	{
--	  versioning_threshold = ordered_max (poly_uint64 (th),
--					      versioning_threshold);
--	  check_profitability = false;
--	}
-       struct loop *sloop
--	= vect_loop_versioning (loop_vinfo, th, check_profitability,
--				versioning_threshold);
-+	= vect_loop_versioning (loop_vinfo);
-       sloop->force_vectorize = false;
-       check_profitability = false;
-     }
-@@ -8337,9 +8433,13 @@ vect_transform_loop (loop_vec_info loop_vinfo)
-   LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = niters;
-   tree nitersm1 = unshare_expr (LOOP_VINFO_NITERSM1 (loop_vinfo));
-   bool niters_no_overflow = loop_niters_no_overflow (loop_vinfo);
-+  tree advance;
-+  drs_init_vec orig_drs_init;
-+
-   epilogue = vect_do_peeling (loop_vinfo, niters, nitersm1, &niters_vector,
- 			      &step_vector, &niters_vector_mult_vf, th,
--			      check_profitability, niters_no_overflow);
-+			      check_profitability, niters_no_overflow,
-+			      &advance, orig_drs_init);
- 
-   if (niters_vector == NULL_TREE)
-     {
-@@ -8413,7 +8513,9 @@ vect_transform_loop (loop_vec_info loop_vinfo)
- 
- 	  if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def
- 	       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
--	       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle)
-+	       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def
-+	       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
-+	       || STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def)
- 	      && ! PURE_SLP_STMT (stmt_info))
- 	    {
- 	      if (dump_enabled_p ())
-@@ -8565,12 +8667,9 @@ vect_transform_loop (loop_vec_info loop_vinfo)
- 	  dump_printf (MSG_NOTE, "\n");
- 	}
-       else
--	{
--	  dump_printf_loc (MSG_NOTE, vect_location,
--			   "LOOP EPILOGUE VECTORIZED (VS=");
--	  dump_dec (MSG_NOTE, current_vector_size);
--	  dump_printf (MSG_NOTE, ")\n");
--	}
-+	dump_printf_loc (MSG_NOTE, vect_location,
-+			 "LOOP EPILOGUE VECTORIZED (MODE=%s)\n",
-+			 GET_MODE_NAME (loop_vinfo->vector_mode));
-     }
- 
-   /* Loops vectorized with a variable factor won't benefit from
-@@ -8592,57 +8691,14 @@ vect_transform_loop (loop_vec_info loop_vinfo)
-      since vectorized loop can have loop-carried dependencies.  */
-   loop->safelen = 0;
- 
--  /* Don't vectorize epilogue for epilogue.  */
--  if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
--    epilogue = NULL;
--
--  if (!PARAM_VALUE (PARAM_VECT_EPILOGUES_NOMASK))
--    epilogue = NULL;
--
-   if (epilogue)
-     {
--      auto_vector_sizes vector_sizes;
--      targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
--      unsigned int next_size = 0;
--
--      /* Note LOOP_VINFO_NITERS_KNOWN_P and LOOP_VINFO_INT_NITERS work
--         on niters already ajusted for the iterations of the prologue.  */
--      if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
--	  && known_eq (vf, lowest_vf))
--	{
--	  unsigned HOST_WIDE_INT eiters
--	    = (LOOP_VINFO_INT_NITERS (loop_vinfo)
--	       - LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
--	  eiters
--	    = eiters % lowest_vf + LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
--	  epilogue->nb_iterations_upper_bound = eiters - 1;
--	  epilogue->any_upper_bound = true;
--
--	  unsigned int ratio;
--	  while (next_size < vector_sizes.length ()
--		 && !(constant_multiple_p (current_vector_size,
--					   vector_sizes[next_size], &ratio)
--		      && eiters >= lowest_vf / ratio))
--	    next_size += 1;
--	}
--      else
--	while (next_size < vector_sizes.length ()
--	       && maybe_lt (current_vector_size, vector_sizes[next_size]))
--	  next_size += 1;
--
--      if (next_size == vector_sizes.length ())
--	epilogue = NULL;
--    }
-+      update_epilogue_loop_vinfo (epilogue, advance, orig_drs_init);
- 
--  if (epilogue)
--    {
-+      epilogue->simduid = loop->simduid;
-       epilogue->force_vectorize = loop->force_vectorize;
-       epilogue->safelen = loop->safelen;
-       epilogue->dont_vectorize = false;
--
--      /* We may need to if-convert epilogue to vectorize it.  */
--      if (LOOP_VINFO_SCALAR_LOOP (loop_vinfo))
--	tree_if_conversion (epilogue);
-     }
- 
-   return epilogue;
-diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
-index badf4e7104e..6356ecd692f 100644
---- a/gcc/tree-vect-patterns.c
-+++ b/gcc/tree-vect-patterns.c
-@@ -46,6 +46,8 @@ along with GCC; see the file COPYING3.  If not see
- #include "cgraph.h"
- #include "omp-simd-clone.h"
- #include "predict.h"
-+#include "tree-vector-builder.h"
-+#include "vec-perm-indices.h"
- 
- /* Return true if we have a useful VR_RANGE range for VAR, storing it
-    in *MIN_VALUE and *MAX_VALUE if so.  Note the range in the dump files.  */
-@@ -185,15 +187,15 @@ vect_get_external_def_edge (vec_info *vinfo, tree var)
-    is nonnull.  */
- 
- static bool
--vect_supportable_direct_optab_p (tree otype, tree_code code,
-+vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
- 				 tree itype, tree *vecotype_out,
- 				 tree *vecitype_out = NULL)
- {
--  tree vecitype = get_vectype_for_scalar_type (itype);
-+  tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
-   if (!vecitype)
-     return false;
- 
--  tree vecotype = get_vectype_for_scalar_type (otype);
-+  tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
-   if (!vecotype)
-     return false;
- 
-@@ -632,6 +634,7 @@ static bool
- vect_split_statement (stmt_vec_info stmt2_info, tree new_rhs,
- 		      gimple *stmt1, tree vectype)
- {
-+  vec_info *vinfo = stmt2_info->vinfo;
-   if (is_pattern_stmt_p (stmt2_info))
-     {
-       /* STMT2_INFO is part of a pattern.  Get the statement to which
-@@ -675,7 +678,7 @@ vect_split_statement (stmt_vec_info stmt2_info, tree new_rhs,
- 	 two-statement pattern now.  */
-       gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
-       tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
--      tree lhs_vectype = get_vectype_for_scalar_type (lhs_type);
-+      tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
-       if (!lhs_vectype)
- 	return false;
- 
-@@ -712,6 +715,8 @@ static tree
- vect_convert_input (stmt_vec_info stmt_info, tree type,
- 		    vect_unpromoted_value *unprom, tree vectype)
- {
-+  vec_info *vinfo = stmt_info->vinfo;
-+
-   /* Check for a no-op conversion.  */
-   if (types_compatible_p (type, TREE_TYPE (unprom->op)))
-     return unprom->op;
-@@ -749,7 +754,7 @@ vect_convert_input (stmt_vec_info stmt_info, tree type,
- 	     unsigned promotion.  */
- 	  tree midtype = build_nonstandard_integer_type
- 	    (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
--	  tree vec_midtype = get_vectype_for_scalar_type (midtype);
-+	  tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
- 	  if (vec_midtype)
- 	    {
- 	      input = vect_recog_temp_ssa_var (midtype, NULL);
-@@ -830,17 +835,8 @@ vect_convert_output (stmt_vec_info stmt_info, tree type, gimple *pattern_stmt,
- /* Return true if STMT_VINFO describes a reduction for which reassociation
-    is allowed.  If STMT_INFO is part of a group, assume that it's part of
-    a reduction chain and optimistically assume that all statements
--   except the last allow reassociation.  */
--
--static bool
--vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo)
--{
--  return (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
--	  ? STMT_VINFO_REDUC_TYPE (stmt_vinfo) != FOLD_LEFT_REDUCTION
--	  : REDUC_GROUP_FIRST_ELEMENT (stmt_vinfo) != NULL);
--}
--
--/* As above, but also require it to have code CODE and to be a reduction
-+   except the last allow reassociation.
-+   Also require it to have code CODE and to be a reduction
-    in the outermost loop.  When returning true, store the operands in
-    *OP0_OUT and *OP1_OUT.  */
- 
-@@ -862,11 +858,19 @@ vect_reassociating_reduction_p (stmt_vec_info stmt_info, tree_code code,
-   if (loop && nested_in_vect_loop_p (loop, stmt_info))
-     return false;
- 
--  if (!vect_reassociating_reduction_p (stmt_info))
-+  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
-+    {
-+      if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
-+				       code))
-+	return false;
-+    }
-+  else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
-     return false;
- 
-   *op0_out = gimple_assign_rhs1 (assign);
-   *op1_out = gimple_assign_rhs2 (assign);
-+  if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
-+    std::swap (*op0_out, *op1_out);
-   return true;
- }
- 
-@@ -983,7 +987,7 @@ vect_recog_dot_prod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
-   vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
- 
-   tree half_vectype;
--  if (!vect_supportable_direct_optab_p (type, DOT_PROD_EXPR, half_type,
-+  if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
- 					type_out, &half_vectype))
-     return NULL;
- 
-@@ -1141,7 +1145,7 @@ vect_recog_sad_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
-   vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
- 
-   tree half_vectype;
--  if (!vect_supportable_direct_optab_p (sum_type, SAD_EXPR, half_type,
-+  if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
- 					type_out, &half_vectype))
-     return NULL;
- 
-@@ -1187,6 +1191,7 @@ vect_recog_widen_op_pattern (stmt_vec_info last_stmt_info, tree *type_out,
- 			     tree_code orig_code, tree_code wide_code,
- 			     bool shift_p, const char *name)
- {
-+  vec_info *vinfo = last_stmt_info->vinfo;
-   gimple *last_stmt = last_stmt_info->stmt;
- 
-   vect_unpromoted_value unprom[2];
-@@ -1206,8 +1211,8 @@ vect_recog_widen_op_pattern (stmt_vec_info last_stmt_info, tree *type_out,
- 					    TYPE_UNSIGNED (half_type));
- 
-   /* Check target support  */
--  tree vectype = get_vectype_for_scalar_type (half_type);
--  tree vecitype = get_vectype_for_scalar_type (itype);
-+  tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
-+  tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
-   enum tree_code dummy_code;
-   int dummy_int;
-   auto_vec<tree> dummy_vec;
-@@ -1219,7 +1224,7 @@ vect_recog_widen_op_pattern (stmt_vec_info last_stmt_info, tree *type_out,
- 					  &dummy_int, &dummy_vec))
-     return NULL;
- 
--  *type_out = get_vectype_for_scalar_type (type);
-+  *type_out = get_vectype_for_scalar_type (vinfo, type);
-   if (!*type_out)
-     return NULL;
- 
-@@ -1271,6 +1276,7 @@ vect_recog_widen_mult_pattern (stmt_vec_info last_stmt_info, tree *type_out)
- static gimple *
- vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
- {
-+  vec_info *vinfo = stmt_vinfo->vinfo;
-   gimple *last_stmt = stmt_vinfo->stmt;
-   tree base, exp;
-   gimple *stmt;
-@@ -1339,7 +1345,7 @@ vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
- 		  if (node->simd_clones == NULL)
- 		    return NULL;
- 		}
--	      *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
-+	      *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
- 	      if (!*type_out)
- 		return NULL;
- 	      tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
-@@ -1364,7 +1370,7 @@ vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
-       || (TREE_CODE (exp) == REAL_CST
-           && real_equal (&TREE_REAL_CST (exp), &dconst2)))
-     {
--      if (!vect_supportable_direct_optab_p (TREE_TYPE (base), MULT_EXPR,
-+      if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR,
- 					    TREE_TYPE (base), type_out))
- 	return NULL;
- 
-@@ -1377,7 +1383,7 @@ vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
-   if (TREE_CODE (exp) == REAL_CST
-       && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
-     {
--      *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
-+      *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
-       if (*type_out
- 	  && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
- 					     OPTIMIZE_FOR_SPEED))
-@@ -1470,8 +1476,8 @@ vect_recog_widen_sum_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
- 
-   vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
- 
--  if (!vect_supportable_direct_optab_p (type, WIDEN_SUM_EXPR, unprom0.type,
--					type_out))
-+  if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
-+					unprom0.type, type_out))
-     return NULL;
- 
-   var = vect_recog_temp_ssa_var (type, NULL);
-@@ -1662,7 +1668,7 @@ vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info, tree *type_out)
- 
-   vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
- 
--  *type_out = get_vectype_for_scalar_type (type);
-+  *type_out = get_vectype_for_scalar_type (vinfo, type);
-   if (!*type_out)
-     return NULL;
- 
-@@ -1683,8 +1689,8 @@ vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info, tree *type_out)
-      wants to rewrite anyway.  If targets have a minimum element size
-      for some optabs, we should pattern-match smaller ops to larger ops
-      where beneficial.  */
--  tree new_vectype = get_vectype_for_scalar_type (new_type);
--  tree op_vectype = get_vectype_for_scalar_type (op_type);
-+  tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
-+  tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
-   if (!new_vectype || !op_vectype)
-     return NULL;
- 
-@@ -1842,7 +1848,7 @@ vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
- 					       TYPE_UNSIGNED (new_type));
- 
-   /* Check for target support.  */
--  tree new_vectype = get_vectype_for_scalar_type (new_type);
-+  tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
-   if (!new_vectype
-       || !direct_internal_fn_supported_p (ifn, new_vectype,
- 					  OPTIMIZE_FOR_SPEED))
-@@ -1850,7 +1856,7 @@ vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
- 
-   /* The IR requires a valid vector type for the cast result, even though
-      it's likely to be discarded.  */
--  *type_out = get_vectype_for_scalar_type (type);
-+  *type_out = get_vectype_for_scalar_type (vinfo, type);
-   if (!*type_out)
-     return NULL;
- 
-@@ -1936,7 +1942,7 @@ vect_recog_cast_forwprop_pattern (stmt_vec_info last_stmt_info, tree *type_out)
-      the unnecessary widening and narrowing.  */
-   vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
- 
--  *type_out = get_vectype_for_scalar_type (lhs_type);
-+  *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
-   if (!*type_out)
-     return NULL;
- 
-@@ -1996,24 +2002,107 @@ vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
-   enum vect_def_type dt;
-   optab optab1, optab2;
-   edge ext_def = NULL;
-+  bool bswap16_p = false;
- 
--  if (!is_gimple_assign (last_stmt))
--    return NULL;
-+  if (is_gimple_assign (last_stmt))
-+    {
-+      rhs_code = gimple_assign_rhs_code (last_stmt);
-+      switch (rhs_code)
-+	{
-+	case LROTATE_EXPR:
-+	case RROTATE_EXPR:
-+	  break;
-+	default:
-+	  return NULL;
-+	}
- 
--  rhs_code = gimple_assign_rhs_code (last_stmt);
--  switch (rhs_code)
-+      lhs = gimple_assign_lhs (last_stmt);
-+      oprnd0 = gimple_assign_rhs1 (last_stmt);
-+      type = TREE_TYPE (oprnd0);
-+      oprnd1 = gimple_assign_rhs2 (last_stmt);
-+    }
-+  else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
-     {
--    case LROTATE_EXPR:
--    case RROTATE_EXPR:
--      break;
--    default:
--      return NULL;
-+      /* __builtin_bswap16 (x) is another form of x r>> 8.
-+	 The vectorizer has bswap support, but only if the argument isn't
-+	 promoted.  */
-+      lhs = gimple_call_lhs (last_stmt);
-+      oprnd0 = gimple_call_arg (last_stmt, 0);
-+      type = TREE_TYPE (oprnd0);
-+      if (TYPE_PRECISION (TREE_TYPE (lhs)) != 16
-+	  || TYPE_PRECISION (type) <= 16
-+	  || TREE_CODE (oprnd0) != SSA_NAME
-+	  || BITS_PER_UNIT != 8
-+	  || !TYPE_UNSIGNED (TREE_TYPE (lhs)))
-+	return NULL;
-+
-+      stmt_vec_info def_stmt_info;
-+      if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
-+	return NULL;
-+
-+      if (dt != vect_internal_def)
-+	return NULL;
-+
-+      if (gimple_assign_cast_p (def_stmt))
-+	{
-+	  def = gimple_assign_rhs1 (def_stmt);
-+	  if (INTEGRAL_TYPE_P (TREE_TYPE (def))
-+	      && TYPE_PRECISION (TREE_TYPE (def)) == 16)
-+	    oprnd0 = def;
-+	}
-+
-+      type = TREE_TYPE (lhs);
-+      vectype = get_vectype_for_scalar_type (vinfo, type);
-+      if (vectype == NULL_TREE)
-+	return NULL;
-+
-+      if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
-+	{
-+	  /* The encoding uses one stepped pattern for each byte in the
-+	     16-bit word.  */
-+	  vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
-+	  for (unsigned i = 0; i < 3; ++i)
-+	    for (unsigned j = 0; j < 2; ++j)
-+	      elts.quick_push ((i + 1) * 2 - j - 1);
-+
-+	  vec_perm_indices indices (elts, 1,
-+				    TYPE_VECTOR_SUBPARTS (char_vectype));
-+	  if (can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
-+	    {
-+	      /* vectorizable_bswap can handle the __builtin_bswap16 if we
-+		 undo the argument promotion.  */
-+	      if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
-+		{
-+		  def = vect_recog_temp_ssa_var (type, NULL);
-+		  def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
-+		  append_pattern_def_seq (stmt_vinfo, def_stmt);
-+		  oprnd0 = def;
-+		}
-+
-+	      /* Pattern detected.  */
-+	      vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
-+
-+	      *type_out = vectype;
-+
-+	      /* Pattern supported.  Create a stmt to be used to replace the
-+		 pattern, with the unpromoted argument.  */
-+	      var = vect_recog_temp_ssa_var (type, NULL);
-+	      pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
-+						1, oprnd0);
-+	      gimple_call_set_lhs (pattern_stmt, var);
-+      gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
-+				      gimple_call_fntype (last_stmt));
-+	      return pattern_stmt;
-+	    }
-+	}
-+
-+      oprnd1 = build_int_cst (integer_type_node, 8);
-+      rhs_code = LROTATE_EXPR;
-+      bswap16_p = true;
-     }
-+  else
-+    return NULL;
- 
--  lhs = gimple_assign_lhs (last_stmt);
--  oprnd0 = gimple_assign_rhs1 (last_stmt);
--  type = TREE_TYPE (oprnd0);
--  oprnd1 = gimple_assign_rhs2 (last_stmt);
-   if (TREE_CODE (oprnd0) != SSA_NAME
-       || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
-       || !INTEGRAL_TYPE_P (type)
-@@ -2029,7 +2118,7 @@ vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
-       && dt != vect_external_def)
-     return NULL;
- 
--  vectype = get_vectype_for_scalar_type (type);
-+  vectype = get_vectype_for_scalar_type (vinfo, type);
-   if (vectype == NULL_TREE)
-     return NULL;
- 
-@@ -2038,14 +2127,39 @@ vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
-   optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
-   if (optab1
-       && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
--    return NULL;
-+    {
-+     use_rotate:
-+      if (bswap16_p)
-+	{
-+	  if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
-+	    {
-+	      def = vect_recog_temp_ssa_var (type, NULL);
-+	      def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
-+	      append_pattern_def_seq (stmt_vinfo, def_stmt);
-+	      oprnd0 = def;
-+	    }
-+
-+	  /* Pattern detected.  */
-+	  vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
-+
-+	  *type_out = vectype;
-+
-+	  /* Pattern supported.  Create a stmt to be used to replace the
-+	     pattern.  */
-+	  var = vect_recog_temp_ssa_var (type, NULL);
-+	  pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
-+					      oprnd1);
-+	  return pattern_stmt;
-+	}
-+      return NULL;
-+    }
- 
-   if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
-     {
-       optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
-       if (optab2
- 	  && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
--	return NULL;
-+	goto use_rotate;
-     }
- 
-   /* If vector/vector or vector/scalar shifts aren't supported by the target,
-@@ -2070,6 +2184,14 @@ vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
- 
-   *type_out = vectype;
- 
-+  if (bswap16_p && !useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
-+    {
-+      def = vect_recog_temp_ssa_var (type, NULL);
-+      def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
-+      append_pattern_def_seq (stmt_vinfo, def_stmt);
-+      oprnd0 = def;
-+    }
-+
-   if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
-     ext_def = vect_get_external_def_edge (vinfo, oprnd1);
- 
-@@ -2106,7 +2228,7 @@ vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
-     }
-   else
-     {
--      tree vecstype = get_vectype_for_scalar_type (stype);
-+      tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
- 
-       if (vecstype == NULL_TREE)
- 	return NULL;
-@@ -2235,7 +2357,7 @@ vect_recog_vector_vector_shift_pattern (stmt_vec_info stmt_vinfo,
-   if (!def_vinfo)
-     return NULL;
- 
--  *type_out = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
-+  *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
-   if (*type_out == NULL_TREE)
-     return NULL;
- 
-@@ -2258,7 +2380,8 @@ vect_recog_vector_vector_shift_pattern (stmt_vec_info stmt_vinfo,
- 				       TYPE_PRECISION (TREE_TYPE (oprnd1)));
- 	      def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
- 	      def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
--	      tree vecstype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
-+	      tree vecstype = get_vectype_for_scalar_type (vinfo,
-+							   TREE_TYPE (rhs1));
- 	      append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
- 	    }
- 	}
-@@ -2423,6 +2546,7 @@ static gimple *
- vect_synth_mult_by_constant (tree op, tree val,
- 			     stmt_vec_info stmt_vinfo)
- {
-+  vec_info *vinfo = stmt_vinfo->vinfo;
-   tree itype = TREE_TYPE (op);
-   machine_mode mode = TYPE_MODE (itype);
-   struct algorithm alg;
-@@ -2441,7 +2565,7 @@ vect_synth_mult_by_constant (tree op, tree val,
- 
-   /* Targets that don't support vector shifts but support vector additions
-      can synthesize shifts that way.  */
--  bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
-+  bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
- 
-   HOST_WIDE_INT hwval = tree_to_shwi (val);
-   /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
-@@ -2452,7 +2576,7 @@ vect_synth_mult_by_constant (tree op, tree val,
-   if (!possible)
-     return NULL;
- 
--  tree vectype = get_vectype_for_scalar_type (multtype);
-+  tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
- 
-   if (!vectype
-       || !target_supports_mult_synth_alg (&alg, variant,
-@@ -2598,6 +2722,7 @@ vect_synth_mult_by_constant (tree op, tree val,
- static gimple *
- vect_recog_mult_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
- {
-+  vec_info *vinfo = stmt_vinfo->vinfo;
-   gimple *last_stmt = stmt_vinfo->stmt;
-   tree oprnd0, oprnd1, vectype, itype;
-   gimple *pattern_stmt;
-@@ -2618,7 +2743,7 @@ vect_recog_mult_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
-       || !type_has_mode_precision_p (itype))
-     return NULL;
- 
--  vectype = get_vectype_for_scalar_type (itype);
-+  vectype = get_vectype_for_scalar_type (vinfo, itype);
-   if (vectype == NULL_TREE)
-     return NULL;
- 
-@@ -2686,6 +2811,7 @@ vect_recog_mult_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
- static gimple *
- vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
- {
-+  vec_info *vinfo = stmt_vinfo->vinfo;
-   gimple *last_stmt = stmt_vinfo->stmt;
-   tree oprnd0, oprnd1, vectype, itype, cond;
-   gimple *pattern_stmt, *def_stmt;
-@@ -2718,7 +2844,7 @@ vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
-     return NULL;
- 
-   scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
--  vectype = get_vectype_for_scalar_type (itype);
-+  vectype = get_vectype_for_scalar_type (vinfo, itype);
-   if (vectype == NULL_TREE)
-     return NULL;
- 
-@@ -2785,7 +2911,7 @@ vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
- 	    {
- 	      tree utype
- 		= build_nonstandard_integer_type (prec, 1);
--	      tree vecutype = get_vectype_for_scalar_type (utype);
-+	      tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
- 	      tree shift
- 		= build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
- 					- tree_log2 (oprnd1));
-@@ -3104,6 +3230,7 @@ vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
- static gimple *
- vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
- {
-+  vec_info *vinfo = stmt_vinfo->vinfo;
-   gimple *last_stmt = stmt_vinfo->stmt;
-   tree cond_expr, then_clause, else_clause;
-   tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
-@@ -3126,7 +3253,7 @@ vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
-     return NULL;
- 
-   comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
--  comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
-+  comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
-   if (comp_vectype == NULL_TREE)
-     return NULL;
- 
-@@ -3174,7 +3301,7 @@ vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
-   if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
-     return NULL;
- 
--  vectype = get_vectype_for_scalar_type (type);
-+  vectype = get_vectype_for_scalar_type (vinfo, type);
-   if (vectype == NULL_TREE)
-     return NULL;
- 
-@@ -3189,7 +3316,7 @@ vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
-       || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
-     return NULL;
- 
--  vecitype = get_vectype_for_scalar_type (itype);
-+  vecitype = get_vectype_for_scalar_type (vinfo, itype);
-   if (vecitype == NULL_TREE)
-     return NULL;
- 
-@@ -3283,11 +3410,12 @@ check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
- 	  if (stmt_could_throw_p (cfun, def_stmt))
- 	    return false;
- 
--	  comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
-+	  comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
- 	  if (comp_vectype == NULL_TREE)
- 	    return false;
- 
--	  tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
-+	  tree mask_type = get_mask_type_for_scalar_type (vinfo,
-+							  TREE_TYPE (rhs1));
- 	  if (mask_type
- 	      && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
- 	    return false;
-@@ -3297,7 +3425,7 @@ check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
- 	      scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
- 	      tree itype
- 		= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
--	      vecitype = get_vectype_for_scalar_type (itype);
-+	      vecitype = get_vectype_for_scalar_type (vinfo, itype);
- 	      if (vecitype == NULL_TREE)
- 		return false;
- 	    }
-@@ -3326,10 +3454,11 @@ check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
- static tree
- adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
- {
-+  vec_info *vinfo = stmt_info->vinfo;
-   gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
- 					   NOP_EXPR, var);
-   append_pattern_def_seq (stmt_info, cast_stmt,
--			  get_vectype_for_scalar_type (type));
-+			  get_vectype_for_scalar_type (vinfo, type));
-   return gimple_assign_lhs (cast_stmt);
- }
- 
-@@ -3343,6 +3472,7 @@ static void
- adjust_bool_pattern (tree var, tree out_type,
- 		     stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
- {
-+  vec_info *vinfo = stmt_info->vinfo;
-   gimple *stmt = SSA_NAME_DEF_STMT (var);
-   enum tree_code rhs_code, def_rhs_code;
-   tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
-@@ -3504,7 +3634,7 @@ adjust_bool_pattern (tree var, tree out_type,
- 
-   gimple_set_location (pattern_stmt, loc);
-   append_pattern_def_seq (stmt_info, pattern_stmt,
--			  get_vectype_for_scalar_type (itype));
-+			  get_vectype_for_scalar_type (vinfo, itype));
-   defs.put (var, gimple_assign_lhs (pattern_stmt));
- }
- 
-@@ -3607,14 +3737,14 @@ search_type_for_mask_1 (tree var, vec_info *vinfo,
- 	      break;
- 	    }
- 
--	  comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
-+	  comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
- 	  if (comp_vectype == NULL_TREE)
- 	    {
- 	      res = NULL_TREE;
- 	      break;
- 	    }
- 
--	  mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
-+	  mask_type = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (rhs1));
- 	  if (!mask_type
- 	      || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
- 	    {
-@@ -3722,7 +3852,7 @@ vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
-       if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
- 	  || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
- 	return NULL;
--      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
-+      vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
-       if (vectype == NULL_TREE)
- 	return NULL;
- 
-@@ -3759,7 +3889,7 @@ vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
- 
- 	  if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
- 	    {
--	      tree new_vectype = get_vectype_for_scalar_type (type);
-+	      tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
- 	      append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
- 
- 	      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
-@@ -3775,7 +3905,7 @@ vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
-   else if (rhs_code == COND_EXPR
- 	   && TREE_CODE (var) == SSA_NAME)
-     {
--      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
-+      vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
-       if (vectype == NULL_TREE)
- 	return NULL;
- 
-@@ -3789,7 +3919,7 @@ vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
-       tree type
- 	= build_nonstandard_integer_type (prec,
- 					  TYPE_UNSIGNED (TREE_TYPE (var)));
--      if (get_vectype_for_scalar_type (type) == NULL_TREE)
-+      if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
- 	return NULL;
- 
-       if (!check_bool_pattern (var, vinfo, bool_stmts))
-@@ -3833,7 +3963,7 @@ vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
- 
- 	  cst0 = build_int_cst (type, 0);
- 	  cst1 = build_int_cst (type, 1);
--	  new_vectype = get_vectype_for_scalar_type (type);
-+	  new_vectype = get_vectype_for_scalar_type (vinfo, type);
- 
- 	  rhs = vect_recog_temp_ssa_var (type, NULL);
- 	  pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
-@@ -3874,7 +4004,7 @@ build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo)
-   gimple *stmt;
-   tree masktype, tmp;
- 
--  masktype = build_same_sized_truth_vector_type (vectype);
-+  masktype = truth_type_for (vectype);
-   tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
-   stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
-   append_pattern_def_seq (stmt_vinfo, stmt, masktype);
-@@ -3934,19 +4064,19 @@ vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
- 	{
- 	  int rhs_index = internal_fn_stored_value_index (ifn);
- 	  tree rhs = gimple_call_arg (last_stmt, rhs_index);
--	  vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs));
-+	  vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
- 	}
-       else
- 	{
- 	  lhs = gimple_call_lhs (last_stmt);
--	  vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
-+	  vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
- 	}
- 
-       tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
-       tree mask_arg_type = search_type_for_mask (mask_arg, vinfo);
-       if (!mask_arg_type)
- 	return NULL;
--      vectype2 = get_mask_type_for_scalar_type (mask_arg_type);
-+      vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
- 
-       if (!vectype1 || !vectype2
- 	  || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
-@@ -3992,7 +4122,7 @@ vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
-   /* Check for cond expression requiring mask conversion.  */
-   if (rhs_code == COND_EXPR)
-     {
--      vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
-+      vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
- 
-       if (TREE_CODE (rhs1) == SSA_NAME)
- 	{
-@@ -4023,7 +4153,7 @@ vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
-       else
- 	return NULL;
- 
--      vectype2 = get_mask_type_for_scalar_type (rhs1_type);
-+      vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
- 
-       if (!vectype1 || !vectype2)
- 	return NULL;
-@@ -4058,7 +4188,8 @@ vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
- 	      tree wide_scalar_type = build_nonstandard_integer_type
- 		(tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
- 		 TYPE_UNSIGNED (rhs1_type));
--	      tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type);
-+	      tree vectype3 = get_vectype_for_scalar_type (vinfo,
-+							   wide_scalar_type);
- 	      if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
- 		return NULL;
- 	    }
-@@ -4113,14 +4244,14 @@ vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
- 
-   if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
-     {
--      vectype1 = get_mask_type_for_scalar_type (rhs1_type);
-+      vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
-       if (!vectype1)
- 	return NULL;
-       rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo);
-     }
-   else
-     {
--      vectype1 = get_mask_type_for_scalar_type (rhs2_type);
-+      vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
-       if (!vectype1)
- 	return NULL;
-       rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
-@@ -4191,7 +4322,7 @@ vect_convert_mask_for_vectype (tree mask, tree vectype,
-   tree mask_type = search_type_for_mask (mask, vinfo);
-   if (mask_type)
-     {
--      tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
-+      tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
-       if (mask_vectype
- 	  && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
- 		       TYPE_VECTOR_SUBPARTS (mask_vectype)))
-@@ -4214,10 +4345,11 @@ vect_add_conversion_to_pattern (tree type, tree value, stmt_vec_info stmt_info)
-   if (useless_type_conversion_p (type, TREE_TYPE (value)))
-     return value;
- 
-+  vec_info *vinfo = stmt_info->vinfo;
-   tree new_value = vect_recog_temp_ssa_var (type, NULL);
-   gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
-   append_pattern_def_seq (stmt_info, conversion,
--			  get_vectype_for_scalar_type (type));
-+			  get_vectype_for_scalar_type (vinfo, type));
-   return new_value;
- }
- 
-@@ -4253,7 +4385,8 @@ vect_recog_gather_scatter_pattern (stmt_vec_info stmt_info, tree *type_out)
-     return NULL;
- 
-   /* Convert the mask to the right form.  */
--  tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
-+  tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
-+						 gs_info.element_type);
-   if (mask)
-     mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
- 					  loop_vinfo);
-@@ -4731,6 +4864,7 @@ static inline void
- vect_mark_pattern_stmts (stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
-                          tree pattern_vectype)
- {
-+  stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
-   gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
- 
-   gimple *orig_pattern_stmt = NULL;
-@@ -4765,6 +4899,9 @@ vect_mark_pattern_stmts (stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
-     for (gimple_stmt_iterator si = gsi_start (def_seq);
- 	 !gsi_end_p (si); gsi_next (&si))
-       {
-+	if (dump_enabled_p ())
-+	  dump_printf_loc (MSG_NOTE, vect_location,
-+			   "extra pattern stmt: %G", gsi_stmt (si));
- 	stmt_vec_info pattern_stmt_info
- 	  = vect_init_pattern_stmt (gsi_stmt (si),
- 				    orig_stmt_info, pattern_vectype);
-@@ -4790,6 +4927,60 @@ vect_mark_pattern_stmts (stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
-     }
-   else
-     vect_set_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
-+
-+  /* Transfer reduction path info to the pattern.  */
-+  if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
-+    {
-+      vec_info *vinfo = orig_stmt_info_saved->vinfo;
-+      tree lookfor = gimple_op (orig_stmt_info_saved->stmt,
-+				1 + STMT_VINFO_REDUC_IDX (orig_stmt_info));
-+      /* Search the pattern def sequence and the main pattern stmt.  Note
-+         we may have inserted all into a containing pattern def sequence
-+	 so the following is a bit awkward.  */
-+      gimple_stmt_iterator si;
-+      gimple *s;
-+      if (def_seq)
-+	{
-+	  si = gsi_start (def_seq);
-+	  s = gsi_stmt (si);
-+	  gsi_next (&si);
-+	}
-+      else
-+	{
-+	  si = gsi_none ();
-+	  s = pattern_stmt;
-+	}
-+      do
-+	{
-+	  bool found = false;
-+	  for (unsigned i = 1; i < gimple_num_ops (s); ++i)
-+	    if (gimple_op (s, i) == lookfor)
-+	      {
-+		STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i - 1;
-+		lookfor = gimple_get_lhs (s);
-+		found = true;
-+		break;
-+	      }
-+	  if (s == pattern_stmt)
-+	    {
-+	      if (!found && dump_enabled_p ())
-+		dump_printf_loc (MSG_NOTE, vect_location,
-+				 "failed to update reduction index.\n");
-+	      break;
-+	    }
-+	  if (gsi_end_p (si))
-+	    s = pattern_stmt;
-+	  else
-+	    {
-+	      s = gsi_stmt (si);
-+	      if (s == pattern_stmt)
-+		/* Found the end inside a bigger pattern def seq.  */
-+		si = gsi_none ();
-+	      else
-+		gsi_next (&si);
-+	    }
-+	} while (1);
-+    }
- }
- 
- /* Function vect_pattern_recog_1
-diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
-index 2abf480270c..0bef35782b5 100644
---- a/gcc/tree-vect-slp.c
-+++ b/gcc/tree-vect-slp.c
-@@ -79,6 +79,7 @@ vect_free_slp_tree (slp_tree node, bool final_p)
- 
-   SLP_TREE_CHILDREN (node).release ();
-   SLP_TREE_SCALAR_STMTS (node).release ();
-+  SLP_TREE_SCALAR_OPS (node).release ();
-   SLP_TREE_VEC_STMTS (node).release ();
-   SLP_TREE_LOAD_PERMUTATION (node).release ();
- 
-@@ -122,6 +123,7 @@ vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
- 
-   node = XNEW (struct _slp_tree);
-   SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
-+  SLP_TREE_SCALAR_OPS (node) = vNULL;
-   SLP_TREE_VEC_STMTS (node).create (0);
-   SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
-   SLP_TREE_CHILDREN (node).create (nops);
-@@ -138,6 +140,28 @@ vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
-   return node;
- }
- 
-+/* Create an SLP node for OPS.  */
-+
-+static slp_tree
-+vect_create_new_slp_node (vec<tree> ops)
-+{
-+  slp_tree node;
-+
-+  node = XNEW (struct _slp_tree);
-+  SLP_TREE_SCALAR_STMTS (node) = vNULL;
-+  SLP_TREE_SCALAR_OPS (node) = ops;
-+  SLP_TREE_VEC_STMTS (node).create (0);
-+  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
-+  SLP_TREE_CHILDREN (node) = vNULL;
-+  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
-+  SLP_TREE_TWO_OPERATORS (node) = false;
-+  SLP_TREE_DEF_TYPE (node) = vect_external_def;
-+  node->refcnt = 1;
-+  node->max_nunits = 1;
-+
-+  return node;
-+}
-+
- 
- /* This structure is used in creation of an SLP tree.  Each instance
-    corresponds to the same operand in a group of scalar stmts in an SLP
-@@ -146,6 +170,8 @@ typedef struct _slp_oprnd_info
- {
-   /* Def-stmts for the operands.  */
-   vec<stmt_vec_info> def_stmts;
-+  /* Operands.  */
-+  vec<tree> ops;
-   /* Information about the first statement, its vector def-type, type, the
-      operand itself in case it's constant, and an indication if it's a pattern
-      stmt.  */
-@@ -169,6 +195,7 @@ vect_create_oprnd_info (int nops, int group_size)
-     {
-       oprnd_info = XNEW (struct _slp_oprnd_info);
-       oprnd_info->def_stmts.create (group_size);
-+      oprnd_info->ops.create (group_size);
-       oprnd_info->first_dt = vect_uninitialized_def;
-       oprnd_info->first_op_type = NULL_TREE;
-       oprnd_info->any_pattern = false;
-@@ -190,6 +217,7 @@ vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
-   FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
-     {
-       oprnd_info->def_stmts.release ();
-+      oprnd_info->ops.release ();
-       XDELETE (oprnd_info);
-     }
- 
-@@ -197,6 +225,19 @@ vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
- }
- 
- 
-+/* Return true if STMTS contains a pattern statement.  */
-+
-+static bool
-+vect_contains_pattern_stmt_p (vec<stmt_vec_info> stmts)
-+{
-+  stmt_vec_info stmt_info;
-+  unsigned int i;
-+  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
-+    if (is_pattern_stmt_p (stmt_info))
-+      return true;
-+  return false;
-+}
-+
- /* Find the place of the data-ref in STMT_INFO in the interleaving chain
-    that starts from FIRST_STMT_INFO.  Return -1 if the data-ref is not a part
-    of the chain.  */
-@@ -231,7 +272,8 @@ vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info,
-    (if nonnull).  */
- 
- bool
--can_duplicate_and_interleave_p (unsigned int count, machine_mode elt_mode,
-+can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
-+				machine_mode elt_mode,
- 				unsigned int *nvectors_out,
- 				tree *vector_type_out,
- 				tree *permutes)
-@@ -243,7 +285,7 @@ can_duplicate_and_interleave_p (unsigned int count, machine_mode elt_mode,
-     {
-       scalar_int_mode int_mode;
-       poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
--      if (multiple_p (current_vector_size, elt_bytes, &nelts)
-+      if (multiple_p (GET_MODE_SIZE (vinfo->vector_mode), elt_bytes, &nelts)
- 	  && int_mode_for_size (elt_bits, 0).exists (&int_mode))
- 	{
- 	  tree int_type = build_nonstandard_integer_type
-@@ -322,6 +364,14 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
- 	{
- 	  internal_fn ifn = gimple_call_internal_fn (stmt);
- 	  commutative_op = first_commutative_argument (ifn);
-+
-+	  /* Masked load, only look at mask.  */
-+	  if (ifn == IFN_MASK_LOAD)
-+	    {
-+	      number_of_oprnds = 1;
-+	      /* Mask operand index.  */
-+	      first_op_idx = 5;
-+	    }
- 	}
-     }
-   else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
-@@ -380,6 +430,13 @@ again:
- 
-       if (first)
- 	{
-+	  /* For the swapping logic below force vect_reduction_def
-+	     for the reduction op in a SLP reduction group.  */
-+	  if (!STMT_VINFO_DATA_REF (stmt_info)
-+	      && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
-+	      && (int)i == STMT_VINFO_REDUC_IDX (stmt_info)
-+	      && def_stmt_info)
-+	    dt = vect_reduction_def;
- 	  oprnd_info->first_dt = dt;
- 	  oprnd_info->first_op_type = TREE_TYPE (oprnd);
- 	}
-@@ -389,20 +446,35 @@ again:
- 	     the def-stmt/s of the first stmt.  Allow different definition
- 	     types for reduction chains: the first stmt must be a
- 	     vect_reduction_def (a phi node), and the rest
--	     vect_internal_def.  */
-+	     end in the reduction chain.  */
- 	  tree type = TREE_TYPE (oprnd);
- 	  if ((oprnd_info->first_dt != dt
- 	       && !(oprnd_info->first_dt == vect_reduction_def
--		    && dt == vect_internal_def)
-+		    && !STMT_VINFO_DATA_REF (stmt_info)
-+		    && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
-+		    && def_stmt_info
-+		    && !STMT_VINFO_DATA_REF (def_stmt_info)
-+		    && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
-+			== REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
- 	       && !((oprnd_info->first_dt == vect_external_def
- 		     || oprnd_info->first_dt == vect_constant_def)
- 		    && (dt == vect_external_def
- 			|| dt == vect_constant_def)))
--	      || !types_compatible_p (oprnd_info->first_op_type, type))
-+	      || !types_compatible_p (oprnd_info->first_op_type, type)
-+	      || (!STMT_VINFO_DATA_REF (stmt_info)
-+		  && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
-+		  && ((!def_stmt_info
-+		       || STMT_VINFO_DATA_REF (def_stmt_info)
-+		       || (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
-+			   != REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
-+		      != (oprnd_info->first_dt != vect_reduction_def))))
- 	    {
- 	      /* Try swapping operands if we got a mismatch.  */
- 	      if (i == commutative_op && !swapped)
- 		{
-+		  if (dump_enabled_p ())
-+		    dump_printf_loc (MSG_NOTE, vect_location,
-+				     "trying swapped operands\n");
- 		  swapped = true;
- 		  goto again;
- 		}
-@@ -415,9 +487,9 @@ again:
- 	    }
- 	  if ((dt == vect_constant_def
- 	       || dt == vect_external_def)
--	      && !current_vector_size.is_constant ()
-+	      && !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
- 	      && (TREE_CODE (type) == BOOLEAN_TYPE
--		  || !can_duplicate_and_interleave_p (stmts.length (),
-+		  || !can_duplicate_and_interleave_p (vinfo, stmts.length (),
- 						      TYPE_MODE (type))))
- 	    {
- 	      if (dump_enabled_p ())
-@@ -431,14 +503,37 @@ again:
-       /* Check the types of the definitions.  */
-       switch (dt)
- 	{
--	case vect_constant_def:
- 	case vect_external_def:
-+	  /* Make sure to demote the overall operand to external.  */
-+	  oprnd_info->first_dt = vect_external_def;
-+	  /* Fallthru.  */
-+	case vect_constant_def:
-+	  oprnd_info->def_stmts.quick_push (NULL);
-+	  oprnd_info->ops.quick_push (oprnd);
- 	  break;
- 
-+	case vect_internal_def:
- 	case vect_reduction_def:
-+	  if (oprnd_info->first_dt == vect_reduction_def
-+	      && !STMT_VINFO_DATA_REF (stmt_info)
-+	      && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
-+	      && !STMT_VINFO_DATA_REF (def_stmt_info)
-+	      && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
-+		  == REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
-+	    {
-+	      /* For a SLP reduction chain we want to duplicate the
-+	         reduction to each of the chain members.  That gets
-+		 us a sane SLP graph (still the stmts are not 100%
-+		 correct wrt the initial values).  */
-+	      gcc_assert (!first);
-+	      oprnd_info->def_stmts.quick_push (oprnd_info->def_stmts[0]);
-+	      oprnd_info->ops.quick_push (oprnd_info->ops[0]);
-+	      break;
-+	    }
-+	  /* Fallthru.  */
- 	case vect_induction_def:
--	case vect_internal_def:
- 	  oprnd_info->def_stmts.quick_push (def_stmt_info);
-+	  oprnd_info->ops.quick_push (oprnd);
- 	  break;
- 
- 	default:
-@@ -468,6 +563,8 @@ again:
- 
-       if (first_op_cond)
- 	{
-+	  /* To get rid of this swapping we have to move the stmt code
-+	     to the SLP tree as well (and gather it here per stmt).  */
- 	  gassign *stmt = as_a <gassign *> (stmt_info->stmt);
- 	  tree cond = gimple_assign_rhs1 (stmt);
- 	  enum tree_code code = TREE_CODE (cond);
-@@ -492,10 +589,8 @@ again:
- 	}
-       else
- 	{
--	  unsigned int op = commutative_op + first_op_idx;
--	  swap_ssa_operands (stmt_info->stmt,
--			     gimple_op_ptr (stmt_info->stmt, op),
--			     gimple_op_ptr (stmt_info->stmt, op + 1));
-+	  /* Commutative ops need not reflect swapping, ops are in
-+	     the SLP tree.  */
- 	}
-       if (dump_enabled_p ())
- 	dump_printf_loc (MSG_NOTE, vect_location,
-@@ -620,7 +715,7 @@ vect_two_operations_perm_ok_p (vec<stmt_vec_info> stmts,
-    is false then this indicates the comparison could not be
-    carried out or the stmts will never be vectorized by SLP.
- 
--   Note COND_EXPR is possibly ismorphic to another one after swapping its
-+   Note COND_EXPR is possibly isomorphic to another one after swapping its
-    operands.  Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
-    the first stmt by swapping the two operands of comparison; set SWAP[i]
-    to 2 if stmt I is isormorphic to the first stmt by inverting the code
-@@ -1030,7 +1125,6 @@ vect_build_slp_tree_2 (vec_info *vinfo,
- 		       vec<stmt_vec_info> stmts, unsigned int group_size,
- 		       poly_uint64 *max_nunits,
- 		       bool *matches, unsigned *npermutes, unsigned *tree_size,
--		       unsigned max_tree_size,
- 		       scalar_stmts_to_slp_tree_map_t *bst_map);
- 
- static slp_tree
-@@ -1038,7 +1132,6 @@ vect_build_slp_tree (vec_info *vinfo,
- 		     vec<stmt_vec_info> stmts, unsigned int group_size,
- 		     poly_uint64 *max_nunits,
- 		     bool *matches, unsigned *npermutes, unsigned *tree_size,
--		     unsigned max_tree_size,
- 		     scalar_stmts_to_slp_tree_map_t *bst_map)
- {
-   if (slp_tree *leader = bst_map->get (stmts))
-@@ -1056,8 +1149,7 @@ vect_build_slp_tree (vec_info *vinfo,
-   poly_uint64 this_max_nunits = 1;
-   slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size,
- 					&this_max_nunits,
--					matches, npermutes, tree_size,
--					max_tree_size, bst_map);
-+					matches, npermutes, tree_size, bst_map);
-   if (res)
-     {
-       res->max_nunits = this_max_nunits;
-@@ -1081,7 +1173,6 @@ vect_build_slp_tree_2 (vec_info *vinfo,
- 		       vec<stmt_vec_info> stmts, unsigned int group_size,
- 		       poly_uint64 *max_nunits,
- 		       bool *matches, unsigned *npermutes, unsigned *tree_size,
--		       unsigned max_tree_size,
- 		       scalar_stmts_to_slp_tree_map_t *bst_map)
- {
-   unsigned nops, i, this_tree_size = 0;
-@@ -1109,7 +1200,7 @@ vect_build_slp_tree_2 (vec_info *vinfo,
-   if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
-     {
-       tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
--      tree vectype = get_vectype_for_scalar_type (scalar_type);
-+      tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
-       if (!vect_record_max_nunits (stmt_info, group_size, vectype, max_nunits))
- 	return NULL;
- 
-@@ -1129,18 +1220,12 @@ vect_build_slp_tree_2 (vec_info *vinfo,
- 	  /* Else def types have to match.  */
- 	  stmt_vec_info other_info;
- 	  FOR_EACH_VEC_ELT (stmts, i, other_info)
--	    {
--	      /* But for reduction chains only check on the first stmt.  */
--	      if (!STMT_VINFO_DATA_REF (other_info)
--		  && REDUC_GROUP_FIRST_ELEMENT (other_info)
--		  && REDUC_GROUP_FIRST_ELEMENT (other_info) != stmt_info)
--		continue;
--	      if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
--		return NULL;
--	    }
-+	    if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
-+	      return NULL;
- 	}
-       else
- 	return NULL;
-+      (*tree_size)++;
-       node = vect_create_new_slp_node (stmts);
-       return node;
-     }
-@@ -1152,13 +1237,23 @@ vect_build_slp_tree_2 (vec_info *vinfo,
- 			      &this_max_nunits, matches, &two_operators))
-     return NULL;
- 
--  /* If the SLP node is a load, terminate the recursion.  */
-+  /* If the SLP node is a load, terminate the recursion unless masked.  */
-   if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
-       && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
-     {
--      *max_nunits = this_max_nunits;
--      node = vect_create_new_slp_node (stmts);
--      return node;
-+      if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
-+	{
-+	  /* Masked load.  */
-+	  gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD));
-+	  nops = 1;
-+	}
-+      else
-+	{
-+	  *max_nunits = this_max_nunits;
-+	  (*tree_size)++;
-+	  node = vect_create_new_slp_node (stmts);
-+	  return node;
-+	}
-     }
- 
-   /* Get at the operands, verifying they are compatible.  */
-@@ -1184,9 +1279,6 @@ vect_build_slp_tree_2 (vec_info *vinfo,
- 
-   stmt_info = stmts[0];
- 
--  if (tree_size)
--    max_tree_size -= *tree_size;
--
-   /* Create SLP_TREE nodes for the definition node/s.  */
-   FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
-     {
-@@ -1194,32 +1286,34 @@ vect_build_slp_tree_2 (vec_info *vinfo,
-       unsigned old_tree_size = this_tree_size;
-       unsigned int j;
- 
-+      if (oprnd_info->first_dt == vect_uninitialized_def)
-+	{
-+	  /* COND_EXPR have one too many eventually if the condition
-+	     is a SSA name.  */
-+	  gcc_assert (i == 3 && nops == 4);
-+	  continue;
-+	}
-+
-       if (oprnd_info->first_dt != vect_internal_def
- 	  && oprnd_info->first_dt != vect_reduction_def
- 	  && oprnd_info->first_dt != vect_induction_def)
--        continue;
--
--      if (++this_tree_size > max_tree_size)
- 	{
--	  if (dump_enabled_p ())
--	    dump_printf_loc (MSG_MISSED_OPTIMIZATION,
--			     vect_location,
--			     "Build SLP failed: SLP tree too large\n");
--	  FOR_EACH_VEC_ELT (children, j, child)
--	    vect_free_slp_tree (child, false);
--	  vect_free_oprnd_info (oprnds_info);
--	  return NULL;
-+	  slp_tree invnode = vect_create_new_slp_node (oprnd_info->ops);
-+	  SLP_TREE_DEF_TYPE (invnode) = oprnd_info->first_dt;
-+	  oprnd_info->ops = vNULL;
-+	  children.safe_push (invnode);
-+	  continue;
- 	}
- 
-       if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
- 					group_size, &this_max_nunits,
- 					matches, npermutes,
--					&this_tree_size,
--					max_tree_size, bst_map)) != NULL)
-+					&this_tree_size, bst_map)) != NULL)
- 	{
- 	  /* If we have all children of child built up from scalars then just
- 	     throw that away and build it up this node from scalars.  */
--	  if (!SLP_TREE_CHILDREN (child).is_empty ()
-+	  if (is_a <bb_vec_info> (vinfo)
-+	      && !SLP_TREE_CHILDREN (child).is_empty ()
- 	      /* ???  Rejecting patterns this way doesn't work.  We'd have to
- 		 do extra work to cancel the pattern so the uses see the
- 		 scalar version.  */
-@@ -1244,6 +1338,9 @@ vect_build_slp_tree_2 (vec_info *vinfo,
- 				     "scalars instead\n");
- 		  oprnd_info->def_stmts = vNULL;
- 		  SLP_TREE_DEF_TYPE (child) = vect_external_def;
-+		  SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
-+		  oprnd_info->ops = vNULL;
-+		  ++this_tree_size;
- 		  children.safe_push (child);
- 		  continue;
- 		}
-@@ -1273,9 +1370,12 @@ vect_build_slp_tree_2 (vec_info *vinfo,
- 	  if (dump_enabled_p ())
- 	    dump_printf_loc (MSG_NOTE, vect_location,
- 			     "Building vector operands from scalars\n");
-+	  this_tree_size++;
- 	  child = vect_create_new_slp_node (oprnd_info->def_stmts);
- 	  SLP_TREE_DEF_TYPE (child) = vect_external_def;
-+	  SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
- 	  children.safe_push (child);
-+	  oprnd_info->ops = vNULL;
- 	  oprnd_info->def_stmts = vNULL;
- 	  continue;
- 	}
-@@ -1355,6 +1455,8 @@ vect_build_slp_tree_2 (vec_info *vinfo,
- 	      {
- 		std::swap (oprnds_info[0]->def_stmts[j],
- 			   oprnds_info[1]->def_stmts[j]);
-+		std::swap (oprnds_info[0]->ops[j],
-+			   oprnds_info[1]->ops[j]);
- 		if (dump_enabled_p ())
- 		  dump_printf (MSG_NOTE, "%d ", j);
- 	      }
-@@ -1365,37 +1467,12 @@ vect_build_slp_tree_2 (vec_info *vinfo,
- 	  if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
- 					    group_size, &this_max_nunits,
- 					    tem, npermutes,
--					    &this_tree_size,
--					    max_tree_size, bst_map)) != NULL)
-+					    &this_tree_size, bst_map)) != NULL)
- 	    {
--	      /* ... so if successful we can apply the operand swapping
--		 to the GIMPLE IL.  This is necessary because for example
--		 vect_get_slp_defs uses operand indexes and thus expects
--		 canonical operand order.  This is also necessary even
--		 if we end up building the operand from scalars as
--		 we'll continue to process swapped operand two.  */
--	      for (j = 0; j < group_size; ++j)
--		gimple_set_plf (stmts[j]->stmt, GF_PLF_1, false);
--	      for (j = 0; j < group_size; ++j)
--		if (matches[j] == !swap_not_matching)
--		  {
- 		    gassign *stmt = as_a <gassign *> (stmts[j]->stmt);
--		    /* Avoid swapping operands twice.  */
--		    if (gimple_plf (stmt, GF_PLF_1))
--		      continue;
--		    swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
--				       gimple_assign_rhs2_ptr (stmt));
--		    gimple_set_plf (stmt, GF_PLF_1, true);
--		  }
--	      /* Verify we swap all duplicates or none.  */
--	      if (flag_checking)
--		for (j = 0; j < group_size; ++j)
--		  gcc_assert (gimple_plf (stmts[j]->stmt, GF_PLF_1)
--			      == (matches[j] == !swap_not_matching));
--
- 	      /* If we have all children of child built up from scalars then
- 		 just throw that away and build it up this node from scalars.  */
--	      if (!SLP_TREE_CHILDREN (child).is_empty ()
-+	      if (is_a <bb_vec_info> (vinfo)
-+		  && !SLP_TREE_CHILDREN (child).is_empty ()
- 		  /* ???  Rejecting patterns this way doesn't work.  We'd have
- 		     to do extra work to cancel the pattern so the uses see the
- 		     scalar version.  */
-@@ -1421,6 +1498,9 @@ vect_build_slp_tree_2 (vec_info *vinfo,
- 					 "scalars instead\n");
- 		      oprnd_info->def_stmts = vNULL;
- 		      SLP_TREE_DEF_TYPE (child) = vect_external_def;
-+		      SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
-+		      oprnd_info->ops = vNULL;
-+		      ++this_tree_size;
- 		      children.safe_push (child);
- 		      continue;
- 		    }
-@@ -1444,8 +1524,7 @@ fail:
- 
-   vect_free_oprnd_info (oprnds_info);
- 
--  if (tree_size)
--    *tree_size += this_tree_size;
-+  *tree_size += this_tree_size + 1;
-   *max_nunits = this_max_nunits;
- 
-   node = vect_create_new_slp_node (stmts);
-@@ -1460,9 +1539,10 @@ static void
- vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
- 		     slp_tree node, hash_set<slp_tree> &visited)
- {
--  int i;
-+  unsigned i;
-   stmt_vec_info stmt_info;
-   slp_tree child;
-+  tree op;
- 
-   if (visited.add (node))
-     return;
-@@ -1470,11 +1550,23 @@ vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
-   dump_metadata_t metadata (dump_kind, loc.get_impl_location ());
-   dump_user_location_t user_loc = loc.get_user_location ();
-   dump_printf_loc (metadata, user_loc, "node%s %p (max_nunits=%u)\n",
--		   SLP_TREE_DEF_TYPE (node) != vect_internal_def
--		   ? " (external)" : "", node,
-+		   SLP_TREE_DEF_TYPE (node) == vect_external_def
-+		   ? " (external)"
-+		   : (SLP_TREE_DEF_TYPE (node) == vect_constant_def
-+		      ? " (constant)"
-+		      : ""), node,
- 		   estimated_poly_value (node->max_nunits));
--  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
--    dump_printf_loc (metadata, user_loc, "\tstmt %d %G", i, stmt_info->stmt);
-+  if (SLP_TREE_SCALAR_STMTS (node).exists ())
-+    FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
-+      dump_printf_loc (metadata, user_loc, "\tstmt %u %G", i, stmt_info->stmt);
-+  else
-+    {
-+      dump_printf_loc (metadata, user_loc, "\t{ ");
-+      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
-+	dump_printf (metadata, "%T%s ", op,
-+		     i < SLP_TREE_SCALAR_OPS (node).length () - 1 ? "," : "");
-+      dump_printf (metadata, "}\n");
-+    }
-   if (SLP_TREE_CHILDREN (node).is_empty ())
-     return;
-   dump_printf_loc (metadata, user_loc, "\tchildren");
-@@ -1563,8 +1655,6 @@ vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
-                           vec<unsigned> permutation,
- 			  hash_set<slp_tree> &visited)
- {
--  stmt_vec_info stmt_info;
--  vec<stmt_vec_info> tmp_stmts;
-   unsigned int i;
-   slp_tree child;
- 
-@@ -1574,15 +1664,30 @@ vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
-   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
-     vect_slp_rearrange_stmts (child, group_size, permutation, visited);
- 
--  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
--  tmp_stmts.create (group_size);
--  tmp_stmts.quick_grow_cleared (group_size);
--
--  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
--    tmp_stmts[permutation[i]] = stmt_info;
--
--  SLP_TREE_SCALAR_STMTS (node).release ();
--  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
-+  if (SLP_TREE_SCALAR_STMTS (node).exists ())
-+    {
-+      gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
-+      vec<stmt_vec_info> tmp_stmts;
-+      tmp_stmts.create (group_size);
-+      tmp_stmts.quick_grow (group_size);
-+      stmt_vec_info stmt_info;
-+      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
-+	tmp_stmts[permutation[i]] = stmt_info;
-+      SLP_TREE_SCALAR_STMTS (node).release ();
-+      SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
-+    }
-+  if (SLP_TREE_SCALAR_OPS (node).exists ())
-+    {
-+      gcc_assert (group_size == SLP_TREE_SCALAR_OPS (node).length ());
-+      vec<tree> tmp_ops;
-+      tmp_ops.create (group_size);
-+      tmp_ops.quick_grow (group_size);
-+      tree op;
-+      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
-+	tmp_ops[permutation[i]] = op;
-+      SLP_TREE_SCALAR_OPS (node).release ();
-+      SLP_TREE_SCALAR_OPS (node) = tmp_ops;
-+    }
- }
- 
- 
-@@ -1668,9 +1773,10 @@ vect_gather_slp_loads (slp_instance inst, slp_tree node,
- 
-   if (SLP_TREE_CHILDREN (node).length () == 0)
-     {
-+      if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
-+	return;
-       stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
--      if (SLP_TREE_DEF_TYPE (node) == vect_internal_def
--	  && STMT_VINFO_GROUPED_ACCESS (stmt_info)
-+      if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
- 	  && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
- 	SLP_INSTANCE_LOADS (inst).safe_push (node);
-     }
-@@ -1913,7 +2019,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
-   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
-     {
-       scalar_type = TREE_TYPE (DR_REF (dr));
--      vectype = get_vectype_for_scalar_type (scalar_type);
-+      vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
-       group_size = DR_GROUP_SIZE (stmt_info);
-     }
-   else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
-@@ -1964,7 +2070,8 @@ vect_analyze_slp_instance (vec_info *vinfo,
-       /* Mark the first element of the reduction chain as reduction to properly
- 	 transform the node.  In the reduction analysis phase only the last
- 	 element of the chain is marked as reduction.  */
--      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
-+      STMT_VINFO_DEF_TYPE (stmt_info)
-+	= STMT_VINFO_DEF_TYPE (scalar_stmts.last ());
-       STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))
- 	= STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ()));
-     }
-@@ -1982,9 +2089,10 @@ vect_analyze_slp_instance (vec_info *vinfo,
-   scalar_stmts_to_slp_tree_map_t *bst_map
-     = new scalar_stmts_to_slp_tree_map_t ();
-   poly_uint64 max_nunits = nunits;
-+  unsigned tree_size = 0;
-   node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
- 			      &max_nunits, matches, &npermutes,
--			      NULL, max_tree_size, bst_map);
-+			      &tree_size, bst_map);
-   /* The map keeps a reference on SLP nodes built, release that.  */
-   for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin ();
-        it != bst_map->end (); ++it)
-@@ -1993,6 +2101,34 @@ vect_analyze_slp_instance (vec_info *vinfo,
-   delete bst_map;
-   if (node != NULL)
-     {
-+      /* If this is a reduction chain with a conversion in front
-+         amend the SLP tree with a node for that.  */
-+      if (!dr
-+	  && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
-+	  && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
-+	{
-+	  /* Get at the conversion stmt - we know it's the single use
-+	     of the last stmt of the reduction chain.  */
-+	  gimple *tem = vect_orig_stmt (scalar_stmts[group_size - 1])->stmt;
-+	  use_operand_p use_p;
-+	  gimple *use_stmt;
-+	  bool r = single_imm_use (gimple_assign_lhs (tem), &use_p, &use_stmt);
-+	  gcc_assert (r);
-+	  next_info = vinfo->lookup_stmt (use_stmt);
-+	  next_info = vect_stmt_to_vectorize (next_info);
-+	  scalar_stmts = vNULL;
-+	  scalar_stmts.create (group_size);
-+	  for (unsigned i = 0; i < group_size; ++i)
-+	    scalar_stmts.quick_push (next_info);
-+	  slp_tree conv = vect_create_new_slp_node (scalar_stmts);
-+	  SLP_TREE_CHILDREN (conv).quick_push (node);
-+	  node = conv;
-+	  /* We also have to fake this conversion stmt as SLP reduction group
-+	     so we don't have to mess with too much code elsewhere.  */
-+	  REDUC_GROUP_FIRST_ELEMENT (next_info) = next_info;
-+	  REDUC_GROUP_NEXT_ELEMENT (next_info) = NULL;
-+	}
-+
-       /* Calculate the unrolling factor based on the smallest type.  */
-       poly_uint64 unrolling_factor
- 	= calculate_unrolling_factor (max_nunits, group_size);
-@@ -2025,6 +2161,10 @@ vect_analyze_slp_instance (vec_info *vinfo,
- 	  SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
- 	  SLP_INSTANCE_LOADS (new_instance) = vNULL;
- 	  vect_gather_slp_loads (new_instance, node);
-+	  if (dump_enabled_p ())
-+	    dump_printf_loc (MSG_NOTE, vect_location,
-+			     "SLP size %u vs. limit %u.\n",
-+			     tree_size, max_tree_size);
- 
- 	  /* Compute the load permutation.  */
- 	  slp_tree load_node;
-@@ -2231,8 +2371,11 @@ vect_make_slp_decision (loop_vec_info loop_vinfo)
-   FOR_EACH_VEC_ELT (slp_instances, i, instance)
-     {
-       /* FORNOW: SLP if you can.  */
--      /* All unroll factors have the form current_vector_size * X for some
--	 rational X, so they must have a common multiple.  */
-+      /* All unroll factors have the form:
-+
-+	   GET_MODE_SIZE (vinfo->vector_mode) * X
-+
-+	 for some rational X, so they must have a common multiple.  */
-       unrolling_factor
- 	= force_common_multiple (unrolling_factor,
- 				 SLP_INSTANCE_UNROLLING_FACTOR (instance));
-@@ -2327,7 +2470,8 @@ vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype,
- 
-   if (!only_edge)
-     FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
--      if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
-+      if (SLP_TREE_DEF_TYPE (child) != vect_external_def
-+	  && SLP_TREE_DEF_TYPE (child) != vect_constant_def)
- 	vect_detect_hybrid_slp_stmts (child, i, stype, visited);
- }
- 
-@@ -2514,8 +2658,15 @@ vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
-      VF divided by the number of elements in a vector.  */
-   if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
-       && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
--    SLP_TREE_NUMBER_OF_VEC_STMTS (node)
--      = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
-+    {
-+      for (unsigned i = 0; i < SLP_TREE_CHILDREN (node).length (); ++i)
-+	if (SLP_TREE_DEF_TYPE (SLP_TREE_CHILDREN (node)[i]) == vect_internal_def)
-+	  {
-+	    SLP_TREE_NUMBER_OF_VEC_STMTS (node)
-+	      = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[i]);
-+	    break;
-+	  }
-+    }
-   else
-     {
-       poly_uint64 vf;
-@@ -2533,6 +2684,39 @@ vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
-   return vect_analyze_stmt (stmt_info, &dummy, node, node_instance, cost_vec);
- }
- 
-+/* Try to build NODE from scalars, returning true on success.
-+   NODE_INSTANCE is the SLP instance that contains NODE.  */
-+
-+static bool
-+vect_slp_convert_to_external (vec_info *vinfo, slp_tree node,
-+			      slp_instance node_instance)
-+{
-+  stmt_vec_info stmt_info;
-+  unsigned int i;
-+
-+  if (!is_a <bb_vec_info> (vinfo)
-+      || node == SLP_INSTANCE_TREE (node_instance)
-+      || vect_contains_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (node)))
-+    return false;
-+
-+  if (dump_enabled_p ())
-+    dump_printf_loc (MSG_NOTE, vect_location,
-+		     "Building vector operands from scalars instead\n");
-+
-+  /* Don't remove and free the child nodes here, since they could be
-+     referenced by other structures.  The analysis and scheduling phases
-+     (need to) ignore child nodes of anything that isn't vect_internal_def.  */
-+  unsigned int group_size = SLP_TREE_SCALAR_STMTS (node).length ();
-+  SLP_TREE_DEF_TYPE (node) = vect_external_def;
-+  SLP_TREE_SCALAR_OPS (node).safe_grow (group_size);
-+  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
-+    {
-+      tree lhs = gimple_get_lhs (vect_orig_stmt (stmt_info)->stmt);
-+      SLP_TREE_SCALAR_OPS (node)[i] = lhs;
-+    }
-+  return true;
-+}
-+
- /* Analyze statements contained in SLP tree NODE after recursively analyzing
-    the subtree.  NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
- 
-@@ -2559,6 +2743,13 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
-     {
-       SLP_TREE_NUMBER_OF_VEC_STMTS (node)
- 	= SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
-+      /* Cope with cases in which we made a late decision to build the
-+	 node from scalars.  */
-+      if (SLP_TREE_DEF_TYPE (*leader) == vect_external_def
-+	  && vect_slp_convert_to_external (vinfo, node, node_instance))
-+	;
-+      else
-+	gcc_assert (SLP_TREE_DEF_TYPE (node) == SLP_TREE_DEF_TYPE (*leader));
-       return true;
-     }
- 
-@@ -2579,25 +2770,31 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
-   auto_vec<vect_def_type> dt;
-   dt.safe_grow (SLP_TREE_CHILDREN (node).length ());
-   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
--    dt[j] = STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]);
-+    if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
-+      dt[j] = STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]);
- 
-   /* Push SLP node def-type to stmt operands.  */
-   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
--    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
-+    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def
-+	&& SLP_TREE_SCALAR_STMTS (child).length () != 0)
-       STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
- 	= SLP_TREE_DEF_TYPE (child);
- 
-   /* Check everything worked out.  */
-   bool res = true;
-   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
--    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
--      {
--	if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
--	    != SLP_TREE_DEF_TYPE (child))
--	  res = false;
--      }
--    else if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) != dt[j])
--      res = false;
-+      if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
-+	{
-+	  if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
-+	    {
-+	      if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
-+		  != SLP_TREE_DEF_TYPE (child))
-+		res = false;
-+	    }
-+	  else if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
-+		   != dt[j])
-+	    res = false;
-+	}
-   if (!res && dump_enabled_p ())
-     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- 		     "not vectorized: same operand with different "
-@@ -2609,7 +2806,13 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
- 
-   /* Restore def-types.  */
-   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
--    STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) = dt[j];
-+    if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
-+      STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) = dt[j];
-+
-+  /* If this node can't be vectorized, try pruning the tree here rather
-+     than felling the whole thing.  */
-+  if (!res && vect_slp_convert_to_external (vinfo, node, node_instance))
-+    res = true;
- 
-   return res;
- }
-@@ -2818,19 +3021,17 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
-   return true;
- }
- 
--/* Check if the basic block can be vectorized.  Returns a bb_vec_info
--   if so and sets fatal to true if failure is independent of
--   current_vector_size.  */
-+/* Check if the region described by BB_VINFO can be vectorized, returning
-+   true if so.  When returning false, set FATAL to true if the same failure
-+   would prevent vectorization at other vector sizes, false if it is still
-+   worth trying other sizes.  N_STMTS is the number of statements in the
-+   region.  */
- 
--static bb_vec_info
--vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
--		       gimple_stmt_iterator region_end,
--		       vec<data_reference_p> datarefs, int n_stmts,
--		       bool &fatal, vec_info_shared *shared)
-+static bool
-+vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal)
- {
-   DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
- 
--  bb_vec_info bb_vinfo;
-   slp_instance instance;
-   int i;
-   poly_uint64 min_vf = 2;
-@@ -2838,34 +3039,15 @@ vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
-   /* The first group of checks is independent of the vector size.  */
-   fatal = true;
- 
--  if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
--    {
--      if (dump_enabled_p ())
--	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
--			 "not vectorized: too many instructions in "
--			 "basic block.\n");
--      free_data_refs (datarefs);
--      return NULL;
--    }
--
--  bb_vinfo = new _bb_vec_info (region_begin, region_end, shared);
--  if (!bb_vinfo)
--    return NULL;
--
--  BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
--  bb_vinfo->shared->save_datarefs ();
--
-   /* Analyze the data references.  */
- 
--  if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
-+  if (!vect_analyze_data_refs (bb_vinfo, &min_vf, NULL))
-     {
-       if (dump_enabled_p ())
-         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- 			 "not vectorized: unhandled data-ref in basic "
- 			 "block.\n");
--
--      delete bb_vinfo;
--      return NULL;
-+      return false;
-     }
- 
-   if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
-@@ -2874,9 +3056,7 @@ vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
-         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- 			 "not vectorized: not enough data-refs in "
- 			 "basic block.\n");
--
--      delete bb_vinfo;
--      return NULL;
-+      return false;
-     }
- 
-   if (!vect_analyze_data_ref_accesses (bb_vinfo))
-@@ -2885,9 +3065,7 @@ vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
-        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- 			"not vectorized: unhandled data access in "
- 			"basic block.\n");
--
--      delete bb_vinfo;
--      return NULL;
-+      return false;
-     }
- 
-   /* If there are no grouped stores in the region there is no need
-@@ -2899,9 +3077,7 @@ vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
- 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- 			 "not vectorized: no grouped stores in "
- 			 "basic block.\n");
--
--      delete bb_vinfo;
--      return NULL;
-+      return false;
-     }
- 
-   /* While the rest of the analysis below depends on it in some way.  */
-@@ -2921,9 +3097,7 @@ vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
- 			   "not vectorized: failed to find SLP opportunities "
- 			   "in basic block.\n");
- 	}
--
--      delete bb_vinfo;
--      return NULL;
-+      return false;
-     }
- 
-   vect_record_base_alignments (bb_vinfo);
-@@ -2954,19 +3128,14 @@ vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
-       i++;
-     }
-   if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
--    {
--      delete bb_vinfo;
--      return NULL;
--    }
-+    return false;
- 
-   if (!vect_slp_analyze_operations (bb_vinfo))
-     {
-       if (dump_enabled_p ())
-         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- 			 "not vectorized: bad operation in basic block.\n");
--
--      delete bb_vinfo;
--      return NULL;
-+      return false;
-     }
- 
-   /* Cost model: check if the vectorization is worthwhile.  */
-@@ -2977,80 +3146,61 @@ vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
-         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- 			 "not vectorized: vectorization is not "
- 			 "profitable.\n");
--
--      delete bb_vinfo;
--      return NULL;
-+      return false;
-     }
- 
-   if (dump_enabled_p ())
-     dump_printf_loc (MSG_NOTE, vect_location,
- 		     "Basic block will be vectorized using SLP\n");
--
--  return bb_vinfo;
-+  return true;
- }
- 
-+/* Subroutine of vect_slp_bb.  Try to vectorize the statements between
-+   REGION_BEGIN (inclusive) and REGION_END (exclusive), returning true
-+   on success.  The region has N_STMTS statements and has the datarefs
-+   given by DATAREFS.  */
- 
--/* Main entry for the BB vectorizer.  Analyze and transform BB, returns
--   true if anything in the basic-block was vectorized.  */
--
--bool
--vect_slp_bb (basic_block bb)
-+static bool
-+vect_slp_bb_region (gimple_stmt_iterator region_begin,
-+		    gimple_stmt_iterator region_end,
-+		    vec<data_reference_p> datarefs,
-+		    unsigned int n_stmts)
- {
-   bb_vec_info bb_vinfo;
--  gimple_stmt_iterator gsi;
--  bool any_vectorized = false;
--  auto_vector_sizes vector_sizes;
-+  auto_vector_modes vector_modes;
- 
-   /* Autodetect first vector size we try.  */
--  current_vector_size = 0;
--  targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
--  unsigned int next_size = 0;
-+  machine_mode next_vector_mode = VOIDmode;
-+  targetm.vectorize.autovectorize_vector_modes (&vector_modes, false);
-+  unsigned int mode_i = 0;
- 
--  gsi = gsi_start_bb (bb);
-+  vec_info_shared shared;
- 
--  poly_uint64 autodetected_vector_size = 0;
-+  machine_mode autodetected_vector_mode = VOIDmode;
-   while (1)
-     {
--      if (gsi_end_p (gsi))
--	break;
--
--      gimple_stmt_iterator region_begin = gsi;
--      vec<data_reference_p> datarefs = vNULL;
--      int insns = 0;
--
--      for (; !gsi_end_p (gsi); gsi_next (&gsi))
--	{
--	  gimple *stmt = gsi_stmt (gsi);
--	  if (is_gimple_debug (stmt))
--	    continue;
--	  insns++;
--
--	  if (gimple_location (stmt) != UNKNOWN_LOCATION)
--	    vect_location = stmt;
--
--	  if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
--	    break;
--	}
--
--      /* Skip leading unhandled stmts.  */
--      if (gsi_stmt (region_begin) == gsi_stmt (gsi))
--	{
--	  gsi_next (&gsi);
--	  continue;
--	}
--
--      gimple_stmt_iterator region_end = gsi;
--
-       bool vectorized = false;
-       bool fatal = false;
--      vec_info_shared shared;
--      bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
--					datarefs, insns, fatal, &shared);
--      if (bb_vinfo
-+      bb_vinfo = new _bb_vec_info (region_begin, region_end, &shared);
-+
-+      bool first_time_p = shared.datarefs.is_empty ();
-+      BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
-+      if (first_time_p)
-+	bb_vinfo->shared->save_datarefs ();
-+      else
-+	bb_vinfo->shared->check_datarefs ();
-+      bb_vinfo->vector_mode = next_vector_mode;
-+
-+      if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal)
- 	  && dbg_cnt (vect_slp))
- 	{
- 	  if (dump_enabled_p ())
--	    dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
-+	    {
-+	      dump_printf_loc (MSG_NOTE, vect_location,
-+			       "***** Analysis succeeded with vector mode"
-+			       " %s\n", GET_MODE_NAME (bb_vinfo->vector_mode));
-+	      dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
-+	    }
- 
- 	  bb_vinfo->shared->check_datarefs ();
- 	  vect_schedule_slp (bb_vinfo);
-@@ -3058,7 +3208,7 @@ vect_slp_bb (basic_block bb)
- 	  unsigned HOST_WIDE_INT bytes;
- 	  if (dump_enabled_p ())
- 	    {
--	      if (current_vector_size.is_constant (&bytes))
-+	      if (GET_MODE_SIZE (bb_vinfo->vector_mode).is_constant (&bytes))
- 		dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
- 				 "basic block part vectorized using %wu byte "
- 				 "vectors\n", bytes);
-@@ -3070,50 +3220,120 @@ vect_slp_bb (basic_block bb)
- 
- 	  vectorized = true;
- 	}
--      delete bb_vinfo;
-+      else
-+	{
-+	  if (dump_enabled_p ())
-+	    dump_printf_loc (MSG_NOTE, vect_location,
-+			     "***** Analysis failed with vector mode %s\n",
-+			     GET_MODE_NAME (bb_vinfo->vector_mode));
-+	}
- 
--      any_vectorized |= vectorized;
-+      if (mode_i == 0)
-+	autodetected_vector_mode = bb_vinfo->vector_mode;
- 
--      if (next_size == 0)
--	autodetected_vector_size = current_vector_size;
-+      if (!fatal)
-+	while (mode_i < vector_modes.length ()
-+	       && vect_chooses_same_modes_p (bb_vinfo, vector_modes[mode_i]))
-+	  {
-+	    if (dump_enabled_p ())
-+	      dump_printf_loc (MSG_NOTE, vect_location,
-+			       "***** The result for vector mode %s would"
-+			       " be the same\n",
-+			       GET_MODE_NAME (vector_modes[mode_i]));
-+	    mode_i += 1;
-+	  }
- 
--      if (next_size < vector_sizes.length ()
--	  && known_eq (vector_sizes[next_size], autodetected_vector_size))
--	next_size += 1;
-+      delete bb_vinfo;
-+
-+      if (mode_i < vector_modes.length ()
-+	  && VECTOR_MODE_P (autodetected_vector_mode)
-+	  && (related_vector_mode (vector_modes[mode_i],
-+				   GET_MODE_INNER (autodetected_vector_mode))
-+	      == autodetected_vector_mode)
-+	  && (related_vector_mode (autodetected_vector_mode,
-+				   GET_MODE_INNER (vector_modes[mode_i]))
-+	      == vector_modes[mode_i]))
-+	{
-+	  if (dump_enabled_p ())
-+	    dump_printf_loc (MSG_NOTE, vect_location,
-+			     "***** Skipping vector mode %s, which would"
-+			     " repeat the analysis for %s\n",
-+			     GET_MODE_NAME (vector_modes[mode_i]),
-+			     GET_MODE_NAME (autodetected_vector_mode));
-+	  mode_i += 1;
-+	}
- 
-       if (vectorized
--	  || next_size == vector_sizes.length ()
--	  || known_eq (current_vector_size, 0U)
-+	  || mode_i == vector_modes.length ()
-+	  || autodetected_vector_mode == VOIDmode
- 	  /* If vect_slp_analyze_bb_1 signaled that analysis for all
- 	     vector sizes will fail do not bother iterating.  */
- 	  || fatal)
-+	return vectorized;
-+
-+      /* Try the next biggest vector size.  */
-+      next_vector_mode = vector_modes[mode_i++];
-+      if (dump_enabled_p ())
-+	dump_printf_loc (MSG_NOTE, vect_location,
-+			 "***** Re-trying analysis with vector mode %s\n",
-+			 GET_MODE_NAME (next_vector_mode));
-+    }
-+}
-+
-+/* Main entry for the BB vectorizer.  Analyze and transform BB, returns
-+   true if anything in the basic-block was vectorized.  */
-+
-+bool
-+vect_slp_bb (basic_block bb)
-+{
-+  gimple_stmt_iterator gsi;
-+  bool any_vectorized = false;
-+
-+  gsi = gsi_start_bb (bb);
-+  while (!gsi_end_p (gsi))
-+    {
-+      gimple_stmt_iterator region_begin = gsi;
-+      vec<data_reference_p> datarefs = vNULL;
-+      int insns = 0;
-+
-+      for (; !gsi_end_p (gsi); gsi_next (&gsi))
- 	{
--	  if (gsi_end_p (region_end))
-+	  gimple *stmt = gsi_stmt (gsi);
-+	  if (is_gimple_debug (stmt))
-+	    continue;
-+	  insns++;
-+
-+	  if (gimple_location (stmt) != UNKNOWN_LOCATION)
-+	    vect_location = stmt;
-+
-+	  if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
- 	    break;
-+	}
- 
--	  /* Skip the unhandled stmt.  */
-+      /* Skip leading unhandled stmts.  */
-+      if (gsi_stmt (region_begin) == gsi_stmt (gsi))
-+	{
- 	  gsi_next (&gsi);
--
--	  /* And reset vector sizes.  */
--	  current_vector_size = 0;
--	  next_size = 0;
-+	  continue;
- 	}
--      else
-+
-+      gimple_stmt_iterator region_end = gsi;
-+
-+      if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
- 	{
--	  /* Try the next biggest vector size.  */
--	  current_vector_size = vector_sizes[next_size++];
- 	  if (dump_enabled_p ())
--	    {
--	      dump_printf_loc (MSG_NOTE, vect_location,
--			       "***** Re-trying analysis with "
--			       "vector size ");
--	      dump_dec (MSG_NOTE, current_vector_size);
--	      dump_printf (MSG_NOTE, "\n");
--	    }
--
--	  /* Start over.  */
--	  gsi = region_begin;
-+	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-+			     "not vectorized: too many instructions in "
-+			     "basic block.\n");
- 	}
-+      else if (vect_slp_bb_region (region_begin, region_end, datarefs, insns))
-+	any_vectorized = true;
-+
-+      if (gsi_end_p (region_end))
-+	break;
-+
-+      /* Skip the unhandled stmt.  */
-+      gsi_next (&gsi);
-     }
- 
-   return any_vectorized;
-@@ -3184,8 +3404,9 @@ vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo)
-    to cut down on the number of interleaves.  */
- 
- void
--duplicate_and_interleave (gimple_seq *seq, tree vector_type, vec<tree> elts,
--			  unsigned int nresults, vec<tree> &results)
-+duplicate_and_interleave (vec_info *vinfo, gimple_seq *seq, tree vector_type,
-+			  vec<tree> elts, unsigned int nresults,
-+			  vec<tree> &results)
- {
-   unsigned int nelts = elts.length ();
-   tree element_type = TREE_TYPE (vector_type);
-@@ -3194,7 +3415,7 @@ duplicate_and_interleave (gimple_seq *seq, tree vector_type, vec<tree> elts,
-   unsigned int nvectors = 1;
-   tree new_vector_type;
-   tree permutes[2];
--  if (!can_duplicate_and_interleave_p (nelts, TYPE_MODE (element_type),
-+  if (!can_duplicate_and_interleave_p (vinfo, nelts, TYPE_MODE (element_type),
- 				       &nvectors, &new_vector_type,
- 				       permutes))
-     gcc_unreachable ();
-@@ -3276,52 +3497,45 @@ duplicate_and_interleave (gimple_seq *seq, tree vector_type, vec<tree> elts,
- 
- /* For constant and loop invariant defs of SLP_NODE this function returns
-    (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
--   OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
--   scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
--   REDUC_INDEX is the index of the reduction operand in the statements, unless
--   it is -1.  */
-+   OP_NODE determines the node for the operand containing the scalar
-+   operands.  */
- 
- static void
--vect_get_constant_vectors (tree op, slp_tree slp_node,
--                           vec<tree> *vec_oprnds,
--			   unsigned int op_num, unsigned int number_of_vectors)
-+vect_get_constant_vectors (slp_tree op_node, slp_tree slp_node,
-+                           vec<tree> *vec_oprnds)
- {
--  vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
--  stmt_vec_info stmt_vinfo = stmts[0];
--  gimple *stmt = stmt_vinfo->stmt;
-+  stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (slp_node)[0];
-+  vec_info *vinfo = stmt_vinfo->vinfo;
-   unsigned HOST_WIDE_INT nunits;
-   tree vec_cst;
-   unsigned j, number_of_places_left_in_vector;
-   tree vector_type;
-   tree vop;
--  int group_size = stmts.length ();
-+  int group_size = op_node->ops.length ();
-   unsigned int vec_num, i;
-   unsigned number_of_copies = 1;
--  vec<tree> voprnds;
--  voprnds.create (number_of_vectors);
--  bool constant_p, is_store;
-+  bool constant_p;
-   tree neutral_op = NULL;
--  enum tree_code code = gimple_expr_code (stmt);
-   gimple_seq ctor_seq = NULL;
-   auto_vec<tree, 16> permute_results;
- 
-+  /* ???  SLP analysis should compute the vector type for the
-+     constant / invariant and store it in the SLP node.  */
-+  tree op = op_node->ops[0];
-   /* Check if vector type is a boolean vector.  */
-+  tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
-   if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
-       && vect_mask_constant_operand_p (stmt_vinfo))
--    vector_type
--      = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
--  else
--    vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
--
--  if (STMT_VINFO_DATA_REF (stmt_vinfo))
--    {
--      is_store = true;
--      op = gimple_assign_rhs1 (stmt);
--    }
-+    vector_type = truth_type_for (stmt_vectype);
-   else
--    is_store = false;
-+    vector_type = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op));
- 
--  gcc_assert (op);
-+  unsigned int number_of_vectors
-+    = vect_get_num_vectors (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
-+			    * TYPE_VECTOR_SUBPARTS (stmt_vectype),
-+			    vector_type);
-+  vec_oprnds->create (number_of_vectors);
-+  auto_vec<tree> voprnds (number_of_vectors);
- 
-   /* NUMBER_OF_COPIES is the number of times we need to use the same values in
-      created vectors. It is greater than 1 if unrolling is performed.
-@@ -3353,56 +3567,8 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
-   bool place_after_defs = false;
-   for (j = 0; j < number_of_copies; j++)
-     {
--      for (i = group_size - 1; stmts.iterate (i, &stmt_vinfo); i--)
-+      for (i = group_size - 1; op_node->ops.iterate (i, &op); i--)
-         {
--	  stmt = stmt_vinfo->stmt;
--          if (is_store)
--            op = gimple_assign_rhs1 (stmt);
--          else
--	    {
--	      switch (code)
--		{
--		  case COND_EXPR:
--		    {
--		      tree cond = gimple_assign_rhs1 (stmt);
--		      if (TREE_CODE (cond) == SSA_NAME)
--			op = gimple_op (stmt, op_num + 1);
--		      else if (op_num == 0 || op_num == 1)
--			op = TREE_OPERAND (cond, op_num);
--		      else
--			{
--			  if (op_num == 2)
--			    op = gimple_assign_rhs2 (stmt);
--			  else
--			    op = gimple_assign_rhs3 (stmt);
--			}
--		    }
--		    break;
--
--		  case CALL_EXPR:
--		    op = gimple_call_arg (stmt, op_num);
--		    break;
--
--		  case LSHIFT_EXPR:
--		  case RSHIFT_EXPR:
--		  case LROTATE_EXPR:
--		  case RROTATE_EXPR:
--		    op = gimple_op (stmt, op_num + 1);
--		    /* Unlike the other binary operators, shifts/rotates have
--		       the shift count being int, instead of the same type as
--		       the lhs, so make sure the scalar is the right type if
--		       we are dealing with vectors of
--		       long long/long/short/char.  */
--		    if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
--		      op = fold_convert (TREE_TYPE (vector_type), op);
--		    break;
--
--		  default:
--		    op = gimple_op (stmt, op_num + 1);
--		    break;
--		}
--	    }
--
-           /* Create 'vect_ = {op0,op1,...,opn}'.  */
-           number_of_places_left_in_vector--;
- 	  tree orig_op = op;
-@@ -3472,9 +3638,9 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
- 		vec_cst = gimple_build_vector (&ctor_seq, &elts);
- 	      else
- 		{
--		  if (vec_oprnds->is_empty ())
--		    duplicate_and_interleave (&ctor_seq, vector_type, elts,
--					      number_of_vectors,
-+		  if (permute_results.is_empty ())
-+		    duplicate_and_interleave (vinfo, &ctor_seq, vector_type,
-+					      elts, number_of_vectors,
- 					      permute_results);
- 		  vec_cst = permute_results[number_of_vectors - j - 1];
- 		}
-@@ -3516,8 +3682,6 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
-       vec_oprnds->quick_push (vop);
-     }
- 
--  voprnds.release ();
--
-   /* In case that VF is greater than the unrolling factor needed for the SLP
-      group of stmts, NUMBER_OF_VECTORS to be created is greater than
-      NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
-@@ -3548,25 +3712,17 @@ vect_get_constant_vectors (tree op, slp_tree slp_node,
- static void
- vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
- {
--  tree vec_oprnd;
-   stmt_vec_info vec_def_stmt_info;
-   unsigned int i;
- 
-   gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
- 
-   FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt_info)
--    {
--      gcc_assert (vec_def_stmt_info);
--      if (gphi *vec_def_phi = dyn_cast <gphi *> (vec_def_stmt_info->stmt))
--	vec_oprnd = gimple_phi_result (vec_def_phi);
--      else
--	vec_oprnd = gimple_get_lhs (vec_def_stmt_info->stmt);
--      vec_oprnds->quick_push (vec_oprnd);
--    }
-+    vec_oprnds->quick_push (gimple_get_lhs (vec_def_stmt_info->stmt));
- }
- 
- 
--/* Get vectorized definitions for SLP_NODE.
-+/* Get N vectorized definitions for SLP_NODE.
-    If the scalar definitions are loop invariants or constants, collect them and
-    call vect_get_constant_vectors() to create vector stmts.
-    Otherwise, the def-stmts must be already vectorized and the vectorized stmts
-@@ -3574,91 +3730,26 @@ vect_get_slp_vect_defs (slp_tree slp_node, vec *vec_oprnds)
-    vect_get_slp_vect_defs () to retrieve them.  */
- 
- void
--vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
--		   vec<vec<tree> > *vec_oprnds)
-+vect_get_slp_defs (slp_tree slp_node, vec<vec<tree> > *vec_oprnds, unsigned n)
- {
--  int number_of_vects = 0, i;
--  unsigned int child_index = 0;
--  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
--  slp_tree child = NULL;
--  vec<tree> vec_defs;
--  tree oprnd;
--  bool vectorized_defs;
-+  if (n == -1U)
-+    n = SLP_TREE_CHILDREN (slp_node).length ();
- 
--  stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
--  FOR_EACH_VEC_ELT (ops, i, oprnd)
-+  for (unsigned i = 0; i < n; ++i)
-     {
--      /* For each operand we check if it has vectorized definitions in a child
--	 node or we need to create them (for invariants and constants).  We
--	 check if the LHS of the first stmt of the next child matches OPRND.
--	 If it does, we found the correct child.  Otherwise, we call
--	 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
--	 to check this child node for the next operand.  */
--      vectorized_defs = false;
--      if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
--        {
--          child = SLP_TREE_CHILDREN (slp_node)[child_index];
--
--	  /* We have to check both pattern and original def, if available.  */
--	  if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
--	    {
--	      stmt_vec_info first_def_info = SLP_TREE_SCALAR_STMTS (child)[0];
--	      stmt_vec_info related = STMT_VINFO_RELATED_STMT (first_def_info);
--	      tree first_def_op;
--
--	      if (gphi *first_def = dyn_cast <gphi *> (first_def_info->stmt))
--		first_def_op = gimple_phi_result (first_def);
--	      else
--		first_def_op = gimple_get_lhs (first_def_info->stmt);
--	      if (operand_equal_p (oprnd, first_def_op, 0)
--		  || (related
--		      && operand_equal_p (oprnd,
--					  gimple_get_lhs (related->stmt), 0)))
--		{
--		  /* The number of vector defs is determined by the number of
--		     vector statements in the node from which we get those
--		     statements.  */
--		  number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
--		  vectorized_defs = true;
--		  child_index++;
--		}
--	    }
--	  else
--	    child_index++;
--        }
--
--      if (!vectorized_defs)
--        {
--          if (i == 0)
--            {
--              number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
--              /* Number of vector stmts was calculated according to LHS in
--                 vect_schedule_slp_instance (), fix it by replacing LHS with
--                 RHS, if necessary.  See vect_get_smallest_scalar_type () for
--                 details.  */
--	      vect_get_smallest_scalar_type (first_stmt_info, &lhs_size_unit,
--					     &rhs_size_unit);
--              if (rhs_size_unit != lhs_size_unit)
--                {
--                  number_of_vects *= rhs_size_unit;
--                  number_of_vects /= lhs_size_unit;
--                }
--            }
--        }
-+      slp_tree child = SLP_TREE_CHILDREN (slp_node)[i];
- 
--      /* Allocate memory for vectorized defs.  */
--      vec_defs = vNULL;
--      vec_defs.create (number_of_vects);
-+      vec<tree> vec_defs = vNULL;
- 
--      /* For reduction defs we call vect_get_constant_vectors (), since we are
--         looking for initial loop invariant values.  */
--      if (vectorized_defs)
--        /* The defs are already vectorized.  */
--	vect_get_slp_vect_defs (child, &vec_defs);
-+      /* For each operand we check if it has vectorized definitions in a child
-+	 node or we need to create them (for invariants and constants).  */
-+      if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
-+	{
-+	  vec_defs.create (SLP_TREE_NUMBER_OF_VEC_STMTS (child));
-+	  vect_get_slp_vect_defs (child, &vec_defs);
-+	}
-       else
--	/* Build vectors from scalar defs.  */
--	vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
--				   number_of_vects);
-+	vect_get_constant_vectors (child, slp_node, &vec_defs);
- 
-       vec_oprnds->quick_push (vec_defs);
-     }
-@@ -3939,17 +4030,6 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance,
-   stmt_vec_info last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
-   si = gsi_for_stmt (last_stmt_info->stmt);
- 
--  /* Mark the first element of the reduction chain as reduction to properly
--     transform the node.  In the analysis phase only the last element of the
--     chain is marked as reduction.  */
--  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
--      && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
--      && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info)
--    {
--      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
--      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
--    }
--
-   /* Handle two-operation SLP nodes by vectorizing the group with
-      both operations and then performing a merge.  */
-   if (SLP_TREE_TWO_OPERATORS (node))
-diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
-index 74abfbfe56e..5d6da3d9708 100644
---- a/gcc/tree-vect-stmts.c
-+++ b/gcc/tree-vect-stmts.c
-@@ -329,13 +329,13 @@ vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
- 	  basic_block bb = gimple_bb (USE_STMT (use_p));
- 	  if (!flow_bb_inside_loop_p (loop, bb))
- 	    {
-+	      if (is_gimple_debug (USE_STMT (use_p)))
-+		continue;
-+
- 	      if (dump_enabled_p ())
- 		dump_printf_loc (MSG_NOTE, vect_location,
-                                  "vec_stmt_relevant_p: used out of loop.\n");
- 
--	      if (is_gimple_debug (USE_STMT (use_p)))
--		continue;
--
- 	      /* We expect all such uses to be in the loop exit phis
- 		 (because of loop closed form)   */
- 	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
-@@ -456,7 +456,6 @@ process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
- 	     bool force)
- {
-   stmt_vec_info dstmt_vinfo;
--  basic_block bb, def_bb;
-   enum vect_def_type dt;
- 
-   /* case 1: we are only interested in uses that need to be vectorized.  Uses
-@@ -472,28 +471,8 @@ process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
-   if (!dstmt_vinfo)
-     return opt_result::success ();
- 
--  def_bb = gimple_bb (dstmt_vinfo->stmt);
--
--  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
--     DSTMT_VINFO must have already been processed, because this should be the
--     only way that STMT, which is a reduction-phi, was put in the worklist,
--     as there should be no other uses for DSTMT_VINFO in the loop.  So we just
--     check that everything is as expected, and we are done.  */
--  bb = gimple_bb (stmt_vinfo->stmt);
--  if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
--      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
--      && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
--      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
--      && bb->loop_father == def_bb->loop_father)
--    {
--      if (dump_enabled_p ())
--	dump_printf_loc (MSG_NOTE, vect_location,
--                         "reduc-stmt defining reduc-phi in the same nest.\n");
--      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
--      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
--		  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
--      return opt_result::success ();
--    }
-+  basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
-+  basic_block bb = gimple_bb (stmt_vinfo->stmt);
- 
-   /* case 3a: outer-loop stmt defining an inner-loop stmt:
- 	outer-loop-header-bb:
-@@ -607,7 +586,7 @@ process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
-    This pass detects such stmts.  */
- 
- opt_result
--vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
-+vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
- {
-   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
-   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
-@@ -777,7 +756,11 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
- 	    = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
- 			   &worklist, true);
- 	  if (!res)
--	    return res;
-+	    {
-+	      if (fatal)
-+		*fatal = false;
-+	      return res;
-+	    }
- 	}
-     } /* while worklist */
- 
-@@ -791,6 +774,7 @@ vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
- 			       unsigned opno, enum vect_def_type dt,
- 			       stmt_vector_for_cost *cost_vec)
- {
-+  vec_info *vinfo = stmt_info->vinfo;
-   gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
-   tree op = gimple_op (stmt, opno);
-   unsigned prologue_cost = 0;
-@@ -798,7 +782,7 @@ vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
-   /* Without looking at the actual initializer a vector of
-      constants can be implemented as load from the constant pool.
-      When all elements are the same we can use a splat.  */
--  tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op));
-+  tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op));
-   unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
-   unsigned num_vects_to_check;
-   unsigned HOST_WIDE_INT const_nunits;
-@@ -1603,9 +1587,9 @@ vect_get_vec_def_for_operand (tree op, stmt_vec_info stmt_vinfo, tree vectype)
- 	vector_type = vectype;
-       else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
- 	       && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
--	vector_type = build_same_sized_truth_vector_type (stmt_vectype);
-+	vector_type = truth_type_for (stmt_vectype);
-       else
--	vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
-+	vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
- 
-       gcc_assert (vector_type);
-       return vect_init_vector (stmt_vinfo, op, vector_type, NULL);
-@@ -1720,16 +1704,8 @@ vect_get_vec_defs (tree op0, tree op1, stmt_vec_info stmt_info,
- {
-   if (slp_node)
-     {
--      int nops = (op1 == NULL_TREE) ? 1 : 2;
--      auto_vec<tree> ops (nops);
--      auto_vec<vec<tree> > vec_defs (nops);
--
--      ops.quick_push (op0);
--      if (op1)
--        ops.quick_push (op1);
--
--      vect_get_slp_defs (ops, slp_node, &vec_defs);
--
-+      auto_vec<vec<tree> > vec_defs (SLP_TREE_CHILDREN (slp_node).length ());
-+      vect_get_slp_defs (slp_node, &vec_defs, op1 ? 2 : 1);
-       *vec_oprnds0 = vec_defs[0];
-       if (op1)
- 	*vec_oprnds1 = vec_defs[1];
-@@ -1874,7 +1850,8 @@ static tree permute_vec_elements (tree, tree, tree, stmt_vec_info,
-    says how the load or store is going to be implemented and GROUP_SIZE
-    is the number of load or store statements in the containing group.
-    If the access is a gather load or scatter store, GS_INFO describes
--   its arguments.
-+   its arguments.  If the load or store is conditional, SCALAR_MASK is the
-+   condition under which it occurs.
- 
-    Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
-    supported, otherwise record the required mask types.  */
-@@ -1883,7 +1860,7 @@ static void
- check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
- 			  vec_load_store_type vls_type, int group_size,
- 			  vect_memory_access_type memory_access_type,
--			  gather_scatter_info *gs_info)
-+			  gather_scatter_info *gs_info, tree scalar_mask)
- {
-   /* Invariant loads need no special support.  */
-   if (memory_access_type == VMAT_INVARIANT)
-@@ -1907,7 +1884,7 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
- 	  return;
- 	}
-       unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
--      vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
-+      vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
-       return;
-     }
- 
-@@ -1931,7 +1908,7 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
- 	  return;
- 	}
-       unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
--      vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
-+      vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
-       return;
-     }
- 
-@@ -1949,9 +1926,8 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
-     }
- 
-   machine_mode mask_mode;
--  if (!(targetm.vectorize.get_mask_mode
--	(GET_MODE_NUNITS (vecmode),
--	 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
-+  if (!VECTOR_MODE_P (vecmode)
-+      || !targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
-       || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
-     {
-       if (dump_enabled_p ())
-@@ -1969,7 +1945,7 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
-   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
-   unsigned int nvectors;
-   if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
--    vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
-+    vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
-   else
-     gcc_unreachable ();
- }
-@@ -2311,6 +2287,29 @@ get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
- 	      && gap < (vect_known_alignment_in_bytes (first_dr_info)
- 			/ vect_get_scalar_dr_size (first_dr_info)))
- 	    overrun_p = false;
-+
-+	  /* If the gap splits the vector in half and the target
-+	     can do half-vector operations avoid the epilogue peeling
-+	     by simply loading half of the vector only.  Usually
-+	     the construction with an upper zero half will be elided.  */
-+	  dr_alignment_support alignment_support_scheme;
-+	  scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
-+	  machine_mode vmode;
-+	  if (overrun_p
-+	      && !masked_p
-+	      && (((alignment_support_scheme
-+		      = vect_supportable_dr_alignment (first_dr_info, false)))
-+		   == dr_aligned
-+		  || alignment_support_scheme == dr_unaligned_supported)
-+	      && known_eq (nunits, (group_size - gap) * 2)
-+	      && known_eq (nunits, group_size)
-+	      && related_vector_mode (TYPE_MODE (vectype), elmode,
-+				      group_size - gap).exists (&vmode)
-+	      && (convert_optab_handler (vec_init_optab,
-+					 TYPE_MODE (vectype), vmode)
-+		  != CODE_FOR_nothing))
-+	    overrun_p = false;
-+
- 	  if (overrun_p && !can_overrun_p)
- 	    {
- 	      if (dump_enabled_p ())
-@@ -2536,6 +2535,7 @@ vect_check_load_store_mask (stmt_vec_info stmt_info, tree mask,
- 			    vect_def_type *mask_dt_out,
- 			    tree *mask_vectype_out)
- {
-+  vec_info *vinfo = stmt_info->vinfo;
-   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
-     {
-       if (dump_enabled_p ())
-@@ -2564,7 +2564,7 @@ vect_check_load_store_mask (stmt_vec_info stmt_info, tree mask,
- 
-   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
-   if (!mask_vectype)
--    mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
-+    mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
- 
-   if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
-     {
-@@ -2728,7 +2728,7 @@ vect_build_gather_load_calls (stmt_vec_info stmt_info,
- 			   || TREE_CODE (masktype) == INTEGER_TYPE
- 			   || types_compatible_p (srctype, masktype)));
-   if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
--    masktype = build_same_sized_truth_vector_type (srctype);
-+    masktype = truth_type_for (srctype);
- 
-   tree mask_halftype = masktype;
-   tree perm_mask = NULL_TREE;
-@@ -2774,8 +2774,7 @@ vect_build_gather_load_calls (stmt_vec_info stmt_info,
- 	  mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
- 	}
-       else if (mask)
--	mask_halftype
--	  = build_same_sized_truth_vector_type (gs_info->offset_vectype);
-+	mask_halftype = truth_type_for (gs_info->offset_vectype);
-     }
-   else
-     gcc_unreachable ();
-@@ -2952,6 +2951,7 @@ vect_get_gather_scatter_ops (struct loop *loop, stmt_vec_info stmt_info,
- 			     gather_scatter_info *gs_info,
- 			     tree *dataref_ptr, tree *vec_offset)
- {
-+  vec_info *vinfo = stmt_info->vinfo;
-   gimple_seq stmts = NULL;
-   *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
-   if (stmts != NULL)
-@@ -2962,7 +2962,7 @@ vect_get_gather_scatter_ops (struct loop *loop, stmt_vec_info stmt_info,
-       gcc_assert (!new_bb);
-     }
-   tree offset_type = TREE_TYPE (gs_info->offset);
--  tree offset_vectype = get_vectype_for_scalar_type (offset_type);
-+  tree offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type);
-   *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt_info,
- 					      offset_vectype);
- }
-@@ -2997,7 +2997,7 @@ vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
-   /* The offset given in GS_INFO can have pointer type, so use the element
-      type of the vector instead.  */
-   tree offset_type = TREE_TYPE (gs_info->offset);
--  tree offset_vectype = get_vectype_for_scalar_type (offset_type);
-+  tree offset_vectype = get_vectype_for_scalar_type (loop_vinfo, offset_type);
-   offset_type = TREE_TYPE (offset_vectype);
- 
-   /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type.  */
-@@ -3161,8 +3161,7 @@ simple_integer_narrowing (tree vectype_out, tree vectype_in,
-   int multi_step_cvt = 0;
-   auto_vec <tree, 8> interm_types;
-   if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
--					&code, &multi_step_cvt,
--					&interm_types)
-+					&code, &multi_step_cvt, &interm_types)
-       || multi_step_cvt)
-     return false;
- 
-@@ -3295,10 +3294,10 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 	  return false;
- 	}
-     }
--  /* If all arguments are external or constant defs use a vector type with
--     the same size as the output vector type.  */
-+  /* If all arguments are external or constant defs, infer the vector type
-+     from the scalar type.  */
-   if (!vectype_in)
--    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
-+    vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type);
-   if (vec_stmt)
-     gcc_assert (vectype_in);
-   if (!vectype_in)
-@@ -3309,6 +3308,19 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 
-       return false;
-     }
-+  /* FORNOW: we don't yet support mixtures of vector sizes for calls,
-+     just mixtures of nunits.  E.g. DI->SI versions of __builtin_ctz*
-+     are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
-+     by a pack of the two vectors into an SI vector.  We would need
-+     separate code to handle direct VnDI->VnSI IFN_CTZs.  */
-+  if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
-+    {
-+      if (dump_enabled_p ())
-+	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-+			 "mismatched vector sizes %T and %T\n",
-+			 vectype_in, vectype_out);
-+      return false;
-+    }
- 
-   /* FORNOW */
-   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
-@@ -3415,7 +3427,9 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 	  unsigned int nvectors = (slp_node
- 				   ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
- 				   : ncopies);
--	  vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out);
-+	  tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
-+	  vect_record_loop_mask (loop_vinfo, masks, nvectors,
-+				 vectype_out, scalar_mask);
- 	}
-       return true;
-     }
-@@ -3446,9 +3460,7 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 	      auto_vec<vec<tree> > vec_defs (nargs);
- 	      vec<tree> vec_oprnds0;
- 
--	      for (i = 0; i < nargs; i++)
--		vargs[i] = gimple_call_arg (stmt, i);
--	      vect_get_slp_defs (vargs, slp_node, &vec_defs);
-+	      vect_get_slp_defs (slp_node, &vec_defs);
- 	      vec_oprnds0 = vec_defs[0];
- 
- 	      /* Arguments are ready.  Create the new vector stmt.  */
-@@ -3470,8 +3482,7 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 			= gimple_build_call_internal_vec (ifn, vargs);
- 		      gimple_call_set_lhs (call, half_res);
- 		      gimple_call_set_nothrow (call, true);
--		      new_stmt_info
--			= vect_finish_stmt_generation (stmt_info, call, gsi);
-+		      vect_finish_stmt_generation (stmt_info, call, gsi);
- 		      if ((i & 1) == 0)
- 			{
- 			  prev_res = half_res;
-@@ -3523,8 +3534,7 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 	  if (mask_opno >= 0 && !vectypes[mask_opno])
- 	    {
- 	      gcc_assert (modifier != WIDEN);
--	      vectypes[mask_opno]
--		= build_same_sized_truth_vector_type (vectype_in);
-+	      vectypes[mask_opno] = truth_type_for (vectype_in);
- 	    }
- 
- 	  for (i = 0; i < nargs; i++)
-@@ -3570,8 +3580,7 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 	      gcall *call = gimple_build_call_internal_vec (ifn, vargs);
- 	      gimple_call_set_lhs (call, half_res);
- 	      gimple_call_set_nothrow (call, true);
--	      new_stmt_info
--		= vect_finish_stmt_generation (stmt_info, call, gsi);
-+	      vect_finish_stmt_generation (stmt_info, call, gsi);
- 	      if ((j & 1) == 0)
- 		{
- 		  prev_res = half_res;
-@@ -3622,9 +3631,7 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 	      auto_vec<vec<tree> > vec_defs (nargs);
- 	      vec<tree> vec_oprnds0;
- 
--	      for (i = 0; i < nargs; i++)
--		vargs.quick_push (gimple_call_arg (stmt, i));
--	      vect_get_slp_defs (vargs, slp_node, &vec_defs);
-+	      vect_get_slp_defs (slp_node, &vec_defs);
- 	      vec_oprnds0 = vec_defs[0];
- 
- 	      /* Arguments are ready.  Create the new vector stmt.  */
-@@ -4087,9 +4094,8 @@ vectorizable_simd_clone_call (stmt_vec_info stmt_info,
- 	 || arginfo[i].dt == vect_external_def)
- 	&& bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
-       {
--	arginfo[i].vectype
--	  = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
--								     i)));
-+	tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
-+	arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type);
- 	if (arginfo[i].vectype == NULL
- 	    || (simd_clone_subparts (arginfo[i].vectype)
- 		> bestn->simdclone->simdlen))
-@@ -4802,10 +4808,10 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 	}
-     }
- 
--  /* If op0 is an external or constant defs use a vector type of
--     the same size as the output vector type.  */
-+  /* If op0 is an external or constant def, infer the vector type
-+     from the scalar type.  */
-   if (!vectype_in)
--    vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
-+    vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type);
-   if (vec_stmt)
-     gcc_assert (vectype_in);
-   if (!vectype_in)
-@@ -4863,7 +4869,9 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-   switch (modifier)
-     {
-     case NONE:
--      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
-+      if (code != FIX_TRUNC_EXPR
-+	  && code != FLOAT_EXPR
-+	  && !CONVERT_EXPR_CODE_P (code))
- 	return false;
-       if (supportable_convert_operation (code, vectype_out, vectype_in,
- 					 &decl1, &code1))
-@@ -5452,7 +5460,7 @@ vectorizable_assignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-    either as shift by a scalar or by a vector.  */
- 
- bool
--vect_supportable_shift (enum tree_code code, tree scalar_type)
-+vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
- {
- 
-   machine_mode vec_mode;
-@@ -5460,7 +5468,7 @@ vect_supportable_shift (enum tree_code code, tree scalar_type)
-   int icode;
-   tree vectype;
- 
--  vectype = get_vectype_for_scalar_type (scalar_type);
-+  vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
-   if (!vectype)
-     return false;
- 
-@@ -5491,7 +5499,7 @@ vect_supportable_shift (enum tree_code code, tree scalar_type)
-    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
-    Return true if STMT_INFO is vectorizable in this way.  */
- 
--bool
-+static bool
- vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 		    stmt_vec_info *vec_stmt, slp_tree slp_node,
- 		    stmt_vector_for_cost *cost_vec)
-@@ -5524,6 +5532,7 @@ vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-   bool scalar_shift_arg = true;
-   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
-   vec_info *vinfo = stmt_info->vinfo;
-+  bool incompatible_op1_vectype_p = false;
- 
-   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
-     return false;
-@@ -5565,10 +5574,10 @@ vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-                          "use not simple.\n");
-       return false;
-     }
--  /* If op0 is an external or constant def use a vector type with
--     the same size as the output vector type.  */
-+  /* If op0 is an external or constant def, infer the vector type
-+     from the scalar type.  */
-   if (!vectype)
--    vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
-+    vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0));
-   if (vec_stmt)
-     gcc_assert (vectype);
-   if (!vectype)
-@@ -5666,9 +5675,16 @@ vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-                          "vector/vector shift/rotate found.\n");
- 
-       if (!op1_vectype)
--	op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
--      if (op1_vectype == NULL_TREE
--	  || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
-+	op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1));
-+      incompatible_op1_vectype_p
-+	= (op1_vectype == NULL_TREE
-+	   || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
-+			TYPE_VECTOR_SUBPARTS (vectype))
-+	   || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
-+      if (incompatible_op1_vectype_p
-+	  && (!slp_node
-+	      || SLP_TREE_DEF_TYPE
-+		   (SLP_TREE_CHILDREN (slp_node)[1]) != vect_constant_def))
- 	{
- 	  if (dump_enabled_p ())
- 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-@@ -5707,7 +5723,10 @@ vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-                  so make sure the scalar is the right type if we are
- 		 dealing with vectors of long long/long/short/char.  */
-               if (dt[1] == vect_constant_def)
--                op1 = fold_convert (TREE_TYPE (vectype), op1);
-+		{
-+		  if (!slp_node)
-+		    op1 = fold_convert (TREE_TYPE (vectype), op1);
-+		}
- 	      else if (!useless_type_conversion_p (TREE_TYPE (vectype),
- 						   TREE_TYPE (op1)))
- 		{
-@@ -5818,6 +5837,21 @@ vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-                     }
-                 }
-             }
-+	  else if (slp_node && incompatible_op1_vectype_p)
-+	    {
-+	      /* Convert the scalar constant shift amounts in-place.  */
-+	      slp_tree shift = SLP_TREE_CHILDREN (slp_node)[1];
-+	      gcc_assert (SLP_TREE_DEF_TYPE (shift) == vect_constant_def);
-+	      for (unsigned i = 0;
-+		   i < SLP_TREE_SCALAR_OPS (shift).length (); ++i)
-+		{
-+		  SLP_TREE_SCALAR_OPS (shift)[i]
-+		    = fold_convert (TREE_TYPE (vectype),
-+				    SLP_TREE_SCALAR_OPS (shift)[i]);
-+		  gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (shift)[i])
-+			       == INTEGER_CST));
-+		}
-+	    }
- 
-           /* vec_oprnd1 is available if operand 1 should be of a scalar-type
-              (a special case for certain kind of vector shifts); otherwise,
-@@ -5894,7 +5928,7 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-   poly_uint64 nunits_in;
-   poly_uint64 nunits_out;
-   tree vectype_out;
--  int ncopies;
-+  int ncopies, vec_num;
-   int j, i;
-   vec<tree> vec_oprnds0 = vNULL;
-   vec<tree> vec_oprnds1 = vNULL;
-@@ -5964,8 +5998,8 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-                          "use not simple.\n");
-       return false;
-     }
--  /* If op0 is an external or constant def use a vector type with
--     the same size as the output vector type.  */
-+  /* If op0 is an external or constant def, infer the vector type
-+     from the scalar type.  */
-   if (!vectype)
-     {
-       /* For boolean type we cannot determine vectype by
-@@ -5985,7 +6019,7 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 	  vectype = vectype_out;
- 	}
-       else
--	vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
-+	vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0));
-     }
-   if (vec_stmt)
-     gcc_assert (vectype);
-@@ -6031,9 +6065,15 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
-      case of SLP.  */
-   if (slp_node)
--    ncopies = 1;
-+    {
-+      ncopies = 1;
-+      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
-+    }
-   else
--    ncopies = vect_get_num_copies (loop_vinfo, vectype);
-+    {
-+      ncopies = vect_get_num_copies (loop_vinfo, vectype);
-+      vec_num = 1;
-+    }
- 
-   gcc_assert (ncopies >= 1);
- 
-@@ -6086,8 +6126,34 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-       return false;
-     }
- 
-+  int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
-+  vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
-+  internal_fn cond_fn = get_conditional_internal_fn (code);
-+
-   if (!vec_stmt) /* transformation not required.  */
-     {
-+      /* If this operation is part of a reduction, a fully-masked loop
-+	 should only change the active lanes of the reduction chain,
-+	 keeping the inactive lanes as-is.  */
-+      if (loop_vinfo
-+	  && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
-+	  && reduc_idx >= 0)
-+	{
-+	  if (cond_fn == IFN_LAST
-+	      || !direct_internal_fn_supported_p (cond_fn, vectype,
-+						  OPTIMIZE_FOR_SPEED))
-+	    {
-+	      if (dump_enabled_p ())
-+		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-+				 "can't use a fully-masked loop because no"
-+				 " conditional operation is available.\n");
-+	      LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
-+	    }
-+	  else
-+	    vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
-+				   vectype, NULL);
-+	}
-+
-       STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
-       DUMP_VECT_SCOPE ("vectorizable_operation");
-       vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
-@@ -6100,6 +6166,8 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-     dump_printf_loc (MSG_NOTE, vect_location,
-                      "transform binary/unary operation.\n");
- 
-+  bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
-+
-   /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
-      vectors with unsigned elements, but the result is signed.  So, we
-      need to compute the MINUS_EXPR into vectype temporary and
-@@ -6180,12 +6248,8 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 	    {
- 	      if (slp_node)
- 		{
--		  auto_vec<tree> ops(3);
--		  ops.quick_push (op0);
--		  ops.quick_push (op1);
--		  ops.quick_push (op2);
- 		  auto_vec<vec<tree> > vec_defs(3);
--		  vect_get_slp_defs (ops, slp_node, &vec_defs);
-+		  vect_get_slp_defs (slp_node, &vec_defs);
- 		  vec_oprnds0 = vec_defs[0];
- 		  vec_oprnds1 = vec_defs[1];
- 		  vec_oprnds2 = vec_defs[2];
-@@ -6221,22 +6285,41 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 		  ? vec_oprnds1[i] : NULL_TREE);
- 	  vop2 = ((op_type == ternary_op)
- 		  ? vec_oprnds2[i] : NULL_TREE);
--	  gassign *new_stmt = gimple_build_assign (vec_dest, code,
--						   vop0, vop1, vop2);
--	  new_temp = make_ssa_name (vec_dest, new_stmt);
--	  gimple_assign_set_lhs (new_stmt, new_temp);
--	  new_stmt_info
--	    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
--	  if (vec_cvt_dest)
-+	  if (masked_loop_p && reduc_idx >= 0)
- 	    {
--	      new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
--	      gassign *new_stmt
--		= gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
--				       new_temp);
--	      new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
-+	      /* Perform the operation on active elements only and take
-+		 inactive elements from the reduction chain input.  */
-+	      gcc_assert (!vop2);
-+	      vop2 = reduc_idx == 1 ? vop1 : vop0;
-+	      tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
-+					      vectype, i * ncopies + j);
-+	      gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
-+							vop0, vop1, vop2);
-+	      new_temp = make_ssa_name (vec_dest, call);
-+	      gimple_call_set_lhs (call, new_temp);
-+	      gimple_call_set_nothrow (call, true);
-+	      new_stmt_info
-+		= vect_finish_stmt_generation (stmt_info, call, gsi);
-+	    }
-+	  else
-+	    {
-+	      gassign *new_stmt = gimple_build_assign (vec_dest, code,
-+						       vop0, vop1, vop2);
-+	      new_temp = make_ssa_name (vec_dest, new_stmt);
- 	      gimple_assign_set_lhs (new_stmt, new_temp);
- 	      new_stmt_info
- 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
-+	      if (vec_cvt_dest)
-+		{
-+		  new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
-+		  gassign *new_stmt
-+		    = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
-+					   new_temp);
-+		  new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
-+		  gimple_assign_set_lhs (new_stmt, new_temp);
-+		  new_stmt_info
-+		    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
-+		}
- 	    }
-           if (slp_node)
- 	    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
-@@ -6517,7 +6600,7 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-       if (loop_vinfo
- 	  && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
- 	check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
--				  memory_access_type, &gs_info);
-+				  memory_access_type, &gs_info, mask);
- 
-       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
-       vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
-@@ -6580,8 +6663,7 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 	  ncopies *= 2;
- 
- 	  if (mask)
--	    mask_halfvectype
--	      = build_same_sized_truth_vector_type (gs_info.offset_vectype);
-+	    mask_halfvectype = truth_type_for (gs_info.offset_vectype);
- 	}
-       else
- 	gcc_unreachable ();
-@@ -6840,9 +6922,8 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 		 of vector elts directly.  */
- 	      scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
- 	      machine_mode vmode;
--	      if (!mode_for_vector (elmode, group_size).exists (&vmode)
--		  || !VECTOR_MODE_P (vmode)
--		  || !targetm.vector_mode_supported_p (vmode)
-+	      if (!related_vector_mode (TYPE_MODE (vectype), elmode,
-+					group_size).exists (&vmode)
- 		  || (convert_optab_handler (vec_extract_optab,
- 					     TYPE_MODE (vectype), vmode)
- 		      == CODE_FOR_nothing))
-@@ -6859,9 +6940,8 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 		     element extracts from the original vector type and
- 		     element size stores.  */
- 		  if (int_mode_for_size (lsize, 0).exists (&elmode)
--		      && mode_for_vector (elmode, lnunits).exists (&vmode)
--		      && VECTOR_MODE_P (vmode)
--		      && targetm.vector_mode_supported_p (vmode)
-+		      && related_vector_mode (TYPE_MODE (vectype), elmode,
-+					      lnunits).exists (&vmode)
- 		      && (convert_optab_handler (vec_extract_optab,
- 						 vmode, elmode)
- 			  != CODE_FOR_nothing))
-@@ -7624,14 +7704,6 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-       if (!scalar_dest)
- 	return false;
- 
--      if (slp_node != NULL)
--	{
--	  if (dump_enabled_p ())
--	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
--			     "SLP of masked loads not supported.\n");
--	  return false;
--	}
--
-       int mask_index = internal_fn_mask_index (ifn);
-       if (mask_index >= 0)
- 	{
-@@ -7714,6 +7786,15 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-       first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
-       group_size = DR_GROUP_SIZE (first_stmt_info);
- 
-+      /* Refuse non-SLP vectorization of SLP-only groups.  */
-+      if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
-+	{
-+	  if (dump_enabled_p ())
-+	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-+			     "cannot vectorize load in non-SLP mode.\n");
-+	  return false;
-+	}
-+
-       if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
- 	slp_perm = true;
- 
-@@ -7767,7 +7848,7 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-       if (loop_vinfo
- 	  && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
- 	check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
--				  memory_access_type, &gs_info);
-+				  memory_access_type, &gs_info, mask);
- 
-       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
-       vect_model_load_cost (stmt_info, ncopies, memory_access_type,
-@@ -7947,9 +8028,8 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 		 vector elts directly.  */
- 	      scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
- 	      machine_mode vmode;
--	      if (mode_for_vector (elmode, group_size).exists (&vmode)
--		  && VECTOR_MODE_P (vmode)
--		  && targetm.vector_mode_supported_p (vmode)
-+	      if (related_vector_mode (TYPE_MODE (vectype), elmode,
-+				       group_size).exists (&vmode)
- 		  && (convert_optab_handler (vec_init_optab,
- 					     TYPE_MODE (vectype), vmode)
- 		      != CODE_FOR_nothing))
-@@ -7973,9 +8053,8 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 		  /* If we can't construct such a vector fall back to
- 		     element loads of the original vector type.  */
- 		  if (int_mode_for_size (lsize, 0).exists (&elmode)
--		      && mode_for_vector (elmode, lnunits).exists (&vmode)
--		      && VECTOR_MODE_P (vmode)
--		      && targetm.vector_mode_supported_p (vmode)
-+		      && related_vector_mode (TYPE_MODE (vectype), elmode,
-+					      lnunits).exists (&vmode)
- 		      && (convert_optab_handler (vec_init_optab, vmode, elmode)
- 			  != CODE_FOR_nothing))
- 		    {
-@@ -8413,8 +8492,17 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 					  simd_lane_access_p,
- 					  byte_offset, bump);
- 	  if (mask)
--	    vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
--						     mask_vectype);
-+	    {
-+	      if (slp_node)
-+		{
-+		  auto_vec<vec<tree> > vec_defs (1);
-+		  vect_get_slp_defs (slp_node, &vec_defs);
-+		  vec_mask = vec_defs[0][0];
-+		}
-+	      else
-+		vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
-+							 mask_vectype);
-+	    }
- 	}
-       else
- 	{
-@@ -8564,8 +8652,25 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 		      }
- 		    else
- 		      {
-+			tree ltype = vectype;
-+			/* If there's no peeling for gaps but we have a gap
-+			   with slp loads then load the lower half of the
-+			   vector only.  See get_group_load_store_type for
-+			   when we apply this optimization.  */
-+			if (slp
-+			    && loop_vinfo
-+			    && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
-+			    && DR_GROUP_GAP (first_stmt_info) != 0
-+			    && known_eq (nunits,
-+					 (group_size
-+					  - DR_GROUP_GAP (first_stmt_info)) * 2)
-+			    && known_eq (nunits, group_size))
-+			  ltype = build_vector_type (TREE_TYPE (vectype),
-+						     (group_size
-+						      - DR_GROUP_GAP
-+						          (first_stmt_info)));
- 			data_ref
--			  = fold_build2 (MEM_REF, vectype, dataref_ptr,
-+			  = fold_build2 (MEM_REF, ltype, dataref_ptr,
- 					 dataref_offset
- 					 ? dataref_offset
- 					 : build_int_cst (ref_type, 0));
-@@ -8579,6 +8684,23 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 			  TREE_TYPE (data_ref)
- 			    = build_aligned_type (TREE_TYPE (data_ref),
- 						  TYPE_ALIGN (elem_type));
-+			if (ltype != vectype)
-+			  {
-+			    vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
-+			    tree tem = make_ssa_name (ltype);
-+			    new_stmt = gimple_build_assign (tem, data_ref);
-+			    vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
-+			    data_ref = NULL;
-+			    vec<constructor_elt, va_gc> *v;
-+			    vec_alloc (v, 2);
-+			    CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
-+			    CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
-+						    build_zero_cst (ltype));
-+			    new_stmt
-+			      = gimple_build_assign (vec_dest,
-+						     build_constructor
-+						       (vectype, v));
-+			  }
- 		      }
- 		    break;
- 		  }
-@@ -8864,7 +8986,7 @@ vect_is_simple_cond (tree cond, vec_info *vinfo,
- 	scalar_type = build_nonstandard_integer_type
- 	  (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
- 	   TYPE_UNSIGNED (scalar_type));
--      *comp_vectype = get_vectype_for_scalar_type (scalar_type);
-+      *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
-     }
- 
-   return true;
-@@ -8881,9 +9003,9 @@ vect_is_simple_cond (tree cond, vec_info *vinfo,
- 
-    Return true if STMT_INFO is vectorizable in this way.  */
- 
--bool
-+static bool
- vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
--			stmt_vec_info *vec_stmt, bool for_reduction,
-+			stmt_vec_info *vec_stmt,
- 			slp_tree slp_node, stmt_vector_for_cost *cost_vec)
- {
-   vec_info *vinfo = stmt_info->vinfo;
-@@ -8913,22 +9035,39 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-   tree vec_cmp_type;
-   bool masked = false;
- 
--  if (for_reduction && STMT_SLP_TYPE (stmt_info))
-+  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
-+    return false;
-+
-+  /* Is vectorizable conditional operation?  */
-+  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
-+  if (!stmt)
-+    return false;
-+
-+  code = gimple_assign_rhs_code (stmt);
-+  if (code != COND_EXPR)
-     return false;
- 
--  vect_reduction_type reduction_type
--    = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
--  if (reduction_type == TREE_CODE_REDUCTION)
-+  stmt_vec_info reduc_info = NULL;
-+  int reduc_index = -1;
-+  vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
-+  bool for_reduction
-+    = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
-+  if (for_reduction)
-     {
--      if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
-+      if (STMT_SLP_TYPE (stmt_info))
- 	return false;
--
--      if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
--	  && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
--	       && for_reduction))
-+      reduc_info = info_for_reduction (stmt_info);
-+      reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
-+      reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
-+      gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
-+		  || reduc_index != -1);
-+    }
-+  else
-+    {
-+      if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
- 	return false;
- 
--      /* FORNOW: not yet supported.  */
-+      /* FORNOW: only supported as part of a reduction.  */
-       if (STMT_VINFO_LIVE_P (stmt_info))
- 	{
- 	  if (dump_enabled_p ())
-@@ -8938,16 +9077,6 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 	}
-     }
- 
--  /* Is vectorizable conditional operation?  */
--  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
--  if (!stmt)
--    return false;
--
--  code = gimple_assign_rhs_code (stmt);
--
--  if (code != COND_EXPR)
--    return false;
--
-   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
-   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
- 
-@@ -8981,7 +9110,7 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-     return false;
- 
-   masked = !COMPARISON_CLASS_P (cond_expr);
--  vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
-+  vec_cmp_type = truth_type_for (comp_vectype);
- 
-   if (vec_cmp_type == NULL_TREE)
-     return false;
-@@ -8993,6 +9122,29 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-       cond_expr1 = TREE_OPERAND (cond_expr, 1);
-     }
- 
-+  /* For conditional reductions, the "then" value needs to be the candidate
-+     value calculated by this iteration while the "else" value needs to be
-+     the result carried over from previous iterations.  If the COND_EXPR
-+     is the other way around, we need to swap it.  */
-+  bool must_invert_cmp_result = false;
-+  if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
-+    {
-+      if (masked)
-+	must_invert_cmp_result = true;
-+      else
-+	{
-+	  bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
-+	  tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
-+	  if (new_code == ERROR_MARK)
-+	    must_invert_cmp_result = true;
-+	  else
-+	    cond_code = new_code;
-+	}
-+      /* Make sure we don't accidentally use the old condition.  */
-+      cond_expr = NULL_TREE;
-+      std::swap (then_clause, else_clause);
-+    }
-+
-   if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
-     {
-       /* Boolean values may have another representation in vectors
-@@ -9053,6 +9205,16 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 		return false;
- 	    }
- 	}
-+      if (loop_vinfo
-+	  && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
-+	  && reduction_type == EXTRACT_LAST_REDUCTION)
-+	{
-+	  if (dump_enabled_p ())
-+	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-+			     "can't yet use a fully-masked loop for"
-+			     " EXTRACT_LAST_REDUCTION.\n");
-+	  LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
-+	}
-       if (expand_vec_cond_expr_p (vectype, comp_vectype,
- 				     cond_code))
- 	{
-@@ -9082,24 +9244,42 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-   /* Handle cond expr.  */
-   for (j = 0; j < ncopies; j++)
-     {
-+      tree loop_mask = NULL_TREE;
-+      bool swap_cond_operands = false;
-+
-+      /* See whether another part of the vectorized code applies a loop
-+	 mask to the condition, or to its inverse.  */
-+
-+      if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
-+	{
-+	  scalar_cond_masked_key cond (cond_expr, ncopies);
-+	  if (loop_vinfo->scalar_cond_masked_set.contains (cond))
-+	    {
-+	      vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
-+	      loop_mask = vect_get_loop_mask (gsi, masks, ncopies, vectype, j);
-+	    }
-+	  else
-+	    {
-+	      bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
-+	      cond.code = invert_tree_comparison (cond.code, honor_nans);
-+	      if (loop_vinfo->scalar_cond_masked_set.contains (cond))
-+		{
-+		  vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
-+		  loop_mask = vect_get_loop_mask (gsi, masks, ncopies,
-+						  vectype, j);
-+		  cond_code = cond.code;
-+		  swap_cond_operands = true;
-+		}
-+	    }
-+	}
-+
-       stmt_vec_info new_stmt_info = NULL;
-       if (j == 0)
- 	{
-           if (slp_node)
-             {
--              auto_vec<tree, 4> ops;
- 	      auto_vec<vec<tree>, 4> vec_defs;
--
--	      if (masked)
--		ops.safe_push (cond_expr);
--	      else
--		{
--		  ops.safe_push (cond_expr0);
--		  ops.safe_push (cond_expr1);
--		}
--              ops.safe_push (then_clause);
--              ops.safe_push (else_clause);
--              vect_get_slp_defs (ops, slp_node, &vec_defs);
-+              vect_get_slp_defs (slp_node, &vec_defs);
- 	      vec_oprnds3 = vec_defs.pop ();
- 	      vec_oprnds2 = vec_defs.pop ();
- 	      if (!masked)
-@@ -9159,6 +9339,9 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-           vec_then_clause = vec_oprnds2[i];
-           vec_else_clause = vec_oprnds3[i];
- 
-+	  if (swap_cond_operands)
-+	    std::swap (vec_then_clause, vec_else_clause);
-+
- 	  if (masked)
- 	    vec_compare = vec_cond_lhs;
- 	  else
-@@ -9197,6 +9380,50 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 		    }
- 		}
- 	    }
-+
-+	  /* If we decided to apply a loop mask to the result of the vector
-+             comparison, AND the comparison with the mask now.  Later passes
-+             should then be able to reuse the AND results between multiple
-+             vector statements.
-+
-+	     For example:
-+	     for (int i = 0; i < 100; ++i)
-+	       x[i] = y[i] ? z[i] : 10;
-+
-+	     results in following optimized GIMPLE:
-+
-+	     mask__35.8_43 = vect__4.7_41 != { 0, ... };
-+	     vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
-+	     _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
-+	     vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
-+	     vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46, vect_iftmp.11_47, { 10, ... }>;
-+
-+	     instead of using a masked and unmasked forms of
-+	     vec != { 0, ... } (masked in the MASK_LOAD,
-+	     unmasked in the VEC_COND_EXPR).  */
-+
-+	  if (loop_mask)
-+	    {
-+	      if (COMPARISON_CLASS_P (vec_compare))
-+		{
-+		  tree tmp = make_ssa_name (vec_cmp_type);
-+		  tree op0 = TREE_OPERAND (vec_compare, 0);
-+		  tree op1 = TREE_OPERAND (vec_compare, 1);
-+		  gassign *g = gimple_build_assign (tmp,
-+						    TREE_CODE (vec_compare),
-+						    op0, op1);
-+		  vect_finish_stmt_generation (stmt_info, g, gsi);
-+		  vec_compare = tmp;
-+		}
-+
-+	      tree tmp2 = make_ssa_name (vec_cmp_type);
-+	      gassign *g = gimple_build_assign (tmp2, BIT_AND_EXPR,
-+						vec_compare, loop_mask);
-+	      vect_finish_stmt_generation (stmt_info, g, gsi);
-+	      vec_compare = tmp2;
-+	    }
-+
- 	  if (reduction_type == EXTRACT_LAST_REDUCTION)
- 	    {
- 	      if (!is_gimple_val (vec_compare))
-@@ -9207,6 +9434,15 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
- 		  vec_compare = vec_compare_name;
- 		}
-+	      if (must_invert_cmp_result)
-+		{
-+		  tree vec_compare_name = make_ssa_name (vec_cmp_type);
-+		  gassign *new_stmt = gimple_build_assign (vec_compare_name,
-+							   BIT_NOT_EXPR,
-+							   vec_compare);
-+		  vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
-+		  vec_compare = vec_compare_name;
-+		}
- 	      gcall *new_stmt = gimple_build_call_internal
- 		(IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
- 		 vec_then_clause);
-@@ -9345,7 +9581,7 @@ vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-   /* Invariant comparison.  */
-   if (!vectype)
-     {
--      vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
-+      vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
-       if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
- 	return false;
-     }
-@@ -9446,12 +9682,8 @@ vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 	{
- 	  if (slp_node)
- 	    {
--	      auto_vec<tree, 2> ops;
- 	      auto_vec<vec<tree>, 2> vec_defs;
--
--	      ops.safe_push (rhs1);
--	      ops.safe_push (rhs2);
--	      vect_get_slp_defs (ops, slp_node, &vec_defs);
-+	      vect_get_slp_defs (slp_node, &vec_defs);
- 	      vec_oprnds1 = vec_defs.pop ();
- 	      vec_oprnds0 = vec_defs.pop ();
- 	      if (swap_p)
-@@ -9544,7 +9776,8 @@ vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 
- static bool
- can_vectorize_live_stmts (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
--			  slp_tree slp_node, stmt_vec_info *vec_stmt,
-+			  slp_tree slp_node, slp_instance slp_node_instance,
-+			  stmt_vec_info *vec_stmt,
- 			  stmt_vector_for_cost *cost_vec)
- {
-   if (slp_node)
-@@ -9554,13 +9787,15 @@ can_vectorize_live_stmts (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
- 	{
- 	  if (STMT_VINFO_LIVE_P (slp_stmt_info)
--	      && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node, i,
-+	      && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node,
-+					       slp_node_instance, i,
- 					       vec_stmt, cost_vec))
- 	    return false;
- 	}
-     }
-   else if (STMT_VINFO_LIVE_P (stmt_info)
--	   && !vectorizable_live_operation (stmt_info, gsi, slp_node, -1,
-+	   && !vectorizable_live_operation (stmt_info, gsi, slp_node,
-+					    slp_node_instance, -1,
- 					    vec_stmt, cost_vec))
-     return false;
- 
-@@ -9736,14 +9971,13 @@ vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
- 	  || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
- 				cost_vec)
- 	  || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
--	  || vectorizable_reduction (stmt_info, NULL, NULL, node,
--				     node_instance, cost_vec)
-+	  || vectorizable_reduction (stmt_info, node, node_instance, cost_vec)
- 	  || vectorizable_induction (stmt_info, NULL, NULL, node, cost_vec)
- 	  || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
--	  || vectorizable_condition (stmt_info, NULL, NULL, false, node,
--				     cost_vec)
-+	  || vectorizable_condition (stmt_info, NULL, NULL, node, cost_vec)
- 	  || vectorizable_comparison (stmt_info, NULL, NULL, node,
--				      cost_vec));
-+				      cost_vec)
-+	  || vectorizable_lc_phi (stmt_info, NULL, node));
-   else
-     {
-       if (bb_vinfo)
-@@ -9759,8 +9993,7 @@ vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
- 	      || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
- 				    cost_vec)
- 	      || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
--	      || vectorizable_condition (stmt_info, NULL, NULL, false, node,
--					 cost_vec)
-+	      || vectorizable_condition (stmt_info, NULL, NULL, node, cost_vec)
- 	      || vectorizable_comparison (stmt_info, NULL, NULL, node,
- 					  cost_vec));
-     }
-@@ -9775,7 +10008,9 @@ vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
-       need extra handling, except for vectorizable reductions.  */
-   if (!bb_vinfo
-       && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
--      && !can_vectorize_live_stmts (stmt_info, NULL, node, NULL, cost_vec))
-+      && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
-+      && !can_vectorize_live_stmts (stmt_info, NULL, node, node_instance,
-+				    NULL, cost_vec))
-     return opt_result::failure_at (stmt_info->stmt,
- 				   "not vectorized:"
- 				   " live stmt not supported: %G",
-@@ -9864,8 +10099,7 @@ vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-       break;
- 
-     case condition_vec_info_type:
--      done = vectorizable_condition (stmt_info, gsi, &vec_stmt, false,
--				     slp_node, NULL);
-+      done = vectorizable_condition (stmt_info, gsi, &vec_stmt, slp_node, NULL);
-       gcc_assert (done);
-       break;
- 
-@@ -9887,8 +10121,18 @@ vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
-       break;
- 
-     case reduc_vec_info_type:
--      done = vectorizable_reduction (stmt_info, gsi, &vec_stmt, slp_node,
--				     slp_node_instance, NULL);
-+      done = vect_transform_reduction (stmt_info, gsi, &vec_stmt, slp_node);
-+      gcc_assert (done);
-+      break;
-+
-+    case cycle_phi_info_type:
-+      done = vect_transform_cycle_phi (stmt_info, &vec_stmt, slp_node,
-+				       slp_node_instance);
-+      gcc_assert (done);
-+      break;
-+
-+    case lc_phi_info_type:
-+      done = vectorizable_lc_phi (stmt_info, &vec_stmt, slp_node);
-       gcc_assert (done);
-       break;
- 
-@@ -9944,19 +10188,66 @@ vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
- 	  }
-     }
- 
--  /* Handle stmts whose DEF is used outside the loop-nest that is
--     being vectorized.  */
--  if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
--    {
--      done = can_vectorize_live_stmts (stmt_info, gsi, slp_node, &vec_stmt,
--				       NULL);
--      gcc_assert (done);
--    }
--
-   if (vec_stmt)
-     STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
- 
--  return is_store;
-+  if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
-+    return is_store;
-+
-+  /* If this stmt defines a value used on a backedge, update the
-+     vectorized PHIs.  */
-+  stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
-+  stmt_vec_info reduc_info;
-+  if (STMT_VINFO_REDUC_DEF (orig_stmt_info)
-+      && vect_stmt_to_vectorize (orig_stmt_info) == stmt_info
-+      && (reduc_info = info_for_reduction (orig_stmt_info))
-+      && STMT_VINFO_REDUC_TYPE (reduc_info) != FOLD_LEFT_REDUCTION
-+      && STMT_VINFO_REDUC_TYPE (reduc_info) != EXTRACT_LAST_REDUCTION)
-+    {
-+      gphi *phi;
-+      if (!slp_node
-+	  && (phi = dyn_cast <gphi *>
-+		      (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt))
-+	  && dominated_by_p (CDI_DOMINATORS,
-+			     gimple_bb (orig_stmt_info->stmt), gimple_bb (phi)))
-+	{
-+	  edge e = loop_latch_edge (gimple_bb (phi)->loop_father);
-+	  stmt_vec_info phi_info
-+	    = STMT_VINFO_VEC_STMT (STMT_VINFO_REDUC_DEF (orig_stmt_info));
-+	  stmt_vec_info vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
-+	  do
-+	    {
-+	      add_phi_arg (as_a <gphi *> (phi_info->stmt),
-+			   gimple_get_lhs (vec_stmt->stmt), e,
-+			   gimple_phi_arg_location (phi, e->dest_idx));
-+	      phi_info = STMT_VINFO_RELATED_STMT (phi_info);
-+	      vec_stmt = STMT_VINFO_RELATED_STMT (vec_stmt);
-+	    }
-+	  while (phi_info);
-+	  gcc_assert (!vec_stmt);
-+	}
-+      else if (slp_node
-+	       && slp_node != slp_node_instance->reduc_phis)
-+	{
-+	  slp_tree phi_node = slp_node_instance->reduc_phis;
-+	  gphi *phi = as_a <gphi *> (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt);
-+	  edge e = loop_latch_edge (gimple_bb (phi)->loop_father);
-+	  gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
-+		      == SLP_TREE_VEC_STMTS (slp_node).length ());
-+	  for (unsigned i = 0; i < SLP_TREE_VEC_STMTS (phi_node).length (); ++i)
-+	    add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[i]->stmt),
-+			 gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[i]->stmt),
-+			 e, gimple_phi_arg_location (phi, e->dest_idx));
-+	}
-+    }
-+
-+  /* Handle stmts whose DEF is used outside the loop-nest that is
-+     being vectorized.  */
-+  done = can_vectorize_live_stmts (stmt_info, gsi, slp_node,
-+				   slp_node_instance, &vec_stmt, NULL);
-+  gcc_assert (done);
-+
-+  return false;
- }
- 
- 
-@@ -9979,18 +10270,28 @@ vect_remove_stores (stmt_vec_info first_stmt_info)
-     }
- }
- 
--/* Function get_vectype_for_scalar_type_and_size.
-+/* If NUNITS is nonzero, return a vector type that contains NUNITS
-+   elements of type SCALAR_TYPE, or null if the target doesn't support
-+   such a type.
- 
--   Returns the vector type corresponding to SCALAR_TYPE  and SIZE as supported
--   by the target.  */
-+   If NUNITS is zero, return a vector type that contains elements of
-+   type SCALAR_TYPE, choosing whichever vector size the target prefers.
-+
-+   If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
-+   for this vectorization region and want to "autodetect" the best choice.
-+   Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
-+   and we want the new type to be interoperable with it.   PREVAILING_MODE
-+   in this case can be a scalar integer mode or a vector mode; when it
-+   is a vector mode, the function acts like a tree-level version of
-+   related_vector_mode.  */
- 
- tree
--get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
-+get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
-+				     tree scalar_type, poly_uint64 nunits)
- {
-   tree orig_scalar_type = scalar_type;
-   scalar_mode inner_mode;
-   machine_mode simd_mode;
--  poly_uint64 nunits;
-   tree vectype;
- 
-   if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
-@@ -10030,19 +10331,45 @@ get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
-   if (scalar_type == NULL_TREE)
-     return NULL_TREE;
- 
--  /* If no size was supplied use the mode the target prefers.   Otherwise
--     lookup a vector mode of the specified size.  */
--  if (known_eq (size, 0U))
--    simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
--  else if (!multiple_p (size, nbytes, &nunits)
--	   || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
--    return NULL_TREE;
--  /* NOTE: nunits == 1 is allowed to support single element vector types.  */
--  if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
--    return NULL_TREE;
-+  /* If no prevailing mode was supplied, use the mode the target prefers.
-+     Otherwise lookup a vector mode based on the prevailing mode.  */
-+  if (prevailing_mode == VOIDmode)
-+    {
-+      gcc_assert (known_eq (nunits, 0U));
-+      simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
-+      if (SCALAR_INT_MODE_P (simd_mode))
-+	{
-+	  /* Traditional behavior is not to take the integer mode
-+	     literally, but simply to use it as a way of determining
-+	     the vector size.  It is up to mode_for_vector to decide
-+	     what the TYPE_MODE should be.
-+
-+	     Note that nunits == 1 is allowed in order to support single
-+	     element vector types.  */
-+	  if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
-+	      || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
-+	    return NULL_TREE;
-+	}
-+    }
-+  else if (SCALAR_INT_MODE_P (prevailing_mode)
-+	   || !related_vector_mode (prevailing_mode,
-+				    inner_mode, nunits).exists (&simd_mode))
-+    {
-+      /* Fall back to using mode_for_vector, mostly in the hope of being
-+	 able to use an integer mode.  */
-+      if (known_eq (nunits, 0U)
-+	  && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
-+	return NULL_TREE;
- 
--  vectype = build_vector_type (scalar_type, nunits);
-+      if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
-+	return NULL_TREE;
-+    }
-+
-+  vectype = build_vector_type_for_mode (scalar_type, simd_mode);
- 
-+  /* In cases where the mode was chosen by mode_for_vector, check that
-+     the target actually supports the chosen mode, or that it at least
-+     allows the vector mode to be replaced by a like-sized integer.  */
-   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
-       && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
-     return NULL_TREE;
-@@ -10056,22 +10383,22 @@ get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
-   return vectype;
- }
- 
--poly_uint64 current_vector_size;
--
- /* Function get_vectype_for_scalar_type.
- 
-    Returns the vector type corresponding to SCALAR_TYPE as supported
-    by the target.  */
- 
- tree
--get_vectype_for_scalar_type (tree scalar_type)
-+get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type)
- {
--  tree vectype;
--  vectype = get_vectype_for_scalar_type_and_size (scalar_type,
--						  current_vector_size);
--  if (vectype
--      && known_eq (current_vector_size, 0U))
--    current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
-+  tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
-+						      scalar_type);
-+  if (vectype && vinfo->vector_mode == VOIDmode)
-+    vinfo->vector_mode = TYPE_MODE (vectype);
-+
-+  if (vectype)
-+    vinfo->used_vector_modes.add (TYPE_MODE (vectype));
-+
-   return vectype;
- }
- 
-@@ -10081,15 +10408,14 @@ get_vectype_for_scalar_type (tree scalar_type)
-    of vectors of specified SCALAR_TYPE as supported by target.  */
- 
- tree
--get_mask_type_for_scalar_type (tree scalar_type)
-+get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type)
- {
--  tree vectype = get_vectype_for_scalar_type (scalar_type);
-+  tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
- 
-   if (!vectype)
-     return NULL;
- 
--  return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
--				  current_vector_size);
-+  return truth_type_for (vectype);
- }
- 
- /* Function get_same_sized_vectype
-@@ -10101,10 +10427,29 @@ tree
- get_same_sized_vectype (tree scalar_type, tree vector_type)
- {
-   if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
--    return build_same_sized_truth_vector_type (vector_type);
-+    return truth_type_for (vector_type);
-+
-+  poly_uint64 nunits;
-+  if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
-+		   GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
-+    return NULL_TREE;
-+
-+  return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
-+					      scalar_type, nunits);
-+}
-+
-+/* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
-+   would not change the chosen vector modes.  */
- 
--  return get_vectype_for_scalar_type_and_size
--	   (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
-+bool
-+vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
-+{
-+  for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
-+       i != vinfo->used_vector_modes.end (); ++i)
-+    if (!VECTOR_MODE_P (*i)
-+	|| related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
-+      return false;
-+  return true;
- }
- 
- /* Function vect_is_simple_use.
-@@ -10492,11 +10837,8 @@ supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info,
-     {
-       intermediate_mode = insn_data[icode1].operand[0].mode;
-       if (VECTOR_BOOLEAN_TYPE_P (prev_type))
--	{
--	  intermediate_type = vect_halve_mask_nunits (prev_type);
--	  if (intermediate_mode != TYPE_MODE (intermediate_type))
--	    return false;
--	}
-+	intermediate_type
-+	  = vect_halve_mask_nunits (prev_type, intermediate_mode);
-       else
- 	intermediate_type
- 	  = lang_hooks.types.type_for_mode (intermediate_mode,
-@@ -10680,11 +11022,8 @@ supportable_narrowing_operation (enum tree_code code,
-     {
-       intermediate_mode = insn_data[icode1].operand[0].mode;
-       if (VECTOR_BOOLEAN_TYPE_P (prev_type))
--	{
--	  intermediate_type = vect_double_mask_nunits (prev_type);
--	  if (intermediate_mode != TYPE_MODE (intermediate_type))
--	    return false;
--	}
-+	intermediate_type
-+	  = vect_double_mask_nunits (prev_type, intermediate_mode);
-       else
- 	intermediate_type
- 	  = lang_hooks.types.type_for_mode (intermediate_mode, uns);
-@@ -10777,6 +11116,7 @@ vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
- 				tree *stmt_vectype_out,
- 				tree *nunits_vectype_out)
- {
-+  vec_info *vinfo = stmt_info->vinfo;
-   gimple *stmt = stmt_info->stmt;
- 
-   *stmt_vectype_out = NULL_TREE;
-@@ -10810,7 +11150,12 @@ vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
-   tree vectype;
-   tree scalar_type = NULL_TREE;
-   if (STMT_VINFO_VECTYPE (stmt_info))
--    *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
-+    {
-+      *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
-+      if (dump_enabled_p ())
-+	dump_printf_loc (MSG_NOTE, vect_location,
-+			 "precomputed vectype: %T\n", vectype);
-+    }
-   else
-     {
-       gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
-@@ -10842,8 +11187,8 @@ vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
- 
-       if (dump_enabled_p ())
- 	dump_printf_loc (MSG_NOTE, vect_location,
--			 "get vectype for scalar type:  %T\n", scalar_type);
--      vectype = get_vectype_for_scalar_type (scalar_type);
-+			 "get vectype for scalar type: %T\n", scalar_type);
-+      vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
-       if (!vectype)
- 	return opt_result::failure_at (stmt,
- 				       "not vectorized:"
-@@ -10859,42 +11204,38 @@ vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
- 
-   /* Don't try to compute scalar types if the stmt produces a boolean
-      vector; use the existing vector type instead.  */
--  tree nunits_vectype;
--  if (VECTOR_BOOLEAN_TYPE_P (vectype))
--    nunits_vectype = vectype;
--  else
-+  tree nunits_vectype = vectype;
-+  if (!VECTOR_BOOLEAN_TYPE_P (vectype)
-+      && *stmt_vectype_out != boolean_type_node)
-     {
-       /* The number of units is set according to the smallest scalar
- 	 type (or the largest vector size, but we only support one
- 	 vector size per vectorization).  */
--      if (*stmt_vectype_out != boolean_type_node)
-+      HOST_WIDE_INT dummy;
-+      scalar_type = vect_get_smallest_scalar_type (stmt_info, &dummy, &dummy);
-+      if (scalar_type != TREE_TYPE (vectype))
- 	{
--	  HOST_WIDE_INT dummy;
--	  scalar_type = vect_get_smallest_scalar_type (stmt_info,
--						       &dummy, &dummy);
-+	  if (dump_enabled_p ())
-+	    dump_printf_loc (MSG_NOTE, vect_location,
-+			     "get vectype for smallest scalar type: %T\n",
-+			     scalar_type);
-+	  nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
-+	  if (!nunits_vectype)
-+	    return opt_result::failure_at
-+	      (stmt, "not vectorized: unsupported data-type %T\n",
-+	       scalar_type);
-+	  if (dump_enabled_p ())
-+	    dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
-+			     nunits_vectype);
- 	}
--      if (dump_enabled_p ())
--	dump_printf_loc (MSG_NOTE, vect_location,
--			 "get vectype for scalar type:  %T\n", scalar_type);
--      nunits_vectype = get_vectype_for_scalar_type (scalar_type);
-     }
--  if (!nunits_vectype)
--    return opt_result::failure_at (stmt,
--				   "not vectorized: unsupported data-type %T\n",
--				   scalar_type);
- 
--  if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
--		GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
--    return opt_result::failure_at (stmt,
--				   "not vectorized: different sized vector "
--				   "types in statement, %T and %T\n",
--				   vectype, nunits_vectype);
-+  gcc_assert (*stmt_vectype_out == boolean_type_node
-+	      || multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
-+			     TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)));
- 
-   if (dump_enabled_p ())
-     {
--      dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n",
--		       nunits_vectype);
--
-       dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
-       dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
-       dump_printf (MSG_NOTE, "\n");
-@@ -10911,6 +11252,7 @@ vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
- opt_tree
- vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
- {
-+  vec_info *vinfo = stmt_info->vinfo;
-   gimple *stmt = stmt_info->stmt;
-   tree mask_type = NULL;
-   tree vectype, scalar_type;
-@@ -10920,7 +11262,7 @@ vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
-       && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
-     {
-       scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
--      mask_type = get_mask_type_for_scalar_type (scalar_type);
-+      mask_type = get_mask_type_for_scalar_type (vinfo, scalar_type);
- 
-       if (!mask_type)
- 	return opt_tree::failure_at (stmt,
-@@ -10968,7 +11310,7 @@ vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
- 	  && !VECTOR_BOOLEAN_TYPE_P (mask_type)
- 	  && gimple_code (stmt) == GIMPLE_ASSIGN
- 	  && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
--	mask_type = build_same_sized_truth_vector_type (mask_type);
-+	mask_type = truth_type_for (mask_type);
-     }
- 
-   /* No mask_type should mean loop invariant predicate.
-diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
-index d89ec3b7c76..c2c6377d3f9 100644
---- a/gcc/tree-vectorizer.c
-+++ b/gcc/tree-vectorizer.c
-@@ -639,8 +639,11 @@ vec_info::new_stmt_vec_info (gimple *stmt)
-   STMT_VINFO_TYPE (res) = undef_vec_info_type;
-   STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
-   STMT_VINFO_VECTORIZABLE (res) = true;
--  STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
--  STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
-+  STMT_VINFO_REDUC_TYPE (res) = TREE_CODE_REDUCTION;
-+  STMT_VINFO_REDUC_CODE (res) = ERROR_MARK;
-+  STMT_VINFO_REDUC_FN (res) = IFN_LAST;
-+  STMT_VINFO_REDUC_IDX (res) = -1;
-+  STMT_VINFO_SLP_VECT_ONLY (res) = false;
- 
-   if (gimple_code (stmt) == GIMPLE_PHI
-       && is_loop_header_bb_p (gimple_bb (stmt)))
-@@ -862,8 +865,7 @@ set_uid_loop_bbs (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
- 
- static unsigned
- try_vectorize_loop_1 (hash_table<simduid_to_vf *> *&simduid_to_vf_htab,
--		      unsigned *num_vectorized_loops,
--		      loop_p loop, loop_vec_info orig_loop_vinfo,
-+		      unsigned *num_vectorized_loops, loop_p loop,
- 		      gimple *loop_vectorized_call,
- 		      gimple *loop_dist_alias_call)
- {
-@@ -871,6 +873,7 @@ try_vectorize_loop_1 (hash_table<simduid_to_vf *> *&simduid_to_vf_htab,
-   vec_info_shared shared;
-   auto_purge_vect_location sentinel;
-   vect_location = find_loop_location (loop);
-+
-   if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION
-       && dump_enabled_p ())
-     dump_printf (MSG_NOTE | MSG_PRIORITY_INTERNALS,
-@@ -878,10 +881,17 @@ try_vectorize_loop_1 (hash_table *&simduid_to_vf_htab,
- 		 LOCATION_FILE (vect_location.get_location_t ()),
- 		 LOCATION_LINE (vect_location.get_location_t ()));
- 
--  /* Try to analyze the loop, retaining an opt_problem if dump_enabled_p.  */
--  opt_loop_vec_info loop_vinfo
--    = vect_analyze_loop (loop, orig_loop_vinfo, &shared);
--  loop->aux = loop_vinfo;
-+  opt_loop_vec_info loop_vinfo = opt_loop_vec_info::success (NULL);
-+  /* In the case of epilogue vectorization the loop already has its
-+     loop_vec_info set, we do not require to analyze the loop in this case.  */
-+  if (loop_vec_info vinfo = loop_vec_info_for_loop (loop))
-+    loop_vinfo = opt_loop_vec_info::success (vinfo);
-+  else
-+    {
-+      /* Try to analyze the loop, retaining an opt_problem if dump_enabled_p.  */
-+      loop_vinfo = vect_analyze_loop (loop, &shared);
-+      loop->aux = loop_vinfo;
-+    }
- 
-   if (!loop_vinfo)
-     if (dump_enabled_p ())
-@@ -968,7 +978,7 @@ try_vectorize_loop_1 (hash_table *&simduid_to_vf_htab,
-   unsigned HOST_WIDE_INT bytes;
-   if (dump_enabled_p ())
-     {
--      if (current_vector_size.is_constant (&bytes))
-+      if (GET_MODE_SIZE (loop_vinfo->vector_mode).is_constant (&bytes))
- 	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
- 			 "loop vectorized using %wu byte vectors\n", bytes);
-       else
-@@ -1009,8 +1019,13 @@ try_vectorize_loop_1 (hash_table *&simduid_to_vf_htab,
- 
-   /* Epilogue of vectorized loop must be vectorized too.  */
-   if (new_loop)
--    ret |= try_vectorize_loop_1 (simduid_to_vf_htab, num_vectorized_loops,
--				 new_loop, loop_vinfo, NULL, NULL);
-+    {
-+      /* Don't include vectorized epilogues in the "vectorized loops" count.
-+       */
-+      unsigned dont_count = *num_vectorized_loops;
-+      ret |= try_vectorize_loop_1 (simduid_to_vf_htab, &dont_count,
-+				   new_loop, NULL, NULL);
-+    }
- 
-   return ret;
- }
-@@ -1026,8 +1041,7 @@ try_vectorize_loop (hash_table<simduid_to_vf *> *&simduid_to_vf_htab,
- 	|| loop->force_vectorize))
-     return 0;
- 
--  return try_vectorize_loop_1 (simduid_to_vf_htab, num_vectorized_loops,
--			       loop, NULL,
-+  return try_vectorize_loop_1 (simduid_to_vf_htab, num_vectorized_loops, loop,
- 			       vect_loop_vectorized_call (loop),
- 			       vect_loop_dist_alias_call (loop));
- }
-@@ -1344,7 +1358,8 @@ get_vec_alignment_for_array_type (tree type)
-   gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
-   poly_uint64 array_size, vector_size;
- 
--  tree vectype = get_vectype_for_scalar_type (strip_array_types (type));
-+  tree scalar_type = strip_array_types (type);
-+  tree vectype = get_related_vectype_for_scalar_type (VOIDmode, scalar_type);
-   if (!vectype
-       || !poly_int_tree_p (TYPE_SIZE (type), &array_size)
-       || !poly_int_tree_p (TYPE_SIZE (vectype), &vector_size)
-@@ -1512,3 +1527,36 @@ make_pass_ipa_increase_alignment (gcc::context *ctxt)
- {
-   return new pass_ipa_increase_alignment (ctxt);
- }
-+
-+/* If the condition represented by T is a comparison or the SSA name
-+   result of a comparison, extract the comparison's operands.  Represent
-+   T as NE_EXPR  otherwise.  */
-+
-+void
-+scalar_cond_masked_key::get_cond_ops_from_tree (tree t)
-+{
-+  if (TREE_CODE_CLASS (TREE_CODE (t)) == tcc_comparison)
-+    {
-+      this->code = TREE_CODE (t);
-+      this->op0 = TREE_OPERAND (t, 0);
-+      this->op1 = TREE_OPERAND (t, 1);
-+      return;
-+    }
-+
-+  if (TREE_CODE (t) == SSA_NAME)
-+    if (gassign *stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (t)))
-+      {
-+	tree_code code = gimple_assign_rhs_code (stmt);
-+	if (TREE_CODE_CLASS (code) == tcc_comparison)
-+	  {
-+	    this->code = code;
-+	    this->op0 = gimple_assign_rhs1 (stmt);
-+	    this->op1 = gimple_assign_rhs2 (stmt);
-+	    return;
-+	  }
-+      }
-+
-+  this->code = NE_EXPR;
-+  this->op0 = t;
-+  this->op1 = build_zero_cst (TREE_TYPE (t));
-+}
-diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
-index 148b9a7f215..c46e2742c36 100644
---- a/gcc/tree-vectorizer.h
-+++ b/gcc/tree-vectorizer.h
-@@ -26,6 +26,7 @@ typedef struct _stmt_vec_info *stmt_vec_info;
- #include "tree-data-ref.h"
- #include "tree-hash-traits.h"
- #include "target.h"
-+#include <utility>
- 
- /* Used for naming of new temporaries.  */
- enum vect_var_kind {
-@@ -120,6 +121,8 @@ struct _slp_tree {
-   vec<slp_tree> children;
-   /* A group of scalar stmts to be vectorized together.  */
-   vec<stmt_vec_info> stmts;
-+  /* A group of scalar operands to be vectorized together.  */
-+  vec<tree> ops;
-   /* Load permutation relative to the stores, NULL if there is no
-      permutation.  */
-   vec<unsigned> load_permutation;
-@@ -170,13 +173,82 @@ typedef struct _slp_instance {
- 
- #define SLP_TREE_CHILDREN(S)                     (S)->children
- #define SLP_TREE_SCALAR_STMTS(S)                 (S)->stmts
-+#define SLP_TREE_SCALAR_OPS(S)                   (S)->ops
- #define SLP_TREE_VEC_STMTS(S)                    (S)->vec_stmts
- #define SLP_TREE_NUMBER_OF_VEC_STMTS(S)          (S)->vec_stmts_size
- #define SLP_TREE_LOAD_PERMUTATION(S)             (S)->load_permutation
- #define SLP_TREE_TWO_OPERATORS(S)		 (S)->two_operators
- #define SLP_TREE_DEF_TYPE(S)			 (S)->def_type
- 
-+/* Key for map that records association between
-+   scalar conditions and corresponding loop mask, and
-+   is populated by vect_record_loop_mask.  */
- 
-+struct scalar_cond_masked_key
-+{
-+  scalar_cond_masked_key (tree t, unsigned ncopies_)
-+    : ncopies (ncopies_)
-+  {
-+    get_cond_ops_from_tree (t);
-+  }
-+
-+  void get_cond_ops_from_tree (tree);
-+
-+  unsigned ncopies;
-+  tree_code code;
-+  tree op0;
-+  tree op1;
-+};
-+
-+template<>
-+struct default_hash_traits<scalar_cond_masked_key>
-+{
-+  typedef scalar_cond_masked_key compare_type;
-+  typedef scalar_cond_masked_key value_type;
-+
-+  static inline hashval_t
-+  hash (value_type v)
-+  {
-+    inchash::hash h;
-+    h.add_int (v.code);
-+    inchash::add_expr (v.op0, h, 0);
-+    inchash::add_expr (v.op1, h, 0);
-+    h.add_int (v.ncopies);
-+    return h.end ();
-+  }
-+
-+  static inline bool
-+  equal (value_type existing, value_type candidate)
-+  {
-+    return (existing.ncopies == candidate.ncopies
-+           && existing.code == candidate.code
-+           && operand_equal_p (existing.op0, candidate.op0, 0)
-+           && operand_equal_p (existing.op1, candidate.op1, 0));
-+  }
-+
-+  static inline void
-+  mark_empty (value_type &v)
-+  {
-+    v.ncopies = 0;
-+  }
-+
-+  static inline bool
-+  is_empty (value_type v)
-+  {
-+    return v.ncopies == 0;
-+  }
-+
-+  static inline void mark_deleted (value_type &) {}
-+
-+  static inline bool is_deleted (const value_type &)
-+  {
-+    return false;
-+  }
-+
-+  static inline void remove (value_type &) {}
-+};
-+
-+typedef hash_set<scalar_cond_masked_key> scalar_cond_masked_set_type;
- 
- /* Describes two objects whose addresses must be unequal for the vectorized
-    loop to be valid.  */
-@@ -217,6 +289,7 @@ struct vec_info_shared {
- 
- /* Vectorizer state common between loop and basic-block vectorization.  */
- struct vec_info {
-+  typedef hash_set<int_hash<machine_mode, E_VOIDmode, E_BLKmode> > mode_set;
-   enum vec_kind { bb, loop };
- 
-   vec_info (vec_kind, void *, vec_info_shared *);
-@@ -254,6 +327,14 @@ struct vec_info {
-   /* Cost data used by the target cost model.  */
-   void *target_cost_data;
- 
-+  /* The set of vector modes used in the vectorized region.  */
-+  mode_set used_vector_modes;
-+
-+  /* The argument we should pass to related_vector_mode when looking up
-+     the vector mode for a scalar mode, or VOIDmode if we haven't yet
-+     made any decisions about which vector modes to use.  */
-+  machine_mode vector_mode;
-+
- private:
-   stmt_vec_info new_stmt_vec_info (gimple *stmt);
-   void set_vinfo_for_stmt (gimple *, stmt_vec_info);
-@@ -377,6 +458,8 @@ struct rgroup_masks {
- 
- typedef auto_vec<rgroup_masks> vec_loop_masks;
- 
-+typedef auto_vec<std::pair<data_reference*, tree> > drs_init_vec;
-+
- /*-----------------------------------------------------------------*/
- /* Info on vectorized loops.                                       */
- /*-----------------------------------------------------------------*/
-@@ -399,7 +482,7 @@ typedef struct _loop_vec_info : public vec_info {
-   /* Condition under which this loop is analyzed and versioned.  */
-   tree num_iters_assumptions;
- 
--  /* Threshold of number of iterations below which vectorzation will not be
-+  /* Threshold of number of iterations below which vectorization will not be
-      performed. It is calculated from MIN_PROFITABLE_ITERS and
-      PARAM_MIN_VECT_LOOP_BOUND.  */
-   unsigned int th;
-@@ -421,6 +504,9 @@ typedef struct _loop_vec_info : public vec_info {
-      on inactive scalars.  */
-   vec_loop_masks masks;
- 
-+  /* Set of scalar conditions that have loop mask applied.  */
-+  scalar_cond_masked_set_type scalar_cond_masked_set;
-+
-   /* If we are using a loop mask to align memory addresses, this variable
-      contains the number of vector elements that we should skip in the
-      first iteration of the vector loop (i.e. the number of leading
-@@ -497,6 +583,13 @@ typedef struct _loop_vec_info : public vec_info {
-   /* Cost of a single scalar iteration.  */
-   int single_scalar_iteration_cost;
- 
-+  /* The cost of the vector prologue and epilogue, including peeled
-+     iterations and set-up code.  */
-+  int vec_outside_cost;
-+
-+  /* The cost of the vector loop body.  */
-+  int vec_inside_cost;
-+
-   /* Is the loop vectorizable? */
-   bool vectorizable;
- 
-@@ -551,6 +644,10 @@ typedef struct _loop_vec_info : public vec_info {
-      this points to the original vectorized loop.  Otherwise NULL.  */
-   _loop_vec_info *orig_loop_info;
- 
-+  /* Used to store loop_vec_infos of epilogues of this loop during
-+     analysis.  */
-+  vec<_loop_vec_info *> epilogue_vinfos;
-+
- } *loop_vec_info;
- 
- /* Access Functions.  */
-@@ -682,6 +779,8 @@ enum stmt_vec_info_type {
-   type_promotion_vec_info_type,
-   type_demotion_vec_info_type,
-   type_conversion_vec_info_type,
-+  cycle_phi_info_type,
-+  lc_phi_info_type,
-   loop_exit_ctrl_vec_info_type
- };
- 
-@@ -917,21 +1016,42 @@ struct _stmt_vec_info {
-      for loop vectorization.  */
-   vect_memory_access_type memory_access_type;
- 
--  /* For reduction loops, this is the type of reduction.  */
--  enum vect_reduction_type v_reduc_type;
-+  /* For INTEGER_INDUC_COND_REDUCTION, the initial value to be used.  */
-+  tree induc_cond_initial_val;
- 
--  /* For CONST_COND_REDUCTION, record the reduc code.  */
--  enum tree_code const_cond_reduc_code;
-+  /* If not NULL the value to be added to compute final reduction value.  */
-+  tree reduc_epilogue_adjustment;
- 
-   /* On a reduction PHI the reduction type as detected by
--     vect_force_simple_reduction.  */
-+     vect_is_simple_reduction and vectorizable_reduction.  */
-   enum vect_reduction_type reduc_type;
- 
-+  /* The original reduction code, to be used in the epilogue.  */
-+  enum tree_code reduc_code;
-+  /* An internal function we should use in the epilogue.  */
-+  internal_fn reduc_fn;
-+
-+  /* On a stmt participating in the reduction the index of the operand
-+     on the reduction SSA cycle.  */
-+  int reduc_idx;
-+
-   /* On a reduction PHI the def returned by vect_force_simple_reduction.
-      On the def returned by vect_force_simple_reduction the
-      corresponding PHI.  */
-   stmt_vec_info reduc_def;
- 
-+  /* The vector input type relevant for reduction vectorization.  */
-+  tree reduc_vectype_in;
-+
-+  /* The vector type for performing the actual reduction.  */
-+  tree reduc_vectype;
-+
-+  /* Whether we force a single cycle PHI during reduction vectorization.  */
-+  bool force_single_cycle;
-+
-+  /* Whether on this stmt reduction meta is recorded.  */
-+  bool is_reduc_info;
-+
-   /* The number of scalar stmt references from active SLP instances.  */
-   unsigned int num_slp_uses;
- 
-@@ -949,6 +1069,9 @@ struct _stmt_vec_info {
-      and OPERATION_BITS without changing the result.  */
-   unsigned int operation_precision;
-   signop operation_sign;
-+
-+  /* True if this is only suitable for SLP vectorization.  */
-+  bool slp_vect_only_p;
- };
- 
- /* Information about a gather/scatter call.  */
-@@ -1011,8 +1134,10 @@ STMT_VINFO_BB_VINFO (stmt_vec_info stmt_vinfo)
- #define STMT_VINFO_STRIDED_P(S)	   	   (S)->strided_p
- #define STMT_VINFO_MEMORY_ACCESS_TYPE(S)   (S)->memory_access_type
- #define STMT_VINFO_SIMD_LANE_ACCESS_P(S)   (S)->simd_lane_access_p
--#define STMT_VINFO_VEC_REDUCTION_TYPE(S)   (S)->v_reduc_type
--#define STMT_VINFO_VEC_CONST_COND_REDUC_CODE(S) (S)->const_cond_reduc_code
-+#define STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL(S) (S)->induc_cond_initial_val
-+#define STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT(S) (S)->reduc_epilogue_adjustment
-+#define STMT_VINFO_REDUC_IDX(S)		   (S)->reduc_idx
-+#define STMT_VINFO_FORCE_SINGLE_CYCLE(S)   (S)->force_single_cycle
- 
- #define STMT_VINFO_DR_WRT_VEC_LOOP(S)      (S)->dr_wrt_vec_loop
- #define STMT_VINFO_DR_BASE_ADDRESS(S)      (S)->dr_wrt_vec_loop.base_address
-@@ -1043,7 +1168,12 @@ STMT_VINFO_BB_VINFO (stmt_vec_info stmt_vinfo)
- #define STMT_VINFO_MIN_NEG_DIST(S)	(S)->min_neg_dist
- #define STMT_VINFO_NUM_SLP_USES(S)	(S)->num_slp_uses
- #define STMT_VINFO_REDUC_TYPE(S)	(S)->reduc_type
-+#define STMT_VINFO_REDUC_CODE(S)	(S)->reduc_code
-+#define STMT_VINFO_REDUC_FN(S)		(S)->reduc_fn
- #define STMT_VINFO_REDUC_DEF(S)		(S)->reduc_def
-+#define STMT_VINFO_REDUC_VECTYPE(S)     (S)->reduc_vectype
-+#define STMT_VINFO_REDUC_VECTYPE_IN(S)  (S)->reduc_vectype_in
-+#define STMT_VINFO_SLP_VECT_ONLY(S)     (S)->slp_vect_only_p
- 
- #define DR_GROUP_FIRST_ELEMENT(S) \
-   (gcc_checking_assert ((S)->dr_aux.dr), (S)->first_element)
-@@ -1358,7 +1488,7 @@ vect_get_num_copies (loop_vec_info loop_vinfo, tree vectype)
- static inline void
- vect_update_max_nunits (poly_uint64 *max_nunits, poly_uint64 nunits)
- {
--  /* All unit counts have the form current_vector_size * X for some
-+  /* All unit counts have the form vec_info::vector_size * X for some
-      rational X, so two unit sizes must have a common multiple.
-      Everything is a multiple of the initial value of 1.  */
-   *max_nunits = force_common_multiple (*max_nunits, nunits);
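For constant unit counts, the bookkeeping in the comment above amounts to keeping a running least common multiple (force_common_multiple generalizes this to poly_uint64 values). A plain-integer sketch of that invariant, with ad hoc helper names that are not GCC's:

#include <cassert>
#include <cstdint>

/* Plain-integer stand-in for the poly_uint64 bookkeeping above: with
   constant unit counts, keeping a running common multiple is an LCM
   computation.  Helper names here are invented for the example.  */
static uint64_t gcd (uint64_t a, uint64_t b)
{
  while (b != 0)
    {
      uint64_t t = a % b;
      a = b;
      b = t;
    }
  return a;
}

static void update_max_nunits (uint64_t *max_nunits, uint64_t nunits)
{
  /* Everything is a multiple of the initial value of 1.  */
  *max_nunits = *max_nunits / gcd (*max_nunits, nunits) * nunits;
}

int main ()
{
  uint64_t max = 1;
  update_max_nunits (&max, 4);	/* max == 4 */
  update_max_nunits (&max, 8);	/* max == 8 */
  update_max_nunits (&max, 3);	/* max == 24 */
  assert (max == 24);
  return 0;
}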
-@@ -1466,20 +1596,22 @@ extern void vect_set_loop_condition (struct loop *, loop_vec_info,
- extern bool slpeel_can_duplicate_loop_p (const struct loop *, const_edge);
- struct loop *slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *,
- 						     struct loop *, edge);
--struct loop *vect_loop_versioning (loop_vec_info, unsigned int, bool,
--				   poly_uint64);
-+struct loop *vect_loop_versioning (loop_vec_info);
- extern struct loop *vect_do_peeling (loop_vec_info, tree, tree,
--				     tree *, tree *, tree *, int, bool, bool);
-+				    tree *, tree *, tree *, int, bool, bool,
-+				    tree *, drs_init_vec &);
- extern void vect_prepare_for_masked_peels (loop_vec_info);
- extern dump_user_location_t find_loop_location (struct loop *);
- extern bool vect_can_advance_ivs_p (loop_vec_info);
-+extern void vect_update_inits_of_drs (loop_vec_info, tree, tree_code);
- 
- /* In tree-vect-stmts.c.  */
--extern poly_uint64 current_vector_size;
--extern tree get_vectype_for_scalar_type (tree);
--extern tree get_vectype_for_scalar_type_and_size (tree, poly_uint64);
--extern tree get_mask_type_for_scalar_type (tree);
-+extern tree get_related_vectype_for_scalar_type (machine_mode, tree,
-+						 poly_uint64 = 0);
-+extern tree get_vectype_for_scalar_type (vec_info *, tree);
-+extern tree get_mask_type_for_scalar_type (vec_info *, tree);
- extern tree get_same_sized_vectype (tree, tree);
-+extern bool vect_chooses_same_modes_p (vec_info *, machine_mode);
- extern bool vect_get_loop_mask_type (loop_vec_info);
- extern bool vect_is_simple_use (tree, vec_info *, enum vect_def_type *,
- 				stmt_vec_info * = NULL, gimple ** = NULL);
-@@ -1491,15 +1623,15 @@ extern bool supportable_widening_operation (enum tree_code, stmt_vec_info,
- 					    enum tree_code *, int *,
- 					    vec *);
- extern bool supportable_narrowing_operation (enum tree_code, tree, tree,
--					     enum tree_code *,
--					     int *, vec *);
-+					     enum tree_code *, int *,
-+					     vec *);
- extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
- 				  enum vect_cost_for_stmt, stmt_vec_info,
- 				  int, enum vect_cost_model_location);
- extern stmt_vec_info vect_finish_replace_stmt (stmt_vec_info, gimple *);
- extern stmt_vec_info vect_finish_stmt_generation (stmt_vec_info, gimple *,
- 						  gimple_stmt_iterator *);
--extern opt_result vect_mark_stmts_to_be_vectorized (loop_vec_info);
-+extern opt_result vect_mark_stmts_to_be_vectorized (loop_vec_info, bool *);
- extern tree vect_get_store_rhs (stmt_vec_info);
- extern tree vect_get_vec_def_for_operand_1 (stmt_vec_info, enum vect_def_type);
- extern tree vect_get_vec_def_for_operand (tree, stmt_vec_info, tree = NULL);
-@@ -1515,19 +1647,13 @@ extern bool vect_transform_stmt (stmt_vec_info, gimple_stmt_iterator *,
- extern void vect_remove_stores (stmt_vec_info);
- extern opt_result vect_analyze_stmt (stmt_vec_info, bool *, slp_tree,
- 				     slp_instance, stmt_vector_for_cost *);
--extern bool vectorizable_condition (stmt_vec_info, gimple_stmt_iterator *,
--				    stmt_vec_info *, bool, slp_tree,
--				    stmt_vector_for_cost *);
--extern bool vectorizable_shift (stmt_vec_info, gimple_stmt_iterator *,
--				stmt_vec_info *, slp_tree,
--				stmt_vector_for_cost *);
- extern void vect_get_load_cost (stmt_vec_info, int, bool,
- 				unsigned int *, unsigned int *,
- 				stmt_vector_for_cost *,
- 				stmt_vector_for_cost *, bool);
- extern void vect_get_store_cost (stmt_vec_info, int,
- 				 unsigned int *, stmt_vector_for_cost *);
--extern bool vect_supportable_shift (enum tree_code, tree);
-+extern bool vect_supportable_shift (vec_info *, enum tree_code, tree);
- extern tree vect_gen_perm_mask_any (tree, const vec_perm_indices &);
- extern tree vect_gen_perm_mask_checked (tree, const vec_perm_indices &);
- extern void optimize_mask_stores (struct loop*);
-@@ -1557,7 +1683,7 @@ extern bool vect_check_gather_scatter (stmt_vec_info, loop_vec_info,
- 				       gather_scatter_info *);
- extern opt_result vect_find_stmt_data_reference (loop_p, gimple *,
- 						 vec *);
--extern opt_result vect_analyze_data_refs (vec_info *, poly_uint64 *);
-+extern opt_result vect_analyze_data_refs (vec_info *, poly_uint64 *, bool *);
- extern void vect_record_base_alignments (vec_info *);
- extern tree vect_create_data_ref_ptr (stmt_vec_info, tree, struct loop *, tree,
- 				      tree *, gimple_stmt_iterator *,
-@@ -1586,40 +1712,43 @@ extern tree vect_create_addr_base_for_vector_ref (stmt_vec_info, gimple_seq *,
- 						  tree, tree = NULL_TREE);
- 
- /* In tree-vect-loop.c.  */
--/* FORNOW: Used in tree-parloops.c.  */
--extern stmt_vec_info vect_force_simple_reduction (loop_vec_info, stmt_vec_info,
--						  bool *, bool);
--/* Used in gimple-loop-interchange.c.  */
-+/* Used in tree-vect-loop-manip.c */
-+extern void determine_peel_for_niter (loop_vec_info);
-+/* Used in gimple-loop-interchange.c and tree-parloops.c.  */
- extern bool check_reduction_path (dump_user_location_t, loop_p, gphi *, tree,
- 				  enum tree_code);
-+extern bool needs_fold_left_reduction_p (tree, tree_code);
- /* Drive for loop analysis stage.  */
--extern opt_loop_vec_info vect_analyze_loop (struct loop *,
--					    loop_vec_info,
--					    vec_info_shared *);
-+extern opt_loop_vec_info vect_analyze_loop (struct loop *, vec_info_shared *);
- extern tree vect_build_loop_niters (loop_vec_info, bool * = NULL);
- extern void vect_gen_vector_loop_niters (loop_vec_info, tree, tree *,
- 					 tree *, bool);
--extern tree vect_halve_mask_nunits (tree);
--extern tree vect_double_mask_nunits (tree);
-+extern tree vect_halve_mask_nunits (tree, machine_mode);
-+extern tree vect_double_mask_nunits (tree, machine_mode);
- extern void vect_record_loop_mask (loop_vec_info, vec_loop_masks *,
--				   unsigned int, tree);
-+				   unsigned int, tree, tree);
- extern tree vect_get_loop_mask (gimple_stmt_iterator *, vec_loop_masks *,
- 				unsigned int, tree, unsigned int);
-+extern stmt_vec_info info_for_reduction (stmt_vec_info);
- 
- /* Drive for loop transformation stage.  */
- extern struct loop *vect_transform_loop (loop_vec_info);
- extern opt_loop_vec_info vect_analyze_loop_form (struct loop *,
- 						 vec_info_shared *);
- extern bool vectorizable_live_operation (stmt_vec_info, gimple_stmt_iterator *,
--					 slp_tree, int, stmt_vec_info *,
-+					 slp_tree, slp_instance, int,
-+					 stmt_vec_info *,
- 					 stmt_vector_for_cost *);
--extern bool vectorizable_reduction (stmt_vec_info, gimple_stmt_iterator *,
--				    stmt_vec_info *, slp_tree, slp_instance,
-+extern bool vectorizable_reduction (stmt_vec_info, slp_tree, slp_instance,
- 				    stmt_vector_for_cost *);
- extern bool vectorizable_induction (stmt_vec_info, gimple_stmt_iterator *,
- 				    stmt_vec_info *, slp_tree,
- 				    stmt_vector_for_cost *);
--extern tree get_initial_def_for_reduction (stmt_vec_info, tree, tree *);
-+extern bool vect_transform_reduction (stmt_vec_info, gimple_stmt_iterator *,
-+				      stmt_vec_info *, slp_tree);
-+extern bool vect_transform_cycle_phi (stmt_vec_info, stmt_vec_info *,
-+				      slp_tree, slp_instance);
-+extern bool vectorizable_lc_phi (stmt_vec_info, stmt_vec_info *, slp_tree);
- extern bool vect_worthwhile_without_simd_p (vec_info *, tree_code);
- extern int vect_get_known_peeling_cost (loop_vec_info, int, int *,
- 					stmt_vector_for_cost *,
-@@ -1637,15 +1766,16 @@ extern void vect_schedule_slp (vec_info *);
- extern opt_result vect_analyze_slp (vec_info *, unsigned);
- extern bool vect_make_slp_decision (loop_vec_info);
- extern void vect_detect_hybrid_slp (loop_vec_info);
--extern void vect_get_slp_defs (vec , slp_tree, vec > *);
-+extern void vect_get_slp_defs (slp_tree, vec > *, unsigned n = -1U);
- extern bool vect_slp_bb (basic_block);
- extern stmt_vec_info vect_find_last_scalar_stmt_in_slp (slp_tree);
- extern bool is_simple_and_all_uses_invariant (stmt_vec_info, loop_vec_info);
--extern bool can_duplicate_and_interleave_p (unsigned int, machine_mode,
-+extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int,
-+					    machine_mode,
- 					    unsigned int * = NULL,
- 					    tree * = NULL, tree * = NULL);
--extern void duplicate_and_interleave (gimple_seq *, tree, vec,
--				      unsigned int, vec &);
-+extern void duplicate_and_interleave (vec_info *, gimple_seq *, tree,
-+				      vec, unsigned int, vec &);
- extern int vect_get_place_in_interleaving_chain (stmt_vec_info, stmt_vec_info);
- 
- /* In tree-vect-patterns.c.  */
-diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
-index 2140101d7d2..fbcd8aa6367 100644
---- a/gcc/tree-vrp.c
-+++ b/gcc/tree-vrp.c
-@@ -69,23 +69,20 @@ along with GCC; see the file COPYING3.  If not see
- #include "builtins.h"
- #include "wide-int-range.h"
- 
-+static bool
-+ranges_from_anti_range (const value_range_base *ar,
-+			value_range_base *vr0, value_range_base *vr1,
-+			bool handle_pointers = false);
-+
- /* Set of SSA names found live during the RPO traversal of the function
-    for still active basic-blocks.  */
- static sbitmap *live;
- 
--void
--value_range_base::set (enum value_range_kind kind, tree min, tree max)
--{
--  m_kind = kind;
--  m_min = min;
--  m_max = max;
--  if (flag_checking)
--    check ();
--}
--
- void
- value_range::set_equiv (bitmap equiv)
- {
-+  if (undefined_p () || varying_p ())
-+    equiv = NULL;
-   /* Since updating the equivalence set involves deep copying the
-      bitmaps, only do it if absolutely necessary.
- 
-@@ -261,7 +258,8 @@ value_range_base::constant_p () const
- void
- value_range_base::set_undefined ()
- {
--  set (VR_UNDEFINED, NULL, NULL);
-+  m_kind = VR_UNDEFINED;
-+  m_min = m_max = NULL;
- }
- 
- void
-@@ -273,7 +271,8 @@ value_range::set_undefined ()
- void
- value_range_base::set_varying ()
- {
--  set (VR_VARYING, NULL, NULL);
-+  m_kind = VR_VARYING;
-+  m_min = m_max = NULL;
- }
- 
- void
-@@ -335,6 +334,24 @@ value_range::equiv_add (const_tree var,
- bool
- value_range_base::singleton_p (tree *result) const
- {
-+  if (m_kind == VR_ANTI_RANGE)
-+    {
-+      if (nonzero_p ())
-+	{
-+	  if (TYPE_PRECISION (type ()) == 1)
-+	    {
-+	      if (result)
-+		*result = m_max;
-+	      return true;
-+	    }
-+	  return false;
-+	}
-+
-+      value_range_base vr0, vr1;
-+      return (ranges_from_anti_range (this, &vr0, &vr1, true)
-+	      && vr1.undefined_p ()
-+	      && vr0.singleton_p (result));
-+    }
-   if (m_kind == VR_RANGE
-       && vrp_operand_equal_p (min (), max ())
-       && is_gimple_min_invariant (min ()))
-@@ -510,23 +527,28 @@ static assert_locus **asserts_for;
- /* Return the maximum value for TYPE.  */
- 
- tree
--vrp_val_max (const_tree type)
-+vrp_val_max (const_tree type, bool handle_pointers)
- {
--  if (!INTEGRAL_TYPE_P (type))
--    return NULL_TREE;
--
--  return TYPE_MAX_VALUE (type);
-+  if (INTEGRAL_TYPE_P (type))
-+    return TYPE_MAX_VALUE (type);
-+  if (POINTER_TYPE_P (type) && handle_pointers)
-+    {
-+      wide_int max = wi::max_value (TYPE_PRECISION (type), TYPE_SIGN (type));
-+      return wide_int_to_tree (const_cast (type), max);
-+    }
-+  return NULL_TREE;
- }
- 
- /* Return the minimum value for TYPE.  */
- 
- tree
--vrp_val_min (const_tree type)
-+vrp_val_min (const_tree type, bool handle_pointers)
- {
--  if (!INTEGRAL_TYPE_P (type))
--    return NULL_TREE;
--
--  return TYPE_MIN_VALUE (type);
-+  if (INTEGRAL_TYPE_P (type))
-+    return TYPE_MIN_VALUE (type);
-+  if (POINTER_TYPE_P (type) && handle_pointers)
-+    return build_zero_cst (const_cast (type));
-+  return NULL_TREE;
- }
- 
- /* Return whether VAL is equal to the maximum value of its type.
-@@ -637,8 +659,7 @@ intersect_range_with_nonzero_bits (enum value_range_kind vr_type,
-    extract ranges from var + CST op limit.  */
- 
- void
--value_range_base::set_and_canonicalize (enum value_range_kind kind,
--					tree min, tree max)
-+value_range_base::set (enum value_range_kind kind, tree min, tree max)
- {
-   /* Use the canonical setters for VR_UNDEFINED and VR_VARYING.  */
-   if (kind == VR_UNDEFINED)
-@@ -652,11 +673,31 @@ value_range_base::set_and_canonicalize (enum value_range_kind kind,
-       return;
-     }
- 
-+  /* Convert POLY_INT_CST bounds into worst-case INTEGER_CST bounds.  */
-+  if (POLY_INT_CST_P (min))
-+    {
-+      tree type_min = vrp_val_min (TREE_TYPE (min), true);
-+      widest_int lb
-+	= constant_lower_bound_with_limit (wi::to_poly_widest (min),
-+					   wi::to_widest (type_min));
-+      min = wide_int_to_tree (TREE_TYPE (min), lb);
-+    }
-+  if (POLY_INT_CST_P (max))
-+    {
-+      tree type_max = vrp_val_max (TREE_TYPE (max), true);
-+      widest_int ub
-+	= constant_upper_bound_with_limit (wi::to_poly_widest (max),
-+					   wi::to_widest (type_max));
-+      max = wide_int_to_tree (TREE_TYPE (max), ub);
-+    }
-+
-   /* Nothing to canonicalize for symbolic ranges.  */
-   if (TREE_CODE (min) != INTEGER_CST
-       || TREE_CODE (max) != INTEGER_CST)
-     {
--      set (kind, min, max);
-+      m_kind = kind;
-+      m_min = min;
-+      m_max = max;
-       return;
-     }
- 
-@@ -692,12 +733,13 @@ value_range_base::set_and_canonicalize (enum value_range_kind kind,
-       kind = kind == VR_RANGE ? VR_ANTI_RANGE : VR_RANGE;
-     }
- 
-+  tree type = TREE_TYPE (min);
-+
-   /* Anti-ranges that can be represented as ranges should be so.  */
-   if (kind == VR_ANTI_RANGE)
-     {
-       /* For -fstrict-enums we may receive out-of-range ranges so consider
-          values < -INF and values > INF as -INF/INF as well.  */
--      tree type = TREE_TYPE (min);
-       bool is_min = (INTEGRAL_TYPE_P (type)
- 		     && tree_int_cst_compare (min, TYPE_MIN_VALUE (type)) <= 0);
-       bool is_max = (INTEGRAL_TYPE_P (type)
-@@ -740,22 +782,37 @@ value_range_base::set_and_canonicalize (enum value_range_kind kind,
-         }
-     }
- 
-+  /* Normalize [MIN, MAX] into VARYING and ~[MIN, MAX] into UNDEFINED.
-+
-+     Avoid using TYPE_{MIN,MAX}_VALUE because -fstrict-enums can
-+     restrict those to a subset of what actually fits in the type.
-+     Instead use the extremes of the type precision which will allow
-+     compare_range_with_value() to check if a value is inside a range,
-+     whereas if we used TYPE_*_VAL, said function would just punt
-+     upon seeing a VARYING.  */
-+  unsigned prec = TYPE_PRECISION (type);
-+  signop sign = TYPE_SIGN (type);
-+  if (wi::eq_p (wi::to_wide (min), wi::min_value (prec, sign))
-+      && wi::eq_p (wi::to_wide (max), wi::max_value (prec, sign)))
-+    {
-+      if (kind == VR_RANGE)
-+	set_varying ();
-+      else if (kind == VR_ANTI_RANGE)
-+	set_undefined ();
-+      else
-+	gcc_unreachable ();
-+      return;
-+    }
-+
-   /* Do not drop [-INF(OVF), +INF(OVF)] to varying.  (OVF) has to be sticky
-      to make sure VRP iteration terminates, otherwise we can get into
-      oscillations.  */
- 
--  set (kind, min, max);
--}
--
--void
--value_range::set_and_canonicalize (enum value_range_kind kind,
--				   tree min, tree max, bitmap equiv)
--{
--  value_range_base::set_and_canonicalize (kind, min, max);
--  if (this->kind () == VR_RANGE || this->kind () == VR_ANTI_RANGE)
--    set_equiv (equiv);
--  else
--    equiv_clear ();
-+  m_kind = kind;
-+  m_min = min;
-+  m_max = max;
-+  if (flag_checking)
-+    check ();
- }
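The reason the normalization above compares bounds against the precision extremes rather than TYPE_MIN/MAX_VALUE can be seen with a small model: for an 8-bit type those extremes are [-128, 127] (signed) or [0, 255] (unsigned), and only a range covering that entire span is information-free. The helpers below are illustrative only and not part of the patch:

#include <cassert>
#include <cstdint>

/* Extremes of an integer type from its precision and sign alone,
   independent of any -fstrict-enums narrowing of TYPE_MIN/MAX_VALUE.  */
static int64_t prec_min (unsigned prec, bool is_signed)
{
  return is_signed ? -(int64_t (1) << (prec - 1)) : 0;
}

static int64_t prec_max (unsigned prec, bool is_signed)
{
  return (int64_t (1) << (prec - is_signed)) - 1;
}

int main ()
{
  assert (prec_min (8, true) == -128 && prec_max (8, true) == 127);
  assert (prec_min (8, false) == 0 && prec_max (8, false) == 255);
  /* A range [-128, 127] over an 8-bit signed type says nothing, so it is
     turned into VARYING; ~[-128, 127] excludes everything, so UNDEFINED.  */
  return 0;
}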
- 
- void
-@@ -776,32 +833,19 @@ value_range::set (tree val)
-   set (VR_RANGE, val, val, NULL);
- }
- 
--/* Set value range VR to a non-NULL range of type TYPE.  */
-+/* Set value range VR to a nonzero range of type TYPE.  */
- 
- void
--value_range_base::set_nonnull (tree type)
-+value_range_base::set_nonzero (tree type)
- {
-   tree zero = build_int_cst (type, 0);
-   set (VR_ANTI_RANGE, zero, zero);
- }
- 
--void
--value_range::set_nonnull (tree type)
--{
--  tree zero = build_int_cst (type, 0);
--  set (VR_ANTI_RANGE, zero, zero, NULL);
--}
--
--/* Set value range VR to a NULL range of type TYPE.  */
-+/* Set value range VR to a ZERO range of type TYPE.  */
- 
- void
--value_range_base::set_null (tree type)
--{
--  set (build_int_cst (type, 0));
--}
--
--void
--value_range::set_null (tree type)
-+value_range_base::set_zero (tree type)
- {
-   set (build_int_cst (type, 0));
- }
-@@ -830,22 +874,6 @@ vrp_bitmap_equal_p (const_bitmap b1, const_bitmap b2)
- 	      && bitmap_equal_p (b1, b2)));
- }
- 
--/* Return true if VR is [0, 0].  */
--
--static inline bool
--range_is_null (const value_range_base *vr)
--{
--  return vr->zero_p ();
--}
--
--static inline bool
--range_is_nonnull (const value_range_base *vr)
--{
--  return (vr->kind () == VR_ANTI_RANGE
--	  && vr->min () == vr->max ()
--	  && integer_zerop (vr->min ()));
--}
--
- /* Return true if max and min of VR are INTEGER_CST.  It's not necessary
-    a singleton.  */
- 
-@@ -949,22 +977,17 @@ operand_less_p (tree val, tree val2)
-   /* LT is folded faster than GE and others.  Inline the common case.  */
-   if (TREE_CODE (val) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST)
-     return tree_int_cst_lt (val, val2);
-+  else if (TREE_CODE (val) == SSA_NAME && TREE_CODE (val2) == SSA_NAME)
-+    return val == val2 ? 0 : -2;
-   else
-     {
--      tree tcmp;
--
--      fold_defer_overflow_warnings ();
--
--      tcmp = fold_binary_to_constant (LT_EXPR, boolean_type_node, val, val2);
--
--      fold_undefer_and_ignore_overflow_warnings ();
--
--      if (!tcmp
--	  || TREE_CODE (tcmp) != INTEGER_CST)
--	return -2;
--
--      if (!integer_zerop (tcmp))
-+      int cmp = compare_values (val, val2);
-+      if (cmp == -1)
- 	return 1;
-+      else if (cmp == 0 || cmp == 1)
-+	return 0;
-+      else
-+	return -2;
-     }
- 
-   return 0;
-@@ -998,8 +1021,8 @@ compare_values_warnv (tree val1, tree val2, bool *strict_overflow_p)
- 
-   /* Convert the two values into the same type.  This is needed because
-      sizetype causes sign extension even for unsigned types.  */
--  val2 = fold_convert (TREE_TYPE (val1), val2);
--  STRIP_USELESS_TYPE_CONVERSION (val2);
-+  if (!useless_type_conversion_p (TREE_TYPE (val1), TREE_TYPE (val2)))
-+    val2 = fold_convert (TREE_TYPE (val1), val2);
- 
-   const bool overflow_undefined
-     = INTEGRAL_TYPE_P (TREE_TYPE (val1))
-@@ -1107,32 +1130,43 @@ compare_values_warnv (tree val1, tree val2, bool *strict_overflow_p)
-     }
-   else
-     {
--      tree t;
-+      if (TREE_CODE (val1) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST)
-+	{
-+	  /* We cannot compare overflowed values.  */
-+	  if (TREE_OVERFLOW (val1) || TREE_OVERFLOW (val2))
-+	    return -2;
-+
-+	  return tree_int_cst_compare (val1, val2);
-+	}
- 
-       /* First see if VAL1 and VAL2 are not the same.  */
--      if (val1 == val2 || operand_equal_p (val1, val2, 0))
-+      if (operand_equal_p (val1, val2, 0))
- 	return 0;
- 
-+      fold_defer_overflow_warnings ();
-+
-       /* If VAL1 is a lower address than VAL2, return -1.  */
--      if (operand_less_p (val1, val2) == 1)
--	return -1;
-+      tree t = fold_binary_to_constant (LT_EXPR, boolean_type_node, val1, val2);
-+      if (t && integer_onep (t))
-+	{
-+	  fold_undefer_and_ignore_overflow_warnings ();
-+	  return -1;
-+	}
- 
-       /* If VAL1 is a higher address than VAL2, return +1.  */
--      if (operand_less_p (val2, val1) == 1)
--	return 1;
--
--      /* If VAL1 is different than VAL2, return +2.
--	 For integer constants we either have already returned -1 or 1
--	 or they are equivalent.  We still might succeed in proving
--	 something about non-trivial operands.  */
--      if (TREE_CODE (val1) != INTEGER_CST
--	  || TREE_CODE (val2) != INTEGER_CST)
-+      t = fold_binary_to_constant (LT_EXPR, boolean_type_node, val2, val1);
-+      if (t && integer_onep (t))
- 	{
--          t = fold_binary_to_constant (NE_EXPR, boolean_type_node, val1, val2);
--	  if (t && integer_onep (t))
--	    return 2;
-+	  fold_undefer_and_ignore_overflow_warnings ();
-+	  return 1;
- 	}
- 
-+      /* If VAL1 is different than VAL2, return +2.  */
-+      t = fold_binary_to_constant (NE_EXPR, boolean_type_node, val1, val2);
-+      fold_undefer_and_ignore_overflow_warnings ();
-+      if (t && integer_onep (t))
-+	return 2;
-+
-       return -2;
-     }
- }
-@@ -1231,7 +1265,8 @@ vrp_set_zero_nonzero_bits (const tree expr_type,
- 
- static bool
- ranges_from_anti_range (const value_range_base *ar,
--			value_range_base *vr0, value_range_base *vr1)
-+			value_range_base *vr0, value_range_base *vr1,
-+			bool handle_pointers)
- {
-   tree type = ar->type ();
- 
-@@ -1244,18 +1279,18 @@ ranges_from_anti_range (const value_range_base *ar,
-   if (ar->kind () != VR_ANTI_RANGE
-       || TREE_CODE (ar->min ()) != INTEGER_CST
-       || TREE_CODE (ar->max ()) != INTEGER_CST
--      || !vrp_val_min (type)
--      || !vrp_val_max (type))
-+      || !vrp_val_min (type, handle_pointers)
-+      || !vrp_val_max (type, handle_pointers))
-     return false;
- 
--  if (tree_int_cst_lt (vrp_val_min (type), ar->min ()))
-+  if (tree_int_cst_lt (vrp_val_min (type, handle_pointers), ar->min ()))
-     vr0->set (VR_RANGE,
--	      vrp_val_min (type),
-+	      vrp_val_min (type, handle_pointers),
- 	      wide_int_to_tree (type, wi::to_wide (ar->min ()) - 1));
--  if (tree_int_cst_lt (ar->max (), vrp_val_max (type)))
-+  if (tree_int_cst_lt (ar->max (), vrp_val_max (type, handle_pointers)))
-     vr1->set (VR_RANGE,
- 	      wide_int_to_tree (type, wi::to_wide (ar->max ()) + 1),
--	      vrp_val_max (type));
-+	      vrp_val_max (type, handle_pointers));
-   if (vr0->undefined_p ())
-     {
-       *vr0 = *vr1;
-@@ -1266,21 +1301,20 @@ ranges_from_anti_range (const value_range_base *ar,
- }
- 
- /* Extract the components of a value range into a pair of wide ints in
--   [WMIN, WMAX].
--
--   If the value range is anything but a VR_*RANGE of constants, the
--   resulting wide ints are set to [-MIN, +MAX] for the type.  */
-+   [WMIN, WMAX], after having normalized any symbolics from the input.  */
- 
- static void inline
--extract_range_into_wide_ints (const value_range_base *vr,
--			      signop sign, unsigned prec,
--			      wide_int &wmin, wide_int &wmax)
-+extract_range_into_wide_ints (const value_range_base *vr_,
-+			      tree type, wide_int &wmin, wide_int &wmax)
- {
--  gcc_assert (vr->kind () != VR_ANTI_RANGE || vr->symbolic_p ());
--  if (range_int_cst_p (vr))
-+  signop sign = TYPE_SIGN (type);
-+  unsigned int prec = TYPE_PRECISION (type);
-+  gcc_assert (vr_->kind () != VR_ANTI_RANGE || vr_->symbolic_p ());
-+  value_range vr = vr_->normalize_symbolics ();
-+  if (range_int_cst_p (&vr))
-     {
--      wmin = wi::to_wide (vr->min ());
--      wmax = wi::to_wide (vr->max ());
-+      wmin = wi::to_wide (vr.min ());
-+      wmax = wi::to_wide (vr.max ());
-     }
-   else
-     {
-@@ -1295,7 +1329,7 @@ extract_range_into_wide_ints (const value_range_base *vr,
- 
- static void
- extract_range_from_multiplicative_op (value_range_base *vr,
--				      enum tree_code code,
-+				      enum tree_code code, tree type,
- 				      const value_range_base *vr0,
- 				      const value_range_base *vr1)
- {
-@@ -1307,13 +1341,31 @@ extract_range_from_multiplicative_op (value_range_base *vr,
- 	      || code == ROUND_DIV_EXPR
- 	      || code == RSHIFT_EXPR
- 	      || code == LSHIFT_EXPR);
--  gcc_assert (vr0->kind () == VR_RANGE
--	      && vr0->kind () == vr1->kind ());
-+  if (!range_int_cst_p (vr1))
-+    {
-+      vr->set_varying ();
-+      return;
-+    }
-+
-+  /* Even if vr0 is VARYING or otherwise not usable, we can derive
-+     useful ranges just from the shift count.  E.g.
-+     x >> 63 for signed 64-bit x is always [-1, 0].  */
-+  value_range_base tem = vr0->normalize_symbolics ();
-+  tree vr0_min, vr0_max;
-+  if (tem.kind () == VR_RANGE)
-+    {
-+      vr0_min = tem.min ();
-+      vr0_max = tem.max ();
-+    }
-+  else
-+    {
-+      vr0_min = vrp_val_min (type);
-+      vr0_max = vrp_val_max (type);
-+    }
- 
--  tree type = vr0->type ();
-   wide_int res_lb, res_ub;
--  wide_int vr0_lb = wi::to_wide (vr0->min ());
--  wide_int vr0_ub = wi::to_wide (vr0->max ());
-+  wide_int vr0_lb = wi::to_wide (vr0_min);
-+  wide_int vr0_ub = wi::to_wide (vr0_max);
-   wide_int vr1_lb = wi::to_wide (vr1->min ());
-   wide_int vr1_ub = wi::to_wide (vr1->max ());
-   bool overflow_undefined = TYPE_OVERFLOW_UNDEFINED (type);
-@@ -1323,9 +1375,8 @@ extract_range_from_multiplicative_op (value_range_base *vr,
- 					code, TYPE_SIGN (type), prec,
- 					vr0_lb, vr0_ub, vr1_lb, vr1_ub,
- 					overflow_undefined))
--    vr->set_and_canonicalize (VR_RANGE,
--			      wide_int_to_tree (type, res_lb),
--			      wide_int_to_tree (type, res_ub));
-+    vr->set (VR_RANGE, wide_int_to_tree (type, res_lb),
-+	     wide_int_to_tree (type, res_ub));
-   else
-     vr->set_varying ();
- }
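The range fact quoted in the new comment is easy to sanity-check outside the compiler: with the arithmetic right shift GCC performs on signed types (and which C++20 guarantees), x >> 63 can only produce -1 or 0 for any 64-bit x. A standalone check, not part of the patch:

#include <cassert>
#include <cstdint>

/* For a signed 64-bit x, x >> 63 is always -1 or 0, so a useful range can
   be derived from the shift count alone even when nothing is known about
   the shifted operand.  */
int main ()
{
  const int64_t vals[] = { INT64_MIN, -5, 0, 7, INT64_MAX };
  for (int64_t x : vals)
    {
      int64_t r = x >> 63;
      assert (r == 0 || r == -1);
    }
  return 0;
}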
-@@ -1583,9 +1634,9 @@ extract_range_from_binary_expr (value_range_base *vr,
-      code is EXACT_DIV_EXPR.  We could mask out bits in the resulting
-      range, but then we also need to hack up vrp_union.  It's just
-      easier to special case when vr0 is ~[0,0] for EXACT_DIV_EXPR.  */
--  if (code == EXACT_DIV_EXPR && range_is_nonnull (&vr0))
-+  if (code == EXACT_DIV_EXPR && vr0.nonzero_p ())
-     {
--      vr->set_nonnull (expr_type);
-+      vr->set_nonzero (expr_type);
-       return;
-     }
- 
-@@ -1663,9 +1714,9 @@ extract_range_from_binary_expr (value_range_base *vr,
- 	     If both are null, then the result is null. Otherwise they
- 	     are varying.  */
- 	  if (!range_includes_zero_p (&vr0) && !range_includes_zero_p (&vr1))
--	    vr->set_nonnull (expr_type);
--	  else if (range_is_null (&vr0) && range_is_null (&vr1))
--	    vr->set_null (expr_type);
-+	    vr->set_nonzero (expr_type);
-+	  else if (vr0.zero_p () && vr1.zero_p ())
-+	    vr->set_zero (expr_type);
- 	  else
- 	    vr->set_varying ();
- 	}
-@@ -1692,9 +1743,9 @@ extract_range_from_binary_expr (value_range_base *vr,
- 	      && (flag_delete_null_pointer_checks
- 		  || (range_int_cst_p (&vr1)
- 		      && !tree_int_cst_sign_bit (vr1.max ()))))
--	    vr->set_nonnull (expr_type);
--	  else if (range_is_null (&vr0) && range_is_null (&vr1))
--	    vr->set_null (expr_type);
-+	    vr->set_nonzero (expr_type);
-+	  else if (vr0.zero_p () && vr1.zero_p ())
-+	    vr->set_zero (expr_type);
- 	  else
- 	    vr->set_varying ();
- 	}
-@@ -1702,8 +1753,8 @@ extract_range_from_binary_expr (value_range_base *vr,
- 	{
- 	  /* For pointer types, we are really only interested in asserting
- 	     whether the expression evaluates to non-NULL.  */
--	  if (range_is_null (&vr0) || range_is_null (&vr1))
--	    vr->set_null (expr_type);
-+	  if (vr0.zero_p () || vr1.zero_p ())
-+	    vr->set_zero (expr_type);
- 	  else
- 	    vr->set_varying ();
- 	}
-@@ -1717,19 +1768,30 @@ extract_range_from_binary_expr (value_range_base *vr,
-      range and see what we end up with.  */
-   if (code == PLUS_EXPR || code == MINUS_EXPR)
-     {
-+      value_range_kind vr0_kind = vr0.kind (), vr1_kind = vr1.kind ();
-+      tree vr0_min = vr0.min (), vr0_max = vr0.max ();
-+      tree vr1_min = vr1.min (), vr1_max = vr1.max ();
-       /* This will normalize things such that calculating
- 	 [0,0] - VR_VARYING is not dropped to varying, but is
- 	 calculated as [MIN+1, MAX].  */
-       if (vr0.varying_p ())
--	vr0.set (VR_RANGE, vrp_val_min (expr_type), vrp_val_max (expr_type));
-+	{
-+	  vr0_kind = VR_RANGE;
-+	  vr0_min = vrp_val_min (expr_type);
-+	  vr0_max = vrp_val_max (expr_type);
-+	}
-       if (vr1.varying_p ())
--	vr1.set (VR_RANGE, vrp_val_min (expr_type), vrp_val_max (expr_type));
-+	{
-+	  vr1_kind = VR_RANGE;
-+	  vr1_min = vrp_val_min (expr_type);
-+	  vr1_max = vrp_val_max (expr_type);
-+	}
- 
-       const bool minus_p = (code == MINUS_EXPR);
--      tree min_op0 = vr0.min ();
--      tree min_op1 = minus_p ? vr1.max () : vr1.min ();
--      tree max_op0 = vr0.max ();
--      tree max_op1 = minus_p ? vr1.min () : vr1.max ();
-+      tree min_op0 = vr0_min;
-+      tree min_op1 = minus_p ? vr1_max : vr1_min;
-+      tree max_op0 = vr0_max;
-+      tree max_op1 = minus_p ? vr1_min : vr1_max;
-       tree sym_min_op0 = NULL_TREE;
-       tree sym_min_op1 = NULL_TREE;
-       tree sym_max_op0 = NULL_TREE;
-@@ -1742,7 +1804,7 @@ extract_range_from_binary_expr (value_range_base *vr,
- 	 single-symbolic ranges, try to compute the precise resulting range,
- 	 but only if we know that this resulting range will also be constant
- 	 or single-symbolic.  */
--      if (vr0.kind () == VR_RANGE && vr1.kind () == VR_RANGE
-+      if (vr0_kind == VR_RANGE && vr1_kind == VR_RANGE
- 	  && (TREE_CODE (min_op0) == INTEGER_CST
- 	      || (sym_min_op0
- 		  = get_single_symbol (min_op0, &neg_min_op0, &min_op0)))
-@@ -1823,8 +1885,8 @@ extract_range_from_binary_expr (value_range_base *vr,
-       wide_int wmin, wmax;
-       wide_int vr0_min, vr0_max;
-       wide_int vr1_min, vr1_max;
--      extract_range_into_wide_ints (&vr0, sign, prec, vr0_min, vr0_max);
--      extract_range_into_wide_ints (&vr1, sign, prec, vr1_min, vr1_max);
-+      extract_range_into_wide_ints (&vr0, expr_type, vr0_min, vr0_max);
-+      extract_range_into_wide_ints (&vr1, expr_type, vr1_min, vr1_max);
-       if (wide_int_range_min_max (wmin, wmax, code, sign, prec,
- 				  vr0_min, vr0_max, vr1_min, vr1_max))
- 	vr->set (VR_RANGE, wide_int_to_tree (expr_type, wmin),
-@@ -1841,7 +1903,7 @@ extract_range_from_binary_expr (value_range_base *vr,
- 	  vr->set_varying ();
- 	  return;
- 	}
--      extract_range_from_multiplicative_op (vr, code, &vr0, &vr1);
-+      extract_range_from_multiplicative_op (vr, code, expr_type, &vr0, &vr1);
-       return;
-     }
-   else if (code == RSHIFT_EXPR
-@@ -1856,13 +1918,8 @@ extract_range_from_binary_expr (value_range_base *vr,
- 	{
- 	  if (code == RSHIFT_EXPR)
- 	    {
--	      /* Even if vr0 is VARYING or otherwise not usable, we can derive
--		 useful ranges just from the shift count.  E.g.
--		 x >> 63 for signed 64-bit x is always [-1, 0].  */
--	      if (vr0.kind () != VR_RANGE || vr0.symbolic_p ())
--		vr0.set (VR_RANGE, vrp_val_min (expr_type),
--			 vrp_val_max (expr_type));
--	      extract_range_from_multiplicative_op (vr, code, &vr0, &vr1);
-+	      extract_range_from_multiplicative_op (vr, code, expr_type,
-+						    &vr0, &vr1);
- 	      return;
- 	    }
- 	  else if (code == LSHIFT_EXPR
-@@ -1878,7 +1935,7 @@ extract_range_from_binary_expr (value_range_base *vr,
- 		{
- 		  min = wide_int_to_tree (expr_type, res_lb);
- 		  max = wide_int_to_tree (expr_type, res_ub);
--		  vr->set_and_canonicalize (VR_RANGE, min, max);
-+		  vr->set (VR_RANGE, min, max);
- 		  return;
- 		}
- 	    }
-@@ -1897,7 +1954,7 @@ extract_range_from_binary_expr (value_range_base *vr,
-       bool extra_range_p;
- 
-       /* Special case explicit division by zero as undefined.  */
--      if (range_is_null (&vr1))
-+      if (vr1.zero_p ())
- 	{
- 	  vr->set_undefined ();
- 	  return;
-@@ -1910,9 +1967,9 @@ extract_range_from_binary_expr (value_range_base *vr,
- 	 NOTE: As a future improvement, we may be able to do better
- 	 with mixed symbolic (anti-)ranges like [0, A].  See note in
- 	 ranges_from_anti_range.  */
--      extract_range_into_wide_ints (&vr0, sign, prec,
-+      extract_range_into_wide_ints (&vr0, expr_type,
- 				    dividend_min, dividend_max);
--      extract_range_into_wide_ints (&vr1, sign, prec,
-+      extract_range_into_wide_ints (&vr1, expr_type,
- 				    divisor_min, divisor_max);
-       if (!wide_int_range_div (wmin, wmax, code, sign, prec,
- 			       dividend_min, dividend_max,
-@@ -1936,15 +1993,15 @@ extract_range_from_binary_expr (value_range_base *vr,
-     }
-   else if (code == TRUNC_MOD_EXPR)
-     {
--      if (range_is_null (&vr1))
-+      if (vr1.zero_p ())
- 	{
- 	  vr->set_undefined ();
- 	  return;
- 	}
-       wide_int wmin, wmax, tmp;
-       wide_int vr0_min, vr0_max, vr1_min, vr1_max;
--      extract_range_into_wide_ints (&vr0, sign, prec, vr0_min, vr0_max);
--      extract_range_into_wide_ints (&vr1, sign, prec, vr1_min, vr1_max);
-+      extract_range_into_wide_ints (&vr0, expr_type, vr0_min, vr0_max);
-+      extract_range_into_wide_ints (&vr1, expr_type, vr1_min, vr1_max);
-       wide_int_range_trunc_mod (wmin, wmax, sign, prec,
- 				vr0_min, vr0_max, vr1_min, vr1_max);
-       min = wide_int_to_tree (expr_type, wmin);
-@@ -1962,8 +2019,8 @@ extract_range_from_binary_expr (value_range_base *vr,
- 				 &may_be_nonzero0, &must_be_nonzero0);
-       vrp_set_zero_nonzero_bits (expr_type, &vr1,
- 				 &may_be_nonzero1, &must_be_nonzero1);
--      extract_range_into_wide_ints (&vr0, sign, prec, vr0_min, vr0_max);
--      extract_range_into_wide_ints (&vr1, sign, prec, vr1_min, vr1_max);
-+      extract_range_into_wide_ints (&vr0, expr_type, vr0_min, vr0_max);
-+      extract_range_into_wide_ints (&vr1, expr_type, vr1_min, vr1_max);
-       if (code == BIT_AND_EXPR)
- 	{
- 	  if (wide_int_range_bit_and (wmin, wmax, sign, prec,
-@@ -2140,9 +2197,9 @@ extract_range_from_unary_expr (value_range_base *vr,
-       if (POINTER_TYPE_P (type) || POINTER_TYPE_P (op0_type))
- 	{
- 	  if (!range_includes_zero_p (&vr0))
--	    vr->set_nonnull (type);
--	  else if (range_is_null (&vr0))
--	    vr->set_null (type);
-+	    vr->set_nonzero (type);
-+	  else if (vr0.zero_p ())
-+	    vr->set_zero (type);
- 	  else
- 	    vr->set_varying ();
- 	  return;
-@@ -2167,8 +2224,7 @@ extract_range_from_unary_expr (value_range_base *vr,
-       signop outer_sign = TYPE_SIGN (outer_type);
-       unsigned inner_prec = TYPE_PRECISION (inner_type);
-       unsigned outer_prec = TYPE_PRECISION (outer_type);
--      extract_range_into_wide_ints (&vr0, inner_sign, inner_prec,
--				    vr0_min, vr0_max);
-+      extract_range_into_wide_ints (&vr0, inner_type, vr0_min, vr0_max);
-       if (wide_int_range_convert (wmin, wmax,
- 				  inner_sign, inner_prec,
- 				  outer_sign, outer_prec,
-@@ -2176,7 +2232,7 @@ extract_range_from_unary_expr (value_range_base *vr,
- 	{
- 	  tree min = wide_int_to_tree (outer_type, wmin);
- 	  tree max = wide_int_to_tree (outer_type, wmax);
--	  vr->set_and_canonicalize (VR_RANGE, min, max);
-+	  vr->set (VR_RANGE, min, max);
- 	}
-       else
- 	vr->set_varying ();
-@@ -2186,7 +2242,7 @@ extract_range_from_unary_expr (value_range_base *vr,
-     {
-       wide_int wmin, wmax;
-       wide_int vr0_min, vr0_max;
--      extract_range_into_wide_ints (&vr0, sign, prec, vr0_min, vr0_max);
-+      extract_range_into_wide_ints (&vr0, type, vr0_min, vr0_max);
-       if (wide_int_range_abs (wmin, wmax, sign, prec, vr0_min, vr0_max,
- 			      TYPE_OVERFLOW_UNDEFINED (type)))
- 	vr->set (VR_RANGE, wide_int_to_tree (type, wmin),
-@@ -2199,7 +2255,8 @@ extract_range_from_unary_expr (value_range_base *vr,
-     {
-       wide_int wmin, wmax;
-       wide_int vr0_min, vr0_max;
--      extract_range_into_wide_ints (&vr0, SIGNED, prec, vr0_min, vr0_max);
-+      tree signed_type = make_signed_type (TYPE_PRECISION (type));
-+      extract_range_into_wide_ints (&vr0, signed_type, vr0_min, vr0_max);
-       wide_int_range_absu (wmin, wmax, prec, vr0_min, vr0_max);
-       vr->set (VR_RANGE, wide_int_to_tree (type, wmin),
- 	       wide_int_to_tree (type, wmax));
-@@ -5468,8 +5525,10 @@ union_ranges (enum value_range_kind *vr0type,
- 	      enum value_range_kind vr1type,
- 	      tree vr1min, tree vr1max)
- {
--  bool mineq = vrp_operand_equal_p (*vr0min, vr1min);
--  bool maxeq = vrp_operand_equal_p (*vr0max, vr1max);
-+  int cmpmin = compare_values (*vr0min, vr1min);
-+  int cmpmax = compare_values (*vr0max, vr1max);
-+  bool mineq = cmpmin == 0;
-+  bool maxeq = cmpmax == 0;
- 
-   /* [] is vr0, () is vr1 in the following classification comments.  */
-   if (mineq && maxeq)
-@@ -5569,8 +5628,8 @@ union_ranges (enum value_range_kind *vr0type,
-       else
- 	gcc_unreachable ();
-     }
--  else if ((maxeq || operand_less_p (vr1max, *vr0max) == 1)
--	   && (mineq || operand_less_p (*vr0min, vr1min) == 1))
-+  else if ((maxeq || cmpmax == 1)
-+	   && (mineq || cmpmin == -1))
-     {
-       /* [ (  ) ] or [(  ) ] or [ (  )] */
-       if (*vr0type == VR_RANGE
-@@ -5603,8 +5662,8 @@ union_ranges (enum value_range_kind *vr0type,
-       else
- 	gcc_unreachable ();
-     }
--  else if ((maxeq || operand_less_p (*vr0max, vr1max) == 1)
--	   && (mineq || operand_less_p (vr1min, *vr0min) == 1))
-+  else if ((maxeq || cmpmax == -1)
-+	   && (mineq || cmpmin == 1))
-     {
-       /* ( [  ] ) or ([  ] ) or ( [  ]) */
-       if (*vr0type == VR_RANGE
-@@ -5643,10 +5702,10 @@ union_ranges (enum value_range_kind *vr0type,
-       else
- 	gcc_unreachable ();
-     }
--  else if ((operand_less_p (vr1min, *vr0max) == 1
--	    || operand_equal_p (vr1min, *vr0max, 0))
--	   && operand_less_p (*vr0min, vr1min) == 1
--	   && operand_less_p (*vr0max, vr1max) == 1)
-+  else if (cmpmin == -1
-+	   && cmpmax == -1
-+	   && (operand_less_p (vr1min, *vr0max) == 1
-+	       || operand_equal_p (vr1min, *vr0max, 0)))
-     {
-       /* [  (  ]  ) or [   ](   ) */
-       if (*vr0type == VR_RANGE
-@@ -5680,10 +5739,10 @@ union_ranges (enum value_range_kind *vr0type,
-       else
- 	gcc_unreachable ();
-     }
--  else if ((operand_less_p (*vr0min, vr1max) == 1
--	    || operand_equal_p (*vr0min, vr1max, 0))
--	   && operand_less_p (vr1min, *vr0min) == 1
--	   && operand_less_p (vr1max, *vr0max) == 1)
-+  else if (cmpmin == 1
-+	   && cmpmax == 1
-+	   && (operand_less_p (*vr0min, vr1max) == 1
-+	       || operand_equal_p (*vr0min, vr1max, 0)))
-     {
-       /* (  [  )  ] or (   )[   ] */
-       if (*vr0type == VR_RANGE
-@@ -6083,7 +6142,7 @@ value_range::intersect_helper (value_range *vr0, const value_range *vr1)
-      VR_RANGE can still be a VR_RANGE.  Work on a temporary so we can
-      fall back to vr0 when this turns things to varying.  */
-   value_range tem;
--  tem.set_and_canonicalize (vr0type, vr0min, vr0max);
-+  tem.set (vr0type, vr0min, vr0max);
-   /* If that failed, use the saved original VR0.  */
-   if (tem.varying_p ())
-     return;
-@@ -6152,8 +6211,8 @@ value_range_base::union_helper (const value_range_base *vr0,
- 		vr1->kind (), vr1->min (), vr1->max ());
- 
-   /* Work on a temporary so we can still use vr0 when union returns varying.  */
--  value_range tem;
--  tem.set_and_canonicalize (vr0type, vr0min, vr0max);
-+  value_range_base tem;
-+  tem.set (vr0type, vr0min, vr0max);
- 
-   /* Failed to find an efficient meet.  Before giving up and setting
-      the result to VARYING, see if we can at least derive a useful
-@@ -6162,7 +6221,7 @@ value_range_base::union_helper (const value_range_base *vr0,
-       && range_includes_zero_p (vr0) == 0
-       && range_includes_zero_p (vr1) == 0)
-     {
--      tem.set_nonnull (vr0->type ());
-+      tem.set_nonzero (vr0->type ());
-       return tem;
-     }
- 
-@@ -6233,6 +6292,58 @@ value_range::union_ (const value_range *other)
-     }
- }
- 
-+/* Normalize symbolics into constants.  */
-+
-+value_range_base
-+value_range_base::normalize_symbolics () const
-+{
-+  if (varying_p () || undefined_p ())
-+    return *this;
-+  tree ttype = type ();
-+  bool min_symbolic = !is_gimple_min_invariant (min ());
-+  bool max_symbolic = !is_gimple_min_invariant (max ());
-+  if (!min_symbolic && !max_symbolic)
-+    return *this;
-+
-+  // [SYM, SYM] -> VARYING
-+  if (min_symbolic && max_symbolic)
-+    {
-+      value_range_base var;
-+      var.set_varying ();
-+      return var;
-+    }
-+  if (kind () == VR_RANGE)
-+    {
-+      // [SYM, NUM] -> [-MIN, NUM]
-+      if (min_symbolic)
-+	return value_range_base (VR_RANGE, vrp_val_min (ttype), max ());
-+      // [NUM, SYM] -> [NUM, +MAX]
-+      return value_range_base (VR_RANGE, min (), vrp_val_max (ttype));
-+    }
-+  gcc_assert (kind () == VR_ANTI_RANGE);
-+  // ~[SYM, NUM] -> [NUM + 1, +MAX]
-+  if (min_symbolic)
-+    {
-+      if (!vrp_val_is_max (max ()))
-+	{
-+	  tree n = wide_int_to_tree (ttype, wi::to_wide (max ()) + 1);
-+	  return value_range_base (VR_RANGE, n, vrp_val_max (ttype));
-+	}
-+      value_range_base var;
-+      var.set_varying ();
-+      return var;
-+    }
-+  // ~[NUM, SYM] -> [-MIN, NUM - 1]
-+  if (!vrp_val_is_min (min ()))
-+    {
-+      tree n = wide_int_to_tree (ttype, wi::to_wide (min ()) - 1);
-+      return value_range_base (VR_RANGE, vrp_val_min (ttype), n);
-+    }
-+  value_range_base var;
-+  var.set_varying ();
-+  return var;
-+}
-+
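A rough illustration of the mapping implemented by normalize_symbolics, using a toy range type over plain ints; the struct, helper names, and string output below are invented for the example, while the real code works on trees and wide ints:

#include <cassert>
#include <climits>
#include <string>

/* Toy model of the normalization rules above: each bound carries a flag
   saying whether it is symbolic rather than constant.  */
struct toy_range
{
  bool anti;			/* true means ~[lo, hi].  */
  int lo, hi;
  bool lo_sym, hi_sym;		/* bound is a symbol, not a constant.  */
};

static std::string fmt (long lo, long hi)
{
  return "[" + std::to_string (lo) + ", " + std::to_string (hi) + "]";
}

static std::string normalize (const toy_range &r)
{
  if (!r.lo_sym && !r.hi_sym)		/* Already constant: nothing to do.  */
    return (r.anti ? "~" : "") + fmt (r.lo, r.hi);
  if (r.lo_sym && r.hi_sym)		/* [SYM, SYM] -> VARYING.  */
    return "VARYING";
  if (!r.anti)
    return r.lo_sym ? fmt (INT_MIN, r.hi)	/* [SYM, NUM] -> [-MIN, NUM] */
		    : fmt (r.lo, INT_MAX);	/* [NUM, SYM] -> [NUM, +MAX] */
  if (r.lo_sym)				/* ~[SYM, NUM] -> [NUM + 1, +MAX].  */
    return r.hi == INT_MAX ? "VARYING" : fmt ((long) r.hi + 1, INT_MAX);
  /* ~[NUM, SYM] -> [-MIN, NUM - 1].  */
  return r.lo == INT_MIN ? "VARYING" : fmt (INT_MIN, (long) r.lo - 1);
}

int main ()
{
  assert (normalize ({false, 0, 100, true, false}) == fmt (INT_MIN, 100));
  assert (normalize ({true, 0, 5, false, true}) == fmt (INT_MIN, -1));
  return 0;
}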
- /* Visit all arguments for PHI node PHI that flow through executable
-    edges.  If a valid value range can be derived from all the incoming
-    value ranges, set a new range for the LHS of PHI.  */
-diff --git a/gcc/tree-vrp.h b/gcc/tree-vrp.h
-index 9d52b428d05..4bcff924b58 100644
---- a/gcc/tree-vrp.h
-+++ b/gcc/tree-vrp.h
-@@ -46,8 +46,8 @@ public:
- 
-   void set (value_range_kind, tree, tree);
-   void set (tree);
--  void set_nonnull (tree);
--  void set_null (tree);
-+  void set_nonzero (tree);
-+  void set_zero (tree);
- 
-   enum value_range_kind kind () const;
-   tree min () const;
-@@ -70,11 +70,13 @@ public:
-   /* Misc methods.  */
-   tree type () const;
-   bool may_contain_p (tree) const;
--  void set_and_canonicalize (enum value_range_kind, tree, tree);
-   bool zero_p () const;
-+  bool nonzero_p () const;
-   bool singleton_p (tree *result = NULL) const;
-   void dump (FILE *) const;
- 
-+  value_range_base normalize_symbolics () const;
-+
- protected:
-   void check ();
-   static value_range_base union_helper (const value_range_base *,
-@@ -118,8 +120,6 @@ class GTY((user)) value_range : public value_range_base
-   /* Deep-copies equiv bitmap argument.  */
-   void set (value_range_kind, tree, tree, bitmap = NULL);
-   void set (tree);
--  void set_nonnull (tree);
--  void set_null (tree);
- 
-   bool operator== (const value_range &) const /* = delete */;
-   bool operator!= (const value_range &) const /* = delete */;
-@@ -138,7 +138,6 @@ class GTY((user)) value_range : public value_range_base
- 
-   /* Misc methods.  */
-   void deep_copy (const value_range *);
--  void set_and_canonicalize (enum value_range_kind, tree, tree, bitmap = NULL);
-   void dump (FILE *) const;
- 
-  private:
-@@ -222,6 +221,16 @@ value_range_base::zero_p () const
- 	  && integer_zerop (m_max));
- }
- 
-+/* Return TRUE if range is nonzero.  */
-+
-+inline bool
-+value_range_base::nonzero_p () const
-+{
-+  return (m_kind == VR_ANTI_RANGE
-+	  && integer_zerop (m_min)
-+	  && integer_zerop (m_max));
-+}
-+
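The ~[0, 0] encoding tested here is also what makes the earlier singleton_p change work for 1-bit types: excluding zero from a type with a single value bit leaves exactly one member, so m_max (i.e. 1) can be returned. A few lines of plain C++ spelling out that observation, purely illustrative and not patch code:

#include <cassert>

/* Enumerate the members of ~[0, 0] over a 1-bit unsigned type: only the
   value 1 remains, so the anti-range is in fact a singleton.  */
int main ()
{
  int members = 0, last = -1;
  for (int v = 0; v <= 1; ++v)	/* all values of a 1-bit unsigned type */
    if (v != 0)			/* ~[0, 0]: everything except zero */
      {
	++members;
	last = v;
      }
  assert (members == 1 && last == 1);
  return 0;
}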
- extern void dump_value_range (FILE *, const value_range *);
- extern void dump_value_range (FILE *, const value_range_base *);
- 
-@@ -259,8 +268,8 @@ extern bool vrp_val_is_min (const_tree);
- extern bool vrp_val_is_max (const_tree);
- extern int value_inside_range (tree, tree, tree);
- 
--extern tree vrp_val_min (const_tree);
--extern tree vrp_val_max (const_tree);
-+extern tree vrp_val_min (const_tree, bool handle_pointers = false);
-+extern tree vrp_val_max (const_tree, bool handle_pointers = false);
- 
- extern void extract_range_from_unary_expr (value_range_base *vr,
- 					   enum tree_code code,
-diff --git a/gcc/tree.c b/gcc/tree.c
-index 32e94e48132..c4b8eea675f 100644
---- a/gcc/tree.c
-+++ b/gcc/tree.c
-@@ -8213,8 +8213,6 @@ build_nonstandard_integer_type (unsigned HOST_WIDE_INT precision,
-   else
-     fixup_signed_type (itype);
- 
--  ret = itype;
--
-   inchash::hash hstate;
-   inchash::add_expr (TYPE_MAX_VALUE (itype), hstate);
-   ret = type_hash_canon (hstate.end (), itype);
-@@ -11079,44 +11077,44 @@ build_vector_type (tree innertype, poly_int64 nunits)
-   return make_vector_type (innertype, nunits, VOIDmode);
- }
- 
--/* Build truth vector with specified length and number of units.  */
-+/* Build a truth vector with NUNITS units, giving it mode MASK_MODE.  */
- 
- tree
--build_truth_vector_type (poly_uint64 nunits, poly_uint64 vector_size)
-+build_truth_vector_type_for_mode (poly_uint64 nunits, machine_mode mask_mode)
- {
--  machine_mode mask_mode
--    = targetm.vectorize.get_mask_mode (nunits, vector_size).else_blk ();
--
--  poly_uint64 vsize;
--  if (mask_mode == BLKmode)
--    vsize = vector_size * BITS_PER_UNIT;
--  else
--    vsize = GET_MODE_BITSIZE (mask_mode);
-+  gcc_assert (mask_mode != BLKmode);
- 
-+  poly_uint64 vsize = GET_MODE_BITSIZE (mask_mode);
-   unsigned HOST_WIDE_INT esize = vector_element_size (vsize, nunits);
--
-   tree bool_type = build_nonstandard_boolean_type (esize);
- 
-   return make_vector_type (bool_type, nunits, mask_mode);
- }
- 
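For reference, the element-size computation above is simply the mode size in bits divided by the number of units. The stand-in below uses made-up mode widths to show the arithmetic; it is not the real GCC helper:

#include <cassert>

/* Simplified vector_element_size: total bit size over number of lanes.
   A hypothetical 128-bit mask mode with 16 lanes gives 8-bit boolean
   elements; the same 128 bits over 4 lanes gives 32-bit ones.  */
static unsigned element_size_bits (unsigned vsize_bits, unsigned nunits)
{
  assert (vsize_bits % nunits == 0);
  return vsize_bits / nunits;
}

int main ()
{
  assert (element_size_bits (128, 16) == 8);
  assert (element_size_bits (128, 4) == 32);
  return 0;
}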
--/* Returns a vector type corresponding to a comparison of VECTYPE.  */
-+/* Build a vector type that holds one boolean result for each element of
-+   vector type VECTYPE.  The public interface for this operation is
-+   truth_type_for.  */
- 
--tree
--build_same_sized_truth_vector_type (tree vectype)
-+static tree
-+build_truth_vector_type_for (tree vectype)
- {
--  if (VECTOR_BOOLEAN_TYPE_P (vectype))
--    return vectype;
-+  machine_mode vector_mode = TYPE_MODE (vectype);
-+  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
- 
--  poly_uint64 size = GET_MODE_SIZE (TYPE_MODE (vectype));
-+  machine_mode mask_mode;
-+  if (VECTOR_MODE_P (vector_mode)
-+      && targetm.vectorize.get_mask_mode (vector_mode).exists (&mask_mode))
-+    return build_truth_vector_type_for_mode (nunits, mask_mode);
- 
--  if (known_eq (size, 0U))
--    size = tree_to_uhwi (TYPE_SIZE_UNIT (vectype));
-+  poly_uint64 vsize = tree_to_poly_uint64 (TYPE_SIZE (vectype));
-+  unsigned HOST_WIDE_INT esize = vector_element_size (vsize, nunits);
-+  tree bool_type = build_nonstandard_boolean_type (esize);
- 
--  return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype), size);
-+  return make_vector_type (bool_type, nunits, BLKmode);
- }
- 
--/* Similarly, but builds a variant type with TYPE_VECTOR_OPAQUE set.  */
-+/* Like build_vector_type, but builds a variant type with TYPE_VECTOR_OPAQUE
-+   set.  */
- 
- tree
- build_opaque_vector_type (tree innertype, poly_int64 nunits)
-@@ -11915,8 +11913,7 @@ truth_type_for (tree type)
-     {
-       if (VECTOR_BOOLEAN_TYPE_P (type))
- 	return type;
--      return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (type),
--				      GET_MODE_SIZE (TYPE_MODE (type)));
-+      return build_truth_vector_type_for (type);
-     }
-   else
-     return boolean_type_node;
-diff --git a/gcc/tree.h b/gcc/tree.h
-index 2f8e37bb356..6f73593faa7 100644
---- a/gcc/tree.h
-+++ b/gcc/tree.h
-@@ -4272,8 +4272,7 @@ extern tree build_reference_type_for_mode (tree, machine_mode, bool);
- extern tree build_reference_type (tree);
- extern tree build_vector_type_for_mode (tree, machine_mode);
- extern tree build_vector_type (tree, poly_int64);
--extern tree build_truth_vector_type (poly_uint64, poly_uint64);
--extern tree build_same_sized_truth_vector_type (tree vectype);
-+extern tree build_truth_vector_type_for_mode (poly_uint64, machine_mode);
- extern tree build_opaque_vector_type (tree, poly_int64);
- extern tree build_index_type (tree);
- extern tree build_array_type (tree, tree, bool = false);
-diff --git a/gcc/vr-values.c b/gcc/vr-values.c
-index 0e10aca92bb..02c89ab030a 100644
---- a/gcc/vr-values.c
-+++ b/gcc/vr-values.c
-@@ -118,7 +118,10 @@ vr_values::get_value_range (const_tree var)
- 	  if (POINTER_TYPE_P (TREE_TYPE (sym))
- 	      && (nonnull_arg_p (sym)
- 		  || get_ptr_nonnull (var)))
--	    vr->set_nonnull (TREE_TYPE (sym));
-+	    {
-+	      vr->set_nonzero (TREE_TYPE (sym));
-+	      vr->equiv_clear ();
-+	    }
- 	  else if (INTEGRAL_TYPE_P (TREE_TYPE (sym)))
- 	    {
- 	      get_range_info (var, *vr);
-@@ -130,7 +133,10 @@ vr_values::get_value_range (const_tree var)
- 	}
-       else if (TREE_CODE (sym) == RESULT_DECL
- 	       && DECL_BY_REFERENCE (sym))
--	vr->set_nonnull (TREE_TYPE (sym));
-+	{
-+	  vr->set_nonzero (TREE_TYPE (sym));
-+	  vr->equiv_clear ();
-+	}
-     }
- 
-   return vr;
-@@ -491,9 +497,9 @@ vr_values::extract_range_for_var_from_comparison_expr (tree var,
-          vice-versa.  Use set_and_canonicalize which does this for
-          us.  */
-       if (cond_code == LE_EXPR)
--        vr_p->set_and_canonicalize (VR_RANGE, min, max, vr_p->equiv ());
-+        vr_p->set (VR_RANGE, min, max, vr_p->equiv ());
-       else if (cond_code == GT_EXPR)
--        vr_p->set_and_canonicalize (VR_ANTI_RANGE, min, max, vr_p->equiv ());
-+        vr_p->set (VR_ANTI_RANGE, min, max, vr_p->equiv ());
-       else
- 	gcc_unreachable ();
-     }
-@@ -565,7 +571,7 @@ vr_values::extract_range_for_var_from_comparison_expr (tree var,
- 	  && vrp_val_is_max (max))
- 	min = max = limit;
- 
--      vr_p->set_and_canonicalize (VR_ANTI_RANGE, min, max, vr_p->equiv ());
-+      vr_p->set (VR_ANTI_RANGE, min, max, vr_p->equiv ());
-     }
-   else if (cond_code == LE_EXPR || cond_code == LT_EXPR)
-     {
-@@ -858,7 +864,10 @@ vr_values::extract_range_from_binary_expr (value_range *vr,
- 	  || (vr1.kind () == VR_ANTI_RANGE
- 	      && vr1.min () == op0
- 	      && vr1.min () == vr1.max ())))
--      vr->set_nonnull (expr_type);
-+    {
-+      vr->set_nonzero (expr_type);
-+      vr->equiv_clear ();
-+    }
- }
- 
- /* Extract range information from a unary expression CODE OP0 based on
-@@ -1085,7 +1094,8 @@ vr_values::extract_range_basic (value_range *vr, gimple *stmt)
- 	      && TREE_CODE (SSA_NAME_VAR (arg)) == PARM_DECL
- 	      && cfun->after_inlining)
- 	    {
--	      vr->set_null (type);
-+	      vr->set_zero (type);
-+	      vr->equiv_clear ();
- 	      return;
- 	    }
- 	  break;
-@@ -1392,7 +1402,10 @@ vr_values::extract_range_basic (value_range *vr, gimple *stmt)
-       && gimple_stmt_nonnegative_warnv_p (stmt, &sop))
-     set_value_range_to_nonnegative (vr, type);
-   else if (vrp_stmt_computes_nonzero (stmt))
--    vr->set_nonnull (type);
-+    {
-+      vr->set_nonzero (type);
-+      vr->equiv_clear ();
-+    }
-   else
-     vr->set_varying ();
- }
-diff --git a/libgcc/libgcov-driver-system.c b/libgcc/libgcov-driver-system.c
-index b5f3e89ebdc..0d106002098 100644
---- a/libgcc/libgcov-driver-system.c
-+++ b/libgcc/libgcov-driver-system.c
-@@ -262,10 +262,8 @@ static int
- gcov_exit_open_gcda_file (struct gcov_info *gi_ptr,
- 			  struct gcov_filename *gf)
- {
--  const char *fname = gi_ptr->filename;
-   int append_slash = 0;
--
--  fname = gi_ptr->filename;
-+  const char *fname = gi_ptr->filename;
- 
-   /* Build relocated filename, stripping off leading
-      directories from the initial filename if requested. */
-diff --git a/libgcc/libgcov-util.c b/libgcc/libgcov-util.c
-index ae0dd017204..e672768966b 100644
---- a/libgcc/libgcov-util.c
-+++ b/libgcc/libgcov-util.c
-@@ -461,10 +461,9 @@ gcov_read_profile_dir (const char* dir_name, int recompute_summary ATTRIBUTE_UNU
- #ifdef HAVE_FTW_H
-   ftw (".", ftw_read_file, 50);
- #endif
--  ret = chdir (pwd);
-+  chdir (pwd);
-   free (pwd);
- 
--
-   return gcov_info_head;;
- }
- 
diff --git a/x86-Fix-bf16-and-matrix.patch b/x86-Fix-bf16-and-matrix.patch
deleted file mode 100644
index 8be95f82393cda051565836c2b6890616d6fedc8..0000000000000000000000000000000000000000
--- a/x86-Fix-bf16-and-matrix.patch
+++ /dev/null
@@ -1,321 +0,0 @@
-This backport contains 4 patches from the gcc mainstream tree.
-The commit ids of these patches are listed below in chronological order.

-
-0001-re-PR-target-90424-memcpy-into-vector-builtin-not-op.patch
-1bf2a0b90f2457f6d9301535560eb5e05978261b
-
-0002-testsuite-aarch64-arm-Add-missing-quotes-to-expected.patch
-0ec537f3500924f29505977aa89c2a1d4671c584
-
-0003-x86-Tweak-testcases-for-PR82361.patch
-ad4644f378fe2f731cd987a4aff14b935f530b88
-
-0004-x86-Robustify-vzeroupper-handling-across-calls.patch
-2a2e3a0dfcbe0861915f421d11b828f0c35023f0
-
-diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
-index 9282a8fb6..ba72da1ec 100644
---- a/gcc/config/i386/i386.c
-+++ b/gcc/config/i386/i386.c
-@@ -95,6 +95,7 @@ along with GCC; see the file COPYING3.  If not see
- #include "i386-builtins.h"
- #include "i386-expand.h"
- #include "i386-features.h"
-+#include "function-abi.h"
- 
- /* This file should be included last.  */
- #include "target-def.h"
-@@ -13529,6 +13530,15 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
- 	    }
- 	}
- 
-+      /* If the function is known to preserve some SSE registers,
-+	 RA and previous passes can legitimately rely on that for
-+	 modes wider than 256 bits.  It's only safe to issue a
-+	 vzeroupper if all SSE registers are clobbered.  */
-+      const function_abi &abi = insn_callee_abi (insn);
-+      if (!hard_reg_set_subset_p (reg_class_contents[ALL_SSE_REGS],
-+				  abi.mode_clobbers (V4DImode)))
-+	return AVX_U128_ANY;
-+
-       return AVX_U128_CLEAN;
-     }
- 
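The intent of the guard added above can be modelled with ordinary bitmasks: only when the callee's ABI clobbers every SSE register is it safe to report the upper AVX state as clean and later emit a vzeroupper. The sketch below uses invented mask values rather than real target data, and plain integers in place of HARD_REG_SETs:

#include <cassert>
#include <cstdint>

/* One bit per SSE register; all 32 set means "every SSE reg clobbered".  */
static const uint32_t ALL_SSE_REGS = 0xffffffffu;

enum avx_u128_state { AVX_U128_ANY, AVX_U128_CLEAN };

/* Model of the new check: if some SSE register survives the call, its
   upper bits may still be live, so the state cannot be treated as clean.  */
static avx_u128_state state_after_call (uint32_t callee_clobbered_sse)
{
  if ((callee_clobbered_sse & ALL_SSE_REGS) != ALL_SSE_REGS)
    return AVX_U128_ANY;
  return AVX_U128_CLEAN;
}

int main ()
{
  assert (state_after_call (ALL_SSE_REGS) == AVX_U128_CLEAN);	/* normal ABI */
  assert (state_after_call (0x000000ffu) == AVX_U128_ANY);	/* ABI preserving xmm8+ */
  return 0;
}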
-diff --git a/gcc/testsuite/g++.target/i386/pr90424-1.C b/gcc/testsuite/g++.target/i386/pr90424-1.C
-new file mode 100644
-index 000000000..9df8c089b
---- /dev/null
-+++ b/gcc/testsuite/g++.target/i386/pr90424-1.C
-@@ -0,0 +1,32 @@
-+/* { dg-do compile { target c++11 } } */
-+/* { dg-options "-O2 -msse2 -fdump-tree-optimized" } */
-+
-+template 
-+using V [[gnu::vector_size(16)]] = T;
-+
-+template )>
-+V load(const void *p) {
-+  using W = V;
-+  W r;
-+  __builtin_memcpy(&r, p, M);
-+  return r;
-+}
-+
-+// movq or movsd
-+template V load(const void *);     // bad
-+template V load(const void *);   // bad
-+template V load(const void *);       // bad
-+template V load(const void *);     // good
-+// the following is disabled because V2SF isn't a supported mode
-+// template V load(const void *);   // bad
-+template V load(const void *); // good (movsd?)
-+
-+// movd or movss
-+template V load(const void *);   // bad
-+template V load(const void *); // bad
-+template V load(const void *);     // good
-+template V load(const void *); // good
-+
-+/* We should end up with one load and one insert for each function.  */
-+/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 9 "optimized" } } */
-+/* { dg-final { scan-tree-dump-times "MEM" 9 "optimized" } } */
-diff --git a/gcc/testsuite/g++.target/i386/pr90424-2.C b/gcc/testsuite/g++.target/i386/pr90424-2.C
-new file mode 100644
-index 000000000..3abb65f45
---- /dev/null
-+++ b/gcc/testsuite/g++.target/i386/pr90424-2.C
-@@ -0,0 +1,31 @@
-+/* { dg-do compile { target c++11 } } */
-+/* { dg-options "-O2 -msse2 -fdump-tree-optimized" } */
-+
-+template 
-+using V [[gnu::vector_size(16)]] = T;
-+
-+template )>
-+V load(const void *p) {
-+  V r = {};
-+  __builtin_memcpy(&r, p, M);
-+  return r;
-+}
-+
-+// movq or movsd
-+template V load(const void *);     // bad
-+template V load(const void *);   // bad
-+template V load(const void *);       // bad
-+template V load(const void *);     // good
-+// the following is disabled because V2SF isn't a supported mode
-+// template V load(const void *);   // bad
-+template V load(const void *); // good (movsd?)
-+
-+// movd or movss
-+template V load(const void *);   // bad
-+template V load(const void *); // bad
-+template V load(const void *);     // good
-+template V load(const void *); // good
-+
-+/* We should end up with one load and one insert for each function.  */
-+/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 9 "optimized" } } */
-+/* { dg-final { scan-tree-dump-times "MEM" 9 "optimized" } } */
-diff --git a/gcc/testsuite/gcc.target/aarch64/target_attr_10.c b/gcc/testsuite/gcc.target/aarch64/target_attr_10.c
-index 184990471..d96a8733a 100644
---- a/gcc/testsuite/gcc.target/aarch64/target_attr_10.c
-+++ b/gcc/testsuite/gcc.target/aarch64/target_attr_10.c
-@@ -13,4 +13,4 @@ foo (uint8x16_t a, uint8x16_t b, uint8x16_t c)
-   return vbslq_u8 (a, b, c); /* { dg-message "called from here" } */
- }
- 
--/* { dg-error "inlining failed in call to always_inline" "" { target *-*-* } 0 } */
-+/* { dg-error "inlining failed in call to 'always_inline'" "" { target *-*-* } 0 } */
-diff --git a/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c b/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c
-index 05dc579f2..fb6e0b9cd 100644
---- a/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c
-+++ b/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c
-@@ -14,5 +14,5 @@ foo (uint8x16_t *p)
-   *p = vmovq_n_u8 (3); /* { dg-message "called from here" } */
- }
- 
--/* { dg-error "inlining failed in call to always_inline" "" { target *-*-* } 0 } */
-+/* { dg-error "inlining failed in call to 'always_inline'" "" { target *-*-* } 0 } */
- 
-diff --git a/gcc/testsuite/gcc.target/i386/pr82361-1.c b/gcc/testsuite/gcc.target/i386/pr82361-1.c
-index e7c356557..dec1792ae 100644
---- a/gcc/testsuite/gcc.target/i386/pr82361-1.c
-+++ b/gcc/testsuite/gcc.target/i386/pr82361-1.c
-@@ -4,50 +4,50 @@
- /* We should be able to optimize all %eax to %rax zero extensions, because
-    div and idiv instructions with 32-bit operands zero-extend both results.   */
- /* { dg-final { scan-assembler-not "movl\t%eax, %eax" } } */
--/* FIXME: We are still not able to optimize the modulo in f1/f2, only manage
--   one.  */
-+/* FIXME: The compiler does not merge zero-extension to the modulo part
-+   of f1 and f2.  */
- /* { dg-final { scan-assembler-times "movl\t%edx" 2 } } */
- 
- void
- f1 (unsigned int a, unsigned int b)
- {
--  unsigned long long c = a / b;
--  unsigned long long d = a % b;
-+  register unsigned long long c asm ("rax") = a / b;
-+  register unsigned long long d asm ("rdx") = a % b;
-   asm volatile ("" : : "r" (c), "r" (d));
- }
- 
- void
- f2 (int a, int b)
- {
--  unsigned long long c = (unsigned int) (a / b);
--  unsigned long long d = (unsigned int) (a % b);
-+  register unsigned long long c asm ("rax") = (unsigned int) (a / b);
-+  register unsigned long long d asm ("rdx") = (unsigned int) (a % b);
-   asm volatile ("" : : "r" (c), "r" (d));
- }
- 
- void
- f3 (unsigned int a, unsigned int b)
- {
--  unsigned long long c = a / b;
-+  register unsigned long long c asm ("rax") = a / b;
-   asm volatile ("" : : "r" (c));
- }
- 
- void
- f4 (int a, int b)
- {
--  unsigned long long c = (unsigned int) (a / b);
-+  register unsigned long long c asm ("rax") = (unsigned int) (a / b);
-   asm volatile ("" : : "r" (c));
- }
- 
- void
- f5 (unsigned int a, unsigned int b)
- {
--  unsigned long long d = a % b;
-+  register unsigned long long d asm ("rdx") = a % b;
-   asm volatile ("" : : "r" (d));
- }
- 
- void
- f6 (int a, int b)
- {
--  unsigned long long d = (unsigned int) (a % b);
-+  register unsigned long long d asm ("rdx") = (unsigned int) (a % b);
-   asm volatile ("" : : "r" (d));
- }
-diff --git a/gcc/testsuite/gcc.target/i386/pr82361-2.c b/gcc/testsuite/gcc.target/i386/pr82361-2.c
-index c1e484d6e..2d87de182 100644
---- a/gcc/testsuite/gcc.target/i386/pr82361-2.c
-+++ b/gcc/testsuite/gcc.target/i386/pr82361-2.c
-@@ -4,7 +4,8 @@
- /* We should be able to optimize all %eax to %rax zero extensions, because
-    div and idiv instructions with 32-bit operands zero-extend both results.   */
- /* { dg-final { scan-assembler-not "movl\t%eax, %eax" } } */
--/* Ditto %edx to %rdx zero extensions.  */
--/* { dg-final { scan-assembler-not "movl\t%edx, %edx" } } */
-+/* FIXME: The compiler does not merge zero-extension to the modulo part
-+   of f1 and f2.  */
-+/* { dg-final { scan-assembler-times "movl\t%edx" 4 } } */
- 
- #include "pr82361-1.c"
-diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
-index 527deffe4..be47519bc 100644
---- a/gcc/tree-cfg.c
-+++ b/gcc/tree-cfg.c
-@@ -4297,8 +4297,17 @@ verify_gimple_assign_ternary (gassign *stmt)
- 	}
-       if (! ((INTEGRAL_TYPE_P (rhs1_type)
- 	      && INTEGRAL_TYPE_P (rhs2_type))
-+	     /* Vector element insert.  */
- 	     || (VECTOR_TYPE_P (rhs1_type)
--		 && types_compatible_p (TREE_TYPE (rhs1_type), rhs2_type))))
-+		 && types_compatible_p (TREE_TYPE (rhs1_type), rhs2_type))
-+	     /* Aligned sub-vector insert.  */
-+	     || (VECTOR_TYPE_P (rhs1_type)
-+		 && VECTOR_TYPE_P (rhs2_type)
-+		 && types_compatible_p (TREE_TYPE (rhs1_type),
-+					TREE_TYPE (rhs2_type))
-+		 && multiple_p (TYPE_VECTOR_SUBPARTS (rhs1_type),
-+				TYPE_VECTOR_SUBPARTS (rhs2_type))
-+		 && multiple_of_p (bitsizetype, rhs3, TYPE_SIZE (rhs2_type)))))
- 	{
- 	  error ("not allowed type combination in BIT_INSERT_EXPR");
- 	  debug_generic_expr (rhs1_type);
-diff --git a/gcc/tree-ssa.c b/gcc/tree-ssa.c
-index 1dc544b6d..a149f5e79 100644
---- a/gcc/tree-ssa.c
-+++ b/gcc/tree-ssa.c
-@@ -1522,8 +1522,6 @@ non_rewritable_lvalue_p (tree lhs)
-       if (DECL_P (decl)
- 	  && VECTOR_TYPE_P (TREE_TYPE (decl))
- 	  && TYPE_MODE (TREE_TYPE (decl)) != BLKmode
--	  && operand_equal_p (TYPE_SIZE_UNIT (TREE_TYPE (lhs)),
--			      TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (decl))), 0)
- 	  && known_ge (mem_ref_offset (lhs), 0)
- 	  && known_gt (wi::to_poly_offset (TYPE_SIZE_UNIT (TREE_TYPE (decl))),
- 		       mem_ref_offset (lhs))
-@@ -1531,7 +1529,24 @@ non_rewritable_lvalue_p (tree lhs)
- 			    TYPE_SIZE_UNIT (TREE_TYPE (lhs)))
- 	  && known_ge (wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (decl))),
- 		       wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (lhs)))))
--	return false;
-+	{
-+	  poly_uint64 lhs_bits, nelts;
-+	  if (poly_int_tree_p (TYPE_SIZE (TREE_TYPE (lhs)), &lhs_bits)
-+	      && multiple_p (lhs_bits,
-+			     tree_to_uhwi
-+			       (TYPE_SIZE (TREE_TYPE (TREE_TYPE (decl)))),
-+			     &nelts))
-+	    {
-+	      if (known_eq (nelts, 1u))
-+		return false;
-+	      /* For sub-vector inserts the insert vector mode has to be
-+		 supported.  */
-+	      tree vtype = build_vector_type (TREE_TYPE (TREE_TYPE (decl)),
-+					      nelts);
-+	      if (TYPE_MODE (vtype) != BLKmode)
-+		return false;
-+	    }
-+	}
-     }
- 
-   /* A vector-insert using a BIT_FIELD_REF is rewritable using
-@@ -1869,20 +1884,30 @@ execute_update_addresses_taken (void)
- 		    && bitmap_bit_p (suitable_for_renaming, DECL_UID (sym))
- 		    && VECTOR_TYPE_P (TREE_TYPE (sym))
- 		    && TYPE_MODE (TREE_TYPE (sym)) != BLKmode
--		    && operand_equal_p (TYPE_SIZE_UNIT (TREE_TYPE (lhs)),
--					TYPE_SIZE_UNIT
--					  (TREE_TYPE (TREE_TYPE (sym))), 0)
--		    && tree_fits_uhwi_p (TREE_OPERAND (lhs, 1))
--		    && tree_int_cst_lt (TREE_OPERAND (lhs, 1),
--					TYPE_SIZE_UNIT (TREE_TYPE (sym)))
--		    && (tree_to_uhwi (TREE_OPERAND (lhs, 1))
--			% tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (lhs)))) == 0)
-+		    && known_ge (mem_ref_offset (lhs), 0)
-+		    && known_gt (wi::to_poly_offset
-+				   (TYPE_SIZE_UNIT (TREE_TYPE (sym))),
-+				 mem_ref_offset (lhs))
-+		    && multiple_of_p (sizetype,
-+				      TREE_OPERAND (lhs, 1),
-+				      TYPE_SIZE_UNIT (TREE_TYPE (lhs))))
- 		  {
- 		    tree val = gimple_assign_rhs1 (stmt);
- 		    if (! types_compatible_p (TREE_TYPE (val),
- 					      TREE_TYPE (TREE_TYPE (sym))))
- 		      {
--			tree tem = make_ssa_name (TREE_TYPE (TREE_TYPE (sym)));
-+			poly_uint64 lhs_bits, nelts;
-+			tree temtype = TREE_TYPE (TREE_TYPE (sym));
-+			if (poly_int_tree_p (TYPE_SIZE (TREE_TYPE (lhs)),
-+					     &lhs_bits)
-+			    && multiple_p (lhs_bits,
-+					   tree_to_uhwi
-+					     (TYPE_SIZE (TREE_TYPE
-+							   (TREE_TYPE (sym)))),
-+					   &nelts)
-+			    && maybe_ne (nelts, 1u))
-+			  temtype = build_vector_type (temtype, nelts);
-+			tree tem = make_ssa_name (temtype);
- 			gimple *pun
- 			  = gimple_build_assign (tem,
- 						 build1 (VIEW_CONVERT_EXPR,